author     upstream source tree <ports@midipix.org>  2015-03-15 20:14:05 -0400
committer  upstream source tree <ports@midipix.org>  2015-03-15 20:14:05 -0400
commit     554fd8c5195424bdbcabf5de30fdc183aba391bd
tree       976dc5ab7fddf506dadce60ae936f43f58787092  /gcc/config
download   cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.bz2
           cbb-gcc-4.6.4-554fd8c5195424bdbcabf5de30fdc183aba391bd.tar.xz
obtained gcc-4.6.4.tar.bz2 from upstream website;
verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified
upstream tarball. downloading a git-generated archive based on the 'upstream'
tag should provide you with a source tree that is binary identical to the one
extracted from the above tarball. if you have obtained the source via the
command 'git clone', however, do note that line-endings of files in your
working directory might differ from line-endings of the respective files in
the upstream repository.
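for reference, a minimal sketch of the verification steps described above,
written as shell commands; the scratch directories under /tmp are
illustrative assumptions, and the gcc release signing key is assumed to be
present in your gpg keyring already:

    # check the detached signature against the downloaded tarball
    gpg --verify gcc-4.6.4.tar.bz2.sig gcc-4.6.4.tar.bz2

    # extract the verified tarball and an archive generated from the
    # 'upstream' tag, then compare the two trees; per the note above,
    # the extracted trees should be binary identical
    mkdir -p /tmp/from-tarball /tmp/from-git
    tar -xjf gcc-4.6.4.tar.bz2 -C /tmp/from-tarball
    git archive --prefix=gcc-4.6.4/ upstream | tar -x -C /tmp/from-git
    diff -r /tmp/from-tarball/gcc-4.6.4 /tmp/from-git/gcc-4.6.4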
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/README | 5
-rw-r--r--  gcc/config/alpha/alpha-modes.def | 27
-rw-r--r--  gcc/config/alpha/alpha-protos.h | 131
-rw-r--r--  gcc/config/alpha/alpha.c | 11210
-rw-r--r--  gcc/config/alpha/alpha.h | 1344
-rw-r--r--  gcc/config/alpha/alpha.md | 7999
-rw-r--r--  gcc/config/alpha/alpha.opt | 134
-rw-r--r--  gcc/config/alpha/constraints.md | 121
-rw-r--r--  gcc/config/alpha/crtfastmath.c | 36
-rw-r--r--  gcc/config/alpha/driver-alpha.c | 100
-rw-r--r--  gcc/config/alpha/elf.h | 452
-rw-r--r--  gcc/config/alpha/elf.opt | 30
-rw-r--r--  gcc/config/alpha/ev4.md | 161
-rw-r--r--  gcc/config/alpha/ev5.md | 194
-rw-r--r--  gcc/config/alpha/ev6.md | 177
-rw-r--r--  gcc/config/alpha/freebsd.h | 81
-rw-r--r--  gcc/config/alpha/gnu.h | 49
-rw-r--r--  gcc/config/alpha/host-osf.c | 147
-rw-r--r--  gcc/config/alpha/libgcc-alpha-ldbl.ver | 50
-rw-r--r--  gcc/config/alpha/linux-elf.h | 57
-rw-r--r--  gcc/config/alpha/linux-unwind.h | 99
-rw-r--r--  gcc/config/alpha/linux.h | 106
-rw-r--r--  gcc/config/alpha/netbsd.h | 83
-rw-r--r--  gcc/config/alpha/openbsd.h | 45
-rw-r--r--  gcc/config/alpha/osf5-unwind.h | 329
-rw-r--r--  gcc/config/alpha/osf5.h | 278
-rw-r--r--  gcc/config/alpha/osf5.opt | 42
-rw-r--r--  gcc/config/alpha/predicates.md | 621
-rw-r--r--  gcc/config/alpha/qrnnd.asm | 163
-rw-r--r--  gcc/config/alpha/sync.md | 308
-rw-r--r--  gcc/config/alpha/t-alpha | 2
-rw-r--r--  gcc/config/alpha/t-crtfm | 5
-rw-r--r--  gcc/config/alpha/t-ieee | 2
-rw-r--r--  gcc/config/alpha/t-linux | 2
-rw-r--r--  gcc/config/alpha/t-osf-pthread | 5
-rw-r--r--  gcc/config/alpha/t-osf5 | 48
-rw-r--r--  gcc/config/alpha/t-vms | 65
-rw-r--r--  gcc/config/alpha/va_list.h | 42
-rw-r--r--  gcc/config/alpha/vms-dwarf2.asm | 77
-rw-r--r--  gcc/config/alpha/vms-dwarf2eh.asm | 30
-rw-r--r--  gcc/config/alpha/vms-gcc_shell_handler.c | 124
-rw-r--r--  gcc/config/alpha/vms-unwind.h | 293
-rw-r--r--  gcc/config/alpha/vms.h | 360
-rw-r--r--  gcc/config/alpha/vms64.h | 53
-rw-r--r--  gcc/config/alpha/x-alpha | 3
-rw-r--r--  gcc/config/alpha/x-osf | 4
-rw-r--r--  gcc/config/arc/arc-modes.def | 24
-rw-r--r--  gcc/config/arc/arc-protos.h | 63
-rw-r--r--  gcc/config/arc/arc.c | 2491
-rw-r--r--  gcc/config/arc/arc.h | 935
-rw-r--r--  gcc/config/arc/arc.md | 1376
-rw-r--r--  gcc/config/arc/arc.opt | 60
-rw-r--r--  gcc/config/arc/initfini.c | 155
-rw-r--r--  gcc/config/arc/lib1funcs.asm | 266
-rw-r--r--  gcc/config/arc/t-arc | 60
-rw-r--r--  gcc/config/arm/README-interworking | 749
-rw-r--r--  gcc/config/arm/aout.h | 380
-rw-r--r--  gcc/config/arm/arm-c.c | 45
-rw-r--r--  gcc/config/arm/arm-cores.def | 136
-rw-r--r--  gcc/config/arm/arm-generic.md | 153
-rw-r--r--  gcc/config/arm/arm-ldmstm.ml | 332
-rw-r--r--  gcc/config/arm/arm-modes.def | 78
-rw-r--r--  gcc/config/arm/arm-protos.h | 231
-rw-r--r--  gcc/config/arm/arm-tune.md | 5
-rw-r--r--  gcc/config/arm/arm.c | 23712
-rw-r--r--  gcc/config/arm/arm.h | 2464
-rw-r--r--  gcc/config/arm/arm.md | 10746
-rw-r--r--  gcc/config/arm/arm.opt | 171
-rw-r--r--  gcc/config/arm/arm1020e.md | 375
-rw-r--r--  gcc/config/arm/arm1026ejs.md | 240
-rw-r--r--  gcc/config/arm/arm1136jfs.md | 376
-rw-r--r--  gcc/config/arm/arm926ejs.md | 187
-rw-r--r--  gcc/config/arm/arm_neon.h | 12176
-rw-r--r--  gcc/config/arm/bpabi-v6m.S | 318
-rw-r--r--  gcc/config/arm/bpabi.S | 163
-rw-r--r--  gcc/config/arm/bpabi.c | 56
-rw-r--r--  gcc/config/arm/bpabi.h | 125
-rw-r--r--  gcc/config/arm/cirrus.md | 540
-rw-r--r--  gcc/config/arm/coff.h | 86
-rw-r--r--  gcc/config/arm/constraints.md | 335
-rw-r--r--  gcc/config/arm/cortex-a5.md | 297
-rw-r--r--  gcc/config/arm/cortex-a8-neon.md | 1312
-rw-r--r--  gcc/config/arm/cortex-a8.md | 275
-rw-r--r--  gcc/config/arm/cortex-a9-neon.md | 1237
-rw-r--r--  gcc/config/arm/cortex-a9.md | 269
-rw-r--r--  gcc/config/arm/cortex-m4-fpu.md | 111
-rw-r--r--  gcc/config/arm/cortex-m4.md | 111
-rw-r--r--  gcc/config/arm/cortex-r4.md | 292
-rw-r--r--  gcc/config/arm/cortex-r4f.md | 161
-rw-r--r--  gcc/config/arm/crti.asm | 86
-rw-r--r--  gcc/config/arm/crtn.asm | 82
-rw-r--r--  gcc/config/arm/ecos-elf.h | 27
-rw-r--r--  gcc/config/arm/elf.h | 166
-rw-r--r--  gcc/config/arm/fa526.md | 161
-rw-r--r--  gcc/config/arm/fa606te.md | 171
-rw-r--r--  gcc/config/arm/fa626te.md | 165
-rw-r--r--  gcc/config/arm/fa726te.md | 218
-rw-r--r--  gcc/config/arm/fmp626.md | 182
-rw-r--r--  gcc/config/arm/fp16.c | 145
-rw-r--r--  gcc/config/arm/fpa.md | 889
-rw-r--r--  gcc/config/arm/freebsd.h | 67
-rwxr-xr-x  gcc/config/arm/gentune.sh | 29
-rw-r--r--  gcc/config/arm/ieee754-df.S | 1447
-rw-r--r--  gcc/config/arm/ieee754-sf.S | 1060
-rw-r--r--  gcc/config/arm/iterators.md | 405
-rw-r--r--  gcc/config/arm/iwmmxt.md | 1332
-rw-r--r--  gcc/config/arm/ldmstm.md | 1191
-rw-r--r--  gcc/config/arm/lib1funcs.asm | 1829
-rw-r--r--  gcc/config/arm/libgcc-bpabi.ver | 108
-rw-r--r--  gcc/config/arm/libunwind.S | 363
-rw-r--r--  gcc/config/arm/linux-atomic.c | 278
-rw-r--r--  gcc/config/arm/linux-eabi.h | 103
-rw-r--r--  gcc/config/arm/linux-elf.h | 120
-rw-r--r--  gcc/config/arm/linux-gas.h | 56
-rw-r--r--  gcc/config/arm/mmintrin.h | 1254
-rw-r--r--  gcc/config/arm/neon-docgen.ml | 337
-rw-r--r--  gcc/config/arm/neon-gen.ml | 416
-rw-r--r--  gcc/config/arm/neon-schedgen.ml | 543
-rw-r--r--  gcc/config/arm/neon-testgen.ml | 283
-rw-r--r--  gcc/config/arm/neon.md | 5476
-rw-r--r--  gcc/config/arm/neon.ml | 1857
-rw-r--r--  gcc/config/arm/netbsd-elf.h | 157
-rw-r--r--  gcc/config/arm/netbsd.h | 150
-rw-r--r--  gcc/config/arm/pe.c | 257
-rw-r--r--  gcc/config/arm/pe.h | 148
-rw-r--r--  gcc/config/arm/pe.opt | 23
-rw-r--r--  gcc/config/arm/pr-support.c | 401
-rw-r--r--  gcc/config/arm/predicates.md | 688
-rw-r--r--  gcc/config/arm/rtems-eabi.h | 29
-rw-r--r--  gcc/config/arm/rtems-elf.h | 45
-rw-r--r--  gcc/config/arm/semi.h | 75
-rw-r--r--  gcc/config/arm/sfp-machine.h | 105
-rw-r--r--  gcc/config/arm/symbian.h | 105
-rw-r--r--  gcc/config/arm/sync.md | 602
-rw-r--r--  gcc/config/arm/t-arm | 66
-rw-r--r--  gcc/config/arm/t-arm-elf | 128
-rw-r--r--  gcc/config/arm/t-arm-softfp | 29
-rw-r--r--  gcc/config/arm/t-bpabi | 36
-rw-r--r--  gcc/config/arm/t-linux | 34
-rw-r--r--  gcc/config/arm/t-linux-androideabi | 10
-rw-r--r--  gcc/config/arm/t-linux-eabi | 43
-rw-r--r--  gcc/config/arm/t-netbsd | 47
-rw-r--r--  gcc/config/arm/t-pe | 52
-rw-r--r--  gcc/config/arm/t-rtems | 10
-rw-r--r--  gcc/config/arm/t-rtems-eabi | 8
-rw-r--r--  gcc/config/arm/t-strongarm-elf | 61
-rw-r--r--  gcc/config/arm/t-symbian | 53
-rw-r--r--  gcc/config/arm/t-vxworks | 44
-rw-r--r--  gcc/config/arm/t-wince-pe | 56
-rw-r--r--  gcc/config/arm/thumb2.md | 1121
-rw-r--r--  gcc/config/arm/uclinux-eabi.h | 66
-rw-r--r--  gcc/config/arm/uclinux-elf.h | 88
-rw-r--r--  gcc/config/arm/unaligned-funcs.c | 57
-rw-r--r--  gcc/config/arm/unknown-elf.h | 100
-rw-r--r--  gcc/config/arm/unwind-arm.c | 1263
-rw-r--r--  gcc/config/arm/unwind-arm.h | 281
-rw-r--r--  gcc/config/arm/vec-common.md | 110
-rw-r--r--  gcc/config/arm/vfp.md | 1153
-rw-r--r--  gcc/config/arm/vfp11.md | 92
-rw-r--r--  gcc/config/arm/vxworks.h | 113
-rw-r--r--  gcc/config/arm/vxworks.opt | 60
-rw-r--r--  gcc/config/arm/wince-pe.h | 26
-rw-r--r--  gcc/config/avr/avr-c.c | 85
-rwxr-xr-x  gcc/config/avr/avr-devices.c | 229
-rw-r--r--  gcc/config/avr/avr-protos.h | 121
-rw-r--r--  gcc/config/avr/avr-stdint.h | 66
-rw-r--r--  gcc/config/avr/avr.c | 6416
-rw-r--r--  gcc/config/avr/avr.h | 835
-rw-r--r--  gcc/config/avr/avr.md | 3248
-rw-r--r--  gcc/config/avr/avr.opt | 60
-rw-r--r--  gcc/config/avr/constraints.md | 109
-rwxr-xr-x  gcc/config/avr/driver-avr.c | 114
-rw-r--r--  gcc/config/avr/libgcc.S | 901
-rwxr-xr-x  gcc/config/avr/predicates.md | 140
-rw-r--r--  gcc/config/avr/rtems.h | 28
-rw-r--r--  gcc/config/avr/t-avr | 225
-rw-r--r--  gcc/config/avr/t-rtems | 3
-rw-r--r--  gcc/config/bfin/bfin-modes.def | 28
-rw-r--r--  gcc/config/bfin/bfin-protos.h | 122
-rw-r--r--  gcc/config/bfin/bfin.c | 6695
-rw-r--r--  gcc/config/bfin/bfin.h | 1220
-rw-r--r--  gcc/config/bfin/bfin.md | 4211
-rw-r--r--  gcc/config/bfin/bfin.opt | 101
-rw-r--r--  gcc/config/bfin/constraints.md | 225
-rw-r--r--  gcc/config/bfin/crti.s | 59
-rw-r--r--  gcc/config/bfin/crtlibid.s | 29
-rw-r--r--  gcc/config/bfin/crtn.s | 50
-rw-r--r--  gcc/config/bfin/elf.h | 73
-rw-r--r--  gcc/config/bfin/lib1funcs.asm | 146
-rw-r--r--  gcc/config/bfin/libgcc-bfin.ver | 1914
-rw-r--r--  gcc/config/bfin/linux-unwind.h | 164
-rw-r--r--  gcc/config/bfin/linux.h | 54
-rw-r--r--  gcc/config/bfin/predicates.md | 241
-rw-r--r--  gcc/config/bfin/print-sysroot-suffix.sh | 81
-rw-r--r--  gcc/config/bfin/rtems.h | 28
-rw-r--r--  gcc/config/bfin/sync.md | 178
-rw-r--r--  gcc/config/bfin/t-bfin | 43
-rw-r--r--  gcc/config/bfin/t-bfin-elf | 81
-rw-r--r--  gcc/config/bfin/t-bfin-linux | 72
-rw-r--r--  gcc/config/bfin/t-bfin-uclinux | 72
-rw-r--r--  gcc/config/bfin/t-rtems | 6
-rw-r--r--  gcc/config/bfin/uclinux.h | 41
-rw-r--r--  gcc/config/cris/arit.c | 304
-rw-r--r--  gcc/config/cris/cris-protos.h | 68
-rw-r--r--  gcc/config/cris/cris.c | 4132
-rw-r--r--  gcc/config/cris/cris.h | 1335
-rw-r--r--  gcc/config/cris/cris.md | 5110
-rw-r--r--  gcc/config/cris/cris.opt | 190
-rw-r--r--  gcc/config/cris/cris_abi_symbol.c | 45
-rw-r--r--  gcc/config/cris/elf.opt | 25
-rw-r--r--  gcc/config/cris/libgcc.ver | 7
-rw-r--r--  gcc/config/cris/linux.h | 151
-rw-r--r--  gcc/config/cris/linux.opt | 33
-rw-r--r--  gcc/config/cris/mulsi3.asm | 255
-rw-r--r--  gcc/config/cris/predicates.md | 174
-rw-r--r--  gcc/config/cris/t-cris | 58
-rw-r--r--  gcc/config/cris/t-elfmulti | 34
-rw-r--r--  gcc/config/cris/t-linux | 9
-rw-r--r--  gcc/config/crx/crx-protos.h | 79
-rw-r--r--  gcc/config/crx/crx.c | 1466
-rw-r--r--  gcc/config/crx/crx.h | 478
-rw-r--r--  gcc/config/crx/crx.md | 899
-rw-r--r--  gcc/config/crx/crx.opt | 34
-rw-r--r--  gcc/config/crx/t-crx | 37
-rw-r--r--  gcc/config/darwin-64.c | 72
-rw-r--r--  gcc/config/darwin-c.c | 717
-rw-r--r--  gcc/config/darwin-crt2.c | 153
-rw-r--r--  gcc/config/darwin-crt3.c | 532
-rw-r--r--  gcc/config/darwin-driver.c | 189
-rw-r--r--  gcc/config/darwin-f.c | 60
-rw-r--r--  gcc/config/darwin-ppc-ldouble-patch.def | 113
-rw-r--r--  gcc/config/darwin-protos.h | 127
-rw-r--r--  gcc/config/darwin-sections.def | 195
-rw-r--r--  gcc/config/darwin.c | 3472
-rw-r--r--  gcc/config/darwin.h | 990
-rw-r--r--  gcc/config/darwin.opt | 390
-rw-r--r--  gcc/config/darwin10.h | 32
-rw-r--r--  gcc/config/darwin9.h | 50
-rw-r--r--  gcc/config/dbx.h | 27
-rw-r--r--  gcc/config/dbxcoff.h | 62
-rw-r--r--  gcc/config/dbxelf.h | 68
-rw-r--r--  gcc/config/dfp-bit.c | 680
-rw-r--r--  gcc/config/dfp-bit.h | 626
-rw-r--r--  gcc/config/divmod.c | 73
-rw-r--r--  gcc/config/elfos.h | 531
-rw-r--r--  gcc/config/fixed-bit.c | 1216
-rw-r--r--  gcc/config/fixed-bit.h | 1273
-rw-r--r--  gcc/config/flat.h | 22
-rw-r--r--  gcc/config/floatunsidf.c | 15
-rw-r--r--  gcc/config/floatunsisf.c | 18
-rw-r--r--  gcc/config/floatunsitf.c | 15
-rw-r--r--  gcc/config/floatunsixf.c | 15
-rw-r--r--  gcc/config/fp-bit.c | 1657
-rw-r--r--  gcc/config/fp-bit.h | 499
-rw-r--r--  gcc/config/fr30/constraints.md | 72
-rw-r--r--  gcc/config/fr30/crti.asm | 61
-rw-r--r--  gcc/config/fr30/crtn.asm | 44
-rw-r--r--  gcc/config/fr30/fr30-protos.h | 35
-rw-r--r--  gcc/config/fr30/fr30.c | 1066
-rw-r--r--  gcc/config/fr30/fr30.h | 871
-rw-r--r--  gcc/config/fr30/fr30.md | 1268
-rw-r--r--  gcc/config/fr30/fr30.opt | 27
-rw-r--r--  gcc/config/fr30/lib1funcs.asm | 115
-rw-r--r--  gcc/config/fr30/predicates.md | 123
-rw-r--r--  gcc/config/fr30/t-fr30 | 56
-rw-r--r--  gcc/config/freebsd-nthr.h | 21
-rw-r--r--  gcc/config/freebsd-spec.h | 144
-rw-r--r--  gcc/config/freebsd-stdint.h | 56
-rw-r--r--  gcc/config/freebsd.h | 78
-rw-r--r--  gcc/config/freebsd.opt | 65
-rw-r--r--  gcc/config/frv/cmovd.c | 51
-rw-r--r--  gcc/config/frv/cmovh.c | 47
-rw-r--r--  gcc/config/frv/cmovw.c | 51
-rw-r--r--  gcc/config/frv/constraints.md | 174
-rw-r--r--  gcc/config/frv/frv-asm.h | 48
-rw-r--r--  gcc/config/frv/frv-modes.def | 34
-rw-r--r--  gcc/config/frv/frv-protos.h | 195
-rw-r--r--  gcc/config/frv/frv.c | 9680
-rw-r--r--  gcc/config/frv/frv.h | 2188
-rw-r--r--  gcc/config/frv/frv.md | 8022
-rw-r--r--  gcc/config/frv/frv.opt | 199
-rw-r--r--  gcc/config/frv/frvbegin.c | 157
-rw-r--r--  gcc/config/frv/frvend.c | 70
-rw-r--r--  gcc/config/frv/lib1funcs.asm | 269
-rw-r--r--  gcc/config/frv/libgcc-frv.ver | 73
-rw-r--r--  gcc/config/frv/linux.h | 75
-rw-r--r--  gcc/config/frv/modi.c | 4
-rw-r--r--  gcc/config/frv/predicates.md | 1543
-rw-r--r--  gcc/config/frv/t-frv | 113
-rw-r--r--  gcc/config/frv/t-linux | 33
-rw-r--r--  gcc/config/frv/uitod.c | 4
-rw-r--r--  gcc/config/frv/uitof.c | 4
-rw-r--r--  gcc/config/frv/ulltod.c | 4
-rw-r--r--  gcc/config/frv/ulltof.c | 4
-rw-r--r--  gcc/config/frv/umodi.c | 4
-rw-r--r--  gcc/config/fused-madd.opt | 25
-rw-r--r--  gcc/config/g.opt | 30
-rw-r--r--  gcc/config/glibc-stdint.h | 55
-rw-r--r--  gcc/config/gnu-user.h | 97
-rw-r--r--  gcc/config/gnu-user.opt | 39
-rw-r--r--  gcc/config/gnu.h | 46
-rw-r--r--  gcc/config/h8300/clzhi2.c | 35
-rw-r--r--  gcc/config/h8300/crti.asm | 63
-rw-r--r--  gcc/config/h8300/crtn.asm | 53
-rw-r--r--  gcc/config/h8300/ctzhi2.c | 35
-rw-r--r--  gcc/config/h8300/elf.h | 44
-rw-r--r--  gcc/config/h8300/fixunssfsi.c | 41
-rw-r--r--  gcc/config/h8300/genmova.sh | 198
-rw-r--r--  gcc/config/h8300/h8300-protos.h | 122
-rw-r--r--  gcc/config/h8300/h8300.c | 5980
-rw-r--r--  gcc/config/h8300/h8300.h | 1071
-rw-r--r--  gcc/config/h8300/h8300.md | 6267
-rw-r--r--  gcc/config/h8300/h8300.opt | 62
-rw-r--r--  gcc/config/h8300/lib1funcs.asm | 838
-rw-r--r--  gcc/config/h8300/mova.md | 858
-rw-r--r--  gcc/config/h8300/parityhi2.c | 36
-rw-r--r--  gcc/config/h8300/popcounthi2.c | 36
-rw-r--r--  gcc/config/h8300/predicates.md | 493
-rw-r--r--  gcc/config/h8300/rtems.h | 29
-rw-r--r--  gcc/config/h8300/t-elf | 6
-rw-r--r--  gcc/config/h8300/t-h8300 | 62
-rw-r--r--  gcc/config/h8300/t-rtems | 7
-rw-r--r--  gcc/config/host-darwin.c | 77
-rw-r--r--  gcc/config/host-darwin.h | 27
-rw-r--r--  gcc/config/host-hpux.c | 129
-rw-r--r--  gcc/config/host-linux.c | 220
-rw-r--r--  gcc/config/host-solaris.c | 125
-rw-r--r--  gcc/config/hpux-stdint.h | 34
-rw-r--r--  gcc/config/hpux11.opt | 33
-rw-r--r--  gcc/config/i386/abmintrin.h | 55
-rw-r--r--  gcc/config/i386/ammintrin.h | 88
-rw-r--r--  gcc/config/i386/athlon.md | 1187
-rw-r--r--  gcc/config/i386/atom.md | 796
-rw-r--r--  gcc/config/i386/att.h | 92
-rw-r--r--  gcc/config/i386/avxintrin.h | 1426
-rw-r--r--  gcc/config/i386/avxmath.h | 29
-rw-r--r--  gcc/config/i386/bdver1.md | 796
-rw-r--r--  gcc/config/i386/biarch64.h | 29
-rw-r--r--  gcc/config/i386/bmiintrin.h | 145
-rw-r--r--  gcc/config/i386/bmmintrin.h | 29
-rw-r--r--  gcc/config/i386/bsd.h | 100
-rw-r--r--  gcc/config/i386/constraints.md | 175
-rw-r--r--  gcc/config/i386/core2.md | 691
-rw-r--r--  gcc/config/i386/cpuid.h | 188
-rw-r--r--  gcc/config/i386/cross-stdarg.h | 73
-rw-r--r--  gcc/config/i386/crtdll.h | 42
-rw-r--r--  gcc/config/i386/crtfastmath.c | 89
-rw-r--r--  gcc/config/i386/crtprec.c | 47
-rw-r--r--  gcc/config/i386/cygming-crtbegin.c | 135
-rw-r--r--  gcc/config/i386/cygming-crtend.c | 88
-rw-r--r--  gcc/config/i386/cygming.h | 478
-rw-r--r--  gcc/config/i386/cygming.opt | 54
-rw-r--r--  gcc/config/i386/cygwin-stdint.h | 62
-rw-r--r--  gcc/config/i386/cygwin.asm | 188
-rw-r--r--  gcc/config/i386/cygwin.h | 142
-rw-r--r--  gcc/config/i386/darwin-libgcc.10.4.ver | 98
-rw-r--r--  gcc/config/i386/darwin-libgcc.10.5.ver | 102
-rw-r--r--  gcc/config/i386/darwin.h | 323
-rw-r--r--  gcc/config/i386/darwin64.h | 35
-rw-r--r--  gcc/config/i386/djgpp-stdint.h | 62
-rw-r--r--  gcc/config/i386/djgpp.h | 182
-rw-r--r--  gcc/config/i386/djgpp.opt | 28
-rw-r--r--  gcc/config/i386/driver-i386.c | 769
-rw-r--r--  gcc/config/i386/emmintrin.h | 1513
-rw-r--r--  gcc/config/i386/fma4intrin.h | 236
-rw-r--r--  gcc/config/i386/freebsd.h | 152
-rw-r--r--  gcc/config/i386/freebsd64.h | 46
-rw-r--r--  gcc/config/i386/gas.h | 124
-rw-r--r--  gcc/config/i386/geode.md | 152
-rw-r--r--  gcc/config/i386/gmm_malloc.h | 74
-rw-r--r--  gcc/config/i386/gmon-sol2.c | 459
-rw-r--r--  gcc/config/i386/gnu.h | 56
-rw-r--r--  gcc/config/i386/gstabs.h | 7
-rw-r--r--  gcc/config/i386/gthr-win32.c | 260
-rw-r--r--  gcc/config/i386/host-cygwin.c | 78
-rw-r--r--  gcc/config/i386/host-i386-darwin.c | 30
-rw-r--r--  gcc/config/i386/host-mingw32.c | 179
-rw-r--r--  gcc/config/i386/i386-builtin-types.awk | 280
-rw-r--r--  gcc/config/i386/i386-builtin-types.def | 420
-rw-r--r--  gcc/config/i386/i386-c.c | 401
-rw-r--r--  gcc/config/i386/i386-interix.h | 357
-rw-r--r--  gcc/config/i386/i386-interix3.h | 23
-rw-r--r--  gcc/config/i386/i386-modes.def | 91
-rw-r--r--  gcc/config/i386/i386-protos.h | 292
-rw-r--r--  gcc/config/i386/i386.c | 35376
-rw-r--r--  gcc/config/i386/i386.h | 2400
-rw-r--r--  gcc/config/i386/i386.md | 18347
-rw-r--r--  gcc/config/i386/i386.opt | 425
-rw-r--r--  gcc/config/i386/i386elf.h | 125
-rw-r--r--  gcc/config/i386/ia32intrin.h | 234
-rw-r--r--  gcc/config/i386/immintrin.h | 203
-rw-r--r--  gcc/config/i386/k6.md | 267
-rw-r--r--  gcc/config/i386/kfreebsd-gnu.h | 25
-rw-r--r--  gcc/config/i386/knetbsd-gnu.h | 23
-rw-r--r--  gcc/config/i386/kopensolaris-gnu.h | 22
-rw-r--r--  gcc/config/i386/libgcc-glibc.ver | 186
-rw-r--r--  gcc/config/i386/linux-unwind.h | 197
-rw-r--r--  gcc/config/i386/linux.h | 215
-rw-r--r--  gcc/config/i386/linux64.h | 132
-rw-r--r--  gcc/config/i386/lwpintrin.h | 100
-rw-r--r--  gcc/config/i386/lynx.h | 90
-rw-r--r--  gcc/config/i386/mingw-stdint.h | 50
-rw-r--r--  gcc/config/i386/mingw-w64.h | 79
-rw-r--r--  gcc/config/i386/mingw-w64.opt | 23
-rw-r--r--  gcc/config/i386/mingw.opt | 27
-rw-r--r--  gcc/config/i386/mingw32.h | 247
-rw-r--r--  gcc/config/i386/mm3dnow.h | 215
-rw-r--r--  gcc/config/i386/mmintrin.h | 921
-rw-r--r--  gcc/config/i386/mmx.md | 1716
-rw-r--r--  gcc/config/i386/msformat-c.c | 197
-rw-r--r--  gcc/config/i386/netbsd-elf.h | 124
-rw-r--r--  gcc/config/i386/netbsd.h | 96
-rw-r--r--  gcc/config/i386/netbsd64.h | 72
-rw-r--r--  gcc/config/i386/netware-crt0.c | 79
-rw-r--r--  gcc/config/i386/netware-libgcc.c | 58
-rw-r--r--  gcc/config/i386/netware-libgcc.def | 2
-rw-r--r--  gcc/config/i386/netware-libgcc.exp | 83
-rw-r--r--  gcc/config/i386/netware.c | 229
-rw-r--r--  gcc/config/i386/netware.h | 177
-rw-r--r--  gcc/config/i386/netware.opt | 33
-rw-r--r--  gcc/config/i386/nmmintrin.h | 37
-rw-r--r--  gcc/config/i386/nto.h | 108
-rw-r--r--  gcc/config/i386/nto.opt | 33
-rw-r--r--  gcc/config/i386/nwld.c | 73
-rw-r--r--  gcc/config/i386/nwld.h | 69
-rw-r--r--  gcc/config/i386/openbsd.h | 101
-rw-r--r--  gcc/config/i386/openbsdelf.h | 134
-rw-r--r--  gcc/config/i386/pentium.md | 306
-rw-r--r--  gcc/config/i386/pmm_malloc.h | 57
-rw-r--r--  gcc/config/i386/pmmintrin.h | 128
-rw-r--r--  gcc/config/i386/popcntintrin.h | 46
-rw-r--r--  gcc/config/i386/ppro.md | 758
-rw-r--r--  gcc/config/i386/predicates.md | 1226
-rw-r--r--  gcc/config/i386/rtemself.h | 32
-rw-r--r--  gcc/config/i386/sfp-machine.h | 5
-rw-r--r--  gcc/config/i386/smmintrin.h | 831
-rw-r--r--  gcc/config/i386/sol2-10.h | 138
-rw-r--r--  gcc/config/i386/sol2-c1.asm | 151
-rw-r--r--  gcc/config/i386/sol2-ci.asm | 40
-rw-r--r--  gcc/config/i386/sol2-cn.asm | 35
-rw-r--r--  gcc/config/i386/sol2-gas.h | 31
-rw-r--r--  gcc/config/i386/sol2-gc1.asm | 155
-rw-r--r--  gcc/config/i386/sol2-unwind.h | 289
-rw-r--r--  gcc/config/i386/sol2.h | 182
-rw-r--r--  gcc/config/i386/sse.md | 12125
-rw-r--r--  gcc/config/i386/ssemath.h | 25
-rw-r--r--  gcc/config/i386/sync.md | 242
-rw-r--r--  gcc/config/i386/sysv4.h | 73
-rw-r--r--  gcc/config/i386/t-crtfm | 8
-rw-r--r--  gcc/config/i386/t-crtpc | 34
-rw-r--r--  gcc/config/i386/t-crtpic | 10
-rw-r--r--  gcc/config/i386/t-crtstuff | 7
-rw-r--r--  gcc/config/i386/t-cygming | 109
-rw-r--r--  gcc/config/i386/t-cygwin | 39
-rw-r--r--  gcc/config/i386/t-darwin | 5
-rw-r--r--  gcc/config/i386/t-darwin64 | 8
-rw-r--r--  gcc/config/i386/t-djgpp | 2
-rw-r--r--  gcc/config/i386/t-dlldir | 6
-rw-r--r--  gcc/config/i386/t-dlldir-x | 9
-rw-r--r--  gcc/config/i386/t-dw2-eh | 3
-rw-r--r--  gcc/config/i386/t-fprules-softfp | 6
-rw-r--r--  gcc/config/i386/t-gmm_malloc | 6
-rw-r--r--  gcc/config/i386/t-gnu | 1
-rw-r--r--  gcc/config/i386/t-gthr-win32 | 2
-rw-r--r--  gcc/config/i386/t-i386 | 41
-rw-r--r--  gcc/config/i386/t-i386elf | 4
-rw-r--r--  gcc/config/i386/t-interix | 8
-rw-r--r--  gcc/config/i386/t-kfreebsd | 5
-rw-r--r--  gcc/config/i386/t-linux | 9
-rw-r--r--  gcc/config/i386/t-linux64 | 36
-rw-r--r--  gcc/config/i386/t-mingw-w32 | 12
-rw-r--r--  gcc/config/i386/t-mingw-w64 | 12
-rw-r--r--  gcc/config/i386/t-mingw32 | 5
-rw-r--r--  gcc/config/i386/t-netware | 10
-rw-r--r--  gcc/config/i386/t-nto | 4
-rw-r--r--  gcc/config/i386/t-nwld | 50
-rw-r--r--  gcc/config/i386/t-openbsd | 6
-rw-r--r--  gcc/config/i386/t-pmm_malloc | 6
-rw-r--r--  gcc/config/i386/t-rtems-i386 | 69
-rw-r--r--  gcc/config/i386/t-sjlj-eh | 3
-rw-r--r--  gcc/config/i386/t-sol2-10 | 29
-rw-r--r--  gcc/config/i386/t-svr3dbx | 7
-rw-r--r--  gcc/config/i386/t-vxworks | 8
-rw-r--r--  gcc/config/i386/t-vxworksae | 5
-rw-r--r--  gcc/config/i386/tbmintrin.h | 191
-rw-r--r--  gcc/config/i386/tmmintrin.h | 244
-rw-r--r--  gcc/config/i386/unix.h | 81
-rw-r--r--  gcc/config/i386/vx-common.h | 33
-rw-r--r--  gcc/config/i386/vxworks.h | 76
-rw-r--r--  gcc/config/i386/vxworksae.h | 35
-rw-r--r--  gcc/config/i386/w32-unwind.h | 204
-rw-r--r--  gcc/config/i386/winnt-cxx.c | 175
-rw-r--r--  gcc/config/i386/winnt-stubs.c | 52
-rw-r--r--  gcc/config/i386/winnt.c | 1134
-rw-r--r--  gcc/config/i386/wmmintrin.h | 120
-rw-r--r--  gcc/config/i386/x-cygwin | 4
-rw-r--r--  gcc/config/i386/x-darwin | 4
-rw-r--r--  gcc/config/i386/x-i386 | 4
-rw-r--r--  gcc/config/i386/x-mingw32 | 31
-rw-r--r--  gcc/config/i386/x86-64.h | 106
-rw-r--r--  gcc/config/i386/x86intrin.h | 96
-rw-r--r--  gcc/config/i386/xm-cygwin.h | 22
-rw-r--r--  gcc/config/i386/xm-djgpp.h | 84
-rw-r--r--  gcc/config/i386/xm-mingw32.h | 35
-rw-r--r--  gcc/config/i386/xmmintrin.h | 1251
-rw-r--r--  gcc/config/i386/xopintrin.h | 835
-rw-r--r--  gcc/config/ia64/constraints.md | 154
-rw-r--r--  gcc/config/ia64/crtbegin.asm | 254
-rw-r--r--  gcc/config/ia64/crtend.asm | 121
-rw-r--r--  gcc/config/ia64/crtfastmath.c | 34
-rw-r--r--  gcc/config/ia64/crti.asm | 53
-rw-r--r--  gcc/config/ia64/crtn.asm | 43
-rw-r--r--  gcc/config/ia64/div.md | 1221
-rw-r--r--  gcc/config/ia64/elf.h | 73
-rw-r--r--  gcc/config/ia64/fde-glibc.c | 162
-rw-r--r--  gcc/config/ia64/fde-vms.c | 157
-rw-r--r--  gcc/config/ia64/freebsd.h | 55
-rw-r--r--  gcc/config/ia64/hpux.h | 235
-rw-r--r--  gcc/config/ia64/ia64-c.c | 191
-rw-r--r--  gcc/config/ia64/ia64-modes.def | 86
-rw-r--r--  gcc/config/ia64/ia64-protos.h | 101
-rw-r--r--  gcc/config/ia64/ia64.c | 11055
-rw-r--r--  gcc/config/ia64/ia64.h | 1823
-rw-r--r--  gcc/config/ia64/ia64.md | 5188
-rw-r--r--  gcc/config/ia64/ia64.opt | 181
-rw-r--r--  gcc/config/ia64/ia64intrin.h | 2
-rw-r--r--  gcc/config/ia64/ilp32.opt | 7
-rw-r--r--  gcc/config/ia64/itanium2.md | 1867
-rw-r--r--  gcc/config/ia64/lib1funcs.asm | 795
-rw-r--r--  gcc/config/ia64/libgcc-glibc.ver | 97
-rw-r--r--  gcc/config/ia64/libgcc-ia64.ver | 30
-rw-r--r--  gcc/config/ia64/linux-unwind.h | 199
-rw-r--r--  gcc/config/ia64/linux.h | 95
-rw-r--r--  gcc/config/ia64/predicates.md | 630
-rw-r--r--  gcc/config/ia64/quadlib.c | 78
-rw-r--r--  gcc/config/ia64/sfp-machine.h | 116
-rw-r--r--  gcc/config/ia64/sync.md | 187
-rw-r--r--  gcc/config/ia64/sysv4.h | 145
-rw-r--r--  gcc/config/ia64/t-fprules-softfp | 6
-rw-r--r--  gcc/config/ia64/t-glibc | 7
-rw-r--r--  gcc/config/ia64/t-glibc-libunwind | 4
-rw-r--r--  gcc/config/ia64/t-hpux | 75
-rw-r--r--  gcc/config/ia64/t-ia64 | 57
-rw-r--r--  gcc/config/ia64/t-vms | 49
-rw-r--r--  gcc/config/ia64/unwind-ia64.c | 2460
-rw-r--r--  gcc/config/ia64/unwind-ia64.h | 43
-rw-r--r--  gcc/config/ia64/vect.md | 1730
-rw-r--r--  gcc/config/ia64/vms-crtinit.asm | 24
-rw-r--r--  gcc/config/ia64/vms-unwind.h | 307
-rw-r--r--  gcc/config/ia64/vms.h | 211
-rw-r--r--  gcc/config/ia64/vms.opt | 30
-rw-r--r--  gcc/config/ia64/vms64.h | 41
-rw-r--r--  gcc/config/ia64/vms_symvec_libgcc_s.opt | 89
-rw-r--r--  gcc/config/interix.h | 110
-rw-r--r--  gcc/config/interix.opt | 36
-rw-r--r--  gcc/config/interix3.h | 34
-rw-r--r--  gcc/config/iq2000/abi | 239
-rw-r--r--  gcc/config/iq2000/constraints.md | 79
-rw-r--r--  gcc/config/iq2000/iq2000-protos.h | 48
-rw-r--r--  gcc/config/iq2000/iq2000.c | 3523
-rw-r--r--  gcc/config/iq2000/iq2000.h | 913
-rw-r--r--  gcc/config/iq2000/iq2000.md | 2180
-rw-r--r--  gcc/config/iq2000/iq2000.opt | 44
-rw-r--r--  gcc/config/iq2000/lib2extra-funcs.c | 40
-rw-r--r--  gcc/config/iq2000/predicates.md | 240
-rw-r--r--  gcc/config/iq2000/t-iq2000 | 50
-rw-r--r--  gcc/config/kfreebsd-gnu.h | 37
-rw-r--r--  gcc/config/knetbsd-gnu.h | 38
-rw-r--r--  gcc/config/kopensolaris-gnu.h | 37
-rw-r--r--  gcc/config/libgcc-glibc.ver | 55
-rw-r--r--  gcc/config/linux-android.h | 60
-rw-r--r--  gcc/config/linux-android.opt | 30
-rw-r--r--  gcc/config/linux.h | 99
-rw-r--r--  gcc/config/linux.opt | 32
-rw-r--r--  gcc/config/lm32/constraints.md | 57
-rw-r--r--  gcc/config/lm32/lm32-protos.h | 39
-rw-r--r--  gcc/config/lm32/lm32.c | 1248
-rw-r--r--  gcc/config/lm32/lm32.h | 556
-rw-r--r--  gcc/config/lm32/lm32.md | 996
-rw-r--r--  gcc/config/lm32/lm32.opt | 40
-rw-r--r--  gcc/config/lm32/predicates.md | 77
-rw-r--r--  gcc/config/lm32/rtems.h | 32
-rw-r--r--  gcc/config/lm32/sfp-machine.h | 51
-rw-r--r--  gcc/config/lm32/t-fprules-softfp | 5
-rw-r--r--  gcc/config/lm32/t-lm32 | 2
-rw-r--r--  gcc/config/lm32/uclinux-elf.h | 84
-rw-r--r--  gcc/config/lynx.h | 177
-rw-r--r--  gcc/config/lynx.opt | 31
-rw-r--r--  gcc/config/m32c/addsub.md | 260
-rw-r--r--  gcc/config/m32c/bitops.md | 422
-rw-r--r--  gcc/config/m32c/blkmov.md | 242
-rw-r--r--  gcc/config/m32c/cond.md | 293
-rw-r--r--  gcc/config/m32c/jump.md | 135
-rw-r--r--  gcc/config/m32c/m32c-lib1.S | 231
-rw-r--r--  gcc/config/m32c/m32c-lib2-trapv.c | 43
-rw-r--r--  gcc/config/m32c/m32c-lib2.c | 134
-rw-r--r--  gcc/config/m32c/m32c-modes.def | 29
-rw-r--r--  gcc/config/m32c/m32c-pragma.c | 135
-rw-r--r--  gcc/config/m32c/m32c-protos.h | 103
-rw-r--r--  gcc/config/m32c/m32c.abi | 132
-rw-r--r--  gcc/config/m32c/m32c.c | 4860
-rw-r--r--  gcc/config/m32c/m32c.h | 688
-rw-r--r--  gcc/config/m32c/m32c.md | 80
-rw-r--r--  gcc/config/m32c/m32c.opt | 44
-rw-r--r--  gcc/config/m32c/minmax.md | 58
-rw-r--r--  gcc/config/m32c/mov.md | 491
-rw-r--r--  gcc/config/m32c/muldiv.md | 288
-rw-r--r--  gcc/config/m32c/predicates.md | 299
-rw-r--r--  gcc/config/m32c/prologue.md | 199
-rw-r--r--  gcc/config/m32c/rtems.h | 33
-rw-r--r--  gcc/config/m32c/shift.md | 352
-rw-r--r--  gcc/config/m32c/t-m32c | 69
-rw-r--r--  gcc/config/m32r/constraints.md | 147
-rw-r--r--  gcc/config/m32r/initfini.c | 168
-rw-r--r--  gcc/config/m32r/libgcc-glibc.ver | 48
-rw-r--r--  gcc/config/m32r/linux.h | 101
-rw-r--r--  gcc/config/m32r/little.h | 21
-rw-r--r--  gcc/config/m32r/m32r-protos.h | 66
-rw-r--r--  gcc/config/m32r/m32r.c | 2959
-rw-r--r--  gcc/config/m32r/m32r.h | 1165
-rw-r--r--  gcc/config/m32r/m32r.md | 2279
-rw-r--r--  gcc/config/m32r/m32r.opt | 82
-rw-r--r--  gcc/config/m32r/predicates.md | 440
-rw-r--r--  gcc/config/m32r/rtems.h | 33
-rw-r--r--  gcc/config/m32r/t-linux | 57
-rw-r--r--  gcc/config/m32r/t-m32r | 82
-rw-r--r--  gcc/config/m68hc11/larith.asm | 1333
-rw-r--r--  gcc/config/m68hc11/m68hc11-crt0.S | 86
-rw-r--r--  gcc/config/m68hc11/m68hc11-protos.h | 109
-rw-r--r--  gcc/config/m68hc11/m68hc11.c | 5582
-rw-r--r--  gcc/config/m68hc11/m68hc11.h | 1382
-rw-r--r--  gcc/config/m68hc11/m68hc11.md | 7579
-rw-r--r--  gcc/config/m68hc11/m68hc11.opt | 94
-rw-r--r--  gcc/config/m68hc11/m68hc12.h | 45
-rw-r--r--  gcc/config/m68hc11/predicates.md | 228
-rw-r--r--  gcc/config/m68hc11/t-m68hc11 | 96
-rw-r--r--  gcc/config/m68k/cf.md | 2250
-rw-r--r--  gcc/config/m68k/constraints.md | 164
-rw-r--r--  gcc/config/m68k/crti.s | 44
-rw-r--r--  gcc/config/m68k/crtn.s | 40
-rw-r--r--  gcc/config/m68k/fpgnulib.c | 595
-rw-r--r--  gcc/config/m68k/ieee.opt | 24
-rw-r--r--  gcc/config/m68k/lb1sf68.asm | 4116
-rw-r--r--  gcc/config/m68k/linux-unwind.h | 158
-rw-r--r--  gcc/config/m68k/linux.h | 242
-rw-r--r--  gcc/config/m68k/m68020-elf.h | 30
-rw-r--r--  gcc/config/m68k/m68k-devices.def | 189
-rw-r--r--  gcc/config/m68k/m68k-modes.def | 25
-rw-r--r--  gcc/config/m68k/m68k-none.h | 19
-rw-r--r--  gcc/config/m68k/m68k-protos.h | 102
-rw-r--r--  gcc/config/m68k/m68k.c | 6615
-rw-r--r--  gcc/config/m68k/m68k.h | 1034
-rw-r--r--  gcc/config/m68k/m68k.md | 7808
-rw-r--r--  gcc/config/m68k/m68k.opt | 188
-rw-r--r--  gcc/config/m68k/m68kelf.h | 164
-rw-r--r--  gcc/config/m68k/m68kemb.h | 53
-rw-r--r--  gcc/config/m68k/math-68881.h | 529
-rw-r--r--  gcc/config/m68k/netbsd-elf.h | 315
-rw-r--r--  gcc/config/m68k/openbsd.h | 89
-rw-r--r--  gcc/config/m68k/predicates.md | 246
-rw-r--r--  gcc/config/m68k/print-sysroot-suffix.sh | 81
-rw-r--r--  gcc/config/m68k/rtemself.h | 33
-rw-r--r--  gcc/config/m68k/t-cf | 7
-rw-r--r--  gcc/config/m68k/t-crtstuff | 10
-rw-r--r--  gcc/config/m68k/t-floatlib | 31
-rw-r--r--  gcc/config/m68k/t-linux | 33
-rw-r--r--  gcc/config/m68k/t-m68k | 4
-rw-r--r--  gcc/config/m68k/t-m68kbare | 4
-rw-r--r--  gcc/config/m68k/t-m68kelf | 4
-rw-r--r--  gcc/config/m68k/t-mlibs | 115
-rw-r--r--  gcc/config/m68k/t-openbsd | 4
-rw-r--r--  gcc/config/m68k/t-rtems | 9
-rw-r--r--  gcc/config/m68k/t-slibgcc-elf-ver | 3
-rw-r--r--  gcc/config/m68k/t-uclinux | 36
-rw-r--r--  gcc/config/m68k/uclinux-oldabi.h | 70
-rw-r--r--  gcc/config/m68k/uclinux.h | 72
-rw-r--r--  gcc/config/m68k/uclinux.opt | 36
-rw-r--r--  gcc/config/mcore/constraints.md | 112
-rw-r--r--  gcc/config/mcore/crti.asm | 62
-rw-r--r--  gcc/config/mcore/crtn.asm | 44
-rw-r--r--  gcc/config/mcore/lib1.asm | 303
-rw-r--r--  gcc/config/mcore/mcore-elf.h | 129
-rw-r--r--  gcc/config/mcore/mcore-pe.h | 102
-rw-r--r--  gcc/config/mcore/mcore-protos.h | 69
-rw-r--r--  gcc/config/mcore/mcore.c | 3206
-rw-r--r--  gcc/config/mcore/mcore.h | 851
-rw-r--r--  gcc/config/mcore/mcore.md | 3085
-rw-r--r--  gcc/config/mcore/mcore.opt | 75
-rw-r--r--  gcc/config/mcore/predicates.md | 338
-rw-r--r--  gcc/config/mcore/t-mcore | 73
-rw-r--r--  gcc/config/mcore/t-mcore-pe | 56
-rw-r--r--  gcc/config/memcmp.c | 16
-rw-r--r--  gcc/config/memcpy.c | 12
-rw-r--r--  gcc/config/memmove.c | 20
-rw-r--r--  gcc/config/memset.c | 11
-rw-r--r--  gcc/config/mep/constraints.md | 162
-rw-r--r--  gcc/config/mep/default.h | 10
-rw-r--r--  gcc/config/mep/intrinsics.h | 620
-rw-r--r--  gcc/config/mep/intrinsics.md | 21568
-rw-r--r--  gcc/config/mep/ivc2-template.h | 9
-rw-r--r--  gcc/config/mep/mep-c5.cpu | 278
-rw-r--r--  gcc/config/mep/mep-core.cpu | 3081
-rw-r--r--  gcc/config/mep/mep-default.cpu | 26
-rw-r--r--  gcc/config/mep/mep-ext-cop.cpu | 24
-rw-r--r--  gcc/config/mep/mep-intrin.h | 8933
-rw-r--r--  gcc/config/mep/mep-ivc2.cpu | 9776
-rw-r--r--  gcc/config/mep/mep-lib1.asm | 125
-rw-r--r--  gcc/config/mep/mep-lib2.c | 139
-rw-r--r--  gcc/config/mep/mep-pragma.c | 404
-rw-r--r--  gcc/config/mep/mep-protos.h | 133
-rw-r--r--  gcc/config/mep/mep-tramp.c | 103
-rw-r--r--  gcc/config/mep/mep.c | 7464
-rw-r--r--  gcc/config/mep/mep.cpu | 21
-rw-r--r--  gcc/config/mep/mep.h | 824
-rw-r--r--  gcc/config/mep/mep.md | 2266
-rw-r--r--  gcc/config/mep/mep.opt | 162
-rw-r--r--  gcc/config/mep/predicates.md | 184
-rw-r--r--  gcc/config/mep/t-mep | 105
-rw-r--r--  gcc/config/microblaze/constraints.md | 72
-rw-r--r--  gcc/config/microblaze/crti.s | 39
-rw-r--r--  gcc/config/microblaze/crtn.s | 35
-rw-r--r--  gcc/config/microblaze/linux.h | 35
-rw-r--r--  gcc/config/microblaze/microblaze-c.c | 92
-rw-r--r--  gcc/config/microblaze/microblaze-protos.h | 58
-rw-r--r--  gcc/config/microblaze/microblaze.c | 3062
-rw-r--r--  gcc/config/microblaze/microblaze.h | 938
-rw-r--r--  gcc/config/microblaze/microblaze.md | 2231
-rw-r--r--  gcc/config/microblaze/microblaze.opt | 115
-rw-r--r--  gcc/config/microblaze/predicates.md | 64
-rw-r--r--  gcc/config/microblaze/t-microblaze | 33
-rw-r--r--  gcc/config/mips/10000.md | 253
-rw-r--r--  gcc/config/mips/20kc.md | 284
-rw-r--r--  gcc/config/mips/24k.md | 457
-rw-r--r--  gcc/config/mips/3000.md | 71
-rw-r--r--  gcc/config/mips/4000.md | 32
-rw-r--r--  gcc/config/mips/4100.md | 51
-rw-r--r--  gcc/config/mips/4130.md | 153
-rw-r--r--  gcc/config/mips/4300.md | 85
-rw-r--r--  gcc/config/mips/4600.md | 87
-rw-r--r--  gcc/config/mips/4k.md | 153
-rw-r--r--  gcc/config/mips/5000.md | 80
-rw-r--r--  gcc/config/mips/5400.md | 184
-rw-r--r--  gcc/config/mips/5500.md | 227
-rw-r--r--  gcc/config/mips/5k.md | 229
-rw-r--r--  gcc/config/mips/6000.md | 56
-rw-r--r--  gcc/config/mips/7000.md | 214
-rw-r--r--  gcc/config/mips/74k.md | 418
-rw-r--r--  gcc/config/mips/9000.md | 151
-rw-r--r--  gcc/config/mips/constraints.md | 233
-rw-r--r--  gcc/config/mips/crtfastmath.c | 53
-rw-r--r--  gcc/config/mips/crti.asm | 49
-rw-r--r--  gcc/config/mips/crtn.asm | 52
-rw-r--r--  gcc/config/mips/driver-native.c | 81
-rw-r--r--  gcc/config/mips/elf.h | 51
-rw-r--r--  gcc/config/mips/elfoabi.h | 40
-rw-r--r--  gcc/config/mips/elforion.h | 20
-rw-r--r--  gcc/config/mips/generic.md | 105
-rw-r--r--  gcc/config/mips/iris6.h | 341
-rw-r--r--  gcc/config/mips/iris6.opt | 45
-rw-r--r--  gcc/config/mips/irix-crti.asm | 81
-rw-r--r--  gcc/config/mips/irix-crtn.asm | 50
-rw-r--r--  gcc/config/mips/libgcc-mips16.ver | 86
-rw-r--r--  gcc/config/mips/linux-unwind.h | 121
-rw-r--r--  gcc/config/mips/linux.h | 151
-rw-r--r--  gcc/config/mips/linux64.h | 70
-rw-r--r--  gcc/config/mips/loongson.h | 690
-rw-r--r--  gcc/config/mips/loongson.md | 529
-rw-r--r--  gcc/config/mips/loongson2ef.md | 252
-rw-r--r--  gcc/config/mips/loongson3a.md | 137
-rw-r--r--  gcc/config/mips/mips-dsp.md | 1198
-rw-r--r--  gcc/config/mips/mips-dspr2.md | 623
-rw-r--r--  gcc/config/mips/mips-fixed.md | 156
-rw-r--r--  gcc/config/mips/mips-ftypes.def | 126
-rw-r--r--  gcc/config/mips/mips-modes.def | 48
-rw-r--r--  gcc/config/mips/mips-protos.h | 341
-rw-r--r--  gcc/config/mips/mips-ps-3d.md | 635
-rw-r--r--  gcc/config/mips/mips.c | 16662
-rw-r--r--  gcc/config/mips/mips.h | 2984
-rw-r--r--  gcc/config/mips/mips.md | 6486
-rw-r--r--  gcc/config/mips/mips.opt | 310
-rw-r--r--  gcc/config/mips/mips16.S | 712
-rw-r--r--  gcc/config/mips/netbsd.h | 187
-rw-r--r--  gcc/config/mips/octeon.md | 88
-rw-r--r--  gcc/config/mips/openbsd.h | 101
-rw-r--r--  gcc/config/mips/predicates.md | 342
-rw-r--r--  gcc/config/mips/r3900.h | 40
-rw-r--r--  gcc/config/mips/rtems.h | 35
-rw-r--r--  gcc/config/mips/sb1.md | 579
-rw-r--r--  gcc/config/mips/sdb.h | 87
-rw-r--r--  gcc/config/mips/sde.h | 135
-rw-r--r--  gcc/config/mips/sde.opt | 28
-rw-r--r--  gcc/config/mips/sdemtk.h | 103
-rw-r--r--  gcc/config/mips/sr71k.md | 337
-rw-r--r--  gcc/config/mips/st.h | 31
-rw-r--r--  gcc/config/mips/sync.md | 560
-rw-r--r--  gcc/config/mips/t-elf | 44
-rw-r--r--  gcc/config/mips/t-iris | 9
-rw-r--r--  gcc/config/mips/t-iris6 | 38
-rw-r--r--  gcc/config/mips/t-isa3264 | 54
-rw-r--r--  gcc/config/mips/t-libgcc-mips16 | 45
-rw-r--r--  gcc/config/mips/t-linux64 | 40
-rw-r--r--  gcc/config/mips/t-mips | 41
-rw-r--r--  gcc/config/mips/t-r3900 | 35
-rw-r--r--  gcc/config/mips/t-rtems | 34
-rw-r--r--  gcc/config/mips/t-sb1 | 62
-rw-r--r--  gcc/config/mips/t-sde | 53
-rw-r--r--  gcc/config/mips/t-sdemtk | 44
-rw-r--r--  gcc/config/mips/t-slibgcc-irix | 52
-rw-r--r--  gcc/config/mips/t-sr71k | 67
-rw-r--r--  gcc/config/mips/t-st | 32
-rw-r--r--  gcc/config/mips/t-vr | 130
-rw-r--r--  gcc/config/mips/t-vxworks | 35
-rw-r--r--  gcc/config/mips/vr.h | 58
-rw-r--r--  gcc/config/mips/vr4120-div.S | 74
-rw-r--r--  gcc/config/mips/vxworks.h | 82
-rw-r--r--  gcc/config/mips/x-native | 3
-rw-r--r--  gcc/config/mips/xlr.md | 89
-rw-r--r--  gcc/config/mmix/crti.asm | 116
-rw-r--r--  gcc/config/mmix/crtn.asm | 87
-rw-r--r--  gcc/config/mmix/mmix-modes.def | 49
-rw-r--r--  gcc/config/mmix/mmix-protos.h | 97
-rw-r--r--  gcc/config/mmix/mmix.c | 2838
-rw-r--r--  gcc/config/mmix/mmix.h | 899
-rw-r--r--  gcc/config/mmix/mmix.md | 1240
-rw-r--r--  gcc/config/mmix/mmix.opt | 99
-rw-r--r--  gcc/config/mmix/predicates.md | 155
-rw-r--r--  gcc/config/mmix/t-mmix | 31
-rw-r--r--  gcc/config/mn10300/constraints.md | 107
-rw-r--r--  gcc/config/mn10300/linux.h | 90
-rw-r--r--  gcc/config/mn10300/mn10300-modes.def | 24
-rw-r--r--  gcc/config/mn10300/mn10300-protos.h | 57
-rw-r--r--  gcc/config/mn10300/mn10300.c | 3254
-rw-r--r--  gcc/config/mn10300/mn10300.h | 766
-rw-r--r--  gcc/config/mn10300/mn10300.md | 2154
-rw-r--r--  gcc/config/mn10300/mn10300.opt | 56
-rw-r--r--  gcc/config/mn10300/predicates.md | 69
-rw-r--r--  gcc/config/mn10300/t-linux | 29
-rw-r--r--  gcc/config/mn10300/t-mn10300 | 36
-rw-r--r--  gcc/config/moxie/constraints.md | 56
-rw-r--r--  gcc/config/moxie/crti.asm | 40
-rw-r--r--  gcc/config/moxie/crtn.asm | 34
-rw-r--r--  gcc/config/moxie/moxie-protos.h | 25
-rw-r--r--  gcc/config/moxie/moxie.c | 592
-rw-r--r--  gcc/config/moxie/moxie.h | 500
-rw-r--r--  gcc/config/moxie/moxie.md | 450
-rw-r--r--  gcc/config/moxie/predicates.md | 55
-rw-r--r--  gcc/config/moxie/rtems.h | 35
-rw-r--r--  gcc/config/moxie/sfp-machine.h | 57
-rw-r--r--  gcc/config/moxie/t-moxie | 20
-rw-r--r--  gcc/config/moxie/t-moxie-softfp | 9
-rw-r--r--  gcc/config/moxie/uclinux.h | 39
-rw-r--r--  gcc/config/netbsd-aout.h | 196
-rw-r--r--  gcc/config/netbsd-elf.h | 86
-rw-r--r--  gcc/config/netbsd-elf.opt | 33
-rw-r--r--  gcc/config/netbsd.h | 222
-rw-r--r--  gcc/config/netbsd.opt | 36
-rw-r--r--  gcc/config/newlib-stdint.h | 64
-rw-r--r--  gcc/config/openbsd-libpthread.h | 22
-rw-r--r--  gcc/config/openbsd-oldgas.h | 22
-rw-r--r--  gcc/config/openbsd-stdint.h | 31
-rw-r--r--  gcc/config/openbsd.h | 300
-rw-r--r--  gcc/config/openbsd.opt | 36
-rw-r--r--  gcc/config/pa/constraints.md | 140
-rw-r--r--  gcc/config/pa/elf.h | 92
-rw-r--r--  gcc/config/pa/fptr.c | 131
-rw-r--r--  gcc/config/pa/hpux-unwind.h | 361
-rw-r--r--  gcc/config/pa/lib2funcs.asm | 74
-rw-r--r--  gcc/config/pa/linux-atomic.c | 305
-rw-r--r--  gcc/config/pa/linux-unwind.h | 141
-rw-r--r--  gcc/config/pa/milli64.S | 2134
-rw-r--r--  gcc/config/pa/pa-64.h | 100
-rw-r--r--  gcc/config/pa/pa-hpux.h | 119
-rw-r--r--  gcc/config/pa/pa-hpux.opt | 37
-rw-r--r--  gcc/config/pa/pa-hpux10.h | 144
-rw-r--r--  gcc/config/pa/pa-hpux10.opt | 22
-rw-r--r--  gcc/config/pa/pa-hpux1010.h | 27
-rw-r--r--  gcc/config/pa/pa-hpux1010.opt | 23
-rw-r--r--  gcc/config/pa/pa-hpux11.h | 189
-rw-r--r--  gcc/config/pa/pa-hpux1111.h | 27
-rw-r--r--  gcc/config/pa/pa-hpux1111.opt | 23
-rw-r--r--  gcc/config/pa/pa-linux.h | 138
-rw-r--r--  gcc/config/pa/pa-modes.def | 32
-rw-r--r--  gcc/config/pa/pa-protos.h | 172
-rw-r--r--  gcc/config/pa/pa.c | 10471
-rw-r--r--  gcc/config/pa/pa.h | 1572
-rw-r--r--  gcc/config/pa/pa.md | 9543
-rw-r--r--  gcc/config/pa/pa.opt | 118
-rw-r--r--  gcc/config/pa/pa32-linux.h | 67
-rw-r--r--  gcc/config/pa/pa32-regs.h | 373
-rw-r--r--  gcc/config/pa/pa64-hpux.h | 442
-rw-r--r--  gcc/config/pa/pa64-hpux.opt | 27
-rw-r--r--  gcc/config/pa/pa64-linux.h | 64
-rw-r--r--  gcc/config/pa/pa64-regs.h | 294
-rw-r--r--  gcc/config/pa/pa64-start.h | 8
-rw-r--r--  gcc/config/pa/predicates.md | 524
-rw-r--r--  gcc/config/pa/quadlib.c | 245
-rw-r--r--  gcc/config/pa/som.h | 341
-rw-r--r--  gcc/config/pa/stublib.c | 97
-rw-r--r--  gcc/config/pa/t-dce-thr | 5
-rw-r--r--  gcc/config/pa/t-hpux-shlib | 46
-rw-r--r--  gcc/config/pa/t-linux | 39
-rw-r--r--  gcc/config/pa/t-linux64 | 34
-rw-r--r--  gcc/config/pa/t-pa | 7
-rw-r--r--  gcc/config/pa/t-pa-hpux | 7
-rw-r--r--  gcc/config/pa/t-pa-hpux10 | 2
-rw-r--r--  gcc/config/pa/t-pa-hpux11 | 31
-rw-r--r--  gcc/config/pa/t-pa64 | 67
-rw-r--r--  gcc/config/pa/t-slibgcc-dwarf-ver | 3
-rw-r--r--  gcc/config/pa/t-slibgcc-sjlj-ver | 3
-rw-r--r--  gcc/config/pdp11/constraints.md | 81
-rw-r--r--  gcc/config/pdp11/pdp11-modes.def | 26
-rw-r--r--  gcc/config/pdp11/pdp11-protos.h | 47
-rw-r--r--  gcc/config/pdp11/pdp11.c | 1923
-rw-r--r--  gcc/config/pdp11/pdp11.h | 685
-rw-r--r--  gcc/config/pdp11/pdp11.md | 1386
-rw-r--r--  gcc/config/pdp11/pdp11.opt | 87
-rw-r--r--  gcc/config/pdp11/predicates.md | 55
-rw-r--r--  gcc/config/pdp11/t-pdp11 | 44
-rw-r--r--  gcc/config/picochip/constraints.md | 64
-rw-r--r--  gcc/config/picochip/dfa_space.md | 43
-rw-r--r--  gcc/config/picochip/dfa_speed.md | 123
-rw-r--r--  gcc/config/picochip/libgccExtras/adddi3.asm | 194
-rw-r--r--  gcc/config/picochip/libgccExtras/ashlsi3.asm | 193
-rw-r--r--  gcc/config/picochip/libgccExtras/ashlsi3.c | 82
-rw-r--r--  gcc/config/picochip/libgccExtras/ashrsi3.asm | 202
-rw-r--r--  gcc/config/picochip/libgccExtras/ashrsi3.c | 113
-rw-r--r--  gcc/config/picochip/libgccExtras/clzsi2.asm | 189
-rw-r--r--  gcc/config/picochip/libgccExtras/cmpsi2.asm | 212
-rw-r--r--  gcc/config/picochip/libgccExtras/divmod15.asm | 261
-rw-r--r--  gcc/config/picochip/libgccExtras/divmodhi4.asm | 246
-rw-r--r--  gcc/config/picochip/libgccExtras/divmodsi4.asm | 233
-rw-r--r--  gcc/config/picochip/libgccExtras/fake_libgcc.asm | 6
-rw-r--r--  gcc/config/picochip/libgccExtras/longjmp.asm | 182
-rw-r--r--  gcc/config/picochip/libgccExtras/lshrsi3.asm | 190
-rw-r--r--  gcc/config/picochip/libgccExtras/lshrsi3.c | 76
-rw-r--r--  gcc/config/picochip/libgccExtras/parityhi2.asm | 179
-rw-r--r--  gcc/config/picochip/libgccExtras/popcounthi2.asm | 201
-rw-r--r--  gcc/config/picochip/libgccExtras/setjmp.asm | 182
-rw-r--r--  gcc/config/picochip/libgccExtras/subdi3.asm | 191
-rw-r--r--  gcc/config/picochip/libgccExtras/ucmpsi2.asm | 209
-rw-r--r--  gcc/config/picochip/libgccExtras/udivmodhi4.asm | 238
-rw-r--r--  gcc/config/picochip/libgccExtras/udivmodsi4.asm | 318
-rw-r--r--  gcc/config/picochip/picochip-protos.h | 128
-rw-r--r--  gcc/config/picochip/picochip.c | 4703
-rw-r--r--  gcc/config/picochip/picochip.h | 678
-rw-r--r--  gcc/config/picochip/picochip.md | 2622
-rw-r--r--  gcc/config/picochip/picochip.opt | 46
-rw-r--r--  gcc/config/picochip/predicates.md | 72
-rw-r--r--  gcc/config/picochip/t-picochip | 80
-rw-r--r--  gcc/config/print-sysroot-suffix.sh | 145
-rw-r--r--  gcc/config/rpath.opt | 29
-rw-r--r--  gcc/config/rs6000/40x.md | 120
-rw-r--r--  gcc/config/rs6000/440.md | 133
-rw-r--r--  gcc/config/rs6000/476.md | 142
-rw-r--r--  gcc/config/rs6000/603.md | 143
-rw-r--r--  gcc/config/rs6000/6xx.md | 275
-rw-r--r--  gcc/config/rs6000/7450.md | 185
-rw-r--r--  gcc/config/rs6000/750cl.h | 30
-rw-r--r--  gcc/config/rs6000/7xx.md | 184
-rw-r--r--  gcc/config/rs6000/8540.md | 250
-rw-r--r--  gcc/config/rs6000/a2.md | 134
-rw-r--r--  gcc/config/rs6000/aix-stdint.h | 51
-rw-r--r--  gcc/config/rs6000/aix.h | 260
-rw-r--r--  gcc/config/rs6000/aix43.h | 185
-rw-r--r--  gcc/config/rs6000/aix51.h | 189
-rw-r--r--  gcc/config/rs6000/aix52.h | 199
-rw-r--r--  gcc/config/rs6000/aix53.h | 199
-rw-r--r--  gcc/config/rs6000/aix61.h | 200
-rw-r--r--  gcc/config/rs6000/aix64.opt | 38
-rw-r--r--  gcc/config/rs6000/altivec.h | 493
-rw-r--r--  gcc/config/rs6000/altivec.md | 2749
-rw-r--r--  gcc/config/rs6000/biarch64.h | 26
-rw-r--r--  gcc/config/rs6000/cell.md | 400
-rw-r--r--  gcc/config/rs6000/constraints.md | 201
-rw-r--r--  gcc/config/rs6000/crtresfpr.asm | 81
-rw-r--r--  gcc/config/rs6000/crtresgpr.asm | 81
-rw-r--r--  gcc/config/rs6000/crtresxfpr.asm | 126
-rw-r--r--  gcc/config/rs6000/crtresxgpr.asm | 124
-rw-r--r--  gcc/config/rs6000/crtsavfpr.asm | 81
-rw-r--r--  gcc/config/rs6000/crtsavgpr.asm | 81
-rw-r--r--  gcc/config/rs6000/darwin-asm.h | 51
-rw-r--r--  gcc/config/rs6000/darwin-fallback.c | 487
-rw-r--r--  gcc/config/rs6000/darwin-fpsave.asm | 92
-rw-r--r--  gcc/config/rs6000/darwin-ldouble-format | 91
-rw-r--r--  gcc/config/rs6000/darwin-ldouble.c | 438
-rw-r--r--  gcc/config/rs6000/darwin-libgcc.10.4.ver | 93
-rw-r--r--  gcc/config/rs6000/darwin-libgcc.10.5.ver | 106
-rw-r--r--  gcc/config/rs6000/darwin-tramp.asm | 125
-rw-r--r--  gcc/config/rs6000/darwin-unwind.h | 30
-rw-r--r--  gcc/config/rs6000/darwin-vecsave.asm | 155
-rw-r--r--  gcc/config/rs6000/darwin-world.asm | 259
-rw-r--r--  gcc/config/rs6000/darwin.h | 438
-rw-r--r--  gcc/config/rs6000/darwin.md | 442
-rw-r--r--  gcc/config/rs6000/darwin.opt | 42
-rw-r--r--  gcc/config/rs6000/darwin64.h | 35
-rw-r--r--  gcc/config/rs6000/darwin7.h | 30
-rw-r--r--  gcc/config/rs6000/darwin8.h | 32
-rw-r--r--  gcc/config/rs6000/default64.h | 24
-rw-r--r--  gcc/config/rs6000/dfp.md | 594
-rw-r--r--  gcc/config/rs6000/driver-rs6000.c | 547
-rw-r--r--  gcc/config/rs6000/e300c2c3.md | 189
-rw-r--r--  gcc/config/rs6000/e500-double.h | 24
-rw-r--r--  gcc/config/rs6000/e500.h | 57
-rw-r--r--  gcc/config/rs6000/e500crtres32gpr.asm | 73
-rw-r--r--  gcc/config/rs6000/e500crtres64gpr.asm | 73
-rw-r--r--  gcc/config/rs6000/e500crtres64gprctr.asm | 90
-rw-r--r--  gcc/config/rs6000/e500crtrest32gpr.asm | 75
-rw-r--r--  gcc/config/rs6000/e500crtrest64gpr.asm | 74
-rw-r--r--  gcc/config/rs6000/e500crtresx32gpr.asm | 75
-rw-r--r--  gcc/config/rs6000/e500crtresx64gpr.asm | 75
-rw-r--r--  gcc/config/rs6000/e500crtsav32gpr.asm | 73
-rw-r--r--  gcc/config/rs6000/e500crtsav64gpr.asm | 72
-rw-r--r--  gcc/config/rs6000/e500crtsav64gprctr.asm | 91
-rw-r--r--  gcc/config/rs6000/e500crtsavg32gpr.asm | 73
-rw-r--r--  gcc/config/rs6000/e500crtsavg64gpr.asm | 73
-rw-r--r--  gcc/config/rs6000/e500crtsavg64gprctr.asm | 90
-rw-r--r--  gcc/config/rs6000/e500mc.md | 200
-rw-r--r--  gcc/config/rs6000/e500mc64.md | 191
-rw-r--r--  gcc/config/rs6000/eabi-ci.asm | 113
-rw-r--r--  gcc/config/rs6000/eabi-cn.asm | 104
-rw-r--r--  gcc/config/rs6000/eabi.asm | 289
-rw-r--r--  gcc/config/rs6000/eabi.h | 44
-rw-r--r--  gcc/config/rs6000/eabialtivec.h | 30
-rw-r--r--  gcc/config/rs6000/eabisim.h | 54
-rw-r--r--  gcc/config/rs6000/eabispe.h | 54
-rw-r--r--  gcc/config/rs6000/freebsd.h | 80
-rw-r--r--  gcc/config/rs6000/gnu.h | 37
-rw-r--r--  gcc/config/rs6000/host-darwin.c | 154
-rw-r--r--  gcc/config/rs6000/host-ppc64-darwin.c | 30
-rw-r--r--  gcc/config/rs6000/libgcc-ppc-glibc.ver | 73
-rw-r--r--  gcc/config/rs6000/libgcc-ppc64.ver | 7
-rw-r--r--  gcc/config/rs6000/linux-unwind.h | 355
-rw-r--r--  gcc/config/rs6000/linux.h | 134
-rw-r--r--  gcc/config/rs6000/linux64.h | 569
-rw-r--r--  gcc/config/rs6000/linux64.opt | 28
-rw-r--r--  gcc/config/rs6000/linuxaltivec.h | 30
-rw-r--r--  gcc/config/rs6000/linuxspe.h | 44
-rw-r--r--  gcc/config/rs6000/lynx.h | 125
-rw-r--r--  gcc/config/rs6000/milli.exp | 7
-rw-r--r--  gcc/config/rs6000/mpc.md | 111
-rw-r--r--  gcc/config/rs6000/netbsd.h | 93
-rw-r--r--  gcc/config/rs6000/option-defaults.h | 64
-rw-r--r--  gcc/config/rs6000/paired.h | 75
-rw-r--r--  gcc/config/rs6000/paired.md | 527
-rw-r--r--  gcc/config/rs6000/power4.md | 410
-rw-r--r--  gcc/config/rs6000/power5.md | 308
-rw-r--r--  gcc/config/rs6000/power6.md | 573
-rw-r--r--  gcc/config/rs6000/power7.md | 318
-rw-r--r--  gcc/config/rs6000/ppc-asm.h | 358
-rw-r--r--  gcc/config/rs6000/ppc64-fp.c | 239
-rw-r--r--  gcc/config/rs6000/ppu_intrinsics.h | 727
-rw-r--r--  gcc/config/rs6000/predicates.md | 1423
-rw-r--r--  gcc/config/rs6000/rios1.md | 191
-rw-r--r--  gcc/config/rs6000/rios2.md | 129
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def | 1020
-rw-r--r--  gcc/config/rs6000/rs6000-c.c | 3772
-rw-r--r--  gcc/config/rs6000/rs6000-modes.def | 41
-rw-r--r--  gcc/config/rs6000/rs6000-opts.h | 144
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h | 198
-rw-r--r--  gcc/config/rs6000/rs6000.c | 28250
-rw-r--r--  gcc/config/rs6000/rs6000.h | 2439
-rw-r--r--  gcc/config/rs6000/rs6000.md | 16361
-rw-r--r--  gcc/config/rs6000/rs6000.opt | 464
-rw-r--r--  gcc/config/rs6000/rs64.md | 154
-rw-r--r--  gcc/config/rs6000/rtems.h | 56
-rw-r--r--  gcc/config/rs6000/secureplt.h | 20
-rw-r--r--  gcc/config/rs6000/sfp-machine.h | 68
-rw-r--r--  gcc/config/rs6000/si2vmx.h | 2048
-rw-r--r--  gcc/config/rs6000/singlefp.h | 40
-rw-r--r--  gcc/config/rs6000/sol-ci.asm | 94
-rw-r--r--  gcc/config/rs6000/sol-cn.asm | 72
-rw-r--r--  gcc/config/rs6000/spe.h | 1107
-rw-r--r--  gcc/config/rs6000/spe.md | 3190
-rw-r--r--  gcc/config/rs6000/spu2vmx.h | 2415
-rw-r--r--  gcc/config/rs6000/sync.md | 622
-rw-r--r--  gcc/config/rs6000/sysv4.h | 1039
-rw-r--r--  gcc/config/rs6000/sysv4.opt | 145
-rw-r--r--  gcc/config/rs6000/sysv4le.h | 36
-rw-r--r--  gcc/config/rs6000/t-aix43 | 95
-rw-r--r--  gcc/config/rs6000/t-aix52 | 75
-rw-r--r--  gcc/config/rs6000/t-darwin | 55
-rw-r--r--  gcc/config/rs6000/t-darwin64 | 12
-rw-r--r--  gcc/config/rs6000/t-darwin8 | 3
-rw-r--r--  gcc/config/rs6000/t-fprules | 29
-rw-r--r--  gcc/config/rs6000/t-fprules-fpbit | 29
-rw-r--r--  gcc/config/rs6000/t-fprules-softfp | 6
-rw-r--r--  gcc/config/rs6000/t-freebsd | 24
-rw-r--r--  gcc/config/rs6000/t-linux | 9
-rw-r--r--  gcc/config/rs6000/t-linux64 | 45
-rw-r--r--  gcc/config/rs6000/t-lynx | 56
-rw-r--r--  gcc/config/rs6000/t-netbsd | 90
-rw-r--r--  gcc/config/rs6000/t-ppccomm | 75
-rw-r--r--  gcc/config/rs6000/t-ppcendian | 30
-rw-r--r--  gcc/config/rs6000/t-ppcgas | 33
-rw-r--r--  gcc/config/rs6000/t-ppcos | 8
-rw-r--r--  gcc/config/rs6000/t-rs6000 | 71
-rw-r--r--  gcc/config/rs6000/t-rtems | 82
-rw-r--r--  gcc/config/rs6000/t-spe | 86
-rw-r--r--  gcc/config/rs6000/t-vxworks | 34
-rw-r--r--  gcc/config/rs6000/t-vxworksae | 5
-rw-r--r--  gcc/config/rs6000/t-xilinx | 56
-rw-r--r--  gcc/config/rs6000/titan.md | 171
-rw-r--r--  gcc/config/rs6000/tramp.asm | 107
-rw-r--r--  gcc/config/rs6000/vec_types.h | 52
-rw-r--r--  gcc/config/rs6000/vector.md | 1175
-rw-r--r--  gcc/config/rs6000/vsx.md | 1152
-rw-r--r--  gcc/config/rs6000/vxworks.h | 146
-rw-r--r--  gcc/config/rs6000/vxworksae.h | 23
-rw-r--r--  gcc/config/rs6000/x-aix | 6
-rw-r--r--  gcc/config/rs6000/x-darwin | 5
-rw-r--r--  gcc/config/rs6000/x-darwin64 | 5
-rw-r--r--  gcc/config/rs6000/x-linux-relax | 2
-rw-r--r--  gcc/config/rs6000/x-rs6000 | 3
-rw-r--r--  gcc/config/rs6000/xcoff.h | 333
-rw-r--r--  gcc/config/rs6000/xfpu.h | 26
-rw-r--r--  gcc/config/rs6000/xfpu.md | 140
-rw-r--r--  gcc/config/rs6000/xilinx.h | 47
-rw-r--r--  gcc/config/rs6000/xilinx.opt | 33
-rw-r--r--  gcc/config/rtems.h | 45
-rw-r--r--  gcc/config/rtems.opt | 33
-rw-r--r--  gcc/config/rx/constraints.md | 88
-rw-r--r--  gcc/config/rx/predicates.md | 297
-rw-r--r--  gcc/config/rx/rx-modes.def | 25
-rw-r--r--  gcc/config/rx/rx-protos.h | 46
-rw-r--r--  gcc/config/rx/rx.c | 2935
-rw-r--r--  gcc/config/rx/rx.h | 643
-rw-r--r--  gcc/config/rx/rx.md | 2501
-rw-r--r--  gcc/config/rx/rx.opt | 99
-rw-r--r--  gcc/config/rx/t-rx | 34
-rw-r--r--  gcc/config/s390/2064.md | 135
-rw-r--r--  gcc/config/s390/2084.md | 310
-rw-r--r--  gcc/config/s390/2097.md | 764
-rw-r--r--  gcc/config/s390/2817.md | 315
-rw-r--r--  gcc/config/s390/constraints.md | 492
-rw-r--r--  gcc/config/s390/linux-unwind.h | 130
-rw-r--r--  gcc/config/s390/linux.h | 104
-rw-r--r--  gcc/config/s390/predicates.md | 406
-rw-r--r--  gcc/config/s390/s390-modes.def | 174
-rw-r--r--  gcc/config/s390/s390-protos.h | 114
-rw-r--r--  gcc/config/s390/s390.c | 10845
-rw-r--r--  gcc/config/s390/s390.h | 954
-rw-r--r--  gcc/config/s390/s390.md | 9410
-rw-r--r--  gcc/config/s390/s390.opt | 99
-rw-r--r--  gcc/config/s390/s390x.h | 27
-rw-r--r--  gcc/config/s390/t-linux64 | 11
-rw-r--r--  gcc/config/s390/tpf-unwind.h | 252
-rw-r--r--  gcc/config/s390/tpf.h | 130
-rw-r--r--  gcc/config/s390/tpf.md | 33
-rw-r--r--  gcc/config/s390/tpf.opt | 27
-rw-r--r--  gcc/config/score/constraints.md | 93
-rw-r--r--  gcc/config/score/crti.asm | 131
-rw-r--r--  gcc/config/score/crtn.asm | 50
-rw-r--r--  gcc/config/score/elf.h | 97
-rw-r--r--  gcc/config/score/predicates.md | 152
-rw-r--r--  gcc/config/score/score-conv.h | 78
-rw-r--r--  gcc/config/score/score-generic.md | 45
-rw-r--r--  gcc/config/score/score-modes.def | 24
-rw-r--r--  gcc/config/score/score-protos.h | 86
-rw-r--r--  gcc/config/score/score.c | 736
-rw-r--r--  gcc/config/score/score.h | 898
-rw-r--r--  gcc/config/score/score.md | 1880
-rw-r--r--  gcc/config/score/score.opt | 47
-rw-r--r--  gcc/config/score/score7.c | 1797
-rw-r--r--  gcc/config/score/score7.h | 158
-rw-r--r--  gcc/config/score/sfp-machine.h | 57
-rw-r--r--  gcc/config/score/t-score-elf | 33
-rw-r--r--  gcc/config/score/t-score-softfp | 9
-rw-r--r--  gcc/config/sh/constraints.md | 265
-rw-r--r--  gcc/config/sh/crt1.asm | 1369
-rw-r--r--  gcc/config/sh/crti.asm | 125
-rw-r--r--  gcc/config/sh/crtn.asm | 77
-rw-r--r--  gcc/config/sh/divcost-analysis | 88
-rw-r--r--  gcc/config/sh/divtab-sh4-300.c | 77
-rw-r--r--  gcc/config/sh/divtab-sh4.c | 85
-rw-r--r--  gcc/config/sh/divtab.c | 200
-rw-r--r--  gcc/config/sh/elf.h | 90
-rw-r--r--  gcc/config/sh/embed-elf.h | 36
-rw-r--r--  gcc/config/sh/lib1funcs-4-300.asm | 936
-rw-r--r--  gcc/config/sh/lib1funcs-Os-4-200.asm | 322
-rw-r--r--  gcc/config/sh/lib1funcs.asm | 3933
-rw-r--r--  gcc/config/sh/lib1funcs.h | 76
-rw-r--r--  gcc/config/sh/libgcc-excl.ver | 8
-rw-r--r--  gcc/config/sh/libgcc-glibc.ver | 48
-rw-r--r--  gcc/config/sh/linux-atomic.asm | 223
-rw-r--r--  gcc/config/sh/linux-unwind.h | 256
-rw-r--r--  gcc/config/sh/linux.h | 137
-rw-r--r--  gcc/config/sh/little.h | 21
-rw-r--r--  gcc/config/sh/netbsd-elf.h | 117
-rw-r--r--  gcc/config/sh/newlib.h | 25
-rw-r--r--  gcc/config/sh/predicates.md | 833
-rw-r--r--  gcc/config/sh/rtems.h | 26
-rw-r--r--  gcc/config/sh/rtemself.h | 26
-rw-r--r--  gcc/config/sh/sh-c.c | 68
-rw-r--r--  gcc/config/sh/sh-modes.def | 34
-rw-r--r--  gcc/config/sh/sh-protos.h | 186
-rw-r--r--  gcc/config/sh/sh-symbian.h | 42
-rw-r--r--  gcc/config/sh/sh.c | 12610
-rw-r--r--  gcc/config/sh/sh.h | 2511
-rw-r--r--  gcc/config/sh/sh.md | 13490
-rw-r--r--  gcc/config/sh/sh.opt | 338
-rw-r--r--  gcc/config/sh/sh1.md | 85
-rw-r--r--  gcc/config/sh/sh4-300.md | 287
-rw-r--r--  gcc/config/sh/sh4.md | 486
-rw-r--r--  gcc/config/sh/sh4a.md | 236
-rw-r--r--  gcc/config/sh/sh64.h | 26
-rw-r--r--  gcc/config/sh/shmedia.h | 30
-rw-r--r--  gcc/config/sh/shmedia.md | 94
-rw-r--r--  gcc/config/sh/sshmedia.h | 78
-rw-r--r--  gcc/config/sh/superh.h | 107
-rw-r--r--  gcc/config/sh/superh.opt | 10
-rw-r--r--  gcc/config/sh/symbian-base.c | 244
-rw-r--r--  gcc/config/sh/symbian-c.c | 181
-rw-r--r--  gcc/config/sh/symbian-cxx.c | 662
-rw-r--r--  gcc/config/sh/symbian-post.h | 88
-rw-r--r--  gcc/config/sh/symbian-pre.h | 40
-rw-r--r--  gcc/config/sh/t-elf | 10
-rw-r--r--  gcc/config/sh/t-linux | 8
-rw-r--r--  gcc/config/sh/t-linux64 | 1
-rw-r--r--  gcc/config/sh/t-netbsd | 31
-rw-r--r--  gcc/config/sh/t-netbsd-sh5-64 | 1
-rw-r--r--  gcc/config/sh/t-rtems | 7
-rw-r--r--  gcc/config/sh/t-sh | 166
-rw-r--r--  gcc/config/sh/t-sh64 | 29
-rw-r--r--  gcc/config/sh/t-superh | 33
-rw-r--r--  gcc/config/sh/t-symbian | 81
-rw-r--r--  gcc/config/sh/t-vxworks | 9
-rw-r--r--  gcc/config/sh/ushmedia.h | 1087
-rw-r--r--  gcc/config/sh/vxworks.h | 69
-rw-r--r--  gcc/config/soft-fp/README | 5
-rw-r--r--  gcc/config/soft-fp/adddf3.c | 49
-rw-r--r--  gcc/config/soft-fp/addsf3.c | 50
-rw-r--r--  gcc/config/soft-fp/addtf3.c | 49
-rw-r--r--  gcc/config/soft-fp/divdf3.c | 49
-rw-r--r--  gcc/config/soft-fp/divsf3.c | 49
-rw-r--r--  gcc/config/soft-fp/divtf3.c | 49
-rw-r--r--  gcc/config/soft-fp/double.h | 265
-rw-r--r--  gcc/config/soft-fp/eqdf2.c | 51
-rw-r--r--  gcc/config/soft-fp/eqsf2.c | 51
-rw-r--r--  gcc/config/soft-fp/eqtf2.c | 51
-rw-r--r--  gcc/config/soft-fp/extenddftf2.c | 54
-rw-r--r--  gcc/config/soft-fp/extended.h | 431
-rw-r--r--  gcc/config/soft-fp/extendsfdf2.c | 54
-rw-r--r--  gcc/config/soft-fp/extendsftf2.c | 54
-rw-r--r--  gcc/config/soft-fp/extendxftf2.c | 53
-rw-r--r--  gcc/config/soft-fp/fixdfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixdfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixdfti.c | 45
-rw-r--r--  gcc/config/soft-fp/fixsfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixsfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixsfti.c | 45
-rw-r--r--  gcc/config/soft-fp/fixtfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixtfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixtfti.c | 45
-rw-r--r--  gcc/config/soft-fp/fixunsdfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunsdfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunsdfti.c | 45
-rw-r--r--  gcc/config/soft-fp/fixunssfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunssfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunssfti.c | 45
-rw-r--r--  gcc/config/soft-fp/fixunstfdi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunstfsi.c | 46
-rw-r--r--  gcc/config/soft-fp/fixunstfti.c | 45
-rw-r--r--  gcc/config/soft-fp/floatdidf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatdisf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatditf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatsidf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatsisf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatsitf.c | 46
-rw-r--r--  gcc/config/soft-fp/floattidf.c | 45
-rw-r--r--  gcc/config/soft-fp/floattisf.c | 45
-rw-r--r--  gcc/config/soft-fp/floattitf.c | 45
-rw-r--r--  gcc/config/soft-fp/floatundidf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatundisf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatunditf.c | 47
-rw-r--r--  gcc/config/soft-fp/floatunsidf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatunsisf.c | 46
-rw-r--r--  gcc/config/soft-fp/floatunsitf.c | 47
-rw-r--r--  gcc/config/soft-fp/floatuntidf.c | 45
-rw-r--r--  gcc/config/soft-fp/floatuntisf.c | 45
-rw-r--r--  gcc/config/soft-fp/floatuntitf.c | 45
-rw-r--r--  gcc/config/soft-fp/gedf2.c | 51
-rw-r--r--  gcc/config/soft-fp/gesf2.c | 51
-rw-r--r--  gcc/config/soft-fp/getf2.c | 51
-rw-r--r--  gcc/config/soft-fp/ledf2.c | 51
-rw-r--r--  gcc/config/soft-fp/lesf2.c | 51
-rw-r--r--  gcc/config/soft-fp/letf2.c | 51
-rw-r--r--  gcc/config/soft-fp/muldf3.c | 49
-rw-r--r--  gcc/config/soft-fp/mulsf3.c | 49
-rw-r--r--  gcc/config/soft-fp/multf3.c | 49
-rw-r--r--  gcc/config/soft-fp/negdf2.c | 48
-rw-r--r--  gcc/config/soft-fp/negsf2.c | 48
-rw-r--r--  gcc/config/soft-fp/negtf2.c | 48
-rw-r--r--  gcc/config/soft-fp/op-1.h | 302
-rw-r--r--  gcc/config/soft-fp/op-2.h | 617
-rw-r--r--  gcc/config/soft-fp/op-4.h | 688
-rw-r--r--  gcc/config/soft-fp/op-8.h | 111
-rw-r--r--  gcc/config/soft-fp/op-common.h | 1359
-rw-r--r--  gcc/config/soft-fp/quad.h | 271
-rw-r--r--  gcc/config/soft-fp/single.h | 151
-rw-r--r--  gcc/config/soft-fp/soft-fp.h | 213
-rw-r--r--  gcc/config/soft-fp/subdf3.c | 49
-rw-r--r--  gcc/config/soft-fp/subsf3.c | 49
-rw-r--r--  gcc/config/soft-fp/subtf3.c | 49
-rw-r--r--  gcc/config/soft-fp/t-softfp | 107
-rw-r--r--  gcc/config/soft-fp/truncdfsf2.c | 54
-rw-r--r--  gcc/config/soft-fp/trunctfdf2.c | 54
-rw-r--r--  gcc/config/soft-fp/trunctfsf2.c | 54
-rw-r--r--  gcc/config/soft-fp/trunctfxf2.c | 53
-rw-r--r--  gcc/config/soft-fp/unorddf2.c | 44
-rw-r--r--  gcc/config/soft-fp/unordsf2.c | 45
-rw-r--r--  gcc/config/soft-fp/unordtf2.c | 45
-rw-r--r--  gcc/config/sol2-10.h | 23
-rw-r--r--  gcc/config/sol2-c.c | 272
-rw-r--r--  gcc/config/sol2-gld.h | 36
-rw-r--r--  gcc/config/sol2-protos.h | 24
-rw-r--r--  gcc/config/sol2.c | 156
-rw-r--r--  gcc/config/sol2.h | 310
-rw-r--r--  gcc/config/sol2.opt | 47
-rw-r--r--  gcc/config/sparc/biarch64.h | 23
-rw-r--r--  gcc/config/sparc/constraints.md | 148
-rw-r--r--  gcc/config/sparc/crtfastmath.c | 44
-rw-r--r--  gcc/config/sparc/cypress.md | 50
-rw-r--r--  gcc/config/sparc/freebsd.h | 177
-rw-r--r--  gcc/config/sparc/gmon-sol2.c | 420
-rw-r--r--  gcc/config/sparc/hypersparc.md | 82
-rw-r--r--  gcc/config/sparc/lb1spc.asm | 784
-rw-r--r--  gcc/config/sparc/lb1spl.asm | 246
-rw-r--r--  gcc/config/sparc/leon.md | 56
-rw-r--r--  gcc/config/sparc/libgcc-sparc-glibc.ver | 93
-rw-r--r--  gcc/config/sparc/linux-unwind.h | 202
-rw-r--r--  gcc/config/sparc/linux.h | 168
-rw-r--r--  gcc/config/sparc/linux64.h | 289
-rw-r--r--  gcc/config/sparc/little-endian.opt | 27
-rw-r--r--  gcc/config/sparc/long-double-switch.opt | 27
-rw-r--r--  gcc/config/sparc/netbsd-elf.h | 246
-rw-r--r--  gcc/config/sparc/niagara.md | 118
-rw-r--r--  gcc/config/sparc/niagara2.md | 90
-rw-r--r--  gcc/config/sparc/openbsd1-64.h | 23
-rw-r--r--  gcc/config/sparc/openbsd64.h | 85
-rw-r--r--  gcc/config/sparc/predicates.md | 475
-rw-r--r--  gcc/config/sparc/rtemself.h | 33
-rw-r--r--  gcc/config/sparc/sol2-64.h | 22
-rw-r--r--  gcc/config/sparc/sol2-bi.h | 271
-rw-r--r--  gcc/config/sparc/sol2-c1.asm | 103
-rw-r--r--  gcc/config/sparc/sol2-ci.asm | 55
-rw-r--r--  gcc/config/sparc/sol2-cn.asm | 41
-rw-r--r--  gcc/config/sparc/sol2-gas-bi.h | 23
-rw-r--r--  gcc/config/sparc/sol2-gas.h | 47
-rw-r--r--  gcc/config/sparc/sol2-gld-bi.h | 67
-rw-r--r--  gcc/config/sparc/sol2-unwind.h | 480
-rw-r--r--  gcc/config/sparc/sol2.h | 205
-rw-r--r--  gcc/config/sparc/sp-elf.h | 69
-rw-r--r--  gcc/config/sparc/sp64-elf.h | 93
-rw-r--r--  gcc/config/sparc/sparc-modes.def | 47
-rw-r--r--  gcc/config/sparc/sparc-protos.h | 108
-rw-r--r--  gcc/config/sparc/sparc.c | 9873
-rw-r--r--  gcc/config/sparc/sparc.h | 2122
-rw-r--r--  gcc/config/sparc/sparc.md | 7828
-rw-r--r--  gcc/config/sparc/sparc.opt | 126
-rw-r--r--  gcc/config/sparc/sparclet.md | 43
-rw-r--r--  gcc/config/sparc/supersparc.md | 92
-rw-r--r--  gcc/config/sparc/sync.md | 199
-rw-r--r--  gcc/config/sparc/sysv4.h | 125
-rw-r--r--  gcc/config/sparc/t-crtfm | 4
-rw-r--r--  gcc/config/sparc/t-crtin | 6
-rw-r--r--  gcc/config/sparc/t-elf | 40
-rw-r--r--  gcc/config/sparc/t-leon | 42
-rw-r--r--  gcc/config/sparc/t-leon3 | 37
-rw-r--r--  gcc/config/sparc/t-linux | 7
-rw-r--r--  gcc/config/sparc/t-linux64 | 37
-rw-r--r--  gcc/config/sparc/t-netbsd64 | 8
-rw-r--r--  gcc/config/sparc/t-sol2 | 39
-rw-r--r--  gcc/config/sparc/t-sol2-64 | 7
-rw-r--r--  gcc/config/sparc/t-vxworks | 5
-rw-r--r--  gcc/config/sparc/ultra1_2.md | 301
-rw-r--r--  gcc/config/sparc/ultra3.md | 189
-rw-r--r--  gcc/config/sparc/vxworks.h | 60
-rw-r--r--  gcc/config/spu/cache.S | 43
-rw-r--r--  gcc/config/spu/cachemgr.c | 438
-rw-r--r--  gcc/config/spu/constraints.md | 179
-rw-r--r--  gcc/config/spu/divmodti4.c | 166
-rw-r--r--  gcc/config/spu/divv2df3.c | 195
-rw-r--r--  gcc/config/spu/float_disf.c | 31
-rw-r--r--  gcc/config/spu/float_unsdidf.c | 54
-rw-r--r--  gcc/config/spu/float_unsdisf.c | 31
-rw-r--r--  gcc/config/spu/float_unssidf.c | 45
-rw-r--r--  gcc/config/spu/mfc_multi_tag_release.c | 72
-rw-r--r--  gcc/config/spu/mfc_multi_tag_reserve.c | 84
-rw-r--r--  gcc/config/spu/mfc_tag_release.c | 59
-rw-r--r--  gcc/config/spu/mfc_tag_reserve.c | 51
-rw-r--r--  gcc/config/spu/mfc_tag_table.c | 39
-rw-r--r--  gcc/config/spu/multi3.c | 97
-rw-r--r--  gcc/config/spu/predicates.md | 122
-rw-r--r--  gcc/config/spu/spu-builtins.def | 781
-rw-r--r--  gcc/config/spu/spu-builtins.md | 929
-rw-r--r--  gcc/config/spu/spu-c.c | 234
-rw-r--r--  gcc/config/spu/spu-elf.h | 80
-rw-r--r--  gcc/config/spu/spu-modes.def | 29
-rw-r--r--  gcc/config/spu/spu-protos.h | 96
-rw-r--r--  gcc/config/spu/spu.c | 7180
-rw-r--r--  gcc/config/spu/spu.h | 564
-rw-r--r--  gcc/config/spu/spu.md | 5431
-rw-r--r--  gcc/config/spu/spu.opt | 105
-rw-r--r--  gcc/config/spu/spu_cache.h | 39
-rw-r--r--  gcc/config/spu/spu_internals.h | 421
-rw-r--r--  gcc/config/spu/spu_intrinsics.h | 83
-rw-r--r--  gcc/config/spu/spu_mfcio.h | 342
-rw-r--r--  gcc/config/spu/t-spu-elf | 123
-rw-r--r--  gcc/config/spu/vec_types.h | 36
-rw-r--r--  gcc/config/spu/vmx2spu.h | 3985
-rw-r--r--  gcc/config/stormy16/constraints.md | 119
-rw-r--r--  gcc/config/stormy16/predicates.md | 178
-rw-r--r--  gcc/config/stormy16/stormy-abi | 174
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-ashlsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-ashrsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-clzhi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-cmpsi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-ctzhi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-divsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-ffshi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-lshrsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-modsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-parityhi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-popcounthi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-ucmpsi2.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-udivmodsi4.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-udivsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2-umodsi3.c | 2
-rw-r--r--  gcc/config/stormy16/stormy16-lib2.c | 357
-rw-r--r--  gcc/config/stormy16/stormy16-protos.h | 70
-rw-r--r--  gcc/config/stormy16/stormy16.c | 2677
-rw-r--r--  gcc/config/stormy16/stormy16.h | 520
-rw-r--r--  gcc/config/stormy16/stormy16.md | 1251
-rw-r--r--  gcc/config/stormy16/stormy16.opt | 24
-rw-r--r--  gcc/config/stormy16/t-stormy16 | 50
-rw-r--r--  gcc/config/svr3.h | 146
-rw-r--r--  gcc/config/sync.c | 180
-rw-r--r--  gcc/config/t-darwin | 63
-rw-r--r--  gcc/config/t-dfprules | 10
-rw-r--r--  gcc/config/t-freebsd | 10
-rw-r--r--  gcc/config/t-freebsd-thread | 2
-rw-r--r--  gcc/config/t-gnu | 2
-rw-r--r--  gcc/config/t-libc-ok | 1
-rw-r--r--  gcc/config/t-libgcc-pic | 2
-rw-r--r--  gcc/config/t-libunwind | 30
-rw-r--r--  gcc/config/t-libunwind-elf | 49
-rw-r--r--  gcc/config/t-linux | 32
-rw-r--r--  gcc/config/t-lynx | 33
-rw-r--r--  gcc/config/t-netbsd | 2
-rw-r--r--  gcc/config/t-openbsd | 2
-rw-r--r--  gcc/config/t-openbsd-thread | 3
-rw-r--r--  gcc/config/t-pnt16-warn | 27
-rw-r--r--  gcc/config/t-rtems | 7
-rw-r--r--  gcc/config/t-slibgcc-darwin | 2
-rw-r--r--  gcc/config/t-slibgcc-elf-ver | 56
-rw-r--r--  gcc/config/t-slibgcc-libgcc | 32
-rw-r--r--  gcc/config/t-slibgcc-nolc-override | 1
-rw-r--r--  gcc/config/t-slibgcc-sld | 50
-rw-r--r--  gcc/config/t-sol2 | 36
-rw-r--r--  gcc/config/t-svr4 | 8
-rw-r--r--  gcc/config/t-sysroot-suffix | 7
-rw-r--r--  gcc/config/t-vxworks | 53
-rw-r--r--  gcc/config/tm-dwarf2.h | 4
-rw-r--r--  gcc/config/udivmod.c | 37
-rw-r--r--  gcc/config/udivmodsi4.c | 47
-rw-r--r--  gcc/config/usegas.h | 20
-rw-r--r--  gcc/config/v850/constraints.md | 108
-rw-r--r--  gcc/config/v850/lib1funcs.asm | 2330
-rw-r--r--  gcc/config/v850/predicates.md | 501
-rw-r--r--  gcc/config/v850/t-v850 | 114
-rw-r--r--  gcc/config/v850/t-v850e | 112
-rw-r--r--  gcc/config/v850/v850-c.c | 273
-rw-r--r--  gcc/config/v850/v850-modes.def | 29
-rw-r--r--  gcc/config/v850/v850-protos.h | 73
-rw-r--r--  gcc/config/v850/v850.c | 3226
-rw-r--r--  gcc/config/v850/v850.h | 987
-rw-r--r--  gcc/config/v850/v850.md | 2667
-rw-r--r--  gcc/config/v850/v850.opt | 106
-rw-r--r--  gcc/config/vax/builtins.md | 192
-rw-r--r--  gcc/config/vax/constraints.md | 117
-rw-r--r--  gcc/config/vax/elf.h | 111
-rw-r--r--  gcc/config/vax/elf.opt | 30
-rw-r--r--  gcc/config/vax/lib1funcs.asm | 92
-rw-r--r--  gcc/config/vax/linux.h | 54
-rw-r--r--  gcc/config/vax/netbsd-elf.h | 68
-rw-r--r--gcc/config/vax/netbsd.h47
-rw-r--r--gcc/config/vax/openbsd.h48
-rw-r--r--gcc/config/vax/openbsd1.h22
-rw-r--r--gcc/config/vax/predicates.md111
-rw-r--r--gcc/config/vax/t-linux2
-rw-r--r--gcc/config/vax/vax-modes.def22
-rw-r--r--gcc/config/vax/vax-protos.h41
-rw-r--r--gcc/config/vax/vax.c2140
-rw-r--r--gcc/config/vax/vax.h751
-rw-r--r--gcc/config/vax/vax.md1636
-rw-r--r--gcc/config/vax/vax.opt51
-rw-r--r--gcc/config/vms/t-vms37
-rw-r--r--gcc/config/vms/vms-crtl-64.h195
-rw-r--r--gcc/config/vms/vms-crtl.h191
-rw-r--r--gcc/config/vms/vms-ucrt0.c127
-rw-r--r--gcc/config/vms/vms.opt31
-rw-r--r--gcc/config/vms/x-vms27
-rw-r--r--gcc/config/vms/xm-vms.h58
-rw-r--r--gcc/config/vms/xm-vms64.h23
-rw-r--r--gcc/config/vx-common.h94
-rw-r--r--gcc/config/vxlib-tls.c362
-rw-r--r--gcc/config/vxlib.c95
-rw-r--r--gcc/config/vxworks-dummy.h40
-rw-r--r--gcc/config/vxworks.c147
-rw-r--r--gcc/config/vxworks.h138
-rw-r--r--gcc/config/vxworks.opt46
-rw-r--r--gcc/config/vxworksae.h70
-rw-r--r--gcc/config/x-cflags-O15
-rw-r--r--gcc/config/x-darwin3
-rw-r--r--gcc/config/x-hpux4
-rw-r--r--gcc/config/x-linux4
-rw-r--r--gcc/config/x-solaris4
-rw-r--r--gcc/config/xtensa/constraints.md139
-rw-r--r--gcc/config/xtensa/crti.asm51
-rw-r--r--gcc/config/xtensa/crtn.asm46
-rw-r--r--gcc/config/xtensa/elf.h104
-rw-r--r--gcc/config/xtensa/elf.opt30
-rw-r--r--gcc/config/xtensa/ieee754-df.S2388
-rw-r--r--gcc/config/xtensa/ieee754-sf.S1757
-rw-r--r--gcc/config/xtensa/lib1funcs.asm845
-rw-r--r--gcc/config/xtensa/lib2funcs.S186
-rw-r--r--gcc/config/xtensa/libgcc-xtensa.ver3
-rw-r--r--gcc/config/xtensa/linux-unwind.h97
-rw-r--r--gcc/config/xtensa/linux.h71
-rw-r--r--gcc/config/xtensa/predicates.md175
-rw-r--r--gcc/config/xtensa/t-elf6
-rw-r--r--gcc/config/xtensa/t-linux3
-rw-r--r--gcc/config/xtensa/t-xtensa42
-rw-r--r--gcc/config/xtensa/unwind-dw2-xtensa.c546
-rw-r--r--gcc/config/xtensa/unwind-dw2-xtensa.h50
-rw-r--r--gcc/config/xtensa/xtensa-protos.h74
-rw-r--r--gcc/config/xtensa/xtensa.c3715
-rw-r--r--gcc/config/xtensa/xtensa.h847
-rw-r--r--gcc/config/xtensa/xtensa.md1914
-rw-r--r--gcc/config/xtensa/xtensa.opt43
1540 files changed, 859149 insertions, 0 deletions
diff --git a/gcc/config/README b/gcc/config/README
new file mode 100644
index 000000000..60328ec5b
--- /dev/null
+++ b/gcc/config/README
@@ -0,0 +1,5 @@
+This directory contains machine-specific files for the GNU C compiler.
+It has a subdirectory for each basic CPU type.
+The files in this directory itself
+are mostly .h files and makefile fragments that pertain to particular
+operating systems and are used for more than one CPU type.
diff --git a/gcc/config/alpha/alpha-modes.def b/gcc/config/alpha/alpha-modes.def
new file mode 100644
index 000000000..9d5a04246
--- /dev/null
+++ b/gcc/config/alpha/alpha-modes.def
@@ -0,0 +1,27 @@
+/* Alpha extra machine modes.
+ Copyright (C) 2003, 2004, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* 128-bit floating point. This gets reset in alpha_option_override
+ if VAX float format is in use. */
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h
new file mode 100644
index 000000000..747716960
--- /dev/null
+++ b/gcc/config/alpha/alpha-protos.h
@@ -0,0 +1,131 @@
+/* Prototypes for alpha.c functions used in the md file & elsewhere.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern int alpha_next_sequence_number;
+
+extern void literal_section (void);
+extern int zap_mask (HOST_WIDE_INT);
+extern int direct_return (void);
+
+extern int alpha_sa_size (void);
+extern HOST_WIDE_INT alpha_initial_elimination_offset (unsigned int,
+ unsigned int);
+extern void alpha_expand_prologue (void);
+extern void alpha_expand_epilogue (void);
+extern void alpha_output_filename (FILE *, const char *);
+
+extern rtx alpha_tablejump_addr_vec (rtx);
+extern rtx alpha_tablejump_best_label (rtx);
+
+extern bool alpha_legitimate_constant_p (rtx);
+extern rtx alpha_legitimize_reload_address (rtx, enum machine_mode,
+ int, int, int);
+
+extern rtx split_small_symbolic_operand (rtx);
+
+extern void get_aligned_mem (rtx, rtx *, rtx *);
+extern rtx get_unaligned_address (rtx);
+extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT);
+extern enum reg_class alpha_preferred_reload_class (rtx, enum reg_class);
+
+extern void alpha_set_memflags (rtx, rtx);
+extern bool alpha_split_const_mov (enum machine_mode, rtx *);
+extern bool alpha_expand_mov (enum machine_mode, rtx *);
+extern bool alpha_expand_mov_nobwx (enum machine_mode, rtx *);
+extern void alpha_expand_movmisalign (enum machine_mode, rtx *);
+extern void alpha_emit_floatuns (rtx[]);
+extern rtx alpha_emit_conditional_move (rtx, enum machine_mode);
+extern void alpha_split_tmode_pair (rtx[], enum machine_mode, bool);
+extern void alpha_split_tfmode_frobsign (rtx[], rtx (*)(rtx, rtx, rtx));
+extern void alpha_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT,
+ HOST_WIDE_INT, int);
+extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern int alpha_expand_block_move (rtx []);
+extern int alpha_expand_block_clear (rtx []);
+extern rtx alpha_expand_zap_mask (HOST_WIDE_INT);
+extern void alpha_expand_builtin_vector_binop (rtx (*)(rtx, rtx, rtx),
+ enum machine_mode,
+ rtx, rtx, rtx);
+extern void alpha_expand_builtin_establish_vms_condition_handler (rtx, rtx);
+extern void alpha_expand_builtin_revert_vms_condition_handler (rtx);
+
+extern rtx alpha_return_addr (int, rtx);
+extern rtx alpha_gp_save_rtx (void);
+extern void print_operand (FILE *, rtx, int);
+extern void print_operand_address (FILE *, rtx);
+extern void alpha_initialize_trampoline (rtx, rtx, rtx, int, int, int);
+
+extern rtx alpha_va_arg (tree, tree);
+extern rtx function_value (const_tree, const_tree, enum machine_mode);
+
+extern void alpha_start_function (FILE *, const char *, tree);
+extern void alpha_end_function (FILE *, const char *, tree);
+
+extern int alpha_find_lo_sum_using_gp (rtx);
+
+#ifdef REAL_VALUE_TYPE
+extern int check_float_value (enum machine_mode, REAL_VALUE_TYPE *, int);
+#endif
+
+#ifdef RTX_CODE
+extern void alpha_emit_conditional_branch (rtx[], enum machine_mode);
+extern bool alpha_emit_setcc (rtx[], enum machine_mode);
+extern int alpha_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx);
+extern void alpha_emit_xfloating_arith (enum rtx_code, rtx[]);
+extern void alpha_emit_xfloating_cvt (enum rtx_code, rtx[]);
+extern void alpha_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void alpha_split_compare_and_swap (rtx, rtx, rtx, rtx, rtx);
+extern void alpha_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
+extern void alpha_split_compare_and_swap_12 (enum machine_mode, rtx, rtx,
+ rtx, rtx, rtx, rtx, rtx);
+extern void alpha_split_lock_test_and_set (rtx, rtx, rtx, rtx);
+extern void alpha_expand_lock_test_and_set_12 (rtx, rtx, rtx);
+extern void alpha_split_lock_test_and_set_12 (enum machine_mode, rtx, rtx,
+ rtx, rtx, rtx);
+#endif
+
+extern rtx alpha_need_linkage (const char *, int);
+extern rtx alpha_use_linkage (rtx, tree, int, int);
+
+#if TARGET_ABI_OPEN_VMS
+extern enum avms_arg_type alpha_arg_type (enum machine_mode);
+extern rtx alpha_arg_info_reg_val (CUMULATIVE_ARGS);
+extern void avms_asm_output_external (FILE *, tree, const char *);
+extern void vms_output_aligned_decl_common (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern HOST_WIDE_INT alpha_vms_initial_elimination_offset (unsigned int,
+ unsigned int);
+#endif
+
+extern rtx unicosmk_add_call_info_word (rtx);
+
+#if TARGET_ABI_UNICOSMK
+extern void unicosmk_defer_case_vector (rtx, rtx);
+extern void unicosmk_add_extern (const char *);
+extern void unicosmk_output_align (FILE *, int);
+extern void unicosmk_output_common (FILE *, const char *, int, int);
+extern int unicosmk_initial_elimination_offset (int, int);
+#endif
+
+extern int some_small_symbolic_operand_int (rtx *, void *);
+extern int tls_symbolic_operand_1 (rtx, int, int);
+extern rtx resolve_reload_operand (rtx);
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
new file mode 100644
index 000000000..ba6179e71
--- /dev/null
+++ b/gcc/config/alpha/alpha.c
@@ -0,0 +1,11210 @@
+/* Subroutines used for code generation on the DEC Alpha.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "reload.h"
+#include "obstack.h"
+#include "except.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "integrate.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "debug.h"
+#include "langhooks.h"
+#include "splay-tree.h"
+#include "cfglayout.h"
+#include "gimple.h"
+#include "tree-flow.h"
+#include "tree-stdarg.h"
+#include "tm-constrs.h"
+#include "df.h"
+#include "libfuncs.h"
+
+/* Specify which cpu to schedule for. */
+enum processor_type alpha_tune;
+
+/* Which cpu we're generating code for. */
+enum processor_type alpha_cpu;
+
+static const char * const alpha_cpu_name[] =
+{
+ "ev4", "ev5", "ev6"
+};
+
+/* Specify how accurate floating-point traps need to be. */
+
+enum alpha_trap_precision alpha_tp;
+
+/* Specify the floating-point rounding mode. */
+
+enum alpha_fp_rounding_mode alpha_fprm;
+
+/* Specify which things cause traps. */
+
+enum alpha_fp_trap_mode alpha_fptm;
+
+/* Nonzero if inside a function, because the Alpha asm can't
+ handle .file directives inside functions. */
+
+static int inside_function = FALSE;
+
+/* The number of cycles of latency we should assume on memory reads. */
+
+int alpha_memory_latency = 3;
+
+/* Whether the function needs the GP. */
+
+static int alpha_function_needs_gp;
+
+/* The assembler name of the current function. */
+
+static const char *alpha_fnname;
+
+/* The next explicit relocation sequence number. */
+extern GTY(()) int alpha_next_sequence_number;
+int alpha_next_sequence_number = 1;
+
+/* The literal and gpdisp sequence numbers for this insn, as printed
+ by %# and %* respectively. */
+extern GTY(()) int alpha_this_literal_sequence_number;
+extern GTY(()) int alpha_this_gpdisp_sequence_number;
+int alpha_this_literal_sequence_number;
+int alpha_this_gpdisp_sequence_number;
+
+/* Costs of various operations on the different architectures. */
+
+struct alpha_rtx_cost_data
+{
+ unsigned char fp_add;
+ unsigned char fp_mult;
+ unsigned char fp_div_sf;
+ unsigned char fp_div_df;
+ unsigned char int_mult_si;
+ unsigned char int_mult_di;
+ unsigned char int_shift;
+ unsigned char int_cmov;
+ unsigned short int_div;
+};
+
+static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
+{
+ { /* EV4 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (6), /* fp_mult */
+ COSTS_N_INSNS (34), /* fp_div_sf */
+ COSTS_N_INSNS (63), /* fp_div_df */
+ COSTS_N_INSNS (23), /* int_mult_si */
+ COSTS_N_INSNS (23), /* int_mult_di */
+ COSTS_N_INSNS (2), /* int_shift */
+ COSTS_N_INSNS (2), /* int_cmov */
+ COSTS_N_INSNS (97), /* int_div */
+ },
+ { /* EV5 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult */
+ COSTS_N_INSNS (15), /* fp_div_sf */
+ COSTS_N_INSNS (22), /* fp_div_df */
+ COSTS_N_INSNS (8), /* int_mult_si */
+ COSTS_N_INSNS (12), /* int_mult_di */
+ COSTS_N_INSNS (1) + 1, /* int_shift */
+ COSTS_N_INSNS (1), /* int_cmov */
+ COSTS_N_INSNS (83), /* int_div */
+ },
+ { /* EV6 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult */
+ COSTS_N_INSNS (12), /* fp_div_sf */
+ COSTS_N_INSNS (15), /* fp_div_df */
+ COSTS_N_INSNS (7), /* int_mult_si */
+ COSTS_N_INSNS (7), /* int_mult_di */
+ COSTS_N_INSNS (1), /* int_shift */
+ COSTS_N_INSNS (2), /* int_cmov */
+ COSTS_N_INSNS (86), /* int_div */
+ },
+};
+
+/* Similar but tuned for code size instead of execution latency. The
+ extra +N is fractional cost tuning based on latency. It's used to
+ encourage use of cheaper insns like shift, but only if there's just
+ one of them. */
+
+static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
+{
+ COSTS_N_INSNS (1), /* fp_add */
+ COSTS_N_INSNS (1), /* fp_mult */
+ COSTS_N_INSNS (1), /* fp_div_sf */
+ COSTS_N_INSNS (1) + 1, /* fp_div_df */
+ COSTS_N_INSNS (1) + 1, /* int_mult_si */
+ COSTS_N_INSNS (1) + 2, /* int_mult_di */
+ COSTS_N_INSNS (1), /* int_shift */
+ COSTS_N_INSNS (1), /* int_cmov */
+ COSTS_N_INSNS (6), /* int_div */
+};
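+
+/* (As an illustration of the fractional tuning described above: since
+ COSTS_N_INSNS (N) expands to N * 4, the "+ 1" and "+ 2" entries make,
+ for example, a DImode multiply at cost 6 lose to a single shift at
+ cost 4 yet beat a two-shift sequence at cost 8.) */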
+
+/* Get the number of args of a function in one of two ways. */
+#if TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK
+#define NUM_ARGS crtl->args.info.num_args
+#else
+#define NUM_ARGS crtl->args.info
+#endif
+
+#define REG_PV 27
+#define REG_RA 26
+
+/* Declarations of static functions. */
+static struct machine_function *alpha_init_machine_status (void);
+static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
+
+#if TARGET_ABI_OPEN_VMS
+static void alpha_write_linkage (FILE *, const char *, tree);
+static bool vms_valid_pointer_mode (enum machine_mode);
+#endif
+
+static void unicosmk_output_deferred_case_vectors (FILE *);
+static void unicosmk_gen_dsib (unsigned long *);
+static void unicosmk_output_ssib (FILE *, const char *);
+static int unicosmk_need_dex (rtx);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options alpha_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+alpha_handle_option (size_t code, const char *arg, int value)
+{
+ switch (code)
+ {
+ case OPT_mfp_regs:
+ if (value == 0)
+ target_flags |= MASK_SOFT_FP;
+ break;
+
+ case OPT_mieee:
+ case OPT_mieee_with_inexact:
+ target_flags |= MASK_IEEE_CONFORMANT;
+ break;
+
+ case OPT_mtls_size_:
+ if (value != 16 && value != 32 && value != 64)
+ error ("bad value %qs for -mtls-size switch", arg);
+ break;
+ }
+
+ return true;
+}
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+/* Implement TARGET_MANGLE_TYPE. */
+
+static const char *
+alpha_mangle_type (const_tree type)
+{
+ if (TYPE_MAIN_VARIANT (type) == long_double_type_node
+ && TARGET_LONG_DOUBLE_128)
+ return "g";
+
+ /* For all other types, use normal C++ mangling. */
+ return NULL;
+}
+#endif
+
+/* Parse target option strings. */
+
+static void
+alpha_option_override (void)
+{
+ static const struct cpu_table {
+ const char *const name;
+ const enum processor_type processor;
+ const int flags;
+ } cpu_table[] = {
+ { "ev4", PROCESSOR_EV4, 0 },
+ { "ev45", PROCESSOR_EV4, 0 },
+ { "21064", PROCESSOR_EV4, 0 },
+ { "ev5", PROCESSOR_EV5, 0 },
+ { "21164", PROCESSOR_EV5, 0 },
+ { "ev56", PROCESSOR_EV5, MASK_BWX },
+ { "21164a", PROCESSOR_EV5, MASK_BWX },
+ { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX },
+ { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
+ { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX },
+ { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
+ { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX },
+ { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX },
+ { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX }
+ };
+
+ int const ct_size = ARRAY_SIZE (cpu_table);
+ int i;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* Unicos/Mk doesn't have shared libraries. */
+ if (TARGET_ABI_UNICOSMK && flag_pic)
+ {
+ warning (0, "-f%s ignored for Unicos/Mk (not supported)",
+ (flag_pic > 1) ? "PIC" : "pic");
+ flag_pic = 0;
+ }
+
+ /* On Unicos/Mk, the native compiler consistently generates /d suffixes for
+ floating-point instructions. Make that the default for this target. */
+ if (TARGET_ABI_UNICOSMK)
+ alpha_fprm = ALPHA_FPRM_DYN;
+ else
+ alpha_fprm = ALPHA_FPRM_NORM;
+
+ alpha_tp = ALPHA_TP_PROG;
+ alpha_fptm = ALPHA_FPTM_N;
+
+ /* We cannot use su and sui qualifiers for conversion instructions on
+ Unicos/Mk. I'm not sure if this is due to assembler or hardware
+ limitations. Right now, we issue a warning if -mieee is specified
+ and then ignore it; eventually, we should either get it right or
+ disable the option altogether. */
+
+ if (TARGET_IEEE)
+ {
+ if (TARGET_ABI_UNICOSMK)
+ warning (0, "-mieee not supported on Unicos/Mk");
+ else
+ {
+ alpha_tp = ALPHA_TP_INSN;
+ alpha_fptm = ALPHA_FPTM_SU;
+ }
+ }
+
+ if (TARGET_IEEE_WITH_INEXACT)
+ {
+ if (TARGET_ABI_UNICOSMK)
+ warning (0, "-mieee-with-inexact not supported on Unicos/Mk");
+ else
+ {
+ alpha_tp = ALPHA_TP_INSN;
+ alpha_fptm = ALPHA_FPTM_SUI;
+ }
+ }
+
+ if (alpha_tp_string)
+ {
+ if (! strcmp (alpha_tp_string, "p"))
+ alpha_tp = ALPHA_TP_PROG;
+ else if (! strcmp (alpha_tp_string, "f"))
+ alpha_tp = ALPHA_TP_FUNC;
+ else if (! strcmp (alpha_tp_string, "i"))
+ alpha_tp = ALPHA_TP_INSN;
+ else
+ error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
+ }
+
+ if (alpha_fprm_string)
+ {
+ if (! strcmp (alpha_fprm_string, "n"))
+ alpha_fprm = ALPHA_FPRM_NORM;
+ else if (! strcmp (alpha_fprm_string, "m"))
+ alpha_fprm = ALPHA_FPRM_MINF;
+ else if (! strcmp (alpha_fprm_string, "c"))
+ alpha_fprm = ALPHA_FPRM_CHOP;
+ else if (! strcmp (alpha_fprm_string,"d"))
+ alpha_fprm = ALPHA_FPRM_DYN;
+ else
+ error ("bad value %qs for -mfp-rounding-mode switch",
+ alpha_fprm_string);
+ }
+
+ if (alpha_fptm_string)
+ {
+ if (strcmp (alpha_fptm_string, "n") == 0)
+ alpha_fptm = ALPHA_FPTM_N;
+ else if (strcmp (alpha_fptm_string, "u") == 0)
+ alpha_fptm = ALPHA_FPTM_U;
+ else if (strcmp (alpha_fptm_string, "su") == 0)
+ alpha_fptm = ALPHA_FPTM_SU;
+ else if (strcmp (alpha_fptm_string, "sui") == 0)
+ alpha_fptm = ALPHA_FPTM_SUI;
+ else
+ error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
+ }
+
+ if (alpha_cpu_string)
+ {
+ for (i = 0; i < ct_size; i++)
+ if (! strcmp (alpha_cpu_string, cpu_table [i].name))
+ {
+ alpha_tune = alpha_cpu = cpu_table [i].processor;
+ target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
+ target_flags |= cpu_table [i].flags;
+ break;
+ }
+ if (i == ct_size)
+ error ("bad value %qs for -mcpu switch", alpha_cpu_string);
+ }
+
+ if (alpha_tune_string)
+ {
+ for (i = 0; i < ct_size; i++)
+ if (! strcmp (alpha_tune_string, cpu_table [i].name))
+ {
+ alpha_tune = cpu_table [i].processor;
+ break;
+ }
+ if (i == ct_size)
+ error ("bad value %qs for -mtune switch", alpha_tune_string);
+ }
+
+ /* Do some sanity checks on the above options. */
+
+ if (TARGET_ABI_UNICOSMK && alpha_fptm != ALPHA_FPTM_N)
+ {
+ warning (0, "trap mode not supported on Unicos/Mk");
+ alpha_fptm = ALPHA_FPTM_N;
+ }
+
+ if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
+ && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
+ {
+ warning (0, "fp software completion requires -mtrap-precision=i");
+ alpha_tp = ALPHA_TP_INSN;
+ }
+
+ if (alpha_cpu == PROCESSOR_EV6)
+ {
+ /* Except for EV6 pass 1 (not released), we always have precise
+ arithmetic traps, which means we can do software completion
+ without minding trap shadows. */
+ alpha_tp = ALPHA_TP_PROG;
+ }
+
+ if (TARGET_FLOAT_VAX)
+ {
+ if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
+ {
+ warning (0, "rounding mode not supported for VAX floats");
+ alpha_fprm = ALPHA_FPRM_NORM;
+ }
+ if (alpha_fptm == ALPHA_FPTM_SUI)
+ {
+ warning (0, "trap mode not supported for VAX floats");
+ alpha_fptm = ALPHA_FPTM_SU;
+ }
+ if (target_flags_explicit & MASK_LONG_DOUBLE_128)
+ warning (0, "128-bit long double not supported for VAX floats");
+ target_flags &= ~MASK_LONG_DOUBLE_128;
+ }
+
+ {
+ char *end;
+ int lat;
+
+ if (!alpha_mlat_string)
+ alpha_mlat_string = "L1";
+
+ if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
+ && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
+ ;
+ else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
+ && ISDIGIT ((unsigned char)alpha_mlat_string[1])
+ && alpha_mlat_string[2] == '\0')
+ {
+ static int const cache_latency[][4] =
+ {
+ { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
+ { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
+ { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
+ };
+
+ lat = alpha_mlat_string[1] - '0';
+ if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
+ {
+ warning (0, "L%d cache latency unknown for %s",
+ lat, alpha_cpu_name[alpha_tune]);
+ lat = 3;
+ }
+ else
+ lat = cache_latency[alpha_tune][lat-1];
+ }
+ else if (! strcmp (alpha_mlat_string, "main"))
+ {
+ /* Most current memories have about 370ns latency. This is
+ a reasonable guess for a fast cpu. */
+ lat = 150;
+ }
+ else
+ {
+ warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
+ lat = 3;
+ }
+
+ alpha_memory_latency = lat;
+ }
+
+ /* Default the definition of "small data" to 8 bytes. */
+ if (!global_options_set.x_g_switch_value)
+ g_switch_value = 8;
+
+ /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
+ if (flag_pic == 1)
+ target_flags |= MASK_SMALL_DATA;
+ else if (flag_pic == 2)
+ target_flags &= ~MASK_SMALL_DATA;
+
+ /* Align labels and loops for optimal branching. */
+ /* ??? Kludge these by not doing anything if we don't optimize and also if
+ we are writing ECOFF symbols to work around a bug in DEC's assembler. */
+ if (optimize > 0 && write_symbols != SDB_DEBUG)
+ {
+ if (align_loops <= 0)
+ align_loops = 16;
+ if (align_jumps <= 0)
+ align_jumps = 16;
+ }
+ if (align_functions <= 0)
+ align_functions = 16;
+
+ /* Register variables and functions with the garbage collector. */
+
+ /* Set up function hooks. */
+ init_machine_status = alpha_init_machine_status;
+
+ /* Tell the compiler when we're using VAX floating point. */
+ if (TARGET_FLOAT_VAX)
+ {
+ REAL_MODE_FORMAT (SFmode) = &vax_f_format;
+ REAL_MODE_FORMAT (DFmode) = &vax_g_format;
+ REAL_MODE_FORMAT (TFmode) = NULL;
+ }
+
+#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+ if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
+ target_flags |= MASK_LONG_DOUBLE_128;
+#endif
+
+ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
+ can be optimized to ap = __builtin_next_arg (0). */
+ if (TARGET_ABI_UNICOSMK)
+ targetm.expand_builtin_va_start = NULL;
+}
+
+/* Returns 1 if VALUE is a mask that contains full bytes of zeros or ones. */
+
+int
+zap_mask (HOST_WIDE_INT value)
+{
+ int i;
+
+ for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
+ i++, value >>= 8)
+ if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
+ return 0;
+
+ return 1;
+}
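+
+/* As a concrete illustration of the check above: zap_mask accepts
+ values such as 0x00000000ffffffff or 0xff00ff00ff00ff00, where every
+ byte is 0x00 or 0xff, but rejects 0x0000000000000100, whose second
+ byte is only partially set. */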
+
+/* Return true if OP is valid for a particular TLS relocation.
+ We are already guaranteed that OP is a CONST. */
+
+int
+tls_symbolic_operand_1 (rtx op, int size, int unspec)
+{
+ op = XEXP (op, 0);
+
+ if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
+ return 0;
+ op = XVECEXP (op, 0, 0);
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ switch (SYMBOL_REF_TLS_MODEL (op))
+ {
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
+ case TLS_MODEL_INITIAL_EXEC:
+ return unspec == UNSPEC_TPREL && size == 64;
+ case TLS_MODEL_LOCAL_EXEC:
+ return unspec == UNSPEC_TPREL && size == alpha_tls_size;
+ default:
+ gcc_unreachable ();
+ }
+}
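+
+/* For illustration, with a hypothetical thread-local symbol "foo" the
+ operand accepted above has the shape
+ (const (unspec [(symbol_ref "foo")] UNSPEC_TPREL))
+ where the unspec kind and SIZE must agree with the TLS model
+ recorded on the SYMBOL_REF. */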
+
+/* Used by aligned_memory_operand and unaligned_memory_operand to
+ resolve what reload is going to do with OP if it's a register. */
+
+rtx
+resolve_reload_operand (rtx op)
+{
+ if (reload_in_progress)
+ {
+ rtx tmp = op;
+ if (GET_CODE (tmp) == SUBREG)
+ tmp = SUBREG_REG (tmp);
+ if (REG_P (tmp)
+ && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
+ {
+ op = reg_equiv_memory_loc[REGNO (tmp)];
+ if (op == 0)
+ return 0;
+ }
+ }
+ return op;
+}
+
+/* The set of scalar modes supported differs from the default
+ check-what-C-supports version in that sometimes TFmode is available
+ even when long double indicates only DFmode. On Unicos/Mk, HImode
+ doesn't map to any C type, but of course we still support it. */
+
+static bool
+alpha_scalar_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode: /* via optabs.c */
+ return true;
+
+ case SFmode:
+ case DFmode:
+ return true;
+
+ case TFmode:
+ return TARGET_HAS_XFLOATING_LIBS;
+
+ default:
+ return false;
+ }
+}
+
+/* Alpha implements a couple of integer vector mode operations when
+ TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
+ which allows the vectorizer to operate on e.g. move instructions,
+ or when expand_vector_operations can do something useful. */
+
+static bool
+alpha_vector_mode_supported_p (enum machine_mode mode)
+{
+ return mode == V8QImode || mode == V4HImode || mode == V2SImode;
+}
+
+/* Return 1 if this function can directly return via $26. */
+
+int
+direct_return (void)
+{
+ return (! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK
+ && reload_completed
+ && alpha_sa_size () == 0
+ && get_frame_size () == 0
+ && crtl->outgoing_args_size == 0
+ && crtl->args.pretend_args_size == 0);
+}
+
+/* Return the ADDR_VEC associated with a tablejump insn. */
+
+rtx
+alpha_tablejump_addr_vec (rtx insn)
+{
+ rtx tmp;
+
+ tmp = JUMP_LABEL (insn);
+ if (!tmp)
+ return NULL_RTX;
+ tmp = NEXT_INSN (tmp);
+ if (!tmp)
+ return NULL_RTX;
+ if (JUMP_P (tmp)
+ && GET_CODE (PATTERN (tmp)) == ADDR_DIFF_VEC)
+ return PATTERN (tmp);
+ return NULL_RTX;
+}
+
+/* Return the label of the predicted edge, or CONST0_RTX if we don't know. */
+
+rtx
+alpha_tablejump_best_label (rtx insn)
+{
+ rtx jump_table = alpha_tablejump_addr_vec (insn);
+ rtx best_label = NULL_RTX;
+
+ /* ??? Once the CFG doesn't keep getting completely rebuilt, look
+ there for edge frequency counts from profile data. */
+
+ if (jump_table)
+ {
+ int n_labels = XVECLEN (jump_table, 1);
+ int best_count = -1;
+ int i, j;
+
+ for (i = 0; i < n_labels; i++)
+ {
+ int count = 1;
+
+ for (j = i + 1; j < n_labels; j++)
+ if (XEXP (XVECEXP (jump_table, 1, i), 0)
+ == XEXP (XVECEXP (jump_table, 1, j), 0))
+ count++;
+
+ if (count > best_count)
+ best_count = count, best_label = XVECEXP (jump_table, 1, i);
+ }
+ }
+
+ return best_label ? best_label : const0_rtx;
+}
+
+/* Return the TLS model to use for SYMBOL. */
+
+static enum tls_model
+tls_symbolic_operand_type (rtx symbol)
+{
+ enum tls_model model;
+
+ if (GET_CODE (symbol) != SYMBOL_REF)
+ return TLS_MODEL_NONE;
+ model = SYMBOL_REF_TLS_MODEL (symbol);
+
+ /* Local-exec with a 64-bit size is the same code as initial-exec. */
+ if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
+ model = TLS_MODEL_INITIAL_EXEC;
+
+ return model;
+}
+
+/* Return true if the function DECL will share the same GP as any
+ function in the current unit of translation. */
+
+static bool
+decl_has_samegp (const_tree decl)
+{
+ /* Functions that are not local can be overridden, and thus may
+ not share the same gp. */
+ if (!(*targetm.binds_local_p) (decl))
+ return false;
+
+ /* If -msmall-data is in effect, assume that there is only one GP
+ for the module, and so any local symbol has this property. We
+ need explicit relocations to be able to enforce this for symbols
+ not defined in this unit of translation, however. */
+ if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
+ return true;
+
+ /* Functions that are not external are defined in this UoT. */
+ /* ??? Irritatingly, static functions not yet emitted are still
+ marked "external". Apply this to non-static functions only. */
+ return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
+}
+
+/* Return true if EXP should be placed in the small data section. */
+
+static bool
+alpha_in_small_data_p (const_tree exp)
+{
+ /* We want to merge strings, so we never consider them small data. */
+ if (TREE_CODE (exp) == STRING_CST)
+ return false;
+
+ /* Functions are never in the small data area. Duh. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".sdata") == 0
+ || strcmp (section, ".sbss") == 0)
+ return true;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in sdata because it might be too big when completed. */
+ if (size > 0 && size <= g_switch_value)
+ return true;
+ }
+
+ return false;
+}
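+
+/* So with the default -G 8, a hypothetical global "int counter;"
+ (4 bytes, complete type) lands in small data, while a 64-byte array
+ does not unless its section was explicitly set to ".sdata" or
+ ".sbss". */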
+
+#if TARGET_ABI_OPEN_VMS
+static bool
+vms_valid_pointer_mode (enum machine_mode mode)
+{
+ return (mode == SImode || mode == DImode);
+}
+
+static bool
+alpha_linkage_symbol_p (const char *symname)
+{
+ int symlen = strlen (symname);
+
+ if (symlen > 4)
+ return strcmp (&symname [symlen - 4], "..lk") == 0;
+
+ return false;
+}
+
+#define LINKAGE_SYMBOL_REF_P(X) \
+ ((GET_CODE (X) == SYMBOL_REF \
+ && alpha_linkage_symbol_p (XSTR (X, 0))) \
+ || (GET_CODE (X) == CONST \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
+ && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
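+
+/* E.g., given a hypothetical linkage symbol "bar..lk", the macro above
+ matches both (symbol_ref "bar..lk") and
+ (const (plus (symbol_ref "bar..lk") (const_int 8))). */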
+#endif
+
+/* legitimate_address_p recognizes an RTL expression that is a valid
+ memory address for an instruction. The MODE argument is the
+ machine mode for the MEM expression that wants to use this address.
+
+ For Alpha, we have either a constant address or the sum of a
+ register and a constant address, or just a register. For DImode,
+ any of those forms can be surrounded with an AND that clears the
+ low-order three bits; this is an "unaligned" access. */
+
+static bool
+alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ /* If this is an ldq_u type address, discard the outer AND. */
+ if (mode == DImode
+ && GET_CODE (x) == AND
+ && CONST_INT_P (XEXP (x, 1))
+ && INTVAL (XEXP (x, 1)) == -8)
+ x = XEXP (x, 0);
+
+ /* Discard non-paradoxical subregs. */
+ if (GET_CODE (x) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (x))
+ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
+ x = SUBREG_REG (x);
+
+ /* Unadorned general registers are valid. */
+ if (REG_P (x)
+ && (strict
+ ? STRICT_REG_OK_FOR_BASE_P (x)
+ : NONSTRICT_REG_OK_FOR_BASE_P (x)))
+ return true;
+
+ /* Constant addresses (i.e. +/- 32k) are valid. */
+ if (CONSTANT_ADDRESS_P (x))
+ return true;
+
+#if TARGET_ABI_OPEN_VMS
+ if (LINKAGE_SYMBOL_REF_P (x))
+ return true;
+#endif
+
+ /* Register plus a small constant offset is valid. */
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx ofs = XEXP (x, 1);
+ x = XEXP (x, 0);
+
+ /* Discard non-paradoxical subregs. */
+ if (GET_CODE (x) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (x))
+ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
+ x = SUBREG_REG (x);
+
+ if (REG_P (x))
+ {
+ if (! strict
+ && NONSTRICT_REG_OK_FP_BASE_P (x)
+ && CONST_INT_P (ofs))
+ return true;
+ if ((strict
+ ? STRICT_REG_OK_FOR_BASE_P (x)
+ : NONSTRICT_REG_OK_FOR_BASE_P (x))
+ && CONSTANT_ADDRESS_P (ofs))
+ return true;
+ }
+ }
+
+ /* If we're managing explicit relocations, LO_SUM is valid, as are small
+ data symbols. Avoid explicit relocations of modes larger than word
+ mode, since e.g. $LC0+8($1) can fold around the +/- 32k offset. */
+ else if (TARGET_EXPLICIT_RELOCS
+ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
+ {
+ if (small_symbolic_operand (x, Pmode))
+ return true;
+
+ if (GET_CODE (x) == LO_SUM)
+ {
+ rtx ofs = XEXP (x, 1);
+ x = XEXP (x, 0);
+
+ /* Discard non-paradoxical subregs. */
+ if (GET_CODE (x) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (x))
+ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
+ x = SUBREG_REG (x);
+
+ /* Must have a valid base register. */
+ if (! (REG_P (x)
+ && (strict
+ ? STRICT_REG_OK_FOR_BASE_P (x)
+ : NONSTRICT_REG_OK_FOR_BASE_P (x))))
+ return false;
+
+ /* The symbol must be local. */
+ if (local_symbolic_operand (ofs, Pmode)
+ || dtp32_symbolic_operand (ofs, Pmode)
+ || tp32_symbolic_operand (ofs, Pmode))
+ return true;
+ }
+ }
+
+ return false;
+}
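+
+/* To illustrate, the predicate above accepts forms such as
+ (reg $16)
+ (plus (reg $16) (const_int 64))
+ and, for DImode, (and (plus (reg $16) (const_int 5)) (const_int -8))
+ as used by ldq_u, while a large offset such as
+ (plus (reg $16) (const_int 1048576)) is rejected in general, the
+ offset lying outside the +/- 32k range, and must be legitimized
+ first. */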
+
+/* Build the SYMBOL_REF for __tls_get_addr. */
+
+static GTY(()) rtx tls_get_addr_libfunc;
+
+static rtx
+get_tls_get_addr (void)
+{
+ if (!tls_get_addr_libfunc)
+ tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
+ return tls_get_addr_libfunc;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address. */
+
+static rtx
+alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
+{
+ HOST_WIDE_INT addend;
+
+ /* If the address is (plus reg const_int) and the CONST_INT is not a
+ valid offset, compute the high part of the constant and add it to
+ the register. Then our address is (plus temp low-part-const). */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && CONST_INT_P (XEXP (x, 1))
+ && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
+ {
+ addend = INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ goto split_addend;
+ }
+
+ /* If the address is (const (plus FOO const_int)), find the low-order
+ part of the CONST_INT. Then load FOO plus any high-order part of the
+ CONST_INT into a register. Our address is (plus reg low-part-const).
+ This is done to reduce the number of GOT entries. */
+ if (can_create_pseudo_p ()
+ && GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ addend = INTVAL (XEXP (XEXP (x, 0), 1));
+ x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
+ goto split_addend;
+ }
+
+ /* If we have a (plus reg const), emit the load as in (2), then add
+ the two registers, and finally generate (plus reg low-part-const) as
+ our address. */
+ if (can_create_pseudo_p ()
+ && GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && GET_CODE (XEXP (x, 1)) == CONST
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
+ {
+ addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
+ x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
+ XEXP (XEXP (XEXP (x, 1), 0), 0),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ goto split_addend;
+ }
+
+ /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
+ Avoid modes larger than word mode, since e.g. $LC0+8($1) can fold
+ around +/- 32k offset. */
+ if (TARGET_EXPLICIT_RELOCS
+ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
+ && symbolic_operand (x, Pmode))
+ {
+ rtx r0, r16, eqv, tga, tp, insn, dest, seq;
+
+ switch (tls_symbolic_operand_type (x))
+ {
+ case TLS_MODEL_NONE:
+ break;
+
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ start_sequence ();
+
+ r0 = gen_rtx_REG (Pmode, 0);
+ r16 = gen_rtx_REG (Pmode, 16);
+ tga = get_tls_get_addr ();
+ dest = gen_reg_rtx (Pmode);
+ seq = GEN_INT (alpha_next_sequence_number++);
+
+ emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
+ insn = gen_call_value_osf_tlsgd (r0, tga, seq);
+ insn = emit_call_insn (insn);
+ RTL_CONST_CALL_P (insn) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
+
+ insn = get_insns ();
+ end_sequence ();
+
+ emit_libcall_block (insn, dest, r0, x);
+ return dest;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ start_sequence ();
+
+ r0 = gen_rtx_REG (Pmode, 0);
+ r16 = gen_rtx_REG (Pmode, 16);
+ tga = get_tls_get_addr ();
+ scratch = gen_reg_rtx (Pmode);
+ seq = GEN_INT (alpha_next_sequence_number++);
+
+ emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
+ insn = gen_call_value_osf_tlsldm (r0, tga, seq);
+ insn = emit_call_insn (insn);
+ RTL_CONST_CALL_P (insn) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
+
+ insn = get_insns ();
+ end_sequence ();
+
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLSLDM_CALL);
+ emit_libcall_block (insn, scratch, r0, eqv);
+
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
+ eqv = gen_rtx_CONST (Pmode, eqv);
+
+ if (alpha_tls_size == 64)
+ {
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
+ emit_insn (gen_adddi3 (dest, dest, scratch));
+ return dest;
+ }
+ if (alpha_tls_size == 32)
+ {
+ insn = gen_rtx_HIGH (Pmode, eqv);
+ insn = gen_rtx_PLUS (Pmode, scratch, insn);
+ scratch = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
+ }
+ return gen_rtx_LO_SUM (Pmode, scratch, eqv);
+
+ case TLS_MODEL_INITIAL_EXEC:
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
+ eqv = gen_rtx_CONST (Pmode, eqv);
+ tp = gen_reg_rtx (Pmode);
+ scratch = gen_reg_rtx (Pmode);
+ dest = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_load_tp (tp));
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
+ emit_insn (gen_adddi3 (dest, tp, scratch));
+ return dest;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
+ eqv = gen_rtx_CONST (Pmode, eqv);
+ tp = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_load_tp (tp));
+ if (alpha_tls_size == 32)
+ {
+ insn = gen_rtx_HIGH (Pmode, eqv);
+ insn = gen_rtx_PLUS (Pmode, tp, insn);
+ tp = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
+ }
+ return gen_rtx_LO_SUM (Pmode, tp, eqv);
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (local_symbolic_operand (x, Pmode))
+ {
+ if (small_symbolic_operand (x, Pmode))
+ return x;
+ else
+ {
+ if (can_create_pseudo_p ())
+ scratch = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch,
+ gen_rtx_HIGH (Pmode, x)));
+ return gen_rtx_LO_SUM (Pmode, scratch, x);
+ }
+ }
+ }
+
+ return NULL;
+
+ split_addend:
+ {
+ HOST_WIDE_INT low, high;
+
+ low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
+ addend -= low;
+ high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ addend -= high;
+
+ if (addend)
+ x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
+ (!can_create_pseudo_p () ? scratch : NULL_RTX),
+ 1, OPTAB_LIB_WIDEN);
+ if (high)
+ x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
+ (!can_create_pseudo_p () ? scratch : NULL_RTX),
+ 1, OPTAB_LIB_WIDEN);
+
+ return plus_constant (x, low);
+ }
+}
+
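+/* A worked example of the split_addend path above: for an input of
+ (plus (reg $1) (const_int 0x12345678)), the decomposition gives
+ low = 0x5678 and high = 0x12340000, so 0x12340000 is added into a
+ temporary register and the returned address is roughly
+ (plus (reg tmp) (const_int 0x5678)), a valid 16-bit displacement. */
+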
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. Return X or the new, valid address. */
+
+static rtx
+alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
+ return new_x ? new_x : x;
+}
+
+/* Primarily this is required for TLS symbols, but given that our move
+ patterns *ought* to be able to handle any symbol at any time, we
+ should never be spilling symbolic operands to the constant pool, ever. */
+
+static bool
+alpha_cannot_force_const_mem (rtx x)
+{
+ enum rtx_code code = GET_CODE (x);
+ return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
+}
+
+/* We do not allow indirect calls to be optimized into sibling calls, nor
+ can we allow a call to a function with a different GP to be optimized
+ into a sibcall. */
+
+static bool
+alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ /* Can't do indirect tail calls, since we don't know if the target
+ uses the same GP. */
+ if (!decl)
+ return false;
+
+ /* Otherwise, we can make a tail call if the target function shares
+ the same GP. */
+ return decl_has_samegp (decl);
+}
+
+int
+some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ /* Don't re-split. */
+ if (GET_CODE (x) == LO_SUM)
+ return -1;
+
+ return small_symbolic_operand (x, Pmode) != 0;
+}
+
+static int
+split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ /* Don't re-split. */
+ if (GET_CODE (x) == LO_SUM)
+ return -1;
+
+ if (small_symbolic_operand (x, Pmode))
+ {
+ x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
+ *px = x;
+ return -1;
+ }
+
+ return 0;
+}
+
+rtx
+split_small_symbolic_operand (rtx x)
+{
+ x = copy_insn (x);
+ for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
+ return x;
+}
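+
+/* The rewrite above turns, e.g., (mem (symbol_ref "x")) for a
+ small-data symbol "x" (a placeholder name) into
+ (mem (lo_sum (reg $29) (symbol_ref "x"))), addressing it as a
+ 16-bit offset from the GP register. */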
+
+/* Indicate that INSN cannot be duplicated. This is true for any insn
+ that we've marked with gpdisp relocs, since those have to stay in
+ 1-1 correspondence with one another.
+
+ Technically we could copy them if we could set up a mapping from one
+ sequence number to another, across the set of insns to be duplicated.
+ This seems overly complicated and error-prone since interblock motion
+ from sched-ebb could move one of the pair of insns to a different block.
+
+ Also cannot allow jsr insns to be duplicated. If they throw exceptions,
+ then they'll be in a different block from their ldgp. Which could lead
+ the bb reorder code to think that it would be ok to copy just the block
+ containing the call and branch to the block containing the ldgp. */
+
+static bool
+alpha_cannot_copy_insn_p (rtx insn)
+{
+ if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
+ return false;
+ if (recog_memoized (insn) >= 0)
+ return get_attr_cannot_copy (insn);
+ else
+ return false;
+}
+
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and return the new rtx. */
+
+rtx
+alpha_legitimize_reload_address (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ /* We must recognize output that we have already generated ourselves. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && CONST_INT_P (XEXP (x, 1)))
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ return x;
+ }
+
+ /* We wish to handle large displacements off a base register by
+ splitting the addend across an ldah and the mem insn. This
+ cuts the number of extra insns needed from 3 to 1. */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
+ && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+ HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT high
+ = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
+
+ /* Check for 32-bit overflow. */
+ if (high + low != val)
+ return NULL_RTX;
+
+ /* Reload the high part into a base reg; leave the low part
+ in the mem directly. */
+ x = gen_rtx_PLUS (GET_MODE (x),
+ gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
+ GEN_INT (high)),
+ GEN_INT (low));
+
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ return x;
+ }
+
+ return NULL_RTX;
+}
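+
+/* For instance, reloading (plus (reg $15) (const_int 0x9000)) splits
+ into high = 0x10000 and low = -0x7000 (0x10000 - 0x7000 == 0x9000):
+ the high part becomes a single ldah into a base register, while the
+ mem keeps the in-range displacement -0x7000. */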
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+alpha_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+ bool float_mode_p = FLOAT_MODE_P (mode);
+ const struct alpha_rtx_cost_data *cost_data;
+
+ if (!speed)
+ cost_data = &alpha_rtx_cost_size;
+ else
+ cost_data = &alpha_rtx_cost_data[alpha_tune];
+
+ switch (code)
+ {
+ case CONST_INT:
+ /* If this is an 8-bit constant, return zero since it can be used
+ nearly anywhere with no cost. If it is a valid operand for an
+ ADD or AND, likewise return 0 if we know it will be used in that
+ context. Otherwise, return 2 since it might be used there later.
+ All other constants take at least two insns. */
+ if (INTVAL (x) >= 0 && INTVAL (x) < 256)
+ {
+ *total = 0;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST_DOUBLE:
+ if (x == CONST0_RTX (mode))
+ *total = 0;
+ else if ((outer_code == PLUS && add_operand (x, VOIDmode))
+ || (outer_code == AND && and_operand (x, VOIDmode)))
+ *total = 0;
+ else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
+ *total = 2;
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
+ *total = COSTS_N_INSNS (outer_code != MEM);
+ else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
+ *total = COSTS_N_INSNS (1 + (outer_code != MEM));
+ else if (tls_symbolic_operand_type (x))
+ /* Estimate of cost for call_pal rduniq. */
+ /* ??? How many insns do we emit here? More than one... */
+ *total = COSTS_N_INSNS (15);
+ else
+ /* Otherwise we do a load from the GOT. */
+ *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
+ return true;
+
+ case HIGH:
+ /* This is effectively an add_operand. */
+ *total = 2;
+ return true;
+
+ case PLUS:
+ case MINUS:
+ if (float_mode_p)
+ *total = cost_data->fp_add;
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
+ {
+ *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
+ (enum rtx_code) outer_code, speed)
+ + rtx_cost (XEXP (x, 1),
+ (enum rtx_code) outer_code, speed)
+ + COSTS_N_INSNS (1));
+ return true;
+ }
+ return false;
+
+ case MULT:
+ if (float_mode_p)
+ *total = cost_data->fp_mult;
+ else if (mode == DImode)
+ *total = cost_data->int_mult_di;
+ else
+ *total = cost_data->int_mult_si;
+ return false;
+
+ case ASHIFT:
+ if (CONST_INT_P (XEXP (x, 1))
+ && INTVAL (XEXP (x, 1)) <= 3)
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = cost_data->int_shift;
+ return false;
+
+ case IF_THEN_ELSE:
+ if (float_mode_p)
+ *total = cost_data->fp_add;
+ else
+ *total = cost_data->int_cmov;
+ return false;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (!float_mode_p)
+ *total = cost_data->int_div;
+ else if (mode == SFmode)
+ *total = cost_data->fp_div_sf;
+ else
+ *total = cost_data->fp_div_df;
+ return false;
+
+ case MEM:
+ *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
+ return true;
+
+ case NEG:
+ if (! float_mode_p)
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case ABS:
+ if (! float_mode_p)
+ {
+ *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ case FIX:
+ case UNSIGNED_FIX:
+ case FLOAT_TRUNCATE:
+ *total = cost_data->fp_add;
+ return false;
+
+ case FLOAT_EXTEND:
+ if (MEM_P (XEXP (x, 0)))
+ *total = 0;
+ else
+ *total = cost_data->fp_add;
+ return false;
+
+ default:
+ return false;
+ }
+}
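+
+/* Note that the PLUS/MULT special case above models the scaled-add
+ insns (s4addq and s8addq): (plus (mult (reg) (const_int 8)) (reg))
+ is costed as a single insn rather than a multiply plus an add. */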
+
+/* REF is an alignable memory location. Place an aligned SImode
+ reference into *PALIGNED_MEM and the number of bits to shift into
+ *PBITNUM. */
+
+void
+get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
+{
+ rtx base;
+ HOST_WIDE_INT disp, offset;
+
+ gcc_assert (MEM_P (ref));
+
+ if (reload_in_progress
+ && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
+ {
+ base = find_replacement (&XEXP (ref, 0));
+ gcc_assert (memory_address_p (GET_MODE (ref), base));
+ }
+ else
+ base = XEXP (ref, 0);
+
+ if (GET_CODE (base) == PLUS)
+ disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
+ else
+ disp = 0;
+
+ /* Find the byte offset within an aligned word. If the memory itself is
+ claimed to be aligned, believe it. Otherwise, aligned_memory_operand
+ will have examined the base register and determined it is aligned, and
+ thus displacements from it are naturally alignable. */
+ if (MEM_ALIGN (ref) >= 32)
+ offset = 0;
+ else
+ offset = disp & 3;
+
+ /* The location must not cross an aligned word boundary. */
+ gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
+ <= GET_MODE_SIZE (SImode));
+
+ /* Access the entire aligned word. */
+ *paligned_mem = widen_memory_access (ref, SImode, -offset);
+
+ /* Convert the byte offset within the word to a bit offset. */
+ if (WORDS_BIG_ENDIAN)
+ offset = 32 - (GET_MODE_BITSIZE (GET_MODE (ref)) + offset * 8);
+ else
+ offset *= 8;
+ *pbitnum = GEN_INT (offset);
+}
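+
+/* For example, a HImode REF at (plus (reg) (const_int 6)) with no
+ stronger alignment recorded gives offset = 6 & 3 = 2, so
+ *PALIGNED_MEM is the aligned SImode word at displacement 4 and
+ *PBITNUM is 16 on this little-endian target. */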
+
+/* Similar, but just get the address. Handle the two reload cases,
+ folding any constant displacement into the address we return. */
+
+rtx
+get_unaligned_address (rtx ref)
+{
+ rtx base;
+ HOST_WIDE_INT offset = 0;
+
+ gcc_assert (MEM_P (ref));
+
+ if (reload_in_progress
+ && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
+ {
+ base = find_replacement (&XEXP (ref, 0));
+
+ gcc_assert (memory_address_p (GET_MODE (ref), base));
+ }
+ else
+ base = XEXP (ref, 0);
+
+ if (GET_CODE (base) == PLUS)
+ offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
+
+ return plus_constant (base, offset);
+}
+
+/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
+ X is always returned in a register. */
+
+rtx
+get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
+{
+ if (GET_CODE (addr) == PLUS)
+ {
+ ofs += INTVAL (XEXP (addr, 1));
+ addr = XEXP (addr, 0);
+ }
+
+ return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+}
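+
+/* E.g., for ADDR == (plus (reg $3) (const_int 13)) and OFS == 2 the
+ result holds $3 + 7, and ($3 + 7) & 7 equals ($3 + 13 + 2) & 7 for
+ any value of $3. */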
+
+/* On the Alpha, all (non-symbolic) constants except zero go into
+ a floating-point register via memory. Note that we cannot
+ return anything that is not a subset of RCLASS, and that some
+ symbolic constants cannot be dropped to memory. */
+
+enum reg_class
+alpha_preferred_reload_class(rtx x, enum reg_class rclass)
+{
+ /* Zero is present in any register class. */
+ if (x == CONST0_RTX (GET_MODE (x)))
+ return rclass;
+
+ /* These sorts of constants we can easily drop to memory. */
+ if (CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_VECTOR)
+ {
+ if (rclass == FLOAT_REGS)
+ return NO_REGS;
+ if (rclass == ALL_REGS)
+ return GENERAL_REGS;
+ return rclass;
+ }
+
+ /* All other kinds of constants should not (and in the case of HIGH
+ cannot) be dropped to memory -- instead we use a GENERAL_REGS
+ secondary reload. */
+ if (CONSTANT_P (x))
+ return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
+
+ return rclass;
+}
+
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch or immediate register. Return the class
+ needed for the immediate register. */
+
+static reg_class_t
+alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ /* Loading and storing HImode or QImode values to and from memory
+ usually requires a scratch register. */
+ if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
+ {
+ if (any_memory_operand (x, mode))
+ {
+ if (in_p)
+ {
+ if (!aligned_memory_operand (x, mode))
+ sri->icode = direct_optab_handler (reload_in_optab, mode);
+ }
+ else
+ sri->icode = direct_optab_handler (reload_out_optab, mode);
+ return NO_REGS;
+ }
+ }
+
+ /* We also cannot do integral arithmetic into FP regs, as might result
+ from register elimination into a DImode fp register. */
+ if (rclass == FLOAT_REGS)
+ {
+ if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
+ return GENERAL_REGS;
+ if (in_p && INTEGRAL_MODE_P (mode)
+ && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
+ return GENERAL_REGS;
+ }
+
+ return NO_REGS;
+}
+
+/* Subfunction of the following function. Update the flags of any MEM
+ found in part of X. */
+
+static int
+alpha_set_memflags_1 (rtx *xp, void *data)
+{
+ rtx x = *xp, orig = (rtx) data;
+
+ if (!MEM_P (x))
+ return 0;
+
+ MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
+ MEM_IN_STRUCT_P (x) = MEM_IN_STRUCT_P (orig);
+ MEM_SCALAR_P (x) = MEM_SCALAR_P (orig);
+ MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
+ MEM_READONLY_P (x) = MEM_READONLY_P (orig);
+
+ /* Sadly, we cannot use alias sets because the extra aliasing
+ produced by the AND interferes. Given that two-byte quantities
+ are the only thing we would be able to differentiate anyway,
+ there does not seem to be any point in convoluting the early
+ out of the alias check. */
+
+ return -1;
+}
+
+/* Given SEQ, which is an INSN list, look for any MEMs in either
+ a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
+ volatile flags from REF into each of the MEMs found. If REF is not
+ a MEM, don't do anything. */
+
+void
+alpha_set_memflags (rtx seq, rtx ref)
+{
+ rtx insn;
+
+ if (!MEM_P (ref))
+ return;
+
+ /* This is only called from alpha.md, after having had something
+ generated from one of the insn patterns. So if everything is
+ zero, the pattern is already up-to-date. */
+ if (!MEM_VOLATILE_P (ref)
+ && !MEM_IN_STRUCT_P (ref)
+ && !MEM_SCALAR_P (ref)
+ && !MEM_NOTRAP_P (ref)
+ && !MEM_READONLY_P (ref))
+ return;
+
+ for (insn = seq; insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
+ else
+ gcc_unreachable ();
+}
+
+static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
+ int, bool);
+
+/* Internal routine for alpha_emit_set_const to check whether C can be
+ loaded in N or fewer insns. If NO_OUTPUT is true, we only check whether
+ that is possible and return pc_rtx if successful, without emitting
+ anything. */
+
+static rtx
+alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
+ HOST_WIDE_INT c, int n, bool no_output)
+{
+ HOST_WIDE_INT new_const;
+ int i, bits;
+ /* Use a pseudo if highly optimizing and still generating RTL. */
+ rtx subtarget
+ = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
+ rtx temp, insn;
+
+ /* If this is a sign-extended 32-bit constant, we can do this in at most
+ three insns, so do it if we have enough insns left. We always have
+ a sign-extended 32-bit constant when compiling on a narrow machine. */
+
+ if (HOST_BITS_PER_WIDE_INT != 64
+ || c >> 31 == -1 || c >> 31 == 0)
+ {
+ HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT tmp1 = c - low;
+ HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT extra = 0;
+
+ /* If HIGH will be interpreted as negative but the constant is
+ positive, we must adjust it to do two ldah insns. */
+
+ if ((high & 0x8000) != 0 && c >= 0)
+ {
+ extra = 0x4000;
+ tmp1 -= 0x40000000;
+ high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
+ }
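+
+ /* For example, c == 0x7fff8000 gives low == -0x8000 and initially
+ high == -0x8000; the adjustment above yields extra == 0x4000 and
+ high == 0x4000, so c is rebuilt with two ldah insns and one lda:
+ (0x4000 << 16) + (0x4000 << 16) - 0x8000. */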
+
+ if (c == low || (low == 0 && extra == 0))
+ {
+ /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
+ but that meant that we couldn't handle INT_MIN on 32-bit machines
+ (like NT/Alpha), because we recurse indefinitely through
+ emit_move_insn to gen_movdi. So instead, since we know exactly
+ what we want, create it explicitly. */
+
+ if (no_output)
+ return pc_rtx;
+ if (target == NULL)
+ target = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
+ return target;
+ }
+ else if (n >= 2 + (extra != 0))
+ {
+ if (no_output)
+ return pc_rtx;
+ if (!can_create_pseudo_p ())
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
+ temp = target;
+ }
+ else
+ temp = copy_to_suggested_reg (GEN_INT (high << 16),
+ subtarget, mode);
+
+ /* As of 2002-02-23, addsi3 is only available when not optimizing.
+ This means that if we go through expand_binop, we'll try to
+ generate extensions, etc, which will require new pseudos, which
+ will fail during some split phases. The SImode add patterns
+ still exist, but are not named. So build the insns by hand. */
+
+ if (extra != 0)
+ {
+ if (! subtarget)
+ subtarget = gen_reg_rtx (mode);
+ insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
+ insn = gen_rtx_SET (VOIDmode, subtarget, insn);
+ emit_insn (insn);
+ temp = subtarget;
+ }
+
+ if (target == NULL)
+ target = gen_reg_rtx (mode);
+ insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
+ insn = gen_rtx_SET (VOIDmode, target, insn);
+ emit_insn (insn);
+ return target;
+ }
+ }
+
+ /* If we couldn't do it that way, try some other methods. But if we have
+ no instructions left, don't bother. Likewise, if this is SImode and
+ we can't make pseudos, we can't do anything since the expand_binop
+ and expand_unop calls will widen and try to make pseudos. */
+
+ if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
+ return 0;
+
+ /* Next, see if we can load a related constant and then shift and possibly
+ negate it to get the constant we want. Try this once each increasing
+ numbers of insns. */
+
+ for (i = 1; i < n; i++)
+ {
+ /* First, see if we can easily load the high bits after subtracting
+ some low bits. */
+
+ new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
+ if (new_const != 0)
+ {
+ temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
+ target, 0, OPTAB_WIDEN);
+ }
+ }
+
+ /* Next try complementing. */
+ temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_unop (mode, one_cmpl_optab, temp, target, 0);
+ }
+
+ /* Next try to form a constant and do a left shift. We can do this
+ if some low-order bits are zero; the exact_log2 call below tells
+ us that information. The bits we are shifting out could be any
+ value, but here we'll just try the 0- and sign-extended forms of
+ the constant. To try to increase the chance of having the same
+ constant in more than one insn, start at the highest number of
+ bits to shift, but try all possibilities in case a ZAPNOT will
+ be useful. */
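+ /* For example, c == 0x12340000 gives bits == 16 on the first
+ iteration; 0x1234 loads with a single lda, and a left shift
+ by 16 then reproduces c. */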
+
+ bits = exact_log2 (c & -c);
+ if (bits > 0)
+ for (; bits > 0; bits--)
+ {
+ new_const = c >> bits;
+ temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
+ if (!temp && c < 0)
+ {
+ new_const = (unsigned HOST_WIDE_INT)c >> bits;
+ temp = alpha_emit_set_const (subtarget, mode, new_const,
+ i, no_output);
+ }
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
+ target, 0, OPTAB_WIDEN);
+ }
+ }
+
+ /* Now try high-order zero bits. Here we try the shifted-in bits as
+ all zero and all ones. Be careful to avoid shifting outside the
+ mode and to avoid shifting outside the host wide int size. */
+ /* On narrow hosts, don't shift a 1 into the high bit, since we'll
+ confuse the recursive call and set all of the high 32 bits. */
+
+ bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
+ - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
+ if (bits > 0)
+ for (; bits > 0; bits--)
+ {
+ new_const = c << bits;
+ temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
+ if (!temp)
+ {
+ new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
+ temp = alpha_emit_set_const (subtarget, mode, new_const,
+ i, no_output);
+ }
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
+ target, 1, OPTAB_WIDEN);
+ }
+ }
+
+ /* Now try high-order 1 bits. We get that with a sign-extension.
+ But one bit isn't enough here. Be careful to avoid shifting outside
+ the mode and to avoid shifting outside the host wide int size. */
+
+ bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
+ - floor_log2 (~ c) - 2);
+ if (bits > 0)
+ for (; bits > 0; bits--)
+ {
+ new_const = c << bits;
+ temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
+ if (!temp)
+ {
+ new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
+ temp = alpha_emit_set_const (subtarget, mode, new_const,
+ i, no_output);
+ }
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
+ target, 0, OPTAB_WIDEN);
+ }
+ }
+ }
+
+#if HOST_BITS_PER_WIDE_INT == 64
+ /* Finally, see if can load a value into the target that is the same as the
+ constant except that all bytes that are 0 are changed to be 0xff. If we
+ can, then we can do a ZAPNOT to obtain the desired constant. */
+
+ new_const = c;
+ for (i = 0; i < 64; i += 8)
+ if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
+ new_const |= (HOST_WIDE_INT) 0xff << i;
+
+ /* We are only called for SImode and DImode. If this is SImode, ensure that
+ we are sign extended to a full word. */
+
+ if (mode == SImode)
+ new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
+
+ if (new_const != c)
+ {
+ temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
+ if (temp)
+ {
+ if (no_output)
+ return temp;
+ return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
+ target, 0, OPTAB_WIDEN);
+ }
+ }
+#endif
+
+ return 0;
+}
+
+/* Try to output insns to set TARGET equal to the constant C if it can be
+ done in at most N insns. Do all computations in MODE. Returns the place
+ where the output has been placed if it can be done and the insns have been
+ emitted. If it would take more than N insns, zero is returned and no
+ insns are emitted. */
+
+static rtx
+alpha_emit_set_const (rtx target, enum machine_mode mode,
+ HOST_WIDE_INT c, int n, bool no_output)
+{
+ enum machine_mode orig_mode = mode;
+ rtx orig_target = target;
+ rtx result = 0;
+ int i;
+
+ /* If we can't make any pseudos, TARGET is an SImode hard register, and
+ we can't load this constant in one insn, then do this in DImode. */
+ if (!can_create_pseudo_p () && mode == SImode
+ && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
+ {
+ result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
+ if (result)
+ return result;
+
+ target = no_output ? NULL : gen_lowpart (DImode, target);
+ mode = DImode;
+ }
+ else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
+ {
+ target = no_output ? NULL : gen_lowpart (DImode, target);
+ mode = DImode;
+ }
+
+ /* Try 1 insn, then 2, then up to N. */
+ for (i = 1; i <= n; i++)
+ {
+ result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
+ if (result)
+ {
+ rtx insn, set;
+
+ if (no_output)
+ return result;
+
+ insn = get_last_insn ();
+ set = single_set (insn);
+ if (! CONSTANT_P (SET_SRC (set)))
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
+ break;
+ }
+ }
+
+ /* Allow for the case where we changed the mode of TARGET. */
+ if (result)
+ {
+ if (result == target)
+ result = orig_target;
+ else if (mode != orig_mode)
+ result = gen_lowpart (orig_mode, result);
+ }
+
+ return result;
+}
+
+/* Having failed to find a 3-insn sequence in alpha_emit_set_const,
+ fall back to a straightforward decomposition. We do this to avoid
+ exponential run times encountered when looking for longer sequences
+ with alpha_emit_set_const. */
+
+static rtx
+alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
+{
+ HOST_WIDE_INT d1, d2, d3, d4;
+
+ /* Decompose the entire word */
+#if HOST_BITS_PER_WIDE_INT >= 64
+ gcc_assert (c2 == -(c1 < 0));
+ d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
+ c1 -= d1;
+ d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ c1 = (c1 - d2) >> 32;
+ d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
+ c1 -= d3;
+ d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ gcc_assert (c1 == d4);
+#else
+ d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
+ c1 -= d1;
+ d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ gcc_assert (c1 == d2);
+ c2 += (d2 < 0);
+ d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
+ c2 -= d3;
+ d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ gcc_assert (c2 == d4);
+#endif
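+
+ /* For example, c == 0x1234567887654321 decomposes as d4 == 0x12340000,
+ d3 == 0x5679, d2 == -0x789b0000 and d1 == 0x4321: ldah/lda build the
+ high word, it is shifted left by 32, and ldah/lda add in the low
+ bits. */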
+
+ /* Construct the high word */
+ if (d4)
+ {
+ emit_move_insn (target, GEN_INT (d4));
+ if (d3)
+ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
+ }
+ else
+ emit_move_insn (target, GEN_INT (d3));
+
+ /* Shift it into place */
+ emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
+
+ /* Add in the low bits. */
+ if (d2)
+ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
+ if (d1)
+ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
+
+ return target;
+}
+
+/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, extract
+ the low 64 bits into *P0 and *P1. */
+
+static void
+alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
+{
+ HOST_WIDE_INT i0, i1;
+
+ if (GET_CODE (x) == CONST_VECTOR)
+ x = simplify_subreg (DImode, x, GET_MODE (x), 0);
+
+ if (CONST_INT_P (x))
+ {
+ i0 = INTVAL (x);
+ i1 = -(i0 < 0);
+ }
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ {
+ i0 = CONST_DOUBLE_LOW (x);
+ i1 = -(i0 < 0);
+ }
+ else
+ {
+ i0 = CONST_DOUBLE_LOW (x);
+ i1 = CONST_DOUBLE_HIGH (x);
+ }
+
+ *p0 = i0;
+ *p1 = i1;
+}
+
+/* Implement LEGITIMATE_CONSTANT_P. This is all constants for which we
+ are willing to load the value into a register via a move pattern.
+ Normally this is all symbolic constants, integral constants that
+ take three or fewer instructions, and floating-point zero. */
+
+bool
+alpha_legitimate_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+ HOST_WIDE_INT i0, i1;
+
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ case HIGH:
+ return true;
+
+ case CONST:
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ x = XEXP (XEXP (x, 0), 0);
+ else
+ return true;
+
+ if (GET_CODE (x) != SYMBOL_REF)
+ return true;
+
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS symbols are never valid. */
+ return SYMBOL_REF_TLS_MODEL (x) == 0;
+
+ case CONST_DOUBLE:
+ if (x == CONST0_RTX (mode))
+ return true;
+ if (FLOAT_MODE_P (mode))
+ return false;
+ goto do_integer;
+
+ case CONST_VECTOR:
+ if (x == CONST0_RTX (mode))
+ return true;
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return false;
+ if (GET_MODE_SIZE (mode) != 8)
+ return false;
+ goto do_integer;
+
+ case CONST_INT:
+ do_integer:
+ if (TARGET_BUILD_CONSTANTS)
+ return true;
+ alpha_extract_integer (x, &i0, &i1);
+ if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
+ return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Operand 1 is known to be a constant, and should require more than one
+ instruction to load. Emit that multi-part load. */
+
+bool
+alpha_split_const_mov (enum machine_mode mode, rtx *operands)
+{
+ HOST_WIDE_INT i0, i1;
+ rtx temp = NULL_RTX;
+
+ alpha_extract_integer (operands[1], &i0, &i1);
+
+ if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
+ temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
+
+ if (!temp && TARGET_BUILD_CONSTANTS)
+ temp = alpha_emit_set_long_const (operands[0], i0, i1);
+
+ if (temp)
+ {
+ if (!rtx_equal_p (operands[0], temp))
+ emit_move_insn (operands[0], temp);
+ return true;
+ }
+
+ return false;
+}
+
+/* Expand a move instruction; return true if all work is done.
+ We don't handle non-bwx subword loads here. */
+
+bool
+alpha_expand_mov (enum machine_mode mode, rtx *operands)
+{
+ rtx tmp;
+
+ /* If the output is not a register, the input must be. */
+ if (MEM_P (operands[0])
+ && ! reg_or_0_operand (operands[1], mode))
+ operands[1] = force_reg (mode, operands[1]);
+
+ /* Allow legitimize_address to perform some simplifications. */
+ if (mode == Pmode && symbolic_operand (operands[1], mode))
+ {
+ tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
+ if (tmp)
+ {
+ if (tmp == operands[0])
+ return true;
+ operands[1] = tmp;
+ return false;
+ }
+ }
+
+ /* Early out for non-constants and valid constants. */
+ if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
+ return false;
+
+ /* Split large integers. */
+ if (CONST_INT_P (operands[1])
+ || GET_CODE (operands[1]) == CONST_DOUBLE
+ || GET_CODE (operands[1]) == CONST_VECTOR)
+ {
+ if (alpha_split_const_mov (mode, operands))
+ return true;
+ }
+
+ /* Otherwise we've nothing left but to drop the thing to memory. */
+ tmp = force_const_mem (mode, operands[1]);
+
+ if (tmp == NULL_RTX)
+ return false;
+
+ if (reload_in_progress)
+ {
+ emit_move_insn (operands[0], XEXP (tmp, 0));
+ operands[1] = replace_equiv_address (tmp, operands[0]);
+ }
+ else
+ operands[1] = validize_mem (tmp);
+ return false;
+}
+
+/* Expand a non-bwx QImode or HImode move instruction;
+ return true if all work is done. */
+
+bool
+alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
+{
+ rtx seq;
+
+ /* If the output is not a register, the input must be. */
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (mode, operands[1]);
+
+ /* Handle four memory cases, unaligned and aligned for either the input
+ or the output. The only case where we can be called during reload is
+ for aligned loads; all other cases require temporaries. */
+
+ if (any_memory_operand (operands[1], mode))
+ {
+ if (aligned_memory_operand (operands[1], mode))
+ {
+ if (reload_in_progress)
+ {
+ if (mode == QImode)
+ seq = gen_reload_inqi_aligned (operands[0], operands[1]);
+ else
+ seq = gen_reload_inhi_aligned (operands[0], operands[1]);
+ emit_insn (seq);
+ }
+ else
+ {
+ rtx aligned_mem, bitnum;
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx subtarget;
+ bool copyout;
+
+ get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+
+ subtarget = operands[0];
+ if (REG_P (subtarget))
+ subtarget = gen_lowpart (DImode, subtarget), copyout = false;
+ else
+ subtarget = gen_reg_rtx (DImode), copyout = true;
+
+ if (mode == QImode)
+ seq = gen_aligned_loadqi (subtarget, aligned_mem,
+ bitnum, scratch);
+ else
+ seq = gen_aligned_loadhi (subtarget, aligned_mem,
+ bitnum, scratch);
+ emit_insn (seq);
+
+ if (copyout)
+ emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
+ }
+ }
+ else
+ {
+ /* Don't pass these as parameters since that makes the generated
+ code depend on parameter evaluation order which will cause
+ bootstrap failures. */
+
+ rtx temp1, temp2, subtarget, ua;
+ bool copyout;
+
+ temp1 = gen_reg_rtx (DImode);
+ temp2 = gen_reg_rtx (DImode);
+
+ subtarget = operands[0];
+ if (REG_P (subtarget))
+ subtarget = gen_lowpart (DImode, subtarget), copyout = false;
+ else
+ subtarget = gen_reg_rtx (DImode), copyout = true;
+
+ ua = get_unaligned_address (operands[1]);
+ if (mode == QImode)
+ seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
+ else
+ seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
+
+ alpha_set_memflags (seq, operands[1]);
+ emit_insn (seq);
+
+ if (copyout)
+ emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
+ }
+ return true;
+ }
+
+ if (any_memory_operand (operands[0], mode))
+ {
+ if (aligned_memory_operand (operands[0], mode))
+ {
+ rtx aligned_mem, bitnum;
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx temp2 = gen_reg_rtx (SImode);
+
+ get_aligned_mem (operands[0], &aligned_mem, &bitnum);
+
+ emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
+ temp1, temp2));
+ }
+ else
+ {
+ rtx temp1 = gen_reg_rtx (DImode);
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx temp3 = gen_reg_rtx (DImode);
+ rtx ua = get_unaligned_address (operands[0]);
+
+ if (mode == QImode)
+ seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
+ else
+ seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
+
+ alpha_set_memflags (seq, operands[0]);
+ emit_insn (seq);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/* Implement the movmisalign patterns. One of the operands is a memory
+ that is not naturally aligned. Emit instructions to load it. */
+
+void
+alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
+{
+ /* Honor misaligned loads; these are the cases we promised to handle. */
+ if (MEM_P (operands[1]))
+ {
+ rtx tmp;
+
+ if (register_operand (operands[0], mode))
+ tmp = operands[0];
+ else
+ tmp = gen_reg_rtx (mode);
+
+ alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
+ if (tmp != operands[0])
+ emit_move_insn (operands[0], tmp);
+ }
+ else if (MEM_P (operands[0]))
+ {
+ if (!reg_or_0_operand (operands[1], mode))
+ operands[1] = force_reg (mode, operands[1]);
+ alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Generate an unsigned DImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns.
+
+ For SFmode, this is the only construction I've found that can pass
+ gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
+ intermediates will work, because you'll get intermediate rounding
+ that ruins the end result. Some of this could be fixed by turning
+ on round-to-positive-infinity, but that requires diddling the fpsr,
+ which kills performance. I tried turning this around and converting
+ to a negative number, so that I could turn on /m, but either I did
+ it wrong or there's something else, because I wound up with the exact
+ same single-bit error. There is a branch-less form of this same code:
+
+ srl $16,1,$1
+ and $16,1,$2
+ cmplt $16,0,$3
+ or $1,$2,$2
+ cmovge $16,$16,$2
+ itoft $3,$f10
+ itoft $2,$f11
+ cvtqs $f11,$f11
+ adds $f11,$f11,$f0
+ fcmoveq $f10,$f11,$f0
+
+ I'm not using it because it's the same number of instructions as
+ this branch-full form, and it has more serialized long latency
+ instructions on the critical path.
+
+ For DFmode, we can avoid rounding errors by breaking up the word
+ into two pieces, converting them separately, and adding them back:
+
+ LC0: .long 0,0x5f800000
+
+ itoft $16,$f11
+ lda $2,LC0
+ cmplt $16,0,$1
+ cpyse $f11,$f31,$f10
+ cpyse $f31,$f11,$f11
+ s4addq $1,$2,$1
+ lds $f12,0($1)
+ cvtqt $f10,$f10
+ cvtqt $f11,$f11
+ addt $f12,$f10,$f0
+ addt $f0,$f11,$f0
+
+ This doesn't seem to be a clear-cut win over the optabs form.
+ It probably all depends on the distribution of numbers being
+ converted -- in the optabs form, all inputs but the high-bit-set
+ ones have a much lower minimum execution time. */
+
+void
+alpha_emit_floatuns (rtx operands[2])
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode;
+
+ out = operands[0];
+ in = force_reg (DImode, operands[1]);
+ mode = GET_MODE (out);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i0 = gen_reg_rtx (DImode);
+ i1 = gen_reg_rtx (DImode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
+ emit_insn (gen_anddi3 (i1, in, const1_rtx));
+ emit_insn (gen_iordi3 (i0, i0, i1));
+ emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
+
+/* Generate the comparison for a conditional branch. */
+
+void
+alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
+{
+ enum rtx_code cmp_code, branch_code;
+ enum machine_mode branch_mode = VOIDmode;
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx op0 = operands[1], op1 = operands[2];
+ rtx tem;
+
+ if (cmp_mode == TFmode)
+ {
+ op0 = alpha_emit_xfloating_compare (&code, op0, op1);
+ op1 = const0_rtx;
+ cmp_mode = DImode;
+ }
+
+ /* The general case: fold the comparison code to the types of compares
+ that we have, choosing the branch as necessary. */
+ switch (code)
+ {
+ case EQ: case LE: case LT: case LEU: case LTU:
+ case UNORDERED:
+ /* We have these compares. */
+ cmp_code = code, branch_code = NE;
+ break;
+
+ case NE:
+ case ORDERED:
+ /* These must be reversed. */
+ cmp_code = reverse_condition (code), branch_code = EQ;
+ break;
+
+ case GE: case GT: case GEU: case GTU:
+ /* For FP, we swap them; for INT, we reverse them. */
+ if (cmp_mode == DFmode)
+ {
+ cmp_code = swap_condition (code);
+ branch_code = NE;
+ tem = op0, op0 = op1, op1 = tem;
+ }
+ else
+ {
+ cmp_code = reverse_condition (code);
+ branch_code = EQ;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
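+
+ /* For an integer GT, for instance, this yields cmple followed by
+ beq: the reversed compare is true exactly when the branch should
+ not be taken. */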
+
+ if (cmp_mode == DFmode)
+ {
+ if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
+ {
+ /* When we are not as concerned about non-finite values, and we
+ are comparing against zero, we can branch directly. */
+ if (op1 == CONST0_RTX (DFmode))
+ cmp_code = UNKNOWN, branch_code = code;
+ else if (op0 == CONST0_RTX (DFmode))
+ {
+ /* Undo the swap we probably did just above. */
+ tem = op0, op0 = op1, op1 = tem;
+ branch_code = swap_condition (cmp_code);
+ cmp_code = UNKNOWN;
+ }
+ }
+ else
+ {
+ /* ??? We mark the branch mode to be CCmode to prevent the
+ compare and branch from being combined, since the compare
+ insn follows IEEE rules that the branch does not. */
+ branch_mode = CCmode;
+ }
+ }
+ else
+ {
+ /* The following optimizations are only for signed compares. */
+ if (code != LEU && code != LTU && code != GEU && code != GTU)
+ {
+ /* Whee. Compare and branch against 0 directly. */
+ if (op1 == const0_rtx)
+ cmp_code = UNKNOWN, branch_code = code;
+
+ /* If the constant doesn't fit into an immediate, but can
+ be generated by lda/ldah, we adjust the argument and
+ compare against zero, so we can use beq/bne directly. */
+ /* ??? Don't do this when comparing against symbols, otherwise
+ we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
+ be declared false out of hand (at least for non-weak). */
+ else if (CONST_INT_P (op1)
+ && (code == EQ || code == NE)
+ && !(symbolic_operand (op0, VOIDmode)
+ || (REG_P (op0) && REG_POINTER (op0))))
+ {
+ rtx n_op1 = GEN_INT (-INTVAL (op1));
+
+ if (! satisfies_constraint_I (op1)
+ && (satisfies_constraint_K (n_op1)
+ || satisfies_constraint_L (n_op1)))
+ cmp_code = PLUS, branch_code = code, op1 = n_op1;
+ }
+ }
+
+ if (!reg_or_0_operand (op0, DImode))
+ op0 = force_reg (DImode, op0);
+ if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
+ op1 = force_reg (DImode, op1);
+ }
+
+ /* Emit an initial compare instruction, if necessary. */
+ tem = op0;
+ if (cmp_code != UNKNOWN)
+ {
+ tem = gen_reg_rtx (cmp_mode);
+ emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
+ }
+
+ /* Emit the branch instruction. */
+ tem = gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_fmt_ee (branch_code,
+ branch_mode, tem,
+ CONST0_RTX (cmp_mode)),
+ gen_rtx_LABEL_REF (VOIDmode,
+ operands[3]),
+ pc_rtx));
+ emit_jump_insn (tem);
+}
+
+/* Certain simplifications can be done to make invalid setcc operations
+ valid. Return true if we emitted the setcc, or false if the comparison
+ cannot be handled. */
+
+bool
+alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
+{
+ enum rtx_code cmp_code;
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2], op1 = operands[3];
+ rtx tmp;
+
+ if (cmp_mode == TFmode)
+ {
+ op0 = alpha_emit_xfloating_compare (&code, op0, op1);
+ op1 = const0_rtx;
+ cmp_mode = DImode;
+ }
+
+ if (cmp_mode == DFmode && !TARGET_FIX)
+ return 0;
+
+ /* The general case: fold the comparison code to the types of compares
+ that we have, choosing the branch as necessary. */
+
+ cmp_code = UNKNOWN;
+ switch (code)
+ {
+ case EQ: case LE: case LT: case LEU: case LTU:
+ case UNORDERED:
+ /* We have these compares. */
+ if (cmp_mode == DFmode)
+ cmp_code = code, code = NE;
+ break;
+
+ case NE:
+ if (cmp_mode == DImode && op1 == const0_rtx)
+ break;
+ /* FALLTHRU */
+
+ case ORDERED:
+ cmp_code = reverse_condition (code);
+ code = EQ;
+ break;
+
+ case GE: case GT: case GEU: case GTU:
+ /* These normally need swapping, but for integer zero we have
+ special patterns that recognize swapped operands. */
+ if (cmp_mode == DImode && op1 == const0_rtx)
+ break;
+ code = swap_condition (code);
+ if (cmp_mode == DFmode)
+ cmp_code = code, code = NE;
+ tmp = op0, op0 = op1, op1 = tmp;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (cmp_mode == DImode)
+ {
+ if (!register_operand (op0, DImode))
+ op0 = force_reg (DImode, op0);
+ if (!reg_or_8bit_operand (op1, DImode))
+ op1 = force_reg (DImode, op1);
+ }
+
+ /* Emit an initial compare instruction, if necessary. */
+ if (cmp_code != UNKNOWN)
+ {
+ tmp = gen_reg_rtx (cmp_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
+
+ op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
+ op1 = const0_rtx;
+ }
+
+ /* Emit the setcc instruction. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (code, DImode, op0, op1)));
+ return true;
+}
+
+/* Rewrite a comparison against zero CMP of the form
+ (CODE (cc0) (const_int 0)) so it can be written validly in
+ a conditional move (if_then_else CMP ...).
+ If both of the operands that set cc0 are nonzero we must emit
+ an insn to perform the compare (it can't be done within
+ the conditional move). */
+
+rtx
+alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (cmp);
+ enum rtx_code cmov_code = NE;
+ rtx op0 = XEXP (cmp, 0);
+ rtx op1 = XEXP (cmp, 1);
+ enum machine_mode cmp_mode
+ = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
+ enum machine_mode cmov_mode = VOIDmode;
+ int local_fast_math = flag_unsafe_math_optimizations;
+ rtx tem;
+
+ if (cmp_mode == TFmode)
+ {
+ op0 = alpha_emit_xfloating_compare (&code, op0, op1);
+ op1 = const0_rtx;
+ cmp_mode = DImode;
+ }
+
+ gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
+
+ if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
+ {
+ enum rtx_code cmp_code;
+
+ if (! TARGET_FIX)
+ return 0;
+
+ /* If we have fp<->int register move instructions, do a cmov by
+ performing the comparison in fp registers, and move the
+ zero/nonzero value to integer registers, where we can then
+ use a normal cmov, or vice-versa. */
+
+ switch (code)
+ {
+ case EQ: case LE: case LT: case LEU: case LTU:
+ case UNORDERED:
+ /* We have these compares. */
+ cmp_code = code, code = NE;
+ break;
+
+ case NE:
+ case ORDERED:
+ /* These must be reversed. */
+ cmp_code = reverse_condition (code), code = EQ;
+ break;
+
+ case GE: case GT: case GEU: case GTU:
+ /* These normally need swapping, but for integer zero we have
+ special patterns that recognize swapped operands. */
+ if (cmp_mode == DImode && op1 == const0_rtx)
+ cmp_code = code, code = NE;
+ else
+ {
+ cmp_code = swap_condition (code);
+ code = NE;
+ tem = op0, op0 = op1, op1 = tem;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (cmp_mode == DImode)
+ {
+ if (!reg_or_0_operand (op0, DImode))
+ op0 = force_reg (DImode, op0);
+ if (!reg_or_8bit_operand (op1, DImode))
+ op1 = force_reg (DImode, op1);
+ }
+
+ tem = gen_reg_rtx (cmp_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tem,
+ gen_rtx_fmt_ee (cmp_code, cmp_mode,
+ op0, op1)));
+
+ cmp_mode = cmp_mode == DImode ? DFmode : DImode;
+ op0 = gen_lowpart (cmp_mode, tem);
+ op1 = CONST0_RTX (cmp_mode);
+ local_fast_math = 1;
+ }
+
+ if (cmp_mode == DImode)
+ {
+ if (!reg_or_0_operand (op0, DImode))
+ op0 = force_reg (DImode, op0);
+ if (!reg_or_8bit_operand (op1, DImode))
+ op1 = force_reg (DImode, op1);
+ }
+
+ /* We may be able to use a conditional move directly.
+ This avoids emitting spurious compares. */
+ if (signed_comparison_operator (cmp, VOIDmode)
+ && (cmp_mode == DImode || local_fast_math)
+ && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
+ return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+
+ /* We can't put the comparison inside the conditional move;
+ emit a compare instruction and put that inside the
+ conditional move. Make sure we emit only comparisons we have;
+ swap or reverse as necessary. */
+
+ if (!can_create_pseudo_p ())
+ return NULL_RTX;
+
+ switch (code)
+ {
+ case EQ: case LE: case LT: case LEU: case LTU:
+ case UNORDERED:
+ /* We have these compares: */
+ break;
+
+ case NE:
+ case ORDERED:
+ /* These must be reversed. */
+ code = reverse_condition (code);
+ cmov_code = EQ;
+ break;
+
+ case GE: case GT: case GEU: case GTU:
+ /* These must be swapped. */
+ if (op1 != CONST0_RTX (cmp_mode))
+ {
+ code = swap_condition (code);
+ tem = op0, op0 = op1, op1 = tem;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (cmp_mode == DImode)
+ {
+ if (!reg_or_0_operand (op0, DImode))
+ op0 = force_reg (DImode, op0);
+ if (!reg_or_8bit_operand (op1, DImode))
+ op1 = force_reg (DImode, op1);
+ }
+
+ /* ??? We mark the branch mode to be CCmode to prevent the compare
+ and cmov from being combined, since the compare insn follows IEEE
+ rules that the cmov does not. */
+ if (cmp_mode == DFmode && !local_fast_math)
+ cmov_mode = CCmode;
+
+ tem = gen_reg_rtx (cmp_mode);
+ emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
+ return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
+}
+
+/* Simplify a conditional move of two constants into a setcc with
+ arithmetic. This is done with a splitter since combine would
+ just undo the work if done during code generation. It also catches
+ cases we wouldn't have before cse. */
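+
+/* For instance, (set dest (if_then_else (ne cond 0) 8 0)) has t == 8,
+ f == 0 and diff == 8, so it becomes a setcc of COND followed by a
+ left shift of exact_log2 (8) == 3. */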
+
+int
+alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
+ rtx t_rtx, rtx f_rtx)
+{
+ HOST_WIDE_INT t, f, diff;
+ enum machine_mode mode;
+ rtx target, subtarget, tmp;
+
+ mode = GET_MODE (dest);
+ t = INTVAL (t_rtx);
+ f = INTVAL (f_rtx);
+ diff = t - f;
+
+ if (((code == NE || code == EQ) && diff < 0)
+ || (code == GE || code == GT))
+ {
+ code = reverse_condition (code);
+ diff = t, t = f, f = diff;
+ diff = t - f;
+ }
+
+ subtarget = target = dest;
+ if (mode != DImode)
+ {
+ target = gen_lowpart (DImode, dest);
+ if (can_create_pseudo_p ())
+ subtarget = gen_reg_rtx (DImode);
+ else
+ subtarget = target;
+ }
+ /* Below, we must be careful to use copy_rtx on target and subtarget
+ in intermediate insns, as they may be a subreg rtx, which may not
+ be shared. */
+
+ if (f == 0 && exact_log2 (diff) > 0
+ /* On EV6, we've got enough shifters to make non-arithmetic shifts
+ viable over a longer latency cmove. On EV5, the E0 slot is a
+ scarce resource, and on EV4 shift has the same latency as a cmove. */
+ && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
+ {
+ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
+
+ tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
+ GEN_INT (exact_log2 (t)));
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else if (f == 0 && t == -1)
+ {
+ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
+
+ emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
+ }
+ else if (diff == 1 || diff == 4 || diff == 8)
+ {
+ rtx add_op;
+
+ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
+
+ if (diff == 1)
+ emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
+ else
+ {
+ add_op = GEN_INT (f);
+ if (sext_add_operand (add_op, mode))
+ {
+ tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
+ GEN_INT (diff));
+ tmp = gen_rtx_PLUS (DImode, tmp, add_op);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ return 0;
+ }
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+/* Look up the X_floating library function name for the given
+ operation. */
+
+struct GTY(()) xfloating_op
+{
+ const enum rtx_code code;
+ const char *const GTY((skip)) osf_func;
+ const char *const GTY((skip)) vms_func;
+ rtx libcall;
+};
+
+static GTY(()) struct xfloating_op xfloating_ops[] =
+{
+ { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
+ { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
+ { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
+ { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
+ { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
+ { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
+ { LT, "_OtsLssX", "OTS$LSS_X", 0 },
+ { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
+ { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
+ { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
+ { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
+ { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
+ { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
+ { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
+ { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
+};
+
+static GTY(()) struct xfloating_op vax_cvt_ops[] =
+{
+ { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
+ { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
+};
+
+static rtx
+alpha_lookup_xfloating_lib_func (enum rtx_code code)
+{
+ struct xfloating_op *ops = xfloating_ops;
+ long n = ARRAY_SIZE (xfloating_ops);
+ long i;
+
+ gcc_assert (TARGET_HAS_XFLOATING_LIBS);
+
+ /* How irritating. Nothing to key off for the main table. */
+ if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
+ {
+ ops = vax_cvt_ops;
+ n = ARRAY_SIZE (vax_cvt_ops);
+ }
+
+ for (i = 0; i < n; ++i, ++ops)
+ if (ops->code == code)
+ {
+ rtx func = ops->libcall;
+ if (!func)
+ {
+ func = init_one_libfunc (TARGET_ABI_OPEN_VMS
+ ? ops->vms_func : ops->osf_func);
+ ops->libcall = func;
+ }
+ return func;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Most X_floating operations take the rounding mode as an argument.
+ Compute that here. */
+
+static int
+alpha_compute_xfloating_mode_arg (enum rtx_code code,
+ enum alpha_fp_rounding_mode round)
+{
+ int mode;
+
+ switch (round)
+ {
+ case ALPHA_FPRM_NORM:
+ mode = 2;
+ break;
+ case ALPHA_FPRM_MINF:
+ mode = 1;
+ break;
+ case ALPHA_FPRM_CHOP:
+ mode = 0;
+ break;
+ case ALPHA_FPRM_DYN:
+ mode = 4;
+ break;
+ default:
+ gcc_unreachable ();
+
+ /* XXX For reference, round to +inf is mode = 3. */
+ }
+
+ if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
+ mode |= 0x10000;
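+
+ /* For example, FLOAT_TRUNCATE with normal rounding and the default
+ (normal) trap mode passes 2 | 0x10000 == 0x10002. */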
+
+ return mode;
+}
+
+/* Emit an X_floating library function call.
+
+ Note that these functions do not follow normal calling conventions:
+ TFmode arguments are passed in two integer registers (as opposed to
+ indirect); TFmode return values appear in R16+R17.
+
+ FUNC is the function to call.
+ TARGET is where the output belongs.
+ OPERANDS are the inputs.
+ NOPERANDS is the count of inputs.
+ EQUIV is the expression equivalent for the function.
+*/
+
+static void
+alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
+ int noperands, rtx equiv)
+{
+ rtx usage = NULL_RTX, tmp, reg;
+ int regno = 16, i;
+
+ start_sequence ();
+
+ for (i = 0; i < noperands; ++i)
+ {
+ switch (GET_MODE (operands[i]))
+ {
+ case TFmode:
+ reg = gen_rtx_REG (TFmode, regno);
+ regno += 2;
+ break;
+
+ case DFmode:
+ reg = gen_rtx_REG (DFmode, regno + 32);
+ regno += 1;
+ break;
+
+ case VOIDmode:
+ gcc_assert (CONST_INT_P (operands[i]));
+ /* FALLTHRU */
+ case DImode:
+ reg = gen_rtx_REG (DImode, regno);
+ regno += 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_move_insn (reg, operands[i]);
+ usage = alloc_EXPR_LIST (0, gen_rtx_USE (VOIDmode, reg), usage);
+ }
+
+ switch (GET_MODE (target))
+ {
+ case TFmode:
+ reg = gen_rtx_REG (TFmode, 16);
+ break;
+ case DFmode:
+ reg = gen_rtx_REG (DFmode, 32);
+ break;
+ case DImode:
+ reg = gen_rtx_REG (DImode, 0);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ tmp = gen_rtx_MEM (QImode, func);
+ tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
+ const0_rtx, const0_rtx));
+ CALL_INSN_FUNCTION_USAGE (tmp) = usage;
+ RTL_CONST_CALL_P (tmp) = 1;
+
+ tmp = get_insns ();
+ end_sequence ();
+
+ emit_libcall_block (tmp, target, reg, equiv);
+}
+
+/* Emit an X_floating library function call for arithmetic (+,-,*,/). */
+
+void
+alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
+{
+ rtx func;
+ int mode;
+ rtx out_operands[3];
+
+ func = alpha_lookup_xfloating_lib_func (code);
+ mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
+
+ out_operands[0] = operands[1];
+ out_operands[1] = operands[2];
+ out_operands[2] = GEN_INT (mode);
+ alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
+ gen_rtx_fmt_ee (code, TFmode, operands[1],
+ operands[2]));
+}
+
+/* Emit an X_floating library function call for a comparison. */
+
+static rtx
+alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
+{
+ enum rtx_code cmp_code, res_code;
+ rtx func, out, operands[2], note;
+
+ /* X_floating library comparison functions return
+ -1 unordered
+ 0 false
+ 1 true
+ Convert the compare against the raw return value. */
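+
+ /* For example, UNORDERED is implemented by calling the EQ routine
+ and testing its raw result with LT 0, since only an unordered
+ comparison returns -1; ORDERED likewise tests GE 0. */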
+
+ cmp_code = *pcode;
+ switch (cmp_code)
+ {
+ case UNORDERED:
+ cmp_code = EQ;
+ res_code = LT;
+ break;
+ case ORDERED:
+ cmp_code = EQ;
+ res_code = GE;
+ break;
+ case NE:
+ res_code = NE;
+ break;
+ case EQ:
+ case LT:
+ case GT:
+ case LE:
+ case GE:
+ res_code = GT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ *pcode = res_code;
+
+ func = alpha_lookup_xfloating_lib_func (cmp_code);
+
+ operands[0] = op0;
+ operands[1] = op1;
+ out = gen_reg_rtx (DImode);
+
+ /* What's actually returned is -1,0,1, not a proper boolean value,
+ so use an EXPR_LIST as with a generic libcall instead of a
+ comparison type expression. */
+ note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
+ note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
+ note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
+ alpha_emit_xfloating_libcall (func, out, operands, 2, note);
+
+ return out;
+}
+
+/* Emit an X_floating library function call for a conversion. */
+
+void
+alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
+{
+ int noperands = 1, mode;
+ rtx out_operands[2];
+ rtx func;
+ enum rtx_code code = orig_code;
+
+ if (code == UNSIGNED_FIX)
+ code = FIX;
+
+ func = alpha_lookup_xfloating_lib_func (code);
+
+ out_operands[0] = operands[1];
+
+ switch (code)
+ {
+ case FIX:
+ mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
+ out_operands[1] = GEN_INT (mode);
+ noperands = 2;
+ break;
+ case FLOAT_TRUNCATE:
+ mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
+ out_operands[1] = GEN_INT (mode);
+ noperands = 2;
+ break;
+ default:
+ break;
+ }
+
+ alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
+ gen_rtx_fmt_e (orig_code,
+ GET_MODE (operands[0]),
+ operands[1]));
+}
+
+/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
+ DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
+ guarantee that the sequence
+ set (OP[0] OP[2])
+ set (OP[1] OP[3])
+ is valid. Naturally, output operand ordering is little-endian.
+ This is used by *movtf_internal and *movti_internal. */
+
+void
+alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
+ bool fixup_overlap)
+{
+ switch (GET_CODE (operands[1]))
+ {
+ case REG:
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
+ operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
+ break;
+
+ case MEM:
+ operands[3] = adjust_address (operands[1], DImode, 8);
+ operands[2] = adjust_address (operands[1], DImode, 0);
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ gcc_assert (operands[1] == CONST0_RTX (mode));
+ operands[2] = operands[3] = const0_rtx;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ break;
+
+ case MEM:
+ operands[1] = adjust_address (operands[0], DImode, 8);
+ operands[0] = adjust_address (operands[0], DImode, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp;
+ tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
+ tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+ }
+}
+
+/* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
+ op2 is a register containing the sign bit, operation is the
+ logical operation to be performed. */
+
+void
+alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
+{
+ rtx high_bit = operands[2];
+ rtx scratch;
+ int move;
+
+ alpha_split_tmode_pair (operands, TFmode, false);
+
+ /* Detect three flavors of operand overlap. */
+ move = 1;
+ if (rtx_equal_p (operands[0], operands[2]))
+ move = 0;
+ else if (rtx_equal_p (operands[1], operands[2]))
+ {
+ if (rtx_equal_p (operands[0], high_bit))
+ move = 2;
+ else
+ move = -1;
+ }
+
+ if (move < 0)
+ emit_move_insn (operands[0], operands[2]);
+
+ /* ??? If the destination overlaps both source tf and high_bit, then
+ assume source tf is dead in its entirety and use the other half
+ for a scratch register. Otherwise "scratch" is just the proper
+ destination register. */
+ scratch = operands[move < 2 ? 1 : 3];
+
+ emit_insn ((*operation) (scratch, high_bit, operands[3]));
+
+ if (move > 0)
+ {
+ emit_move_insn (operands[0], operands[2]);
+ if (move > 1)
+ emit_move_insn (operands[1], scratch);
+ }
+}
+
+/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
+ unaligned data:
+
+ unsigned: signed:
+ word: ldq_u r1,X(r11) ldq_u r1,X(r11)
+ ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
+ lda r3,X(r11) lda r3,X+2(r11)
+ extwl r1,r3,r1 extql r1,r3,r1
+ extwh r2,r3,r2 extqh r2,r3,r2
+ or r1,r2,r1 or r1,r2,r1
+ sra r1,48,r1
+
+ long: ldq_u r1,X(r11) ldq_u r1,X(r11)
+ ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
+ lda r3,X(r11) lda r3,X(r11)
+ extll r1,r3,r1 extll r1,r3,r1
+ extlh r2,r3,r2 extlh r2,r3,r2
+ or r1,r2,r1 addl r1,r2,r1
+
+ quad: ldq_u r1,X(r11)
+ ldq_u r2,X+7(r11)
+ lda r3,X(r11)
+ extql r1,r3,r1
+ extqh r2,r3,r2
+ or r1,r2,r1
+*/
+
+void
+alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
+ HOST_WIDE_INT ofs, int sign)
+{
+ rtx meml, memh, addr, extl, exth, tmp, mema;
+ enum machine_mode mode;
+
+ if (TARGET_BWX && size == 2)
+ {
+ meml = adjust_address (mem, QImode, ofs);
+ memh = adjust_address (mem, QImode, ofs+1);
+ if (BYTES_BIG_ENDIAN)
+ tmp = meml, meml = memh, memh = tmp;
+ extl = gen_reg_rtx (DImode);
+ exth = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendqidi2 (extl, meml));
+ emit_insn (gen_zero_extendqidi2 (exth, memh));
+ exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
+ NULL, 1, OPTAB_LIB_WIDEN);
+ addr = expand_simple_binop (DImode, IOR, extl, exth,
+ NULL, 1, OPTAB_LIB_WIDEN);
+
+ if (sign && GET_MODE (tgt) != HImode)
+ {
+ addr = gen_lowpart (HImode, addr);
+ emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
+ }
+ else
+ {
+ if (GET_MODE (tgt) != DImode)
+ addr = gen_lowpart (GET_MODE (tgt), addr);
+ emit_move_insn (tgt, addr);
+ }
+ return;
+ }
+
+ meml = gen_reg_rtx (DImode);
+ memh = gen_reg_rtx (DImode);
+ addr = gen_reg_rtx (DImode);
+ extl = gen_reg_rtx (DImode);
+ exth = gen_reg_rtx (DImode);
+
+ mema = XEXP (mem, 0);
+ if (GET_CODE (mema) == LO_SUM)
+ mema = force_reg (Pmode, mema);
+
+ /* AND addresses cannot be in any alias set, since they may implicitly
+ alias surrounding code. Ideally we'd have some alias set that
+ covered all types except those with alignment 8 or higher. */
+
+ tmp = change_address (mem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (mema, ofs),
+ GEN_INT (-8)));
+ set_mem_alias_set (tmp, 0);
+ emit_move_insn (meml, tmp);
+
+ tmp = change_address (mem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (mema, ofs + size - 1),
+ GEN_INT (-8)));
+ set_mem_alias_set (tmp, 0);
+ emit_move_insn (memh, tmp);
+
+ if (WORDS_BIG_ENDIAN && sign && (size == 2 || size == 4))
+ {
+ emit_move_insn (addr, plus_constant (mema, -1));
+
+ emit_insn (gen_extqh_be (extl, meml, addr));
+ emit_insn (gen_extxl_be (exth, memh, GEN_INT (64), addr));
+
+ addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
+ addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (64 - size*8),
+ addr, 1, OPTAB_WIDEN);
+ }
+ else if (sign && size == 2)
+ {
+ emit_move_insn (addr, plus_constant (mema, ofs+2));
+
+ emit_insn (gen_extxl_le (extl, meml, GEN_INT (64), addr));
+ emit_insn (gen_extqh_le (exth, memh, addr));
+
+ /* We must use tgt here for the target. Alpha-vms port fails if we use
+ addr for the target, because addr is marked as a pointer and combine
+ knows that pointers are always sign-extended 32-bit values. */
+ addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
+ addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
+ addr, 1, OPTAB_WIDEN);
+ }
+ else
+ {
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (addr, plus_constant (mema, ofs+size-1));
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_extwh_be (extl, meml, addr));
+ mode = HImode;
+ break;
+
+ case 4:
+ emit_insn (gen_extlh_be (extl, meml, addr));
+ mode = SImode;
+ break;
+
+ case 8:
+ emit_insn (gen_extqh_be (extl, meml, addr));
+ mode = DImode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn (gen_extxl_be (exth, memh, GEN_INT (size*8), addr));
+ }
+ else
+ {
+ emit_move_insn (addr, plus_constant (mema, ofs));
+ emit_insn (gen_extxl_le (extl, meml, GEN_INT (size*8), addr));
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_extwh_le (exth, memh, addr));
+ mode = HImode;
+ break;
+
+ case 4:
+ emit_insn (gen_extlh_le (exth, memh, addr));
+ mode = SImode;
+ break;
+
+ case 8:
+ emit_insn (gen_extqh_le (exth, memh, addr));
+ mode = DImode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
+ gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
+ sign, OPTAB_WIDEN);
+ }
+
+ if (addr != tgt)
+ emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
+}
+
+/* Similarly, use ins and msk instructions to perform unaligned stores. */
+
+void
+alpha_expand_unaligned_store (rtx dst, rtx src,
+ HOST_WIDE_INT size, HOST_WIDE_INT ofs)
+{
+ rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
+
+ if (TARGET_BWX && size == 2)
+ {
+ if (src != const0_rtx)
+ {
+ dstl = gen_lowpart (QImode, src);
+ dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
+ NULL, 1, OPTAB_LIB_WIDEN);
+ dsth = gen_lowpart (QImode, dsth);
+ }
+ else
+ dstl = dsth = const0_rtx;
+
+ meml = adjust_address (dst, QImode, ofs);
+ memh = adjust_address (dst, QImode, ofs+1);
+ if (BYTES_BIG_ENDIAN)
+ addr = meml, meml = memh, memh = addr;
+
+ emit_move_insn (meml, dstl);
+ emit_move_insn (memh, dsth);
+ return;
+ }
+
+ dstl = gen_reg_rtx (DImode);
+ dsth = gen_reg_rtx (DImode);
+ insl = gen_reg_rtx (DImode);
+ insh = gen_reg_rtx (DImode);
+
+ dsta = XEXP (dst, 0);
+ if (GET_CODE (dsta) == LO_SUM)
+ dsta = force_reg (Pmode, dsta);
+
+ /* AND addresses cannot be in any alias set, since they may implicitly
+ alias surrounding code. Ideally we'd have some alias set that
+ covered all types except those with alignment 8 or higher. */
+
+ meml = change_address (dst, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (dsta, ofs),
+ GEN_INT (-8)));
+ set_mem_alias_set (meml, 0);
+
+ memh = change_address (dst, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (dsta, ofs + size - 1),
+ GEN_INT (-8)));
+ set_mem_alias_set (memh, 0);
+
+ emit_move_insn (dsth, memh);
+ emit_move_insn (dstl, meml);
+ if (WORDS_BIG_ENDIAN)
+ {
+ addr = copy_addr_to_reg (plus_constant (dsta, ofs+size-1));
+
+ if (src != const0_rtx)
+ {
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_inswl_be (insh, gen_lowpart (HImode,src), addr));
+ break;
+ case 4:
+ emit_insn (gen_insll_be (insh, gen_lowpart (SImode,src), addr));
+ break;
+ case 8:
+ emit_insn (gen_insql_be (insh, gen_lowpart (DImode,src), addr));
+ break;
+ }
+ emit_insn (gen_insxh (insl, gen_lowpart (DImode, src),
+ GEN_INT (size*8), addr));
+ }
+
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_mskxl_be (dsth, dsth, GEN_INT (0xffff), addr));
+ break;
+ case 4:
+ {
+ rtx msk = immed_double_const (0xffffffff, 0, DImode);
+ emit_insn (gen_mskxl_be (dsth, dsth, msk, addr));
+ break;
+ }
+ case 8:
+ emit_insn (gen_mskxl_be (dsth, dsth, constm1_rtx, addr));
+ break;
+ }
+
+ emit_insn (gen_mskxh (dstl, dstl, GEN_INT (size*8), addr));
+ }
+ else
+ {
+ addr = copy_addr_to_reg (plus_constant (dsta, ofs));
+
+ if (src != CONST0_RTX (GET_MODE (src)))
+ {
+ emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
+ GEN_INT (size*8), addr));
+
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_inswl_le (insl, gen_lowpart (HImode, src), addr));
+ break;
+ case 4:
+ emit_insn (gen_insll_le (insl, gen_lowpart (SImode, src), addr));
+ break;
+ case 8:
+ emit_insn (gen_insql_le (insl, gen_lowpart (DImode, src), addr));
+ break;
+ }
+ }
+
+ emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
+
+ switch ((int) size)
+ {
+ case 2:
+ emit_insn (gen_mskxl_le (dstl, dstl, GEN_INT (0xffff), addr));
+ break;
+ case 4:
+ {
+ rtx msk = immed_double_const (0xffffffff, 0, DImode);
+ emit_insn (gen_mskxl_le (dstl, dstl, msk, addr));
+ break;
+ }
+ case 8:
+ emit_insn (gen_mskxl_le (dstl, dstl, constm1_rtx, addr));
+ break;
+ }
+ }
+
+ if (src != CONST0_RTX (GET_MODE (src)))
+ {
+ dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
+ dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
+ }
+
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (meml, dstl);
+ emit_move_insn (memh, dsth);
+ }
+ else
+ {
+ /* Must store high before low for degenerate case of aligned. */
+ emit_move_insn (memh, dsth);
+ emit_move_insn (meml, dstl);
+ }
+}
+
+/* The block move code tries to maximize speed by separating loads and
+ stores at the expense of register pressure: we load all of the data
+ before we store it back out. Two secondary effects are worth
+ mentioning: this speeds copying to and from both aligned and
+ unaligned buffers, and it makes the code significantly easier
+ to write. */
+
+#define MAX_MOVE_WORDS 8
+
+/* Load an integral number of consecutive unaligned quadwords. */
+
+static void
+alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
+ HOST_WIDE_INT words, HOST_WIDE_INT ofs)
+{
+ rtx const im8 = GEN_INT (-8);
+ rtx const i64 = GEN_INT (64);
+ rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
+ rtx sreg, areg, tmp, smema;
+ HOST_WIDE_INT i;
+
+ smema = XEXP (smem, 0);
+ if (GET_CODE (smema) == LO_SUM)
+ smema = force_reg (Pmode, smema);
+
+ /* Generate all the tmp registers we need. */
+ for (i = 0; i < words; ++i)
+ {
+ data_regs[i] = out_regs[i];
+ ext_tmps[i] = gen_reg_rtx (DImode);
+ }
+ data_regs[words] = gen_reg_rtx (DImode);
+
+ if (ofs != 0)
+ smem = adjust_address (smem, GET_MODE (smem), ofs);
+
+ /* Load up all of the source data. */
+ for (i = 0; i < words; ++i)
+ {
+ tmp = change_address (smem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (smema, 8*i),
+ im8));
+ set_mem_alias_set (tmp, 0);
+ emit_move_insn (data_regs[i], tmp);
+ }
+
+ tmp = change_address (smem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (smema, 8*words - 1),
+ im8));
+ set_mem_alias_set (tmp, 0);
+ emit_move_insn (data_regs[words], tmp);
+
+ /* Extract the half-word fragments. Unfortunately DEC decided to make
+ extxh with offset zero a noop instead of zeroing the register, so
+ we must take care of that edge condition ourselves with cmov. */
+
+ sreg = copy_addr_to_reg (smema);
+ areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
+ 1, OPTAB_WIDEN);
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (sreg, plus_constant (sreg, 7));
+ for (i = 0; i < words; ++i)
+ {
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_extqh_be (data_regs[i], data_regs[i], sreg));
+ emit_insn (gen_extxl_be (ext_tmps[i], data_regs[i+1], i64, sreg));
+ }
+ else
+ {
+ emit_insn (gen_extxl_le (data_regs[i], data_regs[i], i64, sreg));
+ emit_insn (gen_extqh_le (ext_tmps[i], data_regs[i+1], sreg));
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
+ gen_rtx_IF_THEN_ELSE (DImode,
+ gen_rtx_EQ (DImode, areg,
+ const0_rtx),
+ const0_rtx, ext_tmps[i])));
+ }
+
+ /* Merge the half-words into whole words. */
+ for (i = 0; i < words; ++i)
+ {
+ out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
+ ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
+ }
+}
+
+/* Store an integral number of consecutive unaligned quadwords. DATA_REGS
+ may be NULL to store zeros. */
+
+static void
+alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
+ HOST_WIDE_INT words, HOST_WIDE_INT ofs)
+{
+ rtx const im8 = GEN_INT (-8);
+ rtx const i64 = GEN_INT (64);
+ rtx ins_tmps[MAX_MOVE_WORDS];
+ rtx st_tmp_1, st_tmp_2, dreg;
+ rtx st_addr_1, st_addr_2, dmema;
+ HOST_WIDE_INT i;
+
+ dmema = XEXP (dmem, 0);
+ if (GET_CODE (dmema) == LO_SUM)
+ dmema = force_reg (Pmode, dmema);
+
+ /* Generate all the tmp registers we need. */
+ if (data_regs != NULL)
+ for (i = 0; i < words; ++i)
+ ins_tmps[i] = gen_reg_rtx(DImode);
+ st_tmp_1 = gen_reg_rtx(DImode);
+ st_tmp_2 = gen_reg_rtx(DImode);
+
+ if (ofs != 0)
+ dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
+
+ st_addr_2 = change_address (dmem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (dmema, words*8 - 1),
+ im8));
+ set_mem_alias_set (st_addr_2, 0);
+
+ st_addr_1 = change_address (dmem, DImode,
+ gen_rtx_AND (DImode, dmema, im8));
+ set_mem_alias_set (st_addr_1, 0);
+
+ /* Load up the destination end bits. */
+ emit_move_insn (st_tmp_2, st_addr_2);
+ emit_move_insn (st_tmp_1, st_addr_1);
+
+ /* Shift the input data into place. */
+ dreg = copy_addr_to_reg (dmema);
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (dreg, plus_constant (dreg, 7));
+ if (data_regs != NULL)
+ {
+ for (i = words-1; i >= 0; --i)
+ {
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_insql_be (ins_tmps[i], data_regs[i], dreg));
+ emit_insn (gen_insxh (data_regs[i], data_regs[i], i64, dreg));
+ }
+ else
+ {
+ emit_insn (gen_insxh (ins_tmps[i], data_regs[i], i64, dreg));
+ emit_insn (gen_insql_le (data_regs[i], data_regs[i], dreg));
+ }
+ }
+ for (i = words-1; i > 0; --i)
+ {
+ ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
+ ins_tmps[i-1], ins_tmps[i-1], 1,
+ OPTAB_WIDEN);
+ }
+ }
+
+ /* Split and merge the ends with the destination data. */
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_mskxl_be (st_tmp_2, st_tmp_2, constm1_rtx, dreg));
+ emit_insn (gen_mskxh (st_tmp_1, st_tmp_1, i64, dreg));
+ }
+ else
+ {
+ emit_insn (gen_mskxh (st_tmp_2, st_tmp_2, i64, dreg));
+ emit_insn (gen_mskxl_le (st_tmp_1, st_tmp_1, constm1_rtx, dreg));
+ }
+
+ if (data_regs != NULL)
+ {
+ st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
+ st_tmp_2, 1, OPTAB_WIDEN);
+ st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
+ st_tmp_1, 1, OPTAB_WIDEN);
+ }
+
+ /* Store it all. */
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (st_addr_1, st_tmp_1);
+ else
+ emit_move_insn (st_addr_2, st_tmp_2);
+ for (i = words-1; i > 0; --i)
+ {
+ rtx tmp = change_address (dmem, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (dmema,
+ WORDS_BIG_ENDIAN ? i*8-1 : i*8),
+ im8));
+ set_mem_alias_set (tmp, 0);
+ emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
+ }
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (st_addr_2, st_tmp_2);
+ else
+ emit_move_insn (st_addr_1, st_tmp_1);
+}
+
+
+/* Expand string/block move operations.
+
+ operands[0] is the pointer to the destination.
+ operands[1] is the pointer to the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is the alignment. */
+
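+/* The expansion below is two-phase: first read the whole block into
+ DATA_REGS with the widest loads the source alignment permits, then
+ write the registers back with the widest stores the destination
+ alignment permits. The two alignments are independent; e.g. a
+ quadword-aligned source feeding a longword-aligned destination is
+ read with ldq and written back as pairs of stl. */
+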
+int
+alpha_expand_block_move (rtx operands[])
+{
+ rtx bytes_rtx = operands[2];
+ rtx align_rtx = operands[3];
+ HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
+ HOST_WIDE_INT bytes = orig_bytes;
+ HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
+ HOST_WIDE_INT dst_align = src_align;
+ rtx orig_src = operands[1];
+ rtx orig_dst = operands[0];
+ rtx data_regs[2 * MAX_MOVE_WORDS + 16];
+ rtx tmp;
+ unsigned int i, words, ofs, nregs = 0;
+
+ if (orig_bytes <= 0)
+ return 1;
+ else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
+ return 0;
+
+ /* Look for additional alignment information from recorded register info. */
+
+ tmp = XEXP (orig_src, 0);
+ if (REG_P (tmp))
+ src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
+ else if (GET_CODE (tmp) == PLUS
+ && REG_P (XEXP (tmp, 0))
+ && CONST_INT_P (XEXP (tmp, 1)))
+ {
+ unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
+ unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
+
+ if (a > src_align)
+ {
+ if (a >= 64 && c % 8 == 0)
+ src_align = 64;
+ else if (a >= 32 && c % 4 == 0)
+ src_align = 32;
+ else if (a >= 16 && c % 2 == 0)
+ src_align = 16;
+ }
+ }
+
+ tmp = XEXP (orig_dst, 0);
+ if (REG_P (tmp))
+ dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
+ else if (GET_CODE (tmp) == PLUS
+ && REG_P (XEXP (tmp, 0))
+ && CONST_INT_P (XEXP (tmp, 1)))
+ {
+ unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
+ unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
+
+ if (a > dst_align)
+ {
+ if (a >= 64 && c % 8 == 0)
+ dst_align = 64;
+ else if (a >= 32 && c % 4 == 0)
+ dst_align = 32;
+ else if (a >= 16 && c % 2 == 0)
+ dst_align = 16;
+ }
+ }
+
+ ofs = 0;
+ if (src_align >= 64 && bytes >= 8)
+ {
+ words = bytes / 8;
+
+ for (i = 0; i < words; ++i)
+ data_regs[nregs + i] = gen_reg_rtx (DImode);
+
+ for (i = 0; i < words; ++i)
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, DImode, ofs + i * 8));
+
+ nregs += words;
+ bytes -= words * 8;
+ ofs += words * 8;
+ }
+
+ if (src_align >= 32 && bytes >= 4)
+ {
+ words = bytes / 4;
+
+ for (i = 0; i < words; ++i)
+ data_regs[nregs + i] = gen_reg_rtx (SImode);
+
+ for (i = 0; i < words; ++i)
+ emit_move_insn (data_regs[nregs + i],
+ adjust_address (orig_src, SImode, ofs + i * 4));
+
+ nregs += words;
+ bytes -= words * 4;
+ ofs += words * 4;
+ }
+
+ if (bytes >= 8)
+ {
+ words = bytes / 8;
+
+ for (i = 0; i < words+1; ++i)
+ data_regs[nregs + i] = gen_reg_rtx (DImode);
+
+ alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
+ words, ofs);
+
+ nregs += words;
+ bytes -= words * 8;
+ ofs += words * 8;
+ }
+
+ if (! TARGET_BWX && bytes >= 4)
+ {
+ data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
+ alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
+ bytes -= 4;
+ ofs += 4;
+ }
+
+ if (bytes >= 2)
+ {
+ if (src_align >= 16)
+ {
+ do {
+ data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
+ emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
+ bytes -= 2;
+ ofs += 2;
+ } while (bytes >= 2);
+ }
+ else if (! TARGET_BWX)
+ {
+ data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
+ alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
+ bytes -= 2;
+ ofs += 2;
+ }
+ }
+
+ while (bytes > 0)
+ {
+ data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
+ emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
+ bytes -= 1;
+ ofs += 1;
+ }
+
+ gcc_assert (nregs <= ARRAY_SIZE (data_regs));
+
+ /* Now save it back out again. */
+
+ i = 0, ofs = 0;
+
+ /* Write out the data in whatever chunks reading the source allowed. */
+ if (dst_align >= 64)
+ {
+ while (i < nregs && GET_MODE (data_regs[i]) == DImode)
+ {
+ emit_move_insn (adjust_address (orig_dst, DImode, ofs),
+ data_regs[i]);
+ ofs += 8;
+ i++;
+ }
+ }
+
+ if (dst_align >= 32)
+ {
+ /* If the source has remaining DImode regs, write them out in
+ two pieces. */
+ while (i < nregs && GET_MODE (data_regs[i]) == DImode)
+ {
+ tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
+ NULL_RTX, 1, OPTAB_WIDEN);
+
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs),
+ gen_lowpart (SImode, data_regs[i]));
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
+ gen_lowpart (SImode, tmp));
+ ofs += 8;
+ i++;
+ }
+
+ while (i < nregs && GET_MODE (data_regs[i]) == SImode)
+ {
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs),
+ data_regs[i]);
+ ofs += 4;
+ i++;
+ }
+ }
+
+ if (i < nregs && GET_MODE (data_regs[i]) == DImode)
+ {
+ /* Write out a remaining block of words using unaligned methods. */
+
+ for (words = 1; i + words < nregs; words++)
+ if (GET_MODE (data_regs[i + words]) != DImode)
+ break;
+
+ if (words == 1)
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
+ else
+ alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
+ words, ofs);
+
+ i += words;
+ ofs += words * 8;
+ }
+
+ /* Due to the above, this won't be aligned. */
+ /* ??? If we have more than one of these, consider constructing full
+ words in registers and using alpha_expand_unaligned_store_words. */
+ while (i < nregs && GET_MODE (data_regs[i]) == SImode)
+ {
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
+ ofs += 4;
+ i++;
+ }
+
+ if (dst_align >= 16)
+ while (i < nregs && GET_MODE (data_regs[i]) == HImode)
+ {
+ emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
+ i++;
+ ofs += 2;
+ }
+ else
+ while (i < nregs && GET_MODE (data_regs[i]) == HImode)
+ {
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
+ i++;
+ ofs += 2;
+ }
+
+ /* The remainder must be byte copies. */
+ while (i < nregs)
+ {
+ gcc_assert (GET_MODE (data_regs[i]) == QImode);
+ emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
+ i++;
+ ofs += 1;
+ }
+
+ return 1;
+}
+
+int
+alpha_expand_block_clear (rtx operands[])
+{
+ rtx bytes_rtx = operands[1];
+ rtx align_rtx = operands[3];
+ HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
+ HOST_WIDE_INT bytes = orig_bytes;
+ HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
+ HOST_WIDE_INT alignofs = 0;
+ rtx orig_dst = operands[0];
+ rtx tmp;
+ int i, words, ofs = 0;
+
+ if (orig_bytes <= 0)
+ return 1;
+ if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
+ return 0;
+
+ /* Look for stricter alignment. */
+ tmp = XEXP (orig_dst, 0);
+ if (REG_P (tmp))
+ align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
+ else if (GET_CODE (tmp) == PLUS
+ && REG_P (XEXP (tmp, 0))
+ && CONST_INT_P (XEXP (tmp, 1)))
+ {
+ HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
+ int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
+
+ if (a > align)
+ {
+ if (a >= 64)
+ align = a, alignofs = 8 - c % 8;
+ else if (a >= 32)
+ align = a, alignofs = 4 - c % 4;
+ else if (a >= 16)
+ align = a, alignofs = 2 - c % 2;
+ }
+ }
+
+ /* Handle an unaligned prefix first. */
+
+ if (alignofs > 0)
+ {
+#if HOST_BITS_PER_WIDE_INT >= 64
+ /* Given that alignofs is bounded by align, the only time BWX could
+ generate three stores is for a 7-byte fill. Prefer two individual
+ stores over a load/mask/store sequence. */
+ if ((!TARGET_BWX || alignofs == 7)
+ && align >= 32
+ && !(alignofs == 4 && bytes >= 4))
+ {
+ enum machine_mode mode = (align >= 64 ? DImode : SImode);
+ int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
+ rtx mem, tmp;
+ HOST_WIDE_INT mask;
+
+ mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
+ set_mem_alias_set (mem, 0);
+
+ mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
+ if (bytes < alignofs)
+ {
+ mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
+ ofs += bytes;
+ bytes = 0;
+ }
+ else
+ {
+ bytes -= alignofs;
+ ofs += alignofs;
+ }
+ alignofs = 0;
+
+ tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
+ NULL_RTX, 1, OPTAB_WIDEN);
+
+ emit_move_insn (mem, tmp);
+ }
+#endif
+
+ if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
+ {
+ emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
+ bytes -= 1;
+ ofs += 1;
+ alignofs -= 1;
+ }
+ if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
+ {
+ emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
+ bytes -= 2;
+ ofs += 2;
+ alignofs -= 2;
+ }
+ if (alignofs == 4 && bytes >= 4)
+ {
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
+ bytes -= 4;
+ ofs += 4;
+ alignofs = 0;
+ }
+
+ /* If we've not used the extra lead alignment information by now,
+ we won't be able to. Downgrade align to match what's left over. */
+ if (alignofs > 0)
+ {
+ alignofs = alignofs & -alignofs;
+ align = MIN (align, alignofs * BITS_PER_UNIT);
+ }
+ }
+
+ /* Handle a block of contiguous long-words. */
+
+ if (align >= 64 && bytes >= 8)
+ {
+ words = bytes / 8;
+
+ for (i = 0; i < words; ++i)
+ emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
+ const0_rtx);
+
+ bytes -= words * 8;
+ ofs += words * 8;
+ }
+
+ /* If the block is large and appropriately aligned, emit a single
+ store followed by a sequence of stq_u insns. */
+
+ if (align >= 32 && bytes > 16)
+ {
+ rtx orig_dsta;
+
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
+ bytes -= 4;
+ ofs += 4;
+
+ orig_dsta = XEXP (orig_dst, 0);
+ if (GET_CODE (orig_dsta) == LO_SUM)
+ orig_dsta = force_reg (Pmode, orig_dsta);
+
+ words = bytes / 8;
+ for (i = 0; i < words; ++i)
+ {
+ rtx mem
+ = change_address (orig_dst, DImode,
+ gen_rtx_AND (DImode,
+ plus_constant (orig_dsta, ofs + i*8),
+ GEN_INT (-8)));
+ set_mem_alias_set (mem, 0);
+ emit_move_insn (mem, const0_rtx);
+ }
+
+ /* Depending on the alignment, the first stq_u may have overlapped
+ with the initial stl, which means that the last stq_u didn't
+ write as much as it would appear. Leave those questionable bytes
+ unaccounted for. */
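+ /* E.g. if the destination is 4 mod 8, the first stq_u rounds its
+ address down over the four bytes already written by the stl above,
+ so the loop is only known to cover words*8 - 4 new bytes. */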
+ bytes -= words * 8 - 4;
+ ofs += words * 8 - 4;
+ }
+
+ /* Handle a smaller block of aligned words. */
+
+ if ((align >= 64 && bytes == 4)
+ || (align == 32 && bytes >= 4))
+ {
+ words = bytes / 4;
+
+ for (i = 0; i < words; ++i)
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
+ const0_rtx);
+
+ bytes -= words * 4;
+ ofs += words * 4;
+ }
+
+ /* An unaligned block uses stq_u stores for as many as possible. */
+
+ if (bytes >= 8)
+ {
+ words = bytes / 8;
+
+ alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
+
+ bytes -= words * 8;
+ ofs += words * 8;
+ }
+
+ /* Next clean up any trailing pieces. */
+
+#if HOST_BITS_PER_WIDE_INT >= 64
+ /* Count the number of bits in BYTES for which aligned stores could
+ be emitted. */
+ words = 0;
+ for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
+ if (bytes & i)
+ words += 1;
+
+ /* If we have appropriate alignment (and it wouldn't take too many
+ instructions otherwise), mask out the bytes we need. */
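+ /* E.g. with BWX, align >= 64 and bytes == 7, words is 3 (bits 1, 2
+ and 4 of BYTES are set), so the single masked quadword store below
+ beats the stb+stw+stl it would otherwise take. */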
+ if (TARGET_BWX ? words > 2 : bytes > 0)
+ {
+ if (align >= 64)
+ {
+ rtx mem, tmp;
+ HOST_WIDE_INT mask;
+
+ mem = adjust_address (orig_dst, DImode, ofs);
+ set_mem_alias_set (mem, 0);
+
+ mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
+
+ tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
+ NULL_RTX, 1, OPTAB_WIDEN);
+
+ emit_move_insn (mem, tmp);
+ return 1;
+ }
+ else if (align >= 32 && bytes < 4)
+ {
+ rtx mem, tmp;
+ HOST_WIDE_INT mask;
+
+ mem = adjust_address (orig_dst, SImode, ofs);
+ set_mem_alias_set (mem, 0);
+
+ mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
+
+ tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
+ NULL_RTX, 1, OPTAB_WIDEN);
+
+ emit_move_insn (mem, tmp);
+ return 1;
+ }
+ }
+#endif
+
+ if (!TARGET_BWX && bytes >= 4)
+ {
+ alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
+ bytes -= 4;
+ ofs += 4;
+ }
+
+ if (bytes >= 2)
+ {
+ if (align >= 16)
+ {
+ do {
+ emit_move_insn (adjust_address (orig_dst, HImode, ofs),
+ const0_rtx);
+ bytes -= 2;
+ ofs += 2;
+ } while (bytes >= 2);
+ }
+ else if (! TARGET_BWX)
+ {
+ alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
+ bytes -= 2;
+ ofs += 2;
+ }
+ }
+
+ while (bytes > 0)
+ {
+ emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
+ bytes -= 1;
+ ofs += 1;
+ }
+
+ return 1;
+}
+
+/* Returns a mask so that zap(x, value) == x & mask. */
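+/* E.g. VALUE == 0x0f marks bytes 0-3 for zapping, so on a 64-bit host
+ the mask returned is 0xffffffff00000000: zap(x, 0x0f) == x & mask. */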
+
+rtx
+alpha_expand_zap_mask (HOST_WIDE_INT value)
+{
+ rtx result;
+ int i;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ {
+ HOST_WIDE_INT mask = 0;
+
+ for (i = 7; i >= 0; --i)
+ {
+ mask <<= 8;
+ if (!((value >> i) & 1))
+ mask |= 0xff;
+ }
+
+ result = gen_int_mode (mask, DImode);
+ }
+ else
+ {
+ HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
+
+ gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
+
+ for (i = 7; i >= 4; --i)
+ {
+ mask_hi <<= 8;
+ if (!((value >> i) & 1))
+ mask_hi |= 0xff;
+ }
+
+ for (i = 3; i >= 0; --i)
+ {
+ mask_lo <<= 8;
+ if (!((value >> i) & 1))
+ mask_lo |= 0xff;
+ }
+
+ result = immed_double_const (mask_lo, mask_hi, DImode);
+ }
+
+ return result;
+}
+
+void
+alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
+ enum machine_mode mode,
+ rtx op0, rtx op1, rtx op2)
+{
+ op0 = gen_lowpart (mode, op0);
+
+ if (op1 == const0_rtx)
+ op1 = CONST0_RTX (mode);
+ else
+ op1 = gen_lowpart (mode, op1);
+
+ if (op2 == const0_rtx)
+ op2 = CONST0_RTX (mode);
+ else
+ op2 = gen_lowpart (mode, op2);
+
+ emit_insn ((*gen) (op0, op1, op2));
+}
+
+/* A subroutine of the atomic operation splitters. Jump to LABEL if
+ COND is true. Mark the jump as unlikely to be taken. */
+
+static void
+emit_unlikely_jump (rtx cond, rtx label)
+{
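+ /* REG_BR_PROB_BASE is 10000, so the probability recorded below is
+ just under 1%. */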
+ rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+ rtx x;
+
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
+ x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
+ add_reg_note (x, REG_BR_PROB, very_unlikely);
+}
+
+/* A subroutine of the atomic operation splitters. Emit a load-locked
+ instruction in MODE. */
+
+static void
+emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
+{
+ rtx (*fn) (rtx, rtx) = NULL;
+ if (mode == SImode)
+ fn = gen_load_locked_si;
+ else if (mode == DImode)
+ fn = gen_load_locked_di;
+ emit_insn (fn (reg, mem));
+}
+
+/* A subroutine of the atomic operation splitters. Emit a store-conditional
+ instruction in MODE. */
+
+static void
+emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
+{
+ rtx (*fn) (rtx, rtx, rtx) = NULL;
+ if (mode == SImode)
+ fn = gen_store_conditional_si;
+ else if (mode == DImode)
+ fn = gen_store_conditional_di;
+ emit_insn (fn (res, mem, val));
+}
+
+/* A subroutine of the atomic operation splitters. Emit an insxl
+ instruction in MODE. */
+
+static rtx
+emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
+{
+ rtx ret = gen_reg_rtx (DImode);
+ rtx (*fn) (rtx, rtx, rtx);
+
+ if (WORDS_BIG_ENDIAN)
+ {
+ if (mode == QImode)
+ fn = gen_insbl_be;
+ else
+ fn = gen_inswl_be;
+ }
+ else
+ {
+ if (mode == QImode)
+ fn = gen_insbl_le;
+ else
+ fn = gen_inswl_le;
+ }
+ /* The insbl and inswl patterns require a register operand. */
+ op1 = force_reg (mode, op1);
+ emit_insn (fn (ret, op1, op2));
+
+ return ret;
+}
+
+/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
+ to perform. MEM is the memory on which to operate. VAL is the second
+ operand of the binary operator. BEFORE and AFTER are optional locations to
+ return the value of MEM either before or after the operation. SCRATCH is
+ a scratch register. */
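+
+/* Roughly, for DImode and CODE == PLUS the sequence emitted below is
+
+ mb
+ 1: ldq_l before,mem
+ addq before,val,scratch
+ stq_c scratch,mem
+ beq scratch,1b
+ mb
+
+ with AFTER, when requested, receiving a copy of the computed value
+ (a sketch of the shape, not the exact RTL). */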
+
+void
+alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ rtx before, rtx after, rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
+
+ emit_insn (gen_memory_barrier ());
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ label = gen_rtx_LABEL_REF (DImode, label);
+
+ if (before == NULL)
+ before = scratch;
+ emit_load_locked (mode, before, mem);
+
+ if (code == NOT)
+ {
+ x = gen_rtx_AND (mode, before, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
+
+ x = gen_rtx_NOT (mode, val);
+ }
+ else
+ x = gen_rtx_fmt_ee (code, mode, before, val);
+ if (after)
+ emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_EQ (DImode, cond, const0_rtx);
+ emit_unlikely_jump (x, label);
+
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Expand a compare and swap operation. */
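+/* Schematically, for DImode and nonzero OLDVAL:
+
+ mb
+ 1: ldq_l retval,mem
+ cmpeq retval,oldval,cond
+ beq cond,2f
+ mov newval,scratch
+ stq_c scratch,mem
+ beq scratch,1b
+ mb
+ 2:
+
+ When OLDVAL is zero the cmpeq/beq pair collapses to a single bne. */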
+
+void
+alpha_split_compare_and_swap (rtx retval, rtx mem, rtx oldval, rtx newval,
+ rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label1, label2, x, cond = gen_lowpart (DImode, scratch);
+
+ emit_insn (gen_memory_barrier ());
+
+ label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (label1, 0));
+
+ emit_load_locked (mode, retval, mem);
+
+ x = gen_lowpart (DImode, retval);
+ if (oldval == const0_rtx)
+ x = gen_rtx_NE (DImode, x, const0_rtx);
+ else
+ {
+ x = gen_rtx_EQ (DImode, x, oldval);
+ emit_insn (gen_rtx_SET (VOIDmode, cond, x));
+ x = gen_rtx_EQ (DImode, cond, const0_rtx);
+ }
+ emit_unlikely_jump (x, label2);
+
+ emit_move_insn (scratch, newval);
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_EQ (DImode, cond, const0_rtx);
+ emit_unlikely_jump (x, label1);
+
+ emit_insn (gen_memory_barrier ());
+ emit_label (XEXP (label2, 0));
+}
+
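+/* Expand a compare and swap on a QImode or HImode value. The
+ load-locked/store-conditional insns only operate on aligned longwords
+ and quadwords, so we operate on the containing aligned quadword,
+ positioning NEWVAL within it with insxl. */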
+void
+alpha_expand_compare_and_swap_12 (rtx dst, rtx mem, rtx oldval, rtx newval)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx addr, align, wdst;
+ rtx (*fn5) (rtx, rtx, rtx, rtx, rtx);
+
+ addr = force_reg (DImode, XEXP (mem, 0));
+ align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ oldval = convert_modes (DImode, mode, oldval, 1);
+ newval = emit_insxl (mode, newval, addr);
+
+ wdst = gen_reg_rtx (DImode);
+ if (mode == QImode)
+ fn5 = gen_sync_compare_and_swapqi_1;
+ else
+ fn5 = gen_sync_compare_and_swaphi_1;
+ emit_insn (fn5 (wdst, addr, oldval, newval, align));
+
+ emit_move_insn (dst, gen_lowpart (mode, wdst));
+}
+
+void
+alpha_split_compare_and_swap_12 (enum machine_mode mode, rtx dest, rtx addr,
+ rtx oldval, rtx newval, rtx align,
+ rtx scratch, rtx cond)
+{
+ rtx label1, label2, mem, width, mask, x;
+
+ mem = gen_rtx_MEM (DImode, align);
+ MEM_VOLATILE_P (mem) = 1;
+
+ emit_insn (gen_memory_barrier ());
+ label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (label1, 0));
+
+ emit_load_locked (DImode, scratch, mem);
+
+ width = GEN_INT (GET_MODE_BITSIZE (mode));
+ mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_extxl_be (dest, scratch, width, addr));
+ else
+ emit_insn (gen_extxl_le (dest, scratch, width, addr));
+
+ if (oldval == const0_rtx)
+ x = gen_rtx_NE (DImode, dest, const0_rtx);
+ else
+ {
+ x = gen_rtx_EQ (DImode, dest, oldval);
+ emit_insn (gen_rtx_SET (VOIDmode, cond, x));
+ x = gen_rtx_EQ (DImode, cond, const0_rtx);
+ }
+ emit_unlikely_jump (x, label2);
+
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
+ else
+ emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
+ emit_insn (gen_iordi3 (scratch, scratch, newval));
+
+ emit_store_conditional (DImode, scratch, mem, scratch);
+
+ x = gen_rtx_EQ (DImode, scratch, const0_rtx);
+ emit_unlikely_jump (x, label1);
+
+ emit_insn (gen_memory_barrier ());
+ emit_label (XEXP (label2, 0));
+}
+
+/* Expand an atomic exchange operation. */
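+
+/* Schematically, for DImode:
+
+ 1: ldq_l retval,mem
+ mov val,scratch
+ stq_c scratch,mem
+ beq scratch,1b
+ mb
+
+ There is no leading barrier; as an acquire-style operation only the
+ trailing mb is required. */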
+
+void
+alpha_split_lock_test_and_set (rtx retval, rtx mem, rtx val, rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond = gen_lowpart (DImode, scratch);
+
+ label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ emit_load_locked (mode, retval, mem);
+ emit_move_insn (scratch, val);
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_EQ (DImode, cond, const0_rtx);
+ emit_unlikely_jump (x, label);
+
+ emit_insn (gen_memory_barrier ());
+}
+
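+/* Expand an atomic exchange of a QImode or HImode value, implemented as
+ load-locked/store-conditional on the containing aligned quadword. */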
+void
+alpha_expand_lock_test_and_set_12 (rtx dst, rtx mem, rtx val)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx addr, align, wdst;
+ rtx (*fn4) (rtx, rtx, rtx, rtx);
+
+ /* Force the address into a register. */
+ addr = force_reg (DImode, XEXP (mem, 0));
+
+ /* Align it to a multiple of 8. */
+ align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ /* Insert val into the correct byte location within the word. */
+ val = emit_insxl (mode, val, addr);
+
+ wdst = gen_reg_rtx (DImode);
+ if (mode == QImode)
+ fn4 = gen_sync_lock_test_and_setqi_1;
+ else
+ fn4 = gen_sync_lock_test_and_sethi_1;
+ emit_insn (fn4 (wdst, addr, val, align));
+
+ emit_move_insn (dst, gen_lowpart (mode, wdst));
+}
+
+void
+alpha_split_lock_test_and_set_12 (enum machine_mode mode, rtx dest, rtx addr,
+ rtx val, rtx align, rtx scratch)
+{
+ rtx label, mem, width, mask, x;
+
+ mem = gen_rtx_MEM (DImode, align);
+ MEM_VOLATILE_P (mem) = 1;
+
+ label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ emit_load_locked (DImode, scratch, mem);
+
+ width = GEN_INT (GET_MODE_BITSIZE (mode));
+ mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_extxl_be (dest, scratch, width, addr));
+ emit_insn (gen_mskxl_be (scratch, scratch, mask, addr));
+ }
+ else
+ {
+ emit_insn (gen_extxl_le (dest, scratch, width, addr));
+ emit_insn (gen_mskxl_le (scratch, scratch, mask, addr));
+ }
+ emit_insn (gen_iordi3 (scratch, scratch, val));
+
+ emit_store_conditional (DImode, scratch, mem, scratch);
+
+ x = gen_rtx_EQ (DImode, scratch, const0_rtx);
+ emit_unlikely_jump (x, label);
+
+ emit_insn (gen_memory_barrier ());
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
+
+static int
+alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type dep_insn_type;
+
+ /* If the dependence is an anti-dependence, there is no cost. For an
+ output dependence, there is sometimes a cost, but it doesn't seem
+ worth handling those few cases. */
+ if (REG_NOTE_KIND (link) != 0)
+ return cost;
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+ return cost;
+
+ dep_insn_type = get_attr_type (dep_insn);
+
+ /* Bring in the user-defined memory latency. */
+ if (dep_insn_type == TYPE_ILD
+ || dep_insn_type == TYPE_FLD
+ || dep_insn_type == TYPE_LDSYM)
+ cost += alpha_memory_latency-1;
+
+ /* Everything else handled in DFA bypasses now. */
+
+ return cost;
+}
+
+/* The number of instructions that can be issued per cycle. */
+
+static int
+alpha_issue_rate (void)
+{
+ return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
+}
+
+/* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in extra work for the scheduler.
+
+ For EV4, loads can be issued to either IB0 or IB1, thus we have 2
+ alternative schedules. For EV5, we can choose between E0/E1 and
+ FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
+
+static int
+alpha_multipass_dfa_lookahead (void)
+{
+ return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
+}
+
+/* Machine-specific function data. */
+
+struct GTY(()) machine_function
+{
+ /* For unicosmk. */
+ /* List of call information words for calls from this function. */
+ struct rtx_def *first_ciw;
+ struct rtx_def *last_ciw;
+ int ciw_count;
+
+ /* List of deferred case vectors. */
+ struct rtx_def *addr_list;
+
+ /* For OSF. */
+ const char *some_ld_name;
+
+ /* For TARGET_LD_BUGGY_LDGP. */
+ struct rtx_def *gp_save_rtx;
+
+ /* For VMS condition handlers. */
+ bool uses_condition_handler;
+};
+
+/* How to allocate a 'struct machine_function'. */
+
+static struct machine_function *
+alpha_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Support for frame based VMS condition handlers. */
+
+/* A VMS condition handler may be established for a function with a call to
+ __builtin_establish_vms_condition_handler, and cancelled with a call to
+ __builtin_revert_vms_condition_handler.
+
+ The VMS Condition Handling Facility knows about the existence of a handler
+ from the procedure descriptor .handler field. Like the VMS native compilers,
+ we store the user-specified handler's address at a fixed location in the
+ stack frame and point the procedure descriptor at a common wrapper which
+ fetches the real handler's address and issues an indirect call.
+
+ The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
+
+ We force the procedure kind to PT_STACK, and the fixed frame location is
+ fp+8, just before the register save area. We use the handler_data field in
+ the procedure descriptor to state the fp offset at which the installed
+ handler address can be found. */
+
+#define VMS_COND_HANDLER_FP_OFFSET 8
+
+/* Expand code to store the currently installed user VMS condition handler
+ into TARGET and install HANDLER as the new condition handler. */
+
+void
+alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
+{
+ rtx handler_slot_address
+ = plus_constant (hard_frame_pointer_rtx, VMS_COND_HANDLER_FP_OFFSET);
+
+ rtx handler_slot
+ = gen_rtx_MEM (DImode, handler_slot_address);
+
+ emit_move_insn (target, handler_slot);
+ emit_move_insn (handler_slot, handler);
+
+ /* Notify the start/prologue/epilogue emitters that the condition handler
+ slot is needed. In addition to reserving the slot space, this will force
+ the procedure kind to PT_STACK and so ensure that the hard_frame_pointer_rtx
+ use above is correct. */
+ cfun->machine->uses_condition_handler = true;
+}
+
+/* Expand code to store the current VMS condition handler into TARGET and
+ nullify it. */
+
+void
+alpha_expand_builtin_revert_vms_condition_handler (rtx target)
+{
+ /* We implement this by establishing a null condition handler, with the tiny
+ side effect of setting uses_condition_handler. This is a little bit
+ pessimistic if no actual builtin_establish call is ever issued, which is
+ not a real problem and expected never to happen anyway. */
+
+ alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
+}
+
+/* Functions to save and restore alpha_return_addr_rtx. */
+
+/* Start the ball rolling with RETURN_ADDR_RTX. */
+
+rtx
+alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, REG_RA);
+}
+
+/* Return or create a memory slot containing the gp value for the current
+ function. Needed only if TARGET_LD_BUGGY_LDGP. */
+
+rtx
+alpha_gp_save_rtx (void)
+{
+ rtx seq, m = cfun->machine->gp_save_rtx;
+
+ if (m == NULL)
+ {
+ start_sequence ();
+
+ m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
+ m = validize_mem (m);
+ emit_move_insn (m, pic_offset_table_rtx);
+
+ seq = get_insns ();
+ end_sequence ();
+
+ /* We used to simply emit the sequence after entry_of_function.
+ However this breaks the CFG if the first instruction in the
+ first block is not the NOTE_INSN_BASIC_BLOCK, for example a
+ label. Emit the sequence properly on the edge. We are only
+ invoked from dw2_build_landing_pads and finish_eh_generation
+ will call commit_edge_insertions thanks to a kludge. */
+ insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
+
+ cfun->machine->gp_save_rtx = m;
+ }
+
+ return m;
+}
+
+static void
+alpha_instantiate_decls (void)
+{
+ if (cfun->machine->gp_save_rtx != NULL_RTX)
+ instantiate_decl_rtl (cfun->machine->gp_save_rtx);
+}
+
+static int
+alpha_ra_ever_killed (void)
+{
+ rtx top;
+
+ if (!has_hard_reg_initial_val (Pmode, REG_RA))
+ return (int)df_regs_ever_live_p (REG_RA);
+
+ push_topmost_sequence ();
+ top = get_insns ();
+ pop_topmost_sequence ();
+
+ return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX);
+}
+
+
+/* Return the trap mode suffix applicable to the current
+ instruction, or NULL. */
+
+static const char *
+get_trap_mode_suffix (void)
+{
+ enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
+
+ switch (s)
+ {
+ case TRAP_SUFFIX_NONE:
+ return NULL;
+
+ case TRAP_SUFFIX_SU:
+ if (alpha_fptm >= ALPHA_FPTM_SU)
+ return "su";
+ return NULL;
+
+ case TRAP_SUFFIX_SUI:
+ if (alpha_fptm >= ALPHA_FPTM_SUI)
+ return "sui";
+ return NULL;
+
+ case TRAP_SUFFIX_V_SV:
+ switch (alpha_fptm)
+ {
+ case ALPHA_FPTM_N:
+ return NULL;
+ case ALPHA_FPTM_U:
+ return "v";
+ case ALPHA_FPTM_SU:
+ case ALPHA_FPTM_SUI:
+ return "sv";
+ default:
+ gcc_unreachable ();
+ }
+
+ case TRAP_SUFFIX_V_SV_SVI:
+ switch (alpha_fptm)
+ {
+ case ALPHA_FPTM_N:
+ return NULL;
+ case ALPHA_FPTM_U:
+ return "v";
+ case ALPHA_FPTM_SU:
+ return "sv";
+ case ALPHA_FPTM_SUI:
+ return "svi";
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case TRAP_SUFFIX_U_SU_SUI:
+ switch (alpha_fptm)
+ {
+ case ALPHA_FPTM_N:
+ return NULL;
+ case ALPHA_FPTM_U:
+ return "u";
+ case ALPHA_FPTM_SU:
+ return "su";
+ case ALPHA_FPTM_SUI:
+ return "sui";
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+}
+
+/* Return the rounding mode suffix applicable to the current
+ instruction, or NULL. */
+
+static const char *
+get_round_mode_suffix (void)
+{
+ enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
+
+ switch (s)
+ {
+ case ROUND_SUFFIX_NONE:
+ return NULL;
+ case ROUND_SUFFIX_NORMAL:
+ switch (alpha_fprm)
+ {
+ case ALPHA_FPRM_NORM:
+ return NULL;
+ case ALPHA_FPRM_MINF:
+ return "m";
+ case ALPHA_FPRM_CHOP:
+ return "c";
+ case ALPHA_FPRM_DYN:
+ return "d";
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case ROUND_SUFFIX_C:
+ return "c";
+
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some movdi_er_tlsldm pattern. */
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+/* Print an operand. Recognize special options, documented below. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ int i;
+
+ switch (code)
+ {
+ case '~':
+ /* Print the assembler name of the current function. */
+ assemble_name (file, alpha_fnname);
+ break;
+
+ case '&':
+ assemble_name (file, get_some_local_dynamic_name ());
+ break;
+
+ case '/':
+ {
+ const char *trap = get_trap_mode_suffix ();
+ const char *round = get_round_mode_suffix ();
+
+ if (trap || round)
+ fprintf (file, (TARGET_AS_SLASH_BEFORE_SUFFIX ? "/%s%s" : "%s%s"),
+ (trap ? trap : ""), (round ? round : ""));
+ break;
+ }
+
+ case ',':
+ /* Generates single precision instruction suffix. */
+ fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
+ break;
+
+ case '-':
+ /* Generates double precision instruction suffix. */
+ fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
+ break;
+
+ case '#':
+ if (alpha_this_literal_sequence_number == 0)
+ alpha_this_literal_sequence_number = alpha_next_sequence_number++;
+ fprintf (file, "%d", alpha_this_literal_sequence_number);
+ break;
+
+ case '*':
+ if (alpha_this_gpdisp_sequence_number == 0)
+ alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
+ fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
+ break;
+
+ case 'H':
+ if (GET_CODE (x) == HIGH)
+ output_addr_const (file, XEXP (x, 0));
+ else
+ output_operand_lossage ("invalid %%H value");
+ break;
+
+ case 'J':
+ {
+ const char *lituse;
+
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
+ {
+ x = XVECEXP (x, 0, 0);
+ lituse = "lituse_tlsgd";
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
+ {
+ x = XVECEXP (x, 0, 0);
+ lituse = "lituse_tlsldm";
+ }
+ else if (CONST_INT_P (x))
+ lituse = "lituse_jsr";
+ else
+ {
+ output_operand_lossage ("invalid %%J value");
+ break;
+ }
+
+ if (x != const0_rtx)
+ fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
+ }
+ break;
+
+ case 'j':
+ {
+ const char *lituse;
+
+#ifdef HAVE_AS_JSRDIRECT_RELOCS
+ lituse = "lituse_jsrdirect";
+#else
+ lituse = "lituse_jsr";
+#endif
+
+ gcc_assert (INTVAL (x) != 0);
+ fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
+ }
+ break;
+
+ case 'r':
+ /* If this operand is the constant zero, write it as "$31". */
+ if (REG_P (x))
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (x == CONST0_RTX (GET_MODE (x)))
+ fprintf (file, "$31");
+ else
+ output_operand_lossage ("invalid %%r value");
+ break;
+
+ case 'R':
+ /* Similar, but for floating-point. */
+ if (REG_P (x))
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (x == CONST0_RTX (GET_MODE (x)))
+ fprintf (file, "$f31");
+ else
+ output_operand_lossage ("invalid %%R value");
+ break;
+
+ case 'N':
+ /* Write the 1's complement of a constant. */
+ if (!CONST_INT_P (x))
+ output_operand_lossage ("invalid %%N value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
+ break;
+
+ case 'P':
+ /* Write 1 << C, for a constant C. */
+ if (!CONST_INT_P (x))
+ output_operand_lossage ("invalid %%P value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
+ break;
+
+ case 'h':
+ /* Write the high-order 16 bits of a constant, sign-extended. */
+ if (!CONST_INT_P (x))
+ output_operand_lossage ("invalid %%h value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
+ break;
+
+ case 'L':
+ /* Write the low-order 16 bits of a constant, sign-extended. */
+ if (!CONST_INT_P (x))
+ output_operand_lossage ("invalid %%L value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
+ break;
+
+ case 'm':
+ /* Write mask for ZAP insn. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ HOST_WIDE_INT mask = 0;
+ HOST_WIDE_INT value;
+
+ value = CONST_DOUBLE_LOW (x);
+ for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
+ i++, value >>= 8)
+ if (value & 0xff)
+ mask |= (1 << i);
+
+ value = CONST_DOUBLE_HIGH (x);
+ for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
+ i++, value >>= 8)
+ if (value & 0xff)
+ mask |= (1 << (i + sizeof (int)));
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
+ }
+
+ else if (CONST_INT_P (x))
+ {
+ HOST_WIDE_INT mask = 0, value = INTVAL (x);
+
+ for (i = 0; i < 8; i++, value >>= 8)
+ if (value & 0xff)
+ mask |= (1 << i);
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
+ }
+ else
+ output_operand_lossage ("invalid %%m value");
+ break;
+
+ case 'M':
+ /* 'b', 'w', 'l', or 'q' as the value of the constant. */
+ if (!CONST_INT_P (x)
+ || (INTVAL (x) != 8 && INTVAL (x) != 16
+ && INTVAL (x) != 32 && INTVAL (x) != 64))
+ output_operand_lossage ("invalid %%M value");
+
+ fprintf (file, "%s",
+ (INTVAL (x) == 8 ? "b"
+ : INTVAL (x) == 16 ? "w"
+ : INTVAL (x) == 32 ? "l"
+ : "q"));
+ break;
+
+ case 'U':
+ /* Similar, except do it from the mask. */
+ if (CONST_INT_P (x))
+ {
+ HOST_WIDE_INT value = INTVAL (x);
+
+ if (value == 0xff)
+ {
+ fputc ('b', file);
+ break;
+ }
+ if (value == 0xffff)
+ {
+ fputc ('w', file);
+ break;
+ }
+ if (value == 0xffffffff)
+ {
+ fputc ('l', file);
+ break;
+ }
+ if (value == -1)
+ {
+ fputc ('q', file);
+ break;
+ }
+ }
+ else if (HOST_BITS_PER_WIDE_INT == 32
+ && GET_CODE (x) == CONST_DOUBLE
+ && CONST_DOUBLE_LOW (x) == 0xffffffff
+ && CONST_DOUBLE_HIGH (x) == 0)
+ {
+ fputc ('l', file);
+ break;
+ }
+ output_operand_lossage ("invalid %%U value");
+ break;
+
+ case 's':
+ /* Write the constant value divided by 8 for little-endian mode or
+ (56 - value) / 8 for big-endian mode. */
+
+ if (!CONST_INT_P (x)
+ || (unsigned HOST_WIDE_INT) INTVAL (x) >= (WORDS_BIG_ENDIAN
+ ? 56
+ : 64)
+ || (INTVAL (x) & 7) != 0)
+ output_operand_lossage ("invalid %%s value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ WORDS_BIG_ENDIAN
+ ? (56 - INTVAL (x)) / 8
+ : INTVAL (x) / 8);
+ break;
+
+ case 'S':
+ /* Same, except compute (64 - c) / 8. */
+
+ if (!CONST_INT_P (x)
+ || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
+ || (INTVAL (x) & 7) != 0)
+ output_operand_lossage ("invalid %%S value");
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
+ break;
+
+ case 't':
+ {
+ /* On Unicos/Mk systems: use a DEX expression if the symbol
+ clashes with a register name. */
+ int dex = unicosmk_need_dex (x);
+ if (dex)
+ fprintf (file, "DEX(%d)", dex);
+ else
+ output_addr_const (file, x);
+ }
+ break;
+
+ case 'C': case 'D': case 'c': case 'd':
+ /* Write out comparison name. */
+ {
+ enum rtx_code c = GET_CODE (x);
+
+ if (!COMPARISON_P (x))
+ output_operand_lossage ("invalid %%C value");
+
+ else if (code == 'D')
+ c = reverse_condition (c);
+ else if (code == 'c')
+ c = swap_condition (c);
+ else if (code == 'd')
+ c = swap_condition (reverse_condition (c));
+
+ if (c == LEU)
+ fprintf (file, "ule");
+ else if (c == LTU)
+ fprintf (file, "ult");
+ else if (c == UNORDERED)
+ fprintf (file, "un");
+ else
+ fprintf (file, "%s", GET_RTX_NAME (c));
+ }
+ break;
+
+ case 'E':
+ /* Write the divide or modulus operator. */
+ switch (GET_CODE (x))
+ {
+ case DIV:
+ fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
+ break;
+ case UDIV:
+ fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
+ break;
+ case MOD:
+ fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
+ break;
+ case UMOD:
+ fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
+ break;
+ default:
+ output_operand_lossage ("invalid %%E value");
+ break;
+ }
+ break;
+
+ case 'A':
+ /* Write "_u" for unaligned access. */
+ if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
+ fprintf (file, "_u");
+ break;
+
+ case 0:
+ if (REG_P (x))
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (MEM_P (x))
+ output_address (XEXP (x, 0));
+ else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
+ {
+ switch (XINT (XEXP (x, 0), 1))
+ {
+ case UNSPEC_DTPREL:
+ case UNSPEC_TPREL:
+ output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
+ break;
+ default:
+ output_operand_lossage ("unknown relocation unspec");
+ break;
+ }
+ }
+ else
+ output_addr_const (file, x);
+ break;
+
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ }
+}
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ int basereg = 31;
+ HOST_WIDE_INT offset = 0;
+
+ if (GET_CODE (addr) == AND)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == PLUS
+ && CONST_INT_P (XEXP (addr, 1)))
+ {
+ offset = INTVAL (XEXP (addr, 1));
+ addr = XEXP (addr, 0);
+ }
+
+ if (GET_CODE (addr) == LO_SUM)
+ {
+ const char *reloc16, *reloclo;
+ rtx op1 = XEXP (addr, 1);
+
+ if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
+ {
+ op1 = XEXP (op1, 0);
+ switch (XINT (op1, 1))
+ {
+ case UNSPEC_DTPREL:
+ reloc16 = NULL;
+ reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
+ break;
+ case UNSPEC_TPREL:
+ reloc16 = NULL;
+ reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
+ break;
+ default:
+ output_operand_lossage ("unknown relocation unspec");
+ return;
+ }
+
+ output_addr_const (file, XVECEXP (op1, 0, 0));
+ }
+ else
+ {
+ reloc16 = "gprel";
+ reloclo = "gprellow";
+ output_addr_const (file, op1);
+ }
+
+ if (offset)
+ fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
+
+ addr = XEXP (addr, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ basereg = REGNO (addr);
+ break;
+
+ case SUBREG:
+ basereg = subreg_regno (addr);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ fprintf (file, "($%d)\t\t!%s", basereg,
+ (basereg == 29 ? reloc16 : reloclo));
+ return;
+ }
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ basereg = REGNO (addr);
+ break;
+
+ case SUBREG:
+ basereg = subreg_regno (addr);
+ break;
+
+ case CONST_INT:
+ offset = INTVAL (addr);
+ break;
+
+#if TARGET_ABI_OPEN_VMS
+ case SYMBOL_REF:
+ fprintf (file, "%s", XSTR (addr, 0));
+ return;
+
+ case CONST:
+ gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
+ fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
+ XSTR (XEXP (XEXP (addr, 0), 0), 0),
+ INTVAL (XEXP (XEXP (addr, 0), 1)));
+ return;
+
+#endif
+ default:
+ gcc_unreachable ();
+ }
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline at
+ M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
+ for the static chain value for the function. */
+
+static void
+alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr, mem, word1, word2;
+
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+#ifdef POINTERS_EXTEND_UNSIGNED
+ fnaddr = convert_memory_address (Pmode, fnaddr);
+ chain_value = convert_memory_address (Pmode, chain_value);
+#endif
+
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ const char *fnname;
+ char *trname;
+
+ /* Construct the name of the trampoline entry point. */
+ fnname = XSTR (fnaddr, 0);
+ trname = (char *) alloca (strlen (fnname) + 5);
+ strcpy (trname, fnname);
+ strcat (trname, "..tr");
+ fnname = ggc_alloc_string (trname, strlen (trname) + 1);
+ word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
+
+ /* Trampoline (or "bounded") procedure descriptor is constructed from
+ the function's procedure descriptor with certain fields zeroed in
+ accordance with the VMS calling standard. This is stored in the first
+ quadword. */
+ word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
+ word1 = expand_and (DImode, word1, GEN_INT (0xffff0fff0000fff0), NULL);
+ }
+ else
+ {
+ /* These 4 instructions are:
+ ldq $1,24($27)
+ ldq $27,16($27)
+ jmp $31,($27),0
+ nop
+ We don't bother setting the HINT field of the jump; the nop
+ is merely there for padding. */
+ word1 = GEN_INT (0xa77b0010a43b0018);
+ word2 = GEN_INT (0x47ff041f6bfb0000);
+ }
+
+ /* Store the first two words, as computed above. */
+ mem = adjust_address (m_tramp, DImode, 0);
+ emit_move_insn (mem, word1);
+ mem = adjust_address (m_tramp, DImode, 8);
+ emit_move_insn (mem, word2);
+
+ /* Store function address and static chain value. */
+ mem = adjust_address (m_tramp, Pmode, 16);
+ emit_move_insn (mem, fnaddr);
+ mem = adjust_address (m_tramp, Pmode, 24);
+ emit_move_insn (mem, chain_value);
+
+ if (!TARGET_ABI_OPEN_VMS)
+ {
+ emit_insn (gen_imb ());
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (init_one_libfunc ("__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+#endif
+ }
+}
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On Alpha the first 6 words of args are normally in registers
+ and the rest are pushed. */
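+
+/* Concretely, integer args go in $16-$21 and FP args in $f16-$f21 (hard
+ register numbers 48-53); hence the BASEREG values 16 and 32 + 16
+ below. */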
+
+static rtx
+alpha_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int basereg;
+ int num_args;
+
+ /* Don't get confused and pass small structures in FP registers. */
+ if (type && AGGREGATE_TYPE_P (type))
+ basereg = 16;
+ else
+ {
+#ifdef ENABLE_CHECKING
+ /* With alpha_split_complex_arg, we shouldn't see any raw complex
+ values here. */
+ gcc_assert (!COMPLEX_MODE_P (mode));
+#endif
+
+ /* Set up defaults for FP operands passed in FP registers, and
+ integral operands passed in integer registers. */
+ if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ basereg = 32 + 16;
+ else
+ basereg = 16;
+ }
+
+ /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
+ the two platforms, so we can't avoid conditional compilation. */
+#if TARGET_ABI_OPEN_VMS
+ {
+ if (mode == VOIDmode)
+ return alpha_arg_info_reg_val (*cum);
+
+ num_args = cum->num_args;
+ if (num_args >= 6
+ || targetm.calls.must_pass_in_stack (mode, type))
+ return NULL_RTX;
+ }
+#elif TARGET_ABI_OSF
+ {
+ if (*cum >= 6)
+ return NULL_RTX;
+ num_args = *cum;
+
+ /* VOID is passed as a special flag for "last argument". */
+ if (type == void_type_node)
+ basereg = 16;
+ else if (targetm.calls.must_pass_in_stack (mode, type))
+ return NULL_RTX;
+ }
+#else
+#error Unhandled ABI
+#endif
+
+ return gen_rtx_REG (mode, num_args + basereg);
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+alpha_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ bool onstack = targetm.calls.must_pass_in_stack (mode, type);
+ int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
+
+#if TARGET_ABI_OSF
+ *cum += increment;
+#else
+ if (!onstack && cum->num_args < 6)
+ cum->atypes[cum->num_args] = alpha_arg_type (mode);
+ cum->num_args += increment;
+#endif
+}
+
+static int
+alpha_arg_partial_bytes (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ int words = 0;
+
+#if TARGET_ABI_OPEN_VMS
+ if (cum->num_args < 6
+ && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
+ words = 6 - cum->num_args;
+#elif TARGET_ABI_UNICOSMK
+ /* Never any split arguments. */
+#elif TARGET_ABI_OSF
+ if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
+ words = 6 - *cum;
+#else
+#error Unhandled ABI
+#endif
+
+ return words * UNITS_PER_WORD;
+}
+
+
+/* Return true if TYPE must be returned in memory, instead of in registers. */
+
+static bool
+alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = VOIDmode;
+ int size;
+
+ if (type)
+ {
+ mode = TYPE_MODE (type);
+
+ /* All aggregates are returned in memory, except on OpenVMS where
+ records that fit 64 bits should be returned by immediate value
+ as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
+ if (TARGET_ABI_OPEN_VMS
+ && TREE_CODE (type) != ARRAY_TYPE
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ return true;
+ }
+
+ size = GET_MODE_SIZE (mode);
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_FLOAT:
+ /* Pass all float vectors in memory, like an aggregate. */
+ return true;
+
+ case MODE_COMPLEX_FLOAT:
+ /* We judge complex floats on the size of their element,
+ not the size of the whole type. */
+ size = GET_MODE_UNIT_SIZE (mode);
+ break;
+
+ case MODE_INT:
+ case MODE_FLOAT:
+ case MODE_COMPLEX_INT:
+ case MODE_VECTOR_INT:
+ break;
+
+ default:
+ /* ??? We get called on all sorts of random stuff from
+ aggregate_value_p. We must return something, but it's not
+ clear what's safe to return. Pretend it's a struct, I
+ guess. */
+ return true;
+ }
+
+ /* Otherwise types must fit in one register. */
+ return size > UNITS_PER_WORD;
+}
+
+/* Return true if TYPE should be passed by invisible reference. */
+
+static bool
+alpha_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return mode == TFmode || mode == TCmode;
+}
+
+/* Define how to find the value returned by a function. VALTYPE is the
+ data type of the value (as a tree). If the precise function being
+ called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
+ MODE is set instead of VALTYPE for libcalls.
+
+ On Alpha the value is found in $0 for integer functions and
+ $f0 for floating-point functions. */
+
+rtx
+function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ unsigned int regnum, dummy ATTRIBUTE_UNUSED;
+ enum mode_class mclass;
+
+ gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
+
+ if (valtype)
+ mode = TYPE_MODE (valtype);
+
+ mclass = GET_MODE_CLASS (mode);
+ switch (mclass)
+ {
+ case MODE_INT:
+ /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
+ where we have them returning both SImode and DImode. */
+ if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
+ PROMOTE_MODE (mode, dummy, valtype);
+ /* FALLTHRU */
+
+ case MODE_COMPLEX_INT:
+ case MODE_VECTOR_INT:
+ regnum = 0;
+ break;
+
+ case MODE_FLOAT:
+ regnum = 32;
+ break;
+
+ case MODE_COMPLEX_FLOAT:
+ {
+ enum machine_mode cmode = GET_MODE_INNER (mode);
+
+ return gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
+ GEN_INT (GET_MODE_SIZE (cmode)))));
+ }
+
+ case MODE_RANDOM:
+ /* We should only reach here for BLKmode on VMS. */
+ gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
+ regnum = 0;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_REG (mode, regnum);
+}
+
+/* TCmode complex values are passed by invisible reference. We
+ should not split these values. */
+
+static bool
+alpha_split_complex_arg (const_tree type)
+{
+ return TYPE_MODE (type) != TCmode;
+}
+
+static tree
+alpha_build_builtin_va_list (void)
+{
+ tree base, ofs, space, record, type_decl;
+
+ if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
+ return ptr_type_node;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+
+ /* C++? SET_IS_AGGR_TYPE (record, 1); */
+
+ /* Dummy field to prevent alignment warnings. */
+ space = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, NULL_TREE, integer_type_node);
+ DECL_FIELD_CONTEXT (space) = record;
+ DECL_ARTIFICIAL (space) = 1;
+ DECL_IGNORED_P (space) = 1;
+
+ ofs = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__offset"),
+ integer_type_node);
+ DECL_FIELD_CONTEXT (ofs) = record;
+ DECL_CHAIN (ofs) = space;
+ /* ??? This is a hack, __offset is marked volatile to prevent
+ DCE that confuses stdarg optimization and results in
+ gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
+ TREE_THIS_VOLATILE (ofs) = 1;
+
+ base = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__base"),
+ ptr_type_node);
+ DECL_FIELD_CONTEXT (base) = record;
+ DECL_CHAIN (base) = ofs;
+
+ TYPE_FIELDS (record) = base;
+ layout_type (record);
+
+ va_list_gpr_counter_field = ofs;
+ return record;
+}
+
+#if TARGET_ABI_OSF
+/* Helper function for alpha_stdarg_optimize_hook. Skip over casts
+ and constant additions. */
+
+static gimple
+va_list_skip_additions (tree lhs)
+{
+ gimple stmt;
+
+ for (;;)
+ {
+ enum tree_code code;
+
+ stmt = SSA_NAME_DEF_STMT (lhs);
+
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ return stmt;
+
+ if (!is_gimple_assign (stmt)
+ || gimple_assign_lhs (stmt) != lhs)
+ return NULL;
+
+ if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
+ return stmt;
+ code = gimple_assign_rhs_code (stmt);
+ if (!CONVERT_EXPR_CODE_P (code)
+ && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
+ || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
+ || !host_integerp (gimple_assign_rhs2 (stmt), 1)))
+ return stmt;
+
+ lhs = gimple_assign_rhs1 (stmt);
+ }
+}
+
+/* Check if LHS = RHS statement is
+ LHS = *(ap.__base + ap.__offset + cst)
+ or
+ LHS = *(ap.__base
+ + ((ap.__offset + cst <= 47)
+ ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
+ If the former, indicate that GPR registers are needed,
+ if the latter, indicate that FPR registers are needed.
+
+ Also look for LHS = (*ptr).field, where ptr is one of the forms
+ listed above.
+
+ On alpha, cfun->va_list_gpr_size is used as size of the needed
+ regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
+ registers are needed and bit 1 set if FPR registers are needed.
+ Return true if va_list references should not be scanned for the
+ current statement. */
+
+static bool
+alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
+{
+ tree base, offset, rhs;
+ int offset_arg = 1;
+ gimple base_stmt;
+
+ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+ != GIMPLE_SINGLE_RHS)
+ return false;
+
+ rhs = gimple_assign_rhs1 (stmt);
+ while (handled_component_p (rhs))
+ rhs = TREE_OPERAND (rhs, 0);
+ if (TREE_CODE (rhs) != MEM_REF
+ || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
+ return false;
+
+ stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
+ if (stmt == NULL
+ || !is_gimple_assign (stmt)
+ || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
+ return false;
+
+ base = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (base) == SSA_NAME)
+ {
+ base_stmt = va_list_skip_additions (base);
+ if (base_stmt
+ && is_gimple_assign (base_stmt)
+ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
+ base = gimple_assign_rhs1 (base_stmt);
+ }
+
+ if (TREE_CODE (base) != COMPONENT_REF
+ || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
+ {
+ base = gimple_assign_rhs2 (stmt);
+ if (TREE_CODE (base) == SSA_NAME)
+ {
+ base_stmt = va_list_skip_additions (base);
+ if (base_stmt
+ && is_gimple_assign (base_stmt)
+ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
+ base = gimple_assign_rhs1 (base_stmt);
+ }
+
+ if (TREE_CODE (base) != COMPONENT_REF
+ || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
+ return false;
+
+ offset_arg = 0;
+ }
+
+ base = get_base_address (base);
+ if (TREE_CODE (base) != VAR_DECL
+ || !bitmap_bit_p (si->va_list_vars, DECL_UID (base)))
+ return false;
+
+ offset = gimple_op (stmt, 1 + offset_arg);
+ if (TREE_CODE (offset) == SSA_NAME)
+ {
+ gimple offset_stmt = va_list_skip_additions (offset);
+
+ if (offset_stmt
+ && gimple_code (offset_stmt) == GIMPLE_PHI)
+ {
+ HOST_WIDE_INT sub;
+ gimple arg1_stmt, arg2_stmt;
+ tree arg1, arg2;
+ enum tree_code code1, code2;
+
+ if (gimple_phi_num_args (offset_stmt) != 2)
+ goto escapes;
+
+ arg1_stmt
+ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
+ arg2_stmt
+ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
+ if (arg1_stmt == NULL
+ || !is_gimple_assign (arg1_stmt)
+ || arg2_stmt == NULL
+ || !is_gimple_assign (arg2_stmt))
+ goto escapes;
+
+ code1 = gimple_assign_rhs_code (arg1_stmt);
+ code2 = gimple_assign_rhs_code (arg2_stmt);
+ if (code1 == COMPONENT_REF
+ && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
+ /* Do nothing. */;
+ else if (code2 == COMPONENT_REF
+ && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
+ {
+ gimple tem = arg1_stmt;
+ code2 = code1;
+ arg1_stmt = arg2_stmt;
+ arg2_stmt = tem;
+ }
+ else
+ goto escapes;
+
+ if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0))
+ goto escapes;
+
+ sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0);
+ if (code2 == MINUS_EXPR)
+ sub = -sub;
+ if (sub < -48 || sub > -32)
+ goto escapes;
+
+ arg1 = gimple_assign_rhs1 (arg1_stmt);
+ arg2 = gimple_assign_rhs1 (arg2_stmt);
+ if (TREE_CODE (arg2) == SSA_NAME)
+ {
+ arg2_stmt = va_list_skip_additions (arg2);
+ if (arg2_stmt == NULL
+ || !is_gimple_assign (arg2_stmt)
+ || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
+ goto escapes;
+ arg2 = gimple_assign_rhs1 (arg2_stmt);
+ }
+ if (arg1 != arg2)
+ goto escapes;
+
+ if (TREE_CODE (arg1) != COMPONENT_REF
+ || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
+ || get_base_address (arg1) != base)
+ goto escapes;
+
+ /* Need floating point regs. */
+ cfun->va_list_fpr_size |= 2;
+ return false;
+ }
+ if (offset_stmt
+ && is_gimple_assign (offset_stmt)
+ && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
+ offset = gimple_assign_rhs1 (offset_stmt);
+ }
+ if (TREE_CODE (offset) != COMPONENT_REF
+ || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
+ || get_base_address (offset) != base)
+ goto escapes;
+ else
+ /* Need general regs. */
+ cfun->va_list_fpr_size |= 1;
+ return false;
+
+escapes:
+ si->va_list_escapes = true;
+ return false;
+}
+#endif
+
+/* Perform any actions needed for a function that is receiving a
+ variable number of arguments. */
+
+static void
+alpha_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, int *pretend_size, int no_rtl)
+{
+ CUMULATIVE_ARGS cum = *pcum;
+
+ /* Skip the current argument. */
+ targetm.calls.function_arg_advance (&cum, mode, type, true);
+
+#if TARGET_ABI_UNICOSMK
+ /* On Unicos/Mk, the standard subroutine __T3E_MISMATCH stores all register
+ arguments on the stack. Unfortunately, it doesn't always store the first
+ one (i.e. the one that arrives in $16 or $f16). This is not a problem
+ with stdargs as we always have at least one named argument there. */
+ if (cum.num_reg_words < 6)
+ {
+ if (!no_rtl)
+ {
+ emit_insn (gen_umk_mismatch_args (GEN_INT (cum.num_reg_words)));
+ emit_insn (gen_arg_home_umk ());
+ }
+ *pretend_size = 0;
+ }
+#elif TARGET_ABI_OPEN_VMS
+ /* For VMS, we allocate space for all 6 arg registers plus a count.
+
+ However, if NO registers need to be saved, don't allocate any space.
+ This is not only because we won't need the space, but because AP
+ includes the current_pretend_args_size and we don't want to mess up
+ any ap-relative addresses already made. */
+ if (cum.num_args < 6)
+ {
+ if (!no_rtl)
+ {
+ emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
+ emit_insn (gen_arg_home ());
+ }
+ *pretend_size = 7 * UNITS_PER_WORD;
+ }
+#else
+ /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
+ only push those that are remaining. However, if NO registers need to
+ be saved, don't allocate any space. This is not only because we won't
+ need the space, but because AP includes the current_pretend_args_size
+ and we don't want to mess up any ap-relative addresses already made.
+
+ If we are not to use the floating-point registers, save the integer
+ registers where we would put the floating-point registers. This is
+ not the most efficient way to implement varargs with just one register
+ class, but it isn't worth doing anything more efficient in this rare
+ case. */
+ if (cum >= 6)
+ return;
+
+ if (!no_rtl)
+ {
+ int count;
+ alias_set_type set = get_varargs_alias_set ();
+ rtx tmp;
+
+ count = cfun->va_list_gpr_size / UNITS_PER_WORD;
+ if (count > 6 - cum)
+ count = 6 - cum;
+
+ /* Detect whether integer registers or floating-point registers
+ are needed by the detected va_arg statements. See above for
+ how these values are computed. Note that the "escape" value
+ is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
+ these bits set. */
+ gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
+
+ if (cfun->va_list_fpr_size & 1)
+ {
+ tmp = gen_rtx_MEM (BLKmode,
+ plus_constant (virtual_incoming_args_rtx,
+ (cum + 6) * UNITS_PER_WORD));
+ MEM_NOTRAP_P (tmp) = 1;
+ set_mem_alias_set (tmp, set);
+ move_block_from_reg (16 + cum, tmp, count);
+ }
+
+ if (cfun->va_list_fpr_size & 2)
+ {
+ tmp = gen_rtx_MEM (BLKmode,
+ plus_constant (virtual_incoming_args_rtx,
+ cum * UNITS_PER_WORD));
+ MEM_NOTRAP_P (tmp) = 1;
+ set_mem_alias_set (tmp, set);
+ move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
+ }
+ }
+ *pretend_size = 12 * UNITS_PER_WORD;
+#endif
+}
+
+static void
+alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT offset;
+ tree t, offset_field, base_field;
+
+ if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
+ return;
+
+ if (TARGET_ABI_UNICOSMK)
+ std_expand_builtin_va_start (valist, nextarg);
+
+ /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
+ up by 48, storing fp arg registers in the first 48 bytes, and the
+ integer arg registers in the next 48 bytes. This is only done,
+ however, if any integer registers need to be stored.
+
+ If no integer registers need be stored, then we must subtract 48
+ in order to account for the integer arg registers which are counted
+ in argsize above, but which are not actually stored on the stack.
+ We must further be careful here about structures straddling the last
+ integer argument register; that futzes with pretend_args_size,
+ which changes the meaning of AP. */
+
+ if (NUM_ARGS < 6)
+ offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
+ else
+ offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
+
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ size_int (offset + NUM_ARGS * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+ else
+ {
+ base_field = TYPE_FIELDS (TREE_TYPE (valist));
+ offset_field = DECL_CHAIN (base_field);
+
+ base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
+ valist, base_field, NULL_TREE);
+ offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
+ valist, offset_field, NULL_TREE);
+
+ t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ size_int (offset));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
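+
+ /* Illustrative sketch, not from the upstream sources: for an OSF
+ function "f (int a, ...)", NUM_ARGS is 1, so va_start above leaves
+ ap.__base = AP + 48 and ap.__offset = 8. Given the save area laid
+ out by alpha_setup_incoming_varargs, the first "va_arg (ap, long)"
+ then reads AP + 56, where $17 was stored, and the first
+ "va_arg (ap, double)" reads AP + 8, where $f17 was stored. */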
+
+static tree
+alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
+ gimple_seq *pre_p)
+{
+ tree type_size, ptr_type, addend, t, addr;
+ gimple_seq internal_post;
+
+ /* If the type could not be passed in registers, skip the block
+ reserved for the registers. */
+ if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
+ {
+ t = build_int_cst (TREE_TYPE (offset), 6*8);
+ gimplify_assign (offset,
+ build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
+ pre_p);
+ }
+
+ addend = offset;
+ ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
+
+ if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ tree real_part, imag_part, real_temp;
+
+ real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
+ offset, pre_p);
+
+ /* Copy the value into a new temporary, lest the formal temporary
+ be reused out from under us. */
+ real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
+
+ imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
+ offset, pre_p);
+
+ return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
+ }
+ else if (TREE_CODE (type) == REAL_TYPE)
+ {
+ tree fpaddend, cond, fourtyeight;
+
+ fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
+ fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
+ addend, fourtyeight);
+ cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
+ addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
+ fpaddend, addend);
+ }
+
+ /* Build the final address and force that value into a temporary. */
+ addr = build2 (POINTER_PLUS_EXPR, ptr_type, fold_convert (ptr_type, base),
+ fold_convert (sizetype, addend));
+ internal_post = NULL;
+ gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
+ gimple_seq_add_seq (pre_p, internal_post);
+
+ /* Update the offset field. */
+ type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
+ if (type_size == NULL || TREE_OVERFLOW (type_size))
+ t = size_zero_node;
+ else
+ {
+ t = size_binop (PLUS_EXPR, type_size, size_int (7));
+ t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
+ t = size_binop (MULT_EXPR, t, size_int (8));
+ }
+ t = fold_convert (TREE_TYPE (offset), t);
+ gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
+ pre_p);
+
+ return build_va_arg_indirect_ref (addr);
+}
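+
+ /* A sketch of what the code above builds for "va_arg (ap, double)"
+ (illustrative pseudo-code, not upstream):
+
+ addend = offset < 48 ? offset - 48 : offset;
+ addr = base + addend;
+ offset = offset + 8;
+ result = *(double *) addr; */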
+
+static tree
+alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ tree offset_field, base_field, offset, base, t, r;
+ bool indirect;
+
+ if (TARGET_ABI_OPEN_VMS || TARGET_ABI_UNICOSMK)
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ base_field = TYPE_FIELDS (va_list_type_node);
+ offset_field = DECL_CHAIN (base_field);
+ base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
+ valist, base_field, NULL_TREE);
+ offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
+ valist, offset_field, NULL_TREE);
+
+ /* Pull the fields of the structure out into temporaries. Since we never
+ modify the base field, we can use a formal temporary. Sign-extend the
+ offset field so that it's the proper width for pointer arithmetic. */
+ base = get_formal_tmp_var (base_field, pre_p);
+
+ t = fold_convert (lang_hooks.types.type_for_size (64, 0), offset_field);
+ offset = get_initialized_tmp_var (t, pre_p, NULL);
+
+ indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect)
+ type = build_pointer_type_for_mode (type, ptr_mode, true);
+
+ /* Find the value. Note that this will be a stable indirection, or
+ a composite of stable indirections in the case of complex. */
+ r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
+
+ /* Stuff the offset temporary back into its field. */
+ gimplify_assign (unshare_expr (offset_field),
+ fold_convert (TREE_TYPE (offset_field), offset), pre_p);
+
+ if (indirect)
+ r = build_va_arg_indirect_ref (r);
+
+ return r;
+}
+
+/* Builtins. */
+
+enum alpha_builtin
+{
+ ALPHA_BUILTIN_CMPBGE,
+ ALPHA_BUILTIN_EXTBL,
+ ALPHA_BUILTIN_EXTWL,
+ ALPHA_BUILTIN_EXTLL,
+ ALPHA_BUILTIN_EXTQL,
+ ALPHA_BUILTIN_EXTWH,
+ ALPHA_BUILTIN_EXTLH,
+ ALPHA_BUILTIN_EXTQH,
+ ALPHA_BUILTIN_INSBL,
+ ALPHA_BUILTIN_INSWL,
+ ALPHA_BUILTIN_INSLL,
+ ALPHA_BUILTIN_INSQL,
+ ALPHA_BUILTIN_INSWH,
+ ALPHA_BUILTIN_INSLH,
+ ALPHA_BUILTIN_INSQH,
+ ALPHA_BUILTIN_MSKBL,
+ ALPHA_BUILTIN_MSKWL,
+ ALPHA_BUILTIN_MSKLL,
+ ALPHA_BUILTIN_MSKQL,
+ ALPHA_BUILTIN_MSKWH,
+ ALPHA_BUILTIN_MSKLH,
+ ALPHA_BUILTIN_MSKQH,
+ ALPHA_BUILTIN_UMULH,
+ ALPHA_BUILTIN_ZAP,
+ ALPHA_BUILTIN_ZAPNOT,
+ ALPHA_BUILTIN_AMASK,
+ ALPHA_BUILTIN_IMPLVER,
+ ALPHA_BUILTIN_RPCC,
+ ALPHA_BUILTIN_THREAD_POINTER,
+ ALPHA_BUILTIN_SET_THREAD_POINTER,
+ ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
+ ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
+
+ /* TARGET_MAX */
+ ALPHA_BUILTIN_MINUB8,
+ ALPHA_BUILTIN_MINSB8,
+ ALPHA_BUILTIN_MINUW4,
+ ALPHA_BUILTIN_MINSW4,
+ ALPHA_BUILTIN_MAXUB8,
+ ALPHA_BUILTIN_MAXSB8,
+ ALPHA_BUILTIN_MAXUW4,
+ ALPHA_BUILTIN_MAXSW4,
+ ALPHA_BUILTIN_PERR,
+ ALPHA_BUILTIN_PKLB,
+ ALPHA_BUILTIN_PKWB,
+ ALPHA_BUILTIN_UNPKBL,
+ ALPHA_BUILTIN_UNPKBW,
+
+ /* TARGET_CIX */
+ ALPHA_BUILTIN_CTTZ,
+ ALPHA_BUILTIN_CTLZ,
+ ALPHA_BUILTIN_CTPOP,
+
+ ALPHA_BUILTIN_max
+};
+
+static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
+ CODE_FOR_builtin_cmpbge,
+ CODE_FOR_builtin_extbl,
+ CODE_FOR_builtin_extwl,
+ CODE_FOR_builtin_extll,
+ CODE_FOR_builtin_extql,
+ CODE_FOR_builtin_extwh,
+ CODE_FOR_builtin_extlh,
+ CODE_FOR_builtin_extqh,
+ CODE_FOR_builtin_insbl,
+ CODE_FOR_builtin_inswl,
+ CODE_FOR_builtin_insll,
+ CODE_FOR_builtin_insql,
+ CODE_FOR_builtin_inswh,
+ CODE_FOR_builtin_inslh,
+ CODE_FOR_builtin_insqh,
+ CODE_FOR_builtin_mskbl,
+ CODE_FOR_builtin_mskwl,
+ CODE_FOR_builtin_mskll,
+ CODE_FOR_builtin_mskql,
+ CODE_FOR_builtin_mskwh,
+ CODE_FOR_builtin_msklh,
+ CODE_FOR_builtin_mskqh,
+ CODE_FOR_umuldi3_highpart,
+ CODE_FOR_builtin_zap,
+ CODE_FOR_builtin_zapnot,
+ CODE_FOR_builtin_amask,
+ CODE_FOR_builtin_implver,
+ CODE_FOR_builtin_rpcc,
+ CODE_FOR_load_tp,
+ CODE_FOR_set_tp,
+ CODE_FOR_builtin_establish_vms_condition_handler,
+ CODE_FOR_builtin_revert_vms_condition_handler,
+
+ /* TARGET_MAX */
+ CODE_FOR_builtin_minub8,
+ CODE_FOR_builtin_minsb8,
+ CODE_FOR_builtin_minuw4,
+ CODE_FOR_builtin_minsw4,
+ CODE_FOR_builtin_maxub8,
+ CODE_FOR_builtin_maxsb8,
+ CODE_FOR_builtin_maxuw4,
+ CODE_FOR_builtin_maxsw4,
+ CODE_FOR_builtin_perr,
+ CODE_FOR_builtin_pklb,
+ CODE_FOR_builtin_pkwb,
+ CODE_FOR_builtin_unpkbl,
+ CODE_FOR_builtin_unpkbw,
+
+ /* TARGET_CIX */
+ CODE_FOR_ctzdi2,
+ CODE_FOR_clzdi2,
+ CODE_FOR_popcountdi2
+};
+
+struct alpha_builtin_def
+{
+ const char *name;
+ enum alpha_builtin code;
+ unsigned int target_mask;
+ bool is_const;
+};
+
+static struct alpha_builtin_def const zero_arg_builtins[] = {
+ { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
+ { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
+};
+
+static struct alpha_builtin_def const one_arg_builtins[] = {
+ { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
+ { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
+ { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
+ { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
+ { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
+ { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
+ { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
+ { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
+};
+
+static struct alpha_builtin_def const two_arg_builtins[] = {
+ { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
+ { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
+ { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
+ { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
+ { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
+ { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
+ { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
+ { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
+ { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
+ { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
+ { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
+ { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
+ { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
+ { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
+ { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
+ { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
+ { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
+ { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
+ { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
+ { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
+ { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
+ { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
+ { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
+ { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
+ { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
+ { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
+ { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
+ { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
+ { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
+ { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
+ { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
+ { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
+ { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
+ { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
+};
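+
+ /* Typical uses of the builtins declared above (illustrative only):
+ "__builtin_alpha_umulh (x, y)" yields the high 64 bits of x*y,
+ "__builtin_alpha_zapnot (x, 0x0f)" yields x & 0xffffffff, and
+ "__builtin_alpha_extbl (x, 5)" yields (x >> 40) & 0xff. */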
+
+static GTY(()) tree alpha_v8qi_u;
+static GTY(()) tree alpha_v8qi_s;
+static GTY(()) tree alpha_v4hi_u;
+static GTY(()) tree alpha_v4hi_s;
+
+static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
+
+/* Return the alpha builtin for CODE. */
+
+static tree
+alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= ALPHA_BUILTIN_max)
+ return error_mark_node;
+ return alpha_builtins[code];
+}
+
+/* Helper function of alpha_init_builtins. Add the built-in specified
+ by NAME, FTYPE, CODE, and ECF. */
+
+static void
+alpha_builtin_function (const char *name, tree ftype,
+ enum alpha_builtin code, unsigned ecf)
+{
+ tree decl = add_builtin_function (name, ftype, (int) code,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ if (ecf & ECF_CONST)
+ TREE_READONLY (decl) = 1;
+ if (ecf & ECF_NOTHROW)
+ TREE_NOTHROW (decl) = 1;
+
+ alpha_builtins [(int) code] = decl;
+}
+
+/* Helper function of alpha_init_builtins. Add the COUNT built-in
+ functions pointed to by P, with function type FTYPE. */
+
+static void
+alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
+ tree ftype)
+{
+ size_t i;
+
+ for (i = 0; i < count; ++i, ++p)
+ if ((target_flags & p->target_mask) == p->target_mask)
+ alpha_builtin_function (p->name, ftype, p->code,
+ (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
+}
+
+static void
+alpha_init_builtins (void)
+{
+ tree dimode_integer_type_node;
+ tree ftype;
+
+ dimode_integer_type_node = lang_hooks.types.type_for_mode (DImode, 0);
+
+ /* Fwrite on VMS is non-standard. */
+#if TARGET_ABI_OPEN_VMS
+ implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
+ implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
+#endif
+
+ ftype = build_function_type (dimode_integer_type_node, void_list_node);
+ alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins),
+ ftype);
+
+ ftype = build_function_type_list (dimode_integer_type_node,
+ dimode_integer_type_node, NULL_TREE);
+ alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins),
+ ftype);
+
+ ftype = build_function_type_list (dimode_integer_type_node,
+ dimode_integer_type_node,
+ dimode_integer_type_node, NULL_TREE);
+ alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins),
+ ftype);
+
+ ftype = build_function_type (ptr_type_node, void_list_node);
+ alpha_builtin_function ("__builtin_thread_pointer", ftype,
+ ALPHA_BUILTIN_THREAD_POINTER, ECF_NOTHROW);
+
+ ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+ alpha_builtin_function ("__builtin_set_thread_pointer", ftype,
+ ALPHA_BUILTIN_SET_THREAD_POINTER, ECF_NOTHROW);
+
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ ftype = build_function_type_list (ptr_type_node, ptr_type_node,
+ NULL_TREE);
+ alpha_builtin_function ("__builtin_establish_vms_condition_handler",
+ ftype,
+ ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
+ 0);
+
+ ftype = build_function_type_list (ptr_type_node, void_type_node,
+ NULL_TREE);
+ alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
+ ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
+ }
+
+ alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
+ alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
+ alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
+ alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+alpha_expand_builtin (tree exp, rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+#define MAX_ARGS 2
+
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg;
+ call_expr_arg_iterator iter;
+ enum insn_code icode;
+ rtx op[MAX_ARGS], pat;
+ int arity;
+ bool nonvoid;
+
+ if (fcode >= ALPHA_BUILTIN_max)
+ internal_error ("bad builtin fcode");
+ icode = code_for_builtin[fcode];
+ if (icode == 0)
+ internal_error ("bad builtin fcode");
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+ arity = 0;
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+
+ if (arg == error_mark_node)
+ return NULL_RTX;
+ if (arity >= MAX_ARGS)
+ return NULL_RTX;
+
+ insn_op = &insn_data[icode].operand[arity + nonvoid];
+
+ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+
+ if (!(*insn_op->predicate) (op[arity], insn_op->mode))
+ op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+ arity++;
+ }
+
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ }
+
+ switch (arity)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0]);
+ else
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+
+/* Several bits below assume HWI >= 64 bits. This should be enforced
+ by config.gcc. */
+#if HOST_BITS_PER_WIDE_INT < 64
+# error "HOST_WIDE_INT too small"
+#endif
+
+/* Fold the builtin for the CMPBGE instruction. This is a vector comparison
+ with an 8-bit output vector. OPINT contains the integer operands; bit N
+ of OP_CONST is set if OPINT[N] is valid. */
+
+static tree
+alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ if (op_const == 3)
+ {
+ int i, val;
+ for (i = 0, val = 0; i < 8; ++i)
+ {
+ unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
+ unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
+ if (c0 >= c1)
+ val |= 1 << i;
+ }
+ return build_int_cst (long_integer_type_node, val);
+ }
+ else if (op_const == 2 && opint[1] == 0)
+ return build_int_cst (long_integer_type_node, 0xff);
+ return NULL;
+}
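+
+ /* Worked example: with opint[0] == 0x00000000000000ff and
+ opint[1] == 0x000000000000ff00, byte 0 satisfies 0xff >= 0x00 and
+ bytes 2-7 satisfy 0 >= 0, while byte 1 fails 0x00 >= 0xff, so the
+ fold above yields 0xfd. */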
+
+/* Fold the builtin for the ZAPNOT instruction. This is essentially a
+ specialized form of an AND operation. Other byte manipulation instructions
+ are defined in terms of this instruction, so this is also used as a
+ subroutine for other builtins.
+
+ OP contains the tree operands; OPINT contains the extracted integer values.
+ Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
+ OPINT is to be considered. */
+
+static tree
+alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
+ long op_const)
+{
+ if (op_const & 2)
+ {
+ unsigned HOST_WIDE_INT mask = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i)
+ if ((opint[1] >> i) & 1)
+ mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
+
+ if (op_const & 1)
+ return build_int_cst (long_integer_type_node, opint[0] & mask);
+
+ if (op)
+ return fold_build2 (BIT_AND_EXPR, long_integer_type_node, op[0],
+ build_int_cst (long_integer_type_node, mask));
+ }
+ else if ((op_const & 1) && opint[0] == 0)
+ return build_int_cst (long_integer_type_node, 0);
+ return NULL;
+}
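+
+ /* Worked example: zapnot (x, 0x0f) keeps bytes 0-3, so the mask built
+ above is 0x00000000ffffffff and the fold yields x & 0xffffffff;
+ zapnot (0x1122334455667788, 0x0f) folds to 0x55667788. */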
+
+/* Fold the builtins for the EXT family of instructions. */
+
+static tree
+alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
+ long op_const, unsigned HOST_WIDE_INT bytemask,
+ bool is_high)
+{
+ long zap_const = 2;
+ tree *zap_op = NULL;
+
+ if (op_const & 2)
+ {
+ unsigned HOST_WIDE_INT loc;
+
+ loc = opint[1] & 7;
+ if (BYTES_BIG_ENDIAN)
+ loc ^= 7;
+ loc *= 8;
+
+ if (loc != 0)
+ {
+ if (op_const & 1)
+ {
+ unsigned HOST_WIDE_INT temp = opint[0];
+ if (is_high)
+ temp <<= loc;
+ else
+ temp >>= loc;
+ opint[0] = temp;
+ zap_const = 3;
+ }
+ }
+ else
+ zap_op = op;
+ }
+
+ opint[1] = bytemask;
+ return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
+}
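+
+ /* Worked example: extbl (x, 5) has bytemask 0x01 and loc 40, so the
+ code above shifts x right by 40 and zapnots with 0x01, giving
+ (x >> 40) & 0xff; extbl (0x1122334455667788, 5) folds to 0x33. */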
+
+/* Fold the builtins for the INS family of instructions. */
+
+static tree
+alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
+ long op_const, unsigned HOST_WIDE_INT bytemask,
+ bool is_high)
+{
+ if ((op_const & 1) && opint[0] == 0)
+ return build_int_cst (long_integer_type_node, 0);
+
+ if (op_const & 2)
+ {
+ unsigned HOST_WIDE_INT temp, loc, byteloc;
+ tree *zap_op = NULL;
+
+ loc = opint[1] & 7;
+ if (BYTES_BIG_ENDIAN)
+ loc ^= 7;
+ bytemask <<= loc;
+
+ temp = opint[0];
+ if (is_high)
+ {
+ byteloc = (64 - (loc * 8)) & 0x3f;
+ if (byteloc == 0)
+ zap_op = op;
+ else
+ temp >>= byteloc;
+ bytemask >>= 8;
+ }
+ else
+ {
+ byteloc = loc * 8;
+ if (byteloc == 0)
+ zap_op = op;
+ else
+ temp <<= byteloc;
+ }
+
+ opint[0] = temp;
+ opint[1] = bytemask;
+ return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
+ }
+
+ return NULL;
+}
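+
+ /* Worked example: insbl (x, 2) shifts x left by 16 and zapnots with
+ bytemask 0x04, giving (x & 0xff) << 16; insbl (0xab, 2) folds to
+ 0xab0000. */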
+
+static tree
+alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
+ long op_const, unsigned HOST_WIDE_INT bytemask,
+ bool is_high)
+{
+ if (op_const & 2)
+ {
+ unsigned HOST_WIDE_INT loc;
+
+ loc = opint[1] & 7;
+ if (BYTES_BIG_ENDIAN)
+ loc ^= 7;
+ bytemask <<= loc;
+
+ if (is_high)
+ bytemask >>= 8;
+
+ opint[1] = bytemask ^ 0xff;
+ }
+
+ return alpha_fold_builtin_zapnot (op, opint, op_const);
+}
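+
+ /* Worked example: mskbl (x, 3) builds bytemask 0x08, inverts it to
+ 0xf7 and zapnots, clearing byte 3; mskbl (0x1122334455667788, 3)
+ folds to 0x1122334400667788. */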
+
+static tree
+alpha_fold_builtin_umulh (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ switch (op_const)
+ {
+ case 3:
+ {
+ unsigned HOST_WIDE_INT l;
+ HOST_WIDE_INT h;
+
+ mul_double (opint[0], 0, opint[1], 0, &l, &h);
+
+#if HOST_BITS_PER_WIDE_INT > 64
+# error fixme
+#endif
+
+ return build_int_cst (long_integer_type_node, h);
+ }
+
+ case 1:
+ opint[1] = opint[0];
+ /* FALLTHRU */
+ case 2:
+ /* Note that (X*1) >> 64 == 0. */
+ if (opint[1] == 0 || opint[1] == 1)
+ return build_int_cst (long_integer_type_node, 0);
+ break;
+ }
+ return NULL;
+}
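+
+ /* Worked example: umulh (1UL << 32, 1UL << 32) is the high half of
+ 2^64, i.e. 1, while umulh (x, 1) always folds to 0 per the
+ (X*1) >> 64 note above. */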
+
+static tree
+alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
+{
+ tree op0 = fold_convert (vtype, op[0]);
+ tree op1 = fold_convert (vtype, op[1]);
+ tree val = fold_build2 (code, vtype, op0, op1);
+ return fold_build1 (VIEW_CONVERT_EXPR, long_integer_type_node, val);
+}
+
+static tree
+alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp = 0;
+ int i;
+
+ if (op_const != 3)
+ return NULL;
+
+ for (i = 0; i < 8; ++i)
+ {
+ unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
+ unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
+ if (a >= b)
+ temp += a - b;
+ else
+ temp += b - a;
+ }
+
+ return build_int_cst (long_integer_type_node, temp);
+}
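+
+ /* Worked example: perr sums the absolute byte differences, so
+ perr (0x0305, 0x0102) folds to (0x03 - 0x01) + (0x05 - 0x02) == 5. */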
+
+static tree
+alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ temp = opint[0] & 0xff;
+ temp |= (opint[0] >> 24) & 0xff00;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
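+
+ /* Worked example: pklb packs the low byte of each longword, so
+ pklb (0x0000004400000011) folds to 0x4411. */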
+
+static tree
+alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ temp = opint[0] & 0xff;
+ temp |= (opint[0] >> 8) & 0xff00;
+ temp |= (opint[0] >> 16) & 0xff0000;
+ temp |= (opint[0] >> 24) & 0xff000000;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
+
+static tree
+alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ temp = opint[0] & 0xff;
+ temp |= (opint[0] & 0xff00) << 24;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
+
+static tree
+alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ temp = opint[0] & 0xff;
+ temp |= (opint[0] & 0x0000ff00) << 8;
+ temp |= (opint[0] & 0x00ff0000) << 16;
+ temp |= (opint[0] & 0xff000000) << 24;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
+
+static tree
+alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ if (opint[0] == 0)
+ temp = 64;
+ else
+ temp = exact_log2 (opint[0] & -opint[0]);
+
+ return build_int_cst (long_integer_type_node, temp);
+}
+
+static tree
+alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp;
+
+ if (op_const == 0)
+ return NULL;
+
+ if (opint[0] == 0)
+ temp = 64;
+ else
+ temp = 64 - floor_log2 (opint[0]) - 1;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
+
+static tree
+alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
+{
+ unsigned HOST_WIDE_INT temp, op;
+
+ if (op_const == 0)
+ return NULL;
+
+ op = opint[0];
+ temp = 0;
+ while (op)
+ temp++, op &= op - 1;
+
+ return build_int_cst (long_integer_type_node, temp);
+}
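+
+ /* Worked examples for the three folds above: cttz (0x8) == 3,
+ ctlz (0x1) == 63, ctpop (0xff) == 8, and both cttz (0) and
+ ctlz (0) fold to 64. */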
+
+/* Fold one of our builtin functions. */
+
+static tree
+alpha_fold_builtin (tree fndecl, int n_args, tree *op,
+ bool ignore ATTRIBUTE_UNUSED)
+{
+ unsigned HOST_WIDE_INT opint[MAX_ARGS];
+ long op_const = 0;
+ int i;
+
+ if (n_args > MAX_ARGS)
+ return NULL;
+
+ for (i = 0; i < n_args; i++)
+ {
+ tree arg = op[i];
+ if (arg == error_mark_node)
+ return NULL;
+
+ opint[i] = 0;
+ if (TREE_CODE (arg) == INTEGER_CST)
+ {
+ op_const |= 1L << i;
+ opint[i] = int_cst_value (arg);
+ }
+ }
+
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case ALPHA_BUILTIN_CMPBGE:
+ return alpha_fold_builtin_cmpbge (opint, op_const);
+
+ case ALPHA_BUILTIN_EXTBL:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
+ case ALPHA_BUILTIN_EXTWL:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
+ case ALPHA_BUILTIN_EXTLL:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
+ case ALPHA_BUILTIN_EXTQL:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
+ case ALPHA_BUILTIN_EXTWH:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
+ case ALPHA_BUILTIN_EXTLH:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
+ case ALPHA_BUILTIN_EXTQH:
+ return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
+
+ case ALPHA_BUILTIN_INSBL:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
+ case ALPHA_BUILTIN_INSWL:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
+ case ALPHA_BUILTIN_INSLL:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
+ case ALPHA_BUILTIN_INSQL:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
+ case ALPHA_BUILTIN_INSWH:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
+ case ALPHA_BUILTIN_INSLH:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
+ case ALPHA_BUILTIN_INSQH:
+ return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
+
+ case ALPHA_BUILTIN_MSKBL:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
+ case ALPHA_BUILTIN_MSKWL:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
+ case ALPHA_BUILTIN_MSKLL:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
+ case ALPHA_BUILTIN_MSKQL:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
+ case ALPHA_BUILTIN_MSKWH:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
+ case ALPHA_BUILTIN_MSKLH:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
+ case ALPHA_BUILTIN_MSKQH:
+ return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
+
+ case ALPHA_BUILTIN_UMULH:
+ return alpha_fold_builtin_umulh (opint, op_const);
+
+ case ALPHA_BUILTIN_ZAP:
+ opint[1] ^= 0xff;
+ /* FALLTHRU */
+ case ALPHA_BUILTIN_ZAPNOT:
+ return alpha_fold_builtin_zapnot (op, opint, op_const);
+
+ case ALPHA_BUILTIN_MINUB8:
+ return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
+ case ALPHA_BUILTIN_MINSB8:
+ return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
+ case ALPHA_BUILTIN_MINUW4:
+ return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
+ case ALPHA_BUILTIN_MINSW4:
+ return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
+ case ALPHA_BUILTIN_MAXUB8:
+ return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
+ case ALPHA_BUILTIN_MAXSB8:
+ return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
+ case ALPHA_BUILTIN_MAXUW4:
+ return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
+ case ALPHA_BUILTIN_MAXSW4:
+ return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
+
+ case ALPHA_BUILTIN_PERR:
+ return alpha_fold_builtin_perr (opint, op_const);
+ case ALPHA_BUILTIN_PKLB:
+ return alpha_fold_builtin_pklb (opint, op_const);
+ case ALPHA_BUILTIN_PKWB:
+ return alpha_fold_builtin_pkwb (opint, op_const);
+ case ALPHA_BUILTIN_UNPKBL:
+ return alpha_fold_builtin_unpkbl (opint, op_const);
+ case ALPHA_BUILTIN_UNPKBW:
+ return alpha_fold_builtin_unpkbw (opint, op_const);
+
+ case ALPHA_BUILTIN_CTTZ:
+ return alpha_fold_builtin_cttz (opint, op_const);
+ case ALPHA_BUILTIN_CTLZ:
+ return alpha_fold_builtin_ctlz (opint, op_const);
+ case ALPHA_BUILTIN_CTPOP:
+ return alpha_fold_builtin_ctpop (opint, op_const);
+
+ case ALPHA_BUILTIN_AMASK:
+ case ALPHA_BUILTIN_IMPLVER:
+ case ALPHA_BUILTIN_RPCC:
+ case ALPHA_BUILTIN_THREAD_POINTER:
+ case ALPHA_BUILTIN_SET_THREAD_POINTER:
+ /* None of these are foldable at compile-time. */
+ default:
+ return NULL;
+ }
+}
+
+/* This page contains routines that are used to determine what the function
+ prologue and epilogue code will do and write them out. */
+
+/* Compute the size of the save area in the stack. */
+
+/* These variables are used for communication between the following functions.
+ They indicate various things about the current function being compiled
+ that are used to tell what kind of prologue, epilogue and procedure
+ descriptor to generate. */
+
+/* Nonzero if we need a stack procedure. */
+enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
+static enum alpha_procedure_types alpha_procedure_type;
+
+/* Register number (either FP or SP) that is used to unwind the frame. */
+static int vms_unwind_regno;
+
+/* Register number used to save FP. We need not have one for RA since
+ we don't modify it for register procedures. This is only defined
+ for register frame procedures. */
+static int vms_save_fp_regno;
+
+/* Register number used to reference objects off our PV. */
+static int vms_base_regno;
+
+/* Compute register masks for saved registers. */
+
+static void
+alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
+{
+ unsigned long imask = 0;
+ unsigned long fmask = 0;
+ unsigned int i;
+
+ /* When outputting a thunk, we don't have valid register life info,
+ but assemble_start_function wants to output .frame and .mask
+ directives. */
+ if (cfun->is_thunk)
+ {
+ *imaskP = 0;
+ *fmaskP = 0;
+ return;
+ }
+
+ if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
+ imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
+
+ /* One for every register we have to save. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (! fixed_regs[i] && ! call_used_regs[i]
+ && df_regs_ever_live_p (i) && i != REG_RA
+ && (!TARGET_ABI_UNICOSMK || i != HARD_FRAME_POINTER_REGNUM))
+ {
+ if (i < 32)
+ imask |= (1UL << i);
+ else
+ fmask |= (1UL << (i - 32));
+ }
+
+ /* We need to restore these for the handler. */
+ if (crtl->calls_eh_return)
+ {
+ for (i = 0; ; ++i)
+ {
+ unsigned regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+ imask |= 1UL << regno;
+ }
+ }
+
+ /* If any register spilled, then spill the return address also. */
+ /* ??? This is required by the Digital stack unwind specification
+ and isn't needed if we're doing Dwarf2 unwinding. */
+ if (imask || fmask || alpha_ra_ever_killed ())
+ imask |= (1UL << REG_RA);
+
+ *imaskP = imask;
+ *fmaskP = fmask;
+}
+
+int
+alpha_sa_size (void)
+{
+ unsigned long mask[2];
+ int sa_size = 0;
+ int i, j;
+
+ alpha_sa_mask (&mask[0], &mask[1]);
+
+ if (TARGET_ABI_UNICOSMK)
+ {
+ if (mask[0] || mask[1])
+ sa_size = 14;
+ }
+ else
+ {
+ for (j = 0; j < 2; ++j)
+ for (i = 0; i < 32; ++i)
+ if ((mask[j] >> i) & 1)
+ sa_size++;
+ }
+
+ if (TARGET_ABI_UNICOSMK)
+ {
+ /* We might not need to generate a frame if we don't make any calls
+ (including calls to __T3E_MISMATCH if this is a vararg function),
+ don't have any local variables which require stack slots, don't
+ use alloca and have not determined that we need a frame for other
+ reasons. */
+
+ alpha_procedure_type
+ = (sa_size || get_frame_size() != 0
+ || crtl->outgoing_args_size
+ || cfun->stdarg || cfun->calls_alloca
+ || frame_pointer_needed)
+ ? PT_STACK : PT_REGISTER;
+
+ /* Always reserve space for saving callee-saved registers if we
+ need a frame as required by the calling convention. */
+ if (alpha_procedure_type == PT_STACK)
+ sa_size = 14;
+ }
+ else if (TARGET_ABI_OPEN_VMS)
+ {
+ /* Start with a stack procedure if we make any calls (REG_RA used), or
+ need a frame pointer, with a register procedure if we otherwise need
+ at least a slot, and with a null procedure in other cases. */
+ if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
+ alpha_procedure_type = PT_STACK;
+ else if (get_frame_size() != 0)
+ alpha_procedure_type = PT_REGISTER;
+ else
+ alpha_procedure_type = PT_NULL;
+
+ /* Don't reserve space for saving FP & RA yet. Do that later after we've
+ made the final decision on stack procedure vs register procedure. */
+ if (alpha_procedure_type == PT_STACK)
+ sa_size -= 2;
+
+ /* Decide whether to refer to objects off our PV via FP or PV.
+ If we need FP for something else or if we receive a nonlocal
+ goto (which expects PV to contain the value), we must use PV.
+ Otherwise, start by assuming we can use FP. */
+
+ vms_base_regno
+ = (frame_pointer_needed
+ || cfun->has_nonlocal_label
+ || alpha_procedure_type == PT_STACK
+ || crtl->outgoing_args_size)
+ ? REG_PV : HARD_FRAME_POINTER_REGNUM;
+
+ /* If we want to copy PV into FP, we need to find some register
+ in which to save FP. */
+
+ vms_save_fp_regno = -1;
+ if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
+ for (i = 0; i < 32; i++)
+ if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
+ vms_save_fp_regno = i;
+
+ /* A VMS condition handler requires a stack procedure in our
+ implementation (this is not required by the calling standard). */
+ if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
+ || cfun->machine->uses_condition_handler)
+ vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
+ else if (alpha_procedure_type == PT_NULL)
+ vms_base_regno = REG_PV;
+
+ /* Stack unwinding should be done via FP unless we use it for PV. */
+ vms_unwind_regno = (vms_base_regno == REG_PV
+ ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+
+ /* If this is a stack procedure, allow space for saving FP, RA and
+ a condition handler slot if needed. */
+ if (alpha_procedure_type == PT_STACK)
+ sa_size += 2 + cfun->machine->uses_condition_handler;
+ }
+ else
+ {
+ /* Our size must be even (multiple of 16 bytes). */
+ if (sa_size & 1)
+ sa_size++;
+ }
+
+ return sa_size * 8;
+}
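+
+ /* Illustrative sketch: on OSF, a function that saves $9 and $10 also
+ saves $26 (RA) per alpha_sa_mask, giving three mask bits; the count
+ is then made even, so alpha_sa_size returns 4 * 8 == 32 bytes. */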
+
+/* Define the offset between two registers, one to be eliminated,
+ and the other its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+alpha_initial_elimination_offset (unsigned int from,
+ unsigned int to ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT ret;
+
+ ret = alpha_sa_size ();
+ ret += ALPHA_ROUND (crtl->outgoing_args_size);
+
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ break;
+
+ case ARG_POINTER_REGNUM:
+ ret += (ALPHA_ROUND (get_frame_size ()
+ + crtl->args.pretend_args_size)
+ - crtl->args.pretend_args_size);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return ret;
+}
+
+#if TARGET_ABI_OPEN_VMS
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+static bool
+alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ /* We need the alpha_procedure_type to decide. Evaluate it now. */
+ alpha_sa_size ();
+
+ switch (alpha_procedure_type)
+ {
+ case PT_NULL:
+ /* NULL procedures have no frame of their own and we only
+ know how to resolve from the current stack pointer. */
+ return to == STACK_POINTER_REGNUM;
+
+ case PT_REGISTER:
+ case PT_STACK:
+ /* We always eliminate except to the stack pointer if there is no
+ usable frame pointer at hand. */
+ return (to != STACK_POINTER_REGNUM
+ || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
+ }
+
+ gcc_unreachable ();
+}
+
+/* FROM is to be eliminated for TO. Return the offset so that TO+offset
+ designates the same location as FROM. */
+
+HOST_WIDE_INT
+alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
+{
+ /* The only possible attempts we ever expect are ARG or FRAME_PTR to
+ HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
+ on the proper computations and will need the register save area size
+ in most cases. */
+
+ HOST_WIDE_INT sa_size = alpha_sa_size ();
+
+ /* PT_NULL procedures have no frame of their own and we only allow
+ elimination to the stack pointer. This is the argument pointer and we
+ resolve the soft frame pointer to that as well. */
+
+ if (alpha_procedure_type == PT_NULL)
+ return 0;
+
+ /* For a PT_STACK procedure the frame layout looks as follows
+
+ -----> decreasing addresses
+
+ < size rounded up to 16 | likewise >
+ --------------#------------------------------+++--------------+++-------#
+ incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
+ --------------#---------------------------------------------------------#
+ ^ ^ ^ ^
+ ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
+
+
+ PT_REGISTER procedures are similar in that they may have a frame of their
+ own. They have no regs-sa/pv/outgoing-args area.
+
+ We first compute offset to HARD_FRAME_PTR, then add what we need to get
+ to STACK_PTR if need be. */
+
+ {
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
+
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ offset = ALPHA_ROUND (sa_size + pv_save_size);
+ break;
+ case ARG_POINTER_REGNUM:
+ offset = (ALPHA_ROUND (sa_size + pv_save_size
+ + get_frame_size ()
+ + crtl->args.pretend_args_size)
+ - crtl->args.pretend_args_size);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (to == STACK_POINTER_REGNUM)
+ offset += ALPHA_ROUND (crtl->outgoing_args_size);
+
+ return offset;
+ }
+}
+
+#define COMMON_OBJECT "common_object"
+
+static tree
+common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs ATTRIBUTE_UNUSED)
+{
+ tree decl = *node;
+ gcc_assert (DECL_P (decl));
+
+ DECL_COMMON (decl) = 1;
+ return NULL_TREE;
+}
+
+static const struct attribute_spec vms_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+void
+vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ tree attr = DECL_ATTRIBUTES (decl);
+ fprintf (file, "%s", COMMON_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
+ /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
+ fprintf (file, ",%u", align / BITS_PER_UNIT);
+ if (attr)
+ {
+ attr = lookup_attribute (COMMON_OBJECT, attr);
+ if (attr)
+ fprintf (file, ",%s",
+ IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
+ }
+ fputc ('\n', file);
+}
+
+#undef COMMON_OBJECT
+
+#endif
+
+static int
+find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
+}
+
+int
+alpha_find_lo_sum_using_gp (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
+}
+
+static int
+alpha_does_function_need_gp (void)
+{
+ rtx insn;
+
+ /* The GP being variable is an OSF abi thing. */
+ if (! TARGET_ABI_OSF)
+ return 0;
+
+ /* We need the gp to load the address of __mcount. */
+ if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
+ return 1;
+
+ /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
+ if (cfun->is_thunk)
+ return 1;
+
+ /* The nonlocal receiver pattern assumes that the gp is valid for
+ the nested function. Reasonable because it's almost always set
+ correctly already. For the cases where that's wrong, make sure
+ the nested function loads its gp on entry. */
+ if (crtl->has_nonlocal_goto)
+ return 1;
+
+ /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
+ Even if we are a static function, we still need to do this in case
+ our address is taken and passed to something like qsort. */
+
+ push_topmost_sequence ();
+ insn = get_insns ();
+ pop_topmost_sequence ();
+
+ for (; insn; insn = NEXT_INSN (insn))
+ if (NONDEBUG_INSN_P (insn)
+ && ! JUMP_TABLE_DATA_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_usegp (insn))
+ return 1;
+
+ return 0;
+}
+
+
+/* Helper function to set RTX_FRAME_RELATED_P on instructions, including
+ sequences. */
+
+static rtx
+set_frame_related_p (void)
+{
+ rtx seq = get_insns ();
+ rtx insn;
+
+ end_sequence ();
+
+ if (!seq)
+ return NULL_RTX;
+
+ if (INSN_P (seq))
+ {
+ insn = seq;
+ while (insn != NULL_RTX)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = NEXT_INSN (insn);
+ }
+ seq = emit_insn (seq);
+ }
+ else
+ {
+ seq = emit_insn (seq);
+ RTX_FRAME_RELATED_P (seq) = 1;
+ }
+ return seq;
+}
+
+#define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
+
+/* Generates a store with the proper unwind info attached. VALUE is
+ stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
+ contains SP+FRAME_BIAS, and that is the unwind info that should be
+ generated. If FRAME_REG != VALUE, then VALUE is being stored on
+ behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
+
+static void
+emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
+ HOST_WIDE_INT base_ofs, rtx frame_reg)
+{
+ rtx addr, mem, insn;
+
+ addr = plus_constant (base_reg, base_ofs);
+ mem = gen_frame_mem (DImode, addr);
+
+ insn = emit_move_insn (mem, value);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (frame_bias || value != frame_reg)
+ {
+ if (frame_bias)
+ {
+ addr = plus_constant (stack_pointer_rtx, frame_bias + base_ofs);
+ mem = gen_rtx_MEM (DImode, addr);
+ }
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, mem, frame_reg));
+ }
+}
+
+static void
+emit_frame_store (unsigned int regno, rtx base_reg,
+ HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
+{
+ rtx reg = gen_rtx_REG (DImode, regno);
+ emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
+}
+
+/* Compute the frame size. SIZE is the size of the "naked" frame
+ and SA_SIZE is the size of the register save area. */
+
+static HOST_WIDE_INT
+compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
+{
+ if (TARGET_ABI_OPEN_VMS)
+ return ALPHA_ROUND (sa_size
+ + (alpha_procedure_type == PT_STACK ? 8 : 0)
+ + size
+ + crtl->args.pretend_args_size);
+ else if (TARGET_ABI_UNICOSMK)
+ /* We have to allocate space for the DSIB if we generate a frame. */
+ return ALPHA_ROUND (sa_size
+ + (alpha_procedure_type == PT_STACK ? 48 : 0))
+ + ALPHA_ROUND (size
+ + crtl->outgoing_args_size);
+ else
+ return ALPHA_ROUND (crtl->outgoing_args_size)
+ + sa_size
+ + ALPHA_ROUND (size
+ + crtl->args.pretend_args_size);
+}
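+
+ /* Worked example for the OSF case, assuming ALPHA_ROUND rounds up to
+ a multiple of 16: with 24 bytes of outgoing args, sa_size == 32 and
+ a 40-byte frame with no pretend args, the result is
+ 32 + 32 + 48 == 112 bytes. */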
+
+/* Write function prologue. */
+
+/* On VMS we have two kinds of functions:
+
+ - stack frame (PROC_STACK)
+ these are 'normal' functions with local vars and that
+ call other functions
+ - register frame (PROC_REGISTER)
+ keeps all data in registers, needs no stack
+
+ We must pass this to the assembler so it can generate the
+ proper pdsc (procedure descriptor).  This is done with the
+ '.pdesc' command.
+
+ On non-VMS targets, we don't really differentiate between the two, as
+ we can simply allocate stack without saving registers. */
+
+void
+alpha_expand_prologue (void)
+{
+ /* Registers to save. */
+ unsigned long imask = 0;
+ unsigned long fmask = 0;
+ /* Stack space needed for pushing registers clobbered by us. */
+ HOST_WIDE_INT sa_size;
+ /* Complete stack size needed. */
+ HOST_WIDE_INT frame_size;
+ /* Probed stack size; it additionally includes the size of
+ the "reserve region" if any. */
+ HOST_WIDE_INT probed_size;
+ /* Offset from base reg to register save area. */
+ HOST_WIDE_INT reg_offset;
+ rtx sa_reg;
+ int i;
+
+ sa_size = alpha_sa_size ();
+ frame_size = compute_frame_size (get_frame_size (), sa_size);
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = frame_size;
+
+ if (TARGET_ABI_OPEN_VMS)
+ reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
+ else
+ reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
+
+ alpha_sa_mask (&imask, &fmask);
+
+ /* Emit an insn to reload GP, if needed. */
+ if (TARGET_ABI_OSF)
+ {
+ alpha_function_needs_gp = alpha_does_function_need_gp ();
+ if (alpha_function_needs_gp)
+ emit_insn (gen_prologue_ldgp ());
+ }
+
+ /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
+ the call to mcount ourselves, rather than having the linker do it
+ magically in response to -pg. Since _mcount has special linkage,
+ don't represent the call as a call. */
+ if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
+ emit_insn (gen_prologue_mcount ());
+
+ if (TARGET_ABI_UNICOSMK)
+ unicosmk_gen_dsib (&imask);
+
+ /* Adjust the stack by the frame size. If the frame size is > 4096
+ bytes, we need to be sure we probe somewhere in the first and last
+ 4096 bytes (we can probably get away without the latter test) and
+ every 8192 bytes in between. If the frame size is > 32768, we
+ do this in a loop. Otherwise, we generate the explicit probe
+ instructions.
+
+ Note that we are only allowed to adjust sp once in the prologue. */
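+
+ /* Illustrative sketch: with probed_size == 12000 and no registers to
+ save, the code below probes at sp-4096, then (since 12000 > 8192)
+ once more at sp-12000, and finally adjusts sp by -12000 with a
+ single add. */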
+
+ probed_size = frame_size;
+ if (flag_stack_check)
+ probed_size += STACK_CHECK_PROTECT;
+
+ if (probed_size <= 32768)
+ {
+ if (probed_size > 4096)
+ {
+ int probed;
+
+ for (probed = 4096; probed < probed_size; probed += 8192)
+ emit_insn (gen_probe_stack (GEN_INT (TARGET_ABI_UNICOSMK
+ ? -probed + 64
+ : -probed)));
+
+ /* We only have to do this probe if we aren't saving registers or
+ if we are probing beyond the frame because of -fstack-check. */
+ if ((sa_size == 0 && probed_size > probed - 4096)
+ || flag_stack_check)
+ emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
+ }
+
+ if (frame_size != 0)
+ FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (TARGET_ABI_UNICOSMK
+ ? -frame_size + 64
+ : -frame_size))));
+ }
+ else
+ {
+ /* Here we generate code to set R22 to SP + 4096 and set R23 to the
+ number of 8192 byte blocks to probe. We then probe each block
+ in the loop and then set SP to the proper location. If the
+ amount remaining is > 4096, we have to do one more probe if we
+ are not saving any registers or if we are probing beyond the
+ frame because of -fstack-check. */
+
+ HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
+ HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
+ rtx ptr = gen_rtx_REG (DImode, 22);
+ rtx count = gen_rtx_REG (DImode, 23);
+ rtx seq;
+
+ emit_move_insn (count, GEN_INT (blocks));
+ emit_insn (gen_adddi3 (ptr, stack_pointer_rtx,
+ GEN_INT (TARGET_ABI_UNICOSMK ? 4096 - 64 : 4096)));
+
+ /* Because of the difficulty in emitting a new basic block this
+ late in the compilation, generate the loop as a single insn. */
+ emit_insn (gen_prologue_stack_probe_loop (count, ptr));
+
+ if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
+ {
+ rtx last = gen_rtx_MEM (DImode, plus_constant (ptr, -leftover));
+ MEM_VOLATILE_P (last) = 1;
+ emit_move_insn (last, const0_rtx);
+ }
+
+ if (TARGET_ABI_WINDOWS_NT || flag_stack_check)
+ {
+ /* For NT stack unwind (done by 'reverse execution'), it's
+ not OK to take the result of a loop, even though the value
+ is already in ptr, so we reload it via a single operation
+ and subtract it from sp.
+
+ Same if -fstack-check is specified, because the probed stack
+ size is not equal to the frame size.
+
+ Yes, that's correct -- we have to reload the whole constant
+ into a temporary via ldah+lda then subtract from sp. */
+
+ HOST_WIDE_INT lo, hi;
+ lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
+ hi = frame_size - lo;
+
+ emit_move_insn (ptr, GEN_INT (hi));
+ emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
+ seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
+ ptr));
+ }
+ else
+ {
+ seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
+ GEN_INT (-leftover)));
+ }
+
+ /* This alternative is special, because the DWARF code cannot
+ possibly intuit through the loop above. So we invent this
+ note for it to look at instead. */
+ RTX_FRAME_RELATED_P (seq) = 1;
+ add_reg_note (seq, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (TARGET_ABI_UNICOSMK
+ ? -frame_size + 64
+ : -frame_size))));
+ }
+
+ if (!TARGET_ABI_UNICOSMK)
+ {
+ HOST_WIDE_INT sa_bias = 0;
+
+ /* Cope with very large offsets to the register save area. */
+ sa_reg = stack_pointer_rtx;
+ if (reg_offset + sa_size > 0x8000)
+ {
+ int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
+ rtx sa_bias_rtx;
+
+ if (low + sa_size <= 0x8000)
+ sa_bias = reg_offset - low, reg_offset = low;
+ else
+ sa_bias = reg_offset, reg_offset = 0;
+
+ sa_reg = gen_rtx_REG (DImode, 24);
+ sa_bias_rtx = GEN_INT (sa_bias);
+
+ if (add_operand (sa_bias_rtx, DImode))
+ emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
+ else
+ {
+ emit_move_insn (sa_reg, sa_bias_rtx);
+ emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
+ }
+ }
+
+ /* Save regs in stack order. Beginning with VMS PV. */
+ if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
+ emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
+
+ /* Save register RA next. */
+ if (imask & (1UL << REG_RA))
+ {
+ emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
+ imask &= ~(1UL << REG_RA);
+ reg_offset += 8;
+ }
+
+ /* Now save any other registers required to be saved. */
+ for (i = 0; i < 31; i++)
+ if (imask & (1UL << i))
+ {
+ emit_frame_store (i, sa_reg, sa_bias, reg_offset);
+ reg_offset += 8;
+ }
+
+ for (i = 0; i < 31; i++)
+ if (fmask & (1UL << i))
+ {
+ emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
+ reg_offset += 8;
+ }
+ }
+ else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
+ {
+ /* The standard frame on the T3E includes space for saving registers.
+ We just have to use it. We don't have to save the return address and
+ the old frame pointer here - they are saved in the DSIB. */
+
+ reg_offset = -56;
+ for (i = 9; i < 15; i++)
+ if (imask & (1UL << i))
+ {
+ emit_frame_store (i, hard_frame_pointer_rtx, 0, reg_offset);
+ reg_offset -= 8;
+ }
+ for (i = 2; i < 10; i++)
+ if (fmask & (1UL << i))
+ {
+ emit_frame_store (i+32, hard_frame_pointer_rtx, 0, reg_offset);
+ reg_offset -= 8;
+ }
+ }
+
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ /* Register frame procedures save the fp. */
+ if (alpha_procedure_type == PT_REGISTER)
+ {
+ rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
+ hard_frame_pointer_rtx);
+ add_reg_note (insn, REG_CFA_REGISTER, NULL);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
+ emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
+ gen_rtx_REG (DImode, REG_PV)));
+
+ if (alpha_procedure_type != PT_NULL
+ && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
+ FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
+
+ /* If we have to allocate space for outgoing args, do it now. */
+ if (crtl->outgoing_args_size != 0)
+ {
+ rtx seq
+ = emit_move_insn (stack_pointer_rtx,
+ plus_constant
+ (hard_frame_pointer_rtx,
+ - (ALPHA_ROUND
+ (crtl->outgoing_args_size))));
+
+ /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
+ if ! frame_pointer_needed. Setting the bit will change the CFA
+ computation rule to use sp again, which would be wrong if we had
+ frame_pointer_needed, as this means sp might move unpredictably
+ later on.
+
+ Also, note that
+ frame_pointer_needed
+ => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
+ and
+ crtl->outgoing_args_size != 0
+ => alpha_procedure_type != PT_NULL,
+
+ so when we are not setting the bit here, we are guaranteed to
+ have emitted an FRP frame pointer update just before. */
+ RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
+ }
+ }
+ else if (!TARGET_ABI_UNICOSMK)
+ {
+ /* If we need a frame pointer, set it from the stack pointer. */
+ if (frame_pointer_needed)
+ {
+ if (TARGET_CAN_FAULT_IN_PROLOGUE)
+ FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
+ else
+ /* This must always be the last instruction in the
+ prologue, thus we emit a special move + clobber. */
+ FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
+ stack_pointer_rtx, sa_reg)));
+ }
+ }
+
+ /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
+ the prologue, for exception handling reasons, we cannot do this for
+ any insn that might fault. We could prevent this for mems with a
+ (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
+ have to prevent all such scheduling with a blockage.
+
+ Linux, on the other hand, never bothered to implement OSF/1's
+ exception handling, and so doesn't care about such things. Anyone
+ planning to use dwarf2 frame-unwind info can also omit the blockage. */
+
+ if (! TARGET_CAN_FAULT_IN_PROLOGUE)
+ emit_insn (gen_blockage ());
+}
+
+/* Count the number of .file directives, so that .loc is up to date. */
+int num_source_filenames = 0;
+
+/* Output the textual info surrounding the prologue. */
+
+void
+alpha_start_function (FILE *file, const char *fnname,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ unsigned long imask = 0;
+ unsigned long fmask = 0;
+ /* Stack space needed for pushing registers clobbered by us. */
+ HOST_WIDE_INT sa_size;
+ /* Complete stack size needed. */
+ unsigned HOST_WIDE_INT frame_size;
+ /* The maximum debuggable frame size (512 Kbytes using Tru64 as). */
+ unsigned HOST_WIDE_INT max_frame_size = TARGET_ABI_OSF && !TARGET_GAS
+ ? 524288
+ : 1UL << 31;
+ /* Offset from base reg to register save area. */
+ HOST_WIDE_INT reg_offset;
+ char *entry_label = (char *) alloca (strlen (fnname) + 6);
+ char *tramp_label = (char *) alloca (strlen (fnname) + 6);
+ int i;
+
+ /* Don't emit an extern directive for functions defined in the same file. */
+ if (TARGET_ABI_UNICOSMK)
+ {
+ tree name_tree;
+ name_tree = get_identifier (fnname);
+ TREE_ASM_WRITTEN (name_tree) = 1;
+ }
+
+#if TARGET_ABI_OPEN_VMS
+ if (vms_debug_main
+ && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
+ {
+ targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
+ ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
+ switch_to_section (text_section);
+ vms_debug_main = NULL;
+ }
+#endif
+
+ alpha_fnname = fnname;
+ sa_size = alpha_sa_size ();
+ frame_size = compute_frame_size (get_frame_size (), sa_size);
+
+ if (TARGET_ABI_OPEN_VMS)
+ reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
+ else
+ reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
+
+ alpha_sa_mask (&imask, &fmask);
+
+ /* Ecoff can handle multiple .file directives, so put out file and lineno.
+ We have to do that before the .ent directive as we cannot switch
+ files within procedures with native ecoff because line numbers are
+ linked to procedure descriptors.
+ Outputting the lineno helps debugging of one line functions as they
+ would otherwise get no line number at all. Please note that we would
+ like to put out last_linenum from final.c, but it is not accessible. */
+
+ if (write_symbols == SDB_DEBUG)
+ {
+#ifdef ASM_OUTPUT_SOURCE_FILENAME
+ ASM_OUTPUT_SOURCE_FILENAME (file,
+ DECL_SOURCE_FILE (current_function_decl));
+#endif
+#ifdef SDB_OUTPUT_SOURCE_LINE
+ if (debug_info_level != DINFO_LEVEL_TERSE)
+ SDB_OUTPUT_SOURCE_LINE (file,
+ DECL_SOURCE_LINE (current_function_decl));
+#endif
+ }
+
+ /* Issue function start and label. */
+ if (TARGET_ABI_OPEN_VMS
+ || (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive))
+ {
+ fputs ("\t.ent ", file);
+ assemble_name (file, fnname);
+ putc ('\n', file);
+
+ /* If the function needs GP, we'll write the "..ng" label there.
+ Otherwise, do it here. */
+ if (TARGET_ABI_OSF
+ && ! alpha_function_needs_gp
+ && ! cfun->is_thunk)
+ {
+ putc ('$', file);
+ assemble_name (file, fnname);
+ fputs ("..ng:\n", file);
+ }
+ }
+ /* Nested functions on VMS that are potentially called via a trampoline
+ get a special transfer entry point that loads the called function's
+ procedure descriptor and static chain. */
+ if (TARGET_ABI_OPEN_VMS
+ && !TREE_PUBLIC (decl)
+ && DECL_CONTEXT (decl)
+ && !TYPE_P (DECL_CONTEXT (decl)))
+ {
+ strcpy (tramp_label, fnname);
+ strcat (tramp_label, "..tr");
+ ASM_OUTPUT_LABEL (file, tramp_label);
+ fprintf (file, "\tldq $1,24($27)\n");
+ fprintf (file, "\tldq $27,16($27)\n");
+ }
+
+ strcpy (entry_label, fnname);
+ if (TARGET_ABI_OPEN_VMS)
+ strcat (entry_label, "..en");
+
+ /* For public functions, the label must be globalized by appending an
+ additional colon. */
+ if (TARGET_ABI_UNICOSMK && TREE_PUBLIC (decl))
+ strcat (entry_label, ":");
+
+ ASM_OUTPUT_LABEL (file, entry_label);
+ inside_function = TRUE;
+
+ if (TARGET_ABI_OPEN_VMS)
+ fprintf (file, "\t.base $%d\n", vms_base_regno);
+
+ if (!TARGET_ABI_OPEN_VMS && !TARGET_ABI_UNICOSMK && TARGET_IEEE_CONFORMANT
+ && !flag_inhibit_size_directive)
+ {
+ /* Set flags in procedure descriptor to request IEEE-conformant
+ math-library routines. The value we set it to is PDSC_EXC_IEEE
+ (/usr/include/pdsc.h). */
+ fputs ("\t.eflag 48\n", file);
+ }
+
+ /* Set up offsets to alpha virtual arg/local debugging pointer. */
+ alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
+ alpha_arg_offset = -frame_size + 48;
+
+ /* Describe our frame. If the frame size is larger than an integer,
+ print it as zero to avoid an assembler error. We won't be
+ properly describing such a frame, but that's the best we can do. */
+ if (TARGET_ABI_UNICOSMK)
+ ;
+ else if (TARGET_ABI_OPEN_VMS)
+ fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
+ HOST_WIDE_INT_PRINT_DEC "\n",
+ vms_unwind_regno,
+ frame_size >= (1UL << 31) ? 0 : frame_size,
+ reg_offset);
+ else if (!flag_inhibit_size_directive)
+ fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
+ (frame_pointer_needed
+ ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
+ frame_size >= max_frame_size ? 0 : frame_size,
+ crtl->args.pretend_args_size);
+
+ /* Describe which registers were spilled. */
+ if (TARGET_ABI_UNICOSMK)
+ ;
+ else if (TARGET_ABI_OPEN_VMS)
+ {
+ if (imask)
+ /* ??? Does VMS care if mask contains ra? The old code didn't
+ set it, so I don't here. */
+ fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
+ if (fmask)
+ fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
+ if (alpha_procedure_type == PT_REGISTER)
+ fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
+ }
+ else if (!flag_inhibit_size_directive)
+ {
+ if (imask)
+ {
+ fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
+ frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
+
+ for (i = 0; i < 32; ++i)
+ if (imask & (1UL << i))
+ reg_offset += 8;
+ }
+
+ if (fmask)
+ fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
+ frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
+ }
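+
+ /* As an illustrative example of the OSF path above: a 96-byte frame
+ that saves only $26 yields ".frame $30,96,$26,0" followed by
+ ".mask 0x4000000,-96". */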
+
+#if TARGET_ABI_OPEN_VMS
+ /* If a user condition handler has been installed at some point, emit
+ the procedure descriptor bits to point the Condition Handling Facility
+ at the indirection wrapper, and state the fp offset at which the user
+ handler may be found. */
+ if (cfun->machine->uses_condition_handler)
+ {
+ fprintf (file, "\t.handler __gcc_shell_handler\n");
+ fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
+ }
+
+ /* Ifdef'ed because link_section is only available then. */
+ switch_to_section (readonly_data_section);
+ fprintf (file, "\t.align 3\n");
+ assemble_name (file, fnname); fputs ("..na:\n", file);
+ fputs ("\t.ascii \"", file);
+ assemble_name (file, fnname);
+ fputs ("\\0\"\n", file);
+ alpha_need_linkage (fnname, 1);
+ switch_to_section (text_section);
+#endif
+}
+
+/* Emit the .prologue note at the scheduled end of the prologue. */
+
+static void
+alpha_output_function_end_prologue (FILE *file)
+{
+ if (TARGET_ABI_UNICOSMK)
+ ;
+ else if (TARGET_ABI_OPEN_VMS)
+ fputs ("\t.prologue\n", file);
+ else if (TARGET_ABI_WINDOWS_NT)
+ fputs ("\t.prologue 0\n", file);
+ else if (!flag_inhibit_size_directive)
+ fprintf (file, "\t.prologue %d\n",
+ alpha_function_needs_gp || cfun->is_thunk);
+}
+
+/* Write function epilogue. */
+
+void
+alpha_expand_epilogue (void)
+{
+ /* Registers to save. */
+ unsigned long imask = 0;
+ unsigned long fmask = 0;
+ /* Stack space needed for pushing registers clobbered by us. */
+ HOST_WIDE_INT sa_size;
+ /* Complete stack size needed. */
+ HOST_WIDE_INT frame_size;
+ /* Offset from base reg to register save area. */
+ HOST_WIDE_INT reg_offset;
+ int fp_is_frame_pointer, fp_offset;
+ rtx sa_reg, sa_reg_exp = NULL;
+ rtx sp_adj1, sp_adj2, mem, reg, insn;
+ rtx eh_ofs;
+ rtx cfa_restores = NULL_RTX;
+ int i;
+
+ sa_size = alpha_sa_size ();
+ frame_size = compute_frame_size (get_frame_size (), sa_size);
+
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ if (alpha_procedure_type == PT_STACK)
+ reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
+ else
+ reg_offset = 0;
+ }
+ else
+ reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
+
+ alpha_sa_mask (&imask, &fmask);
+
+ fp_is_frame_pointer
+ = ((TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
+ || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed));
+ fp_offset = 0;
+ sa_reg = stack_pointer_rtx;
+
+ if (crtl->calls_eh_return)
+ eh_ofs = EH_RETURN_STACKADJ_RTX;
+ else
+ eh_ofs = NULL_RTX;
+
+ if (!TARGET_ABI_UNICOSMK && sa_size)
+ {
+ /* If we have a frame pointer, restore SP from it. */
+ if ((TARGET_ABI_OPEN_VMS
+ && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
+ || (!TARGET_ABI_OPEN_VMS && frame_pointer_needed))
+ emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
+
+ /* Cope with very large offsets to the register save area. */
+ if (reg_offset + sa_size > 0x8000)
+ {
+ int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT bias;
+
+ if (low + sa_size <= 0x8000)
+ bias = reg_offset - low, reg_offset = low;
+ else
+ bias = reg_offset, reg_offset = 0;
+
+ sa_reg = gen_rtx_REG (DImode, 22);
+ sa_reg_exp = plus_constant (stack_pointer_rtx, bias);
+
+ emit_move_insn (sa_reg, sa_reg_exp);
+ }
+
+ /* Restore registers in order, excepting a true frame pointer. */
+
+ mem = gen_frame_mem (DImode, plus_constant (sa_reg, reg_offset));
+ reg = gen_rtx_REG (DImode, REG_RA);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
+
+ reg_offset += 8;
+ imask &= ~(1UL << REG_RA);
+
+ for (i = 0; i < 31; ++i)
+ if (imask & (1UL << i))
+ {
+ if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
+ fp_offset = reg_offset;
+ else
+ {
+ mem = gen_frame_mem (DImode,
+ plus_constant (sa_reg, reg_offset));
+ reg = gen_rtx_REG (DImode, i);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ reg_offset += 8;
+ }
+
+ for (i = 0; i < 31; ++i)
+ if (fmask & (1UL << i))
+ {
+ mem = gen_frame_mem (DFmode, plus_constant (sa_reg, reg_offset));
+ reg = gen_rtx_REG (DFmode, i+32);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
+ reg_offset += 8;
+ }
+ }
+ else if (TARGET_ABI_UNICOSMK && alpha_procedure_type == PT_STACK)
+ {
+ /* Restore callee-saved general-purpose registers. */
+
+ reg_offset = -56;
+
+ for (i = 9; i < 15; i++)
+ if (imask & (1UL << i))
+ {
+ mem = gen_frame_mem (DImode,
+ plus_constant (hard_frame_pointer_rtx,
+ reg_offset));
+ reg = gen_rtx_REG (DImode, i);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
+ reg_offset -= 8;
+ }
+
+ for (i = 2; i < 10; i++)
+ if (fmask & (1UL << i))
+ {
+ mem = gen_frame_mem (DFmode,
+ plus_constant (hard_frame_pointer_rtx,
+ reg_offset));
+ reg = gen_rtx_REG (DFmode, i+32);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
+ reg_offset -= 8;
+ }
+
+ /* Restore the return address from the DSIB. */
+ mem = gen_frame_mem (DImode, plus_constant (hard_frame_pointer_rtx, -8));
+ reg = gen_rtx_REG (DImode, REG_RA);
+ emit_move_insn (reg, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
+ }
+
+ if (frame_size || eh_ofs)
+ {
+ sp_adj1 = stack_pointer_rtx;
+
+ if (eh_ofs)
+ {
+ sp_adj1 = gen_rtx_REG (DImode, 23);
+ emit_move_insn (sp_adj1,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
+ }
+
+ /* If the stack size is large, begin computation into a temporary
+ register so as not to interfere with a potential fp restore,
+ which must be consecutive with an SP restore. */
+ if (frame_size < 32768
+ && ! (TARGET_ABI_UNICOSMK && cfun->calls_alloca))
+ sp_adj2 = GEN_INT (frame_size);
+ else if (TARGET_ABI_UNICOSMK)
+ {
+ sp_adj1 = gen_rtx_REG (DImode, 23);
+ emit_move_insn (sp_adj1, hard_frame_pointer_rtx);
+ sp_adj2 = const0_rtx;
+ }
+ else if (frame_size < 0x40007fffL)
+ {
+ int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
+
+ sp_adj2 = plus_constant (sp_adj1, frame_size - low);
+ if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
+ sp_adj1 = sa_reg;
+ else
+ {
+ sp_adj1 = gen_rtx_REG (DImode, 23);
+ emit_move_insn (sp_adj1, sp_adj2);
+ }
+ sp_adj2 = GEN_INT (low);
+ }
+ else
+ {
+ rtx tmp = gen_rtx_REG (DImode, 23);
+ sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
+ if (!sp_adj2)
+ {
+ /* We can't drop new things to memory this late, afaik,
+ so build it up by pieces. */
+ sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
+ -(frame_size < 0));
+ gcc_assert (sp_adj2);
+ }
+ }
+
+ /* From now on, things must be in order. So emit blockages. */
+
+ /* Restore the frame pointer. */
+ if (TARGET_ABI_UNICOSMK)
+ {
+ emit_insn (gen_blockage ());
+ mem = gen_frame_mem (DImode,
+ plus_constant (hard_frame_pointer_rtx, -16));
+ emit_move_insn (hard_frame_pointer_rtx, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
+ hard_frame_pointer_rtx, cfa_restores);
+ }
+ else if (fp_is_frame_pointer)
+ {
+ emit_insn (gen_blockage ());
+ mem = gen_frame_mem (DImode, plus_constant (sa_reg, fp_offset));
+ emit_move_insn (hard_frame_pointer_rtx, mem);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
+ hard_frame_pointer_rtx, cfa_restores);
+ }
+ else if (TARGET_ABI_OPEN_VMS)
+ {
+ emit_insn (gen_blockage ());
+ emit_move_insn (hard_frame_pointer_rtx,
+ gen_rtx_REG (DImode, vms_save_fp_regno));
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
+ hard_frame_pointer_rtx, cfa_restores);
+ }
+
+ /* Restore the stack pointer. */
+ emit_insn (gen_blockage ());
+ if (sp_adj2 == const0_rtx)
+ insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
+ else
+ insn = emit_move_insn (stack_pointer_rtx,
+ gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
+ REG_NOTES (insn) = cfa_restores;
+ add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ gcc_assert (cfa_restores == NULL);
+
+ if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
+ {
+ emit_insn (gen_blockage ());
+ insn = emit_move_insn (hard_frame_pointer_rtx,
+ gen_rtx_REG (DImode, vms_save_fp_regno));
+ add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (TARGET_ABI_UNICOSMK && alpha_procedure_type != PT_STACK)
+ {
+ /* Decrement the frame pointer if the function does not have a
+ frame. */
+ emit_insn (gen_blockage ());
+ emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx, constm1_rtx));
+ }
+ }
+}
+
+/* Output the rest of the textual info surrounding the epilogue. */
+
+void
+alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
+{
+ rtx insn;
+
+ /* We output a nop after noreturn calls at the very end of the function to
+ ensure that the return address always remains in the caller's code range,
+ as not doing so might confuse unwinding engines. */
+ insn = get_last_insn ();
+ if (!INSN_P (insn))
+ insn = prev_active_insn (insn);
+ if (insn && CALL_P (insn))
+ output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
+
+#if TARGET_ABI_OPEN_VMS
+ alpha_write_linkage (file, fnname, decl);
+#endif
+
+ /* End the function. */
+ if (!TARGET_ABI_UNICOSMK && !flag_inhibit_size_directive)
+ {
+ fputs ("\t.end ", file);
+ assemble_name (file, fnname);
+ putc ('\n', file);
+ }
+ inside_function = FALSE;
+
+ /* Output jump tables and the static subroutine information block. */
+ if (TARGET_ABI_UNICOSMK)
+ {
+ unicosmk_output_ssib (file, fnname);
+ unicosmk_output_deferred_case_vectors (file);
+ }
+}
+
+#if TARGET_ABI_OPEN_VMS
+void avms_asm_output_external (FILE *file, tree decl ATTRIBUTE_UNUSED, const char *name)
+{
+#ifdef DO_CRTL_NAMES
+ DO_CRTL_NAMES;
+#endif
+}
+#endif
+
+#if TARGET_ABI_OSF
+/* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
+
+ In order to avoid the hordes of differences between generated code
+ with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
+ lots of code loading up large constants, generate rtl and emit it
+ instead of going straight to text.
+
+ Not sure why this idea hasn't been explored before... */
+
+static void
+alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ HOST_WIDE_INT hi, lo;
+ rtx this_rtx, insn, funexp;
+
+ /* We always require a valid GP. */
+ emit_insn (gen_prologue_ldgp ());
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Find the "this" pointer. If the function returns a structure,
+ the structure return pointer is in $16. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, 17);
+ else
+ this_rtx = gen_rtx_REG (Pmode, 16);
+
+ /* Add DELTA. When possible we use ldah+lda. Otherwise load the
+ entire constant for the add. */
+ lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
+ hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
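+ /* E.g. delta == 0x18000 splits as lo == -0x8000 and hi == 0x20000,
+ so one ldah (adding hi, a multiple of 65536) plus one lda (adding
+ the sign-extended lo) reconstructs it (illustrative values). */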
+ if (hi + lo == delta)
+ {
+ if (hi)
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
+ if (lo)
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
+ }
+ else
+ {
+ rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
+ delta, -(delta < 0));
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
+ }
+
+ /* Add a delta stored in the vtable at VCALL_OFFSET. */
+ if (vcall_offset)
+ {
+ rtx tmp, tmp2;
+
+ tmp = gen_rtx_REG (Pmode, 0);
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
+
+ lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
+ hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ if (hi + lo == vcall_offset)
+ {
+ if (hi)
+ emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
+ }
+ else
+ {
+ tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
+ vcall_offset, -(vcall_offset < 0));
+ emit_insn (gen_adddi3 (tmp, tmp, tmp2));
+ lo = 0;
+ }
+ if (lo)
+ tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
+ else
+ tmp2 = tmp;
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
+
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (! TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
+ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
+ SIBLING_CALL_P (insn) = 1;
+
+ /* Run just enough of rest_of_compilation to get the insns emitted.
+ There's not really enough bulk here to make other passes such as
+ instruction scheduling worthwhile. Note that use_thunk calls
+ assemble_start_function and assemble_end_function. */
+ insn = get_insns ();
+ insn_locators_alloc ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+}
+#endif /* TARGET_ABI_OSF */
+
+/* Debugging support. */
+
+#include "gstab.h"
+
+ /* Count the number of sdb-related labels generated (to find block
+ start and end boundaries). */
+
+int sdb_label_count = 0;
+
+/* Name of the file containing the current function. */
+
+static const char *current_function_file = "";
+
+/* Offsets to alpha virtual arg/local debugging pointers. */
+
+long alpha_arg_offset;
+long alpha_auto_offset;
+
+/* Emit a new filename to a stream. */
+
+void
+alpha_output_filename (FILE *stream, const char *name)
+{
+ static int first_time = TRUE;
+
+ if (first_time)
+ {
+ first_time = FALSE;
+ ++num_source_filenames;
+ current_function_file = name;
+ fprintf (stream, "\t.file\t%d ", num_source_filenames);
+ output_quoted_string (stream, name);
+ fprintf (stream, "\n");
+ if (!TARGET_GAS && write_symbols == DBX_DEBUG)
+ fprintf (stream, "\t#@stabs\n");
+ }
+
+ else if (write_symbols == DBX_DEBUG)
+ /* dbxout.c will emit an appropriate .stabs directive. */
+ return;
+
+ else if (name != current_function_file
+ && strcmp (name, current_function_file) != 0)
+ {
+ if (inside_function && ! TARGET_GAS)
+ fprintf (stream, "\t#.file\t%d ", num_source_filenames);
+ else
+ {
+ ++num_source_filenames;
+ current_function_file = name;
+ fprintf (stream, "\t.file\t%d ", num_source_filenames);
+ }
+
+ output_quoted_string (stream, name);
+ fprintf (stream, "\n");
+ }
+}
+
+/* Structure to show the current status of registers and memory. */
+
+struct shadow_summary
+{
+ struct {
+ unsigned int i : 31; /* Mask of int regs */
+ unsigned int fp : 31; /* Mask of fp regs */
+ unsigned int mem : 1; /* mem == imem | fpmem */
+ } used, defd;
+};
+
+ /* Summarize the effects of expression X on the machine. Update SUM, a pointer
+ to the summary structure. SET is nonzero if the insn is setting the
+ object, otherwise zero. */
+
+static void
+summarize_insn (rtx x, struct shadow_summary *sum, int set)
+{
+ const char *format_ptr;
+ int i, j;
+
+ if (x == 0)
+ return;
+
+ switch (GET_CODE (x))
+ {
+ /* ??? Note that this case would be incorrect if the Alpha had a
+ ZERO_EXTRACT in SET_DEST. */
+ case SET:
+ summarize_insn (SET_SRC (x), sum, 0);
+ summarize_insn (SET_DEST (x), sum, 1);
+ break;
+
+ case CLOBBER:
+ summarize_insn (XEXP (x, 0), sum, 1);
+ break;
+
+ case USE:
+ summarize_insn (XEXP (x, 0), sum, 0);
+ break;
+
+ case ASM_OPERANDS:
+ for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
+ summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
+ break;
+
+ case PARALLEL:
+ for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+ summarize_insn (XVECEXP (x, 0, i), sum, 0);
+ break;
+
+ case SUBREG:
+ summarize_insn (SUBREG_REG (x), sum, 0);
+ break;
+
+ case REG:
+ {
+ int regno = REGNO (x);
+ unsigned long mask = ((unsigned long) 1) << (regno % 32);
+
+ if (regno == 31 || regno == 63)
+ break;
+
+ if (set)
+ {
+ if (regno < 32)
+ sum->defd.i |= mask;
+ else
+ sum->defd.fp |= mask;
+ }
+ else
+ {
+ if (regno < 32)
+ sum->used.i |= mask;
+ else
+ sum->used.fp |= mask;
+ }
+ }
+ break;
+
+ case MEM:
+ if (set)
+ sum->defd.mem = 1;
+ else
+ sum->used.mem = 1;
+
+ /* Find the regs used in memory address computation: */
+ summarize_insn (XEXP (x, 0), sum, 0);
+ break;
+
+ case CONST_INT: case CONST_DOUBLE:
+ case SYMBOL_REF: case LABEL_REF: case CONST:
+ case SCRATCH: case ASM_INPUT:
+ break;
+
+ /* Handle common unary and binary ops for efficiency. */
+ case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
+ case MOD: case UDIV: case UMOD: case AND: case IOR:
+ case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
+ case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
+ case NE: case EQ: case GE: case GT: case LE:
+ case LT: case GEU: case GTU: case LEU: case LTU:
+ summarize_insn (XEXP (x, 0), sum, 0);
+ summarize_insn (XEXP (x, 1), sum, 0);
+ break;
+
+ case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
+ case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
+ case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
+ case SQRT: case FFS:
+ summarize_insn (XEXP (x, 0), sum, 0);
+ break;
+
+ default:
+ format_ptr = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ switch (format_ptr[i])
+ {
+ case 'e':
+ summarize_insn (XEXP (x, i), sum, 0);
+ break;
+
+ case 'E':
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ summarize_insn (XVECEXP (x, i, j), sum, 0);
+ break;
+
+ case 'i':
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+}
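+
+/* A typical use, as in alpha_handle_trap_shadows below: clear a
+ struct shadow_summary, call summarize_insn (PATTERN (i), &sum, 0),
+ then test e.g. sum.defd.i & shadow.used.i to detect a write to a
+ register that is live within the current trap shadow. */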
+
+/* Ensure a sufficient number of `trapb' insns are in the code when
+ the user requests code with a trap precision of functions or
+ instructions.
+
+ In naive mode, when the user requests a trap-precision of
+ "instruction", a trapb is needed after every instruction that may
+ generate a trap. This ensures that the code is resumption safe but
+ it is also slow.
+
+ When optimizations are turned on, we delay issuing a trapb as long
+ as possible. In this context, a trap shadow is the sequence of
+ instructions that starts with a (potentially) trap generating
+ instruction and extends to the next trapb or call_pal instruction
+ (but GCC never generates call_pal by itself). We can delay (and
+ therefore sometimes omit) a trapb subject to the following
+ conditions:
+
+ (a) On entry to the trap shadow, if any Alpha register or memory
+ location contains a value that is used as an operand value by some
+ instruction in the trap shadow (live on entry), then no instruction
+ in the trap shadow may modify the register or memory location.
+
+ (b) Within the trap shadow, the computation of the base register
+ for a memory load or store instruction may not involve using the
+ result of an instruction that might generate an UNPREDICTABLE
+ result.
+
+ (c) Within the trap shadow, no register may be used more than once
+ as a destination register. (This is to make life easier for the
+ trap-handler.)
+
+ (d) The trap shadow may not include any branch instructions. */
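+
+/* As a hypothetical illustration of (c): "addt $f1,$f2,$f0" followed
+ by "mult $f3,$f4,$f0" may not share a trap shadow, since both name
+ $f0 as their destination; a trapb must separate them. */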
+
+static void
+alpha_handle_trap_shadows (void)
+{
+ struct shadow_summary shadow;
+ int trap_pending, exception_nesting;
+ rtx i, n;
+
+ trap_pending = 0;
+ exception_nesting = 0;
+ shadow.used.i = 0;
+ shadow.used.fp = 0;
+ shadow.used.mem = 0;
+ shadow.defd = shadow.used;
+
+ for (i = get_insns (); i ; i = NEXT_INSN (i))
+ {
+ if (NOTE_P (i))
+ {
+ switch (NOTE_KIND (i))
+ {
+ case NOTE_INSN_EH_REGION_BEG:
+ exception_nesting++;
+ if (trap_pending)
+ goto close_shadow;
+ break;
+
+ case NOTE_INSN_EH_REGION_END:
+ exception_nesting--;
+ if (trap_pending)
+ goto close_shadow;
+ break;
+
+ case NOTE_INSN_EPILOGUE_BEG:
+ if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
+ goto close_shadow;
+ break;
+ }
+ }
+ else if (trap_pending)
+ {
+ if (alpha_tp == ALPHA_TP_FUNC)
+ {
+ if (JUMP_P (i)
+ && GET_CODE (PATTERN (i)) == RETURN)
+ goto close_shadow;
+ }
+ else if (alpha_tp == ALPHA_TP_INSN)
+ {
+ if (optimize > 0)
+ {
+ struct shadow_summary sum;
+
+ sum.used.i = 0;
+ sum.used.fp = 0;
+ sum.used.mem = 0;
+ sum.defd = sum.used;
+
+ switch (GET_CODE (i))
+ {
+ case INSN:
+ /* Annoyingly, get_attr_trap will die on these. */
+ if (GET_CODE (PATTERN (i)) == USE
+ || GET_CODE (PATTERN (i)) == CLOBBER)
+ break;
+
+ summarize_insn (PATTERN (i), &sum, 0);
+
+ if ((sum.defd.i & shadow.defd.i)
+ || (sum.defd.fp & shadow.defd.fp))
+ {
+ /* (c) would be violated */
+ goto close_shadow;
+ }
+
+ /* Combine shadow with summary of current insn: */
+ shadow.used.i |= sum.used.i;
+ shadow.used.fp |= sum.used.fp;
+ shadow.used.mem |= sum.used.mem;
+ shadow.defd.i |= sum.defd.i;
+ shadow.defd.fp |= sum.defd.fp;
+ shadow.defd.mem |= sum.defd.mem;
+
+ if ((sum.defd.i & shadow.used.i)
+ || (sum.defd.fp & shadow.used.fp)
+ || (sum.defd.mem & shadow.used.mem))
+ {
+ /* (a) would be violated (also takes care of (b)) */
+ gcc_assert (get_attr_trap (i) != TRAP_YES
+ || (!(sum.defd.i & sum.used.i)
+ && !(sum.defd.fp & sum.used.fp)));
+
+ goto close_shadow;
+ }
+ break;
+
+ case JUMP_INSN:
+ case CALL_INSN:
+ case CODE_LABEL:
+ goto close_shadow;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ close_shadow:
+ n = emit_insn_before (gen_trapb (), i);
+ PUT_MODE (n, TImode);
+ PUT_MODE (i, TImode);
+ trap_pending = 0;
+ shadow.used.i = 0;
+ shadow.used.fp = 0;
+ shadow.used.mem = 0;
+ shadow.defd = shadow.used;
+ }
+ }
+ }
+
+ if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
+ && NONJUMP_INSN_P (i)
+ && GET_CODE (PATTERN (i)) != USE
+ && GET_CODE (PATTERN (i)) != CLOBBER
+ && get_attr_trap (i) == TRAP_YES)
+ {
+ if (optimize && !trap_pending)
+ summarize_insn (PATTERN (i), &shadow, 0);
+ trap_pending = 1;
+ }
+ }
+}
+
+/* Alpha can only issue instruction groups simultaneously if they are
+ suitably aligned. This is very processor-specific. */
+/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
+ that are marked "fake". These instructions do not exist on that target,
+ but it is possible to see these insns with deranged combinations of
+ command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
+ choose a result at random. */
+
+enum alphaev4_pipe {
+ EV4_STOP = 0,
+ EV4_IB0 = 1,
+ EV4_IB1 = 2,
+ EV4_IBX = 4
+};
+
+enum alphaev5_pipe {
+ EV5_STOP = 0,
+ EV5_NONE = 1,
+ EV5_E01 = 2,
+ EV5_E0 = 4,
+ EV5_E1 = 8,
+ EV5_FAM = 16,
+ EV5_FA = 32,
+ EV5_FM = 64
+};
+
+static enum alphaev4_pipe
+alphaev4_insn_pipe (rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ return EV4_STOP;
+ if (get_attr_length (insn) != 4)
+ return EV4_STOP;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ILD:
+ case TYPE_LDSYM:
+ case TYPE_FLD:
+ case TYPE_LD_L:
+ return EV4_IBX;
+
+ case TYPE_IADD:
+ case TYPE_ILOG:
+ case TYPE_ICMOV:
+ case TYPE_ICMP:
+ case TYPE_FST:
+ case TYPE_SHIFT:
+ case TYPE_IMUL:
+ case TYPE_FBR:
+ case TYPE_MVI: /* fake */
+ return EV4_IB0;
+
+ case TYPE_IST:
+ case TYPE_MISC:
+ case TYPE_IBR:
+ case TYPE_JSR:
+ case TYPE_CALLPAL:
+ case TYPE_FCPYS:
+ case TYPE_FCMOV:
+ case TYPE_FADD:
+ case TYPE_FDIV:
+ case TYPE_FMUL:
+ case TYPE_ST_C:
+ case TYPE_MB:
+ case TYPE_FSQRT: /* fake */
+ case TYPE_FTOI: /* fake */
+ case TYPE_ITOF: /* fake */
+ return EV4_IB1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static enum alphaev5_pipe
+alphaev5_insn_pipe (rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ return EV5_STOP;
+ if (get_attr_length (insn) != 4)
+ return EV5_STOP;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ILD:
+ case TYPE_FLD:
+ case TYPE_LDSYM:
+ case TYPE_IADD:
+ case TYPE_ILOG:
+ case TYPE_ICMOV:
+ case TYPE_ICMP:
+ return EV5_E01;
+
+ case TYPE_IST:
+ case TYPE_FST:
+ case TYPE_SHIFT:
+ case TYPE_IMUL:
+ case TYPE_MISC:
+ case TYPE_MVI:
+ case TYPE_LD_L:
+ case TYPE_ST_C:
+ case TYPE_MB:
+ case TYPE_FTOI: /* fake */
+ case TYPE_ITOF: /* fake */
+ return EV5_E0;
+
+ case TYPE_IBR:
+ case TYPE_JSR:
+ case TYPE_CALLPAL:
+ return EV5_E1;
+
+ case TYPE_FCPYS:
+ return EV5_FAM;
+
+ case TYPE_FBR:
+ case TYPE_FCMOV:
+ case TYPE_FADD:
+ case TYPE_FDIV:
+ case TYPE_FSQRT: /* fake */
+ return EV5_FA;
+
+ case TYPE_FMUL:
+ return EV5_FM;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* IN_USE is a mask of the slots currently filled within the insn group.
+ The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
+ the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
+
+ LEN is, of course, the length of the group in bytes. */
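+
+/* For instance, if an IBX-capable load is slotted first (taking IB0)
+ and an IB0-only insn arrives next, the code below also sets EV4_IB1,
+ reflecting the hardware moving the load into IB1 (an illustrative
+ reading of the mask updates). */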
+
+static rtx
+alphaev4_next_group (rtx insn, int *pin_use, int *plen)
+{
+ int len, in_use;
+
+ len = in_use = 0;
+
+ if (! INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == CLOBBER
+ || GET_CODE (PATTERN (insn)) == USE)
+ goto next_and_done;
+
+ while (1)
+ {
+ enum alphaev4_pipe pipe;
+
+ pipe = alphaev4_insn_pipe (insn);
+ switch (pipe)
+ {
+ case EV4_STOP:
+ /* Force complex instructions to start new groups. */
+ if (in_use)
+ goto done;
+
+ /* If this is a completely unrecognized insn, it's an asm.
+ We don't know how long it is, so record length as -1 to
+ signal a needed realignment. */
+ if (recog_memoized (insn) < 0)
+ len = -1;
+ else
+ len = get_attr_length (insn);
+ goto next_and_done;
+
+ case EV4_IBX:
+ if (in_use & EV4_IB0)
+ {
+ if (in_use & EV4_IB1)
+ goto done;
+ in_use |= EV4_IB1;
+ }
+ else
+ in_use |= EV4_IB0 | EV4_IBX;
+ break;
+
+ case EV4_IB0:
+ if (in_use & EV4_IB0)
+ {
+ if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
+ goto done;
+ in_use |= EV4_IB1;
+ }
+ in_use |= EV4_IB0;
+ break;
+
+ case EV4_IB1:
+ if (in_use & EV4_IB1)
+ goto done;
+ in_use |= EV4_IB1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ len += 4;
+
+ /* Haifa doesn't do well scheduling branches. */
+ if (JUMP_P (insn))
+ goto next_and_done;
+
+ next:
+ insn = next_nonnote_insn (insn);
+
+ if (!insn || ! INSN_P (insn))
+ goto done;
+
+ /* Let Haifa tell us where it thinks insn group boundaries are. */
+ if (GET_MODE (insn) == TImode)
+ goto done;
+
+ if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
+ goto next;
+ }
+
+ next_and_done:
+ insn = next_nonnote_insn (insn);
+
+ done:
+ *plen = len;
+ *pin_use = in_use;
+ return insn;
+}
+
+/* IN_USE is a mask of the slots currently filled within the insn group.
+ The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
+ the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
+
+ LEN is, of course, the length of the group in bytes. */
+
+static rtx
+alphaev5_next_group (rtx insn, int *pin_use, int *plen)
+{
+ int len, in_use;
+
+ len = in_use = 0;
+
+ if (! INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == CLOBBER
+ || GET_CODE (PATTERN (insn)) == USE)
+ goto next_and_done;
+
+ while (1)
+ {
+ enum alphaev5_pipe pipe;
+
+ pipe = alphaev5_insn_pipe (insn);
+ switch (pipe)
+ {
+ case EV5_STOP:
+ /* Force complex instructions to start new groups. */
+ if (in_use)
+ goto done;
+
+ /* If this is a completely unrecognized insn, it's an asm.
+ We don't know how long it is, so record length as -1 to
+ signal a needed realignment. */
+ if (recog_memoized (insn) < 0)
+ len = -1;
+ else
+ len = get_attr_length (insn);
+ goto next_and_done;
+
+ /* ??? Most of the places below, we would like to assert never
+ happen, as it would indicate an error either in Haifa, or
+ in the scheduling description. Unfortunately, Haifa never
+ schedules the last instruction of the BB, so we don't have
+ an accurate TI bit to go off. */
+ case EV5_E01:
+ if (in_use & EV5_E0)
+ {
+ if (in_use & EV5_E1)
+ goto done;
+ in_use |= EV5_E1;
+ }
+ else
+ in_use |= EV5_E0 | EV5_E01;
+ break;
+
+ case EV5_E0:
+ if (in_use & EV5_E0)
+ {
+ if (!(in_use & EV5_E01) || (in_use & EV5_E1))
+ goto done;
+ in_use |= EV5_E1;
+ }
+ in_use |= EV5_E0;
+ break;
+
+ case EV5_E1:
+ if (in_use & EV5_E1)
+ goto done;
+ in_use |= EV5_E1;
+ break;
+
+ case EV5_FAM:
+ if (in_use & EV5_FA)
+ {
+ if (in_use & EV5_FM)
+ goto done;
+ in_use |= EV5_FM;
+ }
+ else
+ in_use |= EV5_FA | EV5_FAM;
+ break;
+
+ case EV5_FA:
+ if (in_use & EV5_FA)
+ goto done;
+ in_use |= EV5_FA;
+ break;
+
+ case EV5_FM:
+ if (in_use & EV5_FM)
+ goto done;
+ in_use |= EV5_FM;
+ break;
+
+ case EV5_NONE:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ len += 4;
+
+ /* Haifa doesn't do well scheduling branches. */
+ /* ??? If this is predicted not-taken, slotting continues, except
+ that no more IBR, FBR, or JSR insns may be slotted. */
+ if (JUMP_P (insn))
+ goto next_and_done;
+
+ next:
+ insn = next_nonnote_insn (insn);
+
+ if (!insn || ! INSN_P (insn))
+ goto done;
+
+ /* Let Haifa tell us where it thinks insn group boundaries are. */
+ if (GET_MODE (insn) == TImode)
+ goto done;
+
+ if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
+ goto next;
+ }
+
+ next_and_done:
+ insn = next_nonnote_insn (insn);
+
+ done:
+ *plen = len;
+ *pin_use = in_use;
+ return insn;
+}
+
+static rtx
+alphaev4_next_nop (int *pin_use)
+{
+ int in_use = *pin_use;
+ rtx nop;
+
+ if (!(in_use & EV4_IB0))
+ {
+ in_use |= EV4_IB0;
+ nop = gen_nop ();
+ }
+ else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
+ {
+ in_use |= EV4_IB1;
+ nop = gen_nop ();
+ }
+ else if (TARGET_FP && !(in_use & EV4_IB1))
+ {
+ in_use |= EV4_IB1;
+ nop = gen_fnop ();
+ }
+ else
+ nop = gen_unop ();
+
+ *pin_use = in_use;
+ return nop;
+}
+
+static rtx
+alphaev5_next_nop (int *pin_use)
+{
+ int in_use = *pin_use;
+ rtx nop;
+
+ if (!(in_use & EV5_E1))
+ {
+ in_use |= EV5_E1;
+ nop = gen_nop ();
+ }
+ else if (TARGET_FP && !(in_use & EV5_FA))
+ {
+ in_use |= EV5_FA;
+ nop = gen_fnop ();
+ }
+ else if (TARGET_FP && !(in_use & EV5_FM))
+ {
+ in_use |= EV5_FM;
+ nop = gen_fnop ();
+ }
+ else
+ nop = gen_unop ();
+
+ *pin_use = in_use;
+ return nop;
+}
+
+/* The instruction group alignment main loop. */
+
+static void
+alpha_align_insns (unsigned int max_align,
+ rtx (*next_group) (rtx, int *, int *),
+ rtx (*next_nop) (int *))
+{
+ /* ALIGN is the known alignment for the insn group. */
+ unsigned int align;
+ /* OFS is the offset of the current insn in the insn group. */
+ int ofs;
+ int prev_in_use, in_use, len, ldgp;
+ rtx i, next;
+
+ /* Let shorten branches care for assigning alignments to code labels. */
+ shorten_branches (get_insns ());
+
+ if (align_functions < 4)
+ align = 4;
+ else if ((unsigned int) align_functions < max_align)
+ align = align_functions;
+ else
+ align = max_align;
+
+ ofs = prev_in_use = 0;
+ i = get_insns ();
+ if (NOTE_P (i))
+ i = next_nonnote_insn (i);
+
+ ldgp = alpha_function_needs_gp ? 8 : 0;
+
+ while (i)
+ {
+ next = (*next_group) (i, &in_use, &len);
+
+ /* When we see a label, resync alignment etc. */
+ if (LABEL_P (i))
+ {
+ unsigned int new_align = 1 << label_to_alignment (i);
+
+ if (new_align >= align)
+ {
+ align = new_align < max_align ? new_align : max_align;
+ ofs = 0;
+ }
+
+ else if (ofs & (new_align-1))
+ ofs = (ofs | (new_align-1)) + 1;
+ gcc_assert (!len);
+ }
+
+ /* Handle complex instructions specially. */
+ else if (in_use == 0)
+ {
+ /* Asms will have length < 0. This is a signal that we have
+ lost alignment knowledge. Assume, however, that the asm
+ will not mis-align instructions. */
+ if (len < 0)
+ {
+ ofs = 0;
+ align = 4;
+ len = 0;
+ }
+ }
+
+ /* If the known alignment is smaller than the recognized insn group,
+ realign the output. */
+ else if ((int) align < len)
+ {
+ unsigned int new_log_align = len > 8 ? 4 : 3;
+ rtx prev, where;
+
+ where = prev = prev_nonnote_insn (i);
+ if (!where || !LABEL_P (where))
+ where = i;
+
+ /* Can't realign between a call and its gp reload. */
+ if (! (TARGET_EXPLICIT_RELOCS
+ && prev && CALL_P (prev)))
+ {
+ emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
+ align = 1 << new_log_align;
+ ofs = 0;
+ }
+ }
+
+ /* We may not insert padding inside the initial ldgp sequence. */
+ else if (ldgp > 0)
+ ldgp -= len;
+
+ /* If the group won't fit in the same INT16 as the previous,
+ we need to add padding to keep the group together. Rather
+ than simply leaving the insn filling to the assembler, we
+ can make use of the knowledge of what sorts of instructions
+ were issued in the previous group to make sure that all of
+ the added nops are really free. */
+ else if (ofs + len > (int) align)
+ {
+ int nop_count = (align - ofs) / 4;
+ rtx where;
+
+ /* Insert nops before labels, branches, and calls to truly merge
+ the execution of the nops with the previous instruction group. */
+ where = prev_nonnote_insn (i);
+ if (where)
+ {
+ if (LABEL_P (where))
+ {
+ rtx where2 = prev_nonnote_insn (where);
+ if (where2 && JUMP_P (where2))
+ where = where2;
+ }
+ else if (NONJUMP_INSN_P (where))
+ where = i;
+ }
+ else
+ where = i;
+
+ do
+ emit_insn_before ((*next_nop)(&prev_in_use), where);
+ while (--nop_count);
+ ofs = 0;
+ }
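+ /* For instance, with align == 16, ofs == 8 and a 12-byte incoming
+ group, two nops are emitted so that the group starts at the next
+ 16-byte boundary (illustrative numbers). */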
+
+ ofs = (ofs + len) & (align - 1);
+ prev_in_use = in_use;
+ i = next;
+ }
+}
+
+/* Insert an unop between a sibcall or noreturn function call and the
+ GP load. */
+
+static void
+alpha_pad_function_end (void)
+{
+ rtx insn, next;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (! (CALL_P (insn)
+ && (SIBLING_CALL_P (insn)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX))))
+ continue;
+
+ next = next_active_insn (insn);
+
+ if (next)
+ {
+ rtx pat = PATTERN (next);
+
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
+ && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
+ emit_insn_after (gen_unop (), insn);
+ }
+ }
+}
+
+/* Machine dependent reorg pass. */
+
+static void
+alpha_reorg (void)
+{
+ /* Workaround for a linker error that triggers when an exception
+ handler immediately follows a sibcall or a noreturn function.
+
+ In the sibcall case:
+
+ The instruction stream from an object file:
+
+ 1d8: 00 00 fb 6b jmp (t12)
+ 1dc: 00 00 ba 27 ldah gp,0(ra)
+ 1e0: 00 00 bd 23 lda gp,0(gp)
+ 1e4: 00 00 7d a7 ldq t12,0(gp)
+ 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
+
+ was converted in the final link pass to:
+
+ 12003aa88: 67 fa ff c3 br 120039428 <...>
+ 12003aa8c: 00 00 fe 2f unop
+ 12003aa90: 00 00 fe 2f unop
+ 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
+ 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
+
+ And in the noreturn case:
+
+ The instruction stream from an object file:
+
+ 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
+ 58: 00 00 ba 27 ldah gp,0(ra)
+ 5c: 00 00 bd 23 lda gp,0(gp)
+ 60: 00 00 7d a7 ldq t12,0(gp)
+ 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
+
+ was converted in the final link pass to:
+
+ fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
+ fdb28: 00 00 fe 2f unop
+ fdb2c: 00 00 fe 2f unop
+ fdb30: 30 82 7d a7 ldq t12,-32208(gp)
+ fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
+
+ GP load instructions were wrongly cleared by the linker relaxation
+ pass. This workaround prevents removal of GP loads by inserting
+ an unop instruction between a sibcall or noreturn function call and
+ the exception handler prologue. */
+
+ if (current_function_has_exception_handlers ())
+ alpha_pad_function_end ();
+
+ if (alpha_tp != ALPHA_TP_PROG || flag_exceptions)
+ alpha_handle_trap_shadows ();
+
+ /* Due to the number of extra trapb insns, don't bother fixing up
+ alignment when trap precision is instruction. Moreover, we can
+ only do our job when sched2 is run. */
+ if (optimize && !optimize_size
+ && alpha_tp != ALPHA_TP_INSN
+ && flag_schedule_insns_after_reload)
+ {
+ if (alpha_tune == PROCESSOR_EV4)
+ alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop);
+ else if (alpha_tune == PROCESSOR_EV5)
+ alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop);
+ }
+}
+
+#if !TARGET_ABI_UNICOSMK
+
+#ifdef HAVE_STAMP_H
+#include <stamp.h>
+#endif
+
+static void
+alpha_file_start (void)
+{
+#ifdef OBJECT_FORMAT_ELF
+ /* If emitting dwarf2 debug information, we cannot generate a .file
+ directive to start the file, as it will conflict with dwarf2out
+ file numbers. So it's only useful when emitting mdebug output. */
+ targetm.asm_file_start_file_directive = (write_symbols == DBX_DEBUG);
+#endif
+
+ default_file_start ();
+#ifdef MS_STAMP
+ fprintf (asm_out_file, "\t.verstamp %d %d\n", MS_STAMP, LS_STAMP);
+#endif
+
+ fputs ("\t.set noreorder\n", asm_out_file);
+ fputs ("\t.set volatile\n", asm_out_file);
+ if (!TARGET_ABI_OPEN_VMS)
+ fputs ("\t.set noat\n", asm_out_file);
+ if (TARGET_EXPLICIT_RELOCS)
+ fputs ("\t.set nomacro\n", asm_out_file);
+ if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
+ {
+ const char *arch;
+
+ if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
+ arch = "ev6";
+ else if (TARGET_MAX)
+ arch = "pca56";
+ else if (TARGET_BWX)
+ arch = "ev56";
+ else if (alpha_cpu == PROCESSOR_EV5)
+ arch = "ev5";
+ else
+ arch = "ev4";
+
+ fprintf (asm_out_file, "\t.arch %s\n", arch);
+ }
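+
+ /* For example (illustrative): with only the BWX extension enabled,
+ the tests above fall through to "ev56". */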
+}
+#endif
+
+#ifdef OBJECT_FORMAT_ELF
+/* Since we don't have a .dynbss section, we should not allow global
+ relocations in the .rodata section. */
+
+static int
+alpha_elf_reloc_rw_mask (void)
+{
+ return flag_pic ? 3 : 2;
+}
+
+/* Return a section for X. The only special thing we do here is to
+ honor small data. */
+
+static section *
+alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
+ /* ??? Consider using mergeable sdata sections. */
+ return sdata_section;
+ else
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+static unsigned int
+alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = 0;
+
+ if (strcmp (name, ".sdata") == 0
+ || strncmp (name, ".sdata.", 7) == 0
+ || strncmp (name, ".gnu.linkonce.s.", 16) == 0
+ || strcmp (name, ".sbss") == 0
+ || strncmp (name, ".sbss.", 6) == 0
+ || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
+ flags = SECTION_SMALL;
+
+ flags |= default_section_type_flags (decl, name, reloc);
+ return flags;
+}
+#endif /* OBJECT_FORMAT_ELF */
+
+/* Structure to collect function names for final output in link section. */
+/* Note that items marked with GTY can't be ifdef'ed out. */
+
+enum links_kind {KIND_UNUSED, KIND_LOCAL, KIND_EXTERN};
+enum reloc_kind {KIND_LINKAGE, KIND_CODEADDR};
+
+struct GTY(()) alpha_links
+{
+ int num;
+ const char *target;
+ rtx linkage;
+ enum links_kind lkind;
+ enum reloc_kind rkind;
+};
+
+struct GTY(()) alpha_funcs
+{
+ int num;
+ splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
+ links;
+};
+
+static GTY ((param1_is (char *), param2_is (struct alpha_links *)))
+ splay_tree alpha_links_tree;
+static GTY ((param1_is (tree), param2_is (struct alpha_funcs *)))
+ splay_tree alpha_funcs_tree;
+
+static GTY(()) int alpha_funcs_num;
+
+#if TARGET_ABI_OPEN_VMS
+
+/* Return the VMS argument type corresponding to MODE. */
+
+enum avms_arg_type
+alpha_arg_type (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case SFmode:
+ return TARGET_FLOAT_VAX ? FF : FS;
+ case DFmode:
+ return TARGET_FLOAT_VAX ? FD : FT;
+ default:
+ return I64;
+ }
+}
+
+/* Return an rtx for an integer representing the VMS Argument Information
+ register value. */
+
+rtx
+alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
+{
+ unsigned HOST_WIDE_INT regval = cum.num_args;
+ int i;
+
+ for (i = 0; i < 6; i++)
+ regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
+
+ return GEN_INT (regval);
+}
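+
+/* For instance (a sketch): with cum.num_args == 2, the two 3-bit type
+ codes occupy bits 8-10 and 11-13 of the AI value, so a consumer can
+ recover type i via (regval >> (i * 3 + 8)) & 7. */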
+
+/* Register the need for a (fake) .linkage entry for calls to function NAME.
+ IS_LOCAL is 1 if this is for a definition, 0 if this is for a real call.
+ Return a SYMBOL_REF suited to the call instruction. */
+
+rtx
+alpha_need_linkage (const char *name, int is_local)
+{
+ splay_tree_node node;
+ struct alpha_links *al;
+ const char *target;
+ tree id;
+
+ if (name[0] == '*')
+ name++;
+
+ if (is_local)
+ {
+ struct alpha_funcs *cfaf;
+
+ if (!alpha_funcs_tree)
+ alpha_funcs_tree = splay_tree_new_ggc
+ (splay_tree_compare_pointers,
+ ggc_alloc_splay_tree_tree_node_tree_node_splay_tree_s,
+ ggc_alloc_splay_tree_tree_node_tree_node_splay_tree_node_s);
+
+
+ cfaf = ggc_alloc_alpha_funcs ();
+
+ cfaf->links = 0;
+ cfaf->num = ++alpha_funcs_num;
+
+ splay_tree_insert (alpha_funcs_tree,
+ (splay_tree_key) current_function_decl,
+ (splay_tree_value) cfaf);
+ }
+
+ if (alpha_links_tree)
+ {
+ /* Is this name already defined? */
+
+ node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
+ if (node)
+ {
+ al = (struct alpha_links *) node->value;
+ if (is_local)
+ {
+ /* Defined here but external assumed. */
+ if (al->lkind == KIND_EXTERN)
+ al->lkind = KIND_LOCAL;
+ }
+ else
+ {
+ /* Used here but unused assumed. */
+ if (al->lkind == KIND_UNUSED)
+ al->lkind = KIND_LOCAL;
+ }
+ return al->linkage;
+ }
+ }
+ else
+ alpha_links_tree = splay_tree_new_ggc
+ ((splay_tree_compare_fn) strcmp,
+ ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
+ ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
+
+ al = ggc_alloc_alpha_links ();
+ name = ggc_strdup (name);
+
+ /* Assume external if no definition. */
+ al->lkind = (is_local ? KIND_UNUSED : KIND_EXTERN);
+
+ /* Ensure we have an IDENTIFIER so assemble_name can mark it used
+ and find the ultimate alias target like assemble_name. */
+ id = get_identifier (name);
+ target = NULL;
+ while (IDENTIFIER_TRANSPARENT_ALIAS (id))
+ {
+ id = TREE_CHAIN (id);
+ target = IDENTIFIER_POINTER (id);
+ }
+
+ al->target = target ? target : name;
+ al->linkage = gen_rtx_SYMBOL_REF (Pmode, name);
+
+ splay_tree_insert (alpha_links_tree, (splay_tree_key) name,
+ (splay_tree_value) al);
+
+ return al->linkage;
+}
+
+/* Return a SYMBOL_REF representing the reference to the .linkage entry
+ of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
+ this is the reference to the linkage pointer value, 0 if this is the
+ reference to the function entry value. RFLAG is 1 if this a reduced
+ reference (code address only), 0 if this is a full reference. */
+
+rtx
+alpha_use_linkage (rtx func, tree cfundecl, int lflag, int rflag)
+{
+ splay_tree_node cfunnode;
+ struct alpha_funcs *cfaf;
+ struct alpha_links *al;
+ const char *name = XSTR (func, 0);
+
+ cfaf = (struct alpha_funcs *) 0;
+ al = (struct alpha_links *) 0;
+
+ cfunnode = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) cfundecl);
+ cfaf = (struct alpha_funcs *) cfunnode->value;
+
+ if (cfaf->links)
+ {
+ splay_tree_node lnode;
+
+ /* Is this name already defined? */
+
+ lnode = splay_tree_lookup (cfaf->links, (splay_tree_key) name);
+ if (lnode)
+ al = (struct alpha_links *) lnode->value;
+ }
+ else
+ cfaf->links = splay_tree_new_ggc
+ ((splay_tree_compare_fn) strcmp,
+ ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
+ ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
+
+ if (!al)
+ {
+ size_t name_len;
+ size_t buflen;
+ char *linksym;
+ splay_tree_node node = 0;
+ struct alpha_links *anl;
+
+ if (name[0] == '*')
+ name++;
+
+ name_len = strlen (name);
+ linksym = (char *) alloca (name_len + 50);
+
+ al = ggc_alloc_alpha_links ();
+ al->num = cfaf->num;
+ al->target = NULL;
+
+ node = splay_tree_lookup (alpha_links_tree, (splay_tree_key) name);
+ if (node)
+ {
+ anl = (struct alpha_links *) node->value;
+ al->lkind = anl->lkind;
+ name = anl->target;
+ }
+
+ sprintf (linksym, "$%d..%s..lk", cfaf->num, name);
+ buflen = strlen (linksym);
+
+ al->linkage = gen_rtx_SYMBOL_REF
+ (Pmode, ggc_alloc_string (linksym, buflen + 1));
+
+ splay_tree_insert (cfaf->links, (splay_tree_key) name,
+ (splay_tree_value) al);
+ }
+
+ if (rflag)
+ al->rkind = KIND_CODEADDR;
+ else
+ al->rkind = KIND_LINKAGE;
+
+ if (lflag)
+ return gen_rtx_MEM (Pmode, plus_constant (al->linkage, 8));
+ else
+ return al->linkage;
+}
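+
+/* For example, a call to a function FOO from the first function in
+ the module creates the linkage symbol "$1..FOO..lk"; with LFLAG set,
+ the reference above addresses the second quadword of the linkage
+ pair (see alpha_write_one_linkage below). */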
+
+static int
+alpha_write_one_linkage (splay_tree_node node, void *data)
+{
+ const char *const name = (const char *) node->key;
+ struct alpha_links *link = (struct alpha_links *) node->value;
+ FILE *stream = (FILE *) data;
+
+ fprintf (stream, "$%d..%s..lk:\n", link->num, name);
+ if (link->rkind == KIND_CODEADDR)
+ {
+ if (link->lkind == KIND_LOCAL)
+ {
+ /* Local and used */
+ fprintf (stream, "\t.quad %s..en\n", name);
+ }
+ else
+ {
+ /* External and used, request code address. */
+ fprintf (stream, "\t.code_address %s\n", name);
+ }
+ }
+ else
+ {
+ if (link->lkind == KIND_LOCAL)
+ {
+ /* Local and used, build linkage pair. */
+ fprintf (stream, "\t.quad %s..en\n", name);
+ fprintf (stream, "\t.quad %s\n", name);
+ }
+ else
+ {
+ /* External and used, request linkage pair. */
+ fprintf (stream, "\t.linkage %s\n", name);
+ }
+ }
+
+ return 0;
+}
+
+static void
+alpha_write_linkage (FILE *stream, const char *funname, tree fundecl)
+{
+ splay_tree_node node;
+ struct alpha_funcs *func;
+
+ fprintf (stream, "\t.link\n");
+ fprintf (stream, "\t.align 3\n");
+ in_section = NULL;
+
+ node = splay_tree_lookup (alpha_funcs_tree, (splay_tree_key) fundecl);
+ func = (struct alpha_funcs *) node->value;
+
+ fputs ("\t.name ", stream);
+ assemble_name (stream, funname);
+ fputs ("..na\n", stream);
+ ASM_OUTPUT_LABEL (stream, funname);
+ fprintf (stream, "\t.pdesc ");
+ assemble_name (stream, funname);
+ fprintf (stream, "..en,%s\n",
+ alpha_procedure_type == PT_STACK ? "stack"
+ : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
+
+ if (func->links)
+ {
+ splay_tree_foreach (func->links, alpha_write_one_linkage, stream);
+ /* splay_tree_delete (func->links); */
+ }
+}
+
+/* Switch to an arbitrary section NAME with attributes as specified
+ by FLAGS. ALIGN specifies any known alignment requirements for
+ the section; 0 if the default should be used. */
+
+static void
+vms_asm_named_section (const char *name, unsigned int flags,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ fputc ('\n', asm_out_file);
+ fprintf (asm_out_file, ".section\t%s", name);
+
+ if (flags & SECTION_DEBUG)
+ fprintf (asm_out_file, ",NOWRT");
+
+ fputc ('\n', asm_out_file);
+}
+
+/* Record an element in the table of global constructors. SYMBOL is
+ a SYMBOL_REF of the function to be called; PRIORITY is a number
+ between 0 and MAX_INIT_PRIORITY.
+
+ Differs from default_ctors_section_asm_out_constructor in that the
+ width of the .ctors entry is always 64 bits, rather than the 32 bits
+ used by a normal pointer. */
+
+static void
+vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ switch_to_section (ctors_section);
+ assemble_align (BITS_PER_WORD);
+ assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
+}
+
+static void
+vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ switch_to_section (dtors_section);
+ assemble_align (BITS_PER_WORD);
+ assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
+}
+#else
+
+rtx
+alpha_need_linkage (const char *name ATTRIBUTE_UNUSED,
+ int is_local ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+rtx
+alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
+ tree cfundecl ATTRIBUTE_UNUSED,
+ int lflag ATTRIBUTE_UNUSED,
+ int rflag ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+#endif /* TARGET_ABI_OPEN_VMS */
+
+#if TARGET_ABI_UNICOSMK
+
+/* This evaluates to true if we do not know how to pass TYPE solely in
+ registers. This is the case for all arguments that do not fit in two
+ registers. */
+
+static bool
+unicosmk_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (type == NULL)
+ return false;
+
+ if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
+ return true;
+ if (TREE_ADDRESSABLE (type))
+ return true;
+
+ return ALPHA_ARG_SIZE (mode, type, 0) > 2;
+}
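+
+/* For example (illustrative), a 24-byte struct spans three 8-byte
+ argument words, so ALPHA_ARG_SIZE returns 3 and the struct is
+ passed on the stack. */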
+
+/* Define the offset between two registers, one to be eliminated, and the
+ other its replacement, at the start of a routine. */
+
+int
+unicosmk_initial_elimination_offset (int from, int to)
+{
+ int fixed_size;
+
+ fixed_size = alpha_sa_size();
+ if (fixed_size != 0)
+ fixed_size += 48;
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return -fixed_size;
+ else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return 0;
+ else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return (ALPHA_ROUND (crtl->outgoing_args_size)
+ + ALPHA_ROUND (get_frame_size()));
+ else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return (ALPHA_ROUND (fixed_size)
+ + ALPHA_ROUND (get_frame_size()
+ + crtl->outgoing_args_size));
+ else
+ gcc_unreachable ();
+}
+
+/* Output the module name for .ident and .end directives. We have to strip
+ directories and make sure that the module name starts with a letter
+ or '$'. */
+
+static void
+unicosmk_output_module_name (FILE *file)
+{
+ const char *name = lbasename (main_input_filename);
+ unsigned len = strlen (name);
+ char *clean_name = alloca (len + 2);
+ char *ptr = clean_name;
+
+ /* CAM only accepts module names that start with a letter or '$'. We
+ prefix the module name with a '$' if necessary. */
+
+ if (!ISALPHA (*name))
+ *ptr++ = '$';
+ memcpy (ptr, name, len + 1);
+ clean_symbol_name (clean_name);
+ fputs (clean_name, file);
+}
+
+/* Output the definition of a common variable. */
+
+void
+unicosmk_output_common (FILE *file, const char *name, int size, int align)
+{
+ tree name_tree;
+ printf ("T3E__: common %s\n", name);
+
+ in_section = NULL;
+ fputs("\t.endp\n\n\t.psect ", file);
+ assemble_name(file, name);
+ fprintf(file, ",%d,common\n", floor_log2 (align / BITS_PER_UNIT));
+ fprintf(file, "\t.byte\t0:%d\n", size);
+
+ /* Mark the symbol as defined in this module. */
+ name_tree = get_identifier (name);
+ TREE_ASM_WRITTEN (name_tree) = 1;
+}
+
+#define SECTION_PUBLIC SECTION_MACH_DEP
+#define SECTION_MAIN (SECTION_PUBLIC << 1)
+static int current_section_align;
+
+/* A get_unnamed_section callback for switching to the text section. */
+
+static void
+unicosmk_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
+{
+ static int count = 0;
+ fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@text___%d,code\n", count++);
+}
+
+/* A get_unnamed_section callback for switching to the data section. */
+
+static void
+unicosmk_output_data_section_asm_op (const void *data ATTRIBUTE_UNUSED)
+{
+ static int count = 1;
+ fprintf (asm_out_file, "\t.endp\n\n\t.psect\tgcc@data___%d,data\n", count++);
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS.
+
+ The Cray assembler is really weird with respect to sections. It has only
+ named sections and you can't reopen a section once it has been closed.
+ This means that we have to generate unique names whenever we want to
+ reenter the text or the data section. */
+
+static void
+unicosmk_init_sections (void)
+{
+ text_section = get_unnamed_section (SECTION_CODE,
+ unicosmk_output_text_section_asm_op,
+ NULL);
+ data_section = get_unnamed_section (SECTION_WRITE,
+ unicosmk_output_data_section_asm_op,
+ NULL);
+ readonly_data_section = data_section;
+}
+
+static unsigned int
+unicosmk_section_type_flags (tree decl, const char *name,
+ int reloc ATTRIBUTE_UNUSED)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+
+ if (!decl)
+ return flags;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ current_section_align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
+ if (align_functions_log > current_section_align)
+ current_section_align = align_functions_log;
+
+ if (! strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), "main"))
+ flags |= SECTION_MAIN;
+ }
+ else
+ current_section_align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
+
+ if (TREE_PUBLIC (decl))
+ flags |= SECTION_PUBLIC;
+
+ return flags;
+}
+
+/* Generate a section name for DECL and associate it with the
+   declaration. */
+
+static void
+unicosmk_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
+{
+ const char *name;
+ int len;
+
+ gcc_assert (decl);
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = default_strip_name_encoding (name);
+ len = strlen (name);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ char *string;
+
+ /* It is essential that we prefix the section name here because
+ otherwise the section names generated for constructors and
+ destructors confuse collect2. */
+
+ string = alloca (len + 6);
+ sprintf (string, "code@%s", name);
+ DECL_SECTION_NAME (decl) = build_string (len + 5, string);
+ }
+ else if (TREE_PUBLIC (decl))
+ DECL_SECTION_NAME (decl) = build_string (len, name);
+ else
+ {
+ char *string;
+
+ string = alloca (len + 6);
+ sprintf (string, "data@%s", name);
+ DECL_SECTION_NAME (decl) = build_string (len + 5, string);
+ }
+}
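+
+/* For illustration: a function named "foo" is placed in section "code@foo",
+   a static variable "bar" in "data@bar", and a public variable keeps its
+   plain name as the section name.  */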
+
+/* Switch to an arbitrary section NAME with attributes as specified
+ by FLAGS. ALIGN specifies any known alignment requirements for
+ the section; 0 if the default should be used. */
+
+static void
+unicosmk_asm_named_section (const char *name, unsigned int flags,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ const char *kind;
+
+ /* Close the previous section. */
+
+ fputs ("\t.endp\n\n", asm_out_file);
+
+ /* Find out what kind of section we are opening. */
+
+ if (flags & SECTION_MAIN)
+ fputs ("\t.start\tmain\n", asm_out_file);
+
+ if (flags & SECTION_CODE)
+ kind = "code";
+ else if (flags & SECTION_PUBLIC)
+ kind = "common";
+ else
+ kind = "data";
+
+ if (current_section_align != 0)
+ fprintf (asm_out_file, "\t.psect\t%s,%d,%s\n", name,
+ current_section_align, kind);
+ else
+ fprintf (asm_out_file, "\t.psect\t%s,%s\n", name, kind);
+}
+
+static void
+unicosmk_insert_attributes (tree decl, tree *attr_ptr ATTRIBUTE_UNUSED)
+{
+ if (DECL_P (decl)
+ && (TREE_PUBLIC (decl) || TREE_CODE (decl) == FUNCTION_DECL))
+ unicosmk_unique_section (decl, 0);
+}
+
+/* Output an alignment directive. We have to use the macro 'gcc@code@align'
+   in code sections because .align fills unused space with zeroes. */
+
+void
+unicosmk_output_align (FILE *file, int align)
+{
+ if (inside_function)
+ fprintf (file, "\tgcc@code@align\t%d\n", align);
+ else
+ fprintf (file, "\t.align\t%d\n", align);
+}
+
+/* Add a case vector to the current function's list of deferred case
+ vectors. Case vectors have to be put into a separate section because CAM
+ does not allow data definitions in code sections. */
+
+void
+unicosmk_defer_case_vector (rtx lab, rtx vec)
+{
+ struct machine_function *machine = cfun->machine;
+
+ vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
+ machine->addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec,
+ machine->addr_list);
+}
+
+/* Output a case vector. */
+
+static void
+unicosmk_output_addr_vec (FILE *file, rtx vec)
+{
+ rtx lab = XEXP (vec, 0);
+ rtx body = XEXP (vec, 1);
+ int vlen = XVECLEN (body, 0);
+ int idx;
+
+ (*targetm.asm_out.internal_label) (file, "L", CODE_LABEL_NUMBER (lab));
+
+ for (idx = 0; idx < vlen; idx++)
+ {
+ ASM_OUTPUT_ADDR_VEC_ELT
+ (file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
+ }
+}
+
+/* Output current function's deferred case vectors. */
+
+static void
+unicosmk_output_deferred_case_vectors (FILE *file)
+{
+ struct machine_function *machine = cfun->machine;
+ rtx t;
+
+ if (machine->addr_list == NULL_RTX)
+ return;
+
+ switch_to_section (data_section);
+ for (t = machine->addr_list; t; t = XEXP (t, 1))
+ unicosmk_output_addr_vec (file, XEXP (t, 0));
+}
+
+/* Generate the name of the SSIB section for the current function. */
+
+#define SSIB_PREFIX "__SSIB_"
+#define SSIB_PREFIX_LEN 7
+
+static const char *
+unicosmk_ssib_name (void)
+{
+ /* This is ok since CAM won't be able to deal with names longer than that
+ anyway. */
+
+ static char name[256];
+
+ rtx x;
+ const char *fnname;
+ int len;
+
+ x = DECL_RTL (cfun->decl);
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ fnname = XSTR (x, 0);
+
+ len = strlen (fnname);
+ if (len + SSIB_PREFIX_LEN > 255)
+ len = 255 - SSIB_PREFIX_LEN;
+
+ strcpy (name, SSIB_PREFIX);
+ strncpy (name + SSIB_PREFIX_LEN, fnname, len);
+ name[len + SSIB_PREFIX_LEN] = 0;
+
+ return name;
+}
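+
+/* For illustration: a function named "foo" yields "__SSIB_foo"; a name
+   longer than 248 characters is truncated so that the prefixed result
+   still fits in the 255-character limit.  */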
+
+/* Set up the dynamic subprogram information block (DSIB) and update the
+ frame pointer register ($15) for subroutines which have a frame. If the
+ subroutine doesn't have a frame, simply increment $15. */
+
+static void
+unicosmk_gen_dsib (unsigned long *imaskP)
+{
+ if (alpha_procedure_type == PT_STACK)
+ {
+ const char *ssib_name;
+ rtx mem;
+
+ /* Allocate 64 bytes for the DSIB. */
+
+ FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-64))));
+ emit_insn (gen_blockage ());
+
+ /* Save the return address. */
+
+ mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 56));
+ FRP (emit_move_insn (mem, gen_rtx_REG (DImode, REG_RA)));
+ (*imaskP) &= ~(1UL << REG_RA);
+
+ /* Save the old frame pointer. */
+
+ mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 48));
+ FRP (emit_move_insn (mem, hard_frame_pointer_rtx));
+ (*imaskP) &= ~(1UL << HARD_FRAME_POINTER_REGNUM);
+
+ emit_insn (gen_blockage ());
+
+ /* Store the SSIB pointer. */
+
+ ssib_name = ggc_strdup (unicosmk_ssib_name ());
+ mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 32));
+
+ FRP (emit_move_insn (gen_rtx_REG (DImode, 5),
+ gen_rtx_SYMBOL_REF (Pmode, ssib_name)));
+ FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 5)));
+
+ /* Save the CIW index. */
+
+ mem = gen_frame_mem (DImode, plus_constant (stack_pointer_rtx, 24));
+ FRP (emit_move_insn (mem, gen_rtx_REG (DImode, 25)));
+
+ emit_insn (gen_blockage ());
+
+ /* Set the new frame pointer. */
+ FRP (emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (64))));
+ }
+ else
+ {
+ /* Increment the frame pointer register to indicate that we do not
+ have a frame. */
+ emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx, const1_rtx));
+ }
+}
+
+/* Output the static subroutine information block for the current
+ function. */
+
+static void
+unicosmk_output_ssib (FILE *file, const char *fnname)
+{
+ int len;
+ int i;
+ rtx x;
+ rtx ciw;
+ struct machine_function *machine = cfun->machine;
+
+ in_section = NULL;
+ fprintf (file, "\t.endp\n\n\t.psect\t%s%s,data\n", user_label_prefix,
+ unicosmk_ssib_name ());
+
+  /* A required header word that encodes the function name length. */
+
+ len = strlen (fnname);
+ fprintf (file, "\t.quad\t^X20008%2.2X28\n", len);
+
+ /* Saved registers
+ ??? We don't do that yet. */
+
+ fputs ("\t.quad\t0\n", file);
+
+ /* Function address. */
+
+ fputs ("\t.quad\t", file);
+ assemble_name (file, fnname);
+ putc ('\n', file);
+
+ fputs ("\t.quad\t0\n", file);
+ fputs ("\t.quad\t0\n", file);
+
+ /* Function name.
+ ??? We do it the same way Cray CC does it but this could be
+ simplified. */
+
+  for (i = 0; i < len; i++)
+    fprintf (file, "\t.byte\t%d\n", (int)(fnname[i]));
+  if ((len % 8) == 0)
+ fputs ("\t.quad\t0\n", file);
+ else
+ fprintf (file, "\t.bits\t%d : 0\n", (8 - (len % 8))*8);
+
+ /* All call information words used in the function. */
+
+ for (x = machine->first_ciw; x; x = XEXP (x, 1))
+ {
+ ciw = XEXP (x, 0);
+#if HOST_BITS_PER_WIDE_INT == 32
+ fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_DOUBLE_HEX "\n",
+ CONST_DOUBLE_HIGH (ciw), CONST_DOUBLE_LOW (ciw));
+#else
+ fprintf (file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n", INTVAL (ciw));
+#endif
+ }
+}
+
+/* Add a call information word (CIW) to the list of the current function's
+ CIWs and return its index.
+
+ X is a CONST_INT or CONST_DOUBLE representing the CIW. */
+
+rtx
+unicosmk_add_call_info_word (rtx x)
+{
+ rtx node;
+ struct machine_function *machine = cfun->machine;
+
+ node = gen_rtx_EXPR_LIST (VOIDmode, x, NULL_RTX);
+ if (machine->first_ciw == NULL_RTX)
+ machine->first_ciw = node;
+ else
+ XEXP (machine->last_ciw, 1) = node;
+
+ machine->last_ciw = node;
+ ++machine->ciw_count;
+
+ return GEN_INT (machine->ciw_count
+ + strlen (current_function_name ())/8 + 5);
+}
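+
+/* For illustration: in a function named "foo" (strlen 3, so 3/8 == 0),
+   the first CIW added gets index 1 + 0 + 5 == 6.  */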
+
+/* The Cray assembler doesn't accept extern declarations for symbols which
+ are defined in the same file. We have to keep track of all global
+ symbols which are referenced and/or defined in a source file and output
+ extern declarations for those which are referenced but not defined at
+ the end of file. */
+
+/* List of identifiers for which an extern declaration might have to be
+ emitted. */
+/* FIXME: needs to use GC, so it can be saved and restored for PCH. */
+
+struct unicosmk_extern_list
+{
+ struct unicosmk_extern_list *next;
+ const char *name;
+};
+
+static struct unicosmk_extern_list *unicosmk_extern_head = 0;
+
+/* Output extern declarations which are required for every asm file. */
+
+static void
+unicosmk_output_default_externs (FILE *file)
+{
+ static const char *const externs[] =
+ { "__T3E_MISMATCH" };
+
+ int i;
+ int n;
+
+ n = ARRAY_SIZE (externs);
+
+ for (i = 0; i < n; i++)
+ fprintf (file, "\t.extern\t%s\n", externs[i]);
+}
+
+/* Output extern declarations for global symbols which have been
+ referenced but not defined. */
+
+static void
+unicosmk_output_externs (FILE *file)
+{
+ struct unicosmk_extern_list *p;
+ const char *real_name;
+ int len;
+ tree name_tree;
+
+ len = strlen (user_label_prefix);
+ for (p = unicosmk_extern_head; p != 0; p = p->next)
+ {
+ /* We have to strip the encoding and possibly remove user_label_prefix
+ from the identifier in order to handle -fleading-underscore and
+ explicit asm names correctly (cf. gcc.dg/asm-names-1.c). */
+ real_name = default_strip_name_encoding (p->name);
+ if (len && p->name[0] == '*'
+ && !memcmp (real_name, user_label_prefix, len))
+ real_name += len;
+
+ name_tree = get_identifier (real_name);
+ if (! TREE_ASM_WRITTEN (name_tree))
+ {
+ TREE_ASM_WRITTEN (name_tree) = 1;
+ fputs ("\t.extern\t", file);
+ assemble_name (file, p->name);
+ putc ('\n', file);
+ }
+ }
+}
+
+/* Record an extern. */
+
+void
+unicosmk_add_extern (const char *name)
+{
+ struct unicosmk_extern_list *p;
+
+ p = (struct unicosmk_extern_list *)
+ xmalloc (sizeof (struct unicosmk_extern_list));
+ p->next = unicosmk_extern_head;
+ p->name = name;
+ unicosmk_extern_head = p;
+}
+
+/* The Cray assembler generates incorrect code if identifiers which
+ conflict with register names are used as instruction operands. We have
+ to replace such identifiers with DEX expressions. */
+
+/* Structure to collect identifiers which have been replaced by DEX
+ expressions. */
+/* FIXME: needs to use GC, so it can be saved and restored for PCH. */
+
+struct unicosmk_dex {
+ struct unicosmk_dex *next;
+ const char *name;
+};
+
+/* List of identifiers which have been replaced by DEX expressions. The DEX
+ number is determined by the position in the list. */
+
+static struct unicosmk_dex *unicosmk_dex_list = NULL;
+
+/* The number of elements in the DEX list. */
+
+static int unicosmk_dex_count = 0;
+
+/* Check if NAME must be replaced by a DEX expression. */
+
+static int
+unicosmk_special_name (const char *name)
+{
+ if (name[0] == '*')
+ ++name;
+
+ if (name[0] == '$')
+ ++name;
+
+ if (name[0] != 'r' && name[0] != 'f' && name[0] != 'R' && name[0] != 'F')
+ return 0;
+
+ switch (name[1])
+ {
+ case '1': case '2':
+ return (name[2] == '\0' || (ISDIGIT (name[2]) && name[3] == '\0'));
+
+ case '3':
+ return (name[2] == '\0'
+ || ((name[2] == '0' || name[2] == '1') && name[3] == '\0'));
+
+ default:
+ return (ISDIGIT (name[1]) && name[2] == '\0');
+ }
+}
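+
+/* For illustration: "r5", "$f10", "R31" and "f0" are all special and need
+   DEX replacement; "r32" and "foo" are not.  */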
+
+/* Return the DEX number if X must be replaced by a DEX expression and 0
+ otherwise. */
+
+static int
+unicosmk_need_dex (rtx x)
+{
+ struct unicosmk_dex *dex;
+ const char *name;
+ int i;
+
+ if (GET_CODE (x) != SYMBOL_REF)
+ return 0;
+
+ name = XSTR (x,0);
+ if (! unicosmk_special_name (name))
+ return 0;
+
+ i = unicosmk_dex_count;
+ for (dex = unicosmk_dex_list; dex; dex = dex->next)
+ {
+ if (! strcmp (name, dex->name))
+ return i;
+ --i;
+ }
+
+ dex = (struct unicosmk_dex *) xmalloc (sizeof (struct unicosmk_dex));
+ dex->name = name;
+ dex->next = unicosmk_dex_list;
+ unicosmk_dex_list = dex;
+
+ ++unicosmk_dex_count;
+ return unicosmk_dex_count;
+}
+
+/* Output the DEX definitions for this file. */
+
+static void
+unicosmk_output_dex (FILE *file)
+{
+ struct unicosmk_dex *dex;
+ int i;
+
+ if (unicosmk_dex_list == NULL)
+ return;
+
+ fprintf (file, "\t.dexstart\n");
+
+ i = unicosmk_dex_count;
+ for (dex = unicosmk_dex_list; dex; dex = dex->next)
+ {
+ fprintf (file, "\tDEX (%d) = ", i);
+ assemble_name (file, dex->name);
+ putc ('\n', file);
+ --i;
+ }
+
+ fprintf (file, "\t.dexend\n");
+}
+
+/* Output text to appear at the beginning of an assembler file. */
+
+static void
+unicosmk_file_start (void)
+{
+ int i;
+
+ fputs ("\t.ident\t", asm_out_file);
+ unicosmk_output_module_name (asm_out_file);
+ fputs ("\n\n", asm_out_file);
+
+  /* CAM, the Unicos/Mk assembler, uses different register names: rN for
+     the integer register N and fN for the floating-point register N.
+     Instead of trying to use these in alpha.md, we emit micro definitions
+     that make the symbols $N and $fN refer to the appropriate registers. */
+
+ for (i = 0; i < 32; ++i)
+ fprintf (asm_out_file, "$%d <- r%d\n", i, i);
+
+ for (i = 0; i < 32; ++i)
+ fprintf (asm_out_file, "$f%d <- f%d\n", i, i);
+
+ putc ('\n', asm_out_file);
+
+  /* The .align directive fills unused space with zeroes, which does not work
+ in code sections. We define the macro 'gcc@code@align' which uses nops
+ instead. Note that it assumes that code sections always have the
+ biggest possible alignment since . refers to the current offset from
+ the beginning of the section. */
+
+ fputs ("\t.macro gcc@code@align n\n", asm_out_file);
+ fputs ("gcc@n@bytes = 1 << n\n", asm_out_file);
+ fputs ("gcc@here = . % gcc@n@bytes\n", asm_out_file);
+ fputs ("\t.if ne, gcc@here, 0\n", asm_out_file);
+ fputs ("\t.repeat (gcc@n@bytes - gcc@here) / 4\n", asm_out_file);
+ fputs ("\tbis r31,r31,r31\n", asm_out_file);
+ fputs ("\t.endr\n", asm_out_file);
+ fputs ("\t.endif\n", asm_out_file);
+ fputs ("\t.endm gcc@code@align\n\n", asm_out_file);
+
+ /* Output extern declarations which should always be visible. */
+ unicosmk_output_default_externs (asm_out_file);
+
+ /* Open a dummy section. We always need to be inside a section for the
+ section-switching code to work correctly.
+ ??? This should be a module id or something like that. I still have to
+ figure out what the rules for those are. */
+ fputs ("\n\t.psect\t$SG00000,data\n", asm_out_file);
+}
+
+/* Output text to appear at the end of an assembler file. This includes all
+ pending extern declarations and DEX expressions. */
+
+static void
+unicosmk_file_end (void)
+{
+ fputs ("\t.endp\n\n", asm_out_file);
+
+ /* Output all pending externs. */
+
+ unicosmk_output_externs (asm_out_file);
+
+ /* Output dex definitions used for functions whose names conflict with
+ register names. */
+
+ unicosmk_output_dex (asm_out_file);
+
+ fputs ("\t.end\t", asm_out_file);
+ unicosmk_output_module_name (asm_out_file);
+ putc ('\n', asm_out_file);
+}
+
+#else
+
+static void
+unicosmk_output_deferred_case_vectors (FILE *file ATTRIBUTE_UNUSED)
+{}
+
+static void
+unicosmk_gen_dsib (unsigned long *imaskP ATTRIBUTE_UNUSED)
+{}
+
+static void
+unicosmk_output_ssib (FILE * file ATTRIBUTE_UNUSED,
+ const char * fnname ATTRIBUTE_UNUSED)
+{}
+
+rtx
+unicosmk_add_call_info_word (rtx x ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+static int
+unicosmk_need_dex (rtx x ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
+#endif /* TARGET_ABI_UNICOSMK */
+
+static void
+alpha_init_libfuncs (void)
+{
+ if (TARGET_ABI_UNICOSMK)
+ {
+ /* Prevent gcc from generating calls to __divsi3. */
+ set_optab_libfunc (sdiv_optab, SImode, 0);
+ set_optab_libfunc (udiv_optab, SImode, 0);
+
+ /* Use the functions provided by the system library
+ for DImode integer division. */
+ set_optab_libfunc (sdiv_optab, DImode, "$sldiv");
+ set_optab_libfunc (udiv_optab, DImode, "$uldiv");
+ }
+ else if (TARGET_ABI_OPEN_VMS)
+ {
+ /* Use the VMS runtime library functions for division and
+ remainder. */
+ set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
+ set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
+ set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
+ set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
+ set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
+ set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
+ set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
+ set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
+ abort_libfunc = init_one_libfunc ("decc$abort");
+ memcmp_libfunc = init_one_libfunc ("decc$memcmp");
+#ifdef MEM_LIBFUNCS_INIT
+ MEM_LIBFUNCS_INIT;
+#endif
+ }
+}
+
+/* On the Alpha, we use this to disable the floating-point registers
+ when they don't exist. */
+
+static void
+alpha_conditional_register_usage (void)
+{
+ int i;
+ if (! TARGET_FPREGS)
+ for (i = 32; i < 63; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+}
+
+/* Initialize the GCC target structure. */
+#if TARGET_ABI_OPEN_VMS
+# undef TARGET_ATTRIBUTE_TABLE
+# define TARGET_ATTRIBUTE_TABLE vms_attribute_table
+# undef TARGET_CAN_ELIMINATE
+# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
+#endif
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
+
+#if TARGET_ABI_UNICOSMK
+# undef TARGET_INSERT_ATTRIBUTES
+# define TARGET_INSERT_ATTRIBUTES unicosmk_insert_attributes
+# undef TARGET_SECTION_TYPE_FLAGS
+# define TARGET_SECTION_TYPE_FLAGS unicosmk_section_type_flags
+# undef TARGET_ASM_UNIQUE_SECTION
+# define TARGET_ASM_UNIQUE_SECTION unicosmk_unique_section
+# undef TARGET_ASM_FUNCTION_RODATA_SECTION
+# define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+# undef TARGET_ASM_GLOBALIZE_LABEL
+# define TARGET_ASM_GLOBALIZE_LABEL hook_void_FILEptr_constcharptr
+# undef TARGET_MUST_PASS_IN_STACK
+# define TARGET_MUST_PASS_IN_STACK unicosmk_must_pass_in_stack
+#endif
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
+/* Default unaligned ops are provided for ELF systems. To get unaligned
+ data for non-ELF systems, we have to turn off auto alignment. */
+#if !defined (OBJECT_FORMAT_ELF) || TARGET_ABI_OPEN_VMS
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
+#endif
+
+#ifdef OBJECT_FORMAT_ELF
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
+#endif
+
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
+
+#if TARGET_ABI_UNICOSMK
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START unicosmk_file_start
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END unicosmk_file_end
+#else
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START alpha_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#endif
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ alpha_multipass_dfa_lookahead
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL alpha_builtin_decl
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS alpha_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN alpha_expand_builtin
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN alpha_fold_builtin
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
+
+#if TARGET_ABI_OSF
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+#undef TARGET_STDARG_OPTIMIZE_HOOK
+#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
+#endif
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS alpha_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
+#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG alpha_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
+
+#undef TARGET_INSTANTIATE_DECLS
+#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD alpha_secondary_reload
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
+
+/* The Alpha architecture does not require sequential consistency. See
+ http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
+ for an example of how it can be violated in practice. */
+#undef TARGET_RELAXED_ORDERING
+#define TARGET_RELAXED_ORDERING true
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION alpha_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE alpha_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE alpha_option_optimization_table
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE alpha_mangle_type
+#endif
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+#include "gt-alpha.h"
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
new file mode 100644
index 000000000..409915abc
--- /dev/null
+++ b/gcc/config/alpha/alpha.h
@@ -0,0 +1,1344 @@
+/* Definitions of target machine for GNU compiler, for DEC Alpha.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__alpha"); \
+ builtin_define ("__alpha__"); \
+ builtin_assert ("cpu=alpha"); \
+ builtin_assert ("machine=alpha"); \
+ if (TARGET_CIX) \
+ { \
+ builtin_define ("__alpha_cix__"); \
+ builtin_assert ("cpu=cix"); \
+ } \
+ if (TARGET_FIX) \
+ { \
+ builtin_define ("__alpha_fix__"); \
+ builtin_assert ("cpu=fix"); \
+ } \
+ if (TARGET_BWX) \
+ { \
+ builtin_define ("__alpha_bwx__"); \
+ builtin_assert ("cpu=bwx"); \
+ } \
+ if (TARGET_MAX) \
+ { \
+ builtin_define ("__alpha_max__"); \
+ builtin_assert ("cpu=max"); \
+ } \
+ if (alpha_cpu == PROCESSOR_EV6) \
+ { \
+ builtin_define ("__alpha_ev6__"); \
+ builtin_assert ("cpu=ev6"); \
+ } \
+ else if (alpha_cpu == PROCESSOR_EV5) \
+ { \
+ builtin_define ("__alpha_ev5__"); \
+ builtin_assert ("cpu=ev5"); \
+ } \
+ else /* Presumably ev4. */ \
+ { \
+ builtin_define ("__alpha_ev4__"); \
+ builtin_assert ("cpu=ev4"); \
+ } \
+ if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \
+ builtin_define ("_IEEE_FP"); \
+ if (TARGET_IEEE_WITH_INEXACT) \
+ builtin_define ("_IEEE_FP_INEXACT"); \
+ if (TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONG_DOUBLE_128__"); \
+ \
+ /* Macros dependent on the C dialect. */ \
+ SUBTARGET_LANGUAGE_CPP_BUILTINS(); \
+} while (0)
+
+#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS
+#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \
+ do \
+ { \
+ if (preprocessing_asm_p ()) \
+ builtin_define_std ("LANGUAGE_ASSEMBLY"); \
+ else if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \
+ builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \
+ } \
+ else \
+ builtin_define_std ("LANGUAGE_C"); \
+ if (c_dialect_objc ()) \
+ { \
+ builtin_define ("__LANGUAGE_OBJECTIVE_C"); \
+ builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \
+ } \
+ } \
+ while (0)
+#endif
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+/* Which processor to schedule for. The cpu attribute defines a list that
+ mirrors this list, so changes to alpha.md must be made at the same time. */
+
+enum processor_type
+{
+ PROCESSOR_EV4, /* 2106[46]{a,} */
+ PROCESSOR_EV5, /* 21164{a,pc,} */
+ PROCESSOR_EV6, /* 21264 */
+ PROCESSOR_MAX
+};
+
+extern enum processor_type alpha_cpu;
+extern enum processor_type alpha_tune;
+
+enum alpha_trap_precision
+{
+ ALPHA_TP_PROG, /* No precision (default). */
+ ALPHA_TP_FUNC, /* Trap contained within originating function. */
+ ALPHA_TP_INSN /* Instruction accuracy and code is resumption safe. */
+};
+
+enum alpha_fp_rounding_mode
+{
+ ALPHA_FPRM_NORM, /* Normal rounding mode. */
+ ALPHA_FPRM_MINF, /* Round towards minus-infinity. */
+ ALPHA_FPRM_CHOP, /* Chopped rounding mode (towards 0). */
+ ALPHA_FPRM_DYN /* Dynamic rounding mode. */
+};
+
+enum alpha_fp_trap_mode
+{
+ ALPHA_FPTM_N, /* Normal trap mode. */
+ ALPHA_FPTM_U, /* Underflow traps enabled. */
+ ALPHA_FPTM_SU, /* Software completion, w/underflow traps */
+ ALPHA_FPTM_SUI /* Software completion, w/underflow & inexact traps */
+};
+
+extern enum alpha_trap_precision alpha_tp;
+extern enum alpha_fp_rounding_mode alpha_fprm;
+extern enum alpha_fp_trap_mode alpha_fptm;
+
+/* The easy option to record is -msoft-float, so define TARGET_FP as its
+   inverse. */
+#define TARGET_FP (!TARGET_SOFT_FP)
+
+/* These are for target os support and cannot be changed at runtime. */
+#define TARGET_ABI_WINDOWS_NT 0
+#define TARGET_ABI_OPEN_VMS 0
+#define TARGET_ABI_UNICOSMK 0
+#define TARGET_ABI_OSF (!TARGET_ABI_WINDOWS_NT \
+ && !TARGET_ABI_OPEN_VMS \
+ && !TARGET_ABI_UNICOSMK)
+
+#ifndef TARGET_AS_CAN_SUBTRACT_LABELS
+#define TARGET_AS_CAN_SUBTRACT_LABELS TARGET_GAS
+#endif
+#ifndef TARGET_AS_SLASH_BEFORE_SUFFIX
+#define TARGET_AS_SLASH_BEFORE_SUFFIX TARGET_GAS
+#endif
+#ifndef TARGET_CAN_FAULT_IN_PROLOGUE
+#define TARGET_CAN_FAULT_IN_PROLOGUE 0
+#endif
+#ifndef TARGET_HAS_XFLOATING_LIBS
+#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128
+#endif
+#ifndef TARGET_PROFILING_NEEDS_GP
+#define TARGET_PROFILING_NEEDS_GP 0
+#endif
+#ifndef TARGET_LD_BUGGY_LDGP
+#define TARGET_LD_BUGGY_LDGP 0
+#endif
+#ifndef TARGET_FIXUP_EV5_PREFETCH
+#define TARGET_FIXUP_EV5_PREFETCH 0
+#endif
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+#define TARGET_DEFAULT MASK_FPREGS
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT 0
+#endif
+
+#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS
+#ifdef HAVE_AS_EXPLICIT_RELOCS
+#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS
+#define TARGET_SUPPORT_ARCH 1
+#else
+#define TARGET_DEFAULT_EXPLICIT_RELOCS 0
+#endif
+#endif
+
+#ifndef TARGET_SUPPORT_ARCH
+#define TARGET_SUPPORT_ARCH 0
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-cpu is ignored if -mcpu is specified.
+ --with-tune is ignored if -mtune is specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }
+
+
+/* target machine storage layout */
+
+/* Define the size of `int'. The default is the same as the word size. */
+#define INT_TYPE_SIZE 32
+
+/* Define the size of `long long'. The default is twice the word size. */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* The two floating-point formats we support are S-floating, which is
+ 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double'
+ and `long double' are T. */
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+/* Define this to set long double type size to use in libgcc2.c, which can
+ not depend on target_flags. */
+#ifdef __LONG_DOUBLE_128__
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+/* Work around target_flags dependency in ada/targtyps.c. */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+#define WCHAR_TYPE "unsigned int"
+#define WCHAR_TYPE_SIZE 32
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ For Alpha, we always store objects in a full register. 32-bit integers
+ are always sign-extended, but smaller objects retain their signedness.
+
+ Note that small vector types can get mapped onto integer modes at the
+ whim of not appearing in alpha-modes.def. We never promoted these
+ values before; don't do so now that we've trimmed the set of modes to
+ those actually implemented in the backend. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ if ((MODE) == SImode) \
+ (UNSIGNEDP) = 0; \
+ (MODE) = DImode; \
+ }
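+
+/* For illustration: an HImode scalar is widened to DImode keeping its own
+   signedness, while an SImode scalar is widened with UNSIGNEDP forced to
+   0, matching the hardware's sign-extension of 32-bit values.  */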
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields.
+
+ There are no such instructions on the Alpha, but the documentation
+ is little endian. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered.
+ This is false on the Alpha. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered.
+
+ For Alpha we can decide arbitrarily since there are no machine instructions
+ for them. Might as well be consistent with bytes. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 8
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 64
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 128
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 64
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 128
+
+/* For atomic access to objects, must have at least 32-bit alignment
+ unless the machine has byte operations. */
+#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 8 : 32))
+
+/* Align all constants and variables to at least a word boundary so
+ we can pick up pieces of them faster. */
+/* ??? Only if block-move stuff knows about different source/destination
+ alignment. */
+#if 0
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD)
+#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD)
+#endif
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data.
+
+ Since we get an error message when we do one, call them invalid. */
+
+#define STRICT_ALIGNMENT 1
+
+/* Set this nonzero if unaligned move instructions are extremely slow.
+
+ On the Alpha, they trap. */
+
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ We define all 32 integer registers, even though $31 is always zero,
+ and all 32 floating-point registers, even though $f31 is also
+ always zero. We do not bother defining the FP status register and
+ there are no other registers.
+
+ Since $31 is always zero, we will use register number 31 as the
+ argument pointer. It will never appear in the generated code
+ because we will always be eliminating it in favor of the stack
+ pointer or hardware frame pointer.
+
+ Likewise, we use $f31 for the frame pointer, which will always
+ be eliminated in favor of the hardware frame pointer or the
+ stack pointer. */
+
+#define FIRST_PSEUDO_REGISTER 64
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+
+#define FIXED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+
+/* List the order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. */
+
+#define REG_ALLOC_ORDER { \
+ 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \
+ 22, 23, 24, 25, 28, /* likewise */ \
+ 0, /* likewise, but return value */ \
+ 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \
+ 27, /* likewise, but OSF procedure value */ \
+ \
+ 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \
+ 54, 55, 56, 57, 58, 59, /* likewise */ \
+ 60, 61, 62, /* likewise */ \
+ 32, 33, /* likewise, but return values */ \
+ 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \
+ \
+ 9, 10, 11, 12, 13, 14, /* saved integer registers */ \
+ 26, /* return address */ \
+ 15, /* hard frame pointer */ \
+ \
+ 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \
+ 40, 41, /* likewise */ \
+ \
+ 29, 30, 31, 63 /* gp, sp, ap, sfp */ \
+}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
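+
+/* For illustration: with UNITS_PER_WORD == 8, SFmode, DFmode and DImode
+   each need one register, while TImode (16 bytes) needs two.  */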
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ On Alpha, the integer registers can hold any mode. The floating-point
+ registers can hold 64-bit integers as well, but not smaller values. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ (IN_RANGE ((REGNO), 32, 62) \
+ ? (MODE) == SFmode || (MODE) == DFmode || (MODE) == DImode \
+ || (MODE) == SCmode || (MODE) == DCmode \
+ : 1)
+
+/* A C expression that is nonzero if a value of mode
+ MODE1 is accessible in mode MODE2 without copying.
+
+ This asymmetric test is true when MODE1 could be put
+ in an FP register but MODE2 could not. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (HARD_REGNO_MODE_OK (32, (MODE1)) \
+ ? HARD_REGNO_MODE_OK (32, (MODE2)) \
+ : 1)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Alpha pc isn't overloaded on a register that the compiler knows about. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 30
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 15
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 31
+
+/* Base register for access to local variables of function. */
+#define FRAME_POINTER_REGNUM 63
+
+/* Register in which static-chain is passed to a function.
+
+ For the Alpha, this is based on an example; the calling sequence
+ doesn't seem to specify this. */
+#define STATIC_CHAIN_REGNUM 1
+
+/* The register number of the register used to address a table of
+ static data addresses in memory. */
+#define PIC_OFFSET_TABLE_REGNUM 29
+
+/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM'
+ is clobbered by calls. */
+/* ??? It is and it isn't. It's required to be valid for a given
+ function when the function returns. It isn't clobbered by
+ current_file functions. Moreover, we do not expose the ldgp
+ until after reload, so we're probably safe. */
+/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+enum reg_class {
+ NO_REGS, R0_REG, R24_REG, R25_REG, R27_REG,
+ GENERAL_REGS, FLOAT_REGS, ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+ {"NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", \
+ "GENERAL_REGS", "FLOAT_REGS", "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ {0x00000000, 0x00000000}, /* NO_REGS */ \
+ {0x00000001, 0x00000000}, /* R0_REG */ \
+ {0x01000000, 0x00000000}, /* R24_REG */ \
+ {0x02000000, 0x00000000}, /* R25_REG */ \
+ {0x08000000, 0x00000000}, /* R27_REG */ \
+ {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \
+ {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \
+ {0xffffffff, 0xffffffff} }
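+
+/* For illustration: each initializer is a pair of 32-bit words covering
+   hard registers 0-31 and 32-63.  R27_REG is {0x08000000, 0} because
+   1 << 27 selects register 27; FLOAT_REGS covers registers 32-62, while
+   register 63 (the soft frame pointer) is claimed by GENERAL_REGS via the
+   0x80000000 bit in its second word.  */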
+
+/* The following macro defines cover classes for Integrated Register
+ Allocator. Cover classes is a set of non-intersected register
+ classes covering all hard registers used for register allocation
+ purpose. Any move between two registers of a cover class should be
+ cheaper than load or store of the registers. The macro value is
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FLOAT_REGS, LIM_REG_CLASSES \
+}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) == 0 ? R0_REG \
+ : (REGNO) == 24 ? R24_REG \
+ : (REGNO) == 25 ? R25_REG \
+ : (REGNO) == 27 ? R27_REG \
+ : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS \
+ : GENERAL_REGS)
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS NO_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class. */
+
+#define PREFERRED_RELOAD_CLASS alpha_preferred_reload_class
+
+/* If we are copying between general and FP registers, we need a memory
+ location unless the FIX extension is available. */
+
+#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \
+ (! TARGET_FIX && (((CLASS1) == FLOAT_REGS && (CLASS2) != FLOAT_REGS) \
+ || ((CLASS2) == FLOAT_REGS && (CLASS1) != FLOAT_REGS)))
+
+/* Specify the mode to be used for memory when a secondary memory
+ location is needed. If MODE is floating-point, use it. Otherwise,
+ widen to a word like the default. This is needed because we always
+ store integers in FP registers in quadword format. This whole
+ area is very tricky! */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (GET_MODE_CLASS (MODE) == MODE_FLOAT ? (MODE) \
+ : GET_MODE_SIZE (MODE) >= 4 ? (MODE) \
+ : mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0))
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Return the class of registers that cannot change mode from FROM to TO. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0)
+
+/* Define the cost of moving between registers of various classes. Moving
+ between FLOAT_REGS and anything else except float regs is expensive.
+ In fact, we make it quite expensive because we really don't want to
+ do these moves unless it is clearly worth it. Optimizations may
+ reduce the impact of not being able to allocate a pseudo to a
+ hard register. */
+
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 \
+ : TARGET_FIX ? ((CLASS1) == FLOAT_REGS ? 6 : 8) \
+ : 4+2*alpha_memory_latency)
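+
+/* For illustration: a move within GENERAL_REGS or within FLOAT_REGS costs
+   2; a cross-class move costs 6 or 8 when the FIX extension provides
+   direct transfer instructions, and 4 + 2*alpha_memory_latency (a bounce
+   through memory) when it does not.  */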
+
+/* A C expression returning the cost of moving data of MODE from a register to
+ or from memory.
+
+ On the Alpha, bump this up a bit. */
+
+extern int alpha_memory_latency;
+#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency)
+
+/* Provide the cost of a branch. Exact meaning under development. */
+#define BRANCH_COST(speed_p, predictable_p) 5
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+/* #define FRAME_GROWS_DOWNWARD 0 */
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On Alpha, don't define this because there are no push insns. */
+/* #define PUSH_ROUNDING(BYTES) */
+
+/* Define this to be nonzero if stack checking is built into the ABI. */
+#define STACK_CHECK_BUILTIN 1
+
+/* Define this if the maximum size of all the outgoing args is to be
+ accumulated and pushed during the prologue. The amount can be
+ found in the variable crtl->outgoing_args_size. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register value. */
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Definitions for register eliminations.
+
+ We have two registers that can be eliminated on the Alpha. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer. */
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Round up to a multiple of 16 bytes. */
+#define ALPHA_ROUND(X) (((X) + 15) & ~ 15)
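+
+/* For illustration: ALPHA_ROUND (0) == 0, ALPHA_ROUND (1) == 16 and
+   ALPHA_ROUND (17) == 32; adding 15 and clearing the low four bits rounds
+   up to the next multiple of 16.  */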
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = alpha_initial_elimination_offset(FROM, TO))
+
+/* Define this if stack space is still allocated for a parameter passed
+ in a register. */
+/* #define REG_PARM_STACK_SPACE */
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0.
+
+ On Alpha the value is found in $0 for integer functions and
+ $f0 for floating-point functions. */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ function_value (VALTYPE, FUNC, VOIDmode)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) \
+ function_value (NULL, NULL, MODE)
+
+/* 1 if N is a possible register number for a function value
+ as seen by the caller. */
+
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == 0 || (N) == 1 || (N) == 32 || (N) == 33)
+
+/* 1 if N is a possible register number for function argument passing.
+ On Alpha, these are $16-$21 and $f16-$f21. */
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On Alpha, this is a single integer, which is a number of words
+ of arguments scanned so far.
+ Thus 6 or more means all following args should go on the stack. */
+
+#define CUMULATIVE_ARGS int
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM) = 0
+
+/* Define intermediate macro to compute the size (in registers) of an argument
+ for the Alpha. */
+
+#define ALPHA_ARG_SIZE(MODE, TYPE, NAMED) \
+ ((MODE) == TFmode || (MODE) == TCmode ? 1 \
+ : (((MODE) == BLKmode ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \
+ + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
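+
+/* For illustration: DImode and DFmode each count as one word, a 24-byte
+   BLKmode aggregate as three, and TFmode and TCmode as one because such
+   arguments are passed by reference.  */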
+
+/* This macro defines the start of an assembly comment. */
+
+#define ASM_COMMENT_START " #"
+
+/* This macro produces the initial definition of a function. */
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \
+ alpha_start_function(FILE,NAME,DECL);
+
+/* This macro closes up a function definition for the assembler. */
+
+#define ASM_DECLARE_FUNCTION_SIZE(FILE,NAME,DECL) \
+ alpha_end_function(FILE,NAME,DECL)
+
+/* Output any profiling code before the prologue. */
+
+#define PROFILE_BEFORE_PROLOGUE 1
+
+/* Never use profile counters. */
+
+#define NO_PROFILE_COUNTERS 1
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. Under OSF/1, profiling is enabled
+ by simply passing -pg to the assembler and linker. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO)
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Define registers used by the epilogue and return instruction. */
+
+#define EPILOGUE_USES(REGNO) ((REGNO) == 26)
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE 32
+
+/* The alignment of a trampoline, in bits. */
+
+#define TRAMPOLINE_ALIGNMENT 64
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of
+ the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */
+
+#define RETURN_ADDR_RTX alpha_return_addr
+
+/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders
+ can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same
+ as the default definition in dwarf2out.c. */
+#undef DWARF_FRAME_REGNUM
+#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG)
+
+/* Before the prologue, RA lives in $26. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26)
+#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64)
+#define DWARF_ZERO_REG 31
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28)
+#define EH_RETURN_HANDLER_RTX \
+ gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, \
+ crtl->outgoing_args_size))
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) 0
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 \
+ || (REGNO) == 63 || reg_renumber[REGNO] == 63)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* Recognize any constant value that is a valid address. For the Alpha,
+   only small integer constants in the 16-bit signed displacement range
+   qualify, since we want to use LDA to load any symbolic addresses into
+   registers. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ (CONST_INT_P (X) \
+ && (unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000)
+
+/* Include all constant integers and constant doubles, but not
+ floating-point, except for floating-point zero. */
+
+#define LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) 0
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define NONSTRICT_REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* ??? Nonzero if X is the frame pointer, or some virtual register
+ that may eliminate to the frame pointer. These will be allowed to
+ have offsets greater than 32K. This is done because register
+ elimination offsets will change the hi/lo split, and if we split
+ before reload, we will require additional instructions. */
+#define NONSTRICT_REG_OK_FP_BASE_P(X) \
+ (REGNO (X) == 31 || REGNO (X) == 63 \
+ || (REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER))
+
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X)
+#else
+#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X)
+#endif
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \
+do { \
+ rtx new_x = alpha_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \
+ if (new_x) \
+ { \
+ X = new_x; \
+ goto WIN; \
+ } \
+} while (0)
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+ On the Alpha this is true only for the unaligned modes. We can
+ simplify this test since we know that the address must be valid. */
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL) \
+{ if (GET_CODE (ADDR) == AND) goto LABEL; }
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE SImode
+
+/* Define as a C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+
+ Do not define this if the table should contain absolute addresses.
+ On the Alpha, the table is really GP-relative, not relative to the PC
+ of the table, but we pretend that it is PC-relative; this should be OK,
+ but we should try to find some better way sometime. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move to or from memory
+ in one reasonably fast instruction. */
+
+#define MOVE_MAX 8
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+   move-instruction pairs, we will do a movmem or libcall instead.
+
+   Without byte/word accesses, we want no more than four instructions;
+   with them, several single-byte accesses are better.  */
+
+#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2)
+
+/* Largest number of bytes of an object that can be placed in a register.
+ On the Alpha we have plenty of registers, so use TImode. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode)
+
+/* Nonzero if access to memory by bytes is no faster than for words.
+   Also nonzero if doing byte operations (specifically shifts) in registers
+   is undesirable.
+
+   On the Alpha, we prefer not to use the byte operations, instead using
+   masking operations to access fields; these save instructions.  */
+
+#define SLOW_BYTE_ACCESS 1
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND)
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* The CIX ctlz and cttz instructions return 64 for zero. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX)
+
+/* Define the value returned by a floating-point comparison instruction. */
+
+#define FLOAT_STORE_FLAG_VALUE(MODE) \
+ REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE))
+
+/* Canonicalize a comparison from one we don't have to one we do have. */
+
+#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
+ do { \
+ if (((CODE) == GE || (CODE) == GT || (CODE) == GEU || (CODE) == GTU) \
+ && (REG_P (OP1) || (OP1) == const0_rtx)) \
+ { \
+ rtx tem = (OP0); \
+ (OP0) = (OP1); \
+ (OP1) = tem; \
+ (CODE) = swap_condition (CODE); \
+ } \
+ if (((CODE) == LT || (CODE) == LTU) \
+ && CONST_INT_P (OP1) && INTVAL (OP1) == 256) \
+ { \
+ (CODE) = (CODE) == LT ? LE : LEU; \
+ (OP1) = GEN_INT (255); \
+ } \
+ } while (0)
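+
+/* For example, (GT x y) with y in a register is rewritten as (LT y x),
+   and (LTU x 256) becomes (LEU x 255): 255 fits the unsigned 8-bit
+   literal field ("I" constraint) of the compare instructions, while
+   256 does not.  */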
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode DImode
+
+/* Mode of a function address in a call instruction (for indexing purposes). */
+
+#define FUNCTION_MODE Pmode
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap.
+
+ We define this on the Alpha so that gen_call and gen_call_value
+ get to see the SYMBOL_REF (for the hint field of the jsr). It will
+ then copy it into a register, thus actually letting the address be
+ cse'ed. */
+
+#define NO_FUNCTION_CSE
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Control the assembler format that we output. */
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "")
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "")
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* Output before read-only data. */
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.rdata"
+
+/* Output before writable data. */
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \
+ "$9", "$10", "$11", "$12", "$13", "$14", "$15", \
+ "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \
+ "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \
+ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", \
+ "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \
+ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",\
+ "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "FP"}
+
+/* Strip name encoding when emitting labels. */
+
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+do { \
+ const char *name_ = NAME; \
+ if (*name_ == '@' || *name_ == '%') \
+ name_ += 2; \
+ if (*name_ == '*') \
+ name_++; \
+ else \
+ fputs (user_label_prefix, STREAM); \
+ fputs (name_, STREAM); \
+} while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX ""
+
+/* This is how to output a label for a jump table.  Arguments are the same
+   as for (*targetm.asm_out.internal_label), except that the insn for the
+   jump table is also passed.  */
+
+#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \
+{ ASM_OUTPUT_ALIGN (FILE, 2); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); }
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*$%s%ld", (PREFIX), (long)(NUM))
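+
+/* For instance, PREFIX "L" and NUM 42 produce "*$L42"; the leading "*"
+   makes ASM_OUTPUT_LABELREF above emit the name verbatim, without the
+   (here empty) user label prefix.  */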
+
+/* We use the default ASCII-output routine, except that we don't write more
+ than 50 characters since the assembler doesn't support very long lines. */
+
+#define ASM_OUTPUT_ASCII(MYFILE, MYSTRING, MYLENGTH) \
+ do { \
+ FILE *_hide_asm_out_file = (MYFILE); \
+ const unsigned char *_hide_p = (const unsigned char *) (MYSTRING); \
+ int _hide_thissize = (MYLENGTH); \
+ int _size_so_far = 0; \
+ { \
+ FILE *asm_out_file = _hide_asm_out_file; \
+ const unsigned char *p = _hide_p; \
+ int thissize = _hide_thissize; \
+ int i; \
+ fprintf (asm_out_file, "\t.ascii \""); \
+ \
+ for (i = 0; i < thissize; i++) \
+ { \
+ register int c = p[i]; \
+ \
+ if (_size_so_far ++ > 50 && i < thissize - 4) \
+ _size_so_far = 0, fprintf (asm_out_file, "\"\n\t.ascii \""); \
+ \
+ if (c == '\"' || c == '\\') \
+ putc ('\\', asm_out_file); \
+ if (c >= ' ' && c < 0177) \
+ putc (c, asm_out_file); \
+ else \
+ { \
+ fprintf (asm_out_file, "\\%o", c); \
+ /* After an octal-escape, if a digit follows, \
+ terminate one string constant and start another. \
+ The VAX assembler fails to stop reading the escape \
+ after three digits, so this is the only way we \
+ can get it to parse the data properly. */ \
+ if (i < thissize - 1 && ISDIGIT (p[i + 1])) \
+ _size_so_far = 0, fprintf (asm_out_file, "\"\n\t.ascii \""); \
+ } \
+ } \
+ fprintf (asm_out_file, "\"\n"); \
+ } \
+ } \
+ while (0)
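+
+/* For example, the three bytes 'a', 0, '1' are emitted as
+       .ascii "a\0"
+       .ascii "1"
+   with the split after the octal escape, so the assembler cannot
+   misparse the following digit as part of the escape ("\01").  */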
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t.%s $L%d\n", TARGET_ABI_WINDOWS_NT ? "long" : "gprel32", \
+ (VALUE))
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+  if ((LOG) != 0) \
+    fprintf (FILE, "\t.align %d\n", (LOG));
+
+/* This is how to advance the location counter by SIZE bytes. */
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \
+( fputs ("\t.lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)))
+
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+/* Determine which codes are valid without a following integer.  These must
+   not be alphabetic.
+
+   ~    Generates the name of the current function.
+
+   /    Generates the instruction suffix.  The TRAP_SUFFIX and ROUND_SUFFIX
+        attributes are examined to determine what is appropriate.
+
+   ,    Generates single precision suffix for floating point
+        instructions (s for IEEE, f for VAX)
+
+   -    Generates double precision suffix for floating point
+        instructions (t for IEEE, g for VAX)
+
+   The codes #, * and & in the macro below are also accepted; they are
+   handled by print_operand in alpha.c.  */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '/' || (CODE) == ',' || (CODE) == '-' || (CODE) == '~' \
+ || (CODE) == '#' || (CODE) == '*' || (CODE) == '&')
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address((FILE), (ADDR))
+
+/* Tell collect that the object format is ECOFF. */
+#define OBJECT_FORMAT_COFF
+#define EXTENDED_COFF
+
+/* If we use NM, pass -g to it so it only lists globals. */
+#define NM_FLAGS "-pg"
+
+/* Definitions for debugging. */
+
+#define SDB_DEBUGGING_INFO 1 /* generate info for mips-tfile */
+#define DBX_DEBUGGING_INFO 1 /* generate embedded stabs */
+#define MIPS_DEBUGGING_INFO 1 /* MIPS specific debugging info */
+
+#ifndef PREFERRED_DEBUGGING_TYPE /* assume SDB_DEBUGGING_INFO */
+#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG
+#endif
+
+
+/* Correct the offset of automatic variables and arguments.  Note that
+   the Alpha debug format wants all automatic variables and arguments
+   to be in terms of two different offsets from the virtual frame pointer,
+   which is the stack pointer before any adjustment in the function.
+   The offset for the argument pointer is fixed for the native compiler:
+   it is either zero (for the no-arguments case) or large enough to hold
+   all argument registers.
+   The offset for the auto pointer is the fourth argument to the .frame
+   directive (local_offset).
+   To stay compatible with the native tools we use the same offsets
+   from the virtual frame pointer and adjust the debugger arg/auto offsets
+   accordingly.  These debugger offsets are set up in output_prolog.  */
+
+extern long alpha_arg_offset;
+extern long alpha_auto_offset;
+#define DEBUGGER_AUTO_OFFSET(X) \
+ ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + alpha_auto_offset)
+#define DEBUGGER_ARG_OFFSET(OFFSET, X) ((OFFSET) + alpha_arg_offset)
+
+/* mips-tfile doesn't understand .stabd directives. */
+#define DBX_OUTPUT_SOURCE_LINE(STREAM, LINE, COUNTER) do { \
+ dbxout_begin_stabn_sline (LINE); \
+ dbxout_stab_value_internal_label ("LM", &COUNTER); \
+} while (0)
+
+/* We want to use MIPS-style .loc directives for SDB line numbers. */
+extern int num_source_filenames;
+#define SDB_OUTPUT_SOURCE_LINE(STREAM, LINE) \
+ fprintf (STREAM, "\t.loc\t%d %d\n", num_source_filenames, LINE)
+
+#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \
+ alpha_output_filename (STREAM, NAME)
+
+/* mips-tfile.c limits us to strings of one page. We must underestimate this
+ number, because the real length runs past this up to the next
+ continuation point. This is really a dbxout.c bug. */
+#define DBX_CONTIN_LENGTH 3000
+
+/* By default, turn on GDB extensions. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* Stabs-in-ECOFF can't handle dbxout_function_end(). */
+#define NO_DBX_FUNCTION_END 1
+
+/* If we are smuggling stabs through the ALPHA ECOFF object
+ format, put a comment in front of the .stab<x> operation so
+ that the ALPHA assembler does not choke. The mips-tfile program
+ will correctly put the stab into the object file. */
+
+#define ASM_STABS_OP ((TARGET_GAS) ? "\t.stabs\t" : " #.stabs\t")
+#define ASM_STABN_OP ((TARGET_GAS) ? "\t.stabn\t" : " #.stabn\t")
+#define ASM_STABD_OP ((TARGET_GAS) ? "\t.stabd\t" : " #.stabd\t")
+
+/* Forward references to tags are allowed. */
+#define SDB_ALLOW_FORWARD_REFERENCES
+
+/* Unknown tags are also allowed. */
+#define SDB_ALLOW_UNKNOWN_REFERENCES
+
+#define PUT_SDB_DEF(a) \
+do { \
+ fprintf (asm_out_file, "\t%s.def\t", \
+ (TARGET_GAS) ? "" : "#"); \
+ ASM_OUTPUT_LABELREF (asm_out_file, a); \
+ fputc (';', asm_out_file); \
+} while (0)
+
+#define PUT_SDB_PLAIN_DEF(a) \
+do { \
+ fprintf (asm_out_file, "\t%s.def\t.%s;", \
+ (TARGET_GAS) ? "" : "#", (a)); \
+} while (0)
+
+#define PUT_SDB_TYPE(a) \
+do { \
+ fprintf (asm_out_file, "\t.type\t0x%x;", (a)); \
+} while (0)
+
+/* For block start and end, we create labels, so that
+   later we can figure out where the correct offset is.
+   The normal .ent/.end serve well enough for functions,
+   so the function start/end hooks below are left empty.  */
+
+extern int sdb_label_count; /* block start/end next label # */
+
+#define PUT_SDB_BLOCK_START(LINE) \
+do { \
+ fprintf (asm_out_file, \
+ "$Lb%d:\n\t%s.begin\t$Lb%d\t%d\n", \
+ sdb_label_count, \
+ (TARGET_GAS) ? "" : "#", \
+ sdb_label_count, \
+ (LINE)); \
+ sdb_label_count++; \
+} while (0)
+
+#define PUT_SDB_BLOCK_END(LINE) \
+do { \
+ fprintf (asm_out_file, \
+ "$Le%d:\n\t%s.bend\t$Le%d\t%d\n", \
+ sdb_label_count, \
+ (TARGET_GAS) ? "" : "#", \
+ sdb_label_count, \
+ (LINE)); \
+ sdb_label_count++; \
+} while (0)
+
+#define PUT_SDB_FUNCTION_START(LINE)
+
+#define PUT_SDB_FUNCTION_END(LINE)
+
+#define PUT_SDB_EPILOGUE_END(NAME) ((void)(NAME))
+
+/* Macros for mips-tfile.c to encapsulate stabs in ECOFF, and for
+ mips-tdump.c to print them out.
+
+ These must match the corresponding definitions in gdb/mipsread.c.
+ Unfortunately, gcc and gdb do not currently share any directories. */
+
+#define CODE_MASK 0x8F300
+#define MIPS_IS_STAB(sym) (((sym)->index & 0xFFF00) == CODE_MASK)
+#define MIPS_MARK_STAB(code) ((code)+CODE_MASK)
+#define MIPS_UNMARK_STAB(code) ((code)-CODE_MASK)
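+
+/* Worked example: the stabs code N_LSYM (0x80) marks to 0x8F380;
+   0x8F380 & 0xFFF00 == 0x8F300 identifies it as a stab, and
+   unmarking recovers 0x80.  */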
+
+/* Override some mips-tfile definitions. */
+
+#define SHASH_SIZE 511
+#define THASH_SIZE 55
+
+/* Align ecoff symbol tables to avoid OSF1/1.3 nm complaints. */
+
+#define ALIGN_SYMTABLE_OFFSET(OFFSET) (((OFFSET) + 7) & ~7)
+
+/* The system headers under Alpha systems are generally C++-aware. */
+#define NO_IMPLICIT_EXTERN_C
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
new file mode 100644
index 000000000..d6fba76f7
--- /dev/null
+++ b/gcc/config/alpha/alpha.md
@@ -0,0 +1,7999 @@
+;; Machine description for DEC Alpha for GNU C compiler
+;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+;; 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Uses of UNSPEC in this file:
+
+(define_constants
+ [(UNSPEC_ARG_HOME 0)
+ (UNSPEC_LDGP1 1)
+ (UNSPEC_INSXH 2)
+ (UNSPEC_MSKXH 3)
+ (UNSPEC_CVTQL 4)
+ (UNSPEC_CVTLQ 5)
+ (UNSPEC_UMK_LAUM 6)
+ (UNSPEC_UMK_LALM 7)
+ (UNSPEC_UMK_LAL 8)
+ (UNSPEC_UMK_LOAD_CIW 9)
+ (UNSPEC_LDGP2 10)
+ (UNSPEC_LITERAL 11)
+ (UNSPEC_LITUSE 12)
+ (UNSPEC_SIBCALL 13)
+ (UNSPEC_SYMBOL 14)
+
+ ;; TLS Support
+ (UNSPEC_TLSGD_CALL 15)
+ (UNSPEC_TLSLDM_CALL 16)
+ (UNSPEC_TLSGD 17)
+ (UNSPEC_TLSLDM 18)
+ (UNSPEC_DTPREL 19)
+ (UNSPEC_TPREL 20)
+ (UNSPEC_TP 21)
+
+ ;; Builtins
+ (UNSPEC_CMPBGE 22)
+ (UNSPEC_ZAP 23)
+ (UNSPEC_AMASK 24)
+ (UNSPEC_IMPLVER 25)
+ (UNSPEC_PERR 26)
+ (UNSPEC_COPYSIGN 27)
+
+ ;; Atomic operations
+ (UNSPEC_MB 28)
+ (UNSPEC_ATOMIC 31)
+ (UNSPEC_CMPXCHG 32)
+ (UNSPEC_XCHG 33)
+ ])
+
+;; UNSPEC_VOLATILE:
+
+(define_constants
+ [(UNSPECV_IMB 0)
+ (UNSPECV_BLOCKAGE 1)
+ (UNSPECV_SETJMPR 2) ; builtin_setjmp_receiver
+ (UNSPECV_LONGJMP 3) ; builtin_longjmp
+ (UNSPECV_TRAPB 4)
+ (UNSPECV_PSPL 5) ; prologue_stack_probe_loop
+ (UNSPECV_REALIGN 6)
+ (UNSPECV_EHR 7) ; exception_receiver
+ (UNSPECV_MCOUNT 8)
+ (UNSPECV_FORCE_MOV 9)
+ (UNSPECV_LDGP1 10)
+ (UNSPECV_PLDGP2 11) ; prologue ldgp
+ (UNSPECV_SET_TP 12)
+ (UNSPECV_RPCC 13)
+ (UNSPECV_SETJMPR_ER 14) ; builtin_setjmp_receiver fragment
+ (UNSPECV_LL 15) ; load-locked
+ (UNSPECV_SC 16) ; store-conditional
+ ])
+
+;; On non-BWX targets, CQImode must be handled similarly to HImode
+;; when generating reloads.
+(define_mode_iterator RELOAD12 [QI HI CQI])
+(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")])
+
+;; Other mode iterators
+(define_mode_iterator I12MODE [QI HI])
+(define_mode_iterator I48MODE [SI DI])
+(define_mode_attr modesuffix [(SI "l") (DI "q")])
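+
+;; As an illustration, a single pattern written over I48MODE (with
+;; :I48MODE in its rtl and <modesuffix> in its assembler template)
+;; expands into an SImode and a DImode variant, the template yielding
+;; the "l" or "q" opcode suffix; RELOAD12 likewise stamps out QI, HI
+;; and CQI variants, with <reloadmode> giving qi, hi and hi respectively.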
+
+;; Where necessary, the suffixes _le and _be are used to distinguish between
+;; little-endian and big-endian patterns.
+;;
+;; Note that the Unicos/Mk assembler does not support the following
+;; opcodes: mov, fmov, nop, fnop, unop.
+
+;; Processor type -- this attribute must exactly match the processor_type
+;; enumeration in alpha.h.
+
+(define_attr "tune" "ev4,ev5,ev6"
+ (const (symbol_ref "((enum attr_tune) alpha_tune)")))
+
+;; Define an insn type attribute. This is used in function unit delay
+;; computations, among other purposes. For the most part, we use the names
+;; defined in the EV4 documentation, but add a few that we have to know about
+;; separately.
+
+(define_attr "type"
+ "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,jsr,iadd,ilog,shift,icmov,fcmov,
+ icmp,imul,fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c,
+ multi,none"
+ (const_string "iadd"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")])
+
+;; Define the operand size an insn operates on. Used primarily by mul
+;; and div operations that have size dependent timings.
+
+(define_attr "opsize" "si,di,udi"
+ (const_string "di"))
+
+;; The TRAP attribute marks instructions that may generate traps
+;; (which are imprecise and may need a trapb if software completion
+;; is desired).
+
+(define_attr "trap" "no,yes"
+ (const_string "no"))
+
+;; The ROUND_SUFFIX attribute marks which instructions require a
+;; rounding-mode suffix. The value NONE indicates no suffix,
+;; the value NORMAL indicates a suffix controlled by alpha_fprm.
+
+(define_attr "round_suffix" "none,normal,c"
+ (const_string "none"))
+
+;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix:
+;; NONE no suffix
+;; SU accepts only /su (cmpt et al)
+;; SUI accepts only /sui (cvtqt and cvtqs)
+;; V_SV accepts /v and /sv (cvtql only)
+;; V_SV_SVI accepts /v, /sv and /svi (cvttq only)
+;; U_SU_SUI accepts /u, /su and /sui (most fp instructions)
+;;
+;; The actual suffix emitted is controlled by alpha_fptm.
+
+(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui"
+ (const_string "none"))
+
+;; The length of an instruction sequence in bytes.
+
+(define_attr "length" ""
+ (const_int 4))
+
+;; The USEGP attribute marks instructions that have relocations that use
+;; the GP.
+
+(define_attr "usegp" "no,yes"
+ (cond [(eq_attr "type" "ldsym,jsr")
+ (const_string "yes")
+ (eq_attr "type" "ild,fld,ist,fst")
+ (symbol_ref "((enum attr_usegp) alpha_find_lo_sum_using_gp (insn))")
+ ]
+ (const_string "no")))
+
+;; The CANNOT_COPY attribute marks instructions with relocations that
+;; cannot easily be duplicated. This includes insns with gpdisp relocs
+;; since they have to stay in 1-1 correspondence with one another. This
+;; also includes jsr insns, since they must stay in correspondence with
+;; the immediately following gpdisp instructions.
+
+(define_attr "cannot_copy" "false,true"
+ (const_string "false"))
+
+;; Include scheduling descriptions.
+
+(include "ev4.md")
+(include "ev5.md")
+(include "ev6.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; First define the arithmetic insns. Note that the 32-bit forms also
+;; sign-extend.
+
+;; Handle 32-64 bit extension from memory to a floating point register
+;; specially, since this occurs frequently in int->double conversions.
+;;
+;; Note that while we must retain the =f case in the insn for reload's
+;; benefit, it should be eliminated after reload, so we should never emit
+;; code for that case. But we don't reject the possibility.
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+ "")
+
+(define_insn "*cvtlq"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")]
+ UNSPEC_CVTLQ))]
+ ""
+ "cvtlq %1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "*extendsidi2_1"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,!*f")
+ (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))]
+ ""
+ "@
+ addl $31,%1,%0
+ ldl %0,%1
+ lds %0,%1\;cvtlq %0,%0"
+ [(set_attr "type" "iadd,ild,fld")
+ (set_attr "length" "*,*,8")])
+
+(define_split
+ [(set (match_operand:DI 0 "hard_fp_register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+ operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));
+})
+
+;; Optimize sign-extension of SImode loads. This shows up in the wake of
+;; reload when converting fp->int.
+
+(define_peephole2
+ [(set (match_operand:SI 0 "hard_int_register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:DI 2 "hard_int_register_operand" "")
+ (sign_extend:DI (match_dup 0)))]
+ "true_regnum (operands[0]) == true_regnum (operands[2])
+ || peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (sign_extend:DI (match_dup 1)))]
+ "")
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ")
+ (match_operand:SI 2 "add_operand" "rI,O,K,L")))]
+ ""
+ "@
+ addl %r1,%2,%0
+ subl %r1,%n2,%0
+ lda %0,%2(%r1)
+ ldah %0,%h2(%r1)")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "! add_operand (operands[2], SImode)"
+ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000);
+ HOST_WIDE_INT rest = val - low;
+
+ operands[3] = GEN_INT (rest);
+ operands[4] = GEN_INT (low);
+})
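+
+;; Worked example of the hi/lo split: val = 0x1234abcd gives
+;; low = 0xabcd - 0x10000 = -0x5433 and rest = 0x12350000, so the
+;; constant is rebuilt from an ldah-reachable part ("L" constraint)
+;; plus a signed 16-bit lda part ("K" constraint).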
+
+(define_insn "*addsi_se"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:SI 2 "sext_add_operand" "rI,O"))))]
+ ""
+ "@
+ addl %r1,%2,%0
+ subl %r1,%n2,%0")
+
+(define_insn "*addsi_se2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI
+ (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:DI 2 "sext_add_operand" "rI,O"))
+ 0)))]
+ ""
+ "@
+ addl %r1,%2,%0
+ subl %r1,%n2,%0")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI
+ (plus:SI (match_operand:SI 1 "reg_not_elim_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))))
+ (clobber (match_operand:SI 3 "reg_not_elim_operand" ""))]
+ "! sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) % 4 == 0"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3)
+ (match_dup 5))
+ (match_dup 1))))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]) / 4;
+ int mult = 4;
+
+ if (val % 2 == 0)
+ val /= 2, mult = 8;
+
+ operands[4] = GEN_INT (val);
+ operands[5] = GEN_INT (mult);
+})
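+
+;; E.g. adding the constant 40: val = 40/4 = 10 is even, so val becomes 5
+;; and mult becomes 8, giving "load 5 into the scratch, then s8addl"
+;; (5*8 + src); a constant divisible by 4 but not by 8, such as 20,
+;; uses s4addl instead.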
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI
+ (plus:SI (match_operator:SI 1 "comparison_operator"
+ [(match_operand 2 "" "")
+ (match_operand 3 "" "")])
+ (match_operand:SI 4 "add_operand" ""))))
+ (clobber (match_operand:DI 5 "register_operand" ""))]
+ ""
+ [(set (match_dup 5) (match_dup 6))
+ (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))]
+{
+ operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode,
+ operands[2], operands[3]);
+ operands[7] = gen_lowpart (SImode, operands[5]);
+})
+
+(define_insn "addvsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:SI 2 "sext_add_operand" "rI,O")))
+ (trap_if (ne (plus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (sign_extend:DI (plus:SI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "@
+ addlv %r1,%2,%0
+ sublv %r1,%n2,%0")
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "add_operand" "")))]
+ ""
+ "")
+
+(define_insn "*adddi_er_lo16_dtp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "dtp16_symbolic_operand" "")))]
+ "HAVE_AS_TLS"
+ "lda %0,%2(%1)\t\t!dtprel")
+
+(define_insn "*adddi_er_hi32_dtp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (high:DI (match_operand:DI 2 "dtp32_symbolic_operand" ""))))]
+ "HAVE_AS_TLS"
+ "ldah %0,%2(%1)\t\t!dtprelhi")
+
+(define_insn "*adddi_er_lo32_dtp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "dtp32_symbolic_operand" "")))]
+ "HAVE_AS_TLS"
+ "lda %0,%2(%1)\t\t!dtprello")
+
+(define_insn "*adddi_er_lo16_tp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "tp16_symbolic_operand" "")))]
+ "HAVE_AS_TLS"
+ "lda %0,%2(%1)\t\t!tprel")
+
+(define_insn "*adddi_er_hi32_tp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (high:DI (match_operand:DI 2 "tp32_symbolic_operand" ""))))]
+ "HAVE_AS_TLS"
+ "ldah %0,%2(%1)\t\t!tprelhi")
+
+(define_insn "*adddi_er_lo32_tp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "tp32_symbolic_operand" "")))]
+ "HAVE_AS_TLS"
+ "lda %0,%2(%1)\t\t!tprello")
+
+(define_insn "*adddi_er_high_l"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (high:DI (match_operand:DI 2 "local_symbolic_operand" ""))))]
+ "TARGET_EXPLICIT_RELOCS && reload_completed"
+ "ldah %0,%2(%1)\t\t!gprelhigh"
+ [(set_attr "usegp" "yes")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (high:DI (match_operand:DI 1 "local_symbolic_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 2) (high:DI (match_dup 1))))]
+ "operands[2] = pic_offset_table_rtx;")
+
+;; We used to expend quite a lot of effort choosing addq/subq/lda.
+;; With complications like
+;;
+;; The NT stack unwind code can't handle a subq to adjust the stack
+;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3,
+;; the exception handling code will loop if a subq is used and an
+;; exception occurs.
+;;
+;; The 19980616 change to emit prologues as RTL also confused some
+;; versions of GDB, which also interprets prologues. This has been
+;; fixed as of GDB 4.18, but it does no harm to unconditionally
+;; use lda here.
+;;
+;; and the fact that the three insns schedule exactly the same, it's
+;; just not worth the effort.
+
+(define_insn "*adddi_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r")
+ (match_operand:DI 2 "add_operand" "r,K,L")))]
+ ""
+ "@
+ addq %1,%2,%0
+ lda %0,%2(%1)
+ ldah %0,%h2(%1)")
+
+;; ??? Allow large constants when basing off the frame pointer or some
+;; virtual register that may eliminate to the frame pointer. This is
+;; done because register elimination offsets will change the hi/lo split,
+;; and if we split before reload, we will require additional instructions.
+
+(define_insn "*adddi_fp_hack"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r")
+ (match_operand:DI 2 "const_int_operand" "K,L,n")))]
+ "NONSTRICT_REG_OK_FP_BASE_P (operands[1])
+ && INTVAL (operands[2]) >= 0
+ /* This is the largest constant an lda+ldah pair can add, minus
+ an upper bound on the displacement between SP and AP during
+ register elimination. See INITIAL_ELIMINATION_OFFSET. */
+ && INTVAL (operands[2])
+ < (0x7fff8000
+ - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD
+ - ALPHA_ROUND(crtl->outgoing_args_size)
+ - (ALPHA_ROUND (get_frame_size ()
+ + max_reg_num () * UNITS_PER_WORD
+ + crtl->args.pretend_args_size)
+ - crtl->args.pretend_args_size))"
+ "@
+ lda %0,%2(%1)
+ ldah %0,%h2(%1)
+ #")
+
+;; Don't do this if we are adjusting SP since we don't want to do it
+;; in two steps. Don't split FP sources for the reason listed above.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))]
+ "! add_operand (operands[2], DImode)
+ && operands[0] != stack_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && operands[1] != arg_pointer_rtx"
+ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000);
+ HOST_WIDE_INT rest = val - low;
+ rtx rest_rtx = GEN_INT (rest);
+
+ operands[4] = GEN_INT (low);
+ if (satisfies_constraint_L (rest_rtx))
+ operands[3] = rest_rtx;
+ else if (can_create_pseudo_p ())
+ {
+ operands[3] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[3], operands[2]);
+ emit_insn (gen_adddi3 (operands[0], operands[1], operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn "*saddl"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r")
+ (match_operand:SI 2 "const48_operand" "I,I"))
+ (match_operand:SI 3 "sext_add_operand" "rI,O")))]
+ ""
+ "@
+ s%2addl %1,%3,%0
+ s%2subl %1,%n3,%0")
+
+(define_insn "*saddl_se"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI
+ (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r")
+ (match_operand:SI 2 "const48_operand" "I,I"))
+ (match_operand:SI 3 "sext_add_operand" "rI,O"))))]
+ ""
+ "@
+ s%2addl %1,%3,%0
+ s%2subl %1,%n3,%0")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI
+ (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator"
+ [(match_operand 2 "" "")
+ (match_operand 3 "" "")])
+ (match_operand:SI 4 "const48_operand" ""))
+ (match_operand:SI 5 "sext_add_operand" ""))))
+ (clobber (match_operand:DI 6 "reg_not_elim_operand" ""))]
+ ""
+ [(set (match_dup 6) (match_dup 7))
+ (set (match_dup 0)
+ (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4))
+ (match_dup 5))))]
+{
+ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode,
+ operands[2], operands[3]);
+ operands[8] = gen_lowpart (SImode, operands[6]);
+})
+
+(define_insn "*saddq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r")
+ (match_operand:DI 2 "const48_operand" "I,I"))
+ (match_operand:DI 3 "sext_add_operand" "rI,O")))]
+ ""
+ "@
+ s%2addq %1,%3,%0
+ s%2subq %1,%n3,%0")
+
+(define_insn "addvdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:DI 2 "sext_add_operand" "rI,O")))
+ (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (plus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "@
+ addqv %r1,%2,%0
+ subqv %r1,%n2,%0")
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))]
+ ""
+ "subl $31,%1,%0")
+
+(define_insn "*negsi_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (neg:SI
+ (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))]
+ ""
+ "subl $31,%1,%0")
+
+(define_insn "negvsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))
+ (trap_if (ne (neg:DI (sign_extend:DI (match_dup 1)))
+ (sign_extend:DI (neg:SI (match_dup 1))))
+ (const_int 0))]
+ ""
+ "sublv $31,%1,%0")
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))]
+ ""
+ "subq $31,%1,%0")
+
+(define_insn "negvdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1)))
+ (sign_extend:TI (neg:DI (match_dup 1))))
+ (const_int 0))]
+ ""
+ "subqv $31,%1,%0")
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI")))]
+ ""
+ "subl %r1,%2,%0")
+
+(define_insn "*subsi_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))]
+ ""
+ "subl %r1,%2,%0")
+
+(define_insn "*subsi_se2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI"))
+ 0)))]
+ ""
+ "subl %r1,%2,%0")
+
+(define_insn "subvsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI")))
+ (trap_if (ne (minus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (sign_extend:DI (minus:SI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "sublv %r1,%2,%0")
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")))]
+ ""
+ "subq %r1,%2,%0")
+
+(define_insn "*ssubl"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r")
+ (match_operand:SI 2 "const48_operand" "I"))
+ (match_operand:SI 3 "reg_or_8bit_operand" "rI")))]
+ ""
+ "s%2subl %1,%3,%0")
+
+(define_insn "*ssubl_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r")
+ (match_operand:SI 2 "const48_operand" "I"))
+ (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))]
+ ""
+ "s%2subl %1,%3,%0")
+
+(define_insn "*ssubq"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r")
+ (match_operand:DI 2 "const48_operand" "I"))
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")))]
+ ""
+ "s%2subq %1,%3,%0")
+
+(define_insn "subvdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")))
+ (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (minus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "subqv %r1,%2,%0")
+
+;; The Unicos/Mk assembler doesn't support mull.
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI")))]
+ "!TARGET_ABI_UNICOSMK"
+ "mull %r1,%2,%0"
+ [(set_attr "type" "imul")
+ (set_attr "opsize" "si")])
+
+(define_insn "*mulsi_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))]
+ "!TARGET_ABI_UNICOSMK"
+ "mull %r1,%2,%0"
+ [(set_attr "type" "imul")
+ (set_attr "opsize" "si")])
+
+(define_insn "mulvsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:SI 2 "reg_or_8bit_operand" "rI")))
+ (trap_if (ne (mult:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (sign_extend:DI (mult:SI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ "!TARGET_ABI_UNICOSMK"
+ "mullv %r1,%2,%0"
+ [(set_attr "type" "imul")
+ (set_attr "opsize" "si")])
+
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")))]
+ ""
+ "mulq %r1,%2,%0"
+ [(set_attr "type" "imul")])
+
+(define_insn "mulvdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")))
+ (trap_if (ne (mult:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (mult:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "mulqv %r1,%2,%0"
+ [(set_attr "type" "imul")])
+
+(define_expand "umuldi3_highpart"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "register_operand" ""))
+ (match_operand:DI 2 "reg_or_8bit_operand" ""))
+ (const_int 64))))]
+ ""
+{
+ if (REG_P (operands[2]))
+ operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]);
+})
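+
+;; The wrap above makes a register operand match *umuldi3_highpart_reg
+;; below, which spells out both zero_extends; an 8-bit literal is left
+;; bare for *umuldi3_highpart_const, since CONST_INTs are modeless and
+;; zero_extending one would not be valid rtl.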
+
+(define_insn "*umuldi3_highpart_reg"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "register_operand" "r"))
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "r")))
+ (const_int 64))))]
+ ""
+ "umulh %1,%2,%0"
+ [(set_attr "type" "imul")
+ (set_attr "opsize" "udi")])
+
+(define_insn "*umuldi3_highpart_const"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:TI 2 "cint8_operand" "I"))
+ (const_int 64))))]
+ ""
+ "umulh %1,%2,%0"
+ [(set_attr "type" "imul")
+ (set_attr "opsize" "udi")])
+
+;; The divide and remainder operations take their inputs from r24 and
+;; r25, put their output in r27, and clobber r23 and r28 on all
+;; systems except Unicos/Mk.  On Unicos/Mk, the standard library provides
+;; subroutines which use the standard calling convention and work on
+;; DImode operands.
+
+;; ??? Force sign-extension here because some versions of OSF/1 and
+;; Interix/NT don't do the right thing if the inputs are not properly
+;; sign-extended. But Linux, for instance, does not have this
+;; problem. Is it worth the complication here to eliminate the sign
+;; extension?
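+
+;; As a sketch of the result (the "a", "b" and "c" constraints below pin
+;; operands to $24, $25 and $27): a 32-bit signed division ends up as
+;; something like "divl $24,$25,$27", which the assembler in turn
+;; expands into a call to __divl that clobbers $23 and $28.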
+
+(define_expand "divsi3"
+ [(set (match_dup 3)
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "")))
+ (parallel [(set (match_dup 5)
+ (sign_extend:DI (div:SI (match_dup 3) (match_dup 4))))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])
+ (set (match_operand:SI 0 "nonimmediate_operand" "")
+ (subreg:SI (match_dup 5) 0))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+})
+
+(define_expand "udivsi3"
+ [(set (match_dup 3)
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "")))
+ (parallel [(set (match_dup 5)
+ (sign_extend:DI (udiv:SI (match_dup 3) (match_dup 4))))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])
+ (set (match_operand:SI 0 "nonimmediate_operand" "")
+ (subreg:SI (match_dup 5) 0))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+})
+
+(define_expand "modsi3"
+ [(set (match_dup 3)
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "")))
+ (parallel [(set (match_dup 5)
+ (sign_extend:DI (mod:SI (match_dup 3) (match_dup 4))))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])
+ (set (match_operand:SI 0 "nonimmediate_operand" "")
+ (subreg:SI (match_dup 5) 0))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+})
+
+(define_expand "umodsi3"
+ [(set (match_dup 3)
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "")))
+ (parallel [(set (match_dup 5)
+ (sign_extend:DI (umod:SI (match_dup 3) (match_dup 4))))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])
+ (set (match_operand:SI 0 "nonimmediate_operand" "")
+ (subreg:SI (match_dup 5) 0))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+})
+
+(define_expand "divdi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "")
+
+(define_expand "udivdi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (udiv:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "")
+
+(define_expand "moddi3"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))]
+ "!TARGET_ABI_OPEN_VMS"
+{
+ if (TARGET_ABI_UNICOSMK)
+ emit_insn (gen_moddi3_umk (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_moddi3_dft (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "moddi3_dft"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mod:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "")
+
+;; On Unicos/Mk, we do as the system's C compiler does:
+;; compute the quotient, multiply and subtract.
+
+(define_expand "moddi3_umk"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))]
+ "TARGET_ABI_UNICOSMK"
+{
+ rtx div, mul = gen_reg_rtx (DImode);
+
+ div = expand_binop (DImode, sdiv_optab, operands[1], operands[2],
+ NULL_RTX, 0, OPTAB_LIB);
+ div = force_reg (DImode, div);
+ emit_insn (gen_muldi3 (mul, operands[2], div));
+ emit_insn (gen_subdi3 (operands[0], operands[1], mul));
+ DONE;
+})
+
+(define_expand "umoddi3"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))]
+ "! TARGET_ABI_OPEN_VMS"
+{
+ if (TARGET_ABI_UNICOSMK)
+ emit_insn (gen_umoddi3_umk (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_umoddi3_dft (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "umoddi3_dft"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (umod:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "")
+
+(define_expand "umoddi3_umk"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))]
+ "TARGET_ABI_UNICOSMK"
+{
+ rtx div, mul = gen_reg_rtx (DImode);
+
+ div = expand_binop (DImode, udiv_optab, operands[1], operands[2],
+ NULL_RTX, 1, OPTAB_LIB);
+ div = force_reg (DImode, div);
+ emit_insn (gen_muldi3 (mul, operands[2], div));
+ emit_insn (gen_subdi3 (operands[0], operands[1], mul));
+ DONE;
+})
+
+;; Lengths of 8 for ldq $t12,__divq($gp); jsr $t9,($t12),__divq as
+;; expanded by the assembler.
+
+(define_insn_and_split "*divmodsi_internal_er"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (sign_extend:DI (match_operator:SI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")])))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0)
+ (sign_extend:DI (match_dup 3)))
+ (use (match_dup 0))
+ (use (match_dup 4))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+{
+ const char *str;
+ switch (GET_CODE (operands[3]))
+ {
+ case DIV:
+ str = "__divl";
+ break;
+ case UDIV:
+ str = "__divlu";
+ break;
+ case MOD:
+ str = "__reml";
+ break;
+ case UMOD:
+ str = "__remlu";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx,
+ gen_rtx_SYMBOL_REF (DImode, str),
+ operands[4]));
+}
+ [(set_attr "type" "jsr")
+ (set_attr "length" "8")])
+
+(define_insn "*divmodsi_internal_er_1"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (sign_extend:DI (match_operator:SI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")])))
+ (use (match_operand:DI 4 "register_operand" "c"))
+ (use (match_operand 5 "const_int_operand" ""))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS"
+ "jsr $23,($27),__%E3%j5"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "4")])
+
+(define_insn "*divmodsi_internal"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (sign_extend:DI (match_operator:SI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")])))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "%E3 %1,%2,%0"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "*divmoddi_internal_er"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (match_operator:DI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")]))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 3))
+ (use (match_dup 0))
+ (use (match_dup 4))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))])]
+{
+ const char *str;
+ switch (GET_CODE (operands[3]))
+ {
+ case DIV:
+ str = "__divq";
+ break;
+ case UDIV:
+ str = "__divqu";
+ break;
+ case MOD:
+ str = "__remq";
+ break;
+ case UMOD:
+ str = "__remqu";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx,
+ gen_rtx_SYMBOL_REF (DImode, str),
+ operands[4]));
+}
+ [(set_attr "type" "jsr")
+ (set_attr "length" "8")])
+
+(define_insn "*divmoddi_internal_er_1"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (match_operator:DI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")]))
+ (use (match_operand:DI 4 "register_operand" "c"))
+ (use (match_operand 5 "const_int_operand" ""))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "TARGET_EXPLICIT_RELOCS && ! TARGET_ABI_OPEN_VMS"
+ "jsr $23,($27),__%E3%j5"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "4")])
+
+(define_insn "*divmoddi_internal"
+ [(set (match_operand:DI 0 "register_operand" "=c")
+ (match_operator:DI 3 "divmod_operator"
+ [(match_operand:DI 1 "register_operand" "a")
+ (match_operand:DI 2 "register_operand" "b")]))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 28))]
+ "! TARGET_ABI_OPEN_VMS && ! TARGET_ABI_UNICOSMK"
+ "%E3 %1,%2,%0"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "8")])
+
+;; Next are the basic logical operations. We only expose the DImode operations
+;; to the rtl expanders, but SImode versions exist for combine as well as for
+;; the atomic operation splitters.
+
+(define_insn "*andsi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ")
+ (match_operand:SI 2 "and_operand" "rI,N,MH")))]
+ ""
+ "@
+ and %r1,%2,%0
+ bic %r1,%N2,%0
+ zapnot %r1,%m2,%0"
+ [(set_attr "type" "ilog,ilog,shift")])
+
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ")
+ (match_operand:DI 2 "and_operand" "rI,N,MH")))]
+ ""
+ "@
+ and %r1,%2,%0
+ bic %r1,%N2,%0
+ zapnot %r1,%m2,%0"
+ [(set_attr "type" "ilog,ilog,shift")])
+
+;; There are times when we can split an AND into two AND insns. This occurs
+;; when we can first clear whole bytes and then clear the remaining bits.  For
+;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07".
+;; Only do this when running on a 64-bit host, since the computations are
+;; too messy otherwise.
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))]
+ "HOST_BITS_PER_WIDE_INT == 64 && ! and_operand (operands[2], DImode)"
+ [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))]
+{
+ unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]);
+ unsigned HOST_WIDE_INT mask2 = mask1;
+ int i;
+
+ /* For each byte that isn't all zeros, make it all ones. */
+ for (i = 0; i < 64; i += 8)
+ if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0)
+ mask1 |= (HOST_WIDE_INT) 0xff << i;
+
+ /* Now turn on any bits we've just turned off. */
+ mask2 |= ~ mask1;
+
+ operands[3] = GEN_INT (mask1);
+ operands[4] = GEN_INT (mask2);
+})
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (! TARGET_BWX)
+ operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*zero_extendqihi2_bwx"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_BWX"
+ "@
+ and %1,0xff,%0
+ ldbu %0,%1"
+ [(set_attr "type" "ilog,ild")])
+
+(define_insn "*zero_extendqihi2_nobwx"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ "! TARGET_BWX"
+ "and %1,0xff,%0"
+ [(set_attr "type" "ilog")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (! TARGET_BWX)
+ operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*zero_extendqisi2_bwx"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_BWX"
+ "@
+ and %1,0xff,%0
+ ldbu %0,%1"
+ [(set_attr "type" "ilog,ild")])
+
+(define_insn "*zero_extendqisi2_nobwx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ "! TARGET_BWX"
+ "and %1,0xff,%0"
+ [(set_attr "type" "ilog")])
+
+(define_expand "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (! TARGET_BWX)
+ operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*zero_extendqidi2_bwx"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_BWX"
+ "@
+ and %1,0xff,%0
+ ldbu %0,%1"
+ [(set_attr "type" "ilog,ild")])
+
+(define_insn "*zero_extendqidi2_nobwx"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))]
+ "! TARGET_BWX"
+ "and %1,0xff,%0"
+ [(set_attr "type" "ilog")])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (! TARGET_BWX)
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "*zero_extendhisi2_bwx"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_BWX"
+ "@
+ zapnot %1,3,%0
+ ldwu %0,%1"
+ [(set_attr "type" "shift,ild")])
+
+(define_insn "*zero_extendhisi2_nobwx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ "! TARGET_BWX"
+ "zapnot %1,3,%0"
+ [(set_attr "type" "shift")])
+
+(define_expand "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (! TARGET_BWX)
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "*zero_extendhidi2_bwx"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_BWX"
+ "@
+ zapnot %1,3,%0
+ ldwu %0,%1"
+ [(set_attr "type" "shift,ild")])
+
+(define_insn "*zero_extendhidi2_nobwx"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "zapnot %1,3,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "zapnot %1,15,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "*andnotsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI"))
+ (match_operand:SI 2 "reg_or_0_operand" "rJ")))]
+ ""
+ "bic %r2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "andnotdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI"))
+ (match_operand:DI 2 "reg_or_0_operand" "rJ")))]
+ ""
+ "bic %r2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*iorsi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:SI 2 "or_operand" "rI,N")))]
+ ""
+ "@
+ bis %r1,%2,%0
+ ornot %r1,%N2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:DI 2 "or_operand" "rI,N")))]
+ ""
+ "@
+ bis %r1,%2,%0
+ ornot %r1,%N2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*one_cmplsi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))]
+ ""
+ "ornot $31,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "one_cmpldi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))]
+ ""
+ "ornot $31,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*iornotsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI"))
+ (match_operand:SI 2 "reg_or_0_operand" "rJ")))]
+ ""
+ "ornot %r2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*iornotdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI"))
+ (match_operand:DI 2 "reg_or_0_operand" "rJ")))]
+ ""
+ "ornot %r2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*xorsi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:SI 2 "or_operand" "rI,N")))]
+ ""
+ "@
+ xor %r1,%2,%0
+ eqv %r1,%N2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ")
+ (match_operand:DI 2 "or_operand" "rI,N")))]
+ ""
+ "@
+ xor %r1,%2,%0
+ eqv %r1,%N2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*xornotsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "%rJ")
+ (match_operand:SI 2 "register_operand" "rI"))))]
+ ""
+ "eqv %r1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*xornotdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (xor:DI (match_operand:DI 1 "register_operand" "%rJ")
+ (match_operand:DI 2 "register_operand" "rI"))))]
+ ""
+ "eqv %r1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+;; Handle FFS and related insns iff we support CIX.
+
+(define_expand "ffsdi2"
+ [(set (match_dup 2)
+ (ctz:DI (match_operand:DI 1 "register_operand" "")))
+ (set (match_dup 3)
+ (plus:DI (match_dup 2) (const_int 1)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (eq (match_dup 1) (const_int 0))
+ (const_int 0) (match_dup 3)))]
+ "TARGET_CIX"
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (DImode);
+})
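+
+;; That is, ffs (x) is computed as x ? cttz (x) + 1 : 0; the final
+;; if_then_else is matched by the conditional move patterns (a cmoveq).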
+
+(define_insn "clzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_CIX"
+ "ctlz %1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "ctzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ctz:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_CIX"
+ "cttz %1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "popcountdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_CIX"
+ "ctpop %1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (bswap:SI (match_operand:SI 1 "register_operand" "")))]
+ "!optimize_size"
+{
+ rtx t0, t1;
+
+ t0 = gen_reg_rtx (DImode);
+ t1 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_insxh (t0, gen_lowpart (DImode, operands[1]),
+ GEN_INT (32), GEN_INT (WORDS_BIG_ENDIAN ? 0 : 7)));
+ emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]),
+ GEN_INT (24)));
+ emit_insn (gen_iordi3 (t1, t0, t1));
+ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16)));
+ emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x5)));
+ emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xa)));
+ emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0),
+ gen_lowpart (SImode, t1)));
+ DONE;
+})
+
+(define_expand "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (bswap:DI (match_operand:DI 1 "register_operand" "")))]
+ "!optimize_size"
+{
+ rtx t0, t1;
+
+ t0 = gen_reg_rtx (DImode);
+ t1 = gen_reg_rtx (DImode);
+
+ /* This method of shifting and masking is not specific to Alpha, but
+ is only profitable on Alpha because of our handy byte zap insn. */
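+
+ /* As a concrete sketch of one step: after the 32-bit halves have been
+ swapped, the 16-bit step keeps bytes {0,1,4,5} of the copy shifted
+ right (zap mask 0xcc names the bytes to clear) and bytes {2,3,6,7}
+ of the copy shifted left (zap mask 0x33), exchanging the 16-bit
+ halves within each 32-bit half. */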
+
+ emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32)));
+ emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32)));
+ emit_insn (gen_iordi3 (t1, t0, t1));
+
+ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16)));
+ emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16)));
+ emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xcc)));
+ emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x33)));
+ emit_insn (gen_iordi3 (t1, t0, t1));
+
+ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8)));
+ emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8)));
+ emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xaa)));
+ emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x55)));
+ emit_insn (gen_iordi3 (operands[0], t0, t1));
+ DONE;
+})
+
+;; Next come the shifts and the various extract and insert operations.
+
+(define_insn "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ")
+ (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))]
+ ""
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (operands[2] == const1_rtx)
+ return "addq %r1,%r1,%0";
+ else
+ return "s%P2addq %r1,0,%0";
+ case 1:
+ return "sll %r1,%2,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "iadd,shift")])
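+
+;; For the constant alternative above, a left shift by 1 is emitted as
+;; "addq %r1,%r1,%0" and shifts by 2 or 3 as scaled adds of zero,
+;; "s4addq %r1,0,%0" and "s8addq %r1,0,%0" respectively.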
+
+(define_insn "*ashldi_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "const_int_operand" "P"))
+ 0)))]
+ "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3"
+{
+ if (operands[2] == const1_rtx)
+ return "addl %r1,%r1,%0";
+ else
+ return "s%P2addl %r1,0,%0";
+}
+ [(set_attr "type" "iadd")])
+
+(define_insn "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_6bit_operand" "rS")))]
+ ""
+ "srl %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_6bit_operand" "rS")))]
+ ""
+ "sra %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_expand "extendqihi2"
+ [(set (match_dup 2)
+ (ashift:DI (match_operand:QI 1 "some_operand" "")
+ (const_int 56)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 2)
+ (const_int 56)))]
+ ""
+{
+ if (TARGET_BWX)
+ {
+ emit_insn (gen_extendqihi2x (operands[0],
+ force_reg (QImode, operands[1])));
+ DONE;
+ }
+
+ /* If we have an unaligned MEM, extend to DImode (which we do
+ specially) and then copy to the result. */
+ if (unaligned_memory_operand (operands[1], QImode))
+ {
+ rtx temp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_extendqidi2 (temp, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (HImode, temp));
+ DONE;
+ }
+
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1]));
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_insn "extendqidi2x"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_BWX"
+ "sextb %1,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extendhidi2x"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_BWX"
+ "sextw %1,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extendqisi2x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_BWX"
+ "sextb %1,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extendhisi2x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_BWX"
+ "sextw %1,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extendqihi2x"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_BWX"
+ "sextb %1,%0"
+ [(set_attr "type" "shift")])
+
+(define_expand "extendqisi2"
+ [(set (match_dup 2)
+ (ashift:DI (match_operand:QI 1 "some_operand" "")
+ (const_int 56)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 2)
+ (const_int 56)))]
+ ""
+{
+ if (TARGET_BWX)
+ {
+ emit_insn (gen_extendqisi2x (operands[0],
+ force_reg (QImode, operands[1])));
+ DONE;
+ }
+
+ /* If we have an unaligned MEM, extend to a DImode form of
+ the result (which we do specially). */
+ if (unaligned_memory_operand (operands[1], QImode))
+ {
+ rtx temp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_extendqidi2 (temp, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (SImode, temp));
+ DONE;
+ }
+
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1]));
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_expand "extendqidi2"
+ [(set (match_dup 2)
+ (ashift:DI (match_operand:QI 1 "some_operand" "")
+ (const_int 56)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 2)
+ (const_int 56)))]
+ ""
+{
+ if (TARGET_BWX)
+ {
+ emit_insn (gen_extendqidi2x (operands[0],
+ force_reg (QImode, operands[1])));
+ DONE;
+ }
+
+ if (unaligned_memory_operand (operands[1], QImode))
+ {
+ rtx seq = gen_unaligned_extendqidi (operands[0], XEXP (operands[1], 0));
+ alpha_set_memflags (seq, operands[1]);
+ emit_insn (seq);
+ DONE;
+ }
+
+ operands[1] = gen_lowpart (DImode, force_reg (QImode, operands[1]));
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_expand "extendhisi2"
+ [(set (match_dup 2)
+ (ashift:DI (match_operand:HI 1 "some_operand" "")
+ (const_int 48)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 2)
+ (const_int 48)))]
+ ""
+{
+ if (TARGET_BWX)
+ {
+ emit_insn (gen_extendhisi2x (operands[0],
+ force_reg (HImode, operands[1])));
+ DONE;
+ }
+
+ /* If we have an unaligned MEM, extend to a DImode form of
+ the result (which we do specially). */
+ if (unaligned_memory_operand (operands[1], HImode))
+ {
+ rtx temp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_extendhidi2 (temp, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (SImode, temp));
+ DONE;
+ }
+
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, force_reg (HImode, operands[1]));
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+(define_expand "extendhidi2"
+ [(set (match_dup 2)
+ (ashift:DI (match_operand:HI 1 "some_operand" "")
+ (const_int 48)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 2)
+ (const_int 48)))]
+ ""
+{
+ if (TARGET_BWX)
+ {
+ emit_insn (gen_extendhidi2x (operands[0],
+ force_reg (HImode, operands[1])));
+ DONE;
+ }
+
+ if (unaligned_memory_operand (operands[1], HImode))
+ {
+ rtx seq = gen_unaligned_extendhidi (operands[0], XEXP (operands[1], 0));
+
+ alpha_set_memflags (seq, operands[1]);
+ emit_insn (seq);
+ DONE;
+ }
+
+ operands[1] = gen_lowpart (DImode, force_reg (HImode, operands[1]));
+ operands[2] = gen_reg_rtx (DImode);
+})
+
+;; Here's how we sign extend an unaligned byte and halfword. Doing this
+;; as a pattern saves one instruction. The code is similar to that for
+;; the unaligned loads (see below).
+;;
+;; Operand 1 is the address, operand 0 is the result.
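+;;
+;; For the little-endian byte case the generated code amounts to (a
+;; sketch; the extract count comes from the low bits of address+1):
+;;	ldq_u	t,0(a)		; load the quadword containing the byte
+;;	extqh	t,a+1,t		; left-justify the byte into bits 63:56
+;;	sra	t,56,r		; shift back down, sign-extending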
+(define_expand "unaligned_extendqidi"
+ [(use (match_operand:QI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "address_operand" ""))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_extendqidi_be (operands[0], operands[1]));
+ else
+ emit_insn (gen_unaligned_extendqidi_le (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "unaligned_extendqidi_le"
+ [(set (match_dup 3)
+ (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8))))
+ (set (match_dup 4)
+ (ashift:DI (match_dup 3)
+ (minus:DI (const_int 64)
+ (ashift:DI
+ (and:DI (match_dup 2) (const_int 7))
+ (const_int 3)))))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 4) (const_int 56)))]
+ "! WORDS_BIG_ENDIAN"
+{
+ operands[2] = get_unaligned_offset (operands[1], 1);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
+
+(define_expand "unaligned_extendqidi_be"
+ [(set (match_dup 3)
+ (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8))))
+ (set (match_dup 4)
+ (ashift:DI (match_dup 3)
+ (ashift:DI
+ (and:DI
+ (plus:DI (match_dup 2) (const_int 1))
+ (const_int 7))
+ (const_int 3))))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 4) (const_int 56)))]
+ "WORDS_BIG_ENDIAN"
+{
+ operands[2] = get_unaligned_offset (operands[1], -1);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
+
+(define_expand "unaligned_extendhidi"
+ [(use (match_operand:HI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "address_operand" ""))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_extendhidi_be (operands[0], operands[1]));
+ else
+ emit_insn (gen_unaligned_extendhidi_le (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "unaligned_extendhidi_le"
+ [(set (match_dup 3)
+ (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8))))
+ (set (match_dup 4)
+ (ashift:DI (match_dup 3)
+ (minus:DI (const_int 64)
+ (ashift:DI
+ (and:DI (match_dup 2) (const_int 7))
+ (const_int 3)))))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 4) (const_int 48)))]
+ "! WORDS_BIG_ENDIAN"
+{
+ operands[2] = get_unaligned_offset (operands[1], 2);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
+
+(define_expand "unaligned_extendhidi_be"
+ [(set (match_dup 3)
+ (mem:DI (and:DI (match_operand:DI 1 "" "") (const_int -8))))
+ (set (match_dup 4)
+ (ashift:DI (match_dup 3)
+ (ashift:DI
+ (and:DI
+ (plus:DI (match_dup 2) (const_int 1))
+ (const_int 7))
+ (const_int 3))))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_dup 4) (const_int 48)))]
+ "WORDS_BIG_ENDIAN"
+{
+ operands[2] = get_unaligned_offset (operands[1], -1);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
+
+(define_insn "*extxl_const"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "mode_width_operand" "n")
+ (match_operand:DI 3 "mul8_operand" "I")))]
+ ""
+ "ext%M2l %r1,%s3,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extxl_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "mode_width_operand" "n")
+ (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "ext%M2l %r1,%3,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extxl_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "mode_width_operand" "n")
+ (minus:DI
+ (const_int 56)
+ (ashift:DI
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "ext%M2l %r1,%3,%0"
+ [(set_attr "type" "shift")])
+
+;; Combine has some strange notion of preserving existing undefined
+;; behavior in shifts larger than a word size, so we capture here the
+;; patterns that it should have turned into zero_extracts.
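+;;
+;; For example, combine may present
+;;	(and (lshiftrt x (ashift y 3)) 0xff)
+;; where
+;;	(zero_extract x (const_int 8) (ashift y 3))
+;; would be expected; the patterns below accept the former form.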
+
+(define_insn "*extxl_1_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))
+ (match_operand:DI 3 "mode_mask_operand" "n")))]
+ "! WORDS_BIG_ENDIAN"
+ "ext%U3l %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "*extxl_1_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (lshiftrt:DI
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (minus:DI (const_int 56)
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))
+ (match_operand:DI 3 "mode_mask_operand" "n")))]
+ "WORDS_BIG_ENDIAN"
+ "ext%U3l %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "*extql_2_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "extql %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "*extql_2_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (minus:DI (const_int 56)
+ (ashift:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "extql %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extqh_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (minus:DI (const_int 64)
+ (ashift:DI
+ (and:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 7))
+ (const_int 3)))))]
+ "! WORDS_BIG_ENDIAN"
+ "extqh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extqh_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (ashift:DI
+ (and:DI
+ (plus:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 1))
+ (const_int 7))
+ (const_int 3))))]
+ "WORDS_BIG_ENDIAN"
+ "extqh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extlh_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI
+ (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (const_int 2147483647))
+ (minus:DI (const_int 64)
+ (ashift:DI
+ (and:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 7))
+ (const_int 3)))))]
+ "! WORDS_BIG_ENDIAN"
+ "extlh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extlh_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (ashift:DI
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (ashift:DI
+ (and:DI
+ (plus:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 1))
+ (const_int 7))
+ (const_int 3)))
+ (const_int 2147483647)))]
+ "WORDS_BIG_ENDIAN"
+ "extlh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extwh_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI
+ (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (const_int 65535))
+ (minus:DI (const_int 64)
+ (ashift:DI
+ (and:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 7))
+ (const_int 3)))))]
+ "! WORDS_BIG_ENDIAN"
+ "extwh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "extwh_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (ashift:DI
+ (and:DI
+ (plus:DI
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 1))
+ (const_int 7))
+ (const_int 3)))
+ (const_int 65535)))]
+ "WORDS_BIG_ENDIAN"
+ "extwh %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+;; This converts an extXl into an extXh with an appropriate adjustment
+;; to the address calculation.
+
+;;(define_split
+;; [(set (match_operand:DI 0 "register_operand" "")
+;; (ashift:DI (zero_extract:DI (match_operand:DI 1 "register_operand" "")
+;; (match_operand:DI 2 "mode_width_operand" "")
+;; (ashift:DI (match_operand:DI 3 "" "")
+;; (const_int 3)))
+;; (match_operand:DI 4 "const_int_operand" "")))
+;; (clobber (match_operand:DI 5 "register_operand" ""))]
+;; "INTVAL (operands[4]) == 64 - INTVAL (operands[2])"
+;; [(set (match_dup 5) (match_dup 6))
+;; (set (match_dup 0)
+;; (ashift:DI (zero_extract:DI (match_dup 1) (match_dup 2)
+;; (ashift:DI (plus:DI (match_dup 5)
+;; (match_dup 7))
+;; (const_int 3)))
+;; (match_dup 4)))]
+;; "
+;;{
+;; operands[6] = plus_constant (operands[3],
+;; INTVAL (operands[2]) / BITS_PER_UNIT);
+;; operands[7] = GEN_INT (- INTVAL (operands[2]) / BITS_PER_UNIT);
+;;}")
+
+(define_insn "*insbl_const"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:DI 2 "mul8_operand" "I")))]
+ ""
+ "insbl %1,%s2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "inswl_const"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r"))
+ (match_operand:DI 2 "mul8_operand" "I")))]
+ ""
+ "inswl %1,%s2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "*insll_const"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "mul8_operand" "I")))]
+ ""
+ "insll %1,%s2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insbl_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "insbl %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insbl_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
+ (minus:DI (const_int 56)
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "insbl %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "inswl_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r"))
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "inswl %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "inswl_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" "r"))
+ (minus:DI (const_int 56)
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "inswl %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insll_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "insll %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insll_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (minus:DI (const_int 56)
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "insll %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insql_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "insql %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "insql_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:DI (const_int 56)
+ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "insql %1,%2,%0"
+ [(set_attr "type" "shift")])
+
+;; Combine sometimes has the habit of moving the AND outside of the
+;; shift, making life more interesting.
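+;;
+;; For example, (and (ashift x (const_int 8)) (const_int 0xff00)) is
+;; matched below and emitted as "insbl %1,1,%0".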
+
+(define_insn "*insxl"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mul8_operand" "I"))
+ (match_operand:DI 3 "immediate_operand" "i")))]
+ "HOST_BITS_PER_WIDE_INT == 64
+ && CONST_INT_P (operands[3])
+ && (((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))
+ || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))
+ || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])))"
+{
+#if HOST_BITS_PER_WIDE_INT == 64
+ if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))
+ return "insbl %1,%s2,%0";
+ if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))
+ return "inswl %1,%s2,%0";
+ if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))
+ return "insll %1,%s2,%0";
+#endif
+ gcc_unreachable ();
+}
+ [(set_attr "type" "shift")])
+
+;; We do not include the insXh insns because they are complex to express
+;; and it does not appear that we would ever want to generate them.
+;;
+;; Since we do need them for block moves, though, we cop out and use an
+;; unspec.
+
+(define_insn "insxh"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mode_width_operand" "n")
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]
+ UNSPEC_INSXH))]
+ ""
+ "ins%M2h %1,%3,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "mskxl_le"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (not:DI (ashift:DI
+ (match_operand:DI 2 "mode_mask_operand" "n")
+ (ashift:DI
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")
+ (const_int 3))))
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")))]
+ "! WORDS_BIG_ENDIAN"
+ "msk%U2l %r1,%3,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "mskxl_be"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (not:DI (ashift:DI
+ (match_operand:DI 2 "mode_mask_operand" "n")
+ (minus:DI (const_int 56)
+ (ashift:DI
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")
+ (const_int 3)))))
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")))]
+ "WORDS_BIG_ENDIAN"
+ "msk%U2l %r1,%3,%0"
+ [(set_attr "type" "shift")])
+
+;; We do not include the mskXh insns because it does not appear we would
+;; ever generate one.
+;;
+;; Again, though, we do need them for block moves, and again we cop out
+;; and use an unspec.
+
+(define_insn "mskxh"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mode_width_operand" "n")
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]
+ UNSPEC_MSKXH))]
+ ""
+ "msk%M2h %1,%3,%0"
+ [(set_attr "type" "shift")])
+
+;; Prefer AND + NE over LSHIFTRT + AND.
+
+(define_insn_and_split "*ze_and_ne"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "I")))]
+ "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8"
+ "#"
+ "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8"
+ [(set (match_dup 0)
+ (and:DI (match_dup 1) (match_dup 3)))
+ (set (match_dup 0)
+ (ne:DI (match_dup 0) (const_int 0)))]
+ "operands[3] = GEN_INT (1 << INTVAL (operands[2]));")
+
+;; Floating-point operations. All the double-precision insns can extend
+;; from single, so indicate that. The exceptions are the ones that simply
+;; play with the sign bits; it's not clear what to do there.
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "cpys $f31,%R1,%0"
+ [(set_attr "type" "fcpys")])
+
+(define_insn "*nabssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (abs:SF (match_operand:SF 1 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP"
+ "cpysn $f31,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (abs:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "cpys $f31,%R1,%0"
+ [(set_attr "type" "fcpys")])
+
+(define_insn "*nabsdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (abs:DF (match_operand:DF 1 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP"
+ "cpysn $f31,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_expand "abstf2"
+ [(parallel [(set (match_operand:TF 0 "register_operand" "")
+ (abs:TF (match_operand:TF 1 "reg_or_0_operand" "")))
+ (use (match_dup 2))])]
+ "TARGET_HAS_XFLOATING_LIBS"
+{
+#if HOST_BITS_PER_WIDE_INT >= 64
+ operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));
+#else
+ operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode));
+#endif
+})
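+
+;; Operand 2 holds the sign-bit mask built above; the post-reload split
+;; passes it to alpha_split_tfmode_frobsign along with gen_andnotdi3, so
+;; the absolute value is formed by clearing the sign bit of the high
+;; word. negtf2 below does the same with gen_xordi3 to flip the bit.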
+
+(define_insn_and_split "*abstf_internal"
+ [(set (match_operand:TF 0 "register_operand" "=r")
+ (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG")))
+ (use (match_operand:DI 2 "register_operand" "r"))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "alpha_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;")
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "cpysn %R1,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "negdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "cpysn %R1,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_expand "negtf2"
+ [(parallel [(set (match_operand:TF 0 "register_operand" "")
+ (neg:TF (match_operand:TF 1 "reg_or_0_operand" "")))
+ (use (match_dup 2))])]
+ "TARGET_HAS_XFLOATING_LIBS"
+{
+#if HOST_BITS_PER_WIDE_INT >= 64
+ operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));
+#else
+ operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode));
+#endif
+})
+
+(define_insn_and_split "*negtf_internal"
+ [(set (match_operand:TF 0 "register_operand" "=r")
+ (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG")))
+ (use (match_operand:DI 2 "register_operand" "r"))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "alpha_split_tfmode_frobsign (operands, gen_xordi3); DONE;")
+
+(define_insn "copysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_FP"
+ "cpys %R2,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "*ncopysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (unspec:SF [(match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")]
+ UNSPEC_COPYSIGN)))]
+ "TARGET_FP"
+ "cpysn %R2,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "copysigndf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_FP"
+ "cpys %R2,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "*ncopysigndf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (unspec:DF [(match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")]
+ UNSPEC_COPYSIGN)))]
+ "TARGET_FP"
+ "cpysn %R2,%R1,%0"
+ [(set_attr "type" "fadd")])
+
+(define_insn "*addsf_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (plus:SF (match_operand:SF 1 "reg_or_0_operand" "%fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "add%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "reg_or_0_operand" "%fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "add%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*adddf_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (plus:DF (match_operand:DF 1 "reg_or_0_operand" "%fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "add%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (plus:DF (match_operand:DF 1 "reg_or_0_operand" "%fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "add%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*adddf_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (plus:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "add%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*adddf_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (plus:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "%fG"))
+ (float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "add%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_expand "addtf3"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_arith (PLUS, operands); DONE;")
+
+;; Define conversion operators between DFmode and SImode, using the cvtql
+;; instruction. To allow combine et al to do useful things, we keep the
+;; operation as a unit until after reload, at which point we split the
+;; instructions.
+;;
+;; Note that we (attempt to) only consider this optimization when the
+;; ultimate destination is memory. If we will be doing further integer
+;; processing, it is cheaper to do the truncation in the int regs.
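+;;
+;; A sketch of the post-reload sequence for storing (int) x, with x in
+;; $f10 (register numbers illustrative; trap and rounding suffixes vary
+;; with -mfp-trap-mode):
+;;	cvttq/c	$f10,$f11	; DFmode -> DImode, in FP registers
+;;	cvtql	$f11,$f12	; DImode -> SImode, still in FP registers
+;;	sts	$f12,0($16)	; store the SImode bits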
+
+(define_insn "*cvtql"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")]
+ UNSPEC_CVTQL))]
+ "TARGET_FP"
+ "cvtql%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "v_sv")])
+
+(define_insn_and_split "*fix_truncdfsi_ieee"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (subreg:SI
+ (match_operator:DI 4 "fix_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0))
+ (clobber (match_scratch:DI 2 "=&f"))
+ (clobber (match_scratch:SF 3 "=&f"))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)]))
+ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL))
+ (set (match_dup 5) (match_dup 3))]
+{
+ operands[5] = adjust_address (operands[0], SFmode, 0);
+}
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")])
+
+(define_insn_and_split "*fix_truncdfsi_internal"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (subreg:SI
+ (match_operator:DI 3 "fix_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0))
+ (clobber (match_scratch:DI 2 "=f"))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_op_dup 3 [(match_dup 1)]))
+ (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL))
+ (set (match_dup 5) (match_dup 4))]
+{
+ operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2]));
+ operands[5] = adjust_address (operands[0], SFmode, 0);
+}
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")])
+
+(define_insn "*fix_truncdfdi_ieee"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f")
+ (match_operator:DI 2 "fix_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "fG")]))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvt%-q%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "c")
+ (set_attr "trap_suffix" "v_sv_svi")])
+
+(define_insn "*fix_truncdfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f")
+ (match_operator:DI 2 "fix_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "fG")]))]
+ "TARGET_FP"
+ "cvt%-q%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "c")
+ (set_attr "trap_suffix" "v_sv_svi")])
+
+(define_expand "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "")
+ (fix:DI (match_operand:DF 1 "reg_or_0_operand" "")))]
+ "TARGET_FP"
+ "")
+
+(define_expand "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "")
+ (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand" "")))]
+ "TARGET_FP"
+ "")
+
+;; Likewise between SFmode and SImode.
+
+(define_insn_and_split "*fix_truncsfsi_ieee"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (subreg:SI
+ (match_operator:DI 4 "fix_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0))
+ (clobber (match_scratch:DI 2 "=&f"))
+ (clobber (match_scratch:SF 3 "=&f"))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))]))
+ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL))
+ (set (match_dup 5) (match_dup 3))]
+{
+ operands[5] = adjust_address (operands[0], SFmode, 0);
+}
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")])
+
+(define_insn_and_split "*fix_truncsfsi_internal"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (subreg:SI
+ (match_operator:DI 3 "fix_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0))
+ (clobber (match_scratch:DI 2 "=f"))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_op_dup 3 [(float_extend:DF (match_dup 1))]))
+ (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL))
+ (set (match_dup 5) (match_dup 4))]
+{
+ operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2]));
+ operands[5] = adjust_address (operands[0], SFmode, 0);
+}
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")])
+
+(define_insn "*fix_truncsfdi_ieee"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f")
+ (match_operator:DI 2 "fix_operator"
+ [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvt%-q%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "c")
+ (set_attr "trap_suffix" "v_sv_svi")])
+
+(define_insn "*fix_truncsfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f")
+ (match_operator:DI 2 "fix_operator"
+ [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))]
+ "TARGET_FP"
+ "cvt%-q%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "c")
+ (set_attr "trap_suffix" "v_sv_svi")])
+
+(define_expand "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "")
+ (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))))]
+ "TARGET_FP"
+ "")
+
+(define_expand "fixuns_truncsfdi2"
+ [(set (match_operand:DI 0 "reg_no_subreg_operand" "")
+ (unsigned_fix:DI
+ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))))]
+ "TARGET_FP"
+ "")
+
+(define_expand "fix_trunctfdi2"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (FIX, operands); DONE;")
+
+(define_expand "fixuns_trunctfdi2"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;")
+
+(define_insn "*floatdisf_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvtq%,%/ %1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "sui")])
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))]
+ "TARGET_FP"
+ "cvtq%,%/ %1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "sui")])
+
+(define_insn_and_split "*floatsisf2_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (float:SF (match_operand:SI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=&f"))
+ (clobber (match_scratch:SF 3 "=&f"))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ))
+ (set (match_dup 0) (float:SF (match_dup 2)))]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+})
+
+(define_insn_and_split "*floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_FP"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (unspec:DI [(match_dup 0)] UNSPEC_CVTLQ))
+ (set (match_dup 0) (float:SF (match_dup 2)))]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+ operands[2] = gen_rtx_REG (DImode, REGNO (operands[0]));
+})
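+
+;; The two splitters above thus turn int-to-float of an SImode memory
+;; operand into an SFmode load of the bits (lds), a cvtlq to widen the
+;; value within the FP register file, and a final cvtqs. The DFmode
+;; versions below are analogous, ending in cvtqt.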
+
+(define_insn "*floatdidf_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvtq%-%/ %1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "sui")])
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))]
+ "TARGET_FP"
+ "cvtq%-%/ %1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "sui")])
+
+(define_insn_and_split "*floatsidf2_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (float:DF (match_operand:SI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=&f"))
+ (clobber (match_scratch:SF 3 "=&f"))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ))
+ (set (match_dup 0) (float:DF (match_dup 2)))]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+})
+
+(define_insn_and_split "*floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_FP"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ))
+ (set (match_dup 0) (float:DF (match_dup 2)))]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+ operands[2] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0]));
+})
+
+(define_expand "floatditf2"
+ [(use (match_operand:TF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (FLOAT, operands); DONE;")
+
+(define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_FP"
+ "alpha_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_FP"
+ "alpha_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsditf2"
+ [(use (match_operand:TF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;")
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_FP"
+{
+ if (alpha_fptm >= ALPHA_FPTM_SU)
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+;; The Unicos/Mk assembler doesn't support cvtst, but we've already
+;; asserted that alpha_fptm == ALPHA_FPTM_N.
+
+(define_insn "*extendsfdf2_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvtsts %1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")])
+
+(define_insn "*extendsfdf2_internal"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,m")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "@
+ cpys %1,%1,%0
+ ld%, %0,%1
+ st%- %1,%0"
+ [(set_attr "type" "fcpys,fld,fst")])
+
+;; Use register_operand for operand 1 to prevent compress_float_constant
+;; from doing something silly. When optimizing we'll put things back
+;; together anyway.
+(define_expand "extendsftf2"
+ [(use (match_operand:TF 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+{
+ rtx tmp = gen_reg_rtx (DFmode);
+ emit_insn (gen_extendsfdf2 (tmp, operands[1]));
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "extenddftf2"
+ [(use (match_operand:TF 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;")
+
+(define_insn "*truncdfsf2_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cvt%-%,%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "cvt%-%,%/ %R1,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_expand "trunctfdf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;")
+
+(define_expand "trunctfsf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_FP && TARGET_HAS_XFLOATING_LIBS"
+{
+ rtx tmpf, sticky, arg, lo, hi;
+
+ tmpf = gen_reg_rtx (DFmode);
+ sticky = gen_reg_rtx (DImode);
+ arg = copy_to_mode_reg (TFmode, operands[1]);
+ lo = gen_lowpart (DImode, arg);
+ hi = gen_highpart (DImode, arg);
+
+ /* Convert the low word of the TFmode value into a sticky rounding bit,
+ then or it into the low bit of the high word. This leaves the sticky
+ bit at bit 48 of the fraction, which is representable in DFmode,
+ which prevents rounding error in the final conversion to SFmode. */
+
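+  /* In C terms, the fixup below is simply: hi |= (lo != 0). */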
+ emit_insn (gen_rtx_SET (VOIDmode, sticky,
+ gen_rtx_NE (DImode, lo, const0_rtx)));
+ emit_insn (gen_iordi3 (hi, hi, sticky));
+ emit_insn (gen_trunctfdf2 (tmpf, arg));
+ emit_insn (gen_truncdfsf2 (operands[0], tmpf));
+ DONE;
+})
+
+(define_insn "*divsf3_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (div:SF (match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "div%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "opsize" "si")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "div%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "opsize" "si")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*divdf3_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "div%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "div%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*divdf_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "div%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*divdf_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "div%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*divdf_ext3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "div%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_expand "divtf3"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_arith (DIV, operands); DONE;")
+
+(define_insn "*mulsf3_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (mult:SF (match_operand:SF 1 "reg_or_0_operand" "%fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "mul%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "reg_or_0_operand" "%fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "mul%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*muldf3_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (mult:DF (match_operand:DF 1 "reg_or_0_operand" "%fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "mul%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (mult:DF (match_operand:DF 1 "reg_or_0_operand" "%fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "mul%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*muldf_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (mult:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "mul%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*muldf_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (mult:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "%fG"))
+ (float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "mul%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fmul")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_expand "multf3"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_arith (MULT, operands); DONE;")
+
+(define_insn "*subsf3_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (minus:SF (match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "sub%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "reg_or_0_operand" "fG")
+ (match_operand:SF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "sub%,%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*subdf3_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "sub%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP"
+ "sub%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*subdf_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (minus:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (match_operand:DF 2 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "sub%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*subdf_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG")
+ (float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "sub%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*subdf_ext3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (minus:DF (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "fG"))
+ (float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "sub%-%/ %R1,%R2,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_expand "subtf3"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ "alpha_emit_xfloating_arith (MINUS, operands); DONE;")
+
+(define_insn "*sqrtsf2_ieee"
+ [(set (match_operand:SF 0 "register_operand" "=&f")
+ (sqrt:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU"
+ "sqrt%,%/ %R1,%0"
+ [(set_attr "type" "fsqrt")
+ (set_attr "opsize" "si")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && TARGET_FIX"
+ "sqrt%,%/ %R1,%0"
+ [(set_attr "type" "fsqrt")
+ (set_attr "opsize" "si")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "*sqrtdf2_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (sqrt:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU"
+ "sqrt%-%/ %R1,%0"
+ [(set_attr "type" "fsqrt")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
+ "TARGET_FP && TARGET_FIX"
+ "sqrt%-%/ %R1,%0"
+ [(set_attr "type" "fsqrt")
+ (set_attr "trap" "yes")
+ (set_attr "round_suffix" "normal")
+ (set_attr "trap_suffix" "u_su_sui")])
+
+;; Next are all the integer comparisons, conditional moves and branches,
+;; and some of the related define_expand's and define_split's.
+
+(define_insn "*setcc_internal"
+ [(set (match_operand 0 "register_operand" "=r")
+ (match_operator 1 "alpha_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))]
+ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "cmp%C1 %2,%3,%0"
+ [(set_attr "type" "icmp")])
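+
+;; An illustrative sketch, not from the original sources: for a source
+;; fragment like
+;;	long lt (long a, long b) { return a < b; }
+;; the pattern above assembles the comparison directly into the result:
+;;	cmplt $16,$17,$0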
+
+;; Yes, we can technically support reg_or_8bit_operand in operand 2,
+;; but that's non-canonical rtl and allowing it causes inefficiencies
+;; from cse onward.
+(define_insn "*setcc_swapped_internal"
+ [(set (match_operand 0 "register_operand" "=r")
+ (match_operator 1 "alpha_swapped_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "reg_or_0_operand" "rJ")]))]
+ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "cmp%c1 %r3,%2,%0"
+ [(set_attr "type" "icmp")])
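+
+;; An illustrative sketch, not from the original sources: Alpha has no
+;; cmpgt instruction, so "a > b" is matched by the swapped pattern above
+;; and comes out with the operands exchanged:
+;;	cmplt $17,$16,$0	; a > b  ==  b < a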
+
+;; Use match_operator rather than ne directly so that we can match
+;; multiple integer modes.
+(define_insn "*setne_internal"
+ [(set (match_operand 0 "register_operand" "=r")
+ (match_operator 1 "signed_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (const_int 0)]))]
+ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8
+ && GET_CODE (operands[1]) == NE
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "cmpult $31,%2,%0"
+ [(set_attr "type" "icmp")])
+
+;; The mode folding trick can't be used with const_int operands, since
+;; reload needs to know the proper mode.
+;;
+;; Use add_operand instead of the seemingly more natural reg_or_8bit_operand
+;; in order to create more pairs of constants.  As long as we're allowing
+;; two constants at the same time, and will have to reload one of them...
+
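+;; An illustrative sketch, not from the original sources: allowing 8-bit
+;; constants in both arms means a statement like
+;;	y = (x == 0 ? 5 : 7);
+;; can be emitted, after one constant is reloaded into the destination, as
+;;	lda $0,7($31)
+;;	cmoveq $16,5,$0
+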
+(define_insn "*movqicc_internal"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:QI
+ (match_operator 2 "signed_comparison_operator"
+ [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
+ (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
+ (match_operand:QI 1 "add_operand" "rI,0,rI,0")
+ (match_operand:QI 5 "add_operand" "0,rI,0,rI")))]
+ "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)"
+ "@
+ cmov%C2 %r3,%1,%0
+ cmov%D2 %r3,%5,%0
+ cmov%c2 %r4,%1,%0
+ cmov%d2 %r4,%5,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movhicc_internal"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:HI
+ (match_operator 2 "signed_comparison_operator"
+ [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
+ (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
+ (match_operand:HI 1 "add_operand" "rI,0,rI,0")
+ (match_operand:HI 5 "add_operand" "0,rI,0,rI")))]
+ "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)"
+ "@
+ cmov%C2 %r3,%1,%0
+ cmov%D2 %r3,%5,%0
+ cmov%c2 %r4,%1,%0
+ cmov%d2 %r4,%5,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movsicc_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (match_operator 2 "signed_comparison_operator"
+ [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
+ (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
+ (match_operand:SI 1 "add_operand" "rI,0,rI,0")
+ (match_operand:SI 5 "add_operand" "0,rI,0,rI")))]
+ "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)"
+ "@
+ cmov%C2 %r3,%1,%0
+ cmov%D2 %r3,%5,%0
+ cmov%c2 %r4,%1,%0
+ cmov%d2 %r4,%5,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movdicc_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:DI
+ (match_operator 2 "signed_comparison_operator"
+ [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J")
+ (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")])
+ (match_operand:DI 1 "add_operand" "rI,0,rI,0")
+ (match_operand:DI 5 "add_operand" "0,rI,0,rI")))]
+ "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)"
+ "@
+ cmov%C2 %r3,%1,%0
+ cmov%D2 %r3,%5,%0
+ cmov%c2 %r4,%1,%0
+ cmov%d2 %r4,%5,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movqicc_lbc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI
+ (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:QI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:QI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbc %r2,%1,%0
+ cmovlbs %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movhicc_lbc"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI
+ (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:HI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:HI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbc %r2,%1,%0
+ cmovlbs %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movsicc_lbc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI
+ (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:SI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:SI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbc %r2,%1,%0
+ cmovlbs %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movdicc_lbc"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (if_then_else:DI
+ (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:DI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:DI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbc %r2,%1,%0
+ cmovlbs %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movqicc_lbs"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI
+ (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:QI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:QI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbs %r2,%1,%0
+ cmovlbc %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movhicc_lbs"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI
+ (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:HI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:HI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbs %r2,%1,%0
+ cmovlbc %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movsicc_lbs"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI
+ (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:SI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:SI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbs %r2,%1,%0
+ cmovlbc %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+(define_insn "*movdicc_lbs"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (if_then_else:DI
+ (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (match_operand:DI 1 "reg_or_8bit_operand" "rI,0")
+ (match_operand:DI 3 "reg_or_8bit_operand" "0,rI")))]
+ ""
+ "@
+ cmovlbs %r2,%1,%0
+ cmovlbc %r2,%3,%0"
+ [(set_attr "type" "icmov")])
+
+;; For ABS, we have two choices, depending on whether the input and output
+;; registers are the same or not.
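+;;
+;; An illustrative sketch, not from the original sources: the
+;; same-register variant comes out as the classic two-insn sequence
+;;	subq $31,$16,$1		; $1 = -x	(negq)
+;;	cmovlt $16,$1,$16	; if (x < 0) x = -x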
+(define_expand "absdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (abs:DI (match_operand:DI 1 "register_operand" "")))]
+ ""
+{
+ if (rtx_equal_p (operands[0], operands[1]))
+ emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode)));
+ else
+ emit_insn (gen_absdi2_diff (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "absdi2_same"
+ [(set (match_operand:DI 1 "register_operand" "")
+ (neg:DI (match_operand:DI 0 "register_operand" "")))
+ (set (match_dup 0)
+ (if_then_else:DI (ge (match_dup 0) (const_int 0))
+ (match_dup 0)
+ (match_dup 1)))]
+ ""
+ "")
+
+(define_expand "absdi2_diff"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (match_operand:DI 1 "register_operand" "")))
+ (set (match_dup 0)
+ (if_then_else:DI (lt (match_dup 1) (const_int 0))
+ (match_dup 0)
+ (match_dup 1)))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (abs:DI (match_dup 0)))
+ (clobber (match_operand:DI 1 "register_operand" ""))]
+ ""
+ [(set (match_dup 1) (neg:DI (match_dup 0)))
+ (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0))
+ (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (abs:DI (match_operand:DI 1 "register_operand" "")))]
+ "! rtx_equal_p (operands[0], operands[1])"
+ [(set (match_dup 0) (neg:DI (match_dup 1)))
+ (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0))
+ (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (abs:DI (match_dup 0))))
+ (clobber (match_operand:DI 1 "register_operand" ""))]
+ ""
+ [(set (match_dup 1) (neg:DI (match_dup 0)))
+ (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0))
+ (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (abs:DI (match_operand:DI 1 "register_operand" ""))))]
+ "! rtx_equal_p (operands[0], operands[1])"
+ [(set (match_dup 0) (neg:DI (match_dup 1)))
+ (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0))
+ (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_insn "sminqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (smin:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:QI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "minsb8 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "uminqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (umin:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:QI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "minub8 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "smaxqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (smax:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:QI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "maxsb8 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "umaxqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (umax:QI (match_operand:QI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:QI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "maxub8 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "sminhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (smin:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:HI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "minsw4 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "uminhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (umin:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:HI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "minuw4 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "smaxhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (smax:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:HI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "maxsw4 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "umaxhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (umax:HI (match_operand:HI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:HI 2 "reg_or_8bit_operand" "rI")))]
+ "TARGET_MAX"
+ "maxuw4 %r1,%2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "smaxdi3"
+ [(set (match_dup 3)
+ (le:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (eq (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (DImode); })
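+
+;; An illustrative sketch, not from the original sources: for
+;;	long max (long a, long b) { return a > b ? a : b; }
+;; the expansion above comes out roughly as
+;;	cmple $16,$17,$1	; $1 = (a <= b)
+;;	mov $17,$0		; start with b
+;;	cmoveq $1,$16,$0	; if !(a <= b), result is a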
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (smax:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "operands[2] != const0_rtx"
+ [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_insn "*smax_const0"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (smax:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 0)))]
+ ""
+ "cmovlt %0,0,%0"
+ [(set_attr "type" "icmov")])
+
+(define_expand "smindi3"
+ [(set (match_dup 3)
+ (lt:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (DImode); })
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (smin:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "operands[2] != const0_rtx"
+ [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_insn "*smin_const0"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (smin:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 0)))]
+ ""
+ "cmovgt %0,0,%0"
+ [(set_attr "type" "icmov")])
+
+(define_expand "umaxdi3"
+ [(set (match_dup 3)
+ (leu:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (eq (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ "operands[3] = gen_reg_rtx (DImode);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (umax:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "operands[2] != const0_rtx"
+ [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_expand "umindi3"
+ [(set (match_dup 3)
+ (ltu:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ "operands[3] = gen_reg_rtx (DImode);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (umin:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "operands[2] != const0_rtx"
+ [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_insn "*bcc_normal"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "signed_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "b%C1 %r2,%0"
+ [(set_attr "type" "ibr")])
+
+(define_insn "*bcc_reverse"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "signed_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (const_int 0)])
+	 (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "b%c1 %2,%0"
+ [(set_attr "type" "ibr")])
+
+(define_insn "*blbs_normal"
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "blbs %r1,%0"
+ [(set_attr "type" "ibr")])
+
+(define_insn "*blbc_normal"
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (const_int 1)
+ (const_int 0))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "blbc %r1,%0"
+ [(set_attr "type" "ibr")])
+
+(define_split
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "comparison_operator"
+ [(zero_extract:DI (match_operand:DI 2 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 3 "const_int_operand" ""))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (match_operand:DI 4 "register_operand" ""))])]
+ "INTVAL (operands[3]) != 0"
+ [(set (match_dup 4)
+ (lshiftrt:DI (match_dup 2) (match_dup 3)))
+ (set (pc)
+ (if_then_else (match_op_dup 1
+ [(zero_extract:DI (match_dup 4)
+ (const_int 1)
+ (const_int 0))
+ (const_int 0)])
+ (label_ref (match_dup 0))
+ (pc)))]
+ "")
+
+;; The following are the corresponding floating-point insns. Recall
+;; we need to have variants that expand the arguments from SFmode
+;; to DFmode.
+
+(define_insn "*cmpdf_ieee"
+ [(set (match_operand:DF 0 "register_operand" "=&f")
+ (match_operator:DF 1 "alpha_fp_comparison_operator"
+ [(match_operand:DF 2 "reg_or_0_operand" "fG")
+ (match_operand:DF 3 "reg_or_0_operand" "fG")]))]
+ "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU"
+ "cmp%-%C1%/ %R2,%R3,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "su")])
+
+(define_insn "*cmpdf_internal"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 1 "alpha_fp_comparison_operator"
+ [(match_operand:DF 2 "reg_or_0_operand" "fG")
+ (match_operand:DF 3 "reg_or_0_operand" "fG")]))]
+ "TARGET_FP"
+ "cmp%-%C1%/ %R2,%R3,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "su")])
+
+(define_insn "*cmpdf_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 1 "alpha_fp_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))
+ (match_operand:DF 3 "reg_or_0_operand" "fG")]))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "cmp%-%C1%/ %R2,%R3,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "su")])
+
+(define_insn "*cmpdf_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 1 "alpha_fp_comparison_operator"
+ [(match_operand:DF 2 "reg_or_0_operand" "fG")
+ (float_extend:DF
+ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "cmp%-%C1%/ %R2,%R3,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "su")])
+
+(define_insn "*cmpdf_ext3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (match_operator:DF 1 "alpha_fp_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))
+ (float_extend:DF
+ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "cmp%-%C1%/ %R2,%R3,%0"
+ [(set_attr "type" "fadd")
+ (set_attr "trap" "yes")
+ (set_attr "trap_suffix" "su")])
+
+(define_insn "*movdfcc_internal"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (if_then_else:DF
+ (match_operator 3 "signed_comparison_operator"
+ [(match_operand:DF 4 "reg_or_0_operand" "fG,fG")
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (match_operand:DF 1 "reg_or_0_operand" "fG,0")
+ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_insn "*movsfcc_internal"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (if_then_else:SF
+ (match_operator 3 "signed_comparison_operator"
+ [(match_operand:DF 4 "reg_or_0_operand" "fG,fG")
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (match_operand:SF 1 "reg_or_0_operand" "fG,0")
+ (match_operand:SF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_insn "*movdfcc_ext1"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (if_then_else:DF
+ (match_operator 3 "signed_comparison_operator"
+ [(match_operand:DF 4 "reg_or_0_operand" "fG,fG")
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0"))
+ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_insn "*movdfcc_ext2"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (if_then_else:DF
+ (match_operator 3 "signed_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 4 "reg_or_0_operand" "fG,fG"))
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (match_operand:DF 1 "reg_or_0_operand" "fG,0")
+ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_insn "*movdfcc_ext3"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (if_then_else:SF
+ (match_operator 3 "signed_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 4 "reg_or_0_operand" "fG,fG"))
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (match_operand:SF 1 "reg_or_0_operand" "fG,0")
+ (match_operand:SF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_insn "*movdfcc_ext4"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (if_then_else:DF
+ (match_operator 3 "signed_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 4 "reg_or_0_operand" "fG,fG"))
+ (match_operand:DF 2 "const0_operand" "G,G")])
+ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0"))
+ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+ "@
+ fcmov%C3 %R4,%R1,%0
+ fcmov%D3 %R4,%R5,%0"
+ [(set_attr "type" "fcmov")])
+
+(define_expand "smaxdf3"
+ [(set (match_dup 3)
+ (le:DF (match_operand:DF 1 "reg_or_0_operand" "")
+ (match_operand:DF 2 "reg_or_0_operand" "")))
+ (set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (eq (match_dup 3) (match_dup 4))
+ (match_dup 1) (match_dup 2)))]
+ "TARGET_FP"
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = CONST0_RTX (DFmode);
+})
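+
+;; An illustrative sketch, not from the original sources: the FP maximum
+;; expands along the same lines as smaxdi3 above, e.g. roughly
+;;	cmptle $f16,$f17,$f1	; $f1 = 2.0 if a <= b, else 0.0
+;;	cpys $f17,$f17,$f0	; start with b
+;;	fcmoveq $f1,$f16,$f0	; if $f1 == 0, result is a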
+
+(define_expand "smindf3"
+ [(set (match_dup 3)
+ (lt:DF (match_operand:DF 1 "reg_or_0_operand" "")
+ (match_operand:DF 2 "reg_or_0_operand" "")))
+ (set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (ne (match_dup 3) (match_dup 4))
+ (match_dup 1) (match_dup 2)))]
+ "TARGET_FP"
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = CONST0_RTX (DFmode);
+})
+
+(define_expand "smaxsf3"
+ [(set (match_dup 3)
+ (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))
+ (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" ""))))
+ (set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (eq (match_dup 3) (match_dup 4))
+ (match_dup 1) (match_dup 2)))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = CONST0_RTX (DFmode);
+})
+
+(define_expand "sminsf3"
+ [(set (match_dup 3)
+ (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" ""))
+ (float_extend:DF (match_operand:SF 2 "reg_or_0_operand" ""))))
+ (set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (ne (match_dup 3) (match_dup 4))
+ (match_dup 1) (match_dup 2)))]
+ "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU"
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = CONST0_RTX (DFmode);
+})
+
+(define_insn "*fbcc_normal"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "signed_comparison_operator"
+ [(match_operand:DF 2 "reg_or_0_operand" "fG")
+ (match_operand:DF 3 "const0_operand" "G")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_FP"
+ "fb%C1 %R2,%0"
+ [(set_attr "type" "fbr")])
+
+(define_insn "*fbcc_ext_normal"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "signed_comparison_operator"
+ [(float_extend:DF
+ (match_operand:SF 2 "reg_or_0_operand" "fG"))
+ (match_operand:DF 3 "const0_operand" "G")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_FP"
+ "fb%C1 %R2,%0"
+ [(set_attr "type" "fbr")])
+
+;; These are the main define_expand's used to make conditional branches
+;; and compares.
+
+(define_expand "cbranchdf4"
+ [(use (match_operator 0 "alpha_cbranch_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "")
+ (match_operand:DF 2 "reg_or_0_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_FP"
+ { alpha_emit_conditional_branch (operands, DFmode); DONE; })
+
+(define_expand "cbranchtf4"
+ [(use (match_operator 0 "alpha_cbranch_operator"
+ [(match_operand:TF 1 "general_operand")
+ (match_operand:TF 2 "general_operand")]))
+ (use (match_operand 3 ""))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ { alpha_emit_conditional_branch (operands, TFmode); DONE; })
+
+(define_expand "cbranchdi4"
+ [(use (match_operator 0 "alpha_cbranch_operator"
+ [(match_operand:DI 1 "some_operand")
+ (match_operand:DI 2 "some_operand")]))
+ (use (match_operand 3 ""))]
+ ""
+ { alpha_emit_conditional_branch (operands, DImode); DONE; })
+
+(define_expand "cstoredf4"
+ [(use (match_operator:DI 1 "alpha_cbranch_operator"
+ [(match_operand:DF 2 "reg_or_0_operand")
+ (match_operand:DF 3 "reg_or_0_operand")]))
+ (clobber (match_operand:DI 0 "register_operand"))]
+ "TARGET_FP"
+ { if (!alpha_emit_setcc (operands, DFmode)) FAIL; else DONE; })
+
+(define_expand "cstoretf4"
+ [(use (match_operator:DI 1 "alpha_cbranch_operator"
+ [(match_operand:TF 2 "general_operand")
+ (match_operand:TF 3 "general_operand")]))
+ (clobber (match_operand:DI 0 "register_operand"))]
+ "TARGET_HAS_XFLOATING_LIBS"
+ { if (!alpha_emit_setcc (operands, TFmode)) FAIL; else DONE; })
+
+(define_expand "cstoredi4"
+ [(use (match_operator:DI 1 "alpha_cbranch_operator"
+ [(match_operand:DI 2 "some_operand")
+ (match_operand:DI 3 "some_operand")]))
+ (clobber (match_operand:DI 0 "register_operand"))]
+ ""
+ { if (!alpha_emit_setcc (operands, DImode)) FAIL; else DONE; })
+
+;; These are the main define_expand's used to make conditional moves.
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "reg_or_8bit_operand" "")
+ (match_operand:SI 3 "reg_or_8bit_operand" "")))]
+ ""
+{
+ if ((operands[1] = alpha_emit_conditional_move (operands[1], SImode)) == 0)
+ FAIL;
+})
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")
+ (match_operand:DI 3 "reg_or_8bit_operand" "")))]
+ ""
+{
+ if ((operands[1] = alpha_emit_conditional_move (operands[1], DImode)) == 0)
+ FAIL;
+})
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "reg_or_8bit_operand" "")
+ (match_operand:SF 3 "reg_or_8bit_operand" "")))]
+ ""
+{
+ if ((operands[1] = alpha_emit_conditional_move (operands[1], SFmode)) == 0)
+ FAIL;
+})
+
+(define_expand "movdfcc"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF (match_operand 1 "comparison_operator" "")
+ (match_operand:DF 2 "reg_or_8bit_operand" "")
+ (match_operand:DF 3 "reg_or_8bit_operand" "")))]
+ ""
+{
+ if ((operands[1] = alpha_emit_conditional_move (operands[1], DFmode)) == 0)
+ FAIL;
+})
+
+;; These define_split definitions are used in cases when comparisons have
+;; not been stated in the correct way and we need to reverse the second
+;; comparison.  For example, x >= 7 has to be done as x < 7 with the
+;; comparison that tests the result being reversed. We have one define_split
+;; for each use of a comparison. They do not match valid insns and need
+;; not generate valid insns.
+;;
+;; We can also handle equality comparisons (and inequality comparisons in
+;; cases where the resulting add cannot overflow) by doing an add followed by
+;; a comparison with zero. This is faster since the addition takes one
+;; less cycle than a compare when feeding into a conditional move.
+;; For this case, we also have an SImode pattern since we can merge the add
+;; and sign extend and the order doesn't matter.
+;;
+;; We do not do this for floating-point, since it isn't clear how the "wrong"
+;; operation could have been generated.
+
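+;; An illustrative sketch, not from the original sources: for
+;;	y = (x >= 7 ? a : b);
+;; the first split below reverses the condition and tests the result:
+;;	cmplt $16,7,$1		; $1 = (x < 7)
+;;	mov $18,$0		; start with b
+;;	cmoveq $1,$17,$0	; if !(x < 7), i.e. x >= 7, pick a
+;; while an equality test such as
+;;	y = (x == 9 ? a : b);
+;; uses the cheaper add-and-test form:
+;;	lda $1,-9($16)		; $1 = x - 9
+;;	mov $18,$0		; start with b
+;;	cmoveq $1,$17,$0	; if x - 9 == 0, pick a
+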
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "")
+ (match_operand:DI 3 "reg_or_cint_operand" "")])
+ (match_operand:DI 4 "reg_or_cint_operand" "")
+ (match_operand:DI 5 "reg_or_cint_operand" "")))
+ (clobber (match_operand:DI 6 "register_operand" ""))]
+ "operands[3] != const0_rtx"
+ [(set (match_dup 6) (match_dup 7))
+ (set (match_dup 0)
+ (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))]
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU);
+
+ /* If we are comparing for equality with a constant and that constant
+ appears in the arm when the register equals the constant, use the
+ register since that is more likely to match (and to produce better code
+ if both would). */
+
+ if (code == EQ && CONST_INT_P (operands[3])
+ && rtx_equal_p (operands[4], operands[3]))
+ operands[4] = operands[2];
+
+ else if (code == NE && CONST_INT_P (operands[3])
+ && rtx_equal_p (operands[5], operands[3]))
+ operands[5] = operands[2];
+
+ if (code == NE || code == EQ
+ || (extended_count (operands[2], DImode, unsignedp) >= 1
+ && extended_count (operands[3], DImode, unsignedp) >= 1))
+ {
+ if (CONST_INT_P (operands[3]))
+ operands[7] = gen_rtx_PLUS (DImode, operands[2],
+ GEN_INT (- INTVAL (operands[3])));
+ else
+ operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]);
+
+ operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx);
+ }
+
+ else if (code == EQ || code == LE || code == LT
+ || code == LEU || code == LTU)
+ {
+ operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]);
+ operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx);
+ }
+ else
+ {
+ operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode,
+ operands[2], operands[3]);
+ operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx);
+ }
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:SI 2 "reg_or_0_operand" "")
+ (match_operand:SI 3 "reg_or_cint_operand" "")])
+ (match_operand:DI 4 "reg_or_8bit_operand" "")
+ (match_operand:DI 5 "reg_or_8bit_operand" "")))
+ (clobber (match_operand:DI 6 "register_operand" ""))]
+ "operands[3] != const0_rtx
+ && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)"
+ [(set (match_dup 6) (match_dup 7))
+ (set (match_dup 0)
+ (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))]
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU);
+ rtx tem;
+
+ if ((code != NE && code != EQ
+ && ! (extended_count (operands[2], DImode, unsignedp) >= 1
+ && extended_count (operands[3], DImode, unsignedp) >= 1)))
+ FAIL;
+
+ if (CONST_INT_P (operands[3]))
+ tem = gen_rtx_PLUS (SImode, operands[2],
+ GEN_INT (- INTVAL (operands[3])));
+ else
+ tem = gen_rtx_MINUS (SImode, operands[2], operands[3]);
+
+ operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem);
+ operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+ operands[6], const0_rtx);
+})
+
+;; Prefer to use cmp and arithmetic when possible instead of a cmove.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (if_then_else (match_operator 1 "signed_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "")
+ (const_int 0)])
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")))]
+ ""
+ [(const_int 0)]
+{
+ if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0],
+ operands[2], operands[3], operands[4]))
+ DONE;
+ else
+ FAIL;
+})
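+
+;; An illustrative sketch, not from the original sources: with constant
+;; arms the cmove can often become a compare plus arithmetic, e.g. for
+;;	y = (x == 0 ? 8 : 0);
+;; roughly
+;;	cmpeq $16,0,$0		; $0 = (x == 0)
+;;	sll $0,3,$0		; scale the 0/1 result to 0 or 8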
+
+;; ??? I don't know why combine is allowed to create such non-canonical rtl.
+;; Oh well, we match it in movcc, so it must be partially our fault.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (if_then_else (match_operator 1 "signed_comparison_operator"
+ [(const_int 0)
+ (match_operand:DI 2 "reg_or_0_operand" "")])
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")))]
+ ""
+ [(const_int 0)]
+{
+ if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])),
+ operands[0], operands[2], operands[3],
+ operands[4]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn_and_split "*cmp_sadd_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (if_then_else:DI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:DI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:DI 4 "sext_add_operand" "rIO")))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (plus:DI (mult:DI (match_dup 5) (match_dup 3))
+ (match_dup 4)))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = operands[0];
+})
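+
+;; An illustrative sketch, not from the original sources: for
+;;	y = (x == 0 ? 4 : 0) + z;
+;; the split above yields a compare feeding a scaled add:
+;;	cmpeq $16,0,$1		; $1 = (x == 0)
+;;	s4addq $1,$17,$0	; y = 4*$1 + z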
+
+(define_insn_and_split "*cmp_sadd_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (if_then_else:SI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:SI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:SI 4 "sext_add_operand" "rIO")))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 6) (match_dup 3))
+ (match_dup 4)))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = gen_lowpart (DImode, operands[0]);
+
+ operands[6] = gen_lowpart (SImode, operands[5]);
+})
+
+(define_insn_and_split "*cmp_sadd_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (plus:SI (if_then_else:SI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:SI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:SI 4 "sext_add_operand" "rIO"))))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3))
+ (match_dup 4))))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = operands[0];
+
+ operands[6] = gen_lowpart (SImode, operands[5]);
+})
+
+(define_insn_and_split "*cmp_ssub_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (if_then_else:DI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:DI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:DI 4 "reg_or_8bit_operand" "rI")))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (minus:DI (mult:DI (match_dup 5) (match_dup 3))
+ (match_dup 4)))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = operands[0];
+})
+
+(define_insn_and_split "*cmp_ssub_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (if_then_else:SI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:SI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:SI 4 "reg_or_8bit_operand" "rI")))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (minus:SI (mult:SI (match_dup 6) (match_dup 3))
+ (match_dup 4)))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = gen_lowpart (DImode, operands[0]);
+
+ operands[6] = gen_lowpart (SImode, operands[5]);
+})
+
+(define_insn_and_split "*cmp_ssub_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (minus:SI (if_then_else:SI
+ (match_operator 1 "alpha_zero_comparison_operator"
+ [(match_operand:DI 2 "reg_or_0_operand" "rJ")
+ (const_int 0)])
+ (match_operand:SI 3 "const48_operand" "I")
+ (const_int 0))
+ (match_operand:SI 4 "reg_or_8bit_operand" "rI"))))
+ (clobber (match_scratch:DI 5 "=r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 5)
+ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3))
+ (match_dup 4))))]
+{
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (DImode);
+ else if (reg_overlap_mentioned_p (operands[5], operands[4]))
+ operands[5] = operands[0];
+
+ operands[6] = gen_lowpart (SImode, operands[5]);
+})
+
+;; Here are the CALL and unconditional branch insns. Calls on NT and OSF
+;; work differently, so we have different patterns for each.
+
+;; On Unicos/Mk a call information word (CIW) must be generated for each
+;; call. The CIW contains information about arguments passed in registers
+;; and is stored in the caller's SSIB. Its offset relative to the beginning
+;; of the SSIB is passed in $25. Handling this properly is quite complicated
+;; in the presence of inlining since the CIWs for calls performed by the
+;; inlined function must be stored in the SSIB of the function it is inlined
+;; into as well. We encode the CIW in an unspec and append it to the list
+;; of the CIWs for the current function only when the instruction for loading
+;; $25 is generated.
+
+(define_expand "call"
+ [(use (match_operand:DI 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))]
+ ""
+{
+ if (TARGET_ABI_WINDOWS_NT)
+ emit_call_insn (gen_call_nt (operands[0], operands[1]));
+ else if (TARGET_ABI_OPEN_VMS)
+ emit_call_insn (gen_call_vms (operands[0], operands[2]));
+ else if (TARGET_ABI_UNICOSMK)
+ emit_call_insn (gen_call_umk (operands[0], operands[2]));
+ else
+ emit_call_insn (gen_call_osf (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(parallel [(call (mem:DI (match_operand 0 "" ""))
+ (match_operand 1 "" ""))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])]
+ "TARGET_ABI_OSF"
+{
+ gcc_assert (MEM_P (operands[0]));
+ operands[0] = XEXP (operands[0], 0);
+})
+
+(define_expand "call_osf"
+ [(parallel [(call (mem:DI (match_operand 0 "" ""))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+
+ operands[0] = XEXP (operands[0], 0);
+ if (! call_operand (operands[0], Pmode))
+ operands[0] = copy_to_mode_reg (Pmode, operands[0]);
+})
+
+(define_expand "call_nt"
+ [(parallel [(call (mem:DI (match_operand 0 "" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+
+ operands[0] = XEXP (operands[0], 0);
+ if (GET_CODE (operands[0]) != SYMBOL_REF && !REG_P (operands[0]))
+ operands[0] = force_reg (DImode, operands[0]);
+})
+
+;; Calls on Unicos/Mk are always indirect.
+;; op 0: symbol ref for called function
+;; op 1: CIW for $25 represented by an unspec
+
+(define_expand "call_umk"
+ [(parallel [(call (mem:DI (match_operand 0 "" ""))
+ (match_operand 1 "" ""))
+ (use (reg:DI 25))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+
+ /* Always load the address of the called function into a register;
+ load the CIW in $25. */
+
+ operands[0] = XEXP (operands[0], 0);
+ if (!REG_P (operands[0]))
+ operands[0] = force_reg (DImode, operands[0]);
+
+ emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]);
+})
+
+;;
+;; call openvms/alpha
+;; op 0: symbol ref for called function
+;; op 1: next_arg_reg (argument information value for R25)
+;;
+(define_expand "call_vms"
+ [(parallel [(call (mem:DI (match_operand 0 "" ""))
+ (match_operand 1 "" ""))
+ (use (match_dup 2))
+ (use (reg:DI 25))
+ (use (reg:DI 26))
+ (clobber (reg:DI 27))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+
+ operands[0] = XEXP (operands[0], 0);
+
+ /* Always load AI with argument information, then handle symbolic and
+ indirect call differently. Load RA and set operands[2] to PV in
+ both cases. */
+
+ emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]);
+ if (GET_CODE (operands[0]) == SYMBOL_REF)
+ {
+ alpha_need_linkage (XSTR (operands[0], 0), 0);
+
+ operands[2] = const0_rtx;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, 26),
+ gen_rtx_MEM (Pmode, plus_constant (operands[0], 8)));
+ operands[2] = operands[0];
+ }
+})
+
+(define_expand "call_value"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand:DI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+{
+ if (TARGET_ABI_WINDOWS_NT)
+ emit_call_insn (gen_call_value_nt (operands[0], operands[1], operands[2]));
+ else if (TARGET_ABI_OPEN_VMS)
+ emit_call_insn (gen_call_value_vms (operands[0], operands[1],
+ operands[3]));
+ else if (TARGET_ABI_UNICOSMK)
+ emit_call_insn (gen_call_value_umk (operands[0], operands[1],
+ operands[3]));
+ else
+ emit_call_insn (gen_call_value_osf (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand 1 "" ""))
+ (match_operand 2 "" "")))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])]
+ "TARGET_ABI_OSF"
+{
+ gcc_assert (MEM_P (operands[1]));
+ operands[1] = XEXP (operands[1], 0);
+})
+
+(define_expand "call_value_osf"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand 1 "" ""))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+
+ operands[1] = XEXP (operands[1], 0);
+ if (! call_operand (operands[1], Pmode))
+ operands[1] = copy_to_mode_reg (Pmode, operands[1]);
+})
+
+(define_expand "call_value_nt"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand 1 "" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+
+ operands[1] = XEXP (operands[1], 0);
+ if (GET_CODE (operands[1]) != SYMBOL_REF && !REG_P (operands[1]))
+ operands[1] = force_reg (DImode, operands[1]);
+})
+
+(define_expand "call_value_vms"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "" ""))
+ (match_operand 2 "" "")))
+ (use (match_dup 3))
+ (use (reg:DI 25))
+ (use (reg:DI 26))
+ (clobber (reg:DI 27))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+
+ operands[1] = XEXP (operands[1], 0);
+
+ /* Always load AI with argument information, then handle symbolic and
+ indirect call differently. Load RA and set operands[3] to PV in
+ both cases. */
+
+ emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]);
+ if (GET_CODE (operands[1]) == SYMBOL_REF)
+ {
+ alpha_need_linkage (XSTR (operands[1], 0), 0);
+
+ operands[3] = const0_rtx;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, 26),
+ gen_rtx_MEM (Pmode, plus_constant (operands[1], 8)));
+ operands[3] = operands[1];
+ }
+})
+
+(define_expand "call_value_umk"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand 1 "" ""))
+ (match_operand 2 "" "")))
+ (use (reg:DI 25))
+ (clobber (reg:DI 26))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+
+ operands[1] = XEXP (operands[1], 0);
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (DImode, operands[1]);
+
+ emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]);
+})
+
+(define_insn "*call_osf_1_er_noreturn"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ "@
+ jsr $26,($27),0
+ bsr $26,%0\t\t!samegp
+ ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,8")])
+
+(define_insn "*call_osf_1_er"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ jsr $26,(%0),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*
+ bsr $26,%0\t\t!samegp
+ ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,*,16")])
+
+;; We must use peep2 instead of a split because we need accurate life
+;; information for $gp.  Consider the case of { bar(); while (1); }:
+;; there $gp is dead after the call, so the reload of $gp after the
+;; call can be omitted entirely.
+(define_peephole2
+ [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed
+ && ! samegp_function_operand (operands[0], Pmode)
+ && (peep2_regno_dead_p (1, 29)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX))"
+ [(parallel [(call (mem:DI (match_dup 2))
+ (match_dup 1))
+ (use (reg:DI 29))
+ (use (match_dup 0))
+ (use (match_dup 3))
+ (clobber (reg:DI 26))])]
+{
+ if (CONSTANT_P (operands[0]))
+ {
+ operands[2] = gen_rtx_REG (Pmode, 27);
+ operands[3] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx,
+ operands[0], operands[3]));
+ }
+ else
+ {
+ operands[2] = operands[0];
+ operands[0] = const0_rtx;
+ operands[3] = const0_rtx;
+ }
+})
+
+(define_peephole2
+ [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed
+ && ! samegp_function_operand (operands[0], Pmode)
+ && ! (peep2_regno_dead_p (1, 29)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX))"
+ [(parallel [(call (mem:DI (match_dup 2))
+ (match_dup 1))
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1))
+ (use (match_dup 0))
+ (use (match_dup 4))
+ (clobber (reg:DI 26))])
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))]
+{
+ if (CONSTANT_P (operands[0]))
+ {
+ operands[2] = gen_rtx_REG (Pmode, 27);
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx,
+ operands[0], operands[4]));
+ }
+ else
+ {
+ operands[2] = operands[0];
+ operands[0] = const0_rtx;
+ operands[4] = const0_rtx;
+ }
+ operands[3] = GEN_INT (alpha_next_sequence_number++);
+ operands[5] = pic_offset_table_rtx;
+})
+
+(define_insn "*call_osf_2_er_nogp"
+ [(call (mem:DI (match_operand:DI 0 "register_operand" "c"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "const_int_operand" ""))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "jsr $26,(%0),%2%J3"
+ [(set_attr "type" "jsr")])
+
+(define_insn "*call_osf_2_er"
+ [(call (mem:DI (match_operand:DI 0 "register_operand" "c"))
+ (match_operand 1 "" ""))
+ (set (reg:DI 29)
+ (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand" "")]
+ UNSPEC_LDGP1))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "const_int_operand" ""))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "jsr $26,(%0),%2%J3\;ldah $29,0($26)\t\t!gpdisp!%4"
+ [(set_attr "type" "jsr")
+ (set_attr "cannot_copy" "true")
+ (set_attr "length" "8")])
+
+(define_insn "*call_osf_1_noreturn"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ "@
+ jsr $26,($27),0
+ bsr $26,$%0..ng
+ jsr $26,%0"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,8")])
+
+(define_insn "*call_osf_1"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ jsr $26,($27),0\;ldgp $29,0($26)
+ bsr $26,$%0..ng
+ jsr $26,%0\;ldgp $29,0($26)"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,*,16")])
+
+(define_insn "*sibcall_osf_1_er"
+ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s"))
+ (match_operand 1 "" ""))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ br $31,%0\t\t!samegp
+ ldq $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,8")])
+
+;; Note that the DEC assembler expands "jmp foo" with $at, which
+;; doesn't do what we want.
+(define_insn "*sibcall_osf_1"
+ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s"))
+ (match_operand 1 "" ""))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ br $31,$%0..ng
+ lda $27,%0\;jmp $31,($27),%0"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,8")])
+
+(define_insn "*call_nt_1"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "r,R,s"))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 26))]
+ "TARGET_ABI_WINDOWS_NT"
+ "@
+ jsr $26,(%0)
+ bsr $26,%0
+ jsr $26,%0"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,12")])
+
+; GAS relies on the order and position of the instructions output below
+; in order to generate relocs that let the VMS linker optimize the call.
+; Please do not reorder or otherwise molest them.
+(define_insn "*call_vms_1"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "r,s"))
+ (match_operand 1 "" ""))
+ (use (match_operand:DI 2 "nonmemory_operand" "r,n"))
+ (use (reg:DI 25))
+ (use (reg:DI 26))
+ (clobber (reg:DI 27))]
+ "TARGET_ABI_OPEN_VMS"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov %2,$27\;jsr $26,0\;ldq $27,0($29)";
+ case 1:
+ operands [2] = alpha_use_linkage (operands [0], cfun->decl, 1, 0);
+ operands [3] = alpha_use_linkage (operands [0], cfun->decl, 0, 0);
+ return "ldq $26,%3\;ldq $27,%2\;jsr $26,%0\;ldq $27,0($29)";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,16")])
+
+(define_insn "*call_umk_1"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:DI 25))
+ (clobber (reg:DI 26))]
+ "TARGET_ABI_UNICOSMK"
+ "jsr $26,(%0)"
+ [(set_attr "type" "jsr")])
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "none")])
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "br $31,%l0"
+ [(set_attr "type" "ibr")])
+
+(define_expand "return"
+ [(return)]
+ "direct_return ()"
+ "")
+
+(define_insn "*return_internal"
+ [(return)]
+ "reload_completed"
+ "ret $31,($26),1"
+ [(set_attr "type" "ibr")])
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
+ ""
+ "jmp $31,(%0),0"
+ [(set_attr "type" "ibr")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc)
+ (match_operand 0 "register_operand" ""))
+ (use (label_ref:DI (match_operand 1 "" "")))])]
+ ""
+{
+ if (TARGET_ABI_WINDOWS_NT)
+ {
+ rtx dest = gen_reg_rtx (DImode);
+ emit_insn (gen_extendsidi2 (dest, operands[0]));
+ operands[0] = dest;
+ }
+ else if (TARGET_ABI_OSF)
+ {
+ rtx dest = gen_reg_rtx (DImode);
+ emit_insn (gen_extendsidi2 (dest, operands[0]));
+ emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest));
+ operands[0] = dest;
+ }
+})
+
+(define_insn "*tablejump_osf_nt_internal"
+ [(set (pc)
+ (match_operand:DI 0 "register_operand" "r"))
+ (use (label_ref:DI (match_operand 1 "" "")))]
+ "(TARGET_ABI_OSF || TARGET_ABI_WINDOWS_NT)
+ && alpha_tablejump_addr_vec (insn)"
+{
+ operands[2] = alpha_tablejump_best_label (insn);
+ return "jmp $31,(%0),%2";
+}
+ [(set_attr "type" "ibr")])
+
+(define_insn "*tablejump_internal"
+ [(set (pc)
+ (match_operand:DI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp $31,(%0),0"
+ [(set_attr "type" "ibr")])
+
+;; Cache flush. Used by alpha_trampoline_init. 0x86 is PAL_imb, but we don't
+;; want to have to include pal.h in our .s file.
+(define_insn "imb"
+ [(unspec_volatile [(const_int 0)] UNSPECV_IMB)]
+ ""
+ "call_pal 0x86"
+ [(set_attr "type" "callpal")])
+
+;; BUGCHK is documented as common to OSF/1 and VMS PALcode.
+;; NT does not document anything at 0x81 -- presumably it would generate
+;; the equivalent of SIGILL, but this isn't that important.
+;; ??? Presuming unicosmk uses either OSF/1 or VMS PALcode.
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ "!TARGET_ABI_WINDOWS_NT"
+ "call_pal 0x81"
+ [(set_attr "type" "callpal")])
+
+;; For userland, we load the thread pointer from the TCB.
+;; For the kernel, we load the per-cpu private value.
+
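+;; An illustrative sketch, not from the original sources: 0x9e below is
+;; believed to be the unprivileged OSF/1 rduniq PALcall; from C, the
+;; userland read could be open-coded as
+;;	void *tp;
+;;	__asm__ ("call_pal 0x9e" : "=v" (tp));	/* result arrives in $0 */
+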
+(define_insn "load_tp"
+ [(set (match_operand:DI 0 "register_operand" "=v")
+ (unspec:DI [(const_int 0)] UNSPEC_TP))]
+ "TARGET_ABI_OSF"
+{
+ if (TARGET_TLS_KERNEL)
+ return "call_pal 0x32";
+ else
+ return "call_pal 0x9e";
+}
+ [(set_attr "type" "callpal")])
+
+;; For completeness, and for possible use by a __builtin function, here's
+;; how to set the thread pointer.  Since we don't describe enough of this
+;; quantity for CSE, we have to use a volatile unspec, and then there's
+;; not much point in creating an R16_REG register class.
+
+(define_expand "set_tp"
+ [(set (reg:DI 16) (match_operand:DI 0 "input_operand" ""))
+ (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)]
+ "TARGET_ABI_OSF"
+ "")
+
+(define_insn "*set_tp"
+ [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)]
+ "TARGET_ABI_OSF"
+{
+ if (TARGET_TLS_KERNEL)
+ return "call_pal 0x31";
+ else
+ return "call_pal 0x9f";
+}
+ [(set_attr "type" "callpal")])
+
+;; Special builtins for establishing and reverting VMS condition handlers.
+
+(define_expand "builtin_establish_vms_condition_handler"
+ [(set (reg:DI 0) (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "address_operand" ""))]
+ "TARGET_ABI_OPEN_VMS"
+{
+ alpha_expand_builtin_establish_vms_condition_handler (operands[0],
+ operands[1]);
+})
+
+(define_expand "builtin_revert_vms_condition_handler"
+ [(set (reg:DI 0) (match_operand:DI 0 "register_operand" ""))]
+ "TARGET_ABI_OPEN_VMS"
+{
+ alpha_expand_builtin_revert_vms_condition_handler (operands[0]);
+})
+
+;; Finally, we have the basic data motion insns. The byte and word insns
+;; are done via define_expand. Start with the floating-point insns, since
+;; they are simpler.
+
+(define_insn "*movsf_nofix"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m")
+ (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r"))]
+ "TARGET_FPREGS && ! TARGET_FIX
+ && (register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))"
+ "@
+ cpys %R1,%R1,%0
+ ld%, %0,%1
+ bis $31,%r1,%0
+ ldl %0,%1
+ st%, %R1,%0
+ stl %r1,%0"
+ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist")])
+
+(define_insn "*movsf_fix"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r")
+ (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))]
+ "TARGET_FPREGS && TARGET_FIX
+ && (register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))"
+ "@
+ cpys %R1,%R1,%0
+ ld%, %0,%1
+ bis $31,%r1,%0
+ ldl %0,%1
+ st%, %R1,%0
+ stl %r1,%0
+ itofs %1,%0
+ ftois %1,%0"
+ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi")])
+
+(define_insn "*movsf_nofp"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m")
+ (match_operand:SF 1 "input_operand" "rG,m,r"))]
+ "! TARGET_FPREGS
+ && (register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))"
+ "@
+ bis $31,%r1,%0
+ ldl %0,%1
+ stl %r1,%0"
+ [(set_attr "type" "ilog,ild,ist")])
+
+(define_insn "*movdf_nofix"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m")
+ (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r"))]
+ "TARGET_FPREGS && ! TARGET_FIX
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ "@
+ cpys %R1,%R1,%0
+ ld%- %0,%1
+ bis $31,%r1,%0
+ ldq %0,%1
+ st%- %R1,%0
+ stq %r1,%0"
+ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist")])
+
+(define_insn "*movdf_fix"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r")
+ (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))]
+ "TARGET_FPREGS && TARGET_FIX
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ "@
+ cpys %R1,%R1,%0
+ ld%- %0,%1
+ bis $31,%r1,%0
+ ldq %0,%1
+ st%- %R1,%0
+ stq %r1,%0
+ itoft %1,%0
+ ftoit %1,%0"
+ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi")])
+
+(define_insn "*movdf_nofp"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
+ (match_operand:DF 1 "input_operand" "rG,m,r"))]
+ "! TARGET_FPREGS
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ "@
+ bis $31,%r1,%0
+ ldq %0,%1
+ stq %r1,%0"
+ [(set_attr "type" "ilog,ild,ist")])
+
+;; Subregs suck for register allocation. Pretend we can move TFmode
+;; data between general registers until after reload.
+
+(define_insn_and_split "*movtf_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o")
+ (match_operand:TF 1 "input_operand" "roG,rG"))]
+ "register_operand (operands[0], TFmode)
+ || reg_or_0_operand (operands[1], TFmode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ alpha_split_tmode_pair (operands, TFmode, true);
+})
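+
+;; A sketch of the post-reload split (assuming alpha_split_tmode_pair's
+;; usual word-by-word behavior): a TFmode copy between register pairs
+;; such as
+;;
+;;   (set (reg:TF 4) (reg:TF 6))
+;;
+;; is rewritten as two DImode moves,
+;;
+;;   (set (reg:DI 4) (reg:DI 6))
+;;   (set (reg:DI 5) (reg:DI 7))
+;;
+;; ordered so that an overlapping destination never clobbers a source word
+;; before that word has been read.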
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && ! reg_or_0_operand (operands[1], SFmode))
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && ! reg_or_0_operand (operands[1], DFmode))
+ operands[1] = force_reg (DFmode, operands[1]);
+})
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && ! reg_or_0_operand (operands[1], TFmode))
+ operands[1] = force_reg (TFmode, operands[1]);
+})
+
+(define_insn "*movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,m")
+ (match_operand:SI 1 "input_operand" "rJ,K,L,n,m,rJ"))]
+ "(TARGET_ABI_OSF || TARGET_ABI_UNICOSMK)
+ && (register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%1($31)
+ ldah %0,%h1($31)
+ #
+ ldl %0,%1
+ stl %r1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,multi,ild,ist")])
+
+(define_insn "*movsi_nt_vms"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m")
+ (match_operand:SI 1 "input_operand" "rJ,K,L,s,n,m,rJ"))]
+ "(TARGET_ABI_WINDOWS_NT || TARGET_ABI_OPEN_VMS)
+ && (register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))"
+ "@
+ bis $31,%1,%0
+ lda %0,%1
+ ldah %0,%h1
+ lda %0,%1
+ #
+ ldl %0,%1
+ stl %r1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,ldsym,multi,ild,ist")])
+
+(define_insn "*movhi_nobwx"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (match_operand:HI 1 "input_operand" "rJ,n"))]
+ "! TARGET_BWX
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%L1($31)"
+ [(set_attr "type" "ilog,iadd")])
+
+(define_insn "*movhi_bwx"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))]
+ "TARGET_BWX
+ && (register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%L1($31)
+ ldwu %0,%1
+ stw %r1,%0"
+ [(set_attr "type" "ilog,iadd,ild,ist")])
+
+(define_insn "*movqi_nobwx"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (match_operand:QI 1 "input_operand" "rJ,n"))]
+ "! TARGET_BWX
+ && (register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%L1($31)"
+ [(set_attr "type" "ilog,iadd")])
+
+(define_insn "*movqi_bwx"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))]
+ "TARGET_BWX
+ && (register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%L1($31)
+ ldbu %0,%1
+ stb %r1,%0"
+ [(set_attr "type" "ilog,iadd,ild,ist")])
+
+;; We do two major things here: handle mem->mem and construct long
+;; constants.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ if (alpha_expand_mov (SImode, operands))
+ DONE;
+})
+
+;; Split a load of a large constant into the appropriate two-insn
+;; sequence.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "non_add_const_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (alpha_split_const_mov (SImode, operands))
+ DONE;
+ else
+ FAIL;
+})
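+
+;; Worked example (illustrative): loading the constant 0x12345678 splits
+;; into
+;;
+;;   ldah $t,0x1234($31)   ; 0x1234 << 16
+;;   lda $t,0x5678($t)     ; plus the sign-extended low 16 bits
+;;
+;; with the ldah half adjusted upward by 1 whenever the low half is
+;; negative as a 16-bit value.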
+
+;; Split the load of an address into a four-insn sequence on Unicos/Mk.
+;; Always generate a REG_EQUAL note for the last instruction to facilitate
+;; optimizations. If the symbolic operand is a label_ref, generate
+;; REG_LABEL_OPERAND notes and update LABEL_NUSES because this is not done
+;; automatically. Labels may be incorrectly deleted if we don't do this.
+;;
+;; Describing correctly what the individual instructions do is too
+;; complicated, so we use UNSPECs for each of the three parts of an address.
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "symbolic_operand" ""))]
+ "TARGET_ABI_UNICOSMK && reload_completed"
+ [(const_int 0)]
+{
+ rtx insn1, insn2, insn3;
+
+ insn1 = emit_insn (gen_umk_laum (operands[0], operands[1]));
+ emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32)));
+ insn2 = emit_insn (gen_umk_lalm (operands[0], operands[0], operands[1]));
+ insn3 = emit_insn (gen_umk_lal (operands[0], operands[0], operands[1]));
+ set_unique_reg_note (insn3, REG_EQUAL, operands[1]);
+
+ if (GET_CODE (operands[1]) == LABEL_REF)
+ {
+ rtx label;
+
+ label = XEXP (operands[1], 0);
+ add_reg_note (insn1, REG_LABEL_OPERAND, label);
+ add_reg_note (insn2, REG_LABEL_OPERAND, label);
+ add_reg_note (insn3, REG_LABEL_OPERAND, label);
+ LABEL_NUSES (label) += 3;
+ }
+ DONE;
+})
+
+;; Instructions for loading the three parts of an address on Unicos/Mk.
+
+(define_insn "umk_laum"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
+ UNSPEC_UMK_LAUM))]
+ "TARGET_ABI_UNICOSMK"
+ "laum %r0,%t1($31)"
+ [(set_attr "type" "iadd")])
+
+(define_insn "umk_lalm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
+ UNSPEC_UMK_LALM)))]
+ "TARGET_ABI_UNICOSMK"
+ "lalm %r0,%t2(%r1)"
+ [(set_attr "type" "iadd")])
+
+(define_insn "umk_lal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
+ UNSPEC_UMK_LAL)))]
+ "TARGET_ABI_UNICOSMK"
+ "lal %r0,%t2(%r1)"
+ [(set_attr "type" "iadd")])
+
+;; Add a new call information word to the current function's list of CIWs
+;; and load its index into $25. Doing it here ensures that the CIW will be
+;; associated with the correct function even in the presence of inlining.
+
+(define_insn "*umk_load_ciw"
+ [(set (reg:DI 25)
+ (unspec:DI [(match_operand 0 "" "")] UNSPEC_UMK_LOAD_CIW))]
+ "TARGET_ABI_UNICOSMK"
+{
+ operands[0] = unicosmk_add_call_info_word (operands[0]);
+ return "lda $25,%0";
+}
+ [(set_attr "type" "iadd")])
+
+(define_insn "*movdi_er_low_l"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "local_symbolic_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS"
+{
+ if (true_regnum (operands[1]) == 29)
+ return "lda %0,%2(%1)\t\t!gprel";
+ else
+ return "lda %0,%2(%1)\t\t!gprellow";
+}
+ [(set_attr "usegp" "yes")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "small_symbolic_operand" ""))]
+ "TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 0)
+ (lo_sum:DI (match_dup 2) (match_dup 1)))]
+ "operands[2] = pic_offset_table_rtx;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "local_symbolic_operand" ""))]
+ "TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 2) (high:DI (match_dup 1))))
+ (set (match_dup 0)
+ (lo_sum:DI (match_dup 0) (match_dup 1)))]
+ "operands[2] = pic_offset_table_rtx;")
+
+(define_split
+ [(match_operand 0 "some_small_symbolic_operand" "")]
+ ""
+ [(match_dup 0)]
+ "operands[0] = split_small_symbolic_operand (operands[0]);")
+
+;; Accepts any symbolic, not just global, since function calls that
+;; don't go via bsr still use !literal in hopes of linker relaxation.
+(define_insn "movdi_er_high_g"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_LITERAL))]
+ "TARGET_EXPLICIT_RELOCS"
+{
+ if (INTVAL (operands[3]) == 0)
+ return "ldq %0,%2(%1)\t\t!literal";
+ else
+ return "ldq %0,%2(%1)\t\t!literal!%3";
+}
+ [(set_attr "type" "ldsym")])
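+
+;; For example (illustrative of the relaxation idiom, not emitted by this
+;; pattern alone): a call sequence pairs the literal with a lituse marker
+;; carrying the same sequence number,
+;;
+;;   ldq $27,func($29)     !literal!6
+;;   jsr $26,($27),func    !lituse_jsr!6
+;;
+;; which the linker may relax into a direct bsr when func is in range.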
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "global_symbolic_operand" ""))]
+ "TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 0)
+ (unspec:DI [(match_dup 2)
+ (match_dup 1)
+ (const_int 0)] UNSPEC_LITERAL))]
+ "operands[2] = pic_offset_table_rtx;")
+
+(define_insn "movdi_er_tlsgd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_TLSGD))]
+ "HAVE_AS_TLS"
+{
+ if (INTVAL (operands[3]) == 0)
+ return "lda %0,%2(%1)\t\t!tlsgd";
+ else
+ return "lda %0,%2(%1)\t\t!tlsgd!%3";
+})
+
+(define_insn "movdi_er_tlsldm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPEC_TLSLDM))]
+ "HAVE_AS_TLS"
+{
+ if (INTVAL (operands[2]) == 0)
+ return "lda %0,%&(%1)\t\t!tlsldm";
+ else
+ return "lda %0,%&(%1)\t\t!tlsldm!%2";
+})
+
+(define_insn "*movdi_er_gotdtp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")]
+ UNSPEC_DTPREL))]
+ "HAVE_AS_TLS"
+ "ldq %0,%2(%1)\t\t!gotdtprel"
+ [(set_attr "type" "ild")
+ (set_attr "usegp" "yes")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "gotdtp_symbolic_operand" ""))]
+ "HAVE_AS_TLS && reload_completed"
+ [(set (match_dup 0)
+ (unspec:DI [(match_dup 2)
+ (match_dup 1)] UNSPEC_DTPREL))]
+{
+ operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0);
+ operands[2] = pic_offset_table_rtx;
+})
+
+(define_insn "*movdi_er_gottp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")]
+ UNSPEC_TPREL))]
+ "HAVE_AS_TLS"
+ "ldq %0,%2(%1)\t\t!gottprel"
+ [(set_attr "type" "ild")
+ (set_attr "usegp" "yes")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "gottp_symbolic_operand" ""))]
+ "HAVE_AS_TLS && reload_completed"
+ [(set (match_dup 0)
+ (unspec:DI [(match_dup 2)
+ (match_dup 1)] UNSPEC_TPREL))]
+{
+ operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0);
+ operands[2] = pic_offset_table_rtx;
+})
+
+(define_insn "*movdi_er_nofix"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,*f,*f,Q")
+ (match_operand:DI 1 "input_operand" "rJ,K,L,T,s,n,m,rJ,*fJ,Q,*f"))]
+ "TARGET_EXPLICIT_RELOCS && ! TARGET_FIX
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "@
+ mov %r1,%0
+ lda %0,%1($31)
+ ldah %0,%h1($31)
+ #
+ #
+ #
+ ldq%A1 %0,%1
+ stq%A0 %r1,%0
+ fmov %R1,%0
+ ldt %0,%1
+ stt %R1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst")
+ (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*")])
+
+;; The 'U' constraint matches symbolic operands on Unicos/Mk.  Those should
+;; have been split up by the rules above, but we must still accept any that
+;; slip through.
+
+(define_insn "*movdi_nofix"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,*f,*f,Q")
+ (match_operand:DI 1 "input_operand" "rJ,K,L,U,s,n,m,rJ,*fJ,Q,*f"))]
+ "! TARGET_FIX
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%1($31)
+ ldah %0,%h1($31)
+ laum %0,%t1($31)\;sll %0,32,%0\;lalm %0,%t1(%0)\;lal %0,%t1(%0)
+ lda %0,%1
+ #
+ ldq%A1 %0,%1
+ stq%A0 %r1,%0
+ cpys %R1,%R1,%0
+ ldt %0,%1
+ stt %R1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,ldsym,ldsym,multi,ild,ist,fcpys,fld,fst")
+ (set_attr "length" "*,*,*,16,*,*,*,*,*,*,*")])
+
+(define_insn "*movdi_er_fix"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r,r,r,r,r,r,r, m, *f,*f, Q, r,*f")
+ (match_operand:DI 1 "input_operand"
+ "rJ,K,L,T,s,n,m,rJ,*fJ, Q,*f,*f, r"))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_FIX
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "@
+ mov %r1,%0
+ lda %0,%1($31)
+ ldah %0,%h1($31)
+ #
+ #
+ #
+ ldq%A1 %0,%1
+ stq%A0 %r1,%0
+ fmov %R1,%0
+ ldt %0,%1
+ stt %R1,%0
+ ftoit %1,%0
+ itoft %1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst,ftoi,itof")
+ (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*")])
+
+(define_insn "*movdi_fix"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m,*f,*f,Q,r,*f")
+ (match_operand:DI 1 "input_operand" "rJ,K,L,s,n,m,rJ,*fJ,Q,*f,*f,r"))]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_FIX
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "@
+ bis $31,%r1,%0
+ lda %0,%1($31)
+ ldah %0,%h1($31)
+ lda %0,%1
+ #
+ ldq%A1 %0,%1
+ stq%A0 %r1,%0
+ cpys %R1,%R1,%0
+ ldt %0,%1
+ stt %R1,%0
+ ftoit %1,%0
+ itoft %1,%0"
+ [(set_attr "type" "ilog,iadd,iadd,ldsym,multi,ild,ist,fcpys,fld,fst,ftoi,itof")])
+
+;; VMS needs to set up "vms_base_regno" for unwinding.  This move
+;; often appears dead to the life analysis code, and we then abort for
+;; having emitted dead prologue instructions.  Force it to stay live.
+
+(define_insn "force_movdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")]
+ UNSPECV_FORCE_MOV))]
+ ""
+ "mov %1,%0"
+ [(set_attr "type" "ilog")])
+
+;; We do three major things here: handle mem->mem, put 64-bit constants in
+;; memory, and construct long 32-bit constants.
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ if (alpha_expand_mov (DImode, operands))
+ DONE;
+})
+
+;; Split a load of a large constant into the appropriate two-insn
+;; sequence.
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "non_add_const_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (alpha_split_const_mov (DImode, operands))
+ DONE;
+ else
+ FAIL;
+})
+
+;; We need to prevent reload from splitting TImode moves, because it
+;; might decide to overwrite a pointer with the value it points to.
+;; In that case we have to do the loads in the appropriate order so
+;; that the pointer is not destroyed too early.
+
+(define_insn_and_split "*movti_internal"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (match_operand:TI 1 "input_operand" "roJ,rJ"))]
+ "(register_operand (operands[0], TImode)
+ /* Prevent rematerialization of constants. */
+ && ! CONSTANT_P (operands[1]))
+ || reg_or_0_operand (operands[1], TImode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ alpha_split_tmode_pair (operands, TImode, true);
+})
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && ! reg_or_0_operand (operands[1], TImode))
+ operands[1] = force_reg (TImode, operands[1]);
+
+ if (operands[1] == const0_rtx)
+ ;
+ /* We must put 64-bit constants in memory. We could keep the
+ 32-bit constants in TImode and rely on the splitter, but
+ this doesn't seem to be worth the pain. */
+ else if (CONST_INT_P (operands[1])
+ || GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ rtx in[2], out[2], target;
+
+ gcc_assert (can_create_pseudo_p ());
+
+ split_double (operands[1], &in[0], &in[1]);
+
+ if (in[0] == const0_rtx)
+ out[0] = const0_rtx;
+ else
+ {
+ out[0] = gen_reg_rtx (DImode);
+ emit_insn (gen_movdi (out[0], in[0]));
+ }
+
+ if (in[1] == const0_rtx)
+ out[1] = const0_rtx;
+ else
+ {
+ out[1] = gen_reg_rtx (DImode);
+ emit_insn (gen_movdi (out[1], in[1]));
+ }
+
+ if (!REG_P (operands[0]))
+ target = gen_reg_rtx (TImode);
+ else
+ target = operands[0];
+
+ emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0]));
+ emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1]));
+
+ if (target != operands[0])
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], target));
+
+ DONE;
+ }
+})
+
+;; These are the partial-word cases.
+;;
+;; First we have the code to load an aligned word.  Operand 0 is the register
+;; in which to place the result; its mode is QImode or HImode.  Operand 1 is
+;; an SImode MEM at the low-order byte of the proper word.  Operand 2 is the
+;; bit offset within the word at which the value is located.  Operand 3 is an
+;; SImode scratch register.  If operand 0 is a hard register, operand 3 may
+;; be the same register; it is also allowed to conflict with operand 1.
+
+(define_expand "aligned_loadqi"
+ [(set (match_operand:SI 3 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (subreg:DI (match_dup 3) 0)
+ (const_int 8)
+ (match_operand:DI 2 "const_int_operand" "")))]
+  ""
+  "")
+
+(define_expand "aligned_loadhi"
+ [(set (match_operand:SI 3 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (subreg:DI (match_dup 3) 0)
+ (const_int 16)
+ (match_operand:DI 2 "const_int_operand" "")))]
+  ""
+  "")
+
+;; Similar for unaligned loads, where we use the sequence from the
+;; Alpha Architecture manual. We have to distinguish between little-endian
+;; and big-endian systems as the sequences are different.
+;;
+;; Operand 1 is the address. Operands 2 and 3 are temporaries, where
+;; operand 3 can overlap the input and output registers.
+
+(define_expand "unaligned_loadqi"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "address_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))
+ (use (match_operand:DI 3 "register_operand" ""))]
+ ""
+{
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_loadqi_be (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_unaligned_loadqi_le (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_expand "unaligned_loadqi_le"
+ [(set (match_operand:DI 2 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 1 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 3 "register_operand" "")
+ (match_dup 1))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (match_dup 2)
+ (const_int 8)
+ (ashift:DI (match_dup 3) (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_loadqi_be"
+ [(set (match_operand:DI 2 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 1 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 3 "register_operand" "")
+ (match_dup 1))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (match_dup 2)
+ (const_int 8)
+ (minus:DI
+ (const_int 56)
+ (ashift:DI (match_dup 3) (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_loadhi"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "address_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))
+ (use (match_operand:DI 3 "register_operand" ""))]
+ ""
+{
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_loadhi_be (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_unaligned_loadhi_le (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_expand "unaligned_loadhi_le"
+ [(set (match_operand:DI 2 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 1 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 3 "register_operand" "")
+ (match_dup 1))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (match_dup 2)
+ (const_int 16)
+ (ashift:DI (match_dup 3) (const_int 3))))]
+ "! WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_loadhi_be"
+ [(set (match_operand:DI 2 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 1 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 3 "register_operand" "")
+ (plus:DI (match_dup 1) (const_int 1)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (match_dup 2)
+ (const_int 16)
+ (minus:DI
+ (const_int 56)
+ (ashift:DI (match_dup 3) (const_int 3)))))]
+ "WORDS_BIG_ENDIAN"
+ "")
+
+;; Storing an aligned byte or word requires two temporaries.  Operand 0 is
+;; the aligned SImode MEM.  Operand 1 is the register containing the byte or
+;; word to store.  Operand 2 is the bit offset within the word at which the
+;; value should be placed.  Operands 3 and 4 are SImode temporaries.
+
+(define_expand "aligned_store"
+ [(set (match_operand:SI 3 "register_operand" "")
+ (match_operand:SI 0 "memory_operand" ""))
+ (set (subreg:DI (match_dup 3) 0)
+ (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5)))
+ (set (subreg:DI (match_operand:SI 4 "register_operand" "") 0)
+ (ashift:DI (zero_extend:DI (match_operand 1 "register_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (set (subreg:DI (match_dup 4) 0)
+ (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0)))
+ (set (match_dup 0) (match_dup 4))]
+ ""
+{
+ operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1]))
+ << INTVAL (operands[2])));
+})
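+
+;; Worked example (illustrative): storing a QImode value at bit offset 8
+;; computes operands[5] = ~(0xff << 8) = 0xffffffffffff00ff, so the
+;; sequence is: load the word, AND out the target byte, shift the new
+;; byte into place, OR the two halves together, store the word back.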
+
+;; For the unaligned byte and halfword cases, we use code similar to that
+;; in the Architecture book, but reordered to lower the number of registers
+;; required.  Operand 0 is the address.  Operand 1 is the data to store.
+;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may
+;; be the same temporary, if desired.  If the address is in a register,
+;; operand 2 can be that register.
+
+(define_expand "unaligned_storeqi"
+ [(use (match_operand:DI 0 "address_operand" ""))
+ (use (match_operand:QI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))
+ (use (match_operand:DI 3 "register_operand" ""))
+ (use (match_operand:DI 4 "register_operand" ""))]
+ ""
+{
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_storeqi_be (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ else
+ emit_insn (gen_unaligned_storeqi_le (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ DONE;
+})
+
+(define_expand "unaligned_storeqi_le"
+ [(set (match_operand:DI 3 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 0 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 2 "register_operand" "")
+ (match_dup 0))
+ (set (match_dup 3)
+ (and:DI (not:DI (ashift:DI (const_int 255)
+ (ashift:DI (match_dup 2) (const_int 3))))
+ (match_dup 3)))
+ (set (match_operand:DI 4 "register_operand" "")
+ (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" ""))
+ (ashift:DI (match_dup 2) (const_int 3))))
+ (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3)))
+ (set (mem:DI (and:DI (match_dup 0) (const_int -8)))
+ (match_dup 4))]
+ "! WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_storeqi_be"
+ [(set (match_operand:DI 3 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 0 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 2 "register_operand" "")
+ (match_dup 0))
+ (set (match_dup 3)
+ (and:DI (not:DI (ashift:DI (const_int 255)
+ (minus:DI (const_int 56)
+ (ashift:DI (match_dup 2) (const_int 3)))))
+ (match_dup 3)))
+ (set (match_operand:DI 4 "register_operand" "")
+ (ashift:DI (zero_extend:DI (match_operand:QI 1 "register_operand" ""))
+ (minus:DI (const_int 56)
+ (ashift:DI (match_dup 2) (const_int 3)))))
+ (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3)))
+ (set (mem:DI (and:DI (match_dup 0) (const_int -8)))
+ (match_dup 4))]
+ "WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_storehi"
+ [(use (match_operand:DI 0 "address_operand" ""))
+ (use (match_operand:HI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "register_operand" ""))
+ (use (match_operand:DI 3 "register_operand" ""))
+ (use (match_operand:DI 4 "register_operand" ""))]
+ ""
+{
+ if (WORDS_BIG_ENDIAN)
+ emit_insn (gen_unaligned_storehi_be (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ else
+ emit_insn (gen_unaligned_storehi_le (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ DONE;
+})
+
+(define_expand "unaligned_storehi_le"
+ [(set (match_operand:DI 3 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 0 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 2 "register_operand" "")
+ (match_dup 0))
+ (set (match_dup 3)
+ (and:DI (not:DI (ashift:DI (const_int 65535)
+ (ashift:DI (match_dup 2) (const_int 3))))
+ (match_dup 3)))
+ (set (match_operand:DI 4 "register_operand" "")
+ (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" ""))
+ (ashift:DI (match_dup 2) (const_int 3))))
+ (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3)))
+ (set (mem:DI (and:DI (match_dup 0) (const_int -8)))
+ (match_dup 4))]
+ "! WORDS_BIG_ENDIAN"
+ "")
+
+(define_expand "unaligned_storehi_be"
+ [(set (match_operand:DI 3 "register_operand" "")
+ (mem:DI (and:DI (match_operand:DI 0 "address_operand" "")
+ (const_int -8))))
+ (set (match_operand:DI 2 "register_operand" "")
+ (plus:DI (match_dup 5) (const_int 1)))
+ (set (match_dup 3)
+ (and:DI (not:DI (ashift:DI
+ (const_int 65535)
+ (minus:DI (const_int 56)
+ (ashift:DI (match_dup 2) (const_int 3)))))
+ (match_dup 3)))
+ (set (match_operand:DI 4 "register_operand" "")
+ (ashift:DI (zero_extend:DI (match_operand:HI 1 "register_operand" ""))
+ (minus:DI (const_int 56)
+ (ashift:DI (match_dup 2) (const_int 3)))))
+ (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3)))
+ (set (mem:DI (and:DI (match_dup 0) (const_int -8)))
+ (match_dup 4))]
+ "WORDS_BIG_ENDIAN"
+ "operands[5] = force_reg (DImode, operands[0]);")
+
+;; Here are the define_expand's for QI and HI moves that use the above
+;; patterns. We have the normal sets, plus the ones that need scratch
+;; registers for reload.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_BWX
+ ? alpha_expand_mov (QImode, operands)
+ : alpha_expand_mov_nobwx (QImode, operands))
+ DONE;
+})
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_BWX
+ ? alpha_expand_mov (HImode, operands)
+ : alpha_expand_mov_nobwx (HImode, operands))
+ DONE;
+})
+
+;; We need to hook into the extra support that we have for HImode
+;; reloads when BWX insns are not available.
+(define_expand "movcqi"
+ [(set (match_operand:CQI 0 "nonimmediate_operand" "")
+ (match_operand:CQI 1 "general_operand" ""))]
+ "!TARGET_BWX"
+{
+ if (GET_CODE (operands[0]) == CONCAT || GET_CODE (operands[1]) == CONCAT)
+ ;
+ else if (!any_memory_operand (operands[0], CQImode))
+ {
+ if (!any_memory_operand (operands[1], CQImode))
+ {
+ emit_move_insn (gen_lowpart (HImode, operands[0]),
+ gen_lowpart (HImode, operands[1]));
+ DONE;
+ }
+ if (aligned_memory_operand (operands[1], CQImode))
+ {
+ bool done;
+ do_aligned1:
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ do_aligned2:
+ operands[0] = gen_lowpart (HImode, operands[0]);
+ done = alpha_expand_mov_nobwx (HImode, operands);
+ gcc_assert (done);
+ DONE;
+ }
+ }
+ else if (aligned_memory_operand (operands[0], CQImode))
+ {
+ if (MEM_P (operands[1]))
+ {
+ rtx x = gen_reg_rtx (HImode);
+ emit_move_insn (gen_lowpart (CQImode, x), operands[1]);
+ operands[1] = x;
+ goto do_aligned2;
+ }
+ goto do_aligned1;
+ }
+
+ gcc_assert (!reload_in_progress);
+ emit_move_complex_parts (operands[0], operands[1]);
+ DONE;
+})
+
+;; Here are the versions for reload.
+;;
+;; The aligned input case is recognized early in alpha_secondary_reload
+;; in order to avoid allocating an unnecessary scratch register.
+;;
+;; Note that in the unaligned cases we know that the operand must not be
+;; a pseudo-register because stack slots are always aligned references.
+
+(define_expand "reload_in<mode>"
+ [(parallel [(match_operand:RELOAD12 0 "register_operand" "=r")
+ (match_operand:RELOAD12 1 "any_memory_operand" "m")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "!TARGET_BWX"
+{
+ rtx scratch, seq, addr;
+ unsigned regno = REGNO (operands[2]);
+
+ /* It is possible that one of the registers we got for operands[2]
+ might coincide with that of operands[0] (which is why we made
+ it TImode). Pick the other one to use as our scratch. */
+ if (regno == REGNO (operands[0]))
+ regno++;
+ scratch = gen_rtx_REG (DImode, regno);
+
+ addr = get_unaligned_address (operands[1]);
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ seq = gen_unaligned_load<reloadmode> (operands[0], addr,
+ scratch, operands[0]);
+ alpha_set_memflags (seq, operands[1]);
+
+ emit_insn (seq);
+ DONE;
+})
+
+(define_expand "reload_out<mode>"
+ [(parallel [(match_operand:RELOAD12 0 "any_memory_operand" "=m")
+ (match_operand:RELOAD12 1 "register_operand" "r")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "! TARGET_BWX"
+{
+ unsigned regno = REGNO (operands[2]);
+
+ if (<MODE>mode == CQImode)
+ {
+ operands[0] = gen_lowpart (HImode, operands[0]);
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ }
+
+ if (aligned_memory_operand (operands[0], <MODE>mode))
+ {
+ emit_insn (gen_reload_out<reloadmode>_aligned
+ (operands[0], operands[1],
+ gen_rtx_REG (SImode, regno),
+ gen_rtx_REG (SImode, regno + 1)));
+ }
+ else
+ {
+ rtx addr = get_unaligned_address (operands[0]);
+ rtx scratch1 = gen_rtx_REG (DImode, regno);
+ rtx scratch2 = gen_rtx_REG (DImode, regno + 1);
+ rtx scratch3 = scratch1;
+ rtx seq;
+
+ if (REG_P (addr))
+ scratch1 = addr;
+
+ seq = gen_unaligned_store<reloadmode> (addr, operands[1], scratch1,
+ scratch2, scratch3);
+ alpha_set_memflags (seq, operands[0]);
+ emit_insn (seq);
+ }
+ DONE;
+})
+
+;; Helpers for the above. The way reload is structured, we can't
+;; always get a proper address for a stack slot during reload_foo
+;; expansion, so we must delay our address manipulations until after.
+
+(define_insn_and_split "reload_in<mode>_aligned"
+ [(set (match_operand:I12MODE 0 "register_operand" "=r")
+ (match_operand:I12MODE 1 "memory_operand" "m"))]
+ "!TARGET_BWX && (reload_in_progress || reload_completed)"
+ "#"
+ "!TARGET_BWX && reload_completed"
+ [(const_int 0)]
+{
+ rtx aligned_mem, bitnum;
+ get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+ emit_insn (gen_aligned_load<reloadmode>
+ (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum,
+ gen_rtx_REG (SImode, REGNO (operands[0]))));
+ DONE;
+})
+
+(define_insn_and_split "reload_out<mode>_aligned"
+ [(set (match_operand:I12MODE 0 "memory_operand" "=m")
+ (match_operand:I12MODE 1 "register_operand" "r"))
+ (clobber (match_operand:SI 2 "register_operand" "=r"))
+ (clobber (match_operand:SI 3 "register_operand" "=r"))]
+ "!TARGET_BWX && (reload_in_progress || reload_completed)"
+ "#"
+ "!TARGET_BWX && reload_completed"
+ [(const_int 0)]
+{
+ rtx aligned_mem, bitnum;
+ get_aligned_mem (operands[0], &aligned_mem, &bitnum);
+ emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; Vector operations
+
+(define_mode_iterator VEC [V8QI V4HI V2SI])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VEC 0 "nonimmediate_operand" "")
+ (match_operand:VEC 1 "general_operand" ""))]
+ ""
+{
+ if (alpha_expand_mov (<MODE>mode, operands))
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:VEC 0 "register_operand" "")
+ (match_operand:VEC 1 "non_zero_const_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (alpha_split_const_mov (<MODE>mode, operands))
+ DONE;
+ else
+ FAIL;
+})
+
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VEC 0 "nonimmediate_operand" "")
+ (match_operand:VEC 1 "general_operand" ""))]
+ ""
+{
+ alpha_expand_movmisalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*mov<mode>_fix"
+ [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f")
+ (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))]
+ "TARGET_FIX
+ && (register_operand (operands[0], <MODE>mode)
+ || reg_or_0_operand (operands[1], <MODE>mode))"
+ "@
+ bis $31,%r1,%0
+ #
+ ldq %0,%1
+ stq %r1,%0
+ cpys %R1,%R1,%0
+ ldt %0,%1
+ stt %R1,%0
+ ftoit %1,%0
+ itoft %1,%0"
+ [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof")])
+
+(define_insn "*mov<mode>_nofix"
+ [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m")
+ (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f"))]
+ "! TARGET_FIX
+ && (register_operand (operands[0], <MODE>mode)
+ || reg_or_0_operand (operands[1], <MODE>mode))"
+ "@
+ bis $31,%r1,%0
+ #
+ ldq %0,%1
+ stq %r1,%0
+ cpys %R1,%R1,%0
+ ldt %0,%1
+ stt %R1,%0"
+ [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst")])
+
+(define_insn "uminv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (umin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (match_operand:V8QI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "minub8 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "sminv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (smin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (match_operand:V8QI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "minsb8 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "uminv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=r")
+ (umin:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW")
+ (match_operand:V4HI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "minuw4 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "sminv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=r")
+ (smin:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW")
+ (match_operand:V4HI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "minsw4 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "umaxv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (umax:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (match_operand:V8QI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "maxub8 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "smaxv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (smax:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (match_operand:V8QI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "maxsb8 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "umaxv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=r")
+ (umax:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW")
+ (match_operand:V4HI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "maxuw4 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "smaxv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=r")
+ (smax:V4HI (match_operand:V4HI 1 "reg_or_0_operand" "rW")
+ (match_operand:V4HI 2 "reg_or_0_operand" "rW")))]
+ "TARGET_MAX"
+ "maxsw4 %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (not:VEC (match_operand:VEC 1 "register_operand" "r")))]
+ ""
+ "ornot $31,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (and:VEC (match_operand:VEC 1 "register_operand" "r")
+ (match_operand:VEC 2 "register_operand" "r")))]
+ ""
+ "and %1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*andnot<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r"))
+ (match_operand:VEC 2 "register_operand" "r")))]
+ ""
+ "bic %2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (ior:VEC (match_operand:VEC 1 "register_operand" "r")
+ (match_operand:VEC 2 "register_operand" "r")))]
+ ""
+ "bis %1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*iornot<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (ior:VEC (not:DI (match_operand:VEC 1 "register_operand" "r"))
+ (match_operand:VEC 2 "register_operand" "r")))]
+ ""
+ "ornot %2,%1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (xor:VEC (match_operand:VEC 1 "register_operand" "r")
+ (match_operand:VEC 2 "register_operand" "r")))]
+ ""
+ "xor %1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "*xornot<mode>3"
+ [(set (match_operand:VEC 0 "register_operand" "=r")
+ (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r")
+ (match_operand:VEC 2 "register_operand" "r"))))]
+ ""
+ "eqv %1,%2,%0"
+ [(set_attr "type" "ilog")])
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:VEC 0 "register_operand" "")
+ (ashift:DI (match_operand:VEC 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_6bit_operand" "")))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:VEC 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:VEC 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_6bit_operand" "")))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+})
+
+;; Bit field extract patterns which use ext[wlq][lh]
+
+(define_expand "extv"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extract:DI (match_operand:QI 1 "memory_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")
+ (match_operand:DI 3 "immediate_operand" "")))]
+ ""
+{
+ int ofs;
+
+ /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */
+ if (INTVAL (operands[3]) % 8 != 0
+ || (INTVAL (operands[2]) != 16
+ && INTVAL (operands[2]) != 32
+ && INTVAL (operands[2]) != 64))
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so we force it to be a MEM here. */
+ if (!MEM_P (operands[1]))
+ FAIL;
+
+ /* The bit number is relative to the mode of operand 1 which is
+ usually QImode (this might actually be a bug in expmed.c). Note
+ that the bit number is negative in big-endian mode in this case.
+ We have to convert that to the offset. */
+ if (WORDS_BIG_ENDIAN)
+ ofs = GET_MODE_BITSIZE (GET_MODE (operands[1]))
+ - INTVAL (operands[2]) - INTVAL (operands[3]);
+ else
+ ofs = INTVAL (operands[3]);
+
+ ofs = ofs / 8;
+
+ alpha_expand_unaligned_load (operands[0], operands[1],
+ INTVAL (operands[2]) / 8,
+ ofs, 1);
+ DONE;
+})
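+
+;; Example (illustrative): extracting a 32-bit field at bit offset 8, i.e.
+;;
+;;   (sign_extract:DI (mem:QI ...) (const_int 32) (const_int 8))
+;;
+;; passes size 4, byte offset 1 and sign flag 1 to
+;; alpha_expand_unaligned_load, yielding an unaligned signed 4-byte load.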
+
+(define_expand "extzv"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extract:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")
+ (match_operand:DI 3 "immediate_operand" "")))]
+ ""
+{
+ /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */
+ if (INTVAL (operands[3]) % 8 != 0
+ || (INTVAL (operands[2]) != 8
+ && INTVAL (operands[2]) != 16
+ && INTVAL (operands[2]) != 32
+ && INTVAL (operands[2]) != 64))
+ FAIL;
+
+ if (MEM_P (operands[1]))
+ {
+ int ofs;
+
+ /* Fail 8-bit fields, falling back on a simple byte load. */
+ if (INTVAL (operands[2]) == 8)
+ FAIL;
+
+ /* The bit number is relative to the mode of operand 1 which is
+ usually QImode (this might actually be a bug in expmed.c). Note
+ that the bit number is negative in big-endian mode in this case.
+ We have to convert that to the offset. */
+ if (WORDS_BIG_ENDIAN)
+ ofs = GET_MODE_BITSIZE (GET_MODE (operands[1]))
+ - INTVAL (operands[2]) - INTVAL (operands[3]);
+ else
+ ofs = INTVAL (operands[3]);
+
+ ofs = ofs / 8;
+
+ alpha_expand_unaligned_load (operands[0], operands[1],
+ INTVAL (operands[2]) / 8,
+ ofs, 0);
+ DONE;
+ }
+})
+
+(define_expand "insv"
+ [(set (zero_extract:DI (match_operand:QI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" "")
+ (match_operand:DI 2 "immediate_operand" ""))
+ (match_operand:DI 3 "register_operand" ""))]
+ ""
+{
+ int ofs;
+
+ /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */
+ if (INTVAL (operands[2]) % 8 != 0
+ || (INTVAL (operands[1]) != 16
+ && INTVAL (operands[1]) != 32
+ && INTVAL (operands[1]) != 64))
+ FAIL;
+
+ /* From mips.md: store_bit_field doesn't verify that our source
+ matches the predicate, so we force it to be a MEM here. */
+ if (!MEM_P (operands[0]))
+ FAIL;
+
+  /* The bit number is relative to the mode of operand 0 which is
+     usually QImode (this might actually be a bug in expmed.c).  Note
+     that the bit number is negative in big-endian mode in this case.
+     We have to convert that to the offset.  */
+ if (WORDS_BIG_ENDIAN)
+ ofs = GET_MODE_BITSIZE (GET_MODE (operands[0]))
+ - INTVAL (operands[1]) - INTVAL (operands[2]);
+ else
+ ofs = INTVAL (operands[2]);
+
+ ofs = ofs / 8;
+
+ alpha_expand_unaligned_store (operands[0], operands[3],
+ INTVAL (operands[1]) / 8, ofs);
+ DONE;
+})
+
+;; Block move/clear, see alpha.c for more details.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movmemqi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:DI 2 "immediate_operand" ""))
+ (use (match_operand:DI 3 "immediate_operand" ""))])]
+ ""
+{
+ if (alpha_expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "movmemdi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:DI 2 "immediate_operand" ""))
+ (use (match_operand:DI 3 "immediate_operand" ""))
+ (use (match_dup 4))
+ (clobber (reg:DI 25))
+ (clobber (reg:DI 16))
+ (clobber (reg:DI 17))
+ (clobber (reg:DI 18))
+ (clobber (reg:DI 19))
+ (clobber (reg:DI 20))
+ (clobber (reg:DI 26))
+ (clobber (reg:DI 27))])]
+ "TARGET_ABI_OPEN_VMS"
+{
+ operands[4] = alpha_need_linkage ("OTS$MOVE", 0);
+})
+
+(define_insn "*movmemdi_1"
+ [(set (match_operand:BLK 0 "memory_operand" "=m,=m")
+ (match_operand:BLK 1 "memory_operand" "m,m"))
+ (use (match_operand:DI 2 "nonmemory_operand" "r,i"))
+ (use (match_operand:DI 3 "immediate_operand" ""))
+ (use (match_operand:DI 4 "call_operand" "i,i"))
+ (clobber (reg:DI 25))
+ (clobber (reg:DI 16))
+ (clobber (reg:DI 17))
+ (clobber (reg:DI 18))
+ (clobber (reg:DI 19))
+ (clobber (reg:DI 20))
+ (clobber (reg:DI 26))
+ (clobber (reg:DI 27))]
+ "TARGET_ABI_OPEN_VMS"
+{
+  operands[5] = alpha_use_linkage (operands[4], cfun->decl, 0, 1);
+ switch (which_alternative)
+ {
+ case 0:
+ return "lda $16,%0\;bis $31,%2,$17\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)";
+ case 1:
+ return "lda $16,%0\;lda $17,%2($31)\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "28")])
+
+(define_expand "setmemqi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (use (match_operand:DI 1 "immediate_operand" ""))
+ (use (match_operand:DI 3 "immediate_operand" ""))])]
+ ""
+{
+  /* If the value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (alpha_expand_block_clear (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "setmemdi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (use (match_operand:DI 1 "immediate_operand" ""))
+ (use (match_operand:DI 3 "immediate_operand" ""))
+ (use (match_dup 4))
+ (clobber (reg:DI 25))
+ (clobber (reg:DI 16))
+ (clobber (reg:DI 17))
+ (clobber (reg:DI 26))
+ (clobber (reg:DI 27))])]
+ "TARGET_ABI_OPEN_VMS"
+{
+  /* If the value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ operands[4] = alpha_need_linkage ("OTS$ZERO", 0);
+})
+
+(define_insn "*clrmemdi_1"
+ [(set (match_operand:BLK 0 "memory_operand" "=m,=m")
+ (const_int 0))
+ (use (match_operand:DI 1 "nonmemory_operand" "r,i"))
+ (use (match_operand:DI 2 "immediate_operand" ""))
+ (use (match_operand:DI 3 "call_operand" "i,i"))
+ (clobber (reg:DI 25))
+ (clobber (reg:DI 16))
+ (clobber (reg:DI 17))
+ (clobber (reg:DI 26))
+ (clobber (reg:DI 27))]
+ "TARGET_ABI_OPEN_VMS"
+{
+  operands[4] = alpha_use_linkage (operands[3], cfun->decl, 0, 1);
+ switch (which_alternative)
+ {
+ case 0:
+ return "lda $16,%0\;bis $31,%1,$17\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)";
+ case 1:
+ return "lda $16,%0\;lda $17,%1($31)\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "24")])
+
+
+;; Subroutine of stack space allocation. Perform a stack probe.
+(define_expand "probe_stack"
+ [(set (match_dup 1) (match_operand:DI 0 "const_int_operand" ""))]
+ ""
+{
+ operands[1] = gen_rtx_MEM (DImode, plus_constant (stack_pointer_rtx,
+ INTVAL (operands[0])));
+ MEM_VOLATILE_P (operands[1]) = 1;
+
+ operands[0] = const0_rtx;
+})
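+
+;; E.g. (illustrative): gen_probe_stack (GEN_INT (-4096)) produces a
+;; volatile
+;;
+;;   (set (mem:DI (plus:DI (reg:DI 30) (const_int -4096))) (const_int 0))
+;;
+;; which assembles to  stq $31,-4096($30).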
+
+;; This is how we allocate stack space. If we are allocating a
+;; constant amount of space and we know it is less than 4096
+;; bytes, we need do nothing.
+;;
+;; If it is more than 4096 bytes, we need to probe the stack
+;; periodically.
+(define_expand "allocate_stack"
+ [(set (reg:DI 30)
+ (plus:DI (reg:DI 30)
+ (match_operand:DI 1 "reg_or_cint_operand" "")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (match_dup 2))]
+ ""
+{
+ if (CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) < 32768)
+ {
+ if (INTVAL (operands[1]) >= 4096)
+ {
+ /* We do this the same way as in the prologue and generate explicit
+ probes. Then we update the stack by the constant. */
+
+ int probed = 4096;
+
+ emit_insn (gen_probe_stack (GEN_INT (- probed)));
+ while (probed + 8192 < INTVAL (operands[1]))
+ emit_insn (gen_probe_stack (GEN_INT (- (probed += 8192))));
+
+ if (probed + 4096 < INTVAL (operands[1]))
+ emit_insn (gen_probe_stack (GEN_INT (- INTVAL(operands[1]))));
+ }
+
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ operands[2] = virtual_stack_dynamic_rtx;
+ }
+ else
+ {
+ rtx out_label = 0;
+ rtx loop_label = gen_label_rtx ();
+ rtx want = gen_reg_rtx (Pmode);
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx memref, test;
+
+ emit_insn (gen_subdi3 (want, stack_pointer_rtx,
+ force_reg (Pmode, operands[1])));
+
+ if (!CONST_INT_P (operands[1]))
+ {
+ rtx limit = GEN_INT (4096);
+ out_label = gen_label_rtx ();
+ test = gen_rtx_LTU (VOIDmode, operands[1], limit);
+ emit_jump_insn
+ (gen_cbranchdi4 (test, operands[1], limit, out_label));
+ }
+
+ emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096)));
+ emit_label (loop_label);
+ memref = gen_rtx_MEM (DImode, tmp);
+ MEM_VOLATILE_P (memref) = 1;
+ emit_move_insn (memref, const0_rtx);
+ emit_insn (gen_adddi3 (tmp, tmp, GEN_INT(-8192)));
+ test = gen_rtx_GTU (VOIDmode, tmp, want);
+ emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label));
+
+ memref = gen_rtx_MEM (DImode, want);
+ MEM_VOLATILE_P (memref) = 1;
+ emit_move_insn (memref, const0_rtx);
+
+ if (out_label)
+ emit_label (out_label);
+
+ emit_move_insn (stack_pointer_rtx, want);
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
+})
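+
+;; Worked example for the constant path above (illustrative): a request
+;; for 20000 bytes probes at sp-4096, then sp-12288 (the loop stops since
+;; 12288 + 8192 >= 20000), then at sp-20000 (because 12288 + 4096 < 20000),
+;; and finally adjusts the stack pointer by -20000.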
+
+;; This is used by alpha_expand_prologue to do the same thing as above,
+;; except we cannot at that time generate new basic blocks, so we hide
+;; the loop in this one insn.
+
+(define_insn "prologue_stack_probe_loop"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPECV_PSPL)]
+ ""
+{
+ operands[2] = gen_label_rtx ();
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (operands[2]));
+
+ return "stq $31,-8192(%1)\;subq %0,1,%0\;lda %1,-8192(%1)\;bne %0,%l2";
+}
+ [(set_attr "length" "16")
+ (set_attr "type" "multi")])
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+{
+ alpha_expand_prologue ();
+ DONE;
+})
+
+;; These take care of emitting the ldgp insn in the prologue.  This will be
+;; an ldah/lda pair, and we want to align them properly.  So we have two
+;; unspec_volatile insns, the first of which emits the ldgp assembler macro
+;; and the second of which emits nothing. However, both are marked as type
+;; IADD (the default) so the alignment code in alpha.c does the right thing
+;; with them.
+
+(define_expand "prologue_ldgp"
+ [(set (match_dup 0)
+ (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1))
+ (set (match_dup 0)
+ (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))]
+ ""
+{
+ operands[0] = pic_offset_table_rtx;
+ operands[1] = gen_rtx_REG (Pmode, 27);
+ operands[2] = (TARGET_EXPLICIT_RELOCS
+ ? GEN_INT (alpha_next_sequence_number++)
+ : const0_rtx);
+})
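+
+;; With explicit relocations the pair above comes out as, for instance,
+;;
+;;   ldah $29,0($27)       !gpdisp!3
+;;   lda $29,0($29)        !gpdisp!3
+;;
+;; (matching sequence numbers tie the two halves together), while without
+;; them the first insn emits the `ldgp $29,0($27)' assembler macro and the
+;; second emits nothing.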
+
+(define_insn "*ldgp_er_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPECV_LDGP1))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "ldah %0,0(%1)\t\t!gpdisp!%2"
+ [(set_attr "cannot_copy" "true")])
+
+(define_insn "*ldgp_er_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPEC_LDGP2))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "lda %0,0(%1)\t\t!gpdisp!%2"
+ [(set_attr "cannot_copy" "true")])
+
+(define_insn "*prologue_ldgp_er_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPECV_PLDGP2))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "lda %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"
+ [(set_attr "cannot_copy" "true")])
+
+(define_insn "*prologue_ldgp_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPECV_LDGP1))]
+ ""
+ "ldgp %0,0(%1)\n$%~..ng:"
+ [(set_attr "cannot_copy" "true")])
+
+(define_insn "*prologue_ldgp_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPECV_PLDGP2))]
+ ""
+ "")
+
+;; The _mcount profiling hook has special calling conventions, and
+;; does not clobber all the registers that a normal call would. So
+;; hide the fact that this is a call at all.
+
+(define_insn "prologue_mcount"
+ [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)]
+ ""
+{
+ if (TARGET_EXPLICIT_RELOCS)
+ /* Note that we cannot use a lituse_jsr reloc, since _mcount
+ cannot be called via the PLT. */
+ return "ldq $28,_mcount($29)\t\t!literal\;jsr $28,($28),_mcount";
+ else
+ return "lda $28,_mcount\;jsr $28,($28),_mcount";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "init_fp"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "register_operand" "r"))
+ (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))]
+ ""
+ "bis $31,%1,%0")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ alpha_expand_epilogue ();
+})
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ "TARGET_ABI_OSF"
+{
+ alpha_expand_epilogue ();
+ DONE;
+})
+
+(define_expand "builtin_longjmp"
+ [(use (match_operand:DI 0 "register_operand" "r"))]
+ "TARGET_ABI_OSF"
+{
+ /* The elements of the buffer are, in order: */
+ rtx fp = gen_rtx_MEM (Pmode, operands[0]);
+ rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0], 8));
+ rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0], 16));
+ rtx pv = gen_rtx_REG (Pmode, 27);
+
+ /* This bit is the same as expand_builtin_longjmp. */
+ emit_move_insn (hard_frame_pointer_rtx, fp);
+ emit_move_insn (pv, lab);
+ emit_stack_restore (SAVE_NONLOCAL, stack);
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+
+ /* Load the label we are jumping through into $27 so that we know
+ where to look for it when we get back to setjmp's function for
+ restoring the gp. */
+ emit_jump_insn (gen_builtin_longjmp_internal (pv));
+ emit_barrier ();
+ DONE;
+})
+
+;; This is effectively a copy of indirect_jump, but constrained such
+;; that register renaming cannot foil our cunning plan with $27.
+(define_insn "builtin_longjmp_internal"
+ [(set (pc)
+ (unspec_volatile [(match_operand:DI 0 "register_operand" "c")]
+ UNSPECV_LONGJMP))]
+ ""
+ "jmp $31,(%0),0"
+ [(set_attr "type" "ibr")])
+
+(define_expand "builtin_setjmp_receiver"
+ [(unspec_volatile [(label_ref (match_operand 0 "" ""))] UNSPECV_SETJMPR)]
+ "TARGET_ABI_OSF"
+ "")
+
+(define_insn_and_split "*builtin_setjmp_receiver_1"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR)]
+ "TARGET_ABI_OSF"
+{
+ if (TARGET_EXPLICIT_RELOCS)
+ return "#";
+ else
+ return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)";
+}
+ "&& TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 1)
+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1))
+ (set (match_dup 1)
+ (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))]
+{
+ if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0))
+ emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]),
+ UNSPECV_SETJMPR_ER));
+ operands[1] = pic_offset_table_rtx;
+ operands[2] = gen_rtx_REG (Pmode, 27);
+ operands[3] = GEN_INT (alpha_next_sequence_number++);
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "multi")])
+
+(define_insn "*builtin_setjmp_receiver_er_sl_1"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR_ER)]
+ "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS && TARGET_AS_CAN_SUBTRACT_LABELS"
+ "lda $27,$LSJ%=-%l0($27)\n$LSJ%=:")
+
+(define_insn "*builtin_setjmp_receiver_er_1"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_SETJMPR_ER)]
+ "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS"
+ "br $27,$LSJ%=\n$LSJ%=:"
+ [(set_attr "type" "ibr")])
+
+;; When flag_reorder_blocks_and_partition is in effect, the compiler puts
+;; exception landing pads in a cold section.  To avoid an inter-section
+;; offset calculation, a jump to the original landing pad is emitted in
+;; its place.  Since the landing pad has moved, the RA-relative GP
+;; calculation in its prologue breaks.  To solve this problem, we use an
+;; alternative GP load approach, as in the TARGET_LD_BUGGY_LDGP case.
+
+(define_expand "exception_receiver"
+ [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)]
+ "TARGET_ABI_OSF"
+{
+ if (TARGET_LD_BUGGY_LDGP || flag_reorder_blocks_and_partition)
+ operands[0] = alpha_gp_save_rtx ();
+ else
+ operands[0] = const0_rtx;
+})
+
+(define_insn "*exception_receiver_2"
+ [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)]
+ "TARGET_ABI_OSF
+ && (TARGET_LD_BUGGY_LDGP || flag_reorder_blocks_and_partition)"
+ "ldq $29,%0"
+ [(set_attr "type" "ild")])
+
+(define_insn_and_split "*exception_receiver_1"
+ [(unspec_volatile [(const_int 0)] UNSPECV_EHR)]
+ "TARGET_ABI_OSF"
+{
+ if (TARGET_EXPLICIT_RELOCS)
+ return "ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*";
+ else
+ return "ldgp $29,0($26)";
+}
+ "&& TARGET_EXPLICIT_RELOCS && reload_completed"
+ [(set (match_dup 0)
+ (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1))
+ (set (match_dup 0)
+ (unspec:DI [(match_dup 0) (match_dup 2)] UNSPEC_LDGP2))]
+{
+ operands[0] = pic_offset_table_rtx;
+ operands[1] = gen_rtx_REG (Pmode, 26);
+ operands[2] = GEN_INT (alpha_next_sequence_number++);
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")])
+
+(define_expand "nonlocal_goto_receiver"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
+ (set (reg:DI 27) (mem:DI (reg:DI 29)))
+ (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
+ (use (reg:DI 27))]
+ "TARGET_ABI_OPEN_VMS"
+ "")
+
+(define_insn "arg_home"
+ [(unspec [(const_int 0)] UNSPEC_ARG_HOME)
+ (use (reg:DI 1))
+ (use (reg:DI 25))
+ (use (reg:DI 16))
+ (use (reg:DI 17))
+ (use (reg:DI 18))
+ (use (reg:DI 19))
+ (use (reg:DI 20))
+ (use (reg:DI 21))
+ (use (reg:DI 48))
+ (use (reg:DI 49))
+ (use (reg:DI 50))
+ (use (reg:DI 51))
+ (use (reg:DI 52))
+ (use (reg:DI 53))
+ (clobber (mem:BLK (const_int 0)))
+ (clobber (reg:DI 24))
+ (clobber (reg:DI 25))
+ (clobber (reg:DI 0))]
+ "TARGET_ABI_OPEN_VMS"
+ "lda $0,OTS$HOME_ARGS\;ldq $0,8($0)\;jsr $0,OTS$HOME_ARGS"
+ [(set_attr "length" "16")
+ (set_attr "type" "multi")])
+
+;; Load the CIW into r2 for calling __T3E_MISMATCH
+
+(define_expand "umk_mismatch_args"
+ [(set (match_dup 1) (mem:DI (plus:DI (reg:DI 15) (const_int -16))))
+ (set (match_dup 2) (mem:DI (plus:DI (match_dup 1) (const_int -32))))
+ (set (reg:DI 1) (match_operand:DI 0 "const_int_operand" ""))
+ (set (match_dup 3) (plus:DI (mult:DI (reg:DI 25)
+ (const_int 8))
+ (match_dup 2)))
+ (set (reg:DI 2) (mem:DI (match_dup 3)))]
+ "TARGET_ABI_UNICOSMK"
+{
+ operands[1] = gen_reg_rtx (DImode);
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (DImode);
+})
+
+(define_insn "arg_home_umk"
+ [(unspec [(const_int 0)] UNSPEC_ARG_HOME)
+ (use (reg:DI 1))
+ (use (reg:DI 2))
+ (use (reg:DI 16))
+ (use (reg:DI 17))
+ (use (reg:DI 18))
+ (use (reg:DI 19))
+ (use (reg:DI 20))
+ (use (reg:DI 21))
+ (use (reg:DI 48))
+ (use (reg:DI 49))
+ (use (reg:DI 50))
+ (use (reg:DI 51))
+ (use (reg:DI 52))
+ (use (reg:DI 53))
+ (clobber (mem:BLK (const_int 0)))
+ (clobber (reg:DI 22))
+ (clobber (reg:DI 23))
+ (clobber (reg:DI 24))
+ (clobber (reg:DI 0))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (clobber (reg:DI 3))
+ (clobber (reg:DI 4))
+ (clobber (reg:DI 5))
+ (clobber (reg:DI 6))
+ (clobber (reg:DI 7))
+ (clobber (reg:DI 8))]
+ "TARGET_ABI_UNICOSMK"
+ "laum $4,__T3E_MISMATCH($31)\;sll $4,32,$4\;lalm $4,__T3E_MISMATCH($4)\;lal $4,__T3E_MISMATCH($4)\;jsr $3,($4)"
+ [(set_attr "length" "16")
+ (set_attr "type" "multi")])
+
+;; Prefetch data.
+;;
+;; On EV4, these instructions are nops -- no load occurs.
+;;
+;; On EV5, these instructions act as a normal load, and thus can trap
+;; if the address is invalid. The OS may (or may not) handle this in
+;; the entMM fault handler and suppress the fault. If so, then this
+;; has the effect of a read prefetch instruction.
+;;
+;; On EV6, these become official prefetch instructions.
+
+(define_insn "prefetch"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:DI 1 "const_int_operand" "n")
+ (match_operand:DI 2 "const_int_operand" "n"))]
+ "TARGET_FIXUP_EV5_PREFETCH || alpha_cpu == PROCESSOR_EV6"
+{
+ /* Interpret "no temporal locality" as this data should be evicted once
+ it is used. The "evict next" alternatives load the data into the cache
+ and leave the LRU eviction counter pointing to that block. */
+ static const char * const alt[2][2] = {
+ {
+ "ldq $31,%a0", /* read, evict next */
+ "ldl $31,%a0", /* read, evict last */
+ },
+ {
+ "ldt $f31,%a0", /* write, evict next */
+ "lds $f31,%a0", /* write, evict last */
+ }
+ };
+
+ bool write = INTVAL (operands[1]) != 0;
+ bool lru = INTVAL (operands[2]) != 0;
+
+ return alt[write][lru];
+}
+ [(set_attr "type" "ild")])
+
+;; Close the trap shadow of preceding instructions. This is generated
+;; by alpha_reorg.
+
+(define_insn "trapb"
+ [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)]
+ ""
+ "trapb"
+ [(set_attr "type" "misc")])
+
+;; No-op instructions used by machine-dependent reorg to preserve
+;; alignment for instruction issue.
+;; The Unicos/Mk assembler does not support these opcodes.
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "bis $31,$31,$31"
+ [(set_attr "type" "ilog")])
+
+(define_insn "fnop"
+ [(const_int 1)]
+ "TARGET_FP"
+ "cpys $f31,$f31,$f31"
+ [(set_attr "type" "fcpys")])
+
+(define_insn "unop"
+ [(const_int 2)]
+ ""
+ "ldq_u $31,0($30)")
+
+;; On Unicos/Mk we use a macro for aligning code.
+
+(define_insn "realign"
+ [(unspec_volatile [(match_operand 0 "immediate_operand" "i")]
+ UNSPECV_REALIGN)]
+ ""
+{
+ if (TARGET_ABI_UNICOSMK)
+ return "gcc@code@align %0";
+ else
+ return ".align %0 #realign";
+})
+
+;; Instructions to be emitted from __builtins.
+
+(define_insn "builtin_cmpbge"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rI")]
+ UNSPEC_CMPBGE))]
+ ""
+ "cmpbge %r1,%2,%0"
+ ;; The EV6 data sheets list this as ILOG. OTOH, EV6 doesn't
+ ;; actually differentiate between ILOG and ICMP in the schedule.
+ [(set_attr "type" "icmp")])
+
+(define_expand "builtin_extbl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extxl_be;
+ else
+ gen = gen_extxl_le;
+ emit_insn ((*gen) (operands[0], operands[1], GEN_INT (8), operands[2]));
+ DONE;
+})
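+
+;; Worked example (illustration only): on a little-endian target,
+;; __builtin_alpha_extbl (x, a) goes through gen_extxl_le with an
+;; 8-bit width, computing (x >> ((a & 7) * 8)) & 0xff -- the byte of
+;; x selected by the low three bits of the address operand.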
+
+(define_expand "builtin_extwl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extxl_be;
+ else
+ gen = gen_extxl_le;
+ emit_insn ((*gen) (operands[0], operands[1], GEN_INT (16), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_extll"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extxl_be;
+ else
+ gen = gen_extxl_le;
+ emit_insn ((*gen) (operands[0], operands[1], GEN_INT (32), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_extql"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extxl_be;
+ else
+ gen = gen_extxl_le;
+ emit_insn ((*gen) (operands[0], operands[1], GEN_INT (64), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_extwh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extwh_be;
+ else
+ gen = gen_extwh_le;
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_extlh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extlh_be;
+ else
+ gen = gen_extlh_le;
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_extqh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_extqh_be;
+ else
+ gen = gen_extqh_le;
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_insbl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_insbl_be;
+ else
+ gen = gen_insbl_le;
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_inswl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_inswl_be;
+ else
+ gen = gen_inswl_le;
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_insll"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_insll_be;
+ else
+ gen = gen_insll_le;
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_insql"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx);
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_insql_be;
+ else
+ gen = gen_insql_le;
+ emit_insn ((*gen) (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_inswh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_inslh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_insqh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (64), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskbl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx mask;
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_mskxl_be;
+ else
+ gen = gen_mskxl_le;
+ mask = GEN_INT (0xff);
+ emit_insn ((*gen) (operands[0], operands[1], mask, operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskwl"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx mask;
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_mskxl_be;
+ else
+ gen = gen_mskxl_le;
+ mask = GEN_INT (0xffff);
+ emit_insn ((*gen) (operands[0], operands[1], mask, operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskll"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx mask;
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_mskxl_be;
+ else
+ gen = gen_mskxl_le;
+ mask = immed_double_const (0xffffffff, 0, DImode);
+ emit_insn ((*gen) (operands[0], operands[1], mask, operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskql"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx mask;
+ if (WORDS_BIG_ENDIAN)
+ gen = gen_mskxl_be;
+ else
+ gen = gen_mskxl_le;
+ mask = constm1_rtx;
+ emit_insn ((*gen) (operands[0], operands[1], mask, operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskwh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_msklh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_mskqh"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ ""
+{
+ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2]));
+ DONE;
+})
+
+(define_expand "builtin_zap"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (unspec:DI
+ [(match_operand:DI 2 "reg_or_cint_operand" "")]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "reg_or_cint_operand" "")))]
+ ""
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ rtx mask = alpha_expand_zap_mask (INTVAL (operands[2]));
+
+ if (mask == const0_rtx)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ if (mask == constm1_rtx)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_anddi3 (operands[0], operands[1], mask));
+ DONE;
+ }
+
+ operands[1] = force_reg (DImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);
+})
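+
+;; Worked example (illustration): for a constant selector such as 0x0f,
+;; alpha_expand_zap_mask yields the AND mask 0xffffffff00000000 (bytes
+;; 0-3 cleared), so the builtin collapses into the single AND emitted
+;; through gen_anddi3 above instead of a zap instruction.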
+
+(define_insn "*builtin_zap_1"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
+ (and:DI (unspec:DI
+ [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))]
+ ""
+ "@
+ #
+ #
+ bis $31,$31,%0
+ zap %r1,%2,%0"
+ [(set_attr "type" "shift,shift,ilog,shift")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (unspec:DI
+ [(match_operand:QI 2 "const_int_operand" "")]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "const_int_operand" "")))]
+ ""
+ [(const_int 0)]
+{
+ rtx mask = alpha_expand_zap_mask (INTVAL (operands[2]));
+ if (HOST_BITS_PER_WIDE_INT >= 64 || CONST_INT_P (mask))
+ operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode);
+ else
+ {
+ HOST_WIDE_INT c_lo = INTVAL (operands[1]);
+ HOST_WIDE_INT c_hi = (c_lo < 0 ? -1 : 0);
+ operands[1] = immed_double_const (c_lo & CONST_DOUBLE_LOW (mask),
+ c_hi & CONST_DOUBLE_HIGH (mask),
+ DImode);
+ }
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (unspec:DI
+ [(match_operand:QI 2 "const_int_operand" "")]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "register_operand" "")))]
+ ""
+ [(set (match_dup 0)
+ (and:DI (match_dup 1) (match_dup 2)))]
+{
+ operands[2] = alpha_expand_zap_mask (INTVAL (operands[2]));
+ if (operands[2] == const0_rtx)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ if (operands[2] == constm1_rtx)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+})
+
+(define_expand "builtin_zapnot"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (unspec:DI
+ [(not:QI (match_operand:DI 2 "reg_or_cint_operand" ""))]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "reg_or_cint_operand" "")))]
+ ""
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ rtx mask = alpha_expand_zap_mask (~ INTVAL (operands[2]));
+
+ if (mask == const0_rtx)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ if (mask == constm1_rtx)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_anddi3 (operands[0], operands[1], mask));
+ DONE;
+ }
+
+ operands[1] = force_reg (DImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+(define_insn "*builtin_zapnot_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (unspec:DI
+ [(not:QI (match_operand:QI 2 "register_operand" "r"))]
+ UNSPEC_ZAP)
+ (match_operand:DI 1 "reg_or_0_operand" "rJ")))]
+ ""
+ "zapnot %r1,%2,%0"
+ [(set_attr "type" "shift")])
+
+(define_insn "builtin_amask"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")]
+ UNSPEC_AMASK))]
+ ""
+ "amask %1,%0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "builtin_implver"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))]
+ ""
+ "implver %0"
+ [(set_attr "type" "ilog")])
+
+(define_insn "builtin_rpcc"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))]
+ ""
+ "rpcc %0"
+ [(set_attr "type" "ilog")])
+
+(define_expand "builtin_minub8"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_minsb8"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_minuw4"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_minsw4"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_maxub8"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_maxsb8"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_maxuw4"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "builtin_maxsw4"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")]
+ "TARGET_MAX"
+{
+ alpha_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "builtin_perr"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ")
+ (match_operand:DI 2 "reg_or_8bit_operand" "rJ")]
+ UNSPEC_PERR))]
+ "TARGET_MAX"
+ "perr %r1,%r2,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "builtin_pklb"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (vec_concat:V8QI
+ (vec_concat:V4QI
+ (truncate:V2QI (match_operand:DI 1 "register_operand" ""))
+ (match_dup 2))
+ (match_dup 3)))]
+ "TARGET_MAX"
+{
+ operands[0] = gen_lowpart (V8QImode, operands[0]);
+ operands[1] = gen_lowpart (V2SImode, operands[1]);
+ operands[2] = CONST0_RTX (V2QImode);
+ operands[3] = CONST0_RTX (V4QImode);
+})
+
+(define_insn "*pklb"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (vec_concat:V8QI
+ (vec_concat:V4QI
+ (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r"))
+ (match_operand:V2QI 2 "const0_operand" ""))
+ (match_operand:V4QI 3 "const0_operand" "")))]
+ "TARGET_MAX"
+ "pklb %r1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "builtin_pkwb"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (vec_concat:V8QI
+ (truncate:V4QI (match_operand:DI 1 "register_operand" ""))
+ (match_dup 2)))]
+ "TARGET_MAX"
+{
+ operands[0] = gen_lowpart (V8QImode, operands[0]);
+ operands[1] = gen_lowpart (V4HImode, operands[1]);
+ operands[2] = CONST0_RTX (V4QImode);
+})
+
+(define_insn "*pkwb"
+ [(set (match_operand:V8QI 0 "register_operand" "=r")
+ (vec_concat:V8QI
+ (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r"))
+ (match_operand:V4QI 2 "const0_operand" "")))]
+ "TARGET_MAX"
+ "pkwb %r1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "builtin_unpkbl"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:V2SI
+ (vec_select:V2QI (match_operand:DI 1 "register_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_MAX"
+{
+ operands[0] = gen_lowpart (V2SImode, operands[0]);
+ operands[1] = gen_lowpart (V8QImode, operands[1]);
+})
+
+(define_insn "*unpkbl"
+ [(set (match_operand:V2SI 0 "register_operand" "=r")
+ (zero_extend:V2SI
+ (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_MAX"
+ "unpkbl %r1,%0"
+ [(set_attr "type" "mvi")])
+
+(define_expand "builtin_unpkbw"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_operand:DI 1 "register_operand" "")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_MAX"
+{
+ operands[0] = gen_lowpart (V4HImode, operands[0]);
+ operands[1] = gen_lowpart (V8QImode, operands[1]);
+})
+
+(define_insn "*unpkbw"
+ [(set (match_operand:V4HI 0 "register_operand" "=r")
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_MAX"
+ "unpkbw %r1,%0"
+ [(set_attr "type" "mvi")])
+
+(include "sync.md")
+
+;; The call patterns are at the end of the file because their
+;; wildcard operand0 interferes with nice recognition.
+
+(define_insn "*call_value_osf_1_er_noreturn"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ "@
+ jsr $26,($27),0
+ bsr $26,%1\t\t!samegp
+ ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),%1\t\t!lituse_jsr!%#"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,8")])
+
+(define_insn "*call_value_osf_1_er"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ jsr $26,(%1),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*
+ bsr $26,%1\t\t!samegp
+ ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),%1\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,*,16")])
+
+;; We must use peep2 instead of a split because we need accurate life
+;; information for $gp. Consider the case of { bar(); while (1); }.
+(define_peephole2
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed
+ && ! samegp_function_operand (operands[1], Pmode)
+ && (peep2_regno_dead_p (1, 29)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX))"
+ [(parallel [(set (match_dup 0)
+ (call (mem:DI (match_dup 3))
+ (match_dup 2)))
+ (use (reg:DI 29))
+ (use (match_dup 1))
+ (use (match_dup 4))
+ (clobber (reg:DI 26))])]
+{
+ if (CONSTANT_P (operands[1]))
+ {
+ operands[3] = gen_rtx_REG (Pmode, 27);
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx,
+ operands[1], operands[4]));
+ }
+ else
+ {
+ operands[3] = operands[1];
+ operands[1] = const0_rtx;
+ operands[4] = const0_rtx;
+ }
+})
+
+(define_peephole2
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed
+ && ! samegp_function_operand (operands[1], Pmode)
+ && ! (peep2_regno_dead_p (1, 29)
+ || find_reg_note (insn, REG_NORETURN, NULL_RTX))"
+ [(parallel [(set (match_dup 0)
+ (call (mem:DI (match_dup 3))
+ (match_dup 2)))
+ (set (match_dup 6)
+ (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1))
+ (use (match_dup 1))
+ (use (match_dup 5))
+ (clobber (reg:DI 26))])
+ (set (match_dup 6)
+ (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))]
+{
+ if (CONSTANT_P (operands[1]))
+ {
+ operands[3] = gen_rtx_REG (Pmode, 27);
+ operands[5] = GEN_INT (alpha_next_sequence_number++);
+ emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx,
+ operands[1], operands[5]));
+ }
+ else
+ {
+ operands[3] = operands[1];
+ operands[1] = const0_rtx;
+ operands[5] = const0_rtx;
+ }
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ operands[6] = pic_offset_table_rtx;
+})
+
+(define_insn "*call_value_osf_2_er_nogp"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "register_operand" "c"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "jsr $26,(%1),%3%J4"
+ [(set_attr "type" "jsr")])
+
+(define_insn "*call_value_osf_2_er"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "register_operand" "c"))
+ (match_operand 2 "" "")))
+ (set (reg:DI 29)
+ (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand" "")]
+ UNSPEC_LDGP1))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))
+ (clobber (reg:DI 26))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "jsr $26,(%1),%3%J4\;ldah $29,0($26)\t\t!gpdisp!%5"
+ [(set_attr "type" "jsr")
+ (set_attr "cannot_copy" "true")
+ (set_attr "length" "8")])
+
+(define_insn "*call_value_osf_1_noreturn"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ "@
+ jsr $26,($27),0
+ bsr $26,$%1..ng
+ jsr $26,%1"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,8")])
+
+(define_insn_and_split "call_value_osf_tlsgd"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" ""))
+ (const_int 0)))
+ (unspec [(match_operand:DI 2 "const_int_operand" "")] UNSPEC_TLSGD_CALL)
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "HAVE_AS_TLS"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (unspec:DI [(match_dup 5)
+ (match_dup 1)
+ (match_dup 2)] UNSPEC_LITERAL))
+ (parallel [(set (match_dup 0)
+ (call (mem:DI (match_dup 3))
+ (const_int 0)))
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1))
+ (use (match_dup 1))
+ (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL))
+ (clobber (reg:DI 26))])
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))]
+{
+ operands[3] = gen_rtx_REG (Pmode, 27);
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ operands[5] = pic_offset_table_rtx;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "call_value_osf_tlsldm"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" ""))
+ (const_int 0)))
+ (unspec [(match_operand:DI 2 "const_int_operand" "")] UNSPEC_TLSLDM_CALL)
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "HAVE_AS_TLS"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (unspec:DI [(match_dup 5)
+ (match_dup 1)
+ (match_dup 2)] UNSPEC_LITERAL))
+ (parallel [(set (match_dup 0)
+ (call (mem:DI (match_dup 3))
+ (const_int 0)))
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1))
+ (use (match_dup 1))
+ (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL))
+ (clobber (reg:DI 26))])
+ (set (match_dup 5)
+ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))]
+{
+ operands[3] = gen_rtx_REG (Pmode, 27);
+ operands[4] = GEN_INT (alpha_next_sequence_number++);
+ operands[5] = pic_offset_table_rtx;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn "*call_value_osf_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ jsr $26,($27),0\;ldgp $29,0($26)
+ bsr $26,$%1..ng
+ jsr $26,%1\;ldgp $29,0($26)"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,*,16")])
+
+(define_insn "*sibcall_value_osf_1_er"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s"))
+ (match_operand 2 "" "")))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)]
+ "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ br $31,%1\t\t!samegp
+ ldq $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,8")])
+
+(define_insn "*sibcall_value_osf_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s"))
+ (match_operand 2 "" "")))
+ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)]
+ "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF"
+ "@
+ br $31,$%1..ng
+ lda $27,%1\;jmp $31,($27),%1"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,8")])
+
+(define_insn "*call_value_nt_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "r,R,s"))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 26))]
+ "TARGET_ABI_WINDOWS_NT"
+ "@
+ jsr $26,(%1)
+ bsr $26,%1
+ jsr $26,%1"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "*,*,12")])
+
+; GAS relies on the order and position of instructions output below in order
+; to generate relocs for VMS link to potentially optimize the call.
+; Please do not molest.
+(define_insn "*call_value_vms_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "r,s"))
+ (match_operand 2 "" "")))
+ (use (match_operand:DI 3 "nonmemory_operand" "r,n"))
+ (use (reg:DI 25))
+ (use (reg:DI 26))
+ (clobber (reg:DI 27))]
+ "TARGET_ABI_OPEN_VMS"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov %3,$27\;jsr $26,0\;ldq $27,0($29)";
+ case 1:
+ operands [3] = alpha_use_linkage (operands [1], cfun->decl, 1, 0);
+ operands [4] = alpha_use_linkage (operands [1], cfun->decl, 0, 0);
+ return "ldq $26,%4\;ldq $27,%3\;jsr $26,%1\;ldq $27,0($29)";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jsr")
+ (set_attr "length" "12,16")])
+
+(define_insn "*call_value_umk"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:DI 25))
+ (clobber (reg:DI 26))]
+ "TARGET_ABI_UNICOSMK"
+ "jsr $26,(%1)"
+ [(set_attr "type" "jsr")])
diff --git a/gcc/config/alpha/alpha.opt b/gcc/config/alpha/alpha.opt
new file mode 100644
index 000000000..fb7db3775
--- /dev/null
+++ b/gcc/config/alpha/alpha.opt
@@ -0,0 +1,134 @@
+; Options for the DEC Alpha port of the compiler
+;
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+msoft-float
+Target Report Mask(SOFT_FP)
+Do not use hardware fp
+
+mfp-regs
+Target Report Mask(FPREGS)
+Use fp registers
+
+mgas
+Target RejectNegative Mask(GAS)
+Assume GAS
+
+malpha-as
+Target RejectNegative InverseMask(GAS)
+Do not assume GAS
+
+mieee-conformant
+Target RejectNegative Mask(IEEE_CONFORMANT)
+Request IEEE-conformant math library routines (OSF/1)
+
+mieee
+Target Report RejectNegative Mask(IEEE)
+Emit IEEE-conformant code, without inexact exceptions
+
+mieee-with-inexact
+Target Report RejectNegative Mask(IEEE_WITH_INEXACT)
+Emit IEEE-conformant code, with inexact exceptions
+
+mbuild-constants
+Target Report Mask(BUILD_CONSTANTS)
+Do not emit complex integer constants to read-only memory
+
+mfloat-vax
+Target Report RejectNegative Mask(FLOAT_VAX)
+Use VAX fp
+
+mfloat-ieee
+Target Report RejectNegative InverseMask(FLOAT_VAX)
+Do not use VAX fp
+
+mbwx
+Target Report Mask(BWX)
+Emit code for the byte/word ISA extension
+
+mmax
+Target Report Mask(MAX)
+Emit code for the motion video ISA extension
+
+mfix
+Target Report Mask(FIX)
+Emit code for the fp move and sqrt ISA extension
+
+mcix
+Target Report Mask(CIX)
+Emit code for the counting ISA extension
+
+mexplicit-relocs
+Target Report Mask(EXPLICIT_RELOCS)
+Emit code using explicit relocation directives
+
+msmall-data
+Target Report RejectNegative Mask(SMALL_DATA)
+Emit 16-bit relocations to the small data areas
+
+mlarge-data
+Target Report RejectNegative InverseMask(SMALL_DATA)
+Emit 32-bit relocations to the small data areas
+
+msmall-text
+Target Report RejectNegative Mask(SMALL_TEXT)
+Emit direct branches to local functions
+
+mlarge-text
+Target Report RejectNegative InverseMask(SMALL_TEXT)
+Emit indirect branches to local functions
+
+mtls-kernel
+Target Report Mask(TLS_KERNEL)
+Emit rdval instead of rduniq for thread pointer
+
+mlong-double-128
+Target Report RejectNegative Mask(LONG_DOUBLE_128)
+Use 128-bit long double
+
+mlong-double-64
+Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
+Use 64-bit long double
+
+mcpu=
+Target RejectNegative Joined Var(alpha_cpu_string)
+Use features of and schedule given CPU
+
+mtune=
+Target RejectNegative Joined Var(alpha_tune_string)
+Schedule given CPU
+
+mfp-rounding-mode=
+Target RejectNegative Joined Var(alpha_fprm_string)
+Control the generated fp rounding mode
+
+mfp-trap-mode=
+Target RejectNegative Joined Var(alpha_fptm_string)
+Control the IEEE trap mode
+
+mtrap-precision=
+Target RejectNegative Joined Var(alpha_tp_string)
+Control the precision given to fp exceptions
+
+mmemory-latency=
+Target RejectNegative Joined Var(alpha_mlat_string)
+Tune expected memory latency
+
+mtls-size=
+Target RejectNegative Joined UInteger Var(alpha_tls_size) Init(32)
+Specify bit size of immediate TLS offsets
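+
+; Record format sketch (illustration): each entry above is an option
+; name, one line of attributes, and an optional help string.  Mask(BWX)
+; means -mbwx toggles the MASK_BWX bit in target_flags, while the
+; Var(...) options store their argument into the named variable.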
diff --git a/gcc/config/alpha/constraints.md b/gcc/config/alpha/constraints.md
new file mode 100644
index 000000000..3e9a88776
--- /dev/null
+++ b/gcc/config/alpha/constraints.md
@@ -0,0 +1,121 @@
+;; Constraint definitions for DEC Alpha.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;; Unused letters:
+;;; ABCDEF V YZ
+;;; de ghijklmnopq stu wxyz
+
+;; Integer register constraints.
+
+(define_register_constraint "a" "R24_REG"
+ "General register 24, input to division routine")
+
+(define_register_constraint "b" "R25_REG"
+ "General register 24, input to division routine")
+
+(define_register_constraint "c" "R27_REG"
+ "General register 27, function call address")
+
+(define_register_constraint "f" "FLOAT_REGS"
+ "Any floating-point register")
+
+(define_register_constraint "v" "R0_REG"
+ "General register 0, function value return address")
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "An unsigned 8 bit constant"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 255)")))
+
+(define_constraint "J"
+ "The constant zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "K"
+ "Signed 16-bit integer constant"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32768, 32767)")))
+
+(define_constraint "L"
+ "A shifted signed 16-bit constant appropriate for LDAH"
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) == 0
+ && (ival >> 31 == -1 || ival >> 31 == 0)")))
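+
+;; Worked example (illustration only): the pair
+;;   ldah $1,0x1234($31)   ; $1 = 0x1234 << 16 = 0x12340000
+;;   lda  $1,0x5678($1)    ; $1 += 0x5678
+;; builds 0x12345678, so "L" accepts 0x12340000: its low 16 bits are
+;; zero and the value sign-extends from 32 bits.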
+
+(define_constraint "M"
+ "A valid operand of a ZAP insn"
+ (and (match_code "const_int")
+ (match_test "zap_mask (ival) != 0")))
+
+(define_constraint "N"
+ "A complemented unsigned 8-bit constant"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (~ival, 0, 255)")))
+
+(define_constraint "O"
+ "A negated unsigned 8-bit constant"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (-ival, 0, 255)")))
+
+(define_constraint "P"
+ "The constant 1, 2 or 3"
+ (and (match_code "const_int")
+ (match_test "ival == 1 || ival == 2 || ival == 3")))
+
+(define_constraint "H"
+ "A valid operand of a ZAP insn, when building with 32-bit HOST_WIDE_INT"
+ (and (match_code "const_double")
+ (match_test "mode == VOIDmode && zap_mask (hval) && zap_mask (lval)")))
+
+;; Floating-point constant constraints.
+(define_constraint "G"
+ "The floating point zero constant"
+ (and (match_code "const_double")
+ (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT
+ && op == CONST0_RTX (mode)")))
+
+;; "Extra" constraints.
+(define_constraint "Q"
+ "@internal A normal_memory_operand"
+ (match_operand 0 "normal_memory_operand"))
+
+(define_constraint "R"
+ "@internal A direct_call_operand"
+ (match_operand:DI 0 "direct_call_operand"))
+
+(define_constraint "S"
+ "An unsigned 6-bit constant"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 63)")))
+
+(define_constraint "T"
+ "@internal A high-part symbol"
+ (match_code "high"))
+
+(define_constraint "U"
+ "@internal A UNICOSMK symbol"
+ (and (match_test "TARGET_ABI_UNICOSMK")
+ (match_operand 0 "symbolic_operand")))
+
+(define_constraint "W"
+ "A vector zero constant"
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
diff --git a/gcc/config/alpha/crtfastmath.c b/gcc/config/alpha/crtfastmath.c
new file mode 100644
index 000000000..677b9edfa
--- /dev/null
+++ b/gcc/config/alpha/crtfastmath.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+ * Contributed by Richard Henderson (rth@redhat.com)
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Assume OSF/1 compatible interfaces. */
+
+extern void __ieee_set_fp_control (unsigned long int);
+
+#define IEEE_MAP_DMZ (1UL<<12) /* Map denorm inputs to zero */
+#define IEEE_MAP_UMZ (1UL<<13) /* Map underflowed outputs to zero */
+
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+ __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ);
+}
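+
+/* Usage sketch (an assumption, for illustration): link specs typically
+   pull this object in when -ffast-math or -funsafe-math-optimizations
+   is given, so the constructor above runs before main() and flushes
+   denormal inputs and underflowed outputs to zero program-wide.  */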
diff --git a/gcc/config/alpha/driver-alpha.c b/gcc/config/alpha/driver-alpha.c
new file mode 100644
index 000000000..d787886d1
--- /dev/null
+++ b/gcc/config/alpha/driver-alpha.c
@@ -0,0 +1,100 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Arthur Loiret <aloiret@debian.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* This will be called by the spec parser in gcc.c when it sees
+   a %:local_cpu_detect(args) construct.  Currently it will be called
+   with either "cpu" or "tune" as argument depending on whether
+   -mcpu=native or -mtune=native is to be substituted.
+
+   It returns a string containing new command line parameters to be
+   put in place of the above two options, depending on which CPU this
+   is executed on.  E.g. "-mcpu=ev6" on an Alpha 21264 for
+   -mcpu=native.  If the routine can't detect a known processor,
+   the -mcpu or -mtune option is discarded.
+
+ ARGC and ARGV are set depending on the actual arguments given
+ in the spec. */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu = NULL;
+ char buf[128];
+ FILE *f;
+
+ static const struct cpu_names {
+ const char *const name;
+ const char *const cpu;
+ } cpu_names[] = {
+ { "EV79", "ev67" },
+ { "EV7", "ev67" },
+ { "EV69", "ev67" },
+ { "EV68CX", "ev67" },
+ { "EV68CB", "ev67" },
+ { "EV68AL", "ev67" },
+ { "EV67", "ev67" },
+ { "EV6", "ev6" },
+ { "PCA57", "pca56" },
+ { "PCA56", "pca56" },
+ { "EV56", "ev56" },
+ { "EV5", "ev5" },
+ { "LCA45", "ev45" },
+ { "EV45", "ev45" },
+ { "LCA4", "ev4" },
+ { "EV4", "ev4" },
+/* { "EV3", "ev3" }, */
+ { 0, 0 }
+ };
+
+ int i;
+
+ if (argc < 1)
+ return NULL;
+
+ if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune"))
+ return NULL;
+
+ f = fopen ("/proc/cpuinfo", "r");
+ if (f == NULL)
+ return NULL;
+
+ while (fgets (buf, sizeof (buf), f) != NULL)
+ if (strncmp (buf, "cpu model", sizeof ("cpu model") - 1) == 0)
+ {
+ for (i = 0; cpu_names [i].name; i++)
+ if (strstr (buf, cpu_names [i].name) != NULL)
+ {
+ cpu = cpu_names [i].cpu;
+ break;
+ }
+ break;
+ }
+
+ fclose (f);
+
+ if (cpu == NULL)
+ return NULL;
+
+ return concat ("-m", argv[0], "=", cpu, NULL);
+}
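+
+/* Example spec usage (assumed for illustration; the real spec lives in
+   the OS-specific headers): a driver self-spec along the lines of
+     %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}
+   deletes -mcpu=native and splices in the string returned above,
+   e.g. "-mcpu=ev6" on a 21264 host.  */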
diff --git a/gcc/config/alpha/elf.h b/gcc/config/alpha/elf.h
new file mode 100644
index 000000000..0293f26ee
--- /dev/null
+++ b/gcc/config/alpha/elf.h
@@ -0,0 +1,452 @@
+/* Definitions of target machine for GNU compiler, for DEC Alpha w/ELF.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Richard Henderson (rth@tamu.edu).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef OBJECT_FORMAT_COFF
+#undef EXTENDED_COFF
+#define OBJECT_FORMAT_ELF
+
+/* ??? Move all SDB stuff from alpha.h to osf.h. */
+#undef SDB_DEBUGGING_INFO
+
+#define DBX_DEBUGGING_INFO 1
+#define DWARF2_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#undef ASM_FINAL_SPEC
+
+/* alpha/ doesn't use elfos.h for some reason. */
+#define TARGET_OBJFMT_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__ELF__"); \
+ } \
+ while (0)
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{G*}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug}"
+
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP "\t.ident\t"
+
+/* Output #ident as a .ident. */
+#undef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(FILE, NAME) \
+ fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME);
+
+/* This is how to allocate empty space in some section. The .zero
+ pseudo-op is used for this on most svr4 assemblers. */
+
+#undef SKIP_ASM_OP
+#define SKIP_ASM_OP "\t.zero\t"
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE, SIZE) \
+ fprintf (FILE, "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n", SKIP_ASM_OP, (SIZE))
+
+/* Output the label which precedes a jumptable.  Note that for all svr4
+   systems where we actually generate jumptables (which is to say every
+   svr4 target except i386, where we use casesi instead) we put the
+   jumptables into the .rodata section, and since other stuff could have
+   been put into the .rodata section prior to any given jumptable, we
+   have to make sure that the location counter for the .rodata section
+   gets properly re-aligned prior to the actual beginning of the jump
+   table.  */
+
+#undef ALIGN_ASM_OP
+#define ALIGN_ASM_OP "\t.align\t"
+
+#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \
+ ASM_OUTPUT_ALIGN ((FILE), 2);
+#endif
+
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \
+ do { \
+ ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \
+ (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \
+ } while (0)
+
+/* The standard SVR4 assembler seems to require that certain builtin
+ library routines (e.g. .udiv) be explicitly declared as .globl
+ in each assembly file where they are referenced. */
+
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+ (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0))
+
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object. Under SVR4,
+ the linker seems to want the alignment of data objects
+ to depend on their types. We do exactly that here. */
+
+#undef COMMON_ASM_OP
+#define COMMON_ASM_OP "\t.comm\t"
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+do { \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", (SIZE), (ALIGN) / BITS_PER_UNIT); \
+} while (0)
+
+/* This says how to output assembler code to declare an
+ uninitialized internal linkage data object. Under SVR4,
+ the linker seems to want the alignment of data objects
+ to depend on their types. We do exactly that here. */
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+do { \
+ if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ switch_to_section (sbss_section); \
+ else \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \
+ ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \
+} while (0)
+
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object. */
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+do { \
+ ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \
+} while (0)
+
+/* The biggest alignment supported by ELF in bits. 32-bit ELF
+ supports section alignment up to (0x80000000 * 8), while
+ 64-bit ELF supports (0x8000000000000000 * 8). If this macro
+ is not defined, the default is the largest alignment supported
+ by 32-bit ELF and representable on a 32-bit host. Use this
+ macro to limit the alignment which can be specified using
+ the `__attribute__ ((aligned (N)))' construct.
+
+ This value is really 2^63. Since gcc figures the alignment in bits,
+ we could only potentially get to 2^60 on suitable hosts. Due to other
+ considerations in varasm, we must restrict this to what fits in an int. */
+
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8)
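+
+/* A quick check of the arithmetic (illustration): the value above is
+   (1 << 28) * 8 = 2^31 bits, i.e. a maximum byte alignment of 2^28,
+   well below the 2^60 host-side ceiling mentioned above.  */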
+
+/* This is the pseudo-op used to generate a contiguous sequence of byte
+ values from a double-quoted string WITHOUT HAVING A TERMINATING NUL
+ AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */
+
+#undef ASCII_DATA_ASM_OP
+#define ASCII_DATA_ASM_OP "\t.ascii\t"
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata"
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#undef SBSS_SECTION_ASM_OP
+#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\""
+#undef SDATA_SECTION_ASM_OP
+#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\""
+
+/* On svr4, we *do* have support for the .init and .fini sections, and we
+ can put stuff in there to be executed before and after `main'. We let
+ crtstuff.c and other files know this by defining the following symbols.
+ The definitions say how to change sections to the .init and .fini
+ sections. This is the same for all known svr4 assemblers. */
+
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
+
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+
+#define ASM_SECTION_START_OP "\t.subsection\t-1"
+
+/* Output assembly directive to move to the beginning of current section. */
+#define ASM_OUTPUT_SECTION_START(FILE) \
+ fprintf ((FILE), "%s\n", ASM_SECTION_START_OP)
+
+#endif
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+#define TARGET_ASM_SELECT_SECTION default_elf_select_section
+
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+
+/* Define the strings used for the special svr4 .type and .size directives.
+ These strings generally do not vary from one system running svr4 to
+ another, but if a given system (e.g. m88k running svr) needs to use
+ different pseudo-op names for these, they may be overridden in the
+ file which includes this one. */
+
+#undef TYPE_ASM_OP
+#define TYPE_ASM_OP "\t.type\t"
+#undef SIZE_ASM_OP
+#define SIZE_ASM_OP "\t.size\t"
+
+/* This is how we tell the assembler that a symbol is weak. */
+
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+
+/* This is how we tell the assembler that two symbols have the same value. */
+
+#undef ASM_OUTPUT_DEF
+#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \
+ do { \
+ assemble_name(FILE, ALIAS); \
+ fputs(" = ", FILE); \
+ assemble_name(FILE, NAME); \
+ fputc('\n', FILE); \
+ } while (0)
+
+#undef ASM_OUTPUT_DEF_FROM_DECLS
+#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
+ do { \
+ const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ const char *name = IDENTIFIER_POINTER (TARGET); \
+ if (TREE_CODE (DECL) == FUNCTION_DECL) \
+ { \
+ fputc ('$', FILE); \
+ assemble_name (FILE, alias); \
+ fputs ("..ng = $", FILE); \
+ assemble_name (FILE, name); \
+ fputs ("..ng\n", FILE); \
+ } \
+ assemble_name(FILE, alias); \
+ fputs(" = ", FILE); \
+ assemble_name(FILE, name); \
+ fputc('\n', FILE); \
+ } while (0)
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. Different svr4 assemblers
+ expect various different forms for this operand. The one given here
+ is just a default. You may need to override it in your machine-
+ specific tm.h file (depending upon the particulars of your assembler). */
+
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+
+/* Write the extra assembler code needed to declare a function's result.
+ Most svr4 assemblers don't require any special declaration of the
+ result value, but there are exceptions. */
+
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
+
+/* These macros generate the special .type and .size directives which
+ are used to set the corresponding fields of the linker symbol table
+ entries in an ELF object file under SVR4. These macros also output
+ the starting labels for the relevant functions/objects. */
+
+/* Write the extra assembler code needed to declare an object properly. */
+
+#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT
+#define USE_GNU_UNIQUE_OBJECT 1
+#else
+#define USE_GNU_UNIQUE_OBJECT 0
+#endif
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do { \
+ HOST_WIDE_INT size; \
+ \
+ /* For template static data member instantiations or \
+ inline fn local statics and their guard variables, use \
+ gnu_unique_object so that they will be combined even under \
+ RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \
+ vtables and other read-only artificial decls. */ \
+ if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \
+ && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \
+ else \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } while (0)
+
+/* Output the size directive for a decl in rest_of_decl_compilation
+ in the case where we did not do so before the initializer.
+ Once we find the error_mark_node, we know that the value of
+ size_directive_output was set
+ by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+ do { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ if (!flag_inhibit_size_directive \
+ && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output \
+ && (size = int_size_in_bytes (TREE_TYPE (DECL))) > 0) \
+ { \
+ size_directive_output = 1; \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } while (0)
+
+/* A table of byte codes used by the ASM_OUTPUT_ASCII and
+ ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table
+ corresponds to a particular byte value [0..255]. For any
+ given byte value, if the value in the corresponding table
+ position is zero, the given character can be output directly.
+ If the table value is 1, the byte must be output as a \ooo
+ octal escape. If the table value is anything else, then the
+ byte value should be output as a \ followed by the value
+ in the table. Note that we can use standard UN*X escape
+ sequences for many control characters, but we don't use
+ \a to represent BEL because some svr4 assemblers (e.g. on
+ the i386) don't know about that. Also, we don't use \v
+ since some versions of gas, such as 2.2, did not accept it. */
+
+#undef ESCAPES
+#define ESCAPES \
+"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
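+
+/* A minimal sketch (editorial, not part of this header) of how an
+ ASM_OUTPUT_LIMITED_STRING-style emitter is expected to consult the
+ table; `file' and `c' are illustrative names:
+
+ int e = ESCAPES[(unsigned char) c];
+ if (e == 0)
+ putc (c, file); // printable: output directly
+ else if (e == 1)
+ fprintf (file, "\\%03o", c); // no short form: octal escape
+ else
+ fprintf (file, "\\%c", e); // short escape, e.g. 'n' for newline
+*/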
+
+/* Some svr4 assemblers have a limit on the number of characters which
+ can appear in the operand of a .string directive. If your assembler
+ has such a limitation, you should define STRING_LIMIT to reflect that
+ limit. Note that at least some svr4 assemblers have a limit on the
+ actual number of bytes in the double-quoted string, and that they
+ count each character in an escape sequence as one byte. Thus, an
+ escape sequence like \377 would count as four bytes.
+
+ If your target assembler doesn't support the .string directive, you
+ should define this to zero. */
+
+#undef STRING_LIMIT
+#define STRING_LIMIT ((unsigned) 256)
+#undef STRING_ASM_OP
+#define STRING_ASM_OP "\t.string\t"
+
+/* GAS is the only Alpha/ELF assembler. */
+#undef TARGET_GAS
+#define TARGET_GAS (1)
+
+/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the
+ (even more) magical crtbegin.o file, which provides part of the
+ support for getting C++ file-scope static objects constructed
+ before entering `main'. */
+
+#undef STARTFILE_SPEC
+#ifdef HAVE_LD_PIE
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
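+
+/* Worked example (editorial): assuming HAVE_LD_PIE, a plain link pulls
+ in crt1.o crti.o crtbegin.o; `-pie' selects Scrt1.o crti.o
+ crtbeginS.o; `-static' selects crt1.o crti.o crtbeginT.o; and `-pg'
+ swaps crt1.o for gcrt1.o. */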
+
+/* Provide an ENDFILE_SPEC appropriate for ELF. Here we tack on the
+ magical crtend.o file, which provides part of the support for
+ getting C++ file-scope static objects constructed before entering
+ `main', followed by a normal ELF "finalizer" file, `crtn.o'. */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
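+
+/* Worked example (editorial): adding `-ffast-math' to an ordinary link
+ appends crtfastmath.o crtend.o crtn.o, while a `-shared' or `-pie'
+ link gets crtendS.o in place of crtend.o. */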
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ Since application size is already constrained to <2GB by the form of
+ the ldgp relocation, we can use a 32-bit pc-relative relocation to
+ static data. Dynamic data is accessed indirectly to allow for read
+ only EH sections. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4)
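+
+/* Editorial note: with the usual dwarf2.h encodings (DW_EH_PE_pcrel
+ 0x10, DW_EH_PE_sdata4 0x0b, DW_EH_PE_indirect 0x80) this yields
+ 0x1b for local symbols and 0x9b for globals. */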
+
+/* If defined, a C statement to be executed just prior to the output of
+ assembler code for INSN. */
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ (alpha_this_literal_sequence_number = 0, \
+ alpha_this_gpdisp_sequence_number = 0)
+extern int alpha_this_literal_sequence_number;
+extern int alpha_this_gpdisp_sequence_number;
+
+/* Since the bits of the _init and _fini functions are spread across
+ many object files, each potentially with its own GP, we must assume
+ we need to load our GP. Further, the .init/.fini section can
+ easily be more than 4MB away from the function to call, so we can't
+ use bsr. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n" \
+" br $29,1f\n" \
+"1: ldgp $29,0($29)\n" \
+" unop\n" \
+" jsr $26," USER_LABEL_PREFIX #FUNC "\n" \
+" .align 3\n" \
+" .previous");
+
+/* If we have the capability, create headers for efficient EH lookup.
+ As of Jan 2002, only glibc 2.2.4 can actually make use of this, but
+ I imagine that other systems will catch up. In the meantime, it
+ does no harm to make sure that the data exists to be used later. */
+#if defined(HAVE_LD_EH_FRAME_HDR)
+#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM
+ any text necessary for declaring the name of an external symbol
+ named NAME which is referenced in this compilation but not defined.
+ It is needed to properly support non-default visibility. */
+
+#ifndef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ default_elf_asm_output_external (FILE, DECL, NAME)
+#endif
diff --git a/gcc/config/alpha/elf.opt b/gcc/config/alpha/elf.opt
new file mode 100644
index 000000000..edafd5d37
--- /dev/null
+++ b/gcc/config/alpha/elf.opt
@@ -0,0 +1,30 @@
+; Alpha ELF options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
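+; Editorial note: the single `relax' record below registers -relax as a
+; driver option, so that link specs such as %{relax:-relax} can forward
+; it to the linker.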
+relax
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/alpha/ev4.md b/gcc/config/alpha/ev4.md
new file mode 100644
index 000000000..5b1899fc7
--- /dev/null
+++ b/gcc/config/alpha/ev4.md
@@ -0,0 +1,161 @@
+;; Scheduling description for Alpha EV4.
+;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+; On EV4 there are two classes of resources to consider: resources needed
+; to issue, and resources needed to execute. IBUS[01] are in the first
+; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
+; (There are a few other register-like resources, but ...)
+
+(define_automaton "ev4_0,ev4_1,ev4_2")
+(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0")
+(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1")
+(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2")
+(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev4_multi" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "multi"))
+ "ev4_ib0+ev4_ib1")
+
+; Loads from L0 complete in three cycles. adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev4_ld" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "ild,fld,ldsym,ld_l"))
+ "ev4_ib01+ev4_abox")
+
+; Stores can issue before the data (but not address) is ready.
+(define_insn_reservation "ev4_ist" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "ist"))
+ "ev4_ib1+ev4_abox")
+
+; ??? Separate from ev4_ist because store_data_bypass_p can't handle
+; the patterns with multiple sets, like store-conditional.
+(define_insn_reservation "ev4_ist_c" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "st_c"))
+ "ev4_ib1+ev4_abox")
+
+(define_insn_reservation "ev4_fst" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "fst"))
+ "ev4_ib0+ev4_abox")
+
+; Memory barrier blocks ABOX insns until it's acknowledged by the external
+; memory bus. This may be *quite* slow. Setting this to 4 cycles gets
+; about all the benefit without making the DFA too large.
+(define_insn_reservation "ev4_mb" 4
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "mb"))
+ "ev4_ib1+ev4_abox,ev4_abox*3")
+
+; Branches have no delay cost, but do tie up the unit for two cycles.
+(define_insn_reservation "ev4_ibr" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "ibr,jsr"))
+ "ev4_ib1+ev4_bbox,ev4_bbox")
+
+(define_insn_reservation "ev4_callpal" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "callpal"))
+ "ev4_ib1+ev4_bbox,ev4_bbox")
+
+(define_insn_reservation "ev4_fbr" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "fbr"))
+ "ev4_ib0+ev4_bbox,ev4_bbox")
+
+; Arithmetic insns normally have their results available after
+; two cycles. There are a number of exceptions.
+
+(define_insn_reservation "ev4_iaddlog" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "iadd,ilog"))
+ "ev4_ib0+ev4_ebox")
+
+(define_bypass 1
+ "ev4_iaddlog"
+ "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi")
+
+(define_insn_reservation "ev4_shiftcm" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "shift,icmov"))
+ "ev4_ib0+ev4_ebox")
+
+(define_insn_reservation "ev4_icmp" 2
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "icmp"))
+ "ev4_ib0+ev4_ebox")
+
+(define_bypass 1 "ev4_icmp" "ev4_ibr")
+
+(define_bypass 0
+ "ev4_iaddlog,ev4_shiftcm,ev4_icmp"
+ "ev4_ist"
+ "store_data_bypass_p")
+
+; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can
+; be issued exactly three cycles before an integer multiply completes".
+
+(define_insn_reservation "ev4_imulsi" 21
+ (and (eq_attr "tune" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox")
+
+(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p")
+
+(define_insn_reservation "ev4_imuldi" 23
+ (and (eq_attr "tune" "ev4")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "!si")))
+ "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox")
+
+(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p")
+
+; Most FP insns have a 6 cycle latency, but results can be fed back
+; into another FP op after only 4 cycles.
+(define_insn_reservation "ev4_fpop" 6
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "fadd,fmul,fcpys,fcmov"))
+ "ev4_ib1+ev4_fbox")
+
+(define_bypass 4 "ev4_fpop" "ev4_fpop")
+
+; The floating point divider is not pipelined. Also, "no FPOP insn can be
+; issued exactly five or exactly six cycles before an fdiv insn completes".
+
+(define_insn_reservation "ev4_fdivsf" 34
+ (and (eq_attr "tune" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+(define_insn_reservation "ev4_fdivdf" 63
+ (and (eq_attr "tune" "ev4")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox")
+
+; Traps don't consume or produce data.
+(define_insn_reservation "ev4_misc" 1
+ (and (eq_attr "tune" "ev4")
+ (eq_attr "type" "misc"))
+ "ev4_ib1")
diff --git a/gcc/config/alpha/ev5.md b/gcc/config/alpha/ev5.md
new file mode 100644
index 000000000..f22d391a4
--- /dev/null
+++ b/gcc/config/alpha/ev5.md
@@ -0,0 +1,194 @@
+;; Scheduling description for Alpha EV5.
+;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; EV5 has two asymmetric integer units, E0 and E1, plus separate
+;; FP add and multiply units.
+
+(define_automaton "ev5_0,ev5_1")
+(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0")
+(define_reservation "ev5_e01" "ev5_e0|ev5_e1")
+(define_reservation "ev5_fam" "ev5_fa|ev5_fm")
+(define_cpu_unit "ev5_imul" "ev5_0")
+(define_cpu_unit "ev5_fdiv" "ev5_1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev5_multi" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "multi"))
+ "ev5_e0+ev5_e1+ev5_fa+ev5_fm")
+
+; Stores can only issue to E0, and may not issue with loads.
+; Model this with some fake units.
+
+(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0")
+(define_reservation "ev5_ld" "ev5_l0|ev5_l1")
+(exclusion_set "ev5_l0,ev5_l1" "ev5_st")
+
+(define_insn_reservation "ev5_st" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "ist,fst,st_c,mb"))
+ "ev5_e0+ev5_st")
+
+; Loads from L0 complete in two cycles. adjust_cost still factors
+; in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev5_ld" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "ild,fld,ldsym"))
+ "ev5_e01+ev5_ld")
+
+(define_insn_reservation "ev5_ld_l" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "ld_l"))
+ "ev5_e0+ev5_ld")
+
+; Integer branches slot only to E1.
+(define_insn_reservation "ev5_ibr" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "ibr"))
+ "ev5_e1")
+
+(define_insn_reservation "ev5_callpal" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "callpal"))
+ "ev5_e1")
+
+(define_insn_reservation "ev5_jsr" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "jsr"))
+ "ev5_e1")
+
+(define_insn_reservation "ev5_shift" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "shift"))
+ "ev5_e0")
+
+(define_insn_reservation "ev5_mvi" 2
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "mvi"))
+ "ev5_e0")
+
+(define_insn_reservation "ev5_cmov" 2
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "icmov"))
+ "ev5_e01")
+
+(define_insn_reservation "ev5_iadd" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "iadd"))
+ "ev5_e01")
+
+(define_insn_reservation "ev5_ilogcmp" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "ilog,icmp"))
+ "ev5_e01")
+
+; Conditional move and branch can issue the same cycle as the test.
+(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p")
+
+; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued
+; to E0 exactly two cycles before an integer multiply completes".
+
+(define_insn_reservation "ev5_imull" 8
+ (and (eq_attr "tune" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "si")))
+ "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0")
+
+(define_insn_reservation "ev5_imulq" 12
+ (and (eq_attr "tune" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "di")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0")
+
+(define_insn_reservation "ev5_imulh" 14
+ (and (eq_attr "tune" "ev5")
+ (and (eq_attr "type" "imul")
+ (eq_attr "opsize" "udi")))
+ "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0")
+
+; The multiplier is unable to receive data from Ebox bypass paths. The
+; instruction issues at the expected time, but its latency is increased
+; by the time it takes for the input data to become available to the
+; multiplier. For example, an IMULL instruction issued one cycle later
+; than an ADDL instruction, which produced one of its operands, has a
+; latency of 10 (8 + 2). If the IMULL instruction is issued two cycles
+; later than the ADDL instruction, the latency is 9 (8 + 1).
+;
+; Model this instead with increased latency on the input instruction.
+
+(define_bypass 3
+ "ev5_ld,ev5_ld_l,ev5_shift,ev5_mvi,ev5_cmov,ev5_iadd,ev5_ilogcmp"
+ "ev5_imull,ev5_imulq,ev5_imulh")
+
+(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh")
+(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh")
+(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh")
+
+; Similarly for the FPU we have two asymmetric units.
+
+(define_insn_reservation "ev5_fadd" 4
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "fadd,fcmov"))
+ "ev5_fa")
+
+(define_insn_reservation "ev5_fbr" 1
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "fbr"))
+ "ev5_fa")
+
+(define_insn_reservation "ev5_fcpys" 4
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "fcpys"))
+ "ev5_fam")
+
+(define_insn_reservation "ev5_fmul" 4
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "fmul"))
+ "ev5_fm")
+
+; The floating point divider is not pipelined. Also, "no insn can be issued
+; to FA exactly five cycles before an fdiv insn completes".
+;
+; ??? Do not model this late reservation due to the enormously increased
+; size of the resulting DFA.
+;
+; ??? Putting ev5_fa and ev5_fdiv alone into the same automata produces
+; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate
+; automata produces incorrect results for insns that can choose one or
+; the other, i.e. ev5_fcpys.
+
+(define_insn_reservation "ev5_fdivsf" 15
+ (and (eq_attr "tune" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ ; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+ "ev5_fa+ev5_fdiv,ev5_fdiv*14")
+
+(define_insn_reservation "ev5_fdivdf" 22
+ (and (eq_attr "tune" "ev5")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ ; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4"
+ "ev5_fa+ev5_fdiv,ev5_fdiv*21")
+
+; Traps don't consume or produce data; rpcc is latency 2 if we ever add it.
+(define_insn_reservation "ev5_misc" 2
+ (and (eq_attr "tune" "ev5")
+ (eq_attr "type" "misc"))
+ "ev5_e0")
diff --git a/gcc/config/alpha/ev6.md b/gcc/config/alpha/ev6.md
new file mode 100644
index 000000000..adfe504bf
--- /dev/null
+++ b/gcc/config/alpha/ev6.md
@@ -0,0 +1,177 @@
+;; Scheduling description for Alpha EV6.
+;; Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't
+; expected to help over-much, but a precise description can be important
+; for software pipelining.
+;
+; EV6 has two symmetric pairs ("clusters") of two asymmetric integer
+; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
+;
+; ??? The clusters have independent register files that are re-synced
+; every cycle. Thus there is one additional cycle of latency between
+; insns issued on different clusters. Possibly model that by duplicating
+; all EBOX insn_reservations that can issue to either cluster, increasing
+; all latencies by one, and adding bypasses within the cluster.
+;
+; ??? In addition, instruction order affects cluster issue.
+
+(define_automaton "ev6_0,ev6_1")
+(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0")
+(define_reservation "ev6_u" "ev6_u0|ev6_u1")
+(define_reservation "ev6_l" "ev6_l0|ev6_l1")
+(define_reservation "ev6_ebox" "ev6_u|ev6_l")
+
+(define_cpu_unit "ev6_fa" "ev6_1")
+(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0")
+(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1")
+
+; Assume type "multi" single issues.
+(define_insn_reservation "ev6_multi" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "multi"))
+ "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1")
+
+; Integer loads take at least 3 clocks, and only issue to lower units.
+; adjust_cost still factors in user-specified memory latency, so return 1 here.
+(define_insn_reservation "ev6_ild" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "ild,ldsym,ld_l"))
+ "ev6_l")
+
+(define_insn_reservation "ev6_ist" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "ist,st_c"))
+ "ev6_l")
+
+(define_insn_reservation "ev6_mb" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "mb"))
+ "ev6_l1")
+
+; FP loads take at least 4 clocks. adjust_cost still factors
+; in user-specified memory latency, so return 2 here.
+(define_insn_reservation "ev6_fld" 2
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "fld"))
+ "ev6_l")
+
+; The FPU communicates with memory and the integer register file
+; via two fp store units. We need a slot in the fst immediately, and
+; a slot in LOW after the operand data is ready, at which point the
+; data may be moved either to the store queue or the integer register
+; file and the insn retired.
+
+(define_insn_reservation "ev6_fst" 3
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "fst"))
+ "ev6_fst,nothing,ev6_l")
+
+; Arithmetic goes anywhere.
+(define_insn_reservation "ev6_arith" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "iadd,ilog,icmp"))
+ "ev6_ebox")
+
+; Motion video insns issue only to U0, and take three ticks.
+(define_insn_reservation "ev6_mvi" 3
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "mvi"))
+ "ev6_u0")
+
+; Shifts issue to upper units.
+(define_insn_reservation "ev6_shift" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "shift"))
+ "ev6_u")
+
+; Multiplies issue only to U1, and all take 7 ticks.
+(define_insn_reservation "ev6_imul" 7
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "imul"))
+ "ev6_u1")
+
+; Conditional moves decompose into two independent primitives, each taking
+; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles.
+(define_insn_reservation "ev6_icmov" 2
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "icmov"))
+ "ev6_ebox,ev6_ebox")
+
+; Integer branches issue to upper units.
+(define_insn_reservation "ev6_ibr" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "ibr,callpal"))
+ "ev6_u")
+
+; Calls only issue to L0.
+(define_insn_reservation "ev6_jsr" 1
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "jsr"))
+ "ev6_l0")
+
+; Ftoi/itof only issue to lower pipes.
+(define_insn_reservation "ev6_itof" 3
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "itof"))
+ "ev6_l")
+
+(define_insn_reservation "ev6_ftoi" 3
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "ftoi"))
+ "ev6_fst,nothing,ev6_l")
+
+(define_insn_reservation "ev6_fmul" 4
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "fmul"))
+ "ev6_fm")
+
+(define_insn_reservation "ev6_fadd" 4
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "fadd,fcpys,fbr"))
+ "ev6_fa")
+
+(define_insn_reservation "ev6_fcmov" 8
+ (and (eq_attr "tune" "ev6")
+ (eq_attr "type" "fcmov"))
+ "ev6_fa,nothing*3,ev6_fa")
+
+(define_insn_reservation "ev6_fdivsf" 12
+ (and (eq_attr "tune" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*9")
+
+(define_insn_reservation "ev6_fdivdf" 15
+ (and (eq_attr "tune" "ev6")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*12")
+
+(define_insn_reservation "ev6_sqrtsf" 18
+ (and (eq_attr "tune" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "si")))
+ "ev6_fa*15")
+
+(define_insn_reservation "ev6_sqrtdf" 33
+ (and (eq_attr "tune" "ev6")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "opsize" "di")))
+ "ev6_fa*30")
diff --git a/gcc/config/alpha/freebsd.h b/gcc/config/alpha/freebsd.h
new file mode 100644
index 000000000..fbefde1fd
--- /dev/null
+++ b/gcc/config/alpha/freebsd.h
@@ -0,0 +1,81 @@
+/* Definitions for DEC Alpha/AXP running FreeBSD using the ELF format
+ Copyright (C) 2000, 2002, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef EXTRA_SPECS
+#define EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+/* Provide a CPP_SPEC appropriate for FreeBSD/alpha -- dealing with
+ the GCC option `-posix'. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+
+#define LINK_SPEC "%{G*} %{relax:-relax} \
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
+
+
+/************************[ Target stuff ]***********************************/
+
+/* Define the actual types of some ANSI-mandated types.
+ Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c,
+ c-common.c, and config/<arch>/<arch>.h. */
+
+/* alpha.h gets this wrong for FreeBSD. We use the GCC defaults instead. */
+#undef WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/alpha ELF)");
+
+#define TARGET_ELF 1
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS)
+
+#undef HAS_INIT_SECTION
+
+/* Show that we need a GP when profiling. */
+#undef TARGET_PROFILING_NEEDS_GP
+#define TARGET_PROFILING_NEEDS_GP 1
+
+/* This is the char to use for continuation (in case we need to turn
+ continuation back on). */
+
+#undef DBX_CONTIN_CHAR
+#define DBX_CONTIN_CHAR '?'
+
+/* Don't default to pcc-struct-return, we want to retain compatibility with
+ older FreeBSD releases AND pcc-struct-return may not be reentrant. */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
diff --git a/gcc/config/alpha/gnu.h b/gcc/config/alpha/gnu.h
new file mode 100644
index 000000000..ca7198039
--- /dev/null
+++ b/gcc/config/alpha/gnu.h
@@ -0,0 +1,49 @@
+/* Configuration for an Alpha running GNU with ELF as the target machine.
+
+Copyright (C) 2002, 2003, 2004, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Alpha GNU)");
+
+#undef TARGET_OS_CPP_BUILTINS /* config.gcc includes alpha/linux.h. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ builtin_define ("_LONGLONG"); \
+ } while (0)
+
+#undef ELF_DYNAMIC_LINKER
+#define ELF_DYNAMIC_LINKER "/lib/ld.so"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{!static: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} %{!p:crt1.o%s}}} \
+ %{static:crt0.o%s}} \
+ crti.o%s \
+ %{!static:%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}}"
+
+/* FIXME: Is a Hurd-specific fallback mechanism necessary? */
+#undef MD_UNWIND_SUPPORT
diff --git a/gcc/config/alpha/host-osf.c b/gcc/config/alpha/host-osf.c
new file mode 100644
index 000000000..0a554b3ec
--- /dev/null
+++ b/gcc/config/alpha/host-osf.c
@@ -0,0 +1,147 @@
+/* Tru64 UNIX host-specific hook definitions.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include <sys/mman.h>
+/* Inhibit inclusion of <sys/mount.h>; it is unnecessary and errors out
+ due to use of poisoned bcmp, bcopy. */
+#define _SYS_MOUNT_H_
+#include <sys/procfs.h>
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS osf_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS osf_gt_pch_use_address
+
+/* The mmap ADDR parameter may be ignored without MAP_FIXED set. Before we
+ give up, check existing mappings with ioctl(PIOCMAP) to see if the space
+ is really free. */
+
+static void *
+mmap_fixed (void *addr, size_t len, int prot, int flags, int fd, off_t off)
+{
+ void *base;
+
+ base = mmap ((caddr_t) addr, len, prot, flags, fd, off);
+
+ if (base != addr)
+ {
+ /* PID_MAX is SHRT_MAX on Tru64 UNIX V4.0, but INT_MAX on V5.1.
+ Allow for both. "/proc/" + INT_MAX + '\0'. */
+ char pname[6+10+1];
+ int procfd, nmap;
+ prmap_t *pmap;
+ int i, overlap = 0;
+
+ if (base != (void *) MAP_FAILED)
+ munmap ((caddr_t) base, len);
+
+ /* Check if there's any mapping overlapping [addr, addr+len). */
+
+ snprintf (pname, sizeof (pname), "/proc/%d", getpid ());
+ procfd = open (pname, O_RDONLY);
+ if (procfd == -1)
+ return ((void *) MAP_FAILED);
+ if (ioctl (procfd, PIOCNMAP, &nmap) == -1)
+ {
+ /* Don't leak the /proc descriptor on failure. */
+ close (procfd);
+ return ((void *) MAP_FAILED);
+ }
+ pmap = (prmap_t *) xmalloc (sizeof (*pmap) * (nmap+1));
+ if (ioctl (procfd, PIOCMAP, pmap) == -1)
+ {
+ free (pmap);
+ close (procfd);
+ return ((void *) MAP_FAILED);
+ }
+
+ /* It seems like pmap[] is sorted by address, but can we rely on
+ that? */
+ for (i = 0; i < nmap; i++)
+ {
+ uintptr_t map_start = (uintptr_t) pmap[i].pr_vaddr;
+ uintptr_t map_end = map_start + pmap[i].pr_size;
+
+ if ((uintptr_t) addr < map_end
+ && (uintptr_t) addr+len > map_start)
+ {
+ overlap = 1;
+ break;
+ }
+ }
+ free (pmap);
+ close (procfd);
+
+ if (!overlap)
+ base = mmap ((caddr_t) addr, len, prot, flags | MAP_FIXED, fd, off);
+ else
+ base = mmap ((caddr_t) addr, len, prot, flags, fd, off);
+ }
+
+ return base;
+}
+
+/* For various ports, try to guess a fixed spot in the vm space that's
+ probably free. Take the middle between start of text segment and
+ dynamic loader space. See <sys/machine/addrconf.h> and Tru64 UNIX
+ Assembly Language Programmer's Guide, p.6-18, Figure 6-3: Default Layout
+ of Memory (User Program View). */
+#define TRY_EMPTY_VM_SPACE 0x20050000000
+
+/* Determine a location where we might be able to reliably allocate
+ SIZE bytes. FD is the PCH file, though we should return with the
+ file unmapped. */
+
+static void *
+osf_gt_pch_get_address (size_t size, int fd)
+{
+ void *addr;
+
+ addr = mmap_fixed ((caddr_t) TRY_EMPTY_VM_SPACE, size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+ /* If we failed the map, that means there's *no* free space. */
+ if (addr == (void *) MAP_FAILED)
+ return NULL;
+ /* Unmap the area before returning. */
+ munmap ((caddr_t) addr, size);
+
+ return addr;
+}
+
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+ mapping the data at BASE, -1 if we couldn't. */
+
+static int
+osf_gt_pch_use_address (void *base, size_t size, int fd, size_t offset)
+{
+ void *addr;
+
+ /* We're called with size == 0 if we're not planning to load a PCH
+ file at all. This allows the hook to free any static space that
+ we might have allocated at link time. */
+ if (size == 0)
+ return -1;
+
+ addr = mmap_fixed ((caddr_t) base, size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
+
+ return addr == base ? 1 : -1;
+}
+
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/alpha/libgcc-alpha-ldbl.ver b/gcc/config/alpha/libgcc-alpha-ldbl.ver
new file mode 100644
index 000000000..8dc54a749
--- /dev/null
+++ b/gcc/config/alpha/libgcc-alpha-ldbl.ver
@@ -0,0 +1,50 @@
+# Copyright (C) 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+%ifdef __LONG_DOUBLE_128__
+
+# long double 128 bit support in libgcc_s.so.1 is only available
+# when configured with --with-long-double-128. Make sure all the
+# symbols are available at @@GCC_LDBL_* versions to make it clear
+# there is a configurable symbol set.
+
+%exclude {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%inherit GCC_LDBL_3.0 GCC_3.0
+GCC_LDBL_3.0 {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+}
+
+%inherit GCC_LDBL_4.0.0 GCC_4.0.0
+GCC_LDBL_4.0.0 {
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%endif
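+
+# Editorial note: the effect of the map above is that, when long double
+# is 128 bits, e.g. __fixtfdi is exported as __fixtfdi@@GCC_LDBL_3.0
+# instead of __fixtfdi@@GCC_3.0, so binaries only resolve against a
+# libgcc_s that was really built with --with-long-double-128.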
diff --git a/gcc/config/alpha/linux-elf.h b/gcc/config/alpha/linux-elf.h
new file mode 100644
index 000000000..e8eac2f91
--- /dev/null
+++ b/gcc/config/alpha/linux-elf.h
@@ -0,0 +1,57 @@
+/* Definitions of target machine for GNU compiler
+ for Alpha Linux-based GNU systems using ELF.
+ Copyright (C) 1996, 1997, 1998, 2001, 2002, 2003, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Richard Henderson.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Alpha GNU/Linux for ELF)");
+
+#undef EXTRA_SPECS
+#define EXTRA_SPECS \
+{ "elf_dynamic_linker", ELF_DYNAMIC_LINKER },
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0"
+#if DEFAULT_LIBC == LIBC_UCLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}"
+#elif DEFAULT_LIBC == LIBC_GLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}"
+#else
+#error "Unsupported DEFAULT_LIBC"
+#endif
+#define LINUX_DYNAMIC_LINKER \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER)
+
+#define ELF_DYNAMIC_LINKER LINUX_DYNAMIC_LINKER
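+
+/* Editorial note: with DEFAULT_LIBC == LIBC_GLIBC the line above expands
+ to "%{muclibc:/lib/ld-uClibc.so.0;:/lib/ld-linux.so.2}", i.e. -muclibc
+ selects the uClibc loader and everything else gets the glibc one. */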
+
+#define LINK_SPEC "-m elf64alpha %{G*} %{relax:-relax} \
+ %{O*:-O3} %{!O*:-O1} \
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(elf_dynamic_linker)} \
+ %{static:-static}}"
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+"%{pthread:-lpthread} %{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} "
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
diff --git a/gcc/config/alpha/linux-unwind.h b/gcc/config/alpha/linux-unwind.h
new file mode 100644
index 000000000..8c04b3b41
--- /dev/null
+++ b/gcc/config/alpha/linux-unwind.h
@@ -0,0 +1,99 @@
+/* DWARF2 EH unwinding support for Alpha Linux.
+ Copyright (C) 2004, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR alpha_fallback_frame_state
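+
+/* Editorial note on the magic words matched below: 0x47fe0410 encodes
+ "mov $30,$16", 0x00000083 is "callsys", and 0x201f0067 / 0x201f015f
+ are "lda $0,N" with N = 103 (__NR_sigreturn) and N = 351
+ (__NR_rt_sigreturn) on alpha. */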
+
+static _Unwind_Reason_Code
+alpha_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned int *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+ int i;
+
+ if (pc[0] != 0x47fe0410 /* mov $30,$16 */
+ || pc[2] != 0x00000083) /* callsys */
+ return _URC_END_OF_STACK;
+ if (context->cfa == 0)
+ return _URC_END_OF_STACK;
+ if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */
+ sc = context->cfa;
+ else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */
+ {
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+ sc = &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->sc_regs[30];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+ for (i = 0; i < 30; ++i)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = (long) &sc->sc_regs[i] - new_cfa;
+ }
+ for (i = 0; i < 31; ++i)
+ {
+ fs->regs.reg[i+32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i+32].loc.offset
+ = (long) &sc->sc_fpregs[i] - new_cfa;
+ }
+ fs->regs.reg[64].how = REG_SAVED_OFFSET;
+ fs->regs.reg[64].loc.offset = (long)&sc->sc_pc - new_cfa;
+ fs->retaddr_column = 64;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT alpha_frob_update_context
+
+/* Fix up for signal handlers that don't have S flag set. */
+
+static void
+alpha_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs ATTRIBUTE_UNUSED)
+{
+ unsigned int *pc = context->ra;
+
+ if (pc[0] == 0x47fe0410 /* mov $30,$16 */
+ && pc[2] == 0x00000083 /* callsys */
+ && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */
+ || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */
+ _Unwind_SetSignalFrame (context, 1);
+}
diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h
new file mode 100644
index 000000000..a1881c816
--- /dev/null
+++ b/gcc/config/alpha/linux.h
@@ -0,0 +1,106 @@
+/* Definitions of target machine for GNU compiler,
+ for Alpha Linux-based GNU systems.
+ Copyright (C) 1996, 1997, 1998, 2002, 2003, 2004, 2005, 2006, 2007, 2009,
+ 2010 Free Software Foundation, Inc.
+ Contributed by Richard Henderson.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__gnu_linux__"); \
+ builtin_define ("_LONGLONG"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=linux"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ /* The GNU C++ standard library requires this. */ \
+ if (c_dialect_cxx ()) \
+ builtin_define ("_GNU_SOURCE"); \
+ } while (0)
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared: %{profile:-lc_p}%{!profile:-lc}}"
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* Show that we need a GP when profiling. */
+#undef TARGET_PROFILING_NEEDS_GP
+#define TARGET_PROFILING_NEEDS_GP 1
+
+/* Don't care about faults in the prologue. */
+#undef TARGET_CAN_FAULT_IN_PROLOGUE
+#define TARGET_CAN_FAULT_IN_PROLOGUE 1
+
+/* OS fixes up EV5 data fault on prefetch. */
+#undef TARGET_FIXUP_EV5_PREFETCH
+#define TARGET_FIXUP_EV5_PREFETCH 1
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#ifdef SINGLE_LIBC
+#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
+#else
+#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
+#endif
+
+/* Determine whether the entire c99 runtime is present in the
+ runtime library. */
+#define TARGET_C99_FUNCTIONS (OPTION_GLIBC)
+
+/* Whether we have sincos that follows the GNU extension. */
+#define TARGET_HAS_SINCOS (OPTION_GLIBC)
+
+#define TARGET_POSIX_IO
+
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
+
+#define MD_UNWIND_SUPPORT "config/alpha/linux-unwind.h"
+
+/* Define if long doubles should be mangled as 'g'. */
+#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+
+/* -mcpu=native handling only makes sense with the compiler running on
+ an Alpha chip. */
+#if defined(__alpha__) || defined(__alpha)
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+# define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+
+# define MCPU_MTUNE_NATIVE_SPECS \
+ " %{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)}" \
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#else
+# define MCPU_MTUNE_NATIVE_SPECS ""
+#endif
+
+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
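+
+/* Editorial note: via %:local_cpu_detect the driver rewrites, say,
+ "-mcpu=native" into "-mcpu=ev67" on an EV67 host before the other
+ specs are processed; the host-detection function lives in
+ driver-alpha.c. */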
diff --git a/gcc/config/alpha/netbsd.h b/gcc/config/alpha/netbsd.h
new file mode 100644
index 000000000..bd86d1c13
--- /dev/null
+++ b/gcc/config/alpha/netbsd.h
@@ -0,0 +1,83 @@
+/* Definitions of target machine for GNU compiler,
+ for Alpha NetBSD systems.
+ Copyright (C) 1998, 2002, 2003, 2004, 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } while (0)
+
+
+/* NetBSD doesn't use the LANGUAGE* built-ins. */
+#undef SUBTARGET_LANGUAGE_CPP_BUILTINS
+#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing */
+
+
+/* Show that we need a GP when profiling. */
+#undef TARGET_PROFILING_NEEDS_GP
+#define TARGET_PROFILING_NEEDS_GP 1
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD/alpha. We use
+ this to pull in CPP specs that all NetBSD configurations need. */
+
+#undef CPP_SPEC
+#define CPP_SPEC NETBSD_CPP_SPEC
+
+#undef EXTRA_SPECS
+#define EXTRA_SPECS \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \
+ { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC },
+
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/alpha ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{G*} %{relax:-relax} \
+ %{O*:-O3} %{!O*:-O1} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "__start"
+
+
+/* Provide an ENDFILE_SPEC appropriate for NetBSD/alpha ELF. Here we
+ add crtend.o, which provides part of the support for getting
+ C++ file-scope static objects destructed after exiting "main".
+
+ We also need to handle the GCC option `-ffast-math'. */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \
+ %(netbsd_endfile_spec)"
+
+
+/* Attempt to enable execute permissions on the stack. */
+
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/alpha ELF)");
diff --git a/gcc/config/alpha/openbsd.h b/gcc/config/alpha/openbsd.h
new file mode 100644
index 000000000..8efbaf5f4
--- /dev/null
+++ b/gcc/config/alpha/openbsd.h
@@ -0,0 +1,45 @@
+/* Configuration file for an alpha OpenBSD target.
+ Copyright (C) 1999, 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Controlling the compilation driver. */
+
+/* run-time target specifications */
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ OPENBSD_OS_CPP_BUILTINS_ELF(); \
+ OPENBSD_OS_CPP_BUILTINS_LP64(); \
+ } while (0)
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+#define LOCAL_LABEL_PREFIX "."
diff --git a/gcc/config/alpha/osf5-unwind.h b/gcc/config/alpha/osf5-unwind.h
new file mode 100644
index 000000000..c64909934
--- /dev/null
+++ b/gcc/config/alpha/osf5-unwind.h
@@ -0,0 +1,329 @@
+/* DWARF2 EH unwinding support for Alpha Tru64.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This file implements the MD_FALLBACK_FRAME_STATE_FOR macro, triggered when
+ the GCC table based unwinding process hits a frame for which no unwind info
+ has been registered. This typically occurs when raising an exception from a
+ signal handler, because the handler is actually called from the OS kernel.
+
+ The basic idea is to detect that we are indeed trying to unwind past a
+ signal handler and to fill out the GCC internal unwinding structures for
+ the OS kernel frame as if it had been directly called from the interrupted
+ context.
+
+ This is all assuming that the code to set the handler asked the kernel to
+ pass a pointer to such context information. */
+
+/* --------------------------------------------------------------------------
+ -- Basic principles of operation:
+ --------------------------------------------------------------------------
+
+ 1/ We first need a way to detect if we are trying to unwind past a signal
+ handler.
+
+ The typical method that is used on most platforms is to look at the code
+ around the return address we have and check if it matches the OS code
+ calling a handler. To determine what this code is expected to be, get a
+ breakpoint into a real signal handler and look at the code around the
+ return address. Depending on the library versions the pattern of the
+ signal handler is different; this is the reason why we check against more
+ than one pattern.
+
+ On this target, the return address is right after the call and every
+ instruction is 4 bytes long. For the simple case of a null dereference in
+ a single-threaded app, it went like:
+
+ # Check that we indeed have something we expect: the instruction right
+ # before the return address is within a __sigtramp function and is a call.
+
+ [... run gdb and break at the signal handler entry ...]
+
+ (gdb) x /i $ra-4
+ <__sigtramp+160>: jsr ra,(a3),0x3ff800d0ed4 <_fpdata+36468>
+
+ # Look at the code around that return address, and eventually observe a
+ # significantly large chunk of *constant* code right before the call:
+
+ (gdb) x /10i $ra-44
+ <__sigtramp+120>: lda gp,-27988(gp)
+ <__sigtramp+124>: ldq at,-18968(gp)
+ <__sigtramp+128>: lda t0,-1
+ <__sigtramp+132>: stq t0,0(at)
+ <__sigtramp+136>: ldq at,-18960(gp)
+ <__sigtramp+140>: ldl t1,8(at)
+ <__sigtramp+144>: ldq at,-18960(gp)
+ <__sigtramp+148>: stl t1,12(at)
+ <__sigtramp+152>: ldq at,-18960(gp)
+ <__sigtramp+156>: stl t0,8(at)
+
+ # The hexadecimal equivalent that we will have to match is:
+
+ (gdb) x /10x $ra-44
+ <__sigtramp+120>: 0x23bd92ac 0xa79db5e8 0x203fffff 0xb43c0000
+ <__sigtramp+136>: 0xa79db5f0 0xa05c0008 0xa79db5f0 0xb05c000c
+ <__sigtramp+152>: 0xa79db5f0 0xb03c0008
+
+ The problem observed on this target with this approach is that although
+ we found a constant set of instruction patterns there were some
+ gp-related offsets that made the machine code to differ from one
+ installation to another. This problem could have been overcome by masking
+ these offsets, but we found that it would be simpler and more efficient to
+ check whether the return address was part of a signal handler, by comparing
+ it against some expected code offset from __sigtramp.
+
+ # Check that we indeed have something we expect: the instruction
+ # right before the return address is within a __sigtramp
+ # function and is a call. We also need to obtain the offset
+ # between the return address and the start address of __sigtramp.
+
+ [... run gdb and break at the signal handler entry ...]
+
+ (gdb) x /2i $ra-4
+ <__sigtramp+160>: jsr ra,(a3),0x3ff800d0ed4 <_fpdata+36468>
+ <__sigtramp+164>: ldah gp,16381(ra)
+
+ (gdb) p (long)$ra - (long)&__sigtramp
+ $2 = 164
+
+ --------------------------------------------------------------------------
+
+ 2/ Once we know we are going through a signal handler, we need a way to
+ retrieve information about the interrupted run-time context.
+
+ On this platform, the third handler's argument is a pointer to a structure
+ describing this context (struct sigcontext *). We unfortunately have no
+ direct way to transfer this value here, so a couple of tricks are required
+ to compute it.
+
+ As documented at least in some header files (e.g. sys/machine/context.h),
+ the structure the handler gets a pointer to is located on the stack. As of
+ today, while writing this macro, we have unfortunately not been able to
+ find a detailed description of the full stack layout at handler entry time,
+ so we'll have to resort to empiricism :)
+
+ When unwinding here, we have the handler's CFA at hand, as part of the
+ current unwinding context which is one of our arguments. We presume that
+ for each call to a signal handler by the same kernel routine, the context's
+ structure location on the stack is always at the same offset from the
+ handler's CFA, and we compute that offset from bare observation:
+
+ For the simple case of a bare null dereference in a single-threaded app,
+ computing the offset was done using GNAT like this:
+
+ # Break on the first handler's instruction, before the prologue to have the
+ # CFA in $sp, and get there:
+
+ (gdb) b *&__gnat_error_handler
+ Breakpoint 1 at 0x120016090: file init.c, line 378.
+
+ (gdb) r
+ Program received signal SIGSEGV, Segmentation fault.
+
+ (gdb) c
+ Breakpoint 1, __gnat_error_handler (sig=..., sip=..., context=...)
+
+ # The displayed argument values are meaningless because we stopped before
+ # their final "homing". We know they are passed through $a0, $a1 and $a2
+ # from the ABI, though, so ...
+
+ # Observe that $sp and the context pointer are in the same (stack) area,
+ # and compute the offset:
+
+ (gdb) p /x $sp
+ $2 = 0x11fffbc80
+
+ (gdb) p /x $a2
+ $3 = 0x11fffbcf8
+
+ (gdb) p /x (long)$a2 - (long)$sp
+ $4 = 0x78
+
+ --------------------------------------------------------------------------
+
+ 3/ Once we know we are unwinding through a signal handler and have the
+ address of the structure describing the interrupted context at hand, we
+ have to fill the internal frame-state/unwind-context structures properly
+ to allow the unwinding process to proceed.
+
+ Roughly, we are provided with an *unwinding* CONTEXT, describing the state
+ of some point P in the call chain we are unwinding through. The macro we
+ implement has to fill a "frame state" structure FS that describes P's
+ caller state, by way of *rules* to compute its CFA, return address, and
+ **saved** registers' *locations*.
+
+ For the case we are going to deal with, the caller is some kernel code
+ calling a signal handler, and:
+
+ o The saved registers are all in the interrupted run-time context,
+
+ o The CFA is the stack pointer value when the kernel code is entered, that
+ is, the stack pointer value at the interruption point, also part of the
+ interrupted run-time context.
+
+ o We want the return address to appear as the address of the active
+ instruction at the interruption point, so that the unwinder proceeds as
+ if the interruption had been a regular call. This address is also part
+ of the interrupted run-time context.
+
+ --
+
+ Also, note that there is an important difference between the return address
+ we need to claim for the kernel frame and the value of the return address
+ register at the interruption point.
+
+ The latter might be required to be able to unwind past the interrupted
+ routine, for instance if it is interrupted before saving the incoming
+   register value in its own frame, as typically happens during stack probes
+   for stack-checking purposes.
+
+ It is then essential that the rules stated to locate the kernel frame
+   return address don't clobber the rules describing where the return address
+   register is saved at the interruption point, so some scratch register state
+ entry should be used for the former. We have DWARF_ALT_FRAME_RETURN_COLUMN
+ at hand exactly for that purpose.
+
+ --------------------------------------------------------------------------
+
+ 4/ Depending on the context (single-threaded or multi-threaded app, ...),
+   the code calling the handler and the handler-CFA-to-interrupted-context
+ offset might change, so we use a simple generic data structure to track
+ the possible variants. */
+
+/* This is the structure to wrap information about each possible sighandler
+ caller we may have to identify. */
+
+typedef struct {
+ /* Expected return address when being called from a sighandler. */
+ void *ra_value;
+
+ /* Offset to get to the sigcontext structure from the handler's CFA
+ when the pattern matches. */
+ int cfa_to_context_offset;
+
+} sighandler_call_t;
+
+/* Helper macro for MD_FALLBACK_FRAME_STATE_FOR below.
+
+   Look at RA to see if it matches one of the known sighandler callers.
+ Set SIGCTX to the corresponding sigcontext structure (computed from
+ CFA) if it does, or to 0 otherwise. */
+
+#define COMPUTE_SIGCONTEXT_FOR(RA,CFA,SIGCTX) \
+do { \
+ /* Define and register the applicable patterns. */ \
+ extern void __sigtramp (void); \
+ \
+ sighandler_call_t sighandler_calls [] = { \
+ {__sigtramp + 164, 0x78} \
+ }; \
+ \
+ int n_patterns_to_match \
+ = sizeof (sighandler_calls) / sizeof (sighandler_call_t); \
+ \
+ int pn; /* pattern number */ \
+ \
+ int match = 0; /* Did last pattern match ? */ \
+ \
+ /* Try to match each pattern in turn. */ \
+ for (pn = 0; !match && pn < n_patterns_to_match; pn ++) \
+ match = ((RA) == sighandler_calls[pn].ra_value); \
+ \
+ (SIGCTX) = (struct sigcontext *) \
+ (match ? ((CFA) + sighandler_calls[pn - 1].cfa_to_context_offset) : 0); \
+} while (0)
+
+#include <sys/context_t.h>
+
+#define REG_SP 30 /* hard reg for stack pointer */
+#define REG_RA 26 /* hard reg for return address */
+
+#define MD_FALLBACK_FRAME_STATE_FOR alpha_fallback_frame_state
+
+static _Unwind_Reason_Code
+alpha_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ /* Return address and CFA of the frame we're attempting to unwind through,
+ possibly a signal handler. */
+ void *ctx_ra = (void *)context->ra;
+ void *ctx_cfa = (void *)context->cfa;
+
+ /* CFA of the intermediate abstract kernel frame between the interrupted
+ code and the signal handler, if we're indeed unwinding through a signal
+ handler. */
+ void *k_cfa;
+
+ /* Pointer to the sigcontext structure pushed by the kernel when we're
+ unwinding through a signal handler. */
+ struct sigcontext *sigctx;
+ int i;
+
+ COMPUTE_SIGCONTEXT_FOR (ctx_ra, ctx_cfa, sigctx);
+
+ if (sigctx == 0)
+ return _URC_END_OF_STACK;
+
+ /* The kernel frame's CFA is exactly the stack pointer value at the
+ interruption point. */
+ k_cfa = (void *) sigctx->sc_regs [REG_SP];
+
+  /* State the rules to compute the CFA, whose value we already have: use
+     the previous CFA, offset by the difference between the two.  See
+ uw_update_context_1 for the supporting details. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = k_cfa - ctx_cfa;
+
+ /* Fill the internal frame_state structure with information stating
+ where each register of interest in the saved context can be found
+ from the CFA. */
+
+ /* The general registers are in sigctx->sc_regs. Leave out r31, which
+     is read-as-zero.  It makes no sense to restore it, and we are going to
+ use the state entry for the kernel return address rule below.
+
+ This loop must cover at least all the callee-saved registers, and
+ we just don't bother specializing the set here. */
+ for (i = 0; i <= 30; i ++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = (void *) &sigctx->sc_regs[i] - (void *) k_cfa;
+ }
+
+ /* Ditto for the floating point registers in sigctx->sc_fpregs. */
+ for (i = 0; i <= 31; i ++)
+ {
+ fs->regs.reg[32+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[32+i].loc.offset
+ = (void *) &sigctx->sc_fpregs[i] - (void *) k_cfa;
+ }
+
+ /* State the rules to find the kernel's code "return address", which
+ is the address of the active instruction when the signal was caught,
+ in sigctx->sc_pc. Use DWARF_ALT_FRAME_RETURN_COLUMN since the return
+ address register is a general register and should be left alone. */
+ fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN;
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_OFFSET;
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset
+ = (void *) &sigctx->sc_pc - (void *) k_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
diff --git a/gcc/config/alpha/osf5.h b/gcc/config/alpha/osf5.h
new file mode 100644
index 000000000..7713b7ee0
--- /dev/null
+++ b/gcc/config/alpha/osf5.h
@@ -0,0 +1,278 @@
+/* Definitions of target machine for GNU compiler, for DEC Alpha on
+ Tru64 UNIX V5.1.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* As of DEC OSF/1 V4.0, as can subtract adjacent labels. */
+
+#undef TARGET_AS_CAN_SUBTRACT_LABELS
+#define TARGET_AS_CAN_SUBTRACT_LABELS 1
+
+/* The GEM libraries for X_float are present, though not used by C. */
+
+#undef TARGET_HAS_XFLOATING_LIBS
+#define TARGET_HAS_XFLOATING_LIBS 1
+
+/* Tru64 UNIX V5.1 uses IEEE QUAD format. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS | MASK_LONG_DOUBLE_128)
+
+/* The linker appears to perform invalid code optimizations that result
+ in the ldgp emitted for the exception_receiver pattern being incorrectly
+ linked. */
+#undef TARGET_LD_BUGGY_LDGP
+#define TARGET_LD_BUGGY_LDGP 1
+
+/* Tru64 UNIX V5.1 has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+/* Names to predefine in the preprocessor for this target machine. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("SYSTYPE_BSD"); \
+ builtin_define ("_SYSTYPE_BSD"); \
+ builtin_define ("__osf__"); \
+ builtin_define ("__digital__"); \
+ builtin_define ("__arch64__"); \
+ builtin_define ("_LONGLONG"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=xpg4"); \
+ /* Tru64 UNIX V5 has a 16 byte long \
+ double type and requires __X_FLOAT \
+ to be defined for <math.h>. */ \
+ if (LONG_DOUBLE_TYPE_SIZE == 128) \
+ builtin_define ("__X_FLOAT"); \
+ \
+ /* Tru64 UNIX V4/V5 provide several ISO C94 \
+ features protected by the corresponding \
+ __STDC_VERSION__ macro. libstdc++ v3 \
+ needs them as well. */ \
+ if (c_dialect_cxx ()) \
+ builtin_define ("__STDC_VERSION__=199409L"); \
+ } while (0)
+
+/* Accept DEC C flags for multithreaded programs. We use _PTHREAD_USE_D4
+ instead of PTHREAD_USE_D4 since both have the same effect and the former
+ doesn't invade the users' namespace. */
+
+#undef CPP_SPEC
+#define CPP_SPEC \
+"%{pthread|threads:-D_REENTRANT} %{threads:-D_PTHREAD_USE_D4}"
+
+/* Under DEC OSF/1 V4, -p and -pg require -lprof1, and -lprof1 requires
+ -lpdf. */
+
+#define LIB_SPEC \
+"%{p|pg:-lprof1%{pthread|threads:_r} -lpdf} \
+ %{threads: -lpthreads} %{pthread|threads: -lpthread -lmach -lexc} -lc"
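+
+/* Illustration (an annotation, not part of the upstream sources): with the
+   two specs above, a hypothetical "gcc -pthread hello.c" compiles with
+   -D_REENTRANT and links with "-lpthread -lmach -lexc -lc", while
+   "gcc -pg -threads hello.c" adds -D_PTHREAD_USE_D4 and links with
+   "-lprof1_r -lpdf -lpthreads -lpthread -lmach -lexc -lc".  */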
+
+/* Pass "-G 8" to ld because Alpha's CC does. Pass -O3 if we are
+ optimizing, -O1 if we are not. Pass -S to silence `weak symbol
+ multiply defined' warnings. Pass -shared, -non_shared or
+ -call_shared as appropriate. Pass -hidden_symbol so that our
+ constructor and call-frame data structures are not accidentally
+ overridden. */
+#define LINK_SPEC \
+ "-G 8 %{O*:-O3} %{!O*:-O1} -S %{static:-non_shared} \
+ %{!static:%{shared:-shared -hidden_symbol _GLOBAL_*} \
+ %{!shared:-call_shared}} %{pg} %{taso} %{rpath*}"
+
+#define STARTFILE_SPEC \
+ "%{!shared:%{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}"
+
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+
+#define MD_STARTFILE_PREFIX "/usr/lib/cmplrs/cc/"
+
+/* In Tru64 UNIX V5.1, Compaq introduced a new assembler
+ (/usr/lib/cmplrs/cc/adu) which currently (versions between 3.04.29 and
+ 3.04.32) breaks mips-tfile. Passing the undocumented -oldas flag reverts
+ to using the old assembler (/usr/lib/cmplrs/cc/as[01]).
+
+ It is clearly not desirable to depend on this undocumented flag, and
+ Compaq wants -oldas to go away soon, but until they have released a
+ new adu that works with mips-tfile, this is the only option.
+
+ In some versions of the DTK, the assembler driver invokes ld after
+ assembly. This has been fixed in current versions, but adding -c
+ works as expected for all versions. */
+
+#define ASM_OLDAS_SPEC "-oldas -c"
+
+/* In OSF/1 v3.2c, the assembler by default does not output file names, which
+ causes mips-tfile to fail. Passing -g to the assembler fixes this problem.
+ ??? Strictly speaking, we need -g only if the user specifies -g. Passing
+ it always means that we get slightly larger than necessary object files
+ if the user does not specify -g. If we don't pass -g, then mips-tfile
+ will need to be fixed to work in this case. Pass -O0 since some
+   optimizations are broken and don't help us anyway.  */
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GAS) != 0
+#define ASM_SPEC "%{malpha-as:-g " ASM_OLDAS_SPEC " %{pg} -O0}"
+#else
+#define ASM_SPEC "%{!mgas:-g " ASM_OLDAS_SPEC " %{pg} -O0}"
+#endif
+
+/* Specify that a post-processor, mips-tfile, is run after the assembler
+   to stuff the ECOFF debug information into the object file.
+ This is needed because the Alpha assembler provides no way
+ of specifying such information in the assembly file. */
+
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GAS) != 0
+
+#define ASM_FINAL_SPEC "\
+%{malpha-as: %{!mno-mips-tfile: \
+ \n mips-tfile %{v*: -v} \
+ %{K: -I %b.o~} \
+ %{!K: %{save-temps: -I %b.o~}} \
+ %{c:%W{o*}%{!o*:-o %b.o}}%{!c:-o %U.o} \
+ %{,assembler:%i;:%g.s}}}"
+
+#else
+#define ASM_FINAL_SPEC "\
+%{!mgas: %{!mno-mips-tfile: \
+ \n mips-tfile %{v*: -v} \
+ %{K: -I %b.o~} \
+ %{!K: %{save-temps: -I %b.o~}} \
+ %{c:%W{o*}%{!o*:-o %b.o}}%{!c:-o %U.o} \
+ %{,assembler:%i;:%g.s}}}"
+
+#endif
+
+/* Indicate that we have a stamp.h to use. */
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#define HAVE_STAMP_H 1
+#endif
+
+/* Attempt to turn on access permissions for the stack. */
+
+#define ENABLE_EXECUTE_STACK \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ extern int mprotect (const void *, size_t, int); \
+ long size = getpagesize (); \
+ long mask = ~(size-1); \
+ char *page = (char *) (((long) addr) & mask); \
+ char *end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \
+ \
+ /* 7 is PROT_READ | PROT_WRITE | PROT_EXEC */ \
+ if (mprotect (page, end - page, 7) < 0) \
+ perror ("mprotect of trampoline code"); \
+}
+
+/* Digital UNIX V4.0E (1091)/usr/include/sys/types.h 4.3.49.9 1997/08/14 */
+#define SIZE_TYPE "long unsigned int"
+#define PTRDIFF_TYPE "long int"
+
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long int"
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long unsigned int"
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long int"
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long unsigned int"
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long int"
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long unsigned int"
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
+
+/* The linker will stick __main into the .init section. */
+#define HAS_INIT_SECTION
+#define LD_INIT_SWITCH "-init"
+#define LD_FINI_SWITCH "-fini"
+
+/* From Tru64 UNIX Object File and Symbol Table Format Specification,
+ 2.3.5 Alignment, p.19. */
+#define MAX_OFILE_ALIGNMENT (64 * 1024 * BITS_PER_UNIT)
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ We really ought to be using the SREL32 relocations that ECOFF has,
+ but no version of the native assembler supports creating such things,
+ and Compaq has no plans to rectify this. Worse, the dynamic loader
+ cannot handle unaligned relocations, so we have to make sure that
+ things get padded appropriately. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (TARGET_GAS \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_aligned)
+
+/* The Tru64 UNIX assembler warns on .lcomm with SIZE 0, so use 1 in that
+ case. */
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \
+( fputs ("\t.lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE) ? (SIZE) : 1))
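+
+/* Illustration (an annotation, not from the upstream sources): for a
+   hypothetical 240-byte local common symbol "buf" the macro above emits
+   "\t.lcomm buf,240"; for a zero-sized one it emits "\t.lcomm buf,1".  */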
+
+/* This is how we tell the assembler that a symbol is weak. */
+
+#define ASM_OUTPUT_WEAK_ALIAS(FILE, NAME, VALUE) \
+ do \
+ { \
+ (*targetm.asm_out.globalize_label) (FILE, NAME); \
+ fputs ("\t.weakext\t", FILE); \
+ assemble_name (FILE, NAME); \
+ if (VALUE) \
+ { \
+ fputc (' ', FILE); \
+ assemble_name (FILE, VALUE); \
+ } \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
+
+#define ASM_WEAKEN_LABEL(FILE, NAME) ASM_OUTPUT_WEAK_ALIAS(FILE, NAME, 0)
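+
+/* Illustration (an annotation, not from the upstream sources): for a
+   hypothetical weak alias "foo" of "bar", ASM_OUTPUT_WEAK_ALIAS emits
+   "\t.weakext\tfoo bar" after globalizing "foo", and ASM_WEAKEN_LABEL
+   emits just "\t.weakext\tfoo".  */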
+
+/* The native assembler doesn't understand parentheses.  */
+#define TARGET_ASM_OPEN_PAREN ""
+#define TARGET_ASM_CLOSE_PAREN ""
+
+/* Handle #pragma extern_prefix. */
+#define TARGET_HANDLE_PRAGMA_EXTERN_PREFIX 1
+
+#define MD_UNWIND_SUPPORT "config/alpha/osf5-unwind.h"
diff --git a/gcc/config/alpha/osf5.opt b/gcc/config/alpha/osf5.opt
new file mode 100644
index 000000000..ba70e7ce9
--- /dev/null
+++ b/gcc/config/alpha/osf5.opt
@@ -0,0 +1,42 @@
+; Tru64 UNIX options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+K
+Driver
+
+mno-mips-tfile
+Target RejectNegative
+
+pthread
+Driver
+
+taso
+Driver
+
+threads
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/alpha/predicates.md b/gcc/config/alpha/predicates.md
new file mode 100644
index 000000000..2f17fb9e3
--- /dev/null
+++ b/gcc/config/alpha/predicates.md
@@ -0,0 +1,621 @@
+;; Predicate definitions for DEC Alpha.
+;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 if OP is the zero constant for MODE.
+(define_predicate "const0_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Returns true if OP is either the constant zero or a register.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or
+;; any register.
+(define_predicate "reg_or_6bit_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if OP is an 8-bit constant.
+(define_predicate "cint8_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256")))
+
+;; Return 1 if OP is an 8-bit constant or any register.
+(define_predicate "reg_or_8bit_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if OP is a constant or any register.
+(define_predicate "reg_or_cint_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_int_operand")))
+
+;; Return 1 if the operand is a valid second operand to an add insn.
+(define_predicate "add_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if the operand is a valid second operand to a
+;; sign-extending add insn.
+(define_predicate "sext_add_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if the operand is a non-symbolic constant operand that
+;; does not satisfy add_operand.
+(define_predicate "non_add_const_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (not (match_operand 0 "add_operand"))))
+
+;; Return 1 if the operand is a non-symbolic, nonzero constant operand.
+(define_predicate "non_zero_const_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op != CONST0_RTX (mode)")))
+
+;; Return 1 if OP is the constant 4 or 8.
+(define_predicate "const48_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 4 || INTVAL (op) == 8")))
+
+;; Return 1 if OP is a valid first operand to an AND insn.
+(define_predicate "and_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
+ || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100
+ || zap_mask (INTVAL (op))")
+ (if_then_else (match_code "const_double")
+ (match_test "GET_MODE (op) == VOIDmode
+ && zap_mask (CONST_DOUBLE_LOW (op))
+ && zap_mask (CONST_DOUBLE_HIGH (op))")
+ (match_operand 0 "register_operand"))))
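+
+;; For reference, zap_mask (implemented in alpha.c) accepts constants each
+;; of whose bytes is all-zeros or all-ones, i.e. masks realizable with the
+;; ZAP/ZAPNOT instructions.  An illustrative C sketch, not the authoritative
+;; implementation:
+;;
+;;   static int zap_mask_sketch (HOST_WIDE_INT value)
+;;   {
+;;     int i;
+;;     /* Walk the constant byte by byte; reject any mixed byte.  */
+;;     for (i = 0; i < HOST_BITS_PER_WIDE_INT / 8; i++, value >>= 8)
+;;       if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
+;;         return 0;
+;;     return 1;
+;;   }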
+
+;; Return 1 if OP is a valid first operand to an IOR or XOR insn.
+(define_predicate "or_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100
+ || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if OP is a constant that is the width, in bits, of an integral
+;; mode not larger than DImode.
+(define_predicate "mode_width_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i = INTVAL (op);
+ return i == 8 || i == 16 || i == 32 || i == 64;
+})
+
+;; Return 1 if OP is a constant that is a mask of ones of width of an
+;; integral machine mode not larger than DImode.
+(define_predicate "mode_mask_operand"
+ (match_code "const_int,const_double")
+{
+ if (CONST_INT_P (op))
+ {
+ HOST_WIDE_INT value = INTVAL (op);
+
+ if (value == 0xff)
+ return 1;
+ if (value == 0xffff)
+ return 1;
+ if (value == 0xffffffff)
+ return 1;
+ if (value == -1)
+ return 1;
+ }
+ else if (HOST_BITS_PER_WIDE_INT == 32 && GET_CODE (op) == CONST_DOUBLE)
+ {
+ if (CONST_DOUBLE_LOW (op) == 0xffffffff && CONST_DOUBLE_HIGH (op) == 0)
+ return 1;
+ }
+ return 0;
+})
+
+;; Return 1 if OP is a multiple of 8 less than 64.
+(define_predicate "mul8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT i = INTVAL (op);
+ return i < 64 && i % 8 == 0;
+})
+
+;; Return 1 if OP is a hard floating-point register.
+(define_predicate "hard_fp_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS;
+})
+
+;; Return 1 if OP is a hard general register.
+(define_predicate "hard_int_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS;
+})
+
+;; Return 1 if OP is something that can be reloaded into a register;
+;; if it is a MEM, it need not be valid.
+(define_predicate "some_operand"
+ (ior (match_code "reg,mem,const_int,const_double,const_vector,
+ label_ref,symbol_ref,const,high")
+ (and (match_code "subreg")
+ (match_test "some_operand (SUBREG_REG (op), VOIDmode)"))))
+
+;; Likewise, but don't accept constants.
+(define_predicate "some_ni_operand"
+ (ior (match_code "reg,mem")
+ (and (match_code "subreg")
+ (match_test "some_ni_operand (SUBREG_REG (op), VOIDmode)"))))
+
+;; Return 1 if OP is a valid operand for the source of a move insn.
+(define_predicate "input_operand"
+ (match_code "label_ref,symbol_ref,const,high,reg,subreg,mem,
+ const_double,const_vector,const_int")
+{
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST:
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+      /* We don't split symbolic operands into something unintelligible
+ until after reload, but we do not wish non-small, non-global
+ symbolic operands to be reconstructed from their high/lo_sum
+ form. */
+ return (small_symbolic_operand (op, mode)
+ || global_symbolic_operand (op, mode)
+ || gotdtp_symbolic_operand (op, mode)
+ || gottp_symbolic_operand (op, mode));
+ }
+
+ /* This handles both the Windows/NT and OSF cases. */
+ return mode == ptr_mode || mode == DImode;
+
+ case HIGH:
+ return (TARGET_EXPLICIT_RELOCS
+ && local_symbolic_operand (XEXP (op, 0), mode));
+
+ case REG:
+ return 1;
+
+ case SUBREG:
+ if (register_operand (op, mode))
+ return 1;
+ /* ... fall through ... */
+ case MEM:
+ return ((TARGET_BWX || (mode != HImode && mode != QImode))
+ && general_operand (op, mode));
+
+ case CONST_DOUBLE:
+ return op == CONST0_RTX (mode);
+
+ case CONST_VECTOR:
+ if (reload_in_progress || reload_completed)
+ return alpha_legitimate_constant_p (op);
+ return op == CONST0_RTX (mode);
+
+ case CONST_INT:
+ if (mode == QImode || mode == HImode)
+ return true;
+ if (reload_in_progress || reload_completed)
+ return alpha_legitimate_constant_p (op);
+ return add_operand (op, mode);
+
+ default:
+ gcc_unreachable ();
+ }
+ return 0;
+})
+
+;; Return 1 if OP is a SYMBOL_REF for a function known to be in this
+;; file, and in the same section as the current function.
+
+(define_predicate "samegp_function_operand"
+ (match_code "symbol_ref")
+{
+ /* Easy test for recursion. */
+ if (op == XEXP (DECL_RTL (current_function_decl), 0))
+ return true;
+
+ /* Functions that are not local can be overridden, and thus may
+ not share the same gp. */
+ if (! SYMBOL_REF_LOCAL_P (op))
+ return false;
+
+ /* If -msmall-data is in effect, assume that there is only one GP
+ for the module, and so any local symbol has this property. We
+ need explicit relocations to be able to enforce this for symbols
+ not defined in this unit of translation, however. */
+ if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
+ return true;
+
+ /* Functions that are not external are defined in this UoT,
+ and thus must share the same gp. */
+ return ! SYMBOL_REF_EXTERNAL_P (op);
+})
+
+;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr.
+(define_predicate "direct_call_operand"
+ (match_operand 0 "samegp_function_operand")
+{
+ /* If profiling is implemented via linker tricks, we can't jump
+ to the nogp alternate entry point. Note that crtl->profile
+ would not be correct, since that doesn't indicate if the target
+ function uses profiling. */
+ /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test,
+ but is approximately correct for the OSF ABIs. Don't know
+ what to do for VMS, NT, or UMK. */
+ if (!TARGET_PROFILING_NEEDS_GP && profile_flag)
+ return false;
+
+ /* Must be a function. In some cases folks create thunks in static
+ data structures and then make calls to them. If we allow the
+ direct call, we'll get an error from the linker about !samegp reloc
+ against a symbol without a .prologue directive. */
+ if (!SYMBOL_REF_FUNCTION_P (op))
+ return false;
+
+ /* Must be "near" so that the branch is assumed to reach. With
+ -msmall-text, this is assumed true of all local symbols. Since
+ we've already checked samegp, locality is already assured. */
+ if (TARGET_SMALL_TEXT)
+ return true;
+
+ return false;
+})
+
+;; Return 1 if OP is a valid operand for the MEM of a CALL insn.
+;;
+;; For TARGET_ABI_OSF, we want to restrict to R27 or a pseudo.
+;; For TARGET_ABI_UNICOSMK, we want to restrict to registers.
+
+(define_predicate "call_operand"
+ (if_then_else (match_code "reg")
+ (match_test "!TARGET_ABI_OSF
+ || REGNO (op) == 27 || REGNO (op) > LAST_VIRTUAL_REGISTER")
+ (and (match_test "!TARGET_ABI_UNICOSMK")
+ (match_code "symbol_ref"))))
+
+;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing
+;; a (non-tls) variable known to be defined in this file.
+(define_predicate "local_symbolic_operand"
+ (match_code "label_ref,const,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ return (SYMBOL_REF_LOCAL_P (op)
+ && !SYMBOL_REF_WEAK (op)
+ && !SYMBOL_REF_TLS_MODEL (op));
+})
+
+;; Return true if OP is a SYMBOL_REF or CONST referencing a variable
+;; known to be defined in this file in the small data area.
+(define_predicate "small_symbolic_operand"
+ (match_code "const,symbol_ref")
+{
+ if (! TARGET_SMALL_DATA)
+ return 0;
+
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ /* ??? There's no encode_section_info equivalent for the rtl
+ constant pool, so SYMBOL_FLAG_SMALL never gets set. */
+ if (CONSTANT_POOL_ADDRESS_P (op))
+ return GET_MODE_SIZE (get_pool_mode (op)) <= g_switch_value;
+
+ return (SYMBOL_REF_LOCAL_P (op)
+ && SYMBOL_REF_SMALL_P (op)
+ && !SYMBOL_REF_WEAK (op)
+ && !SYMBOL_REF_TLS_MODEL (op));
+})
+
+;; Return true if OP is a SYMBOL_REF or CONST referencing a variable
+;; not known (or known not) to be defined in this file.
+(define_predicate "global_symbolic_operand"
+ (match_code "const,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op))
+ && !SYMBOL_REF_TLS_MODEL (op));
+})
+
+;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref,
+;; possibly with an offset.
+(define_predicate "symbolic_operand"
+ (ior (match_code "symbol_ref,label_ref")
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op,0)) == PLUS
+ && (GET_CODE (XEXP (XEXP (op,0), 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (XEXP (op,0), 0)) == LABEL_REF)
+ && CONST_INT_P (XEXP (XEXP (op,0), 1))"))))
+
+;; Return true if OP is valid for 16-bit DTP relative relocations.
+(define_predicate "dtp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 32-bit DTP relative relocations.
+(define_predicate "dtp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 64-bit DTP relative relocations.
+(define_predicate "gotdtp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 16-bit TP relative relocations.
+(define_predicate "tp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 32-bit TP relative relocations.
+(define_predicate "tp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 64-bit TP relative relocations.
+(define_predicate "gottp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)")))
+
+;; Return 1 if this memory address is a known aligned register plus
+;; a constant. It must be a valid address. This means that we can do
+;; this as an aligned reference plus some offset.
+;;
+;; Take into account what reload will do. Oh god this is awful.
+;; The horrible comma-operator construct below is to prevent genrecog
+;; from thinking that this predicate accepts REG and SUBREG. We don't
+;; use recog during reload, so pretending these codes are accepted
+;; pessimizes things a tad.
+
+(define_special_predicate "aligned_memory_operand"
+ (ior (match_test "op = resolve_reload_operand (op), 0")
+ (match_code "mem"))
+{
+ rtx base;
+ int offset;
+
+ if (MEM_ALIGN (op) >= 32)
+ return 1;
+
+ op = XEXP (op, 0);
+
+ /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
+ sorts of constructs. Dig for the real base register. */
+ if (reload_in_progress
+ && GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ base = XEXP (XEXP (op, 0), 0);
+ offset = INTVAL (XEXP (op, 1));
+ }
+ else
+ {
+ if (! memory_address_p (mode, op))
+ return 0;
+ if (GET_CODE (op) == PLUS)
+ {
+ base = XEXP (op, 0);
+ offset = INTVAL (XEXP (op, 1));
+ }
+ else
+ {
+ base = op;
+ offset = 0;
+ }
+ }
+
+ if (offset % GET_MODE_SIZE (mode))
+ return 0;
+
+ return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32);
+})
+
+;; Similar, but return 1 if OP is a MEM which is not alignable.
+
+(define_special_predicate "unaligned_memory_operand"
+ (ior (match_test "op = resolve_reload_operand (op), 0")
+ (match_code "mem"))
+{
+ rtx base;
+ int offset;
+
+ if (MEM_ALIGN (op) >= 32)
+ return 0;
+
+ op = XEXP (op, 0);
+
+ /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo)
+ sorts of constructs. Dig for the real base register. */
+ if (reload_in_progress
+ && GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ base = XEXP (XEXP (op, 0), 0);
+ offset = INTVAL (XEXP (op, 1));
+ }
+ else
+ {
+ if (! memory_address_p (mode, op))
+ return 0;
+ if (GET_CODE (op) == PLUS)
+ {
+ base = XEXP (op, 0);
+ offset = INTVAL (XEXP (op, 1));
+ }
+ else
+ {
+ base = op;
+ offset = 0;
+ }
+ }
+
+ if (offset % GET_MODE_SIZE (mode))
+ return 1;
+
+ return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32);
+})
+
+;; Return 1 if OP is any memory location. During reload a pseudo matches.
+(define_special_predicate "any_memory_operand"
+ (match_code "mem,reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (MEM_P (op))
+ return true;
+ if (reload_in_progress && REG_P (op))
+ {
+ unsigned regno = REGNO (op);
+ if (HARD_REGISTER_NUM_P (regno))
+ return false;
+ else
+ return reg_renumber[regno] < 0;
+ }
+
+ return false;
+})
+
+;; Return 1 if OP is a memory location that is not a reference
+;; (using an AND) to an unaligned location. Take into account
+;; what reload will do.
+(define_special_predicate "normal_memory_operand"
+ (ior (match_test "op = resolve_reload_operand (op), 0")
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) != AND"))))
+
+;; Returns 1 if OP is not an eliminable register.
+;;
+;; This exists to cure a pathological failure in the s8addq (et al) patterns,
+;;
+;; long foo () { long t; bar(); return (long) &t * 26107; }
+;;
+;; which run afoul of a hack in reload to cure a (presumably) similar
+;; problem with lea-type instructions on other targets. But there is
+;; one of us and many of them, so work around the problem by selectively
+;; preventing combine from making the optimization.
+
+(define_predicate "reg_not_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return op != frame_pointer_rtx && op != arg_pointer_rtx;
+})
+
+;; Accept a register, but not a subreg of any kind. This allows us to
+;; avoid pathological cases in reload wrt data movement common in
+;; int->fp conversion.
+(define_predicate "reg_no_subreg_operand"
+ (and (match_code "reg")
+ (match_operand 0 "register_operand")))
+
+;; Return 1 if OP is a valid Alpha comparison operator for "cbranch"
+;; instructions.
+(define_predicate "alpha_cbranch_operator"
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (match_code "ordered,unordered")))
+
+;; Return 1 if OP is a valid Alpha comparison operator for "cmp" style
+;; instructions.
+(define_predicate "alpha_comparison_operator"
+ (match_code "eq,le,lt,leu,ltu"))
+
+;; Similarly, but with swapped operands.
+(define_predicate "alpha_swapped_comparison_operator"
+ (match_code "eq,ge,gt,gtu"))
+
+;; Return 1 if OP is a valid Alpha comparison operator against zero
+;; for "bcc" style instructions.
+(define_predicate "alpha_zero_comparison_operator"
+ (match_code "eq,ne,le,lt,leu,ltu"))
+
+;; Return 1 if OP is a signed comparison operation.
+(define_predicate "signed_comparison_operator"
+ (match_code "eq,ne,le,lt,ge,gt"))
+
+;; Return 1 if OP is a valid Alpha floating point comparison operator.
+(define_predicate "alpha_fp_comparison_operator"
+ (match_code "eq,le,lt,unordered"))
+
+;; Return 1 if this is a divide or modulus operator.
+(define_predicate "divmod_operator"
+ (match_code "div,mod,udiv,umod"))
+
+;; Return 1 if this is a float->int conversion operator.
+(define_predicate "fix_operator"
+ (match_code "fix,unsigned_fix"))
+
+;; Recognize an addition operation that includes a constant. Used to
+;; convince reload to canonicalize (plus (plus reg c1) c2) during register
+;; elimination.
+
+(define_predicate "addition_operation"
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), mode)
+ && satisfies_constraint_K (XEXP (op, 1))")))
+
+;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a
+;; small symbolic operand until after reload, at which point we need
+;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref))
+;; so that sched2 has the proper dependency information.
+(define_predicate "some_small_symbolic_operand"
+ (match_code "set,parallel,prefetch,unspec,unspec_volatile")
+{
+ /* Avoid search unless necessary. */
+ if (!TARGET_EXPLICIT_RELOCS || !reload_completed)
+ return false;
+ return for_each_rtx (&op, some_small_symbolic_operand_int, NULL);
+})
diff --git a/gcc/config/alpha/qrnnd.asm b/gcc/config/alpha/qrnnd.asm
new file mode 100644
index 000000000..51b13bce6
--- /dev/null
+++ b/gcc/config/alpha/qrnnd.asm
@@ -0,0 +1,163 @@
+ # Alpha 21064 __udiv_qrnnd
+ # Copyright (C) 1992, 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
+
+ # This file is part of GCC.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 3 of the License, or (at your
+ # option) any later version.
+
+ # This file is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # Under Section 7 of GPL version 3, you are granted additional
+ # permissions described in the GCC Runtime Library Exception, version
+ # 3.1, as published by the Free Software Foundation.
+
+ # You should have received a copy of the GNU General Public License and
+ # a copy of the GCC Runtime Library Exception along with this program;
+ # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ # <http://www.gnu.org/licenses/>.
+
+#ifdef __ELF__
+.section .note.GNU-stack,""
+#endif
+
+ .set noreorder
+ .set noat
+
+ .text
+
+ .globl __udiv_qrnnd
+ .ent __udiv_qrnnd
+__udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+#define AT $at
+
+ ldiq cnt,16
+ blt d,$largedivisor
+
+$loop1: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,$loop1
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+$largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addq $5,$6,$5
+
+$loop2: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,$loop2
+
+ addq n1,n1,n1
+ addq $4,n1,n1
+ bne $6,$Odd
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+$Odd:
+ /* q' in n0. r' in n1 */
+ addq n1,n0,n1
+
+ cmpult n1,n0,tmp # tmp := carry from addq
+ subq n1,d,AT
+ addq n0,tmp,n0
+ cmovne tmp,AT,n1
+
+ cmpult n1,d,tmp
+ addq n0,1,AT
+ cmoveq tmp,AT,n0
+ subq n1,d,AT
+ cmoveq tmp,AT,n1
+
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+ .end __udiv_qrnnd
diff --git a/gcc/config/alpha/sync.md b/gcc/config/alpha/sync.md
new file mode 100644
index 000000000..bb7210239
--- /dev/null
+++ b/gcc/config/alpha/sync.md
@@ -0,0 +1,308 @@
+;; GCC machine description for Alpha synchronization instructions.
+;; Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_code_iterator FETCHOP [plus minus ior xor and])
+(define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")])
+(define_code_attr fetchop_pred
+ [(plus "add_operand") (minus "reg_or_8bit_operand")
+ (ior "or_operand") (xor "or_operand") (and "and_operand")])
+(define_code_attr fetchop_constr
+ [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "riNHM")])
+
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+ ""
+ "mb"
+ [(set_attr "type" "mb")])
+
+(define_insn "load_locked_<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=r")
+ (unspec_volatile:I48MODE
+ [(match_operand:I48MODE 1 "memory_operand" "m")]
+ UNSPECV_LL))]
+ ""
+ "ld<modesuffix>_l %0,%1"
+ [(set_attr "type" "ld_l")])
+
+(define_insn "store_conditional_<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:I48MODE 1 "memory_operand" "=m")
+ (match_operand:I48MODE 2 "reg_or_0_operand" "0"))]
+ ""
+ "st<modesuffix>_c %0,%1"
+ [(set_attr "type" "st_c")])
+
+;; The Alpha Architecture Handbook says that it is UNPREDICTABLE whether
+;; the lock is cleared by a TAKEN branch.  This means that we cannot
+;; expand an ll/sc sequence until after the final basic-block reordering pass.
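+;;
+;; Illustrative shape of the code alpha_split_atomic_op emits after the
+;; split (a sketch, not the authoritative expansion; SImode add case):
+;;
+;;   retry:  ldl_l   $t, 0($addr)    # load-locked the old value
+;;           addl    $t, $val, $s    # apply the operation
+;;           stl_c   $s, 0($addr)    # store-conditional, $s = success
+;;           beq     $s, retry       # retry if the reservation was lost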
+
+(define_insn_and_split "sync_<fetchop_name><mode>"
+ [(set (match_operand:I48MODE 0 "memory_operand" "+m")
+ (unspec:I48MODE
+ [(FETCHOP:I48MODE (match_dup 0)
+ (match_operand:I48MODE 1 "<fetchop_pred>" "<fetchop_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 2 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (<CODE>, operands[0], operands[1],
+ NULL, NULL, operands[2]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_nand<mode>"
+ [(set (match_operand:I48MODE 0 "memory_operand" "+m")
+ (unspec:I48MODE
+ [(not:I48MODE
+ (and:I48MODE (match_dup 0)
+ (match_operand:I48MODE 1 "register_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 2 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (NOT, operands[0], operands[1],
+ NULL, NULL, operands[2]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_old_<fetchop_name><mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (match_operand:I48MODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(FETCHOP:I48MODE (match_dup 1)
+ (match_operand:I48MODE 2 "<fetchop_pred>" "<fetchop_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 3 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (<CODE>, operands[1], operands[2],
+ operands[0], NULL, operands[3]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_old_nand<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (match_operand:I48MODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(not:I48MODE
+ (and:I48MODE (match_dup 1)
+ (match_operand:I48MODE 2 "register_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 3 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (NOT, operands[1], operands[2],
+ operands[0], NULL, operands[3]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_new_<fetchop_name><mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (FETCHOP:I48MODE
+ (match_operand:I48MODE 1 "memory_operand" "+m")
+ (match_operand:I48MODE 2 "<fetchop_pred>" "<fetchop_constr>")))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(FETCHOP:I48MODE (match_dup 1) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 3 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (<CODE>, operands[1], operands[2],
+ NULL, operands[0], operands[3]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_new_nand<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (not:I48MODE
+ (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m")
+ (match_operand:I48MODE 2 "register_operand" "r"))))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:I48MODE 3 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_atomic_op (NOT, operands[1], operands[2],
+ NULL, operands[0], operands[3]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(match_operand:I12MODE 0 "register_operand" "")
+ (match_operand:I12MODE 1 "memory_operand" "")
+ (match_operand:I12MODE 2 "register_operand" "")
+ (match_operand:I12MODE 3 "add_operand" "")]
+ ""
+{
+ alpha_expand_compare_and_swap_12 (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_insn_and_split "sync_compare_and_swap<mode>_1"
+ [(set (match_operand:DI 0 "register_operand" "=&r,&r")
+ (zero_extend:DI
+ (mem:I12MODE (match_operand:DI 1 "register_operand" "r,r"))))
+ (set (mem:I12MODE (match_dup 1))
+ (unspec:I12MODE
+ [(match_operand:DI 2 "reg_or_8bit_operand" "J,rI")
+ (match_operand:DI 3 "register_operand" "r,r")
+ (match_operand:DI 4 "register_operand" "r,r")]
+ UNSPEC_CMPXCHG))
+ (clobber (match_scratch:DI 5 "=&r,&r"))
+ (clobber (match_scratch:DI 6 "=X,&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_compare_and_swap_12 (<MODE>mode, operands[0], operands[1],
+ operands[2], operands[3], operands[4],
+ operands[5], operands[6]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:I48MODE 0 "register_operand" "")
+ (match_operand:I48MODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(match_operand:I48MODE 2 "reg_or_8bit_operand" "")
+ (match_operand:I48MODE 3 "add_operand" "rKL")]
+ UNSPEC_CMPXCHG))
+ (clobber (match_scratch:I48MODE 4 "=&r"))])]
+ ""
+{
+ if (<MODE>mode == SImode)
+ operands[2] = convert_modes (DImode, SImode, operands[2], 0);
+})
+
+(define_insn_and_split "*sync_compare_and_swap<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (match_operand:I48MODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (match_operand:I48MODE 3 "add_operand" "rKL")]
+ UNSPEC_CMPXCHG))
+ (clobber (match_scratch:I48MODE 4 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_compare_and_swap (operands[0], operands[1], operands[2],
+ operands[3], operands[4]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:I12MODE 0 "register_operand" "")
+ (match_operand:I12MODE 1 "memory_operand" "")
+ (match_operand:I12MODE 2 "register_operand" "")]
+ ""
+{
+ alpha_expand_lock_test_and_set_12 (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn_and_split "sync_lock_test_and_set<mode>_1"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (zero_extend:DI
+ (mem:I12MODE (match_operand:DI 1 "register_operand" "r"))))
+ (set (mem:I12MODE (match_dup 1))
+ (unspec:I12MODE
+ [(match_operand:DI 2 "reg_or_8bit_operand" "rI")
+ (match_operand:DI 3 "register_operand" "r")]
+ UNSPEC_XCHG))
+ (clobber (match_scratch:DI 4 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_lock_test_and_set_12 (<MODE>mode, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "sync_lock_test_and_set<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=&r")
+ (match_operand:I48MODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec:I48MODE
+ [(match_operand:I48MODE 2 "add_operand" "rKL")]
+ UNSPEC_XCHG))
+ (clobber (match_scratch:I48MODE 3 "=&r"))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ alpha_split_lock_test_and_set (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+}
+ [(set_attr "type" "multi")])
diff --git a/gcc/config/alpha/t-alpha b/gcc/config/alpha/t-alpha
new file mode 100644
index 000000000..d0b58d69a
--- /dev/null
+++ b/gcc/config/alpha/t-alpha
@@ -0,0 +1,2 @@
+# This is a support routine for longlong.h, used by libgcc2.c.
+LIB2FUNCS_EXTRA = $(srcdir)/config/alpha/qrnnd.asm
diff --git a/gcc/config/alpha/t-crtfm b/gcc/config/alpha/t-crtfm
new file mode 100644
index 000000000..b4103834a
--- /dev/null
+++ b/gcc/config/alpha/t-crtfm
@@ -0,0 +1,5 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/alpha/crtfastmath.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -frandom-seed=gcc-crtfastmath -c \
+ -o $(T)crtfastmath.o $(srcdir)/config/alpha/crtfastmath.c
diff --git a/gcc/config/alpha/t-ieee b/gcc/config/alpha/t-ieee
new file mode 100644
index 000000000..fe549dfc9
--- /dev/null
+++ b/gcc/config/alpha/t-ieee
@@ -0,0 +1,2 @@
+# All alphas get an IEEE compliant set of libraries.
+TARGET_LIBGCC2_CFLAGS += -mieee
diff --git a/gcc/config/alpha/t-linux b/gcc/config/alpha/t-linux
new file mode 100644
index 000000000..b96f1dfeb
--- /dev/null
+++ b/gcc/config/alpha/t-linux
@@ -0,0 +1,2 @@
+MULTIARCH_DIRNAME = $(call if_multiarch,alpha-linux-gnu)
+SHLIB_MAPFILES += $(srcdir)/config/alpha/libgcc-alpha-ldbl.ver
diff --git a/gcc/config/alpha/t-osf-pthread b/gcc/config/alpha/t-osf-pthread
new file mode 100644
index 000000000..968e65cce
--- /dev/null
+++ b/gcc/config/alpha/t-osf-pthread
@@ -0,0 +1,5 @@
+# Provide dummy POSIX threads functions
+LIB2FUNCS_EXTRA += $(srcdir)/gthr-posix.c
+
+# Compile libgcc2 with POSIX threads support
+TARGET_LIBGCC2_CFLAGS=-pthread
diff --git a/gcc/config/alpha/t-osf5 b/gcc/config/alpha/t-osf5
new file mode 100644
index 000000000..eabf2728f
--- /dev/null
+++ b/gcc/config/alpha/t-osf5
@@ -0,0 +1,48 @@
+# Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+# Compile libgcc2.a with pic.
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+# Build a shared libgcc library.
+SHLIB_EXT = .so
+SHLIB_NAME = @shlib_base_name@.so
+SHLIB_SONAME = @shlib_base_name@.so.1
+SHLIB_OBJS = @shlib_objs@
+
+# Beware *not* to hide the POSIX threads related symbols provided by
+# gthr-posix.c, as this would prevent their preemption by real symbols.
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-msym -Wl,-set_version,gcc.1 -Wl,-soname,$(SHLIB_SONAME) \
+ -o $(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) -lc && \
+ rm -f $(SHLIB_SONAME) && \
+ if [ -f $(SHLIB_NAME) ]; then \
+ mv -f $(SHLIB_NAME) $(SHLIB_NAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_NAME).tmp $(SHLIB_NAME) && \
+ $(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir); \
+ $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \
+ $(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME)
diff --git a/gcc/config/alpha/t-vms b/gcc/config/alpha/t-vms
new file mode 100644
index 000000000..410e219ff
--- /dev/null
+++ b/gcc/config/alpha/t-vms
@@ -0,0 +1,65 @@
+# Copyright (C) 1996, 1997, 1998, 2001, 2002,
+# 2007, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/alpha/vms-gcc_shell_handler.c
+
+EXTRA_PARTS = vms-dwarf2.o vms-dwarf2eh.o $(VMS_EXTRA_PARTS) \
+ crtbegin.o crtbeginS.o crtend.o crtendS.o
+
+# This object must be linked in to make the executable debuggable.
+# vms-ld handles it automatically when passed -g.
+$(T)vms-dwarf2.o : $(srcdir)/config/alpha/vms-dwarf2.asm
+ $(GCC_FOR_TARGET) -c -x assembler $< -o $@
+
+$(T)vms-dwarf2eh.o : $(srcdir)/config/alpha/vms-dwarf2eh.asm
+ $(GCC_FOR_TARGET) -c -x assembler $< -o $@
+
+MULTILIB_OPTIONS = mcpu=ev6
+MULTILIB_DIRNAMES = ev6
+MULTILIB_OSDIRNAMES = ev6
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+shlib_version:=$(shell echo $(BASEVER_c) | sed -e 's/\./,/' -e 's/\.//g')
+SHLIB_EXT = .exe
+SHLIB_OBJS = @shlib_objs@
+SHLIB_NAME = @shlib_base_name@.exe
+SHLIB_MULTILIB =
+SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(libsubdir)/$(SHLIB_NAME)
+SHLIB_SYMVEC = \
+ grep -F -e "\$$BSS\$$" -e "\$$DATA\$$" -e " sdata " -e " data.rel " -e " data.rel.ro " -e " sbss " \
+ -e "\$$LINK\$$" -e "\$$READONLY\$$" | \
+ sed -e "s/.*\$$LINK\$$ \(.*\)/SYMBOL_VECTOR=(\1=PROCEDURE)/" \
+ -e "s/.*\$$DATA\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.* sbss \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.* sdata \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.* data.rel \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.* data.rel.ro \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.*\$$BSS\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/" \
+ -e "s/.*\$$READONLY\$$ \(.*\)/SYMBOL_VECTOR=(\1=DATA)/"
+SHLIB_SYMVECX2 := $(subst $$,$$$$,$(SHLIB_SYMVEC))
+SHLIB_LINK = \
+ echo "case_sensitive=yes" > SYMVEC_$$$$$$$$.opt; \
+ objdump --syms $(SHLIB_OBJS) | \
+ $(SHLIB_SYMVECX2) >> SYMVEC_$$$$$$$$.opt ; \
+ echo "case_sensitive=NO" >> SYMVEC_$$$$$$$$.opt; \
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -nodefaultlibs \
+ -shared --for-linker=/noinform -o $(SHLIB_NAME) $(SHLIB_OBJS) \
+ --for-linker=SYMVEC_$$$$$$$$.opt \
+ --for-linker=gsmatch=equal,$(shlib_version)
diff --git a/gcc/config/alpha/va_list.h b/gcc/config/alpha/va_list.h
new file mode 100644
index 000000000..26e9515b4
--- /dev/null
+++ b/gcc/config/alpha/va_list.h
@@ -0,0 +1,42 @@
+/* A replacement for Digital Unix's <va_list.h>.
+
+Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef __GNUC_VA_LIST
+#define __GNUC_VA_LIST
+typedef __builtin_va_list __gnuc_va_list;
+#endif
+
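+/* Note (added commentary, not in the original header): system headers pick
+   which name they get by defining _HIDDEN_VA_LIST before inclusion.  The
+   first branch below emits the public va_list name, the second emits the
+   private __va_list name once, and the third cancels a hidden request when
+   the public name has already been emitted.  */
+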
+#if !defined(_VA_LIST) && !defined(_HIDDEN_VA_LIST)
+#define _VA_LIST
+typedef __gnuc_va_list va_list;
+
+#elif defined(_HIDDEN_VA_LIST) && !defined(_HIDDEN_VA_LIST_DONE)
+#define _HIDDEN_VA_LIST_DONE
+typedef __gnuc_va_list __va_list;
+
+#elif defined(_HIDDEN_VA_LIST) && defined(_VA_LIST)
+#undef _HIDDEN_VA_LIST
+
+#endif
diff --git a/gcc/config/alpha/vms-dwarf2.asm b/gcc/config/alpha/vms-dwarf2.asm
new file mode 100644
index 000000000..531c7aa99
--- /dev/null
+++ b/gcc/config/alpha/vms-dwarf2.asm
@@ -0,0 +1,77 @@
+/* VMS dwarf2 section sequentializer.
+ Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Linking with this file forces Dwarf2 debug sections to be
+ sequentially loaded by the VMS linker, enabling GDB to read them. */
+
+.section .debug_abbrev,NOWRT
+ .align 0
+ .globl $dwarf2.debug_abbrev
+$dwarf2.debug_abbrev:
+
+.section .debug_aranges,NOWRT
+ .align 0
+ .globl $dwarf2.debug_aranges
+$dwarf2.debug_aranges:
+
+.section .debug_frame,NOWRT
+ .align 0
+ .globl $dwarf2.debug_frame
+$dwarf2.debug_frame:
+
+.section .debug_info,NOWRT
+ .align 0
+ .globl $dwarf2.debug_info
+$dwarf2.debug_info:
+
+.section .debug_line,NOWRT
+ .align 0
+ .globl $dwarf2.debug_line
+$dwarf2.debug_line:
+
+.section .debug_loc,NOWRT
+ .align 0
+ .globl $dwarf2.debug_loc
+$dwarf2.debug_loc:
+
+.section .debug_macinfo,NOWRT
+ .align 0
+ .globl $dwarf2.debug_macinfo
+$dwarf2.debug_macinfo:
+
+.section .debug_pubnames,NOWRT
+ .align 0
+ .globl $dwarf2.debug_pubnames
+$dwarf2.debug_pubnames:
+
+.section .debug_str,NOWRT
+ .align 0
+ .globl $dwarf2.debug_str
+$dwarf2.debug_str:
+
+.section .debug_zzzzzz,NOWRT
+ .align 0
+ .globl $dwarf2.debug_zzzzzz
+$dwarf2.debug_zzzzzz:
diff --git a/gcc/config/alpha/vms-dwarf2eh.asm b/gcc/config/alpha/vms-dwarf2eh.asm
new file mode 100644
index 000000000..e0eaf9d37
--- /dev/null
+++ b/gcc/config/alpha/vms-dwarf2eh.asm
@@ -0,0 +1,30 @@
+/* VMS dwarf2 exception handling section sequentializer.
+ Copyright (C) 2002, 2009 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Linking with this file forces the Dwarf2 EH section to be
+   individually loaded by the VMS linker and enables the unwinder to
+   read it.  */
+
+.section .eh_frame,NOWRT
+ .align 0
diff --git a/gcc/config/alpha/vms-gcc_shell_handler.c b/gcc/config/alpha/vms-gcc_shell_handler.c
new file mode 100644
index 000000000..67d0fe7f9
--- /dev/null
+++ b/gcc/config/alpha/vms-gcc_shell_handler.c
@@ -0,0 +1,124 @@
+/* Static condition handler for Alpha/VMS.
+ Copyright (C) 2005-2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file implements __gcc_shell_handler, the static VMS condition handler
+ used as the indirection wrapper around user level handlers installed with
+   the establish_vms_condition_handler GCC builtin.
+
+ [ABI] in comments refers to the "HP OpenVMS calling standard" document
+ dated January 2005. */
+
+#include <vms/chfdef.h>
+#include <vms/pdscdef.h>
+#include <vms/ssdef.h>
+
+typedef void * ADDR;
+typedef unsigned long long REG;
+
+#define REG_AT(addr) (*(REG *)(addr))
+
+/* Compute pointer to procedure descriptor (Procedure Value) from Frame
+ Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure]. */
+#define PV_FOR(FP) \
+ (((FP) != 0) \
+ ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0)
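+
+/* Illustration (added commentary): per the [ABI] rules, if the quadword
+   stored at FP has its low three bits clear it is taken as the address of
+   the descriptor, so PV_FOR dereferences FP; otherwise the flag bits in a
+   descriptor's first quadword make it look misaligned, meaning FP itself
+   already points at the descriptor and is simply cast.  */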
+
+long
+__gcc_shell_handler (struct chf$signal_array *sig_arr,
+ struct chf$mech_array *mech_arr);
+
+/* Helper for __gcc_shell_handler. Fetch the pointer to procedure currently
+ registered as the VMS condition handler for the live function with a frame
+ pointer FP. */
+
+static ADDR
+get_dyn_handler_pointer (REG fp)
+{
+ /* From the frame pointer we find the procedure descriptor, and fetch
+ the handler_data field from there. This field contains the offset
+ from FP at which the address of the currently installed handler is
+ to be found. */
+
+ PDSCDEF * pd = PV_FOR (fp);
+  /* Procedure descriptor pointer for the live subprogram with FP as the
+     frame pointer, and to which __gcc_shell_handler is attached as a
+     condition handler.  */
+
+ REG handler_slot_offset;
+ /* Offset from FP at which the address of the currently established real
+ condition handler is to be found. This offset is available from the
+ handler_data field of the procedure descriptor. */
+
+ REG handler_data_offset;
+ /* The handler_data field position in the procedure descriptor, which
+ depends on the kind of procedure at hand. */
+
+ switch (pd->pdsc$w_flags & 0xf)
+ {
+ case PDSC$K_KIND_FP_STACK: /* [3.4.2 PD for stack frame procedures] */
+ handler_data_offset = 40;
+ break;
+
+ case PDSC$K_KIND_FP_REGISTER: /* [3.4.5 PD for reg frame procedures] */
+ handler_data_offset = 32;
+ break;
+
+ default:
+ handler_data_offset = 0;
+ break;
+ }
+
+ /* If we couldn't determine the handler_data field position, give up. */
+ if (handler_data_offset == 0)
+ return 0;
+
+ /* Otherwise, fetch the fp offset at which the real handler address is to be
+ found, then fetch and return the latter in turn. */
+
+ handler_slot_offset = REG_AT ((REG)pd + handler_data_offset);
+
+ return (ADDR) REG_AT (fp + handler_slot_offset);
+}
+
+/* The static VMS condition handler for GCC code. Fetch the address of the
+ currently established condition handler, then resignal if there is none or
+ call the handler with the VMS condition arguments. */
+
+long
+__gcc_shell_handler (struct chf$signal_array *sig_arr,
+ struct chf$mech_array *mech_arr)
+{
+ long ret;
+ long (*user_handler) (struct chf$signal_array *, struct chf$mech_array *);
+
+ user_handler = get_dyn_handler_pointer (mech_arr->chf$q_mch_frame);
+ if (!user_handler)
+ ret = SS$_RESIGNAL;
+ else
+ ret = user_handler (sig_arr, mech_arr);
+
+ return ret;
+}
+
diff --git a/gcc/config/alpha/vms-unwind.h b/gcc/config/alpha/vms-unwind.h
new file mode 100644
index 000000000..ea2c3a319
--- /dev/null
+++ b/gcc/config/alpha/vms-unwind.h
@@ -0,0 +1,293 @@
+/* Fallback frame unwinding for Alpha/VMS.
+ Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <vms/pdscdef.h>
+#include <vms/libicb.h>
+#include <vms/chfctxdef.h>
+#include <vms/chfdef.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR alpha_vms_fallback_frame_state
+
+typedef void * ADDR;
+typedef unsigned long long REG;
+typedef PDSCDEF * PV;
+
+#define REG_AT(addr) (*(REG *)(addr))
+#define ADDR_AT(addr) (*(ADDR *)(addr))
+
+/* Compute pointer to procedure descriptor (Procedure Value) from Frame
+ Pointer FP, according to the rules in [ABI-3.5.1 Current Procedure]. */
+#define PV_FOR(FP) \
+ (((FP) != 0) \
+ ? (((REG_AT (FP) & 0x7) == 0) ? *(PDSCDEF **)(FP) : (PDSCDEF *)(FP)) : 0)
+
+extern int SYS$GL_CALL_HANDL;
+/* This is actually declared as a "long" in system code, where longs are
+   always 4 bytes, while GCC longs might be 8 bytes.  */
+
+#define UPDATE_FS_FOR_CFA_GR(FS, GRN, LOC, CFA) \
+do { \
+(FS)->regs.reg[GRN].how = REG_SAVED_OFFSET; \
+(FS)->regs.reg[GRN].loc.offset = (_Unwind_Sword) ((REG) (LOC) - (REG) (CFA)); \
+} while (0)
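+
+/* Usage sketch (illustrative): UPDATE_FS_FOR_CFA_GR (FS, 26, rsa_addr,
+   new_cfa) records that r26 was saved at offset rsa_addr - new_cfa from
+   the CFA; the generic unwinder later adds that offset back to the CFA
+   to locate the saved value.  */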
+
+#define GIVEUP_ON_FAILURE(STATUS) \
+ { if ((((STATUS) & 1) != 1)) return _URC_END_OF_STACK; }
+#define DENOTES_EXC_DISPATCHER(PV) ((PV) == (ADDR) (REG) SYS$GL_CALL_HANDL)
+
+#define RA_COLUMN (DWARF_ALT_FRAME_RETURN_COLUMN)
+
+static int
+alpha_vms_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ static int eh_debug = -1;
+
+  /* Our goal is to update FS to reflect the state one step up from CONTEXT,
+     that is: the CFA, the return address, and the *saved* register locations
+     associated with the function designated by CONTEXT->ra.  We are called
+     when the libgcc unwinder has not found any dwarf FDE for this address,
+     which typically happens when trying to propagate a language exception
+     through a signal global vector or frame based handler.
+
+     The CONTEXT->reg[] entries reflect the state/location of register saves,
+     and so designate values live at the CONTEXT->ra point.  Of particular
+     value to us here is the frame pointer (r29), which gets us a procedure
+     value.  */
+
+ PV pv = (context->reg[29] != 0) ? PV_FOR (ADDR_AT (context->reg[29])) : 0;
+
+ int pkind = pv ? pv->pdsc$w_flags & 0xf : 0;
+ /* VMS procedure kind, as indicated by the procedure descriptor. We only
+ know how to deal with FP_STACK or FP_REGISTER here. */
+
+ ADDR new_cfa = 0;
+  /* CFA we will establish for the caller, computed in different ways,
+     e.g. depending on whether we cross an exception dispatcher frame.  */
+
+ CHFCTX *chfctx = 0;
+ /* Pointer to the VMS CHF context associated with an exception dispatcher
+ frame, if we happen to come across one. */
+
+ int i,j;
+
+ if (eh_debug == -1)
+ {
+ char * eh_debug_env = getenv ("EH_DEBUG");
+ eh_debug = eh_debug_env ? atoi (eh_debug_env) : 0;
+ }
+
+ if (eh_debug)
+ printf ("MD_FALLBACK running ...\n");
+
+ /* We only know how to deal with stack or reg frame procedures, so give
+ up if we're handed anything else. */
+ if (pkind != PDSC$K_KIND_FP_STACK && pkind != PDSC$K_KIND_FP_REGISTER)
+ return _URC_END_OF_STACK;
+
+ if (eh_debug)
+ printf ("FALLBACK: CTX FP = 0x%p, PV = 0x%p, EN = 0x%llx, RA = 0x%p\n",
+ ADDR_AT (context->reg[29]), pv, pv->pdsc$q_entry, context->ra);
+
+ fs->retaddr_column = RA_COLUMN;
+
+  /* If PV designates a VMS exception vector or condition handler, we need to
+     act as if the caller was the signaling point and establish the state of
+     the intermediate VMS code (CFA, RA and saved register locations) as if
+     it was a single regular function.  This requires special processing.
+
+     The data structures available from a condition dispatcher frame (signal
+     context) do not contain the values of most callee-saved registers, so
+     whatever PV designates, we need to account for the registers it saves.
+
+     Besides, we need to express all the locations with respect to a
+     consistent CFA value, so we compute this first.  */
+
+ if (DENOTES_EXC_DISPATCHER (pv))
+ {
+ /* The CFA to establish is the signaling point's stack pointer. We
+ compute it using the system invocation context unwinding services and
+ save the CHF context data pointer along the way for later uses. */
+
+ INVO_CONTEXT_BLK icb;
+ int status, invo_handle;
+
+ if (eh_debug)
+ printf ("FALLBACK: SYS$HANDLER\n");
+
+ icb.libicb$q_ireg [29] = REG_AT (context->reg[29]);
+ icb.libicb$q_ireg [30] = 0;
+ invo_handle = LIB$GET_INVO_HANDLE (&icb);
+
+ status = LIB$GET_INVO_CONTEXT (invo_handle, &icb);
+ GIVEUP_ON_FAILURE (status);
+
+ chfctx = (CHFCTX *) icb.libicb$ph_chfctx_addr;
+
+ status = LIB$GET_PREV_INVO_CONTEXT (&icb);
+ GIVEUP_ON_FAILURE (status);
+
+ new_cfa = (ADDR) icb.libicb$q_ireg[30];
+ }
+ else
+ {
+ /* The CFA to establish is the SP value on entry of the procedure
+ designated by PV, which we compute as the corresponding frame base
+ register value + frame size. Note that the frame base may differ
+ from CONTEXT->cfa, typically if the caller has performed dynamic
+ stack allocations. */
+
+ int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30;
+ ADDR base_addr = ADDR_AT (context->reg[base_reg]);
+
+ new_cfa = base_addr + pv->pdsc$l_size;
+ }
+
+ /* State to compute the caller's CFA by adding an offset to the current
+ one in CONTEXT. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - context->cfa;
+
+ /* Regular unwind first, accounting for the register saves performed by
+ the procedure designated by PV. */
+
+ switch (pkind)
+ {
+ case PDSC$K_KIND_FP_STACK:
+ {
+ /* The saved registers are all located in the Register Save Area,
+ except for the procedure value register (R27) found at the frame
+ base address. */
+
+ int base_reg = pv->pdsc$w_flags & PDSC$M_BASE_REG_IS_FP ? 29 : 30;
+ ADDR base_addr = ADDR_AT (context->reg[base_reg]);
+ ADDR rsa_addr = base_addr + pv->pdsc$w_rsa_offset;
+
+ if (eh_debug)
+ printf ("FALLBACK: STACK frame procedure\n");
+
+ UPDATE_FS_FOR_CFA_GR (fs, 27, base_addr, new_cfa);
+
+ /* The first RSA entry is for the return address register, R26. */
+
+ UPDATE_FS_FOR_CFA_GR (fs, 26, rsa_addr, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, rsa_addr, new_cfa);
+
+ /* The following entries are for registers marked as saved according
+ to ireg_mask. */
+ for (i = 0, j = 0; i < 32; i++)
+ if ((1 << i) & pv->pdsc$l_ireg_mask)
+ UPDATE_FS_FOR_CFA_GR (fs, i, rsa_addr + 8 * ++j, new_cfa);
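+	/* For instance (illustrative mask, not from the source): with
+	   pdsc$l_ireg_mask == 0x600, i.e. r9 and r10 saved, r9 is recorded
+	   at rsa_addr + 8 and r10 at rsa_addr + 16, right after the R26
+	   entry.  */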
+
+ /* ??? floating point registers ? */
+
+ break;
+ }
+
+ case PDSC$K_KIND_FP_REGISTER:
+ {
+ if (eh_debug)
+ printf ("FALLBACK: REGISTER frame procedure\n");
+
+ fs->regs.reg[RA_COLUMN].how = REG_SAVED_REG;
+ fs->regs.reg[RA_COLUMN].loc.reg = pv->pdsc$b_save_ra;
+
+ fs->regs.reg[29].how = REG_SAVED_REG;
+ fs->regs.reg[29].loc.reg = pv->pdsc$b_save_fp;
+
+ break;
+ }
+
+ default:
+ /* Should never reach here. */
+ return _URC_END_OF_STACK;
+ }
+
+  /* If PV designates an exception dispatcher, we have to adjust the return
+     address column to get at the signal occurrence point, and account for
+     what the CHF context contains.  */
+
+ if (DENOTES_EXC_DISPATCHER (pv))
+ {
+ /* The PC of the instruction causing the condition is available from the
+ signal argument vector. Extra saved register values are available
+ from the mechargs array. */
+
+ CHF$SIGNAL_ARRAY *sigargs
+ = (CHF$SIGNAL_ARRAY *) chfctx->chfctx$q_sigarglst;
+
+ CHF$MECH_ARRAY *mechargs
+ = (CHF$MECH_ARRAY *) chfctx->chfctx$q_mcharglst;
+
+ ADDR condpc_addr
+ = &((int *)(&sigargs->chf$l_sig_name)) [sigargs->chf$is_sig_args-2];
+
+ ADDR rei_frame_addr = (void *) mechargs->chf$q_mch_esf_addr;
+
+ /* Adjust the return address location. */
+
+ UPDATE_FS_FOR_CFA_GR (fs, RA_COLUMN, condpc_addr, new_cfa);
+
+ /* The frame pointer at the condition point is available from the
+ chf context directly. */
+
+ UPDATE_FS_FOR_CFA_GR (fs, 29, &chfctx->chfctx$q_expt_fp, new_cfa);
+
+ /* Registers available from the mechargs array. */
+
+ UPDATE_FS_FOR_CFA_GR (fs, 0, &mechargs->chf$q_mch_savr0, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 1, &mechargs->chf$q_mch_savr1, new_cfa);
+
+ UPDATE_FS_FOR_CFA_GR (fs, 16, &mechargs->chf$q_mch_savr16, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 17, &mechargs->chf$q_mch_savr17, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 18, &mechargs->chf$q_mch_savr18, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 19, &mechargs->chf$q_mch_savr19, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 20, &mechargs->chf$q_mch_savr20, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 21, &mechargs->chf$q_mch_savr21, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 22, &mechargs->chf$q_mch_savr22, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 23, &mechargs->chf$q_mch_savr23, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 24, &mechargs->chf$q_mch_savr24, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 25, &mechargs->chf$q_mch_savr25, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 26, &mechargs->chf$q_mch_savr26, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 27, &mechargs->chf$q_mch_savr27, new_cfa);
+ UPDATE_FS_FOR_CFA_GR (fs, 28, &mechargs->chf$q_mch_savr28, new_cfa);
+
+ /* Registers R2 to R7 are available from the rei frame pointer. */
+
+ for (i = 2; i <= 7; i ++)
+ UPDATE_FS_FOR_CFA_GR (fs, i, rei_frame_addr+(i - 2)*8, new_cfa);
+
+ /* ??? floating point registers ? */
+ }
+
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+
+
diff --git a/gcc/config/alpha/vms.h b/gcc/config/alpha/vms.h
new file mode 100644
index 000000000..fab0f2577
--- /dev/null
+++ b/gcc/config/alpha/vms.h
@@ -0,0 +1,360 @@
+/* Output variables, constants and external declarations, for GNU compiler.
+ Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2004, 2005, 2007, 2008,
+ 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OBJECT_SUFFIX ".obj"
+#define TARGET_EXECUTABLE_SUFFIX ".exe"
+
+/* The Alpha/VMS object format is not really ELF, but this makes compiling
+   crtstuff.c and dealing with shared library initialization much easier.  */
+#define OBJECT_FORMAT_ELF
+
+/* This enables certain macros in alpha.h, which will make an indirect
+ reference to an external symbol an invalid address. This needs to be
+ defined before we include alpha.h, since it determines which macros
+ are used for GO_IF_*. */
+
+#define NO_EXTERNAL_INDIRECT_ADDRESS
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("vms"); \
+ builtin_define_std ("VMS"); \
+ builtin_define ("__ALPHA"); \
+ builtin_assert ("system=vms"); \
+ if (TARGET_FLOAT_VAX) \
+ builtin_define ("__G_FLOAT"); \
+ else \
+ builtin_define ("__IEEE_FLOAT"); \
+ } while (0)
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS|MASK_GAS)
+#undef TARGET_ABI_OPEN_VMS
+#define TARGET_ABI_OPEN_VMS 1
+
+#undef TARGET_NAME
+#define TARGET_NAME "OpenVMS/Alpha"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME);
+
+#define VMS_DEBUG_MAIN_POINTER "TRANSFER$BREAK$GO"
+
+#undef PCC_STATIC_STRUCT_RETURN
+
+/* "long" is 32 bits, but 64 bits for Ada. */
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 32
+#define ADA_LONG_TYPE_SIZE 64
+
+/* Pointers are 32 bits, but the hardware has 64-bit addresses,
+   sign-extended.  */
+#undef POINTER_SIZE
+#define POINTER_SIZE 32
+#define POINTERS_EXTEND_UNSIGNED 0
+
+#define MAX_OFILE_ALIGNMENT 524288 /* 8 x 2^16 by DEC Ada Test CD40VRA */
+
+/* The maximum alignment 'malloc' honors. */
+#undef MALLOC_ABI_ALIGNMENT
+#define MALLOC_ABI_ALIGNMENT ((TARGET_MALLOC64 ? 16 : 8) * BITS_PER_UNIT)
+
+#undef FIXED_REGISTERS
+#define FIXED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
+
+#undef CALL_USED_REGISTERS
+#define CALL_USED_REGISTERS \
+ {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+
+/* List the order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS.
+
+ We allocate in the following order:
+ $f1 (nonsaved floating-point register)
+ $f10-$f15 (likewise)
+ $f22-$f30 (likewise)
+ $f21-$f16 (likewise, but input args)
+ $f0 (nonsaved, but return value)
+ $f2-$f9 (saved floating-point registers)
+ $1 (nonsaved integer registers)
+ $22-$25 (likewise)
+ $28 (likewise)
+ $0 (likewise, but return value)
+ $21-$16 (likewise, but input args)
+ $27 (procedure value in OSF, nonsaved in NT)
+ $2-$8 (saved integer registers)
+ $9-$14 (saved integer registers)
+ $26 (return PC)
+ $15 (frame pointer)
+ $29 (global pointer)
+ $30, $31, $f31 (stack pointer and always zero/ap & fp) */
+
+#undef REG_ALLOC_ORDER
+#define REG_ALLOC_ORDER \
+ {33, \
+ 42, 43, 44, 45, 46, 47, \
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, \
+ 53, 52, 51, 50, 49, 48, \
+ 32, \
+ 34, 35, 36, 37, 38, 39, 40, 41, \
+ 1, \
+ 22, 23, 24, 25, \
+ 28, \
+ 0, \
+ 21, 20, 19, 18, 17, 16, \
+ 27, \
+ 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 11, 12, 13, 14, \
+ 26, \
+ 15, \
+ 29, \
+ 30, 31, 63 }
+
+#undef HARD_FRAME_POINTER_REGNUM
+#define HARD_FRAME_POINTER_REGNUM 29
+
+/* Define registers used by the epilogue and return instruction. */
+#undef EPILOGUE_USES
+#define EPILOGUE_USES(REGNO) ((REGNO) == 26 || (REGNO) == 29)
+
+#undef INITIAL_ELIMINATION_OFFSET
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = alpha_vms_initial_elimination_offset(FROM, TO))
+
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On Alpha/VMS, this is a structure that contains the number of
+ arguments and, for each argument, the datatype of that argument.
+
+   The number of arguments is the number of words of arguments scanned so
+   far.
+ Thus 6 or more means all following args should go on the stack. */
+
+enum avms_arg_type {I64, FF, FD, FG, FS, FT};
+typedef struct {int num_args; enum avms_arg_type atypes[6];} avms_arg_info;
+
+#undef CUMULATIVE_ARGS
+#define CUMULATIVE_ARGS avms_arg_info
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#undef INIT_CUMULATIVE_ARGS
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM).num_args = 0; \
+ (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \
+ (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64;
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.rdata"
+#define CTORS_SECTION_ASM_OP "\t.ctors"
+#define DTORS_SECTION_ASM_OP "\t.dtors"
+#define SDATA_SECTION_ASM_OP "\t.sdata"
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t.long " #FUNC"\n");
+
+#undef ASM_OUTPUT_ADDR_DIFF_ELT
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) gcc_unreachable ()
+
+#undef ASM_OUTPUT_ADDR_VEC_ELT
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t.quad $L%d\n", (VALUE))
+
+#undef CASE_VECTOR_MODE
+#define CASE_VECTOR_MODE DImode
+#undef CASE_VECTOR_PC_RELATIVE
+
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \
+{ ASM_OUTPUT_ALIGN (FILE, 3); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); }
+
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object. */
+
+#define COMMON_ASM_OP "\t.comm\t"
+
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
+ vms_output_aligned_decl_common (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Control how constructors and destructors are emitted. */
+#define TARGET_ASM_CONSTRUCTOR vms_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR vms_asm_out_destructor
+
+#undef SDB_DEBUGGING_INFO
+#undef MIPS_DEBUGGING_INFO
+#undef DBX_DEBUGGING_INFO
+
+#define DWARF2_DEBUGGING_INFO 1
+#define VMS_DEBUGGING_INFO 1
+
+#define DWARF2_UNWIND_INFO 1
+
+#undef EH_RETURN_HANDLER_RTX
+#define EH_RETURN_HANDLER_RTX \
+ gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, 8))
+
+#define LINK_EH_SPEC "vms-dwarf2eh.o%s "
+#define LINK_GCC_C_SEQUENCE_SPEC "%G"
+
+#ifdef IN_LIBGCC2
+/* Get the definition for MD_FALLBACK_FRAME_STATE_FOR from a separate
+   file.  This way only libgcc, rather than the world, needs to be
+   recompiled when this macro changes.  */
+
+#define MD_UNWIND_SUPPORT "config/alpha/vms-unwind.h"
+#endif
+
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ avms_asm_output_external (FILE, DECL, NAME)
+
+typedef struct crtl_name_spec
+{
+ const char *const name;
+ const char *deccname;
+ int referenced;
+} crtl_name_spec;
+
+#include "config/vms/vms-crtl.h"
+
+/* Alias CRTL names to 32/64-bit DECCRTL functions.
+   FIXME: This should do a binary search.  */
+#define DO_CRTL_NAMES \
+ do \
+ { \
+ int i; \
+ static crtl_name_spec vms_crtl_names[] = CRTL_NAMES; \
+ static int malloc64_init = 0; \
+ \
+ if ((malloc64_init == 0) && TARGET_MALLOC64) \
+ { \
+ for (i=0; vms_crtl_names [i].name; i++) \
+ { \
+ if (strcmp ("calloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_calloc64"; \
+ else \
+ if (strcmp ("malloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_malloc64"; \
+ else \
+ if (strcmp ("realloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_realloc64"; \
+ else \
+ if (strcmp ("strdup", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_strdup64"; \
+ } \
+ malloc64_init = 1; \
+ } \
+ for (i=0; vms_crtl_names [i].name; i++) \
+ if (!vms_crtl_names [i].referenced && \
+ (strcmp (name, vms_crtl_names [i].name) == 0)) \
+ { \
+ fprintf (file, "\t%s=%s\n", \
+ name, vms_crtl_names [i].deccname); \
+ vms_crtl_names [i].referenced = 1; \
+ } \
+ } while (0)
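+
+/* Illustrative effect (assumed invocation context): the first time the
+   compiler emits a reference to "malloc" with -mmalloc64 in force, the
+   loop above prints an assembler alias line of the form
+   "\tmalloc=decc$_malloc64"; the referenced flag then suppresses any
+   further alias for that name.  */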
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ fprintf (FILE, "\t.align %d\n", LOG);
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION vms_asm_named_section
+
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { fprintf ((FILE), "\t.literals\n"); \
+ in_section = NULL; \
+ fprintf ((FILE), "\t"); \
+ assemble_name (FILE, LABEL1); \
+ fprintf (FILE, " = "); \
+ assemble_name (FILE, LABEL2); \
+ fprintf (FILE, "\n"); \
+ } while (0)
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE VMS_AND_DWARF2_DEBUG
+
+#define ASM_PN_FORMAT "%s___%lu"
+
+/* ??? VMS uses different linkage. */
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+
+#undef ASM_SPEC
+#undef ASM_FINAL_SPEC
+
+/* The VMS convention is to always provide minimal debug info
+ for a traceback unless specifically overridden. */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (write_symbols == NO_DEBUG \
+ && debug_info_level == DINFO_LEVEL_NONE) \
+ { \
+ write_symbols = VMS_DEBUG; \
+ debug_info_level = DINFO_LEVEL_TERSE; \
+ } \
+} while (0)
+
+/* Link with vms-dwarf2.o if -g (except -g0). This causes the
+ VMS link to pull all the dwarf2 debug sections together. */
+#undef LINK_SPEC
+#define LINK_SPEC "%{g:-g vms-dwarf2.o%s} %{g0} %{g1:-g1 vms-dwarf2.o%s} \
+%{g2:-g2 vms-dwarf2.o%s} %{g3:-g3 vms-dwarf2.o%s} %{shared} %{v} %{map}"
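+
+/* Reading the spec above (added commentary): "%{g:-g vms-dwarf2.o%s}"
+   passes -g plus the full path of vms-dwarf2.o to the link whenever -g was
+   given, while the bare "%{g0}" passes -g0 through without pulling in the
+   helper object.  */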
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+"%{!shared:%{mvms-return-codes:vcrt0.o%s} %{!mvms-return-codes:pcrt0.o%s} \
+ crtbegin.o%s} \
+ %{!static:%{shared:crtbeginS.o%s}}"
+
+#define ENDFILE_SPEC \
+"%{!shared:crtend.o%s} %{!static:%{shared:crtendS.o%s}}"
+
+#define NAME__MAIN "__gccmain"
+#define SYMBOL__MAIN __gccmain
+
+#define INIT_SECTION_ASM_OP "\t.section LIB$INITIALIZE,GBL,NOWRT"
+
+#define LONGLONG_STANDALONE 1
+
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE vms_valid_pointer_mode
diff --git a/gcc/config/alpha/vms64.h b/gcc/config/alpha/vms64.h
new file mode 100644
index 000000000..07424d0f2
--- /dev/null
+++ b/gcc/config/alpha/vms64.h
@@ -0,0 +1,53 @@
+/* Output variables, constants and external declarations, for GNU compiler.
+ Copyright (C) 2001, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Douglas Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("vms"); \
+ builtin_define_std ("VMS"); \
+ builtin_define ("__ALPHA"); \
+ builtin_assert ("system=vms"); \
+ builtin_define ("__IEEE_FLOAT"); \
+ builtin_define ("__LONG_POINTERS=1"); \
+ } while (0)
+
+#undef SUBTARGET_SWITCHES
+#define SUBTARGET_SWITCHES \
+ { "malloc64", MASK_MALLOC64, "Malloc data into P2 space" },
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_FPREGS | MASK_GAS | MASK_MALLOC64)
+
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 64
+
+#undef POINTER_SIZE
+#define POINTER_SIZE 64
+
+/* Even though pointers are 64 bits, only 32 bits ever remain significant
+   in code addresses.  */
+#define MASK_RETURN_ADDR (GEN_INT (0xffffffff))
+
+/* Defaults to "long int" */
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+
+# include "config/vms/vms-crtl-64.h"
diff --git a/gcc/config/alpha/x-alpha b/gcc/config/alpha/x-alpha
new file mode 100644
index 000000000..ecca70424
--- /dev/null
+++ b/gcc/config/alpha/x-alpha
@@ -0,0 +1,3 @@
+driver-alpha.o: $(srcdir)/config/alpha/driver-alpha.c \
+ $(CONFIG_H) $(SYSTEM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/alpha/x-osf b/gcc/config/alpha/x-osf
new file mode 100644
index 000000000..5bb9c9098
--- /dev/null
+++ b/gcc/config/alpha/x-osf
@@ -0,0 +1,4 @@
+host-osf.o : $(srcdir)/config/alpha/host-osf.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h $(HOOKS_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/alpha/host-osf.c
diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def
new file mode 100644
index 000000000..c2d2ceaf8
--- /dev/null
+++ b/gcc/config/arc/arc-modes.def
@@ -0,0 +1,24 @@
+/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
+ Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Some insns set all condition code flags, some only set the ZNC flags, and
+ some only set the ZN flags. */
+
+CC_MODE (CCZNC);
+CC_MODE (CCZN);
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
new file mode 100644
index 000000000..5550ebfbe
--- /dev/null
+++ b/gcc/config/arc/arc-protos.h
@@ -0,0 +1,63 @@
+/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
+ Copyright (C) 2000, 2004, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef RTX_CODE
+extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx);
+
+/* Define the function that builds the compare insn for scc and bcc.  */
+extern struct rtx_def *gen_compare_reg (enum rtx_code, rtx, rtx);
+#endif
+
+/* Declarations for various fns used in the .md file. */
+extern const char *output_shift (rtx *);
+
+extern int symbolic_operand (rtx, enum machine_mode);
+extern int arc_double_limm_p (rtx);
+extern int arc_eligible_for_epilogue_delay (rtx, int);
+extern void arc_initialize_trampoline (rtx, rtx, rtx);
+extern void arc_print_operand (FILE *, rtx, int);
+extern void arc_print_operand_address (FILE *, rtx);
+extern void arc_final_prescan_insn (rtx, rtx *, int);
+extern int call_address_operand (rtx, enum machine_mode);
+extern int call_operand (rtx, enum machine_mode);
+extern int symbolic_memory_operand (rtx, enum machine_mode);
+extern int short_immediate_operand (rtx, enum machine_mode);
+extern int long_immediate_operand (rtx, enum machine_mode);
+extern int long_immediate_loadstore_operand (rtx, enum machine_mode);
+extern int move_src_operand (rtx, enum machine_mode);
+extern int move_double_src_operand (rtx, enum machine_mode);
+extern int move_dest_operand (rtx, enum machine_mode);
+extern int load_update_operand (rtx, enum machine_mode);
+extern int store_update_operand (rtx, enum machine_mode);
+extern int nonvol_nonimm_operand (rtx, enum machine_mode);
+extern int const_sint32_operand (rtx, enum machine_mode);
+extern int const_uint32_operand (rtx, enum machine_mode);
+extern int proper_comparison_operator (rtx, enum machine_mode);
+extern int shift_operator (rtx, enum machine_mode);
+
+extern enum arc_function_type arc_compute_function_type (tree);
+
+
+extern unsigned int arc_compute_frame_size (int);
+extern void arc_save_restore (FILE *, const char *, unsigned int,
+ unsigned int, const char *);
+extern int arc_delay_slots_for_epilogue (void);
+extern void arc_ccfsm_at_label (const char *, int);
+extern int arc_ccfsm_branch_deleted_p (void);
+extern void arc_ccfsm_record_branch_deleted (void);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
new file mode 100644
index 000000000..f1afda20f
--- /dev/null
+++ b/gcc/config/arc/arc.c
@@ -0,0 +1,2491 @@
+/* Subroutines used for code generation on the Argonaut ARC cpu.
+ Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* ??? This is an old port, and is undoubtedly suffering from bit rot. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "function.h"
+#include "expr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "df.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+
+/* Which cpu we're compiling for. */
+int arc_cpu_type;
+
+/* Name of mangle string to add to symbols to separate code compiled for each
+ cpu (or NULL). */
+const char *arc_mangle_cpu;
+
+/* Name of text, data, and rodata sections used in varasm.c. */
+const char *arc_text_section;
+const char *arc_data_section;
+const char *arc_rodata_section;
+
+/* Array of valid operand punctuation characters. */
+char arc_punct_chars[256];
+
+/* Variables used by arc_final_prescan_insn to implement conditional
+ execution. */
+static int arc_ccfsm_state;
+static int arc_ccfsm_current_cc;
+static rtx arc_ccfsm_target_insn;
+static int arc_ccfsm_target_label;
+
+/* The maximum number of insns skipped which will be conditionalised if
+ possible. */
+#define MAX_INSNS_SKIPPED 3
+
+/* A nop is needed between a 4 byte insn that sets the condition codes and
+ a branch that uses them (the same isn't true for an 8 byte insn that sets
+ the condition codes). Set by arc_final_prescan_insn. Used by
+ arc_print_operand. */
+static int last_insn_set_cc_p;
+static int current_insn_set_cc_p;
+static bool arc_handle_option (size_t, const char *, int);
+static void record_cc_ref (rtx);
+static void arc_init_reg_tables (void);
+static int get_arc_condition_code (rtx);
+static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
+static bool arc_assemble_integer (rtx, unsigned int, int);
+static void arc_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void arc_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void arc_file_start (void);
+static void arc_internal_label (FILE *, const char *, unsigned long);
+static void arc_va_start (tree, rtx);
+static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static bool arc_rtx_costs (rtx, int, int, int *, bool);
+static int arc_address_cost (rtx, bool);
+static void arc_external_libcall (rtx);
+static bool arc_return_in_memory (const_tree, const_tree);
+static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx arc_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void arc_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int arc_function_arg_boundary (enum machine_mode, const_tree);
+static void arc_trampoline_init (rtx, tree, rtx);
+static void arc_option_override (void);
+static void arc_conditional_register_usage (void);
+
+
+/* ARC specific attributes.  */
+
+static const struct attribute_spec arc_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER arc_assemble_integer
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE arc_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE arc_output_function_epilogue
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START arc_file_start
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
+#undef TARGET_ASM_INTERNAL_LABEL
+#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
+#undef TARGET_ASM_EXTERNAL_LIBCALL
+#define TARGET_ASM_EXTERNAL_LIBCALL arc_external_libcall
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION arc_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arc_option_override
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS arc_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST arc_address_cost
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arc_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY arc_function_arg_boundary
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START arc_va_start
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arc_trampoline_init
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+arc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mcpu_:
+ return strcmp (arg, "base") == 0 || ARC_EXTENSION_CPU (arg);
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE.
+ These need to be done at start up. It's convenient to do them here. */
+
+static void
+arc_option_override (void)
+{
+ char *tmp;
+
+ /* Set the pseudo-ops for the various standard sections. */
+ arc_text_section = tmp = XNEWVEC (char, strlen (arc_text_string) + sizeof (ARC_SECTION_FORMAT) + 1);
+ sprintf (tmp, ARC_SECTION_FORMAT, arc_text_string);
+ arc_data_section = tmp = XNEWVEC (char, strlen (arc_data_string) + sizeof (ARC_SECTION_FORMAT) + 1);
+ sprintf (tmp, ARC_SECTION_FORMAT, arc_data_string);
+ arc_rodata_section = tmp = XNEWVEC (char, strlen (arc_rodata_string) + sizeof (ARC_SECTION_FORMAT) + 1);
+ sprintf (tmp, ARC_SECTION_FORMAT, arc_rodata_string);
+
+ arc_init_reg_tables ();
+
+ /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */
+ memset (arc_punct_chars, 0, sizeof (arc_punct_chars));
+ arc_punct_chars['#'] = 1;
+ arc_punct_chars['*'] = 1;
+ arc_punct_chars['?'] = 1;
+ arc_punct_chars['!'] = 1;
+ arc_punct_chars['~'] = 1;
+}
+
+/* The condition codes of the ARC, and the inverse function. */
+static const char *const arc_condition_codes[] =
+{
+ "al", 0, "eq", "ne", "p", "n", "c", "nc", "v", "nv",
+ "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
+};
+
+#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1)
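+
+/* Added commentary: the table above stores each condition next to its
+   inverse (eq/ne at indices 2/3, gt/le at 10/11, and so on), so flipping
+   the low bit of an index inverts the condition.  */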
+
+/* Returns the index of the ARC condition code string in
+ `arc_condition_codes'. COMPARISON should be an rtx like
+ `(eq (...) (...))'. */
+
+static int
+get_arc_condition_code (rtx comparison)
+{
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return 2;
+ case NE : return 3;
+ case GT : return 10;
+ case LE : return 11;
+ case GE : return 12;
+ case LT : return 13;
+ case GTU : return 14;
+ case LEU : return 15;
+ case LTU : return 6;
+ case GEU : return 7;
+ default : gcc_unreachable ();
+ }
+ /*NOTREACHED*/
+ return (42);
+}
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. */
+
+enum machine_mode
+arc_select_cc_mode (enum rtx_code op,
+		    rtx x,
+ rtx y ATTRIBUTE_UNUSED)
+{
+ switch (op)
+ {
+ case EQ :
+ case NE :
+ return CCZNmode;
+ default :
+ switch (GET_CODE (x))
+ {
+ case AND :
+ case IOR :
+ case XOR :
+ case SIGN_EXTEND :
+ case ZERO_EXTEND :
+ return CCZNmode;
+ case ASHIFT :
+ case ASHIFTRT :
+ case LSHIFTRT :
+ return CCZNCmode;
+ default:
+ break;
+ }
+ }
+ return CCmode;
+}
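+
+/* For example (added commentary): an equality test, or a comparison whose
+   first operand is an AND, only needs the Z and N flags and so selects
+   CCZNmode; proper_comparison_operator below then only accepts EQ/NE
+   branches on that mode.  */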
+
+/* Vectors to keep interesting information about registers where it can easily
+   be got.  We used to use the actual mode value as the bit number, but there
+   are (or may be) more than 32 modes now.  Instead we use two tables: one
+   indexed by hard register number, and one indexed by mode.  */
+
+/* The purpose of arc_mode_class is to shrink the range of modes so that
+ they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
+ mapped into one arc_mode_class mode. */
+
+enum arc_mode_class {
+ C_MODE,
+ S_MODE, D_MODE, T_MODE, O_MODE,
+ SF_MODE, DF_MODE, TF_MODE, OF_MODE
+};
+
+/* Modes for condition codes. */
+#define C_MODES (1 << (int) C_MODE)
+
+/* Modes for single-word and smaller quantities. */
+#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
+
+/* Modes for double-word and smaller quantities. */
+#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
+
+/* Modes for quad-word and smaller quantities. */
+#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
+
+/* Value is 1 if register/mode pair is acceptable on arc. */
+
+const unsigned int arc_hard_regno_mode_ok[] = {
+ T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
+ T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
+ T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
+ D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+
+ /* ??? Leave these as S_MODES for now. */
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES
+};
+
+unsigned int arc_mode_class [NUM_MACHINE_MODES];
+
+enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
+
+static void
+arc_init_reg_tables (void)
+{
+ int i;
+
+ for (i = 0; i < NUM_MACHINE_MODES; i++)
+ {
+ switch (GET_MODE_CLASS (i))
+ {
+ case MODE_INT:
+ case MODE_PARTIAL_INT:
+ case MODE_COMPLEX_INT:
+ if (GET_MODE_SIZE (i) <= 4)
+ arc_mode_class[i] = 1 << (int) S_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ arc_mode_class[i] = 1 << (int) D_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ arc_mode_class[i] = 1 << (int) T_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ arc_mode_class[i] = 1 << (int) O_MODE;
+ else
+ arc_mode_class[i] = 0;
+ break;
+ case MODE_FLOAT:
+ case MODE_COMPLEX_FLOAT:
+ if (GET_MODE_SIZE (i) <= 4)
+ arc_mode_class[i] = 1 << (int) SF_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ arc_mode_class[i] = 1 << (int) DF_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ arc_mode_class[i] = 1 << (int) TF_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ arc_mode_class[i] = 1 << (int) OF_MODE;
+ else
+ arc_mode_class[i] = 0;
+ break;
+ case MODE_CC:
+ arc_mode_class[i] = 1 << (int) C_MODE;
+ break;
+ default:
+ arc_mode_class[i] = 0;
+ break;
+ }
+ }
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (i < 60)
+ arc_regno_reg_class[i] = GENERAL_REGS;
+ else if (i == 60)
+ arc_regno_reg_class[i] = LPCOUNT_REG;
+ else if (i == 61)
+ arc_regno_reg_class[i] = NO_REGS /* CC_REG: must be NO_REGS */;
+ else
+ arc_regno_reg_class[i] = NO_REGS;
+ }
+}
+
+/* ARC specific attribute support.
+
+ The ARC has these attributes:
+ interrupt - for interrupt functions
+*/
+
+/* Handle an "interrupt" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+arc_handle_interrupt_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree value = TREE_VALUE (args);
+
+ if (TREE_CODE (value) != STRING_CST)
+ {
+ warning (OPT_Wattributes,
+ "argument of %qE attribute is not a string constant",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
+ && strcmp (TREE_STRING_POINTER (value), "ilink2"))
+ {
+ warning (OPT_Wattributes,
+ "argument of %qE attribute is not \"ilink1\" or \"ilink2\"",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+
+/* Acceptable arguments to the call insn. */
+
+int
+call_address_operand (rtx op, enum machine_mode mode)
+{
+ return (symbolic_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && LEGITIMATE_CONSTANT_P (op))
+ || (GET_CODE (op) == REG));
+}
+
+int
+call_operand (rtx op, enum machine_mode mode)
+{
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = XEXP (op, 0);
+ return call_address_operand (op, mode);
+}
+
+/* Returns 1 if OP is a symbol reference. */
+
+int
+symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST :
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* Return truth value of statement that OP is a symbolic memory
+ operand of mode MODE. */
+
+int
+symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = XEXP (op, 0);
+ return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
+ || GET_CODE (op) == LABEL_REF);
+}
+
+/* Return true if OP is a short immediate (shimm) value. */
+
+int
+short_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ return SMALL_INT (INTVAL (op));
+}
+
+/* Return true if OP will require a long immediate (limm) value.
+ This is currently only used when calculating length attributes. */
+
+int
+long_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+ return 1;
+ case CONST_INT :
+ return !SMALL_INT (INTVAL (op));
+ case CONST_DOUBLE :
+ /* These can happen because large unsigned 32-bit constants are
+ represented this way (the multiplication patterns can cause these
+ to be generated). They also occur for SFmode values. */
+ return 1;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/* Return true if OP is a MEM that when used as a load or store address will
+ require an 8 byte insn.
+ Load and store instructions don't allow the same possibilities but they're
+ similar enough that this one function will do.
+ This is currently only used when calculating length attributes. */
+
+int
+long_immediate_loadstore_operand (rtx op,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ op = XEXP (op, 0);
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+ return 1;
+ case CONST_INT :
+ /* This must be handled as "st c,[limm]". Ditto for load.
+ Technically, the assembler could translate some possibilities to
+ "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't
+ assume that it does. */
+ return 1;
+ case CONST_DOUBLE :
+ /* These can happen because large unsigned 32-bit constants are
+ represented this way (the multiplication patterns can cause these
+ to be generated). They also occur for SFmode values. */
+ return 1;
+ case REG :
+ return 0;
+ case PLUS :
+ if (GET_CODE (XEXP (op, 1)) == CONST_INT
+ && !SMALL_INT (INTVAL (XEXP (op, 1))))
+ return 1;
+ return 0;
+ default:
+ break;
+ }
+ return 0;
+}
+
+/* Return true if OP is an acceptable argument for a single word
+ move source. */
+
+int
+move_src_operand (rtx op, enum machine_mode mode)
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+ return 1;
+ case CONST_INT :
+ return (LARGE_INT (INTVAL (op)));
+ case CONST_DOUBLE :
+ /* We can handle DImode integer constants in SImode if the value
+ (signed or unsigned) will fit in 32 bits. This is needed because
+ large unsigned 32-bit constants are represented as CONST_DOUBLEs. */
+ if (mode == SImode)
+ return arc_double_limm_p (op);
+ /* We can handle 32-bit floating point constants. */
+ if (mode == SFmode)
+ return GET_MODE (op) == SFmode;
+ return 0;
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+}
+
+/* Return true if OP is an acceptable argument for a double word
+ move source. */
+
+int
+move_double_src_operand (rtx op, enum machine_mode mode)
+{
+ switch (GET_CODE (op))
+ {
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return move_double_src_operand (SUBREG_REG (op), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ /* Disallow auto inc/dec for now. */
+ if (GET_CODE (XEXP (op, 0)) == PRE_DEC
+ || GET_CODE (XEXP (op, 0)) == PRE_INC)
+ return 0;
+ return address_operand (XEXP (op, 0), mode);
+ case CONST_INT :
+ case CONST_DOUBLE :
+ return 1;
+ default :
+ return 0;
+ }
+}
+
+/* Return true if OP is an acceptable argument for a move destination. */
+
+int
+move_dest_operand (rtx op, enum machine_mode mode)
+{
+ switch (GET_CODE (op))
+ {
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+}
+
+/* Return true if OP is valid load with update operand. */
+
+int
+load_update_operand (rtx op, enum machine_mode mode)
+{
+ if (GET_CODE (op) != MEM
+ || GET_MODE (op) != mode)
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_MODE (op) != Pmode
+ || !register_operand (XEXP (op, 0), Pmode)
+ || !nonmemory_operand (XEXP (op, 1), Pmode))
+ return 0;
+ return 1;
+}
+
+/* Return true if OP is a valid store-with-update operand. */
+
+int
+store_update_operand (rtx op, enum machine_mode mode)
+{
+ if (GET_CODE (op) != MEM
+ || GET_MODE (op) != mode)
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_MODE (op) != Pmode
+ || !register_operand (XEXP (op, 0), Pmode)
+ || !(GET_CODE (XEXP (op, 1)) == CONST_INT
+ && SMALL_INT (INTVAL (XEXP (op, 1)))))
+ return 0;
+ return 1;
+}
+
+/* Return true if OP is a non-volatile non-immediate operand.
+ Volatile memory refs require a special "cache-bypass" instruction
+ and only the standard movXX patterns are set up to handle them. */
+
+int
+nonvol_nonimm_operand (rtx op, enum machine_mode mode)
+{
+ if (GET_CODE (op) == MEM && MEM_VOLATILE_P (op))
+ return 0;
+ return nonimmediate_operand (op, mode);
+}
+
+/* Accept integer operands in the range -0x80000000..0x7fffffff. We have
+ to check the range carefully since this predicate is used in DImode
+ contexts. */
+
+int
+const_sint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ /* All allowed constants will fit a CONST_INT. */
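+ /* Note that (-0x7fffffff - 1) is used below rather than -0x80000000:
+ the literal 0x80000000 does not fit in a 32-bit signed int, so negating
+ it directly may yield an unsigned type on some hosts. */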
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= (-0x7fffffff - 1) && INTVAL (op) <= 0x7fffffff));
+}
+
+/* Accept integer operands in the range 0..0xffffffff. We have to check the
+ range carefully since this predicate is used in DImode contexts. Also, we
+ need some extra crud to make it work when hosted on 64-bit machines. */
+
+int
+const_uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+#if HOST_BITS_PER_WIDE_INT > 32
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= 0 && INTVAL (op) <= 0xffffffffL));
+#else
+ return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0)
+ || (GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_HIGH (op) == 0));
+#endif
+}
+
+/* Return 1 if OP is a comparison operator valid for the mode of CC.
+ This allows the use of MATCH_OPERATOR to recognize all the branch insns.
+
+ Some insns only set a few bits in the condition code. So only allow those
+ comparisons that use the bits that are valid. */
+
+int
+proper_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ enum rtx_code code;
+ if (!COMPARISON_P (op))
+ return 0;
+
+ code = GET_CODE (op);
+ if (GET_MODE (XEXP (op, 0)) == CCZNmode)
+ return (code == EQ || code == NE);
+ if (GET_MODE (XEXP (op, 0)) == CCZNCmode)
+ return (code == EQ || code == NE
+ || code == LTU || code == GEU || code == GTU || code == LEU);
+ return 1;
+}
+
+/* Misc. utilities. */
+
+/* X and Y are two things to compare using CODE. Return the rtx
+ for the cc reg in the proper mode. */
+
+rtx
+gen_compare_reg (enum rtx_code code, rtx x, rtx y)
+{
+ enum machine_mode mode = SELECT_CC_MODE (code, x, y);
+ return gen_rtx_REG (mode, 61);
+}
+
+/* Return 1 if VALUE, a const_double, will fit in a limm (4 byte number).
+ We assume the value can be either signed or unsigned. */
+
+int
+arc_double_limm_p (rtx value)
+{
+ HOST_WIDE_INT low, high;
+
+ gcc_assert (GET_CODE (value) == CONST_DOUBLE);
+
+ low = CONST_DOUBLE_LOW (value);
+ high = CONST_DOUBLE_HIGH (value);
+
+ if (low & 0x80000000)
+ {
+ return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0)
+ || (((low & - (unsigned HOST_WIDE_INT) 0x80000000)
+ == - (unsigned HOST_WIDE_INT) 0x80000000)
+ && high == -1));
+ }
+ else
+ {
+ return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
+ }
+}
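+
+/* Worked examples of the test above (HIGH/LOW as returned by
+ CONST_DOUBLE_HIGH/CONST_DOUBLE_LOW; illustrative, not exhaustive):
+ high == 0, low == 0xffffffff -> 1 (fits as an unsigned limm)
+ high == -1, low == -1 -> 1 (the value -1, fits as a signed limm)
+ high == 1, low == 0 -> 0 (needs more than 32 bits). */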
+
+/* Do any needed setup for a variadic function. For the ARC, we must
+ create a register parameter block, and then copy any anonymous arguments
+ in registers to memory.
+
+ CUM has not been updated for the last named argument which has type TYPE
+ and mode MODE, and we rely on this fact.
+
+ We do things a little weird here. We're supposed to only allocate space
+ for the anonymous arguments. However we need to keep the stack eight byte
+ aligned. So we round the space up if necessary, and leave it to va_start
+ to compensate. */
+
+static void
+arc_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_size,
+ int no_rtl)
+{
+ int first_anon_arg;
+
+ /* All BLKmode values are passed by reference. */
+ gcc_assert (mode != BLKmode);
+
+ first_anon_arg = *cum + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+
+ if (first_anon_arg < MAX_ARC_PARM_REGS && !no_rtl)
+ {
+ /* Note that first_reg_offset < MAX_ARC_PARM_REGS. */
+ int first_reg_offset = first_anon_arg;
+ /* Size in words to "pretend" allocate. */
+ int size = MAX_ARC_PARM_REGS - first_reg_offset;
+ /* Extra slop to keep stack eight byte aligned. */
+ int align_slop = size & 1;
+ rtx regblock;
+
+ regblock = gen_rtx_MEM (BLKmode,
+ plus_constant (arg_pointer_rtx,
+ FIRST_PARM_OFFSET (0)
+ + align_slop * UNITS_PER_WORD));
+ set_mem_alias_set (regblock, get_varargs_alias_set ());
+ set_mem_align (regblock, BITS_PER_WORD);
+ move_block_from_reg (first_reg_offset, regblock,
+ MAX_ARC_PARM_REGS - first_reg_offset);
+
+ *pretend_size = ((MAX_ARC_PARM_REGS - first_reg_offset + align_slop)
+ * UNITS_PER_WORD);
+ }
+}
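+
+/* For instance (a sketch, assuming MAX_ARC_PARM_REGS == 8 and a single
+ named SImode argument, so *cum == 0 on entry): first_anon_arg is 1, the
+ block r1-r7 is dumped to memory, size is 7 words, align_slop is 1, and
+ *pretend_size becomes (7 + 1) * 4 == 32 bytes. */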
+
+/* Cost functions. */
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ /* Small integers are as cheap as registers. 4 byte values can
+ be fetched as immediate constants - let's give that the cost
+ of an extra insn. */
+ case CONST_INT:
+ if (SMALL_INT (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case CONST_DOUBLE:
+ {
+ rtx high, low;
+ split_double (x, &high, &low);
+ *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
+ + !SMALL_INT (INTVAL (low)));
+ return true;
+ }
+
+ /* Encourage synth_mult to find a synthetic multiply when reasonable.
+ If we need more than 12 insns to do a multiply, then go out-of-line,
+ since the call overhead will be < 10% of the cost of the multiply. */
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (TARGET_SHIFTER)
+ *total = COSTS_N_INSNS (1);
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = COSTS_N_INSNS (16);
+ else
+ *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+
+/* Provide the costs of an addressing mode that contains ADDR.
+ If ADDR is not a valid address, its cost is irrelevant. */
+
+static int
+arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG :
+ return 1;
+
+ case LABEL_REF :
+ case SYMBOL_REF :
+ case CONST :
+ return 2;
+
+ case PLUS :
+ {
+ register rtx plus0 = XEXP (addr, 0);
+ register rtx plus1 = XEXP (addr, 1);
+
+ if (GET_CODE (plus0) != REG)
+ break;
+
+ switch (GET_CODE (plus1))
+ {
+ case CONST_INT :
+ return SMALL_INT (INTVAL (plus1)) ? 1 : 2;
+ case CONST :
+ case SYMBOL_REF :
+ case LABEL_REF :
+ return 2;
+ default:
+ break;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 4;
+}
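+
+/* To illustrate the costs returned above:
+ [reg] -> 1
+ [reg + shimm] -> 1 (e.g. [r0,4])
+ [reg + large const] -> 2 (the constant needs a limm)
+ [symbol], [label] -> 2
+ anything else -> 4. */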
+
+/* Function prologue/epilogue handlers. */
+
+/* ARC stack frames look like:
+
+ Before call After call
+ +-----------------------+ +-----------------------+
+ | | | |
+ high | local variables, | | local variables, |
+ mem | reg save area, etc. | | reg save area, etc. |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | arguments on stack. | | arguments on stack. |
+ | | | |
+ SP+16->+-----------------------+FP+48->+-----------------------+
+ | 4 word save area for | | reg parm save area, |
+ | return addr, prev %fp | | only created for |
+ SP+0->+-----------------------+ | variable argument |
+ | functions |
+ FP+16->+-----------------------+
+ | 4 word save area for |
+ | return addr, prev %fp |
+ FP+0->+-----------------------+
+ | |
+ | local variables |
+ | |
+ +-----------------------+
+ | |
+ | register save area |
+ | |
+ +-----------------------+
+ | |
+ | alloca allocations |
+ | |
+ +-----------------------+
+ | |
+ | arguments on stack |
+ | |
+ SP+16->+-----------------------+
+ low | 4 word save area for |
+ memory | return addr, prev %fp |
+ SP+0->+-----------------------+
+
+Notes:
+1) The "reg parm save area" does not exist for non variable argument fns.
+ The "reg parm save area" can be eliminated completely if we created our
+ own va-arc.h, but that has tradeoffs as well (so it's not done). */
+
+/* Structure to be filled in by arc_compute_frame_size with register
+ save masks, and offsets for the current function. */
+struct arc_frame_info
+{
+ unsigned int total_size; /* # bytes that the entire frame takes up. */
+ unsigned int extra_size; /* # bytes of extra stuff. */
+ unsigned int pretend_size; /* # bytes we push and pretend caller did. */
+ unsigned int args_size; /* # bytes that outgoing arguments take up. */
+ unsigned int reg_size; /* # bytes needed to store regs. */
+ unsigned int var_size; /* # bytes that variables take up. */
+ unsigned int reg_offset; /* Offset from new sp to store regs. */
+ unsigned int gmask; /* Mask of saved gp registers. */
+ int initialized; /* Nonzero if frame size already calculated. */
+};
+
+/* Current frame information calculated by arc_compute_frame_size. */
+static struct arc_frame_info current_frame_info;
+
+/* Zero structure to initialize current_frame_info. */
+static struct arc_frame_info zero_frame_info;
+
+/* Type of function DECL.
+
+ The result is cached. To reset the cache at the end of a function,
+ call with DECL = NULL_TREE. */
+
+enum arc_function_type
+arc_compute_function_type (tree decl)
+{
+ tree a;
+ /* Cached value. */
+ static enum arc_function_type fn_type = ARC_FUNCTION_UNKNOWN;
+ /* Last function we were called for. */
+ static tree last_fn = NULL_TREE;
+
+ /* Resetting the cached value? */
+ if (decl == NULL_TREE)
+ {
+ fn_type = ARC_FUNCTION_UNKNOWN;
+ last_fn = NULL_TREE;
+ return fn_type;
+ }
+
+ if (decl == last_fn && fn_type != ARC_FUNCTION_UNKNOWN)
+ return fn_type;
+
+ /* Assume we have a normal function (not an interrupt handler). */
+ fn_type = ARC_FUNCTION_NORMAL;
+
+ /* Now see if this is an interrupt handler. */
+ for (a = DECL_ATTRIBUTES (current_function_decl);
+ a;
+ a = TREE_CHAIN (a))
+ {
+ tree name = TREE_PURPOSE (a), args = TREE_VALUE (a);
+
+ if (name == get_identifier ("__interrupt__")
+ && list_length (args) == 1
+ && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
+ {
+ tree value = TREE_VALUE (args);
+
+ if (!strcmp (TREE_STRING_POINTER (value), "ilink1"))
+ fn_type = ARC_FUNCTION_ILINK1;
+ else if (!strcmp (TREE_STRING_POINTER (value), "ilink2"))
+ fn_type = ARC_FUNCTION_ILINK2;
+ else
+ gcc_unreachable ();
+ break;
+ }
+ }
+
+ last_fn = decl;
+ return fn_type;
+}
+
+#define ILINK1_REGNUM 29
+#define ILINK2_REGNUM 30
+#define RETURN_ADDR_REGNUM 31
+#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
+#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
+
+/* Tell prologue and epilogue if register REGNO should be saved / restored.
+ The return address and frame pointer are treated separately.
+ Don't consider them here. */
+#define MUST_SAVE_REGISTER(regno, interrupt_p) \
+((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
+ && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p)))
+
+#define MUST_SAVE_RETURN_ADDR (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
+
+/* Return the bytes needed to compute the frame pointer from the current
+ stack pointer.
+
+ SIZE is the size needed for local variables. */
+
+unsigned int
+arc_compute_frame_size (int size /* # of var. bytes allocated. */)
+{
+ int regno;
+ unsigned int total_size, var_size, args_size, pretend_size, extra_size;
+ unsigned int reg_size, reg_offset;
+ unsigned int gmask;
+ enum arc_function_type fn_type;
+ int interrupt_p;
+
+ var_size = size;
+ args_size = crtl->outgoing_args_size;
+ pretend_size = crtl->args.pretend_args_size;
+ extra_size = FIRST_PARM_OFFSET (0);
+ total_size = extra_size + pretend_size + args_size + var_size;
+ reg_offset = FIRST_PARM_OFFSET (0) + crtl->outgoing_args_size;
+ reg_size = 0;
+ gmask = 0;
+
+ /* See if this is an interrupt handler. Call used registers must be saved
+ for them too. */
+ fn_type = arc_compute_function_type (current_function_decl);
+ interrupt_p = ARC_INTERRUPT_P (fn_type);
+
+ /* Calculate space needed for registers.
+ ??? We ignore the extension registers for now. */
+
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if (MUST_SAVE_REGISTER (regno, interrupt_p))
+ {
+ reg_size += UNITS_PER_WORD;
+ gmask |= 1 << regno;
+ }
+ }
+
+ total_size += reg_size;
+
+ /* If the only space to allocate is the fp/blink save area, this is an
+ empty frame. However, if we'll be making a function call we need to
+ allocate a stack frame for our callee's fp/blink save area. */
+ if (total_size == extra_size
+ && !MUST_SAVE_RETURN_ADDR)
+ total_size = extra_size = 0;
+
+ total_size = ARC_STACK_ALIGN (total_size);
+
+ /* Save computed information. */
+ current_frame_info.total_size = total_size;
+ current_frame_info.extra_size = extra_size;
+ current_frame_info.pretend_size = pretend_size;
+ current_frame_info.var_size = var_size;
+ current_frame_info.args_size = args_size;
+ current_frame_info.reg_size = reg_size;
+ current_frame_info.reg_offset = reg_offset;
+ current_frame_info.gmask = gmask;
+ current_frame_info.initialized = reload_completed;
+
+ /* Ok, we're done. */
+ return total_size;
+}
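+
+/* A worked example, assuming FIRST_PARM_OFFSET (0) == 16 as in the frame
+ diagram above: 20 bytes of locals, no outgoing or pretend args, and two
+ call-saved registers live gives reg_size == 8 and
+ total_size == 16 + 20 + 8 == 44, which ARC_STACK_ALIGN rounds up to 48. */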
+
+/* Common code to save/restore registers. */
+
+void
+arc_save_restore (FILE *file,
+ const char *base_reg,
+ unsigned int offset,
+ unsigned int gmask,
+ const char *op)
+{
+ int regno;
+
+ if (gmask == 0)
+ return;
+
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if ((gmask & (1L << regno)) != 0)
+ {
+ fprintf (file, "\t%s %s,[%s,%d]\n",
+ op, reg_names[regno], base_reg, offset);
+ offset += UNITS_PER_WORD;
+ }
+ }
+}
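+
+/* For example, with BASE_REG "sp", OFFSET 16, OP "st", and bits 16 and 17
+ set in GMASK, this emits (assuming reg_names[16] is "r16"):
+ st r16,[sp,16]
+ st r17,[sp,20] */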
+
+/* Target hook to assemble an integer object. The ARC version needs to
+ emit a special directive for references to labels and function
+ symbols. */
+
+static bool
+arc_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (size == UNITS_PER_WORD && aligned_p
+ && ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x))
+ || GET_CODE (x) == LABEL_REF))
+ {
+ fputs ("\t.word\t%st(", asm_out_file);
+ output_addr_const (asm_out_file, x);
+ fputs (")\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
+
+/* Set up the stack and frame pointer (if desired) for the function. */
+
+static void
+arc_output_function_prologue (FILE *file, HOST_WIDE_INT size)
+{
+ const char *sp_str = reg_names[STACK_POINTER_REGNUM];
+ const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
+ unsigned int gmask = current_frame_info.gmask;
+ enum arc_function_type fn_type = arc_compute_function_type (current_function_decl);
+
+ /* If this is an interrupt handler, set up our stack frame.
+ ??? Optimize later. */
+ if (ARC_INTERRUPT_P (fn_type))
+ {
+ fprintf (file, "\t%s interrupt handler\n",
+ ASM_COMMENT_START);
+ fprintf (file, "\tsub %s,%s,16\n", sp_str, sp_str);
+ }
+
+ /* This is only for the human reader. */
+ fprintf (file, "\t%s BEGIN PROLOGUE %s vars= %d, regs= %d, args= %d, extra= %d\n",
+ ASM_COMMENT_START, ASM_COMMENT_START,
+ current_frame_info.var_size,
+ current_frame_info.reg_size / 4,
+ current_frame_info.args_size,
+ current_frame_info.extra_size);
+
+ size = ARC_STACK_ALIGN (size);
+ size = (! current_frame_info.initialized
+ ? arc_compute_frame_size (size)
+ : current_frame_info.total_size);
+
+ /* These cases shouldn't happen. Catch them now. */
+ gcc_assert (size || !gmask);
+
+ /* Allocate space for register arguments if this is a variadic function. */
+ if (current_frame_info.pretend_size != 0)
+ fprintf (file, "\tsub %s,%s,%d\n",
+ sp_str, sp_str, current_frame_info.pretend_size);
+
+ /* The home-grown ABI says link register is saved first. */
+ if (MUST_SAVE_RETURN_ADDR)
+ fprintf (file, "\tst %s,[%s,%d]\n",
+ reg_names[RETURN_ADDR_REGNUM], sp_str, UNITS_PER_WORD);
+
+ /* Set up the previous frame pointer next (if we need to). */
+ if (frame_pointer_needed)
+ {
+ fprintf (file, "\tst %s,[%s]\n", fp_str, sp_str);
+ fprintf (file, "\tmov %s,%s\n", fp_str, sp_str);
+ }
+
+ /* ??? We don't handle the case where the saved regs are more than 252
+ bytes away from sp. This can be handled by decrementing sp once, saving
+ the regs, and then decrementing it again. The epilogue doesn't have this
+ problem as the `ld' insn takes reg+limm values (though it would be more
+ efficient to avoid reg+limm). */
+
+ /* Allocate the stack frame. */
+ if (size - current_frame_info.pretend_size > 0)
+ fprintf (file, "\tsub %s,%s," HOST_WIDE_INT_PRINT_DEC "\n",
+ sp_str, sp_str, size - current_frame_info.pretend_size);
+
+ /* Save any needed call-saved regs (and call-used if this is an
+ interrupt handler). */
+ arc_save_restore (file, sp_str, current_frame_info.reg_offset,
+ /* The zeroing of these two bits is unnecessary,
+ but leave this in for clarity. */
+ gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
+ "st");
+
+ fprintf (file, "\t%s END PROLOGUE\n", ASM_COMMENT_START);
+}
+
+/* Do any necessary cleanup after a function to restore stack, frame,
+ and regs. */
+
+static void
+arc_output_function_epilogue (FILE *file, HOST_WIDE_INT size)
+{
+ rtx epilogue_delay = crtl->epilogue_delay_list;
+ int noepilogue = FALSE;
+ enum arc_function_type fn_type = arc_compute_function_type (current_function_decl);
+
+ /* This is only for the human reader. */
+ fprintf (file, "\t%s EPILOGUE\n", ASM_COMMENT_START);
+
+ size = ARC_STACK_ALIGN (size);
+ size = (!current_frame_info.initialized
+ ? arc_compute_frame_size (size)
+ : current_frame_info.total_size);
+
+ if (size == 0 && epilogue_delay == 0)
+ {
+ rtx insn = get_last_insn ();
+
+ /* If the last insn was a BARRIER, we don't have to write any code
+ because a jump (aka return) was put there. */
+ if (GET_CODE (insn) == NOTE)
+ insn = prev_nonnote_insn (insn);
+ if (insn && GET_CODE (insn) == BARRIER)
+ noepilogue = TRUE;
+ }
+
+ if (!noepilogue)
+ {
+ unsigned int pretend_size = current_frame_info.pretend_size;
+ unsigned int frame_size = size - pretend_size;
+ int restored, fp_restored_p;
+ int can_trust_sp_p = !cfun->calls_alloca;
+ const char *sp_str = reg_names[STACK_POINTER_REGNUM];
+ const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
+
+ /* ??? There are lots of optimizations that can be done here.
+ EG: Use fp to restore regs if it's closer.
+ Maybe in time we'll do them all. For now, always restore regs from
+ sp, but don't restore sp if we don't have to. */
+
+ if (!can_trust_sp_p)
+ {
+ gcc_assert (frame_pointer_needed);
+ fprintf (file,"\tsub %s,%s,%d\t\t%s sp not trusted here\n",
+ sp_str, fp_str, frame_size, ASM_COMMENT_START);
+ }
+
+ /* Restore any saved registers. */
+ arc_save_restore (file, sp_str, current_frame_info.reg_offset,
+ /* The zeroing of these two bits is unnecessary,
+ but leave this in for clarity. */
+ current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
+ "ld");
+
+ if (MUST_SAVE_RETURN_ADDR)
+ fprintf (file, "\tld %s,[%s,%d]\n",
+ reg_names[RETURN_ADDR_REGNUM],
+ frame_pointer_needed ? fp_str : sp_str,
+ UNITS_PER_WORD + (frame_pointer_needed ? 0 : frame_size));
+
+ /* Keep track of how much of the stack pointer we've restored.
+ It makes the following a lot more readable. */
+ restored = 0;
+ fp_restored_p = 0;
+
+ /* We try to emit the epilogue delay slot insn right after the load
+ of the return address register so that it can execute with the
+ stack intact. Secondly, loads are delayed. */
+ /* ??? If stack intactness is important, always emit now. */
+ if (MUST_SAVE_RETURN_ADDR && epilogue_delay != NULL_RTX)
+ {
+ final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
+ epilogue_delay = NULL_RTX;
+ }
+
+ if (frame_pointer_needed)
+ {
+ /* Try to restore the frame pointer in the delay slot. We can't,
+ however, if any of these is true. */
+ if (epilogue_delay != NULL_RTX
+ || !SMALL_INT (frame_size)
+ || pretend_size
+ || ARC_INTERRUPT_P (fn_type))
+ {
+ /* Note that we restore fp and sp here! */
+ fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size);
+ restored += frame_size;
+ fp_restored_p = 1;
+ }
+ }
+ else if (!SMALL_INT (size /* frame_size + pretend_size */)
+ || ARC_INTERRUPT_P (fn_type))
+ {
+ fprintf (file, "\tadd %s,%s,%d\n", sp_str, sp_str, frame_size);
+ restored += frame_size;
+ }
+
+ /* These must be done before the return insn because the delay slot
+ does the final stack restore. */
+ if (ARC_INTERRUPT_P (fn_type))
+ {
+ if (epilogue_delay)
+ {
+ final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
+ }
+ }
+
+ /* Emit the return instruction. */
+ {
+ static const int regs[4] = {
+ 0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM
+ };
+
+ /* Update the flags, if returning from an interrupt handler. */
+ if (ARC_INTERRUPT_P (fn_type))
+ fprintf (file, "\tj.d.f %s\n", reg_names[regs[fn_type]]);
+ else
+ fprintf (file, "\tj.d %s\n", reg_names[regs[fn_type]]);
+ }
+
+ /* If the only register saved is the return address, we need a
+ nop, unless we have an instruction to put into it. Otherwise
+ we don't, since reloading multiple registers doesn't reference
+ the register being loaded. */
+
+ if (ARC_INTERRUPT_P (fn_type))
+ fprintf (file, "\tadd %s,%s,16\n", sp_str, sp_str);
+ else if (epilogue_delay != NULL_RTX)
+ {
+ gcc_assert (!frame_pointer_needed || fp_restored_p);
+ gcc_assert (restored >= size);
+ final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
+ }
+ else if (frame_pointer_needed && !fp_restored_p)
+ {
+ gcc_assert (SMALL_INT (frame_size));
+ /* Note that we restore fp and sp here! */
+ fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size);
+ }
+ else if (restored < size)
+ {
+ gcc_assert (SMALL_INT (size - restored));
+ fprintf (file, "\tadd %s,%s," HOST_WIDE_INT_PRINT_DEC "\n",
+ sp_str, sp_str, size - restored);
+ }
+ else
+ fprintf (file, "\tnop\n");
+ }
+
+ /* Reset state info for each function. */
+ current_frame_info = zero_frame_info;
+ arc_compute_function_type (NULL_TREE);
+}
+
+/* Define the number of delay slots needed for the function epilogue.
+
+ Interrupt handlers can't have any epilogue delay slots (it's always needed
+ for something else, I think). For normal functions, we have to worry about
+ using call-saved regs as they'll be restored before the delay slot insn.
+ Functions with non-empty frames already have enough choices for the epilogue
+ delay slot so for now we only consider functions with empty frames. */
+
+int
+arc_delay_slots_for_epilogue (void)
+{
+ if (arc_compute_function_type (current_function_decl) != ARC_FUNCTION_NORMAL)
+ return 0;
+ if (!current_frame_info.initialized)
+ (void) arc_compute_frame_size (get_frame_size ());
+ if (current_frame_info.total_size == 0)
+ return 1;
+ return 0;
+}
+
+/* Return true if TRIAL is a valid insn for the epilogue delay slot.
+ Any single length instruction which doesn't reference the stack or frame
+ pointer or any call-saved register is OK. SLOT will always be 0. */
+
+int
+arc_eligible_for_epilogue_delay (rtx trial, int slot)
+{
+ gcc_assert (!slot);
+
+ if (get_attr_length (trial) == 1
+ /* If registers were saved, presumably there are more than enough
+ possibilities for the delay slot. The alternative is something
+ more complicated (of course, if we expanded the epilogue as rtl
+ this problem would go away). */
+ /* ??? Note that this will always be true since only functions with
+ empty frames have epilogue delay slots. See
+ arc_delay_slots_for_epilogue. */
+ && current_frame_info.gmask == 0
+ && ! reg_mentioned_p (stack_pointer_rtx, PATTERN (trial))
+ && ! reg_mentioned_p (frame_pointer_rtx, PATTERN (trial)))
+ return 1;
+ return 0;
+}
+
+/* Return true if OP is a shift operator. */
+
+int
+shift_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (op))
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ASHIFT:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* Output the assembler code for doing a shift.
+ We go to a bit of trouble to generate efficient code as the ARC only has
+ single bit shifts. This is taken from the h8300 port. We only have one
+ mode of shifting and can't access individual bytes like the h8300 can, so
+ this is greatly simplified (at the expense of not generating hyper-
+ efficient code).
+
+ This function is not used if the variable shift insns are present. */
+
+/* ??? We assume the output operand is the same as operand 1.
+ This can be optimized (deleted) in the case of 1 bit shifts. */
+/* ??? We use the loop register here. We don't use it elsewhere (yet) and
+ using it here will give us a chance to play with it. */
+
+const char *
+output_shift (rtx *operands)
+{
+ rtx shift = operands[3];
+ enum machine_mode mode = GET_MODE (shift);
+ enum rtx_code code = GET_CODE (shift);
+ const char *shift_one;
+
+ gcc_assert (mode == SImode);
+
+ switch (code)
+ {
+ case ASHIFT: shift_one = "asl %0,%0"; break;
+ case ASHIFTRT: shift_one = "asr %0,%0"; break;
+ case LSHIFTRT: shift_one = "lsr %0,%0"; break;
+ default: gcc_unreachable ();
+ }
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ if (optimize)
+ {
+ output_asm_insn ("sub.f 0,%2,0", operands);
+ output_asm_insn ("mov lp_count,%2", operands);
+ output_asm_insn ("bz 2f", operands);
+ }
+ else
+ output_asm_insn ("mov %4,%2", operands);
+ goto shiftloop;
+ }
+ else
+ {
+ int n;
+
+ /* If the count is negative, make it 0. */
+ n = INTVAL (operands[2]);
+ if (n < 0)
+ n = 0;
+ /* If the count is too big, truncate it.
+ ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to
+ do the intuitive thing. */
+ else if (n > GET_MODE_BITSIZE (mode))
+ n = GET_MODE_BITSIZE (mode);
+
+ /* First see if we can do them inline. */
+ if (n <= 8)
+ {
+ while (--n >= 0)
+ output_asm_insn (shift_one, operands);
+ }
+ /* See if we can use a rotate/and. */
+ else if (n == BITS_PER_WORD - 1)
+ {
+ switch (code)
+ {
+ case ASHIFT :
+ output_asm_insn ("and %0,%0,1\n\tror %0,%0", operands);
+ break;
+ case ASHIFTRT :
+ /* The ARC doesn't have a rol insn. Use something else. */
+ output_asm_insn ("asl.f 0,%0\n\tsbc %0,0,0", operands);
+ break;
+ case LSHIFTRT :
+ /* The ARC doesn't have a rol insn. Use something else. */
+ output_asm_insn ("asl.f 0,%0\n\tadc %0,0,0", operands);
+ break;
+ default:
+ break;
+ }
+ }
+ /* Must loop. */
+ else
+ {
+ char buf[100];
+
+ if (optimize)
+ output_asm_insn ("mov lp_count,%c2", operands);
+ else
+ output_asm_insn ("mov %4,%c2", operands);
+ shiftloop:
+ if (optimize)
+ {
+ if (flag_pic)
+ sprintf (buf, "lr %%4,[status]\n\tadd %%4,%%4,6\t%s single insn loop start",
+ ASM_COMMENT_START);
+ else
+ sprintf (buf, "mov %%4,%%%%st(1f)\t%s (single insn loop start) >> 2",
+ ASM_COMMENT_START);
+ output_asm_insn (buf, operands);
+ output_asm_insn ("sr %4,[lp_start]", operands);
+ output_asm_insn ("add %4,%4,1", operands);
+ output_asm_insn ("sr %4,[lp_end]", operands);
+ output_asm_insn ("nop\n\tnop", operands);
+ if (flag_pic)
+ fprintf (asm_out_file, "\t%s single insn loop\n",
+ ASM_COMMENT_START);
+ else
+ fprintf (asm_out_file, "1:\t%s single insn loop\n",
+ ASM_COMMENT_START);
+ output_asm_insn (shift_one, operands);
+ fprintf (asm_out_file, "2:\t%s end single insn loop\n",
+ ASM_COMMENT_START);
+ }
+ else
+ {
+ fprintf (asm_out_file, "1:\t%s begin shift loop\n",
+ ASM_COMMENT_START);
+ output_asm_insn ("sub.f %4,%4,1", operands);
+ output_asm_insn ("nop", operands);
+ output_asm_insn ("bn.nd 2f", operands);
+ output_asm_insn (shift_one, operands);
+ output_asm_insn ("b.nd 1b", operands);
+ fprintf (asm_out_file, "2:\t%s end shift loop\n",
+ ASM_COMMENT_START);
+ }
+ }
+ }
+
+ return "";
+}
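+
+/* To illustrate: a constant shift count of 2 emits the single-bit insn
+ twice (e.g. "asl %0,%0" twice for ASHIFT), while a count of
+ BITS_PER_WORD - 1 uses the rotate/and trick -- for ASHIFT,
+ "and %0,%0,1" then "ror %0,%0" keeps bit 0 and rotates it into bit 31.
+ Larger constants and register counts fall back to the lp_count loop. */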
+
+/* Nested function support. */
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+void
+arc_initialize_trampoline (rtx tramp ATTRIBUTE_UNUSED,
+ rtx fnaddr ATTRIBUTE_UNUSED,
+ rtx cxt ATTRIBUTE_UNUSED)
+{
+}
+
+/* Set the cpu type and print out other fancy things,
+ at the top of the file. */
+
+static void
+arc_file_start (void)
+{
+ default_file_start ();
+ fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+void
+arc_print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case '#' :
+ /* Conditional branches. For now these are equivalent. */
+ case '*' :
+ /* Unconditional branches. Output the appropriate delay slot suffix. */
+ if (!final_sequence || XVECLEN (final_sequence, 0) == 1)
+ {
+ /* There's nothing in the delay slot. */
+ fputs (".nd", file);
+ }
+ else
+ {
+ rtx jump = XVECEXP (final_sequence, 0, 0);
+ rtx delay = XVECEXP (final_sequence, 0, 1);
+ if (INSN_ANNULLED_BRANCH_P (jump))
+ fputs (INSN_FROM_TARGET_P (delay) ? ".jd" : ".nd", file);
+ else
+ fputs (".d", file);
+ }
+ return;
+ case '?' : /* with leading "." */
+ case '!' : /* without leading "." */
+ /* This insn can be conditionally executed. See if the ccfsm machinery
+ says it should be conditionalized. */
+ if (arc_ccfsm_state == 3 || arc_ccfsm_state == 4)
+ {
+ /* Is this insn in a delay slot? */
+ if (final_sequence && XVECLEN (final_sequence, 0) == 2)
+ {
+ rtx insn = XVECEXP (final_sequence, 0, 1);
+
+ /* If the insn is annulled and is from the target path, we need
+ to inverse the condition test. */
+ if (INSN_ANNULLED_BRANCH_P (insn))
+ {
+ if (INSN_FROM_TARGET_P (insn))
+ fprintf (file, "%s%s",
+ code == '?' ? "." : "",
+ arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc)]);
+ else
+ fprintf (file, "%s%s",
+ code == '?' ? "." : "",
+ arc_condition_codes[arc_ccfsm_current_cc]);
+ }
+ else
+ {
+ /* This insn is executed for either path, so don't
+ conditionalize it at all. */
+ ; /* nothing to do */
+ }
+ }
+ else
+ {
+ /* This insn isn't in a delay slot. */
+ fprintf (file, "%s%s",
+ code == '?' ? "." : "",
+ arc_condition_codes[arc_ccfsm_current_cc]);
+ }
+ }
+ return;
+ case '~' :
+ /* Output a nop if we're between a set of the condition codes
+ and a conditional branch. */
+ if (last_insn_set_cc_p)
+ fputs ("nop\n\t", file);
+ return;
+ case 'd' :
+ fputs (arc_condition_codes[get_arc_condition_code (x)], file);
+ return;
+ case 'D' :
+ fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE
+ (get_arc_condition_code (x))],
+ file);
+ return;
+ case 'R' :
+ /* Write second word of DImode or DFmode reference,
+ register or memory. */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x)+1], file);
+ else if (GET_CODE (x) == MEM)
+ {
+ fputc ('[', file);
+ /* Handle possible auto-increment. Since it is pre-increment and
+ we have already done it, we can just use an offset of four. */
+ /* ??? This is taken from rs6000.c I think. I don't think it is
+ currently necessary, but keep it around. */
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 4));
+ else
+ output_address (plus_constant (XEXP (x, 0), 4));
+ fputc (']', file);
+ }
+ else
+ output_operand_lossage ("invalid operand to %%R code");
+ return;
+ case 'S' :
+ if ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x))
+ || GET_CODE (x) == LABEL_REF)
+ {
+ fprintf (file, "%%st(");
+ output_addr_const (file, x);
+ fprintf (file, ")");
+ return;
+ }
+ break;
+ case 'H' :
+ case 'L' :
+ if (GET_CODE (x) == REG)
+ {
+ /* L = least significant word, H = most significant word */
+ if ((TARGET_BIG_ENDIAN != 0) ^ (code == 'L'))
+ fputs (reg_names[REGNO (x)], file);
+ else
+ fputs (reg_names[REGNO (x)+1], file);
+ }
+ else if (GET_CODE (x) == CONST_INT
+ || GET_CODE (x) == CONST_DOUBLE)
+ {
+ rtx first, second;
+
+ split_double (x, &first, &second);
+ fprintf (file, "0x%08lx",
+ (long)(code == 'L' ? INTVAL (first) : INTVAL (second)));
+ }
+ else
+ output_operand_lossage ("invalid operand to %%H/%%L code");
+ return;
+ case 'A' :
+ {
+ char str[30];
+
+ gcc_assert (GET_CODE (x) == CONST_DOUBLE
+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
+
+ real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
+ fprintf (file, "%s", str);
+ return;
+ }
+ case 'U' :
+ /* Output a load/store with update indicator if appropriate. */
+ if (GET_CODE (x) == MEM)
+ {
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ fputs (".a", file);
+ }
+ else
+ output_operand_lossage ("invalid operand to %%U code");
+ return;
+ case 'V' :
+ /* Output cache bypass indicator for a load/store insn. Volatile memory
+ refs are defined to use the cache bypass mechanism. */
+ if (GET_CODE (x) == MEM)
+ {
+ if (MEM_VOLATILE_P (x))
+ fputs (".di", file);
+ }
+ else
+ output_operand_lossage ("invalid operand to %%V code");
+ return;
+ case 0 :
+ /* Do nothing special. */
+ break;
+ default :
+ /* Unknown flag. */
+ output_operand_lossage ("invalid operand output code");
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG :
+ fputs (reg_names[REGNO (x)], file);
+ break;
+ case MEM :
+ fputc ('[', file);
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0),
+ GET_MODE_SIZE (GET_MODE (x))));
+ else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0),
+ - GET_MODE_SIZE (GET_MODE (x))));
+ else
+ output_address (XEXP (x, 0));
+ fputc (']', file);
+ break;
+ case CONST_DOUBLE :
+ /* We handle SFmode constants here as output_addr_const doesn't. */
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE d;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, x);
+ REAL_VALUE_TO_TARGET_SINGLE (d, l);
+ fprintf (file, "0x%08lx", l);
+ break;
+ }
+ /* Fall through. Let output_addr_const deal with it. */
+ default :
+ output_addr_const (file, x);
+ break;
+ }
+}
+
+/* Print a memory address as an operand to reference that memory location. */
+
+void
+arc_print_operand_address (FILE *file, rtx addr)
+{
+ register rtx base, index = 0;
+ int offset = 0;
+
+ switch (GET_CODE (addr))
+ {
+ case REG :
+ fputs (reg_names[REGNO (addr)], file);
+ break;
+ case SYMBOL_REF :
+ if (/*???*/ 0 && SYMBOL_REF_FUNCTION_P (addr))
+ {
+ fprintf (file, "%%st(");
+ output_addr_const (file, addr);
+ fprintf (file, ")");
+ }
+ else
+ output_addr_const (file, addr);
+ break;
+ case PLUS :
+ if (GET_CODE (XEXP (addr, 0)) == CONST_INT)
+ offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
+ else if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
+ else
+ base = XEXP (addr, 0), index = XEXP (addr, 1);
+ gcc_assert (GET_CODE (base) == REG);
+ fputs (reg_names[REGNO (base)], file);
+ if (index == 0)
+ {
+ if (offset != 0)
+ fprintf (file, ",%d", offset);
+ }
+ else
+ {
+ switch (GET_CODE (index))
+ {
+ case REG:
+ fprintf (file, ",%s", reg_names[REGNO (index)]);
+ break;
+ case SYMBOL_REF:
+ fputc (',', file), output_addr_const (file, index);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ break;
+ case PRE_INC :
+ case PRE_DEC :
+ /* We shouldn't get here as we've lost the mode of the memory object
+ (which says how much to inc/dec by). */
+ gcc_unreachable ();
+ break;
+ default :
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+/* Update compare/branch separation marker. */
+
+static void
+record_cc_ref (rtx insn)
+{
+ last_insn_set_cc_p = current_insn_set_cc_p;
+
+ switch (get_attr_cond (insn))
+ {
+ case COND_SET :
+ case COND_SET_ZN :
+ case COND_SET_ZNC :
+ if (get_attr_length (insn) == 1)
+ current_insn_set_cc_p = 1;
+ else
+ current_insn_set_cc_p = 0;
+ break;
+ default :
+ current_insn_set_cc_p = 0;
+ break;
+ }
+}
+
+/* Conditional execution support.
+
+ This is based on the ARM port but for now is much simpler.
+
+ A finite state machine takes care of noticing whether or not instructions
+ can be conditionally executed, and thus decrease execution time and code
+ size by deleting branch instructions. The fsm is controlled by
+ final_prescan_insn, and controls the actions of PRINT_OPERAND. The patterns
+ in the .md file for the branch insns also have a hand in this. */
+
+/* The states of the fsm controlling the condition codes are:
+ 0: normal, do nothing special
+ 1: don't output this insn
+ 2: don't output this insn
+ 3: make insns conditional
+ 4: make insns conditional
+
+ State transitions (state->state by whom, under what condition):
+ 0 -> 1 final_prescan_insn, if insn is conditional branch
+ 0 -> 2 final_prescan_insn, if the `target' is an unconditional branch
+ 1 -> 3 branch patterns, after having not output the conditional branch
+ 2 -> 4 branch patterns, after having not output the conditional branch
+ 3 -> 0 (*targetm.asm_out.internal_label), if the `target' label is reached
+ (the target label has CODE_LABEL_NUMBER equal to
+ arc_ccfsm_target_label).
+ 4 -> 0 final_prescan_insn, if `target' unconditional branch is reached
+
+ If the jump clobbers the conditions then we use states 2 and 4.
+
+ A similar thing can be done with conditional return insns.
+
+ We also handle separating branches from sets of the condition code.
+ This is done here because knowledge of the ccfsm state is required,
+ we may not be outputting the branch. */
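+
+/* An illustrative example (assembler syntax assumed): given
+
+ bne .L1
+ sub r0,r0,1
+ .L1:
+
+ the fsm suppresses the branch (state 0 -> 1 -> 3) and the %? operand
+ code conditionalizes the skipped insn with the inverted condition,
+ giving "sub.eq r0,r0,1". */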
+
+void
+arc_final_prescan_insn (rtx insn,
+ rtx *opvec ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ /* BODY will hold the body of INSN. */
+ register rtx body = PATTERN (insn);
+
+ /* This will be 1 if trying to repeat the trick (i.e.: do the `else' part of
+ an if/then/else), and things need to be reversed. */
+ int reverse = 0;
+
+ /* If we start with a return insn, we only succeed if we find another one. */
+ int seeking_return = 0;
+
+ /* START_INSN will hold the insn from where we start looking. This is the
+ first insn after the following code_label if REVERSE is true. */
+ rtx start_insn = insn;
+
+ /* Update compare/branch separation marker. */
+ record_cc_ref (insn);
+
+ /* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
+ We can't do this in macro FINAL_PRESCAN_INSN because it's called from
+ final_scan_insn, which has `optimize' as a local. */
+ if (optimize < 2 || TARGET_NO_COND_EXEC)
+ return;
+
+ /* If in state 4, check if the target branch is reached, in order to
+ change back to state 0. */
+ if (arc_ccfsm_state == 4)
+ {
+ if (insn == arc_ccfsm_target_insn)
+ {
+ arc_ccfsm_target_insn = NULL;
+ arc_ccfsm_state = 0;
+ }
+ return;
+ }
+
+ /* If in state 3, it is possible to repeat the trick, if this insn is an
+ unconditional branch to a label, and immediately following this branch
+ is the previous target label which is only used once, and the label this
+ branch jumps to is not too far off. Or in other words "we've done the
+ `then' part, see if we can do the `else' part." */
+ if (arc_ccfsm_state == 3)
+ {
+ if (simplejump_p (insn))
+ {
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == BARRIER)
+ {
+ /* ??? Isn't this always a barrier? */
+ start_insn = next_nonnote_insn (start_insn);
+ }
+ if (GET_CODE (start_insn) == CODE_LABEL
+ && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label
+ && LABEL_NUSES (start_insn) == 1)
+ reverse = TRUE;
+ else
+ return;
+ }
+ else if (GET_CODE (body) == RETURN)
+ {
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == BARRIER)
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == CODE_LABEL
+ && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label
+ && LABEL_NUSES (start_insn) == 1)
+ {
+ reverse = TRUE;
+ seeking_return = 1;
+ }
+ else
+ return;
+ }
+ else
+ return;
+ }
+
+ if (GET_CODE (insn) != JUMP_INSN)
+ return;
+
+ /* This jump might be paralleled with a clobber of the condition codes;
+ the jump should always come first. */
+ if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
+ body = XVECEXP (body, 0, 0);
+
+ if (reverse
+ || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
+ && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
+ {
+ int insns_skipped = 0, fail = FALSE, succeed = FALSE;
+ /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
+ int then_not_else = TRUE;
+ /* Nonzero if next insn must be the target label. */
+ int next_must_be_target_label_p;
+ rtx this_insn = start_insn, label = 0;
+
+ /* Register the insn jumped to. */
+ if (reverse)
+ {
+ if (!seeking_return)
+ label = XEXP (SET_SRC (body), 0);
+ }
+ else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
+ label = XEXP (XEXP (SET_SRC (body), 1), 0);
+ else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
+ {
+ label = XEXP (XEXP (SET_SRC (body), 2), 0);
+ then_not_else = FALSE;
+ }
+ else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
+ seeking_return = 1;
+ else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
+ {
+ seeking_return = 1;
+ then_not_else = FALSE;
+ }
+ else
+ gcc_unreachable ();
+
+ /* See how many insns this branch skips, and what kind of insns. If all
+ insns are okay, and the label or unconditional branch to the same
+ label is not too far away, succeed. */
+ for (insns_skipped = 0, next_must_be_target_label_p = FALSE;
+ !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED;
+ insns_skipped++)
+ {
+ rtx scanbody;
+
+ this_insn = next_nonnote_insn (this_insn);
+ if (!this_insn)
+ break;
+
+ if (next_must_be_target_label_p)
+ {
+ if (GET_CODE (this_insn) == BARRIER)
+ continue;
+ if (GET_CODE (this_insn) == CODE_LABEL
+ && this_insn == label)
+ {
+ arc_ccfsm_state = 1;
+ succeed = TRUE;
+ }
+ else
+ fail = TRUE;
+ break;
+ }
+
+ scanbody = PATTERN (this_insn);
+
+ switch (GET_CODE (this_insn))
+ {
+ case CODE_LABEL:
+ /* Succeed if it is the target label, otherwise fail since
+ control falls in from somewhere else. */
+ if (this_insn == label)
+ {
+ arc_ccfsm_state = 1;
+ succeed = TRUE;
+ }
+ else
+ fail = TRUE;
+ break;
+
+ case BARRIER:
+ /* Succeed if the following insn is the target label.
+ Otherwise fail.
+ If return insns are used then the last insn in a function
+ will be a barrier. */
+ next_must_be_target_label_p = TRUE;
+ break;
+
+ case CALL_INSN:
+ /* Can handle a call insn if there are no insns after it.
+ I.e., the next "insn" is the target label. We don't have to
+ worry about delay slots as such insns are SEQUENCE's inside
+ INSN's. ??? It is possible to handle such insns though. */
+ if (get_attr_cond (this_insn) == COND_CANUSE)
+ next_must_be_target_label_p = TRUE;
+ else
+ fail = TRUE;
+ break;
+
+ case JUMP_INSN:
+ /* If this is an unconditional branch to the same label, succeed.
+ If it is to another label, do nothing. If it is conditional,
+ fail. */
+ /* ??? Probably, the test for the SET and the PC are unnecessary. */
+
+ if (GET_CODE (scanbody) == SET
+ && GET_CODE (SET_DEST (scanbody)) == PC)
+ {
+ if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
+ && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
+ {
+ arc_ccfsm_state = 2;
+ succeed = TRUE;
+ }
+ else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
+ fail = TRUE;
+ }
+ else if (GET_CODE (scanbody) == RETURN
+ && seeking_return)
+ {
+ arc_ccfsm_state = 2;
+ succeed = TRUE;
+ }
+ else if (GET_CODE (scanbody) == PARALLEL)
+ {
+ if (get_attr_cond (this_insn) != COND_CANUSE)
+ fail = TRUE;
+ }
+ break;
+
+ case INSN:
+ /* We can only do this with insns that can use the condition
+ codes (and don't set them). */
+ if (GET_CODE (scanbody) == SET
+ || GET_CODE (scanbody) == PARALLEL)
+ {
+ if (get_attr_cond (this_insn) != COND_CANUSE)
+ fail = TRUE;
+ }
+ /* We can't handle other insns like sequences. */
+ else
+ fail = TRUE;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (succeed)
+ {
+ if ((!seeking_return) && (arc_ccfsm_state == 1 || reverse))
+ arc_ccfsm_target_label = CODE_LABEL_NUMBER (label);
+ else
+ {
+ gcc_assert (seeking_return || arc_ccfsm_state == 2);
+ while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
+ {
+ this_insn = next_nonnote_insn (this_insn);
+ gcc_assert (!this_insn
+ || (GET_CODE (this_insn) != BARRIER
+ && GET_CODE (this_insn) != CODE_LABEL));
+ }
+ if (!this_insn)
+ {
+ /* Oh dear! we ran off the end, give up. */
+ extract_insn_cached (insn);
+ arc_ccfsm_state = 0;
+ arc_ccfsm_target_insn = NULL;
+ return;
+ }
+ arc_ccfsm_target_insn = this_insn;
+ }
+
+ /* If REVERSE is true, arc_ccfsm_current_cc needs to be inverted from
+ what it was. */
+ if (!reverse)
+ arc_ccfsm_current_cc = get_arc_condition_code (XEXP (SET_SRC (body),
+ 0));
+
+ if (reverse || then_not_else)
+ arc_ccfsm_current_cc = ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc);
+ }
+
+ /* Restore recog_data. Getting the attributes of other insns can
+ destroy this array, but final.c assumes that it remains intact
+ across this call. */
+ extract_insn_cached (insn);
+ }
+}
+
+/* Record that we are currently outputting label NUM with prefix PREFIX.
+ If it's the label we're looking for, reset the ccfsm machinery.
+
+ Called from (*targetm.asm_out.internal_label). */
+
+void
+arc_ccfsm_at_label (const char *prefix, int num)
+{
+ if (arc_ccfsm_state == 3 && arc_ccfsm_target_label == num
+ && !strcmp (prefix, "L"))
+ {
+ arc_ccfsm_state = 0;
+ arc_ccfsm_target_insn = NULL_RTX;
+ }
+}
+
+/* See if the current insn, which is a conditional branch, is to be
+ deleted. */
+
+int
+arc_ccfsm_branch_deleted_p (void)
+{
+ if (arc_ccfsm_state == 1 || arc_ccfsm_state == 2)
+ return 1;
+ return 0;
+}
+
+/* Record that a branch isn't output because subsequent insns can be
+ conditionalized. */
+
+void
+arc_ccfsm_record_branch_deleted (void)
+{
+ /* Indicate we're conditionalizing insns now. */
+ arc_ccfsm_state += 2;
+
+ /* If the next insn is a subroutine call, we still need a nop between the
+ cc setter and user. We need to undo the effect of calling record_cc_ref
+ for the just deleted branch. */
+ current_insn_set_cc_p = last_insn_set_cc_p;
+}
+
+static void
+arc_va_start (tree valist, rtx nextarg)
+{
+ /* See arc_setup_incoming_varargs for reasons for this oddity. */
+ if (crtl->args.info < 8
+ && (crtl->args.info & 1))
+ nextarg = plus_constant (nextarg, UNITS_PER_WORD);
+
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* This is how to output a definition of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class. */
+
+static void
+arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
+{
+ arc_ccfsm_at_label (prefix, labelno);
+ default_internal_label (stream, prefix, labelno);
+}
+
+/* Worker function for TARGET_ASM_EXTERNAL_LIBCALL. */
+
+static void
+arc_external_libcall (rtx fun ATTRIBUTE_UNUSED)
+{
+#if 0
+/* On the ARC we want to have libgcc's for multiple cpus in one binary.
+ We can't use `assemble_name' here as that will call ASM_OUTPUT_LABELREF
+ and we'll get another suffix added on if -mmangle-cpu. */
+ if (TARGET_MANGLE_CPU_LIBGCC)
+ {
+ fprintf (FILE, "\t.rename\t_%s, _%s%s\n",
+ XSTR (SYMREF, 0), XSTR (SYMREF, 0),
+ arc_mangle_suffix);
+ }
+#endif
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ if (AGGREGATE_TYPE_P (type))
+ return true;
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 8);
+ }
+}
+
+/* For ARC, all aggregates and arguments greater than 8 bytes are
+ passed by reference. */
+
+static bool
+arc_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ {
+ if (AGGREGATE_TYPE_P (type))
+ return true;
+ size = int_size_in_bytes (type);
+ }
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return size > 8;
+}
+
+/* Round SIZE up to a word boundary. */
+#define ROUND_ADVANCE(SIZE) \
+(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round arg MODE/TYPE up to the next word boundary. */
+#define ROUND_ADVANCE_ARG(MODE, TYPE) \
+((MODE) == BLKmode \
+ ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \
+ : ROUND_ADVANCE (GET_MODE_SIZE (MODE)))
+
+/* Round CUM up to the necessary point for argument MODE/TYPE. */
+#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \
+((((MODE) == BLKmode ? TYPE_ALIGN (TYPE) : GET_MODE_BITSIZE (MODE)) \
+ > BITS_PER_WORD) \
+ ? (((CUM) + 1) & ~1) \
+ : (CUM))
+
+/* Return a boolean indicating whether an arg of type TYPE and mode MODE will be passed in
+ a reg. This includes arguments that have to be passed by reference as the
+ pointer to them is passed in a reg if one is available (and that is what
+ we're given). */
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+((CUM) < MAX_ARC_PARM_REGS \
+ && ((ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) \
+ + ROUND_ADVANCE_ARG ((MODE), (TYPE)) \
+ <= MAX_ARC_PARM_REGS)))
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
+ and the rest are pushed. */
+
+static rtx
+arc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return (PASS_IN_REG_P (*cum, mode, type)
+ ? gen_rtx_REG (mode, ROUND_ADVANCE_CUM (*cum, mode, type))
+ : NULL_RTX);
+}
+
+/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */
+
+static void
+arc_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum = (ROUND_ADVANCE_CUM (*cum, mode, type)
+ + ROUND_ADVANCE_ARG (mode, type));
+}
+
+/* Worker function for TARGET_FUNCTION_ARG_BOUNDARY. */
+
+static unsigned int
+arc_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ return (type != NULL_TREE
+ ? TYPE_ALIGN (type)
+ : (GET_MODE_BITSIZE (mode) <= PARM_BOUNDARY
+ ? PARM_BOUNDARY
+ : 2 * PARM_BOUNDARY));
+}
+
+/* Trampolines. */
+/* ??? This doesn't work yet because GCC will use as the address of a nested
+ function the address of the trampoline. We need to use that address
+ right shifted by 2. It looks like we'll need PSImode after all. :-(
+
+ ??? The above comment sounds like it's doable via
+ TARGET_TRAMPOLINE_ADJUST_ADDRESS; no PSImode needed.
+
+ On the ARC, the trampoline is quite simple as we have 32-bit immediate
+ constants.
+
+ mov r24,STATIC
+ j.nd FUNCTION
+*/
+
+static void
+arc_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ mem = adjust_address (m_tramp, SImode, 0);
+ emit_move_insn (mem, GEN_INT (0x631f7c00));
+
+ mem = adjust_address (m_tramp, SImode, 4);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, SImode, 8);
+ emit_move_insn (mem, GEN_INT (0x381f0000));
+
+ mem = adjust_address (m_tramp, SImode, 12);
+ emit_move_insn (mem, fnaddr);
+
+ emit_insn (gen_flush_icache (m_tramp));
+}
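+
+/* A reading of the constants above, inferred from the insn sketch in the
+ comment before this function: word 0 appears to be the encoding of
+ "mov r24,limm" with word 1 holding the static chain value, and word 2
+ the encoding of "j.nd limm" with word 3 holding the function address. */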
+
+/* Worker function for TARGET_CONDITIONAL_REGISTER_USAGE. */
+
+static void
+arc_conditional_register_usage (void)
+{
+ if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+}
+
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
new file mode 100644
index 000000000..0f2b99c19
--- /dev/null
+++ b/gcc/config/arc/arc.h
@@ -0,0 +1,935 @@
+/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
+ Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005,
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* ??? This is an old port, and is undoubtedly suffering from bit rot. */
+
+/* Things to do:
+
+ - incscc, decscc?
+ - print active compiler options in assembler output
+*/
+
+
+#undef ASM_SPEC
+#undef LINK_SPEC
+#undef LIB_SPEC
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+#undef WCHAR_TYPE
+#undef WCHAR_TYPE_SIZE
+#undef ASM_OUTPUT_LABELREF
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION fprintf (stderr, " (arc)")
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__arc__"); \
+ if (TARGET_BIG_ENDIAN) \
+ builtin_define ("__big_endian__"); \
+ if (arc_cpu_type == 0) \
+ builtin_define ("__base__"); \
+ builtin_assert ("cpu=arc"); \
+ builtin_assert ("machine=arc"); \
+ } while (0)
+
+/* Pass -mmangle-cpu if we get -mcpu=*.
+ Doing it this way lets one have it on as default with -mcpu=*,
+ but also lets one turn it off with -mno-mangle-cpu. */
+#define CC1_SPEC "\
+%{mcpu=*:-mmangle-cpu} \
+%{EB:%{EL:%emay not use both -EB and -EL}} \
+%{EB:-mbig-endian} %{EL:-mlittle-endian} \
+"
+
+#define ASM_SPEC "%{EB} %{EL}"
+
+#define LINK_SPEC "%{v} %{EB} %{EL}"
+
+#define LIB_SPEC "-lc"
+
+#define STARTFILE_SPEC "%{!shared:crt0.o%s} crtinit.o%s"
+
+#define ENDFILE_SPEC "crtfini.o%s"
+
+/* Instruction set characteristics.
+ These are internal macros, set by the appropriate -mcpu= option. */
+
+/* Nonzero means the cpu has a barrel shifter. */
+#define TARGET_SHIFTER 0
+
+/* Which cpu we're compiling for. */
+extern int arc_cpu_type;
+
+/* Check if CPU is an extension and set `arc_cpu_type' and `arc_mangle_cpu'
+ appropriately. The result should be nonzero if the cpu is recognized,
+ otherwise zero. This is intended to be redefined in a cover file.
+ This is used by arc_handle_option. */
+#define ARC_EXTENSION_CPU(cpu) 0
+
+
+/* Target machine storage layout. */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+{ \
+ (MODE) = SImode; \
+}
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 64
+
+/* Align stack frames on double-word (8 byte) boundaries. */
+#define ARC_STACK_ALIGN(LOC) (((LOC)+7) & ~7)
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* No data type wants to be aligned rounder than this. */
+/* This is bigger than currently necessary for the ARC. If 8 byte floats are
+ ever added it's not clear whether they'll need such alignment or not. For
+ now we assume they will. We can always relax it if necessary but the
+ reverse isn't true. */
+#define BIGGEST_ALIGNMENT 64
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 32
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
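+
+/* E.g. both the literal "hello" and a global `char buf[10]' get 32 bit
+   alignment here, so word-at-a-time copies can be used on them.  */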
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+/* On the ARC the lower address bits are masked to 0 as necessary. The chip
+ won't croak when given an unaligned address, but the insn will still fail
+ to produce the correct result. */
+#define STRICT_ALIGNMENT 1
+
+/* Layout of source language data types. */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+#define SIZE_TYPE "long unsigned int"
+#define PTRDIFF_TYPE "long int"
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+/* Registers 61, 62, and 63 are not really registers and we needn't treat
+ them as such. We still need a register for the condition code. */
+#define FIRST_PSEUDO_REGISTER 62
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ 0-28 - general purpose registers
+ 29 - ilink1 (interrupt link register)
+ 30 - ilink2 (interrupt link register)
+ 31 - blink (branch link register)
+ 32-59 - reserved for extensions
+ 60 - LP_COUNT
+ 61 - condition code
+
+ For doc purposes:
+ 61 - short immediate data indicator (setting flags)
+ 62 - long immediate data indicator
+ 63 - short immediate data indicator (not setting flags).
+
+ The general purpose registers are further broken down into:
+ 0-7 - arguments/results
+ 8-15 - call used
+ 16-23 - call saved
+ 24 - call used, static chain pointer
+ 25 - call used, gptmp
+ 26 - global pointer
+ 27 - frame pointer
+ 28 - stack pointer
+
+ By default, the extension registers are not available. */
+
+#define FIXED_REGISTERS \
+{ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 1, 1, 1, 0, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1 }
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+{ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1 }
+
+/* If defined, an initializer for a vector of integers, containing the
+ numbers of hard registers in the order in which GCC should
+ prefer to use them (from most preferred to least). */
+#define REG_ALLOC_ORDER \
+{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, \
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, \
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
+ 27, 28, 29, 30 }
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
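+
+/* E.g. QImode, HImode and SImode values take one register; DImode and
+   DFmode values take two.  */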
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+extern const unsigned int arc_hard_regno_mode_ok[];
+extern unsigned int arc_mode_class[];
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0)
+
+/* A C expression that is nonzero if it is desirable to choose
+ register allocation so as to avoid move instructions between a
+ value of mode MODE1 and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R,
+ MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1,
+ MODE2)' must be zero. */
+
+/* Tie QI/HI/SI modes together. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+(GET_MODE_CLASS (MODE1) == MODE_INT \
+ && GET_MODE_CLASS (MODE2) == MODE_INT \
+ && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
+ && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)
+
+/* Register classes and constants. */
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ It is important that any condition codes have class NO_REGS.
+ See `register_operand'. */
+
+enum reg_class {
+ NO_REGS, LPCOUNT_REG, GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "LPCOUNT_REG", "GENERAL_REGS", "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ {0, 0}, {0, 0x10000000}, {0xffffffff, 0xfffffff}, \
+ {0xffffffff, 0x1fffffff} }
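+
+/* Reading the masks: the first word covers regs 0-31, the second regs
+   32-63.  LPCOUNT_REG is bit 28 of the second word (reg 60, lp_count);
+   GENERAL_REGS is regs 0-59; ALL_REGS adds lp_count but deliberately
+   omits reg 61, the condition code, which stays in NO_REGS.  */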
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) \
+(arc_regno_reg_class[REGNO])
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Get reg_class from a letter such as appears in the machine description. */
+#define REG_CLASS_FROM_LETTER(C) \
+((C) == 'l' ? LPCOUNT_REG /* ??? needed? */ \
+ : NO_REGS)
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32)
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* The letters I, J, K, L, M, N, O, P in a register constraint string
+ can be used to stand for particular ranges of immediate operands.
+ This macro defines what the ranges are.
+ C is the letter, and VALUE is a constant value.
+ Return 1 if VALUE is in the range specified by C. */
+/* 'I' is used for short immediates (always signed).
+ 'J' is used for long immediates.
+ 'K' is used for any constant up to 64 bits (for 64x32 situations?). */
+
+/* local to this file */
+#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200)
+/* local to this file */
+#define LARGE_INT(X) \
+((X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \
+ && (unsigned HOST_WIDE_INT)(X) <= (unsigned HOST_WIDE_INT) 0xffffffff)
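+
+/* So SMALL_INT accepts exactly -256 .. 255 (a 9 bit signed value), and
+   LARGE_INT anything representable in 32 bits, signed or unsigned.  */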
+
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+((C) == 'I' ? SMALL_INT (VALUE) \
+ : (C) == 'J' ? LARGE_INT (VALUE) \
+ : (C) == 'K' ? 1 \
+ : 0)
+
+/* Similar, but for floating constants, and defining letters G and H.
+ Here VALUE is the CONST_DOUBLE rtx itself. */
+/* 'G' is used for integer values for the multiplication insns where the
+ operands are extended from 4 bytes to 8 bytes.
+ 'H' is used when any 64-bit constant is allowed. */
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+((C) == 'G' ? arc_double_limm_p (VALUE) \
+ : (C) == 'H' ? 1 \
+ : 0)
+
+/* A C expression that defines the optional machine-dependent constraint
+ letters that can be used to segregate specific types of operands,
+ usually memory references, for the target machine. It should return 1 if
+ VALUE corresponds to the operand type represented by the constraint letter
+ C. If C is not defined as an extra constraint, the value returned should
+ be 0 regardless of VALUE. */
+/* ??? This currently isn't used. Waiting for PIC. */
+#if 0
+#define EXTRA_CONSTRAINT(VALUE, C) \
+((C) == 'R' ? (SYMBOL_REF_FUNCTION_P (VALUE) || GET_CODE (VALUE) == LABEL_REF) \
+ : 0)
+#endif
+
+/* Stack layout and stack pointer usage. */
+
+/* Define this macro if pushing a word onto the stack moves the stack
+ pointer to a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset from the stack pointer register to the first location at which
+ outgoing arguments are placed. */
+#define STACK_POINTER_OFFSET FIRST_PARM_OFFSET (0)
+
+/* Offset of first parameter from the argument pointer register value. */
+/* 4 bytes for each of previous fp, return address, and previous gp.
+ 4 byte reserved area for future considerations. */
+#define FIRST_PARM_OFFSET(FNDECL) 16
+
+/* A C expression whose value is RTL representing the address in a
+ stack frame where the pointer to the caller's frame is stored.
+ Assume that FRAMEADDR is an RTL expression for the address of the
+ stack frame itself.
+
+ If you don't define this macro, the default is to return the value
+ of FRAMEADDR--that is, the stack frame address is also the address
+ of the stack word that points to the previous frame. */
+/* ??? unfinished */
+/*define DYNAMIC_CHAIN_ADDRESS (FRAMEADDR)*/
+
+/* A C expression whose value is RTL representing the value of the
+ return address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is the frame pointer of the COUNT frame, or the frame
+ pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME'
+ is defined. */
+/* The current return address is in r31. The return address of anything
+ farther back is at [%fp,4]. */
+#if 0 /* The default value should work. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+(((COUNT) == -1) \
+ ? gen_rtx_REG (Pmode, 31) \
+ : copy_to_reg (gen_rtx_MEM (Pmode, \
+ memory_address (Pmode, \
+ plus_constant ((FRAME), \
+ UNITS_PER_WORD)))))
+#endif
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 28
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 27
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM
+
+/* Register in which static-chain is passed to a function. This must
+ not be a register used by the prologue. */
+#define STATIC_CHAIN_REGNUM 24
+
+/* C statement to store the difference between the frame pointer
+ and the stack pointer values immediately after the function prologue. */
+#define INITIAL_FRAME_POINTER_OFFSET(VAR) \
+((VAR) = arc_compute_frame_size (get_frame_size ()))
+
+/* Function argument passing. */
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue should
+ increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+#define CUMULATIVE_ARGS int
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+((CUM) = 0)
+
+/* The number of registers used for parameter passing. Local to this file. */
+#define MAX_ARC_PARM_REGS 8
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) \
+((unsigned) (N) < MAX_ARC_PARM_REGS)
+
+
+/* Function results. */
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) gen_rtx_REG (TYPE_MODE (VALTYPE), 0)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0)
+
+/* 1 if N is a possible register number for a function value
+ as seen by the caller. */
+/* ??? What about r1 in DI/DF values. */
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0)
+
+/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 0
+
+/* Epilogue delay slots. */
+#define DELAY_SLOTS_FOR_EPILOGUE arc_delay_slots_for_epilogue ()
+
+#define ELIGIBLE_FOR_EPILOGUE_DELAY(TRIAL, SLOTS_FILLED) \
+arc_eligible_for_epilogue_delay (TRIAL, SLOTS_FILLED)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+#define FUNCTION_PROFILER(FILE, LABELNO)
+
+#define TRAMPOLINE_ALIGNMENT 32
+#define TRAMPOLINE_SIZE 16
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Maximum number of registers that can appear in a valid memory address. */
+/* The `ld' insn allows 2, but the `st' insn only allows 1. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* We have pre inc/dec (load/store with update). */
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+
+/* Recognize any constant value that is a valid address. */
+#define CONSTANT_ADDRESS_P(X) \
+(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ We can handle any 32- or 64-bit constant. */
+/* "1" should work since the largest constant should be a 64 bit critter. */
+/* ??? Not sure what to do for 64x32 compiler. */
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) \
+((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32)
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) \
+((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32)
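+
+/* The unsigned subtraction makes this a single comparison: hard regs
+   0-31 wrap around to huge values and pass, extension regs 32-61 fail,
+   and pseudos (>= FIRST_PSEUDO_REGISTER) pass.  */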
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address. */
+/* The `ld' insn allows [reg],[reg+shimm],[reg+limm],[reg+reg],[limm]
+ but the `st' insn only allows [reg],[reg+shimm],[limm].
+ The only thing we can do is only allow the most strict case `st' and hope
+ other parts optimize out the restrictions for `ld'. */
+
+/* local to this file */
+#define RTX_OK_FOR_BASE_P(X) \
+(REG_P (X) && REG_OK_FOR_BASE_P (X))
+
+/* local to this file */
+#define RTX_OK_FOR_INDEX_P(X) \
+(0 && /*???*/ REG_P (X) && REG_OK_FOR_INDEX_P (X))
+
+/* local to this file */
+/* ??? Loads can handle any constant, stores can only handle small ones. */
+#define RTX_OK_FOR_OFFSET_P(X) \
+(GET_CODE (X) == CONST_INT && SMALL_INT (INTVAL (X)))
+
+#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X) \
+(GET_CODE (X) == PLUS \
+ && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && (RTX_OK_FOR_INDEX_P (XEXP (X, 1)) \
+ || RTX_OK_FOR_OFFSET_P (XEXP (X, 1))))
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+{ if (RTX_OK_FOR_BASE_P (X)) \
+ goto ADDR; \
+ if (LEGITIMATE_OFFSET_ADDRESS_P ((MODE), (X))) \
+ goto ADDR; \
+ if (GET_CODE (X) == CONST_INT && LARGE_INT (INTVAL (X))) \
+ goto ADDR; \
+ if (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == CONST) \
+ goto ADDR; \
+ if ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == PRE_INC) \
+ /* We're restricted here by the `st' insn. */ \
+ && RTX_OK_FOR_BASE_P (XEXP ((X), 0))) \
+ goto ADDR; \
+}
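+
+/* In RTL terms the accepted forms are (reg), (plus (reg) (const_int
+   shimm)), a bare 32 bit constant address, a symbol, label, or CONST
+   (possibly offset), and PRE_INC/PRE_DEC of a base register.  */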
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. */
+#define SELECT_CC_MODE(OP, X, Y) \
+arc_select_cc_mode (OP, X, Y)
+
+/* Return nonzero if SELECT_CC_MODE will never return MODE for a
+ floating point inequality comparison. */
+#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/
+
+/* Costs. */
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) 2
+
+/* Compute the cost of moving data between registers and memory. */
+/* Memory is 3 times as expensive as registers.
+ ??? Is that the right way to look at it? */
+#define MEMORY_MOVE_COST(MODE,CLASS,IN) \
+(GET_MODE_SIZE (MODE) <= UNITS_PER_WORD ? 6 : 12)
+
+/* The cost of a branch insn. */
+/* ??? What's the right value here? Branches are certainly more
+ expensive than reg->reg moves. */
+#define BRANCH_COST(speed_p, predictable_p) 2
+
+/* Nonzero if access to memory by bytes is slow and undesirable.
+ For RISC chips, it means that access to memory by bytes is no
+ better than access by words when possible, so grab a whole word
+ and maybe make use of that. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register. */
+/* On the ARC, calling through registers is slow. */
+#define NO_FUNCTION_CSE
+
+/* Section selection. */
+/* WARNING: These section names also appear in dwarfout.c. */
+
+/* The names of the text, data, and readonly-data sections are runtime
+ selectable. */
+
+#define ARC_SECTION_FORMAT "\t.section %s"
+#define ARC_DEFAULT_TEXT_SECTION ".text"
+#define ARC_DEFAULT_DATA_SECTION ".data"
+#define ARC_DEFAULT_RODATA_SECTION ".rodata"
+
+extern const char *arc_text_section, *arc_data_section, *arc_rodata_section;
+
+/* initfini.c uses this in an asm. */
+#if defined (CRT_INIT) || defined (CRT_FINI)
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+#else
+#define TEXT_SECTION_ASM_OP arc_text_section
+#endif
+#define DATA_SECTION_ASM_OP arc_data_section
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP arc_rodata_section
+
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+/* Define this macro if jump tables (for tablejump insns) should be
+ output in the text section, along with the assembler instructions.
+ Otherwise, the readonly data section is used.
+ This macro is irrelevant if there is no separate readonly data section. */
+/*#define JUMP_TABLES_IN_TEXT_SECTION*/
+
+/* For DWARF.  Marginally different from the default so the output is
+   "prettier" (and consistent with the above).  */
+#define PUSHSECTION_ASM_OP "\t.section "
+
+/* Tell crtstuff.c we're using ELF. */
+#define OBJECT_FORMAT_ELF
+
+/* PIC */
+
+/* The register number of the register used to address a table of static
+ data addresses in memory. In some cases this register is defined by a
+ processor's ``application binary interface'' (ABI). When this macro
+ is defined, RTL is generated for this register once, as with the stack
+ pointer and frame pointer registers. If this macro is not defined, it
+ is up to the machine-dependent files to allocate such a register (if
+ necessary). */
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 26 : INVALID_REGNUM)
+
+/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is
+ clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM
+ is not defined. */
+/* This register is call-saved on the ARC. */
+/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/
+
+/* A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent code.
+ You can assume that X satisfies CONSTANT_P, so you need not
+ check this. You can also assume `flag_pic' is true, so you need not
+ check it either. You need not define this macro if all constants
+ (including SYMBOL_REF) can be immediate operands when generating
+ position independent code. */
+/*#define LEGITIMATE_PIC_OPERAND_P(X)*/
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will
+ end at the end of the line. */
+#define ASM_COMMENT_START ";"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#define ASM_APP_OFF ""
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* This is how to output a reference to a user-level label named NAME.
+ `assemble_name' uses this. */
+/* We mangle all user labels to provide protection from linking code
+ compiled for different cpus. */
+/* We work around a dwarfout.c deficiency by watching for labels from it and
+ not adding the '_' prefix nor the cpu suffix. There is a comment in
+ dwarfout.c that says it should be using (*targetm.asm_out.internal_label). */
+extern const char *arc_mangle_cpu;
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+do { \
+ if ((NAME)[0] == '.' && (NAME)[1] == 'L') \
+ fprintf (FILE, "%s", NAME); \
+ else \
+ { \
+ fputc ('_', FILE); \
+ if (TARGET_MANGLE_CPU && arc_mangle_cpu != NULL) \
+ fprintf (FILE, "%s_", arc_mangle_cpu); \
+ fprintf (FILE, "%s", NAME); \
+ } \
+} while (0)
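+
+/* E.g. `foo' comes out as `_foo', or as `_<cpu>_foo' when compiling
+   with -mmangle-cpu; internal labels such as `.L3' pass through
+   unchanged.  */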
+
+/* Assembler pseudo-op to equate one value with another. */
+/* ??? This is needed because dwarfout.c provides a default definition too
+ late for defaults.h (which contains the default definition of ASM_OUTPUT_DEF
+ that we use). */
+#define SET_ASM_OP "\t.set\t"
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+#define REGISTER_NAMES \
+{"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "fp", "sp", "ilink1", "ilink2", "blink", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ "r56", "r57", "r58", "r59", "lp_count", "cc"}
+
+/* Entry to the insn conditionalizer. */
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+arc_final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+/* A C expression which evaluates to true if CODE is a valid
+ punctuation character for use in the `PRINT_OPERAND' macro. */
+extern char arc_punct_chars[256];
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
+arc_punct_chars[(unsigned char) (CHAR)]
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+#define PRINT_OPERAND(FILE, X, CODE) \
+arc_print_operand (FILE, X, CODE)
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression. */
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+arc_print_operand_address (FILE, ADDR)
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+do { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ fprintf (FILE, "\t.word %%st("); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, ")\n"); \
+} while (0)
+
+/* This is how to output an element of a case-vector that is relative. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+do { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ fprintf (FILE, "\t.word %%st("); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "-"); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, ")\n"); \
+} while (0)
+
+/* The desired alignment for the location counter at the beginning
+ of a loop. */
+/* On the ARC, align loops to 32 byte boundaries (cache line size)
+ if -malign-loops. */
+#define LOOP_ALIGN(LABEL) (TARGET_ALIGN_LOOPS ? 5 : 0)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); } while (0)
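+
+/* Note that the argument to .align here is a byte count, not a power of
+   two: LOG == 2 emits ".align 4".  */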
+
+/* Debugging information. */
+
+/* Generate DBX and DWARF debugging information. */
+#define DBX_DEBUGGING_INFO 1
+
+/* Prefer STABS (for now). */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+/* Turn off splitting of long stabs. */
+#define DBX_CONTIN_LENGTH 0
+
+/* Miscellaneous. */
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE Pmode
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+/* ??? The ARC doesn't have full 32-bit pointers, but making this PSImode has
+   its own problems (you have to add extendpsisi2 and truncsipsi2 but how does
+   one do it without getting excess code?).  Try to avoid it.  */
+#define Pmode SImode
+
+/* A function address in a call instruction. */
+#define FUNCTION_MODE SImode
+
+/* alloca should avoid clobbering the old register save area. */
+/* ??? Not defined in tm.texi. */
+#define SETJMP_VIA_SAVE_AREA
+
+/* ARC function types. */
+enum arc_function_type {
+ ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL,
+ /* These are interrupt handlers. The name corresponds to the register
+ name that contains the return address. */
+ ARC_FUNCTION_ILINK1, ARC_FUNCTION_ILINK2
+};
+#define ARC_INTERRUPT_P(TYPE) \
+((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2)
+/* Compute the type of a function from its DECL. */
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
new file mode 100644
index 000000000..09e47daf1
--- /dev/null
+++ b/gcc/config/arc/arc.md
@@ -0,0 +1,1376 @@
+;; Machine description of the Argonaut ARC cpu for GNU C compiler
+;; Copyright (C) 1994, 1997, 1998, 1999, 2000, 2004, 2005, 2007, 2008
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; ??? This is an old port, and is undoubtedly suffering from bit rot.
+
+;; Insn type. Used to default other attribute values.
+
+(define_attr "type"
+ "move,load,store,cmove,unary,binary,compare,shift,mul,uncond_branch,branch,call,call_no_delay_slot,multi,misc"
+ (const_string "binary"))
+
+;; Length (in # of insns, long immediate constants counted too).
+;; ??? There's a nasty interaction between the conditional execution fsm
+;; and insn lengths: insns with shimm values cannot be conditionally executed.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "load")
+ (if_then_else (match_operand 1 "long_immediate_loadstore_operand" "")
+ (const_int 2) (const_int 1))
+
+ (eq_attr "type" "store")
+ (if_then_else (match_operand 0 "long_immediate_loadstore_operand" "")
+ (const_int 2) (const_int 1))
+
+ (eq_attr "type" "move,unary,compare")
+ (if_then_else (match_operand 1 "long_immediate_operand" "")
+ (const_int 2) (const_int 1))
+
+ (eq_attr "type" "binary,mul")
+ (if_then_else (match_operand 2 "long_immediate_operand" "")
+ (const_int 2) (const_int 1))
+
+ (eq_attr "type" "cmove")
+ (if_then_else (match_operand 2 "register_operand" "")
+ (const_int 1) (const_int 2))
+
+ (eq_attr "type" "multi") (const_int 2)
+ ]
+
+ (const_int 1)))
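+
+;; For instance, "ld r0,[r1]" counts as length 1, while a load whose
+;; address needs a long immediate counts as 2.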
+
+;; The length here is the length of a single asm. Unfortunately it might be
+;; 1 or 2 so we must allow for 2. That's ok though. How often will users
+;; lament asm's not being put in delay slots?
+(define_asm_attributes
+ [(set_attr "length" "2")
+ (set_attr "type" "multi")])
+
+;; Condition codes: this one is used by final_prescan_insn to speed up
+;; conditionalizing instructions. It saves having to scan the rtl to see if
+;; it uses or alters the condition codes.
+
+;; USE: This insn uses the condition codes (e.g.: a conditional branch).
+;; CANUSE: This insn can use the condition codes (for conditional execution).
+;; SET: All condition codes are set by this insn.
+;; SET_ZN: the Z and N flags are set by this insn.
+;; SET_ZNC: the Z, N, and C flags are set by this insn.
+;; CLOB: The condition codes are set to unknown values by this insn.
+;; NOCOND: This insn can't use and doesn't affect the condition codes.
+
+(define_attr "cond" "use,canuse,set,set_zn,set_znc,clob,nocond"
+ (cond [(and (eq_attr "type" "unary,binary,move")
+ (eq_attr "length" "1"))
+ (const_string "canuse")
+
+ (eq_attr "type" "compare")
+ (const_string "set")
+
+ (eq_attr "type" "cmove,branch")
+ (const_string "use")
+
+ (eq_attr "type" "multi,misc")
+ (const_string "clob")
+ ]
+
+ (const_string "nocond")))
+
+;; Delay slots.
+
+(define_attr "in_delay_slot" "false,true"
+ (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi")
+ (const_string "false")
+ ]
+
+ (if_then_else (eq_attr "length" "1")
+ (const_string "true")
+ (const_string "false"))))
+
+(define_delay (eq_attr "type" "call")
+ [(eq_attr "in_delay_slot" "true")
+ (eq_attr "in_delay_slot" "true")
+ (eq_attr "in_delay_slot" "true")])
+
+(define_delay (eq_attr "type" "branch,uncond_branch")
+ [(eq_attr "in_delay_slot" "true")
+ (eq_attr "in_delay_slot" "true")
+ (eq_attr "in_delay_slot" "true")])
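+
+;; Each triple above is (eligible-insn, annul-if-true, annul-if-false)
+;; for one delay slot: calls and branches get a single slot that any
+;; insn satisfying in_delay_slot may fill.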
+
+;; Scheduling description for the ARC
+
+(define_cpu_unit "branch")
+
+(define_insn_reservation "any_insn" 1 (eq_attr "type" "!load,compare,branch")
+ "nothing")
+
+;; 1) A conditional jump cannot immediately follow the insn setting the flags.
+;; This isn't a complete solution as it doesn't come with guarantees. That
+;; is done in the branch patterns and in arc_print_operand. This exists to
+;; avoid inserting a nop when we can.
+
+(define_insn_reservation "compare" 1 (eq_attr "type" "compare")
+ "nothing,branch")
+
+(define_insn_reservation "branch" 1 (eq_attr "type" "branch")
+ "branch")
+
+;; 2) References to loaded registers should wait a cycle.
+
+;; Memory with load-delay of 1 (i.e., 2 cycle load).
+
+(define_insn_reservation "memory" 2 (eq_attr "type" "load")
+ "nothing")
+
+;; Move instructions.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (QImode, operands[1]);
+}")
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:QI 1 "move_src_operand" "rI,Ji,m,r"))]
+;; ??? Needed?
+ "register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode)"
+ "@
+ mov%? %0,%1
+ mov%? %0,%1
+ ldb%U1%V1 %0,%1
+ stb%U0%V0 %1,%0"
+ [(set_attr "type" "move,move,load,store")])
+
+;; ??? This may never match since there's no cmpqi insn.
+
+(define_insn "*movqi_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (sign_extend:SI (match_operand:QI 1 "move_src_operand" "rIJi"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "move_dest_operand" "=r")
+ (match_dup 1))]
+ ""
+ "mov%?.f %0,%1"
+ [(set_attr "type" "move")
+ (set_attr "cond" "set_zn")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+}")
+
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:HI 1 "move_src_operand" "rI,Ji,m,r"))]
+ "register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode)"
+ "@
+ mov%? %0,%1
+ mov%? %0,%1
+ ldw%U1%V1 %0,%1
+ stw%U0%V0 %1,%0"
+ [(set_attr "type" "move,move,load,store")])
+
+;; ??? Will this ever match?
+
+(define_insn "*movhi_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (sign_extend:SI (match_operand:HI 1 "move_src_operand" "rIJi"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "move_dest_operand" "=r")
+ (match_dup 1))]
+;; ??? Needed?
+ "register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode)"
+ "mov%?.f %0,%1"
+ [(set_attr "type" "move")
+ (set_attr "cond" "set_zn")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SImode, operands[1]);
+}")
+
+(define_insn "*movsi_insn"
+ [(set (match_operand:SI 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:SI 1 "move_src_operand" "rI,GJi,m,r"))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ mov%? %0,%1
+ mov%? %0,%S1
+ ld%U1%V1 %0,%1
+ st%U0%V0 %1,%0"
+ [(set_attr "type" "move,move,load,store")])
+
+(define_insn "*movsi_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (match_operand:SI 1 "move_src_operand" "rIJi")
+ (const_int 0)))
+ (set (match_operand:SI 0 "move_dest_operand" "=r")
+ (match_dup 1))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "mov%?.f %0,%S1"
+ [(set_attr "type" "move")
+ (set_attr "cond" "set_zn")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DImode, operands[1]);
+}")
+
+(define_insn "*movdi_insn"
+ [(set (match_operand:DI 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:DI 1 "move_double_src_operand" "r,HK,m,r"))]
+ "register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0 :
+ /* We normally copy the low-numbered register first. However, if
+ the first register of operand 0 is the same as the second register
+ of operand 1, we must copy in the opposite order. */
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"mov %R0,%R1\;mov %0,%1\";
+ else
+ return \"mov %0,%1\;mov %R0,%R1\";
+ case 1 :
+ return \"mov %0,%L1\;mov %R0,%H1\";
+ case 2 :
+ /* If the low-address word is used in the address, we must load it
+ last. Otherwise, load it first. Note that we cannot have
+ auto-increment in that case since the address register is known to be
+ dead. */
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands [1], 0))
+ return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
+ else
+ return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\";
+ case 3 :
+ return \"st%V0 %1,%0\;st%V0 %R1,%R0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "move,move,load,store")
+ ;; ??? The ld/st values could be 4 if it's [reg,bignum].
+ (set_attr "length" "2,4,2,2")])
+
+;(define_expand "movdi"
+; [(set (match_operand:DI 0 "general_operand" "")
+; (match_operand:DI 1 "general_operand" ""))]
+; ""
+; "
+;{
+; /* Flow doesn't understand that this is effectively a DFmode move.
+; It doesn't know that all of `operands[0]' is set. */
+; emit_clobber (operands[0]);
+;
+; /* Emit insns that movsi_insn can handle. */
+; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DImode),
+; operand_subword (operands[1], 0, 0, DImode)));
+; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DImode),
+; operand_subword (operands[1], 1, 0, DImode)));
+; DONE;
+;}")
+
+;; Floating point move insns.
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SFmode, operands[1]);
+}")
+
+(define_insn "*movsf_insn"
+ [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:SF 1 "move_src_operand" "r,E,m,r"))]
+ "register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode)"
+ "@
+ mov%? %0,%1
+ mov%? %0,%1 ; %A1
+ ld%U1%V1 %0,%1
+ st%U0%V0 %1,%0"
+ [(set_attr "type" "move,move,load,store")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DFmode, operands[1]);
+}")
+
+(define_insn "*movdf_insn"
+ [(set (match_operand:DF 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:DF 1 "move_double_src_operand" "r,E,m,r"))]
+ "register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode)"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0 :
+ /* We normally copy the low-numbered register first. However, if
+ the first register of operand 0 is the same as the second register
+ of operand 1, we must copy in the opposite order. */
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"mov %R0,%R1\;mov %0,%1\";
+ else
+ return \"mov %0,%1\;mov %R0,%R1\";
+ case 1 :
+ return \"mov %0,%L1\;mov %R0,%H1 ; %A1\";
+ case 2 :
+ /* If the low-address word is used in the address, we must load it
+ last. Otherwise, load it first. Note that we cannot have
+ auto-increment in that case since the address register is known to be
+ dead. */
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands [1], 0))
+ return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
+ else
+ return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\";
+ case 3 :
+ return \"st%V0 %1,%0\;st%V0 %R1,%R0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "move,move,load,store")
+ ;; ??? The ld/st values could be 4 if it's [reg,bignum].
+ (set_attr "length" "2,4,2,2")])
+
+;(define_expand "movdf"
+; [(set (match_operand:DF 0 "general_operand" "")
+; (match_operand:DF 1 "general_operand" ""))]
+; ""
+; "
+;{
+; /* Flow doesn't understand that this is effectively a DFmode move.
+; It doesn't know that all of `operands[0]' is set. */
+; emit_clobber (operands[0]);
+;
+; /* Emit insns that movsi_insn can handle. */
+; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DFmode),
+; operand_subword (operands[1], 0, 0, DFmode)));
+; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DFmode),
+; operand_subword (operands[1], 1, 0, DFmode)));
+; DONE;
+;}")
+
+;; Load/Store with update instructions.
+;;
+;; Some of these we can get by using pre-decrement or pre-increment, but the
+;; hardware can also do cases where the increment is not the size of the
+;; object.
+;;
+;; In all these cases, we use operands 0 and 1 for the register being
+;; incremented because those are the operands that local-alloc will
+;; tie and these are the pair most likely to be tieable (and the ones
+;; that will benefit the most).
+;;
+;; We use match_operator here because we need to know whether the memory
+;; object is volatile or not.
+
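+;; As a motivating (hypothetical) example, a copy loop like
+;;
+;;	while (n--) *dst++ = *src++;
+;;
+;; can fold each pointer bump into the access itself; the ".a" suffix in
+;; the templates below requests the address write-back form of ld/st.
+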
+(define_insn "*loadqi_update"
+ [(set (match_operand:QI 3 "register_operand" "=r,r")
+ (match_operator:QI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldb.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*load_zeroextendqisi_update"
+ [(set (match_operand:SI 3 "register_operand" "=r,r")
+ (zero_extend:SI (match_operator:QI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldb.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*load_signextendqisi_update"
+ [(set (match_operand:SI 3 "register_operand" "=r,r")
+ (sign_extend:SI (match_operator:QI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldb.x.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*storeqi_update"
+ [(set (match_operator:QI 4 "store_update_operand"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "short_immediate_operand" "I")])
+ (match_operand:QI 3 "register_operand" "r"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "stb.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "store")
+ (set_attr "length" "1")])
+
+(define_insn "*loadhi_update"
+ [(set (match_operand:HI 3 "register_operand" "=r,r")
+ (match_operator:HI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldw.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*load_zeroextendhisi_update"
+ [(set (match_operand:SI 3 "register_operand" "=r,r")
+ (zero_extend:SI (match_operator:HI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldw.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*load_signextendhisi_update"
+ [(set (match_operand:SI 3 "register_operand" "=r,r")
+ (sign_extend:SI (match_operator:HI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ldw.x.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*storehi_update"
+ [(set (match_operator:HI 4 "store_update_operand"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "short_immediate_operand" "I")])
+ (match_operand:HI 3 "register_operand" "r"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "stw.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "store")
+ (set_attr "length" "1")])
+
+(define_insn "*loadsi_update"
+ [(set (match_operand:SI 3 "register_operand" "=r,r")
+ (match_operator:SI 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ld.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*storesi_update"
+ [(set (match_operator:SI 4 "store_update_operand"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "short_immediate_operand" "I")])
+ (match_operand:SI 3 "register_operand" "r"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "st.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "store")
+ (set_attr "length" "1")])
+
+(define_insn "*loadsf_update"
+ [(set (match_operand:SF 3 "register_operand" "=r,r")
+ (match_operator:SF 4 "load_update_operand"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
+ (set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "ld.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "load,load")
+ (set_attr "length" "1,2")])
+
+(define_insn "*storesf_update"
+ [(set (match_operator:SF 4 "store_update_operand"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "short_immediate_operand" "I")])
+ (match_operand:SF 3 "register_operand" "r"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "st.a%V4 %3,[%0,%2]"
+ [(set_attr "type" "store")
+ (set_attr "length" "1")])
+
+;; Conditional move instructions.
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "nonmemory_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx cc_reg = gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
+}")
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "nonmemory_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx cc_reg = gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
+}")
+
+(define_insn "*movsicc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "nonmemory_operand" "rJi")
+ (match_operand:SI 3 "register_operand" "0")))]
+ ""
+ "mov.%d1 %0,%S2"
+ [(set_attr "type" "cmove")])
+
+(define_insn "*movsfcc_insn"
+ [(set (match_operand:SF 0 "register_operand" "=r,r")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "nonmemory_operand" "r,E")
+ (match_operand:SF 3 "register_operand" "0,0")))]
+ ""
+ "@
+ mov.%d1 %0,%2
+ mov.%d1 %0,%2 ; %A2"
+ [(set_attr "type" "cmove,cmove")])
+
+
+;; Zero extension instructions.
+;; ??? We don't support volatile memrefs here, but I'm not sure why.
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ extb%? %0,%1
+ ldb%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*zero_extendqihi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (match_dup 1)))]
+ ""
+ "extb%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ extb%? %0,%1
+ ldb%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*zero_extendqisi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_dup 1)))]
+ ""
+ "extb%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ extw%? %0,%1
+ ldw%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*zero_extendhisi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_dup 1)))]
+ ""
+ "extw%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+;; Sign extension instructions.
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ sexb%? %0,%1
+ ldb.x%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*extendqihi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_dup 1)))]
+ ""
+ "sexb%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ sexb%? %0,%1
+ ldb.x%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*extendqisi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_dup 1)))]
+ ""
+ "sexb%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))]
+ ""
+ "@
+ sexw%? %0,%1
+ ldw.x%U1 %0,%1"
+ [(set_attr "type" "unary,load")])
+
+(define_insn "*extendhisi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_dup 1)))]
+ ""
+ "sexw%?.f %0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+;; Arithmetic instructions.
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ ""
+ "add%? %0,%1,%2")
+
+(define_insn "*addsi3_set_cc_insn"
+ [(set (reg:CC 61) (compare:CC
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "add%?.f %0,%1,%2"
+ [(set_attr "cond" "set")])
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "nonmemory_operand" "%r")
+ (match_operand:DI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:CC 61))]
+ ""
+ "*
+{
+ rtx op2 = operands[2];
+
+ if (GET_CODE (op2) == CONST_INT)
+ {
+ int sign = INTVAL (op2);
+ if (sign < 0)
+ return \"add.f %L0,%L1,%2\;adc %H0,%H1,-1\";
+ else
+ return \"add.f %L0,%L1,%2\;adc %H0,%H1,0\";
+ }
+ else
+ return \"add.f %L0,%L1,%L2\;adc %H0,%H1,%H2\";
+}"
+ [(set_attr "length" "2")])
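+
+;; The CONST_INT case above sign-extends the 32 bit immediate by hand: a
+;; negative constant has an implicit high word of -1, a non-negative one
+;; a high word of 0, hence the adc operand.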
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ ""
+ "sub%? %0,%1,%2")
+
+(define_insn "*subsi3_set_cc_insn"
+ [(set (reg:CC 61) (compare:CC
+ (minus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "sub%?.f %0,%1,%2"
+ [(set_attr "cond" "set")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "nonmemory_operand" "r")
+ (match_operand:DI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:CC 61))]
+ ""
+ "*
+{
+ rtx op2 = operands[2];
+
+ if (GET_CODE (op2) == CONST_INT)
+ {
+ int sign = INTVAL (op2);
+ if (sign < 0)
+ return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,-1\";
+ else
+ return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,0\";
+ }
+ else
+ return \"sub.f %L0,%L1,%L2\;sbc %H0,%H1,%H2\";
+}"
+ [(set_attr "length" "2")])
+
+;; Boolean instructions.
+;;
+;; We don't define the DImode versions as expand_binop does a good enough job.
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ ""
+ "and%? %0,%1,%2")
+
+(define_insn "*andsi3_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (and:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "and%?.f %0,%1,%2"
+ [(set_attr "cond" "set_zn")])
+
+(define_insn "*bicsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (and:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
+ (not:SI (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r"))))]
+ ""
+ "bic%? %0,%1,%2"
+ [(set_attr "length" "1,2,1,2")])
+
+(define_insn "*bicsi3_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (and:SI (match_operand:SI 1 "register_operand" "%r")
+ (not:SI (match_operand:SI 2 "nonmemory_operand" "rIJ")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_dup 1)
+ (not:SI (match_dup 2))))]
+ ""
+ "bic%?.f %0,%1,%2"
+ [(set_attr "cond" "set_zn")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ ""
+ "or%? %0,%1,%2")
+
+(define_insn "*iorsi3_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "or%?.f %0,%1,%2"
+ [(set_attr "cond" "set_zn")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ ""
+ "xor%? %0,%1,%2")
+
+(define_insn "*xorsi3_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "xor%?.f %0,%1,%2"
+ [(set_attr "cond" "set_zn")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "sub%? %0,0,%1"
+ [(set_attr "type" "unary")])
+
+(define_insn "*negsi2_set_cc_insn"
+ [(set (reg:CC 61) (compare:CC
+ (neg:SI (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_dup 1)))]
+ ""
+ "sub%?.f %0,0,%1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set")])
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (clobber (reg:SI 61))]
+ ""
+ "sub.f %L0,0,%L1\;sbc %H0,0,%H1"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "xor%? %0,%1,-1"
+ [(set_attr "type" "unary")])
+
+(define_insn "*one_cmplsi2_set_cc_insn"
+ [(set (reg:CCZN 61) (compare:CCZN
+ (not:SI (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_dup 1)))]
+ ""
+ "xor%?.f %0,%1,-1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "set_zn")])
+
+;; Shift instructions.
+
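+;; When the target lacks a barrel shifter (! TARGET_SHIFTER), the expanders
+;; below emit a parallel containing the shift and a scratch clobber, so
+;; that the *shift_si3 pattern can synthesize the shift as a multi-insn
+;; sequence via output_shift.
+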
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (! TARGET_SHIFTER)
+ {
+ emit_insn (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_ASHIFT (SImode, operands[1],
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (SImode)))));
+ DONE;
+ }
+}")
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (! TARGET_SHIFTER)
+ {
+ emit_insn (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_ASHIFTRT (SImode,
+ operands[1],
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (SImode)))));
+ DONE;
+ }
+}")
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (! TARGET_SHIFTER)
+ {
+ emit_insn (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_LSHIFTRT (SImode,
+ operands[1],
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (SImode)))));
+ DONE;
+ }
+}")
+
+(define_insn "*ashlsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (ashift:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
+ "TARGET_SHIFTER"
+ "asl%? %0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1,2,1,2")])
+
+(define_insn "*ashrsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
+ "TARGET_SHIFTER"
+ "asr%? %0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1,2,1,2")])
+
+(define_insn "*lshrsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
+ (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
+ "TARGET_SHIFTER"
+ "lsr%? %0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "length" "1,2,1,2")])
+
+(define_insn "*shift_si3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "rIJ")]))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ "! TARGET_SHIFTER"
+ "* return output_shift (operands);"
+ [(set_attr "type" "shift")
+ (set_attr "length" "8")])
+
+;; Compare instructions.
+;; This controls RTL generation and register allocation.
+
+;; ??? We may be able to relax this a bit by adding a new constant 'K' for 0.
+;; This assumes sub.f 0,symbol,0 is a valid insn.
+;; Note that "sub.f 0,r0,1" is an 8 byte insn. To avoid unnecessarily
+;; creating 8 byte insns we duplicate %1 in the destination reg of the insn
+;; if it's a small constant.
+
+(define_insn "*cmpsi_cc_insn"
+ [(set (reg:CC 61)
+ (compare:CC (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+ ""
+ "@
+ sub.f 0,%0,%1
+ sub.f %1,%0,%1
+ sub.f 0,%0,%1"
+ [(set_attr "type" "compare,compare,compare")])
+
+(define_insn "*cmpsi_cczn_insn"
+ [(set (reg:CCZN 61)
+ (compare:CCZN (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+ ""
+ "@
+ sub.f 0,%0,%1
+ sub.f %1,%0,%1
+ sub.f 0,%0,%1"
+ [(set_attr "type" "compare,compare,compare")])
+
+(define_insn "*cmpsi_ccznc_insn"
+ [(set (reg:CCZNC 61)
+ (compare:CCZNC (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+ ""
+ "@
+ sub.f 0,%0,%1
+ sub.f %1,%0,%1
+ sub.f 0,%0,%1"
+ [(set_attr "type" "compare,compare,compare")])
+
+;; Next come the scc insn and its expander.
+
+(define_expand "cstoresi4"
+ [(set (match_dup 4)
+ (match_op_dup 5
+ [(match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "nonmemory_operand" "")]))
+ (set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_dup 4)
+ (const_int 0)]))]
+ ""
+ "
+{
+ operands[4] = gen_compare_reg (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ operands[5] = gen_rtx_fmt_ee (COMPARE,
+ GET_MODE (operands[4]),
+ operands[2], operands[3]);
+}")
+
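+;; The scc sequences below first load the value for "true", then use a
+;; conditional subtract to clear it again: %D1 emits the inverse of the
+;; condition in operand 1, so the subtract executes exactly when the
+;; comparison fails (the not_scc variant uses %d1, the direct condition).
+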
+(define_insn "*scc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator" [(reg 61) (const_int 0)]))]
+ ""
+ "mov %0,1\;sub.%D1 %0,%0,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+;; ??? Look up negscc insn. See pa.md for example.
+(define_insn "*neg_scc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operator:SI 1 "comparison_operator"
+ [(reg 61) (const_int 0)])))]
+ ""
+ "mov %0,-1\;sub.%D1 %0,%0,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+(define_insn "*not_scc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operator:SI 1 "comparison_operator"
+ [(reg 61) (const_int 0)])))]
+ ""
+ "mov %0,1\;sub.%d1 %0,%0,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "2")])
+
+;; These control RTL generation for conditional jump insns
+
+(define_expand "cbranchsi4"
+ [(set (match_dup 4)
+ (match_op_dup 5
+ [(match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(match_dup 4)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[4] = gen_compare_reg (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ operands[5] = gen_rtx_fmt_ee (COMPARE,
+ GET_MODE (operands[4]),
+ operands[1], operands[2]);
+}")
+
+;; Now match both normal and inverted jump.
+
+(define_insn "*branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "proper_comparison_operator"
+ [(reg 61) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (arc_ccfsm_branch_deleted_p ())
+ {
+ arc_ccfsm_record_branch_deleted ();
+ return \"; branch deleted, next insns conditionalized\";
+ }
+ else
+ return \"%~b%d1%# %l0\";
+}"
+ [(set_attr "type" "branch")])
+
+(define_insn "*rev_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "proper_comparison_operator"
+ [(reg 61) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))"
+ "*
+{
+ if (arc_ccfsm_branch_deleted_p ())
+ {
+ arc_ccfsm_record_branch_deleted ();
+ return \"; branch deleted, next insns conditionalized\";
+ }
+ else
+ return \"%~b%D1%# %l0\";
+}"
+ [(set_attr "type" "branch")])
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "b%* %l0"
+ [(set_attr "type" "uncond_branch")])
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
+ ""
+ "j%* %a0"
+ [(set_attr "type" "uncond_branch")])
+
+;; Implement a switch statement.
+;; This wouldn't be necessary in the non-pic case if we could distinguish
+;; label refs of the jump table from other label refs. The problem is that
+;; label refs are output as "%st(.LL42)" but we don't want the %st - we want
+;; the real address since it's the address of the table.
+
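+;; The expander subtracts the lower bound from the index, does an unsigned
+;; compare against the range, branches to the default label when out of
+;; range, and finally the *casesi_insn below loads the table entry and
+;; jumps through it.
+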
+(define_expand "casesi"
+ [(set (match_dup 5)
+ (minus:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "nonmemory_operand" "")))
+ (set (reg:CC 61)
+ (compare:CC (match_dup 5)
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (set (pc)
+ (if_then_else (gtu (reg:CC 61)
+ (const_int 0))
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (parallel
+ [(set (pc)
+ (mem:SI (plus:SI (mult:SI (match_dup 5)
+ (const_int 4))
+ (label_ref (match_operand 3 "" "")))))
+ (clobber (match_scratch:SI 6 ""))
+ (clobber (match_scratch:SI 7 ""))])]
+ ""
+ "
+{
+ operands[5] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "*casesi_insn"
+ [(set (pc)
+ (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 4))
+ (label_ref (match_operand 1 "" "")))))
+ (clobber (match_scratch:SI 2 "=r"))
+ (clobber (match_scratch:SI 3 "=r"))]
+ ""
+ "*
+{
+ output_asm_insn (\"mov %2,%1\", operands);
+ if (TARGET_SHIFTER)
+ output_asm_insn (\"asl %3,%0,2\", operands);
+ else
+ output_asm_insn (\"asl %3,%0\;asl %3,%3\", operands);
+ output_asm_insn (\"ld %2,[%2,%3]\", operands);
+ output_asm_insn (\"j.nd %a2\", operands);
+ return \"\";
+}"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "length" "6")])
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "0 /* disabled -> using casesi now */"
+ "j%* %a0"
+ [(set_attr "type" "uncond_branch")])
+
+(define_expand "call"
+ ;; operands[1] is stack_size_rtx
+ ;; operands[2] is next_arg_register
+ [(parallel [(call (match_operand:SI 0 "call_operand" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 31))])]
+ ""
+ "")
+
+(define_insn "*call_via_reg"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 31))]
+ ""
+ "lr blink,[status]\;j.d %0\;add blink,blink,2"
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+(define_insn "*call_via_label"
+ [(call (mem:SI (match_operand:SI 0 "call_address_operand" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 31))]
+ ""
+ ; The %~ is necessary in case this insn gets conditionalized and the previous
+ ; insn is the cc setter.
+ "%~bl%!%* %0"
+ [(set_attr "type" "call")
+ (set_attr "cond" "canuse")])
+
+(define_expand "call_value"
+ ;; operand 2 is stack_size_rtx
+ ;; operand 3 is next_arg_register
+ [(parallel [(set (match_operand 0 "register_operand" "=r")
+ (call (match_operand:SI 1 "call_operand" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 31))])]
+ ""
+ "")
+
+(define_insn "*call_value_via_reg"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 31))]
+ ""
+ "lr blink,[status]\;j.d %1\;add blink,blink,2"
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+(define_insn "*call_value_via_label"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (match_operand:SI 1 "call_address_operand" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 31))]
+ ""
+ ; The %~ is necessary in case this insn gets conditionalized and the previous
+ ; insn is the cc setter.
+ "%~bl%!%* %1"
+ [(set_attr "type" "call")
+ (set_attr "cond" "canuse")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "misc")])
+
+;; Special pattern to flush the icache.
+;; ??? Not sure what to do here.  Some ARCs are known to support this.
+
+(define_insn "flush_icache"
+ [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 0)]
+ ""
+ "* return \"\";"
+ [(set_attr "type" "misc")])
+
+;; Split up troublesome insns for better scheduling.
+
+;; Peepholes go at the end.
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
new file mode 100644
index 000000000..e5381f95a
--- /dev/null
+++ b/gcc/config/arc/arc.opt
@@ -0,0 +1,60 @@
+; Options for the Argonaut ARC port of the compiler
+;
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+EB
+Driver
+
+EL
+Driver
+
+malign-loops
+Target Undocumented Report Mask(ALIGN_LOOPS)
+
+mbig-endian
+Target Undocumented Report RejectNegative Mask(BIG_ENDIAN)
+
+mlittle-endian
+Target Undocumented Report RejectNegative InverseMask(BIG_ENDIAN)
+
+mmangle-cpu
+Target Report Mask(MANGLE_CPU)
+Prepend the name of the cpu to all public symbol names
+
+; mmangle-cpu-libgcc
+; Target Undocumented Mask(MANGLE_CPU_LIBGC)
+
+mno-cond-exec
+Target Undocumented Report RejectNegative Mask(NO_COND_EXEC)
+
+mcpu=
+Target RejectNegative Joined Var(arc_cpu_string) Init("base")
+-mcpu=CPU Compile code for ARC variant CPU
+
+mtext=
+Target RejectNegative Joined Var(arc_text_string) Init(ARC_DEFAULT_TEXT_SECTION)
+-mtext=SECTION Put functions in SECTION
+
+mdata=
+Target RejectNegative Joined Var(arc_data_string) Init(ARC_DEFAULT_DATA_SECTION)
+-mdata=SECTION Put data in SECTION
+
+mrodata=
+Target RejectNegative Joined Var(arc_rodata_string) Init(ARC_DEFAULT_RODATA_SECTION)
+-mrodata=SECTION Put read-only data in SECTION
diff --git a/gcc/config/arc/initfini.c b/gcc/config/arc/initfini.c
new file mode 100644
index 000000000..d7514133a
--- /dev/null
+++ b/gcc/config/arc/initfini.c
@@ -0,0 +1,155 @@
+/* .init/.fini section handling + C++ global constructor/destructor handling.
+ This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
+
+Copyright (C) 1995, 1997, 1998, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Declare a pointer to void function type. */
+typedef void (*func_ptr) (void);
+
+#ifdef CRT_INIT
+
+/* NOTE: In order to be able to support SVR4 shared libraries, we arrange
+ to have one set of symbols { __CTOR_LIST__, __DTOR_LIST__, __CTOR_END__,
+ __DTOR_END__ } per root executable and also one set of these symbols
+ per shared library. So in any given whole process image, we may have
+ multiple definitions of each of these symbols. In order to prevent
+ these definitions from conflicting with one another, and in order to
+ ensure that the proper lists are used for the initialization/finalization
+ of each individual shared library (respectively), we give these symbols
+ only internal (i.e. `static') linkage, and we also make it a point to
+ refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
+ symbol in crtinit.o, where they are defined. */
+
+static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors")))
+ = { (func_ptr) (-1) };
+
+static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
+ = { (func_ptr) (-1) };
+
+/* Run all the global destructors on exit from the program. */
+
+/* Some systems place the number of pointers in the first word of the
+ table. On SVR4 however, that word is -1. In all cases, the table is
+ null-terminated. On SVR4, we start from the beginning of the list and
+ invoke each per-compilation-unit destructor routine in order
+ until we find that null.
+
+ Note that this function MUST be static. There will be one of these
+ functions in each root executable and one in each shared library, but
+ although they all have the same code, each one is unique in that it
+ refers to one particular associated `__DTOR_LIST__' which belongs to the
+ same particular root executable or shared library file. */
+
+static void __do_global_dtors (void)
+asm ("__do_global_dtors") __attribute__ ((section (".text")));
+
+static void
+__do_global_dtors (void)
+{
+ func_ptr *p;
+ for (p = __DTOR_LIST__ + 1; *p; p++)
+ (*p) ();
+}
+
+/* .init section start.
+ This must appear at the start of the .init section. */
+
+asm ("\n\
+ .section .init\n\
+ .global init\n\
+ .word 0\n\
+init:\n\
+ st blink,[sp,4]\n\
+ st fp,[sp]\n\
+ mov fp,sp\n\
+ sub sp,sp,16\n\
+");
+
+/* .fini section start.
+   This must appear at the start of the .fini section. */
+
+asm ("\n\
+ .section .fini\n\
+ .global fini\n\
+ .word 0\n\
+fini:\n\
+ st blink,[sp,4]\n\
+ st fp,[sp]\n\
+ mov fp,sp\n\
+ sub sp,sp,16\n\
+ bl.nd __do_global_dtors\n\
+");
+
+#endif /* CRT_INIT */
+
+#ifdef CRT_FINI
+
+/* Put a word containing zero at the end of each of our two lists of function
+ addresses. Note that the words defined here go into the .ctors and .dtors
+ sections of the crtend.o file, and since that file is always linked in
+ last, these words naturally end up at the very ends of the two lists
+ contained in these two sections. */
+
+static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors")))
+ = { (func_ptr) 0 };
+
+static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors")))
+ = { (func_ptr) 0 };
+
+/* Run all global constructors for the program.
+ Note that they are run in reverse order. */
+
+static void __do_global_ctors (void)
+asm ("__do_global_ctors") __attribute__ ((section (".text")));
+
+static void
+__do_global_ctors (void)
+{
+ func_ptr *p;
+ for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
+ (*p) ();
+}
+
+/* .init section end.
+ This must live at the end of the .init section. */
+
+asm ("\n\
+ .section .init\n\
+ bl.nd __do_global_ctors\n\
+ ld blink,[fp,4]\n\
+ j.d blink\n\
+ ld.a fp,[sp,16]\n\
+");
+
+/* .fini section end.
+ This must live at the end of the .fini section. */
+
+asm ("\n\
+ .section .fini\n\
+ ld blink,[fp,4]\n\
+ j.d blink\n\
+ ld.a fp,[sp,16]\n\
+");
+
+#endif /* CRT_FINI */
diff --git a/gcc/config/arc/lib1funcs.asm b/gcc/config/arc/lib1funcs.asm
new file mode 100644
index 000000000..c61f39a5c
--- /dev/null
+++ b/gcc/config/arc/lib1funcs.asm
@@ -0,0 +1,266 @@
+; libgcc routines for ARC cpu.
+
+/* Copyright (C) 1995, 1997,2004, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_mulsi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___mulsi3
+___mulsi3:
+
+/* This is the simple version.
+
+ while (a)
+ {
+ if (a & 1)
+ r += b;
+ a >>= 1;
+ b <<= 1;
+ }
+*/
+ mov r2,0 ; Accumulate result here.
+.Lloop:
+ sub.f 0,r0,0 ; while (a)
+ nop
+ beq.nd .Ldone
+ and.f 0,r0,1 ; if (a & 1)
+ add.nz r2,r2,r1 ; r += b
+ lsr r0,r0 ; a >>= 1
+ b.d .Lloop
+ lsl r1,r1 ; b <<= 1
+.Ldone:
+ j.d blink
+ mov r0,r2
+#endif
+
+#endif /* L_mulsi3 */
+
+#ifdef L_umulsidi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___umulsidi3
+___umulsidi3:
+
+/* This is the simple version.
+
+ while (a)
+ {
+ if (a & 1)
+ r += b;
+ a >>= 1;
+ b <<= 1;
+ }
+*/
+ mov r2,0 ; Top part of b.
+ mov r3,0 ; Accumulate high word of result here.
+ mov r4,0 ; Accumulate low word of result here.
+.Lloop:
+ sub.f 0,r0,0 ; while (a)
+ nop
+ beq.nd .Ldone
+ and.f 0,r0,1 ; if (a & 1)
+ nop
+ beq .Ldontadd ; if bit clear, skip the add (flags from the and.f above)
+ add.f r4,r4,r1 ; r += b
+ adc r3,r3,r2
+.Ldontadd:
+ lsr r0,r0 ; a >>= 1
+ lsl.f r1,r1 ; b <<= 1
+ b.d .Lloop
+ rlc r2,r2
+.Ldone:
+#ifdef __big_endian__
+ mov r1,r4
+ j.d blink
+ mov r0,r3
+#else
+ mov r0,r4
+ j.d blink
+ mov r1,r3
+#endif
+#endif
+
+#endif /* L_umulsidi3 */
+
+#ifdef L_divmod_tools
+
+; Utilities used by all routines.
+
+ .section .text
+ .align 4
+
+; inputs: r0 = numerator, r1 = denominator
+; outputs: positive r0/r1,
+; r6.bit1 = sign of numerator, r6.bit0 = sign of result
+
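+/* A C sketch of the normalization performed below, in the same spirit as
+   the udivmodsi4 reference code further down (illustrative only):
+
+   r6 = 0;
+   if ((long) r0 < 0) { r0 = -r0; r6 = 3; }
+   if ((long) r1 < 0) { r1 = -r1; r6 ^= 1; }
+*/
+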
+ .global ___divnorm
+___divnorm:
+ mov r6,0 ; keep sign in r6
+ sub.f 0,r0,0 ; is numerator -ve?
+ sub.lt r0,0,r0 ; negate numerator
+ mov.lt r6,3 ; sign is -ve
+ sub.f 0,r1,0 ; is denominator -ve?
+ sub.lt r1,0,r1 ; negate denominator
+ xor.lt r6,r6,1 ; toggle sign
+ j.nd blink
+
+/*
+unsigned long
+udivmodsi4(int modwanted, unsigned long num, unsigned long den)
+{
+ unsigned long bit = 1;
+ unsigned long res = 0;
+
+ while (den < num && bit && !(den & (1L<<31)))
+ {
+ den <<=1;
+ bit <<=1;
+ }
+ while (bit)
+ {
+ if (num >= den)
+ {
+ num -= den;
+ res |= bit;
+ }
+ bit >>=1;
+ den >>=1;
+ }
+ if (modwanted) return num;
+ return res;
+}
+*/
+
+; inputs: r0 = numerator, r1 = denominator
+; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed
+
+ .global ___udivmodsi4
+___udivmodsi4:
+ mov r2,1 ; bit = 1
+ mov r3,0 ; res = 0
+.Lloop1:
+ sub.f 0,r1,r0 ; while (den < num
+ nop
+ bnc.nd .Lloop2
+ sub.f 0,r2,0 ; && bit
+ nop
+ bz.nd .Lloop2
+ lsl.f 0,r1 ; && !(den & (1<<31))
+ nop
+ bc.nd .Lloop2
+ lsl r1,r1 ; den <<= 1
+ b.d .Lloop1
+ lsl r2,r2 ; bit <<= 1
+.Lloop2:
+ sub.f 0,r2,0 ; while (bit)
+ nop
+ bz.nd .Ldivmodend
+ sub.f 0,r0,r1 ; if (num >= den)
+ nop
+ bc.nd .Lshiftdown
+ sub r0,r0,r1 ; num -= den
+ or r3,r3,r2 ; res |= bit
+.Lshiftdown:
+ lsr r2,r2 ; bit >>= 1
+ b.d .Lloop2
+ lsr r1,r1 ; den >>= 1
+.Ldivmodend:
+ mov r1,r0 ; r1 = mod
+ j.d blink
+ mov r0,r3 ; r0 = res
+
+#endif
+
+#ifdef L_udivsi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___udivsi3
+___udivsi3:
+ mov r7,blink
+ bl.nd ___udivmodsi4
+ j.nd r7
+#endif
+
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___divsi3
+___divsi3:
+ mov r7,blink
+ bl.nd ___divnorm
+ bl.nd ___udivmodsi4
+ and.f 0,r6,1
+ sub.nz r0,0,r0 ; cannot go in delay slot, has limm value
+ j.nd r7
+#endif
+
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___umodsi3
+___umodsi3:
+ mov r7,blink
+ bl.nd ___udivmodsi4
+ j.d r7
+ mov r0,r1
+#endif
+
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+ .section .text
+ .align 4
+
+#ifdef __base__
+ .cpu base
+ .global ___modsi3
+___modsi3:
+ mov r7,blink
+ bl.nd ___divnorm
+ bl.nd ___udivmodsi4
+ and.f 0,r6,2
+ sub.nz r1,0,r1
+ j.d r7
+ mov r0,r1
+#endif
+
+#endif /* L_modsi3 */
diff --git a/gcc/config/arc/t-arc b/gcc/config/arc/t-arc
new file mode 100644
index 000000000..a923479ca
--- /dev/null
+++ b/gcc/config/arc/t-arc
@@ -0,0 +1,60 @@
+# Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003,
+# 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = arc/lib1funcs.asm
+LIB1ASMFUNCS = _mulsi3 _umulsidi3 _udivsi3 _divsi3 _umodsi3 _modsi3 _divmod_tools
+
+# We need libgcc routines to be mangled according to which cpu they
+# were compiled for.
+# ??? -mmangle-cpu passed by default for now.
+#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS) -mmangle-cpu
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifndef __big_endian__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifndef __big_endian__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# .init/.fini section routines
+
+$(T)crtinit.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \
+ $(MULTILIB_CFLAGS) -DCRT_INIT -finhibit-size-directive -fno-inline-functions \
+ -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtinit.o
+
+$(T)crtfini.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \
+ -DCRT_FINI $(MULTILIB_CFLAGS) -finhibit-size-directive -fno-inline-functions \
+ -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtfini.o
+
+MULTILIB_OPTIONS = EB
+MULTILIB_DIRNAMES = be
+EXTRA_MULTILIB_PARTS = crtinit.o crtfini.o
diff --git a/gcc/config/arm/README-interworking b/gcc/config/arm/README-interworking
new file mode 100644
index 000000000..7f2eda83b
--- /dev/null
+++ b/gcc/config/arm/README-interworking
@@ -0,0 +1,749 @@
+ Arm / Thumb Interworking
+ ========================
+
+The Cygnus GNU Pro Toolkit for the ARM7T processor supports function
+calls between code compiled for the ARM instruction set and code
+compiled for the Thumb instruction set and vice versa. This document
+describes how that interworking support operates and explains the
+command line switches that should be used in order to produce working
+programs.
+
+Note: The Cygnus GNU Pro Toolkit does not support switching between
+compiling for the ARM instruction set and the Thumb instruction set
+on anything other than a per file basis. There are in fact two
+completely separate compilers, one that produces ARM assembler
+instructions and one that produces Thumb assembler instructions. The
+two compilers share the same assembler, linker and so on.
+
+
+1. Explicit interworking support for C and C++ files
+====================================================
+
+By default if a file is compiled without any special command line
+switches then the code produced will not support interworking.
+Provided that a program is made up entirely from object files and
+libraries produced in this way and which contain either exclusively
+ARM instructions or exclusively Thumb instructions then this will not
+matter and a working executable will be created. If an attempt is
+made to link together mixed ARM and Thumb object files and libraries,
+then warning messages will be produced by the linker and a non-working
+executable will be created.
+
+In order to produce code which does support interworking it should be
+compiled with the
+
+ -mthumb-interwork
+
+command line option. Provided that a program is made up entirely from
+object files and libraries built with this command line switch a
+working executable will be produced, even if both ARM and Thumb
+instructions are used by the various components of the program. (No
+warning messages will be produced by the linker either).
+
+Note that specifying -mthumb-interwork does result in slightly larger,
+slower code being produced. This is why interworking support must be
+specifically enabled by a switch.
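+
+For example, to build a program from one ARM source file and one Thumb
+source file (a sketch only; the file names are illustrative, and the
+arm-pe/thumb-pe tool names follow the examples in section 10):
+
+ arm-pe-gcc -O2 -c -mthumb-interwork arm_part.c
+ thumb-pe-gcc -O2 -c -mthumb-interwork thumb_part.c
+ arm-pe-gcc arm_part.o thumb_part.o -o program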
+
+
+2. Explicit interworking support for assembler files
+====================================================
+
+If assembler files are to be included into an interworking program
+then the following rules must be obeyed (a complete example follows the
+list):
+
+ * Any externally visible functions must return by using the BX
+ instruction.
+
+ * Normal function calls can just use the BL instruction. The
+ linker will automatically insert code to switch between ARM
+ and Thumb modes as necessary.
+
+ * Calls via function pointers should use the BX instruction if
+ the call is made in ARM mode:
+
+ .code 32
+ mov lr, pc
+ bx rX
+
+ This code sequence will not work in Thumb mode however, since
+ the mov instruction will not set the bottom bit of the lr
+ register. Instead a branch-and-link to the _call_via_rX
+ functions should be used:
+
+ .code 16
+ bl _call_via_rX
+
+ where rX is replaced by the name of the register containing
+ the function address.
+
+ * All externally visible functions which should be entered in
+ Thumb mode must have the .thumb_func pseudo op specified just
+ before their entry point. e.g.:
+
+ .code 16
+ .global function
+ .thumb_func
+ function:
+ ...start of function....
+
+ * All assembler files must be assembled with the switch
+ -mthumb-interwork specified on the command line. (If the file
+ is assembled by calling gcc it will automatically pass on the
+ -mthumb-interwork switch to the assembler, provided that it
+ was specified on the gcc command line in the first place.)
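+
+Putting these rules together, a minimal interworking-safe Thumb function
+that makes a call through a function pointer might look like this (a
+sketch only; the label and register choices are illustrative):
+
+ .code 16
+ .global _func
+ .thumb_func
+_func:
+ push {lr} ; save the return address
+ bl _call_via_r2 ; call through the function pointer in r2
+ pop {r1}
+ bx r1 ; return using BX, as the first rule requires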
+
+
+3. Support for old, non-interworking aware code.
+================================================
+
+If it is necessary to link together code produced by an older,
+non-interworking aware compiler, or code produced by the new compiler
+but without the -mthumb-interwork command line switch specified, then
+there are two command line switches that can be used to support this.
+
+The switch
+
+ -mcaller-super-interworking
+
+will allow calls via function pointers in Thumb mode to work,
+regardless of whether the function pointer points to old,
+non-interworking aware code or not. Specifying this switch does
+produce slightly slower code however.
+
+Note: There is no switch to allow calls via function pointers in ARM
+mode to be handled specially. Calls via function pointers from
+interworking aware ARM code to non-interworking aware ARM code work
+without any special considerations by the compiler. Calls via
+function pointers from interworking aware ARM code to non-interworking
+aware Thumb code however will not work. (Actually under some
+circumstances they may work, but there are no guarantees). This is
+because only the new compiler is able to produce Thumb code, and this
+compiler already has a command line switch to produce interworking
+aware code.
+
+
+The switch
+
+ -mcallee-super-interworking
+
+will allow non-interworking aware ARM or Thumb code to call Thumb
+functions, either directly or via function pointers. Specifying this
+switch does produce slightly larger, slower code however.
+
+Note: There is no switch to allow non-interworking aware ARM or Thumb
+code to call ARM functions. There is no need for any special handling
+of calls from non-interworking aware ARM code to interworking aware
+ARM functions, they just work normally. Calls from non-interworking
+aware Thumb functions to ARM code however, will not work. There is no
+option to support this, since it is always possible to recompile the
+Thumb code to be interworking aware.
+
+As an alternative to the command line switch
+-mcallee-super-interworking, which affects all externally visible
+functions in a file, it is possible to specify an attribute or
+declspec for individual functions, indicating that that particular
+function should support being called by non-interworking aware code.
+The function should be defined like this:
+
+ int __attribute__((interfacearm)) function
+ {
+ ... body of function ...
+ }
+
+or
+
+ int __declspec(interfacearm) function
+ {
+ ... body of function ...
+ }
+
+
+
+4. Interworking support in dlltool
+==================================
+
+It is possible to create DLLs containing mixed ARM and Thumb code. It
+is also possible to call Thumb code in a DLL from an ARM program and
+vice versa. It is even possible to call ARM DLLs that have been compiled
+without interworking support (say by an older version of the compiler),
+from Thumb programs and still have things work properly.
+
+ A version of the `dlltool' program which supports the `--interwork'
+command line switch is needed, as well as the following special
+considerations when building programs and DLLs:
+
+*Use `-mthumb-interwork'*
+ When compiling files for a DLL or a program the `-mthumb-interwork'
+ command line switch should be specified if calling between ARM and
+ Thumb code can happen. If a program is being compiled and the
+ mode of the DLLs that it uses is not known, then it should be
+ assumed that interworking might occur and the switch used.
+
+*Use `-m thumb'*
+ If the exported functions from a DLL are all Thumb encoded then the
+ `-m thumb' command line switch should be given to dlltool when
+ building the stubs. This will make dlltool create Thumb encoded
+ stubs, rather than its default of ARM encoded stubs.
+
+ If the DLL consists of both exported Thumb functions and exported
+ ARM functions then the `-m thumb' switch should not be used.
+ Instead the Thumb functions in the DLL should be compiled with the
+ `-mcallee-super-interworking' switch, or with the `interfacearm'
+ attribute specified on their prototypes. In this way they will be
+ given ARM encoded prologues, which will work with the ARM encoded
+ stubs produced by dlltool.
+
+*Use `-mcaller-super-interworking'*
+ If it is possible for Thumb functions in a DLL to call
+ non-interworking aware code via a function pointer, then the Thumb
+ code must be compiled with the `-mcaller-super-interworking'
+ command line switch. This will force the function pointer calls
+ to use the _interwork_call_via_rX stub functions which will
+ correctly restore Thumb mode upon return from the called function.
+
+*Link with `libgcc.a'*
+ When the dll is built it may have to be linked with the GCC
+ library (`libgcc.a') in order to extract the _call_via_rX functions
+ or the _interwork_call_via_rX functions. This represents a partial
+ redundancy since the same functions *may* be present in the
+ application itself, but since they only take up 372 bytes this
+ should not be too much of a consideration.
+
+*Use `--support-old-code'*
+ When linking a program with an old DLL which does not support
+ interworking, the `--support-old-code' command line switch to the
+ linker should be used. This causes the linker to generate special
+ interworking stubs which can cope with old, non-interworking aware
+ ARM code, at the cost of generating bulkier code. The linker will
+ still generate a warning message along the lines of:
+ "Warning: input file XXX does not support interworking, whereas YYY does."
+ but this can now be ignored because the --support-old-code switch
+ has been used.
+
+
+
+5. How interworking support works
+=================================
+
+Switching between the ARM and Thumb instruction sets is accomplished
+via the BX instruction which takes as an argument a register name.
+Control is transferred to the address held in this register (with the
+bottom bit masked out), and if the bottom bit is set, then Thumb
+instruction processing is enabled, otherwise ARM instruction
+processing is enabled.
+
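+For example, the ARM mode function header described in section 7 relies
+on exactly this property to switch into Thumb mode:
+
+ .code 32
+ orr r12, pc, #1 ; address of the following Thumb code, bottom bit set
+ bx r12 ; branch there and enable Thumb instruction processing
+ .code 16
+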
+When the -mthumb-interwork command line switch is specified, gcc
+arranges for all functions to return to their caller by using the BX
+instruction. Thus provided that the return address has the bottom bit
+correctly initialized to indicate the instruction set of the caller,
+correct operation will ensue.
+
+When a function is called explicitly (rather than via a function
+pointer), the compiler generates a BL instruction to do this. The
+Thumb version of the BL instruction has the special property of
+setting the bottom bit of the LR register after it has stored the
+return address into it, so that a future BX instruction will correctly
+return to the instruction after the BL instruction, in Thumb mode.
+
+The BL instruction does not change modes itself however, so if an ARM
+function is calling a Thumb function, or vice versa, it is necessary
+to generate some extra instructions to handle this. This is done in
+the linker when it is storing the address of the referenced function
+into the BL instruction. If the BL instruction is an ARM style BL
+instruction, but the referenced function is a Thumb function, then the
+linker automatically generates a calling stub that converts from ARM
+mode to Thumb mode, puts the address of this stub into the BL
+instruction, and puts the address of the referenced function into the
+stub. Similarly if the BL instruction is a Thumb BL instruction, and
+the referenced function is an ARM function, the linker generates a
+stub which converts from Thumb to ARM mode, puts the address of this
+stub into the BL instruction, and the address of the referenced
+function into the stub.
+
+This is why it is necessary to mark Thumb functions with the
+.thumb_func pseudo op when creating assembler files. This pseudo op
+allows the assembler to distinguish between ARM functions and Thumb
+functions. (The Thumb version of GCC automatically generates these
+pseudo ops for any Thumb functions that it generates).
+
+Calls via function pointers work differently. Whenever the address of
+a function is taken, the linker examines the type of the function
+being referenced. If the function is a Thumb function, then it sets
+the bottom bit of the address. Technically this makes the address
+incorrect, since it is now one byte into the start of the function,
+but this is never a problem because:
+
+ a. with interworking enabled all calls via function pointer
+ are done using the BX instruction and this ignores the
+ bottom bit when computing where to go to.
+
+ b. the linker will always set the bottom bit when the address
+ of the function is taken, so it is never possible to take
+ the address of the function in two different places and
+ then compare them and find that they are not equal.
+
+As already mentioned any call via a function pointer will use the BX
+instruction (provided that interworking is enabled). The only problem
+with this is computing the return address for the return from the
+called function. For ARM code this can easily be done by the code
+sequence:
+
+ mov lr, pc
+ bx rX
+
+(where rX is the name of the register containing the function
+pointer). This code does not work for the Thumb instruction set,
+since the MOV instruction will not set the bottom bit of the LR
+register, so that when the called function returns, it will return in
+ARM mode not Thumb mode. Instead the compiler generates this
+sequence:
+
+ bl _call_via_rX
+
+(again where rX is the name of the register containing the function
+pointer). The special call_via_rX functions look like this:
+
+ .thumb_func
+_call_via_r0:
+ bx r0
+ nop
+
+The BL instruction ensures that the correct return address is stored
+in the LR register and then the BX instruction jumps to the address
+stored in the function pointer, switching modes if necessary.
+
+
+6. How caller-super-interworking support works
+==============================================
+
+When the -mcaller-super-interworking command line switch is specified
+it changes the code produced by the Thumb compiler so that all calls
+via function pointers (including virtual function calls) now go via a
+different stub function. The code to call via a function pointer now
+looks like this:
+
+ bl _interwork_call_via_r0
+
+Note: The compiler does not insist that r0 be used to hold the
+function address. Any register will do, and there are a suite of stub
+functions, one for each possible register. The stub functions look
+like this:
+
+ .code 16
+ .thumb_func
+_interwork_call_via_r0:
+ bx pc
+ nop
+
+ .code 32
+ tst r0, #1
+ stmeqdb r13!, {lr}
+ adreq lr, _arm_return
+ bx r0
+
+The stub first switches to ARM mode, since it is a lot easier to
+perform the necessary operations using ARM instructions. It then
+tests the bottom bit of the register containing the address of the
+function to be called. If this bottom bit is set then the function
+being called uses Thumb instructions, and the BX instruction that
+follows will switch back into Thumb mode before calling this function.
+(Note that it does not matter how this called function chooses to
+return to its caller, since both the caller and callee are Thumb
+functions, and no mode switching is necessary).  If the function being
+called is an
+ARM mode function however, the stub pushes the return address (with
+its bottom bit set) onto the stack, replaces the return address with
+the address of a piece of code called '_arm_return' and then
+performs a BX instruction to call the function.
+
+The '_arm_return' code looks like this:
+
+ .code 32
+_arm_return:
+ ldmia r13!, {r12}
+ bx r12
+ .code 16
+
+
+It simply retrieves the return address from the stack, and then
+performs a BX operation to return to the caller and switch back into
+Thumb mode.
+
+
+7. How callee-super-interworking support works
+==============================================
+
+When -mcallee-super-interworking is specified on the command line the
+Thumb compiler behaves as if every externally visible function that it
+compiles has had the (interfacearm) attribute specified for it. What
+this attribute does is to put a special, ARM mode header onto the
+function which forces a switch into Thumb mode:
+
+ without __attribute__((interfacearm)):
+
+ .code 16
+ .thumb_func
+ function:
+ ... start of function ...
+
+ with __attribute__((interfacearm)):
+
+ .code 32
+ function:
+ orr r12, pc, #1
+ bx r12
+
+ .code 16
+ .thumb_func
+ .real_start_of_function:
+
+ ... start of function ...
+
+Note that since the function now expects to be entered in ARM mode, it
+no longer has the .thumb_func pseudo op specified for its name.
+Instead the pseudo op is attached to a new label .real_start_of_<name>
+(where <name> is the name of the function) which indicates the start
+of the Thumb code.  This does have the interesting side effect that
+if this function is now called from a Thumb mode piece of code
+outside of the current file, the linker will generate a calling stub
+to switch from Thumb mode into ARM mode, and then this is immediately
+overridden by the function's header which switches back into Thumb
+mode.
+
+In addition the (interfacearm) attribute also forces the function to
+return by using the BX instruction, even if it has not been compiled with
+the -mthumb-interwork command line flag, so that the correct mode will
+be restored upon exit from the function.
+
+
+8. Some examples
+================
+
+ Given these two test files:
+
+ int arm (void) { return 1 + thumb (); }
+
+ int thumb (void) { return 2 + arm (); }
+
+ The following pieces of assembler are produced by the ARM and Thumb
+versions of GCC depending upon the command line options used:
+
+ `-O2':
+ .code 32 .code 16
+ .global _arm .global _thumb
+ .thumb_func
+ _arm: _thumb:
+ mov ip, sp
+ stmfd sp!, {fp, ip, lr, pc} push {lr}
+ sub fp, ip, #4
+ bl _thumb bl _arm
+ add r0, r0, #1 add r0, r0, #2
+ ldmea fp, {fp, sp, pc} pop {pc}
+
+ Note how the functions return without using the BX instruction. If
+these files were assembled and linked together they would fail to work
+because they do not change mode when returning to their caller.
+
+ `-O2 -mthumb-interwork':
+
+ .code 32 .code 16
+ .global _arm .global _thumb
+ .thumb_func
+ _arm: _thumb:
+ mov ip, sp
+ stmfd sp!, {fp, ip, lr, pc} push {lr}
+ sub fp, ip, #4
+ bl _thumb bl _arm
+ add r0, r0, #1 add r0, r0, #2
+ ldmea fp, {fp, sp, lr} pop {r1}
+ bx lr bx r1
+
+ Now the functions use BX to return to their caller.  They have grown
+by 4 and 2 bytes respectively, but they can now successfully be linked
+together and be expected to work.  The linker will replace the
+destinations of the two BL instructions with the addresses of calling
+stubs which convert to the correct mode before jumping to the called
+function.
+
+ `-O2 -mcallee-super-interworking':
+
+ .code 32 .code 32
+ .global _arm .global _thumb
+ _arm: _thumb:
+ orr r12, pc, #1
+ bx r12
+ mov ip, sp .code 16
+ stmfd sp!, {fp, ip, lr, pc} push {lr}
+ sub fp, ip, #4
+ bl _thumb bl _arm
+ add r0, r0, #1 add r0, r0, #2
+ ldmea fp, {fp, sp, lr} pop {r1}
+ bx lr bx r1
+
+ The thumb function now has an ARM encoded prologue, and it no longer
+has the `.thumb_func' pseudo op attached to it.  The linker will not
+generate a calling stub for the call from arm() to thumb(), but it will
+still have to generate a stub for the call from thumb() to arm(). Also
+note how specifying `-mcallee-super-interworking' automatically
+implies `-mthumb-interwork'.
+
+
+9. Some Function Pointer Examples
+=================================
+
+ Given this test file:
+
+ int func (void) { return 1; }
+
+ int call (int (* ptr)(void)) { return ptr (); }
+
+ The following varying pieces of assembler are produced by the Thumb
+version of GCC depending upon the command line options used:
+
+ `-O2':
+ .code 16
+ .globl _func
+ .thumb_func
+ _func:
+ mov r0, #1
+ bx lr
+
+ .globl _call
+ .thumb_func
+ _call:
+ push {lr}
+ bl __call_via_r0
+ pop {pc}
+
+ Note how the two functions have different exit sequences. In
+particular call() uses pop {pc} to return, which would not work if the
+caller was in ARM mode. func() however, uses the BX instruction, even
+though `-mthumb-interwork' has not been specified, as this is the most
+efficient way to exit a function when the return address is held in the
+link register.
+
+ `-O2 -mthumb-interwork':
+
+ .code 16
+ .globl _func
+ .thumb_func
+ _func:
+ mov r0, #1
+ bx lr
+
+ .globl _call
+ .thumb_func
+ _call:
+ push {lr}
+ bl __call_via_r0
+ pop {r1}
+ bx r1
+
+ This time both functions return by using the BX instruction. This
+means that call() is now two bytes longer and several cycles slower
+than the previous version.
+
+ `-O2 -mcaller-super-interworking':
+ .code 16
+ .globl _func
+ .thumb_func
+ _func:
+ mov r0, #1
+ bx lr
+
+ .globl _call
+ .thumb_func
+ _call:
+ push {lr}
+ bl __interwork_call_via_r0
+ pop {pc}
+
+ Very similar to the first (non-interworking) version, except that a
+different stub is used to call via the function pointer. This new stub
+will work even if the called function is not interworking aware, and
+tries to return to call() in ARM mode. Note that the assembly code for
+call() is still not interworking aware itself, and so should not be
+called from ARM code.
+
+ `-O2 -mcallee-super-interworking':
+
+ .code 32
+ .globl _func
+ _func:
+ orr r12, pc, #1
+ bx r12
+
+ .code 16
+ .globl .real_start_of_func
+ .thumb_func
+ .real_start_of_func:
+ mov r0, #1
+ bx lr
+
+ .code 32
+ .globl _call
+ _call:
+ orr r12, pc, #1
+ bx r12
+
+ .code 16
+ .globl .real_start_of_call
+ .thumb_func
+ .real_start_of_call:
+ push {lr}
+ bl __call_via_r0
+ pop {r1}
+ bx r1
+
+ Now both functions have an ARM coded prologue, and both functions
+return by using the BX instruction.  These functions are therefore
+interworking aware and can safely be called from ARM code.  The code
+for the call() function is now 10 bytes longer than the original,
+non-interworking aware version, an increase of over 200%.
+
+ If a prototype for call() is added to the source code, and this
+prototype includes the `interfacearm' attribute:
+
+ int __attribute__((interfacearm)) call (int (* ptr)(void));
+
+ then this code is produced (with only -O2 specified on the command
+line):
+
+ .code 16
+ .globl _func
+ .thumb_func
+ _func:
+ mov r0, #1
+ bx lr
+
+ .globl _call
+ .code 32
+ _call:
+ orr r12, pc, #1
+ bx r12
+
+ .code 16
+ .globl .real_start_of_call
+ .thumb_func
+ .real_start_of_call:
+ push {lr}
+ bl __call_via_r0
+ pop {r1}
+ bx r1
+
+ So now both call() and func() can be safely called via
+non-interworking aware ARM code. If, when such a file is assembled,
+the assembler detects the fact that call() is being called by another
+function in the same file, it will automatically adjust the target of
+the BL instruction to point to .real_start_of_call. In this way there
+is no need for the linker to generate a Thumb-to-ARM calling stub so
+that call can be entered in ARM mode.
+
+
+10. How to use dlltool to build ARM/Thumb DLLs
+==============================================
+ Given a program (`prog.c') like this:
+
+ extern int func_in_dll (void);
+
+ int main (void) { return func_in_dll(); }
+
+ And a DLL source file (`dll.c') like this:
+
+ int func_in_dll (void) { return 1; }
+
+ Here is how to build the DLL and the program for a purely ARM based
+environment:
+
+*Step One
+ Build a `.def' file describing the DLL:
+
+ ; example.def
+ ; This file describes the contents of the DLL
+ LIBRARY example
+ HEAPSIZE 0x40000, 0x2000
+ EXPORTS
+ func_in_dll 1
+
+*Step Two
+ Compile the DLL source code:
+
+ arm-pe-gcc -O2 -c dll.c
+
+*Step Three
+ Use `dlltool' to create an exports file and a library file:
+
+ dlltool --def example.def --output-exp example.o --output-lib example.a
+
+*Step Four
+ Link together the complete DLL:
+
+ arm-pe-ld dll.o example.o -o example.dll
+
+*Step Five
+ Compile the program's source code:
+
+ arm-pe-gcc -O2 -c prog.c
+
+*Step Six
+ Link together the program and the DLL's library file:
+
+ arm-pe-gcc prog.o example.a -o prog
+
+ If instead this was a Thumb DLL being called from an ARM program, the
+steps would look like this. (To save space only those steps that are
+different from the previous version are shown):
+
+*Step Two
+ Compile the DLL source code (using the Thumb compiler):
+
+ thumb-pe-gcc -O2 -c dll.c -mthumb-interwork
+
+*Step Three
+ Build the exports and library files (and support interworking):
+
+ dlltool -d example.def -z example.o -l example.a --interwork -m thumb
+
+*Step Five
+ Compile the program's source code (and support interworking):
+
+ arm-pe-gcc -O2 -c prog.c -mthumb-interwork
+
+ If instead, the DLL was an old, ARM DLL which does not support
+interworking, and which cannot be rebuilt, then these steps would be
+used.
+
+*Step One
+ Skip. If you do not have access to the sources of a DLL, there is
+ no point in building a `.def' file for it.
+
+*Step Two
+ Skip. With no DLL sources there is nothing to compile.
+
+*Step Three
+ Skip. Without a `.def' file you cannot use dlltool to build an
+ exports file or a library file.
+
+*Step Four
+ Skip. Without a set of DLL object files you cannot build the DLL.
+ Besides it has already been built for you by somebody else.
+
+*Step Five
+ Compile the program's source code, this is the same as before:
+
+ arm-pe-gcc -O2 -c prog.c
+
+*Step Six
+ Link together the program and the DLL's library file, passing the
+ `--support-old-code' option to the linker:
+
+ arm-pe-gcc prog.o example.a -Wl,--support-old-code -o prog
+
+ Ignore the warning message about the input file not supporting
+ interworking as the --support-old-code switch has taken care of this.
+
+
+Copyright (C) 1998, 2002, 2003, 2004 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
new file mode 100644
index 000000000..f8e7367fd
--- /dev/null
+++ b/gcc/config/arm/aout.h
@@ -0,0 +1,380 @@
+/* Definitions of target machine for GNU compiler, for ARM with a.out
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2004, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Richard Earnshaw (rearnsha@armltd.co.uk).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef ASM_APP_ON
+#define ASM_APP_ON ""
+#endif
+#ifndef ASM_APP_OFF
+#define ASM_APP_OFF ""
+#endif
+
+/* Switch to the text or data segment. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.bss"
+
+/* Note: If USER_LABEL_PREFIX or LOCAL_LABEL_PREFIX are changed,
+ make sure that this change is reflected in the function
+ coff_arm_is_local_label_name() in bfd/coff-arm.c. */
+#ifndef REGISTER_PREFIX
+#define REGISTER_PREFIX ""
+#endif
+
+#ifndef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+#endif
+
+#ifndef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX ""
+#endif
+
+/* The assembler's names for the registers. Note that the ?xx registers are
+ there so that VFPv3/NEON registers D16-D31 have the same spacing as D0-D15
+ (each of which is overlaid on two S registers), although there are no
+ actual single-precision registers which correspond to D16-D31. */
+#ifndef REGISTER_NAMES
+#define REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "sl", "fp", "ip", "sp", "lr", "pc", \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "cc", "sfp", "afp", \
+ "mv0", "mv1", "mv2", "mv3", \
+ "mv4", "mv5", "mv6", "mv7", \
+ "mv8", "mv9", "mv10", "mv11", \
+ "mv12", "mv13", "mv14", "mv15", \
+ "wcgr0", "wcgr1", "wcgr2", "wcgr3", \
+ "wr0", "wr1", "wr2", "wr3", \
+ "wr4", "wr5", "wr6", "wr7", \
+ "wr8", "wr9", "wr10", "wr11", \
+ "wr12", "wr13", "wr14", "wr15", \
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", \
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \
+ "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \
+ "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \
+ "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \
+ "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \
+ "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \
+ "vfpcc" \
+}
+#endif
+
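+/* Alternative spellings accepted for the registers above; each entry
+   maps an alias to a hard register number, e.g. "a1" is the first
+   argument register r0 under the APCS naming scheme.  */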
+#ifndef ADDITIONAL_REGISTER_NAMES
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ {"a1", 0}, \
+ {"a2", 1}, \
+ {"a3", 2}, \
+ {"a4", 3}, \
+ {"v1", 4}, \
+ {"v2", 5}, \
+ {"v3", 6}, \
+ {"v4", 7}, \
+ {"v5", 8}, \
+ {"v6", 9}, \
+  {"rfp", 9}, /* GCC used to call it this.  */ \
+ {"sb", 9}, \
+ {"v7", 10}, \
+ {"r10", 10}, /* sl */ \
+ {"r11", 11}, /* fp */ \
+ {"r12", 12}, /* ip */ \
+ {"r13", 13}, /* sp */ \
+ {"r14", 14}, /* lr */ \
+ {"r15", 15}, /* pc */ \
+ {"mvf0", 27}, \
+ {"mvf1", 28}, \
+ {"mvf2", 29}, \
+ {"mvf3", 30}, \
+ {"mvf4", 31}, \
+ {"mvf5", 32}, \
+ {"mvf6", 33}, \
+ {"mvf7", 34}, \
+ {"mvf8", 35}, \
+ {"mvf9", 36}, \
+ {"mvf10", 37}, \
+ {"mvf11", 38}, \
+ {"mvf12", 39}, \
+ {"mvf13", 40}, \
+ {"mvf14", 41}, \
+ {"mvf15", 42}, \
+ {"mvd0", 27}, \
+ {"mvd1", 28}, \
+ {"mvd2", 29}, \
+ {"mvd3", 30}, \
+ {"mvd4", 31}, \
+ {"mvd5", 32}, \
+ {"mvd6", 33}, \
+ {"mvd7", 34}, \
+ {"mvd8", 35}, \
+ {"mvd9", 36}, \
+ {"mvd10", 37}, \
+ {"mvd11", 38}, \
+ {"mvd12", 39}, \
+ {"mvd13", 40}, \
+ {"mvd14", 41}, \
+ {"mvd15", 42}, \
+ {"mvfx0", 27}, \
+ {"mvfx1", 28}, \
+ {"mvfx2", 29}, \
+ {"mvfx3", 30}, \
+ {"mvfx4", 31}, \
+ {"mvfx5", 32}, \
+ {"mvfx6", 33}, \
+ {"mvfx7", 34}, \
+ {"mvfx8", 35}, \
+ {"mvfx9", 36}, \
+ {"mvfx10", 37}, \
+ {"mvfx11", 38}, \
+ {"mvfx12", 39}, \
+ {"mvfx13", 40}, \
+ {"mvfx14", 41}, \
+ {"mvfx15", 42}, \
+ {"mvdx0", 27}, \
+ {"mvdx1", 28}, \
+ {"mvdx2", 29}, \
+ {"mvdx3", 30}, \
+ {"mvdx4", 31}, \
+ {"mvdx5", 32}, \
+ {"mvdx6", 33}, \
+ {"mvdx7", 34}, \
+ {"mvdx8", 35}, \
+ {"mvdx9", 36}, \
+ {"mvdx10", 37}, \
+ {"mvdx11", 38}, \
+ {"mvdx12", 39}, \
+ {"mvdx13", 40}, \
+ {"mvdx14", 41}, \
+ {"mvdx15", 42} \
+}
+#endif
+
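+/* Names that span several hard registers.  Each entry below reads as
+   {NAME, first hard register, register count}; for instance "d0" covers
+   the two single-precision registers at positions 63 and 64 (s0/s1 in
+   the REGISTER_NAMES table above).  This is a reading of the table, not
+   a contract documented here.  */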
+#ifndef OVERLAPPING_REGISTER_NAMES
+#define OVERLAPPING_REGISTER_NAMES \
+{ \
+ {"d0", 63, 2}, \
+ {"d1", 65, 2}, \
+ {"d2", 67, 2}, \
+ {"d3", 69, 2}, \
+ {"d4", 71, 2}, \
+ {"d5", 73, 2}, \
+ {"d6", 75, 2}, \
+ {"d7", 77, 2}, \
+ {"d8", 79, 2}, \
+ {"d9", 81, 2}, \
+ {"d10", 83, 2}, \
+ {"d11", 85, 2}, \
+ {"d12", 87, 2}, \
+ {"d13", 89, 2}, \
+ {"d14", 91, 2}, \
+ {"d15", 93, 2}, \
+ {"q0", 63, 4}, \
+ {"q1", 67, 4}, \
+ {"q2", 71, 4}, \
+ {"q3", 75, 4}, \
+ {"q4", 79, 4}, \
+ {"q5", 83, 4}, \
+ {"q6", 87, 4}, \
+ {"q7", 91, 4}, \
+ {"q8", 95, 4}, \
+ {"q9", 99, 4}, \
+ {"q10", 103, 4}, \
+ {"q11", 107, 4}, \
+ {"q12", 111, 4}, \
+ {"q13", 115, 4}, \
+ {"q14", 119, 4}, \
+ {"q15", 123, 4} \
+}
+#endif
+
+#ifndef NO_DOLLAR_IN_LABEL
+#define NO_DOLLAR_IN_LABEL 1
+#endif
+
+/* Generate DBX debugging information. riscix.h will undefine this because
+ the native assembler does not support stabs. */
+#define DBX_DEBUGGING_INFO 1
+
+/* Acorn dbx moans about continuation chars, so don't use any. */
+#ifndef DBX_CONTIN_LENGTH
+#define DBX_CONTIN_LENGTH 0
+#endif
+
+/* Output a function label definition. */
+#ifndef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \
+ ASM_OUTPUT_LABEL (STREAM, NAME); \
+ } \
+ while (0)
+#endif
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* Make an internal label into a string. */
+#ifndef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM))
+#endif
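+
+/* For example, ASM_GENERATE_INTERNAL_LABEL (buf, "L", 5) produces "*L5"
+   here (LOCAL_LABEL_PREFIX is empty); the leading "*" conventionally
+   tells the output machinery to use the name verbatim, without adding
+   USER_LABEL_PREFIX.  */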
+
+/* Output an element of a dispatch table. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ do \
+ { \
+ gcc_assert (!TARGET_THUMB2); \
+ asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \
+ } \
+ while (0)
+
+
+/* Thumb-2 always uses addr_diff_elts so that the Table Branch instructions
+   can be used.  For non-PIC code where the offsets are not suitable for
+   TBB/TBH the elements are output as absolute labels.  */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do \
+ { \
+ if (TARGET_ARM) \
+ asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \
+ else if (TARGET_THUMB1) \
+ { \
+ if (flag_pic || optimize_size) \
+ { \
+	    switch (GET_MODE (BODY))				\
+ { \
+ case QImode: \
+ asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \
+ VALUE, REL); \
+ break; \
+ case HImode: /* TBH */ \
+ asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \
+ VALUE, REL); \
+ break; \
+ case SImode: \
+ asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", \
+ VALUE, REL); \
+ break; \
+ default: \
+ gcc_unreachable(); \
+ } \
+ } \
+ else \
+ asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \
+ } \
+ else /* Thumb-2 */ \
+ { \
+	switch (GET_MODE (BODY))				\
+ { \
+ case QImode: /* TBB */ \
+ asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \
+ VALUE, REL); \
+ break; \
+ case HImode: /* TBH */ \
+ asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \
+ VALUE, REL); \
+ break; \
+ case SImode: \
+ if (flag_pic) \
+ asm_fprintf (STREAM, "\t.word\t%LL%d+1-%LL%d\n", VALUE, REL); \
+ else \
+ asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \
+ break; \
+ default: \
+ gcc_unreachable(); \
+ } \
+ } \
+ } \
+ while (0)
+
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN) \
+ output_ascii_pseudo_op (STREAM, (const unsigned char *) (PTR), LEN)
+
+/* Output a gap. In fact we fill it with nulls. */
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \
+ fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES))
+
+/* Align output to a power of two. Horrible /bin/as. */
+#ifndef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ do \
+ { \
+ register int amount = 1 << (POWER); \
+ \
+ if (amount == 2) \
+ fprintf (STREAM, "\t.even\n"); \
+ else if (amount != 1) \
+ fprintf (STREAM, "\t.align\t%d\n", amount - 4); \
+ } \
+ while (0)
+#endif
+
+/* Output a common block. */
+#ifndef ASM_OUTPUT_COMMON
+#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \
+ do \
+ { \
+ fprintf (STREAM, "\t.comm\t"); \
+ assemble_name (STREAM, NAME); \
+ asm_fprintf (STREAM, ", %d\t%@ %d\n", \
+ (int)(ROUNDED), (int)(SIZE)); \
+ } \
+ while (0)
+#endif
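+
+/* To illustrate ASM_OUTPUT_COMMON above: with USER_LABEL_PREFIX "_" and
+   ASM_COMMENT_START "@", a 3-byte common symbol foo rounded up to 4
+   bytes comes out as "\t.comm\t_foo, 4\t@ 3".  */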
+
+/* Output a local common block. /bin/as can't do this, so hack a
+ `.space' into the bss segment. Note that this is *bad* practice,
+ which is guaranteed NOT to work since it doesn't define STATIC
+ COMMON space but merely STATIC BSS space. */
+#ifndef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL (STREAM, NAME); \
+ fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE)); \
+ } \
+ while (0)
+#endif
+
+/* Output a zero-initialized block. */
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(STREAM, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (STREAM, DECL, NAME, SIZE, ALIGN)
+#endif
+
+/* Output a #ident directive. */
+#ifndef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(STREAM,STRING) \
+ asm_fprintf (STREAM, "%@ - - - ident %s\n", STRING)
+#endif
+
+#ifndef ASM_COMMENT_START
+#define ASM_COMMENT_START "@"
+#endif
+
+/* This works for GAS and some other assemblers. */
+#define SET_ASM_OP "\t.set\t"
diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
new file mode 100644
index 000000000..f9ad1c9e1
--- /dev/null
+++ b/gcc/config/arm/arm-c.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tm_p.h"
+#include "tree.h"
+#include "output.h"
+#include "c-family/c-common.h"
+
+
+/* Output C specific EABI object attributes.  These cannot be done in
+   arm.c because they require information from the C frontend.  */
+
+static void
+arm_output_c_attributes (void)
+{
+ /* Tag_ABI_PCS_wchar_t. */
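+  /* E.g. this emits "\t.eabi_attribute 18, 4" on a target whose wchar_t
+     is four bytes wide; 18 is the EABI tag number Tag_ABI_PCS_wchar_t.  */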
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
+ (int)(TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT));
+}
+
+
+/* Setup so that common code calls arm_output_c_attributes. */
+
+void
+arm_lang_object_attributes_init (void)
+{
+ arm_lang_output_object_attributes_hook = arm_output_c_attributes;
+}
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
new file mode 100644
index 000000000..0bb9aa3ee
--- /dev/null
+++ b/gcc/config/arm/arm-cores.def
@@ -0,0 +1,136 @@
+/* ARM CPU Cores
+ Copyright (C) 2003, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Written by CodeSourcery, LLC
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Before using #include to read this file, define a macro:
+
+ ARM_CORE(CORE_NAME, CORE_IDENT, ARCH, FLAGS, COSTS)
+
+ The CORE_NAME is the name of the core, represented as a string constant.
+ The CORE_IDENT is the name of the core, represented as an identifier.
+ ARCH is the architecture revision implemented by the chip.
+ FLAGS are the bitwise-or of the traits that apply to that core.
+ This need not include flags implied by the architecture.
+ COSTS is the name of the rtx_costs routine to use.
+
+ If you update this table, you must update the "tune" attribute in
+ arm.md.
+
+ Some tools assume no whitespace up to the first "," in each entry. */
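+
+   Purely illustrative (a hypothetical consumer, not part of this file):
+   defining ARM_CORE before inclusion turns the entries below into data,
+   for example a table of core names:
+
+     #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) NAME,
+     static const char *const core_names[] = {
+     #include "arm-cores.def"
+     };
+     #undef ARM_CORE
+*/
+/*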
+
+/* V2/V2A Architecture Processors */
+ARM_CORE("arm2", arm2, 2, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm250", arm250, 2, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm3", arm3, 2, FL_CO_PROC | FL_MODE26, slowmul)
+
+/* V3 Architecture Processors */
+ARM_CORE("arm6", arm6, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm60", arm60, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm600", arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm610", arm610, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm620", arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm7", arm7, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm7d", arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm7di", arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm70", arm70, 3, FL_CO_PROC | FL_MODE26, slowmul)
+ARM_CORE("arm700", arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm700i", arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm710", arm710, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm720", arm720, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm710c", arm710c, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm7100", arm7100, 3, FL_MODE26 | FL_WBUF, slowmul)
+ARM_CORE("arm7500", arm7500, 3, FL_MODE26 | FL_WBUF, slowmul)
+/* Doesn't have an external co-proc, but does have embedded fpa. */
+ARM_CORE("arm7500fe", arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul)
+
+/* V3M Architecture Processors */
+/* arm7m doesn't exist on its own, but only together with the D and I
+   suffixes; those don't alter the generated code, so arm7m is
+   sometimes used.  */
+ARM_CORE("arm7m", arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul)
+ARM_CORE("arm7dm", arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul)
+ARM_CORE("arm7dmi", arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul)
+
+/* V4 Architecture Processors */
+ARM_CORE("arm8", arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul)
+ARM_CORE("arm810", arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul)
+ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul)
+ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul)
+
+/* V4T Architecture Processors */
+ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul)
+ARM_CORE("arm7tdmi-s", arm7tdmis, 4T, FL_CO_PROC , fastmul)
+ARM_CORE("arm710t", arm710t, 4T, FL_WBUF, fastmul)
+ARM_CORE("arm720t", arm720t, 4T, FL_WBUF, fastmul)
+ARM_CORE("arm740t", arm740t, 4T, FL_WBUF, fastmul)
+ARM_CORE("arm9", arm9, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm9tdmi", arm9tdmi, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm920", arm920, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm920t", arm920t, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm922t", arm922t, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("arm940t", arm940t, 4T, FL_LDSCHED, fastmul)
+ARM_CORE("ep9312", ep9312, 4T, FL_LDSCHED | FL_CIRRUS, fastmul)
+
+/* V5T Architecture Processors */
+ARM_CORE("arm10tdmi", arm10tdmi, 5T, FL_LDSCHED, fastmul)
+ARM_CORE("arm1020t", arm1020t, 5T, FL_LDSCHED, fastmul)
+
+/* V5TE Architecture Processors */
+ARM_CORE("arm9e", arm9e, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("arm946e-s", arm946es, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("arm966e-s", arm966es, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("arm968e-s", arm968es, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("arm10e", arm10e, 5TE, FL_LDSCHED, fastmul)
+ARM_CORE("arm1020e", arm1020e, 5TE, FL_LDSCHED, fastmul)
+ARM_CORE("arm1022e", arm1022e, 5TE, FL_LDSCHED, fastmul)
+ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
+ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
+ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
+ARM_CORE("fa606te", fa606te, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("fa626te", fa626te, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("fmp626", fmp626, 5TE, FL_LDSCHED, 9e)
+ARM_CORE("fa726te", fa726te, 5TE, FL_LDSCHED, fa726te)
+
+/* V5TEJ Architecture Processors */
+ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
+ARM_CORE("arm1026ej-s", arm1026ejs, 5TEJ, FL_LDSCHED, 9e)
+
+/* V6 Architecture Processors */
+ARM_CORE("arm1136j-s", arm1136js, 6J, FL_LDSCHED, 9e)
+ARM_CORE("arm1136jf-s", arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e)
+ARM_CORE("arm1176jz-s", arm1176jzs, 6ZK, FL_LDSCHED, 9e)
+ARM_CORE("arm1176jzf-s", arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e)
+ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
+ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
+ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
+ARM_CORE("arm1156t2f-s", arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, 9e)
+ARM_CORE("cortex-a5", cortexa5, 7A, FL_LDSCHED, 9e)
+ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
+ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)
+ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED, 9e)
+ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
+ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e)
diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md
new file mode 100644
index 000000000..44e758692
--- /dev/null
+++ b/gcc/config/arm/arm-generic.md
@@ -0,0 +1,153 @@
+;; Generic ARM Pipeline Description
+;; Copyright (C) 2003, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "arm")
+
+;; Write buffer
+;
+; Strictly, we should model a 4-deep write buffer for ARM7xx based chips
+;
+; The write buffer on some of the arm6 processors is hard to model exactly.
+; There is room in the buffer for up to two addresses and up to eight words
+; of memory, but the two needn't be split evenly. When writing the two
+; addresses are fully pipelined. However, a read from memory that is not
+; currently in the cache will block until the writes have completed.
+; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so
+; writes will take 2 FCLK cycles per word.  If FCLK and MCLK were
+; asynchronous (they aren't allowed to be at present) there would be a
+; startup cost of 1 MCLK cycle to add as well.
+(define_cpu_unit "write_buf" "arm")
+
+;; Write blockage unit
+;
+; The write_blockage unit models (partially) the fact that reads will stall
+; until the write buffer empties.
+; The f_mem_r and r_mem_f could also block, but they are to the stack,
+; so we don't model them here.
+(define_cpu_unit "write_blockage" "arm")
+
+;; Core
+;
+(define_cpu_unit "core" "arm")
+
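+;; A note on the reservation strings used below (a reading of the GCC
+;; automaton syntax, for orientation): "," advances one cycle, "+"
+;; reserves units in the same cycle, and "unit*n" holds a unit for n
+;; consecutive cycles.  So "core+write_buf*3" occupies the core for one
+;; cycle while the write buffer stays busy for three.
+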
+(define_insn_reservation "r_mem_f_wbuf" 5
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "r_mem_f")))
+ "core+write_buf*3")
+
+(define_insn_reservation "store_wbuf" 5
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store1")))
+ "core+write_buf*3+write_blockage*5")
+
+(define_insn_reservation "store2_wbuf" 7
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store2")))
+ "core+write_buf*4+write_blockage*7")
+
+(define_insn_reservation "store3_wbuf" 9
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store3")))
+ "core+write_buf*5+write_blockage*9")
+
+(define_insn_reservation "store4_wbuf" 11
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "yes")
+ (eq_attr "type" "store4")))
+ "core+write_buf*6+write_blockage*11")
+
+(define_insn_reservation "store2" 3
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store2")))
+ "core*3")
+
+(define_insn_reservation "store3" 4
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store3")))
+ "core*4")
+
+(define_insn_reservation "store4" 5
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "model_wbuf" "no")
+ (eq_attr "type" "store4")))
+ "core*5")
+
+(define_insn_reservation "store_ldsched" 1
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "yes")
+ (eq_attr "type" "store1")))
+ "core")
+
+(define_insn_reservation "load_ldsched_xscale" 3
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "yes")
+ (and (eq_attr "type" "load_byte,load1")
+ (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
+ "core")
+
+(define_insn_reservation "load_ldsched" 2
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "yes")
+ (and (eq_attr "type" "load_byte,load1")
+ (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
+ "core")
+
+(define_insn_reservation "load_or_store" 2
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "!yes")
+ (eq_attr "type" "load_byte,load1,load2,load3,load4,store1")))
+ "core*2")
+
+(define_insn_reservation "mult" 16
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "no") (eq_attr "type" "mult")))
+ "core*16")
+
+(define_insn_reservation "mult_ldsched_strongarm" 3
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "yes")
+ (and (eq_attr "tune"
+ "strongarm,strongarm110,strongarm1100,strongarm1110")
+ (eq_attr "type" "mult"))))
+ "core*2")
+
+(define_insn_reservation "mult_ldsched" 4
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "ldsched" "yes")
+ (and (eq_attr "tune"
+ "!strongarm,strongarm110,strongarm1100,strongarm1110")
+ (eq_attr "type" "mult"))))
+ "core*4")
+
+(define_insn_reservation "multi_cycle" 32
+ (and (eq_attr "generic_sched" "yes")
+ (and (eq_attr "core_cycles" "multi")
+ (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4")))
+ "core*32")
+
+(define_insn_reservation "single_cycle" 1
+ (and (eq_attr "generic_sched" "yes")
+ (eq_attr "core_cycles" "single"))
+ "core")
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
new file mode 100644
index 000000000..221edd2aa
--- /dev/null
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -0,0 +1,332 @@
+(* Auto-generate ARM ldm/stm patterns
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is an O'Caml program. The O'Caml compiler is available from:
+
+ http://caml.inria.fr/
+
+ Or from your favourite OS's friendly packaging system. Tested with version
+ 3.09.2, though other versions will probably work too.
+
+ Run with:
+ ocaml arm-ldmstm.ml >/path/to/gcc/config/arm/ldmstm.md
+*)
+
+type amode = IA | IB | DA | DB
+
+type optype = IN | OUT | INOUT
+
+let rec string_of_addrmode addrmode =
+ match addrmode with
+ IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db"
+
+let rec initial_offset addrmode nregs =
+ match addrmode with
+ IA -> 0
+ | IB -> 4
+ | DA -> -4 * nregs + 4
+ | DB -> -4 * nregs
+
+let rec final_offset addrmode nregs =
+ match addrmode with
+ IA -> nregs * 4
+ | IB -> nregs * 4
+ | DA -> -4 * nregs
+ | DB -> -4 * nregs
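+
+(* Worked example of the two functions above: a 4-register DB transfer
+   puts its first element at base-16 and leaves the base register's
+   final value at base-16, while for IA the first element is at base+0
+   and the final value is base+16. *)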
+
+let constr thumb =
+ if thumb then "l" else "rk"
+
+let inout_constr op_type =
+ match op_type with
+ OUT -> "=&"
+ | INOUT -> "+&"
+ | IN -> ""
+
+let destreg nregs first op_type thumb =
+ if not first then
+ Printf.sprintf "(match_dup %d)" (nregs + 1)
+ else
+ Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")")
+ (nregs + 1) (inout_constr op_type) (constr thumb)
+
+let write_ldm_set thumb nregs offset opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"arm_hard_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (mem:SI " indent;
+ begin if offset != 0 then Printf.printf "(plus:SI " end;
+ Printf.printf "%s" (destreg nregs first IN thumb);
+ begin if offset != 0 then Printf.printf "\n%s (const_int %d))" indent offset end;
+ Printf.printf "))"
+
+let write_stm_set thumb nregs offset opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (mem:SI ";
+ begin if offset != 0 then Printf.printf "(plus:SI " end;
+ Printf.printf "%s" (destreg nregs first IN thumb);
+ begin if offset != 0 then Printf.printf " (const_int %d))" offset end;
+ Printf.printf ")\n%s (match_operand:SI %d \"arm_hard_register_operand\" \"\"))" indent opnr
+
+let write_ldm_peep_set extra_indent nregs opnr first =
+ let indent = " " ^ extra_indent in
+ Printf.printf "%s" (if first then extra_indent ^ " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (match_operand:SI %d \"memory_operand\" \"\"))" indent (nregs + opnr)
+
+let write_stm_peep_set extra_indent nregs opnr first =
+ let indent = " " ^ extra_indent in
+ Printf.printf "%s" (if first then extra_indent ^ " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"memory_operand\" \"\")\n" (nregs + opnr);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\"))" indent opnr
+
+let write_any_load optype nregs opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (match_operand:SI %d \"%s\" \"\"))" indent (nregs * 2 + opnr) optype
+
+let write_const_store nregs opnr first =
+ let indent = " " in
+ Printf.printf "%s(set (match_operand:SI %d \"memory_operand\" \"\")\n" indent (nregs + opnr);
+ Printf.printf "%s (match_dup %d))" indent opnr
+
+let write_const_stm_peep_set nregs opnr first =
+ write_any_load "const_int_operand" nregs opnr first;
+ Printf.printf "\n";
+ write_const_store nregs opnr false
+
+
+let rec write_pat_sets func opnr offset first n_left =
+ func offset opnr first;
+ begin
+ if n_left > 1 then begin
+ Printf.printf "\n";
+ write_pat_sets func (opnr + 1) (offset + 4) false (n_left - 1);
+ end else
+ Printf.printf "]"
+ end
+
+let rec write_peep_sets func opnr first n_left =
+ func opnr first;
+ begin
+ if n_left > 1 then begin
+ Printf.printf "\n";
+ write_peep_sets func (opnr + 1) false (n_left - 1);
+ end
+ end
+
+let can_thumb addrmode update is_store =
+ match addrmode, update, is_store with
+  (* Thumb1 mode only supports IA with update.  However, for LDMIA,
+     if the address register also appears in the list of loaded
+     registers, the loaded value is stored into it and no writeback
+     occurs, hence the RTL pattern describing such an insn does not
+     have an update.  We check in the match_parallel predicate that
+     the condition described above is met. *)
+ IA, _, false -> true
+ | IA, true, true -> true
+ | _ -> false
+
+let target addrmode thumb =
+ match addrmode, thumb with
+ IA, true -> "TARGET_THUMB1"
+ | IA, false -> "TARGET_32BIT"
+ | DB, false -> "TARGET_32BIT"
+ | _, false -> "TARGET_ARM"
+
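+
+(* write_pattern_1 emits one define_insn.  Going by the format string
+   below, write_pattern_1 "ldm" "load" IA 4 ... with thumb=false names
+   its pattern "*ldm4_ia"; the thumb and update variants come out as
+   "*thumb_ldm4_ia" and "*ldm4_ia_update". *)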
+let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
+ let astr = string_of_addrmode addrmode in
+ Printf.printf "(define_insn \"*%s%s%d_%s%s\"\n"
+ (if thumb then "thumb_" else "") name nregs astr
+ (if update then "_update" else "");
+ Printf.printf " [(match_parallel 0 \"%s_multiple_operation\"\n" ls;
+ begin
+ if update then begin
+ Printf.printf " [(set %s\n (plus:SI %s"
+ (destreg nregs true INOUT thumb) (destreg nregs false IN thumb);
+ Printf.printf " (const_int %d)))\n"
+ (final_offset addrmode nregs)
+ end
+ end;
+ write_pat_sets
+ (write_set_fn thumb nregs) 1
+ (initial_offset addrmode nregs)
+ (not update) nregs;
+ Printf.printf ")]\n \"%s && XVECLEN (operands[0], 0) == %d\"\n"
+ (target addrmode thumb)
+ (if update then nregs + 1 else nregs);
+ Printf.printf " \"%s%%(%s%%)\\t%%%d%s, {"
+ name astr (nregs + 1) (if update then "!" else "");
+ for n = 1 to nregs; do
+ Printf.printf "%%%d%s" n (if n < nregs then ", " else "")
+ done;
+ Printf.printf "}\"\n";
+ Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs;
+ begin if not thumb then
+ Printf.printf "\n (set_attr \"predicable\" \"yes\")";
+ end;
+ Printf.printf "])\n\n"
+
+let write_ldm_pattern addrmode nregs update =
+ write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update false;
+ begin if can_thumb addrmode update false then
+ write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update true;
+ end
+
+let write_stm_pattern addrmode nregs update =
+ write_pattern_1 "stm" "store" addrmode nregs write_stm_set update false;
+ begin if can_thumb addrmode update true then
+ write_pattern_1 "stm" "store" addrmode nregs write_stm_set update true;
+ end
+
+let write_ldm_commutative_peephole thumb =
+ let nregs = 2 in
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs;
+ let indent = " " in
+ if thumb then begin
+ Printf.printf "\n%s(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2);
+ Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1);
+ Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))]\n" indent (nregs * 2 + 3)
+ end else begin
+ Printf.printf "\n%s(parallel\n" indent;
+ Printf.printf "%s [(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2);
+ Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1);
+ Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3);
+ Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent
+ end;
+ Printf.printf " \"(((operands[%d] == operands[0] && operands[%d] == operands[1])\n" (nregs * 2 + 2) (nregs * 2 + 3);
+ Printf.printf " || (operands[%d] == operands[0] && operands[%d] == operands[1]))\n" (nregs * 2 + 3) (nregs * 2 + 2);
+ Printf.printf " && peep2_reg_dead_p (%d, operands[0]) && peep2_reg_dead_p (%d, operands[1]))\"\n" (nregs + 1) (nregs + 1);
+ begin
+ if thumb then
+ Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n"
+ (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3)
+ else begin
+ Printf.printf " [(parallel\n";
+ Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))\n"
+ (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3);
+ Printf.printf " (clobber (reg:CC CC_REGNUM))])]\n"
+ end
+ end;
+ Printf.printf "{\n if (!gen_ldm_seq (operands, %d, true))\n FAIL;\n" nregs;
+ Printf.printf "})\n\n"
+
+let write_ldm_peephole nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let write_ldm_peephole_b nregs =
+ if nregs > 2 then begin
+ Printf.printf "(define_peephole2\n";
+ write_ldm_peep_set "" nregs 0 true;
+ Printf.printf "\n (parallel\n";
+ write_peep_sets (write_ldm_peep_set " " nregs) 1 true (nregs - 1);
+ Printf.printf "])]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+ end
+
+let write_stm_peephole nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_stm_peep_set "" nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let write_stm_peephole_b nregs =
+ if nregs > 2 then begin
+ Printf.printf "(define_peephole2\n";
+ write_stm_peep_set "" nregs 0 true;
+ Printf.printf "\n (parallel\n";
+ write_peep_sets (write_stm_peep_set "" nregs) 1 true (nregs - 1);
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+ end
+
+let write_const_stm_peephole_a nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_const_stm_peep_set nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let write_const_stm_peephole_b nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_any_load "const_int_operand" nregs) 0 true nregs;
+ Printf.printf "\n";
+ write_peep_sets (write_const_store nregs) 0 false nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let patterns () =
+ let addrmodes = [ IA; IB; DA; DB ] in
+ let sizes = [ 4; 3; 2] in
+ List.iter
+ (fun n ->
+ List.iter
+ (fun addrmode ->
+ write_ldm_pattern addrmode n false;
+ write_ldm_pattern addrmode n true;
+ write_stm_pattern addrmode n false;
+ write_stm_pattern addrmode n true)
+ addrmodes;
+ write_ldm_peephole n;
+ write_ldm_peephole_b n;
+ write_const_stm_peephole_a n;
+ write_const_stm_peephole_b n;
+ write_stm_peephole n;)
+ sizes;
+ write_ldm_commutative_peephole false;
+ write_ldm_commutative_peephole true
+
+let print_lines = List.iter (fun s -> Format.printf "%s@\n" s)
+
+(* Do it. *)
+
+let _ =
+ print_lines [
+"/* ARM ldm/stm instruction patterns. This file was automatically generated";
+" using arm-ldmstm.ml. Please do not edit manually.";
+"";
+" Copyright (C) 2010 Free Software Foundation, Inc.";
+" Contributed by CodeSourcery.";
+"";
+" This file is part of GCC.";
+"";
+" GCC is free software; you can redistribute it and/or modify it";
+" under the terms of the GNU General Public License as published";
+" by the Free Software Foundation; either version 3, or (at your";
+" option) any later version.";
+"";
+" GCC is distributed in the hope that it will be useful, but WITHOUT";
+" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
+" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
+" License for more details.";
+"";
+" You should have received a copy of the GNU General Public License and";
+" a copy of the GCC Runtime Library Exception along with this program;";
+" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
+" <http://www.gnu.org/licenses/>. */";
+""];
+ patterns ();
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
new file mode 100644
index 000000000..24e3d90a5
--- /dev/null
+++ b/gcc/config/arm/arm-modes.def
@@ -0,0 +1,78 @@
+/* Definitions of target machine for GNU compiler, for ARM.
+ Copyright (C) 2002, 2004, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+ and Martin Simmons (@harleqn.co.uk).
+ More major hacks by Richard Earnshaw (rearnsha@arm.com)
+ Minor hacks by Nick Clifton (nickc@cygnus.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Extended precision floating point.
+ FIXME What format is this? */
+FLOAT_MODE (XF, 12, 0);
+
+/* Half-precision floating point */
+FLOAT_MODE (HF, 2, 0);
+ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
+ ? &arm_half_format : &ieee_half_format));
+
+/* CCFPEmode should be used with floating inequalities,
+ CCFPmode should be used with floating equalities.
+ CC_NOOVmode should be used with SImode integer equalities.
+ CC_Zmode should be used if only the Z flag is set correctly
+ CC_Cmode should be used if only the C flag is set correctly, after an
+ addition.
+ CC_Nmode should be used if only the N (sign) flag is set correctly
+ CC_CZmode should be used if only the C and Z flags are correct
+ (used for DImode unsigned comparisons).
+ CC_NCVmode should be used if only the N, C, and V flags are correct
+ (used for DImode signed comparisons).
+ CCmode should be used otherwise. */
+
+CC_MODE (CC_NOOV);
+CC_MODE (CC_Z);
+CC_MODE (CC_CZ);
+CC_MODE (CC_NCV);
+CC_MODE (CC_SWP);
+CC_MODE (CCFP);
+CC_MODE (CCFPE);
+CC_MODE (CC_DNE);
+CC_MODE (CC_DEQ);
+CC_MODE (CC_DLE);
+CC_MODE (CC_DLT);
+CC_MODE (CC_DGE);
+CC_MODE (CC_DGT);
+CC_MODE (CC_DLEU);
+CC_MODE (CC_DLTU);
+CC_MODE (CC_DGEU);
+CC_MODE (CC_DGTU);
+CC_MODE (CC_C);
+CC_MODE (CC_N);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+
+/* Opaque integer modes for 3, 4, 6 or 8 Neon double registers (2 is
+ TImode). */
+INT_MODE (EI, 24);
+INT_MODE (OI, 32);
+INT_MODE (CI, 48);
+INT_MODE (XI, 64);
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
new file mode 100644
index 000000000..f037a456a
--- /dev/null
+++ b/gcc/config/arm/arm-protos.h
@@ -0,0 +1,231 @@
+/* Prototypes for exported functions defined in arm.c and pe.c
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Richard Earnshaw (rearnsha@arm.com)
+ Minor hacks by Nick Clifton (nickc@cygnus.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_ARM_PROTOS_H
+#define GCC_ARM_PROTOS_H
+
+extern int use_return_insn (int, rtx);
+extern enum reg_class arm_regno_class (int);
+extern void arm_load_pic_register (unsigned long);
+extern int arm_volatile_func (void);
+extern const char *arm_output_epilogue (rtx);
+extern void arm_expand_prologue (void);
+extern const char *arm_strip_name_encoding (const char *);
+extern void arm_asm_output_labelref (FILE *, const char *);
+extern void thumb2_asm_output_opcode (FILE *);
+extern unsigned long arm_current_func_type (void);
+extern HOST_WIDE_INT arm_compute_initial_elimination_offset (unsigned int,
+ unsigned int);
+extern HOST_WIDE_INT thumb_compute_initial_elimination_offset (unsigned int,
+ unsigned int);
+extern unsigned int arm_dbx_register_number (unsigned int);
+extern void arm_output_fn_unwind (FILE *, bool);
+
+
+#ifdef RTX_CODE
+extern bool arm_vector_mode_supported_p (enum machine_mode);
+extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
+extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern int const_ok_for_arm (HOST_WIDE_INT);
+extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
+ HOST_WIDE_INT, rtx, rtx, int);
+extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
+extern int legitimate_pic_operand_p (rtx);
+extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern rtx legitimize_tls_address (rtx, rtx);
+extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int);
+extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT);
+extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int,
+ int);
+extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
+ int);
+extern int arm_const_double_rtx (rtx);
+extern int neg_const_double_rtx_ok_for_fpa (rtx);
+extern int vfp3_const_double_rtx (rtx);
+extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
+extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
+ int *);
+extern char *neon_output_logic_immediate (const char *, rtx *,
+ enum machine_mode, int, int);
+extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
+ rtx (*) (rtx, rtx, rtx));
+extern rtx neon_make_constant (rtx);
+extern void neon_expand_vector_init (rtx, rtx);
+extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+extern HOST_WIDE_INT neon_element_bits (enum machine_mode);
+extern void neon_reinterpret (rtx, rtx);
+extern void neon_emit_pair_result_insn (enum machine_mode,
+ rtx (*) (rtx, rtx, rtx, rtx),
+ rtx, rtx, rtx);
+extern void neon_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int);
+extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
+ bool);
+extern bool arm_tls_referenced_p (rtx);
+extern bool arm_cannot_force_const_mem (rtx);
+
+extern int cirrus_memory_offset (rtx);
+extern int arm_coproc_mem_operand (rtx, bool);
+extern int neon_vector_mem_operand (rtx, int);
+extern int neon_struct_mem_operand (rtx);
+extern int arm_no_early_store_addr_dep (rtx, rtx);
+extern int arm_early_store_addr_dep (rtx, rtx);
+extern int arm_early_load_addr_dep (rtx, rtx);
+extern int arm_no_early_alu_shift_dep (rtx, rtx);
+extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
+extern int arm_no_early_mul_dep (rtx, rtx);
+extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
+
+extern int tls_mentioned_p (rtx);
+extern int symbol_mentioned_p (rtx);
+extern int label_mentioned_p (rtx);
+extern RTX_CODE minmax_code (rtx);
+extern int adjacent_mem_locations (rtx, rtx);
+extern bool gen_ldm_seq (rtx *, int, bool);
+extern bool gen_stm_seq (rtx *, int);
+extern bool gen_const_stm_seq (rtx *, int);
+extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
+extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
+extern int arm_gen_movmemqi (rtx *);
+extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
+extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
+ HOST_WIDE_INT);
+extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx);
+extern rtx arm_gen_return_addr_mask (void);
+extern void arm_reload_in_hi (rtx *);
+extern void arm_reload_out_hi (rtx *);
+extern int arm_const_double_inline_cost (rtx);
+extern bool arm_const_double_by_parts (rtx);
+extern bool arm_const_double_by_immediates (rtx);
+extern const char *fp_immediate_constant (rtx);
+extern void arm_emit_call_insn (rtx, rtx);
+extern const char *output_call (rtx *);
+extern const char *output_call_mem (rtx *);
+extern void arm_emit_movpair (rtx, rtx);
+extern const char *output_mov_long_double_fpa_from_arm (rtx *);
+extern const char *output_mov_long_double_arm_from_fpa (rtx *);
+extern const char *output_mov_long_double_arm_from_arm (rtx *);
+extern const char *output_mov_double_fpa_from_arm (rtx *);
+extern const char *output_mov_double_arm_from_fpa (rtx *);
+extern const char *output_move_double (rtx *);
+extern const char *output_move_quad (rtx *);
+extern const char *output_move_vfp (rtx *operands);
+extern const char *output_move_neon (rtx *operands);
+extern int arm_attr_length_move_neon (rtx);
+extern int arm_address_offset_is_imm (rtx);
+extern const char *output_add_immediate (rtx *);
+extern const char *arithmetic_instr (rtx, int);
+extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
+extern const char *output_return_instruction (rtx, int, int);
+extern void arm_poke_function_name (FILE *, const char *);
+extern void arm_final_prescan_insn (rtx);
+extern int arm_debugger_arg_offset (int, rtx);
+extern bool arm_is_long_call_p (tree);
+extern int arm_emit_vector_const (FILE *, rtx);
+extern void arm_emit_fp16_const (rtx c);
+extern const char * arm_output_load_gr (rtx *);
+extern const char *vfp_output_fstmd (rtx *);
+extern void arm_set_return_address (rtx, rtx);
+extern int arm_eliminable_register (rtx);
+extern const char *arm_output_shift (rtx *, int);
+extern void arm_expand_sync (enum machine_mode, struct arm_sync_generator *,
+ rtx, rtx, rtx, rtx);
+extern const char *arm_output_memory_barrier (rtx *);
+extern const char *arm_output_sync_insn (rtx, rtx *);
+extern unsigned int arm_sync_loop_insns (rtx , rtx *);
+
+#if defined TREE_CODE
+extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+extern bool arm_pad_arg_upward (enum machine_mode, const_tree);
+extern bool arm_pad_reg_upward (enum machine_mode, tree, int);
+#endif
+extern int arm_apply_result_size (void);
+extern rtx aapcs_libcall_value (enum machine_mode);
+
+#endif /* RTX_CODE */
+
+extern int arm_float_words_big_endian (void);
+
+/* Thumb functions. */
+extern void arm_init_expanders (void);
+extern const char *thumb_unexpanded_epilogue (void);
+extern void thumb1_expand_prologue (void);
+extern void thumb1_expand_epilogue (void);
+#ifdef TREE_CODE
+extern int is_called_in_ARM_mode (tree);
+#endif
+extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
+#ifdef RTX_CODE
+extern void thumb1_final_prescan_insn (rtx);
+extern void thumb2_final_prescan_insn (rtx);
+extern const char *thumb_load_double_from_address (rtx *);
+extern const char *thumb_output_move_mem_multiple (int, rtx *);
+extern const char *thumb_call_via_reg (rtx);
+extern void thumb_expand_movmemqi (rtx *);
+extern rtx arm_return_addr (int, rtx);
+extern void thumb_reload_out_hi (rtx *);
+extern void thumb_reload_in_hi (rtx *);
+extern void thumb_set_return_address (rtx, rtx);
+extern const char *thumb1_output_casesi (rtx *);
+extern const char *thumb2_output_casesi (rtx *);
+#endif
+
+/* Defined in pe.c. */
+extern int arm_dllexport_name_p (const char *);
+extern int arm_dllimport_name_p (const char *);
+
+#ifdef TREE_CODE
+extern void arm_pe_unique_section (tree, int);
+extern void arm_pe_encode_section_info (tree, rtx, int);
+extern int arm_dllexport_p (tree);
+extern int arm_dllimport_p (tree);
+extern void arm_mark_dllexport (tree);
+extern void arm_mark_dllimport (tree);
+#endif
+
+extern void arm_pr_long_calls (struct cpp_reader *);
+extern void arm_pr_no_long_calls (struct cpp_reader *);
+extern void arm_pr_long_calls_off (struct cpp_reader *);
+
+extern void arm_lang_object_attributes_init (void);
+
+extern const char *arm_mangle_type (const_tree);
+
+extern void arm_order_regs_for_local_alloc (void);
+
+#ifdef RTX_CODE
+/* This needs to be here because we need RTX_CODE and similar. */
+
+struct tune_params
+{
+ bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
+ bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
+ int constant_limit;
+ int num_prefetch_slots;
+ int l1_cache_size;
+ int l1_cache_line_size;
+};
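+
+/* Illustrative only (the values here are hypothetical): a core-specific
+   instance might look like
+
+     const struct tune_params arm_example_tune =
+       { arm_fastmul_rtx_costs, NULL, 2, 1, 16384, 32 };
+
+   with current_tune pointed at it when the matching core is selected.  */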
+
+extern const struct tune_params *current_tune;
+#endif /* RTX_CODE */
+
+#endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
new file mode 100644
index 000000000..9b664e7e6
--- /dev/null
+++ b/gcc/config/arm/arm-tune.md
@@ -0,0 +1,5 @@
+;; -*- buffer-read-only: t -*-
+;; Generated automatically by gentune.sh from arm-cores.def
+(define_attr "tune"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
+ (const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
new file mode 100644
index 000000000..c3c5aa168
--- /dev/null
+++ b/gcc/config/arm/arm.c
@@ -0,0 +1,23712 @@
+/* Output routines for GCC for ARM.
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+ and Martin Simmons (@harleqn.co.uk).
+ More major hacks by Richard Earnshaw (rearnsha@arm.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "obstack.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "reload.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "cgraph.h"
+#include "ggc.h"
+#include "except.h"
+#include "c-family/c-pragma.h" /* ??? */
+#include "integrate.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "debug.h"
+#include "langhooks.h"
+#include "df.h"
+#include "intl.h"
+#include "libfuncs.h"
+#include "params.h"
+
+/* Forward definitions of types. */
+typedef struct minipool_node Mnode;
+typedef struct minipool_fixup Mfix;
+
+void (*arm_lang_output_object_attributes_hook)(void);
+
+/* Forward function declarations. */
+static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
+static int arm_compute_static_chain_stack_bytes (void);
+static arm_stack_offsets *arm_get_frame_offsets (void);
+static void arm_add_gc_roots (void);
+static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
+ HOST_WIDE_INT, rtx, rtx, int, int);
+static unsigned bit_count (unsigned long);
+static int arm_address_register_rtx_p (rtx, int);
+static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
+static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
+static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
+static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
+inline static int thumb1_index_register_rtx_p (rtx, int);
+static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
+static int thumb_far_jump_used_p (void);
+static bool thumb_force_lr_save (void);
+static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+static rtx emit_sfm (int, int);
+static unsigned arm_size_return_regs (void);
+static bool arm_assemble_integer (rtx, unsigned int, int);
+static void arm_print_operand (FILE *, rtx, int);
+static void arm_print_operand_address (FILE *, rtx);
+static bool arm_print_operand_punct_valid_p (unsigned char code);
+static const char *fp_const_from_val (REAL_VALUE_TYPE *);
+static arm_cc get_arm_condition_code (rtx);
+static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
+static rtx is_jump_table (rtx);
+static const char *output_multi_immediate (rtx *, const char *, const char *,
+ int, HOST_WIDE_INT);
+static const char *shift_op (rtx, HOST_WIDE_INT *);
+static struct machine_function *arm_init_machine_status (void);
+static void thumb_exit (FILE *, int);
+static rtx is_jump_table (rtx);
+static HOST_WIDE_INT get_jump_table_size (rtx);
+static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
+static Mnode *add_minipool_forward_ref (Mfix *);
+static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
+static Mnode *add_minipool_backward_ref (Mfix *);
+static void assign_minipool_offsets (Mfix *);
+static void arm_print_value (FILE *, rtx);
+static void dump_minipool (rtx);
+static int arm_barrier_cost (rtx);
+static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
+static void push_minipool_barrier (rtx, HOST_WIDE_INT);
+static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
+ rtx);
+static void arm_reorg (void);
+static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
+static unsigned long arm_compute_save_reg0_reg12_mask (void);
+static unsigned long arm_compute_save_reg_mask (void);
+static unsigned long arm_isr_value (tree);
+static unsigned long arm_compute_func_type (void);
+static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
+static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
+#endif
+static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
+static int arm_comp_type_attributes (const_tree, const_tree);
+static void arm_set_default_type_attributes (tree);
+static int arm_adjust_cost (rtx, rtx, rtx, int);
+static int count_insns_for_constant (HOST_WIDE_INT, int);
+static int arm_get_strip_length (int);
+static bool arm_function_ok_for_sibcall (tree, tree);
+static enum machine_mode arm_promote_function_mode (const_tree,
+ enum machine_mode, int *,
+ const_tree, int);
+static bool arm_return_in_memory (const_tree, const_tree);
+static rtx arm_function_value (const_tree, const_tree, bool);
+static rtx arm_libcall_value (enum machine_mode, const_rtx);
+
+static void arm_internal_label (FILE *, const char *, unsigned long);
+static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
+ tree);
+static bool arm_have_conditional_execution (void);
+static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
+static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
+static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_rtx_costs (rtx, int, int, int *, bool);
+static int arm_address_cost (rtx, bool);
+static bool arm_memory_load_p (rtx);
+static bool arm_cirrus_insn_p (rtx);
+static void cirrus_reorg (rtx);
+static void arm_init_builtins (void);
+static void arm_init_iwmmxt_builtins (void);
+static rtx safe_vector_operand (rtx, enum machine_mode);
+static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
+static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void emit_constant_insn (rtx cond, rtx pattern);
+static rtx emit_set_insn (rtx, rtx);
+static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+ const_tree);
+static int aapcs_select_return_coproc (const_tree, const_tree);
+
+#ifdef OBJECT_FORMAT_ELF
+static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
+static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
+#endif
+#ifndef ARM_PE
+static void arm_encode_section_info (tree, rtx, int);
+#endif
+
+static void arm_file_end (void);
+static void arm_file_start (void);
+
+static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static bool arm_promote_prototypes (const_tree);
+static bool arm_default_short_enums (void);
+static bool arm_align_anon_bitfield (void);
+static bool arm_return_in_msb (const_tree);
+static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
+#if ARM_UNWIND_INFO
+static void arm_unwind_emit (FILE *, rtx);
+static bool arm_output_ttype (rtx);
+static void arm_asm_emit_except_personality (rtx);
+static void arm_asm_init_sections (void);
+#endif
+static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
+static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
+static rtx arm_dwarf_register_span (rtx);
+
+static tree arm_cxx_guard_type (void);
+static bool arm_cxx_guard_mask_bit (void);
+static tree arm_get_cookie_size (tree);
+static bool arm_cookie_has_size (void);
+static bool arm_cxx_cdtor_returns_this (void);
+static bool arm_cxx_key_method_may_be_inline (void);
+static void arm_cxx_determine_class_data_visibility (tree);
+static bool arm_cxx_class_data_always_comdat (void);
+static bool arm_cxx_use_aeabi_atexit (void);
+static void arm_init_libfuncs (void);
+static tree arm_build_builtin_va_list (void);
+static void arm_expand_builtin_va_start (tree, rtx);
+static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static void arm_option_override (void);
+static bool arm_handle_option (size_t, const char *, int);
+static void arm_target_help (void);
+static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
+static bool arm_cannot_copy_insn_p (rtx);
+static bool arm_tls_symbol_p (rtx x);
+static int arm_issue_rate (void);
+static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static bool arm_output_addr_const_extra (FILE *, rtx);
+static bool arm_allocate_stack_slots_for_args (void);
+static const char *arm_invalid_parameter_type (const_tree t);
+static const char *arm_invalid_return_type (const_tree t);
+static tree arm_promoted_type (const_tree t);
+static tree arm_convert_to_type (tree type, tree expr);
+static bool arm_scalar_mode_supported_p (enum machine_mode);
+static bool arm_frame_pointer_required (void);
+static bool arm_can_eliminate (const int, const int);
+static void arm_asm_trampoline_template (FILE *);
+static void arm_trampoline_init (rtx, tree, rtx);
+static rtx arm_trampoline_adjust_address (rtx);
+static rtx arm_pic_static_addr (rtx orig, rtx reg);
+static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
+static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
+static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
+static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
+static bool arm_class_likely_spilled_p (reg_class_t);
+static HOST_WIDE_INT arm_vector_alignment (const_tree type);
+static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
+static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type,
+ int misalignment,
+ bool is_packed);
+static void arm_conditional_register_usage (void);
+static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+
+
+/* Table of machine attributes. */
+static const struct attribute_spec arm_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside the 26-bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL },
+ /* These functions, by contrast, are always known to reside within the
+ 26-bit addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL },
+ /* Specify the procedure call conventions for a function. */
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
+ /* Interrupt Service Routines have special prologue and epilogue requirements. */
+ { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
+ { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#ifdef ARM_PE
+ /* ARM/PE has three new attributes:
+ interfacearm - ?
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times.
+ */
+ { "dllimport", 0, 0, true, false, false, NULL },
+ { "dllexport", 0, 0, true, false, false, NULL },
+ { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
+#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
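+
+/* Illustrative use of the table above (example only, not part of the
+ compiler; the function names here are hypothetical). Declarations
+ such as the following are accepted on ARM targets:
+
+ void far_away (void) __attribute__ ((long_call));
+ void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
+ void startup_stub (void) __attribute__ ((naked));
+ double f2d (float) __attribute__ ((pcs ("aapcs")));
+
+ "long_call" forces an indirect call sequence, "interrupt" selects the
+ ISR prologue/epilogue, "naked" suppresses prologue/epilogue entirely,
+ and "pcs" overrides the calling convention for one function. */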
+
+/* Set default optimization options. */
+static const struct default_options arm_option_optimization_table[] =
+ {
+ /* Enable section anchors by default at -O1 or higher. */
+ { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+#undef TARGET_MERGE_DECL_ATTRIBUTES
+#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE arm_attribute_table
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START arm_file_start
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END arm_file_end
+
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP NULL
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER arm_assemble_integer
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND arm_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION arm_handle_option
+#undef TARGET_HELP
+#define TARGET_HELP arm_target_help
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE arm_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
+
+#undef TARGET_ENCODE_SECTION_INFO
+#ifdef ARM_PE
+#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
+#endif
+
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
+
+#undef TARGET_ASM_INTERNAL_LABEL
+#define TARGET_ASM_INTERNAL_LABEL arm_internal_label
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arm_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE arm_libcall_value
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS arm_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST arm_address_cost
+
+#undef TARGET_SHIFT_TRUNCATION_MASK
+#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS arm_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN arm_expand_builtin
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS arm_init_libfuncs
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG arm_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
+
+#undef TARGET_DEFAULT_SHORT_ENUMS
+#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
+
+#undef TARGET_ALIGN_ANON_BITFIELD
+#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
+
+#undef TARGET_NARROW_VOLATILE_BITFIELD
+#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
+
+#undef TARGET_CXX_GUARD_TYPE
+#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
+
+#undef TARGET_CXX_GUARD_MASK_BIT
+#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
+
+#undef TARGET_CXX_GET_COOKIE_SIZE
+#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
+
+#undef TARGET_CXX_COOKIE_HAS_SIZE
+#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
+
+#undef TARGET_CXX_CDTOR_RETURNS_THIS
+#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
+
+#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
+#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
+
+#undef TARGET_CXX_USE_AEABI_ATEXIT
+#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
+
+#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
+#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
+ arm_cxx_determine_class_data_visibility
+
+#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
+#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
+
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB arm_return_in_msb
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY arm_return_in_memory
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
+
+#if ARM_UNWIND_INFO
+#undef TARGET_ASM_UNWIND_EMIT
+#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
+
+/* EABI unwinding tables use a different format for the typeinfo tables. */
+#undef TARGET_ASM_TTYPE
+#define TARGET_ASM_TTYPE arm_output_ttype
+
+#undef TARGET_ARM_EABI_UNWINDER
+#define TARGET_ARM_EABI_UNWINDER true
+
+#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
+#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
+
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
+#endif /* ARM_UNWIND_INFO */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
+
+#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
+#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
+
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
+
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
+
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 4095
+
+/* The minimum is set such that the total size of the block
+ for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
+ divisible by eight, ensuring natural spacing of anchors. */
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET -4088
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE arm_issue_rate
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE arm_mangle_type
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
+#endif
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
+
+#undef TARGET_INVALID_PARAMETER_TYPE
+#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
+
+#undef TARGET_INVALID_RETURN_TYPE
+#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
+
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE arm_promoted_type
+
+#undef TARGET_CONVERT_TO_TYPE
+#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE arm_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+
+#undef TARGET_VECTOR_ALIGNMENT
+#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
+ arm_vector_alignment_reachable
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+ arm_builtin_support_vector_misalignment
+
+#undef TARGET_PREFERRED_RENAME_CLASS
+#define TARGET_PREFERRED_RENAME_CLASS \
+ arm_preferred_rename_class
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Obstack for minipool constant handling. */
+static struct obstack minipool_obstack;
+static char * minipool_startobj;
+
+/* The maximum number of skipped insns that will be
+ conditionalised if possible. */
+static int max_insns_skipped = 5;
+
+extern FILE * asm_out_file;
+
+/* True if we are currently building a constant table. */
+int making_const_table;
+
+/* The processor for which instructions should be scheduled. */
+enum processor_type arm_tune = arm_none;
+
+/* The current tuning set. */
+const struct tune_params *current_tune;
+
+/* Which floating point hardware to schedule for. */
+int arm_fpu_attr;
+
+/* Which floating point hardware to use. */
+const struct arm_fpu_desc *arm_fpu_desc;
+
+/* Whether to use floating point hardware. */
+enum float_abi_type arm_float_abi;
+
+/* Which __fp16 format to use. */
+enum arm_fp16_format_type arm_fp16_format;
+
+/* Which ABI to use. */
+enum arm_abi_type arm_abi;
+
+/* Which thread pointer model to use. */
+enum arm_tp_type target_thread_pointer = TP_AUTO;
+
+/* Used to parse -mstructure_size_boundary command line option. */
+int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
+
+/* Used for Thumb call_via trampolines. */
+rtx thumb_call_via_label[14];
+static int thumb_call_reg_needed;
+
+/* Bit values used to identify processor capabilities. */
+#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
+#define FL_ARCH3M (1 << 1) /* Extended multiply */
+#define FL_MODE26 (1 << 2) /* 26-bit mode support */
+#define FL_MODE32 (1 << 3) /* 32-bit mode support */
+#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
+#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
+#define FL_THUMB (1 << 6) /* Thumb aware */
+#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
+#define FL_STRONG (1 << 8) /* StrongARM */
+#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
+#define FL_XSCALE (1 << 10) /* XScale */
+#define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
+#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
+ media instructions. */
+#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
+#define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
+ Note: ARM6 & 7 derivatives only. */
+#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
+#define FL_THUMB2 (1 << 16) /* Thumb-2. */
+#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
+ profile. */
+#define FL_DIV (1 << 18) /* Hardware divide. */
+#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
+#define FL_NEON (1 << 20) /* Neon instructions. */
+#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
+ architecture. */
+#define FL_ARCH7 (1 << 22) /* Architecture 7. */
+
+#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
+
+/* Flags that only affect tuning, not available instructions. */
+#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
+ | FL_CO_PROC)
+
+#define FL_FOR_ARCH2 FL_NOTM
+#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
+#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
+#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
+#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
+#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
+#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
+#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
+#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
+#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
+#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
+#define FL_FOR_ARCH6J FL_FOR_ARCH6
+#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
+#define FL_FOR_ARCH6Z FL_FOR_ARCH6
+#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
+#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
+#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
+#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
+#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
+#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
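+
+/* Each FL_FOR_ARCH macro builds on its predecessor, so a core need only
+ name its architecture to inherit the full feature set. For example,
+ FL_FOR_ARCH7A expands (transitively) to the ARMv6T2 set with FL_NOTM
+ first removed by FL_FOR_ARCH7 and then restored, plus FL_ARCH7 and
+ FL_ARCH6K. */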
+
+/* The bits in this mask specify which
+ instructions we are allowed to generate. */
+static unsigned long insn_flags = 0;
+
+/* The bits in this mask specify which instruction scheduling options should
+ be used. */
+static unsigned long tune_flags = 0;
+
+/* The following are used in the arm.md file as equivalents to bits
+ in the above two flag variables. */
+
+/* Nonzero if this chip supports the ARM Architecture 3M extensions. */
+int arm_arch3m = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 4 extensions. */
+int arm_arch4 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 4t extensions. */
+int arm_arch4t = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 5 extensions. */
+int arm_arch5 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 5E extensions. */
+int arm_arch5e = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 6 extensions. */
+int arm_arch6 = 0;
+
+/* Nonzero if this chip supports the ARM 6K extensions. */
+int arm_arch6k = 0;
+
+/* Nonzero if this chip supports the ARM 7 extensions. */
+int arm_arch7 = 0;
+
+/* Nonzero if instructions not present in the 'M' profile can be used. */
+int arm_arch_notm = 0;
+
+/* Nonzero if instructions present in ARMv7E-M can be used. */
+int arm_arch7em = 0;
+
+/* Nonzero if this chip can benefit from load scheduling. */
+int arm_ld_sched = 0;
+
+/* Nonzero if this chip is a StrongARM. */
+int arm_tune_strongarm = 0;
+
+/* Nonzero if this chip is a Cirrus variant. */
+int arm_arch_cirrus = 0;
+
+/* Nonzero if this chip supports Intel Wireless MMX technology. */
+int arm_arch_iwmmxt = 0;
+
+/* Nonzero if this chip is an XScale. */
+int arm_arch_xscale = 0;
+
+/* Nonzero if tuning for XScale. */
+int arm_tune_xscale = 0;
+
+/* Nonzero if we want to tune for stores that access the write-buffer.
+ This typically means an ARM6 or ARM7 with MMU or MPU. */
+int arm_tune_wbuf = 0;
+
+/* Nonzero if tuning for Cortex-A9. */
+int arm_tune_cortex_a9 = 0;
+
+/* Nonzero if generating Thumb instructions. */
+int thumb_code = 0;
+
+/* Nonzero if generating Thumb-1 instructions. */
+int thumb1_code = 0;
+
+/* Nonzero if we should define __THUMB_INTERWORK__ in the
+ preprocessor.
+ XXX This is a bit of a hack; it is intended to help work around
+ problems in GLD, which does not understand that armv5t code is
+ interworking clean. */
+int arm_cpp_interwork = 0;
+
+/* Nonzero if chip supports Thumb 2. */
+int arm_arch_thumb2;
+
+/* Nonzero if chip supports integer division instruction. */
+int arm_arch_hwdiv;
+
+/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
+ we must report the mode of the memory reference from
+ TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
+enum machine_mode output_memory_reference_mode;
+
+/* The register number to be used for the PIC offset register. */
+unsigned arm_pic_register = INVALID_REGNUM;
+
+/* Set to 1 after arm_reorg has started. Reset to 0 at the start of
+ the next function. */
+static int after_arm_reorg = 0;
+
+enum arm_pcs arm_pcs_default;
+
+/* For an explanation of these variables, see final_prescan_insn below. */
+int arm_ccfsm_state;
+/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
+enum arm_cond_code arm_current_cc;
+
+rtx arm_target_insn;
+int arm_target_label;
+/* The number of conditionally executed insns, including the current insn. */
+int arm_condexec_count = 0;
+/* A bitmask specifying the patterns for the IT block.
+ Zero means do not output an IT block before this insn. */
+int arm_condexec_mask = 0;
+/* The number of bits used in arm_condexec_mask. */
+int arm_condexec_masklen = 0;
+
+/* The condition codes of the ARM, and the inverse function. */
+static const char * const arm_condition_codes[] =
+{
+ "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
+};
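+
+/* The table is laid out so that a condition and its inverse differ only
+ in bit 0 ("eq"/"ne", "cs"/"cc", ...); inverting a condition code is
+ therefore a single XOR with 1 (see ARM_INVERSE_CONDITION_CODE in
+ arm.h). */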
+
+/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
+int arm_regs_in_sequence[] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
+#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
+#define streq(string1, string2) (strcmp (string1, string2) == 0)
+
+#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
+ | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
+ | (1 << PIC_OFFSET_TABLE_REGNUM)))
+
+/* Initialization code. */
+
+struct processors
+{
+ const char *const name;
+ enum processor_type core;
+ const char *arch;
+ const unsigned long flags;
+ const struct tune_params *const tune;
+};
+
+
+#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
+#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
+ prefetch_slots, \
+ l1_size, \
+ l1_line_size
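+
+/* These macros supply the last three fields of a tune_params entry, so
+ e.g. ARM_PREFETCH_BENEFICIAL (4, 32, 32) in arm_cortex_a9_tune below
+ means 4 simultaneous prefetch slots, an L1 cache size parameter of 32
+ and 32-byte cache lines. */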
+
+const struct tune_params arm_slowmul_tune =
+{
+ arm_slowmul_rtx_costs,
+ NULL,
+ 3,
+ ARM_PREFETCH_NOT_BENEFICIAL
+};
+
+const struct tune_params arm_fastmul_tune =
+{
+ arm_fastmul_rtx_costs,
+ NULL,
+ 1,
+ ARM_PREFETCH_NOT_BENEFICIAL
+};
+
+const struct tune_params arm_xscale_tune =
+{
+ arm_xscale_rtx_costs,
+ xscale_sched_adjust_cost,
+ 2,
+ ARM_PREFETCH_NOT_BENEFICIAL
+};
+
+const struct tune_params arm_9e_tune =
+{
+ arm_9e_rtx_costs,
+ NULL,
+ 1,
+ ARM_PREFETCH_NOT_BENEFICIAL
+};
+
+const struct tune_params arm_cortex_a9_tune =
+{
+ arm_9e_rtx_costs,
+ cortex_a9_sched_adjust_cost,
+ 1,
+ ARM_PREFETCH_BENEFICIAL(4,32,32)
+};
+
+const struct tune_params arm_fa726te_tune =
+{
+ arm_9e_rtx_costs,
+ fa726te_sched_adjust_cost,
+ 1,
+ ARM_PREFETCH_NOT_BENEFICIAL
+};
+
+
+/* Not all of these give usefully different compilation alternatives,
+ but there is no simple way of generalizing them. */
+static const struct processors all_cores[] =
+{
+ /* ARM Cores */
+#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
+ {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
+#include "arm-cores.def"
+#undef ARM_CORE
+ {NULL, arm_none, NULL, 0, NULL}
+};
+
+static const struct processors all_architectures[] =
+{
+ /* ARM Architectures */
+ /* We don't specify tuning costs here as they will be figured out
+ from the core. */
+
+ {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
+ {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
+ {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
+ {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
+ {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
+ /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
+ implementations that support it, so we will leave it out for now. */
+ {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
+ {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
+ {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
+ {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
+ {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
+ {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
+ {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
+ {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
+ {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
+ {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
+ {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
+ {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
+ {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
+ {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
+ {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
+ {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
+ {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
+ {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
+ {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
+ {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
+ {NULL, arm_none, NULL, 0 , NULL}
+};
+
+
+/* These are populated as command-line arguments are processed, or NULL
+ if not specified. */
+static const struct processors *arm_selected_arch;
+static const struct processors *arm_selected_cpu;
+static const struct processors *arm_selected_tune;
+
+/* The name of the preprocessor macro to define for this architecture. */
+
+char arm_arch_name[] = "__ARM_ARCH_0UNK__";
+
+/* Available values for -mfpu=. */
+
+static const struct arm_fpu_desc all_fpus[] =
+{
+ {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
+ {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
+ {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
+ /* Compatibility aliases. */
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
+};
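+
+/* For example, "-mfpu=neon -mfloat-abi=softfp" selects the NEON entry
+ above (VFPv3 register file, D0-D31) while keeping the soft-float
+ calling convention; the "vfp3" alias at the end maps to the same
+ description as "vfpv3". */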
+
+
+struct float_abi
+{
+ const char * name;
+ enum float_abi_type abi_type;
+};
+
+
+/* Available values for -mfloat-abi=. */
+
+static const struct float_abi all_float_abis[] =
+{
+ {"soft", ARM_FLOAT_ABI_SOFT},
+ {"softfp", ARM_FLOAT_ABI_SOFTFP},
+ {"hard", ARM_FLOAT_ABI_HARD}
+};
+
+
+struct fp16_format
+{
+ const char *name;
+ enum arm_fp16_format_type fp16_format_type;
+};
+
+
+/* Available values for -mfp16-format=. */
+
+static const struct fp16_format all_fp16_formats[] =
+{
+ {"none", ARM_FP16_FORMAT_NONE},
+ {"ieee", ARM_FP16_FORMAT_IEEE},
+ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
+};
+
+
+struct abi_name
+{
+ const char *name;
+ enum arm_abi_type abi_type;
+};
+
+
+/* Available values for -mabi=. */
+
+static const struct abi_name arm_all_abis[] =
+{
+ {"apcs-gnu", ARM_ABI_APCS},
+ {"atpcs", ARM_ABI_ATPCS},
+ {"aapcs", ARM_ABI_AAPCS},
+ {"iwmmxt", ARM_ABI_IWMMXT},
+ {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
+};
+
+/* Supported TLS relocations. */
+
+enum tls_reloc {
+ TLS_GD32,
+ TLS_LDM32,
+ TLS_LDO32,
+ TLS_IE32,
+ TLS_LE32
+};
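+
+/* These correspond to the standard ELF TLS access models: TLS_GD32 is
+ global-dynamic, TLS_LDM32/TLS_LDO32 together implement local-dynamic,
+ TLS_IE32 is initial-exec and TLS_LE32 is local-exec (selected via
+ -ftls-model= or chosen automatically based on -fpic and visibility). */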
+
+/* The maximum number of insns to be used when loading a constant. */
+inline static int
+arm_constant_limit (bool size_p)
+{
+ return size_p ? 1 : current_tune->constant_limit;
+}
+
+/* Emit an insn that's a simple single-set. Both operands must be known
+ to be valid. */
+inline static rtx
+emit_set_insn (rtx x, rtx y)
+{
+ return emit_insn (gen_rtx_SET (VOIDmode, x, y));
+}
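+
+/* For instance, "emit_set_insn (stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -8))" emits the single insn
+ (set (reg sp) (plus (reg sp) (const_int -8))). */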
+
+/* Return the number of bits set in VALUE. */
+static unsigned
+bit_count (unsigned long value)
+{
+ unsigned long count = 0;
+
+ while (value)
+ {
+ count++;
+ value &= value - 1; /* Clear the least-significant set bit. */
+ }
+
+ return count;
+}
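+
+/* Each "value &= value - 1" step clears exactly one set bit, so the
+ loop iterates once per bit: for value == 0x29 (binary 101001) the
+ successive values are 0x28, 0x20 and 0, giving a count of 3. */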
+
+/* Set up library functions unique to ARM. */
+
+static void
+arm_init_libfuncs (void)
+{
+ /* There are no special library functions unless we are using the
+ ARM BPABI. */
+ if (!TARGET_BPABI)
+ return;
+
+ /* The functions below are described in Section 4 of the "Run-Time
+ ABI for the ARM architecture", Version 1.0. */
+
+ /* Double-precision floating-point arithmetic. Table 2. */
+ set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
+ set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
+ set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
+ set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
+ set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
+
+ /* Double-precision comparisons. Table 3. */
+ set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
+ set_optab_libfunc (ne_optab, DFmode, NULL);
+ set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
+ set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
+ set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
+ set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
+ set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
+
+ /* Single-precision floating-point arithmetic. Table 4. */
+ set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
+ set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
+ set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
+ set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
+ set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
+
+ /* Single-precision comparisons. Table 5. */
+ set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
+ set_optab_libfunc (ne_optab, SFmode, NULL);
+ set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
+ set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
+ set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
+ set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
+ set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
+
+ /* Floating-point to integer conversions. Table 6. */
+ set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
+ set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
+ set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
+ set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
+ set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
+ set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
+ set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
+ set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
+
+ /* Conversions between floating types. Table 7. */
+ set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
+ set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
+
+ /* Integer to floating-point conversions. Table 8. */
+ set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
+ set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
+ set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
+ set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
+ set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
+ set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
+ set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
+ set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
+
+ /* Long long. Table 9. */
+ set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
+ set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
+ set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
+ set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
+ set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
+ set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
+ set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
+ set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
+
+ /* Integer (32/32->32) division. \S 4.3.1. */
+ set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
+ set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
+
+ /* The divmod functions are designed so that they can be used for
+ plain division, even though they return both the quotient and the
+ remainder. The quotient is returned in the usual location (i.e.,
+ r0 for SImode, {r0, r1} for DImode), just as would be expected
+ for an ordinary division routine. Because the AAPCS calling
+ conventions specify that all of { r0, r1, r2, r3 } are
+ caller-saved (call-clobbered) registers, there is no need to tell the compiler
+ explicitly that those registers are clobbered by these
+ routines. */
+ set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
+ set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
+
+ /* For SImode division the ABI provides div-without-mod routines,
+ which are faster. */
+ set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
+ set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
+
+ /* We don't have mod libcalls. Fortunately gcc knows how to use the
+ divmod libcalls instead. */
+ set_optab_libfunc (smod_optab, DImode, NULL);
+ set_optab_libfunc (umod_optab, DImode, NULL);
+ set_optab_libfunc (smod_optab, SImode, NULL);
+ set_optab_libfunc (umod_optab, SImode, NULL);
+
+ /* Half-precision float operations. The compiler handles all operations
+ with NULL libfuncs by converting to SFmode. */
+ switch (arm_fp16_format)
+ {
+ case ARM_FP16_FORMAT_IEEE:
+ case ARM_FP16_FORMAT_ALTERNATIVE:
+
+ /* Conversions. */
+ set_conv_libfunc (trunc_optab, HFmode, SFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_f2h_ieee"
+ : "__gnu_f2h_alternative"));
+ set_conv_libfunc (sext_optab, SFmode, HFmode,
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE
+ ? "__gnu_h2f_ieee"
+ : "__gnu_h2f_alternative"));
+
+ /* Arithmetic. */
+ set_optab_libfunc (add_optab, HFmode, NULL);
+ set_optab_libfunc (sdiv_optab, HFmode, NULL);
+ set_optab_libfunc (smul_optab, HFmode, NULL);
+ set_optab_libfunc (neg_optab, HFmode, NULL);
+ set_optab_libfunc (sub_optab, HFmode, NULL);
+
+ /* Comparisons. */
+ set_optab_libfunc (eq_optab, HFmode, NULL);
+ set_optab_libfunc (ne_optab, HFmode, NULL);
+ set_optab_libfunc (lt_optab, HFmode, NULL);
+ set_optab_libfunc (le_optab, HFmode, NULL);
+ set_optab_libfunc (ge_optab, HFmode, NULL);
+ set_optab_libfunc (gt_optab, HFmode, NULL);
+ set_optab_libfunc (unord_optab, HFmode, NULL);
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_AAPCS_BASED)
+ synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+}
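+
+/* Illustrative effect of the tables above (example only): on a BPABI
+ target without hardware divide, plain C source such as
+
+ int quotient (int a, int b) { return a / b; }
+
+ compiles to a call to __aeabi_idiv, while "a % b" calls
+ __aeabi_idivmod and takes the remainder from r1, as the Run-Time ABI
+ tables cited above specify. */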
+
+/* On AAPCS systems, this is the "struct __va_list". */
+static GTY(()) tree va_list_type;
+
+/* Return the type to use as __builtin_va_list. */
+static tree
+arm_build_builtin_va_list (void)
+{
+ tree va_list_name;
+ tree ap_field;
+
+ if (!TARGET_AAPCS_BASED)
+ return std_build_builtin_va_list ();
+
+ /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
+ defined as:
+
+ struct __va_list
+ {
+ void *__ap;
+ };
+
+ The C Library ABI further reinforces this definition in \S
+ 4.1.
+
+ We must follow this definition exactly. The structure tag
+ name is visible in C++ mangled names, and thus forms a part
+ of the ABI. The field name may be used by people who
+ #include <stdarg.h>. */
+ /* Create the type. */
+ va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
+ /* Give it the required name. */
+ va_list_name = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL,
+ get_identifier ("__va_list"),
+ va_list_type);
+ DECL_ARTIFICIAL (va_list_name) = 1;
+ TYPE_NAME (va_list_type) = va_list_name;
+ TYPE_STUB_DECL (va_list_type) = va_list_name;
+ /* Create the __ap field. */
+ ap_field = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__ap"),
+ ptr_type_node);
+ DECL_ARTIFICIAL (ap_field) = 1;
+ DECL_FIELD_CONTEXT (ap_field) = va_list_type;
+ TYPE_FIELDS (va_list_type) = ap_field;
+ /* Compute its layout. */
+ layout_type (va_list_type);
+
+ return va_list_type;
+}
+
+/* Return an expression of type "void *" pointing to the next
+ available argument in a variable-argument list. VALIST is the
+ user-level va_list object, of type __builtin_va_list. */
+static tree
+arm_extract_valist_ptr (tree valist)
+{
+ if (TREE_TYPE (valist) == error_mark_node)
+ return error_mark_node;
+
+ /* On an AAPCS target, the pointer is stored within "struct
+ va_list". */
+ if (TARGET_AAPCS_BASED)
+ {
+ tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
+ valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
+ valist, ap_field, NULL_TREE);
+ }
+
+ return valist;
+}
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
+static void
+arm_expand_builtin_va_start (tree valist, rtx nextarg)
+{
+ valist = arm_extract_valist_ptr (valist);
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
+static tree
+arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ valist = arm_extract_valist_ptr (valist);
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+}
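+
+/* Example only: on an AAPCS target, user code such as
+
+ va_list ap;
+ va_start (ap, last);
+ int x = va_arg (ap, int);
+
+ operates entirely on the single __ap pointer inside the
+ "struct __va_list" built above, which is why the standard std_*
+ expanders can be reused once arm_extract_valist_ptr has extracted
+ that pointer. */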
+
+/* Look up NAME in the table SEL; DESC names the option for diagnostics. */
+
+static const struct processors *
+arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
+{
+ if (!(name && *name))
+ return NULL;
+
+ for (; sel->name != NULL; sel++)
+ {
+ if (streq (name, sel->name))
+ return sel;
+ }
+
+ error ("bad value (%s) for %s switch", name, desc);
+ return NULL;
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_march_:
+ arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
+ return true;
+
+ case OPT_mcpu_:
+ arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
+ return true;
+
+ case OPT_mhard_float:
+ target_float_abi_name = "hard";
+ return true;
+
+ case OPT_msoft_float:
+ target_float_abi_name = "soft";
+ return true;
+
+ case OPT_mtune_:
+ arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
+ return true;
+
+ default:
+ return true;
+ }
+}
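+
+/* For example, "-march=armv7-a -mtune=cortex-a9" records the armv7-a
+ entry of all_architectures in arm_selected_arch and the cortex-a9
+ entry of all_cores in arm_selected_tune; any conflict with -mcpu= is
+ resolved later, in arm_option_override below. */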
+
+static void
+arm_target_help (void)
+{
+ int i;
+ static int columns = 0;
+ int remaining;
+
+ /* If we have not done so already, obtain the desired maximum width of
+ the output. Note - this is a duplication of the code at the start of
+ gcc/opts.c:print_specific_help() - the two copies should probably be
+ replaced by a single function. */
+ if (columns == 0)
+ {
+ const char *p;
+
+ p = getenv ("COLUMNS");
+ if (p != NULL)
+ {
+ int value = atoi (p);
+
+ if (value > 0)
+ columns = value;
+ }
+
+ if (columns == 0)
+ /* Use a reasonable default. */
+ columns = 80;
+ }
+
+ printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
+
+ /* The - 2 is because we know that the last entry in the array is NULL. */
+ i = ARRAY_SIZE (all_cores) - 2;
+ gcc_assert (i > 0);
+ printf (" %s", all_cores[i].name);
+ remaining = columns - (strlen (all_cores[i].name) + 4);
+ gcc_assert (remaining >= 0);
+
+ while (i--)
+ {
+ int len = strlen (all_cores[i].name);
+
+ if (remaining > len + 2)
+ {
+ printf (", %s", all_cores[i].name);
+ remaining -= len + 2;
+ }
+ else
+ {
+ if (remaining > 0)
+ printf (",");
+ printf ("\n %s", all_cores[i].name);
+ remaining = columns - (len + 4);
+ }
+ }
+
+ printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
+
+ i = ARRAY_SIZE (all_architectures) - 2;
+ gcc_assert (i > 0);
+
+ printf (" %s", all_architectures[i].name);
+ remaining = columns - (strlen (all_architectures[i].name) + 4);
+ gcc_assert (remaining >= 0);
+
+ while (i--)
+ {
+ int len = strlen (all_architectures[i].name);
+
+ if (remaining > len + 2)
+ {
+ printf (", %s", all_architectures[i].name);
+ remaining -= len + 2;
+ }
+ else
+ {
+ if (remaining > 0)
+ printf (",");
+ printf ("\n %s", all_architectures[i].name);
+ remaining = columns - (len + 4);
+ }
+ }
+ printf ("\n");
+
+}
+
+/* Fix up any incompatible options that the user has specified. */
+static void
+arm_option_override (void)
+{
+ unsigned i;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ if (arm_selected_arch)
+ {
+ if (arm_selected_cpu)
+ {
+ /* Check for a conflict between -mcpu= and -march=. */
+ if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
+ {
+ warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
+ arm_selected_cpu->name, arm_selected_arch->name);
+ /* -march wins for code generation.
+ -mcpu wins for default tuning. */
+ if (!arm_selected_tune)
+ arm_selected_tune = arm_selected_cpu;
+
+ arm_selected_cpu = arm_selected_arch;
+ }
+ else
+ /* -mcpu wins. */
+ arm_selected_arch = NULL;
+ }
+ else
+ /* Pick a CPU based on the architecture. */
+ arm_selected_cpu = arm_selected_arch;
+ }
+
+ /* If the user did not specify a processor, choose one for them. */
+ if (!arm_selected_cpu)
+ {
+ const struct processors * sel;
+ unsigned int sought;
+
+ arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
+ if (!arm_selected_cpu->name)
+ {
+#ifdef SUBTARGET_CPU_DEFAULT
+ /* Use the subtarget default CPU if none was specified by
+ configure. */
+ arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
+#endif
+ /* Default to ARM6. */
+ if (!arm_selected_cpu->name)
+ arm_selected_cpu = &all_cores[arm6];
+ }
+
+ sel = arm_selected_cpu;
+ insn_flags = sel->flags;
+
+ /* Now check to see if the user has specified a command-line
+ switch that requires certain abilities from the CPU. */
+ sought = 0;
+
+ if (TARGET_INTERWORK || TARGET_THUMB)
+ {
+ sought |= (FL_THUMB | FL_MODE32);
+
+ /* There are no ARM processors that support both APCS-26 and
+ interworking. Therefore we force FL_MODE26 to be removed
+ from insn_flags here (if it was set), so that the search
+ below will always be able to find a compatible processor. */
+ insn_flags &= ~FL_MODE26;
+ }
+
+ if (sought != 0 && ((sought & insn_flags) != sought))
+ {
+ /* Try to locate a CPU type that supports all of the abilities
+ of the default CPU, plus the extra abilities requested by
+ the user. */
+ for (sel = all_cores; sel->name != NULL; sel++)
+ if ((sel->flags & sought) == (sought | insn_flags))
+ break;
+
+ if (sel->name == NULL)
+ {
+ unsigned current_bit_count = 0;
+ const struct processors * best_fit = NULL;
+
+ /* Ideally we would like to issue an error message here
+ saying that it was not possible to find a CPU compatible
+ with the default CPU, but which also supports the command
+ line options specified by the programmer, and so they
+ ought to use the -mcpu=<name> command line option to
+ override the default CPU type.
+
+ If we cannot find a cpu that has both the
+ characteristics of the default cpu and the given
+ command line options we scan the array again looking
+ for a best match. */
+ for (sel = all_cores; sel->name != NULL; sel++)
+ if ((sel->flags & sought) == sought)
+ {
+ unsigned count;
+
+ count = bit_count (sel->flags & insn_flags);
+
+ if (count >= current_bit_count)
+ {
+ best_fit = sel;
+ current_bit_count = count;
+ }
+ }
+
+ gcc_assert (best_fit);
+ sel = best_fit;
+ }
+
+ arm_selected_cpu = sel;
+ }
+ }
+
+ gcc_assert (arm_selected_cpu);
+ /* The selected cpu may be an architecture, so look up tuning by core ID. */
+ if (!arm_selected_tune)
+ arm_selected_tune = &all_cores[arm_selected_cpu->core];
+
+ sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
+ insn_flags = arm_selected_cpu->flags;
+
+ arm_tune = arm_selected_tune->core;
+ tune_flags = arm_selected_tune->flags;
+ current_tune = arm_selected_tune->tune;
+
+ if (target_fp16_format_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
+ {
+ if (streq (all_fp16_formats[i].name, target_fp16_format_name))
+ {
+ arm_fp16_format = all_fp16_formats[i].fp16_format_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (all_fp16_formats))
+ error ("invalid __fp16 format option: -mfp16-format=%s",
+ target_fp16_format_name);
+ }
+ else
+ arm_fp16_format = ARM_FP16_FORMAT_NONE;
+
+ if (target_abi_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
+ {
+ if (streq (arm_all_abis[i].name, target_abi_name))
+ {
+ arm_abi = arm_all_abis[i].abi_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (arm_all_abis))
+ error ("invalid ABI option: -mabi=%s", target_abi_name);
+ }
+ else
+ arm_abi = ARM_DEFAULT_ABI;
+
+ /* Make sure that the processor choice does not conflict with any of the
+ other command line choices. */
+ if (TARGET_ARM && !(insn_flags & FL_NOTM))
+ error ("target CPU does not support ARM mode");
+
+ /* BPABI targets use linker tricks to allow interworking on cores
+ without thumb support. */
+ if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
+ {
+ warning (0, "target CPU does not support interworking" );
+ target_flags &= ~MASK_INTERWORK;
+ }
+
+ if (TARGET_THUMB && !(insn_flags & FL_THUMB))
+ {
+ warning (0, "target CPU does not support THUMB instructions");
+ target_flags &= ~MASK_THUMB;
+ }
+
+ if (TARGET_APCS_FRAME && TARGET_THUMB)
+ {
+ /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
+ target_flags &= ~MASK_APCS_FRAME;
+ }
+
+ /* Callee super interworking implies thumb interworking. Adding
+ this to the flags here simplifies the logic elsewhere. */
+ if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
+ target_flags |= MASK_INTERWORK;
+
+ /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
+ from here, where no function is currently being compiled. */
+ if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
+ warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
+
+ if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
+ warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
+
+ if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
+ {
+ warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
+ target_flags |= MASK_APCS_FRAME;
+ }
+
+ if (TARGET_POKE_FUNCTION_NAME)
+ target_flags |= MASK_APCS_FRAME;
+
+ if (TARGET_APCS_REENT && flag_pic)
+ error ("-fpic and -mapcs-reent are incompatible");
+
+ if (TARGET_APCS_REENT)
+ warning (0, "APCS reentrant code not supported. Ignored");
+
+ /* If this target is normally configured to use APCS frames, warn if they
+ are turned off and debugging is turned on. */
+ if (TARGET_ARM
+ && write_symbols != NO_DEBUG
+ && !TARGET_APCS_FRAME
+ && (TARGET_DEFAULT & MASK_APCS_FRAME))
+ warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
+
+ if (TARGET_APCS_FLOAT)
+ warning (0, "passing floating point arguments in fp regs not yet supported");
+
+ /* Initialize boolean versions of the flags, for use in the arm.md file. */
+ arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
+ arm_arch4 = (insn_flags & FL_ARCH4) != 0;
+ arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
+ arm_arch5 = (insn_flags & FL_ARCH5) != 0;
+ arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
+ arm_arch6 = (insn_flags & FL_ARCH6) != 0;
+ arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
+ arm_arch_notm = (insn_flags & FL_NOTM) != 0;
+ arm_arch7 = (insn_flags & FL_ARCH7) != 0;
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
+ arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
+ arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
+ arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
+
+ arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
+ arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
+ thumb_code = TARGET_ARM == 0;
+ thumb1_code = TARGET_THUMB1 != 0;
+ arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
+ arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
+ arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
+ arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
+ arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
+
+ /* If we are not using the default (ARM mode) section anchor offset
+ ranges, then set the correct ranges now. */
+ if (TARGET_THUMB1)
+ {
+ /* Thumb-1 LDR instructions cannot have negative offsets.
+ Permissible positive offset ranges are 5-bit (for byte loads),
+ 6-bit (for halfword loads), or 7-bit (for word loads).
+ Empirical results suggest a 7-bit anchor range gives the best
+ overall code size. */
+ targetm.min_anchor_offset = 0;
+ targetm.max_anchor_offset = 127;
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* The minimum is set such that the total size of the block
+ for a particular anchor is 248 + 1 + 4095 bytes, which is
+ divisible by eight, ensuring natural spacing of anchors. */
+ targetm.min_anchor_offset = -248;
+ targetm.max_anchor_offset = 4095;
+ }
+
+ /* V5 code we generate is completely interworking capable, so we turn off
+ TARGET_INTERWORK here to avoid many tests later on. */
+
+ /* XXX However, we must pass the right pre-processor defines to CPP
+ or GLD can get confused. This is a hack. */
+ if (TARGET_INTERWORK)
+ arm_cpp_interwork = 1;
+
+ if (arm_arch5)
+ target_flags &= ~MASK_INTERWORK;
+
+ if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
+ error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
+
+ if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
+ error ("iwmmxt abi requires an iwmmxt capable cpu");
+
+ if (target_fpu_name == NULL && target_fpe_name != NULL)
+ {
+ if (streq (target_fpe_name, "2"))
+ target_fpu_name = "fpe2";
+ else if (streq (target_fpe_name, "3"))
+ target_fpu_name = "fpe3";
+ else
+ error ("invalid floating point emulation option: -mfpe=%s",
+ target_fpe_name);
+ }
+
+ if (target_fpu_name == NULL)
+ {
+#ifdef FPUTYPE_DEFAULT
+ target_fpu_name = FPUTYPE_DEFAULT;
+#else
+ if (arm_arch_cirrus)
+ target_fpu_name = "maverick";
+ else
+ target_fpu_name = "fpe2";
+#endif
+ }
+
+ arm_fpu_desc = NULL;
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
+ {
+ if (streq (all_fpus[i].name, target_fpu_name))
+ {
+ arm_fpu_desc = &all_fpus[i];
+ break;
+ }
+ }
+
+ if (!arm_fpu_desc)
+ {
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name);
+ return;
+ }
+
+ switch (arm_fpu_desc->model)
+ {
+ case ARM_FP_MODEL_FPA:
+ if (arm_fpu_desc->rev == 2)
+ arm_fpu_attr = FPU_FPE2;
+ else if (arm_fpu_desc->rev == 3)
+ arm_fpu_attr = FPU_FPE3;
+ else
+ arm_fpu_attr = FPU_FPA;
+ break;
+
+ case ARM_FP_MODEL_MAVERICK:
+ arm_fpu_attr = FPU_MAVERICK;
+ break;
+
+ case ARM_FP_MODEL_VFP:
+ arm_fpu_attr = FPU_VFP;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (target_float_abi_name != NULL)
+ {
+ /* The user specified a FP ABI. */
+ for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
+ {
+ if (streq (all_float_abis[i].name, target_float_abi_name))
+ {
+ arm_float_abi = all_float_abis[i].abi_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (all_float_abis))
+ error ("invalid floating point abi: -mfloat-abi=%s",
+ target_float_abi_name);
+ }
+ else
+ arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
+
+ if (TARGET_AAPCS_BASED
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
+ error ("FPA is unsupported in the AAPCS");
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (TARGET_CALLER_INTERWORKING)
+ error ("AAPCS does not support -mcaller-super-interworking");
+ else if (TARGET_CALLEE_INTERWORKING)
+ error ("AAPCS does not support -mcallee-super-interworking");
+ }
+
+ /* FPA and iWMMXt are incompatible because the insn encodings overlap.
+ VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
+ will ever exist. GCC makes no attempt to support this combination. */
+ if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
+ sorry ("iWMMXt and hardware floating point");
+
+ /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
+ if (TARGET_THUMB2 && TARGET_IWMMXT)
+ sorry ("Thumb-2 iWMMXt");
+
+ /* __fp16 support currently assumes the core has ldrh. */
+ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
+ sorry ("__fp16 and no ldrh");
+
+ /* If soft-float is specified then don't use FPU. */
+ if (TARGET_SOFT_FLOAT)
+ arm_fpu_attr = FPU_NONE;
+
+ if (TARGET_AAPCS_BASED)
+ {
+ if (arm_abi == ARM_ABI_IWMMXT)
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD
+ && TARGET_HARD_FLOAT
+ && TARGET_VFP)
+ arm_pcs_default = ARM_PCS_AAPCS_VFP;
+ else
+ arm_pcs_default = ARM_PCS_AAPCS;
+ }
+ else
+ {
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
+ sorry ("-mfloat-abi=hard and VFP");
+
+ if (arm_abi == ARM_ABI_APCS)
+ arm_pcs_default = ARM_PCS_APCS;
+ else
+ arm_pcs_default = ARM_PCS_ATPCS;
+ }
+
+ /* For arm2/3 there is no need to do any scheduling if there is only
+ a floating point emulator, or we are doing software floating-point. */
+ if ((TARGET_SOFT_FLOAT
+ || (TARGET_FPA && arm_fpu_desc->rev))
+ && (tune_flags & FL_MODE32) == 0)
+ flag_schedule_insns = flag_schedule_insns_after_reload = 0;
+
+ if (target_thread_switch)
+ {
+ if (strcmp (target_thread_switch, "soft") == 0)
+ target_thread_pointer = TP_SOFT;
+ else if (strcmp (target_thread_switch, "auto") == 0)
+ target_thread_pointer = TP_AUTO;
+ else if (strcmp (target_thread_switch, "cp15") == 0)
+ target_thread_pointer = TP_CP15;
+ else
+ error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
+ }
+
+ /* Use the cp15 method if it is available. */
+ if (target_thread_pointer == TP_AUTO)
+ {
+ if (arm_arch6k && !TARGET_THUMB1)
+ target_thread_pointer = TP_CP15;
+ else
+ target_thread_pointer = TP_SOFT;
+ }
+
+ if (TARGET_HARD_TP && TARGET_THUMB1)
+ error ("can not use -mtp=cp15 with 16-bit Thumb");
+
+ /* Override the default structure alignment for AAPCS ABI. */
+ if (TARGET_AAPCS_BASED)
+ arm_structure_size_boundary = 8;
+
+ if (structure_size_string != NULL)
+ {
+ int size = strtol (structure_size_string, NULL, 0);
+
+ if (size == 8 || size == 32
+ || (ARM_DOUBLEWORD_ALIGN && size == 64))
+ arm_structure_size_boundary = size;
+ else
+ warning (0, "structure size boundary can only be set to %s",
+ ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
+ }
+
+ if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
+ {
+ error ("RTP PIC is incompatible with Thumb");
+ flag_pic = 0;
+ }
+
+ /* If stack checking is disabled, we can use r10 as the PIC register,
+ which keeps r9 available. The EABI specifies r9 as the PIC register. */
+ if (flag_pic && TARGET_SINGLE_PIC_BASE)
+ {
+ if (TARGET_VXWORKS_RTP)
+ warning (0, "RTP PIC is incompatible with -msingle-pic-base");
+ arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
+ }
+
+ if (flag_pic && TARGET_VXWORKS_RTP)
+ arm_pic_register = 9;
+
+ if (arm_pic_register_string != NULL)
+ {
+ int pic_register = decode_reg_name (arm_pic_register_string);
+
+ if (!flag_pic)
+ warning (0, "-mpic-register= is useless without -fpic");
+
+ /* Prevent the user from choosing an obviously stupid PIC register. */
+ else if (pic_register < 0 || call_used_regs[pic_register]
+ || pic_register == HARD_FRAME_POINTER_REGNUM
+ || pic_register == STACK_POINTER_REGNUM
+ || pic_register >= PC_REGNUM
+ || (TARGET_VXWORKS_RTP
+ && (unsigned int) pic_register != arm_pic_register))
+ error ("unable to use '%s' for PIC register", arm_pic_register_string);
+ else
+ arm_pic_register = pic_register;
+ }
+
+ /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
+ if (fix_cm3_ldrd == 2)
+ {
+ if (arm_selected_cpu->core == cortexm3)
+ fix_cm3_ldrd = 1;
+ else
+ fix_cm3_ldrd = 0;
+ }
+
+ if (TARGET_THUMB1 && flag_schedule_insns)
+ {
+ /* Don't warn since it's on by default in -O2. */
+ flag_schedule_insns = 0;
+ }
+
+ if (optimize_size)
+ {
+ /* If optimizing for size, bump the number of instructions that we
+ are prepared to conditionally execute (even on a StrongARM). */
+ max_insns_skipped = 6;
+ }
+ else
+ {
+ /* StrongARM has early execution of branches, so a sequence
+ that is worth skipping is shorter. */
+ if (arm_tune_strongarm)
+ max_insns_skipped = 3;
+ }
+
+ /* Hot/Cold partitioning is not currently supported, since we can't
+ handle literal pool placement in that case. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition not supported on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
+ if (flag_pic)
+ /* Hoisting PIC address calculations more aggressively provides a small,
+ but measurable, size reduction for PIC code. Therefore, we decrease
+ the bar for unrestricted expression hoisting to the cost of PIC address
+ calculation, which is 2 instructions. */
+ maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* ARM EABI defaults to strict volatile bitfields. */
+ if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
+ && abi_version_at_least(2))
+ flag_strict_volatile_bitfields = 1;
+
+  /* Enable software prefetching at -O3 for CPUs that have prefetch, when we
+     have deemed it beneficial (signified by setting num_prefetch_slots to
+     1 or more).  */
+ if (flag_prefetch_loop_arrays < 0
+ && HAVE_prefetch
+ && optimize >= 3
+ && current_tune->num_prefetch_slots > 0)
+ flag_prefetch_loop_arrays = 1;
+
+ /* Set up parameters to be used in prefetching algorithm. Do not override the
+ defaults unless we are tuning for a core we have researched values for. */
+ if (current_tune->num_prefetch_slots > 0)
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ current_tune->num_prefetch_slots,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ if (current_tune->l1_cache_line_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+ current_tune->l1_cache_line_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ if (current_tune->l1_cache_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+ current_tune->l1_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* Register global variables with the garbage collector. */
+ arm_add_gc_roots ();
+}
+
+static void
+arm_add_gc_roots (void)
+{
+ gcc_obstack_init(&minipool_obstack);
+ minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
+}
+
+/* A table of known ARM exception types.
+ For use with the interrupt function attribute. */
+
+typedef struct
+{
+ const char *const arg;
+ const unsigned long return_value;
+}
+isr_attribute_arg;
+
+static const isr_attribute_arg isr_attribute_args [] =
+{
+ { "IRQ", ARM_FT_ISR },
+ { "irq", ARM_FT_ISR },
+ { "FIQ", ARM_FT_FIQ },
+ { "fiq", ARM_FT_FIQ },
+ { "ABORT", ARM_FT_ISR },
+ { "abort", ARM_FT_ISR },
+ { "ABORT", ARM_FT_ISR },
+ { "abort", ARM_FT_ISR },
+ { "UNDEF", ARM_FT_EXCEPTION },
+ { "undef", ARM_FT_EXCEPTION },
+ { "SWI", ARM_FT_EXCEPTION },
+ { "swi", ARM_FT_EXCEPTION },
+ { NULL, ARM_FT_NORMAL }
+};
+
+/* Returns the (interrupt) function type implied by the attribute
+   ARGUMENT, or ARM_FT_UNKNOWN if the type cannot be determined.  */
+
+static unsigned long
+arm_isr_value (tree argument)
+{
+ const isr_attribute_arg * ptr;
+ const char * arg;
+
+ if (!arm_arch_notm)
+ return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
+
+ /* No argument - default to IRQ. */
+ if (argument == NULL_TREE)
+ return ARM_FT_ISR;
+
+ /* Get the value of the argument. */
+ if (TREE_VALUE (argument) == NULL_TREE
+ || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
+ return ARM_FT_UNKNOWN;
+
+ arg = TREE_STRING_POINTER (TREE_VALUE (argument));
+
+ /* Check it against the list of known arguments. */
+ for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
+ if (streq (arg, ptr->arg))
+ return ptr->return_value;
+
+ /* An unrecognized interrupt type. */
+ return ARM_FT_UNKNOWN;
+}
+
+/* Computes the type of the current function. */
+
+static unsigned long
+arm_compute_func_type (void)
+{
+ unsigned long type = ARM_FT_UNKNOWN;
+ tree a;
+ tree attr;
+
+ gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
+
+ /* Decide if the current function is volatile. Such functions
+ never return, and many memory cycles can be saved by not storing
+ register values that will never be needed again. This optimization
+ was added to speed up context switching in a kernel application. */
+ if (optimize > 0
+ && (TREE_NOTHROW (current_function_decl)
+ || !(flag_unwind_tables
+ || (flag_exceptions
+ && arm_except_unwind_info (&global_options) != UI_SJLJ)))
+ && TREE_THIS_VOLATILE (current_function_decl))
+ type |= ARM_FT_VOLATILE;
+
+ if (cfun->static_chain_decl != NULL)
+ type |= ARM_FT_NESTED;
+
+ attr = DECL_ATTRIBUTES (current_function_decl);
+
+ a = lookup_attribute ("naked", attr);
+ if (a != NULL_TREE)
+ type |= ARM_FT_NAKED;
+
+ a = lookup_attribute ("isr", attr);
+ if (a == NULL_TREE)
+ a = lookup_attribute ("interrupt", attr);
+
+ if (a == NULL_TREE)
+ type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
+ else
+ type |= arm_isr_value (TREE_VALUE (a));
+
+ return type;
+}
+
+/* Returns the type of the current function. */
+
+unsigned long
+arm_current_func_type (void)
+{
+ if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
+ cfun->machine->func_type = arm_compute_func_type ();
+
+ return cfun->machine->func_type;
+}
+
+bool
+arm_allocate_stack_slots_for_args (void)
+{
+ /* Naked functions should not allocate stack slots for arguments. */
+ return !IS_NAKED (arm_current_func_type ());
+}
+
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ On the ARM, (if r8 is the static chain regnum, and remembering that
+ referencing pc adds an offset of 8) the trampoline looks like:
+ ldr r8, [pc, #0]
+ ldr pc, [pc]
+ .word static chain value
+ .word function's address
+ XXX FIXME: When the trampoline returns, r8 will be clobbered. */
+
+static void
+arm_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_ARM)
+ {
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else if (TARGET_THUMB2)
+ {
+      /* The Thumb-2 trampoline is similar to the ARM implementation.
+	 Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
+ STATIC_CHAIN_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
+ }
+ else
+ {
+ ASM_OUTPUT_ALIGN (f, 2);
+ fprintf (f, "\t.code\t16\n");
+ fprintf (f, ".Ltrampoline_start:\n");
+ asm_fprintf (f, "\tpush\t{r0, r1}\n");
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
+ asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
+ asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
+ }
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline. */
+
+static void
+arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr, mem, a_tramp;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ emit_move_insn (mem, fnaddr);
+
+ a_tramp = XEXP (m_tramp, 0);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+ LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
+ plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
+}
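+
+/* For illustration: in 32-bit (ARM or Thumb-2) code the chain value and
+   function address stored above land at offsets 8 and 12, immediately
+   after the two instructions emitted by arm_asm_trampoline_template;
+   the longer 16-bit Thumb stub pushes them out to offsets 12 and 16.  */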
+
+/* Thumb trampolines should be entered in thumb mode, so set
+ the bottom bit of the address. */
+
+static rtx
+arm_trampoline_adjust_address (rtx addr)
+{
+ if (TARGET_THUMB)
+ addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
+ NULL, 0, OPTAB_LIB_WIDEN);
+ return addr;
+}
+
+/* Return 1 if it is possible to return using a single instruction.
+ If SIBLING is non-null, this is a test for a return before a sibling
+ call. SIBLING is the call insn, so we can examine its register usage. */
+
+int
+use_return_insn (int iscond, rtx sibling)
+{
+ int regno;
+ unsigned int func_type;
+ unsigned long saved_int_regs;
+ unsigned HOST_WIDE_INT stack_adjust;
+ arm_stack_offsets *offsets;
+
+ /* Never use a return instruction before reload has run. */
+ if (!reload_completed)
+ return 0;
+
+ func_type = arm_current_func_type ();
+
+ /* Naked, volatile and stack alignment functions need special
+ consideration. */
+ if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
+ return 0;
+
+ /* So do interrupt functions that use the frame pointer and Thumb
+ interrupt functions. */
+ if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
+ return 0;
+
+ offsets = arm_get_frame_offsets ();
+ stack_adjust = offsets->outgoing_args - offsets->saved_regs;
+
+ /* As do variadic functions. */
+ if (crtl->args.pretend_args_size
+ || cfun->machine->uses_anonymous_args
+ /* Or if the function calls __builtin_eh_return () */
+ || crtl->calls_eh_return
+ /* Or if the function calls alloca */
+ || cfun->calls_alloca
+ /* Or if there is a stack adjustment. However, if the stack pointer
+ is saved on the stack, we can use a pre-incrementing stack load. */
+ || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
+ && stack_adjust == 4)))
+ return 0;
+
+ saved_int_regs = offsets->saved_regs_mask;
+
+ /* Unfortunately, the insn
+
+ ldmib sp, {..., sp, ...}
+
+ triggers a bug on most SA-110 based devices, such that the stack
+ pointer won't be correctly restored if the instruction takes a
+ page fault. We work around this problem by popping r3 along with
+ the other registers, since that is never slower than executing
+ another instruction.
+
+ We test for !arm_arch5 here, because code for any architecture
+ less than this could potentially be run on one of the buggy
+ chips. */
+ if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
+ {
+ /* Validate that r3 is a call-clobbered register (always true in
+ the default abi) ... */
+ if (!call_used_regs[3])
+ return 0;
+
+ /* ... that it isn't being used for a return value ... */
+ if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
+ return 0;
+
+ /* ... or for a tail-call argument ... */
+ if (sibling)
+ {
+ gcc_assert (GET_CODE (sibling) == CALL_INSN);
+
+ if (find_regno_fusage (sibling, USE, 3))
+ return 0;
+ }
+
+ /* ... and that there are no call-saved registers in r0-r2
+ (always true in the default ABI). */
+ if (saved_int_regs & 0x7)
+ return 0;
+ }
+
+ /* Can't be done if interworking with Thumb, and any registers have been
+ stacked. */
+ if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
+ return 0;
+
+ /* On StrongARM, conditional returns are expensive if they aren't
+ taken and multiple registers have been stacked. */
+ if (iscond && arm_tune_strongarm)
+ {
+ /* Conditional return when just the LR is stored is a simple
+ conditional-load instruction, that's not expensive. */
+ if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
+ return 0;
+
+ if (flag_pic
+ && arm_pic_register != INVALID_REGNUM
+ && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ return 0;
+ }
+
+ /* If there are saved registers but the LR isn't saved, then we need
+ two instructions for the return. */
+ if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
+ return 0;
+
+ /* Can't be done if any of the FPA regs are pushed,
+ since this also requires an insn. */
+ if (TARGET_HARD_FLOAT && TARGET_FPA)
+ for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ return 0;
+
+ /* Likewise VFP regs. */
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ return 0;
+
+ if (TARGET_REALLY_IWMMXT)
+ for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ return 0;
+
+ return 1;
+}
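+
+/* For example, an ARM-mode function whose prologue was
+     stmfd	sp!, {r4, lr}
+   can normally return with the single instruction
+     ldmfd	sp!, {r4, pc}
+   whereas a function that saved r4 but not the LR needs separate restore
+   and return instructions, so the answer for it is 0.  */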
+
+/* Return TRUE if int I is a valid immediate ARM constant. */
+
+int
+const_ok_for_arm (HOST_WIDE_INT i)
+{
+ int lowbit;
+
+ /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
+ be all zero, or all one. */
+ if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
+ && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
+ != ((~(unsigned HOST_WIDE_INT) 0)
+ & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
+ return FALSE;
+
+ i &= (unsigned HOST_WIDE_INT) 0xffffffff;
+
+ /* Fast return for 0 and small values. We must do this for zero, since
+ the code below can't handle that one case. */
+ if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
+ return TRUE;
+
+ /* Get the number of trailing zeros. */
+ lowbit = ffs((int) i) - 1;
+
+ /* Only even shifts are allowed in ARM mode so round down to the
+ nearest even number. */
+ if (TARGET_ARM)
+ lowbit &= ~1;
+
+ if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
+ return TRUE;
+
+ if (TARGET_ARM)
+ {
+ /* Allow rotated constants in ARM mode. */
+ if (lowbit <= 4
+ && ((i & ~0xc000003f) == 0
+ || (i & ~0xf000000f) == 0
+ || (i & ~0xfc000003) == 0))
+ return TRUE;
+ }
+ else
+ {
+ HOST_WIDE_INT v;
+
+ /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
+ v = i & 0xff;
+ v |= v << 16;
+ if (i == v || i == (v | (v << 8)))
+ return TRUE;
+
+ /* Allow repeated pattern 0xXY00XY00. */
+ v = i & 0xff00;
+ v |= v << 16;
+ if (i == v)
+ return TRUE;
+ }
+
+ return FALSE;
+}
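+
+/* A few worked examples: 0x000000ff, 0x0000ff00 and 0xf000000f are all
+   valid ARM immediates (an 8-bit value rotated right by an even amount),
+   while 0x000001fe (an 8-bit value at an odd position) and 0x00ff00ff
+   are not.  Thumb-2 accepts both of the latter: 0x000001fe because its
+   shifts need not be even, and 0x00ff00ff as a replicated byte pattern.  */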
+
+/* Return true if I is a valid constant for the operation CODE. */
+static int
+const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
+{
+ if (const_ok_for_arm (i))
+ return 1;
+
+ switch (code)
+ {
+ case PLUS:
+ case COMPARE:
+ case EQ:
+ case NE:
+ case GT:
+ case LE:
+ case LT:
+ case GE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
+ return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
+
+ case MINUS: /* Should only occur with (MINUS I reg) => rsb */
+ case XOR:
+ return 0;
+
+ case IOR:
+ if (TARGET_THUMB2)
+ return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
+ return 0;
+
+ case AND:
+ return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
+
+ default:
+ gcc_unreachable ();
+ }
+}
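+
+/* For example, -1 is not a valid ARM immediate, but a PLUS of -1 can be
+   negated into a SUB of 1, and an AND with 0xfffffffe can be inverted
+   into a BIC of 1, so both are accepted here even though a plain
+   const_ok_for_arm check would reject them.  */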
+
+/* Emit a sequence of insns to handle a large constant.
+ CODE is the code of the operation required, it can be any of SET, PLUS,
+ IOR, AND, XOR, MINUS;
+ MODE is the mode in which the operation is being performed;
+ VAL is the integer to operate on;
+ SOURCE is the other operand (a register, or a null-pointer for SET);
+ SUBTARGETS means it is safe to create scratch registers if that will
+ either produce a simpler sequence, or we will want to cse the values.
+ Return value is the number of insns emitted. */
+
+/* ??? Tweak this for thumb2. */
+int
+arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
+ HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
+{
+ rtx cond;
+
+ if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
+ cond = COND_EXEC_TEST (PATTERN (insn));
+ else
+ cond = NULL_RTX;
+
+ if (subtargets || code == SET
+ || (GET_CODE (target) == REG && GET_CODE (source) == REG
+ && REGNO (target) != REGNO (source)))
+ {
+ /* After arm_reorg has been called, we can't fix up expensive
+ constants by pushing them into memory so we must synthesize
+ them in-line, regardless of the cost. This is only likely to
+ be more costly on chips that have load delay slots and we are
+ compiling without running the scheduler (so no splitting
+ occurred before the final instruction emission).
+
+ Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
+ */
+ if (!after_arm_reorg
+ && !cond
+ && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
+ 1, 0)
+ > (arm_constant_limit (optimize_function_for_size_p (cfun))
+ + (code != SET))))
+ {
+ if (code == SET)
+ {
+	      /* Currently SET is the only monadic value for CODE; all
+		 the rest are dyadic.  */
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (target, GEN_INT (val));
+ else
+ emit_set_insn (target, GEN_INT (val));
+
+ return 1;
+ }
+ else
+ {
+ rtx temp = subtargets ? gen_reg_rtx (mode) : target;
+
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (temp, GEN_INT (val));
+ else
+ emit_set_insn (temp, GEN_INT (val));
+
+ /* For MINUS, the value is subtracted from, since we never
+ have subtraction of a constant. */
+ if (code == MINUS)
+ emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
+ else
+ emit_set_insn (target,
+ gen_rtx_fmt_ee (code, mode, source, temp));
+ return 2;
+ }
+ }
+ }
+
+ return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
+ 1);
+}
+
+/* Return the number of instructions required to synthesize the given
+ constant, if we start emitting them from bit-position I. */
+static int
+count_insns_for_constant (HOST_WIDE_INT remainder, int i)
+{
+ HOST_WIDE_INT temp1;
+ int step_size = TARGET_ARM ? 2 : 1;
+ int num_insns = 0;
+
+ gcc_assert (TARGET_ARM || i == 0);
+
+ do
+ {
+ int end;
+
+ if (i <= 0)
+ i += 32;
+ if (remainder & (((1 << step_size) - 1) << (i - step_size)))
+ {
+ end = i - 8;
+ if (end < 0)
+ end += 32;
+ temp1 = remainder & ((0x0ff << end)
+ | ((i < end) ? (0xff >> (32 - end)) : 0));
+ remainder &= ~temp1;
+ num_insns++;
+ i -= 8 - step_size;
+ }
+ i -= step_size;
+ } while (remainder);
+ return num_insns;
+}
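+
+/* For instance, 0x00ff00ff costs two insns by this measure: each pass of
+   the loop peels off one set byte (first 0x00ff0000, then 0x000000ff),
+   and every 8-bit chunk can be placed by a single instruction.  */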
+
+static int
+find_best_start (unsigned HOST_WIDE_INT remainder)
+{
+ int best_consecutive_zeros = 0;
+ int i;
+ int best_start = 0;
+
+  /* If we aren't targeting ARM, the best place to start is always at
+ the bottom. */
+ if (! TARGET_ARM)
+ return 0;
+
+ for (i = 0; i < 32; i += 2)
+ {
+ int consecutive_zeros = 0;
+
+ if (!(remainder & (3 << i)))
+ {
+ while ((i < 32) && !(remainder & (3 << i)))
+ {
+ consecutive_zeros += 2;
+ i += 2;
+ }
+ if (consecutive_zeros > best_consecutive_zeros)
+ {
+ best_consecutive_zeros = consecutive_zeros;
+ best_start = i - consecutive_zeros;
+ }
+ i -= 2;
+ }
+ }
+
+ /* So long as it won't require any more insns to do so, it's
+ desirable to emit a small constant (in bits 0...9) in the last
+ insn. This way there is more chance that it can be combined with
+ a later addressing insn to form a pre-indexed load or store
+ operation. Consider:
+
+ *((volatile int *)0xe0000100) = 1;
+ *((volatile int *)0xe0000110) = 2;
+
+ We want this to wind up as:
+
+ mov rA, #0xe0000000
+ mov rB, #1
+ str rB, [rA, #0x100]
+ mov rB, #2
+ str rB, [rA, #0x110]
+
+ rather than having to synthesize both large constants from scratch.
+
+ Therefore, we calculate how many insns would be required to emit
+ the constant starting from `best_start', and also starting from
+ zero (i.e. with bit 31 first to be output). If `best_start' doesn't
+ yield a shorter sequence, we may as well use zero. */
+ if (best_start != 0
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
+ && (count_insns_for_constant (remainder, 0) <=
+ count_insns_for_constant (remainder, best_start)))
+ best_start = 0;
+
+ return best_start;
+}
+
+/* Emit an instruction with the indicated PATTERN. If COND is
+ non-NULL, conditionalize the execution of the instruction on COND
+ being true. */
+
+static void
+emit_constant_insn (rtx cond, rtx pattern)
+{
+ if (cond)
+ pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
+ emit_insn (pattern);
+}
+
+/* As above, but extra parameter GENERATE which, if clear, suppresses
+ RTL generation. */
+/* ??? This needs more work for thumb2. */
+
+static int
+arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
+ HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
+ int generate)
+{
+ int can_invert = 0;
+ int can_negate = 0;
+ int final_invert = 0;
+ int can_negate_initial = 0;
+ int i;
+ int num_bits_set = 0;
+ int set_sign_bit_copies = 0;
+ int clear_sign_bit_copies = 0;
+ int clear_zero_bit_copies = 0;
+ int set_zero_bit_copies = 0;
+ int insns = 0;
+ unsigned HOST_WIDE_INT temp1, temp2;
+ unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
+ int step_size = TARGET_ARM ? 2 : 1;
+
+ /* Find out which operations are safe for a given CODE. Also do a quick
+ check for degenerate cases; these can occur when DImode operations
+ are split. */
+ switch (code)
+ {
+ case SET:
+ can_invert = 1;
+ can_negate = 1;
+ break;
+
+ case PLUS:
+ can_negate = 1;
+ can_negate_initial = 1;
+ break;
+
+ case IOR:
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ GEN_INT (ARM_SIGN_EXTEND (val))));
+ return 1;
+ }
+
+ if (remainder == 0)
+ {
+ if (reload_completed && rtx_equal_p (target, source))
+ return 0;
+
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target, source));
+ return 1;
+ }
+
+ if (TARGET_THUMB2)
+ can_invert = 1;
+ break;
+
+ case AND:
+ if (remainder == 0)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target, const0_rtx));
+ return 1;
+ }
+ if (remainder == 0xffffffff)
+ {
+ if (reload_completed && rtx_equal_p (target, source))
+ return 0;
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target, source));
+ return 1;
+ }
+ can_invert = 1;
+ break;
+
+ case XOR:
+ if (remainder == 0)
+ {
+ if (reload_completed && rtx_equal_p (target, source))
+ return 0;
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target, source));
+ return 1;
+ }
+
+ if (remainder == 0xffffffff)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ return 1;
+ }
+ break;
+
+ case MINUS:
+ /* We treat MINUS as (val - source), since (source - val) is always
+ passed as (source + (-val)). */
+ if (remainder == 0)
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NEG (mode, source)));
+ return 1;
+ }
+ if (const_ok_for_arm (val))
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_MINUS (mode, GEN_INT (val),
+ source)));
+ return 1;
+ }
+ can_negate = 1;
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If we can do it in one insn get out quickly. */
+ if (const_ok_for_arm (val)
+ || (can_negate_initial && const_ok_for_arm (-val))
+ || (can_invert && const_ok_for_arm (~val)))
+ {
+ if (generate)
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ (source
+ ? gen_rtx_fmt_ee (code, mode, source,
+ GEN_INT (val))
+ : GEN_INT (val))));
+ return 1;
+ }
+
+ /* Calculate a few attributes that may be useful for specific
+ optimizations. */
+ /* Count number of leading zeros. */
+ for (i = 31; i >= 0; i--)
+ {
+ if ((remainder & (1 << i)) == 0)
+ clear_sign_bit_copies++;
+ else
+ break;
+ }
+
+  /* Count number of leading ones.  */
+ for (i = 31; i >= 0; i--)
+ {
+ if ((remainder & (1 << i)) != 0)
+ set_sign_bit_copies++;
+ else
+ break;
+ }
+
+  /* Count number of trailing zeros.  */
+ for (i = 0; i <= 31; i++)
+ {
+ if ((remainder & (1 << i)) == 0)
+ clear_zero_bit_copies++;
+ else
+ break;
+ }
+
+  /* Count number of trailing ones.  */
+ for (i = 0; i <= 31; i++)
+ {
+ if ((remainder & (1 << i)) != 0)
+ set_zero_bit_copies++;
+ else
+ break;
+ }
+
+ switch (code)
+ {
+ case SET:
+ /* See if we can use movw. */
+ if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ GEN_INT (val)));
+ return 1;
+ }
+
+      /* See if we can do this by sign-extending a constant that is known
+	 to be negative.  This is a good way of doing it, since the shift
+	 may well merge into a subsequent insn.  */
+ if (set_sign_bit_copies > 1)
+ {
+ if (const_ok_for_arm
+ (temp1 = ARM_SIGN_EXTEND (remainder
+ << (set_sign_bit_copies - 1))))
+ {
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ GEN_INT (temp1)));
+ emit_constant_insn (cond,
+ gen_ashrsi3 (target, new_src,
+ GEN_INT (set_sign_bit_copies - 1)));
+ }
+ return 2;
+ }
+ /* For an inverted constant, we will need to set the low bits,
+ these will be shifted out of harm's way. */
+ temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
+ if (const_ok_for_arm (~temp1))
+ {
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ GEN_INT (temp1)));
+ emit_constant_insn (cond,
+ gen_ashrsi3 (target, new_src,
+ GEN_INT (set_sign_bit_copies - 1)));
+ }
+ return 2;
+ }
+ }
+
+ /* See if we can calculate the value as the difference between two
+ valid immediates. */
+ if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
+ {
+ int topshift = clear_sign_bit_copies & ~1;
+
+ temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
+ & (0xff000000 >> topshift));
+
+ /* If temp1 is zero, then that means the 9 most significant
+ bits of remainder were 1 and we've caused it to overflow.
+ When topshift is 0 we don't need to do anything since we
+ can borrow from 'bit 32'. */
+ if (temp1 == 0 && topshift != 0)
+ temp1 = 0x80000000 >> (topshift - 1);
+
+ temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
+
+ if (const_ok_for_arm (temp2))
+ {
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ GEN_INT (temp1)));
+ emit_constant_insn (cond,
+ gen_addsi3 (target, new_src,
+ GEN_INT (-temp2)));
+ }
+
+ return 2;
+ }
+ }
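+
+      /* For example, with remainder == 0x0000ffff (and no movw available)
+	 the code above picks temp1 = 0x10000 and temp2 = 1, giving
+	   mov	rT, #65536
+	   sub	target, rT, #1
+	 i.e. two instructions.  */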
+
+ /* See if we can generate this by setting the bottom (or the top)
+ 16 bits, and then shifting these into the other half of the
+ word. We only look for the simplest cases, to do more would cost
+ too much. Be careful, however, not to generate this when the
+ alternative would take fewer insns. */
+ if (val & 0xffff0000)
+ {
+ temp1 = remainder & 0xffff0000;
+ temp2 = remainder & 0x0000ffff;
+
+ /* Overlaps outside this range are best done using other methods. */
+ for (i = 9; i < 24; i++)
+ {
+ if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
+ && !const_ok_for_arm (temp2))
+ {
+ rtx new_src = (subtargets
+ ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
+ : target);
+ insns = arm_gen_constant (code, mode, cond, temp2, new_src,
+ source, subtargets, generate);
+ source = new_src;
+ if (generate)
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET
+ (VOIDmode, target,
+ gen_rtx_IOR (mode,
+ gen_rtx_ASHIFT (mode, source,
+ GEN_INT (i)),
+ source)));
+ return insns + 1;
+ }
+ }
+
+ /* Don't duplicate cases already considered. */
+ for (i = 17; i < 24; i++)
+ {
+ if (((temp1 | (temp1 >> i)) == remainder)
+ && !const_ok_for_arm (temp1))
+ {
+ rtx new_src = (subtargets
+ ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
+ : target);
+ insns = arm_gen_constant (code, mode, cond, temp1, new_src,
+ source, subtargets, generate);
+ source = new_src;
+ if (generate)
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_IOR
+ (mode,
+ gen_rtx_LSHIFTRT (mode, source,
+ GEN_INT (i)),
+ source)));
+ return insns + 1;
+ }
+ }
+ }
+ break;
+
+ case IOR:
+ case XOR:
+ /* If we have IOR or XOR, and the constant can be loaded in a
+ single instruction, and we can find a temporary to put it in,
+ then this can be done in two instructions instead of 3-4. */
+ if (subtargets
+	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
+ || (reload_completed && !reg_mentioned_p (target, source)))
+ {
+ if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
+ {
+ if (generate)
+ {
+ rtx sub = subtargets ? gen_reg_rtx (mode) : target;
+
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, sub,
+ GEN_INT (val)));
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_fmt_ee (code, mode,
+ source, sub)));
+ }
+ return 2;
+ }
+ }
+
+ if (code == XOR)
+ break;
+
+      /* Convert
+	 x = y | constant (which is composed of set_sign_bit_copies of
+	 leading 1s and the remainder 0s, e.g. 0xfff00000)
+	 to
+	 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
+
+	 This can be done in 2 instructions by using shifts with mov or mvn.
+	 e.g. for
+	 x = x | 0xfff00000;
+	 we generate
+	 mvn	r0, r0, asl #12
+	 mvn	r0, r0, lsr #12  */
+ if (set_sign_bit_copies > 8
+ && (val & (-1 << (32 - set_sign_bit_copies))) == val)
+ {
+ if (generate)
+ {
+ rtx sub = subtargets ? gen_reg_rtx (mode) : target;
+ rtx shift = GEN_INT (set_sign_bit_copies);
+
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET (VOIDmode, sub,
+ gen_rtx_NOT (mode,
+ gen_rtx_ASHIFT (mode,
+ source,
+ shift))));
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode,
+ gen_rtx_LSHIFTRT (mode, sub,
+ shift))));
+ }
+ return 2;
+ }
+
+      /* Convert
+	 x = y | constant (which has set_zero_bit_copies number of
+	 trailing ones)
+	 to
+	 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
+
+	 e.g. for r0 = r0 | 0xfff
+	 mvn	r0, r0, lsr #12
+	 mvn	r0, r0, asl #12  */
+ if (set_zero_bit_copies > 8
+ && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
+ {
+ if (generate)
+ {
+ rtx sub = subtargets ? gen_reg_rtx (mode) : target;
+ rtx shift = GEN_INT (set_zero_bit_copies);
+
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET (VOIDmode, sub,
+ gen_rtx_NOT (mode,
+ gen_rtx_LSHIFTRT (mode,
+ source,
+ shift))));
+ emit_constant_insn
+ (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode,
+ gen_rtx_ASHIFT (mode, sub,
+ shift))));
+ }
+ return 2;
+ }
+
+      /* This will never be reached for Thumb-2 because orn is a valid
+	 instruction.  This is for Thumb-1 and the 32-bit ARM case.
+
+	 x = y | constant (such that ~constant is a valid constant)
+	 Transform this to
+	 x = ~(~y & ~constant).  */
+ if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
+ {
+ if (generate)
+ {
+ rtx sub = subtargets ? gen_reg_rtx (mode) : target;
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, sub,
+ gen_rtx_NOT (mode, source)));
+ source = sub;
+ if (subtargets)
+ sub = gen_reg_rtx (mode);
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, sub,
+ gen_rtx_AND (mode, source,
+ GEN_INT (temp1))));
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, sub)));
+ }
+ return 3;
+ }
+ break;
+
+ case AND:
+ /* See if two shifts will do 2 or more insn's worth of work. */
+ if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
+ {
+ HOST_WIDE_INT shift_mask = ((0xffffffff
+ << (32 - clear_sign_bit_copies))
+ & 0xffffffff);
+
+ if ((remainder | shift_mask) != 0xffffffff)
+ {
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ insns = arm_gen_constant (AND, mode, cond,
+ remainder | shift_mask,
+ new_src, source, subtargets, 1);
+ source = new_src;
+ }
+ else
+ {
+ rtx targ = subtargets ? NULL_RTX : target;
+ insns = arm_gen_constant (AND, mode, cond,
+ remainder | shift_mask,
+ targ, source, subtargets, 0);
+ }
+ }
+
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ rtx shift = GEN_INT (clear_sign_bit_copies);
+
+ emit_insn (gen_ashlsi3 (new_src, source, shift));
+ emit_insn (gen_lshrsi3 (target, new_src, shift));
+ }
+
+ return insns + 2;
+ }
+
+ if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
+ {
+ HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
+
+ if ((remainder | shift_mask) != 0xffffffff)
+ {
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+
+ insns = arm_gen_constant (AND, mode, cond,
+ remainder | shift_mask,
+ new_src, source, subtargets, 1);
+ source = new_src;
+ }
+ else
+ {
+ rtx targ = subtargets ? NULL_RTX : target;
+
+ insns = arm_gen_constant (AND, mode, cond,
+ remainder | shift_mask,
+ targ, source, subtargets, 0);
+ }
+ }
+
+ if (generate)
+ {
+ rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
+ rtx shift = GEN_INT (clear_zero_bit_copies);
+
+ emit_insn (gen_lshrsi3 (new_src, source, shift));
+ emit_insn (gen_ashlsi3 (target, new_src, shift));
+ }
+
+ return insns + 2;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ for (i = 0; i < 32; i++)
+ if (remainder & (1 << i))
+ num_bits_set++;
+
+ if ((code == AND)
+ || (code != IOR && can_invert && num_bits_set > 16))
+ remainder ^= 0xffffffff;
+ else if (code == PLUS && num_bits_set > 16)
+ remainder = (-remainder) & 0xffffffff;
+
+ /* For XOR, if more than half the bits are set and there's a sequence
+ of more than 8 consecutive ones in the pattern then we can XOR by the
+ inverted constant and then invert the final result; this may save an
+ instruction and might also lead to the final mvn being merged with
+ some other operation. */
+ else if (code == XOR && num_bits_set > 16
+ && (count_insns_for_constant (remainder ^ 0xffffffff,
+ find_best_start
+ (remainder ^ 0xffffffff))
+ < count_insns_for_constant (remainder,
+ find_best_start (remainder))))
+ {
+ remainder ^= 0xffffffff;
+ final_invert = 1;
+ }
+ else
+ {
+ can_invert = 0;
+ can_negate = 0;
+ }
+
+ /* Now try and find a way of doing the job in either two or three
+ instructions.
+ We start by looking for the largest block of zeros that are aligned on
+ a 2-bit boundary, we then fill up the temps, wrapping around to the
+ top of the word when we drop off the bottom.
+ In the worst case this code should produce no more than four insns.
+ Thumb-2 constants are shifted, not rotated, so the MSB is always the
+ best place to start. */
+
+ /* ??? Use thumb2 replicated constants when the high and low halfwords are
+ the same. */
+ {
+ /* Now start emitting the insns. */
+ i = find_best_start (remainder);
+ do
+ {
+ int end;
+
+ if (i <= 0)
+ i += 32;
+ if (remainder & (3 << (i - 2)))
+ {
+ end = i - 8;
+ if (end < 0)
+ end += 32;
+ temp1 = remainder & ((0x0ff << end)
+ | ((i < end) ? (0xff >> (32 - end)) : 0));
+ remainder &= ~temp1;
+
+ if (generate)
+ {
+ rtx new_src, temp1_rtx;
+
+ if (code == SET || code == MINUS)
+ {
+ new_src = (subtargets ? gen_reg_rtx (mode) : target);
+ if (can_invert && code != MINUS)
+ temp1 = ~temp1;
+ }
+ else
+ {
+ if ((final_invert || remainder) && subtargets)
+ new_src = gen_reg_rtx (mode);
+ else
+ new_src = target;
+ if (can_invert)
+ temp1 = ~temp1;
+ else if (can_negate)
+ temp1 = -temp1;
+ }
+
+ temp1 = trunc_int_for_mode (temp1, mode);
+ temp1_rtx = GEN_INT (temp1);
+
+ if (code == SET)
+ ;
+ else if (code == MINUS)
+ temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
+ else
+ temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
+
+ emit_constant_insn (cond,
+ gen_rtx_SET (VOIDmode, new_src,
+ temp1_rtx));
+ source = new_src;
+ }
+
+ if (code == SET)
+ {
+ can_invert = 0;
+ code = PLUS;
+ }
+ else if (code == MINUS)
+ code = PLUS;
+
+ insns++;
+ i -= 8 - step_size;
+ }
+ /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
+ shifts. */
+ i -= step_size;
+ }
+ while (remainder);
+ }
+
+ if (final_invert)
+ {
+ if (generate)
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
+ gen_rtx_NOT (mode, source)));
+ insns++;
+ }
+
+ return insns;
+}
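+
+/* Note that the single-insn early exit above also covers inverted
+   operands: an AND with 0xffffff00, say, is accepted immediately
+   because ~0xffffff00 == 0x000000ff is a valid immediate, and the
+   resulting insn can be output as a bic.  */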
+
+/* Canonicalize a comparison so that we are more likely to recognize it.
+ This can be done for a few constant compares, where we can make the
+ immediate value easier to load. */
+
+enum rtx_code
+arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
+{
+ enum machine_mode mode;
+ unsigned HOST_WIDE_INT i, maxval;
+
+ mode = GET_MODE (*op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (*op1);
+
+ maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
+
+ /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
+ we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
+ reversed or (for constant OP1) adjusted to GE/LT. Similarly
+ for GTU/LEU in Thumb mode. */
+ if (mode == DImode)
+ {
+ rtx tem;
+
+ /* To keep things simple, always use the Cirrus cfcmp64 if it is
+ available. */
+ if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ return code;
+
+ if (code == GT || code == LE
+ || (!TARGET_ARM && (code == GTU || code == LEU)))
+ {
+ /* Missing comparison. First try to use an available
+ comparison. */
+ if (GET_CODE (*op1) == CONST_INT)
+ {
+ i = INTVAL (*op1);
+ switch (code)
+ {
+ case GT:
+ case LE:
+ if (i != maxval
+ && arm_const_double_by_immediates (GEN_INT (i + 1)))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GT ? GE : LT;
+ }
+ break;
+ case GTU:
+ case LEU:
+ if (i != ~((unsigned HOST_WIDE_INT) 0)
+ && arm_const_double_by_immediates (GEN_INT (i + 1)))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GTU ? GEU : LTU;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* If that did not work, reverse the condition. */
+ tem = *op0;
+ *op0 = *op1;
+ *op1 = tem;
+ return swap_condition (code);
+ }
+
+ return code;
+ }
+
+ /* Comparisons smaller than DImode. Only adjust comparisons against
+ an out-of-range constant. */
+ if (GET_CODE (*op1) != CONST_INT
+ || const_ok_for_arm (INTVAL (*op1))
+ || const_ok_for_arm (- INTVAL (*op1)))
+ return code;
+
+ i = INTVAL (*op1);
+
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ return code;
+
+ case GT:
+ case LE:
+ if (i != maxval
+ && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GT ? GE : LT;
+ }
+ break;
+
+ case GE:
+ case LT:
+ if (i != ~maxval
+ && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
+ {
+ *op1 = GEN_INT (i - 1);
+ return code == GE ? GT : LE;
+ }
+ break;
+
+ case GTU:
+ case LEU:
+ if (i != ~((unsigned HOST_WIDE_INT) 0)
+ && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
+ {
+ *op1 = GEN_INT (i + 1);
+ return code == GTU ? GEU : LTU;
+ }
+ break;
+
+ case GEU:
+ case LTU:
+ if (i != 0
+ && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
+ {
+ *op1 = GEN_INT (i - 1);
+ return code == GEU ? GTU : LEU;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
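+
+/* For example, (GT x 0xffff) would need the invalid immediate 0xffff, so
+   it is rewritten as (GE x 0x10000), 0x10000 being a single rotated
+   8-bit value; likewise (LEU x 0xffff) becomes (LTU x 0x10000).  */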
+
+
+/* Define how to find the value returned by a function. */
+
+static rtx
+arm_function_value(const_tree type, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+ int unsignedp ATTRIBUTE_UNUSED;
+ rtx r ATTRIBUTE_UNUSED;
+
+ mode = TYPE_MODE (type);
+
+ if (TARGET_AAPCS_BASED)
+ return aapcs_allocate_return_reg (mode, type, func);
+
+ /* Promote integer types. */
+ if (INTEGRAL_TYPE_P (type))
+ mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
+
+  /* Promote small structs returned in a register to full-word size
+     for big-endian AAPCS.  */
+ if (arm_return_in_msb (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
+
+ return LIBCALL_VALUE (mode);
+}
+
+static int
+libcall_eq (const void *p1, const void *p2)
+{
+ return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
+}
+
+static hashval_t
+libcall_hash (const void *p1)
+{
+ return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
+}
+
+static void
+add_libcall (htab_t htab, rtx libcall)
+{
+ *htab_find_slot (htab, libcall, INSERT) = libcall;
+}
+
+static bool
+arm_libcall_uses_aapcs_base (const_rtx libcall)
+{
+ static bool init_done = false;
+ static htab_t libcall_htab;
+
+ if (!init_done)
+ {
+ init_done = true;
+
+ libcall_htab = htab_create (31, libcall_hash, libcall_eq,
+ NULL);
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, SImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, SFmode, DImode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufloat_optab, DFmode, DImode));
+
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sext_optab, SFmode, HFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, SImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, SImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, DFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (sfix_optab, DImode, SFmode));
+ add_libcall (libcall_htab,
+ convert_optab_libfunc (ufix_optab, DImode, SFmode));
+
+ /* Values from double-precision helper functions are returned in core
+ registers if the selected core only supports single-precision
+ arithmetic, even if we are using the hard-float ABI. The same is
+ true for single-precision helpers, but we will never be using the
+ hard-float ABI on a CPU which doesn't support single-precision
+ operations in hardware. */
+ add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
+ add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
+ add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
+ SFmode));
+ add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
+ DFmode));
+ }
+
+ return libcall && htab_find (libcall_htab, libcall) != NULL;
+}
+
+rtx
+arm_libcall_value (enum machine_mode mode, const_rtx libcall)
+{
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
+ && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ /* The following libcalls return their result in integer registers,
+ even though they return a floating point value. */
+ if (arm_libcall_uses_aapcs_base (libcall))
+	return gen_rtx_REG (mode, ARG_REGISTER (1));
+ }
+
+ return LIBCALL_VALUE (mode);
+}
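+
+/* So, for example, a DFmode addition performed by a library call hands
+   its result back in r0/r1 even under -mfloat-abi=hard, because the
+   helper itself follows the base AAPCS; the code above therefore yields
+   a core register rather than a VFP register for it.  */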
+
+/* Determine the amount of memory needed to store the possible return
+ registers of an untyped call. */
+int
+arm_apply_result_size (void)
+{
+ int size = 16;
+
+ if (TARGET_32BIT)
+ {
+ if (TARGET_HARD_FLOAT_ABI)
+ {
+ if (TARGET_VFP)
+ size += 32;
+ if (TARGET_FPA)
+ size += 12;
+ if (TARGET_MAVERICK)
+ size += 8;
+ }
+ if (TARGET_IWMMXT_ABI)
+ size += 8;
+ }
+
+ return size;
+}
+
+/* Decide whether TYPE should be returned in memory (true)
+ or in a register (false). FNTYPE is the type of the function making
+ the call. */
+static bool
+arm_return_in_memory (const_tree type, const_tree fntype)
+{
+ HOST_WIDE_INT size;
+
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */
+
+ if (TARGET_AAPCS_BASED)
+ {
+      /* Simple, non-aggregate types (i.e. not including vectors and
+ complex) are always returned in a register (or registers).
+ We don't care about which register here, so we can short-cut
+ some of the detail. */
+ if (!AGGREGATE_TYPE_P (type)
+ && TREE_CODE (type) != VECTOR_TYPE
+ && TREE_CODE (type) != COMPLEX_TYPE)
+ return false;
+
+ /* Any return value that is no larger than one word can be
+ returned in r0. */
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
+ return false;
+
+ /* Check any available co-processors to see if they accept the
+ type as a register candidate (VFP, for example, can return
+ some aggregates in consecutive registers). These aren't
+ available if the call is variadic. */
+ if (aapcs_select_return_coproc (type, fntype) >= 0)
+ return false;
+
+ /* Vector values should be returned using ARM registers, not
+ memory (unless they're over 16 bytes, which will break since
+ we only have four call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
+ /* The rest go in memory. */
+ return true;
+ }
+
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ return (size < 0 || size > (4 * UNITS_PER_WORD));
+
+  if (!AGGREGATE_TYPE_P (type)
+      && TREE_CODE (type) != VECTOR_TYPE)
+ /* All simple types are returned in registers. */
+ return false;
+
+ if (arm_abi != ARM_ABI_APCS)
+ {
+ /* ATPCS and later return aggregate types in memory only if they are
+ larger than a word (or are variable size). */
+ return (size < 0 || size > UNITS_PER_WORD);
+ }
+
+ /* For the arm-wince targets we choose to be compatible with Microsoft's
+ ARM and Thumb compilers, which always return aggregates in memory. */
+#ifndef ARM_WINCE
+ /* All structures/unions bigger than one word are returned in memory.
+ Also catch the case where int_size_in_bytes returns -1. In this case
+ the aggregate is either huge or of variable size, and in either case
+ we will want to return it via memory and not in a register. */
+ if (size < 0 || size > UNITS_PER_WORD)
+ return true;
+
+ if (TREE_CODE (type) == RECORD_TYPE)
+ {
+ tree field;
+
+ /* For a struct the APCS says that we only return in a register
+ if the type is 'integer like' and every addressable element
+ has an offset of zero. For practical purposes this means
+ that the structure can have at most one non bit-field element
+ and that this element must be the first one in the structure. */
+
+ /* Find the first field, ignoring non FIELD_DECL things which will
+ have been created by C++. */
+ for (field = TYPE_FIELDS (type);
+ field && TREE_CODE (field) != FIELD_DECL;
+ field = DECL_CHAIN (field))
+ continue;
+
+ if (field == NULL)
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */
+
+ /* Check that the first field is valid for returning in a register. */
+
+ /* ... Floats are not allowed */
+ if (FLOAT_TYPE_P (TREE_TYPE (field)))
+ return true;
+
+ /* ... Aggregates that are not themselves valid for returning in
+ a register are not allowed. */
+ if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
+ return true;
+
+ /* Now check the remaining fields, if any. Only bitfields are allowed,
+ since they are not addressable. */
+ for (field = DECL_CHAIN (field);
+ field;
+ field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (!DECL_BIT_FIELD_TYPE (field))
+ return true;
+ }
+
+ return false;
+ }
+
+ if (TREE_CODE (type) == UNION_TYPE)
+ {
+ tree field;
+
+ /* Unions can be returned in registers if every element is
+ integral, or can be returned in an integer register. */
+ for (field = TYPE_FIELDS (type);
+ field;
+ field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (FLOAT_TYPE_P (TREE_TYPE (field)))
+ return true;
+
+ if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
+ return true;
+ }
+
+ return false;
+ }
+#endif /* not ARM_WINCE */
+
+ /* Return all other types in memory. */
+ return true;
+}
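+
+/* Examples: under AAPCS a 20-byte structure is returned in memory (an
+   aggregate larger than a word with no co-processor candidate), while any
+   scalar no larger than a word comes back in r0.  Under APCS a structure
+   containing a single int is "integer like" and is returned in r0, but a
+   structure containing a single float is returned in memory.  */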
+
+/* Indicate whether or not words of a double are in big-endian order. */
+
+int
+arm_float_words_big_endian (void)
+{
+ if (TARGET_MAVERICK)
+ return 0;
+
+  /* For FPA, float words are always big-endian.  For VFP, float words
+     follow the memory system mode.  */
+
+ if (TARGET_FPA)
+ {
+ return 1;
+ }
+
+ if (TARGET_VFP)
+ return (TARGET_BIG_END ? 1 : 0);
+
+ return 1;
+}
+
+const struct pcs_attribute_arg
+{
+ const char *arg;
+ enum arm_pcs value;
+} pcs_attribute_args[] =
+ {
+ {"aapcs", ARM_PCS_AAPCS},
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
+#if 0
+    /* We could recognize these, but changes would be needed elsewhere
+       to implement them.  */
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
+ {"atpcs", ARM_PCS_ATPCS},
+ {"apcs", ARM_PCS_APCS},
+#endif
+ {NULL, ARM_PCS_UNKNOWN}
+ };
+
+static enum arm_pcs
+arm_pcs_from_attribute (tree attr)
+{
+ const struct pcs_attribute_arg *ptr;
+ const char *arg;
+
+ /* Get the value of the argument. */
+ if (TREE_VALUE (attr) == NULL_TREE
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
+ return ARM_PCS_UNKNOWN;
+
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr));
+
+ /* Check it against the list of known arguments. */
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
+ if (streq (arg, ptr->arg))
+ return ptr->value;
+
+  /* An unrecognized PCS name.  */
+ return ARM_PCS_UNKNOWN;
+}
+
+/* Get the PCS variant to use for this call. TYPE is the function's type
+   specification, DECL is the specific declaration.  DECL may be null if
+ the call could be indirect or if this is a library call. */
+static enum arm_pcs
+arm_get_pcs_model (const_tree type, const_tree decl)
+{
+ bool user_convention = false;
+ enum arm_pcs user_pcs = arm_pcs_default;
+ tree attr;
+
+ gcc_assert (type);
+
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
+ user_convention = true;
+ }
+
+ if (TARGET_AAPCS_BASED)
+ {
+ /* Detect varargs functions. These always use the base rules
+ (no argument is ever a candidate for a co-processor
+ register). */
+ bool base_rules = stdarg_p (type);
+
+ if (user_convention)
+ {
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL)
+ sorry ("non-AAPCS derived PCS variant");
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS)
+ error ("variadic functions must use the base AAPCS variant");
+ }
+
+ if (base_rules)
+ return ARM_PCS_AAPCS;
+ else if (user_convention)
+ return user_pcs;
+ else if (decl && flag_unit_at_a_time)
+ {
+ /* Local functions never leak outside this compilation unit,
+ so we are free to use whatever conventions are
+ appropriate. */
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ return ARM_PCS_AAPCS_LOCAL;
+ }
+ }
+ else if (user_convention && user_pcs != arm_pcs_default)
+ sorry ("PCS variant");
+
+ /* For everything else we use the target's default. */
+ return arm_pcs_default;
+}
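+
+/* The attribute checked above is written on the function type, e.g.
+
+     double dadd (double, double) __attribute__((pcs("aapcs-vfp")));
+
+   which selects VFP argument and result registers for that function even
+   when the unit's default is the base AAPCS variant.  */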
+
+
+static void
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ rtx libcall ATTRIBUTE_UNUSED,
+ const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ /* Record the unallocated VFP registers. */
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
+ pcum->aapcs_vfp_reg_alloc = 0;
+}
+
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ type. If a non-floating point type is found, or if a floating point
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
+ otherwise return the count in the sub-tree. */
+static int
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
+
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (mode != DFmode && mode != SFmode)
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ /* Use V2SImode and V4SImode as representatives of all 64-bit
+ and 128-bit vector types, whether or not those modes are
+ supported with the present options. */
+ size = int_size_in_bytes (type);
+ switch (size)
+ {
+ case 8:
+ mode = V2SImode;
+ break;
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
+ }
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P(type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ default:
+ break;
+ }
+
+ return -1;
+}
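+
+/* For example, struct { float x, y, z, w; } yields *MODEP == SFmode and
+   a count of 4 (a homogeneous aggregate that fits s0-s3), whereas
+   struct { float f; double d; } mixes base types and yields -1.  */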
+
+/* Return true if PCS_VARIANT should use VFP registers. */
+static bool
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
+{
+ if (pcs_variant == ARM_PCS_AAPCS_VFP)
+ {
+ static bool seen_thumb1_vfp = false;
+
+ if (TARGET_THUMB1 && !seen_thumb1_vfp)
+ {
+ sorry ("Thumb-1 hard-float VFP ABI");
+ /* sorry() is not immediately fatal, so only display this once. */
+ seen_thumb1_vfp = true;
+ }
+
+ return true;
+ }
+
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
+ return false;
+
+  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
+	  && (TARGET_VFP_DOUBLE || !is_double));
+}
+
+static bool
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type,
+ enum machine_mode *base_mode, int *count)
+{
+ enum machine_mode new_mode = VOIDmode;
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ *count = 1;
+ new_mode = mode;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ {
+ *count = 2;
+ new_mode = (mode == DCmode ? DFmode : SFmode);
+ }
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
+ {
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
+
+ if (ag_count > 0 && ag_count <= 4)
+ *count = ag_count;
+ else
+ return false;
+ }
+ else
+ return false;
+
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
+ return false;
+
+ *base_mode = new_mode;
+ return true;
+}
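+
+/* Example (illustrative only): a _Complex double argument has DCmode,
+   so the MODE_COMPLEX_FLOAT branch above reports *count = 2 with
+   *base_mode = DFmode; the value is handled as two consecutive
+   double-precision units when registers are allocated.  */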
+
+static bool
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
+ enum machine_mode mode, const_tree type)
+{
+ int count ATTRIBUTE_UNUSED;
+ enum machine_mode ag_mode ATTRIBUTE_UNUSED;
+
+ if (!use_vfp_abi (pcs_variant, false))
+ return false;
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
+}
+
+static bool
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
+{
+ if (!use_vfp_abi (pcum->pcs_variant, false))
+ return false;
+
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
+ &pcum->aapcs_vfp_rmode,
+ &pcum->aapcs_vfp_rcount);
+}
+
+static bool
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
+ int regno;
+
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
+ {
+ pcum->aapcs_vfp_reg_alloc = mask << regno;
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int i;
+ int rcount = pcum->aapcs_vfp_rcount;
+ int rshift = shift;
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+ rtx par;
+ if (!TARGET_NEON)
+ {
+ /* Avoid using unsupported vector modes. */
+ if (rmode == V2SImode)
+ rmode = DImode;
+ else if (rmode == V4SImode)
+ {
+ rmode = DImode;
+ rcount *= 2;
+ rshift /= 2;
+ }
+ }
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
+ for (i = 0; i < rcount; i++)
+ {
+ rtx tmp = gen_rtx_REG (rmode,
+ FIRST_VFP_REGNUM + regno + i * rshift);
+ tmp = gen_rtx_EXPR_LIST
+ (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (rmode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ pcum->aapcs_reg = par;
+ }
+ else
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
+ return true;
+ }
+ return false;
+}
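+
+/* A worked example, as a sketch rather than part of the original code:
+   allocating a homogeneous aggregate of two doubles gives
+
+     shift = GET_MODE_SIZE (DFmode) / GET_MODE_SIZE (SFmode) == 2
+     mask  = (1 << (2 * 2)) - 1 == 0xf
+
+   so the loop above scans regno = 0, 2, 4, ... for four consecutive
+   free S registers (two D registers) and records the allocation in
+   aapcs_vfp_reg_alloc.  */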
+
+static rtx
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
+			       enum machine_mode mode,
+			       const_tree type)
+{
+  if (!use_vfp_abi (pcs_variant, false))
+    return NULL_RTX;
+
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
+ {
+ int count;
+ enum machine_mode ag_mode;
+ int i;
+ rtx par;
+ int shift;
+
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
+ &ag_mode, &count);
+
+ if (!TARGET_NEON)
+ {
+ if (ag_mode == V2SImode)
+ ag_mode = DImode;
+ else if (ag_mode == V4SImode)
+ {
+ ag_mode = DImode;
+ count *= 2;
+ }
+ }
+      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
+ for (i = 0; i < count; i++)
+ {
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
+ GEN_INT (i * GET_MODE_SIZE (ag_mode)));
+ XVECEXP (par, 0, i) = tmp;
+ }
+
+ return par;
+ }
+
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
+}
+
+static void
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
+		   enum machine_mode mode ATTRIBUTE_UNUSED,
+		   const_tree type ATTRIBUTE_UNUSED)
+{
+  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
+  pcum->aapcs_vfp_reg_alloc = 0;
+}
+
+#define AAPCS_CP(X) \
+ { \
+ aapcs_ ## X ## _cum_init, \
+ aapcs_ ## X ## _is_call_candidate, \
+ aapcs_ ## X ## _allocate, \
+ aapcs_ ## X ## _is_return_candidate, \
+ aapcs_ ## X ## _allocate_return_reg, \
+ aapcs_ ## X ## _advance \
+ }
+
+/* Table of co-processors that can be used to pass arguments in
+   registers.  Ideally no argument should be a candidate for more than
+   one co-processor table entry, but the table is processed in order
+   and stops after the first match.  If that entry then fails to put
+   the argument into a co-processor register, the argument will go on
+   the stack.  */
+static struct
+{
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
+
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is
+ BLKmode) is a candidate for this co-processor's registers; this
+ function should ignore any position-dependent state in
+ CUMULATIVE_ARGS and only use call-type dependent information. */
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+  /* Return true if the argument does get a co-processor register; it
+     should set aapcs_reg to an RTX of the allocated register, as
+     required for the return value of FUNCTION_ARG.  */
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+
+  /* Return true if a result of mode MODE (or type TYPE if MODE is
+     BLKmode) can be returned in this co-processor's registers.  */
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+
+  /* Allocate and return an RTX element to hold the return type of a
+     call; this routine must not fail and will only be called if
+     is_return_candidate returned true with the same parameters.  */
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+
+ /* Finish processing this argument and prepare to start processing
+ the next one. */
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
+ {
+ AAPCS_CP(vfp)
+ };
+
+#undef AAPCS_CP
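+
+/* For reference, the AAPCS_CP(vfp) entry above expands by token pasting
+   to
+
+     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
+       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
+       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance }
+
+   so a new co-processor slot only needs the six aapcs_<name>_* hooks
+   and an AAPCS_CP(<name>) line in the table.  */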
+
+static int
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type)
+{
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
+ return i;
+
+ return -1;
+}
+
+static int
+aapcs_select_return_coproc (const_tree type, const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
+ TYPE_MODE (type),
+ type))
+ return i;
+ }
+ return -1;
+}
+
+static rtx
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+ const_tree fntype)
+{
+ /* We aren't passed a decl, so we can't check that a call is local.
+ However, it isn't clear that that would be a win anyway, since it
+ might limit some tail-calling opportunities. */
+ enum arm_pcs pcs_variant;
+ int unsignedp ATTRIBUTE_UNUSED;
+
+ if (fntype)
+ {
+ const_tree fndecl = NULL_TREE;
+
+ if (TREE_CODE (fntype) == FUNCTION_DECL)
+ {
+ fndecl = fntype;
+ fntype = TREE_TYPE (fntype);
+ }
+
+ pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ }
+ else
+ pcs_variant = arm_pcs_default;
+
+ /* Promote integer types. */
+ if (type && INTEGRAL_TYPE_P (type))
+ mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
+
+ if (pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
+ type))
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
+ mode, type);
+ }
+
+  /* Promote small structs returned in a register to full-word size
+     for big-endian AAPCS.  */
+ if (type && arm_return_in_msb (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
+
+ return gen_rtx_REG (mode, R0_REGNUM);
+}
+
+rtx
+aapcs_libcall_value (enum machine_mode mode)
+{
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
+}
+
+/* Lay out a function argument using the AAPCS rules. The rule
+ numbers referred to here are those in the AAPCS. */
+static void
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int nregs, nregs2;
+ int ncrn;
+
+ /* We only need to do this once per argument. */
+ if (pcum->aapcs_arg_processed)
+ return;
+
+ pcum->aapcs_arg_processed = true;
+
+ /* Special case: if named is false then we are handling an incoming
+ anonymous argument which is on the stack. */
+ if (!named)
+ return;
+
+ /* Is this a potential co-processor register candidate? */
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int slot = aapcs_select_call_coproc (pcum, mode, type);
+ pcum->aapcs_cprc_slot = slot;
+
+ /* We don't have to apply any of the rules from part B of the
+ preparation phase, these are handled elsewhere in the
+ compiler. */
+
+ if (slot >= 0)
+ {
+ /* A Co-processor register candidate goes either in its own
+ class of registers or on the stack. */
+ if (!pcum->aapcs_cprc_failed[slot])
+ {
+ /* C1.cp - Try to allocate the argument to co-processor
+ registers. */
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
+ return;
+
+ /* C2.cp - Put the argument on the stack and note that we
+ can't assign any more candidates in this slot. We also
+ need to note that we have allocated stack space, so that
+ we won't later try to split a non-cprc candidate between
+ core registers and the stack. */
+ pcum->aapcs_cprc_failed[slot] = true;
+ pcum->can_split = false;
+ }
+
+ /* We didn't get a register, so this argument goes on the
+ stack. */
+ gcc_assert (pcum->can_split == false);
+ return;
+ }
+ }
+
+ /* C3 - For double-word aligned arguments, round the NCRN up to the
+ next even number. */
+ ncrn = pcum->aapcs_ncrn;
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
+ ncrn++;
+
+  nregs = ARM_NUM_REGS2 (mode, type);
+
+ /* Sigh, this test should really assert that nregs > 0, but a GCC
+     extension allows empty structs and then gives them zero size; it
+ then allows such a structure to be passed by value. For some of
+ the code below we have to pretend that such an argument has
+ non-zero size so that we 'locate' it correctly either in
+ registers or on the stack. */
+ gcc_assert (nregs >= 0);
+
+ nregs2 = nregs ? nregs : 1;
+
+ /* C4 - Argument fits entirely in core registers. */
+ if (ncrn + nregs2 <= NUM_ARG_REGS)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = ncrn + nregs;
+ return;
+ }
+
+ /* C5 - Some core registers left and there are no arguments already
+ on the stack: split this argument between the remaining core
+ registers and the stack. */
+ if (ncrn < NUM_ARG_REGS && pcum->can_split)
+ {
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
+ return;
+ }
+
+ /* C6 - NCRN is set to 4. */
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS;
+
+  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
+ return;
+}
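+
+/* Worked example (illustrative): for f (int a, long long b) under
+   AAPCS, A takes r0, leaving ncrn == 1; B needs doubleword alignment,
+   so rule C3 rounds ncrn up to 2 and rule C4 assigns r2-r3, leaving r1
+   unused.  A two-word argument needing only word alignment that arrives
+   at ncrn == 3 would instead be split between r3 and the stack by rule
+   C5, provided can_split is still true.  */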
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is NULL. */
+void
+arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
+ rtx libname,
+			  tree fndecl)
+{
+  /* Determine the calling convention (PCS variant) in use.  */
+ if (fntype)
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
+ else
+ pcum->pcs_variant = arm_pcs_default;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ if (arm_libcall_uses_aapcs_base (libname))
+ pcum->pcs_variant = ARM_PCS_AAPCS;
+
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_cprc_slot = -1;
+ pcum->can_split = true;
+
+ if (pcum->pcs_variant != ARM_PCS_AAPCS)
+ {
+ int i;
+
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
+ {
+ pcum->aapcs_cprc_failed[i] = false;
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
+ }
+ }
+ return;
+ }
+
+ /* Legacy ABIs */
+
+ /* On the ARM, the offset starts at 0. */
+ pcum->nregs = 0;
+ pcum->iwmmxt_nregs = 0;
+ pcum->can_split = true;
+
+  /* Varargs vectors are treated the same as long long.
+     named_count avoids having to change the way arm handles 'named'.  */
+ pcum->named_count = 0;
+ pcum->nargs = 0;
+
+ if (TARGET_REALLY_IWMMXT && fntype)
+ {
+ tree fn_arg;
+
+ for (fn_arg = TYPE_ARG_TYPES (fntype);
+ fn_arg;
+ fn_arg = TREE_CHAIN (fn_arg))
+ pcum->named_count += 1;
+
+ if (! pcum->named_count)
+ pcum->named_count = INT_MAX;
+ }
+}
+
+
+/* Return true if mode/type need doubleword alignment. */
+static bool
+arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
+{
+ return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
+ || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
+}
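+
+/* For instance, on AAPCS targets DImode has GET_MODE_ALIGNMENT == 64
+   while PARM_BOUNDARY is 32, so 64-bit quantities report true here and
+   are placed in even-numbered core register pairs by the callers.  */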
+
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
+ other arguments are passed on the stack. If (NAMED == 0) (which happens
+ only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
+   defined), say it is passed on the stack (function_prologue will
+   indeed make it pass on the stack if necessary).  */
+
+static rtx
+arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int nregs;
+
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of
+ a call insn (op3 of a call_value insn). */
+ if (mode == VOIDmode)
+ return const0_rtx;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_reg;
+ }
+
+  /* Varargs vectors are treated the same as long long.
+     named_count avoids having to change the way arm handles 'named'.  */
+ if (TARGET_IWMMXT_ABI
+ && arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs + 1)
+ {
+ if (pcum->iwmmxt_nregs <= 9)
+ return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
+ else
+ {
+ pcum->can_split = false;
+ return NULL_RTX;
+ }
+ }
+
+ /* Put doubleword aligned quantities in even register pairs. */
+ if (pcum->nregs & 1
+ && ARM_DOUBLEWORD_ALIGN
+ && arm_needs_doubleword_align (mode, type))
+ pcum->nregs++;
+
+ /* Only allow splitting an arg between regs and memory if all preceding
+ args were allocated to regs. For args passed by reference we only count
+ the reference pointer. */
+ if (pcum->can_split)
+ nregs = 1;
+ else
+ nregs = ARM_NUM_REGS2 (mode, type);
+
+ if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
+ return NULL_RTX;
+
+ return gen_rtx_REG (mode, pcum->nregs);
+}
+
+/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
+static HOST_WIDE_INT
+arm_vector_alignment (const_tree type)
+{
+ HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
+
+ if (TARGET_AAPCS_BASED)
+ align = MIN (align, 64);
+
+ return align;
+}
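+
+/* Example: a 128-bit NEON vector type such as uint32x4_t has a natural
+   alignment equal to its size, but on AAPCS targets this hook caps the
+   value at 64 bits, as the ABI requires.  */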
+
+static unsigned int
+arm_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
+ ? DOUBLEWORD_ALIGNMENT
+ : PARM_BOUNDARY);
+}
+
+static int
+arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int nregs = pcum->nregs;
+
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+ return pcum->aapcs_partial;
+ }
+
+ if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
+ return 0;
+
+ if (NUM_ARG_REGS > nregs
+ && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
+ && pcum->can_split)
+ return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
+
+ return 0;
+}
+
+/* Update the data in PCUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ aapcs_layout_arg (pcum, mode, type, named);
+
+ if (pcum->aapcs_cprc_slot >= 0)
+ {
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
+ type);
+ pcum->aapcs_cprc_slot = -1;
+ }
+
+ /* Generic stuff. */
+ pcum->aapcs_arg_processed = false;
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
+ pcum->aapcs_reg = NULL_RTX;
+ pcum->aapcs_partial = 0;
+ }
+ else
+ {
+ pcum->nargs += 1;
+ if (arm_vector_mode_supported_p (mode)
+ && pcum->named_count > pcum->nargs
+ && TARGET_IWMMXT_ABI)
+ pcum->iwmmxt_nregs += 1;
+ else
+ pcum->nregs += ARM_NUM_REGS2 (mode, type);
+ }
+}
+
+/* Variable sized types are passed by reference. This is a GCC
+ extension to the ARM ABI. */
+
+static bool
+arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
+}
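+
+/* Illustration: any type whose size is not a compile-time constant,
+   e.g. a struct containing a variable-length array member (a GNU C
+   extension), has a non-INTEGER_CST TYPE_SIZE and is therefore passed
+   by reference rather than copied onto the stack.  */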
+
+/* Encode the current state of the #pragma [no_]long_calls. */
+typedef enum
+{
+ OFF, /* No #pragma [no_]long_calls is in effect. */
+ LONG, /* #pragma long_calls is in effect. */
+ SHORT /* #pragma no_long_calls is in effect. */
+} arm_pragma_enum;
+
+static arm_pragma_enum arm_pragma_long_calls = OFF;
+
+void
+arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ arm_pragma_long_calls = LONG;
+}
+
+void
+arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ arm_pragma_long_calls = SHORT;
+}
+
+void
+arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ arm_pragma_long_calls = OFF;
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL;
+ arguments as in struct attribute_spec.handler. */
+static tree
+arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "interrupt" or "isr" attribute;
+ arguments as in struct attribute_spec.handler. */
+static tree
+arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
+ bool *no_add_attrs)
+{
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ /* FIXME: the argument if any is checked for type attributes;
+ should it be checked for decl ones? */
+ }
+ else
+ {
+ if (TREE_CODE (*node) == FUNCTION_TYPE
+ || TREE_CODE (*node) == METHOD_TYPE)
+ {
+ if (arm_isr_value (args) == ARM_FT_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+ }
+ else if (TREE_CODE (*node) == POINTER_TYPE
+ && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
+ || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
+ && arm_isr_value (args) != ARM_FT_UNKNOWN)
+ {
+ *node = build_variant_type_copy (*node);
+ TREE_TYPE (*node) = build_type_attribute_variant
+ (TREE_TYPE (*node),
+ tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
+ *no_add_attrs = true;
+ }
+ else
+ {
+ /* Possibly pass this attribute on from the type to a decl. */
+ if (flags & ((int) ATTR_FLAG_DECL_NEXT
+ | (int) ATTR_FLAG_FUNCTION_NEXT
+ | (int) ATTR_FLAG_ARRAY_NEXT))
+ {
+ *no_add_attrs = true;
+ return tree_cons (name, args, NULL_TREE);
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ }
+ }
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "pcs" attribute; arguments as in struct
+ attribute_spec.handler. */
+static tree
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+/* Handle the "notshared" attribute. This attribute is another way of
+ requesting hidden visibility. ARM's compiler supports
+ "__declspec(notshared)"; we support the same thing via an
+ attribute. */
+
+static tree
+arm_handle_notshared_attribute (tree *node,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree decl = TYPE_NAME (*node);
+
+ if (decl)
+ {
+ DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+ DECL_VISIBILITY_SPECIFIED (decl) = 1;
+ *no_add_attrs = false;
+ }
+ return NULL_TREE;
+}
+#endif
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+static int
+arm_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ int l1, l2, s1, s2;
+
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
+ {
+ /* If one type has an attribute, the other must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
+ return 0;
+
+ /* Disallow mixed attributes. */
+ if ((l1 & s2) || (l2 & s1))
+ return 0;
+ }
+
+ /* Check for mismatched ISR attribute. */
+ l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
+ if (! l1)
+ l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
+ if (! l2)
+ l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
+ if (l1 != l2)
+ return 0;
+
+ return 1;
+}
+
+/* Assigns default attributes to newly defined type. This is used to
+ set short_call/long_call attributes for function types of
+ functions defined inside corresponding #pragma scopes. */
+static void
+arm_set_default_type_attributes (tree type)
+{
+ /* Add __attribute__ ((long_call)) to all functions, when
+ inside #pragma long_calls or __attribute__ ((short_call)),
+ when inside #pragma no_long_calls. */
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ {
+ tree type_attr_list, attr_name;
+ type_attr_list = TYPE_ATTRIBUTES (type);
+
+ if (arm_pragma_long_calls == LONG)
+ attr_name = get_identifier ("long_call");
+ else if (arm_pragma_long_calls == SHORT)
+ attr_name = get_identifier ("short_call");
+ else
+ return;
+
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
+ TYPE_ATTRIBUTES (type) = type_attr_list;
+ }
+}
+
+/* Return true if DECL is known to be linked into section SECTION. */
+
+static bool
+arm_function_in_section_p (tree decl, section *section)
+{
+ /* We can only be certain about functions defined in the same
+ compilation unit. */
+ if (!TREE_STATIC (decl))
+ return false;
+
+ /* Make sure that SYMBOL always binds to the definition in this
+ compilation unit. */
+ if (!targetm.binds_local_p (decl))
+ return false;
+
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
+ if (!DECL_SECTION_NAME (decl))
+ {
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_ONE_ONLY (decl))
+ return false;
+ }
+
+ return function_section (decl) == section;
+}
+
+/* Return nonzero if a 32-bit "long_call" should be generated for
+ a call from the current function to DECL. We generate a long_call
+ if the function:
+
+   a. has an __attribute__((long_call))
+ or b. is within the scope of a #pragma long_calls
+ or c. the -mlong-calls command line switch has been specified
+
+ However we do not generate a long call if the function:
+
+ d. has an __attribute__ ((short_call))
+ or e. is inside the scope of a #pragma no_long_calls
+ or f. is defined in the same section as the current function. */
+
+bool
+arm_is_long_call_p (tree decl)
+{
+ tree attrs;
+
+ if (!decl)
+ return TARGET_LONG_CALLS;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
+
+ /* For "f", be conservative, and only cater for cases in which the
+ whole of the current function is placed in the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && arm_function_in_section_p (decl, current_function_section ()))
+ return false;
+
+ if (lookup_attribute ("long_call", attrs))
+ return true;
+
+ return TARGET_LONG_CALLS;
+}
+
+/* Return nonzero if it is ok to make a tail-call to DECL. */
+static bool
+arm_function_ok_for_sibcall (tree decl, tree exp)
+{
+ unsigned long func_type;
+
+ if (cfun->machine->sibcall_blocked)
+ return false;
+
+ /* Never tailcall something for which we have no decl, or if we
+ are generating code for Thumb-1. */
+ if (decl == NULL || TARGET_THUMB1)
+ return false;
+
+ /* The PIC register is live on entry to VxWorks PLT entries, so we
+ must make the call before restoring the PIC register. */
+ if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
+ return false;
+
+ /* Cannot tail-call to long calls, since these are out of range of
+ a branch instruction. */
+ if (arm_is_long_call_p (decl))
+ return false;
+
+ /* If we are interworking and the function is not declared static
+ then we can't tail-call it unless we know that it exists in this
+ compilation unit (since it might be a Thumb routine). */
+ if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
+ return false;
+
+ func_type = arm_current_func_type ();
+ /* Never tailcall from an ISR routine - it needs a special exit sequence. */
+ if (IS_INTERRUPT (func_type))
+ return false;
+
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a VFP register but then need to transfer it to a core
+ register. */
+ rtx a, b;
+
+ a = arm_function_value (TREE_TYPE (exp), decl, false);
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+
+ /* Never tailcall if function may be called with a misaligned SP. */
+ if (IS_STACKALIGN (func_type))
+ return false;
+
+ /* The AAPCS says that, on bare-metal, calls to unresolved weak
+ references should become a NOP. Don't convert such calls into
+ sibling calls. */
+ if (TARGET_AAPCS_BASED
+ && arm_abi == ARM_ABI_AAPCS
+ && DECL_WEAK (decl))
+ return false;
+
+ /* Everything else is ok. */
+ return true;
+}
+
+
+/* Addressing mode support functions. */
+
+/* Return nonzero if X is a legitimate immediate operand when compiling
+ for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
+int
+legitimate_pic_operand_p (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF
+ || (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
+ return 0;
+
+ return 1;
+}
+
+/* Record that the current function needs a PIC register. Initialize
+ cfun->machine->pic_reg if we have not already done so. */
+
+static void
+require_pic_register (void)
+{
+ /* A lot of the logic here is made obscure by the fact that this
+ routine gets called as part of the rtx cost estimation process.
+ We don't want those calls to affect any assumptions about the real
+ function; and further, we can't call entry_of_function() until we
+ start the real expansion process. */
+ if (!crtl->uses_pic_offset_table)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ if (arm_pic_register != INVALID_REGNUM)
+ {
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
+
+ /* Play games to avoid marking the function as needing pic
+ if we are being called as part of the cost-estimation
+ process. */
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
+ crtl->uses_pic_offset_table = 1;
+ }
+ else
+ {
+ rtx seq, insn;
+
+ if (!cfun->machine->pic_reg)
+ cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+
+ /* Play games to avoid marking the function as needing pic
+ if we are being called as part of the cost-estimation
+ process. */
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
+ {
+ crtl->uses_pic_offset_table = 1;
+ start_sequence ();
+
+ arm_load_pic_register (0UL);
+
+ seq = get_insns ();
+ end_sequence ();
+
+ for (insn = seq; insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ INSN_LOCATOR (insn) = prologue_locator;
+
+ /* We can be called during expansion of PHI nodes, where
+ we can't yet emit instructions directly in the final
+ insn stream. Queue the insns on the entry edge, they will
+ be committed after everything else is expanded. */
+ insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
+ }
+ }
+ }
+}
+
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+{
+ if (GET_CODE (orig) == SYMBOL_REF
+ || GET_CODE (orig) == LABEL_REF)
+ {
+ rtx insn;
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ /* VxWorks does not impose a fixed gap between segments; the run-time
+ gap can be different from the object-file gap. We therefore can't
+ use GOTOFF unless we are absolutely sure that the symbol is in the
+ same segment as the GOT. Unfortunately, the flexibility of linker
+ scripts means that we can't be sure of that in general, so assume
+ that GOTOFF is never valid on VxWorks. */
+      if ((GET_CODE (orig) == LABEL_REF
+	   || (GET_CODE (orig) == SYMBOL_REF
+	       && SYMBOL_REF_LOCAL_P (orig)))
+ && NEED_GOT_RELOC
+ && !TARGET_VXWORKS_RTP)
+ insn = arm_pic_static_addr (orig, reg);
+ else
+ {
+ rtx pat;
+ rtx mem;
+
+ /* If this function doesn't have a pic register, create one now. */
+ require_pic_register ();
+
+ pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+
+ /* Make the MEM as close to a constant as possible. */
+ mem = SET_SRC (pat);
+ gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
+ MEM_READONLY_P (mem) = 1;
+ MEM_NOTRAP_P (mem) = 1;
+
+ insn = emit_insn (pat);
+ }
+
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized
+ by loop. */
+ set_unique_reg_note (insn, REG_EQUAL, orig);
+
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base, offset;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
+ return orig;
+
+ /* Handle the case where we have: const (UNSPEC_TLS). */
+ if (GET_CODE (XEXP (orig, 0)) == UNSPEC
+ && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
+ return orig;
+
+ /* Handle the case where we have:
+ const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
+ CONST_INT. */
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
+ {
+ gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
+ return orig;
+ }
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
+ base == reg ? 0 : reg);
+
+ if (GET_CODE (offset) == CONST_INT)
+ {
+ /* The base register doesn't really matter, we only want to
+ test the index for the appropriate mode. */
+ if (!arm_legitimate_index_p (mode, offset, SET, 0))
+ {
+ gcc_assert (can_create_pseudo_p ());
+ offset = force_reg (Pmode, offset);
+ }
+
+ if (GET_CODE (offset) == CONST_INT)
+ return plus_constant (base, INTVAL (offset));
+ }
+
+ if (GET_MODE_SIZE (mode) > 4
+ && (GET_MODE_CLASS (mode) == MODE_INT
+ || TARGET_SOFT_FLOAT))
+ {
+ emit_insn (gen_addsi3 (reg, base, offset));
+ return reg;
+ }
+
+ return gen_rtx_PLUS (Pmode, base, offset);
+ }
+
+ return orig;
+}
+
+
+/* Find a spare register to use during the prolog of a function. */
+
+static int
+thumb_find_work_register (unsigned long pushed_regs_mask)
+{
+ int reg;
+
+ /* Check the argument registers first as these are call-used. The
+ register allocation order means that sometimes r3 might be used
+ but earlier argument registers might not, so check them all. */
+ for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
+ if (!df_regs_ever_live_p (reg))
+ return reg;
+
+ /* Before going on to check the call-saved registers we can try a couple
+ more ways of deducing that r3 is available. The first is when we are
+ pushing anonymous arguments onto the stack and we have less than 4
+ registers worth of fixed arguments(*). In this case r3 will be part of
+ the variable argument list and so we can be sure that it will be
+ pushed right at the start of the function. Hence it will be available
+ for the rest of the prologue.
+     (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
+ if (cfun->machine->uses_anonymous_args
+ && crtl->args.pretend_args_size > 0)
+ return LAST_ARG_REGNUM;
+
+ /* The other case is when we have fixed arguments but less than 4 registers
+ worth. In this case r3 might be used in the body of the function, but
+ it is not being used to convey an argument into the function. In theory
+ we could just check crtl->args.size to see how many bytes are
+ being passed in argument registers, but it seems that it is unreliable.
+ Sometimes it will have the value 0 when in fact arguments are being
+ passed. (See testcase execute/20021111-1.c for an example). So we also
+ check the args_info.nregs field as well. The problem with this field is
+ that it makes no allowances for arguments that are passed to the
+ function but which are not used. Hence we could miss an opportunity
+ when a function has an unused argument in r3. But it is better to be
+ safe than to be sorry. */
+ if (! cfun->machine->uses_anonymous_args
+ && crtl->args.size >= 0
+ && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
+ && crtl->args.info.nregs < 4)
+ return LAST_ARG_REGNUM;
+
+ /* Otherwise look for a call-saved register that is going to be pushed. */
+ for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
+ if (pushed_regs_mask & (1 << reg))
+ return reg;
+
+ if (TARGET_THUMB2)
+ {
+ /* Thumb-2 can use high regs. */
+ for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
+ if (pushed_regs_mask & (1 << reg))
+ return reg;
+ }
+ /* Something went wrong - thumb_compute_save_reg_mask()
+ should have arranged for a suitable register to be pushed. */
+ gcc_unreachable ();
+}
+
+static GTY(()) int pic_labelno;
+
+/* Generate code to load the PIC register. In thumb mode SCRATCH is a
+ low register. */
+
+void
+arm_load_pic_register (unsigned long saved_regs)
+{
+ rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+
+ if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
+ return;
+
+ gcc_assert (flag_pic);
+
+ pic_reg = cfun->machine->pic_reg;
+ if (TARGET_VXWORKS_RTP)
+ {
+ pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
+
+ emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
+
+ pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
+ }
+ else
+ {
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition, on the Thumb it is 'dot + 4'. */
+ pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
+ UNSPEC_GOTSYM_OFF);
+ pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
+
+ if (TARGET_32BIT)
+ {
+ emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (arm_pic_register != INVALID_REGNUM
+ && REGNO (pic_reg) > LAST_LO_REGNUM)
+ {
+ /* We will have pushed the pic register, so we should always be
+ able to find a work register. */
+ pic_tmp = gen_rtx_REG (SImode,
+ thumb_find_work_register (saved_regs));
+ emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
+ emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
+ }
+ else
+ emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
+ }
+ }
+
+ /* Need to emit this whether or not we obey regdecls,
+ since setjmp/longjmp can cause life info to screw up. */
+ emit_use (pic_reg);
+}
+
+/* Generate code to load the address of a static var when flag_pic is set. */
+static rtx
+arm_pic_static_addr (rtx orig, rtx reg)
+{
+ rtx l1, labelno, offset_rtx, insn;
+
+ gcc_assert (flag_pic);
+
+ /* We use an UNSPEC rather than a LABEL_REF because this label
+ never appears in the code stream. */
+ labelno = GEN_INT (pic_labelno++);
+ l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ l1 = gen_rtx_CONST (VOIDmode, l1);
+
+ /* On the ARM the PC register contains 'dot + 8' at the time of the
+ addition, on the Thumb it is 'dot + 4'. */
+ offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
+ offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
+ UNSPEC_SYMBOL_OFFSET);
+ offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
+
+ insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
+ return insn;
+}
+
+/* Return nonzero if X is valid as an ARM state addressing register. */
+static int
+arm_address_register_rtx_p (rtx x, int strict_p)
+{
+ int regno;
+
+ if (GET_CODE (x) != REG)
+ return 0;
+
+ regno = REGNO (x);
+
+ if (strict_p)
+ return ARM_REGNO_OK_FOR_BASE_P (regno);
+
+ return (regno <= LAST_ARM_REGNUM
+ || regno >= FIRST_PSEUDO_REGISTER
+ || regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM);
+}
+
+/* Return TRUE if this rtx is the difference of a symbol and a label,
+ and will reduce to a PC-relative relocation in the object file.
+ Expressions like this can be left alone when generating PIC, rather
+ than forced through the GOT. */
+static int
+pcrel_constant_p (rtx x)
+{
+ if (GET_CODE (x) == MINUS)
+ return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
+
+ return FALSE;
+}
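+
+/* E.g. (minus (symbol_ref "sym") (label_ref L)) reduces to a
+   pc-relative relocation and is accepted, whereas
+   (minus (symbol_ref "a") (symbol_ref "b")) is rejected because the
+   second operand mentions no label.  */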
+
+/* Return true if X will surely end up in an index register after next
+ splitting pass. */
+static bool
+will_be_in_index_register (const_rtx x)
+{
+ /* arm.md: calculate_pic_address will split this into a register. */
+ return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
+}
+
+/* Return nonzero if X is a valid ARM state address operand. */
+int
+arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+ int strict_p)
+{
+ bool use_ldrd;
+ enum rtx_code code = GET_CODE (x);
+
+ if (arm_address_register_rtx_p (x, strict_p))
+ return 1;
+
+ use_ldrd = (TARGET_LDRD
+ && (mode == DImode
+ || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
+
+ if (code == POST_INC || code == PRE_DEC
+ || ((code == PRE_INC || code == POST_DEC)
+ && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
+ return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+
+ else if ((code == POST_MODIFY || code == PRE_MODIFY)
+ && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
+ && GET_CODE (XEXP (x, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ {
+ rtx addend = XEXP (XEXP (x, 1), 1);
+
+ /* Don't allow ldrd post increment by register because it's hard
+ to fixup invalid register choices. */
+ if (use_ldrd
+ && GET_CODE (x) == POST_MODIFY
+ && GET_CODE (addend) == REG)
+ return 0;
+
+ return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
+ && arm_legitimate_index_p (mode, addend, outer, strict_p));
+ }
+
+ /* After reload constants split into minipools will have addresses
+ from a LABEL_REF. */
+ else if (reload_completed
+ && (code == LABEL_REF
+ || (code == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
+ return 0;
+
+ else if (code == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+	     && ((GET_CODE (xop1) == CONST_INT
+ && arm_legitimate_index_p (mode, xop1, outer, strict_p))
+ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
+ }
+
+#if 0
+ /* Reload currently can't handle MINUS, so disable this for now */
+ else if (GET_CODE (x) == MINUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return (arm_address_register_rtx_p (xop0, strict_p)
+ && arm_legitimate_index_p (mode, xop1, outer, strict_p));
+ }
+#endif
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if X is a valid Thumb-2 address operand. */
+static int
+thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+{
+ bool use_ldrd;
+ enum rtx_code code = GET_CODE (x);
+
+ if (arm_address_register_rtx_p (x, strict_p))
+ return 1;
+
+ use_ldrd = (TARGET_LDRD
+ && (mode == DImode
+ || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
+
+ if (code == POST_INC || code == PRE_DEC
+ || ((code == PRE_INC || code == POST_DEC)
+ && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
+ return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
+
+ else if ((code == POST_MODIFY || code == PRE_MODIFY)
+ && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
+ && GET_CODE (XEXP (x, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ {
+ /* Thumb-2 only has autoincrement by constant. */
+ rtx addend = XEXP (XEXP (x, 1), 1);
+ HOST_WIDE_INT offset;
+
+ if (GET_CODE (addend) != CONST_INT)
+ return 0;
+
+      offset = INTVAL (addend);
+ if (GET_MODE_SIZE (mode) <= 4)
+ return (offset > -256 && offset < 256);
+
+ return (use_ldrd && offset > -1024 && offset < 1024
+ && (offset & 3) == 0);
+ }
+
+ /* After reload constants split into minipools will have addresses
+ from a LABEL_REF. */
+ else if (reload_completed
+ && (code == LABEL_REF
+ || (code == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
+ return 0;
+
+ else if (code == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ return ((arm_address_register_rtx_p (xop0, strict_p)
+ && (thumb2_legitimate_index_p (mode, xop1, strict_p)
+ || (!strict_p && will_be_in_index_register (xop1))))
+ || (arm_address_register_rtx_p (xop1, strict_p)
+ && thumb2_legitimate_index_p (mode, xop0, strict_p)));
+ }
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && code == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if INDEX is valid for an address index operand in
+ ARM state. */
+static int
+arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
+ int strict_p)
+{
+ HOST_WIDE_INT range;
+ enum rtx_code code = GET_CODE (index);
+
+ /* Standard coprocessor addressing modes. */
+ if (TARGET_HARD_FLOAT
+ && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
+ && (mode == SFmode || mode == DFmode
+ || (TARGET_MAVERICK && mode == DImode)))
+ return (code == CONST_INT && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ /* For quad modes, we restrict the constant offset to be slightly less
+ than what the instruction format permits. We do this because for
+ quad mode moves, we will actually decompose them into two separate
+ double-mode reads or writes. INDEX must therefore be a valid
+ (double-mode) offset and so should INDEX+8. */
+ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ /* We have no such constraint on double mode offsets, so we permit the
+ full range of the instruction format. */
+ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (arm_address_register_rtx_p (index, strict_p)
+ && (GET_MODE_SIZE (mode) <= 4))
+ return 1;
+
+ if (mode == DImode || mode == DFmode)
+ {
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+
+ if (TARGET_LDRD)
+ return val > -256 && val < 256;
+ else
+ return val > -4096 && val < 4092;
+ }
+
+ return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
+ }
+
+ if (GET_MODE_SIZE (mode) <= 4
+ && ! (arm_arch4
+ && (mode == HImode
+ || mode == HFmode
+ || (mode == QImode && outer == SIGN_EXTEND))))
+ {
+ if (code == MULT)
+ {
+ rtx xiop0 = XEXP (index, 0);
+ rtx xiop1 = XEXP (index, 1);
+
+ return ((arm_address_register_rtx_p (xiop0, strict_p)
+ && power_of_two_operand (xiop1, SImode))
+ || (arm_address_register_rtx_p (xiop1, strict_p)
+ && power_of_two_operand (xiop0, SImode)));
+ }
+ else if (code == LSHIFTRT || code == ASHIFTRT
+ || code == ASHIFT || code == ROTATERT)
+ {
+ rtx op = XEXP (index, 1);
+
+ return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
+ && GET_CODE (op) == CONST_INT
+ && INTVAL (op) > 0
+ && INTVAL (op) <= 31);
+ }
+ }
+
+ /* For ARM v4 we may be doing a sign-extend operation during the
+ load. */
+ if (arm_arch4)
+ {
+ if (mode == HImode
+ || mode == HFmode
+ || (outer == SIGN_EXTEND && mode == QImode))
+ range = 256;
+ else
+ range = 4096;
+ }
+ else
+ range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
+
+ return (code == CONST_INT
+ && INTVAL (index) < range
+ && INTVAL (index) > -range);
+}
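+
+/* Example (illustrative): for an SImode access, both (reg rm) and
+   (mult (reg rm) (const_int 4)) are accepted as indices, the latter
+   via the power_of_two_operand check, matching the shifted-register
+   addressing mode LDR rd, [rn, rm, LSL #2].  */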
+
+/* Return true if OP is a valid index scaling factor for a Thumb-2
+   address index operand, i.e. 1, 2, 4 or 8.  */
+static bool
+thumb2_index_mul_operand (rtx op)
+{
+ HOST_WIDE_INT val;
+
+  if (GET_CODE (op) != CONST_INT)
+    return false;
+
+  val = INTVAL (op);
+ return (val == 1 || val == 2 || val == 4 || val == 8);
+}
+
+/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
+static int
+thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
+{
+ enum rtx_code code = GET_CODE (index);
+
+ /* ??? Combine arm and thumb2 coprocessor addressing modes. */
+ /* Standard coprocessor addressing modes. */
+ if (TARGET_HARD_FLOAT
+ && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
+ && (mode == SFmode || mode == DFmode
+ || (TARGET_MAVERICK && mode == DImode)))
+ return (code == CONST_INT && INTVAL (index) < 1024
+	    /* Thumb-2 allows only > -256 index range for its core register
+ load/stores. Since we allow SF/DF in core registers, we have
+ to use the intersection between -256~4096 (core) and -1024~1024
+ (coprocessor). */
+ && INTVAL (index) > -256
+ && (INTVAL (index) & 3) == 0);
+
+ if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
+ {
+ /* For DImode assume values will usually live in core regs
+ and only allow LDRD addressing modes. */
+ if (!TARGET_LDRD || mode != DImode)
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+ }
+
+ /* For quad modes, we restrict the constant offset to be slightly less
+ than what the instruction format permits. We do this because for
+ quad mode moves, we will actually decompose them into two separate
+ double-mode reads or writes. INDEX must therefore be a valid
+ (double-mode) offset and so should INDEX+8. */
+ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1016
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ /* We have no such constraint on double mode offsets, so we permit the
+ full range of the instruction format. */
+ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
+ return (code == CONST_INT
+ && INTVAL (index) < 1024
+ && INTVAL (index) > -1024
+ && (INTVAL (index) & 3) == 0);
+
+ if (arm_address_register_rtx_p (index, strict_p)
+ && (GET_MODE_SIZE (mode) <= 4))
+ return 1;
+
+ if (mode == DImode || mode == DFmode)
+ {
+ if (code == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (index);
+ /* ??? Can we assume ldrd for thumb2? */
+ /* Thumb-2 ldrd only has reg+const addressing modes. */
+ /* ldrd supports offsets of +-1020.
+ However the ldr fallback does not. */
+ return val > -256 && val < 256 && (val & 3) == 0;
+ }
+ else
+ return 0;
+ }
+
+ if (code == MULT)
+ {
+ rtx xiop0 = XEXP (index, 0);
+ rtx xiop1 = XEXP (index, 1);
+
+ return ((arm_address_register_rtx_p (xiop0, strict_p)
+ && thumb2_index_mul_operand (xiop1))
+ || (arm_address_register_rtx_p (xiop1, strict_p)
+ && thumb2_index_mul_operand (xiop0)));
+ }
+ else if (code == ASHIFT)
+ {
+ rtx op = XEXP (index, 1);
+
+ return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
+ && GET_CODE (op) == CONST_INT
+ && INTVAL (op) > 0
+ && INTVAL (op) <= 3);
+ }
+
+ return (code == CONST_INT
+ && INTVAL (index) < 4096
+ && INTVAL (index) > -256);
+}
+
+/* Return nonzero if X is valid as a 16-bit Thumb state base register. */
+static int
+thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
+{
+ int regno;
+
+ if (GET_CODE (x) != REG)
+ return 0;
+
+ regno = REGNO (x);
+
+ if (strict_p)
+ return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
+
+ return (regno <= LAST_LO_REGNUM
+ || regno > LAST_VIRTUAL_REGISTER
+ || regno == FRAME_POINTER_REGNUM
+ || (GET_MODE_SIZE (mode) >= 4
+ && (regno == STACK_POINTER_REGNUM
+ || regno >= FIRST_PSEUDO_REGISTER
+ || x == hard_frame_pointer_rtx
+ || x == arg_pointer_rtx)));
+}
+
+/* Return nonzero if x is a legitimate index register. This is the case
+ for any base register that can access a QImode object. */
+inline static int
+thumb1_index_register_rtx_p (rtx x, int strict_p)
+{
+ return thumb1_base_register_rtx_p (x, QImode, strict_p);
+}
+
+/* Return nonzero if x is a legitimate 16-bit Thumb-state address.
+
+ The AP may be eliminated to either the SP or the FP, so we use the
+ least common denominator, e.g. SImode, and offsets from 0 to 64.
+
+ ??? Verify whether the above is the right approach.
+
+ ??? Also, the FP may be eliminated to the SP, so perhaps that
+ needs special handling also.
+
+ ??? Look at how the mips16 port solves this problem. It probably uses
+ better ways to solve some of these problems.
+
+ Although it is not incorrect, we don't accept QImode and HImode
+ addresses based on the frame pointer or arg pointer until the
+ reload pass starts. This is so that eliminating such addresses
+ into stack based ones won't produce impossible code. */
+static int
+thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+{
+ /* ??? Not clear if this is right. Experiment. */
+ if (GET_MODE_SIZE (mode) < 4
+ && !(reload_in_progress || reload_completed)
+ && (reg_mentioned_p (frame_pointer_rtx, x)
+ || reg_mentioned_p (arg_pointer_rtx, x)
+ || reg_mentioned_p (virtual_incoming_args_rtx, x)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, x)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
+ || reg_mentioned_p (virtual_stack_vars_rtx, x)))
+ return 0;
+
+ /* Accept any base register. SP only in SImode or larger. */
+ else if (thumb1_base_register_rtx_p (x, mode, strict_p))
+ return 1;
+
+ /* This is PC relative data before arm_reorg runs. */
+ else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
+ && GET_CODE (x) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
+ return 1;
+
+ /* This is PC relative data after arm_reorg runs. */
+ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
+ && reload_completed
+ && (GET_CODE (x) == LABEL_REF
+ || (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
+ return 1;
+
+ /* Post-inc indexing only supported for SImode and larger. */
+ else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
+ && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
+ return 1;
+
+ else if (GET_CODE (x) == PLUS)
+ {
+ /* REG+REG address can be any two index registers. */
+ /* We disallow FRAME+REG addressing since we know that FRAME
+ will be replaced with STACK, and SP relative addressing only
+ permits SP+OFFSET. */
+ if (GET_MODE_SIZE (mode) <= 4
+ && XEXP (x, 0) != frame_pointer_rtx
+ && XEXP (x, 1) != frame_pointer_rtx
+ && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
+ && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
+ || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
+ return 1;
+
+ /* REG+const has 5-7 bit offset for non-SP registers. */
+ else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
+ || XEXP (x, 0) == arg_pointer_rtx)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
+ return 1;
+
+ /* REG+const has 10-bit offset for SP, but only SImode and
+ larger is supported. */
+ /* ??? Should probably check for DI/DFmode overflow here
+ just like GO_IF_LEGITIMATE_OFFSET does. */
+ else if (GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
+ && GET_MODE_SIZE (mode) >= 4
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) >= 0
+ && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
+ && (INTVAL (XEXP (x, 1)) & 3) == 0)
+ return 1;
+
+ else if (GET_CODE (XEXP (x, 0)) == REG
+ && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
+ || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
+ || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (XEXP (x, 0))
+ <= LAST_VIRTUAL_POINTER_REGISTER))
+ && GET_MODE_SIZE (mode) >= 4
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) & 3) == 0)
+ return 1;
+ }
+
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT
+ && GET_MODE_SIZE (mode) == 4
+ && GET_CODE (x) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (x)
+ && ! (flag_pic
+ && symbol_mentioned_p (get_pool_constant (x))
+ && ! pcrel_constant_p (get_pool_constant (x))))
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if VAL can be used as an offset in a Thumb-state address
+ instruction of mode MODE. */
+int
+thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
+{
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ return val >= 0 && val < 32;
+
+ case 2:
+ return val >= 0 && val < 64 && (val & 1) == 0;
+
+ default:
+ return (val >= 0
+ && (val + GET_MODE_SIZE (mode)) <= 128
+ && (val & 3) == 0);
+ }
+}
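+
+/* Illustration: QImode accepts byte offsets 0-31, HImode even offsets
+   0-62, and SImode word-aligned offsets 0-124, matching the scaled
+   5-bit immediate fields of the 16-bit ldrb/ldrh/ldr encodings.  */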
+
+bool
+arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ if (TARGET_ARM)
+ return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
+ else if (TARGET_THUMB2)
+ return thumb2_legitimate_address_p (mode, x, strict_p);
+ else /* if (TARGET_THUMB1) */
+ return thumb1_legitimate_address_p (mode, x, strict_p);
+}
+
+/* Build the SYMBOL_REF for __tls_get_addr. */
+
+static GTY(()) rtx tls_get_addr_libfunc;
+
+static rtx
+get_tls_get_addr (void)
+{
+ if (!tls_get_addr_libfunc)
+ tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
+ return tls_get_addr_libfunc;
+}
+
+static rtx
+arm_load_tp (rtx target)
+{
+ if (!target)
+ target = gen_reg_rtx (SImode);
+
+ if (TARGET_HARD_TP)
+ {
+ /* Can return in any reg. */
+ emit_insn (gen_load_tp_hard (target));
+ }
+ else
+ {
+ /* Always returned in r0. Immediately copy the result into a pseudo,
+ otherwise other uses of r0 (e.g. setting up function arguments) may
+ clobber the value. */
+
+ rtx tmp;
+
+ emit_insn (gen_load_tp_soft ());
+
+ tmp = gen_rtx_REG (SImode, 0);
+ emit_move_insn (target, tmp);
+ }
+ return target;
+}
+
+static rtx
+load_tls_operand (rtx x, rtx reg)
+{
+ rtx tmp;
+
+ if (reg == NULL_RTX)
+ reg = gen_reg_rtx (SImode);
+
+ tmp = gen_rtx_CONST (SImode, x);
+
+ emit_move_insn (reg, tmp);
+
+ return reg;
+}
+
+static rtx
+arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
+{
+ rtx insns, label, labelno, sum;
+
+ start_sequence ();
+
+ labelno = GEN_INT (pic_labelno++);
+ label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ label = gen_rtx_CONST (VOIDmode, label);
+
+ sum = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (4, x, GEN_INT (reloc), label,
+ GEN_INT (TARGET_ARM ? 8 : 4)),
+ UNSPEC_TLS);
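+ /* The 8 (ARM) or 4 (Thumb) above is the implicit PC offset: reading
+ the PC yields the address of the current insn plus 8 in ARM state
+ and plus 4 in Thumb state. */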
+ reg = load_tls_operand (sum, reg);
+
+ if (TARGET_ARM)
+ emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
+ else if (TARGET_THUMB2)
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ else /* TARGET_THUMB1 */
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+
+ *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
+ LCT_PURE, /* LCT_CONST? */
+ Pmode, 1, reg, Pmode);
+
+ insns = get_insns ();
+ end_sequence ();
+
+ return insns;
+}
+
+rtx
+legitimize_tls_address (rtx x, rtx reg)
+{
+ rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
+ unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, ret, x);
+ return dest;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LDM result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
+ UNSPEC_TLS);
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, ret, eqv);
+
+ /* Load the addend. */
+ addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
+ UNSPEC_TLS);
+ addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
+ return gen_rtx_PLUS (Pmode, dest, addend);
+
+ case TLS_MODEL_INITIAL_EXEC:
+ labelno = GEN_INT (pic_labelno++);
+ label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
+ label = gen_rtx_CONST (VOIDmode, label);
+ sum = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
+ GEN_INT (TARGET_ARM ? 8 : 4)),
+ UNSPEC_TLS);
+ reg = load_tls_operand (sum, reg);
+
+ if (TARGET_ARM)
+ emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
+ else if (TARGET_THUMB2)
+ emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
+ else
+ {
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
+ emit_move_insn (reg, gen_const_mem (SImode, reg));
+ }
+
+ tp = arm_load_tp (NULL_RTX);
+
+ return gen_rtx_PLUS (Pmode, tp, reg);
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = arm_load_tp (NULL_RTX);
+
+ reg = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (2, x, GEN_INT (TLS_LE32)),
+ UNSPEC_TLS);
+ reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
+
+ return gen_rtx_PLUS (Pmode, tp, reg);
+
+ default:
+ abort ();
+ }
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address. */
+rtx
+arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+{
+ if (!TARGET_ARM)
+ {
+ /* TODO: legitimize_address for Thumb2. */
+ if (TARGET_THUMB2)
+ return x;
+ return thumb_legitimize_address (x, orig_x, mode);
+ }
+
+ if (arm_tls_symbol_p (x))
+ return legitimize_tls_address (x, NULL_RTX);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
+ xop0 = force_reg (SImode, xop0);
+
+ if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
+ xop1 = force_reg (SImode, xop1);
+
+ if (ARM_BASE_REGISTER_RTX_P (xop0)
+ && GET_CODE (xop1) == CONST_INT)
+ {
+ HOST_WIDE_INT n, low_n;
+ rtx base_reg, val;
+ n = INTVAL (xop1);
+
+ /* VFP addressing modes actually allow greater offsets, but for
+ now we just stick with the lowest common denominator. */
+ if (mode == DImode
+ || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
+ {
+ low_n = n & 0x0f;
+ n &= ~0x0f;
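+ /* E.g. n = 0x1b splits into n = 0x10, low_n = 0xb; since
+ low_n > 4 it is rebalanced below to n = 0x20, low_n = -5. */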
+ if (low_n > 4)
+ {
+ n += 16;
+ low_n -= 16;
+ }
+ }
+ else
+ {
+ low_n = ((mode) == TImode ? 0
+ : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
+ n -= low_n;
+ }
+
+ base_reg = gen_reg_rtx (SImode);
+ val = force_operand (plus_constant (xop0, n), NULL_RTX);
+ emit_move_insn (base_reg, val);
+ x = plus_constant (base_reg, low_n);
+ }
+ else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
+ x = gen_rtx_PLUS (SImode, xop0, xop1);
+ }
+
+ /* XXX We don't allow MINUS any more -- see comment in
+ arm_legitimate_address_outer_p (). */
+ else if (GET_CODE (x) == MINUS)
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (CONSTANT_P (xop0))
+ xop0 = force_reg (SImode, xop0);
+
+ if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
+ xop1 = force_reg (SImode, xop1);
+
+ if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
+ x = gen_rtx_MINUS (SImode, xop0, xop1);
+ }
+
+ /* Make sure to take full advantage of the pre-indexed addressing mode
+ with absolute addresses, which often allows the base register to be
+ factored out across multiple adjacent memory references, and might
+ even allow the minipool to be avoided entirely. */
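+ /* For instance (illustrative): with a 12-bit index, the constant
+ address 0x12345678 for an SImode load splits into the base
+ 0x12345000, kept in a register, and the index 0x678 folded into
+ the load itself. */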
+ else if (GET_CODE (x) == CONST_INT && optimize > 0)
+ {
+ unsigned int bits;
+ HOST_WIDE_INT mask, base, index;
+ rtx base_reg;
+
+ /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
+ use an 8-bit index. So let's use a 12-bit index for SImode only and
+ hope that arm_gen_constant will enable ldrb to use more bits. */
+ bits = (mode == SImode) ? 12 : 8;
+ mask = (1 << bits) - 1;
+ base = INTVAL (x) & ~mask;
+ index = INTVAL (x) & mask;
+ if (bit_count (base & 0xffffffff) > (32 - bits)/2)
+ {
+ /* It'll most probably be more efficient to generate the base
+ with more bits set and use a negative index instead. */
+ base |= mask;
+ index -= mask;
+ }
+ base_reg = force_reg (SImode, GEN_INT (base));
+ x = plus_constant (base_reg, index);
+ }
+
+ if (flag_pic)
+ {
+ /* We need to find and carefully transform any SYMBOL and LABEL
+ references; so go back to the original address expression. */
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+
+ if (new_x != orig_x)
+ x = new_x;
+ }
+
+ return x;
+}
+
+
+/* Try machine-dependent ways of modifying an illegitimate Thumb address
+ to be legitimate. If we find one, return the new, valid address. */
+rtx
+thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+{
+ if (arm_tls_symbol_p (x))
+ return legitimize_tls_address (x, NULL_RTX);
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
+ || INTVAL (XEXP (x, 1)) < 0))
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+ HOST_WIDE_INT offset = INTVAL (xop1);
+
+ /* Try and fold the offset into a biasing of the base register and
+ then offsetting that. Don't do this when optimizing for space
+ since it can cause too many CSEs. */
+ if (optimize_size && offset >= 0
+ && offset < 256 + 31 * GET_MODE_SIZE (mode))
+ {
+ HOST_WIDE_INT delta;
+
+ if (offset >= 256)
+ delta = offset - (256 - GET_MODE_SIZE (mode));
+ else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
+ delta = 31 * GET_MODE_SIZE (mode);
+ else
+ delta = offset & (~31 * GET_MODE_SIZE (mode));
+
+ xop0 = force_operand (plus_constant (xop0, offset - delta),
+ NULL_RTX);
+ x = plus_constant (xop0, delta);
+ }
+ else if (offset < 0 && offset > -256)
+ /* Small negative offsets are best done with a subtract before the
+ dereference; forcing these into a register normally takes two
+ instructions. */
+ x = force_operand (x, NULL_RTX);
+ else
+ {
+ /* For the remaining cases, force the constant into a register. */
+ xop1 = force_reg (SImode, xop1);
+ x = gen_rtx_PLUS (SImode, xop0, xop1);
+ }
+ }
+ else if (GET_CODE (x) == PLUS
+ && s_register_operand (XEXP (x, 1), SImode)
+ && !s_register_operand (XEXP (x, 0), SImode))
+ {
+ rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
+
+ x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
+ }
+
+ if (flag_pic)
+ {
+ /* We need to find and carefully transform any SYMBOL and LABEL
+ references; so go back to the original address expression. */
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+
+ if (new_x != orig_x)
+ x = new_x;
+ }
+
+ return x;
+}
+
+bool
+arm_legitimize_reload_address (rtx *p,
+ enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*p) == PLUS
+ && GET_CODE (XEXP (*p, 0)) == REG
+ && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
+ && GET_CODE (XEXP (*p, 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
+ HOST_WIDE_INT low, high;
+
+ if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
+ low = ((val & 0xf) ^ 0x8) - 0x8;
+ else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
+ /* Need to be careful, -256 is not a valid offset. */
+ low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
+ else if (mode == SImode
+ || (mode == SFmode && TARGET_SOFT_FLOAT)
+ || ((mode == HImode || mode == QImode) && ! arm_arch4))
+ /* Need to be careful, -4096 is not a valid offset. */
+ low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
+ else if ((mode == HImode || mode == QImode) && arm_arch4)
+ /* Need to be careful, -256 is not a valid offset. */
+ low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ && TARGET_HARD_FLOAT && TARGET_FPA)
+ /* Need to be careful, -1024 is not a valid offset. */
+ low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
+ else
+ return false;
+
+ high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
+ ^ (unsigned HOST_WIDE_INT) 0x80000000)
+ - (unsigned HOST_WIDE_INT) 0x80000000);
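+ /* For example (illustrative): a DImode access at offset 0x10c
+ gives low = ((0xc ^ 0x8) - 0x8) = -4 and high = 0x110, so the
+ base register absorbs 0x110 and -4 fits the narrow offset. */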
+ /* Check for overflow or zero. */
+ if (low == 0 || high == 0 || (high + low != val))
+ return false;
+
+ /* Reload the high part into a base reg; leave the low part
+ in the mem. */
+ *p = gen_rtx_PLUS (GET_MODE (*p),
+ gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
+ GEN_INT (high)),
+ GEN_INT (low));
+ push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+ MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
+ VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return true;
+ }
+
+ return false;
+}
+
+rtx
+thumb_legitimize_reload_address (rtx *x_p,
+ enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ rtx x = *x_p;
+
+ if (GET_CODE (x) == PLUS
+ && GET_MODE_SIZE (mode) < 4
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 0) == stack_pointer_rtx
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
+ {
+ rtx orig_x = x;
+
+ x = copy_rtx (x);
+ push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return x;
+ }
+
+ /* If both registers are hi-regs, then it's better to reload the
+ entire expression rather than each register individually. That
+ only requires one reload register rather than two. */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && REG_P (XEXP (x, 1))
+ && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
+ && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
+ {
+ rtx orig_x = x;
+
+ x = copy_rtx (x);
+ push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
+ return x;
+ }
+
+ return NULL;
+}
+
+/* Test for various thread-local symbols. */
+
+/* Return TRUE if X is a thread-local symbol. */
+
+static bool
+arm_tls_symbol_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+
+ return SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+/* Helper for arm_tls_referenced_p. */
+
+static int
+arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == SYMBOL_REF)
+ return SYMBOL_REF_TLS_MODEL (*x) != 0;
+
+ /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
+ TLS offsets, not real symbol references. */
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_TLS)
+ return -1;
+
+ return 0;
+}
+
+/* Return TRUE if X contains any TLS symbol references. */
+
+bool
+arm_tls_referenced_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
+}
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
+
+bool
+arm_cannot_force_const_mem (rtx x)
+{
+ rtx base, offset;
+
+ if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return true;
+ }
+ return arm_tls_referenced_p (x);
+}
+
+#define REG_OR_SUBREG_REG(X) \
+ (GET_CODE (X) == REG \
+ || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
+
+#define REG_OR_SUBREG_RTX(X) \
+ (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
+
+static inline int
+thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+{
+ enum machine_mode mode = GET_MODE (x);
+ int total;
+
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ case PLUS:
+ case MINUS:
+ case COMPARE:
+ case NEG:
+ case NOT:
+ return COSTS_N_INSNS (1);
+
+ case MULT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ int cycles = 0;
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+
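+ /* Charge roughly one cycle per two bits of the constant
+ multiplier; presumably modelling a multiplier that terminates
+ early for small operands. */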
+ while (i)
+ {
+ i >>= 2;
+ cycles++;
+ }
+ return COSTS_N_INSNS (2) + cycles;
+ }
+ return COSTS_N_INSNS (1) + 16;
+
+ case SET:
+ return (COSTS_N_INSNS (1)
+ + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ + (GET_CODE (SET_DEST (x)) == MEM)));
+
+ case CONST_INT:
+ if (outer == SET)
+ {
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+ return 0;
+ if (thumb_shiftable_const (INTVAL (x)))
+ return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
+ }
+ else if ((outer == PLUS || outer == COMPARE)
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+ return 0;
+ else if ((outer == IOR || outer == XOR || outer == AND)
+ && INTVAL (x) < 256 && INTVAL (x) >= -256)
+ return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
+ else if (outer == ASHIFT || outer == ASHIFTRT
+ || outer == LSHIFTRT)
+ return 0;
+ return COSTS_N_INSNS (2);
+
+ case CONST:
+ case CONST_DOUBLE:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return COSTS_N_INSNS (3);
+
+ case UDIV:
+ case UMOD:
+ case DIV:
+ case MOD:
+ return 100;
+
+ case TRUNCATE:
+ return 99;
+
+ case AND:
+ case XOR:
+ case IOR:
+ /* XXX guess. */
+ return 8;
+
+ case MEM:
+ /* XXX another guess. */
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ ? 4 : 0));
+
+ case IF_THEN_ELSE:
+ /* XXX a guess. */
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ return 14;
+ return 2;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ total = mode == DImode ? COSTS_N_INSNS (1) : 0;
+ total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
+
+ if (mode == SImode)
+ return total;
+
+ if (arm_arch6)
+ return total + COSTS_N_INSNS (1);
+
+ /* Assume a two-shift sequence. Increase the cost slightly so
+ we prefer actual shifts over an extend operation. */
+ return total + 1 + COSTS_N_INSNS (2);
+
+ default:
+ return 99;
+ }
+}
+
+static inline bool
+arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+ enum rtx_code subcode;
+ rtx operand;
+ enum rtx_code code = GET_CODE (x);
+ *total = 0;
+
+ switch (code)
+ {
+ case MEM:
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
+ return true;
+
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ *total = COSTS_N_INSNS (2);
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = COSTS_N_INSNS (20);
+ return false;
+
+ case ROTATE:
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = rtx_cost (XEXP (x, 1), code, speed);
+
+ /* Fall through */
+ case ROTATERT:
+ if (mode != SImode)
+ {
+ *total += COSTS_N_INSNS (4);
+ return true;
+ }
+
+ /* Fall through */
+ case ASHIFT: case LSHIFTRT: case ASHIFTRT:
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ if (mode == DImode)
+ {
+ *total += COSTS_N_INSNS (3);
+ return true;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ /* Increase the cost of complex shifts because they aren't any faster,
+ and they reduce dual-issue opportunities. */
+ if (arm_tune_cortex_a9
+ && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
+ ++*total;
+
+ return true;
+
+ case MINUS:
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 0)))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 1));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ /* A shift as a part of RSB costs no more than RSB itself. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+
+ return true;
+ }
+
+ /* Fall through */
+
+ case PLUS:
+ if (code == PLUS && arm_arch6 && mode == SImode
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
+ speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ /* MLA: All arguments must be registers. We filter out
+ multiplication by a power of two, so that we fall through to
+ the code below. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ /* The cost comes from the cost of the multiply. */
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+ return true;
+ }
+
+ /* Fall through */
+
+ case AND: case XOR: case IOR:
+
+ /* Normally the frame registers will be spilt into reg+const during
+ reload, so it is a bad idea to combine them with other instructions,
+ since then they might not be moved outside of loops. As a compromise
+ we allow integration with ops that have a constant as their second
+ operand. */
+ if (REG_OR_SUBREG_REG (XEXP (x, 0))
+ && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
+ && GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = COSTS_N_INSNS (1);
+
+ if (mode == DImode)
+ {
+ *total += COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == UMIN || subcode == UMAX
+ || subcode == SMIN || subcode == SMAX)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
+
+ return false;
+
+ case MULT:
+ /* This should have been handled by the CPU-specific routines. */
+ gcc_unreachable ();
+
+ case TRUNCATE:
+ if (arm_arch3m && mode == SImode
+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
+ == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
+ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
+ return false;
+
+ case NEG:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ /* Fall through */
+ case NOT:
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (mode == SImode && code == NOT)
+ {
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT
+ || (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ /* Register shifts cost an extra cycle. */
+ if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
+ subcode, speed);
+ return true;
+ }
+ }
+
+ return false;
+
+ case IF_THEN_ELSE:
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+
+ operand = XEXP (x, 0);
+
+ if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
+ && GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == CC_REGNUM))
+ *total += COSTS_N_INSNS (1);
+ *total += (rtx_cost (XEXP (x, 1), code, speed)
+ + rtx_cost (XEXP (x, 2), code, speed));
+ return true;
+
+ case NE:
+ if (mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case GE:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case LT:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case EQ:
+ case GT:
+ case LE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
+ scc_insn:
+ /* SCC insns. If the comparison has already been performed, they
+ cost 2 instructions. Otherwise they need an additional comparison
+ before them. */
+ *total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ return true;
+ }
+
+ /* Fall through */
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ *total = 0;
+ return true;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ return false;
+
+ case UMIN:
+ case UMAX:
+ case SMIN:
+ case SMAX:
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+
+ case ABS:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+ *total = COSTS_N_INSNS (1);
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (3);
+ return false;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ *total = 0;
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ rtx op = XEXP (x, 0);
+ enum machine_mode opmode = GET_MODE (op);
+
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
+
+ if (opmode != SImode)
+ {
+ if (MEM_P (op))
+ {
+ /* If !arm_arch4, we use one of the extendhisi2_mem
+ or movhi_bytes patterns for HImode. For a QImode
+ sign extension, we first zero-extend from memory
+ and then perform a shift sequence. */
+ if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
+ *total += COSTS_N_INSNS (2);
+ }
+ else if (arm_arch6)
+ *total += COSTS_N_INSNS (1);
+
+ /* We don't have the necessary insn, so we need to perform some
+ other operation. */
+ else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
+ /* An and with constant 255. */
+ *total += COSTS_N_INSNS (1);
+ else
+ /* A shift sequence. Increase costs slightly to avoid
+ combining two shifts into an extend operation. */
+ *total += COSTS_N_INSNS (2) + 1;
+ }
+
+ return false;
+ }
+
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V4QImode:
+ case V2HImode:
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+
+ case CONST_INT:
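+ /* Constants that cannot be encoded directly (or via MVN) are
+ costed at the number of insns arm_gen_constant would need to
+ synthesize them; its final argument of 0 requests a count only,
+ with no insns emitted. */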
+ if (const_ok_for_arm (INTVAL (x))
+ || const_ok_for_arm (~INTVAL (x)))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
+ INTVAL (x), NULL_RTX,
+ NULL_RTX, 0, 0));
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case HIGH:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case LO_SUM:
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case UNSPEC:
+ /* We cost this as high as our memory costs so that it can be
+ hoisted out of loops. */
+ if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
+ {
+ *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
+ }
+ return true;
+
+ default:
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+}
+
+/* Estimate the size cost of Thumb-1 instructions.
+ For now most of the code is copied from thumb1_rtx_costs; we will need
+ finer-grained tuning when we have more related test cases. */
+static inline int
+thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ case PLUS:
+ case MINUS:
+ case COMPARE:
+ case NEG:
+ case NOT:
+ return COSTS_N_INSNS (1);
+
+ case MULT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ /* The Thumb-1 mul instruction can't operate on a constant; we
+ must load it into a register first. */
+ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
+ return COSTS_N_INSNS (1) + const_size;
+ }
+ return COSTS_N_INSNS (1);
+
+ case SET:
+ return (COSTS_N_INSNS (1)
+ + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
+ + (GET_CODE (SET_DEST (x)) == MEM)));
+
+ case CONST_INT:
+ if (outer == SET)
+ {
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
+ return COSTS_N_INSNS (1);
+ /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
+ if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
+ return COSTS_N_INSNS (2);
+ /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
+ if (thumb_shiftable_const (INTVAL (x)))
+ return COSTS_N_INSNS (2);
+ return COSTS_N_INSNS (3);
+ }
+ else if ((outer == PLUS || outer == COMPARE)
+ && INTVAL (x) < 256 && INTVAL (x) > -256)
+ return 0;
+ else if ((outer == IOR || outer == XOR || outer == AND)
+ && INTVAL (x) < 256 && INTVAL (x) >= -256)
+ return COSTS_N_INSNS (1);
+ else if (outer == AND)
+ {
+ int i;
+ /* This duplicates the tests in the andsi3 expander. */
+ for (i = 9; i <= 31; i++)
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
+ return COSTS_N_INSNS (2);
+ }
+ else if (outer == ASHIFT || outer == ASHIFTRT
+ || outer == LSHIFTRT)
+ return 0;
+ return COSTS_N_INSNS (2);
+
+ case CONST:
+ case CONST_DOUBLE:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return COSTS_N_INSNS (3);
+
+ case UDIV:
+ case UMOD:
+ case DIV:
+ case MOD:
+ return 100;
+
+ case TRUNCATE:
+ return 99;
+
+ case AND:
+ case XOR:
+ case IOR:
+ /* XXX guess. */
+ return 8;
+
+ case MEM:
+ /* XXX another guess. */
+ /* Memory costs quite a lot for the first word, but subsequent words
+ load at the equivalent of a single insn each. */
+ return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ ? 4 : 0));
+
+ case IF_THEN_ELSE:
+ /* XXX a guess. */
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ return 14;
+ return 2;
+
+ case ZERO_EXTEND:
+ /* XXX still guessing. */
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case QImode:
+ return (1 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ case HImode:
+ return (4 + (mode == DImode ? 4 : 0)
+ + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ case SImode:
+ return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+
+ default:
+ return 99;
+ }
+
+ default:
+ return 99;
+ }
+}
+
+/* RTX costs when optimizing for size. */
+static bool
+arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total)
+{
+ enum machine_mode mode = GET_MODE (x);
+ if (TARGET_THUMB1)
+ {
+ *total = thumb1_size_rtx_costs (x, code, outer_code);
+ return true;
+ }
+
+ /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
+ switch (code)
+ {
+ case MEM:
+ /* A memory access costs 1 insn if the mode is small or the address
+ is a single register; otherwise it costs one insn per word. */
+ if (REG_P (XEXP (x, 0)))
+ *total = COSTS_N_INSNS (1);
+ else if (flag_pic
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
+ /* This will be split into two instructions.
+ See arm.md:calculate_pic_address. */
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ return true;
+
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ /* Needs a libcall, so it costs about this. */
+ *total = COSTS_N_INSNS (2);
+ return false;
+
+ case ROTATE:
+ if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
+ return true;
+ }
+ /* Fall through */
+ case ROTATERT:
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
+ return true;
+ }
+ else if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
+ /* Slightly disparage register shifts, but not by much. */
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += 1 + rtx_cost (XEXP (x, 1), code, false);
+ return true;
+ }
+
+ /* Needs a libcall. */
+ *total = COSTS_N_INSNS (2);
+ return false;
+
+ case MINUS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
+ enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
+
+ if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
+ || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
+ || subcode1 == ROTATE || subcode1 == ROTATERT
+ || subcode1 == ASHIFT || subcode1 == LSHIFTRT
+ || subcode1 == ASHIFTRT)
+ {
+ /* It's just the cost of the two operands. */
+ *total = 0;
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ return false;
+
+ case PLUS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+
+ /* A shift as a part of ADD costs nothing. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
+ {
+ *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
+ *total += rtx_cost (XEXP (x, 1), code, false);
+ return true;
+ }
+
+ /* Fall through */
+ case AND: case XOR: case IOR:
+ if (mode == SImode)
+ {
+ enum rtx_code subcode = GET_CODE (XEXP (x, 0));
+
+ if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
+ || subcode == LSHIFTRT || subcode == ASHIFTRT
+ || (code == AND && subcode == NOT))
+ {
+ /* It's just the cost of the two operands. */
+ *total = 0;
+ return false;
+ }
+ }
+
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ return false;
+
+ case MULT:
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ return false;
+
+ case NEG:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+
+ /* Fall through */
+ case NOT:
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+
+ return false;
+
+ case IF_THEN_ELSE:
+ *total = 0;
+ return false;
+
+ case COMPARE:
+ if (cc_register (XEXP (x, 0), VOIDmode))
+ * total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case ABS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
+ return false;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ return arm_rtx_costs_1 (x, outer_code, total, 0);
+
+ case CONST_INT:
+ if (const_ok_for_arm (INTVAL (x)))
+ /* A multiplication by a constant requires another instruction
+ to load the constant to a register. */
+ *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
+ ? 1 : 0);
+ else if (const_ok_for_arm (~INTVAL (x)))
+ *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
+ else if (const_ok_for_arm (-INTVAL (x)))
+ {
+ if (outer_code == COMPARE || outer_code == PLUS
+ || outer_code == MINUS)
+ *total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ }
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case HIGH:
+ case LO_SUM:
+ /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
+ cost of these slightly. */
+ *total = COSTS_N_INSNS (1) + 1;
+ return true;
+
+ default:
+ if (mode != VOIDmode)
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ else
+ *total = COSTS_N_INSNS (4); /* Who knows? */
+ return false;
+ }
+}
+
+/* RTX costs. Dispatch to the size or speed cost model as appropriate. */
+static bool
+arm_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ if (!speed)
+ return arm_size_rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code, total);
+ else
+ return current_tune->rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code,
+ total, speed);
+}
+
+/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
+ supported on any "slowmul" cores, so it can be ignored. */
+
+static bool
+arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_THUMB)
+ {
+ *total = thumb1_rtx_costs (x, code, outer_code);
+ return true;
+ }
+
+ switch (code)
+ {
+ case MULT:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || mode == DImode)
+ {
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
+ & (unsigned HOST_WIDE_INT) 0xffffffff);
+ int cost, const_ok = const_ok_for_arm (i);
+ int j, booth_unit_size;
+
+ /* Tune as appropriate. */
+ cost = const_ok ? 4 : 8;
+ booth_unit_size = 2;
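+ /* One extra cycle per booth_unit_size bits of the constant:
+ slow-MUL cores are modelled here as retiring two bits of the
+ multiplier per cycle. */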
+ for (j = 0; i && j < 32; j += booth_unit_size)
+ {
+ i >>= booth_unit_size;
+ cost++;
+ }
+
+ *total = COSTS_N_INSNS (cost);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ *total = COSTS_N_INSNS (20);
+ return false;
+
+ default:
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+ }
+}
+
+
+/* RTX cost for cores with a fast multiply unit (M variants). */
+
+static bool
+arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_THUMB1)
+ {
+ *total = thumb1_rtx_costs (x, code, outer_code);
+ return true;
+ }
+
+ /* ??? Should Thumb-2 use different costs? */
+ switch (code)
+ {
+ case MULT:
+ /* There is no point basing this on the tuning, since it is always the
+ fast variant if it exists at all. */
+ if (mode == DImode
+ && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (5);
+ return false;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
+ & (unsigned HOST_WIDE_INT) 0xffffffff);
+ int cost, const_ok = const_ok_for_arm (i);
+ int j, booth_unit_size;
+
+ /* Tune as appropriate. */
+ cost = const_ok ? 4 : 8;
+ booth_unit_size = 8;
+ for (j = 0; i && j < 32; j += booth_unit_size)
+ {
+ i >>= booth_unit_size;
+ cost++;
+ }
+
+ *total = COSTS_N_INSNS (cost);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ /* Requires a libcall. */
+ *total = COSTS_N_INSNS (20);
+ return false;
+
+ default:
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+ }
+}
+
+
+/* RTX cost for XScale CPUs. Thumb-2 is not supported on any XScale cores,
+ so it can be ignored. */
+
+static bool
+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_THUMB)
+ {
+ *total = thumb1_rtx_costs (x, code, outer_code);
+ return true;
+ }
+
+ switch (code)
+ {
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) != MULT)
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+
+ /* A COMPARE of a MULT is slow on XScale; the muls instruction
+ will stall until the multiplication is complete. */
+ *total = COSTS_N_INSNS (3);
+ return false;
+
+ case MULT:
+ /* There is no point basing this on the tuning, since it is always the
+ fast variant if it exists at all. */
+ if (mode == DImode
+ && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (5);
+ return false;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ /* If operand 1 is a constant we can more accurately
+ calculate the cost of the multiply. The multiplier can
+ retire 15 bits on the first cycle and a further 12 on the
+ second. We do, of course, have to load the constant into
+ a register first. */
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+ /* There's a general overhead of one cycle. */
+ int cost = 1;
+ unsigned HOST_WIDE_INT masked_const;
+
+ if (i & 0x80000000)
+ i = ~i;
+
+ i &= (unsigned HOST_WIDE_INT) 0xffffffff;
+
+ masked_const = i & 0xffff8000;
+ if (masked_const != 0)
+ {
+ cost++;
+ masked_const = i & 0xf8000000;
+ if (masked_const != 0)
+ cost++;
+ }
+ *total = COSTS_N_INSNS (cost);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (3);
+ return false;
+ }
+
+ /* Requires a libcall. */
+ *total = COSTS_N_INSNS (20);
+ return false;
+
+ default:
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+ }
+}
+
+
+/* RTX costs for 9e (and later) cores. */
+
+static bool
+arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_THUMB1)
+ {
+ switch (code)
+ {
+ case MULT:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ default:
+ *total = thumb1_rtx_costs (x, code, outer_code);
+ return true;
+ }
+ }
+
+ switch (code)
+ {
+ case MULT:
+ /* There is no point basing this on the tuning, since it is always the
+ fast variant if it exists at all. */
+ if (mode == DImode
+ && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (5);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT
+ && (mode == SFmode
+ || (mode == DFmode && !TARGET_VFP_SINGLE)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ *total = COSTS_N_INSNS (20);
+ return false;
+
+ default:
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+ }
+}
+
+/* All address computations that can be done are free, but rtx_cost
+ returns nearly the same value for practically all of them. So we
+ weight the different types of address here in order of preference
+ (most preferred first):
+ PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
+static inline int
+arm_arm_address_cost (rtx x)
+{
+ enum rtx_code c = GET_CODE (x);
+
+ if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
+ return 0;
+ if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
+ return 10;
+
+ if (c == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ return 2;
+
+ if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
+ return 3;
+
+ return 4;
+ }
+
+ return 6;
+}
+
+static inline int
+arm_thumb_address_cost (rtx x)
+{
+ enum rtx_code c = GET_CODE (x);
+
+ if (c == REG)
+ return 1;
+ if (c == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ return 1;
+
+ return 2;
+}
+
+static int
+arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
+}
+
+/* Adjust cost hook for XScale. */
+static bool
+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+ /* Some true dependencies can have a higher cost depending
+ on precisely how certain input operands are used. */
+ if (REG_NOTE_KIND(link) == 0
+ && recog_memoized (insn) >= 0
+ && recog_memoized (dep) >= 0)
+ {
+ int shift_opnum = get_attr_shift (insn);
+ enum attr_type attr_type = get_attr_type (dep);
+
+ /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+ operand for INSN. If we have a shifted input operand and the
+ instruction we depend on is another ALU instruction, then we may
+ have to account for an additional stall. */
+ if (shift_opnum != 0
+ && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
+ {
+ rtx shifted_operand;
+ int opno;
+
+ /* Get the shifted operand. */
+ extract_insn (insn);
+ shifted_operand = recog_data.operand[shift_opnum];
+
+ /* Iterate over all the operands in DEP. If we write an operand
+ that overlaps with SHIFTED_OPERAND, then we have to increase
+ the cost of this dependency. */
+ extract_insn (dep);
+ preprocess_constraints ();
+ for (opno = 0; opno < recog_data.n_operands; opno++)
+ {
+ /* We can ignore strict inputs. */
+ if (recog_data.operand_type[opno] == OP_IN)
+ continue;
+
+ if (reg_overlap_mentioned_p (recog_data.operand[opno],
+ shifted_operand))
+ {
+ *cost = 2;
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+/* Adjust cost hook for Cortex A9. */
+static bool
+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+ switch (REG_NOTE_KIND (link))
+ {
+ case REG_DEP_ANTI:
+ *cost = 0;
+ return false;
+
+ case REG_DEP_TRUE:
+ case REG_DEP_OUTPUT:
+ if (recog_memoized (insn) >= 0
+ && recog_memoized (dep) >= 0)
+ {
+ if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ if (GET_MODE_CLASS
+ (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
+ || GET_MODE_CLASS
+ (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
+ {
+ enum attr_type attr_type_insn = get_attr_type (insn);
+ enum attr_type attr_type_dep = get_attr_type (dep);
+
+ /* By default all dependencies of the form
+ s0 = s0 <op> s1
+ s0 = s0 <op> s2
+ have an extra latency of 1 cycle because
+ of the input and output dependency in this
+ case. However this gets modeled as a true
+ dependency, hence all these checks. */
+ if (REG_P (SET_DEST (PATTERN (insn)))
+ && REG_P (SET_DEST (PATTERN (dep)))
+ && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
+ SET_DEST (PATTERN (dep))))
+ {
+ /* FMACS is a special case where the dependent
+ instruction can be issued 3 cycles before
+ the normal latency in case of an output
+ dependency. */
+ if ((attr_type_insn == TYPE_FMACS
+ || attr_type_insn == TYPE_FMACD)
+ && (attr_type_dep == TYPE_FMACS
+ || attr_type_dep == TYPE_FMACD))
+ {
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ *cost = insn_default_latency (dep) - 3;
+ else
+ *cost = insn_default_latency (dep);
+ return false;
+ }
+ else
+ {
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ *cost = insn_default_latency (dep) + 1;
+ else
+ *cost = insn_default_latency (dep);
+ }
+ return false;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return true;
+}
+
+/* Adjust cost hook for FA726TE. */
+static bool
+fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+ /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn
+ followed by a predicated one) has a penalty of 3. */
+ if (REG_NOTE_KIND (link) == REG_DEP_TRUE
+ && recog_memoized (insn) >= 0
+ && recog_memoized (dep) >= 0
+ && get_attr_conds (dep) == CONDS_SET)
+ {
+ /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
+ if (get_attr_conds (insn) == CONDS_USE
+ && get_attr_type (insn) != TYPE_BRANCH)
+ {
+ *cost = 3;
+ return false;
+ }
+
+ if (GET_CODE (PATTERN (insn)) == COND_EXEC
+ || get_attr_conds (insn) == CONDS_USE)
+ {
+ *cost = 0;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
+ It corrects the value of COST based on the relationship between
+ INSN and DEP through the dependence LINK. It returns the new
+ value. There is a per-core adjust_cost hook to adjust scheduler costs
+ and the per-core hook can choose to completely override the generic
+ adjust_cost function. Only put bits of code into arm_adjust_cost that
+ are common across all cores. */
+static int
+arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
+{
+ rtx i_pat, d_pat;
+
+ /* When generating Thumb-1 code, we want to place flag-setting operations
+ close to a conditional branch which depends on them, so that we can
+ omit the comparison. */
+ if (TARGET_THUMB1
+ && REG_NOTE_KIND (link) == 0
+ && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
+ && recog_memoized (dep) >= 0
+ && get_attr_conds (dep) == CONDS_SET)
+ return 0;
+
+ if (current_tune->sched_adjust_cost != NULL)
+ {
+ if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
+ return cost;
+ }
+
+ /* XXX This is not strictly true for the FPA. */
+ if (REG_NOTE_KIND (link) == REG_DEP_ANTI
+ || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ return 0;
+
+ /* Call insns don't incur a stall, even if they follow a load. */
+ if (REG_NOTE_KIND (link) == 0
+ && GET_CODE (insn) == CALL_INSN)
+ return 1;
+
+ if ((i_pat = single_set (insn)) != NULL
+ && GET_CODE (SET_SRC (i_pat)) == MEM
+ && (d_pat = single_set (dep)) != NULL
+ && GET_CODE (SET_DEST (d_pat)) == MEM)
+ {
+ rtx src_mem = XEXP (SET_SRC (i_pat), 0);
+ /* This is a load after a store; there is no conflict if the load
+ reads from a cached area. Assume that loads from the stack and
+ from the constant pool are cached, and that others will miss.
+ This is a hack. */
+
+ if ((GET_CODE (src_mem) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (src_mem))
+ || reg_mentioned_p (stack_pointer_rtx, src_mem)
+ || reg_mentioned_p (frame_pointer_rtx, src_mem)
+ || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
+ return 1;
+ }
+
+ return cost;
+}
+
+static int fp_consts_inited = 0;
+
+/* Only zero is valid for VFP. Other values are also valid for FPA. */
+static const char * const strings_fp[8] =
+{
+ "0", "1", "2", "3",
+ "4", "5", "0.5", "10"
+};
+
+static REAL_VALUE_TYPE values_fp[8];
+
+static void
+init_fp_table (void)
+{
+ int i;
+ REAL_VALUE_TYPE r;
+
+ if (TARGET_VFP)
+ fp_consts_inited = 1;
+ else
+ fp_consts_inited = 8;
+
+ for (i = 0; i < fp_consts_inited; i++)
+ {
+ r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
+ values_fp[i] = r;
+ }
+}
+
+/* Return TRUE if rtx X is a valid immediate FP constant. */
+int
+arm_const_double_rtx (rtx x)
+{
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (!fp_consts_inited)
+ init_fp_table ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ if (REAL_VALUE_MINUS_ZERO (r))
+ return 0;
+
+ for (i = 0; i < fp_consts_inited; i++)
+ if (REAL_VALUES_EQUAL (r, values_fp[i]))
+ return 1;
+
+ return 0;
+}
+
+/* Return TRUE if rtx X is a valid immediate FPA constant. */
+int
+neg_const_double_rtx_ok_for_fpa (rtx x)
+{
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (!fp_consts_inited)
+ init_fp_table ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ r = real_value_negate (&r);
+ if (REAL_VALUE_MINUS_ZERO (r))
+ return 0;
+
+ for (i = 0; i < 8; i++)
+ if (REAL_VALUES_EQUAL (r, values_fp[i]))
+ return 1;
+
+ return 0;
+}
+
+
+/* VFPv3 has a fairly wide range of representable immediates, formed from
+ "quarter-precision" floating-point values. These can be evaluated using this
+ formula (with ^ for exponentiation):
+
+ -1^s * n * 2^-r
+
+ Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
+ 16 <= n <= 31 and 0 <= r <= 7.
+
+ These values are mapped onto an 8-bit integer ABCDEFGH s.t.
+
+ - A (most-significant) is the sign bit.
+ - BCD are the exponent (encoded as r XOR 3).
+ - EFGH are the mantissa (encoded as n - 16).
+*/
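+
+/* For example, 1.0 = 16 * 2^-4: s = 0, n = 16, r = 4, giving
+ ABCDEFGH = 0 111 0000 = 0x70 (BCD = 4 XOR 3 = 7, EFGH = 16 - 16 = 0).
+ Representable magnitudes therefore range from 16 * 2^-7 = 0.125 up
+ to 31 * 2^0 = 31. */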
+
+/* Return an integer index for a VFPv3 immediate operand X suitable for the
+ fconst[sd] instruction, or -1 if X isn't suitable. */
+static int
+vfp3_const_double_index (rtx x)
+{
+ REAL_VALUE_TYPE r, m;
+ int sign, exponent;
+ unsigned HOST_WIDE_INT mantissa, mant_hi;
+ unsigned HOST_WIDE_INT mask;
+ HOST_WIDE_INT m1, m2;
+ int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+
+ if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
+ return -1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ /* We can't represent these things, so detect them first. */
+ if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
+ return -1;
+
+ /* Extract sign, exponent and mantissa. */
+ sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
+ r = real_value_abs (&r);
+ exponent = REAL_EXP (&r);
+ /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+ highest (sign) bit, with a fixed binary point at bit point_pos.
+ WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
+ bits for the mantissa, this may fail (low bits would be lost). */
+ real_ldexp (&m, &r, point_pos - exponent);
+ REAL_VALUE_TO_INT (&m1, &m2, m);
+ mantissa = m1;
+ mant_hi = m2;
+
+ /* If there are bits set in the low part of the mantissa, we can't
+ represent this value. */
+ if (mantissa != 0)
+ return -1;
+
+ /* Now make it so that mantissa contains the most-significant bits, and move
+ the point_pos to indicate that the least-significant bits have been
+ discarded. */
+ point_pos -= HOST_BITS_PER_WIDE_INT;
+ mantissa = mant_hi;
+
+ /* We can permit four significant bits of mantissa only, plus a high bit
+ which is always 1. */
+ mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+ if ((mantissa & mask) != 0)
+ return -1;
+
+ /* Now we know the mantissa is in range, chop off the unneeded bits. */
+ mantissa >>= point_pos - 5;
+
+ /* The mantissa may be zero. Disallow that case. (It's possible to load the
+ floating-point immediate zero with Neon using an integer-zero load, but
+ that case is handled elsewhere.) */
+ if (mantissa == 0)
+ return -1;
+
+ gcc_assert (mantissa >= 16 && mantissa <= 31);
+
+ /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
+ normalized significands are in the range [1, 2). (Our mantissa is shifted
+ left 4 places at this point relative to normalized IEEE754 values). GCC
+ internally uses [0.5, 1) (see real.c), so the exponent returned from
+ REAL_EXP must be altered. */
+ exponent = 5 - exponent;
+
+ if (exponent < 0 || exponent > 7)
+ return -1;
+
+ /* Sign, mantissa and exponent are now in the correct form to plug into the
+ formula described in the comment above. */
+ return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
+}
+
+/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
+int
+vfp3_const_double_rtx (rtx x)
+{
+ if (!TARGET_VFP3)
+ return 0;
+
+ return vfp3_const_double_index (x) != -1;
+}
+
+/* Recognize immediates which can be used in various Neon instructions. Legal
+ immediates are described by the following table (for VMVN variants, the
+ bitwise inverse of the constant shown is recognized. In either case, VMOV
+ is output and the correct instruction to use for a given constant is chosen
+ by the assembler). The constant shown is replicated across all elements of
+ the destination vector.
+
+ insn elems variant constant (binary)
+ ---- ----- ------- -----------------
+ vmov i32 0 00000000 00000000 00000000 abcdefgh
+ vmov i32 1 00000000 00000000 abcdefgh 00000000
+ vmov i32 2 00000000 abcdefgh 00000000 00000000
+ vmov i32 3 abcdefgh 00000000 00000000 00000000
+ vmov i16 4 00000000 abcdefgh
+ vmov i16 5 abcdefgh 00000000
+ vmvn i32 6 00000000 00000000 00000000 abcdefgh
+ vmvn i32 7 00000000 00000000 abcdefgh 00000000
+ vmvn i32 8 00000000 abcdefgh 00000000 00000000
+ vmvn i32 9 abcdefgh 00000000 00000000 00000000
+ vmvn i16 10 00000000 abcdefgh
+ vmvn i16 11 abcdefgh 00000000
+ vmov i32 12 00000000 00000000 abcdefgh 11111111
+ vmvn i32 13 00000000 00000000 abcdefgh 11111111
+ vmov i32 14 00000000 abcdefgh 11111111 11111111
+ vmvn i32 15 00000000 abcdefgh 11111111 11111111
+ vmov i8 16 abcdefgh
+ vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
+ eeeeeeee ffffffff gggggggg hhhhhhhh
+ vmov f32 18 aBbbbbbc defgh000 00000000 00000000
+
+ For case 18, B = !b. Representable values are exactly those accepted by
+ vfp3_const_double_index, but are output as floating-point numbers rather
+ than indices.
+
+ Variants 0-5 (inclusive) may also be used as immediates for the second
+ operand of VORR/VBIC instructions.
+
+ The INVERSE argument causes the bitwise inverse of the given operand to be
+ recognized instead (used for recognizing legal immediates for the VAND/VORN
+ pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
+ *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
+ output, rather than the real insns vbic/vorr).
+
+ INVERSE makes no difference to the recognition of float vectors.
+
+ The return value is the variant of immediate as shown in the above table, or
+ -1 if the given value doesn't match any of the listed patterns.
+*/
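+
+/* For example: a V4SImode vector whose four elements are all 0x0000ab00 is
+ recognized as variant 1 above with abcdefgh = 0xab; *MODCONST is set to
+ (const_int 0x0000ab00) and *ELEMENTWIDTH to 32. */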
+static int
+neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
+ rtx *modconst, int *elementwidth)
+{
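+/* Check bytes 0, STRIDE, 2*STRIDE, ... of the BYTES array against TEST; if
+ TEST holds for every byte examined, record CLASS as the matched variant
+ and ELSIZE as the element width in bits, then stop searching. */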
+#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
+ matches = 1; \
+ for (i = 0; i < idx; i += (STRIDE)) \
+ if (!(TEST)) \
+ matches = 0; \
+ if (matches) \
+ { \
+ immtype = (CLASS); \
+ elsize = (ELSIZE); \
+ break; \
+ }
+
+ unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+ unsigned char bytes[16];
+ int immtype = -1, matches;
+ unsigned int invmask = inverse ? 0xff : 0;
+
+ /* Vectors of float constants. */
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx el0 = CONST_VECTOR_ELT (op, 0);
+ REAL_VALUE_TYPE r0;
+
+ if (!vfp3_const_double_rtx (el0))
+ return -1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
+
+ for (i = 1; i < n_elts; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, i);
+ REAL_VALUE_TYPE re;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
+
+ if (!REAL_VALUES_EQUAL (r0, re))
+ return -1;
+ }
+
+ if (modconst)
+ *modconst = CONST_VECTOR_ELT (op, 0);
+
+ if (elementwidth)
+ *elementwidth = 0;
+
+ return 18;
+ }
+
+ /* Splat vector constant out into a byte vector. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (op, i);
+ unsigned HOST_WIDE_INT elpart;
+ unsigned int part, parts;
+
+ if (GET_CODE (el) == CONST_INT)
+ {
+ elpart = INTVAL (el);
+ parts = 1;
+ }
+ else if (GET_CODE (el) == CONST_DOUBLE)
+ {
+ elpart = CONST_DOUBLE_LOW (el);
+ parts = 2;
+ }
+ else
+ gcc_unreachable ();
+
+ for (part = 0; part < parts; part++)
+ {
+ unsigned int byte;
+ for (byte = 0; byte < innersize; byte++)
+ {
+ bytes[idx++] = (elpart & 0xff) ^ invmask;
+ elpart >>= BITS_PER_UNIT;
+ }
+ if (GET_CODE (el) == CONST_DOUBLE)
+ elpart = CONST_DOUBLE_HIGH (el);
+ }
+ }
+
+ /* Sanity check. */
+ gcc_assert (idx == GET_MODE_SIZE (mode));
+
+ do
+ {
+ CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
+
+ CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
+
+ CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (1, 8, 16, bytes[i] == bytes[0]);
+
+ CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+ && bytes[i] == bytes[(i + 8) % idx]);
+ }
+ while (0);
+
+ if (immtype == -1)
+ return -1;
+
+ if (elementwidth)
+ *elementwidth = elsize;
+
+ if (modconst)
+ {
+ unsigned HOST_WIDE_INT imm = 0;
+
+ /* Un-invert bytes of recognized vector, if necessary. */
+ if (invmask != 0)
+ for (i = 0; i < idx; i++)
+ bytes[i] ^= invmask;
+
+ if (immtype == 17)
+ {
+ /* FIXME: Broken on 32-bit H_W_I hosts. */
+ gcc_assert (sizeof (HOST_WIDE_INT) == 8);
+
+ for (i = 0; i < 8; i++)
+ imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
+ << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ else
+ {
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ }
+
+ return immtype;
+#undef CHECK
+}
+
+/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
+ VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
+ float elements), and a modified constant (whatever should be output for a
+ VMOV) in *MODCONST. */
+
+int
+neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+ rtx *modconst, int *elementwidth)
+{
+ rtx tmpconst;
+ int tmpwidth;
+ int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
+
+ if (retval == -1)
+ return 0;
+
+ if (modconst)
+ *modconst = tmpconst;
+
+ if (elementwidth)
+ *elementwidth = tmpwidth;
+
+ return 1;
+}
+
+/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
+ the immediate is valid, write a constant suitable for using as an operand
+ to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
+ *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
+
+int
+neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+ rtx *modconst, int *elementwidth)
+{
+ rtx tmpconst;
+ int tmpwidth;
+ int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
+
+ if (retval < 0 || retval > 5)
+ return 0;
+
+ if (modconst)
+ *modconst = tmpconst;
+
+ if (elementwidth)
+ *elementwidth = tmpwidth;
+
+ return 1;
+}
+
+/* Return a string suitable for output of Neon immediate logic operation
+ MNEM. */
+
+char *
+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+ int inverse, int quad)
+{
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (quad)
+ sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
+ else
+ sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
+
+ return templ;
+}
+
+/* Output a sequence of pairwise operations to implement a reduction.
+ NOTE: We do "too much work" here, because pairwise operations work on two
+ registers-worth of operands in one go. Unfortunately, we don't think those
+ extra calculations can be exploited to do the full operation in fewer steps.
+ Although all vector elements of the result but the first are ignored, we
+ actually calculate the same result in each of the elements. An alternative
+ such as initially loading a vector with zero to use as each of the second
+ operands would use up an additional register and take an extra instruction,
+ for no particular gain. */
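+
+/* For example, for a V4SImode sum with a vpadd-style REDUC the loop runs
+ with I = 2 and then I = 1, emitting two pairwise additions; the second
+ writes the full reduction into OP0. */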
+
+void
+neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
+ rtx (*reduc) (rtx, rtx, rtx))
+{
+ enum machine_mode inner = GET_MODE_INNER (mode);
+ unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
+ rtx tmpsum = op1;
+
+ for (i = parts / 2; i >= 1; i /= 2)
+ {
+ rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
+ emit_insn (reduc (dest, tmpsum, tmpsum));
+ tmpsum = dest;
+ }
+}
+
+/* If VALS is a vector constant that can be loaded into a register
+ using VDUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX. */
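+
+/* E.g. a V2SImode CONST_VECTOR of two equal (const_int 42) elements is
+ handled by moving 42 into a core register and returning
+ (vec_duplicate:V2SI (reg)), which the Neon vdup patterns should match. */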
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+ return NULL_RTX;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ /* The elements are not all the same. We could handle repeating
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using
+ vdup.i16). */
+ return NULL_RTX;
+
+ /* We can load this constant by using VDUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ return gen_rtx_VEC_DUPLICATE (mode, x);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that can not be converted into a CONST_VECTOR. */
+
+rtx
+neon_make_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ rtx target;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
+
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
+ {
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ }
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */
+ return const_vec;
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
+ pipeline cycle; creating the constant takes one or two ARM
+ pipeline cycles. */
+ return target;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. On Cortex-A8 this takes two cycles
+ (for either double or quad vectors). We can not take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing
+ mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We can not construct an initializer. */
+ return NULL_RTX;
+}
+
+/* Initialize vector TARGET to VALS. */
+
+void
+neon_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ rtx constant = neon_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_DUPLICATE (mode, x)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtx index = GEN_INT (one_var);
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ neon_expand_vector_init (target, copy);
+
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ switch (mode)
+ {
+ case V8QImode:
+ emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
+ break;
+ case V16QImode:
+ emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
+ break;
+ case V4HImode:
+ emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
+ break;
+ case V8HImode:
+ emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
+ break;
+ case V2SImode:
+ emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
+ break;
+ case V4SImode:
+ emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
+ break;
+ case V2SFmode:
+ emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
+ break;
+ case V4SFmode:
+ emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
+ break;
+ case V2DImode:
+ emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
+ emit_move_insn (target, mem);
+}
+
+/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
+ the error message ERR if it does not. FIXME: NEON bounds checks occur late
+ in compilation, so reported source locations are bogus. */
+
+static void
+bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
+ const char *err)
+{
+ HOST_WIDE_INT lane;
+
+ gcc_assert (GET_CODE (operand) == CONST_INT);
+
+ lane = INTVAL (operand);
+
+ if (lane < low || lane >= high)
+ error (err);
+}
+
+/* Bounds-check lanes. */
+
+void
+neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+{
+ bounds_check (operand, low, high, "lane out of range");
+}
+
+/* Bounds-check constants. */
+
+void
+neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
+{
+ bounds_check (operand, low, high, "constant out of range");
+}
+
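+/* Return the element width of MODE in bits: the full width for DImode,
+ otherwise the width of the vector's inner mode. */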
+HOST_WIDE_INT
+neon_element_bits (enum machine_mode mode)
+{
+ if (mode == DImode)
+ return GET_MODE_BITSIZE (mode);
+ else
+ return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+}
+
+
+/* Predicates for `match_operand' and `match_operator'. */
+
+/* Return nonzero if OP is a valid Cirrus memory address pattern. */
+int
+cirrus_memory_offset (rtx op)
+{
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return 0;
+
+ if (GET_CODE (op) == MEM)
+ {
+ rtx ind;
+
+ ind = XEXP (op, 0);
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return 1;
+
+ /* Match:
+ (mem (plus (reg)
+ (const))). */
+ if (GET_CODE (ind) == PLUS
+ && GET_CODE (XEXP (ind, 0)) == REG
+ && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
+ && GET_CODE (XEXP (ind, 1)) == CONST_INT)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return TRUE if OP is a valid coprocessor memory address pattern.
+ WB is true if all writeback addressing modes are allowed; when it is
+ false, only the limited writeback modes POST_INC and PRE_DEC are
+ accepted. */
+
+int
+arm_coproc_mem_operand (rtx op, bool wb)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Autoincrement addressing modes. POST_INC and PRE_DEC are
+ acceptable in any case (subject to verification by
+ arm_address_register_rtx_p). We need WB to be true to accept
+ PRE_INC and POST_DEC. */
+ if (GET_CODE (ind) == POST_INC
+ || GET_CODE (ind) == PRE_DEC
+ || (wb
+ && (GET_CODE (ind) == PRE_INC
+ || GET_CODE (ind) == POST_DEC)))
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
+ if (wb
+ && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
+ && arm_address_register_rtx_p (XEXP (ind, 0), 0)
+ && GET_CODE (XEXP (ind, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
+ ind = XEXP (ind, 1);
+
+ /* Match:
+ (plus (reg)
+ (const)). */
+ if (GET_CODE (ind) == PLUS
+ && GET_CODE (XEXP (ind, 0)) == REG
+ && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
+ && GET_CODE (XEXP (ind, 1)) == CONST_INT
+ && INTVAL (XEXP (ind, 1)) > -1024
+ && INTVAL (XEXP (ind, 1)) < 1024
+ && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ return TRUE;
+
+ return FALSE;
+}
+
+/* Return TRUE if OP is a memory operand from or to which we can load or
+ store a vector. TYPE is one of the following values:
+ 0 - Vector load/store (vldr)
+ 1 - Core registers (ldm)
+ 2 - Element/structure loads (vld1)
+ */
+int
+neon_vector_mem_operand (rtx op, int type)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Allow post-increment with Neon registers (any type except core-register
+ loads), and pre-decrement for vldr/vstr (type 0). */
+ if ((type != 1 && GET_CODE (ind) == POST_INC)
+ || (type == 0 && GET_CODE (ind) == PRE_DEC))
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
+ /* FIXME: vld1 allows register post-modify. */
+
+ /* Match:
+ (plus (reg)
+ (const)). */
+ if (type == 0
+ && GET_CODE (ind) == PLUS
+ && GET_CODE (XEXP (ind, 0)) == REG
+ && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
+ && GET_CODE (XEXP (ind, 1)) == CONST_INT
+ && INTVAL (XEXP (ind, 1)) > -1024
+ && INTVAL (XEXP (ind, 1)) < 1016
+ && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ return TRUE;
+
+ return FALSE;
+}
+
+/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
+ type. */
+int
+neon_struct_mem_operand (rtx op)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ return FALSE;
+}
+
+/* Return true if X is a register that will be eliminated later on. */
+int
+arm_eliminable_register (rtx x)
+{
+ return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
+ || REGNO (x) == ARG_POINTER_REGNUM
+ || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (x) <= LAST_VIRTUAL_REGISTER));
+}
+
+/* Return GENERAL_REGS if a scratch register is required to reload X to/from
+ coprocessor registers. Otherwise return NO_REGS. */
+
+enum reg_class
+coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
+{
+ if (mode == HFmode)
+ {
+ if (!TARGET_NEON_FP16)
+ return GENERAL_REGS;
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
+ return NO_REGS;
+ return GENERAL_REGS;
+ }
+
+ if (TARGET_NEON
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && neon_vector_mem_operand (x, 0))
+ return NO_REGS;
+
+ if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
+ return NO_REGS;
+
+ return GENERAL_REGS;
+}
+
+/* Values which must be returned in the most-significant end of the return
+ register. */
+
+static bool
+arm_return_in_msb (const_tree valtype)
+{
+ return (TARGET_AAPCS_BASED
+ && BYTES_BIG_ENDIAN
+ && (AGGREGATE_TYPE_P (valtype)
+ || TREE_CODE (valtype) == COMPLEX_TYPE));
+}
+
+/* Return TRUE if INSN is an "LDR REG, ADDR" instruction.
+ Used by the Cirrus Maverick code, which has to work around
+ a hardware bug triggered by such instructions. */
+static bool
+arm_memory_load_p (rtx insn)
+{
+ rtx body, lhs, rhs;
+
+ if (insn == NULL_RTX || GET_CODE (insn) != INSN)
+ return false;
+
+ body = PATTERN (insn);
+
+ if (GET_CODE (body) != SET)
+ return false;
+
+ lhs = XEXP (body, 0);
+ rhs = XEXP (body, 1);
+
+ lhs = REG_OR_SUBREG_RTX (lhs);
+
+ /* If the destination is not a general purpose
+ register we do not have to worry. */
+ if (GET_CODE (lhs) != REG
+ || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
+ return false;
+
+ /* As well as loads from memory we also have to react
+ to loads of invalid constants which will be turned
+ into loads from the minipool. */
+ return (GET_CODE (rhs) == MEM
+ || GET_CODE (rhs) == SYMBOL_REF
+ || note_invalid_constants (insn, -1, false));
+}
+
+/* Return TRUE if INSN is a Cirrus instruction. */
+static bool
+arm_cirrus_insn_p (rtx insn)
+{
+ enum attr_cirrus attr;
+
+ /* get_attr cannot accept USE or CLOBBER. */
+ if (!insn
+ || GET_CODE (insn) != INSN
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return 0;
+
+ attr = get_attr_cirrus (insn);
+
+ return attr != CIRRUS_NOT;
+}
+
+/* Cirrus reorg for invalid instruction combinations. */
+static void
+cirrus_reorg (rtx first)
+{
+ enum attr_cirrus attr;
+ rtx body = PATTERN (first);
+ rtx t;
+ int nops;
+
+ /* Any branch must be followed by 2 non-Cirrus instructions. */
+ if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
+ {
+ nops = 0;
+ t = next_nonnote_insn (first);
+
+ if (arm_cirrus_insn_p (t))
+ ++ nops;
+
+ if (arm_cirrus_insn_p (next_nonnote_insn (t)))
+ ++ nops;
+
+ while (nops --)
+ emit_insn_after (gen_nop (), first);
+
+ return;
+ }
+
+ /* (float (blah)) is in parallel with a clobber. */
+ if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
+ body = XVECEXP (body, 0, 0);
+
+ if (GET_CODE (body) == SET)
+ {
+ rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
+
+ /* cfldrd, cfldr64, cfstrd, cfstr64 must
+ be followed by a non-Cirrus insn. */
+ if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
+ {
+ if (arm_cirrus_insn_p (next_nonnote_insn (first)))
+ emit_insn_after (gen_nop (), first);
+
+ return;
+ }
+ else if (arm_memory_load_p (first))
+ {
+ unsigned int arm_regno;
+
+ /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
+ ldr/cfmv64hr combination where the Rd field is the same
+ in both instructions must be split with a non-Cirrus
+ insn. Example:
+
+ ldr r0, blah
+ nop
+ cfmvsr mvf0, r0. */
+
+ /* Get Arm register number for ldr insn. */
+ if (GET_CODE (lhs) == REG)
+ arm_regno = REGNO (lhs);
+ else
+ {
+ gcc_assert (GET_CODE (rhs) == REG);
+ arm_regno = REGNO (rhs);
+ }
+
+ /* Next insn. */
+ first = next_nonnote_insn (first);
+
+ if (! arm_cirrus_insn_p (first))
+ return;
+
+ body = PATTERN (first);
+
+ /* (float (blah)) is in parallel with a clobber. */
+ if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
+ body = XVECEXP (body, 0, 0);
+
+ if (GET_CODE (body) == FLOAT)
+ body = XEXP (body, 0);
+
+ if (get_attr_cirrus (first) == CIRRUS_MOVE
+ && GET_CODE (XEXP (body, 1)) == REG
+ && arm_regno == REGNO (XEXP (body, 1)))
+ emit_insn_after (gen_nop (), first);
+
+ return;
+ }
+ }
+
+ /* get_attr cannot accept USE or CLOBBER. */
+ if (!first
+ || GET_CODE (first) != INSN
+ || GET_CODE (PATTERN (first)) == USE
+ || GET_CODE (PATTERN (first)) == CLOBBER)
+ return;
+
+ attr = get_attr_cirrus (first);
+
+ /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
+ must be followed by a non-coprocessor instruction. */
+ if (attr == CIRRUS_COMPARE)
+ {
+ nops = 0;
+
+ t = next_nonnote_insn (first);
+
+ if (arm_cirrus_insn_p (t))
+ ++ nops;
+
+ if (arm_cirrus_insn_p (next_nonnote_insn (t)))
+ ++ nops;
+
+ while (nops --)
+ emit_insn_after (gen_nop (), first);
+
+ return;
+ }
+}
+
+/* Return TRUE if X references a SYMBOL_REF. */
+int
+symbol_mentioned_p (rtx x)
+{
+ const char * fmt;
+ int i;
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ return 1;
+
+ /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
+ are constant offsets, not symbols. */
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
+ return 0;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (symbol_mentioned_p (XVECEXP (x, i, j)))
+ return 1;
+ }
+ else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return TRUE if X references a LABEL_REF. */
+int
+label_mentioned_p (rtx x)
+{
+ const char * fmt;
+ int i;
+
+ if (GET_CODE (x) == LABEL_REF)
+ return 1;
+
+ /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
+ instruction, but they are constant offsets, not symbols. */
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
+ return 0;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (label_mentioned_p (XVECEXP (x, i, j)))
+ return 1;
+ }
+ else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
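+/* Return TRUE if X contains an UNSPEC_TLS reference, possibly wrapped in a
+ CONST. */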
+int
+tls_mentioned_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ return tls_mentioned_p (XEXP (x, 0));
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_TLS)
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Subroutine of arm_cannot_copy_insn_p: return nonzero if *X is an UNSPEC
+ forming a pc-relative address (a PIC base), which must not be copied. */
+
+static int
+arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == UNSPEC
+ && (XINT (*x, 1) == UNSPEC_PIC_BASE
+ || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
+ return 1;
+ return 0;
+}
+
+static bool
+arm_cannot_copy_insn_p (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
+}
+
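+/* Return the comparison code implementing the min/max operation X:
+ SMAX -> GE, SMIN -> LE, UMIN -> LEU, UMAX -> GEU. */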
+enum rtx_code
+minmax_code (rtx x)
+{
+ enum rtx_code code = GET_CODE (x);
+
+ switch (code)
+ {
+ case SMAX:
+ return GE;
+ case SMIN:
+ return LE;
+ case UMIN:
+ return LEU;
+ case UMAX:
+ return GEU;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return 1 if memory locations A and B are adjacent: same base register and
+ offsets differing by exactly 4 (e.g. [r0] and [r0, #4]). */
+int
+adjacent_mem_locations (rtx a, rtx b)
+{
+ /* We don't guarantee to preserve the order of these memory refs. */
+ if (volatile_refs_p (a) || volatile_refs_p (b))
+ return 0;
+
+ if ((GET_CODE (XEXP (a, 0)) == REG
+ || (GET_CODE (XEXP (a, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
+ && (GET_CODE (XEXP (b, 0)) == REG
+ || (GET_CODE (XEXP (b, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
+ {
+ HOST_WIDE_INT val0 = 0, val1 = 0;
+ rtx reg0, reg1;
+ int val_diff;
+
+ if (GET_CODE (XEXP (a, 0)) == PLUS)
+ {
+ reg0 = XEXP (XEXP (a, 0), 0);
+ val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+ }
+ else
+ reg0 = XEXP (a, 0);
+
+ if (GET_CODE (XEXP (b, 0)) == PLUS)
+ {
+ reg1 = XEXP (XEXP (b, 0), 0);
+ val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+ }
+ else
+ reg1 = XEXP (b, 0);
+
+ /* Don't accept any offset that will require multiple
+ instructions to handle, since this would cause the
+ arith_adjacentmem pattern to output an overlong sequence. */
+ if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
+ return 0;
+
+ /* Don't allow an eliminable register: register elimination can make
+ the offset too large. */
+ if (arm_eliminable_register (reg0))
+ return 0;
+
+ val_diff = val1 - val0;
+
+ if (arm_ld_sched)
+ {
+ /* If the target has load delay slots, then there's no benefit
+ to using an ldm instruction unless the offset is zero and
+ we are optimizing for size. */
+ return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+ && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+ && (val_diff == 4 || val_diff == -4));
+ }
+
+ return ((REGNO (reg0) == REGNO (reg1))
+ && (val_diff == 4 || val_diff == -4));
+ }
+
+ return 0;
+}
+
+/* Return true iff it would be profitable to turn a sequence of NOPS loads
+ or stores (depending on IS_STORE) into a load-multiple or store-multiple
+ instruction. ADD_OFFSET is nonzero if the base address register needs
+ to be modified with an add instruction before we can use it. */
+
+static bool
+multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
+ int nops, HOST_WIDE_INT add_offset)
+{
+ /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
+ if the offset isn't small enough. The reason 2 ldrs are faster
+ is because these ARMs are able to do more than one cache access
+ in a single cycle. The ARM9 and StrongARM have Harvard caches,
+ whilst the ARM8 has a double bandwidth cache. This means that
+ these cores can do both an instruction fetch and a data fetch in
+ a single cycle, so the trick of calculating the address into a
+ scratch register (one of the result regs) and then doing a load
+ multiple actually becomes slower (and no smaller in code size).
+ That is the transformation
+
+ ldr rd1, [rbase + offset]
+ ldr rd2, [rbase + offset + 4]
+
+ to
+
+ add rd1, rbase, offset
+ ldmia rd1, {rd1, rd2}
+
+ produces worse code -- '3 cycles + any stalls on rd2' instead of
+ '2 cycles + any stalls on rd2'. On ARMs with only one cache
+ access per cycle, the first sequence could never complete in less
+ than 6 cycles, whereas the ldm sequence would only take 5 and
+ would make better use of sequential accesses if not hitting the
+ cache.
+
+ We cheat here and test 'arm_ld_sched' which we currently know to
+ only be true for the ARM8, ARM9 and StrongARM. If this ever
+ changes, then the test below needs to be reworked. */
+ if (nops == 2 && arm_ld_sched && add_offset != 0)
+ return false;
+
+ /* XScale has load-store double instructions, but they have stricter
+ alignment requirements than load-store multiple, so we cannot
+ use them.
+
+ For XScale ldm requires 2 + NREGS cycles to complete and blocks
+ the pipeline until completion.
+
+ NREGS CYCLES
+ 1 3
+ 2 4
+ 3 5
+ 4 6
+
+ An ldr instruction takes 1-3 cycles, but does not block the
+ pipeline.
+
+ NREGS CYCLES
+ 1 1-3
+ 2 2-6
+ 3 3-9
+ 4 4-12
+
+ Best case ldr will always win. However, the more ldr instructions
+ we issue, the less likely we are to be able to schedule them well.
+ Using ldr instructions also increases code size.
+
+ As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+ for counts of 3 or 4 regs. */
+ if (nops <= 2 && arm_tune_xscale && !optimize_size)
+ return false;
+ return true;
+}
+
+/* Subroutine of load_multiple_sequence and store_multiple_sequence.
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
+ an array ORDER describing the sequence in which to access the offsets so
+ that they come out in ascending order. In this sequence, each offset
+ must be larger by exactly 4 than the previous one. ORDER[0]
+ must have been filled in with the lowest offset by the caller.
+ If UNSORTED_REGS is nonnull, it is an array of register numbers that
+ we use to verify that ORDER produces an ascending order of registers.
+ Return true if it was possible to construct such an order, false if
+ not. */
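+
+/* For illustration: with NOPS = 4, UNSORTED_OFFSETS = {8, 0, 4, 12} and
+ ORDER[0] = 1 (the position of offset 0), ORDER becomes {1, 2, 0, 3}.
+ Offsets {0, 4, 12} would fail, since no offset equals 8. */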
+
+static bool
+compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
+ int *unsorted_regs)
+{
+ int i;
+ for (i = 1; i < nops; i++)
+ {
+ int j;
+
+ order[i] = order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
+ {
+ /* We must find exactly one offset that is higher than the
+ previous one by 4. */
+ if (order[i] != order[i - 1])
+ return false;
+ order[i] = j;
+ }
+ if (order[i] == order[i - 1])
+ return false;
+ /* The register numbers must be ascending. */
+ if (unsorted_regs != NULL
+ && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
+ return false;
+ }
+ return true;
+}
+
+/* Used to determine in a peephole whether a sequence of load
+ instructions can be changed into a load-multiple instruction.
+ NOPS is the number of separate load instructions we are examining. The
+ first NOPS entries in OPERANDS are the destination registers, the
+ next NOPS entries are memory operands. If this function is
+ successful, *BASE is set to the common base register of the memory
+ accesses; *LOAD_OFFSET is set to the first memory location's offset
+ from that base register.
+ REGS is an array filled in with the destination register numbers.
+ SAVED_ORDER (if nonnull) is an array filled in with an order that maps
+ insn numbers to an ascending order of loads. If CHECK_REGS is true,
+ the sequence of registers in REGS matches the loads from ascending memory
+ locations, and the function verifies that the register numbers are
+ themselves ascending. If CHECK_REGS is false, the register numbers
+ are stored in the order they are found in the operands. */
+static int
+load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
+ int *base, HOST_WIDE_INT *load_offset, bool check_regs)
+{
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
+ rtx base_reg_rtx = NULL;
+ int base_reg = -1;
+ int i, ldm_case;
+
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
+
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
+
+ /* Loop over the operands and check that the memory references are
+ suitable (i.e. immediate offsets from the same base register). At
+ the same time, extract the target register, and the memory
+ offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ rtx reg;
+ rtx offset;
+
+ /* Convert a subreg of a mem into the mem itself. */
+ if (GET_CODE (operands[nops + i]) == SUBREG)
+ operands[nops + i] = alter_subreg (operands + (nops + i));
+
+ gcc_assert (GET_CODE (operands[nops + i]) == MEM);
+
+ /* Don't reorder volatile memory references; it doesn't seem worth
+ looking for the case where the order is ok anyway. */
+ if (MEM_VOLATILE_P (operands[nops + i]))
+ return 0;
+
+ offset = const0_rtx;
+
+ if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
+ || (GET_CODE (reg) == SUBREG
+ && GET_CODE (reg = SUBREG_REG (reg)) == REG))
+ || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
+ && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
+ == REG)
+ || (GET_CODE (reg) == SUBREG
+ && GET_CODE (reg = SUBREG_REG (reg)) == REG))
+ && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
+ == CONST_INT)))
+ {
+ if (i == 0)
+ {
+ base_reg = REGNO (reg);
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
+ return 0;
+ }
+ else if (base_reg != (int) REGNO (reg))
+ /* Not addressed from the same base register. */
+ return 0;
+
+ unsorted_regs[i] = (GET_CODE (operands[i]) == REG
+ ? REGNO (operands[i])
+ : REGNO (SUBREG_REG (operands[i])));
+
+ /* If it isn't an integer register, or if it overwrites the
+ base register but isn't the last insn in the list, then
+ we can't do this. */
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ || unsorted_regs[i] > 14
+ || (i != nops - 1 && unsorted_regs[i] == base_reg))
+ return 0;
+
+ unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
+ }
+ else
+ /* Not a suitable memory address. */
+ return 0;
+ }
+
+ /* All the useful information has now been extracted from the
+ operands into unsorted_regs and unsorted_offsets; additionally,
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
+
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
+
+ if (base)
+ {
+ *base = base_reg;
+
+ for (i = 0; i < nops; i++)
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
+
+ *load_offset = unsorted_offsets[order[0]];
+ }
+
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops, base_reg_rtx))
+ return 0;
+
+ if (unsorted_offsets[order[0]] == 0)
+ ldm_case = 1; /* ldmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ ldm_case = 2; /* ldmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ ldm_case = 3; /* ldmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ ldm_case = 4; /* ldmdb */
+ else if (const_ok_for_arm (unsorted_offsets[order[0]])
+ || const_ok_for_arm (-unsorted_offsets[order[0]]))
+ ldm_case = 5;
+ else
+ return 0;
+
+ if (!multiple_operation_profitable_p (false, nops,
+ ldm_case == 5
+ ? unsorted_offsets[order[0]] : 0))
+ return 0;
+
+ return ldm_case;
+}
+
+/* Used to determine in a peephole whether a sequence of store instructions can
+ be changed into a store-multiple instruction.
+ NOPS is the number of separate store instructions we are examining.
+ NOPS_TOTAL is the total number of instructions recognized by the peephole
+ pattern.
+ The first NOPS entries in OPERANDS are the source registers, the next
+ NOPS entries are memory operands. If this function is successful, *BASE is
+ set to the common base register of the memory accesses; *LOAD_OFFSET is set
+ to the first memory location's offset from that base register. REGS is an
+ array filled in with the source register numbers, REG_RTXS (if nonnull) is
+ likewise filled with the corresponding rtx's.
+ SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
+ numbers to an ascending order of stores.
+ If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
+ from ascending memory locations, and the function verifies that the register
+ numbers are themselves ascending. If CHECK_REGS is false, the register
+ numbers are stored in the order they are found in the operands. */
+static int
+store_multiple_sequence (rtx *operands, int nops, int nops_total,
+ int *regs, rtx *reg_rtxs, int *saved_order, int *base,
+ HOST_WIDE_INT *load_offset, bool check_regs)
+{
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
+ int base_reg = -1;
+ rtx base_reg_rtx = NULL;
+ int i, stm_case;
+
+ /* Write back of base register is currently only supported for Thumb 1. */
+ int base_writeback = TARGET_THUMB1;
+
+ /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
+ easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
+
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
+
+ /* Loop over the operands and check that the memory references are
+ suitable (i.e. immediate offsets from the same base register). At
+ the same time, extract the target register, and the memory
+ offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ rtx reg;
+ rtx offset;
+
+ /* Convert a subreg of a mem into the mem itself. */
+ if (GET_CODE (operands[nops + i]) == SUBREG)
+ operands[nops + i] = alter_subreg (operands + (nops + i));
+
+ gcc_assert (GET_CODE (operands[nops + i]) == MEM);
+
+ /* Don't reorder volatile memory references; it doesn't seem worth
+ looking for the case where the order is ok anyway. */
+ if (MEM_VOLATILE_P (operands[nops + i]))
+ return 0;
+
+ offset = const0_rtx;
+
+ if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
+ || (GET_CODE (reg) == SUBREG
+ && GET_CODE (reg = SUBREG_REG (reg)) == REG))
+ || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
+ && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
+ == REG)
+ || (GET_CODE (reg) == SUBREG
+ && GET_CODE (reg = SUBREG_REG (reg)) == REG))
+ && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
+ == CONST_INT)))
+ {
+ unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
+ ? operands[i] : SUBREG_REG (operands[i]));
+ unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
+
+ if (i == 0)
+ {
+ base_reg = REGNO (reg);
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
+ return 0;
+ }
+ else if (base_reg != (int) REGNO (reg))
+ /* Not addressed from the same base register. */
+ return 0;
+
+ /* If it isn't an integer register, then we can't do this. */
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ /* The effects are unpredictable if the base register is
+ both updated and stored. */
+ || (base_writeback && unsorted_regs[i] == base_reg)
+ || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
+ || unsorted_regs[i] > 14)
+ return 0;
+
+ unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
+ }
+ else
+ /* Not a suitable memory address. */
+ return 0;
+ }
+
+ /* All the useful information has now been extracted from the
+ operands into unsorted_regs and unsorted_offsets; additionally,
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
+
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
+
+ if (base)
+ {
+ *base = base_reg;
+
+ for (i = 0; i < nops; i++)
+ {
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
+ if (reg_rtxs)
+ reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
+ }
+
+ *load_offset = unsorted_offsets[order[0]];
+ }
+
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops_total, base_reg_rtx))
+ return 0;
+
+ if (unsorted_offsets[order[0]] == 0)
+ stm_case = 1; /* stmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ stm_case = 2; /* stmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ stm_case = 3; /* stmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ stm_case = 4; /* stmdb */
+ else
+ return 0;
+
+ if (!multiple_operation_profitable_p (false, nops, 0))
+ return 0;
+
+ return stm_case;
+}
+
+/* Routines for use in generating RTL. */
+
+/* Generate a load-multiple instruction. COUNT is the number of loads in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
+{
+ int i = 0, j;
+ rtx result;
+
+ if (!multiple_operation_profitable_p (false, count, 0))
+ {
+ rtx seq;
+
+ start_sequence ();
+
+ for (i = 0; i < count; i++)
+ emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
+
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
+
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
+ }
+
+ result = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
+ {
+ XVECEXP (result, 0, 0)
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
+ i = 1;
+ count++;
+ }
+
+ for (j = 0; i < count; i++, j++)
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
+
+ return result;
+}
+
+/* Generate a store-multiple instruction. COUNT is the number of stores in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
+{
+ int i = 0, j;
+ rtx result;
+
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
+
+ if (!multiple_operation_profitable_p (false, count, 0))
+ {
+ rtx seq;
+
+ start_sequence ();
+
+ for (i = 0; i < count; i++)
+ emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
+
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
+
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
+ }
+
+ result = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
+ {
+ XVECEXP (result, 0, 0)
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
+ i = 1;
+ count++;
+ }
+
+ for (j = 0; i < count; i++, j++)
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
+
+ return result;
+}
+
+/* Generate either a load-multiple or a store-multiple instruction. This
+ function can be used in situations where we can start with a single MEM
+ rtx and adjust its address upwards.
+ COUNT is the number of operations in the instruction, not counting a
+ possible update of the base register. REGS is an array containing the
+ register operands.
+ BASEREG is the base register to be used in addressing the memory operands,
+ which are constructed from BASEMEM.
+ WRITE_BACK specifies whether the generated instruction should include an
+ update of the base register.
+ OFFSETP is used to pass an offset to and from this function; this offset
+ is not used when constructing the addresses (instead BASEMEM should have an
+ appropriate offset in its address), but only for setting
+ MEM_OFFSET. It is updated only if WRITE_BACK is true. */
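+
+/* For illustration: with IS_LOAD true, COUNT = 2 and WRITE_BACK false, the
+ two mems are formed at BASEREG and BASEREG + 4, and the result is the
+ PARALLEL of SETs matched by the load-multiple patterns (or, when a
+ multiple operation is not profitable, a sequence of single loads). */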
+
+static rtx
+arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
+ bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ rtx mems[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT offset = *offsetp;
+ int i;
+
+ gcc_assert (count <= MAX_LDM_STM_OPS);
+
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
+
+ for (i = 0; i < count; i++)
+ {
+ rtx addr = plus_constant (basereg, i * 4);
+ mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
+ offset += 4;
+ }
+
+ if (write_back)
+ *offsetp = offset;
+
+ if (is_load)
+ return arm_gen_load_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
+ else
+ return arm_gen_store_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
+}
+
+rtx
+arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
+
+rtx
+arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
+
+/* Called from a peephole2 expander to turn a sequence of loads into an
+ LDM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate loads we are trying to combine. SORT_REGS
+ is true if we can reorder the registers because they are used commutatively
+ subsequently.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
+{
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int i, j, base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int ldm_case;
+ rtx addr;
+
+ ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
+ &base_reg, &offset, !sort_regs);
+
+ if (ldm_case == 0)
+ return false;
+
+ if (sort_regs)
+ for (i = 0; i < nops - 1; i++)
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] > regs[j])
+ {
+ int t = regs[i];
+ regs[i] = regs[j];
+ regs[j] = t;
+ }
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
+ gcc_assert (ldm_case == 1 || ldm_case == 5);
+ write_back = TRUE;
+ }
+
+ if (ldm_case == 5)
+ {
+ rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
+ emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ if (!TARGET_THUMB1)
+ {
+ base_reg = regs[0];
+ base_reg_rtx = newbase;
+ }
+ }
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
+
+/* Called from a peephole2 expander to turn a sequence of stores into an
+ STM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate stores we are trying to combine.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_stm_seq (rtx *operands, int nops)
+{
+ int i;
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
+
+ stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
+ mem_order, &base_reg, &offset, true);
+
+ if (stm_case == 0)
+ return false;
+
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
+ }
+
+ if (stm_case == 5)
+ {
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ }
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
+
+/* Called from a peephole2 expander to turn a sequence of stores that are
+ preceded by constant loads into an STM instruction. OPERANDS are the
+ operands found by the peephole matcher; NOPS indicates how many
+ separate stores we are trying to combine; there are 2 * NOPS
+ instructions in the peephole.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_const_stm_seq (rtx *operands, int nops)
+{
+ int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
+ int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
+ int i, j;
+ HARD_REG_SET allocated;
+
+ stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
+ mem_order, &base_reg, &offset, false);
+
+ if (stm_case == 0)
+ return false;
+
+ memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
+
+ /* If the same register is used more than once, try to find a free
+ register. */
+ CLEAR_HARD_REG_SET (allocated);
+ for (i = 0; i < nops; i++)
+ {
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] == regs[j])
+ {
+ rtx t = peep2_find_free_register (0, nops * 2,
+ TARGET_THUMB1 ? "l" : "r",
+ SImode, &allocated);
+ if (t == NULL_RTX)
+ return false;
+ reg_rtxs[i] = t;
+ regs[i] = REGNO (t);
+ }
+ }
+
+ /* Compute an ordering that maps the register numbers to an ascending
+ sequence. */
+ reg_order[0] = 0;
+ for (i = 0; i < nops; i++)
+ if (regs[i] < regs[reg_order[0]])
+ reg_order[0] = i;
+
+ for (i = 1; i < nops; i++)
+ {
+ int this_order = reg_order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (regs[j] > regs[reg_order[i - 1]]
+ && (this_order == reg_order[i - 1]
+ || regs[j] < regs[this_order]))
+ this_order = j;
+ reg_order[i] = this_order;
+ }
+
+ /* Ensure that registers that must be live after the instruction end
+ up with the correct value. */
+ for (i = 0; i < nops; i++)
+ {
+ int this_order = reg_order[i];
+ if ((this_order != mem_order[i]
+ || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
+ && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
+ return false;
+ }
+
+ /* Load the constants. */
+ for (i = 0; i < nops; i++)
+ {
+ rtx op = operands[2 * nops + mem_order[i]];
+ sorted_regs[i] = regs[reg_order[i]];
+ emit_move_insn (reg_rtxs[reg_order[i]], op);
+ }
+
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
+ }
+
+ if (stm_case == 5)
+ {
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ }
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
+
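+/* Expand a block copy (the movmemqi pattern). OPERANDS[0] and OPERANDS[1]
+ are the destination and source MEMs, OPERANDS[2] is the number of bytes
+ to copy and OPERANDS[3] the alignment. Return 1 on success; return 0, so
+ that the caller falls back on a generic expansion, if the length or
+ alignment is not a CONST_INT, the length exceeds 64, or the alignment is
+ not a multiple of four. */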
+int
+arm_gen_movmemqi (rtx *operands)
+{
+ HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
+ HOST_WIDE_INT srcoffset, dstoffset;
+ int i;
+ rtx src, dst, srcbase, dstbase;
+ rtx part_bytes_reg = NULL;
+ rtx mem;
+
+ if (GET_CODE (operands[2]) != CONST_INT
+ || GET_CODE (operands[3]) != CONST_INT
+ || INTVAL (operands[2]) > 64
+ || INTVAL (operands[3]) & 3)
+ return 0;
+
+ dstbase = operands[0];
+ srcbase = operands[1];
+
+ dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
+ src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
+
+ in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
+ out_words_to_go = INTVAL (operands[2]) / 4;
+ last_bytes = INTVAL (operands[2]) & 3;
+ dstoffset = srcoffset = 0;
+
+ if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
+ part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
+
+ for (i = 0; in_words_to_go >= 2; i += 4)
+ {
+ if (in_words_to_go > 4)
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
+ TRUE, srcbase, &srcoffset));
+ else
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
+ src, FALSE, srcbase,
+ &srcoffset));
+
+ if (out_words_to_go)
+ {
+ if (out_words_to_go > 4)
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
+ TRUE, dstbase, &dstoffset));
+ else if (out_words_to_go != 1)
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
+ out_words_to_go, dst,
+ (last_bytes == 0
+ ? FALSE : TRUE),
+ dstbase, &dstoffset));
+ else
+ {
+ mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
+ emit_move_insn (mem, gen_rtx_REG (SImode, 0));
+ if (last_bytes != 0)
+ {
+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
+ dstoffset += 4;
+ }
+ }
+ }
+
+ in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
+ out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
+ }
+
+ /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
+ if (out_words_to_go)
+ {
+ rtx sreg;
+
+ mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
+ sreg = copy_to_reg (mem);
+
+ mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
+ emit_move_insn (mem, sreg);
+ in_words_to_go--;
+
+ gcc_assert (!in_words_to_go); /* Sanity check */
+ }
+
+ if (in_words_to_go)
+ {
+ gcc_assert (in_words_to_go > 0);
+
+ mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
+ part_bytes_reg = copy_to_mode_reg (SImode, mem);
+ }
+
+ gcc_assert (!last_bytes || part_bytes_reg);
+
+ if (BYTES_BIG_ENDIAN && last_bytes)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* The bytes we want are in the top end of the word. */
+ emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
+ GEN_INT (8 * (4 - last_bytes))));
+ part_bytes_reg = tmp;
+
+ while (last_bytes)
+ {
+ mem = adjust_automodify_address (dstbase, QImode,
+ plus_constant (dst, last_bytes - 1),
+ dstoffset + last_bytes - 1);
+ emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
+
+ if (--last_bytes)
+ {
+ tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
+ part_bytes_reg = tmp;
+ }
+ }
+
+ }
+ else
+ {
+ if (last_bytes > 1)
+ {
+ mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
+ emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
+ last_bytes -= 2;
+ if (last_bytes)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (dst, dst, const2_rtx));
+ emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
+ part_bytes_reg = tmp;
+ dstoffset += 2;
+ }
+ }
+
+ if (last_bytes)
+ {
+ mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
+ emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
+ }
+ }
+
+ return 1;
+}
+
+/* Select a dominance comparison mode if possible for a test of the general
+ form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
+ COND_OR == DOM_CC_X_AND_Y => (X && Y)
+ COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
+ COND_OR == DOM_CC_X_OR_Y => (X || Y)
+ In all cases OP will be either EQ or NE, but we don't need to know which
+ here. If we are unable to support a dominance comparison we return
+   CCmode.  This will then fail to match for the RTL expressions that
+ generate this call. */
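+/* Illustrative example (editorial note): with X = (eq a b), Y = (le c d)
+   and COND_OR == DOM_CC_X_OR_Y, EQ dominates LE, so no swap is needed and
+   the function below returns CC_DLEmode.  */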
+enum machine_mode
+arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
+{
+ enum rtx_code cond1, cond2;
+ int swapped = 0;
+
+ /* Currently we will probably get the wrong result if the individual
+ comparisons are not simple. This also ensures that it is safe to
+ reverse a comparison if necessary. */
+ if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
+ != CCmode)
+ || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
+ != CCmode))
+ return CCmode;
+
+ /* The if_then_else variant of this tests the second condition if the
+ first passes, but is true if the first fails. Reverse the first
+ condition to get a true "inclusive-or" expression. */
+ if (cond_or == DOM_CC_NX_OR_Y)
+ cond1 = reverse_condition (cond1);
+
+ /* If the comparisons are not equal, and one doesn't dominate the other,
+ then we can't do this. */
+ if (cond1 != cond2
+ && !comparison_dominates_p (cond1, cond2)
+ && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
+ return CCmode;
+
+ if (swapped)
+ {
+ enum rtx_code temp = cond1;
+ cond1 = cond2;
+ cond2 = temp;
+ }
+
+ switch (cond1)
+ {
+ case EQ:
+ if (cond_or == DOM_CC_X_AND_Y)
+ return CC_DEQmode;
+
+ switch (cond2)
+ {
+ case EQ: return CC_DEQmode;
+ case LE: return CC_DLEmode;
+ case LEU: return CC_DLEUmode;
+ case GE: return CC_DGEmode;
+ case GEU: return CC_DGEUmode;
+ default: gcc_unreachable ();
+ }
+
+ case LT:
+ if (cond_or == DOM_CC_X_AND_Y)
+ return CC_DLTmode;
+
+ switch (cond2)
+ {
+ case LT:
+ return CC_DLTmode;
+ case LE:
+ return CC_DLEmode;
+ case NE:
+ return CC_DNEmode;
+ default:
+ gcc_unreachable ();
+ }
+
+ case GT:
+ if (cond_or == DOM_CC_X_AND_Y)
+ return CC_DGTmode;
+
+ switch (cond2)
+ {
+ case GT:
+ return CC_DGTmode;
+ case GE:
+ return CC_DGEmode;
+ case NE:
+ return CC_DNEmode;
+ default:
+ gcc_unreachable ();
+ }
+
+ case LTU:
+ if (cond_or == DOM_CC_X_AND_Y)
+ return CC_DLTUmode;
+
+ switch (cond2)
+ {
+ case LTU:
+ return CC_DLTUmode;
+ case LEU:
+ return CC_DLEUmode;
+ case NE:
+ return CC_DNEmode;
+ default:
+ gcc_unreachable ();
+ }
+
+ case GTU:
+ if (cond_or == DOM_CC_X_AND_Y)
+ return CC_DGTUmode;
+
+ switch (cond2)
+ {
+ case GTU:
+ return CC_DGTUmode;
+ case GEU:
+ return CC_DGEUmode;
+ case NE:
+ return CC_DNEmode;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* The remaining cases only occur when both comparisons are the
+ same. */
+ case NE:
+ gcc_assert (cond1 == cond2);
+ return CC_DNEmode;
+
+ case LE:
+ gcc_assert (cond1 == cond2);
+ return CC_DLEmode;
+
+ case GE:
+ gcc_assert (cond1 == cond2);
+ return CC_DGEmode;
+
+ case LEU:
+ gcc_assert (cond1 == cond2);
+ return CC_DLEUmode;
+
+ case GEU:
+ gcc_assert (cond1 == cond2);
+ return CC_DGEUmode;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+enum machine_mode
+arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
+{
+ /* All floating point compares return CCFP if it is an equality
+ comparison, and CCFPE otherwise. */
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case LTGT:
+ return CCFPmode;
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ return CCFPmode;
+ return CCFPEmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* A compare with a shifted operand. Because of canonicalization, the
+ comparison will have to be swapped when we emit the assembler. */
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
+ && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
+ || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
+ || GET_CODE (x) == ROTATERT))
+ return CC_SWPmode;
+
+ /* This operation is performed swapped, but since we only rely on the Z
+ flag we don't need an additional mode. */
+ if (GET_MODE (y) == SImode
+ && (REG_P (y) || (GET_CODE (y) == SUBREG))
+ && GET_CODE (x) == NEG
+ && (op == EQ || op == NE))
+ return CC_Zmode;
+
+ /* This is a special case that is used by combine to allow a
+ comparison of a shifted byte load to be split into a zero-extend
+ followed by a comparison of the shifted integer (only valid for
+ equalities and unsigned inequalities). */
+ if (GET_MODE (x) == SImode
+ && GET_CODE (x) == ASHIFT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
+ && GET_CODE (XEXP (x, 0)) == SUBREG
+ && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
+ && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
+ && (op == EQ || op == NE
+ || op == GEU || op == GTU || op == LTU || op == LEU)
+ && GET_CODE (y) == CONST_INT)
+ return CC_Zmode;
+
+ /* A construct for a conditional compare, if the false arm contains
+ 0, then both conditions must be true, otherwise either condition
+ must be true. Not all conditions are possible, so CCmode is
+ returned if it can't be done. */
+ if (GET_CODE (x) == IF_THEN_ELSE
+ && (XEXP (x, 2) == const0_rtx
+ || XEXP (x, 2) == const1_rtx)
+ && COMPARISON_P (XEXP (x, 0))
+ && COMPARISON_P (XEXP (x, 1)))
+ return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
+ INTVAL (XEXP (x, 2)));
+
+ /* Alternate canonicalizations of the above. These are somewhat cleaner. */
+ if (GET_CODE (x) == AND
+ && (op == EQ || op == NE)
+ && COMPARISON_P (XEXP (x, 0))
+ && COMPARISON_P (XEXP (x, 1)))
+ return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
+ DOM_CC_X_AND_Y);
+
+ if (GET_CODE (x) == IOR
+ && (op == EQ || op == NE)
+ && COMPARISON_P (XEXP (x, 0))
+ && COMPARISON_P (XEXP (x, 1)))
+ return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
+ DOM_CC_X_OR_Y);
+
+ /* An operation (on Thumb) where we want to test for a single bit.
+ This is done by shifting that bit up into the top bit of a
+ scratch register; we can then branch on the sign bit. */
+ if (TARGET_THUMB1
+ && GET_MODE (x) == SImode
+ && (op == EQ || op == NE)
+ && GET_CODE (x) == ZERO_EXTRACT
+ && XEXP (x, 1) == const1_rtx)
+ return CC_Nmode;
+
+  /* For an operation that sets the condition codes as a side-effect,
+     the V flag is not set correctly, so we can only use comparisons
+     where this doesn't matter.  (For LT and GE we can use "mi" and "pl"
+ instead.) */
+ /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
+ if (GET_MODE (x) == SImode
+ && y == const0_rtx
+ && (op == EQ || op == NE || op == LT || op == GE)
+ && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
+ || GET_CODE (x) == AND || GET_CODE (x) == IOR
+ || GET_CODE (x) == XOR || GET_CODE (x) == MULT
+ || GET_CODE (x) == NOT || GET_CODE (x) == NEG
+ || GET_CODE (x) == LSHIFTRT
+ || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
+ || GET_CODE (x) == ROTATERT
+ || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
+ return CC_NOOVmode;
+
+ if (GET_MODE (x) == QImode && (op == EQ || op == NE))
+ return CC_Zmode;
+
+ if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
+ && GET_CODE (x) == PLUS
+ && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
+ return CC_Cmode;
+
+ if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
+ {
+ /* To keep things simple, always use the Cirrus cfcmp64 if it is
+ available. */
+ if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ return CCmode;
+
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ /* A DImode comparison against zero can be implemented by
+ or'ing the two halves together. */
+ if (y == const0_rtx)
+ return CC_Zmode;
+
+ /* We can do an equality test in three Thumb instructions. */
+ if (!TARGET_ARM)
+ return CC_Zmode;
+
+ /* FALLTHROUGH */
+
+ case LTU:
+ case LEU:
+ case GTU:
+ case GEU:
+ /* DImode unsigned comparisons can be implemented by cmp +
+ cmpeq without a scratch register. Not worth doing in
+ Thumb-2. */
+ if (TARGET_ARM)
+ return CC_CZmode;
+
+ /* FALLTHROUGH */
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ /* DImode signed and unsigned comparisons can be implemented
+ by cmp + sbcs with a scratch register, but that does not
+ set the Z flag - we must reverse GT/LE/GTU/LEU. */
+ gcc_assert (op != EQ && op != NE);
+ return CC_NCVmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return CCmode;
+}
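+
+/* Illustrative example (editorial note): for a carry test such as
+   (ltu (plus a b) a), the PLUS case above yields CC_Cmode, so only the
+   carry flag of the addition is relied upon.  */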
+
+/* X and Y are two things to compare using CODE.  Emit the compare insn and
+   return the rtx for the condition-code register in the proper mode.  */
+rtx
+arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
+{
+ enum machine_mode mode;
+ rtx cc_reg;
+ int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
+
+ /* We might have X as a constant, Y as a register because of the predicates
+ used for cmpdi. If so, force X to a register here. */
+ if (dimode_comparison && !REG_P (x))
+ x = force_reg (DImode, x);
+
+ mode = SELECT_CC_MODE (code, x, y);
+ cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+
+ if (dimode_comparison
+ && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ && mode != CC_CZmode)
+ {
+ rtx clobber, set;
+
+ /* To compare two non-zero values for equality, XOR them and
+ then compare against zero. Not used for ARM mode; there
+ CC_CZmode is cheaper. */
+ if (mode == CC_Zmode && y != const0_rtx)
+ {
+ x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
+ y = const0_rtx;
+ }
+ /* A scratch register is required. */
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
+ set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ }
+ else
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
+
+ return cc_reg;
+}
+
+/* Generate a sequence of insns that will generate the correct return
+ address mask depending on the physical architecture that the program
+ is running on. */
+rtx
+arm_gen_return_addr_mask (void)
+{
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_return_addr_mask (reg));
+ return reg;
+}
+
+void
+arm_reload_in_hi (rtx *operands)
+{
+ rtx ref = operands[1];
+ rtx base, scratch;
+ HOST_WIDE_INT offset = 0;
+
+ if (GET_CODE (ref) == SUBREG)
+ {
+ offset = SUBREG_BYTE (ref);
+ ref = SUBREG_REG (ref);
+ }
+
+ if (GET_CODE (ref) == REG)
+ {
+ /* We have a pseudo which has been spilt onto the stack; there
+ are two cases here: the first where there is a simple
+ stack-slot replacement and a second where the stack-slot is
+ out of range, or is used as a subreg. */
+ if (reg_equiv_mem[REGNO (ref)])
+ {
+ ref = reg_equiv_mem[REGNO (ref)];
+ base = find_replacement (&XEXP (ref, 0));
+ }
+ else
+ /* The slot is out of range, or was dressed up in a SUBREG. */
+ base = reg_equiv_address[REGNO (ref)];
+ }
+ else
+ base = find_replacement (&XEXP (ref, 0));
+
+ /* Handle the case where the address is too complex to be offset by 1. */
+ if (GET_CODE (base) == MINUS
+ || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
+ {
+ rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+
+ emit_set_insn (base_plus, base);
+ base = base_plus;
+ }
+ else if (GET_CODE (base) == PLUS)
+ {
+ /* The addend must be CONST_INT, or we would have dealt with it above. */
+ HOST_WIDE_INT hi, lo;
+
+ offset += INTVAL (XEXP (base, 1));
+ base = XEXP (base, 0);
+
+ /* Rework the address into a legal sequence of insns. */
+ /* Valid range for lo is -4095 -> 4095 */
+ lo = (offset >= 0
+ ? (offset & 0xfff)
+ : -((-offset) & 0xfff));
+
+      /* Corner case: if lo is the maximum offset, then we would be out of
+	 range once we have added the additional 1 below, so bump the msb
+	 into the pre-loading insn(s).  */
+ if (lo == 4095)
+ lo &= 0x7ff;
+
+ hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
+ ^ (HOST_WIDE_INT) 0x80000000)
+ - (HOST_WIDE_INT) 0x80000000);
+
+ gcc_assert (hi + lo == offset);
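+
+      /* Worked example (editorial note): offset = 0x1234 gives lo = 0x234
+	 and hi = 0x1000; in the corner case offset = 4095, lo is first
+	 masked down to 0x7ff and hi becomes 0x800, so hi + lo == offset
+	 holds either way.  */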
+
+ if (hi != 0)
+ {
+ rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+
+ /* Get the base address; addsi3 knows how to handle constants
+ that require more than one insn. */
+ emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
+ base = base_plus;
+ offset = lo;
+ }
+ }
+
+  /* Operands[2] may overlap operands[0] (though it won't overlap
+     operands[1]); that's why we asked for a DImode reg -- so we can
+     use the half that does not overlap.  */
+ if (REGNO (operands[2]) == REGNO (operands[0]))
+ scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+ else
+ scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
+
+ emit_insn (gen_zero_extendqisi2 (scratch,
+ gen_rtx_MEM (QImode,
+ plus_constant (base,
+ offset))));
+ emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
+ gen_rtx_MEM (QImode,
+ plus_constant (base,
+ offset + 1))));
+ if (!BYTES_BIG_ENDIAN)
+ emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
+ gen_rtx_IOR (SImode,
+ gen_rtx_ASHIFT
+ (SImode,
+ gen_rtx_SUBREG (SImode, operands[0], 0),
+ GEN_INT (8)),
+ scratch));
+ else
+ emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
+ gen_rtx_IOR (SImode,
+ gen_rtx_ASHIFT (SImode, scratch,
+ GEN_INT (8)),
+ gen_rtx_SUBREG (SImode, operands[0], 0)));
+}
+
+/* Handle storing a half-word to memory during reload by synthesizing it as
+   two byte stores.  Take care not to clobber the input values until after we
+ have moved them somewhere safe. This code assumes that if the DImode
+ scratch in operands[2] overlaps either the input value or output address
+ in some way, then that value must die in this insn (we absolutely need
+ two scratch registers for some corner cases). */
+void
+arm_reload_out_hi (rtx *operands)
+{
+ rtx ref = operands[0];
+ rtx outval = operands[1];
+ rtx base, scratch;
+ HOST_WIDE_INT offset = 0;
+
+ if (GET_CODE (ref) == SUBREG)
+ {
+ offset = SUBREG_BYTE (ref);
+ ref = SUBREG_REG (ref);
+ }
+
+ if (GET_CODE (ref) == REG)
+ {
+ /* We have a pseudo which has been spilt onto the stack; there
+ are two cases here: the first where there is a simple
+ stack-slot replacement and a second where the stack-slot is
+ out of range, or is used as a subreg. */
+ if (reg_equiv_mem[REGNO (ref)])
+ {
+ ref = reg_equiv_mem[REGNO (ref)];
+ base = find_replacement (&XEXP (ref, 0));
+ }
+ else
+ /* The slot is out of range, or was dressed up in a SUBREG. */
+ base = reg_equiv_address[REGNO (ref)];
+ }
+ else
+ base = find_replacement (&XEXP (ref, 0));
+
+ scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
+
+ /* Handle the case where the address is too complex to be offset by 1. */
+ if (GET_CODE (base) == MINUS
+ || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
+ {
+ rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+
+ /* Be careful not to destroy OUTVAL. */
+ if (reg_overlap_mentioned_p (base_plus, outval))
+ {
+ /* Updating base_plus might destroy outval, see if we can
+ swap the scratch and base_plus. */
+ if (!reg_overlap_mentioned_p (scratch, outval))
+ {
+ rtx tmp = scratch;
+ scratch = base_plus;
+ base_plus = tmp;
+ }
+ else
+ {
+ rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
+
+	      /* Be conservative and copy OUTVAL into the scratch now;
+		 this should only be necessary if outval is a subreg
+		 of something larger than a word.  */
+ /* XXX Might this clobber base? I can't see how it can,
+ since scratch is known to overlap with OUTVAL, and
+ must be wider than a word. */
+ emit_insn (gen_movhi (scratch_hi, outval));
+ outval = scratch_hi;
+ }
+ }
+
+ emit_set_insn (base_plus, base);
+ base = base_plus;
+ }
+ else if (GET_CODE (base) == PLUS)
+ {
+ /* The addend must be CONST_INT, or we would have dealt with it above. */
+ HOST_WIDE_INT hi, lo;
+
+ offset += INTVAL (XEXP (base, 1));
+ base = XEXP (base, 0);
+
+ /* Rework the address into a legal sequence of insns. */
+ /* Valid range for lo is -4095 -> 4095 */
+ lo = (offset >= 0
+ ? (offset & 0xfff)
+ : -((-offset) & 0xfff));
+
+      /* Corner case: if lo is the maximum offset, then we would be out of
+	 range once we have added the additional 1 below, so bump the msb
+	 into the pre-loading insn(s).  */
+ if (lo == 4095)
+ lo &= 0x7ff;
+
+ hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
+ ^ (HOST_WIDE_INT) 0x80000000)
+ - (HOST_WIDE_INT) 0x80000000);
+
+ gcc_assert (hi + lo == offset);
+
+ if (hi != 0)
+ {
+ rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+
+ /* Be careful not to destroy OUTVAL. */
+ if (reg_overlap_mentioned_p (base_plus, outval))
+ {
+ /* Updating base_plus might destroy outval, see if we
+ can swap the scratch and base_plus. */
+ if (!reg_overlap_mentioned_p (scratch, outval))
+ {
+ rtx tmp = scratch;
+ scratch = base_plus;
+ base_plus = tmp;
+ }
+ else
+ {
+ rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
+
+		  /* Be conservative and copy outval into scratch now;
+		     this should only be necessary if outval is a
+		     subreg of something larger than a word.  */
+ /* XXX Might this clobber base? I can't see how it
+ can, since scratch is known to overlap with
+ outval. */
+ emit_insn (gen_movhi (scratch_hi, outval));
+ outval = scratch_hi;
+ }
+ }
+
+ /* Get the base address; addsi3 knows how to handle constants
+ that require more than one insn. */
+ emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
+ base = base_plus;
+ offset = lo;
+ }
+ }
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_movqi (gen_rtx_MEM (QImode,
+ plus_constant (base, offset + 1)),
+ gen_lowpart (QImode, outval)));
+ emit_insn (gen_lshrsi3 (scratch,
+ gen_rtx_SUBREG (SImode, outval, 0),
+ GEN_INT (8)));
+ emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
+ gen_lowpart (QImode, scratch)));
+ }
+ else
+ {
+ emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
+ gen_lowpart (QImode, outval)));
+ emit_insn (gen_lshrsi3 (scratch,
+ gen_rtx_SUBREG (SImode, outval, 0),
+ GEN_INT (8)));
+ emit_insn (gen_movqi (gen_rtx_MEM (QImode,
+ plus_constant (base, offset + 1)),
+ gen_lowpart (QImode, scratch)));
+ }
+}
+
+/* Return true if a type must be passed in memory. For AAPCS, small aggregates
+ (padded to the size of a word) should be passed in a register. */
+
+static bool
+arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (TARGET_AAPCS_BASED)
+ return must_pass_in_stack_var_size (mode, type);
+ else
+ return must_pass_in_stack_var_size_or_pad (mode, type);
+}
+
+
+/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
+ Return true if an argument passed on the stack should be padded upwards,
+ i.e. if the least-significant byte has useful data.
+ For legacy APCS ABIs we use the default. For AAPCS based ABIs small
+ aggregate types are placed in the lowest memory address. */
+
+bool
+arm_pad_arg_upward (enum machine_mode mode, const_tree type)
+{
+ if (!TARGET_AAPCS_BASED)
+ return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
+
+ if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
+ return false;
+
+ return true;
+}
+
+
+/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
+ For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
+ byte of the register has useful data, and return the opposite if the
+ most significant byte does.
+ For AAPCS, small aggregates and small complex types are always padded
+ upwards. */
+
+bool
+arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type, int first ATTRIBUTE_UNUSED)
+{
+ if (TARGET_AAPCS_BASED
+ && BYTES_BIG_ENDIAN
+ && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
+ && int_size_in_bytes (type) <= 4)
+ return true;
+
+ /* Otherwise, use default padding. */
+ return !BYTES_BIG_ENDIAN;
+}
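+
+/* Illustrative example (editorial note): under AAPCS on a big-endian
+   target, a 3-byte aggregate satisfies the size check above and is padded
+   upward; on little-endian targets the default !BYTES_BIG_ENDIAN path pads
+   upward as well.  */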
+
+
+/* Print a symbolic form of X to the debug file, F. */
+static void
+arm_print_value (FILE *f, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
+ return;
+
+ case CONST_DOUBLE:
+ fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
+ return;
+
+ case CONST_VECTOR:
+ {
+ int i;
+
+ fprintf (f, "<");
+ for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
+ {
+ fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
+ if (i < (CONST_VECTOR_NUNITS (x) - 1))
+ fputc (',', f);
+ }
+ fprintf (f, ">");
+ }
+ return;
+
+ case CONST_STRING:
+ fprintf (f, "\"%s\"", XSTR (x, 0));
+ return;
+
+ case SYMBOL_REF:
+ fprintf (f, "`%s'", XSTR (x, 0));
+ return;
+
+ case LABEL_REF:
+ fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
+ return;
+
+ case CONST:
+ arm_print_value (f, XEXP (x, 0));
+ return;
+
+ case PLUS:
+ arm_print_value (f, XEXP (x, 0));
+ fprintf (f, "+");
+ arm_print_value (f, XEXP (x, 1));
+ return;
+
+ case PC:
+ fprintf (f, "pc");
+ return;
+
+ default:
+ fprintf (f, "????");
+ return;
+ }
+}
+
+/* Routines for manipulation of the constant pool. */
+
+/* ARM instructions cannot load a large constant directly into a
+   register; such constants have to come from a pc-relative load.  The
+   constant must therefore be placed in the addressable range of the
+   pc-relative load.  Depending on the precise pc-relative load
+   instruction the range is somewhere between 256 bytes and 4k.  This
+ means that we often have to dump a constant inside a function, and
+ generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow
+ things down and make the code larger.
+
+ Normally we can hide the table after an existing unconditional
+ branch so that there is no interruption of the flow, but in the
+ worst case the code looks like this:
+
+ ldr rn, L1
+ ...
+ b L2
+ align
+ L1: .long value
+ L2:
+ ...
+
+ ldr rn, L3
+ ...
+ b L4
+ align
+ L3: .long value
+ L4:
+ ...
+
+ We fix this by performing a scan after scheduling, which notices
+ which instructions need to have their operands fetched from the
+ constant table and builds the table.
+
+ The algorithm starts by building a table of all the constants that
+ need fixing up and all the natural barriers in the function (places
+ where a constant table can be dropped without breaking the flow).
+ For each fixup we note how far the pc-relative replacement will be
+ able to reach and the offset of the instruction into the function.
+
+ Having built the table we then group the fixes together to form
+ tables that are as large as possible (subject to addressing
+ constraints) and emit each table of constants after the last
+ barrier that is within range of all the instructions in the group.
+ If a group does not contain a barrier, then we forcibly create one
+ by inserting a jump instruction into the flow. Once the table has
+ been inserted, the insns are then modified to reference the
+ relevant entry in the pool.
+
+ Possible enhancements to the algorithm (not implemented) are:
+
+ 1) For some processors and object formats, there may be benefit in
+ aligning the pools to the start of cache lines; this alignment
+ would need to be taken into account when calculating addressability
+ of a pool. */
+
+/* These typedefs are located at the start of this file, so that
+ they can be used in the prototypes there. This comment is to
+ remind readers of that fact so that the following structures
+ can be understood more easily.
+
+ typedef struct minipool_node Mnode;
+ typedef struct minipool_fixup Mfix; */
+
+struct minipool_node
+{
+ /* Doubly linked chain of entries. */
+ Mnode * next;
+ Mnode * prev;
+  /* The maximum offset into the code at which this entry can be placed.  While
+ pushing fixes for forward references, all entries are sorted in order
+ of increasing max_address. */
+ HOST_WIDE_INT max_address;
+ /* Similarly for an entry inserted for a backwards ref. */
+ HOST_WIDE_INT min_address;
+ /* The number of fixes referencing this entry. This can become zero
+ if we "unpush" an entry. In this case we ignore the entry when we
+ come to emit the code. */
+ int refcount;
+ /* The offset from the start of the minipool. */
+ HOST_WIDE_INT offset;
+  /* The value in the table.  */
+ rtx value;
+ /* The mode of value. */
+ enum machine_mode mode;
+  /* The size of the value.  With iWMMXt enabled,
+     sizes > 4 also imply an alignment of 8 bytes.  */
+ int fix_size;
+};
+
+struct minipool_fixup
+{
+ Mfix * next;
+ rtx insn;
+ HOST_WIDE_INT address;
+ rtx * loc;
+ enum machine_mode mode;
+ int fix_size;
+ rtx value;
+ Mnode * minipool;
+ HOST_WIDE_INT forwards;
+ HOST_WIDE_INT backwards;
+};
+
+/* Fixes less than a word need padding out to a word boundary. */
+#define MINIPOOL_FIX_SIZE(mode) \
+ (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
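+
+/* Illustrative example (editorial note): a QImode fix (1 byte) is padded
+   out to a MINIPOOL_FIX_SIZE of 4, while a DImode fix keeps its natural
+   size of 8.  */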
+
+static Mnode * minipool_vector_head;
+static Mnode * minipool_vector_tail;
+static rtx minipool_vector_label;
+static int minipool_pad;
+
+/* The linked list of all minipool fixes required for this function. */
+Mfix * minipool_fix_head;
+Mfix * minipool_fix_tail;
+/* The fix entry for the current minipool, once it has been placed. */
+Mfix * minipool_barrier;
+
+/* Determines if INSN is the start of a jump table. Returns the end
+ of the TABLE or NULL_RTX. */
+static rtx
+is_jump_table (rtx insn)
+{
+ rtx table;
+
+ if (GET_CODE (insn) == JUMP_INSN
+ && JUMP_LABEL (insn) != NULL
+ && ((table = next_real_insn (JUMP_LABEL (insn)))
+ == next_real_insn (insn))
+ && table != NULL
+ && GET_CODE (table) == JUMP_INSN
+ && (GET_CODE (PATTERN (table)) == ADDR_VEC
+ || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
+ return table;
+
+ return NULL_RTX;
+}
+
+#ifndef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 0
+#endif
+
+static HOST_WIDE_INT
+get_jump_table_size (rtx insn)
+{
+  /* ADDR_VECs only take room if read-only data goes into the text
+     section.  */
+ if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
+ {
+ rtx body = PATTERN (insn);
+ int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
+ HOST_WIDE_INT size;
+ HOST_WIDE_INT modesize;
+
+ modesize = GET_MODE_SIZE (GET_MODE (body));
+ size = modesize * XVECLEN (body, elt);
+ switch (modesize)
+ {
+ case 1:
+ /* Round up size of TBB table to a halfword boundary. */
+ size = (size + 1) & ~(HOST_WIDE_INT)1;
+ break;
+ case 2:
+ /* No padding necessary for TBH. */
+ break;
+ case 4:
+ /* Add two bytes for alignment on Thumb. */
+ if (TARGET_THUMB)
+ size += 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return size;
+ }
+
+ return 0;
+}
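+
+/* Illustrative example (editorial note): a five-entry ADDR_DIFF_VEC in
+   QImode (a TBB table) has a raw size of 5 bytes, which the halfword
+   rounding above bumps to 6; the same five entries in SImode on Thumb take
+   5 * 4 + 2 = 22 bytes.  */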
+
+/* Move a minipool fix MP from its current location to before MAX_MP.
+ If MAX_MP is NULL, then MP doesn't need moving, but the addressing
+ constraints may need updating. */
+static Mnode *
+move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
+ HOST_WIDE_INT max_address)
+{
+ /* The code below assumes these are different. */
+ gcc_assert (mp != max_mp);
+
+ if (max_mp == NULL)
+ {
+ if (max_address < mp->max_address)
+ mp->max_address = max_address;
+ }
+ else
+ {
+ if (max_address > max_mp->max_address - mp->fix_size)
+ mp->max_address = max_mp->max_address - mp->fix_size;
+ else
+ mp->max_address = max_address;
+
+ /* Unlink MP from its current position. Since max_mp is non-null,
+ mp->prev must be non-null. */
+ mp->prev->next = mp->next;
+ if (mp->next != NULL)
+ mp->next->prev = mp->prev;
+ else
+ minipool_vector_tail = mp->prev;
+
+ /* Re-insert it before MAX_MP. */
+ mp->next = max_mp;
+ mp->prev = max_mp->prev;
+ max_mp->prev = mp;
+
+ if (mp->prev != NULL)
+ mp->prev->next = mp;
+ else
+ minipool_vector_head = mp;
+ }
+
+ /* Save the new entry. */
+ max_mp = mp;
+
+ /* Scan over the preceding entries and adjust their addresses as
+ required. */
+ while (mp->prev != NULL
+ && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
+ {
+ mp->prev->max_address = mp->max_address - mp->prev->fix_size;
+ mp = mp->prev;
+ }
+
+ return max_mp;
+}
+
+/* Add a constant to the minipool for a forward reference. Returns the
+ node added or NULL if the constant will not fit in this pool. */
+static Mnode *
+add_minipool_forward_ref (Mfix *fix)
+{
+ /* If set, max_mp is the first pool_entry that has a lower
+ constraint than the one we are trying to add. */
+ Mnode * max_mp = NULL;
+ HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
+ Mnode * mp;
+
+ /* If the minipool starts before the end of FIX->INSN then this FIX
+   cannot be placed into the current pool.  Furthermore, adding the
+ new constant pool entry may cause the pool to start FIX_SIZE bytes
+ earlier. */
+ if (minipool_vector_head &&
+ (fix->address + get_attr_length (fix->insn)
+ >= minipool_vector_head->max_address - fix->fix_size))
+ return NULL;
+
+ /* Scan the pool to see if a constant with the same value has
+ already been added. While we are doing this, also note the
+ location where we must insert the constant if it doesn't already
+ exist. */
+ for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+ {
+ if (GET_CODE (fix->value) == GET_CODE (mp->value)
+ && fix->mode == mp->mode
+ && (GET_CODE (fix->value) != CODE_LABEL
+ || (CODE_LABEL_NUMBER (fix->value)
+ == CODE_LABEL_NUMBER (mp->value)))
+ && rtx_equal_p (fix->value, mp->value))
+ {
+ /* More than one fix references this entry. */
+ mp->refcount++;
+ return move_minipool_fix_forward_ref (mp, max_mp, max_address);
+ }
+
+ /* Note the insertion point if necessary. */
+ if (max_mp == NULL
+ && mp->max_address > max_address)
+ max_mp = mp;
+
+      /* If we are inserting an 8-byte aligned quantity and
+ we have not already found an insertion point, then
+ make sure that all such 8-byte aligned quantities are
+ placed at the start of the pool. */
+ if (ARM_DOUBLEWORD_ALIGN
+ && max_mp == NULL
+ && fix->fix_size >= 8
+ && mp->fix_size < 8)
+ {
+ max_mp = mp;
+ max_address = mp->max_address;
+ }
+ }
+
+ /* The value is not currently in the minipool, so we need to create
+ a new entry for it. If MAX_MP is NULL, the entry will be put on
+ the end of the list since the placement is less constrained than
+ any existing entry. Otherwise, we insert the new fix before
+ MAX_MP and, if necessary, adjust the constraints on the other
+ entries. */
+ mp = XNEW (Mnode);
+ mp->fix_size = fix->fix_size;
+ mp->mode = fix->mode;
+ mp->value = fix->value;
+ mp->refcount = 1;
+ /* Not yet required for a backwards ref. */
+ mp->min_address = -65536;
+
+ if (max_mp == NULL)
+ {
+ mp->max_address = max_address;
+ mp->next = NULL;
+ mp->prev = minipool_vector_tail;
+
+ if (mp->prev == NULL)
+ {
+ minipool_vector_head = mp;
+ minipool_vector_label = gen_label_rtx ();
+ }
+ else
+ mp->prev->next = mp;
+
+ minipool_vector_tail = mp;
+ }
+ else
+ {
+ if (max_address > max_mp->max_address - mp->fix_size)
+ mp->max_address = max_mp->max_address - mp->fix_size;
+ else
+ mp->max_address = max_address;
+
+ mp->next = max_mp;
+ mp->prev = max_mp->prev;
+ max_mp->prev = mp;
+ if (mp->prev != NULL)
+ mp->prev->next = mp;
+ else
+ minipool_vector_head = mp;
+ }
+
+ /* Save the new entry. */
+ max_mp = mp;
+
+ /* Scan over the preceding entries and adjust their addresses as
+ required. */
+ while (mp->prev != NULL
+ && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
+ {
+ mp->prev->max_address = mp->max_address - mp->prev->fix_size;
+ mp = mp->prev;
+ }
+
+ return max_mp;
+}
+
+static Mnode *
+move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
+ HOST_WIDE_INT min_address)
+{
+ HOST_WIDE_INT offset;
+
+ /* The code below assumes these are different. */
+ gcc_assert (mp != min_mp);
+
+ if (min_mp == NULL)
+ {
+ if (min_address > mp->min_address)
+ mp->min_address = min_address;
+ }
+ else
+ {
+ /* We will adjust this below if it is too loose. */
+ mp->min_address = min_address;
+
+ /* Unlink MP from its current position. Since min_mp is non-null,
+ mp->next must be non-null. */
+ mp->next->prev = mp->prev;
+ if (mp->prev != NULL)
+ mp->prev->next = mp->next;
+ else
+ minipool_vector_head = mp->next;
+
+ /* Reinsert it after MIN_MP. */
+ mp->prev = min_mp;
+ mp->next = min_mp->next;
+ min_mp->next = mp;
+ if (mp->next != NULL)
+ mp->next->prev = mp;
+ else
+ minipool_vector_tail = mp;
+ }
+
+ min_mp = mp;
+
+ offset = 0;
+ for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+ {
+ mp->offset = offset;
+ if (mp->refcount > 0)
+ offset += mp->fix_size;
+
+ if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
+ mp->next->min_address = mp->min_address + mp->fix_size;
+ }
+
+ return min_mp;
+}
+
+/* Add a constant to the minipool for a backward reference. Returns the
+ node added or NULL if the constant will not fit in this pool.
+
+ Note that the code for insertion for a backwards reference can be
+ somewhat confusing because the calculated offsets for each fix do
+ not take into account the size of the pool (which is still under
+   construction).  */
+static Mnode *
+add_minipool_backward_ref (Mfix *fix)
+{
+ /* If set, min_mp is the last pool_entry that has a lower constraint
+ than the one we are trying to add. */
+ Mnode *min_mp = NULL;
+ /* This can be negative, since it is only a constraint. */
+ HOST_WIDE_INT min_address = fix->address - fix->backwards;
+ Mnode *mp;
+
+ /* If we can't reach the current pool from this insn, or if we can't
+ insert this entry at the end of the pool without pushing other
+ fixes out of range, then we don't try. This ensures that we
+ can't fail later on. */
+ if (min_address >= minipool_barrier->address
+ || (minipool_vector_tail->min_address + fix->fix_size
+ >= minipool_barrier->address))
+ return NULL;
+
+ /* Scan the pool to see if a constant with the same value has
+ already been added. While we are doing this, also note the
+ location where we must insert the constant if it doesn't already
+ exist. */
+ for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
+ {
+ if (GET_CODE (fix->value) == GET_CODE (mp->value)
+ && fix->mode == mp->mode
+ && (GET_CODE (fix->value) != CODE_LABEL
+ || (CODE_LABEL_NUMBER (fix->value)
+ == CODE_LABEL_NUMBER (mp->value)))
+ && rtx_equal_p (fix->value, mp->value)
+ /* Check that there is enough slack to move this entry to the
+ end of the table (this is conservative). */
+ && (mp->max_address
+ > (minipool_barrier->address
+ + minipool_vector_tail->offset
+ + minipool_vector_tail->fix_size)))
+ {
+ mp->refcount++;
+ return move_minipool_fix_backward_ref (mp, min_mp, min_address);
+ }
+
+ if (min_mp != NULL)
+ mp->min_address += fix->fix_size;
+ else
+ {
+ /* Note the insertion point if necessary. */
+ if (mp->min_address < min_address)
+ {
+ /* For now, we do not allow the insertion of 8-byte alignment
+ requiring nodes anywhere but at the start of the pool. */
+ if (ARM_DOUBLEWORD_ALIGN
+ && fix->fix_size >= 8 && mp->fix_size < 8)
+ return NULL;
+ else
+ min_mp = mp;
+ }
+ else if (mp->max_address
+ < minipool_barrier->address + mp->offset + fix->fix_size)
+ {
+ /* Inserting before this entry would push the fix beyond
+ its maximum address (which can happen if we have
+ re-located a forwards fix); force the new fix to come
+ after it. */
+ if (ARM_DOUBLEWORD_ALIGN
+ && fix->fix_size >= 8 && mp->fix_size < 8)
+ return NULL;
+ else
+ {
+ min_mp = mp;
+ min_address = mp->min_address + fix->fix_size;
+ }
+ }
+ /* Do not insert a non-8-byte aligned quantity before 8-byte
+ aligned quantities. */
+ else if (ARM_DOUBLEWORD_ALIGN
+ && fix->fix_size < 8
+ && mp->fix_size >= 8)
+ {
+ min_mp = mp;
+ min_address = mp->min_address + fix->fix_size;
+ }
+ }
+ }
+
+ /* We need to create a new entry. */
+ mp = XNEW (Mnode);
+ mp->fix_size = fix->fix_size;
+ mp->mode = fix->mode;
+ mp->value = fix->value;
+ mp->refcount = 1;
+ mp->max_address = minipool_barrier->address + 65536;
+
+ mp->min_address = min_address;
+
+ if (min_mp == NULL)
+ {
+ mp->prev = NULL;
+ mp->next = minipool_vector_head;
+
+ if (mp->next == NULL)
+ {
+ minipool_vector_tail = mp;
+ minipool_vector_label = gen_label_rtx ();
+ }
+ else
+ mp->next->prev = mp;
+
+ minipool_vector_head = mp;
+ }
+ else
+ {
+ mp->next = min_mp->next;
+ mp->prev = min_mp;
+ min_mp->next = mp;
+
+ if (mp->next != NULL)
+ mp->next->prev = mp;
+ else
+ minipool_vector_tail = mp;
+ }
+
+ /* Save the new entry. */
+ min_mp = mp;
+
+ if (mp->prev)
+ mp = mp->prev;
+ else
+ mp->offset = 0;
+
+ /* Scan over the following entries and adjust their offsets. */
+ while (mp->next != NULL)
+ {
+ if (mp->next->min_address < mp->min_address + mp->fix_size)
+ mp->next->min_address = mp->min_address + mp->fix_size;
+
+ if (mp->refcount)
+ mp->next->offset = mp->offset + mp->fix_size;
+ else
+ mp->next->offset = mp->offset;
+
+ mp = mp->next;
+ }
+
+ return min_mp;
+}
+
+static void
+assign_minipool_offsets (Mfix *barrier)
+{
+ HOST_WIDE_INT offset = 0;
+ Mnode *mp;
+
+ minipool_barrier = barrier;
+
+ for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+ {
+ mp->offset = offset;
+
+ if (mp->refcount > 0)
+ offset += mp->fix_size;
+ }
+}
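+
+/* Illustrative example (editorial note): for three entries of fix_size 4, 8
+   and 4 whose middle entry has refcount zero, the offsets assigned above
+   are 0, 4 and 4 -- the dead entry occupies no space but keeps a nominal
+   offset.  */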
+
+/* Output the literal table.  */
+static void
+dump_minipool (rtx scan)
+{
+ Mnode * mp;
+ Mnode * nmp;
+ int align64 = 0;
+
+ if (ARM_DOUBLEWORD_ALIGN)
+ for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+ if (mp->refcount > 0 && mp->fix_size >= 8)
+ {
+ align64 = 1;
+ break;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
+ INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
+
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
+ scan = emit_label_after (minipool_vector_label, scan);
+
+ for (mp = minipool_vector_head; mp != NULL; mp = nmp)
+ {
+ if (mp->refcount > 0)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ ";; Offset %u, min %ld, max %ld ",
+ (unsigned) mp->offset, (unsigned long) mp->min_address,
+ (unsigned long) mp->max_address);
+ arm_print_value (dump_file, mp->value);
+ fputc ('\n', dump_file);
+ }
+
+ switch (mp->fix_size)
+ {
+#ifdef HAVE_consttable_1
+ case 1:
+ scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
+ break;
+
+#endif
+#ifdef HAVE_consttable_2
+ case 2:
+ scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
+ break;
+
+#endif
+#ifdef HAVE_consttable_4
+ case 4:
+ scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
+ break;
+
+#endif
+#ifdef HAVE_consttable_8
+ case 8:
+ scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
+ break;
+
+#endif
+#ifdef HAVE_consttable_16
+ case 16:
+ scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
+ break;
+
+#endif
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ nmp = mp->next;
+ free (mp);
+ }
+
+ minipool_vector_head = minipool_vector_tail = NULL;
+ scan = emit_insn_after (gen_consttable_end (), scan);
+ scan = emit_barrier_after (scan);
+}
+
+/* Return the cost of forcibly inserting a barrier after INSN. */
+static int
+arm_barrier_cost (rtx insn)
+{
+ /* Basing the location of the pool on the loop depth is preferable,
+ but at the moment, the basic block information seems to be
+ corrupt by this stage of the compilation. */
+ int base_cost = 50;
+ rtx next = next_nonnote_insn (insn);
+
+ if (next != NULL && GET_CODE (next) == CODE_LABEL)
+ base_cost -= 20;
+
+ switch (GET_CODE (insn))
+ {
+ case CODE_LABEL:
+ /* It will always be better to place the table before the label, rather
+ than after it. */
+ return 50;
+
+ case INSN:
+ case CALL_INSN:
+ return base_cost;
+
+ case JUMP_INSN:
+ return base_cost - 10;
+
+ default:
+ return base_cost + 10;
+ }
+}
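+
+/* Illustrative example (editorial note): a JUMP_INSN whose next non-note
+   insn is a CODE_LABEL costs 50 - 20 - 10 = 20, one of the cheapest places
+   to force a barrier, whereas placing the table after a CODE_LABEL always
+   costs 50.  */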
+
+/* Find the best place in the insn stream in the range
+ (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
+ Create the barrier by inserting a jump and add a new fix entry for
+ it. */
+static Mfix *
+create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
+{
+ HOST_WIDE_INT count = 0;
+ rtx barrier;
+ rtx from = fix->insn;
+ /* The instruction after which we will insert the jump. */
+ rtx selected = NULL;
+ int selected_cost;
+ /* The address at which the jump instruction will be placed. */
+ HOST_WIDE_INT selected_address;
+ Mfix * new_fix;
+ HOST_WIDE_INT max_count = max_address - fix->address;
+ rtx label = gen_label_rtx ();
+
+ selected_cost = arm_barrier_cost (from);
+ selected_address = fix->address;
+
+ while (from && count < max_count)
+ {
+ rtx tmp;
+ int new_cost;
+
+ /* This code shouldn't have been called if there was a natural barrier
+ within range. */
+ gcc_assert (GET_CODE (from) != BARRIER);
+
+ /* Count the length of this insn. */
+ count += get_attr_length (from);
+
+ /* If there is a jump table, add its length. */
+ tmp = is_jump_table (from);
+ if (tmp != NULL)
+ {
+ count += get_jump_table_size (tmp);
+
+ /* Jump tables aren't in a basic block, so base the cost on
+ the dispatch insn. If we select this location, we will
+ still put the pool after the table. */
+ new_cost = arm_barrier_cost (from);
+
+ if (count < max_count
+ && (!selected || new_cost <= selected_cost))
+ {
+ selected = tmp;
+ selected_cost = new_cost;
+ selected_address = fix->address + count;
+ }
+
+ /* Continue after the dispatch table. */
+ from = NEXT_INSN (tmp);
+ continue;
+ }
+
+ new_cost = arm_barrier_cost (from);
+
+ if (count < max_count
+ && (!selected || new_cost <= selected_cost))
+ {
+ selected = from;
+ selected_cost = new_cost;
+ selected_address = fix->address + count;
+ }
+
+ from = NEXT_INSN (from);
+ }
+
+ /* Make sure that we found a place to insert the jump. */
+ gcc_assert (selected);
+
+ /* Create a new JUMP_INSN that branches around a barrier. */
+ from = emit_jump_insn_after (gen_jump (label), selected);
+ JUMP_LABEL (from) = label;
+ barrier = emit_barrier_after (from);
+ emit_label_after (label, barrier);
+
+ /* Create a minipool barrier entry for the new barrier. */
+ new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
+ new_fix->insn = barrier;
+ new_fix->address = selected_address;
+ new_fix->next = fix->next;
+ fix->next = new_fix;
+
+ return new_fix;
+}
+
+/* Record that there is a natural barrier in the insn stream at
+ ADDRESS. */
+static void
+push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
+{
+ Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
+
+ fix->insn = insn;
+ fix->address = address;
+
+ fix->next = NULL;
+ if (minipool_fix_head != NULL)
+ minipool_fix_tail->next = fix;
+ else
+ minipool_fix_head = fix;
+
+ minipool_fix_tail = fix;
+}
+
+/* Record INSN, which will need fixing up to load a value from the
+ minipool. ADDRESS is the offset of the insn since the start of the
+ function; LOC is a pointer to the part of the insn which requires
+ fixing; VALUE is the constant that must be loaded, which is of type
+ MODE. */
+static void
+push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
+ enum machine_mode mode, rtx value)
+{
+ Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
+
+ fix->insn = insn;
+ fix->address = address;
+ fix->loc = loc;
+ fix->mode = mode;
+ fix->fix_size = MINIPOOL_FIX_SIZE (mode);
+ fix->value = value;
+ fix->forwards = get_attr_pool_range (insn);
+ fix->backwards = get_attr_neg_pool_range (insn);
+ fix->minipool = NULL;
+
+ /* If an insn doesn't have a range defined for it, then it isn't
+ expecting to be reworked by this code. Better to stop now than
+ to generate duff assembly code. */
+ gcc_assert (fix->forwards || fix->backwards);
+
+ /* If an entry requires 8-byte alignment then assume all constant pools
+ require 4 bytes of padding. Trying to do this later on a per-pool
+ basis is awkward because existing pool entries have to be modified. */
+ if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
+ minipool_pad = 4;
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
+ GET_MODE_NAME (mode),
+ INSN_UID (insn), (unsigned long) address,
+ -1 * (long)fix->backwards, (long)fix->forwards);
+ arm_print_value (dump_file, fix->value);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Add it to the chain of fixes. */
+ fix->next = NULL;
+
+ if (minipool_fix_head != NULL)
+ minipool_fix_tail->next = fix;
+ else
+ minipool_fix_head = fix;
+
+ minipool_fix_tail = fix;
+}
+
+/* Return the cost of synthesizing a 64-bit constant VAL inline.
+ Returns the number of insns needed, or 99 if we don't know how to
+ do it. */
+int
+arm_const_double_inline_cost (rtx val)
+{
+ rtx lowpart, highpart;
+ enum machine_mode mode;
+
+ mode = GET_MODE (val);
+
+ if (mode == VOIDmode)
+ mode = DImode;
+
+ gcc_assert (GET_MODE_SIZE (mode) == 8);
+
+ lowpart = gen_lowpart (SImode, val);
+ highpart = gen_highpart_mode (SImode, mode, val);
+
+ gcc_assert (GET_CODE (lowpart) == CONST_INT);
+ gcc_assert (GET_CODE (highpart) == CONST_INT);
+
+ return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
+ NULL_RTX, NULL_RTX, 0, 0)
+ + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
+ NULL_RTX, NULL_RTX, 0, 0));
+}
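+
+/* Illustrative example (editorial sketch): for the DImode constant
+   0x0000000100000001 both halves are 1, which const_ok_for_arm accepts as a
+   single data-processing immediate, so the cost computed above should be
+   1 + 1 = 2.  */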
+
+/* Return true if it is worthwhile to split a 64-bit constant into two
+ 32-bit operations. This is the case if optimizing for size, or
+ if we have load delay slots, or if one 32-bit part can be done with
+ a single data operation. */
+bool
+arm_const_double_by_parts (rtx val)
+{
+ enum machine_mode mode = GET_MODE (val);
+ rtx part;
+
+ if (optimize_size || arm_ld_sched)
+ return true;
+
+ if (mode == VOIDmode)
+ mode = DImode;
+
+ part = gen_highpart_mode (SImode, mode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (const_ok_for_arm (INTVAL (part))
+ || const_ok_for_arm (~INTVAL (part)))
+ return true;
+
+ part = gen_lowpart (SImode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (const_ok_for_arm (INTVAL (part))
+ || const_ok_for_arm (~INTVAL (part)))
+ return true;
+
+ return false;
+}
+
+/* Return true if it is possible to inline both the high and low parts
+ of a 64-bit constant into 32-bit data processing instructions. */
+bool
+arm_const_double_by_immediates (rtx val)
+{
+ enum machine_mode mode = GET_MODE (val);
+ rtx part;
+
+ if (mode == VOIDmode)
+ mode = DImode;
+
+ part = gen_highpart_mode (SImode, mode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (!const_ok_for_arm (INTVAL (part)))
+ return false;
+
+ part = gen_lowpart (SImode, val);
+
+ gcc_assert (GET_CODE (part) == CONST_INT);
+
+ if (!const_ok_for_arm (INTVAL (part)))
+ return false;
+
+ return true;
+}
+
+/* Scan INSN and note any of its operands that need fixing.
+ If DO_PUSHES is false we do not actually push any of the fixups
+ needed. The function returns TRUE if any fixups were needed/pushed.
+ This is used by arm_memory_load_p() which needs to know about loads
+ of constants that will be converted into minipool loads. */
+static bool
+note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
+{
+ bool result = false;
+ int opno;
+
+ extract_insn (insn);
+
+ if (!constrain_operands (1))
+ fatal_insn_not_found (insn);
+
+ if (recog_data.n_alternatives == 0)
+ return false;
+
+ /* Fill in recog_op_alt with information about the constraints of
+ this insn. */
+ preprocess_constraints ();
+
+ for (opno = 0; opno < recog_data.n_operands; opno++)
+ {
+ /* Things we need to fix can only occur in inputs. */
+ if (recog_data.operand_type[opno] != OP_IN)
+ continue;
+
+ /* If this alternative is a memory reference, then any mention
+ of constants in this alternative is really to fool reload
+ into allowing us to accept one there. We need to fix them up
+ now so that we output the right code. */
+ if (recog_op_alt[opno][which_alternative].memory_ok)
+ {
+ rtx op = recog_data.operand[opno];
+
+ if (CONSTANT_P (op))
+ {
+ if (do_pushes)
+ push_minipool_fix (insn, address, recog_data.operand_loc[opno],
+ recog_data.operand_mode[opno], op);
+ result = true;
+ }
+ else if (GET_CODE (op) == MEM
+ && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
+ {
+ if (do_pushes)
+ {
+ rtx cop = avoid_constant_pool_reference (op);
+
+ /* Casting the address of something to a mode narrower
+ than a word can cause avoid_constant_pool_reference()
+ to return the pool reference itself. That's no good to
+		     us here.  Let's just hope that we can use the
+ constant pool value directly. */
+ if (op == cop)
+ cop = get_pool_constant (XEXP (op, 0));
+
+ push_minipool_fix (insn, address,
+ recog_data.operand_loc[opno],
+ recog_data.operand_mode[opno], cop);
+ }
+
+ result = true;
+ }
+ }
+ }
+
+ return result;
+}
+
+/* Convert instructions to their cc-clobbering variant if possible, since
+ that allows us to use smaller encodings. */
+
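+/* Illustrative example (editorial note): a Thumb-2 "add r0, r0, r1" whose
+   condition-code output is dead is rewritten below with an explicit CC
+   clobber, allowing the compiler to use the 16-bit flag-setting "adds"
+   encoding.  */
+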
+static void
+thumb2_reorg (void)
+{
+ basic_block bb;
+ regset_head live;
+
+ INIT_REG_SET (&live);
+
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+ df_analyze ();
+
+ FOR_EACH_BB (bb)
+ {
+ rtx insn;
+
+ COPY_REG_SET (&live, DF_LR_OUT (bb));
+ df_simulate_initialize_backwards (bb, &live);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (NONJUMP_INSN_P (insn)
+ && !REGNO_REG_SET_P (&live, CC_REGNUM))
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SET
+ && low_register_operand (XEXP (pat, 0), SImode)
+ && thumb_16bit_operator (XEXP (pat, 1), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
+ && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
+ {
+ rtx dst = XEXP (pat, 0);
+ rtx src = XEXP (pat, 1);
+ rtx op0 = XEXP (src, 0);
+ rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
+ ? XEXP (src, 1) : NULL);
+
+ if (rtx_equal_p (dst, op0)
+ || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec = gen_rtvec (2, pat, clobber);
+
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ /* We can also handle a commutative operation where the
+ second operand matches the destination. */
+ else if (op1 && rtx_equal_p (dst, op1))
+ {
+ rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
+ rtvec vec;
+
+ src = copy_rtx (src);
+ XEXP (src, 0) = op1;
+ XEXP (src, 1) = op0;
+ pat = gen_rtx_SET (VOIDmode, dst, src);
+ vec = gen_rtvec (2, pat, clobber);
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+
+ if (NONDEBUG_INSN_P (insn))
+ df_simulate_one_insn_backwards (bb, insn, &live);
+ }
+ }
+
+ CLEAR_REG_SET (&live);
+}
+
+/* GCC puts the pool in the wrong place for ARM, since we can only
+ load addresses a limited distance around the pc. We do some
+ special munging to move the constant pool values to the correct
+ point in the code. */
+static void
+arm_reorg (void)
+{
+ rtx insn;
+ HOST_WIDE_INT address = 0;
+ Mfix * fix;
+
+ if (TARGET_THUMB2)
+ thumb2_reorg ();
+
+ minipool_fix_head = minipool_fix_tail = NULL;
+
+ /* The first insn must always be a note, or the code below won't
+ scan it properly. */
+ insn = get_insns ();
+ gcc_assert (GET_CODE (insn) == NOTE);
+ minipool_pad = 0;
+
+ /* Scan all the insns and record the operands that will need fixing. */
+ for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
+ {
+ if (TARGET_CIRRUS_FIX_INVALID_INSNS
+ && (arm_cirrus_insn_p (insn)
+ || GET_CODE (insn) == JUMP_INSN
+ || arm_memory_load_p (insn)))
+ cirrus_reorg (insn);
+
+ if (GET_CODE (insn) == BARRIER)
+ push_minipool_barrier (insn, address);
+ else if (INSN_P (insn))
+ {
+ rtx table;
+
+ note_invalid_constants (insn, address, true);
+ address += get_attr_length (insn);
+
+ /* If the insn is a vector jump, add the size of the table
+ and skip the table. */
+ if ((table = is_jump_table (insn)) != NULL)
+ {
+ address += get_jump_table_size (table);
+ insn = table;
+ }
+ }
+ }
+
+ fix = minipool_fix_head;
+
+ /* Now scan the fixups and perform the required changes. */
+ while (fix)
+ {
+ Mfix * ftmp;
+ Mfix * fdel;
+ Mfix * last_added_fix;
+ Mfix * last_barrier = NULL;
+ Mfix * this_fix;
+
+ /* Skip any further barriers before the next fix. */
+ while (fix && GET_CODE (fix->insn) == BARRIER)
+ fix = fix->next;
+
+ /* No more fixes. */
+ if (fix == NULL)
+ break;
+
+ last_added_fix = NULL;
+
+ for (ftmp = fix; ftmp; ftmp = ftmp->next)
+ {
+ if (GET_CODE (ftmp->insn) == BARRIER)
+ {
+ if (ftmp->address >= minipool_vector_head->max_address)
+ break;
+
+ last_barrier = ftmp;
+ }
+ else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
+ break;
+
+ last_added_fix = ftmp; /* Keep track of the last fix added. */
+ }
+
+ /* If we found a barrier, drop back to that; any fixes that we
+ could have reached but come after the barrier will now go in
+ the next mini-pool. */
+ if (last_barrier != NULL)
+ {
+ /* Reduce the refcount for those fixes that won't go into this
+ pool after all. */
+ for (fdel = last_barrier->next;
+ fdel && fdel != ftmp;
+ fdel = fdel->next)
+ {
+ fdel->minipool->refcount--;
+ fdel->minipool = NULL;
+ }
+
+ ftmp = last_barrier;
+ }
+ else
+ {
+	  /* ftmp is the first fix that we can't fit into this pool and
+	     there are no natural barriers that we could use.  Insert a
+ new barrier in the code somewhere between the previous
+ fix and this one, and arrange to jump around it. */
+ HOST_WIDE_INT max_address;
+
+ /* The last item on the list of fixes must be a barrier, so
+ we can never run off the end of the list of fixes without
+ last_barrier being set. */
+ gcc_assert (ftmp);
+
+ max_address = minipool_vector_head->max_address;
+ /* Check that there isn't another fix that is in range that
+ we couldn't fit into this pool because the pool was
+ already too large: we need to put the pool before such an
+ instruction. The pool itself may come just after the
+ fix because create_fix_barrier also allows space for a
+ jump instruction. */
+ if (ftmp->address < max_address)
+ max_address = ftmp->address + 1;
+
+ last_barrier = create_fix_barrier (last_added_fix, max_address);
+ }
+
+ assign_minipool_offsets (last_barrier);
+
+ while (ftmp)
+ {
+ if (GET_CODE (ftmp->insn) != BARRIER
+ && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
+ == NULL))
+ break;
+
+ ftmp = ftmp->next;
+ }
+
+ /* Scan over the fixes we have identified for this pool, fixing them
+ up and adding the constants to the pool itself. */
+ for (this_fix = fix; this_fix && ftmp != this_fix;
+ this_fix = this_fix->next)
+ if (GET_CODE (this_fix->insn) != BARRIER)
+ {
+ rtx addr
+ = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
+ minipool_vector_label),
+ this_fix->minipool->offset);
+ *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
+ }
+
+ dump_minipool (last_barrier->insn);
+ fix = ftmp;
+ }
+
+ /* From now on we must synthesize any constants that we can't handle
+ directly. This can happen if the RTL gets split during final
+ instruction generation. */
+ after_arm_reorg = 1;
+
+ /* Free the minipool memory. */
+ obstack_free (&minipool_obstack, minipool_startobj);
+}
+
+/* Routines to output assembly language. */
+
+/* If the rtx X is one of the known FP immediate constants, return the
+   string representing its value.  In this way we can ensure that valid
+   double constants are generated even when cross compiling.  */
+const char *
+fp_immediate_constant (rtx x)
+{
+ REAL_VALUE_TYPE r;
+ int i;
+
+ if (!fp_consts_inited)
+ init_fp_table ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ for (i = 0; i < 8; i++)
+ if (REAL_VALUES_EQUAL (r, values_fp[i]))
+ return strings_fp[i];
+
+ gcc_unreachable ();
+}
+
+/* As for fp_immediate_constant, but value is passed directly, not in rtx. */
+static const char *
+fp_const_from_val (REAL_VALUE_TYPE *r)
+{
+ int i;
+
+ if (!fp_consts_inited)
+ init_fp_table ();
+
+ for (i = 0; i < 8; i++)
+ if (REAL_VALUES_EQUAL (*r, values_fp[i]))
+ return strings_fp[i];
+
+ gcc_unreachable ();
+}
+
+/* Output the operands of a LDM/STM instruction to STREAM.
+ MASK is the ARM register set mask of which only bits 0-15 are important.
+   REG is the base register, either the frame pointer or the stack pointer.
+ INSTR is the possibly suffixed load or store instruction.
+ RFE is nonzero if the instruction should also copy spsr to cpsr. */
+
+static void
+print_multi_reg (FILE *stream, const char *instr, unsigned reg,
+ unsigned long mask, int rfe)
+{
+ unsigned i;
+ bool not_first = FALSE;
+
+ gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
+ fputc ('\t', stream);
+ asm_fprintf (stream, instr, reg);
+ fputc ('{', stream);
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (mask & (1 << i))
+ {
+ if (not_first)
+ fprintf (stream, ", ");
+
+ asm_fprintf (stream, "%r", i);
+ not_first = TRUE;
+ }
+
+ if (rfe)
+ fprintf (stream, "}^\n");
+ else
+ fprintf (stream, "}\n");
+}
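+
+/* As an illustrative sketch (operand values assumed, not taken from real
+   RTL): a call such as
+
+     print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM,
+		      (1 << 4) | (1 << 5) | (1 << LR_REGNUM), 0);
+
+   would print
+
+	ldmfd	sp!, {r4, r5, lr}
+
+   while a nonzero RFE (which requires the PC in MASK) closes the list
+   with "}^" so that the SPSR is copied back into the CPSR.  */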
+
+
+/* Output a FLDMD instruction to STREAM.
+   BASE is the register containing the address.
+ REG and COUNT specify the register range.
+ Extra registers may be added to avoid hardware bugs.
+
+ We output FLDMD even for ARMv5 VFP implementations. Although
+ FLDMD is technically not supported until ARMv6, it is believed
+ that all VFP implementations support its use in this context. */
+
+static void
+vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
+{
+ int i;
+
+ /* Workaround ARM10 VFPr1 bug. */
+ if (count == 2 && !arm_arch6)
+ {
+ if (reg == 15)
+ reg--;
+ count++;
+ }
+
+ /* FLDMD may not load more than 16 doubleword registers at a time. Split the
+ load into multiple parts if we have to handle more than 16 registers. */
+ if (count > 16)
+ {
+ vfp_output_fldmd (stream, base, reg, 16);
+ vfp_output_fldmd (stream, base, reg + 16, count - 16);
+ return;
+ }
+
+ fputc ('\t', stream);
+ asm_fprintf (stream, "fldmfdd\t%r!, {", base);
+
+ for (i = reg; i < reg + count; i++)
+ {
+ if (i > reg)
+ fputs (", ", stream);
+ asm_fprintf (stream, "d%d", i);
+ }
+  fputs ("}\n", stream);
+}
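+
+/* For illustration (argument values assumed): vfp_output_fldmd (f,
+   IP_REGNUM, 8, 3) should print
+
+	fldmfdd	ip!, {d8, d9, d10}
+
+   and on pre-ARMv6 cores a two-register transfer is widened to three by
+   the VFPr1 workaround above.  */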
+
+
+/* Output the assembly for a store multiple. */
+
+const char *
+vfp_output_fstmd (rtx * operands)
+{
+ char pattern[100];
+ int p;
+ int base;
+ int i;
+
+ strcpy (pattern, "fstmfdd\t%m0!, {%P1");
+ p = strlen (pattern);
+
+ gcc_assert (GET_CODE (operands[1]) == REG);
+
+ base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
+ for (i = 1; i < XVECLEN (operands[2], 0); i++)
+ {
+ p += sprintf (&pattern[p], ", d%d", base + i);
+ }
+ strcpy (&pattern[p], "}");
+
+ output_asm_insn (pattern, operands);
+ return "";
+}
+
+
+/* Emit RTL to save block of VFP register pairs to the stack. Returns the
+ number of bytes pushed. */
+
+static int
+vfp_emit_fstmd (int base_reg, int count)
+{
+ rtx par;
+ rtx dwarf;
+ rtx tmp, reg;
+ int i;
+
+ /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
+ register pairs are stored by a store multiple insn. We avoid this
+ by pushing an extra pair. */
+ if (count == 2 && !arm_arch6)
+ {
+ if (base_reg == LAST_VFP_REGNUM - 3)
+ base_reg -= 2;
+ count++;
+ }
+
+ /* FSTMD may not store more than 16 doubleword registers at once. Split
+ larger stores into multiple parts (up to a maximum of two, in
+ practice). */
+ if (count > 16)
+ {
+ int saved;
+ /* NOTE: base_reg is an internal register number, so each D register
+ counts as 2. */
+ saved = vfp_emit_fstmd (base_reg + 32, count - 16);
+ saved += vfp_emit_fstmd (base_reg, 16);
+ return saved;
+ }
+
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
+
+ reg = gen_rtx_REG (DFmode, base_reg);
+ base_reg += 2;
+
+ XVECEXP (par, 0, 0)
+ = gen_rtx_SET (VOIDmode,
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ - (count * 8)))
+ ),
+ gen_rtx_UNSPEC (BLKmode,
+ gen_rtvec (1, reg),
+ UNSPEC_PUSH_MULT));
+
+ tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -(count * 8)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, 0) = tmp;
+
+ tmp = gen_rtx_SET (VOIDmode,
+ gen_frame_mem (DFmode, stack_pointer_rtx),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, 1) = tmp;
+
+ for (i = 1; i < count; i++)
+ {
+ reg = gen_rtx_REG (DFmode, base_reg);
+ base_reg += 2;
+ XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
+
+ tmp = gen_rtx_SET (VOIDmode,
+ gen_frame_mem (DFmode,
+ plus_constant (stack_pointer_rtx,
+ i * 8)),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, i + 1) = tmp;
+ }
+
+ par = emit_insn (par);
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+ RTX_FRAME_RELATED_P (par) = 1;
+
+ return count * 8;
+}
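+
+/* Sketch of the expected effect (arguments assumed): vfp_emit_fstmd with
+   BASE_REG = FIRST_VFP_REGNUM and COUNT = 4 builds a PARALLEL pushing
+   d0-d3 (later printed as "fstmfdd sp!, {d0, d1, d2, d3}" by
+   vfp_output_fstmd), attaches a REG_FRAME_RELATED_EXPR note recording
+   sp = sp - 32 and the four stored slots, and returns 32.  */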
+
+/* Emit a call instruction with pattern PAT. ADDR is the address of
+ the call target. */
+
+void
+arm_emit_call_insn (rtx pat, rtx addr)
+{
+ rtx insn;
+
+ insn = emit_call_insn (pat);
+
+ /* The PIC register is live on entry to VxWorks PIC PLT entries.
+ If the call might use such an entry, add a use of the PIC register
+ to the instruction's CALL_INSN_FUNCTION_USAGE. */
+ if (TARGET_VXWORKS_RTP
+ && flag_pic
+ && GET_CODE (addr) == SYMBOL_REF
+ && (SYMBOL_REF_DECL (addr)
+ ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
+ : !SYMBOL_REF_LOCAL_P (addr)))
+ {
+ require_pic_register ();
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
+ }
+}
+
+/* Output a 'call' insn. */
+const char *
+output_call (rtx *operands)
+{
+ gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
+
+ /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
+ if (REGNO (operands[0]) == LR_REGNUM)
+ {
+ operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
+ output_asm_insn ("mov%?\t%0, %|lr", operands);
+ }
+
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+
+ if (TARGET_INTERWORK || arm_arch4t)
+ output_asm_insn ("bx%?\t%0", operands);
+ else
+ output_asm_insn ("mov%?\t%|pc, %0", operands);
+
+ return "";
+}
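+
+/* For instance (operand assumed, pre-ARMv5): a call through r4 expands to
+
+	mov	lr, pc
+	mov	pc, r4
+
+   (or "bx r4" when interworking or ARMv4T); the lr-to-ip shuffle above
+   only triggers when the call target register is lr itself.  */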
+
+/* Output a 'call' insn that is a reference in memory.  This is disabled
+   for ARMv5, where we prefer a blx instead, because otherwise there's
+   a significant performance overhead.  */
+const char *
+output_call_mem (rtx *operands)
+{
+ gcc_assert (!arm_arch5);
+ if (TARGET_INTERWORK)
+ {
+ output_asm_insn ("ldr%?\t%|ip, %0", operands);
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ output_asm_insn ("bx%?\t%|ip", operands);
+ }
+ else if (regno_use_in (LR_REGNUM, operands[0]))
+ {
+ /* LR is used in the memory address. We load the address in the
+ first instruction. It's safe to use IP as the target of the
+ load since the call will kill it anyway. */
+ output_asm_insn ("ldr%?\t%|ip, %0", operands);
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ if (arm_arch4t)
+ output_asm_insn ("bx%?\t%|ip", operands);
+ else
+ output_asm_insn ("mov%?\t%|pc, %|ip", operands);
+ }
+ else
+ {
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+ output_asm_insn ("ldr%?\t%|pc, %0", operands);
+ }
+
+ return "";
+}
+
+
+/* Output a move from ARM registers to an FPA register.
+   OPERANDS[0] is an FPA register.
+   OPERANDS[1] is the first of the three ARM registers holding the value.  */
+const char *
+output_mov_long_double_fpa_from_arm (rtx *operands)
+{
+ int arm_reg0 = REGNO (operands[1]);
+ rtx ops[3];
+
+ gcc_assert (arm_reg0 != IP_REGNUM);
+
+ ops[0] = gen_rtx_REG (SImode, arm_reg0);
+ ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
+ ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
+
+ output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
+ output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
+
+ return "";
+}
+
+/* Output a move from an FPA register to ARM registers.
+   OPERANDS[0] is the first of the three ARM registers receiving the value.
+   OPERANDS[1] is an FPA register.  */
+const char *
+output_mov_long_double_arm_from_fpa (rtx *operands)
+{
+ int arm_reg0 = REGNO (operands[0]);
+ rtx ops[3];
+
+ gcc_assert (arm_reg0 != IP_REGNUM);
+
+ ops[0] = gen_rtx_REG (SImode, arm_reg0);
+ ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
+ ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
+
+ output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
+ output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
+ return "";
+}
+
+/* Output a long double move between ARM registers.
+   OPERANDS[0] is the destination.
+   OPERANDS[1] is the source.  */
+const char *
+output_mov_long_double_arm_from_arm (rtx *operands)
+{
+ /* We have to be careful here because the two might overlap. */
+ int dest_start = REGNO (operands[0]);
+ int src_start = REGNO (operands[1]);
+ rtx ops[2];
+ int i;
+
+ if (dest_start < src_start)
+ {
+ for (i = 0; i < 3; i++)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest_start + i);
+ ops[1] = gen_rtx_REG (SImode, src_start + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ }
+ else
+ {
+ for (i = 2; i >= 0; i--)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest_start + i);
+ ops[1] = gen_rtx_REG (SImode, src_start + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ }
+
+ return "";
+}
+
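+/* Emit the pair of set insns needed to copy SRC into DEST.  For a
+   constant, move the low 16 bits and then, if they are nonzero, insert
+   the high 16 bits with a ZERO_EXTRACT of DEST (a movw/movt style
+   sequence); e.g. 0x12345678 becomes DEST = 0x5678 followed by
+   DEST[31:16] = 0x1234.  Anything else is emitted as a HIGH/LO_SUM
+   pair.  */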
+void
+arm_emit_movpair (rtx dest, rtx src)
+{
+  /* If the src is an immediate, simplify it.  */
+  if (CONST_INT_P (src))
+    {
+      HOST_WIDE_INT val = INTVAL (src);
+      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
+      if ((val >> 16) & 0x0000ffff)
+	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
+					     GEN_INT (16)),
+		       GEN_INT ((val >> 16) & 0x0000ffff));
+      return;
+    }
+  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
+  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
+}
+
+/* Output a move from ARM registers to an FPA register.
+   OPERANDS[0] is an FPA register.
+   OPERANDS[1] is the first register of an ARM register pair.  */
+const char *
+output_mov_double_fpa_from_arm (rtx *operands)
+{
+ int arm_reg0 = REGNO (operands[1]);
+ rtx ops[2];
+
+ gcc_assert (arm_reg0 != IP_REGNUM);
+
+ ops[0] = gen_rtx_REG (SImode, arm_reg0);
+ ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
+ output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
+ output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
+ return "";
+}
+
+/* Output a move from an FPA register to ARM registers.
+   OPERANDS[0] is the first register of an ARM register pair.
+   OPERANDS[1] is an FPA register.  */
+const char *
+output_mov_double_arm_from_fpa (rtx *operands)
+{
+ int arm_reg0 = REGNO (operands[0]);
+ rtx ops[2];
+
+ gcc_assert (arm_reg0 != IP_REGNUM);
+
+ ops[0] = gen_rtx_REG (SImode, arm_reg0);
+ ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
+ output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
+ output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
+ return "";
+}
+
+/* Output a move between double words. It must be REG<-MEM
+ or MEM<-REG. */
+const char *
+output_move_double (rtx *operands)
+{
+ enum rtx_code code0 = GET_CODE (operands[0]);
+ enum rtx_code code1 = GET_CODE (operands[1]);
+ rtx otherops[3];
+
+ if (code0 == REG)
+ {
+ unsigned int reg0 = REGNO (operands[0]);
+
+ otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
+
+ gcc_assert (code1 == MEM); /* Constraints should ensure this. */
+
+ switch (GET_CODE (XEXP (operands[1], 0)))
+ {
+ case REG:
+ if (TARGET_LDRD
+	    && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
+ output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
+ else
+ output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+ break;
+
+ case PRE_INC:
+ gcc_assert (TARGET_LDRD);
+ output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
+ break;
+
+ case PRE_DEC:
+ if (TARGET_LDRD)
+ output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
+ else
+ output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
+ break;
+
+ case POST_INC:
+ if (TARGET_LDRD)
+ output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
+ else
+ output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
+ break;
+
+ case POST_DEC:
+ gcc_assert (TARGET_LDRD);
+ output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
+ break;
+
+ case PRE_MODIFY:
+ case POST_MODIFY:
+	    /* Autoincrement addressing modes should never have overlapping
+	       base and destination registers, and overlapping index
+	       registers are already prohibited, so this doesn't need to
+	       worry about fix_cm3_ldrd.  */
+ otherops[0] = operands[0];
+ otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
+ otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
+
+ if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
+ {
+ if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
+ {
+ /* Registers overlap so split out the increment. */
+ output_asm_insn ("add%?\t%1, %1, %2", otherops);
+ output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
+ }
+ else
+ {
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can
+ handle, fix these up with a pair of ldr. */
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
+ }
+ }
+ }
+ else
+ {
+ /* Use a single insn if we can.
+ FIXME: IWMMXT allows offsets larger than ldrd can handle,
+ fix these up with a pair of ldr. */
+ if (TARGET_THUMB2
+ || GET_CODE (otherops[2]) != CONST_INT
+ || (INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256))
+ output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
+ else
+ {
+ output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
+ output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
+ }
+ }
+ break;
+
+ case LABEL_REF:
+ case CONST:
+	  /* We might be able to use ldrd %0, %1 here.  However the range
+	     is different from that of ldr/adr, and it is broken on some
+	     ARMv7-M implementations.  */
+ /* Use the second register of the pair to avoid problematic
+ overlap. */
+ otherops[1] = operands[1];
+ output_asm_insn ("adr%?\t%0, %1", otherops);
+ operands[1] = otherops[0];
+ if (TARGET_LDRD)
+ output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
+ else
+ output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
+ break;
+
+ /* ??? This needs checking for thumb2. */
+ default:
+ if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
+ GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
+ {
+ otherops[0] = operands[0];
+ otherops[1] = XEXP (XEXP (operands[1], 0), 0);
+ otherops[2] = XEXP (XEXP (operands[1], 0), 1);
+
+ if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
+ {
+ if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
+ {
+ switch ((int) INTVAL (otherops[2]))
+ {
+ case -8:
+ output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
+ return "";
+ case -4:
+ if (TARGET_THUMB2)
+ break;
+ output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
+ return "";
+ case 4:
+ if (TARGET_THUMB2)
+ break;
+ output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
+ return "";
+ }
+ }
+		  otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ operands[1] = otherops[0];
+ if (TARGET_LDRD
+ && (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
+ || (GET_CODE (otherops[2]) == CONST_INT
+ && INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256)))
+ {
+ if (reg_overlap_mentioned_p (operands[0],
+ otherops[2]))
+ {
+ rtx tmp;
+ /* Swap base and index registers over to
+ avoid a conflict. */
+ tmp = otherops[1];
+ otherops[1] = otherops[2];
+ otherops[2] = tmp;
+ }
+ /* If both registers conflict, it will usually
+ have been fixed by a splitter. */
+ if (reg_overlap_mentioned_p (operands[0], otherops[2])
+ || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
+ {
+ output_asm_insn ("add%?\t%0, %1, %2", otherops);
+ output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
+ }
+ else
+ {
+ otherops[0] = operands[0];
+ output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
+ }
+ return "";
+ }
+
+ if (GET_CODE (otherops[2]) == CONST_INT)
+ {
+ if (!(const_ok_for_arm (INTVAL (otherops[2]))))
+ output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
+ else
+ output_asm_insn ("add%?\t%0, %1, %2", otherops);
+ }
+ else
+ output_asm_insn ("add%?\t%0, %1, %2", otherops);
+ }
+ else
+ output_asm_insn ("sub%?\t%0, %1, %2", otherops);
+
+ if (TARGET_LDRD)
+ return "ldr%(d%)\t%0, [%1]";
+
+ return "ldm%(ia%)\t%1, %M0";
+ }
+ else
+ {
+ otherops[1] = adjust_address (operands[1], SImode, 4);
+ /* Take care of overlapping base/data reg. */
+ if (reg_mentioned_p (operands[0], operands[1]))
+ {
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+ output_asm_insn ("ldr%?\t%0, %1", otherops);
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Constraints should ensure this. */
+ gcc_assert (code0 == MEM && code1 == REG);
+ gcc_assert (REGNO (operands[1]) != IP_REGNUM);
+
+ switch (GET_CODE (XEXP (operands[0], 0)))
+ {
+ case REG:
+ if (TARGET_LDRD)
+ output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
+ else
+ output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+ break;
+
+ case PRE_INC:
+ gcc_assert (TARGET_LDRD);
+ output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
+ break;
+
+ case PRE_DEC:
+ if (TARGET_LDRD)
+ output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
+ else
+ output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
+ break;
+
+ case POST_INC:
+ if (TARGET_LDRD)
+ output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
+ else
+ output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
+ break;
+
+ case POST_DEC:
+ gcc_assert (TARGET_LDRD);
+ output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
+ break;
+
+ case PRE_MODIFY:
+ case POST_MODIFY:
+ otherops[0] = operands[1];
+ otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
+ otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
+
+	  /* IWMMXT allows offsets larger than strd can handle,
+	     fix these up with a pair of str.  */
+ if (!TARGET_THUMB2
+ && GET_CODE (otherops[2]) == CONST_INT
+	      && (INTVAL (otherops[2]) <= -256
+		  || INTVAL (otherops[2]) >= 256))
+ {
+ if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
+ {
+ output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
+ }
+ else
+ {
+ output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
+ output_asm_insn ("str%?\t%0, [%1], %2", otherops);
+ }
+ }
+ else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
+ output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
+ else
+ output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
+ break;
+
+ case PLUS:
+ otherops[2] = XEXP (XEXP (operands[0], 0), 1);
+ if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
+ {
+ switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
+ {
+ case -8:
+ output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
+ return "";
+
+ case -4:
+ if (TARGET_THUMB2)
+ break;
+ output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
+ return "";
+
+ case 4:
+ if (TARGET_THUMB2)
+ break;
+ output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
+ return "";
+ }
+ }
+ if (TARGET_LDRD
+ && (GET_CODE (otherops[2]) == REG
+ || TARGET_THUMB2
+ || (GET_CODE (otherops[2]) == CONST_INT
+ && INTVAL (otherops[2]) > -256
+ && INTVAL (otherops[2]) < 256)))
+ {
+ otherops[0] = operands[1];
+ otherops[1] = XEXP (XEXP (operands[0], 0), 0);
+ output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
+ return "";
+ }
+ /* Fall through */
+
+ default:
+ otherops[0] = adjust_address (operands[0], SImode, 4);
+ otherops[1] = operands[1];
+ output_asm_insn ("str%?\t%1, %0", operands);
+ output_asm_insn ("str%?\t%H1, %0", otherops);
+ }
+ }
+
+ return "";
+}
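+
+/* A pair of illustrative expansions (operands assumed): with TARGET_LDRD,
+   a DImode load from (mem (reg r2)) prints "ldrd r0, [r2]"; without it,
+   the same load falls back to "ldmia r2, {r0, r1}".  */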
+
+/* Output a move, load or store for quad-word vectors in ARM registers. Only
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
+
+const char *
+output_move_quad (rtx *operands)
+{
+ if (REG_P (operands[0]))
+ {
+ /* Load, or reg->reg move. */
+
+ if (MEM_P (operands[1]))
+ {
+ switch (GET_CODE (XEXP (operands[1], 0)))
+ {
+ case REG:
+ output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+ break;
+
+ case LABEL_REF:
+ case CONST:
+ output_asm_insn ("adr%?\t%0, %1", operands);
+ output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ rtx ops[2];
+ int dest, src, i;
+
+ gcc_assert (REG_P (operands[1]));
+
+ dest = REGNO (operands[0]);
+ src = REGNO (operands[1]);
+
+ /* This seems pretty dumb, but hopefully GCC won't try to do it
+ very often. */
+ if (dest < src)
+ for (i = 0; i < 4; i++)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest + i);
+ ops[1] = gen_rtx_REG (SImode, src + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ else
+ for (i = 3; i >= 0; i--)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest + i);
+ ops[1] = gen_rtx_REG (SImode, src + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ }
+ }
+ else
+ {
+ gcc_assert (MEM_P (operands[0]));
+ gcc_assert (REG_P (operands[1]));
+ gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
+
+ switch (GET_CODE (XEXP (operands[0], 0)))
+ {
+ case REG:
+ output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return "";
+}
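+
+/* Illustrative cases (operands assumed): a quad-word load from
+   (mem (reg r4)) prints "ldmia r4, {r0, r1, r2, r3}", while a reg->reg
+   move degenerates into four "mov" insns, ordered so that an overlapping
+   source is not clobbered before it is read.  */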
+
+/* Output a VFP load or store instruction. */
+
+const char *
+output_move_vfp (rtx *operands)
+{
+ rtx reg, mem, addr, ops[2];
+ int load = REG_P (operands[0]);
+ int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
+ int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
+ const char *templ;
+ char buff[50];
+ enum machine_mode mode;
+
+ reg = operands[!load];
+ mem = operands[load];
+
+ mode = GET_MODE (reg);
+
+ gcc_assert (REG_P (reg));
+ gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
+ gcc_assert (mode == SFmode
+ || mode == DFmode
+ || mode == SImode
+ || mode == DImode
+ || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case PRE_DEC:
+ templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ case POST_INC:
+ templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ default:
+ templ = "f%s%c%%?\t%%%s0, %%1%s";
+ ops[0] = reg;
+ ops[1] = mem;
+ break;
+ }
+
+ sprintf (buff, templ,
+ load ? "ld" : "st",
+ dp ? 'd' : 's',
+ dp ? "P" : "",
+ integer_p ? "\t%@ int" : "");
+ output_asm_insn (buff, ops);
+
+ return "";
+}
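+
+/* Two illustrative expansions (operands assumed): a DFmode load from a
+   plain register address goes through the default template and prints
+   something like
+
+	fldd	d8, [r0]
+
+   whereas the same load through a POST_INC address becomes
+   "fldmiad r0!, {d8}".  */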
+
+/* Output a Neon quad-word load or store, or a load or store for
+ larger structure modes.
+
+ WARNING: The ordering of elements is weird in big-endian mode,
+ because we use VSTM, as required by the EABI. GCC RTL defines
+   element ordering based on in-memory order.  This can differ
+   from the architectural ordering of elements within a NEON register.
+ The intrinsics defined in arm_neon.h use the NEON register element
+ ordering, not the GCC RTL element ordering.
+
+   For example, the in-memory ordering of a big-endian quadword
+ vector with 16-bit elements when stored from register pair {d0,d1}
+ will be (lowest address first, d0[N] is NEON register element N):
+
+ [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
+
+ When necessary, quadword registers (dN, dN+1) are moved to ARM
+ registers from rN in the order:
+
+ dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
+
+ So that STM/LDM can be used on vectors in ARM registers, and the
+ same memory layout will result as if VSTM/VLDM were used. */
+
+const char *
+output_move_neon (rtx *operands)
+{
+ rtx reg, mem, addr, ops[2];
+ int regno, load = REG_P (operands[0]);
+ const char *templ;
+ char buff[50];
+ enum machine_mode mode;
+
+ reg = operands[!load];
+ mem = operands[load];
+
+ mode = GET_MODE (reg);
+
+ gcc_assert (REG_P (reg));
+ regno = REGNO (reg);
+ gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
+ || NEON_REGNO_OK_FOR_QUAD (regno));
+ gcc_assert (VALID_NEON_DREG_MODE (mode)
+ || VALID_NEON_QREG_MODE (mode)
+ || VALID_NEON_STRUCT_MODE (mode));
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ /* Strip off const from addresses like (const (plus (...))). */
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case POST_INC:
+ templ = "v%smia%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ case PRE_DEC:
+ /* FIXME: We should be using vld1/vst1 here in BE mode? */
+ templ = "v%smdb%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ case POST_MODIFY:
+ /* FIXME: Not currently enabled in neon_vector_mem_operand. */
+ gcc_unreachable ();
+
+ case LABEL_REF:
+ case PLUS:
+ {
+ int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
+ int i;
+ int overlap = -1;
+ for (i = 0; i < nregs; i++)
+ {
+ /* We're only using DImode here because it's a convenient size. */
+ ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
+ ops[1] = adjust_address (mem, DImode, 8 * i);
+ if (reg_overlap_mentioned_p (ops[0], mem))
+ {
+ gcc_assert (overlap == -1);
+ overlap = i;
+ }
+ else
+ {
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ }
+ }
+ if (overlap != -1)
+ {
+ ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
+ ops[1] = adjust_address (mem, SImode, 8 * overlap);
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ }
+
+ return "";
+ }
+
+ default:
+ templ = "v%smia%%?\t%%m0, %%h1";
+ ops[0] = mem;
+ ops[1] = reg;
+ }
+
+ sprintf (buff, templ, load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+
+ return "";
+}
+
+/* Compute and return the length of neon_mov<mode>, where <mode> is
+ one of VSTRUCT modes: EI, OI, CI or XI. */
+int
+arm_attr_length_move_neon (rtx insn)
+{
+ rtx reg, mem, addr;
+ int load;
+ enum machine_mode mode;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
+ {
+ mode = GET_MODE (recog_data.operand[0]);
+ switch (mode)
+ {
+ case EImode:
+ case OImode:
+ return 8;
+ case CImode:
+ return 12;
+ case XImode:
+ return 16;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ load = REG_P (recog_data.operand[0]);
+ reg = recog_data.operand[!load];
+ mem = recog_data.operand[load];
+
+ gcc_assert (MEM_P (mem));
+
+ mode = GET_MODE (reg);
+ addr = XEXP (mem, 0);
+
+ /* Strip off const from addresses like (const (plus (...))). */
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
+ {
+ int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
+ return insns * 4;
+ }
+ else
+ return 4;
+}
+
+/* Return nonzero if the offset in the address is an immediate. Otherwise,
+ return zero. */
+
+int
+arm_address_offset_is_imm (rtx insn)
+{
+ rtx mem, addr;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]))
+ return 0;
+
+ mem = recog_data.operand[0];
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (GET_CODE (addr) == REG
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT))
+ return 1;
+ else
+ return 0;
+}
+
+/* Output an ADD r, s, #n where n may be too big for one instruction.
+ If adding zero to one register, output nothing. */
+const char *
+output_add_immediate (rtx *operands)
+{
+ HOST_WIDE_INT n = INTVAL (operands[2]);
+
+ if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
+ {
+ if (n < 0)
+ output_multi_immediate (operands,
+ "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
+ -n);
+ else
+ output_multi_immediate (operands,
+ "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
+ n);
+ }
+
+ return "";
+}
+
+/* Output a multiple immediate operation.
+ OPERANDS is the vector of operands referred to in the output patterns.
+ INSTR1 is the output pattern to use for the first constant.
+ INSTR2 is the output pattern to use for subsequent constants.
+ IMMED_OP is the index of the constant slot in OPERANDS.
+ N is the constant value. */
+static const char *
+output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
+ int immed_op, HOST_WIDE_INT n)
+{
+#if HOST_BITS_PER_WIDE_INT > 32
+ n &= 0xffffffff;
+#endif
+
+ if (n == 0)
+ {
+ /* Quick and easy output. */
+ operands[immed_op] = const0_rtx;
+ output_asm_insn (instr1, operands);
+ }
+ else
+ {
+ int i;
+ const char * instr = instr1;
+
+ /* Note that n is never zero here (which would give no output). */
+ for (i = 0; i < 32; i += 2)
+ {
+ if (n & (3 << i))
+ {
+ operands[immed_op] = GEN_INT (n & (255 << i));
+ output_asm_insn (instr, operands);
+ instr = instr2;
+ i += 6;
+ }
+ }
+ }
+
+ return "";
+}
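+
+/* Worked example (values assumed): for N = 0x00ff00ff the loop above
+   finds the rotatable 8-bit chunks 0xff and 0xff0000, so via
+   output_add_immediate's patterns it would emit
+
+	add	r0, r1, #255
+	add	r0, r0, #16711680
+
+   (register numbers purely illustrative).  */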
+
+/* Return the name of a shifter operation. */
+static const char *
+arm_shift_nmem (enum rtx_code code)
+{
+ switch (code)
+ {
+ case ASHIFT:
+ return ARM_LSL_NAME;
+
+ case ASHIFTRT:
+ return "asr";
+
+ case LSHIFTRT:
+ return "lsr";
+
+ case ROTATERT:
+ return "ror";
+
+ default:
+      abort ();
+ }
+}
+
+/* Return the appropriate ARM instruction for the operation code.
+ The returned result should not be overwritten. OP is the rtx of the
+ operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
+ was shifted. */
+const char *
+arithmetic_instr (rtx op, int shift_first_arg)
+{
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ return "add";
+
+ case MINUS:
+ return shift_first_arg ? "rsb" : "sub";
+
+ case IOR:
+ return "orr";
+
+ case XOR:
+ return "eor";
+
+ case AND:
+ return "and";
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+      return arm_shift_nmem (GET_CODE (op));
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Ensure valid constant shifts and return the appropriate shift mnemonic
+   for the operation code.  The returned result should not be overwritten.
+   OP is the rtx of the shift.
+   On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise
+   it will contain the constant shift amount.  */
+static const char *
+shift_op (rtx op, HOST_WIDE_INT *amountp)
+{
+ const char * mnem;
+ enum rtx_code code = GET_CODE (op);
+
+ switch (GET_CODE (XEXP (op, 1)))
+ {
+ case REG:
+ case SUBREG:
+ *amountp = -1;
+ break;
+
+ case CONST_INT:
+ *amountp = INTVAL (XEXP (op, 1));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case ROTATE:
+ gcc_assert (*amountp != -1);
+ *amountp = 32 - *amountp;
+ code = ROTATERT;
+
+ /* Fall through. */
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+      mnem = arm_shift_nmem (code);
+ break;
+
+ case MULT:
+ /* We never have to worry about the amount being other than a
+ power of 2, since this case can never be reloaded from a reg. */
+ gcc_assert (*amountp != -1);
+ *amountp = int_log2 (*amountp);
+ return ARM_LSL_NAME;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (*amountp != -1)
+ {
+ /* This is not 100% correct, but follows from the desire to merge
+ multiplication by a power of 2 with the recognizer for a
+ shift. >=32 is not a valid shift for "lsl", so we must try and
+ output a shift that produces the correct arithmetical result.
+ Using lsr #32 is identical except for the fact that the carry bit
+ is not set correctly if we set the flags; but we never use the
+ carry bit from such an operation, so we can ignore that. */
+ if (code == ROTATERT)
+ /* Rotate is just modulo 32. */
+ *amountp &= 31;
+ else if (*amountp != (*amountp & 31))
+ {
+ if (code == ASHIFT)
+ mnem = "lsr";
+ *amountp = 32;
+ }
+
+ /* Shifts of 0 are no-ops. */
+ if (*amountp == 0)
+ return NULL;
+ }
+
+ return mnem;
+}
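+
+/* For instance (rtx shapes assumed): an ASHIFT by the constant 34 comes
+   back from shift_op as "lsr" with *AMOUNTP forced to 32, per the
+   comment above: only the arithmetic result matters, not the literal
+   out-of-range shift.  */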
+
+/* Obtain the shift count from POWER, which must be a power of two.  */
+
+static HOST_WIDE_INT
+int_log2 (HOST_WIDE_INT power)
+{
+ HOST_WIDE_INT shift = 0;
+
+ while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
+ {
+ gcc_assert (shift <= 31);
+ shift++;
+ }
+
+ return shift;
+}
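+
+/* E.g. int_log2 (8) == 3.  The assertion fires if POWER has no set bit
+   within the low 32 bits.  */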
+
+/* Output a .ascii pseudo-op, keeping track of lengths. This is
+ because /bin/as is horribly restrictive. The judgement about
+ whether or not each character is 'printable' (and can be output as
+ is) or not (and must be printed with an octal escape) must be made
+ with reference to the *host* character set -- the situation is
+ similar to that discussed in the comments above pp_c_char in
+ c-pretty-print.c. */
+
+#define MAX_ASCII_LEN 51
+
+void
+output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
+{
+ int i;
+ int len_so_far = 0;
+
+ fputs ("\t.ascii\t\"", stream);
+
+ for (i = 0; i < len; i++)
+ {
+ int c = p[i];
+
+ if (len_so_far >= MAX_ASCII_LEN)
+ {
+ fputs ("\"\n\t.ascii\t\"", stream);
+ len_so_far = 0;
+ }
+
+ if (ISPRINT (c))
+ {
+ if (c == '\\' || c == '\"')
+ {
+ putc ('\\', stream);
+ len_so_far++;
+ }
+ putc (c, stream);
+ len_so_far++;
+ }
+ else
+ {
+ fprintf (stream, "\\%03o", c);
+ len_so_far += 4;
+ }
+ }
+
+ fputs ("\"\n", stream);
+}
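+
+/* For example (hypothetical input): the four bytes a, '"', b, newline
+   would be emitted as
+
+	.ascii	"a\"b\012"
+
+   with the newline falling back to an octal escape because it is not
+   printable on the host.  */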
+
+/* Compute the register save mask for registers 0 through 12
+ inclusive. This code is used by arm_compute_save_reg_mask. */
+
+static unsigned long
+arm_compute_save_reg0_reg12_mask (void)
+{
+ unsigned long func_type = arm_current_func_type ();
+ unsigned long save_reg_mask = 0;
+ unsigned int reg;
+
+ if (IS_INTERRUPT (func_type))
+ {
+ unsigned int max_reg;
+ /* Interrupt functions must not corrupt any registers,
+ even call clobbered ones. If this is a leaf function
+ we can just examine the registers used by the RTL, but
+ otherwise we have to assume that whatever function is
+ called might clobber anything, and so we have to save
+ all the call-clobbered registers as well. */
+ if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
+      /* FIQ handlers have registers r8 - r12 banked, so
+	 we only need to check r0 - r7.  Normal ISRs only
+	 bank r14 and r15, so we must check up to r12.
+	 r13 is the stack pointer which is always preserved,
+	 so we do not need to consider it here.  */
+ max_reg = 7;
+ else
+ max_reg = 12;
+
+ for (reg = 0; reg <= max_reg; reg++)
+ if (df_regs_ever_live_p (reg)
+ || (! current_function_is_leaf && call_used_regs[reg]))
+ save_reg_mask |= (1 << reg);
+
+ /* Also save the pic base register if necessary. */
+ if (flag_pic
+ && !TARGET_SINGLE_PIC_BASE
+ && arm_pic_register != INVALID_REGNUM
+ && crtl->uses_pic_offset_table)
+ save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
+ }
+ else if (IS_VOLATILE(func_type))
+ {
+ /* For noreturn functions we historically omitted register saves
+ altogether. However this really messes up debugging. As a
+ compromise save just the frame pointers. Combined with the link
+ register saved elsewhere this should be sufficient to get
+ a backtrace. */
+ if (frame_pointer_needed)
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
+ if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
+ save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
+ }
+ else
+ {
+ /* In the normal case we only need to save those registers
+ which are call saved and which are used by this function. */
+ for (reg = 0; reg <= 11; reg++)
+ if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
+ save_reg_mask |= (1 << reg);
+
+ /* Handle the frame pointer as a special case. */
+ if (frame_pointer_needed)
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
+
+ /* If we aren't loading the PIC register,
+ don't stack it even though it may be live. */
+ if (flag_pic
+ && !TARGET_SINGLE_PIC_BASE
+ && arm_pic_register != INVALID_REGNUM
+ && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
+ || crtl->uses_pic_offset_table))
+ save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
+
+ /* The prologue will copy SP into R0, so save it. */
+ if (IS_STACKALIGN (func_type))
+ save_reg_mask |= 1;
+ }
+
+ /* Save registers so the exception handler can modify them. */
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i;
+
+ for (i = 0; ; i++)
+ {
+ reg = EH_RETURN_DATA_REGNO (i);
+ if (reg == INVALID_REGNUM)
+ break;
+ save_reg_mask |= 1 << reg;
+ }
+ }
+
+ return save_reg_mask;
+}
+
+
+/* Compute the number of bytes used to store the static chain register on the
+ stack, above the stack frame. We need to know this accurately to get the
+ alignment of the rest of the stack frame correct. */
+
+static int
+arm_compute_static_chain_stack_bytes (void)
+{
+ unsigned long func_type = arm_current_func_type ();
+ int static_chain_stack_bytes = 0;
+
+  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
+      && IS_NESTED (func_type)
+      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
+ static_chain_stack_bytes = 4;
+
+ return static_chain_stack_bytes;
+}
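+
+/* In other words, the result is either 0 or 4: only an APCS-frame,
+   ARM-mode nested function with r3 live on entry and no pretend args
+   needs the extra word above the frame.  */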
+
+
+/* Compute a bit mask of which registers need to be
+ saved on the stack for the current function.
+ This is used by arm_get_frame_offsets, which may add extra registers. */
+
+static unsigned long
+arm_compute_save_reg_mask (void)
+{
+ unsigned int save_reg_mask = 0;
+ unsigned long func_type = arm_current_func_type ();
+ unsigned int reg;
+
+ if (IS_NAKED (func_type))
+ /* This should never really happen. */
+ return 0;
+
+ /* If we are creating a stack frame, then we must save the frame pointer,
+ IP (which will hold the old stack pointer), LR and the PC. */
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+ save_reg_mask |=
+ (1 << ARM_HARD_FRAME_POINTER_REGNUM)
+ | (1 << IP_REGNUM)
+ | (1 << LR_REGNUM)
+ | (1 << PC_REGNUM);
+
+ save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
+
+ /* Decide if we need to save the link register.
+ Interrupt routines have their own banked link register,
+ so they never need to save it.
+ Otherwise if we do not use the link register we do not need to save
+ it. If we are pushing other registers onto the stack however, we
+ can save an instruction in the epilogue by pushing the link register
+ now and then popping it back into the PC. This incurs extra memory
+ accesses though, so we only do it when optimizing for size, and only
+ if we know that we will not need a fancy return sequence. */
+ if (df_regs_ever_live_p (LR_REGNUM)
+ || (save_reg_mask
+ && optimize_size
+ && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+ && !crtl->calls_eh_return))
+ save_reg_mask |= 1 << LR_REGNUM;
+
+ if (cfun->machine->lr_save_eliminated)
+ save_reg_mask &= ~ (1 << LR_REGNUM);
+
+ if (TARGET_REALLY_IWMMXT
+      && ((bit_count (save_reg_mask)
+	   + ARM_NUM_INTS (crtl->args.pretend_args_size
+			   + arm_compute_static_chain_stack_bytes ()))
+	  % 2) != 0)
+ {
+ /* The total number of registers that are going to be pushed
+ onto the stack is odd. We need to ensure that the stack
+ is 64-bit aligned before we start to save iWMMXt registers,
+ and also before we start to create locals. (A local variable
+ might be a double or long long which we will load/store using
+ an iWMMXt instruction). Therefore we need to push another
+ ARM register, so that the stack will be 64-bit aligned. We
+ try to avoid using the arg registers (r0 -r3) as they might be
+ used to pass values in a tail call. */
+ for (reg = 4; reg <= 12; reg++)
+ if ((save_reg_mask & (1 << reg)) == 0)
+ break;
+
+ if (reg <= 12)
+ save_reg_mask |= (1 << reg);
+ else
+ {
+ cfun->machine->sibcall_blocked = 1;
+ save_reg_mask |= (1 << 3);
+ }
+ }
+
+ /* We may need to push an additional register for use initializing the
+ PIC base register. */
+ if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
+ && (save_reg_mask & THUMB2_WORK_REGS) == 0)
+ {
+ reg = thumb_find_work_register (1 << 4);
+ if (!call_used_regs[reg])
+ save_reg_mask |= (1 << reg);
+ }
+
+ return save_reg_mask;
+}
+
+
+/* Compute a bit mask of which registers need to be
+ saved on the stack for the current function. */
+static unsigned long
+thumb1_compute_save_reg_mask (void)
+{
+ unsigned long mask;
+ unsigned reg;
+
+ mask = 0;
+ for (reg = 0; reg < 12; reg ++)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ mask |= 1 << reg;
+
+ if (flag_pic
+ && !TARGET_SINGLE_PIC_BASE
+ && arm_pic_register != INVALID_REGNUM
+ && crtl->uses_pic_offset_table)
+ mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
+
+ /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
+ if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
+ mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
+
+ /* LR will also be pushed if any lo regs are pushed. */
+ if (mask & 0xff || thumb_force_lr_save ())
+ mask |= (1 << LR_REGNUM);
+
+ /* Make sure we have a low work register if we need one.
+ We will need one if we are going to push a high register,
+ but we are not currently intending to push a low register. */
+ if ((mask & 0xff) == 0
+ && ((mask & 0x0f00) || TARGET_BACKTRACE))
+ {
+ /* Use thumb_find_work_register to choose which register
+ we will use. If the register is live then we will
+ have to push it. Use LAST_LO_REGNUM as our fallback
+ choice for the register to select. */
+ reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
+ /* Make sure the register returned by thumb_find_work_register is
+ not part of the return value. */
+ if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
+ reg = LAST_LO_REGNUM;
+
+ if (! call_used_regs[reg])
+ mask |= 1 << reg;
+ }
+
+ /* The 504 below is 8 bytes less than 512 because there are two possible
+ alignment words. We can't tell here if they will be present or not so we
+ have to play it safe and assume that they are. */
+ if ((CALLER_INTERWORKING_SLOT_SIZE +
+ ROUND_UP_WORD (get_frame_size ()) +
+ crtl->outgoing_args_size) >= 504)
+ {
+ /* This is the same as the code in thumb1_expand_prologue() which
+ determines which register to use for stack decrement. */
+ for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
+ if (mask & (1 << reg))
+ break;
+
+ if (reg > LAST_LO_REGNUM)
+ {
+ /* Make sure we have a register available for stack decrement. */
+ mask |= 1 << LAST_LO_REGNUM;
+ }
+ }
+
+ return mask;
+}
+
+
+/* Return the number of bytes required to save VFP registers. */
+static int
+arm_get_vfp_saved_size (void)
+{
+ unsigned int regno;
+ int count;
+ int saved;
+
+ saved = 0;
+ /* Space for saved VFP registers. */
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ count = 0;
+ for (regno = FIRST_VFP_REGNUM;
+ regno < LAST_VFP_REGNUM;
+ regno += 2)
+ {
+ if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
+ && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
+ {
+ if (count > 0)
+ {
+ /* Workaround ARM10 VFPr1 bug. */
+ if (count == 2 && !arm_arch6)
+ count++;
+ saved += count * 8;
+ }
+ count = 0;
+ }
+ else
+ count++;
+ }
+ if (count > 0)
+ {
+ if (count == 2 && !arm_arch6)
+ count++;
+ saved += count * 8;
+ }
+ }
+ return saved;
+}
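+
+/* E.g. (liveness assumed): if only d8-d10 need saving, COUNT reaches 3
+   and the result is 24 bytes; a run of exactly two pairs is padded to
+   three on pre-ARMv6 cores, mirroring vfp_emit_fstmd's workaround.  */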
+
+
+/* Generate a function exit sequence. If REALLY_RETURN is false, then do
+ everything bar the final return instruction. */
+const char *
+output_return_instruction (rtx operand, int really_return, int reverse)
+{
+ char conditional[10];
+ char instr[100];
+ unsigned reg;
+ unsigned long live_regs_mask;
+ unsigned long func_type;
+ arm_stack_offsets *offsets;
+
+ func_type = arm_current_func_type ();
+
+ if (IS_NAKED (func_type))
+ return "";
+
+ if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+ {
+ /* If this function was declared non-returning, and we have
+ found a tail call, then we have to trust that the called
+ function won't return. */
+ if (really_return)
+ {
+ rtx ops[2];
+
+ /* Otherwise, trap an attempted return by aborting. */
+ ops[0] = operand;
+ ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
+ : "abort");
+ assemble_external_libcall (ops[1]);
+ output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
+ }
+
+ return "";
+ }
+
+ gcc_assert (!cfun->calls_alloca || really_return);
+
+ sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
+
+ cfun->machine->return_used_this_function = 1;
+
+ offsets = arm_get_frame_offsets ();
+ live_regs_mask = offsets->saved_regs_mask;
+
+ if (live_regs_mask)
+ {
+ const char * return_reg;
+
+ /* If we do not have any special requirements for function exit
+ (e.g. interworking) then we can load the return address
+ directly into the PC. Otherwise we must load it into LR. */
+ if (really_return
+ && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
+ return_reg = reg_names[PC_REGNUM];
+ else
+ return_reg = reg_names[LR_REGNUM];
+
+ if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
+ {
+ /* There are three possible reasons for the IP register
+ being saved. 1) a stack frame was created, in which case
+ IP contains the old stack pointer, or 2) an ISR routine
+ corrupted it, or 3) it was saved to align the stack on
+ iWMMXt. In case 1, restore IP into SP, otherwise just
+ restore IP. */
+ if (frame_pointer_needed)
+ {
+ live_regs_mask &= ~ (1 << IP_REGNUM);
+ live_regs_mask |= (1 << SP_REGNUM);
+ }
+ else
+ gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
+ }
+
+ /* On some ARM architectures it is faster to use LDR rather than
+ LDM to load a single register. On other architectures, the
+ cost is the same. In 26 bit mode, or for exception handlers,
+ we have to use LDM to load the PC so that the CPSR is also
+ restored. */
+ for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
+ if (live_regs_mask == (1U << reg))
+ break;
+
+ if (reg <= LAST_ARM_REGNUM
+ && (reg != LR_REGNUM
+ || ! really_return
+ || ! IS_INTERRUPT (func_type)))
+ {
+ sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
+ (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
+ }
+ else
+ {
+ char *p;
+ int first = 1;
+
+ /* Generate the load multiple instruction to restore the
+ registers. Note we can get here, even if
+ frame_pointer_needed is true, but only if sp already
+ points to the base of the saved core registers. */
+ if (live_regs_mask & (1 << SP_REGNUM))
+ {
+ unsigned HOST_WIDE_INT stack_adjust;
+
+ stack_adjust = offsets->outgoing_args - offsets->saved_regs;
+ gcc_assert (stack_adjust == 0 || stack_adjust == 4);
+
+	      if (stack_adjust && arm_arch5 && TARGET_ARM)
+		{
+		  if (TARGET_UNIFIED_ASM)
+		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
+		  else
+		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
+		}
+	      else
+		{
+		  /* If we can't use ldmib (SA110 bug),
+		     then try to pop r3 instead.  */
+		  if (stack_adjust)
+		    live_regs_mask |= 1 << 3;
+
+		  if (TARGET_UNIFIED_ASM)
+		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
+		  else
+		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
+		}
+ }
+	  else if (TARGET_UNIFIED_ASM)
+	    sprintf (instr, "pop%s\t{", conditional);
+	  else
+	    sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
+
+ p = instr + strlen (instr);
+
+ for (reg = 0; reg <= SP_REGNUM; reg++)
+ if (live_regs_mask & (1 << reg))
+ {
+ int l = strlen (reg_names[reg]);
+
+ if (first)
+ first = 0;
+ else
+ {
+ memcpy (p, ", ", 2);
+ p += 2;
+ }
+
+ memcpy (p, "%|", 2);
+ memcpy (p + 2, reg_names[reg], l);
+ p += l + 2;
+ }
+
+ if (live_regs_mask & (1 << LR_REGNUM))
+ {
+ sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
+ /* If returning from an interrupt, restore the CPSR. */
+ if (IS_INTERRUPT (func_type))
+ strcat (p, "^");
+ }
+ else
+ strcpy (p, "}");
+ }
+
+ output_asm_insn (instr, & operand);
+
+ /* See if we need to generate an extra instruction to
+ perform the actual function return. */
+ if (really_return
+ && func_type != ARM_FT_INTERWORKED
+ && (live_regs_mask & (1 << LR_REGNUM)) != 0)
+ {
+ /* The return has already been handled
+ by loading the LR into the PC. */
+ really_return = 0;
+ }
+ }
+
+ if (really_return)
+ {
+ switch ((int) ARM_FUNC_TYPE (func_type))
+ {
+ case ARM_FT_ISR:
+ case ARM_FT_FIQ:
+ /* ??? This is wrong for unified assembly syntax. */
+ sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
+ break;
+
+ case ARM_FT_INTERWORKED:
+ sprintf (instr, "bx%s\t%%|lr", conditional);
+ break;
+
+ case ARM_FT_EXCEPTION:
+ /* ??? This is wrong for unified assembly syntax. */
+ sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
+ break;
+
+ default:
+ /* Use bx if it's available. */
+ if (arm_arch5 || arm_arch4t)
+ sprintf (instr, "bx%s\t%%|lr", conditional);
+ else
+ sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
+ break;
+ }
+
+ output_asm_insn (instr, & operand);
+ }
+
+ return "";
+}
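+
+/* Illustrative outputs (frame state assumed): a leaf with nothing saved
+   returns with "bx lr" on ARMv4T/ARMv5, while a function that pushed
+   {r4, lr} folds the return into the pop as "ldmfd sp!, {r4, pc}".  */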
+
+/* Write the function name into the code section, directly preceding
+ the function prologue.
+
+ Code will be output similar to this:
+ t0
+ .ascii "arm_poke_function_name", 0
+ .align
+ t1
+ .word 0xff000000 + (t1 - t0)
+ arm_poke_function_name
+ mov ip, sp
+ stmfd sp!, {fp, ip, lr, pc}
+ sub fp, ip, #4
+
+ When performing a stack backtrace, code can inspect the value
+ of 'pc' stored at 'fp' + 0. If the trace function then looks
+   at location pc - 12 and the top 8 bits are set, then we know
+   that there is a function name embedded immediately preceding this
+   location, whose length is (pc[-3] & 0x00ffffff).
+
+ We assume that pc is declared as a pointer to an unsigned long.
+
+ It is of no benefit to output the function name if we are assembling
+ a leaf function. These function types will not contain a stack
+ backtrace structure, therefore it is not possible to determine the
+ function name. */
+void
+arm_poke_function_name (FILE *stream, const char *name)
+{
+ unsigned long alignlength;
+ unsigned long length;
+ rtx x;
+
+ length = strlen (name) + 1;
+ alignlength = ROUND_UP_WORD (length);
+
+ ASM_OUTPUT_ASCII (stream, name, length);
+ ASM_OUTPUT_ALIGN (stream, 2);
+ x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
+ assemble_aligned_integer (UNITS_PER_WORD, x);
+}
+
+/* Place some comments into the assembler stream
+ describing the current function. */
+static void
+arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
+{
+ unsigned long func_type;
+
+ if (TARGET_THUMB1)
+ {
+ thumb1_output_function_prologue (f, frame_size);
+ return;
+ }
+
+ /* Sanity check. */
+ gcc_assert (!arm_ccfsm_state && !arm_target_insn);
+
+ func_type = arm_current_func_type ();
+
+ switch ((int) ARM_FUNC_TYPE (func_type))
+ {
+ default:
+ case ARM_FT_NORMAL:
+ break;
+ case ARM_FT_INTERWORKED:
+ asm_fprintf (f, "\t%@ Function supports interworking.\n");
+ break;
+ case ARM_FT_ISR:
+ asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
+ break;
+ case ARM_FT_FIQ:
+ asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
+ break;
+ case ARM_FT_EXCEPTION:
+ asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
+ break;
+ }
+
+ if (IS_NAKED (func_type))
+ asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
+
+ if (IS_VOLATILE (func_type))
+ asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
+
+ if (IS_NESTED (func_type))
+ asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
+ if (IS_STACKALIGN (func_type))
+ asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
+
+ asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
+ crtl->args.size,
+ crtl->args.pretend_args_size, frame_size);
+
+ asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
+ frame_pointer_needed,
+ cfun->machine->uses_anonymous_args);
+
+ if (cfun->machine->lr_save_eliminated)
+ asm_fprintf (f, "\t%@ link register save eliminated.\n");
+
+ if (crtl->calls_eh_return)
+ asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
+
+}
+
+const char *
+arm_output_epilogue (rtx sibling)
+{
+ int reg;
+ unsigned long saved_regs_mask;
+ unsigned long func_type;
+ /* Floats_offset is the offset from the "virtual" frame. In an APCS
+ frame that is $fp + 4 for a non-variadic function. */
+ int floats_offset = 0;
+ rtx operands[3];
+ FILE * f = asm_out_file;
+ unsigned int lrm_count = 0;
+ int really_return = (sibling == NULL);
+ int start_reg;
+ arm_stack_offsets *offsets;
+
+ /* If we have already generated the return instruction
+ then it is futile to generate anything else. */
+ if (use_return_insn (FALSE, sibling) &&
+ (cfun->machine->return_used_this_function != 0))
+ return "";
+
+ func_type = arm_current_func_type ();
+
+ if (IS_NAKED (func_type))
+ /* Naked functions don't have epilogues. */
+ return "";
+
+ if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+ {
+ rtx op;
+
+ /* A volatile function should never return. Call abort. */
+ op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+ assemble_external_libcall (op);
+ output_asm_insn ("bl\t%a0", &op);
+
+ return "";
+ }
+
+ /* If we are throwing an exception, then we really must be doing a
+ return, so we can't tail-call. */
+ gcc_assert (!crtl->calls_eh_return || really_return);
+
+ offsets = arm_get_frame_offsets ();
+ saved_regs_mask = offsets->saved_regs_mask;
+
+ if (TARGET_IWMMXT)
+ lrm_count = bit_count (saved_regs_mask);
+
+ floats_offset = offsets->saved_args;
+ /* Compute how far away the floats will be. */
+ for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
+ if (saved_regs_mask & (1 << reg))
+ floats_offset += 4;
+
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+ {
+ /* This variable is for the Virtual Frame Pointer, not VFP regs. */
+ int vfp_offset = offsets->frame;
+
+ if (TARGET_FPA_EMU2)
+ {
+ for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ floats_offset += 12;
+ asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
+ reg, FP_REGNUM, floats_offset - vfp_offset);
+ }
+ }
+ else
+ {
+ start_reg = LAST_FPA_REGNUM;
+
+ for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
+ {
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ floats_offset += 12;
+
+ /* We can't unstack more than four registers at once. */
+ if (start_reg - reg == 3)
+ {
+ asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
+ reg, FP_REGNUM, floats_offset - vfp_offset);
+ start_reg = reg - 1;
+ }
+ }
+ else
+ {
+ if (reg != start_reg)
+ asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
+ reg + 1, start_reg - reg,
+ FP_REGNUM, floats_offset - vfp_offset);
+ start_reg = reg - 1;
+ }
+ }
+
+ /* Just in case the last register checked also needs unstacking. */
+ if (reg != start_reg)
+ asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
+ reg + 1, start_reg - reg,
+ FP_REGNUM, floats_offset - vfp_offset);
+ }
+
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ int saved_size;
+
+ /* The fldmd insns do not have base+offset addressing
+ modes, so we use IP to hold the address. */
+ saved_size = arm_get_vfp_saved_size ();
+
+ if (saved_size > 0)
+ {
+ floats_offset += saved_size;
+ asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
+ FP_REGNUM, floats_offset - vfp_offset);
+ }
+ start_reg = FIRST_VFP_REGNUM;
+ for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
+ {
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
+ {
+ if (start_reg != reg)
+ vfp_output_fldmd (f, IP_REGNUM,
+ (start_reg - FIRST_VFP_REGNUM) / 2,
+ (reg - start_reg) / 2);
+ start_reg = reg + 2;
+ }
+ }
+ if (start_reg != reg)
+ vfp_output_fldmd (f, IP_REGNUM,
+ (start_reg - FIRST_VFP_REGNUM) / 2,
+ (reg - start_reg) / 2);
+ }
+
+ if (TARGET_IWMMXT)
+ {
+ /* The frame pointer is guaranteed to be non-double-word aligned.
+ This is because it is set to (old_stack_pointer - 4) and the
+ old_stack_pointer was double word aligned. Thus the offset to
+	     the iWMMXt registers to be loaded must also be non-double-word
+	     aligned, so that the resultant address *is* double-word aligned.
+ We can ignore floats_offset since that was already included in
+ the live_regs_mask. */
+ lrm_count += (lrm_count % 2 ? 2 : 1);
+
+ for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
+ reg, FP_REGNUM, lrm_count * 4);
+ lrm_count += 2;
+ }
+ }
+
+      /* saved_regs_mask should contain the IP, which at the time of stack
+	 frame generation actually contains the old stack pointer.  So a
+	 quick way to unwind the stack is just to pop the IP register
+	 directly into the stack pointer.  */
+ gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
+ saved_regs_mask &= ~ (1 << IP_REGNUM);
+ saved_regs_mask |= (1 << SP_REGNUM);
+
+ /* There are two registers left in saved_regs_mask - LR and PC. We
+ only need to restore the LR register (the return address), but to
+ save time we can load it directly into the PC, unless we need a
+ special function exit sequence, or we are not really returning. */
+ if (really_return
+ && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+ && !crtl->calls_eh_return)
+ /* Delete the LR from the register mask, so that the LR on
+ the stack is loaded into the PC in the register mask. */
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ else
+ saved_regs_mask &= ~ (1 << PC_REGNUM);
+
+ /* We must use SP as the base register, because SP is one of the
+ registers being restored. If an interrupt or page fault
+ happens in the ldm instruction, the SP might or might not
+ have been restored. That would be bad, as then SP will no
+ longer indicate the safe area of stack, and we can get stack
+ corruption. Using SP as the base register means that it will
+ be reset correctly to the original value, should an interrupt
+ occur. If the stack pointer already points at the right
+ place, then omit the subtraction. */
+ if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
+ || cfun->calls_alloca)
+ asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
+ 4 * bit_count (saved_regs_mask));
+ print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
+
+ if (IS_INTERRUPT (func_type))
+ /* Interrupt handlers will have pushed the
+ IP onto the stack, so restore it now. */
+ print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
+ }
+ else
+ {
+ /* This branch is executed for ARM mode (non-apcs frames) and
+ Thumb-2 mode. Frame layout is essentially the same for those
+ cases, except that in ARM mode the frame pointer points to the
+ first saved register, while in Thumb-2 mode it points to the
+ last saved register.
+
+ It is possible to make the frame pointer point to the last saved
+ register in both cases, and remove some of the conditionals below.
+ The fp setup in the prologue would then be just "mov fp, sp" and
+ the sp restore in the epilogue just "mov sp, fp", whereas now we
+ have to use add/sub in those cases. However, the value of doing
+ so would be marginal, as both mov and add/sub are 32-bit in ARM
+ mode, and it would require extra conditionals in
+ arm_expand_prologue to distinguish the ARM-apcs-frame case (where
+ the frame pointer is required to point at the first register)
+ from the ARM-non-apcs-frame case. Therefore, such a change is
+ postponed until a real need arises. */
+ unsigned HOST_WIDE_INT amount;
+ int rfe;
+ /* Restore stack pointer if necessary. */
+ if (TARGET_ARM && frame_pointer_needed)
+ {
+ operands[0] = stack_pointer_rtx;
+ operands[1] = hard_frame_pointer_rtx;
+
+ operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
+ output_add_immediate (operands);
+ }
+ else
+ {
+ if (frame_pointer_needed)
+ {
+ /* For Thumb-2 restore sp from the frame pointer.
+ Operand restrictions mean we have to increment FP, then copy
+ to SP. */
+ amount = offsets->locals_base - offsets->saved_regs;
+ operands[0] = hard_frame_pointer_rtx;
+ }
+ else
+ {
+ unsigned long count;
+ operands[0] = stack_pointer_rtx;
+ amount = offsets->outgoing_args - offsets->saved_regs;
+ /* Pop call-clobbered registers if doing so avoids a
+ separate stack adjustment. */
+ count = offsets->saved_regs - offsets->saved_args;
+ if (optimize_size
+ && count != 0
+ && !crtl->calls_eh_return
+ && bit_count(saved_regs_mask) * 4 == count
+ && !IS_INTERRUPT (func_type)
+ && !crtl->tail_call_emit)
+ {
+ unsigned long mask;
+ /* Preserve return values, of any size. */
+ mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
+ mask ^= 0xf;
+ mask &= ~saved_regs_mask;
+ reg = 0;
+ while (bit_count (mask) * 4 > amount)
+ {
+ while ((mask & (1 << reg)) == 0)
+ reg++;
+ mask &= ~(1 << reg);
+ }
+ if (bit_count (mask) * 4 == amount)
+ {
+ amount = 0;
+ saved_regs_mask |= mask;
+ }
+ }
+ }
+
+ if (amount)
+ {
+ operands[1] = operands[0];
+ operands[2] = GEN_INT (amount);
+ output_add_immediate (operands);
+ }
+ if (frame_pointer_needed)
+ asm_fprintf (f, "\tmov\t%r, %r\n",
+ SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ if (TARGET_FPA_EMU2)
+ {
+ for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
+ reg, SP_REGNUM);
+ }
+ else
+ {
+ start_reg = FIRST_FPA_REGNUM;
+
+ for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
+ {
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ if (reg - start_reg == 3)
+ {
+ asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
+ start_reg, SP_REGNUM);
+ start_reg = reg + 1;
+ }
+ }
+ else
+ {
+ if (reg != start_reg)
+ asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
+ start_reg, reg - start_reg,
+ SP_REGNUM);
+
+ start_reg = reg + 1;
+ }
+ }
+
+ /* Just in case the last register checked also needs unstacking. */
+ if (reg != start_reg)
+ asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
+ start_reg, reg - start_reg, SP_REGNUM);
+ }
+
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ int end_reg = LAST_VFP_REGNUM + 1;
+
+ /* Scan the registers in reverse order. We need to match
+ any groupings made in the prologue and generate matching
+ pop operations. */
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+ {
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1)
+ || call_used_regs[reg + 1]))
+ {
+ if (end_reg > reg + 2)
+ vfp_output_fldmd (f, SP_REGNUM,
+ (reg + 2 - FIRST_VFP_REGNUM) / 2,
+ (end_reg - (reg + 2)) / 2);
+ end_reg = reg;
+ }
+ }
+ if (end_reg > reg + 2)
+ vfp_output_fldmd (f, SP_REGNUM, 0,
+ (end_reg - (reg + 2)) / 2);
+ }
+
+ if (TARGET_IWMMXT)
+ for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
+
+ /* If we can, restore the LR into the PC. */
+ if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
+ && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
+ && !IS_STACKALIGN (func_type)
+ && really_return
+ && crtl->args.pretend_args_size == 0
+ && saved_regs_mask & (1 << LR_REGNUM)
+ && !crtl->calls_eh_return)
+ {
+ saved_regs_mask &= ~ (1 << LR_REGNUM);
+ saved_regs_mask |= (1 << PC_REGNUM);
+ rfe = IS_INTERRUPT (func_type);
+ }
+ else
+ rfe = 0;
+
+ /* Load the registers off the stack. If we only have one register
+ to load, use the LDR instruction - it is faster. For Thumb-2,
+ always use pop; the assembler will pick the best instruction. */
+ if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
+ && !IS_INTERRUPT(func_type))
+ {
+ asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
+ }
+ else if (saved_regs_mask)
+ {
+ if (saved_regs_mask & (1 << SP_REGNUM))
+ /* Note - write back to the stack register is not enabled
+ (i.e. "ldmfd sp!..."). We know that the stack pointer is
+ in the list of registers and if we add writeback the
+ instruction becomes UNPREDICTABLE. */
+ print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
+ rfe);
+ else if (TARGET_ARM)
+ print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
+ rfe);
+ else
+ print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
+ }
+
+ if (crtl->args.pretend_args_size)
+ {
+ /* Unwind the pre-pushed regs. */
+ operands[0] = operands[1] = stack_pointer_rtx;
+ operands[2] = GEN_INT (crtl->args.pretend_args_size);
+ output_add_immediate (operands);
+ }
+ }
+
+ /* We may have already restored PC directly from the stack. */
+ if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
+ return "";
+
+ /* Stack adjustment for exception handler. */
+ if (crtl->calls_eh_return)
+ asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
+ ARM_EH_STACKADJ_REGNUM);
+
+ /* Generate the return instruction. */
+ switch ((int) ARM_FUNC_TYPE (func_type))
+ {
+ case ARM_FT_ISR:
+ case ARM_FT_FIQ:
+ asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
+ break;
+
+ case ARM_FT_EXCEPTION:
+ asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
+ break;
+
+ case ARM_FT_INTERWORKED:
+ asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
+ break;
+
+ default:
+ if (IS_STACKALIGN (func_type))
+ {
+ /* See comment in arm_expand_prologue. */
+ asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
+ }
+ if (arm_arch5 || arm_arch4t)
+ asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
+ else
+ asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
+ break;
+ }
+
+ return "";
+}
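+
+/* A worked example of the optimize_size trick above (illustrative only,
+ not part of the compiler): suppose the epilogue must release 8 bytes
+ of stack and then pop {r4, lr}. For an int return value
+ arm_size_return_regs () yields 4, so the mask of registers holding
+ the return value is (1 << ((4 + 3) / 4)) - 1 = 0b0001, i.e. r0 only.
+ XORing with 0xf leaves {r1, r2, r3} as candidates; the loop then
+ clears the lowest set bits until 2 * 4 == 8 bytes remain, giving
+ {r2, r3}. Instead of
+
+ add sp, sp, #8
+ pop {r4, lr}
+
+ the epilogue emits the single, shorter
+
+ pop {r2, r3, r4, lr}
+
+ where the values loaded into r2 and r3 are simply discarded. */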
+
+static void
+arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
+{
+ arm_stack_offsets *offsets;
+
+ if (TARGET_THUMB1)
+ {
+ int regno;
+
+ /* Emit any call-via-reg trampolines that are needed for v4t support
+ of call_reg and call_value_reg type insns. */
+ for (regno = 0; regno < LR_REGNUM; regno++)
+ {
+ rtx label = cfun->machine->call_via[regno];
+
+ if (label != NULL)
+ {
+ switch_to_section (function_section (current_function_decl));
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+ asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
+ }
+ }
+
+ /* ??? Probably not safe to set this here, since it assumes that a
+ function will be emitted as assembly immediately after we generate
+ RTL for it. This does not happen for inline functions. */
+ cfun->machine->return_used_this_function = 0;
+ }
+ else /* TARGET_32BIT */
+ {
+ /* We need to take into account any stack-frame rounding. */
+ offsets = arm_get_frame_offsets ();
+
+ gcc_assert (!use_return_insn (FALSE, NULL)
+ || (cfun->machine->return_used_this_function != 0)
+ || offsets->saved_regs == offsets->outgoing_args
+ || frame_pointer_needed);
+
+ /* Reset the ARM-specific per-function variables. */
+ after_arm_reorg = 0;
+ }
+}
+
+/* Generate and emit an insn that we will recognize as a push_multi.
+ Unfortunately, since this insn does not reflect the actual semantics of
+ the operation very well, we need to annotate it for the benefit
+ of DWARF2 frame unwind information. */
+static rtx
+emit_multi_reg_push (unsigned long mask)
+{
+ int num_regs = 0;
+ int num_dwarf_regs;
+ int i, j;
+ rtx par;
+ rtx dwarf;
+ int dwarf_par_index;
+ rtx tmp, reg;
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (mask & (1 << i))
+ num_regs++;
+
+ gcc_assert (num_regs && num_regs <= 16);
+
+ /* We don't record the PC in the dwarf frame information. */
+ num_dwarf_regs = num_regs;
+ if (mask & (1 << PC_REGNUM))
+ num_dwarf_regs--;
+
+ /* For the body of the insn we are going to generate an UNSPEC in
+ parallel with several USEs. This allows the insn to be recognized
+ by the push_multi pattern in the arm.md file.
+
+ The body of the insn looks something like this:
+
+ (parallel [
+ (set (mem:BLK (pre_modify:SI (reg:SI sp)
+ (const_int:SI <num>)))
+ (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
+ (use (reg:SI XX))
+ (use (reg:SI YY))
+ ...
+ ])
+
+ For the frame note however, we try to be more explicit and actually
+ show each register being stored into the stack frame, plus a (single)
+ decrement of the stack pointer. We do it this way in order to be
+ friendly to the stack unwinding code, which only wants to see a single
+ stack decrement per instruction. The RTL we generate for the note looks
+ something like this:
+
+ (sequence [
+ (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
+ (set (mem:SI (reg:SI sp)) (reg:SI r4))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
+ ...
+ ])
+
+ FIXME: In an ideal world the PRE_MODIFY would not exist and
+ instead we'd have a parallel expression detailing all
+ the stores to the various memory addresses, so that debug
+ information is more up-to-date. Remember, however, while
+ writing this, to take care of the constraints with the push
+ instruction.
+
+ Note also that this has to be taken care of for the VFP registers.
+
+ For more see PR43399. */
+
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
+ dwarf_par_index = 1;
+
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ {
+ if (mask & (1 << i))
+ {
+ reg = gen_rtx_REG (SImode, i);
+
+ XVECEXP (par, 0, 0)
+ = gen_rtx_SET (VOIDmode,
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -4 * num_regs))
+ ),
+ gen_rtx_UNSPEC (BLKmode,
+ gen_rtvec (1, reg),
+ UNSPEC_PUSH_MULT));
+
+ if (i != PC_REGNUM)
+ {
+ tmp = gen_rtx_SET (VOIDmode,
+ gen_frame_mem (SImode, stack_pointer_rtx),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
+ dwarf_par_index++;
+ }
+
+ break;
+ }
+ }
+
+ for (j = 1, i++; j < num_regs; i++)
+ {
+ if (mask & (1 << i))
+ {
+ reg = gen_rtx_REG (SImode, i);
+
+ XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
+
+ if (i != PC_REGNUM)
+ {
+ tmp
+ = gen_rtx_SET (VOIDmode,
+ gen_frame_mem
+ (SImode,
+ plus_constant (stack_pointer_rtx,
+ 4 * j)),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
+ }
+
+ j++;
+ }
+ }
+
+ par = emit_insn (par);
+
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -4 * num_regs));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, 0) = tmp;
+
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
+ return par;
+}
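+
+/* For concreteness (an illustrative sketch of the generated RTL, derived
+ from the code above): emit_multi_reg_push (0x4030), i.e. pushing
+ {r4, r5, lr}, produces a body of
+
+ (parallel [
+ (set (mem:BLK (pre_modify:SI (reg:SI sp)
+ (plus:SI (reg:SI sp) (const_int -12))))
+ (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
+ (use (reg:SI r5))
+ (use (reg:SI lr))])
+
+ and a REG_FRAME_RELATED_EXPR note of
+
+ (sequence [
+ (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
+ (set (mem:SI (reg:SI sp)) (reg:SI r4))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))])
+
+ which the unwinder reads as one stack decrement plus three stores. */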
+
+/* Calculate the size of the return value that is passed in registers. */
+static unsigned
+arm_size_return_regs (void)
+{
+ enum machine_mode mode;
+
+ if (crtl->return_rtx != 0)
+ mode = GET_MODE (crtl->return_rtx);
+ else
+ mode = DECL_MODE (DECL_RESULT (current_function_decl));
+
+ return GET_MODE_SIZE (mode);
+}
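+
+/* For instance, a function returning long long has a DImode return rtx,
+ so this returns 8; a plain int returns 4. The result feeds the
+ scratch-register mask computation in the epilogue code above. */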
+
+static rtx
+emit_sfm (int base_reg, int count)
+{
+ rtx par;
+ rtx dwarf;
+ rtx tmp, reg;
+ int i;
+
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
+
+ reg = gen_rtx_REG (XFmode, base_reg++);
+
+ XVECEXP (par, 0, 0)
+ = gen_rtx_SET (VOIDmode,
+ gen_frame_mem
+ (BLKmode,
+ gen_rtx_PRE_MODIFY (Pmode,
+ stack_pointer_rtx,
+ plus_constant
+ (stack_pointer_rtx,
+ -12 * count))
+ ),
+ gen_rtx_UNSPEC (BLKmode,
+ gen_rtvec (1, reg),
+ UNSPEC_PUSH_MULT));
+ tmp = gen_rtx_SET (VOIDmode,
+ gen_frame_mem (XFmode, stack_pointer_rtx), reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, 1) = tmp;
+
+ for (i = 1; i < count; i++)
+ {
+ reg = gen_rtx_REG (XFmode, base_reg++);
+ XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
+
+ tmp = gen_rtx_SET (VOIDmode,
+ gen_frame_mem (XFmode,
+ plus_constant (stack_pointer_rtx,
+ i * 12)),
+ reg);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, i + 1) = tmp;
+ }
+
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -12 * count));
+
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ XVECEXP (dwarf, 0, 0) = tmp;
+
+ par = emit_insn (par);
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+
+ return par;
+}
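+
+/* Illustrative shape of the note attached above (a sketch, not compiler
+ code): emit_sfm (base, 2) stores two 12-byte XFmode registers below
+ the stack pointer and annotates the insn with the equivalent of
+
+ sp = sp - 24
+ mem:XF[sp] = f<base>
+ mem:XF[sp + 12] = f<base+1>
+
+ mirroring the parallel/sequence pairing used by emit_multi_reg_push. */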
+
+
+/* Return true if the current function needs to save/restore LR. */
+
+static bool
+thumb_force_lr_save (void)
+{
+ return !cfun->machine->lr_save_eliminated
+ && (!leaf_function_p ()
+ || thumb_far_jump_used_p ()
+ || df_regs_ever_live_p (LR_REGNUM));
+}
+
+
+/* Return true if r3 is used by any of the tail call insns in the
+ current function. */
+
+static bool
+any_sibcall_uses_r3 (void)
+{
+ edge_iterator ei;
+ edge e;
+
+ if (!crtl->tail_call_emit)
+ return false;
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ if (e->flags & EDGE_SIBCALL)
+ {
+ rtx call = BB_END (e->src);
+ if (!CALL_P (call))
+ call = prev_nonnote_nondebug_insn (call);
+ gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
+ if (find_regno_fusage (call, USE, 3))
+ return true;
+ }
+ return false;
+}
+
+
+/* Compute the distance from register FROM to register TO.
+ These can be the arg pointer (26), the soft frame pointer (25),
+ the stack pointer (13) or the hard frame pointer (11).
+ In thumb mode r7 is used as the soft frame pointer, if needed.
+ Typical stack layout looks like this:
+
+ old stack pointer -> | |
+ ----
+ | | \
+ | | saved arguments for
+ | | vararg functions
+ | | /
+ --
+ hard FP & arg pointer -> | | \
+ | | stack
+ | | frame
+ | | /
+ --
+ | | \
+ | | call saved
+ | | registers
+ soft frame pointer -> | | /
+ --
+ | | \
+ | | local
+ | | variables
+ locals base pointer -> | | /
+ --
+ | | \
+ | | outgoing
+ | | arguments
+ current stack pointer -> | | /
+ --
+
+ For a given function some or all of these stack components
+ may not be needed, giving rise to the possibility of
+ eliminating some of the registers.
+
+ The values returned by this function must reflect the behavior
+ of arm_expand_prologue() and arm_compute_save_reg_mask().
+
+ The sign of the number returned reflects the direction of stack
+ growth, so the values are positive for all eliminations except
+ from the soft frame pointer to the hard frame pointer.
+
+ SFP may point just inside the local variables block to ensure correct
+ alignment. */
+
+
+/* Calculate stack offsets. These are used to calculate register elimination
+ offsets and in prologue/epilogue code. Also calculates which registers
+ should be saved. */
+
+static arm_stack_offsets *
+arm_get_frame_offsets (void)
+{
+ struct arm_stack_offsets *offsets;
+ unsigned long func_type;
+ int leaf;
+ int saved;
+ int core_saved;
+ HOST_WIDE_INT frame_size;
+ int i;
+
+ offsets = &cfun->machine->stack_offsets;
+
+ /* We need to know if we are a leaf function. Unfortunately, it
+ is possible to be called after start_sequence has been called,
+ which causes get_insns to return the insns for the sequence,
+ not the function, which will cause leaf_function_p to return
+ the incorrect result.
+
+ However, we only need to know about leaf functions once reload
+ has completed, and the frame size cannot be changed after that
+ time, so we can safely use the cached value. */
+
+ if (reload_completed)
+ return offsets;
+
+ /* Initially this is the size of the local variables. It will be
+ translated into an offset once we have determined the size of the
+ preceding data. */
+ frame_size = ROUND_UP_WORD (get_frame_size ());
+
+ leaf = leaf_function_p ();
+
+ /* Space for variadic functions. */
+ offsets->saved_args = crtl->args.pretend_args_size;
+
+ /* In Thumb mode this is incorrect, but never used. */
+ offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
+ arm_compute_static_chain_stack_bytes();
+
+ if (TARGET_32BIT)
+ {
+ unsigned int regno;
+
+ offsets->saved_regs_mask = arm_compute_save_reg_mask ();
+ core_saved = bit_count (offsets->saved_regs_mask) * 4;
+ saved = core_saved;
+
+ /* We know that SP will be doubleword aligned on entry, and we must
+ preserve that condition at any subroutine call. We also require the
+ soft frame pointer to be doubleword aligned. */
+
+ if (TARGET_REALLY_IWMMXT)
+ {
+ /* Check for the call-saved iWMMXt registers. */
+ for (regno = FIRST_IWMMXT_REGNUM;
+ regno <= LAST_IWMMXT_REGNUM;
+ regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ saved += 8;
+ }
+
+ func_type = arm_current_func_type ();
+ if (! IS_VOLATILE (func_type))
+ {
+ /* Space for saved FPA registers. */
+ for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ saved += 12;
+
+ /* Space for saved VFP registers. */
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ saved += arm_get_vfp_saved_size ();
+ }
+ }
+ else /* TARGET_THUMB1 */
+ {
+ offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
+ core_saved = bit_count (offsets->saved_regs_mask) * 4;
+ saved = core_saved;
+ if (TARGET_BACKTRACE)
+ saved += 16;
+ }
+
+ /* Saved registers include the stack frame. */
+ offsets->saved_regs = offsets->saved_args + saved +
+ arm_compute_static_chain_stack_bytes();
+ offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
+ /* A leaf function does not need any stack alignment if it has nothing
+ on the stack. */
+ if (leaf && frame_size == 0)
+ {
+ offsets->outgoing_args = offsets->soft_frame;
+ offsets->locals_base = offsets->soft_frame;
+ return offsets;
+ }
+
+ /* Ensure SFP has the correct alignment. */
+ if (ARM_DOUBLEWORD_ALIGN
+ && (offsets->soft_frame & 7))
+ {
+ offsets->soft_frame += 4;
+ /* Try to align stack by pushing an extra reg. Don't bother doing this
+ when there is a stack frame as the alignment will be rolled into
+ the normal stack adjustment. */
+ if (frame_size + crtl->outgoing_args_size == 0)
+ {
+ int reg = -1;
+
+ /* If it is safe to use r3, then do so. This sometimes
+ generates better code on Thumb-2 by avoiding the need to
+ use 32-bit push/pop instructions. */
+ if (! any_sibcall_uses_r3 ()
+ && arm_size_return_regs () <= 12
+ && (offsets->saved_regs_mask & (1 << 3)) == 0)
+ {
+ reg = 3;
+ }
+ else
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+ {
+ if ((offsets->saved_regs_mask & (1 << i)) == 0)
+ {
+ reg = i;
+ break;
+ }
+ }
+
+ if (reg != -1)
+ {
+ offsets->saved_regs += 4;
+ offsets->saved_regs_mask |= (1 << reg);
+ }
+ }
+ }
+
+ offsets->locals_base = offsets->soft_frame + frame_size;
+ offsets->outgoing_args = (offsets->locals_base
+ + crtl->outgoing_args_size);
+
+ if (ARM_DOUBLEWORD_ALIGN)
+ {
+ /* Ensure SP remains doubleword aligned. */
+ if (offsets->outgoing_args & 7)
+ offsets->outgoing_args += 4;
+ gcc_assert (!(offsets->outgoing_args & 7));
+ }
+
+ return offsets;
+}
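+
+/* A worked example of the offsets computed above (illustrative numbers,
+ assuming CALLER_INTERWORKING_SLOT_SIZE is 0 and no static chain is
+ stored): a non-variadic ARM function that saves {r4, r5, fp, lr},
+ with 8 bytes of locals and 8 bytes of outgoing arguments, gets
+
+ saved_args = 0 (no pretend args)
+ frame = 4 (frame pointer slot)
+ saved_regs = 16 (four 4-byte core registers)
+ soft_frame = 16
+ locals_base = 24
+ outgoing_args = 32 (already doubleword aligned)
+
+ Had outgoing_args come to 28, the doubleword-alignment fixup at the
+ end would have bumped it to 32. */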
+
+
+/* Calculate the relative offsets for the different stack pointers. Positive
+ offsets are in the direction of stack growth. */
+
+HOST_WIDE_INT
+arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
+{
+ arm_stack_offsets *offsets;
+
+ offsets = arm_get_frame_offsets ();
+
+ /* OK, now we have enough information to compute the distances.
+ There must be an entry in these switch tables for each pair
+ of registers in ELIMINABLE_REGS, even if some of the entries
+ seem to be redundant or useless. */
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ switch (to)
+ {
+ case THUMB_HARD_FRAME_POINTER_REGNUM:
+ return 0;
+
+ case FRAME_POINTER_REGNUM:
+ /* This is the reverse of the soft frame pointer
+ to hard frame pointer elimination below. */
+ return offsets->soft_frame - offsets->saved_args;
+
+ case ARM_HARD_FRAME_POINTER_REGNUM:
+ /* This is only non-zero in the case where the static chain register
+ is stored above the frame. */
+ return offsets->frame - offsets->saved_args - 4;
+
+ case STACK_POINTER_REGNUM:
+ /* If nothing has been pushed on the stack at all
+ then this will return -4. This *is* correct! */
+ return offsets->outgoing_args - (offsets->saved_args + 4);
+
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+
+ case FRAME_POINTER_REGNUM:
+ switch (to)
+ {
+ case THUMB_HARD_FRAME_POINTER_REGNUM:
+ return 0;
+
+ case ARM_HARD_FRAME_POINTER_REGNUM:
+ /* The hard frame pointer points to the top entry in the
+ stack frame. The soft frame pointer points to the bottom
+ entry in the stack frame. If there is no stack frame at
+ all, then they are identical. */
+
+ return offsets->frame - offsets->soft_frame;
+
+ case STACK_POINTER_REGNUM:
+ return offsets->outgoing_args - offsets->soft_frame;
+
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+
+ default:
+ /* You cannot eliminate from the stack pointer.
+ In theory you could eliminate from the hard frame
+ pointer to the stack pointer, but this will never
+ happen, since if a stack frame is not needed the
+ hard frame pointer will never be used. */
+ gcc_unreachable ();
+ }
+}
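+
+/* Continuing the worked example after arm_get_frame_offsets (illustrative
+ only): with saved_args = 0, frame = 4, soft_frame = 16 and
+ outgoing_args = 32, the eliminations evaluate to
+
+ ARG_POINTER -> FRAME_POINTER 16 - 0 = 16
+ ARG_POINTER -> ARM_HARD_FP 4 - 0 - 4 = 0
+ ARG_POINTER -> STACK_POINTER 32 - (0 + 4) = 28
+ FRAME_POINTER -> ARM_HARD_FP 4 - 16 = -12
+ FRAME_POINTER -> STACK_POINTER 32 - 16 = 16
+
+ Note the single negative value: eliminating the soft frame pointer
+ into the hard frame pointer moves against the direction of stack
+ growth, exactly as the sign convention above promises. */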
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All eliminations are permissible. Note that ARG_POINTER_REGNUM and
+ HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
+ pointer, we must eliminate FRAME_POINTER_REGNUM into
+ HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
+ ARG_POINTER_REGNUM. */
+
+bool
+arm_can_eliminate (const int from, const int to)
+{
+ return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
+ (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
+ (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
+ (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
+ true);
+}
+
+/* Emit RTL to save coprocessor registers on function entry. Returns the
+ number of bytes pushed. */
+
+static int
+arm_save_coproc_regs(void)
+{
+ int saved_size = 0;
+ unsigned reg;
+ unsigned start_reg;
+ rtx insn;
+
+ for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
+ if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
+ {
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
+ insn = gen_rtx_MEM (V2SImode, insn);
+ insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ saved_size += 8;
+ }
+
+ /* Save any floating point call-saved registers used by this
+ function. */
+ if (TARGET_FPA_EMU2)
+ {
+ for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
+ insn = gen_rtx_MEM (XFmode, insn);
+ insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ saved_size += 12;
+ }
+ }
+ else
+ {
+ start_reg = LAST_FPA_REGNUM;
+
+ for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
+ {
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ if (start_reg - reg == 3)
+ {
+ insn = emit_sfm (reg, 4);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ saved_size += 48;
+ start_reg = reg - 1;
+ }
+ }
+ else
+ {
+ if (start_reg != reg)
+ {
+ insn = emit_sfm (reg + 1, start_reg - reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ saved_size += (start_reg - reg) * 12;
+ }
+ start_reg = reg - 1;
+ }
+ }
+
+ if (start_reg != reg)
+ {
+ insn = emit_sfm (reg + 1, start_reg - reg);
+ saved_size += (start_reg - reg) * 12;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ start_reg = FIRST_VFP_REGNUM;
+
+ for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
+ {
+ if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+ && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
+ {
+ if (start_reg != reg)
+ saved_size += vfp_emit_fstmd (start_reg,
+ (reg - start_reg) / 2);
+ start_reg = reg + 2;
+ }
+ }
+ if (start_reg != reg)
+ saved_size += vfp_emit_fstmd (start_reg,
+ (reg - start_reg) / 2);
+ }
+ return saved_size;
+}
+
+
+/* Set the Thumb frame pointer from the stack pointer. */
+
+static void
+thumb_set_frame_pointer (arm_stack_offsets *offsets)
+{
+ HOST_WIDE_INT amount;
+ rtx insn, dwarf;
+
+ amount = offsets->outgoing_args - offsets->locals_base;
+ if (amount < 1024)
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (amount)));
+ else
+ {
+ emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
+ /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
+ expects the first two operands to be the same. */
+ if (TARGET_THUMB2)
+ {
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx,
+ hard_frame_pointer_rtx));
+ }
+ else
+ {
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+ }
+ dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
+ plus_constant (stack_pointer_rtx, amount));
+ RTX_FRAME_RELATED_P (dwarf) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
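+
+/* A sketch of the code emitted above (assuming the Thumb hard frame
+ pointer is r7): with a 96-byte gap the fast path emits the single
+ instruction
+
+ add r7, sp, #96
+
+ whereas an amount of 4096 takes the slow path and, on Thumb-2, emits
+
+ mov r7, #4096
+ add r7, sp, r7
+
+ with a REG_FRAME_RELATED_EXPR note recording r7 = sp + 4096, so the
+ unwinder still sees one simple assignment. */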
+
+/* Generate the prologue instructions for entry into an ARM or Thumb-2
+ function. */
+void
+arm_expand_prologue (void)
+{
+ rtx amount;
+ rtx insn;
+ rtx ip_rtx;
+ unsigned long live_regs_mask;
+ unsigned long func_type;
+ int fp_offset = 0;
+ int saved_pretend_args = 0;
+ int saved_regs = 0;
+ unsigned HOST_WIDE_INT args_to_push;
+ arm_stack_offsets *offsets;
+
+ func_type = arm_current_func_type ();
+
+ /* Naked functions don't have prologues. */
+ if (IS_NAKED (func_type))
+ return;
+
+ /* Make a copy of crtl->args.pretend_args_size, as we may need to
+ modify it locally. */
+ args_to_push = crtl->args.pretend_args_size;
+
+ /* Compute which registers we will have to save onto the stack. */
+ offsets = arm_get_frame_offsets ();
+ live_regs_mask = offsets->saved_regs_mask;
+
+ ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
+
+ if (IS_STACKALIGN (func_type))
+ {
+ rtx dwarf;
+ rtx r0;
+ rtx r1;
+ /* Handle a word-aligned stack pointer. We generate the following:
+
+ mov r0, sp
+ bic r1, r0, #7
+ mov sp, r1
+ <save and restore r0 in normal prologue/epilogue>
+ mov sp, r0
+ bx lr
+
+ The unwinder doesn't need to know about the stack realignment.
+ Just tell it we saved SP in r0. */
+ gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
+
+ r0 = gen_rtx_REG (SImode, 0);
+ r1 = gen_rtx_REG (SImode, 1);
+ /* Use a real rtvec rather than NULL_RTVEC so the rest of the
+ compiler won't choke. */
+ dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
+ dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
+ insn = gen_movsi (r0, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ emit_insn (insn);
+ emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
+ emit_insn (gen_movsi (stack_pointer_rtx, r1));
+ }
+
+ /* For APCS frames, if IP register is clobbered
+ when creating frame, save that register in a special
+ way. */
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+ {
+ if (IS_INTERRUPT (func_type))
+ {
+ /* Interrupt functions must not corrupt any registers.
+ Creating a frame pointer however, corrupts the IP
+ register, so we must push it first. */
+ insn = emit_multi_reg_push (1 << IP_REGNUM);
+
+ /* Do not set RTX_FRAME_RELATED_P on this insn.
+ The dwarf stack unwinding code only wants to see one
+ stack decrement per function, and this is not it. If
+ this instruction is labeled as being part of the frame
+ creation sequence then dwarf2out_frame_debug_expr will
+ die when it encounters the assignment of IP to FP
+ later on, since the use of SP here establishes SP as
+ the CFA register and not IP.
+
+ Anyway this instruction is not really part of the stack
+ frame creation although it is part of the prologue. */
+ }
+ else if (IS_NESTED (func_type))
+ {
+ /* The static chain register is the same as the IP register,
+ which is used as a scratch register during stack frame creation.
+ To get around this we need to find somewhere to store IP
+ whilst the frame is being created. We try the following
+ places in order:
+
+ 1. The last argument register.
+ 2. A slot on the stack above the frame. (This only
+ works if the function is not a varargs function).
+ 3. Register r3, after pushing the argument registers
+ onto the stack.
+
+ Note - we only need to tell the dwarf2 backend about the SP
+ adjustment in the second variant; the static chain register
+ doesn't need to be unwound, as it doesn't contain a value
+ inherited from the caller. */
+
+ if (df_regs_ever_live_p (3) == false)
+ insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
+ else if (args_to_push == 0)
+ {
+ rtx dwarf;
+
+ gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
+ saved_regs += 4;
+
+ insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
+ insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
+ fp_offset = 4;
+
+ /* Just tell the dwarf backend that we adjusted SP. */
+ dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -fp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ }
+ else
+ {
+ /* Store the args on the stack. */
+ if (cfun->machine->uses_anonymous_args)
+ insn = emit_multi_reg_push
+ ((0xf0 >> (args_to_push / 4)) & 0xf);
+ else
+ insn = emit_insn
+ (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- args_to_push)));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ saved_pretend_args = 1;
+ fp_offset = args_to_push;
+ args_to_push = 0;
+
+ /* Now reuse r3 to preserve IP. */
+ emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
+ }
+ }
+
+ insn = emit_set_insn (ip_rtx,
+ plus_constant (stack_pointer_rtx, fp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (args_to_push)
+ {
+ /* Push the argument registers, or reserve space for them. */
+ if (cfun->machine->uses_anonymous_args)
+ insn = emit_multi_reg_push
+ ((0xf0 >> (args_to_push / 4)) & 0xf);
+ else
+ insn = emit_insn
+ (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- args_to_push)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* If this is an interrupt service routine, and the link register
+ is going to be pushed, and we are not generating an extra push
+ of IP (needed when a frame pointer is needed and the frame layout
+ is APCS), then subtracting four from LR now will mean that the
+ function return can be done with a single instruction. */
+ if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
+ && (live_regs_mask & (1 << LR_REGNUM)) != 0
+ && !(frame_pointer_needed && TARGET_APCS_FRAME)
+ && TARGET_ARM)
+ {
+ rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
+
+ emit_set_insn (lr, plus_constant (lr, -4));
+ }
+
+ if (live_regs_mask)
+ {
+ saved_regs += bit_count (live_regs_mask) * 4;
+ if (optimize_size && !frame_pointer_needed
+ && saved_regs == offsets->saved_regs - offsets->saved_args)
+ {
+ /* If no coprocessor registers are being pushed and we don't have
+ to worry about a frame pointer then push extra registers to
+ create the stack frame. This is done in a way that does not
+ alter the frame layout, so it is independent of the epilogue. */
+ int n;
+ int frame;
+ n = 0;
+ while (n < 8 && (live_regs_mask & (1 << n)) == 0)
+ n++;
+ frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
+ if (frame && n * 4 >= frame)
+ {
+ n = frame / 4;
+ live_regs_mask |= (1 << n) - 1;
+ saved_regs += frame;
+ }
+ }
+ insn = emit_multi_reg_push (live_regs_mask);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (! IS_VOLATILE (func_type))
+ saved_regs += arm_save_coproc_regs ();
+
+ if (frame_pointer_needed && TARGET_ARM)
+ {
+ /* Create the new frame pointer. */
+ if (TARGET_APCS_FRAME)
+ {
+ insn = GEN_INT (-(4 + args_to_push + fp_offset));
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (IS_NESTED (func_type))
+ {
+ /* Recover the static chain register. */
+ if (!df_regs_ever_live_p (3)
+ || saved_pretend_args)
+ insn = gen_rtx_REG (SImode, 3);
+ else /* if (crtl->args.pretend_args_size == 0) */
+ {
+ insn = plus_constant (hard_frame_pointer_rtx, 4);
+ insn = gen_frame_mem (SImode, insn);
+ }
+ emit_set_insn (ip_rtx, insn);
+ /* Add a USE to stop propagate_one_insn() from barfing. */
+ emit_insn (gen_prologue_use (ip_rtx));
+ }
+ }
+ else
+ {
+ insn = GEN_INT (saved_regs - 4);
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx, insn));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (flag_stack_usage)
+ current_function_static_stack_size
+ = offsets->outgoing_args - offsets->saved_args;
+
+ if (offsets->outgoing_args != offsets->saved_args + saved_regs)
+ {
+ /* This add can produce multiple insns for a large constant, so we
+ need to get tricky. */
+ rtx last = get_last_insn ();
+
+ amount = GEN_INT (offsets->saved_args + saved_regs
+ - offsets->outgoing_args);
+
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ amount));
+ do
+ {
+ last = last ? NEXT_INSN (last) : get_insns ();
+ RTX_FRAME_RELATED_P (last) = 1;
+ }
+ while (last != insn);
+
+ /* If the frame pointer is needed, emit a special barrier that
+ will prevent the scheduler from moving stores to the frame
+ before the stack adjustment. */
+ if (frame_pointer_needed)
+ insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
+ hard_frame_pointer_rtx));
+ }
+
+
+ if (frame_pointer_needed && TARGET_THUMB2)
+ thumb_set_frame_pointer (offsets);
+
+ if (flag_pic && arm_pic_register != INVALID_REGNUM)
+ {
+ unsigned long mask;
+
+ mask = live_regs_mask;
+ mask &= THUMB2_WORK_REGS;
+ if (!IS_NESTED (func_type))
+ mask |= (1 << IP_REGNUM);
+ arm_load_pic_register (mask);
+ }
+
+ /* If we are profiling, make sure no instructions are scheduled before
+ the call to mcount. Similarly if the user has requested no
+ scheduling in the prolog. Similarly if we want non-call exceptions
+ using the EABI unwinder, to prevent faulting instructions from being
+ swapped with a stack adjustment. */
+ if (crtl->profile || !TARGET_SCHED_PROLOG
+ || (arm_except_unwind_info (&global_options) == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
+ emit_insn (gen_blockage ());
+
+ /* If the link register is being kept alive, with the return address in it,
+ then make sure that it does not get reused by the ce2 pass. */
+ if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
+ cfun->machine->lr_save_eliminated = 1;
+}
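+
+/* A worked example of the -Os frame-creation trick above (illustrative):
+ a function that saves {r4, lr} and needs an 8-byte local frame would
+ normally emit
+
+ push {r4, lr}
+ sub sp, sp, #8
+
+ With r0-r3 dead, n * 4 >= frame holds (16 >= 8), so the mask is
+ widened by frame / 4 = 2 low registers and the prologue collapses to
+
+ push {r0, r1, r4, lr}
+
+ The epilogue applies the matching trick when popping. */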
+
+/* Print condition code to STREAM. Helper function for arm_print_operand. */
+static void
+arm_print_condition (FILE *stream)
+{
+ if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
+ {
+ /* Branch conversion is not implemented for Thumb-2. */
+ if (TARGET_THUMB)
+ {
+ output_operand_lossage ("predicated Thumb instruction");
+ return;
+ }
+ if (current_insn_predicate != NULL)
+ {
+ output_operand_lossage
+ ("predicated instruction in conditional sequence");
+ return;
+ }
+
+ fputs (arm_condition_codes[arm_current_cc], stream);
+ }
+ else if (current_insn_predicate)
+ {
+ enum arm_cond_code code;
+
+ if (TARGET_THUMB1)
+ {
+ output_operand_lossage ("predicated Thumb instruction");
+ return;
+ }
+
+ code = get_arm_condition_code (current_insn_predicate);
+ fputs (arm_condition_codes[code], stream);
+ }
+}
+
+
+/* If CODE is 'd', then the X is a condition operand and the instruction
+ should only be executed if the condition is true.
+ if CODE is 'D', then the X is a condition operand and the instruction
+ should only be executed if the condition is false: however, if the mode
+ of the comparison is CCFPEmode, then always execute the instruction -- we
+ do this because in these circumstances !GE does not necessarily imply LT;
+ in these cases the instruction pattern will take care to make sure that
+ an instruction containing %d will follow, thereby undoing the effects of
+ doing this instruction unconditionally.
+ If CODE is 'N' then X is a floating point operand that must be negated
+ before output.
+ If CODE is 'B' then output a bitwise inverted value of X (a const int).
+ If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
+static void
+arm_print_operand (FILE *stream, rtx x, int code)
+{
+ switch (code)
+ {
+ case '@':
+ fputs (ASM_COMMENT_START, stream);
+ return;
+
+ case '_':
+ fputs (user_label_prefix, stream);
+ return;
+
+ case '|':
+ fputs (REGISTER_PREFIX, stream);
+ return;
+
+ case '?':
+ arm_print_condition (stream);
+ return;
+
+ case '(':
+ /* Nothing in unified syntax, otherwise the current condition code. */
+ if (!TARGET_UNIFIED_ASM)
+ arm_print_condition (stream);
+ break;
+
+ case ')':
+ /* The current condition code in unified syntax, otherwise nothing. */
+ if (TARGET_UNIFIED_ASM)
+ arm_print_condition (stream);
+ break;
+
+ case '.':
+ /* The current condition code for a condition code setting instruction.
+ Preceded by 's' in unified syntax, otherwise followed by 's'. */
+ if (TARGET_UNIFIED_ASM)
+ {
+ fputc('s', stream);
+ arm_print_condition (stream);
+ }
+ else
+ {
+ arm_print_condition (stream);
+ fputc('s', stream);
+ }
+ return;
+
+ case '!':
+ /* If the instruction is conditionally executed then print
+ the current condition code, otherwise print 's'. */
+ gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
+ if (current_insn_predicate)
+ arm_print_condition (stream);
+ else
+ fputc('s', stream);
+ break;
+
+ /* %# is a "break" sequence. It doesn't output anything, but is used to
+ separate e.g. operand numbers from following text, if that text consists
+ of further digits which we don't want to be part of the operand
+ number. */
+ case '#':
+ return;
+
+ case 'N':
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ r = real_value_negate (&r);
+ fprintf (stream, "%s", fp_const_from_val (&r));
+ }
+ return;
+
+ /* An integer or symbol address without a preceding # sign. */
+ case 'c':
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case SYMBOL_REF:
+ output_addr_const (stream, x);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return;
+
+ case 'B':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ HOST_WIDE_INT val;
+ val = ARM_SIGN_EXTEND (~INTVAL (x));
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
+ }
+ else
+ {
+ putc ('~', stream);
+ output_addr_const (stream, x);
+ }
+ return;
+
+ case 'L':
+ /* The low 16 bits of an immediate constant. */
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
+ return;
+
+ case 'i':
+ fprintf (stream, "%s", arithmetic_instr (x, 1));
+ return;
+
+ /* Truncate Cirrus shift counts. */
+ case 's':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
+ return;
+ }
+ arm_print_operand (stream, x, 0);
+ return;
+
+ case 'I':
+ fprintf (stream, "%s", arithmetic_instr (x, 0));
+ return;
+
+ case 'S':
+ {
+ HOST_WIDE_INT val;
+ const char *shift;
+
+ if (!shift_operator (x, SImode))
+ {
+ output_operand_lossage ("invalid shift operand");
+ break;
+ }
+
+ shift = shift_op (x, &val);
+
+ if (shift)
+ {
+ fprintf (stream, ", %s ", shift);
+ if (val == -1)
+ arm_print_operand (stream, XEXP (x, 1), 0);
+ else
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
+ }
+ }
+ return;
+
+ /* An explanation of the 'Q', 'R' and 'H' register operands:
+
+ In a pair of registers containing a DI or DF value the 'Q'
+ operand returns the register number of the register containing
+ the least significant part of the value. The 'R' operand returns
+ the register number of the register containing the most
+ significant part of the value.
+
+ The 'H' operand returns the higher of the two register numbers.
+ On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
+ same as the 'Q' operand, since the most significant part of the
+ value is held in the lower number register. The reverse is true
+ on systems where WORDS_BIG_ENDIAN is false.
+
+ The purpose of these operands is to distinguish between cases
+ where the endian-ness of the values is important (for example
+ when they are added together), and cases where the endian-ness
+ is irrelevant, but the order of register operations is important.
+ For example when loading a value from memory into a register
+ pair, the endian-ness does not matter. Provided that the value
+ from the lower memory address is put into the lower numbered
+ register, and the value from the higher address is put into the
+ higher numbered register, the load will work regardless of whether
+ the value being loaded is big-wordian or little-wordian. The
+ order of the two register loads can matter however, if the address
+ of the memory location is actually held in one of the registers
+ being overwritten by the load.
+
+ The 'Q' and 'R' constraints are also available for 64-bit
+ constants. */
+ case 'Q':
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ rtx part = gen_lowpart (SImode, x);
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
+ return;
+ }
+
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
+ return;
+
+ case 'R':
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ enum machine_mode mode = GET_MODE (x);
+ rtx part;
+
+ if (mode == VOIDmode)
+ mode = DImode;
+ part = gen_highpart_mode (SImode, mode, x);
+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
+ return;
+ }
+
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
+ return;
+
+ case 'H':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + 1);
+ return;
+
+ case 'J':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
+ return;
+
+ case 'K':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
+ return;
+
+ case 'm':
+ asm_fprintf (stream, "%r",
+ GET_CODE (XEXP (x, 0)) == REG
+ ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
+ return;
+
+ case 'M':
+ asm_fprintf (stream, "{%r-%r}",
+ REGNO (x),
+ REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
+ return;
+
+ /* Like 'M', but writing doubleword vector registers, for use by Neon
+ insns. */
+ case 'h':
+ {
+ int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
+ int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
+ if (numregs == 1)
+ asm_fprintf (stream, "{d%d}", regno);
+ else
+ asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
+ }
+ return;
+
+ case 'd':
+ /* CONST_TRUE_RTX means always -- that's the default. */
+ if (x == const_true_rtx)
+ return;
+
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fputs (arm_condition_codes[get_arm_condition_code (x)],
+ stream);
+ return;
+
+ case 'D':
+ /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
+ want to do that. */
+ if (x == const_true_rtx)
+ {
+ output_operand_lossage ("instruction never executed");
+ return;
+ }
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
+ (get_arm_condition_code (x))],
+ stream);
+ return;
+
+ /* Cirrus registers can be accessed in a variety of ways:
+ single floating point (f)
+ double floating point (d)
+ 32bit integer (fx)
+ 64bit integer (dx). */
+ case 'W': /* Cirrus register in F mode. */
+ case 'X': /* Cirrus register in D mode. */
+ case 'Y': /* Cirrus register in FX mode. */
+ case 'Z': /* Cirrus register in DX mode. */
+ gcc_assert (GET_CODE (x) == REG
+ && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
+
+ fprintf (stream, "mv%s%s",
+ code == 'W' ? "f"
+ : code == 'X' ? "d"
+ : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
+
+ return;
+
+ /* Print a Cirrus register, selecting the name by the register's mode. */
+ case 'V':
+ {
+ int mode = GET_MODE (x);
+
+ if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "mv%s%s",
+ mode == DFmode ? "d"
+ : mode == SImode ? "fx"
+ : mode == DImode ? "dx"
+ : "f", reg_names[REGNO (x)] + 2);
+
+ return;
+ }
+
+ case 'U':
+ if (GET_CODE (x) != REG
+ || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
+ || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
+ /* Bad value for wCG register number. */
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ else
+ fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
+ return;
+
+ /* Print an iWMMXt control register name. */
+ case 'w':
+ if (GET_CODE (x) != CONST_INT
+ || INTVAL (x) < 0
+ || INTVAL (x) >= 16)
+ /* Bad value for wC register number. */
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ else
+ {
+ static const char * wc_reg_names [16] =
+ {
+ "wCID", "wCon", "wCSSF", "wCASF",
+ "wC4", "wC5", "wC6", "wC7",
+ "wCGR0", "wCGR1", "wCGR2", "wCGR3",
+ "wC12", "wC13", "wC14", "wC15"
+ };
+
+ fprintf (stream, wc_reg_names [INTVAL (x)]);
+ }
+ return;
+
+ /* Print the high single-precision register of a VFP double-precision
+ register. */
+ case 'p':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
+ }
+ return;
+
+ /* Print a VFP/Neon double precision or quad precision register name. */
+ case 'P':
+ case 'q':
+ {
+ int mode = GET_MODE (x);
+ int is_quad = (code == 'q');
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ if (GET_CODE (x) != REG
+ || !IS_VFP_REGNUM (REGNO (x)))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
+ || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
+ (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
+ }
+ return;
+
+ /* These two codes print the low/high doubleword register of a Neon quad
+ register, respectively. For pair-structure types, can also print
+ low/high quadword registers. */
+ case 'e':
+ case 'f':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if ((GET_MODE_SIZE (mode) != 16
+ && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!NEON_REGNO_OK_FOR_QUAD (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ if (GET_MODE_SIZE (mode) == 16)
+ fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
+ + (code == 'f' ? 1 : 0));
+ else
+ fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
+ + (code == 'f' ? 1 : 0));
+ }
+ return;
+
+ /* Print a VFPv3 floating-point constant, represented as an integer
+ index. */
+ case 'G':
+ {
+ int index = vfp3_const_double_index (x);
+ gcc_assert (index != -1);
+ fprintf (stream, "%d", index);
+ }
+ return;
+
+ /* Print bits representing opcode features for Neon.
+
+ Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
+ and polynomials as unsigned.
+
+ Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
+
+ Bit 2 is 1 for rounding functions, 0 otherwise. */
+
+ /* Identify the type as 's', 'u', 'p' or 'f'. */
+ case 'T':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("uspf"[bits & 3], stream);
+ }
+ return;
+
+ /* Likewise, but signed and unsigned integers are both 'i'. */
+ case 'F':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("iipf"[bits & 3], stream);
+ }
+ return;
+
+ /* As for 'T', but emit 'u' instead of 'p'. */
+ case 't':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputc ("usuf"[bits & 3], stream);
+ }
+ return;
+
+ /* Bit 2: rounding (vs none). */
+ case 'O':
+ {
+ HOST_WIDE_INT bits = INTVAL (x);
+ fputs ((bits & 4) != 0 ? "r" : "", stream);
+ }
+ return;
+
+ /* Memory operand for vld1/vst1 instruction. */
+ case 'A':
+ {
+ rtx addr;
+ bool postinc = FALSE;
+ unsigned align, modesize, align_bits;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ if (GET_CODE (addr) == POST_INC)
+ {
+ postinc = 1;
+ addr = XEXP (addr, 0);
+ }
+ asm_fprintf (stream, "[%r", REGNO (addr));
+
+ /* We know the alignment of this access, so we can emit a hint in the
+ instruction (for some alignments) as an aid to the memory subsystem
+ of the target. */
+ align = MEM_ALIGN (x) >> 3;
+ modesize = GET_MODE_SIZE (GET_MODE (x));
+
+ /* Only certain alignment specifiers are supported by the hardware. */
+ if (modesize == 16 && (align % 32) == 0)
+ align_bits = 256;
+ else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
+ align_bits = 128;
+ else if ((align % 8) == 0)
+ align_bits = 64;
+ else
+ align_bits = 0;
+
+ if (align_bits != 0)
+ asm_fprintf (stream, ":%d", align_bits);
+
+ asm_fprintf (stream, "]");
+
+ if (postinc)
+ fputs("!", stream);
+ }
+ return;
+
+ case 'C':
+ {
+ rtx addr;
+
+ gcc_assert (GET_CODE (x) == MEM);
+ addr = XEXP (x, 0);
+ gcc_assert (GET_CODE (addr) == REG);
+ asm_fprintf (stream, "[%r]", REGNO (addr));
+ }
+ return;
+
+ /* Translate an S register number into a D register number and element index. */
+ case 'y':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+ }
+ return;
+
+ /* Register specifier for vld1.16/vst1.16. Translate the S register
+ number into a D register number and element index. */
+ case 'z':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
+ }
+ return;
+
+ default:
+ if (x == 0)
+ {
+ output_operand_lossage ("missing operand");
+ return;
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ asm_fprintf (stream, "%r", REGNO (x));
+ break;
+
+ case MEM:
+ output_memory_reference_mode = GET_MODE (x);
+ output_address (XEXP (x, 0));
+ break;
+
+ case CONST_DOUBLE:
+ if (TARGET_NEON)
+ {
+ char fpstr[20];
+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (fpstr), 0, 1);
+ fprintf (stream, "#%s", fpstr);
+ }
+ else
+ fprintf (stream, "#%s", fp_immediate_constant (x));
+ break;
+
+ default:
+ gcc_assert (GET_CODE (x) != NEG);
+ fputc ('#', stream);
+ if (GET_CODE (x) == HIGH)
+ {
+ fputs (":lower16:", stream);
+ x = XEXP (x, 0);
+ }
+
+ output_addr_const (stream, x);
+ break;
+ }
+ }
+}
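+
+/* Some concrete outputs for the operand codes above (illustrative): for
+ a DImode value held in {r4, r5} on a little-endian target, %Q prints
+ "r4" (least significant word), %R prints "r5" (most significant word)
+ and %H prints "r5" (the higher register number); on a big-endian
+ target %Q and %H both print "r5" while %R prints "r4". For the 'A'
+ memory code, a 16-byte Neon access known to be 32-byte aligned is
+ rendered "[r0:256]", while one with only 8-byte alignment is rendered
+ "[r0:64]". */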
+
+/* Target hook for printing a memory address. */
+static void
+arm_print_operand_address (FILE *stream, rtx x)
+{
+ if (TARGET_32BIT)
+ {
+ int is_minus = GET_CODE (x) == MINUS;
+
+ if (GET_CODE (x) == REG)
+ asm_fprintf (stream, "[%r, #0]", REGNO (x));
+ else if (GET_CODE (x) == PLUS || is_minus)
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+ HOST_WIDE_INT offset = 0;
+ if (GET_CODE (base) != REG
+ || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
+ {
+ /* Ensure that BASE is a register (one of them must be), and
+ that SP is not used as an index register. */
+ rtx temp = base;
+ base = index;
+ index = temp;
+ }
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ offset = INTVAL (index);
+ if (is_minus)
+ offset = -offset;
+ asm_fprintf (stream, "[%r, #%wd]",
+ REGNO (base), offset);
+ break;
+
+ case REG:
+ asm_fprintf (stream, "[%r, %s%r]",
+ REGNO (base), is_minus ? "-" : "",
+ REGNO (index));
+ break;
+
+ case MULT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ASHIFT:
+ case ROTATERT:
+ {
+ asm_fprintf (stream, "[%r, %s%r",
+ REGNO (base), is_minus ? "-" : "",
+ REGNO (XEXP (index, 0)));
+ arm_print_operand (stream, index, 'S');
+ fputs ("]", stream);
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
+ {
+ extern enum machine_mode output_memory_reference_mode;
+
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+
+ if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
+ asm_fprintf (stream, "[%r, #%s%d]!",
+ REGNO (XEXP (x, 0)),
+ GET_CODE (x) == PRE_DEC ? "-" : "",
+ GET_MODE_SIZE (output_memory_reference_mode));
+ else
+ asm_fprintf (stream, "[%r], #%s%d",
+ REGNO (XEXP (x, 0)),
+ GET_CODE (x) == POST_DEC ? "-" : "",
+ GET_MODE_SIZE (output_memory_reference_mode));
+ }
+ else if (GET_CODE (x) == PRE_MODIFY)
+ {
+ asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
+ if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
+ asm_fprintf (stream, "#%wd]!",
+ INTVAL (XEXP (XEXP (x, 1), 1)));
+ else
+ asm_fprintf (stream, "%r]!",
+ REGNO (XEXP (XEXP (x, 1), 1)));
+ }
+ else if (GET_CODE (x) == POST_MODIFY)
+ {
+ asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
+ if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
+ asm_fprintf (stream, "#%wd",
+ INTVAL (XEXP (XEXP (x, 1), 1)));
+ else
+ asm_fprintf (stream, "%r",
+ REGNO (XEXP (XEXP (x, 1), 1)));
+ }
+ else output_addr_const (stream, x);
+ }
+ else
+ {
+ if (GET_CODE (x) == REG)
+ asm_fprintf (stream, "[%r]", REGNO (x));
+ else if (GET_CODE (x) == POST_INC)
+ asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
+ else if (GET_CODE (x) == PLUS)
+ {
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ asm_fprintf (stream, "[%r, #%wd]",
+ REGNO (XEXP (x, 0)),
+ INTVAL (XEXP (x, 1)));
+ else
+ asm_fprintf (stream, "[%r, %r]",
+ REGNO (XEXP (x, 0)),
+ REGNO (XEXP (x, 1)));
+ }
+ else
+ output_addr_const (stream, x);
+ }
+}
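+
+/* As an illustrative sketch of the syntax emitted above (register
+   names and offsets are hypothetical; ARM state, SImode accesses):
+
+     (reg r0)                           ->  [r0, #0]
+     (plus (reg r0) (const_int 4))      ->  [r0, #4]
+     (minus (reg r0) (reg r1))          ->  [r0, -r1]
+     (post_inc (reg r0))                ->  [r0], #4
+     (pre_modify (reg r0) (plus (reg r0) (const_int 8)))
+                                        ->  [r0, #8]!   */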
+
+/* Target hook for indicating whether a punctuation character for
+ TARGET_PRINT_OPERAND is valid. */
+static bool
+arm_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '@' || code == '|' || code == '.'
+ || code == '(' || code == ')' || code == '#'
+ || (TARGET_32BIT && (code == '?'))
+ || (TARGET_THUMB2 && (code == '!'))
+ || (TARGET_THUMB && (code == '_')));
+}
+
+/* Target hook for assembling integer objects. The ARM version needs to
+ handle word-sized values specially. */
+static bool
+arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ enum machine_mode mode;
+
+ if (size == UNITS_PER_WORD && aligned_p)
+ {
+ fputs ("\t.word\t", asm_out_file);
+ output_addr_const (asm_out_file, x);
+
+ /* Mark symbols as position independent. We only do this in the
+ .text segment, not in the .data segment. */
+      if (NEED_GOT_RELOC && flag_pic && making_const_table
+	  && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
+ {
+ /* See legitimize_pic_address for an explanation of the
+ TARGET_VXWORKS_RTP check. */
+ if (TARGET_VXWORKS_RTP
+ || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
+ fputs ("(GOT)", asm_out_file);
+ else
+ fputs ("(GOTOFF)", asm_out_file);
+ }
+ fputc ('\n', asm_out_file);
+ return true;
+ }
+
+ mode = GET_MODE (x);
+
+ if (arm_vector_mode_supported_p (mode))
+ {
+ int i, units;
+
+ gcc_assert (GET_CODE (x) == CONST_VECTOR);
+
+ units = CONST_VECTOR_NUNITS (x);
+ size = GET_MODE_SIZE (GET_MODE_INNER (mode));
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i);
+ assemble_integer
+ (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
+ }
+ else
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i);
+ REAL_VALUE_TYPE rval;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
+
+ assemble_real
+ (rval, GET_MODE_INNER (mode),
+ i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
+ }
+
+ return true;
+ }
+
+ return default_assemble_integer (x, size, aligned_p);
+}
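+
+/* For illustration: with -fPIC, a constant-table entry for a global
+   symbol "foo" is emitted above as "\t.word\tfoo(GOT)", while a local
+   symbol "bar" becomes "\t.word\tbar(GOTOFF)".  ("foo" and "bar" are
+   hypothetical names.)  */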
+
+static void
+arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
+{
+ section *s;
+
+ if (!TARGET_AAPCS_BASED)
+ {
+ (is_ctor ?
+ default_named_section_asm_out_constructor
+ : default_named_section_asm_out_destructor) (symbol, priority);
+ return;
+ }
+
+ /* Put these in the .init_array section, using a special relocation. */
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ char buf[18];
+ sprintf (buf, "%s.%.5u",
+ is_ctor ? ".init_array" : ".fini_array",
+ priority);
+ s = get_section (buf, SECTION_WRITE, NULL_TREE);
+ }
+ else if (is_ctor)
+ s = ctors_section;
+ else
+ s = dtors_section;
+
+ switch_to_section (s);
+ assemble_align (POINTER_SIZE);
+ fputs ("\t.word\t", asm_out_file);
+ output_addr_const (asm_out_file, symbol);
+ fputs ("(target1)\n", asm_out_file);
+}
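+
+/* For example, on an AAPCS-based target a constructor registered with
+   priority 100 is placed in the ".init_array.00100" section and
+   emitted as "\t.word\tsym(target1)", where "sym" stands for the
+   constructor's symbol (illustrative).  */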
+
+/* Add a function to the list of static constructors. */
+
+static void
+arm_elf_asm_constructor (rtx symbol, int priority)
+{
+ arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
+}
+
+/* Add a function to the list of static destructors. */
+
+static void
+arm_elf_asm_destructor (rtx symbol, int priority)
+{
+ arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
+}
+
+/* A finite state machine takes care of noticing whether or not instructions
+   can be conditionally executed, thereby decreasing execution time and code
+   size by deleting branch instructions.  The fsm is controlled by
+   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
+
+/* The states of the fsm controlling condition codes are:
+ 0: normal, do nothing special
+ 1: make ASM_OUTPUT_OPCODE not output this instruction
+ 2: make ASM_OUTPUT_OPCODE not output this instruction
+ 3: make instructions conditional
+ 4: make instructions conditional
+
+ State transitions (state->state by whom under condition):
+ 0 -> 1 final_prescan_insn if the `target' is a label
+ 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
+ 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
+ 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
+ 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
+ (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
+ 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
+ (the target insn is arm_target_insn).
+
+ If the jump clobbers the conditions then we use states 2 and 4.
+
+ A similar thing can be done with conditional return insns.
+
+ XXX In case the `target' is an unconditional branch, this conditionalising
+ of the instructions always reduces code size, but not always execution
+ time. But then, I want to reduce the code size to somewhere near what
+ /bin/cc produces. */
+
+/* In addition to this, state is maintained for Thumb-2 COND_EXEC
+ instructions. When a COND_EXEC instruction is seen the subsequent
+ instructions are scanned so that multiple conditional instructions can be
+ combined into a single IT block. arm_condexec_count and arm_condexec_mask
+ specify the length and true/false mask for the IT block. These will be
+ decremented/zeroed by arm_asm_output_opcode as the insns are output. */
+
+/* Returns the index of the ARM condition code string in
+ `arm_condition_codes'. COMPARISON should be an rtx like
+ `(eq (...) (...))'. */
+static enum arm_cond_code
+get_arm_condition_code (rtx comparison)
+{
+ enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
+ enum arm_cond_code code;
+ enum rtx_code comp_code = GET_CODE (comparison);
+
+ if (GET_MODE_CLASS (mode) != MODE_CC)
+ mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
+ XEXP (comparison, 1));
+
+ switch (mode)
+ {
+ case CC_DNEmode: code = ARM_NE; goto dominance;
+ case CC_DEQmode: code = ARM_EQ; goto dominance;
+ case CC_DGEmode: code = ARM_GE; goto dominance;
+ case CC_DGTmode: code = ARM_GT; goto dominance;
+ case CC_DLEmode: code = ARM_LE; goto dominance;
+ case CC_DLTmode: code = ARM_LT; goto dominance;
+ case CC_DGEUmode: code = ARM_CS; goto dominance;
+ case CC_DGTUmode: code = ARM_HI; goto dominance;
+ case CC_DLEUmode: code = ARM_LS; goto dominance;
+ case CC_DLTUmode: code = ARM_CC;
+
+ dominance:
+ gcc_assert (comp_code == EQ || comp_code == NE);
+
+ if (comp_code == EQ)
+ return ARM_INVERSE_CONDITION_CODE (code);
+ return code;
+
+ case CC_NOOVmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case GE: return ARM_PL;
+ case LT: return ARM_MI;
+ default: gcc_unreachable ();
+ }
+
+ case CC_Zmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ default: gcc_unreachable ();
+ }
+
+ case CC_Nmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_MI;
+ case EQ: return ARM_PL;
+ default: gcc_unreachable ();
+ }
+
+ case CCFPEmode:
+ case CCFPmode:
+ /* These encodings assume that AC=1 in the FPA system control
+ byte. This allows us to handle all cases except UNEQ and
+ LTGT. */
+ switch (comp_code)
+ {
+ case GE: return ARM_GE;
+ case GT: return ARM_GT;
+ case LE: return ARM_LS;
+ case LT: return ARM_MI;
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case ORDERED: return ARM_VC;
+ case UNORDERED: return ARM_VS;
+ case UNLT: return ARM_LT;
+ case UNLE: return ARM_LE;
+ case UNGT: return ARM_HI;
+ case UNGE: return ARM_PL;
+ /* UNEQ and LTGT do not have a representation. */
+ case UNEQ: /* Fall through. */
+ case LTGT: /* Fall through. */
+ default: gcc_unreachable ();
+ }
+
+ case CC_SWPmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case GE: return ARM_LE;
+ case GT: return ARM_LT;
+ case LE: return ARM_GE;
+ case LT: return ARM_GT;
+ case GEU: return ARM_LS;
+ case GTU: return ARM_CC;
+ case LEU: return ARM_CS;
+ case LTU: return ARM_HI;
+ default: gcc_unreachable ();
+ }
+
+ case CC_Cmode:
+ switch (comp_code)
+ {
+ case LTU: return ARM_CS;
+ case GEU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ case CC_CZmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case GEU: return ARM_CS;
+ case GTU: return ARM_HI;
+ case LEU: return ARM_LS;
+ case LTU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ case CC_NCVmode:
+ switch (comp_code)
+ {
+ case GE: return ARM_GE;
+ case LT: return ARM_LT;
+ case GEU: return ARM_CS;
+ case LTU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ case CCmode:
+ switch (comp_code)
+ {
+ case NE: return ARM_NE;
+ case EQ: return ARM_EQ;
+ case GE: return ARM_GE;
+ case GT: return ARM_GT;
+ case LE: return ARM_LE;
+ case LT: return ARM_LT;
+ case GEU: return ARM_CS;
+ case GTU: return ARM_HI;
+ case LEU: return ARM_LS;
+ case LTU: return ARM_CC;
+ default: gcc_unreachable ();
+ }
+
+ default: gcc_unreachable ();
+ }
+}
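+
+/* An illustrative sketch of the mapping above (not exhaustive): the
+   comparison (eq (reg:CC cc) (const_int 0)) in plain CCmode yields
+   ARM_EQ; in CC_SWPmode (operands swapped) EQ still yields ARM_EQ,
+   but LT yields ARM_GT.  */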
+
+/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
+ instructions. */
+void
+thumb2_final_prescan_insn (rtx insn)
+{
+ rtx first_insn = insn;
+ rtx body = PATTERN (insn);
+ rtx predicate;
+ enum arm_cond_code code;
+ int n;
+ int mask;
+
+ /* Remove the previous insn from the count of insns to be output. */
+ if (arm_condexec_count)
+ arm_condexec_count--;
+
+ /* Nothing to do if we are already inside a conditional block. */
+ if (arm_condexec_count)
+ return;
+
+ if (GET_CODE (body) != COND_EXEC)
+ return;
+
+ /* Conditional jumps are implemented directly. */
+ if (GET_CODE (insn) == JUMP_INSN)
+ return;
+
+ predicate = COND_EXEC_TEST (body);
+ arm_current_cc = get_arm_condition_code (predicate);
+
+ n = get_attr_ce_count (insn);
+ arm_condexec_count = 1;
+ arm_condexec_mask = (1 << n) - 1;
+ arm_condexec_masklen = n;
+ /* See if subsequent instructions can be combined into the same block. */
+ for (;;)
+ {
+ insn = next_nonnote_insn (insn);
+
+ /* Jumping into the middle of an IT block is illegal, so a label or
+ barrier terminates the block. */
+      if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
+ break;
+
+ body = PATTERN (insn);
+ /* USE and CLOBBER aren't really insns, so just skip them. */
+ if (GET_CODE (body) == USE
+ || GET_CODE (body) == CLOBBER)
+ continue;
+
+ /* ??? Recognize conditional jumps, and combine them with IT blocks. */
+ if (GET_CODE (body) != COND_EXEC)
+ break;
+ /* Allow up to 4 conditionally executed instructions in a block. */
+ n = get_attr_ce_count (insn);
+ if (arm_condexec_masklen + n > 4)
+ break;
+
+ predicate = COND_EXEC_TEST (body);
+ code = get_arm_condition_code (predicate);
+ mask = (1 << n) - 1;
+ if (arm_current_cc == code)
+ arm_condexec_mask |= (mask << arm_condexec_masklen);
+      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
+ break;
+
+ arm_condexec_count++;
+ arm_condexec_masklen += n;
+
+ /* A jump must be the last instruction in a conditional block. */
+      if (GET_CODE (insn) == JUMP_INSN)
+ break;
+ }
+ /* Restore recog_data (getting the attributes of other insns can
+ destroy this array, but final.c assumes that it remains intact
+ across this call). */
+ extract_constrain_insn_cached (first_insn);
+}
+
+void
+arm_final_prescan_insn (rtx insn)
+{
+ /* BODY will hold the body of INSN. */
+ rtx body = PATTERN (insn);
+
+  /* This will be 1 if we are trying to repeat the trick (see the
+     state-3 handling below), and things need to be reversed if the
+     attempt appears to fail.  */
+ int reverse = 0;
+
+ /* If we start with a return insn, we only succeed if we find another one. */
+ int seeking_return = 0;
+
+ /* START_INSN will hold the insn from where we start looking. This is the
+ first insn after the following code_label if REVERSE is true. */
+ rtx start_insn = insn;
+
+ /* If in state 4, check if the target branch is reached, in order to
+ change back to state 0. */
+ if (arm_ccfsm_state == 4)
+ {
+ if (insn == arm_target_insn)
+ {
+ arm_target_insn = NULL;
+ arm_ccfsm_state = 0;
+ }
+ return;
+ }
+
+ /* If in state 3, it is possible to repeat the trick, if this insn is an
+ unconditional branch to a label, and immediately following this branch
+ is the previous target label which is only used once, and the label this
+ branch jumps to is not too far off. */
+ if (arm_ccfsm_state == 3)
+ {
+ if (simplejump_p (insn))
+ {
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == BARRIER)
+ {
+ /* XXX Isn't this always a barrier? */
+ start_insn = next_nonnote_insn (start_insn);
+ }
+ if (GET_CODE (start_insn) == CODE_LABEL
+ && CODE_LABEL_NUMBER (start_insn) == arm_target_label
+ && LABEL_NUSES (start_insn) == 1)
+ reverse = TRUE;
+ else
+ return;
+ }
+ else if (GET_CODE (body) == RETURN)
+ {
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == BARRIER)
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == CODE_LABEL
+ && CODE_LABEL_NUMBER (start_insn) == arm_target_label
+ && LABEL_NUSES (start_insn) == 1)
+ {
+ reverse = TRUE;
+ seeking_return = 1;
+ }
+ else
+ return;
+ }
+ else
+ return;
+ }
+
+ gcc_assert (!arm_ccfsm_state || reverse);
+ if (GET_CODE (insn) != JUMP_INSN)
+ return;
+
+  /* This jump might be paralleled with a clobber of the condition codes;
+     the jump should always come first.  */
+ if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
+ body = XVECEXP (body, 0, 0);
+
+ if (reverse
+ || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
+ && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
+ {
+ int insns_skipped;
+ int fail = FALSE, succeed = FALSE;
+ /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
+ int then_not_else = TRUE;
+ rtx this_insn = start_insn, label = 0;
+
+ /* Register the insn jumped to. */
+ if (reverse)
+ {
+ if (!seeking_return)
+ label = XEXP (SET_SRC (body), 0);
+ }
+ else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
+ label = XEXP (XEXP (SET_SRC (body), 1), 0);
+ else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
+ {
+ label = XEXP (XEXP (SET_SRC (body), 2), 0);
+ then_not_else = FALSE;
+ }
+ else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
+ seeking_return = 1;
+ else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
+ {
+ seeking_return = 1;
+ then_not_else = FALSE;
+ }
+ else
+ gcc_unreachable ();
+
+ /* See how many insns this branch skips, and what kind of insns. If all
+ insns are okay, and the label or unconditional branch to the same
+ label is not too far away, succeed. */
+ for (insns_skipped = 0;
+ !fail && !succeed && insns_skipped++ < max_insns_skipped;)
+ {
+ rtx scanbody;
+
+ this_insn = next_nonnote_insn (this_insn);
+ if (!this_insn)
+ break;
+
+ switch (GET_CODE (this_insn))
+ {
+ case CODE_LABEL:
+ /* Succeed if it is the target label, otherwise fail since
+ control falls in from somewhere else. */
+ if (this_insn == label)
+ {
+ arm_ccfsm_state = 1;
+ succeed = TRUE;
+ }
+ else
+ fail = TRUE;
+ break;
+
+ case BARRIER:
+ /* Succeed if the following insn is the target label.
+ Otherwise fail.
+ If return insns are used then the last insn in a function
+ will be a barrier. */
+ this_insn = next_nonnote_insn (this_insn);
+ if (this_insn && this_insn == label)
+ {
+ arm_ccfsm_state = 1;
+ succeed = TRUE;
+ }
+ else
+ fail = TRUE;
+ break;
+
+ case CALL_INSN:
+ /* The AAPCS says that conditional calls should not be
+ used since they make interworking inefficient (the
+ linker can't transform BL<cond> into BLX). That's
+ only a problem if the machine has BLX. */
+ if (arm_arch5)
+ {
+ fail = TRUE;
+ break;
+ }
+
+ /* Succeed if the following insn is the target label, or
+ if the following two insns are a barrier and the
+ target label. */
+ this_insn = next_nonnote_insn (this_insn);
+ if (this_insn && GET_CODE (this_insn) == BARRIER)
+ this_insn = next_nonnote_insn (this_insn);
+
+ if (this_insn && this_insn == label
+ && insns_skipped < max_insns_skipped)
+ {
+ arm_ccfsm_state = 1;
+ succeed = TRUE;
+ }
+ else
+ fail = TRUE;
+ break;
+
+ case JUMP_INSN:
+ /* If this is an unconditional branch to the same label, succeed.
+ If it is to another label, do nothing. If it is conditional,
+ fail. */
+ /* XXX Probably, the tests for SET and the PC are
+ unnecessary. */
+
+ scanbody = PATTERN (this_insn);
+ if (GET_CODE (scanbody) == SET
+ && GET_CODE (SET_DEST (scanbody)) == PC)
+ {
+ if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
+ && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
+ {
+ arm_ccfsm_state = 2;
+ succeed = TRUE;
+ }
+ else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
+ fail = TRUE;
+ }
+ /* Fail if a conditional return is undesirable (e.g. on a
+ StrongARM), but still allow this if optimizing for size. */
+ else if (GET_CODE (scanbody) == RETURN
+ && !use_return_insn (TRUE, NULL)
+ && !optimize_size)
+ fail = TRUE;
+ else if (GET_CODE (scanbody) == RETURN
+ && seeking_return)
+ {
+ arm_ccfsm_state = 2;
+ succeed = TRUE;
+ }
+ else if (GET_CODE (scanbody) == PARALLEL)
+ {
+ switch (get_attr_conds (this_insn))
+ {
+ case CONDS_NOCOND:
+ break;
+ default:
+ fail = TRUE;
+ break;
+ }
+ }
+ else
+ fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
+
+ break;
+
+ case INSN:
+ /* Instructions using or affecting the condition codes make it
+ fail. */
+ scanbody = PATTERN (this_insn);
+ if (!(GET_CODE (scanbody) == SET
+ || GET_CODE (scanbody) == PARALLEL)
+ || get_attr_conds (this_insn) != CONDS_NOCOND)
+ fail = TRUE;
+
+	      /* A conditional Cirrus instruction must be followed by
+		 a non-Cirrus instruction.  However, we conditionalize
+		 instructions in this function, and by the time we get
+		 here we can no longer add instructions (nops), because
+		 shorten_branches () has already been called; so we
+		 disable the conditionalizing of Cirrus instructions,
+		 to be safe.  */
+ if (GET_CODE (scanbody) != USE
+ && GET_CODE (scanbody) != CLOBBER
+ && get_attr_cirrus (this_insn) != CIRRUS_NOT)
+ fail = TRUE;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (succeed)
+ {
+ if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
+ arm_target_label = CODE_LABEL_NUMBER (label);
+ else
+ {
+ gcc_assert (seeking_return || arm_ccfsm_state == 2);
+
+ while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
+ {
+ this_insn = next_nonnote_insn (this_insn);
+ gcc_assert (!this_insn
+ || (GET_CODE (this_insn) != BARRIER
+ && GET_CODE (this_insn) != CODE_LABEL));
+ }
+ if (!this_insn)
+ {
+	      /* Oh dear!  We ran off the end; give up.  */
+ extract_constrain_insn_cached (insn);
+ arm_ccfsm_state = 0;
+ arm_target_insn = NULL;
+ return;
+ }
+ arm_target_insn = this_insn;
+ }
+
+ /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
+ what it was. */
+ if (!reverse)
+ arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
+
+ if (reverse || then_not_else)
+ arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
+ }
+
+ /* Restore recog_data (getting the attributes of other insns can
+ destroy this array, but final.c assumes that it remains intact
+	 across this call).  */
+ extract_constrain_insn_cached (insn);
+ }
+}
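+
+/* A minimal sketch of the transformation driven by the fsm above
+   (register names and label are hypothetical):
+
+       cmp     r0, #0
+       beq     .L1              @ not output (state 1 -> 3)
+       add     r1, r1, #1       @ output as:  addne   r1, r1, #1
+     .L1:                                                            */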
+
+/* Output IT instructions. */
+void
+thumb2_asm_output_opcode (FILE * stream)
+{
+ char buff[5];
+ int n;
+
+ if (arm_condexec_mask)
+ {
+ for (n = 0; n < arm_condexec_masklen; n++)
+ buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
+ buff[n] = 0;
+      asm_fprintf (stream, "i%s\t%s\n\t", buff,
+ arm_condition_codes[arm_current_cc]);
+ arm_condexec_mask = 0;
+ }
+}
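+
+/* For example (a sketch): arm_condexec_masklen == 3 with
+   arm_condexec_mask == 0x5 (binary 101) and arm_current_cc == ARM_EQ
+   gives buff == "tet", so the block is introduced as "itet\teq".
+   Bit 0 of the mask is always set, supplying the 't' that follows
+   the 'i'.  */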
+
+/* Returns nonzero if REGNO is a valid register
+   for holding a quantity of mode MODE.  */
+int
+arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return (regno == CC_REGNUM
+ || (TARGET_HARD_FLOAT && TARGET_VFP
+ && regno == VFPCC_REGNUM));
+
+ if (TARGET_THUMB1)
+ /* For the Thumb we only allow values bigger than SImode in
+ registers 0 - 6, so that there is always a second low
+ register available to hold the upper part of the value.
+       We probably ought to ensure that the register is the
+ start of an even numbered register pair. */
+ return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
+
+ if (TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && IS_CIRRUS_REGNUM (regno))
+ /* We have outlawed SI values in Cirrus registers because they
+ reside in the lower 32 bits, but SF values reside in the
+ upper 32 bits. This causes gcc all sorts of grief. We can't
+ even split the registers into pairs because Cirrus SI values
+       get sign extended to 64 bits -- aldyh.  */
+ return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
+
+ if (TARGET_HARD_FLOAT && TARGET_VFP
+ && IS_VFP_REGNUM (regno))
+ {
+ if (mode == SFmode || mode == SImode)
+ return VFP_REGNO_OK_FOR_SINGLE (regno);
+
+ if (mode == DFmode)
+ return VFP_REGNO_OK_FOR_DOUBLE (regno);
+
+ /* VFP registers can hold HFmode values, but there is no point in
+ putting them there unless we have hardware conversion insns. */
+ if (mode == HFmode)
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
+
+ if (TARGET_NEON)
+ return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
+ || (VALID_NEON_QREG_MODE (mode)
+ && NEON_REGNO_OK_FOR_QUAD (regno))
+ || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
+ || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
+ || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
+ || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
+ || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
+
+ return FALSE;
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ {
+ if (IS_IWMMXT_GR_REGNUM (regno))
+ return mode == SImode;
+
+ if (IS_IWMMXT_REGNUM (regno))
+ return VALID_IWMMXT_REG_MODE (mode);
+ }
+
+ /* We allow almost any value to be stored in the general registers.
+ Restrict doubleword quantities to even register pairs so that we can
+ use ldrd. Do not allow very large Neon structure opaque modes in
+ general registers; they would use too many. */
+ if (regno <= LAST_ARM_REGNUM)
+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
+ && ARM_NUM_REGS (mode) <= 4;
+
+ if (regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM)
+ /* We only allow integers in the fake hard registers. */
+ return GET_MODE_CLASS (mode) == MODE_INT;
+
+ /* The only registers left are the FPA registers
+ which we only allow to hold FP values. */
+ return (TARGET_HARD_FLOAT && TARGET_FPA
+ && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && regno >= FIRST_FPA_REGNUM
+ && regno <= LAST_FPA_REGNUM);
+}
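+
+/* For example (illustrative): when TARGET_LDRD is in effect, a DImode
+   value may live in the even-based pair {r0, r1} but not in {r1, r2};
+   without ldrd, any mode needing at most four core registers is
+   accepted at any core register.  */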
+
+/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
+   not used in ARM mode.  */
+
+enum reg_class
+arm_regno_class (int regno)
+{
+ if (TARGET_THUMB1)
+ {
+ if (regno == STACK_POINTER_REGNUM)
+ return STACK_REG;
+ if (regno == CC_REGNUM)
+ return CC_REG;
+ if (regno < 8)
+ return LO_REGS;
+ return HI_REGS;
+ }
+
+ if (TARGET_THUMB2 && regno < 8)
+ return LO_REGS;
+
+ if ( regno <= LAST_ARM_REGNUM
+ || regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM)
+ return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
+
+ if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
+ return TARGET_THUMB2 ? CC_REG : NO_REGS;
+
+ if (IS_CIRRUS_REGNUM (regno))
+ return CIRRUS_REGS;
+
+ if (IS_VFP_REGNUM (regno))
+ {
+ if (regno <= D7_VFP_REGNUM)
+ return VFP_D0_D7_REGS;
+ else if (regno <= LAST_LO_VFP_REGNUM)
+ return VFP_LO_REGS;
+ else
+ return VFP_HI_REGS;
+ }
+
+ if (IS_IWMMXT_REGNUM (regno))
+ return IWMMXT_REGS;
+
+ if (IS_IWMMXT_GR_REGNUM (regno))
+ return IWMMXT_GR_REGS;
+
+ return FPA_REGS;
+}
+
+/* Handle a special case when computing the offset
+ of an argument from the frame pointer. */
+int
+arm_debugger_arg_offset (int value, rtx addr)
+{
+ rtx insn;
+
+  /* We are only interested in the case where dbxout_parms () failed
+     to compute the offset.  */
+ if (value != 0)
+ return 0;
+
+ /* We can only cope with the case where the address is held in a register. */
+ if (GET_CODE (addr) != REG)
+ return 0;
+
+ /* If we are using the frame pointer to point at the argument, then
+ an offset of 0 is correct. */
+ if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
+ return 0;
+
+ /* If we are using the stack pointer to point at the
+ argument, then an offset of 0 is correct. */
+ /* ??? Check this is consistent with thumb2 frame layout. */
+ if ((TARGET_THUMB || !frame_pointer_needed)
+ && REGNO (addr) == SP_REGNUM)
+ return 0;
+
+ /* Oh dear. The argument is pointed to by a register rather
+ than being held in a register, or being stored at a known
+ offset from the frame pointer. Since GDB only understands
+ those two kinds of argument we must translate the address
+ held in the register into an offset from the frame pointer.
+ We do this by searching through the insns for the function
+ looking to see where this register gets its value. If the
+ register is initialized from the frame pointer plus an offset
+ then we are in luck and we can continue, otherwise we give up.
+
+ This code is exercised by producing debugging information
+ for a function with arguments like this:
+
+ double func (double a, double b, int c, double d) {return d;}
+
+ Without this code the stab for parameter 'd' will be set to
+ an offset of 0 from the frame pointer, rather than 8. */
+
+ /* The if() statement says:
+
+ If the insn is a normal instruction
+ and if the insn is setting the value in a register
+ and if the register being set is the register holding the address of the argument
+       and if the address is computed by an addition
+ that involves adding to a register
+ which is the frame pointer
+ a constant integer
+
+ then... */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if ( GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
+ && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
+ && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
+ && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
+ && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
+ )
+ {
+ value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
+
+ break;
+ }
+ }
+
+ if (value == 0)
+ {
+ debug_rtx (addr);
+ warning (0, "unable to compute real location of stacked parameter");
+ value = 8; /* XXX magic hack */
+ }
+
+ return value;
+}
+
+#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
+ do \
+ { \
+ if ((MASK) & insn_flags) \
+ add_builtin_function ((NAME), (TYPE), (CODE), \
+ BUILT_IN_MD, NULL, NULL_TREE); \
+ } \
+ while (0)
+
+struct builtin_description
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char * const name;
+ const enum arm_builtins code;
+ const enum rtx_code comparison;
+ const unsigned int flag;
+};
+
+static const struct builtin_description bdesc_2arg[] =
+{
+#define IWMMXT_BUILTIN(code, string, builtin) \
+ { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+
+ IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
+ IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
+ IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
+ IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
+ IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
+ IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
+ IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
+ IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
+ IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
+ IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
+ IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
+ IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
+ IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
+ IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
+ IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
+ IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
+ IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
+ IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
+ IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
+ IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
+ IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
+ IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
+ IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
+ IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
+ IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
+ IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
+ IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
+ IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
+ IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
+ IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
+ IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
+ IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
+ IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
+ IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
+ IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
+ IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
+ IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
+ IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
+ IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
+ IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
+ IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
+ IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
+ IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
+ IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
+ IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
+ IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
+ IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
+ IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
+ IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
+ IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
+ IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
+ IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
+ IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
+ IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
+
+#define IWMMXT_BUILTIN2(code, builtin) \
+ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
+
+ IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
+ IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
+ IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
+ IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
+ IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
+ IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
+ IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
+ IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
+ IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
+ IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
+ IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
+ IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
+ IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
+ IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
+ IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
+ IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
+ IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
+ IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
+ IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
+ IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
+ IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
+ IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
+ IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
+ IWMMXT_BUILTIN2 (rordi3_di, WRORD)
+ IWMMXT_BUILTIN2 (rordi3, WRORDI)
+ IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
+ IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
+};
+
+static const struct builtin_description bdesc_1arg[] =
+{
+ IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
+ IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
+ IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
+ IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
+ IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
+ IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
+ IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
+ IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
+ IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
+ IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
+};
+
+/* Set up all the iWMMXt builtins. This is
+ not called if TARGET_IWMMXT is zero. */
+
+static void
+arm_init_iwmmxt_builtins (void)
+{
+ const struct builtin_description * d;
+ size_t i;
+ tree endlink = void_list_node;
+
+ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
+
+ tree int_ftype_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+ tree v8qi_ftype_v8qi_v8qi_int
+ = build_function_type (V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+ tree v4hi_ftype_v4hi_int
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree v2si_ftype_v2si_int
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree v2si_ftype_di_di
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, long_long_integer_type_node,
+ tree_cons (NULL_TREE, long_long_integer_type_node,
+ endlink)));
+ tree di_ftype_di_int
+ = build_function_type (long_long_integer_type_node,
+ tree_cons (NULL_TREE, long_long_integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree di_ftype_di_int_int
+ = build_function_type (long_long_integer_type_node,
+ tree_cons (NULL_TREE, long_long_integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+ tree int_ftype_v8qi
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ endlink));
+ tree int_ftype_v4hi
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink));
+ tree int_ftype_v2si
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink));
+ tree int_ftype_v8qi_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree int_ftype_v4hi_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree int_ftype_v2si_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree v8qi_ftype_v8qi_int_int
+ = build_function_type (V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+ tree v4hi_ftype_v4hi_int_int
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+ tree v2si_ftype_v2si_int_int
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+ /* Miscellaneous. */
+ tree v8qi_ftype_v4hi_v4hi
+ = build_function_type (V8QI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink)));
+ tree v4hi_ftype_v2si_v2si
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink)));
+ tree v2si_ftype_v4hi_v4hi
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink)));
+ tree v2si_ftype_v8qi_v8qi
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ endlink)));
+ tree v4hi_ftype_v4hi_di
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE,
+ long_long_integer_type_node,
+ endlink)));
+ tree v2si_ftype_v2si_di
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE,
+ long_long_integer_type_node,
+ endlink)));
+ tree void_ftype_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+ tree di_ftype_void
+ = build_function_type (long_long_unsigned_type_node, endlink);
+ tree di_ftype_v8qi
+ = build_function_type (long_long_integer_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ endlink));
+ tree di_ftype_v4hi
+ = build_function_type (long_long_integer_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink));
+ tree di_ftype_v2si
+ = build_function_type (long_long_integer_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink));
+ tree v2si_ftype_v4hi
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink));
+ tree v4hi_ftype_v8qi
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ endlink));
+
+ tree di_ftype_di_v4hi_v4hi
+ = build_function_type (long_long_unsigned_type_node,
+ tree_cons (NULL_TREE,
+ long_long_unsigned_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE,
+ V4HI_type_node,
+ endlink))));
+
+ tree di_ftype_v4hi_v4hi
+ = build_function_type (long_long_unsigned_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink)));
+
+ /* Normal vector binops. */
+ tree v8qi_ftype_v8qi_v8qi
+ = build_function_type (V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ tree_cons (NULL_TREE, V8QI_type_node,
+ endlink)));
+ tree v4hi_ftype_v4hi_v4hi
+ = build_function_type (V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ tree_cons (NULL_TREE, V4HI_type_node,
+ endlink)));
+ tree v2si_ftype_v2si_v2si
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink)));
+ tree di_ftype_di_di
+ = build_function_type (long_long_unsigned_type_node,
+ tree_cons (NULL_TREE, long_long_unsigned_type_node,
+ tree_cons (NULL_TREE,
+ long_long_unsigned_type_node,
+ endlink)));
+
+ /* Add all builtins that are more or less simple operations on two
+ operands. */
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ {
+ /* Use one of the operands; the target can have a different mode for
+ mask-generating compares. */
+ enum machine_mode mode;
+ tree type;
+
+ if (d->name == 0)
+ continue;
+
+ mode = insn_data[d->icode].operand[1].mode;
+
+ switch (mode)
+ {
+ case V8QImode:
+ type = v8qi_ftype_v8qi_v8qi;
+ break;
+ case V4HImode:
+ type = v4hi_ftype_v4hi_v4hi;
+ break;
+ case V2SImode:
+ type = v2si_ftype_v2si_v2si;
+ break;
+ case DImode:
+ type = di_ftype_di_di;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ def_mbuiltin (d->mask, d->name, type, d->code);
+ }
+
+  /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
+
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
+ def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
+}
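+
+/* Illustrative C-level use of one of the builtins registered above
+   (assumes an iWMMXt-enabled target; the typedef is one way to obtain
+   a suitable vector type):
+
+     typedef signed char v8qi __attribute__ ((vector_size (8)));
+
+     v8qi
+     add_bytes (v8qi a, v8qi b)
+     {
+       return __builtin_arm_waddb (a, b);
+     }
+*/
+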
+
+static void
+arm_init_tls_builtins (void)
+{
+ tree ftype, decl;
+
+ ftype = build_function_type (ptr_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_thread_pointer", ftype,
+ ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ TREE_READONLY (decl) = 1;
+}
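+
+/* Illustrative use (assumes a target with TLS support):
+
+     void *tp = __builtin_thread_pointer ();
+
+   The builtin is marked nothrow and readonly above, so repeated calls
+   can be combined.  */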
+
+enum neon_builtin_type_bits {
+ T_V8QI = 0x0001,
+ T_V4HI = 0x0002,
+ T_V2SI = 0x0004,
+ T_V2SF = 0x0008,
+ T_DI = 0x0010,
+ T_V16QI = 0x0020,
+ T_V8HI = 0x0040,
+ T_V4SI = 0x0080,
+ T_V4SF = 0x0100,
+ T_V2DI = 0x0200,
+ T_TI = 0x0400,
+ T_EI = 0x0800,
+ T_OI = 0x1000
+};
+
+#define v8qi_UP T_V8QI
+#define v4hi_UP T_V4HI
+#define v2si_UP T_V2SI
+#define v2sf_UP T_V2SF
+#define di_UP T_DI
+#define v16qi_UP T_V16QI
+#define v8hi_UP T_V8HI
+#define v4si_UP T_V4SI
+#define v4sf_UP T_V4SF
+#define v2di_UP T_V2DI
+#define ti_UP T_TI
+#define ei_UP T_EI
+#define oi_UP T_OI
+
+#define UP(X) X##_UP
+
+#define T_MAX 13
+
+typedef enum {
+ NEON_BINOP,
+ NEON_TERNOP,
+ NEON_UNOP,
+ NEON_GETLANE,
+ NEON_SETLANE,
+ NEON_CREATE,
+ NEON_DUP,
+ NEON_DUPLANE,
+ NEON_COMBINE,
+ NEON_SPLIT,
+ NEON_LANEMUL,
+ NEON_LANEMULL,
+ NEON_LANEMULH,
+ NEON_LANEMAC,
+ NEON_SCALARMUL,
+ NEON_SCALARMULL,
+ NEON_SCALARMULH,
+ NEON_SCALARMAC,
+ NEON_CONVERT,
+ NEON_FIXCONV,
+ NEON_SELECT,
+ NEON_RESULTPAIR,
+ NEON_REINTERP,
+ NEON_VTBL,
+ NEON_VTBX,
+ NEON_LOAD1,
+ NEON_LOAD1LANE,
+ NEON_STORE1,
+ NEON_STORE1LANE,
+ NEON_LOADSTRUCT,
+ NEON_LOADSTRUCTLANE,
+ NEON_STORESTRUCT,
+ NEON_STORESTRUCTLANE,
+ NEON_LOGICBINOP,
+ NEON_SHIFTINSERT,
+ NEON_SHIFTIMM,
+ NEON_SHIFTACC
+} neon_itype;
+
+typedef struct {
+ const char *name;
+ const neon_itype itype;
+ const int bits;
+ const enum insn_code codes[T_MAX];
+ const unsigned int num_vars;
+ unsigned int base_fcode;
+} neon_builtin_datum;
+
+#define CF(N,X) CODE_FOR_neon_##N##X
+
+#define VAR1(T, N, A) \
+ #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
+#define VAR2(T, N, A, B) \
+ #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
+#define VAR3(T, N, A, B, C) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C), \
+ { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
+#define VAR4(T, N, A, B, C, D) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
+#define VAR5(T, N, A, B, C, D, E) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
+#define VAR6(T, N, A, B, C, D, E, F) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
+#define VAR7(T, N, A, B, C, D, E, F, G) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G) }, 7, 0
+#define VAR8(T, N, A, B, C, D, E, F, G, H) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H) }, 8, 0
+#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H) | UP (I), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H), CF (N, I) }, 9, 0
+#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
+ #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
+ | UP (H) | UP (I) | UP (J), \
+ { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
+ CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
+
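+/* For example, VAR2 (BINOP, vadd, v8qi, v4hi) expands (a sketch of
+   the mechanics, not an entry from the table below) to:
+     "vadd", NEON_BINOP, T_V8QI | T_V4HI,
+     { CODE_FOR_neon_vaddv8qi, CODE_FOR_neon_vaddv4hi }, 2, 0
+   i.e. one datum covering two variants of the vadd pattern.  */
+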
+/* The mode entries in the following table correspond to the "key" type of the
+ instruction variant, i.e. equivalent to that which would be specified after
+ the assembler mnemonic, which usually refers to the last vector operand.
+ (Signed/unsigned/polynomial types are not differentiated between though, and
+ are all mapped onto the same mode for a given element size.) The modes
+ listed per instruction should be the same as those defined for that
+ instruction's pattern in neon.md.
+ WARNING: Variants should be listed in the same increasing order as
+ neon_builtin_type_bits. */
+
+static neon_builtin_datum neon_builtin_data[] =
+{
+ { VAR10 (BINOP, vadd,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
+ { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
+ { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
+ { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
+ { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
+ { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
+ { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
+ { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
+ { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
+ { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
+ { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
+ { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
+ { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
+ { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
+ { VAR2 (BINOP, vqdmull, v4hi, v2si) },
+ { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
+ { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR10 (BINOP, vsub,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
+ { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
+ { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
+ { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR2 (BINOP, vcage, v2sf, v4sf) },
+ { VAR2 (BINOP, vcagt, v2sf, v4sf) },
+ { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
+ { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
+ { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
+ { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
+ { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
+ { VAR2 (BINOP, vrecps, v2sf, v4sf) },
+ { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
+ { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
+ { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ { VAR2 (UNOP, vcnt, v8qi, v16qi) },
+ { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
+ { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
+ { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ /* FIXME: vget_lane supports more variants than this! */
+ { VAR10 (GETLANE, vget_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (SETLANE, vset_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR10 (DUP, vdup_n,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (DUPLANE, vdup_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
+ { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
+ { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
+ { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
+ { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
+ { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
+ { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
+ { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
+ { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
+ { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
+ { VAR10 (BINOP, vext,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
+ { VAR2 (UNOP, vrev16, v8qi, v16qi) },
+ { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
+ { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
+ { VAR10 (SELECT, vbsl,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR1 (VTBL, vtbl1, v8qi) },
+ { VAR1 (VTBL, vtbl2, v8qi) },
+ { VAR1 (VTBL, vtbl3, v8qi) },
+ { VAR1 (VTBL, vtbl4, v8qi) },
+ { VAR1 (VTBX, vtbx1, v8qi) },
+ { VAR1 (VTBX, vtbx2, v8qi) },
+ { VAR1 (VTBX, vtbx3, v8qi) },
+ { VAR1 (VTBX, vtbx4, v8qi) },
+ { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
+ { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1, vld1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1LANE, vld1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOAD1, vld1_dup,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (STORE1, vst1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (STORE1LANE, vst1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR9 (LOADSTRUCT,
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst2,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR9 (LOADSTRUCT,
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst3,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR9 (LOADSTRUCT, vld4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (LOADSTRUCTLANE, vld4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
+ { VAR9 (STORESTRUCT, vst4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
+ { VAR7 (STORESTRUCTLANE, vst4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
+ { VAR10 (LOGICBINOP, vand,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vorr,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (BINOP, veor,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vbic,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
+ { VAR10 (LOGICBINOP, vorn,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
+};
+
+#undef CF
+#undef VAR1
+#undef VAR2
+#undef VAR3
+#undef VAR4
+#undef VAR5
+#undef VAR6
+#undef VAR7
+#undef VAR8
+#undef VAR9
+#undef VAR10
+
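+/* Set up all the NEON builtins: create distinguished type nodes for the
+   NEON element and vector types, register them with the front end, and
+   declare one __builtin_neon_* function for every variant of every entry
+   in neon_builtin_data, numbering them from ARM_BUILTIN_NEON_BASE.  */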
+static void
+arm_init_neon_builtins (void)
+{
+ unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
+
+ tree neon_intQI_type_node;
+ tree neon_intHI_type_node;
+ tree neon_polyQI_type_node;
+ tree neon_polyHI_type_node;
+ tree neon_intSI_type_node;
+ tree neon_intDI_type_node;
+ tree neon_float_type_node;
+
+ tree intQI_pointer_node;
+ tree intHI_pointer_node;
+ tree intSI_pointer_node;
+ tree intDI_pointer_node;
+ tree float_pointer_node;
+
+ tree const_intQI_node;
+ tree const_intHI_node;
+ tree const_intSI_node;
+ tree const_intDI_node;
+ tree const_float_node;
+
+ tree const_intQI_pointer_node;
+ tree const_intHI_pointer_node;
+ tree const_intSI_pointer_node;
+ tree const_intDI_pointer_node;
+ tree const_float_pointer_node;
+
+ tree V8QI_type_node;
+ tree V4HI_type_node;
+ tree V2SI_type_node;
+ tree V2SF_type_node;
+ tree V16QI_type_node;
+ tree V8HI_type_node;
+ tree V4SI_type_node;
+ tree V4SF_type_node;
+ tree V2DI_type_node;
+
+ tree intUQI_type_node;
+ tree intUHI_type_node;
+ tree intUSI_type_node;
+ tree intUDI_type_node;
+
+ tree intEI_type_node;
+ tree intOI_type_node;
+ tree intCI_type_node;
+ tree intXI_type_node;
+
+ tree V8QI_pointer_node;
+ tree V4HI_pointer_node;
+ tree V2SI_pointer_node;
+ tree V2SF_pointer_node;
+ tree V16QI_pointer_node;
+ tree V8HI_pointer_node;
+ tree V4SI_pointer_node;
+ tree V4SF_pointer_node;
+ tree V2DI_pointer_node;
+
+ tree void_ftype_pv8qi_v8qi_v8qi;
+ tree void_ftype_pv4hi_v4hi_v4hi;
+ tree void_ftype_pv2si_v2si_v2si;
+ tree void_ftype_pv2sf_v2sf_v2sf;
+ tree void_ftype_pdi_di_di;
+ tree void_ftype_pv16qi_v16qi_v16qi;
+ tree void_ftype_pv8hi_v8hi_v8hi;
+ tree void_ftype_pv4si_v4si_v4si;
+ tree void_ftype_pv4sf_v4sf_v4sf;
+ tree void_ftype_pv2di_v2di_v2di;
+
+ tree reinterp_ftype_dreg[5][5];
+ tree reinterp_ftype_qreg[5][5];
+ tree dreg_types[5], qreg_types[5];
+
+ /* Create distinguished type nodes for NEON vector element types,
+ and pointers to values of such types, so we can detect them later. */
+ neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+ neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+ neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
+ neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
+ neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
+ neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
+ neon_float_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
+ layout_type (neon_float_type_node);
+
+ /* Define typedefs which exactly correspond to the modes we are basing vector
+ types on. If you change these names you'll need to change
+ the table used by arm_mangle_type too. */
+ (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
+ "__builtin_neon_qi");
+ (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
+ "__builtin_neon_hi");
+ (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
+ "__builtin_neon_si");
+ (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
+ "__builtin_neon_sf");
+ (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
+ "__builtin_neon_di");
+ (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
+ "__builtin_neon_poly8");
+ (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
+ "__builtin_neon_poly16");
+
+ intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
+ intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
+ intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
+ intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
+ float_pointer_node = build_pointer_type (neon_float_type_node);
+
+ /* Next create constant-qualified versions of the above types. */
+ const_intQI_node = build_qualified_type (neon_intQI_type_node,
+ TYPE_QUAL_CONST);
+ const_intHI_node = build_qualified_type (neon_intHI_type_node,
+ TYPE_QUAL_CONST);
+ const_intSI_node = build_qualified_type (neon_intSI_type_node,
+ TYPE_QUAL_CONST);
+ const_intDI_node = build_qualified_type (neon_intDI_type_node,
+ TYPE_QUAL_CONST);
+ const_float_node = build_qualified_type (neon_float_type_node,
+ TYPE_QUAL_CONST);
+
+ const_intQI_pointer_node = build_pointer_type (const_intQI_node);
+ const_intHI_pointer_node = build_pointer_type (const_intHI_node);
+ const_intSI_pointer_node = build_pointer_type (const_intSI_node);
+ const_intDI_pointer_node = build_pointer_type (const_intDI_node);
+ const_float_pointer_node = build_pointer_type (const_float_node);
+
+ /* Now create vector types based on our NEON element types. */
+ /* 64-bit vectors. */
+ V8QI_type_node =
+ build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
+ V4HI_type_node =
+ build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+ V2SI_type_node =
+ build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
+ V2SF_type_node =
+ build_vector_type_for_mode (neon_float_type_node, V2SFmode);
+ /* 128-bit vectors. */
+ V16QI_type_node =
+ build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
+ V8HI_type_node =
+ build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
+ V4SI_type_node =
+ build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
+ V4SF_type_node =
+ build_vector_type_for_mode (neon_float_type_node, V4SFmode);
+ V2DI_type_node =
+ build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
+
+ /* Unsigned integer types for various mode sizes. */
+ intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
+ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
+ intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
+ intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
+
+ (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
+ "__builtin_neon_uqi");
+ (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
+ "__builtin_neon_uhi");
+ (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
+ "__builtin_neon_usi");
+ (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
+ "__builtin_neon_udi");
+
+ /* Opaque integer types for structures of vectors. */
+ intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
+ intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
+ intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
+ intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
+
+ (*lang_hooks.types.register_builtin_type) (intTI_type_node,
+ "__builtin_neon_ti");
+ (*lang_hooks.types.register_builtin_type) (intEI_type_node,
+ "__builtin_neon_ei");
+ (*lang_hooks.types.register_builtin_type) (intOI_type_node,
+ "__builtin_neon_oi");
+ (*lang_hooks.types.register_builtin_type) (intCI_type_node,
+ "__builtin_neon_ci");
+ (*lang_hooks.types.register_builtin_type) (intXI_type_node,
+ "__builtin_neon_xi");
+
+ /* Pointers to vector types. */
+ V8QI_pointer_node = build_pointer_type (V8QI_type_node);
+ V4HI_pointer_node = build_pointer_type (V4HI_type_node);
+ V2SI_pointer_node = build_pointer_type (V2SI_type_node);
+ V2SF_pointer_node = build_pointer_type (V2SF_type_node);
+ V16QI_pointer_node = build_pointer_type (V16QI_type_node);
+ V8HI_pointer_node = build_pointer_type (V8HI_type_node);
+ V4SI_pointer_node = build_pointer_type (V4SI_type_node);
+ V4SF_pointer_node = build_pointer_type (V4SF_type_node);
+ V2DI_pointer_node = build_pointer_type (V2DI_type_node);
+
+ /* Operations which return results as pairs. */
+ void_ftype_pv8qi_v8qi_v8qi =
+ build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
+ V8QI_type_node, NULL);
+ void_ftype_pv4hi_v4hi_v4hi =
+ build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
+ V4HI_type_node, NULL);
+ void_ftype_pv2si_v2si_v2si =
+ build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
+ V2SI_type_node, NULL);
+ void_ftype_pv2sf_v2sf_v2sf =
+ build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
+ V2SF_type_node, NULL);
+ void_ftype_pdi_di_di =
+ build_function_type_list (void_type_node, intDI_pointer_node,
+ neon_intDI_type_node, neon_intDI_type_node, NULL);
+ void_ftype_pv16qi_v16qi_v16qi =
+ build_function_type_list (void_type_node, V16QI_pointer_node,
+ V16QI_type_node, V16QI_type_node, NULL);
+ void_ftype_pv8hi_v8hi_v8hi =
+ build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
+ V8HI_type_node, NULL);
+ void_ftype_pv4si_v4si_v4si =
+ build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
+ V4SI_type_node, NULL);
+ void_ftype_pv4sf_v4sf_v4sf =
+ build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
+ V4SF_type_node, NULL);
+ void_ftype_pv2di_v2di_v2di =
+ build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
+ V2DI_type_node, NULL);
+
+ dreg_types[0] = V8QI_type_node;
+ dreg_types[1] = V4HI_type_node;
+ dreg_types[2] = V2SI_type_node;
+ dreg_types[3] = V2SF_type_node;
+ dreg_types[4] = neon_intDI_type_node;
+
+ qreg_types[0] = V16QI_type_node;
+ qreg_types[1] = V8HI_type_node;
+ qreg_types[2] = V4SI_type_node;
+ qreg_types[3] = V4SF_type_node;
+ qreg_types[4] = V2DI_type_node;
+
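+  /* Build the full matrix of reinterpret-cast function types: one for
+     every (result, argument) pair of doubleword types, and likewise for
+     the quadword types.  */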
+ for (i = 0; i < 5; i++)
+ {
+ int j;
+ for (j = 0; j < 5; j++)
+ {
+ reinterp_ftype_dreg[i][j]
+ = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
+ reinterp_ftype_qreg[i][j]
+ = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
+ {
+ neon_builtin_datum *d = &neon_builtin_data[i];
+ unsigned int j, codeidx = 0;
+
+ d->base_fcode = fcode;
+
+ for (j = 0; j < T_MAX; j++)
+ {
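+          /* Entry J corresponds to bit (1 << J) of d->bits, so this table
+             must be kept in step with the T_V8QI ... T_V2DI bit order.  */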
+ const char* const modenames[] = {
+ "v8qi", "v4hi", "v2si", "v2sf", "di",
+ "v16qi", "v8hi", "v4si", "v4sf", "v2di"
+ };
+ char namebuf[60];
+ tree ftype = NULL;
+ enum insn_code icode;
+ int is_load = 0, is_store = 0;
+
+ if ((d->bits & (1 << j)) == 0)
+ continue;
+
+ icode = d->codes[codeidx++];
+
+ switch (d->itype)
+ {
+ case NEON_LOAD1:
+ case NEON_LOAD1LANE:
+ case NEON_LOADSTRUCT:
+ case NEON_LOADSTRUCTLANE:
+ is_load = 1;
+ /* Fall through. */
+ case NEON_STORE1:
+ case NEON_STORE1LANE:
+ case NEON_STORESTRUCT:
+ case NEON_STORESTRUCTLANE:
+ if (!is_load)
+ is_store = 1;
+ /* Fall through. */
+ case NEON_UNOP:
+ case NEON_BINOP:
+ case NEON_LOGICBINOP:
+ case NEON_SHIFTINSERT:
+ case NEON_TERNOP:
+ case NEON_GETLANE:
+ case NEON_SETLANE:
+ case NEON_CREATE:
+ case NEON_DUP:
+ case NEON_DUPLANE:
+ case NEON_SHIFTIMM:
+ case NEON_SHIFTACC:
+ case NEON_COMBINE:
+ case NEON_SPLIT:
+ case NEON_CONVERT:
+ case NEON_FIXCONV:
+ case NEON_LANEMUL:
+ case NEON_LANEMULL:
+ case NEON_LANEMULH:
+ case NEON_LANEMAC:
+ case NEON_SCALARMUL:
+ case NEON_SCALARMULL:
+ case NEON_SCALARMULH:
+ case NEON_SCALARMAC:
+ case NEON_SELECT:
+ case NEON_VTBL:
+ case NEON_VTBX:
+ {
+ int k;
+ tree return_type = void_type_node, args = void_list_node;
+
+ /* Build a function type directly from the insn_data for this
+ builtin. The build_function_type() function takes care of
+ removing duplicates for us. */
+ for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
+ {
+ tree eltype;
+
+ if (is_load && k == 1)
+ {
+ /* Neon load patterns always have the memory operand
+ (a SImode pointer) in the operand 1 position. We
+ want a const pointer to the element type in that
+ position. */
+ gcc_assert (insn_data[icode].operand[k].mode == SImode);
+
+ switch (1 << j)
+ {
+ case T_V8QI:
+ case T_V16QI:
+ eltype = const_intQI_pointer_node;
+ break;
+
+ case T_V4HI:
+ case T_V8HI:
+ eltype = const_intHI_pointer_node;
+ break;
+
+ case T_V2SI:
+ case T_V4SI:
+ eltype = const_intSI_pointer_node;
+ break;
+
+ case T_V2SF:
+ case T_V4SF:
+ eltype = const_float_pointer_node;
+ break;
+
+ case T_DI:
+ case T_V2DI:
+ eltype = const_intDI_pointer_node;
+ break;
+
+ default: gcc_unreachable ();
+ }
+ }
+ else if (is_store && k == 0)
+ {
+ /* Similarly, Neon store patterns use operand 0 as
+ the memory location to store to (a SImode pointer).
+ Use a pointer to the element type of the store in
+ that position. */
+ gcc_assert (insn_data[icode].operand[k].mode == SImode);
+
+ switch (1 << j)
+ {
+ case T_V8QI:
+ case T_V16QI:
+ eltype = intQI_pointer_node;
+ break;
+
+ case T_V4HI:
+ case T_V8HI:
+ eltype = intHI_pointer_node;
+ break;
+
+ case T_V2SI:
+ case T_V4SI:
+ eltype = intSI_pointer_node;
+ break;
+
+ case T_V2SF:
+ case T_V4SF:
+ eltype = float_pointer_node;
+ break;
+
+ case T_DI:
+ case T_V2DI:
+ eltype = intDI_pointer_node;
+ break;
+
+ default: gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (insn_data[icode].operand[k].mode)
+ {
+ case VOIDmode: eltype = void_type_node; break;
+ /* Scalars. */
+ case QImode: eltype = neon_intQI_type_node; break;
+ case HImode: eltype = neon_intHI_type_node; break;
+ case SImode: eltype = neon_intSI_type_node; break;
+ case SFmode: eltype = neon_float_type_node; break;
+ case DImode: eltype = neon_intDI_type_node; break;
+ case TImode: eltype = intTI_type_node; break;
+ case EImode: eltype = intEI_type_node; break;
+ case OImode: eltype = intOI_type_node; break;
+ case CImode: eltype = intCI_type_node; break;
+ case XImode: eltype = intXI_type_node; break;
+ /* 64-bit vectors. */
+ case V8QImode: eltype = V8QI_type_node; break;
+ case V4HImode: eltype = V4HI_type_node; break;
+ case V2SImode: eltype = V2SI_type_node; break;
+ case V2SFmode: eltype = V2SF_type_node; break;
+ /* 128-bit vectors. */
+ case V16QImode: eltype = V16QI_type_node; break;
+ case V8HImode: eltype = V8HI_type_node; break;
+ case V4SImode: eltype = V4SI_type_node; break;
+ case V4SFmode: eltype = V4SF_type_node; break;
+ case V2DImode: eltype = V2DI_type_node; break;
+ default: gcc_unreachable ();
+ }
+ }
+
+ if (k == 0 && !is_store)
+ return_type = eltype;
+ else
+ args = tree_cons (NULL_TREE, eltype, args);
+ }
+
+ ftype = build_function_type (return_type, args);
+ }
+ break;
+
+ case NEON_RESULTPAIR:
+ {
+ switch (insn_data[icode].operand[1].mode)
+ {
+ case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
+ case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
+ case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
+ case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
+ case DImode: ftype = void_ftype_pdi_di_di; break;
+ case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
+ case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
+ case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
+ case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
+ case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
+ default: gcc_unreachable ();
+ }
+ }
+ break;
+
+ case NEON_REINTERP:
+ {
+ /* We iterate over 5 doubleword types, then 5 quadword
+ types. */
+ int rhs = j % 5;
+ switch (insn_data[icode].operand[0].mode)
+ {
+ case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
+ case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
+ case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
+ case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
+ case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
+ case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
+ case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
+ case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
+ case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
+ case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
+ default: gcc_unreachable ();
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (ftype != NULL);
+
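+      /* Each variant is named after the base name plus a mode suffix:
+         e.g. the v8qi variant of "vabs" becomes __builtin_neon_vabsv8qi.  */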
+ sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
+
+ add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ }
+ }
+}
+
+static void
+arm_init_fp16_builtins (void)
+{
+ tree fp16_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (fp16_type) = 16;
+ layout_type (fp16_type);
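+  /* Registering the name lets source code use the type __fp16; arithmetic
+     on such values is done by promotion to float (see arm_promoted_type).  */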
+ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
+}
+
+static void
+arm_init_builtins (void)
+{
+ arm_init_tls_builtins ();
+
+ if (TARGET_REALLY_IWMMXT)
+ arm_init_iwmmxt_builtins ();
+
+ if (TARGET_NEON)
+ arm_init_neon_builtins ();
+
+ if (arm_fp16_format)
+ arm_init_fp16_builtins ();
+}
+
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */
+
+static const char *
+arm_invalid_parameter_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("function parameters cannot have __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_INVALID_RETURN_TYPE.  */
+
+static const char *
+arm_invalid_return_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return N_("functions cannot return __fp16 type");
+ return NULL;
+}
+
+/* Implement TARGET_PROMOTED_TYPE. */
+
+static tree
+arm_promoted_type (const_tree t)
+{
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ return float_type_node;
+ return NULL_TREE;
+}
+
+/* Implement TARGET_CONVERT_TO_TYPE.
+   Specifically, this hook implements the peculiarity of the ARM
+   half-precision floating-point C semantics that requires conversions
+   between __fp16 and double to go through an intermediate conversion to
+   float; e.g. (double)h is performed as (double)(float)h.  */
+
+static tree
+arm_convert_to_type (tree type, tree expr)
+{
+ tree fromtype = TREE_TYPE (expr);
+ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
+ return NULL_TREE;
+ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
+ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
+ return convert (type, convert (float_type_node, expr));
+ return NULL_TREE;
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
+ This simply adds HFmode as a supported mode; even though we don't
+ implement arithmetic on this type directly, it's supported by
+   optabs conversions, much the same way that double-word arithmetic is
+   special-cased in the default hook.  */
+
+static bool
+arm_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (mode == HFmode)
+ return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x != const0_rtx)
+ return x;
+ x = gen_reg_rtx (mode);
+
+ emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
+ : gen_rtx_SUBREG (DImode, x, 0)));
+ return x;
+}
+
+/* Subroutine of arm_expand_builtin to take care of binop insns. */
+
+static rtx
+arm_expand_binop_builtin (enum insn_code icode,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of arm_expand_builtin to take care of unop insns. */
+
+static rtx
+arm_expand_unop_builtin (enum insn_code icode,
+ tree exp, rtx target, int do_load)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ if (do_load)
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ else
+ {
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ }
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
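+/* Comparison function for bsearch: A is the key and B a table entry.
+   Return zero when the key's base_fcode falls within the entry's range
+   [base_fcode, base_fcode + num_vars).  */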
+static int
+neon_builtin_compare (const void *a, const void *b)
+{
+ const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
+ const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
+ unsigned int soughtcode = key->base_fcode;
+
+ if (soughtcode >= memb->base_fcode
+ && soughtcode < memb->base_fcode + memb->num_vars)
+ return 0;
+ else if (soughtcode < memb->base_fcode)
+ return -1;
+ else
+ return 1;
+}
+
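+/* Map the function code FCODE of a NEON builtin back to the insn code of
+   the instruction variant implementing it, optionally storing the
+   builtin's class in *ITYPE.  */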
+static enum insn_code
+locate_neon_builtin_icode (int fcode, neon_itype *itype)
+{
+ neon_builtin_datum key
+ = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
+ neon_builtin_datum *found;
+ int idx;
+
+ key.base_fcode = fcode;
+ found = (neon_builtin_datum *)
+ bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
+ sizeof (neon_builtin_data[0]), neon_builtin_compare);
+ gcc_assert (found);
+ idx = fcode - (int) found->base_fcode;
+ gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
+
+ if (itype)
+ *itype = found->itype;
+
+ return found->codes[idx];
+}
+
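+/* Per-argument expansion strategies for arm_expand_neon_args below.  */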
+typedef enum {
+ NEON_ARG_COPY_TO_REG,
+ NEON_ARG_CONSTANT,
+ NEON_ARG_STOP
+} builtin_arg;
+
+#define NEON_MAX_BUILTIN_ARGS 5
+
+/* Expand a Neon builtin.  The variadic arguments after EXP describe, one
+   per argument of the builtin call, how each operand is to be expanded
+   (copied to a register or required to be a constant), with the list
+   terminated by NEON_ARG_STOP.  */
+static rtx
+arm_expand_neon_args (rtx target, int icode, int have_retval,
+ tree exp, ...)
+{
+ va_list ap;
+ rtx pat;
+ tree arg[NEON_MAX_BUILTIN_ARGS];
+ rtx op[NEON_MAX_BUILTIN_ARGS];
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
+ int argc = 0;
+
+ if (have_retval
+ && (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
+ target = gen_reg_rtx (tmode);
+
+ va_start (ap, exp);
+
+ for (;;)
+ {
+ builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
+
+ if (thisarg == NEON_ARG_STOP)
+ break;
+ else
+ {
+ arg[argc] = CALL_EXPR_ARG (exp, argc);
+ op[argc] = expand_normal (arg[argc]);
+ mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
+
+ switch (thisarg)
+ {
+ case NEON_ARG_COPY_TO_REG:
+ /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
+ if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+ (op[argc], mode[argc]))
+ op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
+ break;
+
+ case NEON_ARG_CONSTANT:
+ /* FIXME: This error message is somewhat unhelpful. */
+ if (!(*insn_data[icode].operand[argc + have_retval].predicate)
+ (op[argc], mode[argc]))
+ error ("argument must be a constant");
+ break;
+
+ case NEON_ARG_STOP:
+ gcc_unreachable ();
+ }
+
+ argc++;
+ }
+ }
+
+ va_end (ap);
+
+ if (have_retval)
+ switch (argc)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ else
+ switch (argc)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!pat)
+ return 0;
+
+ emit_insn (pat);
+
+ return target;
+}
+
+/* Expand a Neon builtin. These are "special" because they don't have symbolic
+ constants defined per-instruction or per instruction-variant. Instead, the
+ required info is looked up in the table neon_builtin_data. */
+static rtx
+arm_expand_neon_builtin (int fcode, tree exp, rtx target)
+{
+ neon_itype itype;
+ enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
+
+ switch (itype)
+ {
+ case NEON_UNOP:
+ case NEON_CONVERT:
+ case NEON_DUPLANE:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_BINOP:
+ case NEON_SETLANE:
+ case NEON_SCALARMUL:
+ case NEON_SCALARMULL:
+ case NEON_SCALARMULH:
+ case NEON_SHIFTINSERT:
+ case NEON_LOGICBINOP:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ NEON_ARG_STOP);
+
+ case NEON_TERNOP:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
+ NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_GETLANE:
+ case NEON_FIXCONV:
+ case NEON_SHIFTIMM:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
+ NEON_ARG_STOP);
+
+ case NEON_CREATE:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+
+ case NEON_DUP:
+ case NEON_SPLIT:
+ case NEON_REINTERP:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+
+ case NEON_COMBINE:
+ case NEON_VTBL:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+
+ case NEON_RESULTPAIR:
+ return arm_expand_neon_args (target, icode, 0, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
+ NEON_ARG_STOP);
+
+ case NEON_LANEMUL:
+ case NEON_LANEMULL:
+ case NEON_LANEMULH:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_LANEMAC:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
+ NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_SHIFTACC:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_SCALARMAC:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
+ NEON_ARG_CONSTANT, NEON_ARG_STOP);
+
+ case NEON_SELECT:
+ case NEON_VTBX:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
+ NEON_ARG_STOP);
+
+ case NEON_LOAD1:
+ case NEON_LOADSTRUCT:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+
+ case NEON_LOAD1LANE:
+ case NEON_LOADSTRUCTLANE:
+ return arm_expand_neon_args (target, icode, 1, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ NEON_ARG_STOP);
+
+ case NEON_STORE1:
+ case NEON_STORESTRUCT:
+ return arm_expand_neon_args (target, icode, 0, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
+
+ case NEON_STORE1LANE:
+ case NEON_STORESTRUCTLANE:
+ return arm_expand_neon_args (target, icode, 0, exp,
+ NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
+ NEON_ARG_STOP);
+ }
+
+ gcc_unreachable ();
+}
+
+/* Emit code to reinterpret one Neon type as another, without altering bits. */
+void
+neon_reinterpret (rtx dest, rtx src)
+{
+ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
+}
+
+/* Emit code to place a Neon pair result in two consecutive memory
+   locations, via a pair of equal-mode scratch registers.  */
+void
+neon_emit_pair_result_insn (enum machine_mode mode,
+ rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
+ rtx op1, rtx op2)
+{
+ rtx mem = gen_rtx_MEM (mode, destaddr);
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx tmp2 = gen_reg_rtx (mode);
+
+ emit_insn (intfn (tmp1, op1, op2, tmp2));
+
+ emit_move_insn (mem, tmp1);
+ mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
+ emit_move_insn (mem, tmp2);
+}
+
+/* Set up OPERANDS for a register copy from SRC to DEST, taking care
+ not to early-clobber SRC registers in the process.
+
+ We assume that the operands described by SRC and DEST represent a
+ decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
+ number of components into which the copy has been decomposed. */
+void
+neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
+{
+ unsigned int i;
+
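+  /* Copy in ascending order when there is no overlap or the destination
+     starts below the source; otherwise copy in descending order, so that
+     no source register is overwritten before it has been read.  */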
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ || REGNO (operands[0]) < REGNO (operands[1]))
+ {
+ for (i = 0; i < count; i++)
+ {
+ operands[2 * i] = dest[i];
+ operands[2 * i + 1] = src[i];
+ }
+ }
+ else
+ {
+ for (i = 0; i < count; i++)
+ {
+ operands[2 * i] = dest[count - i - 1];
+ operands[2 * i + 1] = src[count - i - 1];
+ }
+ }
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+arm_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ const struct builtin_description * d;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0;
+ tree arg1;
+ tree arg2;
+ rtx op0;
+ rtx op1;
+ rtx op2;
+ rtx pat;
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ size_t i;
+ enum machine_mode tmode;
+ enum machine_mode mode0;
+ enum machine_mode mode1;
+ enum machine_mode mode2;
+
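+  /* NEON builtins occupy the function codes from ARM_BUILTIN_NEON_BASE
+     upwards and are table-driven; the remaining builtins are handled by
+     the switch and the bdesc_2arg/bdesc_1arg tables below.  */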
+ if (fcode >= ARM_BUILTIN_NEON_BASE)
+ return arm_expand_neon_builtin (fcode, exp, target);
+
+ switch (fcode)
+ {
+ case ARM_BUILTIN_TEXTRMSB:
+ case ARM_BUILTIN_TEXTRMUB:
+ case ARM_BUILTIN_TEXTRMSH:
+ case ARM_BUILTIN_TEXTRMUH:
+ case ARM_BUILTIN_TEXTRMSW:
+ case ARM_BUILTIN_TEXTRMUW:
+ icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
+ : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
+ : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
+ : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
+ : CODE_FOR_iwmmxt_textrmw);
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ {
+ /* @@@ better error message */
+ error ("selector must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case ARM_BUILTIN_TINSRB:
+ case ARM_BUILTIN_TINSRH:
+ case ARM_BUILTIN_TINSRW:
+ icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
+ : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
+ : CODE_FOR_iwmmxt_tinsrw);
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+ {
+ /* @@@ better error message */
+ error ("selector must be an immediate");
+ return const0_rtx;
+ }
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case ARM_BUILTIN_SETWCX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = force_reg (SImode, expand_normal (arg0));
+ op1 = expand_normal (arg1);
+ emit_insn (gen_iwmmxt_tmcr (op1, op0));
+ return 0;
+
+ case ARM_BUILTIN_GETWCX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ target = gen_reg_rtx (SImode);
+ emit_insn (gen_iwmmxt_tmrc (target, op0));
+ return target;
+
+ case ARM_BUILTIN_WSHUFH:
+ icode = CODE_FOR_iwmmxt_wshufh;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ tmode = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+ op0 = copy_to_mode_reg (mode1, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+ {
+ /* @@@ better error message */
+ error ("mask must be an immediate");
+ return const0_rtx;
+ }
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case ARM_BUILTIN_WSADB:
+ return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
+ case ARM_BUILTIN_WSADH:
+ return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
+ case ARM_BUILTIN_WSADBZ:
+ return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
+ case ARM_BUILTIN_WSADHZ:
+ return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
+
+ /* Several three-argument builtins. */
+ case ARM_BUILTIN_WMACS:
+ case ARM_BUILTIN_WMACU:
+ case ARM_BUILTIN_WALIGN:
+ case ARM_BUILTIN_TMIA:
+ case ARM_BUILTIN_TMIAPH:
+ case ARM_BUILTIN_TMIATT:
+ case ARM_BUILTIN_TMIATB:
+ case ARM_BUILTIN_TMIABT:
+ case ARM_BUILTIN_TMIABB:
+ icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
+ : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
+ : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
+ : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
+ : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
+ : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
+ : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
+ : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
+ : CODE_FOR_iwmmxt_walign);
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case ARM_BUILTIN_WZERO:
+ target = gen_reg_rtx (DImode);
+ emit_insn (gen_iwmmxt_clrdi (target));
+ return target;
+
+ case ARM_BUILTIN_THREAD_POINTER:
+ return arm_load_tp (target);
+
+ default:
+ break;
+ }
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == (const enum arm_builtins) fcode)
+ return arm_expand_binop_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == (const enum arm_builtins) fcode)
+ return arm_expand_unop_builtin (d->icode, exp, target, 0);
+
+ /* @@@ Should really do something sensible here. */
+ return NULL_RTX;
+}
+
+/* Return the number (counting from 0) of
+ the least significant set bit in MASK. */
+
+inline static int
+number_of_first_bit_set (unsigned mask)
+{
+ int bit;
+
+ for (bit = 0;
+ (mask & (1 << bit)) == 0;
+ ++bit)
+ continue;
+
+ return bit;
+}
+
+/* Emit code to push or pop registers to or from the stack. F is the
+ assembly file. MASK is the registers to push or pop. PUSH is
+ nonzero if we should push, and zero if we should pop. For debugging
+ output, if pushing, adjust CFA_OFFSET by the amount of space added
+ to the stack. REAL_REGS should have the same number of bits set as
+ MASK, and will be used instead (in the same order) to describe which
+ registers were saved - this is used to mark the save slots when we
+ push high registers after moving them to low registers. */
+static void
+thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
+ unsigned long real_regs)
+{
+ int regno;
+ int lo_mask = mask & 0xFF;
+ int pushed_words = 0;
+
+ gcc_assert (mask);
+
+ if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
+ {
+      /* Special case: do not generate a POP PC statement here; do it
+         in thumb_exit ().  */
+ thumb_exit (f, -1);
+ return;
+ }
+
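+  /* When emitting target (ARM EHABI) unwind information, describe the
+     registers saved by this push with a .save directive first.  */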
+ if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
+ {
+ fprintf (f, "\t.save\t{");
+ for (regno = 0; regno < 15; regno++)
+ {
+ if (real_regs & (1 << regno))
+ {
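+              /* Emit a separating comma if a lower-numbered register has
+                 already been printed.  */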
+ if (real_regs & ((1 << regno) -1))
+ fprintf (f, ", ");
+ asm_fprintf (f, "%r", regno);
+ }
+ }
+ fprintf (f, "}\n");
+ }
+
+ fprintf (f, "\t%s\t{", push ? "push" : "pop");
+
+ /* Look at the low registers first. */
+ for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
+ {
+ if (lo_mask & 1)
+ {
+ asm_fprintf (f, "%r", regno);
+
+ if ((lo_mask & ~1) != 0)
+ fprintf (f, ", ");
+
+ pushed_words++;
+ }
+ }
+
+ if (push && (mask & (1 << LR_REGNUM)))
+ {
+ /* Catch pushing the LR. */
+ if (mask & 0xFF)
+ fprintf (f, ", ");
+
+ asm_fprintf (f, "%r", LR_REGNUM);
+
+ pushed_words++;
+ }
+ else if (!push && (mask & (1 << PC_REGNUM)))
+ {
+ /* Catch popping the PC. */
+ if (TARGET_INTERWORK || TARGET_BACKTRACE
+ || crtl->calls_eh_return)
+ {
+          /* The PC is never popped directly; instead it is popped into
+             r3 and then BX is used.  */
+ fprintf (f, "}\n");
+
+ thumb_exit (f, -1);
+
+ return;
+ }
+ else
+ {
+ if (mask & 0xFF)
+ fprintf (f, ", ");
+
+ asm_fprintf (f, "%r", PC_REGNUM);
+ }
+ }
+
+ fprintf (f, "}\n");
+
+ if (push && pushed_words && dwarf2out_do_frame ())
+ {
+ char *l = dwarf2out_cfi_label (false);
+ int pushed_mask = real_regs;
+
+ *cfa_offset += pushed_words * 4;
+ dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
+
+ pushed_words = 0;
+ pushed_mask = real_regs;
+ for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
+ {
+ if (pushed_mask & 1)
+ dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
+ }
+ }
+}
+
+/* Generate code to return from a thumb function.
+ If 'reg_containing_return_addr' is -1, then the return address is
+ actually on the stack, at the stack pointer. */
+static void
+thumb_exit (FILE *f, int reg_containing_return_addr)
+{
+ unsigned regs_available_for_popping;
+ unsigned regs_to_pop;
+ int pops_needed;
+ unsigned available;
+ unsigned required;
+ int mode;
+ int size;
+ int restore_a4 = FALSE;
+
+ /* Compute the registers we need to pop. */
+ regs_to_pop = 0;
+ pops_needed = 0;
+
+ if (reg_containing_return_addr == -1)
+ {
+ regs_to_pop |= 1 << LR_REGNUM;
+ ++pops_needed;
+ }
+
+ if (TARGET_BACKTRACE)
+ {
+ /* Restore the (ARM) frame pointer and stack pointer. */
+ regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
+ pops_needed += 2;
+ }
+
+ /* If there is nothing to pop then just emit the BX instruction and
+ return. */
+ if (pops_needed == 0)
+ {
+ if (crtl->calls_eh_return)
+ asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
+
+ asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
+ return;
+ }
+  /* Otherwise, if we are not supporting interworking, have not created a
+     backtrace structure, and the function was not entered in ARM mode,
+     just pop the return address straight into the PC.  */
+ else if (!TARGET_INTERWORK
+ && !TARGET_BACKTRACE
+ && !is_called_in_ARM_mode (current_function_decl)
+ && !crtl->calls_eh_return)
+ {
+ asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
+ return;
+ }
+
+ /* Find out how many of the (return) argument registers we can corrupt. */
+ regs_available_for_popping = 0;
+
+ /* If returning via __builtin_eh_return, the bottom three registers
+ all contain information needed for the return. */
+ if (crtl->calls_eh_return)
+ size = 12;
+ else
+ {
+      /* We can deduce the registers used from the function's return
+         value.  This is more reliable than examining
+         df_regs_ever_live_p () because that will be set if the register is
+         ever used in the function, not just if the register is used
+         to hold a return value.  */
+
+ if (crtl->return_rtx != 0)
+ mode = GET_MODE (crtl->return_rtx);
+ else
+ mode = DECL_MODE (DECL_RESULT (current_function_decl));
+
+ size = GET_MODE_SIZE (mode);
+
+ if (size == 0)
+ {
+ /* In a void function we can use any argument register.
+ In a function that returns a structure on the stack
+ we can use the second and third argument registers. */
+ if (mode == VOIDmode)
+ regs_available_for_popping =
+ (1 << ARG_REGISTER (1))
+ | (1 << ARG_REGISTER (2))
+ | (1 << ARG_REGISTER (3));
+ else
+ regs_available_for_popping =
+ (1 << ARG_REGISTER (2))
+ | (1 << ARG_REGISTER (3));
+ }
+ else if (size <= 4)
+ regs_available_for_popping =
+ (1 << ARG_REGISTER (2))
+ | (1 << ARG_REGISTER (3));
+ else if (size <= 8)
+ regs_available_for_popping =
+ (1 << ARG_REGISTER (3));
+ }
+
+ /* Match registers to be popped with registers into which we pop them. */
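+  /* (X & -X isolates the lowest set bit of X, so each iteration of the
+     loop below pairs off the lowest remaining register in each mask.)  */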
+ for (available = regs_available_for_popping,
+ required = regs_to_pop;
+ required != 0 && available != 0;
+ available &= ~(available & - available),
+ required &= ~(required & - required))
+ -- pops_needed;
+
+ /* If we have any popping registers left over, remove them. */
+ if (available > 0)
+ regs_available_for_popping &= ~available;
+
+ /* Otherwise if we need another popping register we can use
+ the fourth argument register. */
+ else if (pops_needed)
+ {
+ /* If we have not found any free argument registers and
+ reg a4 contains the return address, we must move it. */
+ if (regs_available_for_popping == 0
+ && reg_containing_return_addr == LAST_ARG_REGNUM)
+ {
+ asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
+ reg_containing_return_addr = LR_REGNUM;
+ }
+ else if (size > 12)
+ {
+ /* Register a4 is being used to hold part of the return value,
+ but we have dire need of a free, low register. */
+ restore_a4 = TRUE;
+
+ asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
+ }
+
+ if (reg_containing_return_addr != LAST_ARG_REGNUM)
+ {
+ /* The fourth argument register is available. */
+ regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
+
+ --pops_needed;
+ }
+ }
+
+ /* Pop as many registers as we can. */
+ thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
+ regs_available_for_popping);
+
+ /* Process the registers we popped. */
+ if (reg_containing_return_addr == -1)
+ {
+ /* The return address was popped into the lowest numbered register. */
+ regs_to_pop &= ~(1 << LR_REGNUM);
+
+ reg_containing_return_addr =
+ number_of_first_bit_set (regs_available_for_popping);
+
+      /* Remove this register from the mask of available registers, so
+         that the return address will not be corrupted by further pops.  */
+ regs_available_for_popping &= ~(1 << reg_containing_return_addr);
+ }
+
+ /* If we popped other registers then handle them here. */
+ if (regs_available_for_popping)
+ {
+ int frame_pointer;
+
+ /* Work out which register currently contains the frame pointer. */
+ frame_pointer = number_of_first_bit_set (regs_available_for_popping);
+
+ /* Move it into the correct place. */
+ asm_fprintf (f, "\tmov\t%r, %r\n",
+ ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
+
+ /* (Temporarily) remove it from the mask of popped registers. */
+ regs_available_for_popping &= ~(1 << frame_pointer);
+ regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
+
+ if (regs_available_for_popping)
+ {
+ int stack_pointer;
+
+          /* We popped the stack pointer as well; find the register that
+             contains it.  */
+ stack_pointer = number_of_first_bit_set (regs_available_for_popping);
+
+ /* Move it into the stack register. */
+ asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
+
+ /* At this point we have popped all necessary registers, so
+ do not worry about restoring regs_available_for_popping
+ to its correct value:
+
+ assert (pops_needed == 0)
+ assert (regs_available_for_popping == (1 << frame_pointer))
+ assert (regs_to_pop == (1 << STACK_POINTER)) */
+ }
+ else
+ {
+          /* Since we have just moved the popped value into the frame
+             pointer, the popping register is available for reuse, and
+             we know that we still have the stack pointer left to pop.  */
+ regs_available_for_popping |= (1 << frame_pointer);
+ }
+ }
+
+ /* If we still have registers left on the stack, but we no longer have
+ any registers into which we can pop them, then we must move the return
+ address into the link register and make available the register that
+ contained it. */
+ if (regs_available_for_popping == 0 && pops_needed > 0)
+ {
+ regs_available_for_popping |= 1 << reg_containing_return_addr;
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
+ reg_containing_return_addr);
+
+ reg_containing_return_addr = LR_REGNUM;
+ }
+
+ /* If we have registers left on the stack then pop some more.
+ We know that at most we will want to pop FP and SP. */
+ if (pops_needed > 0)
+ {
+ int popped_into;
+ int move_to;
+
+ thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
+ regs_available_for_popping);
+
+ /* We have popped either FP or SP.
+ Move whichever one it is into the correct register. */
+ popped_into = number_of_first_bit_set (regs_available_for_popping);
+ move_to = number_of_first_bit_set (regs_to_pop);
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
+
+ regs_to_pop &= ~(1 << move_to);
+
+ --pops_needed;
+ }
+
+ /* If we still have not popped everything then we must have only
+ had one register available to us and we are now popping the SP. */
+ if (pops_needed > 0)
+ {
+ int popped_into;
+
+ thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
+ regs_available_for_popping);
+
+ popped_into = number_of_first_bit_set (regs_available_for_popping);
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
+ /*
+ assert (regs_to_pop == (1 << STACK_POINTER))
+ assert (pops_needed == 1)
+ */
+ }
+
+ /* If necessary restore the a4 register. */
+ if (restore_a4)
+ {
+ if (reg_containing_return_addr != LR_REGNUM)
+ {
+ asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
+ reg_containing_return_addr = LR_REGNUM;
+ }
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
+ }
+
+ if (crtl->calls_eh_return)
+ asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
+
+ /* Return to caller. */
+ asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
+}
+
+/* Scan INSN just before assembler is output for it.
+ For Thumb-1, we track the status of the condition codes; this
+ information is used in the cbranchsi4_insn pattern. */
+void
+thumb1_final_prescan_insn (rtx insn)
+{
+ if (flag_print_asm_name)
+ asm_fprintf (asm_out_file, "%@ 0x%04x\n",
+ INSN_ADDRESSES (INSN_UID (insn)));
+ /* Don't overwrite the previous setter when we get to a cbranch. */
+ if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
+ {
+ enum attr_conds conds;
+
+ if (cfun->machine->thumb1_cc_insn)
+ {
+ if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
+ || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
+ CC_STATUS_INIT;
+ }
+ conds = get_attr_conds (insn);
+ if (conds == CONDS_SET)
+ {
+ rtx set = single_set (insn);
+ cfun->machine->thumb1_cc_insn = insn;
+ cfun->machine->thumb1_cc_op0 = SET_DEST (set);
+ cfun->machine->thumb1_cc_op1 = const0_rtx;
+ cfun->machine->thumb1_cc_mode = CC_NOOVmode;
+ if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
+ {
+ rtx src1 = XEXP (SET_SRC (set), 1);
+ if (src1 == const0_rtx)
+ cfun->machine->thumb1_cc_mode = CCmode;
+ }
+ }
+ else if (conds != CONDS_NOCOND)
+ cfun->machine->thumb1_cc_insn = NULL_RTX;
+ }
+}
+
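+/* Return nonzero if VAL, as a 32-bit constant, is an 8-bit value shifted
+   left by at most 24 bits, and hence (presumably) cheap to synthesize in
+   Thumb as a move of an 8-bit immediate followed by a left shift.  */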
+int
+thumb_shiftable_const (unsigned HOST_WIDE_INT val)
+{
+ unsigned HOST_WIDE_INT mask = 0xff;
+ int i;
+
+ val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
+ if (val == 0) /* XXX */
+ return 0;
+
+ for (i = 0; i < 25; i++)
+ if ((val & (mask << i)) == val)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if the current function contains, or might contain,
+   a far jump.  */
+static int
+thumb_far_jump_used_p (void)
+{
+ rtx insn;
+
+ /* This test is only important for leaf functions. */
+ /* assert (!leaf_function_p ()); */
+
+ /* If we have already decided that far jumps may be used,
+ do not bother checking again, and always return true even if
+ it turns out that they are not being used. Once we have made
+ the decision that far jumps are present (and that hence the link
+ register will be pushed onto the stack) we cannot go back on it. */
+ if (cfun->machine->far_jump_used)
+ return 1;
+
+ /* If this function is not being called from the prologue/epilogue
+ generation code then it must be being called from the
+ INITIAL_ELIMINATION_OFFSET macro. */
+ if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
+ {
+ /* In this case we know that we are being asked about the elimination
+ of the arg pointer register. If that register is not being used,
+ then there are no arguments on the stack, and we do not have to
+ worry that a far jump might force the prologue to push the link
+ register, changing the stack offsets. In this case we can just
+ return false, since the presence of far jumps in the function will
+ not affect stack offsets.
+
+ If the arg pointer is live (or if it was live, but has now been
+ eliminated and so set to dead) then we do have to test to see if
+         the function might contain a far jump.  This test can lead to some
+         false positives, since before reload is completed the length of
+         branch instructions is not known, so gcc defaults to returning their
+         longest length, which in turn sets the far jump attribute to true.
+
+         A false positive will not result in bad code being generated, but it
+         will result in a needless push and pop of the link register.  We
+         hope that this does not occur too often.
+
+ If we need doubleword stack alignment this could affect the other
+ elimination offsets so we can't risk getting it wrong. */
+ if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
+ cfun->machine->arg_pointer_live = 1;
+ else if (!cfun->machine->arg_pointer_live)
+ return 0;
+ }
+
+ /* Check to see if the function contains a branch
+ insn with the far jump attribute set. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == JUMP_INSN
+ /* Ignore tablejump patterns. */
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+ && get_attr_far_jump (insn) == FAR_JUMP_YES
+ )
+ {
+ /* Record the fact that we have decided that
+ the function does use far jumps. */
+ cfun->machine->far_jump_used = 1;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Return nonzero if FUNC must be entered in ARM mode. */
+int
+is_called_in_ARM_mode (tree func)
+{
+ gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
+
+  /* Ignore the problem of functions whose address is taken.  */
+ if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
+ return TRUE;
+
+#ifdef ARM_PE
+ return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
+#else
+ return FALSE;
+#endif
+}
+
+/* Given the stack offsets and register mask in OFFSETS, decide how
+ many additional registers to push instead of subtracting a constant
+ from SP. For epilogues the principle is the same except we use pop.
+ FOR_PROLOGUE indicates which we're generating. */
+static int
+thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
+{
+ HOST_WIDE_INT amount;
+ unsigned long live_regs_mask = offsets->saved_regs_mask;
+ /* Extract a mask of the ones we can give to the Thumb's push/pop
+ instruction. */
+ unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
+ /* Then count how many other high registers will need to be pushed. */
+ unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
+ int n_free, reg_base, size;
+
+ if (!for_prologue && frame_pointer_needed)
+ amount = offsets->locals_base - offsets->saved_regs;
+ else
+ amount = offsets->outgoing_args - offsets->saved_regs;
+
+ /* If the stack frame size is 512 exactly, we can save one load
+ instruction, which should make this a win even when optimizing
+ for speed. */
+ if (!optimize_size && amount != 512)
+ return 0;
+
+ /* Can't do this if there are high registers to push. */
+ if (high_regs_pushed != 0)
+ return 0;
+
+ /* Shouldn't do it in the prologue if no registers would normally
+ be pushed at all. In the epilogue, also allow it if we'll have
+ a pop insn for the PC. */
+ if (l_mask == 0
+ && (for_prologue
+ || TARGET_BACKTRACE
+ || (live_regs_mask & 1 << LR_REGNUM) == 0
+ || TARGET_INTERWORK
+ || crtl->args.pretend_args_size != 0))
+ return 0;
+
+ /* Don't do this if thumb_expand_prologue wants to emit instructions
+ between the push and the stack frame allocation. */
+ if (for_prologue
+ && ((flag_pic && arm_pic_register != INVALID_REGNUM)
+ || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
+ return 0;
+
+ reg_base = 0;
+ n_free = 0;
+ if (!for_prologue)
+ {
+ size = arm_size_return_regs ();
+ reg_base = ARM_NUM_INTS (size);
+ live_regs_mask >>= reg_base;
+ }
+
+ while (reg_base + n_free < 8 && !(live_regs_mask & 1)
+ && (for_prologue || call_used_regs[reg_base + n_free]))
+ {
+ live_regs_mask >>= 1;
+ n_free++;
+ }
+
+ if (n_free == 0)
+ return 0;
+ gcc_assert (amount / 4 * 4 == amount);
+
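+ /* (Worked example, illustrative: with amount == 516 and n_free == 3,
+ we return (516 - 508) / 4 == 2, so two extra registers are pushed
+ and the remaining 508-byte decrement fits the single-insn
+ "sub sp" immediate range.) */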
+ if (amount >= 512 && (amount - n_free * 4) < 512)
+ return (amount - 508) / 4;
+ if (amount <= n_free * 4)
+ return amount / 4;
+ return 0;
+}
+
+/* The bits which aren't usefully expanded as rtl. */
+const char *
+thumb_unexpanded_epilogue (void)
+{
+ arm_stack_offsets *offsets;
+ int regno;
+ unsigned long live_regs_mask = 0;
+ int high_regs_pushed = 0;
+ int extra_pop;
+ int had_to_push_lr;
+ int size;
+
+ if (cfun->machine->return_used_this_function != 0)
+ return "";
+
+ if (IS_NAKED (arm_current_func_type ()))
+ return "";
+
+ offsets = arm_get_frame_offsets ();
+ live_regs_mask = offsets->saved_regs_mask;
+ high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
+
+ /* We can deduce the registers used from the function's return value.
+ This is more reliable than examining df_regs_ever_live_p () because that
+ will be set if the register is ever used in the function, not just if
+ the register is used to hold a return value. */
+ size = arm_size_return_regs ();
+
+ extra_pop = thumb1_extra_regs_pushed (offsets, false);
+ if (extra_pop > 0)
+ {
+ unsigned long extra_mask = (1 << extra_pop) - 1;
+ live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
+ }
+
+ /* The prolog may have pushed some high registers to use as
+ work registers. e.g. the testsuite file:
+ gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
+ compiles to produce:
+ push {r4, r5, r6, r7, lr}
+ mov r7, r9
+ mov r6, r8
+ push {r6, r7}
+ as part of the prolog. We have to undo that pushing here. */
+
+ if (high_regs_pushed)
+ {
+ unsigned long mask = live_regs_mask & 0xff;
+ int next_hi_reg;
+
+ /* The available low registers depend on the size of the value we are
+ returning. */
+ if (size <= 12)
+ mask |= 1 << 3;
+ if (size <= 8)
+ mask |= 1 << 2;
+
+ if (mask == 0)
+ /* Oh dear! We have no low registers into which we can pop
+ high registers! */
+ internal_error
+ ("no low registers available for popping high registers");
+
+ for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
+ if (live_regs_mask & (1 << next_hi_reg))
+ break;
+
+ while (high_regs_pushed)
+ {
+ /* Find lo register(s) into which the high register(s) can
+ be popped. */
+ for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
+ {
+ if (mask & (1 << regno))
+ high_regs_pushed--;
+ if (high_regs_pushed == 0)
+ break;
+ }
+
+ mask &= (2 << regno) - 1; /* A noop if regno == 8 */
+
+ /* Pop the values into the low register(s). */
+ thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
+
+ /* Move the value(s) into the high registers. */
+ for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
+ {
+ if (mask & (1 << regno))
+ {
+ asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
+ regno);
+
+ for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
+ if (live_regs_mask & (1 << next_hi_reg))
+ break;
+ }
+ }
+ }
+ live_regs_mask &= ~0x0f00;
+ }
+
+ had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
+ live_regs_mask &= 0xff;
+
+ if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
+ {
+ /* Pop the return address into the PC. */
+ if (had_to_push_lr)
+ live_regs_mask |= 1 << PC_REGNUM;
+
+ /* Either no argument registers were pushed or a backtrace
+ structure was created which includes an adjusted stack
+ pointer, so just pop everything. */
+ if (live_regs_mask)
+ thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
+ live_regs_mask);
+
+ /* We have either just popped the return address into the
+ PC or it was kept in LR for the entire function.
+ Note that thumb_pushpop has already called thumb_exit if the
+ PC was in the list. */
+ if (!had_to_push_lr)
+ thumb_exit (asm_out_file, LR_REGNUM);
+ }
+ else
+ {
+ /* Pop everything but the return address. */
+ if (live_regs_mask)
+ thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
+ live_regs_mask);
+
+ if (had_to_push_lr)
+ {
+ if (size > 12)
+ {
+ /* We have no free low regs, so save one. */
+ asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
+ LAST_ARG_REGNUM);
+ }
+
+ /* Get the return address into a temporary register. */
+ thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
+ 1 << LAST_ARG_REGNUM);
+
+ if (size > 12)
+ {
+ /* Move the return address to lr. */
+ asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
+ LAST_ARG_REGNUM);
+ /* Restore the low register. */
+ asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
+ IP_REGNUM);
+ regno = LR_REGNUM;
+ }
+ else
+ regno = LAST_ARG_REGNUM;
+ }
+ else
+ regno = LR_REGNUM;
+
+ /* Remove the argument registers that were pushed onto the stack. */
+ asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
+ SP_REGNUM, SP_REGNUM,
+ crtl->args.pretend_args_size);
+
+ thumb_exit (asm_out_file, regno);
+ }
+
+ return "";
+}
+
+/* Functions to save and restore machine-specific function data. */
+static struct machine_function *
+arm_init_machine_status (void)
+{
+ struct machine_function *machine;
+ machine = ggc_alloc_cleared_machine_function ();
+
+#if ARM_FT_UNKNOWN != 0
+ machine->func_type = ARM_FT_UNKNOWN;
+#endif
+ return machine;
+}
+
+/* Return an RTX indicating where the return address to the
+ calling function can be found. */
+rtx
+arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return NULL_RTX;
+
+ return get_hard_reg_initial_val (Pmode, LR_REGNUM);
+}
+
+/* Do anything needed before RTL is emitted for each function. */
+void
+arm_init_expanders (void)
+{
+ /* Arrange to initialize and mark the machine per-function status. */
+ init_machine_status = arm_init_machine_status;
+
+ /* This is to stop the combine pass optimizing away the alignment
+ adjustment of va_arg. */
+ /* ??? It is claimed that this should not be necessary. */
+ if (cfun)
+ mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
+}
+
+
+/* Like arm_compute_initial_elimination_offset. Simpler because there
+ isn't an ABI-specified frame pointer for Thumb. Instead, we set it
+ to point at the base of the local variables after static stack
+ space for a function has been allocated. */
+
+HOST_WIDE_INT
+thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
+{
+ arm_stack_offsets *offsets;
+
+ offsets = arm_get_frame_offsets ();
+
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ switch (to)
+ {
+ case STACK_POINTER_REGNUM:
+ return offsets->outgoing_args - offsets->saved_args;
+
+ case FRAME_POINTER_REGNUM:
+ return offsets->soft_frame - offsets->saved_args;
+
+ case ARM_HARD_FRAME_POINTER_REGNUM:
+ return offsets->saved_regs - offsets->saved_args;
+
+ case THUMB_HARD_FRAME_POINTER_REGNUM:
+ return offsets->locals_base - offsets->saved_args;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case FRAME_POINTER_REGNUM:
+ switch (to)
+ {
+ case STACK_POINTER_REGNUM:
+ return offsets->outgoing_args - offsets->soft_frame;
+
+ case ARM_HARD_FRAME_POINTER_REGNUM:
+ return offsets->saved_regs - offsets->soft_frame;
+
+ case THUMB_HARD_FRAME_POINTER_REGNUM:
+ return offsets->locals_base - offsets->soft_frame;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Generate the rest of a function's prologue. */
+void
+thumb1_expand_prologue (void)
+{
+ rtx insn, dwarf;
+
+ HOST_WIDE_INT amount;
+ arm_stack_offsets *offsets;
+ unsigned long func_type;
+ int regno;
+ unsigned long live_regs_mask;
+
+ func_type = arm_current_func_type ();
+
+ /* Naked functions don't have prologues. */
+ if (IS_NAKED (func_type))
+ return;
+
+ if (IS_INTERRUPT (func_type))
+ {
+ error ("interrupt Service Routines cannot be coded in Thumb mode");
+ return;
+ }
+
+ offsets = arm_get_frame_offsets ();
+ live_regs_mask = offsets->saved_regs_mask;
+ /* Load the pic register before setting the frame pointer,
+ so we can use r7 as a temporary work register. */
+ if (flag_pic && arm_pic_register != INVALID_REGNUM)
+ arm_load_pic_register (live_regs_mask);
+
+ if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
+ emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
+ stack_pointer_rtx);
+
+ if (flag_stack_usage)
+ current_function_static_stack_size
+ = offsets->outgoing_args - offsets->saved_args;
+
+ amount = offsets->outgoing_args - offsets->saved_regs;
+ amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+ if (amount)
+ {
+ if (amount < 512)
+ {
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- amount)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ rtx reg;
+
+ /* The stack decrement is too big for an immediate value in a single
+ insn. In theory we could issue multiple subtracts, but after
+ three of them it becomes more space efficient to place the full
+ value in the constant pool and load into a register. (Also the
+ ARM debugger really likes to see only one stack decrement per
+ function). So instead we look for a scratch register into which
+ we can load the decrement, and then we subtract this from the
+ stack pointer. Unfortunately on the thumb the only available
+ scratch registers are the argument registers, and we cannot use
+ these as they may hold arguments to the function. Instead we
+ attempt to locate a call preserved register which is used by this
+ function. If we can find one, then we know that it will have
+ been pushed at the start of the prologue and so we can corrupt
+ it now. */
+ for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
+ if (live_regs_mask & (1 << regno))
+ break;
+
+ gcc_assert (regno <= LAST_LO_REGNUM);
+
+ reg = gen_rtx_REG (SImode, regno);
+
+ emit_insn (gen_movsi (reg, GEN_INT (- amount)));
+
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -amount));
+ RTX_FRAME_RELATED_P (dwarf) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ }
+ }
+
+ if (frame_pointer_needed)
+ thumb_set_frame_pointer (offsets);
+
+ /* If we are profiling, make sure no instructions are scheduled before
+ the call to mcount. Similarly if the user has requested no
+ scheduling in the prolog. Similarly if we want non-call exceptions
+ using the EABI unwinder, to prevent faulting instructions from being
+ swapped with a stack adjustment. */
+ if (crtl->profile || !TARGET_SCHED_PROLOG
+ || (arm_except_unwind_info (&global_options) == UI_TARGET
+ && cfun->can_throw_non_call_exceptions))
+ emit_insn (gen_blockage ());
+
+ cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
+ if (live_regs_mask & 0xff)
+ cfun->machine->lr_save_eliminated = 0;
+}
+
+
+void
+thumb1_expand_epilogue (void)
+{
+ HOST_WIDE_INT amount;
+ arm_stack_offsets *offsets;
+ int regno;
+
+ /* Naked functions don't have epilogues. */
+ if (IS_NAKED (arm_current_func_type ()))
+ return;
+
+ offsets = arm_get_frame_offsets ();
+ amount = offsets->outgoing_args - offsets->saved_regs;
+
+ if (frame_pointer_needed)
+ {
+ emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+ amount = offsets->locals_base - offsets->saved_regs;
+ }
+ amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
+
+ gcc_assert (amount >= 0);
+ if (amount)
+ {
+ emit_insn (gen_blockage ());
+
+ if (amount < 512)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (amount)));
+ else
+ {
+ /* r3 is always free in the epilogue. */
+ rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
+
+ emit_insn (gen_movsi (reg, GEN_INT (amount)));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
+ }
+ }
+
+ /* Emit a USE (stack_pointer_rtx), so that
+ the stack adjustment will not be deleted. */
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+ if (crtl->profile || !TARGET_SCHED_PROLOG)
+ emit_insn (gen_blockage ());
+
+ /* Emit a clobber for each insn that will be restored in the epilogue,
+ so that flow2 will get register lifetimes correct. */
+ for (regno = 0; regno < 13; regno++)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ emit_clobber (gen_rtx_REG (SImode, regno));
+
+ if (! df_regs_ever_live_p (LR_REGNUM))
+ emit_use (gen_rtx_REG (SImode, LR_REGNUM));
+}
+
+static void
+thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ arm_stack_offsets *offsets;
+ unsigned long live_regs_mask = 0;
+ unsigned long l_mask;
+ unsigned high_regs_pushed = 0;
+ int cfa_offset = 0;
+ int regno;
+
+ if (IS_NAKED (arm_current_func_type ()))
+ return;
+
+ if (is_called_in_ARM_mode (current_function_decl))
+ {
+ const char * name;
+
+ gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
+ gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
+ == SYMBOL_REF);
+ name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+
+ /* Generate code sequence to switch us into Thumb mode. */
+ /* The .code 32 directive has already been emitted by
+ ASM_DECLARE_FUNCTION_NAME. */
+ asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
+ asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
+
+ /* Generate a label, so that the debugger will notice the
+ change in instruction sets. This label is also used by
+ the assembler to bypass the ARM code when this function
+ is called from a Thumb encoded function elsewhere in the
+ same file. Hence the definition of STUB_NAME here must
+ agree with the definition in gas/config/tc-arm.c. */
+
+#define STUB_NAME ".real_start_of"
+
+ fprintf (f, "\t.code\t16\n");
+#ifdef ARM_PE
+ if (arm_dllexport_name_p (name))
+ name = arm_strip_name_encoding (name);
+#endif
+ asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
+ fprintf (f, "\t.thumb_func\n");
+ asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
+ }
+
+ if (crtl->args.pretend_args_size)
+ {
+ /* Output unwind directive for the stack adjustment. */
+ if (arm_except_unwind_info (&global_options) == UI_TARGET)
+ fprintf (f, "\t.pad #%d\n",
+ crtl->args.pretend_args_size);
+
+ if (cfun->machine->uses_anonymous_args)
+ {
+ int num_pushes;
+
+ fprintf (f, "\tpush\t{");
+
+ num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
+
+ for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
+ regno <= LAST_ARG_REGNUM;
+ regno++)
+ asm_fprintf (f, "%r%s", regno,
+ regno == LAST_ARG_REGNUM ? "" : ", ");
+
+ fprintf (f, "}\n");
+ }
+ else
+ asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
+ SP_REGNUM, SP_REGNUM,
+ crtl->args.pretend_args_size);
+
+ /* We don't need to record the stores for unwinding (would it
+ help the debugger any if we did?), but record the change in
+ the stack pointer. */
+ if (dwarf2out_do_frame ())
+ {
+ char *l = dwarf2out_cfi_label (false);
+
+ cfa_offset = cfa_offset + crtl->args.pretend_args_size;
+ dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
+ }
+ }
+
+ /* Get the registers we are going to push. */
+ offsets = arm_get_frame_offsets ();
+ live_regs_mask = offsets->saved_regs_mask;
+ /* Extract a mask of the ones we can give to the Thumb's push instruction. */
+ l_mask = live_regs_mask & 0x40ff;
+ /* Then count how many other high registers will need to be pushed. */
+ high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
+
+ if (TARGET_BACKTRACE)
+ {
+ unsigned offset;
+ unsigned work_register;
+
+ /* We have been asked to create a stack backtrace structure.
+ The code looks like this:
+
+ 0 .align 2
+ 0 func:
+ 0 sub SP, #16 Reserve space for 4 registers.
+ 2 push {R7} Push low registers.
+ 4 add R7, SP, #20 Get the stack pointer before the push.
+ 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
+ 8 mov R7, PC Get hold of the start of this code plus 12.
+ 10 str R7, [SP, #16] Store it.
+ 12 mov R7, FP Get hold of the current frame pointer.
+ 14 str R7, [SP, #4] Store it.
+ 16 mov R7, LR Get hold of the current return address.
+ 18 str R7, [SP, #12] Store it.
+ 20 add R7, SP, #16 Point at the start of the backtrace structure.
+ 22 mov FP, R7 Put this value into the frame pointer. */
+
+ work_register = thumb_find_work_register (live_regs_mask);
+
+ if (arm_except_unwind_info (&global_options) == UI_TARGET)
+ asm_fprintf (f, "\t.pad #16\n");
+
+ asm_fprintf
+ (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
+ SP_REGNUM, SP_REGNUM);
+
+ if (dwarf2out_do_frame ())
+ {
+ char *l = dwarf2out_cfi_label (false);
+
+ cfa_offset = cfa_offset + 16;
+ dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
+ }
+
+ if (l_mask)
+ {
+ thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
+ offset = bit_count (l_mask) * UNITS_PER_WORD;
+ }
+ else
+ offset = 0;
+
+ asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
+ offset + 16 + crtl->args.pretend_args_size);
+
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset + 4);
+
+ /* Make sure that the instruction fetching the PC is in the right place
+ to calculate "start of backtrace creation code + 12". */
+ if (l_mask)
+ {
+ asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset + 12);
+ asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
+ ARM_HARD_FRAME_POINTER_REGNUM);
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset);
+ }
+ else
+ {
+ asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
+ ARM_HARD_FRAME_POINTER_REGNUM);
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset);
+ asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset + 12);
+ }
+
+ asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
+ asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
+ offset + 8);
+ asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
+ offset + 12);
+ asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
+ ARM_HARD_FRAME_POINTER_REGNUM, work_register);
+ }
+ /* Optimization: If we are not pushing any low registers but we are going
+ to push some high registers then delay our first push. This will just
+ be a push of LR and we can combine it with the push of the first high
+ register. */
+ else if ((l_mask & 0xff) != 0
+ || (high_regs_pushed == 0 && l_mask))
+ {
+ unsigned long mask = l_mask;
+ mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
+ thumb_pushpop (f, mask, 1, &cfa_offset, mask);
+ }
+
+ if (high_regs_pushed)
+ {
+ unsigned pushable_regs;
+ unsigned next_hi_reg;
+
+ for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
+ if (live_regs_mask & (1 << next_hi_reg))
+ break;
+
+ pushable_regs = l_mask & 0xff;
+
+ if (pushable_regs == 0)
+ pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
+
+ while (high_regs_pushed > 0)
+ {
+ unsigned long real_regs_mask = 0;
+
+ for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
+ {
+ if (pushable_regs & (1 << regno))
+ {
+ asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
+
+ high_regs_pushed --;
+ real_regs_mask |= (1 << next_hi_reg);
+
+ if (high_regs_pushed)
+ {
+ for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
+ next_hi_reg --)
+ if (live_regs_mask & (1 << next_hi_reg))
+ break;
+ }
+ else
+ {
+ pushable_regs &= ~((1 << regno) - 1);
+ break;
+ }
+ }
+ }
+
+ /* If we had to find a work register and we have not yet
+ saved the LR then add it to the list of regs to push. */
+ if (l_mask == (1 << LR_REGNUM))
+ {
+ thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
+ 1, &cfa_offset,
+ real_regs_mask | (1 << LR_REGNUM));
+ l_mask = 0;
+ }
+ else
+ thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
+ }
+ }
+}
+
+/* Handle the case of a double word load into a low register from
+ a computed memory address. The computed address may involve a
+ register which is overwritten by the load. */
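+/* (Illustrative: loading a doubleword at [r0] into r0/r1 must fetch
+ the high word into r1 first, since loading r0 destroys the base
+ address.) */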
+const char *
+thumb_load_double_from_address (rtx *operands)
+{
+ rtx addr;
+ rtx base;
+ rtx offset;
+ rtx arg1;
+ rtx arg2;
+
+ gcc_assert (GET_CODE (operands[0]) == REG);
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ /* Get the memory address. */
+ addr = XEXP (operands[1], 0);
+
+ /* Work out how the memory address is computed. */
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ operands[2] = adjust_address (operands[1], SImode, 4);
+
+ if (REGNO (operands[0]) == REGNO (addr))
+ {
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ output_asm_insn ("ldr\t%0, %1", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldr\t%0, %1", operands);
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ }
+ break;
+
+ case CONST:
+ /* Compute <address> + 4 for the high order load. */
+ operands[2] = adjust_address (operands[1], SImode, 4);
+
+ output_asm_insn ("ldr\t%0, %1", operands);
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ break;
+
+ case PLUS:
+ arg1 = XEXP (addr, 0);
+ arg2 = XEXP (addr, 1);
+
+ if (CONSTANT_P (arg1))
+ base = arg2, offset = arg1;
+ else
+ base = arg1, offset = arg2;
+
+ gcc_assert (GET_CODE (base) == REG);
+
+ /* Catch the case of <address> = <reg> + <reg> */
+ if (GET_CODE (offset) == REG)
+ {
+ int reg_offset = REGNO (offset);
+ int reg_base = REGNO (base);
+ int reg_dest = REGNO (operands[0]);
+
+ /* Add the base and offset registers together into the
+ higher destination register. */
+ asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
+ reg_dest + 1, reg_base, reg_offset);
+
+ /* Load the lower destination register from the address in
+ the higher destination register. */
+ asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
+ reg_dest, reg_dest + 1);
+
+ /* Load the higher destination register from its own address
+ plus 4. */
+ asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
+ reg_dest + 1, reg_dest + 1);
+ }
+ else
+ {
+ /* Compute <address> + 4 for the high order load. */
+ operands[2] = adjust_address (operands[1], SImode, 4);
+
+ /* If the computed address is held in the low order register
+ then load the high order register first, otherwise always
+ load the low order register first. */
+ if (REGNO (operands[0]) == REGNO (base))
+ {
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ output_asm_insn ("ldr\t%0, %1", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldr\t%0, %1", operands);
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ }
+ }
+ break;
+
+ case LABEL_REF:
+ /* With no registers to worry about we can just load the value
+ directly. */
+ operands[2] = adjust_address (operands[1], SImode, 4);
+
+ output_asm_insn ("ldr\t%H0, %2", operands);
+ output_asm_insn ("ldr\t%0, %1", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return "";
+}
+
+const char *
+thumb_output_move_mem_multiple (int n, rtx *operands)
+{
+ rtx tmp;
+
+ switch (n)
+ {
+ case 2:
+ if (REGNO (operands[4]) > REGNO (operands[5]))
+ {
+ tmp = operands[4];
+ operands[4] = operands[5];
+ operands[5] = tmp;
+ }
+ output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
+ output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
+ break;
+
+ case 3:
+ if (REGNO (operands[4]) > REGNO (operands[5]))
+ {
+ tmp = operands[4];
+ operands[4] = operands[5];
+ operands[5] = tmp;
+ }
+ if (REGNO (operands[5]) > REGNO (operands[6]))
+ {
+ tmp = operands[5];
+ operands[5] = operands[6];
+ operands[6] = tmp;
+ }
+ if (REGNO (operands[4]) > REGNO (operands[5]))
+ {
+ tmp = operands[4];
+ operands[4] = operands[5];
+ operands[5] = tmp;
+ }
+
+ output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
+ output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return "";
+}
+
+/* Output a call-via instruction for thumb state. */
+const char *
+thumb_call_via_reg (rtx reg)
+{
+ int regno = REGNO (reg);
+ rtx *labelp;
+
+ gcc_assert (regno < LR_REGNUM);
+
+ /* If we are in the normal text section we can use a single instance
+ per compilation unit. If we are doing function sections, then we need
+ an entry per section, since we can't rely on reachability. */
+ if (in_section == text_section)
+ {
+ thumb_call_reg_needed = 1;
+
+ if (thumb_call_via_label[regno] == NULL)
+ thumb_call_via_label[regno] = gen_label_rtx ();
+ labelp = thumb_call_via_label + regno;
+ }
+ else
+ {
+ if (cfun->machine->call_via[regno] == NULL)
+ cfun->machine->call_via[regno] = gen_label_rtx ();
+ labelp = cfun->machine->call_via + regno;
+ }
+
+ output_asm_insn ("bl\t%a0", labelp);
+ return "";
+}
+
+/* Routines for generating rtl. */
+void
+thumb_expand_movmemqi (rtx *operands)
+{
+ rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
+ rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
+ HOST_WIDE_INT len = INTVAL (operands[2]);
+ HOST_WIDE_INT offset = 0;
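+ /* (Illustrative: a 27-byte copy emits two 12-byte ldmia/stmia
+ groups, then a halfword move and a byte move for the 3-byte
+ tail.) */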
+
+ while (len >= 12)
+ {
+ emit_insn (gen_movmem12b (out, in, out, in));
+ len -= 12;
+ }
+
+ if (len >= 8)
+ {
+ emit_insn (gen_movmem8b (out, in, out, in));
+ len -= 8;
+ }
+
+ if (len >= 4)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
+ emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
+ len -= 4;
+ offset += 4;
+ }
+
+ if (len >= 2)
+ {
+ rtx reg = gen_reg_rtx (HImode);
+ emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
+ plus_constant (in, offset))));
+ emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
+ reg));
+ len -= 2;
+ offset += 2;
+ }
+
+ if (len)
+ {
+ rtx reg = gen_reg_rtx (QImode);
+ emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
+ plus_constant (in, offset))));
+ emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
+ reg));
+ }
+}
+
+void
+thumb_reload_out_hi (rtx *operands)
+{
+ emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
+}
+
+/* Handle reading a half-word from memory during reload. */
+void
+thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
+/* Return the length of a function name prefix
+ that starts with the character 'c'. */
+static int
+arm_get_strip_length (int c)
+{
+ switch (c)
+ {
+ ARM_NAME_ENCODING_LENGTHS
+ default: return 0;
+ }
+}
+
+/* Return a pointer to a function's name with any
+ and all prefix encodings stripped from it. */
+const char *
+arm_strip_name_encoding (const char *name)
+{
+ int skip;
+
+ while ((skip = arm_get_strip_length (* name)))
+ name += skip;
+
+ return name;
+}
+
+/* If there is a '*' anywhere in the name's prefix, then
+ emit the stripped name verbatim, otherwise prepend an
+ underscore if leading underscores are being used. */
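+/* (Illustrative, assuming the target's ARM_NAME_ENCODING_LENGTHS
+ strips '*': "*foo" is emitted verbatim as "foo", while "foo" has
+ the user label prefix applied via %U.) */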
+void
+arm_asm_output_labelref (FILE *stream, const char *name)
+{
+ int skip;
+ int verbatim = 0;
+
+ while ((skip = arm_get_strip_length (* name)))
+ {
+ verbatim |= (*name == '*');
+ name += skip;
+ }
+
+ if (verbatim)
+ fputs (name, stream);
+ else
+ asm_fprintf (stream, "%U%s", name);
+}
+
+static void
+arm_file_start (void)
+{
+ int val;
+
+ if (TARGET_UNIFIED_ASM)
+ asm_fprintf (asm_out_file, "\t.syntax unified\n");
+
+ if (TARGET_BPABI)
+ {
+ const char *fpu_name;
+ if (arm_selected_arch)
+ asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
+ else
+ asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
+
+ if (TARGET_SOFT_FLOAT)
+ {
+ if (TARGET_VFP)
+ fpu_name = "softvfp";
+ else
+ fpu_name = "softfpa";
+ }
+ else
+ {
+ fpu_name = arm_fpu_desc->name;
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
+ {
+ if (TARGET_HARD_FLOAT)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
+ if (TARGET_HARD_FLOAT_ABI)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
+ }
+ }
+ asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
+
+ /* Some of these attributes only apply when the corresponding features
+ are used. However we don't have any easy way of figuring this out.
+ Conservatively record the setting that would have been used. */
+
+ /* Tag_ABI_FP_rounding. */
+ if (flag_rounding_math)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
+ if (!flag_unsafe_math_optimizations)
+ {
+ /* Tag_ABI_FP_denormal. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
+ /* Tag_ABI_FP_exceptions. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
+ }
+ /* Tag_ABI_FP_user_exceptions. */
+ if (flag_signaling_nans)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
+ /* Tag_ABI_FP_number_model. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
+ flag_finite_math_only ? 1 : 3);
+
+ /* Tag_ABI_align8_needed. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
+ /* Tag_ABI_align8_preserved. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
+ /* Tag_ABI_enum_size. */
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
+ flag_short_enums ? 1 : 2);
+
+ /* Tag_ABI_optimization_goals. */
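+ /* (E.g. -Os records 4, -O2 and higher record 2, -O1 records 1,
+ and -O0 records 6.) */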
+ if (optimize_size)
+ val = 4;
+ else if (optimize >= 2)
+ val = 2;
+ else if (optimize)
+ val = 1;
+ else
+ val = 6;
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
+
+ /* Tag_ABI_FP_16bit_format. */
+ if (arm_fp16_format)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
+ (int)arm_fp16_format);
+
+ if (arm_lang_output_object_attributes_hook)
+ arm_lang_output_object_attributes_hook();
+ }
+ default_file_start();
+}
+
+static void
+arm_file_end (void)
+{
+ int regno;
+
+ if (NEED_INDICATE_EXEC_STACK)
+ /* Add .note.GNU-stack. */
+ file_end_indicate_exec_stack ();
+
+ if (! thumb_call_reg_needed)
+ return;
+
+ switch_to_section (text_section);
+ asm_fprintf (asm_out_file, "\t.code 16\n");
+ ASM_OUTPUT_ALIGN (asm_out_file, 1);
+
+ for (regno = 0; regno < LR_REGNUM; regno++)
+ {
+ rtx label = thumb_call_via_label[regno];
+
+ if (label != 0)
+ {
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+ asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
+ }
+ }
+}
+
+#ifndef ARM_PE
+/* Symbols in the text segment can be accessed without indirecting via the
+ constant pool; it may take an extra binary operation, but this is still
+ faster than indirecting via memory. Don't do this when not optimizing,
+ since we won't be calculating all of the offsets necessary to do this
+ simplification. */
+
+static void
+arm_encode_section_info (tree decl, rtx rtl, int first)
+{
+ if (optimize > 0 && TREE_CONSTANT (decl))
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
+
+ default_encode_section_info (decl, rtl, first);
+}
+#endif /* !ARM_PE */
+
+static void
+arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
+{
+ if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
+ && !strcmp (prefix, "L"))
+ {
+ arm_ccfsm_state = 0;
+ arm_target_insn = NULL;
+ }
+ default_internal_label (stream, prefix, labelno);
+}
+
+/* Output code to add DELTA to the first argument, and then jump
+ to FUNCTION. Used for C++ multiple inheritance. */
+static void
+arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ static int thunk_label = 0;
+ char label[256];
+ char labelpc[256];
+ int mi_delta = delta;
+ const char *const mi_op = mi_delta < 0 ? "sub" : "add";
+ int shift = 0;
+ int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
+ ? 1 : 0);
+ if (mi_delta < 0)
+ mi_delta = - mi_delta;
+
+ if (TARGET_THUMB1)
+ {
+ int labelno = thunk_label++;
+ ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
+ /* Thunks are entered in ARM mode when available. */
+ if (TARGET_THUMB1_ONLY)
+ {
+ /* push r3 so we can use it as a temporary. */
+ /* TODO: Omit this save if r3 is not used. */
+ fputs ("\tpush {r3}\n", file);
+ fputs ("\tldr\tr3, ", file);
+ }
+ else
+ {
+ fputs ("\tldr\tr12, ", file);
+ }
+ assemble_name (file, label);
+ fputc ('\n', file);
+ if (flag_pic)
+ {
+ /* If we are generating PIC, the ldr instruction below loads
+ "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
+ the address of the add + 8, so we have:
+
+ r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
+ = target + 1.
+
+ Note that we have "+ 1" because some versions of GNU ld
+ don't set the low bit of the result for R_ARM_REL32
+ relocations against thumb function symbols.
+ On ARMv6M this is +4, not +8. */
+ ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
+ assemble_name (file, labelpc);
+ fputs (":\n", file);
+ if (TARGET_THUMB1_ONLY)
+ {
+ /* This is 2 insns after the start of the thunk, so we know it
+ is 4-byte aligned. */
+ fputs ("\tadd\tr3, pc, r3\n", file);
+ fputs ("\tmov r12, r3\n", file);
+ }
+ else
+ fputs ("\tadd\tr12, pc, r12\n", file);
+ }
+ else if (TARGET_THUMB1_ONLY)
+ fputs ("\tmov r12, r3\n", file);
+ }
+ if (TARGET_THUMB1_ONLY)
+ {
+ if (mi_delta > 255)
+ {
+ fputs ("\tldr\tr3, ", file);
+ assemble_name (file, label);
+ fputs ("+4\n", file);
+ asm_fprintf (file, "\t%s\t%r, %r, r3\n",
+ mi_op, this_regno, this_regno);
+ }
+ else if (mi_delta != 0)
+ {
+ asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
+ mi_op, this_regno, this_regno,
+ mi_delta);
+ }
+ }
+ else
+ {
+ /* TODO: Use movw/movt for large constants when available. */
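+ /* (Illustrative: a delta of 0x1234 is emitted as two adds,
+ #0x234 and then #0x1000 -- each an 8-bit value at an even
+ shift, as the loop below requires.) */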
+ while (mi_delta != 0)
+ {
+ if ((mi_delta & (3 << shift)) == 0)
+ shift += 2;
+ else
+ {
+ asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
+ mi_op, this_regno, this_regno,
+ mi_delta & (0xff << shift));
+ mi_delta &= ~(0xff << shift);
+ shift += 8;
+ }
+ }
+ }
+ if (TARGET_THUMB1)
+ {
+ if (TARGET_THUMB1_ONLY)
+ fputs ("\tpop\t{r3}\n", file);
+
+ fprintf (file, "\tbx\tr12\n");
+ ASM_OUTPUT_ALIGN (file, 2);
+ assemble_name (file, label);
+ fputs (":\n", file);
+ if (flag_pic)
+ {
+ /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
+ rtx tem = XEXP (DECL_RTL (function), 0);
+ tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
+ tem = gen_rtx_MINUS (GET_MODE (tem),
+ tem,
+ gen_rtx_SYMBOL_REF (Pmode,
+ ggc_strdup (labelpc)));
+ assemble_integer (tem, 4, BITS_PER_WORD, 1);
+ }
+ else
+ /* Output ".word .LTHUNKn". */
+ assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
+
+ if (TARGET_THUMB1_ONLY && mi_delta > 255)
+ assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
+ }
+ else
+ {
+ fputs ("\tb\t", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ if (NEED_PLT_RELOC)
+ fputs ("(PLT)", file);
+ fputc ('\n', file);
+ }
+}
+
+int
+arm_emit_vector_const (FILE *file, rtx x)
+{
+ int i;
+ const char * pattern;
+
+ gcc_assert (GET_CODE (x) == CONST_VECTOR);
+
+ switch (GET_MODE (x))
+ {
+ case V2SImode: pattern = "%08x"; break;
+ case V4HImode: pattern = "%04x"; break;
+ case V8QImode: pattern = "%02x"; break;
+ default: gcc_unreachable ();
+ }
+
+ fprintf (file, "0x");
+ for (i = CONST_VECTOR_NUNITS (x); i--;)
+ {
+ rtx element;
+
+ element = CONST_VECTOR_ELT (x, i);
+ fprintf (file, pattern, INTVAL (element));
+ }
+
+ return 1;
+}
+
+/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
+ HFmode constant pool entries are actually loaded with ldr. */
+void
+arm_emit_fp16_const (rtx c)
+{
+ REAL_VALUE_TYPE r;
+ long bits;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c);
+ bits = real_to_target (NULL, &r, HFmode);
+ if (WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
+ if (!WORDS_BIG_ENDIAN)
+ assemble_zeros (2);
+}
+
+const char *
+arm_output_load_gr (rtx *operands)
+{
+ rtx reg;
+ rtx offset;
+ rtx wcgr;
+ rtx sum;
+
+ if (GET_CODE (operands [1]) != MEM
+ || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
+ || GET_CODE (reg = XEXP (sum, 0)) != REG
+ || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
+ || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
+ return "wldrw%?\t%0, %1";
+
+ /* Fix up an out-of-range load of a GR register. */
+ output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
+ wcgr = operands[0];
+ operands[0] = reg;
+ output_asm_insn ("ldr%?\t%0, %1", operands);
+
+ operands[0] = wcgr;
+ operands[1] = reg;
+ output_asm_insn ("tmcr%?\t%0, %1", operands);
+ output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
+
+ return "";
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
+
+ On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
+ named arg and all anonymous args onto the stack.
+ XXX I know the prologue shouldn't be pushing registers, but it is faster
+ that way. */
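+/* (Illustrative: for "int f (int a, ...)" under AAPCS the named
+ argument occupies r0, so nregs is 1 and *pretend_size becomes
+ (4 - 1) * 4 == 12, making the prologue push r1-r3.) */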
+
+static void
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
+ enum machine_mode mode,
+ tree type,
+ int *pretend_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ int nregs;
+
+ cfun->machine->uses_anonymous_args = 1;
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ nregs = pcum->aapcs_ncrn;
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
+ nregs++;
+ }
+ else
+ nregs = pcum->nregs;
+
+ if (nregs < NUM_ARG_REGS)
+ *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
+}
+
+/* Return nonzero if the CONSUMER instruction (a store) does not need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_no_early_store_addr_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx addr = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = COND_EXEC_CODE (addr);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 0);
+
+ return !reg_overlap_mentioned_p (value, addr);
+}
+
+/* Return nonzero if the CONSUMER instruction (a store) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_store_addr_dep (rtx producer, rtx consumer)
+{
+ return !arm_no_early_store_addr_dep (producer, consumer);
+}
+
+/* Return nonzero if the CONSUMER instruction (a load) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_load_addr_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx addr = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = COND_EXEC_CODE (addr);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 1);
+
+ return reg_overlap_mentioned_p (value, addr);
+}
+
+/* Return nonzero if the CONSUMER instruction (an ALU op) does not
+ have an early register shift value or amount dependency on the
+ result of PRODUCER. */
+
+int
+arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx op = PATTERN (consumer);
+ rtx early_op;
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (op) == COND_EXEC)
+ op = COND_EXEC_CODE (op);
+ if (GET_CODE (op) == PARALLEL)
+ op = XVECEXP (op, 0, 0);
+ op = XEXP (op, 1);
+
+ early_op = XEXP (op, 0);
+ /* This is either an actual independent shift, or a shift applied to
+ the first operand of another operation. We want the whole shift
+ operation. */
+ if (GET_CODE (early_op) == REG)
+ early_op = op;
+
+ return !reg_overlap_mentioned_p (value, early_op);
+}
+
+/* Return nonzero if the CONSUMER instruction (an ALU op) does not
+ have an early register shift value dependency on the result of
+ PRODUCER. */
+
+int
+arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx op = PATTERN (consumer);
+ rtx early_op;
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (op) == COND_EXEC)
+ op = COND_EXEC_CODE (op);
+ if (GET_CODE (op) == PARALLEL)
+ op = XVECEXP (op, 0, 0);
+ op = XEXP (op, 1);
+
+ early_op = XEXP (op, 0);
+
+ /* This is either an actual independent shift, or a shift applied to
+ the first operand of another operation. We want the value being
+ shifted, in either case. */
+ if (GET_CODE (early_op) != REG)
+ early_op = XEXP (early_op, 0);
+
+ return !reg_overlap_mentioned_p (value, early_op);
+}
+
+/* Return nonzero if the CONSUMER (a mul or mac op) does not
+ have an early register mult dependency on the result of
+ PRODUCER. */
+
+int
+arm_no_early_mul_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx op = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (op) == COND_EXEC)
+ op = COND_EXEC_CODE (op);
+ if (GET_CODE (op) == PARALLEL)
+ op = XVECEXP (op, 0, 0);
+ op = XEXP (op, 1);
+
+ if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
+ {
+ if (GET_CODE (XEXP (op, 0)) == MULT)
+ return !reg_overlap_mentioned_p (value, XEXP (op, 0));
+ else
+ return !reg_overlap_mentioned_p (value, XEXP (op, 1));
+ }
+
+ return 0;
+}
+
+/* We can't rely on the caller doing the proper promotion when
+ using APCS or ATPCS. */
+
+static bool
+arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
+{
+ return !TARGET_AAPCS_BASED;
+}
+
+static enum machine_mode
+arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ return SImode;
+
+ return mode;
+}
+
+/* AAPCS based ABIs use short enums by default. */
+
+static bool
+arm_default_short_enums (void)
+{
+ return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
+}
+
+
+/* AAPCS requires that anonymous bitfields affect structure alignment. */
+
+static bool
+arm_align_anon_bitfield (void)
+{
+ return TARGET_AAPCS_BASED;
+}
+
+
+/* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
+
+static tree
+arm_cxx_guard_type (void)
+{
+ return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
+}
+
+/* Return non-zero if the consumer (a multiply-accumulate instruction)
+ has an accumulator dependency on the result of the producer (a
+ multiplication instruction) and no other dependency on that result. */
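+/* (Illustrative: "mul r1, r2, r3" followed by "mla r4, r5, r6, r1"
+ satisfies this; "mla r4, r1, r6, r7" does not, since the multiply
+ result feeds a multiplicand rather than the accumulator.) */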
+int
+arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
+{
+ rtx mul = PATTERN (producer);
+ rtx mac = PATTERN (consumer);
+ rtx mul_result;
+ rtx mac_op0, mac_op1, mac_acc;
+
+ if (GET_CODE (mul) == COND_EXEC)
+ mul = COND_EXEC_CODE (mul);
+ if (GET_CODE (mac) == COND_EXEC)
+ mac = COND_EXEC_CODE (mac);
+
+ /* Check that mul is of the form (set (...) (mult ...))
+ and mla is of the form (set (...) (plus (mult ...) (...))). */
+ if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
+ || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
+ || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
+ return 0;
+
+ mul_result = XEXP (mul, 0);
+ mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
+ mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
+ mac_acc = XEXP (XEXP (mac, 1), 1);
+
+ return (reg_overlap_mentioned_p (mul_result, mac_acc)
+ && !reg_overlap_mentioned_p (mul_result, mac_op0)
+ && !reg_overlap_mentioned_p (mul_result, mac_op1));
+}
+
+
+/* The EABI says test the least significant bit of a guard variable. */
+
+static bool
+arm_cxx_guard_mask_bit (void)
+{
+ return TARGET_AAPCS_BASED;
+}
+
+
+/* The EABI specifies that all array cookies are 8 bytes long. */
+
+static tree
+arm_get_cookie_size (tree type)
+{
+ tree size;
+
+ if (!TARGET_AAPCS_BASED)
+ return default_cxx_get_cookie_size (type);
+
+ size = build_int_cst (sizetype, 8);
+ return size;
+}
+
+
+/* The EABI says that array cookies should also contain the element size. */
+
+static bool
+arm_cookie_has_size (void)
+{
+ return TARGET_AAPCS_BASED;
+}
+
+
+/* The EABI says constructors and destructors should return a pointer to
+ the object constructed/destroyed. */
+
+static bool
+arm_cxx_cdtor_returns_this (void)
+{
+ return TARGET_AAPCS_BASED;
+}
+
+/* The EABI says that an inline function may never be the key
+ method. */
+
+static bool
+arm_cxx_key_method_may_be_inline (void)
+{
+ return !TARGET_AAPCS_BASED;
+}
+
+static void
+arm_cxx_determine_class_data_visibility (tree decl)
+{
+ if (!TARGET_AAPCS_BASED
+ || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
+ return;
+
+ /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
+ is exported. However, on systems without dynamic vague linkage,
+ \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
+ if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
+ DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+ else
+ DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
+ DECL_VISIBILITY_SPECIFIED (decl) = 1;
+}
+
+static bool
+arm_cxx_class_data_always_comdat (void)
+{
+ /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
+ vague linkage if the class has no key function. */
+ return !TARGET_AAPCS_BASED;
+}
+
+
+/* The EABI says __aeabi_atexit should be used to register static
+ destructors. */
+
+static bool
+arm_cxx_use_aeabi_atexit (void)
+{
+ return TARGET_AAPCS_BASED;
+}
+
+
+void
+arm_set_return_address (rtx source, rtx scratch)
+{
+ arm_stack_offsets *offsets;
+ HOST_WIDE_INT delta;
+ rtx addr;
+ unsigned long saved_regs;
+
+ offsets = arm_get_frame_offsets ();
+ saved_regs = offsets->saved_regs_mask;
+
+ if ((saved_regs & (1 << LR_REGNUM)) == 0)
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
+ else
+ {
+ if (frame_pointer_needed)
+ addr = plus_constant(hard_frame_pointer_rtx, -4);
+ else
+ {
+ /* LR will be the first saved register. */
+ delta = offsets->outgoing_args - (offsets->frame + 4);
+
+
+ if (delta >= 4096)
+ {
+ emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
+ GEN_INT (delta & ~4095)));
+ addr = scratch;
+ delta &= 4095;
+ }
+ else
+ addr = stack_pointer_rtx;
+
+ addr = plus_constant (addr, delta);
+ }
+ emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ }
+}
+
+
+void
+thumb_set_return_address (rtx source, rtx scratch)
+{
+ arm_stack_offsets *offsets;
+ HOST_WIDE_INT delta;
+ HOST_WIDE_INT limit;
+ int reg;
+ rtx addr;
+ unsigned long mask;
+
+ emit_use (source);
+
+ offsets = arm_get_frame_offsets ();
+ mask = offsets->saved_regs_mask;
+ if (mask & (1 << LR_REGNUM))
+ {
+ limit = 1024;
+ /* Find the saved regs. */
+ if (frame_pointer_needed)
+ {
+ delta = offsets->soft_frame - offsets->saved_args;
+ reg = THUMB_HARD_FRAME_POINTER_REGNUM;
+ if (TARGET_THUMB1)
+ limit = 128;
+ }
+ else
+ {
+ delta = offsets->outgoing_args - offsets->saved_args;
+ reg = SP_REGNUM;
+ }
+ /* Allow for the stack frame. */
+ if (TARGET_THUMB1 && TARGET_BACKTRACE)
+ delta -= 16;
+ /* The link register is always the first saved register. */
+ delta -= 4;
+
+ /* Construct the address. */
+ addr = gen_rtx_REG (SImode, reg);
+ if (delta > limit)
+ {
+ emit_insn (gen_movsi (scratch, GEN_INT (delta)));
+ emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
+ addr = scratch;
+ }
+ else
+ addr = plus_constant (addr, delta);
+
+ emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ }
+ else
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+bool
+arm_vector_mode_supported_p (enum machine_mode mode)
+{
+ /* Neon also supports V2SImode, etc. listed in the clause below. */
+ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
+ || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
+ return true;
+
+ if ((TARGET_NEON || TARGET_IWMMXT)
+ && ((mode == V2SImode)
+ || (mode == V4HImode)
+ || (mode == V8QImode)))
+ return true;
+
+ return false;
+}
+
+/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+ registers when autovectorizing for Neon, at least until multiple vector
+ widths are supported properly by the middle-end. */
+
+static enum machine_mode
+arm_preferred_simd_mode (enum machine_mode mode)
+{
+ if (TARGET_NEON)
+ switch (mode)
+ {
+ case SFmode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+ case SImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+ case HImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+ case QImode:
+ return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+ case DImode:
+ if (TARGET_NEON_VECTORIZE_QUAD)
+ return V2DImode;
+ break;
+
+ default:;
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ switch (mode)
+ {
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+
+ default:;
+ }
+
+ return word_mode;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
+
+ We need to define this for LO_REGS on thumb. Otherwise we can end up
+ using r0-r4 for function arguments, r7 for the stack frame and don't
+ have enough left over to do doubleword arithmetic. */
+
+static bool
+arm_class_likely_spilled_p (reg_class_t rclass)
+{
+ if ((TARGET_THUMB && rclass == LO_REGS)
+ || rclass == CC_REG)
+ return true;
+
+ return false;
+}
+
+/* Implements target hook small_register_classes_for_mode_p. */
+bool
+arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return TARGET_THUMB1;
+}
+
+/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
+ ARM insns and therefore guarantee that the shift count is modulo 256.
+ DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
+ guarantee no particular behavior for out-of-range counts. */
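+/* (E.g. an SImode shift whose count register holds 257 behaves as a
+ shift by 1, since only the least significant byte of the count is
+ used.) */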
+
+static unsigned HOST_WIDE_INT
+arm_shift_truncation_mask (enum machine_mode mode)
+{
+ return mode == SImode ? 255 : 0;
+}
+
+
+/* Map internal gcc register numbers to DWARF2 register numbers. */
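+/* (Illustrative: r0-r15 map straight through, S0 maps to 64, and
+ with the EABI numbering below D16 maps to 256 + 16 == 272.) */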
+
+unsigned int
+arm_dbx_register_number (unsigned int regno)
+{
+ if (regno < 16)
+ return regno;
+
+ /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
+ compatibility. The EABI defines them as registers 96-103. */
+ if (IS_FPA_REGNUM (regno))
+ return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
+
+ if (IS_VFP_REGNUM (regno))
+ {
+ /* See comment in arm_dwarf_register_span. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return 64 + regno - FIRST_VFP_REGNUM;
+ else
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2;
+ }
+
+ if (IS_IWMMXT_GR_REGNUM (regno))
+ return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
+
+ if (IS_IWMMXT_REGNUM (regno))
+ return 112 + regno - FIRST_IWMMXT_REGNUM;
+
+ gcc_unreachable ();
+}
+
+/* DWARF models VFPv3 registers as 32 64-bit registers.
+ GCC models them as 64 32-bit registers, so we need to describe this to
+ the DWARF generation code. Other registers can use the default. */
+static rtx
+arm_dwarf_register_span (rtx rtl)
+{
+ unsigned regno;
+ int nregs;
+ int i;
+ rtx p;
+
+ regno = REGNO (rtl);
+ if (!IS_VFP_REGNUM (regno))
+ return NULL_RTX;
+
+ /* XXX FIXME: The EABI defines two VFP register ranges:
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
+ 256-287: D0-D31
+ The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
+ corresponding D register. Until GDB supports this, we shall use the
+ legacy encodings. We also use these encodings for D0-D15 for
+ compatibility with older debuggers. */
+ if (VFP_REGNO_OK_FOR_SINGLE (regno))
+ return NULL_RTX;
+
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
+ regno = (regno - FIRST_VFP_REGNUM) / 2;
+ for (i = 0; i < nregs; i++)
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+
+ return p;
+}
+
+#if ARM_UNWIND_INFO
+/* Emit unwind directives for a store-multiple instruction or stack pointer
+ push during alignment.
+ These should only ever be generated by the function prologue code, so
+ expect them to have a particular form. */
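+/* (Illustrative: the prologue insn for "push {r4, r5, lr}" is
+ annotated below as ".save {r4, r5, lr}".) */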
+
+static void
+arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
+{
+ int i;
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT nregs;
+ int reg_size;
+ unsigned reg;
+ unsigned lastreg;
+ rtx e;
+
+ e = XVECEXP (p, 0, 0);
+ if (GET_CODE (e) != SET)
+ abort ();
+
+ /* First insn will adjust the stack pointer. */
+ if (GET_CODE (e) != SET
+ || GET_CODE (XEXP (e, 0)) != REG
+ || REGNO (XEXP (e, 0)) != SP_REGNUM
+ || GET_CODE (XEXP (e, 1)) != PLUS)
+ abort ();
+
+ offset = -INTVAL (XEXP (XEXP (e, 1), 1));
+ nregs = XVECLEN (p, 0) - 1;
+
+ reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
+ if (reg < 16)
+ {
+ /* The function prologue may also push pc, but not annotate it as it is
+ never restored. We turn this into a stack pointer adjustment. */
+ if (nregs * 4 == offset - 4)
+ {
+ fprintf (asm_out_file, "\t.pad #4\n");
+ offset -= 4;
+ }
+ reg_size = 4;
+ fprintf (asm_out_file, "\t.save {");
+ }
+ else if (IS_VFP_REGNUM (reg))
+ {
+ reg_size = 8;
+ fprintf (asm_out_file, "\t.vsave {");
+ }
+ else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
+ {
+ /* FPA registers are done differently. */
+ asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
+ return;
+ }
+ else
+ /* Unknown register type. */
+ abort ();
+
+ /* If the stack increment doesn't match the size of the saved registers,
+ something has gone horribly wrong. */
+ if (offset != nregs * reg_size)
+ abort ();
+
+ offset = 0;
+ lastreg = 0;
+ /* The remaining insns will describe the stores. */
+ for (i = 1; i <= nregs; i++)
+ {
+ /* Expect (set (mem <addr>) (reg)).
+ Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
+ e = XVECEXP (p, 0, i);
+ if (GET_CODE (e) != SET
+ || GET_CODE (XEXP (e, 0)) != MEM
+ || GET_CODE (XEXP (e, 1)) != REG)
+ abort ();
+
+ reg = REGNO (XEXP (e, 1));
+ if (reg < lastreg)
+ abort ();
+
+ if (i != 1)
+ fprintf (asm_out_file, ", ");
+ /* We can't use %r for vfp because we need to use the
+ double precision register names. */
+ if (IS_VFP_REGNUM (reg))
+ asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
+ else
+ asm_fprintf (asm_out_file, "%r", reg);
+
+#ifdef ENABLE_CHECKING
+ /* Check that the addresses are consecutive. */
+ e = XEXP (XEXP (e, 0), 0);
+ if (GET_CODE (e) == PLUS)
+ {
+ offset += reg_size;
+ if (GET_CODE (XEXP (e, 0)) != REG
+ || REGNO (XEXP (e, 0)) != SP_REGNUM
+ || GET_CODE (XEXP (e, 1)) != CONST_INT
+ || offset != INTVAL (XEXP (e, 1)))
+ abort ();
+ }
+ else if (i != 1
+ || GET_CODE (e) != REG
+ || REGNO (e) != SP_REGNUM)
+ abort ();
+#endif
+ }
+ fprintf (asm_out_file, "}\n");
+}
+
+/* Emit unwind directives for a SET. */
+
+static void
+arm_unwind_emit_set (FILE * asm_out_file, rtx p)
+{
+ rtx e0;
+ rtx e1;
+ unsigned reg;
+
+ e0 = XEXP (p, 0);
+ e1 = XEXP (p, 1);
+ switch (GET_CODE (e0))
+ {
+ case MEM:
+ /* Pushing a single register. */
+ if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
+ || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
+ || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
+ abort ();
+
+ asm_fprintf (asm_out_file, "\t.save ");
+ if (IS_VFP_REGNUM (REGNO (e1)))
+ asm_fprintf(asm_out_file, "{d%d}\n",
+ (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
+ else
+ asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
+ break;
+
+ case REG:
+ if (REGNO (e0) == SP_REGNUM)
+ {
+ /* A stack increment. */
+ if (GET_CODE (e1) != PLUS
+ || GET_CODE (XEXP (e1, 0)) != REG
+ || REGNO (XEXP (e1, 0)) != SP_REGNUM
+ || GET_CODE (XEXP (e1, 1)) != CONST_INT)
+ abort ();
+
+ asm_fprintf (asm_out_file, "\t.pad #%wd\n",
+ -INTVAL (XEXP (e1, 1)));
+ }
+ else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
+ {
+ HOST_WIDE_INT offset;
+
+ if (GET_CODE (e1) == PLUS)
+ {
+ if (GET_CODE (XEXP (e1, 0)) != REG
+ || GET_CODE (XEXP (e1, 1)) != CONST_INT)
+ abort ();
+ reg = REGNO (XEXP (e1, 0));
+ offset = INTVAL (XEXP (e1, 1));
+ asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
+ HARD_FRAME_POINTER_REGNUM, reg,
+ offset);
+ }
+ else if (GET_CODE (e1) == REG)
+ {
+ reg = REGNO (e1);
+ asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
+ HARD_FRAME_POINTER_REGNUM, reg);
+ }
+ else
+ abort ();
+ }
+ else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
+ {
+ /* Move from sp to reg. */
+ asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
+ }
+ else if (GET_CODE (e1) == PLUS
+ && GET_CODE (XEXP (e1, 0)) == REG
+ && REGNO (XEXP (e1, 0)) == SP_REGNUM
+ && GET_CODE (XEXP (e1, 1)) == CONST_INT)
+ {
+ /* Set reg to offset from sp. */
+ asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
+ REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
+ }
+ else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
+ {
+ /* Stack pointer save before alignment. */
+ reg = REGNO (e0);
+ asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
+ reg + 0x90, reg);
+ }
+ else
+ abort ();
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+
+/* Emit unwind directives for the given insn. */
+
+static void
+arm_unwind_emit (FILE * asm_out_file, rtx insn)
+{
+ rtx pat;
+
+ if (arm_except_unwind_info (&global_options) != UI_TARGET)
+ return;
+
+ if (!(flag_unwind_tables || crtl->uses_eh_lsda)
+ && (TREE_NOTHROW (current_function_decl)
+ || crtl->all_throwers_are_sibcalls))
+ return;
+
+ if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
+ return;
+
+ pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
+ if (pat)
+ pat = XEXP (pat, 0);
+ else
+ pat = PATTERN (insn);
+
+ switch (GET_CODE (pat))
+ {
+ case SET:
+ arm_unwind_emit_set (asm_out_file, pat);
+ break;
+
+ case SEQUENCE:
+ /* Store multiple. */
+ arm_unwind_emit_sequence (asm_out_file, pat);
+ break;
+
+ default:
+ abort();
+ }
+}
+
+
+/* Output a reference from a function exception table to the type_info
+ object X. The EABI specifies that the symbol should be relocated by
+ an R_ARM_TARGET2 relocation. */
+
+static bool
+arm_output_ttype (rtx x)
+{
+ fputs ("\t.word\t", asm_out_file);
+ output_addr_const (asm_out_file, x);
+ /* Use special relocations for symbol references. */
+ if (GET_CODE (x) != CONST_INT)
+ fputs ("(TARGET2)", asm_out_file);
+ fputc ('\n', asm_out_file);
+
+ return TRUE;
+}
+
+/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
+
+static void
+arm_asm_emit_except_personality (rtx personality)
+{
+ fputs ("\t.personality\t", asm_out_file);
+ output_addr_const (asm_out_file, personality);
+ fputc ('\n', asm_out_file);
+}
+
+/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
+
+static void
+arm_asm_init_sections (void)
+{
+ exception_section = get_unnamed_section (0, output_section_asm_op,
+ "\t.handlerdata");
+}
+#endif /* ARM_UNWIND_INFO */
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO. */
+
+static enum unwind_info_type
+arm_except_unwind_info (struct gcc_options *opts)
+{
+ /* Honor the --enable-sjlj-exceptions configure switch. */
+#ifdef CONFIG_SJLJ_EXCEPTIONS
+ if (CONFIG_SJLJ_EXCEPTIONS)
+ return UI_SJLJ;
+#endif
+
+ /* If not using ARM EABI unwind tables... */
+ if (ARM_UNWIND_INFO)
+ {
+ /* For simplicity elsewhere in this file, indicate that all unwind
+ info is disabled if we're not emitting unwind tables. */
+ if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
+ return UI_NONE;
+ else
+ return UI_TARGET;
+ }
+
+ /* ... we use sjlj exceptions for backwards compatibility. */
+ return UI_SJLJ;
+}
+
+
+/* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
+ stack alignment. */
+
+static void
+arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
+{
+ rtx unspec = SET_SRC (pattern);
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
+
+ switch (index)
+ {
+ case UNSPEC_STACK_ALIGN:
+ /* ??? We should set the CFA = (SP & ~7). At this point we haven't
+ put anything on the stack, so hopefully it won't matter.
+ CFA = SP will be correct after alignment. */
+ dwarf2out_reg_save_reg (label, stack_pointer_rtx,
+ SET_DEST (pattern));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Output unwind directives for the start/end of a function. */
+
+void
+arm_output_fn_unwind (FILE * f, bool prologue)
+{
+ if (arm_except_unwind_info (&global_options) != UI_TARGET)
+ return;
+
+ if (prologue)
+ fputs ("\t.fnstart\n", f);
+ else
+ {
+ /* If this function will never be unwound, then mark it as such.
+	 The same condition is used in arm_unwind_emit to suppress
+ the frame annotations. */
+ if (!(flag_unwind_tables || crtl->uses_eh_lsda)
+ && (TREE_NOTHROW (current_function_decl)
+ || crtl->all_throwers_are_sibcalls))
+ fputs("\t.cantunwind\n", f);
+
+ fputs ("\t.fnend\n", f);
+ }
+}
+
+static bool
+arm_emit_tls_decoration (FILE *fp, rtx x)
+{
+ enum tls_reloc reloc;
+ rtx val;
+
+ val = XVECEXP (x, 0, 0);
+ reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
+
+ output_addr_const (fp, val);
+
+ switch (reloc)
+ {
+ case TLS_GD32:
+ fputs ("(tlsgd)", fp);
+ break;
+ case TLS_LDM32:
+ fputs ("(tlsldm)", fp);
+ break;
+ case TLS_LDO32:
+ fputs ("(tlsldo)", fp);
+ break;
+ case TLS_IE32:
+ fputs ("(gottpoff)", fp);
+ break;
+ case TLS_LE32:
+ fputs ("(tpoff)", fp);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (reloc)
+ {
+ case TLS_GD32:
+ case TLS_LDM32:
+ case TLS_IE32:
+ fputs (" + (. - ", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 2));
+ fputs (" - ", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 3));
+ fputc (')', fp);
+ break;
+ default:
+ break;
+ }
+
+ return TRUE;
+}
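+
+/* As an illustrative sketch (the label names here are invented): a
+   TLS_GD32 reference to SYM whose UNSPEC_TLS vector carries the labels
+   .LPIC0 and .LTLS0 is decorated as
+	SYM(tlsgd) + (. - .LPIC0 - .LTLS0)
+   while a TLS_LE32 reference emits just SYM(tpoff).  */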
+
+/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
+
+static void
+arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ gcc_assert (size == 4);
+ fputs ("\t.word\t", file);
+ output_addr_const (file, x);
+ fputs ("(tlsldo)", file);
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+arm_output_addr_const_extra (FILE *fp, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
+ return arm_emit_tls_decoration (fp, x);
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
+ {
+ char label[256];
+ int labelno = INTVAL (XVECEXP (x, 0, 0));
+
+ ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
+ assemble_name_raw (fp, label);
+
+ return TRUE;
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
+ {
+ assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
+ if (GOT_PCREL)
+ fputs ("+.", fp);
+ fputs ("-(", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputc (')', fp);
+ return TRUE;
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
+ {
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ if (GOT_PCREL)
+ fputs ("+.", fp);
+ fputs ("-(", fp);
+ output_addr_const (fp, XVECEXP (x, 0, 1));
+ fputc (')', fp);
+ return TRUE;
+ }
+ else if (GET_CODE (x) == CONST_VECTOR)
+ return arm_emit_vector_const (fp, x);
+
+ return FALSE;
+}
+
+/* Output assembly for a shift instruction.
+ SET_FLAGS determines how the instruction modifies the condition codes.
+ 0 - Do not set condition codes.
+ 1 - Set condition codes.
+ 2 - Use smallest instruction. */
+const char *
+arm_output_shift(rtx * operands, int set_flags)
+{
+ char pattern[100];
+ static const char flag_chars[3] = {'?', '.', '!'};
+ const char *shift;
+ HOST_WIDE_INT val;
+ char c;
+
+ c = flag_chars[set_flags];
+ if (TARGET_UNIFIED_ASM)
+ {
+ shift = shift_op(operands[3], &val);
+ if (shift)
+ {
+ if (val != -1)
+ operands[2] = GEN_INT(val);
+ sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
+ }
+ else
+ sprintf (pattern, "mov%%%c\t%%0, %%1", c);
+ }
+ else
+ sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
+ output_asm_insn (pattern, operands);
+ return "";
+}
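+
+/* Illustrative only: with TARGET_UNIFIED_ASM and SET_FLAGS == 1, a
+   left-shift-by-two operand set should come out as something like
+	lsls	r0, r1, #2
+   while the non-unified fallback folds the shifter operand into a
+   move, e.g. "mov r0, r1, lsl #2", via the %S3 modifier.  */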
+
+/* Output a Thumb-1 casesi dispatch sequence. */
+const char *
+thumb1_output_casesi (rtx *operands)
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[0]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE(diff_vec))
+ {
+ case QImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
+ case HImode:
+ return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
+ "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
+ case SImode:
+ return "bl\t%___gnu_thumb1_case_si";
+ default:
+ gcc_unreachable ();
+ }
+}
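+
+/* The %_ sequence expands to USER_LABEL_PREFIX (empty on most ELF
+   targets), so a QImode unsigned table typically dispatches via
+   "bl __gnu_thumb1_case_uqi"; the __gnu_thumb1_case_* helpers are
+   provided by libgcc.  */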
+
+/* Output a Thumb-2 casesi instruction. */
+const char *
+thumb2_output_casesi (rtx *operands)
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ output_asm_insn ("cmp\t%0, %1", operands);
+ output_asm_insn ("bhi\t%l3", operands);
+ switch (GET_MODE(diff_vec))
+ {
+ case QImode:
+ return "tbb\t[%|pc, %0]";
+ case HImode:
+ return "tbh\t[%|pc, %0, lsl #1]";
+ case SImode:
+ if (flag_pic)
+ {
+ output_asm_insn ("adr\t%4, %l2", operands);
+ output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
+ output_asm_insn ("add\t%4, %4, %5", operands);
+ return "bx\t%4";
+ }
+ else
+ {
+ output_asm_insn ("adr\t%4, %l2", operands);
+ return "ldr\t%|pc, [%4, %0, lsl #2]";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
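+
+/* Sketch of a non-PIC HImode dispatch produced by the above (register
+   numbers illustrative):
+	cmp	r0, #MAX
+	bhi	.Ldefault
+	tbh	[pc, r0, lsl #1]
+   with the table of halfword offsets following the tbh.  */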
+
+/* Most ARM cores are single issue, but some newer ones can dual issue.
+ The scheduler descriptions rely on this being correct. */
+static int
+arm_issue_rate (void)
+{
+ switch (arm_tune)
+ {
+ case cortexr4:
+ case cortexr4f:
+ case cortexa5:
+ case cortexa8:
+ case cortexa9:
+ case fa726te:
+ return 2;
+
+ default:
+ return 1;
+ }
+}
+
+/* A table and a function to perform ARM-specific name mangling for
+ NEON vector types in order to conform to the AAPCS (see "Procedure
+ Call Standard for the ARM Architecture", Appendix A). To qualify
+ for emission with the mangled names defined in that document, a
+ vector type must not only be of the correct mode but also be
+ composed of NEON vector element types (e.g. __builtin_neon_qi). */
+typedef struct
+{
+ enum machine_mode mode;
+ const char *element_type_name;
+ const char *aapcs_name;
+} arm_mangle_map_entry;
+
+static arm_mangle_map_entry arm_mangle_map[] = {
+ /* 64-bit containerized types. */
+ { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
+ { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
+ { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
+ { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
+ { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
+ { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
+ { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
+ { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
+ { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
+ /* 128-bit containerized types. */
+ { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
+ { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
+ { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
+ { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
+ { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
+ { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
+ { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
+ { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
+ { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
+ { VOIDmode, NULL, NULL }
+};
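+
+/* For example, int8x8_t from arm_neon.h has mode V8QImode and element
+   type __builtin_neon_qi, so by the first entry above a function
+   void f (int8x8_t) would mangle as _Z1f15__simd64_int8_t.  */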
+
+const char *
+arm_mangle_type (const_tree type)
+{
+ arm_mangle_map_entry *pos = arm_mangle_map;
+
+ /* The ARM ABI documents (10th October 2008) say that "__va_list"
+     has to be mangled as if it is in the "std" namespace.  */
+ if (TARGET_AAPCS_BASED
+ && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
+ {
+ static bool warned;
+ if (!warned && warn_psabi && !in_system_header)
+ {
+ warned = true;
+ inform (input_location,
+ "the mangling of %<va_list%> has changed in GCC 4.4");
+ }
+ return "St9__va_list";
+ }
+
+ /* Half-precision float. */
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+ return "Dh";
+
+ if (TREE_CODE (type) != VECTOR_TYPE)
+ return NULL;
+
+ /* Check the mode of the vector type, and the name of the vector
+ element type, against the table. */
+ while (pos->mode != VOIDmode)
+ {
+ tree elt_type = TREE_TYPE (type);
+
+ if (pos->mode == TYPE_MODE (type)
+ && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
+ && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
+ pos->element_type_name))
+ return pos->aapcs_name;
+
+ pos++;
+ }
+
+ /* Use the default mangling for unrecognized (possibly user-defined)
+ vector types. */
+ return NULL;
+}
+
+/* Order of allocation of core registers for Thumb: this allocation is
+ written over the corresponding initial entries of the array
+ initialized with REG_ALLOC_ORDER. We allocate all low registers
+ first. Saving and restoring a low register is usually cheaper than
+ using a call-clobbered high register. */
+
+static const int thumb_core_reg_alloc_order[] =
+{
+ 3, 2, 1, 0, 4, 5, 6, 7,
+ 14, 12, 8, 9, 10, 11, 13, 15
+};
+
+/* Adjust register allocation order when compiling for Thumb. */
+
+void
+arm_order_regs_for_local_alloc (void)
+{
+ const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
+ memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
+ if (TARGET_THUMB)
+ memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
+ sizeof (thumb_core_reg_alloc_order));
+}
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+bool
+arm_frame_pointer_required (void)
+{
+ return (cfun->has_nonlocal_label
+ || SUBTARGET_FRAME_POINTER_REQUIRED
+ || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
+}
+
+/* Only thumb1 can't support conditional execution, so return true if
+ the target is not thumb1. */
+static bool
+arm_have_conditional_execution (void)
+{
+ return !TARGET_THUMB1;
+}
+
+/* Legitimize a memory reference for a sync primitive implemented using
+   ldrex / strex.  We currently force the form of the reference to be
+ indirect without offset. We do not yet support the indirect offset
+ addressing supported by some ARM targets for these
+ instructions. */
+static rtx
+arm_legitimize_sync_memory (rtx memory)
+{
+ rtx addr = force_reg (Pmode, XEXP (memory, 0));
+ rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
+
+ set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
+ return legitimate_memory;
+}
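+
+/* E.g. a reference such as (mem:SI (plus:SI (reg:SI r4) (const_int 8)))
+   is rewritten as (mem:SI (reg:SI rN)) with rN loaded from r4 + 8
+   beforehand; plain register-indirect is the only form the ldrex/strex
+   output routines below are expected to handle.  */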
+
+/* An instruction emitter. */
+typedef void (* emit_f) (int label, const char *, rtx *);
+
+/* An instruction emitter that emits via the conventional
+ output_asm_insn. */
+static void
+arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
+{
+ output_asm_insn (pattern, operands);
+}
+
+/* Count the number of emitted synchronization instructions. */
+static unsigned arm_insn_count;
+
+/* An emitter that counts emitted instructions but does not actually
+   emit instructions into the instruction stream.  */
+static void
+arm_count (int label,
+ const char *pattern ATTRIBUTE_UNUSED,
+ rtx *operands ATTRIBUTE_UNUSED)
+{
+ if (! label)
+ ++ arm_insn_count;
+}
+
+/* Construct a pattern using conventional output formatting and feed
+ it to output_asm_insn. Provides a mechanism to construct the
+ output pattern on the fly. Note the hard limit on the pattern
+ buffer size. */
+static void ATTRIBUTE_PRINTF_4
+arm_output_asm_insn (emit_f emit, int label, rtx *operands,
+ const char *pattern, ...)
+{
+ va_list ap;
+ char buffer[256];
+
+ va_start (ap, pattern);
+ vsprintf (buffer, pattern, ap);
+ va_end (ap);
+ emit (label, buffer, operands);
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target to a specified emitter. */
+static void
+arm_process_output_memory_barrier (emit_f emit, rtx *operands)
+{
+ if (TARGET_HAVE_DMB)
+ {
+ /* Note we issue a system level barrier. We should consider
+	 issuing an inner shareability zone barrier here instead, i.e.
+ "DMB ISH". */
+ emit (0, "dmb\tsy", operands);
+ return;
+ }
+
+ if (TARGET_HAVE_DMB_MCR)
+ {
+ emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
+ return;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Emit the memory barrier instruction, if any, provided by this
+ target. */
+const char *
+arm_output_memory_barrier (rtx *operands)
+{
+ arm_process_output_memory_barrier (arm_emit, operands);
+ return "";
+}
+
+/* Helper to figure out the instruction suffix required on ldrex/strex
+ for operations on an object of the specified mode. */
+static const char *
+arm_ldrex_suffix (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode: return "b";
+ case HImode: return "h";
+ case SImode: return "";
+ case DImode: return "d";
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
+
+/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_ldrex (emit_f emit,
+ enum machine_mode mode,
+ rtx target,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[2];
+
+ operands[0] = target;
+ operands[1] = memory;
+ arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
+}
+
+/* Emit a strex{b,h,d, } instruction appropriate for the specified
+ mode. */
+static void
+arm_output_strex (emit_f emit,
+ enum machine_mode mode,
+ const char *cc,
+ rtx result,
+ rtx value,
+ rtx memory)
+{
+ const char *suffix = arm_ldrex_suffix (mode);
+ rtx operands[3];
+
+ operands[0] = result;
+ operands[1] = value;
+ operands[2] = memory;
+ arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
+ cc);
+}
+
+/* Helper to emit a two operand instruction. */
+static void
+arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
+{
+ rtx operands[2];
+
+ operands[0] = d;
+ operands[1] = s;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
+}
+
+/* Helper to emit a three operand instruction. */
+static void
+arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
+{
+ rtx operands[3];
+
+ operands[0] = d;
+ operands[1] = a;
+ operands[2] = b;
+ arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
+}
+
+/* Emit a load store exclusive synchronization loop.
+
+ do
+ old_value = [mem]
+ if old_value != required_value
+ break;
+ t1 = sync_op (old_value, new_value)
+ [mem] = t1, t2 = [0|1]
+ while ! t2
+
+ Note:
+ t1 == t2 is not permitted
+ t1 == old_value is permitted
+
+ required_value:
+
+ RTX register or const_int representing the required old_value for
+   the modify to continue; if NULL no comparison is performed.  */
+static void
+arm_output_sync_loop (emit_f emit,
+ enum machine_mode mode,
+ rtx old_value,
+ rtx memory,
+ rtx required_value,
+ rtx new_value,
+ rtx t1,
+ rtx t2,
+ enum attr_sync_op sync_op,
+ int early_barrier_required)
+{
+ rtx operands[1];
+
+ gcc_assert (t1 != t2);
+
+ if (early_barrier_required)
+ arm_process_output_memory_barrier (emit, NULL);
+
+ arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
+
+ arm_output_ldrex (emit, mode, old_value, memory);
+
+ if (required_value)
+ {
+ rtx operands[2];
+
+ operands[0] = old_value;
+ operands[1] = required_value;
+ arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
+ }
+
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "add", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "sub", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_IOR:
+ arm_output_op3 (emit, "orr", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_AND:
+ arm_output_op3 (emit,"and", t1, old_value, new_value);
+ break;
+
+ case SYNC_OP_NAND:
+ arm_output_op3 (emit, "and", t1, old_value, new_value);
+ arm_output_op2 (emit, "mvn", t1, t1);
+ break;
+
+ case SYNC_OP_NONE:
+ t1 = new_value;
+ break;
+ }
+
+ if (t2)
+ {
+ arm_output_strex (emit, mode, "", t2, t1, memory);
+ operands[0] = t2;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+ LOCAL_LABEL_PREFIX);
+ }
+ else
+ {
+ /* Use old_value for the return value because for some operations
+ the old_value can easily be restored. This saves one register. */
+ arm_output_strex (emit, mode, "", old_value, t1, memory);
+ operands[0] = old_value;
+ arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+ arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+ LOCAL_LABEL_PREFIX);
+
+ switch (sync_op)
+ {
+ case SYNC_OP_ADD:
+ arm_output_op3 (emit, "sub", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_SUB:
+ arm_output_op3 (emit, "add", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_XOR:
+ arm_output_op3 (emit, "eor", old_value, t1, new_value);
+ break;
+
+ case SYNC_OP_NONE:
+ arm_output_op2 (emit, "mov", old_value, required_value);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+  /* Note: the label is placed before the barrier so that in the cmp
+     failure case we still get a barrier to stop subsequent loads
+     floating upwards past the ldrex; see PR target/48126.  */
+ arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
+ arm_process_output_memory_barrier (emit, NULL);
+}
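+
+/* A concrete sketch of the emitted loop for an SImode SYNC_OP_ADD with
+   both temporaries available and an early barrier (numeric labels stand
+   in for the LOCAL_LABEL_PREFIX-generated LSYT/LSYB labels):
+
+	dmb	sy
+   1:	ldrex	r0, [r2]
+	add	r3, r0, r1
+	strex	r4, r3, [r2]
+	teq	r4, #0
+	bne	1b
+   2:	dmb	sy  */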
+
+static rtx
+arm_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
+ if (index > 0)
+ default_value = operands[index - 1];
+
+ return default_value;
+}
+
+#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
+ arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
+
+/* Extract the operands for a synchronization instruction from the
+   instruction's attributes and emit the instruction.  */
+static void
+arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
+{
+ rtx result, memory, required_value, new_value, t1, t2;
+ int early_barrier;
+ enum machine_mode mode;
+ enum attr_sync_op sync_op;
+
+ result = FETCH_SYNC_OPERAND(result, 0);
+ memory = FETCH_SYNC_OPERAND(memory, 0);
+ required_value = FETCH_SYNC_OPERAND(required_value, 0);
+ new_value = FETCH_SYNC_OPERAND(new_value, 0);
+ t1 = FETCH_SYNC_OPERAND(t1, 0);
+ t2 = FETCH_SYNC_OPERAND(t2, 0);
+ early_barrier =
+ get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
+ sync_op = get_attr_sync_op (insn);
+ mode = GET_MODE (memory);
+
+ arm_output_sync_loop (emit, mode, result, memory, required_value,
+ new_value, t1, t2, sync_op, early_barrier);
+}
+
+/* Emit a synchronization instruction loop. */
+const char *
+arm_output_sync_insn (rtx insn, rtx *operands)
+{
+ arm_process_output_sync_insn (arm_emit, insn, operands);
+ return "";
+}
+
+/* Count the number of machine instructions that will be emitted for a
+   synchronization instruction.  Note that the emitter used does not
+   emit instructions; it just counts them, being careful not to
+   count labels.  */
+unsigned int
+arm_sync_loop_insns (rtx insn, rtx *operands)
+{
+ arm_insn_count = 0;
+ arm_process_output_sync_insn (arm_count, insn, operands);
+ return arm_insn_count;
+}
+
+/* Helper to call a target sync instruction generator, dealing with
+ the variation in operands required by the different generators. */
+static rtx
+arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
+ rtx memory, rtx required_value, rtx new_value)
+{
+ switch (generator->op)
+ {
+ case arm_sync_generator_omn:
+ gcc_assert (! required_value);
+ return generator->u.omn (old_value, memory, new_value);
+
+ case arm_sync_generator_omrn:
+ gcc_assert (required_value);
+ return generator->u.omrn (old_value, memory, required_value, new_value);
+ }
+
+ return NULL;
+}
+
+/* Expand a synchronization loop. The synchronization loop is expanded
+ as an opaque block of instructions in order to ensure that we do
+ not subsequently get extraneous memory accesses inserted within the
+ critical region. The exclusive access property of ldrex/strex is
+   only guaranteed if there are no intervening memory accesses.  */
+void
+arm_expand_sync (enum machine_mode mode,
+ struct arm_sync_generator *generator,
+ rtx target, rtx memory, rtx required_value, rtx new_value)
+{
+ if (target == NULL)
+ target = gen_reg_rtx (mode);
+
+ memory = arm_legitimize_sync_memory (memory);
+ if (mode != SImode)
+ {
+ rtx load_temp = gen_reg_rtx (SImode);
+
+ if (required_value)
+ required_value = convert_modes (SImode, mode, required_value, true);
+
+ new_value = convert_modes (SImode, mode, new_value, true);
+ emit_insn (arm_call_generator (generator, load_temp, memory,
+ required_value, new_value));
+ emit_move_insn (target, gen_lowpart (mode, load_temp));
+ }
+ else
+ {
+ emit_insn (arm_call_generator (generator, target, memory, required_value,
+ new_value));
+ }
+}
+
+static bool
+arm_vector_alignment_reachable (const_tree type, bool is_packed)
+{
+ /* Vectors which aren't in packed structures will not be less aligned than
+ the natural alignment of their element type, so this is safe. */
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ return !is_packed;
+
+ return default_builtin_vector_alignment_reachable (type, is_packed);
+}
+
+static bool
+arm_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type, int misalignment,
+ bool is_packed)
+{
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN)
+ {
+ HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
+
+ if (is_packed)
+ return align == 1;
+
+ /* If the misalignment is unknown, we should be able to handle the access
+ so long as it is not to a member of a packed data structure. */
+ if (misalignment == -1)
+ return true;
+
+ /* Return true if the misalignment is a multiple of the natural alignment
+ of the vector's element type. This is probably always going to be
+ true in practice, since we've already established that this isn't a
+ packed access. */
+ return ((misalignment % align) == 0);
+ }
+
+ return default_builtin_support_vector_misalignment (mode, type, misalignment,
+ is_packed);
+}
+
+static void
+arm_conditional_register_usage (void)
+{
+ int regno;
+
+ if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
+ {
+ for (regno = FIRST_FPA_REGNUM;
+ regno <= LAST_FPA_REGNUM; ++regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+
+ if (TARGET_THUMB1 && optimize_size)
+ {
+ /* When optimizing for size on Thumb-1, it's better not
+ to use the HI regs, because of the overhead of
+ stacking them. */
+ for (regno = FIRST_HI_REGNUM;
+ regno <= LAST_HI_REGNUM; ++regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+
+ /* The link register can be clobbered by any branch insn,
+ but we have no way to track that at present, so mark
+ it as unavailable. */
+ if (TARGET_THUMB1)
+ fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
+
+ if (TARGET_32BIT && TARGET_HARD_FLOAT)
+ {
+ if (TARGET_MAVERICK)
+ {
+ for (regno = FIRST_FPA_REGNUM;
+ regno <= LAST_FPA_REGNUM; ++ regno)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ for (regno = FIRST_CIRRUS_FP_REGNUM;
+ regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
+ }
+ }
+ if (TARGET_VFP)
+ {
+ /* VFPv3 registers are disabled when earlier VFP
+ versions are selected due to the definition of
+ LAST_VFP_REGNUM. */
+ for (regno = FIRST_VFP_REGNUM;
+ regno <= LAST_VFP_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
+ || regno >= FIRST_VFP_REGNUM + 32;
+ }
+ }
+ }
+
+ if (TARGET_REALLY_IWMMXT)
+ {
+ regno = FIRST_IWMMXT_GR_REGNUM;
+ /* The 2002/10/09 revision of the XScale ABI has wCG0
+ and wCG1 as call-preserved registers. The 2002/11/21
+ revision changed this so that all wCG registers are
+ scratch registers. */
+ for (regno = FIRST_IWMMXT_GR_REGNUM;
+ regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
+ fixed_regs[regno] = 0;
+ /* The XScale ABI has wR0 - wR9 as scratch registers,
+ the rest as call-preserved registers. */
+ for (regno = FIRST_IWMMXT_REGNUM;
+ regno <= LAST_IWMMXT_REGNUM; ++ regno)
+ {
+ fixed_regs[regno] = 0;
+ call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
+ }
+ }
+
+ if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+ else if (TARGET_APCS_STACK)
+ {
+ fixed_regs[10] = 1;
+ call_used_regs[10] = 1;
+ }
+ /* -mcaller-super-interworking reserves r11 for calls to
+ _interwork_r11_call_via_rN(). Making the register global
+ is an easy way of ensuring that it remains valid for all
+ calls. */
+ if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
+ || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
+ {
+ fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ if (TARGET_CALLER_INTERWORKING)
+ global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
+ }
+ SUBTARGET_CONDITIONAL_REGISTER_USAGE
+}
+
+static reg_class_t
+arm_preferred_rename_class (reg_class_t rclass)
+{
+  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
+     using GENERAL_REGS.  During the register rename pass we therefore
+     prefer LO_REGS, which can reduce code size.  */
+ if (TARGET_THUMB2 && rclass == GENERAL_REGS)
+ return LO_REGS;
+ else
+ return NO_REGS;
+}
+
+#include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
new file mode 100644
index 000000000..292b48f96
--- /dev/null
+++ b/gcc/config/arm/arm.h
@@ -0,0 +1,2464 @@
+/* Definitions of target machine for GNU compiler, for ARM.
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+ and Martin Simmons (@harleqn.co.uk).
+ More major hacks by Richard Earnshaw (rearnsha@arm.com)
+ Minor hacks by Nick Clifton (nickc@cygnus.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_ARM_H
+#define GCC_ARM_H
+
+/* We can't use enum machine_mode inside a generator file because it
+ hasn't been created yet; we shouldn't be using any code that
+ needs the real definition though, so this ought to be safe. */
+#ifdef GENERATOR_FILE
+#define MACHMODE int
+#else
+#include "insn-modes.h"
+#define MACHMODE enum machine_mode
+#endif
+
+#include "config/vxworks-dummy.h"
+
+/* The architecture define. */
+extern char arm_arch_name[];
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ if (TARGET_DSP_MULTIPLY) \
+ builtin_define ("__ARM_FEATURE_DSP"); \
+ /* Define __arm__ even when in thumb mode, for \
+ consistency with armcc. */ \
+ builtin_define ("__arm__"); \
+ builtin_define ("__APCS_32__"); \
+ if (TARGET_THUMB) \
+ builtin_define ("__thumb__"); \
+ if (TARGET_THUMB2) \
+ builtin_define ("__thumb2__"); \
+ \
+ if (TARGET_BIG_END) \
+ { \
+ builtin_define ("__ARMEB__"); \
+ if (TARGET_THUMB) \
+ builtin_define ("__THUMBEB__"); \
+ if (TARGET_LITTLE_WORDS) \
+ builtin_define ("__ARMWEL__"); \
+ } \
+ else \
+ { \
+ builtin_define ("__ARMEL__"); \
+ if (TARGET_THUMB) \
+ builtin_define ("__THUMBEL__"); \
+ } \
+ \
+ if (TARGET_SOFT_FLOAT) \
+ builtin_define ("__SOFTFP__"); \
+ \
+ if (TARGET_VFP) \
+ builtin_define ("__VFP_FP__"); \
+ \
+ if (TARGET_NEON) \
+ builtin_define ("__ARM_NEON__"); \
+ \
+ /* Add a define for interworking. \
+ Needed when building libgcc.a. */ \
+ if (arm_cpp_interwork) \
+ builtin_define ("__THUMB_INTERWORK__"); \
+ \
+ builtin_assert ("cpu=arm"); \
+ builtin_assert ("machine=arm"); \
+ \
+ builtin_define (arm_arch_name); \
+ if (arm_arch_cirrus) \
+ builtin_define ("__MAVERICK__"); \
+ if (arm_arch_xscale) \
+ builtin_define ("__XSCALE__"); \
+ if (arm_arch_iwmmxt) \
+ builtin_define ("__IWMMXT__"); \
+ if (TARGET_AAPCS_BASED) \
+ { \
+ if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \
+ builtin_define ("__ARM_PCS_VFP"); \
+ else if (arm_pcs_default == ARM_PCS_AAPCS) \
+ builtin_define ("__ARM_PCS"); \
+ builtin_define ("__ARM_EABI__"); \
+ } \
+ } while (0)
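+
+/* As an illustration, a little-endian EABI build with, say, -mfpu=neon
+   -mfloat-abi=softfp would define __arm__, __APCS_32__, __ARMEL__,
+   __VFP_FP__, __ARM_NEON__ and __ARM_EABI__ from the block above.  */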
+
+/* The various ARM cores. */
+enum processor_type
+{
+#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
+ IDENT,
+#include "arm-cores.def"
+#undef ARM_CORE
+ /* Used to indicate that no processor has been specified. */
+ arm_none
+};
+
+enum target_cpus
+{
+#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
+ TARGET_CPU_##IDENT,
+#include "arm-cores.def"
+#undef ARM_CORE
+ TARGET_CPU_generic
+};
+
+/* The processor for which instructions should be scheduled. */
+extern enum processor_type arm_tune;
+
+enum arm_sync_generator_tag
+ {
+ arm_sync_generator_omn,
+ arm_sync_generator_omrn
+ };
+
+/* Wrapper to pass around a polymorphic pointer to a sync instruction
+   generator together with a tag selecting the generator variant.  */
+struct arm_sync_generator
+{
+ enum arm_sync_generator_tag op;
+ union
+ {
+ rtx (* omn) (rtx, rtx, rtx);
+ rtx (* omrn) (rtx, rtx, rtx, rtx);
+ } u;
+};
+
+typedef enum arm_cond_code
+{
+ ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC,
+ ARM_HI, ARM_LS, ARM_GE, ARM_LT, ARM_GT, ARM_LE, ARM_AL, ARM_NV
+}
+arm_cc;
+
+extern arm_cc arm_current_cc;
+
+#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1))
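+
+/* The enumeration above places each condition next to its inverse, so
+   flipping the low bit maps e.g. ARM_EQ <-> ARM_NE and ARM_GE <-> ARM_LT.  */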
+
+extern int arm_target_label;
+extern int arm_ccfsm_state;
+extern GTY(()) rtx arm_target_insn;
+/* The label of the current constant pool. */
+extern rtx pool_vector_label;
+/* Set to 1 when a return insn is output, this means that the epilogue
+ is not needed. */
+extern int return_used_this_function;
+/* Callback to output language specific object attributes. */
+extern void (*arm_lang_output_object_attributes_hook)(void);
+
+/* Just in case configure has failed to define anything. */
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_generic
+#endif
+
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(subtarget_cpp_spec) \
+%{msoft-float:%{mhard-float: \
+ %e-msoft-float and -mhard-float may not be used together}} \
+%{mbig-endian:%{mlittle-endian: \
+ %e-mbig-endian and -mlittle-endian may not be used together}}"
+
+#ifndef CC1_SPEC
+#define CC1_SPEC ""
+#endif
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name, and a string constant that is used by the GCC
+   driver program.
+
+ Do not define this macro if it does not need to do anything. */
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+/* Run-time Target Specification. */
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/generic)", stderr);
+#endif
+
+#define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT)
+/* Use hardware floating point instructions. */
+#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT)
+/* Use hardware floating point calling convention. */
+#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD)
+#define TARGET_FPA (arm_fpu_desc->model == ARM_FP_MODEL_FPA)
+#define TARGET_MAVERICK (arm_fpu_desc->model == ARM_FP_MODEL_MAVERICK)
+#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
+#define TARGET_IWMMXT (arm_arch_iwmmxt)
+#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT)
+#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT)
+#define TARGET_ARM (! TARGET_THUMB)
+#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */
+#define TARGET_BACKTRACE (leaf_function_p () \
+ ? TARGET_TPCS_LEAF_FRAME \
+ : TARGET_TPCS_FRAME)
+#define TARGET_LDRD (arm_arch5e && ARM_DOUBLEWORD_ALIGN)
+#define TARGET_AAPCS_BASED \
+ (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS)
+
+#define TARGET_HARD_TP (target_thread_pointer == TP_CP15)
+#define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT)
+
+/* Only 16-bit thumb code. */
+#define TARGET_THUMB1 (TARGET_THUMB && !arm_arch_thumb2)
+/* Arm or Thumb-2 32-bit code. */
+#define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2)
+/* 32-bit Thumb-2 code. */
+#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2)
+/* Thumb-1 only. */
+#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm)
+/* FPA emulator without LFM. */
+#define TARGET_FPA_EMU2 (TARGET_FPA && arm_fpu_desc->rev == 2)
+
+/* The following two macros concern the ability to execute coprocessor
+ instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently
+ only ever tested when we know we are generating for VFP hardware; we need
+ to be more careful with TARGET_NEON as noted below. */
+
+/* FPU has the full VFPv3/NEON register file of 32 D registers.  */
+#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32)
+
+/* FPU supports VFPv3 instructions. */
+#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3)
+
+/* FPU only supports VFP single-precision instructions. */
+#define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE)
+
+/* FPU supports VFP double-precision instructions. */
+#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_fpu_desc->regs != VFP_REG_SINGLE)
+
+/* FPU supports half-precision floating-point with NEON element load/store. */
+#define TARGET_NEON_FP16 \
+ (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16)
+
+/* FPU supports VFP half-precision floating-point. */
+#define TARGET_FP16 (TARGET_VFP && arm_fpu_desc->fp16)
+
+/* FPU supports Neon instructions. The setting of this macro gets
+ revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT
+ and TARGET_HARD_FLOAT to ensure that NEON instructions are
+ available. */
+#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \
+ && TARGET_VFP && arm_fpu_desc->neon)
+
+/* "DSP" multiply instructions, eg. SMULxy. */
+#define TARGET_DSP_MULTIPLY \
+ (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em))
+/* Integer SIMD instructions, and extend-accumulate instructions. */
+#define TARGET_INT_SIMD \
+ (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em))
+
+/* Should MOVW/MOVT be used in preference to a constant pool. */
+#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
+
+/* We could use unified syntax for arm mode, but for now we just use it
+ for Thumb-2. */
+#define TARGET_UNIFIED_ASM TARGET_THUMB2
+
+/* Nonzero if this chip provides the DMB instruction. */
+#define TARGET_HAVE_DMB (arm_arch7)
+
+/* Nonzero if this chip implements a memory barrier via CP15. */
+#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \
+ && ! TARGET_THUMB1)
+
+/* Nonzero if this chip implements a memory barrier instruction. */
+#define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
+
+/* Nonzero if this chip supports ldrex and strex */
+#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7)
+
+/* Nonzero if this chip supports ldrex{bhd} and strex{bhd}. */
+#define TARGET_HAVE_LDREXBHD ((arm_arch6k && TARGET_ARM) || arm_arch7)
+
+/* True iff the full BPABI is being used. If TARGET_BPABI is true,
+ then TARGET_AAPCS_BASED must be true -- but the converse does not
+ hold. TARGET_BPABI implies the use of the BPABI runtime library,
+ etc., in addition to just the AAPCS calling conventions. */
+#ifndef TARGET_BPABI
+#define TARGET_BPABI false
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-arch is ignored if -march or -mcpu are specified.
+ --with-cpu is ignored if -march or -mcpu are specified, and is overridden
+ by --with-arch.
+ --with-tune is ignored if -mtune or -mcpu are specified (but not affected
+ by -march).
+ --with-float is ignored if -mhard-float, -msoft-float or -mfloat-abi are
+ specified.
+ --with-fpu is ignored if -mfpu is specified.
+   --with-abi is ignored if -mabi is specified.  */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
+ {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \
+ {"float", \
+ "%{!msoft-float:%{!mhard-float:%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}}}" }, \
+ {"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \
+ {"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \
+ {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"},
+
+/* Which floating point model to use. */
+enum arm_fp_model
+{
+ ARM_FP_MODEL_UNKNOWN,
+ /* FPA model (Hardware or software). */
+ ARM_FP_MODEL_FPA,
+ /* Cirrus Maverick floating point model. */
+ ARM_FP_MODEL_MAVERICK,
+ /* VFP floating point model. */
+ ARM_FP_MODEL_VFP
+};
+
+enum vfp_reg_type
+{
+ VFP_NONE = 0,
+ VFP_REG_D16,
+ VFP_REG_D32,
+ VFP_REG_SINGLE
+};
+
+extern const struct arm_fpu_desc
+{
+ const char *name;
+ enum arm_fp_model model;
+ int rev;
+ enum vfp_reg_type regs;
+ int neon;
+ int fp16;
+} *arm_fpu_desc;
+
+/* Which floating point hardware to schedule for. */
+extern int arm_fpu_attr;
+
+enum float_abi_type
+{
+ ARM_FLOAT_ABI_SOFT,
+ ARM_FLOAT_ABI_SOFTFP,
+ ARM_FLOAT_ABI_HARD
+};
+
+extern enum float_abi_type arm_float_abi;
+
+#ifndef TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
+#endif
+
+/* Which __fp16 format to use.
+ The enumeration values correspond to the numbering for the
+ Tag_ABI_FP_16bit_format attribute.
+ */
+enum arm_fp16_format_type
+{
+ ARM_FP16_FORMAT_NONE = 0,
+ ARM_FP16_FORMAT_IEEE = 1,
+ ARM_FP16_FORMAT_ALTERNATIVE = 2
+};
+
+extern enum arm_fp16_format_type arm_fp16_format;
+#define LARGEST_EXPONENT_IS_NORMAL(bits) \
+ ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
+
+/* Which ABI to use. */
+enum arm_abi_type
+{
+ ARM_ABI_APCS,
+ ARM_ABI_ATPCS,
+ ARM_ABI_AAPCS,
+ ARM_ABI_IWMMXT,
+ ARM_ABI_AAPCS_LINUX
+};
+
+extern enum arm_abi_type arm_abi;
+
+#ifndef ARM_DEFAULT_ABI
+#define ARM_DEFAULT_ABI ARM_ABI_APCS
+#endif
+
+/* Which thread pointer access sequence to use. */
+enum arm_tp_type {
+ TP_AUTO,
+ TP_SOFT,
+ TP_CP15
+};
+
+extern enum arm_tp_type target_thread_pointer;
+
+/* Nonzero if this chip supports the ARM Architecture 3M extensions. */
+extern int arm_arch3m;
+
+/* Nonzero if this chip supports the ARM Architecture 4 extensions. */
+extern int arm_arch4;
+
+/* Nonzero if this chip supports the ARM Architecture 4T extensions. */
+extern int arm_arch4t;
+
+/* Nonzero if this chip supports the ARM Architecture 5 extensions. */
+extern int arm_arch5;
+
+/* Nonzero if this chip supports the ARM Architecture 5E extensions. */
+extern int arm_arch5e;
+
+/* Nonzero if this chip supports the ARM Architecture 6 extensions. */
+extern int arm_arch6;
+
+/* Nonzero if this chip supports the ARM Architecture 6k extensions. */
+extern int arm_arch6k;
+
+/* Nonzero if this chip supports the ARM Architecture 7 extensions. */
+extern int arm_arch7;
+
+/* Nonzero if instructions not present in the 'M' profile can be used. */
+extern int arm_arch_notm;
+
+/* Nonzero if instructions present in ARMv7E-M can be used. */
+extern int arm_arch7em;
+
+/* Nonzero if this chip can benefit from load scheduling. */
+extern int arm_ld_sched;
+
+/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */
+extern int thumb_code;
+
+/* Nonzero if generating Thumb-1 code. */
+extern int thumb1_code;
+
+/* Nonzero if this chip is a StrongARM. */
+extern int arm_tune_strongarm;
+
+/* Nonzero if this chip is a Cirrus variant. */
+extern int arm_arch_cirrus;
+
+/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */
+extern int arm_arch_iwmmxt;
+
+/* Nonzero if this chip is an XScale. */
+extern int arm_arch_xscale;
+
+/* Nonzero if tuning for XScale. */
+extern int arm_tune_xscale;
+
+/* Nonzero if tuning for stores via the write buffer. */
+extern int arm_tune_wbuf;
+
+/* Nonzero if tuning for Cortex-A9. */
+extern int arm_tune_cortex_a9;
+
+/* Nonzero if we should define __THUMB_INTERWORK__ in the
+ preprocessor.
+ XXX This is a bit of a hack, it's intended to help work around
+ problems in GLD which doesn't understand that armv5t code is
+ interworking clean. */
+extern int arm_cpp_interwork;
+
+/* Nonzero if chip supports Thumb 2. */
+extern int arm_arch_thumb2;
+
+/* Nonzero if chip supports integer division instruction. */
+extern int arm_arch_hwdiv;
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APCS_FRAME)
+#endif
+
+/* Nonzero if PIC code requires explicit qualifiers to generate
+ PLT and GOT relocs rather than the assembler doing so implicitly.
+ Subtargets can override these if required. */
+#ifndef NEED_GOT_RELOC
+#define NEED_GOT_RELOC 0
+#endif
+#ifndef NEED_PLT_RELOC
+#define NEED_PLT_RELOC 0
+#endif
+
+/* Nonzero if we need to refer to the GOT with a PC-relative
+ offset. In other words, generate
+
+ .word _GLOBAL_OFFSET_TABLE_ - [. - (.Lxx + 8)]
+
+ rather than
+
+ .word _GLOBAL_OFFSET_TABLE_ - (.Lxx + 8)
+
+ The default is true, which matches NetBSD. Subtargets can
+ override this if required. */
+#ifndef GOT_PCREL
+#define GOT_PCREL 1
+#endif
+
+/* Target machine storage layout.  */
+
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+
+/* It is far faster to zero extend chars than to sign extend them.  */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ { \
+ if (MODE == QImode) \
+ UNSIGNEDP = 1; \
+ else if (MODE == HImode) \
+ UNSIGNEDP = 1; \
+ (MODE) = SImode; \
+ }
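+
+/* E.g. a QImode "char" value held in a register is widened to SImode
+   with UNSIGNEDP forced to 1, so it is kept zero-extended and 32-bit
+   comparisons can use it directly.  */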
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered.
+ Most ARM processors are run in little endian mode, so that is the default.
+ If you want to have it run-time selectable, change the definition in a
+ cover file to be TARGET_BIG_ENDIAN. */
+#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered.
+ This is always false, even when in big-endian mode. */
+#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN && ! TARGET_LITTLE_WORDS)
+
+/* Define this if most significant word of doubles is the lowest numbered.
+ The rules are different based on whether or not we use FPA-format,
+ VFP-format or some other floating point co-processor's format doubles. */
+#define FLOAT_WORDS_BIG_ENDIAN (arm_float_words_big_endian ())
+
+#define UNITS_PER_WORD 4
+
+/* True if natural alignment is used for doubleword types. */
+#define ARM_DOUBLEWORD_ALIGN TARGET_AAPCS_BASED
+
+#define DOUBLEWORD_ALIGNMENT 64
+
+#define PARM_BOUNDARY 32
+
+#define STACK_BOUNDARY (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32)
+
+#define PREFERRED_STACK_BOUNDARY \
+ (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY)
+
+#define FUNCTION_BOUNDARY ((TARGET_THUMB && optimize_size) ? 16 : 32)
+
+/* The lowest bit is used to indicate Thumb-mode functions, so the
+ vbit must go into the delta field of pointers to member
+ functions. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta
+
+#define EMPTY_FIELD_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32)
+
+/* XXX Blah -- this macro is used directly by libobjc. Since it
+ supports no vector modes, cut out the complexity and fall back
+ on BIGGEST_FIELD_ALIGNMENT. */
+#ifdef IN_TARGET_LIBS
+#define BIGGEST_FIELD_ALIGNMENT 64
+#endif
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT_FACTOR (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2)
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && !optimize_size \
+ && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR) \
+ ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN))
+
+/* Align definitions of arrays, unions and structures so that
+ initializations and copies can be made more efficient. This is not
+ ABI-changing, so it only affects places where we can see the
+ definition. Increasing the alignment tends to introduce padding,
+ so don't do this when optimizing for size/conserving stack space. */
+#define ARM_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \
+ (((COND) && ((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (EXP) == ARRAY_TYPE \
+ || TREE_CODE (EXP) == UNION_TYPE \
+ || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* Align global data. */
+#define DATA_ALIGNMENT(EXP, ALIGN) \
+ ARM_EXPAND_ALIGNMENT(!optimize_size, EXP, ALIGN)
+
+/* Similarly, make sure that objects on the stack are sensibly aligned. */
+#define LOCAL_ALIGNMENT(EXP, ALIGN) \
+ ARM_EXPAND_ALIGNMENT(!flag_conserve_stack, EXP, ALIGN)
+
+/* Setting STRUCTURE_SIZE_BOUNDARY to 32 produces more efficient code, but the
+ value set in previous versions of this toolchain was 8, which produces more
+ compact structures. The command line option -mstructure_size_boundary=<n>
+ can be used to change this value. For compatibility with the ARM SDK
+ however the value should be left at 32. ARM SDT Reference Manual (ARM DUI
+ 0020D) page 2-20 says "Structures are aligned on word boundaries".
+ The AAPCS specifies a value of 8. */
+#define STRUCTURE_SIZE_BOUNDARY arm_structure_size_boundary
+extern int arm_structure_size_boundary;
+
+/* This is the value used to initialize arm_structure_size_boundary. If a
+ particular arm target wants to change the default value it should change
+ the definition of this macro, not STRUCTURE_SIZE_BOUNDARY. See netbsd.h
+ for an example of this. */
+#ifndef DEFAULT_STRUCTURE_SIZE_BOUNDARY
+#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 32
+#endif
+
+/* Nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* wchar_t is unsigned under the AAPCS. */
+#ifndef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "int")
+
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+#endif
+
+#ifndef SIZE_TYPE
+#define SIZE_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long unsigned int")
+#endif
+
+#ifndef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int")
+#endif
+
+/* AAPCS requires that structure alignment is affected by bitfields. */
+#ifndef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS TARGET_AAPCS_BASED
+#endif
+
+
+/* Standard register usage. */
+
+/* Register allocation in ARM Procedure Call Standard (as used on RISCiX):
+ (S - saved over call).
+
+ r0 * argument word/integer result
+ r1-r3 argument word
+
+ r4-r8 S register variable
+ r9 S (rfp) register variable (real frame pointer)
+
+ r10 F S (sl) stack limit (used by -mapcs-stack-check)
+ r11 F S (fp) argument pointer
+ r12 (ip) temp workspace
+ r13 F S (sp) lower end of current stack frame
+ r14 (lr) link address/workspace
+ r15 F (pc) program counter
+
+ f0 floating point result
+ f1-f3 floating point scratch
+
+ f4-f7 S floating point variable
+
+ cc This is NOT a real register, but is used internally
+ to represent things that use or set the condition
+ codes.
+ sfp This isn't either. It is used during rtl generation
+ since the offset between the frame pointer and the
+ auto's isn't known until after register allocation.
+ afp Nor this, we only need this because of non-local
+ goto. Without it fp appears to be used and the
+ elimination code won't get rid of sfp. It tracks
+ fp exactly at all times.
+
+ *: See TARGET_CONDITIONAL_REGISTER_USAGE */
+
+/*
+ mvf0 Cirrus floating point result
+ mvf1-mvf3 Cirrus floating point scratch
+ mvf4-mvf15 S Cirrus floating point variable. */
+
+/* s0-s15 VFP scratch (aka d0-d7).
+ s16-s31 S VFP variable (aka d8-d15).
+ vfpcc Not a real register. Represents the VFP condition
+ code flags. */
+
+/* The stack backtrace structure is as follows:
+ fp points to here: | save code pointer | [fp]
+ | return link value | [fp, #-4]
+ | return sp value | [fp, #-8]
+ | return fp value | [fp, #-12]
+ [| saved r10 value |]
+ [| saved r9 value |]
+ [| saved r8 value |]
+ [| saved r7 value |]
+ [| saved r6 value |]
+ [| saved r5 value |]
+ [| saved r4 value |]
+ [| saved r3 value |]
+ [| saved r2 value |]
+ [| saved r1 value |]
+ [| saved r0 value |]
+ [| saved f7 value |] three words
+ [| saved f6 value |] three words
+ [| saved f5 value |] three words
+ [| saved f4 value |] three words
+ r0-r3 are not normally saved in a C function. */
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+#define FIXED_REGISTERS \
+{ \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,1,0,1, \
+ 0,0,0,0,0,0,0,0, \
+ 1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1 \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like.
+ The CC is not preserved over function calls on the ARM 6, so it is
+ easier to assume this for all. SFP is preserved, since FP is. */
+#define CALL_USED_REGISTERS \
+{ \
+ 1,1,1,1,0,0,0,0, \
+ 0,0,0,0,1,1,1,1, \
+ 1,1,1,1,0,0,0,0, \
+ 1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1 \
+}
+
+#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
+#define SUBTARGET_CONDITIONAL_REGISTER_USAGE
+#endif
+
+/* These are a couple of extensions to the formats accepted
+ by asm_fprintf:
+ %@ prints out ASM_COMMENT_START
+ %r prints out REGISTER_PREFIX reg_names[arg] */
+#define ASM_FPRINTF_EXTENSIONS(FILE, ARGS, P) \
+ case '@': \
+ fputs (ASM_COMMENT_START, FILE); \
+ break; \
+ \
+ case 'r': \
+ fputs (REGISTER_PREFIX, FILE); \
+ fputs (reg_names [va_arg (ARGS, int)], FILE); \
+ break;
+
+/* Round X up to the nearest word. */
+#define ROUND_UP_WORD(X) (((X) + 3) & ~3)
+
+/* Convert from bytes to ints.  */
+#define ARM_NUM_INTS(X) (((X) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* The number of (integer) registers required to hold a quantity of type MODE.
+ Also used for VFP registers. */
+#define ARM_NUM_REGS(MODE) \
+ ARM_NUM_INTS (GET_MODE_SIZE (MODE))
+
+/* The number of (integer) registers required to hold a quantity of
+   mode MODE and type TYPE.  */
+#define ARM_NUM_REGS2(MODE, TYPE) \
+ ARM_NUM_INTS ((MODE) == BLKmode ? \
+ int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE))
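+
+/* For instance, ARM_NUM_REGS (DImode) is 2, and a 6-byte BLKmode
+   aggregate likewise rounds up to 2 registers.  */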
+
+/* The number of (integer) argument registers available.  */
+#define NUM_ARG_REGS 4
+
+/* And similarly for the VFP. */
+#define NUM_VFP_ARG_REGS 16
+
+/* Return the register number of the N'th (integer) argument. */
+#define ARG_REGISTER(N) (N - 1)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* The number of the last argument register. */
+#define LAST_ARG_REGNUM ARG_REGISTER (NUM_ARG_REGS)
+
+/* The numbers of the Thumb register ranges. */
+#define FIRST_LO_REGNUM 0
+#define LAST_LO_REGNUM 7
+#define FIRST_HI_REGNUM 8
+#define LAST_HI_REGNUM 11
+
+/* Overridden by config/arm/bpabi.h. */
+#ifndef ARM_UNWIND_INFO
+#define ARM_UNWIND_INFO 0
+#endif
+
+/* Use r0 and r1 to pass exception handling information. */
+#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? N : INVALID_REGNUM)
+
+/* The register that holds the return address in exception handlers. */
+#define ARM_EH_STACKADJ_REGNUM 2
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)
+
+/* The native (Norcroft) Pascal compiler for the ARM passes the static chain
+ as an invisible last argument (possible since varargs don't exist in
+ Pascal), so the following is not true. */
+#define STATIC_CHAIN_REGNUM 12
+
+/* Define this to be where the real frame pointer is if it is not possible to
+ work out the offset between the frame pointer and the automatic variables
+ until after register allocation has taken place. FRAME_POINTER_REGNUM
+ should point to a special register that we will make sure is eliminated.
+
+ For the Thumb we have another problem. The TPCS defines the frame pointer
+ as r11, and GCC believes that it is always possible to use the frame pointer
+ as base register for addressing purposes. (See comments in
+ find_reloads_address()). But - the Thumb does not allow high registers,
+ including r11, to be used as base address registers. Hence our problem.
+
+ The solution used here, and in the old thumb port is to use r7 instead of
+ r11 as the hard frame pointer and to have special code to generate
+ backtrace structures on the stack (if required to do so via a command line
+ option) using r11. This is the only 'user visible' use of r11 as a frame
+ pointer. */
+#define ARM_HARD_FRAME_POINTER_REGNUM 11
+#define THUMB_HARD_FRAME_POINTER_REGNUM 7
+
+#define HARD_FRAME_POINTER_REGNUM \
+ (TARGET_ARM \
+ ? ARM_HARD_FRAME_POINTER_REGNUM \
+ : THUMB_HARD_FRAME_POINTER_REGNUM)
+
+#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
+#define HARD_FRAME_POINTER_IS_ARG_POINTER 0
+
+#define FP_REGNUM HARD_FRAME_POINTER_REGNUM
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REGNUM
+
+/* ARM FPA floating point registers. */
+#define FIRST_FPA_REGNUM 16
+#define LAST_FPA_REGNUM 23
+#define IS_FPA_REGNUM(REGNUM) \
+ (((REGNUM) >= FIRST_FPA_REGNUM) && ((REGNUM) <= LAST_FPA_REGNUM))
+
+#define FIRST_IWMMXT_GR_REGNUM 43
+#define LAST_IWMMXT_GR_REGNUM 46
+#define FIRST_IWMMXT_REGNUM 47
+#define LAST_IWMMXT_REGNUM 62
+#define IS_IWMMXT_REGNUM(REGNUM) \
+ (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM))
+#define IS_IWMMXT_GR_REGNUM(REGNUM) \
+ (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM))
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 25
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 26
+
+#define FIRST_CIRRUS_FP_REGNUM 27
+#define LAST_CIRRUS_FP_REGNUM 42
+#define IS_CIRRUS_REGNUM(REGNUM) \
+ (((REGNUM) >= FIRST_CIRRUS_FP_REGNUM) && ((REGNUM) <= LAST_CIRRUS_FP_REGNUM))
+
+#define FIRST_VFP_REGNUM 63
+#define D7_VFP_REGNUM 78 /* Registers 77 and 78 == VFP reg D7. */
+#define LAST_VFP_REGNUM \
+ (TARGET_VFPD32 ? LAST_HI_VFP_REGNUM : LAST_LO_VFP_REGNUM)
+
+#define IS_VFP_REGNUM(REGNUM) \
+ (((REGNUM) >= FIRST_VFP_REGNUM) && ((REGNUM) <= LAST_VFP_REGNUM))
+
+/* VFP registers are split into two types: those defined by VFP versions < 3
+ have D registers overlaid on consecutive pairs of S registers. VFP version 3
+ defines 16 new D registers (d16-d31) which, for simplicity and correctness
+ in various parts of the backend, we implement as "fake" single-precision
+ registers (which would be S32-S63, but cannot be used in that way). The
+ following macros define these ranges of registers. */
+#define LAST_LO_VFP_REGNUM 94
+#define FIRST_HI_VFP_REGNUM 95
+#define LAST_HI_VFP_REGNUM 126
+
+#define VFP_REGNO_OK_FOR_SINGLE(REGNUM) \
+ ((REGNUM) <= LAST_LO_VFP_REGNUM)
+
+/* DFmode values are only valid in even register pairs. */
+#define VFP_REGNO_OK_FOR_DOUBLE(REGNUM) \
+ ((((REGNUM) - FIRST_VFP_REGNUM) & 1) == 0)
+
+/* Neon Quad values must start at a multiple of four registers. */
+#define NEON_REGNO_OK_FOR_QUAD(REGNUM) \
+ ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0)
+
+/* Neon structures of vectors must be in even register pairs and there
+ must be enough registers available. Because of various patterns
+ requiring quad registers, we require them to start at a multiple of
+ four. */
+#define NEON_REGNO_OK_FOR_NREGS(REGNUM, N) \
+ ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \
+ && (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1))
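+
+/* E.g. a structure of N == 2 D registers occupies 2 * N == 4
+ consecutive single-precision slots, so REGNUM must lie a multiple of
+ four slots above FIRST_VFP_REGNUM and slots REGNUM .. REGNUM + 3 must
+ not run past LAST_VFP_REGNUM. */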
+
+/* The number of hard registers is 16 ARM + 8 FPA + 1 CC + 1 SFP + 1 AFP. */
+/* + 16 Cirrus registers take us up to 43. */
+/* Intel Wireless MMX Technology registers add 16 + 4 more. */
+/* VFP (VFP3) adds 32 (64) + 1 more. */
+#define FIRST_PSEUDO_REGISTER 128
+
+#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO)
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may be accessed
+ via the stack pointer) in functions that seem suitable.
+ If we have to have a frame pointer we might as well make use of it.
+ APCS says that the frame pointer does not need to be pushed in leaf
+ functions, or simple tail call functions. */
+
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the ARM regs are UNITS_PER_WORD bits wide; FPA regs can hold any FP
+ mode. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((TARGET_32BIT \
+ && REGNO >= FIRST_FPA_REGNUM \
+ && REGNO != FRAME_POINTER_REGNUM \
+ && REGNO != ARG_POINTER_REGNUM) \
+ && !IS_VFP_REGNUM (REGNO) \
+ ? 1 : ARM_NUM_REGS (MODE))
+
+/* Return true if REGNO is suitable for holding a quantity of type MODE. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ arm_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+
+#define VALID_IWMMXT_REG_MODE(MODE) \
+ (arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
+
+/* Modes valid for Neon D registers. */
+#define VALID_NEON_DREG_MODE(MODE) \
+ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
+ || (MODE) == V2SFmode || (MODE) == DImode)
+
+/* Modes valid for Neon Q registers. */
+#define VALID_NEON_QREG_MODE(MODE) \
+ ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
+ || (MODE) == V4SFmode || (MODE) == V2DImode)
+
+/* Structure modes valid for Neon registers. */
+#define VALID_NEON_STRUCT_MODE(MODE) \
+ ((MODE) == TImode || (MODE) == EImode || (MODE) == OImode \
+ || (MODE) == CImode || (MODE) == XImode)
+
+/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
+extern int arm_regs_in_sequence[];
+
+/* The order in which registers should be allocated. It is good to use ip
+ since no saving is required (though calls clobber it) and it never contains
+ function parameters. It is quite good to use lr since other calls may
+ clobber it anyway. Allocate r0 through r3 in reverse order since r3 is
+ least likely to contain a function parameter; in addition results are
+ returned in r0.
+ For VFP/VFPv3, allocate D16-D31 first, then caller-saved registers (D0-D7),
+ then D8-D15. The reason for doing this is to attempt to reduce register
+ pressure when both single- and double-precision registers are used in a
+ function. */
+
+#define REG_ALLOC_ORDER \
+{ \
+ 3, 2, 1, 0, 12, 14, 4, 5, \
+ 6, 7, 8, 10, 9, 11, 13, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 27, 28, 29, 30, 31, 32, 33, 34, \
+ 35, 36, 37, 38, 39, 40, 41, 42, \
+ 43, 44, 45, 46, 47, 48, 49, 50, \
+ 51, 52, 53, 54, 55, 56, 57, 58, \
+ 59, 60, 61, 62, \
+ 24, 25, 26, \
+ 95, 96, 97, 98, 99, 100, 101, 102, \
+ 103, 104, 105, 106, 107, 108, 109, 110, \
+ 111, 112, 113, 114, 115, 116, 117, 118, \
+ 119, 120, 121, 122, 123, 124, 125, 126, \
+ 78, 77, 76, 75, 74, 73, 72, 71, \
+ 70, 69, 68, 67, 66, 65, 64, 63, \
+ 79, 80, 81, 82, 83, 84, 85, 86, \
+ 87, 88, 89, 90, 91, 92, 93, 94, \
+ 127 \
+}
+
+/* Use different register alloc ordering for Thumb. */
+#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc ()
+
+/* Tell IRA to use the order we define rather than messing it up with its
+ own cost calculations. */
+#define HONOR_REG_ALLOC_ORDER
+
+/* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+#define HARD_REGNO_RENAME_OK(SRC, DST) \
+ (! IS_INTERRUPT (cfun->machine->func_type) || \
+ df_regs_ever_live_p (DST))
+
+/* Register and constant classes. */
+
+/* Register classes: these used to be simple, just all ARM regs or all
+ FPA regs. Now that the Thumb is involved they have become more
+ complicated. */
+enum reg_class
+{
+ NO_REGS,
+ FPA_REGS,
+ CIRRUS_REGS,
+ VFP_D0_D7_REGS,
+ VFP_LO_REGS,
+ VFP_HI_REGS,
+ VFP_REGS,
+ IWMMXT_GR_REGS,
+ IWMMXT_REGS,
+ LO_REGS,
+ STACK_REG,
+ BASE_REGS,
+ HI_REGS,
+ CC_REG,
+ VFPCC_REG,
+ GENERAL_REGS,
+ CORE_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "FPA_REGS", \
+ "CIRRUS_REGS", \
+ "VFP_D0_D7_REGS", \
+ "VFP_LO_REGS", \
+ "VFP_HI_REGS", \
+ "VFP_REGS", \
+ "IWMMXT_GR_REGS", \
+ "IWMMXT_REGS", \
+ "LO_REGS", \
+ "STACK_REG", \
+ "BASE_REGS", \
+ "HI_REGS", \
+ "CC_REG", \
+ "VFPCC_REG", \
+ "GENERAL_REGS", \
+ "CORE_REGS", \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00FF0000, 0x00000000, 0x00000000, 0x00000000 }, /* FPA_REGS */ \
+ { 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \
+ { 0x00000000, 0x80000000, 0x00007FFF, 0x00000000 }, /* VFP_D0_D7_REGS */ \
+ { 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \
+ { 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \
+ { 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \
+ { 0x00000000, 0x7FFF8000, 0x00000000, 0x00000000 }, /* IWMMXT_REGS */ \
+ { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \
+ { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
+ { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
+ { 0x0000DF00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
+ { 0x01000000, 0x00000000, 0x00000000, 0x00000000 }, /* CC_REG */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x80000000 }, /* VFPCC_REG */ \
+ { 0x0000DFFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
+ { 0x0000FFFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
+ { 0xFAFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF } /* ALL_REGS */ \
+}
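+
+/* Hard register REGNO is bit (REGNO % 32) of word (REGNO / 32) in each
+ initializer. As a cross-check, LO_REGS is { 0x000000FF, 0, 0, 0 }:
+ bits 0-7 of the first word, i.e. r0-r7, matching FIRST_LO_REGNUM and
+ LAST_LO_REGNUM above. */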
+
+/* Any of the VFP register classes. */
+#define IS_VFP_CLASS(X) \
+ ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \
+ || (X) == VFP_HI_REGS || (X) == VFP_REGS)
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+#define REGNO_REG_CLASS(REGNO) arm_regno_class (REGNO)
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers in a cover class should be
+ cheaper than a load or store of those registers. The macro value is
+ an array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FPA_REGS, CIRRUS_REGS, VFP_REGS, IWMMXT_GR_REGS, IWMMXT_REGS,\
+ LIM_REG_CLASSES \
+}
+
+/* FPA registers can't do subreg as all values are reformatted to internal
+ precision. VFP registers may only be accessed in the mode they
+ were set. */
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (FPA_REGS, (CLASS)) \
+ || reg_classes_intersect_p (VFP_REGS, (CLASS)) \
+ : 0)
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS (TARGET_THUMB1 ? LO_REGS : GENERAL_REGS)
+#define BASE_REG_CLASS (TARGET_THUMB1 ? LO_REGS : CORE_REGS)
+
+/* For the Thumb the high registers cannot be used as base registers
+ when addressing quantities in QI or HI mode; if we don't know the
+ mode, then we must be conservative. */
+#define MODE_BASE_REG_CLASS(MODE) \
+ (TARGET_32BIT ? CORE_REGS : \
+ (((MODE) == SImode) ? BASE_REGS : LO_REGS))
+
+/* For Thumb we cannot support SP+reg addressing, so we return LO_REGS
+ instead of BASE_REGS. */
+#define MODE_BASE_REG_REG_CLASS(MODE) BASE_REG_CLASS
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+ arm_small_register_classes_for_mode_p
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS, but for the Thumb core registers and
+ immediate constants we prefer a LO_REGS class or a subset. */
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ (TARGET_32BIT ? (CLASS) : \
+ ((CLASS) == GENERAL_REGS || (CLASS) == HI_REGS \
+ || (CLASS) == NO_REGS || (CLASS) == STACK_REG \
+ ? LO_REGS : (CLASS)))
+
+/* Must leave BASE_REGS reloads alone. */
+#define THUMB_SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \
+ ? ((true_regnum (X) == -1 ? LO_REGS \
+ : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \
+ : NO_REGS)) \
+ : NO_REGS)
+
+#define THUMB_SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \
+ ? ((true_regnum (X) == -1 ? LO_REGS \
+ : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \
+ : NO_REGS)) \
+ : NO_REGS)
+
+/* Return the register class of a scratch register needed to copy IN into
+ or out of a register in CLASS in MODE. If it can be done directly,
+ NO_REGS is returned. */
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
+ ((TARGET_VFP && TARGET_HARD_FLOAT \
+ && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) \
+ : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \
+ ? coproc_secondary_reload_class (MODE, X, TRUE) \
+ : TARGET_32BIT \
+ ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+ ? GENERAL_REGS : NO_REGS) \
+ : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
+
+/* If we need to load shorts byte-at-a-time, then we need a scratch. */
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
+ ((TARGET_VFP && TARGET_HARD_FLOAT \
+ && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) : \
+ (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \
+ coproc_secondary_reload_class (MODE, X, TRUE) : \
+ /* Cannot load constants into Cirrus registers. */ \
+ (TARGET_MAVERICK && TARGET_HARD_FLOAT \
+ && (CLASS) == CIRRUS_REGS \
+ && (CONSTANT_P (X) || GET_CODE (X) == SYMBOL_REF)) \
+ ? GENERAL_REGS : \
+ (TARGET_32BIT ? \
+ (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \
+ && CONSTANT_P (X)) \
+ ? GENERAL_REGS : \
+ (((MODE) == HImode && ! arm_arch4 \
+ && (GET_CODE (X) == MEM \
+ || ((GET_CODE (X) == REG || GET_CODE (X) == SUBREG) \
+ && true_regnum (X) == -1))) \
+ ? GENERAL_REGS : NO_REGS) \
+ : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c.
+
+ For the ARM, we wish to handle large displacements off a base
+ register by splitting the addend across a MOV and the mem insn.
+ This can cut the number of reloads needed. */
+#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \
+ do \
+ { \
+ if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \
+ goto WIN; \
+ } \
+ while (0)
+
+/* XXX If an HImode FP+large_offset address is converted to an HImode
+ SP+large_offset address, then reload won't know how to fix it. It sees
+ only that SP isn't valid for HImode, and so reloads the SP into an index
+ register, but the resulting address is still invalid because the offset
+ is too big. We fix it here instead by reloading the entire address. */
+/* We could probably achieve better results by defining PROMOTE_MODE to help
+ cope with the variances between the Thumb's signed and unsigned byte and
+ halfword load instructions. */
+/* ??? This should be safe for thumb2, but we may be able to do better. */
+#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \
+do { \
+ rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \
+ if (new_x) \
+ { \
+ X = new_x; \
+ goto WIN; \
+ } \
+} while (0)
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
+ if (TARGET_ARM) \
+ ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \
+ else \
+ THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+ ARM regs are UNITS_PER_WORD bits wide, while FPA regs can hold any FP mode. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (((CLASS) == FPA_REGS || (CLASS) == CIRRUS_REGS) ? 1 : ARM_NUM_REGS (MODE))
+
+/* If defined, gives a class of registers that cannot be used as the
+ operand of a SUBREG that changes the mode of the object illegally. */
+
+/* Moves between FPA_REGS and GENERAL_REGS are two memory insns.
+ Moves between VFP_REGS and GENERAL_REGS are a single insn, but
+ it is typically more expensive than a single memory access. We set
+ the cost to less than two memory accesses so that floating
+ point to integer conversion does not go through memory. */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) \
+ (TARGET_32BIT ? \
+ ((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \
+ (FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \
+ IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 15 : \
+ !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 15 : \
+ (FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \
+ (FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \
+ (FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \
+ (FROM) == CIRRUS_REGS && (TO) != CIRRUS_REGS ? 20 : \
+ (FROM) != CIRRUS_REGS && (TO) == CIRRUS_REGS ? 20 : \
+ 2) \
+ : \
+ ((FROM) == HI_REGS || (TO) == HI_REGS) ? 4 : 2)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* The amount of scratch space needed by _interwork_{r7,r11}_call_via_rN().
+ When present, it is one word in size, and sits at the top of the frame,
+ between the soft frame pointer and either r7 or r11.
+
+ We only need _interwork_rM_call_via_rN() for -mcaller-super-interworking,
+ and only then if some outgoing arguments are passed on the stack. It would
+ be tempting to also check whether the stack arguments are passed by indirect
+ calls, but there seems to be no reason in principle why a post-reload pass
+ couldn't convert a direct call into an indirect one. */
+#define CALLER_INTERWORKING_SLOT_SIZE \
+ (TARGET_CALLER_INTERWORKING \
+ && crtl->outgoing_args_size != 0 \
+ ? UNITS_PER_WORD : 0)
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by. */
+/* The push insns do not do this rounding implicitly.
+ So don't define this. */
+/* #define PUSH_ROUNDING(NPUSHED) ROUND_UP_WORD (NPUSHED) */
+
+/* Define this if the maximum size of all the outgoing args is to be
+ accumulated and pushed during the prologue. The amount can be
+ found in the variable crtl->outgoing_args_size. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) (TARGET_ARM ? 4 : 0)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) \
+ (TARGET_AAPCS_BASED ? aapcs_libcall_value (MODE) \
+ : (TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_FPA \
+ && GET_MODE_CLASS (MODE) == MODE_FLOAT) \
+ ? gen_rtx_REG (MODE, FIRST_FPA_REGNUM) \
+ : TARGET_32BIT && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK \
+ && GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ ? gen_rtx_REG (MODE, FIRST_CIRRUS_FP_REGNUM) \
+ : TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (MODE) \
+ ? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM) \
+ : gen_rtx_REG (MODE, ARG_REGISTER (1)))
+
+/* 1 if REGNO is a possible register number for a function value. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) \
+ ((REGNO) == ARG_REGISTER (1) \
+ || (TARGET_AAPCS_BASED && TARGET_32BIT \
+ && TARGET_VFP && TARGET_HARD_FLOAT \
+ && (REGNO) == FIRST_VFP_REGNUM) \
+ || (TARGET_32BIT && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) \
+ && TARGET_HARD_FLOAT_ABI && TARGET_MAVERICK) \
+ || ((REGNO) == FIRST_IWMMXT_REGNUM && TARGET_IWMMXT_ABI) \
+ || (TARGET_32BIT && ((REGNO) == FIRST_FPA_REGNUM) \
+ && TARGET_HARD_FLOAT_ABI && TARGET_FPA))
+
+/* Amount of memory needed for an untyped call to save all possible return
+ registers. */
+#define APPLY_RESULT_SIZE arm_apply_result_size()
+
+/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return
+ values must be in memory. On the ARM, they need only do so if larger
+ than a word, or if they contain elements offset from zero in the struct. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* These bits describe the different types of function supported
+ by the ARM backend. They are exclusive, i.e. a function cannot be both a
+ normal function and an interworked function, for example. Knowing the
+ type of a function is important for determining its prologue and
+ epilogue sequences.
+ Note value 7 is currently unassigned. Also note that the interrupt
+ function types all have bit 2 set, so that they can be tested for easily.
+ Note that 0 is deliberately chosen for ARM_FT_UNKNOWN so that when the
+ machine_function structure is initialized (to zero) func_type will
+ default to unknown. This will force the first use of arm_current_func_type
+ to call arm_compute_func_type. */
+#define ARM_FT_UNKNOWN 0 /* Type has not yet been determined. */
+#define ARM_FT_NORMAL 1 /* Your normal, straightforward function. */
+#define ARM_FT_INTERWORKED 2 /* A function that supports interworking. */
+#define ARM_FT_ISR 4 /* An interrupt service routine. */
+#define ARM_FT_FIQ 5 /* A fast interrupt service routine. */
+#define ARM_FT_EXCEPTION 6 /* An ARM exception handler (subcase of ISR). */
+
+#define ARM_FT_TYPE_MASK ((1 << 3) - 1)
+
+/* In addition functions can have several type modifiers,
+ outlined by these bit masks: */
+#define ARM_FT_INTERRUPT (1 << 2) /* Note overlap with FT_ISR and above. */
+#define ARM_FT_NAKED (1 << 3) /* No prologue or epilogue. */
+#define ARM_FT_VOLATILE (1 << 4) /* Does not return. */
+#define ARM_FT_NESTED (1 << 5) /* Embedded inside another func. */
+#define ARM_FT_STACKALIGN (1 << 6) /* Called with misaligned stack. */
+
+/* Some macros to test these flags. */
+#define ARM_FUNC_TYPE(t) (t & ARM_FT_TYPE_MASK)
+#define IS_INTERRUPT(t) (t & ARM_FT_INTERRUPT)
+#define IS_VOLATILE(t) (t & ARM_FT_VOLATILE)
+#define IS_NAKED(t) (t & ARM_FT_NAKED)
+#define IS_NESTED(t) (t & ARM_FT_NESTED)
+#define IS_STACKALIGN(t) (t & ARM_FT_STACKALIGN)
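+
+/* A sketch of how the two layers combine: a naked FIQ handler has
+ func_type == (ARM_FT_FIQ | ARM_FT_NAKED), so ARM_FUNC_TYPE yields
+ ARM_FT_FIQ, IS_INTERRUPT is nonzero (bit 2 is set in ARM_FT_FIQ
+ itself) and IS_NAKED is nonzero. */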
+
+
+/* Structure used to hold the function stack frame layout. Offsets are
+ relative to the stack pointer on function entry. Positive offsets are
+ in the direction of stack growth.
+ Only soft_frame is used in thumb mode. */
+
+typedef struct GTY(()) arm_stack_offsets
+{
+ int saved_args; /* ARG_POINTER_REGNUM. */
+ int frame; /* ARM_HARD_FRAME_POINTER_REGNUM. */
+ int saved_regs;
+ int soft_frame; /* FRAME_POINTER_REGNUM. */
+ int locals_base; /* THUMB_HARD_FRAME_POINTER_REGNUM. */
+ int outgoing_args; /* STACK_POINTER_REGNUM. */
+ unsigned int saved_regs_mask;
+}
+arm_stack_offsets;
+
+#ifndef GENERATOR_FILE
+/* A C structure for machine-specific, per-function data.
+ This is added to the cfun structure. */
+typedef struct GTY(()) machine_function
+{
+ /* Additional stack adjustment in __builtin_eh_return. */
+ rtx eh_epilogue_sp_ofs;
+ /* Records if LR has to be saved for far jumps. */
+ int far_jump_used;
+ /* Records if ARG_POINTER was ever live. */
+ int arg_pointer_live;
+ /* Records if the save of LR has been eliminated. */
+ int lr_save_eliminated;
+ /* The size of the stack frame. Only valid after reload. */
+ arm_stack_offsets stack_offsets;
+ /* Records the type of the current function. */
+ unsigned long func_type;
+ /* Record if the function has a variable argument list. */
+ int uses_anonymous_args;
+ /* Records if sibcalls are blocked because an argument
+ register is needed to preserve stack alignment. */
+ int sibcall_blocked;
+ /* The PIC register for this function. This might be a pseudo. */
+ rtx pic_reg;
+ /* Labels for per-function Thumb call-via stubs. One per potential calling
+ register. We can never call via LR or PC. We can call via SP if a
+ trampoline happens to be on the top of the stack. */
+ rtx call_via[14];
+ /* Set to 1 when a return insn is output, this means that the epilogue
+ is not needed. */
+ int return_used_this_function;
+ /* When outputting Thumb-1 code, record the last insn that provides
+ information about condition codes, and the comparison operands. */
+ rtx thumb1_cc_insn;
+ rtx thumb1_cc_op0;
+ rtx thumb1_cc_op1;
+ /* Also record the CC mode that is supported. */
+ enum machine_mode thumb1_cc_mode;
+}
+machine_function;
+#endif
+
+/* As in the machine_function, a global set of call-via labels, for code
+ that is in text_section. */
+extern GTY(()) rtx thumb_call_via_label[14];
+
+/* The number of potential ways of assigning to a co-processor. */
+#define ARM_NUM_COPROC_SLOTS 1
+
+/* Enumeration of procedure calling standard variants. We don't really
+ support all of these yet. */
+enum arm_pcs
+{
+ ARM_PCS_AAPCS, /* Base standard AAPCS. */
+ ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */
+ ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */
+ /* This must be the last AAPCS variant. */
+ ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */
+ ARM_PCS_ATPCS, /* ATPCS. */
+ ARM_PCS_APCS, /* APCS (legacy Linux etc). */
+ ARM_PCS_UNKNOWN
+};
+
+/* Default procedure calling standard of current compilation unit. */
+extern enum arm_pcs arm_pcs_default;
+
+/* A C type for declaring a variable that is used as the first argument of
+ `FUNCTION_ARG' and other related values. */
+typedef struct
+{
+ /* This is the number of registers of arguments scanned so far. */
+ int nregs;
+ /* This is the number of iWMMXt register arguments scanned so far. */
+ int iwmmxt_nregs;
+ int named_count;
+ int nargs;
+ /* Which procedure call variant to use for this call. */
+ enum arm_pcs pcs_variant;
+
+ /* AAPCS related state tracking. */
+ int aapcs_arg_processed; /* No need to lay out this argument again. */
+ int aapcs_cprc_slot; /* Index of co-processor rules to handle
+ this argument, or -1 if using core
+ registers. */
+ int aapcs_ncrn;
+ int aapcs_next_ncrn;
+ rtx aapcs_reg; /* Register assigned to this argument. */
+ int aapcs_partial; /* How many bytes are passed in regs (if
+ split between core regs and the
+ stack); zero otherwise. */
+ int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS];
+ int can_split; /* Argument can be split between core regs
+ and the stack. */
+ /* Private data for tracking VFP register allocation. */
+ unsigned aapcs_vfp_regs_free;
+ unsigned aapcs_vfp_reg_alloc;
+ int aapcs_vfp_rcount;
+ MACHMODE aapcs_vfp_rmode;
+} CUMULATIVE_ARGS;
+
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ (arm_pad_arg_upward (MODE, TYPE) ? upward : downward)
+
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ (arm_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward)
+
+/* For AAPCS, padding should never be below the argument. For other ABIs,
+ * mimic the default. */
+#define PAD_VARARGS_DOWN \
+ ((TARGET_AAPCS_BASED) ? 0 : BYTES_BIG_ENDIAN)
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+ On the ARM, the offset starts at 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ arm_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL))
+
+/* 1 if N is a possible register number for function argument passing.
+ On the ARM, r0-r3 are used to pass args. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (IN_RANGE ((REGNO), 0, 3) \
+ || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \
+ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
+ || (TARGET_IWMMXT_ABI \
+ && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
+
+
+/* If your target environment doesn't prefix user functions with an
+ underscore, you may wish to re-define this to prevent any conflicts. */
+#ifndef ARM_MCOUNT_NAME
+#define ARM_MCOUNT_NAME "*mcount"
+#endif
+
+/* Call the function profiler with a given profile label. The Acorn
+ compiler puts this BEFORE the prologue but gcc puts it afterwards.
+ On the ARM the full profile code will look like:
+ .data
+ LP1
+ .word 0
+ .text
+ mov ip, lr
+ bl mcount
+ .word LP1
+
+ profile_function() in final.c outputs the .data section, FUNCTION_PROFILER
+ will output the .text section.
+
+ The ``mov ip, lr'' seems like a good idea, to stick with the cc
+ convention. ``prof'' doesn't seem to mind!
+
+ Note - this version of the code is designed to work in both ARM and
+ Thumb modes. */
+#ifndef ARM_FUNCTION_PROFILER
+#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \
+{ \
+ char temp[20]; \
+ rtx sym; \
+ \
+ asm_fprintf (STREAM, "\tmov\t%r, %r\n\tbl\t", \
+ IP_REGNUM, LR_REGNUM); \
+ assemble_name (STREAM, ARM_MCOUNT_NAME); \
+ fputc ('\n', STREAM); \
+ ASM_GENERATE_INTERNAL_LABEL (temp, "LP", LABELNO); \
+ sym = gen_rtx_SYMBOL_REF (Pmode, temp); \
+ assemble_aligned_integer (UNITS_PER_WORD, sym); \
+}
+#endif
+
+#ifdef THUMB_FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+ if (TARGET_ARM) \
+ ARM_FUNCTION_PROFILER (STREAM, LABELNO) \
+ else \
+ THUMB_FUNCTION_PROFILER (STREAM, LABELNO)
+#else
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+ ARM_FUNCTION_PROFILER (STREAM, LABELNO)
+#endif
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero.
+
+ On the ARM, the function epilogue recovers the stack pointer from the
+ frame. */
+#define EXIT_IGNORE_STACK 1
+
+#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNUM)
+
+/* Determine if the epilogue should be output as RTL.
+ You should override this if you define FUNCTION_EXTRA_EPILOGUE. */
+#define USE_RETURN_INSN(ISCOND) \
+ (TARGET_32BIT ? use_return_insn (ISCOND, NULL) : 0)
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ We have two registers that can be eliminated on the ARM. First, the
+ arg pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the pseudo frame pointer register can always
+ be eliminated; it is replaced with either the stack or the real frame
+ pointer. Note we have to use {ARM|THUMB}_HARD_FRAME_POINTER_REGNUM
+ because the definition of HARD_FRAME_POINTER_REGNUM is not a constant. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },\
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },\
+ { ARG_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\
+ { ARG_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM },\
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },\
+ { FRAME_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\
+ { FRAME_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM }}
+
+/* Define the offset between two registers, one to be eliminated, and the
+ other its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ if (TARGET_ARM) \
+ (OFFSET) = arm_compute_initial_elimination_offset (FROM, TO); \
+ else \
+ (OFFSET) = thumb_compute_initial_elimination_offset (FROM, TO)
+
+/* Special case handling of the location of arguments passed on the stack. */
+#define DEBUGGER_ARG_OFFSET(value, addr) value ? value : arm_debugger_arg_offset (value, addr)
+
+/* Initialize data used by insn expanders. This is called from insn_emit,
+ once for every function before code is generated. */
+#define INIT_EXPANDERS arm_init_expanders ()
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE (TARGET_32BIT ? 16 : 20)
+
+/* Alignment required for a trampoline in bits. */
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* Addressing modes, and classification of registers for them. */
+#define HAVE_POST_INCREMENT 1
+#define HAVE_PRE_INCREMENT TARGET_32BIT
+#define HAVE_POST_DECREMENT TARGET_32BIT
+#define HAVE_PRE_DECREMENT TARGET_32BIT
+#define HAVE_PRE_MODIFY_DISP TARGET_32BIT
+#define HAVE_POST_MODIFY_DISP TARGET_32BIT
+#define HAVE_PRE_MODIFY_REG TARGET_32BIT
+#define HAVE_POST_MODIFY_REG TARGET_32BIT
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define TEST_REGNO(R, TEST, VALUE) \
+ ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE))
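+
+/* E.g. ARM_REGNO_OK_FOR_BASE_P below accepts a pseudo only once
+ reg_renumber has assigned it a suitable hard register; an unallocated
+ pseudo has reg_renumber[R] == -1, which the unsigned comparison
+ against PC_REGNUM rejects. */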
+
+/* Don't allow the pc to be used. */
+#define ARM_REGNO_OK_FOR_BASE_P(REGNO) \
+ (TEST_REGNO (REGNO, <, PC_REGNUM) \
+ || TEST_REGNO (REGNO, ==, FRAME_POINTER_REGNUM) \
+ || TEST_REGNO (REGNO, ==, ARG_POINTER_REGNUM))
+
+#define THUMB1_REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
+ (TEST_REGNO (REGNO, <=, LAST_LO_REGNUM) \
+ || (GET_MODE_SIZE (MODE) >= 4 \
+ && TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM)))
+
+#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
+ (TARGET_THUMB1 \
+ ? THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO, MODE) \
+ : ARM_REGNO_OK_FOR_BASE_P (REGNO))
+
+/* Nonzero if X can be the base register in a reg+reg addressing mode.
+ For Thumb, we cannot use SP + reg, so reject SP. */
+#define REGNO_MODE_OK_FOR_REG_BASE_P(X, MODE) \
+ REGNO_MODE_OK_FOR_BASE_P (X, QImode)
+
+/* For ARM code, we don't care about the mode, but for Thumb, the index
+ must be suitable for use in a QImode load. */
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ (REGNO_MODE_OK_FOR_BASE_P (REGNO, QImode) \
+ && !TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM))
+
+/* Maximum number of registers that can appear in a valid memory address.
+ Shifts in addresses can't be by a register. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address. */
+/* XXX We can address any constant, eventually... */
+/* ??? Should the TARGET_ARM here also apply to thumb2? */
+#define CONSTANT_ADDRESS_P(X) \
+ (GET_CODE (X) == SYMBOL_REF \
+ && (CONSTANT_POOL_ADDRESS_P (X) \
+ || (TARGET_ARM && optimize > 0 && SYMBOL_REF_FLAG (X))))
+
+/* True if SYMBOL + OFFSET constants must refer to something within
+ SYMBOL's section. */
+#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0
+
+/* Nonzero if the target requires all absolute relocations to be R_ARM_ABS32. */
+#ifndef TARGET_DEFAULT_WORD_RELOCATIONS
+#define TARGET_DEFAULT_WORD_RELOCATIONS 0
+#endif
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.
+
+ On the ARM, allow any integer (invalid ones are removed later by insn
+ patterns), nice doubles and symbol_refs which refer to the function's
+ constant pool XXX.
+
+ When generating pic allow anything. */
+#define ARM_LEGITIMATE_CONSTANT_P(X) (flag_pic || ! label_mentioned_p (X))
+
+#define THUMB_LEGITIMATE_CONSTANT_P(X) \
+ ( GET_CODE (X) == CONST_INT \
+ || GET_CODE (X) == CONST_DOUBLE \
+ || CONSTANT_ADDRESS_P (X) \
+ || flag_pic)
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (!arm_cannot_force_const_mem (X) \
+ && (TARGET_32BIT ? ARM_LEGITIMATE_CONSTANT_P (X) \
+ : THUMB_LEGITIMATE_CONSTANT_P (X)))
+
+#ifndef SUBTARGET_NAME_ENCODING_LENGTHS
+#define SUBTARGET_NAME_ENCODING_LENGTHS
+#endif
+
+/* This is a C fragment for the inside of a switch statement.
+ Each case label should return the number of characters to
+ be stripped from the start of a function's name, if that
+ name starts with the indicated character. */
+#define ARM_NAME_ENCODING_LENGTHS \
+ case '*': return 1; \
+ SUBTARGET_NAME_ENCODING_LENGTHS
+
+/* This is how to output a reference to a user-level label named NAME.
+ `assemble_name' uses this. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ arm_asm_output_labelref (FILE, NAME)
+
+/* Output IT instructions for conditionally executed Thumb-2 instructions. */
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+ if (TARGET_THUMB2) \
+ thumb2_asm_output_opcode (STREAM);
+
+/* The EABI specifies that constructors should go in .init_array.
+ Other targets use .ctors for compatibility. */
+#ifndef ARM_EABI_CTORS_SECTION_OP
+#define ARM_EABI_CTORS_SECTION_OP \
+ "\t.section\t.init_array,\"aw\",%init_array"
+#endif
+#ifndef ARM_EABI_DTORS_SECTION_OP
+#define ARM_EABI_DTORS_SECTION_OP \
+ "\t.section\t.fini_array,\"aw\",%fini_array"
+#endif
+#define ARM_CTORS_SECTION_OP \
+ "\t.section\t.ctors,\"aw\",%progbits"
+#define ARM_DTORS_SECTION_OP \
+ "\t.section\t.dtors,\"aw\",%progbits"
+
+/* Define CTORS_SECTION_ASM_OP. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+#ifndef IN_LIBGCC2
+# define CTORS_SECTION_ASM_OP \
+ (TARGET_AAPCS_BASED ? ARM_EABI_CTORS_SECTION_OP : ARM_CTORS_SECTION_OP)
+# define DTORS_SECTION_ASM_OP \
+ (TARGET_AAPCS_BASED ? ARM_EABI_DTORS_SECTION_OP : ARM_DTORS_SECTION_OP)
+#else /* !defined (IN_LIBGCC2) */
+/* In libgcc, CTORS_SECTION_ASM_OP must be a compile-time constant,
+ so we cannot use the definition above. */
+# ifdef __ARM_EABI__
+/* The .ctors section is not part of the EABI, so we do not define
+ CTORS_SECTION_ASM_OP when in libgcc; that prevents crtstuff
+ from trying to use it. We do define it when doing normal
+ compilation, as .init_array can be used instead of .ctors. */
+/* There is no need to emit begin or end markers when using
+ init_array; the dynamic linker will compute the size of the
+ array itself based on special symbols created by the static
+ linker. However, we do need to arrange to set up
+ exception-handling here. */
+# define CTOR_LIST_BEGIN asm (ARM_EABI_CTORS_SECTION_OP)
+# define CTOR_LIST_END /* empty */
+# define DTOR_LIST_BEGIN asm (ARM_EABI_DTORS_SECTION_OP)
+# define DTOR_LIST_END /* empty */
+# else /* !defined (__ARM_EABI__) */
+# define CTORS_SECTION_ASM_OP ARM_CTORS_SECTION_OP
+# define DTORS_SECTION_ASM_OP ARM_DTORS_SECTION_OP
+# endif /* !defined (__ARM_EABI__) */
+#endif /* !defined (IN_LIBGCC2) */
+
+/* True if the operating system can merge entities with vague linkage
+ (e.g., symbols in COMDAT group) during dynamic linking. */
+#ifndef TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
+#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P true
+#endif
+
+#define ARM_OUTPUT_FN_UNWIND(F, PROLOGUE) arm_output_fn_unwind (F, PROLOGUE)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+ Thumb-2 has the same restrictions as ARM. */
+#ifndef REG_OK_STRICT
+
+#define ARM_REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) <= LAST_ARM_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) == ARG_POINTER_REGNUM)
+
+#define ARM_REG_OK_FOR_INDEX_P(X) \
+ ((REGNO (X) <= LAST_ARM_REGNUM \
+ && REGNO (X) != STACK_POINTER_REGNUM) \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) == ARG_POINTER_REGNUM)
+
+#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ (REGNO (X) <= LAST_LO_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ || (GET_MODE_SIZE (MODE) >= 4 \
+ && (REGNO (X) == STACK_POINTER_REGNUM \
+ || (X) == hard_frame_pointer_rtx \
+ || (X) == arg_pointer_rtx)))
+
+#define REG_STRICT_P 0
+
+#else /* REG_OK_STRICT */
+
+#define ARM_REG_OK_FOR_BASE_P(X) \
+ ARM_REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#define ARM_REG_OK_FOR_INDEX_P(X) \
+ ARM_REGNO_OK_FOR_INDEX_P (REGNO (X))
+
+#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO (X), MODE)
+
+#define REG_STRICT_P 1
+
+#endif /* REG_OK_STRICT */
+
+/* Now define some helpers in terms of the above. */
+
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ (TARGET_THUMB1 \
+ ? THUMB1_REG_MODE_OK_FOR_BASE_P (X, MODE) \
+ : ARM_REG_OK_FOR_BASE_P (X))
+
+/* For 16-bit Thumb, a valid index register is anything that can be used in
+ a byte load instruction. */
+#define THUMB1_REG_OK_FOR_INDEX_P(X) \
+ THUMB1_REG_MODE_OK_FOR_BASE_P (X, QImode)
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. On the Thumb, the stack pointer
+ is not suitable. */
+#define REG_OK_FOR_INDEX_P(X) \
+ (TARGET_THUMB1 \
+ ? THUMB1_REG_OK_FOR_INDEX_P (X) \
+ : ARM_REG_OK_FOR_INDEX_P (X))
+
+/* Nonzero if X can be the base register in a reg+reg addressing mode.
+ For Thumb, we cannot use SP + reg, so reject SP. */
+#define REG_MODE_OK_FOR_REG_BASE_P(X, MODE) \
+ REG_OK_FOR_INDEX_P (X)
+
+#define ARM_BASE_REGISTER_RTX_P(X) \
+ (GET_CODE (X) == REG && ARM_REG_OK_FOR_BASE_P (X))
+
+#define ARM_INDEX_REGISTER_RTX_P(X) \
+ (GET_CODE (X) == REG && ARM_REG_OK_FOR_INDEX_P (X))
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE Pmode
+
+#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \
+ || (TARGET_THUMB1 \
+ && (optimize_size || flag_pic)))
+
+#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \
+ (TARGET_THUMB1 \
+ ? (min >= 0 && max < 512 \
+ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \
+ : min >= -256 && max < 256 \
+ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \
+ : min >= 0 && max < 8192 \
+ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \
+ : min >= -4096 && max < 4096 \
+ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \
+ : SImode) \
+ : ((min < 0 || max >= 0x2000 || !TARGET_THUMB2) ? SImode \
+ : (max >= 0x200) ? HImode \
+ : QImode))
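+
+/* A worked reading of the Thumb-1 arm: offsets spanning, say,
+ -300..300 fail both QImode tests but satisfy min >= -4096 && max <
+ 4096, so the table is emitted as signed HImode; anything wider falls
+ back to SImode. */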
+
+/* Signed 'char' is most compatible, but RISC OS wants it unsigned;
+ unsigned is probably best, but may break some code. */
+#ifndef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 0
+#endif
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+#undef MOVE_RATIO
+#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) \
+ (TARGET_THUMB ? ZERO_EXTEND : \
+ ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND \
+ : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN)))
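+
+/* E.g. in ARM state on armv4 or later, byte and halfword loads
+ (ldrb/ldrh) zero-extend, so this yields ZERO_EXTEND; only the
+ big-endian pre-armv4 HImode case sign-extends, and Thumb always
+ zero-extends. */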
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 0
+
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
+
+/* Immediate shift counts are truncated by the output routines (or was it
+ the assembler?). Shift counts in a register are truncated by ARM. Note
+ that the native compiler puts too large (> 32) immediate shift counts
+ into a register and shifts by the register, letting the ARM decide what
+ to do instead of doing that itself. */
+/* This is all wrong. Defining SHIFT_COUNT_TRUNCATED tells combine that
+ code like (X << (Y % 32)) for register X, Y is equivalent to (X << Y).
+ On the arm, Y in a register is used modulo 256 for the shift. Only for
+ rotates is modulo 32 used. */
+/* #define SHIFT_COUNT_TRUNCATED 1 */
+
+/* All integers have the same format so truncation is easy. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Calling from registers is a massive pain. */
+#define NO_FUNCTION_CSE 1
+
+/* The machine modes of pointers and functions. */
+#define Pmode SImode
+#define FUNCTION_MODE Pmode
+
+#define ARM_FRAME_RTX(X) \
+ ( (X) == frame_pointer_rtx || (X) == stack_pointer_rtx \
+ || (X) == arg_pointer_rtx)
+
+/* Moves to and from memory are quite expensive. */
+#define MEMORY_MOVE_COST(M, CLASS, IN) \
+ (TARGET_32BIT ? 10 : \
+ ((GET_MODE_SIZE (M) < 4 ? 8 : 2 * GET_MODE_SIZE (M)) \
+ * (CLASS == LO_REGS ? 1 : 2)))
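+
+/* E.g. on Thumb-1 an SImode value moved through LO_REGS costs
+ 2 * 4 * 1 == 8, through any other class 16, and sub-word modes start
+ from a base cost of 8; ARM and Thumb-2 use a flat 10. */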
+
+/* Try to generate sequences that don't involve branches; we can then use
+ conditional instructions. */
+#define BRANCH_COST(speed_p, predictable_p) \
+ (TARGET_32BIT ? 4 : (optimize > 0 ? 2 : 0))
+
+/* Position Independent Code. */
+/* We decide which register to use based on the compilation options and
+ the assembler in use; this is more general than the APCS restriction of
+ using sb (r9) all the time. */
+extern unsigned arm_pic_register;
+
+/* The register number of the register used to address a table of static
+ data addresses in memory. */
+#define PIC_OFFSET_TABLE_REGNUM arm_pic_register
+
+/* We can't directly access anything that contains a symbol,
+ nor can we indirect via the constant pool. One exception is
+ UNSPEC_TLS, which is always PIC. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ (!(symbol_mentioned_p (X) \
+ || label_mentioned_p (X) \
+ || (GET_CODE (X) == SYMBOL_REF \
+ && CONSTANT_POOL_ADDRESS_P (X) \
+ && (symbol_mentioned_p (get_pool_constant (X)) \
+ || label_mentioned_p (get_pool_constant (X))))) \
+ || tls_mentioned_p (X))
+
+/* We need to know when we are making a constant pool; this determines
+ whether data needs to be in the GOT or can be referenced via a GOT
+ offset. */
+extern int making_const_table;
+
+/* Handle pragmas for compatibility with Intel's compilers. */
+/* Also abuse this to register additional C specific EABI attributes. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "long_calls", arm_pr_long_calls); \
+ c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \
+ c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \
+ arm_lang_object_attributes_init(); \
+} while (0)
+
+/* Condition code information. */
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. */
+
+#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y)
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+#define REVERSE_CONDITION(CODE,MODE) \
+ (((MODE) == CCFPmode || (MODE) == CCFPEmode) \
+ ? reverse_condition_maybe_unordered (code) \
+ : reverse_condition (code))
+
+#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
+ (CODE) = arm_canonicalize_comparison (CODE, &(OP0), &(OP1))
+
+/* The arm5 clz instruction returns 32. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+
+#define CC_STATUS_INIT \
+ do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0)
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \
+ TARGET_THUMB2 ? "\t.thumb\n" : "")
+
+/* Output a push or a pop instruction (only used when profiling).
+ We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1. We know
+ that ASM_OUTPUT_REG_PUSH will be matched with ASM_OUTPUT_REG_POP, and
+ that r7 isn't used by the function profiler, so we can use it as a
+ scratch reg. WARNING: This isn't safe in the general case! It may be
+ sensitive to future changes in final.c:profile_function. */
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
+ do \
+ { \
+ if (TARGET_ARM) \
+ asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \
+ STACK_POINTER_REGNUM, REGNO); \
+ else if (TARGET_THUMB1 \
+ && (REGNO) == STATIC_CHAIN_REGNUM) \
+ { \
+ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
+ asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\
+ asm_fprintf (STREAM, "\tpush\t{r7}\n"); \
+ } \
+ else \
+ asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \
+ } while (0)
+
+
+/* See comment for ASM_OUTPUT_REG_PUSH concerning Thumb-1 issue. */
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+ do \
+ { \
+ if (TARGET_ARM) \
+ asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \
+ STACK_POINTER_REGNUM, REGNO); \
+ else if (TARGET_THUMB1 \
+ && (REGNO) == STATIC_CHAIN_REGNUM) \
+ { \
+ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
+ asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\
+ asm_fprintf (STREAM, "\tpop\t{r7}\n"); \
+ } \
+ else \
+ asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \
+ } while (0)
+
+/* Jump table alignment is explicit in ASM_OUTPUT_CASE_LABEL. */
+#define ADDR_VEC_ALIGN(JUMPTABLE) 0
+
+/* This is how to output a label which precedes a jumptable. Since
+ Thumb instructions are 2 bytes, we may need explicit alignment here. */
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \
+ do \
+ { \
+ if (TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) \
+ ASM_OUTPUT_ALIGN (FILE, 2); \
+ (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \
+ } \
+ while (0)
+
+/* Make sure subsequent insns are aligned after a TBB. */
+#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \
+ do \
+ { \
+ if (GET_MODE (PATTERN (JUMPTABLE)) == QImode) \
+ ASM_OUTPUT_ALIGN (FILE, 1); \
+ } \
+ while (0)
+
+#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ if (TARGET_THUMB) \
+ { \
+ if (is_called_in_ARM_mode (DECL) \
+ || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \
+ && cfun->is_thunk)) \
+ fprintf (STREAM, "\t.code 32\n") ; \
+ else if (TARGET_THUMB1) \
+ fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \
+ else \
+ fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \
+ } \
+ if (TARGET_POKE_FUNCTION_NAME) \
+ arm_poke_function_name (STREAM, (const char *) NAME); \
+ } \
+ while (0)
+
+/* For aliases of functions we use .thumb_set instead. */
+#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \
+ do \
+ { \
+ const char *const LABEL1 = XSTR (XEXP (DECL_RTL (decl), 0), 0); \
+ const char *const LABEL2 = IDENTIFIER_POINTER (DECL2); \
+ \
+ if (TARGET_THUMB && TREE_CODE (DECL1) == FUNCTION_DECL) \
+ { \
+ fprintf (FILE, "\t.thumb_set "); \
+ assemble_name (FILE, LABEL1); \
+ fprintf (FILE, ","); \
+ assemble_name (FILE, LABEL2); \
+ fprintf (FILE, "\n"); \
+ } \
+ else \
+ ASM_OUTPUT_DEF (FILE, LABEL1, LABEL2); \
+ } \
+ while (0)
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* To support -falign-* switches we need to use .p2align so
+ that alignment directives in code sections will be padded
+ with no-op instructions, rather than zeroes. */
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) \
+ { \
+ if ((MAX_SKIP) == 0) \
+ fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \
+ else \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", \
+ (int) (LOG), (int) (MAX_SKIP)); \
+ }
+#endif
+
+/* Add two bytes to the length of conditionally executed Thumb-2
+ instructions for the IT instruction. */
+#define ADJUST_INSN_LENGTH(insn, length) \
+ if (TARGET_THUMB2 && GET_CODE (PATTERN (insn)) == COND_EXEC) \
+ length += 2;
+
+/* Only perform branch elimination (by making instructions conditional) if
+ we're optimizing. For Thumb-2 check if any IT instructions need
+ outputting. */
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ if (TARGET_ARM && optimize) \
+ arm_final_prescan_insn (INSN); \
+ else if (TARGET_THUMB2) \
+ thumb2_final_prescan_insn (INSN); \
+ else if (TARGET_THUMB1) \
+ thumb1_final_prescan_insn (INSN)
+
+#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \
+ (HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \
+ : ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\
+ ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0x80000000) \
+ ? ((~ (unsigned HOST_WIDE_INT) 0) \
+ & ~ (unsigned HOST_WIDE_INT) 0xffffffff) \
+ : 0))))
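+
+/* Sketch of the effect on a 64-bit host: ARM_SIGN_EXTEND (0x80000000)
+ yields 0xffffffff80000000 while 0x7fffffff is returned unchanged,
+ i.e. a 32-bit target constant is sign-extended into the host-wide
+ integer. */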
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame. */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ arm_return_addr (COUNT, FRAME)
+
+/* Mask of the bits in the PC that contain the real return address
+ when running in 26-bit mode. */
+#define RETURN_ADDR_MASK26 (0x03fffffc)
+
+/* Pick up the return address upon entry to a procedure. Used for
+ dwarf2 unwind information. This also enables the table driven
+ mechanism. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM)
+
+/* Used to mask out junk bits from the return address, such as
+ processor state, interrupt status, condition codes and the like. */
+#define MASK_RETURN_ADDR \
+ /* If we are generating code for an ARM2/ARM3 machine or for an ARM6 \
+ in 26 bit mode, the condition codes must be masked out of the \
+ return address. This does not apply to ARM6 and later processors \
+ when running in 32 bit mode. */ \
+ ((arm_arch4 || TARGET_THUMB) \
+ ? (gen_int_mode ((unsigned long)0xffffffff, Pmode)) \
+ : arm_gen_return_addr_mask ())
+
+
+/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
+ symbolic names defined here (which would require too much duplication).
+ FIXME? */
+enum arm_builtins
+{
+ ARM_BUILTIN_GETWCX,
+ ARM_BUILTIN_SETWCX,
+
+ ARM_BUILTIN_WZERO,
+
+ ARM_BUILTIN_WAVG2BR,
+ ARM_BUILTIN_WAVG2HR,
+ ARM_BUILTIN_WAVG2B,
+ ARM_BUILTIN_WAVG2H,
+
+ ARM_BUILTIN_WACCB,
+ ARM_BUILTIN_WACCH,
+ ARM_BUILTIN_WACCW,
+
+ ARM_BUILTIN_WMACS,
+ ARM_BUILTIN_WMACSZ,
+ ARM_BUILTIN_WMACU,
+ ARM_BUILTIN_WMACUZ,
+
+ ARM_BUILTIN_WSADB,
+ ARM_BUILTIN_WSADBZ,
+ ARM_BUILTIN_WSADH,
+ ARM_BUILTIN_WSADHZ,
+
+ ARM_BUILTIN_WALIGN,
+
+ ARM_BUILTIN_TMIA,
+ ARM_BUILTIN_TMIAPH,
+ ARM_BUILTIN_TMIABB,
+ ARM_BUILTIN_TMIABT,
+ ARM_BUILTIN_TMIATB,
+ ARM_BUILTIN_TMIATT,
+
+ ARM_BUILTIN_TMOVMSKB,
+ ARM_BUILTIN_TMOVMSKH,
+ ARM_BUILTIN_TMOVMSKW,
+
+ ARM_BUILTIN_TBCSTB,
+ ARM_BUILTIN_TBCSTH,
+ ARM_BUILTIN_TBCSTW,
+
+ ARM_BUILTIN_WMADDS,
+ ARM_BUILTIN_WMADDU,
+
+ ARM_BUILTIN_WPACKHSS,
+ ARM_BUILTIN_WPACKWSS,
+ ARM_BUILTIN_WPACKDSS,
+ ARM_BUILTIN_WPACKHUS,
+ ARM_BUILTIN_WPACKWUS,
+ ARM_BUILTIN_WPACKDUS,
+
+ ARM_BUILTIN_WADDB,
+ ARM_BUILTIN_WADDH,
+ ARM_BUILTIN_WADDW,
+ ARM_BUILTIN_WADDSSB,
+ ARM_BUILTIN_WADDSSH,
+ ARM_BUILTIN_WADDSSW,
+ ARM_BUILTIN_WADDUSB,
+ ARM_BUILTIN_WADDUSH,
+ ARM_BUILTIN_WADDUSW,
+ ARM_BUILTIN_WSUBB,
+ ARM_BUILTIN_WSUBH,
+ ARM_BUILTIN_WSUBW,
+ ARM_BUILTIN_WSUBSSB,
+ ARM_BUILTIN_WSUBSSH,
+ ARM_BUILTIN_WSUBSSW,
+ ARM_BUILTIN_WSUBUSB,
+ ARM_BUILTIN_WSUBUSH,
+ ARM_BUILTIN_WSUBUSW,
+
+ ARM_BUILTIN_WAND,
+ ARM_BUILTIN_WANDN,
+ ARM_BUILTIN_WOR,
+ ARM_BUILTIN_WXOR,
+
+ ARM_BUILTIN_WCMPEQB,
+ ARM_BUILTIN_WCMPEQH,
+ ARM_BUILTIN_WCMPEQW,
+ ARM_BUILTIN_WCMPGTUB,
+ ARM_BUILTIN_WCMPGTUH,
+ ARM_BUILTIN_WCMPGTUW,
+ ARM_BUILTIN_WCMPGTSB,
+ ARM_BUILTIN_WCMPGTSH,
+ ARM_BUILTIN_WCMPGTSW,
+
+ ARM_BUILTIN_TEXTRMSB,
+ ARM_BUILTIN_TEXTRMSH,
+ ARM_BUILTIN_TEXTRMSW,
+ ARM_BUILTIN_TEXTRMUB,
+ ARM_BUILTIN_TEXTRMUH,
+ ARM_BUILTIN_TEXTRMUW,
+ ARM_BUILTIN_TINSRB,
+ ARM_BUILTIN_TINSRH,
+ ARM_BUILTIN_TINSRW,
+
+ ARM_BUILTIN_WMAXSW,
+ ARM_BUILTIN_WMAXSH,
+ ARM_BUILTIN_WMAXSB,
+ ARM_BUILTIN_WMAXUW,
+ ARM_BUILTIN_WMAXUH,
+ ARM_BUILTIN_WMAXUB,
+ ARM_BUILTIN_WMINSW,
+ ARM_BUILTIN_WMINSH,
+ ARM_BUILTIN_WMINSB,
+ ARM_BUILTIN_WMINUW,
+ ARM_BUILTIN_WMINUH,
+ ARM_BUILTIN_WMINUB,
+
+ ARM_BUILTIN_WMULUM,
+ ARM_BUILTIN_WMULSM,
+ ARM_BUILTIN_WMULUL,
+
+ ARM_BUILTIN_PSADBH,
+ ARM_BUILTIN_WSHUFH,
+
+ ARM_BUILTIN_WSLLH,
+ ARM_BUILTIN_WSLLW,
+ ARM_BUILTIN_WSLLD,
+ ARM_BUILTIN_WSRAH,
+ ARM_BUILTIN_WSRAW,
+ ARM_BUILTIN_WSRAD,
+ ARM_BUILTIN_WSRLH,
+ ARM_BUILTIN_WSRLW,
+ ARM_BUILTIN_WSRLD,
+ ARM_BUILTIN_WRORH,
+ ARM_BUILTIN_WRORW,
+ ARM_BUILTIN_WRORD,
+ ARM_BUILTIN_WSLLHI,
+ ARM_BUILTIN_WSLLWI,
+ ARM_BUILTIN_WSLLDI,
+ ARM_BUILTIN_WSRAHI,
+ ARM_BUILTIN_WSRAWI,
+ ARM_BUILTIN_WSRADI,
+ ARM_BUILTIN_WSRLHI,
+ ARM_BUILTIN_WSRLWI,
+ ARM_BUILTIN_WSRLDI,
+ ARM_BUILTIN_WRORHI,
+ ARM_BUILTIN_WRORWI,
+ ARM_BUILTIN_WRORDI,
+
+ ARM_BUILTIN_WUNPCKIHB,
+ ARM_BUILTIN_WUNPCKIHH,
+ ARM_BUILTIN_WUNPCKIHW,
+ ARM_BUILTIN_WUNPCKILB,
+ ARM_BUILTIN_WUNPCKILH,
+ ARM_BUILTIN_WUNPCKILW,
+
+ ARM_BUILTIN_WUNPCKEHSB,
+ ARM_BUILTIN_WUNPCKEHSH,
+ ARM_BUILTIN_WUNPCKEHSW,
+ ARM_BUILTIN_WUNPCKEHUB,
+ ARM_BUILTIN_WUNPCKEHUH,
+ ARM_BUILTIN_WUNPCKEHUW,
+ ARM_BUILTIN_WUNPCKELSB,
+ ARM_BUILTIN_WUNPCKELSH,
+ ARM_BUILTIN_WUNPCKELSW,
+ ARM_BUILTIN_WUNPCKELUB,
+ ARM_BUILTIN_WUNPCKELUH,
+ ARM_BUILTIN_WUNPCKELUW,
+
+ ARM_BUILTIN_THREAD_POINTER,
+
+ ARM_BUILTIN_NEON_BASE,
+
+ ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE /* FIXME: Wrong! */
+};
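+
+/* A minimal usage sketch (assumes the __builtin_arm_* decl names that
+   arm_init_builtins registers for these enumerators; illustration only):
+
+     typedef long long __m64;
+     __m64 get_zero (void)
+     {
+       return __builtin_arm_wzero ();   (ARM_BUILTIN_WZERO)
+     }
+
+   arm_expand_builtin then maps such a call back onto the matching
+   iWMMXt insn pattern.  */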
+
+/* Do not emit .note.GNU-stack by default. */
+#ifndef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 0
+#endif
+
+/* The maximum number of parallel loads or stores we support in an ldm/stm
+ instruction. */
+#define MAX_LDM_STM_OPS 4
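+
+/* Illustrative limit (sketch, not from the original header): the widest
+   block formed is four registers, e.g.
+     ldmia  r0!, {r1, r2, r3, r4}
+   larger groups are left as multiple transfers.  */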
+
+#endif /* ! GCC_ARM_H */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
new file mode 100644
index 000000000..130053b0b
--- /dev/null
+++ b/gcc/config/arm/arm.md
@@ -0,0 +1,10746 @@
+;;- Machine description for ARM for GNU compiler
+;; Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+;; and Martin Simmons (@harleqn.co.uk).
+;; More major hacks by Richard Earnshaw (rearnsha@arm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+
+;;---------------------------------------------------------------------------
+;; Constants
+
+;; Register numbers
+(define_constants
+ [(R0_REGNUM 0) ; First CORE register
+ (IP_REGNUM 12) ; Scratch register
+ (SP_REGNUM 13) ; Stack pointer
+ (LR_REGNUM 14) ; Return address register
+ (PC_REGNUM 15) ; Program counter
+ (CC_REGNUM 24) ; Condition code pseudo register
+ (LAST_ARM_REGNUM 15) ;
+ (FPA_F0_REGNUM 16) ; FIRST_FPA_REGNUM
+ (FPA_F7_REGNUM 23) ; LAST_FPA_REGNUM
+ ]
+)
+;; 3rd operand to select_dominance_cc_mode
+(define_constants
+ [(DOM_CC_X_AND_Y 0)
+ (DOM_CC_NX_OR_Y 1)
+ (DOM_CC_X_OR_Y 2)
+ ]
+)
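+
+;; A sketch of what these select (illustrative example, not a pattern from
+;; this file): a combined test such as "a == 0 && b == 0" can be emitted as
+;;   cmp   a, #0
+;;   cmpeq b, #0
+;; with DOM_CC_X_AND_Y telling select_dominance_cc_mode that the final
+;; condition holds only when both component comparisons do.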
+
+;; UNSPEC Usage:
+;; Note: sin and cos are no longer used.
+;; Unspec constants for Neon are defined in neon.md.
+
+(define_constants
+ [(UNSPEC_SIN 0) ; `sin' operation (MODE_FLOAT):
+ ; operand 0 is the result,
+ ; operand 1 the parameter.
+ (UNSPEC_COS 1) ; `cos' operation (MODE_FLOAT):
+ ; operand 0 is the result,
+ ; operand 1 the parameter.
+ (UNSPEC_PUSH_MULT 2) ; `push multiple' operation:
+ ; operand 0 is the first register,
+ ; subsequent registers are in parallel (use ...)
+ ; expressions.
+ (UNSPEC_PIC_SYM 3) ; A symbol that has been treated properly for pic
+ ; usage, that is, we will add the pic_register
+ ; value to it before trying to dereference it.
+ (UNSPEC_PIC_BASE 4) ; Add PC and all but the last operand together.
+ ; The last operand is the number of a PIC_LABEL
+ ; that points at the containing instruction.
+ (UNSPEC_PRLG_STK 5) ; A special barrier that prevents frame accesses
+ ; being scheduled before the stack adjustment insn.
+ (UNSPEC_PROLOGUE_USE 6) ; As USE insns are not meaningful after reload,
+ ; this unspec is used to prevent the deletion of
+ ; instructions setting registers for EH handling
+ ; and stack frame generation. Operand 0 is the
+ ; register to "use".
+ (UNSPEC_CHECK_ARCH 7) ; Set CCs to indicate 26-bit or 32-bit mode.
+ (UNSPEC_WSHUFH 8) ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
+ (UNSPEC_WACC 9) ; Used by the intrinsic form of the iWMMXt WACC instruction.
+ (UNSPEC_TMOVMSK 10) ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction.
+ (UNSPEC_WSAD 11) ; Used by the intrinsic form of the iWMMXt WSAD instruction.
+ (UNSPEC_WSADZ 12) ; Used by the intrinsic form of the iWMMXt WSADZ instruction.
+ (UNSPEC_WMACS 13) ; Used by the intrinsic form of the iWMMXt WMACS instruction.
+ (UNSPEC_WMACU 14) ; Used by the intrinsic form of the iWMMXt WMACU instruction.
+ (UNSPEC_WMACSZ 15) ; Used by the intrinsic form of the iWMMXt WMACSZ instruction.
+ (UNSPEC_WMACUZ 16) ; Used by the intrinsic form of the iWMMXt WMACUZ instruction.
+ (UNSPEC_CLRDI 17) ; Used by the intrinsic form of the iWMMXt CLRDI instruction.
+ (UNSPEC_WMADDS 18) ; Used by the intrinsic form of the iWMMXt WMADDS instruction.
+ (UNSPEC_WMADDU 19) ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
+ (UNSPEC_TLS 20) ; A symbol that has been treated properly for TLS usage.
+ (UNSPEC_PIC_LABEL 21) ; A label used for PIC access that does not appear in the
+ ; instruction stream.
+ (UNSPEC_STACK_ALIGN 22) ; Doubleword aligned stack pointer. Used to
+ ; generate correct unwind information.
+ (UNSPEC_PIC_OFFSET 23) ; A symbolic 12-bit OFFSET that has been treated
+ ; correctly for PIC usage.
+ (UNSPEC_GOTSYM_OFF 24) ; The offset of the start of the GOT from
+ ; a given symbolic address.
+ (UNSPEC_THUMB1_CASESI 25) ; A Thumb1 compressed dispatch-table call.
+ (UNSPEC_RBIT 26) ; rbit operation.
+ (UNSPEC_SYMBOL_OFFSET 27) ; The offset of the start of the symbol from
+ ; another symbolic address.
+ (UNSPEC_MEMORY_BARRIER 28) ; Represent a memory barrier.
+ (UNSPEC_PIC_UNIFIED 29) ; Create a common pic addressing form.
+ ]
+)
+
+;; UNSPEC_VOLATILE Usage:
+
+(define_constants
+ [(VUNSPEC_BLOCKAGE 0) ; `blockage' insn to prevent scheduling across an
+ ; insn in the code.
+ (VUNSPEC_EPILOGUE 1) ; `epilogue' insn, used to represent any part of the
+ ; instruction epilogue sequence that isn't expanded
+ ; into normal RTL. Used for both normal and sibcall
+ ; epilogues.
+ (VUNSPEC_ALIGN 2) ; `align' insn. Used at the head of a minipool table
+ ; for inlined constants.
+ (VUNSPEC_POOL_END 3) ; `end-of-table'. Used to mark the end of a minipool
+ ; table.
+ (VUNSPEC_POOL_1 4) ; `pool-entry(1)'. An entry in the constant pool for
+ ; an 8-bit object.
+ (VUNSPEC_POOL_2 5) ; `pool-entry(2)'. An entry in the constant pool for
+ ; a 16-bit object.
+ (VUNSPEC_POOL_4 6) ; `pool-entry(4)'. An entry in the constant pool for
+ ; a 32-bit object.
+ (VUNSPEC_POOL_8 7) ; `pool-entry(8)'. An entry in the constant pool for
+ ; a 64-bit object.
+ (VUNSPEC_POOL_16 8) ; `pool-entry(16)'. An entry in the constant pool for
+ ; a 128-bit object.
+ (VUNSPEC_TMRC 9) ; Used by the iWMMXt TMRC instruction.
+ (VUNSPEC_TMCR 10) ; Used by the iWMMXt TMCR instruction.
+ (VUNSPEC_ALIGN8 11) ; 8-byte alignment version of VUNSPEC_ALIGN
+ (VUNSPEC_WCMP_EQ 12) ; Used by the iWMMXt WCMPEQ instructions
+ (VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions
+ (VUNSPEC_WCMP_GT 14) ; Used by the iWMMXt WCMPGT instructions
+ (VUNSPEC_EH_RETURN 20); Use to override the return address for exception
+ ; handling.
+ (VUNSPEC_SYNC_COMPARE_AND_SWAP 21) ; Represent an atomic compare swap.
+ (VUNSPEC_SYNC_LOCK 22) ; Represent a sync_lock_test_and_set.
+ (VUNSPEC_SYNC_OP 23) ; Represent a sync_<op>
+ (VUNSPEC_SYNC_NEW_OP 24) ; Represent a sync_new_<op>
+ (VUNSPEC_SYNC_OLD_OP 25) ; Represent a sync_old_<op>
+ ]
+)
+
+;;---------------------------------------------------------------------------
+;; Attributes
+
+; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when
+; generating ARM code. This is used to control the length of some insn
+; patterns that share the same RTL in both ARM and Thumb code.
+(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code")))
+
+; IS_ARCH6 is set to 'yes' when we are generating code for ARMv6.
+(define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6")))
+
+; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code.
+(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code")))
+
+;; Operand number of an input operand that is shifted. Zero if the
+;; given instruction does not shift one of its input operands.
+(define_attr "shift" "" (const_int 0))
+
+; Floating Point Unit. If we only have floating point emulation, then there
+; is no point in scheduling the floating point insns. (Well, for best
+; performance we should try to group them together.)
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
+ (const (symbol_ref "arm_fpu_attr")))
+
+(define_attr "sync_result" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_memory" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_required_value" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_new_value" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t1" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_t2" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_release_barrier" "yes,no" (const_string "yes"))
+(define_attr "sync_op" "none,add,sub,ior,xor,and,nand"
+ (const_string "none"))
+
+; LENGTH of an instruction (in bytes)
+(define_attr "length" ""
+ (cond [(not (eq_attr "sync_memory" "none"))
+ (symbol_ref "arm_sync_loop_insns (insn, operands) * 4")
+ ] (const_int 4)))
+
+; The architecture which supports the instruction (or alternative).
+; This can be "a" for ARM, "t" for either of the Thumbs, "32" for
+; TARGET_32BIT, "t1" or "t2" for a specific Thumb mode, "v6" for
+; ARM or Thumb-2 with arm_arch6, and "nov6" for ARM without
+; arm_arch6. This attribute is used to compute attribute "enabled";
+; use "any" to enable an alternative in all cases.
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6"
+ (const_string "any"))
+
+(define_attr "arch_enabled" "no,yes"
+ (cond [(eq_attr "arch" "any")
+ (const_string "yes")
+
+ (and (eq_attr "arch" "a")
+ (ne (symbol_ref "TARGET_ARM") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "t")
+ (ne (symbol_ref "TARGET_THUMB") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "t1")
+ (ne (symbol_ref "TARGET_THUMB1") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "t2")
+ (ne (symbol_ref "TARGET_THUMB2") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "32")
+ (ne (symbol_ref "TARGET_32BIT") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "v6")
+ (ne (symbol_ref "(TARGET_32BIT && arm_arch6)") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "arch" "nov6")
+ (ne (symbol_ref "(TARGET_32BIT && !arm_arch6)") (const_int 0)))
+ (const_string "yes")]
+ (const_string "no")))
+
+; Allows an insn to disable certain alternatives for reasons other than
+; arch support.
+(define_attr "insn_enabled" "no,yes"
+ (const_string "yes"))
+
+; Enable all alternatives that are both arch_enabled and insn_enabled.
+(define_attr "enabled" "no,yes"
+ (if_then_else (eq_attr "insn_enabled" "yes")
+ (if_then_else (eq_attr "arch_enabled" "yes")
+ (const_string "yes")
+ (const_string "no"))
+ (const_string "no")))
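+
+; A sketch of how "arch" feeds this (hypothetical pattern, illustration
+; only): an insn whose second alternative is Thumb-2 only would say
+;   (set_attr "arch" "any,t2")
+; and the computation above disables that alternative whenever
+; TARGET_THUMB2 is false.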
+
+; POOL_RANGE is how far away from a constant pool entry this insn
+; can be placed. If the distance is zero, then this insn will never
+; reference the pool.
+; NEG_POOL_RANGE is nonzero for insns that can reference a constant pool entry
+; before its address.
+(define_attr "arm_pool_range" "" (const_int 0))
+(define_attr "thumb2_pool_range" "" (const_int 0))
+(define_attr "arm_neg_pool_range" "" (const_int 0))
+(define_attr "thumb2_neg_pool_range" "" (const_int 0))
+
+(define_attr "pool_range" ""
+ (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_pool_range")]
+ (attr "arm_pool_range")))
+(define_attr "neg_pool_range" ""
+ (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_neg_pool_range")]
+ (attr "arm_neg_pool_range")))
+
+; An assembler sequence may clobber the condition codes without us knowing.
+; If such an insn references the pool, then we have no way of knowing how,
+; so use the most conservative value for pool_range.
+(define_asm_attributes
+ [(set_attr "conds" "clob")
+ (set_attr "length" "4")
+ (set_attr "pool_range" "250")])
+
+;; The instruction used to implement a particular pattern. This
+;; information is used by pipeline descriptions to provide accurate
+;; scheduling information.
+
+(define_attr "insn"
+ "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,other"
+ (const_string "other"))
+
+; TYPE attribute is used to detect floating point instructions which, if
+; running on a co-processor, can run in parallel with other, basic
+; instructions. If write-buffer scheduling is enabled then it can also be
+; used in the scheduling of writes.
+
+; Classification of each insn
+; Note: vfp.md has different meanings for some of these, and some further
+; types as well. See that file for details.
+; alu any alu instruction that doesn't hit memory or fp
+; regs or have a shifted source operand
+; alu_shift any data instruction that doesn't hit memory or fp
+; regs, but has a source operand shifted by a constant
+; alu_shift_reg any data instruction that doesn't hit memory or fp
+; regs, but has a source operand shifted by a register value
+; mult a multiply instruction
+; block blockage insn, this blocks all functional units
+; float a floating point arithmetic operation (subject to expansion)
+; fdivd DFmode floating point division
+; fdivs SFmode floating point division
+; fmul Floating point multiply
+; ffmul Fast floating point multiply
+; farith Floating point arithmetic (4 cycle)
+; ffarith Fast floating point arithmetic (2 cycle)
+; float_em a floating point arithmetic operation that is normally emulated
+; even on a machine with an fpa.
+; f_fpa_load a floating point load from memory. Only for the FPA.
+; f_fpa_store a floating point store to memory. Only for the FPA.
+; f_load[sd] A single/double load from memory. Used for VFP unit.
+; f_store[sd] A single/double store to memory. Used for VFP unit.
+; f_flag a transfer of co-processor flags to the CPSR
+; f_mem_r a transfer of a floating point register to a real reg via mem
+; r_mem_f the reverse of f_mem_r
+; f_2_r fast transfer float to arm (no memory needed)
+; r_2_f fast transfer arm to float
+; f_cvt convert floating<->integral
+; branch a branch
+; call a subroutine call
+; load_byte load byte(s) from memory to arm registers
+; load1 load 1 word from memory to arm registers
+; load2 load 2 words from memory to arm registers
+; load3 load 3 words from memory to arm registers
+; load4 load 4 words from memory to arm registers
+; store store 1 word to memory from arm registers
+; store2 store 2 words
+; store3 store 3 words
+; store4 store 4 (or more) words
+; Additions for Cirrus Maverick co-processor:
+; mav_farith Floating point arithmetic (4 cycle)
+; mav_dmult Double multiplies (7 cycle)
+;
+
+(define_attr "type"
+ "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_fpa_load,f_fpa_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys"
+ (if_then_else
+ (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
+ (const_string "mult")
+ (const_string "alu")))
+
+; Load scheduling, set from the arm_ld_sched variable
+; initialized by arm_option_override()
+(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
+
+;; Classification of NEON instructions for scheduling purposes.
+;; Do not set this attribute and the "type" attribute together in
+;; any one instruction pattern.
+(define_attr "neon_type"
+ "neon_int_1,\
+ neon_int_2,\
+ neon_int_3,\
+ neon_int_4,\
+ neon_int_5,\
+ neon_vqneg_vqabs,\
+ neon_vmov,\
+ neon_vaba,\
+ neon_vsma,\
+ neon_vaba_qqq,\
+ neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ neon_mul_qqq_8_16_32_ddd_32,\
+ neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,\
+ neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ neon_mla_qqq_8_16,\
+ neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,\
+ neon_mla_qqq_32_qqd_32_scalar,\
+ neon_mul_ddd_16_scalar_32_16_long_scalar,\
+ neon_mul_qqd_32_scalar,\
+ neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,\
+ neon_shift_1,\
+ neon_shift_2,\
+ neon_shift_3,\
+ neon_vshl_ddd,\
+ neon_vqshl_vrshl_vqrshl_qqq,\
+ neon_vsra_vrsra,\
+ neon_fp_vadd_ddd_vabs_dd,\
+ neon_fp_vadd_qqq_vabs_qq,\
+ neon_fp_vsum,\
+ neon_fp_vmul_ddd,\
+ neon_fp_vmul_qqd,\
+ neon_fp_vmla_ddd,\
+ neon_fp_vmla_qqq,\
+ neon_fp_vmla_ddd_scalar,\
+ neon_fp_vmla_qqq_scalar,\
+ neon_fp_vrecps_vrsqrts_ddd,\
+ neon_fp_vrecps_vrsqrts_qqq,\
+ neon_bp_simple,\
+ neon_bp_2cycle,\
+ neon_bp_3cycle,\
+ neon_ldr,\
+ neon_str,\
+ neon_vld1_1_2_regs,\
+ neon_vld1_3_4_regs,\
+ neon_vld2_2_regs_vld1_vld2_all_lanes,\
+ neon_vld2_4_regs,\
+ neon_vld3_vld4,\
+ neon_vst1_1_2_regs_vst2_2_regs,\
+ neon_vst1_3_4_regs,\
+ neon_vst2_4_regs_vst3_vst4,\
+ neon_vst3_vst4,\
+ neon_vld1_vld2_lane,\
+ neon_vld3_vld4_lane,\
+ neon_vst1_vst2_lane,\
+ neon_vst3_vst4_lane,\
+ neon_vld3_vld4_all_lanes,\
+ neon_mcr,\
+ neon_mcr_2_mcrr,\
+ neon_mrc,\
+ neon_mrrc,\
+ neon_ldm_2,\
+ neon_stm_2,\
+ none"
+ (const_string "none"))
+
+; condition codes: this one is used by final_prescan_insn to speed up
+; conditionalizing instructions. It saves having to scan the rtl to see if
+; it uses or alters the condition codes.
+;
+; USE means that the condition codes are used by the insn in the process of
+; outputting code; this means (at present) that we can't use the insn in
+; inlined branches.
+;
+; SET means that the purpose of the insn is to set the condition codes in a
+; well defined manner.
+;
+; CLOB means that the condition codes are altered in an undefined manner, if
+; they are altered at all
+;
+; UNCONDITIONAL means the instruction cannot be conditionally executed and
+; that the instruction does not use or alter the condition codes.
+;
+; NOCOND means that the instruction does not use or alter the condition
+; codes but can be converted into a conditionally executed instruction.
+
+(define_attr "conds" "use,set,clob,unconditional,nocond"
+ (if_then_else
+ (ior (eq_attr "is_thumb1" "yes")
+ (eq_attr "type" "call"))
+ (const_string "clob")
+ (if_then_else (eq_attr "neon_type" "none")
+ (const_string "nocond")
+ (const_string "unconditional"))))
+
+; Predicable means that the insn can be conditionally executed based on
+; an automatically added predicate (additional patterns are generated by
+; gen...). We default to 'no' because no Thumb patterns match this rule
+; and not all ARM patterns do.
+(define_attr "predicable" "no,yes" (const_string "no"))
+
+; Only model the write buffer for ARM6 and ARM7. Earlier processors don't
+; have one. Later ones, such as StrongARM, have write-back caches, so don't
+; suffer blockages enough to warrant modelling this (and it can adversely
+; affect the schedule).
+(define_attr "model_wbuf" "no,yes" (const (symbol_ref "arm_tune_wbuf")))
+
+; WRITE_CONFLICT implies that a read following an unrelated write is likely
+; to stall the processor. Used with model_wbuf above.
+(define_attr "write_conflict" "no,yes"
+ (if_then_else (eq_attr "type"
+ "block,float_em,f_fpa_load,f_fpa_store,f_mem_r,r_mem_f,call,load1")
+ (const_string "yes")
+ (const_string "no")))
+
+; Classify the insns into those that take one cycle and those that take more
+; than one on the main cpu execution unit.
+(define_attr "core_cycles" "single,multi"
+ (if_then_else (eq_attr "type"
+ "alu,alu_shift,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
+ (const_string "single")
+ (const_string "multi")))
+
+;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a
+;; distant label. Only applicable to Thumb code.
+(define_attr "far_jump" "yes,no" (const_string "no"))
+
+
+;; The number of machine instructions this pattern expands to.
+;; Used for Thumb-2 conditional execution.
+(define_attr "ce_count" "" (const_int 1))
+
+;;---------------------------------------------------------------------------
+;; Mode iterators
+
+(include "iterators.md")
+
+;;---------------------------------------------------------------------------
+;; Predicates
+
+(include "predicates.md")
+(include "constraints.md")
+
+;;---------------------------------------------------------------------------
+;; Pipeline descriptions
+
+;; Processor type. This is created automatically from arm-cores.def.
+(include "arm-tune.md")
+
+(define_attr "tune_cortexr4" "yes,no"
+ (const (if_then_else
+ (eq_attr "tune" "cortexr4,cortexr4f")
+ (const_string "yes")
+ (const_string "no"))))
+
+;; True if the generic scheduling description should be used.
+
+(define_attr "generic_sched" "yes,no"
+ (const (if_then_else
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
+ (eq_attr "tune_cortexr4" "yes"))
+ (const_string "no")
+ (const_string "yes"))))
+
+(define_attr "generic_vfp" "yes,no"
+ (const (if_then_else
+ (and (eq_attr "fpu" "vfp")
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
+ (eq_attr "tune_cortexr4" "no"))
+ (const_string "yes")
+ (const_string "no"))))
+
+(include "arm-generic.md")
+(include "arm926ejs.md")
+(include "arm1020e.md")
+(include "arm1026ejs.md")
+(include "arm1136jfs.md")
+(include "fa526.md")
+(include "fa606te.md")
+(include "fa626te.md")
+(include "fmp626.md")
+(include "fa726te.md")
+(include "cortex-a5.md")
+(include "cortex-a8.md")
+(include "cortex-a9.md")
+(include "cortex-r4.md")
+(include "cortex-r4f.md")
+(include "cortex-m4.md")
+(include "cortex-m4-fpu.md")
+(include "vfp11.md")
+
+
+;;---------------------------------------------------------------------------
+;; Insn patterns
+;;
+;; Addition insns.
+
+;; Note: For DImode insns, there is normally no reason why operands should
+;; not be in the same register; what we don't want is for something being
+;; written to partially overlap something that is an input.
+;; Cirrus 64-bit additions should not be split because we have native
+;; 64-bit addition instructions.
+
+(define_expand "adddi3"
+ [(parallel
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (plus:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "s_register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_EITHER"
+ "
+ if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ {
+ if (!cirrus_fp_register (operands[0], DImode))
+ operands[0] = force_reg (DImode, operands[0]);
+ if (!cirrus_fp_register (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_cirrus_adddi3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+
+ if (TARGET_THUMB1)
+ {
+ if (GET_CODE (operands[1]) != REG)
+ operands[1] = force_reg (DImode, operands[1]);
+ if (GET_CODE (operands[2]) != REG)
+ operands[2] = force_reg (DImode, operands[2]);
+ }
+ "
+)
+
+(define_insn "*thumb1_adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=l")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))
+ ]
+ "TARGET_THUMB1"
+ "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2"
+ [(set_attr "length" "4")]
+)
+
+(define_insn_and_split "*arm_adddi3"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
+ (match_operand:DI 2 "s_register_operand" "r, 0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
+ "#"
+ "TARGET_32BIT && reload_completed
+ && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))"
+ [(parallel [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (match_dup 5))
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
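+
+;; Assembly sketch (illustrative register choice): after the split above a
+;; DImode addition becomes the usual carry-propagating pair
+;;   adds  r0, r2, r4   @ low word, sets the carry flag
+;;   adc   r1, r3, r5   @ high word, consumes the carry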
+
+(define_insn_and_split "*adddi_sesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (plus:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)"
+ "#"
+ "TARGET_32BIT && reload_completed"
+ [(parallel [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 3) (plus:SI (plus:SI (ashiftrt:SI (match_dup 2)
+ (const_int 31))
+ (match_dup 4))
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*adddi_zesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (plus:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)"
+ "#"
+ "TARGET_32BIT && reload_completed"
+ [(parallel [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (const_int 0))
+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT && GET_CODE (operands[2]) == CONST_INT)
+ {
+ arm_split_constant (PLUS, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0], operands[1],
+ optimize && can_create_pseudo_p ());
+ DONE;
+ }
+ "
+)
+
+; If there is a scratch available, this will be faster than synthesizing the
+; addition.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (set (match_operand:SI 0 "arm_general_register_operand" "")
+ (plus:SI (match_operand:SI 1 "arm_general_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_32BIT &&
+ !(const_ok_for_arm (INTVAL (operands[2]))
+ || const_ok_for_arm (-INTVAL (operands[2])))
+ && const_ok_for_arm (~INTVAL (operands[2]))"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))]
+ ""
+)
+
+;; The r/r/k alternative is required when reloading the address
+;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
+;; put the duplicated register first, and not try the commutative version.
+(define_insn_and_split "*arm_addsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r")
+ (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]
+ "TARGET_32BIT"
+ "@
+ add%?\\t%0, %1, %2
+ add%?\\t%0, %1, %2
+ add%?\\t%0, %2, %1
+ sub%?\\t%0, %1, #%n2
+ sub%?\\t%0, %1, #%n2
+ #"
+ "TARGET_32BIT
+ && GET_CODE (operands[2]) == CONST_INT
+ && !(const_ok_for_arm (INTVAL (operands[2]))
+ || const_ok_for_arm (-INTVAL (operands[2])))
+ && (reload_completed || !arm_eliminable_register (operands[1]))"
+ [(clobber (const_int 0))]
+ "
+ arm_split_constant (PLUS, SImode, curr_insn,
+ INTVAL (operands[2]), operands[0],
+ operands[1], 0);
+ DONE;
+ "
+ [(set_attr "length" "4,4,4,4,4,16")
+ (set_attr "predicable" "yes")]
+)
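+
+;; Worked example of the constant split (illustrative values): 0x10001 is
+;; not a valid ARM immediate, so arm_split_constant synthesizes the
+;; addition from encodable chunks, e.g.
+;;   add  r0, r1, #65536
+;;   add  r0, r0, #1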
+
+(define_insn_and_split "*thumb1_addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,k,l,l,l")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,k,k,0,l,k")
+ (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,M,O,Pa,Pb,Pc")))]
+ "TARGET_THUMB1"
+ "*
+ static const char * const asms[] =
+ {
+ \"add\\t%0, %0, %2\",
+ \"sub\\t%0, %0, #%n2\",
+ \"add\\t%0, %1, %2\",
+ \"add\\t%0, %0, %2\",
+ \"add\\t%0, %0, %2\",
+ \"add\\t%0, %1, %2\",
+ \"add\\t%0, %1, %2\",
+ \"#\",
+ \"#\",
+ \"#\"
+ };
+ if ((which_alternative == 2 || which_alternative == 6)
+ && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ return \"sub\\t%0, %1, #%n2\";
+ return asms[which_alternative];
+ "
+ "&& reload_completed && CONST_INT_P (operands[2])
+ && ((operands[1] != stack_pointer_rtx
+ && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255))
+ || (operands[1] == stack_pointer_rtx
+ && INTVAL (operands[2]) > 1020))"
+ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))]
+ {
+ HOST_WIDE_INT offset = INTVAL (operands[2]);
+ if (operands[1] == stack_pointer_rtx)
+ offset -= 1020;
+ else
+ {
+ if (offset > 255)
+ offset = 255;
+ else if (offset < -255)
+ offset = -255;
+ }
+ operands[3] = GEN_INT (offset);
+ operands[2] = GEN_INT (INTVAL (operands[2]) - offset);
+ }
+ [(set_attr "length" "2,2,2,2,2,2,2,4,4,4")]
+)
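+
+;; Worked example of the split above (illustrative values): an
+;; out-of-range "r0 += 300" is clamped to a 255-byte step plus the
+;; remainder, giving
+;;   add  r0, r0, #45
+;;   add  r0, r0, #255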
+
+;; Reloading and elimination of the frame pointer can
+;; sometimes cause this optimization to be missed.
+(define_peephole2
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (reg:SI SP_REGNUM)))]
+ "TARGET_THUMB1
+ && (unsigned HOST_WIDE_INT) (INTVAL (operands[1])) < 1024
+ && (INTVAL (operands[1]) & 3) == 0"
+ [(set (match_dup 0) (plus:SI (reg:SI SP_REGNUM) (match_dup 1)))]
+ ""
+)
+
+(define_insn "*addsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (match_operand:SI 1 "s_register_operand" "r, r")
+ (match_operand:SI 2 "arm_add_operand" "rI,L"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_ARM"
+ "@
+ add%.\\t%0, %1, %2
+ sub%.\\t%0, %1, #%n2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*addsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (match_operand:SI 0 "s_register_operand" "r, r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L"))
+ (const_int 0)))]
+ "TARGET_ARM"
+ "@
+ cmn%?\\t%0, %1
+ cmp%?\\t%0, #%n1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*compare_negsi_si"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z
+ (neg:SI (match_operand:SI 0 "s_register_operand" "r"))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT"
+ "cmn%?\\t%1, %0"
+ [(set_attr "conds" "set")]
+)
+
+;; This is the canonicalization of addsi3_compare0_for_combiner when the
+;; addend is a constant.
+(define_insn "*cmpsi2_addneg"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "arm_addimm_operand" "L,I")))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 3 "arm_addimm_operand" "I,L")))]
+ "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])"
+ "@
+ add%.\\t%0, %1, %3
+ sub%.\\t%0, %1, #%n3"
+ [(set_attr "conds" "set")]
+)
+
+;; Convert the sequence
+;; sub rd, rn, #1
+;; cmn rd, #1 (equivalent to cmp rd, #-1)
+;; bne dest
+;; into
+;; subs rd, rn, #1
+;; bcs dest ((unsigned)rn >= 1)
+;; similarly for the beq variant using bcc.
+;; This is a common looping idiom (while (n--))
+(define_peephole2
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (plus:SI (match_operand:SI 1 "arm_general_register_operand" "")
+ (const_int -1)))
+ (set (match_operand 2 "cc_register" "")
+ (compare (match_dup 0) (const_int -1)))
+ (set (pc)
+ (if_then_else (match_operator 3 "equality_operator"
+ [(match_dup 2) (const_int 0)])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))]
+ "TARGET_32BIT && peep2_reg_dead_p (3, operands[2])"
+ [(parallel[
+ (set (match_dup 2)
+ (compare:CC
+ (match_dup 1) (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (const_int -1)))])
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(match_dup 2) (const_int 0)])
+ (match_dup 4)
+ (match_dup 5)))]
+ "operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
+ operands[3] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE
+ ? GEU : LTU),
+ VOIDmode,
+ operands[2], const0_rtx);"
+)
+
+;; The next four insns work because they compare the result with one of
+;; the operands, and we know that the use of the condition code is
+;; either GEU or LTU, so we can use the carry flag from the addition
+;; instead of doing the compare a second time.
+(define_insn "*addsi3_compare_op1"
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "arm_add_operand" "rI,L"))
+ (match_dup 1)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ add%.\\t%0, %1, %2
+ sub%.\\t%0, %1, #%n2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*addsi3_compare_op2"
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "arm_add_operand" "rI,L"))
+ (match_dup 2)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ add%.\\t%0, %1, %2
+ sub%.\\t%0, %1, #%n2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*compare_addsi2_op0"
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 0 "s_register_operand" "r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L"))
+ (match_dup 0)))]
+ "TARGET_32BIT"
+ "@
+ cmn%?\\t%0, %1
+ cmp%?\\t%0, #%n1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*compare_addsi2_op1"
+ [(set (reg:CC_C CC_REGNUM)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 0 "s_register_operand" "r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L"))
+ (match_dup 1)))]
+ "TARGET_32BIT"
+ "@
+ cmn%?\\t%0, %1
+ cmp%?\\t%0, #%n1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*addsi3_carryin_<optab>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI"))
+ (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
+ "TARGET_32BIT"
+ "adc%?\\t%0, %1, %2"
+ [(set_attr "conds" "use")]
+)
+
+(define_insn "*addsi3_carryin_alt2_<optab>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
+ (match_operand:SI 1 "s_register_operand" "%r"))
+ (match_operand:SI 2 "arm_rhs_operand" "rI")))]
+ "TARGET_32BIT"
+ "adc%?\\t%0, %1, %2"
+ [(set_attr "conds" "use")]
+)
+
+(define_insn "*addsi3_carryin_shift_<optab>"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (plus:SI
+ (match_operator:SI 2 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:SI 4 "reg_or_int_operand" "rM")])
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
+ "TARGET_32BIT"
+ "adc%?\\t%0, %1, %3%S2"
+ [(set_attr "conds" "use")
+ (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_expand "incscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand:CC 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "s_register_operand" "0,?r")))]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*arm_incscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand:CC 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "s_register_operand" "0,?r")))]
+ "TARGET_ARM"
+ "@
+ add%d2\\t%0, %1, #1
+ mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8")]
+)
+
+; Transform ((x << y) - 1) to ~(~(x - 1) << y), where x is a constant.
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (plus:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))
+ (const_int -1)))
+ (clobber (match_operand:SI 3 "s_register_operand" ""))]
+ "TARGET_32BIT"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 0) (not:SI (ashift:SI (match_dup 3) (match_dup 2))))]
+ "
+ operands[1] = GEN_INT (~(INTVAL (operands[1]) - 1));
+")
+
+(define_expand "addsf3"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (plus:SF (match_operand:SF 1 "s_register_operand" "")
+ (match_operand:SF 2 "arm_float_add_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ if (TARGET_MAVERICK
+ && !cirrus_fp_register (operands[2], SFmode))
+ operands[2] = force_reg (SFmode, operands[2]);
+")
+
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (plus:DF (match_operand:DF 1 "s_register_operand" "")
+ (match_operand:DF 2 "arm_float_add_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK
+ && !cirrus_fp_register (operands[2], DFmode))
+ operands[2] = force_reg (DFmode, operands[2]);
+")
+
+(define_expand "subdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "s_register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_EITHER"
+ "
+ if (TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && TARGET_32BIT
+ && cirrus_fp_register (operands[0], DImode)
+ && cirrus_fp_register (operands[1], DImode))
+ {
+ emit_insn (gen_cirrus_subdi3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+
+ if (TARGET_THUMB1)
+ {
+ if (GET_CODE (operands[1]) != REG)
+ operands[1] = force_reg (DImode, operands[1]);
+ if (GET_CODE (operands[2]) != REG)
+ operands[2] = force_reg (DImode, operands[2]);
+ }
+ "
+)
+
+(define_insn "*arm_subdi3"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0")
+ (match_operand:DI 2 "s_register_operand" "r,0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !TARGET_NEON"
+ "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*thumb_subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=l")
+ (minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB1"
+ "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "*subdi_di_zesidi"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
+ (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*subdi_di_sesidi"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "0,r")
+ (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*subdi_zesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (minus:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*subdi_sesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (minus:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*subdi_zesidi_zesidi"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (minus:DI (zero_extend:DI
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "")
+ (match_operand:SI 2 "s_register_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (TARGET_32BIT)
+ {
+ arm_split_constant (MINUS, SImode, NULL_RTX,
+ INTVAL (operands[1]), operands[0],
+ operands[2], optimize && can_create_pseudo_p ());
+ DONE;
+ }
+ else /* TARGET_THUMB1 */
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ "
+)
+
+(define_insn "thumb1_subsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (minus:SI (match_operand:SI 1 "register_operand" "l")
+ (match_operand:SI 2 "reg_or_int_operand" "lPd")))]
+ "TARGET_THUMB1"
+ "sub\\t%0, %1, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+; ??? Check Thumb-2 split length
+(define_insn_and_split "*arm_subsi3_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,rk,r")
+ (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,k,?n")
+ (match_operand:SI 2 "reg_or_int_operand" "r,rI,r, r")))]
+ "TARGET_32BIT"
+ "@
+ rsb%?\\t%0, %2, %1
+ sub%?\\t%0, %1, %2
+ sub%?\\t%0, %1, %2
+ #"
+ "&& (GET_CODE (operands[1]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[1])))"
+ [(clobber (const_int 0))]
+ "
+ arm_split_constant (MINUS, SImode, curr_insn,
+ INTVAL (operands[1]), operands[0], operands[2], 0);
+ DONE;
+ "
+ [(set_attr "length" "4,4,4,16")
+ (set_attr "predicable" "yes")]
+)
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (set (match_operand:SI 0 "arm_general_register_operand" "")
+ (minus:SI (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "arm_general_register_operand" "")))]
+ "TARGET_32BIT
+ && !const_ok_for_arm (INTVAL (operands[1]))
+ && const_ok_for_arm (~INTVAL (operands[1]))"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 0) (minus:SI (match_dup 3) (match_dup 2)))]
+ ""
+)
+
+(define_insn "*subsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,I")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ sub%.\\t%0, %1, %2
+ rsb%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*subsi3_compare"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,I")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,r")))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ sub%.\\t%0, %1, %2
+ rsb%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")]
+)
+
+(define_expand "decscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+ (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])))]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*arm_decscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+ (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])))]
+ "TARGET_ARM"
+ "@
+ sub%d2\\t%0, %1, #1
+ mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "*,8")]
+)
+
+(define_expand "subsf3"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (minus:SF (match_operand:SF 1 "arm_float_rhs_operand" "")
+ (match_operand:SF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ if (TARGET_MAVERICK)
+ {
+ if (!cirrus_fp_register (operands[1], SFmode))
+ operands[1] = force_reg (SFmode, operands[1]);
+ if (!cirrus_fp_register (operands[2], SFmode))
+ operands[2] = force_reg (SFmode, operands[2]);
+ }
+")
+
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK)
+ {
+ if (!cirrus_fp_register (operands[1], DFmode))
+ operands[1] = force_reg (DFmode, operands[1]);
+ if (!cirrus_fp_register (operands[2], DFmode))
+ operands[2] = force_reg (DFmode, operands[2]);
+ }
+")
+
+
+;; Multiplication insns
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (mult:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_EITHER"
+ ""
+)
+
+;; Use `&' and then `0' to prevent operands 0 and 1 from being the same.
+(define_insn "*arm_mulsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
+ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r")))]
+ "TARGET_32BIT && !arm_arch6"
+ "mul%?\\t%0, %2, %1"
+ [(set_attr "insn" "mul")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_mulsi3_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch6"
+ "mul%?\\t%0, %1, %2"
+ [(set_attr "insn" "mul")
+ (set_attr "predicable" "yes")]
+)
+
+; Unfortunately with the Thumb the '&'/'0' trick can fail when operands
+; 1 and 2 are the same, because reload will make operand 0 match
+; operand 1 without realizing that this conflicts with operand 2. We fix
+; this by adding another alternative to match this case, and then `reload'
+; it ourselves. This alternative must come first.
+(define_insn "*thumb_mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=&l,&l,&l")
+ (mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0")
+ (match_operand:SI 2 "register_operand" "l,l,l")))]
+ "TARGET_THUMB1 && !arm_arch6"
+ "*
+ if (which_alternative < 2)
+ return \"mov\\t%0, %1\;mul\\t%0, %2\";
+ else
+ return \"mul\\t%0, %2\";
+ "
+ [(set_attr "length" "4,4,2")
+ (set_attr "insn" "mul")]
+)
+
+(define_insn "*thumb_mulsi3_v6"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,l")
+ (mult:SI (match_operand:SI 1 "register_operand" "0,l,0")
+ (match_operand:SI 2 "register_operand" "l,0,0")))]
+ "TARGET_THUMB1 && arm_arch6"
+ "@
+ mul\\t%0, %2
+ mul\\t%0, %1
+ mul\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "insn" "mul")]
+)
+
+(define_insn "*mulsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
+ (mult:SI (match_dup 2) (match_dup 1)))]
+ "TARGET_ARM && !arm_arch6"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+(define_insn "*mulsi3_compare0_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (match_dup 2) (match_dup 1)))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+(define_insn "*mulsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=&r,&r"))]
+ "TARGET_ARM && !arm_arch6"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+(define_insn "*mulsi_compare0_scratch_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+;; Unnamed templates to match MLA instruction.
+
+(define_insn "*mulsi3addsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
+ (match_operand:SI 3 "s_register_operand" "r,r,0,0")))]
+ "TARGET_32BIT && !arm_arch6"
+ "mla%?\\t%0, %2, %1, %3"
+ [(set_attr "insn" "mla")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsi3addsi_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch6"
+ "mla%?\\t%0, %2, %1, %3"
+ [(set_attr "insn" "mla")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsi3addsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
+ (match_operand:SI 3 "s_register_operand" "r,r,0,0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
+ (plus:SI (mult:SI (match_dup 2) (match_dup 1))
+ (match_dup 3)))]
+ "TARGET_ARM && arm_arch6"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3addsi_compare0_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (match_dup 2) (match_dup 1))
+ (match_dup 3)))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3addsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "s_register_operand" "%0,r,0,r"))
+ (match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
+ "TARGET_ARM && !arm_arch6"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3addsi_compare0_scratch_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3subsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (minus:SI
+ (match_operand:SI 3 "s_register_operand" "r")
+ (mult:SI (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch_thumb2"
+ "mls%?\\t%0, %2, %1, %3"
+ [(set_attr "insn" "mla")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "maddsidi4"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (plus:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+ (match_operand:DI 3 "s_register_operand" "")))]
+ "TARGET_32BIT && arm_arch3m"
+ "")
+
+(define_insn "*mulsidi3adddi"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r")
+ (plus:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "smlal")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsidi3adddi_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch6"
+ "smlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "smlal")
+ (set_attr "predicable" "yes")]
+)
+
+;; 32x32->64 widening multiply.
+;; As with mulsi3, the only difference between the v3-5 and v6+
+;; versions of these patterns is the requirement that the output not
+;; overlap the inputs, but that still means we have to have a named
+;; expander and two different starred insns.
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))))]
+ "TARGET_32BIT && arm_arch3m"
+ ""
+)
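+
+;; Source-level sketch of what this expander covers (illustration only):
+;;   long long f (int a, int b) { return (long long) a * b; }
+;; compiles to a single smull on arm_arch3m targets via the insns below.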
+
+(define_insn "*mulsidi3_nov6"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsidi3_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch6"
+ "smull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))))]
+ "TARGET_32BIT && arm_arch3m"
+ ""
+)
+
+(define_insn "*umulsidi3_nov6"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*umulsidi3_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch6"
+ "umull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "umaddsidi4"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (plus:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+ (match_operand:DI 3 "s_register_operand" "")))]
+ "TARGET_32BIT && arm_arch3m"
+ "")
+
+(define_insn "*umulsidi3adddi"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r")
+ (plus:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "umlal")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*umulsidi3adddi_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch6"
+ "umlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "umlal")
+ (set_attr "predicable" "yes")]
+)
+
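+;; The *si3_highpart patterns yield only the upper 32 bits of the 64-bit
+;; product; as a C sketch (illustration only):
+;;   int32_t smulsi3_highpart (int32_t a, int32_t b)
+;;   { return (int32_t) (((int64_t) a * (int64_t) b) >> 32); }
+;; SMULL/UMULL compute both halves, so the unwanted low half goes to the
+;; scratch register.
+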
+(define_expand "smulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))])]
+ "TARGET_32BIT && arm_arch3m"
+ ""
+)
+
+(define_insn "*smulsi3_highpart_nov6"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*smulsi3_highpart_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_32BIT && arm_arch6"
+ "smull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "umulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))])]
+ "TARGET_32BIT && arm_arch3m"
+ ""
+)
+
+(define_insn "*umulsi3_highpart_nov6"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*umulsi3_highpart_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_32BIT && arm_arch6"
+ "umull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
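+;; The smul<x><y> DSP instructions multiply selected 16-bit halves of
+;; their operands: 'b' picks the bottom halfword, 't' the top one.
+;; Roughly, in C (a sketch, assuming arithmetic >> on signed int):
+;;   smulbb (a, b) == (int16_t) a * (int16_t) b    /* bottom x bottom */
+;;   smultb (a, b) == (a >> 16)   * (int16_t) b    /* top x bottom    */
+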
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "s_register_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand" "r"))))]
+ "TARGET_DSP_MULTIPLY"
+ "smulbb%?\\t%0, %1, %2"
+ [(set_attr "insn" "smulxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulhisi3tb"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand" "r"))))]
+ "TARGET_DSP_MULTIPLY"
+ "smultb%?\\t%0, %1, %2"
+ [(set_attr "insn" "smulxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulhisi3bt"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "s_register_operand" "r"))
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16))))]
+ "TARGET_DSP_MULTIPLY"
+ "smulbt%?\\t%0, %1, %2"
+ [(set_attr "insn" "smulxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulhisi3tt"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (const_int 16))))]
+ "TARGET_DSP_MULTIPLY"
+ "smultt%?\\t%0, %1, %2"
+ [(set_attr "insn" "smulxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "maddhisi4"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "s_register_operand" "r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlabb%?\\t%0, %1, %2, %3"
+ [(set_attr "insn" "smlaxy")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*maddhidi4"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:HI 1 "s_register_operand" "r"))
+ (sign_extend:DI
+ (match_operand:HI 2 "s_register_operand" "r")))
+ (match_operand:DI 3 "s_register_operand" "0")))]
+ "TARGET_DSP_MULTIPLY"
+ "smlalbb%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smlalxy")
+ (set_attr "predicable" "yes")])
+
+(define_expand "mulsf3"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (mult:SF (match_operand:SF 1 "s_register_operand" "")
+ (match_operand:SF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ if (TARGET_MAVERICK
+ && !cirrus_fp_register (operands[2], SFmode))
+ operands[2] = force_reg (SFmode, operands[2]);
+")
+
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (mult:DF (match_operand:DF 1 "s_register_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK
+ && !cirrus_fp_register (operands[2], DFmode))
+ operands[2] = force_reg (DFmode, operands[2]);
+")
+
+;; Division insns
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (div:SF (match_operand:SF 1 "arm_float_rhs_operand" "")
+ (match_operand:SF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
+ "")
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
+ "")
+
+;; Modulo insns
+
+(define_expand "modsf3"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (mod:SF (match_operand:SF 1 "s_register_operand" "")
+ (match_operand:SF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "")
+
+(define_expand "moddf3"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (mod:DF (match_operand:DF 1 "s_register_operand" "")
+ (match_operand:DF 2 "arm_float_rhs_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "")
+
+;; Boolean and,ior,xor insns
+
+;; Split up double word logical operations
+
+;; Split up simple DImode logical operations. Simply perform the logical
+;; operation on the upper and lower halves of the registers.
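+;; As a sketch, with each DI value split into 32-bit halves:
+;;   r.lo = a.lo OP b.lo;  r.hi = a.hi OP b.hi;   (OP is AND, IOR or XOR)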
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (match_operator:DI 6 "logical_binary_operator"
+ [(match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "s_register_operand" "")]))]
+ "TARGET_32BIT && reload_completed
+ && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))
+ && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)]))
+ (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+)
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (match_operator:DI 6 "logical_binary_operator"
+ [(sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))
+ (match_operand:DI 1 "s_register_operand" "")]))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)]))
+ (set (match_dup 3) (match_op_dup:SI 6
+ [(ashiftrt:SI (match_dup 2) (const_int 31))
+ (match_dup 4)]))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+)
+
+;; The zero extend of operand 2 means we can just copy the high part of
+;; operand1 into operand0.
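+;; As a sketch:  r.lo = a.lo | b;  r.hi = a.hi;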
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (ior:DI
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))
+ (match_operand:DI 1 "s_register_operand" "")))]
+ "TARGET_32BIT && operands[0] != operands[1] && reload_completed"
+ [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (match_dup 4))]
+ "
+ {
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+)
+
+;; The zero extend of operand 2 means we can just copy the high part of
+;; operand1 into operand0.
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (xor:DI
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))
+ (match_operand:DI 1 "s_register_operand" "")))]
+ "TARGET_32BIT && operands[0] != operands[1] && reload_completed"
+ [(set (match_dup 0) (xor:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (match_dup 4))]
+ "
+ {
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+)
+
+(define_expand "anddi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (and:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "neon_inv_logic_op2" "")))]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*anddi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (match_operand:DI 1 "s_register_operand" "%0,r")
+ (match_operand:DI 2 "s_register_operand" "r,r")))]
+ "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
+ "#"
+ [(set_attr "length" "8")]
+)
+
+(define_insn_and_split "*anddi_zesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ "TARGET_32BIT && reload_completed"
+ ; The zero extend of operand 2 clears the high word of the output
+ ; operand.
+ [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (const_int 0))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+ [(set_attr "length" "8")]
+)
+
+(define_insn "*anddi_sesdi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ [(set_attr "length" "8")]
+)
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (and:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) == 255 && arm_arch6)
+ {
+ operands[1] = convert_to_mode (QImode, operands[1], 1);
+ emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0],
+ operands[1]));
+ }
+ else
+ arm_split_constant (AND, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0],
+ operands[1],
+ optimize && can_create_pseudo_p ());
+
+ DONE;
+ }
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
+ else
+ {
+ int i;
+
+ if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256)
+ {
+ operands[2] = force_reg (SImode,
+ GEN_INT (~INTVAL (operands[2])));
+
+ emit_insn (gen_thumb1_bicsi3 (operands[0], operands[2], operands[1]));
+
+ DONE;
+ }
+
+ for (i = 9; i <= 31; i++)
+ {
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2]))
+ {
+ emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i),
+ const0_rtx));
+ DONE;
+ }
+ else if ((((HOST_WIDE_INT) 1) << i) - 1
+ == ~INTVAL (operands[2]))
+ {
+ rtx shift = GEN_INT (i);
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lshrsi3 (reg, operands[1], shift));
+ emit_insn (gen_ashlsi3 (operands[0], reg, shift));
+
+ DONE;
+ }
+ }
+
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ }
+ "
+)
+
+; ??? Check split length for Thumb-2
+(define_insn_and_split "*arm_andsi3_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+ "TARGET_32BIT"
+ "@
+ and%?\\t%0, %1, %2
+ bic%?\\t%0, %1, #%B2
+ #"
+ "TARGET_32BIT
+ && GET_CODE (operands[2]) == CONST_INT
+ && !(const_ok_for_arm (INTVAL (operands[2]))
+ || const_ok_for_arm (~INTVAL (operands[2])))"
+ [(clobber (const_int 0))]
+ "
+ arm_split_constant (AND, SImode, curr_insn,
+ INTVAL (operands[2]), operands[0], operands[1], 0);
+ DONE;
+ "
+ [(set_attr "length" "4,4,16")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb1_andsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (and:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "and\\t%0, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_insn "*andsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (and:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "arm_not_operand" "rI,K"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (and:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ and%.\\t%0, %1, %2
+ bic%.\\t%0, %1, #%B2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*andsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (and:SI (match_operand:SI 0 "s_register_operand" "r,r")
+ (match_operand:SI 1 "arm_not_operand" "rI,K"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=X,r"))]
+ "TARGET_32BIT"
+ "@
+ tst%?\\t%0, %1
+ bic%.\\t%2, %0, #%B1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*zeroextractsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (zero_extract:SI
+ (match_operand:SI 0 "s_register_operand" "r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))]
+ "TARGET_32BIT
+ && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) + (INTVAL (operands[2]) & 1) <= 8
+ && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32)"
+ "*
+ operands[1] = GEN_INT (((1 << INTVAL (operands[1])) - 1)
+ << INTVAL (operands[2]));
+ output_asm_insn (\"tst%?\\t%0, %1\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "set")]
+)
+
+(define_insn_and_split "*ne_zeroextractsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (ne:SI (zero_extract:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n"))
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT
+ && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
+ && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)"
+ "#"
+ "TARGET_32BIT
+ && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
+ && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)"
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0))
+ (match_dup 0) (const_int 1)))]
+ "
+ operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1)
+ << INTVAL (operands[3]));
+ "
+ [(set_attr "conds" "clob")
+ (set (attr "length")
+ (if_then_else (eq_attr "is_thumb" "yes")
+ (const_int 12)
+ (const_int 8)))]
+)
+
+(define_insn_and_split "*ne_zeroextractsi_shifted"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (ne:SI (zero_extract:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (const_int 0))
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ "TARGET_ARM"
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0))
+ (match_dup 0) (const_int 1)))]
+ "
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*ite_ne_zeroextractsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI (ne (zero_extract:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n"))
+ (const_int 0))
+ (match_operand:SI 4 "arm_not_operand" "rIK")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM
+ && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
+ && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)
+ && !reg_overlap_mentioned_p (operands[0], operands[4])"
+ "#"
+ "TARGET_ARM
+ && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8
+ && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)
+ && !reg_overlap_mentioned_p (operands[0], operands[4])"
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0))
+ (match_dup 0) (match_dup 4)))]
+ "
+ operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1)
+ << INTVAL (operands[3]));
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*ite_ne_zeroextractsi_shifted"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI (ne (zero_extract:SI
+ (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (const_int 0))
+ (const_int 0))
+ (match_operand:SI 3 "arm_not_operand" "rIK")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ "#"
+ "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0))
+ (match_dup 0) (match_dup 3)))]
+ "
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (match_operand:SI 4 "s_register_operand" ""))]
+ "TARGET_THUMB1"
+ [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 4) (match_dup 3)))]
+ "{
+ HOST_WIDE_INT temp = INTVAL (operands[2]);
+
+ operands[2] = GEN_INT (32 - temp - INTVAL (operands[3]));
+ operands[3] = GEN_INT (32 - temp);
+ }"
+)
+
+;; ??? Thumb-2 has bitfield insert/extract instructions; use them here.
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "shiftable_operator"
+ [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 5 "s_register_operand" "")]))
+ (clobber (match_operand:SI 6 "s_register_operand" ""))]
+ "TARGET_ARM"
+ [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(lshiftrt:SI (match_dup 6) (match_dup 4))
+ (match_dup 5)]))]
+ "{
+ HOST_WIDE_INT temp = INTVAL (operands[3]);
+
+ operands[3] = GEN_INT (32 - temp - INTVAL (operands[4]));
+ operands[4] = GEN_INT (32 - temp);
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_THUMB1"
+ [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 3)))]
+ "{
+ HOST_WIDE_INT temp = INTVAL (operands[2]);
+
+ operands[2] = GEN_INT (32 - temp - INTVAL (operands[3]));
+ operands[3] = GEN_INT (32 - temp);
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "shiftable_operator"
+ [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 5 "s_register_operand" "")]))
+ (clobber (match_operand:SI 6 "s_register_operand" ""))]
+ "TARGET_ARM"
+ [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(ashiftrt:SI (match_dup 6) (match_dup 4))
+ (match_dup 5)]))]
+ "{
+ HOST_WIDE_INT temp = INTVAL (operands[3]);
+
+ operands[3] = GEN_INT (32 - temp - INTVAL (operands[4]));
+ operands[4] = GEN_INT (32 - temp);
+ }"
+)
+
+;;; ??? This pattern is bogus. If operand3 has bits outside the range
+;;; represented by the bitfield, then this will produce incorrect results.
+;;; Somewhere, the value needs to be truncated. On targets like the m68k,
+;;; which have a real bit-field insert instruction, the truncation happens
+;;; in the bit-field insert instruction itself. Since arm does not have a
+;;; bit-field insert instruction, we would have to emit code here to truncate
+;;; the value before we insert. This loses some of the advantage of having
+;;; this insv pattern, so this pattern needs to be reevaluated.
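+;;;
+;;; For example (a sketch): inserting the 9-bit value 0x1FF into a 4-bit
+;;; field must behave as inserting 0xF; if the source is not truncated
+;;; first, its high bits can leak into neighbouring bits of the word.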
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" ""))
+ (match_operand:SI 3 "reg_or_int_operand" ""))]
+ "TARGET_ARM || arm_arch_thumb2"
+ "
+ {
+ int start_bit = INTVAL (operands[2]);
+ int width = INTVAL (operands[1]);
+ HOST_WIDE_INT mask = (((HOST_WIDE_INT)1) << width) - 1;
+ rtx target, subtarget;
+
+ if (arm_arch_thumb2)
+ {
+ bool use_bfi = TRUE;
+
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+ if (val == 0)
+ {
+ emit_insn (gen_insv_zero (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* See if the set can be done with a single orr instruction. */
+ if (val == mask && const_ok_for_arm (val << start_bit))
+ use_bfi = FALSE;
+ }
+
+ if (use_bfi)
+ {
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (SImode, operands[3]);
+
+ emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ }
+
+ target = copy_rtx (operands[0]);
+ /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical
+ subreg as the final target. */
+ if (GET_CODE (target) == SUBREG)
+ {
+ subtarget = gen_reg_rtx (SImode);
+ if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (target)))
+ < GET_MODE_SIZE (SImode))
+ target = SUBREG_REG (target);
+ }
+ else
+ subtarget = target;
+
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ /* Since we are inserting a known constant, we may be able to
+ reduce the number of bits that we have to clear so that
+ the mask becomes simple. */
+ /* ??? This code does not check to see if the new mask is actually
+ simpler. It may not be. */
+ rtx op1 = gen_reg_rtx (SImode);
+ /* ??? Truncate operand3 to fit in the bitfield. See comment before
+ start of this pattern. */
+ HOST_WIDE_INT op3_value = mask & INTVAL (operands[3]);
+ HOST_WIDE_INT mask2 = ((mask & ~op3_value) << start_bit);
+
+ emit_insn (gen_andsi3 (op1, operands[0],
+ gen_int_mode (~mask2, SImode)));
+ emit_insn (gen_iorsi3 (subtarget, op1,
+ gen_int_mode (op3_value << start_bit, SImode)));
+ }
+ else if (start_bit == 0
+ && !(const_ok_for_arm (mask)
+ || const_ok_for_arm (~mask)))
+ {
+ /* A trick: since we are setting the bottom bits in the word,
+ we can shift operand[3] up, shift operand[0] down, OR them
+ together and rotate the result back again. This takes 3 insns,
+ and the third might be mergeable into another op. */
+ /* The shift up copes with the possibility that operand[3] is
+ wider than the bitfield. */
+ rtx op0 = gen_reg_rtx (SImode);
+ rtx op1 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width)));
+ emit_insn (gen_lshrsi3 (op1, operands[0], operands[1]));
+ emit_insn (gen_iorsi3 (op1, op1, op0));
+ emit_insn (gen_rotlsi3 (subtarget, op1, operands[1]));
+ }
+ else if ((width + start_bit == 32)
+ && !(const_ok_for_arm (mask)
+ || const_ok_for_arm (~mask)))
+ {
+ /* Similar trick, but slightly less efficient. */
+
+ rtx op0 = gen_reg_rtx (SImode);
+ rtx op1 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width)));
+ emit_insn (gen_ashlsi3 (op1, operands[0], operands[1]));
+ emit_insn (gen_lshrsi3 (op1, op1, operands[1]));
+ emit_insn (gen_iorsi3 (subtarget, op1, op0));
+ }
+ else
+ {
+ rtx op0 = gen_int_mode (mask, SImode);
+ rtx op1 = gen_reg_rtx (SImode);
+ rtx op2 = gen_reg_rtx (SImode);
+
+ if (!(const_ok_for_arm (mask) || const_ok_for_arm (~mask)))
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_movsi (tmp, op0));
+ op0 = tmp;
+ }
+
+ /* Mask out any bits in operand[3] that are not needed. */
+ emit_insn (gen_andsi3 (op1, operands[3], op0));
+
+ if (GET_CODE (op0) == CONST_INT
+ && (const_ok_for_arm (mask << start_bit)
+ || const_ok_for_arm (~(mask << start_bit))))
+ {
+ op0 = gen_int_mode (~(mask << start_bit), SImode);
+ emit_insn (gen_andsi3 (op2, operands[0], op0));
+ }
+ else
+ {
+ if (GET_CODE (op0) == CONST_INT)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_movsi (tmp, op0));
+ op0 = tmp;
+ }
+
+ if (start_bit != 0)
+ emit_insn (gen_ashlsi3 (op0, op0, operands[2]));
+
+ emit_insn (gen_andsi_notsi_si (op2, operands[0], op0));
+ }
+
+ if (start_bit != 0)
+ emit_insn (gen_ashlsi3 (op1, op1, operands[2]));
+
+ emit_insn (gen_iorsi3 (subtarget, op1, op2));
+ }
+
+ if (subtarget != target)
+ {
+ /* If TARGET is still a SUBREG, then it must be wider than a word,
+ so we must be careful only to set the subword we were asked to. */
+ if (GET_CODE (target) == SUBREG)
+ emit_move_insn (target, subtarget);
+ else
+ emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
+ }
+
+ DONE;
+ }"
+)
+
+(define_insn "insv_zero"
+ [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "M")
+ (match_operand:SI 2 "const_int_operand" "M"))
+ (const_int 0))]
+ "arm_arch_thumb2"
+ "bfc%?\t%0, %2, %1"
+ [(set_attr "length" "4")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "insv_t2"
+ [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "M")
+ (match_operand:SI 2 "const_int_operand" "M"))
+ (match_operand:SI 3 "s_register_operand" "r"))]
+ "arm_arch_thumb2"
+ "bfi%?\t%0, %3, %2, %1"
+ [(set_attr "length" "4")
+ (set_attr "predicable" "yes")]
+)
+
+; Constants for op 2 will never be given to these patterns.
+(define_insn_and_split "*anddi_notdi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r"))
+ (match_operand:DI 2 "s_register_operand" "r,0")))]
+ "TARGET_32BIT"
+ "#"
+ "TARGET_32BIT && reload_completed
+ && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))
+ && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 0) (and:SI (not:SI (match_dup 1)) (match_dup 2)))
+ (set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ }"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn_and_split "*anddi_notzesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (not:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r")))
+ (match_operand:DI 1 "s_register_operand" "0,?r")))]
+ "TARGET_32BIT"
+ "@
+ bic%?\\t%Q0, %Q1, %2
+ #"
+ ; (not (zero_extend ...)) allows us to just copy the high word from
+ ; operand1 to operand0.
+ "TARGET_32BIT
+ && reload_completed
+ && operands[0] != operands[1]"
+ [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
+ (set (match_dup 3) (match_dup 4))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+ [(set_attr "length" "4,8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn_and_split "*anddi_notsesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (and:DI (not:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r")))
+ (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
+ (set (match_dup 3) (and:SI (not:SI
+ (ashiftrt:SI (match_dup 2) (const_int 31)))
+ (match_dup 4)))]
+ "
+ {
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "andsi_notsi_si"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT"
+ "bic%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "thumb1_bicsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "l"))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "TARGET_THUMB1"
+ "bic\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_insn "andsi_not_shiftsi_si"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (not:SI (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "arm_rhs_operand" "rM")]))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_ARM"
+ "bic%?\\t%0, %1, %2%S4"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "2")
+ (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*andsi_notsi_si_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (not:SI (match_dup 2)) (match_dup 1)))]
+ "TARGET_32BIT"
+ "bic%.\\t%0, %1, %2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*andsi_notsi_si_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_32BIT"
+ "bic%.\\t%0, %1, %2"
+ [(set_attr "conds" "set")]
+)
+
+(define_expand "iordi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "neon_logic_op2" "")))]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*iordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r")
+ (match_operand:DI 2 "s_register_operand" "r,r")))]
+ "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*iordi_zesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (ior:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,?r")))]
+ "TARGET_32BIT"
+ "@
+ orr%?\\t%Q0, %Q1, %2
+ #"
+ [(set_attr "length" "4,8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*iordi_sesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (ior:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ior:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (TARGET_32BIT)
+ {
+ arm_split_constant (IOR, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0], operands[1],
+ optimize && can_create_pseudo_p ());
+ DONE;
+ }
+ else /* TARGET_THUMB1 */
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
+ }
+ "
+)
+
+(define_insn_and_split "*iorsi3_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r")
+ (match_operand:SI 2 "reg_or_int_operand" "rI,K,?n")))]
+ "TARGET_32BIT"
+ "@
+ orr%?\\t%0, %1, %2
+ orn%?\\t%0, %1, #%B2
+ #"
+ "TARGET_32BIT
+ && GET_CODE (operands[2]) == CONST_INT
+ && !(const_ok_for_arm (INTVAL (operands[2]))
+ || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))"
+ [(clobber (const_int 0))]
+{
+ arm_split_constant (IOR, SImode, curr_insn,
+ INTVAL (operands[2]), operands[0], operands[1], 0);
+ DONE;
+}
+ [(set_attr "length" "4,4,16")
+ (set_attr "arch" "32,t2,32")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb1_iorsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "orr\\t%0, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (set (match_operand:SI 0 "arm_general_register_operand" "")
+ (ior:SI (match_operand:SI 1 "arm_general_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_ARM
+ && !const_ok_for_arm (INTVAL (operands[2]))
+ && const_ok_for_arm (~INTVAL (operands[2]))"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 0) (ior:SI (match_dup 1) (match_dup 3)))]
+ ""
+)
+
+(define_insn "*iorsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (ior:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "orr%.\\t%0, %1, %2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*iorsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_32BIT"
+ "orr%.\\t%0, %1, %2"
+ [(set_attr "conds" "set")]
+)
+
+(define_expand "xordi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:DI 2 "s_register_operand" "")))]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*xordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r")
+ (match_operand:DI 2 "s_register_operand" "r,r")))]
+ "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*xordi_zesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (xor:DI (zero_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,?r")))]
+ "TARGET_32BIT"
+ "@
+ eor%?\\t%Q0, %Q1, %2
+ #"
+ [(set_attr "length" "4,8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*xordi_sesidi_di"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (xor:DI (sign_extend:DI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (xor:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_EITHER"
+ "if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (TARGET_32BIT)
+ {
+ arm_split_constant (XOR, SImode, NULL_RTX,
+ INTVAL (operands[2]), operands[0], operands[1],
+ optimize && can_create_pseudo_p ());
+ DONE;
+ }
+ else /* TARGET_THUMB1 */
+ {
+ rtx tmp = force_reg (SImode, operands[2]);
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[2] = tmp;
+ else
+ {
+ operands[2] = operands[1];
+ operands[1] = tmp;
+ }
+ }
+ }"
+)
+
+(define_insn "*arm_xorsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI")))]
+ "TARGET_32BIT"
+ "eor%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb1_xorsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "eor\\t%0, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_insn "*xorsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (xor:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_32BIT"
+ "eor%.\\t%0, %1, %2"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*xorsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:SI 1 "arm_rhs_operand" "rI"))
+ (const_int 0)))]
+ "TARGET_32BIT"
+ "teq%?\\t%0, %1"
+ [(set_attr "conds" "set")]
+)
+
+; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C)
+; followed by (NOT D), we can sometimes merge the final NOT into one of
+; the following insns.
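+;
+; This is just De Morgan's law:  NOT (AND (IOR A B) (NOT C))
+;   = (IOR (NOT (IOR A B)) C) = (IOR (AND (NOT A) (NOT B)) C).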
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ior:SI (and:SI (not:SI (match_operand:SI 1 "s_register_operand" ""))
+ (not:SI (match_operand:SI 2 "arm_rhs_operand" "")))
+ (match_operand:SI 3 "arm_rhs_operand" "")))
+ (clobber (match_operand:SI 4 "s_register_operand" ""))]
+ "TARGET_32BIT"
+ [(set (match_dup 4) (and:SI (ior:SI (match_dup 1) (match_dup 2))
+ (not:SI (match_dup 3))))
+ (set (match_dup 0) (not:SI (match_dup 4)))]
+ ""
+)
+
+(define_insn "*andsi_iorsi3_notsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
+ (and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))
+ (not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))]
+ "TARGET_32BIT"
+ "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3"
+ [(set_attr "length" "8")
+ (set_attr "ce_count" "2")
+ (set_attr "predicable" "yes")]
+)
+
+; ??? Are these four splitters still beneficial when the Thumb-2 bitfield
+; insns are available?
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "logical_binary_operator"
+ [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operator:SI 9 "logical_binary_operator"
+ [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (match_operand:SI 7 "s_register_operand" "")])]))
+ (clobber (match_operand:SI 8 "s_register_operand" ""))]
+ "TARGET_32BIT
+ && GET_CODE (operands[1]) == GET_CODE (operands[9])
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[6])"
+ [(set (match_dup 8)
+ (match_op_dup 1
+ [(ashift:SI (match_dup 2) (match_dup 4))
+ (match_dup 5)]))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(lshiftrt:SI (match_dup 8) (match_dup 6))
+ (match_dup 7)]))]
+ "
+ operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4])));
+")
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "logical_binary_operator"
+ [(match_operator:SI 9 "logical_binary_operator"
+ [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (match_operand:SI 7 "s_register_operand" "")])
+ (zero_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))]))
+ (clobber (match_operand:SI 8 "s_register_operand" ""))]
+ "TARGET_32BIT
+ && GET_CODE (operands[1]) == GET_CODE (operands[9])
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[6])"
+ [(set (match_dup 8)
+ (match_op_dup 1
+ [(ashift:SI (match_dup 2) (match_dup 4))
+ (match_dup 5)]))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(lshiftrt:SI (match_dup 8) (match_dup 6))
+ (match_dup 7)]))]
+ "
+ operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4])));
+")
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "logical_binary_operator"
+ [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operator:SI 9 "logical_binary_operator"
+ [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (match_operand:SI 7 "s_register_operand" "")])]))
+ (clobber (match_operand:SI 8 "s_register_operand" ""))]
+ "TARGET_32BIT
+ && GET_CODE (operands[1]) == GET_CODE (operands[9])
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[6])"
+ [(set (match_dup 8)
+ (match_op_dup 1
+ [(ashift:SI (match_dup 2) (match_dup 4))
+ (match_dup 5)]))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(ashiftrt:SI (match_dup 8) (match_dup 6))
+ (match_dup 7)]))]
+ "
+ operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4])));
+")
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "logical_binary_operator"
+ [(match_operator:SI 9 "logical_binary_operator"
+ [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (match_operand:SI 7 "s_register_operand" "")])
+ (sign_extract:SI (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))]))
+ (clobber (match_operand:SI 8 "s_register_operand" ""))]
+ "TARGET_32BIT
+ && GET_CODE (operands[1]) == GET_CODE (operands[9])
+ && INTVAL (operands[3]) == 32 - INTVAL (operands[6])"
+ [(set (match_dup 8)
+ (match_op_dup 1
+ [(ashift:SI (match_dup 2) (match_dup 4))
+ (match_dup 5)]))
+ (set (match_dup 0)
+ (match_op_dup 1
+ [(ashiftrt:SI (match_dup 8) (match_dup 6))
+ (match_dup 7)]))]
+ "
+ operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4])));
+")
+
+
+;; Minimum and maximum insns
+
+(define_expand "smaxsi3"
+ [(parallel [
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (smax:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT"
+ "
+ if (operands[2] == const0_rtx || operands[2] == constm1_rtx)
+ {
+ /* No need for a clobber of the condition code register here. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_SMAX (SImode, operands[1],
+ operands[2])));
+ DONE;
+ }
+")
+
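+;; smax against 0 or -1 needs no compare: x asr #31 is all ones when x is
+;; negative and zero otherwise, so (a sketch, assuming arithmetic >> on
+;; signed int):
+;;   max (x, 0)  ==  x & ~(x >> 31)   /* BIC */
+;;   max (x, -1) ==  x |  (x >> 31)   /* ORR */
+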
+(define_insn "*smax_0"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (smax:SI (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 0)))]
+ "TARGET_32BIT"
+ "bic%?\\t%0, %1, %1, asr #31"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*smax_m1"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (smax:SI (match_operand:SI 1 "s_register_operand" "r")
+ (const_int -1)))]
+ "TARGET_32BIT"
+ "orr%?\\t%0, %1, %1, asr #31"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_smax_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%1, %2\;movlt\\t%0, %2
+ cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_expand "sminsi3"
+ [(parallel [
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (smin:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT"
+ "
+ if (operands[2] == const0_rtx)
+ {
+ /* No need for a clobber of the condition code register here. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_SMIN (SImode, operands[1],
+ operands[2])));
+ DONE;
+ }
+")
+
+(define_insn "*smin_0"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (smin:SI (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 0)))]
+ "TARGET_32BIT"
+ "and%?\\t%0, %1, %1, asr #31"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_smin_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%1, %2\;movge\\t%0, %2
+ cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_expand "umaxsi3"
+ [(parallel [
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (umax:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*arm_umaxsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%1, %2\;movcc\\t%0, %2
+ cmp\\t%1, %2\;movcs\\t%0, %1
+ cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,8,12")]
+)
+
+(define_expand "uminsi3"
+ [(parallel [
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (umin:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "*arm_uminsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%1, %2\;movcs\\t%0, %2
+ cmp\\t%1, %2\;movcc\\t%0, %1
+ cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,8,12")]
+)
+
+(define_insn "*store_minmaxsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (match_operator:SI 3 "minmax_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "*
+ operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
+ operands[1], operands[2]);
+ output_asm_insn (\"cmp\\t%1, %2\", operands);
+ if (TARGET_THUMB2)
+ output_asm_insn (\"ite\t%d3\", operands);
+ output_asm_insn (\"str%d3\\t%1, %0\", operands);
+ output_asm_insn (\"str%D3\\t%2, %0\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "clob")
+ (set (attr "length")
+ (if_then_else (eq_attr "is_thumb" "yes")
+ (const_int 14)
+ (const_int 12)))
+ (set_attr "type" "store1")]
+)
+
+; Reject the frame pointer in operand[1], since reloading this after
+; it has been eliminated can cause carnage.
+(define_insn "*minmax_arithsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_operator:SI 4 "shiftable_operator"
+ [(match_operator:SI 5 "minmax_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
+ (match_operand:SI 1 "s_register_operand" "0,?r")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && !arm_eliminable_register (operands[1])"
+ "*
+ {
+ enum rtx_code code = GET_CODE (operands[4]);
+ bool need_else;
+
+ if (which_alternative != 0 || operands[3] != const0_rtx
+ || (code != PLUS && code != IOR && code != XOR))
+ need_else = true;
+ else
+ need_else = false;
+
+ operands[5] = gen_rtx_fmt_ee (minmax_code (operands[5]), SImode,
+ operands[2], operands[3]);
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ if (TARGET_THUMB2)
+ {
+ if (need_else)
+ output_asm_insn (\"ite\\t%d5\", operands);
+ else
+ output_asm_insn (\"it\\t%d5\", operands);
+ }
+ output_asm_insn (\"%i4%d5\\t%0, %1, %2\", operands);
+ if (need_else)
+ output_asm_insn (\"%i4%D5\\t%0, %1, %3\", operands);
+ return \"\";
+ }"
+ [(set_attr "conds" "clob")
+ (set (attr "length")
+ (if_then_else (eq_attr "is_thumb" "yes")
+ (const_int 14)
+ (const_int 12)))]
+)
+
+
+;; Shift and rotation insns
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (ashift:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_32BIT"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+ DONE;
+ }
+ /* Ideally we shouldn't fail here if we could know that operands[1]
+ ends up already living in an iwmmxt register. Otherwise it's
+ cheaper to generate the alternative code than to move values
+ to iwmmxt regs and back. */
+ FAIL;
+ }
+ else if (!TARGET_REALLY_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK))
+ FAIL;
+ "
+)
+
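+;; One-bit DI shift left: MOVS shifts the low word and leaves its old top
+;; bit in the carry flag; ADC then computes hi + hi + carry, i.e.
+;; (hi << 1) | carry.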
+(define_insn "arm_ashldi3_1bit"
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
+ (ashift:DI (match_operand:DI 1 "s_register_operand" "0,r")
+ (const_int 1)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ashift:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+ {
+ emit_insn (gen_movsi (operands[0], const0_rtx));
+ DONE;
+ }
+ "
+)
+
+(define_insn "*thumb1_ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (ashift:SI (match_operand:SI 1 "register_operand" "l,0")
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
+ "TARGET_THUMB1"
+ "lsl\\t%0, %1, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_32BIT"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
+ DONE;
+ }
+ /* Ideally we shouldn't fail here if we could know that operands[1]
+ ends up already living in an iwmmxt register. Otherwise it's
+ cheaper to generate the alternative code than to move values
+ to iwmmxt regs and back. */
+ FAIL;
+ }
+ else if (!TARGET_REALLY_IWMMXT)
+ FAIL;
+ "
+)
+
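+;; One-bit DI shift right: MOVS shifts the high word and leaves its old
+;; bit 0 in the carry flag; RRX then shifts the low word right by one,
+;; moving the carry into bit 31.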
+(define_insn "arm_ashrdi3_1bit"
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
+ (const_int 1)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx"
+ [(set_attr "conds" "clob")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+ operands[2] = GEN_INT (31);
+ "
+)
+
+(define_insn "*thumb1_ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
+ "TARGET_THUMB1"
+ "asr\\t%0, %1, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_32BIT"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if ((HOST_WIDE_INT) INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
+ DONE;
+ }
+ /* Ideally we shouldn't fail here if we could know that operands[1]
+ ends up already living in an iwmmxt register. Otherwise it's
+ cheaper to generate the alternative code than to move values
+ to iwmmxt regs and back. */
+ FAIL;
+ }
+ else if (!TARGET_REALLY_IWMMXT)
+ FAIL;
+ "
+)
+
+(define_insn "arm_lshrdi3_1bit"
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r")
+ (const_int 1)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx"
+ [(set_attr "conds" "clob")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+ {
+ emit_insn (gen_movsi (operands[0], const0_rtx));
+ DONE;
+ }
+ "
+)
+
+(define_insn "*thumb1_lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0")
+ (match_operand:SI 2 "nonmemory_operand" "N,l")))]
+ "TARGET_THUMB1"
+ "lsr\\t%0, %1, %2"
+ [(set_attr "length" "2")
+ (set_attr "conds" "set")])
+
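+;; ARM only has a rotate-right instruction, so a rotate left is performed
+;; as a rotate right by the complementary amount:
+;;   rotl (x, n) == rotr (x, (32 - n) % 32)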
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (rotatert:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "reg_or_int_operand" "")))]
+ "TARGET_32BIT"
+ "
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32);
+ else
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2]));
+ operands[2] = reg;
+ }
+ "
+)
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (rotatert:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+ operands[2] = GEN_INT (INTVAL (operands[2]) % 32);
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (GET_CODE (operands [2]) == CONST_INT)
+ operands [2] = force_reg (SImode, operands[2]);
+ }
+ "
+)
+
+(define_insn "*thumb1_rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "ror\\t%0, %0, %2"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "*arm_shiftsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "reg_or_int_operand" "rM")]))]
+ "TARGET_32BIT"
+  "* return arm_output_shift (operands, 0);"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "1")
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*shiftsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))]
+ "TARGET_32BIT"
+  "* return arm_output_shift (operands, 1);"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*shiftsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_32BIT"
+  "* return arm_output_shift (operands, 1);"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")]
+)
+
+(define_insn "*not_shiftsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (not:SI (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "shift_amount_operand" "M,rM")])))]
+ "TARGET_32BIT"
+ "mvn%?\\t%0, %1%S3"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "1")
+ (set_attr "insn" "mvn")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*not_shiftsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (not:SI (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "shift_amount_operand" "M,rM")]))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))]
+ "TARGET_32BIT"
+ "mvn%.\\t%0, %1%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set_attr "insn" "mvn")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*not_shiftsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (not:SI (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "shift_amount_operand" "M,rM")]))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r,r"))]
+ "TARGET_32BIT"
+ "mvn%.\\t%0, %1%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set_attr "insn" "mvn")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+;; We don't really have extzv, but defining this using shifts helps
+;; to reduce register pressure later on.
+
+(define_expand "extzv"
+ [(set (match_dup 4)
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 4)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_THUMB1 || arm_arch_thumb2"
+ "
+ {
+ HOST_WIDE_INT lshift = 32 - INTVAL (operands[2]) - INTVAL (operands[3]);
+ HOST_WIDE_INT rshift = 32 - INTVAL (operands[2]);
+
+ if (arm_arch_thumb2)
+ {
+ emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+
+ operands[3] = GEN_INT (rshift);
+
+ if (lshift == 0)
+ {
+ emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3]));
+ DONE;
+ }
+
+ operands[2] = GEN_INT (lshift);
+ operands[4] = gen_reg_rtx (SImode);
+ }"
+)
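+
+;; For example, on Thumb-1 the expansion above turns an 8-bit extract
+;; at bit position 4 into (x << 20) >> 24 with a logical right shift,
+;; while Thumb-2 uses a single ubfx instruction.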
+
+(define_insn "extv"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "M")
+ (match_operand:SI 3 "const_int_operand" "M")))]
+ "arm_arch_thumb2"
+ "sbfx%?\t%0, %1, %3, %2"
+ [(set_attr "length" "4")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "extzv_t2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "M")
+ (match_operand:SI 3 "const_int_operand" "M")))]
+ "arm_arch_thumb2"
+ "ubfx%?\t%0, %1, %3, %2"
+ [(set_attr "length" "4")
+ (set_attr "predicable" "yes")]
+)
+
+
+;; Unary arithmetic insns
+
+(define_expand "negdi2"
+ [(parallel
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (neg:DI (match_operand:DI 1 "s_register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_EITHER"
+ ""
+)
+
+;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1).
+;; The first alternative allows the common case of a *full* overlap.
+(define_insn "*arm_negdi2"
+ [(set (match_operand:DI 0 "s_register_operand" "=r,&r")
+ (neg:DI (match_operand:DI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
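+;; Negating the low word with neg leaves the borrow in the carry flag,
+;; so sbc can then compute 0 - %R1 - borrow for the high word.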
+(define_insn "*thumb1_negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=&l")
+ (neg:DI (match_operand:DI 1 "register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB1"
+ "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1"
+ [(set_attr "length" "6")]
+)
+
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (neg:SI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_EITHER"
+ ""
+)
+
+(define_insn "*arm_negsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT"
+ "rsb%?\\t%0, %1, #0"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb1_negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (neg:SI (match_operand:SI 1 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "neg\\t%0, %1"
+ [(set_attr "length" "2")]
+)
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (neg:SF (match_operand:SF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
+ ""
+)
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (neg:DF (match_operand:DF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
+ "")
+
+;; abssi2 doesn't really clobber the condition codes if a different register
+;; is being set. To keep things simple, assume during rtl manipulations that
+;; it does, but tell the final scan operator the truth. Similarly for
+;; (neg (abs...))
+
+(define_expand "abssi2"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (abs:SI (match_operand:SI 1 "s_register_operand" "")))
+ (clobber (match_dup 2))])]
+ "TARGET_EITHER"
+ "
+ if (TARGET_THUMB1)
+ operands[2] = gen_rtx_SCRATCH (SImode);
+ else
+ operands[2] = gen_rtx_REG (CCmode, CC_REGNUM);
+")
+
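+;; Two branchless forms are used below: a compare followed by a
+;; conditional reverse subtract, or the mask trick
+;; abs(x) = (x ^ (x >> 31)) - (x >> 31), where the arithmetic shift
+;; yields 0 for non-negative x and -1 for negative x.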
+(define_insn "*arm_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
+ (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%0, #0\;rsblt\\t%0, %0, #0
+ eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
+ [(set_attr "conds" "clob,*")
+ (set_attr "shift" "1")
+   ;; predicable can't be set per-alternative, so it is left as "no"
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*thumb1_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (abs:SI (match_operand:SI 1 "s_register_operand" "l")))
+ (clobber (match_scratch:SI 2 "=&l"))]
+ "TARGET_THUMB1"
+ "#"
+ "TARGET_THUMB1 && reload_completed"
+ [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))]
+ ""
+ [(set_attr "length" "6")]
+)
+
+(define_insn "*arm_neg_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "@
+ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
+ [(set_attr "conds" "clob,*")
+ (set_attr "shift" "1")
+   ;; predicable can't be set per-alternative, so it is left as "no"
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*thumb1_neg_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "l"))))
+ (clobber (match_scratch:SI 2 "=&l"))]
+ "TARGET_THUMB1"
+ "#"
+ "TARGET_THUMB1 && reload_completed"
+ [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))]
+ ""
+ [(set_attr "length" "6")]
+)
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (abs:SF (match_operand:SF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "")
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (abs:DF (match_operand:DF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "")
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (sqrt:SF (match_operand:SF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP)"
+ "")
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
+ "")
+
+(define_insn_and_split "one_cmpldi2"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
+ (not:DI (match_operand:DI 1 "s_register_operand" "0,r")))]
+ "TARGET_32BIT"
+ "#"
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (not:SI (match_dup 1)))
+ (set (match_dup 2) (not:SI (match_dup 3)))]
+ "
+ {
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ }"
+ [(set_attr "length" "8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (not:SI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_EITHER"
+ ""
+)
+
+(define_insn "*arm_one_cmplsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (not:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT"
+ "mvn%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "mvn")]
+)
+
+(define_insn "*thumb1_one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (not:SI (match_operand:SI 1 "register_operand" "l")))]
+ "TARGET_THUMB1"
+ "mvn\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "insn" "mvn")]
+)
+
+(define_insn "*notsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (not:SI (match_dup 1)))]
+ "TARGET_32BIT"
+ "mvn%.\\t%0, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mvn")]
+)
+
+(define_insn "*notsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_32BIT"
+ "mvn%.\\t%0, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mvn")]
+)
+
+;; Fixed <--> Floating conversion insns
+
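+;; There are no direct SImode/DImode to HFmode conversions, so the
+;; expanders below convert to SFmode first and then narrow the result.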
+(define_expand "floatsihf2"
+ [(set (match_operand:HF 0 "general_operand" "")
+ (float:HF (match_operand:SI 1 "general_operand" "")))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1 = gen_reg_rtx (SFmode);
+ expand_float (op1, operands[1], 0);
+ op1 = convert_to_mode (HFmode, op1, 0);
+ emit_move_insn (operands[0], op1);
+ DONE;
+ }"
+)
+
+(define_expand "floatdihf2"
+ [(set (match_operand:HF 0 "general_operand" "")
+ (float:HF (match_operand:DI 1 "general_operand" "")))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1 = gen_reg_rtx (SFmode);
+ expand_float (op1, operands[1], 0);
+ op1 = convert_to_mode (HFmode, op1, 0);
+ emit_move_insn (operands[0], op1);
+ DONE;
+ }"
+)
+
+(define_expand "floatsisf2"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (float:SF (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ if (TARGET_MAVERICK)
+ {
+ emit_insn (gen_cirrus_floatsisf2 (operands[0], operands[1]));
+ DONE;
+ }
+")
+
+(define_expand "floatsidf2"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (float:DF (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK)
+ {
+ emit_insn (gen_cirrus_floatsidf2 (operands[0], operands[1]));
+ DONE;
+ }
+")
+
+(define_expand "fix_trunchfsi2"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1 = convert_to_mode (SFmode, operands[1], 0);
+ expand_fix (operands[0], op1, 0);
+ DONE;
+ }"
+)
+
+(define_expand "fix_trunchfdi2"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1 = convert_to_mode (SFmode, operands[1], 0);
+ expand_fix (operands[0], op1, 0);
+ DONE;
+ }"
+)
+
+(define_expand "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ if (TARGET_MAVERICK)
+ {
+ if (!cirrus_fp_register (operands[0], SImode))
+ operands[0] = force_reg (SImode, operands[0]);
+ if (!cirrus_fp_register (operands[1], SFmode))
+      operands[1] = force_reg (SFmode, operands[1]);
+ emit_insn (gen_cirrus_truncsfsi2 (operands[0], operands[1]));
+ DONE;
+ }
+")
+
+(define_expand "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "
+ if (TARGET_MAVERICK)
+ {
+ if (!cirrus_fp_register (operands[1], DFmode))
+       operands[1] = force_reg (DFmode, operands[1]);
+ emit_insn (gen_cirrus_truncdfsi2 (operands[0], operands[1]));
+ DONE;
+ }
+")
+
+;; Truncation insns
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ ""
+)
+
+/* DFmode -> HFmode conversions have to go through SFmode. */
+(define_expand "truncdfhf2"
+ [(set (match_operand:HF 0 "general_operand" "")
+ (float_truncate:HF
+ (match_operand:DF 1 "general_operand" "")))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1;
+ op1 = convert_to_mode (SFmode, operands[1], 0);
+ op1 = convert_to_mode (HFmode, op1, 0);
+ emit_move_insn (operands[0], op1);
+ DONE;
+ }"
+)
+
+;; Zero and sign extension instructions.
+
+(define_insn "zero_extend<mode>di2"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+ "<qhs_extenddi_cstr>")))]
+ "TARGET_32BIT <qhs_zextenddi_cond>"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "ce_count" "2")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "extend<mode>di2"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+ "<qhs_extenddi_cstr>")))]
+ "TARGET_32BIT <qhs_sextenddi_cond>"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "ce_count" "2")
+ (set_attr "shift" "1")
+ (set_attr "predicable" "yes")]
+)
+
+;; Splits for all extensions to DImode
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+ "TARGET_32BIT"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx lo_part = gen_lowpart (SImode, operands[0]);
+ enum machine_mode src_mode = GET_MODE (operands[1]);
+
+ if (REG_P (operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1]))
+ emit_clobber (operands[0]);
+ if (!REG_P (lo_part) || src_mode != SImode
+ || !rtx_equal_p (lo_part, operands[1]))
+ {
+ if (src_mode == SImode)
+ emit_move_insn (lo_part, operands[1]);
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+ gen_rtx_ZERO_EXTEND (SImode, operands[1])));
+ operands[1] = lo_part;
+ }
+ operands[0] = gen_highpart (SImode, operands[0]);
+ operands[1] = const0_rtx;
+})
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+ "TARGET_32BIT"
+ [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
+{
+ rtx lo_part = gen_lowpart (SImode, operands[0]);
+ enum machine_mode src_mode = GET_MODE (operands[1]);
+
+ if (REG_P (operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1]))
+ emit_clobber (operands[0]);
+
+ if (!REG_P (lo_part) || src_mode != SImode
+ || !rtx_equal_p (lo_part, operands[1]))
+ {
+ if (src_mode == SImode)
+ emit_move_insn (lo_part, operands[1]);
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+ gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+ operands[1] = lo_part;
+ }
+ operands[0] = gen_highpart (SImode, operands[0]);
+})
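+
+;; In both splits above the explicit clobber tells the optimizers that
+;; the whole DImode register is rewritten, so setting it one word at a
+;; time is not treated as a partial update of a live value.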
+
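+;; Halfword zero-extension.  Without ARMv4's ldrh a memory operand goes
+;; through movhi_bytes; without v6's uxth a register operand is widened
+;; with a left/logical-right shift pair.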
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_EITHER"
+{
+ if (TARGET_ARM && !arm_arch4 && MEM_P (operands[1]))
+ {
+ emit_insn (gen_movhi_bytes (operands[0], operands[1]));
+ DONE;
+ }
+ if (!arm_arch6 && !MEM_P (operands[1]))
+ {
+ rtx t = gen_lowpart (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16)));
+ emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (16)));
+ DONE;
+ }
+})
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "s_register_operand" "")))]
+ "!TARGET_THUMB2 && !arm_arch6"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+{
+ operands[2] = gen_lowpart (SImode, operands[1]);
+})
+
+(define_insn "*thumb1_zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))]
+ "TARGET_THUMB1"
+{
+ rtx mem;
+
+ if (which_alternative == 0 && arm_arch6)
+ return "uxth\t%0, %1";
+ if (which_alternative == 0)
+ return "#";
+
+ mem = XEXP (operands[1], 0);
+
+ if (GET_CODE (mem) == CONST)
+ mem = XEXP (mem, 0);
+
+ if (GET_CODE (mem) == PLUS)
+ {
+ rtx a = XEXP (mem, 0);
+
+ /* This can happen due to bugs in reload. */
+ if (GET_CODE (a) == REG && REGNO (a) == SP_REGNUM)
+ {
+ rtx ops[2];
+ ops[0] = operands[0];
+ ops[1] = a;
+
+ output_asm_insn ("mov\t%0, %1", ops);
+
+ XEXP (mem, 0) = operands[0];
+ }
+ }
+
+ return "ldrh\t%0, %1";
+}
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "is_arch6" "yes")
+ (const_int 2) (const_int 4))
+ (const_int 4)])
+ (set_attr "type" "alu_shift,load_byte")]
+)
+
+(define_insn "*arm_zero_extendhisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_ARM && arm_arch4 && !arm_arch6"
+ "@
+ #
+ ldr%(h%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_zero_extendhisi2_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_ARM && arm_arch6"
+ "@
+ uxth%?\\t%0, %1
+ ldr%(h%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_zero_extendhisi2addsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (zero_extend:SI (match_operand:HI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_INT_SIMD"
+ "uxtah%?\\t%0, %2, %1"
+ [(set_attr "type" "alu_shift")
+ (set_attr "predicable" "yes")]
+)
+
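+;; Byte zero-extension.  On ARM a register operand needs only an and
+;; with 255; Thumb-1 without v6 falls back to a shift pair instead.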
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "TARGET_EITHER"
+{
+ if (TARGET_ARM && !arm_arch6 && GET_CODE (operands[1]) != MEM)
+ {
+ emit_insn (gen_andsi3 (operands[0],
+ gen_lowpart (SImode, operands[1]),
+ GEN_INT (255)));
+ DONE;
+ }
+ if (!arm_arch6 && !MEM_P (operands[1]))
+ {
+ rtx t = gen_lowpart (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24)));
+ emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (24)));
+ DONE;
+ }
+})
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "s_register_operand" "")))]
+ "!arm_arch6"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0);
+ if (TARGET_ARM)
+ {
+ emit_insn (gen_andsi3 (operands[0], operands[2], GEN_INT (255)));
+ DONE;
+ }
+})
+
+(define_insn "*thumb1_zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))]
+ "TARGET_THUMB1 && !arm_arch6"
+ "@
+ #
+ ldrb\\t%0, %1"
+ [(set_attr "length" "4,2")
+ (set_attr "type" "alu_shift,load_byte")
+ (set_attr "pool_range" "*,32")]
+)
+
+(define_insn "*thumb1_zero_extendqisi2_v6"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))]
+ "TARGET_THUMB1 && arm_arch6"
+ "@
+ uxtb\\t%0, %1
+ ldrb\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "type" "alu_shift,load_byte")]
+)
+
+(define_insn "*arm_zero_extendqisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_ARM && !arm_arch6"
+ "@
+ #
+ ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_zero_extendqisi2_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_ARM && arm_arch6"
+ "@
+ uxtb%(%)\\t%0, %1
+ ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*arm_zero_extendqisi2addsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (zero_extend:SI (match_operand:QI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_INT_SIMD"
+ "uxtab%?\\t%0, %2, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "xtab")
+ (set_attr "type" "alu_shift")]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 0)))
+ (clobber (match_operand:SI 2 "s_register_operand" ""))]
+ "TARGET_32BIT && (GET_CODE (operands[1]) != MEM) && ! BYTES_BIG_ENDIAN"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))]
+ ""
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 3)))
+ (clobber (match_operand:SI 2 "s_register_operand" ""))]
+ "TARGET_32BIT && (GET_CODE (operands[1]) != MEM) && BYTES_BIG_ENDIAN"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))]
+ ""
+)
+
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ior_xor:SI (and:SI (ashift:SI
+ (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" ""))
+ (zero_extend:SI
+ (match_operator 5 "subreg_lowpart_operator"
+ [(match_operand:SI 4 "s_register_operand" "")]))))]
+ "TARGET_32BIT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
+ == (GET_MODE_MASK (GET_MODE (operands[5]))
+ & (GET_MODE_MASK (GET_MODE (operands[5]))
+ << (INTVAL (operands[2])))))"
+ [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2))
+ (match_dup 4)))
+ (set (match_dup 0) (zero_extend:SI (match_dup 5)))]
+ "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);"
+)
+
+(define_insn "*compareqi_eq0"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r")
+ (const_int 0)))]
+ "TARGET_32BIT"
+ "tst\\t%0, #255"
+ [(set_attr "conds" "set")]
+)
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_EITHER"
+{
+ if (TARGET_THUMB1)
+ {
+ emit_insn (gen_thumb1_extendhisi2 (operands[0], operands[1]));
+ DONE;
+ }
+ if (MEM_P (operands[1]) && TARGET_ARM && !arm_arch4)
+ {
+ emit_insn (gen_extendhisi2_mem (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (!arm_arch6 && !MEM_P (operands[1]))
+ {
+ rtx t = gen_lowpart (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16)));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (16)));
+ DONE;
+ }
+})
+
+(define_split
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))
+ (clobber (match_scratch:SI 2 ""))])]
+ "!arm_arch6"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+})
+
+;; We used to have an early-clobber on the scratch register here.
+;; However, there's a bug somewhere in reload which means that this
+;; can be partially ignored during spill allocation if the memory
+;; address also needs reloading; this causes us to die later on when
+;; we try to verify the operands. Fortunately, we don't really need
+;; the early-clobber: we can always use operand 0 if operand 2
+;; overlaps the address.
+(define_insn "thumb1_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=l,l")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))
+ (clobber (match_scratch:SI 2 "=X,l"))]
+ "TARGET_THUMB1"
+ "*
+ {
+ rtx ops[4];
+ rtx mem;
+
+ if (which_alternative == 0 && !arm_arch6)
+ return \"#\";
+ if (which_alternative == 0)
+ return \"sxth\\t%0, %1\";
+
+ mem = XEXP (operands[1], 0);
+
+ /* This code used to try to use 'V', and fix the address only if it was
+ offsettable, but this fails for e.g. REG+48 because 48 is outside the
+ range of QImode offsets, and offsettable_address_p does a QImode
+ address check. */
+
+ if (GET_CODE (mem) == CONST)
+ mem = XEXP (mem, 0);
+
+ if (GET_CODE (mem) == LABEL_REF)
+ return \"ldr\\t%0, %1\";
+
+ if (GET_CODE (mem) == PLUS)
+ {
+ rtx a = XEXP (mem, 0);
+ rtx b = XEXP (mem, 1);
+
+ if (GET_CODE (a) == LABEL_REF
+ && GET_CODE (b) == CONST_INT)
+ return \"ldr\\t%0, %1\";
+
+ if (GET_CODE (b) == REG)
+ return \"ldrsh\\t%0, %1\";
+
+ ops[1] = a;
+ ops[2] = b;
+ }
+ else
+ {
+ ops[1] = mem;
+ ops[2] = const0_rtx;
+ }
+
+ gcc_assert (GET_CODE (ops[1]) == REG);
+
+ ops[0] = operands[0];
+ if (reg_mentioned_p (operands[2], ops[1]))
+ ops[3] = ops[0];
+ else
+ ops[3] = operands[2];
+ output_asm_insn (\"mov\\t%3, %2\;ldrsh\\t%0, [%1, %3]\", ops);
+ return \"\";
+ }"
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "is_arch6" "yes")
+ (const_int 2) (const_int 4))
+ (const_int 4)])
+ (set_attr "type" "alu_shift,load_byte")
+ (set_attr "pool_range" "*,1020")]
+)
+
+;; This pattern will only be used when ldsh is not available
+(define_expand "extendhisi2_mem"
+ [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" "")))
+ (set (match_dup 3)
+ (zero_extend:SI (match_dup 7)))
+ (set (match_dup 6) (ashift:SI (match_dup 4) (const_int 24)))
+ (set (match_operand:SI 0 "" "")
+ (ior:SI (ashiftrt:SI (match_dup 6) (const_int 16)) (match_dup 5)))]
+ "TARGET_ARM"
+ "
+ {
+ rtx mem1, mem2;
+ rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
+
+ mem1 = change_address (operands[1], QImode, addr);
+ mem2 = change_address (operands[1], QImode, plus_constant (addr, 1));
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = mem1;
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] = mem2;
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ operands[4] = operands[2];
+ operands[5] = operands[3];
+ }
+ else
+ {
+ operands[4] = operands[3];
+ operands[5] = operands[2];
+ }
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ "!arm_arch6"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+})
+
+(define_insn "*arm_extendhisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_ARM && arm_arch4 && !arm_arch6"
+ "@
+ #
+ ldr%(sh%)\\t%0, %1"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
+)
+
+;; ??? Check Thumb-2 pool range
+(define_insn "*arm_extendhisi2_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_32BIT && arm_arch6"
+ "@
+ sxth%?\\t%0, %1
+ ldr%(sh%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
+)
+
+(define_insn "*arm_extendhisi2addsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (sign_extend:SI (match_operand:HI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_INT_SIMD"
+ "sxtah%?\\t%0, %2, %1"
+)
+
+(define_expand "extendqihi2"
+ [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "")
+ (const_int 24)))
+ (set (match_operand:HI 0 "s_register_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))]
+ "TARGET_ARM"
+ "
+ {
+ if (arm_arch4 && GET_CODE (operands[1]) == MEM)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operands[0],
+ gen_rtx_SIGN_EXTEND (HImode, operands[1])));
+ DONE;
+ }
+ if (!s_register_operand (operands[1], QImode))
+ operands[1] = copy_to_mode_reg (QImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode);
+ }"
+)
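+
+;; The QImode to HImode sign extension above is performed in SImode
+;; (shift left then arithmetic shift right by 24); the HImode result
+;; is simply the low part of that value.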
+
+(define_insn "*arm_extendqihi_insn"
+ [(set (match_operand:HI 0 "s_register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "arm_extendqisi_mem_op" "Uq")))]
+ "TARGET_ARM && arm_arch4"
+ "ldr%(sb%)\\t%0, %1"
+ [(set_attr "type" "load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "256")
+ (set_attr "neg_pool_range" "244")]
+)
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "")))]
+ "TARGET_EITHER"
+{
+ if (!arm_arch4 && MEM_P (operands[1]))
+ operands[1] = copy_to_mode_reg (QImode, operands[1]);
+
+ if (!arm_arch6 && !MEM_P (operands[1]))
+ {
+ rtx t = gen_lowpart (SImode, operands[1]);
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24)));
+ emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (24)));
+ DONE;
+ }
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ "!arm_arch6"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0);
+})
+
+(define_insn "*arm_extendqisi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))]
+ "TARGET_ARM && arm_arch4 && !arm_arch6"
+ "@
+ #
+ ldr%(sb%)\\t%0, %1"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
+)
+
+(define_insn "*arm_extendqisi_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (sign_extend:SI
+ (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))]
+ "TARGET_ARM && arm_arch6"
+ "@
+ sxtb%?\\t%0, %1
+ ldr%(sb%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,256")
+ (set_attr "neg_pool_range" "*,244")]
+)
+
+(define_insn "*arm_extendqisi2addsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (sign_extend:SI (match_operand:QI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_INT_SIMD"
+ "sxtab%?\\t%0, %2, %1"
+ [(set_attr "type" "alu_shift")
+ (set_attr "insn" "xtab")
+ (set_attr "predicable" "yes")]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))]
+ "TARGET_THUMB1 && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (sign_extend:SI (match_dup 3)))]
+{
+ rtx addr = XEXP (operands[1], 0);
+
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1)))
+ /* No split necessary. */
+ FAIL;
+
+ if (GET_CODE (addr) == PLUS
+ && !REG_P (XEXP (addr, 0)) && !REG_P (XEXP (addr, 1)))
+ FAIL;
+
+ if (reg_overlap_mentioned_p (operands[0], addr))
+ {
+ rtx t = gen_lowpart (QImode, operands[0]);
+ emit_move_insn (t, operands[1]);
+ emit_insn (gen_thumb1_extendqisi2 (operands[0], t));
+ DONE;
+ }
+
+ if (REG_P (addr))
+ {
+ addr = gen_rtx_PLUS (Pmode, addr, operands[0]);
+ operands[2] = const0_rtx;
+ }
+ else if (GET_CODE (addr) != PLUS)
+ FAIL;
+ else if (REG_P (XEXP (addr, 0)))
+ {
+ operands[2] = XEXP (addr, 1);
+ addr = gen_rtx_PLUS (Pmode, XEXP (addr, 0), operands[0]);
+ }
+ else
+ {
+ operands[2] = XEXP (addr, 0);
+ addr = gen_rtx_PLUS (Pmode, XEXP (addr, 1), operands[0]);
+ }
+
+ operands[3] = change_address (operands[1], QImode, addr);
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0) (match_operand 1 "const_int_operand")))
+ (set (match_operand:SI 2 "register_operand" "") (const_int 0))
+ (set (match_operand:SI 3 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 4 "memory_operand" "")))]
+ "TARGET_THUMB1
+ && GET_CODE (XEXP (operands[4], 0)) == PLUS
+ && rtx_equal_p (operands[0], XEXP (XEXP (operands[4], 0), 0))
+ && rtx_equal_p (operands[2], XEXP (XEXP (operands[4], 0), 1))
+ && (peep2_reg_dead_p (3, operands[0])
+ || rtx_equal_p (operands[0], operands[3]))
+ && (peep2_reg_dead_p (3, operands[2])
+ || rtx_equal_p (operands[2], operands[3]))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 3) (sign_extend:SI (match_dup 4)))]
+{
+ rtx addr = gen_rtx_PLUS (Pmode, operands[0], operands[2]);
+ operands[4] = change_address (operands[4], QImode, addr);
+})
+
+(define_insn "thumb1_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,l")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,V,m")))]
+ "TARGET_THUMB1"
+{
+ rtx addr;
+
+ if (which_alternative == 0 && arm_arch6)
+ return "sxtb\\t%0, %1";
+ if (which_alternative == 0)
+ return "#";
+
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1)))
+ return "ldrsb\\t%0, %1";
+
+ return "#";
+}
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "is_arch6" "yes")
+ (const_int 2) (const_int 4))
+ (const_int 2)
+ (if_then_else (eq_attr "is_arch6" "yes")
+ (const_int 4) (const_int 6))])
+ (set_attr "type" "alu_shift,load_byte,load_byte")]
+)
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ ""
+)
+
+/* HFmode -> DFmode conversions have to go through SFmode. */
+(define_expand "extendhfdf2"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (float_extend:DF (match_operand:HF 1 "general_operand" "")))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx op1;
+ op1 = convert_to_mode (SFmode, operands[1], 0);
+ op1 = convert_to_mode (DFmode, op1, 0);
+ emit_insn (gen_movdf (operands[0], op1));
+ DONE;
+ }"
+)
+
+;; Move insns (including loads and stores)
+
+;; XXX Just some ideas about movti.
+;; I don't think these are a good idea on the ARM; there just aren't enough
+;; registers.
+;;(define_expand "loadti"
+;; [(set (match_operand:TI 0 "s_register_operand" "")
+;; (mem:TI (match_operand:SI 1 "address_operand" "")))]
+;; "" "")
+
+;;(define_expand "storeti"
+;; [(set (mem:TI (match_operand:TI 0 "address_operand" ""))
+;; (match_operand:TI 1 "s_register_operand" ""))]
+;; "" "")
+
+;;(define_expand "movti"
+;; [(set (match_operand:TI 0 "general_operand" "")
+;; (match_operand:TI 1 "general_operand" ""))]
+;; ""
+;; "
+;;{
+;; rtx insn;
+;;
+;; if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+;; operands[1] = copy_to_reg (operands[1]);
+;; if (GET_CODE (operands[0]) == MEM)
+;; insn = gen_storeti (XEXP (operands[0], 0), operands[1]);
+;; else if (GET_CODE (operands[1]) == MEM)
+;; insn = gen_loadti (operands[0], XEXP (operands[1], 0));
+;; else
+;; FAIL;
+;;
+;; emit_insn (insn);
+;; DONE;
+;;}")
+
+;; Recognize garbage generated above.
+
+;;(define_insn ""
+;; [(set (match_operand:TI 0 "general_operand" "=r,r,r,<,>,m")
+;; (match_operand:TI 1 "general_operand" "<,>,m,r,r,r"))]
+;; ""
+;; "*
+;; {
+;; register mem = (which_alternative < 3);
+;; register const char *template;
+;;
+;; operands[mem] = XEXP (operands[mem], 0);
+;; switch (which_alternative)
+;; {
+;; case 0: template = \"ldmdb\\t%1!, %M0\"; break;
+;; case 1: template = \"ldmia\\t%1!, %M0\"; break;
+;; case 2: template = \"ldmia\\t%1, %M0\"; break;
+;; case 3: template = \"stmdb\\t%0!, %M1\"; break;
+;; case 4: template = \"stmia\\t%0!, %M1\"; break;
+;; case 5: template = \"stmia\\t%0, %M1\"; break;
+;; }
+;; output_asm_insn (template, operands);
+;; return \"\";
+;; }")
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (DImode, operands[1]);
+ }
+ "
+)
+
+(define_insn "*arm_movdi"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
+ (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))]
+ "TARGET_32BIT
+ && !(TARGET_HARD_FLOAT && (TARGET_MAVERICK || TARGET_VFP))
+ && !TARGET_IWMMXT
+ && ( register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ case 2:
+ return \"#\";
+ default:
+ return output_move_double (operands);
+ }
+ "
+ [(set_attr "length" "8,12,16,8,8")
+ (set_attr "type" "*,*,*,load2,store2")
+ (set_attr "arm_pool_range" "*,*,*,1020,*")
+ (set_attr "arm_neg_pool_range" "*,*,*,1008,*")
+ (set_attr "thumb2_pool_range" "*,*,*,4096,*")
+ (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")]
+)
+
+(define_split
+ [(set (match_operand:ANY64 0 "arm_general_register_operand" "")
+ (match_operand:ANY64 1 "const_double_operand" ""))]
+ "TARGET_32BIT
+ && reload_completed
+ && (arm_const_double_inline_cost (operands[1])
+ <= ((optimize_size || arm_ld_sched) ? 3 : 4))"
+ [(const_int 0)]
+ "
+ arm_split_constant (SET, SImode, curr_insn,
+ INTVAL (gen_lowpart (SImode, operands[1])),
+ gen_lowpart (SImode, operands[0]), NULL_RTX, 0);
+ arm_split_constant (SET, SImode, curr_insn,
+ INTVAL (gen_highpart_mode (SImode,
+ GET_MODE (operands[0]),
+ operands[1])),
+ gen_highpart (SImode, operands[0]), NULL_RTX, 0);
+ DONE;
+ "
+)
+
+;; If optimizing for size, or if we have load delay slots, then
+;; we want to split the constant into two separate operations.
+;; In both cases this may split a trivial part into a single data op
+;; leaving a single complex constant to load.  We can also get longer
+;; offsets in an LDR, which improves the chances of sharing the pool
+;; entries.  Finally, we can normally do a better job of scheduling
+;; LDR instructions than we can with LDM.
+;; This pattern will only match if the one above did not.
+(define_split
+ [(set (match_operand:ANY64 0 "arm_general_register_operand" "")
+ (match_operand:ANY64 1 "const_double_operand" ""))]
+ "TARGET_ARM && reload_completed
+ && arm_const_double_by_parts (operands[1])"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart_mode (SImode, GET_MODE (operands[0]),
+ operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ "
+)
+
+(define_split
+ [(set (match_operand:ANY64 0 "arm_general_register_operand" "")
+ (match_operand:ANY64 1 "arm_general_register_operand" ""))]
+ "TARGET_EITHER && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+
+ /* Handle a partial overlap. */
+ if (rtx_equal_p (operands[0], operands[3]))
+ {
+ rtx tmp0 = operands[0];
+ rtx tmp1 = operands[1];
+
+ operands[0] = operands[2];
+ operands[1] = operands[3];
+ operands[2] = tmp0;
+ operands[3] = tmp1;
+ }
+ "
+)
+
+;; We can't actually do base+index doubleword loads if the index and
+;; destination overlap.  Split here so that we at least have a chance to
+;; schedule.
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (mem:DI (plus:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))))]
+ "TARGET_LDRD
+ && reg_overlap_mentioned_p (operands[0], operands[1])
+ && reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 4)
+ (plus:SI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (mem:DI (match_dup 4)))]
+ "
+  operands[4] = gen_rtx_REG (SImode, REGNO (operands[0]));
+ "
+)
+
+;;; ??? This should have alternatives for constants.
+;;; ??? This was originally identical to the movdf_insn pattern.
+;;; ??? The 'i' constraint looks funny, but it should always be replaced by
+;;; thumb_reorg with a memory reference.
+(define_insn "*thumb1_movdi_insn"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,l,>,l, m,*r")
+ (match_operand:DI 1 "general_operand" "l, I,J,>,l,mi,l,*r"))]
+ "TARGET_THUMB1
+ && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
+ && ( register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ default:
+ case 0:
+ if (REGNO (operands[1]) == REGNO (operands[0]) + 1)
+ return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\";
+ return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\";
+ case 1:
+ return \"mov\\t%Q0, %1\;mov\\t%R0, #0\";
+ case 2:
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ return \"mov\\t%Q0, %1\;neg\\t%Q0, %Q0\;asr\\t%R0, %Q0, #31\";
+ case 3:
+ return \"ldmia\\t%1, {%0, %H0}\";
+ case 4:
+ return \"stmia\\t%0, {%1, %H1}\";
+ case 5:
+ return thumb_load_double_from_address (operands);
+ case 6:
+ operands[2] = gen_rtx_MEM (SImode,
+ plus_constant (XEXP (operands[0], 0), 4));
+ output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
+ return \"\";
+ case 7:
+ if (REGNO (operands[1]) == REGNO (operands[0]) + 1)
+ return \"mov\\t%0, %1\;mov\\t%H0, %H1\";
+ return \"mov\\t%H0, %H1\;mov\\t%0, %1\";
+ }
+ }"
+ [(set_attr "length" "4,4,6,2,2,6,4,4")
+ (set_attr "type" "*,*,*,load2,store2,load2,store2,*")
+ (set_attr "insn" "*,mov,*,*,*,*,*,mov")
+ (set_attr "pool_range" "*,*,*,*,*,1020,*,*")]
+)
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ {
+ rtx base, offset, tmp;
+
+ if (TARGET_32BIT)
+ {
+ /* Everything except mem = const or mem = mem can be done easily. */
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SImode, operands[1]);
+ if (arm_general_register_operand (operands[0], SImode)
+ && GET_CODE (operands[1]) == CONST_INT
+ && !(const_ok_for_arm (INTVAL (operands[1]))
+ || const_ok_for_arm (~INTVAL (operands[1]))))
+ {
+ arm_split_constant (SET, SImode, NULL_RTX,
+ INTVAL (operands[1]), operands[0], NULL_RTX,
+ optimize && can_create_pseudo_p ());
+ DONE;
+ }
+
+ if (TARGET_USE_MOVT && !target_word_relocations
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !flag_pic && !arm_tls_referenced_p (operands[1]))
+ {
+ arm_emit_movpair (operands[0], operands[1]);
+ DONE;
+ }
+ }
+ else /* TARGET_THUMB1... */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ }
+
+ if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ split_const (operands[1], &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ {
+ tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+ emit_move_insn (tmp, base);
+ emit_insn (gen_addsi3 (operands[0], tmp, offset));
+ DONE;
+ }
+ }
+
+ /* Recognize the case where operand[1] is a reference to thread-local
+ data and load its address to a register. */
+ if (arm_tls_referenced_p (operands[1]))
+ {
+ rtx tmp = operands[1];
+ rtx addend = NULL;
+
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (tmp, 0), 1);
+ tmp = XEXP (XEXP (tmp, 0), 0);
+ }
+
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0);
+
+ tmp = legitimize_tls_address (tmp,
+ !can_create_pseudo_p () ? operands[0] : 0);
+ if (addend)
+ {
+ tmp = gen_rtx_PLUS (SImode, tmp, addend);
+ tmp = force_operand (tmp, operands[0]);
+ }
+ operands[1] = tmp;
+ }
+ else if (flag_pic
+ && (CONSTANT_P (operands[1])
+ || symbol_mentioned_p (operands[1])
+ || label_mentioned_p (operands[1])))
+ operands[1] = legitimize_pic_address (operands[1], SImode,
+ (!can_create_pseudo_p ()
+ ? operands[0]
+ : 0));
+ }
+ "
+)
+
+;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and
+;; LO_SUM adds in the high bits. Fortunately these are opaque operations
+;; so this does not matter.
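+;; movt writes only the upper 16 bits, leaving the low half (set by a
+;; preceding movw) intact, so the pair materializes an arbitrary 32-bit
+;; constant in two instructions.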
+(define_insn "*arm_movt"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "general_operand" "i")))]
+ "arm_arch_thumb2"
+ "movt%?\t%0, #:upper16:%c2"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4")]
+)
+
+(define_insn "*arm_movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))]
+ "TARGET_ARM && ! TARGET_IWMMXT
+ && !(TARGET_HARD_FLOAT && TARGET_VFP)
+ && ( register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov%?\\t%0, %1
+ mov%?\\t%0, %1
+ mvn%?\\t%0, #%B1
+ movw%?\\t%0, %1
+ ldr%?\\t%0, %1
+ str%?\\t%1, %0"
+ [(set_attr "type" "*,*,*,*,load1,store1")
+ (set_attr "insn" "mov,mov,mvn,mov,*,*")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,*,*,*,4096,*")
+ (set_attr "neg_pool_range" "*,*,*,*,4084,*")]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_32BIT
+ && (!(const_ok_for_arm (INTVAL (operands[1]))
+ || const_ok_for_arm (~INTVAL (operands[1]))))"
+ [(clobber (const_int 0))]
+ "
+ arm_split_constant (SET, SImode, NULL_RTX,
+ INTVAL (operands[1]), operands[0], NULL_RTX, 0);
+ DONE;
+ "
+)
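+
+;; arm_split_constant breaks such a value into a short sequence of
+;; immediate operations, typically a mov followed by orr (or mvn
+;; followed by bic).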
+
+(define_insn "*thumb1_movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k")
+ (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*l*h*k"))]
+ "TARGET_THUMB1
+ && ( register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov %0, %1
+ mov %0, %1
+ #
+ #
+ ldmia\\t%1, {%0}
+ stmia\\t%0, {%1}
+ ldr\\t%0, %1
+ str\\t%1, %0
+ mov\\t%0, %1"
+ [(set_attr "length" "2,2,4,4,2,2,2,2,2")
+ (set_attr "type" "*,*,*,*,load1,store1,load1,store1,*")
+ (set_attr "pool_range" "*,*,*,*,*,*,1020,*,*")
+ (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_THUMB1 && satisfies_constraint_J (operands[1])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (neg:SI (match_dup 2)))]
+ "
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_THUMB1 && satisfies_constraint_K (operands[1])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))]
+ "
+ {
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu;
+ unsigned HOST_WIDE_INT mask = 0xff;
+ int i;
+
+ for (i = 0; i < 25; i++)
+ if ((val & (mask << i)) == val)
+ break;
+
+ /* Don't split if the shift is zero. */
+ if (i == 0)
+ FAIL;
+
+ operands[1] = GEN_INT (val >> i);
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+ operands[3] = GEN_INT (i);
+ }"
+)
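+
+;; For example, the split above loads 0x4400 as a mov of 0x88 followed
+;; by a left shift of 7.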
+
+;; When generating pic, we need to load the symbol offset into a register.
+;; So that the optimizer does not confuse this with a normal symbol load,
+;; we use an unspec. The offset will be loaded from a constant pool entry,
+;; since that is the only type of relocation we can use.
+
+;; Wrap calculation of the whole PIC address in a single pattern for the
+;; benefit of optimizers, particularly, PRE and HOIST. Calculation of
+;; a PIC address involves two loads from memory, so we want to CSE it
+;; as often as possible.
+;; This pattern will be split into one of the pic_load_addr_* patterns
+;; and a move after GCSE optimizations.
+;;
+;; Note: Update arm.c: legitimize_pic_address() when changing this pattern.
+(define_expand "calculate_pic_address"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+ (unspec:SI [(match_operand:SI 2 "" "")]
+ UNSPEC_PIC_SYM))))]
+ "flag_pic"
+)
+
+;; Split calculate_pic_address into pic_load_addr_* and a move.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+ (unspec:SI [(match_operand:SI 2 "" "")]
+ UNSPEC_PIC_SYM))))]
+ "flag_pic"
+ [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM))
+ (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))]
+ "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];"
+)
+
+;; operand1 is the memory address to go into
+;; pic_load_addr_32bit.
+;; operand2 is the PIC label to be emitted
+;; from pic_add_dot_plus_eight.
+;; We do this to allow hoisting of the entire insn.
+(define_insn_and_split "pic_load_addr_unified"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,l")
+ (unspec:SI [(match_operand:SI 1 "" "mX,mX,mX")
+ (match_operand:SI 2 "" "")]
+ UNSPEC_PIC_UNIFIED))]
+ "flag_pic"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_PIC_SYM))
+ (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3)
+ (match_dup 2)] UNSPEC_PIC_BASE))]
+ "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);"
+ [(set_attr "type" "load1,load1,load1")
+ (set_attr "pool_range" "4096,4096,1024")
+ (set_attr "neg_pool_range" "4084,0,0")
+ (set_attr "arch" "a,t2,t1")
+ (set_attr "length" "8,6,4")]
+)
+
+;; The rather odd constraints on the following are to force reload to leave
+;; the insn alone, and to force the minipool generation pass to then move
+;; the GOT symbol to memory.
+
+(define_insn "pic_load_addr_32bit"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
+ "TARGET_32BIT && flag_pic"
+ "ldr%?\\t%0, %1"
+ [(set_attr "type" "load1")
+ (set_attr "pool_range" "4096")
+ (set (attr "neg_pool_range")
+ (if_then_else (eq_attr "is_thumb" "no")
+ (const_int 4084)
+ (const_int 0)))]
+)
+
+(define_insn "pic_load_addr_thumb1"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))]
+ "TARGET_THUMB1 && flag_pic"
+ "ldr\\t%0, %1"
+ [(set_attr "type" "load1")
+ (set (attr "pool_range") (const_int 1024))]
+)
+
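+;; In the patterns below the 4 and 8 account for pc read-ahead: the pc
+;; reads as the address of the instruction plus 4 in Thumb state and
+;; plus 8 in ARM state, so adding it at the LPIC label gives the
+;; intended base address.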
+(define_insn "pic_add_dot_plus_four"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (const_int 4)
+ (match_operand 2 "" "")]
+ UNSPEC_PIC_BASE))]
+ "TARGET_THUMB"
+ "*
+ (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+ INTVAL (operands[2]));
+ return \"add\\t%0, %|pc\";
+ "
+ [(set_attr "length" "2")]
+)
+
+(define_insn "pic_add_dot_plus_eight"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (const_int 8)
+ (match_operand 2 "" "")]
+ UNSPEC_PIC_BASE))]
+ "TARGET_ARM"
+ "*
+ (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+ INTVAL (operands[2]));
+ return \"add%?\\t%0, %|pc, %1\";
+ "
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "tls_load_dot_plus_eight"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (const_int 8)
+ (match_operand 2 "" "")]
+ UNSPEC_PIC_BASE)))]
+ "TARGET_ARM"
+ "*
+ (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+ INTVAL (operands[2]));
+ return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\";
+ "
+ [(set_attr "predicable" "yes")]
+)
+
+;; PIC references to local variables can generate pic_add_dot_plus_eight
+;; followed by a load. These sequences can be crunched down to
+;; tls_load_dot_plus_eight by a peephole.
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 3 "register_operand" "")
+ (const_int 8)
+ (match_operand 1 "" "")]
+ UNSPEC_PIC_BASE))
+ (set (match_operand:SI 2 "arm_general_register_operand" "")
+ (mem:SI (match_dup 0)))]
+ "TARGET_ARM && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (mem:SI (unspec:SI [(match_dup 3)
+ (const_int 8)
+ (match_dup 1)]
+ UNSPEC_PIC_BASE)))]
+ ""
+)
+
+(define_insn "pic_offset_arm"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "" "X")]
+ UNSPEC_PIC_OFFSET))))]
+ "TARGET_VXWORKS_RTP && TARGET_ARM && flag_pic"
+ "ldr%?\\t%0, [%1,%2]"
+ [(set_attr "type" "load1")]
+)
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "flag_pic"
+ "
+{
+ /* r3 is clobbered by set/longjmp, so we can use it as a scratch
+ register. */
+ if (arm_pic_register != INVALID_REGNUM)
+ arm_load_pic_register (1UL << 3);
+ DONE;
+}")
+
+;; If copying one reg to another we can set the condition codes according to
+;; its value.  Such a move is common after a return from a subroutine
+;; when the result is being tested against zero.
+
+(define_insn "*movsi_compare0"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "s_register_operand" "0,r")
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_dup 1))]
+ "TARGET_32BIT"
+ "@
+ cmp%?\\t%0, #0
+ sub%.\\t%0, %1, #0"
+ [(set_attr "conds" "set")]
+)
+
+;; Subroutine to store a half word from a register into memory.
+;; Operand 0 is the source register (HImode)
+;; Operand 1 is the destination address in a register (SImode)
+
+;; In both this routine and the next, we must be careful not to spill
+;; a memory address of reg+large_const into a separate PLUS insn, since this
+;; can generate unrecognizable rtl.
+
+(define_expand "storehi"
+ [;; store the low byte
+ (set (match_operand 1 "" "") (match_dup 3))
+ ;; extract the high byte
+ (set (match_dup 2)
+ (ashiftrt:SI (match_operand 0 "" "") (const_int 8)))
+ ;; store the high byte
+ (set (match_dup 4) (match_dup 5))]
+ "TARGET_ARM"
+ "
+ {
+ rtx op1 = operands[1];
+ rtx addr = XEXP (op1, 0);
+ enum rtx_code code = GET_CODE (addr);
+
+ if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ || code == MINUS)
+ op1 = replace_equiv_address (operands[1], force_reg (SImode, addr));
+
+ operands[4] = adjust_address (op1, QImode, 1);
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ operands[3] = gen_lowpart (QImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_reg_rtx (SImode);
+ operands[5] = gen_lowpart (QImode, operands[2]);
+ }"
+)
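+
+;; A rough sketch of the little-endian expansion above (register numbers
+;; hypothetical), storing r1:HI to the address in r0 without ARMv4
+;; halfword stores:
+;;   strb r1, [r0]         @ low byte at offset 0
+;;   mov  r2, r1, asr #8   @ extract the high byte
+;;   strb r2, [r0, #1]     @ high byte at offset 1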
+
+(define_expand "storehi_bigend"
+ [(set (match_dup 4) (match_dup 3))
+ (set (match_dup 2)
+ (ashiftrt:SI (match_operand 0 "" "") (const_int 8)))
+ (set (match_operand 1 "" "") (match_dup 5))]
+ "TARGET_ARM"
+ "
+ {
+ rtx op1 = operands[1];
+ rtx addr = XEXP (op1, 0);
+ enum rtx_code code = GET_CODE (addr);
+
+ if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ || code == MINUS)
+ op1 = replace_equiv_address (op1, force_reg (SImode, addr));
+
+ operands[4] = adjust_address (op1, QImode, 1);
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ operands[3] = gen_lowpart (QImode, operands[0]);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_reg_rtx (SImode);
+ operands[5] = gen_lowpart (QImode, operands[2]);
+ }"
+)
+
+;; Subroutine to store a half word integer constant into memory.
+(define_expand "storeinthi"
+ [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (set (match_dup 3) (match_dup 2))]
+ "TARGET_ARM"
+ "
+ {
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+ rtx addr = XEXP (operands[0], 0);
+ rtx op0 = operands[0];
+ enum rtx_code code = GET_CODE (addr);
+
+ if ((code == PLUS && GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ || code == MINUS)
+ op0 = replace_equiv_address (op0, force_reg (SImode, addr));
+
+ operands[1] = gen_reg_rtx (SImode);
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_movsi (operands[1], GEN_INT ((value >> 8) & 255)));
+ if ((value & 255) == ((value >> 8) & 255))
+ operands[2] = operands[1];
+ else
+ {
+ operands[2] = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (operands[2], GEN_INT (value & 255)));
+ }
+ }
+ else
+ {
+ emit_insn (gen_movsi (operands[1], GEN_INT (value & 255)));
+ if ((value & 255) == ((value >> 8) & 255))
+ operands[2] = operands[1];
+ else
+ {
+ operands[2] = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (operands[2], GEN_INT ((value >> 8) & 255)));
+ }
+ }
+
+ operands[3] = adjust_address (op0, QImode, 1);
+ operands[0] = adjust_address (operands[0], QImode, 0);
+ operands[2] = gen_lowpart (QImode, operands[2]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ }"
+)
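+
+;; Worked example (illustrative, register numbers hypothetical): storing
+;; the constant 0x1234 little-endian becomes
+;;   mov  r2, #0x34        @ low byte
+;;   mov  r3, #0x12        @ high byte
+;;   strb r2, [r0]
+;;   strb r3, [r0, #1]
+;; while a value such as 0x4444, whose two bytes are equal, reuses a
+;; single register for both byte stores.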
+
+(define_expand "storehi_single_op"
+ [(set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ "TARGET_32BIT && arm_arch4"
+ "
+ if (!s_register_operand (operands[1], HImode))
+ operands[1] = copy_to_mode_reg (HImode, operands[1]);
+ "
+)
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_ARM)
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (arm_arch4)
+ {
+ emit_insn (gen_storehi_single_op (operands[0], operands[1]));
+ DONE;
+ }
+ if (GET_CODE (operands[1]) == CONST_INT)
+ emit_insn (gen_storeinthi (operands[0], operands[1]));
+ else
+ {
+ if (GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_storehi_bigend (operands[1], operands[0]));
+ else
+ emit_insn (gen_storehi (operands[1], operands[0]));
+ }
+ DONE;
+ }
+ /* Sign extend a constant, and keep it in an SImode reg. */
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff;
+
+ /* If the constant is already valid, leave it alone. */
+ if (!const_ok_for_arm (val))
+ {
+ /* If setting all the top bits will make the constant
+ loadable in a single instruction, then set them.
+ Otherwise, sign extend the number. */
+
+ if (const_ok_for_arm (~(val | ~0xffff)))
+ val |= ~0xffff;
+ else if (val & 0x8000)
+ val |= ~0xffff;
+ }
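+ /* e.g. (explanatory note): val = 0xffff is not a valid ARM
+ immediate, but with the top bits set it becomes 0xffffffff (-1),
+ which a single mvn #0 can load. */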
+
+ emit_insn (gen_movsi (reg, GEN_INT (val)));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+ else if (arm_arch4 && optimize && can_create_pseudo_p ()
+ && GET_CODE (operands[1]) == MEM)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendhisi2 (reg, operands[1]));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+ else if (!arm_arch4)
+ {
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ rtx base;
+ rtx offset = const0_rtx;
+ rtx reg = gen_reg_rtx (SImode);
+
+ if ((GET_CODE (base = XEXP (operands[1], 0)) == REG
+ || (GET_CODE (base) == PLUS
+ && (GET_CODE (offset = XEXP (base, 1))
+ == CONST_INT)
+ && ((INTVAL(offset) & 1) != 1)
+ && GET_CODE (base = XEXP (base, 0)) == REG))
+ && REGNO_POINTER_ALIGN (REGNO (base)) >= 32)
+ {
+ rtx new_rtx;
+
+ new_rtx = widen_memory_access (operands[1], SImode,
+ ((INTVAL (offset) & ~3)
+ - INTVAL (offset)));
+ emit_insn (gen_movsi (reg, new_rtx));
+ if (((INTVAL (offset) & 2) != 0)
+ ^ (BYTES_BIG_ENDIAN ? 1 : 0))
+ {
+ rtx reg2 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lshrsi3 (reg2, reg, GEN_INT (16)));
+ reg = reg2;
+ }
+ }
+ else
+ emit_insn (gen_movhi_bytes (reg, operands[1]));
+
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+ }
+ }
+ /* Handle loading a large integer during reload. */
+ else if (GET_CODE (operands[1]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[1]))
+ && !const_ok_for_arm (~INTVAL (operands[1])))
+ {
+ /* Writing a constant to memory needs a scratch, which should
+ be handled with SECONDARY_RELOADs. */
+ gcc_assert (GET_CODE (operands[0]) == REG);
+
+ operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
+ emit_insn (gen_movsi (operands[0], operands[1]));
+ DONE;
+ }
+ }
+ else if (TARGET_THUMB2)
+ {
+ /* Thumb-2 can do everything except mem=mem and mem=const easily. */
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (HImode, operands[1]);
+ /* Zero extend a constant, and keep it in an SImode reg. */
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff;
+
+ emit_insn (gen_movsi (reg, GEN_INT (val)));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+ }
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_movsi (reg, operands[1]));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+
+ /* ??? We shouldn't really get invalid addresses here, but this can
+ happen if we are passed an SP (never OK for HImode/QImode) or
+ virtual register (also rejected as illegitimate for HImode/QImode)
+ relative address. */
+ /* ??? This should perhaps be fixed elsewhere, for instance, in
+ fixup_stack_1, by checking for other kinds of invalid addresses,
+ e.g. a bare reference to a virtual register. This may confuse the
+ alpha though, which must handle this case differently. */
+ if (GET_CODE (operands[0]) == MEM
+ && !memory_address_p (GET_MODE (operands[0]),
+ XEXP (operands[0], 0)))
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_reg (XEXP (operands[0], 0)));
+
+ if (GET_CODE (operands[1]) == MEM
+ && !memory_address_p (GET_MODE (operands[1]),
+ XEXP (operands[1], 0)))
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_reg (XEXP (operands[1], 0)));
+
+ if (GET_CODE (operands[1]) == MEM && optimize > 0)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendhisi2 (reg, operands[1]));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT
+ && !satisfies_constraint_I (operands[1]))
+ {
+ /* Handle loading a large integer during reload. */
+
+ /* Writing a constant to memory needs a scratch, which should
+ be handled with SECONDARY_RELOADs. */
+ gcc_assert (GET_CODE (operands[0]) == REG);
+
+ operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
+ emit_insn (gen_movsi (operands[0], operands[1]));
+ DONE;
+ }
+ }
+ "
+)
+
+(define_insn "*thumb1_movhi_insn"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
+ (match_operand:HI 1 "general_operand" "l,m,l,*h,*r,I"))]
+ "TARGET_THUMB1
+ && ( register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: return \"add %0, %1, #0\";
+ case 2: return \"strh %1, %0\";
+ case 3: return \"mov %0, %1\";
+ case 4: return \"mov %0, %1\";
+ case 5: return \"mov %0, %1\";
+ default: gcc_unreachable ();
+ case 1:
+ /* The stack pointer can end up being taken as an index register.
+ Catch this case here and deal with it. */
+ if (GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG
+ && REGNO (XEXP (XEXP (operands[1], 0), 0)) == SP_REGNUM)
+ {
+ rtx ops[2];
+ ops[0] = operands[0];
+ ops[1] = XEXP (XEXP (operands[1], 0), 0);
+
+ output_asm_insn (\"mov %0, %1\", ops);
+
+ XEXP (XEXP (operands[1], 0), 0) = operands[0];
+
+ }
+ return \"ldrh %0, %1\";
+ }"
+ [(set_attr "length" "2,4,2,2,2,2")
+ (set_attr "type" "*,load1,store1,*,*,*")
+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])
+
+
+(define_expand "movhi_bytes"
+ [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" "")))
+ (set (match_dup 3)
+ (zero_extend:SI (match_dup 6)))
+ (set (match_operand:SI 0 "" "")
+ (ior:SI (ashift:SI (match_dup 4) (const_int 8)) (match_dup 5)))]
+ "TARGET_ARM"
+ "
+ {
+ rtx mem1, mem2;
+ rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
+
+ mem1 = change_address (operands[1], QImode, addr);
+ mem2 = change_address (operands[1], QImode, plus_constant (addr, 1));
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = mem1;
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_reg_rtx (SImode);
+ operands[6] = mem2;
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ operands[4] = operands[2];
+ operands[5] = operands[3];
+ }
+ else
+ {
+ operands[4] = operands[3];
+ operands[5] = operands[2];
+ }
+ }"
+)
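+
+;; Roughly (illustrative, register numbers hypothetical), a little-endian
+;; halfword load from the address in r2 without ARMv4 halfword loads
+;; becomes:
+;;   ldrb r0, [r2]             @ low byte, zero-extended
+;;   ldrb r1, [r2, #1]         @ high byte, zero-extended
+;;   orr  r0, r0, r1, lsl #8   @ combine into the halfword value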
+
+(define_expand "movhi_bigend"
+ [(set (match_dup 2)
+ (rotate:SI (subreg:SI (match_operand:HI 1 "memory_operand" "") 0)
+ (const_int 16)))
+ (set (match_dup 3)
+ (ashiftrt:SI (match_dup 2) (const_int 16)))
+ (set (match_operand:HI 0 "s_register_operand" "")
+ (match_dup 4))]
+ "TARGET_ARM"
+ "
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_lowpart (HImode, operands[3]);
+ "
+)
+
+;; Pattern to recognize the insn generated by the default case above.
+(define_insn "*movhi_insn_arch4"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
+ (match_operand:HI 1 "general_operand" "rI,K,r,mi"))]
+ "TARGET_ARM
+ && arm_arch4
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "@
+ mov%?\\t%0, %1\\t%@ movhi
+ mvn%?\\t%0, #%B1\\t%@ movhi
+ str%(h%)\\t%1, %0\\t%@ movhi
+ ldr%(h%)\\t%0, %1\\t%@ movhi"
+ [(set_attr "type" "*,*,store1,load1")
+ (set_attr "predicable" "yes")
+ (set_attr "insn" "mov,mvn,*,*")
+ (set_attr "pool_range" "*,*,*,256")
+ (set_attr "neg_pool_range" "*,*,*,244")]
+)
+
+(define_insn "*movhi_bytes"
+ [(set (match_operand:HI 0 "s_register_operand" "=r,r")
+ (match_operand:HI 1 "arm_rhs_operand" "rI,K"))]
+ "TARGET_ARM"
+ "@
+ mov%?\\t%0, %1\\t%@ movhi
+ mvn%?\\t%0, #%B1\\t%@ movhi"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "mov,mvn")]
+)
+
+(define_expand "thumb_movhi_clobber"
+ [(set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "register_operand" ""))
+ (clobber (match_operand:DI 2 "register_operand" ""))]
+ "TARGET_THUMB1"
+ "
+ if (strict_memory_address_p (HImode, XEXP (operands[0], 0))
+ && REGNO (operands[1]) <= LAST_LO_REGNUM)
+ {
+ emit_insn (gen_movhi (operands[0], operands[1]));
+ DONE;
+ }
+ /* XXX Fixme, need to handle other cases here as well. */
+ gcc_unreachable ();
+ "
+)
+
+;; We use a DImode scratch because we may occasionally need an additional
+;; temporary if the address isn't offsettable -- push_reload doesn't seem
+;; to take any notice of the "o" constraints on the reload_memory_operand
+;; operand.
+(define_expand "reload_outhi"
+ [(parallel [(match_operand:HI 0 "arm_reload_memory_operand" "=o")
+ (match_operand:HI 1 "s_register_operand" "r")
+ (match_operand:DI 2 "s_register_operand" "=&l")])]
+ "TARGET_EITHER"
+ "if (TARGET_ARM)
+ arm_reload_out_hi (operands);
+ else
+ thumb_reload_out_hi (operands);
+ DONE;
+ "
+)
+
+(define_expand "reload_inhi"
+ [(parallel [(match_operand:HI 0 "s_register_operand" "=r")
+ (match_operand:HI 1 "arm_reload_memory_operand" "o")
+ (match_operand:DI 2 "s_register_operand" "=&r")])]
+ "TARGET_EITHER"
+ "
+ if (TARGET_ARM)
+ arm_reload_in_hi (operands);
+ else
+ thumb_reload_out_hi (operands);
+ DONE;
+")
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ /* Everything except mem = const or mem = mem can be done easily.  */
+
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ /* For thumb we want an unsigned immediate, then we are more likely
+ to be able to use a movs insn. */
+ if (TARGET_THUMB)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
+
+ emit_insn (gen_movsi (reg, operands[1]));
+ operands[1] = gen_lowpart (QImode, reg);
+ }
+
+ if (TARGET_THUMB)
+ {
+ /* ??? We shouldn't really get invalid addresses here, but this can
+ happen if we are passed an SP (never OK for HImode/QImode) or
+ virtual register (also rejected as illegitimate for HImode/QImode)
+ relative address. */
+ /* ??? This should perhaps be fixed elsewhere, for instance, in
+ fixup_stack_1, by checking for other kinds of invalid addresses,
+ e.g. a bare reference to a virtual register. This may confuse the
+ alpha though, which must handle this case differently. */
+ if (GET_CODE (operands[0]) == MEM
+ && !memory_address_p (GET_MODE (operands[0]),
+ XEXP (operands[0], 0)))
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_reg (XEXP (operands[0], 0)));
+ if (GET_CODE (operands[1]) == MEM
+ && !memory_address_p (GET_MODE (operands[1]),
+ XEXP (operands[1], 0)))
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_reg (XEXP (operands[1], 0)));
+ }
+
+ if (GET_CODE (operands[1]) == MEM && optimize > 0)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendqisi2 (reg, operands[1]));
+ operands[1] = gen_lowpart (QImode, reg);
+ }
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+ else if (TARGET_THUMB
+ && GET_CODE (operands[1]) == CONST_INT
+ && !satisfies_constraint_I (operands[1]))
+ {
+ /* Handle loading a large integer during reload. */
+
+ /* Writing a constant to memory needs a scratch, which should
+ be handled with SECONDARY_RELOADs. */
+ gcc_assert (GET_CODE (operands[0]) == REG);
+
+ operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
+ emit_insn (gen_movsi (operands[0], operands[1]));
+ DONE;
+ }
+ "
+)
+
+
+(define_insn "*arm_movqi_insn"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:QI 1 "general_operand" "rI,K,m,r"))]
+ "TARGET_32BIT
+ && ( register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ mov%?\\t%0, %1
+ mvn%?\\t%0, #%B1
+ ldr%(b%)\\t%0, %1
+ str%(b%)\\t%1, %0"
+ [(set_attr "type" "*,*,load1,store1")
+ (set_attr "insn" "mov,mvn,*,*")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb1_movqi_insn"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
+ (match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))]
+ "TARGET_THUMB1
+ && ( register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ add\\t%0, %1, #0
+ ldrb\\t%0, %1
+ strb\\t%1, %0
+ mov\\t%0, %1
+ mov\\t%0, %1
+ mov\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "type" "*,load1,store1,*,*,*")
+ (set_attr "insn" "*,*,*,mov,mov,mov")
+ (set_attr "pool_range" "*,32,*,*,*,*")
+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])
+
+;; HFmode moves
+(define_expand "movhf"
+ [(set (match_operand:HF 0 "general_operand" "")
+ (match_operand:HF 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (HFmode, operands[1]);
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (HFmode, operands[1]);
+ }
+ }
+ "
+)
+
+(define_insn "*arm32_movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r")
+ (match_operand:HF 1 "general_operand" " m,r,r,F"))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16)
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: /* ARM register from memory */
+ return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\";
+ case 1: /* memory from ARM register */
+ return \"str%(h%)\\t%1, %0\\t%@ __fp16\";
+ case 2: /* ARM register from ARM register */
+ return \"mov%?\\t%0, %1\\t%@ __fp16\";
+ case 3: /* ARM register from constant */
+ {
+ REAL_VALUE_TYPE r;
+ long bits;
+ rtx ops[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ bits = real_to_target (NULL, &r, HFmode);
+ ops[0] = operands[0];
+ ops[1] = GEN_INT (bits);
+ ops[2] = GEN_INT (bits & 0xff00);
+ ops[3] = GEN_INT (bits & 0x00ff);
+
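+ /* Explanatory note: without movw, the 16-bit bit pattern is built
+ from two 8-bit pieces, each a valid ARM immediate: the high byte
+ via mov, then the low byte merged in with orr. */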
+ if (arm_arch_thumb2)
+ output_asm_insn (\"movw%?\\t%0, %1\", ops);
+ else
+ output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops);
+ return \"\";
+ }
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "conds" "unconditional")
+ (set_attr "type" "load1,store1,*,*")
+ (set_attr "insn" "*,*,mov,mov")
+ (set_attr "length" "4,4,4,8")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb1_movhf"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h")
+ (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))]
+ "TARGET_THUMB1
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 1:
+ {
+ rtx addr;
+ gcc_assert (GET_CODE(operands[1]) == MEM);
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) == LABEL_REF
+ || (GET_CODE (addr) == CONST
+ && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT))
+ {
+ /* Constant pool entry. */
+ return \"ldr\\t%0, %1\";
+ }
+ return \"ldrh\\t%0, %1\";
+ }
+ case 2: return \"strh\\t%1, %0\";
+ default: return \"mov\\t%0, %1\";
+ }
+ "
+ [(set_attr "length" "2")
+ (set_attr "type" "*,load1,store1,*,*")
+ (set_attr "insn" "mov,*,*,mov,mov")
+ (set_attr "pool_range" "*,1020,*,*,*")
+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond")])
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SFmode, operands[1]);
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (SFmode, operands[1]);
+ }
+ }
+ "
+)
+
+;; Transform a floating-point move of a constant to a core register into
+;; an SImode operation.
+(define_split
+ [(set (match_operand:SF 0 "arm_general_register_operand" "")
+ (match_operand:SF 1 "immediate_operand" ""))]
+ "TARGET_EITHER
+ && reload_completed
+ && GET_CODE (operands[1]) == CONST_DOUBLE"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+ operands[2] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[1]);
+ if (operands[2] == 0 || operands[3] == 0)
+ FAIL;
+ "
+)
+
+(define_insn "*arm_movsf_soft_insn"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m")
+ (match_operand:SF 1 "general_operand" "r,mE,r"))]
+ "TARGET_32BIT
+ && TARGET_SOFT_FLOAT
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], SFmode))"
+ "@
+ mov%?\\t%0, %1
+ ldr%?\\t%0, %1\\t%@ float
+ str%?\\t%1, %0\\t%@ float"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "*,load1,store1")
+ (set_attr "insn" "mov,*,*")
+ (set_attr "pool_range" "*,4096,*")
+ (set_attr "arm_neg_pool_range" "*,4084,*")
+ (set_attr "thumb2_neg_pool_range" "*,0,*")]
+)
+
+;;; ??? This should have alternatives for constants.
+(define_insn "*thumb1_movsf_insn"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h")
+ (match_operand:SF 1 "general_operand" "l, >,l,mF,l,*h,*r"))]
+ "TARGET_THUMB1
+ && ( register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ "@
+ add\\t%0, %1, #0
+ ldmia\\t%1, {%0}
+ stmia\\t%0, {%1}
+ ldr\\t%0, %1
+ str\\t%1, %0
+ mov\\t%0, %1
+ mov\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "type" "*,load1,store1,load1,store1,*,*")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*")
+ (set_attr "insn" "*,*,*,*,*,mov,mov")
+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")]
+)
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DFmode, operands[1]);
+ }
+ else /* TARGET_THUMB */
+ {
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (DFmode, operands[1]);
+ }
+ }
+ "
+)
+
+;; Reloading a DFmode value stored in integer regs to memory can require a
+;; scratch reg.
+(define_expand "reload_outdf"
+ [(match_operand:DF 0 "arm_reload_memory_operand" "=o")
+ (match_operand:DF 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "=&r")]
+ "TARGET_32BIT"
+ "
+ {
+ enum rtx_code code = GET_CODE (XEXP (operands[0], 0));
+
+ if (code == REG)
+ operands[2] = XEXP (operands[0], 0);
+ else if (code == POST_INC || code == PRE_DEC)
+ {
+ operands[0] = gen_rtx_SUBREG (DImode, operands[0], 0);
+ operands[1] = gen_rtx_SUBREG (DImode, operands[1], 0);
+ emit_insn (gen_movdi (operands[0], operands[1]));
+ DONE;
+ }
+ else if (code == PRE_INC)
+ {
+ rtx reg = XEXP (XEXP (operands[0], 0), 0);
+
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (8)));
+ operands[2] = reg;
+ }
+ else if (code == POST_DEC)
+ operands[2] = XEXP (XEXP (operands[0], 0), 0);
+ else
+ emit_insn (gen_addsi3 (operands[2], XEXP (XEXP (operands[0], 0), 0),
+ XEXP (XEXP (operands[0], 0), 1)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ replace_equiv_address (operands[0], operands[2]),
+ operands[1]));
+
+ if (code == POST_DEC)
+ emit_insn (gen_addsi3 (operands[2], operands[2], GEN_INT (-8)));
+
+ DONE;
+ }"
+)
+
+(define_insn "*movdf_soft_insn"
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
+ (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
+ "TARGET_32BIT && TARGET_SOFT_FLOAT
+ && ( register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ case 2:
+ return \"#\";
+ default:
+ return output_move_double (operands);
+ }
+ "
+ [(set_attr "length" "8,12,16,8,8")
+ (set_attr "type" "*,*,*,load2,store2")
+ (set_attr "pool_range" "*,*,*,1020,*")
+ (set_attr "arm_neg_pool_range" "*,*,*,1008,*")
+ (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")]
+)
+
+;;; ??? This should have alternatives for constants.
+;;; ??? This was originally identical to the movdi_insn pattern.
+;;; ??? The 'F' constraint looks funny, but it should always be replaced by
+;;; thumb_reorg with a memory reference.
+(define_insn "*thumb_movdf_insn"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=l,l,>,l, m,*r")
+ (match_operand:DF 1 "general_operand" "l, >,l,mF,l,*r"))]
+ "TARGET_THUMB1
+ && ( register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "*
+ switch (which_alternative)
+ {
+ default:
+ case 0:
+ if (REGNO (operands[1]) == REGNO (operands[0]) + 1)
+ return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\";
+ return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\";
+ case 1:
+ return \"ldmia\\t%1, {%0, %H0}\";
+ case 2:
+ return \"stmia\\t%0, {%1, %H1}\";
+ case 3:
+ return thumb_load_double_from_address (operands);
+ case 4:
+ operands[2] = gen_rtx_MEM (SImode,
+ plus_constant (XEXP (operands[0], 0), 4));
+ output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
+ return \"\";
+ case 5:
+ if (REGNO (operands[1]) == REGNO (operands[0]) + 1)
+ return \"mov\\t%0, %1\;mov\\t%H0, %H1\";
+ return \"mov\\t%H0, %H1\;mov\\t%0, %1\";
+ }
+ "
+ [(set_attr "length" "4,2,2,6,4,4")
+ (set_attr "type" "*,load2,store2,load2,store2,*")
+ (set_attr "insn" "*,*,*,*,*,mov")
+ (set_attr "pool_range" "*,*,*,1020,*,*")]
+)
+
+(define_expand "movxf"
+ [(set (match_operand:XF 0 "general_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (XFmode, operands[1]);
+ "
+)
+
+
+
+;; load- and store-multiple insns
+;; The ARM can load/store any set of registers, provided that they are in
+;; ascending order, but these expanders assume a contiguous set.
+
+(define_expand "load_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))])]
+ "TARGET_32BIT"
+{
+ HOST_WIDE_INT offset = 0;
+
+ /* Support only fixed point registers. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) > 14
+ || INTVAL (operands[2]) < 2
+ || GET_CODE (operands[1]) != MEM
+ || GET_CODE (operands[0]) != REG
+ || REGNO (operands[0]) > (LAST_ARM_REGNUM - 1)
+ || REGNO (operands[0]) + INTVAL (operands[2]) > LAST_ARM_REGNUM)
+ FAIL;
+
+ operands[3]
+ = arm_gen_load_multiple (arm_regs_in_sequence + REGNO (operands[0]),
+ INTVAL (operands[2]),
+ force_reg (SImode, XEXP (operands[1], 0)),
+ FALSE, operands[1], &offset);
+})
+
+(define_expand "store_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))])]
+ "TARGET_32BIT"
+{
+ HOST_WIDE_INT offset = 0;
+
+ /* Support only fixed point registers. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) > 14
+ || INTVAL (operands[2]) < 2
+ || GET_CODE (operands[1]) != REG
+ || GET_CODE (operands[0]) != MEM
+ || REGNO (operands[1]) > (LAST_ARM_REGNUM - 1)
+ || REGNO (operands[1]) + INTVAL (operands[2]) > LAST_ARM_REGNUM)
+ FAIL;
+
+ operands[3]
+ = arm_gen_store_multiple (arm_regs_in_sequence + REGNO (operands[1]),
+ INTVAL (operands[2]),
+ force_reg (SImode, XEXP (operands[0], 0)),
+ FALSE, operands[0], &offset);
+})
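+
+;; Illustrative only: with the base address in r0, a four-word
+;; load_multiple starting at r4 becomes "ldmia r0, {r4-r7}", and the
+;; corresponding store_multiple becomes "stmia r0, {r4-r7}".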
+
+
+;; Move a block of memory if it is word aligned and MORE than 2 words long.
+;; We could let this apply for blocks of less than this, but it clobbers so
+;; many registers that there is then probably a better way.
+
+(define_expand "movmemqi"
+ [(match_operand:BLK 0 "general_operand" "")
+ (match_operand:BLK 1 "general_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_EITHER"
+ "
+ if (TARGET_32BIT)
+ {
+ if (arm_gen_movmemqi (operands))
+ DONE;
+ FAIL;
+ }
+ else /* TARGET_THUMB1 */
+ {
+ if ( INTVAL (operands[3]) != 4
+ || INTVAL (operands[2]) > 48)
+ FAIL;
+
+ thumb_expand_movmemqi (operands);
+ DONE;
+ }
+ "
+)
+
+;; Thumb block-move insns
+
+(define_insn "movmem12b"
+ [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+ (mem:SI (match_operand:SI 3 "register_operand" "1")))
+ (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
+ (mem:SI (plus:SI (match_dup 3) (const_int 4))))
+ (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
+ (mem:SI (plus:SI (match_dup 3) (const_int 8))))
+ (set (match_operand:SI 0 "register_operand" "=l")
+ (plus:SI (match_dup 2) (const_int 12)))
+ (set (match_operand:SI 1 "register_operand" "=l")
+ (plus:SI (match_dup 3) (const_int 12)))
+ (clobber (match_scratch:SI 4 "=&l"))
+ (clobber (match_scratch:SI 5 "=&l"))
+ (clobber (match_scratch:SI 6 "=&l"))]
+ "TARGET_THUMB1"
+ "* return thumb_output_move_mem_multiple (3, operands);"
+ [(set_attr "length" "4")
+ ; This isn't entirely accurate: the insn loads as well, but in terms of
+ ; scheduling the following insn it is better to consider it as a store.
+ (set_attr "type" "store3")]
+)
+
+(define_insn "movmem8b"
+ [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+ (mem:SI (match_operand:SI 3 "register_operand" "1")))
+ (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
+ (mem:SI (plus:SI (match_dup 3) (const_int 4))))
+ (set (match_operand:SI 0 "register_operand" "=l")
+ (plus:SI (match_dup 2) (const_int 8)))
+ (set (match_operand:SI 1 "register_operand" "=l")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (clobber (match_scratch:SI 4 "=&l"))
+ (clobber (match_scratch:SI 5 "=&l"))]
+ "TARGET_THUMB1"
+ "* return thumb_output_move_mem_multiple (2, operands);"
+ [(set_attr "length" "4")
+ ; This isn't entirely accurate: the insn loads as well, but in terms of
+ ; scheduling the following insn it is better to consider it as a store.
+ (set_attr "type" "store2")]
+)
+
+
+
+;; Compare & branch insns
+;; The range calculations are as follows:
+;; For forward branches, the address calculation returns the address of
+;; the next instruction. This is 2 beyond the branch instruction.
+;; For backward branches, the address calculation returns the address of
+;; the first instruction in this pattern (cmp). This is 2 before the branch
+;; instruction for the shortest sequence, and 4 before the branch instruction
+;; if we have to jump around an unconditional branch.
+;; To the basic branch range the PC offset must be added (this is +4).
+;; So for forward branches we have
+;; (pos_range - pos_base_offs + pc_offs) = (pos_range - 2 + 4).
+;; And for backward branches we have
+;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4).
+;;
+;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048).
+;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 ->256).
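+;; Working the arithmetic through: for 'b', forward 2046 - 2 + 4 = 2048
+;; and backward -2048 - (-4) + 4 = -2040; for 'b<cond>', forward
+;; 254 - 2 + 4 = 256 and backward -256 - (-2) + 4 = -250, giving the two
+;; windows quoted above.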
+
+(define_expand "cbranchsi4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1 || TARGET_32BIT"
+ "
+ if (!TARGET_THUMB1)
+ {
+ if (!arm_add_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ if (thumb1_cmpneg_operand (operands[2], SImode))
+ {
+ emit_jump_insn (gen_cbranchsi4_scratch (NULL, operands[1], operands[2],
+ operands[3], operands[0]));
+ DONE;
+ }
+ if (!thumb1_cmp_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+ ")
+
+;; A pattern to recognize a special situation and optimize for it.
+;; On the thumb, zero-extension from memory is preferable to sign-extension
+;; due to the available addressing modes. Hence, convert a signed comparison
+;; with zero into an unsigned comparison with 127 if possible.
+(define_expand "cbranchqi4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "lt_ge_comparison_operator"
+ [(match_operand:QI 1 "memory_operand" "")
+ (match_operand:QI 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+{
+ rtx xops[4];
+ xops[1] = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendqisi2 (xops[1], operands[1]));
+ xops[2] = GEN_INT (127);
+ xops[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]) == GE ? LEU : GTU,
+ VOIDmode, xops[1], xops[2]);
+ xops[3] = operands[3];
+ emit_insn (gen_cbranchsi4 (xops[0], xops[1], xops[2], xops[3]));
+ DONE;
+})
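+
+;; Concretely (an explanatory note): a signed byte is negative exactly
+;; when its zero-extended value lies in 128..255, so (lt x 0) maps to
+;; (gtu x' 127) and (ge x 0) to (leu x' 127), where x' is the
+;; zero-extended SImode copy created above.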
+
+(define_expand "cbranchsf4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arm_comparison_operator"
+ [(match_operand:SF 1 "s_register_operand" "")
+ (match_operand:SF 2 "arm_float_compare_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2],
+ operands[3])); DONE;"
+)
+
+(define_expand "cbranchdf4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arm_comparison_operator"
+ [(match_operand:DF 1 "s_register_operand" "")
+ (match_operand:DF 2 "arm_float_compare_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2],
+ operands[3])); DONE;"
+)
+
+(define_expand "cbranchdi4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arm_comparison_operator"
+ [(match_operand:DI 1 "cmpdi_operand" "")
+ (match_operand:DI 2 "cmpdi_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_32BIT"
+ "{
+ rtx swap = NULL_RTX;
+ enum rtx_code code = GET_CODE (operands[0]);
+
+ /* We should not have two constants. */
+ gcc_assert (GET_MODE (operands[1]) == DImode
+ || GET_MODE (operands[2]) == DImode);
+
+ /* Flip unimplemented DImode comparisons to a form that
+ arm_gen_compare_reg can handle. */
+ switch (code)
+ {
+ case GT:
+ swap = gen_rtx_LT (VOIDmode, operands[2], operands[1]); break;
+ case LE:
+ swap = gen_rtx_GE (VOIDmode, operands[2], operands[1]); break;
+ case GTU:
+ swap = gen_rtx_LTU (VOIDmode, operands[2], operands[1]); break;
+ case LEU:
+ swap = gen_rtx_GEU (VOIDmode, operands[2], operands[1]); break;
+ default:
+ break;
+ }
+ if (swap)
+ emit_jump_insn (gen_cbranch_cc (swap, operands[2], operands[1],
+ operands[3]));
+ else
+ emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }"
+)
+
+(define_insn "cbranchsi4_insn"
+ [(set (pc) (if_then_else
+ (match_operator 0 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "l,l*h")
+ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+{
+ rtx t = cfun->machine->thumb1_cc_insn;
+ if (t != NULL_RTX)
+ {
+ if (!rtx_equal_p (cfun->machine->thumb1_cc_op0, operands[1])
+ || !rtx_equal_p (cfun->machine->thumb1_cc_op1, operands[2]))
+ t = NULL_RTX;
+ if (cfun->machine->thumb1_cc_mode == CC_NOOVmode)
+ {
+ if (!noov_comparison_operator (operands[0], VOIDmode))
+ t = NULL_RTX;
+ }
+ else if (cfun->machine->thumb1_cc_mode != CCmode)
+ t = NULL_RTX;
+ }
+ if (t == NULL_RTX)
+ {
+ output_asm_insn ("cmp\t%1, %2", operands);
+ cfun->machine->thumb1_cc_insn = insn;
+ cfun->machine->thumb1_cc_op0 = operands[1];
+ cfun->machine->thumb1_cc_op1 = operands[2];
+ cfun->machine->thumb1_cc_mode = CCmode;
+ }
+ else
+ /* Ensure we emit the right type of condition code on the jump. */
+ XEXP (operands[0], 0) = gen_rtx_REG (cfun->machine->thumb1_cc_mode,
+ CC_REGNUM);
+
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d0\\t%l3\";
+ case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+ }
+}
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+ (le (minus (match_dup 3) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+ (le (minus (match_dup 3) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
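+
+;; The three length cases above correspond to (sketch, condition and
+;; label names hypothetical):
+;;   length 4: beq .Ltarget                         @ short range
+;;   length 6: bne .LCBn ; b .Ltarget ; .LCBn:      @ jump round a branch
+;;   length 8: bne .LCBn ; bl .Ltarget ; .LCBn:     @ far jump via bl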
+
+(define_insn "cbranchsi4_scratch"
+ [(set (pc) (if_then_else
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "l,0")
+ (match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 0 "=l,l"))]
+ "TARGET_THUMB1"
+ "*
+ output_asm_insn (\"add\\t%0, %1, #%n2\", operands);
+
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d4\\t%l3\";
+ case 6: return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+ }
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+ (le (minus (match_dup 3) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+ (le (minus (match_dup 3) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
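+
+;; Explanatory note: operand 2 is a negative constant and %n2 prints its
+;; negation, so the "add %0, %1, #%n2" into a scratch sets exactly the
+;; flags that "cmp %1, %2" would, sidestepping Thumb-1's lack of an
+;; immediate-form cmn.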
+
+;; Two peepholes to generate a subtract of 0 instead of a move if the
+;; condition codes will be useful.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 1) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(set (match_dup 0) (minus:SI (match_dup 1) (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 0) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+ "")
+
+;; Sigh!  This variant shouldn't be needed, but combine often fails to
+;; merge cases like this because op1 is a hard register in a class that
+;; arm_class_likely_spilled_p reports as likely to be spilled.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operand:SI 1 "low_register_operand" ""))
+ (set (pc)
+ (if_then_else (match_operator 2 "arm_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ [(set (match_dup 0) (minus:SI (match_dup 1) (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 2 [(match_dup 0) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+ "")
+
+(define_insn "*negated_cbranchsi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:SI 1 "s_register_operand" "l")
+ (neg:SI (match_operand:SI 2 "s_register_operand" "l"))])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ "*
+ output_asm_insn (\"cmn\\t%1, %2\", operands);
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d0\\t%l3\";
+ case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+ }
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+ (le (minus (match_dup 3) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+ (le (minus (match_dup 3) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
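+
+;; Explanatory note: cmn adds rather than subtracts, so "cmn %1, %2" sets
+;; the flags for %1 - (-%2), which is precisely the comparison of operand
+;; 1 with the negation of operand 2 matched above.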
+
+(define_insn "*tbit_cbranch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l")
+ (const_int 1)
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 4 "=l"))]
+ "TARGET_THUMB1"
+ "*
+ {
+ rtx op[3];
+ op[0] = operands[4];
+ op[1] = operands[1];
+ op[2] = GEN_INT (32 - 1 - INTVAL (operands[2]));
+
+ output_asm_insn (\"lsl\\t%0, %1, %2\", op);
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d0\\t%l3\";
+ case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+ }
+ }"
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+ (le (minus (match_dup 3) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+ (le (minus (match_dup 3) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
+
+(define_insn "*tlobits_cbranch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l")
+ (match_operand:SI 2 "const_int_operand" "i")
+ (const_int 0))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 4 "=l"))]
+ "TARGET_THUMB1"
+ "*
+ {
+ rtx op[3];
+ op[0] = operands[4];
+ op[1] = operands[1];
+ op[2] = GEN_INT (32 - INTVAL (operands[2]));
+
+ output_asm_insn (\"lsl\\t%0, %1, %2\", op);
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d0\\t%l3\";
+ case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\";
+ }
+ }"
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+ (le (minus (match_dup 3) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 3) (pc)) (const_int -2040))
+ (le (minus (match_dup 3) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
+
+(define_insn "*tstsi3_cbranch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_operator"
+ [(and:SI (match_operand:SI 0 "s_register_operand" "%l")
+ (match_operand:SI 1 "s_register_operand" "l"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_THUMB1"
+ "*
+ {
+ output_asm_insn (\"tst\\t%0, %1\", operands);
+ switch (get_attr_length (insn))
+ {
+ case 4: return \"b%d3\\t%l2\";
+ case 6: return \"b%D3\\t.LCB%=\;b\\t%l2\\t%@long jump\\n.LCB%=:\";
+ default: return \"b%D3\\t.LCB%=\;bl\\t%l2\\t%@far jump\\n.LCB%=:\";
+ }
+ }"
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+ (le (minus (match_dup 2) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -2040))
+ (le (minus (match_dup 2) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
+
+(define_insn "*cbranchne_decr1"
+ [(set (pc)
+ (if_then_else (match_operator 3 "equality_operator"
+ [(match_operand:SI 2 "s_register_operand" "l,l,1,l")
+ (const_int 0)])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (set (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,*?h,*?m,*?m")
+ (plus:SI (match_dup 2) (const_int -1)))
+ (clobber (match_scratch:SI 1 "=X,l,&l,&l"))]
+ "TARGET_THUMB1"
+ "*
+ {
+ rtx cond[2];
+ cond[0] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE
+ ? GEU : LTU),
+ VOIDmode, operands[2], const1_rtx);
+ cond[1] = operands[4];
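+
+ /* Explanatory note: the sub below computes x - 1 and sets the
+ flags, so "x != 0" is branched as unsigned x >= 1 (GEU) and
+ "x == 0" as unsigned x < 1 (LTU). */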
+
+ if (which_alternative == 0)
+ output_asm_insn (\"sub\\t%0, %2, #1\", operands);
+ else if (which_alternative == 1)
+ {
+ /* We must provide an alternative for a hi reg because reload
+ cannot handle output reloads on a jump instruction, but we
+ can't subtract into that. Fortunately a mov from lo to hi
+ does not clobber the condition codes. */
+ output_asm_insn (\"sub\\t%1, %2, #1\", operands);
+ output_asm_insn (\"mov\\t%0, %1\", operands);
+ }
+ else
+ {
+ /* Similarly, but the target is memory. */
+ output_asm_insn (\"sub\\t%1, %2, #1\", operands);
+ output_asm_insn (\"str\\t%1, %0\", operands);
+ }
+
+ switch (get_attr_length (insn) - (which_alternative ? 2 : 0))
+ {
+ case 4:
+ output_asm_insn (\"b%d0\\t%l1\", cond);
+ return \"\";
+ case 6:
+ output_asm_insn (\"b%D0\\t.LCB%=\", cond);
+ return \"b\\t%l4\\t%@long jump\\n.LCB%=:\";
+ default:
+ output_asm_insn (\"b%D0\\t.LCB%=\", cond);
+ return \"bl\\t%l4\\t%@far jump\\n.LCB%=:\";
+ }
+ }
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (ior (and (eq (symbol_ref ("which_alternative"))
+ (const_int 0))
+ (eq_attr "length" "8"))
+ (eq_attr "length" "10"))
+ (const_string "yes")
+ (const_string "no")))
+ (set_attr_alternative "length"
+ [
+ ;; Alternative 0
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -250))
+ (le (minus (match_dup 4) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -2040))
+ (le (minus (match_dup 4) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8)))
+ ;; Alternative 1
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -248))
+ (le (minus (match_dup 4) (pc)) (const_int 256)))
+ (const_int 6)
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -2038))
+ (le (minus (match_dup 4) (pc)) (const_int 2048)))
+ (const_int 8)
+ (const_int 10)))
+ ;; Alternative 2
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -248))
+ (le (minus (match_dup 4) (pc)) (const_int 256)))
+ (const_int 6)
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -2038))
+ (le (minus (match_dup 4) (pc)) (const_int 2048)))
+ (const_int 8)
+ (const_int 10)))
+ ;; Alternative 3
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -248))
+ (le (minus (match_dup 4) (pc)) (const_int 256)))
+ (const_int 6)
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -2038))
+ (le (minus (match_dup 4) (pc)) (const_int 2048)))
+ (const_int 8)
+ (const_int 10)))])]
+)
+
+(define_insn "*addsi3_cbranch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 4 "arm_comparison_operator"
+ [(plus:SI
+ (match_operand:SI 2 "s_register_operand" "%0,l,*l,1,1,1")
+ (match_operand:SI 3 "reg_or_int_operand" "IJ,lL,*l,lIJ,lIJ,lIJ"))
+ (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ (set
+ (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,l,*!h,*?h,*?m,*?m")
+ (plus:SI (match_dup 2) (match_dup 3)))
+ (clobber (match_scratch:SI 1 "=X,X,l,l,&l,&l"))]
+ "TARGET_THUMB1
+ && (GET_CODE (operands[4]) == EQ
+ || GET_CODE (operands[4]) == NE
+ || GET_CODE (operands[4]) == GE
+ || GET_CODE (operands[4]) == LT)"
+ "*
+ {
+ rtx cond[3];
+
+ cond[0] = (which_alternative < 2) ? operands[0] : operands[1];
+ cond[1] = operands[2];
+ cond[2] = operands[3];
+
+ if (GET_CODE (cond[2]) == CONST_INT && INTVAL (cond[2]) < 0)
+ output_asm_insn (\"sub\\t%0, %1, #%n2\", cond);
+ else
+ output_asm_insn (\"add\\t%0, %1, %2\", cond);
+
+ if (which_alternative >= 2
+ && which_alternative < 4)
+ output_asm_insn (\"mov\\t%0, %1\", operands);
+ else if (which_alternative >= 4)
+ output_asm_insn (\"str\\t%1, %0\", operands);
+
+ switch (get_attr_length (insn) - ((which_alternative >= 2) ? 2 : 0))
+ {
+ case 4:
+ return \"b%d4\\t%l5\";
+ case 6:
+ return \"b%D4\\t.LCB%=\;b\\t%l5\\t%@long jump\\n.LCB%=:\";
+ default:
+ return \"b%D4\\t.LCB%=\;bl\\t%l5\\t%@far jump\\n.LCB%=:\";
+ }
+ }
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (ior (and (lt (symbol_ref ("which_alternative"))
+ (const_int 2))
+ (eq_attr "length" "8"))
+ (eq_attr "length" "10"))
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (lt (symbol_ref ("which_alternative"))
+ (const_int 2))
+ (if_then_else
+ (and (ge (minus (match_dup 5) (pc)) (const_int -250))
+ (le (minus (match_dup 5) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 5) (pc)) (const_int -2040))
+ (le (minus (match_dup 5) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8)))
+ (if_then_else
+ (and (ge (minus (match_dup 5) (pc)) (const_int -248))
+ (le (minus (match_dup 5) (pc)) (const_int 256)))
+ (const_int 6)
+ (if_then_else
+ (and (ge (minus (match_dup 5) (pc)) (const_int -2038))
+ (le (minus (match_dup 5) (pc)) (const_int 2048)))
+ (const_int 8)
+ (const_int 10)))))]
+)
+
+(define_insn "*addsi3_cbranch_scratch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "arm_comparison_operator"
+ [(plus:SI
+ (match_operand:SI 1 "s_register_operand" "%l,l,l,0")
+ (match_operand:SI 2 "reg_or_int_operand" "J,l,L,IJ"))
+ (const_int 0)])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 0 "=X,X,l,l"))]
+ "TARGET_THUMB1
+ && (GET_CODE (operands[3]) == EQ
+ || GET_CODE (operands[3]) == NE
+ || GET_CODE (operands[3]) == GE
+ || GET_CODE (operands[3]) == LT)"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"cmp\t%1, #%n2\", operands);
+ break;
+ case 1:
+ output_asm_insn (\"cmn\t%1, %2\", operands);
+ break;
+ case 2:
+ if (INTVAL (operands[2]) < 0)
+ output_asm_insn (\"sub\t%0, %1, %2\", operands);
+ else
+ output_asm_insn (\"add\t%0, %1, %2\", operands);
+ break;
+ case 3:
+ if (INTVAL (operands[2]) < 0)
+ output_asm_insn (\"sub\t%0, %0, %2\", operands);
+ else
+ output_asm_insn (\"add\t%0, %0, %2\", operands);
+ break;
+ }
+
+ switch (get_attr_length (insn))
+ {
+ case 4:
+ return \"b%d3\\t%l4\";
+ case 6:
+ return \"b%D3\\t.LCB%=\;b\\t%l4\\t%@long jump\\n.LCB%=:\";
+ default:
+ return \"b%D3\\t.LCB%=\;bl\\t%l4\\t%@far jump\\n.LCB%=:\";
+ }
+ }
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "8")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -250))
+ (le (minus (match_dup 4) (pc)) (const_int 256)))
+ (const_int 4)
+ (if_then_else
+ (and (ge (minus (match_dup 4) (pc)) (const_int -2040))
+ (le (minus (match_dup 4) (pc)) (const_int 2048)))
+ (const_int 6)
+ (const_int 8))))]
+)
+
+
+;; Comparison and test insns
+
+(define_insn "*arm_cmpsi_insn"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 0 "s_register_operand" "r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L")))]
+ "TARGET_32BIT"
+ "@
+ cmp%?\\t%0, %1
+ cmn%?\\t%0, #%n1"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*cmpsi_shiftsi"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 0 "s_register_operand" "r,r")
+ (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "shift_amount_operand" "M,rM")])))]
+ "TARGET_32BIT"
+ "cmp%?\\t%0, %1%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*cmpsi_shiftsi_swp"
+ [(set (reg:CC_SWP CC_REGNUM)
+ (compare:CC_SWP (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operand:SI 2 "shift_amount_operand" "M,rM")])
+ (match_operand:SI 0 "s_register_operand" "r,r")))]
+ "TARGET_32BIT"
+ "cmp%?\\t%0, %1%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*arm_cmpsi_negshiftsi_si"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z
+ (neg:SI (match_operator:SI 1 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "reg_or_int_operand" "rM")]))
+ (match_operand:SI 0 "s_register_operand" "r")))]
+ "TARGET_ARM"
+ "cmn%?\\t%0, %2%S1"
+ [(set_attr "conds" "set")
+ (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+;; DImode comparisons. The generic code generates branches that
+;; if-conversion cannot reduce to a conditional compare, so we do
+;; that directly.
+
+(define_insn "*arm_cmpdi_insn"
+ [(set (reg:CC_NCV CC_REGNUM)
+ (compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r")
+ (match_operand:DI 1 "arm_di_operand" "rDi")))
+ (clobber (match_scratch:SI 2 "=r"))]
+ "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)"
+ "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1"
+ [(set_attr "conds" "set")
+ (set_attr "length" "8")]
+)
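+
+;; Explanatory sketch: the sbcs subtracts the high words together with
+;; the borrow from the low-word cmp, so N, V and C reflect the full
+;; 64-bit signed comparison while the 32-bit difference itself is
+;; discarded into the scratch register.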
+
+(define_insn "*arm_cmpdi_unsigned"
+ [(set (reg:CC_CZ CC_REGNUM)
+ (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r")
+ (match_operand:DI 1 "arm_di_operand" "rDi")))]
+ "TARGET_ARM"
+ "cmp%?\\t%R0, %R1\;cmpeq\\t%Q0, %Q1"
+ [(set_attr "conds" "set")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*arm_cmpdi_zero"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z (match_operand:DI 0 "s_register_operand" "r")
+ (const_int 0)))
+ (clobber (match_scratch:SI 1 "=r"))]
+ "TARGET_32BIT"
+ "orr%.\\t%1, %Q0, %R0"
+ [(set_attr "conds" "set")]
+)
+
+(define_insn "*thumb_cmpdi_zero"
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z (match_operand:DI 0 "s_register_operand" "l")
+ (const_int 0)))
+ (clobber (match_scratch:SI 1 "=l"))]
+ "TARGET_THUMB1"
+ "orr\\t%1, %Q0, %R0"
+ [(set_attr "conds" "set")
+ (set_attr "length" "2")]
+)
+
+;; Cirrus SF compare instruction
+(define_insn "*cirrus_cmpsf"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:SF 0 "cirrus_fp_register" "v")
+ (match_operand:SF 1 "cirrus_fp_register" "v")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcmps%?\\tr15, %V0, %V1"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "compare")]
+)
+
+;; Cirrus DF compare instruction
+(define_insn "*cirrus_cmpdf"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:DF 0 "cirrus_fp_register" "v")
+ (match_operand:DF 1 "cirrus_fp_register" "v")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcmpd%?\\tr15, %V0, %V1"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "compare")]
+)
+
+(define_insn "*cirrus_cmpdi"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:DI 0 "cirrus_fp_register" "v")
+ (match_operand:DI 1 "cirrus_fp_register" "v")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcmp64%?\\tr15, %V0, %V1"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "compare")]
+)
+
+; This insn allows redundant compares to be removed by cse; nothing should
+; ever appear in the output file, since (set (reg x) (reg x)) is a no-op that
+; is deleted later on. The match_dup will match the mode here, so that
+; mode changes of the condition codes aren't lost by this even though we don't
+; specify what they are.
+
+(define_insn "*deleted_compare"
+ [(set (match_operand 0 "cc_register" "") (match_dup 0))]
+ "TARGET_32BIT"
+ "\\t%@ deleted compare"
+ [(set_attr "conds" "set")
+ (set_attr "length" "0")]
+)
+
+
+;; Conditional branch insns
+
+(define_expand "cbranch_cc"
+ [(set (pc)
+ (if_then_else (match_operator 0 "" [(match_operand 1 "" "")
+ (match_operand 2 "" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_32BIT"
+ "operands[1] = arm_gen_compare_reg (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ operands[2] = const0_rtx;"
+)
+
+;;
+;; Patterns to match conditional branch insns.
+;;
+
+(define_insn "*arm_cond_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_32BIT"
+ "*
+ if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return \"b%d1\\t%l0\";
+ "
+ [(set_attr "conds" "use")
+ (set_attr "type" "branch")]
+)
+
+(define_insn "*arm_cond_branch_reversed"
+ [(set (pc)
+ (if_then_else (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_32BIT"
+ "*
+ if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return \"b%D1\\t%l0\";
+ "
+ [(set_attr "conds" "use")
+ (set_attr "type" "branch")]
+)
+
+
+
+; scc insns
+
+(define_expand "cstore_cc"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "" [(match_operand 2 "" "")
+ (match_operand 3 "" "")]))]
+ "TARGET_32BIT"
+ "operands[2] = arm_gen_compare_reg (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ operands[3] = const0_rtx;"
+)
+
+(define_insn "*mov_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)]))]
+ "TARGET_ARM"
+ "mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*mov_negscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (neg:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])))]
+ "TARGET_ARM"
+ "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*mov_notscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (not:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])))]
+ "TARGET_ARM"
+ "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "reg_or_int_operand" "")]))]
+ "TARGET_32BIT || TARGET_THUMB1"
+ "{
+ rtx op3, scratch, scratch2;
+
+ if (!TARGET_THUMB1)
+ {
+ if (!arm_add_operand (operands[3], SImode))
+ operands[3] = force_reg (SImode, operands[3]);
+ emit_insn (gen_cstore_cc (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (operands[3] == const0_rtx)
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ:
+ emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], operands[2]));
+ break;
+
+ case NE:
+ emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], operands[2]));
+ break;
+
+ case LE:
+ scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ scratch = expand_binop (SImode, ior_optab, operands[2], scratch,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+ operands[0], 1, OPTAB_WIDEN);
+ break;
+
+ case GE:
+ scratch = expand_unop (SImode, one_cmpl_optab, operands[2],
+ NULL_RTX, 1);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+ NULL_RTX, 1, OPTAB_WIDEN);
+ break;
+
+ case GT:
+ scratch = expand_binop (SImode, ashr_optab, operands[2],
+ GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN);
+ scratch = expand_binop (SImode, sub_optab, scratch, operands[2],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0],
+ 0, OPTAB_WIDEN);
+ break;
+
+ /* LT is handled by generic code. No need for unsigned with 0. */
+ default:
+ FAIL;
+ }
+ DONE;
+ }
+
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ:
+ scratch = expand_binop (SImode, sub_optab, operands[2], operands[3],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], scratch));
+ break;
+
+ case NE:
+ scratch = expand_binop (SImode, sub_optab, operands[2], operands[3],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], scratch));
+ break;
+
+ case LE:
+ op3 = force_reg (SImode, operands[3]);
+
+ scratch = expand_binop (SImode, lshr_optab, operands[2], GEN_INT (31),
+ NULL_RTX, 1, OPTAB_WIDEN);
+ scratch2 = expand_binop (SImode, ashr_optab, op3, GEN_INT (31),
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2,
+ op3, operands[2]));
+ break;
+
+ case GE:
+ op3 = operands[3];
+ if (!thumb1_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31),
+ NULL_RTX, 0, OPTAB_WIDEN);
+ scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31),
+ NULL_RTX, 1, OPTAB_WIDEN);
+ emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2,
+ operands[2], op3));
+ break;
+
+ case LEU:
+ op3 = force_reg (SImode, operands[3]);
+ scratch = force_reg (SImode, const0_rtx);
+ emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
+ op3, operands[2]));
+ break;
+
+ case GEU:
+ op3 = operands[3];
+ if (!thumb1_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = force_reg (SImode, const0_rtx);
+ emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch,
+ operands[2], op3));
+ break;
+
+ case LTU:
+ op3 = operands[3];
+ if (!thumb1_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = gen_reg_rtx (SImode);
+ emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3));
+ break;
+
+ case GTU:
+ op3 = force_reg (SImode, operands[3]);
+ scratch = gen_reg_rtx (SImode);
+ emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], op3, operands[2]));
+ break;
+
+ /* No good sequences for GT, LT. */
+ default:
+ FAIL;
+ }
+ DONE;
+}")
+
+(define_expand "cstoresf4"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SF 2 "s_register_operand" "")
+ (match_operand:SF 3 "arm_float_compare_operand" "")]))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "emit_insn (gen_cstore_cc (operands[0], operands[1],
+ operands[2], operands[3])); DONE;"
+)
+
+(define_expand "cstoredf4"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:DF 2 "s_register_operand" "")
+ (match_operand:DF 3 "arm_float_compare_operand" "")]))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE"
+ "emit_insn (gen_cstore_cc (operands[0], operands[1],
+ operands[2], operands[3])); DONE;"
+)
+
+(define_expand "cstoredi4"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:DI 2 "cmpdi_operand" "")
+ (match_operand:DI 3 "cmpdi_operand" "")]))]
+ "TARGET_32BIT"
+ "{
+ rtx swap = NULL_RTX;
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ /* We should not have two constants. */
+ gcc_assert (GET_MODE (operands[2]) == DImode
+ || GET_MODE (operands[3]) == DImode);
+
+ /* Flip unimplemented DImode comparisons to a form that
+ arm_gen_compare_reg can handle. */
+ switch (code)
+ {
+ case GT:
+ swap = gen_rtx_LT (VOIDmode, operands[3], operands[2]); break;
+ case LE:
+ swap = gen_rtx_GE (VOIDmode, operands[3], operands[2]); break;
+ case GTU:
+ swap = gen_rtx_LTU (VOIDmode, operands[3], operands[2]); break;
+ case LEU:
+ swap = gen_rtx_GEU (VOIDmode, operands[3], operands[2]); break;
+ default:
+ break;
+ }
+ if (swap)
+ emit_insn (gen_cstore_cc (operands[0], swap, operands[3],
+ operands[2]));
+ else
+ emit_insn (gen_cstore_cc (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }"
+)
+
+(define_expand "cstoresi_eq0_thumb1"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (match_dup:SI 2))])]
+ "TARGET_THUMB1"
+ "operands[2] = gen_reg_rtx (SImode);"
+)
+
+(define_expand "cstoresi_ne0_thumb1"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (match_dup:SI 2))])]
+ "TARGET_THUMB1"
+ "operands[2] = gen_reg_rtx (SImode);"
+)
+
+(define_insn "*cstoresi_eq0_thumb1_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=&l,l")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "l,0")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "s_register_operand" "=X,l"))]
+ "TARGET_THUMB1"
+ "@
+ neg\\t%0, %1\;adc\\t%0, %0, %1
+ neg\\t%2, %1\;adc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "*cstoresi_ne0_thumb1_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "0")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "s_register_operand" "=l"))]
+ "TARGET_THUMB1"
+ "sub\\t%2, %1, #1\;sbc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+)
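+
+;; A sketch of why the two carry tricks above work, given that on ARM the
+;; carry flag after a subtraction is NOT borrow:
+;;   eq0:  "neg r2, r1" computes 0 - r1 and sets C = (r1 == 0);
+;;         "adc r0, r1, r2" then gives r1 + (-r1) + C = C, i.e. (r1 == 0).
+;;   ne0:  "sub r2, r1, #1" sets C = (r1 != 0), since there is no borrow
+;;         exactly when r1 >= 1 unsigned;
+;;         "sbc r0, r1, r2" then gives r1 - (r1 - 1) - (1 - C) = C.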
+
+;; Used as part of the expansion of the thumb ltu and gtu sequences.
+(define_insn "cstoresi_nltu_thumb1"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l")
+ (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))]
+ "TARGET_THUMB1"
+ "cmp\\t%1, %2\;sbc\\t%0, %0, %0"
+ [(set_attr "length" "4")]
+)
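+
+;; Likewise for the pattern above: "cmp r1, r2" sets C = (r1 >= r2)
+;; unsigned, so "sbc r0, r0, r0" computes r0 - r0 - (1 - C) = C - 1,
+;; which is -1 when r1 < r2 (unsigned) and 0 otherwise -- exactly
+;; -(ltu r1 r2), as the RTL describes.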
+
+(define_insn_and_split "cstoresi_ltu_thumb1"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l")
+ (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+ (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))]
+ "TARGET_THUMB1"
+ "#"
+ "TARGET_THUMB1"
+ [(set (match_dup 3)
+ (neg:SI (ltu:SI (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (neg:SI (match_dup 3)))]
+ "operands[3] = gen_reg_rtx (SImode);"
+ [(set_attr "length" "4")]
+)
+
+;; Used as part of the expansion of the thumb le/ge and leu/geu sequences.
+(define_insn "thumb1_addsi3_addgeu"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0")
+ (match_operand:SI 2 "s_register_operand" "l"))
+ (geu:SI (match_operand:SI 3 "s_register_operand" "l")
+ (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))]
+ "TARGET_THUMB1"
+ "cmp\\t%3, %4\;adc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+)
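+
+;; For illustration: after "cmp r3, r4" the carry flag holds (r3 >= r4)
+;; unsigned, so "adc r0, r1, r2" computes r1 + r2 + (geu r3 r4) in just
+;; two instructions, matching the RTL above.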
+
+
+;; Conditional move insns
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (match_operand 1 "arm_comparison_operator" "")
+ (match_operand:SI 2 "arm_not_operand" "")
+ (match_operand:SI 3 "arm_not_operand" "")))]
+ "TARGET_32BIT"
+ "
+ {
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx ccreg;
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }"
+)
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "s_register_operand" "")
+ (if_then_else:SF (match_operand 1 "arm_comparison_operator" "")
+ (match_operand:SF 2 "s_register_operand" "")
+ (match_operand:SF 3 "nonmemory_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT"
+ "
+ {
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx ccreg;
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ /* When compiling for SOFT_FLOAT, ensure both arms are in registers.
+ Otherwise, ensure it is a valid FP add operand.  */
+ if ((!(TARGET_HARD_FLOAT && TARGET_FPA))
+ || (!arm_float_add_operand (operands[3], SFmode)))
+ operands[3] = force_reg (SFmode, operands[3]);
+
+ ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }"
+)
+
+(define_expand "movdfcc"
+ [(set (match_operand:DF 0 "s_register_operand" "")
+ (if_then_else:DF (match_operand 1 "arm_comparison_operator" "")
+ (match_operand:DF 2 "s_register_operand" "")
+ (match_operand:DF 3 "arm_float_add_operand" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FPA || TARGET_VFP_DOUBLE)"
+ "
+ {
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx ccreg;
+
+ if (code == UNEQ || code == LTGT)
+ FAIL;
+
+ ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }"
+)
+
+(define_insn "*movsicc_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K")
+ (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))]
+ "TARGET_ARM"
+ "@
+ mov%D3\\t%0, %2
+ mvn%D3\\t%0, #%B2
+ mov%d3\\t%0, %1
+ mvn%d3\\t%0, #%B1
+ mov%d3\\t%0, %1\;mov%D3\\t%0, %2
+ mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
+ mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
+ mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
+ [(set_attr "length" "4,4,4,4,8,8,8,8")
+ (set_attr "conds" "use")
+ (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn")]
+)
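+
+;; For example (registers illustrative), with the condition codes already
+;; set, "r0 = c ? r1 : r2" can come out as the two predicated moves
+;;      movne   r0, r1
+;;      moveq   r0, r2
+;; (8 bytes), or as a single predicated move when one arm of the
+;; conditional already lives in the destination register (the "0"
+;; alternatives above).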
+
+(define_insn "*movsfcc_soft_insn"
+ [(set (match_operand:SF 0 "s_register_operand" "=r,r")
+ (if_then_else:SF (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "s_register_operand" "0,r")
+ (match_operand:SF 2 "s_register_operand" "r,0")))]
+ "TARGET_ARM && TARGET_SOFT_FLOAT"
+ "@
+ mov%D3\\t%0, %2
+ mov%d3\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")]
+)
+
+
+;; Jump and linkage insns
+
+(define_expand "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_EITHER"
+ ""
+)
+
+(define_insn "*arm_jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_32BIT"
+ "*
+ {
+ if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return \"b%?\\t%l0\";
+ }
+ "
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*thumb_jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_THUMB1"
+ "*
+ if (get_attr_length (insn) == 2)
+ return \"b\\t%l0\";
+ return \"bl\\t%l0\\t%@ far jump\";
+ "
+ [(set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "yes")
+ (const_string "no")))
+ (set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -2044))
+ (le (minus (match_dup 0) (pc)) (const_int 2048)))
+ (const_int 2)
+ (const_int 4)))]
+)
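+
+;; Roughly: a 2-byte Thumb B reaches about +/-2KB, hence the -2044..2048
+;; window above (offsets are measured from a pc that has already advanced
+;; past the branch).  Beyond that range a 4-byte BL is used instead; the
+;; far_jump attribute records this so that the prologue knows LR is
+;; clobbered by such jumps and must be saved.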
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))])]
+ "TARGET_EITHER"
+ "
+ {
+ rtx callee, pat;
+
+ /* In an untyped call, we can get NULL for operand 2. */
+ if (operands[2] == NULL_RTX)
+ operands[2] = const0_rtx;
+
+ /* Decide if we should generate indirect calls by loading the
+ 32-bit address of the callee into a register before performing the
+ branch and link. */
+ callee = XEXP (operands[0], 0);
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? arm_is_long_call_p (SYMBOL_REF_DECL (callee))
+ : !REG_P (callee))
+ XEXP (operands[0], 0) = force_reg (Pmode, callee);
+
+ pat = gen_call_internal (operands[0], operands[1], operands[2]);
+ arm_emit_call_insn (pat, XEXP (operands[0], 0));
+ DONE;
+ }"
+)
+
+(define_expand "call_internal"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))])])
+
+(define_insn "*call_reg_armv5"
+ [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && arm_arch5"
+ "blx%?\\t%0"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*call_reg_arm"
+ [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && !arm_arch5"
+ "*
+ return output_call (operands);
+ "
+ ;; The length is the worst case; normally only two instructions are needed.
+ [(set_attr "length" "12")
+ (set_attr "type" "call")]
+)
+
+
+;; Note: not used for armv5+ because the sequence used (ldr pc, ...) is not
+;; considered a function call by the branch predictor of some cores (PR40887).
+;; Falls back to blx rN (*call_reg_armv5).
+
+(define_insn "*call_mem"
+ [(call (mem:SI (match_operand:SI 0 "call_memory_operand" "m"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && !arm_arch5"
+ "*
+ return output_call_mem (operands);
+ "
+ [(set_attr "length" "12")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*call_reg_thumb1_v5"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1 && arm_arch5"
+ "blx\\t%0"
+ [(set_attr "length" "2")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*call_reg_thumb1"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1 && !arm_arch5"
+ "*
+ {
+ if (!TARGET_CALLER_INTERWORKING)
+ return thumb_call_via_reg (operands[0]);
+ else if (operands[1] == const0_rtx)
+ return \"bl\\t%__interwork_call_via_%0\";
+ else if (frame_pointer_needed)
+ return \"bl\\t%__interwork_r7_call_via_%0\";
+ else
+ return \"bl\\t%__interwork_r11_call_via_%0\";
+ }"
+ [(set_attr "type" "call")]
+)
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "general_operand" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))])]
+ "TARGET_EITHER"
+ "
+ {
+ rtx pat, callee;
+
+ /* In an untyped call, we can get NULL for operand 3. */
+ if (operands[3] == 0)
+ operands[3] = const0_rtx;
+
+ /* Decide if we should generate indirect calls by loading the
+ 32-bit address of the callee into a register before performing the
+ branch and link. */
+ callee = XEXP (operands[1], 0);
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? arm_is_long_call_p (SYMBOL_REF_DECL (callee))
+ : !REG_P (callee))
+ XEXP (operands[1], 0) = force_reg (Pmode, callee);
+
+ pat = gen_call_value_internal (operands[0], operands[1],
+ operands[2], operands[3]);
+ arm_emit_call_insn (pat, XEXP (operands[1], 0));
+ DONE;
+ }"
+)
+
+(define_expand "call_value_internal"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "general_operand" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))])])
+
+(define_insn "*call_value_reg_armv5"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && arm_arch5"
+ "blx%?\\t%1"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*call_value_reg_arm"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && !arm_arch5"
+ "*
+ return output_call (&operands[1]);
+ "
+ [(set_attr "length" "12")
+ (set_attr "type" "call")]
+)
+
+;; Note: see *call_mem
+
+(define_insn "*call_value_mem"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "call_memory_operand" "m"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))"
+ "*
+ return output_call_mem (&operands[1]);
+ "
+ [(set_attr "length" "12")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*call_value_reg_thumb1_v5"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1 && arm_arch5"
+ "blx\\t%1"
+ [(set_attr "length" "2")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*call_value_reg_thumb1"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1 && !arm_arch5"
+ "*
+ {
+ if (!TARGET_CALLER_INTERWORKING)
+ return thumb_call_via_reg (operands[1]);
+ else if (operands[2] == const0_rtx)
+ return \"bl\\t%__interwork_call_via_%1\";
+ else if (frame_pointer_needed)
+ return \"bl\\t%__interwork_r7_call_via_%1\";
+ else
+ return \"bl\\t%__interwork_r11_call_via_%1\";
+ }"
+ [(set_attr "type" "call")]
+)
+
+;; Allow calls to SYMBOL_REFs specially, since they are not valid general addresses.
+;; The 'a' causes the operand to be treated as an address, i.e. no '#' output.
+
+(define_insn "*call_symbol"
+ [(call (mem:SI (match_operand:SI 0 "" ""))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_32BIT
+ && (GET_CODE (operands[0]) == SYMBOL_REF)
+ && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))"
+ "*
+ {
+ return NEED_PLT_RELOC ? \"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\";
+ }"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*call_value_symbol"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "" ""))
+ (match_operand:SI 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_32BIT
+ && (GET_CODE (operands[1]) == SYMBOL_REF)
+ && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))"
+ "*
+ {
+ return NEED_PLT_RELOC ? \"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\";
+ }"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*call_insn"
+ [(call (mem:SI (match_operand:SI 0 "" ""))
+ (match_operand:SI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1
+ && GET_CODE (operands[0]) == SYMBOL_REF
+ && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))"
+ "bl\\t%a0"
+ [(set_attr "length" "4")
+ (set_attr "type" "call")]
+)
+
+(define_insn "*call_value_insn"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "" ""))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB1
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))"
+ "bl\\t%a1"
+ [(set_attr "length" "4")
+ (set_attr "type" "call")]
+)
+
+;; We may also be able to do sibcalls for Thumb, but it's much harder...
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (return)
+ (use (match_operand 2 "" ""))])]
+ "TARGET_32BIT"
+ "
+ {
+ if (operands[2] == NULL_RTX)
+ operands[2] = const0_rtx;
+ }"
+)
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "general_operand" "")))
+ (return)
+ (use (match_operand 3 "" ""))])]
+ "TARGET_32BIT"
+ "
+ {
+ if (operands[3] == NULL_RTX)
+ operands[3] = const0_rtx;
+ }"
+)
+
+(define_insn "*sibcall_insn"
+ [(call (mem:SI (match_operand:SI 0 "" "X"))
+ (match_operand 1 "" ""))
+ (return)
+ (use (match_operand 2 "" ""))]
+ "TARGET_32BIT && GET_CODE (operands[0]) == SYMBOL_REF"
+ "*
+ return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\";
+ "
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*sibcall_value_insn"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "" "X"))
+ (match_operand 2 "" "")))
+ (return)
+ (use (match_operand 3 "" ""))]
+ "TARGET_32BIT && GET_CODE (operands[1]) == SYMBOL_REF"
+ "*
+ return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\";
+ "
+ [(set_attr "type" "call")]
+)
+
+(define_expand "return"
+ [(return)]
+ "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
+ "")
+
+;; Often the return insn will be the same as loading from memory, so set the attributes accordingly.
+(define_insn "*arm_return"
+ [(return)]
+ "TARGET_ARM && USE_RETURN_INSN (FALSE)"
+ "*
+ {
+ if (arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return output_return_instruction (const_true_rtx, TRUE, FALSE);
+ }"
+ [(set_attr "type" "load1")
+ (set_attr "length" "12")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*cond_return"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arm_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+ (return)
+ (pc)))]
+ "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+ "*
+ {
+ if (arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return output_return_instruction (operands[0], TRUE, FALSE);
+ }"
+ [(set_attr "conds" "use")
+ (set_attr "length" "12")
+ (set_attr "type" "load1")]
+)
+
+(define_insn "*cond_return_inverted"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arm_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+ (pc)
+ (return)))]
+ "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+ "*
+ {
+ if (arm_ccfsm_state == 2)
+ {
+ arm_ccfsm_state += 2;
+ return \"\";
+ }
+ return output_return_instruction (operands[0], TRUE, TRUE);
+ }"
+ [(set_attr "conds" "use")
+ (set_attr "length" "12")
+ (set_attr "type" "load1")]
+)
+
+;; Generate a sequence of instructions to determine if the processor is
+;; in 26-bit or 32-bit mode, and return the appropriate return address
+;; mask.
+
+(define_expand "return_addr_mask"
+ [(set (match_dup 1)
+ (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH)
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (eq (match_dup 1) (const_int 0))
+ (const_int -1)
+ (const_int 67108860)))] ; 0x03fffffc
+ "TARGET_ARM"
+ "
+ operands[1] = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+ ")
+
+(define_insn "*check_arch2"
+ [(set (match_operand:CC_NOOV 0 "cc_register" "")
+ (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH)
+ (const_int 0)))]
+ "TARGET_ARM"
+ "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc"
+ [(set_attr "length" "8")
+ (set_attr "conds" "set")]
+)
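+
+;; A sketch of why this works: on 26-bit processors r15 carries the PSR
+;; flags in its top bits, and reading it as the shifted operand (Rm)
+;; includes those bits while reading it as Rn does not.  The first teq
+;; forces the Z flag on, so "teq pc, pc" then compares unequal (NE) in
+;; 26-bit mode but equal (EQ) in 32-bit mode, where both reads are
+;; identical.  The mask 0x03fffffc subsequently strips the flag and mode
+;; bits from a 26-bit return address.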
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ "TARGET_EITHER"
+ "
+ {
+ int i;
+ rtx par = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (XVECLEN (operands[2], 0)));
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx mem;
+ int size = 0;
+
+ emit_move_insn (addr, XEXP (operands[1], 0));
+ mem = change_address (operands[1], BLKmode, addr);
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx src = SET_SRC (XVECEXP (operands[2], 0, i));
+
+ /* Default code only uses r0 as a return value, but we could
+ be using anything up to 4 registers. */
+ if (REGNO (src) == R0_REGNUM)
+ src = gen_rtx_REG (TImode, R0_REGNUM);
+
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src,
+ GEN_INT (size));
+ size += GET_MODE_SIZE (GET_MODE (src));
+ }
+
+ emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL,
+ const0_rtx));
+
+ size = 0;
+
+ for (i = 0; i < XVECLEN (par, 0); i++)
+ {
+ HOST_WIDE_INT offset = 0;
+ rtx reg = XEXP (XVECEXP (par, 0, i), 0);
+
+ if (size != 0)
+ emit_move_insn (addr, plus_constant (addr, size));
+
+ mem = change_address (mem, GET_MODE (reg), NULL);
+ if (REGNO (reg) == R0_REGNUM)
+ {
+ /* On thumb we have to use a write-back instruction. */
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr,
+ TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+ size = TARGET_ARM ? 16 : 0;
+ }
+ else
+ {
+ emit_move_insn (mem, reg);
+ size = GET_MODE_SIZE (GET_MODE (reg));
+ }
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+ }"
+)
+
+(define_expand "untyped_return"
+ [(match_operand:BLK 0 "memory_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_EITHER"
+ "
+ {
+ int i;
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx mem;
+ int size = 0;
+
+ emit_move_insn (addr, XEXP (operands[0], 0));
+ mem = change_address (operands[0], BLKmode, addr);
+
+ for (i = 0; i < XVECLEN (operands[1], 0); i++)
+ {
+ HOST_WIDE_INT offset = 0;
+ rtx reg = SET_DEST (XVECEXP (operands[1], 0, i));
+
+ if (size != 0)
+ emit_move_insn (addr, plus_constant (addr, size));
+
+ mem = change_address (mem, GET_MODE (reg), NULL);
+ if (REGNO (reg) == R0_REGNUM)
+ {
+ /* On thumb we have to use a write-back instruction. */
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr,
+ TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+ size = TARGET_ARM ? 16 : 0;
+ }
+ else
+ {
+ emit_move_insn (reg, mem);
+ size = GET_MODE_SIZE (GET_MODE (reg));
+ }
+ }
+
+ /* Emit USE insns before the return. */
+ for (i = 0; i < XVECLEN (operands[1], 0); i++)
+ emit_use (SET_DEST (XVECEXP (operands[1], 0, i)));
+
+ /* Construct the return. */
+ expand_naked_return ();
+
+ DONE;
+ }"
+)
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_BLOCKAGE)]
+ "TARGET_EITHER"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "block")]
+)
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "s_register_operand" "") ; index to jump on
+ (match_operand:SI 1 "const_int_operand" "") ; lower bound
+ (match_operand:SI 2 "const_int_operand" "") ; total range
+ (match_operand:SI 3 "" "") ; table label
+ (match_operand:SI 4 "" "")] ; Out of range label
+ "TARGET_32BIT || optimize_size || flag_pic"
+ "
+ {
+ enum insn_code code;
+ if (operands[1] != const0_rtx)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_addsi3 (reg, operands[0],
+ GEN_INT (-INTVAL (operands[1]))));
+ operands[0] = reg;
+ }
+
+ if (TARGET_ARM)
+ code = CODE_FOR_arm_casesi_internal;
+ else if (TARGET_THUMB1)
+ code = CODE_FOR_thumb1_casesi_internal_pic;
+ else if (flag_pic)
+ code = CODE_FOR_thumb2_casesi_internal_pic;
+ else
+ code = CODE_FOR_thumb2_casesi_internal;
+
+ if (!insn_data[(int) code].operand[1].predicate (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+
+ emit_jump_insn (GEN_FCN ((int) code) (operands[0], operands[2],
+ operands[3], operands[4]));
+ DONE;
+ }"
+)
+
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn. It has no other purpose.
+(define_insn "arm_casesi_internal"
+ [(parallel [(set (pc)
+ (if_then_else
+ (leu (match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:SI 1 "arm_rhs_operand" "rI"))
+ (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
+ (label_ref (match_operand 2 "" ""))))
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))
+ (use (label_ref (match_dup 2)))])]
+ "TARGET_ARM"
+ "*
+ if (flag_pic)
+ return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\";
+ return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
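+
+;; For illustration of the non-PIC sequence (registers and labels
+;; arbitrary): reading the pc on ARM yields the address of the current
+;; instruction plus 8, which is exactly where the jump table is placed
+;; (just past the trailing "b %l3").  So
+;;      cmp     r0, #N
+;;      ldrls   pc, [pc, r0, asl #2]
+;;      b       .Ldefault
+;; loads the target address straight out of the table.  The PIC variant
+;; instead adds the scaled index to the pc, dispatching into a table of
+;; position-independent branch instructions.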
+
+(define_expand "thumb1_casesi_internal_pic"
+ [(match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 1 "thumb1_cmp_operand" "")
+ (match_operand 2 "" "")
+ (match_operand 3 "" "")]
+ "TARGET_THUMB1"
+ {
+ rtx reg0;
+ rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[1]);
+ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[1],
+ operands[3]));
+ reg0 = gen_rtx_REG (SImode, 0);
+ emit_move_insn (reg0, operands[0]);
+ emit_jump_insn (gen_thumb1_casesi_dispatch (operands[2]/*, operands[3]*/));
+ DONE;
+ }
+)
+
+(define_insn "thumb1_casesi_dispatch"
+ [(parallel [(set (pc) (unspec [(reg:SI 0)
+ (label_ref (match_operand 0 "" ""))
+;; (label_ref (match_operand 1 "" ""))
+]
+ UNSPEC_THUMB1_CASESI))
+ (clobber (reg:SI IP_REGNUM))
+ (clobber (reg:SI LR_REGNUM))])]
+ "TARGET_THUMB1"
+ "* return thumb1_output_casesi(operands);"
+ [(set_attr "length" "4")]
+)
+
+(define_expand "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "s_register_operand" ""))]
+ "TARGET_EITHER"
+ "
+ /* Thumb-2 doesn't have mov pc, reg. Explicitly set the low bit of the
+ address and use bx. */
+ if (TARGET_THUMB2)
+ {
+ rtx tmp;
+ tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_iorsi3 (tmp, operands[0], GEN_INT (1)));
+ operands[0] = tmp;
+ }
+ "
+)
+
+;; NB Never uses BX.
+(define_insn "*arm_indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "s_register_operand" "r"))]
+ "TARGET_ARM"
+ "mov%?\\t%|pc, %0\\t%@ indirect register jump"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*load_indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "memory_operand" "m"))]
+ "TARGET_ARM"
+ "ldr%?\\t%|pc, %0\\t%@ indirect memory jump"
+ [(set_attr "type" "load1")
+ (set_attr "pool_range" "4096")
+ (set_attr "neg_pool_range" "4084")
+ (set_attr "predicable" "yes")]
+)
+
+;; NB Never uses BX.
+(define_insn "*thumb1_indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "l*r"))]
+ "TARGET_THUMB1"
+ "mov\\tpc, %0"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "2")]
+)
+
+
+;; Misc insns
+
+(define_insn "nop"
+ [(const_int 0)]
+ "TARGET_EITHER"
+ "*
+ if (TARGET_UNIFIED_ASM)
+ return \"nop\";
+ if (TARGET_ARM)
+ return \"mov%?\\t%|r0, %|r0\\t%@ nop\";
+ return \"mov\\tr8, r8\";
+ "
+ [(set (attr "length")
+ (if_then_else (eq_attr "is_thumb" "yes")
+ (const_int 2)
+ (const_int 4)))]
+)
+
+
+;; Patterns to allow combination of arithmetic, cond code and shifts
+
+(define_insn "*arith_shiftsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
+ (match_operator:SI 1 "shiftable_operator"
+ [(match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
+ (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
+ "TARGET_32BIT"
+ "%i1%?\\t%0, %2, %4%S3"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "4")
+ (set_attr "arch" "a,t2,t2,a")
+ ;; Thumb-2 doesn't allow the stack pointer to be used as operand 1
+ ;; for any operation other than add and sub.  In that case the minus
+ ;; operation is a candidate for an rsub and hence needs to be
+ ;; disabled.
+ ;; We also have to disable the fourth alternative if the
+ ;; shift_operator is MULT, since otherwise the insn would also match
+ ;; a multiply_accumulate pattern and validate_change would allow
+ ;; replacement of the constant with a register despite the checks
+ ;; done in shift_operator.
+ (set_attr_alternative "insn_enabled"
+ [(const_string "yes")
+ (if_then_else
+ (match_operand:SI 1 "add_operator" "")
+ (const_string "yes") (const_string "no"))
+ (const_string "yes")
+ (if_then_else
+ (match_operand:SI 3 "mult_operator" "")
+ (const_string "no") (const_string "yes"))])
+ (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")])
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "shiftable_operator"
+ [(match_operator:SI 2 "shiftable_operator"
+ [(match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 4 "s_register_operand" "")
+ (match_operand:SI 5 "reg_or_int_operand" "")])
+ (match_operand:SI 6 "s_register_operand" "")])
+ (match_operand:SI 7 "arm_rhs_operand" "")]))
+ (clobber (match_operand:SI 8 "s_register_operand" ""))]
+ "TARGET_32BIT"
+ [(set (match_dup 8)
+ (match_op_dup 2 [(match_op_dup 3 [(match_dup 4) (match_dup 5)])
+ (match_dup 6)]))
+ (set (match_dup 0)
+ (match_op_dup 1 [(match_dup 8) (match_dup 7)]))]
+ "")
+
+(define_insn "*arith_shiftsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (match_operator:SI 1 "shiftable_operator"
+ [(match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "shift_amount_operand" "M,r")])
+ (match_operand:SI 2 "s_register_operand" "r,r")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_op_dup 1 [(match_op_dup 3 [(match_dup 4) (match_dup 5)])
+ (match_dup 2)]))]
+ "TARGET_32BIT"
+ "%i1%.\\t%0, %2, %4%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "4")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*arith_shiftsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (match_operator:SI 1 "shiftable_operator"
+ [(match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "shift_amount_operand" "M,r")])
+ (match_operand:SI 2 "s_register_operand" "r,r")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r,r"))]
+ "TARGET_32BIT"
+ "%i1%.\\t%0, %2, %4%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "4")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*sub_shiftsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operator:SI 2 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "shift_amount_operand" "M,r")])))]
+ "TARGET_32BIT"
+ "sub%?\\t%0, %1, %3%S2"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "3")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*sub_shiftsi_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (minus:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operator:SI 2 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "shift_amount_operand" "M,rM")]))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_dup 1)
+ (match_op_dup 2 [(match_dup 3) (match_dup 4)])))]
+ "TARGET_32BIT"
+ "sub%.\\t%0, %1, %3%S2"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "3")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+(define_insn "*sub_shiftsi_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (minus:SI (match_operand:SI 1 "s_register_operand" "r,r")
+ (match_operator:SI 2 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "shift_amount_operand" "M,rM")]))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r,r"))]
+ "TARGET_32BIT"
+ "sub%.\\t%0, %1, %3%S2"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "3")
+ (set_attr "arch" "32,a")
+ (set_attr "type" "alu_shift,alu_shift_reg")])
+
+
+(define_insn "*and_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_ARM"
+ "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*ior_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (ior:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "s_register_operand" "0,?r")))]
+ "TARGET_ARM"
+ "@
+ orr%d2\\t%0, %1, #1
+ mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8")]
+)
+
+; A series of splitters for the compare_scc pattern below. Note that
+; order is important.
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (lt:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))])
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ge:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (not:SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))])
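+
+;; A sketch of the two sign-bit tricks above: "x < 0" is simply the sign
+;; bit of x, so a logical shift right by 31 yields the 0/1 result
+;; directly:
+;;      mov     r0, r1, lsr #31         @ r0 = (r1 < 0)
+;; and "x >= 0" is its complement, hence the mvn before the same shift:
+;;      mvn     r0, r1
+;;      mov     r0, r0, lsr #31         @ r0 = (r1 >= 0)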
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && reload_completed"
+ [(parallel
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (const_int 1) (match_dup 1)))
+ (set (match_dup 0)
+ (minus:SI (const_int 1) (match_dup 1)))])
+ (cond_exec (ltu:CC (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 0)))])
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && reload_completed"
+ [(parallel
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))])
+ (cond_exec (ne:CC (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))]
+{
+ operands[3] = GEN_INT (-INTVAL (operands[2]));
+})
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 2 "arm_add_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && reload_completed"
+ [(parallel
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (minus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+ (cond_exec (ne:CC_NOOV (reg:CC_NOOV CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))])
+
+(define_insn_and_split "*compare_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,L")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT"
+ "#"
+ "&& reload_completed"
+ [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3)))
+ (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))
+ (cond_exec (match_dup 5) (set (match_dup 0) (const_int 1)))]
+{
+ rtx tmp1;
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ enum rtx_code rc = GET_CODE (operands[1]);
+
+ tmp1 = gen_rtx_REG (mode, CC_REGNUM);
+
+ operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx);
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (rc);
+ else
+ rc = reverse_condition (rc);
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx);
+})
+
+;; Attempt to improve the sequence generated by the compare_scc splitters
+;; so that it does not use conditional execution.
+(define_peephole2
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "") (const_int 0)))
+ (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))
+ (match_scratch:SI 3 "r")]
+ "TARGET_32BIT"
+ [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))
+ (parallel
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (const_int 0) (match_dup 3)))
+ (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))])
+ (set (match_dup 0)
+ (plus:SI (plus:SI (match_dup 0) (match_dup 3))
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0))))])
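+
+;; For illustration (registers arbitrary), this peephole turns
+;; "r0 = (r1 == r2)" into a branch-free, unpredicated sequence along the
+;; lines of:
+;;      sub     r3, r1, r2
+;;      rsbs    r0, r3, #0      @ sets C = (r3 == 0)
+;;      adc     r0, r0, r3      @ r0 = -r3 + r3 + C = (r1 == r2)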
+
+(define_insn "*cond_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI (match_operator 3 "equality_operator"
+ [(match_operator 4 "arm_comparison_operator"
+ [(match_operand 5 "cc_register" "") (const_int 0)])
+ (const_int 0)])
+ (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))]
+ "TARGET_ARM"
+ "*
+ if (GET_CODE (operands[3]) == NE)
+ {
+ if (which_alternative != 1)
+ output_asm_insn (\"mov%D4\\t%0, %2\", operands);
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%d4\\t%0, %1\", operands);
+ return \"\";
+ }
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ if (which_alternative != 1)
+ output_asm_insn (\"mov%d4\\t%0, %2\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mov")
+ (set_attr "length" "4,4,8")]
+)
+
+(define_insn "*cond_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_operator:SI 5 "shiftable_operator"
+ [(match_operator:SI 4 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
+ (match_operand:SI 1 "s_register_operand" "0,?r")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx)
+ return \"%i5\\t%0, %1, %2, lsr #31\";
+
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ if (GET_CODE (operands[5]) == AND)
+ output_asm_insn (\"mov%D4\\t%0, #0\", operands);
+ else if (GET_CODE (operands[5]) == MINUS)
+ output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands);
+ else if (which_alternative != 0)
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ return \"%i5%d4\\t%0, %1, #1\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+
+(define_insn "*cond_sub"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+ (match_operator:SI 4 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ return \"sub%d4\\t%0, %1, #1\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+;; ??? Is it worth using these conditional patterns in Thumb-2 mode?
+(define_insn "*cmp_ite0"
+ [(set (match_operand 6 "dominant_cc_register" "")
+ (compare
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ (match_operator:SI 5 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ (const_int 0))
+ (const_int 0)))]
+ "TARGET_ARM"
+ "*
+ {
+ static const char * const opcodes[4][2] =
+ {
+ {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
+ \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
+ {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
+ \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
+ {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
+ \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
+ {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
+ \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
+ };
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+
+ return opcodes[which_alternative][swap];
+ }"
+ [(set_attr "conds" "set")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*cmp_ite1"
+ [(set (match_operand 6 "dominant_cc_register" "")
+ (compare
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ (match_operator:SI 5 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")])
+ (const_int 1))
+ (const_int 0)))]
+ "TARGET_ARM"
+ "*
+ {
+ static const char * const opcodes[4][2] =
+ {
+ {\"cmp\\t%0, %1\;cmp%d4\\t%2, %3\",
+ \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
+ {\"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\",
+ \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
+ {\"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\",
+ \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
+ {\"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\",
+ \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
+ };
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]),
+ reverse_condition (GET_CODE (operands[4])));
+
+ return opcodes[which_alternative][swap];
+ }"
+ [(set_attr "conds" "set")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*cmp_and"
+ [(set (match_operand 6 "dominant_cc_register" "")
+ (compare
+ (and:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ (match_operator:SI 5 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ (const_int 0)))]
+ "TARGET_ARM"
+ "*
+ {
+ static const char *const opcodes[4][2] =
+ {
+ {\"cmp\\t%2, %3\;cmp%d5\\t%0, %1\",
+ \"cmp\\t%0, %1\;cmp%d4\\t%2, %3\"},
+ {\"cmp\\t%2, %3\;cmn%d5\\t%0, #%n1\",
+ \"cmn\\t%0, #%n1\;cmp%d4\\t%2, %3\"},
+ {\"cmn\\t%2, #%n3\;cmp%d5\\t%0, %1\",
+ \"cmp\\t%0, %1\;cmn%d4\\t%2, #%n3\"},
+ {\"cmn\\t%2, #%n3\;cmn%d5\\t%0, #%n1\",
+ \"cmn\\t%0, #%n1\;cmn%d4\\t%2, #%n3\"}
+ };
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+
+ return opcodes[which_alternative][swap];
+ }"
+ [(set_attr "conds" "set")
+ (set_attr "predicable" "no")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*cmp_ior"
+ [(set (match_operand 6 "dominant_cc_register" "")
+ (compare
+ (ior:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand:SI 0 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 1 "arm_add_operand" "rI,L,rI,L")])
+ (match_operator:SI 5 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,rI,L,L")]))
+ (const_int 0)))]
+ "TARGET_ARM"
+ "*
+{
+ static const char *const opcodes[4][2] =
+ {
+ {\"cmp\\t%0, %1\;cmp%D4\\t%2, %3\",
+ \"cmp\\t%2, %3\;cmp%D5\\t%0, %1\"},
+ {\"cmn\\t%0, #%n1\;cmp%D4\\t%2, %3\",
+ \"cmp\\t%2, %3\;cmn%D5\\t%0, #%n1\"},
+ {\"cmp\\t%0, %1\;cmn%D4\\t%2, #%n3\",
+ \"cmn\\t%2, #%n3\;cmp%D5\\t%0, %1\"},
+ {\"cmn\\t%0, #%n1\;cmn%D4\\t%2, #%n3\",
+ \"cmn\\t%2, #%n3\;cmn%D5\\t%0, #%n1\"}
+ };
+ int swap =
+ comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4]));
+
+ return opcodes[which_alternative][swap];
+}
+"
+ [(set_attr "conds" "set")
+ (set_attr "length" "8")]
+)
+
+(define_insn_and_split "*ior_scc_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (ior:SI (match_operator:SI 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_add_operand" "rIL")])
+ (match_operator:SI 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM
+ && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y)
+ != CCmode)"
+ "#"
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 7)
+ (compare
+ (ior:SI
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)]))
+ (const_int 0)))
+ (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))]
+ "operands[7]
+ = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6],
+ DOM_CC_X_OR_Y),
+ CC_REGNUM);"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "16")])
+
+; If the above pattern is followed by a CMP insn, then the compare is
+; redundant, since we can rework the conditional instruction that follows.
+(define_insn_and_split "*ior_scc_scc_cmp"
+ [(set (match_operand 0 "dominant_cc_register" "")
+ (compare (ior:SI (match_operator:SI 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_add_operand" "rIL")])
+ (match_operator:SI 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")]))
+ (const_int 0)))
+ (set (match_operand:SI 7 "s_register_operand" "=r")
+ (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
+ "TARGET_ARM"
+ "#"
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 0)
+ (compare
+ (ior:SI
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)]))
+ (const_int 0)))
+ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))]
+ ""
+ [(set_attr "conds" "set")
+ (set_attr "length" "16")])
+
+(define_insn_and_split "*and_scc_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (match_operator:SI 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_add_operand" "rIL")])
+ (match_operator:SI 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM
+ && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
+ != CCmode)"
+ "#"
+ "TARGET_ARM && reload_completed
+ && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
+ != CCmode)"
+ [(set (match_dup 7)
+ (compare
+ (and:SI
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)]))
+ (const_int 0)))
+ (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))]
+ "operands[7]
+ = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6],
+ DOM_CC_X_AND_Y),
+ CC_REGNUM);"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "16")])
+
+; If the above pattern is followed by a CMP insn, then the compare is
+; redundant, since we can rework the conditional instruction that follows.
+(define_insn_and_split "*and_scc_scc_cmp"
+ [(set (match_operand 0 "dominant_cc_register" "")
+ (compare (and:SI (match_operator:SI 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_add_operand" "rIL")])
+ (match_operator:SI 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")]))
+ (const_int 0)))
+ (set (match_operand:SI 7 "s_register_operand" "=r")
+ (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
+ "TARGET_ARM"
+ "#"
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 0)
+ (compare
+ (and:SI
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)])
+ (match_op_dup 6 [(match_dup 4) (match_dup 5)]))
+ (const_int 0)))
+ (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))]
+ ""
+ [(set_attr "conds" "set")
+ (set_attr "length" "16")])
+
+;; If there is no dominance in the comparison, then we can still save an
+;; instruction in the AND case, since we know that the second compare
+;; need only zero the value if it is false (if true, then the value is
+;; already correct).
+(define_insn_and_split "*and_scc_scc_nodom"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
+ (and:SI (match_operator:SI 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r,r,0")
+ (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")])
+ (match_operator:SI 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM
+ && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y)
+ == CCmode)"
+ "#"
+ "TARGET_ARM && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_dup 7) (match_op_dup 8 [(match_dup 4) (match_dup 5)]))
+ (set (match_dup 0)
+ (if_then_else:SI (match_op_dup 6 [(match_dup 7) (const_int 0)])
+ (match_dup 0)
+ (const_int 0)))]
+ "operands[7] = gen_rtx_REG (SELECT_CC_MODE (GET_CODE (operands[6]),
+ operands[4], operands[5]),
+ CC_REGNUM);
+ operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4],
+ operands[5]);"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "20")])
+
+(define_split
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ior:SI
+ (and:SI (match_operand:SI 0 "s_register_operand" "")
+ (const_int 1))
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "arm_add_operand" "")]))
+ (const_int 0)))
+ (clobber (match_operand:SI 4 "s_register_operand" ""))]
+ "TARGET_ARM"
+ [(set (match_dup 4)
+ (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)])
+ (match_dup 0)))
+ (set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1))
+ (const_int 0)))]
+ "")
+
+(define_split
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (ior:SI
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "arm_add_operand" "")])
+ (and:SI (match_operand:SI 0 "s_register_operand" "")
+ (const_int 1)))
+ (const_int 0)))
+ (clobber (match_operand:SI 4 "s_register_operand" ""))]
+ "TARGET_ARM"
+ [(set (match_dup 4)
+ (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)])
+ (match_dup 0)))
+ (set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1))
+ (const_int 0)))]
+ "")
+;; ??? The conditional patterns above need checking for Thumb-2 usefulness
+
+(define_insn "*negscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (neg:SI (match_operator 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
+ return \"mov\\t%0, %1, asr #31\";
+
+ if (GET_CODE (operands[3]) == NE)
+ return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\";
+
+ output_asm_insn (\"cmp\\t%1, %2\", operands);
+ output_asm_insn (\"mov%D3\\t%0, #0\", operands);
+ return \"mvn%d3\\t%0, #0\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
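+
+;; E.g. "r0 = -(r1 < 0)" collapses to the single instruction
+;;      mov     r0, r1, asr #31
+;; since the arithmetic shift replicates the sign bit across the whole
+;; word, giving 0 or -1 directly.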
+
+(define_insn "movcond"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ if (GET_CODE (operands[5]) == LT
+ && (operands[4] == const0_rtx))
+ {
+ if (which_alternative != 1 && GET_CODE (operands[1]) == REG)
+ {
+ if (operands[2] == const0_rtx)
+ return \"and\\t%0, %1, %3, asr #31\";
+ return \"ands\\t%0, %1, %3, asr #32\;movcc\\t%0, %2\";
+ }
+ else if (which_alternative != 0 && GET_CODE (operands[2]) == REG)
+ {
+ if (operands[1] == const0_rtx)
+ return \"bic\\t%0, %2, %3, asr #31\";
+ return \"bics\\t%0, %2, %3, asr #32\;movcs\\t%0, %1\";
+ }
+ /* The only case that falls through to here is when both ops 1 & 2
+ are constants. */
+ }
+
+ if (GET_CODE (operands[5]) == GE
+ && (operands[4] == const0_rtx))
+ {
+ if (which_alternative != 1 && GET_CODE (operands[1]) == REG)
+ {
+ if (operands[2] == const0_rtx)
+ return \"bic\\t%0, %1, %3, asr #31\";
+ return \"bics\\t%0, %1, %3, asr #32\;movcs\\t%0, %2\";
+ }
+ else if (which_alternative != 0 && GET_CODE (operands[2]) == REG)
+ {
+ if (operands[1] == const0_rtx)
+ return \"and\\t%0, %2, %3, asr #31\";
+ return \"ands\\t%0, %2, %3, asr #32\;movcc\\t%0, %1\";
+ }
+ /* The only case that falls through to here is when both ops 1 & 2
+ are constants. */
+ }
+ if (GET_CODE (operands[4]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[4])))
+ output_asm_insn (\"cmn\\t%3, #%n4\", operands);
+ else
+ output_asm_insn (\"cmp\\t%3, %4\", operands);
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%d5\\t%0, %1\", operands);
+ if (which_alternative != 1)
+ output_asm_insn (\"mov%D5\\t%0, %2\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,8,12")]
+)
+
+;; ??? The patterns below need checking for Thumb-2 usefulness.
+
+(define_insn "*ifcompare_plus_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL")])
+ (plus:SI
+ (match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_add_operand" "rIL,rIL"))
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_plus_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 5 "cc_register" "") (const_int 0)])
+ (plus:SI
+ (match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L"))
+ (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI")))]
+ "TARGET_ARM"
+ "@
+ add%d4\\t%0, %2, %3
+ sub%d4\\t%0, %2, #%n3
+ add%d4\\t%0, %2, %3\;mov%D4\\t%0, %1
+ sub%d4\\t%0, %2, #%n3\;mov%D4\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,4,8,8")
+ (set_attr "type" "*,*,*,*")]
+)
+
+(define_insn "*ifcompare_move_plus"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")
+ (plus:SI
+ (match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_add_operand" "rIL,rIL"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_move_plus"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 5 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI")
+ (plus:SI
+ (match_operand:SI 2 "s_register_operand" "r,r,r,r")
+ (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L"))))]
+ "TARGET_ARM"
+ "@
+ add%D4\\t%0, %2, %3
+ sub%D4\\t%0, %2, #%n3
+ add%D4\\t%0, %2, %3\;mov%d4\\t%0, %1
+ sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,4,8,8")
+ (set_attr "type" "*,*,*,*")]
+)
+
+(define_insn "*ifcompare_arith_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI (match_operator 9 "arm_comparison_operator"
+ [(match_operand:SI 5 "s_register_operand" "r")
+ (match_operand:SI 6 "arm_add_operand" "rIL")])
+ (match_operator:SI 8 "shiftable_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI")])
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:SI 4 "arm_rhs_operand" "rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+
+(define_insn "*if_arith_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI (match_operator 5 "arm_comparison_operator"
+ [(match_operand 8 "cc_register" "") (const_int 0)])
+ (match_operator:SI 6 "shiftable_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI")])
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:SI 4 "arm_rhs_operand" "rI")])))]
+ "TARGET_ARM"
+ "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4"
+ [(set_attr "conds" "use")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*ifcompare_arith_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_add_operand" "rIL,rIL")])
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_rhs_operand" "rI,rI")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ /* If we have an operation where (op x 0) is the identity operation,
+ the conditional operator is LT or GE, we are comparing against zero,
+ and everything is in registers, then we can do this in two
+ instructions.  */
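+ /* For example (hypothetical registers), r0 = (r2 < 0) ? (r4 + r5) : r4
+ is emitted as "and r0, r5, r2, asr #31" then "add r0, r4, r0": the
+ asr #31 mask selects r5 when r2 is negative and zero otherwise.  */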
+ if (operands[3] == const0_rtx
+ && GET_CODE (operands[7]) != AND
+ && GET_CODE (operands[5]) == REG
+ && GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) == REGNO (operands[4])
+ && REGNO (operands[4]) != REGNO (operands[0]))
+ {
+ if (GET_CODE (operands[6]) == LT)
+ return \"and\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\";
+ else if (GET_CODE (operands[6]) == GE)
+ return \"bic\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\";
+ }
+ if (GET_CODE (operands[3]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[3])))
+ output_asm_insn (\"cmn\\t%2, #%n3\", operands);
+ else
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ output_asm_insn (\"%I7%d6\\t%0, %4, %5\", operands);
+ if (which_alternative != 0)
+ return \"mov%D6\\t%0, %1\";
+ return \"\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_arith_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 4 "arm_comparison_operator"
+ [(match_operand 6 "cc_register" "") (const_int 0)])
+ (match_operator:SI 5 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))]
+ "TARGET_ARM"
+ "@
+ %I5%d4\\t%0, %2, %3
+ %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8")
+ (set_attr "type" "*,*")]
+)
+
+(define_insn "*ifcompare_move_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ /* If we have an operation where (op x 0) is the identity operation and
+ the conditional operator is LT or GE and we are comparing against zero and
+ everything is in registers then we can do this in two instructions */
+ if (operands[5] == const0_rtx
+ && GET_CODE (operands[7]) != AND
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) == REGNO (operands[2])
+ && REGNO (operands[2]) != REGNO (operands[0]))
+ {
+ if (GET_CODE (operands[6]) == GE)
+ return \"and\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\";
+ else if (GET_CODE (operands[6]) == LT)
+ return \"bic\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\";
+ }
+
+ if (GET_CODE (operands[5]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[5])))
+ output_asm_insn (\"cmn\\t%4, #%n5\", operands);
+ else
+ output_asm_insn (\"cmp\\t%4, %5\", operands);
+
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%d6\\t%0, %1\", operands);
+ return \"%I7%D6\\t%0, %2, %3\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_move_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 6 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")
+ (match_operator:SI 5 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))]
+ "TARGET_ARM"
+ "@
+ %I5%D4\\t%0, %2, %3
+ %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8")
+ (set_attr "type" "*,*")]
+)
+
+(define_insn "*ifcompare_move_not"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL")])
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")
+ (not:SI
+ (match_operand:SI 2 "s_register_operand" "r,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_move_not"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")
+ (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))]
+ "TARGET_ARM"
+ "@
+ mvn%D4\\t%0, %2
+ mov%d4\\t%0, %1\;mvn%D4\\t%0, %2
+ mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mvn")
+ (set_attr "length" "4,8,8")]
+)
+
+(define_insn "*ifcompare_not_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL")])
+ (not:SI
+ (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_not_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))]
+ "TARGET_ARM"
+ "@
+ mvn%d4\\t%0, %2
+ mov%D4\\t%0, %1\;mvn%d4\\t%0, %2
+ mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mvn")
+ (set_attr "length" "4,8,8")]
+)
+
+(define_insn "*ifcompare_shift_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL")])
+ (match_operator:SI 7 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rM,rM")])
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_shift_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand 6 "cc_register" "") (const_int 0)])
+ (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")])
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))]
+ "TARGET_ARM"
+ "@
+ mov%d5\\t%0, %2%S4
+ mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4
+ mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
+ [(set_attr "conds" "use")
+ (set_attr "shift" "2")
+ (set_attr "length" "4,8,8")
+ (set_attr "insn" "mov")
+ (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*ifcompare_move_shift"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r,r")
+ (match_operand:SI 5 "arm_add_operand" "rIL,rIL")])
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")
+ (match_operator:SI 7 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rM,rM")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_move_shift"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand 6 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")
+ (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")])))]
+ "TARGET_ARM"
+ "@
+ mov%D5\\t%0, %2%S4
+ mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4
+ mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
+ [(set_attr "conds" "use")
+ (set_attr "shift" "2")
+ (set_attr "length" "4,8,8")
+ (set_attr "insn" "mov")
+ (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*ifcompare_shift_shift"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 7 "arm_comparison_operator"
+ [(match_operand:SI 5 "s_register_operand" "r")
+ (match_operand:SI 6 "arm_add_operand" "rIL")])
+ (match_operator:SI 8 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
+ (match_operator:SI 9 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:SI 4 "arm_rhs_operand" "rM")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+
+(define_insn "*if_shift_shift"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand 8 "cc_register" "") (const_int 0)])
+ (match_operator:SI 6 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")])
+ (match_operator:SI 7 "shift_operator"
+ [(match_operand:SI 3 "s_register_operand" "r")
+ (match_operand:SI 4 "arm_rhs_operand" "rM")])))]
+ "TARGET_ARM"
+ "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
+ [(set_attr "conds" "use")
+ (set_attr "shift" "1")
+ (set_attr "length" "8")
+ (set_attr "insn" "mov")
+ (set (attr "type") (if_then_else
+ (and (match_operand 2 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+(define_insn "*ifcompare_not_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")])
+ (not:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+
+(define_insn "*if_not_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (not:SI (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operator:SI 6 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI")])))]
+ "TARGET_ARM"
+ "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mvn")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*ifcompare_arith_not"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 6 "arm_comparison_operator"
+ [(match_operand:SI 4 "s_register_operand" "r")
+ (match_operand:SI 5 "arm_add_operand" "rIL")])
+ (match_operator:SI 7 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI")])
+ (not:SI (match_operand:SI 1 "s_register_operand" "r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+
+(define_insn "*if_arith_not"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operator:SI 6 "shiftable_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI")])
+ (not:SI (match_operand:SI 1 "s_register_operand" "r"))))]
+ "TARGET_ARM"
+ "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mvn")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*ifcompare_neg_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL")])
+ (neg:SI (match_operand:SI 2 "s_register_operand" "r,r"))
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_neg_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))]
+ "TARGET_ARM"
+ "@
+ rsb%d4\\t%0, %2, #0
+ mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0
+ mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8,8")]
+)
+
+(define_insn "*ifcompare_move_neg"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL")])
+ (match_operand:SI 1 "arm_not_operand" "0,?rIK")
+ (neg:SI (match_operand:SI 2 "s_register_operand" "r,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "#"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8,12")]
+)
+
+(define_insn "*if_move_neg"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 4 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_not_operand" "0,?rI,K")
+ (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))]
+ "TARGET_ARM"
+ "@
+ rsb%D4\\t%0, %2, #0
+ mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0
+ mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,8,8")]
+)
+
+(define_insn "*arith_adjacentmem"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operator:SI 1 "shiftable_operator"
+ [(match_operand:SI 2 "memory_operand" "m")
+ (match_operand:SI 3 "memory_operand" "m")]))
+ (clobber (match_scratch:SI 4 "=r"))]
+ "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])"
+ "*
+ {
+ rtx ldm[3];
+ rtx arith[4];
+ rtx base_reg;
+ HOST_WIDE_INT val1 = 0, val2 = 0;
+
+ if (REGNO (operands[0]) > REGNO (operands[4]))
+ {
+ ldm[1] = operands[4];
+ ldm[2] = operands[0];
+ }
+ else
+ {
+ ldm[1] = operands[0];
+ ldm[2] = operands[4];
+ }
+
+ base_reg = XEXP (operands[2], 0);
+
+ if (!REG_P (base_reg))
+ {
+ val1 = INTVAL (XEXP (base_reg, 1));
+ base_reg = XEXP (base_reg, 0);
+ }
+
+ if (!REG_P (XEXP (operands[3], 0)))
+ val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1));
+
+ arith[0] = operands[0];
+ arith[3] = operands[1];
+
+ if (val1 < val2)
+ {
+ arith[1] = ldm[1];
+ arith[2] = ldm[2];
+ }
+ else
+ {
+ arith[1] = ldm[2];
+ arith[2] = ldm[1];
+ }
+
+ ldm[0] = base_reg;
+ if (val1 != 0 && val2 != 0)
+ {
+ rtx ops[3];
+
+ if (val1 == 4 || val2 == 4)
+ /* Other val must be 8, since we know they are adjacent and neither
+ is zero. */
+ output_asm_insn (\"ldm%(ib%)\\t%0, {%1, %2}\", ldm);
+ else if (const_ok_for_arm (val1) || const_ok_for_arm (-val1))
+ {
+ ldm[0] = ops[0] = operands[4];
+ ops[1] = base_reg;
+ ops[2] = GEN_INT (val1);
+ output_add_immediate (ops);
+ if (val1 < val2)
+ output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+ else
+ output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+ }
+ else
+ {
+ /* Offset is out of range for a single add, so use two ldr. */
+ ops[0] = ldm[1];
+ ops[1] = base_reg;
+ ops[2] = GEN_INT (val1);
+ output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops);
+ ops[0] = ldm[2];
+ ops[2] = GEN_INT (val2);
+ output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops);
+ }
+ }
+ else if (val1 != 0)
+ {
+ if (val1 < val2)
+ output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+ else
+ output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+ }
+ else
+ {
+ if (val1 < val2)
+ output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm);
+ else
+ output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm);
+ }
+ output_asm_insn (\"%I3%?\\t%0, %1, %2\", arith);
+ return \"\";
+ }"
+ [(set_attr "length" "12")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "load1")]
+)
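+
+;; For example (hypothetical registers), r0 = mem[r3] + mem[r3 + 4] is
+;; emitted by the pattern above as
+;;     ldmia r3, {r0, r4}
+;;     add   r0, r0, r4
+;; where r4 stands in for the scratch register holding the second word.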
+
+;; This pattern is never tried by combine, so do it as a peephole
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "arm_general_register_operand" ""))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (match_dup 1) (const_int 0)))]
+ "TARGET_ARM"
+ [(parallel [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (match_dup 1))])]
+ ""
+)
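+
+;; Illustratively (hypothetical registers), the peephole above fuses
+;;     mov  r0, r1
+;;     cmp  r1, #0
+;; into one flag-setting move, matched elsewhere as, e.g.,
+;;     subs r0, r1, #0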
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (and:SI (ge:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0))
+ (neg:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 4 "arm_rhs_operand" "")]))))
+ (clobber (match_operand:SI 5 "s_register_operand" ""))]
+ "TARGET_ARM"
+ [(set (match_dup 5) (not:SI (ashiftrt:SI (match_dup 1) (const_int 31))))
+ (set (match_dup 0) (and:SI (match_op_dup 2 [(match_dup 3) (match_dup 4)])
+ (match_dup 5)))]
+ ""
+)
+
+;; This split can be used because CC_Z mode implies that the following
+;; branch will be an equality, or an unsigned inequality, so the sign
+;; extension is not needed.
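+;;
+;; Illustratively (a sketch): since the constant's low 24 bits are zero
+;; and only equality or unsigned tests follow,
+;;   ((x << 24) == C)  is equivalent to  ((x & 0xff) == (C >> 24))
+;; which is the zero-extended compare generated below.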
+
+(define_split
+ [(set (reg:CC_Z CC_REGNUM)
+ (compare:CC_Z
+ (ashift:SI (subreg:SI (match_operand:QI 0 "memory_operand" "") 0)
+ (const_int 24))
+ (match_operand 1 "const_int_operand" "")))
+ (clobber (match_scratch:SI 2 ""))]
+ "TARGET_ARM
+ && (((unsigned HOST_WIDE_INT) INTVAL (operands[1]))
+ == (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) >> 24) << 24)"
+ [(set (match_dup 2) (zero_extend:SI (match_dup 0)))
+ (set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 1)))]
+ "
+ operands[1] = GEN_INT (((unsigned long) INTVAL (operands[1])) >> 24);
+ "
+)
+;; ??? Check the patterns above for Thumb-2 usefulness.
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ "TARGET_EITHER"
+ "if (TARGET_32BIT)
+ arm_expand_prologue ();
+ else
+ thumb1_expand_prologue ();
+ DONE;
+ "
+)
+
+(define_expand "epilogue"
+ [(clobber (const_int 0))]
+ "TARGET_EITHER"
+ "
+ if (crtl->calls_eh_return)
+ emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
+ if (TARGET_THUMB1)
+ thumb1_expand_epilogue ();
+ else if (USE_RETURN_INSN (FALSE))
+ {
+ emit_jump_insn (gen_return ());
+ DONE;
+ }
+ emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
+ gen_rtvec (1,
+ gen_rtx_RETURN (VOIDmode)),
+ VUNSPEC_EPILOGUE));
+ DONE;
+ "
+)
+
+;; Note - although unspec_volatiles USE all hard registers,
+;; USEs are ignored after reload has completed.  Thus we need
+;; to add an unspec of the link register to ensure that flow
+;; does not think that it is unused by the sibcall branch that
+;; will replace the standard function epilogue.
+(define_insn "sibcall_epilogue"
+ [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
+ (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
+ "TARGET_32BIT"
+ "*
+ if (use_return_insn (FALSE, next_nonnote_insn (insn)))
+ return output_return_instruction (const_true_rtx, FALSE, FALSE);
+ return arm_output_epilogue (next_nonnote_insn (insn));
+ "
+;; Length is absolute worst case
+ [(set_attr "length" "44")
+ (set_attr "type" "block")
+ ;; We don't clobber the conditions, but the potential length of this
+ ;; operation is sufficient to make conditionalizing the sequence
+ ;; unlikely to be profitable.
+ (set_attr "conds" "clob")]
+)
+
+(define_insn "*epilogue_insns"
+ [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
+ "TARGET_EITHER"
+ "*
+ if (TARGET_32BIT)
+ return arm_output_epilogue (NULL);
+ else /* TARGET_THUMB1 */
+ return thumb_unexpanded_epilogue ();
+ "
+ ; Length is absolute worst case
+ [(set_attr "length" "44")
+ (set_attr "type" "block")
+ ;; We don't clobber the conditions, but the potential length of this
+ ;; operation is sufficient to make conditionalizing the sequence
+ ;; unlikely to be profitable.
+ (set_attr "conds" "clob")]
+)
+
+(define_expand "eh_epilogue"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))
+ (use (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_EITHER"
+ "
+ {
+ cfun->machine->eh_epilogue_sp_ofs = operands[1];
+ if (GET_CODE (operands[2]) != REG || REGNO (operands[2]) != 2)
+ {
+ rtx ra = gen_rtx_REG (Pmode, 2);
+
+ emit_move_insn (ra, operands[2]);
+ operands[2] = ra;
+ }
+ /* This is a hack -- we may have crystallized the function type too
+ early. */
+ cfun->machine->func_type = 0;
+ }"
+)
+
+;; This split is only used during output to reduce the number of patterns
+;; that need assembler instructions added to them.  We allowed the setting
+;; of the conditions to be implicit during rtl generation so that
+;; the conditional compare patterns would work.  However, this conflicts to
+;; some extent with the conditional data operations, so we have to split them
+;; up again here.
+
+;; ??? Need to audit these splitters for Thumb-2. Why isn't normal
+;; conditional execution sufficient?
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "" "") (match_operand 3 "" "")])
+ (match_dup 0)
+ (match_operand 4 "" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 5) (match_dup 6))
+ (cond_exec (match_dup 7)
+ (set (match_dup 0) (match_dup 4)))]
+ "
+ {
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ enum rtx_code rc = GET_CODE (operands[1]);
+
+ operands[5] = gen_rtx_REG (mode, CC_REGNUM);
+ operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]);
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (rc);
+ else
+ rc = reverse_condition (rc);
+
+ operands[7] = gen_rtx_fmt_ee (rc, VOIDmode, operands[5], const0_rtx);
+ }"
+)
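+
+;; Illustratively (hypothetical registers), the split above rewrites
+;;   r0 = (r1 < r2) ? r0 : r3
+;; as a compare plus one move predicated on the reversed condition:
+;;     cmp   r1, r2
+;;     movge r0, r3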
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "" "") (match_operand 3 "" "")])
+ (match_operand 4 "" "")
+ (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 5) (match_dup 6))
+ (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)])
+ (set (match_dup 0) (match_dup 4)))]
+ "
+ {
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+
+ operands[5] = gen_rtx_REG (mode, CC_REGNUM);
+ operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]);
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (match_operator 1 "arm_comparison_operator"
+ [(match_operand 2 "" "") (match_operand 3 "" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 6) (match_dup 7))
+ (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
+ (set (match_dup 0) (match_dup 4)))
+ (cond_exec (match_dup 8)
+ (set (match_dup 0) (match_dup 5)))]
+ "
+ {
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ enum rtx_code rc = GET_CODE (operands[1]);
+
+ operands[6] = gen_rtx_REG (mode, CC_REGNUM);
+ operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]);
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (rc);
+ else
+ rc = reverse_condition (rc);
+
+ operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx);
+ }"
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (if_then_else:SI (match_operator 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "arm_add_operand" "")])
+ (match_operand:SI 4 "arm_rhs_operand" "")
+ (not:SI
+ (match_operand:SI 5 "s_register_operand" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM && reload_completed"
+ [(set (match_dup 6) (match_dup 7))
+ (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
+ (set (match_dup 0) (match_dup 4)))
+ (cond_exec (match_dup 8)
+ (set (match_dup 0) (not:SI (match_dup 5))))]
+ "
+ {
+ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ enum rtx_code rc = GET_CODE (operands[1]);
+
+ operands[6] = gen_rtx_REG (mode, CC_REGNUM);
+ operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]);
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (rc);
+ else
+ rc = reverse_condition (rc);
+
+ operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx);
+ }"
+)
+
+(define_insn "*cond_move_not"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (if_then_else:SI (match_operator 4 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_rhs_operand" "0,?rI")
+ (not:SI
+ (match_operand:SI 2 "s_register_operand" "r,r"))))]
+ "TARGET_ARM"
+ "@
+ mvn%D4\\t%0, %2
+ mov%d4\\t%0, %1\;mvn%D4\\t%0, %2"
+ [(set_attr "conds" "use")
+ (set_attr "insn" "mvn")
+ (set_attr "length" "4,8")]
+)
+
+;; The next two patterns occur when an AND operation is followed by a
+;; scc insn sequence
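+;; In C terms they compute ((x >> n) & 1) ? -1 : 0 and its bitwise
+;; inverse ((x >> n) & 1) ? 0 : -1, where n is the bit position given
+;; by operand 2.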
+
+(define_insn "*sign_extract_onebit"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+ output_asm_insn (\"ands\\t%0, %1, %2\", operands);
+ return \"mvnne\\t%0, #0\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*not_signextract_onebit"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (not:SI
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 2 "const_int_operand" "n"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ARM"
+ "*
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+ output_asm_insn (\"tst\\t%1, %2\", operands);
+ output_asm_insn (\"mvneq\\t%0, #0\", operands);
+ return \"movne\\t%0, #0\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "12")]
+)
+;; ??? The above patterns need auditing for Thumb-2.
+
+;; Push multiple registers to the stack. Registers are in parallel (use ...)
+;; expressions. For simplicity, the first register is also in the unspec
+;; part.
+(define_insn "*push_multi"
+ [(match_parallel 2 "multi_register_push"
+ [(set (match_operand:BLK 0 "memory_operand" "=m")
+ (unspec:BLK [(match_operand:SI 1 "s_register_operand" "")]
+ UNSPEC_PUSH_MULT))])]
+ "TARGET_32BIT"
+ "*
+ {
+ int num_saves = XVECLEN (operands[2], 0);
+
+ /* For the StrongARM at least it is faster to
+ use STR to store only a single register.
+ In Thumb mode always use push, and the assembler will pick
+ something appropriate. */
+ if (num_saves == 1 && TARGET_ARM)
+ output_asm_insn (\"str\\t%1, [%m0, #-4]!\", operands);
+ else
+ {
+ int i;
+ char pattern[100];
+
+ if (TARGET_ARM)
+ strcpy (pattern, \"stmfd\\t%m0!, {%1\");
+ else
+ strcpy (pattern, \"push\\t{%1\");
+
+ for (i = 1; i < num_saves; i++)
+ {
+ strcat (pattern, \", %|\");
+ strcat (pattern,
+ reg_names[REGNO (XEXP (XVECEXP (operands[2], 0, i), 0))]);
+ }
+
+ strcat (pattern, \"}\");
+ output_asm_insn (pattern, operands);
+ }
+
+ return \"\";
+ }"
+ [(set_attr "type" "store4")]
+)
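+
+;; For example (hypothetical register set), a three-register push is
+;; emitted as "stmfd sp!, {r4, r5, lr}" in ARM state or
+;; "push {r4, r5, lr}" in Thumb state, while a single ARM-state save
+;; uses "str r4, [sp, #-4]!" as described in the code above.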
+
+(define_insn "stack_tie"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(match_operand:SI 0 "s_register_operand" "rk")
+ (match_operand:SI 1 "s_register_operand" "rk")]
+ UNSPEC_PRLG_STK))]
+ ""
+ ""
+ [(set_attr "length" "0")]
+)
+
+;; Similarly for the floating point registers
+(define_insn "*push_fp_multi"
+ [(match_parallel 2 "multi_register_push"
+ [(set (match_operand:BLK 0 "memory_operand" "=m")
+ (unspec:BLK [(match_operand:XF 1 "f_register_operand" "")]
+ UNSPEC_PUSH_MULT))])]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "*
+ {
+ char pattern[100];
+
+ sprintf (pattern, \"sfmfd\\t%%1, %d, [%%m0]!\", XVECLEN (operands[2], 0));
+ output_asm_insn (pattern, operands);
+ return \"\";
+ }"
+ [(set_attr "type" "f_fpa_store")]
+)
+
+;; Special patterns for dealing with the constant pool
+
+(define_insn "align_4"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN)]
+ "TARGET_EITHER"
+ "*
+ assemble_align (32);
+ return \"\";
+ "
+)
+
+(define_insn "align_8"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN8)]
+ "TARGET_EITHER"
+ "*
+ assemble_align (64);
+ return \"\";
+ "
+)
+
+(define_insn "consttable_end"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)]
+ "TARGET_EITHER"
+ "*
+ making_const_table = FALSE;
+ return \"\";
+ "
+)
+
+(define_insn "consttable_1"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_1)]
+ "TARGET_THUMB1"
+ "*
+ making_const_table = TRUE;
+ assemble_integer (operands[0], 1, BITS_PER_WORD, 1);
+ assemble_zeros (3);
+ return \"\";
+ "
+ [(set_attr "length" "4")]
+)
+
+(define_insn "consttable_2"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_2)]
+ "TARGET_THUMB1"
+ "*
+ making_const_table = TRUE;
+ gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT);
+ assemble_integer (operands[0], 2, BITS_PER_WORD, 1);
+ assemble_zeros (2);
+ return \"\";
+ "
+ [(set_attr "length" "4")]
+)
+
+(define_insn "consttable_4"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_4)]
+ "TARGET_EITHER"
+ "*
+ {
+ rtx x = operands[0];
+ making_const_table = TRUE;
+ switch (GET_MODE_CLASS (GET_MODE (x)))
+ {
+ case MODE_FLOAT:
+ if (GET_MODE (x) == HFmode)
+ arm_emit_fp16_const (x);
+ else
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ assemble_real (r, GET_MODE (x), BITS_PER_WORD);
+ }
+ break;
+ default:
+ /* XXX: Sometimes gcc does something really dumb and ends up with
+ a HIGH in a constant pool entry, usually because it's trying to
+ load into a VFP register. We know this will always be used in
+ combination with a LO_SUM which ignores the high bits, so just
+ strip off the HIGH. */
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+ assemble_integer (x, 4, BITS_PER_WORD, 1);
+ mark_symbol_refs_as_used (x);
+ break;
+ }
+ return \"\";
+ }"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "consttable_8"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_8)]
+ "TARGET_EITHER"
+ "*
+ {
+ making_const_table = TRUE;
+ switch (GET_MODE_CLASS (GET_MODE (operands[0])))
+ {
+ case MODE_FLOAT:
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
+ assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
+ break;
+ }
+ default:
+ assemble_integer (operands[0], 8, BITS_PER_WORD, 1);
+ break;
+ }
+ return \"\";
+ }"
+ [(set_attr "length" "8")]
+)
+
+(define_insn "consttable_16"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_16)]
+ "TARGET_EITHER"
+ "*
+ {
+ making_const_table = TRUE;
+ switch (GET_MODE_CLASS (GET_MODE (operands[0])))
+ {
+ case MODE_FLOAT:
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
+ assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
+ break;
+ }
+ default:
+ assemble_integer (operands[0], 16, BITS_PER_WORD, 1);
+ break;
+ }
+ return \"\";
+ }"
+ [(set_attr "length" "16")]
+)
+
+;; Miscellaneous Thumb patterns
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:SI 0 "register_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ "TARGET_THUMB1"
+ "
+ if (flag_pic)
+ {
+ /* Hopefully, CSE will eliminate this copy. */
+ rtx reg1 = copy_addr_to_reg (gen_rtx_LABEL_REF (Pmode, operands[1]));
+ rtx reg2 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_addsi3 (reg2, operands[0], reg1));
+ operands[0] = reg2;
+ }
+ "
+)
+
+;; NB never uses BX.
+(define_insn "*thumb1_tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "l*r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_THUMB1"
+ "mov\\t%|pc, %0"
+ [(set_attr "length" "2")]
+)
+
+;; V5 instructions.
+
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch5"
+ "clz%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "clz")])
+
+(define_insn "rbitsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))]
+ "TARGET_32BIT && arm_arch_thumb2"
+ "rbit%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "clz")])
+
+(define_expand "ctzsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ctz:SI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_32BIT && arm_arch_thumb2"
+ "
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_rbitsi2 (tmp, operands[1]));
+ emit_insn (gen_clzsi2 (operands[0], tmp));
+ }
+ DONE;
+ "
+)
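+
+;; The expansion relies on the identity ctz(x) == clz(bit_reverse(x)),
+;; so the generated sequence is (hypothetical registers):
+;;     rbit r0, r1
+;;     clz  r0, r0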
+
+;; V5E instructions.
+
+(define_insn "prefetch"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" ""))]
+ "TARGET_32BIT && arm_arch5e"
+ "pld\\t%a0")
+
+;; General predication pattern
+
+(define_cond_exec
+ [(match_operator 0 "arm_comparison_operator"
+ [(match_operand 1 "cc_register" "")
+ (const_int 0)])]
+ "TARGET_32BIT"
+ ""
+)
+
+(define_insn "prologue_use"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_PROLOGUE_USE)]
+ ""
+ "%@ %0 needed for prologue"
+ [(set_attr "length" "0")]
+)
+
+
+;; Patterns for exception handling
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "general_operand" ""))]
+ "TARGET_EITHER"
+ "
+ {
+ if (TARGET_32BIT)
+ emit_insn (gen_arm_eh_return (operands[0]));
+ else
+ emit_insn (gen_thumb_eh_return (operands[0]));
+ DONE;
+ }"
+)
+
+;; We can't expand this before we know where the link register is stored.
+(define_insn_and_split "arm_eh_return"
+ [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
+ VUNSPEC_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&r"))]
+ "TARGET_ARM"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "
+ {
+ arm_set_return_address (operands[0], operands[1]);
+ DONE;
+ }"
+)
+
+(define_insn_and_split "thumb_eh_return"
+ [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "l")]
+ VUNSPEC_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&l"))]
+ "TARGET_THUMB1"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "
+ {
+ thumb_set_return_address (operands[0], operands[1]);
+ DONE;
+ }"
+)
+
+
+;; TLS support
+
+(define_insn "load_tp_hard"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_TLS))]
+ "TARGET_HARD_TP"
+ "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard"
+ [(set_attr "predicable" "yes")]
+)
+
+;; Doesn't clobber R1-R3. Must use r0 for the first operand.
+(define_insn "load_tp_soft"
+ [(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS))
+ (clobber (reg:SI LR_REGNUM))
+ (clobber (reg:SI IP_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_SOFT_TP"
+ "bl\\t__aeabi_read_tp\\t@ load_tp_soft"
+ [(set_attr "conds" "clob")]
+)
+
+;; We only care about the lower 16 bits of the constant
+;; being inserted into the upper 16 bits of the register.
+(define_insn "*arm_movtas_ze"
+ [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r")
+ (const_int 16)
+ (const_int 16))
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "arm_arch_thumb2"
+ "movt%?\t%0, %L1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4")]
+)
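+
+;; For example, with a constant of 0x12345678 only 0x5678 is encoded,
+;; i.e. the equivalent of "movt r0, #0x5678" (hypothetical register):
+;; bits 16-31 of r0 are written and bits 0-15 are preserved.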
+
+(define_insn "*arm_rev"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch6"
+ "rev%?\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4")]
+)
+
+(define_insn "*thumb1_rev"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (bswap:SI (match_operand:SI 1 "s_register_operand" "l")))]
+ "TARGET_THUMB1 && arm_arch6"
+ "rev\t%0, %1"
+ [(set_attr "length" "2")]
+)
+
+(define_expand "arm_legacy_rev"
+ [(set (match_operand:SI 2 "s_register_operand" "")
+ (xor:SI (rotatert:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 16))
+ (match_dup 1)))
+ (set (match_dup 2)
+ (lshiftrt:SI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (rotatert:SI (match_dup 1)
+ (const_int 8)))
+ (set (match_dup 2)
+ (and:SI (match_dup 2)
+ (const_int -65281)))
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (xor:SI (match_dup 3)
+ (match_dup 2)))]
+ "TARGET_32BIT"
+ ""
+)
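+
+;; Working through the sequence above on x with bytes [A,B,C,D] (most
+;; significant first): x ^ ror(x,16) is [A^C, B^D, C^A, D^B]; shifting
+;; right by 8 and masking with 0xFFFF00FF (-65281) leaves
+;; [0, A^C, 0, C^A]; xoring that with ror(x,8) = [D, A, B, C] gives
+;; [D, C, B, A], the byte-reversed word.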
+
+;; Reuse temporaries to keep register pressure down.
+(define_expand "thumb_legacy_rev"
+ [(set (match_operand:SI 2 "s_register_operand" "")
+ (ashift:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 24)))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (lshiftrt:SI (match_dup 1)
+ (const_int 24)))
+ (set (match_dup 3)
+ (ior:SI (match_dup 3)
+ (match_dup 2)))
+ (set (match_operand:SI 4 "s_register_operand" "")
+ (const_int 16))
+ (set (match_operand:SI 5 "s_register_operand" "")
+ (rotatert:SI (match_dup 1)
+ (match_dup 4)))
+ (set (match_dup 2)
+ (ashift:SI (match_dup 5)
+ (const_int 24)))
+ (set (match_dup 5)
+ (lshiftrt:SI (match_dup 5)
+ (const_int 24)))
+ (set (match_dup 5)
+ (ior:SI (match_dup 5)
+ (match_dup 2)))
+ (set (match_dup 5)
+ (rotatert:SI (match_dup 5)
+ (match_dup 4)))
+ (set (match_operand:SI 0 "s_register_operand" "")
+ (ior:SI (match_dup 5)
+ (match_dup 3)))]
+ "TARGET_THUMB"
+ ""
+)
+
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
+"TARGET_EITHER && (arm_arch6 || !optimize_size)"
+"
+ if (!arm_arch6)
+ {
+ rtx op2 = gen_reg_rtx (SImode);
+ rtx op3 = gen_reg_rtx (SImode);
+
+ if (TARGET_THUMB)
+ {
+ rtx op4 = gen_reg_rtx (SImode);
+ rtx op5 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_thumb_legacy_rev (operands[0], operands[1],
+ op2, op3, op4, op5));
+ }
+ else
+ {
+ emit_insn (gen_arm_legacy_rev (operands[0], operands[1],
+ op2, op3));
+ }
+
+ DONE;
+ }
+ "
+)
+
+;; Load the load/store multiple patterns
+(include "ldmstm.md")
+;; Load the FPA co-processor patterns
+(include "fpa.md")
+;; Load the Maverick co-processor patterns
+(include "cirrus.md")
+;; Vector bits common to IWMMXT and Neon
+(include "vec-common.md")
+;; Load the Intel Wireless Multimedia Extension patterns
+(include "iwmmxt.md")
+;; Load the VFP co-processor patterns
+(include "vfp.md")
+;; Thumb-2 patterns
+(include "thumb2.md")
+;; Neon patterns
+(include "neon.md")
+;; Synchronization Primitives
+(include "sync.md")
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
new file mode 100644
index 000000000..a39bb3a8d
--- /dev/null
+++ b/gcc/config/arm/arm.opt
@@ -0,0 +1,171 @@
+; Options for the ARM port of the compiler.
+
+; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mabi=
+Target RejectNegative Joined Var(target_abi_name)
+Specify an ABI
+
+mabort-on-noreturn
+Target Report Mask(ABORT_NORETURN)
+Generate a call to abort if a noreturn function returns
+
+mapcs
+Target RejectNegative Mask(APCS_FRAME) MaskExists Undocumented
+
+mapcs-float
+Target Report Mask(APCS_FLOAT)
+Pass FP arguments in FP registers
+
+mapcs-frame
+Target Report Mask(APCS_FRAME)
+Generate APCS conformant stack frames
+
+mapcs-reentrant
+Target Report Mask(APCS_REENT)
+Generate re-entrant, PIC code
+
+mapcs-stack-check
+Target Report Mask(APCS_STACK) Undocumented
+
+march=
+Target RejectNegative Joined
+Specify the name of the target architecture
+
+marm
+Target RejectNegative InverseMask(THUMB) Undocumented
+
+mbig-endian
+Target Report RejectNegative Mask(BIG_END)
+Assume target CPU is configured as big endian
+
+mcallee-super-interworking
+Target Report Mask(CALLEE_INTERWORKING)
+Thumb: Assume non-static functions may be called from ARM code
+
+mcaller-super-interworking
+Target Report Mask(CALLER_INTERWORKING)
+Thumb: Assume function pointers may go to non-Thumb aware code
+
+mcirrus-fix-invalid-insns
+Target Report Mask(CIRRUS_FIX_INVALID_INSNS)
+Cirrus: Place NOPs to avoid invalid instruction combinations
+
+mcpu=
+Target RejectNegative Joined
+Specify the name of the target CPU
+
+mfloat-abi=
+Target RejectNegative Joined Var(target_float_abi_name)
+Specify if floating point hardware should be used
+
+mfp=
+Target RejectNegative Joined Undocumented Var(target_fpe_name)
+
+mfp16-format=
+Target RejectNegative Joined Var(target_fp16_format_name)
+Specify the __fp16 floating-point format
+
+;; Now ignored.
+mfpe
+Target RejectNegative Mask(FPE) Undocumented
+
+mfpe=
+Target RejectNegative Joined Undocumented Var(target_fpe_name)
+
+mfpu=
+Target RejectNegative Joined Var(target_fpu_name)
+Specify the name of the target floating point hardware/format
+
+mhard-float
+Target RejectNegative
+Alias for -mfloat-abi=hard
+
+mlittle-endian
+Target Report RejectNegative InverseMask(BIG_END)
+Assume target CPU is configured as little endian
+
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Generate call insns as indirect calls, if necessary
+
+mpic-register=
+Target RejectNegative Joined Var(arm_pic_register_string)
+Specify the register to be used for PIC addressing
+
+mpoke-function-name
+Target Report Mask(POKE_FUNCTION_NAME)
+Store function names in object code
+
+msched-prolog
+Target Report Mask(SCHED_PROLOG)
+Permit scheduling of a function's prologue sequence
+
+msingle-pic-base
+Target Report Mask(SINGLE_PIC_BASE)
+Do not load the PIC register in function prologues
+
+msoft-float
+Target RejectNegative
+Alias for -mfloat-abi=soft
+
+mstructure-size-boundary=
+Target RejectNegative Joined Var(structure_size_string)
+Specify the minimum bit alignment of structures
+
+mthumb
+Target Report Mask(THUMB)
+Compile for the Thumb, not the ARM
+
+mthumb-interwork
+Target Report Mask(INTERWORK)
+Support calls between Thumb and ARM instruction sets
+
+mtp=
+Target RejectNegative Joined Var(target_thread_switch)
+Specify how to access the thread pointer
+
+mtpcs-frame
+Target Report Mask(TPCS_FRAME)
+Thumb: Generate (non-leaf) stack frames even if not needed
+
+mtpcs-leaf-frame
+Target Report Mask(TPCS_LEAF_FRAME)
+Thumb: Generate (leaf) stack frames even if not needed
+
+mtune=
+Target RejectNegative Joined
+Tune code for the given processor
+
+mwords-little-endian
+Target Report RejectNegative Mask(LITTLE_WORDS)
+Assume big endian bytes, little endian words
+
+mvectorize-with-neon-quad
+Target Report Mask(NEON_VECTORIZE_QUAD)
+Use Neon quad-word (rather than double-word) registers for vectorization
+
+mword-relocations
+Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
+Only generate absolute relocations on word-sized values
+
+mfix-cortex-m3-ldrd
+Target Report Var(fix_cm3_ldrd) Init(2)
+Avoid overlapping destination and address registers on LDRD instructions
+that may trigger Cortex-M3 errata.
diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md
new file mode 100644
index 000000000..280af12f9
--- /dev/null
+++ b/gcc/config/arm/arm1020e.md
@@ -0,0 +1,375 @@
+;; ARM 1020E & ARM 1022E Pipeline Description
+;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; These descriptions are based on the information contained in the
+;; ARM1020E Technical Reference Manual, Copyright (c) 2003 ARM
+;; Limited.
+;;
+
+;; This automaton provides a pipeline description for the ARM
+;; 1020E core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "arm1020e")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There are two pipelines:
+;;
+;; - An Arithmetic Logic Unit (ALU) pipeline.
+;;
+;; The ALU pipeline has fetch, issue, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+;;
+;; - A Load-Store Unit (LSU) pipeline.
+;;
+;; The LSU pipeline has decode, execute, memory, and write stages.
+;; We only model the execute, memory and write stages.
+
+(define_cpu_unit "1020a_e,1020a_m,1020a_w" "arm1020e")
+(define_cpu_unit "1020l_e,1020l_m,1020l_w" "arm1020e")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require three cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations with no shifted operand
+(define_insn_reservation "1020alu_op" 1
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "alu"))
+ "1020a_e,1020a_m,1020a_w")
+
+;; ALU operations with a shift-by-constant operand
+(define_insn_reservation "1020alu_shift_op" 1
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "alu_shift"))
+ "1020a_e,1020a_m,1020a_w")
+
+;; ALU operations with a shift-by-register operand
+;; These really stall in the decoder, in order to read
+;; the shift value in a second cycle. Pretend we take two cycles in
+;; the execute stage.
+(define_insn_reservation "1020alu_shift_reg_op" 2
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "alu_shift_reg"))
+ "1020a_e*2,1020a_m,1020a_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Multiplication instructions loop in the execute stage until the
+;; instruction has been passed through the multiplier array enough
+;; times.
+
+;; The result of the "smul" and "smulw" instructions is not available
+;; until after the memory stage.
+(define_insn_reservation "1020mult1" 2
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "smulxy,smulwy"))
+ "1020a_e,1020a_m,1020a_w")
+
+;; The "smlaxy" and "smlawx" instructions require two iterations through
+;; the execute stage; the result is available immediately following
+;; the execute stage.
+(define_insn_reservation "1020mult2" 2
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
+ "1020a_e*2,1020a_m,1020a_w")
+
+;; The "smlalxy", "mul", and "mla" instructions require two iterations
+;; through the execute stage; the result is not available until after
+;; the memory stage.
+(define_insn_reservation "1020mult3" 3
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "smlalxy,mul,mla"))
+ "1020a_e*2,1020a_m,1020a_w")
+
+;; The "muls" and "mlas" instructions loop in the execute stage for
+;; four iterations in order to set the flags. The value result is
+;; available after three iterations.
+(define_insn_reservation "1020mult4" 3
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "muls,mlas"))
+ "1020a_e*4,1020a_m,1020a_w")
+
+;; Long multiply instructions that produce two registers of
+;; output (such as umull) make their results available in two cycles;
+;; the least significant word is available before the most significant
+;; word. That fact is not modeled; instead, the instructions are
+;; described as if the entire result were available at the end of the
+;; cycle in which both words are available.
+
+;; The "umull", "umlal", "smull", and "smlal" instructions all take
+;; three iterations through the execute cycle, and make their results
+;; available after the memory cycle.
+(define_insn_reservation "1020mult5" 4
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "umull,umlal,smull,smlal"))
+ "1020a_e*3,1020a_m,1020a_w")
+
+;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
+;; the execute stage for five iterations in order to set the flags.
+;; The value result is available after four iterations.
+(define_insn_reservation "1020mult6" 4
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ "1020a_e*5,1020a_m,1020a_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with and without a base register
+;; writeback (such as "ldm!").  These models assume that all memory
+;; references hit in dcache.
+
+;; LSU instructions require six cycles to execute. They use the ALU
+;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles
+;; three through six.
+;; Loads and stores which use a scaled register offset or scaled
+;; register pre-indexed addressing mode take three cycles EXCEPT for
+;; those that are base + offset with LSL of 0 or 2, or base - offset
+;; with LSL of zero. The remainder take 1 cycle to execute.
+;; For 4-byte loads there is a bypass from the load stage.
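+;;
+;; For example (hypothetical registers), "ldr r0, [r1, r2, lsl #3]"
+;; takes the slower path, while "ldr r0, [r1, r2, lsl #2]" (base +
+;; offset with LSL of 2) executes in a single cycle.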
+
+(define_insn_reservation "1020load1_op" 2
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "load_byte,load1"))
+ "1020a_e+1020l_e,1020l_m,1020l_w")
+
+(define_insn_reservation "1020store1_op" 0
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "store1"))
+ "1020a_e+1020l_e,1020l_m,1020l_w")
+
+;; A load's result can be stored by an immediately following store
+(define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep")
+
+;; On a LDM/STM operation, the LSU pipeline iterates until all of the
+;; registers have been processed.
+;;
+;; The time it takes to load the data depends on whether or not the
+;; base address is 64-bit aligned; if it is not, an additional cycle
+;; is required. This model assumes that the address is always 64-bit
+;; aligned. Because the processor can load two registers per cycle,
+;; that assumption means that we use the same instruction reservations
+;; for loading 2k and 2k - 1 registers.
+;;
+;; The ALU pipeline is decoupled after the first cycle unless there is
+;; a register dependency; the dependency is cleared as soon as the LDM/STM
+;; has dealt with the corresponding register. So for example,
+;; stmia sp, {r0-r3}
+;; add r0, r0, #4
+;; will have one fewer stalls than
+;; stmia sp, {r0-r3}
+;; add r3, r3, #4
+;;
+;; As with ALU operations, if one of the destination registers is the
+;; PC, there are additional stalls; that is not modeled.
+
+(define_insn_reservation "1020load2_op" 2
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "load2"))
+ "1020a_e+1020l_e,1020l_m,1020l_w")
+
+(define_insn_reservation "1020store2_op" 0
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "store2"))
+ "1020a_e+1020l_e,1020l_m,1020l_w")
+
+(define_insn_reservation "1020load34_op" 3
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "load3,load4"))
+ "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
+
+(define_insn_reservation "1020store34_op" 0
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "store3,store4"))
+ "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The ARM
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "1020branch_op" 0
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "branch"))
+ "1020a_e")
+
+;; The latency for a call is not predictable. Therefore, we use 32 as
+;; roughly equivalent to positive infinity.
+
+(define_insn_reservation "1020call_op" 32
+ (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "type" "call"))
+ "1020a_e*32")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_cpu_unit "v10_fmac" "arm1020e")
+
+(define_cpu_unit "v10_ds" "arm1020e")
+
+(define_cpu_unit "v10_fmstat" "arm1020e")
+
+(define_cpu_unit "v10_ls1,v10_ls2,v10_ls3" "arm1020e")
+
+;; fmstat is a serializing instruction. It will stall the core until
+;; the mac and ds units have completed.
+(exclusion_set "v10_fmac,v10_ds" "v10_fmstat")
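+;; (An exclusion_set tells the scheduler that units in the first list
+;; may never be reserved in the same cycle as units in the second,
+;; which is how this stall is expressed.)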
+
+(define_attr "vfp10" "yes,no"
+ (const (if_then_else (and (eq_attr "tune" "arm1020e,arm1022e")
+ (eq_attr "fpu" "vfp"))
+ (const_string "yes") (const_string "no"))))
+
+;; Note, no instruction can issue to the VFP if the core is stalled in the
+;; first execute stage. We model this by using 1020a_e in the first cycle.
+(define_insn_reservation "v10_ffarith" 5
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
+ "1020a_e+v10_fmac")
+
+(define_insn_reservation "v10_farith" 5
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "faddd,fadds"))
+ "1020a_e+v10_fmac")
+
+(define_insn_reservation "v10_cvt" 5
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_cvt"))
+ "1020a_e+v10_fmac")
+
+(define_insn_reservation "v10_fmul" 6
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "fmuls,fmacs,fmuld,fmacd"))
+ "1020a_e+v10_fmac*2")
+
+(define_insn_reservation "v10_fdivs" 18
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "fdivs"))
+ "1020a_e+v10_ds*14")
+
+(define_insn_reservation "v10_fdivd" 32
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "fdivd"))
+ "1020a_e+v10_fmac+v10_ds*28")
+
+(define_insn_reservation "v10_floads" 4
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_loads"))
+ "1020a_e+1020l_e+v10_ls1,v10_ls2")
+
+;; We model a load of a double as needing all the vfp ls* stages in
+;; cycle 1. This gives the correct mix between single and double loads,
+;; where an flds followed by an fldd will stall for one cycle, but two
+;; back-to-back fldd insns stall for two cycles.
+(define_insn_reservation "v10_floadd" 5
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_loadd"))
+ "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3")
+
+;; Moves to/from arm regs also use the load/store pipeline.
+
+(define_insn_reservation "v10_c2v" 4
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "r_2_f"))
+ "1020a_e+1020l_e+v10_ls1,v10_ls2")
+
+(define_insn_reservation "v10_fstores" 1
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_stores"))
+ "1020a_e+1020l_e+v10_ls1,v10_ls2")
+
+(define_insn_reservation "v10_fstored" 1
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_stored"))
+ "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3")
+
+(define_insn_reservation "v10_v2c" 1
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_2_r"))
+ "1020a_e+1020l_e,1020l_m,1020l_w")
+
+(define_insn_reservation "v10_to_cpsr" 2
+ (and (eq_attr "vfp10" "yes")
+ (eq_attr "type" "f_flag"))
+ "1020a_e+v10_fmstat,1020a_e+1020l_e,1020l_m,1020l_w")
+
+;; VFP bypasses
+
+;; There are bypasses for most operations other than store
+
+(define_bypass 3
+ "v10_c2v,v10_floads"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd,v10_cvt")
+
+(define_bypass 4
+ "v10_floadd"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd")
+
+;; Arithmetic to other arithmetic saves a cycle due to forwarding
+(define_bypass 4
+ "v10_ffarith,v10_farith"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd")
+
+(define_bypass 5
+ "v10_fmul"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd")
+
+(define_bypass 17
+ "v10_fdivs"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd")
+
+(define_bypass 31
+ "v10_fdivd"
+ "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd")
+
+;; VFP anti-dependencies.
+
+;; There is one anti-dependence in the following case (not yet modelled):
+;; - After a store: one extra cycle for both fsts and fstd
+;; Note, back-to-back fstd instructions will overload the load/store datapath
+;; causing a two-cycle stall.
diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md
new file mode 100644
index 000000000..e62213638
--- /dev/null
+++ b/gcc/config/arm/arm1026ejs.md
@@ -0,0 +1,240 @@
+;; ARM 1026EJ-S Pipeline Description
+;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+;; Written by CodeSourcery, LLC.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; These descriptions are based on the information contained in the
+;; ARM1026EJ-S Technical Reference Manual, Copyright (c) 2003 ARM
+;; Limited.
+;;
+
+;; This automaton provides a pipeline description for the ARM
+;; 1026EJ-S core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "arm1026ejs")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There are two pipelines:
+;;
+;; - An Arithmetic Logic Unit (ALU) pipeline.
+;;
+;; The ALU pipeline has fetch, issue, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+;;
+;; - A Load-Store Unit (LSU) pipeline.
+;;
+;; The LSU pipeline has decode, execute, memory, and write stages.
+;; We only model the execute, memory and write stages.
+
+(define_cpu_unit "a_e,a_m,a_w" "arm1026ejs")
+(define_cpu_unit "l_e,l_m,l_w" "arm1026ejs")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require three cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations with no shifted operand
+(define_insn_reservation "alu_op" 1
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "alu"))
+ "a_e,a_m,a_w")
+
+;; ALU operations with a shift-by-constant operand
+(define_insn_reservation "alu_shift_op" 1
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "alu_shift"))
+ "a_e,a_m,a_w")
+
+;; ALU operations with a shift-by-register operand
+;; These really stall in the decoder, in order to read
+;; the shift value in a second cycle. Pretend we take two cycles in
+;; the execute stage.
+(define_insn_reservation "alu_shift_reg_op" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "alu_shift_reg"))
+ "a_e*2,a_m,a_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Multiplication instructions loop in the execute stage until the
+;; instruction has been passed through the multiplier array enough
+;; times.
+
+;; The result of the "smul" and "smulw" instructions is not available
+;; until after the memory stage.
+(define_insn_reservation "mult1" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "smulxy,smulwy"))
+ "a_e,a_m,a_w")
+
+;; The "smlaxy" and "smlawx" instructions require two iterations through
+;; the execute stage; the result is available immediately following
+;; the execute stage.
+(define_insn_reservation "mult2" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
+ "a_e*2,a_m,a_w")
+
+;; The "smlalxy", "mul", and "mla" instructions require two iterations
+;; through the execute stage; the result is not available until after
+;; the memory stage.
+(define_insn_reservation "mult3" 3
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "smlalxy,mul,mla"))
+ "a_e*2,a_m,a_w")
+
+;; The "muls" and "mlas" instructions loop in the execute stage for
+;; four iterations in order to set the flags. The value result is
+;; available after three iterations.
+(define_insn_reservation "mult4" 3
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "muls,mlas"))
+ "a_e*4,a_m,a_w")
+
+;; Long multiply instructions that produce two registers of
+;; output (such as umull) make their results available in two cycles;
+;; the least significant word is available before the most significant
+;; word. That fact is not modeled; instead, the instructions are
+;; described as if the entire result was available at the end of the
+;; cycle in which both words are available.
+
+;; The "umull", "umlal", "smull", and "smlal" instructions all take
+;; three iterations through the execute cycle, and make their results
+;; available after the memory cycle.
+(define_insn_reservation "mult5" 4
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "umull,umlal,smull,smlal"))
+ "a_e*3,a_m,a_w")
+
+;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
+;; the execute stage for five iterations in order to set the flags.
+;; The value result is available after four iterations.
+(define_insn_reservation "mult6" 4
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ "a_e*5,a_m,a_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with and without a base register
+;; writeback (such as "ldm!"). These models assume that all memory
+;; references hit in dcache.
+
+;; LSU instructions require six cycles to execute. They use the ALU
+;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles
+;; three through six.
+;; Loads and stores which use a scaled register offset or scaled
+;; register pre-indexed addressing mode take three cycles EXCEPT for
+;; those that are base + offset with LSL of 0 or 2, or base - offset
+;; with LSL of zero. The remainder take one cycle to execute.
+;; For 4-byte loads there is a bypass from the load stage.
+
+(define_insn_reservation "load1_op" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "load_byte,load1"))
+ "a_e+l_e,l_m,a_w+l_w")
+
+(define_insn_reservation "store1_op" 0
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "store1"))
+ "a_e+l_e,l_m,a_w+l_w")
+
+;; A load's result can be stored by an immediately following store
+(define_bypass 1 "load1_op" "store1_op" "arm_no_early_store_addr_dep")
+
+;; On a LDM/STM operation, the LSU pipeline iterates until all of the
+;; registers have been processed.
+;;
+;; The time it takes to load the data depends on whether or not the
+;; base address is 64-bit aligned; if it is not, an additional cycle
+;; is required. This model assumes that the address is always 64-bit
+;; aligned. Because the processor can load two registers per cycle,
+;; that assumption means that we use the same instruction reservations
+;; for loading 2k and 2k - 1 registers.
+;;
+;; The ALU pipeline is stalled until the completion of the last memory
+;; stage in the LSU pipeline. That is modeled by keeping the ALU
+;; execute stage busy until that point.
+;;
+;; As with ALU operations, if one of the destination registers is the
+;; PC, there are additional stalls; that is not modeled.
+
+(define_insn_reservation "load2_op" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "load2"))
+ "a_e+l_e,l_m,a_w+l_w")
+
+(define_insn_reservation "store2_op" 0
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "store2"))
+ "a_e+l_e,l_m,a_w+l_w")
+
+(define_insn_reservation "load34_op" 3
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "load3,load4"))
+ "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w")
+
+(define_insn_reservation "store34_op" 0
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "store3,store4"))
+ "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The ARM
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "branch_op" 0
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "branch"))
+ "nothing")
+
+;; The latency for a call is not predictable. Therefore, we use 32 as
+;; roughly equivalent to positive infinity.
+
+(define_insn_reservation "call_op" 32
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "call"))
+ "nothing")
diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md
new file mode 100644
index 000000000..8fc30e976
--- /dev/null
+++ b/gcc/config/arm/arm1136jfs.md
@@ -0,0 +1,376 @@
+;; ARM 1136J[F]-S Pipeline Description
+;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+;; Written by CodeSourcery, LLC.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; These descriptions are based on the information contained in the
+;; ARM1136JF-S Technical Reference Manual, Copyright (c) 2003 ARM
+;; Limited.
+;;
+
+;; This automaton provides a pipeline description for the ARM
+;; 1136J-S and 1136JF-S cores.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "arm1136jfs")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There are three distinct pipelines (page 1-26 and following):
+;;
+;; - A 4-stage decode pipeline, shared by all three. It has fetch (1),
+;; fetch (2), decode, and issue stages. Since this is always involved,
+;; we do not model it in the scheduler.
+;;
+;; - A 4-stage ALU pipeline. It has shifter, ALU (main integer operations),
+;; and saturation stages. The fourth stage is writeback; see below.
+;;
+;; - A 4-stage multiply-accumulate pipeline. It has three stages, called
+;; MAC1 through MAC3, and a fourth writeback stage.
+;;
+;; The 4th-stage writeback is shared between the ALU and MAC pipelines,
+;; which operate in lockstep. Results from either pipeline will be
+;; moved into the writeback stage. Because the two pipelines operate
+;; in lockstep, we schedule them as a single "execute" pipeline.
+;;
+;; - A 4-stage LSU pipeline. It has address generation, data cache (1),
+;; data cache (2), and writeback stages. (Note that this pipeline,
+;;   including the writeback stage, is independent from the ALU & MAC pipes.)
+
+(define_cpu_unit "e_1,e_2,e_3,e_wb" "arm1136jfs") ; ALU and MAC
+; e_1 = Sh/Mac1, e_2 = ALU/Mac2, e_3 = SAT/Mac3
+(define_cpu_unit "l_a,l_dc1,l_dc2,l_wb" "arm1136jfs") ; Load/Store
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require eight cycles to execute, and use the ALU
+;; pipeline in each of the eight stages. The results are available
+;; after the alu stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modelled here.
+
+;; ALU operations with no shifted operand
+(define_insn_reservation "11_alu_op" 2
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "alu"))
+ "e_1,e_2,e_3,e_wb")
+
+;; ALU operations with a shift-by-constant operand
+(define_insn_reservation "11_alu_shift_op" 2
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "alu_shift"))
+ "e_1,e_2,e_3,e_wb")
+
+;; ALU operations with a shift-by-register operand
+;; These really stall in the decoder, in order to read
+;; the shift value in a second cycle. Pretend we take two cycles in
+;; the shift stage.
+(define_insn_reservation "11_alu_shift_reg_op" 3
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "alu_shift_reg"))
+ "e_1*2,e_2,e_3,e_wb")
+
+;; alu_ops can start sooner, if there is no shifter dependency
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_alu_op")
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_alu_op")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Multiplication instructions loop in the first two execute stages until
+;; the instruction has been passed through the multiplier array enough
+;; times.
+
+;; Multiply and multiply-accumulate results are available after four stages.
+(define_insn_reservation "11_mult1" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "mul,mla"))
+ "e_1*2,e_2,e_3,e_wb")
+
+;; The *S variants set the condition flags, which requires three more cycles.
+(define_insn_reservation "11_mult2" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "muls,mlas"))
+ "e_1*2,e_2,e_3,e_wb")
+
+(define_bypass 3 "11_mult1,11_mult2"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 3 "11_mult1,11_mult2"
+ "11_alu_op")
+(define_bypass 3 "11_mult1,11_mult2"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 3 "11_mult1,11_mult2"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "11_mult1,11_mult2"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+
+;; Signed and unsigned multiply long results are available across two cycles;
+;; the less significant word is available one cycle before the more significant
+;; word. Here we conservatively wait until both are available, which is
+;; after three iterations and the memory cycle. The same is also true of
+;; the two multiply-accumulate instructions.
+(define_insn_reservation "11_mult3" 5
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "smull,umull,smlal,umlal"))
+ "e_1*3,e_2,e_3,e_wb*2")
+
+;; The *S variants set the condition flags, which requires three more cycles.
+(define_insn_reservation "11_mult4" 5
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "smulls,umulls,smlals,umlals"))
+ "e_1*3,e_2,e_3,e_wb*2")
+
+(define_bypass 4 "11_mult3,11_mult4"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 4 "11_mult3,11_mult4"
+ "11_alu_op")
+(define_bypass 4 "11_mult3,11_mult4"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 4 "11_mult3,11_mult4"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 4 "11_mult3,11_mult4"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+
+;; Various 16x16->32 multiplies and multiply-accumulates, using combinations
+;; of high and low halves of the argument registers. They take a single
+;; pass through the pipeline and make the result available after three
+;; cycles.
+(define_insn_reservation "11_mult5" 3
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx"))
+ "e_1,e_2,e_3,e_wb")
+
+(define_bypass 2 "11_mult5"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 2 "11_mult5"
+ "11_alu_op")
+(define_bypass 2 "11_mult5"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 2 "11_mult5"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "11_mult5"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+
+;; The same idea, but the 32-bit result is then added to a 64-bit quantity.
+(define_insn_reservation "11_mult6" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "smlalxy"))
+ "e_1*2,e_2,e_3,e_wb*2")
+
+;; Signed 32x32 multiply, then the most significant 32 bits are extracted
+;; and are available after the memory stage.
+(define_insn_reservation "11_mult7" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "insn" "smmul,smmulr"))
+ "e_1*2,e_2,e_3,e_wb")
+
+(define_bypass 3 "11_mult6,11_mult7"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 3 "11_mult6,11_mult7"
+ "11_alu_op")
+(define_bypass 3 "11_mult6,11_mult7"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 3 "11_mult6,11_mult7"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "11_mult6,11_mult7"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; These vary greatly depending on their arguments and the results of
+;; branch prediction. Cycle count ranges from zero (unconditional branch,
+;; folded dynamic prediction) to seven (incorrect predictions, etc). We
+;; assume an optimal case for now, because the cost of a cache miss
+;; overwhelms the cost of everything else anyhow.
+
+(define_insn_reservation "11_branches" 0
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "branch"))
+ "nothing")
+
+;; Call latencies are not predictable. A semi-arbitrary very large
+;; number is used as "positive infinity" so that everything should be
+;; finished by the time of return.
+(define_insn_reservation "11_call" 32
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "call"))
+ "nothing")
+
+;; Branches are predicted. A correctly predicted branch will be no
+;; cost, but we're conservative here, and use the timings that a
+;; late register operand would give us.
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_branches")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_branches")
+(define_bypass 2 "11_load1,11_load2"
+ "11_branches")
+(define_bypass 3 "11_load34"
+ "11_branches")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with and without a base register
+;; writeback.
+;; These models assume that all memory references hit in dcache. Also,
+;; if the PC is one of the registers involved, there are additional stalls
+;; not modelled here. Addressing modes are also not modelled.
+
+(define_insn_reservation "11_load1" 3
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "load1"))
+ "l_a+e_1,l_dc1,l_dc2,l_wb")
+
+;; Load byte results are not available until the writeback stage, where
+;; the correct byte is extracted.
+
+(define_insn_reservation "11_loadb" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "load_byte"))
+ "l_a+e_1,l_dc1,l_dc2,l_wb")
+
+(define_insn_reservation "11_store1" 0
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "store1"))
+ "l_a+e_1,l_dc1,l_dc2,l_wb")
+
+;; Load/store double words into adjacent registers. The timing and
+;; latencies are different depending on whether the address is 64-bit
+;; aligned. This model assumes that it is.
+(define_insn_reservation "11_load2" 3
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "load2"))
+ "l_a+e_1,l_dc1,l_dc2,l_wb")
+
+(define_insn_reservation "11_store2" 0
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "store2"))
+ "l_a+e_1,l_dc1,l_dc2,l_wb")
+
+;; Load/store multiple registers. Two registers are transferred per cycle.
+;; Actual timing depends on how many registers are affected, so we
+;; optimistically schedule a low latency.
+(define_insn_reservation "11_load34" 4
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "load3,load4"))
+ "l_a+e_1,l_dc1*2,l_dc2,l_wb")
+
+(define_insn_reservation "11_store34" 0
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "store3,store4"))
+ "l_a+e_1,l_dc1*2,l_dc2,l_wb")
+
+;; A store can start immediately after an alu op, if that alu op does
+;; not provide part of the address to access.
+(define_bypass 1 "11_alu_op,11_alu_shift_op"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+(define_bypass 2 "11_alu_shift_reg_op"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+
+;; An alu op can start sooner after a load, if that alu op does not
+;; have an early register dependency on the load
+(define_bypass 2 "11_load1"
+ "11_alu_op")
+(define_bypass 2 "11_load1"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 2 "11_load1"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+
+(define_bypass 3 "11_loadb"
+ "11_alu_op")
+(define_bypass 3 "11_loadb"
+ "11_alu_shift_op"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 3 "11_loadb"
+ "11_alu_shift_reg_op"
+ "arm_no_early_alu_shift_dep")
+
+;; A mul op can start sooner after a load, if that mul op does not
+;; have an early multiply dependency
+(define_bypass 2 "11_load1"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 3 "11_load34"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+(define_bypass 3 "11_loadb"
+ "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
+ "arm_no_early_mul_dep")
+
+;; A store can start sooner after a load, if that load does not
+;; produce part of the address to access
+(define_bypass 2 "11_load1"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
+(define_bypass 3 "11_loadb"
+ "11_store1"
+ "arm_no_early_store_addr_dep")
diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md
new file mode 100644
index 000000000..d3908f9e3
--- /dev/null
+++ b/gcc/config/arm/arm926ejs.md
@@ -0,0 +1,187 @@
+;; ARM 926EJ-S Pipeline Description
+;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+;; Written by CodeSourcery, LLC.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; These descriptions are based on the information contained in the
+;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
+;; Limited.
+;;
+
+;; This automaton provides a pipeline description for the ARM
+;; 926EJ-S core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "arm926ejs")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There is a single pipeline.
+;;
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+(define_cpu_unit "e,m,w" "arm926ejs")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require three cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations with no shifted operand
+(define_insn_reservation "9_alu_op" 1
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "alu,alu_shift"))
+ "e,m,w")
+
+;; ALU operations with a shift-by-register operand
+;; These really stall in the decoder, in order to read
+;; the shift value in a second cycle. Pretend we take two cycles in
+;; the execute stage.
+(define_insn_reservation "9_alu_shift_reg_op" 2
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "alu_shift_reg"))
+ "e*2,m,w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Multiplication instructions loop in the execute stage until the
+;; instruction has been passed through the multiplier array enough
+;; times. Multiply operations occur in both the execute and memory
+;; stages of the pipeline.
+
+(define_insn_reservation "9_mult1" 3
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "smlalxy,mul,mla"))
+ "e*2,m,w")
+
+(define_insn_reservation "9_mult2" 4
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "muls,mlas"))
+ "e*3,m,w")
+
+(define_insn_reservation "9_mult3" 4
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "umull,umlal,smull,smlal"))
+ "e*3,m,w")
+
+(define_insn_reservation "9_mult4" 5
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ "e*4,m,w")
+
+(define_insn_reservation "9_mult5" 2
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "smulxy,smlaxy,smlawx"))
+ "e,m,w")
+
+(define_insn_reservation "9_mult6" 3
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "insn" "smlalxy"))
+ "e*2,m,w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with and without a base register
+;; writeback (such as "ldm!"). These models assume that all memory
+;; references hit in dcache.
+
+;; Loads with a shifted offset take 3 cycles; we use that figure because
+;; (a) they are probably the most common case and (b) the pessimistic
+;; assumption leads to fewer stalls.
+(define_insn_reservation "9_load1_op" 3
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "load1,load_byte"))
+ "e*2,m,w")
+
+(define_insn_reservation "9_store1_op" 0
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "store1"))
+ "e,m,w")
+
+;; multiple word loads and stores
+(define_insn_reservation "9_load2_op" 3
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "load2"))
+ "e,m*2,w")
+
+(define_insn_reservation "9_load3_op" 4
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "load3"))
+ "e,m*3,w")
+
+(define_insn_reservation "9_load4_op" 5
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "load4"))
+ "e,m*4,w")
+
+(define_insn_reservation "9_store2_op" 0
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "store2"))
+ "e,m*2,w")
+
+(define_insn_reservation "9_store3_op" 0
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "store3"))
+ "e,m*3,w")
+
+(define_insn_reservation "9_store4_op" 0
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "store4"))
+ "e,m*4,w")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The ARM
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "9_branch_op" 0
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "branch"))
+ "nothing")
+
+;; The latency for a call is not predictable. Therefore, we use 32 as
+;; roughly equivalent to positive infinity.
+
+(define_insn_reservation "9_call_op" 32
+ (and (eq_attr "tune" "arm926ejs")
+ (eq_attr "type" "call"))
+ "nothing")
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
new file mode 100644
index 000000000..9cba0a90a
--- /dev/null
+++ b/gcc/config/arm/arm_neon.h
@@ -0,0 +1,12176 @@
+/* ARM NEON intrinsics include file. This file is generated automatically
+ using neon-gen.ml. Please do not edit manually.
+
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _GCC_ARM_NEON_H
+#define _GCC_ARM_NEON_H 1
+
+#ifndef __ARM_NEON__
+#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h
+#else
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_di int64x1_t;
+typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8)));
+typedef __builtin_neon_udi uint64x1_t;
+typedef __builtin_neon_qi int8x16_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_hi int16x8_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_si int32x4_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_di int64x2_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16)));
+typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16)));
+
+typedef float float32_t;
+typedef __builtin_neon_poly8 poly8_t;
+typedef __builtin_neon_poly16 poly16_t;
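+
+/* The vector type names encode lane count and element width:
+   int16x4_t is four 16-bit lanes in a 64-bit (D) register, int16x8_t
+   is eight lanes in a 128-bit (Q) register.  The NxM struct types
+   below bundle 2, 3 or 4 such vectors; they are the operand and
+   result types of the structure load/store intrinsics (vld2/vst2 and
+   friends).  */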
+
+typedef struct int8x8x2_t
+{
+ int8x8_t val[2];
+} int8x8x2_t;
+
+typedef struct int8x16x2_t
+{
+ int8x16_t val[2];
+} int8x16x2_t;
+
+typedef struct int16x4x2_t
+{
+ int16x4_t val[2];
+} int16x4x2_t;
+
+typedef struct int16x8x2_t
+{
+ int16x8_t val[2];
+} int16x8x2_t;
+
+typedef struct int32x2x2_t
+{
+ int32x2_t val[2];
+} int32x2x2_t;
+
+typedef struct int32x4x2_t
+{
+ int32x4_t val[2];
+} int32x4x2_t;
+
+typedef struct int64x1x2_t
+{
+ int64x1_t val[2];
+} int64x1x2_t;
+
+typedef struct int64x2x2_t
+{
+ int64x2_t val[2];
+} int64x2x2_t;
+
+typedef struct uint8x8x2_t
+{
+ uint8x8_t val[2];
+} uint8x8x2_t;
+
+typedef struct uint8x16x2_t
+{
+ uint8x16_t val[2];
+} uint8x16x2_t;
+
+typedef struct uint16x4x2_t
+{
+ uint16x4_t val[2];
+} uint16x4x2_t;
+
+typedef struct uint16x8x2_t
+{
+ uint16x8_t val[2];
+} uint16x8x2_t;
+
+typedef struct uint32x2x2_t
+{
+ uint32x2_t val[2];
+} uint32x2x2_t;
+
+typedef struct uint32x4x2_t
+{
+ uint32x4_t val[2];
+} uint32x4x2_t;
+
+typedef struct uint64x1x2_t
+{
+ uint64x1_t val[2];
+} uint64x1x2_t;
+
+typedef struct uint64x2x2_t
+{
+ uint64x2_t val[2];
+} uint64x2x2_t;
+
+typedef struct float32x2x2_t
+{
+ float32x2_t val[2];
+} float32x2x2_t;
+
+typedef struct float32x4x2_t
+{
+ float32x4_t val[2];
+} float32x4x2_t;
+
+typedef struct poly8x8x2_t
+{
+ poly8x8_t val[2];
+} poly8x8x2_t;
+
+typedef struct poly8x16x2_t
+{
+ poly8x16_t val[2];
+} poly8x16x2_t;
+
+typedef struct poly16x4x2_t
+{
+ poly16x4_t val[2];
+} poly16x4x2_t;
+
+typedef struct poly16x8x2_t
+{
+ poly16x8_t val[2];
+} poly16x8x2_t;
+
+typedef struct int8x8x3_t
+{
+ int8x8_t val[3];
+} int8x8x3_t;
+
+typedef struct int8x16x3_t
+{
+ int8x16_t val[3];
+} int8x16x3_t;
+
+typedef struct int16x4x3_t
+{
+ int16x4_t val[3];
+} int16x4x3_t;
+
+typedef struct int16x8x3_t
+{
+ int16x8_t val[3];
+} int16x8x3_t;
+
+typedef struct int32x2x3_t
+{
+ int32x2_t val[3];
+} int32x2x3_t;
+
+typedef struct int32x4x3_t
+{
+ int32x4_t val[3];
+} int32x4x3_t;
+
+typedef struct int64x1x3_t
+{
+ int64x1_t val[3];
+} int64x1x3_t;
+
+typedef struct int64x2x3_t
+{
+ int64x2_t val[3];
+} int64x2x3_t;
+
+typedef struct uint8x8x3_t
+{
+ uint8x8_t val[3];
+} uint8x8x3_t;
+
+typedef struct uint8x16x3_t
+{
+ uint8x16_t val[3];
+} uint8x16x3_t;
+
+typedef struct uint16x4x3_t
+{
+ uint16x4_t val[3];
+} uint16x4x3_t;
+
+typedef struct uint16x8x3_t
+{
+ uint16x8_t val[3];
+} uint16x8x3_t;
+
+typedef struct uint32x2x3_t
+{
+ uint32x2_t val[3];
+} uint32x2x3_t;
+
+typedef struct uint32x4x3_t
+{
+ uint32x4_t val[3];
+} uint32x4x3_t;
+
+typedef struct uint64x1x3_t
+{
+ uint64x1_t val[3];
+} uint64x1x3_t;
+
+typedef struct uint64x2x3_t
+{
+ uint64x2_t val[3];
+} uint64x2x3_t;
+
+typedef struct float32x2x3_t
+{
+ float32x2_t val[3];
+} float32x2x3_t;
+
+typedef struct float32x4x3_t
+{
+ float32x4_t val[3];
+} float32x4x3_t;
+
+typedef struct poly8x8x3_t
+{
+ poly8x8_t val[3];
+} poly8x8x3_t;
+
+typedef struct poly8x16x3_t
+{
+ poly8x16_t val[3];
+} poly8x16x3_t;
+
+typedef struct poly16x4x3_t
+{
+ poly16x4_t val[3];
+} poly16x4x3_t;
+
+typedef struct poly16x8x3_t
+{
+ poly16x8_t val[3];
+} poly16x8x3_t;
+
+typedef struct int8x8x4_t
+{
+ int8x8_t val[4];
+} int8x8x4_t;
+
+typedef struct int8x16x4_t
+{
+ int8x16_t val[4];
+} int8x16x4_t;
+
+typedef struct int16x4x4_t
+{
+ int16x4_t val[4];
+} int16x4x4_t;
+
+typedef struct int16x8x4_t
+{
+ int16x8_t val[4];
+} int16x8x4_t;
+
+typedef struct int32x2x4_t
+{
+ int32x2_t val[4];
+} int32x2x4_t;
+
+typedef struct int32x4x4_t
+{
+ int32x4_t val[4];
+} int32x4x4_t;
+
+typedef struct int64x1x4_t
+{
+ int64x1_t val[4];
+} int64x1x4_t;
+
+typedef struct int64x2x4_t
+{
+ int64x2_t val[4];
+} int64x2x4_t;
+
+typedef struct uint8x8x4_t
+{
+ uint8x8_t val[4];
+} uint8x8x4_t;
+
+typedef struct uint8x16x4_t
+{
+ uint8x16_t val[4];
+} uint8x16x4_t;
+
+typedef struct uint16x4x4_t
+{
+ uint16x4_t val[4];
+} uint16x4x4_t;
+
+typedef struct uint16x8x4_t
+{
+ uint16x8_t val[4];
+} uint16x8x4_t;
+
+typedef struct uint32x2x4_t
+{
+ uint32x2_t val[4];
+} uint32x2x4_t;
+
+typedef struct uint32x4x4_t
+{
+ uint32x4_t val[4];
+} uint32x4x4_t;
+
+typedef struct uint64x1x4_t
+{
+ uint64x1_t val[4];
+} uint64x1x4_t;
+
+typedef struct uint64x2x4_t
+{
+ uint64x2_t val[4];
+} uint64x2x4_t;
+
+typedef struct float32x2x4_t
+{
+ float32x2_t val[4];
+} float32x2x4_t;
+
+typedef struct float32x4x4_t
+{
+ float32x4_t val[4];
+} float32x4x4_t;
+
+typedef struct poly8x8x4_t
+{
+ poly8x8_t val[4];
+} poly8x8x4_t;
+
+typedef struct poly8x16x4_t
+{
+ poly8x16_t val[4];
+} poly8x16x4_t;
+
+typedef struct poly16x4x4_t
+{
+ poly16x4_t val[4];
+} poly16x4x4_t;
+
+typedef struct poly16x8x4_t
+{
+ poly16x8_t val[4];
+} poly16x8x4_t;
+
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1);
+}
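+
+/* Usage sketch (illustrative only; add8 is a hypothetical helper):
+   each vadd_* intrinsic maps to a single NEON VADD instruction, e.g.
+
+       int8x8_t add8 (int8x8_t a, int8x8_t b)
+       {
+         return vadd_s8 (a, b);   (compiles to one vadd.i8)
+       }
+
+   The vaddq_* variants below do the same on 128-bit Q-register
+   operands.  */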
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vadd_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vadd_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vadd_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vaddq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vaddq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vaddq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vaddq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vaddq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vaddl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vaddl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vaddl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vaddlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vaddlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vaddlv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
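+
+/* vaddl_* widen both operands before adding (two int8x8_t inputs give
+   an int16x8_t result); the vaddw_* variants below widen only the
+   second operand.  */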
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vaddw_s8 (int16x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vaddw_s16 (int32x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vaddw_s32 (int64x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vaddwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vaddwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vaddwv2si ((int64x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vhadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vhadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vhadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vhaddq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vhaddq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vhaddq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
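+
+/* vhadd_* compute the halving add (a + b) >> 1 without intermediate
+   overflow; the vrhadd_* variants below round the result instead of
+   truncating.  */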
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrhadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrhadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrhadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 4);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 4);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 4);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 4);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqadd_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vqadddi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vqaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vqaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vqaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vqadddi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqaddq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqaddq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqaddq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqaddq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vqaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vqaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vqaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vqaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
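+/* vaddhn_*: add and narrow, keeping the most significant half of each
+   widened sum (VADDHN).  */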
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vaddhn_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vaddhn_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vaddhn_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
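+/* vraddhn_*: as vaddhn, but with rounding before the high half is
+   taken (VRADDHN); note the builtin's final argument changes from 1/0
+   to 5/4, which appears to be how the rounding form is requested.  */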
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vraddhn_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vraddhn_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vraddhn_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4);
+}
+
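+/* vmul_*, vmulq_*: element-wise multiply (VMUL).  The _p8 forms
+   perform polynomial (carry-less) multiplication.  */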
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmul_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmul_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmul_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmulq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (poly8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 2);
+}
+
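+/* vqdmulh_*, vqdmulhq_*: saturating doubling multiply returning the
+   high half of each product (VQDMULH); signed 16- and 32-bit elements
+   only.  For 16-bit lanes this is roughly (2 * __a * __b) >> 16, with
+   saturation of the INT16_MIN * INT16_MIN corner case.  */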
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1);
+}
+
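+/* vqrdmulh_*: rounding variant of vqdmulh (VQRDMULH); same builtin,
+   selected by a trailing 5 instead of 1.  */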
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 5);
+}
+
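+/* vmull_*: long multiply; 64-bit vector operands, each product
+   returned at twice the element width (VMULL).  */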
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmull_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmull_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vmullv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vmullv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmull_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (poly16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
+}
+
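+/* vqdmull_*: saturating doubling long multiply (VQDMULL).  */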
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmull_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmull_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1);
+}
+
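+/* vmla_*, vmlaq_*: multiply-accumulate, __a + __b * __c per element
+   (VMLA).  */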
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
+{
+ return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
+{
+ return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0);
+}
+
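+/* vmlal_*: long multiply-accumulate; the product __b * __c is widened
+   before being added to the wide accumulator __a (VMLAL).  */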
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmlalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vmlalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
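+/* vqdmlal_*: saturating doubling long multiply-accumulate
+   (VQDMLAL).  */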
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1);
+}
+
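+/* vmls_*, vmlsq_*: multiply-subtract, __a - __b * __c per element
+   (VMLS).  */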
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
+{
+ return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
+{
+ return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0);
+}
+
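+/* vmlsl_*: long multiply-subtract; the widened product __b * __c is
+   subtracted from the wide accumulator __a (VMLSL).  */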
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmlslv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlslv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vmlslv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
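+/* vqdmlsl_*: saturating doubling long multiply-subtract (VQDMLSL).  */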
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1);
+}
+
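+/* vsub_*, vsubq_*: element-wise subtraction (VSUB).  */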
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsub_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsub_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsub_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vsub_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsub_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsub_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsub_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsub_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsub_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsubq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsubq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsubq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsubq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vsubq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
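+/* vsubl_*: long subtract; both narrow operands are widened and the
+   difference is returned at twice the element width (VSUBL).  */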
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsubl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsubl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsubl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vsublv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vsublv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vsublv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
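+/* vsubw_*: wide subtract; only the second operand __b is widened
+   before being subtracted from the wide __a (VSUBW).  */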
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsubw_s8 (int16x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsubw_s16 (int32x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsubw_s32 (int64x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vsubwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vsubwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vsubwv2si ((int64x2_t) __a, (int32x2_t) __b, 0);
+}
+
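+/* vhsub_*, vhsubq_*: halving subtract, (__a - __b) >> 1 per element
+   (VHSUB).  */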
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vhsub_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vhsub_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vhsub_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vhsub_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vhsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vhsub_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vhsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vhsub_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vhsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vhsubq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vhsubq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vhsubq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vhsubq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vhsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vhsubq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vhsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vhsubq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vhsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
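+/* vqsub_*, vqsubq_*: saturating subtraction (VQSUB).  */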
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqsub_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqsub_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqsub_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqsub_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vqsubdi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vqsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vqsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vqsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vqsubdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqsubq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqsubq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqsubq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqsubq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vqsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vqsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vqsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vqsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
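+/* vsubhn_*: subtract and narrow, keeping the most significant half of
+   each wide difference (VSUBHN).  */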
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsubhn_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsubhn_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsubhn_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
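+/* vrsubhn_*: rounding variant of vsubhn (VRSUBHN); again apparently
+   selected by the 5/4 final argument.  */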
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrsubhn_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrsubhn_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrsubhn_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4);
+}
+
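+/* The comparisons below return a mask vector of the corresponding
+   unsigned type: all-ones in lanes where the predicate holds,
+   all-zeros elsewhere.
+   vceq_*, vceqq_*: equality (VCEQ).  */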
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceq_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceq_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceq_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceq_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceq_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceq_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceq_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceq_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 2);
+}
+
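+/* vcge_*, vcgeq_*: greater than or equal (VCGE).  */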
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcge_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcge_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcge_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcge_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcge_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcge_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcge_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgeq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgeq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgeq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgeq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
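+/* vcle_*, vcleq_*: less than or equal; there is no separate builtin,
+   the vcge builtin is reused with the operands swapped.  */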
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcle_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcle_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcle_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcle_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcle_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcle_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcle_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcleq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcleq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcleq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcleq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+}
+
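+/* vcgt_*, vcgtq_*: greater than (VCGT).  */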
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgt_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgt_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgt_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
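+/* vclt_*, vcltq_*: less than, again via the swapped-operand use of the
+   vcgt builtin.  */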
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclt_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclt_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclt_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclt_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclt_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclt_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcltq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+}
+
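+/* vcage/vcale/vcagt/vcalt: absolute comparisons on floats; vcage
+   tests |__a| >= |__b| and vcagt tests |__a| > |__b| (VACGE/VACGT),
+   while the "less" forms swap the operands.  */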
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcage_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcageq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcale_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcaleq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcagt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcagtq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcalt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcaltq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 3);
+}
+
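+/* vtst_*, vtstq_*: bit test; lanes are all-ones where
+   (__a & __b) != 0 (VTST).  */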
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtst_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtst_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtst_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtst_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vtst_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vtst_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtst_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtstq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtstq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtstq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vtstq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 2);
+}
+
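+/* vabd_*, vabdq_*: absolute difference, |__a - __b| per element
+   (VABD).  */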
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vabd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vabd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vabd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vabd_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vabd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vabdv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vabd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vabdv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vabd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vabdv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabdq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabdq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabdq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vabdq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vabdq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vabdv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vabdq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vabdv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vabdq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vabdv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
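+/* vabdl_*: long absolute difference, result at twice the element
+   width (VABDL).  */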
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabdl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabdl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vabdl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vabdl_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vabdlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vabdl_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vabdlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vabdl_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vabdlv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
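+/* vaba_*, vabaq_*: absolute-difference accumulate,
+   __a + |__b - __c| per element (VABA).  */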
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint8x8_t)__builtin_neon_vabav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vabav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vabav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return (int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return (uint8x16_t)__builtin_neon_vabav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vabav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vabav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0);
+}
+
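+/* vabal_*: widening absolute difference and accumulate into the
+   double-width accumulator __a.  */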
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vabalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vabalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vabalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0);
+}
+
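+/* vmax_* and the vmin_* group below: lane-wise maximum and minimum.  */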
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmax_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmax_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmax_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmax_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmax_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmax_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmax_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmaxq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmaxq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmaxq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmaxq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vmaxv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vmaxv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vmaxv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmin_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmin_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmin_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vminv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmin_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vminv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmin_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmin_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmin_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vminv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vminq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vminq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vminq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vminv4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vminq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vminv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vminq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vminv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vminq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vminv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vminq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vminv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
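+/* vpadd_*: pairwise addition; adjacent lanes of the concatenation of
+   __a and __b are summed, so each input contributes half the result.  */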
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vpadd_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpadd_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpadd_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vpadd_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
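+/* vpaddl_*: pairwise long addition; adjacent lanes of __a are summed
+   into lanes of twice the width, halving the lane count.  */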
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpaddl_s8 (int8x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpaddl_s16 (int16x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vpaddl_s32 (int32x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vpaddlv2si (__a, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpaddl_u8 (uint8x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vpaddlv8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpaddl_u16 (uint16x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vpaddlv4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vpaddl_u32 (uint32x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vpaddlv2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vpaddlq_s8 (int8x16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vpaddlq_s16 (int16x8_t __a)
+{
+ return (int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vpaddlq_s32 (int32x4_t __a)
+{
+ return (int64x2_t)__builtin_neon_vpaddlv4si (__a, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vpaddlq_u8 (uint8x16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vpaddlv16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vpaddlq_u16 (uint16x8_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vpaddlv8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vpaddlq_u32 (uint32x4_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vpaddlv4si ((int32x4_t) __a, 0);
+}
+
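+/* vpadal_*: pairwise add and accumulate long; the widened pairwise
+   sums of __b are added to the accumulator __a.  */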
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpadal_s8 (int16x4_t __a, int8x8_t __b)
+{
+ return (int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpadal_s16 (int32x2_t __a, int16x4_t __b)
+{
+ return (int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vpadal_s32 (int64x1_t __a, int32x2_t __b)
+{
+ return (int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpadal_u8 (uint16x4_t __a, uint8x8_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vpadalv8qi ((int16x4_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpadal_u16 (uint32x2_t __a, uint16x4_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vpadalv4hi ((int32x2_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vpadalv2si ((int64x1_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vpadalq_s8 (int16x8_t __a, int8x16_t __b)
+{
+ return (int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vpadalq_s16 (int32x4_t __a, int16x8_t __b)
+{
+ return (int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vpadalq_s32 (int64x2_t __a, int32x4_t __b)
+{
+ return (int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vpadalq_u8 (uint16x8_t __a, uint8x16_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vpadalv16qi ((int16x8_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vpadalq_u16 (uint32x4_t __a, uint16x8_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vpadalv8hi ((int32x4_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vpadalq_u32 (uint64x2_t __a, uint32x4_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vpadalv4si ((int64x2_t) __a, (int32x4_t) __b, 0);
+}
+
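+/* vpmax_* and the vpmin_* group below: pairwise maximum/minimum of
+   adjacent lanes.  */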
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vpmax_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpmax_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpmax_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vpmax_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vpmax_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vpmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpmax_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vpmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpmax_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vpmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vpmin_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vpmin_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vpmin_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vpmin_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vpmin_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vpminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vpmin_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vpminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vpmin_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vpminv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
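+/* vrecps_* / vrsqrts_*: Newton-Raphson refinement steps for
+   reciprocal (2 - a*b) and reciprocal square root ((3 - a*b)/2)
+   estimates, intended for use with the vrecpe/vrsqrte estimates.  */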
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrecps_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 3);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrsqrts_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 3);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 3);
+}
+
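+/* vshl_*: shift each lane of __a left by the signed per-lane count in
+   __b; negative counts shift right.  */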
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vshl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vshl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vshl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vshl_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vshldi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vshl_u8 (uint8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vshl_u16 (uint16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vshl_u32 (uint32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vshl_u64 (uint64x1_t __a, int64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vshlq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vshlq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vshlq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vshlq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vshlq_u8 (uint8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vshlq_u16 (uint16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vshlq_u32 (uint32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vshlq_u64 (uint64x2_t __a, int64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 0);
+}
+
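+/* vrshl_*: rounding variant of vshl; note the same vshl builtins are
+   reused with selector 5 (signed) / 4 (unsigned) instead of 1/0.  */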
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrshl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrshl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrshl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vrshl_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vshldi (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrshl_u8 (uint8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrshl_u16 (uint16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrshl_u32 (uint32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vrshl_u64 (uint64x1_t __a, int64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 4);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrshlq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrshlq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrshlq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vrshlq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 4);
+}
+
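+/* vqshl_*: saturating variant of the register-count shift.  */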
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqshl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqshl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqshl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqshl_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqshl_u8 (uint8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqshl_u16 (uint16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqshl_u32 (uint32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqshl_u64 (uint64x1_t __a, int64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqshlq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqshlq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqshlq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqshlq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 0);
+}
+
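+/* vqrshl_*: saturating rounding shift; the vqshl builtins again, with
+   the rounding selector values.  */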
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqrshl_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrshl_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrshl_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqrshl_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 4);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 4);
+}
+
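+/* vshr_n_*: shift each lane right by the immediate __b
+   (1 <= __b <= element bits).  */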
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vshr_n_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vshr_n_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vshr_n_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vshr_n_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vshr_n_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vshr_n_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vshr_n_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vshr_n_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vshrq_n_s8 (int8x16_t __a, const int __b)
+{
+ return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vshrq_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vshrq_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vshrq_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vshrq_n_u8 (uint8x16_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vshrq_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vshrq_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vshrq_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 0);
+}
+
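+/* vrshr_n_*: rounding right shift by immediate.  */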
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrshr_n_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrshr_n_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrshr_n_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vrshr_n_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrshr_n_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrshr_n_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrshr_n_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vrshr_n_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 4);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrshrq_n_s8 (int8x16_t __a, const int __b)
+{
+ return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrshrq_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrshrq_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vrshrq_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrshrq_n_u8 (uint8x16_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrshrq_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrshrq_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vrshrq_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 4);
+}
+
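+/* vshrn_n_* (and the rounding vrshrn_n_* group below): right shift by
+   immediate, narrowing each lane to half its width.  */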
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vshrn_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vshrn_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vshrn_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vshrn_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vshrn_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vshrn_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrshrn_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrshrn_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrshrn_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrshrn_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrshrn_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrshrn_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 4);
+}
+
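+/* vqshrn_n_* / vqrshrn_n_*: saturating (and saturating rounding)
+   narrowing right shift by immediate.  */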
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqshrn_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqshrn_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqshrn_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqshrn_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqshrn_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqshrn_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqrshrn_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrshrn_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrshrn_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqrshrn_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqrshrn_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqrshrn_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 4);
+}
+
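+/* vqshrun_n_* / vqrshrun_n_*: narrowing right shift of a signed input
+   saturated to an unsigned result.  */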
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqshrun_n_s16 (int16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqshrun_n_s32 (int32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqshrun_n_s64 (int64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqrshrun_n_s16 (int16x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 5);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqrshrun_n_s32 (int32x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 5);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqrshrun_n_s64 (int64x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 5);
+}
+
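+/* vshl_n_*: left shift by immediate (0 <= __b < element bits).  */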
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vshl_n_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vshl_n_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vshl_n_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vshl_n_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vshl_n_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vshl_n_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vshl_n_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vshl_n_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vshlq_n_s8 (int8x16_t __a, const int __b)
+{
+ return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vshlq_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vshlq_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vshlq_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vshlq_n_u8 (uint8x16_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vshlq_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vshlq_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vshlq_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b, 0);
+}
+
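+/* vqshl_n_*: saturating left shift by immediate.  */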
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqshl_n_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqshl_n_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqshl_n_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqshl_n_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x1_t)__builtin_neon_vqshl_ndi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqshl_n_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshl_nv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqshl_n_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshl_nv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqshl_n_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshl_nv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqshl_n_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vqshl_ndi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqshlq_n_s8 (int8x16_t __a, const int __b)
+{
+ return (int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqshlq_n_s16 (int16x8_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqshlq_n_s32 (int32x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqshlq_n_s64 (int64x2_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqshlq_n_u8 (uint8x16_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vqshl_nv16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqshlq_n_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vqshl_nv8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqshlq_n_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vqshl_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqshlq_n_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vqshl_nv2di ((int64x2_t) __a, __b, 0);
+}
+
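+/* vqshlu_n_*: signed input shifted left by immediate and saturated to
+   an unsigned result.  */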
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqshlu_n_s8 (int8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqshlu_n_s16 (int16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqshlu_n_s32 (int32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vqshlu_n_s64 (int64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vqshluq_n_s8 (int8x16_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vqshluq_n_s16 (int16x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vqshluq_n_s32 (int32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vqshluq_n_s64 (int64x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1);
+}
+
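+/* vshll_n_*: widening left shift by immediate; lanes double in width.  */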
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vshll_n_s8 (int8x8_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vshll_n_s16 (int16x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vshll_n_s32 (int32x2_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vshll_n_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vshll_nv8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vshll_n_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vshll_nv4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vshll_n_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vshll_nv2si ((int32x2_t) __a, __b, 0);
+}
+
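+/* vsra_n_*: shift right by immediate and accumulate,
+   __a[i] + (__b[i] >> __c) per lane.  */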
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 0);
+}
+
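+/* vrsra_n_*: rounding shift right by immediate and accumulate.  */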
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 4);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 5);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 4);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 4);
+}
+
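+/* vsri_n/vsriq_n: shift right and insert (VSRI).  Each lane of __b
+   is shifted right by __c and written into the matching lane of __a,
+   leaving the top __c bits of each __a lane unchanged.  Signedness
+   does not affect the operation, so these builtins take no mode
+   flag.  */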
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vsri_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+ return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+ return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+ return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+ return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
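+/* vsli_n/vsliq_n: shift left and insert (VSLI), the mirror image of
+   vsri_n: the bottom __c bits of each destination lane are
+   preserved.  */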
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+ return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+ return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+ return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+ return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
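+/* vabs/vabsq: lane-wise absolute value (VABS), for signed integer
+   and single-precision float vectors.  */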
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vabs_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vabsv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vabs_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vabsv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vabs_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vabsv2si (__a, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vabs_f32 (float32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vabsv2sf (__a, 3);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabsq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vabsv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabsq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vabsv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabsq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vabsv4si (__a, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vabsq_f32 (float32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vabsv4sf (__a, 3);
+}
+
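+/* vqabs/vqabsq: saturating absolute value (VQABS); the absolute
+   value of the most negative integer saturates to the most positive
+   one instead of wrapping.  */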
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqabs_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqabs_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqabs_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vqabsv2si (__a, 1);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqabsq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqabsq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqabsq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vqabsv4si (__a, 1);
+}
+
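+/* vneg/vnegq: lane-wise negation (VNEG).  */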
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vneg_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vnegv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vneg_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vnegv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vneg_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vnegv2si (__a, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vneg_f32 (float32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vnegv2sf (__a, 3);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vnegq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vnegv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vnegq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vnegv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vnegq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vnegv4si (__a, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vnegq_f32 (float32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vnegv4sf (__a, 3);
+}
+
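+/* vqneg/vqnegq: saturating negation (VQNEG); negating the most
+   negative integer yields the most positive one.  */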
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqneg_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqneg_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqneg_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vqnegv2si (__a, 1);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vqnegq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqnegq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqnegq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vqnegv4si (__a, 1);
+}
+
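+/* vmvn/vmvnq: bitwise NOT of every lane (VMVN).  */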
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmvn_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmvn_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmvn_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vmvnv2si (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmvn_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmvn_u16 (uint16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmvn_u32 (uint32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmvn_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmvnq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmvnq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vmvnv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmvnq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vmvnv4si (__a, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmvnq_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmvnq_u16 (uint16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmvnq_u32 (uint32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vmvnq_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 2);
+}
+
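+/* vcls/vclsq: count leading sign bits (VCLS): for each lane, the
+   number of consecutive bits below the sign bit that are equal to
+   it.  Defined for signed integers only.  */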
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcls_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vclsv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcls_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vclsv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcls_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vclsv2si (__a, 1);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vclsq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vclsv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vclsq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vclsv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vclsq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vclsv4si (__a, 1);
+}
+
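+/* vclz/vclzq: count leading zero bits in each lane (VCLZ).  */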
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vclz_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vclzv8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vclz_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vclzv4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vclz_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vclzv2si (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclz_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclz_u16 (uint16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclz_u32 (uint32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vclzq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vclzv16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vclzq_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vclzv8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vclzq_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vclzv4si (__a, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vclzq_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclzq_u16 (uint16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vclzq_u32 (uint32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a, 0);
+}
+
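+/* vcnt/vcntq: population count (VCNT), counting the set bits in each
+   byte; only 8-bit element forms exist.  */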
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcnt_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vcntv8qi (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcnt_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vcnt_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vcntq_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vcntv16qi (__a, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcntq_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vcntq_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 2);
+}
+
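+/* vrecpe/vrecpeq: reciprocal estimate (VRECPE), a low-precision
+   starting approximation normally refined with vrecps
+   Newton-Raphson steps.  */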
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrecpe_f32 (float32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vrecpev2sf (__a, 3);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrecpe_u32 (uint32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrecpeq_f32 (float32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vrecpev4sf (__a, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrecpeq_u32 (uint32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a, 0);
+}
+
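+/* vrsqrte/vrsqrteq: reciprocal square-root estimate (VRSQRTE),
+   normally refined with vrsqrts steps.  */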
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrsqrte_f32 (float32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 3);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrsqrte_u32 (uint32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrsqrteq_f32 (float32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 3);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrsqrteq_u32 (uint32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a, 0);
+}
+
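+/* vget_lane/vgetq_lane: extract a single scalar lane; the lane index
+   __b must be a compile-time constant in range for the vector type
+   (e.g. vget_lane_s16 (__a, 3) reads the last lane of an
+   int16x4_t).  */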
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vget_lane_s8 (int8x8_t __a, const int __b)
+{
+ return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vget_lane_s16 (int16x4_t __a, const int __b)
+{
+ return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vget_lane_s32 (int32x2_t __a, const int __b)
+{
+ return (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vget_lane_f32 (float32x2_t __a, const int __b)
+{
+ return (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vget_lane_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vget_lane_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vget_lane_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32_t)__builtin_neon_vget_lanev2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vget_lane_p8 (poly8x8_t __a, const int __b)
+{
+ return (poly8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 2);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vget_lane_p16 (poly16x4_t __a, const int __b)
+{
+ return (poly16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 2);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vget_lane_s64 (int64x1_t __a, const int __b)
+{
+ return (int64_t)__builtin_neon_vget_lanedi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vget_lane_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b, 0);
+}
+
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vgetq_lane_s8 (int8x16_t __a, const int __b)
+{
+ return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vgetq_lane_s16 (int16x8_t __a, const int __b)
+{
+ return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vgetq_lane_s32 (int32x4_t __a, const int __b)
+{
+ return (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vgetq_lane_f32 (float32x4_t __a, const int __b)
+{
+ return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 3);
+}
+
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vgetq_lane_u8 (uint8x16_t __a, const int __b)
+{
+ return (uint8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vgetq_lane_u16 (uint16x8_t __a, const int __b)
+{
+ return (uint16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vgetq_lane_u32 (uint32x4_t __a, const int __b)
+{
+ return (uint32_t)__builtin_neon_vget_lanev4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
+vgetq_lane_p8 (poly8x16_t __a, const int __b)
+{
+ return (poly8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 2);
+}
+
+__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
+vgetq_lane_p16 (poly16x8_t __a, const int __b)
+{
+ return (poly16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 2);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vgetq_lane_s64 (int64x2_t __a, const int __b)
+{
+ return (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vgetq_lane_u64 (uint64x2_t __a, const int __b)
+{
+ return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b, 0);
+}
+
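+/* vset_lane/vsetq_lane: return a copy of the vector __b with lane
+   __c replaced by the scalar __a; again the lane index must be a
+   constant.  The scalar is first cast to the matching
+   __builtin_neon_* type.  */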
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c)
+{
+ return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c)
+{
+ return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c)
+{
+ return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c)
+{
+ return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c)
+{
+ return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c)
+{
+ return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
+}
+
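+/* vcreate: reinterpret a 64-bit integer as a 64-bit vector.  No bits
+   are changed; lane 0 occupies the least significant bits.  */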
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vcreate_s8 (uint64_t __a)
+{
+ return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vcreate_s16 (uint64_t __a)
+{
+ return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcreate_s32 (uint64_t __a)
+{
+ return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vcreate_s64 (uint64_t __a)
+{
+ return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcreate_f32 (uint64_t __a)
+{
+ return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcreate_u8 (uint64_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcreate_u16 (uint64_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcreate_u32 (uint64_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcreate_u64 (uint64_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vcreate_p8 (uint64_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vcreate_p16 (uint64_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a);
+}
+
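+/* vdup_n/vdupq_n: broadcast a scalar into every lane (VDUP).  */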
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vdup_n_s8 (int8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vdup_n_s16 (int16_t __a)
+{
+ return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vdup_n_s32 (int32_t __a)
+{
+ return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vdup_n_f32 (float32_t __a)
+{
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vdup_n_u8 (uint8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vdup_n_u16 (uint16_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vdup_n_u32 (uint32_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vdup_n_p8 (poly8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vdup_n_p16 (poly16_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vdup_n_s64 (int64_t __a)
+{
+ return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vdup_n_u64 (uint64_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vdupq_n_s8 (int8_t __a)
+{
+ return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vdupq_n_s16 (int16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vdupq_n_s32 (int32_t __a)
+{
+ return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vdupq_n_f32 (float32_t __a)
+{
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vdupq_n_u8 (uint8_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vdupq_n_u16 (uint16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vdupq_n_u32 (uint32_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vdupq_n_p8 (poly8_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vdupq_n_p16 (poly16_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vdupq_n_s64 (int64_t __a)
+{
+ return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vdupq_n_u64 (uint64_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
+}
+
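+/* vmov_n/vmovq_n are synonyms for vdup_n/vdupq_n and expand to the
+   same vdup builtins.  */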
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmov_n_s8 (int8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmov_n_s16 (int16_t __a)
+{
+ return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmov_n_s32 (int32_t __a)
+{
+ return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmov_n_f32 (float32_t __a)
+{
+ return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmov_n_u8 (uint8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmov_n_u16 (uint16_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmov_n_u32 (uint32_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vmov_n_p8 (poly8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vmov_n_p16 (poly16_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vmov_n_s64 (int64_t __a)
+{
+ return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vmov_n_u64 (uint64_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vmovq_n_s8 (int8_t __a)
+{
+ return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmovq_n_s16 (int16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmovq_n_s32 (int32_t __a)
+{
+ return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmovq_n_f32 (float32_t __a)
+{
+ return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vmovq_n_u8 (uint8_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmovq_n_u16 (uint16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmovq_n_u32 (uint32_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vmovq_n_p8 (poly8_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vmovq_n_p16 (poly16_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmovq_n_s64 (int64_t __a)
+{
+ return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmovq_n_u64 (uint64_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a);
+}
+
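+/* vdup_lane/vdupq_lane: broadcast lane __b of a 64-bit vector into
+   every lane of the result; the q forms widen a 64-bit source into a
+   128-bit result.  */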
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vdup_lane_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vdup_lane_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vdup_lane_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vdup_lane_f32 (float32x2_t __a, const int __b)
+{
+ return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vdup_lane_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vdup_lane_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vdup_lane_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vdup_lane_p8 (poly8x8_t __a, const int __b)
+{
+ return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vdup_lane_p16 (poly16x4_t __a, const int __b)
+{
+ return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vdup_lane_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vdup_lane_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vdupq_lane_s8 (int8x8_t __a, const int __b)
+{
+ return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vdupq_lane_s16 (int16x4_t __a, const int __b)
+{
+ return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vdupq_lane_s32 (int32x2_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vdupq_lane_f32 (float32x2_t __a, const int __b)
+{
+ return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vdupq_lane_u8 (uint8x8_t __a, const int __b)
+{
+ return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vdupq_lane_u16 (uint16x4_t __a, const int __b)
+{
+ return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vdupq_lane_u32 (uint32x2_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vdupq_lane_p8 (poly8x8_t __a, const int __b)
+{
+ return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vdupq_lane_p16 (poly16x4_t __a, const int __b)
+{
+ return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vdupq_lane_s64 (int64x1_t __a, const int __b)
+{
+ return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vdupq_lane_u64 (uint64x1_t __a, const int __b)
+{
+ return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b);
+}
+
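+/* vcombine: concatenate two 64-bit vectors into one 128-bit vector;
+   __a supplies the low half and __b the high half.  */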
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vcombine_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vcombine_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcombine_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vcombine_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcombine_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+ return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b);
+}
+
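+/* vget_high: extract the high 64-bit half of a 128-bit vector.  */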
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vget_high_s8 (int8x16_t __a)
+{
+ return (int8x8_t)__builtin_neon_vget_highv16qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vget_high_s16 (int16x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vget_highv8hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vget_high_s32 (int32x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vget_highv4si (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vget_high_s64 (int64x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vget_highv2di (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vget_high_f32 (float32x4_t __a)
+{
+ return (float32x2_t)__builtin_neon_vget_highv4sf (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vget_high_u8 (uint8x16_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vget_high_u16 (uint16x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vget_high_u32 (uint32x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vget_high_u64 (uint64x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vget_high_p8 (poly8x16_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vget_high_p16 (poly16x8_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a);
+}
+
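+/* vget_low: extract the low 64-bit half of a 128-bit vector.  */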
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vget_low_s8 (int8x16_t __a)
+{
+ return (int8x8_t)__builtin_neon_vget_lowv16qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vget_low_s16 (int16x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vget_lowv8hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vget_low_s32 (int32x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vget_low_f32 (float32x4_t __a)
+{
+ return (float32x2_t)__builtin_neon_vget_lowv4sf (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vget_low_u8 (uint8x16_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vget_low_u16 (uint16x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vget_low_u32 (uint32x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vget_low_p8 (poly8x16_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vget_low_p16 (poly16x8_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vget_low_s64 (int64x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vget_lowv2di (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vget_low_u64 (uint64x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a);
+}
+
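+/* vcvt: convert between single-precision float and 32-bit integer
+   lanes (VCVT); float-to-integer conversions truncate toward
+   zero.  */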
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcvt_s32_f32 (float32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vcvtv2sf (__a, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_f32_s32 (int32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vcvtv2si (__a, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_f32_u32 (uint32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vcvtv2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcvt_u32_f32 (float32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcvtq_s32_f32 (float32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_f32_s32 (int32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vcvtv4si (__a, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_f32_u32 (uint32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vcvtv4si ((int32x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcvtq_u32_f32 (float32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
+}
+
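+/* vcvt_n/vcvtq_n: fixed-point conversions; the constant __b gives
+   the number of fraction bits.  For example, vcvt_n_f32_s32 (__a, 16)
+   scales each lane by 2^-16.  */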
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vcvt_n_s32_f32 (float32x2_t __a, const int __b)
+{
+ return (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_n_f32_s32 (int32x2_t __a, const int __b)
+{
+ return (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
+{
+ return (float32x2_t)__builtin_neon_vcvt_nv2si ((int32x2_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcvt_n_u32_f32 (float32x2_t __a, const int __b)
+{
+ return (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
+{
+ return (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
+{
+ return (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
+{
+ return (float32x4_t)__builtin_neon_vcvt_nv4si ((int32x4_t) __a, __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
+{
+ return (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0);
+}
+
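+/* vmovn_*: narrow each lane to half the element width, keeping the
+   low half of each element; bits that do not fit are discarded.  */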
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vmovn_s16 (int16x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmovn_s32 (int32x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vmovnv4si (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmovn_s64 (int64x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vmovnv2di (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vmovn_u16 (uint16x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmovn_u32 (uint32x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmovn_u64 (uint64x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a, 0);
+}
+
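+/* vqmovn_*: narrow each lane to half the element width, saturating
+   values that do not fit instead of truncating them.  */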
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vqmovn_s16 (int16x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqmovn_s32 (int32x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqmovn_s64 (int64x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqmovn_u16 (uint16x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vqmovnv8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqmovn_u32 (uint32x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vqmovnv4si ((int32x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqmovn_u64 (uint64x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vqmovnv2di ((int64x2_t) __a, 0);
+}
+
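+/* vqmovun_*: narrow signed input to an unsigned result of half the
+   element width, saturating negative values to zero.  */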
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vqmovun_s16 (int16x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vqmovun_s32 (int32x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vqmovun_s64 (int64x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1);
+}
+
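+/* vmovl_*: widen each lane to twice the element width, sign- or
+   zero-extending as the element type implies.  */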
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmovl_s8 (int8x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmovl_s16 (int16x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmovl_s32 (int32x2_t __a)
+{
+ return (int64x2_t)__builtin_neon_vmovlv2si (__a, 1);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmovl_u8 (uint8x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vmovlv8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmovl_u16 (uint16x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vmovlv4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmovl_u32 (uint32x2_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vmovlv2si ((int32x2_t) __a, 0);
+}
+
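+/* vtbl1..vtbl4: byte-wise table lookup.  __b holds indices into the
+   one- to four-vector table __a; out-of-range indices produce 0.  The
+   unions below repack the multi-vector struct types into the wide
+   internal modes (__builtin_neon_ti/ei/oi) that the builtins expect.  */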
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbl1_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbl1_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbl1_p8 (poly8x8_t __a, uint8x8_t __b)
+{
+ return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbl2_s8 (int8x8x2_t __a, int8x8_t __b)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
+ return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
+ return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
+ return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbl3_s8 (int8x8x3_t __a, int8x8_t __b)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
+ return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
+ return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
+ return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbl4_s8 (int8x8x4_t __a, int8x8_t __b)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
+ return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
+ return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
+ return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
+}
+
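+/* vtbx1..vtbx4: like vtbl, but lanes whose index is out of range keep
+   the corresponding byte of the first argument instead of becoming 0.  */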
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c)
+{
+ return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ return (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
+}
+
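+/* vmul_lane_*: multiply every element of __a by lane __c of __b;
+   __c must be a compile-time constant lane index.  */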
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c)
+{
+ return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c)
+{
+ return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c, 0);
+}
+
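+/* vmla_lane_* and the widening vmlal_lane_* / vqdmlal_lane_* forms
+   below: multiply-accumulate by a lane; vmlal widens the product,
+   vqdmlal additionally doubles and saturates it.  */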
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
+{
+ return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
+{
+ return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint32x4_t)__builtin_neon_vmlal_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint64x2_t)__builtin_neon_vmlal_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1);
+}
+
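+/* vmls_lane_* and the widening vmlsl/vqdmlsl forms mirror the vmla
+   group above, subtracting the product instead of adding it.  */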
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
+{
+ return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
+{
+ return (float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
+{
+ return (uint32x4_t)__builtin_neon_vmlsl_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
+{
+ return (uint64x2_t)__builtin_neon_vmlsl_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1);
+}
+
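+/* vmull_lane_* returns the full double-width product; vqdmull doubles
+   and saturates it.  vqdmulh keeps the high half of the doubled
+   saturating product, and vqrdmulh is its rounding form -- expressed
+   here through the same vqdmulh builtin with type code 5, which
+   appears to select the rounding variant.  */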
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vmull_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vmull_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 5);
+}
+
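+/* The _n_ variants that follow broadcast the scalar operand (__b or
+   __c) to every lane before the operation; they parallel the _lane_
+   forms above.  */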
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmul_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmul_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmul_n_f32 (float32x2_t __a, float32_t __b)
+{
+ return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmul_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmul_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmulq_n_s16 (int16x8_t __a, int16_t __b)
+{
+ return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmulq_n_s32 (int32x4_t __a, int32_t __b)
+{
+ return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulq_n_f32 (float32x4_t __a, float32_t __b)
+{
+ return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmull_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return (int32x4_t)__builtin_neon_vmull_nv4hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmull_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return (int64x2_t)__builtin_neon_vmull_nv2si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmull_n_u16 (uint16x4_t __a, uint16_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vmull_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmull_n_u32 (uint32x2_t __a, uint32_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vmull_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmull_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmull_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqdmulhq_n_s16 (int16x8_t __a, int16_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmulhq_n_s32 (int32x4_t __a, int32_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqdmulh_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqdmulh_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
+{
+ return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 5);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
+{
+ return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmulh_n_s16 (int16x4_t __a, int16_t __b)
+{
+ return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 5);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmulh_n_s32 (int32x2_t __a, int32_t __b)
+{
+ return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 5);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
+{
+ return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
+{
+ return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlal_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vmlal_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
+{
+ return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+ return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
+{
+ return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vmlsl_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vmlsl_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
+{
+ return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
+{
+ return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1);
+}
+
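+/* vext_*: extract a vector from the pair (__a, __b), starting at
+   element __c of __a; __c must be a compile-time constant.  */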
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vext_s8 (int8x8_t __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vext_s16 (int16x4_t __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vext_s32 (int32x2_t __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vext_s64 (int64x1_t __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vext_f32 (float32x2_t __a, float32x2_t __b, const int __c)
+{
+ return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+ return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+ return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c)
+{
+ return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+ return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+ return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c);
+}
+
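+/* vrev64/vrev32/vrev16: reverse the order of elements within each
+   64-, 32- or 16-bit group of the vector; the poly variants pass the
+   internal type code 2.  */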
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev64_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vrev64v8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrev64_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vrev64v4hi (__a, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vrev64_s32 (int32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vrev64v2si (__a, 1);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vrev64_f32 (float32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vrev64v2sf (__a, 3);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev64_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrev64_u16 (uint16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vrev64_u32 (uint32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vrev64v2si ((int32x2_t) __a, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev64_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 2);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vrev64_p16 (poly16x4_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev64q_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vrev64v16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrev64q_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vrev64v8hi (__a, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vrev64q_s32 (int32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vrev64v4si (__a, 1);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vrev64q_f32 (float32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vrev64v4sf (__a, 3);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev64q_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev64q_u16 (uint16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vrev64q_u32 (uint32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vrev64v4si ((int32x4_t) __a, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev64q_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 2);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vrev64q_p16 (poly16x8_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 2);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev32_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vrev32v8qi (__a, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vrev32_s16 (int16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vrev32v4hi (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev32_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vrev32_u16 (uint16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev32_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 2);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vrev32_p16 (poly16x4_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev32q_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vrev32v16qi (__a, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vrev32q_s16 (int16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vrev32v8hi (__a, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev32q_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vrev32q_u16 (uint16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev32q_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 2);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vrev32q_p16 (poly16x8_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 2);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrev16_s8 (int8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vrev16v8qi (__a, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrev16_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 0);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrev16_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 2);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrev16q_s8 (int8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vrev16v16qi (__a, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrev16q_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 0);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrev16q_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 2);
+}
+
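+/* vbsl_*: bitwise select -- for each bit, take __b where the mask __a
+   has a 1 and __c where it has a 0.  */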
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
+{
+ return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+ return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+ return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
+{
+ return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
+{
+ return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+{
+ return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
+{
+ return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
+{
+ return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
+{
+ return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
+{
+ return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
+{
+ return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+ return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
+{
+ return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
+{
+ return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
+{
+ return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
+}
+
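+/* vtrn_*: transpose pairs of elements from __a and __b.  The builtins
+   write both result vectors through a pointer to the first val[]
+   member of the x2_t struct, which relies on that struct being laid
+   out as two consecutive vectors.  */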
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vtrn_s8 (int8x8_t __a, int8x8_t __b)
+{
+ int8x8x2_t __rv;
+ __builtin_neon_vtrnv8qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vtrn_s16 (int16x4_t __a, int16x4_t __b)
+{
+ int16x4x2_t __rv;
+ __builtin_neon_vtrnv4hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vtrn_s32 (int32x2_t __a, int32x2_t __b)
+{
+ int32x2x2_t __rv;
+ __builtin_neon_vtrnv2si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vtrn_f32 (float32x2_t __a, float32x2_t __b)
+{
+ float32x2x2_t __rv;
+ __builtin_neon_vtrnv2sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ uint8x8x2_t __rv;
+ __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ uint16x4x2_t __rv;
+ __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ uint32x2x2_t __rv;
+ __builtin_neon_vtrnv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ poly8x8x2_t __rv;
+ __builtin_neon_vtrnv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+ poly16x4x2_t __rv;
+ __builtin_neon_vtrnv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
+vtrnq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ int8x16x2_t __rv;
+ __builtin_neon_vtrnv16qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vtrnq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ int16x8x2_t __rv;
+ __builtin_neon_vtrnv8hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vtrnq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ int32x4x2_t __rv;
+ __builtin_neon_vtrnv4si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vtrnq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ float32x4x2_t __rv;
+ __builtin_neon_vtrnv4sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
+vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ uint8x16x2_t __rv;
+ __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ uint16x8x2_t __rv;
+ __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ uint32x4x2_t __rv;
+ __builtin_neon_vtrnv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
+vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ poly8x16x2_t __rv;
+ __builtin_neon_vtrnv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+ poly16x8x2_t __rv;
+ __builtin_neon_vtrnv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
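+/* vzip: interleave ("zip") the elements of the two operands.  val[0]
+   holds the interleaved low halves {a0, b0, a1, b1, ...} and val[1]
+   the interleaved high halves.  */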
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vzip_s8 (int8x8_t __a, int8x8_t __b)
+{
+ int8x8x2_t __rv;
+ __builtin_neon_vzipv8qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vzip_s16 (int16x4_t __a, int16x4_t __b)
+{
+ int16x4x2_t __rv;
+ __builtin_neon_vzipv4hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vzip_s32 (int32x2_t __a, int32x2_t __b)
+{
+ int32x2x2_t __rv;
+ __builtin_neon_vzipv2si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vzip_f32 (float32x2_t __a, float32x2_t __b)
+{
+ float32x2x2_t __rv;
+ __builtin_neon_vzipv2sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vzip_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ uint8x8x2_t __rv;
+ __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vzip_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ uint16x4x2_t __rv;
+ __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vzip_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ uint32x2x2_t __rv;
+ __builtin_neon_vzipv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vzip_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ poly8x8x2_t __rv;
+ __builtin_neon_vzipv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vzip_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+ poly16x4x2_t __rv;
+ __builtin_neon_vzipv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
+vzipq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ int8x16x2_t __rv;
+ __builtin_neon_vzipv16qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vzipq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ int16x8x2_t __rv;
+ __builtin_neon_vzipv8hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vzipq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ int32x4x2_t __rv;
+ __builtin_neon_vzipv4si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vzipq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ float32x4x2_t __rv;
+ __builtin_neon_vzipv4sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
+vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ uint8x16x2_t __rv;
+ __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ uint16x8x2_t __rv;
+ __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ uint32x4x2_t __rv;
+ __builtin_neon_vzipv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
+vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ poly8x16x2_t __rv;
+ __builtin_neon_vzipv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+ poly16x8x2_t __rv;
+ __builtin_neon_vzipv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
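+/* vuzp: de-interleave ("unzip") the two operands.  val[0] gathers the
+   even-indexed elements of the concatenation of __a and __b, val[1]
+   the odd-indexed elements.  */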
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vuzp_s8 (int8x8_t __a, int8x8_t __b)
+{
+ int8x8x2_t __rv;
+ __builtin_neon_vuzpv8qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vuzp_s16 (int16x4_t __a, int16x4_t __b)
+{
+ int16x4x2_t __rv;
+ __builtin_neon_vuzpv4hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vuzp_s32 (int32x2_t __a, int32x2_t __b)
+{
+ int32x2x2_t __rv;
+ __builtin_neon_vuzpv2si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vuzp_f32 (float32x2_t __a, float32x2_t __b)
+{
+ float32x2x2_t __rv;
+ __builtin_neon_vuzpv2sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ uint8x8x2_t __rv;
+ __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ uint16x4x2_t __rv;
+ __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ uint32x2x2_t __rv;
+ __builtin_neon_vuzpv2si ((int32x2_t *) &__rv.val[0], (int32x2_t) __a, (int32x2_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ poly8x8x2_t __rv;
+ __builtin_neon_vuzpv8qi ((int8x8_t *) &__rv.val[0], (int8x8_t) __a, (int8x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
+{
+ poly16x4x2_t __rv;
+ __builtin_neon_vuzpv4hi ((int16x4_t *) &__rv.val[0], (int16x4_t) __a, (int16x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
+vuzpq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ int8x16x2_t __rv;
+ __builtin_neon_vuzpv16qi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vuzpq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ int16x8x2_t __rv;
+ __builtin_neon_vuzpv8hi (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vuzpq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ int32x4x2_t __rv;
+ __builtin_neon_vuzpv4si (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vuzpq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ float32x4x2_t __rv;
+ __builtin_neon_vuzpv4sf (&__rv.val[0], __a, __b);
+ return __rv;
+}
+
+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
+vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ uint8x16x2_t __rv;
+ __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ uint16x8x2_t __rv;
+ __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ uint32x4x2_t __rv;
+ __builtin_neon_vuzpv4si ((int32x4_t *) &__rv.val[0], (int32x4_t) __a, (int32x4_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
+vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ poly8x16x2_t __rv;
+ __builtin_neon_vuzpv16qi ((int8x16_t *) &__rv.val[0], (int8x16_t) __a, (int8x16_t) __b);
+ return __rv;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
+{
+ poly16x8x2_t __rv;
+ __builtin_neon_vuzpv8hi ((int16x8_t *) &__rv.val[0], (int16x8_t) __a, (int16x8_t) __b);
+ return __rv;
+}
+
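+/* vld1: load a single vector from memory.  The pointer is cast to the
+   compiler's builtin element type and the loaded value is cast back
+   to the public vector type.  */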
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_s8 (const int8_t * __a)
+{
+ return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_s16 (const int16_t * __a)
+{
+ return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_s32 (const int32_t * __a)
+{
+ return (int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_s64 (const int64_t * __a)
+{
+ return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_f32 (const float32_t * __a)
+{
+ return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_u8 (const uint8_t * __a)
+{
+ return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_u16 (const uint16_t * __a)
+{
+ return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_u32 (const uint32_t * __a)
+{
+ return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_u64 (const uint64_t * __a)
+{
+ return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_p8 (const poly8_t * __a)
+{
+ return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_p16 (const poly16_t * __a)
+{
+ return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_s8 (const int8_t * __a)
+{
+ return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_s16 (const int16_t * __a)
+{
+ return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_s32 (const int32_t * __a)
+{
+ return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_s64 (const int64_t * __a)
+{
+ return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_f32 (const float32_t * __a)
+{
+ return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_u8 (const uint8_t * __a)
+{
+ return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_u16 (const uint16_t * __a)
+{
+ return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_u32 (const uint32_t * __a)
+{
+ return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_u64 (const uint64_t * __a)
+{
+ return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_p8 (const poly8_t * __a)
+{
+ return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_p16 (const poly16_t * __a)
+{
+ return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a);
+}
+
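+/* vld1_lane: load one element from memory into lane __c of __b,
+   leaving the other lanes unchanged.  __c must be a constant lane
+   index.  */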
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c)
+{
+ return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c)
+{
+ return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c)
+{
+ return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c)
+{
+ return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c)
+{
+ return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c)
+{
+ return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c)
+{
+ return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c)
+{
+ return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c)
+{
+ return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c)
+{
+ return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c)
+{
+ return (uint64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c)
+{
+ return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c)
+{
+ return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c)
+{
+ return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c)
+{
+ return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c)
+{
+ return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c)
+{
+ return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c)
+{
+ return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c)
+{
+ return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c)
+{
+ return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c)
+{
+ return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c)
+{
+ return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c);
+}
+
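+/* vld1_dup: load one element from memory and replicate it across all
+   lanes of the result vector.  */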
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vld1_dup_s8 (const int8_t * __a)
+{
+ return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vld1_dup_s16 (const int16_t * __a)
+{
+ return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vld1_dup_s32 (const int32_t * __a)
+{
+ return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vld1_dup_f32 (const float32_t * __a)
+{
+ return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vld1_dup_u8 (const uint8_t * __a)
+{
+ return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vld1_dup_u16 (const uint16_t * __a)
+{
+ return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vld1_dup_u32 (const uint32_t * __a)
+{
+ return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vld1_dup_p8 (const poly8_t * __a)
+{
+ return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vld1_dup_p16 (const poly16_t * __a)
+{
+ return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vld1_dup_s64 (const int64_t * __a)
+{
+ return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vld1_dup_u64 (const uint64_t * __a)
+{
+ return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vld1q_dup_s8 (const int8_t * __a)
+{
+ return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vld1q_dup_s16 (const int16_t * __a)
+{
+ return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vld1q_dup_s32 (const int32_t * __a)
+{
+ return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vld1q_dup_f32 (const float32_t * __a)
+{
+ return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vld1q_dup_u8 (const uint8_t * __a)
+{
+ return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vld1q_dup_u16 (const uint16_t * __a)
+{
+ return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vld1q_dup_u32 (const uint32_t * __a)
+{
+ return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vld1q_dup_p8 (const poly8_t * __a)
+{
+ return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vld1q_dup_p16 (const poly16_t * __a)
+{
+ return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vld1q_dup_s64 (const int64_t * __a)
+{
+ return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vld1q_dup_u64 (const uint64_t * __a)
+{
+ return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a);
+}
+
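+/* vst1: store a single vector to memory, the mirror image of vld1.  */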
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s8 (int8_t * __a, int8x8_t __b)
+{
+ __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s16 (int16_t * __a, int16x4_t __b)
+{
+ __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s32 (int32_t * __a, int32x2_t __b)
+{
+ __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_s64 (int64_t * __a, int64x1_t __b)
+{
+ __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f32 (float32_t * __a, float32x2_t __b)
+{
+ __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u8 (uint8_t * __a, uint8x8_t __b)
+{
+ __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u16 (uint16_t * __a, uint16x4_t __b)
+{
+ __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u32 (uint32_t * __a, uint32x2_t __b)
+{
+ __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_u64 (uint64_t * __a, uint64x1_t __b)
+{
+ __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p8 (poly8_t * __a, poly8x8_t __b)
+{
+ __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_p16 (poly16_t * __a, poly16x4_t __b)
+{
+ __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s8 (int8_t * __a, int8x16_t __b)
+{
+ __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s16 (int16_t * __a, int16x8_t __b)
+{
+ __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s32 (int32_t * __a, int32x4_t __b)
+{
+ __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_s64 (int64_t * __a, int64x2_t __b)
+{
+ __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f32 (float32_t * __a, float32x4_t __b)
+{
+ __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u8 (uint8_t * __a, uint8x16_t __b)
+{
+ __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u16 (uint16_t * __a, uint16x8_t __b)
+{
+ __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u32 (uint32_t * __a, uint32x4_t __b)
+{
+ __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_u64 (uint64_t * __a, uint64x2_t __b)
+{
+ __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p8 (poly8_t * __a, poly8x16_t __b)
+{
+ __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_p16 (poly16_t * __a, poly16x8_t __b)
+{
+ __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b);
+}
+
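+/* vst1_lane: store lane __c of __b to memory; __c must be a constant
+   lane index.  */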
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c)
+{
+ __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c);
+}
+
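+/* vld2: structure load of two-element records.  The memory layout
+   {x0, y0, x1, y1, ...} is de-interleaved into val[0] = {x0, x1, ...}
+   and val[1] = {y0, y1, ...}.  The builtin returns an opaque wide
+   integer mode (TI for a doubleword pair, OI for a quadword pair), so
+   a union reinterprets it as the public two-vector struct.  */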
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vld2_s8 (const int8_t * __a)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vld2_s16 (const int16_t * __a)
+{
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vld2_s32 (const int32_t * __a)
+{
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vld2_f32 (const float32_t * __a)
+{
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vld2_u8 (const uint8_t * __a)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vld2_u16 (const uint16_t * __a)
+{
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vld2_u32 (const uint32_t * __a)
+{
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vld2_p8 (const poly8_t * __a)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vld2_p16 (const poly16_t * __a)
+{
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
+vld2_s64 (const int64_t * __a)
+{
+ union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
+vld2_u64 (const uint64_t * __a)
+{
+ union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
+vld2q_s8 (const int8_t * __a)
+{
+ union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vld2q_s16 (const int16_t * __a)
+{
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vld2q_s32 (const int32_t * __a)
+{
+ union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vld2q_f32 (const float32_t * __a)
+{
+ union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
+vld2q_u8 (const uint8_t * __a)
+{
+ union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vld2q_u16 (const uint16_t * __a)
+{
+ union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vld2q_u32 (const uint32_t * __a)
+{
+ union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
+vld2q_p8 (const poly8_t * __a)
+{
+ union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vld2q_p16 (const poly16_t * __a)
+{
+ union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
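+/* vld2_lane: load one two-element record into lane __c of each vector
+   of __b, leaving the other lanes unchanged.  Quadword lane loads are
+   provided only for 16- and 32-bit element types.  */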
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c)
+{
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
+{
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
+{
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c)
+{
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c)
+{
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c)
+{
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c)
+{
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
+vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
+{
+ union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
+vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
+{
+ union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c)
+{
+ union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
+vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c)
+{
+ union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
+vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c)
+{
+ union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
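+/* vld2_dup: load one two-element record and replicate it across all
+   lanes of both result vectors.  */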
+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_s8 (const int8_t * __a)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_s16 (const int16_t * __a)
+{
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_s32 (const int32_t * __a)
+{
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_f32 (const float32_t * __a)
+{
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_u8 (const uint8_t * __a)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_u16 (const uint16_t * __a)
+{
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
+vld2_dup_u32 (const uint32_t * __a)
+{
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
+vld2_dup_p8 (const poly8_t * __a)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
+vld2_dup_p16 (const poly16_t * __a)
+{
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_s64 (const int64_t * __a)
+{
+ union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
+vld2_dup_u64 (const uint64_t * __a)
+{
+ union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
+ __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
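+/* vst2: structure store, the inverse of vld2.  The two vectors are
+   re-interleaved into memory as {x0, y0, x1, y1, ...}; the same union
+   trick converts the public struct into the opaque builtin mode.  */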
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s8 (int8_t * __a, int8x8x2_t __b)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s16 (int16_t * __a, int16x4x2_t __b)
+{
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s32 (int32_t * __a, int32x2x2_t __b)
+{
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f32 (float32_t * __a, float32x2x2_t __b)
+{
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u8 (uint8_t * __a, uint8x8x2_t __b)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u16 (uint16_t * __a, uint16x4x2_t __b)
+{
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u32 (uint32_t * __a, uint32x2x2_t __b)
+{
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_p8 (poly8_t * __a, poly8x8x2_t __b)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_p16 (poly16_t * __a, poly16x4x2_t __b)
+{
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_s64 (int64_t * __a, int64x1x2_t __b)
+{
+ union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_u64 (uint64_t * __a, uint64x1x2_t __b)
+{
+ union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_s8 (int8_t * __a, int8x16x2_t __b)
+{
+ union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_s16 (int16_t * __a, int16x8x2_t __b)
+{
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_s32 (int32_t * __a, int32x4x2_t __b)
+{
+ union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f32 (float32_t * __a, float32x4x2_t __b)
+{
+ union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_u8 (uint8_t * __a, uint8x16x2_t __b)
+{
+ union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_u16 (uint16_t * __a, uint16x8x2_t __b)
+{
+ union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_u32 (uint32_t * __a, uint32x4x2_t __b)
+{
+ union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_p8 (poly8_t * __a, poly8x16x2_t __b)
+{
+ union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_p16 (poly16_t * __a, poly16x8x2_t __b)
+{
+ union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
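+/* vst2_lane: store lane __c of each vector of __b as one two-element
+   record.  As with vld2_lane, quadword forms exist only for 16- and
+   32-bit element types.  */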
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c)
+{
+ union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c)
+{
+ union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
+{
+ union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
+{
+ union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c)
+{
+ union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c)
+{
+ union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c)
+{
+ union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c)
+{
+ union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c)
+{
+ union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c)
+{
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
+{
+ union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
+{
+ union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c)
+{
+ union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c)
+{
+ union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c)
+{
+ union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
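+/* vld3: structure load of three-element records, de-interleaved into
+   val[0..2].  The opaque modes widen accordingly (EI for a doubleword
+   triple, CI for a quadword triple).  */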
+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
+vld3_s8 (const int8_t * __a)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
+vld3_s16 (const int16_t * __a)
+{
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
+vld3_s32 (const int32_t * __a)
+{
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
+vld3_f32 (const float32_t * __a)
+{
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
+vld3_u8 (const uint8_t * __a)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
+vld3_u16 (const uint16_t * __a)
+{
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
+vld3_u32 (const uint32_t * __a)
+{
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
+vld3_p8 (const poly8_t * __a)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
+vld3_p16 (const poly16_t * __a)
+{
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
+vld3_s64 (const int64_t * __a)
+{
+ union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
+vld3_u64 (const uint64_t * __a)
+{
+ union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
+vld3q_s8 (const int8_t * __a)
+{
+ union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
+vld3q_s16 (const int16_t * __a)
+{
+ union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
+vld3q_s32 (const int32_t * __a)
+{
+ union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
+vld3q_f32 (const float32_t * __a)
+{
+ union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
+vld3q_u8 (const uint8_t * __a)
+{
+ union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
+vld3q_u16 (const uint16_t * __a)
+{
+ union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
+vld3q_u32 (const uint32_t * __a)
+{
+ union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
+vld3q_p8 (const poly8_t * __a)
+{
+ union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
+vld3q_p16 (const poly16_t * __a)
+{
+ union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
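+/* Editorial note: the vld3_lane_<type> variants load three consecutive
+   elements from __a into lane __c of each of the three vectors of __b,
+   leaving every other lane of __b unchanged; __c must be a constant
+   lane index valid for the vector width.  */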
+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
+vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c)
+{
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
+vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
+{
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
+vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
+{
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
+vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c)
+{
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
+vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c)
+{
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
+vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
+vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c)
+{
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c)
+{
+ union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
+vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
+{
+ union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
+vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
+{
+ union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c)
+{
+ union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
+vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c)
+{
+ union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
+vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c)
+{
+ union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
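+/* Editorial note: vld3_dup_<type> loads one 3-element structure from __a
+   and replicates each element across all lanes of the corresponding
+   result vector (the "all lanes" addressing form of VLD3).  */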
+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_s8 (const int8_t * __a)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_s16 (const int16_t * __a)
+{
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_s32 (const int32_t * __a)
+{
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_f32 (const float32_t * __a)
+{
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_u8 (const uint8_t * __a)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_u16 (const uint16_t * __a)
+{
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
+vld3_dup_u32 (const uint32_t * __a)
+{
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
+vld3_dup_p8 (const poly8_t * __a)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
+vld3_dup_p16 (const poly16_t * __a)
+{
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_s64 (const int64_t * __a)
+{
+ union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
+vld3_dup_u64 (const uint64_t * __a)
+{
+ union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
+ __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
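+/* Editorial note: vst3_<type> is the store-side mirror of vld3_<type>:
+   the three .val[] vectors are re-interleaved element by element on the
+   way out to memory, so vst3 followed by vld3 on the same buffer
+   round-trips the data.  */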
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s8 (int8_t * __a, int8x8x3_t __b)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s16 (int16_t * __a, int16x4x3_t __b)
+{
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s32 (int32_t * __a, int32x2x3_t __b)
+{
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f32 (float32_t * __a, float32x2x3_t __b)
+{
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_u8 (uint8_t * __a, uint8x8x3_t __b)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_u16 (uint16_t * __a, uint16x4x3_t __b)
+{
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_u32 (uint32_t * __a, uint32x2x3_t __b)
+{
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_p8 (poly8_t * __a, poly8x8x3_t __b)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_p16 (poly16_t * __a, poly16x4x3_t __b)
+{
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_s64 (int64_t * __a, int64x1x3_t __b)
+{
+ union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_u64 (uint64_t * __a, uint64x1x3_t __b)
+{
+ union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_s8 (int8_t * __a, int8x16x3_t __b)
+{
+ union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_s16 (int16_t * __a, int16x8x3_t __b)
+{
+ union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_s32 (int32_t * __a, int32x4x3_t __b)
+{
+ union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f32 (float32_t * __a, float32x4x3_t __b)
+{
+ union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_u8 (uint8_t * __a, uint8x16x3_t __b)
+{
+ union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_u16 (uint16_t * __a, uint16x8x3_t __b)
+{
+ union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_u32 (uint32_t * __a, uint32x4x3_t __b)
+{
+ union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_p8 (poly8_t * __a, poly8x16x3_t __b)
+{
+ union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_p16 (poly16_t * __a, poly16x8x3_t __b)
+{
+ union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
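+/* Editorial note: vst3_lane_<type> stores lane __c of each of the three
+   vectors in __b as three consecutive elements at __a; as with the load
+   form, __c must be a compile-time constant.  */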
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c)
+{
+ union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c)
+{
+ union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
+{
+ union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
+{
+ union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c)
+{
+ union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c)
+{
+ union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c)
+{
+ union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c)
+{
+ union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c)
+{
+ union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c)
+{
+ union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
+{
+ union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
+{
+ union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c)
+{
+ union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c)
+{
+ union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c)
+{
+ union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
+ __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
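+/* Editorial note: the vld4/vst4 family below follows the same pattern
+   with four interleaved streams (VLD4/VST4).  As the definitions show,
+   the opaque modes widen accordingly: OI for the 64-bit (D-register)
+   forms and XI for the q-suffixed 128-bit (Q-register) forms.  An
+   illustrative round trip over a hypothetical `rgba' byte buffer:
+
+     uint8x8x4_t px = vld4_u8 (rgba);   // de-interleave R,G,B,A planes
+     vst4_u8 (rgba, px);                // re-interleave back to memory
+*/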
+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
+vld4_s8 (const int8_t * __a)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
+vld4_s16 (const int16_t * __a)
+{
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
+vld4_s32 (const int32_t * __a)
+{
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
+vld4_f32 (const float32_t * __a)
+{
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
+vld4_u8 (const uint8_t * __a)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
+vld4_u16 (const uint16_t * __a)
+{
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
+vld4_u32 (const uint32_t * __a)
+{
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
+vld4_p8 (const poly8_t * __a)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
+vld4_p16 (const poly16_t * __a)
+{
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
+vld4_s64 (const int64_t * __a)
+{
+ union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
+vld4_u64 (const uint64_t * __a)
+{
+ union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
+vld4q_s8 (const int8_t * __a)
+{
+ union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
+vld4q_s16 (const int16_t * __a)
+{
+ union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
+vld4q_s32 (const int32_t * __a)
+{
+ union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
+vld4q_f32 (const float32_t * __a)
+{
+ union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
+vld4q_u8 (const uint8_t * __a)
+{
+ union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
+vld4q_u16 (const uint16_t * __a)
+{
+ union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
+vld4q_u32 (const uint32_t * __a)
+{
+ union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
+vld4q_p8 (const poly8_t * __a)
+{
+ union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
+vld4q_p16 (const poly16_t * __a)
+{
+ union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
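+/* Editorial note: as with vld3_lane, the vld4_lane_<type> variants load
+   four consecutive elements from __a into lane __c of the four vectors
+   of __b; all other lanes pass through from __b.  */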
+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
+vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c)
+{
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
+vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c)
+{
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
+vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
+{
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
+vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c)
+{
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
+vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c)
+{
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
+vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
+vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c)
+{
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c)
+{
+ union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
+vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c)
+{
+ union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
+vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
+{
+ union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c)
+{
+ union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
+vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c)
+{
+ union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
+vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c)
+{
+ union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c);
+ return __rv.__i;
+}
+
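+/* Editorial note: vld4_dup_<type> reads one 4-element structure and
+   broadcasts each element across all lanes of its result vector.  */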
+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_s8 (const int8_t * __a)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_s16 (const int16_t * __a)
+{
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_s32 (const int32_t * __a)
+{
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_f32 (const float32_t * __a)
+{
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_u8 (const uint8_t * __a)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_u16 (const uint16_t * __a)
+{
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
+vld4_dup_u32 (const uint32_t * __a)
+{
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
+vld4_dup_p8 (const poly8_t * __a)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
+vld4_dup_p16 (const poly16_t * __a)
+{
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_s64 (const int64_t * __a)
+{
+ union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
+vld4_dup_u64 (const uint64_t * __a)
+{
+ union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv;
+ __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a);
+ return __rv.__i;
+}
+
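+/* Editorial note: vst4_<type> and vst4q_<type> interleave the four
+   .val[] vectors back into memory, mirroring the corresponding loads.  */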
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s8 (int8_t * __a, int8x8x4_t __b)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s16 (int16_t * __a, int16x4x4_t __b)
+{
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s32 (int32_t * __a, int32x2x4_t __b)
+{
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f32 (float32_t * __a, float32x2x4_t __b)
+{
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_u8 (uint8_t * __a, uint8x8x4_t __b)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_u16 (uint16_t * __a, uint16x4x4_t __b)
+{
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_u32 (uint32_t * __a, uint32x2x4_t __b)
+{
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_p8 (poly8_t * __a, poly8x8x4_t __b)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_p16 (poly16_t * __a, poly16x4x4_t __b)
+{
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_s64 (int64_t * __a, int64x1x4_t __b)
+{
+ union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_u64 (uint64_t * __a, uint64x1x4_t __b)
+{
+ union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_s8 (int8_t * __a, int8x16x4_t __b)
+{
+ union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_s16 (int16_t * __a, int16x8x4_t __b)
+{
+ union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_s32 (int32_t * __a, int32x4x4_t __b)
+{
+ union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f32 (float32_t * __a, float32x4x4_t __b)
+{
+ union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_u8 (uint8_t * __a, uint8x16x4_t __b)
+{
+ union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_u16 (uint16_t * __a, uint16x8x4_t __b)
+{
+ union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_u32 (uint32_t * __a, uint32x4x4_t __b)
+{
+ union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_p8 (poly8_t * __a, poly8x16x4_t __b)
+{
+ union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_p16 (poly16_t * __a, poly16x8x4_t __b)
+{
+ union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o);
+}
+
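+/* Editorial note: vst4_lane_<type> writes lane __c of each of the four
+   vectors in __b as four consecutive elements at __a.  */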
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c)
+{
+ union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c)
+{
+ union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c)
+{
+ union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
+{
+ union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c)
+{
+ union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c)
+{
+ union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c)
+{
+ union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c)
+{
+ union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c)
+{
+ union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c)
+{
+ union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c)
+{
+ union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
+{
+ union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c)
+{
+ union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c)
+{
+ union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c)
+{
+ union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b };
+ __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
+}
+
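+/* Editorial note: the remaining intrinsics in this stretch are plain
+   bitwise operations (VAND here, then VORR, VEOR, VBIC, VORN).  The
+   trailing integer literal passed to each builtin is not an operand:
+   as the definitions show, it merely tags the variant (1 for signed,
+   0 for unsigned element types) for the builtin expander.  A
+   hypothetical masking example:
+
+     uint8x8_t low = vand_u8 (v, vdup_n_u8 (0x0f));   // keep low nibbles
+*/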
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vand_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vand_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vand_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vand_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vand_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vand_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vand_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vand_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vandq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vandq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vandq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vandq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vandq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vandq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vandq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vandq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
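+/* Editorial note: vorr_<type>/vorrq_<type> compute the bitwise OR of
+   the two operands (VORR).  */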
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vorr_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vorr_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vorr_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vorr_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vorr_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vorr_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vorr_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vorr_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vorrq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vorrq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vorrq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vorrq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
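+/* Editorial note: veor_<type>/veorq_<type> compute the bitwise
+   exclusive OR (VEOR).  */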
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+veor_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+veor_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+veor_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+veor_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+veor_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+veor_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+veor_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_veordi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+veor_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+veorq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+veorq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+veorq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+veorq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+veorq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+veorq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+veorq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+veorq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
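+/* Editorial note: vbic_<type>/vbicq_<type> compute __a & ~__b, i.e.
+   __a with the bits set in __b cleared (VBIC, "bit clear").  */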
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vbic_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vbic_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vbic_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vbic_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vbic_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vbic_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vbic_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vbic_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vbicq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vbicq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vbicq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vbicq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
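+/* vorn ("bitwise OR NOT") computes __a | ~__b in each lane.  */
+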
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vorn_s8 (int8x8_t __a, int8x8_t __b)
+{
+ return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vorn_s16 (int16x4_t __a, int16x4_t __b)
+{
+ return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vorn_s32 (int32x2_t __a, int32x2_t __b)
+{
+ return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vorn_u8 (uint8x8_t __a, uint8x8_t __b)
+{
+ return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vorn_u16 (uint16x4_t __a, uint16x4_t __b)
+{
+ return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vorn_u32 (uint32x2_t __a, uint32x2_t __b)
+{
+ return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vorn_s64 (int64x1_t __a, int64x1_t __b)
+{
+ return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vorn_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vornq_s8 (int8x16_t __a, int8x16_t __b)
+{
+ return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vornq_s16 (int16x8_t __a, int16x8_t __b)
+{
+ return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vornq_s32 (int32x4_t __a, int32x4_t __b)
+{
+ return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vornq_s64 (int64x2_t __a, int64x2_t __b)
+{
+ return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vornq_u8 (uint8x16_t __a, uint8x16_t __b)
+{
+ return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vornq_u16 (uint16x8_t __a, uint16x8_t __b)
+{
+ return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vornq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vornq_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
+}
+
+
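+/* The vreinterpret family reinterprets the bits of a vector as a vector
+   with a different element type of the same overall size; no conversion
+   or data movement takes place.  E.g. vreinterpret_u8_u32 views a
+   uint32x2_t as a uint8x8_t.  */
+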
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_s8 (int8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_s16 (int16x4_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_s32 (int32x2_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_s64 (int64x1_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_f32 (float32x2_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_u8 (uint8x8_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_u16 (uint16x4_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_u32 (uint32x2_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_u64 (uint64x1_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vreinterpret_p8_p16 (poly16x4_t __a)
+{
+ return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_s8 (int8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_s16 (int16x8_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_s32 (int32x4_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_s64 (int64x2_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_f32 (float32x4_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_u8 (uint8x16_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_u16 (uint16x8_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_u32 (uint32x4_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_u64 (uint64x2_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_p8_p16 (poly16x8_t __a)
+{
+ return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_s8 (int8x8_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_s16 (int16x4_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_s32 (int32x2_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_s64 (int64x1_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_f32 (float32x2_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_u8 (uint8x8_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_u16 (uint16x4_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_u32 (uint32x2_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_u64 (uint64x1_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vreinterpret_p16_p8 (poly8x8_t __a)
+{
+ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_s8 (int8x16_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_s16 (int16x8_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_s32 (int32x4_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_s64 (int64x2_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_f32 (float32x4_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_u8 (uint8x16_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_u16 (uint16x8_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_u32 (uint32x4_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_u64 (uint64x2_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_p16_p8 (poly8x16_t __a)
+{
+ return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_s8 (int8x8_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_s16 (int16x4_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_s32 (int32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_s64 (int64x1_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_u8 (uint8x8_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_u16 (uint16x4_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_u32 (uint32x2_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_u64 (uint64x1_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_p8 (poly8x8_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vreinterpret_f32_p16 (poly16x4_t __a)
+{
+ return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_s8 (int8x16_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_s16 (int16x8_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_s32 (int32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_s64 (int64x2_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_u8 (uint8x16_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_u16 (uint16x8_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_u32 (uint32x4_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_u64 (uint64x2_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_p8 (poly8x16_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_f32_p16 (poly16x8_t __a)
+{
+ return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_s8 (int8x8_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv8qi (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_s16 (int16x4_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv4hi (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_s32 (int32x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv2si (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_f32 (float32x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_u8 (uint8x8_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_u16 (uint16x4_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_u32 (uint32x2_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_u64 (uint64x1_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_p8 (poly8x8_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vreinterpret_s64_p16 (poly16x4_t __a)
+{
+ return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_s8 (int8x16_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_s16 (int16x8_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_s32 (int32x4_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_f32 (float32x4_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_u8 (uint8x16_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_u16 (uint16x8_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_u32 (uint32x4_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_u64 (uint64x2_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_p8 (poly8x16_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_s64_p16 (poly16x8_t __a)
+{
+ return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_s8 (int8x8_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_s16 (int16x4_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_s32 (int32x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv2si (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_s64 (int64x1_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_f32 (float32x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_u8 (uint8x8_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_u16 (uint16x4_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_u32 (uint32x2_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_p8 (poly8x8_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vreinterpret_u64_p16 (poly16x4_t __a)
+{
+ return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_s8 (int8x16_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_s16 (int16x8_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_s32 (int32x4_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_s64 (int64x2_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_f32 (float32x4_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_u8 (uint8x16_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_u16 (uint16x8_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_u32 (uint32x4_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_p8 (poly8x16_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vreinterpretq_u64_p16 (poly16x8_t __a)
+{
+ return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_s16 (int16x4_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_s32 (int32x2_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_s64 (int64x1_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_f32 (float32x2_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_u8 (uint8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_u16 (uint16x4_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_u32 (uint32x2_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_u64 (uint64x1_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_p8 (poly8x8_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vreinterpret_s8_p16 (poly16x4_t __a)
+{
+ return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_s16 (int16x8_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_s32 (int32x4_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_s64 (int64x2_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_f32 (float32x4_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_u8 (uint8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_u16 (uint16x8_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_u32 (uint32x4_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_u64 (uint64x2_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_p8 (poly8x16_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_s8_p16 (poly16x8_t __a)
+{
+ return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_s8 (int8x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_s32 (int32x2_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_s64 (int64x1_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_f32 (float32x2_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_u8 (uint8x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_u16 (uint16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_u32 (uint32x2_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_u64 (uint64x1_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_p8 (poly8x8_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vreinterpret_s16_p16 (poly16x4_t __a)
+{
+ return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_s8 (int8x16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_s32 (int32x4_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_s64 (int64x2_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_f32 (float32x4_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_u8 (uint8x16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_u16 (uint16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_u32 (uint32x4_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_u64 (uint64x2_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_p8 (poly8x16_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_s16_p16 (poly16x8_t __a)
+{
+ return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_s8 (int8x8_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_s16 (int16x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_s64 (int64x1_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_f32 (float32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_u8 (uint8x8_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_u16 (uint16x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_u32 (uint32x2_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_u64 (uint64x1_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_p8 (poly8x8_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vreinterpret_s32_p16 (poly16x4_t __a)
+{
+ return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_s8 (int8x16_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_s16 (int16x8_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_s64 (int64x2_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_f32 (float32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_u8 (uint8x16_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_u16 (uint16x8_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_u32 (uint32x4_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_u64 (uint64x2_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_p8 (poly8x16_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_s32_p16 (poly16x8_t __a)
+{
+ return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_s8 (int8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_s16 (int16x4_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_s32 (int32x2_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_s64 (int64x1_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_f32 (float32x2_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_u16 (uint16x4_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_u32 (uint32x2_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_u64 (uint64x1_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_p8 (poly8x8_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vreinterpret_u8_p16 (poly16x4_t __a)
+{
+ return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_s8 (int8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_s16 (int16x8_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_s32 (int32x4_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_s64 (int64x2_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_f32 (float32x4_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_u16 (uint16x8_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_u32 (uint32x4_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_u64 (uint64x2_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_p8 (poly8x16_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vreinterpretq_u8_p16 (poly16x8_t __a)
+{
+ return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_s8 (int8x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_s16 (int16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_s32 (int32x2_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_s64 (int64x1_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_f32 (float32x2_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_u8 (uint8x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_u32 (uint32x2_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_u64 (uint64x1_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_p8 (poly8x8_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vreinterpret_u16_p16 (poly16x4_t __a)
+{
+ return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_s8 (int8x16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_s16 (int16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_s32 (int32x4_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_s64 (int64x2_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_f32 (float32x4_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_u8 (uint8x16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_u32 (uint32x4_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_u64 (uint64x2_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_p8 (poly8x16_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vreinterpretq_u16_p16 (poly16x8_t __a)
+{
+ return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_s8 (int8x8_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_s16 (int16x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_s32 (int32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_s64 (int64x1_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_f32 (float32x2_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_u8 (uint8x8_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_u16 (uint16x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_u64 (uint64x1_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_p8 (poly8x8_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vreinterpret_u32_p16 (poly16x4_t __a)
+{
+ return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_s8 (int8x16_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_s16 (int16x8_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_s32 (int32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_s64 (int64x2_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_f32 (float32x4_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_u8 (uint8x16_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_u16 (uint16x8_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_u64 (uint64x2_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_p8 (poly8x16_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vreinterpretq_u32_p16 (poly16x8_t __a)
+{
+ return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+#endif
diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S
new file mode 100644
index 000000000..4ecea6da5
--- /dev/null
+++ b/gcc/config/arm/bpabi-v6m.S
@@ -0,0 +1,318 @@
+/* Miscellaneous BPABI functions.  ARMv6M implementation.
+
+ Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
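+/* Three-way signed comparison of two 64-bit values held in xxh:xxl and
+   yyh:yyl; returns a negative, zero or positive result for less-than,
+   equal and greater-than respectively, as the EABI requires.  */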
+FUNC_START aeabi_lcmp
+ cmp xxh, yyh
+ beq 1f
+ bgt 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+ RET
+1:
+ sub r0, xxl, yyl
+ beq 1f
+ bhi 2f
+ mov r0, #1
+ neg r0, r0
+ RET
+2:
+ mov r0, #1
+1:
+ RET
+ FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+
+#ifdef L_aeabi_ulcmp
+
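+/* Unsigned counterpart of __aeabi_lcmp above.  */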
+FUNC_START aeabi_ulcmp
+ cmp xxh, yyh
+ bne 1f
+ sub r0, xxl, yyl
+ beq 2f
+1:
+ bcs 1f
+ mov r0, #1
+ neg r0, r0
+ RET
+1:
+ mov r0, #1
+2:
+ RET
+ FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
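+/* If the divisor (yyh:yyl) is zero, set the dividend to the result the
+   run-time ABI recommends for division by zero -- all-ones for unsigned
+   divisions, LONG_LONG_MAX/LONG_LONG_MIN for signed divisions of
+   positive/negative values, and zero when the dividend is zero too --
+   and then branch to __aeabi_ldiv0.  Compare the Thumb-2/ARM version of
+   this macro in bpabi.S.  */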
+.macro test_div_by_zero signed
+ cmp yyh, #0
+ bne 7f
+ cmp yyl, #0
+ bne 7f
+ cmp xxh, #0
+ bne 2f
+ cmp xxl, #0
+2:
+ .ifc \signed, unsigned
+ beq 3f
+ mov xxh, #0
+ mvn xxh, xxh @ 0xffffffff
+ mov xxl, xxh
+3:
+ .else
+ beq 5f
+ blt 6f
+ mov xxl, #0
+ mvn xxl, xxl @ 0xffffffff
+ lsr xxh, xxl, #1 @ 0x7fffffff
+ b 5f
+6: mov xxh, #0x80
+ lsl xxh, xxh, #24 @ 0x80000000
+ mov xxl, #0
+5:
+ .endif
+ @ tailcalls are tricky on v6-m.
+ push {r0, r1, r2}
+ ldr r0, 1f
+ adr r1, 1f
+ add r0, r1
+ str r0, [sp, #8]
+ @ We know we are not on armv4t, so pop pc is safe.
+ pop {r0, r1, pc}
+ .align 2
+1:
+ .word __aeabi_ldiv0 - 1b
+7:
+.endm
+
+#ifdef L_aeabi_ldivmod
+
+FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_ldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_ldivmod
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
+ push {r0, r1}
+ mov r0, sp
+ push {r0, lr}
+ ldr r0, [sp, #8]
+ bl SYM(__gnu_uldivmod_helper)
+ ldr r3, [sp, #4]
+ mov lr, r3
+ add sp, sp, #8
+ pop {r2, r3}
+ RET
+ FUNC_END aeabi_uldivmod
+
+#endif /* L_aeabi_uldivmod */
+
+#ifdef L_arm_addsubsf3
+
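+/* __aeabi_frsub (a, b) computes b - a: flip the sign bit of the first
+   operand, then add.  */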
+FUNC_START aeabi_frsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor r0, r0, r4
+ bl __aeabi_fadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_frsub
+
+#endif /* L_arm_addsubsf3 */
+
+#ifdef L_arm_cmpsf2
+
+FUNC_START aeabi_cfrcmple
+
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ b 6f
+
+FUNC_START aeabi_cfcmpeq
+FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __lesf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cfcmple
+ FUNC_END aeabi_cfcmpeq
+ FUNC_END aeabi_cfrcmple
+
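+/* __eqsf2 returns zero exactly when its operands compare equal; the
+   negate-and-add sequence below computes 1 - result, giving the
+   nonzero/zero answer __aeabi_fcmpeq must return.  */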
+FUNC_START aeabi_fcmpeq
+
+ push {r4, lr}
+ bl __eqsf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmpeq
+
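+/* Expand one __aeabi_fcmp\cond entry point: call the soft-float helper
+   named by \helper, then turn its three-way result into the 1/0 value
+   the EABI comparison routines return.  */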
+.macro COMPARISON cond, helper, mode=sf2
+FUNC_START aeabi_fcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_fcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpsf2 */
+
+#ifdef L_arm_addsubdf3
+
+FUNC_START aeabi_drsub
+
+ push {r4, lr}
+ mov r4, #1
+ lsl r4, #31
+ eor xxh, xxh, r4
+ bl __aeabi_dadd
+ pop {r4, pc}
+
+ FUNC_END aeabi_drsub
+
+#endif /* L_arm_addsubdf3 */
+
+#ifdef L_arm_cmpdf2
+
+FUNC_START aeabi_cdrcmple
+
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+ b 6f
+
+FUNC_START aeabi_cdcmpeq
+FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: push {r0, r1, r2, r3, r4, lr}
+ bl __ledf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ bmi 1f
+ mov r1, #0
+ cmn r0, r1
+1:
+ pop {r0, r1, r2, r3, r4, pc}
+
+ FUNC_END aeabi_cdcmple
+ FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
+
+FUNC_START aeabi_dcmpeq
+
+ push {r4, lr}
+ bl __eqdf2
+ neg r0, r0
+ add r0, r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmpeq
+
+.macro COMPARISON cond, helper, mode=df2
+FUNC_START aeabi_dcmp\cond
+
+ push {r4, lr}
+ bl __\helper\mode
+ cmp r0, #0
+ b\cond 1f
+ mov r0, #0
+ pop {r4, pc}
+1:
+ mov r0, #1
+ pop {r4, pc}
+
+ FUNC_END aeabi_dcmp\cond
+.endm
+
+COMPARISON lt, le
+COMPARISON le, le
+COMPARISON gt, ge
+COMPARISON ge, ge
+
+#endif /* L_arm_cmpdf2 */
diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S
new file mode 100644
index 000000000..2ff338927
--- /dev/null
+++ b/gcc/config/arm/bpabi.S
@@ -0,0 +1,163 @@
+/* Miscellaneous BPABI functions.
+
+ Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifdef L_aeabi_lcmp
+
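+/* ARM/Thumb-2 flavour of the 64-bit three-way comparisons; conditional
+   execution selects the -1/0/+1 result without extra branches.  */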
+ARM_FUNC_START aeabi_lcmp
+ cmp xxh, yyh
+ do_it lt
+ movlt r0, #-1
+ do_it gt
+ movgt r0, #1
+ do_it ne
+ RETc(ne)
+ subs r0, xxl, yyl
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ RET
+ FUNC_END aeabi_lcmp
+
+#endif /* L_aeabi_lcmp */
+
+#ifdef L_aeabi_ulcmp
+
+ARM_FUNC_START aeabi_ulcmp
+ cmp xxh, yyh
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ do_it ne
+ RETc(ne)
+ cmp xxl, yyl
+ do_it lo
+ movlo r0, #-1
+ do_it hi
+ movhi r0, #1
+ do_it eq
+ moveq r0, #0
+ RET
+ FUNC_END aeabi_ulcmp
+
+#endif /* L_aeabi_ulcmp */
+
+.macro test_div_by_zero signed
+/* Tail-call to the divide-by-zero handler, which the user may override,
+   so that unwinding works properly.  */
+#if defined(__thumb2__)
+ cbnz yyh, 1f
+ cbnz yyl, 1f
+ cmp xxh, #0
+ do_it eq
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ beq 2f
+ mov xxh, #0xffffffff
+ mov xxl, xxh
+2:
+ .else
+ do_it lt, t
+ movlt xxl, #0
+ movlt xxh, #0x80000000
+ do_it gt, t
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+1:
+#else
+ /* Note: Thumb-1 code calls via an ARM shim on processors which
+ support ARM mode. */
+ cmp yyh, #0
+ cmpeq yyl, #0
+ bne 2f
+ cmp xxh, #0
+ cmpeq xxl, #0
+ .ifc \signed, unsigned
+ movne xxh, #0xffffffff
+ movne xxl, #0xffffffff
+ .else
+ movlt xxh, #0x80000000
+ movlt xxl, #0
+ movgt xxh, #0x7fffffff
+ movgt xxl, #0xffffffff
+ .endif
+ b SYM (__aeabi_ldiv0) __PLT__
+2:
+#endif
+.endm
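
In words: when the divisor is zero the macro loads a saturated result
into xxh:xxl before tail-calling __aeabi_ldiv0. A small C sketch of the
values chosen (illustrative only; the LLONG_* spellings come from
<limits.h>, not from this file):

#include <limits.h>

/* Signed case: saturate toward the sign of the numerator.  */
long long sdiv0_value_sketch (long long numerator)
{
  if (numerator > 0)
    return LLONG_MAX;               /* movgt: 0x7fffffff:0xffffffff */
  if (numerator < 0)
    return LLONG_MIN;               /* movlt: 0x80000000:0x00000000 */
  return 0;                         /* 0 / 0 keeps the zero result  */
}

/* Unsigned case: all-ones unless the numerator is also zero.  */
unsigned long long udiv0_value_sketch (unsigned long long numerator)
{
  return numerator ? ULLONG_MAX : 0;
}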
+
+#ifdef L_aeabi_ldivmod
+
+ARM_FUNC_START aeabi_ldivmod
+ test_div_by_zero signed
+
+ sub sp, sp, #8
+#if defined(__thumb2__)
+ mov ip, sp
+ push {ip, lr}
+#else
+ do_push {sp, lr}
+#endif
+ bl SYM(__gnu_ldivmod_helper) __PLT__
+ ldr lr, [sp, #4]
+ add sp, sp, #8
+ do_pop {r2, r3}
+ RET
+
+#endif /* L_aeabi_ldivmod */
+
+#ifdef L_aeabi_uldivmod
+
+ARM_FUNC_START aeabi_uldivmod
+ test_div_by_zero unsigned
+
+ sub sp, sp, #8
+#if defined(__thumb2__)
+ mov ip, sp
+ push {ip, lr}
+#else
+ do_push {sp, lr}
+#endif
+ bl SYM(__gnu_uldivmod_helper) __PLT__
+ ldr lr, [sp, #4]
+ add sp, sp, #8
+ do_pop {r2, r3}
+ RET
+
+#endif /* L_aeabi_uldivmod */
+
diff --git a/gcc/config/arm/bpabi.c b/gcc/config/arm/bpabi.c
new file mode 100644
index 000000000..283bdc0ac
--- /dev/null
+++ b/gcc/config/arm/bpabi.c
@@ -0,0 +1,56 @@
+/* Miscellaneous BPABI functions.
+
+ Copyright (C) 2003, 2004, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern long long __divdi3 (long long, long long);
+extern unsigned long long __udivdi3 (unsigned long long,
+ unsigned long long);
+extern long long __gnu_ldivmod_helper (long long, long long, long long *);
+extern unsigned long long __gnu_uldivmod_helper (unsigned long long,
+ unsigned long long,
+ unsigned long long *);
+
+
+long long
+__gnu_ldivmod_helper (long long a,
+ long long b,
+ long long *remainder)
+{
+ long long quotient;
+
+ quotient = __divdi3 (a, b);
+ *remainder = a - b * quotient;
+ return quotient;
+}
+
+unsigned long long
+__gnu_uldivmod_helper (unsigned long long a,
+ unsigned long long b,
+ unsigned long long *remainder)
+{
+ unsigned long long quotient;
+
+ quotient = __udivdi3 (a, b);
+ *remainder = a - b * quotient;
+ return quotient;
+}
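
The contract of both helpers: the quotient truncates toward zero (the C
semantics of __divdi3/__udivdi3) and the remainder is derived so that
a == b * quotient + remainder always holds. An illustrative check,
assuming the helper is linked in from libgcc:

#include <assert.h>

extern long long __gnu_ldivmod_helper (long long, long long, long long *);

void check_ldivmod (void)
{
  long long rem;
  long long quot = __gnu_ldivmod_helper (-7, 2, &rem);

  assert (quot == -3 && rem == -1); /* truncation toward zero */
  assert (-7 == 2 * quot + rem);    /* the defining identity  */
}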
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
new file mode 100644
index 000000000..7b5ee6231
--- /dev/null
+++ b/gcc/config/arm/bpabi.h
@@ -0,0 +1,125 @@
+/* Configuration file for ARM BPABI targets.
+ Copyright (C) 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Use the AAPCS ABI by default. */
+#define ARM_DEFAULT_ABI ARM_ABI_AAPCS
+
+/* Assume that AAPCS ABIs should adhere to the full BPABI. */
+#define TARGET_BPABI (TARGET_AAPCS_BASED)
+
+/* BPABI targets use EABI frame unwinding tables. */
+#undef ARM_UNWIND_INFO
+#define ARM_UNWIND_INFO 1
+
+/* Section 4.1 of the AAPCS requires the use of VFP format. */
+#undef FPUTYPE_DEFAULT
+#define FPUTYPE_DEFAULT "vfp"
+
+/* TARGET_BIG_ENDIAN_DEFAULT is set in
+ config.gcc for big endian configurations. */
+#if TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT MASK_BIG_END
+#else
+#define TARGET_ENDIAN_DEFAULT 0
+#endif
+
+/* EABI targets should enable interworking by default. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_INTERWORK | TARGET_ENDIAN_DEFAULT)
+
+/* The ARM BPABI functions return a boolean; they use no special
+ calling convention. */
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) TARGET_BPABI
+
+/* The BPABI integer comparison routines return { -1, 0, 1 }. */
+#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI
+
+#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\
+ "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
+
+#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
+ "|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
+
+/* Tell the assembler to build BPABI binaries. */
+#undef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC \
+ "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
+
+#ifndef SUBTARGET_EXTRA_LINK_SPEC
+#define SUBTARGET_EXTRA_LINK_SPEC ""
+#endif
+
+/* The generic link spec in elf.h does not support shared libraries. */
+#define BPABI_LINK_SPEC \
+ "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \
+ "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \
+ "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC BPABI_LINK_SPEC
+
+/* The BPABI requires that we always use an out-of-line implementation
+ of RTTI comparison, even if the target supports weak symbols,
+ because the same object file might be used on a target that does
+ not support merging symbols across DLL boundaries. This macro is
+ broken out separately so that it can be used within
+ TARGET_OS_CPP_BUILTINS in configuration files for systems based on
+ the BPABI. */
+#define TARGET_BPABI_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \
+ } \
+ while (false)
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ TARGET_BPABI_CPP_BUILTINS()
+
+/* The BPABI specifies the use of .{init,fini}_array. Therefore, we
+ do not want GCC to put anything into the .{init,fini} sections. */
+#undef INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#define INIT_ARRAY_SECTION_ASM_OP ARM_EABI_CTORS_SECTION_OP
+#define FINI_ARRAY_SECTION_ASM_OP ARM_EABI_DTORS_SECTION_OP
+
+/* The legacy _mcount implementation assumes r11 points to a
+ 4-word APCS frame. This is generally not true for EABI targets,
+ particularly not in Thumb mode. We assume the mcount
+ implementation does not require a counter variable (hence the
+ "nc" -- No Counter -- suffix of __gnu_mcount_nc).
+ Note that __gnu_mcount_nc will be entered with a misaligned stack.
+ This is OK because it uses a special calling convention anyway. */
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+#undef ARM_FUNCTION_PROFILER
+#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \
+{ \
+ fprintf (STREAM, "\tpush\t{lr}\n"); \
+ fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \
+}
+
+#undef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+
+/* __gnu_mcount_nc restores the original LR value before returning. Ensure
+ that there is no unnecessary hook set up. */
+#undef PROFILE_HOOK
diff --git a/gcc/config/arm/cirrus.md b/gcc/config/arm/cirrus.md
new file mode 100644
index 000000000..f08da0bdc
--- /dev/null
+++ b/gcc/config/arm/cirrus.md
@@ -0,0 +1,540 @@
+;; Cirrus EP9312 "Maverick" ARM floating point co-processor description.
+;; Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;; Written by Aldy Hernandez (aldyh@redhat.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+; Cirrus types for invalid insn combinations
+; not Not a cirrus insn
+; normal Any Cirrus insn not covered by the special cases below
+; double cfldrd, cfldr64, cfstrd, cfstr64
+; compare cfcmps, cfcmpd, cfcmp32, cfcmp64
+; move cfmvdlr, cfmvdhr, cfmvsr, cfmv64lr, cfmv64hr
+(define_attr "cirrus" "not,normal,double,compare,move" (const_string "not"))
+
+
+(define_insn "cirrus_adddi3"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (plus:DI (match_operand:DI 1 "cirrus_fp_register" "v")
+ (match_operand:DI 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfadd64%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_addsi3"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (plus:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfadd32%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_addsf3"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (plus:SF (match_operand:SF 1 "cirrus_fp_register" "v")
+ (match_operand:SF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfadds%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_adddf3"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (plus:DF (match_operand:DF 1 "cirrus_fp_register" "v")
+ (match_operand:DF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfaddd%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_subdi3"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (minus:DI (match_operand:DI 1 "cirrus_fp_register" "v")
+ (match_operand:DI 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfsub64%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_subsi3_insn"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (minus:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfsub32%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_subsf3"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (minus:SF (match_operand:SF 1 "cirrus_fp_register" "v")
+ (match_operand:SF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfsubs%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_subdf3"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (minus:DF (match_operand:DF 1 "cirrus_fp_register" "v")
+ (match_operand:DF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfsubd%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_mulsi3"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (mult:SI (match_operand:SI 2 "cirrus_fp_register" "v")
+ (match_operand:SI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfmul32%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (mult:DI (match_operand:DI 2 "cirrus_fp_register" "v")
+ (match_operand:DI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmul64%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_dmult")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_mulsi3addsi"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (plus:SI
+ (mult:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_fp_register" "v"))
+ (match_operand:SI 3 "cirrus_fp_register" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfmac32%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+;; Cirrus SI multiply-subtract
+(define_insn "*cirrus_mulsi3subsi"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (minus:SI
+ (match_operand:SI 1 "cirrus_fp_register" "0")
+ (mult:SI (match_operand:SI 2 "cirrus_fp_register" "v")
+ (match_operand:SI 3 "cirrus_fp_register" "v"))))]
+ "0 && TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmsc32%?\\t%V0, %V2, %V3"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_mulsf3"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (mult:SF (match_operand:SF 1 "cirrus_fp_register" "v")
+ (match_operand:SF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmuls%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_farith")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_muldf3"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (mult:DF (match_operand:DF 1 "cirrus_fp_register" "v")
+ (match_operand:DF 2 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmuld%?\\t%V0, %V1, %V2"
+ [(set_attr "type" "mav_dmult")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_ashl_const"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (ashift:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_shift_const" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfsh32%?\\t%V0, %V1, #%s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_ashiftrt_const"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (ashiftrt:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_shift_const" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfsh32%?\\t%V0, %V1, #-%s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_ashlsi3"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (ashift:SI (match_operand:SI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfrshl32%?\\t%V1, %V0, %s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "ashldi3_cirrus"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (ashift:DI (match_operand:DI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfrshl64%?\\t%V1, %V0, %s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_ashldi_const"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (ashift:DI (match_operand:DI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_shift_const" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfsh64%?\\t%V0, %V1, #%s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_ashiftrtdi_const"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (ashiftrt:DI (match_operand:DI 1 "cirrus_fp_register" "v")
+ (match_operand:SI 2 "cirrus_shift_const" "")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfsh64%?\\t%V0, %V1, #-%s2"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_absdi2"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (abs:DI (match_operand:DI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfabs64%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+;; This doesn't really clobber ``cc''. Fixme: aldyh.
+(define_insn "*cirrus_negdi2"
+ [(set (match_operand:DI 0 "cirrus_fp_register" "=v")
+ (neg:DI (match_operand:DI 1 "cirrus_fp_register" "v")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfneg64%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_negsi2"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (neg:SI (match_operand:SI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfneg32%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_negsf2"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (neg:SF (match_operand:SF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfnegs%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_negdf2"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (neg:DF (match_operand:DF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfnegd%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+;; This doesn't really clobber the condition codes either.
+(define_insn "*cirrus_abssi2"
+ [(set (match_operand:SI 0 "cirrus_fp_register" "=v")
+ (abs:SI (match_operand:SI 1 "cirrus_fp_register" "v")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK && 0"
+ "cfabs32%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_abssf2"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (abs:SF (match_operand:SF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfabss%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_absdf2"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (abs:DF (match_operand:DF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfabsd%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+;; Convert Cirrus-SI to Cirrus-SF
+(define_insn "cirrus_floatsisf2"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (float:SF (match_operand:SI 1 "s_register_operand" "r")))
+ (clobber (match_scratch:DF 2 "=v"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmv64lr%?\\t%Z2, %1\;cfcvt32s%?\\t%V0, %Y2"
+ [(set_attr "length" "8")
+ (set_attr "cirrus" "move")]
+)
+
+(define_insn "cirrus_floatsidf2"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (float:DF (match_operand:SI 1 "s_register_operand" "r")))
+ (clobber (match_scratch:DF 2 "=v"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfmv64lr%?\\t%Z2, %1\;cfcvt32d%?\\t%V0, %Y2"
+ [(set_attr "length" "8")
+ (set_attr "cirrus" "move")]
+)
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (float:SF (match_operand:DI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcvt64s%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")])
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (float:DF (match_operand:DI 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcvt64d%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")])
+
+(define_insn "cirrus_truncsfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (fix:SI (fix:SF (match_operand:SF 1 "cirrus_fp_register" "v"))))
+ (clobber (match_scratch:DF 2 "=v"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cftruncs32%?\\t%Y2, %V1\;cfmvr64l%?\\t%0, %Z2"
+ [(set_attr "length" "8")
+ (set_attr "cirrus" "normal")]
+)
+
+(define_insn "cirrus_truncdfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (fix:SI (fix:DF (match_operand:DF 1 "cirrus_fp_register" "v"))))
+ (clobber (match_scratch:DF 2 "=v"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cftruncd32%?\\t%Y2, %V1\;cfmvr64l%?\\t%0, %Z2"
+ [(set_attr "length" "8")]
+)
+
+(define_insn "*cirrus_truncdfsf2"
+ [(set (match_operand:SF 0 "cirrus_fp_register" "=v")
+ (float_truncate:SF
+ (match_operand:DF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcvtds%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_extendsfdf2"
+ [(set (match_operand:DF 0 "cirrus_fp_register" "=v")
+ (float_extend:DF (match_operand:SF 1 "cirrus_fp_register" "v")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "cfcvtsd%?\\t%V0, %V1"
+ [(set_attr "cirrus" "normal")]
+)
+
+(define_insn "*cirrus_arm_movdi"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,o<>,v,r,v,m,v")
+ (match_operand:DI 1 "di_operand" "rIK,mi,r,r,v,mi,v,v"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return \"#\";
+ case 1:
+ case 2:
+ return output_move_double (operands);
+
+ case 3: return \"cfmv64lr%?\\t%V0, %Q1\;cfmv64hr%?\\t%V0, %R1\";
+ case 4: return \"cfmvr64l%?\\t%Q0, %V1\;cfmvr64h%?\\t%R0, %V1\";
+
+ case 5: return \"cfldr64%?\\t%V0, %1\";
+ case 6: return \"cfstr64%?\\t%V1, %0\";
+
+ /* Shifting by 0 will just copy %1 into %0. */
+ case 7: return \"cfsh64%?\\t%V0, %V1, #0\";
+
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4")
+ (set_attr "type" " *,load2,store2, *, *, load2,store2, *")
+ (set_attr "pool_range" " *,1020, *, *, *, 1020, *, *")
+ (set_attr "neg_pool_range" " *,1012, *, *, *, 1008, *, *")
+ (set_attr "cirrus" "not, not, not,move,normal,double,double,normal")]
+)
+
+;; Cirrus SI values have been outlawed. Look in arm.h for the comment
+;; on HARD_REGNO_MODE_OK.
+
+(define_insn "*cirrus_movsf_hard_insn"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=v,v,v,r,m,r,r,m")
+ (match_operand:SF 1 "general_operand" "v,mE,r,v,v,r,mE,r"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], SFmode))"
+ "@
+ cfcpys%?\\t%V0, %V1
+ cfldrs%?\\t%V0, %1
+ cfmvsr%?\\t%V0, %1
+ cfmvrs%?\\t%0, %V1
+ cfstrs%?\\t%V1, %0
+ mov%?\\t%0, %1
+ ldr%?\\t%0, %1\\t%@ float
+ str%?\\t%1, %0\\t%@ float"
+ [(set_attr "length" " *, *, *, *, *, 4, 4, 4")
+ (set_attr "type" " *, load1, *, *,store1, *,load1,store1")
+ (set_attr "pool_range" " *, 1020, *, *, *, *,4096, *")
+ (set_attr "neg_pool_range" " *, 1008, *, *, *, *,4084, *")
+ (set_attr "cirrus" "normal,normal,move,normal,normal,not, not, not")]
+)
+
+(define_insn "*cirrus_movdf_hard_insn"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Q,r,m,r,v,v,v,r,m")
+ (match_operand:DF 1 "general_operand" "Q,r,r,r,mF,v,mF,r,v,v"))]
+ "TARGET_ARM
+ && TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0: return \"ldm%?ia\\t%m1, %M0\\t%@ double\";
+ case 1: return \"stm%?ia\\t%m0, %M1\\t%@ double\";
+ case 2: return \"#\";
+ case 3: case 4: return output_move_double (operands);
+ case 5: return \"cfcpyd%?\\t%V0, %V1\";
+ case 6: return \"cfldrd%?\\t%V0, %1\";
+ case 7: return \"cfmvdlr\\t%V0, %Q1\;cfmvdhr%?\\t%V0, %R1\";
+ case 8: return \"cfmvrdl%?\\t%Q0, %V1\;cfmvrdh%?\\t%R0, %V1\";
+ case 9: return \"cfstrd%?\\t%V1, %0\";
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
+ (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4")
+ (set_attr "pool_range" " *, *, *, *, 252, *, 1020, *, *, *")
+ (set_attr "neg_pool_range" " *, *, *, *, 244, *, 1008, *, *, *")
+ (set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")]
+)
+
+(define_insn "*cirrus_thumb2_movdi"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,o<>,v,r,v,m,v")
+ (match_operand:DI 1 "di_operand" "rIK,mi,r,r,v,mi,v,v"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_MAVERICK"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ case 2:
+ return (output_move_double (operands));
+
+ case 3: return \"cfmv64lr%?\\t%V0, %Q1\;cfmv64hr%?\\t%V0, %R1\";
+ case 4: return \"cfmvr64l%?\\t%Q0, %V1\;cfmvr64h%?\\t%R0, %V1\";
+
+ case 5: return \"cfldr64%?\\t%V0, %1\";
+ case 6: return \"cfstr64%?\\t%V1, %0\";
+
+ /* Shifting by 0 will just copy %1 into %0. */
+ case 7: return \"cfsh64%?\\t%V0, %V1, #0\";
+
+ default: abort ();
+ }
+ }"
+ [(set_attr "length" " 8, 8, 8, 8, 8, 4, 4, 4")
+ (set_attr "type" " *,load2,store2, *, *, load2,store2, *")
+ (set_attr "pool_range" " *,4096, *, *, *, 1020, *, *")
+ (set_attr "neg_pool_range" " *, 0, *, *, *, 1008, *, *")
+ (set_attr "cirrus" "not, not, not,move,normal,double,double,normal")]
+)
+
+(define_insn "*thumb2_cirrus_movsf_hard_insn"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=v,v,v,r,m,r,r,m")
+ (match_operand:SF 1 "general_operand" "v,mE,r,v,v,r,mE,r"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], SFmode))"
+ "@
+ cfcpys%?\\t%V0, %V1
+ cfldrs%?\\t%V0, %1
+ cfmvsr%?\\t%V0, %1
+ cfmvrs%?\\t%0, %V1
+ cfstrs%?\\t%V1, %0
+ mov%?\\t%0, %1
+ ldr%?\\t%0, %1\\t%@ float
+ str%?\\t%1, %0\\t%@ float"
+ [(set_attr "length" " *, *, *, *, *, 4, 4, 4")
+ (set_attr "type" " *, load1, *, *,store1, *,load1,store1")
+ (set_attr "pool_range" " *, 1020, *, *, *, *,4096, *")
+ (set_attr "neg_pool_range" " *, 1008, *, *, *, *, 0, *")
+ (set_attr "cirrus" "normal,normal,move,normal,normal,not, not, not")]
+)
+
+(define_insn "*thumb2_cirrus_movdf_hard_insn"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Q,r,m,r,v,v,v,r,m")
+ (match_operand:DF 1 "general_operand" "Q,r,r,r,mF,v,mF,r,v,v"))]
+ "TARGET_THUMB2
+ && TARGET_HARD_FLOAT && TARGET_MAVERICK
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0: return \"ldm%?ia\\t%m1, %M0\\t%@ double\";
+ case 1: return \"stm%?ia\\t%m0, %M1\\t%@ double\";
+ case 2: case 3: case 4: return output_move_double (operands);
+ case 5: return \"cfcpyd%?\\t%V0, %V1\";
+ case 6: return \"cfldrd%?\\t%V0, %1\";
+ case 7: return \"cfmvdlr\\t%V0, %Q1\;cfmvdhr%?\\t%V0, %R1\";
+ case 8: return \"cfmvrdl%?\\t%Q0, %V1\;cfmvrdh%?\\t%R0, %V1\";
+ case 9: return \"cfstrd%?\\t%V1, %0\";
+ default: abort ();
+ }
+ }"
+ [(set_attr "type" "load1,store2, *,store2,load1, *, load1, *, *,store2")
+ (set_attr "length" " 4, 4, 8, 8, 8, 4, 4, 8, 8, 4")
+ (set_attr "pool_range" " *, *, *, *,4092, *, 1020, *, *, *")
+ (set_attr "neg_pool_range" " *, *, *, *, 0, *, 1008, *, *, *")
+ (set_attr "cirrus" " not, not,not, not, not,normal,double,move,normal,double")]
+)
+
diff --git a/gcc/config/arm/coff.h b/gcc/config/arm/coff.h
new file mode 100644
index 000000000..bd3e6f85d
--- /dev/null
+++ b/gcc/config/arm/coff.h
@@ -0,0 +1,86 @@
+/* Definitions of target machine for GNU compiler.
+ For ARM with COFF object format.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2005,
+ 2007 Free Software Foundation, Inc.
+ Contributed by Doug Evans (devans@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Note - it is important that this definition matches the one in tcoff.h. */
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/coff)", stderr)
+
+#undef TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APCS_FRAME)
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" }
+#endif
+
+/* This is COFF, but prefer stabs. */
+#define SDB_DEBUGGING_INFO 1
+
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+
+#define TARGET_ASM_FILE_START_APP_OFF true
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section
+
+/* Support the ctors/dtors and other sections. */
+
+#undef INIT_SECTION_ASM_OP
+
+/* Define this macro if jump tables (for `tablejump' insns) should be
+ output in the text section, along with the assembler instructions.
+ Otherwise, the readonly data section is used. */
+/* We put ARM and Thumb-2 jump tables in the text section, because it makes
+ the code more efficient, but for Thumb-1 it's better to put them out of
+ band unless we are generating compressed tables. */
+#define JUMP_TABLES_IN_TEXT_SECTION \
+ (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic)))
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata"
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"x\""
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"x\""
+
+/* Support the ctors/dtors sections for g++. */
+
+/* __CTOR_LIST__ and __DTOR_LIST__ must be defined by the linker script. */
+#define CTOR_LISTS_DEFINED_EXTERNALLY
+
+#undef DO_GLOBAL_CTORS_BODY
+#undef DO_GLOBAL_DTORS_BODY
+
+/* The ARM development system defines __main. */
+#define NAME__MAIN "__gccmain"
+#define SYMBOL__MAIN __gccmain
+
+#define SUPPORTS_INIT_PRIORITY 0
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
new file mode 100644
index 000000000..4e220e530
--- /dev/null
+++ b/gcc/config/arm/constraints.md
@@ -0,0 +1,335 @@
+;; Constraint definitions for ARM and Thumb
+;; Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The following register constraints have been used:
+;; - in ARM/Thumb-2 state: f, t, v, w, x, y, z
+;; - in Thumb state: h, b
+;; - in both states: l, c, k
+;; In ARM state, 'l' is an alias for 'r'
+
+;; The following normal constraints have been used:
+;; in ARM/Thumb-2 state: G, H, I, j, J, K, L, M
+;; in Thumb-1 state: I, J, K, L, M, N, O
+
+;; The following multi-letter normal constraints have been used:
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
+;; in Thumb-1 state: Pa, Pb, Pc, Pd
+;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
+
+;; The following memory constraints have been used:
+;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
+;; in ARM state: Uq
+
+
+(define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
+ "Legacy FPA registers @code{f0}-@code{f7}.")
+
+(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS"
+ "The VFP registers @code{s0}-@code{s31}.")
+
+(define_register_constraint "v" "TARGET_ARM ? CIRRUS_REGS : NO_REGS"
+ "The Cirrus Maverick co-processor registers.")
+
+(define_register_constraint "w"
+ "TARGET_32BIT ? (TARGET_VFPD32 ? VFP_REGS : VFP_LO_REGS) : NO_REGS"
+ "The VFP registers @code{d0}-@code{d15}, or @code{d0}-@code{d31} for VFPv3.")
+
+(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS"
+ "The VFP registers @code{d0}-@code{d7}.")
+
+(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS"
+ "The Intel iWMMX co-processor registers.")
+
+(define_register_constraint "z"
+ "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS"
+ "The Intel iWMMX GR registers.")
+
+(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS"
+ "In Thumb state the core registers @code{r0}-@code{r7}.")
+
+(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS"
+ "In Thumb state the core registers @code{r8}-@code{r15}.")
+
+(define_constraint "j"
+ "A constant suitable for a MOVW instruction. (ARM/Thumb-2)"
+ (and (match_test "TARGET_32BIT && arm_arch_thumb2")
+ (ior (match_code "high")
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff0000) == 0")))))
+
+(define_register_constraint "k" "STACK_REG"
+ "@internal The stack register.")
+
+(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS"
+ "@internal
+ Thumb only. The union of the low registers and the stack register.")
+
+(define_register_constraint "c" "CC_REG"
+ "@internal The condition code register.")
+
+(define_constraint "I"
+ "In ARM/Thumb-2 state a constant that can be used as an immediate value in a
+ Data Processing instruction. In Thumb-1 state a constant in the range
+ 0-255."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT ? const_ok_for_arm (ival)
+ : ival >= 0 && ival <= 255")))
+
+(define_constraint "J"
+ "In ARM/Thumb-2 state a constant in the range @minus{}4095-4095. In Thumb-1
+ state a constant in the range @minus{}255-@minus{}1."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT ? (ival >= -4095 && ival <= 4095)
+ : (ival >= -255 && ival <= -1)")))
+
+(define_constraint "K"
+ "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if
+ inverted. In Thumb-1 state a constant that satisfies the @code{I}
+ constraint multiplied by any power of 2."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT ? const_ok_for_arm (~ival)
+ : thumb_shiftable_const (ival)")))
+
+(define_constraint "L"
+ "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if
+ negated. In Thumb-1 state a constant in the range @minus{}7-7."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT ? const_ok_for_arm (-ival)
+ : (ival >= -7 && ival <= 7)")))
+
+;; The ARM state version is internal...
+;; @internal In ARM/Thumb-2 state a constant in the range 0-32 or any
+;; power of 2.
+(define_constraint "M"
+ "In Thumb-1 state a constant that is a multiple of 4 in the range 0-1020."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT ? ((ival >= 0 && ival <= 32)
+ || (((ival & (ival - 1)) & 0xFFFFFFFF) == 0))
+ : ival >= 0 && ival <= 1020 && (ival & 3) == 0")))
+
+(define_constraint "N"
+ "Thumb-1 state a constant in the range 0-31."
+ (and (match_code "const_int")
+ (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)")))
+
+(define_constraint "O"
+ "In Thumb-1 state a constant that is a multiple of 4 in the range
+ @minus{}508-508."
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508
+ && ((ival & 3) == 0)")))
+
+(define_constraint "Pa"
+ "@internal In Thumb-1 state a constant in the range -510 to +510"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510
+ && (ival > 255 || ival < -255)")))
+
+(define_constraint "Pb"
+ "@internal In Thumb-1 state a constant in the range -262 to +262"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262
+ && (ival > 255 || ival < -255)")))
+
+(define_constraint "Pc"
+ "@internal In Thumb-1 state a constant that is in the range 1021 to 1275"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1
+ && ival > 1020 && ival <= 1275")))
+
+(define_constraint "Pd"
+ "@internal In Thumb-1 state a constant in the range 0 to 7"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB1 && ival >= 0 && ival <= 7")))
+
+(define_constraint "Ps"
+ "@internal In Thumb-2 state a constant in the range -255 to +255"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 255")))
+
+(define_constraint "Pt"
+ "@internal In Thumb-2 state a constant in the range -7 to +7"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -7 && ival <= 7")))
+
+(define_constraint "Pu"
+ "@internal In Thumb-2 state a constant in the range +1 to +8"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 8")))
+
+(define_constraint "Pv"
+ "@internal In Thumb-2 state a constant in the range -255 to 0"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0")))
+
+(define_constraint "Pw"
+ "@internal In Thumb-2 state a constant in the range -255 to -1"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1")))
+
+(define_constraint "Px"
+ "@internal In Thumb-2 state a constant in the range -7 to -1"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1")))
+
+(define_constraint "G"
+ "In ARM/Thumb-2 state a valid FPA immediate constant."
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && arm_const_double_rtx (op)")))
+
+(define_constraint "H"
+ "In ARM/Thumb-2 state a valid FPA immediate constant when negated."
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && neg_const_double_rtx_ok_for_fpa (op)")))
+
+(define_constraint "Dz"
+ "@internal
+ In ARM/Thumb-2 state a vector of constant zeros."
+ (and (match_code "const_vector")
+ (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+
+(define_constraint "Da"
+ "@internal
+ In ARM/Thumb-2 state a const_int, const_double or const_vector that can
+ be generated with two Data Processing insns."
+ (and (match_code "const_double,const_int,const_vector")
+ (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 2")))
+
+(define_constraint "Db"
+ "@internal
+ In ARM/Thumb-2 state a const_int, const_double or const_vector that can
+ be generated with three Data Processing insns."
+ (and (match_code "const_double,const_int,const_vector")
+ (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 3")))
+
+(define_constraint "Dc"
+ "@internal
+ In ARM/Thumb-2 state a const_int, const_double or const_vector that can
+ be generated with four Data Processing insns. This pattern is disabled
+ if optimizing for space or when we have load-delay slots to fill."
+ (and (match_code "const_double,const_int,const_vector")
+ (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
+ && !(optimize_size || arm_ld_sched)")))
+
+(define_constraint "Di"
+ "@internal
+ In ARM/Thumb-2 state a const_int or const_double where both the high
+ and low SImode words can be generated as immediates in 32-bit instructions."
+ (and (match_code "const_double,const_int")
+ (match_test "TARGET_32BIT && arm_const_double_by_immediates (op)")))
+
+(define_constraint "Dn"
+ "@internal
+ In ARM/Thumb-2 state a const_vector which can be loaded with a Neon vmov
+ immediate instruction."
+ (and (match_code "const_vector")
+ (match_test "TARGET_32BIT
+ && imm_for_neon_mov_operand (op, GET_MODE (op))")))
+
+(define_constraint "Dl"
+ "@internal
+ In ARM/Thumb-2 state a const_vector which can be used with a Neon vorr or
+ vbic instruction."
+ (and (match_code "const_vector")
+ (match_test "TARGET_32BIT
+ && imm_for_neon_logic_operand (op, GET_MODE (op))")))
+
+(define_constraint "DL"
+ "@internal
+ In ARM/Thumb-2 state a const_vector which can be used with a Neon vorn or
+ vand instruction."
+ (and (match_code "const_vector")
+ (match_test "TARGET_32BIT
+ && imm_for_neon_inv_logic_operand (op, GET_MODE (op))")))
+
+(define_constraint "Dv"
+ "@internal
+ In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts
+ instruction."
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)")))
+
+(define_constraint "Dy"
+ "@internal
+ In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd
+ instruction."
+ (and (match_code "const_double")
+ (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)")))
+
+(define_memory_constraint "Ut"
+ "@internal
+ In ARM/Thumb-2 state an address valid for loading/storing opaque structure
+ types wider than TImode."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_struct_mem_operand (op)")))
+
+(define_memory_constraint "Uv"
+ "@internal
+ In ARM/Thumb-2 state a valid VFP load/store address."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)")))
+
+(define_memory_constraint "Uy"
+ "@internal
+ In ARM/Thumb-2 state a valid iWMMX load/store address."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)")))
+
+(define_memory_constraint "Un"
+ "@internal
+ In ARM/Thumb-2 state a valid address for Neon doubleword vector
+ load/store instructions."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)")))
+
+(define_memory_constraint "Um"
+ "@internal
+ In ARM/Thumb-2 state a valid address for Neon element and structure
+ load/store instructions."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+
+(define_memory_constraint "Us"
+ "@internal
+ In ARM/Thumb-2 state a valid address for non-offset loads/stores of
+ quad-word values in four ARM registers."
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)")))
+
+(define_memory_constraint "Uq"
+ "@internal
+ In ARM state an address valid in ldrsb instructions."
+ (and (match_code "mem")
+ (match_test "TARGET_ARM
+ && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0),
+ SIGN_EXTEND, 0)")))
+
+(define_memory_constraint "Q"
+ "@internal
+ In ARM/Thumb-2 state an address that is a single base register."
+ (and (match_code "mem")
+ (match_test "REG_P (XEXP (op, 0))")))
+
+;; We used to have constraint letters for S and R in ARM state, but
+;; all uses of these now appear to have been removed.
+
+;; Additionally, we used to have a Q constraint in Thumb state, but
+;; this wasn't really a valid memory constraint. Again, all uses of
+;; this now seem to have been removed.
diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md
new file mode 100644
index 000000000..eb154e298
--- /dev/null
+++ b/gcc/config/arm/cortex-a5.md
@@ -0,0 +1,297 @@
+;; ARM Cortex-A5 pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a5")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Functional units.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The integer (ALU) pipeline. There are five DPU pipeline
+;; stages. However, the decode/issue stages operate the same way for all
+;; instructions, so we do not model them. We only need to model the
+;; first execute stage because instructions always advance one stage
+;; per cycle in order. Only branch instructions may dual-issue, so a
+;; single unit covers all of the LS, ALU, MAC and FPU pipelines.
+
+(define_cpu_unit "cortex_a5_ex1" "cortex_a5")
+
+;; The branch pipeline. Branches can dual-issue with other instructions
+;; (except when those instructions take multiple cycles to issue).
+
+(define_cpu_unit "cortex_a5_branch" "cortex_a5")
+
+;; Pseudo-unit for blocking the multiply pipeline when a double-precision
+;; multiply is in progress.
+
+(define_cpu_unit "cortex_a5_fpmul_pipe" "cortex_a5")
+
+;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
+;; of the add pipeline by fmac instructions, etc.
+
+(define_cpu_unit "cortex_a5_fpadd_pipe" "cortex_a5")
+
+;; Floating-point div/sqrt (long latency, out-of-order completion).
+
+(define_cpu_unit "cortex_a5_fp_div_sqrt" "cortex_a5")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a5_alu" 2
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "alu"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_alu_shift" 2
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
+ "cortex_a5_ex1")
+
+;; Forwarding path for unshifted operands.
+
+(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift"
+ "cortex_a5_alu")
+
+(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift"
+ "cortex_a5_alu_shift"
+ "arm_no_early_alu_shift_dep")
+
+;; The multiplier pipeline can forward results from the wr stage only,
+;; so there's no need to specify bypasses.
+
+(define_insn_reservation "cortex_a5_mul" 2
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "mult"))
+ "cortex_a5_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/store instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Address-generation happens in the issue stage, which is one stage behind
+;; the ex1 stage (the first stage we care about for scheduling purposes). The
+;; dc1 stage is parallel with ex1, dc2 with ex2 and rot with wr.
+
+(define_insn_reservation "cortex_a5_load1" 2
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "load_byte,load1"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_store1" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "store1"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_load2" 3
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "load2"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_store2" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "store2"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_load3" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "load3"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
+ cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_store3" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "store3"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
+ cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_load4" 5
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "load3"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
+ cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_store4" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "store3"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\
+ cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branches.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Direct branches are the only instructions we can dual-issue (also IT and
+;; nop, but those aren't very interesting for scheduling). (The latency here
+;; is meant to represent when the branch actually takes place, but may not be
+;; entirely correct.)
+
+(define_insn_reservation "cortex_a5_branch" 3
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "branch,call"))
+ "cortex_a5_branch")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point arithmetic.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a5_fpalu" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
+ fcmps, fcmpd"))
+ "cortex_a5_ex1+cortex_a5_fpadd_pipe")
+
+;; For fconsts and fconstd, 8-bit immediate data is passed directly from
+;; f1 to f3 (which I think reduces the latency by one cycle).
+
+(define_insn_reservation "cortex_a5_fconst" 3
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fconsts,fconstd"))
+ "cortex_a5_ex1+cortex_a5_fpadd_pipe")
+
+;; We should try not to issue a single-precision multiplication in
+;; the middle of a double-precision multiplication operation (hence
+;; the use of cortex_a5_fpmul_pipe).
+
+(define_insn_reservation "cortex_a5_fpmuls" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fmuls"))
+ "cortex_a5_ex1+cortex_a5_fpmul_pipe")
+
+;; For single-precision multiply-accumulate, the add (accumulate) is issued
+;; whilst the multiply is in F4. The multiply result can then be forwarded
+;; from F5 to F1. The issue unit is only used once (when we first start
+;; processing the instruction), but the usage of the FP add pipeline could
+;; block other instructions attempting to use it simultaneously. We try to
+;; avoid that using cortex_a5_fpadd_pipe.
+
+(define_insn_reservation "cortex_a5_fpmacs" 8
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fmacs"))
+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe")
+
+;; Non-multiply instructions can issue during the middle two cycles of a
+;; double-precision multiply. Note that it isn't entirely clear when a branch
+;; can dual-issue when a multi-cycle multiplication is in progress; we ignore
+;; that for now though.
+
+(define_insn_reservation "cortex_a5_fpmuld" 7
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fmuld"))
+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\
+ cortex_a5_ex1+cortex_a5_fpmul_pipe")
+
+(define_insn_reservation "cortex_a5_fpmacd" 11
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fmacd"))
+ "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\
+ cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point divide/square root instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ??? Not sure if the 14 cycles taken for single-precision divide to complete
+;; includes the time taken for the special instruction used to collect the
+;; result to travel down the multiply pipeline, or not. Assuming so. (If
+;; that's wrong, the latency should be increased by a few cycles.)
+
+;; fsqrt takes one cycle less, but that is not modelled, nor is the use of the
+;; multiply pipeline to collect the divide/square-root result.
+
+(define_insn_reservation "cortex_a5_fdivs" 14
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fdivs"))
+ "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13")
+
+;; ??? Similarly for fdivd.
+
+(define_insn_reservation "cortex_a5_fdivd" 29
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "fdivd"))
+ "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP to/from core transfers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; FP loads take data from wr/rot/f3.
+
+;; Core-to-VFP transfers use the multiply pipeline.
+
+(define_insn_reservation "cortex_a5_r2f" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "r_2_f"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_f2r" 2
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_2_r"))
+ "cortex_a5_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP flag transfer.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ??? The flag forwarding from fmstat to the ex2 stage of the second
+;; instruction is not modeled at present.
+
+(define_insn_reservation "cortex_a5_f_flags" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_flag"))
+ "cortex_a5_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP load/store.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a5_f_loads" 4
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_loads"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_f_loadd" 5
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_loadd"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_f_stores" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_stores"))
+ "cortex_a5_ex1")
+
+(define_insn_reservation "cortex_a5_f_stored" 0
+ (and (eq_attr "tune" "cortexa5")
+ (eq_attr "type" "f_stored"))
+ "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1")
+
+;; Load-to-use for floating-point values has a penalty of one cycle,
+;; i.e. a latency of two.
+
+(define_bypass 2 "cortex_a5_f_loads"
+ "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\
+ cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\
+ cortex_a5_f2r")
+
+(define_bypass 3 "cortex_a5_f_loadd"
+ "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\
+ cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\
+ cortex_a5_f2r")
diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md
new file mode 100644
index 000000000..03f52b2df
--- /dev/null
+++ b/gcc/config/arm/cortex-a8-neon.md
@@ -0,0 +1,1312 @@
+;; ARM Cortex-A8 NEON scheduling description.
+;; Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "cortex_a8_neon")
+
+;; Only one load, store, permute, MCR or MRC instruction can be issued
+;; per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")
+
+;; Only one data-processing instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")
+
+;; The VFPLite unit (non-pipelined).
+(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")
+
+;; We need a special mutual exclusion (to be used in addition to
+;; cortex_a8_neon_issue_dp) for the case when an instruction such as
+;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
+;; E2 of the floating-point add pipeline. On the cycle preceding that
+;; forwarding we must prevent issue of any instruction to the floating-point
+;; add pipeline, but still allow issue of a data-processing instruction
+;; to any of the other pipelines.
+(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")
+
+;; Patterns of reservation.
+;; We model the NEON issue units as running in parallel with the core ones.
+;; We assume that multi-cycle NEON instructions get decomposed into
+;; micro-ops as they are issued into the NEON pipeline, and not as they
+;; are issued into the ARM pipeline. Dual issue may not occur except
+;; on the first and last cycles of a multi-cycle instruction, but it
+;; is unclear whether two multi-cycle instructions can issue together (in
+;; this model they cannot). It is also unclear whether a multi-cycle
+;; and a single-cycle instruction that could potentially issue together
+;; will only do so if (say) the single-cycle one precedes the other.
+
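+;; As a reading aid, standard reservation-string syntax: "," advances to
+;; the next cycle, "+" reserves several units within one cycle, "|"
+;; chooses one of several alternatives, "nothing" idles a cycle, and
+;; "unit*N" holds a unit for N consecutive cycles. A hypothetical
+;; two-cycle pattern (not one used below) could read:
+;;
+;;   (define_reservation "example_dp_then_perm"
+;;     "cortex_a8_neon_issue_dp,cortex_a8_neon_issue_perm")
+;;
+;; occupying the data-processing issue unit on the first cycle and the
+;; permute issue unit on the second.
+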
+(define_reservation "cortex_a8_neon_dp"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp")
+(define_reservation "cortex_a8_neon_dp_2"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+ cortex_a8_neon_issue_dp")
+(define_reservation "cortex_a8_neon_dp_4"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp")
+
+(define_reservation "cortex_a8_neon_fadd"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+ cortex_a8_neon_issue_fadd")
+(define_reservation "cortex_a8_neon_fadd_2"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+ cortex_a8_neon_issue_fadd,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd")
+
+(define_reservation "cortex_a8_neon_perm"
+ "(cortex_a8_alu0|cortex_a8_alu1)+\
+ cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_perm_2"
+ "(cortex_a8_alu0|cortex_a8_alu1)+\
+ cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_perm_3"
+ "(cortex_a8_alu0|cortex_a8_alu1)+\
+ cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+
+(define_reservation "cortex_a8_neon_ls"
+ "cortex_a8_issue_ls+cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_2"
+ "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_3"
+ "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_4"
+ "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+(define_reservation "cortex_a8_neon_ls_5"
+ "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
+ cortex_a8_neon_issue_perm")
+
+(define_reservation "cortex_a8_neon_fmul_then_fadd"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+ nothing*3,\
+ cortex_a8_neon_issue_fadd")
+(define_reservation "cortex_a8_neon_fmul_then_fadd_2"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
+ cortex_a8_neon_issue_dp,\
+ nothing*2,\
+ cortex_a8_neon_issue_fadd,\
+ cortex_a8_neon_issue_fadd")
+
+;; VFP instructions can only be single-issued into the NEON pipeline.
+(define_reservation "cortex_a8_vfp"
+ "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
+ cortex_a8_neon_issue_perm+cortex_a8_vfplite")
+
+;; VFP instructions.
+;; The VFPLite unit that executes these isn't pipelined; we give the
+;; worst-case latencies (and choose the double-precision ones where we
+;; do not distinguish between precisions). We assume RunFast mode is not
+;; enabled and therefore do not model the possible VFP instruction
+;; execution in the NEON floating point pipelines, nor additional
+;; latencies for the processing of subnormals.
+;;
+;; TODO: RunFast mode could potentially be enabled when -ffast-math
+;; is specified.
+
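+;; In the reservations below, "cortex_a8_vfplite*N" keeps the
+;; non-pipelined VFPLite unit reserved for N cycles beyond the issue
+;; cycle, so each declared latency of N+1 also describes how long the
+;; unit blocks a subsequent VFP instruction.
+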
+(define_insn_reservation "cortex_a8_vfp_add_sub" 10
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
+ "cortex_a8_vfp,cortex_a8_vfplite*9")
+
+(define_insn_reservation "cortex_a8_vfp_muls" 12
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fmuls"))
+ "cortex_a8_vfp,cortex_a8_vfplite*11")
+
+(define_insn_reservation "cortex_a8_vfp_muld" 17
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fmuld"))
+ "cortex_a8_vfp,cortex_a8_vfplite*16")
+
+(define_insn_reservation "cortex_a8_vfp_macs" 21
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fmacs"))
+ "cortex_a8_vfp,cortex_a8_vfplite*20")
+
+(define_insn_reservation "cortex_a8_vfp_macd" 26
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fmacd"))
+ "cortex_a8_vfp,cortex_a8_vfplite*25")
+
+(define_insn_reservation "cortex_a8_vfp_divs" 37
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fdivs"))
+ "cortex_a8_vfp,cortex_a8_vfplite*36")
+
+(define_insn_reservation "cortex_a8_vfp_divd" 65
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fdivd"))
+ "cortex_a8_vfp,cortex_a8_vfplite*64")
+
+;; Comparisons can sometimes take seven cycles instead of four, but
+;; since all the other instructions lumped into type=ffarith take four
+;; cycles, we pick four as the latency.
+(define_insn_reservation "cortex_a8_vfp_farith" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd"))
+ "cortex_a8_vfp,cortex_a8_vfplite*3")
+
+(define_insn_reservation "cortex_a8_vfp_cvt" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "f_cvt"))
+ "cortex_a8_vfp,cortex_a8_vfplite*6")
+
+;; NEON -> core transfers.
+
+(define_insn_reservation "cortex_a8_neon_mrc" 20
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mrc"))
+ "cortex_a8_neon_ls")
+
+(define_insn_reservation "cortex_a8_neon_mrrc" 21
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mrrc"))
+ "cortex_a8_neon_ls_2")
+
+;; The remainder of this file is auto-generated by neon-schedgen.
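+;; (That generator -- presumably neon-schedgen.ml in this directory --
+;; emits the reservations and bypasses from per-type operand-read and
+;; result-availability stages.)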
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a8_neon_int_1" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_int_1"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N3.
+(define_insn_reservation "cortex_a8_neon_int_2" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_int_2"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a8_neon_int_3" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_int_3"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a8_neon_int_4" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_int_4"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N4.
+(define_insn_reservation "cortex_a8_neon_int_5" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_int_5"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vqneg_vqabs"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation produce a result at N3.
+(define_insn_reservation "cortex_a8_neon_vmov" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vmov"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_vaba" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vaba"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vaba_qqq"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_vsma" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vsma"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mla_qqq_8_16"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 4.
+(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
+ "cortex_a8_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
+(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mul_qqd_32_scalar"))
+ "cortex_a8_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a8_neon_shift_1" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_shift_1"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a8_neon_shift_2" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_shift_2"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_shift_3" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_shift_3"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N1.
+(define_insn_reservation "cortex_a8_neon_vshl_ddd" 1
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vshl_ddd"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vsra_vrsra"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5.
+(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd"))
+ "cortex_a8_neon_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq"))
+ "cortex_a8_neon_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N5.
+(define_insn_reservation "cortex_a8_neon_fp_vsum" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vsum"))
+ "cortex_a8_neon_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5.
+(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmul_ddd"))
+ "cortex_a8_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmul_qqd"))
+ "cortex_a8_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmla_ddd"))
+ "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmla_qqq"))
+ "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar"))
+ "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar"))
+ "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
+ "cortex_a8_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq"))
+ "cortex_a8_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2.
+(define_insn_reservation "cortex_a8_neon_bp_simple" 2
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_bp_simple"))
+ "cortex_a8_neon_perm")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_bp_2cycle"))
+ "cortex_a8_neon_perm_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_bp_3cycle"))
+ "cortex_a8_neon_perm_3")
+
+;; Instructions using this reservation produce a result at N1.
+(define_insn_reservation "cortex_a8_neon_ldr" 1
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_ldr"))
+ "cortex_a8_neon_ls")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_str" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_str"))
+ "cortex_a8_neon_ls")
+
+;; Instructions using this reservation produce a result at N1 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld1_1_2_regs"))
+ "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N1 on cycle 3.
+(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld1_3_4_regs"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
+ "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld2_4_regs"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 4.
+(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld3_vld4"))
+ "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
+ "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst1_3_4_regs"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4"))
+ "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst3_vst4" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst3_vst4"))
+ "cortex_a8_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld1_vld2_lane"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 5.
+(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld3_vld4_lane"))
+ "cortex_a8_neon_ls_5")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst1_vst2_lane"))
+ "cortex_a8_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vst3_vst4_lane"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a8_neon_vld3_vld4_all_lanes" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_vld3_vld4_all_lanes"))
+ "cortex_a8_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2.
+(define_insn_reservation "cortex_a8_neon_mcr" 2
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mcr"))
+ "cortex_a8_neon_perm")
+
+;; Instructions using this reservation produce a result at N2.
+(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "neon_type" "neon_mcr_2_mcrr"))
+ "cortex_a8_neon_perm_2")
+
+;; Exceptions to the default latencies.
+
+(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a8_neon_mcr"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_vld3_vld4_all_lanes"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a8_neon_vld3_vld4"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_vld2_4_regs"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "cortex_a8_neon_ldr"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_bp_3cycle"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_bp_2cycle"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a8_neon_bp_simple"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a8_neon_fp_vsum"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_vsra_vrsra"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "cortex_a8_neon_vshl_ddd"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_shift_3"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_shift_2"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_shift_1"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_vsma"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a8_neon_vaba_qqq"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a8_neon_vaba"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_vmov"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_vqneg_vqabs"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_int_5"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a8_neon_int_4"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_int_3"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_int_2"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a8_neon_int_1"
+ "cortex_a8_neon_int_1,\
+ cortex_a8_neon_int_4,\
+ cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a8_neon_mla_qqq_8_16,\
+ cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a8_neon_fp_vmla_ddd,\
+ cortex_a8_neon_fp_vmla_qqq,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
+
diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md
new file mode 100644
index 000000000..1922e5cf4
--- /dev/null
+++ b/gcc/config/arm/cortex-a8.md
@@ -0,0 +1,275 @@
+;; ARM Cortex-A8 scheduling description.
+;; Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a8")
+
+;; Only one load/store instruction can be issued per cycle
+;; (although reservation of this unit is only required for single
+;; loads and stores -- see below).
+(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8")
+
+;; Only one branch instruction can be issued per cycle.
+(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8")
+
+;; The two ALU pipelines.
+(define_cpu_unit "cortex_a8_alu0" "cortex_a8")
+(define_cpu_unit "cortex_a8_alu1" "cortex_a8")
+
+;; The usual flow of an instruction through the pipelines.
+(define_reservation "cortex_a8_default"
+ "cortex_a8_alu0|cortex_a8_alu1")
+
+;; The flow of a branch instruction through the pipelines.
+(define_reservation "cortex_a8_branch"
+ "(cortex_a8_alu0+cortex_a8_issue_branch)|\
+ (cortex_a8_alu1+cortex_a8_issue_branch)")
+
+;; The flow of a load or store instruction through the pipeline in
+;; the case where that instruction consists of only one micro-op...
+(define_reservation "cortex_a8_load_store_1"
+ "(cortex_a8_alu0+cortex_a8_issue_ls)|\
+ (cortex_a8_alu1+cortex_a8_issue_ls)")
+
+;; ...and in the case of two micro-ops. Dual issue is altogether forbidden
+;; during the issue cycle of the first micro-op. (Instead of modelling
+;; a separate issue unit, we reserve both alu0 and alu1 to prevent any
+;; other instruction from being issued on that first cycle.)
+;; Even though the load/store pipeline is usually available in either
+;; ALU pipe, multi-cycle instructions always issue in pipeline 0.
+(define_reservation "cortex_a8_load_store_2"
+ "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\
+ cortex_a8_alu0+cortex_a8_issue_ls")
+
+;; The flow of a single-cycle multiplication.
+(define_reservation "cortex_a8_multiply"
+ "cortex_a8_alu0")
+
+;; The flow of a multiplication instruction that gets decomposed into
+;; two micro-ops. The two micro-ops will be issued to pipeline 0 on
+;; successive cycles. Dual issue cannot occur on the cycle in which
+;; the first micro-op is issued.
+(define_reservation "cortex_a8_multiply_2"
+ "cortex_a8_alu0+cortex_a8_alu1,\
+ cortex_a8_alu0")
+
+;; Similarly, the flow of a multiplication instruction that gets
+;; decomposed into three micro-ops. Dual issue cannot occur except on
+;; the cycle upon which the third micro-op is issued.
+(define_reservation "cortex_a8_multiply_3"
+ "cortex_a8_alu0+cortex_a8_alu1,\
+ cortex_a8_alu0+cortex_a8_alu1,\
+ cortex_a8_alu0")
+
+;; The model given here assumes that all instructions are unconditional.
+
+;; Data processing instructions, but not move instructions.
+
+;; We include CLZ with these since it has the same execution pattern
+;; (source read in E2 and destination available at the end of that cycle).
+(define_insn_reservation "cortex_a8_alu" 2
+ (and (eq_attr "tune" "cortexa8")
+ (ior (and (and (eq_attr "type" "alu")
+ (eq_attr "neon_type" "none"))
+ (not (eq_attr "insn" "mov,mvn")))
+ (eq_attr "insn" "clz")))
+ "cortex_a8_default")
+
+(define_insn_reservation "cortex_a8_alu_shift" 2
+ (and (eq_attr "tune" "cortexa8")
+ (and (eq_attr "type" "alu_shift")
+ (not (eq_attr "insn" "mov,mvn"))))
+ "cortex_a8_default")
+
+(define_insn_reservation "cortex_a8_alu_shift_reg" 2
+ (and (eq_attr "tune" "cortexa8")
+ (and (eq_attr "type" "alu_shift_reg")
+ (not (eq_attr "insn" "mov,mvn"))))
+ "cortex_a8_default")
+
+;; Move instructions.
+
+(define_insn_reservation "cortex_a8_mov" 1
+ (and (eq_attr "tune" "cortexa8")
+ (and (eq_attr "type" "alu,alu_shift,alu_shift_reg")
+ (eq_attr "insn" "mov,mvn")))
+ "cortex_a8_default")
+
+;; Exceptions to the default latencies for data processing instructions.
+
+;; A move followed by an ALU instruction with no early dep.
+;; (Such a pair can be issued in parallel, hence latency zero.)
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
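+;; (The optional final operand of a define_bypass names a guard
+;; function -- here arm_no_early_alu_shift_dep and friends, defined in
+;; arm.c -- which must accept the producer/consumer pair before the
+;; bypass latency applies.)
+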
+;; An ALU instruction followed by an ALU instruction with no early dep.
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+ "cortex_a8_alu")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+ "cortex_a8_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg"
+ "cortex_a8_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; Multiplication instructions. These are categorized according to their
+;; reservation behavior and the need below to distinguish certain
+;; varieties for bypasses. Results are available at the E5 stage
+;; (but some of these are multi-cycle instructions, which explains the
+;; latencies below).
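+;; Concretely, as the reservations below show: a single-issue multiply
+;; (cortex_a8_smulwy) has latency 5, and each extra issue cycle of a
+;; decomposed multiply adds one -- latency 6 for the two micro-op forms
+;; and 7 for the three micro-op cortex_a8_mull.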
+
+(define_insn_reservation "cortex_a8_mul" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "insn" "mul,smulxy,smmul"))
+ "cortex_a8_multiply_2")
+
+(define_insn_reservation "cortex_a8_mla" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
+ "cortex_a8_multiply_2")
+
+(define_insn_reservation "cortex_a8_mull" 7
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
+ "cortex_a8_multiply_3")
+
+(define_insn_reservation "cortex_a8_smulwy" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "insn" "smulwy,smuad,smusd"))
+ "cortex_a8_multiply")
+
+;; smlald and smlsld are multiply-accumulate instructions but do not
+;; receive bypassed data from other multiplication results; thus, they
+;; cannot go in cortex_a8_mla above. (See below for bypass details.)
+(define_insn_reservation "cortex_a8_smlald" 6
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "insn" "smlald,smlsld"))
+ "cortex_a8_multiply_2")
+
+;; A multiply with a single-register result or an MLA, followed by an
+;; MLA with an accumulator dependency, has its result forwarded so two
+;; such instructions can issue back-to-back.
+(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy"
+ "cortex_a8_mla"
+ "arm_mac_accumulator_is_mul_result")
+
+;; A multiply followed by an ALU instruction needing the multiply
+;; result only at E2 has lower latency than one needing it at E1.
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+ cortex_a8_smulwy,cortex_a8_smlald"
+ "cortex_a8_alu")
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+ cortex_a8_smulwy,cortex_a8_smlald"
+ "cortex_a8_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\
+ cortex_a8_smulwy,cortex_a8_smlald"
+ "cortex_a8_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; Load instructions.
+;; The presence of any register writeback is ignored here.
+
+;; A load result has latency three unless the dependent instruction
+;; has no early dep, in which case it is only latency two.
+;; We assume 64-bit alignment for doubleword loads.
+(define_insn_reservation "cortex_a8_load1_2" 3
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "load1,load2,load_byte"))
+ "cortex_a8_load_store_1")
+
+(define_bypass 2 "cortex_a8_load1_2"
+ "cortex_a8_alu")
+(define_bypass 2 "cortex_a8_load1_2"
+ "cortex_a8_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_a8_load1_2"
+ "cortex_a8_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; We do not currently model the fact that loads with scaled register
+;; offsets that are not LSL #2 have an extra cycle latency (they issue
+;; as two micro-ops).
+
+;; A load multiple of three registers is usually issued as two micro-ops.
+;; The first register will be available at E3 of the first iteration,
+;; the second at E3 of the second iteration, and the third at E4 of
+;; the second iteration. A load multiple of four registers is usually
+;; issued as two micro-ops.
+(define_insn_reservation "cortex_a8_load3_4" 5
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "load3,load4"))
+ "cortex_a8_load_store_2")
+
+(define_bypass 4 "cortex_a8_load3_4"
+ "cortex_a8_alu")
+(define_bypass 4 "cortex_a8_load3_4"
+ "cortex_a8_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 4 "cortex_a8_load3_4"
+ "cortex_a8_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; Store instructions.
+;; Writeback is again ignored.
+
+(define_insn_reservation "cortex_a8_store1_2" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "store1,store2"))
+ "cortex_a8_load_store_1")
+
+(define_insn_reservation "cortex_a8_store3_4" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "store3,store4"))
+ "cortex_a8_load_store_2")
+
+;; An ALU instruction acting as a producer for a store instruction
+;; that only uses the result as the value to be stored (as opposed to
+;; using it to calculate the address) has latency zero; the store
+;; reads the value to be stored at the start of E3 and the ALU insn
+;; writes it at the end of E2. Move instructions actually produce the
+;; result at the end of E1, but since we don't have delay slots, the
+;; scheduling behavior will be the same.
+(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\
+ cortex_a8_alu_shift_reg,cortex_a8_mov"
+ "cortex_a8_store1_2,cortex_a8_store3_4"
+ "arm_no_early_store_addr_dep")
+
+;; Branch instructions
+
+(define_insn_reservation "cortex_a8_branch" 0
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "branch"))
+ "cortex_a8_branch")
+
+;; Call latencies are not predictable. A semi-arbitrary very large
+;; number is used as "positive infinity" so that everything should be
+;; finished by the time of return.
+(define_insn_reservation "cortex_a8_call" 32
+ (and (eq_attr "tune" "cortexa8")
+ (eq_attr "type" "call"))
+ "cortex_a8_issue_branch")
+
+;; NEON (including VFP) instructions.
+
+(include "cortex-a8-neon.md")
+
diff --git a/gcc/config/arm/cortex-a9-neon.md b/gcc/config/arm/cortex-a9-neon.md
new file mode 100644
index 000000000..2e8ec9b14
--- /dev/null
+++ b/gcc/config/arm/cortex-a9-neon.md
@@ -0,0 +1,1237 @@
+;; ARM Cortex-A9 pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;;
+;; Neon pipeline description contributed by ARM Ltd.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "cortex_a9_neon")
+
+;; Only one instruction can be issued per cycle.
+(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon")
+
+;; Only one data-processing instruction can be issued per cycle.
+(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon")
+
+;; We need a special mutual exclusion (to be used in addition to
+;; cortex_a9_neon_issue_dp) for the case when an instruction such as
+;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
+;; E2 of the floating-point add pipeline. On the cycle previous to that
+;; forward we must prevent issue of any instruction to the floating-point
+;; add pipeline, but still allow issue of a data-processing instruction
+;; to any of the other pipelines.
+(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon")
+(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon")
+
+
+;; Patterns of reservation.
+;; We model the NEON issue units as running in parallel with the core ones.
+;; We assume that multi-cycle NEON instructions get decomposed into
+;; micro-ops as they are issued into the NEON pipeline.
+
+(define_reservation "cortex_a9_neon_dp"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp")
+(define_reservation "cortex_a9_neon_dp_2"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\
+ cortex_a9_neon_issue_dp")
+(define_reservation "cortex_a9_neon_dp_4"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\
+ cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp")
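+
+;; For example, cortex_a9_neon_dp_2 models a two-micro-op instruction:
+;; it occupies the data-processing issue unit on two consecutive
+;; cycles.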
+
+(define_reservation "cortex_a9_neon_fadd"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \
+ cortex_a9_neon_issue_fadd")
+(define_reservation "cortex_a9_neon_fadd_2"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\
+ cortex_a9_neon_issue_fadd,\
+ cortex_a9_neon_issue_dp")
+
+(define_reservation "cortex_a9_neon_perm"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm")
+(define_reservation "cortex_a9_neon_perm_2"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \
+ cortex_a9_neon_issue_perm")
+(define_reservation "cortex_a9_neon_perm_3"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_perm")
+
+(define_reservation "cortex_a9_neon_ls"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls")
+(define_reservation "cortex_a9_neon_ls_2"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_perm")
+(define_reservation "cortex_a9_neon_ls_3"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_perm")
+(define_reservation "cortex_a9_neon_ls_4"
+ "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_perm")
+(define_reservation "cortex_a9_neon_ls_5"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\
+ cortex_a9_neon_issue_perm")
+
+(define_reservation "cortex_a9_neon_fmul_then_fadd"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\
+ nothing*3,\
+ cortex_a9_neon_issue_fadd")
+(define_reservation "cortex_a9_neon_fmul_then_fadd_2"
+ "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\
+ cortex_a9_neon_issue_dp,\
+ nothing*2,\
+ cortex_a9_neon_issue_fadd,\
+ cortex_a9_neon_issue_fadd")
+
+
+;; NEON -> core transfers.
+(define_insn_reservation "ca9_neon_mrc" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mrc"))
+ "ca9_issue_vfp_neon + cortex_a9_neon_mcr")
+
+(define_insn_reservation "ca9_neon_mrrc" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mrrc"))
+ "ca9_issue_vfp_neon + cortex_a9_neon_mcr")
+
+;; The remainder of this file is auto-generated by neon-schedgen.
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a9_neon_int_1" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_int_1"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N3.
+(define_insn_reservation "cortex_a9_neon_int_2" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_int_2"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a9_neon_int_3" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_int_3"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a9_neon_int_4" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_int_4"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)n operands at N2, and produce a result at N4.
+(define_insn_reservation "cortex_a9_neon_int_5" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_int_5"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vqneg_vqabs"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation produce a result at N3.
+(define_insn_reservation "cortex_a9_neon_vmov" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vmov"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_vaba" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vaba"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vaba_qqq"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_vsma" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vsma"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mla_qqq_8_16"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6 on cycle 4.
+(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
+ "cortex_a9_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
+(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mul_qqd_32_scalar"))
+ "cortex_a9_neon_dp_4")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3.
+(define_insn_reservation "cortex_a9_neon_shift_1" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_shift_1"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4.
+(define_insn_reservation "cortex_a9_neon_shift_2" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_shift_2"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N3 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_shift_3" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_shift_3"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N1.
+(define_insn_reservation "cortex_a9_neon_vshl_ddd" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vshl_ddd"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N4 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)m operands at N1,
+;; their (D|Q)d operands at N3, and produce a result at N6.
+(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vsra_vrsra"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5.
+(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd"))
+ "cortex_a9_neon_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N5 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vadd_qqq_vabs_qq"))
+ "cortex_a9_neon_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N5.
+(define_insn_reservation "cortex_a9_neon_fp_vsum" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vsum"))
+ "cortex_a9_neon_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5.
+(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmul_ddd"))
+ "cortex_a9_neon_dp")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmul_qqd"))
+ "cortex_a9_neon_dp_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmla_ddd"))
+ "cortex_a9_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmla_qqq"))
+ "cortex_a9_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmla_ddd_scalar"))
+ "cortex_a9_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their (D|Q)n operands at N2,
+;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vmla_qqq_scalar"))
+ "cortex_a9_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9.
+(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
+ "cortex_a9_neon_fmul_then_fadd")
+
+;; Instructions using this reservation read their source operands at N2, and
+;; produce a result at N9 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_fp_vrecps_vrsqrts_qqq"))
+ "cortex_a9_neon_fmul_then_fadd_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2.
+(define_insn_reservation "cortex_a9_neon_bp_simple" 2
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_bp_simple"))
+ "cortex_a9_neon_perm")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_bp_2cycle"))
+ "cortex_a9_neon_perm_2")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_bp_3cycle"))
+ "cortex_a9_neon_perm_3")
+
+;; Instructions using this reservation produce a result at N1.
+(define_insn_reservation "cortex_a9_neon_ldr" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_ldr"))
+ "cortex_a9_neon_ls")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_str" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_str"))
+ "cortex_a9_neon_ls")
+
+;; Instructions using this reservation produce a result at N1 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld1_1_2_regs"))
+ "cortex_a9_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N1 on cycle 3.
+(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld1_3_4_regs"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
+ "cortex_a9_neon_ls_2")
+
+;; Instructions using this reservation produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld2_4_regs"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 4.
+(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld3_vld4"))
+ "cortex_a9_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
+ "cortex_a9_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst1_3_4_regs"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst2_4_regs_vst3_vst4"))
+ "cortex_a9_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst3_vst4" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst3_vst4"))
+ "cortex_a9_neon_ls_4")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 3.
+(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld1_vld2_lane"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation read their source operands at N1, and
+;; produce a result at N2 on cycle 5.
+(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld3_vld4_lane"))
+ "cortex_a9_neon_ls_5")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst1_vst2_lane"))
+ "cortex_a9_neon_ls_2")
+
+;; Instructions using this reservation read their source operands at N1.
+(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vst3_vst4_lane"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2 on cycle 2.
+(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_vld3_vld4_all_lanes"))
+ "cortex_a9_neon_ls_3")
+
+;; Instructions using this reservation produce a result at N2.
+(define_insn_reservation "cortex_a9_neon_mcr" 2
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mcr"))
+ "cortex_a9_neon_perm")
+
+;; Instructions using this reservation produce a result at N2.
+(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "neon_type" "neon_mcr_2_mcrr"))
+ "cortex_a9_neon_perm_2")
+
+;; Exceptions to the default latencies.
+
+(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a9_neon_mcr"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a9_neon_vld3_vld4"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_vld2_4_regs"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "cortex_a9_neon_ldr"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_bp_3cycle"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_bp_2cycle"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 1 "cortex_a9_neon_bp_simple"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a9_neon_fp_vsum"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_vsra_vrsra"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 0 "cortex_a9_neon_vshl_ddd"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_shift_3"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_shift_2"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_shift_1"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_vsma"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 6 "cortex_a9_neon_vaba_qqq"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 5 "cortex_a9_neon_vaba"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_vmov"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_vqneg_vqabs"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_int_5"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 3 "cortex_a9_neon_int_4"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_int_3"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_int_2"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
+(define_bypass 2 "cortex_a9_neon_int_1"
+ "cortex_a9_neon_int_1,\
+ cortex_a9_neon_int_4,\
+ cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\
+ cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
+ cortex_a9_neon_mla_qqq_8_16,\
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\
+ cortex_a9_neon_fp_vadd_qqq_vabs_qq,\
+ cortex_a9_neon_fp_vmla_ddd,\
+ cortex_a9_neon_fp_vmla_qqq,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\
+ cortex_a9_neon_fp_vrecps_vrsqrts_qqq")
+
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
new file mode 100644
index 000000000..b74ace833
--- /dev/null
+++ b/gcc/config/arm/cortex-a9.md
@@ -0,0 +1,269 @@
+;; ARM Cortex-A9 pipeline description
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Originally written by CodeSourcery for VFP.
+;;
+;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+;; Integer Pipeline description contributed by ARM Ltd.
+;; VFP Pipeline description rewritten and contributed by ARM Ltd.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a9")
+
+;; The Cortex-A9 core is modelled as a dual-issue pipeline that has
+;; the following components.
+;; 1. 1 Load/Store pipeline.
+;; 2. P0 / main pipeline for data-processing instructions.
+;; 3. P1 / dual pipeline for data-processing instructions.
+;; 4. MAC pipeline for multiply as well as multiply
+;; and accumulate instructions.
+;; 5. 1 VFP unit and an optional Neon unit.
+;; The Load/Store, VFP and Neon issue pipelines are multiplexed.
+;; The P0 / main pipeline and the M1 stage of the MAC pipeline are
+;; multiplexed.
+;; The P1 / dual pipeline and the M2 stage of the MAC pipeline are
+;; multiplexed.
+;; There are only 4 integer register read ports, so at any point in
+;; time we cannot issue down both the E1 and the E2 ports unless, of
+;; course, there are bypass paths that get exercised.
+;; Both P0 and P1 have 2 stages, E1 and E2.
+;; Data-processing instructions issue to E1 or E2 depending on
+;; whether they have an early shift or not.
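+;; For example, cortex_a9_dp below reserves only E2 (plus writeback),
+;; while cortex_a9_dp_shift reserves E1 followed by E2, reflecting the
+;; extra cycle spent in the shifter.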
+
+(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")
+(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
+(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
+(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
+(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9")
+(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9")
+
+(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb")
+(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default")
+(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default")
+
+(define_reservation "cortex_a9_multcycle1"
+ "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \
+cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
+
+(define_reservation "cortex_a9_mult16"
+ "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mac16"
+ "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mult"
+ "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb")
+(define_reservation "cortex_a9_mac"
+ "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb")
+
+
+;; Simultaneous issue along the load/store pipeline and the
+;; VFP / Neon pipeline is not possible.
+(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")
+
+;; Default data-processing instruction without any shift.
+;; The only exception to this is the mov instruction,
+;; which can go down E2 without any problem.
+(define_insn_reservation "cortex_a9_dp" 2
+ (and (eq_attr "tune" "cortexa9")
+ (ior (and (eq_attr "type" "alu")
+ (eq_attr "neon_type" "none"))
+ (and (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (eq_attr "insn" "mov"))
+ (eq_attr "neon_type" "none"))))
+ "cortex_a9_p0_default|cortex_a9_p1_default")
+
+;; An instruction using the shifter will go down E1.
+(define_insn_reservation "cortex_a9_dp_shift" 3
+ (and (eq_attr "tune" "cortexa9")
+ (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (not (eq_attr "insn" "mov"))))
+ "cortex_a9_p0_shift | cortex_a9_p1_shift")
+
+;; Loads have a latency of 4 cycles.
+;; We don't model autoincrement instructions. These
+;; instructions use the load store pipeline and 1 of
+;; the E2 units to write back the result of the increment.
+
+(define_insn_reservation "cortex_a9_load1_2" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
+ "cortex_a9_ls")
+
+;; Load multiples and store multiples can't be issued 2 cycles in a
+;; row. The description below assumes that addresses are 64-bit
+;; aligned. If not, there is an extra cycle of latency which is not
+;; modelled.
+
+(define_insn_reservation "cortex_a9_load3_4" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "load3, load4"))
+ "cortex_a9_ls, cortex_a9_ls")
+
+(define_insn_reservation "cortex_a9_store1_2" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "store1, store2, f_stores, f_stored"))
+ "cortex_a9_ls")
+
+;; Almost all our store multiples use an auto-increment
+;; form. Don't issue back-to-back load and store multiples
+;; because the load/store unit will stall.
+
+(define_insn_reservation "cortex_a9_store3_4" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "store3, store4"))
+ "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls")
+
+;; We get 16*16 multiply / mac results in 3 cycles.
+(define_insn_reservation "cortex_a9_mult16" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smulxy"))
+ "cortex_a9_mult16")
+
+;; The 16*16 mac is slightly different in that it
+;; reserves M1 and M2 in the same cycle.
+(define_insn_reservation "cortex_a9_mac16" 3
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "smlaxy"))
+ "cortex_a9_mac16")
+
+
+(define_insn_reservation "cortex_a9_multiply" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "mul"))
+ "cortex_a9_mult")
+
+(define_insn_reservation "cortex_a9_mac" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "insn" "mla"))
+ "cortex_a9_mac")
+
+;; An instruction with a result in E2 can be forwarded
+;; to E2 or E1 or M1 or the load store unit in the next cycle.
+
+(define_bypass 1 "cortex_a9_dp"
+ "cortex_a9_dp_shift, cortex_a9_multiply,
+ cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
+
+(define_bypass 2 "cortex_a9_dp_shift"
+ "cortex_a9_dp_shift, cortex_a9_multiply,
+ cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2,
+ cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4")
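+
+;; As an illustration of the 1-cycle bypass above (a hypothetical
+;; sequence, not taken from the reservations themselves):
+;;   add r0, r1, r2   @ result produced in E2 (cortex_a9_dp)
+;;   sub r3, r0, r4   @ may consume r0 after 1 cycle instead of 2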
+
+;; An instruction in the load store pipeline can provide
+;; read access to a DP instruction in the P0 default pipeline
+;; before the writeback stage.
+
+(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2,
+cortex_a9_store3_4, cortex_a9_store1_2")
+
+(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2,
+cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4")
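+
+;; Illustrative (hypothetical) sequence for the load bypass above:
+;;   ldr r0, [r1]     @ cortex_a9_load1_2, base latency 4
+;;   add r2, r0, r3   @ a DP consumer may see r0 after 3 cycles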
+
+;; Calls and branches.
+
+;; Branch instructions
+
+(define_insn_reservation "cortex_a9_branch" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "branch"))
+ "cortex_a9_branch")
+
+;; Call latencies are essentially 0, but make sure
+;; dual issue doesn't happen, i.e. the next instruction
+;; starts at the next cycle.
+(define_insn_reservation "cortex_a9_call" 0
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "call"))
+ "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")
+
+
+;; Pipelining for VFP instructions.
+;; Issue happens either along load store unit or the VFP / Neon unit.
+;; Pipeline Instruction Classification.
+;; FPS - fcpys, ffariths, ffarithd, r_2_f, f_2_r
+;; FP_ADD - fadds, faddd, fcmps (1)
+;; FPMUL - fmul{s,d}, fmac{s,d}
+;; FPDIV - fdiv{s,d}
+(define_cpu_unit "ca9fps" "cortex_a9")
+(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")
+(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")
+(define_cpu_unit "ca9fp_ds1" "cortex_a9")
+
+
+;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle.
+(define_insn_reservation "cortex_a9_fps" 2
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))
+ "ca9_issue_vfp_neon + ca9fps")
+
+(define_bypass 1
+ "cortex_a9_fps"
+ "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
+
+;; Scheduling on the FP_ADD pipeline.
+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
+
+(define_insn_reservation "cortex_a9_fadd" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fadds, faddd, f_cvt"))
+ "ca9fp_add")
+
+(define_insn_reservation "cortex_a9_fcmp" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fcmps, fcmpd"))
+ "ca9_issue_vfp_neon + ca9fp_add1")
+
+;; Scheduling for the Multiply and MAC instructions.
+(define_reservation "ca9fmuls"
+ "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
+
+(define_reservation "ca9fmuld"
+ "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
+
+(define_insn_reservation "cortex_a9_fmuls" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmuls"))
+ "ca9fmuls")
+
+(define_insn_reservation "cortex_a9_fmuld" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmuld"))
+ "ca9fmuld")
+
+(define_insn_reservation "cortex_a9_fmacs" 8
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmacs"))
+ "ca9fmuls, ca9fp_add")
+
+(define_insn_reservation "cortex_a9_fmacd" 9
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmacd"))
+ "ca9fmuld, ca9fp_add")
+
+;; Division pipeline description.
+(define_insn_reservation "cortex_a9_fdivs" 15
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fdivs"))
+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")
+
+(define_insn_reservation "cortex_a9_fdivd" 25
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fdivd"))
+ "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")
+
+;; Include Neon pipeline description
+(include "cortex-a9-neon.md")
diff --git a/gcc/config/arm/cortex-m4-fpu.md b/gcc/config/arm/cortex-m4-fpu.md
new file mode 100644
index 000000000..6fd5faf74
--- /dev/null
+++ b/gcc/config/arm/cortex-m4-fpu.md
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4 FPU pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Use an artificial unit to model the FPU.
+(define_cpu_unit "cortex_m4_v" "cortex_m4")
+
+(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v")
+
+;; Integer instructions following VDIV or VSQRT complete out-of-order.
+(define_insn_reservation "cortex_m4_fdivs" 15
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fdivs"))
+ "cortex_m4_ex_v,cortex_m4_v*13")
+
+(define_insn_reservation "cortex_m4_vmov_1" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fcpys,fconsts"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_vmov_2" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_2_r,r_2_f"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_fmuls" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fmuls"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fmacs" 4
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fmacs"))
+ "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_ffariths" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "ffariths"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fadds" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fadds"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fcmps" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "fcmps"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_flag" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_flag"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_cvt" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_cvt"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_load" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_loads"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_store" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_stores"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_loadd" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_loadd"))
+ "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_f_stored" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "f_stored"))
+ "cortex_m4_ex_v*3")
+
+;; MAC instructions consume their addend one cycle later. If the result
+;; of an arithmetic instruction is consumed as the addend of the following
+;; MAC instruction, the latency can be decreased by one.
+
+(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
+
+(define_bypass 3 "cortex_m4_fmacs"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
+
+(define_bypass 14 "cortex_m4_fdivs"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md
new file mode 100644
index 000000000..b71037585
--- /dev/null
+++ b/gcc/config/arm/cortex-m4.md
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4 pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_m4")
+
+;; We model the pipelining of LDR instructions by using two artificial units.
+
+(define_cpu_unit "cortex_m4_a" "cortex_m4")
+
+(define_cpu_unit "cortex_m4_b" "cortex_m4")
+
+(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b")
+
+;; ALU and multiply instructions take one cycle.
+(define_insn_reservation "cortex_m4_alu" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "alu,alu_shift,alu_shift_reg,mult"))
+ "cortex_m4_ex")
+
+;; Byte, half-word and word loads take two cycles.
+(define_insn_reservation "cortex_m4_load1" 2
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "load_byte,load1"))
+ "cortex_m4_a, cortex_m4_b")
+
+;; str rx, [ry, #imm] is always one cycle.
+(define_insn_reservation "cortex_m4_store1_1" 1
+ (and (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "store1"))
+ (ne (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+ "cortex_m4_a")
+
+;; Other byte, half-word and word stores take two cycles.
+(define_insn_reservation "cortex_m4_store1_2" 2
+ (and (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "store1"))
+ (eq (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+ "cortex_m4_a*2")
+
+(define_insn_reservation "cortex_m4_load2" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "load2"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_store2" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "store2"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_load3" 4
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "load3"))
+ "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_store3" 4
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "store3"))
+ "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_load4" 5
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "load4"))
+ "cortex_m4_ex*5")
+
+(define_insn_reservation "cortex_m4_store4" 5
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "store4"))
+ "cortex_m4_ex*5")
+
+;; If the address of load or store depends on the result of the preceding
+;; instruction, the latency is increased by one.
+
+(define_bypass 2 "cortex_m4_alu"
+ "cortex_m4_load1"
+ "arm_early_load_addr_dep")
+
+(define_bypass 2 "cortex_m4_alu"
+ "cortex_m4_store1_1,cortex_m4_store1_2"
+ "arm_early_store_addr_dep")
+
+(define_insn_reservation "cortex_m4_branch" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "branch"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_call" 3
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "call"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_block" 1
+ (and (eq_attr "tune" "cortexm4")
+ (eq_attr "type" "block"))
+ "cortex_m4_ex")
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
new file mode 100644
index 000000000..e26c3d45d
--- /dev/null
+++ b/gcc/config/arm/cortex-r4.md
@@ -0,0 +1,292 @@
+;; ARM Cortex-R4 scheduling description.
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_r4")
+
+;; We approximate the dual-issue constraints of this core using four
+;; "issue units" and a reservation matrix as follows. The numbers indicate
+;; the instruction groups' preferences in order. Multiple entries for
+;; the same numbered preference indicate units that must be reserved
+;; together.
+;;
+;; Issue unit: A B C ALU
+;;
+;; ALU w/o reg shift 1st 2nd 1st and 2nd
+;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd
+;; Moves 1st 2nd 2nd
+;; Multiplication 1st 1st
+;; Division 1st 1st
+;; Load/store single 1st 1st
+;; Other load/store 1st 1st
+;; Branches 1st
+
+(define_cpu_unit "cortex_r4_issue_a" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_b" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_c" "cortex_r4")
+(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4")
+
+(define_reservation "cortex_r4_alu"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
+ (cortex_r4_issue_b+cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_alu_shift_reg"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
+ (cortex_r4_issue_b+cortex_r4_issue_c+\
+ cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_mov"
+ "cortex_r4_issue_a|(cortex_r4_issue_b+\
+ cortex_r4_issue_alu)")
+(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_mul_2"
+ "(cortex_r4_issue_a+cortex_r4_issue_alu)*2")
+;; Division instructions execute out-of-order with respect to the
+;; rest of the pipeline and only require reservations on their first and
+;; final cycles.
+(define_reservation "cortex_r4_div_9"
+ "cortex_r4_issue_a+cortex_r4_issue_alu,\
+ nothing*7,\
+ cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_div_10"
+ "cortex_r4_issue_a+cortex_r4_issue_alu,\
+ nothing*8,\
+ cortex_r4_issue_a+cortex_r4_issue_alu")
+(define_reservation "cortex_r4_load_store"
+ "cortex_r4_issue_a+cortex_r4_issue_c")
+(define_reservation "cortex_r4_load_store_2"
+ "(cortex_r4_issue_a+cortex_r4_issue_b)*2")
+(define_reservation "cortex_r4_branch" "cortex_r4_issue_b")
+
+;; We assume that all instructions are unconditional.
+
+;; Data processing instructions. Moves without shifts are kept separate
+;; for the purposes of the dual-issue constraints above.
+(define_insn_reservation "cortex_r4_alu" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (and (eq_attr "type" "alu")
+ (not (eq_attr "insn" "mov"))))
+ "cortex_r4_alu")
+
+(define_insn_reservation "cortex_r4_mov" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (and (eq_attr "type" "alu")
+ (eq_attr "insn" "mov")))
+ "cortex_r4_mov")
+
+(define_insn_reservation "cortex_r4_alu_shift" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "alu_shift"))
+ "cortex_r4_alu")
+
+(define_insn_reservation "cortex_r4_alu_shift_reg" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "alu_shift_reg"))
+ "cortex_r4_alu_shift_reg")
+
+;; An ALU instruction followed by an ALU instruction with no early dep.
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu")
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; In terms of unit availability, a consumer mov could theoretically be
+;; issued together with a producer ALU instruction, without stalls.
+;; In practice this cannot happen because mov;add (in that order) is not
+;; eligible for dual issue and furthermore dual issue is not permitted
+;; when a dependency is involved. We therefore note it as latency one.
+;; A mov followed by another of the same is also latency one.
+(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
+ cortex_r4_mov"
+ "cortex_r4_mov")
+
+;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
+;; media data processing instructions nor sad instructions.
+
+;; Multiplication instructions.
+
+(define_insn_reservation "cortex_r4_mul_4" 4
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "mul,smmul"))
+ "cortex_r4_mul_2")
+
+(define_insn_reservation "cortex_r4_mul_3" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_mla_4" 4
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "mla,smmla"))
+ "cortex_r4_mul_2")
+
+(define_insn_reservation "cortex_r4_mla_3" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_smlald" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "smlald,smlsld"))
+ "cortex_r4_mul")
+
+(define_insn_reservation "cortex_r4_mull" 4
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "smull,umull,umlal,umaal"))
+ "cortex_r4_mul_2")
+
+;; A multiply or an MLA with a single-register result, followed by an
+;; MLA with an accumulator dependency, has its result forwarded.
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3"
+ "cortex_r4_mla_3,cortex_r4_mla_4"
+ "arm_mac_accumulator_is_mul_result")
+
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4"
+ "cortex_r4_mla_3,cortex_r4_mla_4"
+ "arm_mac_accumulator_is_mul_result")
+
+;; A multiply followed by an ALU instruction needing the multiply
+;; result only at ALU has lower latency than one needing it at Shift.
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu")
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; A multiply followed by a mov has one cycle lower latency again.
+(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_mov")
+(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_mov")
+
+;; We guess that division of A/B using sdiv or udiv, on average,
+;; is performed with B having ten more leading zeros than A.
+;; This gives a latency of nine for udiv and ten for sdiv.
+(define_insn_reservation "cortex_r4_udiv" 9
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "udiv"))
+ "cortex_r4_div_9")
+
+(define_insn_reservation "cortex_r4_sdiv" 10
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "insn" "sdiv"))
+ "cortex_r4_div_10")
+
+;; Branches. We assume correct prediction.
+
+(define_insn_reservation "cortex_r4_branch" 0
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "branch"))
+ "cortex_r4_branch")
+
+;; Call latencies are not predictable. A semi-arbitrary very large
+;; number is used as "positive infinity" so that everything should be
+;; finished by the time of return.
+(define_insn_reservation "cortex_r4_call" 32
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "call"))
+ "nothing")
+
+;; Status register access instructions are not currently emitted.
+
+;; Load instructions.
+;; We do not model the "addr_md_3cycle" cases and assume that
+;; accesses following are correctly aligned.
+
+(define_insn_reservation "cortex_r4_load_1_2" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "load1,load2"))
+ "cortex_r4_load_store")
+
+(define_insn_reservation "cortex_r4_load_3_4" 4
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "load3,load4"))
+ "cortex_r4_load_store_2")
+
+;; If a producing load is followed by an instruction consuming only
+;; as a Normal Reg, there is one fewer cycle of latency.
+
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu")
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "cortex_r4_load_1_2"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu")
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu_shift"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "cortex_r4_load_3_4"
+ "cortex_r4_alu_shift_reg"
+ "arm_no_early_alu_shift_value_dep")
+
+;; If a producing load is followed by an instruction consuming only
+;; as a Late Reg, there are two fewer cycles of latency. Such consumer
+;; instructions are moves and stores.
+
+(define_bypass 1 "cortex_r4_load_1_2"
+ "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
+(define_bypass 2 "cortex_r4_load_3_4"
+ "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
+
+;; If a producer's result is required as the base or offset of a load,
+;; there is an extra cycle latency.
+
+(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\
+ cortex_r4_alu_shift_reg"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
+ "cortex_r4_load_1_2,cortex_r4_load_3_4")
+
+;; Store instructions.
+
+(define_insn_reservation "cortex_r4_store_1_2" 0
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "store1,store2"))
+ "cortex_r4_load_store")
+
+(define_insn_reservation "cortex_r4_store_3_4" 0
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "store3,store4"))
+ "cortex_r4_load_store_2")
+
diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md
new file mode 100644
index 000000000..8982bc068
--- /dev/null
+++ b/gcc/config/arm/cortex-r4f.md
@@ -0,0 +1,161 @@
+;; ARM Cortex-R4F VFP pipeline description
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; With the exception of simple VMOV <freg>, <freg> instructions and
+;; the accumulate operand of a multiply-accumulate instruction, all
+;; registers are early registers. Thus base latencies are 1 more than
+;; those listed in the TRM.
+
+;; We use the A, B and C units from the integer core, plus two additional
+;; units to enforce VFP dual issue constraints.
+
+;; A B C V1 VMLA
+;; fcpy 1 2
+;; farith 1 2 1
+;; fmrc 1 2
+;; fconst 1 2 * *
+;; ffarith 1 2 * *
+;; fmac 1 2 1 2
+;; fdiv 1 2 *
+;; f_loads * * *
+;; f_stores * * *
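+
+;; For example, if the TRM were to list fadds with a two-cycle result
+;; latency (an assumption for illustration only), the early-register
+;; rule above is why cortex_r4_fariths below is given a latency of 3.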
+
+(define_cpu_unit "cortex_r4_v1" "cortex_r4")
+
+(define_cpu_unit "cortex_r4_vmla" "cortex_r4")
+
+(define_reservation "cortex_r4_issue_ab"
+ "(cortex_r4_issue_a|cortex_r4_issue_b)")
+(define_reservation "cortex_r4_single_issue"
+ "cortex_r4_issue_a+cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fcpys" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fcpys"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_ffariths" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "ffariths,fconsts,fcmps"))
+ "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fariths" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fadds,fmuls"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fmacs" 6
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fmacs"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)")
+
+(define_insn_reservation "cortex_r4_fdivs" 17
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fdivs"))
+ "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_floads" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_loads"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fstores" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_stores"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla")
+
+(define_insn_reservation "cortex_r4_mcr" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "r_2_f"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_mrc" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_2_r"))
+ "cortex_r4_issue_ab")
+
+;; Bypasses for normal (not early) regs.
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+ "cortex_r4_fcpys")
+(define_bypass 2 "cortex_r4_fariths"
+ "cortex_r4_fcpys")
+(define_bypass 5 "cortex_r4_fmacs"
+ "cortex_r4_fcpys")
+(define_bypass 16 "cortex_r4_fdivs"
+ "cortex_r4_fcpys")
+
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+(define_bypass 2 "cortex_r4_fariths"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+;; mac->mac has an extra forwarding path.
+(define_bypass 3 "cortex_r4_fmacs"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+(define_bypass 16 "cortex_r4_fdivs"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+
+;; Double precision operations. These cannot dual issue.
+
+(define_insn_reservation "cortex_r4_fmacd" 20
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fmacd"))
+ "cortex_r4_single_issue*13")
+
+(define_insn_reservation "cortex_r4_farith" 10
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "faddd,fmuld"))
+ "cortex_r4_single_issue*3")
+
+;; FIXME: The short cycle count suggests these instructions complete
+;; out of order. Chances are this is not a pipelined operation.
+(define_insn_reservation "cortex_r4_fdivd" 97
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fdivd"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_ffarithd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "ffarithd,fconstd"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_fcmpd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fcmpd"))
+ "cortex_r4_single_issue*2")
+
+(define_insn_reservation "cortex_r4_f_cvt" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_cvt"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_f_memd" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_loadd,f_stored"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_f_flag" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_stores"))
+ "cortex_r4_single_issue")
+
diff --git a/gcc/config/arm/crti.asm b/gcc/config/arm/crti.asm
new file mode 100644
index 000000000..9454273dd
--- /dev/null
+++ b/gcc/config/arm/crti.asm
@@ -0,0 +1,86 @@
+# Copyright (C) 2001, 2008, 2009, 2010 Free Software Foundation, Inc.
+# Written By Nick Clifton
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+/* An executable stack is *not* required for these functions. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+#ifdef __ELF__
+#define TYPE(x) .type x,function
+#else
+#define TYPE(x)
+#endif
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+ # Note - this macro is complemented by the FUNC_END macro
+ # in crtn.asm. If you change this macro you must also change
+	# that macro to match.
+.macro FUNC_START
+#ifdef __thumb__
+ .thumb
+
+ push {r3, r4, r5, r6, r7, lr}
+#else
+ .arm
+ # Create a stack frame and save any call-preserved registers
+ mov ip, sp
+ stmdb sp!, {r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr, pc}
+ sub fp, ip, #4
+#endif
+.endm
+
+ .section ".init"
+ .align 2
+ .global _init
+#ifdef __thumb__
+ .thumb_func
+#endif
+ TYPE(_init)
+_init:
+ FUNC_START
+
+
+ .section ".fini"
+ .align 2
+ .global _fini
+#ifdef __thumb__
+ .thumb_func
+#endif
+ TYPE(_fini)
+_fini:
+ FUNC_START
+
+# end of crti.asm
diff --git a/gcc/config/arm/crtn.asm b/gcc/config/arm/crtn.asm
new file mode 100644
index 000000000..c7f90814d
--- /dev/null
+++ b/gcc/config/arm/crtn.asm
@@ -0,0 +1,82 @@
+# Copyright (C) 2001, 2004, 2008, 2009, 2010 Free Software Foundation, Inc.
+# Written By Nick Clifton
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+/* An executable stack is *not* required for these functions. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+ # Note - this macro is complemented by the FUNC_START macro
+ # in crti.asm. If you change this macro you must also change
+	# that macro to match.
+ #
+ # Note - we do not try any fancy optimizations of the return
+ # sequences here, it is just not worth it. Instead keep things
+	# simple.  Restore all the saved registers, including the link
+ # register and then perform the correct function return instruction.
+ # We also save/restore r3 to ensure stack alignment.
+.macro FUNC_END
+#ifdef __thumb__
+ .thumb
+
+ pop {r3, r4, r5, r6, r7}
+ pop {r3}
+ mov lr, r3
+#else
+ .arm
+
+ sub sp, fp, #40
+ ldmfd sp, {r4, r5, r6, r7, r8, r9, sl, fp, sp, lr}
+#endif
+
+#if defined __THUMB_INTERWORK__ || defined __thumb__
+ bx lr
+#else
+ mov pc, lr
+#endif
+.endm
+
+
+ .section ".init"
+ ;;
+ FUNC_END
+
+ .section ".fini"
+ ;;
+ FUNC_END
+
+# end of crtn.asm
diff --git a/gcc/config/arm/ecos-elf.h b/gcc/config/arm/ecos-elf.h
new file mode 100644
index 000000000..9e9fa7046
--- /dev/null
+++ b/gcc/config/arm/ecos-elf.h
@@ -0,0 +1,27 @@
+/* Definitions for ecos based ARM systems using ELF
+ Copyright (C) 1998, 2001, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/ELF Ecos)", stderr);
+
+#define HAS_INIT_SECTION
+
+#undef INVOKE_main
+
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
new file mode 100644
index 000000000..88400884e
--- /dev/null
+++ b/gcc/config/arm/elf.h
@@ -0,0 +1,166 @@
+/* Definitions of target machine for GNU compiler.
+ For ARM with ELF obj format.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2004, 2005, 2007,
+ 2008 Free Software Foundation, Inc.
+ Contributed by Philip Blundell <philb@gnu.org> and
+ Catherine Moore <clm@cygnus.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef OBJECT_FORMAT_ELF
+ #error elf.h included before elfos.h
+#endif
+
+#ifndef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+#endif
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "-D__ELF__"
+#endif
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \
+ { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \
+ SUBSUBTARGET_EXTRA_SPECS
+#endif
+
+#ifndef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC ""
+#endif
+
+#ifndef SUBTARGET_ASM_FLOAT_SPEC
+#define SUBTARGET_ASM_FLOAT_SPEC "\
+%{mapcs-float:-mfloat}"
+#endif
+
+#undef SUBSUBTARGET_EXTRA_SPECS
+#define SUBSUBTARGET_EXTRA_SPECS
+
+#ifndef ASM_SPEC
+#define ASM_SPEC "\
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
+%{mcpu=*:-mcpu=%*} \
+%{march=*:-march=%*} \
+%{mapcs-*:-mapcs-%*} \
+%(subtarget_asm_float_spec) \
+%{mthumb-interwork:-mthumb-interwork} \
+%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
+%{mfloat-abi=*} %{mfpu=*} \
+%(subtarget_extra_asm_spec)"
+#endif
+
+/* The ARM uses @ as a comment character, so we need to redefine
+ TYPE_OPERAND_FMT. */
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "%%%s"
+
+/* We might need an ARM specific header for function declarations. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ ARM_OUTPUT_FN_UNWIND (FILE, TRUE); \
+ } \
+ while (0)
+
+/* We might need an ARM specific trailer for function declarations. */
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do \
+ { \
+ ARM_OUTPUT_FN_UNWIND (FILE, FALSE); \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ } \
+ while (0)
+
+/* Define this macro if jump tables (for `tablejump' insns) should be
+ output in the text section, along with the assembler instructions.
+ Otherwise, the readonly data section is used. */
+/* We put ARM and Thumb-2 jump tables in the text section, because it makes
+ the code more efficient, but for Thumb-1 it's better to put them out of
+ band unless we are generating compressed tables. */
+#define JUMP_TABLES_IN_TEXT_SECTION \
+ (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic)))
+
+#ifndef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X"
+#endif
+
+/* Run-time Target Specification. */
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/elf)", stderr)
+#endif
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APCS_FRAME)
+#endif
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork", "fno-leading-underscore" }
+#endif
+
+#define TARGET_ASM_FILE_START_APP_OFF true
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+
+/* Output an element in the static constructor array. */
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR arm_elf_asm_constructor
+
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR arm_elf_asm_destructor
+
+/* For PIC code we need to explicitly specify (PLT) and (GOT) relocs. */
+#define NEED_PLT_RELOC flag_pic
+#define NEED_GOT_RELOC flag_pic
+
+/* The ELF assembler handles GOT addressing differently to NetBSD. */
+#define GOT_PCREL 0
+
+/* Align output to a power of two. Note ".align 0" is redundant,
+ and also GAS will treat it as ".align 2" which we do not want. */
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ do \
+ { \
+ if ((POWER) > 0) \
+ fprintf (STREAM, "\t.align\t%d\n", POWER); \
+ } \
+ while (0)
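+
+/* For example, ASM_OUTPUT_ALIGN (stream, 3) would emit "\t.align\t3",
+   requesting 2^3 = 8 byte alignment, while a power of 0 emits nothing
+   at all.  */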
+
+/* Horrible hack: We want to prevent some libgcc routines from being
+   included for some multilibs.  */
+#ifndef __ARM_ARCH_6M__
+#undef L_fixdfsi
+#undef L_fixunsdfsi
+#undef L_truncdfsf2
+#undef L_fixsfsi
+#undef L_fixunssfsi
+#undef L_floatdidf
+#undef L_floatdisf
+#undef L_floatundidf
+#undef L_floatundisf
+#endif
+
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md
new file mode 100644
index 000000000..42eb9b272
--- /dev/null
+++ b/gcc/config/arm/fa526.md
@@ -0,0 +1,161 @@
+;; Faraday FA526 Pipeline Description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; These descriptions are based on the information contained in the
+;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
+;;
+;; Modeled pipeline characteristics:
+;; LD -> any use: latency = 3 (2 cycle penalty).
+;; ALU -> any use: latency = 2 (1 cycle penalty).
+
+;; This automaton provides a pipeline description for the Faraday
+;; FA526 core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "fa526")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There is a single pipeline
+;;
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+;; S E M W
+
+(define_cpu_unit "fa526_core" "fa526")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require two cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations
+(define_insn_reservation "526_alu_op" 1
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "alu"))
+ "fa526_core")
+
+(define_insn_reservation "526_alu_shift_op" 2
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
+ "fa526_core")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "526_mult1" 2
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy"))
+ "fa526_core")
+
+(define_insn_reservation "526_mult2" 5
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
+ umlals,smulls,smlals,smlawx"))
+ "fa526_core*4")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the extra cost of operations with a base register writeback
+;; (such as "ldm!").  These models assume that all memory references
+;; hit in dcache.
+
+(define_insn_reservation "526_load1_op" 3
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "load1,load_byte"))
+ "fa526_core")
+
+(define_insn_reservation "526_load2_op" 4
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "load2"))
+ "fa526_core*2")
+
+(define_insn_reservation "526_load3_op" 5
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "load3"))
+ "fa526_core*3")
+
+(define_insn_reservation "526_load4_op" 6
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "load4"))
+ "fa526_core*4")
+
+(define_insn_reservation "526_store1_op" 0
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "store1"))
+ "fa526_core")
+
+(define_insn_reservation "526_store2_op" 1
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "store2"))
+ "fa526_core*2")
+
+(define_insn_reservation "526_store3_op" 2
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "store3"))
+ "fa526_core*3")
+
+(define_insn_reservation "526_store4_op" 3
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "store4"))
+ "fa526_core*4")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The FA526
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "526_branch_op" 0
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "branch"))
+ "fa526_core")
+
+;; The latency for a call is actually the latency until the result is
+;; available, i.e. until R0 is ready for an int return value. For most cases,
+;; the return value is set by a mov instruction, which has 1 cycle latency.
+(define_insn_reservation "526_call_op" 1
+ (and (eq_attr "tune" "fa526")
+ (eq_attr "type" "call"))
+ "fa526_core")
+
diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md
new file mode 100644
index 000000000..06e63d696
--- /dev/null
+++ b/gcc/config/arm/fa606te.md
@@ -0,0 +1,171 @@
+;; Faraday FA606TE Pipeline Description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; These descriptions are based on the information contained in the
+;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
+
+;; Modeled pipeline characteristics:
+;; LD -> any use: latency = 2 (1 cycle penalty).
+;; ALU -> any use: latency = 1 (0 cycle penalty).
+
+;; This automaton provides a pipeline description for the Faraday
+;; FA606TE core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "fa606te")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There is a single pipeline
+;;
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+;; E M W
+
+(define_cpu_unit "fa606te_core" "fa606te")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require two cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations
+(define_insn_reservation "606te_alu_op" 1
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "alu,alu_shift,alu_shift_reg"))
+ "fa606te_core")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "606te_mult1" 2
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "insn" "smlalxy"))
+ "fa606te_core")
+
+(define_insn_reservation "606te_mult2" 3
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy"))
+ "fa606te_core*2")
+
+(define_insn_reservation "606te_mult3" 4
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "insn" "mul,mla,muls,mlas"))
+ "fa606te_core*3")
+
+(define_insn_reservation "606te_mult4" 5
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
+ "fa606te_core*4")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the extra cost of operations with a base register writeback
+;; (such as "ldm!").  These models assume that all memory references
+;; hit in dcache.
+
+(define_insn_reservation "606te_load1_op" 2
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "load1,load_byte"))
+ "fa606te_core")
+
+(define_insn_reservation "606te_load2_op" 3
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "load2"))
+ "fa606te_core*2")
+
+(define_insn_reservation "606te_load3_op" 4
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "load3"))
+ "fa606te_core*3")
+
+(define_insn_reservation "606te_load4_op" 5
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "load4"))
+ "fa606te_core*4")
+
+(define_insn_reservation "606te_store1_op" 0
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "store1"))
+ "fa606te_core")
+
+(define_insn_reservation "606te_store2_op" 1
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "store2"))
+ "fa606te_core*2")
+
+(define_insn_reservation "606te_store3_op" 2
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "store3"))
+ "fa606te_core*3")
+
+(define_insn_reservation "606te_store4_op" 3
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "store4"))
+ "fa606te_core*4")
+
+
+;;(define_insn_reservation "606te_ldm_op" 9
+;; (and (eq_attr "tune" "fa606te")
+;; (eq_attr "type" "load2,load3,load4,store2,store3,store4"))
+;; "fa606te_core*7")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The FA606TE
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "606te_branch_op" 0
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "branch"))
+ "fa606te_core")
+
+;; The latency for a call is actually the latency until the result is
+;; available, i.e. until R0 is ready for an int return value. For most cases,
+;; the return value is set by a mov instruction, which has 1 cycle latency.
+(define_insn_reservation "606te_call_op" 1
+ (and (eq_attr "tune" "fa606te")
+ (eq_attr "type" "call"))
+ "fa606te_core")
+
diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md
new file mode 100644
index 000000000..7fe1c8724
--- /dev/null
+++ b/gcc/config/arm/fa626te.md
@@ -0,0 +1,165 @@
+;; Faraday FA626TE Pipeline Description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; These descriptions are based on the information contained in the
+;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
+
+;; Modeled pipeline characteristics:
+;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles).
+;; ALU -> shifted address LDR/STR: latency = 3.
+;;    (extra 1 cycle unavoidable stall).
+;; ALU -> other use: latency = 2 (available after 2 cycles).
+;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles).
+;; LD -> shifted address LDR/STR: latency = 4
+;;    (extra 1 cycle unavoidable stall).
+;; LD -> any other use: latency = 3 (available after 3 cycles).
+
+;; This automaton provides a pipeline description for the Faraday
+;; FA626TE core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "fa626te")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There is a single pipeline
+;;
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+;; S E M W
+
+(define_cpu_unit "fa626te_core" "fa626te")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require two cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations
+(define_insn_reservation "626te_alu_op" 1
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "alu"))
+ "fa626te_core")
+
+(define_insn_reservation "626te_alu_shift_op" 2
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
+ "fa626te_core")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "626te_mult1" 2
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
+ "fa626te_core")
+
+(define_insn_reservation "626te_mult2" 2
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "insn" "mul,mla"))
+ "fa626te_core")
+
+(define_insn_reservation "626te_mult3" 3
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
+ "fa626te_core*2")
+
+(define_insn_reservation "626te_mult4" 4
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "insn" "smulls,smlals,umulls,umlals"))
+ "fa626te_core*3")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the extra cost of operations with a base register writeback
+;; (such as "ldm!").  These models assume that all memory references
+;; hit in dcache.
+
+(define_insn_reservation "626te_load1_op" 3
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "load1,load_byte"))
+ "fa626te_core")
+
+(define_insn_reservation "626te_load2_op" 4
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "load2,load3"))
+ "fa626te_core*2")
+
+(define_insn_reservation "626te_load3_op" 5
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "load4"))
+ "fa626te_core*3")
+
+(define_insn_reservation "626te_store1_op" 0
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "store1"))
+ "fa626te_core")
+
+(define_insn_reservation "626te_store2_op" 1
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "store2,store3"))
+ "fa626te_core*2")
+
+(define_insn_reservation "626te_store3_op" 2
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "store4"))
+ "fa626te_core*3")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The FA626TE
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "626te_branch_op" 0
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "branch"))
+ "fa626te_core")
+
+;; The latency for a call is actually the latency until the result is
+;; available, i.e. when R0 is ready for an int return value.
+(define_insn_reservation "626te_call_op" 1
+ (and (eq_attr "tune" "fa626,fa626te")
+ (eq_attr "type" "call"))
+ "fa626te_core")
+
diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md
new file mode 100644
index 000000000..3c33d5971
--- /dev/null
+++ b/gcc/config/arm/fa726te.md
@@ -0,0 +1,218 @@
+;; Faraday FA726TE Pipeline Description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; These descriptions are based on the information contained in the
+;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
+
+;; This automaton provides a pipeline description for the Faraday
+;; FA726TE core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "fa726te")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+;; E1 E2 E3 E4 E5 WB
+;;______________________________________________________
+;;
+;; <-------------- LD/ST ----------->
+;; shifter + LU <-- AU -->
+;; <-- AU --> shifter + LU CPSR (Pipe 0)
+;;______________________________________________________
+;;
+;; <---------- MUL --------->
+;; shifter + LU <-- AU -->
+;; <-- AU --> shifter + LU CPSR (Pipe 1)
+
+
+(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")
+(define_cpu_unit "fa726te_mac_pipe" "fa726te")
+(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")
+
+;; Pretend we have 2 LSUs (the second is ONLY for LDR); modelling this
+;; can possibly improve code quality.
+(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")
+(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")
+
+(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)")
+;; Reservation to restrict issue to 1.
+(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require three cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; Move instructions.
+(define_insn_reservation "726te_shift_op" 1
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "insn" "mov,mvn"))
+ "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
+
+;; ALU operations with no shifted operand finish in 1 cycle;
+;; other ALU instructions take 2 cycles.
+(define_insn_reservation "726te_alu_op" 1
+ (and (eq_attr "tune" "fa726te")
+ (and (eq_attr "type" "alu")
+ (not (eq_attr "insn" "mov,mvn"))))
+ "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
+
+;; ALU operations with a shift-by-register operand.
+;; These really stall in the decoder, in order to read the shift value
+;; in the first cycle. If the instruction uses both shifter and AU,
+;; it takes 3 cycles.
+(define_insn_reservation "726te_alu_shift_op" 3
+ (and (eq_attr "tune" "fa726te")
+ (and (eq_attr "type" "alu_shift")
+ (not (eq_attr "insn" "mov,mvn"))))
+ "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
+
+(define_insn_reservation "726te_alu_shift_reg_op" 3
+ (and (eq_attr "tune" "fa726te")
+ (and (eq_attr "type" "alu_shift_reg")
+ (not (eq_attr "insn" "mov,mvn"))))
+ "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Multiplication instructions loop in the execute stage until the
+;; instruction has been passed through the multiplier array enough
+;; times. Multiply operations occur in both the execute and memory
+;; stages of the pipeline.
+
+(define_insn_reservation "726te_mult_op" 3
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
+ umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
+ "fa726te_issue+fa726te_mac_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with a base register writeback
+;; (such as "ldm!"). These models assume that all memory references
+;; hit in dcache.
+
+;; Loads with a shifted offset take 3 cycles; we use that figure because
+;; (a) they are probably the most common case, and (b) the pessimistic
+;; assumption will lead to fewer stalls.
+
+;; Scalar loads are pipelined in FA726TE LSU pipe.
+;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
+;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
+;; same "bundle", the 2nd load will introduce another ISSUE stall but is
+;; still OK to execute (and may be beneficial sometimes).
+
+(define_insn_reservation "726te_load1_op" 3
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "load1,load_byte"))
+ "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
+ | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
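+
+;; In the reservation above, the first alternative issues down the primary
+;; LSU; the second issues down the extra LDR-only LSU and then blocks both
+;; issue slots for a cycle, modelling the extra ISSUE stall noted above.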
+
+(define_insn_reservation "726te_store1_op" 1
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "store1"))
+ "fa726te_blockage*2")
+
+;; Load/Store Multiple blocks all pipelines in EX stages until WB.
+;; No other instructions can be issued together. Since they essentially
+;; prevent all scheduling opportunities, we model them together here.
+
+;; The LDM is broken into multiple load instructions; a later instruction
+;; in pipe 1 is stalled.
+(define_insn_reservation "726te_ldm2_op" 4
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "load2,load3"))
+ "fa726te_blockage*4")
+
+(define_insn_reservation "726te_ldm3_op" 5
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "load4"))
+ "fa726te_blockage*5")
+
+(define_insn_reservation "726te_stm2_op" 2
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "store2,store3"))
+ "fa726te_blockage*3")
+
+(define_insn_reservation "726te_stm3_op" 3
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "store4"))
+ "fa726te_blockage*4")
+
+(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
+ 726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")
+(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\
+ 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"
+ "arm_no_early_store_addr_dep")
+(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")
+(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"
+ "726te_shift_op,726te_alu_op")
+(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
+ "726te_alu_shift_op" "arm_no_early_alu_shift_dep")
+(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
+ "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")
+(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")
+
+(define_bypass 4 "726te_load1_op" "726te_mult_op")
+(define_bypass 5 "726te_ldm2_op" "726te_mult_op")
+(define_bypass 6 "726te_ldm3_op" "726te_mult_op")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The FA726TE
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "726te_branch_op" 0
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "branch"))
+ "fa726te_blockage")
+
+;; The latency for a call is actually the latency until the result is
+;; available, i.e. when R0 is ready for an int return value.
+(define_insn_reservation "726te_call_op" 1
+ (and (eq_attr "tune" "fa726te")
+ (eq_attr "type" "call"))
+ "fa726te_blockage")
+
diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md
new file mode 100644
index 000000000..9ba33ddec
--- /dev/null
+++ b/gcc/config/arm/fmp626.md
@@ -0,0 +1,182 @@
+;; Faraday FMP626 Pipeline Description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; These descriptions are based on the information contained in the
+;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
+
+;; Pipeline architecture
+;; S E M W(Q1) Q2
+;; ___________________________________________
+;; shifter alu
+;; mul1 mul2 mul3
+;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5
+
+;; This automaton provides a pipeline description for the Faraday
+;; FMP626 core.
+;;
+;; The model given here assumes that the condition for all conditional
+;; instructions is "true", i.e., that all of the instructions are
+;; actually executed.
+
+(define_automaton "fmp626")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Pipelines
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; There is a single pipeline.
+;;
+;; The ALU pipeline has fetch, decode, execute, memory, and
+;; write stages. We only need to model the execute, memory and write
+;; stages.
+
+(define_cpu_unit "fmp626_core" "fmp626")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instructions require two cycles to execute, and use the ALU
+;; pipeline in each of the three stages. The results are available
+;; after the execute stage has finished.
+;;
+;; If the destination register is the PC, the pipelines are stalled
+;; for several cycles. That case is not modeled here.
+
+;; ALU operations
+(define_insn_reservation "mp626_alu_op" 1
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "alu"))
+ "fmp626_core")
+
+(define_insn_reservation "mp626_alu_shift_op" 2
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "alu_shift,alu_shift_reg"))
+ "fmp626_core")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Multiplication Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "mp626_mult1" 2
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
+ "fmp626_core")
+
+(define_insn_reservation "mp626_mult2" 2
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "insn" "mul,mla"))
+ "fmp626_core")
+
+(define_insn_reservation "mp626_mult3" 3
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
+ "fmp626_core*2")
+
+(define_insn_reservation "mp626_mult4" 4
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "insn" "smulls,smlals,umulls,umlals"))
+ "fmp626_core*3")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with and without a base register
+;; writeback (such as "ldm!"). These models assume that all memory
+;; references hit in dcache.
+
+(define_insn_reservation "mp626_load1_op" 5
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "load1,load_byte"))
+ "fmp626_core")
+
+(define_insn_reservation "mp626_load2_op" 6
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "load2,load3"))
+ "fmp626_core*2")
+
+(define_insn_reservation "mp626_load3_op" 7
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "load4"))
+ "fmp626_core*3")
+
+(define_insn_reservation "mp626_store1_op" 0
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "store1"))
+ "fmp626_core")
+
+(define_insn_reservation "mp626_store2_op" 1
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "store2,store3"))
+ "fmp626_core*2")
+
+(define_insn_reservation "mp626_store3_op" 2
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "store4"))
+ "fmp626_core*3")
+
+(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
+ "mp626_store1_op,mp626_store2_op,mp626_store3_op"
+ "arm_no_early_store_addr_dep")
+(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\
+ mp626_mult3,mp626_mult4" "mp626_store1_op"
+ "arm_no_early_store_addr_dep")
+(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op")
+(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op"
+ "arm_no_early_alu_shift_dep")
+(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op")
+(define_bypass 2 "mp626_mult3" "mp626_alu_op")
+(define_bypass 3 "mp626_mult4" "mp626_alu_op")
+(define_bypass 4 "mp626_load1_op" "mp626_alu_op")
+(define_bypass 5 "mp626_load2_op" "mp626_alu_op")
+(define_bypass 6 "mp626_load3_op" "mp626_alu_op")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branch and Call Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Branch instructions are difficult to model accurately. The FMP626
+;; core can predict most branches. If the branch is predicted
+;; correctly, and predicted early enough, the branch can be completely
+;; eliminated from the instruction stream. Some branches can
+;; therefore appear to require zero cycles to execute. We assume that
+;; all branches are predicted correctly, and that the latency is
+;; therefore the minimum value.
+
+(define_insn_reservation "mp626_branch_op" 0
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "branch"))
+ "fmp626_core")
+
+;; The latency for a call is actually the latency until the result is
+;; available, i.e. when R0 is ready for an int return value.
+(define_insn_reservation "mp626_call_op" 1
+ (and (eq_attr "tune" "fmp626")
+ (eq_attr "type" "call"))
+ "fmp626_core")
+
diff --git a/gcc/config/arm/fp16.c b/gcc/config/arm/fp16.c
new file mode 100644
index 000000000..936caeb78
--- /dev/null
+++ b/gcc/config/arm/fp16.c
@@ -0,0 +1,145 @@
+/* Half-float conversion routines.
+
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+static inline unsigned short
+__gnu_f2h_internal(unsigned int a, int ieee)
+{
+ unsigned short sign = (a >> 16) & 0x8000;
+ int aexp = (a >> 23) & 0xff;
+ unsigned int mantissa = a & 0x007fffff;
+ unsigned int mask;
+ unsigned int increment;
+
+ if (aexp == 0xff)
+ {
+ if (!ieee)
+ return sign;
+ return sign | 0x7e00 | (mantissa >> 13);
+ }
+
+ if (aexp == 0 && mantissa == 0)
+ return sign;
+
+ aexp -= 127;
+
+ /* Decimal point between bits 22 and 23. */
+ mantissa |= 0x00800000;
+ if (aexp < -14)
+ {
+ mask = 0x007fffff;
+ if (aexp < -25)
+ aexp = -26;
+ else if (aexp != -25)
+ mask >>= 24 + aexp;
+ }
+ else
+ mask = 0x00001fff;
+
+ /* Round to nearest, ties to even. */
+ if (mantissa & mask)
+ {
+ increment = (mask + 1) >> 1;
+ if ((mantissa & mask) == increment)
+ increment = mantissa & (increment << 1);
+ mantissa += increment;
+ if (mantissa >= 0x01000000)
+ {
+ mantissa >>= 1;
+ aexp++;
+ }
+ }
+
+ if (ieee)
+ {
+ if (aexp > 15)
+ return sign | 0x7c00;
+ }
+ else
+ {
+ if (aexp > 16)
+ return sign | 0x7fff;
+ }
+
+ if (aexp < -24)
+ return sign;
+
+ if (aexp < -14)
+ {
+ mantissa >>= -14 - aexp;
+ aexp = -14;
+ }
+
+ /* We leave the leading 1 in the mantissa, and subtract one
+ from the exponent bias to compensate. */
+ return sign | (((aexp + 14) << 10) + (mantissa >> 13));
+}
+
+unsigned int
+__gnu_h2f_internal(unsigned short a, int ieee)
+{
+ unsigned int sign = (unsigned int)(a & 0x8000) << 16;
+ int aexp = (a >> 10) & 0x1f;
+ unsigned int mantissa = a & 0x3ff;
+
+ if (aexp == 0x1f && ieee)
+ return sign | 0x7f800000 | (mantissa << 13);
+
+ if (aexp == 0)
+ {
+ int shift;
+
+ if (mantissa == 0)
+ return sign;
+
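+ /* Subnormal input: shift the mantissa up until its leading 1 sits in
+ bit 10 and compensate in the exponent; clz of a non-zero 10-bit
+ value lies between 22 and 31, so the shift is between 1 and 10. */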
+ shift = __builtin_clz(mantissa) - 21;
+ mantissa <<= shift;
+ aexp = -shift;
+ }
+
+ return sign | (((aexp + 0x70) << 23) + (mantissa << 13));
+}
+
+unsigned short
+__gnu_f2h_ieee(unsigned int a)
+{
+ return __gnu_f2h_internal(a, 1);
+}
+
+unsigned int
+__gnu_h2f_ieee(unsigned short a)
+{
+ return __gnu_h2f_internal(a, 1);
+}
+
+unsigned short
+__gnu_f2h_alternative(unsigned int x)
+{
+ return __gnu_f2h_internal(x, 0);
+}
+
+unsigned int
+__gnu_h2f_alternative(unsigned short a)
+{
+ return __gnu_h2f_internal(a, 0);
+}
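+
+/* Worked example (illustrative only, not part of this file's interface):
+   feeding the bit pattern of 3.14159274f through the IEEE-format entry
+   points gives
+
+     __gnu_f2h_ieee (0x40490fdb) == 0x4248
+     __gnu_h2f_ieee (0x4248)     == 0x40490000
+
+   i.e. pi survives with only 10 mantissa bits, so the low bits of the
+   reconstructed float are zero. */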
diff --git a/gcc/config/arm/fpa.md b/gcc/config/arm/fpa.md
new file mode 100644
index 000000000..6e6dd8d43
--- /dev/null
+++ b/gcc/config/arm/fpa.md
@@ -0,0 +1,889 @@
+;;- Machine description for FPA co-processor for ARM cpus.
+;; Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;; Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
+;; and Martin Simmons (@harleqn.co.uk).
+;; More major hacks by Richard Earnshaw (rearnsha@arm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Some FPA mnemonics are ambiguous between conditional infixes and
+;; conditional suffixes. All instructions use a conditional infix,
+;; even in unified assembly mode.
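+
+;; (The "%?" in the output templates below expands to the current
+;; condition code, which is how the conditional infix is produced:
+;; e.g. "adf%?s" becomes "adfeqs" for an EQ-predicated add.)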
+
+;; FPA automaton.
+(define_automaton "armfp")
+
+;; Floating point unit (FPA)
+(define_cpu_unit "fpa" "armfp")
+
+; The fpa10 doesn't really have a memory read unit, but it can start
+; to speculatively execute the instruction in the pipeline, provided
+; the data is already loaded, so pretend reads have a delay of 2 (and
+; that the pipeline is infinite).
+(define_cpu_unit "fpa_mem" "arm")
+
+(define_insn_reservation "fdivx" 71
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivx"))
+ "core+fpa*69")
+
+(define_insn_reservation "fdivd" 59
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivd"))
+ "core+fpa*57")
+
+(define_insn_reservation "fdivs" 31
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fdivs"))
+ "core+fpa*29")
+
+(define_insn_reservation "fmul" 9
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "fmul"))
+ "core+fpa*7")
+
+(define_insn_reservation "ffmul" 6
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "ffmul"))
+ "core+fpa*4")
+
+(define_insn_reservation "farith" 4
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "farith"))
+ "core+fpa*2")
+
+(define_insn_reservation "ffarith" 2
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "ffarith"))
+ "core+fpa*2")
+
+(define_insn_reservation "r_2_f" 5
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "r_2_f"))
+ "core+fpa*3")
+
+(define_insn_reservation "f_2_r" 1
+ (and (eq_attr "fpu" "fpa")
+ (eq_attr "type" "f_2_r"))
+ "core+fpa*2")
+
+(define_insn_reservation "f_load" 3
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_fpa_load"))
+ "fpa_mem+core*3")
+
+(define_insn_reservation "f_store" 4
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_fpa_store"))
+ "core*4")
+
+(define_insn_reservation "r_mem_f" 6
+ (and (eq_attr "model_wbuf" "no")
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f")))
+ "core*6")
+
+(define_insn_reservation "f_mem_r" 7
+ (and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r"))
+ "core*7")
+
+
+(define_insn "*addsf3_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f,f")
+ (plus:SF (match_operand:SF 1 "s_register_operand" "%f,f")
+ (match_operand:SF 2 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ adf%?s\\t%0, %1, %2
+ suf%?s\\t%0, %1, #%N2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*adddf3_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f")
+ (plus:DF (match_operand:DF 1 "s_register_operand" "%f,f")
+ (match_operand:DF 2 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ adf%?d\\t%0, %1, %2
+ suf%?d\\t%0, %1, #%N2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*adddf_esfdf_df_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f")
+ (plus:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f,f"))
+ (match_operand:DF 2 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ adf%?d\\t%0, %1, %2
+ suf%?d\\t%0, %1, #%N2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*adddf_df_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (plus:DF (match_operand:DF 1 "s_register_operand" "f")
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "adf%?d\\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*adddf_esfdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (plus:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "adf%?d\\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*subsf3_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f,f")
+ (minus:SF (match_operand:SF 1 "arm_float_rhs_operand" "f,G")
+ (match_operand:SF 2 "arm_float_rhs_operand" "fG,f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ suf%?s\\t%0, %1, %2
+ rsf%?s\\t%0, %2, %1"
+ [(set_attr "type" "farith")]
+)
+
+(define_insn "*subdf3_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f")
+ (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G")
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG,f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ suf%?d\\t%0, %1, %2
+ rsf%?d\\t%0, %2, %1"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*subdf_esfdf_df_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (minus:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "suf%?d\\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*subdf_df_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f")
+ (minus:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G")
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f,f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ suf%?d\\t%0, %1, %2
+ rsf%?d\\t%0, %2, %1"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*subdf_esfdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (minus:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "suf%?d\\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsf3_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "s_register_operand" "f")
+ (match_operand:SF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "fml%?s\\t%0, %1, %2"
+ [(set_attr "type" "ffmul")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*muldf3_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mult:DF (match_operand:DF 1 "s_register_operand" "f")
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "muf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fmul")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*muldf_esfdf_df_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mult:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "muf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fmul")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*muldf_df_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mult:DF (match_operand:DF 1 "s_register_operand" "f")
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "muf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fmul")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*muldf_esfdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mult:DF
+ (float_extend:DF (match_operand:SF 1 "s_register_operand" "f"))
+ (float_extend:DF (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "muf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fmul")
+ (set_attr "predicable" "yes")]
+)
+
+;; Division insns
+
+(define_insn "*divsf3_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f,f")
+ (div:SF (match_operand:SF 1 "arm_float_rhs_operand" "f,G")
+ (match_operand:SF 2 "arm_float_rhs_operand" "fG,f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ fdv%?s\\t%0, %1, %2
+ frd%?s\\t%0, %2, %1"
+ [(set_attr "type" "fdivs")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*divdf3_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f")
+ (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "f,G")
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG,f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ dvf%?d\\t%0, %1, %2
+ rdf%?d\\t%0, %2, %1"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*divdf_esfdf_df_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (div:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "dvf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*divdf_df_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (div:DF (match_operand:DF 1 "arm_float_rhs_operand" "fG")
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rdf%?d\\t%0, %2, %1"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*divdf_esfdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (div:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "dvf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*modsf3_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (mod:SF (match_operand:SF 1 "s_register_operand" "f")
+ (match_operand:SF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rmf%?s\\t%0, %1, %2"
+ [(set_attr "type" "fdivs")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*moddf3_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mod:DF (match_operand:DF 1 "s_register_operand" "f")
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rmf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*moddf_esfdf_df_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mod:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (match_operand:DF 2 "arm_float_rhs_operand" "fG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rmf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*moddf_df_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mod:DF (match_operand:DF 1 "s_register_operand" "f")
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rmf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*moddf_esfdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (mod:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))
+ (float_extend:DF
+ (match_operand:SF 2 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "rmf%?d\\t%0, %1, %2"
+ [(set_attr "type" "fdivd")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*negsf2_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "mnf%?s\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*negdf2_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (neg:DF (match_operand:DF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "mnf%?d\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*negdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (neg:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "mnf%?d\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*abssf2_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "abs%?s\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*absdf2_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (abs:DF (match_operand:DF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "abs%?d\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*absdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (abs:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "abs%?d\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*sqrtsf2_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "sqt%?s\\t%0, %1"
+ [(set_attr "type" "float_em")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*sqrtdf2_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "sqt%?d\\t%0, %1"
+ [(set_attr "type" "float_em")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*sqrtdf_esfdf_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (sqrt:DF (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "sqt%?d\\t%0, %1"
+ [(set_attr "type" "float_em")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*floatsisf2_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (float:SF (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "flt%?s\\t%0, %1"
+ [(set_attr "type" "r_2_f")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*floatsidf2_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (float:DF (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "flt%?d\\t%0, %1"
+ [(set_attr "type" "r_2_f")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*fix_truncsfsi2_fpa"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "fix%?z\\t%0, %1"
+ [(set_attr "type" "f_2_r")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*fix_truncdfsi2_fpa"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "fix%?z\\t%0, %1"
+ [(set_attr "type" "f_2_r")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*truncdfsf2_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f")
+ (float_truncate:SF
+ (match_operand:DF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "mvf%?s\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*extendsfdf2_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "s_register_operand" "f")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "mvf%?d\\t%0, %1"
+ [(set_attr "type" "ffarith")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*movsf_fpa"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f, m,f,r,r,r, m")
+ (match_operand:SF 1 "general_operand" "fG,H,mE,f,r,f,r,mE,r"))]
+ "TARGET_ARM
+ && TARGET_HARD_FLOAT && TARGET_FPA
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], SFmode))"
+ "@
+ mvf%?s\\t%0, %1
+ mnf%?s\\t%0, #%N1
+ ldf%?s\\t%0, %1
+ stf%?s\\t%1, %0
+ str%?\\t%1, [%|sp, #-4]!\;ldf%?s\\t%0, [%|sp], #4
+ stf%?s\\t%1, [%|sp, #-4]!\;ldr%?\\t%0, [%|sp], #4
+ mov%?\\t%0, %1
+ ldr%?\\t%0, %1\\t%@ float
+ str%?\\t%1, %0\\t%@ float"
+ [(set_attr "length" "4,4,4,4,8,8,4,4,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type"
+ "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*,load1,store1")
+ (set_attr "pool_range" "*,*,1024,*,*,*,*,4096,*")
+ (set_attr "neg_pool_range" "*,*,1012,*,*,*,*,4084,*")]
+)
+
+(define_insn "*movdf_fpa"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=r,Q,r,m,r, f, f,f, m,!f,!r")
+ (match_operand:DF 1 "general_operand"
+ "Q, r,r,r,mF,fG,H,mF,f,r, f"))]
+ "TARGET_ARM
+ && TARGET_HARD_FLOAT && TARGET_FPA
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ default:
+ case 0: return \"ldm%(ia%)\\t%m1, %M0\\t%@ double\";
+ case 1: return \"stm%(ia%)\\t%m0, %M1\\t%@ double\";
+ case 2: return \"#\";
+ case 3: case 4: return output_move_double (operands);
+ case 5: return \"mvf%?d\\t%0, %1\";
+ case 6: return \"mnf%?d\\t%0, #%N1\";
+ case 7: return \"ldf%?d\\t%0, %1\";
+ case 8: return \"stf%?d\\t%1, %0\";
+ case 9: return output_mov_double_fpa_from_arm (operands);
+ case 10: return output_mov_double_arm_from_fpa (operands);
+ }
+ }
+ "
+ [(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8")
+ (set_attr "predicable" "yes")
+ (set_attr "type"
+ "load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r")
+ (set_attr "pool_range" "*,*,*,*,1020,*,*,1024,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1008,*,*,1008,*,*,*")]
+)
+
+;; We treat XFmode as meaning 'internal format'. It's the right size and we
+;; don't use it for anything else. We only support moving between FPA
+;; registers and moving an FPA register to/from memory.
+(define_insn "*movxf_fpa"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,f,m")
+ (match_operand:XF 1 "general_operand" "f,m,f"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA
+ && (register_operand (operands[0], XFmode)
+ || register_operand (operands[1], XFmode))"
+ "*
+ switch (which_alternative)
+ {
+ default:
+ case 0: return \"mvf%?e\\t%0, %1\";
+ case 1: if (TARGET_FPA_EMU2)
+ return \"ldf%?e\\t%0, %1\";
+ return \"lfm%?\\t%0, 1, %1\";
+ case 2: if (TARGET_FPA_EMU2)
+ return \"stf%?e\\t%1, %0\";
+ return \"sfm%?\\t%1, 1, %0\";
+ }
+ "
+ [(set_attr "length" "4,4,4")
+ (set_attr "predicable" "yes")
+ (set_attr "type" "ffarith,f_fpa_load,f_fpa_store")]
+)
+
+;; stfs/ldfs always use a conditional infix. This works around the
+;; ambiguity between "stf pl s" and "stfp ls".
+(define_insn "*thumb2_movsf_fpa"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f, m,f,r,r,r, m")
+ (match_operand:SF 1 "general_operand" "fG,H,mE,f,r,f,r,mE,r"))]
+ "TARGET_THUMB2
+ && TARGET_HARD_FLOAT && TARGET_FPA
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], SFmode))"
+ "@
+ mvf%?s\\t%0, %1
+ mnf%?s\\t%0, #%N1
+ ldf%?s\\t%0, %1
+ stf%?s\\t%1, %0
+ str%?\\t%1, [%|sp, #-4]!\;ldf%?s\\t%0, [%|sp], #4
+ stf%?s\\t%1, [%|sp, #-4]!\;ldr%?\\t%0, [%|sp], #4
+ mov%?\\t%0, %1
+ ldr%?\\t%0, %1\\t%@ float
+ str%?\\t%1, %0\\t%@ float"
+ [(set_attr "length" "4,4,4,4,8,8,4,4,4")
+ (set_attr "ce_count" "1,1,1,1,2,2,1,1,1")
+ (set_attr "predicable" "yes")
+ (set_attr "type"
+ "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*,load1,store1")
+ (set_attr "pool_range" "*,*,1024,*,*,*,*,4096,*")
+ (set_attr "neg_pool_range" "*,*,1012,*,*,*,*,0,*")]
+)
+
+;; Not predicable because we don't know the number of instructions.
+(define_insn "*thumb2_movdf_fpa"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=r,Q,r,m,r, f, f,f, m,!f,!r")
+ (match_operand:DF 1 "general_operand"
+ "Q, r,r,r,mF,fG,H,mF,f,r, f"))]
+ "TARGET_THUMB2
+ && TARGET_HARD_FLOAT && TARGET_FPA
+ && (GET_CODE (operands[0]) != MEM
+ || register_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ default:
+ case 0: return \"ldm%(ia%)\\t%m1, %M0\\t%@ double\";
+ case 1: return \"stm%(ia%)\\t%m0, %M1\\t%@ double\";
+ case 2: case 3: case 4: return output_move_double (operands);
+ case 5: return \"mvf%?d\\t%0, %1\";
+ case 6: return \"mnf%?d\\t%0, #%N1\";
+ case 7: return \"ldf%?d\\t%0, %1\";
+ case 8: return \"stf%?d\\t%1, %0\";
+ case 9: return output_mov_double_fpa_from_arm (operands);
+ case 10: return output_mov_double_arm_from_fpa (operands);
+ }
+ }
+ "
+ [(set_attr "length" "4,4,8,8,8,4,4,4,4,8,8")
+ (set_attr "type"
+ "load1,store2,*,store2,load1,ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r")
+ (set_attr "pool_range" "*,*,*,*,4092,*,*,1024,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,0,*,*,1020,*,*,*")]
+)
+
+;; Saving and restoring the floating point registers in the prologue should
+;; be done in XFmode, even though we don't support that for anything else
+;; (Well, strictly it's 'internal representation', but that's effectively
+;; XFmode).
+;; Not predicable because we don't know the number of instructions.
+
+(define_insn "*thumb2_movxf_fpa"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,f,f,m,f,r,r")
+ (match_operand:XF 1 "general_operand" "fG,H,m,f,r,f,r"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA && reload_completed"
+ "*
+ switch (which_alternative)
+ {
+ default:
+ case 0: return \"mvf%?e\\t%0, %1\";
+ case 1: return \"mnf%?e\\t%0, #%N1\";
+ case 2: return \"ldf%?e\\t%0, %1\";
+ case 3: return \"stf%?e\\t%1, %0\";
+ case 4: return output_mov_long_double_fpa_from_arm (operands);
+ case 5: return output_mov_long_double_arm_from_fpa (operands);
+ case 6: return output_mov_long_double_arm_from_arm (operands);
+ }
+ "
+ [(set_attr "length" "4,4,4,4,8,8,12")
+ (set_attr "type" "ffarith,ffarith,f_fpa_load,f_fpa_store,r_mem_f,f_mem_r,*")
+ (set_attr "pool_range" "*,*,1024,*,*,*,*")
+ (set_attr "neg_pool_range" "*,*,1004,*,*,*,*")]
+)
+
+(define_insn "*cmpsf_fpa"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:SF 0 "s_register_operand" "f,f")
+ (match_operand:SF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?\\t%0, %1
+ cnf%?\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmpdf_fpa"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:DF 0 "s_register_operand" "f,f")
+ (match_operand:DF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?\\t%0, %1
+ cnf%?\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmpesfdf_df_fpa"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (float_extend:DF
+ (match_operand:SF 0 "s_register_operand" "f,f"))
+ (match_operand:DF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?\\t%0, %1
+ cnf%?\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmpdf_esfdf_fpa"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:DF 0 "s_register_operand" "f")
+ (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "cmf%?\\t%0, %1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmpsf_trap_fpa"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (match_operand:SF 0 "s_register_operand" "f,f")
+ (match_operand:SF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?e\\t%0, %1
+ cnf%?e\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmpdf_trap_fpa"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (match_operand:DF 0 "s_register_operand" "f,f")
+ (match_operand:DF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?e\\t%0, %1
+ cnf%?e\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmp_esfdf_df_trap_fpa"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (float_extend:DF
+ (match_operand:SF 0 "s_register_operand" "f,f"))
+ (match_operand:DF 1 "arm_float_add_operand" "fG,H")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ cmf%?e\\t%0, %1
+ cnf%?e\\t%0, #%N1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*cmp_df_esfdf_trap_fpa"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (match_operand:DF 0 "s_register_operand" "f")
+ (float_extend:DF
+ (match_operand:SF 1 "s_register_operand" "f"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPA"
+ "cmf%?e\\t%0, %1"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_2_r")]
+)
+
+(define_insn "*movsfcc_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f,f,f,f,f,f,f,f")
+ (if_then_else:SF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H")
+ (match_operand:SF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ mvf%D3s\\t%0, %2
+ mnf%D3s\\t%0, #%N2
+ mvf%d3s\\t%0, %1
+ mnf%d3s\\t%0, #%N1
+ mvf%d3s\\t%0, %1\;mvf%D3s\\t%0, %2
+ mvf%d3s\\t%0, %1\;mnf%D3s\\t%0, #%N2
+ mnf%d3s\\t%0, #%N1\;mvf%D3s\\t%0, %2
+ mnf%d3s\\t%0, #%N1\;mnf%D3s\\t%0, #%N2"
+ [(set_attr "length" "4,4,4,4,8,8,8,8")
+ (set_attr "type" "ffarith")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "*movdfcc_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f,f,f,f,f,f,f")
+ (if_then_else:DF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H")
+ (match_operand:DF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ mvf%D3d\\t%0, %2
+ mnf%D3d\\t%0, #%N2
+ mvf%d3d\\t%0, %1
+ mnf%d3d\\t%0, #%N1
+ mvf%d3d\\t%0, %1\;mvf%D3d\\t%0, %2
+ mvf%d3d\\t%0, %1\;mnf%D3d\\t%0, #%N2
+ mnf%d3d\\t%0, #%N1\;mvf%D3d\\t%0, %2
+ mnf%d3d\\t%0, #%N1\;mnf%D3d\\t%0, #%N2"
+ [(set_attr "length" "4,4,4,4,8,8,8,8")
+ (set_attr "type" "ffarith")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "*thumb2_movsfcc_fpa"
+ [(set (match_operand:SF 0 "s_register_operand" "=f,f,f,f,f,f,f,f")
+ (if_then_else:SF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H")
+ (match_operand:SF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ it\\t%D3\;mvf%D3s\\t%0, %2
+ it\\t%D3\;mnf%D3s\\t%0, #%N2
+ it\\t%d3\;mvf%d3s\\t%0, %1
+ it\\t%d3\;mnf%d3s\\t%0, #%N1
+ ite\\t%d3\;mvf%d3s\\t%0, %1\;mvf%D3s\\t%0, %2
+ ite\\t%d3\;mvf%d3s\\t%0, %1\;mnf%D3s\\t%0, #%N2
+ ite\\t%d3\;mnf%d3s\\t%0, #%N1\;mvf%D3s\\t%0, %2
+ ite\\t%d3\;mnf%d3s\\t%0, #%N1\;mnf%D3s\\t%0, #%N2"
+ [(set_attr "length" "6,6,6,6,10,10,10,10")
+ (set_attr "type" "ffarith")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "*thumb2_movdfcc_fpa"
+ [(set (match_operand:DF 0 "s_register_operand" "=f,f,f,f,f,f,f,f")
+ (if_then_else:DF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "arm_float_add_operand" "0,0,fG,H,fG,fG,H,H")
+ (match_operand:DF 2 "arm_float_add_operand" "fG,H,0,0,fG,H,fG,H")))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_FPA"
+ "@
+ it\\t%D3\;mvf%D3d\\t%0, %2
+ it\\t%D3\;mnf%D3d\\t%0, #%N2
+ it\\t%d3\;mvf%d3d\\t%0, %1
+ it\\t%d3\;mnf%d3d\\t%0, #%N1
+ ite\\t%d3\;mvf%d3d\\t%0, %1\;mvf%D3d\\t%0, %2
+ ite\\t%d3\;mvf%d3d\\t%0, %1\;mnf%D3d\\t%0, #%N2
+ ite\\t%d3\;mnf%d3d\\t%0, #%N1\;mvf%D3d\\t%0, %2
+ ite\\t%d3\;mnf%d3d\\t%0, #%N1\;mnf%D3d\\t%0, #%N2"
+ [(set_attr "length" "6,6,6,6,10,10,10,10")
+ (set_attr "type" "ffarith")
+ (set_attr "conds" "use")]
+)
+
diff --git a/gcc/config/arm/freebsd.h b/gcc/config/arm/freebsd.h
new file mode 100644
index 000000000..701bb1499
--- /dev/null
+++ b/gcc/config/arm/freebsd.h
@@ -0,0 +1,67 @@
+/* Definitions for StrongARM running FreeBSD using the ELF format
+ Copyright (C) 2001, 2004, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC FBSD_CPP_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC " \
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
+
+
+/************************[ Target stuff ]***********************************/
+
+/* Define the actual types of some ANSI-mandated types.
+ Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c,
+ c-common.c, and config/<arch>/<arch>.h. */
+
+/* arm.h gets this wrong for FreeBSD. We use the GCC defaults instead. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* We use the GCC defaults here. */
+#undef WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef SUBTARGET_CPU_DEFAULT
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_strongarm
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/StrongARM ELF)");
diff --git a/gcc/config/arm/gentune.sh b/gcc/config/arm/gentune.sh
new file mode 100755
index 000000000..a873973e3
--- /dev/null
+++ b/gcc/config/arm/gentune.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Generate arm-tune.md, a file containing the tune attribute from the list of
+# CPUs in arm-cores.def
+# Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+echo ";; -*- buffer-read-only: t -*-"
+echo ";; Generated automatically by gentune.sh from arm-cores.def"
+
+allcores=`awk -F'[(, ]+' '/^ARM_CORE/ { cores = cores$3"," } END { print cores } ' $1`
+
+echo "(define_attr \"tune\""
+echo " \"$allcores\"" | sed -e 's/,"$/"/'
+echo " (const (symbol_ref \"((enum attr_tune) arm_tune)\")))"
diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S
new file mode 100644
index 000000000..eb0c38632
--- /dev/null
+++ b/gcc/config/arm/ieee754-df.S
@@ -0,0 +1,1447 @@
+/* ieee754-df.S double-precision floating point support for ARM
+
+ Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible. This is
+ * not meant to be easy to understand for the casual reader.
+ * For slightly simpler code please see the single precision version
+ * of this file.
+ *
+ * Only the default rounding mode is supported, in the interest of
+ * performance. Exceptions aren't supported yet, but they could be added
+ * quite easily if necessary without impacting performance.
+ */
+
+
+@ For FPA, float words are always big-endian.
+@ For VFP, float words follow the memory system mode.
+#if defined(__VFP_FP__) && !defined(__ARMEB__)
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#else
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#endif
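+
+@ E.g. on a little-endian VFP target a double arriving in r0-r1 has its
+@ low word in r0 (xl) and its sign/exponent word in r1 (xh); on FPA or
+@ big-endian targets the assignment is reversed.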
+
+
+#ifdef L_arm_negdf2
+
+ARM_FUNC_START negdf2
+ARM_FUNC_ALIAS aeabi_dneg negdf2
+
+ @ flip sign bit
+ eor xh, xh, #0x80000000
+ RET
+
+ FUNC_END aeabi_dneg
+ FUNC_END negdf2
+
+#endif
+
+#ifdef L_arm_addsubdf3
+
+ARM_FUNC_START aeabi_drsub
+
+ eor xh, xh, #0x80000000 @ flip sign bit of first arg
+ b 1f
+
+ARM_FUNC_START subdf3
+ARM_FUNC_ALIAS aeabi_dsub subdf3
+
+ eor yh, yh, #0x80000000 @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START adddf3
+ARM_FUNC_ALIAS aeabi_dadd adddf3
+
+1: do_push {r4, r5, lr}
+
+ @ Look for zeroes, equal values, INF, or NAN.
+ shift1 lsl, r4, xh, #1
+ shift1 lsl, r5, yh, #1
+ teq r4, r5
+ do_it eq
+ teqeq xl, yl
+ do_it ne, ttt
+ COND(orr,s,ne) ip, r4, xl
+ COND(orr,s,ne) ip, r5, yl
+ COND(mvn,s,ne) ip, r4, asr #21
+ COND(mvn,s,ne) ip, r5, asr #21
+ beq LSYM(Lad_s)
+
+ @ Compute exponent difference. Make largest exponent in r4,
+ @ corresponding arg in xh-xl, and positive exponent difference in r5.
+ shift1 lsr, r4, r4, #21
+ rsbs r5, r4, r5, lsr #21
+ do_it lt
+ rsblt r5, r5, #0
+ ble 1f
+ add r4, r4, r5
+ eor yl, xl, yl
+ eor yh, xh, yh
+ eor xl, yl, xl
+ eor xh, yh, xh
+ eor yl, xl, yl
+ eor yh, xh, yh
+1:
+ @ If exponent difference is too large, return largest argument
+ @ already in xh-xl. We need up to 54 bits to handle proper rounding
+ @ of 0x1p54 - 1.1.
+ cmp r5, #54
+ do_it hi
+ RETLDM "r4, r5" hi
+
+ @ Convert mantissa to signed integer.
+ tst xh, #0x80000000
+ mov xh, xh, lsl #12
+ mov ip, #0x00100000
+ orr xh, ip, xh, lsr #12
+ beq 1f
+#if defined(__thumb2__)
+ negs xl, xl
+ sbc xh, xh, xh, lsl #1
+#else
+ rsbs xl, xl, #0
+ rsc xh, xh, #0
+#endif
+1:
+ tst yh, #0x80000000
+ mov yh, yh, lsl #12
+ orr yh, ip, yh, lsr #12
+ beq 1f
+#if defined(__thumb2__)
+ negs yl, yl
+ sbc yh, yh, yh, lsl #1
+#else
+ rsbs yl, yl, #0
+ rsc yh, yh, #0
+#endif
+1:
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not a common case, rescale them out of line.
+ teq r4, r5
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r4, r4, #1
+
+ @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+ rsbs lr, r5, #32
+ blt 1f
+ shift1 lsl, ip, yl, lr
+ shiftop adds xl xl yl lsr r5 yl
+ adc xh, xh, #0
+ shiftop adds xl xl yh lsl lr yl
+ shiftop adcs xh xh yh asr r5 yh
+ b 2f
+1: sub r5, r5, #32
+ add lr, lr, #32
+ cmp yl, #1
+ shift1 lsl,ip, yh, lr
+ do_it cs
+ orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
+ shiftop adds xl xl yh asr r5 yh
+ adcs xh, xh, yh, asr #31
+2:
+ @ We now have a result in xh-xl-ip.
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+ and r5, xh, #0x80000000
+ bpl LSYM(Lad_p)
+#if defined(__thumb2__)
+ mov lr, #0
+ negs ip, ip
+ sbcs xl, lr, xl
+ sbc xh, lr, xh
+#else
+ rsbs ip, ip, #0
+ rscs xl, xl, #0
+ rsc xh, xh, #0
+#endif
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp xh, #0x00100000
+ bcc LSYM(Lad_a)
+ cmp xh, #0x00200000
+ bcc LSYM(Lad_e)
+
+ @ Result needs to be shifted right.
+ movs xh, xh, lsr #1
+ movs xl, xl, rrx
+ mov ip, ip, rrx
+ add r4, r4, #1
+
+ @ Make sure we did not bust our exponent.
+ mov r2, r4, lsl #21
+ cmn r2, #(2 << 21)
+ bcs LSYM(Lad_o)
+
+ @ Our result is now properly aligned into xh-xl, remaining bits in ip.
+ @ Round with the MSB of ip. If exactly halfway between two numbers,
+ @ round so that the LSB of xl ends up 0 (ties-to-even).
+ @ Pack final result together.
+LSYM(Lad_e):
+ cmp ip, #0x80000000
+ do_it eq
+ COND(mov,s,eq) ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ orr xh, xh, r5
+ RETLDM "r4, r5"
+
+ @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+ movs ip, ip, lsl #1
+ adcs xl, xl, xl
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ sub r4, r4, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since ip will always be 0 at this point.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+ teq xh, #0
+ movne r3, #20
+ moveq r3, #52
+ moveq xh, xl
+ moveq xl, #0
+ mov r2, xh
+ cmp r2, #(1 << 16)
+ movhs r2, r2, lsr #16
+ subhs r3, r3, #16
+ cmp r2, #(1 << 8)
+ movhs r2, r2, lsr #8
+ subhs r3, r3, #8
+ cmp r2, #(1 << 4)
+ movhs r2, r2, lsr #4
+ subhs r3, r3, #4
+ cmp r2, #(1 << 2)
+ subhs r3, r3, #2
+ sublo r3, r3, r2, lsr #1
+ sub r3, r3, r2, lsr #3
+
+#else
+
+ teq xh, #0
+ do_it eq, t
+ moveq xh, xl
+ moveq xl, #0
+ clz r3, xh
+ do_it eq
+ addeq r3, r3, #32
+ sub r3, r3, #11
+
+#endif
+
+ @ Determine how to shift the value.
+ subs r2, r3, #32
+ bge 2f
+ adds r2, r2, #12
+ ble 1f
+
+ @ shift value left 21 to 31 bits, or actually right 11 to 1 bits
+ @ since a register switch happened above.
+ add ip, r2, #20
+ rsb r2, r2, #12
+ shift1 lsl, xl, xh, ip
+ shift1 lsr, xh, xh, r2
+ b 3f
+
+ @ actually shift value left 1 to 20 bits, which might also represent
+ @ 32 to 52 bits if counting the register switch that happened earlier.
+1: add r2, r2, #20
+2: do_it le
+ rsble ip, r2, #32
+ shift1 lsl, xh, xh, r2
+#if defined(__thumb2__)
+ lsr ip, xl, ip
+ itt le
+ orrle xh, xh, ip
+ lslle xl, xl, r2
+#else
+ orrle xh, xh, xl, lsr ip
+ movle xl, xl, lsl r2
+#endif
+
+ @ adjust exponent accordingly.
+3: subs r4, r4, r3
+ do_it ge, tt
+ addge xh, xh, r4, lsl #20
+ orrge xh, xh, r5
+ RETLDM "r4, r5" ge
+
+ @ Exponent too small, denormalize result.
+ @ Find out proper shift value.
+ mvn r4, r4
+ subs r4, r4, #31
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right by 1 to 20 bits, sign is in r5.
+ add r4, r4, #20
+ rsb r2, r4, #32
+ shift1 lsr, xl, xl, r4
+ shiftop orr xl xl xh lsl r2 yh
+ shiftop orr xh r5 xh lsr r4 yh
+ RETLDM "r4, r5"
+
+ @ shift result right by 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl.
+1: rsb r4, r4, #12
+ rsb r2, r4, #32
+ shift1 lsr, xl, xl, r2
+ shiftop orr xl xl xh lsl r4 yh
+ mov xh, r5
+ RETLDM "r4, r5"
+
+ @ Shift value right by 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl.
+2: shift1 lsr, xl, xh, r4
+ mov xh, r5
+ RETLDM "r4, r5"
+
+ @ Adjust exponents for denormalized arguments.
+ @ Note that r4 must not remain equal to 0.
+LSYM(Lad_d):
+ teq r4, #0
+ eor yh, yh, #0x00100000
+ do_it eq, te
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #1
+ subne r5, r5, #1
+ b LSYM(Lad_x)
+
+
+LSYM(Lad_s):
+ mvns ip, r4, asr #21
+ do_it ne
+ COND(mvn,s,ne) ip, r5, asr #21
+ beq LSYM(Lad_i)
+
+ teq r4, r5
+ do_it eq
+ teqeq xl, yl
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ orrs ip, r4, xl
+ do_it eq, t
+ moveq xh, yh
+ moveq xl, yl
+ RETLDM "r4, r5"
+
+1: teq xh, yh
+
+ @ Result is x - x = 0.
+ do_it ne, tt
+ movne xh, #0
+ movne xl, #0
+ RETLDM "r4, r5" ne
+
+ @ Result is x + x = 2x.
+ movs ip, r4, lsr #21
+ bne 2f
+ movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ do_it cs
+ orrcs xh, xh, #0x80000000
+ RETLDM "r4, r5"
+2: adds r4, r4, #(2 << 21)
+ do_it cc, t
+ addcc xh, xh, #(1 << 20)
+ RETLDM "r4, r5" cc
+ and r5, xh, #0x80000000
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr xh, r5, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+ RETLDM "r4, r5"
+
+ @ At least one of x or y is INF/NAN.
+ @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
+ @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
+ @ if either is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ otherwise return xh-xl (which is INF or -INF)
+LSYM(Lad_i):
+ mvns ip, r4, asr #21
+ do_it ne, te
+ movne xh, yh
+ movne xl, yl
+ COND(mvn,s,eq) ip, r5, asr #21
+ do_it ne, t
+ movne yh, xh
+ movne yl, xl
+ orrs r4, xl, xh, lsl #12
+ do_it eq, te
+ COND(orr,s,eq) r5, yl, yh, lsl #12
+ teqeq xh, yh
+ orrne xh, xh, #0x00080000 @ quiet NAN
+ RETLDM "r4, r5"
+
+ FUNC_END aeabi_dsub
+ FUNC_END subdf3
+ FUNC_END aeabi_dadd
+ FUNC_END adddf3
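+
+/* The rounding at Lad_e above is round-to-nearest, ties-to-even.  A
+   minimal integer sketch of the rule (hypothetical helper, shown only
+   to document the convention; shift must be at least 1):
+
+     unsigned long long
+     round_ties_even (unsigned long long mant, int shift)
+     {
+       unsigned long long res  = mant >> shift;
+       unsigned long long rem  = mant & ((1ULL << shift) - 1);
+       unsigned long long half = 1ULL << (shift - 1);
+       if (rem > half || (rem == half && (res & 1)))
+         res++;                  // round up; exact ties go to even
+       return res;
+     }
+
+   The assembly gets the same effect branch-free from the carry flag.  */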
+
+ARM_FUNC_START floatunsidf
+ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
+
+ teq r0, #0
+ do_it eq, t
+ moveq r1, #0
+ RETc(eq)
+ do_push {r4, r5, lr}
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+ mov r5, #0 @ sign bit is 0
+ .ifnc xl, r0
+ mov xl, r0
+ .endif
+ mov xh, #0
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_ui2d
+ FUNC_END floatunsidf
+
+ARM_FUNC_START floatsidf
+ARM_FUNC_ALIAS aeabi_i2d floatsidf
+
+ teq r0, #0
+ do_it eq, t
+ moveq r1, #0
+ RETc(eq)
+ do_push {r4, r5, lr}
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+ ands r5, r0, #0x80000000 @ sign bit in r5
+ do_it mi
+ rsbmi r0, r0, #0 @ absolute value
+ .ifnc xl, r0
+ mov xl, r0
+ .endif
+ mov xh, #0
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_i2d
+ FUNC_END floatsidf
+
+ARM_FUNC_START extendsfdf2
+ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
+
+ movs r2, r0, lsl #1 @ toss sign bit
+ mov xh, r2, asr #3 @ stretch exponent
+ mov xh, xh, rrx @ retrieve sign bit
+ mov xl, r2, lsl #28 @ retrieve remaining bits
+ do_it ne, ttt
+ COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent
+ teqne r3, #0xff000000 @ if not 0, check if INF or NAN
+ eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
+ RETc(ne) @ and return it.
+
+ teq r2, #0 @ if actually 0
+ do_it ne, e
+ teqne r3, #0xff000000 @ or INF or NAN
+ RETc(eq) @ we are done already.
+
+ @ value was denormalized. We can normalize it now.
+ do_push {r4, r5, lr}
+ mov r4, #0x380 @ setup corresponding exponent
+ and r5, xh, #0x80000000 @ move sign bit in r5
+ bic xh, xh, #0x80000000
+ b LSYM(Lad_l)
+
+ FUNC_END aeabi_f2d
+ FUNC_END extendsfdf2
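+
+/* The eor #0x38000000 above rebiases the exponent: floats use bias 127,
+   doubles use bias 1023, and 1023 - 127 = 896 = 0x380.  For a normal
+   float the whole conversion is, in C terms (illustrative sketch over
+   the raw bits f; zeros, denormals, INF and NAN take the paths above):
+
+     unsigned long long sign = (unsigned long long) (f & 0x80000000u) << 32;
+     unsigned long long exp  = ((f >> 23) & 0xff) - 127 + 1023;
+     unsigned long long frac = (unsigned long long) (f & 0x007fffffu) << 29;
+     unsigned long long d    = sign | (exp << 52) | frac;
+  */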
+
+ARM_FUNC_START floatundidf
+ARM_FUNC_ALIAS aeabi_ul2d floatundidf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqd f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0/r1 for backwards
+ @ compatibility.
+ adr ip, LSYM(f0_ret)
+ @ Push pc as well so that RETLDM works correctly.
+ do_push {r4, r5, ip, lr, pc}
+#else
+ do_push {r4, r5, lr}
+#endif
+
+ mov r5, #0
+ b 2f
+
+ARM_FUNC_START floatdidf
+ARM_FUNC_ALIAS aeabi_l2d floatdidf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqd f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0/r1 for backwards
+ @ compatibility.
+ adr ip, LSYM(f0_ret)
+ @ Push pc as well so that RETLDM works correctly.
+ do_push {r4, r5, ip, lr, pc}
+#else
+ do_push {r4, r5, lr}
+#endif
+
+ ands r5, ah, #0x80000000 @ sign bit in r5
+ bpl 2f
+#if defined(__thumb2__)
+ negs al, al
+ sbc ah, ah, ah, lsl #1
+#else
+ rsbs al, al, #0
+ rsc ah, ah, #0
+#endif
+2:
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
+
+ @ FPA little-endian: must swap the word order.
+ .ifnc xh, ah
+ mov ip, al
+ mov xh, ah
+ mov xl, ip
+ .endif
+
+ movs ip, xh, lsr #22
+ beq LSYM(Lad_p)
+
+ @ The value is too big. Scale it down a bit...
+ mov r2, #3
+ movs ip, ip, lsr #3
+ do_it ne
+ addne r2, r2, #3
+ movs ip, ip, lsr #3
+ do_it ne
+ addne r2, r2, #3
+ add r2, r2, ip, lsr #3
+
+ rsb r3, r2, #32
+ shift1 lsl, ip, xl, r3
+ shift1 lsr, xl, xl, r2
+ shiftop orr xl xl xh lsl r3 lr
+ shift1 lsr, xh, xh, r2
+ add r4, r4, r2
+ b LSYM(Lad_p)
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+ @ Legacy code expects the result to be returned in f0. Copy it
+ @ there as well.
+LSYM(f0_ret):
+ do_push {r0, r1}
+ ldfd f0, [sp], #8
+ RETLDM
+
+#endif
+
+ FUNC_END floatdidf
+ FUNC_END aeabi_l2d
+ FUNC_END floatundidf
+ FUNC_END aeabi_ul2d
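+
+ @ Note: the integer conversions above reuse the adddf3 normalization
+ @ tail (Lad_l/Lad_p): the integer is installed in xh-xl as a raw
+ @ mantissa with a fixed starting exponent, then normalized and rounded
+ @ there; 64-bit values wider than 53 bits are pre-shifted right so the
+ @ rounding code still sees the discarded bits.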
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_arm_muldivdf3
+
+ARM_FUNC_START muldf3
+ARM_FUNC_ALIAS aeabi_dmul muldf3
+ do_push {r4, r5, r6, lr}
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ do_it ne, tte
+ COND(and,s,ne) r5, ip, yh, lsr #20
+ teqne r4, ip
+ teqne r5, ip
+ bleq LSYM(Lml_s)
+
+ @ Add exponents together
+ add r4, r4, r5
+
+ @ Determine final sign.
+ eor r6, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ If power of two, branch to a separate path.
+ bic xh, xh, ip, lsl #21
+ bic yh, yh, ip, lsl #21
+ orrs r5, xl, xh, lsl #12
+ do_it ne
+ COND(orr,s,ne) r5, yl, yh, lsl #12
+ orr xh, xh, #0x00100000
+ orr yh, yh, #0x00100000
+ beq LSYM(Lml_1)
+
+#if __ARM_ARCH__ < 4
+
+ @ Put sign bit in r6, which will be restored in yl later.
+ and r6, r6, #0x80000000
+
+ @ Well, no way to make it shorter without the umull instruction.
+ stmfd sp!, {r6, r7, r8, r9, sl, fp}
+ mov r7, xl, lsr #16
+ mov r8, yl, lsr #16
+ mov r9, xh, lsr #16
+ mov sl, yh, lsr #16
+ bic xl, xl, r7, lsl #16
+ bic yl, yl, r8, lsl #16
+ bic xh, xh, r9, lsl #16
+ bic yh, yh, sl, lsl #16
+ mul ip, xl, yl
+ mul fp, xl, r8
+ mov lr, #0
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, r7, yl
+ adds ip, ip, fp, lsl #16
+ adc lr, lr, fp, lsr #16
+ mul fp, xl, sl
+ mov r5, #0
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r7, yh
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, r8
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, r9, yl
+ adds lr, lr, fp, lsl #16
+ adc r5, r5, fp, lsr #16
+ mul fp, xh, sl
+ mul r6, r9, sl
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, r9, yh
+ adds r5, r5, fp, lsl #16
+ adc r6, r6, fp, lsr #16
+ mul fp, xl, yh
+ adds lr, lr, fp
+ mul fp, r7, sl
+ adcs r5, r5, fp
+ mul fp, xh, yl
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, r9, r8
+ adcs r5, r5, fp
+ mul fp, r7, r8
+ adc r6, r6, #0
+ adds lr, lr, fp
+ mul fp, xh, yh
+ adcs r5, r5, fp
+ adc r6, r6, #0
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp}
+
+#else
+
+ @ Here is the actual multiplication.
+ umull ip, lr, xl, yl
+ mov r5, #0
+ umlal lr, r5, xh, yl
+ and yl, r6, #0x80000000
+ umlal lr, r5, xl, yh
+ mov r6, #0
+ umlal r5, r6, xh, yh
+
+#endif
+
+ @ The LSBs in ip are only significant for the final rounding.
+ @ Fold them into lr.
+ teq ip, #0
+ do_it ne
+ orrne lr, lr, #1
+
+ @ Adjust the result according to the MSB position.
+ sub r4, r4, #0xff
+ cmp r6, #(1 << (20-11))
+ sbc r4, r4, #0x300
+ bcs 1f
+ movs lr, lr, lsl #1
+ adcs r5, r5, r5
+ adc r6, r6, r6
+1:
+ @ Shift to final position, add sign to result.
+ orr xh, yl, r6, lsl #11
+ orr xh, xh, r5, lsr #21
+ mov xl, r5, lsl #11
+ orr xl, xl, lr, lsr #21
+ mov lr, lr, lsl #11
+
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ do_it hi
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp lr, #0x80000000
+ do_it eq
+ COND(mov,s,eq) lr, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6"
+
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ and r6, r6, #0x80000000
+ orr xh, r6, xh
+ orr xl, xl, yl
+ eor xh, xh, yh
+ subs r4, r4, ip, lsr #1
+ do_it gt, tt
+ COND(rsb,s,gt) r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ @ Under/overflow: fix things up for the code below.
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
+
+LSYM(Lml_u):
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r4, #(53 + 1)
+ do_it le, tt
+ movle xl, #0
+ bicle xh, xh, #0x7fffffff
+ RETLDM "r4, r5, r6" le
+
+ @ Find out proper shift value.
+ rsb r4, r4, #0
+ subs r4, r4, #32
+ bge 2f
+ adds r4, r4, #12
+ bgt 1f
+
+ @ shift result right by 1 to 20 bits, preserve sign bit, round, etc.
+ add r4, r4, #20
+ rsb r5, r4, #32
+ shift1 lsl, r3, xl, r5
+ shift1 lsr, xl, xl, r4
+ shiftop orr xl xl xh lsl r5 r2
+ and r2, xh, #0x80000000
+ bic xh, xh, #0x80000000
+ adds xl, xl, r3, lsr #31
+ shiftop adc xh r2 xh lsr r4 r6
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ shift result right by 21 to 31 bits, or left 11 to 1 bits after
+ @ a register switch from xh to xl. Then round.
+1: rsb r4, r4, #12
+ rsb r5, r4, #32
+ shift1 lsl, r3, xl, r4
+ shift1 lsr, xl, xl, r5
+ shiftop orr xl xl xh lsl r4 r2
+ bic xh, xh, #0x7fffffff
+ adds xl, xl, r3, lsr #31
+ adc xh, xh, #0
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ Shift value right by 32 to 64 bits, or 0 to 32 bits after a switch
+ @ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
+2: rsb r5, r4, #32
+ shiftop orr lr lr xl lsl r5 r2
+ shift1 lsr, r3, xl, r4
+ shiftop orr r3 r3 xh lsl r5 r2
+ shift1 lsr, xl, xh, r4
+ bic xh, xh, #0x7fffffff
+ shiftop bic xl xl xh lsr r4 r2
+ add xl, xl, r3, lsr #31
+ orrs lr, lr, r3, lsl #1
+ do_it eq
+ biceq xl, xl, r3, lsr #31
+ RETLDM "r4, r5, r6"
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ teq r4, #0
+ bne 2f
+ and r6, xh, #0x80000000
+1: movs xl, xl, lsl #1
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ do_it eq
+ subeq r4, r4, #1
+ beq 1b
+ orr xh, xh, r6
+ teq r5, #0
+ do_it ne
+ RETc(ne)
+2: and r6, yh, #0x80000000
+3: movs yl, yl, lsl #1
+ adc yh, yh, yh
+ tst yh, #0x00100000
+ do_it eq
+ subeq r5, r5, #1
+ beq 3b
+ orr yh, yh, r6
+ RET
+
+LSYM(Lml_s):
+ @ Isolate the INF and NAN cases.
+ teq r4, ip
+ and r5, ip, yh, lsr #20
+ do_it ne
+ teqne r5, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ orrs r6, xl, xh, lsl #1
+ do_it ne
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor xh, xh, yh
+ and xh, xh, #0x80000000
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+1: @ One or both args are INF or NAN.
+ orrs r6, xl, xh, lsl #1
+ do_it eq, te
+ moveq xl, yl
+ moveq xh, yh
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r6, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r5, ip
+ bne LSYM(Lml_i)
+ orrs r6, yl, yh, lsl #12
+ do_it ne, t
+ movne xl, yl
+ movne xh, yh
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor xh, xh, yh
+
+ @ Overflow: return INF (sign already in xh).
+LSYM(Lml_o):
+ and xh, xh, #0x80000000
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f00000
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+
+ @ Return a quiet NAN.
+LSYM(Lml_n):
+ orr xh, xh, #0x7f000000
+ orr xh, xh, #0x00f80000
+ RETLDM "r4, r5, r6"
+
+ FUNC_END aeabi_dmul
+ FUNC_END muldf3
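+
+/* In C terms the multiplication core above is (illustrative sketch:
+   normal numbers only, __uint128_t assumed available as in GCC on most
+   hosts, names hypothetical):
+
+     int exp = ex + ey - 1023;               // add exponents, drop one bias
+     __uint128_t p = (__uint128_t) mx * my;  // 53x53 -> 105/106-bit product
+     if (p >> 105)                           // product in [2,4): renormalize
+       {
+         p >>= 1;                            // real code keeps the shifted-out
+         exp++;                              // bit around for rounding
+       }
+     // then round the top 53 bits ties-to-even, as in round_ties_even above.
+  */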
+
+ARM_FUNC_START divdf3
+ARM_FUNC_ALIAS aeabi_ddiv divdf3
+
+ do_push {r4, r5, r6, lr}
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ do_it ne, tte
+ COND(and,s,ne) r5, ip, yh, lsr #20
+ teqne r4, ip
+ teqne r5, ip
+ bleq LSYM(Ldv_s)
+
+ @ Subtract the divisor's exponent from the dividend's.
+ sub r4, r4, r5
+
+ @ Preserve the final sign in lr.
+ eor lr, xh, yh
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r5-r6, divisor -> yh-yl.
+ orrs r5, yl, yh, lsl #12
+ mov xh, xh, lsl #12
+ beq LSYM(Ldv_1)
+ mov yh, yh, lsl #12
+ mov r5, #0x10000000
+ orr yh, r5, yh, lsr #4
+ orr yh, yh, yl, lsr #24
+ mov yl, yl, lsl #8
+ orr r5, r5, xh, lsr #4
+ orr r5, r5, xl, lsr #24
+ mov r6, xl, lsl #8
+
+ @ Initialize xh with final sign bit.
+ and xh, lr, #0x80000000
+
+ @ Ensure the result will land at a known bit position.
+ @ Apply exponent bias accordingly.
+ cmp r5, yh
+ do_it eq
+ cmpeq r6, yl
+ adc r4, r4, #(255 - 2)
+ add r4, r4, #0x300
+ bcs 1f
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+1:
+ @ Perform the first subtraction to align the result to a nibble.
+ subs r6, r6, yl
+ sbc r5, r5, yh
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ mov xl, #0x00100000
+ mov ip, #0x00080000
+
+ @ The actual division loop.
+1: subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #1
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #2
+ movs yh, yh, lsr #1
+ mov yl, yl, rrx
+ subs lr, r6, yl
+ sbcs lr, r5, yh
+ do_it cs, tt
+ subcs r6, r6, yl
+ movcs r5, lr
+ orrcs xl, xl, ip, lsr #3
+
+ orrs lr, r5, r6
+ beq 2f
+ mov r5, r5, lsl #4
+ orr r5, r5, r6, lsr #28
+ mov r6, r6, lsl #4
+ mov yh, yh, lsl #3
+ orr yh, yh, yl, lsr #29
+ mov yl, yl, lsl #3
+ movs ip, ip, lsr #4
+ bne 1b
+
+ @ We are done with a word of the result.
+ @ Loop again for the low word if this pass was for the high word.
+ tst xh, #0x00100000
+ bne 3f
+ orr xh, xh, xl
+ mov xl, #0
+ mov ip, #0x80000000
+ b 1b
+2:
+ @ Be sure result starts in the high word.
+ tst xh, #0x00100000
+ do_it eq, t
+ orreq xh, xh, xl
+ moveq xl, #0
+3:
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ do_it hi
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ subs ip, r5, yh
+ do_it eq, t
+ COND(sub,s,eq) ip, r6, yl
+ COND(mov,s,eq) ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6"
+
+ @ Division by 0x1p*: shortcut a lot of code.
+LSYM(Ldv_1):
+ and lr, lr, #0x80000000
+ orr xh, lr, xh, lsr #12
+ adds r4, r4, ip, lsr #1
+ do_it gt, tt
+ COND(rsb,s,gt) r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
+ b LSYM(Lml_u)
+
+ @ Result might need to be denormalized: put remainder bits
+ @ in lr for rounding considerations.
+LSYM(Ldv_u):
+ orr lr, r5, r6
+ b LSYM(Lml_u)
+
+ @ One or both arguments are either INF, NAN or zero.
+LSYM(Ldv_s):
+ and r5, ip, yh, lsr #20
+ teq r4, ip
+ do_it eq
+ teqeq r5, ip
+ beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ teq r4, ip
+ bne 1f
+ orrs r4, xl, xh, lsl #12
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ teq r5, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+1: teq r5, ip
+ bne 2f
+ orrs r5, yl, yh, lsl #12
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are nonzero, we need to normalize and resume above.
+ orrs r6, xl, xh, lsl #1
+ do_it ne
+ COND(orr,s,ne) r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ One or both arguments are 0.
+ orrs r4, xl, xh, lsl #1
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ orrs r5, yl, yh, lsl #1
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+ FUNC_END aeabi_ddiv
+ FUNC_END divdf3
+
+#endif /* L_muldivdf3 */
+
+#ifdef L_arm_cmpdf2
+
+@ Note: only r0 (return value) and ip are clobbered here.
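+@ The return value in r0 is
+@
+@   0  if the operands are equal,
+@   1  if the first operand is greater than the second, or the operands
+@      are unordered and the operation is CMP, LT, LE, NE, or EQ,
+@  -1  if the first operand is less than the second, or the operands
+@      are unordered and the operation is GT or GE.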
+
+ARM_FUNC_START gtdf2
+ARM_FUNC_ALIAS gedf2 gtdf2
+ mov ip, #-1
+ b 1f
+
+ARM_FUNC_START ltdf2
+ARM_FUNC_ALIAS ledf2 ltdf2
+ mov ip, #1
+ b 1f
+
+ARM_FUNC_START cmpdf2
+ARM_FUNC_ALIAS nedf2 cmpdf2
+ARM_FUNC_ALIAS eqdf2 cmpdf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: str ip, [sp, #-4]!
+
+ @ Trap any INF/NAN first.
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ mov ip, yh, lsl #1
+ do_it ne
+ COND(mvn,s,ne) ip, ip, asr #21
+ beq 3f
+
+ @ Test for equality.
+ @ Note that 0.0 is equal to -0.0.
+2: add sp, sp, #4
+ orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0
+ do_it eq, e
+ COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0
+ teqne xh, yh @ or xh == yh
+ do_it eq, tt
+ teqeq xl, yl @ and xl == yl
+ moveq r0, #0 @ then equal.
+ RETc(eq)
+
+ @ Clear C flag
+ cmn r0, #0
+
+ @ Compare signs,
+ teq xh, yh
+
+ @ Compare values if same sign
+ do_it pl
+ cmppl xh, yh
+ do_it eq
+ cmpeq xl, yl
+
+ @ Result:
+ do_it cs, e
+ movcs r0, yh, asr #31
+ mvncc r0, yh, asr #31
+ orr r0, r0, #1
+ RET
+
+ @ Look for a NAN.
+3: mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ bne 4f
+ orrs ip, xl, xh, lsl #12
+ bne 5f @ x is NAN
+4: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
+ bne 2b
+ orrs ip, yl, yh, lsl #12
+ beq 2b @ y is not NAN
+5: ldr r0, [sp], #4 @ unordered return code
+ RET
+
+ FUNC_END gedf2
+ FUNC_END gtdf2
+ FUNC_END ledf2
+ FUNC_END ltdf2
+ FUNC_END nedf2
+ FUNC_END eqdf2
+ FUNC_END cmpdf2
+
+ARM_FUNC_START aeabi_cdrcmple
+
+ mov ip, r0
+ mov r0, r2
+ mov r2, ip
+ mov ip, r1
+ mov r1, r3
+ mov r3, ip
+ b 6f
+
+ARM_FUNC_START aeabi_cdcmpeq
+ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: do_push {r0, lr}
+ ARM_CALL cmpdf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ do_it mi
+ cmnmi r0, #0
+ RETLDM "r0"
+
+ FUNC_END aeabi_cdcmple
+ FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
+
+ARM_FUNC_START aeabi_dcmpeq
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it eq, e
+ moveq r0, #1 @ Equal to.
+ movne r0, #0 @ Less than, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpeq
+
+ARM_FUNC_START aeabi_dcmplt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it cc, e
+ movcc r0, #1 @ Less than.
+ movcs r0, #0 @ Equal to, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmplt
+
+ARM_FUNC_START aeabi_dcmple
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdcmple
+ do_it ls, e
+ movls r0, #1 @ Less than or equal to.
+ movhi r0, #0 @ Greater than or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmple
+
+ARM_FUNC_START aeabi_dcmpge
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdrcmple
+ do_it ls, e
+ movls r0, #1 @ Operand 2 is less than or equal to operand 1.
+ movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpge
+
+ARM_FUNC_START aeabi_dcmpgt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cdrcmple
+ do_it cc, e
+ movcc r0, #1 @ Operand 2 is less than operand 1.
+ movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
+ @ or they are unordered.
+ RETLDM
+
+ FUNC_END aeabi_dcmpgt
+
+#endif /* L_cmpdf2 */
+
+#ifdef L_arm_unorddf2
+
+ARM_FUNC_START unorddf2
+ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
+
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ bne 1f
+ orrs ip, xl, xh, lsl #12
+ bne 3f @ x is NAN
+1: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
+ bne 2f
+ orrs ip, yl, yh, lsl #12
+ bne 3f @ y is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RET
+
+3: mov r0, #1 @ arguments are unordered.
+ RET
+
+ FUNC_END aeabi_dcmpun
+ FUNC_END unorddf2
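+
+/* The bit test above is the standard one: a double is a NAN iff its
+   exponent field is all ones and its mantissa is nonzero.  C sketch
+   (hypothetical helper over the raw bit pattern u):
+
+     int
+     double_is_nan (unsigned long long u)
+     {
+       return ((u >> 52) & 0x7ff) == 0x7ff
+              && (u & 0x000fffffffffffffULL) != 0;
+     }
+  */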
+
+#endif /* L_unorddf2 */
+
+#ifdef L_arm_fixdfsi
+
+ARM_FUNC_START fixdfsi
+ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
+
+ @ check exponent range.
+ mov r2, xh, lsl #1
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bls 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ tst xh, #0x80000000 @ the sign bit
+ shift1 lsr, r0, r3, r2
+ do_it ne
+ rsbne r0, r0, #0
+ RET
+
+1: mov r0, #0
+ RET
+
+2: orrs xl, xl, xh, lsl #12
+ bne 4f @ x is NAN.
+3: ands r0, xh, #0x80000000 @ the sign bit
+ do_it eq
+ moveq r0, #0x7fffffff @ maximum signed positive si
+ RET
+
+4: mov r0, #0 @ How should we convert NAN?
+ RET
+
+ FUNC_END aeabi_d2iz
+ FUNC_END fixdfsi
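+
+/* For in-range values the scaling above is a plain shift.  C sketch of
+   the normal path (truncation toward zero; illustrative only, u is the
+   raw bit pattern, specials handled separately as above):
+
+     int e = (int) ((u >> 52) & 0x7ff) - 1023;   // unbiased exponent
+     unsigned int m = 0x80000000u                // implicit leading one
+                      | (unsigned int) ((u & 0x000fffffffffffffULL) >> 21);
+     unsigned int r = m >> (31 - e);             // valid for 0 <= e <= 30
+     return (u >> 63) ? -(int) r : (int) r;
+  */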
+
+#endif /* L_fixdfsi */
+
+#ifdef L_arm_fixunsdfsi
+
+ARM_FUNC_START fixunsdfsi
+ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
+
+ @ check exponent range.
+ movs r2, xh, lsl #1
+ bcs 1f @ value is negative
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bmi 3f @ value is too large
+
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ shift1 lsr, r0, r3, r2
+ RET
+
+1: mov r0, #0
+ RET
+
+2: orrs xl, xl, xh, lsl #12
+ bne 4f @ value is NAN.
+3: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+4: mov r0, #0 @ How should we convert NAN?
+ RET
+
+ FUNC_END aeabi_d2uiz
+ FUNC_END fixunsdfsi
+
+#endif /* L_fixunsdfsi */
+
+#ifdef L_arm_truncdfsf2
+
+ARM_FUNC_START truncdfsf2
+ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
+
+ @ check exponent range.
+ mov r2, xh, lsl #1
+ subs r3, r2, #((1023 - 127) << 21)
+ do_it cs, t
+ COND(sub,s,cs) ip, r3, #(1 << 21)
+ COND(rsb,s,cs) ip, ip, #(254 << 21)
+ bls 2f @ value is out of range
+
+1: @ shift and round mantissa
+ and ip, xh, #0x80000000
+ mov r2, xl, lsl #3
+ orr xl, ip, xl, lsr #29
+ cmp r2, #0x80000000
+ adc r0, xl, r3, lsl #2
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+2: @ either overflow or underflow
+ tst xh, #0x40000000
+ bne 3f @ overflow
+
+ @ check if denormalized value is possible
+ adds r2, r3, #(23 << 21)
+ do_it lt, t
+ andlt r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(lt)
+
+ @ denormalize value so we can resume with the code above afterwards.
+ orr xh, xh, #0x00100000
+ mov r2, r2, lsr #21
+ rsb r2, r2, #24
+ rsb ip, r2, #32
+#if defined(__thumb2__)
+ lsls r3, xl, ip
+#else
+ movs r3, xl, lsl ip
+#endif
+ shift1 lsr, xl, xl, r2
+ do_it ne
+ orrne xl, xl, #1 @ fold r3 for rounding considerations.
+ mov r3, xh, lsl #11
+ mov r3, r3, lsr #11
+ shiftop orr xl xl r3 lsl ip ip
+ shift1 lsr, r3, r3, r2
+ mov r3, r3, lsl #1
+ b 1b
+
+3: @ check for NAN
+ mvns r3, r2, asr #21
+ bne 5f @ simple overflow
+ orrs r3, xl, xh, lsl #12
+ do_it ne, tt
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+
+5: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ FUNC_END aeabi_d2f
+ FUNC_END truncdfsf2
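+
+/* truncdfsf2 rebiases the exponent by 127 - 1023 and rounds the mantissa
+   from 52 to 23 bits, ties-to-even.  C sketch of the normal path over the
+   raw bits u (illustrative; overflow, underflow and NAN take the paths
+   above):
+
+     unsigned int sign = (unsigned int) (u >> 63) << 31;
+     int e = (int) ((u >> 52) & 0x7ff) - 1023 + 127;
+     unsigned int f = sign | ((unsigned int) e << 23)
+                      | (unsigned int) ((u >> 29) & 0x007fffffu);
+     unsigned int rem = (unsigned int) (u & 0x1fffffffu); // 29 dropped bits
+     if (rem > 0x10000000u || (rem == 0x10000000u && (f & 1)))
+       f++;                    // ties-to-even; carry may bump the exponent
+  */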
+
+#endif /* L_truncdfsf2 */
diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S
new file mode 100644
index 000000000..c93f66d8f
--- /dev/null
+++ b/gcc/config/arm/ieee754-sf.S
@@ -0,0 +1,1060 @@
+/* ieee754-sf.S single-precision floating point support for ARM
+
+ Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Nicolas Pitre (nico@cam.org)
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/*
+ * Notes:
+ *
+ * The goal of this code is to be as fast as possible.  It is
+ * not meant to be easy to understand for the casual reader.
+ *
+ * Only the default rounding mode is supported, for best performance.
+ * Exceptions aren't supported yet, but support could be added quite
+ * easily if necessary without impacting performance.
+ */
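+
+/* The routines below mirror ieee754-df.S, with each value held in a
+   single register: sign(1) | exponent(8) | mantissa(23).  */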
+
+#ifdef L_arm_negsf2
+
+ARM_FUNC_START negsf2
+ARM_FUNC_ALIAS aeabi_fneg negsf2
+
+ eor r0, r0, #0x80000000 @ flip sign bit
+ RET
+
+ FUNC_END aeabi_fneg
+ FUNC_END negsf2
+
+#endif
+
+#ifdef L_arm_addsubsf3
+
+ARM_FUNC_START aeabi_frsub
+
+ eor r0, r0, #0x80000000 @ flip sign bit of first arg
+ b 1f
+
+ARM_FUNC_START subsf3
+ARM_FUNC_ALIAS aeabi_fsub subsf3
+
+ eor r1, r1, #0x80000000 @ flip sign bit of second arg
+#if defined(__INTERWORKING_STUBS__)
+ b 1f @ Skip Thumb-code prologue
+#endif
+
+ARM_FUNC_START addsf3
+ARM_FUNC_ALIAS aeabi_fadd addsf3
+
+1: @ Look for zeroes, equal values, INF, or NAN.
+ movs r2, r0, lsl #1
+ do_it ne, ttt
+ COND(mov,s,ne) r3, r1, lsl #1
+ teqne r2, r3
+ COND(mvn,s,ne) ip, r2, asr #24
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq LSYM(Lad_s)
+
+ @ Compute exponent difference.  Put the largest exponent in r2, the
+ @ corresponding arg in r0, and the positive exponent difference in r3.
+ mov r2, r2, lsr #24
+ rsbs r3, r2, r3, lsr #24
+ do_it gt, ttt
+ addgt r2, r2, r3
+ eorgt r1, r0, r1
+ eorgt r0, r1, r0
+ eorgt r1, r0, r1
+ do_it lt
+ rsblt r3, r3, #0
+
+ @ If exponent difference is too large, return the largest argument,
+ @ already in r0. We need up to 25 bits to handle proper rounding
+ @ of 0x1p25 - 1.1.
+ cmp r3, #25
+ do_it hi
+ RETc(hi)
+
+ @ Convert mantissa to signed integer.
+ tst r0, #0x80000000
+ orr r0, r0, #0x00800000
+ bic r0, r0, #0xff000000
+ do_it ne
+ rsbne r0, r0, #0
+ tst r1, #0x80000000
+ orr r1, r1, #0x00800000
+ bic r1, r1, #0xff000000
+ do_it ne
+ rsbne r1, r1, #0
+
+ @ If exponent == difference, one or both args were denormalized.
+ @ Since this is not a common case, rescale them out of line.
+ teq r2, r3
+ beq LSYM(Lad_d)
+LSYM(Lad_x):
+
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r2, r2, #1
+
+ @ Shift and add second arg to first arg in r0.
+ @ Keep leftover bits in r1.
+ shiftop adds r0 r0 r1 asr r3 ip
+ rsb r3, r3, #32
+ shift1 lsl, r1, r1, r3
+
+ @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
+ and r3, r0, #0x80000000
+ bpl LSYM(Lad_p)
+#if defined(__thumb2__)
+ negs r1, r1
+ sbc r0, r0, r0, lsl #1
+#else
+ rsbs r1, r1, #0
+ rsc r0, r0, #0
+#endif
+
+ @ Determine how to normalize the result.
+LSYM(Lad_p):
+ cmp r0, #0x00800000
+ bcc LSYM(Lad_a)
+ cmp r0, #0x01000000
+ bcc LSYM(Lad_e)
+
+ @ Result needs to be shifted right.
+ movs r0, r0, lsr #1
+ mov r1, r1, rrx
+ add r2, r2, #1
+
+ @ Make sure we did not bust our exponent.
+ cmp r2, #254
+ bhs LSYM(Lad_o)
+
+ @ Our result is now properly aligned into r0, remaining bits in r1.
+ @ Pack final result together.
+ @ Round with the MSB of r1. If exactly halfway between two numbers,
+ @ round so that the LSB of r0 ends up 0 (ties-to-even).
+LSYM(Lad_e):
+ cmp r1, #0x80000000
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ orr r0, r0, r3
+ RET
+
+ @ Result must be shifted left and exponent adjusted.
+LSYM(Lad_a):
+ movs r1, r1, lsl #1
+ adc r0, r0, r0
+ tst r0, #0x00800000
+ sub r2, r2, #1
+ bne LSYM(Lad_e)
+
+ @ No rounding necessary since r1 will always be 0 at this point.
+LSYM(Lad_l):
+
+#if __ARM_ARCH__ < 5
+
+ movs ip, r0, lsr #12
+ moveq r0, r0, lsl #12
+ subeq r2, r2, #12
+ tst r0, #0x00ff0000
+ moveq r0, r0, lsl #8
+ subeq r2, r2, #8
+ tst r0, #0x00f00000
+ moveq r0, r0, lsl #4
+ subeq r2, r2, #4
+ tst r0, #0x00c00000
+ moveq r0, r0, lsl #2
+ subeq r2, r2, #2
+ cmp r0, #0x00800000
+ movcc r0, r0, lsl #1
+ sbcs r2, r2, #0
+
+#else
+
+ clz ip, r0
+ sub ip, ip, #8
+ subs r2, r2, ip
+ shift1 lsl, r0, r0, ip
+
+#endif
+
+ @ Final result with sign
+ @ If exponent negative, denormalize result.
+ do_it ge, et
+ addge r0, r0, r2, lsl #23
+ rsblt r2, r2, #0
+ orrge r0, r0, r3
+#if defined(__thumb2__)
+ do_it lt, t
+ lsrlt r0, r0, r2
+ orrlt r0, r3, r0
+#else
+ orrlt r0, r3, r0, lsr r2
+#endif
+ RET
+
+ @ Fix up and adjust bit position for denormalized arguments.
+ @ Note that r2 must not remain equal to 0.
+LSYM(Lad_d):
+ teq r2, #0
+ eor r1, r1, #0x00800000
+ do_it eq, te
+ eoreq r0, r0, #0x00800000
+ addeq r2, r2, #1
+ subne r3, r3, #1
+ b LSYM(Lad_x)
+
+LSYM(Lad_s):
+ mov r3, r1, lsl #1
+
+ mvns ip, r2, asr #24
+ do_it ne
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq LSYM(Lad_i)
+
+ teq r2, r3
+ beq 1f
+
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r2, #0
+ do_it eq
+ moveq r0, r1
+ RET
+
+1: teq r0, r1
+
+ @ Result is x - x = 0.
+ do_it ne, t
+ movne r0, #0
+ RETc(ne)
+
+ @ Result is x + x = 2x.
+ tst r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #1
+ do_it cs
+ orrcs r0, r0, #0x80000000
+ RET
+2: adds r2, r2, #(2 << 24)
+ do_it cc, t
+ addcc r0, r0, #(1 << 23)
+ RETc(cc)
+ and r3, r0, #0x80000000
+
+ @ Overflow: return INF.
+LSYM(Lad_o):
+ orr r0, r3, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ At least one of r0/r1 is INF/NAN.
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN)
+ @ if r1 != INF/NAN: return r0 (which is INF/NAN)
+ @ if r0 or r1 is NAN: return NAN
+ @ if opposite sign: return NAN
+ @ otherwise return r0 (which is INF or -INF)
+LSYM(Lad_i):
+ mvns r2, r2, asr #24
+ do_it ne, et
+ movne r0, r1
+ COND(mvn,s,eq) r3, r3, asr #24
+ movne r1, r0
+ movs r2, r0, lsl #9
+ do_it eq, te
+ COND(mov,s,eq) r3, r1, lsl #9
+ teqeq r0, r1
+ orrne r0, r0, #0x00400000 @ quiet NAN
+ RET
+
+ FUNC_END aeabi_frsub
+ FUNC_END aeabi_fadd
+ FUNC_END addsf3
+ FUNC_END aeabi_fsub
+ FUNC_END subsf3
+
+ARM_FUNC_START floatunsisf
+ARM_FUNC_ALIAS aeabi_ui2f floatunsisf
+
+ mov r3, #0
+ b 1f
+
+ARM_FUNC_START floatsisf
+ARM_FUNC_ALIAS aeabi_i2f floatsisf
+
+ ands r3, r0, #0x80000000
+ do_it mi
+ rsbmi r0, r0, #0
+
+1: movs ip, r0
+ do_it eq
+ RETc(eq)
+
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23) << 23)
+
+ .ifnc ah, r0
+ mov ah, r0
+ .endif
+ mov al, #0
+ b 2f
+
+ FUNC_END aeabi_i2f
+ FUNC_END floatsisf
+ FUNC_END aeabi_ui2f
+ FUNC_END floatunsisf
+
+ARM_FUNC_START floatundisf
+ARM_FUNC_ALIAS aeabi_ul2f floatundisf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqs f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+ mov r3, #0
+ b 1f
+
+ARM_FUNC_START floatdisf
+ARM_FUNC_ALIAS aeabi_l2f floatdisf
+
+ orrs r2, r0, r1
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ do_it eq, t
+ mvfeqs f0, #0.0
+#else
+ do_it eq
+#endif
+ RETc(eq)
+
+ ands r3, ah, #0x80000000 @ sign bit in r3
+ bpl 1f
+#if defined(__thumb2__)
+ negs al, al
+ sbc ah, ah, ah, lsl #1
+#else
+ rsbs al, al, #0
+ rsc ah, ah, #0
+#endif
+1:
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+ @ For hard FPA code we want to return via the tail below so that
+ @ we can return the result in f0 as well as in r0 for backwards
+ @ compatibility.
+ str lr, [sp, #-8]!
+ adr lr, LSYM(f0_ret)
+#endif
+
+ movs ip, ah
+ do_it eq, tt
+ moveq ip, al
+ moveq ah, al
+ moveq al, #0
+
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23 + 32) << 23)
+ do_it eq
+ subeq r3, r3, #(32 << 23)
+2: sub r3, r3, #(1 << 23)
+
+#if __ARM_ARCH__ < 5
+
+ mov r2, #23
+ cmp ip, #(1 << 16)
+ do_it hs, t
+ movhs ip, ip, lsr #16
+ subhs r2, r2, #16
+ cmp ip, #(1 << 8)
+ do_it hs, t
+ movhs ip, ip, lsr #8
+ subhs r2, r2, #8
+ cmp ip, #(1 << 4)
+ do_it hs, t
+ movhs ip, ip, lsr #4
+ subhs r2, r2, #4
+ cmp ip, #(1 << 2)
+ do_it hs, e
+ subhs r2, r2, #2
+ sublo r2, r2, ip, lsr #1
+ subs r2, r2, ip, lsr #3
+
+#else
+
+ clz r2, ip
+ subs r2, r2, #8
+
+#endif
+
+ sub r3, r3, r2, lsl #23
+ blt 3f
+
+ shiftop add r3 r3 ah lsl r2 ip
+ shift1 lsl, ip, al, r2
+ rsb r2, r2, #32
+ cmp ip, #0x80000000
+ shiftop adc r0 r3 al lsr r2 r2
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+3: add r2, r2, #32
+ shift1 lsl, ip, ah, r2
+ rsb r2, r2, #32
+ orrs al, al, ip, lsl #1
+ shiftop adc r0 r3 ah lsr r2 r2
+ do_it eq
+ biceq r0, r0, ip, lsr #31
+ RET
+
+#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
+
+LSYM(f0_ret):
+ str r0, [sp, #-4]!
+ ldfs f0, [sp], #4
+ RETLDM
+
+#endif
+
+ FUNC_END floatdisf
+ FUNC_END aeabi_l2f
+ FUNC_END floatundisf
+ FUNC_END aeabi_ul2f
+
+#endif /* L_addsubsf3 */
+
+#ifdef L_arm_muldivsf3
+
+ARM_FUNC_START mulsf3
+ARM_FUNC_ALIAS aeabi_fmul mulsf3
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ do_it ne, tt
+ COND(and,s,ne) r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Lml_s)
+LSYM(Lml_x):
+
+ @ Add exponents together
+ add r2, r2, r3
+
+ @ Determine final sign.
+ eor ip, r0, r1
+
+ @ Convert mantissa to unsigned integer.
+ @ If power of two, branch to a separate path.
+ @ Make up for final alignment.
+ movs r0, r0, lsl #9
+ do_it ne
+ COND(mov,s,ne) r1, r1, lsl #9
+ beq LSYM(Lml_1)
+ mov r3, #0x08000000
+ orr r0, r3, r0, lsr #5
+ orr r1, r3, r1, lsr #5
+
+#if __ARM_ARCH__ < 4
+
+ @ Put sign bit in r3, which will be restored into r0 later.
+ and r3, ip, #0x80000000
+
+ @ Well, no way to make it shorter without the umull instruction.
+ do_push {r3, r4, r5}
+ mov r4, r0, lsr #16
+ mov r5, r1, lsr #16
+ bic r0, r0, r4, lsl #16
+ bic r1, r1, r5, lsl #16
+ mul ip, r4, r5
+ mul r3, r0, r1
+ mul r0, r5, r0
+ mla r0, r4, r1, r0
+ adds r3, r3, r0, lsl #16
+ adc r1, ip, r0, lsr #16
+ do_pop {r0, r4, r5}
+
+#else
+
+ @ The actual multiplication.
+ umull r3, r1, r0, r1
+
+ @ Put final sign in r0.
+ and r0, ip, #0x80000000
+
+#endif
+
+ @ Adjust the result according to the MSB position.
+ cmp r1, #(1 << 23)
+ do_it cc, tt
+ movcc r1, r1, lsl #1
+ orrcc r1, r1, r3, lsr #31
+ movcc r3, r3, lsl #1
+
+ @ Add sign to result.
+ orr r0, r0, r1
+
+ @ Apply exponent bias, check for under/overflow.
+ sbc r2, r2, #127
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp r3, #0x80000000
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+ teq r0, #0
+ and ip, ip, #0x80000000
+ do_it eq
+ moveq r1, r1, lsl #9
+ orr r0, ip, r0, lsr #9
+ orr r0, r0, r1, lsr #9
+ subs r2, r2, #127
+ do_it gt, tt
+ COND(rsb,s,gt) r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
+ RETc(gt)
+
+ @ Under/overflow: fix things up for the code below.
+ orr r0, r0, #0x00800000
+ mov r3, #0
+ subs r2, r2, #1
+
+LSYM(Lml_u):
+ @ Overflow?
+ bgt LSYM(Lml_o)
+
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r2, #(24 + 1)
+ do_it le, t
+ bicle r0, r0, #0x7fffffff
+ RETc(le)
+
+ @ Shift value right, round, etc.
+ rsb r2, r2, #0
+ movs r1, r0, lsl #1
+ shift1 lsr, r1, r1, r2
+ rsb r2, r2, #32
+ shift1 lsl, ip, r0, r2
+ movs r0, r1, rrx
+ adc r0, r0, #0
+ orrs r3, r3, ip, lsl #1
+ do_it eq
+ biceq r0, r0, ip, lsr #31
+ RET
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Lml_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: do_it eq, tt
+ moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #1
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: do_it eq, tt
+ moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #1
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Lml_x)
+
+LSYM(Lml_s):
+ @ Isolate the INF and NAN cases.
+ and r3, ip, r1, lsr #23
+ teq r2, ip
+ do_it ne
+ teqne r3, ip
+ beq 1f
+
+ @ Here, one or more arguments are either denormalized or zero.
+ bics ip, r0, #0x80000000
+ do_it ne
+ COND(bic,s,ne) ip, r1, #0x80000000
+ bne LSYM(Lml_d)
+
+ @ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+ eor r0, r0, r1
+ bic r0, r0, #0x7fffffff
+ RET
+
+1: @ One or both args are INF or NAN.
+ teq r0, #0x0
+ do_it ne, ett
+ teqne r0, #0x80000000
+ moveq r0, r1
+ teqne r1, #0x0
+ teqne r1, #0x80000000
+ beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ teq r2, ip
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+1: teq r3, ip
+ bne LSYM(Lml_i)
+ movs r3, r1, lsl #9
+ do_it ne
+ movne r0, r1
+ bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+
+ @ Result is INF, but we need to determine its sign.
+LSYM(Lml_i):
+ eor r0, r0, r1
+
+ @ Overflow: return INF (sign already in r0).
+LSYM(Lml_o):
+ and r0, r0, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
+
+ @ Return a quiet NAN.
+LSYM(Lml_n):
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00c00000
+ RET
+
+ FUNC_END aeabi_fmul
+ FUNC_END mulsf3
+
+ARM_FUNC_START divsf3
+ARM_FUNC_ALIAS aeabi_fdiv divsf3
+
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ ands r2, ip, r0, lsr #23
+ do_it ne, tt
+ COND(and,s,ne) r3, ip, r1, lsr #23
+ teqne r2, ip
+ teqne r3, ip
+ beq LSYM(Ldv_s)
+LSYM(Ldv_x):
+
+ @ Subtract the divisor's exponent from the dividend's.
+ sub r2, r2, r3
+
+ @ Preserve the final sign in ip.
+ eor ip, r0, r1
+
+ @ Convert mantissa to unsigned integer.
+ @ Dividend -> r3, divisor -> r1.
+ movs r1, r1, lsl #9
+ mov r0, r0, lsl #9
+ beq LSYM(Ldv_1)
+ mov r3, #0x10000000
+ orr r1, r3, r1, lsr #4
+ orr r3, r3, r0, lsr #4
+
+ @ Initialize r0 (result) with final sign bit.
+ and r0, ip, #0x80000000
+
+ @ Ensure the result will land at a known bit position.
+ @ Apply exponent bias accordingly.
+ cmp r3, r1
+ do_it cc
+ movcc r3, r3, lsl #1
+ adc r2, r2, #(127 - 2)
+
+ @ The actual division loop.
+ mov ip, #0x00800000
+1: cmp r3, r1
+ do_it cs, t
+ subcs r3, r3, r1
+ orrcs r0, r0, ip
+ cmp r3, r1, lsr #1
+ do_it cs, t
+ subcs r3, r3, r1, lsr #1
+ orrcs r0, r0, ip, lsr #1
+ cmp r3, r1, lsr #2
+ do_it cs, t
+ subcs r3, r3, r1, lsr #2
+ orrcs r0, r0, ip, lsr #2
+ cmp r3, r1, lsr #3
+ do_it cs, t
+ subcs r3, r3, r1, lsr #3
+ orrcs r0, r0, ip, lsr #3
+ movs r3, r3, lsl #4
+ do_it ne
+ COND(mov,s,ne) ip, ip, lsr #4
+ bne 1b
+
+ @ Check exponent for under/overflow.
+ cmp r2, #(254 - 1)
+ bhi LSYM(Lml_u)
+
+ @ Round the result, merge final exponent.
+ cmp r3, r1
+ adc r0, r0, r2, lsl #23
+ do_it eq
+ biceq r0, r0, #1
+ RET
+
+ @ Division by 0x1p*: let's shortcut a lot of code.
+LSYM(Ldv_1):
+ and ip, ip, #0x80000000
+ orr r0, ip, r0, lsr #9
+ adds r2, r2, #127
+ do_it gt, tt
+ COND(rsb,s,gt) r3, r2, #255
+ orrgt r0, r0, r2, lsl #23
+ RETc(gt)
+
+ orr r0, r0, #0x00800000
+ mov r3, #0
+ subs r2, r2, #1
+ b LSYM(Lml_u)
+
+ @ One or both arguments are denormalized.
+ @ Scale them leftwards and preserve sign bit.
+LSYM(Ldv_d):
+ teq r2, #0
+ and ip, r0, #0x80000000
+1: do_it eq, tt
+ moveq r0, r0, lsl #1
+ tsteq r0, #0x00800000
+ subeq r2, r2, #1
+ beq 1b
+ orr r0, r0, ip
+ teq r3, #0
+ and ip, r1, #0x80000000
+2: do_it eq, tt
+ moveq r1, r1, lsl #1
+ tsteq r1, #0x00800000
+ subeq r3, r3, #1
+ beq 2b
+ orr r1, r1, ip
+ b LSYM(Ldv_x)
+
+ @ One or both arguments are either INF, NAN, zero or denormalized.
+LSYM(Ldv_s):
+ and r3, ip, r1, lsr #23
+ teq r2, ip
+ bne 1f
+ movs r2, r0, lsl #9
+ bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ teq r3, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov r0, r1
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+1: teq r3, ip
+ bne 2f
+ movs r3, r1, lsl #9
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov r0, r1
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+2: @ If both are nonzero, we need to normalize and resume above.
+ bics ip, r0, #0x80000000
+ do_it ne
+ COND(bic,s,ne) ip, r1, #0x80000000
+ bne LSYM(Ldv_d)
+ @ One or both arguments are zero.
+ bics r2, r0, #0x80000000
+ bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bics r3, r1, #0x80000000
+ bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
+ b LSYM(Lml_n) @ 0 / 0 -> NAN
+
+ FUNC_END aeabi_fdiv
+ FUNC_END divsf3
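+
+/* The division loop above is restoring division, one quotient bit per
+   compare/subtract, unrolled four bits per round.  C sketch of the plain
+   one-bit-at-a-time version (illustrative; rem and div are the aligned
+   mantissas, and the count is a nominal 24 bits plus one rounding bit):
+
+     unsigned int q = 0;
+     for (int i = 0; i < 25; i++)
+       {
+         q <<= 1;
+         if (rem >= div)
+           {
+             rem -= div;       // restoring step: keep only if it fits
+             q |= 1;
+           }
+         rem <<= 1;
+       }
+  */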
+
+#endif /* L_muldivsf3 */
+
+#ifdef L_arm_cmpsf2
+
+ @ The return value in r0 is
+ @
+ @ 0 if the operands are equal
+ @ 1 if the first operand is greater than the second, or
+ @ the operands are unordered and the operation is
+ @ CMP, LT, LE, NE, or EQ.
+ @ -1 if the first operand is less than the second, or
+ @ the operands are unordered and the operation is GT
+ @ or GE.
+ @
+ @ The Z flag will be set iff the operands are equal.
+ @
+ @ The following registers are clobbered by this function:
+ @ ip, r0, r1, r2, r3
+
+ARM_FUNC_START gtsf2
+ARM_FUNC_ALIAS gesf2 gtsf2
+ mov ip, #-1
+ b 1f
+
+ARM_FUNC_START ltsf2
+ARM_FUNC_ALIAS lesf2 ltsf2
+ mov ip, #1
+ b 1f
+
+ARM_FUNC_START cmpsf2
+ARM_FUNC_ALIAS nesf2 cmpsf2
+ARM_FUNC_ALIAS eqsf2 cmpsf2
+ mov ip, #1 @ how should we specify unordered here?
+
+1: str ip, [sp, #-4]!
+
+ @ Trap any INF/NAN first.
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ do_it ne
+ COND(mvn,s,ne) ip, r3, asr #24
+ beq 3f
+
+ @ Compare values.
+ @ Note that 0.0 is equal to -0.0.
+2: add sp, sp, #4
+ orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
+ do_it ne
+ teqne r0, r1 @ if not 0 compare sign
+ do_it pl
+ COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0
+
+ @ Result:
+ do_it hi
+ movhi r0, r1, asr #31
+ do_it lo
+ mvnlo r0, r1, asr #31
+ do_it ne
+ orrne r0, r0, #1
+ RET
+
+ @ Look for a NAN.
+3: mvns ip, r2, asr #24
+ bne 4f
+ movs ip, r0, lsl #9
+ bne 5f @ r0 is NAN
+4: mvns ip, r3, asr #24
+ bne 2b
+ movs ip, r1, lsl #9
+ beq 2b @ r1 is not NAN
+5: ldr r0, [sp], #4 @ return unordered code.
+ RET
+
+ FUNC_END gesf2
+ FUNC_END gtsf2
+ FUNC_END lesf2
+ FUNC_END ltsf2
+ FUNC_END nesf2
+ FUNC_END eqsf2
+ FUNC_END cmpsf2
+
+ARM_FUNC_START aeabi_cfrcmple
+
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ b 6f
+
+ARM_FUNC_START aeabi_cfcmpeq
+ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
+
+ @ The status-returning routines are required to preserve all
+ @ registers except ip, lr, and cpsr.
+6: do_push {r0, r1, r2, r3, lr}
+ ARM_CALL cmpsf2
+ @ Set the Z flag correctly, and the C flag unconditionally.
+ cmp r0, #0
+ @ Clear the C flag if the return value was -1, indicating
+ @ that the first operand was smaller than the second.
+ do_it mi
+ cmnmi r0, #0
+ RETLDM "r0, r1, r2, r3"
+
+ FUNC_END aeabi_cfcmple
+ FUNC_END aeabi_cfcmpeq
+ FUNC_END aeabi_cfrcmple
+
+ARM_FUNC_START aeabi_fcmpeq
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it eq, e
+ moveq r0, #1 @ Equal to.
+ movne r0, #0 @ Less than, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpeq
+
+ARM_FUNC_START aeabi_fcmplt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it cc, e
+ movcc r0, #1 @ Less than.
+ movcs r0, #0 @ Equal to, greater than, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmplt
+
+ARM_FUNC_START aeabi_fcmple
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfcmple
+ do_it ls, e
+ movls r0, #1 @ Less than or equal to.
+ movhi r0, #0 @ Greater than or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmple
+
+ARM_FUNC_START aeabi_fcmpge
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfrcmple
+ do_it ls, e
+ movls r0, #1 @ Operand 2 is less than or equal to operand 1.
+ movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpge
+
+ARM_FUNC_START aeabi_fcmpgt
+
+ str lr, [sp, #-8]!
+ ARM_CALL aeabi_cfrcmple
+ do_it cc, e
+ movcc r0, #1 @ Operand 2 is less than operand 1.
+ movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
+ @ or they are unordered.
+ RETLDM
+
+ FUNC_END aeabi_fcmpgt
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_arm_unordsf2
+
+ARM_FUNC_START unordsf2
+ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
+
+ mov r2, r0, lsl #1
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ bne 1f
+ movs ip, r0, lsl #9
+ bne 3f @ r0 is NAN
+1: mvns ip, r3, asr #24
+ bne 2f
+ movs ip, r1, lsl #9
+ bne 3f @ r1 is NAN
+2: mov r0, #0 @ arguments are ordered.
+ RET
+3: mov r0, #1 @ arguments are unordered.
+ RET
+
+ FUNC_END aeabi_fcmpun
+ FUNC_END unordsf2
+
+#endif /* L_unordsf2 */
+
+#ifdef L_arm_fixsfsi
+
+ARM_FUNC_START fixsfsi
+ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
+
+ @ check exponent range.
+ mov r2, r0, lsl #1
+ cmp r2, #(127 << 24)
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bls 2f @ value is too large
+
+ @ scale value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ tst r0, #0x80000000 @ the sign bit
+ shift1 lsr, r0, r3, r2
+ do_it ne
+ rsbne r0, r0, #0
+ RET
+
+1: mov r0, #0
+ RET
+
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: ands r0, r0, #0x80000000 @ the sign bit
+ do_it eq
+ moveq r0, #0x7fffffff @ the maximum signed positive si
+ RET
+
+4: mov r0, #0 @ What should we convert NAN to?
+ RET
+
+ FUNC_END aeabi_f2iz
+ FUNC_END fixsfsi
+
+#endif /* L_fixsfsi */
+
+#ifdef L_arm_fixunssfsi
+
+ARM_FUNC_START fixunssfsi
+ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
+
+ @ check exponent range.
+ movs r2, r0, lsl #1
+ bcs 1f @ value is negative
+ cmp r2, #(127 << 24)
+ bcc 1f @ value is too small
+ mov r3, #(127 + 31)
+ subs r2, r3, r2, lsr #24
+ bmi 2f @ value is too large
+
+ @ scale the value
+ mov r3, r0, lsl #8
+ orr r3, r3, #0x80000000
+ shift1 lsr, r0, r3, r2
+ RET
+
+1: mov r0, #0
+ RET
+
+2: cmp r2, #(127 + 31 - 0xff)
+ bne 3f
+ movs r2, r0, lsl #9
+ bne 4f @ r0 is NAN.
+3: mov r0, #0xffffffff @ maximum unsigned si
+ RET
+
+4: mov r0, #0 @ What should we convert NAN to?
+ RET
+
+ FUNC_END aeabi_f2uiz
+ FUNC_END fixunssfsi
+
+#endif /* L_fixunssfsi */
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
new file mode 100644
index 000000000..887c962ba
--- /dev/null
+++ b/gcc/config/arm/iterators.md
@@ -0,0 +1,405 @@
+;; Code and mode iterator and attribute definitions for the ARM backend
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;;----------------------------------------------------------------------------
+;; Mode iterators
+;;----------------------------------------------------------------------------
+
+;; A list of modes that are exactly 64 bits in size. This is used to expand
+;; some splits that are the same for all modes when operating on ARM
+;; registers.
+(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF])
+
+(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF])
+
+;; A list of integer modes that are up to one word long
+(define_mode_iterator QHSI [QI HI SI])
+
+;; Integer element sizes implemented by IWMMXT.
+(define_mode_iterator VMMX [V2SI V4HI V8QI])
+
+;; Integer element sizes for shifts.
+(define_mode_iterator VSHFT [V4HI V2SI DI])
+
+;; Integer and float modes supported by Neon and IWMMXT.
+(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
+
+;; Integer and float modes supported by Neon and IWMMXT, except V2DI.
+(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
+
+;; Integer modes supported by Neon and IWMMXT
+(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI])
+
+;; Integer modes supported by Neon and IWMMXT, except V2DI
+(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
+
+;; Double-width vector modes.
+(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
+
+;; Double-width vector modes plus 64-bit elements.
+(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
+
+;; Double-width vector modes without floating-point elements.
+(define_mode_iterator VDI [V8QI V4HI V2SI])
+
+;; Quad-width vector modes.
+(define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
+
+;; Quad-width vector modes plus 64-bit elements.
+(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
+
+;; Quad-width vector modes without floating-point elements.
+(define_mode_iterator VQI [V16QI V8HI V4SI])
+
+;; Quad-width vector modes, with TImode added, for moves.
+(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI])
+
+;; Opaque structure types wider than TImode.
+(define_mode_iterator VSTRUCT [EI OI CI XI])
+
+;; Opaque structure types used in table lookups (except vtbl1/vtbx1).
+(define_mode_iterator VTAB [TI EI OI])
+
+;; Widenable modes.
+(define_mode_iterator VW [V8QI V4HI V2SI])
+
+;; Narrowable modes.
+(define_mode_iterator VN [V8HI V4SI V2DI])
+
+;; All supported vector modes (except singleton DImode).
+(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI])
+
+;; All supported vector modes (except those with 64-bit integer elements).
+(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+
+;; Supported integer vector modes (not 64 bit elements).
+(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
+
+;; Supported integer vector modes (not singleton DI)
+(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
+
+;; Vector modes, including 64-bit integer elements.
+(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI])
+
+;; Vector modes including 64-bit integer elements, but no floats.
+(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
+
+;; Vector modes for float->int conversions.
+(define_mode_iterator VCVTF [V2SF V4SF])
+
+;; Vector modes for int->float conversions.
+(define_mode_iterator VCVTI [V2SI V4SI])
+
+;; Vector modes for doubleword multiply-accumulate, etc. insns.
+(define_mode_iterator VMD [V4HI V2SI V2SF])
+
+;; Vector modes for quadword multiply-accumulate, etc. insns.
+(define_mode_iterator VMQ [V8HI V4SI V4SF])
+
+;; Above modes combined.
+(define_mode_iterator VMDQ [V4HI V2SI V2SF V8HI V4SI V4SF])
+
+;; As VMD, but integer modes only.
+(define_mode_iterator VMDI [V4HI V2SI])
+
+;; As VMQ, but integer modes only.
+(define_mode_iterator VMQI [V8HI V4SI])
+
+;; Above modes combined.
+(define_mode_iterator VMDQI [V4HI V2SI V8HI V4SI])
+
+;; Modes with 8-bit and 16-bit elements.
+(define_mode_iterator VX [V8QI V4HI V16QI V8HI])
+
+;; Modes with 8-bit elements.
+(define_mode_iterator VE [V8QI V16QI])
+
+;; Modes with 64-bit elements only.
+(define_mode_iterator V64 [DI V2DI])
+
+;; Modes with 32-bit elements only.
+(define_mode_iterator V32 [V2SI V2SF V4SI V4SF])
+
+;; Modes with 8-bit, 16-bit and 32-bit elements.
+(define_mode_iterator VU [V16QI V8HI V4SI])
+
+;;----------------------------------------------------------------------------
+;; Code iterators
+;;----------------------------------------------------------------------------
+
+;; A list of condition codes used in compare instructions where
+;; the carry flag from the addition is used instead of doing the
+;; compare a second time.
+(define_code_iterator LTUGEU [ltu geu])
+
+;; A list of bitwise inclusive- and exclusive-OR operations.
+(define_code_iterator ior_xor [ior xor])
+
+;; Operations on two halves of a quadword vector.
+(define_code_iterator vqh_ops [plus smin smax umin umax])
+
+;; Operations on two halves of a quadword vector,
+;; without unsigned variants (for use with *SFmode pattern).
+(define_code_iterator vqhs_ops [plus smin smax])
+
+;; A list of widening operators
+(define_code_iterator SE [sign_extend zero_extend])
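+
+;; As an illustration (not a pattern in this file): a define_insn written
+;; against one of these iterators, e.g.
+;;
+;;   (define_insn "*example_mov<mode>"
+;;     [(set (match_operand:VMMX 0 "s_register_operand" "=y")
+;;           (match_operand:VMMX 1 "s_register_operand" "y"))]
+;;     ...)
+;;
+;; is expanded into one pattern per mode in VMMX (V2SI, V4HI, V8QI),
+;; with <mode> replaced by each mode's name in turn.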
+
+;;----------------------------------------------------------------------------
+;; Mode attributes
+;;----------------------------------------------------------------------------
+
+;; Determine element size suffix from vector mode.
+(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
+
+;; vtbl<n> suffix for NEON vector modes.
+(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")])
+
+;; (Opposite) mode to convert to/from for NEON mode conversions.
+(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI")
+ (V4SI "V4SF") (V4SF "V4SI")])
+
+;; Define element mode for each vector mode.
+(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
+ (V4HI "HI") (V8HI "HI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI "DI") (V2DI "DI")])
+
+;; Element modes for vector extraction, padded up to register size.
+
+(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")
+ (DI "DI") (V2DI "DI")])
+
+;; Mode of pair of elements for each vector mode, to define transfer
+;; size for structure lane/dup loads and stores.
+(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
+ (V4HI "SI") (V8HI "SI")
+ (V2SI "V2SI") (V4SI "V2SI")
+ (V2SF "V2SF") (V4SF "V2SF")
+ (DI "V2DI") (V2DI "V2DI")])
+
+;; Similar, for three elements.
+;; ??? Should we define extra modes so that sizes of all three-element
+;; accesses can be accurately represented?
+(define_mode_attr V_three_elem [(V8QI "SI") (V16QI "SI")
+ (V4HI "V4HI") (V8HI "V4HI")
+ (V2SI "V4SI") (V4SI "V4SI")
+ (V2SF "V4SF") (V4SF "V4SF")
+ (DI "EI") (V2DI "EI")])
+
+;; Similar, for four elements.
+(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
+ (V4HI "V4HI") (V8HI "V4HI")
+ (V2SI "V4SI") (V4SI "V4SI")
+ (V2SF "V4SF") (V4SF "V4SF")
+ (DI "OI") (V2DI "OI")])
+
+;; Register-width print modifier from vector mode ("P" for doubleword,
+;; "q" for quadword registers).
+(define_mode_attr V_reg [(V8QI "P") (V16QI "q")
+ (V4HI "P") (V8HI "q")
+ (V2SI "P") (V4SI "q")
+ (V2SF "P") (V4SF "q")
+ (DI "P") (V2DI "q")])
+
+;; Wider modes with the same number of elements.
+(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
+
+;; Narrower modes with the same number of elements.
+(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")])
+
+;; Narrower modes with double the number of elements.
+(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI")
+ (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")])
+
+;; Modes with half the number of equal-sized elements.
+(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
+ (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF")
+ (V2DI "DI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi")
+ (V4SI "v2si") (V4SF "v2sf")
+ (V2DI "di")])
+
+;; Modes with twice the number of equal-sized elements.
+(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
+ (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
+ (DI "V2DI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
+ (V2SI "v4si") (V2SF "v4sf")
+ (DI "v2di")])
+
+;; Modes with double-width elements.
+(define_mode_attr V_double_width [(V8QI "V4HI") (V16QI "V8HI")
+ (V4HI "V2SI") (V8HI "V4SI")
+ (V2SI "DI") (V4SI "V2DI")])
+
+;; Double-sized modes with the same element size.
+;; Used for neon_vdup_lane, where the second operand is double-sized
+;; even when the first one is quad.
+(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
+ (V4SI "V2SI") (V4SF "V2SF")
+ (V8QI "V8QI") (V4HI "V4HI")
+ (V2SI "V2SI") (V2SF "V2SF")])
+
+;; Mode of result of comparison operations (and bit-select operand 1).
+(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
+ (V4HI "V4HI") (V8HI "V8HI")
+ (V2SI "V2SI") (V4SI "V4SI")
+ (V2SF "V2SI") (V4SF "V4SI")
+ (DI "DI") (V2DI "V2DI")])
+
+;; Element type suffix for each vector mode, for operations where we
+;; don't care about signedness.
+(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8")
+ (V4HI "i16") (V8HI "i16")
+ (V2SI "i32") (V4SI "i32")
+ (DI "i64") (V2DI "i64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on signed values.
+(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8")
+ (V4HI "s16") (V8HI "s16")
+ (V2SI "s32") (V4SI "s32")
+ (DI "s64") (V2DI "s64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on unsigned values.
+(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "u32") (V4SI "u32")
+ (DI "u64") (V2DI "u64")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Element types for extraction of unsigned scalars.
+(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "32") (V4SI "32")
+ (V2SF "32") (V4SF "32")])
+
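+;; Element size in bits, from vector mode.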
+(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
+ (V4HI "16") (V8HI "16")
+ (V2SI "32") (V4SI "32")
+ (DI "64") (V2DI "64")
+ (V2SF "32") (V4SF "32")])
+
+;; Element sizes for duplicating ARM registers to all elements of a vector.
+(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")])
+
+;; Opaque integer types for results of pair-forming intrinsics (vtrn, etc.)
+(define_mode_attr V_PAIR [(V8QI "TI") (V16QI "OI")
+ (V4HI "TI") (V8HI "OI")
+ (V2SI "TI") (V4SI "OI")
+ (V2SF "TI") (V4SF "OI")
+ (DI "TI") (V2DI "OI")])
+
+;; Same, but lower-case.
+(define_mode_attr V_pair [(V8QI "ti") (V16QI "oi")
+ (V4HI "ti") (V8HI "oi")
+ (V2SI "ti") (V4SI "oi")
+ (V2SF "ti") (V4SF "oi")
+ (DI "ti") (V2DI "oi")])
+
+;; Extra suffix on some 64-bit insn names (to avoid collision with standard
+;; names which we don't want to define).
+(define_mode_attr V_suf64 [(V8QI "") (V16QI "")
+ (V4HI "") (V8HI "")
+ (V2SI "") (V4SI "")
+ (V2SF "") (V4SF "")
+ (DI "_neon") (V2DI "")])
+
+
+;; Scalars to be presented to scalar multiplication instructions
+;; must satisfy the following constraints.
+;; 1. If the mode specifies 16-bit elements, the scalar must be in D0-D7.
+;; 2. If the mode specifies 32-bit elements, the scalar must be in D0-D15.
+
+;; This mode attribute is used to obtain the correct register constraints.
+
+(define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t")
+ (V8HI "x") (V4SI "t") (V4SF "t")])
+
+;; Predicates used for setting neon_type
+
+(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
+ (V4HI "false") (V8HI "false")
+ (V2SI "false") (V4SI "false")
+ (V2SF "true") (V4SF "true")
+ (DI "false") (V2DI "false")])
+
+(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
+ (V4HI "true") (V8HI "true")
+ (V2SI "false") (V4SI "false")
+ (V2SF "false") (V4SF "false")
+ (DI "false") (V2DI "false")])
+
+
+(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
+ (V4HI "true") (V8HI "false")
+ (V2SI "true") (V4SI "false")
+ (V2SF "true") (V4SF "false")
+ (DI "true") (V2DI "false")])
+
+(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+ (V4HI "4") (V8HI "8")
+ (V2SI "2") (V4SI "4")
+ (V2SF "2") (V4SF "4")
+ (DI "1") (V2DI "2")
+ (DF "1") (V2DF "2")])
+
+;; Same as V_widen, but lower-case.
+(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") (V2SI "v2di")])
+
+;; Widening: the result has half the number of elements, each widened to
+;; double width.
+(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+
+;; Conditions to be used in extend<mode>di patterns.
+(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
+(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
+ (QI "&& arm_arch6")])
+(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
+ (HI "nonimmediate_operand")
+ (QI "nonimmediate_operand")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
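+;; For example (a sketch of the expansion): a pattern whose condition
+;; string is "TARGET_32BIT <qhs_sextenddi_cond>" comes out as plain
+;; "TARGET_32BIT" for SImode but as "TARGET_32BIT && arm_arch6" for
+;; HImode and QImode.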
+
+;;----------------------------------------------------------------------------
+;; Code attributes
+;;----------------------------------------------------------------------------
+
+;; Assembler mnemonics for vqh_ops and vqhs_ops iterators.
+(define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax")
+ (umin "vmin") (umax "vmax")])
+
+;; Signs of above, where relevant.
+(define_code_attr VQH_sign [(plus "i") (smin "s") (smax "s") (umin "u")
+ (umax "u")])
+
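+;; CC mode and insn-name fragment corresponding to the LTUGEU codes.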
+(define_code_attr cnb [(ltu "CC_C") (geu "CC")])
+(define_code_attr optab [(ltu "ltu") (geu "geu")])
+
+;; Assembler mnemonics for signedness of widening operations.
+(define_code_attr US [(sign_extend "s") (zero_extend "u")])
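+;; For example, a widening-add template such as "vaddl.<US><V_sz_elem>"
+;; yields vaddl.s8 for sign_extend and vaddl.u8 for zero_extend on
+;; V8QI operands.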
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
new file mode 100644
index 000000000..7f13ae49b
--- /dev/null
+++ b/gcc/config/arm/iwmmxt.md
@@ -0,0 +1,1332 @@
+;; ??? This file needs auditing for thumb2
+;; Patterns for the Intel Wireless MMX technology architecture.
+;; Copyright (C) 2003, 2004, 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
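+;; In the output templates below, "%?" prints the current condition code
+;; when the insn is conditionally executed, which is why most patterns
+;; here also set the "predicable" attribute.
+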
+(define_insn "iwmmxt_iordi3"
+ [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r")
+ (ior:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+ (match_operand:DI 2 "register_operand" "y,r,r")))]
+ "TARGET_REALLY_IWMMXT"
+ "@
+ wor%?\\t%0, %1, %2
+ #
+ #"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_xordi3"
+ [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r")
+ (xor:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+ (match_operand:DI 2 "register_operand" "y,r,r")))]
+ "TARGET_REALLY_IWMMXT"
+ "@
+ wxor%?\\t%0, %1, %2
+ #
+ #"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_anddi3"
+ [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r")
+ (and:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+ (match_operand:DI 2 "register_operand" "y,r,r")))]
+ "TARGET_REALLY_IWMMXT"
+ "@
+ wand%?\\t%0, %1, %2
+ #
+ #"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_nanddi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (and:DI (match_operand:DI 1 "register_operand" "y")
+ (not:DI (match_operand:DI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wandn%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_arm_movdi"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, m,y,y,yr,y,yrUy")
+ (match_operand:DI 1 "di_operand" "rIK,mi,r,y,yr,y,yrUy,y"))]
+ "TARGET_REALLY_IWMMXT
+ && ( register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ return output_move_double (operands);
+ case 0:
+ return \"#\";
+ case 3:
+ return \"wmov%?\\t%0,%1\";
+ case 4:
+ return \"tmcrr%?\\t%0,%Q1,%R1\";
+ case 5:
+ return \"tmrrc%?\\t%Q0,%R0,%1\";
+ case 6:
+ return \"wldrd%?\\t%0,%1\";
+ case 7:
+ return \"wstrd%?\\t%1,%0\";
+ }
+}"
+ [(set_attr "length" "8,8,8,4,4,4,4,4")
+ (set_attr "type" "*,load1,store2,*,*,*,*,*")
+ (set_attr "pool_range" "*,1020,*,*,*,*,*,*")
+ (set_attr "neg_pool_range" "*,1012,*,*,*,*,*,*")]
+)
+
+(define_insn "*iwmmxt_movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,rk, m,z,r,?z,Uy,z")
+ (match_operand:SI 1 "general_operand" "rk, I,K,mi,rk,r,z,Uy,z, z"))]
+ "TARGET_REALLY_IWMMXT
+ && ( register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: return \"mov\\t%0, %1\";
+ case 1: return \"mov\\t%0, %1\";
+ case 2: return \"mvn\\t%0, #%B1\";
+ case 3: return \"ldr\\t%0, %1\";
+ case 4: return \"str\\t%1, %0\";
+ case 5: return \"tmcr\\t%0, %1\";
+ case 6: return \"tmrc\\t%0, %1\";
+ case 7: return arm_output_load_gr (operands);
+ case 8: return \"wstrw\\t%1, %0\";
+ default: return \"wstrw\\t%1, [sp, #-4]!\;wldrw\\t%0, [sp], #4\\t@move CG reg\";
+ }"
+ [(set_attr "type" "*,*,*,load1,store1,*,*,load1,store1,*")
+ (set_attr "length" "*,*,*,*, *,*,*, 16, *,8")
+ (set_attr "pool_range" "*,*,*,4096, *,*,*,1024, *,*")
+ (set_attr "neg_pool_range" "*,*,*,4084, *,*,*, *, 1012,*")
+ ;; Note - the "predicable" attribute is not allowed to have alternatives.
+ ;; Since the wSTRw wCx instruction is not predicable, we cannot support
+ ;; predicating any of the alternatives in this template. Instead,
+ ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn.
+ (set_attr "predicable" "no")
+ ;; Also - we have to pretend that these insns clobber the condition code
+ ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize
+ ;; them.
+ (set_attr "conds" "clob")]
+)
+
+;; Because iwmmxt_movsi_insn is not predicable, we provide the
+;; cond_exec version explicitly, with appropriate constraints.
+
+(define_insn "*cond_iwmmxt_movsi_insn"
+ [(cond_exec
+ (match_operator 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "")
+ (const_int 0)])
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r")
+ (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))]
+ "TARGET_REALLY_IWMMXT
+ && ( register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: return \"mov%?\\t%0, %1\";
+ case 1: return \"mvn%?\\t%0, #%B1\";
+ case 2: return \"ldr%?\\t%0, %1\";
+ case 3: return \"str%?\\t%1, %0\";
+ case 4: return \"tmcr%?\\t%0, %1\";
+ default: return \"tmrc%?\\t%0, %1\";
+ }"
+ [(set_attr "type" "*,*,load1,store1,*,*")
+ (set_attr "pool_range" "*,*,4096, *,*,*")
+ (set_attr "neg_pool_range" "*,*,4084, *,*,*")]
+)
+
+(define_insn "mov<mode>_internal"
+ [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m")
+ (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))]
+ "TARGET_REALLY_IWMMXT"
+ "*
+ switch (which_alternative)
+ {
+ case 0: return \"wmov%?\\t%0, %1\";
+ case 1: return \"wstrd%?\\t%1, %0\";
+ case 2: return \"wldrd%?\\t%0, %1\";
+ case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
+ case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
+ case 5: return \"#\";
+ default: return output_move_double (operands);
+ }"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4, 4, 4,4,4,8, 8,8")
+ (set_attr "type" "*,store1,load1,*,*,*,load1,store1")
+ (set_attr "pool_range" "*, *, 256,*,*,*, 256,*")
+ (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")])
+
+;; Vector add/subtract
+
+(define_insn "*add<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (plus:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wadd<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddbss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddhss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddwss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddbus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddhus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "waddwus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*sub<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (minus:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsub<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubbss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubhss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubwss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubbus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubhus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsubwus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*mulv4hi3_iwmmxt"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (mult:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wmulul%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
+ (const_int 16))))]
+ "TARGET_REALLY_IWMMXT"
+ "wmulsm%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
+ (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
+ (const_int 16))))]
+ "TARGET_REALLY_IWMMXT"
+ "wmulum%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacs"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "register_operand" "y")
+ (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))]
+ "TARGET_REALLY_IWMMXT"
+ "wmacs%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacsz"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))]
+ "TARGET_REALLY_IWMMXT"
+ "wmacsz%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacu"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "register_operand" "y")
+ (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))]
+ "TARGET_REALLY_IWMMXT"
+ "wmacu%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacuz"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))]
+ "TARGET_REALLY_IWMMXT"
+ "wmacuz%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+;; Same as xordi3, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "iwmmxt_clrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(const_int 0)] UNSPEC_CLRDI))]
+ "TARGET_REALLY_IWMMXT"
+ "wxor%?\\t%0, %0, %0"
+ [(set_attr "predicable" "yes")])
+
+;; Seems like cse likes to generate these, so we have to support them.
+
+(define_insn "*iwmmxt_clrv8qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)]))]
+ "TARGET_REALLY_IWMMXT"
+ "wxor%?\\t%0, %0, %0"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_clrv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (const_vector:V4HI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)]))]
+ "TARGET_REALLY_IWMMXT"
+ "wxor%?\\t%0, %0, %0"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_clrv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (const_vector:V2SI [(const_int 0) (const_int 0)]))]
+ "TARGET_REALLY_IWMMXT"
+ "wxor%?\\t%0, %0, %0"
+ [(set_attr "predicable" "yes")])
+
+;; Unsigned averages/sum of absolute differences
+
+(define_insn "iwmmxt_uavgrndv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (ashiftrt:V8QI
+ (plus:V8QI (plus:V8QI
+ (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y"))
+ (const_vector:V8QI [(const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)]))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wavg2br%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_uavgrndv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ashiftrt:V4HI
+ (plus:V4HI (plus:V4HI
+ (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y"))
+ (const_vector:V4HI [(const_int 1)
+ (const_int 1)
+ (const_int 1)
+ (const_int 1)]))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wavg2hr%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+
+(define_insn "iwmmxt_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (ashiftrt:V8QI (plus:V8QI
+ (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y"))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wavg2b%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ashiftrt:V4HI (plus:V4HI
+ (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y"))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wavg2h%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_psadbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "psadbw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+
+;; Insert/extract/shuffle
+
+(define_insn "iwmmxt_tinsrb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_merge:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (vec_duplicate:V8QI
+ (truncate:QI (match_operand:SI 2 "nonimmediate_operand" "r")))
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "TARGET_REALLY_IWMMXT"
+ "tinsrb%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tinsrh"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (vec_duplicate:V4HI
+ (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r")))
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "TARGET_REALLY_IWMMXT"
+ "tinsrh%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tinsrw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_merge:V2SI (match_operand:V2SI 1 "register_operand" "0")
+ (vec_duplicate:V2SI
+ (match_operand:SI 2 "nonimmediate_operand" "r"))
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "TARGET_REALLY_IWMMXT"
+ "tinsrw%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmub"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel
+ [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_REALLY_IWMMXT"
+ "textrmub%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmsb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel
+ [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_REALLY_IWMMXT"
+ "textrmsb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmuh"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel
+ [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_REALLY_IWMMXT"
+ "textrmuh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmsh"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel
+ [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_REALLY_IWMMXT"
+ "textrmsh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+;; There are signed/unsigned variants of this instruction, but they are
+;; pointless.
+(define_insn "iwmmxt_textrmw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_REALLY_IWMMXT"
+ "textrmsw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wshufh"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))]
+ "TARGET_REALLY_IWMMXT"
+ "wshufh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+;; Mask-generating comparisons
+;;
+;; Note - you cannot use patterns like these here:
+;;
+;; (set (match:<vector>) (<comparator>:<vector> (match:<vector>) (match:<vector>)))
+;;
+;; because GCC would assume that the truth value (1 or 0) is installed
+;; into the entire destination vector, with the '1' going into the least
+;; significant element of the vector.  This is not how these instructions
+;; behave.
+;;
+;; Unfortunately the current patterns are still illegal: they are insns
+;; without a SET in them.  They work in most cases for ordinary code
+;; generation, but there are circumstances where they can cause gcc to fail.
+;; XXX - FIXME.
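+;;
+;; What the instructions actually do, for reference: each element of the
+;; destination is set to all ones where the per-element comparison holds
+;; and to all zeros where it does not (e.g. wcmpeqb writes 0xff or 0x00
+;; into each byte lane).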
+
+(define_insn "eqv8qi3"
+ [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+ (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_EQ)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpeqb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "eqv4hi3"
+ [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+ (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_EQ)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpeqh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "eqv2si3"
+ [(unspec_volatile:V2SI [(match_operand:V2SI 0 "register_operand" "=y")
+ (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_EQ)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpeqw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv8qi3"
+ [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+ (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GTU)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtub%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv4hi3"
+ [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+ (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GTU)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtuh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv2si3"
+ [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y")
+ (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GTU)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtuw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtv8qi3"
+ [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+ (match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GT)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtsb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtv4hi3"
+ [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+ (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GT)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtsh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "gtv2si3"
+ [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y")
+ (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:V2SI 2 "register_operand" "y")]
+ VUNSPEC_WCMP_GT)]
+ "TARGET_REALLY_IWMMXT"
+ "wcmpgtsw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+;; Max/min insns
+
+(define_insn "*smax<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (smax:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wmaxs<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*umax<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (umax:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wmaxu<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*smin<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (smin:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wmins<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "*umin<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (umin:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wminu<MMX_char>%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+;; Pack/unpack insns.
+
+(define_insn "iwmmxt_wpackhss"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
+ (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackhss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackwss"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
+ (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackwss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackdss"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (ss_truncate:SI (match_operand:DI 1 "register_operand" "y"))
+ (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackdss%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackhus"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
+ (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackhus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackwus"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
+ (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackwus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackdus"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (us_truncate:SI (match_operand:DI 1 "register_operand" "y"))
+ (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
+ "TARGET_REALLY_IWMMXT"
+ "wpackdus%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+
+(define_insn "iwmmxt_wunpckihb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_merge:V8QI
+ (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 4)
+ (const_int 0)
+ (const_int 5)
+ (const_int 1)
+ (const_int 6)
+ (const_int 2)
+ (const_int 7)
+ (const_int 3)]))
+ (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)
+ (const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)]))
+ (const_int 85)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckihb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckihh"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI
+ (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckihh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckihw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_merge:V2SI
+ (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 1)]))
+ (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckihw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_merge:V8QI
+ (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)
+ (const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)]))
+ (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+ (parallel [(const_int 4)
+ (const_int 0)
+ (const_int 5)
+ (const_int 1)
+ (const_int 6)
+ (const_int 2)
+ (const_int 7)
+ (const_int 3)]))
+ (const_int 85)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckilb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilh"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI
+ (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckilh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_merge:V2SI
+ (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+ (parallel [(const_int 0)
+ (const_int 1)]))
+ (const_int 1)))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckilw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehub"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehub%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehuh"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (zero_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehuh%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehuw"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (zero_extend:DI
+ (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 1)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehuw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsb"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (sign_extend:V4HI
+ (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehsb%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsh"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehsh%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsw"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (sign_extend:DI
+ (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 1)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckehsw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelub"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckelub%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckeluh"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (zero_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckeluh%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckeluw"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (zero_extend:DI
+ (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 0)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckeluw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsb"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (sign_extend:V4HI
+ (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckelsb%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsh"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckelsh%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsw"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (sign_extend:DI
+ (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+ (parallel [(const_int 0)]))))]
+ "TARGET_REALLY_IWMMXT"
+ "wunpckelsw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+;; Shifts
+
+(define_insn "rorv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrorhg%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "rorv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrorwg%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "rordi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (rotatert:DI (match_operand:DI 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrordg%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashr<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsra<MMX_char>g%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "lshr<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrl<MMX_char>g%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashl<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsll<MMX_char>g%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "rorv4hi3_di"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrorh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "rorv2si3_di"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrorw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "rordi3_di"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (rotatert:DI (match_operand:DI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wrord%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv4hi3_di"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrah%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv2si3_di"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsraw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashrdi3_di"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrad%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv4hi3_di"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrlh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv2si3_di"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrlw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "lshrdi3_di"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsrld%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv4hi3_di"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ashift:V4HI (match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsllh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv2si3_di"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ashift:V2SI (match_operand:V2SI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wsllw%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "ashldi3_di"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (ashift:DI (match_operand:DI 1 "register_operand" "y")
+ (match_operand:DI 2 "register_operand" "y")))]
+ "TARGET_REALLY_IWMMXT"
+ "wslld%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmadds"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDS))]
+ "TARGET_REALLY_IWMMXT"
+ "wmadds%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmaddu"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDU))]
+ "TARGET_REALLY_IWMMXT"
+ "wmaddu%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmia"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (sign_extend:DI
+ (match_operand:SI 3 "register_operand" "r")))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmia%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiaph"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+ (sign_extend:DI
+ (truncate:HI (match_operand:SI 3 "register_operand" "r"))))
+ (mult:DI (sign_extend:DI
+ (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16))))
+ (sign_extend:DI
+ (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmiaph%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiabb"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (sign_extend:DI
+ (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+ (sign_extend:DI
+ (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmiabb%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiatb"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (sign_extend:DI
+ (truncate:HI (ashiftrt:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))))
+ (sign_extend:DI
+ (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmiatb%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiabt"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (sign_extend:DI
+ (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+ (sign_extend:DI
+ (truncate:HI (ashiftrt:SI
+ (match_operand:SI 3 "register_operand" "r")
+ (const_int 16)))))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmiabt%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiatt"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (plus:DI (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (sign_extend:DI
+ (truncate:HI (ashiftrt:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))))
+ (sign_extend:DI
+ (truncate:HI (ashiftrt:SI
+ (match_operand:SI 3 "register_operand" "r")
+ (const_int 16)))))))]
+ "TARGET_REALLY_IWMMXT"
+ "tmiatt%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcstqi"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_duplicate:V8QI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_REALLY_IWMMXT"
+ "tbcstb%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcsthi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_duplicate:V4HI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_REALLY_IWMMXT"
+ "tbcsth%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcstsi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_duplicate:V2SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_REALLY_IWMMXT"
+ "tbcstw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+ "TARGET_REALLY_IWMMXT"
+ "tmovmskb%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskh"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+ "TARGET_REALLY_IWMMXT"
+ "tmovmskh%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+ "TARGET_REALLY_IWMMXT"
+ "tmovmskw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_waccb"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))]
+ "TARGET_REALLY_IWMMXT"
+ "waccb%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wacch"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))]
+ "TARGET_REALLY_IWMMXT"
+ "wacch%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_waccw"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))]
+ "TARGET_REALLY_IWMMXT"
+ "waccw%?\\t%0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_walign"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,y")
+ (subreg:V8QI (ashiftrt:TI
+ (subreg:TI (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "y,y")
+ (match_operand:V8QI 2 "register_operand" "y,y")) 0)
+ (mult:SI
+ (match_operand:SI 3 "nonmemory_operand" "i,z")
+ (const_int 8))) 0))]
+ "TARGET_REALLY_IWMMXT"
+ "@
+ waligni%?\\t%0, %1, %2, %3
+ walignr%U3%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmrc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")]
+ VUNSPEC_TMRC))]
+ "TARGET_REALLY_IWMMXT"
+ "tmrc%?\\t%0, %w1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmcr"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+ (match_operand:SI 1 "register_operand" "r")]
+ VUNSPEC_TMCR)]
+ "TARGET_REALLY_IWMMXT"
+ "tmcr%?\\t%w0, %1"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSAD))]
+ "TARGET_REALLY_IWMMXT"
+ "wsadb%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadh"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSAD))]
+ "TARGET_REALLY_IWMMXT"
+ "wsadh%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadbz"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))]
+ "TARGET_REALLY_IWMMXT"
+ "wsadbz%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadhz"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+ (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))]
+ "TARGET_REALLY_IWMMXT"
+ "wsadhz%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")])
+
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
new file mode 100644
index 000000000..5db4a3269
--- /dev/null
+++ b/gcc/config/arm/ldmstm.md
@@ -0,0 +1,1191 @@
+/* ARM ldm/stm instruction patterns. This file was automatically generated
+ using arm-ldmstm.ml. Please do not edit manually.
+
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+(define_insn "*ldm4_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 5 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm4_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 5 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")])
+
+(define_insn "*ldm4_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm4_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&l")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")])
+
+(define_insn "*stm4_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm4_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&l")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
+ "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")])
+
+(define_insn "*ldm4_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 16))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ib%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 16))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ib%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(ib%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "stm%(ib%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(da%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "ldm%(da%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(da%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "stm%(da%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int -16))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(db%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -16))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "ldm%(db%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(db%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -16)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "stm%(db%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 6 "memory_operand" ""))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 7 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 4, false))
+ DONE;
+ else
+ FAIL;
+})
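+
+;; Illustrative sketch (hypothetical registers): when the four loads
+;; matched above read consecutive words with ascending destination
+;; registers, gen_ldm_seq can replace
+;;	ldr r0, [r4]
+;;	ldr r1, [r4, #4]
+;;	ldr r2, [r4, #8]
+;;	ldr r3, [r4, #12]
+;; with a single load-multiple:
+;;	ldmia r4, {r0, r1, r2, r3}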
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (parallel
+ [(set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 6 "memory_operand" ""))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 7 "memory_operand" ""))])]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 4, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 9 "const_int_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 10 "const_int_operand" ""))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_dup 2))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 11 "const_int_operand" ""))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_dup 3))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 9 "const_int_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 10 "const_int_operand" ""))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 11 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_dup 2))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_dup 3))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
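+
+;; Illustrative sketch (hypothetical constants and registers): the two
+;; peepholes above catch "mov rN, #c" / "str rN, [base, #off]" groups,
+;; whether interleaved or batched, so that gen_const_stm_seq can keep
+;; the constant moves and merge the four stores into one stm.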
+
+(define_peephole2
+ [(set (match_operand:SI 4 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_operand:SI 3 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn "*ldm3_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 4 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm3_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 4 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")])
+
+(define_insn "*ldm3_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm3_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&l")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")])
+
+(define_insn "*stm3_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm3_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&l")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")])
+
+(define_insn "*ldm3_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 12))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ib%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 12))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ib%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(ib%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(ib%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(da%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(da%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(da%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(da%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(db%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(db%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(db%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(db%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 3, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (parallel
+ [(set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))])]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 3, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 7 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 2))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 7 "const_int_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 2))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn "*ldm2_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 3 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm2_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 3 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")])
+
+(define_insn "*ldm2_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm2_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&l")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")])
+
+(define_insn "*stm2_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "stm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm2_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&l")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")])
+
+(define_insn "*ldm2_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 8))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ib%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 8))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ib%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "stm%(ib%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(ib%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int -4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "ldm%(da%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(da%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "stm%(da%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(da%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "ldm%(db%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(db%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "stm%(db%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(db%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 2, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 1))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "const_int_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 1))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (parallel
+ [(set (match_operand:SI 4 "s_register_operand" "")
+ (match_operator:SI 5 "commutative_binary_operator"
+ [(match_operand:SI 6 "s_register_operand" "")
+ (match_operand:SI 7 "s_register_operand" "")]))
+ (clobber (reg:CC CC_REGNUM))])]
+ "(((operands[6] == operands[0] && operands[7] == operands[1])
+ || (operands[7] == operands[0] && operands[6] == operands[1]))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))"
+ [(parallel
+ [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ if (!gen_ldm_seq (operands, 2, true))
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (set (match_operand:SI 4 "s_register_operand" "")
+ (match_operator:SI 5 "commutative_binary_operator"
+ [(match_operand:SI 6 "s_register_operand" "")
+ (match_operand:SI 7 "s_register_operand" "")]))]
+ "(((operands[6] == operands[0] && operands[7] == operands[1])
+ || (operands[7] == operands[0] && operands[6] == operands[1]))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))"
+ [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+{
+ if (!gen_ldm_seq (operands, 2, true))
+ FAIL;
+})
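+
+;; The two peepholes above handle a pair of loads whose results feed only
+;; a commutative operation and then die.  Because the operation yields
+;; the same result either way, gen_ldm_seq (called with its final
+;; argument true, allowing the destination registers to be sorted) is
+;; free to assign the loaded values to registers in whichever order an
+;; ldm requires.  Illustrative sketch with hypothetical registers:
+;; "ldr r1, [sp]; ldr r0, [sp, #4]; add r0, r0, r1" can become
+;; "ldmia sp, {r0, r1}; add r0, r0, r1".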
+
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
new file mode 100644
index 000000000..2e76c01df
--- /dev/null
+++ b/gcc/config/arm/lib1funcs.asm
@@ -0,0 +1,1829 @@
+@ libgcc routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* An executable stack is *not* required for these functions. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif /* __ELF__ and __linux__ */
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+/* ------------------------------------------------------------------------ */
+
+/* We need to know what prefix to add to function names. */
+
+#ifndef __USER_LABEL_PREFIX__
+#error __USER_LABEL_PREFIX__ not defined
+#endif
+
+/* ANSI concatenation macros. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+#ifdef __ELF__
+#ifdef __thumb__
+#define __PLT__ /* Not supported in Thumb assembler (for now). */
+#elif defined __vxworks && !defined __PIC__
+#define __PLT__ /* Not supported by the kernel loader. */
+#else
+#define __PLT__ (PLT)
+#endif
+#define TYPE(x) .type SYM(x),function
+#define SIZE(x) .size SYM(x), . - SYM(x)
+#define LSYM(x) .x
+#else
+#define __PLT__
+#define TYPE(x)
+#define SIZE(x)
+#define LSYM(x) x
+#endif
+
+/* Function end macros. Variants for interworking. */
+
+#if defined(__ARM_ARCH_2__)
+# define __ARM_ARCH__ 2
+#endif
+
+#if defined(__ARM_ARCH_3__)
+# define __ARM_ARCH__ 3
+#endif
+
+#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
+ || defined(__ARM_ARCH_4T__)
+/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
+ long multiply instructions. That includes v3M. */
+# define __ARM_ARCH__ 4
+#endif
+
+#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+ || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 5
+#endif
+
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+ || defined(__ARM_ARCH_6M__)
+# define __ARM_ARCH__ 6
+#endif
+
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__)
+# define __ARM_ARCH__ 7
+#endif
+
+#ifndef __ARM_ARCH__
+#error Unable to determine architecture.
+#endif
+
+/* There are times when we might prefer Thumb1 code even if ARM code is
+   permitted: for example, the code might be smaller, or switching to
+   ARM state might cause interworking problems when interworking is
+   disabled. */
+#if (defined(__thumb__) \
+ && !defined(__thumb2__) \
+ && (!defined(__THUMB_INTERWORK__) \
+ || defined (__OPTIMIZE_SIZE__) \
+ || defined(__ARM_ARCH_6M__)))
+# define __prefer_thumb__
+#endif
+
+/* How to return from a function call depends on the architecture variant. */
+
+#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
+
+# define RET bx lr
+# define RETc(x) bx##x lr
+
+/* Special precautions for interworking on armv4t. */
+# if (__ARM_ARCH__ == 4)
+
+/* Always use bx, not ldr pc. */
+# if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
+# define __INTERWORKING__
+# endif /* __thumb__ || __THUMB_INTERWORK__ */
+
+/* Include thumb stub before arm mode code. */
+# if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
+# define __INTERWORKING_STUBS__
+# endif /* __thumb__ && !__THUMB_INTERWORK__ */
+
+#endif /* __ARM_ARCH__ == 4 */
+
+#else
+
+# define RET mov pc, lr
+# define RETc(x) mov##x pc, lr
+
+#endif
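+
+/* For instance, RETc(eq) assembles to "bxeq lr" on architectures with
+   BX, and to "moveq pc, lr" on earlier ones, so conditional returns
+   can be written uniformly below.  */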
+
+.macro cfi_pop advance, reg, cfa_offset
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .4byte \advance
+ .byte (0xc0 | \reg) /* DW_CFA_restore */
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 \cfa_offset
+ .popsection
+#endif
+.endm
+.macro cfi_push advance, reg, offset, cfa_offset
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .byte 0x4 /* DW_CFA_advance_loc4 */
+ .4byte \advance
+ .byte (0x80 | \reg) /* DW_CFA_offset */
+ .uleb128 (\offset / -4)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+ .uleb128 \cfa_offset
+ .popsection
+#endif
+.endm
+.macro cfi_start start_label, end_label
+#ifdef __ELF__
+ .pushsection .debug_frame
+LSYM(Lstart_frame):
+ .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
+LSYM(Lstart_cie):
+ .4byte 0xffffffff @ CIE Identifier Tag
+ .byte 0x1 @ CIE Version
+ .ascii "\0" @ CIE Augmentation
+ .uleb128 0x1 @ CIE Code Alignment Factor
+ .sleb128 -4 @ CIE Data Alignment Factor
+ .byte 0xe @ CIE RA Column
+ .byte 0xc @ DW_CFA_def_cfa
+ .uleb128 0xd
+ .uleb128 0x0
+
+ .align 2
+LSYM(Lend_cie):
+ .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length
+LSYM(Lstart_fde):
+ .4byte LSYM(Lstart_frame) @ FDE CIE offset
+ .4byte \start_label @ FDE initial location
+ .4byte \end_label-\start_label @ FDE address range
+ .popsection
+#endif
+.endm
+.macro cfi_end end_label
+#ifdef __ELF__
+ .pushsection .debug_frame
+ .align 2
+LSYM(Lend_fde):
+ .popsection
+\end_label:
+#endif
+.endm
+
+/* Don't pass dirn; it's there just to get token pasting right. */
+
+.macro RETLDM regs=, cond=, unwind=, dirn=ia
+#if defined (__INTERWORKING__)
+ .ifc "\regs",""
+ ldr\cond lr, [sp], #8
+ .else
+# if defined(__thumb2__)
+ pop\cond {\regs, lr}
+# else
+ ldm\cond\dirn sp!, {\regs, lr}
+# endif
+ .endif
+ .ifnc "\unwind", ""
+ /* Mark LR as restored. */
+97: cfi_pop 97b - \unwind, 0xe, 0x0
+ .endif
+ bx\cond lr
+#else
+ /* Caller is responsible for providing IT instruction. */
+ .ifc "\regs",""
+ ldr\cond pc, [sp], #8
+ .else
+# if defined(__thumb2__)
+ pop\cond {\regs, pc}
+# else
+ ldm\cond\dirn sp!, {\regs, pc}
+# endif
+ .endif
+#endif
+.endm
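+
+/* Illustrative expansion (hypothetical register list): on a
+   non-interworking ARM build, RETLDM "r4, r5" emits
+   "ldmia sp!, {r4, r5, pc}", while with __INTERWORKING__ defined it
+   restores into lr and returns with "bx lr" instead.  */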
+
+/* The Unified assembly syntax allows the same code to be assembled for both
+   ARM and Thumb-2.  However, this is only supported by recent versions of
+   gas, so define a set of macros that allow the ARM code here to build on
+   older assemblers. */
+#if defined(__thumb2__)
+.macro do_it cond, suffix=""
+ it\suffix \cond
+.endm
+.macro shift1 op, arg0, arg1, arg2
+ \op \arg0, \arg1, \arg2
+.endm
+#define do_push push
+#define do_pop pop
+#define COND(op1, op2, cond) op1 ## op2 ## cond
+/* Perform an arithmetic operation with a variable shift operand. This
+ requires two instructions and a scratch register on Thumb-2. */
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ \shiftop \tmp, \src2, \shiftreg
+ \name \dest, \src1, \tmp
+.endm
+#else
+.macro do_it cond, suffix=""
+.endm
+.macro shift1 op, arg0, arg1, arg2
+ mov \arg0, \arg1, \op \arg2
+.endm
+#define do_push stmfd sp!,
+#define do_pop ldmfd sp!,
+#define COND(op1, op2, cond) op1 ## cond ## op2
+.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
+ \name \dest, \src1, \src2, \shiftop \shiftreg
+.endm
+#endif
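+
+/* Illustrative expansion (hypothetical operands): shiftop hides the
+   loss of register-specified shifts in Thumb-2 flexible operands.  In
+   ARM mode "shiftop orr, r0, r0, r2, lsl, r3, ip" emits the single
+   instruction "orr r0, r0, r2, lsl r3"; in Thumb-2 it emits
+   "lsl ip, r2, r3" followed by "orr r0, r0, ip".  */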
+
+#ifdef __ARM_EABI__
+.macro ARM_LDIV0 name signed
+ cmp r0, #0
+ .ifc \signed, unsigned
+ movne r0, #0xffffffff
+ .else
+ movgt r0, #0x7fffffff
+ movlt r0, #0x80000000
+ .endif
+ b SYM (__aeabi_idiv0) __PLT__
+.endm
+#else
+.macro ARM_LDIV0 name signed
+ str lr, [sp, #-8]!
+98: cfi_push 98b - __\name, 0xe, -0x8, 0x8
+ bl SYM (__div0) __PLT__
+ mov r0, #0 @ About as wrong as it could be.
+ RETLDM unwind=98b
+.endm
+#endif
+
+
+#ifdef __ARM_EABI__
+.macro THUMB_LDIV0 name signed
+#if defined(__ARM_ARCH_6M__)
+ .ifc \signed, unsigned
+ cmp r0, #0
+ beq 1f
+ mov r0, #0
+ mvn r0, r0 @ 0xffffffff
+1:
+ .else
+ cmp r0, #0
+ beq 2f
+ blt 3f
+ mov r0, #0
+ mvn r0, r0
+ lsr r0, r0, #1 @ 0x7fffffff
+ b 2f
+3: mov r0, #0x80
+ lsl r0, r0, #24 @ 0x80000000
+2:
+ .endif
+ push {r0, r1, r2}
+ ldr r0, 4f
+ adr r1, 4f
+ add r0, r1
+ str r0, [sp, #8]
+ @ We know we are not on armv4t, so pop pc is safe.
+ pop {r0, r1, pc}
+ .align 2
+4:
+ .word __aeabi_idiv0 - 4b
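+ @ Sketch of the trick above: the word at 4f holds the offset of
+ @ __aeabi_idiv0 from 4b, so the ldr/adr/add sequence reconstructs its
+ @ absolute address.  Storing that over the saved r2 slot lets
+ @ "pop {r0, r1, pc}" restore the scratch registers and branch to
+ @ __aeabi_idiv0 in a single instruction.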
+#elif defined(__thumb2__)
+ .syntax unified
+ .ifc \signed, unsigned
+ cbz r0, 1f
+ mov r0, #0xffffffff
+1:
+ .else
+ cmp r0, #0
+ do_it gt
+ movgt r0, #0x7fffffff
+ do_it lt
+ movlt r0, #0x80000000
+ .endif
+ b.w SYM(__aeabi_idiv0) __PLT__
+#else
+ .align 2
+ bx pc
+ nop
+ .arm
+ cmp r0, #0
+ .ifc \signed, unsigned
+ movne r0, #0xffffffff
+ .else
+ movgt r0, #0x7fffffff
+ movlt r0, #0x80000000
+ .endif
+ b SYM(__aeabi_idiv0) __PLT__
+ .thumb
+#endif
+.endm
+#else
+.macro THUMB_LDIV0 name signed
+ push { r1, lr }
+98: cfi_push 98b - __\name, 0xe, -0x4, 0x8
+ bl SYM (__div0)
+ mov r0, #0 @ About as wrong as it could be.
+#if defined (__INTERWORKING__)
+ pop { r1, r2 }
+ bx r2
+#else
+ pop { r1, pc }
+#endif
+.endm
+#endif
+
+.macro FUNC_END name
+ SIZE (__\name)
+.endm
+
+.macro DIV_FUNC_END name signed
+ cfi_start __\name, LSYM(Lend_div0)
+LSYM(Ldiv0):
+#ifdef __thumb__
+ THUMB_LDIV0 \name \signed
+#else
+ ARM_LDIV0 \name \signed
+#endif
+ cfi_end LSYM(Lend_div0)
+ FUNC_END \name
+.endm
+
+.macro THUMB_FUNC_START name
+ .globl SYM (\name)
+ TYPE (\name)
+ .thumb_func
+SYM (\name):
+.endm
+
+/* Function start macros. Variants for ARM and Thumb. */
+
+#ifdef __thumb__
+#define THUMB_FUNC .thumb_func
+#define THUMB_CODE .force_thumb
+# if defined(__thumb2__)
+#define THUMB_SYNTAX .syntax divided
+# else
+#define THUMB_SYNTAX
+# endif
+#else
+#define THUMB_FUNC
+#define THUMB_CODE
+#define THUMB_SYNTAX
+#endif
+
+.macro FUNC_START name
+ .text
+ .globl SYM (__\name)
+ TYPE (__\name)
+ .align 0
+ THUMB_CODE
+ THUMB_FUNC
+ THUMB_SYNTAX
+SYM (__\name):
+.endm
+
+/* Special functions that will always be coded in ARM assembly, even
+   when compiling for Thumb-only targets. */
+
+#if defined(__thumb2__)
+
+/* For Thumb-2 we build everything in thumb mode. */
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ .syntax unified
+.endm
+#define EQUIV .thumb_set
+.macro ARM_CALL name
+ bl __\name
+.endm
+
+#elif defined(__INTERWORKING_STUBS__)
+
+.macro ARM_FUNC_START name
+ FUNC_START \name
+ bx pc
+ nop
+ .arm
+/* A hook to tell gdb that we've switched to ARM mode.  Also used as the
+   entry point for direct calls from other local arm routines. */
+_L__\name:
+.endm
+#define EQUIV .thumb_set
+/* Branch directly to a function declared with ARM_FUNC_START.
+ Must be called in arm mode. */
+.macro ARM_CALL name
+ bl _L__\name
+.endm
+
+#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
+
+#ifdef __ARM_ARCH_6M__
+#define EQUIV .thumb_set
+#else
+.macro ARM_FUNC_START name
+ .text
+ .globl SYM (__\name)
+ TYPE (__\name)
+ .align 0
+ .arm
+SYM (__\name):
+.endm
+#define EQUIV .set
+.macro ARM_CALL name
+ bl __\name
+.endm
+#endif
+
+#endif
+
+.macro FUNC_ALIAS new old
+ .globl SYM (__\new)
+#if defined (__thumb__)
+ .thumb_set SYM (__\new), SYM (__\old)
+#else
+ .set SYM (__\new), SYM (__\old)
+#endif
+.endm
+
+#ifndef __ARM_ARCH_6M__
+.macro ARM_FUNC_ALIAS new old
+ .globl SYM (__\new)
+ EQUIV SYM (__\new), SYM (__\old)
+#if defined(__INTERWORKING_STUBS__)
+ .set SYM (_L__\new), SYM (_L__\old)
+#endif
+.endm
+#endif
+
+#ifdef __ARMEB__
+#define xxh r0
+#define xxl r1
+#define yyh r2
+#define yyl r3
+#else
+#define xxh r1
+#define xxl r0
+#define yyh r3
+#define yyl r2
+#endif
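+
+/* An illustrative note: these aliases name the high and low words of
+   the two 64-bit arguments passed in r0-r1 and r2-r3.  The word order
+   within each pair depends on endianness, so the code below can use
+   xxh/xxl and yyh/yyl without caring which byte order is in use.  */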
+
+#ifdef __ARM_EABI__
+.macro WEAK name
+ .weak SYM (__\name)
+.endm
+#endif
+
+#ifdef __thumb__
+/* Register aliases. */
+
+work .req r4 @ XXXX is this safe?
+dividend .req r0
+divisor .req r1
+overdone .req r2
+result .req r2
+curbit .req r3
+#endif
+#if 0
+ip .req r12
+sp .req r13
+lr .req r14
+pc .req r15
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* Bodies of the division and modulo routines. */
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+
+#if defined (__thumb2__)
+ clz \curbit, \dividend
+ clz \result, \divisor
+ sub \curbit, \result, \curbit
+ rsb \curbit, \curbit, #31
+ adr \result, 1f
+ add \curbit, \result, \curbit, lsl #4
+ mov \result, #0
+ mov pc, \curbit
+.p2align 3
+1:
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp.w \dividend, \divisor, lsl #shift
+ nop.n
+ adc.w \result, \result, \result
+ it cs
+ subcs.w \dividend, \dividend, \divisor, lsl #shift
+ .endr
+#else
+ clz \curbit, \dividend
+ clz \result, \divisor
+ sub \curbit, \result, \curbit
+ rsbs \curbit, \curbit, #31
+ addne \curbit, \curbit, \curbit, lsl #1
+ mov \result, #0
+ addne pc, pc, \curbit, lsl #2
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp \dividend, \divisor, lsl #shift
+ adc \result, \result, \result
+ subcs \dividend, \dividend, \divisor, lsl #shift
+ .endr
+#endif
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else /* __ARM_ARCH__ < 5 */
+
+ @ Initially shift the divisor left 3 bits if possible, and set curbit
+ @ accordingly. This places curbit at the left end of each 4-bit
+ @ nibble in the division loop, saving one loop iteration in most
+ @ cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif /* __ARM_ARCH__ < 5 */
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ do_it hs, t
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ do_it ne, t
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
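+
+/* Worked sketch of the generic division loop above, with illustrative
+   numbers: for 100 / 9 the setup code leaves divisor = 1152 and
+   curbit = 128.  The first pass finds no subtraction possible and
+   shifts both right by 4; the second pass subtracts 72 (recording
+   bit 3), 18 (bit 1) and 9 (bit 0), giving quotient 8 + 2 + 1 = 11
+   with remainder 1 left in the dividend.  */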
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ rsbs \order, \order, #31
+ addne pc, pc, \order, lsl #3
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp \dividend, \divisor, lsl #shift
+ subcs \dividend, \dividend, \divisor, lsl #shift
+ .endr
+
+#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+#if __ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else /* __ARM_ARCH__ < 5 */
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For a very big divisor, we must shift it one bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif /* __ARM_ARCH__ < 5 */
+
+	@ Perform all needed subtractions to keep only the remainder.
+	@ Do comparisons in batches of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+	@ Either 1, 2 or 3 comparison/subtraction steps are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+
+#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro THUMB_DIV_MOD_BODY modulo
+ @ Load the constant 0x10000000 into our work register.
+ mov work, #1
+ lsl work, #28
+LSYM(Loop1):
+ @ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unrolling in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+ cmp divisor, work
+ bhs LSYM(Lbignum)
+ cmp divisor, dividend
+ bhs LSYM(Lbignum)
+ lsl divisor, #4
+ lsl curbit, #4
+ b LSYM(Loop1)
+LSYM(Lbignum):
+ @ Set work to 0x80000000
+ lsl work, #3
+LSYM(Loop2):
+	@ For very big divisors, we must shift one bit at a time, or
+ @ we will be in danger of overflowing.
+ cmp divisor, work
+ bhs LSYM(Loop3)
+ cmp divisor, dividend
+ bhs LSYM(Loop3)
+ lsl divisor, #1
+ lsl curbit, #1
+ b LSYM(Loop2)
+LSYM(Loop3):
+ @ Test for possible subtractions ...
+ .if \modulo
+ @ ... On the final pass, this may subtract too much from the dividend,
+	@ so keep track of which subtractions are done so that we can fix
+	@ them up afterwards.
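+	@ (Illustrative: on the final pass curbit is 1, so the "ror #1",
+	@ "ror #2" and "ror #3" below map the divisor/2, divisor/4 and
+	@ divisor/8 subtractions to bits 31, 30 and 29 of overdone, the
+	@ top three bits tested after the loop.)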
+ mov overdone, #0
+ cmp dividend, divisor
+ blo LSYM(Lover1)
+ sub dividend, dividend, divisor
+LSYM(Lover1):
+ lsr work, divisor, #1
+ cmp dividend, work
+ blo LSYM(Lover2)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #1
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover2):
+ lsr work, divisor, #2
+ cmp dividend, work
+ blo LSYM(Lover3)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #2
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover3):
+ lsr work, divisor, #3
+ cmp dividend, work
+ blo LSYM(Lover4)
+ sub dividend, dividend, work
+ mov ip, curbit
+ mov work, #3
+ ror curbit, work
+ orr overdone, curbit
+ mov curbit, ip
+LSYM(Lover4):
+ mov ip, curbit
+ .else
+ @ ... and note which bits are done in the result. On the final pass,
+ @ this may subtract too much from the dividend, but the result will be ok,
+ @ since the "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ blo LSYM(Lover1)
+ sub dividend, dividend, divisor
+ orr result, result, curbit
+LSYM(Lover1):
+ lsr work, divisor, #1
+ cmp dividend, work
+ blo LSYM(Lover2)
+ sub dividend, dividend, work
+ lsr work, curbit, #1
+ orr result, work
+LSYM(Lover2):
+ lsr work, divisor, #2
+ cmp dividend, work
+ blo LSYM(Lover3)
+ sub dividend, dividend, work
+ lsr work, curbit, #2
+ orr result, work
+LSYM(Lover3):
+ lsr work, divisor, #3
+ cmp dividend, work
+ blo LSYM(Lover4)
+ sub dividend, dividend, work
+ lsr work, curbit, #3
+ orr result, work
+LSYM(Lover4):
+ .endif
+
+ cmp dividend, #0 @ Early termination?
+ beq LSYM(Lover5)
+ lsr curbit, #4 @ No, any more bits to do?
+ beq LSYM(Lover5)
+ lsr divisor, #4
+ b LSYM(Loop3)
+LSYM(Lover5):
+ .if \modulo
+ @ Any subtractions that we should not have done will be recorded in
+ @ the top three bits of "overdone". Exactly which were not needed
+	@ is governed by the position of the bit, stored in ip.
+ mov work, #0xe
+ lsl work, #28
+ and overdone, work
+ beq LSYM(Lgot_result)
+
+ @ If we terminated early, because dividend became zero, then the
+ @ bit in ip will not be in the bottom nibble, and we should not
+ @ perform the additions below. We must test for this though
+	@ (rather than relying upon the TSTs to prevent the additions) since
+ @ the bit in ip could be in the top two bits which might then match
+ @ with one of the smaller RORs.
+ mov curbit, ip
+ mov work, #0x7
+ tst curbit, work
+ beq LSYM(Lgot_result)
+
+ mov curbit, ip
+ mov work, #3
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lover6)
+ lsr work, divisor, #3
+ add dividend, work
+LSYM(Lover6):
+ mov curbit, ip
+ mov work, #2
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lover7)
+ lsr work, divisor, #2
+ add dividend, work
+LSYM(Lover7):
+ mov curbit, ip
+ mov work, #1
+ ror curbit, work
+ tst overdone, curbit
+ beq LSYM(Lgot_result)
+ lsr work, divisor, #1
+ add dividend, work
+ .endif
+LSYM(Lgot_result):
+.endm
+/* ------------------------------------------------------------------------ */
+/* Start of the Real Functions */
+/* ------------------------------------------------------------------------ */
+#ifdef L_udivsi3
+
+#if defined(__prefer_thumb__)
+
+ FUNC_START udivsi3
+ FUNC_ALIAS aeabi_uidiv udivsi3
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+LSYM(udivsi3_skip_div0_test):
+ mov curbit, #1
+ mov result, #0
+
+ push { work }
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 0
+
+ mov r0, result
+ pop { work }
+ RET
+
+#else /* ARM version/Thumb-2. */
+
+ ARM_FUNC_START udivsi3
+ ARM_FUNC_ALIAS aeabi_uidiv udivsi3
+
+ /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
+ check for division-by-zero a second time. */
+LSYM(udivsi3_skip_div0_test):
+ subs r2, r1, #1
+ do_it eq
+ RETc(eq)
+ bcc LSYM(Ldiv0)
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
+
+ ARM_DIV_BODY r0, r1, r2, r3
+
+ mov r0, r2
+ RET
+
+11: do_it eq, e
+ moveq r0, #1
+ movne r0, #0
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END udivsi3 unsigned
+
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_uidivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ push {r0, r1, lr}
+ bl LSYM(udivsi3_skip_div0_test)
+ POP {r1, r2, r3}
+ mul r2, r0
+ sub r1, r1, r2
+ bx r3
+#else
+ARM_FUNC_START aeabi_uidivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ stmfd sp!, { r0, r1, lr }
+ bl LSYM(udivsi3_skip_div0_test)
+ ldmfd sp!, { r1, r2, lr }
+ mul r3, r2, r0
+ sub r1, r1, r3
+ RET
+#endif
+ FUNC_END aeabi_uidivmod
+
+#endif /* L_udivsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_umodsi3
+
+ FUNC_START umodsi3
+
+#ifdef __thumb__
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ mov curbit, #1
+ cmp dividend, divisor
+ bhs LSYM(Lover10)
+ RET
+
+LSYM(Lover10):
+ push { work }
+
+ THUMB_DIV_MOD_BODY 1
+
+ pop { work }
+ RET
+
+#else /* ARM version. */
+
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc LSYM(Ldiv0)
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ RETc(ls)
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+ RET
+
+#endif /* ARM version. */
+
+ DIV_FUNC_END umodsi3 unsigned
+
+#endif /* L_umodsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_divsi3
+
+#if defined(__prefer_thumb__)
+
+ FUNC_START divsi3
+ FUNC_ALIAS aeabi_idiv divsi3
+
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+ push { work }
+ mov work, dividend
+ eor work, divisor @ Save the sign of the result.
+ mov ip, work
+ mov curbit, #1
+ mov result, #0
+ cmp divisor, #0
+ bpl LSYM(Lover10)
+ neg divisor, divisor @ Loops below use unsigned.
+LSYM(Lover10):
+ cmp dividend, #0
+ bpl LSYM(Lover11)
+ neg dividend, dividend
+LSYM(Lover11):
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 0
+
+ mov r0, result
+ mov work, ip
+ cmp work, #0
+ bpl LSYM(Lover12)
+ neg r0, r0
+LSYM(Lover12):
+ pop { work }
+ RET
+
+#else /* ARM/Thumb-2 version. */
+
+ ARM_FUNC_START divsi3
+ ARM_FUNC_ALIAS aeabi_idiv divsi3
+
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+LSYM(divsi3_skip_div0_test):
+ eor ip, r0, r1 @ save the sign of the result.
+ do_it mi
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ do_it mi
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+10: teq ip, r0 @ same sign ?
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+11: do_it lo
+ movlo r0, #0
+ do_it eq,t
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ RET
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ do_it mi
+ rsbmi r0, r0, #0
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END divsi3 signed
+
+#if defined(__prefer_thumb__)
+FUNC_START aeabi_idivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ push {r0, r1, lr}
+ bl LSYM(divsi3_skip_div0_test)
+ POP {r1, r2, r3}
+ mul r2, r0
+ sub r1, r1, r2
+ bx r3
+#else
+ARM_FUNC_START aeabi_idivmod
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ stmfd sp!, { r0, r1, lr }
+ bl LSYM(divsi3_skip_div0_test)
+ ldmfd sp!, { r1, r2, lr }
+ mul r3, r2, r0
+ sub r1, r1, r3
+ RET
+#endif
+ FUNC_END aeabi_idivmod
+
+#endif /* L_divsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_modsi3
+
+ FUNC_START modsi3
+
+#ifdef __thumb__
+
+ mov curbit, #1
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ bpl LSYM(Lover10)
+ neg divisor, divisor @ Loops below use unsigned.
+LSYM(Lover10):
+ push { work }
+	@ We need to save the sign of the dividend; unfortunately, we also
+	@ need the work register later on. This must be done after saving
+	@ the original value of the work register, because we will pop that
+	@ value off first.
+ push { dividend }
+ cmp dividend, #0
+ bpl LSYM(Lover11)
+ neg dividend, dividend
+LSYM(Lover11):
+ cmp dividend, divisor
+ blo LSYM(Lgot_result)
+
+ THUMB_DIV_MOD_BODY 1
+
+ pop { work }
+ cmp work, #0
+ bpl LSYM(Lover12)
+ neg dividend, dividend
+LSYM(Lover12):
+ pop { work }
+ RET
+
+#else /* ARM version. */
+
+ cmp r1, #0
+ beq LSYM(Ldiv0)
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ RET
+
+#endif /* ARM version */
+
+ DIV_FUNC_END modsi3 signed
+
+#endif /* L_modsi3 */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_tls
+
+#ifdef __ARM_EABI__
+ WEAK aeabi_idiv0
+ WEAK aeabi_ldiv0
+ FUNC_START aeabi_idiv0
+ FUNC_START aeabi_ldiv0
+ RET
+ FUNC_END aeabi_ldiv0
+ FUNC_END aeabi_idiv0
+#else
+ FUNC_START div0
+ RET
+ FUNC_END div0
+#endif
+
+#endif /* L_dvmd_tls */
+/* ------------------------------------------------------------------------ */
+#ifdef L_dvmd_lnx
+@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls
+
+/* Constant taken from <asm/signal.h>. */
+#define SIGFPE 8
+
+#ifdef __ARM_EABI__
+ WEAK aeabi_idiv0
+ WEAK aeabi_ldiv0
+ ARM_FUNC_START aeabi_idiv0
+ ARM_FUNC_START aeabi_ldiv0
+#else
+ ARM_FUNC_START div0
+#endif
+
+ do_push {r1, lr}
+ mov r0, #SIGFPE
+ bl SYM(raise) __PLT__
+ RETLDM r1
+
+#ifdef __ARM_EABI__
+ FUNC_END aeabi_ldiv0
+ FUNC_END aeabi_idiv0
+#else
+ FUNC_END div0
+#endif
+
+#endif /* L_dvmd_lnx */
+#ifdef L_clear_cache
+#if defined __ARM_EABI__ && defined __linux__
+@ EABI GNU/Linux call to cacheflush syscall.
+ ARM_FUNC_START clear_cache
+ do_push {r7}
+#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
+ movw r7, #2
+ movt r7, #0xf
+#else
+ mov r7, #0xf0000
+ add r7, r7, #2
+#endif
+ mov r2, #0
+ swi 0
+ do_pop {r7}
+ RET
+ FUNC_END clear_cache
+#else
+#error "This is only for ARM EABI GNU/Linux"
+#endif
+#endif /* L_clear_cache */
+/* ------------------------------------------------------------------------ */
+/* Dword shift operations. */
+/* All the following Dword shift variants rely on the fact that
+ shft xxx, Reg
+ is in fact done as
+ shft xxx, (Reg & 255)
+   so for Reg values in (32...63) and (-31...-1) we will get zero (in the
+ case of logical shifts) or the sign (for asr). */
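+
+/* Example (illustrative): a 64-bit logical right shift by 40 computes
+   r3 = 40 - 32 = 8, so al = ah >> 8, while "ah lsr 40" yields zero by
+   the rule above, giving the expected combined result. */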
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */
+#ifndef __symbian__
+
+#ifdef L_lshrdi3
+
+ FUNC_START lshrdi3
+ FUNC_ALIAS aeabi_llsr lshrdi3
+
+#ifdef __thumb__
+ lsr al, r2
+ mov r3, ah
+ lsr ah, r2
+ mov ip, r3
+ sub r2, #32
+ lsr r3, r2
+ orr al, r3
+ neg r2, r2
+ mov r3, ip
+ lsl r3, r2
+ orr al, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, lsr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, lsr r2
+ RET
+#endif
+ FUNC_END aeabi_llsr
+ FUNC_END lshrdi3
+
+#endif
+
+#ifdef L_ashrdi3
+
+ FUNC_START ashrdi3
+ FUNC_ALIAS aeabi_lasr ashrdi3
+
+#ifdef __thumb__
+ lsr al, r2
+ mov r3, ah
+ asr ah, r2
+ sub r2, #32
+ @ If r2 is negative at this point the following step would OR
+ @ the sign bit into all of AL. That's not what we want...
+ bmi 1f
+ mov ip, r3
+ asr r3, r2
+ orr al, r3
+ mov r3, ip
+1:
+ neg r2, r2
+ lsl r3, r2
+ orr al, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, asr r3
+ orrmi al, al, ah, lsl ip
+ mov ah, ah, asr r2
+ RET
+#endif
+
+ FUNC_END aeabi_lasr
+ FUNC_END ashrdi3
+
+#endif
+
+#ifdef L_ashldi3
+
+ FUNC_START ashldi3
+ FUNC_ALIAS aeabi_llsl ashldi3
+
+#ifdef __thumb__
+ lsl ah, r2
+ mov r3, al
+ lsl al, r2
+ mov ip, r3
+ sub r2, #32
+ lsl r3, r2
+ orr ah, r3
+ neg r2, r2
+ mov r3, ip
+ lsr r3, r2
+ orr ah, r3
+ RET
+#else
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi ah, ah, lsl r2
+ movpl ah, al, lsl r3
+ orrmi ah, ah, al, lsr ip
+ mov al, al, lsl r2
+ RET
+#endif
+ FUNC_END aeabi_llsl
+ FUNC_END ashldi3
+
+#endif
+
+#endif /* __symbian__ */
+
+#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
+ || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+ || defined(__ARM_ARCH_5TEJ__)
+#define HAVE_ARM_CLZ 1
+#endif
+
+#ifdef L_clzsi2
+#if defined(__ARM_ARCH_6M__)
+FUNC_START clzsi2
+ mov r1, #28
+ mov r3, #1
+ lsl r3, r3, #16
+ cmp r0, r3 /* 0x10000 */
+ bcc 2f
+ lsr r0, r0, #16
+ sub r1, r1, #16
+2: lsr r3, r3, #8
+ cmp r0, r3 /* #0x100 */
+ bcc 2f
+ lsr r0, r0, #8
+ sub r1, r1, #8
+2: lsr r3, r3, #4
+ cmp r0, r3 /* #0x10 */
+ bcc 2f
+ lsr r0, r0, #4
+ sub r1, r1, #4
+2: adr r2, 1f
+ ldrb r0, [r2, r0]
+ add r0, r0, r1
+ bx lr
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
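+@ The table above gives the number of leading zeros within the final
+@ 4-bit value (entry 0 is 4, which also makes __clzsi2(0) return 32).
+@ Illustrative example: r0 = 0x350 skips the 0x10000 test, takes the
+@ 0x100 test (r0 = 3, r1 = 20), skips the 0x10 test, and the table
+@ entry for 3 is 2, so the result is 20 + 2 = 22.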
+ FUNC_END clzsi2
+#else
+ARM_FUNC_START clzsi2
+# if defined(HAVE_ARM_CLZ)
+ clz r0, r0
+ RET
+# else
+ mov r1, #28
+ cmp r0, #0x10000
+ do_it cs, t
+ movcs r0, r0, lsr #16
+ subcs r1, r1, #16
+ cmp r0, #0x100
+ do_it cs, t
+ movcs r0, r0, lsr #8
+ subcs r1, r1, #8
+ cmp r0, #0x10
+ do_it cs, t
+ movcs r0, r0, lsr #4
+ subcs r1, r1, #4
+ adr r2, 1f
+ ldrb r0, [r2, r0]
+ add r0, r0, r1
+ RET
+.align 2
+1:
+.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
+# endif /* !HAVE_ARM_CLZ */
+ FUNC_END clzsi2
+#endif
+#endif /* L_clzsi2 */
+
+#ifdef L_clzdi2
+#if !defined(HAVE_ARM_CLZ)
+
+# if defined(__ARM_ARCH_6M__)
+FUNC_START clzdi2
+ push {r4, lr}
+# else
+ARM_FUNC_START clzdi2
+ do_push {r4, lr}
+# endif
+ cmp xxh, #0
+ bne 1f
+# ifdef __ARMEB__
+ mov r0, xxl
+ bl __clzsi2
+ add r0, r0, #32
+ b 2f
+1:
+ bl __clzsi2
+# else
+ bl __clzsi2
+ add r0, r0, #32
+ b 2f
+1:
+ mov r0, xxh
+ bl __clzsi2
+# endif
+2:
+# if defined(__ARM_ARCH_6M__)
+ pop {r4, pc}
+# else
+ RETLDM r4
+# endif
+ FUNC_END clzdi2
+
+#else /* HAVE_ARM_CLZ */
+
+ARM_FUNC_START clzdi2
+ cmp xxh, #0
+ do_it eq, et
+ clzeq r0, xxl
+ clzne r0, xxh
+ addeq r0, r0, #32
+ RET
+ FUNC_END clzdi2
+
+#endif
+#endif /* L_clzdi2 */
+
+/* ------------------------------------------------------------------------ */
+/* These next two sections are here despite the fact that they contain Thumb
+ assembler because their presence allows interworked code to be linked even
+ when the GCC library is this one. */
+
+/* Do not build the interworking functions when the target architecture does
+ not support Thumb instructions. (This can be a multilib option). */
+#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
+ || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
+ || __ARM_ARCH__ >= 6
+
+#if defined L_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code.
+ The address of function to be called is loaded into a register and then
+ one of these labels is called via a BL instruction. This puts the
+ return address into the link register with the bottom bit set, and the
+ code here switches to the correct mode before executing the function. */
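+
+/* An illustrative call sequence from Thumb code (hypothetical target):
+
+	ldr	r3, =some_function
+	bl	_call_via_r3
+
+   The bx below then enters some_function in the instruction set
+   selected by bit 0 of its address. */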
+
+ .text
+ .align 0
+ .force_thumb
+
+.macro call_via register
+ THUMB_FUNC_START _call_via_\register
+
+ bx \register
+ nop
+
+ SIZE (_call_via_\register)
+.endm
+
+ call_via r0
+ call_via r1
+ call_via r2
+ call_via r3
+ call_via r4
+ call_via r5
+ call_via r6
+ call_via r7
+ call_via r8
+ call_via r9
+ call_via sl
+ call_via fp
+ call_via ip
+ call_via sp
+ call_via lr
+
+#endif /* L_call_via_rX */
+
+/* Don't bother with the old interworking routines for Thumb-2. */
+/* ??? Maybe only omit these on "m" variants. */
+#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
+
+#if defined L_interwork_call_via_rX
+
+/* These labels & instructions are used by the Arm/Thumb interworking code,
+ when the target address is in an unknown instruction set. The address
+ of function to be called is loaded into a register and then one of these
+ labels is called via a BL instruction. This puts the return address
+ into the link register with the bottom bit set, and the code here
+ switches to the correct mode before executing the function. Unfortunately
+ the target code cannot be relied upon to return via a BX instruction, so
+   instead we have to store the return address on the stack and allow the
+ called function to return here instead. Upon return we recover the real
+ return address and use a BX to get back to Thumb mode.
+
+ There are three variations of this code. The first,
+ _interwork_call_via_rN(), will push the return address onto the
+ stack and pop it in _arm_return(). It should only be used if all
+ arguments are passed in registers.
+
+ The second, _interwork_r7_call_via_rN(), instead stores the return
+ address at [r7, #-4]. It is the caller's responsibility to ensure
+ that this address is valid and contains no useful data.
+
+ The third, _interwork_r11_call_via_rN(), works in the same way but
+ uses r11 instead of r7. It is useful if the caller does not really
+ need a frame pointer. */
+
+ .text
+ .align 0
+
+ .code 32
+ .globl _arm_return
+LSYM(Lstart_arm_return):
+ cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
+ cfi_push 0, 0xe, -0x8, 0x8
+ nop @ This nop is for the benefit of debuggers, so that
+ @ backtraces will use the correct unwind information.
+_arm_return:
+ RETLDM unwind=LSYM(Lstart_arm_return)
+ cfi_end LSYM(Lend_arm_return)
+
+ .globl _arm_return_r7
+_arm_return_r7:
+ ldr lr, [r7, #-4]
+ bx lr
+
+ .globl _arm_return_r11
+_arm_return_r11:
+ ldr lr, [r11, #-4]
+ bx lr
+
+.macro interwork_with_frame frame, register, name, return
+ .code 16
+
+ THUMB_FUNC_START \name
+
+ bx pc
+ nop
+
+ .code 32
+ tst \register, #1
+ streq lr, [\frame, #-4]
+ adreq lr, _arm_return_\frame
+ bx \register
+
+ SIZE (\name)
+.endm
+
+.macro interwork register
+ .code 16
+
+ THUMB_FUNC_START _interwork_call_via_\register
+
+ bx pc
+ nop
+
+ .code 32
+ .globl LSYM(Lchange_\register)
+LSYM(Lchange_\register):
+ tst \register, #1
+ streq lr, [sp, #-8]!
+ adreq lr, _arm_return
+ bx \register
+
+ SIZE (_interwork_call_via_\register)
+
+ interwork_with_frame r7,\register,_interwork_r7_call_via_\register
+ interwork_with_frame r11,\register,_interwork_r11_call_via_\register
+.endm
+
+ interwork r0
+ interwork r1
+ interwork r2
+ interwork r3
+ interwork r4
+ interwork r5
+ interwork r6
+ interwork r7
+ interwork r8
+ interwork r9
+ interwork sl
+ interwork fp
+ interwork ip
+ interwork sp
+
+ /* The LR case has to be handled a little differently... */
+ .code 16
+
+ THUMB_FUNC_START _interwork_call_via_lr
+
+ bx pc
+ nop
+
+ .code 32
+ .globl .Lchange_lr
+.Lchange_lr:
+ tst lr, #1
+ stmeqdb r13!, {lr, pc}
+ mov ip, lr
+ adreq lr, _arm_return
+ bx ip
+
+ SIZE (_interwork_call_via_lr)
+
+#endif /* L_interwork_call_via_rX */
+#endif /* !__thumb2__ */
+
+/* Functions to support compact PIC switch tables in Thumb-1 state.
+ All these routines take an index into the table in r0. The
+ table is at LR & ~1 (but this must be rounded up in the case
+   of 32-bit entries).  They are only permitted to clobber r12
+   and r14, and r0 must be preserved on exit. */
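+
+/* An illustrative use: the compiler emits a "bl __gnu_thumb1_case_sqi"
+   followed immediately by a table of signed bytes, each holding half
+   the offset from the table base to its case label; the routines below
+   fetch the entry indexed by r0 and add twice its value to LR.  */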
+#ifdef L_thumb1_case_sqi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_sqi
+ push {r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r1, r1, #1
+ ldrsb r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_sqi)
+#endif
+
+#ifdef L_thumb1_case_uqi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_uqi
+ push {r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r1, r1, #1
+ ldrb r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_uqi)
+#endif
+
+#ifdef L_thumb1_case_shi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_shi
+ push {r0, r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r0, r0, #1
+ lsls r1, r1, #1
+ ldrsh r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r0, r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_shi)
+#endif
+
+#ifdef L_thumb1_case_uhi
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_uhi
+ push {r0, r1}
+ mov r1, lr
+ lsrs r1, r1, #1
+ lsls r0, r0, #1
+ lsls r1, r1, #1
+ ldrh r1, [r1, r0]
+ lsls r1, r1, #1
+ add lr, lr, r1
+ pop {r0, r1}
+ bx lr
+ SIZE (__gnu_thumb1_case_uhi)
+#endif
+
+#ifdef L_thumb1_case_si
+
+ .text
+ .align 0
+ .force_thumb
+ .syntax unified
+ THUMB_FUNC_START __gnu_thumb1_case_si
+ push {r0, r1}
+ mov r1, lr
+ adds.n r1, r1, #2 /* Align to word. */
+ lsrs r1, r1, #2
+ lsls r0, r0, #2
+ lsls r1, r1, #2
+ ldr r0, [r1, r0]
+ adds r0, r0, r1
+ mov lr, r0
+ pop {r0, r1}
+ mov pc, lr /* We know we were called from thumb code. */
+ SIZE (__gnu_thumb1_case_si)
+#endif
+
+#endif /* Arch supports thumb. */
+
+#ifndef __symbian__
+#ifndef __ARM_ARCH_6M__
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
+#include "bpabi.S"
+#else /* __ARM_ARCH_6M__ */
+#include "bpabi-v6m.S"
+#endif /* __ARM_ARCH_6M__ */
+#endif /* !__symbian__ */
diff --git a/gcc/config/arm/libgcc-bpabi.ver b/gcc/config/arm/libgcc-bpabi.ver
new file mode 100644
index 000000000..3ba8364dc
--- /dev/null
+++ b/gcc/config/arm/libgcc-bpabi.ver
@@ -0,0 +1,108 @@
+# Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+GCC_3.5 {
+ # BPABI symbols
+ __aeabi_cdcmpeq
+ __aeabi_cdcmple
+ __aeabi_cdrcmple
+ __aeabi_cfcmpeq
+ __aeabi_cfcmple
+ __aeabi_cfrcmple
+ __aeabi_d2f
+ __aeabi_d2iz
+ __aeabi_d2lz
+ __aeabi_d2uiz
+ __aeabi_d2ulz
+ __aeabi_dadd
+ __aeabi_dcmpeq
+ __aeabi_dcmpge
+ __aeabi_dcmpgt
+ __aeabi_dcmple
+ __aeabi_dcmplt
+ __aeabi_dcmpun
+ __aeabi_ddiv
+ __aeabi_dmul
+ __aeabi_dneg
+ __aeabi_drsub
+ __aeabi_dsub
+ __aeabi_f2d
+ __aeabi_f2iz
+ __aeabi_f2lz
+ __aeabi_f2uiz
+ __aeabi_f2ulz
+ __aeabi_fadd
+ __aeabi_fcmpeq
+ __aeabi_fcmpge
+ __aeabi_fcmpgt
+ __aeabi_fcmple
+ __aeabi_fcmplt
+ __aeabi_fcmpun
+ __aeabi_fdiv
+ __aeabi_fmul
+ __aeabi_fneg
+ __aeabi_frsub
+ __aeabi_fsub
+ __aeabi_i2d
+ __aeabi_i2f
+ __aeabi_idiv
+ __aeabi_idiv0
+ __aeabi_idivmod
+ __aeabi_l2d
+ __aeabi_l2f
+ __aeabi_lasr
+ __aeabi_lcmp
+ __aeabi_ldiv0
+ __aeabi_ldivmod
+ __aeabi_llsl
+ __aeabi_llsr
+ __aeabi_lmul
+ __aeabi_ui2d
+ __aeabi_ui2f
+ __aeabi_uidiv
+ __aeabi_uidivmod
+ __aeabi_uldivmod
+ __aeabi_ulcmp
+ __aeabi_ul2d
+ __aeabi_ul2f
+ __aeabi_uread4
+ __aeabi_uread8
+ __aeabi_uwrite4
+ __aeabi_uwrite8
+
+ # Exception-Handling
+ # \S 7.5
+ _Unwind_Complete
+ _Unwind_VRS_Get
+ _Unwind_VRS_Set
+ _Unwind_VRS_Pop
+ # \S 9.2
+ __aeabi_unwind_cpp_pr0
+ __aeabi_unwind_cpp_pr1
+ __aeabi_unwind_cpp_pr2
+ # The libstdc++ exception-handling personality routine uses this
+ # GNU-specific entry point.
+ __gnu_unwind_frame
+}
+
+%exclude {
+ _Unwind_Backtrace
+}
+GCC_4.3.0 {
+ _Unwind_Backtrace
+}
diff --git a/gcc/config/arm/libunwind.S b/gcc/config/arm/libunwind.S
new file mode 100644
index 000000000..48eb592fd
--- /dev/null
+++ b/gcc/config/arm/libunwind.S
@@ -0,0 +1,363 @@
+/* Support functions for the unwinder.
+ Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Paul Brook
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* An executable stack is *not* required for these functions. */
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+#ifdef __ARM_EABI__
+/* Some attributes that are common to all routines in this file. */
+ /* Tag_ABI_align_needed: This code does not require 8-byte
+ alignment from the caller. */
+ /* .eabi_attribute 24, 0 -- default setting. */
+ /* Tag_ABI_align_preserved: This code preserves 8-byte
+ alignment in any callee. */
+ .eabi_attribute 25, 1
+#endif /* __ARM_EABI__ */
+
+#ifndef __symbian__
+
+#include "lib1funcs.asm"
+
+.macro UNPREFIX name
+ .global SYM (\name)
+ EQUIV SYM (\name), SYM (__\name)
+.endm
+
+#if (__ARM_ARCH__ == 4)
+/* Some coprocessor instructions used here require armv5.  We know this
+   code will never be run on other CPUs.  Tell gas to allow armv5, but
+   only mark the objects as armv4.  */
+.arch armv5t
+#ifdef __ARM_ARCH_4T__
+.object_arch armv4t
+#else
+.object_arch armv4
+#endif
+#endif
+
+#ifdef __ARM_ARCH_6M__
+
+/* r0 points to a 16-word block. Upload these values to the actual core
+ state. */
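+/* The block is laid out by register number: r0-r7 at offsets 0-28,
+   r8-r11 at 32-44, ip at 48, then sp, lr and pc at 52, 56 and 60,
+   which is where the code below picks them up.  */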
+FUNC_START restore_core_regs
+ mov r1, r0
+ add r1, r1, #52
+ ldmia r1!, {r3, r4, r5}
+ sub r3, r3, #4
+ mov ip, r3
+ str r5, [r3]
+ mov lr, r4
+ /* Restore r8-r11. */
+ mov r1, r0
+ add r1, r1, #32
+ ldmia r1!, {r2, r3, r4, r5}
+ mov r8, r2
+ mov r9, r3
+ mov sl, r4
+ mov fp, r5
+ mov r1, r0
+ add r1, r1, #8
+ ldmia r1!, {r2, r3, r4, r5, r6, r7}
+ ldr r1, [r0, #4]
+ ldr r0, [r0]
+ mov sp, ip
+ pop {pc}
+ FUNC_END restore_core_regs
+ UNPREFIX restore_core_regs
+
+/* ARMv6-M does not have coprocessors, so these should never be used.  */
+FUNC_START gnu_Unwind_Restore_VFP
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0.  */
+FUNC_START gnu_Unwind_Save_VFP
+ RET
+
+/* Load VFP registers d0-d15 from the address in r0.
+ Use this to load from FSTMD format. */
+FUNC_START gnu_Unwind_Restore_VFP_D
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0.
+ Use this to store in FLDMD format. */
+FUNC_START gnu_Unwind_Save_VFP_D
+ RET
+
+/* Load VFP registers d16-d31 from the address in r0.
+ Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */
+FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31
+ RET
+
+/* Store VFP registers d16-d31 to the address in r0.
+ Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */
+FUNC_START gnu_Unwind_Save_VFP_D_16_to_31
+ RET
+
+FUNC_START gnu_Unwind_Restore_WMMXD
+ RET
+
+FUNC_START gnu_Unwind_Save_WMMXD
+ RET
+
+FUNC_START gnu_Unwind_Restore_WMMXC
+ RET
+
+FUNC_START gnu_Unwind_Save_WMMXC
+ RET
+
+.macro UNWIND_WRAPPER name nargs
+ FUNC_START \name
+ /* Create a phase2_vrs structure. */
+ /* Save r0 in the PC slot so we can use it as a scratch register. */
+ push {r0}
+ add r0, sp, #4
+ push {r0, lr} /* Push original SP and LR. */
+ /* Make space for r8-r12. */
+ sub sp, sp, #20
+ /* Save low registers. */
+ push {r0, r1, r2, r3, r4, r5, r6, r7}
+ /* Save high registers. */
+ add r0, sp, #32
+ mov r1, r8
+ mov r2, r9
+ mov r3, sl
+ mov r4, fp
+ mov r5, ip
+ stmia r0!, {r1, r2, r3, r4, r5}
+ /* Restore original low register values. */
+ add r0, sp, #4
+ ldmia r0!, {r1, r2, r3, r4, r5}
+	/* Restore original r0.  */
+ ldr r0, [sp, #60]
+ str r0, [sp]
+ /* Demand-save flags, plus an extra word for alignment. */
+ mov r3, #0
+ push {r2, r3}
+ /* Point r1 at the block. Pass r[0..nargs) unchanged. */
+ add r\nargs, sp, #4
+
+ bl SYM (__gnu\name)
+
+ ldr r3, [sp, #64]
+ add sp, sp, #72
+ bx r3
+
+ FUNC_END \name
+ UNPREFIX \name
+.endm
+
+#else /* !__ARM_ARCH_6M__ */
+
+/* r0 points to a 16-word block. Upload these values to the actual core
+ state. */
+ARM_FUNC_START restore_core_regs
+ /* We must use sp as the base register when restoring sp. Push the
+ last 3 registers onto the top of the current stack to achieve
+ this. */
+ add r1, r0, #52
+ ldmia r1, {r3, r4, r5} /* {sp, lr, pc}. */
+#if defined(__thumb2__)
+ /* Thumb-2 doesn't allow sp in a load-multiple instruction, so push
+ the target address onto the target stack. This is safe as
+ we're always returning to somewhere further up the call stack. */
+ mov ip, r3
+ mov lr, r4
+ str r5, [ip, #-4]!
+#elif defined(__INTERWORKING__)
+ /* Restore pc into ip. */
+ mov r2, r5
+ stmfd sp!, {r2, r3, r4}
+#else
+ stmfd sp!, {r3, r4, r5}
+#endif
+ /* Don't bother restoring ip. */
+ ldmia r0, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp}
+#if defined(__thumb2__)
+ /* Pop the return address off the target stack. */
+ mov sp, ip
+ pop {pc}
+#elif defined(__INTERWORKING__)
+ /* Pop the three registers we pushed earlier. */
+ ldmfd sp, {ip, sp, lr}
+ bx ip
+#else
+ ldmfd sp, {sp, lr, pc}
+#endif
+ FUNC_END restore_core_regs
+ UNPREFIX restore_core_regs
+
+/* Load VFP registers d0-d15 from the address in r0.
+ Use this to load from FSTMX format. */
+ARM_FUNC_START gnu_Unwind_Restore_VFP
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on soft-float targets. */
+ ldc p11,cr0,[r0],{0x21} /* fldmiax r0, {d0-d15} */
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0.
+ Use this to store in FSTMX format. */
+ARM_FUNC_START gnu_Unwind_Save_VFP
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on soft-float targets. */
+ stc p11,cr0,[r0],{0x21} /* fstmiax r0, {d0-d15} */
+ RET
+
+/* Load VFP registers d0-d15 from the address in r0.
+ Use this to load from FSTMD format. */
+ARM_FUNC_START gnu_Unwind_Restore_VFP_D
+ ldc p11,cr0,[r0],{0x20} /* fldmiad r0, {d0-d15} */
+ RET
+
+/* Store VFP registers d0-d15 to the address in r0.
+ Use this to store in FLDMD format. */
+ARM_FUNC_START gnu_Unwind_Save_VFP_D
+ stc p11,cr0,[r0],{0x20} /* fstmiad r0, {d0-d15} */
+ RET
+
+/* Load VFP registers d16-d31 from the address in r0.
+ Use this to load from FSTMD (=VSTM) format. Needs VFPv3. */
+ARM_FUNC_START gnu_Unwind_Restore_VFP_D_16_to_31
+ ldcl p11,cr0,[r0],{0x20} /* vldm r0, {d16-d31} */
+ RET
+
+/* Store VFP registers d16-d31 to the address in r0.
+ Use this to store in FLDMD (=VLDM) format. Needs VFPv3. */
+ARM_FUNC_START gnu_Unwind_Save_VFP_D_16_to_31
+ stcl p11,cr0,[r0],{0x20} /* vstm r0, {d16-d31} */
+ RET
+
+ARM_FUNC_START gnu_Unwind_Restore_WMMXD
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on non-iWMMXt targets. */
+ ldcl p1, cr0, [r0], #8 /* wldrd wr0, [r0], #8 */
+ ldcl p1, cr1, [r0], #8 /* wldrd wr1, [r0], #8 */
+ ldcl p1, cr2, [r0], #8 /* wldrd wr2, [r0], #8 */
+ ldcl p1, cr3, [r0], #8 /* wldrd wr3, [r0], #8 */
+ ldcl p1, cr4, [r0], #8 /* wldrd wr4, [r0], #8 */
+ ldcl p1, cr5, [r0], #8 /* wldrd wr5, [r0], #8 */
+ ldcl p1, cr6, [r0], #8 /* wldrd wr6, [r0], #8 */
+ ldcl p1, cr7, [r0], #8 /* wldrd wr7, [r0], #8 */
+ ldcl p1, cr8, [r0], #8 /* wldrd wr8, [r0], #8 */
+ ldcl p1, cr9, [r0], #8 /* wldrd wr9, [r0], #8 */
+ ldcl p1, cr10, [r0], #8 /* wldrd wr10, [r0], #8 */
+ ldcl p1, cr11, [r0], #8 /* wldrd wr11, [r0], #8 */
+ ldcl p1, cr12, [r0], #8 /* wldrd wr12, [r0], #8 */
+ ldcl p1, cr13, [r0], #8 /* wldrd wr13, [r0], #8 */
+ ldcl p1, cr14, [r0], #8 /* wldrd wr14, [r0], #8 */
+ ldcl p1, cr15, [r0], #8 /* wldrd wr15, [r0], #8 */
+ RET
+
+ARM_FUNC_START gnu_Unwind_Save_WMMXD
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on non-iWMMXt targets. */
+ stcl p1, cr0, [r0], #8 /* wstrd wr0, [r0], #8 */
+ stcl p1, cr1, [r0], #8 /* wstrd wr1, [r0], #8 */
+ stcl p1, cr2, [r0], #8 /* wstrd wr2, [r0], #8 */
+ stcl p1, cr3, [r0], #8 /* wstrd wr3, [r0], #8 */
+ stcl p1, cr4, [r0], #8 /* wstrd wr4, [r0], #8 */
+ stcl p1, cr5, [r0], #8 /* wstrd wr5, [r0], #8 */
+ stcl p1, cr6, [r0], #8 /* wstrd wr6, [r0], #8 */
+ stcl p1, cr7, [r0], #8 /* wstrd wr7, [r0], #8 */
+ stcl p1, cr8, [r0], #8 /* wstrd wr8, [r0], #8 */
+ stcl p1, cr9, [r0], #8 /* wstrd wr9, [r0], #8 */
+ stcl p1, cr10, [r0], #8 /* wstrd wr10, [r0], #8 */
+ stcl p1, cr11, [r0], #8 /* wstrd wr11, [r0], #8 */
+ stcl p1, cr12, [r0], #8 /* wstrd wr12, [r0], #8 */
+ stcl p1, cr13, [r0], #8 /* wstrd wr13, [r0], #8 */
+ stcl p1, cr14, [r0], #8 /* wstrd wr14, [r0], #8 */
+ stcl p1, cr15, [r0], #8 /* wstrd wr15, [r0], #8 */
+ RET
+
+ARM_FUNC_START gnu_Unwind_Restore_WMMXC
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on non-iWMMXt targets. */
+ ldc2 p1, cr8, [r0], #4 /* wldrw wcgr0, [r0], #4 */
+ ldc2 p1, cr9, [r0], #4 /* wldrw wcgr1, [r0], #4 */
+ ldc2 p1, cr10, [r0], #4 /* wldrw wcgr2, [r0], #4 */
+ ldc2 p1, cr11, [r0], #4 /* wldrw wcgr3, [r0], #4 */
+ RET
+
+ARM_FUNC_START gnu_Unwind_Save_WMMXC
+ /* Use the generic coprocessor form so that gas doesn't complain
+ on non-iWMMXt targets. */
+ stc2 p1, cr8, [r0], #4 /* wstrw wcgr0, [r0], #4 */
+ stc2 p1, cr9, [r0], #4 /* wstrw wcgr1, [r0], #4 */
+ stc2 p1, cr10, [r0], #4 /* wstrw wcgr2, [r0], #4 */
+ stc2 p1, cr11, [r0], #4 /* wstrw wcgr3, [r0], #4 */
+ RET
+
+/* Wrappers to save core registers, then call the real routine. */
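+
+/* The frame built below is: two words of demand-save flags at sp,
+   r0-r12 at sp + 8 through sp + 56, and the original sp, lr and pc at
+   sp + 60, sp + 64 and sp + 68.  That is why the return address is
+   reloaded from [sp, #64] and 72 bytes are popped on exit.  */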
+
+.macro UNWIND_WRAPPER name nargs
+ ARM_FUNC_START \name
+ /* Create a phase2_vrs structure. */
+ /* Split reg push in two to ensure the correct value for sp. */
+#if defined(__thumb2__)
+ mov ip, sp
+ push {lr} /* PC is ignored. */
+ push {ip, lr} /* Push original SP and LR. */
+#else
+ stmfd sp!, {sp, lr, pc}
+#endif
+ stmfd sp!, {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip}
+
+ /* Demand-save flags, plus an extra word for alignment. */
+ mov r3, #0
+ stmfd sp!, {r2, r3}
+
+ /* Point r1 at the block. Pass r[0..nargs) unchanged. */
+ add r\nargs, sp, #4
+#if defined(__thumb__) && !defined(__thumb2__)
+ /* Switch back to thumb mode to avoid interworking hassle. */
+ adr ip, .L1_\name
+ orr ip, ip, #1
+ bx ip
+ .thumb
+.L1_\name:
+ bl SYM (__gnu\name) __PLT__
+ ldr r3, [sp, #64]
+ add sp, #72
+ bx r3
+#else
+ bl SYM (__gnu\name) __PLT__
+ ldr lr, [sp, #64]
+ add sp, sp, #72
+ RET
+#endif
+ FUNC_END \name
+ UNPREFIX \name
+.endm
+
+#endif /* !__ARM_ARCH_6M__ */
+
+UNWIND_WRAPPER _Unwind_RaiseException 1
+UNWIND_WRAPPER _Unwind_Resume 1
+UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1
+UNWIND_WRAPPER _Unwind_ForcedUnwind 3
+UNWIND_WRAPPER _Unwind_Backtrace 2
+
+#endif /* ndef __symbian__ */
diff --git a/gcc/config/arm/linux-atomic.c b/gcc/config/arm/linux-atomic.c
new file mode 100644
index 000000000..57065a6e8
--- /dev/null
+++ b/gcc/config/arm/linux-atomic.c
@@ -0,0 +1,278 @@
+/* Linux-specific atomic operations for ARM EABI.
+ Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Kernel helper for compare-and-exchange. */
+typedef int (__kernel_cmpxchg_t) (int oldval, int newval, int *ptr);
+#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *) 0xffff0fc0)
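+
+/* The cmpxchg helper atomically compares *ptr with oldval and, if they
+   are equal, stores newval.  It returns zero on success and non-zero
+   if *ptr did not contain oldval, which is why the loops below retry
+   until it returns zero.  */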
+
+/* Kernel helper for memory barrier. */
+typedef void (__kernel_dmb_t) (void);
+#define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0)
+
+/* Note: we implement byte, short and int versions of atomic operations using
+ the above kernel helpers, but there is no support for "long long" (64-bit)
+ operations as yet. */
+
+#define HIDDEN __attribute__ ((visibility ("hidden")))
+
+#ifdef __ARMEL__
+#define INVERT_MASK_1 0
+#define INVERT_MASK_2 0
+#else
+#define INVERT_MASK_1 24
+#define INVERT_MASK_2 16
+#endif
+
+#define MASK_1 0xffu
+#define MASK_2 0xffffu
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \
+ int HIDDEN \
+ __sync_fetch_and_##OP##_4 (int *ptr, int val) \
+ { \
+ int failure, tmp; \
+ \
+ do { \
+ tmp = *ptr; \
+ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
+ } while (failure != 0); \
+ \
+ return tmp; \
+ }
+
+FETCH_AND_OP_WORD (add, , +)
+FETCH_AND_OP_WORD (sub, , -)
+FETCH_AND_OP_WORD (or, , |)
+FETCH_AND_OP_WORD (and, , &)
+FETCH_AND_OP_WORD (xor, , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+ subword-sized quantities. */
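+
+/* For example (illustrative, little-endian): a 16-bit quantity at an
+   address with (ptr & 3) == 2 gives shift = 16 and mask = 0xffff0000,
+   so the operation is applied to the high halfword of the containing
+   word while the low halfword is preserved.  */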
+
+#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \
+ TYPE HIDDEN \
+ NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \
+ { \
+ int *wordptr = (int *) ((unsigned int) ptr & ~3); \
+ unsigned int mask, shift, oldval, newval; \
+ int failure; \
+ \
+ shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ do { \
+ oldval = *wordptr; \
+ newval = ((PFX_OP (((oldval & mask) >> shift) \
+ INF_OP (unsigned int) val)) << shift) & mask; \
+ newval |= oldval & ~mask; \
+ failure = __kernel_cmpxchg (oldval, newval, wordptr); \
+ } while (failure != 0); \
+ \
+ return (RETURN & mask) >> shift; \
+ }
+
+SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+
+SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
+
+#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \
+ int HIDDEN \
+ __sync_##OP##_and_fetch_4 (int *ptr, int val) \
+ { \
+ int tmp, failure; \
+ \
+ do { \
+ tmp = *ptr; \
+ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
+ } while (failure != 0); \
+ \
+ return PFX_OP (tmp INF_OP val); \
+ }
+
+OP_AND_FETCH_WORD (add, , +)
+OP_AND_FETCH_WORD (sub, , -)
+OP_AND_FETCH_WORD (or, , |)
+OP_AND_FETCH_WORD (and, , &)
+OP_AND_FETCH_WORD (xor, , ^)
+OP_AND_FETCH_WORD (nand, ~, &)
+
+SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+
+SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+
+int HIDDEN
+__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+ int actual_oldval, fail;
+
+ while (1)
+ {
+ actual_oldval = *ptr;
+
+ if (__builtin_expect (oldval != actual_oldval, 0))
+ return actual_oldval;
+
+ fail = __kernel_cmpxchg (actual_oldval, newval, ptr);
+
+ if (__builtin_expect (!fail, 1))
+ return oldval;
+ }
+}
+
+#define SUBWORD_VAL_CAS(TYPE, WIDTH) \
+ TYPE HIDDEN \
+ __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
+ TYPE newval) \
+ { \
+ int *wordptr = (int *)((unsigned int) ptr & ~3), fail; \
+ unsigned int mask, shift, actual_oldval, actual_newval; \
+ \
+ shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ while (1) \
+ { \
+ actual_oldval = *wordptr; \
+ \
+ if (__builtin_expect (((actual_oldval & mask) >> shift) != \
+ (unsigned int) oldval, 0)) \
+ return (actual_oldval & mask) >> shift; \
+ \
+ actual_newval = (actual_oldval & ~mask) \
+ | (((unsigned int) newval << shift) & mask); \
+ \
+ fail = __kernel_cmpxchg (actual_oldval, actual_newval, \
+ wordptr); \
+ \
+ if (__builtin_expect (!fail, 1)) \
+ return oldval; \
+ } \
+ }
+
+SUBWORD_VAL_CAS (unsigned short, 2)
+SUBWORD_VAL_CAS (unsigned char, 1)
+
+typedef unsigned char bool;
+
+bool HIDDEN
+__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+ int failure = __kernel_cmpxchg (oldval, newval, ptr);
+ return (failure == 0);
+}
+
+#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \
+ bool HIDDEN \
+ __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
+ TYPE newval) \
+ { \
+ TYPE actual_oldval \
+ = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \
+ return (oldval == actual_oldval); \
+ }
+
+SUBWORD_BOOL_CAS (unsigned short, 2)
+SUBWORD_BOOL_CAS (unsigned char, 1)
+
+void HIDDEN
+__sync_synchronize (void)
+{
+ __kernel_dmb ();
+}
+
+int HIDDEN
+__sync_lock_test_and_set_4 (int *ptr, int val)
+{
+ int failure, oldval;
+
+ do {
+ oldval = *ptr;
+ failure = __kernel_cmpxchg (oldval, val, ptr);
+ } while (failure != 0);
+
+ return oldval;
+}
+
+#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \
+ TYPE HIDDEN \
+ __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \
+ { \
+ int failure; \
+ unsigned int oldval, newval, shift, mask; \
+ int *wordptr = (int *) ((unsigned int) ptr & ~3); \
+ \
+ shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ do { \
+ oldval = *wordptr; \
+ newval = (oldval & ~mask) \
+ | (((unsigned int) val << shift) & mask); \
+ failure = __kernel_cmpxchg (oldval, newval, wordptr); \
+ } while (failure != 0); \
+ \
+ return (oldval & mask) >> shift; \
+ }
+
+SUBWORD_TEST_AND_SET (unsigned short, 2)
+SUBWORD_TEST_AND_SET (unsigned char, 1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
+ void HIDDEN \
+ __sync_lock_release_##WIDTH (TYPE *ptr) \
+ { \
+ /* All writes before this point must be seen before we release \
+ the lock itself. */ \
+ __kernel_dmb (); \
+ *ptr = 0; \
+ }
+
+SYNC_LOCK_RELEASE (int, 4)
+SYNC_LOCK_RELEASE (short, 2)
+SYNC_LOCK_RELEASE (char, 1)
diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
new file mode 100644
index 000000000..833005284
--- /dev/null
+++ b/gcc/config/arm/linux-eabi.h
@@ -0,0 +1,103 @@
+/* Configuration file for ARM GNU/Linux EABI targets.
+ Copyright (C) 2004, 2005, 2006, 2007, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* On EABI GNU/Linux, we want both the BPABI builtins and the
+ GNU/Linux builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ TARGET_BPABI_CPP_BUILTINS(); \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ ANDROID_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (false)
+
+/* We default to a soft-float ABI so that binaries can run on all
+ target hardware. */
+#undef TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
+
+/* We default to the "aapcs-linux" ABI so that enums are int-sized by
+ default. */
+#undef ARM_DEFAULT_ABI
+#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX
+
+/* Default to armv5t so that thumb shared libraries work.
+ The ARM10TDMI core is the default for armv5t, so set
+ SUBTARGET_CPU_DEFAULT to achieve this. */
+#undef SUBTARGET_CPU_DEFAULT
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi
+
+/* TARGET_BIG_ENDIAN_DEFAULT is set in
+ config.gcc for big endian configurations. */
+#undef TARGET_LINKER_EMULATION
+#if TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_LINKER_EMULATION "armelfb_linux_eabi"
+#else
+#define TARGET_LINKER_EMULATION "armelf_linux_eabi"
+#endif
+
+#undef SUBTARGET_EXTRA_LINK_SPEC
+#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION
+
+/* Use ld-linux.so.3 so that it will be possible to run "classic"
+ GNU/Linux binaries on an EABI system. */
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.3"
+
+/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to
+ use the GNU/Linux version, not the generic BPABI version. */
+#undef LINK_SPEC
+#define LINK_SPEC BE8_LINK_SPEC \
+ LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
+ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+ LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
+ GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC)
+
+#define CC1PLUS_SPEC \
+ LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
+ GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC)
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we
+ do not use -lfloat. */
+#undef LIBGCC_SPEC
+
+/* Clear the instruction cache from `beg' to `end'. This is
+   implemented in lib1funcs.asm, so this definition deliberately
+   triggers an error if it is ever used.  */
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(BEG, END) not_used
diff --git a/gcc/config/arm/linux-elf.h b/gcc/config/arm/linux-elf.h
new file mode 100644
index 000000000..81d27bb72
--- /dev/null
+++ b/gcc/config/arm/linux-elf.h
@@ -0,0 +1,120 @@
+/* Definitions for ARM running Linux-based GNU systems using ELF
+ Copyright (C) 1993, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Philip Blundell <philb@gnu.org>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* elfos.h should have already been included. Now just override
+ any conflicting definitions and add any extras. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM GNU/Linux with ELF)", stderr);
+
+#undef TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD
+
+/* TARGET_BIG_ENDIAN_DEFAULT is set in
+ config.gcc for big endian configurations. */
+#if TARGET_BIG_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT MASK_BIG_END
+#define TARGET_ENDIAN_OPTION "mbig-endian"
+#define TARGET_LINKER_EMULATION "armelfb_linux"
+#else
+#define TARGET_ENDIAN_DEFAULT 0
+#define TARGET_ENDIAN_OPTION "mlittle-endian"
+#define TARGET_LINKER_EMULATION "armelf_linux"
+#endif
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (TARGET_ENDIAN_DEFAULT)
+
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6
+
+#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p"
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { "marm", "mlittle-endian", "mhard-float", "mno-thumb-interwork" }
+
+/* Now we define the strings used to build the spec file. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{profile:-lc_p}%{!profile:-lc}}"
+
+#define LIBGCC_SPEC "%{msoft-float:-lfloat} %{mfloat-abi=soft*:-lfloat} -lgcc"
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#define LINUX_TARGET_LINK_SPEC "%{h*} \
+ %{static:-Bstatic} \
+ %{shared:-shared} \
+ %{symbolic:-Bsymbolic} \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER " \
+ -X \
+ %{mbig-endian:-EB} %{mlittle-endian:-EL}" \
+ SUBTARGET_EXTRA_LINK_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+/* This is how we tell the assembler that two symbols have the same value. */
+#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \
+ do \
+ { \
+ assemble_name (FILE, NAME1); \
+ fputs (" = ", FILE); \
+ assemble_name (FILE, NAME2); \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
+
+/* NWFPE always understands FPA instructions. */
+#undef FPUTYPE_DEFAULT
+#define FPUTYPE_DEFAULT "fpe3"
+
+/* Call the function profiler with a given profile label. */
+#undef ARM_FUNCTION_PROFILER
+#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \
+{ \
+ fprintf (STREAM, "\tbl\tmcount%s\n", \
+ (TARGET_ARM && NEED_PLT_RELOC) ? "(PLT)" : ""); \
+}
+
+/* The GNU/Linux profiler clobbers the link register. Make sure the
+ prologue knows to save it. */
+#define PROFILE_HOOK(X) \
+ emit_clobber (gen_rtx_REG (SImode, LR_REGNUM))
+
+/* The GNU/Linux profiler needs a frame pointer. */
+#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile
+
+/* Add .note.GNU-stack. */
+#undef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
diff --git a/gcc/config/arm/linux-gas.h b/gcc/config/arm/linux-gas.h
new file mode 100644
index 000000000..9b6fcde2b
--- /dev/null
+++ b/gcc/config/arm/linux-gas.h
@@ -0,0 +1,56 @@
+/* Definitions of target machine for GNU compiler.
+ ARM Linux-based GNU systems version.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Russell King <rmk92@ecs.soton.ac.uk>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This is how we tell the assembler that a symbol is weak.
+ GAS always supports weak symbols. */
+
+/* Unsigned chars produce much better code than signed chars.  */
+#define DEFAULT_SIGNED_CHAR 0
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* Use the AAPCS type for wchar_t, or the previous Linux default for
+ non-AAPCS. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long int")
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Clear the instruction cache from `beg' to `end'. This makes an
+ inline system call to SYS_cacheflush. */
+#define CLEAR_INSN_CACHE(BEG, END) \
+{ \
+ register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \
+ register unsigned long _end __asm ("a2") = (unsigned long) (END); \
+ register unsigned long _flg __asm ("a3") = 0; \
+ __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \
+ : "=r" (_beg) \
+ : "0" (_beg), "r" (_end), "r" (_flg)); \
+}
diff --git a/gcc/config/arm/mmintrin.h b/gcc/config/arm/mmintrin.h
new file mode 100644
index 000000000..2cc500de3
--- /dev/null
+++ b/gcc/config/arm/mmintrin.h
@@ -0,0 +1,1254 @@
+/* Copyright (C) 2002, 2003, 2004, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+/* The data type intended for user use. */
+typedef unsigned long long __m64, __int64;
+
+/* Internal data types for implementing the intrinsics. */
+typedef int __v2si __attribute__ ((vector_size (8)));
+typedef short __v4hi __attribute__ ((vector_size (8)));
+typedef char __v8qi __attribute__ ((vector_size (8)));
+
+/* "Convert" __m64 and __int64 into each other. */
+static __inline __m64
+_mm_cvtsi64_m64 (__int64 __i)
+{
+ return __i;
+}
+
+static __inline __int64
+_mm_cvtm64_si64 (__m64 __i)
+{
+ return __i;
+}
+
+static __inline int
+_mm_cvtsi64_si32 (__int64 __i)
+{
+ return __i;
+}
+
+static __inline __int64
+_mm_cvtsi32_si64 (int __i)
+{
+ return __i;
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with signed saturation. */
+static __inline __m64
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with signed saturation. */
+static __inline __m64
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and
+ the 64-bit value from M2 into the upper 32-bits of the result, all with
+ signed saturation for values that do not fit exactly into 32-bits. */
+static __inline __m64
+_mm_packs_pi64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackdss ((long long)__m1, (long long)__m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with unsigned saturation. */
+static __inline __m64
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with unsigned saturation. */
+static __inline __m64
+_mm_packs_pu32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackwus ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and
+ the 64-bit value from M2 into the upper 32-bits of the result, all with
+ unsigned saturation for values that do not fit exactly into 32-bits. */
+static __inline __m64
+_mm_packs_pu64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wpackdus ((long long)__m1, (long long)__m2);
+}
+
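A small illustration of the saturating behaviour (values are hypothetical;
_mm_set_pi16 and _mm_setzero_si64 are defined later in this header):

    __m64 a = _mm_set_pi16 (300, -200, 5, 7);
    __m64 b = _mm_setzero_si64 ();
    /* Low four result bytes, least significant first: 7, 5, -128, 127.
       300 exceeds the signed 8-bit range and clamps to 127; -200 clamps
       to -128.  */
    __m64 packed = _mm_packs_pi16 (a, b);
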
+/* Interleave the four 8-bit values from the high half of M1 with the four
+ 8-bit values from the high half of M2. */
+static __inline __m64
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckihb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+ 16-bit values from the high half of M2. */
+static __inline __m64
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckihh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+ value from the high half of M2. */
+static __inline __m64
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckihw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+ 8-bit values from the low half of M2. */
+static __inline __m64
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckilb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+ 16-bit values from the low half of M2. */
+static __inline __m64
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckilh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+ value from the low half of M2. */
+static __inline __m64
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wunpckilw ((__v2si)__m1, (__v2si)__m2);
+}
+
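A sketch of the interleaving, with hypothetical operands whose low bytes
are a0 a1 a2 a3 and b0 b1 b2 b3 (least significant first):

    /* Result bytes, least significant first: a0 b0 a1 b1 a2 b2 a3 b3.  */
    __m64 lo = _mm_unpacklo_pi8 (m1, m2);
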
+/* Take the four 8-bit values from the low half of M1, sign extend them,
+ and return the result as a vector of four 16-bit quantities. */
+static __inline __m64
+_mm_unpackel_pi8 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckelsb ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the low half of M1, sign extend them,
+ and return the result as a vector of two 32-bit quantities. */
+static __inline __m64
+_mm_unpackel_pi16 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckelsh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the low half of M1, and return it sign extended
+ to 64 bits. */
+static __inline __m64
+_mm_unpackel_pi32 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckelsw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the high half of M1, sign extend them,
+ and return the result as a vector of four 16-bit quantities. */
+static __inline __m64
+_mm_unpackeh_pi8 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehsb ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the high half of M1, sign extend them,
+ and return the result as a vector of two 32-bit quantities. */
+static __inline __m64
+_mm_unpackeh_pi16 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehsh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the high half of M1, and return it sign extended
+ to 64 bits. */
+static __inline __m64
+_mm_unpackeh_pi32 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehsw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the low half of M1, zero extend them,
+ and return the result as a vector of four 16-bit quantities. */
+static __inline __m64
+_mm_unpackel_pu8 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckelub ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the low half of M1, zero extend them,
+ and return the result as a vector of two 32-bit quantities. */
+static __inline __m64
+_mm_unpackel_pu16 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckeluh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the low half of M1, and return it zero extended
+ to 64 bits. */
+static __inline __m64
+_mm_unpackel_pu32 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckeluw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the high half of M1, zero extend them,
+ and return the result as a vector of four 16-bit quantities. */
+static __inline __m64
+_mm_unpackeh_pu8 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehub ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the high half of M1, zero extend them,
+ and return the result as a vector of two 32-bit quantities. */
+static __inline __m64
+_mm_unpackeh_pu16 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehuh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the high half of M1, and return it zero extended
+ to 64 bits. */
+static __inline __m64
+_mm_unpackeh_pu32 (__m64 __m1)
+{
+ return (__m64) __builtin_arm_wunpckehuw ((__v2si)__m1);
+}
+
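These widening forms are the usual way to lift 8-bit data to 16 bits before
arithmetic that must not wrap; a sketch with a hypothetical `pixels' vector:

    __m64 lo16 = _mm_unpackel_pu8 (pixels); /* low four bytes, zero-extended */
    __m64 hi16 = _mm_unpackeh_pu8 (pixels); /* high four bytes, zero-extended */
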
+/* Add the 8-bit values in M1 to the 8-bit values in M2. */
+static __inline __m64
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2. */
+static __inline __m64
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2. */
+static __inline __m64
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddbss ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2 using signed
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddbus ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2 using unsigned
+ saturated arithmetic. */
+static __inline __m64
+_mm_adds_pu32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_waddwus ((__v2si)__m1, (__v2si)__m2);
+}
+
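A worked example of the difference between wrapping and saturating
addition (illustrative values only):

    __m64 x = _mm_set1_pi8 ((char) 200);
    __m64 y = _mm_set1_pi8 (100);
    __m64 wrapped = _mm_add_pi8 (x, y);  /* each byte: 300 mod 256 = 44 */
    __m64 clamped = _mm_adds_pu8 (x, y); /* each byte saturates to 255 */
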
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
+static __inline __m64
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
+static __inline __m64
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
+static __inline __m64
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+ saturating arithmetic. */
+static __inline __m64
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubbss ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ signed saturating arithmetic. */
+static __inline __m64
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using
+ signed saturating arithmetic. */
+static __inline __m64
+_mm_subs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+ unsigned saturating arithmetic. */
+static __inline __m64
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubbus ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ unsigned saturating arithmetic. */
+static __inline __m64
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using
+ unsigned saturating arithmetic. */
+static __inline __m64
+_mm_subs_pu32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wsubwus ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
+static __inline __m64
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wmadds ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
+static __inline __m64
+_mm_madd_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wmaddu ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+ M2 and produce the high 16 bits of the 32-bit results. */
+static __inline __m64
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wmulsm ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four unsigned 16-bit values in M1 by four unsigned 16-bit values
+ in M2 and produce the high 16 bits of the 32-bit results. */
+static __inline __m64
+_mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wmulum ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+ the low 16 bits of the results. */
+static __inline __m64
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wmulul ((__v4hi)__m1, (__v4hi)__m2);
+}
+
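The high/low pair can reconstruct full 32-bit products; a sketch with
hypothetical operands:

    __m64 hi = _mm_mulhi_pi16 (a, b); /* upper 16 bits of each product */
    __m64 lo = _mm_mullo_pi16 (a, b); /* lower 16 bits of each product */
    /* Interleaving then yields the full 32-bit products two at a time:
       _mm_unpacklo_pi16 (lo, hi) holds the products of elements 0 and 1.  */
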
+/* Shift four 16-bit values in M left by COUNT. */
+static __inline __m64
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsllh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsllhi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M left by COUNT. */
+static __inline __m64
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsllw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsllwi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT. */
+static __inline __m64
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wslld (__m, __count);
+}
+
+static __inline __m64
+_mm_slli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wslldi (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
+static __inline __m64
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsrah ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsrahi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
+static __inline __m64
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsraw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsrawi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in the sign bit. */
+static __inline __m64
+_mm_sra_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsrad (__m, __count);
+}
+
+static __inline __m64
+_mm_srai_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsradi (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
+static __inline __m64
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsrlh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsrlhi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
+static __inline __m64
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsrlw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsrlwi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
+static __inline __m64
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wsrld (__m, __count);
+}
+
+static __inline __m64
+_mm_srli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wsrldi (__m, __count);
+}
+
+/* Rotate four 16-bit values in M right by COUNT. */
+static __inline __m64
+_mm_ror_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wrorh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_rori_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wrorhi ((__v4hi)__m, __count);
+}
+
+/* Rotate two 32-bit values in M right by COUNT. */
+static __inline __m64
+_mm_ror_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wrorw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_rori_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wrorwi ((__v2si)__m, __count);
+}
+
+/* Rotate the 64-bit value in M right by COUNT. */
+static __inline __m64
+_mm_ror_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_arm_wrord (__m, __count);
+}
+
+static __inline __m64
+_mm_rori_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_arm_wrordi (__m, __count);
+}
+
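A short illustration of arithmetic versus logical right shift (value
hypothetical):

    __m64 v = _mm_set1_pi16 (-8);   /* each element 0xFFF8 */
    __m64 a = _mm_srai_pi16 (v, 2); /* sign bits shifted in: 0xFFFE == -2 */
    __m64 l = _mm_srli_pi16 (v, 2); /* zeros shifted in: 0x3FFE */
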
+/* Bit-wise AND the 64-bit values in M1 and M2. */
+static __inline __m64
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_arm_wand (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+ 64-bit value in M2. */
+static __inline __m64
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_arm_wandn (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
+static __inline __m64
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_arm_wor (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
+static __inline __m64
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_arm_wxor (__m1, __m2);
+}
+
+/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
+ test is true and zero if false. */
+static __inline __m64
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtub ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
+ the test is true and zero if false. */
+static __inline __m64
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpeqh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtsh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtuh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
+ the test is true and zero if false. */
+static __inline __m64
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpeqw ((__v2si)__m1, (__v2si)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtsw ((__v2si)__m1, (__v2si)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_arm_wcmpgtuw ((__v2si)__m1, (__v2si)__m2);
+}
+
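Because the comparisons yield all-ones/all-zeros masks, they compose into
branchless selects.  A sketch (equivalent in effect to _mm_max_pi16 below,
shown only to illustrate the mask idiom; operands hypothetical):

    __m64 mask = _mm_cmpgt_pi16 (a, b);
    /* Picks the element of a where a > b, else the element of b;
       _mm_andnot_si64 computes ~mask & b.  */
    __m64 max = _mm_or_si64 (_mm_and_si64 (mask, a),
                             _mm_andnot_si64 (mask, b));
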
+/* Element-wise multiplication of the unsigned 16-bit values in __B and __C,
+ followed by accumulation across all elements and addition of __A. */
+static __inline __m64
+_mm_mac_pu16 (__m64 __A, __m64 __B, __m64 __C)
+{
+ return __builtin_arm_wmacu (__A, (__v4hi)__B, (__v4hi)__C);
+}
+
+/* Element-wise multiplication of the signed 16-bit values in __B and __C,
+ followed by accumulation across all elements and addition of __A. */
+static __inline __m64
+_mm_mac_pi16 (__m64 __A, __m64 __B, __m64 __C)
+{
+ return __builtin_arm_wmacs (__A, (__v4hi)__B, (__v4hi)__C);
+}
+
+/* Element-wise multiplication of the unsigned 16-bit values in __A and __B,
+ followed by accumulation across all elements. */
+static __inline __m64
+_mm_macz_pu16 (__m64 __A, __m64 __B)
+{
+ return __builtin_arm_wmacuz ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Element-wise multiplication of the signed 16-bit values in __A and __B,
+ followed by accumulation across all elements. */
+static __inline __m64
+_mm_macz_pi16 (__m64 __A, __m64 __B)
+{
+ return __builtin_arm_wmacsz ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Accumulate across all unsigned 8-bit values in __A. */
+static __inline __m64
+_mm_acc_pu8 (__m64 __A)
+{
+ return __builtin_arm_waccb ((__v8qi)__A);
+}
+
+/* Accumulate across all unsigned 16-bit values in __A. */
+static __inline __m64
+_mm_acc_pu16 (__m64 __A)
+{
+ return __builtin_arm_wacch ((__v4hi)__A);
+}
+
+/* Accumulate across all unsigned 32-bit values in __A. */
+static __inline __m64
+_mm_acc_pu32 (__m64 __A)
+{
+ return __builtin_arm_waccw ((__v2si)__A);
+}
+
+static __inline __m64
+_mm_mia_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmia (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miaph_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmiaph (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miabb_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmiabb (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miabt_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmiabt (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miatb_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmiatb (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miatt_si64 (__m64 __A, int __B, int __C)
+{
+ return __builtin_arm_tmiatt (__A, __B, __C);
+}
+
+/* Extract one of the elements of A and sign extend. The selector N must
+ be immediate. */
+#define _mm_extract_pi8(A, N) __builtin_arm_textrmsb ((__v8qi)(A), (N))
+#define _mm_extract_pi16(A, N) __builtin_arm_textrmsh ((__v4hi)(A), (N))
+#define _mm_extract_pi32(A, N) __builtin_arm_textrmsw ((__v2si)(A), (N))
+
+/* Extract one of the elements of A and zero extend. The selector N must
+ be immediate. */
+#define _mm_extract_pu8(A, N) __builtin_arm_textrmub ((__v8qi)(A), (N))
+#define _mm_extract_pu16(A, N) __builtin_arm_textrmuh ((__v4hi)(A), (N))
+#define _mm_extract_pu32(A, N) __builtin_arm_textrmuw ((__v2si)(A), (N))
+
+/* Inserts word D into one of the elements of A. The selector N must be
+ immediate. */
+#define _mm_insert_pi8(A, D, N) \
+ ((__m64) __builtin_arm_tinsrb ((__v8qi)(A), (D), (N)))
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_arm_tinsrh ((__v4hi)(A), (D), (N)))
+#define _mm_insert_pi32(A, D, N) \
+ ((__m64) __builtin_arm_tinsrw ((__v2si)(A), (D), (N)))
+
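A brief sketch of extract/insert use; the selector really must be a
compile-time constant, since it becomes an immediate field of the
instruction (`v' is hypothetical):

    int e2 = _mm_extract_pi16 (v, 2);         /* element 2, sign-extended */
    __m64 w = _mm_insert_pi16 (v, 0x7FFF, 0); /* replace element 0 */
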
+/* Compute the element-wise maximum of signed 8-bit values. */
+static __inline __m64
+_mm_max_pi8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxsb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise maximum of signed 16-bit values. */
+static __inline __m64
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxsh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise maximum of signed 32-bit values. */
+static __inline __m64
+_mm_max_pi32 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxsw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values. */
+static __inline __m64
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 16-bit values. */
+static __inline __m64
+_mm_max_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxuh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 32-bit values. */
+static __inline __m64
+_mm_max_pu32 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wmaxuw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise minimum of signed 8-bit values. */
+static __inline __m64
+_mm_min_pi8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminsb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values. */
+static __inline __m64
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminsh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise minimum of signed 32-bit values. */
+static __inline __m64
+_mm_min_pi32 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminsw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values. */
+static __inline __m64
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 16-bit values. */
+static __inline __m64
+_mm_min_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminuh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 32-bit values. */
+static __inline __m64
+_mm_min_pu32 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wminuw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values. */
+static __inline int
+_mm_movemask_pi8 (__m64 __A)
+{
+ return __builtin_arm_tmovmskb ((__v8qi)__A);
+}
+
+/* Create an 8-bit mask of the signs of 16-bit values. */
+static __inline int
+_mm_movemask_pi16 (__m64 __A)
+{
+ return __builtin_arm_tmovmskh ((__v4hi)__A);
+}
+
+/* Create an 8-bit mask of the signs of 32-bit values. */
+static __inline int
+_mm_movemask_pi32 (__m64 __A)
+{
+ return __builtin_arm_tmovmskw ((__v2si)__A);
+}
+
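The mask packs one sign bit per element, so a nonzero result after a
compare means at least one element matched.  A sketch with hypothetical
`data' and `key' vectors:

    if (_mm_movemask_pi8 (_mm_cmpeq_pi8 (data, key)) != 0)
      /* ... at least one byte of data equals the corresponding key byte ... */;
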
+/* Return a combination of the four 16-bit values in A. The selector
+ must be an immediate. */
+#define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_arm_wshufh ((__v4hi)(A), (N)))
+
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
+static __inline __m64
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wavg2br ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
+static __inline __m64
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wavg2hr ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the averages of the unsigned 8-bit values in A and B. */
+static __inline __m64
+_mm_avg2_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wavg2b ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the averages of the unsigned 16-bit values in A and B. */
+static __inline __m64
+_mm_avg2_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wavg2h ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. */
+static __inline __m64
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 16-bit
+ values in A and B. Return the value in the lower 32-bit word; the
+ upper words are cleared. */
+static __inline __m64
+_mm_sad_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. */
+static __inline __m64
+_mm_sadz_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 16-bit
+ values in A and B. Return the value in the lower 32-bit word; the
+ upper words are cleared. */
+static __inline __m64
+_mm_sadz_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B);
+}
+
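The SAD forms are the building block of block-matching metrics; since the
scalar sum lands in the low word with the upper words cleared, it can be
read back directly.  A sketch with hypothetical row vectors:

    __m64 sad = _mm_sad_pu8 (row_a, row_b);
    int cost = _mm_cvtsi64_si32 (sad); /* upper words are already zero */
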
+static __inline __m64
+_mm_align_si64 (__m64 __A, __m64 __B, int __C)
+{
+ return (__m64) __builtin_arm_walign ((__v8qi)__A, (__v8qi)__B, __C);
+}
+
+/* Creates a 64-bit zero. */
+static __inline __m64
+_mm_setzero_si64 (void)
+{
+ return __builtin_arm_wzero ();
+}
+
+/* Set and Get arbitrary iWMMXt Control registers.
+ Note only registers 0-3 and 8-11 are currently defined,
+ the rest are reserved. */
+
+static __inline void
+_mm_setwcx (const int __value, const int __regno)
+{
+ switch (__regno)
+ {
+ case 0: __builtin_arm_setwcx (__value, 0); break;
+ case 1: __builtin_arm_setwcx (__value, 1); break;
+ case 2: __builtin_arm_setwcx (__value, 2); break;
+ case 3: __builtin_arm_setwcx (__value, 3); break;
+ case 8: __builtin_arm_setwcx (__value, 8); break;
+ case 9: __builtin_arm_setwcx (__value, 9); break;
+ case 10: __builtin_arm_setwcx (__value, 10); break;
+ case 11: __builtin_arm_setwcx (__value, 11); break;
+ default: break;
+ }
+}
+
+static __inline int
+_mm_getwcx (const int __regno)
+{
+ switch (__regno)
+ {
+ case 0: return __builtin_arm_getwcx (0);
+ case 1: return __builtin_arm_getwcx (1);
+ case 2: return __builtin_arm_getwcx (2);
+ case 3: return __builtin_arm_getwcx (3);
+ case 8: return __builtin_arm_getwcx (8);
+ case 9: return __builtin_arm_getwcx (9);
+ case 10: return __builtin_arm_getwcx (10);
+ case 11: return __builtin_arm_getwcx (11);
+ default: return 0;
+ }
+}
+
+/* Creates a vector of two 32-bit values; I0 is least significant. */
+static __inline __m64
+_mm_set_pi32 (int __i1, int __i0)
+{
+ union {
+ __m64 __q;
+ struct {
+ unsigned int __i0;
+ unsigned int __i1;
+ } __s;
+ } __u;
+
+ __u.__s.__i0 = __i0;
+ __u.__s.__i1 = __i1;
+
+ return __u.__q;
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant. */
+static __inline __m64
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+ unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2;
+ unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0;
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant. */
+static __inline __m64
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+ char __b3, char __b2, char __b1, char __b0)
+{
+ unsigned int __i1, __i0;
+
+ __i1 = (unsigned char)__b7;
+ __i1 = __i1 << 8 | (unsigned char)__b6;
+ __i1 = __i1 << 8 | (unsigned char)__b5;
+ __i1 = __i1 << 8 | (unsigned char)__b4;
+
+ __i0 = (unsigned char)__b3;
+ __i0 = __i0 << 8 | (unsigned char)__b2;
+ __i0 = __i0 << 8 | (unsigned char)__b1;
+ __i0 = __i0 << 8 | (unsigned char)__b0;
+
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+/* Similar, but with the arguments in reverse order. */
+static __inline __m64
+_mm_setr_pi32 (int __i0, int __i1)
+{
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+static __inline __m64
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+ return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+static __inline __m64
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+ char __b4, char __b5, char __b6, char __b7)
+{
+ return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
+
+/* Creates a vector of two 32-bit values, both elements containing I. */
+static __inline __m64
+_mm_set1_pi32 (int __i)
+{
+ return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W. */
+static __inline __m64
+_mm_set1_pi16 (short __w)
+{
+ unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w;
+ return _mm_set1_pi32 (__i);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B. */
+static __inline __m64
+_mm_set1_pi8 (char __b)
+{
+ unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
+ unsigned int __i = __w << 16 | __w;
+ return _mm_set1_pi32 (__i);
+}
+
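The set/setr pairs differ only in argument order: _mm_set_* lists elements
most significant first, _mm_setr_* least significant first.  These two
hypothetical vectors are therefore identical:

    __m64 p = _mm_set_pi16 (3, 2, 1, 0);
    __m64 q = _mm_setr_pi16 (0, 1, 2, 3); /* same vector as p */
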
+/* Convert an integer to a __m64 object. */
+static __inline __m64
+_m_from_int (int __a)
+{
+ return (__m64)__a;
+}
+
+#define _m_packsswb _mm_packs_pi16
+#define _m_packssdw _mm_packs_pi32
+#define _m_packuswb _mm_packs_pu16
+#define _m_packusdw _mm_packs_pu32
+#define _m_packssqd _mm_packs_pi64
+#define _m_packusqd _mm_packs_pu64
+#define _mm_packs_si64 _mm_packs_pi64
+#define _mm_packs_su64 _mm_packs_pu64
+#define _m_punpckhbw _mm_unpackhi_pi8
+#define _m_punpckhwd _mm_unpackhi_pi16
+#define _m_punpckhdq _mm_unpackhi_pi32
+#define _m_punpcklbw _mm_unpacklo_pi8
+#define _m_punpcklwd _mm_unpacklo_pi16
+#define _m_punpckldq _mm_unpacklo_pi32
+#define _m_punpckehsbw _mm_unpackeh_pi8
+#define _m_punpckehswd _mm_unpackeh_pi16
+#define _m_punpckehsdq _mm_unpackeh_pi32
+#define _m_punpckehubw _mm_unpackeh_pu8
+#define _m_punpckehuwd _mm_unpackeh_pu16
+#define _m_punpckehudq _mm_unpackeh_pu32
+#define _m_punpckelsbw _mm_unpackel_pi8
+#define _m_punpckelswd _mm_unpackel_pi16
+#define _m_punpckelsdq _mm_unpackel_pi32
+#define _m_punpckelubw _mm_unpackel_pu8
+#define _m_punpckeluwd _mm_unpackel_pu16
+#define _m_punpckeludq _mm_unpackel_pu32
+#define _m_paddb _mm_add_pi8
+#define _m_paddw _mm_add_pi16
+#define _m_paddd _mm_add_pi32
+#define _m_paddsb _mm_adds_pi8
+#define _m_paddsw _mm_adds_pi16
+#define _m_paddsd _mm_adds_pi32
+#define _m_paddusb _mm_adds_pu8
+#define _m_paddusw _mm_adds_pu16
+#define _m_paddusd _mm_adds_pu32
+#define _m_psubb _mm_sub_pi8
+#define _m_psubw _mm_sub_pi16
+#define _m_psubd _mm_sub_pi32
+#define _m_psubsb _mm_subs_pi8
+#define _m_psubsw _mm_subs_pi16
+#define _m_psubsd _mm_subs_pi32
+#define _m_psubusb _mm_subs_pu8
+#define _m_psubusw _mm_subs_pu16
+#define _m_psubusd _mm_subs_pu32
+#define _m_pmaddwd _mm_madd_pi16
+#define _m_pmadduwd _mm_madd_pu16
+#define _m_pmulhw _mm_mulhi_pi16
+#define _m_pmulhuw _mm_mulhi_pu16
+#define _m_pmullw _mm_mullo_pi16
+#define _m_pmacsw _mm_mac_pi16
+#define _m_pmacuw _mm_mac_pu16
+#define _m_pmacszw _mm_macz_pi16
+#define _m_pmacuzw _mm_macz_pu16
+#define _m_paccb _mm_acc_pu8
+#define _m_paccw _mm_acc_pu16
+#define _m_paccd _mm_acc_pu32
+#define _m_pmia _mm_mia_si64
+#define _m_pmiaph _mm_miaph_si64
+#define _m_pmiabb _mm_miabb_si64
+#define _m_pmiabt _mm_miabt_si64
+#define _m_pmiatb _mm_miatb_si64
+#define _m_pmiatt _mm_miatt_si64
+#define _m_psllw _mm_sll_pi16
+#define _m_psllwi _mm_slli_pi16
+#define _m_pslld _mm_sll_pi32
+#define _m_pslldi _mm_slli_pi32
+#define _m_psllq _mm_sll_si64
+#define _m_psllqi _mm_slli_si64
+#define _m_psraw _mm_sra_pi16
+#define _m_psrawi _mm_srai_pi16
+#define _m_psrad _mm_sra_pi32
+#define _m_psradi _mm_srai_pi32
+#define _m_psraq _mm_sra_si64
+#define _m_psraqi _mm_srai_si64
+#define _m_psrlw _mm_srl_pi16
+#define _m_psrlwi _mm_srli_pi16
+#define _m_psrld _mm_srl_pi32
+#define _m_psrldi _mm_srli_pi32
+#define _m_psrlq _mm_srl_si64
+#define _m_psrlqi _mm_srli_si64
+#define _m_prorw _mm_ror_pi16
+#define _m_prorwi _mm_rori_pi16
+#define _m_prord _mm_ror_pi32
+#define _m_prordi _mm_rori_pi32
+#define _m_prorq _mm_ror_si64
+#define _m_prorqi _mm_rori_si64
+#define _m_pand _mm_and_si64
+#define _m_pandn _mm_andnot_si64
+#define _m_por _mm_or_si64
+#define _m_pxor _mm_xor_si64
+#define _m_pcmpeqb _mm_cmpeq_pi8
+#define _m_pcmpeqw _mm_cmpeq_pi16
+#define _m_pcmpeqd _mm_cmpeq_pi32
+#define _m_pcmpgtb _mm_cmpgt_pi8
+#define _m_pcmpgtub _mm_cmpgt_pu8
+#define _m_pcmpgtw _mm_cmpgt_pi16
+#define _m_pcmpgtuw _mm_cmpgt_pu16
+#define _m_pcmpgtd _mm_cmpgt_pi32
+#define _m_pcmpgtud _mm_cmpgt_pu32
+#define _m_pextrb _mm_extract_pi8
+#define _m_pextrw _mm_extract_pi16
+#define _m_pextrd _mm_extract_pi32
+#define _m_pextrub _mm_extract_pu8
+#define _m_pextruw _mm_extract_pu16
+#define _m_pextrud _mm_extract_pu32
+#define _m_pinsrb _mm_insert_pi8
+#define _m_pinsrw _mm_insert_pi16
+#define _m_pinsrd _mm_insert_pi32
+#define _m_pmaxsb _mm_max_pi8
+#define _m_pmaxsw _mm_max_pi16
+#define _m_pmaxsd _mm_max_pi32
+#define _m_pmaxub _mm_max_pu8
+#define _m_pmaxuw _mm_max_pu16
+#define _m_pmaxud _mm_max_pu32
+#define _m_pminsb _mm_min_pi8
+#define _m_pminsw _mm_min_pi16
+#define _m_pminsd _mm_min_pi32
+#define _m_pminub _mm_min_pu8
+#define _m_pminuw _mm_min_pu16
+#define _m_pminud _mm_min_pu32
+#define _m_pmovmskb _mm_movemask_pi8
+#define _m_pmovmskw _mm_movemask_pi16
+#define _m_pmovmskd _mm_movemask_pi32
+#define _m_pshufw _mm_shuffle_pi16
+#define _m_pavgb _mm_avg_pu8
+#define _m_pavgw _mm_avg_pu16
+#define _m_pavg2b _mm_avg2_pu8
+#define _m_pavg2w _mm_avg2_pu16
+#define _m_psadbw _mm_sad_pu8
+#define _m_psadwd _mm_sad_pu16
+#define _m_psadzbw _mm_sadz_pu8
+#define _m_psadzwd _mm_sadz_pu16
+#define _m_paligniq _mm_align_si64
+#define _m_cvt_si2pi _mm_cvtsi64_m64
+#define _m_cvt_pi2si _mm_cvtm64_si64
+
+#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc/config/arm/neon-docgen.ml b/gcc/config/arm/neon-docgen.ml
new file mode 100644
index 000000000..23e37b498
--- /dev/null
+++ b/gcc/config/arm/neon-docgen.ml
@@ -0,0 +1,337 @@
+(* ARM NEON documentation generator.
+
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is an O'Caml program. The O'Caml compiler is available from:
+
+ http://caml.inria.fr/
+
+ Or from your favourite OS's friendly packaging system. Tested with version
+ 3.09.2, though other versions will probably work too.
+
+ Compile with:
+ ocamlc -c neon.ml
+ ocamlc -o neon-docgen neon.cmo neon-docgen.ml
+
+ Run with:
+ /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
+*)
+
+open Neon
+
+(* The combined "ops" and "reinterp" table. *)
+let ops_reinterp = reinterp @ ops
+
+(* Helper functions for extracting things from the "ops" table. *)
+let single_opcode desired_opcode () =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (opcode, _, _, _, _, _) ->
+ if opcode = desired_opcode then row :: got_so_far
+ else got_so_far
+ ) [] ops_reinterp
+
+let multiple_opcodes desired_opcodes () =
+ List.fold_left (fun got_so_far ->
+ fun desired_opcode ->
+ (single_opcode desired_opcode ()) @ got_so_far)
+ [] desired_opcodes
+
+let ldx_opcode number () =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (opcode, _, _, _, _, _) ->
+ match opcode with
+ Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
+ row :: got_so_far
+ | _ -> got_so_far
+ ) [] ops_reinterp
+
+let stx_opcode number () =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (opcode, _, _, _, _, _) ->
+ match opcode with
+ Vstx n | Vstx_lane n when n = number ->
+ row :: got_so_far
+ | _ -> got_so_far
+ ) [] ops_reinterp
+
+let tbl_opcode () =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (opcode, _, _, _, _, _) ->
+ match opcode with
+ Vtbl _ -> row :: got_so_far
+ | _ -> got_so_far
+ ) [] ops_reinterp
+
+let tbx_opcode () =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (opcode, _, _, _, _, _) ->
+ match opcode with
+ Vtbx _ -> row :: got_so_far
+ | _ -> got_so_far
+ ) [] ops_reinterp
+
+(* The groups of intrinsics. *)
+let intrinsic_groups =
+ [ "Addition", single_opcode Vadd;
+ "Multiplication", single_opcode Vmul;
+ "Multiply-accumulate", single_opcode Vmla;
+ "Multiply-subtract", single_opcode Vmls;
+ "Subtraction", single_opcode Vsub;
+ "Comparison (equal-to)", single_opcode Vceq;
+ "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
+ "Comparison (less-than-or-equal-to)", single_opcode Vcle;
+ "Comparison (greater-than)", single_opcode Vcgt;
+ "Comparison (less-than)", single_opcode Vclt;
+ "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
+ "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
+ "Comparison (absolute greater-than)", single_opcode Vcagt;
+ "Comparison (absolute less-than)", single_opcode Vcalt;
+ "Test bits", single_opcode Vtst;
+ "Absolute difference", single_opcode Vabd;
+ "Absolute difference and accumulate", single_opcode Vaba;
+ "Maximum", single_opcode Vmax;
+ "Minimum", single_opcode Vmin;
+ "Pairwise add", single_opcode Vpadd;
+ "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
+ "Folding maximum", single_opcode Vpmax;
+ "Folding minimum", single_opcode Vpmin;
+ "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
+ "Vector shift left", single_opcode Vshl;
+ "Vector shift left by constant", single_opcode Vshl_n;
+ "Vector shift right by constant", single_opcode Vshr_n;
+ "Vector shift right by constant and accumulate", single_opcode Vsra_n;
+ "Vector shift right and insert", single_opcode Vsri;
+ "Vector shift left and insert", single_opcode Vsli;
+ "Absolute value", single_opcode Vabs;
+ "Negation", single_opcode Vneg;
+ "Bitwise not", single_opcode Vmvn;
+ "Count leading sign bits", single_opcode Vcls;
+ "Count leading zeros", single_opcode Vclz;
+ "Count number of set bits", single_opcode Vcnt;
+ "Reciprocal estimate", single_opcode Vrecpe;
+ "Reciprocal square-root estimate", single_opcode Vrsqrte;
+ "Get lanes from a vector", single_opcode Vget_lane;
+ "Set lanes in a vector", single_opcode Vset_lane;
+ "Create vector from literal bit pattern", single_opcode Vcreate;
+ "Set all lanes to the same value",
+ multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
+ "Combining vectors", single_opcode Vcombine;
+ "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
+ "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
+ "Move, single_opcode narrowing", single_opcode Vmovn;
+ "Move, single_opcode long", single_opcode Vmovl;
+ "Table lookup", tbl_opcode;
+ "Extended table lookup", tbx_opcode;
+ "Multiply, lane", single_opcode Vmul_lane;
+ "Long multiply, lane", single_opcode Vmull_lane;
+ "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
+ "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
+ "Multiply-accumulate, lane", single_opcode Vmla_lane;
+ "Multiply-subtract, lane", single_opcode Vmls_lane;
+ "Vector multiply by scalar", single_opcode Vmul_n;
+ "Vector long multiply by scalar", single_opcode Vmull_n;
+ "Vector saturating doubling long multiply by scalar",
+ single_opcode Vqdmull_n;
+ "Vector saturating doubling multiply high by scalar",
+ single_opcode Vqdmulh_n;
+ "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
+ "Vector multiply-subtract by scalar", single_opcode Vmls_n;
+ "Vector extract", single_opcode Vext;
+ "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
+ "Bit selection", single_opcode Vbsl;
+ "Transpose elements", single_opcode Vtrn;
+ "Zip elements", single_opcode Vzip;
+ "Unzip elements", single_opcode Vuzp;
+ "Element/structure loads, VLD1 variants", ldx_opcode 1;
+ "Element/structure stores, VST1 variants", stx_opcode 1;
+ "Element/structure loads, VLD2 variants", ldx_opcode 2;
+ "Element/structure stores, VST2 variants", stx_opcode 2;
+ "Element/structure loads, VLD3 variants", ldx_opcode 3;
+ "Element/structure stores, VST3 variants", stx_opcode 3;
+ "Element/structure loads, VLD4 variants", ldx_opcode 4;
+ "Element/structure stores, VST4 variants", stx_opcode 4;
+ "Logical operations (AND)", single_opcode Vand;
+ "Logical operations (OR)", single_opcode Vorr;
+ "Logical operations (exclusive OR)", single_opcode Veor;
+ "Logical operations (AND-NOT)", single_opcode Vbic;
+ "Logical operations (OR-NOT)", single_opcode Vorn;
+ "Reinterpret casts", single_opcode Vreinterp ]
+
+(* Given an intrinsic shape, produce a string to document the corresponding
+ operand shapes. *)
+let rec analyze_shape shape =
+ let rec n_things n thing =
+ match n with
+ 0 -> []
+ | n -> thing :: (n_things (n - 1) thing)
+ in
+ let rec analyze_shape_elt reg_no elt =
+ match elt with
+ Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
+ | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
+ | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
+ | Immed -> "#@var{0}"
+ | VecArray (1, elt) ->
+ let elt_regexp = analyze_shape_elt 0 elt in
+ "@{" ^ elt_regexp ^ "@}"
+ | VecArray (n, elt) ->
+ let rec f m =
+ match m with
+ 0 -> []
+ | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
+ in
+ let ops = List.rev (f n) in
+ "@{" ^ (commas (fun x -> x) ops "") ^ "@}"
+ | (PtrTo elt | CstPtrTo elt) ->
+ "[" ^ (analyze_shape_elt reg_no elt) ^ "]"
+ | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
+ | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
+ | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
+ | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts))
+ in
+ match shape with
+ All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
+ | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
+ ", " ^ (analyze_shape_elt 0 Dreg)
+ | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
+ (analyze_shape_elt 0 elt)
+ | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
+ ", " ^ (analyze_shape_elt 0 Dreg)
+ | Wide_noreg elt -> analyze_shape (Long_noreg elt)
+ | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
+ ", " ^ (analyze_shape_elt 0 Qreg)
+ | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
+ | By_scalar Dreg ->
+ analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
+ | By_scalar Qreg ->
+ analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
+ | By_scalar _ -> assert false
+ | Wide_lane ->
+ analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
+ | Wide_scalar ->
+ analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
+ | Pair_result elt ->
+ let elt_regexp = analyze_shape_elt 0 elt in
+ let elt_regexp' = analyze_shape_elt 1 elt in
+ elt_regexp ^ ", " ^ elt_regexp'
+ | Unary_scalar _ -> "FIXME Unary_scalar"
+ | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
+ | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
+ | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
+
+(* Document a single intrinsic. *)
+let describe_intrinsic first chan
+ (elt_ty, (_, features, shape, name, munge, _)) =
+ let c_arity, new_elt_ty = munge shape elt_ty in
+ let c_types = strings_of_arity c_arity in
+ Printf.fprintf chan "@itemize @bullet\n";
+ let item_code = if first then "@item" else "@itemx" in
+ Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
+ (intrinsic_name name) (string_of_elt elt_ty);
+ Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
+ if not (List.exists (fun feature -> feature = No_op) features) then
+ begin
+ let print_one_insn name =
+ Printf.fprintf chan "@code{";
+ let no_suffix = (new_elt_ty = NoElts) in
+ let name_with_suffix =
+ if no_suffix then name
+ else name ^ "." ^ (string_of_elt_dots new_elt_ty)
+ in
+ let possible_operands = analyze_all_shapes features shape
+ analyze_shape
+ in
+ let rec print_one_possible_operand op =
+ Printf.fprintf chan "%s %s}" name_with_suffix op
+ in
+ (* If the intrinsic expands to multiple instructions, we assume
+ they are all of the same form. *)
+ print_one_possible_operand (List.hd possible_operands)
+ in
+ let rec print_insns names =
+ match names with
+ [] -> ()
+ | [name] -> print_one_insn name
+ | name::names -> (print_one_insn name;
+ Printf.fprintf chan " @emph{or} ";
+ print_insns names)
+ in
+ let insn_names = get_insn_names features name in
+ Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
+ print_insns insn_names;
+ Printf.fprintf chan "\n"
+ end;
+ Printf.fprintf chan "@end itemize\n";
+ Printf.fprintf chan "\n\n"
+
+(* Document a group of intrinsics. *)
+let document_group chan (group_title, group_extractor) =
+ (* Extract the rows in question from the ops table and then turn them
+ into a list of intrinsics. *)
+ let intrinsics =
+ List.fold_left (fun got_so_far ->
+ fun row ->
+ match row with
+ (_, _, _, _, _, elt_tys) ->
+ List.fold_left (fun got_so_far' ->
+ fun elt_ty ->
+ (elt_ty, row) :: got_so_far')
+ got_so_far elt_tys
+ ) [] (group_extractor ())
+ in
+ (* Emit the title for this group. *)
+ Printf.fprintf chan "@subsubsection %s\n\n" group_title;
+ (* Emit a description of each intrinsic. *)
+ List.iter (describe_intrinsic true chan) intrinsics;
+ (* Close this group. *)
+ Printf.fprintf chan "\n\n"
+
+let gnu_header chan =
+ List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
+ "@c Copyright (C) 2006 Free Software Foundation, Inc.";
+ "@c This is part of the GCC manual.";
+ "@c For copying conditions, see the file gcc.texi.";
+ "";
+ "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
+ "@c Please do not edit manually."]
+
+(* Program entry point. *)
+let _ =
+ if Array.length Sys.argv <> 2 then
+ failwith "Usage: neon-docgen <output filename>"
+ else
+ let file = Sys.argv.(1) in
+ try
+ let chan = open_out file in
+ gnu_header chan;
+ List.iter (document_group chan) intrinsic_groups;
+ close_out chan
+ with Sys_error sys ->
+ failwith ("Could not create output file " ^ file ^ ": " ^ sys)
diff --git a/gcc/config/arm/neon-gen.ml b/gcc/config/arm/neon-gen.ml
new file mode 100644
index 000000000..112c8be6e
--- /dev/null
+++ b/gcc/config/arm/neon-gen.ml
@@ -0,0 +1,416 @@
+(* Auto-generate ARM Neon intrinsics header file.
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is an O'Caml program. The O'Caml compiler is available from:
+
+ http://caml.inria.fr/
+
+ Or from your favourite OS's friendly packaging system. Tested with version
+ 3.09.2, though other versions will probably work too.
+
+ Compile with:
+ ocamlc -c neon.ml
+ ocamlc -o neon-gen neon.cmo neon-gen.ml
+
+ Run with:
+ ./neon-gen > arm_neon.h
+*)
+
+open Neon
+
+(* The format codes used in the following functions are documented at:
+ http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
+ #6_printflikefunctionsforprettyprinting
+ (one line, remove the backslash.)
+*)
+
+(* Following functions can be used to approximate GNU indentation style. *)
+let start_function () =
+ Format.printf "@[<v 0>";
+ ref 0
+
+let end_function nesting =
+ match !nesting with
+ 0 -> Format.printf "@;@;@]"
+ | _ -> failwith ("Bad nesting (ending function at level "
+ ^ (string_of_int !nesting) ^ ")")
+
+let open_braceblock nesting =
+ begin match !nesting with
+ 0 -> Format.printf "@,@<0>{@[<v 2>@,"
+ | _ -> Format.printf "@,@[<v 2> @<0>{@[<v 2>@,"
+ end;
+ incr nesting
+
+let close_braceblock nesting =
+ decr nesting;
+ match !nesting with
+ 0 -> Format.printf "@]@,@<0>}"
+ | _ -> Format.printf "@]@,@<0>}@]"
+
+let print_function arity fnname body =
+ let ffmt = start_function () in
+ Format.printf "__extension__ static __inline ";
+ let inl = "__attribute__ ((__always_inline__))" in
+ begin match arity with
+ Arity0 ret ->
+ Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname
+ | Arity1 (ret, arg0) ->
+ Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname
+ (string_of_vectype arg0)
+ | Arity2 (ret, arg0, arg1) ->
+ Format.printf "%s %s@,%s (%s __a, %s __b)"
+ (string_of_vectype ret) inl fnname (string_of_vectype arg0)
+ (string_of_vectype arg1)
+ | Arity3 (ret, arg0, arg1, arg2) ->
+ Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)"
+ (string_of_vectype ret) inl fnname (string_of_vectype arg0)
+ (string_of_vectype arg1) (string_of_vectype arg2)
+ | Arity4 (ret, arg0, arg1, arg2, arg3) ->
+ Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
+ (string_of_vectype ret) inl fnname (string_of_vectype arg0)
+ (string_of_vectype arg1) (string_of_vectype arg2)
+ (string_of_vectype arg3)
+ end;
+ open_braceblock ffmt;
+ let rec print_lines = function
+ [] -> ()
+ | [line] -> Format.printf "%s" line
+ | line::lines -> Format.printf "%s@," line; print_lines lines in
+ print_lines body;
+ close_braceblock ffmt;
+ end_function ffmt
+
+let return_by_ptr features = List.mem ReturnPtr features
+
+let union_string num elts base =
+ let itype = inttype_for_array num elts in
+ let iname = string_of_inttype itype
+ and sname = string_of_vectype (T_arrayof (num, elts)) in
+ Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base
+
+let rec signed_ctype = function
+ T_uint8x8 | T_poly8x8 -> T_int8x8
+ | T_uint8x16 | T_poly8x16 -> T_int8x16
+ | T_uint16x4 | T_poly16x4 -> T_int16x4
+ | T_uint16x8 | T_poly16x8 -> T_int16x8
+ | T_uint32x2 -> T_int32x2
+ | T_uint32x4 -> T_int32x4
+ | T_uint64x1 -> T_int64x1
+ | T_uint64x2 -> T_int64x2
+ (* Cast to types defined by mode in arm.c, not random types pulled in from
+ the <stdint.h> header in use. This fixes incompatible pointer errors when
+ compiling with C++. *)
+ | T_uint8 | T_int8 -> T_intQI
+ | T_uint16 | T_int16 -> T_intHI
+ | T_uint32 | T_int32 -> T_intSI
+ | T_uint64 | T_int64 -> T_intDI
+ | T_float32 -> T_floatSF
+ | T_poly8 -> T_intQI
+ | T_poly16 -> T_intHI
+ | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
+ | T_ptrto elt -> T_ptrto (signed_ctype elt)
+ | T_const elt -> T_const (signed_ctype elt)
+ | x -> x
+
+let add_cast ctype cval =
+ let stype = signed_ctype ctype in
+ if ctype <> stype then
+ Printf.sprintf "(%s) %s" (string_of_vectype stype) cval
+ else
+ cval
+
+let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")"
+
+(* Return a tuple of a list of declarations to go at the start of the function,
+ and a list of statements needed to return THING. *)
+let return arity return_by_ptr thing =
+ match arity with
+ Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
+ | Arity4 (ret, _, _, _, _) ->
+ match ret with
+ T_arrayof (num, vec) ->
+ if return_by_ptr then
+ let sname = string_of_vectype ret in
+ [Printf.sprintf "%s __rv;" sname],
+ [thing ^ ";"; "return __rv;"]
+ else
+ let uname = union_string num vec "__rv" in
+ [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
+ | T_void -> [], [thing ^ ";"]
+ | _ ->
+ [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
+
+let rec element_type ctype =
+ match ctype with
+ T_arrayof (_, v) -> element_type v
+ | _ -> ctype
+
+let params return_by_ptr ps =
+ let pdecls = ref [] in
+ let ptype t p =
+ match t with
+ T_arrayof (num, elts) ->
+ let uname = union_string num elts (p ^ "u") in
+ let decl = Printf.sprintf "%s = { %s };" uname p in
+ pdecls := decl :: !pdecls;
+ p ^ "u.__o"
+ | _ -> add_cast t p in
+ let plist = match ps with
+ Arity0 _ -> []
+ | Arity1 (_, t1) -> [ptype t1 "__a"]
+ | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"]
+ | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"]
+ | Arity4 (_, t1, t2, t3, t4) ->
+ [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in
+ match ps with
+ Arity0 ret | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
+ | Arity4 (ret, _, _, _, _) ->
+ if return_by_ptr then
+ !pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist
+ else
+ !pdecls, plist
+
+let modify_params features plist =
+ let is_flipped =
+ List.exists (function Flipped _ -> true | _ -> false) features in
+ if is_flipped then
+ match plist with
+ [ a; b ] -> [ b; a ]
+ | _ ->
+ failwith ("Don't know how to flip args " ^ (String.concat ", " plist))
+ else
+ plist
+
+(* !!! Decide whether to add an extra information word based on the shape
+ form. *)
+let extra_word shape features paramlist bits =
+ let use_word =
+ match shape with
+ All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow
+ | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm
+ | Narrow_imm -> true
+ | _ -> List.mem InfoWord features
+ in
+ if use_word then
+ paramlist @ [string_of_int bits]
+ else
+ paramlist
+
+(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
+ Bit 1 represents floats & polynomials (1), or ordinary integers (0).
+ Bit 2 represents rounding (1) vs none (0). *)
+let infoword_value elttype features =
+ let bits01 =
+ match elt_class elttype with
+ Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001
+ | Poly -> 0b010
+ | Float -> 0b011
+ | _ -> 0b000
+ and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in
+ bits01 lor rounding_bit
+
+(* "Cast" type operations will throw an exception in mode_of_elt (actually in
+ elt_width, called from there). Deal with that here, and generate a suffix
+ with multiple modes (<to><from>). *)
+let rec mode_suffix elttype shape =
+ try
+ let mode = mode_of_elt elttype shape in
+ string_of_mode mode
+ with MixedMode (dst, src) ->
+ let dstmode = mode_of_elt dst shape
+ and srcmode = mode_of_elt src shape in
+ string_of_mode dstmode ^ string_of_mode srcmode
+
+let print_variant opcode features shape name (ctype, asmtype, elttype) =
+ let bits = infoword_value elttype features in
+ let modesuf = mode_suffix elttype shape in
+ let return_by_ptr = return_by_ptr features in
+ let pdecls, paramlist = params return_by_ptr ctype in
+ let paramlist' = modify_params features paramlist in
+ let paramlist'' = extra_word shape features paramlist' bits in
+ let parstr = String.concat ", " paramlist'' in
+ let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
+ (builtin_name features name) modesuf parstr in
+ let rdecls, stmts = return ctype return_by_ptr builtin in
+ let body = pdecls @ rdecls @ stmts
+ and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
+ print_function ctype fnname body
+
+(* When this function processes the element types in the ops table, it
+   rewrites them into a list of tuples (a, b, c):
+ a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
+ b : Asm type : a single, processed element type, e.g. P16. This is the
+ type which should be attached to the asm opcode.
+ c : Variant type : the unprocessed type for this variant (e.g. in add
+ instructions which don't care about the sign, b might be i16 and c
+ might be s16.)
+*)
+
+let print_op (opcode, features, shape, name, munge, types) =
+ let sorted_types = List.sort compare types in
+ let munged_types = List.map
+ (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in
+ List.iter
+ (fun variant -> print_variant opcode features shape name variant)
+ munged_types
+
+let print_ops ops =
+ List.iter print_op ops
+
+(* Output type definitions. Table entries are:
+ cbase : "C" name for the type.
+ abase : "ARM" base name for the type (i.e. int in int8x8_t).
+ esize : element size.
+ enum : element count.
+*)
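+(* As an illustration, the first doubleword entry below,
+   ("__builtin_neon_qi", "int", 8, 8), prints (modulo whitespace) as
+     typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
+   since eight 8-bit elements occupy 8 bytes. *)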
+
+let deftypes () =
+ let typeinfo = [
+ (* Doubleword vector types. *)
+ "__builtin_neon_qi", "int", 8, 8;
+ "__builtin_neon_hi", "int", 16, 4;
+ "__builtin_neon_si", "int", 32, 2;
+ "__builtin_neon_di", "int", 64, 1;
+ "__builtin_neon_sf", "float", 32, 2;
+ "__builtin_neon_poly8", "poly", 8, 8;
+ "__builtin_neon_poly16", "poly", 16, 4;
+ "__builtin_neon_uqi", "uint", 8, 8;
+ "__builtin_neon_uhi", "uint", 16, 4;
+ "__builtin_neon_usi", "uint", 32, 2;
+ "__builtin_neon_udi", "uint", 64, 1;
+
+ (* Quadword vector types. *)
+ "__builtin_neon_qi", "int", 8, 16;
+ "__builtin_neon_hi", "int", 16, 8;
+ "__builtin_neon_si", "int", 32, 4;
+ "__builtin_neon_di", "int", 64, 2;
+ "__builtin_neon_sf", "float", 32, 4;
+ "__builtin_neon_poly8", "poly", 8, 16;
+ "__builtin_neon_poly16", "poly", 16, 8;
+ "__builtin_neon_uqi", "uint", 8, 16;
+ "__builtin_neon_uhi", "uint", 16, 8;
+ "__builtin_neon_usi", "uint", 32, 4;
+ "__builtin_neon_udi", "uint", 64, 2
+ ] in
+ List.iter
+ (fun (cbase, abase, esize, enum) ->
+ let attr =
+ match enum with
+ 1 -> ""
+ | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))"
+ (esize * enum / 8) in
+ Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr)
+ typeinfo;
+ Format.print_newline ();
+ (* Extra types not in <stdint.h>. *)
+ Format.printf "typedef float float32_t;\n";
+ Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
+ Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
+
+(* Output structs containing arrays, for load & store instructions etc. *)
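+(* For example, the ("int", 8) entry with a 64-bit register size and an
+   array size of 2 yields, modulo layout from the formatting helpers in
+   neon.ml:
+     typedef struct int8x8x2_t { int8x8_t val[2]; } int8x8x2_t;
+*)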
+
+let arrtypes () =
+ let typeinfo = [
+ "int", 8; "int", 16;
+ "int", 32; "int", 64;
+ "uint", 8; "uint", 16;
+ "uint", 32; "uint", 64;
+ "float", 32; "poly", 8;
+ "poly", 16
+ ] in
+ let writestruct elname elsize regsize arrsize =
+ let elnum = regsize / elsize in
+ let structname =
+ Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in
+ let sfmt = start_function () in
+ Format.printf "typedef struct %s" structname;
+ open_braceblock sfmt;
+ Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize;
+ close_braceblock sfmt;
+ Format.printf " %s;" structname;
+ end_function sfmt;
+ in
+ for n = 2 to 4 do
+ List.iter
+ (fun (elname, elsize) ->
+ writestruct elname elsize 64 n;
+ writestruct elname elsize 128 n)
+ typeinfo
+ done
+
+let print_lines = List.iter (fun s -> Format.printf "%s@\n" s)
+
+(* Do it. *)
+
+let _ =
+ print_lines [
+"/* ARM NEON intrinsics include file. This file is generated automatically";
+" using neon-gen.ml. Please do not edit manually.";
+"";
+" Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.";
+" Contributed by CodeSourcery.";
+"";
+" This file is part of GCC.";
+"";
+" GCC is free software; you can redistribute it and/or modify it";
+" under the terms of the GNU General Public License as published";
+" by the Free Software Foundation; either version 3, or (at your";
+" option) any later version.";
+"";
+" GCC is distributed in the hope that it will be useful, but WITHOUT";
+" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
+" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
+" License for more details.";
+"";
+" Under Section 7 of GPL version 3, you are granted additional";
+" permissions described in the GCC Runtime Library Exception, version";
+" 3.1, as published by the Free Software Foundation.";
+"";
+" You should have received a copy of the GNU General Public License and";
+" a copy of the GCC Runtime Library Exception along with this program;";
+" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
+" <http://www.gnu.org/licenses/>. */";
+"";
+"#ifndef _GCC_ARM_NEON_H";
+"#define _GCC_ARM_NEON_H 1";
+"";
+"#ifndef __ARM_NEON__";
+"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
+"#else";
+"";
+"#ifdef __cplusplus";
+"extern \"C\" {";
+"#endif";
+"";
+"#include <stdint.h>";
+""];
+ deftypes ();
+ arrtypes ();
+ Format.print_newline ();
+ print_ops ops;
+ Format.print_newline ();
+ print_ops reinterp;
+ print_lines [
+"#ifdef __cplusplus";
+"}";
+"#endif";
+"#endif";
+"#endif"]
diff --git a/gcc/config/arm/neon-schedgen.ml b/gcc/config/arm/neon-schedgen.ml
new file mode 100644
index 000000000..3d9b04422
--- /dev/null
+++ b/gcc/config/arm/neon-schedgen.ml
@@ -0,0 +1,543 @@
+(* Emission of the core of the Cortex-A8 NEON scheduling description.
+ Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+*)
+
+(* This scheduling description generator works as follows.
+   - Each group of instructions has its source and destination
+     requirements specified, together with a list of supported cores.
+     The table is filtered per core, and a scheduler description is
+     generated for each; the emitted reservations are prefixed with the
+     core's name, and each guard tests the corresponding tuning string.
+     Running this program therefore generates Neon scheduler
+     descriptions for all supported cores.
+
+ The source requirements may be specified using
+ Source (the stage at which all source operands not otherwise
+ described are read), Source_m (the stage at which Rm operands are
+ read), Source_n (likewise for Rn) and Source_d (likewise for Rd).
+ - For each group of instructions the earliest stage where a source
+ operand may be required is calculated.
+ - Each group of instructions is selected in turn as a producer.
+ The latencies between this group and every other group are then
+ calculated, yielding up to four values for each combination:
+ 1. Producer -> consumer Rn latency
+ 2. Producer -> consumer Rm latency
+ 3. Producer -> consumer Rd (as a source) latency
+ 4. Producer -> consumer worst-case latency.
+     Value 4 is calculated from the destination availability requirements
+     of the producer and the earliest source availability requirements
+     of the consumer.
+ - The largest Value 4 calculated for the current producer is the
+     worst-case latency, L, for that instruction group. This value is written
+ out in a define_insn_reservation for the producer group.
+ - For each producer and consumer pair, the latencies calculated above
+ are collated. The average (of up to four values) is calculated and
+ if this average is different from the worst-case latency, an
+ unguarded define_bypass construction is issued for that pair.
+ (For each pair only one define_bypass construction will be emitted,
+ and at present we do not emit specific guards.)
+*)
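+(* A minimal sketch of the latency arithmetic used throughout this file,
+   under an illustrative name that appears nowhere else: a producer whose
+   result is available at stage PRODUCED feeds a consumer reading at stage
+   CONSUMED with latency PRODUCED - CONSUMED + 1, clamped at zero since
+   there are no delay slots (compare latency_calc below). *)
+let _example_latency ~produced ~consumed =
+  max 0 (produced - consumed + 1)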
+
+let find_with_result fn lst =
+ let rec scan = function
+ [] -> raise Not_found
+ | l::ls ->
+ match fn l with
+ Some result -> result
+ | _ -> scan ls in
+ scan lst
+
+let n1 = 1 and n2 = 2 and n3 = 3 and n4 = 4 and n5 = 5 and n6 = 6
+ and n7 = 7 and n8 = 8 and n9 = 9
+
+type availability = Source of int
+ | Source_n of int
+ | Source_m of int
+ | Source_d of int
+ | Dest of int
+ | Dest_n_after of int * int
+
+type guard = Guard_none | Guard_only_m | Guard_only_n | Guard_only_d
+
+(* Reservation behaviors. All but the last row here correspond to one
+ pipeline each. Each constructor will correspond to one
+ define_reservation. *)
+type reservation =
+ Mul | Mul_2cycle | Mul_4cycle
+| Shift | Shift_2cycle
+| ALU | ALU_2cycle
+| Fmul | Fmul_2cycle
+| Fadd | Fadd_2cycle
+(* | VFP *)
+| Permute of int
+| Ls of int
+| Fmul_then_fadd | Fmul_then_fadd_2
+
+type core = CortexA8 | CortexA9
+let allCores = [CortexA8; CortexA9]
+let coreStr = function
+ CortexA8 -> "cortex_a8"
+ | CortexA9 -> "cortex_a9"
+
+let tuneStr = function
+ CortexA8 -> "cortexa8"
+ | CortexA9 -> "cortexa9"
+
+
+(* This table must be kept as short as possible by conflating
+ entries with the same availability behavior.
+
+ First components: instruction group names
+ Second components: availability requirements, in the order in which
+ they should appear in the comments in the .md file.
+ Third components: reservation info
+ Fourth components: List of supported cores.
+*)
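+(* Reading the first entry below as an example: instructions in the
+   "neon_int_1" group read their source operands at N2, produce a result
+   at N3, occupy the ALU reservation, and are valid for all supported
+   cores. *)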
+let availability_table = [
+ (* NEON integer ALU instructions. *)
+ (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr
+ veor vbic vorn ddd qqq *)
+ "neon_int_1", [Source n2; Dest n3], ALU, allCores;
+ (* vadd vsub qqd vsub ddd qqq *)
+ "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores;
+ (* vsum vneg dd qq vadd vsub qdd *)
+ "neon_int_3", [Source n1; Dest n3], ALU, allCores;
+  (* vabs vceqz vcgez vcgtz vclez vcltz vadh vradh vsbh vrsbh dqq *)
+ (* vhadd vrhadd vqadd vtst ddd qqq *)
+ "neon_int_4", [Source n2; Dest n4], ALU, allCores;
+ (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *)
+ "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores;
+ (* vqneg vqabs dd qq *)
+ "neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores;
+ (* vmov vmvn *)
+ "neon_vmov", [Dest n3], ALU, allCores;
+ (* vaba *)
+ "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores;
+ "neon_vaba_qqq",
+ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
+ ALU_2cycle, allCores;
+ (* vsma *)
+ "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores;
+
+ (* NEON integer multiply instructions. *)
+ (* vmul, vqdmlh, vqrdmlh *)
+ (* vmul, vqdmul, qdd 16/8 long 32/16 long *)
+ "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6],
+ Mul, allCores;
+ "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)],
+ Mul_2cycle, allCores;
+ (* vmul, vqdmul again *)
+ "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar",
+ [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores;
+ (* vmla, vmls *)
+ "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long",
+ [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores;
+ "neon_mla_qqq_8_16",
+ [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)],
+ Mul_2cycle, allCores;
+ "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long",
+ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
+ Mul_2cycle, allCores;
+ "neon_mla_qqq_32_qqd_32_scalar",
+ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)],
+ Mul_4cycle, allCores;
+ (* vmul, vqdmulh, vqrdmulh *)
+ (* vmul, vqdmul *)
+ "neon_mul_ddd_16_scalar_32_16_long_scalar",
+ [Source_n n2; Source_m n1; Dest n6], Mul, allCores;
+ "neon_mul_qqd_32_scalar",
+ [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores;
+ (* vmla, vmls *)
+  (* vmla, vmls, vqdmla, vqdmls *)
+ "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar",
+ [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores;
+
+ (* NEON integer shift instructions. *)
+ (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *)
+ "neon_shift_1", [Source n1; Dest n3], Shift, allCores;
+  (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow;
+ vqshl_vrshl_vqrshl_ddd *)
+ "neon_shift_2", [Source n1; Dest n4], Shift, allCores;
+ (* vsli, vsri and vshl for qqq *)
+ "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores;
+ "neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores;
+ "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)],
+ Shift_2cycle, allCores;
+ "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores;
+
+ (* NEON floating-point instructions. *)
+ (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *)
+ (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *)
+ "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores;
+ "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)],
+ Fadd_2cycle, allCores;
+  (* vsum, vfmx, vfmn *)
+ "neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores;
+ "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores;
+ "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)],
+ Fmul_2cycle, allCores;
+ (* vmla, vmls *)
+ "neon_fp_vmla_ddd",
+ [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
+ "neon_fp_vmla_qqq",
+ [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)],
+ Fmul_then_fadd_2, allCores;
+ "neon_fp_vmla_ddd_scalar",
+ [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
+ "neon_fp_vmla_qqq_scalar",
+ [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)],
+ Fmul_then_fadd_2, allCores;
+ "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores;
+ "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)],
+ Fmul_then_fadd_2, allCores;
+
+ (* NEON byte permute instructions. *)
+ (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *)
+ "neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores;
+  (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1};
+ similarly for vtbx *)
+ "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores;
+ (* all the rest *)
+ "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores;
+
+ (* NEON load/store instructions. *)
+ "neon_ldr", [Dest n1], Ls 1, allCores;
+ "neon_str", [Source n1], Ls 1, allCores;
+ "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores;
+ "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores;
+ "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores;
+ "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores;
+ "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores;
+ "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2, allCores;
+ "neon_vst1_3_4_regs", [Source n1], Ls 3, allCores;
+ "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores;
+ "neon_vst3_vst4", [Source n1], Ls 4, allCores;
+ "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores;
+ "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores;
+ "neon_vst1_vst2_lane", [Source n1], Ls 2, allCores;
+ "neon_vst3_vst4_lane", [Source n1], Ls 3, allCores;
+ "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores;
+
+ (* NEON register transfer instructions. *)
+ "neon_mcr", [Dest n2], Permute 1, allCores;
+ "neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores;
+ (* MRC instructions are in the .tpl file. *)
+]
+
+(* Augment the tuples in the availability table with an extra component
+ that describes the earliest stage where a source operand may be
+ required. (It is also possible that an entry in the table has no
+ source requirements.) *)
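+(* For example, [Source_m n1; Source_n n2; Dest n4] gives an earliest
+   source stage of Some 1. *)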
+let calculate_sources =
+ List.map (fun (name, avail, res, cores) ->
+ let earliest_stage =
+ List.fold_left
+ (fun cur -> fun info ->
+ match info with
+ Source stage
+ | Source_n stage
+ | Source_m stage
+ | Source_d stage ->
+ (match cur with
+ None -> Some stage
+ | Some stage' when stage < stage' -> Some stage
+ | _ -> cur)
+ | _ -> cur) None avail
+ in
+ (name, avail, res, earliest_stage))
+
+(* Find the stage, if any, at the end of which a group produces a result. *)
+let find_dest (attr, avail, _, _) =
+ try
+ find_with_result
+ (fun av -> match av with
+ Dest st -> Some (Some st)
+ | Dest_n_after (after, st) -> Some (Some (after + st))
+ | _ -> None) avail
+ with Not_found -> None
+
+(* Find the worst-case latency between a producer and a consumer. *)
+let worst_case_latency producer (_, _, _, earliest_required) =
+ let dest = find_dest producer in
+ match earliest_required, dest with
+ None, _ ->
+ (* The consumer doesn't have any source requirements. *)
+ None
+ | _, None ->
+ (* The producer doesn't produce any results (e.g. a store insn). *)
+ None
+ | Some consumed, Some produced -> Some (produced - consumed + 1)
+
+(* Helper function for below. *)
+let latency_calc f producer (_, avail, _, _) =
+ try
+ let source_avail = find_with_result f avail in
+ match find_dest producer with
+ None ->
+ (* The producer does not produce a result. *)
+ Some 0
+ | Some produced ->
+ let latency = produced - source_avail + 1 in
+ (* Latencies below zero are raised to zero since we don't have
+ delay slots. *)
+ if latency < 0 then Some 0 else Some latency
+ with Not_found -> None
+
+(* Find any Rm latency between a producer and a consumer.  If no Rm
+   source requirement is explicitly specified for the consumer, return
+   the empty list (meaning no guarded bypass is needed); otherwise
+   return a single guarded latency.  Latencies that merely match the
+   producer's worst case are dealt with later, in pick_latency. *)
+let get_m_latency producer consumer =
+ match latency_calc (fun av -> match av with Source_m stage -> Some stage
+ | _ -> None) producer consumer
+ with None -> [] | Some latency -> [(Guard_only_m, latency)]
+
+(* Likewise for Rn. *)
+let get_n_latency producer consumer =
+ match latency_calc (fun av -> match av with Source_n stage -> Some stage
+ | _ -> None) producer consumer
+ with None -> [] | Some latency -> [(Guard_only_n, latency)]
+
+(* Likewise for Rd. *)
+let get_d_latency producer consumer =
+ match
+ latency_calc (fun av -> match av with Source_d stage -> Some stage
+ | _ -> None) producer consumer
+ with None -> [] | Some latency -> [(Guard_only_d, latency)]
+
+(* Given a producer and a consumer, work out the latency of the producer
+ to the consumer in each of the four cases (availability information
+ permitting) identified at the top of this file. Return the
+ consumer, the worst-case unguarded latency and any guarded latencies. *)
+let calculate_latencies producer consumer =
+ let worst = worst_case_latency producer consumer in
+ let m_latency = get_m_latency producer consumer in
+ let n_latency = get_n_latency producer consumer in
+ let d_latency = get_d_latency producer consumer in
+ (consumer, worst, m_latency @ n_latency @ d_latency)
+
+(* Helper function for below. *)
+let pick_latency largest worst guards =
+ let guards =
+ match worst with
+ None -> guards
+ | Some worst -> (Guard_none, worst) :: guards
+ in
+ if List.length guards = 0 then None else
+ let total_latency =
+ List.fold_left (fun acc -> fun (_, latency) -> acc + latency) 0 guards
+ in
+ let average_latency = (float_of_int total_latency) /.
+ (float_of_int (List.length guards)) in
+ let rounded_latency = int_of_float (ceil average_latency) in
+ if rounded_latency = largest then None
+ else Some (Guard_none, rounded_latency)
+
+(* Collate all bypasses for a particular producer as required in
+ worst_case_latencies_and_bypasses. (By this stage there is a maximum
+ of one bypass from this producer to any particular consumer listed
+ in LATENCIES.) Use a hash table to collate bypasses with the
+ same latency and guard. *)
+let collate_bypasses (producer_name, _, _, _) largest latencies core =
+ let ht = Hashtbl.create 42 in
+ let keys = ref [] in
+ List.iter (
+ fun ((consumer, _, _, _), worst, guards) ->
+ (* Find out which latency to use. Ignoring latencies that match
+ the *overall* worst-case latency for this producer (which will
+ be in define_insn_reservation), we have to examine:
+ 1. the latency with no guard between this producer and this
+ consumer; and
+ 2. any guarded latency. *)
+ let guard_latency_opt = pick_latency largest worst guards in
+ match guard_latency_opt with
+ None -> ()
+ | Some (guard, latency) ->
+ begin
+          (if not (Hashtbl.mem ht (guard, latency)) then
+             keys := (guard, latency) :: !keys);
+ Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer)
+ end
+ ) latencies;
+ (* The hash table now has bypasses collated so that ones with the
+ same latency and guard have the same keys. Walk through all the
+ keys, extract the associated bypasses, and concatenate the names
+ of the consumers for each bypass. *)
+ List.map (
+ fun ((guard, latency) as key) ->
+ let consumers = Hashtbl.find_all ht key in
+ (producer_name,
+ String.concat ",\\\n " consumers,
+ latency,
+ guard)
+ ) !keys
+
+(* For every producer, find the worst-case latency between it and
+ *any* consumer. Also determine (if such a thing exists) the
+ lowest-latency bypass from each producer to each consumer. Group
+ the output in such a way that all bypasses with the same producer
+ and latency are together, and so that bypasses with the worst-case
+ latency are ignored. *)
+let worst_case_latencies_and_bypasses core =
+ let rec f (worst_acc, bypasses_acc) prev xs =
+ match xs with
+ [] -> (worst_acc, bypasses_acc)
+ | ((producer_name, producer_avail, res_string, _) as producer)::next ->
+ (* For this particular producer, work out the latencies between
+ it and every consumer. *)
+ let latencies =
+ List.fold_left (fun acc -> fun consumer ->
+ (calculate_latencies producer consumer) :: acc)
+ [] (prev @ xs)
+ in
+ (* Now work out what the overall worst case latency was for this
+ particular producer. *)
+ match latencies with
+ [] -> assert false
+ | _ ->
+ let comp_fn (_, l1, _) (_, l2, _) =
+ if l1 > l2 then -1 else if l1 = l2 then 0 else 1
+ in
+ let largest =
+ match List.hd (List.sort comp_fn latencies) with
+ (_, None, _) -> 0 (* Producer has no consumers. *)
+ | (_, Some worst, _) -> worst
+ in
+ (* Having got the largest latency, collect all bypasses for
+	         this producer and filter out those whose latency equals
+	         that largest value.  Record the others for later emission. *)
+ let bypasses = collate_bypasses producer largest latencies core in
+ (* Go on to process remaining producers, having noted
+ the result for this one. *)
+ f ((producer_name, producer_avail, largest,
+ res_string) :: worst_acc,
+ bypasses @ bypasses_acc)
+ (prev @ [producer]) next
+ in
+ f ([], []) []
+
+(* Emit a helpful comment for a define_insn_reservation. *)
+let write_comment producer avail =
+ let seen_source = ref false in
+ let describe info =
+ let read = if !seen_source then "" else "read " in
+ match info with
+ Source stage ->
+ seen_source := true;
+ Printf.printf "%stheir source operands at N%d" read stage
+ | Source_n stage ->
+ seen_source := true;
+ Printf.printf "%stheir (D|Q)n operands at N%d" read stage
+ | Source_m stage ->
+ seen_source := true;
+ Printf.printf "%stheir (D|Q)m operands at N%d" read stage
+ | Source_d stage ->
+ Printf.printf "%stheir (D|Q)d operands at N%d" read stage
+ | Dest stage ->
+ Printf.printf "produce a result at N%d" stage
+ | Dest_n_after (after, stage) ->
+ Printf.printf "produce a result at N%d on cycle %d" stage (after + 1)
+ in
+ Printf.printf ";; Instructions using this reservation ";
+ let rec f infos x =
+ let sep = if x mod 2 = 1 then "" else "\n;;" in
+ match infos with
+ [] -> assert false
+ | [info] -> describe info; Printf.printf ".\n"
+ | info::(_::[] as infos) ->
+ describe info; Printf.printf ", and%s " sep; f infos (x+1)
+ | info::infos -> describe info; Printf.printf ",%s " sep; f infos (x+1)
+ in
+ f avail 0
+
+
+(* Emit a define_insn_reservation for each producer. The latency
+ written in will be its worst-case latency. *)
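+(* For the Cortex-A8, the "neon_int_1" group comes out roughly as
+     (define_insn_reservation "cortex_a8_neon_int_1" 3
+       (and (eq_attr "tune" "cortexa8")
+            (eq_attr "neon_type" "neon_int_1"))
+       "cortex_a8_neon_dp")
+   where 3 is the computed worst-case latency. *)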
+let emit_insn_reservations core =
+ let corestring = coreStr core in
+ let tunestring = tuneStr core
+ in List.iter (
+ fun (producer, avail, latency, reservation) ->
+ write_comment producer avail;
+ Printf.printf "(define_insn_reservation \"%s_%s\" %d\n"
+ corestring producer latency;
+ Printf.printf " (and (eq_attr \"tune\" \"%s\")\n" tunestring;
+ Printf.printf " (eq_attr \"neon_type\" \"%s\"))\n" producer;
+ let str =
+ match reservation with
+ Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4"
+ | Shift -> "dp" | Shift_2cycle -> "dp_2"
+ | ALU -> "dp" | ALU_2cycle -> "dp_2"
+ | Fmul -> "dp" | Fmul_2cycle -> "dp_2"
+ | Fadd -> "fadd" | Fadd_2cycle -> "fadd_2"
+ | Ls 1 -> "ls"
+ | Ls n -> "ls_" ^ (string_of_int n)
+ | Permute 1 -> "perm"
+ | Permute n -> "perm_" ^ (string_of_int n)
+ | Fmul_then_fadd -> "fmul_then_fadd"
+ | Fmul_then_fadd_2 -> "fmul_then_fadd_2"
+ in
+ Printf.printf " \"%s_neon_%s\")\n\n" corestring str
+ )
+
+(* Given a guard description, return the name of the C function to
+ be used as the guard for define_bypass. *)
+let guard_fn g =
+ match g with
+ Guard_only_m -> "arm_neon_only_m_dependency"
+ | Guard_only_n -> "arm_neon_only_n_dependency"
+ | Guard_only_d -> "arm_neon_only_d_dependency"
+ | Guard_none -> assert false
+
+(* Emit a define_bypass for each bypass. *)
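+(* An unguarded bypass prints roughly as (the names here are only
+   illustrative):
+     (define_bypass 2 "cortex_a8_neon_some_producer"
+                    "cortex_a8_neon_some_consumer")
+   with the guard function name appended as a third operand when the
+   bypass is guarded. *)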
+let emit_bypasses core =
+ List.iter (
+ fun (producer, consumers, latency, guard) ->
+ Printf.printf "(define_bypass %d \"%s_%s\"\n"
+ latency (coreStr core) producer;
+
+ if guard = Guard_none then
+ Printf.printf " \"%s\")\n\n" consumers
+ else
+ begin
+ Printf.printf " \"%s\"\n" consumers;
+ Printf.printf " \"%s\")\n\n" (guard_fn guard)
+ end
+ )
+
+
+let calculate_per_core_availability_table core availability_table =
+ let table = calculate_sources availability_table in
+ let worst_cases, bypasses = worst_case_latencies_and_bypasses core table in
+ emit_insn_reservations core (List.rev worst_cases);
+ Printf.printf ";; Exceptions to the default latencies.\n\n";
+ emit_bypasses core bypasses
+
+let calculate_core_availability_table core availability_table =
+  let filter_core =
+    List.filter (fun (_, _, _, cores) -> List.exists ((=) core) cores)
+  in
+  calculate_per_core_availability_table core (filter_core availability_table)
+
+
+(* Program entry point. *)
+let main =
+  List.iter
+    (fun core -> calculate_core_availability_table core availability_table)
+    allCores
diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
new file mode 100644
index 000000000..63fbbbf2c
--- /dev/null
+++ b/gcc/config/arm/neon-testgen.ml
@@ -0,0 +1,283 @@
+(* Auto-generate ARM Neon intrinsics tests.
+ Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is an O'Caml program. The O'Caml compiler is available from:
+
+ http://caml.inria.fr/
+
+ Or from your favourite OS's friendly packaging system. Tested with version
+ 3.09.2, though other versions will probably work too.
+
+ Compile with:
+ ocamlc -c neon.ml
+ ocamlc -o neon-testgen neon.cmo neon-testgen.ml
+
+ Run with:
+ cd /path/to/gcc/testsuite/gcc.target/arm/neon
+ /path/to/neon-testgen
+*)
+
+open Neon
+
+type c_type_flags = Pointer | Const
+
+(* Open a test source file. *)
+let open_test_file dir name =
+ try
+ open_out (dir ^ "/" ^ name ^ ".c")
+ with Sys_error str ->
+ failwith ("Could not create test source file " ^ name ^ ": " ^ str)
+
+(* Emit prologue code to a test source file. *)
+let emit_prologue chan test_name =
+ Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name;
+ Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n";
+ Printf.fprintf chan "/* { dg-do assemble } */\n";
+ Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n";
+ Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
+ Printf.fprintf chan "/* { dg-add-options arm_neon } */\n";
+ Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
+ Printf.fprintf chan "void test_%s (void)\n{\n" test_name
+
+(* Emit declarations of local variables that are going to be passed
+ to an intrinsic, together with one to take a returned value if needed. *)
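+(* For a test of, say, vadd_s8, this emits something like
+     int8x8_t out_int8x8_t;
+     int8x8_t arg0_int8x8_t;
+     int8x8_t arg1_int8x8_t;
+   with the result variable bound to a fixed register when the
+   Fixed_return_reg feature is present. *)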
+let emit_automatics chan c_types features =
+ let emit () =
+ ignore (
+ List.fold_left (fun arg_number -> fun (flags, ty) ->
+ let pointer_bit =
+ if List.mem Pointer flags then "*" else ""
+ in
+ (* Const arguments to builtins are directly
+ written in as constants. *)
+ if not (List.mem Const flags) then
+ Printf.fprintf chan " %s %sarg%d_%s;\n"
+ ty pointer_bit arg_number ty;
+ arg_number + 1)
+ 0 (List.tl c_types))
+ in
+ match c_types with
+ (_, return_ty) :: tys ->
+ if return_ty <> "void" then begin
+        (* The intrinsic returns a value.  We need to do explicit register
+           allocation for vget_low tests, or they fail because of copy
+           elimination.  *)
+ ((if List.mem Fixed_return_reg features then
+ Printf.fprintf chan " register %s out_%s asm (\"d18\");\n"
+ return_ty return_ty
+ else
+ Printf.fprintf chan " %s out_%s;\n" return_ty return_ty);
+ emit ())
+ end else
+ (* The intrinsic does not return a value. *)
+ emit ()
+ | _ -> assert false
+
+(* Emit code to call an intrinsic. *)
+let emit_call chan const_valuator c_types name elt_ty =
+ (if snd (List.hd c_types) <> "void" then
+ Printf.fprintf chan " out_%s = " (snd (List.hd c_types))
+ else
+ Printf.fprintf chan " ");
+ Printf.fprintf chan "%s_%s (" (intrinsic_name name) (string_of_elt elt_ty);
+ let print_arg chan arg_number (flags, ty) =
+ (* If the argument is of const type, then directly write in the
+ constant now. *)
+ if List.mem Const flags then
+ match const_valuator with
+ None ->
+ if List.mem Pointer flags then
+ Printf.fprintf chan "0"
+ else
+ Printf.fprintf chan "1"
+ | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number))
+ else
+ Printf.fprintf chan "arg%d_%s" arg_number ty
+ in
+ let rec print_args arg_number tys =
+ match tys with
+ [] -> ()
+ | [ty] -> print_arg chan arg_number ty
+ | ty::tys ->
+ print_arg chan arg_number ty;
+ Printf.fprintf chan ", ";
+ print_args (arg_number + 1) tys
+ in
+ print_args 0 (List.tl c_types);
+ Printf.fprintf chan ");\n"
+
+(* Emit epilogue code to a test source file. *)
+let emit_epilogue chan features regexps =
+ let no_op = List.exists (fun feature -> feature = No_op) features in
+ Printf.fprintf chan "}\n\n";
+ (if not no_op then
+ List.iter (fun regexp ->
+ Printf.fprintf chan
+ "/* { dg-final { scan-assembler \"%s\" } } */\n" regexp)
+ regexps
+ else
+ ()
+ );
+ Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n"
+
+(* Check a list of C types to determine which ones are pointers and which
+ ones are const. *)
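+(* For example, "const int *" is classified as ([Const; Pointer], "int")
+   and a plain "int8x8_t" as ([], "int8x8_t"). *)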
+let check_types tys =
+ let tys' =
+ List.map (fun ty ->
+ let len = String.length ty in
+ if len > 2 && String.get ty (len - 2) = ' '
+ && String.get ty (len - 1) = '*'
+ then ([Pointer], String.sub ty 0 (len - 2))
+ else ([], ty)) tys
+ in
+ List.map (fun (flags, ty) ->
+ if String.length ty > 6 && String.sub ty 0 6 = "const "
+ then (Const :: flags, String.sub ty 6 ((String.length ty) - 6))
+ else (flags, ty)) tys'
+
+(* Given an intrinsic shape, produce a regexp that will match
+ the right-hand sides of instructions generated by an intrinsic of
+ that shape. *)
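+(* For instance, a lone Dreg operand produces a Tcl-quoted regexp that
+   matches a doubleword register name such as d5, and VecArray shapes
+   wrap the element regexps in literal braces, as they appear in vld/vst
+   operand lists. *)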
+let rec analyze_shape shape =
+ let rec n_things n thing =
+ match n with
+ 0 -> []
+ | n -> thing :: (n_things (n - 1) thing)
+ in
+ let rec analyze_shape_elt elt =
+ match elt with
+ Dreg -> "\\[dD\\]\\[0-9\\]+"
+ | Qreg -> "\\[qQ\\]\\[0-9\\]+"
+ | Corereg -> "\\[rR\\]\\[0-9\\]+"
+ | Immed -> "#\\[0-9\\]+"
+ | VecArray (1, elt) ->
+ let elt_regexp = analyze_shape_elt elt in
+ "((\\\\\\{" ^ elt_regexp ^ "\\\\\\})|(" ^ elt_regexp ^ "))"
+ | VecArray (n, elt) ->
+ let elt_regexp = analyze_shape_elt elt in
+ let alt1 = elt_regexp ^ "-" ^ elt_regexp in
+ let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in
+ "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}"
+ | (PtrTo elt | CstPtrTo elt) ->
+ "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\\\\\]"
+ | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
+ | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
+ | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
+ | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")"
+ in
+ match shape with
+ All (n, elt) -> commas analyze_shape_elt (n_things n elt) ""
+ | Long -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Dreg) ^
+ ", " ^ (analyze_shape_elt Dreg)
+ | Long_noreg elt -> (analyze_shape_elt elt) ^ ", " ^ (analyze_shape_elt elt)
+ | Wide -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Qreg) ^
+ ", " ^ (analyze_shape_elt Dreg)
+ | Wide_noreg elt -> analyze_shape (Long_noreg elt)
+ | Narrow -> (analyze_shape_elt Dreg) ^ ", " ^ (analyze_shape_elt Qreg) ^
+ ", " ^ (analyze_shape_elt Qreg)
+ | Use_operands elts -> commas analyze_shape_elt (Array.to_list elts) ""
+ | By_scalar Dreg ->
+ analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
+ | By_scalar Qreg ->
+ analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
+ | By_scalar _ -> assert false
+ | Wide_lane ->
+ analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
+ | Wide_scalar ->
+ analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
+ | Pair_result elt ->
+ let elt_regexp = analyze_shape_elt elt in
+ elt_regexp ^ ", " ^ elt_regexp
+ | Unary_scalar _ -> "FIXME Unary_scalar"
+ | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
+ | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
+ | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
+
+(* Generate tests for one intrinsic. *)
+let test_intrinsic dir opcode features shape name munge elt_ty =
+ (* Open the test source file. *)
+ let test_name = name ^ (string_of_elt elt_ty) in
+ let chan = open_test_file dir test_name in
+ (* Work out what argument and return types the intrinsic has. *)
+ let c_arity, new_elt_ty = munge shape elt_ty in
+ let c_types = check_types (strings_of_arity c_arity) in
+ (* Extract any constant valuator (a function specifying what constant
+ values are to be written into the intrinsic call) from the features
+ list. *)
+ let const_valuator =
+ try
+ match (List.find (fun feature -> match feature with
+ Const_valuator _ -> true
+ | _ -> false) features) with
+ Const_valuator f -> Some f
+ | _ -> assert false
+ with Not_found -> None
+ in
+ (* Work out what instruction name(s) to expect. *)
+ let insns = get_insn_names features name in
+ let no_suffix = (new_elt_ty = NoElts) in
+ let insns =
+ if no_suffix then insns
+ else List.map (fun insn ->
+ let suffix = string_of_elt_dots new_elt_ty in
+ insn ^ "\\." ^ suffix) insns
+ in
+ (* Construct a regexp to match against the expected instruction name(s). *)
+ let insn_regexp =
+ match insns with
+ [] -> assert false
+ | [insn] -> insn
+ | _ ->
+ let rec calc_regexp insns cur_regexp =
+ match insns with
+ [] -> cur_regexp
+ | [insn] -> cur_regexp ^ "(" ^ insn ^ "))"
+ | insn::insns -> calc_regexp insns (cur_regexp ^ "(" ^ insn ^ ")|")
+ in calc_regexp insns "("
+ in
+ (* Construct regexps to match against the instructions that this
+ intrinsic expands to. Watch out for any writeback character and
+ comments after the instruction. *)
+ let regexps = List.map (fun regexp -> insn_regexp ^ "\\[ \t\\]+" ^ regexp ^
+ "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n")
+ (analyze_all_shapes features shape analyze_shape)
+ in
+ (* Emit file and function prologues. *)
+ emit_prologue chan test_name;
+ (* Emit local variable declarations. *)
+ emit_automatics chan c_types features;
+ Printf.fprintf chan "\n";
+ (* Emit the call to the intrinsic. *)
+ emit_call chan const_valuator c_types name elt_ty;
+ (* Emit the function epilogue and the DejaGNU scan-assembler directives. *)
+ emit_epilogue chan features regexps;
+ (* Close the test file. *)
+ close_out chan
+
+(* Generate tests for one element of the "ops" table. *)
+let test_intrinsic_group dir (opcode, features, shape, name, munge, types) =
+ List.iter (test_intrinsic dir opcode features shape name munge) types
+
+(* Program entry point. *)
+let _ =
+ let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." in
+ List.iter (test_intrinsic_group directory) (reinterp @ ops)
+
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
new file mode 100644
index 000000000..247dc1ff4
--- /dev/null
+++ b/gcc/config/arm/neon.md
@@ -0,0 +1,5476 @@
+;; ARM NEON coprocessor Machine Description
+;; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Constants for unspecs.
+(define_constants
+ [(UNSPEC_ASHIFT_SIGNED 65)
+ (UNSPEC_ASHIFT_UNSIGNED 66)
+ (UNSPEC_VABD 69)
+ (UNSPEC_VABDL 70)
+ (UNSPEC_VADD 72)
+ (UNSPEC_VADDHN 73)
+ (UNSPEC_VADDL 74)
+ (UNSPEC_VADDW 75)
+ (UNSPEC_VBSL 78)
+ (UNSPEC_VCAGE 79)
+ (UNSPEC_VCAGT 80)
+ (UNSPEC_VCEQ 81)
+ (UNSPEC_VCGE 82)
+ (UNSPEC_VCGT 83)
+ (UNSPEC_VCLS 84)
+ (UNSPEC_VCVT 88)
+ (UNSPEC_VCVT_N 89)
+ (UNSPEC_VEXT 93)
+ (UNSPEC_VHADD 97)
+ (UNSPEC_VHSUB 98)
+ (UNSPEC_VLD1 99)
+ (UNSPEC_VLD1_DUP 100)
+ (UNSPEC_VLD1_LANE 101)
+ (UNSPEC_VLD2 102)
+ (UNSPEC_VLD2_DUP 103)
+ (UNSPEC_VLD2_LANE 104)
+ (UNSPEC_VLD3 105)
+ (UNSPEC_VLD3A 106)
+ (UNSPEC_VLD3B 107)
+ (UNSPEC_VLD3_DUP 108)
+ (UNSPEC_VLD3_LANE 109)
+ (UNSPEC_VLD4 110)
+ (UNSPEC_VLD4A 111)
+ (UNSPEC_VLD4B 112)
+ (UNSPEC_VLD4_DUP 113)
+ (UNSPEC_VLD4_LANE 114)
+ (UNSPEC_VMAX 115)
+ (UNSPEC_VMIN 116)
+ (UNSPEC_VMLA 117)
+ (UNSPEC_VMLAL 118)
+ (UNSPEC_VMLA_LANE 119)
+ (UNSPEC_VMLAL_LANE 120)
+ (UNSPEC_VMLS 121)
+ (UNSPEC_VMLSL 122)
+ (UNSPEC_VMLS_LANE 123)
+ (UNSPEC_VMLSL_LANE 124)
+ (UNSPEC_VMOVL 125)
+ (UNSPEC_VMOVN 126)
+ (UNSPEC_VMUL 127)
+ (UNSPEC_VMULL 128)
+ (UNSPEC_VMUL_LANE 129)
+ (UNSPEC_VMULL_LANE 130)
+ (UNSPEC_VPADAL 135)
+ (UNSPEC_VPADD 136)
+ (UNSPEC_VPADDL 137)
+ (UNSPEC_VPMAX 138)
+ (UNSPEC_VPMIN 139)
+ (UNSPEC_VPSMAX 140)
+ (UNSPEC_VPSMIN 141)
+ (UNSPEC_VPUMAX 142)
+ (UNSPEC_VPUMIN 143)
+ (UNSPEC_VQABS 144)
+ (UNSPEC_VQADD 145)
+ (UNSPEC_VQDMLAL 146)
+ (UNSPEC_VQDMLAL_LANE 147)
+ (UNSPEC_VQDMLSL 148)
+ (UNSPEC_VQDMLSL_LANE 149)
+ (UNSPEC_VQDMULH 150)
+ (UNSPEC_VQDMULH_LANE 151)
+ (UNSPEC_VQDMULL 152)
+ (UNSPEC_VQDMULL_LANE 153)
+ (UNSPEC_VQMOVN 154)
+ (UNSPEC_VQMOVUN 155)
+ (UNSPEC_VQNEG 156)
+ (UNSPEC_VQSHL 157)
+ (UNSPEC_VQSHL_N 158)
+ (UNSPEC_VQSHLU_N 159)
+ (UNSPEC_VQSHRN_N 160)
+ (UNSPEC_VQSHRUN_N 161)
+ (UNSPEC_VQSUB 162)
+ (UNSPEC_VRECPE 163)
+ (UNSPEC_VRECPS 164)
+ (UNSPEC_VREV16 165)
+ (UNSPEC_VREV32 166)
+ (UNSPEC_VREV64 167)
+ (UNSPEC_VRSQRTE 168)
+ (UNSPEC_VRSQRTS 169)
+ (UNSPEC_VSHL 171)
+ (UNSPEC_VSHLL_N 172)
+ (UNSPEC_VSHL_N 173)
+ (UNSPEC_VSHR_N 174)
+ (UNSPEC_VSHRN_N 175)
+ (UNSPEC_VSLI 176)
+ (UNSPEC_VSRA_N 177)
+ (UNSPEC_VSRI 178)
+ (UNSPEC_VST1 179)
+ (UNSPEC_VST1_LANE 180)
+ (UNSPEC_VST2 181)
+ (UNSPEC_VST2_LANE 182)
+ (UNSPEC_VST3 183)
+ (UNSPEC_VST3A 184)
+ (UNSPEC_VST3B 185)
+ (UNSPEC_VST3_LANE 186)
+ (UNSPEC_VST4 187)
+ (UNSPEC_VST4A 188)
+ (UNSPEC_VST4B 189)
+ (UNSPEC_VST4_LANE 190)
+ (UNSPEC_VSTRUCTDUMMY 191)
+ (UNSPEC_VSUB 192)
+ (UNSPEC_VSUBHN 193)
+ (UNSPEC_VSUBL 194)
+ (UNSPEC_VSUBW 195)
+ (UNSPEC_VTBL 196)
+ (UNSPEC_VTBX 197)
+ (UNSPEC_VTRN1 198)
+ (UNSPEC_VTRN2 199)
+ (UNSPEC_VTST 200)
+ (UNSPEC_VUZP1 201)
+ (UNSPEC_VUZP2 202)
+ (UNSPEC_VZIP1 203)
+ (UNSPEC_VZIP2 204)
+ (UNSPEC_MISALIGNED_ACCESS 205)
+ (UNSPEC_VCLE 206)
+ (UNSPEC_VCLT 207)])
+
+
+;; Attribute used to permit string comparisons against <VQH_mnem> in
+;; neon_type attribute definitions.
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
+
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VD 0 "nonimmediate_operand"
+ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VD 1 "general_operand"
+ " w,w, Dn,Uvi, w, r, r, Usi,r"))]
+ "TARGET_NEON
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%P0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%P0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
+ below must be changed to output_move_neon (which will use the
+ element/structure loads/stores), and the constraint changed to 'Um' instead
+ of 'Uv'. */
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%P0, %P1 @ <mode>";
+ case 1: case 3: return output_move_vfp (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
+ case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
+ default: return output_move_double (operands);
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+ (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu,load2,store2")
+ (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
+ (set_attr "length" "4,4,4,4,4,4,8,8,8")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
+
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
+ "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VQXMOV 1 "general_operand"
+ " w,w, Dn,Uni, w, r, r, Usi, r"))]
+ "TARGET_NEON
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%q0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%q0, %q1 @ <mode>";
+ case 1: case 3: return output_move_neon (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
+ case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
+ default: return output_move_quad (operands);
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\
+ neon_mrrc,neon_mcr_2_mcrr,*,*,*")
+ (set_attr "type" "*,*,*,*,*,*,alu,load4,store4")
+ (set_attr "insn" "*,*,*,*,*,*,mov,*,*")
+ (set_attr "length" "4,8,4,8,8,8,16,8,16")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")])
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_NEON"
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (TImode, operands[1]);
+ }
+})
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
+ (match_operand:VSTRUCT 1 "general_operand" ""))]
+ "TARGET_NEON"
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
+})
+
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
+ (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
+ "TARGET_NEON
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0: return "#";
+ case 1: case 2: return output_move_neon (operands);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2")
+ (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
+
+(define_split
+ [(set (match_operand:EI 0 "s_register_operand" "")
+ (match_operand:EI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[2], src[2];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (DImode, rdest + 4);
+ src[1] = gen_rtx_REG (DImode, rsrc + 4);
+
+ neon_disambiguate_copy (operands, dest, src, 2);
+})
+
+(define_split
+ [(set (match_operand:OI 0 "s_register_operand" "")
+ (match_operand:OI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[2], src[2];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+
+ neon_disambiguate_copy (operands, dest, src, 2);
+})
+
+(define_split
+ [(set (match_operand:CI 0 "s_register_operand" "")
+ (match_operand:CI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[3], src[3];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+ dest[2] = gen_rtx_REG (TImode, rdest + 8);
+ src[2] = gen_rtx_REG (TImode, rsrc + 8);
+
+ neon_disambiguate_copy (operands, dest, src, 3);
+})
+
+(define_split
+ [(set (match_operand:XI 0 "s_register_operand" "")
+ (match_operand:XI 1 "s_register_operand" ""))]
+ "TARGET_NEON && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+{
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
+ rtx dest[4], src[4];
+
+ dest[0] = gen_rtx_REG (TImode, rdest);
+ src[0] = gen_rtx_REG (TImode, rsrc);
+ dest[1] = gen_rtx_REG (TImode, rdest + 4);
+ src[1] = gen_rtx_REG (TImode, rsrc + 4);
+ dest[2] = gen_rtx_REG (TImode, rdest + 8);
+ src[2] = gen_rtx_REG (TImode, rsrc + 8);
+ dest[3] = gen_rtx_REG (TImode, rdest + 12);
+ src[3] = gen_rtx_REG (TImode, rsrc + 12);
+
+ neon_disambiguate_copy (operands, dest, src, 4);
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VDQX 0 "nonimmediate_operand" "")
+ (unspec:VDQX [(match_operand:VDQX 1 "general_operand" "")]
+ UNSPEC_MISALIGNED_ACCESS))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+{
+ /* This pattern is not permitted to fail during expansion: if both arguments
+ are non-registers (e.g. memory := constant, which can be created by the
+ auto-vectorizer), force operand 1 into a register. */
+ if (!s_register_operand (operands[0], <MODE>mode)
+ && !s_register_operand (operands[1], <MODE>mode))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*movmisalign<mode>_neon_store"
+ [(set (match_operand:VDX 0 "memory_operand" "=Um")
+ (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
+ UNSPEC_MISALIGNED_ACCESS))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ "vst1.<V_sz_elem>\t{%P1}, %A0"
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
+
+(define_insn "*movmisalign<mode>_neon_load"
+ [(set (match_operand:VDX 0 "s_register_operand" "=w")
+ (unspec:VDX [(match_operand:VDX 1 "memory_operand" " Um")]
+ UNSPEC_MISALIGNED_ACCESS))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ "vld1.<V_sz_elem>\t{%P0}, %A1"
+ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
+
+(define_insn "*movmisalign<mode>_neon_store"
+ [(set (match_operand:VQX 0 "memory_operand" "=Um")
+ (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
+ UNSPEC_MISALIGNED_ACCESS))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ "vst1.<V_sz_elem>\t{%q1}, %A0"
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
+
+(define_insn "*movmisalign<mode>_neon_load"
+ [(set (match_operand:VQX 0 "s_register_operand" "=w")
+ (unspec:VQX [(match_operand:VQX 1 "memory_operand" " Um")]
+ UNSPEC_MISALIGNED_ACCESS))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ "vld1.<V_sz_elem>\t{%q0}, %A1"
+ [(set_attr "neon_type" "neon_vld1_1_2_regs")])
+
+(define_insn "vec_set<mode>_internal"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (vec_merge:VD
+ (vec_duplicate:VD
+ (match_operand:<V_elem> 1 "s_register_operand" "r"))
+ (match_operand:VD 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ int elt = ffs ((int) INTVAL (operands[2])) - 1;
+ if (BYTES_BIG_ENDIAN)
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.<V_sz_elem>\t%P0[%c2], %1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr")])
+
+(define_insn "vec_set<mode>_internal"
+ [(set (match_operand:VQ 0 "s_register_operand" "=w")
+ (vec_merge:VQ
+ (vec_duplicate:VQ
+ (match_operand:<V_elem> 1 "s_register_operand" "r"))
+ (match_operand:VQ 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
+ int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
+ int elt = elem % half_elts;
+ int hi = (elem / half_elts) * 2;
+ int regno = REGNO (operands[0]);
+
+ if (BYTES_BIG_ENDIAN)
+ elt = half_elts - 1 - elt;
+
+ operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.<V_sz_elem>\t%P0[%c2], %1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr")]
+)
+
+(define_insn "vec_setv2di_internal"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "s_register_operand" "r"))
+ (match_operand:V2DI 3 "s_register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
+ int regno = REGNO (operands[0]) + 2 * elem;
+
+ operands[0] = gen_rtx_REG (DImode, regno);
+
+ return "vmov%?\t%P0, %Q1, %R1";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_mcr_2_mcrr")]
+)
+
+(define_expand "vec_set<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:<V_elem> 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
+ emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
+ GEN_INT (elem), operands[0]));
+ DONE;
+})
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
+ (vec_select:<V_elem>
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
+ int elt = INTVAL (operands[2]) % half_elts;
+ int hi = (INTVAL (operands[2]) / half_elts) * 2;
+ int regno = REGNO (operands[1]);
+
+ if (BYTES_BIG_ENDIAN)
+ elt = half_elts - 1 - elt;
+
+ operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
+ operands[2] = GEN_INT (elt);
+
+ return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "vec_extractv2di"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
+
+ operands[1] = gen_rtx_REG (DImode, regno);
+
+ return "vmov%?\t%Q0, %R0, %P1 @ v2di";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_int_1")]
+)
+
+(define_expand "vec_init<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_NEON"
+{
+ neon_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Doubleword and quadword arithmetic.
+
+;; NOTE: some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations.
+
+(define_insn "*add<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_1")))]
+)
+
+(define_insn "adddi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vadd.i64\t%P0, %P1, %P2";
+ case 1: return "#";
+ case 2: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "conds" "*,clob,clob")
+ (set_attr "length" "*,8,8")]
+)
+
+(define_insn "*sub<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_2")))]
+)
+
+(define_insn "subdi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vsub.i64\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 2: /* fall through */
+ case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_2,*,*,*")
+ (set_attr "conds" "*,clob,clob,clob")
+ (set_attr "length" "*,8,8,8")]
+)
+
+(define_insn "*mul<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+)
+
+(define_insn "mul<mode>3add<mode>_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w"))
+ (match_operand:VDQ 1 "s_register_operand" "0")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
+(define_insn "mul<mode>3neg<mode>add<mode>_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
+ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w"))))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+ "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "iordi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r")
+ (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%P0, %P1, %P2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ DImode, 0, VALID_NEON_QREG_MODE (DImode));
+ case 2: return "#";
+ case 3: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*")
+ (set_attr "length" "*,*,8,8")]
+)
+
+;; The concrete forms of the Neon immediate-logic instructions are vbic and
+;; vorr. We support the pseudo-instruction vand instead, because that
+;; corresponds to the canonical form the middle-end expects to use for
+;; immediate bitwise-ANDs.
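+;; As a rough sketch (mask value invented for illustration): an immediate
+;; AND such as "x & 0x00ff" on each V4HI element can be emitted by
+;; neon_output_logic_immediate as the concrete instruction
+;;   vbic.i16 d0, #0xff00
+;; i.e. clearing the complement of the requested mask.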
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "anddi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r")
+ (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r")
+ (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%P0, %P1, %P2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
+ case 2: return "#";
+ case 3: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*")
+ (set_attr "length" "*,*,8,8")]
+)
+
+(define_insn "orn<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "orndi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
+ (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
+ "TARGET_NEON"
+ "@
+ vorn\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
+)
+
+(define_insn "bic<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+;; Compare to *anddi_notdi_di.
+(define_insn "bicdi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r")
+ (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
+ (match_operand:DI 1 "s_register_operand" "w,0,r")))]
+ "TARGET_NEON"
+ "@
+ vbic\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
+)
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "xordi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r")
+ (match_operand:DI 2 "s_register_operand" "w,r,r")))]
+ "TARGET_NEON"
+ "@
+ veor\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
+)
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmvn\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_3")))]
+)
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_3")))]
+)
+
+(define_insn "*umin<mode>3_neon"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "*umax<mode>3_neon"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "*smin<mode>3_neon"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "*smax<mode>3_neon"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+; TODO: V2DI shifts are currently disabled because there are bugs in the
+; generic vectorizer code. It ends up creating a V2DI constructor with
+; SImode elements.
+
+(define_insn "vashl<mode>3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+; Used for implementing arithmetic shift-right, which is a left-shift by a
+; negative amount, with signed operands. This is essentially the same as
+; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
+; with negative shift amounts.
+
+(define_insn "ashl<mode>3_signed"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+; Used for implementing logical shift-right, which is a left-shift by a negative
+; amount, with unsigned operands.
+
+(define_insn "ashl<mode>3_unsigned"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
+
+(define_expand "vashr<mode>3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+
+ DONE;
+})
+
+(define_expand "vlshr<mode>3"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+
+ DONE;
+})
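+
+; As an illustrative sketch of the two expanders above (registers invented):
+; a V8QI logical shift right by a vector of counts is emitted as
+;   vneg.s8 d1, d1      @ negate the per-element shift counts
+;   vshl.u8 d0, d0, d1  @ left-shift by a negative count = shift right
+; while the arithmetic variant uses vshl.s8 instead.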
+
+;; Widening operations
+
+(define_insn "widen_ssum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (sign_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q2, %P1"
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+(define_insn "widen_usum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (zero_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_u_elem>\t%q0, %q2, %P1"
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
+;; shift-count granularity. That's good enough for the middle-end's current
+;; needs.
+
+(define_expand "vec_shr_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == width)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], operands[1], zero_reg,
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+(define_expand "vec_shl_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == 0)
+ {
+ emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
+ DONE;
+ }
+
+ num_bits = width - num_bits;
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], zero_reg, operands[1],
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
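+
+;; A hypothetical example of the VEXT idiom used above: vec_shr of a 128-bit
+;; vector by 32 bits (4 bytes) can be emitted as
+;;   vmov.i8 q8, #0          @ zero vector
+;;   vext.8  q0, q0, q8, #4  @ bytes 4..15 of q0, then 4 zero bytes
+;; so the vacated high bytes of the result are zero-filled.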
+
+;; Helpers for quad-word reduction operations
+
+; Add (or smin, smax...) the low N/2 elements of the N-element vector
+; operand[1] to the high N/2 elements of the same vector. Put the result in
+; operand[0], an N/2-element vector.
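+;
+; For instance (illustrative registers): quad_halves_plusv4si on a Q register
+; q1 = {a, b, c, d} produces the V2SI value {a+c, b+d}, emitted as
+;   vadd.i32 d0, d2, d3   @ %e1 and %f1 are the two halves of q1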
+
+(define_insn "quad_halves_<code>v4si"
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vqh_ops:V2SI
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:V2SI (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON"
+ "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "<VQH_mnem>")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_<code>v4sf"
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vqhs_ops:V2SF
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)]))
+ (vec_select:V2SF (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_NEON && flag_unsafe_math_optimizations"
+ "<VQH_mnem>.f32\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "<VQH_mnem>")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_<code>v8hi"
+ [(set (match_operand:V4HI 0 "s_register_operand" "+w")
+ (vqh_ops:V4HI
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_NEON"
+ "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "<VQH_mnem>")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+(define_insn "quad_halves_<code>v16qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "+w")
+ (vqh_ops:V8QI
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_NEON"
+ "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
+ [(set_attr "vqh_mnem" "<VQH_mnem>")
+ (set (attr "neon_type")
+ (if_then_else (eq_attr "vqh_mnem" "vadd")
+ (const_string "neon_int_1") (const_string "neon_int_5")))]
+)
+
+; FIXME: We wouldn't need the following insns if we could write subregs of
+; vector registers. Make an attempt at removing unnecessary moves, though
+; we're really at the mercy of the register allocator.
+
+(define_insn "neon_move_lo_quad_<mode>"
+ [(set (match_operand:ANY128 0 "s_register_operand" "+w")
+ (vec_concat:ANY128
+ (match_operand:<V_HALF> 1 "s_register_operand" "w")
+ (vec_select:<V_HALF>
+ (match_dup 0)
+ (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%e0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_move_hi_quad_<mode>"
+ [(set (match_operand:ANY128 0 "s_register_operand" "+w")
+ (vec_concat:ANY128
+ (vec_select:<V_HALF>
+ (match_dup 0)
+ (match_operand:ANY128 2 "vect_par_constant_low" ""))
+ (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
+
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%f0, %P1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_expand "move_hi_quad_<mode>"
+ [(match_operand:ANY128 0 "s_register_operand" "")
+ (match_operand:<V_HALF> 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+
+ for (i=0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (i);
+
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
+
+ DONE;
+})
+
+(define_expand "move_lo_quad_<mode>"
+ [(match_operand:ANY128 0 "s_register_operand" "")
+ (match_operand:<V_HALF> 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+
+ for (i=0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
+
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
+
+ DONE;
+})
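+
+;; For example (hypothetical allocation): move_lo_quad_v4sf copying d7 into
+;; the low half of q8 emits "vmov d16, d7"; if the allocator has already
+;; placed the source in d16, the neon_move_lo_quad insn emits nothing at all.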
+
+;; Reduction operations
+
+(define_expand "reduc_splus_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpadd_internal<mode>);
+ DONE;
+})
+
+(define_expand "reduc_splus_<mode>"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ rtx step1 = gen_reg_rtx (<V_HALF>mode);
+ rtx res_d = gen_reg_rtx (<V_HALF>mode);
+
+ emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
+ emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
+ emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+
+ DONE;
+})
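+
+;; As a sketch of the whole quad-word sum reduction (registers invented),
+;; reduc_splus_v4si expands to roughly
+;;   vadd.i32  d0, d2, d3   @ quad_halves: {a+c, b+d}
+;;   vpadd.i32 d0, d0, d0   @ pairwise add: a+b+c+d
+;; followed by move_lo_quad to place the D-register result in the low half
+;; of the output Q register.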
+
+(define_insn "reduc_splus_v2di"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vadd.i64\t%e0, %e1, %f1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+;; NEON does not distinguish between signed and unsigned addition except on
+;; widening operations.
+(define_expand "reduc_uplus_<mode>"
+ [(match_operand:VDQI 0 "s_register_operand" "")
+ (match_operand:VDQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "reduc_smin_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smin_<mode>"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ rtx step1 = gen_reg_rtx (<V_HALF>mode);
+ rtx res_d = gen_reg_rtx (<V_HALF>mode);
+
+ emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
+ emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
+ emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_smax_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmax<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smax_<mode>"
+ [(match_operand:VQ 0 "s_register_operand" "")
+ (match_operand:VQ 1 "s_register_operand" "")]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ rtx step1 = gen_reg_rtx (<V_HALF>mode);
+ rtx res_d = gen_reg_rtx (<V_HALF>mode);
+
+ emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
+ emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
+ emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_umin_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umin_<mode>"
+ [(match_operand:VQI 0 "s_register_operand" "")
+ (match_operand:VQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (<V_HALF>mode);
+ rtx res_d = gen_reg_rtx (<V_HALF>mode);
+
+ emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
+ emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
+ emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+
+ DONE;
+})
+
+(define_expand "reduc_umax_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumax<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umax_<mode>"
+ [(match_operand:VQI 0 "s_register_operand" "")
+ (match_operand:VQI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ rtx step1 = gen_reg_rtx (<V_HALF>mode);
+ rtx res_d = gen_reg_rtx (<V_HALF>mode);
+
+ emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
+ emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
+ emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
+
+ DONE;
+})
+
+(define_insn "neon_vpadd_internal<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vpadd.<V_if_elem>\t%P0, %P1, %P2"
+ ;; Assume this schedules like vadd.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_1")))]
+)
+
+(define_insn "neon_vpsmin<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_s_elem>\t%P0, %P1, %P2"
+ ;; Assume this schedules like vmin.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vpsmax<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_s_elem>\t%P0, %P1, %P2"
+ ;; Assume this schedules like vmax.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vpumin<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_u_elem>\t%P0, %P1, %P2"
+ ;; Assume this schedules like umin.
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "neon_vpumax<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_u_elem>\t%P0, %P1, %P2"
+ ;; Assume this schedules like umax.
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+;; Saturating arithmetic
+
+; NOTE: Neon supports many more saturating variants of instructions than the
+; following, but these are all that GCC currently understands.
+; FIXME: Actually, GCC doesn't yet know how to create saturating add/sub by
+; itself either, although these patterns may be used by intrinsics once those
+; are added.
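+;
+; For reference, the patterns below map the RTL codes to instructions as
+; follows: ss_plus -> vqadd.s<size>, us_plus -> vqadd.u<size>,
+; ss_minus -> vqsub.s<size>, us_minus -> vqsub.u<size>.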
+
+(define_insn "*ss_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "*us_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "*ss_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "*us_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+;; Conditional instructions. These are comparisons with conditional moves for
+;; vectors. They perform the assignment:
+;;
+;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
+;;
+;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
+;; element-wise.
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "")
+ (if_then_else:VDQW
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand:VDQW 4 "s_register_operand" "")
+ (match_operand:VDQW 5 "nonmemory_operand" "")])
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:VDQW 2 "s_register_operand" "")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ rtx mask;
+ int inverse = 0, immediate_zero = 0;
+ /* See the description of "magic" bits in the 'T' case of
+ arm_print_operand. */
+ HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
+ ? 3 : 1;
+ rtx magic_rtx = GEN_INT (magic_word);
+
+ mask = gen_reg_rtx (<V_cmp_result>mode);
+
+ if (operands[5] == CONST0_RTX (<MODE>mode))
+ immediate_zero = 1;
+ else if (!REG_P (operands[5]))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case GE:
+ emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case GT:
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case EQ:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case LE:
+ if (immediate_zero)
+ emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ else
+ emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+ magic_rtx));
+ break;
+
+ case LT:
+ if (immediate_zero)
+ emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ else
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+ magic_rtx));
+ break;
+
+ case NE:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ inverse = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (inverse)
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+ operands[1]));
+ else
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+ operands[2]));
+
+ DONE;
+})
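+
+;; A made-up example of the expansion above, for V4SF:
+;;   v0 = (v4 < v5) ? v1 : v2
+;; (with operand 5 not an immediate zero) becomes a swapped-operand vcgt
+;; followed by a bitwise select, roughly:
+;;   vcgt.f32 q8, q13, q12   @ mask = (v5 > v4), i.e. v4 < v5
+;;   vbsl     q8, q10, q11   @ v1 where the mask is set, else v2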
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (if_then_else:VDQIW
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand:VDQIW 4 "s_register_operand" "")
+ (match_operand:VDQIW 5 "s_register_operand" "")])
+ (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx mask;
+ int inverse = 0, immediate_zero = 0;
+
+ mask = gen_reg_rtx (<V_cmp_result>mode);
+
+ if (operands[5] == CONST0_RTX (<MODE>mode))
+ immediate_zero = 1;
+ else if (!REG_P (operands[5]))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case GEU:
+ emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case GTU:
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case EQ:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case LEU:
+ if (immediate_zero)
+ emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ else
+ emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+ const0_rtx));
+ break;
+
+ case LTU:
+ if (immediate_zero)
+ emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ else
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+ const0_rtx));
+ break;
+
+ case NE:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ inverse = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (inverse)
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+ operands[1]));
+ else
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+ operands[2]));
+
+ DONE;
+})
+
+;; Patterns for builtins.
+
+; good for plain vadd, vaddq.
+
+(define_expand "neon_vadd<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Note that NEON operations don't support the full IEEE 754 standard: in
+; particular, denormal values are flushed to zero. This means that GCC cannot
+; use those instructions for autovectorization, etc. unless
+; -funsafe-math-optimizations is in effect (in which case flush-to-zero
+; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
+; header) must work in either case: if -funsafe-math-optimizations is given,
+; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
+; expand to unspecs (which may potentially limit the extent to which they might
+; be optimized by generic code).
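+;
+; E.g. (illustrative): with -funsafe-math-optimizations, a vaddq_f32
+; intrinsic expands through the canonical *add<mode>3_neon pattern and stays
+; visible to the optimizers; without it, the same intrinsic expands to the
+; UNSPEC_VADD pattern below, which still emits "vadd.f32" but is treated as
+; opaque by generic optimizations.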
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vadd<mode>_unspec"
+ [(set (match_operand:VDQX 0 "s_register_operand" "=w")
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")]
+ UNSPEC_VADD))]
+ "TARGET_NEON"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_1")))]
+)
+
+; operand 3 represents in bits:
+; bit 0: signed (vs unsigned).
+; bit 1: rounding (vs none).
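+;
+; For example (hypothetical values): operand 3 == 1 selects the signed,
+; non-rounding form, so neon_vhadd below prints "vhadd.s8" via %T3 and %O3,
+; while operand 3 == 3 adds the rounding bit and prints "vrhadd.s8".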
+
+(define_insn "neon_vaddl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VADDL))]
+ "TARGET_NEON"
+ "vaddl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+(define_insn "neon_vaddw<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VADDW))]
+ "TARGET_NEON"
+ "vaddw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
+ [(set_attr "neon_type" "neon_int_2")]
+)
+
+; vhadd and vrhadd.
+
+(define_insn "neon_vhadd<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VHADD))]
+ "TARGET_NEON"
+ "v%O3hadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "neon_vqadd<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQADD))]
+ "TARGET_NEON"
+ "vqadd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "neon_vaddhn<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:VN 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VADDHN))]
+ "TARGET_NEON"
+ "v%O3addhn.<V_if_elem>\t%P0, %q1, %q2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+;; We cannot replace this unspec with mul<mode>3 because of the odd
+;; polynomial multiplication case that can be specified by operand 3.
+(define_insn "neon_vmul<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VMUL))]
+ "TARGET_NEON"
+ "vmul.%F3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
+)
+
+(define_expand "neon_vmla<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmla<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLA))]
+ "TARGET_NEON"
+ "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
+(define_insn "neon_vmlal<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:VW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VMLAL))]
+ "TARGET_NEON"
+ "vmlal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_expand "neon_vmls<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmls<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLS))]
+ "TARGET_NEON"
+ "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd")
+ (const_string "neon_fp_vmla_qqq"))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
+ (if_then_else
+ (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_qqq_8_16")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
+)
+
+(define_insn "neon_vmlsl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:VW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VMLSL))]
+ "TARGET_NEON"
+ "vmlsl.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vqdmulh<mode>"
+ [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
+ (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
+ (match_operand:VMDQI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQDMULH))]
+ "TARGET_NEON"
+ "vq%O3dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32"))
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_qqq_8_16_32_ddd_32")
+ (const_string "neon_mul_qqq_8_16_32_ddd_32"))))]
+)
+
+(define_insn "neon_vqdmlal<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VQDMLAL))]
+ "TARGET_NEON"
+ "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vqdmlsl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VQDMLSL))]
+ "TARGET_NEON"
+ "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vmull<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
+ (match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VMULL))]
+ "TARGET_NEON"
+ "vmull.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
+
+(define_insn "neon_vqdmull<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQDMULL))]
+ "TARGET_NEON"
+ "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
+
+(define_expand "neon_vsub<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vsub<mode>_unspec"
+ [(set (match_operand:VDQX 0 "s_register_operand" "=w")
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")]
+ UNSPEC_VSUB))]
+ "TARGET_NEON"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_2")))]
+)
+
+(define_insn "neon_vsubl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSUBL))]
+ "TARGET_NEON"
+ "vsubl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_2")]
+)
+
+(define_insn "neon_vsubw<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSUBW))]
+ "TARGET_NEON"
+ "vsubw.%T3%#<V_sz_elem>\t%q0, %q1, %P2"
+ [(set_attr "neon_type" "neon_int_2")]
+)
+
+(define_insn "neon_vqsub<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSUB))]
+ "TARGET_NEON"
+ "vqsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "neon_vhsub<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VHSUB))]
+ "TARGET_NEON"
+ "vhsub.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "neon_vsubhn<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:VN 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSUBHN))]
+ "TARGET_NEON"
+ "v%O3subhn.<V_if_elem>\t%P0, %q1, %q2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "neon_vceq<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCEQ))]
+ "TARGET_NEON"
+ "@
+ vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vcge<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCGE))]
+ "TARGET_NEON"
+ "@
+ vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vcgt<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCGT))]
+ "TARGET_NEON"
+ "@
+ vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+;; VCLE and VCLT only support comparisons with immediate zero (register
+;; variants are VCGE and VCGT with operands reversed).
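+;;
+;; For illustration: a register-register "a <= b" comparison is therefore
+;; emitted as vcge with the operands swapped (b >= a), while "a <= 0" can
+;; use "vcle.<type> dd, da, #0" directly, as in the vcond expanders above.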
+
+(define_insn "neon_vcle<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCLE))]
+ "TARGET_NEON"
+ "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vclt<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCLT))]
+ "TARGET_NEON"
+ "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vcage<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:VCVTF 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCAGE))]
+ "TARGET_NEON"
+ "vacge.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vcagt<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:VCVTF 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCAGT))]
+ "TARGET_NEON"
+ "vacgt.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vtst<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VTST))]
+ "TARGET_NEON"
+ "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_4")]
+)
+
+(define_insn "neon_vabd<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VABD))]
+ "TARGET_NEON"
+ "vabd.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vabdl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
+ (match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VABDL))]
+ "TARGET_NEON"
+ "vabdl.%T3%#<V_sz_elem>\t%q0, %P1, %P2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
+(define_insn "neon_vaba<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0")
+ (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:VDQIW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABD)))]
+ "TARGET_NEON"
+ "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vaba") (const_string "neon_vaba_qqq")))]
+)
+
+(define_insn "neon_vabal<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (match_operand:<V_widen> 1 "s_register_operand" "0")
+ (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:VW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABDL)))]
+ "TARGET_NEON"
+ "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
+ [(set_attr "neon_type" "neon_vaba")]
+)
+
+(define_insn "neon_vmax<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VMAX))]
+ "TARGET_NEON"
+ "vmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vmin<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VMIN))]
+ "TARGET_NEON"
+ "vmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_expand "neon_vpadd<mode>"
+ [(match_operand:VD 0 "s_register_operand" "=w")
+ (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_insn "neon_vpaddl<mode>"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
+ (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VPADDL))]
+ "TARGET_NEON"
+ "vpaddl.%T2%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+ ;; Assume this schedules like vaddl.
+ [(set_attr "neon_type" "neon_int_3")]
+)
+
+(define_insn "neon_vpadal<mode>"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
+ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VPADAL))]
+ "TARGET_NEON"
+ "vpadal.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+ ;; Assume this schedules like vpadd.
+ [(set_attr "neon_type" "neon_int_1")]
+)
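+
+;; Roughly, the pairwise-add family above behaves as:
+;;   vpadd.i32  d0, d1, d2  @ d0 = { d1[0]+d1[1], d2[0]+d2[1] }
+;;   vpaddl.s32 d0, d1      @ d0 = (s64) d1[0] + (s64) d1[1]
+;;   vpadal.s32 d0, d1      @ d0 += (s64) d1[0] + (s64) d1[1]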
+
+(define_insn "neon_vpmax<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VPMAX))]
+ "TARGET_NEON"
+ "vpmax.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ ;; Assume this schedules like vmax.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vpmin<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VPMIN))]
+ "TARGET_NEON"
+ "vpmin.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ ;; Assume this schedules like vmin.
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vrecps<mode>"
+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:VCVTF 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VRECPS))]
+ "TARGET_NEON"
+ "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vrecps_vrsqrts_ddd")
+ (const_string "neon_fp_vrecps_vrsqrts_qqq")))]
+)
+
+(define_insn "neon_vrsqrts<mode>"
+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:VCVTF 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VRSQRTS))]
+ "TARGET_NEON"
+ "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vrecps_vrsqrts_ddd")
+ (const_string "neon_fp_vrecps_vrsqrts_qqq")))]
+)
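+
+;; vrecps and vrsqrts compute the Newton-Raphson step values, roughly:
+;;   vrecps.f32  d0, d1, d2  @ d0[i] = 2.0 - d1[i] * d2[i]
+;;   vrsqrts.f32 d0, d1, d2  @ d0[i] = (3.0 - d1[i] * d2[i]) / 2.0
+;; so an estimate x of 1/a (from vrecpe below) can be refined with
+;; x' = x * vrecps(a, x), and similarly for reciprocal square roots.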
+
+(define_expand "neon_vabs<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vqabs<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VQABS))]
+ "TARGET_NEON"
+ "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_vqneg_vqabs")]
+)
+
+(define_expand "neon_vneg<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vqneg<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VQNEG))]
+ "TARGET_NEON"
+ "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_vqneg_vqabs")]
+)
+
+(define_insn "neon_vcls<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VCLS))]
+ "TARGET_NEON"
+ "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
+ (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_expand "neon_vclz<mode>"
+ [(match_operand:VDQIW 0 "s_register_operand" "")
+ (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:VE 0 "s_register_operand" "=w")
+ (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_expand "neon_vcnt<mode>"
+ [(match_operand:VE 0 "s_register_operand" "=w")
+ (match_operand:VE 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vrecpe<mode>"
+ [(set (match_operand:V32 0 "s_register_operand" "=w")
+ (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VRECPE))]
+ "TARGET_NEON"
+ "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vrsqrte<mode>"
+ [(set (match_operand:V32 0 "s_register_operand" "=w")
+ (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VRSQRTE))]
+ "TARGET_NEON"
+ "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_expand "neon_vmvn<mode>"
+ [(match_operand:VDQIW 0 "s_register_operand" "")
+ (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vget_lane<mode>_sext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lane<mode>_zext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ return "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lane<mode>_sext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (sign_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+{
+ rtx ops[3];
+ int regno = REGNO (operands[1]);
+ unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int elt_adj = elt % halfelts;
+
+ if (BYTES_BIG_ENDIAN)
+ elt_adj = halfelts - 1 - elt_adj;
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
+ ops[2] = GEN_INT (elt_adj);
+ output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops);
+
+ return "";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lane<mode>_zext_internal"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:<V_elem>
+ (match_operand:VQ 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+{
+ rtx ops[3];
+ int regno = REGNO (operands[1]);
+ unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int elt_adj = elt % halfelts;
+
+ if (BYTES_BIG_ENDIAN)
+ elt_adj = halfelts - 1 - elt_adj;
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
+ ops[2] = GEN_INT (elt_adj);
+ output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops);
+
+ return "";
+}
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
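+
+;; Worked example of the lane decomposition used by the two Q-register
+;; extractions above: for lane 5 of a V8HImode operand, halfelts = 4, so
+;; the element lives in the high D register (regno + 2) at adjusted lane
+;; 5 % 4 = 1; with operand 1 in q2 this emits, e.g.,
+;; "vmov.u16 r0, d5[1]" (little-endian).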
+
+(define_expand "neon_vget_lane<mode>"
+ [(match_operand:<V_ext> 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT magic = INTVAL (operands[3]);
+ rtx insn;
+
+ neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* The intrinsics are defined in terms of a model where the
+ element ordering in memory is vldm order, whereas the generic
+ RTL is defined in terms of a model where the element ordering
+ in memory is array order. Convert the lane number to conform
+ to this model. */
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int reg_nelts
+ = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+ elt ^= reg_nelts - 1;
+ operands[2] = GEN_INT (elt);
+ }
+
+ if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
+ insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
+ else
+ {
+ if ((magic & 1) != 0)
+ insn = gen_neon_vget_lane<mode>_sext_internal (operands[0], operands[1],
+ operands[2]);
+ else
+ insn = gen_neon_vget_lane<mode>_zext_internal (operands[0], operands[1],
+ operands[2]);
+ }
+ emit_insn (insn);
+ DONE;
+})
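+
+;; For example, on big-endian, lane 1 of a V8HImode vector has
+;; reg_nelts = 64 / 16 = 4, so the lane number becomes 1 ^ 3 = 2 before
+;; the extraction is emitted.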
+
+; Operand 3 (the info word) is ignored: the signedness it encodes makes no
+; difference when a whole 64-bit element is simply moved.
+
+(define_expand "neon_vget_lanedi"
+ [(match_operand:DI 0 "s_register_operand" "=r")
+ (match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, 1);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vget_lanev2di"
+ [(match_operand:DI 0 "s_register_operand" "=r")
+ (match_operand:V2DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, 2);
+ emit_insn (gen_vec_extractv2di (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "neon_vset_lane<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "=w")
+ (match_operand:<V_elem> 1 "s_register_operand" "r")
+ (match_operand:VDQ 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ unsigned int elt = INTVAL (operands[3]);
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int reg_nelts
+ = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+ elt ^= reg_nelts - 1;
+ }
+
+ emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
+ GEN_INT (1 << elt), operands[2]));
+ DONE;
+})
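+
+;; Note that vec_set<mode>_internal takes the lane as a one-hot element
+;; mask rather than an index, hence the GEN_INT (1 << elt) above.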
+
+; See the neon_vget_lanedi comment for why operands 2 & 3 are ignored.
+
+(define_expand "neon_vset_lanedi"
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "r")
+ (match_operand:DI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, 1);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vcreate<mode>"
+ [(match_operand:VDX 0 "s_register_operand" "")
+ (match_operand:DI 1 "general_operand" "")]
+ "TARGET_NEON"
+{
+ rtx src = gen_lowpart (<MODE>mode, operands[1]);
+ emit_move_insn (operands[0], src);
+ DONE;
+})
+
+(define_insn "neon_vdup_n<mode>"
+ [(set (match_operand:VX 0 "s_register_operand" "=w")
+ (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
+ "TARGET_NEON"
+ "vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
+ ;; Assume this schedules like vmov.
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vdup_n<mode>"
+ [(set (match_operand:V32 0 "s_register_operand" "=w,w")
+ (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
+ "TARGET_NEON"
+ "@
+ vdup%?.<V_sz_elem>\t%<V_reg>0, %1
+ vdup%?.<V_sz_elem>\t%<V_reg>0, %y1"
+ ;; Assume this schedules like vmov.
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_expand "neon_vdup_ndi"
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "r")]
+ "TARGET_NEON"
+{
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+}
+)
+
+(define_insn "neon_vdup_nv2di"
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
+ (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
+ "TARGET_NEON"
+ "@
+ vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1
+ vmov%?\t%e0, %P1\;vmov%?\t%f0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "8")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vdup_lane<mode>_internal"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (vec_duplicate:VDQW
+ (vec_select:<V_elem>
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_NEON"
+{
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ if (<Is_d_reg>)
+ return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+ else
+ return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
+}
+ ;; Assume this schedules like vmov.
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_expand "neon_vdup_lane<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int reg_nelts
+ = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
+ elt ^= reg_nelts - 1;
+ operands[2] = GEN_INT (elt);
+ }
+ emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Scalar index is ignored, since only zero is valid here.
+(define_expand "neon_vdup_lanedi"
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, 1);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+; Likewise for v2di, as the DImode second operand has only a single element.
+(define_expand "neon_vdup_lanev2di"
+ [(match_operand:V2DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[2], 0, 1);
+ emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
+ DONE;
+})
+
+;; In this insn, operand 1 should be the low part, and operand 2 the high
+;; part, of the destination vector.
+;; FIXME: With a different implementation of this builtin, the register
+;; allocator could more often place the inputs directly in the right halves
+;; of the destination, so that no move instructions would need to be output
+;; at all; the lack of subregs for vector modes makes that tricky, though.
+
+(define_insn "neon_vcombine<mode>"
+ [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
+ (vec_concat:<V_DOUBLE> (match_operand:VDX 1 "s_register_operand" "w")
+ (match_operand:VDX 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src1 = REGNO (operands[1]);
+ int src2 = REGNO (operands[2]);
+ rtx destlo;
+
+ if (src1 == dest && src2 == dest + 2)
+ return "";
+ else if (src2 == dest && src1 == dest + 2)
+ /* Special case of reversed high/low parts. */
+ return "vswp\t%P1, %P2";
+
+ destlo = gen_rtx_REG (<MODE>mode, dest);
+
+ if (!reg_overlap_mentioned_p (operands[2], destlo))
+ {
+ /* Try to avoid unnecessary moves if part of the result is in the right
+ place already. */
+ if (src1 != dest)
+ output_asm_insn ("vmov\t%e0, %P1", operands);
+ if (src2 != dest + 2)
+ output_asm_insn ("vmov\t%f0, %P2", operands);
+ }
+ else
+ {
+ if (src2 != dest + 2)
+ output_asm_insn ("vmov\t%f0, %P2", operands);
+ if (src1 != dest)
+ output_asm_insn ("vmov\t%e0, %P1", operands);
+ }
+
+ return "";
+}
+ ;; We set the neon_type attribute based on the vmov instructions above.
+ [(set_attr "length" "8")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
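+
+;; For example, with a q1 (d2/d3) destination and operand 2 currently in
+;; d2, the high-half move is emitted first so that copying operand 1 into
+;; d2 cannot clobber operand 2; the reg_overlap_mentioned_p test above
+;; picks the safe ordering.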
+
+(define_insn "neon_vget_highv16qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)
+ return "vmov\t%P0, %f1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv8hi"
+ [(set (match_operand:V4HI 0 "s_register_operand" "=w")
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)
+ return "vmov\t%P0, %f1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv4si"
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)
+ return "vmov\t%P0, %f1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv4sf"
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)
+ return "vmov\t%P0, %f1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv2di"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)
+ return "vmov\t%P0, %f1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv16qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%P0, %e1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv8hi"
+ [(set (match_operand:V4HI 0 "s_register_operand" "=w")
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%P0, %e1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv4si"
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%P0, %e1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv4sf"
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%P0, %e1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv2di"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(const_int 0)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)
+ return "vmov\t%P0, %e1";
+ else
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vcvt<mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VCVT))]
+ "TARGET_NEON"
+ "vcvt.%T2%#32.f32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vcvt<mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VCVT))]
+ "TARGET_NEON"
+ "vcvt.f32.%T2%#32\t%<V_reg>0, %<V_reg>1"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vcvt_n<mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCVT_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, 33);
+ return "vcvt.%T3%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vcvt_n<mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
+ (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCVT_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, 33);
+ return "vcvt.f32.%T3%#32\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq")))]
+)
+
+(define_insn "neon_vmovn<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VMOVN))]
+ "TARGET_NEON"
+ "vmovn.<V_if_elem>\t%P0, %q1"
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vqmovn<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VQMOVN))]
+ "TARGET_NEON"
+ "vqmovn.%T2%#<V_sz_elem>\t%P0, %q1"
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vqmovun<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VQMOVUN))]
+ "TARGET_NEON"
+ "vqmovun.<V_s_elem>\t%P0, %q1"
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vmovl<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VMOVL))]
+ "TARGET_NEON"
+ "vmovl.%T2%#<V_sz_elem>\t%q0, %P1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vmul_lane<mode>"
+ [(set (match_operand:VMD 0 "s_register_operand" "=w")
+ (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
+ (match_operand:VMD 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VMUL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmul_ddd")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))))]
+)
+
+(define_insn "neon_vmul_lane<mode>"
+ [(set (match_operand:VMQ 0 "s_register_operand" "=w")
+ (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
+ (match_operand:<V_HALF> 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VMUL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<V_HALF>mode));
+ return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmul_qqd")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
+ (const_string "neon_mul_qqd_32_scalar"))))]
+)
+
+(define_insn "neon_vmull_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
+ (match_operand:VMDI 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VMULL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmull.%T4%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
+
+(define_insn "neon_vqdmull_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
+ (match_operand:VMDI 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VQDMULL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
+
+(define_insn "neon_vqdmulh_lane<mode>"
+ [(set (match_operand:VMQI 0 "s_register_operand" "=w")
+ (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
+ (match_operand:<V_HALF> 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VQDMULH_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vq%O4dmulh.%T4%#<V_sz_elem>\t%q0, %q1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
+ (const_string "neon_mul_qqd_32_scalar")))]
+)
+
+(define_insn "neon_vqdmulh_lane<mode>"
+ [(set (match_operand:VMDI 0 "s_register_operand" "=w")
+ (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
+ (match_operand:VMDI 2 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VQDMULH_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vq%O4dmulh.%T4%#<V_sz_elem>\t%P0, %P1, %P2[%c3]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mul_ddd_16_scalar_32_16_long_scalar")
+ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
+)
+
+(define_insn "neon_vmla_lane<mode>"
+ [(set (match_operand:VMD 0 "s_register_operand" "=w")
+ (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
+ (match_operand:VMD 2 "s_register_operand" "w")
+ (match_operand:VMD 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLA_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd_scalar")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))]
+)
+
+(define_insn "neon_vmla_lane<mode>"
+ [(set (match_operand:VMQ 0 "s_register_operand" "=w")
+ (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
+ (match_operand:VMQ 2 "s_register_operand" "w")
+ (match_operand:<V_HALF> 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLA_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmla_qqq_scalar")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar"))))]
+)
+
+(define_insn "neon_vmlal_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLAL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmlal.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vqdmlal_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VQDMLAL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vmls_lane<mode>"
+ [(set (match_operand:VMD 0 "s_register_operand" "=w")
+ (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
+ (match_operand:VMD 2 "s_register_operand" "w")
+ (match_operand:VMD 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLS_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmla_ddd_scalar")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))))]
+)
+
+(define_insn "neon_vmls_lane<mode>"
+ [(set (match_operand:VMQ 0 "s_register_operand" "=w")
+ (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
+ (match_operand:VMQ 2 "s_register_operand" "w")
+ (match_operand:<V_HALF> 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLS_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (const_string "neon_fp_vmla_qqq_scalar")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
+ (const_string "neon_mla_qqq_32_qqd_32_scalar"))))]
+)
+
+(define_insn "neon_vmlsl_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VMLSL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vmlsl.%T5%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+(define_insn "neon_vqdmlsl_lane<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
+ (match_operand:VMDI 2 "s_register_operand" "w")
+ (match_operand:VMDI 3 "s_register_operand"
+ "<scalar_mul_constraint>")
+ (match_operand:SI 4 "immediate_operand" "i")
+ (match_operand:SI 5 "immediate_operand" "i")]
+ UNSPEC_VQDMLSL_LANE))]
+ "TARGET_NEON"
+{
+ neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Scalar_mul_8_16>") (const_int 0))
+ (const_string "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
+ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
+)
+
+; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value from
+; a core register into a temporary vector register, then use a scalar taken
+; from that. This isn't an optimal solution if e.g. the scalar has just been
+; read from memory or extracted from another vector. In the latter case it's
+; currently better to use the "_lane" variant, and the former case can
+; probably be implemented using vld1_lane, but that hasn't been done yet.
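+;
+; In other words, the expansions below implement, e.g.,
+;   vmul_n_f32 (a, s)
+; roughly as
+;   vmul_lane_f32 (a, vset_lane_f32 (s, tmp, 0), 0)
+; where tmp is an uninitialized temporary vector.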
+
+(define_expand "neon_vmul_n<mode>"
+ [(match_operand:VMD 0 "s_register_operand" "")
+ (match_operand:VMD 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, const0_rtx));
+ DONE;
+})
+
+(define_expand "neon_vmul_n<mode>"
+ [(match_operand:VMQ 0 "s_register_operand" "")
+ (match_operand:VMQ 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<V_HALF>mode);
+ emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, const0_rtx));
+ DONE;
+})
+
+(define_expand "neon_vmull_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:VMDI 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vmull_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, operands[3]));
+ DONE;
+})
+
+(define_expand "neon_vqdmull_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:VMDI 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, const0_rtx));
+ DONE;
+})
+
+(define_expand "neon_vqdmulh_n<mode>"
+ [(match_operand:VMDI 0 "s_register_operand" "")
+ (match_operand:VMDI 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, operands[3]));
+ DONE;
+})
+
+(define_expand "neon_vqdmulh_n<mode>"
+ [(match_operand:VMQI 0 "s_register_operand" "")
+ (match_operand:VMQI 1 "s_register_operand" "")
+ (match_operand:<V_elem> 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<V_HALF>mode);
+ emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
+ emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
+ const0_rtx, operands[3]));
+ DONE;
+})
+
+(define_expand "neon_vmla_n<mode>"
+ [(match_operand:VMD 0 "s_register_operand" "")
+ (match_operand:VMD 1 "s_register_operand" "")
+ (match_operand:VMD 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vmla_n<mode>"
+ [(match_operand:VMQ 0 "s_register_operand" "")
+ (match_operand:VMQ 1 "s_register_operand" "")
+ (match_operand:VMQ 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<V_HALF>mode);
+ emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vmlal_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:<V_widen> 1 "s_register_operand" "")
+ (match_operand:VMDI 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmlal_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vqdmlal_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:<V_widen> 1 "s_register_operand" "")
+ (match_operand:VMDI 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vmls_n<mode>"
+ [(match_operand:VMD 0 "s_register_operand" "")
+ (match_operand:VMD 1 "s_register_operand" "")
+ (match_operand:VMD 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vmls_n<mode>"
+ [(match_operand:VMQ 0 "s_register_operand" "")
+ (match_operand:VMQ 1 "s_register_operand" "")
+ (match_operand:VMQ 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<V_HALF>mode);
+ emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vmlsl_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:<V_widen> 1 "s_register_operand" "")
+ (match_operand:VMDI 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vmlsl_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_expand "neon_vqdmlsl_n<mode>"
+ [(match_operand:<V_widen> 0 "s_register_operand" "")
+ (match_operand:<V_widen> 1 "s_register_operand" "")
+ (match_operand:VMDI 2 "s_register_operand" "")
+ (match_operand:<V_elem> 3 "s_register_operand" "")
+ (match_operand:SI 4 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
+ emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
+ tmp, const0_rtx, operands[4]));
+ DONE;
+})
+
+(define_insn "neon_vext<mode>"
+ [(set (match_operand:VDQX 0 "s_register_operand" "=w")
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VEXT))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
+ return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_bp_simple")
+ (const_string "neon_bp_2cycle")))]
+)
+
+(define_insn "neon_vrev64<mode>"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VREV64))]
+ "TARGET_NEON"
+ "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vrev32<mode>"
+ [(set (match_operand:VX 0 "s_register_operand" "=w")
+ (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VREV32))]
+ "TARGET_NEON"
+ "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vrev16<mode>"
+ [(set (match_operand:VE 0 "s_register_operand" "=w")
+ (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VREV16))]
+ "TARGET_NEON"
+ "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
+; allocation. For an intrinsic of the form:
+;   rD = vbsl_* (rS, rN, rM)
+; we can use any of:
+;   vbsl rS, rN, rM  (if D = S)
+;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
+;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
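+;
+; In all cases the result computed is, bitwise,
+;   rD = (rS & rN) | (~rS & rM)
+; i.e. set bits in the selector rS take bits from rN, clear bits from rM.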
+
+(define_insn "neon_vbsl<mode>_internal"
+ [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
+ (match_operand:VDQX 2 "s_register_operand" " w,w,0")
+ (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
+ UNSPEC_VBSL))]
+ "TARGET_NEON"
+ "@
+ vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
+ vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
+ vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
+ [(set_attr "neon_type" "neon_int_1")]
+)
+
+(define_expand "neon_vbsl<mode>"
+ [(set (match_operand:VDQX 0 "s_register_operand" "")
+ (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "s_register_operand" "")
+ (match_operand:VDQX 3 "s_register_operand" "")]
+ UNSPEC_VBSL))]
+ "TARGET_NEON"
+{
+ /* We can't alias operands together if they have different modes. */
+ operands[1] = gen_lowpart (<MODE>mode, operands[1]);
+})
+
+(define_insn "neon_vshl<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHL))]
+ "TARGET_NEON"
+ "v%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_vshl_ddd")
+ (const_string "neon_shift_3")))]
+)
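+
+;; Note that the register form of vshl takes per-element shift counts from
+;; the bottom byte of each element of operand 2, interpreted as signed:
+;; negative counts shift right, which is why there is no register-shift
+;; vshr pattern.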
+
+(define_insn "neon_vqshl<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSHL))]
+ "TARGET_NEON"
+ "vq%O3shl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_shift_2")
+ (const_string "neon_vqshl_vrshl_vqrshl_qqq")))]
+)
+
+(define_insn "neon_vshr_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHR_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
+ return "v%O3shr.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vshrn_n<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHRN_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
+ return "v%O3shrn.<V_if_elem>\t%P0, %q1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vqshrn_n<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSHRN_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
+ return "vq%O3shrn.%T3%#<V_sz_elem>\t%P0, %q1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vqshrun_n<mode>"
+ [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
+ (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSHRUN_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
+ return "vq%O3shrun.%T3%#<V_sz_elem>\t%P0, %q1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vshl_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHL_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
+ return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vqshl_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSHL_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
+ return "vqshl.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vqshlu_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VQSHLU_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
+ return "vqshlu.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_2")]
+)
+
+(define_insn "neon_vshll_n<mode>"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHLL_N))]
+ "TARGET_NEON"
+{
+ /* The boundaries are: 0 < imm <= size. */
+ neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
+ return "vshll.%T3%#<V_sz_elem>\t%q0, %P1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vsra_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VSRA_N))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
+ return "v%O4sra.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
+}
+ [(set_attr "neon_type" "neon_vsra_vrsra")]
+)
+
+(define_insn "neon_vsri_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSRI))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
+ return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_shift_1")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "neon_vsli_n<mode>"
+ [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
+ (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
+ (match_operand:VDQIX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSLI))]
+ "TARGET_NEON"
+{
+ neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
+ return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
+}
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_shift_1")
+ (const_string "neon_shift_3")))]
+)
+
+(define_insn "neon_vtbl1v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")]
+ UNSPEC_VTBL))]
+ "TARGET_NEON"
+ "vtbl.8\t%P0, {%P1}, %P2"
+ [(set_attr "neon_type" "neon_bp_2cycle")]
+)
+
+(define_insn "neon_vtbl2v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")]
+ UNSPEC_VTBL))]
+ "TARGET_NEON"
+{
+ rtx ops[4];
+ int tabbase = REGNO (operands[1]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = operands[2];
+ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_2cycle")]
+)
+
+(define_insn "neon_vtbl3v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")]
+ UNSPEC_VTBL))]
+ "TARGET_NEON"
+{
+ rtx ops[5];
+ int tabbase = REGNO (operands[1]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
+ ops[4] = operands[2];
+ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_3cycle")]
+)
+
+(define_insn "neon_vtbl4v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")]
+ UNSPEC_VTBL))]
+ "TARGET_NEON"
+{
+ rtx ops[6];
+ int tabbase = REGNO (operands[1]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
+ ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
+ ops[5] = operands[2];
+ output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_3cycle")]
+)
+
+(define_insn "neon_vtbx1v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
+ (match_operand:V8QI 2 "s_register_operand" "w")
+ (match_operand:V8QI 3 "s_register_operand" "w")]
+ UNSPEC_VTBX))]
+ "TARGET_NEON"
+ "vtbx.8\t%P0, {%P2}, %P3"
+ [(set_attr "neon_type" "neon_bp_2cycle")]
+)
+
+(define_insn "neon_vtbx2v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
+ (match_operand:TI 2 "s_register_operand" "w")
+ (match_operand:V8QI 3 "s_register_operand" "w")]
+ UNSPEC_VTBX))]
+ "TARGET_NEON"
+{
+ rtx ops[4];
+ int tabbase = REGNO (operands[2]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = operands[3];
+ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_2cycle")]
+)
+
+(define_insn "neon_vtbx3v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
+ (match_operand:EI 2 "s_register_operand" "w")
+ (match_operand:V8QI 3 "s_register_operand" "w")]
+ UNSPEC_VTBX))]
+ "TARGET_NEON"
+{
+ rtx ops[5];
+ int tabbase = REGNO (operands[2]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
+ ops[4] = operands[3];
+ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_3cycle")]
+)
+
+(define_insn "neon_vtbx4v8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
+ (match_operand:OI 2 "s_register_operand" "w")
+ (match_operand:V8QI 3 "s_register_operand" "w")]
+ UNSPEC_VTBX))]
+ "TARGET_NEON"
+{
+ rtx ops[6];
+ int tabbase = REGNO (operands[2]);
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (V8QImode, tabbase);
+ ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
+ ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
+ ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
+ ops[5] = operands[3];
+ output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
+
+ return "";
+}
+ [(set_attr "neon_type" "neon_bp_3cycle")]
+)
+
+(define_insn "neon_vtrn<mode>_internal"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ UNSPEC_VTRN1))
+ (set (match_operand:VDQW 3 "s_register_operand" "=2")
+ (unspec:VDQW [(match_dup 1) (match_dup 2)]
+ UNSPEC_VTRN2))]
+ "TARGET_NEON"
+ "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_bp_simple")
+ (const_string "neon_bp_3cycle")))]
+)
+
+(define_expand "neon_vtrn<mode>"
+ [(match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ "TARGET_NEON"
+{
+ neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
+ operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "neon_vzip<mode>_internal"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ UNSPEC_VZIP1))
+ (set (match_operand:VDQW 3 "s_register_operand" "=2")
+ (unspec:VDQW [(match_dup 1) (match_dup 2)]
+ UNSPEC_VZIP2))]
+ "TARGET_NEON"
+ "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_bp_simple")
+ (const_string "neon_bp_3cycle")))]
+)
+
+(define_expand "neon_vzip<mode>"
+ [(match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ "TARGET_NEON"
+{
+ neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
+ operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "neon_vuzp<mode>_internal"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ UNSPEC_VUZP1))
+ (set (match_operand:VDQW 3 "s_register_operand" "=2")
+ (unspec:VDQW [(match_dup 1) (match_dup 2)]
+ UNSPEC_VUZP2))]
+ "TARGET_NEON"
+ "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_bp_simple")
+ (const_string "neon_bp_3cycle")))]
+)
+
+(define_expand "neon_vuzp<mode>"
+ [(match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "s_register_operand" "w")]
+ "TARGET_NEON"
+{
+ neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
+ operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv8qi<mode>"
+ [(match_operand:V8QI 0 "s_register_operand" "")
+ (match_operand:VDX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv4hi<mode>"
+ [(match_operand:V4HI 0 "s_register_operand" "")
+ (match_operand:VDX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv2si<mode>"
+ [(match_operand:V2SI 0 "s_register_operand" "")
+ (match_operand:VDX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv2sf<mode>"
+ [(match_operand:V2SF 0 "s_register_operand" "")
+ (match_operand:VDX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretdi<mode>"
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:VDX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv16qi<mode>"
+ [(match_operand:V16QI 0 "s_register_operand" "")
+ (match_operand:VQX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv8hi<mode>"
+ [(match_operand:V8HI 0 "s_register_operand" "")
+ (match_operand:VQX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv4si<mode>"
+ [(match_operand:V4SI 0 "s_register_operand" "")
+ (match_operand:VQX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv4sf<mode>"
+ [(match_operand:V4SF 0 "s_register_operand" "")
+ (match_operand:VQX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "neon_vreinterpretv2di<mode>"
+ [(match_operand:V2DI 0 "s_register_operand" "")
+ (match_operand:VQX 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_reinterpret (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "neon_vld1<mode>"
+ [(set (match_operand:VDQX 0 "s_register_operand" "=w")
+ (unspec:VDQX [(mem:VDQX (match_operand:SI 1 "s_register_operand" "r"))]
+ UNSPEC_VLD1))]
+ "TARGET_NEON"
+ "vld1.<V_sz_elem>\t%h0, [%1]"
+ [(set_attr "neon_type" "neon_vld1_1_2_regs")]
+)
+
+(define_insn "neon_vld1_lane<mode>"
+ [(set (match_operand:VDX 0 "s_register_operand" "=w")
+ (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:VDX 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VLD1_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ if (max == 1)
+ return "vld1.<V_sz_elem>\t%P0, [%1]";
+ else
+ return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
+ (const_string "neon_vld1_1_2_regs")
+ (const_string "neon_vld1_vld2_lane")))]
+)
+
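+;; For quad-word vectors a lane access really touches only one half of the
+;; register: lanes in the upper half are remapped to the next D register
+;; (regno + 2) with the index reduced by half the lane count, e.g. lane 5
+;; of a V8HI value in q0 becomes lane 1 of d1.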
+(define_insn "neon_vld1_lane<mode>"
+ [(set (match_operand:VQX 0 "s_register_operand" "=w")
+ (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:VQX 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VLD1_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ operands[3] = GEN_INT (lane);
+ }
+ operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
+ if (max == 2)
+ return "vld1.<V_sz_elem>\t%P0, [%1]";
+ else
+ return "vld1.<V_sz_elem>\t{%P0[%c3]}, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 2))
+ (const_string "neon_vld1_1_2_regs")
+ (const_string "neon_vld1_vld2_lane")))]
+)
+
+(define_insn "neon_vld1_dup<mode>"
+ [(set (match_operand:VDX 0 "s_register_operand" "=w")
+ (unspec:VDX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))]
+ UNSPEC_VLD1_DUP))]
+ "TARGET_NEON"
+{
+ if (GET_MODE_NUNITS (<MODE>mode) > 1)
+ return "vld1.<V_sz_elem>\t{%P0[]}, [%1]";
+ else
+ return "vld1.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+ (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+ (const_string "neon_vld1_1_2_regs")))]
+)
+
+(define_insn "neon_vld1_dup<mode>"
+ [(set (match_operand:VQX 0 "s_register_operand" "=w")
+ (unspec:VQX [(mem:<V_elem> (match_operand:SI 1 "s_register_operand" "r"))]
+ UNSPEC_VLD1_DUP))]
+ "TARGET_NEON"
+{
+ if (GET_MODE_NUNITS (<MODE>mode) > 2)
+ return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+ else
+ return "vld1.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+ (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+ (const_string "neon_vld1_1_2_regs")))]
+)
+
+(define_insn "neon_vst1<mode>"
+ [(set (mem:VDQX (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
+ UNSPEC_VST1))]
+ "TARGET_NEON"
+ "vst1.<V_sz_elem>\t%h1, [%0]"
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
+
+(define_insn "neon_vst1_lane<mode>"
+ [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (vec_select:<V_elem>
+ (match_operand:VDX 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ if (max == 1)
+ return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+ else
+ return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_mode_nunits>") (const_int 1))
+ (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+ (const_string "neon_vst1_vst2_lane")))])
+
+(define_insn "neon_vst1_lane<mode>"
+ [(set (mem:<V_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (vec_select:<V_elem>
+ (match_operand:VQX 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ operands[2] = GEN_INT (lane);
+ }
+ operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
+ if (max == 2)
+ return "vst1.<V_sz_elem>\t{%P1}, [%0]";
+ else
+ return "vst1.<V_sz_elem>\t{%P1[%c2]}, [%0]";
+}
+ [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
+
+(define_insn "neon_vld2<mode>"
+ [(set (match_operand:TI 0 "s_register_operand" "=w")
+ (unspec:TI [(mem:TI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD2))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vld1.64\t%h0, [%1]";
+ else
+ return "vld2.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vld1_1_2_regs")
+ (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")))]
+)
+
+(define_insn "neon_vld2<mode>"
+ [(set (match_operand:OI 0 "s_register_operand" "=w")
+ (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD2))]
+ "TARGET_NEON"
+ "vld2.<V_sz_elem>\t%h0, [%1]"
+ [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")])
+
+(define_insn "neon_vld2_lane<mode>"
+ [(set (match_operand:TI 0 "s_register_operand" "=w")
+ (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:TI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD2_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[4];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = operands[1];
+ ops[3] = operands[3];
+ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld1_vld2_lane")]
+)
+
+(define_insn "neon_vld2_lane<mode>"
+ [(set (match_operand:OI 0 "s_register_operand" "=w")
+ (unspec:OI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:OI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD2_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[4];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 4);
+ ops[2] = operands[1];
+ ops[3] = GEN_INT (lane);
+ output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, [%2]", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld1_vld2_lane")]
+)
+
+(define_insn "neon_vld2_dup<mode>"
+ [(set (match_operand:TI 0 "s_register_operand" "=w")
+ (unspec:TI [(mem:<V_two_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD2_DUP))]
+ "TARGET_NEON"
+{
+ if (GET_MODE_NUNITS (<MODE>mode) > 1)
+ return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, [%1]";
+ else
+ return "vld1.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+ (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes")
+ (const_string "neon_vld1_1_2_regs")))]
+)
+
+(define_insn "neon_vst2<mode>"
+ [(set (mem:TI (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST2))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vst1.64\t%h1, [%0]";
+ else
+ return "vst2.<V_sz_elem>\t%h1, [%0]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+ (const_string "neon_vst1_1_2_regs_vst2_2_regs")))]
+)
+
+(define_insn "neon_vst2<mode>"
+ [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST2))]
+ "TARGET_NEON"
+ "vst2.<V_sz_elem>\t%h1, [%0]"
+ [(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")]
+)
+
+(define_insn "neon_vst2_lane<mode>"
+ [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_two_elem>
+ [(match_operand:TI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST2_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[4];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 2);
+ ops[3] = operands[2];
+ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
+
+(define_insn "neon_vst2_lane<mode>"
+ [(set (mem:<V_two_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_two_elem>
+ [(match_operand:OI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST2_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[4];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = GEN_INT (lane);
+ output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, [%0]", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst1_vst2_lane")]
+)
+
+(define_insn "neon_vld3<mode>"
+ [(set (match_operand:EI 0 "s_register_operand" "=w")
+ (unspec:EI [(mem:EI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vld1.64\t%h0, [%1]";
+ else
+ return "vld3.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vld1_1_2_regs")
+ (const_string "neon_vld3_vld4")))]
+)
+
+(define_expand "neon_vld3<mode>"
+ [(match_operand:CI 0 "s_register_operand" "=w")
+ (match_operand:SI 1 "s_register_operand" "+r")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vld3qa<mode> (operands[0], operands[0],
+ operands[1], operands[1]));
+ emit_insn (gen_neon_vld3qb<mode> (operands[0], operands[0],
+ operands[1], operands[1]));
+ DONE;
+})
+
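+;; The qa/qb halves below each load three D registers with post-increment:
+;; for a CI value starting at d0, vld3qa fills {d0, d2, d4} and vld3qb then
+;; fills {d1, d3, d5}, de-interleaving into three Q registers overall.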
+(define_insn "neon_vld3qa<mode>"
+ [(set (match_operand:CI 0 "s_register_operand" "=w")
+ (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2"))
+ (match_operand:CI 1 "s_register_operand" "0")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3A))
+ (set (match_operand:SI 2 "s_register_operand" "=r")
+ (plus:SI (match_dup 3)
+ (const_int 24)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[0]);
+ rtx ops[4];
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 4);
+ ops[2] = gen_rtx_REG (DImode, regno + 8);
+ ops[3] = operands[2];
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4")]
+)
+
+(define_insn "neon_vld3qb<mode>"
+ [(set (match_operand:CI 0 "s_register_operand" "=w")
+ (unspec:CI [(mem:CI (match_operand:SI 3 "s_register_operand" "2"))
+ (match_operand:CI 1 "s_register_operand" "0")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3B))
+ (set (match_operand:SI 2 "s_register_operand" "=r")
+ (plus:SI (match_dup 3)
+ (const_int 24)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[0]);
+ rtx ops[4];
+ ops[0] = gen_rtx_REG (DImode, regno + 2);
+ ops[1] = gen_rtx_REG (DImode, regno + 6);
+ ops[2] = gen_rtx_REG (DImode, regno + 10);
+ ops[3] = operands[2];
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, [%3]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4")]
+)
+
+(define_insn "neon_vld3_lane<mode>"
+ [(set (match_operand:EI 0 "s_register_operand" "=w")
+ (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:EI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[5];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = operands[1];
+ ops[4] = operands[3];
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
+
+(define_insn "neon_vld3_lane<mode>"
+ [(set (match_operand:CI 0 "s_register_operand" "=w")
+ (unspec:CI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:CI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[5];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 4);
+ ops[2] = gen_rtx_REG (DImode, regno + 8);
+ ops[3] = operands[1];
+ ops[4] = GEN_INT (lane);
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, [%3]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
+
+(define_insn "neon_vld3_dup<mode>"
+ [(set (match_operand:EI 0 "s_register_operand" "=w")
+ (unspec:EI [(mem:<V_three_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD3_DUP))]
+ "TARGET_NEON"
+{
+ if (GET_MODE_NUNITS (<MODE>mode) > 1)
+ {
+ int regno = REGNO (operands[0]);
+ rtx ops[4];
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = operands[1];
+ output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, [%3]", ops);
+ return "";
+ }
+ else
+ return "vld1.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+ (const_string "neon_vld3_vld4_all_lanes")
+ (const_string "neon_vld1_1_2_regs")))])
+
+(define_insn "neon_vst3<mode>"
+ [(set (mem:EI (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST3))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vst1.64\t%h1, [%0]";
+ else
+ return "vst3.<V_sz_elem>\t%h1, [%0]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+ (const_string "neon_vst2_4_regs_vst3_vst4")))])
+
+(define_expand "neon_vst3<mode>"
+ [(match_operand:SI 0 "s_register_operand" "+r")
+ (match_operand:CI 1 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vst3qa<mode> (operands[0], operands[0], operands[1]));
+ emit_insn (gen_neon_vst3qb<mode> (operands[0], operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vst3qa<mode>"
+ [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
+ (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST3A))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (const_int 24)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[2]);
+ rtx ops[4];
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 8);
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
+
+(define_insn "neon_vst3qb<mode>"
+ [(set (mem:EI (match_operand:SI 1 "s_register_operand" "0"))
+ (unspec:EI [(match_operand:CI 2 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST3B))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (const_int 24)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[2]);
+ rtx ops[4];
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 6);
+ ops[3] = gen_rtx_REG (DImode, regno + 10);
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, [%0]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
+
+(define_insn "neon_vst3_lane<mode>"
+ [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_three_elem>
+ [(match_operand:EI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST3_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[5];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 2);
+ ops[3] = gen_rtx_REG (DImode, regno + 4);
+ ops[4] = operands[2];
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
+
+(define_insn "neon_vst3_lane<mode>"
+ [(set (mem:<V_three_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_three_elem>
+ [(match_operand:CI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST3_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[5];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 8);
+ ops[4] = GEN_INT (lane);
+ output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, [%0]",
+ ops);
+ return "";
+}
+[(set_attr "neon_type" "neon_vst3_vst4_lane")])
+
+(define_insn "neon_vld4<mode>"
+ [(set (match_operand:OI 0 "s_register_operand" "=w")
+ (unspec:OI [(mem:OI (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vld1.64\t%h0, [%1]";
+ else
+ return "vld4.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vld1_1_2_regs")
+ (const_string "neon_vld3_vld4")))]
+)
+
+(define_expand "neon_vld4<mode>"
+ [(match_operand:XI 0 "s_register_operand" "=w")
+ (match_operand:SI 1 "s_register_operand" "+r")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0],
+ operands[1], operands[1]));
+ emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0],
+ operands[1], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vld4qa<mode>"
+ [(set (match_operand:XI 0 "s_register_operand" "=w")
+ (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2"))
+ (match_operand:XI 1 "s_register_operand" "0")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4A))
+ (set (match_operand:SI 2 "s_register_operand" "=r")
+ (plus:SI (match_dup 3)
+ (const_int 32)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[0]);
+ rtx ops[5];
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 4);
+ ops[2] = gen_rtx_REG (DImode, regno + 8);
+ ops[3] = gen_rtx_REG (DImode, regno + 12);
+ ops[4] = operands[2];
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4")]
+)
+
+(define_insn "neon_vld4qb<mode>"
+ [(set (match_operand:XI 0 "s_register_operand" "=w")
+ (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2"))
+ (match_operand:XI 1 "s_register_operand" "0")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4B))
+ (set (match_operand:SI 2 "s_register_operand" "=r")
+ (plus:SI (match_dup 3)
+ (const_int 32)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[0]);
+ rtx ops[5];
+ ops[0] = gen_rtx_REG (DImode, regno + 2);
+ ops[1] = gen_rtx_REG (DImode, regno + 6);
+ ops[2] = gen_rtx_REG (DImode, regno + 10);
+ ops[3] = gen_rtx_REG (DImode, regno + 14);
+ ops[4] = operands[2];
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4")]
+)
+
+(define_insn "neon_vld4_lane<mode>"
+ [(set (match_operand:OI 0 "s_register_operand" "=w")
+ (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:OI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[6];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 6);
+ ops[4] = operands[1];
+ ops[5] = operands[3];
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
+
+(define_insn "neon_vld4_lane<mode>"
+ [(set (match_operand:XI 0 "s_register_operand" "=w")
+ (unspec:XI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:XI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[3]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[0]);
+ rtx ops[6];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 4);
+ ops[2] = gen_rtx_REG (DImode, regno + 8);
+ ops[3] = gen_rtx_REG (DImode, regno + 12);
+ ops[4] = operands[1];
+ ops[5] = GEN_INT (lane);
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, [%4]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vld3_vld4_lane")]
+)
+
+(define_insn "neon_vld4_dup<mode>"
+ [(set (match_operand:OI 0 "s_register_operand" "=w")
+ (unspec:OI [(mem:<V_four_elem> (match_operand:SI 1 "s_register_operand" "r"))
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VLD4_DUP))]
+ "TARGET_NEON"
+{
+ if (GET_MODE_NUNITS (<MODE>mode) > 1)
+ {
+ int regno = REGNO (operands[0]);
+ rtx ops[5];
+ ops[0] = gen_rtx_REG (DImode, regno);
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 6);
+ ops[4] = operands[1];
+ output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, [%4]",
+ ops);
+ return "";
+ }
+ else
+ return "vld1.<V_sz_elem>\t%h0, [%1]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
+ (const_string "neon_vld3_vld4_all_lanes")
+ (const_string "neon_vld1_1_2_regs")))]
+)
+
+(define_insn "neon_vst4<mode>"
+ [(set (mem:OI (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
+ (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST4))]
+ "TARGET_NEON"
+{
+ if (<V_sz_elem> == 64)
+ return "vst1.64\t%h1, [%0]";
+ else
+ return "vst4.<V_sz_elem>\t%h1, [%0]";
+}
+ [(set (attr "neon_type")
+ (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
+ (const_string "neon_vst1_1_2_regs_vst2_2_regs")
+ (const_string "neon_vst2_4_regs_vst3_vst4")))]
+)
+
+(define_expand "neon_vst4<mode>"
+ [(match_operand:SI 0 "s_register_operand" "+r")
+ (match_operand:XI 1 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_NEON"
+{
+ emit_insn (gen_neon_vst4qa<mode> (operands[0], operands[0], operands[1]));
+ emit_insn (gen_neon_vst4qb<mode> (operands[0], operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "neon_vst4qa<mode>"
+ [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
+ (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST4A))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (const_int 32)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[2]);
+ rtx ops[5];
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 8);
+ ops[4] = gen_rtx_REG (DImode, regno + 12);
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
+
+(define_insn "neon_vst4qb<mode>"
+ [(set (mem:OI (match_operand:SI 1 "s_register_operand" "0"))
+ (unspec:OI [(match_operand:XI 2 "s_register_operand" "w")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST4B))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (const_int 32)))]
+ "TARGET_NEON"
+{
+ int regno = REGNO (operands[2]);
+ rtx ops[5];
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno + 2);
+ ops[2] = gen_rtx_REG (DImode, regno + 6);
+ ops[3] = gen_rtx_REG (DImode, regno + 10);
+ ops[4] = gen_rtx_REG (DImode, regno + 14);
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, [%0]!", ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst2_4_regs_vst3_vst4")]
+)
+
+(define_insn "neon_vst4_lane<mode>"
+ [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_four_elem>
+ [(match_operand:OI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST4_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[6];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 2);
+ ops[3] = gen_rtx_REG (DImode, regno + 4);
+ ops[4] = gen_rtx_REG (DImode, regno + 6);
+ ops[5] = operands[2];
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
+
+(define_insn "neon_vst4_lane<mode>"
+ [(set (mem:<V_four_elem> (match_operand:SI 0 "s_register_operand" "r"))
+ (unspec:<V_four_elem>
+ [(match_operand:XI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_VST4_LANE))]
+ "TARGET_NEON"
+{
+ HOST_WIDE_INT lane = INTVAL (operands[2]);
+ HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
+ int regno = REGNO (operands[1]);
+ rtx ops[6];
+ if (lane < 0 || lane >= max)
+ error ("lane out of range");
+ else if (lane >= max / 2)
+ {
+ lane -= max / 2;
+ regno += 2;
+ }
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (DImode, regno);
+ ops[2] = gen_rtx_REG (DImode, regno + 4);
+ ops[3] = gen_rtx_REG (DImode, regno + 8);
+ ops[4] = gen_rtx_REG (DImode, regno + 12);
+ ops[5] = GEN_INT (lane);
+ output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, [%0]",
+ ops);
+ return "";
+}
+ [(set_attr "neon_type" "neon_vst3_vst4_lane")]
+)
+
+(define_expand "neon_vand<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "")
+ (match_operand:VDQX 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "neon_inv_logic_op2" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_and<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "neon_vorr<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "")
+ (match_operand:VDQX 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "neon_logic_op2" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "neon_veor<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "")
+ (match_operand:VDQX 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "s_register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "neon_vbic<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "")
+ (match_operand:VDQX 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "neon_logic_op2" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "neon_vorn<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "")
+ (match_operand:VDQX 1 "s_register_operand" "")
+ (match_operand:VDQX 2 "neon_inv_logic_op2" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "neon_vec_unpack<US>_lo_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_low" ""))))]
+ "TARGET_NEON"
+ "vmovl.<US><V_sz_elem> %q0, %e1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_insn "neon_vec_unpack<US>_hi_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_high" ""))))]
+ "TARGET_NEON"
+ "vmovl.<US><V_sz_elem> %q0, %f1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_unpack<US>_hi_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
+ "TARGET_NEON"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
+
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
+ operands[1],
+ t1));
+ DONE;
+ }
+)
+
+(define_expand "vec_unpack<US>_lo_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
+ "TARGET_NEON"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (i);
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
+ operands[1],
+ t1));
+ DONE;
+ }
+)
+
+(define_insn "neon_vec_<US>mult_lo_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_low" "")))
+ (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 3 "register_operand" "w")
+ (match_dup 2)))))]
+ "TARGET_NEON"
+ "vmull.<US><V_sz_elem> %q0, %e1, %e3"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>mult_lo_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+ (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
+ "TARGET_NEON"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (i);
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+
+ emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
+ operands[1],
+ t1,
+ operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "neon_vec_<US>mult_hi_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_high" "")))
+ (SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 3 "register_operand" "w")
+ (match_dup 2)))))]
+ "TARGET_NEON"
+ "vmull.<US><V_sz_elem> %q0, %f1, %f3"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>mult_hi_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+ (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
+ "TARGET_NEON"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+
+ emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
+ operands[1],
+ t1,
+ operands[2]));
+ DONE;
+
+ }
+)
+
+;; Patterns used by the vectorizer in the non-quad (D-register only) case.
+(define_insn "neon_unpack<US>_<mode>"
+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
+ (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
+ "TARGET_NEON"
+ "vmovl.<US><V_sz_elem> %q0, %P1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_unpack<US>_lo_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
+ "TARGET_NEON"
+{
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
+ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+}
+)
+
+(define_expand "vec_unpack<US>_hi_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
+ "TARGET_NEON"
+{
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
+ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+}
+)
+
+(define_insn "neon_vec_<US>mult_<mode>"
+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
+ (mult:<V_widen> (SE:<V_widen>
+ (match_operand:VDI 1 "register_operand" "w"))
+ (SE:<V_widen>
+ (match_operand:VDI 2 "register_operand" "w"))))]
+ "TARGET_NEON"
+ "vmull.<US><V_sz_elem> %q0, %P1, %P2"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>mult_hi_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+ (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
+ "TARGET_NEON"
+ {
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
+ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+
+ }
+)
+
+(define_expand "vec_widen_<US>mult_lo_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+ (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
+ "TARGET_NEON"
+ {
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
+ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+
+ }
+)
+
+;; The case when using all quad registers.
+(define_insn "vec_pack_trunc_<mode>"
+ [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
+ (vec_concat:<V_narrow_pack>
+ (truncate:<V_narrow>
+ (match_operand:VN 1 "register_operand" "w"))
+ (truncate:<V_narrow>
+ (match_operand:VN 2 "register_operand" "w"))))]
+ "TARGET_NEON"
+ "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
+ [(set_attr "neon_type" "neon_shift_1")
+ (set_attr "length" "8")]
+)
+
+;; For the non-quad case.
+(define_insn "neon_vec_pack_trunc_<mode>"
+ [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
+ (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
+ "TARGET_NEON"
+ "vmovn.i<V_sz_elem>\t%P0, %q1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_pack_trunc_<mode>"
+ [(match_operand:<V_narrow_pack> 0 "register_operand" "")
+ (match_operand:VSHFT 1 "register_operand" "")
+ (match_operand:VSHFT 2 "register_operand")]
+ "TARGET_NEON"
+{
+ rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
+
+ emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
+ emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
+ emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
+ DONE;
+})
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
new file mode 100644
index 000000000..b5b9cab73
--- /dev/null
+++ b/gcc/config/arm/neon.ml
@@ -0,0 +1,1857 @@
+(* Common code for ARM NEON header file, documentation and test case
+ generators.
+
+ Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. *)
+
+(* Shorthand types for vector elements. *)
+type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
+ | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
+ | Cast of elts * elts | NoElts
+
+type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
+ | ConvClass of eltclass * eltclass | NoType
+
+(* These vector types correspond directly to C types. *)
+type vectype = T_int8x8 | T_int8x16
+ | T_int16x4 | T_int16x8
+ | T_int32x2 | T_int32x4
+ | T_int64x1 | T_int64x2
+ | T_uint8x8 | T_uint8x16
+ | T_uint16x4 | T_uint16x8
+ | T_uint32x2 | T_uint32x4
+ | T_uint64x1 | T_uint64x2
+ | T_float32x2 | T_float32x4
+ | T_poly8x8 | T_poly8x16
+ | T_poly16x4 | T_poly16x8
+ | T_immediate of int * int
+ | T_int8 | T_int16
+ | T_int32 | T_int64
+ | T_uint8 | T_uint16
+ | T_uint32 | T_uint64
+ | T_poly8 | T_poly16
+ | T_float32 | T_arrayof of int * vectype
+ | T_ptrto of vectype | T_const of vectype
+ | T_void | T_intQI
+ | T_intHI | T_intSI
+ | T_intDI | T_floatSF
+
+(* The meanings of the following are:
+ TImode : "Tetra", two registers (four words).
+ EImode : "hExa", three registers (six words).
+ OImode : "Octa", four registers (eight words).
+ CImode : "dodeCa", six registers (twelve words).
+ XImode : "heXadeca", eight registers (sixteen words).
+*)
+
+type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
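+
+(* For example, a triple of D registers (as used by the EImode vld3/vst3
+ patterns in neon.md) occupies six words, i.e. B_EImode; a pair of Q
+ registers is eight words, i.e. B_OImode. *)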
+
+type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
+ | PtrTo of shape_elt | CstPtrTo of shape_elt
+ (* These next ones are used only in the test generator. *)
+ | Element_of_dreg (* Used for "lane" variants. *)
+ | Element_of_qreg (* Likewise. *)
+ | All_elements_of_dreg (* Used for "dup" variants. *)
+ | Alternatives of shape_elt list (* Used for multiple valid operands *)
+
+type shape_form = All of int * shape_elt
+ | Long
+ | Long_noreg of shape_elt
+ | Wide
+ | Wide_noreg of shape_elt
+ | Narrow
+ | Long_imm
+ | Narrow_imm
+ | Binary_imm of shape_elt
+ | Use_operands of shape_elt array
+ | By_scalar of shape_elt
+ | Unary_scalar of shape_elt
+ | Wide_lane
+ | Wide_scalar
+ | Pair_result of shape_elt
+
+type arity = Arity0 of vectype
+ | Arity1 of vectype * vectype
+ | Arity2 of vectype * vectype * vectype
+ | Arity3 of vectype * vectype * vectype * vectype
+ | Arity4 of vectype * vectype * vectype * vectype * vectype
+
+type vecmode = V8QI | V4HI | V2SI | V2SF | DI
+ | V16QI | V8HI | V4SI | V4SF | V2DI
+ | QI | HI | SI | SF
+
+type opcode =
+ (* Binary ops. *)
+ Vadd
+ | Vmul
+ | Vmla
+ | Vmls
+ | Vsub
+ | Vceq
+ | Vcge
+ | Vcgt
+ | Vcle
+ | Vclt
+ | Vcage
+ | Vcagt
+ | Vcale
+ | Vcalt
+ | Vtst
+ | Vabd
+ | Vaba
+ | Vmax
+ | Vmin
+ | Vpadd
+ | Vpada
+ | Vpmax
+ | Vpmin
+ | Vrecps
+ | Vrsqrts
+ | Vshl
+ | Vshr_n
+ | Vshl_n
+ | Vsra_n
+ | Vsri
+ | Vsli
+ (* Logic binops. *)
+ | Vand
+ | Vorr
+ | Veor
+ | Vbic
+ | Vorn
+ | Vbsl
+ (* Ops with scalar. *)
+ | Vmul_lane
+ | Vmla_lane
+ | Vmls_lane
+ | Vmul_n
+ | Vmla_n
+ | Vmls_n
+ | Vmull_n
+ | Vmull_lane
+ | Vqdmull_n
+ | Vqdmull_lane
+ | Vqdmulh_n
+ | Vqdmulh_lane
+ (* Unary ops. *)
+ | Vabs
+ | Vneg
+ | Vcls
+ | Vclz
+ | Vcnt
+ | Vrecpe
+ | Vrsqrte
+ | Vmvn
+ (* Vector extract. *)
+ | Vext
+ (* Reverse elements. *)
+ | Vrev64
+ | Vrev32
+ | Vrev16
+ (* Transposition ops. *)
+ | Vtrn
+ | Vzip
+ | Vuzp
+ (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
+ | Vldx of int
+ | Vstx of int
+ | Vldx_lane of int
+ | Vldx_dup of int
+ | Vstx_lane of int
+ (* Set/extract lanes from a vector. *)
+ | Vget_lane
+ | Vset_lane
+ (* Initialize vector from bit pattern. *)
+ | Vcreate
+ (* Set all lanes to same value. *)
+ | Vdup_n
+ | Vmov_n (* Same operation as Vdup_n; kept as a separate opcode for the
+ vmov_n intrinsic names. *)
+ (* Duplicate scalar to all lanes of vector. *)
+ | Vdup_lane
+ (* Combine vectors. *)
+ | Vcombine
+ (* Get quadword high/low parts. *)
+ | Vget_high
+ | Vget_low
+ (* Convert vectors. *)
+ | Vcvt
+ | Vcvt_n
+ (* Narrow/lengthen vectors. *)
+ | Vmovn
+ | Vmovl
+ (* Table lookup. *)
+ | Vtbl of int
+ | Vtbx of int
+ (* Reinterpret casts. *)
+ | Vreinterp
+
+(* Features used for documentation, to distinguish between some instruction
+ variants, and to signal special requirements (e.g. swapping arguments). *)
+
+type features =
+ Halving
+ | Rounding
+ | Saturating
+ | Dst_unsign
+ | High_half
+ | Doubling
+ | Flipped of string (* Builtin name to use with flipped arguments. *)
+ | InfoWord (* Pass an extra word for signedness/rounding etc. (always passed
+ for All _, Long, Wide, Narrow shape_forms). *)
+ | ReturnPtr (* Pass explicit pointer to return value as first argument. *)
+ (* A specification as to the shape of instruction expected upon
+ disassembly, used if it differs from the shape used to build the
+ intrinsic prototype. Multiple entries in the constructor's argument
+ indicate that the intrinsic expands to more than one assembly
+ instruction, each with a corresponding shape specified here. *)
+ | Disassembles_as of shape_form list
+ | Builtin_name of string (* Override the name of the builtin. *)
+ (* Override the name of the instruction. If more than one name
+ is specified, it means that the instruction can have any of those
+ names. *)
+ | Instruction_name of string list
+ (* Mark that the intrinsic yields no instructions, or expands to yield
+ behavior that the test generator cannot test. *)
+ | No_op
+ (* Mark that the intrinsic has constant arguments that cannot be set
+ to the defaults (zero for pointers and one otherwise) in the test
+ cases. The function supplied must return the integer to be written
+ into the testcase for the argument number (0-based) supplied to it. *)
+ | Const_valuator of (int -> int)
+ | Fixed_return_reg
+
+exception MixedMode of elts * elts
+
+let rec elt_width = function
+ S8 | U8 | P8 | I8 | B8 -> 8
+ | S16 | U16 | P16 | I16 | B16 -> 16
+ | S32 | F32 | U32 | I32 | B32 -> 32
+ | S64 | U64 | I64 | B64 -> 64
+ | Conv (a, b) ->
+ let wa = elt_width a and wb = elt_width b in
+ if wa = wb then wa else failwith "element width?"
+ | Cast (a, b) -> raise (MixedMode (a, b))
+ | NoElts -> failwith "No elts"
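+
+(* For example, elt_width S16 = 16, and elt_width (Conv (S32, F32)) = 32,
+ since both sides of the conversion share a width. *)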
+
+let rec elt_class = function
+ S8 | S16 | S32 | S64 -> Signed
+ | U8 | U16 | U32 | U64 -> Unsigned
+ | P8 | P16 -> Poly
+ | F32 -> Float
+ | I8 | I16 | I32 | I64 -> Int
+ | B8 | B16 | B32 | B64 -> Bits
+ | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
+ | NoElts -> NoType
+
+let elt_of_class_width c w =
+ match c, w with
+ Signed, 8 -> S8
+ | Signed, 16 -> S16
+ | Signed, 32 -> S32
+ | Signed, 64 -> S64
+ | Float, 32 -> F32
+ | Unsigned, 8 -> U8
+ | Unsigned, 16 -> U16
+ | Unsigned, 32 -> U32
+ | Unsigned, 64 -> U64
+ | Poly, 8 -> P8
+ | Poly, 16 -> P16
+ | Int, 8 -> I8
+ | Int, 16 -> I16
+ | Int, 32 -> I32
+ | Int, 64 -> I64
+ | Bits, 8 -> B8
+ | Bits, 16 -> B16
+ | Bits, 32 -> B32
+ | Bits, 64 -> B64
+ | _ -> failwith "Bad element type"
+
+(* Return unsigned integer element the same width as argument. *)
+let unsigned_of_elt elt =
+ elt_of_class_width Unsigned (elt_width elt)
+
+let signed_of_elt elt =
+ elt_of_class_width Signed (elt_width elt)
+
+(* Return untyped bits element the same width as argument. *)
+let bits_of_elt elt =
+ elt_of_class_width Bits (elt_width elt)
+
+let non_signed_variant = function
+ S8 -> I8
+ | S16 -> I16
+ | S32 -> I32
+ | S64 -> I64
+ | U8 -> I8
+ | U16 -> I16
+ | U32 -> I32
+ | U64 -> I64
+ | x -> x
+
+let poly_unsigned_variant v =
+ let elclass = match elt_class v with
+ Poly -> Unsigned
+ | x -> x in
+ elt_of_class_width elclass (elt_width v)
+
+let widen_elt elt =
+ let w = elt_width elt
+ and c = elt_class elt in
+ elt_of_class_width c (w * 2)
+
+let narrow_elt elt =
+ let w = elt_width elt
+ and c = elt_class elt in
+ elt_of_class_width c (w / 2)
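+
+(* For example, widen_elt U16 = U32 and narrow_elt S32 = S16: the element
+ class is preserved and only the width changes. *)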
+
+(* If we're trying to find a mode from a "Use_operands" instruction, use the
+ last vector operand as the dominant mode used to invoke the correct builtin.
+ We must stick to this rule in neon.md. *)
+let find_key_operand operands =
+ let rec scan opno =
+ match operands.(opno) with
+ Qreg -> Qreg
+ | Dreg -> Dreg
+ | VecArray (_, Qreg) -> Qreg
+ | VecArray (_, Dreg) -> Dreg
+ | _ -> scan (opno-1)
+ in
+ scan ((Array.length operands) - 1)
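+
+(* For example, find_key_operand [| Qreg; Dreg; Immed |] skips the trailing
+ immediate and returns Dreg, the last vector operand. *)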
+
+let rec mode_of_elt elt shape =
+ let flt = match elt_class elt with
+ Float | ConvClass(_, Float) -> true | _ -> false in
+ let idx =
+ match elt_width elt with
+ 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
+ | _ -> failwith "Bad element width"
+ in match shape with
+ All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
+ | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
+ [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
+ | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
+ | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
+ [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
+ | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
+ [| QI; HI; if flt then SF else SI; DI |].(idx)
+ | Long | Wide | Wide_lane | Wide_scalar
+ | Long_imm ->
+ [| V8QI; V4HI; V2SI; DI |].(idx)
+ | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
+ | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
+ | _ -> failwith "invalid shape"
+
+(* Modify an element type dependent on the shape of the instruction and the
+ operand number. *)
+
+let shapemap shape no =
+ let ident = fun x -> x in
+ match shape with
+ All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
+ | Binary_imm _ -> ident
+ | Long | Long_noreg _ | Wide_scalar | Long_imm ->
+ [| widen_elt; ident; ident |].(no)
+ | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
+ | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
+ | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
+
+(* Register type (D/Q) of an operand, based on shape and operand number. *)
+
+let regmap shape no =
+ match shape with
+ All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
+ | Long -> [| Qreg; Dreg; Dreg |].(no)
+ | Wide -> [| Qreg; Qreg; Dreg |].(no)
+ | Narrow -> [| Dreg; Qreg; Qreg |].(no)
+ | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
+ | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
+ | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
+ | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
+ | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
+ | Binary_imm reg -> [| reg; reg; Immed |].(no)
+ | Long_imm -> [| Qreg; Dreg; Immed |].(no)
+ | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
+ | Use_operands these -> these.(no)
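+
+(* Taken together: for the Long shape, operand 0 is a Q register holding a
+ widened element while operands 1 and 2 are D registers, so
+ (shapemap Long 0) S8 = S16 and regmap Long 0 = Qreg. *)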
+
+let type_for_elt shape elt no =
+ let elt = (shapemap shape no) elt in
+ let reg = regmap shape no in
+ let rec type_for_reg_elt reg elt =
+ match reg with
+ Dreg ->
+ begin match elt with
+ S8 -> T_int8x8
+ | S16 -> T_int16x4
+ | S32 -> T_int32x2
+ | S64 -> T_int64x1
+ | U8 -> T_uint8x8
+ | U16 -> T_uint16x4
+ | U32 -> T_uint32x2
+ | U64 -> T_uint64x1
+ | F32 -> T_float32x2
+ | P8 -> T_poly8x8
+ | P16 -> T_poly16x4
+ | _ -> failwith "Bad elt type"
+ end
+ | Qreg ->
+ begin match elt with
+ S8 -> T_int8x16
+ | S16 -> T_int16x8
+ | S32 -> T_int32x4
+ | S64 -> T_int64x2
+ | U8 -> T_uint8x16
+ | U16 -> T_uint16x8
+ | U32 -> T_uint32x4
+ | U64 -> T_uint64x2
+ | F32 -> T_float32x4
+ | P8 -> T_poly8x16
+ | P16 -> T_poly16x8
+ | _ -> failwith "Bad elt type"
+ end
+ | Corereg ->
+ begin match elt with
+ S8 -> T_int8
+ | S16 -> T_int16
+ | S32 -> T_int32
+ | S64 -> T_int64
+ | U8 -> T_uint8
+ | U16 -> T_uint16
+ | U32 -> T_uint32
+ | U64 -> T_uint64
+ | P8 -> T_poly8
+ | P16 -> T_poly16
+ | F32 -> T_float32
+ | _ -> failwith "Bad elt type"
+ end
+ | Immed ->
+ T_immediate (0, 0)
+ | VecArray (num, sub) ->
+ T_arrayof (num, type_for_reg_elt sub elt)
+ | PtrTo x ->
+ T_ptrto (type_for_reg_elt x elt)
+ | CstPtrTo x ->
+ T_ptrto (T_const (type_for_reg_elt x elt))
+ (* Anything else is solely for the use of the test generator. *)
+ | _ -> assert false
+ in
+ type_for_reg_elt reg elt
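+
+(* Illustrative sanity check (addition): an S8 element in a D-reg operand
+   position maps to the int8x8_t vector type. *)
+let _ = assert (type_for_elt (All (3, Dreg)) S8 1 = T_int8x8)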
+
+(* Return size of a vector type, in bits. *)
+let vectype_size = function
+ T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
+ | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
+ | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
+ | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
+ | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
+ | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
+ | _ -> raise Not_found
+
+let inttype_for_array num elttype =
+ let eltsize = vectype_size elttype in
+ let numwords = (num * eltsize) / 32 in
+ match numwords with
+ 4 -> B_TImode
+ | 6 -> B_EImode
+ | 8 -> B_OImode
+ | 12 -> B_CImode
+ | 16 -> B_XImode
+ | _ -> failwith ("no int type for size " ^ string_of_int numwords)
+
+(* These functions return pairs of (internal, external) types, where "internal"
+ types are those seen by GCC, and "external" are those seen by the assembler.
+ These types aren't necessarily the same, since the intrinsics can munge more
+ than one C type into each assembler opcode. *)
+
+let make_sign_invariant func shape elt =
+ let arity, elt' = func shape elt in
+ arity, non_signed_variant elt'
+
+(* Don't restrict any types. *)
+
+let elts_same make_arity shape elt =
+ let vtype = type_for_elt shape elt in
+ make_arity vtype, elt
+
+(* As sign_invar_*, but when sign matters. *)
+let elts_same_io_lane =
+ elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))
+
+let elts_same_io =
+ elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))
+
+let elts_same_2_lane =
+ elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))
+
+let elts_same_3 = elts_same_2_lane
+
+let elts_same_2 =
+ elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))
+
+let elts_same_1 =
+ elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
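+
+(* Illustrative sanity check (addition): for a three-operand D-reg shape,
+   every position gets the same vector type and the element is returned
+   unchanged. *)
+let _ =
+  assert (elts_same_2 (All (3, Dreg)) S8
+          = (Arity2 (T_int8x8, T_int8x8, T_int8x8), S8))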
+
+(* Use for signed/unsigned invariant operations (i.e. where the operation
+ doesn't depend on the sign of the data). *)
+
+let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
+let sign_invar_io = make_sign_invariant elts_same_io
+let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
+let sign_invar_2 = make_sign_invariant elts_same_2
+let sign_invar_1 = make_sign_invariant elts_same_1
+
+(* Sign-sensitive comparison. *)
+
+let cmp_sign_matters shape elt =
+ let vtype = type_for_elt shape elt
+ and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
+ Arity2 (rtype, vtype 1, vtype 2), elt
+
+(* Signed/unsigned invariant comparison. *)
+
+let cmp_sign_invar shape elt =
+ let shape', elt' = cmp_sign_matters shape elt in
+ let elt'' =
+ match non_signed_variant elt' with
+ P8 -> I8
+ | x -> x
+ in
+ shape', elt''
+
+(* Comparison (VTST) where only the element width matters. *)
+
+let cmp_bits shape elt =
+ let vtype = type_for_elt shape elt
+ and rtype = type_for_elt shape (unsigned_of_elt elt) 0
+ and bits_only = bits_of_elt elt in
+ Arity2 (rtype, vtype 1, vtype 2), bits_only
+
+let reg_shift shape elt =
+ let vtype = type_for_elt shape elt
+ and op2type = type_for_elt shape (signed_of_elt elt) 2 in
+ Arity2 (vtype 0, vtype 1, op2type), elt
+
+(* Genericised constant-shift type-generating function. *)
+
+let const_shift mkimm ?arity ?result shape elt =
+ let op2type = (shapemap shape 2) elt in
+ let op2width = elt_width op2type in
+ let op2 = mkimm op2width
+ and op1 = type_for_elt shape elt 1
+ and r_elt =
+ match result with
+ None -> elt
+ | Some restriction -> restriction elt in
+ let rtype = type_for_elt shape r_elt 0 in
+ match arity with
+ None -> Arity2 (rtype, op1, op2), elt
+ | Some mkarity -> mkarity rtype op1 op2, elt
+
+(* Use for immediate right-shifts. *)
+
+let shift_right shape elt =
+ const_shift (fun imm -> T_immediate (1, imm)) shape elt
+
+let shift_right_acc shape elt =
+ const_shift (fun imm -> T_immediate (1, imm))
+ ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt
+
+(* Use for immediate right-shifts when the operation doesn't care about
+ signedness. *)
+
+let shift_right_sign_invar =
+ make_sign_invariant shift_right
+
+(* Immediate right-shift; result is unsigned even when operand is signed. *)
+
+let shift_right_to_uns shape elt =
+ const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
+ shape elt
+
+(* Immediate left-shift. *)
+
+let shift_left shape elt =
+ const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt
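+
+(* Illustrative sanity checks (addition): right shifts accept amounts
+   1..width, encoded as T_immediate (1, width); left shifts accept
+   0..width-1, encoded as T_immediate (0, width - 1). *)
+let _ =
+  assert (shift_right (Binary_imm Dreg) S8
+          = (Arity2 (T_int8x8, T_int8x8, T_immediate (1, 8)), S8))
+let _ =
+  assert (shift_left (Binary_imm Dreg) S8
+          = (Arity2 (T_int8x8, T_int8x8, T_immediate (0, 7)), S8))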
+
+(* Immediate left-shift, unsigned result. *)
+
+let shift_left_to_uns shape elt =
+ const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
+ shape elt
+
+(* Immediate left-shift, don't care about signs. *)
+
+let shift_left_sign_invar =
+ make_sign_invariant shift_left
+
+(* Shift left/right and insert: only element size matters. *)
+
+let shift_insert shape elt =
+ let arity, elt =
+ const_shift (fun imm -> T_immediate (1, imm))
+ ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
+ arity, bits_of_elt elt
+
+(* Get/set lane. *)
+
+let get_lane shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity2 (vtype 0, vtype 1, vtype 2),
+ (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)
+
+let set_lane shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
+
+let set_lane_notype shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
+
+let create_vector shape elt =
+ let vtype = type_for_elt shape U64 1
+ and rtype = type_for_elt shape elt 0 in
+ Arity1 (rtype, vtype), elt
+
+let conv make_arity shape elt =
+ let edest, esrc = match elt with
+ Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
+ | _ -> failwith "Non-conversion element in conversion" in
+ let vtype = type_for_elt shape esrc
+ and rtype = type_for_elt shape edest 0 in
+ make_arity rtype vtype, elt
+
+let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
+let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
+
+(* Operation has an unsigned result even if operands are signed. *)
+
+let dst_unsign make_arity shape elt =
+ let vtype = type_for_elt shape elt
+ and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
+ make_arity rtype vtype, elt
+
+let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
+
+let make_bits_only func shape elt =
+ let arity, elt' = func shape elt in
+ arity, bits_of_elt elt'
+
+(* Extend operation. *)
+
+let extend shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
+
+(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
+ integer ops respectively, or unsigned for polynomial ops. *)
+
+let table mkarity shape elt =
+ let vtype = type_for_elt shape elt in
+ let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
+ mkarity vtype op2, bits_of_elt elt
+
+let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
+let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
+
+(* Operations where only bits matter. *)
+
+let bits_1 = make_bits_only elts_same_1
+let bits_2 = make_bits_only elts_same_2
+let bits_3 = make_bits_only elts_same_3
+
+(* Store insns. *)
+let store_1 shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt
+
+let store_3 shape elt =
+ let vtype = type_for_elt shape elt in
+ Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt
+
+let make_notype func shape elt =
+ let arity, _ = func shape elt in
+ arity, NoElts
+
+let notype_1 = make_notype elts_same_1
+let notype_2 = make_notype elts_same_2
+let notype_3 = make_notype elts_same_3
+
+(* Bit-select operations (first operand is unsigned int). *)
+
+let bit_select shape elt =
+ let vtype = type_for_elt shape elt
+ and itype = type_for_elt shape (unsigned_of_elt elt) in
+ Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
+
+(* Common lists of supported element types. *)
+
+let su_8_32 = [S8; S16; S32; U8; U16; U32]
+let su_8_64 = S64 :: U64 :: su_8_32
+let su_16_64 = [S16; S32; S64; U16; U32; U64]
+let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
+let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
+
+let ops =
+ [
+ (* Addition. *)
+ Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
+ Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
+ Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
+ Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
+ Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
+ Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
+ Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
+ Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
+ All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
+ Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
+ All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
+ Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
+ Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
+ Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
+ Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
+ Narrow, "vRaddhn", sign_invar_2, su_16_64;
+
+ (* Multiplication. *)
+ Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
+ Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
+ Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
+ elts_same_2, [S16; S32];
+ Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
+ elts_same_2, [S16; S32];
+ Vmul,
+ [Saturating; Rounding; Doubling; High_half;
+ Instruction_name ["vqrdmulh"]],
+ All (3, Dreg), "vqRdmulh",
+ elts_same_2, [S16; S32];
+ Vmul,
+ [Saturating; Rounding; Doubling; High_half;
+ Instruction_name ["vqrdmulh"]],
+ All (3, Qreg), "vqRdmulhQ",
+ elts_same_2, [S16; S32];
+ Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
+ Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
+
+ (* Multiply-accumulate. *)
+ Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
+ Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
+ Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
+ Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
+
+ (* Multiply-subtract. *)
+ Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
+ Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
+ Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
+ Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
+
+ (* Subtraction. *)
+ Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
+ Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
+ Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
+ Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
+ Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
+ Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
+ Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
+ Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
+ Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
+ Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
+ Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
+ Narrow, "vRsubhn", sign_invar_2, su_16_64;
+
+ (* Comparison, equal. *)
+ Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
+ Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
+
+ (* Comparison, greater-than or equal. *)
+ Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
+ Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
+
+ (* Comparison, less-than or equal. *)
+ Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
+ F32 :: su_8_32;
+ Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
+ All (3, Qreg), "vcleQ", cmp_sign_matters,
+ F32 :: su_8_32;
+
+ (* Comparison, greater-than. *)
+ Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
+ Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
+
+ (* Comparison, less-than. *)
+ Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
+ F32 :: su_8_32;
+ Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
+ All (3, Qreg), "vcltQ", cmp_sign_matters,
+ F32 :: su_8_32;
+
+ (* Compare absolute greater-than or equal. *)
+ Vcage, [Instruction_name ["vacge"]],
+ All (3, Dreg), "vcage", cmp_sign_matters, [F32];
+ Vcage, [Instruction_name ["vacge"]],
+ All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
+
+ (* Compare absolute less-than or equal. *)
+ Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
+ All (3, Dreg), "vcale", cmp_sign_matters, [F32];
+ Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
+ All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
+
+ (* Compare absolute greater-than. *)
+ Vcagt, [Instruction_name ["vacgt"]],
+ All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
+ Vcagt, [Instruction_name ["vacgt"]],
+ All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
+
+ (* Compare absolute less-than. *)
+ Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
+ All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
+ Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
+ All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
+
+ (* Test bits. *)
+ Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
+ Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
+
+ (* Absolute difference. *)
+ Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
+ Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
+ Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
+
+ (* Absolute difference and accumulate. *)
+ Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
+ Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
+ Vaba, [], Long, "vabal", elts_same_io, su_8_32;
+
+ (* Max. *)
+ Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
+ Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
+
+ (* Min. *)
+ Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
+ Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
+
+ (* Pairwise add. *)
+ Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
+ Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
+ Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
+
+ (* Pairwise add, widen and accumulate. *)
+ Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
+ Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
+
+ (* Folding maximum, minimum. *)
+ Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
+ Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
+
+ (* Reciprocal step. *)
+ Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
+ Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
+ Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
+ Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
+
+ (* Vector shift left. *)
+ Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
+ Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
+ Vshl, [Instruction_name ["vrshl"]; Rounding],
+ All (3, Dreg), "vRshl", reg_shift, su_8_64;
+ Vshl, [Instruction_name ["vrshl"]; Rounding],
+ All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
+ Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
+ Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
+ Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
+ All (3, Dreg), "vqRshl", reg_shift, su_8_64;
+ Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
+ All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
+
+ (* Vector shift right by constant. *)
+ Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
+ Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
+ Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
+ "vRshr_n", shift_right, su_8_64;
+ Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
+ "vRshrQ_n", shift_right, su_8_64;
+ Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
+ Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
+ shift_right_sign_invar, su_16_64;
+ Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
+ Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
+ "vqRshrn_n", shift_right, su_16_64;
+ Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
+ shift_right_to_uns, [S16; S32; S64];
+ Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
+ Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
+
+ (* Vector shift left by constant. *)
+ Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
+ Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
+ Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
+ Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
+ Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
+ shift_left_to_uns, [S8; S16; S32; S64];
+ Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
+ shift_left_to_uns, [S8; S16; S32; S64];
+ Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
+
+ (* Vector shift right by constant and accumulate. *)
+ Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
+ Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
+ Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
+ "vRsra_n", shift_right_acc, su_8_64;
+ Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
+ "vRsraQ_n", shift_right_acc, su_8_64;
+
+ (* Vector shift right and insert. *)
+ Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
+ P8 :: P16 :: su_8_64;
+ Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
+ P8 :: P16 :: su_8_64;
+
+ (* Vector shift left and insert. *)
+ Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
+ P8 :: P16 :: su_8_64;
+ Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
+ P8 :: P16 :: su_8_64;
+
+ (* Absolute value. *)
+ Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
+ Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
+ Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
+ Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
+
+ (* Negate. *)
+ Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
+ Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
+ Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
+ Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
+
+ (* Bitwise not. *)
+ Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
+ Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
+
+ (* Count leading sign bits. *)
+ Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
+ Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
+
+ (* Count leading zeros. *)
+ Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
+ Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
+
+ (* Count number of set bits. *)
+ Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
+ Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
+
+ (* Reciprocal estimate. *)
+ Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
+ Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
+
+ (* Reciprocal square-root estimate. *)
+ Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
+ Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
+
+ (* Get lanes from a vector. *)
+ Vget_lane,
+ [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
+ Instruction_name ["vmov"]],
+ Use_operands [| Corereg; Dreg; Immed |],
+ "vget_lane", get_lane, pf_su_8_32;
+ Vget_lane,
+ [No_op;
+ InfoWord;
+ Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
+ Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
+ Use_operands [| Corereg; Dreg; Immed |],
+ "vget_lane", notype_2, [S64; U64];
+ Vget_lane,
+ [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
+ Instruction_name ["vmov"]],
+ Use_operands [| Corereg; Qreg; Immed |],
+ "vgetQ_lane", get_lane, pf_su_8_32;
+ Vget_lane,
+ [InfoWord;
+ Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
+ Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
+ Use_operands [| Corereg; Qreg; Immed |],
+ "vgetQ_lane", notype_2, [S64; U64];
+
+ (* Set lanes in a vector. *)
+ Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
+ Instruction_name ["vmov"]],
+ Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
+ set_lane, pf_su_8_32;
+ Vset_lane, [No_op;
+ Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
+ Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
+ Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
+ set_lane_notype, [S64; U64];
+ Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
+ Instruction_name ["vmov"]],
+ Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
+ set_lane, pf_su_8_32;
+ Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
+ Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
+ Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
+ set_lane_notype, [S64; U64];
+
+ (* Create vector from literal bit pattern. *)
+ Vcreate,
+ [No_op], (* Not really, but it can yield various things that are too
+ hard for the test generator at this time. *)
+ Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
+ pf_su_8_64;
+
+ (* Set all lanes to the same value. *)
+ Vdup_n,
+ [Disassembles_as [Use_operands [| Dreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
+ Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
+ pf_su_8_32;
+ Vdup_n,
+ [No_op;
+ Instruction_name ["vmov"];
+ Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
+ Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
+ [S64; U64];
+ Vdup_n,
+ [Disassembles_as [Use_operands [| Qreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
+ Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
+ pf_su_8_32;
+ Vdup_n,
+ [No_op;
+ Instruction_name ["vmov"];
+ Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
+ Use_operands [| Dreg; Corereg; Corereg |]]],
+ Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
+ [S64; U64];
+
+ (* These are just aliases for the above. *)
+ Vmov_n,
+ [Builtin_name "vdup_n";
+ Disassembles_as [Use_operands [| Dreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
+ Use_operands [| Dreg; Corereg |],
+ "vmov_n", bits_1, pf_su_8_32;
+ Vmov_n,
+ [No_op;
+ Builtin_name "vdup_n";
+ Instruction_name ["vmov"];
+ Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
+ Use_operands [| Dreg; Corereg |],
+ "vmov_n", notype_1, [S64; U64];
+ Vmov_n,
+ [Builtin_name "vdupQ_n";
+ Disassembles_as [Use_operands [| Qreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
+ Use_operands [| Qreg; Corereg |],
+ "vmovQ_n", bits_1, pf_su_8_32;
+ Vmov_n,
+ [No_op;
+ Builtin_name "vdupQ_n";
+ Instruction_name ["vmov"];
+ Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
+ Use_operands [| Dreg; Corereg; Corereg |]]],
+ Use_operands [| Qreg; Corereg |],
+ "vmovQ_n", notype_1, [S64; U64];
+
+ (* Duplicate, lane version. We can't use Use_operands here because the
+ rightmost register (always Dreg) would be picked up by find_key_operand,
+ when we want the leftmost register to be used in this case (otherwise
+ the modes are indistinguishable in neon.md, etc.). *)
+ Vdup_lane,
+ [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
+ Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
+ Vdup_lane,
+ [No_op; Const_valuator (fun _ -> 0)],
+ Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
+ Vdup_lane,
+ [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
+ Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
+ Vdup_lane,
+ [No_op; Const_valuator (fun _ -> 0)],
+ Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
+
+ (* Combining vectors. *)
+ Vcombine, [No_op],
+ Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
+ pf_su_8_64;
+
+ (* Splitting vectors. *)
+ Vget_high, [No_op],
+ Use_operands [| Dreg; Qreg |], "vget_high",
+ notype_1, pf_su_8_64;
+ Vget_low, [Instruction_name ["vmov"];
+ Disassembles_as [Use_operands [| Dreg; Dreg |]];
+ Fixed_return_reg],
+ Use_operands [| Dreg; Qreg |], "vget_low",
+ notype_1, pf_su_8_32;
+ Vget_low, [No_op],
+ Use_operands [| Dreg; Qreg |], "vget_low",
+ notype_1, [S64; U64];
+
+ (* Conversions. *)
+ Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
+ [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+ Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
+ [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+ Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
+ [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+ Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
+ [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
+
+ (* Move, narrowing. *)
+ Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
+ Narrow, "vmovn", sign_invar_1, su_16_64;
+ Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
+ Narrow, "vqmovn", elts_same_1, su_16_64;
+ Vmovn,
+ [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
+ Narrow, "vqmovun", dst_unsign_1,
+ [S16; S32; S64];
+
+ (* Move, long. *)
+ Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
+ Long, "vmovl", elts_same_1, su_8_32;
+
+ (* Table lookup. *)
+ Vtbl 1,
+ [Instruction_name ["vtbl"];
+ Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
+ Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
+ Vtbl 2, [Instruction_name ["vtbl"]],
+ Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
+ [U8; S8; P8];
+ Vtbl 3, [Instruction_name ["vtbl"]],
+ Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
+ [U8; S8; P8];
+ Vtbl 4, [Instruction_name ["vtbl"]],
+ Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
+ [U8; S8; P8];
+
+ (* Extended table lookup. *)
+ Vtbx 1,
+ [Instruction_name ["vtbx"];
+ Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
+ Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
+ Vtbx 2, [Instruction_name ["vtbx"]],
+ Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
+ [U8; S8; P8];
+ Vtbx 3, [Instruction_name ["vtbx"]],
+ Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
+ [U8; S8; P8];
+ Vtbx 4, [Instruction_name ["vtbx"]],
+ Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
+ [U8; S8; P8];
+
+ (* Multiply, lane. (note: these were undocumented at the time of
+ writing). *)
+ Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
+ [S16; S32; U16; U32; F32];
+ Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
+ [S16; S32; U16; U32; F32];
+
+ (* Multiply-accumulate, lane. *)
+ Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
+ [S16; S32; U16; U32; F32];
+ Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
+ [S16; S32; U16; U32; F32];
+ Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
+ [S16; S32; U16; U32];
+ Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
+ elts_same_io_lane, [S16; S32];
+
+ (* Multiply-subtract, lane. *)
+ Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
+ [S16; S32; U16; U32; F32];
+ Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
+ [S16; S32; U16; U32; F32];
+ Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
+ [S16; S32; U16; U32];
+ Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
+ elts_same_io_lane, [S16; S32];
+
+ (* Long multiply, lane. *)
+ Vmull_lane, [],
+ Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
+
+ (* Saturating doubling long multiply, lane. *)
+ Vqdmull_lane, [Saturating; Doubling],
+ Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
+
+ (* Saturating doubling multiply high, lane. *)
+ Vqdmulh_lane, [Saturating; Halving],
+ By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
+ Vqdmulh_lane, [Saturating; Halving],
+ By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
+ Vqdmulh_lane, [Saturating; Halving; Rounding;
+ Instruction_name ["vqrdmulh"]],
+ By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
+ Vqdmulh_lane, [Saturating; Halving; Rounding;
+ Instruction_name ["vqrdmulh"]],
+ By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
+
+ (* Vector multiply by scalar. *)
+ Vmul_n, [InfoWord;
+ Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
+ Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
+ sign_invar_2, [S16; S32; U16; U32; F32];
+ Vmul_n, [InfoWord;
+ Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
+ Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
+ sign_invar_2, [S16; S32; U16; U32; F32];
+
+ (* Vector long multiply by scalar. *)
+ Vmull_n, [Instruction_name ["vmull"];
+ Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
+ Wide_scalar, "vmull_n",
+ elts_same_2, [S16; S32; U16; U32];
+
+ (* Vector saturating doubling long multiply by scalar. *)
+ Vqdmull_n, [Saturating; Doubling;
+ Disassembles_as [Use_operands [| Qreg; Dreg;
+ Element_of_dreg |]]],
+ Wide_scalar, "vqdmull_n",
+ elts_same_2, [S16; S32];
+
+ (* Vector saturating doubling multiply high by scalar. *)
+ Vqdmulh_n,
+ [Saturating; Halving; InfoWord;
+ Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
+ Use_operands [| Qreg; Qreg; Corereg |],
+ "vqdmulhQ_n", elts_same_2, [S16; S32];
+ Vqdmulh_n,
+ [Saturating; Halving; InfoWord;
+ Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
+ Use_operands [| Dreg; Dreg; Corereg |],
+ "vqdmulh_n", elts_same_2, [S16; S32];
+ Vqdmulh_n,
+ [Saturating; Halving; Rounding; InfoWord;
+ Instruction_name ["vqrdmulh"];
+ Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
+ Use_operands [| Qreg; Qreg; Corereg |],
+ "vqRdmulhQ_n", elts_same_2, [S16; S32];
+ Vqdmulh_n,
+ [Saturating; Halving; Rounding; InfoWord;
+ Instruction_name ["vqrdmulh"];
+ Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
+ Use_operands [| Dreg; Dreg; Corereg |],
+ "vqRdmulh_n", elts_same_2, [S16; S32];
+
+ (* Vector multiply-accumulate by scalar. *)
+ Vmla_n, [InfoWord;
+ Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
+ Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
+ sign_invar_io, [S16; S32; U16; U32; F32];
+ Vmla_n, [InfoWord;
+ Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
+ Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
+ sign_invar_io, [S16; S32; U16; U32; F32];
+ Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
+ Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
+ [S16; S32];
+
+ (* Vector multiply subtract by scalar. *)
+ Vmls_n, [InfoWord;
+ Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
+ Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
+ sign_invar_io, [S16; S32; U16; U32; F32];
+ Vmls_n, [InfoWord;
+ Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
+ Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
+ sign_invar_io, [S16; S32; U16; U32; F32];
+ Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
+ Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
+ [S16; S32];
+
+ (* Vector extract. *)
+ Vext, [Const_valuator (fun _ -> 0)],
+ Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
+ pf_su_8_64;
+ Vext, [Const_valuator (fun _ -> 0)],
+ Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
+ pf_su_8_64;
+
+ (* Reverse elements. *)
+ Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
+ Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
+ Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
+ Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
+ Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
+ Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
+
+ (* Bit selection. *)
+ Vbsl,
+ [Instruction_name ["vbsl"; "vbit"; "vbif"];
+ Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
+ Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
+ pf_su_8_64;
+ Vbsl,
+ [Instruction_name ["vbsl"; "vbit"; "vbif"];
+ Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
+ Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
+ pf_su_8_64;
+
+ (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
+ generating good code for intrinsics which return structure types --
+ builtins work well by themselves (and understand that the values being
+ stored on e.g. the stack also reside in registers, so can optimise the
+ stores away entirely if the results are used immediately), but
+ intrinsics are very much less efficient. Maybe something can be improved
+ re: inlining, or tweaking the ABI used for intrinsics (a special call
+ attribute?).
+ *)
+ Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
+ Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
+
+ (* Zip elements. *)
+ Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
+ Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
+
+ (* Unzip elements. *)
+ Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
+ Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
+
+ (* Element/structure loads. VLD1 variants. *)
+ Vldx 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
+ pf_su_8_64;
+ Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
+ pf_su_8_64;
+
+ Vldx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
+ "vld1_lane", bits_3, pf_su_8_32;
+ Vldx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]];
+ Const_valuator (fun _ -> 0)],
+ Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
+ "vld1_lane", bits_3, [S64; U64];
+ Vldx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
+ "vld1Q_lane", bits_3, pf_su_8_32;
+ Vldx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
+ "vld1Q_lane", bits_3, [S64; U64];
+
+ Vldx_dup 1,
+ [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
+ bits_1, pf_su_8_32;
+ Vldx_dup 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
+ bits_1, [S64; U64];
+ Vldx_dup 1,
+ [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
+ bits_1, pf_su_8_32;
+ Vldx_dup 1,
+ [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
+ bits_1, [S64; U64];
+
+ (* VST1 variants. *)
+ Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; Dreg |], "vst1",
+ store_1, pf_su_8_64;
+ Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
+ store_1, pf_su_8_64;
+
+ Vstx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; Dreg; Immed |],
+ "vst1_lane", store_3, pf_su_8_32;
+ Vstx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]];
+ Const_valuator (fun _ -> 0)],
+ Use_operands [| PtrTo Corereg; Dreg; Immed |],
+ "vst1_lane", store_3, [U64; S64];
+ Vstx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; Qreg; Immed |],
+ "vst1Q_lane", store_3, pf_su_8_32;
+ Vstx_lane 1,
+ [Disassembles_as [Use_operands [| VecArray (1, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; Qreg; Immed |],
+ "vst1Q_lane", store_3, [U64; S64];
+
+ (* VLD2 variants. *)
+ Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
+ "vld2", bits_1, pf_su_8_32;
+ Vldx 2, [Instruction_name ["vld1"]],
+ Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
+ "vld2", bits_1, [S64; U64];
+ Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ CstPtrTo Corereg |];
+ Use_operands [| VecArray (2, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
+ "vld2Q", bits_1, pf_su_8_32;
+
+ Vldx_lane 2,
+ [Disassembles_as [Use_operands
+ [| VecArray (2, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
+ VecArray (2, Dreg); Immed |],
+ "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
+ Vldx_lane 2,
+ [Disassembles_as [Use_operands
+ [| VecArray (2, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
+ VecArray (2, Qreg); Immed |],
+ "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
+
+ Vldx_dup 2,
+ [Disassembles_as [Use_operands
+ [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
+ "vld2_dup", bits_1, pf_su_8_32;
+ Vldx_dup 2,
+ [Instruction_name ["vld1"]; Disassembles_as [Use_operands
+ [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
+ "vld2_dup", bits_1, [S64; U64];
+
+ (* VST2 variants. *)
+ Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
+ store_1, pf_su_8_32;
+ Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ PtrTo Corereg |]];
+ Instruction_name ["vst1"]],
+ Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
+ store_1, [S64; U64];
+ Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
+ PtrTo Corereg |];
+ Use_operands [| VecArray (2, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
+ store_1, pf_su_8_32;
+
+ Vstx_lane 2,
+ [Disassembles_as [Use_operands
+ [| VecArray (2, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
+ store_3, P8 :: P16 :: F32 :: su_8_32;
+ Vstx_lane 2,
+ [Disassembles_as [Use_operands
+ [| VecArray (2, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
+ store_3, [P16; F32; U16; U32; S16; S32];
+
+ (* VLD3 variants. *)
+ Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
+ "vld3", bits_1, pf_su_8_32;
+ Vldx 3, [Instruction_name ["vld1"]],
+ Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
+ "vld3", bits_1, [S64; U64];
+ Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
+ CstPtrTo Corereg |];
+ Use_operands [| VecArray (3, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
+ "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
+
+ Vldx_lane 3,
+ [Disassembles_as [Use_operands
+ [| VecArray (3, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
+ VecArray (3, Dreg); Immed |],
+ "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
+ Vldx_lane 3,
+ [Disassembles_as [Use_operands
+ [| VecArray (3, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
+ VecArray (3, Qreg); Immed |],
+ "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
+
+ Vldx_dup 3,
+ [Disassembles_as [Use_operands
+ [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
+ "vld3_dup", bits_1, pf_su_8_32;
+ Vldx_dup 3,
+ [Instruction_name ["vld1"]; Disassembles_as [Use_operands
+ [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
+ "vld3_dup", bits_1, [S64; U64];
+
+ (* VST3 variants. *)
+ Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
+ store_1, pf_su_8_32;
+ Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |]];
+ Instruction_name ["vst1"]],
+ Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
+ store_1, [S64; U64];
+ Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
+ PtrTo Corereg |];
+ Use_operands [| VecArray (3, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
+ store_1, pf_su_8_32;
+
+ Vstx_lane 3,
+ [Disassembles_as [Use_operands
+ [| VecArray (3, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
+ store_3, P8 :: P16 :: F32 :: su_8_32;
+ Vstx_lane 3,
+ [Disassembles_as [Use_operands
+ [| VecArray (3, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
+ store_3, [P16; F32; U16; U32; S16; S32];
+
+ (* VLD4/VST4 variants. *)
+ Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
+ "vld4", bits_1, pf_su_8_32;
+ Vldx 4, [Instruction_name ["vld1"]],
+ Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
+ "vld4", bits_1, [S64; U64];
+ Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ CstPtrTo Corereg |];
+ Use_operands [| VecArray (4, Dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
+ "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
+
+ Vldx_lane 4,
+ [Disassembles_as [Use_operands
+ [| VecArray (4, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
+ VecArray (4, Dreg); Immed |],
+ "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
+ Vldx_lane 4,
+ [Disassembles_as [Use_operands
+ [| VecArray (4, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
+ VecArray (4, Qreg); Immed |],
+ "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
+
+ Vldx_dup 4,
+ [Disassembles_as [Use_operands
+ [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
+ "vld4_dup", bits_1, pf_su_8_32;
+ Vldx_dup 4,
+ [Instruction_name ["vld1"]; Disassembles_as [Use_operands
+ [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
+ Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
+ "vld4_dup", bits_1, [S64; U64];
+
+ Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
+ store_1, pf_su_8_32;
+ Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |]];
+ Instruction_name ["vst1"]],
+ Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
+ store_1, [S64; U64];
+ Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |];
+ Use_operands [| VecArray (4, Dreg);
+ PtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
+ store_1, pf_su_8_32;
+
+ Vstx_lane 4,
+ [Disassembles_as [Use_operands
+ [| VecArray (4, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
+ store_3, P8 :: P16 :: F32 :: su_8_32;
+ Vstx_lane 4,
+ [Disassembles_as [Use_operands
+ [| VecArray (4, Element_of_dreg);
+ CstPtrTo Corereg |]]],
+ Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
+ store_3, [P16; F32; U16; U32; S16; S32];
+
+ (* Logical operations. And. *)
+ Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
+ Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
+ Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
+
+ (* Or. *)
+ Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
+ Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
+ Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
+
+ (* Eor. *)
+ Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
+ Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
+ Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
+
+ (* Bic (And-not). *)
+ Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
+ Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
+ Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
+
+ (* Or-not. *)
+ Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
+ Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
+ Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
+ ]
+
+let reinterp =
+ let elems = P8 :: P16 :: F32 :: su_8_64 in
+ List.fold_right
+ (fun convto acc ->
+ let types = List.fold_right
+ (fun convfrom acc ->
+ if convfrom <> convto then
+ Cast (convto, convfrom) :: acc
+ else
+ acc)
+ elems
+ []
+ in
+ let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
+ "vreinterpret", conv_1, types
+ and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
+ "vreinterpretQ", conv_1, types in
+ dconv :: qconv :: acc)
+ elems
+ []
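+
+(* Illustrative sanity check (addition): eleven element types, each yielding
+   a D-reg and a Q-reg vreinterpret entry, gives 22 entries in total. *)
+let _ = assert (List.length reinterp = 22)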
+
+(* Output routines. *)
+
+let rec string_of_elt = function
+ S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
+ | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
+ | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
+ | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
+ | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
+ | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
+ | NoElts -> failwith "No elts"
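+
+(* Illustrative sanity check (addition): conversion elements print both
+   halves, destination first. *)
+let _ = assert (string_of_elt (Conv (F32, S32)) = "f32_s32")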
+
+let string_of_elt_dots elt =
+ match elt with
+ Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
+ | _ -> string_of_elt elt
+
+let string_of_vectype vt =
+ let rec name affix = function
+ T_int8x8 -> affix "int8x8"
+ | T_int8x16 -> affix "int8x16"
+ | T_int16x4 -> affix "int16x4"
+ | T_int16x8 -> affix "int16x8"
+ | T_int32x2 -> affix "int32x2"
+ | T_int32x4 -> affix "int32x4"
+ | T_int64x1 -> affix "int64x1"
+ | T_int64x2 -> affix "int64x2"
+ | T_uint8x8 -> affix "uint8x8"
+ | T_uint8x16 -> affix "uint8x16"
+ | T_uint16x4 -> affix "uint16x4"
+ | T_uint16x8 -> affix "uint16x8"
+ | T_uint32x2 -> affix "uint32x2"
+ | T_uint32x4 -> affix "uint32x4"
+ | T_uint64x1 -> affix "uint64x1"
+ | T_uint64x2 -> affix "uint64x2"
+ | T_float32x2 -> affix "float32x2"
+ | T_float32x4 -> affix "float32x4"
+ | T_poly8x8 -> affix "poly8x8"
+ | T_poly8x16 -> affix "poly8x16"
+ | T_poly16x4 -> affix "poly16x4"
+ | T_poly16x8 -> affix "poly16x8"
+ | T_int8 -> affix "int8"
+ | T_int16 -> affix "int16"
+ | T_int32 -> affix "int32"
+ | T_int64 -> affix "int64"
+ | T_uint8 -> affix "uint8"
+ | T_uint16 -> affix "uint16"
+ | T_uint32 -> affix "uint32"
+ | T_uint64 -> affix "uint64"
+ | T_poly8 -> affix "poly8"
+ | T_poly16 -> affix "poly16"
+ | T_float32 -> affix "float32"
+ | T_immediate _ -> "const int"
+ | T_void -> "void"
+ | T_intQI -> "__builtin_neon_qi"
+ | T_intHI -> "__builtin_neon_hi"
+ | T_intSI -> "__builtin_neon_si"
+ | T_intDI -> "__builtin_neon_di"
+ | T_floatSF -> "__builtin_neon_sf"
+ | T_arrayof (num, base) ->
+ let basename = name (fun x -> x) base in
+ affix (Printf.sprintf "%sx%d" basename num)
+ | T_ptrto x ->
+ let basename = name affix x in
+ Printf.sprintf "%s *" basename
+ | T_const x ->
+ let basename = name affix x in
+ Printf.sprintf "const %s" basename
+ in
+ name (fun x -> x ^ "_t") vt
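+
+(* Illustrative sanity checks (addition): array types append an "xN" count
+   before the "_t" suffix, and pointer types place the "*" outside it. *)
+let _ = assert (string_of_vectype (T_arrayof (4, T_uint8x8)) = "uint8x8x4_t")
+let _ = assert (string_of_vectype (T_ptrto T_int8) = "int8_t *")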
+
+let string_of_inttype = function
+ B_TImode -> "__builtin_neon_ti"
+ | B_EImode -> "__builtin_neon_ei"
+ | B_OImode -> "__builtin_neon_oi"
+ | B_CImode -> "__builtin_neon_ci"
+ | B_XImode -> "__builtin_neon_xi"
+
+let string_of_mode = function
+ V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
+ | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
+ | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
+ | SF -> "sf"
+
+(* Use uppercase chars for letters which form part of the intrinsic name, but
+ should be omitted from the builtin name (the info is passed in an extra
+ argument, instead). *)
+let intrinsic_name name = String.lowercase name
+
+(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
+ found in the features list. *)
+let builtin_name features name =
+ let name = List.fold_right
+ (fun el name ->
+ match el with
+ Flipped x | Builtin_name x -> x
+ | _ -> name)
+ features name in
+ let islower x = let str = String.make 1 x in (String.lowercase str) = str
+ and buf = Buffer.create (String.length name) in
+ String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
+ Buffer.contents buf
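+
+(* Illustrative sanity checks (addition): uppercase letters are dropped, and
+   a Builtin_name feature overrides the derived name entirely. *)
+let _ = assert (builtin_name [] "vaddQ" = "vadd")
+let _ = assert (builtin_name [Builtin_name "vdup_n"] "vmovQ_n" = "vdup_n")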
+
+(* Transform an arity into a list of strings. *)
+let strings_of_arity a =
+ match a with
+ | Arity0 vt -> [string_of_vectype vt]
+ | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
+ | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
+ string_of_vectype vt2;
+ string_of_vectype vt3]
+ | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
+ string_of_vectype vt2;
+ string_of_vectype vt3;
+ string_of_vectype vt4]
+ | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
+ string_of_vectype vt2;
+ string_of_vectype vt3;
+ string_of_vectype vt4;
+ string_of_vectype vt5]
+
+(* Suffixes on the end of builtin names that are to be stripped in order
+ to obtain the name used as an instruction. They are only stripped if
+ preceded immediately by an underscore. *)
+let suffixes_to_strip = [ "n"; "lane"; "dup" ]
+
+(* Get the possible names of an instruction corresponding to a "name" from the
+ ops table. This is done by taking the equivalent builtin name and
+ stripping any suffixes from the suffixes_to_strip list above, unless the
+ features list contains an Instruction_name or a Flipped entry, in which
+ case that entry supplies the name. If both kinds of entry are present,
+ the first one in the features list is chosen. *)
+let get_insn_names features name =
+ let names = try
+ begin
+ match List.find (fun feature -> match feature with
+ Instruction_name _ -> true
+ | Flipped _ -> true
+ | _ -> false) features
+ with
+ Instruction_name names -> names
+ | Flipped name -> [name]
+ | _ -> assert false
+ end
+ with Not_found -> [builtin_name features name]
+ in
+ begin
+ List.map (fun name' ->
+ try
+ let underscore = String.rindex name' '_' in
+ let our_suffix = String.sub name' (underscore + 1)
+ ((String.length name') - underscore - 1)
+ in
+ let rec strip remaining_suffixes =
+ match remaining_suffixes with
+ [] -> name'
+ | s::ss when our_suffix = s -> String.sub name' 0 underscore
+ | _::ss -> strip ss
+ in
+ strip suffixes_to_strip
+ with (Not_found | Invalid_argument _) -> name') names
+ end
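+
+(* Illustrative sanity checks (addition): with no overriding feature the
+   builtin name is used with its "_n" suffix stripped; an Instruction_name
+   feature takes precedence. *)
+let _ = assert (get_insn_names [] "vshr_n" = ["vshr"])
+let _ = assert (get_insn_names [Instruction_name ["vrshr"]] "vRshr_n"
+                = ["vrshr"])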
+
+(* Apply a function to each element of a list and then comma-separate
+ the resulting strings. *)
+let rec commas f elts acc =
+ match elts with
+ [] -> acc
+ | [elt] -> acc ^ (f elt)
+ | elt::elts ->
+ commas f elts (acc ^ (f elt) ^ ", ")
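+
+(* Illustrative sanity check (addition): the accumulator is prepended, and
+   elements are separated by ", ". *)
+let _ = assert (commas (fun x -> x) ["a"; "b"; "c"] "" = "a, b, c")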
+
+(* Given a list of features and the shape specified in the "ops" table, apply
+ a function to each possible shape that the instruction may have.
+ By default, this is the "shape" entry in "ops". If the features list
+ contains a Disassembles_as entry, the shapes contained in that entry are
+ mapped to corresponding outputs and returned in a list. If there is more
+ than one Disassembles_as entry, only the first is used. *)
+let analyze_all_shapes features shape f =
+ try
+ match List.find (fun feature ->
+ match feature with Disassembles_as _ -> true
+ | _ -> false)
+ features with
+ Disassembles_as shapes -> List.map f shapes
+ | _ -> assert false
+ with Not_found -> [f shape]
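+
+(* Illustrative sanity checks (addition): with no Disassembles_as feature the
+   given shape alone is analyzed; otherwise each listed shape is mapped. *)
+let _ = assert (analyze_all_shapes [] Long (fun _ -> 1) = [1])
+let _ = assert (analyze_all_shapes [Disassembles_as [Long; Wide]] Long
+                  (fun _ -> 1) = [1; 1])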
+
diff --git a/gcc/config/arm/netbsd-elf.h b/gcc/config/arm/netbsd-elf.h
new file mode 100644
index 000000000..9cf186b33
--- /dev/null
+++ b/gcc/config/arm/netbsd-elf.h
@@ -0,0 +1,157 @@
+/* Definitions of target machine for GNU compiler, NetBSD/arm ELF version.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (NetBSD/arm ELF)", stderr);
+
+/* arm.h defaults to ARM6 CPU. */
+
+/* This defaults us to little-endian. */
+#ifndef TARGET_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT 0
+#endif
+
+#undef MULTILIB_DEFAULTS
+
+/* Default it to use ATPCS with soft-VFP. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_APCS_FRAME \
+ | TARGET_ENDIAN_DEFAULT)
+
+#undef ARM_DEFAULT_ABI
+#define ARM_DEFAULT_ABI ARM_ABI_ATPCS
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC
+
+#undef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC \
+ "-matpcs %{fpic|fpie:-k} %{fPIC|fPIE:-k}"
+
+/* Default to full VFP if -mhard-float is specified. */
+#undef SUBTARGET_ASM_FLOAT_SPEC
+#define SUBTARGET_ASM_FLOAT_SPEC \
+ "%{mhard-float:{!mfpu=*:-mfpu=vfp}} \
+ %{mfloat-abi=hard:{!mfpu=*:-mfpu=vfp}}"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \
+ { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+#define NETBSD_ENTRY_POINT "__start"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "-X %{mbig-endian:-EB} %{mlittle-endian:-EL} \
+ %(netbsd_link_spec)"
+
+/* Make GCC agree with <machine/ansi.h>. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* We don't have any limit on the length as our debugger is GDB. */
+#undef DBX_CONTIN_LENGTH
+
+/* NetBSD does its profiling differently from the Acorn compiler. We
+ don't need a word following the mcount call; and to skip it
+ requires either an assembly stub or the use of -fomit-frame-pointer when
+ compiling the profiling functions. Since we break Acorn CC
+ compatibility below, a little more won't hurt. */
+
+#undef ARM_FUNCTION_PROFILER
+#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ asm_fprintf (STREAM, "\tmov\t%Rip, %Rlr\n"); \
+ asm_fprintf (STREAM, "\tbl\t__mcount%s\n", \
+ (TARGET_ARM && NEED_PLT_RELOC) \
+ ? "(PLT)" : ""); \
+}
+
+/* VERY BIG NOTE: Change of structure alignment for NetBSD/arm.
+ There are consequences you should be aware of...
+
+ Normally GCC/arm uses a structure alignment of 32 for compatibility
+ with armcc. This means that structures are padded to a word
+ boundary. However this causes problems with bugged NetBSD kernel
+ code (possibly userland code as well - I have not checked every
+ binary). The nature of this bugged code is to rely on sizeof()
+ returning the correct size of various structures rounded to the
+ nearest byte (SCSI and ether code are two examples, the vm system
+ is another). This code breaks when the structure alignment is 32,
+ as sizeof() will report a word-rounded size. By changing the
+ structure alignment to 8, GCC will conform to what is expected by
+ NetBSD.
+
+ This has several side effects that should be considered.
+ 1. Structures will only be aligned to the size of the largest member,
+ i.e. structures containing only bytes will be byte aligned,
+ structures containing shorts will be half-word aligned, and
+ structures containing ints will be word aligned.
+
+ This means structures should be padded to a word boundary if
+ alignment of 32 is required for byte structures etc.
+
+ 2. A potential performance penalty may exist if strings are no longer
+ word aligned. GCC will not be able to use word load/stores to copy
+ short strings.
+
+ This modification is not encouraged but with the present state of the
+ NetBSD source tree it is currently the only solution that meets the
+ requirements. */
+
+#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY
+#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8
+
+/* Clear the instruction cache from `BEG' to `END'. This makes a
+ call to the ARM_SYNC_ICACHE architecture specific syscall. */
+#define CLEAR_INSN_CACHE(BEG, END) \
+do \
+ { \
+ extern int sysarch(int number, void *args); \
+ struct \
+ { \
+ unsigned int addr; \
+ int len; \
+ } s; \
+ s.addr = (unsigned int)(BEG); \
+ s.len = (END) - (BEG); \
+ (void) sysarch (0, &s); \
+ } \
+while (0)
+
+#undef FPUTYPE_DEFAULT
+#define FPUTYPE_DEFAULT "vfp"
+
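A rough usage sketch of the CLEAR_INSN_CACHE macro above (editorial illustration, not part of the patch; the function and buffer are hypothetical):

    /* Flush freshly written machine code before jumping to it.  The
       macro wraps the ARM_SYNC_ICACHE sysarch call defined above.  */
    static void
    flush_generated_code (char *buf, unsigned int len)
    {
      CLEAR_INSN_CACHE (buf, buf + len);
    }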
diff --git a/gcc/config/arm/netbsd.h b/gcc/config/arm/netbsd.h
new file mode 100644
index 000000000..4a1adbae9
--- /dev/null
+++ b/gcc/config/arm/netbsd.h
@@ -0,0 +1,150 @@
+/* NetBSD/arm a.out version.
+ Copyright (C) 1993, 1994, 1997, 1998, 2003, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Mark Brinicombe (amb@physig.ph.kcl.ac.uk)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/NetBSD)", stderr);
+
+/* Unsigned chars produce much better code than signed ones. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* Since we always use GAS as our assembler we support stabs. */
+#define DBX_DEBUGGING_INFO 1
+
+/*#undef ASM_DECLARE_FUNCTION_NAME*/
+
+/* ARM6 family default cpu. */
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APCS_FRAME)
+
+/* Some defines for CPP.
+ arm32 is the NetBSD port name, so we always define arm32 and __arm32__. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ NETBSD_OS_CPP_BUILTINS_AOUT(); \
+ builtin_define_std ("arm32"); \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("riscbsd"); \
+ } while (0)
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_AOUT },
+
+#undef CPP_SPEC
+#define CPP_SPEC "\
+%(cpp_cpu_arch) %(cpp_float) %(cpp_endian) %(netbsd_cpp_spec) \
+"
+
+/* Because TARGET_DEFAULT sets MASK_SOFT_FLOAT */
+#undef CPP_FLOAT_DEFAULT_SPEC
+#define CPP_FLOAT_DEFAULT_SPEC "-D__SOFTFP__"
+
+/* Pass -X to the linker so that it will strip symbols starting with 'L' */
+#undef LINK_SPEC
+#define LINK_SPEC "-X %(netbsd_link_spec)"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* We don't have any limit on the length as our debugger is GDB. */
+#undef DBX_CONTIN_LENGTH
+
+/* NetBSD does its profiling differently from the Acorn compiler. We
+ don't need a word following the mcount call; and to skip it
+ requires either an assembly stub or the use of -fomit-frame-pointer when
+ compiling the profiling functions. Since we break Acorn CC
+ compatibility below, a little more won't hurt. */
+
+#undef ARM_FUNCTION_PROFILER
+#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ fprintf(STREAM, "\tmov\t%sip, %slr\n", REGISTER_PREFIX, REGISTER_PREFIX); \
+ fprintf(STREAM, "\tbl\tmcount\n"); \
+}
+
+/* On the ARM `@' introduces a comment, so we must use something else
+ for .type directives. */
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "%%%s"
+
+/* NetBSD uses the old PCC style aggregate returning conventions. */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* Although not normally relevant (since by default, all aggregates
+ are returned in memory), compiling some parts of libc requires
+ non-APCS style struct returns. */
+#undef TARGET_RETURN_IN_MEMORY
+
+/* VERY BIG NOTE: Change of structure alignment for RiscBSD.
+ There are consequences you should be aware of...
+
+ Normally GCC/arm uses a structure alignment of 32 for compatibility
+ with armcc. This means that structures are padded to a word
+ boundary. However this causes problems with bugged NetBSD kernel
+ code (possibly userland code as well - I have not checked every
+ binary). The nature of this bugged code is to rely on sizeof()
+ returning the correct size of various structures rounded to the
+ nearest byte (SCSI and ether code are two examples, the vm system
+ is another). This code breaks when the structure alignment is 32,
+ as sizeof() will report a word-rounded size. By changing the
+ structure alignment to 8, GCC will conform to what is expected by
+ NetBSD.
+
+ This has several side effects that should be considered.
+ 1. Structures will only be aligned to the size of the largest member,
+ i.e. structures containing only bytes will be byte aligned,
+ structures containing shorts will be half-word aligned, and
+ structures containing ints will be word aligned.
+
+ This means structures should be padded to a word boundary if
+ alignment of 32 is required for byte structures etc.
+
+ 2. A potential performance penalty may exist if strings are no longer
+ word aligned. GCC will not be able to use word load/stores to copy
+ short strings.
+
+ This modification is not encouraged but with the present state of the
+ NetBSD source tree it is currently the only solution that meets the
+ requirements. */
+#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY
+#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8
+
+/* Clear the instruction cache from `BEG' to `END'. This makes a
+ call to the ARM32_SYNC_ICACHE architecture specific syscall. */
+#define CLEAR_INSN_CACHE(BEG, END) \
+{ \
+ extern int sysarch(int number, void *args); \
+ struct { \
+ unsigned int addr; \
+ int len; \
+ } s; \
+ s.addr = (unsigned int)(BEG); \
+ s.len = (END) - (BEG); \
+ (void)sysarch(0, &s); \
+}
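A concrete instance of the sizeof() effect described in the note above (editorial sketch; the struct is hypothetical):

    /* With the armcc-compatible STRUCTURE_SIZE_BOUNDARY of 32 this
       struct is padded to a word, so sizeof (struct hdr) == 4; with
       the NetBSD setting of 8 it is only byte-aligned, so
       sizeof (struct hdr) == 3, which is what the affected kernel
       code relies on.  */
    struct hdr { char a; char b; char c; };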
diff --git a/gcc/config/arm/pe.c b/gcc/config/arm/pe.c
new file mode 100644
index 000000000..3d9efd578
--- /dev/null
+++ b/gcc/config/arm/pe.c
@@ -0,0 +1,257 @@
+/* Routines for GCC for ARM/pe.
+ Copyright (C) 1995, 1996, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Doug Evans (dje@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "output.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+
+extern int current_function_anonymous_args;
+
+
+/* Return nonzero if DECL is a dllexport'd object. */
+
+tree current_class_type; /* FIXME */
+
+int
+arm_dllexport_p (tree decl)
+{
+ tree exp;
+
+ if (TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+ exp = lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl));
+ if (exp)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if DECL is a dllimport'd object. */
+
+int
+arm_dllimport_p (tree decl)
+{
+ tree imp;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && TARGET_NOP_FUN_DLLIMPORT)
+ return 0;
+
+ if (TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+ imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl));
+ if (imp)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if SYMBOL is marked as being dllexport'd. */
+
+int
+arm_dllexport_name_p (const char *symbol)
+{
+ return symbol[0] == ARM_PE_FLAG_CHAR && symbol[1] == 'e' && symbol[2] == '.';
+}
+
+/* Return nonzero if SYMBOL is marked as being dllimport'd. */
+
+int
+arm_dllimport_name_p (const char *symbol)
+{
+ return symbol[0] == ARM_PE_FLAG_CHAR && symbol[1] == 'i' && symbol[2] == '.';
+}
+
+/* Mark a DECL as being dllexport'd.
+ Note that we override the previous setting (e.g.: dllimport). */
+
+void
+arm_mark_dllexport (tree decl)
+{
+ const char * oldname;
+ char * newname;
+ rtx rtlname;
+ tree idp;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+ if (GET_CODE (rtlname) == MEM)
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ if (arm_dllimport_name_p (oldname))
+ oldname += 9;
+ else if (arm_dllexport_name_p (oldname))
+ return; /* already done */
+
+ newname = XALLOCAVEC (char, strlen (oldname) + 4);
+ sprintf (newname, "%ce.%s", ARM_PE_FLAG_CHAR, oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ /* ??? At least I think that's why we do this. */
+ idp = get_identifier (newname);
+
+ XEXP (DECL_RTL (decl), 0) =
+ gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+}
+
+/* Mark a DECL as being dllimport'd. */
+
+void
+arm_mark_dllimport (tree decl)
+{
+ const char * oldname;
+ char * newname;
+ tree idp;
+ rtx rtlname, newrtl;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+
+ if (GET_CODE (rtlname) == MEM)
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ gcc_assert (!arm_dllexport_name_p (oldname));
+ if (arm_dllimport_name_p (oldname))
+ return; /* already done */
+
+ /* ??? One can well ask why we're making these checks here,
+ and that would be a good question. */
+
+ /* Imported variables can't be initialized. */
+ if (TREE_CODE (decl) == VAR_DECL
+ && !DECL_VIRTUAL_P (decl)
+ && DECL_INITIAL (decl))
+ {
+ error ("initialized variable %q+D is marked dllimport", decl);
+ return;
+ }
+ /* Nor can they be static. */
+ if (TREE_CODE (decl) == VAR_DECL
+ /* ??? Is this test for vtables needed? */
+ && !DECL_VIRTUAL_P (decl)
+ && 0 /*???*/)
+ {
+ error ("static variable %q+D is marked dllimport", decl);
+ return;
+ }
+
+ /* `extern' needn't be specified with dllimport.
+ Specify `extern' now and hope for the best. Sigh. */
+ if (TREE_CODE (decl) == VAR_DECL
+ /* ??? Is this test for vtables needed? */
+ && !DECL_VIRTUAL_P (decl))
+ {
+ DECL_EXTERNAL (decl) = 1;
+ TREE_PUBLIC (decl) = 1;
+ }
+
+ newname = XALLOCAVEC (char, strlen (oldname) + 11);
+ sprintf (newname, "%ci.__imp_%s", ARM_PE_FLAG_CHAR, oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ /* ??? At least I think that's why we do this. */
+ idp = get_identifier (newname);
+
+ newrtl = gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (idp)));
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+}
+
+void
+arm_pe_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED)
+{
+ /* This bit is copied from arm_encode_section_info. */
+ if (optimize > 0 && TREE_CONSTANT (decl))
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
+
+ /* Mark the decl so we can tell from the rtl whether the object is
+ dllexport'd or dllimport'd. */
+ if (arm_dllexport_p (decl))
+ arm_mark_dllexport (decl);
+ else if (arm_dllimport_p (decl))
+ arm_mark_dllimport (decl);
+ /* It might be that DECL has already been marked as dllimport, but a
+ subsequent definition nullified that. The attribute is gone but
+ DECL_RTL still has @i.__imp_foo. We need to remove that. */
+ else if ((TREE_CODE (decl) == FUNCTION_DECL
+ || TREE_CODE (decl) == VAR_DECL)
+ && DECL_RTL (decl) != NULL_RTX
+ && GET_CODE (DECL_RTL (decl)) == MEM
+ && GET_CODE (XEXP (DECL_RTL (decl), 0)) == MEM
+ && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF
+ && arm_dllimport_name_p (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0)))
+ {
+ const char *oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0);
+ tree idp = get_identifier (oldname + 9);
+ rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+
+ /* We previously set TREE_PUBLIC and DECL_EXTERNAL.
+ ??? We leave these alone for now. */
+ }
+}
+
+void
+arm_pe_unique_section (tree decl, int reloc)
+{
+ int len;
+ const char * name;
+ char * string;
+ const char * prefix;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = arm_strip_name_encoding (name);
+
+ /* The object is put in, for example, section .text$foo.
+ The linker will then ultimately place it in .text
+ (everything from the $ on is stripped). */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ prefix = ".text$";
+ else if (decl_readonly_section (decl, reloc))
+ prefix = ".rdata$";
+ else
+ prefix = ".data$";
+ len = strlen (name) + strlen (prefix);
+ string = XALLOCAVEC (char, len + 1);
+ sprintf (string, "%s%s", prefix, name);
+
+ DECL_SECTION_NAME (decl) = build_string (len, string);
+}
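For orientation, the name encoding used throughout this file can be summarised as follows (editorial sketch; "foo" is a placeholder):

    /* ARM_PE_FLAG_CHAR is '@', so after marking:
         dllexport'd "foo"  ->  "@e.foo"
         dllimport'd "foo"  ->  "@i.__imp_foo"  (wrapped in a MEM)
       arm_dllimport_name_p ("@i.__imp_foo") returns nonzero, and
       skipping the nine-byte "@i.__imp_" prefix (oldname += 9)
       recovers the original "foo".  */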
diff --git a/gcc/config/arm/pe.h b/gcc/config/arm/pe.h
new file mode 100644
index 000000000..009c4fe43
--- /dev/null
+++ b/gcc/config/arm/pe.h
@@ -0,0 +1,148 @@
+/* Definitions of target machine for GNU compiler, for ARM with PE obj format.
+ Copyright (C) 1995, 1996, 1999, 2000, 2002, 2003, 2004, 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Doug Evans (dje@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Enable PE specific code. */
+#define ARM_PE 1
+
+#define ARM_PE_FLAG_CHAR '@'
+
+/* Ensure that @x. will be stripped from the function name. */
+#undef SUBTARGET_NAME_ENCODING_LENGTHS
+#define SUBTARGET_NAME_ENCODING_LENGTHS \
+ case ARM_PE_FLAG_CHAR: return 3;
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/pe)", stderr)
+
+/* Get tree.c to declare a target-specific specialization of
+ merge_decl_attributes. */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "-D__pe__"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_NOP_FUN_DLLIMPORT)
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" }
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+/* r11 is fixed. */
+#undef SUBTARGET_CONDITIONAL_REGISTER_USAGE
+#define SUBTARGET_CONDITIONAL_REGISTER_USAGE \
+ fixed_regs [11] = 1; \
+ call_used_regs [11] = 1;
+
+
+/* PE/COFF uses explicit import from shared libraries. */
+#define MULTIPLE_SYMBOL_SPACES 1
+
+#define TARGET_ASM_UNIQUE_SECTION arm_pe_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+
+#define SUPPORTS_ONE_ONLY 1
+
+/* Switch into a generic section. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_pe_asm_named_section
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+/* Output a reference to a label. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ asm_fprintf (STREAM, "%U%s", arm_strip_name_encoding (NAME))
+
+/* Output a function definition label. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ if (arm_dllexport_name_p (NAME)) \
+ { \
+ drectve_section (); \
+ fprintf (STREAM, "\t.ascii \" -export:%s\"\n", \
+ arm_strip_name_encoding (NAME)); \
+ switch_to_section (function_section (DECL)); \
+ } \
+ ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \
+ if (TARGET_THUMB) \
+ fprintf (STREAM, "\t.code 16\n"); \
+ ASM_OUTPUT_LABEL (STREAM, NAME); \
+ } \
+ while (0)
+
+/* Output a common block. */
+#undef ASM_OUTPUT_COMMON
+#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \
+ do \
+ { \
+ if (arm_dllexport_name_p (NAME)) \
+ { \
+ drectve_section (); \
+ fprintf ((STREAM), "\t.ascii \" -export:%s\"\n",\
+ arm_strip_name_encoding (NAME)); \
+ } \
+ if (! arm_dllimport_name_p (NAME)) \
+ { \
+ fprintf ((STREAM), "\t.comm\t"); \
+ assemble_name ((STREAM), (NAME)); \
+ asm_fprintf ((STREAM), ", %d\t%@ %d\n", \
+ (int)(ROUNDED), (int)(SIZE)); \
+ } \
+ } \
+ while (0)
+
+/* Output the label for an initialized variable. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ if (arm_dllexport_name_p (NAME)) \
+ { \
+ section *save_section = in_section; \
+ drectve_section (); \
+ fprintf (STREAM, "\t.ascii \" -export:%s\"\n",\
+ arm_strip_name_encoding (NAME)); \
+ switch_to_section (save_section); \
+ } \
+ ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
+ } \
+ while (0)
+
+/* Support the ctors/dtors and other sections. */
+
+#define DRECTVE_SECTION_ASM_OP "\t.section .drectve"
+
+#define drectve_section() \
+ (fprintf (asm_out_file, "%s\n", DRECTVE_SECTION_ASM_OP), \
+ in_section = NULL)
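For a dllexport'd symbol "foo" (a hypothetical example), the macros above emit roughly the following assembler text; PE/COFF linkers read the contents of .drectve as extra command-line directives, which is how the export reaches the linker:

    	.section .drectve
    	.ascii " -export:foo"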
diff --git a/gcc/config/arm/pe.opt b/gcc/config/arm/pe.opt
new file mode 100644
index 000000000..560a52a81
--- /dev/null
+++ b/gcc/config/arm/pe.opt
@@ -0,0 +1,23 @@
+; PE-specific options for the ARM port
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mnop-fun-dllimport
+Target Report Mask(NOP_FUN_DLLIMPORT)
+Ignore dllimport attribute for functions
diff --git a/gcc/config/arm/pr-support.c b/gcc/config/arm/pr-support.c
new file mode 100644
index 000000000..deee661e2
--- /dev/null
+++ b/gcc/config/arm/pr-support.c
@@ -0,0 +1,401 @@
+/* ARM EABI compliant unwinding routines
+ Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Paul Brook
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "unwind.h"
+
+/* We add a prototype for abort here to avoid creating a dependency on
+ target headers. */
+extern void abort (void);
+
+typedef struct _ZSt9type_info type_info; /* This names the C++ type_info type. */
+
+/* Misc constants. */
+#define R_IP 12
+#define R_SP 13
+#define R_LR 14
+#define R_PC 15
+
+#define uint32_highbit (((_uw) 1) << 31)
+
+void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
+
+/* Unwind descriptors. */
+
+typedef struct
+{
+ _uw16 length;
+ _uw16 offset;
+} EHT16;
+
+typedef struct
+{
+ _uw length;
+ _uw offset;
+} EHT32;
+
+/* Calculate the address encoded by a 31-bit self-relative offset at address
+ P. Copy of routine in unwind-arm.c. */
+
+static inline _uw
+selfrel_offset31 (const _uw *p)
+{
+ _uw offset;
+
+ offset = *p;
+ /* Sign extend to 32 bits. */
+ if (offset & (1 << 30))
+ offset |= 1u << 31;
+
+ return offset + (_uw) p;
+}
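A worked example of the sign extension above (editorial note):

    /* If *p == 0x7ffffffc, bit 30 is set, so bit 31 is or'ed in and
       the offset becomes 0xfffffffc == -4; the encoded address is
       then p - 4.  Values with bit 30 clear are positive offsets.  */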
+
+
+/* Personality routine helper functions. */
+
+#define CODE_FINISH (0xb0)
+
+/* Return the next byte of unwinding information, or CODE_FINISH if there is
+ no data remaining. */
+static inline _uw8
+next_unwind_byte (__gnu_unwind_state * uws)
+{
+ _uw8 b;
+
+ if (uws->bytes_left == 0)
+ {
+ /* Load another word */
+ if (uws->words_left == 0)
+ return CODE_FINISH; /* Nothing left. */
+ uws->words_left--;
+ uws->data = *(uws->next++);
+ uws->bytes_left = 3;
+ }
+ else
+ uws->bytes_left--;
+
+ /* Extract the most significant byte. */
+ b = (uws->data >> 24) & 0xff;
+ uws->data <<= 8;
+ return b;
+}
+
+/* Execute the unwinding instructions described by UWS. */
+_Unwind_Reason_Code
+__gnu_unwind_execute (_Unwind_Context * context, __gnu_unwind_state * uws)
+{
+ _uw op;
+ int set_pc;
+ _uw reg;
+
+ set_pc = 0;
+ for (;;)
+ {
+ op = next_unwind_byte (uws);
+ if (op == CODE_FINISH)
+ {
+ /* If we haven't already set pc then copy it from lr. */
+ if (!set_pc)
+ {
+ _Unwind_VRS_Get (context, _UVRSC_CORE, R_LR, _UVRSD_UINT32,
+ &reg);
+ _Unwind_VRS_Set (context, _UVRSC_CORE, R_PC, _UVRSD_UINT32,
+ &reg);
+ set_pc = 1;
+ }
+ /* Drop out of the loop. */
+ break;
+ }
+ if ((op & 0x80) == 0)
+ {
+ /* vsp = vsp +- (imm6 << 2 + 4). */
+ _uw offset;
+
+ offset = ((op & 0x3f) << 2) + 4;
+ _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg);
+ if (op & 0x40)
+ reg -= offset;
+ else
+ reg += offset;
+ _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg);
+ continue;
+ }
+
+ if ((op & 0xf0) == 0x80)
+ {
+ op = (op << 8) | next_unwind_byte (uws);
+ if (op == 0x8000)
+ {
+ /* Refuse to unwind. */
+ return _URC_FAILURE;
+ }
+ /* Pop r4-r15 under mask. */
+ op = (op << 4) & 0xfff0;
+ if (_Unwind_VRS_Pop (context, _UVRSC_CORE, op, _UVRSD_UINT32)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ if (op & (1 << R_PC))
+ set_pc = 1;
+ continue;
+ }
+ if ((op & 0xf0) == 0x90)
+ {
+ op &= 0xf;
+ if (op == 13 || op == 15)
+ /* Reserved. */
+ return _URC_FAILURE;
+ /* vsp = r[nnnn]. */
+ _Unwind_VRS_Get (context, _UVRSC_CORE, op, _UVRSD_UINT32, &reg);
+ _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32, &reg);
+ continue;
+ }
+ if ((op & 0xf0) == 0xa0)
+ {
+ /* Pop r4-r[4+nnn], [lr]. */
+ _uw mask;
+
+ mask = (0xff0 >> (7 - (op & 7))) & 0xff0;
+ if (op & 8)
+ mask |= (1 << R_LR);
+ if (_Unwind_VRS_Pop (context, _UVRSC_CORE, mask, _UVRSD_UINT32)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if ((op & 0xf0) == 0xb0)
+ {
+ /* op == 0xb0 already handled. */
+ if (op == 0xb1)
+ {
+ op = next_unwind_byte (uws);
+ if (op == 0 || ((op & 0xf0) != 0))
+ /* Spare. */
+ return _URC_FAILURE;
+ /* Pop r0-r4 under mask. */
+ if (_Unwind_VRS_Pop (context, _UVRSC_CORE, op, _UVRSD_UINT32)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if (op == 0xb2)
+ {
+ /* vsp = vsp + 0x204 + (uleb128 << 2). */
+ int shift;
+
+ _Unwind_VRS_Get (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32,
+ &reg);
+ op = next_unwind_byte (uws);
+ shift = 2;
+ while (op & 0x80)
+ {
+ reg += ((op & 0x7f) << shift);
+ shift += 7;
+ op = next_unwind_byte (uws);
+ }
+ reg += ((op & 0x7f) << shift) + 0x204;
+ _Unwind_VRS_Set (context, _UVRSC_CORE, R_SP, _UVRSD_UINT32,
+ &reg);
+ continue;
+ }
+ if (op == 0xb3)
+ {
+ /* Pop VFP registers with fldmx. */
+ op = next_unwind_byte (uws);
+ op = ((op & 0xf0) << 12) | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_VFPX)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if ((op & 0xfc) == 0xb4)
+ {
+ /* Pop FPA E[4]-E[4+nn]. */
+ op = 0x40000 | ((op & 3) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ /* op & 0xf8 == 0xb8. */
+ /* Pop VFP D[8]-D[8+nnn] with fldmx. */
+ op = 0x80000 | ((op & 7) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_VFPX)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if ((op & 0xf0) == 0xc0)
+ {
+ if (op == 0xc6)
+ {
+ /* Pop iWMMXt D registers. */
+ op = next_unwind_byte (uws);
+ op = ((op & 0xf0) << 12) | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_WMMXD, op, _UVRSD_UINT64)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if (op == 0xc7)
+ {
+ op = next_unwind_byte (uws);
+ if (op == 0 || (op & 0xf0) != 0)
+ /* Spare. */
+ return _URC_FAILURE;
+ /* Pop iWMMXt wCGR{3,2,1,0} under mask. */
+ if (_Unwind_VRS_Pop (context, _UVRSC_WMMXC, op, _UVRSD_UINT32)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if ((op & 0xf8) == 0xc0)
+ {
+ /* Pop iWMMXt wR[10]-wR[10+nnn]. */
+ op = 0xa0000 | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_WMMXD, op, _UVRSD_UINT64)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ if (op == 0xc8)
+ {
+#ifndef __VFP_FP__
+ /* Pop FPA registers. */
+ op = next_unwind_byte (uws);
+ op = ((op & 0xf0) << 12) | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_FPA, op, _UVRSD_FPAX)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+#else
+ /* Pop VFPv3 registers D[16+ssss]-D[16+ssss+cccc] with vldm. */
+ op = next_unwind_byte (uws);
+ op = (((op & 0xf0) + 16) << 12) | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+#endif
+ }
+ if (op == 0xc9)
+ {
+ /* Pop VFP registers with fldmd. */
+ op = next_unwind_byte (uws);
+ op = ((op & 0xf0) << 12) | ((op & 0xf) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ /* Spare. */
+ return _URC_FAILURE;
+ }
+ if ((op & 0xf8) == 0xd0)
+ {
+ /* Pop VFP D[8]-D[8+nnn] with fldmd. */
+ op = 0x80000 | ((op & 7) + 1);
+ if (_Unwind_VRS_Pop (context, _UVRSC_VFP, op, _UVRSD_DOUBLE)
+ != _UVRSR_OK)
+ return _URC_FAILURE;
+ continue;
+ }
+ /* Spare. */
+ return _URC_FAILURE;
+ }
+ return _URC_OK;
+}
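A few sample decodings under the opcode scheme implemented above (editorial illustration):

    /* 0x04       -> vsp += (0x04 << 2) + 4 = 20
       0x44       -> vsp -= (0x04 << 2) + 4 = 20
       0x80 0x08  -> pop {r7}  (mask (0x8008 << 4) & 0xfff0 = bit 7)
       0xb0       -> CODE_FINISH: copy lr to pc unless pc was set.  */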
+
+
+/* Execute the unwinding instructions associated with a frame. UCBP and
+ CONTEXT are the current exception object and virtual CPU state
+ respectively. */
+
+_Unwind_Reason_Code
+__gnu_unwind_frame (_Unwind_Control_Block * ucbp, _Unwind_Context * context)
+{
+ _uw *ptr;
+ __gnu_unwind_state uws;
+
+ ptr = (_uw *) ucbp->pr_cache.ehtp;
+ /* Skip over the personality routine address. */
+ ptr++;
+ /* Setup the unwinder state. */
+ uws.data = (*ptr) << 8;
+ uws.next = ptr + 1;
+ uws.bytes_left = 3;
+ uws.words_left = ((*ptr) >> 24) & 0xff;
+
+ return __gnu_unwind_execute (context, &uws);
+}
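The exception-table entry layout this function assumes can be drawn as follows (editorial summary of the code above):

    /* ehtp -> word 0: personality routine offset (skipped via ptr++)
               word 1: bits 31..24 = N, the count of extra data words;
                       bits 23..0  = the first three unwind opcode bytes
               words 2..N+1: remaining opcode bytes, most significant
               byte first.  */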
+
+/* Get the _Unwind_Control_Block from an _Unwind_Context. */
+
+static inline _Unwind_Control_Block *
+unwind_UCB_from_context (_Unwind_Context * context)
+{
+ return (_Unwind_Control_Block *) _Unwind_GetGR (context, R_IP);
+}
+
+/* Get the start address of the function being unwound. */
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (_Unwind_Context * context)
+{
+ _Unwind_Control_Block *ucbp;
+
+ ucbp = unwind_UCB_from_context (context);
+ return (_Unwind_Ptr) ucbp->pr_cache.fnstart;
+}
+
+/* Find the language-specific exception data. */
+
+void *
+_Unwind_GetLanguageSpecificData (_Unwind_Context * context)
+{
+ _Unwind_Control_Block *ucbp;
+ _uw *ptr;
+
+ /* Get a pointer to the exception table entry. */
+ ucbp = unwind_UCB_from_context (context);
+ ptr = (_uw *) ucbp->pr_cache.ehtp;
+ /* Skip the personality routine address. */
+ ptr++;
+ /* Skip the unwind opcodes. */
+ ptr += (((*ptr) >> 24) & 0xff) + 1;
+
+ return ptr;
+}
+
+
+/* These two should never be used. */
+
+_Unwind_Ptr
+_Unwind_GetDataRelBase (_Unwind_Context *context __attribute__ ((unused)))
+{
+ abort ();
+}
+
+_Unwind_Ptr
+_Unwind_GetTextRelBase (_Unwind_Context *context __attribute__ ((unused)))
+{
+ abort ();
+}
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
new file mode 100644
index 000000000..e34b46da0
--- /dev/null
+++ b/gcc/config/arm/predicates.md
@@ -0,0 +1,688 @@
+;; Predicate definitions for ARM and Thumb
+;; Copyright (C) 2004, 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by ARM Ltd.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "s_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ /* We don't consider registers whose class is NO_REGS
+ to be a register operand. */
+ /* XXX might have to check for lo regs only for thumb ??? */
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
+})
+
+;; Any hard register.
+(define_predicate "arm_hard_register_operand"
+ (match_code "reg")
+{
+ return REGNO (op) < FIRST_PSEUDO_REGISTER;
+})
+
+;; A low register.
+(define_predicate "low_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) <= LAST_LO_REGNUM")))
+
+;; A low register or const_int.
+(define_predicate "low_reg_or_int_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "low_register_operand")))
+
+;; Any core register, or any pseudo.
+(define_predicate "arm_general_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) <= LAST_ARM_REGNUM
+ || REGNO (op) >= FIRST_PSEUDO_REGISTER));
+})
+
+(define_predicate "f_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* We don't consider registers whose class is NO_REGS
+ to be a register operand. */
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == FPA_REGS));
+})
+
+(define_predicate "vfp_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* We don't consider registers whose class is NO_REGS
+ to be a register operand. */
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == VFP_D0_D7_REGS
+ || REGNO_REG_CLASS (REGNO (op)) == VFP_LO_REGS
+ || (TARGET_VFPD32
+ && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS)));
+})
+
+(define_special_predicate "subreg_lowpart_operator"
+ (and (match_code "subreg")
+ (match_test "subreg_lowpart_p (op)")))
+
+;; Reg, subreg(reg) or const_int.
+(define_predicate "reg_or_int_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "s_register_operand")))
+
+(define_predicate "arm_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "const_ok_for_arm (INTVAL (op))")))
+
+;; A constant value which fits into two instructions, each taking
+;; an arithmetic constant operand for one of the words.
+(define_predicate "arm_immediate_di_operand"
+ (and (match_code "const_int,const_double")
+ (match_test "arm_const_double_by_immediates (op)")))
+
+(define_predicate "arm_neg_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "const_ok_for_arm (-INTVAL (op))")))
+
+(define_predicate "arm_not_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "const_ok_for_arm (~INTVAL (op))")))
+
+(define_predicate "const0_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 0")))
+
+;; Something valid on the RHS of an ARM data-processing instruction
+(define_predicate "arm_rhs_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "arm_immediate_operand")))
+
+(define_predicate "arm_rhsm_operand"
+ (ior (match_operand 0 "arm_rhs_operand")
+ (match_operand 0 "memory_operand")))
+
+(define_predicate "shift_amount_operand"
+ (ior (and (match_test "TARGET_ARM")
+ (match_operand 0 "s_register_operand"))
+ (match_operand 0 "const_int_operand")))
+
+(define_predicate "arm_add_operand"
+ (ior (match_operand 0 "arm_rhs_operand")
+ (match_operand 0 "arm_neg_immediate_operand")))
+
+(define_predicate "arm_addimm_operand"
+ (ior (match_operand 0 "arm_immediate_operand")
+ (match_operand 0 "arm_neg_immediate_operand")))
+
+(define_predicate "arm_not_operand"
+ (ior (match_operand 0 "arm_rhs_operand")
+ (match_operand 0 "arm_not_immediate_operand")))
+
+(define_predicate "arm_di_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "arm_immediate_di_operand")))
+
+;; True if the operand is a memory reference which contains an
+;; offsettable address.
+(define_predicate "offsettable_memory_operand"
+ (and (match_code "mem")
+ (match_test
+ "offsettable_address_p (reload_completed | reload_in_progress,
+ mode, XEXP (op, 0))")))
+
+;; True if the operand is a memory operand that does not have an
+;; automodified base register (and thus will not generate output reloads).
+(define_predicate "call_memory_operand"
+ (and (match_code "mem")
+ (and (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0)))
+ != RTX_AUTOINC")
+ (match_operand 0 "memory_operand"))))
+
+(define_predicate "arm_reload_memory_operand"
+ (and (match_code "mem,reg,subreg")
+ (match_test "(!CONSTANT_P (op)
+ && (true_regnum(op) == -1
+ || (GET_CODE (op) == REG
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER)))")))
+
+;; True for valid operands for the RHS of a floating point insn.
+;; Allows regs or certain consts on FPA, just regs for everything else.
+(define_predicate "arm_float_rhs_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_double")
+ (match_test "TARGET_FPA && arm_const_double_rtx (op)"))))
+
+(define_predicate "arm_float_add_operand"
+ (ior (match_operand 0 "arm_float_rhs_operand")
+ (and (match_code "const_double")
+ (match_test "TARGET_FPA && neg_const_double_rtx_ok_for_fpa (op)"))))
+
+(define_predicate "vfp_compare_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_double")
+ (match_test "arm_const_double_rtx (op)"))))
+
+(define_predicate "arm_float_compare_operand"
+ (if_then_else (match_test "TARGET_VFP")
+ (match_operand 0 "vfp_compare_operand")
+ (match_operand 0 "arm_float_rhs_operand")))
+
+;; True for valid index operands.
+(define_predicate "index_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_operand 0 "immediate_operand")
+ (match_test "(GET_CODE (op) != CONST_INT
+ || (INTVAL (op) < 4096 && INTVAL (op) > -4096))"))))
+
+;; True for operators that can be combined with a shift in ARM state.
+(define_special_predicate "shiftable_operator"
+ (and (match_code "plus,minus,ior,xor,and")
+ (match_test "mode == GET_MODE (op)")))
+
+;; True for logical binary operators.
+(define_special_predicate "logical_binary_operator"
+ (and (match_code "ior,xor,and")
+ (match_test "mode == GET_MODE (op)")))
+
+;; True for commutative operators
+(define_special_predicate "commutative_binary_operator"
+ (and (match_code "ior,xor,and,plus")
+ (match_test "mode == GET_MODE (op)")))
+
+;; True for shift operators.
+(define_special_predicate "shift_operator"
+ (and (ior (ior (and (match_code "mult")
+ (match_test "power_of_two_operand (XEXP (op, 1), mode)"))
+ (and (match_code "rotate")
+ (match_test "GET_CODE (XEXP (op, 1)) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
+ (match_code "ashift,ashiftrt,lshiftrt,rotatert"))
+ (match_test "mode == GET_MODE (op)")))
+
+;; True for MULT, to identify which variant of shift_operator is in use.
+(define_special_predicate "mult_operator"
+ (match_code "mult"))
+
+;; True for operators that have 16-bit thumb variants.
+(define_special_predicate "thumb_16bit_operator"
+ (match_code "plus,minus,and,ior,xor"))
+
+;; True for EQ & NE
+(define_special_predicate "equality_operator"
+ (match_code "eq,ne"))
+
+;; True for integer comparisons and, if FP is active, for comparisons
+;; other than LTGT or UNEQ.
+(define_special_predicate "arm_comparison_operator"
+ (ior (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu")
+ (and (match_test "TARGET_32BIT && TARGET_HARD_FLOAT
+ && (TARGET_FPA || TARGET_VFP)")
+ (match_code "unordered,ordered,unlt,unle,unge,ungt"))))
+
+(define_special_predicate "lt_ge_comparison_operator"
+ (match_code "lt,ge"))
+
+(define_special_predicate "noov_comparison_operator"
+ (match_code "lt,ge,eq,ne"))
+
+(define_special_predicate "minmax_operator"
+ (and (match_code "smin,smax,umin,umax")
+ (match_test "mode == GET_MODE (op)")))
+
+(define_special_predicate "cc_register"
+ (and (match_code "reg")
+ (and (match_test "REGNO (op) == CC_REGNUM")
+ (ior (match_test "mode == GET_MODE (op)")
+ (match_test "mode == VOIDmode && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))))
+
+(define_special_predicate "dominant_cc_register"
+ (match_code "reg")
+{
+ if (mode == VOIDmode)
+ {
+ mode = GET_MODE (op);
+
+ if (GET_MODE_CLASS (mode) != MODE_CC)
+ return false;
+ }
+
+ return (cc_register (op, mode)
+ && (mode == CC_DNEmode
+ || mode == CC_DEQmode
+ || mode == CC_DLEmode
+ || mode == CC_DLTmode
+ || mode == CC_DGEmode
+ || mode == CC_DGTmode
+ || mode == CC_DLEUmode
+ || mode == CC_DLTUmode
+ || mode == CC_DGEUmode
+ || mode == CC_DGTUmode));
+})
+
+(define_special_predicate "arm_extendqisi_mem_op"
+ (and (match_operand 0 "memory_operand")
+ (match_test "arm_legitimate_address_outer_p (mode, XEXP (op, 0),
+ SIGN_EXTEND, 0)")))
+
+(define_special_predicate "arm_reg_or_extendqisi_mem_op"
+ (ior (match_operand 0 "arm_extendqisi_mem_op")
+ (match_operand 0 "s_register_operand")))
+
+(define_predicate "power_of_two_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT value = INTVAL (op) & 0xffffffff;
+
+ return value != 0 && (value & (value - 1)) == 0;
+})
+
+(define_predicate "nonimmediate_di_operand"
+ (match_code "reg,subreg,mem")
+{
+ if (s_register_operand (op, mode))
+ return true;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return GET_CODE (op) == MEM && memory_address_p (DImode, XEXP (op, 0));
+})
+
+(define_predicate "di_operand"
+ (ior (match_code "const_int,const_double")
+ (and (match_code "reg,subreg,mem")
+ (match_operand 0 "nonimmediate_di_operand"))))
+
+(define_predicate "nonimmediate_soft_df_operand"
+ (match_code "reg,subreg,mem")
+{
+ if (s_register_operand (op, mode))
+ return true;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return GET_CODE (op) == MEM && memory_address_p (DFmode, XEXP (op, 0));
+})
+
+(define_predicate "soft_df_operand"
+ (ior (match_code "const_double")
+ (and (match_code "reg,subreg,mem")
+ (match_operand 0 "nonimmediate_soft_df_operand"))))
+
+(define_predicate "const_shift_operand"
+ (and (match_code "const_int")
+ (ior (match_operand 0 "power_of_two_operand")
+ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 32"))))
+
+
+(define_special_predicate "load_multiple_operation"
+ (match_code "parallel")
+{
+ HOST_WIDE_INT count = XVECLEN (op, 0);
+ unsigned dest_regno;
+ rtx src_addr;
+ HOST_WIDE_INT i = 1, base = 0;
+ HOST_WIDE_INT offset = 0;
+ rtx elt;
+ bool addr_reg_loaded = false;
+ bool update = false;
+
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
+ return false;
+
+ /* Check to see if this might be a write-back. */
+ if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
+ {
+ i++;
+ base = 1;
+ update = true;
+
+ /* Now check it more carefully. */
+ if (GET_CODE (SET_DEST (elt)) != REG
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
+ || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
+ || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
+ return false;
+ }
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= i
+ || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+ if (GET_CODE (src_addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
+ return false;
+ offset = INTVAL (XEXP (src_addr, 1));
+ src_addr = XEXP (src_addr, 0);
+ }
+ if (!REG_P (src_addr))
+ return false;
+
+ for (; i < count; i++)
+ {
+ elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) <= dest_regno
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
+ && (!REG_P (XEXP (SET_SRC (elt), 0))
+ || offset + (i - base) * 4 != 0)))
+ return false;
+ dest_regno = REGNO (SET_DEST (elt));
+ if (dest_regno == REGNO (src_addr))
+ addr_reg_loaded = true;
+ }
+ /* For Thumb, we only have updating instructions. If the pattern does
+ not describe an update, it must be because the address register is
+ in the list of loaded registers - on the hardware, this has the effect
+ of overriding the update. */
+ if (update && addr_reg_loaded)
+ return false;
+ if (TARGET_THUMB1)
+ return update || addr_reg_loaded;
+ return true;
+})
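An example of the kind of PARALLEL this predicate accepts (editorial sketch in RTL; the register numbers are arbitrary):

    ;; ldmia r0!, {r4, r5} - a write-back load of two registers:
    ;; (parallel
    ;;   [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
    ;;    (set (reg:SI 4) (mem:SI (reg:SI 0)))
    ;;    (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])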
+
+(define_special_predicate "store_multiple_operation"
+ (match_code "parallel")
+{
+ HOST_WIDE_INT count = XVECLEN (op, 0);
+ unsigned src_regno;
+ rtx dest_addr;
+ HOST_WIDE_INT i = 1, base = 0, offset = 0;
+ rtx elt;
+
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET)
+ return false;
+
+ /* Check to see if this might be a write-back. */
+ if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
+ {
+ i++;
+ base = 1;
+
+ /* Now check it more carefully. */
+ if (GET_CODE (SET_DEST (elt)) != REG
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
+ || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
+ || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
+ return false;
+ }
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= i
+ || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != REG)
+ return false;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, i - 1)));
+ dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, i - 1)), 0);
+
+ if (GET_CODE (dest_addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (dest_addr, 1)) != CONST_INT)
+ return false;
+ offset = INTVAL (XEXP (dest_addr, 1));
+ dest_addr = XEXP (dest_addr, 0);
+ }
+ if (!REG_P (dest_addr))
+ return false;
+
+ for (; i < count; i++)
+ {
+ elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) <= src_regno
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || ((GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || !rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
+ || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != offset + (i - base) * 4)
+ && (!REG_P (XEXP (SET_DEST (elt), 0))
+ || offset + (i - base) * 4 != 0)))
+ return false;
+ src_regno = REGNO (SET_SRC (elt));
+ }
+
+ return true;
+})
+
+(define_special_predicate "multi_register_push"
+ (match_code "parallel")
+{
+ if ((GET_CODE (XVECEXP (op, 0, 0)) != SET)
+ || (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC)
+ || (XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPEC_PUSH_MULT))
+ return false;
+
+ return true;
+})
+
+;;-------------------------------------------------------------------------
+;;
+;; Thumb predicates
+;;
+
+(define_predicate "thumb1_cmp_operand"
+ (ior (and (match_code "reg,subreg")
+ (match_operand 0 "s_register_operand"))
+ (and (match_code "const_int")
+ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 256"))))
+
+(define_predicate "thumb1_cmpneg_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) < 0 && INTVAL (op) > -256")))
+
+;; Return TRUE if a result can be stored in OP without clobbering the
+;; condition code register. Prior to reload we only accept a
+;; register. After reload we have to be able to handle memory as
+;; well, since a pseudo may not get a hard reg and reload cannot
+;; handle output-reloads on jump insns.
+
+;; We could possibly handle mem before reload as well, but that might
+;; complicate things with the need to handle increment
+;; side-effects.
+(define_predicate "thumb_cbrch_target_operand"
+ (and (match_code "reg,subreg,mem")
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_test "reload_in_progress || reload_completed")
+ (match_operand 0 "memory_operand")))))
+
+;;-------------------------------------------------------------------------
+;;
+;; MAVERICK predicates
+;;
+
+(define_predicate "cirrus_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return (GET_CODE (op) == REG
+ && (REGNO_REG_CLASS (REGNO (op)) == CIRRUS_REGS
+ || REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS));
+})
+
+(define_predicate "cirrus_fp_register"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == CIRRUS_REGS));
+})
+
+(define_predicate "cirrus_shift_const"
+ (and (match_code "const_int")
+ (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 64")))
+
+
+;; Neon predicates
+
+(define_predicate "const_multiple_of_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return (val & 7) == 0;
+})
+
+(define_predicate "imm_for_neon_mov_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_move (op, mode, NULL, NULL);
+})
+
+(define_predicate "imm_for_neon_logic_operand"
+ (match_code "const_vector")
+{
+ return (TARGET_NEON
+ && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
+})
+
+(define_predicate "imm_for_neon_inv_logic_operand"
+ (match_code "const_vector")
+{
+ return (TARGET_NEON
+ && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
+})
+
+(define_predicate "neon_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_logic_operand")
+ (match_operand 0 "s_register_operand")))
+
+(define_predicate "neon_inv_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
+ (match_operand 0 "s_register_operand")))
+
+;; TODO: We could check lane numbers more precisely based on the mode.
+(define_predicate "neon_lane_number"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
+;; Predicates for named expanders that overlap multiple ISAs.
+
+(define_predicate "cmpdi_operand"
+ (if_then_else (match_test "TARGET_HARD_FLOAT && TARGET_MAVERICK")
+ (and (match_test "TARGET_ARM")
+ (match_operand 0 "cirrus_fp_register"))
+ (and (match_test "TARGET_32BIT")
+ (match_operand 0 "arm_di_operand"))))
+
+;; True if the operand is a memory reference suitable for ldrex/strex.
+(define_predicate "arm_sync_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_code "reg" "0")))
+
+;; Predicates for parallel expanders based on mode.
+(define_special_predicate "vect_par_constant_high"
+ (match_code "parallel")
+{
+ HOST_WIDE_INT count = XVECLEN (op, 0);
+ int i;
+ int base = GET_MODE_NUNITS (mode);
+
+ if ((count < 1)
+ || (count != base/2))
+ return false;
+
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ int val;
+
+ if (GET_CODE (elt) != CONST_INT)
+ return false;
+
+ val = INTVAL (elt);
+ if (val != (base/2) + i)
+ return false;
+ }
+ return true;
+})
+
+(define_special_predicate "vect_par_constant_low"
+ (match_code "parallel")
+{
+ HOST_WIDE_INT count = XVECLEN (op, 0);
+ int i;
+ int base = GET_MODE_NUNITS (mode);
+
+ if ((count < 1)
+ || (count != base/2))
+ return false;
+
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ int val;
+
+ if (GET_CODE (elt) != CONST_INT)
+ return false;
+
+ val = INTVAL (elt);
+ if (val != i)
+ return false;
+ }
+ return true;
+})
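For example (editorial note), with mode V4SI these two predicates accept exactly:

    ;; vect_par_constant_high: (parallel [(const_int 2) (const_int 3)])
    ;; vect_par_constant_low:  (parallel [(const_int 0) (const_int 1)])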
+
+(define_special_predicate "add_operator"
+ (match_code "plus"))
diff --git a/gcc/config/arm/rtems-eabi.h b/gcc/config/arm/rtems-eabi.h
new file mode 100644
index 000000000..ced98a91b
--- /dev/null
+++ b/gcc/config/arm/rtems-eabi.h
@@ -0,0 +1,29 @@
+/* Definitions for RTEMS based ARM systems using EABI.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define HAS_INIT_SECTION
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ TARGET_BPABI_CPP_BUILTINS(); \
+ } while (0)
diff --git a/gcc/config/arm/rtems-elf.h b/gcc/config/arm/rtems-elf.h
new file mode 100644
index 000000000..dade74b15
--- /dev/null
+++ b/gcc/config/arm/rtems-elf.h
@@ -0,0 +1,45 @@
+/* Definitions for RTEMS based ARM systems using ELF
+ Copyright (C) 2000, 2002, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/ELF RTEMS)", stderr);
+
+#define HAS_INIT_SECTION
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ } while (0)
+
+/*
+ * The default in gcc now is soft-float, but gcc fails to
+ * pass that on to the assembler.
+ */
+#undef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC "\
+ %{!mhard-float: %{!msoft-float:-mfpu=softfpa}}"
+
+/*
+ * The default includes --start-group and --end-group which conflicts
+ * with how this used to be defined.
+ */
+#undef LINK_GCC_C_SEQUENCE_SPEC
diff --git a/gcc/config/arm/semi.h b/gcc/config/arm/semi.h
new file mode 100644
index 000000000..1e35710c9
--- /dev/null
+++ b/gcc/config/arm/semi.h
@@ -0,0 +1,75 @@
+/* Definitions of target machine for GNU compiler. ARM on semi-hosted platform
+ Copyright (C) 1994, 1995, 1996, 1997, 2001, 2004, 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Richard Earnshaw (richard.earnshaw@arm.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define STARTFILE_SPEC "crt0.o%s"
+
+#ifndef LIB_SPEC
+#define LIB_SPEC "-lc"
+#endif
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "-D__semi__"
+#endif
+
+#ifndef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian:-EB} -X"
+#endif
+
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/semi-hosted)", stderr);
+#endif
+
+#ifndef TARGET_DEFAULT_FLOAT_ABI
+#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD
+#endif
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APCS_FRAME)
+#endif
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC },
+#endif
+
+#ifndef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC ""
+#endif
+
+/* The compiler supports PIC code generation, even though the binutils
+ may not. If we are asked to compile position independent code, we
+ always pass -k to the assembler. If it doesn't recognize it, then
+ it will barf, which probably means that it doesn't know how to
+ assemble PIC code. This is what we want, since otherwise tools
+ may incorrectly assume we support PIC compilation even if the
+ binutils can't. */
+#ifndef ASM_SPEC
+#define ASM_SPEC "\
+%{fpic|fpie: -k} %{fPIC|fPIE: -k} \
+%{mbig-endian:-EB} \
+%{mcpu=*:-mcpu=%*} \
+%{march=*:-march=%*} \
+%{mapcs-float:-mfloat} \
+%{msoft-float:-mfloat-abi=soft} %{mhard-float:-mfloat-abi=hard} \
+%{mfloat-abi=*} %{mfpu=*} \
+%{mthumb-interwork:-mthumb-interwork} \
+%(subtarget_extra_asm_spec)"
+#endif
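To illustrate how the spec strings above expand (editorial example; the command line is hypothetical):

    /* gcc -fPIC -mbig-endian -mcpu=arm7tdmi foo.c
       passes to the assembler, via the ASM_SPEC above:
           -k -EB -mcpu=arm7tdmi  */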
diff --git a/gcc/config/arm/sfp-machine.h b/gcc/config/arm/sfp-machine.h
new file mode 100644
index 000000000..a89d05a00
--- /dev/null
+++ b/gcc/config/arm/sfp-machine.h
@@ -0,0 +1,105 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1)
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_H 0
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
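+
+/* A restatement of the macro above (for readability, not normative):
+   if X is a quiet NaN and Y is not, the result takes Y's sign and
+   fraction; in every other case it takes X's. Either way the result
+   class is set to NaN.  */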
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#if defined __ARMEB__
+# define __BYTE_ORDER __BIG_ENDIAN
+#else
+# define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
+#ifdef __ARM_EABI__
+/* Rename functions to their EABI names. */
+/* The comparison functions need wrappers for EABI semantics, so
+ leave them unmolested. */
+#define __negsf2 __aeabi_fneg
+#define __subsf3 __aeabi_fsub
+#define __addsf3 __aeabi_fadd
+#define __floatunsisf __aeabi_ui2f
+#define __floatsisf __aeabi_i2f
+#define __floatundisf __aeabi_ul2f
+#define __floatdisf __aeabi_l2f
+#define __mulsf3 __aeabi_fmul
+#define __divsf3 __aeabi_fdiv
+#define __unordsf2 __aeabi_fcmpun
+#define __fixsfsi __aeabi_f2iz
+#define __fixunssfsi __aeabi_f2uiz
+#define __fixsfdi __aeabi_f2lz
+#define __fixunssfdi __aeabi_f2ulz
+
+#define __negdf2 __aeabi_dneg
+#define __subdf3 __aeabi_dsub
+#define __adddf3 __aeabi_dadd
+#define __floatunsidf __aeabi_ui2d
+#define __floatsidf __aeabi_i2d
+#define __extendsfdf2 __aeabi_f2d
+#define __truncdfsf2 __aeabi_d2f
+#define __floatundidf __aeabi_ul2d
+#define __floatdidf __aeabi_l2d
+#define __muldf3 __aeabi_dmul
+#define __divdf3 __aeabi_ddiv
+#define __unorddf2 __aeabi_dcmpun
+#define __fixdfsi __aeabi_d2iz
+#define __fixunsdfsi __aeabi_d2uiz
+#define __fixdfdi __aeabi_d2lz
+#define __fixunsdfdi __aeabi_d2ulz
+#define __extendhfsf2 __gnu_h2f_ieee
+#define __truncsfhf2 __gnu_f2h_ieee
+
+#endif /* __ARM_EABI__ */
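+
+/* Illustrative only (a sketch): with these renamings in effect, a plain
+   C addition such as
+
+     float add_f (float a, float b) { return a + b; }
+
+   compiled for a soft-float EABI target ends up calling __aeabi_fadd
+   rather than the default libgcc name __addsf3.  */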
diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h
new file mode 100644
index 000000000..ff233a89f
--- /dev/null
+++ b/gcc/config/arm/symbian.h
@@ -0,0 +1,105 @@
+/* Configuration file for Symbian OS on ARM processors.
+ Copyright (C) 2004, 2005, 2007, 2008
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Do not expand builtin functions (unless explicitly prefixed with
+ "__builtin"). Symbian OS code relies on properties of the standard
+ library that go beyond those guaranteed by the ANSI/ISO standard.
+ For example, "memcpy" works even with overlapping memory, like
+ "memmove". We cannot simply set flag_no_builtin in arm.c because
+ (a) flag_no_builtin is not declared in language-independent code,
+ and (b) that would prevent users from explicitly overriding the
+ default with -fbuiltin, which may sometimes be useful.
+
+ Make all symbols hidden by default. Symbian OS expects that all
+ exported symbols will be explicitly marked with
+ "__declspec(dllexport)".
+
+ Enumeration types use 4 bytes, even if the enumerators are small,
+ unless explicitly overridden.
+
+ The wchar_t type is a 2-byte type, unless explicitly
+ overridden. */
+#define CC1_SPEC \
+ "%{!fbuiltin:%{!fno-builtin:-fno-builtin}} " \
+ "%{!fvisibility=*:-fvisibility=hidden} " \
+ "%{!fshort-enums:%{!fno-short-enums:-fno-short-enums}} " \
+ "%{!fshort-wchar:%{!fno-short-wchar:-fshort-wchar}} "
+#define CC1PLUS_SPEC CC1_SPEC
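+
+/* A sketch of the resulting defaults: with no relevant options on the
+   command line, the compiler behaves as if
+
+     -fno-builtin -fvisibility=hidden -fno-short-enums -fshort-wchar
+
+   had been given; any explicit -fbuiltin, -fvisibility=*,
+   -fshort-enums or -fno-short-wchar takes precedence.  */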
+
+/* Symbian OS does not use crt*.o, unlike the generic unknown-elf
+ configuration. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+/* Do not link with any libraries by default. On Symbian OS, the user
+ must supply all required libraries on the command line. */
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+/* Support the "dllimport" attribute. */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+/* Symbian OS assumes ARM V5 or above. Since -march=armv5 is
+ equivalent to making the ARM 10TDMI core the default, we can set
+ SUBTARGET_CPU_DEFAULT and get an equivalent effect. */
+#undef SUBTARGET_CPU_DEFAULT
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi
+
+/* The assembler should assume VFP FPU format, and armv5t. */
+#undef SUBTARGET_ASM_FLOAT_SPEC
+#define SUBTARGET_ASM_FLOAT_SPEC \
+ "%{!mfpu=*:-mfpu=vfp} %{!mcpu=*:%{!march=*:-march=armv5t}}"
+
+/* SymbianOS provides the BPABI routines in a separate library.
+ Therefore, we do not need to define any of them in libgcc. */
+#undef RENAME_LIBRARY
+#define RENAME_LIBRARY(GCC_NAME, AEABI_NAME) /* empty */
+
+/* Define the __symbian__ macro. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ /* Include the default BPABI stuff. */ \
+ TARGET_BPABI_CPP_BUILTINS (); \
+ /* Symbian OS does not support merging symbols across DLL \
+ boundaries. */ \
+ builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \
+ builtin_define ("__symbian__"); \
+ } \
+ while (false)
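+
+/* Illustrative only: target-specific code can then be guarded in the
+   usual way, e.g.
+
+     #ifdef __symbian__
+     ...Symbian-specific declarations...
+     #endif
+*/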
+
+/* On SymbianOS, these sections are not writable, so we use "a",
+ rather than "aw", for the section attributes. */
+#undef ARM_EABI_CTORS_SECTION_OP
+#define ARM_EABI_CTORS_SECTION_OP \
+ "\t.section\t.init_array,\"a\",%init_array"
+#undef ARM_EABI_DTORS_SECTION_OP
+#define ARM_EABI_DTORS_SECTION_OP \
+ "\t.section\t.fini_array,\"a\",%fini_array"
+
+/* SymbianOS cannot merge entities with vague linkage at runtime. */
+#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false
+
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
new file mode 100644
index 000000000..689a235c1
--- /dev/null
+++ b/gcc/config/arm/sync.md
@@ -0,0 +1,602 @@
+;; Machine description for ARM processor synchronization primitives.
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; ARMv6 introduced the ldrex and strex instructions, which access
+;; SI-width data. In order to implement synchronization primitives
+;; for the narrower QI and HI modes, we insert appropriate AND/OR
+;; sequences into the synchronization loop to mask out the relevant
+;; component of an SI access.
+
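+;; For reference, a sketch (not emitted verbatim by this file) of the
+;; ldrex/strex retry loop that the expanders below ultimately produce:
+;;
+;;   1:  ldrex   r0, [r1]        @ load-exclusive the old value
+;;       <op>    r2, r0, r3      @ compute the new value
+;;       strex   r4, r2, [r1]    @ try to store it back
+;;       cmp     r4, #0          @ r4 == 0 iff the store succeeded
+;;       bne     1b              @ otherwise retry
+;;
+;; with additional masking around <op> for the narrow QI/HI modes, as
+;; described above.
+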
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_MEMORY_BARRIER"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_expand "sync_compare_and_swapsi"
+ [(set (match_operand:SI 0 "s_register_operand")
+ (unspec_volatile:SI [(match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")
+ (match_operand:SI 3 "s_register_operand")]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omrn;
+ generator.u.omrn = gen_arm_sync_compare_and_swapsi;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+ })
+
+(define_mode_iterator NARROW [QI HI])
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(set (match_operand:NARROW 0 "s_register_operand")
+ (unspec_volatile:NARROW [(match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")
+ (match_operand:NARROW 3 "s_register_operand")]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omrn;
+ generator.u.omrn = gen_arm_sync_compare_and_swap<mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+ })
+
+(define_expand "sync_lock_test_and_setsi"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_lock_test_and_setsi;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:NARROW 0 "s_register_operand")
+ (match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_lock_test_and_set<mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_code_iterator syncop [plus minus ior xor and])
+
+(define_code_attr sync_optab [(ior "ior")
+ (xor "xor")
+ (and "and")
+ (plus "add")
+ (minus "sub")])
+
+(define_code_attr sync_clobber [(ior "=&r")
+ (and "=&r")
+ (xor "X")
+ (plus "X")
+ (minus "X")])
+
+(define_code_attr sync_t2_reqd [(ior "4")
+ (and "4")
+ (xor "*")
+ (plus "*")
+ (minus "*")])
+
+(define_expand "sync_<sync_optab>si"
+ [(match_operand:SI 0 "memory_operand")
+ (match_operand:SI 1 "s_register_operand")
+ (syncop:SI (match_dup 0) (match_dup 1))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_<sync_optab>si;
+ arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
+ DONE;
+ })
+
+(define_expand "sync_nandsi"
+ [(match_operand:SI 0 "memory_operand")
+ (match_operand:SI 1 "s_register_operand")
+ (not:SI (and:SI (match_dup 0) (match_dup 1)))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_nandsi;
+ arm_expand_sync (SImode, &generator, NULL, operands[0], NULL, operands[1]);
+ DONE;
+ })
+
+(define_expand "sync_<sync_optab><mode>"
+ [(match_operand:NARROW 0 "memory_operand")
+ (match_operand:NARROW 1 "s_register_operand")
+ (syncop:NARROW (match_dup 0) (match_dup 1))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
+ arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
+ operands[1]);
+ DONE;
+ })
+
+(define_expand "sync_nand<mode>"
+ [(match_operand:NARROW 0 "memory_operand")
+ (match_operand:NARROW 1 "s_register_operand")
+ (not:NARROW (and:NARROW (match_dup 0) (match_dup 1)))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_nand<mode>;
+ arm_expand_sync (<MODE>mode, &generator, NULL, operands[0], NULL,
+ operands[1]);
+ DONE;
+ })
+
+(define_expand "sync_new_<sync_optab>si"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")
+ (syncop:SI (match_dup 1) (match_dup 2))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_<sync_optab>si;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_new_nandsi"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")
+ (not:SI (and:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_nandsi;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_new_<sync_optab><mode>"
+ [(match_operand:NARROW 0 "s_register_operand")
+ (match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")
+ (syncop:NARROW (match_dup 1) (match_dup 2))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_<sync_optab><mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
+ NULL, operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_new_nand<mode>"
+ [(match_operand:NARROW 0 "s_register_operand")
+ (match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")
+ (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_new_nand<mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
+ NULL, operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_old_<sync_optab>si"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")
+ (syncop:SI (match_dup 1) (match_dup 2))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_old_<sync_optab>si;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_old_nandsi"
+ [(match_operand:SI 0 "s_register_operand")
+ (match_operand:SI 1 "memory_operand")
+ (match_operand:SI 2 "s_register_operand")
+ (not:SI (and:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_old_nandsi;
+ arm_expand_sync (SImode, &generator, operands[0], operands[1], NULL,
+ operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_old_<sync_optab><mode>"
+ [(match_operand:NARROW 0 "s_register_operand")
+ (match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")
+ (syncop:NARROW (match_dup 1) (match_dup 2))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_old_<sync_optab><mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
+ NULL, operands[2]);
+ DONE;
+ })
+
+(define_expand "sync_old_nand<mode>"
+ [(match_operand:NARROW 0 "s_register_operand")
+ (match_operand:NARROW 1 "memory_operand")
+ (match_operand:NARROW 2 "s_register_operand")
+ (not:NARROW (and:NARROW (match_dup 1) (match_dup 2)))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ struct arm_sync_generator generator;
+ generator.op = arm_sync_generator_omn;
+ generator.u.omn = gen_arm_sync_old_nand<mode>;
+ arm_expand_sync (<MODE>mode, &generator, operands[0], operands[1],
+ NULL, operands[2]);
+ DONE;
+ })
+
+(define_insn "arm_sync_compare_and_swapsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "s_register_operand" "r")]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
+ (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
+ (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
+ ]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_required_value" "2")
+ (set_attr "sync_new_value" "3")
+ (set_attr "sync_t1" "0")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_compare_and_swap<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (zero_extend:SI
+ (unspec_volatile:NARROW
+ [(match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "s_register_operand" "r")]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP)))
+ (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
+ (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
+ VUNSPEC_SYNC_COMPARE_AND_SWAP))
+ ]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_required_value" "2")
+ (set_attr "sync_new_value" "3")
+ (set_attr "sync_t1" "0")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_lock_test_and_setsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (match_operand:SI 1 "arm_sync_memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_operand:SI 2 "s_register_operand" "r")]
+ VUNSPEC_SYNC_LOCK))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_release_barrier" "no")
+ (set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_lock_test_and_set<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (zero_extend:SI (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q")))
+ (set (match_dup 1)
+ (unspec_volatile:NARROW [(match_operand:SI 2 "s_register_operand" "r")]
+ VUNSPEC_SYNC_LOCK))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_release_barrier" "no")
+ (set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_new_<sync_optab>si"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(syncop:SI
+ (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r"))
+ ]
+ VUNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_NEW_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "sync_op" "<sync_optab>")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_new_nandsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(not:SI (and:SI
+ (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r")))
+ ]
+ VUNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_NEW_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "sync_op" "nand")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_new_<sync_optab><mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(syncop:SI
+ (zero_extend:SI
+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+ (match_operand:SI 2 "s_register_operand" "r"))
+ ]
+ VUNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_NEW_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "sync_op" "<sync_optab>")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_new_nand<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI
+ [(not:SI
+ (and:SI
+ (zero_extend:SI
+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+ (match_operand:SI 2 "s_register_operand" "r")))
+ ] VUNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_NEW_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "0")
+ (set_attr "sync_t2" "3")
+ (set_attr "sync_op" "nand")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_old_<sync_optab>si"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(syncop:SI
+ (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r"))
+ ]
+ VUNSPEC_SYNC_OLD_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_OLD_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "<sync_clobber>"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "3")
+ (set_attr "sync_t2" "<sync_t2_reqd>")
+ (set_attr "sync_op" "<sync_optab>")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_old_nandsi"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(not:SI (and:SI
+ (match_operand:SI 1 "arm_sync_memory_operand" "+Q")
+ (match_operand:SI 2 "s_register_operand" "r")))
+ ]
+ VUNSPEC_SYNC_OLD_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_OLD_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "3")
+ (set_attr "sync_t2" "4")
+ (set_attr "sync_op" "nand")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_old_<sync_optab><mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(syncop:SI
+ (zero_extend:SI
+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+ (match_operand:SI 2 "s_register_operand" "r"))
+ ]
+ VUNSPEC_SYNC_OLD_OP))
+ (set (match_dup 1)
+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_OLD_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "<sync_clobber>"))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "3")
+ (set_attr "sync_t2" "<sync_t2_reqd>")
+ (set_attr "sync_op" "<sync_optab>")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "arm_sync_old_nand<mode>"
+ [(set (match_operand:SI 0 "s_register_operand" "=&r")
+ (unspec_volatile:SI [(not:SI (and:SI
+ (zero_extend:SI
+ (match_operand:NARROW 1 "arm_sync_memory_operand" "+Q"))
+ (match_operand:SI 2 "s_register_operand" "r")))
+ ]
+ VUNSPEC_SYNC_OLD_OP))
+ (set (match_dup 1)
+ (unspec_volatile:NARROW [(match_dup 1) (match_dup 2)]
+ VUNSPEC_SYNC_OLD_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_sync_insn (insn, operands);
+ }
+ [(set_attr "sync_result" "0")
+ (set_attr "sync_memory" "1")
+ (set_attr "sync_new_value" "2")
+ (set_attr "sync_t1" "3")
+ (set_attr "sync_t2" "4")
+ (set_attr "sync_op" "nand")
+ (set_attr "conds" "clob")
+ (set_attr "predicable" "no")])
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_HAVE_MEMORY_BARRIER"
+ {
+ return arm_output_memory_barrier (operands);
+ }
+ [(set_attr "length" "4")
+ (set_attr "conds" "unconditional")
+ (set_attr "predicable" "no")])
+
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
new file mode 100644
index 000000000..33d7e19f7
--- /dev/null
+++ b/gcc/config/arm/t-arm
@@ -0,0 +1,66 @@
+# Rules common to all arm targets
+#
+# Copyright (C) 2004, 2005, 2007, 2008, 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \
+ $(srcdir)/config/arm/predicates.md \
+ $(srcdir)/config/arm/arm-generic.md \
+ $(srcdir)/config/arm/arm1020e.md \
+ $(srcdir)/config/arm/arm1026ejs.md \
+ $(srcdir)/config/arm/arm1136jfs.md \
+ $(srcdir)/config/arm/fa526.md \
+ $(srcdir)/config/arm/fa606te.md \
+ $(srcdir)/config/arm/fa626te.md \
+ $(srcdir)/config/arm/fmp626.md \
+ $(srcdir)/config/arm/fa726te.md \
+ $(srcdir)/config/arm/arm926ejs.md \
+ $(srcdir)/config/arm/cirrus.md \
+ $(srcdir)/config/arm/fpa.md \
+ $(srcdir)/config/arm/vec-common.md \
+ $(srcdir)/config/arm/iwmmxt.md \
+ $(srcdir)/config/arm/vfp.md \
+ $(srcdir)/config/arm/neon.md \
+ $(srcdir)/config/arm/thumb2.md
+
+LIB1ASMSRC = arm/lib1funcs.asm
+LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
+ _thumb1_case_uhi _thumb1_case_si
+s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \
+ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES)
+
+$(srcdir)/config/arm/arm-tune.md: $(srcdir)/config/arm/gentune.sh \
+ $(srcdir)/config/arm/arm-cores.def
+ $(SHELL) $(srcdir)/config/arm/gentune.sh \
+ $(srcdir)/config/arm/arm-cores.def > \
+ $(srcdir)/config/arm/arm-tune.md
+
+arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
+ insn-config.h conditions.h output.h \
+ $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
+ $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
+ $(GGC_H) except.h $(C_PRAGMA_H) $(INTEGRATE_H) $(TM_P_H) \
+ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \
+ intl.h libfuncs.h $(PARAMS_H)
+
+arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/arm/arm-c.c
diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf
new file mode 100644
index 000000000..38c291827
--- /dev/null
+++ b/gcc/config/arm/t-arm-elf
@@ -0,0 +1,128 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+# 2008, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# For most CPUs we have an assembly soft-float implementation.
+# However, this is not true for ARMv6-M, where we want to use the soft-fp C
+# implementation instead. The soft-fp code is only built for ARMv6-M; this
+# pulls in the asm implementation for the other CPUs.
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
+ _call_via_rX _interwork_call_via_rX \
+ _lshrdi3 _ashrdi3 _ashldi3 \
+ _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
+ _arm_fixdfsi _arm_fixunsdfsi \
+ _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
+ _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
+ _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
+ _clzsi2 _clzdi2
+
+MULTILIB_OPTIONS = marm/mthumb
+MULTILIB_DIRNAMES = arm thumb
+MULTILIB_EXCEPTIONS =
+MULTILIB_MATCHES =
+
+#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
+#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te
+#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626
+
+#MULTILIB_OPTIONS += march=armv7
+#MULTILIB_DIRNAMES += thumb2
+#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7*
+#MULTILIB_MATCHES += march?armv7=march?armv7-a
+#MULTILIB_MATCHES += march?armv7=march?armv7-r
+#MULTILIB_MATCHES += march?armv7=march?armv7-m
+#MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8
+#MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4
+#MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3
+
+# Not quite true. We can support hard-vfp calling in Thumb2, but how do we
+# express that here? Also, we really need architecture v5e or later
+# (mcrr etc).
+MULTILIB_OPTIONS += mfloat-abi=hard
+MULTILIB_DIRNAMES += fpu
+MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard*
+#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard*
+#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard*
+
+# MULTILIB_OPTIONS += mcpu=ep9312
+# MULTILIB_DIRNAMES += ep9312
+# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312*
+#
+# MULTILIB_OPTIONS += mlittle-endian/mbig-endian
+# MULTILIB_DIRNAMES += le be
+# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle
+#
+# MULTILIB_OPTIONS += mhard-float/msoft-float
+# MULTILIB_DIRNAMES += fpu soft
+# MULTILIB_EXCEPTIONS += *mthumb/*mhard-float*
+#
+# MULTILIB_OPTIONS += mno-thumb-interwork/mthumb-interwork
+# MULTILIB_DIRNAMES += normal interwork
+#
+# MULTILIB_OPTIONS += fno-leading-underscore/fleading-underscore
+# MULTILIB_DIRNAMES += elf under
+#
+# MULTILIB_OPTIONS += mcpu=arm7
+# MULTILIB_DIRNAMES += nofmult
+# MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=arm7*
+# # Note: the multilib_exceptions matches both -mthumb and
+# # -mthumb-interwork
+# #
+# # We have to match all the arm cpu variants which do not have the
+# # multiply instruction and treat them as if the user had specified
+# # -mcpu=arm7. Note that in the following the ? is interpreted as
+# # an = for the purposes of matching command line options.
+# # FIXME: There ought to be a better way to do this.
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7d
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7di
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm70
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700i
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710c
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7100
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500fe
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm6
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm60
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm600
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm610
+# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm620
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+# If EXTRA_MULTILIB_PARTS is not defined above then define EXTRA_PARTS here
+# EXTRA_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# Currently there is a bug somewhere in GCC's alias analysis
+# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
+# Disabling function inlining is a workaround for this problem.
+TARGET_LIBGCC2_CFLAGS = -fno-inline
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/arm/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm
+
diff --git a/gcc/config/arm/t-arm-softfp b/gcc/config/arm/t-arm-softfp
new file mode 100644
index 000000000..f9cace97e
--- /dev/null
+++ b/gcc/config/arm/t-arm-softfp
@@ -0,0 +1,29 @@
+# Copyright (C) 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := arm/sfp-machine.h
+softfp_exclude_libgcc2 := y
+softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__'
+softfp_wrap_end := '\#endif'
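+
+# A sketch of the effect: each generated soft-fp source is bracketed so
+# that it only contributes code when building for ARMv6-M, i.e.
+#
+#   #ifdef __ARM_ARCH_6M__
+#   ... soft-fp function body ...
+#   #endif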
+
+# softfp seems to be missing a whole bunch of prototypes.
+TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi
new file mode 100644
index 000000000..61da9ec7b
--- /dev/null
+++ b/gcc/config/arm/t-bpabi
@@ -0,0 +1,36 @@
+# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Add the bpabi.S functions.
+LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
+
+# Add the BPABI C functions.
+LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \
+ $(srcdir)/config/arm/unaligned-funcs.c
+
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c
+
+UNWIND_H = $(srcdir)/config/arm/unwind-arm.h
+LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \
+ $(srcdir)/config/arm/libunwind.S \
+ $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c
+LIB2ADDEHDEP = $(UNWIND_H) $(srcdir)/config/$(LIB1ASMSRC)
+
+# Add the BPABI names.
+SHLIB_MAPFILES += $(srcdir)/config/arm/libgcc-bpabi.ver
+
diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux
new file mode 100644
index 000000000..a6fddad50
--- /dev/null
+++ b/gcc/config/arm/t-linux
@@ -0,0 +1,34 @@
+# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
+# 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Just for these, we omit the frame pointer since it makes such a big
+# difference.
+TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC
+
+LIB1ASMSRC = arm/lib1funcs.asm
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
+ _arm_addsubdf3 _arm_addsubsf3
+
+# MULTILIB_OPTIONS = mhard-float/msoft-float
+# MULTILIB_DIRNAMES = hard-float soft-float
+
+# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+
+# LIBGCC = stmp-multilib
+# INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/arm/t-linux-androideabi b/gcc/config/arm/t-linux-androideabi
new file mode 100644
index 000000000..8f1307c55
--- /dev/null
+++ b/gcc/config/arm/t-linux-androideabi
@@ -0,0 +1,10 @@
+MULTILIB_OPTIONS = march=armv7-a mthumb
+MULTILIB_DIRNAMES = armv7-a thumb
+MULTILIB_EXCEPTIONS =
+MULTILIB_MATCHES =
+MULTILIB_OSDIRNAMES =
+
+# The "special" multilib can be used to build native applications for Android,
+# as opposed to native shared libraries that are then called via JNI.
+#MULTILIB_OPTIONS += tno-android-cc
+#MULTILIB_DIRNAMES += special
diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi
new file mode 100644
index 000000000..39de9aefe
--- /dev/null
+++ b/gcc/config/arm/t-linux-eabi
@@ -0,0 +1,43 @@
+# Copyright (C) 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# These functions are included in shared libraries.
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+# We do not build a Thumb multilib for Linux because the definition of
+# CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode.
+MULTILIB_OPTIONS =
+MULTILIB_DIRNAMES =
+
+#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
+#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te
+#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te*
+
+ifneq (,$(findstring gnueabi,$(target)))
+ARM_EB = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),eb)
+MULTIARCH_DIRNAME = $(call if_multiarch,arm$(ARM_EB)-linux-gnueabi$(if $(filter hard,$(with_float)),hf))
+endif
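+
+# For example (a sketch): a hard-float configuration (--with-float=hard)
+# yields a multiarch directory name of arm-linux-gnueabihf, while a
+# big-endian default soft-float configuration yields armeb-linux-gnueabi.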
+
+# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
+LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
+
+# Multilib the standard Linux files. Don't include crti.o or crtn.o,
+# which are provided by glibc.
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+
+LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c
diff --git a/gcc/config/arm/t-netbsd b/gcc/config/arm/t-netbsd
new file mode 100644
index 000000000..22bbbe7dd
--- /dev/null
+++ b/gcc/config/arm/t-netbsd
@@ -0,0 +1,47 @@
+# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+# 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Just for these, we omit the frame pointer since it makes such a big
+# difference. It is then pointless adding debugging.
+TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fpic
+LIBGCC2_DEBUG_CFLAGS = -g0
+LIB2FUNCS_EXTRA = $(srcdir)/config/floatunsidf.c $(srcdir)/config/floatunsisf.c
+
+# Build a shared libgcc library.
+SHLIB_EXT = .so
+SHLIB_NAME = @shlib_base_name@.so
+SHLIB_SONAME = @shlib_base_name@.so.1
+SHLIB_OBJS = @shlib_objs@
+
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-soname,$(SHLIB_SONAME) \
+ -o $(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) -lc && \
+ rm -f $(SHLIB_SONAME) && \
+ if [ -f $(SHLIB_NAME) ]; then \
+ mv -f $(SHLIB_NAME) $(SHLIB_NAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_NAME).tmp $(SHLIB_NAME) && \
+ $(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir); \
+ $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \
+ $(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME)
diff --git a/gcc/config/arm/t-pe b/gcc/config/arm/t-pe
new file mode 100644
index 000000000..626b1d29a
--- /dev/null
+++ b/gcc/config/arm/t-pe
@@ -0,0 +1,52 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009,
+# 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifndef __ARMEB__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifndef __ARMEB__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/arm/pe.c
+
+MULTILIB_OPTIONS = mhard-float mthumb
+MULTILIB_DIRNAMES = fpu thumb
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+TARGET_LIBGCC2_CFLAGS =
diff --git a/gcc/config/arm/t-rtems b/gcc/config/arm/t-rtems
new file mode 100644
index 000000000..52d14bab0
--- /dev/null
+++ b/gcc/config/arm/t-rtems
@@ -0,0 +1,10 @@
+# Custom rtems multilibs
+
+MULTILIB_OPTIONS = marm/mthumb
+MULTILIB_DIRNAMES = arm thumb
+MULTILIB_EXCEPTIONS =
+MULTILIB_MATCHES = marm=mno-thumb
+
+MULTILIB_OPTIONS += msoft-float/mhard-float
+MULTILIB_DIRNAMES += soft fpu
+MULTILIB_EXCEPTIONS += *mthumb/*mhard-float*
diff --git a/gcc/config/arm/t-rtems-eabi b/gcc/config/arm/t-rtems-eabi
new file mode 100644
index 000000000..f0e714a9b
--- /dev/null
+++ b/gcc/config/arm/t-rtems-eabi
@@ -0,0 +1,8 @@
+# Custom RTEMS EABI multilibs
+
+MULTILIB_OPTIONS = mthumb march=armv6-m/march=armv7/march=armv7-m
+MULTILIB_DIRNAMES = thumb armv6-m armv7 armv7-m
+MULTILIB_EXCEPTIONS = march=armv6-m march=armv7 march=armv7-m
+MULTILIB_MATCHES =
+MULTILIB_EXCLUSIONS =
+MULTILIB_OSDIRNAMES =
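+
+# Resulting layout (a sketch): besides the default libraries, this builds
+# thumb/, thumb/armv6-m/, thumb/armv7/ and thumb/armv7-m/ multilibs; the
+# bare march= combinations are excluded above, so the architecture
+# variants exist only in Thumb mode.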
diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf
new file mode 100644
index 000000000..64d7ca694
--- /dev/null
+++ b/gcc/config/arm/t-strongarm-elf
@@ -0,0 +1,61 @@
+# Copyright (C) 2000, 2001, 2006, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifndef __ARMEB__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifndef __ARMEB__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+MULTILIB_OPTIONS = mlittle-endian/mbig-endian mhard-float/msoft-float
+MULTILIB_DIRNAMES = le be fpu soft
+MULTILIB_EXCEPTIONS =
+MULTILIB_MATCHES = mbig-endian=mbe mlittle-endian=mle
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# Currently there is a bug somewhere in GCC's alias analysis
+# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
+# Disabling function inlining is a workaround for this problem.
+TARGET_LIBGCC2_CFLAGS = -fno-inline
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/arm/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/arm/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/arm/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/arm/crtn.asm
diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian
new file mode 100644
index 000000000..4a1476f67
--- /dev/null
+++ b/gcc/config/arm/t-symbian
@@ -0,0 +1,53 @@
+# Copyright (C) 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# These functions have __aeabi equivalents and will never be called by GCC.
+# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
+# used -- and we make sure that definitions are not available in lib1funcs.asm,
+# either, so they end up undefined.
+LIB1ASMFUNCS += \
+ _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \
+ _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \
+ _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \
+ _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
+ _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
+ _fixsfsi _fixunssfsi
+
+# Include the gcc personality routine
+UNWIND_H = $(srcdir)/config/arm/unwind-arm.h
+LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c
+LIB2ADDEHDEP = $(UNWIND_H)
+
+# Include half-float helpers.
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c
+
+# Create a multilib for processors with VFP floating-point, and a
+# multilib for those without -- using the soft-float ABI in both
+# cases. Symbian OS objects should be compiled with interworking
+# enabled, so there are no separate thumb-mode libraries.
+MULTILIB_OPTIONS = mfloat-abi=softfp
+MULTILIB_DIRNAMES = softfp
+
+# There is no C library to link against on Symbian OS -- at least when
+# building GCC.
+SHLIB_LC =
+
+# Symbian OS provides its own startup code.
+EXTRA_MULTILIB_PARTS=
diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks
new file mode 100644
index 000000000..af01ac412
--- /dev/null
+++ b/gcc/config/arm/t-vxworks
@@ -0,0 +1,44 @@
+# Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifndef __ARMEB__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifndef __ARMEB__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+MULTILIB_OPTIONS = \
+ mrtp fPIC \
+ t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe
+MULTILIB_MATCHES = fPIC=fpic
+# Don't build -fPIC multilibs for kernel or Thumb code.
+MULTILIB_EXCEPTIONS = fPIC* mrtp/fPIC/*t[45]t*
diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe
new file mode 100644
index 000000000..165bef200
--- /dev/null
+++ b/gcc/config/arm/t-wince-pe
@@ -0,0 +1,56 @@
+# Copyright (C) 2003, 2004, 2006, 2008, 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifndef __ARMEB__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifndef __ARMEB__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#define FLOAT_WORD_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/arm/pe.c
+
+MULTILIB_OPTIONS = mhard-float
+MULTILIB_DIRNAMES = fpu
+# Note - the Thumb multilib is omitted because Thumb support for the
+# arm-wince-pe target does not yet appear to be working in binutils:
+# MULTILIB_OPTIONS += thumb
+# MULTILIB_DIRNAMES += thumb
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+TARGET_LIBGCC2_CFLAGS =
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
new file mode 100644
index 000000000..1b2fb2d44
--- /dev/null
+++ b/gcc/config/arm/thumb2.md
@@ -0,0 +1,1121 @@
+;; ARM Thumb-2 Machine Description
+;; Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Written by CodeSourcery, LLC.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Note: Thumb-2 is the variant of the Thumb architecture that adds
+;; 32-bit encodings of [almost all of] the ARM instruction set.
+;; Some old documents refer to the relatively minor interworking
+;; changes made in armv5t as "thumb2". These are considered part of
+;; the 16-bit Thumb-1 instruction set.
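+;; For illustration: a three-register "adds r0, r1, r2" fits in a
+;; 16-bit Thumb-1 encoding, while "add r0, r1, #4080" needs one of
+;; the 32-bit encodings that Thumb-2 introduces.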
+
+(define_insn "*thumb2_incscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (plus:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand:CC 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "s_register_operand" "0,?r")))]
+ "TARGET_THUMB2"
+ "@
+ it\\t%d2\;add%d2\\t%0, %1, #1
+ ite\\t%D2\;mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,10")]
+)
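+;; E.g. for "x = y + (a >= b)", with the flags set by the comparison
+;; and x and y tied to one register, the first alternative emits
+;; "it ge" followed by "addge r0, r0, #1" (hypothetical allocation).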
+
+(define_insn "*thumb2_decscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+ (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])))]
+ "TARGET_THUMB2"
+ "@
+ it\\t%d2\;sub%d2\\t%0, %1, #1
+ ite\\t%D2\;mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,10")]
+)
+
+;; Thumb-2 only allows shift by constant on data processing instructions
+(define_insn "*thumb_andsi_not_shiftsi_si"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (not:SI (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "M")]))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_THUMB2"
+ "bic%?\\t%0, %1, %2%S4"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "2")
+ (set_attr "type" "alu_shift")]
+)
+
+(define_insn "*thumb2_smaxsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (smax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+ cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2
+ cmp\\t%1, %2\;it\\tge\;movge\\t%0, %1
+ cmp\\t%1, %2\;ite\\tge\;movge\\t%0, %1\;movlt\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,10,14")]
+)
+
+(define_insn "*thumb2_sminsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (smin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+ cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2
+ cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %1
+ cmp\\t%1, %2\;ite\\tlt\;movlt\\t%0, %1\;movge\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,10,14")]
+)
+
+(define_insn "*thumb32_umaxsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+ cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2
+ cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %1
+ cmp\\t%1, %2\;ite\\tcs\;movcs\\t%0, %1\;movcc\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,10,14")]
+)
+
+(define_insn "*thumb2_uminsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+ cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2
+ cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %1
+ cmp\\t%1, %2\;ite\\tcc\;movcc\\t%0, %1\;movcs\\t%0, %2"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,10,14")]
+)
+
+;; Thumb-2 does not have rsc, so use a clever trick with shifter operands.
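+;; "negs %Q0, %Q1" leaves C set only when the low word was zero, and
+;; "sbc %R0, %R1, %R1, lsl #1" computes %R1 - 2*%R1 - !C = -%R1 - !C:
+;; the negated high word minus the borrow out of the low word.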
+(define_insn "*thumb2_negdi2"
+ [(set (match_operand:DI 0 "s_register_operand" "=&r,r")
+ (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*thumb2_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
+ (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+   cmp\\t%0, #0\;it\\tlt\;rsblt\\t%0, %0, #0
+ eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31"
+ [(set_attr "conds" "clob,*")
+ (set_attr "shift" "1")
+   ;; predicable can't be set per-alternative, so it is left as "no"
+ (set_attr "length" "10,8")]
+)
+
+(define_insn "*thumb2_neg_abssi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,&r")
+ (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "@
+ cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0
+ eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31"
+ [(set_attr "conds" "clob,*")
+ (set_attr "shift" "1")
+   ;; predicable can't be set per-alternative, so it is left as "no"
+ (set_attr "length" "10,8")]
+)
+
+;; We have two alternatives here for memory loads (and similarly for stores)
+;; to reflect the fact that the permissible constant pool ranges differ
+;; between ldr instructions taking low regs and ldr instructions taking high
+;; regs. The high register alternatives are not taken into account when
+;; choosing register preferences in order to reflect their expense.
+(define_insn "*thumb2_movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m")
+ (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))]
+ "TARGET_THUMB2 && ! TARGET_IWMMXT
+ && !(TARGET_HARD_FLOAT && TARGET_VFP)
+ && ( register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov%?\\t%0, %1
+ mov%?\\t%0, %1
+ mvn%?\\t%0, #%B1
+ movw%?\\t%0, %1
+ ldr%?\\t%0, %1
+ ldr%?\\t%0, %1
+ str%?\\t%1, %0
+ str%?\\t%1, %0"
+ [(set_attr "type" "*,*,*,*,load1,load1,store1,store1")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")]
+)
+
+(define_insn "tls_load_dot_plus_four"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,r,r")
+ (mem:SI (unspec:SI [(match_operand:SI 2 "register_operand" "0,1,0,1")
+ (const_int 4)
+ (match_operand 3 "" "")]
+ UNSPEC_PIC_BASE)))
+ (clobber (match_scratch:SI 1 "=X,l,X,r"))]
+ "TARGET_THUMB2"
+ "*
+ (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\",
+ INTVAL (operands[3]));
+ return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\";
+ "
+ [(set_attr "length" "4,4,6,6")]
+)
+
+;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot
+;; of the messiness associated with the ARM patterns.
+(define_insn "*thumb2_movhi_insn"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
+ (match_operand:HI 1 "general_operand" "rI,n,r,m"))]
+ "TARGET_THUMB2"
+ "@
+ mov%?\\t%0, %1\\t%@ movhi
+ movw%?\\t%0, %L1\\t%@ movhi
+ str%(h%)\\t%1, %0\\t%@ movhi
+ ldr%(h%)\\t%0, %1\\t%@ movhi"
+ [(set_attr "type" "*,*,store1,load1")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,*,*,4096")
+ (set_attr "neg_pool_range" "*,*,*,250")]
+)
+
+(define_insn "*thumb2_cmpsi_neg_shiftsi"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 0 "s_register_operand" "r")
+ (neg:SI (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "M")]))))]
+ "TARGET_THUMB2"
+ "cmn%?\\t%0, %1%S3"
+ [(set_attr "conds" "set")
+ (set_attr "shift" "1")
+ (set_attr "type" "alu_shift")]
+)
+
+(define_insn "*thumb2_mov_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)]))]
+ "TARGET_THUMB2"
+ "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "10")]
+)
+
+(define_insn "*thumb2_mov_negscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (neg:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])))]
+ "TARGET_THUMB2"
+ "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0"
+ [(set_attr "conds" "use")
+ (set_attr "length" "10")]
+)
+
+(define_insn "*thumb2_mov_notscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (not:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])))]
+ "TARGET_THUMB2"
+ "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "10")]
+)
+
+(define_insn "*thumb2_movsicc_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K")
+ (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))]
+ "TARGET_THUMB2"
+ "@
+ it\\t%D3\;mov%D3\\t%0, %2
+ it\\t%D3\;mvn%D3\\t%0, #%B2
+ it\\t%d3\;mov%d3\\t%0, %1
+ it\\t%d3\;mvn%d3\\t%0, #%B1
+ ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
+ ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
+ ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
+ ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
+ [(set_attr "length" "6,6,6,6,10,10,10,10")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "*thumb2_movsfcc_soft_insn"
+ [(set (match_operand:SF 0 "s_register_operand" "=r,r")
+ (if_then_else:SF (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "s_register_operand" "0,r")
+ (match_operand:SF 2 "s_register_operand" "r,0")))]
+ "TARGET_THUMB2 && TARGET_SOFT_FLOAT"
+ "@
+ it\\t%D3\;mov%D3\\t%0, %2
+ it\\t%d3\;mov%d3\\t%0, %1"
+ [(set_attr "length" "6,6")
+ (set_attr "conds" "use")]
+)
+
+(define_insn "*call_reg_thumb2"
+ [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB2"
+ "blx%?\\t%0"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*call_value_reg_thumb2"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNUM))]
+ "TARGET_THUMB2"
+ "blx\\t%1"
+ [(set_attr "type" "call")]
+)
+
+(define_insn "*thumb2_indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "l*r"))]
+ "TARGET_THUMB2"
+ "bx\\t%0"
+ [(set_attr "conds" "clob")]
+)
+;; Don't define thumb2_load_indirect_jump because we can't guarantee label
+;; addresses will have the thumb bit set correctly.
+
+
+(define_insn "*thumb2_and_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (and:SI (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_THUMB2"
+ "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "10")]
+)
+
+(define_insn "*thumb2_ior_scc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (ior:SI (match_operator:SI 2 "arm_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "s_register_operand" "0,?r")))]
+ "TARGET_THUMB2"
+ "@
+ it\\t%d2\;orr%d2\\t%0, %1, #1
+ ite\\t%D2\;mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,10")]
+)
+
+(define_insn "*thumb2_cond_move"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI (match_operator 3 "equality_operator"
+ [(match_operator 4 "arm_comparison_operator"
+ [(match_operand 5 "cc_register" "") (const_int 0)])
+ (const_int 0)])
+ (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))]
+ "TARGET_THUMB2"
+ "*
+ if (GET_CODE (operands[3]) == NE)
+ {
+ if (which_alternative != 1)
+ output_asm_insn (\"it\\t%D4\;mov%D4\\t%0, %2\", operands);
+ if (which_alternative != 0)
+ output_asm_insn (\"it\\t%d4\;mov%d4\\t%0, %1\", operands);
+ return \"\";
+ }
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"it\\t%d4\", operands);
+ break;
+ case 1:
+ output_asm_insn (\"it\\t%D4\", operands);
+ break;
+ case 2:
+ output_asm_insn (\"ite\\t%D4\", operands);
+ break;
+ default:
+ abort();
+ }
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ if (which_alternative != 1)
+ output_asm_insn (\"mov%d4\\t%0, %2\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,6,10")]
+)
+
+(define_insn "*thumb2_cond_arith"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (match_operator:SI 5 "shiftable_operator"
+ [(match_operator:SI 4 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])
+ (match_operand:SI 1 "s_register_operand" "0,?r")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx)
+ return \"%i5\\t%0, %1, %2, lsr #31\";
+
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ if (GET_CODE (operands[5]) == AND)
+ {
+ output_asm_insn (\"ite\\t%D4\", operands);
+ output_asm_insn (\"mov%D4\\t%0, #0\", operands);
+ }
+ else if (GET_CODE (operands[5]) == MINUS)
+ {
+ output_asm_insn (\"ite\\t%D4\", operands);
+ output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands);
+ }
+ else if (which_alternative != 0)
+ {
+ output_asm_insn (\"ite\\t%D4\", operands);
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ }
+ else
+ output_asm_insn (\"it\\t%d4\", operands);
+ return \"%i5%d4\\t%0, %1, #1\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "14")]
+)
+
+(define_insn "*thumb2_cond_sub"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r")
+ (match_operator:SI 4 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "r,r")
+ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ output_asm_insn (\"cmp\\t%2, %3\", operands);
+ if (which_alternative != 0)
+ {
+ output_asm_insn (\"ite\\t%D4\", operands);
+ output_asm_insn (\"mov%D4\\t%0, %1\", operands);
+ }
+ else
+ output_asm_insn (\"it\\t%d4\", operands);
+ return \"sub%d4\\t%0, %1, #1\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,14")]
+)
+
+(define_insn "*thumb2_negscc"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (neg:SI (match_operator 3 "arm_comparison_operator"
+ [(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rI")])))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
+ return \"asr\\t%0, %1, #31\";
+
+ if (GET_CODE (operands[3]) == NE)
+ return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\";
+
+ output_asm_insn (\"cmp\\t%1, %2\", operands);
+ output_asm_insn (\"ite\\t%D3\", operands);
+ output_asm_insn (\"mov%D3\\t%0, #0\", operands);
+ return \"mvn%d3\\t%0, #0\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "14")]
+)
+
+(define_insn "*thumb2_movcond"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "arm_comparison_operator"
+ [(match_operand:SI 3 "s_register_operand" "r,r,r")
+ (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")])
+ (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI")
+ (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ if (GET_CODE (operands[5]) == LT
+ && (operands[4] == const0_rtx))
+ {
+ if (which_alternative != 1 && GET_CODE (operands[1]) == REG)
+ {
+ if (operands[2] == const0_rtx)
+ return \"and\\t%0, %1, %3, asr #31\";
+ return \"ands\\t%0, %1, %3, asr #32\;it\\tcc\;movcc\\t%0, %2\";
+ }
+ else if (which_alternative != 0 && GET_CODE (operands[2]) == REG)
+ {
+ if (operands[1] == const0_rtx)
+ return \"bic\\t%0, %2, %3, asr #31\";
+ return \"bics\\t%0, %2, %3, asr #32\;it\\tcs\;movcs\\t%0, %1\";
+ }
+ /* The only case that falls through to here is when both ops 1 & 2
+ are constants. */
+ }
+
+ if (GET_CODE (operands[5]) == GE
+ && (operands[4] == const0_rtx))
+ {
+ if (which_alternative != 1 && GET_CODE (operands[1]) == REG)
+ {
+ if (operands[2] == const0_rtx)
+ return \"bic\\t%0, %1, %3, asr #31\";
+ return \"bics\\t%0, %1, %3, asr #32\;it\\tcs\;movcs\\t%0, %2\";
+ }
+ else if (which_alternative != 0 && GET_CODE (operands[2]) == REG)
+ {
+ if (operands[1] == const0_rtx)
+ return \"and\\t%0, %2, %3, asr #31\";
+	  return \"ands\\t%0, %2, %3, asr #32\;it\\tcc\;movcc\\t%0, %1\";
+ }
+ /* The only case that falls through to here is when both ops 1 & 2
+ are constants. */
+ }
+ if (GET_CODE (operands[4]) == CONST_INT
+ && !const_ok_for_arm (INTVAL (operands[4])))
+ output_asm_insn (\"cmn\\t%3, #%n4\", operands);
+ else
+ output_asm_insn (\"cmp\\t%3, %4\", operands);
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"it\\t%D5\", operands);
+ break;
+ case 1:
+ output_asm_insn (\"it\\t%d5\", operands);
+ break;
+ case 2:
+ output_asm_insn (\"ite\\t%d5\", operands);
+ break;
+ default:
+ abort();
+ }
+ if (which_alternative != 0)
+ output_asm_insn (\"mov%d5\\t%0, %1\", operands);
+ if (which_alternative != 1)
+ output_asm_insn (\"mov%D5\\t%0, %2\", operands);
+ return \"\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "10,10,14")]
+)
+
+;; Zero and sign extension instructions.
+
+;; All supported Thumb-2 implementations are armv6, so only that case
+;; is provided.
+(define_insn "*thumb2_extendqisi_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_THUMB2 && arm_arch6"
+ "@
+ sxtb%?\\t%0, %1
+ ldr%(sb%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,4096")
+ (set_attr "neg_pool_range" "*,250")]
+)
+
+(define_insn "*thumb2_zero_extendhisi2_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_THUMB2 && arm_arch6"
+ "@
+ uxth%?\\t%0, %1
+ ldr%(h%)\\t%0, %1"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,4096")
+ (set_attr "neg_pool_range" "*,250")]
+)
+
+(define_insn "thumb2_zero_extendqisi2_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ "TARGET_THUMB2 && arm_arch6"
+ "@
+ uxtb%(%)\\t%0, %1
+ ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
+ [(set_attr "type" "alu_shift,load_byte")
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,4096")
+ (set_attr "neg_pool_range" "*,250")]
+)
+
+(define_insn "thumb2_casesi_internal"
+ [(parallel [(set (pc)
+ (if_then_else
+ (leu (match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:SI 1 "arm_rhs_operand" "rI"))
+ (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
+ (label_ref (match_operand 2 "" ""))))
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (use (label_ref (match_dup 2)))])]
+ "TARGET_THUMB2 && !flag_pic"
+ "* return thumb2_output_casesi(operands);"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "16")]
+)
+
+(define_insn "thumb2_casesi_internal_pic"
+ [(parallel [(set (pc)
+ (if_then_else
+ (leu (match_operand:SI 0 "s_register_operand" "r")
+ (match_operand:SI 1 "arm_rhs_operand" "rI"))
+ (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
+ (label_ref (match_operand 2 "" ""))))
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=r"))
+ (use (label_ref (match_dup 2)))])]
+ "TARGET_THUMB2 && flag_pic"
+ "* return thumb2_output_casesi(operands);"
+ [(set_attr "conds" "clob")
+ (set_attr "length" "20")]
+)
+
+;; Note: this is not predicable, to avoid issues with linker-generated
+;; interworking stubs.
+(define_insn "*thumb2_return"
+ [(return)]
+ "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+ "*
+ {
+ return output_return_instruction (const_true_rtx, TRUE, FALSE);
+ }"
+ [(set_attr "type" "load1")
+ (set_attr "length" "12")]
+)
+
+(define_insn_and_split "thumb2_eh_return"
+ [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
+ VUNSPEC_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&r"))]
+ "TARGET_THUMB2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "
+ {
+ thumb_set_return_address (operands[0], operands[1]);
+ DONE;
+ }"
+)
+
+(define_insn "*thumb2_alusi3_short"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (match_operator:SI 3 "thumb_16bit_operator"
+ [(match_operand:SI 1 "s_register_operand" "0")
+ (match_operand:SI 2 "s_register_operand" "l")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed
+ && GET_CODE(operands[3]) != PLUS
+ && GET_CODE(operands[3]) != MINUS"
+ "%I3%!\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+;; Similarly for 16-bit shift instructions
+;; There is no 16-bit rotate by immediate instruction.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "low_reg_or_int_operand" "")]))]
+ "TARGET_THUMB2
+ && peep2_regno_dead_p(0, CC_REGNUM)
+ && (CONST_INT_P (operands[2]) || operands[1] == operands[0])
+ && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT)
+ || REG_P(operands[2]))"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 3
+ [(match_dup 1)
+ (match_dup 2)]))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_shiftsi3_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l,l")
+ (match_operator:SI 3 "shift_operator"
+ [(match_operand:SI 1 "low_register_operand" "0,l")
+ (match_operand:SI 2 "low_reg_or_int_operand" "l,M")]))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed
+ && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT)
+ || REG_P(operands[2]))"
+ "* return arm_output_shift(operands, 2);"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "1")
+ (set_attr "length" "2")
+ (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "alu_shift")
+ (const_string "alu_shift_reg")))]
+)
+
+;; 16-bit load immediate
+(define_peephole2
+ [(set (match_operand:QHSI 0 "low_register_operand" "")
+ (match_operand:QHSI 1 "const_int_operand" ""))]
+ "TARGET_THUMB2
+ && peep2_regno_dead_p(0, CC_REGNUM)
+ && (unsigned HOST_WIDE_INT) INTVAL(operands[1]) < 256"
+ [(parallel
+ [(set (match_dup 0)
+ (match_dup 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_mov<mode>_shortim"
+ [(set (match_operand:QHSI 0 "low_register_operand" "=l")
+ (match_operand:QHSI 1 "const_int_operand" "I"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed"
+ "mov%!\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+;; 16-bit add/sub immediate
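+;; A 16-bit "adds rd, rn, #imm" only has room for a 3-bit immediate
+;; when rd and rn differ; with rd tied to rn an 8-bit immediate is
+;; available, hence the two ranges tested below.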
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (plus:SI (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_THUMB2
+ && peep2_regno_dead_p(0, CC_REGNUM)
+ && ((rtx_equal_p(operands[0], operands[1])
+ && INTVAL(operands[2]) > -256 && INTVAL(operands[2]) < 256)
+ || (INTVAL(operands[2]) > -8 && INTVAL(operands[2]) < 8))"
+ [(parallel
+ [(set (match_dup 0)
+ (plus:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_addsi_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l,l")
+ (plus:SI (match_operand:SI 1 "low_register_operand" "l,0")
+ (match_operand:SI 2 "low_reg_or_int_operand" "lPt,Ps")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed"
+ "*
+ HOST_WIDE_INT val;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ val = INTVAL(operands[2]);
+ else
+ val = 0;
+
+  /* We prefer e.g. subs rn, rn, #1 over adds rn, rn, #0xffffffff. */
+ if (val < 0 && const_ok_for_arm(ARM_SIGN_EXTEND (-val)))
+ return \"sub%!\\t%0, %1, #%n2\";
+ else
+ return \"add%!\\t%0, %1, %2\";
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (div:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_THUMB2 && arm_arch_hwdiv"
+ "sdiv%?\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "sdiv")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_THUMB2 && arm_arch_hwdiv"
+ "udiv%?\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "insn" "udiv")]
+)
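+;; With hardware divide (arm_arch_hwdiv) an unsigned "a / b" thus
+;; compiles to a single "udiv r0, r0, r1" (hypothetical registers)
+;; instead of a call to the __aeabi_uidiv library routine.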
+
+(define_insn "*thumb2_subsi_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l")
+ (minus:SI (match_operand:SI 1 "low_register_operand" "l")
+ (match_operand:SI 2 "low_register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed"
+ "sub%!\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+(define_peephole2
+ [(set (match_operand:CC 0 "cc_register" "")
+ (compare:CC (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_THUMB2
+ && peep2_reg_dead_p (1, operands[1])
+ && satisfies_constraint_Pw (operands[2])"
+ [(parallel
+ [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])]
+ "operands[3] = GEN_INT (- INTVAL (operands[2]));"
+)
+
+(define_peephole2
+ [(match_scratch:SI 3 "l")
+ (set (match_operand:CC 0 "cc_register" "")
+ (compare:CC (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "TARGET_THUMB2
+ && satisfies_constraint_Px (operands[2])"
+ [(parallel
+ [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])]
+ "operands[4] = GEN_INT (- INTVAL (operands[2]));"
+)
+
+(define_insn "*thumb2_addsi3_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r")
+ (match_operand:SI 2 "arm_add_operand" "lPt,Ps,rIL"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=l,l,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_THUMB2"
+ "*
+ HOST_WIDE_INT val;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ val = INTVAL (operands[2]);
+ else
+ val = 0;
+
+ if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val)))
+ return \"subs\\t%0, %1, #%n2\";
+ else
+ return \"adds\\t%0, %1, %2\";
+ "
+ [(set_attr "conds" "set")
+ (set_attr "length" "2,2,4")]
+)
+
+(define_insn "*thumb2_addsi3_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (match_operand:SI 0 "s_register_operand" "l, r")
+ (match_operand:SI 1 "arm_add_operand" "lPv,rIL"))
+ (const_int 0)))]
+ "TARGET_THUMB2"
+ "*
+ HOST_WIDE_INT val;
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ val = INTVAL (operands[1]);
+ else
+ val = 0;
+
+ if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val)))
+ return \"cmp\\t%0, #%n1\";
+ else
+ return \"cmn\\t%0, %1\";
+ "
+ [(set_attr "conds" "set")
+ (set_attr "length" "2,4")]
+)
+
+;; 16-bit encodings of "muls" and "mul<c>". We only use these when
+;; optimizing for size since "muls" is slow on all known
+;; implementations and since "mul<c>" will be generated by
+;; "*arm_mulsi3_v6" anyhow. The assembler will use a 16-bit encoding
+;; for "mul<c>" whenever possible anyhow.
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (mult:SI (match_operand:SI 1 "low_register_operand" "")
+ (match_dup 0)))]
+ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
+ [(parallel
+ [(set (match_dup 0)
+ (mult:SI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (mult:SI (match_dup 0)
+ (match_operand:SI 1 "low_register_operand" "")))]
+ "TARGET_THUMB2 && optimize_size && peep2_regno_dead_p (0, CC_REGNUM)"
+ [(parallel
+ [(set (match_dup 0)
+ (mult:SI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_mulsi_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l")
+ (mult:SI (match_operand:SI 1 "low_register_operand" "%0")
+ (match_operand:SI 2 "low_register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && optimize_size && reload_completed"
+ "mul%!\\t%0, %2, %0"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")
+ (set_attr "insn" "muls")])
+
+(define_insn "*thumb2_mulsi_short_compare0"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "l"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=l")
+ (mult:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_THUMB2 && optimize_size"
+ "muls\\t%0, %2, %0"
+ [(set_attr "length" "2")
+ (set_attr "insn" "muls")])
+
+(define_insn "*thumb2_mulsi_short_compare0_scratch"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "l"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=l"))]
+ "TARGET_THUMB2 && optimize_size"
+ "muls\\t%0, %2, %0"
+ [(set_attr "length" "2")
+ (set_attr "insn" "muls")])
+
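+;; cbz/cbnz can only branch forwards over a small range (the length
+;; attributes below allow offsets of 2..128 bytes from the pc), so
+;; these patterns fall back to a cmp/branch pair everywhere else.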
+(define_insn "*thumb2_cbz"
+ [(set (pc) (if_then_else
+ (eq (match_operand:SI 0 "s_register_operand" "l,?r")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ if (get_attr_length (insn) == 2)
+ return \"cbz\\t%0, %l1\";
+ else
+ return \"cmp\\t%0, #0\;beq\\t%l1\";
+ "
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 1) (pc)) (const_int 2))
+ (le (minus (match_dup 1) (pc)) (const_int 128))
+ (eq (symbol_ref ("which_alternative")) (const_int 0)))
+ (const_int 2)
+ (const_int 8)))]
+)
+
+(define_insn "*thumb2_cbnz"
+ [(set (pc) (if_then_else
+ (ne (match_operand:SI 0 "s_register_operand" "l,?r")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2"
+ "*
+ if (get_attr_length (insn) == 2)
+ return \"cbnz\\t%0, %l1\";
+ else
+ return \"cmp\\t%0, #0\;bne\\t%l1\";
+ "
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 1) (pc)) (const_int 2))
+ (le (minus (match_dup 1) (pc)) (const_int 128))
+ (eq (symbol_ref ("which_alternative")) (const_int 0)))
+ (const_int 2)
+ (const_int 8)))]
+)
+
+;; 16-bit complement
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (not:SI (match_operand:SI 1 "low_register_operand" "")))]
+ "TARGET_THUMB2
+ && peep2_regno_dead_p(0, CC_REGNUM)"
+ [(parallel
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_one_cmplsi2_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l")
+ (not:SI (match_operand:SI 1 "low_register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed"
+ "mvn%!\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+;; 16-bit negate
+(define_peephole2
+ [(set (match_operand:SI 0 "low_register_operand" "")
+ (neg:SI (match_operand:SI 1 "low_register_operand" "")))]
+ "TARGET_THUMB2
+ && peep2_regno_dead_p(0, CC_REGNUM)"
+ [(parallel
+ [(set (match_dup 0)
+ (neg:SI (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+)
+
+(define_insn "*thumb2_negsi2_short"
+ [(set (match_operand:SI 0 "low_register_operand" "=l")
+ (neg:SI (match_operand:SI 1 "low_register_operand" "l")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_THUMB2 && reload_completed"
+ "neg%!\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "2")]
+)
+
+(define_insn "*orsi_notsi_si"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_THUMB2"
+ "orn%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")]
+)
+
+(define_insn "*orsi_not_shiftsi_si"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (ior:SI (not:SI (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "M")]))
+ (match_operand:SI 1 "s_register_operand" "r")))]
+ "TARGET_THUMB2"
+ "orn%?\\t%0, %1, %2%S4"
+ [(set_attr "predicable" "yes")
+ (set_attr "shift" "2")
+ (set_attr "type" "alu_shift")]
+)
+
+(define_peephole2
+ [(set (match_operand:CC_NOOV 0 "cc_register" "")
+ (compare:CC_NOOV (zero_extract:SI
+ (match_operand:SI 1 "low_register_operand" "")
+ (const_int 1)
+ (match_operand:SI 2 "const_int_operand" ""))
+ (const_int 0)))
+ (match_scratch:SI 3 "l")
+ (set (pc)
+ (if_then_else (match_operator:CC_NOOV 4 "equality_operator"
+ [(match_dup 0) (const_int 0)])
+ (match_operand 5 "" "")
+ (match_operand 6 "" "")))]
+ "TARGET_THUMB2
+ && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)"
+ [(parallel [(set (match_dup 0)
+ (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (clobber (match_dup 3))])
+ (set (pc)
+ (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)])
+ (match_dup 5) (match_dup 6)))]
+ "
+ operands[2] = GEN_INT (31 - INTVAL (operands[2]));
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? LT : GE,
+ VOIDmode, operands[0], const0_rtx);
+ ")
+
+(define_peephole2
+ [(set (match_operand:CC_NOOV 0 "cc_register" "")
+ (compare:CC_NOOV (zero_extract:SI
+ (match_operand:SI 1 "low_register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (const_int 0))
+ (const_int 0)))
+ (match_scratch:SI 3 "l")
+ (set (pc)
+ (if_then_else (match_operator:CC_NOOV 4 "equality_operator"
+ [(match_dup 0) (const_int 0)])
+ (match_operand 5 "" "")
+ (match_operand 6 "" "")))]
+ "TARGET_THUMB2
+ && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)"
+ [(parallel [(set (match_dup 0)
+ (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (clobber (match_dup 3))])
+ (set (pc)
+ (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)])
+ (match_dup 5) (match_dup 6)))]
+ "
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+ ")
diff --git a/gcc/config/arm/uclinux-eabi.h b/gcc/config/arm/uclinux-eabi.h
new file mode 100644
index 000000000..4455288b8
--- /dev/null
+++ b/gcc/config/arm/uclinux-eabi.h
@@ -0,0 +1,66 @@
+/* Definitions for ARM EABI uClinux
+ Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+ Contributed by Paul Brook <paul@codesourcery.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Override settings that differ from the uclinux-elf or
+   bpabi defaults. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE | MASK_INTERWORK)
+
+/* On EABI GNU/Linux, we want both the BPABI builtins and the
+ GNU/Linux builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ TARGET_BPABI_CPP_BUILTINS(); \
+ builtin_define ("__uClinux__"); \
+ builtin_define ("__gnu_linux__"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=linux"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } \
+ while (false)
+
+#undef SUBTARGET_EXTRA_LINK_SPEC
+#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux_eabi -elf2flt" \
+ " --pic-veneer --target2=abs"
+
+/* We default to the "aapcs-linux" ABI so that enums are int-sized by
+ default. */
+#undef ARM_DEFAULT_ABI
+#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX
+
+/* Clear the instruction cache from `beg' to `end'. This makes an
+ inline system call to SYS_cacheflush. */
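+/* 0xf0002 is the EABI number of the ARM-private cacheflush syscall
+   (__ARM_NR_BASE + 2); a1/a2 carry the address range and a3 the
+   flags argument, here zero. */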
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(BEG, END) \
+{ \
+ register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \
+ register unsigned long _end __asm ("a2") = (unsigned long) (END); \
+ register unsigned long _flg __asm ("a3") = 0; \
+ register unsigned long _scno __asm ("r7") = 0xf0002; \
+ __asm __volatile ("swi 0x0 @ sys_cacheflush" \
+ : "=r" (_beg) \
+ : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \
+}
+
diff --git a/gcc/config/arm/uclinux-elf.h b/gcc/config/arm/uclinux-elf.h
new file mode 100644
index 000000000..50fd76580
--- /dev/null
+++ b/gcc/config/arm/uclinux-elf.h
@@ -0,0 +1,88 @@
+/* Definitions for ARM running uClinux using ELF
+ Copyright (C) 1999, 2001, 2004, 2005, 2007, 2008
+ Free Software Foundation, Inc.
+ Contributed by Philip Blundell <pb@nexus.co.uk>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* We don't want a PLT. */
+#undef NEED_PLT_RELOC
+#define NEED_PLT_RELOC 0
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/ELF ucLinux)", stderr);
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE)
+
+/* NOTE: The remaining definitions in this file are needed because uclinux
+ does not use config/linux.h. */
+
+/* Add GNU/Linux builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__uClinux__"); \
+ builtin_define ("__gnu_linux__"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=linux"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } \
+ while (false)
+
+/* Do not assume anything about header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* The GNU C++ standard library requires that these macros be defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+#undef SUBTARGET_EXTRA_LINK_SPEC
+#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux"
+
+/* Now we define the strings used to build the spec file. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt1%O%s crti%O%s crtbegin%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{profile:-p}"
+
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X -elf2flt"
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{profile:-lc_p}%{!profile:-lc}}"
+
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/arm/unaligned-funcs.c b/gcc/config/arm/unaligned-funcs.c
new file mode 100644
index 000000000..4e684f4fc
--- /dev/null
+++ b/gcc/config/arm/unaligned-funcs.c
@@ -0,0 +1,57 @@
+/* EABI unaligned read/write functions.
+
+ Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+int __aeabi_uread4 (void *);
+int __aeabi_uwrite4 (int, void *);
+long long __aeabi_uread8 (void *);
+long long __aeabi_uwrite8 (long long, void *);
+
+struct __attribute__((packed)) u4 { int data; };
+struct __attribute__((packed)) u8 { long long data; };
+
+int
+__aeabi_uread4 (void *ptr)
+{
+ return ((struct u4 *) ptr)->data;
+}
+
+int
+__aeabi_uwrite4 (int data, void *ptr)
+{
+ ((struct u4 *) ptr)->data = data;
+ return data;
+}
+
+long long
+__aeabi_uread8 (void *ptr)
+{
+ return ((struct u8 *) ptr)->data;
+}
+
+long long
+__aeabi_uwrite8 (long long data, void *ptr)
+{
+ ((struct u8 *) ptr)->data = data;
+ return data;
+}
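+
+/* A minimal usage sketch, with a hypothetical caller: for an access
+   through a packed pointer the compiler emits a library call, e.g.
+
+     int load4 (void *p) { return __aeabi_uread4 (p); }
+
+   rather than a plain word load, which older ARM cores rotate instead
+   of performing an unaligned access; the packed structs above make
+   GCC expand these helpers as safe byte-wise loads and stores. */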
diff --git a/gcc/config/arm/unknown-elf.h b/gcc/config/arm/unknown-elf.h
new file mode 100644
index 000000000..b47455ea9
--- /dev/null
+++ b/gcc/config/arm/unknown-elf.h
@@ -0,0 +1,100 @@
+/* Definitions for non-Linux based ARM systems using ELF
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Catherine Moore <clm@cygnus.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* elfos.h should have already been included. Now just override
+ any conflicting definitions and add any extras. */
+
+/* Run-time Target Specification. */
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/ELF)", stderr);
+#endif
+
+/* Default to using software floating point. */
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (0)
+#endif
+
+/* Now we define the strings used to build the spec file. */
+#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC
+
+#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC UNKNOWN_ELF_ENDFILE_SPEC
+
+/* The __USES_INITFINI__ define is tested in newlib/libc/sys/arm/crt0.S
+   to see if it needs to invoke _init() and _fini(). */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "-D__USES_INITFINI__"
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Return a nonzero value if DECL has a section attribute. */
+#define IN_NAMED_SECTION_P(DECL) \
+ ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
+ && DECL_SECTION_NAME (DECL) != NULL_TREE)
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (IN_NAMED_SECTION_P (DECL)) \
+ switch_to_section (get_named_section (DECL, NULL, 0)); \
+ else \
+ switch_to_section (bss_section); \
+ \
+ ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \
+ \
+ last_assemble_variable_decl = DECL; \
+ ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \
+ ASM_OUTPUT_SKIP (FILE, SIZE ? (int)(SIZE) : 1); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if ((DECL) != NULL && IN_NAMED_SECTION_P (DECL)) \
+ switch_to_section (get_named_section (DECL, NULL, 0)); \
+ else \
+ switch_to_section (bss_section); \
+ \
+ ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \
+ } \
+ while (0)
+
+#ifndef SUBTARGET_CPU_DEFAULT
+#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm7tdmi
+#endif
+
+/* The libgcc udivmod functions may throw exceptions. If newlib is
+ configured to support long longs in I/O, then printf will depend on
+ udivmoddi4, which will depend on the exception unwind routines,
+ which will depend on abort, which is defined in libc. */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC "--start-group %G %L --end-group"
diff --git a/gcc/config/arm/unwind-arm.c b/gcc/config/arm/unwind-arm.c
new file mode 100644
index 000000000..2c6e00489
--- /dev/null
+++ b/gcc/config/arm/unwind-arm.c
@@ -0,0 +1,1263 @@
+/* ARM EABI compliant unwinding routines.
+ Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Paul Brook
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "unwind.h"
+
+/* We add a prototype for abort here to avoid creating a dependency on
+ target headers. */
+extern void abort (void);
+
+/* Definitions for C++ runtime support routines. We make these weak
+ declarations to avoid pulling in libsupc++ unnecessarily. */
+typedef unsigned char bool;
+
+typedef struct _ZSt9type_info type_info; /* This names the C++ type_info type. */
+
+void __attribute__((weak)) __cxa_call_unexpected(_Unwind_Control_Block *ucbp);
+bool __attribute__((weak)) __cxa_begin_cleanup(_Unwind_Control_Block *ucbp);
+bool __attribute__((weak)) __cxa_type_match(_Unwind_Control_Block *ucbp,
+ const type_info *rttip,
+ bool is_reference,
+ void **matched_object);
+
+_Unwind_Ptr __attribute__((weak))
+__gnu_Unwind_Find_exidx (_Unwind_Ptr, int *);
+
+/* Misc constants. */
+#define R_IP 12
+#define R_SP 13
+#define R_LR 14
+#define R_PC 15
+
+#define EXIDX_CANTUNWIND 1
+#define uint32_highbit (((_uw) 1) << 31)
+
+#define UCB_FORCED_STOP_FN(ucbp) ((ucbp)->unwinder_cache.reserved1)
+#define UCB_PR_ADDR(ucbp) ((ucbp)->unwinder_cache.reserved2)
+#define UCB_SAVED_CALLSITE_ADDR(ucbp) ((ucbp)->unwinder_cache.reserved3)
+#define UCB_FORCED_STOP_ARG(ucbp) ((ucbp)->unwinder_cache.reserved4)
+
+struct core_regs
+{
+ _uw r[16];
+};
+
+/* We use normal integer types here to avoid the compiler generating
+ coprocessor instructions. */
+struct vfp_regs
+{
+ _uw64 d[16];
+ _uw pad;
+};
+
+struct vfpv3_regs
+{
+ /* Always populated via VSTM, so no need for the "pad" field from
+ vfp_regs (which is used to store the format word for FSTMX). */
+ _uw64 d[16];
+};
+
+struct fpa_reg
+{
+ _uw w[3];
+};
+
+struct fpa_regs
+{
+ struct fpa_reg f[8];
+};
+
+struct wmmxd_regs
+{
+ _uw64 wd[16];
+};
+
+struct wmmxc_regs
+{
+ _uw wc[4];
+};
+
+/* Unwind descriptors. */
+
+typedef struct
+{
+ _uw16 length;
+ _uw16 offset;
+} EHT16;
+
+typedef struct
+{
+ _uw length;
+ _uw offset;
+} EHT32;
+
+/* The ABI specifies that the unwind routines may only use core registers,
+ except when actually manipulating coprocessor state. This allows
+ us to write one implementation that works on all platforms by
+ demand-saving coprocessor registers.
+
+ During unwinding we hold the coprocessor state in the actual hardware
+ registers and allocate demand-save areas for use during phase1
+ unwinding. */
+
+typedef struct
+{
+ /* The first fields must be the same as a phase2_vrs. */
+ _uw demand_save_flags;
+ struct core_regs core;
+ _uw prev_sp; /* Only valid during forced unwinding. */
+ struct vfp_regs vfp;
+ struct vfpv3_regs vfp_regs_16_to_31;
+ struct fpa_regs fpa;
+ struct wmmxd_regs wmmxd;
+ struct wmmxc_regs wmmxc;
+} phase1_vrs;
+
+#define DEMAND_SAVE_VFP 1 /* VFP state has been saved if not set */
+#define DEMAND_SAVE_VFP_D 2 /* VFP state is for FLDMD/FSTMD if set */
+#define DEMAND_SAVE_VFP_V3 4 /* VFPv3 state for regs 16 .. 31 has
+ been saved if not set */
+#define DEMAND_SAVE_WMMXD 8 /* iWMMXt data registers have been
+ saved if not set. */
+#define DEMAND_SAVE_WMMXC 16 /* iWMMXt control registers have been
+ saved if not set. */
+
+/* This must match the structure created by the assembly wrappers. */
+typedef struct
+{
+ _uw demand_save_flags;
+ struct core_regs core;
+} phase2_vrs;
+
+
+/* An exception index table entry. */
+
+typedef struct __EIT_entry
+{
+ _uw fnoffset;
+ _uw content;
+} __EIT_entry;
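+/* In the EABI index "fnoffset" is a 31-bit place-relative offset to
+   the function, while "content" holds EXIDX_CANTUNWIND, an inline
+   unwind description (high bit set), or a place-relative offset to
+   an out-of-line table entry. */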
+
+/* Assembly helper functions. */
+
+/* Restore core register state. Never returns. */
+void __attribute__((noreturn)) restore_core_regs (struct core_regs *);
+
+
+/* Coprocessor register state manipulation functions. */
+
+/* Routines for FLDMX/FSTMX format... */
+void __gnu_Unwind_Save_VFP (struct vfp_regs * p);
+void __gnu_Unwind_Restore_VFP (struct vfp_regs * p);
+void __gnu_Unwind_Save_WMMXD (struct wmmxd_regs * p);
+void __gnu_Unwind_Restore_WMMXD (struct wmmxd_regs * p);
+void __gnu_Unwind_Save_WMMXC (struct wmmxc_regs * p);
+void __gnu_Unwind_Restore_WMMXC (struct wmmxc_regs * p);
+
+/* ...and those for FLDMD/FSTMD format... */
+void __gnu_Unwind_Save_VFP_D (struct vfp_regs * p);
+void __gnu_Unwind_Restore_VFP_D (struct vfp_regs * p);
+
+/* ...and those for VLDM/VSTM format, saving/restoring only registers
+ 16 through 31. */
+void __gnu_Unwind_Save_VFP_D_16_to_31 (struct vfpv3_regs * p);
+void __gnu_Unwind_Restore_VFP_D_16_to_31 (struct vfpv3_regs * p);
+
+/* Restore coprocessor state after phase1 unwinding. */
+static void
+restore_non_core_regs (phase1_vrs * vrs)
+{
+ if ((vrs->demand_save_flags & DEMAND_SAVE_VFP) == 0)
+ {
+ if (vrs->demand_save_flags & DEMAND_SAVE_VFP_D)
+ __gnu_Unwind_Restore_VFP_D (&vrs->vfp);
+ else
+ __gnu_Unwind_Restore_VFP (&vrs->vfp);
+ }
+
+ if ((vrs->demand_save_flags & DEMAND_SAVE_VFP_V3) == 0)
+ __gnu_Unwind_Restore_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31);
+
+ if ((vrs->demand_save_flags & DEMAND_SAVE_WMMXD) == 0)
+ __gnu_Unwind_Restore_WMMXD (&vrs->wmmxd);
+ if ((vrs->demand_save_flags & DEMAND_SAVE_WMMXC) == 0)
+ __gnu_Unwind_Restore_WMMXC (&vrs->wmmxc);
+}
+
+/* A better way to do this would probably be to compare the absolute address
+ with a segment relative relocation of the same symbol. */
+
+extern int __text_start;
+extern int __data_start;
+
+/* The exception index table location. */
+extern __EIT_entry __exidx_start;
+extern __EIT_entry __exidx_end;
+
+/* ABI defined personality routines. */
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr0 (_Unwind_State,
+ _Unwind_Control_Block *, _Unwind_Context *);// __attribute__((weak));
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr1 (_Unwind_State,
+ _Unwind_Control_Block *, _Unwind_Context *) __attribute__((weak));
+extern _Unwind_Reason_Code __aeabi_unwind_cpp_pr2 (_Unwind_State,
+ _Unwind_Control_Block *, _Unwind_Context *) __attribute__((weak));
+
+/* ABI defined routine to store a virtual register to memory. */
+
+_Unwind_VRS_Result _Unwind_VRS_Get (_Unwind_Context *context,
+ _Unwind_VRS_RegClass regclass,
+ _uw regno,
+ _Unwind_VRS_DataRepresentation representation,
+ void *valuep)
+{
+ phase1_vrs *vrs = (phase1_vrs *) context;
+
+ switch (regclass)
+ {
+ case _UVRSC_CORE:
+ if (representation != _UVRSD_UINT32
+ || regno > 15)
+ return _UVRSR_FAILED;
+ *(_uw *) valuep = vrs->core.r[regno];
+ return _UVRSR_OK;
+
+ case _UVRSC_VFP:
+ case _UVRSC_FPA:
+ case _UVRSC_WMMXD:
+ case _UVRSC_WMMXC:
+ return _UVRSR_NOT_IMPLEMENTED;
+
+ default:
+ return _UVRSR_FAILED;
+ }
+}
+
+
+/* ABI defined function to load a virtual register from memory. */
+
+_Unwind_VRS_Result _Unwind_VRS_Set (_Unwind_Context *context,
+ _Unwind_VRS_RegClass regclass,
+ _uw regno,
+ _Unwind_VRS_DataRepresentation representation,
+ void *valuep)
+{
+ phase1_vrs *vrs = (phase1_vrs *) context;
+
+ switch (regclass)
+ {
+ case _UVRSC_CORE:
+ if (representation != _UVRSD_UINT32
+ || regno > 15)
+ return _UVRSR_FAILED;
+
+ vrs->core.r[regno] = *(_uw *) valuep;
+ return _UVRSR_OK;
+
+ case _UVRSC_VFP:
+ case _UVRSC_FPA:
+ case _UVRSC_WMMXD:
+ case _UVRSC_WMMXC:
+ return _UVRSR_NOT_IMPLEMENTED;
+
+ default:
+ return _UVRSR_FAILED;
+ }
+}
+
+
+/* ABI defined function to pop registers off the stack. */
+
+_Unwind_VRS_Result _Unwind_VRS_Pop (_Unwind_Context *context,
+ _Unwind_VRS_RegClass regclass,
+ _uw discriminator,
+ _Unwind_VRS_DataRepresentation representation)
+{
+ phase1_vrs *vrs = (phase1_vrs *) context;
+
+ switch (regclass)
+ {
+ case _UVRSC_CORE:
+ {
+ _uw *ptr;
+ _uw mask;
+ int i;
+
+ if (representation != _UVRSD_UINT32)
+ return _UVRSR_FAILED;
+
+ mask = discriminator & 0xffff;
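+	/* E.g. an unwind op popping {r4, r5, pc} passes mask 0x8030
+	   (bits 4, 5 and 15 set). */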
+ ptr = (_uw *) vrs->core.r[R_SP];
+ /* Pop the requested registers. */
+ for (i = 0; i < 16; i++)
+ {
+ if (mask & (1 << i))
+ vrs->core.r[i] = *(ptr++);
+ }
+	/* Write back the stack pointer value if it wasn't restored. */
+ if ((mask & (1 << R_SP)) == 0)
+ vrs->core.r[R_SP] = (_uw) ptr;
+ }
+ return _UVRSR_OK;
+
+ case _UVRSC_VFP:
+ {
+ _uw start = discriminator >> 16;
+ _uw count = discriminator & 0xffff;
+ struct vfp_regs tmp;
+ struct vfpv3_regs tmp_16_to_31;
+ int tmp_count;
+ _uw *sp;
+ _uw *dest;
+ int num_vfpv3_regs = 0;
+
+	/* We use an approximation here by always bounding _UVRSD_DOUBLE
+	   register numbers at 32, since we can't detect whether VFPv3 is
+	   present (if it is not, the upper limit is 16). */
+ if ((representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE)
+ || start + count > (representation == _UVRSD_VFPX ? 16 : 32)
+ || (representation == _UVRSD_VFPX && start >= 16))
+ return _UVRSR_FAILED;
+
+ /* Check if we're being asked to pop VFPv3-only registers
+ (numbers 16 through 31). */
+ if (start >= 16)
+ num_vfpv3_regs = count;
+ else if (start + count > 16)
+ num_vfpv3_regs = start + count - 16;
+
+ if (num_vfpv3_regs && representation != _UVRSD_DOUBLE)
+ return _UVRSR_FAILED;
+
+ /* Demand-save coprocessor registers for stage1. */
+ if (start < 16 && (vrs->demand_save_flags & DEMAND_SAVE_VFP))
+ {
+ vrs->demand_save_flags &= ~DEMAND_SAVE_VFP;
+
+ if (representation == _UVRSD_DOUBLE)
+ {
+ /* Save in FLDMD/FSTMD format. */
+ vrs->demand_save_flags |= DEMAND_SAVE_VFP_D;
+ __gnu_Unwind_Save_VFP_D (&vrs->vfp);
+ }
+ else
+ {
+ /* Save in FLDMX/FSTMX format. */
+ vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_D;
+ __gnu_Unwind_Save_VFP (&vrs->vfp);
+ }
+ }
+
+ if (num_vfpv3_regs > 0
+ && (vrs->demand_save_flags & DEMAND_SAVE_VFP_V3))
+ {
+ vrs->demand_save_flags &= ~DEMAND_SAVE_VFP_V3;
+ __gnu_Unwind_Save_VFP_D_16_to_31 (&vrs->vfp_regs_16_to_31);
+ }
+
+ /* Restore the registers from the stack. Do this by saving the
+ current VFP registers to a memory area, moving the in-memory
+ values into that area, and restoring from the whole area.
+ For _UVRSD_VFPX we assume FSTMX standard format 1. */
+ if (representation == _UVRSD_VFPX)
+ __gnu_Unwind_Save_VFP (&tmp);
+ else
+ {
+ /* Save registers 0 .. 15 if required. */
+ if (start < 16)
+ __gnu_Unwind_Save_VFP_D (&tmp);
+
+ /* Save VFPv3 registers 16 .. 31 if required. */
+ if (num_vfpv3_regs)
+ __gnu_Unwind_Save_VFP_D_16_to_31 (&tmp_16_to_31);
+ }
+
+ /* Work out how many registers below register 16 need popping. */
+ tmp_count = num_vfpv3_regs > 0 ? 16 - start : count;
+
+ /* Copy registers below 16, if needed.
+ The stack address is only guaranteed to be word aligned, so
+ we can't use doubleword copies. */
+ sp = (_uw *) vrs->core.r[R_SP];
+ if (tmp_count > 0)
+ {
+ tmp_count *= 2;
+ dest = (_uw *) &tmp.d[start];
+ while (tmp_count--)
+ *(dest++) = *(sp++);
+ }
+
+ /* Copy VFPv3 registers numbered >= 16, if needed. */
+ if (num_vfpv3_regs > 0)
+ {
+ /* num_vfpv3_regs is needed below, so copy it. */
+ int tmp_count_2 = num_vfpv3_regs * 2;
+ int vfpv3_start = start < 16 ? 16 : start;
+
+ dest = (_uw *) &tmp_16_to_31.d[vfpv3_start - 16];
+ while (tmp_count_2--)
+ *(dest++) = *(sp++);
+ }
+
+ /* Skip the format word space if using FLDMX/FSTMX format. */
+ if (representation == _UVRSD_VFPX)
+ sp++;
+
+ /* Set the new stack pointer. */
+ vrs->core.r[R_SP] = (_uw) sp;
+
+ /* Reload the registers. */
+ if (representation == _UVRSD_VFPX)
+ __gnu_Unwind_Restore_VFP (&tmp);
+ else
+ {
+ /* Restore registers 0 .. 15 if required. */
+ if (start < 16)
+ __gnu_Unwind_Restore_VFP_D (&tmp);
+
+ /* Restore VFPv3 registers 16 .. 31 if required. */
+ if (num_vfpv3_regs > 0)
+ __gnu_Unwind_Restore_VFP_D_16_to_31 (&tmp_16_to_31);
+ }
+ }
+ return _UVRSR_OK;
+
+ case _UVRSC_FPA:
+ return _UVRSR_NOT_IMPLEMENTED;
+
+ case _UVRSC_WMMXD:
+ {
+ _uw start = discriminator >> 16;
+ _uw count = discriminator & 0xffff;
+ struct wmmxd_regs tmp;
+ _uw *sp;
+ _uw *dest;
+
+ if ((representation != _UVRSD_UINT64) || start + count > 16)
+ return _UVRSR_FAILED;
+
+ if (vrs->demand_save_flags & DEMAND_SAVE_WMMXD)
+ {
+	  /* Demand-save registers for stage1.  */
+ vrs->demand_save_flags &= ~DEMAND_SAVE_WMMXD;
+ __gnu_Unwind_Save_WMMXD (&vrs->wmmxd);
+ }
+
+ /* Restore the registers from the stack. Do this by saving the
+ current WMMXD registers to a memory area, moving the in-memory
+ values into that area, and restoring from the whole area. */
+ __gnu_Unwind_Save_WMMXD (&tmp);
+
+ /* The stack address is only guaranteed to be word aligned, so
+ we can't use doubleword copies. */
+ sp = (_uw *) vrs->core.r[R_SP];
+ dest = (_uw *) &tmp.wd[start];
+ count *= 2;
+ while (count--)
+ *(dest++) = *(sp++);
+
+ /* Set the new stack pointer. */
+ vrs->core.r[R_SP] = (_uw) sp;
+
+ /* Reload the registers. */
+ __gnu_Unwind_Restore_WMMXD (&tmp);
+ }
+ return _UVRSR_OK;
+
+ case _UVRSC_WMMXC:
+ {
+ int i;
+ struct wmmxc_regs tmp;
+ _uw *sp;
+
+ if ((representation != _UVRSD_UINT32) || discriminator > 16)
+ return _UVRSR_FAILED;
+
+ if (vrs->demand_save_flags & DEMAND_SAVE_WMMXC)
+ {
+	  /* Demand-save registers for stage1.  */
+ vrs->demand_save_flags &= ~DEMAND_SAVE_WMMXC;
+ __gnu_Unwind_Save_WMMXC (&vrs->wmmxc);
+ }
+
+ /* Restore the registers from the stack. Do this by saving the
+ current WMMXC registers to a memory area, moving the in-memory
+ values into that area, and restoring from the whole area. */
+ __gnu_Unwind_Save_WMMXC (&tmp);
+
+ sp = (_uw *) vrs->core.r[R_SP];
+ for (i = 0; i < 4; i++)
+ if (discriminator & (1 << i))
+ tmp.wc[i] = *(sp++);
+
+ /* Set the new stack pointer. */
+ vrs->core.r[R_SP] = (_uw) sp;
+
+ /* Reload the registers. */
+ __gnu_Unwind_Restore_WMMXC (&tmp);
+ }
+ return _UVRSR_OK;
+
+ default:
+ return _UVRSR_FAILED;
+ }
+}
+
+
+/* Core unwinding functions. */
+
+/* Calculate the address encoded by a 31-bit self-relative offset at address
+ P. */
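+/* For example, a word holding 0xfffffffc has bit 30 set, so the offset
+   sign-extends to -4 and the encoded address is P - 4.  */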
+static inline _uw
+selfrel_offset31 (const _uw *p)
+{
+ _uw offset;
+
+ offset = *p;
+ /* Sign extend to 32 bits. */
+ if (offset & (1 << 30))
+ offset |= 1u << 31;
+ else
+ offset &= ~(1u << 31);
+
+ return offset + (_uw) p;
+}
+
+
+/* Perform a binary search for RETURN_ADDRESS in TABLE.  The table
+   contains NREC entries, sorted by increasing function start address.  */
+
+static const __EIT_entry *
+search_EIT_table (const __EIT_entry * table, int nrec, _uw return_address)
+{
+ _uw next_fn;
+ _uw this_fn;
+ int n, left, right;
+
+ if (nrec == 0)
+ return (__EIT_entry *) 0;
+
+ left = 0;
+ right = nrec - 1;
+
+ while (1)
+ {
+ n = (left + right) / 2;
+ this_fn = selfrel_offset31 (&table[n].fnoffset);
+ if (n != nrec - 1)
+ next_fn = selfrel_offset31 (&table[n + 1].fnoffset) - 1;
+ else
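+	/* The last entry covers all remaining addresses.  */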
+ next_fn = (_uw)0 - 1;
+
+ if (return_address < this_fn)
+ {
+ if (n == left)
+ return (__EIT_entry *) 0;
+ right = n - 1;
+ }
+ else if (return_address <= next_fn)
+ return &table[n];
+ else
+ left = n + 1;
+ }
+}
+
+/* Find the exception index table entry for the given address.
+   Fill in the relevant fields of the UCB.
+   Returns _URC_OK on success, _URC_END_OF_STACK if the frame cannot be
+   unwound, or _URC_FAILURE if an error occurred.  */
+
+static _Unwind_Reason_Code
+get_eit_entry (_Unwind_Control_Block *ucbp, _uw return_address)
+{
+ const __EIT_entry * eitp;
+ int nrec;
+
+ /* The return address is the address of the instruction following the
+ call instruction (plus one in thumb mode). If this was the last
+ instruction in the function the address will lie in the following
+ function. Subtract 2 from the address so that it points within the call
+ instruction itself. */
+ return_address -= 2;
+
+ if (__gnu_Unwind_Find_exidx)
+ {
+ eitp = (const __EIT_entry *) __gnu_Unwind_Find_exidx (return_address,
+ &nrec);
+ if (!eitp)
+ {
+ UCB_PR_ADDR (ucbp) = 0;
+ return _URC_FAILURE;
+ }
+ }
+ else
+ {
+ eitp = &__exidx_start;
+ nrec = &__exidx_end - &__exidx_start;
+ }
+
+ eitp = search_EIT_table (eitp, nrec, return_address);
+
+ if (!eitp)
+ {
+ UCB_PR_ADDR (ucbp) = 0;
+ return _URC_FAILURE;
+ }
+ ucbp->pr_cache.fnstart = selfrel_offset31 (&eitp->fnoffset);
+
+ /* Can this frame be unwound at all? */
+ if (eitp->content == EXIDX_CANTUNWIND)
+ {
+ UCB_PR_ADDR (ucbp) = 0;
+ return _URC_END_OF_STACK;
+ }
+
+ /* Obtain the address of the "real" __EHT_Header word. */
+
+ if (eitp->content & uint32_highbit)
+ {
+ /* It is immediate data. */
+ ucbp->pr_cache.ehtp = (_Unwind_EHT_Header *)&eitp->content;
+ ucbp->pr_cache.additional = 1;
+ }
+ else
+ {
+ /* The low 31 bits of the content field are a self-relative
+ offset to an _Unwind_EHT_Entry structure. */
+ ucbp->pr_cache.ehtp =
+ (_Unwind_EHT_Header *) selfrel_offset31 (&eitp->content);
+ ucbp->pr_cache.additional = 0;
+ }
+
+ /* Discover the personality routine address. */
+ if (*ucbp->pr_cache.ehtp & (1u << 31))
+ {
+ /* One of the predefined standard routines. */
+ _uw idx = (*(_uw *) ucbp->pr_cache.ehtp >> 24) & 0xf;
+ if (idx == 0)
+ UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr0;
+ else if (idx == 1)
+ UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr1;
+ else if (idx == 2)
+ UCB_PR_ADDR (ucbp) = (_uw) &__aeabi_unwind_cpp_pr2;
+ else
+ { /* Failed */
+ UCB_PR_ADDR (ucbp) = 0;
+ return _URC_FAILURE;
+ }
+ }
+ else
+ {
+      /* The header word is a self-relative offset to the personality
+	 routine.  */
+ UCB_PR_ADDR (ucbp) = selfrel_offset31 (ucbp->pr_cache.ehtp);
+ }
+ return _URC_OK;
+}
+
+
+/* Perform phase2 unwinding. VRS is the initial virtual register state. */
+
+static void __attribute__((noreturn))
+unwind_phase2 (_Unwind_Control_Block * ucbp, phase2_vrs * vrs)
+{
+ _Unwind_Reason_Code pr_result;
+
+ do
+ {
+ /* Find the entry for this routine. */
+ if (get_eit_entry (ucbp, vrs->core.r[R_PC]) != _URC_OK)
+ abort ();
+
+ UCB_SAVED_CALLSITE_ADDR (ucbp) = vrs->core.r[R_PC];
+
+ /* Call the pr to decide what to do. */
+ pr_result = ((personality_routine) UCB_PR_ADDR (ucbp))
+ (_US_UNWIND_FRAME_STARTING, ucbp, (_Unwind_Context *) vrs);
+ }
+ while (pr_result == _URC_CONTINUE_UNWIND);
+
+ if (pr_result != _URC_INSTALL_CONTEXT)
+ abort();
+
+ restore_core_regs (&vrs->core);
+}
+
+/* Perform phase2 forced unwinding. */
+
+static _Unwind_Reason_Code
+unwind_phase2_forced (_Unwind_Control_Block *ucbp, phase2_vrs *entry_vrs,
+ int resuming)
+{
+ _Unwind_Stop_Fn stop_fn = (_Unwind_Stop_Fn) UCB_FORCED_STOP_FN (ucbp);
+ void *stop_arg = (void *)UCB_FORCED_STOP_ARG (ucbp);
+ _Unwind_Reason_Code pr_result = 0;
+  /* We use phase1_vrs here, even though we do not demand-save, because
+     we need the prev_sp field.  */
+ phase1_vrs saved_vrs, next_vrs;
+
+ /* Save the core registers. */
+ saved_vrs.core = entry_vrs->core;
+ /* We don't need to demand-save the non-core registers, because we
+ unwind in a single pass. */
+ saved_vrs.demand_save_flags = 0;
+
+ /* Unwind until we reach a propagation barrier. */
+ do
+ {
+ _Unwind_State action;
+ _Unwind_Reason_Code entry_code;
+ _Unwind_Reason_Code stop_code;
+
+ /* Find the entry for this routine. */
+ entry_code = get_eit_entry (ucbp, saved_vrs.core.r[R_PC]);
+
+ if (resuming)
+ {
+ action = _US_UNWIND_FRAME_RESUME | _US_FORCE_UNWIND;
+ resuming = 0;
+ }
+ else
+ action = _US_UNWIND_FRAME_STARTING | _US_FORCE_UNWIND;
+
+ if (entry_code == _URC_OK)
+ {
+ UCB_SAVED_CALLSITE_ADDR (ucbp) = saved_vrs.core.r[R_PC];
+
+ next_vrs = saved_vrs;
+
+ /* Call the pr to decide what to do. */
+ pr_result = ((personality_routine) UCB_PR_ADDR (ucbp))
+ (action, ucbp, (void *) &next_vrs);
+
+ saved_vrs.prev_sp = next_vrs.core.r[R_SP];
+ }
+ else
+ {
+ /* Treat any failure as the end of unwinding, to cope more
+ gracefully with missing EH information. Mixed EH and
+ non-EH within one object will usually result in failure,
+ because the .ARM.exidx tables do not indicate the end
+ of the code to which they apply; but mixed EH and non-EH
+ shared objects should return an unwind failure at the
+ entry of a non-EH shared object. */
+ action |= _US_END_OF_STACK;
+
+ saved_vrs.prev_sp = saved_vrs.core.r[R_SP];
+ }
+
+ stop_code = stop_fn (1, action, ucbp->exception_class, ucbp,
+ (void *)&saved_vrs, stop_arg);
+ if (stop_code != _URC_NO_REASON)
+ return _URC_FAILURE;
+
+ if (entry_code != _URC_OK)
+ return entry_code;
+
+ saved_vrs = next_vrs;
+ }
+ while (pr_result == _URC_CONTINUE_UNWIND);
+
+ if (pr_result != _URC_INSTALL_CONTEXT)
+ {
+ /* Some sort of failure has occurred in the pr and probably the
+ pr returned _URC_FAILURE. */
+ return _URC_FAILURE;
+ }
+
+ restore_core_regs (&saved_vrs.core);
+}
+
+/* This is a very limited implementation of _Unwind_GetCFA. It returns
+ the stack pointer as it is about to be unwound, and is only valid
+ while calling the stop function during forced unwinding. If the
+ current personality routine result is going to run a cleanup, this
+ will not be the CFA; but when the frame is really unwound, it will
+ be. */
+
+_Unwind_Word
+_Unwind_GetCFA (_Unwind_Context *context)
+{
+ return ((phase1_vrs *) context)->prev_sp;
+}
+
+/* Perform phase1 unwinding. UCBP is the exception being thrown, and
+ entry_VRS is the register state on entry to _Unwind_RaiseException. */
+
+_Unwind_Reason_Code
+__gnu_Unwind_RaiseException (_Unwind_Control_Block *, phase2_vrs *);
+
+_Unwind_Reason_Code
+__gnu_Unwind_RaiseException (_Unwind_Control_Block * ucbp,
+ phase2_vrs * entry_vrs)
+{
+ phase1_vrs saved_vrs;
+ _Unwind_Reason_Code pr_result;
+
+ /* Set the pc to the call site. */
+ entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR];
+
+ /* Save the core registers. */
+ saved_vrs.core = entry_vrs->core;
+ /* Set demand-save flags. */
+ saved_vrs.demand_save_flags = ~(_uw) 0;
+
+ /* Unwind until we reach a propagation barrier. */
+ do
+ {
+ /* Find the entry for this routine. */
+ if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK)
+ return _URC_FAILURE;
+
+ /* Call the pr to decide what to do. */
+ pr_result = ((personality_routine) UCB_PR_ADDR (ucbp))
+ (_US_VIRTUAL_UNWIND_FRAME, ucbp, (void *) &saved_vrs);
+ }
+ while (pr_result == _URC_CONTINUE_UNWIND);
+
+ /* We've unwound as far as we want to go, so restore the original
+ register state. */
+ restore_non_core_regs (&saved_vrs);
+ if (pr_result != _URC_HANDLER_FOUND)
+ {
+ /* Some sort of failure has occurred in the pr and probably the
+ pr returned _URC_FAILURE. */
+ return _URC_FAILURE;
+ }
+
+ unwind_phase2 (ucbp, entry_vrs);
+}
+
+/* Perform forced unwinding.  UCBP is the exception being thrown, STOP_FN
+   is called for each frame with STOP_ARG passed through to it, and
+   ENTRY_VRS is the register state on entry to _Unwind_ForcedUnwind.  */
+_Unwind_Reason_Code
+__gnu_Unwind_ForcedUnwind (_Unwind_Control_Block *,
+ _Unwind_Stop_Fn, void *, phase2_vrs *);
+
+_Unwind_Reason_Code
+__gnu_Unwind_ForcedUnwind (_Unwind_Control_Block *ucbp,
+ _Unwind_Stop_Fn stop_fn, void *stop_arg,
+ phase2_vrs *entry_vrs)
+{
+ UCB_FORCED_STOP_FN (ucbp) = (_uw) stop_fn;
+ UCB_FORCED_STOP_ARG (ucbp) = (_uw) stop_arg;
+
+ /* Set the pc to the call site. */
+ entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR];
+
+ return unwind_phase2_forced (ucbp, entry_vrs, 0);
+}
+
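+/* Resume unwinding after a cleanup has been run.  UCBP is the exception
+   being thrown and ENTRY_VRS is the register state on entry to
+   _Unwind_Resume.  */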
+_Unwind_Reason_Code
+__gnu_Unwind_Resume (_Unwind_Control_Block *, phase2_vrs *);
+
+_Unwind_Reason_Code
+__gnu_Unwind_Resume (_Unwind_Control_Block * ucbp, phase2_vrs * entry_vrs)
+{
+ _Unwind_Reason_Code pr_result;
+
+ /* Recover the saved address. */
+ entry_vrs->core.r[R_PC] = UCB_SAVED_CALLSITE_ADDR (ucbp);
+
+ if (UCB_FORCED_STOP_FN (ucbp))
+ {
+ unwind_phase2_forced (ucbp, entry_vrs, 1);
+
+ /* We can't return failure at this point. */
+ abort ();
+ }
+
+ /* Call the cached PR. */
+ pr_result = ((personality_routine) UCB_PR_ADDR (ucbp))
+ (_US_UNWIND_FRAME_RESUME, ucbp, (_Unwind_Context *) entry_vrs);
+
+ switch (pr_result)
+ {
+ case _URC_INSTALL_CONTEXT:
+ /* Upload the registers to enter the landing pad. */
+ restore_core_regs (&entry_vrs->core);
+
+ case _URC_CONTINUE_UNWIND:
+ /* Continue unwinding the next frame. */
+ unwind_phase2 (ucbp, entry_vrs);
+
+ default:
+ abort ();
+ }
+}
+
+_Unwind_Reason_Code
+__gnu_Unwind_Resume_or_Rethrow (_Unwind_Control_Block *, phase2_vrs *);
+
+_Unwind_Reason_Code
+__gnu_Unwind_Resume_or_Rethrow (_Unwind_Control_Block * ucbp,
+ phase2_vrs * entry_vrs)
+{
+ if (!UCB_FORCED_STOP_FN (ucbp))
+ return __gnu_Unwind_RaiseException (ucbp, entry_vrs);
+
+ /* Set the pc to the call site. */
+ entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR];
+ /* Continue unwinding the next frame. */
+ return unwind_phase2_forced (ucbp, entry_vrs, 0);
+}
+
+/* Clean up an exception object when unwinding is complete. */
+void
+_Unwind_Complete (_Unwind_Control_Block * ucbp __attribute__((unused)))
+{
+}
+
+
+/* Get the _Unwind_Control_Block from an _Unwind_Context. */
+
+static inline _Unwind_Control_Block *
+unwind_UCB_from_context (_Unwind_Context * context)
+{
+ return (_Unwind_Control_Block *) _Unwind_GetGR (context, R_IP);
+}
+
+
+/* Free an exception. */
+
+void
+_Unwind_DeleteException (_Unwind_Exception * exc)
+{
+ if (exc->exception_cleanup)
+ (*exc->exception_cleanup) (_URC_FOREIGN_EXCEPTION_CAUGHT, exc);
+}
+
+
+/* Perform stack backtrace through unwind data. */
+_Unwind_Reason_Code
+__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument,
+ phase2_vrs * entry_vrs);
+_Unwind_Reason_Code
+__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument,
+ phase2_vrs * entry_vrs)
+{
+ phase1_vrs saved_vrs;
+ _Unwind_Reason_Code code;
+
+ _Unwind_Control_Block ucb;
+ _Unwind_Control_Block *ucbp = &ucb;
+
+ /* Set the pc to the call site. */
+ entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR];
+
+ /* Save the core registers. */
+ saved_vrs.core = entry_vrs->core;
+ /* Set demand-save flags. */
+ saved_vrs.demand_save_flags = ~(_uw) 0;
+
+ do
+ {
+ /* Find the entry for this routine. */
+ if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK)
+ {
+ code = _URC_FAILURE;
+ break;
+ }
+
+ /* The dwarf unwinder assumes the context structure holds things
+ like the function and LSDA pointers. The ARM implementation
+ caches these in the exception header (UCB). To avoid
+ rewriting everything we make the virtual IP register point at
+ the UCB. */
+ _Unwind_SetGR((_Unwind_Context *)&saved_vrs, 12, (_Unwind_Ptr) ucbp);
+
+ /* Call trace function. */
+ if ((*trace) ((_Unwind_Context *) &saved_vrs, trace_argument)
+ != _URC_NO_REASON)
+ {
+ code = _URC_FAILURE;
+ break;
+ }
+
+ /* Call the pr to decide what to do. */
+ code = ((personality_routine) UCB_PR_ADDR (ucbp))
+ (_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND,
+ ucbp, (void *) &saved_vrs);
+ }
+ while (code != _URC_END_OF_STACK
+ && code != _URC_FAILURE);
+
+ restore_non_core_regs (&saved_vrs);
+ return code;
+}
+
+
+/* Common implementation for ARM ABI defined personality routines.
+ ID is the index of the personality routine, other arguments are as defined
+ by __aeabi_unwind_cpp_pr{0,1,2}. */
+
+static _Unwind_Reason_Code
+__gnu_unwind_pr_common (_Unwind_State state,
+ _Unwind_Control_Block *ucbp,
+ _Unwind_Context *context,
+ int id)
+{
+ __gnu_unwind_state uws;
+ _uw *data;
+ _uw offset;
+ _uw len;
+ _uw rtti_count;
+ int phase2_call_unexpected_after_unwind = 0;
+ int in_range = 0;
+ int forced_unwind = state & _US_FORCE_UNWIND;
+
+ state &= _US_ACTION_MASK;
+
+ data = (_uw *) ucbp->pr_cache.ehtp;
+ uws.data = *(data++);
+ uws.next = data;
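+  /* For __aeabi_unwind_cpp_pr0 the first word holds three unwind
+     instruction bytes; for pr1 and pr2 it holds a count of additional
+     data words followed by two instruction bytes.  */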
+ if (id == 0)
+ {
+ uws.data <<= 8;
+ uws.words_left = 0;
+ uws.bytes_left = 3;
+ }
+ else
+ {
+ uws.words_left = (uws.data >> 16) & 0xff;
+ uws.data <<= 16;
+ uws.bytes_left = 2;
+ data += uws.words_left;
+ }
+
+ /* Restore the saved pointer. */
+ if (state == _US_UNWIND_FRAME_RESUME)
+ data = (_uw *) ucbp->cleanup_cache.bitpattern[0];
+
+ if ((ucbp->pr_cache.additional & 1) == 0)
+ {
+ /* Process descriptors. */
+ while (*data)
+ {
+ _uw addr;
+ _uw fnstart;
+
+ if (id == 2)
+ {
+ len = ((EHT32 *) data)->length;
+ offset = ((EHT32 *) data)->offset;
+ data += 2;
+ }
+ else
+ {
+ len = ((EHT16 *) data)->length;
+ offset = ((EHT16 *) data)->offset;
+ data++;
+ }
+
+ fnstart = ucbp->pr_cache.fnstart + (offset & ~1);
+ addr = _Unwind_GetGR (context, R_PC);
+ in_range = (fnstart <= addr && addr < fnstart + (len & ~1));
+
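+	  /* The low bits of the offset and length words select the
+	     descriptor kind: 0 is a cleanup, 1 a catch handler and 2 an
+	     exception specification.  */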
+ switch (((offset & 1) << 1) | (len & 1))
+ {
+ case 0:
+ /* Cleanup. */
+ if (state != _US_VIRTUAL_UNWIND_FRAME
+ && in_range)
+ {
+ /* Cleanup in range, and we are running cleanups. */
+ _uw lp;
+
+ /* Landing pad address is 31-bit pc-relative offset. */
+ lp = selfrel_offset31 (data);
+ data++;
+ /* Save the exception data pointer. */
+ ucbp->cleanup_cache.bitpattern[0] = (_uw) data;
+ if (!__cxa_begin_cleanup (ucbp))
+ return _URC_FAILURE;
+	      /* Set up the VRS to enter the landing pad.  */
+ _Unwind_SetGR (context, R_PC, lp);
+ return _URC_INSTALL_CONTEXT;
+ }
+ /* Cleanup not in range, or we are in stage 1. */
+ data++;
+ break;
+
+ case 1:
+ /* Catch handler. */
+ if (state == _US_VIRTUAL_UNWIND_FRAME)
+ {
+ if (in_range)
+ {
+ /* Check for a barrier. */
+ _uw rtti;
+ bool is_reference = (data[0] & uint32_highbit) != 0;
+ void *matched;
+
+ /* Check for no-throw areas. */
+ if (data[1] == (_uw) -2)
+ return _URC_FAILURE;
+
+		  /* The thrown object immediately follows the UCB.  */
+ matched = (void *)(ucbp + 1);
+ if (data[1] != (_uw) -1)
+ {
+ /* Match a catch specification. */
+ rtti = _Unwind_decode_target2 ((_uw) &data[1]);
+ if (!__cxa_type_match (ucbp, (type_info *) rtti,
+ is_reference,
+ &matched))
+ matched = (void *)0;
+ }
+
+ if (matched)
+ {
+ ucbp->barrier_cache.sp =
+ _Unwind_GetGR (context, R_SP);
+ ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
+ ucbp->barrier_cache.bitpattern[1] = (_uw) data;
+ return _URC_HANDLER_FOUND;
+ }
+ }
+ /* Handler out of range, or not matched. */
+ }
+ else if (ucbp->barrier_cache.sp == _Unwind_GetGR (context, R_SP)
+ && ucbp->barrier_cache.bitpattern[1] == (_uw) data)
+ {
+ /* Matched a previous propagation barrier. */
+ _uw lp;
+
+		  /* Set up for entry to the handler.  */
+ lp = selfrel_offset31 (data);
+ _Unwind_SetGR (context, R_PC, lp);
+ _Unwind_SetGR (context, 0, (_uw) ucbp);
+ return _URC_INSTALL_CONTEXT;
+ }
+ /* Catch handler not matched. Advance to the next descriptor. */
+ data += 2;
+ break;
+
+ case 2:
+ rtti_count = data[0] & 0x7fffffff;
+ /* Exception specification. */
+ if (state == _US_VIRTUAL_UNWIND_FRAME)
+ {
+ if (in_range && (!forced_unwind || !rtti_count))
+ {
+ /* Match against the exception specification. */
+ _uw i;
+ _uw rtti;
+ void *matched;
+
+ for (i = 0; i < rtti_count; i++)
+ {
+ matched = (void *)(ucbp + 1);
+ rtti = _Unwind_decode_target2 ((_uw) &data[i + 1]);
+ if (__cxa_type_match (ucbp, (type_info *) rtti, 0,
+ &matched))
+ break;
+ }
+
+ if (i == rtti_count)
+ {
+ /* Exception does not match the spec. */
+ ucbp->barrier_cache.sp =
+ _Unwind_GetGR (context, R_SP);
+ ucbp->barrier_cache.bitpattern[0] = (_uw) matched;
+ ucbp->barrier_cache.bitpattern[1] = (_uw) data;
+ return _URC_HANDLER_FOUND;
+ }
+ }
+ /* Handler out of range, or exception is permitted. */
+ }
+ else if (ucbp->barrier_cache.sp == _Unwind_GetGR (context, R_SP)
+ && ucbp->barrier_cache.bitpattern[1] == (_uw) data)
+ {
+ /* Matched a previous propagation barrier. */
+ _uw lp;
+ /* Record the RTTI list for __cxa_call_unexpected. */
+ ucbp->barrier_cache.bitpattern[1] = rtti_count;
+ ucbp->barrier_cache.bitpattern[2] = 0;
+ ucbp->barrier_cache.bitpattern[3] = 4;
+ ucbp->barrier_cache.bitpattern[4] = (_uw) &data[1];
+
+ if (data[0] & uint32_highbit)
+ phase2_call_unexpected_after_unwind = 1;
+ else
+ {
+ data += rtti_count + 1;
+		  /* Set up for entry to the handler.  */
+ lp = selfrel_offset31 (data);
+ data++;
+ _Unwind_SetGR (context, R_PC, lp);
+ _Unwind_SetGR (context, 0, (_uw) ucbp);
+ return _URC_INSTALL_CONTEXT;
+ }
+ }
+ if (data[0] & uint32_highbit)
+ data++;
+ data += rtti_count + 1;
+ break;
+
+ default:
+ /* Should never happen. */
+ return _URC_FAILURE;
+ }
+ /* Finished processing this descriptor. */
+ }
+ }
+
+ if (__gnu_unwind_execute (context, &uws) != _URC_OK)
+ return _URC_FAILURE;
+
+ if (phase2_call_unexpected_after_unwind)
+ {
+      /* Enter __cxa_call_unexpected as if called from the call site.  */
+ _Unwind_SetGR (context, R_LR, _Unwind_GetGR (context, R_PC));
+ _Unwind_SetGR (context, R_PC, (_uw) &__cxa_call_unexpected);
+ return _URC_INSTALL_CONTEXT;
+ }
+
+ return _URC_CONTINUE_UNWIND;
+}
+
+
+/* ABI defined personality routine entry points. */
+
+_Unwind_Reason_Code
+__aeabi_unwind_cpp_pr0 (_Unwind_State state,
+ _Unwind_Control_Block *ucbp,
+ _Unwind_Context *context)
+{
+ return __gnu_unwind_pr_common (state, ucbp, context, 0);
+}
+
+_Unwind_Reason_Code
+__aeabi_unwind_cpp_pr1 (_Unwind_State state,
+ _Unwind_Control_Block *ucbp,
+ _Unwind_Context *context)
+{
+ return __gnu_unwind_pr_common (state, ucbp, context, 1);
+}
+
+_Unwind_Reason_Code
+__aeabi_unwind_cpp_pr2 (_Unwind_State state,
+ _Unwind_Control_Block *ucbp,
+ _Unwind_Context *context)
+{
+ return __gnu_unwind_pr_common (state, ucbp, context, 2);
+}
diff --git a/gcc/config/arm/unwind-arm.h b/gcc/config/arm/unwind-arm.h
new file mode 100644
index 000000000..a9ba1267a
--- /dev/null
+++ b/gcc/config/arm/unwind-arm.h
@@ -0,0 +1,281 @@
+/* Header file for the ARM EABI unwinder
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+ Contributed by Paul Brook
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Language-independent unwinder header public defines.  This contains
+   both ABI defined objects and GNU support routines.  */
+
+#ifndef UNWIND_ARM_H
+#define UNWIND_ARM_H
+
+#define __ARM_EABI_UNWINDER__ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ typedef unsigned _Unwind_Word __attribute__((__mode__(__word__)));
+ typedef signed _Unwind_Sword __attribute__((__mode__(__word__)));
+ typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__)));
+ typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__)));
+ typedef _Unwind_Word _uw;
+ typedef unsigned _uw64 __attribute__((mode(__DI__)));
+ typedef unsigned _uw16 __attribute__((mode(__HI__)));
+ typedef unsigned _uw8 __attribute__((mode(__QI__)));
+
+ typedef enum
+ {
+ _URC_OK = 0, /* operation completed successfully */
+ _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+ _URC_END_OF_STACK = 5,
+ _URC_HANDLER_FOUND = 6,
+ _URC_INSTALL_CONTEXT = 7,
+ _URC_CONTINUE_UNWIND = 8,
+ _URC_FAILURE = 9 /* unspecified failure of some kind */
+ }
+ _Unwind_Reason_Code;
+
+ typedef enum
+ {
+ _US_VIRTUAL_UNWIND_FRAME = 0,
+ _US_UNWIND_FRAME_STARTING = 1,
+ _US_UNWIND_FRAME_RESUME = 2,
+ _US_ACTION_MASK = 3,
+ _US_FORCE_UNWIND = 8,
+ _US_END_OF_STACK = 16
+ }
+ _Unwind_State;
+
+  /* Provided only for compatibility with existing code.  */
+ typedef int _Unwind_Action;
+#define _UA_SEARCH_PHASE 1
+#define _UA_CLEANUP_PHASE 2
+#define _UA_HANDLER_FRAME 4
+#define _UA_FORCE_UNWIND 8
+#define _UA_END_OF_STACK 16
+#define _URC_NO_REASON _URC_OK
+
+ typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+ typedef struct _Unwind_Context _Unwind_Context;
+ typedef _uw _Unwind_EHT_Header;
+
+
+ /* UCB: */
+
+ struct _Unwind_Control_Block
+ {
+ char exception_class[8];
+ void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);
+ /* Unwinder cache, private fields for the unwinder's use */
+ struct
+ {
+ _uw reserved1; /* Forced unwind stop fn, 0 if not forced */
+ _uw reserved2; /* Personality routine address */
+ _uw reserved3; /* Saved callsite address */
+ _uw reserved4; /* Forced unwind stop arg */
+ _uw reserved5;
+ }
+ unwinder_cache;
+ /* Propagation barrier cache (valid after phase 1): */
+ struct
+ {
+ _uw sp;
+ _uw bitpattern[5];
+ }
+ barrier_cache;
+ /* Cleanup cache (preserved over cleanup): */
+ struct
+ {
+ _uw bitpattern[4];
+ }
+ cleanup_cache;
+ /* Pr cache (for pr's benefit): */
+ struct
+ {
+ _uw fnstart; /* function start address */
+ _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */
+ _uw additional; /* additional data */
+ _uw reserved1;
+ }
+ pr_cache;
+ long long int :0; /* Force alignment to 8-byte boundary */
+ };
+
+  /* Virtual Register Set.  */
+
+ typedef enum
+ {
+ _UVRSC_CORE = 0, /* integer register */
+ _UVRSC_VFP = 1, /* vfp */
+ _UVRSC_FPA = 2, /* fpa */
+ _UVRSC_WMMXD = 3, /* Intel WMMX data register */
+ _UVRSC_WMMXC = 4 /* Intel WMMX control register */
+ }
+ _Unwind_VRS_RegClass;
+
+ typedef enum
+ {
+ _UVRSD_UINT32 = 0,
+ _UVRSD_VFPX = 1,
+ _UVRSD_FPAX = 2,
+ _UVRSD_UINT64 = 3,
+ _UVRSD_FLOAT = 4,
+ _UVRSD_DOUBLE = 5
+ }
+ _Unwind_VRS_DataRepresentation;
+
+ typedef enum
+ {
+ _UVRSR_OK = 0,
+ _UVRSR_NOT_IMPLEMENTED = 1,
+ _UVRSR_FAILED = 2
+ }
+ _Unwind_VRS_Result;
+
+ /* Frame unwinding state. */
+ typedef struct
+ {
+ /* The current word (bytes packed msb first). */
+ _uw data;
+ /* Pointer to the next word of data. */
+ _uw *next;
+ /* The number of bytes left in this word. */
+ _uw8 bytes_left;
+    /* The number of words pointed to by next.  */
+ _uw8 words_left;
+ }
+ __gnu_unwind_state;
+
+ typedef _Unwind_Reason_Code (*personality_routine) (_Unwind_State,
+ _Unwind_Control_Block *, _Unwind_Context *);
+
+ _Unwind_VRS_Result _Unwind_VRS_Set(_Unwind_Context *, _Unwind_VRS_RegClass,
+ _uw, _Unwind_VRS_DataRepresentation,
+ void *);
+
+ _Unwind_VRS_Result _Unwind_VRS_Get(_Unwind_Context *, _Unwind_VRS_RegClass,
+ _uw, _Unwind_VRS_DataRepresentation,
+ void *);
+
+ _Unwind_VRS_Result _Unwind_VRS_Pop(_Unwind_Context *, _Unwind_VRS_RegClass,
+ _uw, _Unwind_VRS_DataRepresentation);
+
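+  /* For example, the saved PC of the frame being unwound can be read
+     with:
+
+	 _uw pc;
+	 _Unwind_VRS_Get (context, _UVRSC_CORE, 15, _UVRSD_UINT32, &pc);  */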
+
+ /* Support functions for the PR. */
+#define _Unwind_Exception _Unwind_Control_Block
+ typedef char _Unwind_Exception_Class[8];
+
+ void * _Unwind_GetLanguageSpecificData (_Unwind_Context *);
+ _Unwind_Ptr _Unwind_GetRegionStart (_Unwind_Context *);
+
+ /* These two should never be used. */
+ _Unwind_Ptr _Unwind_GetDataRelBase (_Unwind_Context *);
+ _Unwind_Ptr _Unwind_GetTextRelBase (_Unwind_Context *);
+
+ /* Interface functions: */
+ _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Control_Block *ucbp);
+ void __attribute__((noreturn)) _Unwind_Resume(_Unwind_Control_Block *ucbp);
+ _Unwind_Reason_Code _Unwind_Resume_or_Rethrow (_Unwind_Control_Block *ucbp);
+
+ typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
+ (int, _Unwind_Action, _Unwind_Exception_Class,
+ _Unwind_Control_Block *, struct _Unwind_Context *, void *);
+ _Unwind_Reason_Code _Unwind_ForcedUnwind (_Unwind_Control_Block *,
+ _Unwind_Stop_Fn, void *);
+ /* @@@ Use unwind data to perform a stack backtrace. The trace callback
+ is called for every stack frame in the call chain, but no cleanup
+ actions are performed. */
+ typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn) (_Unwind_Context *, void *);
+ _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn,
+ void*);
+
+ _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *);
+ void _Unwind_Complete(_Unwind_Control_Block *ucbp);
+ void _Unwind_DeleteException (_Unwind_Exception *);
+
+ _Unwind_Reason_Code __gnu_unwind_frame (_Unwind_Control_Block *,
+ _Unwind_Context *);
+ _Unwind_Reason_Code __gnu_unwind_execute (_Unwind_Context *,
+ __gnu_unwind_state *);
+
+ /* Decode an R_ARM_TARGET2 relocation. */
+ static inline _Unwind_Word
+ _Unwind_decode_target2 (_Unwind_Word ptr)
+ {
+ _Unwind_Word tmp;
+
+ tmp = *(_Unwind_Word *) ptr;
+ /* Zero values are always NULL. */
+ if (!tmp)
+ return 0;
+
+#if (defined(linux) && !defined(__uClinux__)) || defined(__NetBSD__)
+ /* Pc-relative indirect. */
+ tmp += ptr;
+ tmp = *(_Unwind_Word *) tmp;
+#elif defined(__symbian__) || defined(__uClinux__)
+ /* Absolute pointer. Nothing more to do. */
+#else
+ /* Pc-relative pointer. */
+ tmp += ptr;
+#endif
+ return tmp;
+ }
+
+ static inline _Unwind_Word
+ _Unwind_GetGR (_Unwind_Context *context, int regno)
+ {
+ _uw val;
+ _Unwind_VRS_Get (context, _UVRSC_CORE, regno, _UVRSD_UINT32, &val);
+ return val;
+ }
+
+ /* Return the address of the instruction, not the actual IP value. */
+#define _Unwind_GetIP(context) \
+ (_Unwind_GetGR (context, 15) & ~(_Unwind_Word)1)
+
+#define _Unwind_GetIPInfo(context, ip_before_insn) \
+ (*ip_before_insn = 0, _Unwind_GetGR (context, 15) & ~(_Unwind_Word)1)
+
+ static inline void
+ _Unwind_SetGR (_Unwind_Context *context, int regno, _Unwind_Word val)
+ {
+ _Unwind_VRS_Set (context, _UVRSC_CORE, regno, _UVRSD_UINT32, &val);
+ }
+
+ /* The dwarf unwinder doesn't understand arm/thumb state. We assume the
+ landing pad uses the same instruction set as the call site. */
+#define _Unwind_SetIP(context, val) \
+ _Unwind_SetGR (context, 15, val | (_Unwind_GetGR (context, 15) & 1))
+
+/* leb128 type numbers have a potentially unlimited size.  The following
+   definitions of _sleb128_t and _uleb128_t provide efficient data types
+   large enough to hold the leb128 type numbers used in the unwind code.  */
+typedef long _sleb128_t;
+typedef unsigned long _uleb128_t;
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* defined UNWIND_ARM_H */
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
new file mode 100644
index 000000000..c27c41411
--- /dev/null
+++ b/gcc/config/arm/vec-common.md
@@ -0,0 +1,110 @@
+;; Machine Description for shared bits common to IWMMXT and Neon.
+;; Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Vector Moves
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VALL 0 "nonimmediate_operand" "")
+ (match_operand:VALL 1 "general_operand" ""))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ else if (TARGET_NEON && CONSTANT_P (operands[1]))
+ {
+ operands[1] = neon_make_constant (operands[1]);
+ gcc_assert (operands[1] != NULL_RTX);
+ }
+ }
+})
+
+;; Vector arithmetic.  The expanders are blank; unnamed insn patterns
+;; implement the operations separately for IWMMXT and Neon.
+
+(define_expand "add<mode>3"
+ [(set (match_operand:VALL 0 "s_register_operand" "")
+ (plus:VALL (match_operand:VALL 1 "s_register_operand" "")
+ (match_operand:VALL 2 "s_register_operand" "")))]
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:VALL 0 "s_register_operand" "")
+ (minus:VALL (match_operand:VALL 1 "s_register_operand" "")
+ (match_operand:VALL 2 "s_register_operand" "")))]
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:VALLW 0 "s_register_operand" "")
+ (mult:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+ (match_operand:VALLW 2 "s_register_operand" "")))]
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
+{
+})
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:VALLW 0 "s_register_operand" "")
+ (smin:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+ (match_operand:VALLW 2 "s_register_operand" "")))]
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:VINTW 0 "s_register_operand" "")
+ (umin:VINTW (match_operand:VINTW 1 "s_register_operand" "")
+ (match_operand:VINTW 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:VALLW 0 "s_register_operand" "")
+ (smax:VALLW (match_operand:VALLW 1 "s_register_operand" "")
+ (match_operand:VALLW 2 "s_register_operand" "")))]
+ "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
+ || flag_unsafe_math_optimizations))
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umax<mode>3"
+ [(set (match_operand:VINTW 0 "s_register_operand" "")
+ (umax:VINTW (match_operand:VINTW 1 "s_register_operand" "")
+ (match_operand:VINTW 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
new file mode 100644
index 000000000..1ac2d0c2d
--- /dev/null
+++ b/gcc/config/arm/vfp.md
@@ -0,0 +1,1153 @@
+;; ARM VFP instruction patterns
+;; Copyright (C) 2003, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Additional register numbers
+(define_constants
+ [(VFPCC_REGNUM 127)]
+)
+
+;; The VFP "type" attributes differ from those used in the FPA model.
+;; fcpys Single precision cpy.
+;; ffariths Single precision abs, neg.
+;; ffarithd Double precision abs, neg, cpy.
+;; fadds Single precision add/sub.
+;; faddd Double precision add/sub.
+;; fconsts Single precision load immediate.
+;; fconstd Double precision load immediate.
+;; fcmps Single precision comparison.
+;; fcmpd Double precision comparison.
+;; fmuls Single precision multiply.
+;; fmuld Double precision multiply.
+;; fmacs Single precision multiply-accumulate.
+;; fmacd Double precision multiply-accumulate.
+;; fdivs Single precision sqrt or division.
+;; fdivd Double precision sqrt or division.
+;; f_flag fmstat operation
+;; f_load[sd] Floating point load from memory.
+;; f_store[sd] Floating point store to memory.
+;; f_2_r Transfer vfp to arm reg.
+;; r_2_f Transfer arm to vfp reg.
+;; f_cvt Convert floating<->integral
+
+;; SImode moves
+;; ??? For now do not allow loading constants into vfp regs. This causes
+;; problems because small constants get converted into adds.
+(define_insn "*arm_movsi_vfp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))]
+ "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
+ && ( s_register_operand (operands[0], SImode)
+ || s_register_operand (operands[1], SImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ return \"mov%?\\t%0, %1\";
+ case 2:
+ return \"mvn%?\\t%0, #%B1\";
+ case 3:
+ return \"movw%?\\t%0, %1\";
+ case 4:
+ return \"ldr%?\\t%0, %1\";
+ case 5:
+ return \"str%?\\t%1, %0\";
+ case 6:
+ return \"fmsr%?\\t%0, %1\\t%@ int\";
+ case 7:
+ return \"fmrs%?\\t%0, %1\\t%@ int\";
+ case 8:
+ return \"fcpys%?\\t%0, %1\\t%@ int\";
+ case 9: case 10:
+ return output_move_vfp (operands);
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
+ (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
+)
+
+;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
+;; high/low register alternatives for loads and stores here.
+(define_insn "*thumb2_movsi_vfp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
+ "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
+ && ( s_register_operand (operands[0], SImode)
+ || s_register_operand (operands[1], SImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ return \"mov%?\\t%0, %1\";
+ case 2:
+ return \"mvn%?\\t%0, #%B1\";
+ case 3:
+ return \"movw%?\\t%0, %1\";
+ case 4:
+ case 5:
+ return \"ldr%?\\t%0, %1\";
+ case 6:
+ case 7:
+ return \"str%?\\t%1, %0\";
+ case 8:
+ return \"fmsr%?\\t%0, %1\\t%@ int\";
+ case 9:
+ return \"fmrs%?\\t%0, %1\\t%@ int\";
+ case 10:
+ return \"fcpys%?\\t%0, %1\\t%@ int\";
+ case 11: case 12:
+ return output_move_vfp (operands);
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+ (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
+ (set_attr "pool_range" "*,*,*,*,1020,4096,*,*,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
+)
+
+
+;; DImode moves
+
+(define_insn "*arm_movdi_vfp"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv")
+ (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ return \"#\";
+ case 1:
+ case 2:
+ return output_move_double (operands);
+ case 3:
+ return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
+ case 4:
+ return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
+ case 5:
+ if (TARGET_VFP_SINGLE)
+ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
+ else
+ return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
+ case 6: case 7:
+ return output_move_vfp (operands);
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (eq (symbol_ref "TARGET_VFP_SINGLE")
+ (const_int 1))
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,1020,*,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")]
+)
+
+(define_insn "*thumb2_movdi_vfp"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv")
+ (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 1: case 2:
+ return (output_move_double (operands));
+ case 3:
+ return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
+ case 4:
+ return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
+ case 5:
+ if (TARGET_VFP_SINGLE)
+ return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
+ else
+ return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
+ case 6: case 7:
+ return output_move_vfp (operands);
+ default:
+ abort ();
+ }
+ "
+ [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
+ (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (eq (symbol_ref "TARGET_VFP_SINGLE")
+ (const_int 1))
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+ (set_attr "pool_range" "*,4096,*,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")]
+)
+
+;; HFmode moves
+(define_insn "*movhf_vfp_neon"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
+ (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: /* S register from memory */
+ return \"vld1.16\\t{%z0}, %A1\";
+ case 1: /* memory from S register */
+ return \"vst1.16\\t{%z1}, %A0\";
+ case 2: /* ARM register from memory */
+ return \"ldrh\\t%0, %1\\t%@ __fp16\";
+ case 3: /* memory from ARM register */
+ return \"strh\\t%1, %0\\t%@ __fp16\";
+ case 4: /* S register from S register */
+ return \"fcpys\\t%0, %1\";
+ case 5: /* ARM register from ARM register */
+ return \"mov\\t%0, %1\\t%@ __fp16\";
+ case 6: /* S register from ARM register */
+ return \"fmsr\\t%0, %1\";
+ case 7: /* ARM register from S register */
+ return \"fmrs\\t%0, %1\";
+ case 8: /* ARM register from constant */
+ {
+ REAL_VALUE_TYPE r;
+ long bits;
+ rtx ops[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ bits = real_to_target (NULL, &r, HFmode);
+ ops[0] = operands[0];
+ ops[1] = GEN_INT (bits);
+ ops[2] = GEN_INT (bits & 0xff00);
+ ops[3] = GEN_INT (bits & 0x00ff);
+
+ if (arm_arch_thumb2)
+ output_asm_insn (\"movw\\t%0, %1\", ops);
+ else
+ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
+ return \"\";
+ }
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "conds" "unconditional")
+ (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*")
+ (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,8")]
+)
+
+;; FP16 without element load/store instructions.
+(define_insn "*movhf_vfp"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r")
+ (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16
+ && ( s_register_operand (operands[0], HFmode)
+ || s_register_operand (operands[1], HFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: /* ARM register from memory */
+ return \"ldrh\\t%0, %1\\t%@ __fp16\";
+ case 1: /* memory from ARM register */
+ return \"strh\\t%1, %0\\t%@ __fp16\";
+ case 2: /* S register from S register */
+ return \"fcpys\\t%0, %1\";
+ case 3: /* ARM register from ARM register */
+ return \"mov\\t%0, %1\\t%@ __fp16\";
+ case 4: /* S register from ARM register */
+ return \"fmsr\\t%0, %1\";
+ case 5: /* ARM register from S register */
+ return \"fmrs\\t%0, %1\";
+ case 6: /* ARM register from constant */
+ {
+ REAL_VALUE_TYPE r;
+ long bits;
+ rtx ops[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ bits = real_to_target (NULL, &r, HFmode);
+ ops[0] = operands[0];
+ ops[1] = GEN_INT (bits);
+ ops[2] = GEN_INT (bits & 0xff00);
+ ops[3] = GEN_INT (bits & 0x00ff);
+
+ if (arm_arch_thumb2)
+ output_asm_insn (\"movw\\t%0, %1\", ops);
+ else
+ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
+ return \"\";
+ }
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "conds" "unconditional")
+ (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*")
+ (set_attr "length" "4,4,4,4,4,4,8")]
+)
+
+
+;; SFmode moves
+;; Disparage the w<->r cases because reloading an invalid address is
+;; preferable to loading the value via integer registers.
+
+(define_insn "*movsf_vfp"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r")
+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( s_register_operand (operands[0], SFmode)
+ || s_register_operand (operands[1], SFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ return \"fmsr%?\\t%0, %1\";
+ case 1:
+ return \"fmrs%?\\t%0, %1\";
+ case 2:
+ return \"fconsts%?\\t%0, #%G1\";
+ case 3: case 4:
+ return output_move_vfp (operands);
+ case 5:
+ return \"ldr%?\\t%0, %1\\t%@ float\";
+ case 6:
+ return \"str%?\\t%1, %0\\t%@ float\";
+ case 7:
+ return \"fcpys%?\\t%0, %1\";
+ case 8:
+ return \"mov%?\\t%0, %1\\t%@ float\";
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "type"
+ "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+ (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
+ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
+)
+
+(define_insn "*thumb2_movsf_vfp"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t ,Uv,r ,m,t,r")
+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( s_register_operand (operands[0], SFmode)
+ || s_register_operand (operands[1], SFmode))"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ return \"fmsr%?\\t%0, %1\";
+ case 1:
+ return \"fmrs%?\\t%0, %1\";
+ case 2:
+ return \"fconsts%?\\t%0, #%G1\";
+ case 3: case 4:
+ return output_move_vfp (operands);
+ case 5:
+ return \"ldr%?\\t%0, %1\\t%@ float\";
+ case 6:
+ return \"str%?\\t%1, %0\\t%@ float\";
+ case 7:
+ return \"fcpys%?\\t%0, %1\";
+ case 8:
+ return \"mov%?\\t%0, %1\\t%@ float\";
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "type"
+ "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
+ (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
+ (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+)
+
+
+;; DFmode moves
+
+(define_insn "*movdf_vfp"
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+ && ( register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return \"fmdrr%?\\t%P0, %Q1, %R1\";
+ case 1:
+ return \"fmrrd%?\\t%Q0, %R0, %P1\";
+ case 2:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"fconstd%?\\t%P0, #%G1\";
+ case 3: case 4:
+ return output_move_double (operands);
+ case 5: case 6:
+ return output_move_vfp (operands);
+ case 7:
+ if (TARGET_VFP_SINGLE)
+ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+ else
+ return \"fcpyd%?\\t%P0, %P1\";
+ case 8:
+ return \"#\";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ "
+ [(set_attr "type"
+ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
+ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (eq (symbol_ref "TARGET_VFP_SINGLE")
+ (const_int 1))
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+ (set_attr "predicable" "yes")
+ (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
+)
+
+(define_insn "*thumb2_movdf_vfp"
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,mF,r,UvF,w, w,r"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return \"fmdrr%?\\t%P0, %Q1, %R1\";
+ case 1:
+ return \"fmrrd%?\\t%Q0, %R0, %P1\";
+ case 2:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"fconstd%?\\t%P0, #%G1\";
+ case 3: case 4: case 8:
+ return output_move_double (operands);
+ case 5: case 6:
+ return output_move_vfp (operands);
+ case 7:
+ if (TARGET_VFP_SINGLE)
+ return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+ else
+ return \"fcpyd%?\\t%P0, %P1\";
+ default:
+ abort ();
+ }
+ }
+ "
+ [(set_attr "type"
+ "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
+ (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (eq (symbol_ref "TARGET_VFP_SINGLE")
+ (const_int 1))
+ (const_int 8)
+ (const_int 4))]
+ (const_int 4)))
+ (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
+)
+
+
+;; Conditional move patterns
+
+(define_insn "*movsfcc_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r")
+ (if_then_else:SF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
+ (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
+ "@
+ fcpys%D3\\t%0, %2
+ fcpys%d3\\t%0, %1
+ fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
+ fmsr%D3\\t%0, %2
+ fmsr%d3\\t%0, %1
+ fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
+ fmrs%D3\\t%0, %2
+ fmrs%d3\\t%0, %1
+ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,4,8,4,4,8,4,4,8")
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+)
+
+(define_insn "*thumb2_movsfcc_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r")
+ (if_then_else:SF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
+ (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "@
+ it\\t%D3\;fcpys%D3\\t%0, %2
+ it\\t%d3\;fcpys%d3\\t%0, %1
+ ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
+ it\\t%D3\;fmsr%D3\\t%0, %2
+ it\\t%d3\;fmsr%d3\\t%0, %1
+ ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
+ it\\t%D3\;fmrs%D3\\t%0, %2
+ it\\t%d3\;fmrs%d3\\t%0, %1
+ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,6,10,6,6,10,6,6,10")
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+)
+
+(define_insn "*movdfcc_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r")
+ (if_then_else:DF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
+ (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
+ "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "@
+ fcpyd%D3\\t%P0, %P2
+ fcpyd%d3\\t%P0, %P1
+ fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
+ fmdrr%D3\\t%P0, %Q2, %R2
+ fmdrr%d3\\t%P0, %Q1, %R1
+ fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
+ fmrrd%D3\\t%Q0, %R0, %P2
+ fmrrd%d3\\t%Q0, %R0, %P1
+ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "4,4,8,4,4,8,4,4,8")
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+)
+
+(define_insn "*thumb2_movdfcc_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r")
+ (if_then_else:DF
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
+ (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "@
+ it\\t%D3\;fcpyd%D3\\t%P0, %P2
+ it\\t%d3\;fcpyd%d3\\t%P0, %P1
+ ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
+ it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2
+ it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1
+ ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
+ it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2
+ it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1
+ ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+ [(set_attr "conds" "use")
+ (set_attr "length" "6,6,10,6,6,10,6,6,10")
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+)
+
+
+;; Sign manipulation functions
+
+(define_insn "*abssf2_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (abs:SF (match_operand:SF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fabss%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "ffariths")]
+)
+
+(define_insn "*absdf2_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (abs:DF (match_operand:DF 1 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fabsd%?\\t%P0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "ffarithd")]
+)
+
+(define_insn "*negsf2_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t,?r")
+ (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "@
+ fnegs%?\\t%0, %1
+ eor%?\\t%0, %1, #-2147483648"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "ffariths")]
+)
+
+(define_insn_and_split "*negdf2_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r")
+ (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "@
+ fnegd%?\\t%P0, %P1
+ #
+ #"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed
+ && arm_general_register_operand (operands[0], DFmode)"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+ operands[0] = gen_highpart (SImode, operands[0]);
+ operands[1] = gen_rtx_XOR (SImode, operands[0], GEN_INT (0x80000000));
+ }
+ else
+ {
+ rtx in_hi, in_lo, out_hi, out_lo;
+
+ in_hi = gen_rtx_XOR (SImode, gen_highpart (SImode, operands[1]),
+ GEN_INT (0x80000000));
+ in_lo = gen_lowpart (SImode, operands[1]);
+ out_hi = gen_highpart (SImode, operands[0]);
+ out_lo = gen_lowpart (SImode, operands[0]);
+
+ if (REGNO (in_lo) == REGNO (out_hi))
+ {
+ emit_insn (gen_rtx_SET (SImode, out_lo, in_lo));
+ operands[0] = out_hi;
+ operands[1] = in_hi;
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (SImode, out_hi, in_hi));
+ operands[0] = out_lo;
+ operands[1] = in_lo;
+ }
+ }
+ "
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4,4,8")
+ (set_attr "type" "ffarithd")]
+)
+
+
+;; Arithmetic insns
+
+(define_insn "*addsf3_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (plus:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fadds%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fadds")]
+)
+
+(define_insn "*adddf3_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (plus:DF (match_operand:DF 1 "s_register_operand" "w")
+ (match_operand:DF 2 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "faddd%?\\t%P0, %P1, %P2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "faddd")]
+)
+
+
+(define_insn "*subsf3_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fsubs%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fadds")]
+)
+
+(define_insn "*subdf3_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (minus:DF (match_operand:DF 1 "s_register_operand" "w")
+ (match_operand:DF 2 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fsubd%?\\t%P0, %P1, %P2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "faddd")]
+)
+
+
+;; Division insns
+
+(define_insn "*divsf3_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (div:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fdivs%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fdivs")]
+)
+
+(define_insn "*divdf3_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "+w")
+ (div:DF (match_operand:DF 1 "s_register_operand" "w")
+ (match_operand:DF 2 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fdivd%?\\t%P0, %P1, %P2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fdivd")]
+)
+
+
+;; Multiplication insns
+
+(define_insn "*mulsf3_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (mult:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fmuls%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmuls")]
+)
+
+(define_insn "*muldf3_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "+w")
+ (mult:DF (match_operand:DF 1 "s_register_operand" "w")
+ (match_operand:DF 2 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fmuld%?\\t%P0, %P1, %P2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmuld")]
+)
+
+
+(define_insn "*mulsf3negsf_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
+ (match_operand:SF 2 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fnmuls%?\\t%0, %1, %2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmuls")]
+)
+
+(define_insn "*muldf3negdf_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "+w")
+ (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w"))
+ (match_operand:DF 2 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fnmuld%?\\t%P0, %P1, %P2"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmuld")]
+)
+
+
+;; Multiply-accumulate insns
+
+;; 0 = 1 * 2 + 0
+(define_insn "*mulsf3addsf_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))
+ (match_operand:SF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fmacs%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacs")]
+)
+
+(define_insn "*muldf3adddf_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w")
+ (match_operand:DF 3 "s_register_operand" "w"))
+ (match_operand:DF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fmacd%?\\t%P0, %P2, %P3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacd")]
+)
+
+;; 0 = 1 * 2 - 0
+(define_insn "*mulsf3subsf_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))
+ (match_operand:SF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fmscs%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacs")]
+)
+
+(define_insn "*muldf3subdf_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (minus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w")
+ (match_operand:DF 3 "s_register_operand" "w"))
+ (match_operand:DF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fmscd%?\\t%P0, %P2, %P3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacd")]
+)
+
+;; 0 = -(1 * 2) + 0
+(define_insn "*mulsf3negsfaddsf_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (match_operand:SF 1 "s_register_operand" "0")
+ (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fnmacs%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacs")]
+)
+
+(define_insn "*fmuldf3negdfadddf_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (minus:DF (match_operand:DF 1 "s_register_operand" "0")
+ (mult:DF (match_operand:DF 2 "s_register_operand" "w")
+ (match_operand:DF 3 "s_register_operand" "w"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fnmacd%?\\t%P0, %P2, %P3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacd")]
+)
+
+
+;; 0 = -(1 * 2) - 0
+(define_insn "*mulsf3negsfsubsf_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (mult:SF
+ (neg:SF (match_operand:SF 2 "s_register_operand" "t"))
+ (match_operand:SF 3 "s_register_operand" "t"))
+ (match_operand:SF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fnmscs%?\\t%0, %2, %3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacs")]
+)
+
+(define_insn "*muldf3negdfsubdf_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (minus:DF (mult:DF
+ (neg:DF (match_operand:DF 2 "s_register_operand" "w"))
+ (match_operand:DF 3 "s_register_operand" "w"))
+ (match_operand:DF 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fnmscd%?\\t%P0, %P2, %P3"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fmacd")]
+)
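+
+;; An illustrative C rendering (not authoritative) of the four fused forms
+;; matched above, following the "0 = ..." comments:
+;;
+;;   float fmacs_form  (float a, float b, float c) { return  b * c + a; }
+;;   float fmscs_form  (float a, float b, float c) { return  b * c - a; }
+;;   float fnmacs_form (float a, float b, float c) { return  a - b * c; }
+;;   float fnmscs_form (float a, float b, float c) { return -b * c - a; }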
+
+
+;; Conversion routines
+
+(define_insn "*extendsfdf2_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fcvtds%?\\t%P0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "*truncdfsf2_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fcvtsd%?\\t%0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "extendhfsf2"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+ "vcvtb%?.f32.f16\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "truncsfhf2"
+ [(set (match_operand:HF 0 "s_register_operand" "=t")
+ (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+ "vcvtb%?.f16.f32\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
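+
+;; Illustrative C for the two half-precision patterns above, assuming the
+;; compiler is invoked with an -mfp16-format option so __fp16 is available:
+;;
+;;   __fp16 h = ...;
+;;   float  f = h;       /* extendhfsf2 -> vcvtb.f32.f16 */
+;;   __fp16 g = f;       /* truncsfhf2  -> vcvtb.f16.f32 */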
+
+(define_insn "*truncsisf2_vfp"
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "ftosizs%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "*truncsidf2_vfp"
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "ftosizd%?\\t%0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+
+(define_insn "fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "ftouizs%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "fixuns_truncdfsi2"
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+	(unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "ftouizd%?\\t%0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+
+(define_insn "*floatsisf2_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (float:SF (match_operand:SI 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fsitos%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "*floatsidf2_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (float:DF (match_operand:SI 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fsitod%?\\t%P0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+
+(define_insn "floatunssisf2"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fuitos%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
+
+(define_insn "floatunssidf2"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fuitod%?\\t%P0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "f_cvt")]
+)
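+
+;; Taken together, the patterns above cover the C casts between 32-bit
+;; integers and float; an illustrative mapping:
+;;
+;;   int      si = (int) f;        /* *truncsisf2_vfp   -> ftosizs */
+;;   unsigned ui = (unsigned) f;   /* fixuns_truncsfsi2 -> ftouizs */
+;;   float    f1 = si;             /* *floatsisf2_vfp   -> fsitos  */
+;;   float    f2 = ui;             /* floatunssisf2     -> fuitos  */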
+
+
+;; Sqrt insns.
+
+(define_insn "*sqrtsf2_vfp"
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fsqrts%?\\t%0, %1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fdivs")]
+)
+
+(define_insn "*sqrtdf2_vfp"
+ [(set (match_operand:DF 0 "s_register_operand" "=w")
+ (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "fsqrtd%?\\t%P0, %P1"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fdivd")]
+)
+
+
+;; Patterns to split/copy vfp condition flags.
+
+(define_insn "*movcc_vfp"
+ [(set (reg CC_REGNUM)
+ (reg VFPCC_REGNUM))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "fmstat%?"
+ [(set_attr "conds" "set")
+ (set_attr "type" "f_flag")]
+)
+
+(define_insn_and_split "*cmpsf_split_vfp"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:SF 0 "s_register_operand" "t")
+ (match_operand:SF 1 "vfp_compare_operand" "tG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "#"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ [(set (reg:CCFP VFPCC_REGNUM)
+ (compare:CCFP (match_dup 0)
+ (match_dup 1)))
+ (set (reg:CCFP CC_REGNUM)
+ (reg:CCFP VFPCC_REGNUM))]
+ ""
+)
+
+(define_insn_and_split "*cmpsf_trap_split_vfp"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t")
+ (match_operand:SF 1 "vfp_compare_operand" "tG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "#"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ [(set (reg:CCFPE VFPCC_REGNUM)
+ (compare:CCFPE (match_dup 0)
+ (match_dup 1)))
+ (set (reg:CCFPE CC_REGNUM)
+ (reg:CCFPE VFPCC_REGNUM))]
+ ""
+)
+
+(define_insn_and_split "*cmpdf_split_vfp"
+ [(set (reg:CCFP CC_REGNUM)
+ (compare:CCFP (match_operand:DF 0 "s_register_operand" "w")
+ (match_operand:DF 1 "vfp_compare_operand" "wG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "#"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ [(set (reg:CCFP VFPCC_REGNUM)
+ (compare:CCFP (match_dup 0)
+ (match_dup 1)))
+ (set (reg:CCFP CC_REGNUM)
+ (reg:CCFP VFPCC_REGNUM))]
+ ""
+)
+
+(define_insn_and_split "*cmpdf_trap_split_vfp"
+ [(set (reg:CCFPE CC_REGNUM)
+ (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w")
+ (match_operand:DF 1 "vfp_compare_operand" "wG")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "#"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ [(set (reg:CCFPE VFPCC_REGNUM)
+ (compare:CCFPE (match_dup 0)
+ (match_dup 1)))
+ (set (reg:CCFPE CC_REGNUM)
+ (reg:CCFPE VFPCC_REGNUM))]
+ ""
+)
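+
+;; The splits above exist because VFP compares set the VFP status flags
+;; rather than the ARM CPSR, so a conditional branch needs two insns.  An
+;; illustrative sequence for comparing s0 with s1:
+;;
+;;   fcmps   s0, s1    @ sets VFPCC (the *cmpsf_vfp pattern below)
+;;   fmstat            @ copies VFPCC to CPSR (the *movcc_vfp pattern above)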
+
+
+;; Comparison patterns
+
+(define_insn "*cmpsf_vfp"
+ [(set (reg:CCFP VFPCC_REGNUM)
+ (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t")
+ (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "@
+ fcmps%?\\t%0, %1
+ fcmpzs%?\\t%0"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fcmps")]
+)
+
+(define_insn "*cmpsf_trap_vfp"
+ [(set (reg:CCFPE VFPCC_REGNUM)
+ (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t")
+ (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "@
+ fcmpes%?\\t%0, %1
+ fcmpezs%?\\t%0"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fcmps")]
+)
+
+(define_insn "*cmpdf_vfp"
+ [(set (reg:CCFP VFPCC_REGNUM)
+ (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w")
+ (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "@
+ fcmpd%?\\t%P0, %P1
+ fcmpzd%?\\t%P0"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fcmpd")]
+)
+
+(define_insn "*cmpdf_trap_vfp"
+ [(set (reg:CCFPE VFPCC_REGNUM)
+ (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w")
+ (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "@
+ fcmped%?\\t%P0, %P1
+ fcmpezd%?\\t%P0"
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "fcmpd")]
+)
+
+
+;; Store multiple insn used in function prologue.
+
+(define_insn "*push_multi_vfp"
+ [(match_parallel 2 "multi_register_push"
+ [(set (match_operand:BLK 0 "memory_operand" "=m")
+ (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
+ UNSPEC_PUSH_MULT))])]
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+ "* return vfp_output_fstmd (operands);"
+ [(set_attr "type" "f_stored")]
+)
+
+
+;; Unimplemented insns:
+;; fldm*
+;; fstm*
+;; fmdhr et al (VFPv1)
+;; Support for xD (single precision only) variants.
+;; fmrrs, fmsrr
diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md
new file mode 100644
index 000000000..8f863fd70
--- /dev/null
+++ b/gcc/config/arm/vfp11.md
@@ -0,0 +1,92 @@
+;; ARM VFP11 pipeline description
+;; Copyright (C) 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "vfp11")
+
+;; There are 3 pipelines in the VFP11 unit.
+;;
+;; - An 8-stage FMAC pipeline (7 execute + writeback) with forwarding from
+;;   the fourth stage for simple operations.
+;;
+;; - A 5-stage DS pipeline (4 execute + writeback) for divide/sqrt insns.
+;;   These insns also use the first execute stage of the FMAC pipeline.
+;;
+;; - A 4-stage LS pipeline (execute + 2 memory + writeback) with forwarding
+;;   from the second memory stage for loads.
+
+;; We do not model Write-After-Read hazards.
+;; We do not do write scheduling with the arm core, so it is only necessary
+;; to model the first stage of each pipeline.
+;; ??? Need to model LS pipeline properly for load/store multiple?
+;; We do not model fmstat properly. This could be done by modeling pipelines
+;; properly and defining an absence set between a dummy fmstat unit and all
+;; other vfp units.
+
+(define_cpu_unit "fmac" "vfp11")
+
+(define_cpu_unit "ds" "vfp11")
+
+(define_cpu_unit "vfp_ls" "vfp11")
+
+(define_cpu_unit "fmstat" "vfp11")
+
+(exclusion_set "fmac,ds" "fmstat")
+
+(define_insn_reservation "vfp_ffarith" 4
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
+ "fmac")
+
+(define_insn_reservation "vfp_farith" 8
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs"))
+ "fmac")
+
+(define_insn_reservation "vfp_fmul" 9
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "fmuld,fmacd"))
+ "fmac*2")
+
+(define_insn_reservation "vfp_fdivs" 19
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "fdivs"))
+ "ds*15")
+
+(define_insn_reservation "vfp_fdivd" 33
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "fdivd"))
+ "fmac+ds*29")
+
+;; Moves to/from arm regs also use the load/store pipeline.
+(define_insn_reservation "vfp_fload" 4
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "f_loads,f_loadd,r_2_f"))
+ "vfp_ls")
+
+(define_insn_reservation "vfp_fstore" 4
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "f_stores,f_stored,f_2_r"))
+ "vfp_ls")
+
+(define_insn_reservation "vfp_to_cpsr" 4
+ (and (eq_attr "generic_vfp" "yes")
+ (eq_attr "type" "f_flag"))
+ "fmstat,vfp_ls*3")
+
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
new file mode 100644
index 000000000..3ceaed903
--- /dev/null
+++ b/gcc/config/arm/vxworks.h
@@ -0,0 +1,113 @@
+/* Definitions of target machine for GCC,
+   for ARM targeting the VxWorks run-time environment.
+ Copyright (C) 1999, 2000, 2003, 2004, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ Contributed by: Mike Stump <mrs@wrs.com>
+ Brought up to date by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ if (TARGET_BIG_END) \
+ builtin_define ("ARMEB"); \
+ else \
+ builtin_define ("ARMEL"); \
+ \
+ if (arm_arch_xscale) \
+ builtin_define ("CPU=XSCALE"); \
+ else if (arm_arch5) \
+ builtin_define ("CPU=ARMARCH5"); \
+ else if (arm_arch4) \
+ { \
+ if (thumb_code) \
+ builtin_define ("CPU=ARMARCH4_T"); \
+ else \
+ builtin_define ("CPU=ARMARCH4"); \
+ } \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } while (0)
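+
+/* Illustrative (hypothetical) user code keying off the builtins defined
+   above; BIG_ENDIAN_P is a made-up name for illustration:
+
+     #if defined (ARMEB)
+     #  define BIG_ENDIAN_P 1
+     #elif defined (ARMEL)
+     #  define BIG_ENDIAN_P 0
+     #endif
+*/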
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS
+
+/* Subsume the arm/elf.h definition, and add RTP hooks. */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "-D__ELF__" VXWORKS_ADDITIONAL_CPP_SPEC
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+"%{tstrongarm:-mlittle-endian -mcpu=strongarm ; \
+ t4: -mlittle-endian -march=armv4 ; \
+ t4be: -mbig-endian -march=armv4 ; \
+ t4t: -mthumb -mthumb-interwork -mlittle-endian -march=armv4t ; \
+ t4tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv4t ; \
+ t5: -mlittle-endian -march=armv5 ; \
+ t5be: -mbig-endian -march=armv5 ; \
+ t5t: -mthumb -mthumb-interwork -mlittle-endian -march=armv5 ; \
+ t5tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv5 ; \
+ txscale: -mlittle-endian -mcpu=xscale ; \
+ txscalebe: -mbig-endian -mcpu=xscale ; \
+ : -march=armv4}"
+
+/* Pass -EB for big-endian targets. */
+#define VXWORKS_ENDIAN_SPEC \
+ "%{mbig-endian|t4be|t4tbe|t5be|t5tbe|txscalebe:-EB}"
+
+#undef SUBTARGET_EXTRA_ASM_SPEC
+#define SUBTARGET_EXTRA_ASM_SPEC VXWORKS_ENDIAN_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_ENDIAN_SPEC
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (ARM/VxWorks)", stderr);
+
+/* There is no default multilib. */
+#undef MULTILIB_DEFAULTS
+
+#define FPUTYPE_DEFAULT "vfp"
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
+
+/* We want to stay compatible with the "2.96" compiler used at one
+   point in the past, before this macro was changed.  */
+#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY
+#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8
+
+/* The kernel loader does not allow relocations to overflow, so we
+ cannot allow arbitrary relocation addends in kernel modules or RTP
+ executables. Also, the dynamic loader uses the resolved relocation
+ value to distinguish references to the text and data segments, so we
+ cannot allow arbitrary offsets for shared libraries either. */
+#undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
+#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1
+
+#undef TARGET_DEFAULT_WORD_RELOCATIONS
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/arm/vxworks.opt b/gcc/config/arm/vxworks.opt
new file mode 100644
index 000000000..bc8478391
--- /dev/null
+++ b/gcc/config/arm/vxworks.opt
@@ -0,0 +1,60 @@
+; ARM VxWorks options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+t4
+Driver
+
+t4be
+Driver
+
+t4t
+Driver
+
+t4tbe
+Driver
+
+t5
+Driver
+
+t5be
+Driver
+
+t5t
+Driver
+
+t5tbe
+Driver
+
+tstrongarm
+Driver
+
+txscale
+Driver
+
+txscalebe
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/arm/wince-pe.h b/gcc/config/arm/wince-pe.h
new file mode 100644
index 000000000..ffaa0c6c3
--- /dev/null
+++ b/gcc/config/arm/wince-pe.h
@@ -0,0 +1,26 @@
+/* Definitions of target machine for GNU compiler, for ARM with the WINCE-PE object format.
+ Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Nick Clifton <nickc@redhat.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_NOP_FUN_DLLIMPORT)
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { "marm", "mlittle-endian", "msoft-float", "mno-thumb-interwork" }
diff --git a/gcc/config/avr/avr-c.c b/gcc/config/avr/avr-c.c
new file mode 100644
index 000000000..05e8e8b30
--- /dev/null
+++ b/gcc/config/avr/avr-c.c
@@ -0,0 +1,85 @@
+/* Copyright (C) 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Anatoly Sokolov (aesok@post.ru)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tm_p.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "c-family/c-common.h"
+
+/* Not included in avr.c since this requires the C front end.  */
+
+/* Worker function for TARGET_CPU_CPP_BUILTINS. */
+
+void
+avr_cpu_cpp_builtins (struct cpp_reader *pfile)
+{
+ builtin_define_std ("AVR");
+
+ if (avr_current_arch->macro)
+ cpp_define (pfile, avr_current_arch->macro);
+ if (avr_extra_arch_macro)
+ cpp_define (pfile, avr_extra_arch_macro);
+  if (avr_current_arch->have_elpm)
+    {
+      cpp_define (pfile, "__AVR_HAVE_RAMPZ__");
+      cpp_define (pfile, "__AVR_HAVE_ELPM__");
+    }
+ if (avr_current_arch->have_elpmx)
+ cpp_define (pfile, "__AVR_HAVE_ELPMX__");
+ if (avr_current_arch->have_movw_lpmx)
+ {
+ cpp_define (pfile, "__AVR_HAVE_MOVW__");
+ cpp_define (pfile, "__AVR_HAVE_LPMX__");
+ }
+ if (avr_current_arch->asm_only)
+ cpp_define (pfile, "__AVR_ASM_ONLY__");
+ if (avr_current_arch->have_mul)
+ {
+ cpp_define (pfile, "__AVR_ENHANCED__");
+ cpp_define (pfile, "__AVR_HAVE_MUL__");
+ }
+ if (avr_current_arch->have_jmp_call)
+ {
+ cpp_define (pfile, "__AVR_MEGA__");
+ cpp_define (pfile, "__AVR_HAVE_JMP_CALL__");
+ }
+ if (avr_current_arch->have_eijmp_eicall)
+ {
+ cpp_define (pfile, "__AVR_HAVE_EIJMP_EICALL__");
+ cpp_define (pfile, "__AVR_3_BYTE_PC__");
+ }
+ else
+ {
+ cpp_define (pfile, "__AVR_2_BYTE_PC__");
+ }
+
+ if (avr_current_device->short_sp)
+ cpp_define (pfile, "__AVR_HAVE_8BIT_SP__");
+ else
+ cpp_define (pfile, "__AVR_HAVE_16BIT_SP__");
+
+ if (TARGET_NO_INTERRUPTS)
+ cpp_define (pfile, "__NO_INTERRUPTS__");
+}
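+
+/* Illustrative (hypothetical) user code relying on the macros defined
+   above, e.g. selecting a multiply strategy:
+
+     #ifdef __AVR_HAVE_MUL__
+       extern int mul_hw (int, int);    hardware MUL available
+     #else
+       extern int mul_sw (int, int);    shift-and-add fallback
+     #endif
+*/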
+
diff --git a/gcc/config/avr/avr-devices.c b/gcc/config/avr/avr-devices.c
new file mode 100755
index 000000000..91ca95e0f
--- /dev/null
+++ b/gcc/config/avr/avr-devices.c
@@ -0,0 +1,229 @@
+/* Copyright (C) 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Anatoly Sokolov (aesok@post.ru)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* List of all known AVR MCU architectures. */
+
+const struct base_arch_s avr_arch_types[] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, NULL, "avr2" }, /* unknown device specified */
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=1", "avr1" },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=2", "avr2" },
+ { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=25", "avr25" },
+ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=3", "avr3" },
+ { 0, 0, 1, 0, 1, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=31", "avr31" },
+ { 0, 0, 1, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=35", "avr35" },
+ { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=4", "avr4" },
+ { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0x0060, "__AVR_ARCH__=5", "avr5" },
+ { 0, 1, 1, 1, 1, 1, 0, 0, 0, 0x0060, "__AVR_ARCH__=51", "avr51" },
+ { 0, 1, 1, 1, 1, 1, 1, 0, 0, 0x0060, "__AVR_ARCH__=6", "avr6" }
+};
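+
+/* The flag columns above correspond, in order, to the base_arch_s feature
+   bits tested in avr-c.c: asm_only, have_mul, have_jmp_call, have_movw_lpmx,
+   have_elpm, have_elpmx and have_eijmp_eicall, followed by two reserved
+   fields, the default data-section start (0x0060 here), the predefined
+   macro and the architecture name.  (Ordering inferred from the
+   initializers and the uses in avr-c.c.)  */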
+
+/* List of all known AVR MCU types - if updated, it has to be kept
+ in sync in several places (FIXME: is there a better way?):
+ - here;
+ - t-avr (MULTILIB_MATCHES);
+ - gas/config/tc-avr.c;
+ - avr-libc. */
+
+const struct mcu_type_s avr_mcu_types[] = {
+ /* Classic, <= 8K. */
+ { "avr2", ARCH_AVR2, NULL, 0, 0x0060, "s8515" },
+ { "at90s2313", ARCH_AVR2, "__AVR_AT90S2313__", 1, 0x0060, "s2313" },
+ { "at90s2323", ARCH_AVR2, "__AVR_AT90S2323__", 1, 0x0060, "s2323" },
+ { "at90s2333", ARCH_AVR2, "__AVR_AT90S2333__", 1, 0x0060, "s2333" },
+ { "at90s2343", ARCH_AVR2, "__AVR_AT90S2343__", 1, 0x0060, "s2343" },
+ { "attiny22", ARCH_AVR2, "__AVR_ATtiny22__", 1, 0x0060, "tn22" },
+ { "attiny26", ARCH_AVR2, "__AVR_ATtiny26__", 1, 0x0060, "tn26" },
+ { "at90s4414", ARCH_AVR2, "__AVR_AT90S4414__", 0, 0x0060, "s4414" },
+ { "at90s4433", ARCH_AVR2, "__AVR_AT90S4433__", 1, 0x0060, "s4433" },
+ { "at90s4434", ARCH_AVR2, "__AVR_AT90S4434__", 0, 0x0060, "s4434" },
+ { "at90s8515", ARCH_AVR2, "__AVR_AT90S8515__", 0, 0x0060, "s8515" },
+ { "at90c8534", ARCH_AVR2, "__AVR_AT90C8534__", 0, 0x0060, "c8534" },
+ { "at90s8535", ARCH_AVR2, "__AVR_AT90S8535__", 0, 0x0060, "s8535" },
+ /* Classic + MOVW, <= 8K. */
+ { "avr25", ARCH_AVR25, NULL, 0, 0x0060, "tn85" },
+ { "ata6289", ARCH_AVR25, "__AVR_ATA6289__", 0, 0x0100, "a6289" },
+ { "attiny13", ARCH_AVR25, "__AVR_ATtiny13__", 1, 0x0060, "tn13" },
+ { "attiny13a", ARCH_AVR25, "__AVR_ATtiny13A__", 1, 0x0060, "tn13a" },
+ { "attiny2313", ARCH_AVR25, "__AVR_ATtiny2313__", 1, 0x0060, "tn2313" },
+ { "attiny2313a", ARCH_AVR25, "__AVR_ATtiny2313A__", 1, 0x0060, "tn2313a" },
+ { "attiny24", ARCH_AVR25, "__AVR_ATtiny24__", 1, 0x0060, "tn24" },
+ { "attiny24a", ARCH_AVR25, "__AVR_ATtiny24A__", 1, 0x0060, "tn24a" },
+ { "attiny4313", ARCH_AVR25, "__AVR_ATtiny4313__", 0, 0x0060, "tn4313" },
+ { "attiny44", ARCH_AVR25, "__AVR_ATtiny44__", 0, 0x0060, "tn44" },
+ { "attiny44a", ARCH_AVR25, "__AVR_ATtiny44A__", 0, 0x0060, "tn44a" },
+ { "attiny84", ARCH_AVR25, "__AVR_ATtiny84__", 0, 0x0060, "tn84" },
+ { "attiny84a", ARCH_AVR25, "__AVR_ATtiny84A__", 0, 0x0060, "tn84" },
+ { "attiny25", ARCH_AVR25, "__AVR_ATtiny25__", 1, 0x0060, "tn25" },
+ { "attiny45", ARCH_AVR25, "__AVR_ATtiny45__", 0, 0x0060, "tn45" },
+ { "attiny85", ARCH_AVR25, "__AVR_ATtiny85__", 0, 0x0060, "tn85" },
+ { "attiny261", ARCH_AVR25, "__AVR_ATtiny261__", 1, 0x0060, "tn261" },
+ { "attiny261a", ARCH_AVR25, "__AVR_ATtiny261A__", 1, 0x0060, "tn261a" },
+ { "attiny461", ARCH_AVR25, "__AVR_ATtiny461__", 0, 0x0060, "tn461" },
+ { "attiny461a", ARCH_AVR25, "__AVR_ATtiny461A__", 0, 0x0060, "tn461a" },
+ { "attiny861", ARCH_AVR25, "__AVR_ATtiny861__", 0, 0x0060, "tn861" },
+ { "attiny861a", ARCH_AVR25, "__AVR_ATtiny861A__", 0, 0x0060, "tn861a" },
+ { "attiny43u", ARCH_AVR25, "__AVR_ATtiny43U__", 0, 0x0060, "tn43u" },
+ { "attiny87", ARCH_AVR25, "__AVR_ATtiny87__", 0, 0x0100, "tn87" },
+ { "attiny48", ARCH_AVR25, "__AVR_ATtiny48__", 0, 0x0100, "tn48" },
+ { "attiny88", ARCH_AVR25, "__AVR_ATtiny88__", 0, 0x0100, "tn88" },
+ { "at86rf401", ARCH_AVR25, "__AVR_AT86RF401__", 0, 0x0060, "86401" },
+ /* Classic, > 8K, <= 64K. */
+ { "avr3", ARCH_AVR3, NULL, 0, 0x0060, "43355" },
+ { "at43usb355", ARCH_AVR3, "__AVR_AT43USB355__", 0, 0x0060, "43355" },
+ { "at76c711", ARCH_AVR3, "__AVR_AT76C711__", 0, 0x0060, "76711" },
+ /* Classic, == 128K. */
+ { "avr31", ARCH_AVR31, NULL, 0, 0x0060, "m103" },
+ { "atmega103", ARCH_AVR31, "__AVR_ATmega103__", 0, 0x0060, "m103" },
+ { "at43usb320", ARCH_AVR31, "__AVR_AT43USB320__", 0, 0x0060, "43320" },
+ /* Classic + MOVW + JMP/CALL. */
+ { "avr35", ARCH_AVR35, NULL, 0, 0x0100, "usb162" },
+ { "at90usb82", ARCH_AVR35, "__AVR_AT90USB82__", 0, 0x0100, "usb82" },
+ { "at90usb162", ARCH_AVR35, "__AVR_AT90USB162__", 0, 0x0100, "usb162" },
+ { "atmega8u2", ARCH_AVR35, "__AVR_ATmega8U2__", 0, 0x0100, "m8u2" },
+ { "atmega16u2", ARCH_AVR35, "__AVR_ATmega16U2__", 0, 0x0100, "m16u2" },
+ { "atmega32u2", ARCH_AVR35, "__AVR_ATmega32U2__", 0, 0x0100, "m32u2" },
+ { "attiny167", ARCH_AVR35, "__AVR_ATtiny167__", 0, 0x0100, "tn167" },
+ /* Enhanced, <= 8K. */
+ { "avr4", ARCH_AVR4, NULL, 0, 0x0060, "m8" },
+ { "atmega8", ARCH_AVR4, "__AVR_ATmega8__", 0, 0x0060, "m8" },
+ { "atmega48", ARCH_AVR4, "__AVR_ATmega48__", 0, 0x0100, "m48" },
+ { "atmega48a", ARCH_AVR4, "__AVR_ATmega48A__", 0, 0x0100, "m48a" },
+ { "atmega48p", ARCH_AVR4, "__AVR_ATmega48P__", 0, 0x0100, "m48p" },
+ { "atmega88", ARCH_AVR4, "__AVR_ATmega88__", 0, 0x0100, "m88" },
+ { "atmega88a", ARCH_AVR4, "__AVR_ATmega88A__", 0, 0x0100, "m88a" },
+ { "atmega88p", ARCH_AVR4, "__AVR_ATmega88P__", 0, 0x0100, "m88p" },
+ { "atmega88pa", ARCH_AVR4, "__AVR_ATmega88PA__", 0, 0x0100, "m88pa" },
+ { "atmega8515", ARCH_AVR4, "__AVR_ATmega8515__", 0, 0x0060, "m8515" },
+ { "atmega8535", ARCH_AVR4, "__AVR_ATmega8535__", 0, 0x0060, "m8535" },
+ { "atmega8hva", ARCH_AVR4, "__AVR_ATmega8HVA__", 0, 0x0100, "m8hva" },
+ { "at90pwm1", ARCH_AVR4, "__AVR_AT90PWM1__", 0, 0x0100, "90pwm1" },
+ { "at90pwm2", ARCH_AVR4, "__AVR_AT90PWM2__", 0, 0x0100, "90pwm2" },
+ { "at90pwm2b", ARCH_AVR4, "__AVR_AT90PWM2B__", 0, 0x0100, "90pwm2b" },
+ { "at90pwm3", ARCH_AVR4, "__AVR_AT90PWM3__", 0, 0x0100, "90pwm3" },
+ { "at90pwm3b", ARCH_AVR4, "__AVR_AT90PWM3B__", 0, 0x0100, "90pwm3b" },
+ { "at90pwm81", ARCH_AVR4, "__AVR_AT90PWM81__", 0, 0x0100, "90pwm81" },
+ /* Enhanced, > 8K, <= 64K. */
+ { "avr5", ARCH_AVR5, NULL, 0, 0x0060, "m16" },
+ { "atmega16", ARCH_AVR5, "__AVR_ATmega16__", 0, 0x0060, "m16" },
+ { "atmega16a", ARCH_AVR5, "__AVR_ATmega16A__", 0, 0x0060, "m16a" },
+ { "atmega161", ARCH_AVR5, "__AVR_ATmega161__", 0, 0x0060, "m161" },
+ { "atmega162", ARCH_AVR5, "__AVR_ATmega162__", 0, 0x0100, "m162" },
+ { "atmega163", ARCH_AVR5, "__AVR_ATmega163__", 0, 0x0060, "m163" },
+ { "atmega164a", ARCH_AVR5, "__AVR_ATmega164A__", 0, 0x0100, "m164a" },
+ { "atmega164p", ARCH_AVR5, "__AVR_ATmega164P__", 0, 0x0100, "m164p" },
+ { "atmega165", ARCH_AVR5, "__AVR_ATmega165__", 0, 0x0100, "m165" },
+ { "atmega165a", ARCH_AVR5, "__AVR_ATmega165A__", 0, 0x0100, "m165a" },
+ { "atmega165p", ARCH_AVR5, "__AVR_ATmega165P__", 0, 0x0100, "m165p" },
+ { "atmega168", ARCH_AVR5, "__AVR_ATmega168__", 0, 0x0100, "m168" },
+ { "atmega168a", ARCH_AVR5, "__AVR_ATmega168A__", 0, 0x0100, "m168a" },
+ { "atmega168p", ARCH_AVR5, "__AVR_ATmega168P__", 0, 0x0100, "m168p" },
+ { "atmega169", ARCH_AVR5, "__AVR_ATmega169__", 0, 0x0100, "m169" },
+ { "atmega169a", ARCH_AVR5, "__AVR_ATmega169A__", 0, 0x0100, "m169a" },
+ { "atmega169p", ARCH_AVR5, "__AVR_ATmega169P__", 0, 0x0100, "m169p" },
+ { "atmega169pa", ARCH_AVR5, "__AVR_ATmega169PA__", 0, 0x0100, "m169pa" },
+ { "atmega32", ARCH_AVR5, "__AVR_ATmega32__", 0, 0x0060, "m32" },
+ { "atmega323", ARCH_AVR5, "__AVR_ATmega323__", 0, 0x0060, "m323" },
+ { "atmega324a", ARCH_AVR5, "__AVR_ATmega324A__", 0, 0x0100, "m324a" },
+ { "atmega324p", ARCH_AVR5, "__AVR_ATmega324P__", 0, 0x0100, "m324p" },
+ { "atmega324pa", ARCH_AVR5, "__AVR_ATmega324PA__", 0, 0x0100, "m324pa" },
+ { "atmega325", ARCH_AVR5, "__AVR_ATmega325__", 0, 0x0100, "m325" },
+ { "atmega325a", ARCH_AVR5, "__AVR_ATmega325A__", 0, 0x0100, "m325a" },
+ { "atmega325p", ARCH_AVR5, "__AVR_ATmega325P__", 0, 0x0100, "m325p" },
+ { "atmega3250", ARCH_AVR5, "__AVR_ATmega3250__", 0, 0x0100, "m3250" },
+ { "atmega3250a", ARCH_AVR5, "__AVR_ATmega3250A__", 0, 0x0100, "m3250a" },
+ { "atmega3250p", ARCH_AVR5, "__AVR_ATmega3250P__", 0, 0x0100, "m3250p" },
+ { "atmega328", ARCH_AVR5, "__AVR_ATmega328__", 0, 0x0100, "m328" },
+ { "atmega328p", ARCH_AVR5, "__AVR_ATmega328P__", 0, 0x0100, "m328p" },
+ { "atmega329", ARCH_AVR5, "__AVR_ATmega329__", 0, 0x0100, "m329" },
+ { "atmega329a", ARCH_AVR5, "__AVR_ATmega329A__", 0, 0x0100, "m329a" },
+ { "atmega329p", ARCH_AVR5, "__AVR_ATmega329P__", 0, 0x0100, "m329p" },
+ { "atmega329pa", ARCH_AVR5, "__AVR_ATmega329PA__", 0, 0x0100, "m329pa" },
+ { "atmega3290", ARCH_AVR5, "__AVR_ATmega3290__", 0, 0x0100, "m3290" },
+ { "atmega3290a", ARCH_AVR5, "__AVR_ATmega3290A__", 0, 0x0100, "m3290a" },
+ { "atmega3290p", ARCH_AVR5, "__AVR_ATmega3290P__", 0, 0x0100, "m3290p" },
+ { "atmega406", ARCH_AVR5, "__AVR_ATmega406__", 0, 0x0100, "m406" },
+ { "atmega64", ARCH_AVR5, "__AVR_ATmega64__", 0, 0x0100, "m64" },
+ { "atmega640", ARCH_AVR5, "__AVR_ATmega640__", 0, 0x0200, "m640" },
+ { "atmega644", ARCH_AVR5, "__AVR_ATmega644__", 0, 0x0100, "m644" },
+ { "atmega644a", ARCH_AVR5, "__AVR_ATmega644A__", 0, 0x0100, "m644a" },
+ { "atmega644p", ARCH_AVR5, "__AVR_ATmega644P__", 0, 0x0100, "m644p" },
+ { "atmega644pa", ARCH_AVR5, "__AVR_ATmega644PA__", 0, 0x0100, "m644pa" },
+ { "atmega645", ARCH_AVR5, "__AVR_ATmega645__", 0, 0x0100, "m645" },
+ { "atmega645a", ARCH_AVR5, "__AVR_ATmega645A__", 0, 0x0100, "m645a" },
+ { "atmega645p", ARCH_AVR5, "__AVR_ATmega645P__", 0, 0x0100, "m645p" },
+ { "atmega6450", ARCH_AVR5, "__AVR_ATmega6450__", 0, 0x0100, "m6450" },
+ { "atmega6450a", ARCH_AVR5, "__AVR_ATmega6450A__", 0, 0x0100, "m6450a" },
+ { "atmega6450p", ARCH_AVR5, "__AVR_ATmega6450P__", 0, 0x0100, "m6450p" },
+ { "atmega649", ARCH_AVR5, "__AVR_ATmega649__", 0, 0x0100, "m649" },
+ { "atmega649a", ARCH_AVR5, "__AVR_ATmega649A__", 0, 0x0100, "m649a" },
+ { "atmega649p", ARCH_AVR5, "__AVR_ATmega649P__", 0, 0x0100, "m649p" },
+ { "atmega6490", ARCH_AVR5, "__AVR_ATmega6490__", 0, 0x0100, "m6490" },
+ { "atmega16hva", ARCH_AVR5, "__AVR_ATmega16HVA__", 0, 0x0100, "m16hva" },
+ { "atmega16hva2", ARCH_AVR5, "__AVR_ATmega16HVA2__", 0, 0x0100, "m16hva2" },
+ { "atmega16hvb", ARCH_AVR5, "__AVR_ATmega16HVB__", 0, 0x0100, "m16hvb" },
+ { "atmega32hvb", ARCH_AVR5, "__AVR_ATmega32HVB__", 0, 0x0100, "m32hvb" },
+ { "atmega64hve", ARCH_AVR5, "__AVR_ATmega64HVE__", 0, 0x0100, "m64hve" },
+ { "at90can32", ARCH_AVR5, "__AVR_AT90CAN32__", 0, 0x0100, "can32" },
+ { "at90can64", ARCH_AVR5, "__AVR_AT90CAN64__", 0, 0x0100, "can64" },
+ { "at90pwm216", ARCH_AVR5, "__AVR_AT90PWM216__", 0, 0x0100, "90pwm216" },
+ { "at90pwm316", ARCH_AVR5, "__AVR_AT90PWM316__", 0, 0x0100, "90pwm316" },
+ { "atmega32c1", ARCH_AVR5, "__AVR_ATmega32C1__", 0, 0x0100, "m32c1" },
+ { "atmega64c1", ARCH_AVR5, "__AVR_ATmega64C1__", 0, 0x0100, "m64c1" },
+ { "atmega16m1", ARCH_AVR5, "__AVR_ATmega16M1__", 0, 0x0100, "m16m1" },
+ { "atmega32m1", ARCH_AVR5, "__AVR_ATmega32M1__", 0, 0x0100, "m32m1" },
+ { "atmega64m1", ARCH_AVR5, "__AVR_ATmega64M1__", 0, 0x0100, "m64m1" },
+ { "atmega16u4", ARCH_AVR5, "__AVR_ATmega16U4__", 0, 0x0100, "m16u4" },
+ { "atmega32u4", ARCH_AVR5, "__AVR_ATmega32U4__", 0, 0x0100, "m32u4" },
+ { "atmega32u6", ARCH_AVR5, "__AVR_ATmega32U6__", 0, 0x0100, "m32u6" },
+ { "at90scr100", ARCH_AVR5, "__AVR_AT90SCR100__", 0, 0x0100, "90scr100" },
+ { "at90usb646", ARCH_AVR5, "__AVR_AT90USB646__", 0, 0x0100, "usb646" },
+ { "at90usb647", ARCH_AVR5, "__AVR_AT90USB647__", 0, 0x0100, "usb647" },
+ { "at94k", ARCH_AVR5, "__AVR_AT94K__", 0, 0x0060, "at94k" },
+ { "m3000", ARCH_AVR5, "__AVR_M3000__", 0, 0x1000, "m3000" },
+ /* Enhanced, == 128K. */
+ { "avr51", ARCH_AVR51, NULL, 0, 0x0100, "m128" },
+ { "atmega128", ARCH_AVR51, "__AVR_ATmega128__", 0, 0x0100, "m128" },
+ { "atmega1280", ARCH_AVR51, "__AVR_ATmega1280__", 0, 0x0200, "m1280" },
+ { "atmega1281", ARCH_AVR51, "__AVR_ATmega1281__", 0, 0x0200, "m1281" },
+ { "atmega1284p", ARCH_AVR51, "__AVR_ATmega1284P__", 0, 0x0100, "m1284p" },
+ { "atmega128rfa1", ARCH_AVR51, "__AVR_ATmega128RFA1__", 0, 0x0200, "m128rfa1" },
+ { "at90can128", ARCH_AVR51, "__AVR_AT90CAN128__", 0, 0x0100, "can128" },
+ { "at90usb1286", ARCH_AVR51, "__AVR_AT90USB1286__", 0, 0x0100, "usb1286" },
+ { "at90usb1287", ARCH_AVR51, "__AVR_AT90USB1287__", 0, 0x0100, "usb1287" },
+ /* 3-Byte PC. */
+ { "avr6", ARCH_AVR6, NULL, 0, 0x0200, "m2561" },
+ { "atmega2560", ARCH_AVR6, "__AVR_ATmega2560__", 0, 0x0200, "m2560" },
+ { "atmega2561", ARCH_AVR6, "__AVR_ATmega2561__", 0, 0x0200, "m2561" },
+ /* Assembler only. */
+ { "avr1", ARCH_AVR1, NULL, 0, 0x0060, "s1200" },
+ { "at90s1200", ARCH_AVR1, "__AVR_AT90S1200__", 0, 0x0060, "s1200" },
+ { "attiny11", ARCH_AVR1, "__AVR_ATtiny11__", 0, 0x0060, "tn11" },
+ { "attiny12", ARCH_AVR1, "__AVR_ATtiny12__", 0, 0x0060, "tn12" },
+ { "attiny15", ARCH_AVR1, "__AVR_ATtiny15__", 0, 0x0060, "tn15" },
+ { "attiny28", ARCH_AVR1, "__AVR_ATtiny28__", 0, 0x0060, "tn28" },
+ /* End of list. */
+ { NULL, ARCH_UNKNOWN, NULL, 0, 0, NULL }
+};
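+
+/* Example: "avr-gcc -mmcu=atmega328p" selects the ARCH_AVR5 row above, so
+   the compiler predefines __AVR_ATmega328P__ alongside the avr5
+   architecture macros.  */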
+
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
new file mode 100644
index 000000000..06c9254fd
--- /dev/null
+++ b/gcc/config/avr/avr-protos.h
@@ -0,0 +1,121 @@
+/* Prototypes for exported functions defined in avr.c
+
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Denis Chertykov (chertykov@gmail.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+extern int function_arg_regno_p (int r);
+extern void avr_cpu_cpp_builtins (struct cpp_reader * pfile);
+extern int avr_ret_register (void);
+extern enum reg_class avr_regno_reg_class (int r);
+extern void asm_globalize_label (FILE *file, const char *name);
+extern void avr_asm_declare_function_name (FILE *, const char *, tree);
+extern void order_regs_for_local_alloc (void);
+extern int avr_initial_elimination_offset (int from, int to);
+extern int avr_simple_epilogue (void);
+extern void gas_output_limited_string (FILE *file, const char *str);
+extern void gas_output_ascii (FILE *file, const char *str, size_t length);
+extern int avr_hard_regno_rename_ok (unsigned int, unsigned int);
+extern rtx avr_return_addr_rtx (int count, rtx tem);
+
+#ifdef TREE_CODE
+extern void asm_output_external (FILE *file, tree decl, char *name);
+extern int avr_progmem_p (tree decl, tree attributes);
+
+#ifdef RTX_CODE /* inside TREE_CODE */
+extern void init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
+ rtx libname, tree fndecl);
+#endif /* RTX_CODE inside TREE_CODE */
+
+#endif /* TREE_CODE */
+
+#ifdef RTX_CODE
+extern void asm_output_external_libcall (FILE *file, rtx symref);
+extern int compare_diff_p (rtx insn);
+extern const char *output_movqi (rtx insn, rtx operands[], int *l);
+extern const char *output_movhi (rtx insn, rtx operands[], int *l);
+extern const char *out_movqi_r_mr (rtx insn, rtx op[], int *l);
+extern const char *out_movqi_mr_r (rtx insn, rtx op[], int *l);
+extern const char *out_movhi_r_mr (rtx insn, rtx op[], int *l);
+extern const char *out_movhi_mr_r (rtx insn, rtx op[], int *l);
+extern const char *out_movsi_r_mr (rtx insn, rtx op[], int *l);
+extern const char *out_movsi_mr_r (rtx insn, rtx op[], int *l);
+extern const char *output_movsisf (rtx insn, rtx operands[], int *l);
+extern const char *out_tstsi (rtx insn, rtx src, int *l);
+extern const char *out_tsthi (rtx insn, rtx src, int *l);
+extern const char *ret_cond_branch (rtx x, int len, int reverse);
+
+extern const char *ashlqi3_out (rtx insn, rtx operands[], int *len);
+extern const char *ashlhi3_out (rtx insn, rtx operands[], int *len);
+extern const char *ashlsi3_out (rtx insn, rtx operands[], int *len);
+
+extern const char *ashrqi3_out (rtx insn, rtx operands[], int *len);
+extern const char *ashrhi3_out (rtx insn, rtx operands[], int *len);
+extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len);
+
+extern const char *lshrqi3_out (rtx insn, rtx operands[], int *len);
+extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len);
+extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len);
+extern bool avr_rotate_bytes (rtx operands[]);
+
+extern void expand_prologue (void);
+extern void expand_epilogue (void);
+extern int avr_epilogue_uses (int regno);
+
+extern void avr_output_bld (rtx operands[], int bit_nr);
+extern void avr_output_addr_vec_elt (FILE *stream, int value);
+extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]);
+
+extern int extra_constraint_Q (rtx x);
+extern int adjust_insn_length (rtx insn, int len);
+extern rtx avr_libcall_value (enum machine_mode mode);
+extern const char *output_reload_inhi (rtx insn, rtx *operands, int *len);
+extern const char *output_reload_insisf (rtx insn, rtx *operands, int *len);
+extern enum reg_class secondary_input_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx);
+extern void notice_update_cc (rtx body, rtx insn);
+extern void print_operand (FILE *file, rtx x, int code);
+extern void print_operand_address (FILE *file, rtx addr);
+extern int reg_unused_after (rtx insn, rtx reg);
+extern int _reg_unused_after (rtx insn, rtx reg);
+extern int avr_jump_mode (rtx x, rtx insn);
+extern int byte_immediate_operand (rtx op, enum machine_mode mode);
+extern int test_hard_reg_class (enum reg_class rclass, rtx x);
+extern int jump_over_one_insn_p (rtx insn, rtx dest);
+
+extern int avr_hard_regno_mode_ok (int regno, enum machine_mode mode);
+extern void final_prescan_insn (rtx insn, rtx *operand, int num_operands);
+extern int avr_simplify_comparison_p (enum machine_mode mode,
+ RTX_CODE op, rtx x);
+extern RTX_CODE avr_normalize_condition (RTX_CODE condition);
+extern int compare_eq_p (rtx insn);
+extern void out_shift_with_cnt (const char *templ, rtx insn,
+ rtx operands[], int *len, int t_len);
+extern rtx avr_incoming_return_addr_rtx (void);
+#endif /* RTX_CODE */
+
+#ifdef HAVE_MACHINE_MODES
+extern int class_max_nregs (enum reg_class rclass, enum machine_mode mode);
+#endif /* HAVE_MACHINE_MODES */
+
+#ifdef REAL_VALUE_TYPE
+extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n);
+#endif
diff --git a/gcc/config/avr/avr-stdint.h b/gcc/config/avr/avr-stdint.h
new file mode 100644
index 000000000..c3ec3ce9f
--- /dev/null
+++ b/gcc/config/avr/avr-stdint.h
@@ -0,0 +1,66 @@
+/* Definitions for <stdint.h> types on systems using newlib.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*
+   The intention of this file is to supply definitions that work with
+   avr-gcc's -mint8 option, which makes int an 8-bit type.
+
+   This file is intended to yield the same results as newlib-stdint.h,
+   with two differences:
+
+   - AVR is an 8-bit architecture that cannot access 16-bit values
+     atomically; thus SIG_ATOMIC_TYPE is "char".
+
+   - For the same reason, [u]int_fast8_t is defined as an 8-bit type.
+*/
+
+#define SIG_ATOMIC_TYPE "char"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "short int" : "long int")
+#define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int")
+#define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0)
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "short unsigned int" : "long unsigned int")
+#define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int")
+#define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0)
+
+#define INT_LEAST8_TYPE INT8_TYPE
+#define INT_LEAST16_TYPE INT16_TYPE
+#define INT_LEAST32_TYPE INT32_TYPE
+#define INT_LEAST64_TYPE INT64_TYPE
+#define UINT_LEAST8_TYPE UINT8_TYPE
+#define UINT_LEAST16_TYPE UINT16_TYPE
+#define UINT_LEAST32_TYPE UINT32_TYPE
+#define UINT_LEAST64_TYPE UINT64_TYPE
+
+#define INT_FAST8_TYPE INT8_TYPE
+#define INT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "int" : INT16_TYPE)
+#define INT_FAST32_TYPE INT32_TYPE
+#define INT_FAST64_TYPE INT64_TYPE
+#define UINT_FAST8_TYPE UINT8_TYPE
+#define UINT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : UINT16_TYPE)
+#define UINT_FAST32_TYPE UINT32_TYPE
+#define UINT_FAST64_TYPE UINT64_TYPE
+
+#define INTPTR_TYPE PTRDIFF_TYPE
+#ifndef UINTPTR_TYPE
+#define UINTPTR_TYPE SIZE_TYPE
+#endif
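+
+/* A minimal sketch (hypothetical test file) of what the selections above
+   yield with the default 16-bit int:
+
+     #include <stdint.h>
+     char check16[sizeof (int16_t) == 2 ? 1 : -1];
+     char check32[sizeof (int32_t) == 4 ? 1 : -1];
+
+   With -mint8 (INT_TYPE_SIZE == 8), the 16- and 32-bit types map to
+   "long int" and "long long int" instead, and the 64-bit types are
+   unavailable.  */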
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
new file mode 100644
index 000000000..e60857980
--- /dev/null
+++ b/gcc/config/avr/avr.c
@@ -0,0 +1,6416 @@
+/* Subroutines for insn-output.c for Atmel AVR microcontrollers
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Denis Chertykov (chertykov@gmail.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "reload.h"
+#include "tree.h"
+#include "output.h"
+#include "expr.h"
+#include "diagnostic-core.h"
+#include "obstack.h"
+#include "function.h"
+#include "recog.h"
+#include "ggc.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "params.h"
+#include "df.h"
+
+/* Maximum allowed offset for an address in the LD command.  */
+#define MAX_LD_OFFSET(MODE) (64 - (signed)GET_MODE_SIZE (MODE))
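+/* For example, a 2-byte HImode access may use displacements up to
+   MAX_LD_OFFSET (HImode) == 62, keeping the last byte within the
+   0..63 displacement range of LDD/STD.  */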
+
+static void avr_option_override (void);
+static int avr_naked_function_p (tree);
+static int interrupt_function_p (tree);
+static int signal_function_p (tree);
+static int avr_OS_task_function_p (tree);
+static int avr_OS_main_function_p (tree);
+static int avr_regs_to_save (HARD_REG_SET *);
+static int get_sequence_length (rtx insns);
+static int sequent_regs_live (void);
+static const char *ptrreg_to_str (int);
+static const char *cond_string (enum rtx_code);
+static int avr_num_arg_regs (enum machine_mode, const_tree);
+
+static RTX_CODE compare_condition (rtx insn);
+static rtx avr_legitimize_address (rtx, rtx, enum machine_mode);
+static int compare_sign_p (rtx insn);
+static tree avr_handle_progmem_attribute (tree *, tree, tree, int, bool *);
+static tree avr_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree avr_handle_fntype_attribute (tree *, tree, tree, int, bool *);
+static bool avr_assemble_integer (rtx, unsigned int, int);
+static void avr_file_start (void);
+static void avr_file_end (void);
+static bool avr_legitimate_address_p (enum machine_mode, rtx, bool);
+static void avr_asm_function_end_prologue (FILE *);
+static void avr_asm_function_begin_epilogue (FILE *);
+static bool avr_cannot_modify_jumps_p (void);
+static rtx avr_function_value (const_tree, const_tree, bool);
+static void avr_insert_attributes (tree, tree *);
+static void avr_asm_init_sections (void);
+static unsigned int avr_section_type_flags (tree, const char *, int);
+static void avr_encode_section_info (tree, rtx, int);
+static void avr_reorg (void);
+static void avr_asm_out_ctor (rtx, int);
+static void avr_asm_out_dtor (rtx, int);
+static int avr_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
+static int avr_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code, bool);
+static bool avr_rtx_costs (rtx, int, int, int *, bool);
+static int avr_address_cost (rtx, bool);
+static bool avr_return_in_memory (const_tree, const_tree);
+static struct machine_function * avr_init_machine_status (void);
+static rtx avr_builtin_setjmp_frame_value (void);
+static bool avr_hard_regno_scratch_ok (unsigned int);
+static unsigned int avr_case_values_threshold (void);
+static bool avr_frame_pointer_required_p (void);
+static bool avr_can_eliminate (const int, const int);
+static bool avr_allocate_stack_slots_for_args (void);
+static bool avr_class_likely_spilled_p (reg_class_t c);
+static rtx avr_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void avr_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void avr_help (void);
+
+/* Allocate registers from r25 to r8 for parameters for function calls. */
+#define FIRST_CUM_REG 26
+
+/* Temporary register RTX (gen_rtx_REG (QImode, TMP_REGNO)) */
+static GTY(()) rtx tmp_reg_rtx;
+
+/* Zeroed register RTX (gen_rtx_REG (QImode, ZERO_REGNO)) */
+static GTY(()) rtx zero_reg_rtx;
+
+/* AVR register names {"r0", "r1", ..., "r31"} */
+static const char *const avr_regnames[] = REGISTER_NAMES;
+
+/* Preprocessor macros to define depending on MCU type. */
+const char *avr_extra_arch_macro;
+
+/* Current architecture. */
+const struct base_arch_s *avr_current_arch;
+
+/* Current device. */
+const struct mcu_type_s *avr_current_device;
+
+section *progmem_section;
+
+/* AVR attributes. */
+static const struct attribute_spec avr_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "progmem", 0, 0, false, false, false, avr_handle_progmem_attribute },
+ { "signal", 0, 0, true, false, false, avr_handle_fndecl_attribute },
+ { "interrupt", 0, 0, true, false, false, avr_handle_fndecl_attribute },
+ { "naked", 0, 0, false, true, true, avr_handle_fntype_attribute },
+ { "OS_task", 0, 0, false, true, true, avr_handle_fntype_attribute },
+ { "OS_main", 0, 0, false, true, true, avr_handle_fntype_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options avr_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.long\t"
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.word\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER avr_assemble_integer
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START avr_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END avr_file_end
+
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE avr_asm_function_end_prologue
+#undef TARGET_ASM_FUNCTION_BEGIN_EPILOGUE
+#define TARGET_ASM_FUNCTION_BEGIN_EPILOGUE avr_asm_function_begin_epilogue
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE avr_function_value
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE avr_attribute_table
+#undef TARGET_ASM_FUNCTION_RODATA_SECTION
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES avr_insert_attributes
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS avr_section_type_flags
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO avr_encode_section_info
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST avr_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST avr_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS avr_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST avr_address_cost
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG avr_reorg
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG avr_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE avr_function_arg_advance
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS avr_legitimize_address
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY avr_return_in_memory
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE avr_builtin_setjmp_frame_value
+
+#undef TARGET_HARD_REGNO_SCRATCH_OK
+#define TARGET_HARD_REGNO_SCRATCH_OK avr_hard_regno_scratch_ok
+#undef TARGET_CASE_VALUES_THRESHOLD
+#define TARGET_CASE_VALUES_THRESHOLD avr_case_values_threshold
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P avr_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED avr_frame_pointer_required_p
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE avr_can_eliminate
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS avr_allocate_stack_slots_for_args
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P avr_class_likely_spilled_p
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE avr_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE avr_option_optimization_table
+
+#undef TARGET_CANNOT_MODIFY_JUMPS_P
+#define TARGET_CANNOT_MODIFY_JUMPS_P avr_cannot_modify_jumps_p
+
+#undef TARGET_HELP
+#define TARGET_HELP avr_help
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+static void
+avr_option_override (void)
+{
+ const struct mcu_type_s *t;
+
+ flag_delete_null_pointer_checks = 0;
+
+ for (t = avr_mcu_types; t->name; t++)
+ if (strcmp (t->name, avr_mcu_name) == 0)
+ break;
+
+ if (!t->name)
+ {
+ error ("unrecognized argument to -mmcu= option: %qs", avr_mcu_name);
+ inform (input_location, "See --target-help for supported MCUs");
+ }
+
+ avr_current_device = t;
+ avr_current_arch = &avr_arch_types[avr_current_device->arch];
+ avr_extra_arch_macro = avr_current_device->macro;
+
+ tmp_reg_rtx = gen_rtx_REG (QImode, TMP_REGNO);
+ zero_reg_rtx = gen_rtx_REG (QImode, ZERO_REGNO);
+
+ init_machine_status = avr_init_machine_status;
+}
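+
+/* For illustration (a sketch; the exact table entries vary between GCC
+   versions): -mmcu=atmega128 selects the mcu_type_s record whose arch
+   field indexes the avr5 entry of avr_arch_types and whose macro field
+   (something like "__AVR_ATmega128__") ends up in avr_extra_arch_macro
+   for the preprocessor.  */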
+
+/* Implement TARGET_HELP: report extra information for --target-help.  */
+
+static void
+avr_help (void)
+{
+ const struct mcu_type_s *t;
+ const char * const indent = " ";
+ int len;
+
+  /* Give a list of MCUs that are accepted by -mmcu=*.
+ Note that MCUs supported by the compiler might differ from
+ MCUs supported by binutils. */
+
+ len = strlen (indent);
+ printf ("Known MCU names:\n%s", indent);
+
+ /* Print a blank-separated list of all supported MCUs */
+
+ for (t = avr_mcu_types; t->name; t++)
+ {
+ printf ("%s ", t->name);
+ len += 1 + strlen (t->name);
+
+ /* Break long lines */
+
+ if (len > 66 && (t+1)->name)
+ {
+ printf ("\n%s", indent);
+ len = strlen (indent);
+ }
+ }
+
+ printf ("\n\n");
+}
+
+/* Map register numbers to register classes.  */
+
+static const enum reg_class reg_class_tab[] =
+{
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, /* r0 - r15 */
+  LD_REGS, LD_REGS, LD_REGS, LD_REGS,
+  LD_REGS, LD_REGS, LD_REGS, LD_REGS,                     /* r16 - r23 */
+  ADDW_REGS, ADDW_REGS,                                   /* r24, r25 */
+  POINTER_X_REGS, POINTER_X_REGS,                         /* r26, r27 */
+  POINTER_Y_REGS, POINTER_Y_REGS,                         /* r28, r29 */
+  POINTER_Z_REGS, POINTER_Z_REGS,                         /* r30, r31 */
+  STACK_REG, STACK_REG                                    /* SPL, SPH */
+};
+
+/* Allocate and zero the per-function machine_function structure.  */
+
+static struct machine_function *
+avr_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Return register class for register R. */
+
+enum reg_class
+avr_regno_reg_class (int r)
+{
+ if (r <= 33)
+ return reg_class_tab[r];
+ return ALL_REGS;
+}
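+
+/* A quick sketch of the mapping above, assuming the standard AVR
+   register numbering:
+
+     avr_regno_reg_class (0)  == GENERAL_REGS    r0
+     avr_regno_reg_class (16) == LD_REGS         r16, "ldi"-capable
+     avr_regno_reg_class (24) == ADDW_REGS       r24, "adiw"-capable
+     avr_regno_reg_class (30) == POINTER_Z_REGS  r30, Z low byte
+     avr_regno_reg_class (32) == STACK_REG       SPL
+
+   Anything above 33 falls back to ALL_REGS.  */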
+
+/* Return nonzero if FUNC is a naked function. */
+
+static int
+avr_naked_function_p (tree func)
+{
+ tree a;
+
+ gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
+
+ a = lookup_attribute ("naked", TYPE_ATTRIBUTES (TREE_TYPE (func)));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is an interrupt function as specified
+ by the "interrupt" attribute. */
+
+static int
+interrupt_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("interrupt", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is a signal function as specified
+ by the "signal" attribute. */
+
+static int
+signal_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("signal", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is an OS_task function.  */
+
+static int
+avr_OS_task_function_p (tree func)
+{
+ tree a;
+
+ gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
+
+ a = lookup_attribute ("OS_task", TYPE_ATTRIBUTES (TREE_TYPE (func)));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is an OS_main function.  */
+
+static int
+avr_OS_main_function_p (tree func)
+{
+ tree a;
+
+ gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
+
+ a = lookup_attribute ("OS_main", TYPE_ATTRIBUTES (TREE_TYPE (func)));
+ return a != NULL_TREE;
+}
+
+/* Return the number of hard registers to push/pop in the prologue/epilogue
+ of the current function, and optionally store these registers in SET. */
+
+static int
+avr_regs_to_save (HARD_REG_SET *set)
+{
+ int reg, count;
+ int int_or_sig_p = (interrupt_function_p (current_function_decl)
+ || signal_function_p (current_function_decl));
+
+ if (set)
+ CLEAR_HARD_REG_SET (*set);
+ count = 0;
+
+  /* No need to save any registers if the function never returns or
+     has the "OS_task" or "OS_main" attribute.  */
+ if (TREE_THIS_VOLATILE (current_function_decl)
+ || cfun->machine->is_OS_task
+ || cfun->machine->is_OS_main)
+ return 0;
+
+ for (reg = 0; reg < 32; reg++)
+ {
+      /* Do not push/pop __tmp_reg__, __zero_reg__, or any global
+         register variables.  */
+ if (fixed_regs[reg])
+ continue;
+
+ if ((int_or_sig_p && !current_function_is_leaf && call_used_regs[reg])
+ || (df_regs_ever_live_p (reg)
+ && (int_or_sig_p || !call_used_regs[reg])
+ && !(frame_pointer_needed
+ && (reg == REG_Y || reg == (REG_Y+1)))))
+ {
+ if (set)
+ SET_HARD_REG_BIT (*set, reg);
+ count++;
+ }
+ }
+ return count;
+}
+
+
+/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */
+
+static bool
+avr_allocate_stack_slots_for_args (void)
+{
+ return !cfun->machine->is_naked;
+}
+
+
+/* Return true if register FROM can be eliminated via register TO. */
+
+bool
+avr_can_eliminate (const int from, const int to)
+{
+ return ((from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ || ((from == FRAME_POINTER_REGNUM
+ || from == FRAME_POINTER_REGNUM + 1)
+ && !frame_pointer_needed));
+}
+
+/* Compute offset between arg_pointer and frame_pointer. */
+
+int
+avr_initial_elimination_offset (int from, int to)
+{
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return 0;
+ else
+ {
+ int offset = frame_pointer_needed ? 2 : 0;
+ int avr_pc_size = AVR_HAVE_EIJMP_EICALL ? 3 : 2;
+
+ offset += avr_regs_to_save (NULL);
+ return get_frame_size () + (avr_pc_size) + 1 + offset;
+ }
+}
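+
+/* Worked example (the numbers are hypothetical): eliminating the arg
+   pointer into the frame pointer on a device with a 2-byte PC, a
+   10-byte frame, 3 saved registers and a saved frame pointer gives
+
+     offset = 2 (saved FP) + 3 (saved regs) = 5
+     result = 10 (frame) + 2 (PC) + 1 + 5 = 18
+
+   i.e. the incoming arguments start 18 bytes above the frame
+   pointer.  */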
+
+/* The actual start of the frame is virtual_stack_vars_rtx, which is
+   offset from the frame pointer by +STARTING_FRAME_OFFSET.  Using
+   saved frame = virtual_stack_vars_rtx - STARTING_FRAME_OFFSET
+   avoids creating an add/sub of the offset in nonlocal goto and
+   setjmp.  */
+
+rtx
+avr_builtin_setjmp_frame_value (void)
+{
+ return gen_rtx_MINUS (Pmode, virtual_stack_vars_rtx,
+ gen_int_mode (STARTING_FRAME_OFFSET, Pmode));
+}
+
+/* Return the contents of MEM at frame pointer + stack size + 1
+   (+2 for a 3-byte PC).  This is the return address of the function.  */
+rtx
+avr_return_addr_rtx (int count, rtx tem)
+{
+ rtx r;
+
+  /* We can only return this function's return address; others are
+     not supported.  */
+ if (count)
+ return NULL;
+
+ if (AVR_3_BYTE_PC)
+ {
+ r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+2");
+ warning (0, "'builtin_return_address' contains only 2 bytes of address");
+ }
+ else
+ r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+1");
+
+ r = gen_rtx_PLUS (Pmode, tem, r);
+ r = gen_frame_mem (Pmode, memory_address (Pmode, r));
+ r = gen_rtx_ROTATE (HImode, r, GEN_INT (8));
+ return r;
+}
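+
+/* Note on the ROTATE above: the return address is stored on the stack
+   in the opposite byte order from GCC's little-endian HImode layout,
+   so rotating by 8 swaps the two bytes into the expected order.  */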
+
+/* Return 1 if the function epilogue is just a single "ret". */
+
+int
+avr_simple_epilogue (void)
+{
+ return (! frame_pointer_needed
+ && get_frame_size () == 0
+ && avr_regs_to_save (NULL) == 0
+ && ! interrupt_function_p (current_function_decl)
+ && ! signal_function_p (current_function_decl)
+ && ! avr_naked_function_p (current_function_decl)
+ && ! TREE_THIS_VOLATILE (current_function_decl));
+}
+
+/* Return the number of live call-saved registers if they form one
+   contiguous sequence suitable for the call-prologues helper,
+   otherwise return 0.  */
+
+static int
+sequent_regs_live (void)
+{
+ int reg;
+ int live_seq=0;
+ int cur_seq=0;
+
+ for (reg = 0; reg < 18; ++reg)
+ {
+ if (fixed_regs[reg])
+ {
+ /* Don't recognize sequences that contain global register
+ variables. */
+
+ if (live_seq != 0)
+ return 0;
+ else
+ continue;
+ }
+
+ if (!call_used_regs[reg])
+ {
+ if (df_regs_ever_live_p (reg))
+ {
+ ++live_seq;
+ ++cur_seq;
+ }
+ else
+ cur_seq = 0;
+ }
+ }
+
+ if (!frame_pointer_needed)
+ {
+ if (df_regs_ever_live_p (REG_Y))
+ {
+ ++live_seq;
+ ++cur_seq;
+ }
+ else
+ cur_seq = 0;
+
+ if (df_regs_ever_live_p (REG_Y+1))
+ {
+ ++live_seq;
+ ++cur_seq;
+ }
+ else
+ cur_seq = 0;
+ }
+ else
+ {
+ cur_seq += 2;
+ live_seq += 2;
+ }
+ return (cur_seq == live_seq) ? live_seq : 0;
+}
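+
+/* Example (a sketch): if the only live call-saved registers are
+   r14...r17 and the frame pointer is needed, the loop counts 4, the
+   frame pointer adds 2, and cur_seq == live_seq == 6, so 6 is
+   returned.  If instead only r10 and r17 were live, cur_seq would end
+   at 1 while live_seq is 2, so 0 is returned and the call-prologues
+   helper cannot be used.  */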
+
+/* Return the combined length of the insns in the sequence INSNS.  */
+
+int
+get_sequence_length (rtx insns)
+{
+ rtx insn;
+ int length;
+
+ for (insn = insns, length = 0; insn; insn = NEXT_INSN (insn))
+ length += get_attr_length (insn);
+
+ return length;
+}
+
+/* Implement INCOMING_RETURN_ADDR_RTX. */
+
+rtx
+avr_incoming_return_addr_rtx (void)
+{
+ /* The return address is at the top of the stack. Note that the push
+ was via post-decrement, which means the actual address is off by one. */
+ return gen_frame_mem (HImode, plus_constant (stack_pointer_rtx, 1));
+}
+
+/* Helper for expand_prologue. Emit a push of a byte register. */
+
+static void
+emit_push_byte (unsigned regno, bool frame_related_p)
+{
+ rtx mem, reg, insn;
+
+ mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx);
+ mem = gen_frame_mem (QImode, mem);
+ reg = gen_rtx_REG (QImode, regno);
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
+ if (frame_related_p)
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ cfun->machine->stack_usage++;
+}
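+
+/* Note: the (post_dec SP) reference matches the semantics of the AVR
+   "push" instruction, which stores the byte at SP and then decrements
+   SP; this is also why avr_incoming_return_addr_rtx above must add 1
+   to SP to reach the return address.  */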
+
+
+/* Emit RTL for the function prologue.  */
+
+void
+expand_prologue (void)
+{
+ int live_seq;
+ HARD_REG_SET set;
+ int minimize;
+  HOST_WIDE_INT size = get_frame_size ();
+ rtx insn;
+
+ /* Init cfun->machine. */
+ cfun->machine->is_naked = avr_naked_function_p (current_function_decl);
+ cfun->machine->is_interrupt = interrupt_function_p (current_function_decl);
+ cfun->machine->is_signal = signal_function_p (current_function_decl);
+ cfun->machine->is_OS_task = avr_OS_task_function_p (current_function_decl);
+ cfun->machine->is_OS_main = avr_OS_main_function_p (current_function_decl);
+ cfun->machine->stack_usage = 0;
+
+ /* Prologue: naked. */
+ if (cfun->machine->is_naked)
+ {
+ return;
+ }
+
+ avr_regs_to_save (&set);
+ live_seq = sequent_regs_live ();
+ minimize = (TARGET_CALL_PROLOGUES
+ && !cfun->machine->is_interrupt
+ && !cfun->machine->is_signal
+ && !cfun->machine->is_OS_task
+ && !cfun->machine->is_OS_main
+ && live_seq);
+
+ if (cfun->machine->is_interrupt || cfun->machine->is_signal)
+ {
+ /* Enable interrupts. */
+ if (cfun->machine->is_interrupt)
+ emit_insn (gen_enable_interrupt ());
+
+ /* Push zero reg. */
+ emit_push_byte (ZERO_REGNO, true);
+
+ /* Push tmp reg. */
+ emit_push_byte (TMP_REGNO, true);
+
+ /* Push SREG. */
+ /* ??? There's no dwarf2 column reserved for SREG. */
+ emit_move_insn (tmp_reg_rtx, gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR)));
+ emit_push_byte (TMP_REGNO, false);
+
+ /* Push RAMPZ. */
+ /* ??? There's no dwarf2 column reserved for RAMPZ. */
+ if (AVR_HAVE_RAMPZ
+ && TEST_HARD_REG_BIT (set, REG_Z)
+ && TEST_HARD_REG_BIT (set, REG_Z + 1))
+ {
+ emit_move_insn (tmp_reg_rtx,
+ gen_rtx_MEM (QImode, GEN_INT (RAMPZ_ADDR)));
+ emit_push_byte (TMP_REGNO, false);
+ }
+
+ /* Clear zero reg. */
+ emit_move_insn (zero_reg_rtx, const0_rtx);
+
+ /* Prevent any attempt to delete the setting of ZERO_REG! */
+ emit_use (zero_reg_rtx);
+ }
+ if (minimize && (frame_pointer_needed
+ || (AVR_2_BYTE_PC && live_seq > 6)
+ || live_seq > 7))
+ {
+ int first_reg, reg, offset;
+
+ emit_move_insn (gen_rtx_REG (HImode, REG_X),
+ gen_int_mode (size, HImode));
+
+ insn = emit_insn (gen_call_prologue_saves
+ (gen_int_mode (live_seq, HImode),
+ gen_int_mode (size + live_seq, HImode)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Describe the effect of the unspec_volatile call to prologue_saves.
+ Note that this formulation assumes that add_reg_note pushes the
+ notes to the front. Thus we build them in the reverse order of
+ how we want dwarf2out to process them. */
+
+      /* The function always sets frame_pointer_rtx, but whether that
+         setting is permanent for the function depends on
+         frame_pointer_needed.  */
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (VOIDmode,
+ (frame_pointer_needed
+ ? frame_pointer_rtx : stack_pointer_rtx),
+ plus_constant (stack_pointer_rtx,
+ -(size + live_seq))));
+
+ /* Note that live_seq always contains r28+r29, but the other
+ registers to be saved are all below 18. */
+ first_reg = 18 - (live_seq - 2);
+
+ for (reg = 29, offset = -live_seq + 1;
+ reg >= first_reg;
+ reg = (reg == 28 ? 17 : reg - 1), ++offset)
+ {
+ rtx m, r;
+
+ m = gen_rtx_MEM (QImode, plus_constant (stack_pointer_rtx, offset));
+ r = gen_rtx_REG (QImode, reg);
+ add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r));
+ }
+
+ cfun->machine->stack_usage += size + live_seq;
+ }
+ else
+ {
+ int reg;
+ for (reg = 0; reg < 32; ++reg)
+ if (TEST_HARD_REG_BIT (set, reg))
+ emit_push_byte (reg, true);
+
+ if (frame_pointer_needed)
+ {
+ if (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main))
+ {
+ /* Push frame pointer. Always be consistent about the
+ ordering of pushes -- epilogue_restores expects the
+ register pair to be pushed low byte first. */
+ emit_push_byte (REG_Y, true);
+ emit_push_byte (REG_Y + 1, true);
+ }
+
+ if (!size)
+ {
+ insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ /* Creating a frame can be done by direct manipulation of the
+ stack or via the frame pointer. These two methods are:
+ fp=sp
+ fp-=size
+ sp=fp
+ OR
+ sp-=size
+ fp=sp
+                 The optimal method depends on the function type and the
+                 stack and frame sizes.  To avoid complex logic, both
+                 sequences are generated and the shorter one is emitted.  */
+ rtx myfp;
+ rtx fp_plus_insns;
+
+ if (AVR_HAVE_8BIT_SP)
+ {
+ /* The high byte (r29) doesn't change. Prefer 'subi'
+ (1 cycle) over 'sbiw' (2 cycles, same size). */
+ myfp = gen_rtx_REG (QImode, FRAME_POINTER_REGNUM);
+ }
+ else
+ {
+ /* Normal sized addition. */
+ myfp = frame_pointer_rtx;
+ }
+
+              /* Method 1: adjust the frame pointer.  */
+ start_sequence ();
+
+ /* Normally the dwarf2out frame-related-expr interpreter does
+ not expect to have the CFA change once the frame pointer is
+ set up. Thus we avoid marking the move insn below and
+ instead indicate that the entire operation is complete after
+ the frame pointer subtraction is done. */
+
+ emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+
+ insn = emit_move_insn (myfp, plus_constant (myfp, -size));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (VOIDmode, frame_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -size)));
+
+ /* Copy to stack pointer. Note that since we've already
+ changed the CFA to the frame pointer this operation
+ need not be annotated at all. */
+ if (AVR_HAVE_8BIT_SP)
+ {
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ }
+ else if (TARGET_NO_INTERRUPTS
+ || cfun->machine->is_signal
+ || cfun->machine->is_OS_main)
+ {
+ emit_insn (gen_movhi_sp_r_irq_off (stack_pointer_rtx,
+ frame_pointer_rtx));
+ }
+ else if (cfun->machine->is_interrupt)
+ {
+ emit_insn (gen_movhi_sp_r_irq_on (stack_pointer_rtx,
+ frame_pointer_rtx));
+ }
+ else
+ {
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ }
+
+ fp_plus_insns = get_insns ();
+ end_sequence ();
+
+              /* Method 2: adjust the stack pointer.  */
+ if (size <= 6)
+ {
+ rtx sp_plus_insns;
+
+ start_sequence ();
+
+ insn = plus_constant (stack_pointer_rtx, -size);
+ insn = emit_move_insn (stack_pointer_rtx, insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ sp_plus_insns = get_insns ();
+ end_sequence ();
+
+ /* Use shortest method. */
+ if (get_sequence_length (sp_plus_insns)
+ < get_sequence_length (fp_plus_insns))
+ emit_insn (sp_plus_insns);
+ else
+ emit_insn (fp_plus_insns);
+ }
+ else
+ emit_insn (fp_plus_insns);
+
+ cfun->machine->stack_usage += size;
+ }
+ }
+ }
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = cfun->machine->stack_usage;
+}
+
+/* Output summary at end of function prologue. */
+
+static void
+avr_asm_function_end_prologue (FILE *file)
+{
+ if (cfun->machine->is_naked)
+ {
+ fputs ("/* prologue: naked */\n", file);
+ }
+ else
+ {
+ if (cfun->machine->is_interrupt)
+ {
+ fputs ("/* prologue: Interrupt */\n", file);
+ }
+ else if (cfun->machine->is_signal)
+ {
+ fputs ("/* prologue: Signal */\n", file);
+ }
+ else
+ fputs ("/* prologue: function */\n", file);
+ }
+ fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n",
+           get_frame_size ());
+ fprintf (file, "/* stack size = %d */\n",
+ cfun->machine->stack_usage);
+  /* Create the .L__stack_usage symbol here so that all functions have
+     it; avr_return_addr_rtx adds 1 (or 2 for a 3-byte PC) so that
+     SP + .L__stack_usage + 1 yields the return address.  */
+ fprintf (file, ".L__stack_usage = %d\n", cfun->machine->stack_usage);
+}
+
+
+/* Implement EPILOGUE_USES. */
+
+int
+avr_epilogue_uses (int regno ATTRIBUTE_UNUSED)
+{
+ if (reload_completed
+ && cfun->machine
+ && (cfun->machine->is_interrupt || cfun->machine->is_signal))
+ return 1;
+ return 0;
+}
+
+/* Helper for expand_epilogue. Emit a pop of a byte register. */
+
+static void
+emit_pop_byte (unsigned regno)
+{
+ rtx mem, reg;
+
+ mem = gen_rtx_PRE_INC (HImode, stack_pointer_rtx);
+ mem = gen_frame_mem (QImode, mem);
+ reg = gen_rtx_REG (QImode, regno);
+
+ emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
+}
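+
+/* Note: the (pre_inc SP) reference mirrors the AVR "pop" instruction,
+   which increments SP first and then loads the byte, undoing the
+   post-decrement push in emit_push_byte above.  */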
+
+/* Emit RTL for the function epilogue.  */
+
+void
+expand_epilogue (void)
+{
+ int reg;
+ int live_seq;
+ HARD_REG_SET set;
+ int minimize;
+  HOST_WIDE_INT size = get_frame_size ();
+
+ /* epilogue: naked */
+ if (cfun->machine->is_naked)
+ {
+ emit_jump_insn (gen_return ());
+ return;
+ }
+
+ avr_regs_to_save (&set);
+ live_seq = sequent_regs_live ();
+ minimize = (TARGET_CALL_PROLOGUES
+ && !cfun->machine->is_interrupt
+ && !cfun->machine->is_signal
+ && !cfun->machine->is_OS_task
+ && !cfun->machine->is_OS_main
+ && live_seq);
+
+ if (minimize && (frame_pointer_needed || live_seq > 4))
+ {
+ if (frame_pointer_needed)
+ {
+          /* Deallocate the frame.  */
+          emit_move_insn (frame_pointer_rtx,
+                          gen_rtx_PLUS (HImode, frame_pointer_rtx,
+                                        gen_int_mode (size, HImode)));
+ }
+ else
+ {
+ emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ }
+
+ emit_insn (gen_epilogue_restores (gen_int_mode (live_seq, HImode)));
+ }
+ else
+ {
+ if (frame_pointer_needed)
+ {
+ if (size)
+ {
+              /* Try two methods to adjust the stack and select the shorter.  */
+ rtx myfp;
+ rtx fp_plus_insns;
+
+ if (AVR_HAVE_8BIT_SP)
+ {
+ /* The high byte (r29) doesn't change - prefer 'subi'
+ (1 cycle) over 'sbiw' (2 cycles, same size). */
+ myfp = gen_rtx_REG (QImode, FRAME_POINTER_REGNUM);
+ }
+ else
+ {
+ /* Normal sized addition. */
+ myfp = frame_pointer_rtx;
+ }
+
+              /* Method 1: adjust the frame pointer.  */
+ start_sequence ();
+
+ emit_move_insn (myfp, plus_constant (myfp, size));
+
+ /* Copy to stack pointer. */
+ if (AVR_HAVE_8BIT_SP)
+ {
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ }
+ else if (TARGET_NO_INTERRUPTS
+ || cfun->machine->is_signal)
+ {
+ emit_insn (gen_movhi_sp_r_irq_off (stack_pointer_rtx,
+ frame_pointer_rtx));
+ }
+ else if (cfun->machine->is_interrupt)
+ {
+ emit_insn (gen_movhi_sp_r_irq_on (stack_pointer_rtx,
+ frame_pointer_rtx));
+ }
+ else
+ {
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ }
+
+ fp_plus_insns = get_insns ();
+ end_sequence ();
+
+              /* Method 2: adjust the stack pointer.  */
+ if (size <= 5)
+ {
+ rtx sp_plus_insns;
+
+ start_sequence ();
+
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, size));
+
+ sp_plus_insns = get_insns ();
+ end_sequence ();
+
+ /* Use shortest method. */
+ if (get_sequence_length (sp_plus_insns)
+ < get_sequence_length (fp_plus_insns))
+ emit_insn (sp_plus_insns);
+ else
+ emit_insn (fp_plus_insns);
+ }
+ else
+ emit_insn (fp_plus_insns);
+ }
+ if (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main))
+ {
+              /* Restore the previous frame pointer.  See expand_prologue
+                 for the rationale for not using pophi.  */
+ emit_pop_byte (REG_Y + 1);
+ emit_pop_byte (REG_Y);
+ }
+ }
+
+ /* Restore used registers. */
+ for (reg = 31; reg >= 0; --reg)
+ if (TEST_HARD_REG_BIT (set, reg))
+ emit_pop_byte (reg);
+
+ if (cfun->machine->is_interrupt || cfun->machine->is_signal)
+ {
+ /* Restore RAMPZ using tmp reg as scratch. */
+ if (AVR_HAVE_RAMPZ
+ && TEST_HARD_REG_BIT (set, REG_Z)
+ && TEST_HARD_REG_BIT (set, REG_Z + 1))
+ {
+ emit_pop_byte (TMP_REGNO);
+ emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (RAMPZ_ADDR)),
+ tmp_reg_rtx);
+ }
+
+ /* Restore SREG using tmp reg as scratch. */
+ emit_pop_byte (TMP_REGNO);
+
+ emit_move_insn (gen_rtx_MEM (QImode, GEN_INT (SREG_ADDR)),
+ tmp_reg_rtx);
+
+ /* Restore tmp REG. */
+ emit_pop_byte (TMP_REGNO);
+
+ /* Restore zero REG. */
+ emit_pop_byte (ZERO_REGNO);
+ }
+
+ emit_jump_insn (gen_return ());
+ }
+}
+
+/* Output summary messages at beginning of function epilogue. */
+
+static void
+avr_asm_function_begin_epilogue (FILE *file)
+{
+ fprintf (file, "/* epilogue start */\n");
+}
+
+
+/* Implement TARGET_CANNOT_MODIFY_JUMPS_P.  */
+
+static bool
+avr_cannot_modify_jumps_p (void)
+{
+  /* Naked functions must not have any instructions after
+     their epilogue; see PR42240.  */
+
+ if (reload_completed
+ && cfun->machine
+ && cfun->machine->is_naked)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Return nonzero if X (an RTX) is a legitimate memory address on the target
+ machine for a memory operand of mode MODE. */
+
+bool
+avr_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ enum reg_class r = NO_REGS;
+
+ if (TARGET_ALL_DEBUG)
+ {
+ fprintf (stderr, "mode: (%s) %s %s %s %s:",
+ GET_MODE_NAME(mode),
+ strict ? "(strict)": "",
+ reload_completed ? "(reload_completed)": "",
+ reload_in_progress ? "(reload_in_progress)": "",
+ reg_renumber ? "(reg_renumber)" : "");
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) >= 0
+ && INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode)
+ && reg_renumber
+ )
+ fprintf (stderr, "(r%d ---> r%d)", REGNO (XEXP (x, 0)),
+ true_regnum (XEXP (x, 0)));
+ debug_rtx (x);
+ }
+
+ if (REG_P (x) && (strict ? REG_OK_FOR_BASE_STRICT_P (x)
+ : REG_OK_FOR_BASE_NOSTRICT_P (x)))
+ r = POINTER_REGS;
+ else if (CONSTANT_ADDRESS_P (x))
+ r = ALL_REGS;
+ else if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) >= 0)
+ {
+ int fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode);
+ if (fit)
+ {
+ if (! strict
+ || REGNO (XEXP (x,0)) == REG_X
+ || REGNO (XEXP (x,0)) == REG_Y
+ || REGNO (XEXP (x,0)) == REG_Z)
+ r = BASE_POINTER_REGS;
+ if (XEXP (x,0) == frame_pointer_rtx
+ || XEXP (x,0) == arg_pointer_rtx)
+ r = BASE_POINTER_REGS;
+ }
+ else if (frame_pointer_needed && XEXP (x,0) == frame_pointer_rtx)
+ r = POINTER_Y_REGS;
+ }
+ else if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC)
+ && REG_P (XEXP (x, 0))
+ && (strict ? REG_OK_FOR_BASE_STRICT_P (XEXP (x, 0))
+ : REG_OK_FOR_BASE_NOSTRICT_P (XEXP (x, 0))))
+ {
+ r = POINTER_REGS;
+ }
+ if (TARGET_ALL_DEBUG)
+ {
+ fprintf (stderr, " ret = %c\n", r + '0');
+ }
+ return r == NO_REGS ? 0 : (int)r;
+}
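+
+/* Some examples of addresses the function above accepts (a sketch;
+   the displacement limit depends on MAX_LD_OFFSET for the mode):
+
+     (reg:HI 30)                        plain Z base      -> POINTER_REGS
+     (plus (reg:HI 28) (const_int 10))  Y + small offset  -> BASE_POINTER_REGS
+     (post_inc (reg:HI 26))             X post-increment  -> POINTER_REGS
+     (const_int 96)                     constant address  -> ALL_REGS  */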
+
+/* Attempt to replace X with a valid
+   memory address for an operand of mode MODE.  */
+
+rtx
+avr_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ x = oldx;
+ if (TARGET_ALL_DEBUG)
+ {
+ fprintf (stderr, "legitimize_address mode: %s", GET_MODE_NAME(mode));
+ debug_rtx (oldx);
+ }
+
+ if (GET_CODE (oldx) == PLUS
+ && REG_P (XEXP (oldx,0)))
+ {
+ if (REG_P (XEXP (oldx,1)))
+ x = force_reg (GET_MODE (oldx), oldx);
+ else if (GET_CODE (XEXP (oldx, 1)) == CONST_INT)
+ {
+ int offs = INTVAL (XEXP (oldx,1));
+ if (frame_pointer_rtx != XEXP (oldx,0))
+ if (offs > MAX_LD_OFFSET (mode))
+ {
+ if (TARGET_ALL_DEBUG)
+ fprintf (stderr, "force_reg (big offset)\n");
+ x = force_reg (GET_MODE (oldx), oldx);
+ }
+ }
+ }
+ return x;
+}
+
+
+/* Return a pointer register name as a string. */
+
+static const char *
+ptrreg_to_str (int regno)
+{
+ switch (regno)
+ {
+ case REG_X: return "X";
+ case REG_Y: return "Y";
+ case REG_Z: return "Z";
+ default:
+ output_operand_lossage ("address operand requires constraint for X, Y, or Z register");
+ }
+ return NULL;
+}
+
+/* Return the condition name as a string.
+   Used when constructing conditional jumps.  */
+
+static const char *
+cond_string (enum rtx_code code)
+{
+ switch (code)
+ {
+ case NE:
+ return "ne";
+ case EQ:
+ return "eq";
+ case GE:
+ if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE)
+ return "pl";
+ else
+ return "ge";
+ case LT:
+ if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE)
+ return "mi";
+ else
+ return "lt";
+ case GEU:
+ return "sh";
+ case LTU:
+ return "lo";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Output ADDR to FILE as an address.  */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG:
+      fputs (ptrreg_to_str (REGNO (addr)), file);
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "-%s", ptrreg_to_str (REGNO (XEXP (addr, 0))));
+ break;
+
+ case POST_INC:
+ fprintf (file, "%s+", ptrreg_to_str (REGNO (XEXP (addr, 0))));
+ break;
+
+ default:
+ if (CONSTANT_ADDRESS_P (addr)
+ && text_segment_operand (addr, VOIDmode))
+ {
+ rtx x = XEXP (addr,0);
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT)
+ {
+              /* The assembler's gs() will implant a word address.  Make the
+                 offset a byte offset inside gs() for the assembler; this is
+                 needed because the more logical (constant+gs(sym)) is not
+                 accepted by gas.  For devices with up to 128K of flash this
+                 is fine; for larger devices it will create a trampoline to
+                 offset from the symbol, which may not be what the user
+                 really wanted.  */
+ fprintf (file, "gs(");
+ output_addr_const (file, XEXP (x,0));
+ fprintf (file,"+" HOST_WIDE_INT_PRINT_DEC ")", 2 * INTVAL (XEXP (x,1)));
+ if (AVR_3_BYTE_PC)
+                if (warning (0, "pointer offset from symbol may be incorrect"))
+ {
+ output_addr_const (stderr, addr);
+                    fprintf (stderr, "\n");
+ }
+ }
+ else
+ {
+ fprintf (file, "gs(");
+ output_addr_const (file, addr);
+ fprintf (file, ")");
+ }
+ }
+ else
+ output_addr_const (file, addr);
+ }
+}
+
+
+/* Output X as assembler operand to file FILE. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ int abcd = 0;
+
+ if (code >= 'A' && code <= 'D')
+ abcd = code - 'A';
+
+ if (code == '~')
+ {
+ if (!AVR_HAVE_JMP_CALL)
+ fputc ('r', file);
+ }
+ else if (code == '!')
+ {
+ if (AVR_HAVE_EIJMP_EICALL)
+ fputc ('e', file);
+ }
+ else if (REG_P (x))
+ {
+ if (x == zero_reg_rtx)
+ fprintf (file, "__zero_reg__");
+ else
+        fputs (reg_names[true_regnum (x) + abcd], file);
+ }
+ else if (GET_CODE (x) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) + abcd);
+ else if (GET_CODE (x) == MEM)
+ {
+ rtx addr = XEXP (x,0);
+ if (code == 'm')
+ {
+ if (!CONSTANT_P (addr))
+            fatal_insn ("bad address, not a constant:", addr);
+          /* An assembler template with the 'm' code expects a data
+             address, not a progmem section address.  */
+ if (text_segment_operand (addr, VOIDmode))
+            if (warning (0, "accessing data memory with program memory address"))
+ {
+ output_addr_const (stderr, addr);
+                fprintf (stderr, "\n");
+ }
+ output_addr_const (file, addr);
+ }
+ else if (code == 'o')
+ {
+ if (GET_CODE (addr) != PLUS)
+ fatal_insn ("bad address, not (reg+disp):", addr);
+
+ print_operand (file, XEXP (addr, 1), 0);
+ }
+ else if (code == 'p' || code == 'r')
+ {
+ if (GET_CODE (addr) != POST_INC && GET_CODE (addr) != PRE_DEC)
+ fatal_insn ("bad address, not post_inc or pre_dec:", addr);
+
+ if (code == 'p')
+ print_operand_address (file, XEXP (addr, 0)); /* X, Y, Z */
+ else
+ print_operand (file, XEXP (addr, 0), 0); /* r26, r28, r30 */
+ }
+ else if (GET_CODE (addr) == PLUS)
+ {
+ print_operand_address (file, XEXP (addr,0));
+ if (REGNO (XEXP (addr, 0)) == REG_X)
+ fatal_insn ("internal compiler error. Bad address:"
+ ,addr);
+ fputc ('+', file);
+ print_operand (file, XEXP (addr,1), code);
+ }
+ else
+ print_operand_address (file, addr);
+ }
+ else if (code == 'x')
+ {
+      /* Constant progmem address, as used in jmp or call.  */
+ if (0 == text_segment_operand (x, VOIDmode))
+        if (warning (0, "accessing program memory with data memory address"))
+ {
+ output_addr_const (stderr, x);
+            fprintf (stderr, "\n");
+ }
+      /* Use a normal symbol for the direct address; no linker
+         trampoline is needed.  */
+ output_addr_const (file, x);
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ long val;
+ REAL_VALUE_TYPE rv;
+ if (GET_MODE (x) != SFmode)
+ fatal_insn ("internal compiler error. Unknown mode:", x);
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, "0x%lx", val);
+ }
+ else if (code == 'j')
+ fputs (cond_string (GET_CODE (x)), file);
+ else if (code == 'k')
+ fputs (cond_string (reverse_condition (GET_CODE (x))), file);
+ else
+ print_operand_address (file, x);
+}
+
+/* Update the condition code in the INSN. */
+
+void
+notice_update_cc (rtx body ATTRIBUTE_UNUSED, rtx insn)
+{
+ rtx set;
+
+ switch (get_attr_cc (insn))
+ {
+ case CC_NONE:
+ /* Insn does not affect CC at all. */
+ break;
+
+ case CC_SET_N:
+ CC_STATUS_INIT;
+ break;
+
+ case CC_SET_ZN:
+ set = single_set (insn);
+ CC_STATUS_INIT;
+ if (set)
+ {
+ cc_status.flags |= CC_NO_OVERFLOW;
+ cc_status.value1 = SET_DEST (set);
+ }
+ break;
+
+ case CC_SET_CZN:
+ /* Insn sets the Z,N,C flags of CC to recog_operand[0].
+ The V flag may or may not be known but that's ok because
+ alter_cond will change tests to use EQ/NE. */
+ set = single_set (insn);
+ CC_STATUS_INIT;
+ if (set)
+ {
+ cc_status.value1 = SET_DEST (set);
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE;
+ }
+ break;
+
+ case CC_COMPARE:
+ set = single_set (insn);
+ CC_STATUS_INIT;
+ if (set)
+ cc_status.value1 = SET_SRC (set);
+ break;
+
+ case CC_CLOBBER:
+ /* Insn doesn't leave CC in a usable state. */
+ CC_STATUS_INIT;
+
+      /* Correct CC for the ashrqi3 insn with a constant shift
+         count in the range 1..5.  */
+ set = single_set (insn);
+ if (set)
+ {
+ rtx src = SET_SRC (set);
+
+ if (GET_CODE (src) == ASHIFTRT
+ && GET_MODE (src) == QImode)
+ {
+ rtx x = XEXP (src, 1);
+
+ if (CONST_INT_P (x)
+ && IN_RANGE (INTVAL (x), 1, 5))
+ {
+ cc_status.value1 = SET_DEST (set);
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE;
+ }
+ }
+ }
+ break;
+ }
+}
+
+/* Return maximum number of consecutive registers of
+ class CLASS needed to hold a value of mode MODE. */
+
+int
+class_max_nregs (enum reg_class rclass ATTRIBUTE_UNUSED,enum machine_mode mode)
+{
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+}
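+
+/* Since UNITS_PER_WORD is 1 on AVR, the ceiling division above
+   reduces to one register per byte: HImode (2 bytes) needs 2
+   registers, SImode (4 bytes) needs 4.  */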
+
+/* Choose the mode for a jump insn:
+   1 - relative jump in range -63 <= x <= 62;
+   2 - relative jump in range -2046 <= x <= 2045;
+   3 - absolute jump (only on devices with JMP/CALL).  */
+
+int
+avr_jump_mode (rtx x, rtx insn)
+{
+ int dest_addr = INSN_ADDRESSES (INSN_UID (GET_CODE (x) == LABEL_REF
+ ? XEXP (x, 0) : x));
+ int cur_addr = INSN_ADDRESSES (INSN_UID (insn));
+ int jump_distance = cur_addr - dest_addr;
+
+ if (-63 <= jump_distance && jump_distance <= 62)
+ return 1;
+ else if (-2046 <= jump_distance && jump_distance <= 2045)
+ return 2;
+ else if (AVR_HAVE_JMP_CALL)
+ return 3;
+
+ return 2;
+}
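+
+/* The returned mode corresponds to the branch encoding (a sketch):
+   mode 1 fits a conditional branch ("brxx", roughly +/-63 words),
+   mode 2 a relative jump ("rjmp", roughly +/-2K words), and mode 3 an
+   absolute "jmp", which only exists on AVR_HAVE_JMP_CALL devices.  */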
+
+/* Return AVR conditional branch instructions.
+   X is a comparison RTX.
+   LEN is a jump mode as returned by avr_jump_mode.
+   If REVERSE is nonzero, the condition code in X must be reversed.  */
+
+const char *
+ret_cond_branch (rtx x, int len, int reverse)
+{
+ RTX_CODE cond = reverse ? reverse_condition (GET_CODE (x)) : GET_CODE (x);
+
+ switch (cond)
+ {
+ case GT:
+ if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE)
+ return (len == 1 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brpl,%0)) :
+ len == 2 ? (AS1 (breq,.+4) CR_TAB
+ AS1 (brmi,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+6) CR_TAB
+ AS1 (brmi,.+4) CR_TAB
+ AS1 (jmp,%0)));
+
+ else
+ return (len == 1 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brge,%0)) :
+ len == 2 ? (AS1 (breq,.+4) CR_TAB
+ AS1 (brlt,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+6) CR_TAB
+ AS1 (brlt,.+4) CR_TAB
+ AS1 (jmp,%0)));
+ case GTU:
+ return (len == 1 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brsh,%0)) :
+ len == 2 ? (AS1 (breq,.+4) CR_TAB
+ AS1 (brlo,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+6) CR_TAB
+ AS1 (brlo,.+4) CR_TAB
+ AS1 (jmp,%0)));
+ case LE:
+ if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE)
+ return (len == 1 ? (AS1 (breq,%0) CR_TAB
+ AS1 (brmi,%0)) :
+ len == 2 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brpl,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+2) CR_TAB
+ AS1 (brpl,.+4) CR_TAB
+ AS1 (jmp,%0)));
+ else
+ return (len == 1 ? (AS1 (breq,%0) CR_TAB
+ AS1 (brlt,%0)) :
+ len == 2 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brge,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+2) CR_TAB
+ AS1 (brge,.+4) CR_TAB
+ AS1 (jmp,%0)));
+ case LEU:
+ return (len == 1 ? (AS1 (breq,%0) CR_TAB
+ AS1 (brlo,%0)) :
+ len == 2 ? (AS1 (breq,.+2) CR_TAB
+ AS1 (brsh,.+2) CR_TAB
+ AS1 (rjmp,%0)) :
+ (AS1 (breq,.+2) CR_TAB
+ AS1 (brsh,.+4) CR_TAB
+ AS1 (jmp,%0)));
+ default:
+ if (reverse)
+ {
+ switch (len)
+ {
+ case 1:
+ return AS1 (br%k1,%0);
+ case 2:
+ return (AS1 (br%j1,.+2) CR_TAB
+ AS1 (rjmp,%0));
+ default:
+ return (AS1 (br%j1,.+4) CR_TAB
+ AS1 (jmp,%0));
+ }
+ }
+ else
+ {
+ switch (len)
+ {
+ case 1:
+ return AS1 (br%j1,%0);
+ case 2:
+ return (AS1 (br%k1,.+2) CR_TAB
+ AS1 (rjmp,%0));
+ default:
+ return (AS1 (br%k1,.+4) CR_TAB
+ AS1 (jmp,%0));
+ }
+ }
+ }
+ return "";
+}
+
+/* Predicate for an immediate operand that fits into a byte (8 bits).  */
+
+int
+byte_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return (GET_CODE (op) == CONST_INT
+ && INTVAL (op) <= 0xff && INTVAL (op) >= 0);
+}
+
+/* When debugging, output the rtx cost of the insn about to be emitted.  */
+
+void
+final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
+ int num_operands ATTRIBUTE_UNUSED)
+{
+ if (TARGET_ALL_DEBUG)
+ {
+ fprintf (asm_out_file, "/* DEBUG: cost = %d. */\n",
+ rtx_cost (PATTERN (insn), INSN, !optimize_size));
+ }
+}
+
+/* Return 0 if undefined, 1 if always true or always false. */
+
+int
+avr_simplify_comparison_p (enum machine_mode mode, RTX_CODE op, rtx x)
+{
+ unsigned int max = (mode == QImode ? 0xff :
+ mode == HImode ? 0xffff :
+ mode == SImode ? 0xffffffff : 0);
+ if (max && op && GET_CODE (x) == CONST_INT)
+ {
+ if (unsigned_condition (op) != op)
+ max >>= 1;
+
+ if (max != (INTVAL (x) & max)
+ && INTVAL (x) != 0xff)
+ return 1;
+ }
+ return 0;
+}
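+
+/* Worked example: for a signed QImode comparison MAX is halved to
+   0x7f, so testing against (const_int 0x80) gives
+   (0x80 & 0x7f) == 0 != 0x7f, and the function returns 1: the
+   comparison is degenerate (always true or always false).  */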
+
+
+/* Returns nonzero if REGNO is the number of a hard
+ register in which function arguments are sometimes passed. */
+
+int
+function_arg_regno_p (int r)
+{
+ return (r >= 8 && r <= 25);
+}
+
+/* Initialize CUM to describe the state at the beginning
+   of the argument list.  */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname,
+ tree fndecl ATTRIBUTE_UNUSED)
+{
+ cum->nregs = 18;
+ cum->regno = FIRST_CUM_REG;
+ if (!libname && stdarg_p (fntype))
+ cum->nregs = 0;
+}
+
+/* Returns the number of registers to allocate for a function argument. */
+
+static int
+avr_num_arg_regs (enum machine_mode mode, const_tree type)
+{
+ int size;
+
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ /* Align all function arguments to start in even-numbered registers.
+ Odd-sized arguments leave holes above them. */
+
+ return (size + 1) & ~1;
+}
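+
+/* Example: a 1-byte char argument still consumes 2 registers, since
+   (1 + 1) & ~1 == 2, and a 3-byte aggregate consumes 4; every
+   argument therefore starts in an even-numbered register.  */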
+
+/* Decide whether a function argument is passed in a register,
+   and if so in which register.  */
+
+static rtx
+avr_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int bytes = avr_num_arg_regs (mode, type);
+
+ if (cum->nregs && bytes <= cum->nregs)
+ return gen_rtx_REG (mode, cum->regno - bytes);
+
+ return NULL_RTX;
+}
+
+/* Update the summarizer variable CUM to advance past an argument
+ in the argument list. */
+
+static void
+avr_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int bytes = avr_num_arg_regs (mode, type);
+
+ cum->nregs -= bytes;
+ cum->regno -= bytes;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->regno = FIRST_CUM_REG;
+ }
+}
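+
+/* A sketch of how the two hooks above cooperate: with FIRST_CUM_REG
+   == 26, the first 2-byte int argument is assigned
+   gen_rtx_REG (HImode, 24), i.e. r25:r24; advancing leaves regno at
+   24, so the next int lands in r23:r22, and so on downwards until
+   nregs is exhausted, after which arguments go on the stack.  */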
+
+/***********************************************************************
+ Functions for outputting various moves for various modes
+************************************************************************/
+const char *
+output_movqi (rtx insn, rtx operands[], int *l)
+{
+ int dummy;
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ int *real_l = l;
+
+ if (!l)
+ l = &dummy;
+
+ *l = 1;
+
+ if (register_operand (dest, QImode))
+ {
+ if (register_operand (src, QImode)) /* mov r,r */
+ {
+ if (test_hard_reg_class (STACK_REG, dest))
+ return AS2 (out,%0,%1);
+ else if (test_hard_reg_class (STACK_REG, src))
+ return AS2 (in,%0,%1);
+
+ return AS2 (mov,%0,%1);
+ }
+ else if (CONSTANT_P (src))
+ {
+ if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */
+ return AS2 (ldi,%0,lo8(%1));
+
+ if (GET_CODE (src) == CONST_INT)
+ {
+ if (src == const0_rtx) /* mov r,L */
+ return AS1 (clr,%0);
+ else if (src == const1_rtx)
+ {
+ *l = 2;
+ return (AS1 (clr,%0) CR_TAB
+ AS1 (inc,%0));
+ }
+ else if (src == constm1_rtx)
+ {
+              /* Load immediate constant -1 into any register.  */
+ *l = 2;
+ return (AS1 (clr,%0) CR_TAB
+ AS1 (dec,%0));
+ }
+ else
+ {
+ int bit_nr = exact_log2 (INTVAL (src));
+
+ if (bit_nr >= 0)
+ {
+ *l = 3;
+ if (!real_l)
+ output_asm_insn ((AS1 (clr,%0) CR_TAB
+ "set"), operands);
+ if (!real_l)
+ avr_output_bld (operands, bit_nr);
+
+ return "";
+ }
+ }
+ }
+
+ /* Last resort, larger than loading from memory. */
+ *l = 4;
+ return (AS2 (mov,__tmp_reg__,r31) CR_TAB
+ AS2 (ldi,r31,lo8(%1)) CR_TAB
+ AS2 (mov,%0,r31) CR_TAB
+ AS2 (mov,r31,__tmp_reg__));
+ }
+ else if (GET_CODE (src) == MEM)
+ return out_movqi_r_mr (insn, operands, real_l); /* mov r,m */
+ }
+ else if (GET_CODE (dest) == MEM)
+ {
+ const char *templ;
+
+ if (src == const0_rtx)
+ operands[1] = zero_reg_rtx;
+
+ templ = out_movqi_mr_r (insn, operands, real_l);
+
+ if (!real_l)
+ output_asm_insn (templ, operands);
+
+ operands[1] = src;
+ }
+ return "";
+}
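+
+/* For illustration, the constant cases above expand roughly as
+   follows (a sketch):
+
+     r = 0    ->  clr rN                    ; 1 insn
+     r = 1    ->  clr rN ; inc rN           ; 2 insns
+     r = -1   ->  clr rN ; dec rN           ; 2 insns
+     r = 2^k  ->  clr rN ; set ; bld rN,k   ; 3 insns
+
+   with the 4-insn round trip through r31 as the last resort for
+   non-LD_REGS destinations.  */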
+
+
+const char *
+output_movhi (rtx insn, rtx operands[], int *l)
+{
+ int dummy;
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ int *real_l = l;
+
+ if (!l)
+ l = &dummy;
+
+ if (register_operand (dest, HImode))
+ {
+ if (register_operand (src, HImode)) /* mov r,r */
+ {
+ if (test_hard_reg_class (STACK_REG, dest))
+ {
+ if (AVR_HAVE_8BIT_SP)
+ return *l = 1, AS2 (out,__SP_L__,%A1);
+ /* Use simple load of stack pointer if no interrupts are
+ used. */
+ else if (TARGET_NO_INTERRUPTS)
+ return *l = 2, (AS2 (out,__SP_H__,%B1) CR_TAB
+ AS2 (out,__SP_L__,%A1));
+ *l = 5;
+ return (AS2 (in,__tmp_reg__,__SREG__) CR_TAB
+ "cli" CR_TAB
+ AS2 (out,__SP_H__,%B1) CR_TAB
+ AS2 (out,__SREG__,__tmp_reg__) CR_TAB
+ AS2 (out,__SP_L__,%A1));
+ }
+ else if (test_hard_reg_class (STACK_REG, src))
+ {
+ *l = 2;
+ return (AS2 (in,%A0,__SP_L__) CR_TAB
+ AS2 (in,%B0,__SP_H__));
+ }
+
+ if (AVR_HAVE_MOVW)
+ {
+ *l = 1;
+ return (AS2 (movw,%0,%1));
+ }
+ else
+ {
+ *l = 2;
+ return (AS2 (mov,%A0,%A1) CR_TAB
+ AS2 (mov,%B0,%B1));
+ }
+ }
+ else if (CONSTANT_P (src))
+ {
+ if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */
+ {
+ *l = 2;
+ return (AS2 (ldi,%A0,lo8(%1)) CR_TAB
+ AS2 (ldi,%B0,hi8(%1)));
+ }
+
+ if (GET_CODE (src) == CONST_INT)
+ {
+ if (src == const0_rtx) /* mov r,L */
+ {
+ *l = 2;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (clr,%B0));
+ }
+ else if (src == const1_rtx)
+ {
+ *l = 3;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (inc,%A0));
+ }
+ else if (src == constm1_rtx)
+ {
+              /* Load immediate constant -1 into any register.  */
+ *l = 3;
+ return (AS1 (clr,%0) CR_TAB
+ AS1 (dec,%A0) CR_TAB
+ AS2 (mov,%B0,%A0));
+ }
+ else
+ {
+ int bit_nr = exact_log2 (INTVAL (src));
+
+ if (bit_nr >= 0)
+ {
+ *l = 4;
+ if (!real_l)
+ output_asm_insn ((AS1 (clr,%A0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ "set"), operands);
+ if (!real_l)
+ avr_output_bld (operands, bit_nr);
+
+ return "";
+ }
+ }
+
+ if ((INTVAL (src) & 0xff) == 0)
+ {
+ *l = 5;
+ return (AS2 (mov,__tmp_reg__,r31) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS2 (ldi,r31,hi8(%1)) CR_TAB
+ AS2 (mov,%B0,r31) CR_TAB
+ AS2 (mov,r31,__tmp_reg__));
+ }
+ else if ((INTVAL (src) & 0xff00) == 0)
+ {
+ *l = 5;
+ return (AS2 (mov,__tmp_reg__,r31) CR_TAB
+ AS2 (ldi,r31,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,r31) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS2 (mov,r31,__tmp_reg__));
+ }
+ }
+
+ /* Last resort, equal to loading from memory. */
+ *l = 6;
+ return (AS2 (mov,__tmp_reg__,r31) CR_TAB
+ AS2 (ldi,r31,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,r31) CR_TAB
+ AS2 (ldi,r31,hi8(%1)) CR_TAB
+ AS2 (mov,%B0,r31) CR_TAB
+ AS2 (mov,r31,__tmp_reg__));
+ }
+ else if (GET_CODE (src) == MEM)
+ return out_movhi_r_mr (insn, operands, real_l); /* mov r,m */
+ }
+ else if (GET_CODE (dest) == MEM)
+ {
+ const char *templ;
+
+ if (src == const0_rtx)
+ operands[1] = zero_reg_rtx;
+
+ templ = out_movhi_mr_r (insn, operands, real_l);
+
+ if (!real_l)
+ output_asm_insn (templ, operands);
+
+ operands[1] = src;
+ return "";
+ }
+ fatal_insn ("invalid insn:", insn);
+ return "";
+}
+
+const char *
+out_movqi_r_mr (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx x = XEXP (src, 0);
+ int dummy;
+
+ if (!l)
+ l = &dummy;
+
+ if (CONSTANT_ADDRESS_P (x))
+ {
+ if (CONST_INT_P (x) && INTVAL (x) == SREG_ADDR)
+ {
+ *l = 1;
+ return AS2 (in,%0,__SREG__);
+ }
+ if (optimize > 0 && io_address_operand (x, QImode))
+ {
+ *l = 1;
+ return AS2 (in,%0,%m1-0x20);
+ }
+ *l = 2;
+ return AS2 (lds,%0,%m1);
+ }
+  /* Memory access by reg+disp.  */
+ else if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x,0))
+ && GET_CODE (XEXP (x,1)) == CONST_INT)
+ {
+ if ((INTVAL (XEXP (x,1)) - GET_MODE_SIZE (GET_MODE (src))) >= 63)
+ {
+ int disp = INTVAL (XEXP (x,1));
+ if (REGNO (XEXP (x,0)) != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src)))
+ return *l = 3, (AS2 (adiw,r28,%o1-63) CR_TAB
+ AS2 (ldd,%0,Y+63) CR_TAB
+ AS2 (sbiw,r28,%o1-63));
+
+ return *l = 5, (AS2 (subi,r28,lo8(-%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o1)) CR_TAB
+ AS2 (ld,%0,Y) CR_TAB
+ AS2 (subi,r28,lo8(%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(%o1)));
+ }
+ else if (REGNO (XEXP (x,0)) == REG_X)
+ {
+          /* This is a paranoid case; LEGITIMIZE_RELOAD_ADDRESS must exclude
+             it, but it still arises with extreme optimization options.  */
+ if (reg_overlap_mentioned_p (dest, XEXP (x,0))
+ || reg_unused_after (insn, XEXP (x,0)))
+ return *l = 2, (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,%0,X));
+
+ return *l = 3, (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,%0,X) CR_TAB
+ AS2 (sbiw,r26,%o1));
+ }
+ *l = 1;
+ return AS2 (ldd,%0,%1);
+ }
+ *l = 1;
+ return AS2 (ld,%0,%1);
+}
+
+const char *
+out_movhi_r_mr (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx base = XEXP (src, 0);
+ int reg_dest = true_regnum (dest);
+ int reg_base = true_regnum (base);
+ /* "volatile" forces reading low byte first, even if less efficient,
+ for correct operation with 16-bit I/O registers. */
+ int mem_volatile_p = MEM_VOLATILE_P (src);
+ int tmp;
+
+ if (!l)
+ l = &tmp;
+
+ if (reg_base > 0)
+ {
+ if (reg_dest == reg_base) /* R = (R) */
+ {
+ *l = 3;
+ return (AS2 (ld,__tmp_reg__,%1+) CR_TAB
+ AS2 (ld,%B0,%1) CR_TAB
+ AS2 (mov,%A0,__tmp_reg__));
+ }
+ else if (reg_base == REG_X) /* (R26) */
+ {
+ if (reg_unused_after (insn, base))
+ {
+ *l = 2;
+ return (AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X));
+ }
+ *l = 3;
+ return (AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X) CR_TAB
+ AS2 (sbiw,r26,1));
+ }
+ else /* (R) */
+ {
+ *l = 2;
+ return (AS2 (ld,%A0,%1) CR_TAB
+ AS2 (ldd,%B0,%1+1));
+ }
+ }
+ else if (GET_CODE (base) == PLUS) /* (R + i) */
+ {
+ int disp = INTVAL (XEXP (base, 1));
+ int reg_base = true_regnum (XEXP (base, 0));
+
+ if (disp > MAX_LD_OFFSET (GET_MODE (src)))
+ {
+ if (REGNO (XEXP (base, 0)) != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src)))
+ return *l = 4, (AS2 (adiw,r28,%o1-62) CR_TAB
+ AS2 (ldd,%A0,Y+62) CR_TAB
+ AS2 (ldd,%B0,Y+63) CR_TAB
+ AS2 (sbiw,r28,%o1-62));
+
+ return *l = 6, (AS2 (subi,r28,lo8(-%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o1)) CR_TAB
+ AS2 (ld,%A0,Y) CR_TAB
+ AS2 (ldd,%B0,Y+1) CR_TAB
+ AS2 (subi,r28,lo8(%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(%o1)));
+ }
+ if (reg_base == REG_X)
+ {
+          /* This is a paranoid case.  LEGITIMIZE_RELOAD_ADDRESS must exclude
+             it, but it still arises with extreme optimization options.  */
+
+ *l = 4;
+ if (reg_base == reg_dest)
+ return (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,__tmp_reg__,X+) CR_TAB
+ AS2 (ld,%B0,X) CR_TAB
+ AS2 (mov,%A0,__tmp_reg__));
+
+ return (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X) CR_TAB
+ AS2 (sbiw,r26,%o1+1));
+ }
+
+ if (reg_base == reg_dest)
+ {
+ *l = 3;
+ return (AS2 (ldd,__tmp_reg__,%A1) CR_TAB
+ AS2 (ldd,%B0,%B1) CR_TAB
+ AS2 (mov,%A0,__tmp_reg__));
+ }
+
+ *l = 2;
+ return (AS2 (ldd,%A0,%A1) CR_TAB
+ AS2 (ldd,%B0,%B1));
+ }
+ else if (GET_CODE (base) == PRE_DEC) /* (--R) */
+ {
+ if (reg_overlap_mentioned_p (dest, XEXP (base, 0)))
+ fatal_insn ("incorrect insn:", insn);
+
+ if (mem_volatile_p)
+ {
+ if (REGNO (XEXP (base, 0)) == REG_X)
+ {
+ *l = 4;
+ return (AS2 (sbiw,r26,2) CR_TAB
+ AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X) CR_TAB
+ AS2 (sbiw,r26,1));
+ }
+ else
+ {
+ *l = 3;
+ return (AS2 (sbiw,%r1,2) CR_TAB
+ AS2 (ld,%A0,%p1) CR_TAB
+ AS2 (ldd,%B0,%p1+1));
+ }
+ }
+
+ *l = 2;
+ return (AS2 (ld,%B0,%1) CR_TAB
+ AS2 (ld,%A0,%1));
+ }
+ else if (GET_CODE (base) == POST_INC) /* (R++) */
+ {
+ if (reg_overlap_mentioned_p (dest, XEXP (base, 0)))
+ fatal_insn ("incorrect insn:", insn);
+
+ *l = 2;
+ return (AS2 (ld,%A0,%1) CR_TAB
+ AS2 (ld,%B0,%1));
+ }
+ else if (CONSTANT_ADDRESS_P (base))
+ {
+ if (optimize > 0 && io_address_operand (base, HImode))
+ {
+ *l = 2;
+ return (AS2 (in,%A0,%m1-0x20) CR_TAB
+ AS2 (in,%B0,%m1+1-0x20));
+ }
+ *l = 4;
+ return (AS2 (lds,%A0,%m1) CR_TAB
+ AS2 (lds,%B0,%m1+1));
+ }
+
+ fatal_insn ("unknown move insn:",insn);
+ return "";
+}
+
+const char *
+out_movsi_r_mr (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx base = XEXP (src, 0);
+ int reg_dest = true_regnum (dest);
+ int reg_base = true_regnum (base);
+ int tmp;
+
+ if (!l)
+ l = &tmp;
+
+ if (reg_base > 0)
+ {
+ if (reg_base == REG_X) /* (R26) */
+ {
+ if (reg_dest == REG_X)
+ /* "ld r26,-X" is undefined */
+ return *l=7, (AS2 (adiw,r26,3) CR_TAB
+ AS2 (ld,r29,X) CR_TAB
+ AS2 (ld,r28,-X) CR_TAB
+ AS2 (ld,__tmp_reg__,-X) CR_TAB
+ AS2 (sbiw,r26,1) CR_TAB
+ AS2 (ld,r26,X) CR_TAB
+ AS2 (mov,r27,__tmp_reg__));
+ else if (reg_dest == REG_X - 2)
+ return *l=5, (AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X+) CR_TAB
+ AS2 (ld,__tmp_reg__,X+) CR_TAB
+ AS2 (ld,%D0,X) CR_TAB
+ AS2 (mov,%C0,__tmp_reg__));
+ else if (reg_unused_after (insn, base))
+ return *l=4, (AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X+) CR_TAB
+ AS2 (ld,%C0,X+) CR_TAB
+ AS2 (ld,%D0,X));
+ else
+ return *l=5, (AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X+) CR_TAB
+ AS2 (ld,%C0,X+) CR_TAB
+ AS2 (ld,%D0,X) CR_TAB
+ AS2 (sbiw,r26,3));
+ }
+ else
+ {
+ if (reg_dest == reg_base)
+ return *l=5, (AS2 (ldd,%D0,%1+3) CR_TAB
+ AS2 (ldd,%C0,%1+2) CR_TAB
+ AS2 (ldd,__tmp_reg__,%1+1) CR_TAB
+ AS2 (ld,%A0,%1) CR_TAB
+ AS2 (mov,%B0,__tmp_reg__));
+ else if (reg_base == reg_dest + 2)
+ return *l=5, (AS2 (ld ,%A0,%1) CR_TAB
+ AS2 (ldd,%B0,%1+1) CR_TAB
+ AS2 (ldd,__tmp_reg__,%1+2) CR_TAB
+ AS2 (ldd,%D0,%1+3) CR_TAB
+ AS2 (mov,%C0,__tmp_reg__));
+ else
+ return *l=4, (AS2 (ld ,%A0,%1) CR_TAB
+ AS2 (ldd,%B0,%1+1) CR_TAB
+ AS2 (ldd,%C0,%1+2) CR_TAB
+ AS2 (ldd,%D0,%1+3));
+ }
+ }
+ else if (GET_CODE (base) == PLUS) /* (R + i) */
+ {
+ int disp = INTVAL (XEXP (base, 1));
+
+ if (disp > MAX_LD_OFFSET (GET_MODE (src)))
+ {
+ if (REGNO (XEXP (base, 0)) != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src)))
+ return *l = 6, (AS2 (adiw,r28,%o1-60) CR_TAB
+ AS2 (ldd,%A0,Y+60) CR_TAB
+ AS2 (ldd,%B0,Y+61) CR_TAB
+ AS2 (ldd,%C0,Y+62) CR_TAB
+ AS2 (ldd,%D0,Y+63) CR_TAB
+ AS2 (sbiw,r28,%o1-60));
+
+ return *l = 8, (AS2 (subi,r28,lo8(-%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o1)) CR_TAB
+ AS2 (ld,%A0,Y) CR_TAB
+ AS2 (ldd,%B0,Y+1) CR_TAB
+ AS2 (ldd,%C0,Y+2) CR_TAB
+ AS2 (ldd,%D0,Y+3) CR_TAB
+ AS2 (subi,r28,lo8(%o1)) CR_TAB
+ AS2 (sbci,r29,hi8(%o1)));
+ }
+
+ reg_base = true_regnum (XEXP (base, 0));
+ if (reg_base == REG_X)
+ {
+ /* R = (X + d) */
+ if (reg_dest == REG_X)
+ {
+ *l = 7;
+ /* "ld r26,-X" is undefined */
+ return (AS2 (adiw,r26,%o1+3) CR_TAB
+ AS2 (ld,r29,X) CR_TAB
+ AS2 (ld,r28,-X) CR_TAB
+ AS2 (ld,__tmp_reg__,-X) CR_TAB
+ AS2 (sbiw,r26,1) CR_TAB
+ AS2 (ld,r26,X) CR_TAB
+ AS2 (mov,r27,__tmp_reg__));
+ }
+ *l = 6;
+ if (reg_dest == REG_X - 2)
+ return (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,r24,X+) CR_TAB
+ AS2 (ld,r25,X+) CR_TAB
+ AS2 (ld,__tmp_reg__,X+) CR_TAB
+ AS2 (ld,r27,X) CR_TAB
+ AS2 (mov,r26,__tmp_reg__));
+
+ return (AS2 (adiw,r26,%o1) CR_TAB
+ AS2 (ld,%A0,X+) CR_TAB
+ AS2 (ld,%B0,X+) CR_TAB
+ AS2 (ld,%C0,X+) CR_TAB
+ AS2 (ld,%D0,X) CR_TAB
+ AS2 (sbiw,r26,%o1+3));
+ }
+ if (reg_dest == reg_base)
+ return *l=5, (AS2 (ldd,%D0,%D1) CR_TAB
+ AS2 (ldd,%C0,%C1) CR_TAB
+ AS2 (ldd,__tmp_reg__,%B1) CR_TAB
+ AS2 (ldd,%A0,%A1) CR_TAB
+ AS2 (mov,%B0,__tmp_reg__));
+ else if (reg_dest == reg_base - 2)
+ return *l=5, (AS2 (ldd,%A0,%A1) CR_TAB
+ AS2 (ldd,%B0,%B1) CR_TAB
+ AS2 (ldd,__tmp_reg__,%C1) CR_TAB
+ AS2 (ldd,%D0,%D1) CR_TAB
+ AS2 (mov,%C0,__tmp_reg__));
+ return *l=4, (AS2 (ldd,%A0,%A1) CR_TAB
+ AS2 (ldd,%B0,%B1) CR_TAB
+ AS2 (ldd,%C0,%C1) CR_TAB
+ AS2 (ldd,%D0,%D1));
+ }
+ else if (GET_CODE (base) == PRE_DEC) /* (--R) */
+ return *l=4, (AS2 (ld,%D0,%1) CR_TAB
+ AS2 (ld,%C0,%1) CR_TAB
+ AS2 (ld,%B0,%1) CR_TAB
+ AS2 (ld,%A0,%1));
+ else if (GET_CODE (base) == POST_INC) /* (R++) */
+ return *l=4, (AS2 (ld,%A0,%1) CR_TAB
+ AS2 (ld,%B0,%1) CR_TAB
+ AS2 (ld,%C0,%1) CR_TAB
+ AS2 (ld,%D0,%1));
+ else if (CONSTANT_ADDRESS_P (base))
+ return *l=8, (AS2 (lds,%A0,%m1) CR_TAB
+ AS2 (lds,%B0,%m1+1) CR_TAB
+ AS2 (lds,%C0,%m1+2) CR_TAB
+ AS2 (lds,%D0,%m1+3));
+
+ fatal_insn ("unknown move insn:",insn);
+ return "";
+}
+
+const char *
+out_movsi_mr_r (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx base = XEXP (dest, 0);
+ int reg_base = true_regnum (base);
+ int reg_src = true_regnum (src);
+ int tmp;
+
+ if (!l)
+ l = &tmp;
+
+ if (CONSTANT_ADDRESS_P (base))
+ return *l=8,(AS2 (sts,%m0,%A1) CR_TAB
+ AS2 (sts,%m0+1,%B1) CR_TAB
+ AS2 (sts,%m0+2,%C1) CR_TAB
+ AS2 (sts,%m0+3,%D1));
+ if (reg_base > 0) /* (r) */
+ {
+ if (reg_base == REG_X) /* (R26) */
+ {
+ if (reg_src == REG_X)
+ {
+ /* "st X+,r26" is undefined */
+ if (reg_unused_after (insn, base))
+ return *l=6, (AS2 (mov,__tmp_reg__,r27) CR_TAB
+ AS2 (st,X,r26) CR_TAB
+ AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X+,__tmp_reg__) CR_TAB
+ AS2 (st,X+,r28) CR_TAB
+ AS2 (st,X,r29));
+ else
+ return *l=7, (AS2 (mov,__tmp_reg__,r27) CR_TAB
+ AS2 (st,X,r26) CR_TAB
+ AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X+,__tmp_reg__) CR_TAB
+ AS2 (st,X+,r28) CR_TAB
+ AS2 (st,X,r29) CR_TAB
+ AS2 (sbiw,r26,3));
+ }
+ else if (reg_base == reg_src + 2)
+ {
+ if (reg_unused_after (insn, base))
+ return *l=7, (AS2 (mov,__zero_reg__,%C1) CR_TAB
+ AS2 (mov,__tmp_reg__,%D1) CR_TAB
+ AS2 (st,%0+,%A1) CR_TAB
+ AS2 (st,%0+,%B1) CR_TAB
+ AS2 (st,%0+,__zero_reg__) CR_TAB
+ AS2 (st,%0,__tmp_reg__) CR_TAB
+ AS1 (clr,__zero_reg__));
+ else
+ return *l=8, (AS2 (mov,__zero_reg__,%C1) CR_TAB
+ AS2 (mov,__tmp_reg__,%D1) CR_TAB
+ AS2 (st,%0+,%A1) CR_TAB
+ AS2 (st,%0+,%B1) CR_TAB
+ AS2 (st,%0+,__zero_reg__) CR_TAB
+ AS2 (st,%0,__tmp_reg__) CR_TAB
+ AS1 (clr,__zero_reg__) CR_TAB
+ AS2 (sbiw,r26,3));
+ }
+ return *l=5, (AS2 (st,%0+,%A1) CR_TAB
+ AS2 (st,%0+,%B1) CR_TAB
+ AS2 (st,%0+,%C1) CR_TAB
+ AS2 (st,%0,%D1) CR_TAB
+ AS2 (sbiw,r26,3));
+ }
+ else
+ return *l=4, (AS2 (st,%0,%A1) CR_TAB
+ AS2 (std,%0+1,%B1) CR_TAB
+ AS2 (std,%0+2,%C1) CR_TAB
+ AS2 (std,%0+3,%D1));
+ }
+ else if (GET_CODE (base) == PLUS) /* (R + i) */
+ {
+ int disp = INTVAL (XEXP (base, 1));
+ reg_base = REGNO (XEXP (base, 0));
+ if (disp > MAX_LD_OFFSET (GET_MODE (dest)))
+ {
+ if (reg_base != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)))
+ return *l = 6, (AS2 (adiw,r28,%o0-60) CR_TAB
+ AS2 (std,Y+60,%A1) CR_TAB
+ AS2 (std,Y+61,%B1) CR_TAB
+ AS2 (std,Y+62,%C1) CR_TAB
+ AS2 (std,Y+63,%D1) CR_TAB
+ AS2 (sbiw,r28,%o0-60));
+
+ return *l = 8, (AS2 (subi,r28,lo8(-%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o0)) CR_TAB
+ AS2 (st,Y,%A1) CR_TAB
+ AS2 (std,Y+1,%B1) CR_TAB
+ AS2 (std,Y+2,%C1) CR_TAB
+ AS2 (std,Y+3,%D1) CR_TAB
+ AS2 (subi,r28,lo8(%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(%o0)));
+ }
+ if (reg_base == REG_X)
+ {
+ /* (X + d) = R */
+ if (reg_src == REG_X)
+ {
+ *l = 9;
+ return (AS2 (mov,__tmp_reg__,r26) CR_TAB
+ AS2 (mov,__zero_reg__,r27) CR_TAB
+ AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X+,__tmp_reg__) CR_TAB
+ AS2 (st,X+,__zero_reg__) CR_TAB
+ AS2 (st,X+,r28) CR_TAB
+ AS2 (st,X,r29) CR_TAB
+ AS1 (clr,__zero_reg__) CR_TAB
+ AS2 (sbiw,r26,%o0+3));
+ }
+ else if (reg_src == REG_X - 2)
+ {
+ *l = 9;
+ return (AS2 (mov,__tmp_reg__,r26) CR_TAB
+ AS2 (mov,__zero_reg__,r27) CR_TAB
+ AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X+,r24) CR_TAB
+ AS2 (st,X+,r25) CR_TAB
+ AS2 (st,X+,__tmp_reg__) CR_TAB
+ AS2 (st,X,__zero_reg__) CR_TAB
+ AS1 (clr,__zero_reg__) CR_TAB
+ AS2 (sbiw,r26,%o0+3));
+ }
+ *l = 6;
+ return (AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X+,%A1) CR_TAB
+ AS2 (st,X+,%B1) CR_TAB
+ AS2 (st,X+,%C1) CR_TAB
+ AS2 (st,X,%D1) CR_TAB
+ AS2 (sbiw,r26,%o0+3));
+ }
+ return *l=4, (AS2 (std,%A0,%A1) CR_TAB
+ AS2 (std,%B0,%B1) CR_TAB
+ AS2 (std,%C0,%C1) CR_TAB
+ AS2 (std,%D0,%D1));
+ }
+ else if (GET_CODE (base) == PRE_DEC) /* (--R) */
+ return *l=4, (AS2 (st,%0,%D1) CR_TAB
+ AS2 (st,%0,%C1) CR_TAB
+ AS2 (st,%0,%B1) CR_TAB
+ AS2 (st,%0,%A1));
+ else if (GET_CODE (base) == POST_INC) /* (R++) */
+ return *l=4, (AS2 (st,%0,%A1) CR_TAB
+ AS2 (st,%0,%B1) CR_TAB
+ AS2 (st,%0,%C1) CR_TAB
+ AS2 (st,%0,%D1));
+ fatal_insn ("unknown move insn:",insn);
+ return "";
+}
+
+const char *
+output_movsisf (rtx insn, rtx operands[], int *l)
+{
+ int dummy;
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ int *real_l = l;
+
+ if (!l)
+ l = &dummy;
+
+ if (register_operand (dest, VOIDmode))
+ {
+ if (register_operand (src, VOIDmode)) /* mov r,r */
+ {
+ if (true_regnum (dest) > true_regnum (src))
+ {
+ if (AVR_HAVE_MOVW)
+ {
+ *l = 2;
+ return (AS2 (movw,%C0,%C1) CR_TAB
+ AS2 (movw,%A0,%A1));
+ }
+ *l = 4;
+ return (AS2 (mov,%D0,%D1) CR_TAB
+ AS2 (mov,%C0,%C1) CR_TAB
+ AS2 (mov,%B0,%B1) CR_TAB
+ AS2 (mov,%A0,%A1));
+ }
+ else
+ {
+ if (AVR_HAVE_MOVW)
+ {
+ *l = 2;
+ return (AS2 (movw,%A0,%A1) CR_TAB
+ AS2 (movw,%C0,%C1));
+ }
+ *l = 4;
+ return (AS2 (mov,%A0,%A1) CR_TAB
+ AS2 (mov,%B0,%B1) CR_TAB
+ AS2 (mov,%C0,%C1) CR_TAB
+ AS2 (mov,%D0,%D1));
+ }
+ }
+ else if (CONSTANT_P (src))
+ {
+ if (test_hard_reg_class (LD_REGS, dest)) /* ldi d,i */
+ {
+ *l = 4;
+ return (AS2 (ldi,%A0,lo8(%1)) CR_TAB
+ AS2 (ldi,%B0,hi8(%1)) CR_TAB
+ AS2 (ldi,%C0,hlo8(%1)) CR_TAB
+ AS2 (ldi,%D0,hhi8(%1)));
+ }
+
+ if (GET_CODE (src) == CONST_INT)
+ {
+ const char *const clr_op0 =
+ AVR_HAVE_MOVW ? (AS1 (clr,%A0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS2 (movw,%C0,%A0))
+ : (AS1 (clr,%A0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+
+ if (src == const0_rtx) /* mov r,L */
+ {
+ *l = AVR_HAVE_MOVW ? 3 : 4;
+ return clr_op0;
+ }
+ else if (src == const1_rtx)
+ {
+ if (!real_l)
+ output_asm_insn (clr_op0, operands);
+ *l = AVR_HAVE_MOVW ? 4 : 5;
+ return AS1 (inc,%A0);
+ }
+ else if (src == constm1_rtx)
+ {
+	  /* Load the immediate constant -1 into any register.  */
+ if (AVR_HAVE_MOVW)
+ {
+ *l = 4;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (dec,%A0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (movw,%C0,%A0));
+ }
+ *l = 5;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (dec,%A0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (mov,%C0,%A0) CR_TAB
+ AS2 (mov,%D0,%A0));
+ }
+ else
+ {
+ int bit_nr = exact_log2 (INTVAL (src));
+
+ if (bit_nr >= 0)
+ {
+ *l = AVR_HAVE_MOVW ? 5 : 6;
+ if (!real_l)
+ {
+ output_asm_insn (clr_op0, operands);
+ output_asm_insn ("set", operands);
+ }
+ if (!real_l)
+ avr_output_bld (operands, bit_nr);
+
+ return "";
+ }
+ }
+ }
+
+ /* Last resort, better than loading from memory. */
+ *l = 10;
+ return (AS2 (mov,__tmp_reg__,r31) CR_TAB
+ AS2 (ldi,r31,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,r31) CR_TAB
+ AS2 (ldi,r31,hi8(%1)) CR_TAB
+ AS2 (mov,%B0,r31) CR_TAB
+ AS2 (ldi,r31,hlo8(%1)) CR_TAB
+ AS2 (mov,%C0,r31) CR_TAB
+ AS2 (ldi,r31,hhi8(%1)) CR_TAB
+ AS2 (mov,%D0,r31) CR_TAB
+ AS2 (mov,r31,__tmp_reg__));
+ }
+ else if (GET_CODE (src) == MEM)
+ return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */
+ }
+ else if (GET_CODE (dest) == MEM)
+ {
+ const char *templ;
+
+ if (src == const0_rtx)
+ operands[1] = zero_reg_rtx;
+
+ templ = out_movsi_mr_r (insn, operands, real_l);
+
+ if (!real_l)
+ output_asm_insn (templ, operands);
+
+ operands[1] = src;
+ return "";
+ }
+ fatal_insn ("invalid insn:", insn);
+ return "";
+}
+
+const char *
+out_movqi_mr_r (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx x = XEXP (dest, 0);
+ int dummy;
+
+ if (!l)
+ l = &dummy;
+
+ if (CONSTANT_ADDRESS_P (x))
+ {
+ if (CONST_INT_P (x) && INTVAL (x) == SREG_ADDR)
+ {
+ *l = 1;
+ return AS2 (out,__SREG__,%1);
+ }
+ if (optimize > 0 && io_address_operand (x, QImode))
+ {
+ *l = 1;
+ return AS2 (out,%m0-0x20,%1);
+ }
+ *l = 2;
+ return AS2 (sts,%m0,%1);
+ }
+  /* Memory access by reg+disp.  */
+ else if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x,0))
+ && GET_CODE (XEXP (x,1)) == CONST_INT)
+ {
+ if ((INTVAL (XEXP (x,1)) - GET_MODE_SIZE (GET_MODE (dest))) >= 63)
+ {
+ int disp = INTVAL (XEXP (x,1));
+ if (REGNO (XEXP (x,0)) != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)))
+ return *l = 3, (AS2 (adiw,r28,%o0-63) CR_TAB
+ AS2 (std,Y+63,%1) CR_TAB
+ AS2 (sbiw,r28,%o0-63));
+
+ return *l = 5, (AS2 (subi,r28,lo8(-%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o0)) CR_TAB
+ AS2 (st,Y,%1) CR_TAB
+ AS2 (subi,r28,lo8(%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(%o0)));
+ }
+ else if (REGNO (XEXP (x,0)) == REG_X)
+ {
+ if (reg_overlap_mentioned_p (src, XEXP (x, 0)))
+ {
+ if (reg_unused_after (insn, XEXP (x,0)))
+ return *l = 3, (AS2 (mov,__tmp_reg__,%1) CR_TAB
+ AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X,__tmp_reg__));
+
+ return *l = 4, (AS2 (mov,__tmp_reg__,%1) CR_TAB
+ AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X,__tmp_reg__) CR_TAB
+ AS2 (sbiw,r26,%o0));
+ }
+ else
+ {
+ if (reg_unused_after (insn, XEXP (x,0)))
+ return *l = 2, (AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X,%1));
+
+ return *l = 3, (AS2 (adiw,r26,%o0) CR_TAB
+ AS2 (st,X,%1) CR_TAB
+ AS2 (sbiw,r26,%o0));
+ }
+ }
+ *l = 1;
+ return AS2 (std,%0,%1);
+ }
+ *l = 1;
+ return AS2 (st,%0,%1);
+}
+
+const char *
+out_movhi_mr_r (rtx insn, rtx op[], int *l)
+{
+ rtx dest = op[0];
+ rtx src = op[1];
+ rtx base = XEXP (dest, 0);
+ int reg_base = true_regnum (base);
+ int reg_src = true_regnum (src);
+ /* "volatile" forces writing high byte first, even if less efficient,
+ for correct operation with 16-bit I/O registers. */
+ int mem_volatile_p = MEM_VOLATILE_P (dest);
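+  /* E.g. the 16-bit timer/counter registers: the device latches the high
+     byte internally and commits the whole 16-bit value on the low byte
+     write, so the high byte must be written first (see the "Accessing
+     16-bit Registers" section of the AVR data sheets).  */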
+ int tmp;
+
+ if (!l)
+ l = &tmp;
+ if (CONSTANT_ADDRESS_P (base))
+ {
+ if (optimize > 0 && io_address_operand (base, HImode))
+ {
+ *l = 2;
+ return (AS2 (out,%m0+1-0x20,%B1) CR_TAB
+ AS2 (out,%m0-0x20,%A1));
+ }
+ return *l = 4, (AS2 (sts,%m0+1,%B1) CR_TAB
+ AS2 (sts,%m0,%A1));
+ }
+ if (reg_base > 0)
+ {
+ if (reg_base == REG_X)
+ {
+ if (reg_src == REG_X)
+ {
+ /* "st X+,r26" and "st -X,r26" are undefined. */
+ if (!mem_volatile_p && reg_unused_after (insn, src))
+ return *l=4, (AS2 (mov,__tmp_reg__,r27) CR_TAB
+ AS2 (st,X,r26) CR_TAB
+ AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X,__tmp_reg__));
+ else
+ return *l=5, (AS2 (mov,__tmp_reg__,r27) CR_TAB
+ AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X,__tmp_reg__) CR_TAB
+ AS2 (sbiw,r26,1) CR_TAB
+ AS2 (st,X,r26));
+ }
+ else
+ {
+ if (!mem_volatile_p && reg_unused_after (insn, base))
+ return *l=2, (AS2 (st,X+,%A1) CR_TAB
+ AS2 (st,X,%B1));
+ else
+ return *l=3, (AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X,%B1) CR_TAB
+ AS2 (st,-X,%A1));
+ }
+ }
+ else
+ return *l=2, (AS2 (std,%0+1,%B1) CR_TAB
+ AS2 (st,%0,%A1));
+ }
+ else if (GET_CODE (base) == PLUS)
+ {
+ int disp = INTVAL (XEXP (base, 1));
+ reg_base = REGNO (XEXP (base, 0));
+ if (disp > MAX_LD_OFFSET (GET_MODE (dest)))
+ {
+ if (reg_base != REG_Y)
+ fatal_insn ("incorrect insn:",insn);
+
+ if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)))
+ return *l = 4, (AS2 (adiw,r28,%o0-62) CR_TAB
+ AS2 (std,Y+63,%B1) CR_TAB
+ AS2 (std,Y+62,%A1) CR_TAB
+ AS2 (sbiw,r28,%o0-62));
+
+ return *l = 6, (AS2 (subi,r28,lo8(-%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(-%o0)) CR_TAB
+ AS2 (std,Y+1,%B1) CR_TAB
+ AS2 (st,Y,%A1) CR_TAB
+ AS2 (subi,r28,lo8(%o0)) CR_TAB
+ AS2 (sbci,r29,hi8(%o0)));
+ }
+ if (reg_base == REG_X)
+ {
+ /* (X + d) = R */
+ if (reg_src == REG_X)
+ {
+ *l = 7;
+ return (AS2 (mov,__tmp_reg__,r26) CR_TAB
+ AS2 (mov,__zero_reg__,r27) CR_TAB
+ AS2 (adiw,r26,%o0+1) CR_TAB
+ AS2 (st,X,__zero_reg__) CR_TAB
+ AS2 (st,-X,__tmp_reg__) CR_TAB
+ AS1 (clr,__zero_reg__) CR_TAB
+ AS2 (sbiw,r26,%o0));
+ }
+ *l = 4;
+ return (AS2 (adiw,r26,%o0+1) CR_TAB
+ AS2 (st,X,%B1) CR_TAB
+ AS2 (st,-X,%A1) CR_TAB
+ AS2 (sbiw,r26,%o0));
+ }
+ return *l=2, (AS2 (std,%B0,%B1) CR_TAB
+ AS2 (std,%A0,%A1));
+ }
+ else if (GET_CODE (base) == PRE_DEC) /* (--R) */
+ return *l=2, (AS2 (st,%0,%B1) CR_TAB
+ AS2 (st,%0,%A1));
+ else if (GET_CODE (base) == POST_INC) /* (R++) */
+ {
+ if (mem_volatile_p)
+ {
+ if (REGNO (XEXP (base, 0)) == REG_X)
+ {
+ *l = 4;
+ return (AS2 (adiw,r26,1) CR_TAB
+ AS2 (st,X,%B1) CR_TAB
+ AS2 (st,-X,%A1) CR_TAB
+ AS2 (adiw,r26,2));
+ }
+ else
+ {
+ *l = 3;
+ return (AS2 (std,%p0+1,%B1) CR_TAB
+ AS2 (st,%p0,%A1) CR_TAB
+ AS2 (adiw,%r0,2));
+ }
+ }
+
+ *l = 2;
+ return (AS2 (st,%0,%A1) CR_TAB
+ AS2 (st,%0,%B1));
+ }
+ fatal_insn ("unknown move insn:",insn);
+ return "";
+}
+
+/* Return 1 if the frame pointer is required for the current function.  */
+
+bool
+avr_frame_pointer_required_p (void)
+{
+ return (cfun->calls_alloca
+ || crtl->args.info.nregs == 0
+ || get_frame_size () > 0);
+}
+
+/* Returns the condition of compare insn INSN, or UNKNOWN. */
+
+static RTX_CODE
+compare_condition (rtx insn)
+{
+ rtx next = next_real_insn (insn);
+ RTX_CODE cond = UNKNOWN;
+ if (next && GET_CODE (next) == JUMP_INSN)
+ {
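+      /* Assumed insn shape for a cc0-style conditional branch:
+           (set (pc) (if_then_else (<cond> (cc0) (const_int 0))
+                                   (label_ref ...) (pc)))
+         so the comparison code is GET_CODE of XEXP (SET_SRC (pat), 0).  */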
+ rtx pat = PATTERN (next);
+ rtx src = SET_SRC (pat);
+ rtx t = XEXP (src, 0);
+ cond = GET_CODE (t);
+ }
+ return cond;
+}
+
+/* Returns nonzero if INSN is a tst insn that only tests the sign. */
+
+static int
+compare_sign_p (rtx insn)
+{
+ RTX_CODE cond = compare_condition (insn);
+ return (cond == GE || cond == LT);
+}
+
+/* Returns nonzero if the next insn is a JUMP_INSN with a condition
+ that needs to be swapped (GT, GTU, LE, LEU). */
+
+int
+compare_diff_p (rtx insn)
+{
+ RTX_CODE cond = compare_condition (insn);
+ return (cond == GT || cond == GTU || cond == LE || cond == LEU) ? cond : 0;
+}
+
+/* Returns nonzero if INSN is a compare insn with the EQ or NE condition. */
+
+int
+compare_eq_p (rtx insn)
+{
+ RTX_CODE cond = compare_condition (insn);
+ return (cond == EQ || cond == NE);
+}
+
+
+/* Output test instruction for HImode. */
+
+const char *
+out_tsthi (rtx insn, rtx op, int *l)
+{
+ if (compare_sign_p (insn))
+ {
+ if (l) *l = 1;
+ return AS1 (tst,%B0);
+ }
+ if (reg_unused_after (insn, op)
+ && compare_eq_p (insn))
+ {
+ /* Faster than sbiw if we can clobber the operand. */
+ if (l) *l = 1;
+ return "or %A0,%B0";
+ }
+ if (test_hard_reg_class (ADDW_REGS, op))
+ {
+ if (l) *l = 1;
+ return AS2 (sbiw,%0,0);
+ }
+ if (l) *l = 2;
+ return (AS2 (cp,%A0,__zero_reg__) CR_TAB
+ AS2 (cpc,%B0,__zero_reg__));
+}
+
+
+/* Output test instruction for SImode. */
+
+const char *
+out_tstsi (rtx insn, rtx op, int *l)
+{
+ if (compare_sign_p (insn))
+ {
+ if (l) *l = 1;
+ return AS1 (tst,%D0);
+ }
+ if (test_hard_reg_class (ADDW_REGS, op))
+ {
+ if (l) *l = 3;
+ return (AS2 (sbiw,%A0,0) CR_TAB
+ AS2 (cpc,%C0,__zero_reg__) CR_TAB
+ AS2 (cpc,%D0,__zero_reg__));
+ }
+ if (l) *l = 4;
+ return (AS2 (cp,%A0,__zero_reg__) CR_TAB
+ AS2 (cpc,%B0,__zero_reg__) CR_TAB
+ AS2 (cpc,%C0,__zero_reg__) CR_TAB
+ AS2 (cpc,%D0,__zero_reg__));
+}
+
+
+/* Generate asm equivalent for various shifts.
+ Shift count is a CONST_INT, MEM or REG.
+ This only handles cases that are not already
+ carefully hand-optimized in ?sh??i3_out. */
+
+void
+out_shift_with_cnt (const char *templ, rtx insn, rtx operands[],
+ int *len, int t_len)
+{
+ rtx op[10];
+ char str[500];
+ int second_label = 1;
+ int saved_in_tmp = 0;
+ int use_zero_reg = 0;
+
+ op[0] = operands[0];
+ op[1] = operands[1];
+ op[2] = operands[2];
+ op[3] = operands[3];
+ str[0] = 0;
+
+ if (len)
+ *len = 1;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL);
+ int count = INTVAL (operands[2]);
+ int max_len = 10; /* If larger than this, always use a loop. */
+
+ if (count <= 0)
+ {
+ if (len)
+ *len = 0;
+ return;
+ }
+
+ if (count < 8 && !scratch)
+ use_zero_reg = 1;
+
+ if (optimize_size)
+ max_len = t_len + (scratch ? 3 : (use_zero_reg ? 4 : 5));
+
+ if (t_len * count <= max_len)
+ {
+ /* Output shifts inline with no loop - faster. */
+ if (len)
+ *len = t_len * count;
+ else
+ {
+ while (count-- > 0)
+ output_asm_insn (templ, op);
+ }
+
+ return;
+ }
+
+ if (scratch)
+ {
+ if (!len)
+ strcat (str, AS2 (ldi,%3,%2));
+ }
+ else if (use_zero_reg)
+ {
+ /* Hack to save one word: use __zero_reg__ as loop counter.
+ Set one bit, then shift in a loop until it is 0 again. */
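+
+	  /* Sketch of the resulting loop for a shift count of %2
+	     (a comment-only illustration, not emitted verbatim):
+
+		 set
+		 bld  __zero_reg__,%2-1
+	     1:  <shift template>
+		 lsr  __zero_reg__
+		 brne 1b
+
+	     __zero_reg__ is 0 again when the loop falls through.  */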
+
+ op[3] = zero_reg_rtx;
+ if (len)
+ *len = 2;
+ else
+ strcat (str, ("set" CR_TAB
+ AS2 (bld,%3,%2-1)));
+ }
+ else
+ {
+ /* No scratch register available, use one from LD_REGS (saved in
+ __tmp_reg__) that doesn't overlap with registers to shift. */
+
+ op[3] = gen_rtx_REG (QImode,
+ ((true_regnum (operands[0]) - 1) & 15) + 16);
+ op[4] = tmp_reg_rtx;
+ saved_in_tmp = 1;
+
+ if (len)
+ *len = 3; /* Includes "mov %3,%4" after the loop. */
+ else
+ strcat (str, (AS2 (mov,%4,%3) CR_TAB
+ AS2 (ldi,%3,%2)));
+ }
+
+ second_label = 0;
+ }
+ else if (GET_CODE (operands[2]) == MEM)
+ {
+ rtx op_mov[10];
+
+ op[3] = op_mov[0] = tmp_reg_rtx;
+ op_mov[1] = op[2];
+
+ if (len)
+ out_movqi_r_mr (insn, op_mov, len);
+ else
+ output_asm_insn (out_movqi_r_mr (insn, op_mov, NULL), op_mov);
+ }
+ else if (register_operand (operands[2], QImode))
+ {
+ if (reg_unused_after (insn, operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ op[3] = op[2];
+ }
+ else
+ {
+ op[3] = tmp_reg_rtx;
+ if (!len)
+ strcat (str, (AS2 (mov,%3,%2) CR_TAB));
+ }
+ }
+ else
+ fatal_insn ("bad shift insn:", insn);
+
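+  /* A sketch (not emitted verbatim) of the loop assembled below for a
+     register shift count, e.g. for an HImode left shift (t_len == 2):
+
+	     mov  __tmp_reg__,%2
+	     rjmp 2f
+	 1:  lsl  %A0
+	     rol  %B0
+	 2:  dec  __tmp_reg__
+	     brpl 1b
+
+     The rjmp enters at the decrement, so the body executes %2 times.  */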
+ if (second_label)
+ {
+ if (len)
+ ++*len;
+ else
+ strcat (str, AS1 (rjmp,2f));
+ }
+
+ if (len)
+ *len += t_len + 2; /* template + dec + brXX */
+ else
+ {
+ strcat (str, "\n1:\t");
+ strcat (str, templ);
+ strcat (str, second_label ? "\n2:\t" : "\n\t");
+ strcat (str, use_zero_reg ? AS1 (lsr,%3) : AS1 (dec,%3));
+ strcat (str, CR_TAB);
+ strcat (str, second_label ? AS1 (brpl,1b) : AS1 (brne,1b));
+ if (saved_in_tmp)
+ strcat (str, (CR_TAB AS2 (mov,%3,%4)));
+ output_asm_insn (str, op);
+ }
+}
+
+
+/* 8bit shift left ((char)x << i) */
+
+const char *
+ashlqi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 8)
+ break;
+
+ *len = 1;
+ return AS1 (clr,%0);
+
+ case 1:
+ *len = 1;
+ return AS1 (lsl,%0);
+
+ case 2:
+ *len = 2;
+ return (AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0));
+
+ case 3:
+ *len = 3;
+ return (AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0));
+
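+      /* A shift by 4 can use the nibble-swap idiom: SWAP exchanges the
+	 two nibbles and ANDI masks away the one shifted out.  ANDI only
+	 accepts r16..r31, hence the LD_REGS class test below.  */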
+ case 4:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+ *len = 2;
+ return (AS1 (swap,%0) CR_TAB
+ AS2 (andi,%0,0xf0));
+ }
+ *len = 4;
+ return (AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0));
+
+ case 5:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+ *len = 3;
+ return (AS1 (swap,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS2 (andi,%0,0xe0));
+ }
+ *len = 5;
+ return (AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0));
+
+ case 6:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+ *len = 4;
+ return (AS1 (swap,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS2 (andi,%0,0xc0));
+ }
+ *len = 6;
+ return (AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS1 (lsl,%0));
+
+ case 7:
+ *len = 3;
+ return (AS1 (ror,%0) CR_TAB
+ AS1 (clr,%0) CR_TAB
+ AS1 (ror,%0));
+ }
+ }
+ else if (CONSTANT_P (operands[2]))
+ fatal_insn ("internal compiler error. Incorrect shift:", insn);
+
+ out_shift_with_cnt (AS1 (lsl,%0),
+ insn, operands, len, 1);
+ return "";
+}
+
+
+/* 16bit shift left ((short)x << i) */
+
+const char *
+ashlhi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL);
+ int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 16)
+ break;
+
+ *len = 2;
+ return (AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 4:
+ if (optimize_size && scratch)
+ break; /* 5 */
+ if (ldi_ok)
+ {
+ *len = 6;
+ return (AS1 (swap,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (andi,%B0,0xf0) CR_TAB
+ AS2 (eor,%B0,%A0) CR_TAB
+ AS2 (andi,%A0,0xf0) CR_TAB
+ AS2 (eor,%B0,%A0));
+ }
+ if (scratch)
+ {
+ *len = 7;
+ return (AS1 (swap,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (ldi,%3,0xf0) CR_TAB
+ "and %B0,%3" CR_TAB
+ AS2 (eor,%B0,%A0) CR_TAB
+ "and %A0,%3" CR_TAB
+ AS2 (eor,%B0,%A0));
+ }
+ break; /* optimize_size ? 6 : 8 */
+
+ case 5:
+ if (optimize_size)
+ break; /* scratch ? 5 : 6 */
+ if (ldi_ok)
+ {
+ *len = 8;
+ return (AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (andi,%B0,0xf0) CR_TAB
+ AS2 (eor,%B0,%A0) CR_TAB
+ AS2 (andi,%A0,0xf0) CR_TAB
+ AS2 (eor,%B0,%A0));
+ }
+ if (scratch)
+ {
+ *len = 9;
+ return (AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (ldi,%3,0xf0) CR_TAB
+ "and %B0,%3" CR_TAB
+ AS2 (eor,%B0,%A0) CR_TAB
+ "and %A0,%3" CR_TAB
+ AS2 (eor,%B0,%A0));
+ }
+ break; /* 10 */
+
+ case 6:
+ if (optimize_size)
+ break; /* scratch ? 5 : 6 */
+ *len = 9;
+ return (AS1 (clr,__tmp_reg__) CR_TAB
+ AS1 (lsr,%B0) CR_TAB
+ AS1 (ror,%A0) CR_TAB
+ AS1 (ror,__tmp_reg__) CR_TAB
+ AS1 (lsr,%B0) CR_TAB
+ AS1 (ror,%A0) CR_TAB
+ AS1 (ror,__tmp_reg__) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (mov,%A0,__tmp_reg__));
+
+ case 7:
+ *len = 5;
+ return (AS1 (lsr,%B0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (ror,%A0));
+
+ case 8:
+ return *len = 2, (AS2 (mov,%B0,%A1) CR_TAB
+ AS1 (clr,%A0));
+
+ case 9:
+ *len = 3;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0));
+
+ case 10:
+ *len = 4;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0));
+
+ case 11:
+ *len = 5;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0));
+
+ case 12:
+ if (ldi_ok)
+ {
+ *len = 4;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (andi,%B0,0xf0));
+ }
+ if (scratch)
+ {
+ *len = 5;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS2 (ldi,%3,0xf0) CR_TAB
+ "and %B0,%3");
+ }
+ *len = 6;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0));
+
+ case 13:
+ if (ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (andi,%B0,0xe0));
+ }
+ if (AVR_HAVE_MUL && scratch)
+ {
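+	      /* x << 13 keeps only bits 0..2 of the low byte, moved into
+		 the high byte: MUL by 0x20 computes %A0 << 5, and the low
+		 byte of the product (r0) becomes the new high byte.  */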
+ *len = 5;
+ return (AS2 (ldi,%3,0x20) CR_TAB
+ AS2 (mul,%A0,%3) CR_TAB
+ AS2 (mov,%B0,r0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ if (scratch)
+ {
+ *len = 6;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (ldi,%3,0xe0) CR_TAB
+ "and %B0,%3");
+ }
+ if (AVR_HAVE_MUL)
+ {
+ *len = 6;
+ return ("set" CR_TAB
+ AS2 (bld,r1,5) CR_TAB
+ AS2 (mul,%A0,r1) CR_TAB
+ AS2 (mov,%B0,r0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ *len = 7;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (lsl,%B0));
+
+ case 14:
+ if (AVR_HAVE_MUL && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (ldi,%B0,0x40) CR_TAB
+ AS2 (mul,%A0,%B0) CR_TAB
+ AS2 (mov,%B0,r0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (AVR_HAVE_MUL && scratch)
+ {
+ *len = 5;
+ return (AS2 (ldi,%3,0x40) CR_TAB
+ AS2 (mul,%A0,%3) CR_TAB
+ AS2 (mov,%B0,r0) CR_TAB
+ AS1 (clr,%A0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (ldi,%A0,6) "\n1:\t"
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (dec,%A0) CR_TAB
+ AS1 (brne,1b));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ *len = 6;
+ return (AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 15:
+ *len = 4;
+ return (AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (clr,%A0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0)),
+ insn, operands, len, 2);
+ return "";
+}
+
+
+/* 32bit shift left ((long)x << i) */
+
+const char *
+ashlsi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 32)
+ break;
+
+ if (AVR_HAVE_MOVW)
+ return *len = 3, (AS1 (clr,%D0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS2 (movw,%A0,%C0));
+ *len = 4;
+ return (AS1 (clr,%D0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 8:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+ *len = 4;
+ if (reg0 >= reg1)
+ return (AS2 (mov,%D0,%C1) CR_TAB
+ AS2 (mov,%C0,%B1) CR_TAB
+ AS2 (mov,%B0,%A1) CR_TAB
+ AS1 (clr,%A0));
+ else
+ return (AS1 (clr,%A0) CR_TAB
+ AS2 (mov,%B0,%A1) CR_TAB
+ AS2 (mov,%C0,%B1) CR_TAB
+ AS2 (mov,%D0,%C1));
+ }
+
+ case 16:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+ if (reg0 + 2 == reg1)
+ return *len = 2, (AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+ if (AVR_HAVE_MOVW)
+ return *len = 3, (AS2 (movw,%C0,%A1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+ else
+ return *len = 4, (AS2 (mov,%C0,%A1) CR_TAB
+ AS2 (mov,%D0,%B1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+ }
+
+ case 24:
+ *len = 4;
+ return (AS2 (mov,%D0,%A1) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 31:
+ *len = 6;
+ return (AS1 (clr,%D0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (ror,%D0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0) CR_TAB
+ AS1 (rol,%C0) CR_TAB
+ AS1 (rol,%D0)),
+ insn, operands, len, 4);
+ return "";
+}
+
+/* 8bit arithmetic shift right ((signed char)x >> i) */
+
+const char *
+ashrqi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ case 1:
+ *len = 1;
+ return AS1 (asr,%0);
+
+ case 2:
+ *len = 2;
+ return (AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0));
+
+ case 3:
+ *len = 3;
+ return (AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0));
+
+ case 4:
+ *len = 4;
+ return (AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0));
+
+ case 5:
+ *len = 5;
+ return (AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0) CR_TAB
+ AS1 (asr,%0));
+
+ case 6:
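+	  /* BST saves bit 6; LSL moves the sign bit into carry, and
+	     SBC %0,%0 replicates it through the whole byte; BLD then
+	     places the saved bit 6 into bit 0, giving exactly the
+	     arithmetic x >> 6.  */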
+ *len = 4;
+ return (AS2 (bst,%0,6) CR_TAB
+ AS1 (lsl,%0) CR_TAB
+ AS2 (sbc,%0,%0) CR_TAB
+ AS2 (bld,%0,0));
+
+ default:
+ if (INTVAL (operands[2]) < 8)
+ break;
+
+ /* fall through */
+
+ case 7:
+ *len = 2;
+ return (AS1 (lsl,%0) CR_TAB
+ AS2 (sbc,%0,%0));
+ }
+ }
+ else if (CONSTANT_P (operands[2]))
+ fatal_insn ("internal compiler error. Incorrect shift:", insn);
+
+ out_shift_with_cnt (AS1 (asr,%0),
+ insn, operands, len, 1);
+ return "";
+}
+
+
+/* 16bit arithmetic shift right ((signed short)x >> i) */
+
+const char *
+ashrhi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL);
+ int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ case 4:
+ case 5:
+ /* XXX try to optimize this too? */
+ break;
+
+ case 6:
+ if (optimize_size)
+ break; /* scratch ? 5 : 6 */
+ *len = 8;
+ return (AS2 (mov,__tmp_reg__,%A0) CR_TAB
+ AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,__tmp_reg__) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (lsl,__tmp_reg__) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS1 (rol,%B0));
+
+ case 7:
+ *len = 4;
+ return (AS1 (lsl,%A0) CR_TAB
+ AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS2 (sbc,%B0,%B0));
+
+ case 8:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+
+ if (reg0 == reg1)
+ return *len = 3, (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0));
+ else
+ return *len = 4, (AS2 (mov,%A0,%B1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS2 (sbrc,%A0,7) CR_TAB
+ AS1 (dec,%B0));
+ }
+
+ case 9:
+ *len = 4;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (asr,%A0));
+
+ case 10:
+ *len = 5;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0));
+
+ case 11:
+ if (AVR_HAVE_MUL && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (ldi,%A0,0x20) CR_TAB
+ AS2 (muls,%B0,%A0) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ *len = 6;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0));
+
+ case 12:
+ if (AVR_HAVE_MUL && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (ldi,%A0,0x10) CR_TAB
+ AS2 (muls,%B0,%A0) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ *len = 7;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0));
+
+ case 13:
+ if (AVR_HAVE_MUL && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (ldi,%A0,0x08) CR_TAB
+ AS2 (muls,%B0,%A0) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size)
+ break; /* scratch ? 5 : 7 */
+ *len = 8;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0) CR_TAB
+ AS1 (asr,%A0));
+
+ case 14:
+ *len = 5;
+ return (AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%A0,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS1 (rol,%A0));
+
+ default:
+ if (INTVAL (operands[2]) < 16)
+ break;
+
+ /* fall through */
+
+ case 15:
+ return *len = 3, (AS1 (lsl,%B0) CR_TAB
+ AS2 (sbc,%A0,%A0) CR_TAB
+ AS2 (mov,%B0,%A0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (asr,%B0) CR_TAB
+ AS1 (ror,%A0)),
+ insn, operands, len, 2);
+ return "";
+}
+
+
+/* 32bit arithmetic shift right ((signed long)x >> i) */
+
+const char *
+ashrsi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ case 8:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+	*len = 6;
+ if (reg0 <= reg1)
+ return (AS2 (mov,%A0,%B1) CR_TAB
+ AS2 (mov,%B0,%C1) CR_TAB
+ AS2 (mov,%C0,%D1) CR_TAB
+ AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%C0,7) CR_TAB
+ AS1 (dec,%D0));
+ else
+ return (AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%D1,7) CR_TAB
+ AS1 (dec,%D0) CR_TAB
+ AS2 (mov,%C0,%D1) CR_TAB
+ AS2 (mov,%B0,%C1) CR_TAB
+ AS2 (mov,%A0,%B1));
+ }
+
+ case 16:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+
+ if (reg0 == reg1 + 2)
+ return *len = 4, (AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%B0,7) CR_TAB
+ AS1 (com,%D0) CR_TAB
+ AS2 (mov,%C0,%D0));
+ if (AVR_HAVE_MOVW)
+ return *len = 5, (AS2 (movw,%A0,%C1) CR_TAB
+ AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%B0,7) CR_TAB
+ AS1 (com,%D0) CR_TAB
+ AS2 (mov,%C0,%D0));
+ else
+ return *len = 6, (AS2 (mov,%B0,%D1) CR_TAB
+ AS2 (mov,%A0,%C1) CR_TAB
+ AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%B0,7) CR_TAB
+ AS1 (com,%D0) CR_TAB
+ AS2 (mov,%C0,%D0));
+ }
+
+ case 24:
+ return *len = 6, (AS2 (mov,%A0,%D1) CR_TAB
+ AS1 (clr,%D0) CR_TAB
+ AS2 (sbrc,%A0,7) CR_TAB
+ AS1 (com,%D0) CR_TAB
+ AS2 (mov,%B0,%D0) CR_TAB
+ AS2 (mov,%C0,%D0));
+
+ default:
+ if (INTVAL (operands[2]) < 32)
+ break;
+
+ /* fall through */
+
+ case 31:
+ if (AVR_HAVE_MOVW)
+ return *len = 4, (AS1 (lsl,%D0) CR_TAB
+ AS2 (sbc,%A0,%A0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (movw,%C0,%A0));
+ else
+ return *len = 5, (AS1 (lsl,%D0) CR_TAB
+ AS2 (sbc,%A0,%A0) CR_TAB
+ AS2 (mov,%B0,%A0) CR_TAB
+ AS2 (mov,%C0,%A0) CR_TAB
+ AS2 (mov,%D0,%A0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (asr,%D0) CR_TAB
+ AS1 (ror,%C0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (ror,%A0)),
+ insn, operands, len, 4);
+ return "";
+}
+
+/* 8bit logical shift right ((unsigned char)x >> i) */
+
+const char *
+lshrqi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 8)
+ break;
+
+ *len = 1;
+ return AS1 (clr,%0);
+
+ case 1:
+ *len = 1;
+ return AS1 (lsr,%0);
+
+ case 2:
+ *len = 2;
+ return (AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0));
+ case 3:
+ *len = 3;
+ return (AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0));
+
+ case 4:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+	    *len = 2;
+ return (AS1 (swap,%0) CR_TAB
+ AS2 (andi,%0,0x0f));
+ }
+ *len = 4;
+ return (AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0));
+
+ case 5:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+ *len = 3;
+ return (AS1 (swap,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS2 (andi,%0,0x7));
+ }
+ *len = 5;
+ return (AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0));
+
+ case 6:
+ if (test_hard_reg_class (LD_REGS, operands[0]))
+ {
+ *len = 4;
+ return (AS1 (swap,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS2 (andi,%0,0x3));
+ }
+ *len = 6;
+ return (AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0) CR_TAB
+ AS1 (lsr,%0));
+
+ case 7:
+ *len = 3;
+ return (AS1 (rol,%0) CR_TAB
+ AS1 (clr,%0) CR_TAB
+ AS1 (rol,%0));
+ }
+ }
+ else if (CONSTANT_P (operands[2]))
+ fatal_insn ("internal compiler error. Incorrect shift:", insn);
+
+ out_shift_with_cnt (AS1 (lsr,%0),
+ insn, operands, len, 1);
+ return "";
+}
+
+/* 16bit logical shift right ((unsigned short)x >> i) */
+
+const char *
+lshrhi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL);
+ int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]);
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 16)
+ break;
+
+ *len = 2;
+ return (AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 4:
+ if (optimize_size && scratch)
+ break; /* 5 */
+ if (ldi_ok)
+ {
+ *len = 6;
+ return (AS1 (swap,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (andi,%A0,0x0f) CR_TAB
+ AS2 (eor,%A0,%B0) CR_TAB
+ AS2 (andi,%B0,0x0f) CR_TAB
+ AS2 (eor,%A0,%B0));
+ }
+ if (scratch)
+ {
+ *len = 7;
+ return (AS1 (swap,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (ldi,%3,0x0f) CR_TAB
+ "and %A0,%3" CR_TAB
+ AS2 (eor,%A0,%B0) CR_TAB
+ "and %B0,%3" CR_TAB
+ AS2 (eor,%A0,%B0));
+ }
+ break; /* optimize_size ? 6 : 8 */
+
+ case 5:
+ if (optimize_size)
+ break; /* scratch ? 5 : 6 */
+ if (ldi_ok)
+ {
+ *len = 8;
+ return (AS1 (lsr,%B0) CR_TAB
+ AS1 (ror,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (andi,%A0,0x0f) CR_TAB
+ AS2 (eor,%A0,%B0) CR_TAB
+ AS2 (andi,%B0,0x0f) CR_TAB
+ AS2 (eor,%A0,%B0));
+ }
+ if (scratch)
+ {
+ *len = 9;
+ return (AS1 (lsr,%B0) CR_TAB
+ AS1 (ror,%A0) CR_TAB
+ AS1 (swap,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (ldi,%3,0x0f) CR_TAB
+ "and %A0,%3" CR_TAB
+ AS2 (eor,%A0,%B0) CR_TAB
+ "and %B0,%3" CR_TAB
+ AS2 (eor,%A0,%B0));
+ }
+ break; /* 10 */
+
+ case 6:
+ if (optimize_size)
+ break; /* scratch ? 5 : 6 */
+ *len = 9;
+ return (AS1 (clr,__tmp_reg__) CR_TAB
+ AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0) CR_TAB
+ AS1 (rol,__tmp_reg__) CR_TAB
+ AS1 (lsl,%A0) CR_TAB
+ AS1 (rol,%B0) CR_TAB
+ AS1 (rol,__tmp_reg__) CR_TAB
+ AS2 (mov,%A0,%B0) CR_TAB
+ AS2 (mov,%B0,__tmp_reg__));
+
+ case 7:
+ *len = 5;
+ return (AS1 (lsl,%A0) CR_TAB
+ AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS2 (sbc,%B0,%B0) CR_TAB
+ AS1 (neg,%B0));
+
+ case 8:
+ return *len = 2, (AS2 (mov,%A0,%B1) CR_TAB
+ AS1 (clr,%B0));
+
+ case 9:
+ *len = 3;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0));
+
+ case 10:
+ *len = 4;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0));
+
+ case 11:
+ *len = 5;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0));
+
+ case 12:
+ if (ldi_ok)
+ {
+ *len = 4;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (andi,%A0,0x0f));
+ }
+ if (scratch)
+ {
+ *len = 5;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS2 (ldi,%3,0x0f) CR_TAB
+ "and %A0,%3");
+ }
+ *len = 6;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0));
+
+ case 13:
+ if (ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS2 (andi,%A0,0x07));
+ }
+ if (AVR_HAVE_MUL && scratch)
+ {
+ *len = 5;
+ return (AS2 (ldi,%3,0x08) CR_TAB
+ AS2 (mul,%B0,%3) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ if (scratch)
+ {
+ *len = 6;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (swap,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS2 (ldi,%3,0x07) CR_TAB
+ "and %A0,%3");
+ }
+ if (AVR_HAVE_MUL)
+ {
+ *len = 6;
+ return ("set" CR_TAB
+ AS2 (bld,r1,3) CR_TAB
+ AS2 (mul,%B0,r1) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ *len = 7;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (lsr,%A0));
+
+ case 14:
+ if (AVR_HAVE_MUL && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (ldi,%A0,0x04) CR_TAB
+ AS2 (mul,%B0,%A0) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (AVR_HAVE_MUL && scratch)
+ {
+ *len = 5;
+ return (AS2 (ldi,%3,0x04) CR_TAB
+ AS2 (mul,%B0,%3) CR_TAB
+ AS2 (mov,%A0,r1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,__zero_reg__));
+ }
+ if (optimize_size && ldi_ok)
+ {
+ *len = 5;
+ return (AS2 (mov,%A0,%B0) CR_TAB
+ AS2 (ldi,%B0,6) "\n1:\t"
+ AS1 (lsr,%A0) CR_TAB
+ AS1 (dec,%B0) CR_TAB
+ AS1 (brne,1b));
+ }
+ if (optimize_size && scratch)
+ break; /* 5 */
+ *len = 6;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS1 (clr,%B0));
+
+ case 15:
+ *len = 4;
+ return (AS1 (clr,%A0) CR_TAB
+ AS1 (lsl,%B0) CR_TAB
+ AS1 (rol,%A0) CR_TAB
+ AS1 (clr,%B0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (lsr,%B0) CR_TAB
+ AS1 (ror,%A0)),
+ insn, operands, len, 2);
+ return "";
+}
+
+/* 32bit logical shift right ((unsigned long)x >> i) */
+
+const char *
+lshrsi3_out (rtx insn, rtx operands[], int *len)
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int k;
+ int *t = len;
+
+ if (!len)
+ len = &k;
+
+ switch (INTVAL (operands[2]))
+ {
+ default:
+ if (INTVAL (operands[2]) < 32)
+ break;
+
+ if (AVR_HAVE_MOVW)
+ return *len = 3, (AS1 (clr,%D0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS2 (movw,%A0,%C0));
+ *len = 4;
+ return (AS1 (clr,%D0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%A0));
+
+ case 8:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+ *len = 4;
+ if (reg0 <= reg1)
+ return (AS2 (mov,%A0,%B1) CR_TAB
+ AS2 (mov,%B0,%C1) CR_TAB
+ AS2 (mov,%C0,%D1) CR_TAB
+ AS1 (clr,%D0));
+ else
+ return (AS1 (clr,%D0) CR_TAB
+ AS2 (mov,%C0,%D1) CR_TAB
+ AS2 (mov,%B0,%C1) CR_TAB
+ AS2 (mov,%A0,%B1));
+ }
+
+ case 16:
+ {
+ int reg0 = true_regnum (operands[0]);
+ int reg1 = true_regnum (operands[1]);
+
+ if (reg0 == reg1 + 2)
+ return *len = 2, (AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+ if (AVR_HAVE_MOVW)
+ return *len = 3, (AS2 (movw,%A0,%C1) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+ else
+ return *len = 4, (AS2 (mov,%B0,%D1) CR_TAB
+ AS2 (mov,%A0,%C1) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+ }
+
+ case 24:
+ return *len = 4, (AS2 (mov,%A0,%D1) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+
+ case 31:
+ *len = 6;
+ return (AS1 (clr,%A0) CR_TAB
+ AS2 (sbrc,%D0,7) CR_TAB
+ AS1 (inc,%A0) CR_TAB
+ AS1 (clr,%B0) CR_TAB
+ AS1 (clr,%C0) CR_TAB
+ AS1 (clr,%D0));
+ }
+ len = t;
+ }
+ out_shift_with_cnt ((AS1 (lsr,%D0) CR_TAB
+ AS1 (ror,%C0) CR_TAB
+ AS1 (ror,%B0) CR_TAB
+ AS1 (ror,%A0)),
+ insn, operands, len, 4);
+ return "";
+}
+
+/* Create RTL split patterns for byte-sized rotate expressions.  This
+   produces a series of move instructions and considers overlap situations.
+   Overlapping non-HImode operands need a scratch register.  */
+
+bool
+avr_rotate_bytes (rtx operands[])
+{
+ int i, j;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
+ bool same_reg = rtx_equal_p (operands[0], operands[1]);
+ int num = INTVAL (operands[2]);
+ rtx scratch = operands[3];
+  /* Work out whether a byte or a word move is needed.  Odd byte rotates
+     need QImode.  Use word moves if no scratch is needed; otherwise use
+     the size of the scratch register.  */
+ enum machine_mode move_mode = QImode;
+ int move_size, offset, size;
+
+ if (num & 0xf)
+ move_mode = QImode;
+ else if ((mode == SImode && !same_reg) || !overlapped)
+ move_mode = HImode;
+ else
+ move_mode = GET_MODE (scratch);
+
+  /* Force DImode rotates to use QImode moves; other DImode moves are
+     currently split into QImode moves as well, so forward propagation
+     works better.  */
+ if (mode == DImode)
+ move_mode = QImode;
+ /* Make scratch smaller if needed. */
+ if (SCRATCH != GET_CODE (scratch)
+ && HImode == GET_MODE (scratch)
+ && QImode == move_mode)
+ scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+
+ move_size = GET_MODE_SIZE (move_mode);
+ /* Number of bytes/words to rotate. */
+ offset = (num >> 3) / move_size;
+ /* Number of moves needed. */
+ size = GET_MODE_SIZE (mode) / move_size;
+  /* HImode byte swap is a special case to avoid a scratch register.  */
+ if (mode == HImode && same_reg)
+ {
+ /* HImode byte swap, using xor. This is as quick as using scratch. */
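+      /* The classic triple-XOR swap (a ^= b; b ^= a; a ^= b) exchanges
+	 the two byte subregs without needing a temporary register.  */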
+ rtx src, dst;
+ src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+ dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+ if (!rtx_equal_p (dst, src))
+ {
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ }
+ }
+ else
+ {
+#define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */
+ /* Create linked list of moves to determine move order. */
+ struct {
+ rtx src, dst;
+ int links;
+ } move[MAX_SIZE + 8];
+ int blocked, moves;
+
+ gcc_assert (size <= MAX_SIZE);
+ /* Generate list of subreg moves. */
+ for (i = 0; i < size; i++)
+ {
+ int from = i;
+ int to = (from + offset) % size;
+ move[i].src = simplify_gen_subreg (move_mode, operands[1],
+ mode, from * move_size);
+ move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+ mode, to * move_size);
+ move[i].links = -1;
+ }
+      /* Mark a dependence where the dst of one move is the src of another
+	 move.  The first move is in conflict, as it must wait until the
+	 second is performed.  Moves to self are ignored; we catch those
+	 later.  */
+ if (overlapped)
+ for (i = 0; i < size; i++)
+ if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+ for (j = 0; j < size; j++)
+ if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+ {
+ /* The dst of move i is the src of move j. */
+ move[i].links = j;
+ break;
+ }
+
+ blocked = -1;
+ moves = 0;
+ /* Go through move list and perform non-conflicting moves. As each
+ non-overlapping move is made, it may remove other conflicts
+ so the process is repeated until no conflicts remain. */
+ do
+ {
+ blocked = -1;
+ moves = 0;
+	  /* Emit a move when its dst is no longer needed as the src of
+	     another pending move, i.e. either nothing reads that dst or
+	     the move that does has already been emitted.  */
+ for (i = 0; i < size; i++)
+ if (move[i].src != NULL_RTX)
+ {
+ if (move[i].links == -1
+ || move[move[i].links].src == NULL_RTX)
+ {
+ moves++;
+ /* Ignore NOP moves to self. */
+ if (!rtx_equal_p (move[i].dst, move[i].src))
+ emit_move_insn (move[i].dst, move[i].src);
+
+ /* Remove conflict from list. */
+ move[i].src = NULL_RTX;
+ }
+ else
+ blocked = i;
+ }
+
+ /* Check for deadlock. This is when no moves occurred and we have
+ at least one blocked move. */
+ if (moves == 0 && blocked != -1)
+ {
+	      /* Use the scratch register to break the deadlock: add a
+		 move that saves the dst of the blocked move into the
+		 scratch before it is overwritten.  The move that reads
+		 that dst is then redirected to read from the scratch
+		 instead.  */
+
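+	      /* Example (assumed two-word overlap): swapping the words of
+		 an SImode value gives w0->w1 and w1->w0, each blocked on
+		 the other.  Saving w1, the dst of the blocked move, into
+		 the scratch lets w0->w1 proceed; the scratch then supplies
+		 the final move into w0.  */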
+ gcc_assert (SCRATCH != GET_CODE (scratch));
+
+ move[size].src = move[blocked].dst;
+ move[size].dst = scratch;
+ /* Scratch move is never blocked. */
+ move[size].links = -1;
+	      /* Make sure we have a valid link.  */
+ gcc_assert (move[blocked].links != -1);
+ /* Replace src of blocking move with scratch reg. */
+ move[move[blocked].links].src = scratch;
+	      /* Make dependent on the scratch move occurring.  */
+ move[blocked].links = size;
+	      size++;
+ }
+ }
+ while (blocked != -1);
+ }
+ return true;
+}
+
+/* Modify the length assigned to instruction INSN.  LEN is the
+   initially computed length of the insn.  Return the adjusted length.  */
+
+int
+adjust_insn_length (rtx insn, int len)
+{
+ rtx patt = PATTERN (insn);
+ rtx set;
+
+ if (GET_CODE (patt) == SET)
+ {
+ rtx op[10];
+ op[1] = SET_SRC (patt);
+ op[0] = SET_DEST (patt);
+ if (general_operand (op[1], VOIDmode)
+ && general_operand (op[0], VOIDmode))
+ {
+ switch (GET_MODE (op[0]))
+ {
+ case QImode:
+ output_movqi (insn, op, &len);
+ break;
+ case HImode:
+ output_movhi (insn, op, &len);
+ break;
+ case SImode:
+ case SFmode:
+ output_movsisf (insn, op, &len);
+ break;
+ default:
+ break;
+ }
+ }
+ else if (op[0] == cc0_rtx && REG_P (op[1]))
+ {
+ switch (GET_MODE (op[1]))
+ {
+ case HImode: out_tsthi (insn, op[1], &len); break;
+ case SImode: out_tstsi (insn, op[1], &len); break;
+ default: break;
+ }
+ }
+ else if (GET_CODE (op[1]) == AND)
+ {
+ if (GET_CODE (XEXP (op[1],1)) == CONST_INT)
+ {
+ HOST_WIDE_INT mask = INTVAL (XEXP (op[1],1));
+ if (GET_MODE (op[1]) == SImode)
+ len = (((mask & 0xff) != 0xff)
+ + ((mask & 0xff00) != 0xff00)
+ + ((mask & 0xff0000L) != 0xff0000L)
+ + ((mask & 0xff000000L) != 0xff000000L));
+ else if (GET_MODE (op[1]) == HImode)
+ len = (((mask & 0xff) != 0xff)
+ + ((mask & 0xff00) != 0xff00));
+ }
+ }
+ else if (GET_CODE (op[1]) == IOR)
+ {
+ if (GET_CODE (XEXP (op[1],1)) == CONST_INT)
+ {
+ HOST_WIDE_INT mask = INTVAL (XEXP (op[1],1));
+ if (GET_MODE (op[1]) == SImode)
+ len = (((mask & 0xff) != 0)
+ + ((mask & 0xff00) != 0)
+ + ((mask & 0xff0000L) != 0)
+ + ((mask & 0xff000000L) != 0));
+ else if (GET_MODE (op[1]) == HImode)
+ len = (((mask & 0xff) != 0)
+ + ((mask & 0xff00) != 0));
+ }
+ }
+ }
+ set = single_set (insn);
+ if (set)
+ {
+ rtx op[10];
+
+ op[1] = SET_SRC (set);
+ op[0] = SET_DEST (set);
+
+ if (GET_CODE (patt) == PARALLEL
+ && general_operand (op[1], VOIDmode)
+ && general_operand (op[0], VOIDmode))
+ {
+ if (XVECLEN (patt, 0) == 2)
+ op[2] = XVECEXP (patt, 0, 1);
+
+ switch (GET_MODE (op[0]))
+ {
+ case QImode:
+ len = 2;
+ break;
+ case HImode:
+ output_reload_inhi (insn, op, &len);
+ break;
+ case SImode:
+ case SFmode:
+ output_reload_insisf (insn, op, &len);
+ break;
+ default:
+ break;
+ }
+ }
+ else if (GET_CODE (op[1]) == ASHIFT
+ || GET_CODE (op[1]) == ASHIFTRT
+ || GET_CODE (op[1]) == LSHIFTRT)
+ {
+ rtx ops[10];
+ ops[0] = op[0];
+ ops[1] = XEXP (op[1],0);
+ ops[2] = XEXP (op[1],1);
+ switch (GET_CODE (op[1]))
+ {
+ case ASHIFT:
+ switch (GET_MODE (op[0]))
+ {
+ case QImode: ashlqi3_out (insn,ops,&len); break;
+ case HImode: ashlhi3_out (insn,ops,&len); break;
+ case SImode: ashlsi3_out (insn,ops,&len); break;
+ default: break;
+ }
+ break;
+ case ASHIFTRT:
+ switch (GET_MODE (op[0]))
+ {
+ case QImode: ashrqi3_out (insn,ops,&len); break;
+ case HImode: ashrhi3_out (insn,ops,&len); break;
+ case SImode: ashrsi3_out (insn,ops,&len); break;
+ default: break;
+ }
+ break;
+ case LSHIFTRT:
+ switch (GET_MODE (op[0]))
+ {
+ case QImode: lshrqi3_out (insn,ops,&len); break;
+ case HImode: lshrhi3_out (insn,ops,&len); break;
+ case SImode: lshrsi3_out (insn,ops,&len); break;
+ default: break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return len;
+}
+
+/* Return nonzero if register REG is dead after INSN.  */
+
+int
+reg_unused_after (rtx insn, rtx reg)
+{
+ return (dead_or_set_p (insn, reg)
+	  || (REG_P (reg) && _reg_unused_after (insn, reg)));
+}
+
+/* Return nonzero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels. It may live past calls or jumps though. */
+
+int
+_reg_unused_after (rtx insn, rtx reg)
+{
+ enum rtx_code code;
+ rtx set;
+
+ /* If the reg is set by this instruction, then it is safe for our
+ case. Disregard the case where this is a store to memory, since
+ we are checking a register used in the store address. */
+ set = single_set (insn);
+ if (set && GET_CODE (SET_DEST (set)) != MEM
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return 1;
+
+ while ((insn = NEXT_INSN (insn)))
+ {
+ rtx set;
+ code = GET_CODE (insn);
+
+#if 0
+ /* If this is a label that existed before reload, then the register
+     is dead here.  However, if this is a label added by reorg, then
+ the register may still be live here. We can't tell the difference,
+ so we just ignore labels completely. */
+ if (code == CODE_LABEL)
+ return 1;
+ /* else */
+#endif
+
+ if (!INSN_P (insn))
+ continue;
+
+ if (code == JUMP_INSN)
+ return 0;
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int i;
+ int retval = 0;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+ rtx set = single_set (this_insn);
+
+ if (GET_CODE (this_insn) == CALL_INSN)
+ code = CALL_INSN;
+ else if (GET_CODE (this_insn) == JUMP_INSN)
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return 0;
+ code = JUMP_INSN;
+ }
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ {
+ if (GET_CODE (SET_DEST (set)) != MEM)
+ retval = 1;
+ else
+ return 0;
+ }
+ if (set == 0
+ && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+ return 0;
+ }
+ if (retval == 1)
+ return 1;
+ else if (code == JUMP_INSN)
+ return 0;
+ }
+
+ if (code == CALL_INSN)
+ {
+ rtx tem;
+ for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1))
+ if (GET_CODE (XEXP (tem, 0)) == USE
+ && REG_P (XEXP (XEXP (tem, 0), 0))
+ && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0)))
+ return 0;
+ if (call_used_regs[REGNO (reg)])
+ return 1;
+ }
+
+ set = single_set (insn);
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return GET_CODE (SET_DEST (set)) != MEM;
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return 0;
+ }
+ return 1;
+}
+
+/* Target hook for assembling integer objects. The AVR version needs
+ special handling for references to certain labels. */
+
+static bool
+avr_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (size == POINTER_SIZE / BITS_PER_UNIT && aligned_p
+      && text_segment_operand (x, VOIDmode))
+ {
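+      /* Code addresses on AVR are word addresses.  gs() additionally
+	 lets the linker route the reference through a generated stub
+	 when the target is not directly encodable, e.g. on devices with
+	 more than 128 KiB of flash (assumed avr-ld behaviour).  */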
+ fputs ("\t.word\tgs(", asm_out_file);
+ output_addr_const (asm_out_file, x);
+ fputs (")\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
+
+/* Worker function for ASM_DECLARE_FUNCTION_NAME. */
+
+void
+avr_asm_declare_function_name (FILE *file, const char *name, tree decl)
+{
+
+ /* If the function has the 'signal' or 'interrupt' attribute, test to
+ make sure that the name of the function is "__vector_NN" so as to
+ catch when the user misspells the interrupt vector name. */
+
+ if (cfun->machine->is_interrupt)
+ {
+ if (strncmp (name, "__vector", strlen ("__vector")) != 0)
+ {
+ warning_at (DECL_SOURCE_LOCATION (decl), 0,
+ "%qs appears to be a misspelled interrupt handler",
+ name);
+ }
+ }
+ else if (cfun->machine->is_signal)
+ {
+ if (strncmp (name, "__vector", strlen ("__vector")) != 0)
+ {
+ warning_at (DECL_SOURCE_LOCATION (decl), 0,
+ "%qs appears to be a misspelled signal handler",
+ name);
+ }
+ }
+
+ ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
+ ASM_OUTPUT_LABEL (file, name);
+}
+
+/* The routine used to output NUL terminated strings. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable, especially for targets like the i386
+ (where the only alternative is to output character sequences as
+ comma separated lists of numbers). */
+
+void
+gas_output_limited_string (FILE *file, const char *str)
+{
+ const unsigned char *_limited_str = (const unsigned char *) str;
+ unsigned ch;
+ fprintf (file, "%s\"", STRING_ASM_OP);
+ for (; (ch = *_limited_str); _limited_str++)
+ {
+ int escape;
+ switch (escape = ESCAPES[ch])
+ {
+ case 0:
+ putc (ch, file);
+ break;
+ case 1:
+ fprintf (file, "\\%03o", ch);
+ break;
+ default:
+ putc ('\\', file);
+ putc (escape, file);
+ break;
+ }
+ }
+ fprintf (file, "\"\n");
+}
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+void
+gas_output_ascii (FILE *file, const char *str, size_t length)
+{
+ const unsigned char *_ascii_bytes = (const unsigned char *) str;
+ const unsigned char *limit = _ascii_bytes + length;
+ unsigned bytes_in_chunk = 0;
+ for (; _ascii_bytes < limit; _ascii_bytes++)
+ {
+ const unsigned char *p;
+ if (bytes_in_chunk >= 60)
+ {
+ fprintf (file, "\"\n");
+ bytes_in_chunk = 0;
+ }
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++)
+ continue;
+ if (p < limit && (p - _ascii_bytes) <= (signed)STRING_LIMIT)
+ {
+ if (bytes_in_chunk > 0)
+ {
+ fprintf (file, "\"\n");
+ bytes_in_chunk = 0;
+ }
+ gas_output_limited_string (file, (const char*)_ascii_bytes);
+ _ascii_bytes = p;
+ }
+ else
+ {
+ int escape;
+ unsigned ch;
+ if (bytes_in_chunk == 0)
+ fprintf (file, "\t.ascii\t\"");
+ switch (escape = ESCAPES[ch = *_ascii_bytes])
+ {
+ case 0:
+ putc (ch, file);
+ bytes_in_chunk++;
+ break;
+ case 1:
+ fprintf (file, "\\%03o", ch);
+ bytes_in_chunk += 4;
+ break;
+ default:
+ putc ('\\', file);
+ putc (escape, file);
+ bytes_in_chunk += 2;
+ break;
+ }
+ }
+ }
+ if (bytes_in_chunk > 0)
+ fprintf (file, "\"\n");
+}
+
+/* Return value is nonzero if pseudos that have been
+ assigned to registers of class CLASS would likely be spilled
+ because registers of CLASS are needed for spill registers. */
+
+static bool
+avr_class_likely_spilled_p (reg_class_t c)
+{
+ return (c != ALL_REGS && c != ADDW_REGS);
+}
+
+/* Valid attributes:
+   progmem - put data into program memory;
+   signal - make the function a hardware interrupt handler;
+     interrupts remain disabled after the function prologue;
+   interrupt - make the function a hardware interrupt handler;
+     interrupts are re-enabled after the function prologue;
+   naked - don't generate a function prologue/epilogue or the
+     `ret' instruction.
+
+   Only the `progmem' attribute is valid for a type.  */
+
+/* Handle a "progmem" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+avr_handle_progmem_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ {
+ /* This is really a decl attribute, not a type attribute,
+ but try to handle it for GCC 3.0 backwards compatibility. */
+
+ tree type = TREE_TYPE (*node);
+ tree attr = tree_cons (name, args, TYPE_ATTRIBUTES (type));
+ tree newtype = build_type_attribute_variant (type, attr);
+
+ TYPE_MAIN_VARIANT (newtype) = TYPE_MAIN_VARIANT (type);
+ TREE_TYPE (*node) = newtype;
+ *no_add_attrs = true;
+ }
+ else if (TREE_STATIC (*node) || DECL_EXTERNAL (*node))
+ {
+ *no_add_attrs = false;
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+avr_handle_fndecl_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static tree
+avr_handle_fntype_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Look for the `progmem' attribute in DECL;
+   return 1 if found, 0 otherwise.  */
+
+int
+avr_progmem_p (tree decl, tree attributes)
+{
+ tree a;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return 0;
+
+ if (NULL_TREE
+ != lookup_attribute ("progmem", attributes))
+ return 1;
+
+  a = decl;
+ do
+ a = TREE_TYPE(a);
+ while (TREE_CODE (a) == ARRAY_TYPE);
+
+ if (a == error_mark_node)
+ return 0;
+
+ if (NULL_TREE != lookup_attribute ("progmem", TYPE_ATTRIBUTES (a)))
+ return 1;
+
+ return 0;
+}
+
+/* Add the section attribute if the variable is in progmem. */
+
+static void
+avr_insert_attributes (tree node, tree *attributes)
+{
+ if (TREE_CODE (node) == VAR_DECL
+ && (TREE_STATIC (node) || DECL_EXTERNAL (node))
+ && avr_progmem_p (node, *attributes))
+ {
+ tree node0 = node;
+
+      /* For C++, we have to peel away array types to correctly determine
+	 whether the underlying element type is read-only.  */
+
+ do
+ node0 = TREE_TYPE (node0);
+ while (TREE_CODE (node0) == ARRAY_TYPE);
+
+ if (error_mark_node == node0)
+ return;
+
+ if (TYPE_READONLY (node0))
+ {
+ static const char dsec[] = ".progmem.data";
+
+ *attributes = tree_cons (get_identifier ("section"),
+ build_tree_list (NULL, build_string (strlen (dsec), dsec)),
+ *attributes);
+ }
+ else
+ {
+ error ("variable %q+D must be const in order to be put into"
+ " read-only section by means of %<__attribute__((progmem))%>",
+ node);
+ }
+ }
+}
+
+/* A get_unnamed_section callback for switching to progmem_section. */
+
+static void
+avr_output_progmem_section_asm_op (const void *arg ATTRIBUTE_UNUSED)
+{
+ fprintf (asm_out_file,
+ "\t.section .progmem.gcc_sw_table, \"%s\", @progbits\n",
+ AVR_HAVE_JMP_CALL ? "a" : "ax");
+  /* Should already be aligned; this is just to be safe in case it isn't.  */
+ fprintf (asm_out_file, "\t.p2align 1\n");
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+avr_asm_init_sections (void)
+{
+ progmem_section = get_unnamed_section (AVR_HAVE_JMP_CALL ? 0 : SECTION_CODE,
+ avr_output_progmem_section_asm_op,
+ NULL);
+ readonly_data_section = data_section;
+}
+
+static unsigned int
+avr_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+
+ if (strncmp (name, ".noinit", 7) == 0)
+ {
+ if (decl && TREE_CODE (decl) == VAR_DECL
+ && DECL_INITIAL (decl) == NULL_TREE)
+ flags |= SECTION_BSS; /* @nobits */
+ else
+ warning (0, "only uninitialized variables can be placed in the "
+ ".noinit section");
+ }
+
+ if (0 == strncmp (name, ".progmem.data", strlen (".progmem.data")))
+ flags &= ~SECTION_WRITE;
+
+ return flags;
+}
+
+
+/* Implement `TARGET_ENCODE_SECTION_INFO'. */
+
+static void
+avr_encode_section_info (tree decl, rtx rtl, int new_decl_p)
+{
+ /* In avr_handle_progmem_attribute, DECL_INITIAL is not yet
+ readily available, see PR34734. So we postpone the warning
+ about uninitialized data in program memory section until here. */
+
+ if (new_decl_p
+ && decl && DECL_P (decl)
+ && NULL_TREE == DECL_INITIAL (decl)
+ && !DECL_EXTERNAL (decl)
+ && avr_progmem_p (decl, DECL_ATTRIBUTES (decl)))
+ {
+ warning (OPT_Wuninitialized,
+ "uninitialized variable %q+D put into "
+ "program memory area", decl);
+ }
+
+ default_encode_section_info (decl, rtl, new_decl_p);
+}
+
+
+/* Outputs some appropriate text to go at the start of an assembler
+ file. */
+
+static void
+avr_file_start (void)
+{
+ if (avr_current_arch->asm_only)
+ error ("MCU %qs supported for assembler only", avr_mcu_name);
+
+ default_file_start ();
+
+ fputs ("__SREG__ = 0x3f\n"
+ "__SP_H__ = 0x3e\n"
+ "__SP_L__ = 0x3d\n", asm_out_file);
+
+ fputs ("__tmp_reg__ = 0\n"
+ "__zero_reg__ = 1\n", asm_out_file);
+
+ /* FIXME: output these only if there is anything in the .data / .bss
+ sections - some code size could be saved by not linking in the
+ initialization code from libgcc if one or both sections are empty. */
+ fputs ("\t.global __do_copy_data\n", asm_out_file);
+ fputs ("\t.global __do_clear_bss\n", asm_out_file);
+}
+
+/* Outputs to the stdio stream FILE some
+ appropriate text to go at the end of an assembler file. */
+
+static void
+avr_file_end (void)
+{
+}
+
+/* Choose the order in which to allocate hard registers for
+ pseudo-registers local to a basic block.
+
+ Store the desired register order in the array `reg_alloc_order'.
+ Element 0 should be the register to allocate first; element 1, the
+ next register; and so on. */
+
+void
+order_regs_for_local_alloc (void)
+{
+ unsigned int i;
+ static const int order_0[] = {
+ 24,25,
+ 18,19,
+ 20,21,
+ 22,23,
+ 30,31,
+ 26,27,
+ 28,29,
+ 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,
+ 0,1,
+ 32,33,34,35
+ };
+ static const int order_1[] = {
+ 18,19,
+ 20,21,
+ 22,23,
+ 24,25,
+ 30,31,
+ 26,27,
+ 28,29,
+ 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,
+ 0,1,
+ 32,33,34,35
+ };
+ static const int order_2[] = {
+ 25,24,
+ 23,22,
+ 21,20,
+ 19,18,
+ 30,31,
+ 26,27,
+ 28,29,
+ 17,16,
+ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,
+ 1,0,
+ 32,33,34,35
+ };
+
+ const int *order = (TARGET_ORDER_1 ? order_1 :
+ TARGET_ORDER_2 ? order_2 :
+ order_0);
+ for (i=0; i < ARRAY_SIZE (order_0); ++i)
+ reg_alloc_order[i] = order[i];
+}
+
+
+/* Implement `TARGET_REGISTER_MOVE_COST' */
+
+static int
+avr_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ return (from == STACK_REG ? 6
+ : to == STACK_REG ? 12
+ : 2);
+}
+
+
+/* Implement `TARGET_MEMORY_MOVE_COST' */
+
+static int
+avr_memory_move_cost (enum machine_mode mode, reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ return (mode == QImode ? 2
+ : mode == HImode ? 4
+ : mode == SImode ? 8
+ : mode == SFmode ? 8
+ : 16);
+}
+
+
+/* Mutually recursive subroutine of avr_rtx_cost for calculating the
+ cost of an RTX operand given its context. X is the rtx of the
+ operand, MODE is its mode, and OUTER is the rtx_code of this
+ operand's parent operator. */
+
+static int
+avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer,
+ bool speed)
+{
+ enum rtx_code code = GET_CODE (x);
+ int total;
+
+ switch (code)
+ {
+ case REG:
+ case SUBREG:
+ return 0;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return COSTS_N_INSNS (GET_MODE_SIZE (mode));
+
+ default:
+ break;
+ }
+
+ total = 0;
+ avr_rtx_costs (x, code, outer, &total, speed);
+ return total;
+}
+
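+/* A rough illustration of how avr_operand_rtx_cost above and
+   avr_rtx_costs below interact: for (plus:HI (reg:HI 24) (const_int 100))
+   the PLUS case yields COSTS_N_INSNS (2) because the constant lies
+   outside -63..63, while (plus:HI (reg:HI 24) (const_int 4)) costs
+   COSTS_N_INSNS (1); the register operand itself contributes 0 via
+   avr_operand_rtx_cost.  */
+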
+/* The AVR backend's rtx_cost function. X is rtx expression whose cost
+ is to be calculated. Return true if the complete cost has been
+ computed, and false if subexpressions should be scanned. In either
+ case, *TOTAL contains the cost result. */
+
+static bool
+avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed)
+{
+ enum rtx_code code = (enum rtx_code) codearg;
+ enum machine_mode mode = GET_MODE (x);
+ HOST_WIDE_INT val;
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ /* Immediate constants are as cheap as registers. */
+ *total = 0;
+ return true;
+
+ case MEM:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+ return true;
+
+ case NEG:
+ switch (mode)
+ {
+ case QImode:
+ case SFmode:
+ *total = COSTS_N_INSNS (1);
+ break;
+
+ case HImode:
+ *total = COSTS_N_INSNS (3);
+ break;
+
+ case SImode:
+ *total = COSTS_N_INSNS (7);
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case ABS:
+ switch (mode)
+ {
+ case QImode:
+ case SFmode:
+ *total = COSTS_N_INSNS (1);
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case NOT:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case ZERO_EXTEND:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
+ - GET_MODE_SIZE (GET_MODE (XEXP (x, 0))));
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case SIGN_EXTEND:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + 2
+ - GET_MODE_SIZE (GET_MODE (XEXP (x, 0))));
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case PLUS:
+ switch (mode)
+ {
+ case QImode:
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ break;
+
+ case HImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (2);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ break;
+
+ case SImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (4);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case MINUS:
+ case AND:
+ case IOR:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ return true;
+
+ case XOR:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ return true;
+
+ case MULT:
+ switch (mode)
+ {
+ case QImode:
+ if (AVR_HAVE_MUL)
+ *total = COSTS_N_INSNS (!speed ? 3 : 4);
+ else if (!speed)
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
+ else
+ return false;
+ break;
+
+ case HImode:
+ if (AVR_HAVE_MUL)
+ *total = COSTS_N_INSNS (!speed ? 7 : 10);
+ else if (!speed)
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
+ else
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ return true;
+
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ if (!speed)
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
+ else
+ return false;
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ return true;
+
+ case ROTATE:
+ switch (mode)
+ {
+ case QImode:
+ if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 4)
+ *total = COSTS_N_INSNS (1);
+
+ break;
+
+ case HImode:
+ if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 8)
+ *total = COSTS_N_INSNS (3);
+
+ break;
+
+ case SImode:
+ if (CONST_INT_P (XEXP (x, 1)))
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 8:
+ case 24:
+ *total = COSTS_N_INSNS (5);
+ break;
+ case 16:
+ *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 6);
+ break;
+ }
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case ASHIFT:
+ switch (mode)
+ {
+ case QImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ {
+ val = INTVAL (XEXP (x, 1));
+ if (val == 7)
+ *total = COSTS_N_INSNS (3);
+ else if (val >= 0 && val <= 7)
+ *total = COSTS_N_INSNS (val);
+ else
+ *total = COSTS_N_INSNS (1);
+ }
+ break;
+
+ case HImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 1:
+ case 8:
+ *total = COSTS_N_INSNS (2);
+ break;
+ case 9:
+ *total = COSTS_N_INSNS (3);
+ break;
+ case 2:
+ case 3:
+ case 10:
+ case 15:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 7:
+ case 11:
+ case 12:
+ *total = COSTS_N_INSNS (5);
+ break;
+ case 4:
+ *total = COSTS_N_INSNS (!speed ? 5 : 8);
+ break;
+ case 6:
+ *total = COSTS_N_INSNS (!speed ? 5 : 9);
+ break;
+ case 5:
+ *total = COSTS_N_INSNS (!speed ? 5 : 10);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ case SImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 24:
+ *total = COSTS_N_INSNS (3);
+ break;
+ case 1:
+ case 8:
+ case 16:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 31:
+ *total = COSTS_N_INSNS (6);
+ break;
+ case 2:
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case ASHIFTRT:
+ switch (mode)
+ {
+ case QImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ {
+ val = INTVAL (XEXP (x, 1));
+ if (val == 6)
+ *total = COSTS_N_INSNS (4);
+ else if (val == 7)
+ *total = COSTS_N_INSNS (2);
+ else if (val >= 0 && val <= 7)
+ *total = COSTS_N_INSNS (val);
+ else
+ *total = COSTS_N_INSNS (1);
+ }
+ break;
+
+ case HImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 1:
+ *total = COSTS_N_INSNS (2);
+ break;
+ case 15:
+ *total = COSTS_N_INSNS (3);
+ break;
+ case 2:
+ case 7:
+ case 8:
+ case 9:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 10:
+ case 14:
+ *total = COSTS_N_INSNS (5);
+ break;
+ case 11:
+ *total = COSTS_N_INSNS (!speed ? 5 : 6);
+ break;
+ case 12:
+ *total = COSTS_N_INSNS (!speed ? 5 : 7);
+ break;
+ case 6:
+ case 13:
+ *total = COSTS_N_INSNS (!speed ? 5 : 8);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ case SImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 1:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 8:
+ case 16:
+ case 24:
+ *total = COSTS_N_INSNS (6);
+ break;
+ case 2:
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
+ break;
+ case 31:
+ *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case LSHIFTRT:
+ switch (mode)
+ {
+ case QImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 4 : 17);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ {
+ val = INTVAL (XEXP (x, 1));
+ if (val == 7)
+ *total = COSTS_N_INSNS (3);
+ else if (val >= 0 && val <= 7)
+ *total = COSTS_N_INSNS (val);
+ else
+ *total = COSTS_N_INSNS (1);
+ }
+ break;
+
+ case HImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 1:
+ case 8:
+ *total = COSTS_N_INSNS (2);
+ break;
+ case 9:
+ *total = COSTS_N_INSNS (3);
+ break;
+ case 2:
+ case 10:
+ case 15:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 7:
+ case 11:
+ *total = COSTS_N_INSNS (5);
+ break;
+ case 3:
+ case 12:
+ case 13:
+ case 14:
+ *total = COSTS_N_INSNS (!speed ? 5 : 6);
+ break;
+ case 4:
+ *total = COSTS_N_INSNS (!speed ? 5 : 7);
+ break;
+ case 5:
+ case 6:
+ *total = COSTS_N_INSNS (!speed ? 5 : 9);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 5 : 41);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ case SImode:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ else
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 0:
+ *total = 0;
+ break;
+ case 1:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 2:
+ *total = COSTS_N_INSNS (!speed ? 7 : 8);
+ break;
+ case 8:
+ case 16:
+ case 24:
+ *total = COSTS_N_INSNS (4);
+ break;
+ case 31:
+ *total = COSTS_N_INSNS (6);
+ break;
+ default:
+ *total = COSTS_N_INSNS (!speed ? 7 : 113);
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ }
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ case COMPARE:
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case QImode:
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ break;
+
+ case HImode:
+ *total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ else if (INTVAL (XEXP (x, 1)) != 0)
+ *total += COSTS_N_INSNS (1);
+ break;
+
+ case SImode:
+ *total = COSTS_N_INSNS (4);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, speed);
+ else if (INTVAL (XEXP (x, 1)) != 0)
+ *total += COSTS_N_INSNS (3);
+ break;
+
+ default:
+ return false;
+ }
+ *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, speed);
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Calculate the cost of a memory address. */
+
+static int
+avr_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x,1)) == CONST_INT
+ && (REG_P (XEXP (x,0)) || GET_CODE (XEXP (x,0)) == SUBREG)
+ && INTVAL (XEXP (x,1)) >= 61)
+ return 18;
+ if (CONSTANT_ADDRESS_P (x))
+ {
+ if (optimize > 0 && io_address_operand (x, QImode))
+ return 2;
+ return 4;
+ }
+ return 4;
+}
+
+/* Test for the extra memory constraint 'Q': a memory address based on
+   the Y or Z pointer with a valid displacement.  */
+
+int
+extra_constraint_Q (rtx x)
+{
+ if (GET_CODE (XEXP (x,0)) == PLUS
+ && REG_P (XEXP (XEXP (x,0), 0))
+ && GET_CODE (XEXP (XEXP (x,0), 1)) == CONST_INT
+ && (INTVAL (XEXP (XEXP (x,0), 1))
+ <= MAX_LD_OFFSET (GET_MODE (x))))
+ {
+ rtx xx = XEXP (XEXP (x,0), 0);
+ int regno = REGNO (xx);
+ if (TARGET_ALL_DEBUG)
+ {
+ fprintf (stderr, ("extra_constraint:\n"
+ "reload_completed: %d\n"
+ "reload_in_progress: %d\n"),
+ reload_completed, reload_in_progress);
+ debug_rtx (x);
+ }
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return 1; /* allocate pseudos */
+ else if (regno == REG_Z || regno == REG_Y)
+ return 1; /* strictly check */
+ else if (xx == frame_pointer_rtx
+ || xx == arg_pointer_rtx)
+ return 1; /* XXX frame & arg pointer checks */
+ }
+ return 0;
+}
+
+/* Convert condition code CONDITION to the valid AVR condition code. */
+
+RTX_CODE
+avr_normalize_condition (RTX_CODE condition)
+{
+ switch (condition)
+ {
+ case GT:
+ return GE;
+ case GTU:
+ return GEU;
+ case LE:
+ return LT;
+ case LEU:
+ return LTU;
+ default:
+ gcc_unreachable ();
+ }
+}
+
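+/* avr_reorg below uses this together with avr_simplify_comparison_p to
+   rewrite, for instance, a GT comparison against 5 into a GE comparison
+   against 6, which maps directly onto an AVR branch condition.  */
+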
+/* This function optimizes conditional jumps. */
+
+static void
+avr_reorg (void)
+{
+ rtx insn, pattern;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (! (GET_CODE (insn) == INSN
+ || GET_CODE (insn) == CALL_INSN
+ || GET_CODE (insn) == JUMP_INSN)
+ || !single_set (insn))
+ continue;
+
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == SET
+ && SET_DEST (pattern) == cc0_rtx
+ && compare_diff_p (insn))
+ {
+ if (GET_CODE (SET_SRC (pattern)) == COMPARE)
+ {
+ /* We are now looking at the compare insn. */
+
+ pattern = SET_SRC (pattern);
+ if (true_regnum (XEXP (pattern,0)) >= 0
+ && true_regnum (XEXP (pattern,1)) >= 0 )
+ {
+ rtx x = XEXP (pattern,0);
+ rtx next = next_real_insn (insn);
+ rtx pat = PATTERN (next);
+ rtx src = SET_SRC (pat);
+ rtx t = XEXP (src,0);
+ PUT_CODE (t, swap_condition (GET_CODE (t)));
+ XEXP (pattern,0) = XEXP (pattern,1);
+ XEXP (pattern,1) = x;
+ INSN_CODE (next) = -1;
+ }
+ else if (true_regnum (XEXP (pattern, 0)) >= 0
+ && XEXP (pattern, 1) == const0_rtx)
+ {
+ /* This is a tst insn; we can reverse it. */
+ rtx next = next_real_insn (insn);
+ rtx pat = PATTERN (next);
+ rtx src = SET_SRC (pat);
+ rtx t = XEXP (src,0);
+
+ PUT_CODE (t, swap_condition (GET_CODE (t)));
+ XEXP (pattern, 1) = XEXP (pattern, 0);
+ XEXP (pattern, 0) = const0_rtx;
+ INSN_CODE (next) = -1;
+ INSN_CODE (insn) = -1;
+ }
+ else if (true_regnum (XEXP (pattern,0)) >= 0
+ && GET_CODE (XEXP (pattern,1)) == CONST_INT)
+ {
+ rtx x = XEXP (pattern,1);
+ rtx next = next_real_insn (insn);
+ rtx pat = PATTERN (next);
+ rtx src = SET_SRC (pat);
+ rtx t = XEXP (src,0);
+ enum machine_mode mode = GET_MODE (XEXP (pattern, 0));
+
+ if (avr_simplify_comparison_p (mode, GET_CODE (t), x))
+ {
+ XEXP (pattern, 1) = gen_int_mode (INTVAL (x) + 1, mode);
+ PUT_CODE (t, avr_normalize_condition (GET_CODE (t)));
+ INSN_CODE (next) = -1;
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Return the register number used for the function return value.  */
+
+int
+avr_ret_register (void)
+{
+ return 24;
+}
+
+/* Create an RTX representing the place where a
+ library function returns a value of mode MODE. */
+
+rtx
+avr_libcall_value (enum machine_mode mode)
+{
+ int offs = GET_MODE_SIZE (mode);
+ if (offs < 2)
+ offs = 2;
+ return gen_rtx_REG (mode, RET_REGISTER + 2 - offs);
+}
+
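+/* For example, with RET_REGISTER == 24: an SImode value has offs == 4
+   and starts in register 24 + 2 - 4 == 22 (r22..r25), while a QImode
+   value is widened to offs == 2 and returned in r24.  */
+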
+/* Create an RTX representing the place where a
+ function returns a value of data type TYPE. */
+
+rtx
+avr_function_value (const_tree type,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ unsigned int offs;
+
+ if (TYPE_MODE (type) != BLKmode)
+ return avr_libcall_value (TYPE_MODE (type));
+
+ offs = int_size_in_bytes (type);
+ if (offs < 2)
+ offs = 2;
+ if (offs > 2 && offs < GET_MODE_SIZE (SImode))
+ offs = GET_MODE_SIZE (SImode);
+ else if (offs > GET_MODE_SIZE (SImode) && offs < GET_MODE_SIZE (DImode))
+ offs = GET_MODE_SIZE (DImode);
+
+ return gen_rtx_REG (BLKmode, RET_REGISTER + 2 - offs);
+}
+
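+/* For example, a 3-byte BLKmode aggregate is rounded up to the 4-byte
+   SImode size above, so like an SImode scalar it is returned starting
+   in register 22.  */
+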
+int
+test_hard_reg_class (enum reg_class rclass, rtx x)
+{
+ int regno = true_regnum (x);
+ if (regno < 0)
+ return 0;
+
+ if (TEST_HARD_REG_CLASS (rclass, regno))
+ return 1;
+
+ return 0;
+}
+
+
+int
+jump_over_one_insn_p (rtx insn, rtx dest)
+{
+ int uid = INSN_UID (GET_CODE (dest) == LABEL_REF
+ ? XEXP (dest, 0)
+ : dest);
+ int jump_addr = INSN_ADDRESSES (INSN_UID (insn));
+ int dest_addr = INSN_ADDRESSES (uid);
+ return dest_addr - jump_addr == get_attr_length (insn) + 1;
+}
+
+/* Returns 1 if a value of mode MODE can be stored starting with hard
+ register number REGNO. On the enhanced core, anything larger than
+ 1 byte must start in an even-numbered register for "movw" to work
+ (this way we don't have to check for odd registers everywhere). */
+
+int
+avr_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* NOTE: 8-bit values must not be disallowed for R28 or R29.
+ Disallowing QI et al. in these regs might lead to code like
+ (set (subreg:QI (reg:HI 28) n) ...)
+ which will result in wrong code because reload does not
+ handle SUBREGs of hard registers like this.
+ This could be fixed in reload. However, it appears
+ that fixing reload is not wanted by reload people. */
+
+ /* Any GENERAL_REGS register can hold 8-bit values. */
+
+ if (GET_MODE_SIZE (mode) == 1)
+ return 1;
+
+ /* FIXME: Ideally, the following test is not needed.
+ However, it turned out that it can reduce the number
+ of spill failures. The AVR, with its poor endowment of
+ address registers, is an extreme stress test for reload. */
+
+ if (GET_MODE_SIZE (mode) >= 4
+ && regno >= REG_X)
+ return 0;
+
+ /* All modes larger than 8 bits should start in an even register. */
+
+ return !(regno & 1);
+}
+
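+/* For example: QImode is allowed in any general register; HImode may
+   live in r24 but not in the odd r25; and SImode in r26 (REG_X) and up
+   is rejected by the spill-failure workaround above.  */
+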
+const char *
+output_reload_inhi (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len)
+{
+ int tmp;
+ if (!len)
+ len = &tmp;
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ int val = INTVAL (operands[1]);
+ if ((val & 0xff) == 0)
+ {
+ *len = 3;
+ return (AS2 (mov,%A0,__zero_reg__) CR_TAB
+ AS2 (ldi,%2,hi8(%1)) CR_TAB
+ AS2 (mov,%B0,%2));
+ }
+ else if ((val & 0xff00) == 0)
+ {
+ *len = 3;
+ return (AS2 (ldi,%2,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,%2) CR_TAB
+ AS2 (mov,%B0,__zero_reg__));
+ }
+ else if ((val & 0xff) == ((val & 0xff00) >> 8))
+ {
+ *len = 3;
+ return (AS2 (ldi,%2,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,%2) CR_TAB
+ AS2 (mov,%B0,%2));
+ }
+ }
+ *len = 4;
+ return (AS2 (ldi,%2,lo8(%1)) CR_TAB
+ AS2 (mov,%A0,%2) CR_TAB
+ AS2 (ldi,%2,hi8(%1)) CR_TAB
+ AS2 (mov,%B0,%2));
+}
+
+
+const char *
+output_reload_insisf (rtx insn ATTRIBUTE_UNUSED, rtx *operands, int *len)
+{
+ rtx src = operands[1];
+ int cnst = (GET_CODE (src) == CONST_INT);
+
+ if (len)
+ {
+ if (cnst)
+ *len = 4 + ((INTVAL (src) & 0xff) != 0)
+ + ((INTVAL (src) & 0xff00) != 0)
+ + ((INTVAL (src) & 0xff0000) != 0)
+ + ((INTVAL (src) & 0xff000000) != 0);
+ else
+ *len = 8;
+
+ return "";
+ }
+
+ if (cnst && ((INTVAL (src) & 0xff) == 0))
+ output_asm_insn (AS2 (mov, %A0, __zero_reg__), operands);
+ else
+ {
+ output_asm_insn (AS2 (ldi, %2, lo8(%1)), operands);
+ output_asm_insn (AS2 (mov, %A0, %2), operands);
+ }
+ if (cnst && ((INTVAL (src) & 0xff00) == 0))
+ output_asm_insn (AS2 (mov, %B0, __zero_reg__), operands);
+ else
+ {
+ output_asm_insn (AS2 (ldi, %2, hi8(%1)), operands);
+ output_asm_insn (AS2 (mov, %B0, %2), operands);
+ }
+ if (cnst && ((INTVAL (src) & 0xff0000) == 0))
+ output_asm_insn (AS2 (mov, %C0, __zero_reg__), operands);
+ else
+ {
+ output_asm_insn (AS2 (ldi, %2, hlo8(%1)), operands);
+ output_asm_insn (AS2 (mov, %C0, %2), operands);
+ }
+ if (cnst && ((INTVAL (src) & 0xff000000) == 0))
+ output_asm_insn (AS2 (mov, %D0, __zero_reg__), operands);
+ else
+ {
+ output_asm_insn (AS2 (ldi, %2, hhi8(%1)), operands);
+ output_asm_insn (AS2 (mov, %D0, %2), operands);
+ }
+ return "";
+}
+
+void
+avr_output_bld (rtx operands[], int bit_nr)
+{
+ static char s[] = "bld %A0,0";
+
+ s[5] = 'A' + (bit_nr >> 3);
+ s[8] = '0' + (bit_nr & 7);
+ output_asm_insn (s, operands);
+}
+
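+/* For example, bit_nr == 10 selects byte 1 and bit 2, so the template
+   above is rewritten to "bld %B0,2" before being output.  */
+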
+void
+avr_output_addr_vec_elt (FILE *stream, int value)
+{
+ switch_to_section (progmem_section);
+ if (AVR_HAVE_JMP_CALL)
+ fprintf (stream, "\t.word gs(.L%d)\n", value);
+ else
+ fprintf (stream, "\trjmp .L%d\n", value);
+}
+
+/* Return true if register number REGNO is safe to be allocated as a
+   scratch register (for a define_peephole2) in the current function.  */
+
+bool
+avr_hard_regno_scratch_ok (unsigned int regno)
+{
+ /* Interrupt functions can only use registers that have already been saved
+ by the prologue, even if they would normally be call-clobbered. */
+
+ if ((cfun->machine->is_interrupt || cfun->machine->is_signal)
+ && !df_regs_ever_live_p (regno))
+ return false;
+
+ /* Don't allow hard registers that might be part of the frame pointer.
+ Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
+ and don't care for a frame pointer that spans more than one register. */
+
+ if ((!reload_completed || frame_pointer_needed)
+ && (regno == REG_Y || regno == REG_Y + 1))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
+
+int
+avr_hard_regno_rename_ok (unsigned int old_reg,
+ unsigned int new_reg)
+{
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+
+ if ((cfun->machine->is_interrupt || cfun->machine->is_signal)
+ && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ /* Don't allow hard registers that might be part of the frame pointer.
+ Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
+ and don't care for a frame pointer that spans more than one register. */
+
+ if ((!reload_completed || frame_pointer_needed)
+ && (old_reg == REG_Y || old_reg == REG_Y + 1
+ || new_reg == REG_Y || new_reg == REG_Y + 1))
+ {
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Output a branch that tests a single bit of a register (QI, HI, SI or DImode)
+ or memory location in the I/O space (QImode only).
+
+ Operand 0: comparison operator (must be EQ or NE, compare bit to zero).
+ Operand 1: register operand to test, or CONST_INT memory address.
+ Operand 2: bit number.
+ Operand 3: label to jump to if the test is true. */
+
+const char *
+avr_out_sbxx_branch (rtx insn, rtx operands[])
+{
+ enum rtx_code comp = GET_CODE (operands[0]);
+ int long_jump = (get_attr_length (insn) >= 4);
+ int reverse = long_jump || jump_over_one_insn_p (insn, operands[3]);
+
+ if (comp == GE)
+ comp = EQ;
+ else if (comp == LT)
+ comp = NE;
+
+ if (reverse)
+ comp = reverse_condition (comp);
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) < 0x40)
+ {
+ if (comp == EQ)
+ output_asm_insn (AS2 (sbis,%m1-0x20,%2), operands);
+ else
+ output_asm_insn (AS2 (sbic,%m1-0x20,%2), operands);
+ }
+ else
+ {
+ output_asm_insn (AS2 (in,__tmp_reg__,%m1-0x20), operands);
+ if (comp == EQ)
+ output_asm_insn (AS2 (sbrs,__tmp_reg__,%2), operands);
+ else
+ output_asm_insn (AS2 (sbrc,__tmp_reg__,%2), operands);
+ }
+ }
+ else /* GET_CODE (operands[1]) == REG */
+ {
+ if (GET_MODE (operands[1]) == QImode)
+ {
+ if (comp == EQ)
+ output_asm_insn (AS2 (sbrs,%1,%2), operands);
+ else
+ output_asm_insn (AS2 (sbrc,%1,%2), operands);
+ }
+ else /* HImode or SImode */
+ {
+ static char buf[] = "sbrc %A1,0";
+ int bit_nr = INTVAL (operands[2]);
+ buf[3] = (comp == EQ) ? 's' : 'c';
+ buf[6] = 'A' + (bit_nr >> 3);
+ buf[9] = '0' + (bit_nr & 7);
+ output_asm_insn (buf, operands);
+ }
+ }
+
+ if (long_jump)
+ return (AS1 (rjmp,.+4) CR_TAB
+ AS1 (jmp,%x3));
+ if (!reverse)
+ return AS1 (rjmp,%x3);
+ return "";
+}
+
+/* Worker function for TARGET_ASM_CONSTRUCTOR. */
+
+static void
+avr_asm_out_ctor (rtx symbol, int priority)
+{
+ fputs ("\t.global __do_global_ctors\n", asm_out_file);
+ default_ctor_section_asm_out_constructor (symbol, priority);
+}
+
+/* Worker function for TARGET_ASM_DESTRUCTOR. */
+
+static void
+avr_asm_out_dtor (rtx symbol, int priority)
+{
+ fputs ("\t.global __do_global_dtors\n", asm_out_file);
+ default_dtor_section_asm_out_destructor (symbol, priority);
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+avr_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ if (TYPE_MODE (type) == BLKmode)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 8);
+ }
+ else
+ return false;
+}
+
+/* Worker function for CASE_VALUES_THRESHOLD. */
+
+unsigned int avr_case_values_threshold (void)
+{
+ return (!AVR_HAVE_JMP_CALL || TARGET_CALL_PROLOGUES) ? 8 : 17;
+}
+
+#include "gt-avr.h"
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
new file mode 100644
index 000000000..efe782df7
--- /dev/null
+++ b/gcc/config/avr/avr.h
@@ -0,0 +1,835 @@
+/* Definitions of target machine for GNU compiler,
+ for ATMEL AVR at90s8515, ATmega103/103L, ATmega603/603L microcontrollers.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+ 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Denis Chertykov (chertykov@gmail.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Names to predefine in the preprocessor for this target machine. */
+
+struct base_arch_s {
+ /* Assembler only. */
+ int asm_only;
+
+ /* Core has 'MUL*' instructions. */
+ int have_mul;
+
+ /* Core has 'CALL' and 'JMP' instructions. */
+ int have_jmp_call;
+
+ /* Core has 'MOVW' and 'LPM Rx,Z' instructions. */
+ int have_movw_lpmx;
+
+ /* Core has 'ELPM' instructions. */
+ int have_elpm;
+
+ /* Core has 'ELPM Rx,Z' instructions. */
+ int have_elpmx;
+
+ /* Core has 'EICALL' and 'EIJMP' instructions. */
+ int have_eijmp_eicall;
+
+ /* Reserved for xmega architecture. */
+ int reserved;
+
+ /* Reserved for xmega architecture. */
+ int reserved2;
+
+ /* Default start of data section address for architecture. */
+ int default_data_section_start;
+
+ const char *const macro;
+
+ /* Architecture name. */
+ const char *const arch_name;
+};
+
+/* These names are used as indices into the avr_arch_types[] table
+   defined in avr.c. */
+
+enum avr_arch
+{
+ ARCH_UNKNOWN,
+ ARCH_AVR1,
+ ARCH_AVR2,
+ ARCH_AVR25,
+ ARCH_AVR3,
+ ARCH_AVR31,
+ ARCH_AVR35,
+ ARCH_AVR4,
+ ARCH_AVR5,
+ ARCH_AVR51,
+ ARCH_AVR6
+};
+
+struct mcu_type_s {
+ /* Device name. */
+ const char *const name;
+
+ /* Index in avr_arch_types[]. */
+ int arch;
+
+ /* Must lie outside user's namespace. NULL == no macro. */
+ const char *const macro;
+
+ /* Stack pointer is 8 bits wide. */
+ int short_sp;
+
+ /* Start of data section. */
+ int data_section_start;
+
+ /* Name of device library. */
+ const char *const library_name;
+};
+
+/* Preprocessor macros to define depending on MCU type. */
+extern const char *avr_extra_arch_macro;
+extern const struct base_arch_s *avr_current_arch;
+extern const struct mcu_type_s *avr_current_device;
+extern const struct mcu_type_s avr_mcu_types[];
+extern const struct base_arch_s avr_arch_types[];
+
+#define TARGET_CPU_CPP_BUILTINS() avr_cpu_cpp_builtins (pfile)
+
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
+extern GTY(()) section *progmem_section;
+#endif
+
+#define AVR_HAVE_JMP_CALL (avr_current_arch->have_jmp_call && !TARGET_SHORT_CALLS)
+#define AVR_HAVE_MUL (avr_current_arch->have_mul)
+#define AVR_HAVE_MOVW (avr_current_arch->have_movw_lpmx)
+#define AVR_HAVE_LPMX (avr_current_arch->have_movw_lpmx)
+#define AVR_HAVE_RAMPZ (avr_current_arch->have_elpm)
+#define AVR_HAVE_EIJMP_EICALL (avr_current_arch->have_eijmp_eicall)
+#define AVR_HAVE_8BIT_SP (avr_current_device->short_sp || TARGET_TINY_STACK)
+
+#define AVR_2_BYTE_PC (!AVR_HAVE_EIJMP_EICALL)
+#define AVR_3_BYTE_PC (AVR_HAVE_EIJMP_EICALL)
+
+#define TARGET_VERSION fprintf (stderr, " (GNU assembler syntax)");
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 0
+#define WORDS_BIG_ENDIAN 0
+
+#ifdef IN_LIBGCC2
+/* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). */
+#define UNITS_PER_WORD 4
+#else
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 1
+#endif
+
+#define POINTER_SIZE 16
+
+
+/* Maximum size of a reasonable data type:
+   DImode or DFmode ... */
+#define MAX_FIXED_MODE_SIZE 32
+
+#define PARM_BOUNDARY 8
+
+#define FUNCTION_BOUNDARY 8
+
+#define EMPTY_FIELD_BOUNDARY 8
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 8
+
+#define MAX_OFILE_ALIGNMENT (32768 * 8)
+
+#define TARGET_VTABLE_ENTRY_ALIGN 8
+
+#define STRICT_ALIGNMENT 0
+
+#define INT_TYPE_SIZE (TARGET_INT8 ? 8 : 16)
+#define SHORT_TYPE_SIZE (INT_TYPE_SIZE == 8 ? INT_TYPE_SIZE : 16)
+#define LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 16 : 32)
+#define LONG_LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 32 : 64)
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 32
+#define LONG_DOUBLE_TYPE_SIZE 32
+
+#define DEFAULT_SIGNED_CHAR 1
+
+#define SIZE_TYPE (INT_TYPE_SIZE == 8 ? "long unsigned int" : "unsigned int")
+#define PTRDIFF_TYPE (INT_TYPE_SIZE == 8 ? "long int" :"int")
+
+#define WCHAR_TYPE_SIZE 16
+
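+/* So by default int is 16 bits and long 32 bits; with -mint8
+   (TARGET_INT8) int shrinks to 8 bits, long to 16 bits, and size_t
+   becomes "long unsigned int" so that it stays 16 bits wide.  */
+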
+#define FIRST_PSEUDO_REGISTER 36
+
+#define FIXED_REGISTERS {\
+ 1,1,/* r0 r1 */\
+ 0,0,/* r2 r3 */\
+ 0,0,/* r4 r5 */\
+ 0,0,/* r6 r7 */\
+ 0,0,/* r8 r9 */\
+ 0,0,/* r10 r11 */\
+ 0,0,/* r12 r13 */\
+ 0,0,/* r14 r15 */\
+ 0,0,/* r16 r17 */\
+ 0,0,/* r18 r19 */\
+ 0,0,/* r20 r21 */\
+ 0,0,/* r22 r23 */\
+ 0,0,/* r24 r25 */\
+ 0,0,/* r26 r27 */\
+ 0,0,/* r28 r29 */\
+ 0,0,/* r30 r31 */\
+ 1,1,/* STACK */\
+ 1,1 /* arg pointer */ }
+
+#define CALL_USED_REGISTERS { \
+ 1,1,/* r0 r1 */ \
+ 0,0,/* r2 r3 */ \
+ 0,0,/* r4 r5 */ \
+ 0,0,/* r6 r7 */ \
+ 0,0,/* r8 r9 */ \
+ 0,0,/* r10 r11 */ \
+ 0,0,/* r12 r13 */ \
+ 0,0,/* r14 r15 */ \
+ 0,0,/* r16 r17 */ \
+ 1,1,/* r18 r19 */ \
+ 1,1,/* r20 r21 */ \
+ 1,1,/* r22 r23 */ \
+ 1,1,/* r24 r25 */ \
+ 1,1,/* r26 r27 */ \
+ 0,0,/* r28 r29 */ \
+ 1,1,/* r30 r31 */ \
+ 1,1,/* STACK */ \
+ 1,1 /* arg pointer */ }
+
+#define REG_ALLOC_ORDER { \
+ 24,25, \
+ 18,19, \
+ 20,21, \
+ 22,23, \
+ 30,31, \
+ 26,27, \
+ 28,29, \
+ 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, \
+ 0,1, \
+ 32,33,34,35 \
+ }
+
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
+
+
+#define HARD_REGNO_NREGS(REGNO, MODE) ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) avr_hard_regno_mode_ok(REGNO, MODE)
+
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+enum reg_class {
+ NO_REGS,
+ R0_REG, /* r0 */
+ POINTER_X_REGS, /* r26 - r27 */
+ POINTER_Y_REGS, /* r28 - r29 */
+ POINTER_Z_REGS, /* r30 - r31 */
+ STACK_REG, /* STACK */
+ BASE_POINTER_REGS, /* r28 - r31 */
+ POINTER_REGS, /* r26 - r31 */
+ ADDW_REGS, /* r24 - r31 */
+ SIMPLE_LD_REGS, /* r16 - r23 */
+ LD_REGS, /* r16 - r31 */
+ NO_LD_REGS, /* r0 - r15 */
+ GENERAL_REGS, /* r0 - r31 */
+ ALL_REGS, LIM_REG_CLASSES
+};
+
+
+#define N_REG_CLASSES (int)LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES { \
+ "NO_REGS", \
+ "R0_REG", /* r0 */ \
+ "POINTER_X_REGS", /* r26 - r27 */ \
+ "POINTER_Y_REGS", /* r28 - r29 */ \
+ "POINTER_Z_REGS", /* r30 - r31 */ \
+ "STACK_REG", /* STACK */ \
+ "BASE_POINTER_REGS", /* r28 - r31 */ \
+ "POINTER_REGS", /* r26 - r31 */ \
+ "ADDW_REGS", /* r24 - r31 */ \
+ "SIMPLE_LD_REGS", /* r16 - r23 */ \
+ "LD_REGS", /* r16 - r31 */ \
+ "NO_LD_REGS", /* r0 - r15 */ \
+ "GENERAL_REGS", /* r0 - r31 */ \
+ "ALL_REGS" }
+
+#define REG_CLASS_CONTENTS { \
+ {0x00000000,0x00000000}, /* NO_REGS */ \
+ {0x00000001,0x00000000}, /* R0_REG */ \
+ {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \
+ {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \
+ {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \
+ {0x00000000,0x00000003}, /* STACK_REG, STACK */ \
+ {(3 << REG_Y) | (3 << REG_Z), \
+ 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \
+ {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \
+ 0x00000000}, /* POINTER_REGS, r26 - r31 */ \
+ {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \
+ 0x00000000}, /* ADDW_REGS, r24 - r31 */ \
+ {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \
+ {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \
+ 0x00000000}, /* LD_REGS, r16 - r31 */ \
+ {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \
+ {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - r31 */ \
+ {0xffffffff,0x00000003} /* ALL_REGS */ \
+}
+
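+/* For example, with REG_X == 26 the POINTER_X_REGS entry 3 << REG_X
+   equals 0x0c000000, i.e. exactly the bits for r26 and r27; the second
+   word selects registers 32 and up, e.g. {0,3} in STACK_REG covers
+   __SP_L__ and __SP_H__.  */
+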
+#define REGNO_REG_CLASS(R) avr_regno_reg_class(R)
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of the registers.  The macro value is an
+   array of register classes with LIM_REG_CLASSES used as the end
+   marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, LIM_REG_CLASSES \
+}
+
+#define BASE_REG_CLASS (reload_completed ? BASE_POINTER_REGS : POINTER_REGS)
+
+#define INDEX_REG_CLASS NO_REGS
+
+#define REGNO_OK_FOR_BASE_P(r) (((r) < FIRST_PSEUDO_REGISTER \
+ && ((r) == REG_X \
+ || (r) == REG_Y \
+ || (r) == REG_Z \
+ || (r) == ARG_POINTER_REGNUM)) \
+ || (reg_renumber \
+ && (reg_renumber[r] == REG_X \
+ || reg_renumber[r] == REG_Y \
+ || reg_renumber[r] == REG_Z \
+ || (reg_renumber[r] \
+ == ARG_POINTER_REGNUM))))
+
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+#define CLASS_MAX_NREGS(CLASS, MODE) class_max_nregs (CLASS, MODE)
+
+#define STACK_PUSH_CODE POST_DEC
+
+#define STACK_GROWS_DOWNWARD
+
+#define STARTING_FRAME_OFFSET 1
+
+#define STACK_POINTER_OFFSET 1
+
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+#define STACK_BOUNDARY 8
+
+#define STACK_POINTER_REGNUM 32
+
+#define FRAME_POINTER_REGNUM REG_Y
+
+#define ARG_POINTER_REGNUM 34
+
+#define STATIC_CHAIN_REGNUM 2
+
+#define ELIMINABLE_REGS { \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
+ ,{FRAME_POINTER_REGNUM+1,STACK_POINTER_REGNUM+1}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = avr_initial_elimination_offset (FROM, TO)
+
+#define RETURN_ADDR_RTX(count, tem) avr_return_addr_rtx (count, tem)
+
+/* Don't use Push rounding. expr.c: emit_single_push_insn is broken
+ for POST_DEC targets (PR27386). */
+/*#define PUSH_ROUNDING(NPUSHED) (NPUSHED)*/
+
+typedef struct avr_args {
+ int nregs; /* # registers available for passing */
+ int regno; /* next available register number */
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL)
+
+#define FUNCTION_ARG_REGNO_P(r) function_arg_regno_p(r)
+
+extern int avr_reg_order[];
+
+#define RET_REGISTER avr_ret_register ()
+
+#define LIBCALL_VALUE(MODE) avr_libcall_value (MODE)
+
+#define FUNCTION_VALUE_REGNO_P(N) ((int) (N) == RET_REGISTER)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define EPILOGUE_USES(REGNO) avr_epilogue_uses(REGNO)
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+
+#define MAX_REGS_PER_ADDRESS 1
+
+#define REG_OK_FOR_BASE_NOSTRICT_P(X) \
+ (REGNO (X) >= FIRST_PSEUDO_REGISTER || REG_OK_FOR_BASE_STRICT_P(X))
+
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+/* LEGITIMIZE_RELOAD_ADDRESS will allow registers R26/R27 to be used where
+   they are no worse than the normal base pointers R28/R29 and R30/R31, for
+   example when the base offset is greater than 63 bytes or for R++ or --R
+   addressing. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
+do { \
+ if (1&&(GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC)) \
+ { \
+ push_reload (XEXP (X,0), XEXP (X,0), &XEXP (X,0), &XEXP (X,0), \
+ POINTER_REGS, GET_MODE (X),GET_MODE (X) , 0, 0, \
+ OPNUM, RELOAD_OTHER); \
+ goto WIN; \
+ } \
+ if (GET_CODE (X) == PLUS \
+ && REG_P (XEXP (X, 0)) \
+ && reg_equiv_constant[REGNO (XEXP (X, 0))] == 0 \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && INTVAL (XEXP (X, 1)) >= 1) \
+ { \
+ int fit = INTVAL (XEXP (X, 1)) <= (64 - GET_MODE_SIZE (MODE)); \
+ if (fit) \
+ { \
+ if (reg_equiv_address[REGNO (XEXP (X, 0))] != 0) \
+ { \
+ int regno = REGNO (XEXP (X, 0)); \
+ rtx mem = make_memloc (X, regno); \
+ push_reload (XEXP (mem,0), NULL, &XEXP (mem,0), NULL, \
+ POINTER_REGS, Pmode, VOIDmode, 0, 0, \
+ 1, ADDR_TYPE (TYPE)); \
+ push_reload (mem, NULL_RTX, &XEXP (X, 0), NULL, \
+ BASE_POINTER_REGS, GET_MODE (X), VOIDmode, 0, 0, \
+ OPNUM, TYPE); \
+ goto WIN; \
+ } \
+ } \
+ else if (! (frame_pointer_needed && XEXP (X,0) == frame_pointer_rtx)) \
+ { \
+ push_reload (X, NULL_RTX, &X, NULL, \
+ POINTER_REGS, GET_MODE (X), VOIDmode, 0, 0, \
+ OPNUM, TYPE); \
+ goto WIN; \
+ } \
+ } \
+} while(0)
+
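+/* For example, an HImode access only fits a displacement of up to
+   64 - 2 bytes, so for (plus (reg Y) (const_int 100)) the macro above
+   reloads the whole sum into a pointer register and the access is then
+   done with a zero displacement.  */
+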
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+#define BRANCH_COST(speed_p, predictable_p) 0
+
+#define SLOW_BYTE_ACCESS 0
+
+#define NO_FUNCTION_CSE
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
+ There are no shared libraries on this target, and these sections are
+ placed in the read-only program memory, so they are not writable. */
+
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"a\",@progbits"
+
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"a\",@progbits"
+
+#define TARGET_ASM_CONSTRUCTOR avr_asm_out_ctor
+
+#define TARGET_ASM_DESTRUCTOR avr_asm_out_dtor
+
+#define SUPPORTS_INIT_PRIORITY 0
+
+#define JUMP_TABLES_IN_TEXT_SECTION 0
+
+#define ASM_COMMENT_START " ; "
+
+#define ASM_APP_ON "/* #APP */\n"
+
+#define ASM_APP_OFF "/* #NOAPP */\n"
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+#define TARGET_ASM_INIT_SECTIONS avr_asm_init_sections
+
+#define ASM_OUTPUT_ASCII(FILE, P, SIZE) gas_output_ascii (FILE,P,SIZE)
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '\n' || ((C) == '$'))
+
+#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \
+do { \
+ fputs ("\t.comm ", (STREAM)); \
+ assemble_name ((STREAM), (NAME)); \
+ fprintf ((STREAM), ",%lu,1\n", (unsigned long)(SIZE)); \
+} while (0)
+
+#define ASM_OUTPUT_BSS(FILE, DECL, NAME, SIZE, ROUNDED) \
+ asm_output_bss ((FILE), (DECL), (NAME), (SIZE), (ROUNDED))
+
+#define ASM_OUTPUT_LOCAL(STREAM, NAME, SIZE, ROUNDED) \
+do { \
+ fputs ("\t.lcomm ", (STREAM)); \
+ assemble_name ((STREAM), (NAME)); \
+ fprintf ((STREAM), ",%d\n", (int)(SIZE)); \
+} while (0)
+
+#undef TYPE_ASM_OP
+#undef SIZE_ASM_OP
+#undef WEAK_ASM_OP
+#define TYPE_ASM_OP "\t.type\t"
+#define SIZE_ASM_OP "\t.size\t"
+#define WEAK_ASM_OP "\t.weak\t"
+/* Define the strings used for the special svr4 .type and .size directives.
+ These strings generally do not vary from one system running svr4 to
+ another, but if a given system (e.g. m88k running svr4) needs to use
+ different pseudo-op names for these, they may be overridden in the
+ file which includes this one. */
+
+
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. Different svr4 assemblers
+ expect various different forms for this operand. The one given here
+ is just a default. You may need to override it in your machine-
+ specific tm.h file (depending upon the particulars of your assembler). */
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+avr_asm_declare_function_name ((FILE), (NAME), (DECL))
+
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do { \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ } while (0)
+
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+do { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, \
+ int_size_in_bytes (TREE_TYPE (DECL))); \
+ } \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+} while (0)
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+do { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } while (0)
+
+
+#define ESCAPES \
+"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
+/* A table of bytes codes used by the ASM_OUTPUT_ASCII and
+ ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table
+ corresponds to a particular byte value [0..255]. For any
+ given byte value, if the value in the corresponding table
+ position is zero, the given character can be output directly.
+ If the table value is 1, the byte must be output as a \ooo
+ octal escape. If the tables value is anything else, then the
+ byte value should be output as a \ followed by the value
+ in the table. Note that we can use standard UN*X escape
+ sequences for many control characters, but we don't use
+ \a to represent BEL because some svr4 assemblers (e.g. on
+ the i386) don't know about that. Also, we don't use \v
+ since some versions of gas, such as 2.2 did not accept it. */
+
+#define STRING_LIMIT ((unsigned) 64)
+#define STRING_ASM_OP "\t.string\t"
+/* Some svr4 assemblers have a limit on the number of characters which
+ can appear in the operand of a .string directive. If your assembler
+ has such a limitation, you should define STRING_LIMIT to reflect that
+ limit. Note that at least some svr4 assemblers have a limit on the
+ actual number of bytes in the double-quoted string, and that they
+ count each character in an escape sequence as one byte. Thus, an
+ escape sequence like \377 would count as four bytes.
+
+ If your target assembler doesn't support the .string directive, you
+ should define this to zero. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP ".global\t"
+
+#define SET_ASM_OP "\t.set\t"
+
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do \
+ { \
+ fputs ("\t.weak\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
+
+#define SUPPORTS_WEAK 1
+
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+sprintf (STRING, "*.%s%lu", PREFIX, (unsigned long)(NUM))
+
+#define HAS_INIT_SECTION 1
+
+#define REGISTER_NAMES { \
+ "r0","r1","r2","r3","r4","r5","r6","r7", \
+ "r8","r9","r10","r11","r12","r13","r14","r15", \
+ "r16","r17","r18","r19","r20","r21","r22","r23", \
+ "r24","r25","r26","r27","r28","r29","r30","r31", \
+ "__SP_L__","__SP_H__","argL","argH"}
+
+#define FINAL_PRESCAN_INSN(insn, operand, nop) final_prescan_insn (insn, operand,nop)
+
+#define PRINT_OPERAND(STREAM, X, CODE) print_operand (STREAM, X, CODE)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) ((CODE) == '~' || (CODE) == '!')
+
+#define PRINT_OPERAND_ADDRESS(STREAM, X) print_operand_address(STREAM, X)
+
+#define USER_LABEL_PREFIX ""
+
+#define ASSEMBLER_DIALECT AVR_HAVE_MOVW
+
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
+{ \
+ gcc_assert (REGNO < 32); \
+ fprintf (STREAM, "\tpush\tr%d", REGNO); \
+}
+
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+{ \
+ gcc_assert (REGNO < 32); \
+ fprintf (STREAM, "\tpop\tr%d", REGNO); \
+}
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ avr_output_addr_vec_elt(STREAM, VALUE)
+
+#define ASM_OUTPUT_CASE_LABEL(STREAM, PREFIX, NUM, TABLE) \
+ (switch_to_section (progmem_section), \
+ (*targetm.asm_out.internal_label) (STREAM, PREFIX, NUM))
+
+#define ASM_OUTPUT_SKIP(STREAM, N) \
+fprintf (STREAM, "\t.skip %lu,0\n", (unsigned long)(N))
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ do { \
+ if ((POWER) > 1) \
+ fprintf (STREAM, "\t.p2align\t%d\n", POWER); \
+ } while (0)
+
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ default_elf_asm_output_external (FILE, DECL, NAME)
+
+#define CASE_VECTOR_MODE HImode
+
+#undef WORD_REGISTER_OPERATIONS
+
+#define MOVE_MAX 4
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define Pmode HImode
+
+#define FUNCTION_MODE HImode
+
+#define DOLLARS_IN_IDENTIFIERS 0
+
+#define NO_DOLLAR_IN_LABEL 1
+
+#define TRAMPOLINE_SIZE 4
+
+/* Store in cc_status the expressions
+ that the condition codes will describe
+ after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) notice_update_cc(EXP, INSN)
+
+/* The add insns don't set overflow in a usable way. */
+#define CC_OVERFLOW_UNUSABLE 01000
+/* The mov,and,or,xor insns don't set carry. That's ok though, as the
+   Z bit is all we need when doing unsigned comparisons on the result of
+   these insns (such comparisons are always against 0). However,
+   conditions.h has CC_NO_OVERFLOW defined for this purpose. Rename it
+   to something more understandable. */
+#define CC_NO_CARRY CC_NO_OVERFLOW
+
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fprintf (FILE, "/* profiler %d */", (LABELNO))
+
+#define ADJUST_INSN_LENGTH(INSN, LENGTH) (LENGTH =\
+ adjust_insn_length (INSN, LENGTH))
+
+extern const char *avr_device_to_arch (int argc, const char **argv);
+extern const char *avr_device_to_data_start (int argc, const char **argv);
+extern const char *avr_device_to_startfiles (int argc, const char **argv);
+extern const char *avr_device_to_devicelib (int argc, const char **argv);
+
+#define EXTRA_SPEC_FUNCTIONS \
+ { "device_to_arch", avr_device_to_arch }, \
+ { "device_to_data_start", avr_device_to_data_start }, \
+ { "device_to_startfile", avr_device_to_startfiles }, \
+ { "device_to_devicelib", avr_device_to_devicelib },
+
+#define CPP_SPEC ""
+
+#define CC1_SPEC ""
+
+#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \
+ %{!fenforce-eh-specs:-fno-enforce-eh-specs} \
+ %{!fexceptions:-fno-exceptions}"
+/* A C string constant that tells the GCC driver program options to
+ pass to `cc1plus'. */
+
+#define ASM_SPEC "%{mmcu=avr25:-mmcu=avr2;mmcu=avr35:-mmcu=avr3;mmcu=avr31:-mmcu=avr3;mmcu=avr51:-mmcu=avr5;\
+mmcu=*:-mmcu=%*}"
+
+#define LINK_SPEC "\
+%{mrelax:--relax\
+ %{mpmem-wrap-around:%{mmcu=at90usb8*:--pmem-wrap-around=8k}\
+ %{mmcu=atmega16*:--pmem-wrap-around=16k}\
+ %{mmcu=atmega32*|\
+ mmcu=at90can32*:--pmem-wrap-around=32k}\
+ %{mmcu=atmega64*|\
+ mmcu=at90can64*|\
+ mmcu=at90usb64*:--pmem-wrap-around=64k}}}\
+%:device_to_arch(%{mmcu=*:%*})\
+%:device_to_data_start(%{mmcu=*:%*})"
+
+#define LIB_SPEC \
+ "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lc }}}}}"
+
+#define LIBSTDCXX "gcc"
+/* No libstdc++ for now. Empty string doesn't work. */
+
+#define LIBGCC_SPEC \
+ "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc }}}}}"
+
+#define STARTFILE_SPEC "%:device_to_startfile(%{mmcu=*:%*})"
+
+#define ENDFILE_SPEC ""
+
+/* This is the default without any -mmcu=* option (AT90S*). */
+#define MULTILIB_DEFAULTS { "mmcu=avr2" }
+
+#define TEST_HARD_REG_CLASS(CLASS, REGNO) \
+ TEST_HARD_REG_BIT (reg_class_contents[ (int) (CLASS)], REGNO)
+
+/* Note that the other files fail to use these
+ in some of the places where they should. */
+
+#if defined(__STDC__) || defined(ALMOST_STDC)
+#define AS2(a,b,c) #a " " #b "," #c
+#define AS2C(b,c) " " #b "," #c
+#define AS3(a,b,c,d) #a " " #b "," #c "," #d
+#define AS1(a,b) #a " " #b
+#else
+#define AS1(a,b) "a b"
+#define AS2(a,b,c) "a b,c"
+#define AS2C(b,c) " b,c"
+#define AS3(a,b,c,d) "a b,c,d"
+#endif
+#define OUT_AS1(a,b) output_asm_insn (AS1(a,b), operands)
+#define OUT_AS2(a,b,c) output_asm_insn (AS2(a,b,c), operands)
+#define CR_TAB "\n\t"
+
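+/* With a standard C preprocessor, AS2 (mov,%A0,%2) therefore expands to
+   the string "mov %A0,%2", and multi-instruction templates are glued
+   together with CR_TAB, as in
+   AS2 (ldi,%2,lo8(%1)) CR_TAB AS2 (mov,%A0,%2).  */
+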
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+#define DWARF2_DEBUGGING_INFO 1
+
+#define DWARF2_ADDR_SIZE 4
+
+#define OBJECT_FORMAT_ELF
+
+#define INCOMING_RETURN_ADDR_RTX avr_incoming_return_addr_rtx ()
+#define INCOMING_FRAME_SP_OFFSET (AVR_3_BYTE_PC ? 3 : 2)
+
+/* The caller's stack pointer value immediately before the call
+ is one byte below the first argument. */
+#define ARG_POINTER_CFA_OFFSET(FNDECL) -1
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ avr_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* A C structure for machine-specific, per-function data.
+ This is added to the cfun structure. */
+struct GTY(()) machine_function
+{
+ /* 'true' - if current function is a naked function. */
+ int is_naked;
+
+ /* 'true' - if current function is an interrupt function
+ as specified by the "interrupt" attribute. */
+ int is_interrupt;
+
+ /* 'true' - if current function is a signal function
+ as specified by the "signal" attribute. */
+ int is_signal;
+
+ /* 'true' - if current function is a 'task' function
+ as specified by the "OS_task" attribute. */
+ int is_OS_task;
+
+ /* 'true' - if current function is a 'main' function
+ as specified by the "OS_main" attribute. */
+ int is_OS_main;
+
+ /* Current function stack size. */
+ int stack_usage;
+};
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
new file mode 100644
index 000000000..1fc6fee57
--- /dev/null
+++ b/gcc/config/avr/avr.md
@@ -0,0 +1,3248 @@
+;; Machine description for GNU compiler,
+;; for ATMEL AVR micro controllers.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008,
+;; 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Denis Chertykov (chertykov@gmail.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Special characters after '%':
+;; A No effect (add 0).
+;; B Add 1 to REG number, MEM address or CONST_INT.
+;; C Add 2.
+;; D Add 3.
+;; j Branch condition.
+;; k Reverse branch condition.
+;;..m..Constant Direct Data memory address.
+;; o Displacement for (mem (plus (reg) (const_int))) operands.
+;; p POST_INC or PRE_DEC address as a pointer (X, Y, Z)
+;; r POST_INC or PRE_DEC address as a register (r26, r28, r30)
+;;..x..Constant Direct Program memory address.
+;; ~ Output 'r' if not AVR_HAVE_JMP_CALL.
+;; ! Output 'e' if AVR_HAVE_EIJMP_EICALL.
+
+;; UNSPEC usage:
+;; 0 Length of a string, see "strlenhi".
+;; 1 Jump by register pair Z or by table addressed by Z, see "casesi".
+
+(define_constants
+ [(REG_X 26)
+ (REG_Y 28)
+ (REG_Z 30)
+ (REG_W 24)
+ (REG_SP 32)
+ (TMP_REGNO 0) ; temporary register r0
+ (ZERO_REGNO 1) ; zero register r1
+
+ (SREG_ADDR 0x5F)
+ (RAMPZ_ADDR 0x5B)
+
+ (UNSPEC_STRLEN 0)
+ (UNSPEC_INDEX_JMP 1)
+ (UNSPEC_SEI 2)
+ (UNSPEC_CLI 3)
+
+ (UNSPECV_PROLOGUE_SAVES 0)
+ (UNSPECV_EPILOGUE_RESTORES 1)
+ (UNSPECV_WRITE_SP_IRQ_ON 2)
+ (UNSPECV_WRITE_SP_IRQ_OFF 3)
+ (UNSPECV_GOTO_RECEIVER 4)])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Condition code settings.
+(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber"
+ (const_string "none"))
+
+(define_attr "type" "branch,branch1,arith,xcall"
+ (const_string "arith"))
+
+(define_attr "mcu_have_movw" "yes,no"
+ (const (if_then_else (symbol_ref "AVR_HAVE_MOVW")
+ (const_string "yes")
+ (const_string "no"))))
+
+(define_attr "mcu_mega" "yes,no"
+ (const (if_then_else (symbol_ref "AVR_HAVE_JMP_CALL")
+ (const_string "yes")
+ (const_string "no"))))
+
+
+;; The size of instructions in bytes.
+;; XXX may depend on "cc"
+
+(define_attr "length" ""
+ (cond [(eq_attr "type" "branch")
+ (if_then_else (and (ge (minus (pc) (match_dup 0))
+ (const_int -63))
+ (le (minus (pc) (match_dup 0))
+ (const_int 62)))
+ (const_int 1)
+ (if_then_else (and (ge (minus (pc) (match_dup 0))
+ (const_int -2045))
+ (le (minus (pc) (match_dup 0))
+ (const_int 2045)))
+ (const_int 2)
+ (const_int 3)))
+ (eq_attr "type" "branch1")
+ (if_then_else (and (ge (minus (pc) (match_dup 0))
+ (const_int -62))
+ (le (minus (pc) (match_dup 0))
+ (const_int 61)))
+ (const_int 2)
+ (if_then_else (and (ge (minus (pc) (match_dup 0))
+ (const_int -2044))
+ (le (minus (pc) (match_dup 0))
+ (const_int 2043)))
+ (const_int 3)
+ (const_int 4)))
+ (eq_attr "type" "xcall")
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 1)
+ (const_int 2))]
+ (const_int 2)))
+
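+;; For example, a "branch" insn lands on length 1 (a single brXX) when
+;; its target is within -63..62 words of the pc, length 2 (an inverted
+;; branch around an rjmp) within -2045..2045 words, and length 3 (an
+;; inverted branch around a jmp) beyond that.
+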
+;; Define mode iterator
+(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
+(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
+(define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
+(define_mode_iterator HISI [(HI "") (SI "")])
+
+;;========================================================================
+;; The following is used by nonlocal_goto and setjmp.
+;; The receiver pattern will create no instructions since internally
+;; virtual_stack_vars = hard_frame_pointer + 1, so the RTL becomes R28=R28.
+;; This avoids creating add/sub offsets in frame_pointer save/restore.
+;; The 'null' receiver also avoids problems with optimisation
+;; not recognising the incoming jmp and removing code that resets frame_pointer.
+;; The code is derived from builtins.c.
+
+(define_expand "nonlocal_goto_receiver"
+ [(set (reg:HI REG_Y)
+ (unspec_volatile:HI [(const_int 0)] UNSPECV_GOTO_RECEIVER))]
+ ""
+ {
+ emit_move_insn (virtual_stack_vars_rtx,
+ gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
+ gen_int_mode (STARTING_FRAME_OFFSET,
+ Pmode)));
+ /* This might change the hard frame pointer in ways that aren't
+ apparent to early optimization passes, so force a clobber. */
+ emit_clobber (hard_frame_pointer_rtx);
+ DONE;
+ })
+
+
+;; Defining nonlocal_goto_receiver means we must also define this,
+;; even though its function is identical to that in builtins.c.
+
+(define_expand "nonlocal_goto"
+ [
+ (use (match_operand 0 "general_operand"))
+ (use (match_operand 1 "general_operand"))
+ (use (match_operand 2 "general_operand"))
+ (use (match_operand 3 "general_operand"))
+ ]
+ ""
+{
+ rtx r_label = copy_to_reg (operands[1]);
+ rtx r_fp = operands[3];
+ rtx r_sp = operands[2];
+
+ emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
+
+ emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+
+ emit_move_insn (hard_frame_pointer_rtx, r_fp);
+ emit_stack_restore (SAVE_NONLOCAL, r_sp);
+
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+
+ emit_indirect_jump (r_label);
+
+ DONE;
+})
+
+
+(define_insn "*pushqi"
+ [(set (mem:QI (post_dec:HI (reg:HI REG_SP)))
+ (match_operand:QI 0 "reg_or_0_operand" "r,L"))]
+ ""
+ "@
+ push %0
+ push __zero_reg__"
+ [(set_attr "length" "1,1")])
+
+(define_insn "*pushhi"
+ [(set (mem:HI (post_dec:HI (reg:HI REG_SP)))
+ (match_operand:HI 0 "reg_or_0_operand" "r,L"))]
+ ""
+ "@
+ push %B0\;push %A0
+ push __zero_reg__\;push __zero_reg__"
+ [(set_attr "length" "2,2")])
+
+(define_insn "*pushsi"
+ [(set (mem:SI (post_dec:HI (reg:HI REG_SP)))
+ (match_operand:SI 0 "reg_or_0_operand" "r,L"))]
+ ""
+ "@
+ push %D0\;push %C0\;push %B0\;push %A0
+ push __zero_reg__\;push __zero_reg__\;push __zero_reg__\;push __zero_reg__"
+ [(set_attr "length" "4,4")])
+
+(define_insn "*pushsf"
+ [(set (mem:SF (post_dec:HI (reg:HI REG_SP)))
+ (match_operand:SF 0 "register_operand" "r"))]
+ ""
+ "push %D0
+ push %C0
+ push %B0
+ push %A0"
+ [(set_attr "length" "4")])
+
+;;========================================================================
+;; move byte
+;; The last alternative (any immediate constant to any register) is
+;; very expensive. It should be optimized by peephole2 if a scratch
+;; register is available, but then that register could just as well be
+;; allocated for the variable we are loading. But, most of NO_LD_REGS
+;; are call-saved registers, and most of LD_REGS are call-used registers,
+;; so this may still be a win for registers live across function calls.
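+;;
+;; Illustrative sketch (register numbers invented): loading the constant
+;; 0x42 into r3, a NO_LD_REGS register, cannot use "ldi" directly; when
+;; the peephole2 further below finds a free LD_REGS scratch, say r31,
+;; the "*reload_inqi" pattern emits just
+;;
+;;   ldi r31,0x42
+;;   mov r3,r31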
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "/* One of the ops has to be in a register. */
+ if (!register_operand(operand0, QImode)
+ && ! (register_operand(operand1, QImode) || const0_rtx == operand1))
+ operands[1] = copy_to_mode_reg(QImode, operand1);
+ ")
+
+(define_insn "*movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,d,Qm,r,q,r,*r")
+ (match_operand:QI 1 "general_operand" "rL,i,rL,Qm,r,q,i"))]
+ "(register_operand (operands[0],QImode)
+ || register_operand (operands[1], QImode) || const0_rtx == operands[1])"
+ "* return output_movqi (insn, operands, NULL);"
+ [(set_attr "length" "1,1,5,5,1,1,4")
+ (set_attr "cc" "none,none,clobber,clobber,none,none,clobber")])
+
+;; This is used in peephole2 to optimize loading immediate constants
+;; if a scratch register from LD_REGS happens to be available.
+
+(define_insn "*reload_inqi"
+ [(set (match_operand:QI 0 "register_operand" "=l")
+ (match_operand:QI 1 "immediate_operand" "i"))
+ (clobber (match_operand:QI 2 "register_operand" "=&d"))]
+ "reload_completed"
+ "ldi %2,lo8(%1)
+ mov %0,%2"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_peephole2
+ [(match_scratch:QI 2 "d")
+ (set (match_operand:QI 0 "l_register_operand" "")
+ (match_operand:QI 1 "immediate_operand" ""))]
+ "(operands[1] != const0_rtx
+ && operands[1] != const1_rtx
+ && operands[1] != constm1_rtx)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+;;============================================================================
+;; move word (16 bit)
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand(operand0, HImode)
+ && !(register_operand(operand1, HImode) || const0_rtx == operands[1]))
+ {
+ operands[1] = copy_to_mode_reg(HImode, operand1);
+ }
+}")
+
+(define_insn "*movhi_sp"
+ [(set (match_operand:HI 0 "register_operand" "=q,r")
+ (match_operand:HI 1 "register_operand" "r,q"))]
+ "((stack_register_operand(operands[0], HImode) && register_operand (operands[1], HImode))
+ || (register_operand (operands[0], HImode) && stack_register_operand(operands[1], HImode)))"
+ "* return output_movhi (insn, operands, NULL);"
+ [(set_attr "length" "5,2")
+ (set_attr "cc" "none,none")])
+
+(define_insn "movhi_sp_r_irq_off"
+ [(set (match_operand:HI 0 "stack_register_operand" "=q")
+ (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r")]
+ UNSPECV_WRITE_SP_IRQ_OFF))]
+ ""
+ "out __SP_H__, %B1
+ out __SP_L__, %A1"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "movhi_sp_r_irq_on"
+ [(set (match_operand:HI 0 "stack_register_operand" "=q")
+ (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r")]
+ UNSPECV_WRITE_SP_IRQ_ON))]
+ ""
+ "cli
+ out __SP_H__, %B1
+ sei
+ out __SP_L__, %A1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none")])
+
+(define_peephole2
+ [(match_scratch:QI 2 "d")
+ (set (match_operand:HI 0 "l_register_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))]
+ "(operands[1] != const0_rtx
+ && operands[1] != constm1_rtx)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+;; '*' because it is not used in rtl generation, only in the peephole above.
+(define_insn "*reload_inhi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operand:HI 1 "immediate_operand" "i"))
+ (clobber (match_operand:QI 2 "register_operand" "=&d"))]
+ "reload_completed"
+ "* return output_reload_inhi (insn, operands, NULL);"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none")])
+
+(define_insn "*movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,d,*r,q,r")
+ (match_operand:HI 1 "general_operand" "rL,m,rL,i,i,r,q"))]
+ "(register_operand (operands[0],HImode)
+ || register_operand (operands[1],HImode) || const0_rtx == operands[1])"
+ "* return output_movhi (insn, operands, NULL);"
+ [(set_attr "length" "2,6,7,2,6,5,2")
+ (set_attr "cc" "none,clobber,clobber,none,clobber,none,none")])
+
+(define_peephole2 ; movw
+ [(set (match_operand:QI 0 "even_register_operand" "")
+ (match_operand:QI 1 "even_register_operand" ""))
+ (set (match_operand:QI 2 "odd_register_operand" "")
+ (match_operand:QI 3 "odd_register_operand" ""))]
+ "(AVR_HAVE_MOVW
+ && REGNO (operands[0]) == REGNO (operands[2]) - 1
+ && REGNO (operands[1]) == REGNO (operands[3]) - 1)"
+ [(set (match_dup 4) (match_dup 5))]
+ {
+ operands[4] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[5] = gen_rtx_REG (HImode, REGNO (operands[1]));
+ })
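+
+;; e.g. (register numbers invented for illustration) the byte moves
+;;   mov r24,r20
+;;   mov r25,r21
+;; collapse into the single word move "movw r24,r20".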
+
+(define_peephole2 ; movw_r
+ [(set (match_operand:QI 0 "odd_register_operand" "")
+ (match_operand:QI 1 "odd_register_operand" ""))
+ (set (match_operand:QI 2 "even_register_operand" "")
+ (match_operand:QI 3 "even_register_operand" ""))]
+ "(AVR_HAVE_MOVW
+ && REGNO (operands[2]) == REGNO (operands[0]) - 1
+ && REGNO (operands[3]) == REGNO (operands[1]) - 1)"
+ [(set (match_dup 4) (match_dup 5))]
+ {
+ operands[4] = gen_rtx_REG (HImode, REGNO (operands[2]));
+ operands[5] = gen_rtx_REG (HImode, REGNO (operands[3]));
+ })
+
+;;==========================================================================
+;; move double word (32 bit)
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand0, SImode)
+ && !(register_operand (operand1, SImode) || const0_rtx == operand1))
+ {
+ operands[1] = copy_to_mode_reg (SImode, operand1);
+ }
+}")
+
+
+
+(define_peephole2 ; movsi_lreg_const
+ [(match_scratch:QI 2 "d")
+ (set (match_operand:SI 0 "l_register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))
+ (match_dup 2)]
+ "(operands[1] != const0_rtx
+ && operands[1] != constm1_rtx)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+;; '*' because it is not used in rtl generation.
+(define_insn "*reload_insi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (clobber (match_operand:QI 2 "register_operand" "=&d"))]
+ "reload_completed"
+ "* return output_reload_insisf (insn, operands, NULL);"
+ [(set_attr "length" "8")
+ (set_attr "cc" "none")])
+
+
+(define_insn "*movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,Qm,!d,r")
+ (match_operand:SI 1 "general_operand" "r,L,Qm,rL,i,i"))]
+ "(register_operand (operands[0],SImode)
+ || register_operand (operands[1],SImode) || const0_rtx == operands[1])"
+ "* return output_movsisf (insn, operands, NULL);"
+ [(set_attr "length" "4,4,8,9,4,10")
+ (set_attr "cc" "none,set_zn,clobber,clobber,none,clobber")])
+
+;; fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+;; move floating point numbers (32 bit)
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, SFmode)
+ && !register_operand (operand0, SFmode))
+ {
+ operands[1] = copy_to_mode_reg (SFmode, operand1);
+ }
+}")
+
+(define_insn "*movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,Qm,!d,r")
+ (match_operand:SF 1 "general_operand" "r,G,Qm,r,F,F"))]
+ "register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode)"
+ "* return output_movsisf (insn, operands, NULL);"
+ [(set_attr "length" "4,4,8,9,4,10")
+ (set_attr "cc" "none,set_zn,clobber,clobber,none,clobber")])
+
+;;=========================================================================
+;; move string (like memcpy)
+;; implement as RTL loop
+
+(define_expand "movmemhi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:HI 2 "const_int_operand" ""))
+ (use (match_operand:HI 3 "const_int_operand" ""))])]
+ ""
+ "{
+ int prob;
+ HOST_WIDE_INT count;
+ enum machine_mode mode;
+ rtx label = gen_label_rtx ();
+ rtx loop_reg;
+ rtx jump;
+
+ /* Copy pointers into new pseudos - they will be changed. */
+ rtx addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ rtx addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+
+ /* Create rtx for the tmp register - we use it as a scratch. */
+ rtx tmp_reg_rtx = gen_rtx_REG (QImode, TMP_REGNO);
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ if (count <= 0)
+ FAIL;
+
+ /* Work out the branch probability for later use. */
+ prob = REG_BR_PROB_BASE - REG_BR_PROB_BASE / count;
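+  /* For instance, with REG_BR_PROB_BASE == 10000 a count of 4 gives
+     prob == 7500: the loop branch is taken on 3 of every 4 iterations.  */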
+
+ /* See if the constant fits in 8 bits. */
+ mode = (count < 0x100) ? QImode : HImode;
+ /* Create loop counter register. */
+ loop_reg = copy_to_mode_reg (mode, gen_int_mode (count, mode));
+
+ /* Now create RTL code for move loop. */
+ /* Label at top of loop. */
+ emit_label (label);
+
+ /* Move one byte into scratch and inc pointer. */
+ emit_move_insn (tmp_reg_rtx, gen_rtx_MEM (QImode, addr1));
+ emit_move_insn (addr1, gen_rtx_PLUS (Pmode, addr1, const1_rtx));
+
+ /* Move to mem and inc pointer. */
+ emit_move_insn (gen_rtx_MEM (QImode, addr0), tmp_reg_rtx);
+ emit_move_insn (addr0, gen_rtx_PLUS (Pmode, addr0, const1_rtx));
+
+ /* Decrement count. */
+ emit_move_insn (loop_reg, gen_rtx_PLUS (mode, loop_reg, constm1_rtx));
+
+ /* Compare with zero and jump if not equal. */
+ emit_cmp_and_jump_insns (loop_reg, const0_rtx, NE, NULL_RTX, mode, 1,
+ label);
+ /* Set jump probability based on loop count. */
+ jump = get_last_insn ();
+ add_reg_note (jump, REG_BR_PROB, GEN_INT (prob));
+ DONE;
+}")
+
+;; =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2
+;; memset (%0, %2, %1)
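+;; Note: only clearing is expanded inline here; a call such as (for
+;; example) memset (buf, 0, 10) becomes the "*clrmemqi" loop below,
+;; while any non-zero fill value FAILs and falls back to the library.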
+
+(define_expand "setmemhi"
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (use (match_operand:HI 1 "const_int_operand" ""))
+ (use (match_operand:HI 3 "const_int_operand" "n"))
+ (clobber (match_scratch:HI 4 ""))
+ (clobber (match_dup 5))])]
+ ""
+ "{
+ rtx addr0;
+ int cnt8;
+ enum machine_mode mode;
+
+ /* If the value to set is not zero, use the library routine. */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (GET_CODE (operands[1]) != CONST_INT)
+ FAIL;
+
+ cnt8 = byte_immediate_operand (operands[1], GET_MODE (operands[1]));
+ mode = cnt8 ? QImode : HImode;
+ operands[5] = gen_rtx_SCRATCH (mode);
+ operands[1] = copy_to_mode_reg (mode,
+ gen_int_mode (INTVAL (operands[1]), mode));
+ addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ operands[0] = gen_rtx_MEM (BLKmode, addr0);
+}")
+
+(define_insn "*clrmemqi"
+ [(set (mem:BLK (match_operand:HI 0 "register_operand" "e"))
+ (const_int 0))
+ (use (match_operand:QI 1 "register_operand" "r"))
+ (use (match_operand:QI 2 "const_int_operand" "n"))
+ (clobber (match_scratch:HI 3 "=0"))
+ (clobber (match_scratch:QI 4 "=&1"))]
+ ""
+ "st %a0+,__zero_reg__
+ dec %1
+ brne .-6"
+ [(set_attr "length" "3")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*clrmemhi"
+ [(set (mem:BLK (match_operand:HI 0 "register_operand" "e,e"))
+ (const_int 0))
+ (use (match_operand:HI 1 "register_operand" "!w,d"))
+ (use (match_operand:HI 2 "const_int_operand" "n,n"))
+ (clobber (match_scratch:HI 3 "=0,0"))
+ (clobber (match_scratch:HI 4 "=&1,&1"))]
+ ""
+ "*{
+ if (which_alternative==0)
+ return (AS2 (st,%a0+,__zero_reg__) CR_TAB
+ AS2 (sbiw,%A1,1) CR_TAB
+ AS1 (brne,.-6));
+ else
+ return (AS2 (st,%a0+,__zero_reg__) CR_TAB
+ AS2 (subi,%A1,1) CR_TAB
+ AS2 (sbci,%B1,0) CR_TAB
+ AS1 (brne,.-8));
+}"
+ [(set_attr "length" "3,4")
+ (set_attr "cc" "clobber,clobber")])
+
+(define_expand "strlenhi"
+ [(set (match_dup 4)
+ (unspec:HI [(match_operand:BLK 1 "memory_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")
+ (match_operand:HI 3 "immediate_operand" "")]
+ UNSPEC_STRLEN))
+ (set (match_dup 4) (plus:HI (match_dup 4)
+ (const_int -1)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (match_dup 4)
+ (match_dup 5)))]
+ ""
+ "{
+ rtx addr;
+ if (! (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 0))
+ FAIL;
+ addr = copy_to_mode_reg (Pmode, XEXP (operands[1],0));
+ operands[1] = gen_rtx_MEM (BLKmode, addr);
+ operands[5] = addr;
+ operands[4] = gen_reg_rtx (HImode);
+}")
+
+(define_insn "*strlenhi"
+ [(set (match_operand:HI 0 "register_operand" "=e")
+ (unspec:HI [(mem:BLK (match_operand:HI 1 "register_operand" "%0"))
+ (const_int 0)
+ (match_operand:HI 2 "immediate_operand" "i")]
+ UNSPEC_STRLEN))]
+ ""
+ "ld __tmp_reg__,%a0+
+ tst __tmp_reg__
+ brne .-6"
+ [(set_attr "length" "3")
+ (set_attr "cc" "clobber")])
+
+;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+; add bytes
+
+(define_insn "addqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,d,r,r")
+ (plus:QI (match_operand:QI 1 "register_operand" "%0,0,0,0")
+ (match_operand:QI 2 "nonmemory_operand" "r,i,P,N")))]
+ ""
+ "@
+ add %0,%2
+ subi %0,lo8(-(%2))
+ inc %0
+ dec %0"
+ [(set_attr "length" "1,1,1,1")
+ (set_attr "cc" "set_czn,set_czn,set_zn,set_zn")])
+
+
+(define_expand "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
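+      /* The truncation through "short" canonicalizes the constant to a
+         signed 16-bit value (e.g. 0xffff becomes -1), so that it can,
+         for instance, match the "N" alternative of "*addhi3" below.  */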
+ short tmp = INTVAL (operands[2]);
+ operands[2] = GEN_INT(tmp);
+ }
+}")
+
+
+(define_insn "*addhi3_zero_extend"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (zero_extend:HI
+ (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "register_operand" "0")))]
+ ""
+ "add %A0,%1
+ adc %B0,__zero_reg__"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_n")])
+
+(define_insn "*addhi3_zero_extend1"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0")
+ (zero_extend:HI
+ (match_operand:QI 2 "register_operand" "r"))))]
+ ""
+ "add %A0,%2
+ adc %B0,__zero_reg__"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_n")])
+
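+;; The cases below adjust SP by a small constant using stack side
+;; effects only: with a 2-byte PC each "rcall ." pushes a two-byte
+;; return address and so subtracts 2 from SP in a single word, while
+;; "push __tmp_reg__" subtracts 1 and "pop __tmp_reg__" adds 1 -
+;; presumably chosen because none of these touch SREG.
+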
+(define_insn "*addhi3_sp_R_pc2"
+ [(set (match_operand:HI 1 "stack_register_operand" "=q")
+ (plus:HI (match_operand:HI 2 "stack_register_operand" "q")
+ (match_operand:HI 0 "avr_sp_immediate_operand" "R")))]
+ "AVR_2_BYTE_PC"
+ "*{
+ if (CONST_INT_P (operands[0]))
+ {
+ switch(INTVAL (operands[0]))
+ {
+ case -6:
+ return \"rcall .\" CR_TAB
+ \"rcall .\" CR_TAB
+ \"rcall .\";
+ case -5:
+ return \"rcall .\" CR_TAB
+ \"rcall .\" CR_TAB
+ \"push __tmp_reg__\";
+ case -4:
+ return \"rcall .\" CR_TAB
+ \"rcall .\";
+ case -3:
+ return \"rcall .\" CR_TAB
+ \"push __tmp_reg__\";
+ case -2:
+ return \"rcall .\";
+ case -1:
+ return \"push __tmp_reg__\";
+ case 0:
+ return \"\";
+ case 1:
+ return \"pop __tmp_reg__\";
+ case 2:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 3:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 4:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 5:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ }
+ }
+ return \"bug\";
+ }"
+ [(set (attr "length")
+ (cond [(eq (const_int -6) (symbol_ref "INTVAL (operands[0])")) (const_int 3)
+ (eq (const_int -5) (symbol_ref "INTVAL (operands[0])")) (const_int 3)
+ (eq (const_int -4) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int -3) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int -2) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int -1) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int 0) (symbol_ref "INTVAL (operands[0])")) (const_int 0)
+ (eq (const_int 1) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int 2) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int 3) (symbol_ref "INTVAL (operands[0])")) (const_int 3)
+ (eq (const_int 4) (symbol_ref "INTVAL (operands[0])")) (const_int 4)
+ (eq (const_int 5) (symbol_ref "INTVAL (operands[0])")) (const_int 5)]
+ (const_int 0)))])
+
+(define_insn "*addhi3_sp_R_pc3"
+ [(set (match_operand:HI 1 "stack_register_operand" "=q")
+ (plus:HI (match_operand:HI 2 "stack_register_operand" "q")
+ (match_operand:HI 0 "avr_sp_immediate_operand" "R")))]
+ "AVR_3_BYTE_PC"
+ "*{
+ if (CONST_INT_P (operands[0]))
+ {
+ switch(INTVAL (operands[0]))
+ {
+ case -6:
+ return \"rcall .\" CR_TAB
+ \"rcall .\";
+ case -5:
+ return \"rcall .\" CR_TAB
+ \"push __tmp_reg__\" CR_TAB
+ \"push __tmp_reg__\";
+ case -4:
+ return \"rcall .\" CR_TAB
+ \"push __tmp_reg__\";
+ case -3:
+ return \"rcall .\";
+ case -2:
+ return \"push __tmp_reg__\" CR_TAB
+ \"push __tmp_reg__\";
+ case -1:
+ return \"push __tmp_reg__\";
+ case 0:
+ return \"\";
+ case 1:
+ return \"pop __tmp_reg__\";
+ case 2:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 3:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 4:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ case 5:
+ return \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\" CR_TAB
+ \"pop __tmp_reg__\";
+ }
+ }
+ return \"bug\";
+ }"
+ [(set (attr "length")
+ (cond [(eq (const_int -6) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int -5) (symbol_ref "INTVAL (operands[0])")) (const_int 3)
+ (eq (const_int -4) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int -3) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int -2) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int -1) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int 0) (symbol_ref "INTVAL (operands[0])")) (const_int 0)
+ (eq (const_int 1) (symbol_ref "INTVAL (operands[0])")) (const_int 1)
+ (eq (const_int 2) (symbol_ref "INTVAL (operands[0])")) (const_int 2)
+ (eq (const_int 3) (symbol_ref "INTVAL (operands[0])")) (const_int 3)
+ (eq (const_int 4) (symbol_ref "INTVAL (operands[0])")) (const_int 4)
+ (eq (const_int 5) (symbol_ref "INTVAL (operands[0])")) (const_int 5)]
+ (const_int 0)))])
+
+(define_insn "*addhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,!w,!w,d,r,r")
+ (plus:HI
+ (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,I,J,i,P,N")))]
+ ""
+ "@
+ add %A0,%A2\;adc %B0,%B2
+ adiw %A0,%2
+ sbiw %A0,%n2
+ subi %A0,lo8(-(%2))\;sbci %B0,hi8(-(%2))
+ sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__
+ sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__"
+ [(set_attr "length" "2,1,1,2,3,3")
+ (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")])
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,!w,!w,d,r,r")
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,J,i,P,N")))]
+ ""
+ "@
+ add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2
+ adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__
+ sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__
+ subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2))
+ sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__
+ sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__"
+ [(set_attr "length" "4,3,3,4,5,5")
+ (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n")])
+
+(define_insn "*addsi3_zero_extend"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:SI 2 "register_operand" "0")))]
+ ""
+ "add %A0,%1
+ adc %B0,__zero_reg__
+ adc %C0,__zero_reg__
+ adc %D0,__zero_reg__"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_n")])
+
+;-----------------------------------------------------------------------------
+; sub bytes
+(define_insn "subqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,d")
+ (minus:QI (match_operand:QI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "r,i")))]
+ ""
+ "@
+ sub %0,%2
+ subi %0,lo8(%2)"
+ [(set_attr "length" "1,1")
+ (set_attr "cc" "set_czn,set_czn")])
+
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,d")
+ (minus:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,i")))]
+ ""
+ "@
+ sub %A0,%A2\;sbc %B0,%B2
+ subi %A0,lo8(%2)\;sbci %B0,hi8(%2)"
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "set_czn,set_czn")])
+
+(define_insn "*subhi3_zero_extend1"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0")
+ (zero_extend:HI
+ (match_operand:QI 2 "register_operand" "r"))))]
+ ""
+ "sub %A0,%2
+ sbc %B0,__zero_reg__"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_n")])
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+ ""
+ "@
+ sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2
+ subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)"
+ [(set_attr "length" "4,4")
+ (set_attr "cc" "set_czn,set_czn")])
+
+(define_insn "*subsi3_zero_extend"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (zero_extend:SI
+ (match_operand:QI 2 "register_operand" "r"))))]
+ ""
+ "sub %A0,%2
+ sbc %B0,__zero_reg__
+ sbc %C0,__zero_reg__
+ sbc %D0,__zero_reg__"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_n")])
+
+;******************************************************************************
+; mul
+
+(define_expand "mulqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "register_operand" "")))]
+ ""
+ "{
+ if (!AVR_HAVE_MUL)
+ {
+ emit_insn (gen_mulqi3_call (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*mulqi3_enh"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mult:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r")))]
+ "AVR_HAVE_MUL"
+ "mul %1,%2
+ mov %0,r0
+ clr r1"
+ [(set_attr "length" "3")
+ (set_attr "cc" "clobber")])
+
+(define_expand "mulqi3_call"
+ [(set (reg:QI 24) (match_operand:QI 1 "register_operand" ""))
+ (set (reg:QI 22) (match_operand:QI 2 "register_operand" ""))
+ (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 22))])
+ (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]
+ ""
+ "")
+
+(define_insn "*mulqi3_call"
+ [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 22))]
+ "!AVR_HAVE_MUL"
+ "%~call __mulqi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+ (sign_extend:HI (match_operand:QI 2 "register_operand" "d"))))]
+ "AVR_HAVE_MUL"
+ "muls %1,%2
+ movw %0,r0
+ clr r1"
+ [(set_attr "length" "3")
+ (set_attr "cc" "clobber")])
+
+(define_insn "umulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+ "AVR_HAVE_MUL"
+ "mul %1,%2
+ movw %0,r0
+ clr r1"
+ [(set_attr "length" "3")
+ (set_attr "cc" "clobber")])
+
+(define_expand "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "register_operand" "")))]
+ ""
+ "
+{
+ if (!AVR_HAVE_MUL)
+ {
+ emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
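+;; 16 x 16 -> 16 bit multiply from three 8 x 8 products:
+;; (A1 + 256*B1)*(A2 + 256*B2) == A1*A2 + 256*(A1*B2 + B1*A2)  (mod 2^16),
+;; which is what the three "mul" instructions below compute.
+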
+(define_insn "*mulhi3_enh"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (mult:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "register_operand" "r")))]
+ "AVR_HAVE_MUL"
+ "mul %A1,%A2
+ movw %0,r0
+ mul %A1,%B2
+ add %B0,r0
+ mul %B1,%A2
+ add %B0,r0
+ clr r1"
+ [(set_attr "length" "7")
+ (set_attr "cc" "clobber")])
+
+(define_expand "mulhi3_call"
+ [(set (reg:HI 24) (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 22) (match_operand:HI 2 "register_operand" ""))
+ (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 22))
+ (clobber (reg:QI 21))])
+ (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]
+ ""
+ "")
+
+(define_insn "*mulhi3_call"
+ [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 22))
+ (clobber (reg:QI 21))]
+ "!AVR_HAVE_MUL"
+ "%~call __mulhi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Operand 2 (reg:SI 18) is not clobbered on the enhanced core.
+;; All call-used registers are clobbered otherwise - a normal library call.
+(define_expand "mulsi3"
+ [(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
+ (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))])
+ (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
+ "AVR_HAVE_MUL"
+ "")
+
+(define_insn "*mulsi3_call"
+ [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))]
+ "AVR_HAVE_MUL"
+ "%~call __mulsi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
+; divmod
+
+;; Generate libgcc.S calls ourselves, because:
+;; - we know exactly which registers are clobbered (for QI and HI
+;; modes, some of the call-used registers are preserved)
+;; - we get both the quotient and the remainder at no extra cost
+;; - we split the patterns only after the first CSE passes because
+;; CSE has problems operating on hard regs.
+;;
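+;; As an illustrative sketch (variable names invented), a C fragment like
+;;
+;;   int8_t q = a / b;
+;;   int8_t r = a % b;
+;;
+;; expands to one "divmodqi4" insn and thus a single __divmodqi4 call
+;; that yields both the quotient and the remainder.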
+(define_insn_and_split "divmodqi4"
+ [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "")
+ (div:QI (match_operand:QI 1 "pseudo_register_operand" "")
+ (match_operand:QI 2 "pseudo_register_operand" "")))
+ (set (match_operand:QI 3 "pseudo_register_operand" "")
+ (mod:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 23))
+ (clobber (reg:QI 24))
+ (clobber (reg:QI 25))])]
+ ""
+ "this divmodqi4 pattern should have been splitted;"
+ ""
+ [(set (reg:QI 24) (match_dup 1))
+ (set (reg:QI 22) (match_dup 2))
+ (parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
+ (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 23))])
+ (set (match_dup 0) (reg:QI 24))
+ (set (match_dup 3) (reg:QI 25))]
+ "")
+
+(define_insn "*divmodqi4_call"
+ [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
+ (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 23))]
+ ""
+ "%~call __divmodqi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "udivmodqi4"
+ [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "")
+ (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "")
+ (match_operand:QI 2 "pseudo_register_operand" "")))
+ (set (match_operand:QI 3 "pseudo_register_operand" "")
+ (umod:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 23))
+ (clobber (reg:QI 24))
+ (clobber (reg:QI 25))])]
+ ""
+ "this udivmodqi4 pattern should have been splitted;"
+ ""
+ [(set (reg:QI 24) (match_dup 1))
+ (set (reg:QI 22) (match_dup 2))
+ (parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
+ (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 23))])
+ (set (match_dup 0) (reg:QI 24))
+ (set (match_dup 3) (reg:QI 25))]
+ "")
+
+(define_insn "*udivmodqi4_call"
+ [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
+ (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22)))
+ (clobber (reg:QI 23))]
+ ""
+ "%~call __udivmodqi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "divmodhi4"
+ [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "")
+ (div:HI (match_operand:HI 1 "pseudo_register_operand" "")
+ (match_operand:HI 2 "pseudo_register_operand" "")))
+ (set (match_operand:HI 3 "pseudo_register_operand" "")
+ (mod:HI (match_dup 1) (match_dup 2)))
+ (clobber (reg:QI 21))
+ (clobber (reg:HI 22))
+ (clobber (reg:HI 24))
+ (clobber (reg:HI 26))])]
+ ""
+ "this should have been splitted;"
+ ""
+ [(set (reg:HI 24) (match_dup 1))
+ (set (reg:HI 22) (match_dup 2))
+ (parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
+ (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 26))
+ (clobber (reg:QI 21))])
+ (set (match_dup 0) (reg:HI 22))
+ (set (match_dup 3) (reg:HI 24))]
+ "")
+
+(define_insn "*divmodhi4_call"
+ [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
+ (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 26))
+ (clobber (reg:QI 21))]
+ ""
+ "%~call __divmodhi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "udivmodhi4"
+ [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "")
+ (udiv:HI (match_operand:HI 1 "pseudo_register_operand" "")
+ (match_operand:HI 2 "pseudo_register_operand" "")))
+ (set (match_operand:HI 3 "pseudo_register_operand" "")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ (clobber (reg:QI 21))
+ (clobber (reg:HI 22))
+ (clobber (reg:HI 24))
+ (clobber (reg:HI 26))])]
+ ""
+ "this udivmodhi4 pattern should have been splitted.;"
+ ""
+ [(set (reg:HI 24) (match_dup 1))
+ (set (reg:HI 22) (match_dup 2))
+ (parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
+ (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 26))
+ (clobber (reg:QI 21))])
+ (set (match_dup 0) (reg:HI 22))
+ (set (match_dup 3) (reg:HI 24))]
+ "")
+
+(define_insn "*udivmodhi4_call"
+ [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
+ (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22)))
+ (clobber (reg:HI 26))
+ (clobber (reg:QI 21))]
+ ""
+ "%~call __udivmodhi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "")
+ (div:SI (match_operand:SI 1 "pseudo_register_operand" "")
+ (match_operand:SI 2 "pseudo_register_operand" "")))
+ (set (match_operand:SI 3 "pseudo_register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 22))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))])]
+ ""
+ "this divmodsi4 pattern should have been splitted;"
+ ""
+ [(set (reg:SI 22) (match_dup 1))
+ (set (reg:SI 18) (match_dup 2))
+ (parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
+ (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))])
+ (set (match_dup 0) (reg:SI 18))
+ (set (match_dup 3) (reg:SI 22))]
+ "")
+
+(define_insn "*divmodsi4_call"
+ [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
+ (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))]
+ ""
+ "%~call __divmodsi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "udivmodsi4"
+ [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "")
+ (udiv:SI (match_operand:SI 1 "pseudo_register_operand" "")
+ (match_operand:SI 2 "pseudo_register_operand" "")))
+ (set (match_operand:SI 3 "pseudo_register_operand" "")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:SI 18))
+ (clobber (reg:SI 22))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))])]
+ ""
+ "this udivmodsi4 pattern should have been splitted;"
+ ""
+ [(set (reg:SI 22) (match_dup 1))
+ (set (reg:SI 18) (match_dup 2))
+ (parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
+ (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))])
+ (set (match_dup 0) (reg:SI 18))
+ (set (match_dup 3) (reg:SI 22))]
+ "")
+
+(define_insn "*udivmodsi4_call"
+ [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
+ (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18)))
+ (clobber (reg:HI 26))
+ (clobber (reg:HI 30))]
+ ""
+ "%~call __udivmodsi4"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
+; and
+
+(define_insn "andqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,d")
+ (and:QI (match_operand:QI 1 "register_operand" "%0,0")
+ (match_operand:QI 2 "nonmemory_operand" "r,i")))]
+ ""
+ "@
+ and %0,%2
+ andi %0,lo8(%2)"
+ [(set_attr "length" "1,1")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_insn "andhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,d,r")
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,i,M")))
+ (clobber (match_scratch:QI 3 "=X,X,&d"))]
+ ""
+{
+ if (which_alternative==0)
+ return ("and %A0,%A2" CR_TAB
+ "and %B0,%B2");
+ else if (which_alternative==1)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int mask = INTVAL (operands[2]);
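+          /* AND with an all-ones byte is a no-op (x & 0xff == x), so
+             "andi" is only emitted for bytes whose mask is not 0xff.  */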
+ if ((mask & 0xff) != 0xff)
+ output_asm_insn (AS2 (andi,%A0,lo8(%2)), operands);
+ if ((mask & 0xff00) != 0xff00)
+ output_asm_insn (AS2 (andi,%B0,hi8(%2)), operands);
+ return "";
+ }
+ return (AS2 (andi,%A0,lo8(%2)) CR_TAB
+ AS2 (andi,%B0,hi8(%2)));
+ }
+ return (AS2 (ldi,%3,lo8(%2)) CR_TAB
+ "and %A0,%3" CR_TAB
+ AS1 (clr,%B0));
+}
+ [(set_attr "length" "2,2,3")
+ (set_attr "cc" "set_n,clobber,set_n")])
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+ ""
+{
+ if (which_alternative==0)
+ return ("and %0,%2" CR_TAB
+ "and %B0,%B2" CR_TAB
+ "and %C0,%C2" CR_TAB
+ "and %D0,%D2");
+ else if (which_alternative==1)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT mask = INTVAL (operands[2]);
+ if ((mask & 0xff) != 0xff)
+ output_asm_insn (AS2 (andi,%A0,lo8(%2)), operands);
+ if ((mask & 0xff00) != 0xff00)
+ output_asm_insn (AS2 (andi,%B0,hi8(%2)), operands);
+ if ((mask & 0xff0000L) != 0xff0000L)
+ output_asm_insn (AS2 (andi,%C0,hlo8(%2)), operands);
+ if ((mask & 0xff000000L) != 0xff000000L)
+ output_asm_insn (AS2 (andi,%D0,hhi8(%2)), operands);
+ return "";
+ }
+ return (AS2 (andi, %A0,lo8(%2)) CR_TAB
+ AS2 (andi, %B0,hi8(%2)) CR_TAB
+ AS2 (andi, %C0,hlo8(%2)) CR_TAB
+ AS2 (andi, %D0,hhi8(%2)));
+ }
+ return "bug";
+}
+ [(set_attr "length" "4,4")
+ (set_attr "cc" "set_n,clobber")])
+
+(define_peephole2 ; andi
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "")))
+ (set (match_dup 0)
+ (and:QI (match_dup 0)
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ {
+ operands[1] = GEN_INT (INTVAL (operands[1]) & INTVAL (operands[2]));
+ })
+
+;;|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+;; ior
+
+(define_insn "iorqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,d")
+ (ior:QI (match_operand:QI 1 "register_operand" "%0,0")
+ (match_operand:QI 2 "nonmemory_operand" "r,i")))]
+ ""
+ "@
+ or %0,%2
+ ori %0,lo8(%2)"
+ [(set_attr "length" "1,1")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_insn "iorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,d")
+ (ior:HI (match_operand:HI 1 "register_operand" "%0,0")
+ (match_operand:HI 2 "nonmemory_operand" "r,i")))]
+ ""
+{
+ if (which_alternative==0)
+ return ("or %A0,%A2" CR_TAB
+ "or %B0,%B2");
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int mask = INTVAL (operands[2]);
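+      /* OR with a zero byte is a no-op (x | 0 == x), so "ori" is only
+         emitted for bytes whose mask is non-zero.  */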
+ if (mask & 0xff)
+ output_asm_insn (AS2 (ori,%A0,lo8(%2)), operands);
+ if (mask & 0xff00)
+ output_asm_insn (AS2 (ori,%B0,hi8(%2)), operands);
+ return "";
+ }
+ return (AS2 (ori,%0,lo8(%2)) CR_TAB
+ AS2 (ori,%B0,hi8(%2)));
+}
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "set_n,clobber")])
+
+(define_insn "*iorhi3_clobber"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ior:HI (match_operand:HI 1 "register_operand" "%0,0")
+ (match_operand:HI 2 "immediate_operand" "M,i")))
+ (clobber (match_scratch:QI 3 "=&d,&d"))]
+ ""
+ "@
+ ldi %3,lo8(%2)\;or %A0,%3
+ ldi %3,lo8(%2)\;or %A0,%3\;ldi %3,hi8(%2)\;or %B0,%3"
+ [(set_attr "length" "2,4")
+ (set_attr "cc" "clobber,set_n")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,d")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+ ""
+{
+ if (which_alternative==0)
+ return ("or %0,%2" CR_TAB
+ "or %B0,%B2" CR_TAB
+ "or %C0,%C2" CR_TAB
+ "or %D0,%D2");
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT mask = INTVAL (operands[2]);
+ if (mask & 0xff)
+ output_asm_insn (AS2 (ori,%A0,lo8(%2)), operands);
+ if (mask & 0xff00)
+ output_asm_insn (AS2 (ori,%B0,hi8(%2)), operands);
+ if (mask & 0xff0000L)
+ output_asm_insn (AS2 (ori,%C0,hlo8(%2)), operands);
+ if (mask & 0xff000000L)
+ output_asm_insn (AS2 (ori,%D0,hhi8(%2)), operands);
+ return "";
+ }
+ return (AS2 (ori, %A0,lo8(%2)) CR_TAB
+ AS2 (ori, %B0,hi8(%2)) CR_TAB
+ AS2 (ori, %C0,hlo8(%2)) CR_TAB
+ AS2 (ori, %D0,hhi8(%2)));
+}
+ [(set_attr "length" "4,4")
+ (set_attr "cc" "set_n,clobber")])
+
+(define_insn "*iorsi3_clobber"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "immediate_operand" "M,i")))
+ (clobber (match_scratch:QI 3 "=&d,&d"))]
+ ""
+ "@
+ ldi %3,lo8(%2)\;or %A0,%3
+ ldi %3,lo8(%2)\;or %A0,%3\;ldi %3,hi8(%2)\;or %B0,%3\;ldi %3,hlo8(%2)\;or %C0,%3\;ldi %3,hhi8(%2)\;or %D0,%3"
+ [(set_attr "length" "2,8")
+ (set_attr "cc" "clobber,set_n")])
+
+;;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+;; xor
+
+(define_insn "xorqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (xor:QI (match_operand:QI 1 "register_operand" "%0")
+ (match_operand:QI 2 "register_operand" "r")))]
+ ""
+ "eor %0,%2"
+ [(set_attr "length" "1")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (xor:HI (match_operand:HI 1 "register_operand" "%0")
+ (match_operand:HI 2 "register_operand" "r")))]
+ ""
+ "eor %0,%2
+ eor %B0,%B2"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_n")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "eor %0,%2
+ eor %B0,%B2
+ eor %C0,%C2
+ eor %D0,%D2"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_n")])
+
+;; swap swap swap swap swap swap swap swap swap swap swap swap swap swap swap
+;; swap
+
+(define_expand "rotlqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (rotate:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "
+{
+ if (!CONST_INT_P (operands[2]) || (INTVAL (operands[2]) != 4))
+ FAIL;
+}")
+
+(define_insn "*rotlqi3_4"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (rotate:QI (match_operand:QI 1 "register_operand" "0")
+ (const_int 4)))]
+ ""
+ "swap %0"
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")])
+
+;; Split all rotates of HI, SI and DImode registers where the rotation
+;; is by a whole number of bytes.  The split creates the appropriate
+;; moves and considers all overlap situations.  DImode is split before
+;; reload.
+
+;; HImode does not need a scratch.  Use an attribute for this constraint.
+;; Use a QI scratch for DImode as it is often split into byte-sized operands.
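+
+;; For example, rotating an SImode value by 16 bits just swaps its two
+;; 16-bit halves; the split implements this with plain register moves
+;; ("movw" when available), using the scratch to break the cycle when
+;; the source and destination overlap.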
+
+(define_mode_attr rotx [(DI "&r,&r,X") (SI "&r,&r,X") (HI "X,X,X")])
+(define_mode_attr rotsmode [(DI "QI") (SI "HI") (HI "QI")])
+
+(define_expand "rotl<mode>3"
+ [(parallel [(set (match_operand:HIDI 0 "register_operand" "")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
+ (match_operand:VOID 2 "const_int_operand" "")))
+ (clobber (match_operand 3 ""))])]
+ ""
+ {
+ if (CONST_INT_P (operands[2])
+ && 0 == INTVAL (operands[2]) % 8)
+ {
+ if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
+ operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+ else
+ operands[3] = gen_rtx_SCRATCH (QImode);
+ }
+ else
+ FAIL;
+ })
+
+
+;; Overlapping non-HImode registers often (but not always) need a scratch.
+;; The best we can do is use the early-clobber alternative "#&r" so that
+;; completely non-overlapping operands don't get a scratch, while the "#"
+;; keeps register allocation from preferring non-overlapping operands.
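+;;
+;; Sketch of the intent: when the destination is early-clobbered and thus
+;; completely disjoint from the source (the "#&r" alternative), the bytes
+;; can simply be moved and the scratch column is "X"; the overlapping
+;; alternatives keep an "&r" scratch to break move cycles.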
+
+
+; Split word aligned rotates using scratch that is mode dependent.
+(define_insn_and_split "*rotw<mode>"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "const_int_operand" "n,n,n")))
+ (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))]
+ "AVR_HAVE_MOVW
+ && CONST_INT_P (operands[2])
+ && 0 == INTVAL (operands[2]) % 16"
+ "#"
+ "&& (reload_completed || <MODE>mode == DImode)"
+ [(const_int 0)]
+ {
+ avr_rotate_bytes (operands);
+ DONE;
+ })
+
+
+; Split byte aligned rotates using scratch that is always QI mode.
+(define_insn_and_split "*rotb<mode>"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "const_int_operand" "n,n,n")))
+ (clobber (match_scratch:QI 3 "=<rotx>"))]
+ "CONST_INT_P (operands[2])
+ && (8 == INTVAL (operands[2]) % 16
+ || (!AVR_HAVE_MOVW
+ && 0 == INTVAL (operands[2]) % 16))"
+ "#"
+ "&& (reload_completed || <MODE>mode == DImode)"
+ [(const_int 0)]
+ {
+ avr_rotate_bytes (operands);
+ DONE;
+ })
+
+
+;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
+;; arithmetic shift left
+
+(define_expand "ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ashift:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_split ; ashlqi3_const4
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 4)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int -16)))]
+ "")
+
+(define_split ; ashlqi3_const5
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 5)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 1)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int -32)))]
+ "")
+
+(define_split ; ashlqi3_const6
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 6)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 2)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int -64)))]
+ "")
+
+(define_insn "*ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,!d,r,r")
+ (ashift:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,K,n,n,Qm")))]
+ ""
+ "* return ashlqi3_out (insn, operands, NULL);"
+ [(set_attr "length" "5,0,1,2,4,6,9")
+ (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")])
+
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return ashlhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "6,0,2,2,4,10,10")
+ (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")])
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return ashlsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "8,0,4,4,8,10,12")
+ (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")])
+
+;; Optimize if a scratch register from LD_REGS happens to be available.
+
+(define_peephole2 ; ashlqi3_l_const4
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 4)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 1) (const_int -16))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2 ; ashlqi3_l_const5
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 5)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 1)))
+ (set (match_dup 1) (const_int -32))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2 ; ashlqi3_l_const6
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (ashift:QI (match_dup 0)
+ (const_int 6)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (ashift:QI (match_dup 0) (const_int 2)))
+ (set (match_dup 1) (const_int -64))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:HI 0 "register_operand" "")
+ (ashift:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (ashift:HI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*ashlhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,K,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))]
+ "reload_completed"
+ "* return ashlhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,2,2,4,10")
+ (set_attr "cc" "none,set_n,clobber,set_n,clobber")])
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*ashlsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,&d"))]
+ "reload_completed"
+ "* return ashlsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,4,4,10")
+ (set_attr "cc" "none,set_n,clobber,clobber")])
+
+;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >>
+;; arithmetic shift right
+
+(define_insn "ashrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,K,n,Qm")))]
+ ""
+ "* return ashrqi3_out (insn, operands, NULL);"
+ [(set_attr "length" "5,0,1,2,5,9")
+ (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber")])
+
+(define_insn "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return ashrhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "6,0,2,4,4,10,10")
+ (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return ashrsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "8,0,4,6,8,10,12")
+ (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")])
+
+;; Optimize if a scratch register from LD_REGS happens to be available.
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:HI 0 "register_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (ashiftrt:HI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*ashrhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,K,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))]
+ "reload_completed"
+ "* return ashrhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,2,4,4,10")
+ (set_attr "cc" "none,clobber,set_n,clobber,clobber")])
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*ashrsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,&d"))]
+ "reload_completed"
+ "* return ashrsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,4,4,10")
+ (set_attr "cc" "none,clobber,set_n,clobber")])
+
+;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >>
+;; logical shift right
+
+(define_expand "lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_split ; lshrqi3_const4
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 4)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int 15)))]
+ "")
+
+(define_split ; lshrqi3_const5
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 5)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 1)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int 7)))]
+ "")
+
+(define_split ; lshrqi3_const6
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 6)))]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 2)))
+ (set (match_dup 0) (and:QI (match_dup 0) (const_int 3)))]
+ "")
+
+(define_insn "*lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,!d,r,r")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,K,n,n,Qm")))]
+ ""
+ "* return lshrqi3_out (insn, operands, NULL);"
+ [(set_attr "length" "5,0,1,2,4,6,9")
+ (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")])
+
+(define_insn "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return lshrhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "6,0,2,2,4,10,10")
+ (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,L,P,O,K,n,Qm")))]
+ ""
+ "* return lshrsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "8,0,4,4,8,10,12")
+ (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")])
+
+;; Optimize if a scratch register from LD_REGS happens to be available.
+
+(define_peephole2 ; lshrqi3_l_const4
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 4)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 1) (const_int 15))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2 ; lshrqi3_l_const5
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 5)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 1)))
+ (set (match_dup 1) (const_int 7))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2 ; lshrqi3_l_const6
+ [(set (match_operand:QI 0 "l_register_operand" "")
+ (lshiftrt:QI (match_dup 0)
+ (const_int 6)))
+ (match_scratch:QI 1 "d")]
+ ""
+ [(set (match_dup 0) (rotate:QI (match_dup 0) (const_int 4)))
+ (set (match_dup 0) (lshiftrt:QI (match_dup 0) (const_int 2)))
+ (set (match_dup 1) (const_int 3))
+ (set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:HI 0 "register_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (lshiftrt:HI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*lshrhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0,r,0,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,K,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))]
+ "reload_completed"
+ "* return lshrhi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,2,2,4,10")
+ (set_attr "cc" "none,clobber,clobber,clobber,clobber")])
+
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ [(parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*lshrsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r,0")
+ (match_operand:QI 2 "const_int_operand" "L,P,O,n")))
+ (clobber (match_scratch:QI 3 "=X,X,X,&d"))]
+ "reload_completed"
+ "* return lshrsi3_out (insn, operands, NULL);"
+ [(set_attr "length" "0,4,4,10")
+ (set_attr "cc" "none,clobber,clobber,clobber")])
+
+;; abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x)
+;; abs
+
+(define_insn "absqi2"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (abs:QI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "sbrc %0,7
+ neg %0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")])
+
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=d,r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0,0")))]
+ ""
+ "@
+ andi %D0,0x7f
+ clt\;bld %D0,7"
+ [(set_attr "length" "1,2")
+ (set_attr "cc" "set_n,clobber")])
+
+;; 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x
+;; neg
+
+(define_insn "negqi2"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (neg:QI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "neg %0"
+ [(set_attr "length" "1")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "neghi2"
+ [(set (match_operand:HI 0 "register_operand" "=!d,r,&r")
+ (neg:HI (match_operand:HI 1 "register_operand" "0,0,r")))]
+ ""
+ "@
+ com %B0\;neg %A0\;sbci %B0,lo8(-1)
+ com %B0\;neg %A0\;sbc %B0,__zero_reg__\;inc %B0
+ clr %A0\;clr %B0\;sub %A0,%A1\;sbc %B0,%B1"
+ [(set_attr "length" "3,4,4")
+ (set_attr "cc" "set_czn,set_n,set_czn")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=!d,r,&r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,0,r")))]
+ ""
+ "@
+ com %D0\;com %C0\;com %B0\;neg %A0\;sbci %B0,lo8(-1)\;sbci %C0,lo8(-1)\;sbci %D0,lo8(-1)
+ com %D0\;com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__
+ clr %A0\;clr %B0\;{clr %C0\;clr %D0|movw %C0,%A0}\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1\;sbc %D0,%D1"
+ [(set_attr_alternative "length"
+ [(const_int 7)
+ (const_int 8)
+ (if_then_else (eq_attr "mcu_have_movw" "yes")
+ (const_int 7)
+ (const_int 8))])
+ (set_attr "cc" "set_czn,set_n,set_czn")])
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=d,r")
+ (neg:SF (match_operand:SF 1 "register_operand" "0,0")))]
+ ""
+ "@
+ subi %D0,0x80
+ bst %D0,7\;com %D0\;bld %D0,7\;com %D0"
+ [(set_attr "length" "1,4")
+ (set_attr "cc" "set_n,set_n")])
+
+;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+;; not
+
+(define_insn "one_cmplqi2"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (not:QI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "com %0"
+ [(set_attr "length" "1")
+ (set_attr "cc" "set_czn")])
+
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (not:HI (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "com %0
+ com %B0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_n")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "0")))]
+ ""
+ "com %0
+ com %B0
+ com %C0
+ com %D0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_n")])
+
+;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
+;; sign extend
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
+ ""
+ "@
+ clr %B0\;sbrc %0,7\;com %B0
+ mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0"
+ [(set_attr "length" "3,4")
+ (set_attr "cc" "set_n,set_n")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
+ ""
+ "@
+ clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
+ mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0"
+ [(set_attr "length" "5,6")
+ (set_attr "cc" "set_n,set_n")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
+ ""
+ "@
+ clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
+ {mov %A0,%A1\;mov %B0,%B1|movw %A0,%A1}\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0"
+ [(set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (eq_attr "mcu_have_movw" "yes")
+ (const_int 5)
+ (const_int 6))])
+ (set_attr "cc" "set_n,set_n")])
+
+;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
+;; zero extend
+
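+;; Each splitter below decomposes the zero extension into two moves on
+;; subregs of the destination: copy the source into the low part and
+;; clear the high part, i.e. roughly (in C): dst.lo = src; dst.hi = 0;
+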
+(define_insn_and_split "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (QImode, HImode);
+ unsigned int high_off = subreg_highpart_offset (QImode, HImode);
+
+ operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, low_off);
+ operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, high_off);
+})
+
+(define_insn_and_split "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (zero_extend:HI (match_dup 1)))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (HImode, SImode);
+ unsigned int high_off = subreg_highpart_offset (HImode, SImode);
+
+ operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off);
+ operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off);
+})
+
+(define_insn_and_split "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (HImode, SImode);
+ unsigned int high_off = subreg_highpart_offset (HImode, SImode);
+
+ operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off);
+ operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off);
+})
+
+(define_insn_and_split "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (SImode, DImode);
+ unsigned int high_off = subreg_highpart_offset (SImode, DImode);
+
+ operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off);
+ operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off);
+})
+
+(define_insn_and_split "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (SImode, DImode);
+ unsigned int high_off = subreg_highpart_offset (SImode, DImode);
+
+ operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off);
+ operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off);
+})
+
+(define_insn_and_split "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 3) (const_int 0))]
+{
+ unsigned int low_off = subreg_lowpart_offset (SImode, DImode);
+ unsigned int high_off = subreg_highpart_offset (SImode, DImode);
+
+ operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off);
+ operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off);
+})
+
+;;<=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=>
+;; compare
+
+; Optimize negated tests into reverse compare if overflow is undefined.
+(define_insn "*negated_tstqi"
+ [(set (cc0)
+ (compare (neg:QI (match_operand:QI 0 "register_operand" "r"))
+ (const_int 0)))]
+ "(!flag_wrapv && !flag_trapv && flag_strict_overflow)"
+ "cp __zero_reg__,%0"
+ [(set_attr "cc" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*reversed_tstqi"
+ [(set (cc0)
+ (compare (const_int 0)
+ (match_operand:QI 0 "register_operand" "r")))]
+ ""
+ "cp __zero_reg__,%0"
+[(set_attr "cc" "compare")
+ (set_attr "length" "2")])
+
+(define_insn "*negated_tsthi"
+ [(set (cc0)
+ (compare (neg:HI (match_operand:HI 0 "register_operand" "r"))
+ (const_int 0)))]
+ "(!flag_wrapv && !flag_trapv && flag_strict_overflow)"
+ "cp __zero_reg__,%A0
+ cpc __zero_reg__,%B0"
+[(set_attr "cc" "compare")
+ (set_attr "length" "2")])
+
+;; For simplicity, keep the clobber used by the cmphi pattern here even
+;; though it is unused, because this pattern is synthesized by avr_reorg.
+(define_insn "*reversed_tsthi"
+ [(set (cc0)
+ (compare (const_int 0)
+ (match_operand:HI 0 "register_operand" "r")))
+ (clobber (match_scratch:QI 1 "=X"))]
+ ""
+ "cp __zero_reg__,%A0
+ cpc __zero_reg__,%B0"
+[(set_attr "cc" "compare")
+ (set_attr "length" "2")])
+
+(define_insn "*negated_tstsi"
+ [(set (cc0)
+ (compare (neg:SI (match_operand:SI 0 "register_operand" "r"))
+ (const_int 0)))]
+ "(!flag_wrapv && !flag_trapv && flag_strict_overflow)"
+ "cp __zero_reg__,%A0
+ cpc __zero_reg__,%B0
+ cpc __zero_reg__,%C0
+ cpc __zero_reg__,%D0"
+ [(set_attr "cc" "compare")
+ (set_attr "length" "4")])
+
+(define_insn "*reversed_tstsi"
+ [(set (cc0)
+ (compare (const_int 0)
+ (match_operand:SI 0 "register_operand" "r")))
+ (clobber (match_scratch:QI 1 "=X"))]
+ ""
+ "cp __zero_reg__,%A0
+ cpc __zero_reg__,%B0
+ cpc __zero_reg__,%C0
+ cpc __zero_reg__,%D0"
+ [(set_attr "cc" "compare")
+ (set_attr "length" "4")])
+
+
+(define_insn "*cmpqi"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "register_operand" "r,r,d")
+ (match_operand:QI 1 "nonmemory_operand" "L,r,i")))]
+ ""
+ "@
+ tst %0
+ cp %0,%1
+ cpi %0,lo8(%1)"
+ [(set_attr "cc" "compare,compare,compare")
+ (set_attr "length" "1,1,1")])
+
+(define_insn "*cmpqi_sign_extend"
+ [(set (cc0)
+ (compare (sign_extend:HI
+ (match_operand:QI 0 "register_operand" "d"))
+ (match_operand:HI 1 "const_int_operand" "n")))]
+ "INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) <= 127"
+ "cpi %0,lo8(%1)"
+ [(set_attr "cc" "compare")
+ (set_attr "length" "1")])
+
+(define_insn "*cmphi"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "!w,r,r,d,d,r,r")
+ (match_operand:HI 1 "nonmemory_operand" "L,L,r,M,i,M,i")))
+ (clobber (match_scratch:QI 2 "=X,X,X,X,&d,&d,&d"))]
+ ""
+ "*{
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ return out_tsthi (insn, operands[0], NULL);
+
+ case 2:
+ return (AS2 (cp,%A0,%A1) CR_TAB
+ AS2 (cpc,%B0,%B1));
+ case 3:
+ if (reg_unused_after (insn, operands[0])
+ && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 63
+ && test_hard_reg_class (ADDW_REGS, operands[0]))
+ return AS2 (sbiw,%0,%1);
+ else
+ return (AS2 (cpi,%0,%1) CR_TAB
+ AS2 (cpc,%B0,__zero_reg__));
+ case 4:
+ if (reg_unused_after (insn, operands[0]))
+ return (AS2 (subi,%0,lo8(%1)) CR_TAB
+ AS2 (sbci,%B0,hi8(%1)));
+ else
+ return (AS2 (ldi, %2,hi8(%1)) CR_TAB
+ AS2 (cpi, %A0,lo8(%1)) CR_TAB
+ AS2 (cpc, %B0,%2));
+ case 5:
+ return (AS2 (ldi, %2,lo8(%1)) CR_TAB
+ AS2 (cp, %A0,%2) CR_TAB
+ AS2 (cpc, %B0,__zero_reg__));
+
+ case 6:
+ return (AS2 (ldi, %2,lo8(%1)) CR_TAB
+ AS2 (cp, %A0,%2) CR_TAB
+ AS2 (ldi, %2,hi8(%1)) CR_TAB
+ AS2 (cpc, %B0,%2));
+ }
+ return \"bug\";
+}"
+ [(set_attr "cc" "compare,compare,compare,compare,compare,compare,compare")
+ (set_attr "length" "1,2,2,2,3,3,4")])
+
+
+(define_insn "*cmpsi"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "r,r,d,d,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "L,r,M,i,M,i")))
+ (clobber (match_scratch:QI 2 "=X,X,X,&d,&d,&d"))]
+ ""
+ "*{
+ switch (which_alternative)
+ {
+ case 0:
+ return out_tstsi (insn, operands[0], NULL);
+
+ case 1:
+ return (AS2 (cp,%A0,%A1) CR_TAB
+ AS2 (cpc,%B0,%B1) CR_TAB
+ AS2 (cpc,%C0,%C1) CR_TAB
+ AS2 (cpc,%D0,%D1));
+ case 2:
+ if (reg_unused_after (insn, operands[0])
+ && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 63
+ && test_hard_reg_class (ADDW_REGS, operands[0]))
+ return (AS2 (sbiw,%0,%1) CR_TAB
+ AS2 (cpc,%C0,__zero_reg__) CR_TAB
+ AS2 (cpc,%D0,__zero_reg__));
+ else
+ return (AS2 (cpi,%A0,lo8(%1)) CR_TAB
+ AS2 (cpc,%B0,__zero_reg__) CR_TAB
+ AS2 (cpc,%C0,__zero_reg__) CR_TAB
+ AS2 (cpc,%D0,__zero_reg__));
+ case 3:
+ if (reg_unused_after (insn, operands[0]))
+ return (AS2 (subi,%A0,lo8(%1)) CR_TAB
+ AS2 (sbci,%B0,hi8(%1)) CR_TAB
+ AS2 (sbci,%C0,hlo8(%1)) CR_TAB
+ AS2 (sbci,%D0,hhi8(%1)));
+ else
+ return (AS2 (cpi, %A0,lo8(%1)) CR_TAB
+ AS2 (ldi, %2,hi8(%1)) CR_TAB
+ AS2 (cpc, %B0,%2) CR_TAB
+ AS2 (ldi, %2,hlo8(%1)) CR_TAB
+ AS2 (cpc, %C0,%2) CR_TAB
+ AS2 (ldi, %2,hhi8(%1)) CR_TAB
+ AS2 (cpc, %D0,%2));
+ case 4:
+ return (AS2 (ldi,%2,lo8(%1)) CR_TAB
+ AS2 (cp,%A0,%2) CR_TAB
+ AS2 (cpc,%B0,__zero_reg__) CR_TAB
+ AS2 (cpc,%C0,__zero_reg__) CR_TAB
+ AS2 (cpc,%D0,__zero_reg__));
+ case 5:
+ return (AS2 (ldi, %2,lo8(%1)) CR_TAB
+ AS2 (cp, %A0,%2) CR_TAB
+ AS2 (ldi, %2,hi8(%1)) CR_TAB
+ AS2 (cpc, %B0,%2) CR_TAB
+ AS2 (ldi, %2,hlo8(%1)) CR_TAB
+ AS2 (cpc, %C0,%2) CR_TAB
+ AS2 (ldi, %2,hhi8(%1)) CR_TAB
+ AS2 (cpc, %D0,%2));
+ }
+ return \"bug\";
+}"
+ [(set_attr "cc" "compare,compare,compare,compare,compare,compare")
+ (set_attr "length" "4,4,4,7,5,8")])
+
+
+;; ----------------------------------------------------------------------
+;; JUMP INSTRUCTIONS
+;; ----------------------------------------------------------------------
+;; Conditional jump instructions
+
+(define_expand "cbranchsi4"
+ [(parallel [(set (cc0)
+ (compare (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (match_scratch:QI 4 ""))])
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+(define_expand "cbranchhi4"
+ [(parallel [(set (cc0)
+ (compare (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "nonmemory_operand" "")))
+ (clobber (match_scratch:QI 4 ""))])
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+(define_expand "cbranchqi4"
+ [(set (cc0)
+ (compare (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+
+;; Test a single bit in a QI/HI/SImode register.
+;; Combine will create zero extract patterns for single bit tests.
+;; Permit any mode in the source pattern by using VOIDmode.
+
+(define_insn "*sbrx_branch<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "eqne_operator"
+ [(zero_extract:QIDI
+ (match_operand:VOID 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "* return avr_out_sbxx_branch (insn, operands);"
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
+ (le (minus (pc) (match_dup 3)) (const_int 2046)))
+ (const_int 2)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 2)
+ (const_int 4))))
+ (set_attr "cc" "clobber")])
+
+;; Same test based on bitwise AND RTL.  Keep this in case gcc changes
+;; patterns, or for old peepholes.
+;; FIXME: a bitwise mask will not work for DImode.
+
+(define_insn "*sbrx_and_branch<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "eqne_operator"
+ [(and:QISI
+ (match_operand:QISI 1 "register_operand" "r")
+ (match_operand:QISI 2 "single_one_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ HOST_WIDE_INT bitnumber;
+ bitnumber = exact_log2 (GET_MODE_MASK (<MODE>mode) & INTVAL (operands[2]));
+ operands[2] = GEN_INT (bitnumber);
+ return avr_out_sbxx_branch (insn, operands);
+}
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
+ (le (minus (pc) (match_dup 3)) (const_int 2046)))
+ (const_int 2)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 2)
+ (const_int 4))))
+ (set_attr "cc" "clobber")])
+
+;; Convert sign tests to bit 7/15/31 tests that match the above insns.
+(define_peephole2
+ [(set (cc0) (compare (match_operand:QI 0 "register_operand" "")
+ (const_int 0)))
+ (set (pc) (if_then_else (ge (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (eq (zero_extract:HI (match_dup 0)
+ (const_int 1)
+ (const_int 7))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_peephole2
+ [(set (cc0) (compare (match_operand:QI 0 "register_operand" "")
+ (const_int 0)))
+ (set (pc) (if_then_else (lt (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (ne (zero_extract:HI (match_dup 0)
+ (const_int 1)
+ (const_int 7))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_peephole2
+ [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "")
+ (const_int 0)))
+ (clobber (match_operand:HI 2 ""))])
+ (set (pc) (if_then_else (ge (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (eq (and:HI (match_dup 0) (const_int -32768))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_peephole2
+ [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "")
+ (const_int 0)))
+ (clobber (match_operand:HI 2 ""))])
+ (set (pc) (if_then_else (lt (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (ne (and:HI (match_dup 0) (const_int -32768))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_peephole2
+ [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 ""))])
+ (set (pc) (if_then_else (ge (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (eq (and:SI (match_dup 0) (match_dup 2))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "operands[2] = GEN_INT (-2147483647 - 1);")
+
+(define_peephole2
+ [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 ""))])
+ (set (pc) (if_then_else (lt (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc) (if_then_else (ne (and:SI (match_dup 0) (match_dup 2))
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "operands[2] = GEN_INT (-2147483647 - 1);")
+
+;; ************************************************************************
+;; Implementation of conditional jumps here.
+;; Compare with 0 (test) jumps
+;; ************************************************************************
+
+(define_insn "branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "simple_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+ return ret_cond_branch (operands[1], avr_jump_mode (operands[0],insn), 0);"
+ [(set_attr "type" "branch")
+ (set_attr "cc" "clobber")])
+
+;; ****************************************************************
+;; AVR does not have the following conditional jumps: LE, LEU, GT, GTU.
+;; Convert them all to proper jumps.
+;; ****************************************************************
+
+(define_insn "difficult_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "difficult_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+ return ret_cond_branch (operands[1], avr_jump_mode (operands[0],insn), 0);"
+ [(set_attr "type" "branch1")
+ (set_attr "cc" "clobber")])
+
+;; reverse branch
+
+(define_insn "rvbranch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "simple_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+ return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1);"
+ [(set_attr "type" "branch1")
+ (set_attr "cc" "clobber")])
+
+(define_insn "difficult_rvbranch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "difficult_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+ return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1);"
+ [(set_attr "type" "branch")
+ (set_attr "cc" "clobber")])
+
+;; **************************************************************************
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*{
+ if (AVR_HAVE_JMP_CALL && get_attr_length (insn) != 1)
+ return AS1 (jmp,%x0);
+ return AS1 (rjmp,%x0);
+}"
+ [(set (attr "length")
+ (if_then_else (match_operand 0 "symbol_ref_operand" "")
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 1)
+ (const_int 2))
+ (if_then_else (and (ge (minus (pc) (match_dup 0)) (const_int -2047))
+ (le (minus (pc) (match_dup 0)) (const_int 2047)))
+ (const_int 1)
+ (const_int 2))))
+ (set_attr "cc" "none")])
+
+;; call
+
+(define_expand "call"
+ [(call (match_operand:HI 0 "call_insn_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ;; Operand 1 not used on the AVR.
+ ""
+ "")
+
+;; call value
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand" "")
+ (call (match_operand:HI 1 "call_insn_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ;; Operand 2 not used on the AVR.
+ ""
+ "")
+
+(define_insn "call_insn"
+ [(call (mem:HI (match_operand:HI 0 "nonmemory_operand" "!z,*r,s,n"))
+ (match_operand:HI 1 "general_operand" "X,X,X,X"))]
+;; We do not need to save the Z register, because r30/r31 are call-used
+;; registers.
+ ;; Operand 1 not used on the AVR.
+ "(register_operand (operands[0], HImode) || CONSTANT_P (operands[0]))"
+ "*{
+ if (which_alternative==0)
+ return \"%!icall\";
+ else if (which_alternative==1)
+ {
+ if (AVR_HAVE_MOVW)
+ return (AS2 (movw, r30, %0) CR_TAB
+ \"%!icall\");
+ else
+ return (AS2 (mov, r30, %A0) CR_TAB
+ AS2 (mov, r31, %B0) CR_TAB
+ \"%!icall\");
+ }
+ else if (which_alternative==2)
+ return AS1(%~call,%x0);
+ return (AS2 (ldi,r30,lo8(%0)) CR_TAB
+ AS2 (ldi,r31,hi8(%0)) CR_TAB
+ \"%!icall\");
+}"
+ [(set_attr "cc" "clobber,clobber,clobber,clobber")
+ (set_attr_alternative "length"
+ [(const_int 1)
+ (if_then_else (eq_attr "mcu_have_movw" "yes")
+ (const_int 2)
+ (const_int 3))
+ (if_then_else (eq_attr "mcu_mega" "yes")
+ (const_int 2)
+ (const_int 1))
+ (const_int 3)])])
+
+(define_insn "call_value_insn"
+ [(set (match_operand 0 "register_operand" "=r,r,r,r")
+ (call (mem:HI (match_operand:HI 1 "nonmemory_operand" "!z,*r,s,n"))
+;; We do not need to save the Z register, because r30/r31 are call-used
+;; registers.
+ (match_operand:HI 2 "general_operand" "X,X,X,X")))]
+ ;; Operand 2 not used on the AVR.
+ "(register_operand (operands[0], VOIDmode) || CONSTANT_P (operands[0]))"
+ "*{
+ if (which_alternative==0)
+ return \"%!icall\";
+ else if (which_alternative==1)
+ {
+ if (AVR_HAVE_MOVW)
+ return (AS2 (movw, r30, %1) CR_TAB
+ \"%!icall\");
+ else
+ return (AS2 (mov, r30, %A1) CR_TAB
+ AS2 (mov, r31, %B1) CR_TAB
+ \"%!icall\");
+ }
+ else if (which_alternative==2)
+ return AS1(%~call,%x1);
+ return (AS2 (ldi, r30, lo8(%1)) CR_TAB
+ AS2 (ldi, r31, hi8(%1)) CR_TAB
+ \"%!icall\");
+}"
+ [(set_attr "cc" "clobber,clobber,clobber,clobber")
+ (set_attr_alternative "length"
+ [(const_int 1)
+ (if_then_else (eq_attr "mcu_have_movw" "yes")
+ (const_int 2)
+ (const_int 3))
+ (if_then_else (eq_attr "mcu_mega" "yes")
+ (const_int 2)
+ (const_int 1))
+ (const_int 3)])])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "cc" "none")
+ (set_attr "length" "1")])
+
+; indirect jump
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand:HI 0 "nonmemory_operand" ""))]
+ ""
+ " if ((!AVR_HAVE_JMP_CALL) && !register_operand(operand0, HImode))
+ {
+ operands[0] = copy_to_mode_reg(HImode, operand0);
+ }"
+)
+
+; indirect jump
+(define_insn "*jcindirect_jump"
+ [(set (pc) (match_operand:HI 0 "immediate_operand" "i"))]
+ ""
+ "@
+ %~jmp %x0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;;
+(define_insn "*njcindirect_jump"
+ [(set (pc) (match_operand:HI 0 "register_operand" "!z,*r"))]
+ "!AVR_HAVE_EIJMP_EICALL"
+ "@
+ ijmp
+ push %A0\;push %B0\;ret"
+ [(set_attr "length" "1,3")
+ (set_attr "cc" "none,none")])
+
+(define_insn "*indirect_jump_avr6"
+ [(set (pc) (match_operand:HI 0 "register_operand" "z"))]
+ "AVR_HAVE_EIJMP_EICALL"
+ "eijmp"
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")])
+
+;; table jump
+
+;; Table made from "rjmp" instructions for <=8K devices.
+(define_insn "*tablejump_rjmp"
+ [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r")]
+ UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_dup 0))]
+ "(!AVR_HAVE_JMP_CALL) && (!AVR_HAVE_EIJMP_EICALL)"
+ "@
+ ijmp
+ push %A0\;push %B0\;ret"
+ [(set_attr "length" "1,3")
+ (set_attr "cc" "none,none")])
+
+;; Not a prologue, but similar idea - move the common piece of code to libgcc.
+(define_insn "*tablejump_lib"
+ [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")]
+ UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_dup 0))]
+ "AVR_HAVE_JMP_CALL && TARGET_CALL_PROLOGUES"
+ "%~jmp __tablejump2__"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*tablejump_enh"
+ [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")]
+ UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_dup 0))]
+ "AVR_HAVE_JMP_CALL && AVR_HAVE_LPMX"
+ "lsl r30
+ rol r31
+ lpm __tmp_reg__,Z+
+ lpm r31,Z
+ mov r30,__tmp_reg__
+ %!ijmp"
+ [(set_attr "length" "6")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*tablejump"
+ [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "z")]
+ UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_dup 0))]
+ "AVR_HAVE_JMP_CALL && !AVR_HAVE_EIJMP_EICALL"
+ "lsl r30
+ rol r31
+ lpm
+ inc r30
+ push r0
+ lpm
+ push r0
+ ret"
+ [(set_attr "length" "8")
+ (set_attr "cc" "clobber")])
+
+(define_expand "casesi"
+ [(set (match_dup 6)
+ (minus:HI (subreg:HI (match_operand:SI 0 "register_operand" "") 0)
+ (match_operand:HI 1 "register_operand" "")))
+ (parallel [(set (cc0)
+ (compare (match_dup 6)
+ (match_operand:HI 2 "register_operand" "")))
+ (clobber (match_scratch:QI 9 ""))])
+
+ (set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+
+ (set (match_dup 6)
+ (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" ""))))
+
+ (parallel [(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP))
+ (use (label_ref (match_dup 3)))
+ (clobber (match_dup 6))])]
+ ""
+ "
+{
+ operands[6] = gen_reg_rtx (HImode);
+}")
+
+
+;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+;; This instruction sets the Z flag.
+
+(define_insn "sez"
+ [(set (cc0) (const_int 0))]
+ ""
+ "sez"
+ [(set_attr "length" "1")
+ (set_attr "cc" "compare")])
+
+;; Clear/set/test a single bit in I/O address space.
+
+(define_insn "*cbi"
+ [(set (mem:QI (match_operand 0 "low_io_address_operand" "n"))
+ (and:QI (mem:QI (match_dup 0))
+ (match_operand:QI 1 "single_zero_operand" "n")))]
+ "(optimize > 0)"
+{
+ operands[2] = GEN_INT (exact_log2 (~INTVAL (operands[1]) & 0xff));
+ return AS2 (cbi,%m0-0x20,%2);
+}
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")])
+
+(define_insn "*sbi"
+ [(set (mem:QI (match_operand 0 "low_io_address_operand" "n"))
+ (ior:QI (mem:QI (match_dup 0))
+ (match_operand:QI 1 "single_one_operand" "n")))]
+ "(optimize > 0)"
+{
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1]) & 0xff));
+ return AS2 (sbi,%m0-0x20,%2);
+}
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")])
+
+;; Lower half of the I/O space - use sbic/sbis directly.
+(define_insn "*sbix_branch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "eqne_operator"
+ [(zero_extract:HI
+ (mem:QI (match_operand 1 "low_io_address_operand" "n"))
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(optimize > 0)"
+ "* return avr_out_sbxx_branch (insn, operands);"
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
+ (le (minus (pc) (match_dup 3)) (const_int 2046)))
+ (const_int 2)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 2)
+ (const_int 4))))
+ (set_attr "cc" "clobber")])
+
+;; Tests of bit 7 are pessimized to sign tests, so we need this too...
+(define_insn "*sbix_branch_bit7"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "gelt_operator"
+ [(mem:QI (match_operand 1 "low_io_address_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(optimize > 0)"
+{
+ operands[3] = operands[2];
+ operands[2] = GEN_INT (7);
+ return avr_out_sbxx_branch (insn, operands);
+}
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046))
+ (le (minus (pc) (match_dup 2)) (const_int 2046)))
+ (const_int 2)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 2)
+ (const_int 4))))
+ (set_attr "cc" "clobber")])
+
+;; Upper half of the I/O space - read port to __tmp_reg__ and use sbrc/sbrs.
+(define_insn "*sbix_branch_tmp"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "eqne_operator"
+ [(zero_extract:HI
+ (mem:QI (match_operand 1 "high_io_address_operand" "n"))
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(optimize > 0)"
+ "* return avr_out_sbxx_branch (insn, operands);"
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046))
+ (le (minus (pc) (match_dup 3)) (const_int 2045)))
+ (const_int 3)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 3)
+ (const_int 5))))
+ (set_attr "cc" "clobber")])
+
+(define_insn "*sbix_branch_tmp_bit7"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "gelt_operator"
+ [(mem:QI (match_operand 1 "high_io_address_operand" "n"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(optimize > 0)"
+{
+ operands[3] = operands[2];
+ operands[2] = GEN_INT (7);
+ return avr_out_sbxx_branch (insn, operands);
+}
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046))
+ (le (minus (pc) (match_dup 2)) (const_int 2045)))
+ (const_int 3)
+ (if_then_else (eq_attr "mcu_mega" "no")
+ (const_int 3)
+ (const_int 5))))
+ (set_attr "cc" "clobber")])
+
+;; ************************* Peepholes ********************************
+
+(define_peephole
+ [(set (match_operand:SI 0 "d_register_operand" "")
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (parallel
+ [(set (cc0)
+ (compare (match_dup 0)
+ (const_int -1)))
+ (clobber (match_operand:QI 1 "d_register_operand" ""))])
+ (set (pc)
+ (if_then_else (ne (cc0) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (test_hard_reg_class (ADDW_REGS, operands[0]))
+ output_asm_insn (AS2 (sbiw,%0,1) CR_TAB
+ AS2 (sbc,%C0,__zero_reg__) CR_TAB
+ AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands);
+ else
+ output_asm_insn (AS2 (subi,%A0,1) CR_TAB
+ AS2 (sbc,%B0,__zero_reg__) CR_TAB
+ AS2 (sbc,%C0,__zero_reg__) CR_TAB
+ AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands);
+ switch (avr_jump_mode (operands[2],insn))
+ {
+ case 1:
+ return AS1 (brcc,%2);
+ case 2:
+ return (AS1 (brcs,.+2) CR_TAB
+ AS1 (rjmp,%2));
+ }
+ return (AS1 (brcs,.+4) CR_TAB
+ AS1 (jmp,%2));
+}")
+
+(define_peephole
+ [(set (match_operand:HI 0 "d_register_operand" "")
+ (plus:HI (match_dup 0)
+ (const_int -1)))
+ (parallel
+ [(set (cc0)
+ (compare (match_dup 0)
+ (const_int 65535)))
+ (clobber (match_operand:QI 1 "d_register_operand" ""))])
+ (set (pc)
+ (if_then_else (ne (cc0) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (test_hard_reg_class (ADDW_REGS, operands[0]))
+ output_asm_insn (AS2 (sbiw,%0,1), operands);
+ else
+ output_asm_insn (AS2 (subi,%A0,1) CR_TAB
+ AS2 (sbc,%B0,__zero_reg__) \"\\n\", operands);
+ switch (avr_jump_mode (operands[2],insn))
+ {
+ case 1:
+ return AS1 (brcc,%2);
+ case 2:
+ return (AS1 (brcs,.+2) CR_TAB
+ AS1 (rjmp,%2));
+ }
+ return (AS1 (brcs,.+4) CR_TAB
+ AS1 (jmp,%2));
+}")
+
+(define_peephole
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (plus:QI (match_dup 0)
+ (const_int -1)))
+ (set (cc0)
+ (compare (match_dup 0)
+ (const_int -1)))
+ (set (pc)
+ (if_then_else (ne (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ cc_status.value1 = operands[0];
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE;
+ output_asm_insn (AS2 (subi,%A0,1), operands);
+ switch (avr_jump_mode (operands[1],insn))
+ {
+ case 1:
+ return AS1 (brcc,%1);
+ case 2:
+ return (AS1 (brcs,.+2) CR_TAB
+ AS1 (rjmp,%1));
+ }
+ return (AS1 (brcs,.+4) CR_TAB
+ AS1 (jmp,%1));
+}")
+
+(define_peephole
+ [(set (cc0)
+ (compare (match_operand:QI 0 "register_operand" "")
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (eq (cc0) (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "jump_over_one_insn_p (insn, operands[1])"
+ "cpse %0,__zero_reg__")
+
+(define_peephole
+ [(set (cc0)
+ (compare (match_operand:QI 0 "register_operand" "")
+ (match_operand:QI 1 "register_operand" "")))
+ (set (pc)
+ (if_then_else (eq (cc0) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "jump_over_one_insn_p (insn, operands[2])"
+ "cpse %0,%1")
+
+;;pppppppppppppppppppppppppppppppppppppppppppppppppppp
+;; prologue/epilogue support instructions
+
+(define_insn "popqi"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (pre_inc:HI (reg:HI REG_SP))))]
+ ""
+ "pop %0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "1")])
+
+;; Enable Interrupts
+(define_insn "enable_interrupt"
+ [(unspec [(const_int 0)] UNSPEC_SEI)]
+ ""
+ "sei"
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")
+ ])
+
+;; Disable Interrupts
+(define_insn "disable_interrupt"
+ [(unspec [(const_int 0)] UNSPEC_CLI)]
+ ""
+ "cli"
+ [(set_attr "length" "1")
+ (set_attr "cc" "none")
+ ])
+
+;; Library prologue saves
+(define_insn "call_prologue_saves"
+ [(unspec_volatile:HI [(const_int 0)] UNSPECV_PROLOGUE_SAVES)
+ (match_operand:HI 0 "immediate_operand" "")
+ (set (reg:HI REG_SP) (minus:HI
+ (reg:HI REG_SP)
+ (match_operand:HI 1 "immediate_operand" "")))
+ (use (reg:HI REG_X))
+ (clobber (reg:HI REG_Z))]
+ ""
+ "ldi r30,lo8(gs(1f))
+ ldi r31,hi8(gs(1f))
+ %~jmp __prologue_saves__+((18 - %0) * 2)
+1:"
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "mcu_mega" "yes")
+ (const_int 6)
+ (const_int 5))])
+ (set_attr "cc" "clobber")
+ ])
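+
+;; Note the computed entry point above: __prologue_saves__ (in libgcc.S)
+;; is a run of 18 one-word "push" instructions, so entering it at
+;; __prologue_saves__ + ((18 - %0) * 2) skips the pushes that are not
+;; needed (each push is two bytes) and saves only the last %0 registers.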
+
+; epilogue restores using library
+(define_insn "epilogue_restores"
+ [(unspec_volatile:QI [(const_int 0)] UNSPECV_EPILOGUE_RESTORES)
+ (set (reg:HI REG_Y ) (plus:HI
+ (reg:HI REG_Y)
+ (match_operand:HI 0 "immediate_operand" "")))
+ (set (reg:HI REG_SP) (reg:HI REG_Y))
+ (clobber (reg:QI REG_Z))]
+ ""
+ "ldi r30, lo8(%0)
+ %~jmp __epilogue_restores__ + ((18 - %0) * 2)"
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "mcu_mega" "yes")
+ (const_int 3)
+ (const_int 2))])
+ (set_attr "cc" "clobber")
+ ])
+
+; return
+(define_insn "return"
+ [(return)]
+ "reload_completed && avr_simple_epilogue ()"
+ "ret"
+ [(set_attr "cc" "none")
+ (set_attr "length" "1")])
+
+(define_insn "return_from_epilogue"
+ [(return)]
+ "(reload_completed
+ && cfun->machine
+ && !(cfun->machine->is_interrupt || cfun->machine->is_signal)
+ && !cfun->machine->is_naked)"
+ "ret"
+ [(set_attr "cc" "none")
+ (set_attr "length" "1")])
+
+(define_insn "return_from_interrupt_epilogue"
+ [(return)]
+ "(reload_completed
+ && cfun->machine
+ && (cfun->machine->is_interrupt || cfun->machine->is_signal)
+ && !cfun->machine->is_naked)"
+ "reti"
+ [(set_attr "cc" "none")
+ (set_attr "length" "1")])
+
+(define_insn "return_from_naked_epilogue"
+ [(return)]
+ "(reload_completed
+ && cfun->machine
+ && cfun->machine->is_naked)"
+ ""
+ [(set_attr "cc" "none")
+ (set_attr "length" "0")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "
+ {
+ expand_prologue ();
+ DONE;
+ }")
+
+(define_expand "epilogue"
+ [(const_int 0)]
+ ""
+ "
+ {
+ expand_epilogue ();
+ DONE;
+ }")
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
new file mode 100644
index 000000000..d9c3c0f27
--- /dev/null
+++ b/gcc/config/avr/avr.opt
@@ -0,0 +1,60 @@
+; Options for the ATMEL AVR port of the compiler.
+
+; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mcall-prologues
+Target Report Mask(CALL_PROLOGUES)
+Use subroutines for function prologues and epilogues
+
+mmcu=
+Target RejectNegative Joined Var(avr_mcu_name) Init("avr2")
+-mmcu=MCU Select the target MCU
+
+mdeb
+Target Report Undocumented Mask(ALL_DEBUG)
+
+mint8
+Target Report Mask(INT8)
+Use an 8-bit 'int' type
+
+mno-interrupts
+Target Report RejectNegative Mask(NO_INTERRUPTS)
+Change the stack pointer without disabling interrupts
+
+morder1
+Target Report Undocumented Mask(ORDER_1)
+
+morder2
+Target Report Undocumented Mask(ORDER_2)
+
+mshort-calls
+Target Report Mask(SHORT_CALLS)
+Use rjmp/rcall (limited range) on >8K devices
+
+mtiny-stack
+Target Report Mask(TINY_STACK)
+Change only the low 8 bits of the stack pointer
+
+mrelax
+Target Report
+Relax branches
+
+mpmem-wrap-around
+Target Report
+Make the linker relaxation machine assume that a program counter wrap-around occurs.
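+
+; Example (illustrative): select the MCU and the library-based
+; prologues/epilogues from the command line:
+;   avr-gcc -mmcu=atmega8 -mcall-prologues -mtiny-stack foo.c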
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
new file mode 100644
index 000000000..2ac8833bd
--- /dev/null
+++ b/gcc/config/avr/constraints.md
@@ -0,0 +1,109 @@
+;; Constraint definitions for ATMEL AVR micro controllers.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints
+
+(define_register_constraint "t" "R0_REG"
+ "Temporary register r0")
+
+(define_register_constraint "b" "BASE_POINTER_REGS"
+ "Base pointer registers (r28--r31)")
+
+(define_register_constraint "e" "POINTER_REGS"
+ "Pointer registers (r26--r31)")
+
+(define_register_constraint "w" "ADDW_REGS"
+ "Registers from r24 to r31. These registers
+ can be used in the @samp{adiw} command.")
+
+(define_register_constraint "d" "LD_REGS"
+ "Registers from r16 to r31.")
+
+(define_register_constraint "l" "NO_LD_REGS"
+ "Registers from r0 to r15.")
+
+(define_register_constraint "a" "SIMPLE_LD_REGS"
+ "Registers from r16 to r23.")
+
+(define_register_constraint "x" "POINTER_X_REGS"
+ "Register pair X (r27:r26).")
+
+(define_register_constraint "y" "POINTER_Y_REGS"
+ "Register pair Y (r29:r28).")
+
+(define_register_constraint "z" "POINTER_Z_REGS"
+ "Register pair Z (r31:r30).")
+
+(define_register_constraint "q" "STACK_REG"
+ "Stack pointer register (SPH:SPL).")
+
+(define_constraint "I"
+ "Integer constant in the range 0 @dots{} 63."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 63")))
+
+(define_constraint "J"
+ "Integer constant in the range -63 @dots{} 0."
+ (and (match_code "const_int")
+ (match_test "ival <= 0 && ival >= -63")))
+
+(define_constraint "K"
+ "Integer constant 2."
+ (and (match_code "const_int")
+ (match_test "ival == 2")))
+
+(define_constraint "L"
+ "Zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "M"
+ "Integer constant in the range 0 @dots{} 0xff."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 0xff")))
+
+(define_constraint "N"
+ "Constant integer @minus{}1."
+ (and (match_code "const_int")
+ (match_test "ival == -1")))
+
+(define_constraint "O"
+ "Constant integer 8, 16, or 24."
+ (and (match_code "const_int")
+ (match_test "ival == 8 || ival == 16 || ival == 24")))
+
+(define_constraint "P"
+ "Constant integer 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "G"
+ "Constant float 0."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (SFmode)")))
+
+(define_constraint "R"
+ "Integer constant in the range -6 @dots{} 5."
+ (and (match_code "const_int")
+ (match_test "ival >= -6 && ival <= 5")))
+
+(define_memory_constraint "Q"
+ "A memory address based on Y or Z pointer with displacement."
+ (and (match_code "mem")
+ (match_test "extra_constraint_Q (op)")))
diff --git a/gcc/config/avr/driver-avr.c b/gcc/config/avr/driver-avr.c
new file mode 100755
index 000000000..6ab0bb822
--- /dev/null
+++ b/gcc/config/avr/driver-avr.c
@@ -0,0 +1,114 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Anatoly Sokolov <aesok@post.ru>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* Current architecture. */
+const struct base_arch_s *avr_current_arch = NULL;
+
+/* Current device. */
+const struct mcu_type_s *avr_current_device = NULL;
+
+/* Initialize avr_current_arch and avr_current_device variables. */
+
+static void
+avr_set_current_device (const char *name)
+{
+ if (NULL != avr_current_arch)
+ return;
+
+ for (avr_current_device = avr_mcu_types; avr_current_device->name;
+ avr_current_device++)
+ {
+ if (strcmp (avr_current_device->name, name) == 0)
+ break;
+ }
+
+ avr_current_arch = &avr_arch_types[avr_current_device->arch];
+}
+
+/* Returns command line parameters that describe the device architecture. */
+
+const char *
+avr_device_to_arch (int argc, const char **argv)
+{
+ if (0 == argc)
+ return NULL;
+
+ avr_set_current_device (argv[0]);
+
+ return concat ("-m ", avr_current_arch->arch_name, NULL);
+}
+
+/* Returns command line parameters that describe the start of the data section. */
+
+const char *
+avr_device_to_data_start (int argc, const char **argv)
+{
+ unsigned long data_section_start;
+ char data_section_start_str[16];
+
+ if (0 == argc)
+ return NULL;
+
+ avr_set_current_device (argv[0]);
+
+ if (avr_current_device->data_section_start
+ == avr_current_arch->default_data_section_start)
+ return NULL;
+
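+ /* AVR linker scripts place data memory (SRAM) in a separate virtual
+ address space starting at 0x800000, so the -Tdata value is the SRAM
+ start offset by that base. */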
+ data_section_start = 0x800000 + avr_current_device->data_section_start;
+
+ snprintf (data_section_start_str, sizeof(data_section_start_str) - 1,
+ "0x%lX", data_section_start);
+
+ return concat ("-Tdata ", data_section_start_str, NULL);
+}
+
+/* Returns command line parameters that describe the device startfile. */
+
+const char *
+avr_device_to_startfiles (int argc, const char **argv)
+{
+ if (0 == argc)
+ return NULL;
+
+ avr_set_current_device (argv[0]);
+
+ return concat ("crt", avr_current_device->library_name, ".o%s", NULL);
+}
+
+/* Returns command line parameters that describe the device library. */
+
+const char *
+avr_device_to_devicelib (int argc, const char **argv)
+{
+ if (0 == argc)
+ return NULL;
+
+ avr_set_current_device (argv[0]);
+
+ return concat ("-l", avr_current_device->library_name, NULL);
+}
+
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
new file mode 100644
index 000000000..ac8e5cd94
--- /dev/null
+++ b/gcc/config/avr/libgcc.S
@@ -0,0 +1,901 @@
+/* -*- Mode: Asm -*- */
+/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+ Contributed by Denis Chertykov <chertykov@gmail.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define __zero_reg__ r1
+#define __tmp_reg__ r0
+#define __SREG__ 0x3f
+#define __SP_H__ 0x3e
+#define __SP_L__ 0x3d
+#define __RAMPZ__ 0x3B
+#define __EIND__ 0x3C
+
+/* Most of the functions here are called directly from avr.md
+ patterns, instead of using the standard libcall mechanisms.
+ This can make better code because GCC knows exactly which
+ of the call-used registers (not all of them) are clobbered. */
+
+ .section .text.libgcc, "ax", @progbits
+
+ .macro mov_l r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ movw \r_dest, \r_src
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+ .macro mov_h r_dest, r_src
+#if defined (__AVR_HAVE_MOVW__)
+ ; empty
+#else
+ mov \r_dest, \r_src
+#endif
+ .endm
+
+/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
+#if !defined (__AVR_HAVE_MUL__)
+/*******************************************************
+ Multiplication 8 x 8
+*******************************************************/
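+
+/* __mulqi3 below is a shift-and-add multiply.  A rough C equivalent
+   (a sketch; the register roles are #defined just below):
+
+     uint8_t mulqi3 (uint8_t a, uint8_t b)  // a: multiplier, b: multiplicand
+     {
+       uint8_t res = 0;
+       while (a != 0 && b != 0)
+         {
+           if (a & 1)
+             res += b;  // add multiplicand for each set multiplier bit
+           b <<= 1;     // shift multiplicand
+           a >>= 1;     // consume one multiplier bit
+         }
+       return res;
+     }
+*/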
+#if defined (L_mulqi3)
+
+#define r_arg2 r22 /* multiplicand */
+#define r_arg1 r24 /* multiplier */
+#define r_res __tmp_reg__ /* result */
+
+ .global __mulqi3
+ .func __mulqi3
+__mulqi3:
+ clr r_res ; clear result
+__mulqi3_loop:
+ sbrc r_arg1,0
+ add r_res,r_arg2
+ add r_arg2,r_arg2 ; shift multiplicand
+ breq __mulqi3_exit ; while multiplicand != 0
+ lsr r_arg1 ;
+ brne __mulqi3_loop ; exit if multiplier = 0
+__mulqi3_exit:
+ mov r_arg1,r_res ; result to return register
+ ret
+
+#undef r_arg2
+#undef r_arg1
+#undef r_res
+
+.endfunc
+#endif /* defined (L_mulqi3) */
+
+#if defined (L_mulqihi3)
+ .global __mulqihi3
+ .func __mulqihi3
+__mulqihi3:
+ clr r25
+ sbrc r24, 7
+ dec r25
+ clr r23
+ sbrc r22, 7
+ dec r22
+ rjmp __mulhi3
+ .endfunc
+#endif /* defined (L_mulqihi3) */
+
+#if defined (L_umulqihi3)
+ .global __umulqihi3
+ .func __umulqihi3
+__umulqihi3:
+ clr r25
+ clr r23
+ rjmp __mulhi3
+ .endfunc
+#endif /* defined (L_umulqihi3) */
+
+/*******************************************************
+ Multiplication 16 x 16
+*******************************************************/
+#if defined (L_mulhi3)
+#define r_arg1L r24 /* multiplier Low */
+#define r_arg1H r25 /* multiplier High */
+#define r_arg2L r22 /* multiplicand Low */
+#define r_arg2H r23 /* multiplicand High */
+#define r_resL __tmp_reg__ /* result Low */
+#define r_resH r21 /* result High */
+
+ .global __mulhi3
+ .func __mulhi3
+__mulhi3:
+ clr r_resH ; clear result
+ clr r_resL ; clear result
+__mulhi3_loop:
+ sbrs r_arg1L,0
+ rjmp __mulhi3_skip1
+ add r_resL,r_arg2L ; result + multiplicand
+ adc r_resH,r_arg2H
+__mulhi3_skip1:
+ add r_arg2L,r_arg2L ; shift multiplicand
+ adc r_arg2H,r_arg2H
+
+ cp r_arg2L,__zero_reg__
+ cpc r_arg2H,__zero_reg__
+ breq __mulhi3_exit ; while multiplicand != 0
+
+ lsr r_arg1H ; gets LSB of multiplier
+ ror r_arg1L
+ sbiw r_arg1L,0
+ brne __mulhi3_loop ; exit if multiplier = 0
+__mulhi3_exit:
+ mov r_arg1H,r_resH ; result to return register
+ mov r_arg1L,r_resL
+ ret
+
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg2L
+#undef r_arg2H
+#undef r_resL
+#undef r_resH
+
+.endfunc
+#endif /* defined (L_mulhi3) */
+#endif /* !defined (__AVR_HAVE_MUL__) */
+
+#if defined (L_mulhisi3)
+ .global __mulhisi3
+ .func __mulhisi3
+__mulhisi3:
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ sbrc r23, 7
+ dec r24
+ mov r25, r24
+ clr r20
+ sbrc r19, 7
+ dec r20
+ mov r21, r20
+ rjmp __mulsi3
+ .endfunc
+#endif /* defined (L_mulhisi3) */
+
+#if defined (L_umulhisi3)
+ .global __umulhisi3
+ .func __umulhisi3
+__umulhisi3:
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ clr r25
+ clr r20
+ clr r21
+ rjmp __mulsi3
+ .endfunc
+#endif /* defined (L_umulhisi3) */
+
+#if defined (L_mulsi3)
+/*******************************************************
+ Multiplication 32 x 32
+*******************************************************/
+#define r_arg1L r22 /* multiplier Low */
+#define r_arg1H r23
+#define r_arg1HL r24
+#define r_arg1HH r25 /* multiplier High */
+
+
+#define r_arg2L r18 /* multiplicand Low */
+#define r_arg2H r19
+#define r_arg2HL r20
+#define r_arg2HH r21 /* multiplicand High */
+
+#define r_resL r26 /* result Low */
+#define r_resH r27
+#define r_resHL r30
+#define r_resHH r31 /* result High */
+
+
+ .global __mulsi3
+ .func __mulsi3
+__mulsi3:
+#if defined (__AVR_HAVE_MUL__)
+ mul r_arg1L, r_arg2L
+ movw r_resL, r0
+ mul r_arg1H, r_arg2H
+ movw r_resHL, r0
+ mul r_arg1HL, r_arg2L
+ add r_resHL, r0
+ adc r_resHH, r1
+ mul r_arg1L, r_arg2HL
+ add r_resHL, r0
+ adc r_resHH, r1
+ mul r_arg1HH, r_arg2L
+ add r_resHH, r0
+ mul r_arg1HL, r_arg2H
+ add r_resHH, r0
+ mul r_arg1H, r_arg2HL
+ add r_resHH, r0
+ mul r_arg1L, r_arg2HH
+ add r_resHH, r0
+ clr r_arg1HH ; use instead of __zero_reg__ to add carry
+ mul r_arg1H, r_arg2L
+ add r_resH, r0
+ adc r_resHL, r1
+ adc r_resHH, r_arg1HH ; add carry
+ mul r_arg1L, r_arg2H
+ add r_resH, r0
+ adc r_resHL, r1
+ adc r_resHH, r_arg1HH ; add carry
+ movw r_arg1L, r_resL
+ movw r_arg1HL, r_resHL
+ clr r1 ; __zero_reg__ clobbered by "mul"
+ ret
+#else
+ clr r_resHH ; clear result
+ clr r_resHL ; clear result
+ clr r_resH ; clear result
+ clr r_resL ; clear result
+__mulsi3_loop:
+ sbrs r_arg1L,0
+ rjmp __mulsi3_skip1
+ add r_resL,r_arg2L ; result + multiplicand
+ adc r_resH,r_arg2H
+ adc r_resHL,r_arg2HL
+ adc r_resHH,r_arg2HH
+__mulsi3_skip1:
+ add r_arg2L,r_arg2L ; shift multiplicand
+ adc r_arg2H,r_arg2H
+ adc r_arg2HL,r_arg2HL
+ adc r_arg2HH,r_arg2HH
+
+ lsr r_arg1HH ; gets LSB of multiplier
+ ror r_arg1HL
+ ror r_arg1H
+ ror r_arg1L
+ brne __mulsi3_loop
+ sbiw r_arg1HL,0
+ cpc r_arg1H,r_arg1L
+ brne __mulsi3_loop ; exit if multiplier = 0
+__mulsi3_exit:
+ mov_h r_arg1HH,r_resHH ; result to return register
+ mov_l r_arg1HL,r_resHL
+ mov_h r_arg1H,r_resH
+ mov_l r_arg1L,r_resL
+ ret
+#endif /* defined (__AVR_HAVE_MUL__) */
+#undef r_arg1L
+#undef r_arg1H
+#undef r_arg1HL
+#undef r_arg1HH
+
+
+#undef r_arg2L
+#undef r_arg2H
+#undef r_arg2HL
+#undef r_arg2HH
+
+#undef r_resL
+#undef r_resH
+#undef r_resHL
+#undef r_resHH
+
+.endfunc
+#endif /* defined (L_mulsi3) */
+
+/*******************************************************
+ Division 8 / 8 => (result + remainder)
+*******************************************************/
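+
+/* __udivmodqi4 below is a textbook restoring division.  A rough C
+   equivalent (a sketch; the assembly shifts quotient bits into the
+   dividend register with the carry complemented and fixes them up with
+   "com", but the result is the same):
+
+     uint8_t quot = 0, rem = 0;
+     for (int i = 7; i >= 0; i--)
+       {
+         rem = (uint8_t) ((rem << 1) | ((num >> i) & 1));
+         quot <<= 1;
+         if (rem >= den)
+           {
+             rem -= den;  // divisor fits: subtract and set quotient bit
+             quot |= 1;
+           }
+       }
+     // quotient is returned in r24 (r_arg1), remainder in r25 (r_rem)
+*/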
+#define r_rem r25 /* remainder */
+#define r_arg1 r24 /* dividend, quotient */
+#define r_arg2 r22 /* divisor */
+#define r_cnt r23 /* loop count */
+
+#if defined (L_udivmodqi4)
+ .global __udivmodqi4
+ .func __udivmodqi4
+__udivmodqi4:
+ sub r_rem,r_rem ; clear remainder and carry
+ ldi r_cnt,9 ; init loop counter
+ rjmp __udivmodqi4_ep ; jump to entry point
+__udivmodqi4_loop:
+ rol r_rem ; shift dividend into remainder
+ cp r_rem,r_arg2 ; compare remainder & divisor
+ brcs __udivmodqi4_ep ; remainder < divisor
+ sub r_rem,r_arg2 ; subtract divisor
+__udivmodqi4_ep:
+ rol r_arg1 ; shift dividend (with CARRY)
+ dec r_cnt ; decrement loop counter
+ brne __udivmodqi4_loop
+ com r_arg1 ; complement result
+ ; because C flag was complemented in loop
+ ret
+ .endfunc
+#endif /* defined (L_udivmodqi4) */
+
+#if defined (L_divmodqi4)
+ .global __divmodqi4
+ .func __divmodqi4
+__divmodqi4:
+ bst r_arg1,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1
+ eor __tmp_reg__,r_arg2; r0.7 is sign of result
+ sbrc r_arg1,7
+ neg r_arg1 ; dividend negative : negate
+ sbrc r_arg2,7
+ neg r_arg2 ; divisor negative : negate
+ rcall __udivmodqi4 ; do the unsigned div/mod
+ brtc __divmodqi4_1
+ neg r_rem ; correct remainder sign
+__divmodqi4_1:
+ sbrc __tmp_reg__,7
+ neg r_arg1 ; correct result sign
+__divmodqi4_exit:
+ ret
+ .endfunc
+#endif /* defined (L_divmodqi4) */
+
+#undef r_rem
+#undef r_arg1
+#undef r_arg2
+#undef r_cnt
+
+
+/*******************************************************
+ Division 16 / 16 => (result + remainder)
+*******************************************************/
+#define r_remL r26 /* remainder Low */
+#define r_remH r27 /* remainder High */
+
+/* return: remainder */
+#define r_arg1L r24 /* dividend Low */
+#define r_arg1H r25 /* dividend High */
+
+/* return: quotient */
+#define r_arg2L r22 /* divisor Low */
+#define r_arg2H r23 /* divisor High */
+
+#define r_cnt r21 /* loop count */
+
+#if defined (L_udivmodhi4)
+ .global __udivmodhi4
+ .func __udivmodhi4
+__udivmodhi4:
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ ldi r_cnt,17 ; init loop counter
+ rjmp __udivmodhi4_ep ; jump to entry point
+__udivmodhi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ brcs __udivmodhi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; subtract divisor
+ sbc r_remH,r_arg2H
+__udivmodhi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ dec r_cnt ; decrement loop counter
+ brne __udivmodhi4_loop
+ com r_arg1L
+ com r_arg1H
+; div/mod results to return registers, as for the div() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ ret
+ .endfunc
+#endif /* defined (L_udivmodhi4) */
+
+#if defined (L_divmodhi4)
+ .global __divmodhi4
+ .func __divmodhi4
+__divmodhi4:
+ .global _div
+_div:
+ bst r_arg1H,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1H
+ eor __tmp_reg__,r_arg2H ; r0.7 is sign of result
+ rcall __divmodhi4_neg1 ; dividend negative : negate
+ sbrc r_arg2H,7
+ rcall __divmodhi4_neg2 ; divisor negative : negate
+ rcall __udivmodhi4 ; do the unsigned div/mod
+ rcall __divmodhi4_neg1 ; correct remainder sign
+ tst __tmp_reg__
+ brpl __divmodhi4_exit
+__divmodhi4_neg2:
+ com r_arg2H
+ neg r_arg2L ; correct divisor/result sign
+ sbci r_arg2H,0xff
+__divmodhi4_exit:
+ ret
+__divmodhi4_neg1:
+ brtc __divmodhi4_exit
+ com r_arg1H
+ neg r_arg1L ; correct dividend/remainder sign
+ sbci r_arg1H,0xff
+ ret
+ .endfunc
+#endif /* defined (L_divmodhi4) */
+
+#undef r_remH
+#undef r_remL
+
+#undef r_arg1H
+#undef r_arg1L
+
+#undef r_arg2H
+#undef r_arg2L
+
+#undef r_cnt
+
+/*******************************************************
+ Division 32 / 32 => (result + remainder)
+*******************************************************/
+#define r_remHH r31 /* remainder High */
+#define r_remHL r30
+#define r_remH r27
+#define r_remL r26 /* remainder Low */
+
+/* return: remainder */
+#define r_arg1HH r25 /* dividend High */
+#define r_arg1HL r24
+#define r_arg1H r23
+#define r_arg1L r22 /* dividend Low */
+
+/* return: quotient */
+#define r_arg2HH r21 /* divisor High */
+#define r_arg2HL r20
+#define r_arg2H r19
+#define r_arg2L r18 /* divisor Low */
+
+#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */
+
+#if defined (L_udivmodsi4)
+ .global __udivmodsi4
+ .func __udivmodsi4
+__udivmodsi4:
+ ldi r_remL, 33 ; init loop counter
+ mov r_cnt, r_remL
+ sub r_remL,r_remL
+ sub r_remH,r_remH ; clear remainder and carry
+ mov_l r_remHL, r_remL
+ mov_h r_remHH, r_remH
+ rjmp __udivmodsi4_ep ; jump to entry point
+__udivmodsi4_loop:
+ rol r_remL ; shift dividend into remainder
+ rol r_remH
+ rol r_remHL
+ rol r_remHH
+ cp r_remL,r_arg2L ; compare remainder & divisor
+ cpc r_remH,r_arg2H
+ cpc r_remHL,r_arg2HL
+ cpc r_remHH,r_arg2HH
+ brcs __udivmodsi4_ep ; remainder < divisor
+ sub r_remL,r_arg2L ; subtract divisor from remainder
+ sbc r_remH,r_arg2H
+ sbc r_remHL,r_arg2HL
+ sbc r_remHH,r_arg2HH
+__udivmodsi4_ep:
+ rol r_arg1L ; shift dividend (with CARRY)
+ rol r_arg1H
+ rol r_arg1HL
+ rol r_arg1HH
+ dec r_cnt ; decrement loop counter
+ brne __udivmodsi4_loop
+ ; __zero_reg__ now restored (r_cnt == 0)
+ com r_arg1L
+ com r_arg1H
+ com r_arg1HL
+ com r_arg1HH
+; div/mod results to return registers, as for the ldiv() function
+ mov_l r_arg2L, r_arg1L ; quotient
+ mov_h r_arg2H, r_arg1H
+ mov_l r_arg2HL, r_arg1HL
+ mov_h r_arg2HH, r_arg1HH
+ mov_l r_arg1L, r_remL ; remainder
+ mov_h r_arg1H, r_remH
+ mov_l r_arg1HL, r_remHL
+ mov_h r_arg1HH, r_remHH
+ ret
+ .endfunc
+#endif /* defined (L_udivmodsi4) */
+
+#if defined (L_divmodsi4)
+ .global __divmodsi4
+ .func __divmodsi4
+__divmodsi4:
+ bst r_arg1HH,7 ; store sign of dividend
+ mov __tmp_reg__,r_arg1HH
+ eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result
+ rcall __divmodsi4_neg1 ; dividend negative : negate
+ sbrc r_arg2HH,7
+ rcall __divmodsi4_neg2 ; divisor negative : negate
+ rcall __udivmodsi4 ; do the unsigned div/mod
+ rcall __divmodsi4_neg1 ; correct remainder sign
+ rol __tmp_reg__
+ brcc __divmodsi4_exit
+__divmodsi4_neg2:
+ com r_arg2HH
+ com r_arg2HL
+ com r_arg2H
+ neg r_arg2L ; correct divisor/quotient sign
+ sbci r_arg2H,0xff
+ sbci r_arg2HL,0xff
+ sbci r_arg2HH,0xff
+__divmodsi4_exit:
+ ret
+__divmodsi4_neg1:
+ brtc __divmodsi4_exit
+ com r_arg1HH
+ com r_arg1HL
+ com r_arg1H
+ neg r_arg1L ; correct dividend/remainder sign
+ sbci r_arg1H, 0xff
+ sbci r_arg1HL,0xff
+ sbci r_arg1HH,0xff
+ ret
+ .endfunc
+#endif /* defined (L_divmodsi4) */
+
+/**********************************
+ * This is a prologue subroutine
+ **********************************/
+#if defined (L_prologue)
+
+ .global __prologue_saves__
+ .func __prologue_saves__
+__prologue_saves__:
+ push r2
+ push r3
+ push r4
+ push r5
+ push r6
+ push r7
+ push r8
+ push r9
+ push r10
+ push r11
+ push r12
+ push r13
+ push r14
+ push r15
+ push r16
+ push r17
+ push r28
+ push r29
+ in r28,__SP_L__
+ in r29,__SP_H__
+ sub r28,r26
+ sbc r29,r27
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+.endfunc
+#endif /* defined (L_prologue) */
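+/* __prologue_saves__ is the out-of-line helper behind -mcall-prologues.
+   The compiler is expected to place the frame size in r27:r26 and the
+   address at which to resume in Z (r31:r30), then jump into the push
+   sequence above, possibly at an offset so that only the registers the
+   function actually clobbers are saved; the final (e)ijmp returns through
+   Z.  The SREG save, cli, and SREG restore around the stack-pointer
+   update keep the two-byte SP write safe against interrupts.  */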
+
+/*
+ * This is an epilogue subroutine
+ */
+#if defined (L_epilogue)
+
+ .global __epilogue_restores__
+ .func __epilogue_restores__
+__epilogue_restores__:
+ ldd r2,Y+18
+ ldd r3,Y+17
+ ldd r4,Y+16
+ ldd r5,Y+15
+ ldd r6,Y+14
+ ldd r7,Y+13
+ ldd r8,Y+12
+ ldd r9,Y+11
+ ldd r10,Y+10
+ ldd r11,Y+9
+ ldd r12,Y+8
+ ldd r13,Y+7
+ ldd r14,Y+6
+ ldd r15,Y+5
+ ldd r16,Y+4
+ ldd r17,Y+3
+ ldd r26,Y+2
+ ldd r27,Y+1
+ add r28,r30
+ adc r29,__zero_reg__
+ in __tmp_reg__,__SREG__
+ cli
+ out __SP_H__,r29
+ out __SREG__,__tmp_reg__
+ out __SP_L__,r28
+ mov_l r28, r26
+ mov_h r29, r27
+ ret
+.endfunc
+#endif /* defined (L_epilogue) */
+
+#ifdef L_exit
+ .section .fini9,"ax",@progbits
+ .global _exit
+ .func _exit
+_exit:
+ .weak exit
+exit:
+ .endfunc
+
+ /* Code from .fini8 ... .fini1 sections inserted by ld script. */
+
+ .section .fini0,"ax",@progbits
+ cli
+__stop_program:
+ rjmp __stop_program
+#endif /* defined (L_exit) */
+
+#ifdef L_cleanup
+ .weak _cleanup
+ .func _cleanup
+_cleanup:
+ ret
+.endfunc
+#endif /* defined (L_cleanup) */
+
+#ifdef L_tablejump
+ .global __tablejump2__
+ .func __tablejump2__
+__tablejump2__:
+ lsl r30
+ rol r31
+ .global __tablejump__
+__tablejump__:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm __tmp_reg__, Z+
+ lpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else
+ lpm
+ adiw r30, 1
+ push r0
+ lpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+ .endfunc
+#endif /* defined (L_tablejump) */
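+/* __tablejump2__ receives a word index in Z and doubles it into the byte
+   address of a jump-table entry; __tablejump__ then fetches the two-byte
+   target from program memory and jumps to it.  On devices without the
+   LPM Rd,Z+ form, the fallback pushes the fetched address (plus EIND on
+   devices whose program counter is wider than 16 bits) and uses ret to
+   transfer control to it.  */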
+
+#ifdef L_copy_data
+ .section .init4,"ax",@progbits
+ .global __do_copy_data
+__do_copy_data:
+#if defined(__AVR_HAVE_ELPMX__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start)
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm r0, Z+
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ ldi r16, hh8(__data_load_start - 0x10000)
+.L__do_copy_data_carry:
+ inc r16
+ out __RAMPZ__, r16
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+ elpm
+ st X+, r0
+ adiw r30, 1
+ brcs .L__do_copy_data_carry
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
+ ldi r17, hi8(__data_end)
+ ldi r26, lo8(__data_start)
+ ldi r27, hi8(__data_start)
+ ldi r30, lo8(__data_load_start)
+ ldi r31, hi8(__data_load_start)
+ rjmp .L__do_copy_data_start
+.L__do_copy_data_loop:
+#if defined (__AVR_HAVE_LPMX__)
+ lpm r0, Z+
+#else
+ lpm
+ adiw r30, 1
+#endif
+ st X+, r0
+.L__do_copy_data_start:
+ cpi r26, lo8(__data_end)
+ cpc r27, r17
+ brne .L__do_copy_data_loop
+#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
+#endif /* L_copy_data */
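+/* All three variants above perform the same startup copy of initialized
+   data from flash to RAM; in C terms, assuming only the linker-provided
+   symbols (a sketch: the real copy reads flash via LPM/ELPM, and the
+   ELPM variants also manage RAMPZ for flash beyond 64 KiB):
+
+     extern char __data_start[], __data_end[], __data_load_start[];
+     memcpy (__data_start, __data_load_start,
+             (size_t) (__data_end - __data_start));
+*/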
+
+/* __do_clear_bss is only necessary if there is anything in the .bss section. */
+
+#ifdef L_clear_bss
+ .section .init4,"ax",@progbits
+ .global __do_clear_bss
+__do_clear_bss:
+ ldi r17, hi8(__bss_end)
+ ldi r26, lo8(__bss_start)
+ ldi r27, hi8(__bss_start)
+ rjmp .do_clear_bss_start
+.do_clear_bss_loop:
+ st X+, __zero_reg__
+.do_clear_bss_start:
+ cpi r26, lo8(__bss_end)
+ cpc r27, r17
+ brne .do_clear_bss_loop
+#endif /* L_clear_bss */
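+/* The C equivalent of the loop above, assuming the linker-provided
+   symbols (illustrative sketch only):
+
+     extern char __bss_start[], __bss_end[];
+     memset (__bss_start, 0, (size_t) (__bss_end - __bss_start));
+*/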
+
+/* __do_global_ctors and __do_global_dtors are only necessary
+ if there are any constructors/destructors. */
+
+#if defined (__AVR_HAVE_JMP_CALL__)
+#define XCALL call
+#else
+#define XCALL rcall
+#endif
+
+#ifdef L_ctors
+ .section .init6,"ax",@progbits
+ .global __do_global_ctors
+#if defined(__AVR_HAVE_RAMPZ__)
+__do_global_ctors:
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ ldi r16, hh8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ ldi r24, hh8(__ctors_start)
+ cpc r16, r24
+ brne .L__do_global_ctors_loop
+#else
+__do_global_ctors:
+ ldi r17, hi8(__ctors_start)
+ ldi r28, lo8(__ctors_end)
+ ldi r29, hi8(__ctors_end)
+ rjmp .L__do_global_ctors_start
+.L__do_global_ctors_loop:
+ sbiw r28, 2
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+.L__do_global_ctors_start:
+ cpi r28, lo8(__ctors_start)
+ cpc r29, r17
+ brne .L__do_global_ctors_loop
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+#endif /* L_ctors */
+
+#ifdef L_dtors
+ .section .fini6,"ax",@progbits
+ .global __do_global_dtors
+#if defined(__AVR_HAVE_RAMPZ__)
+__do_global_dtors:
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ ldi r16, hh8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ sbiw r28, 2
+ sbc r16, __zero_reg__
+ mov_h r31, r29
+ mov_l r30, r28
+ out __RAMPZ__, r16
+ XCALL __tablejump_elpm__
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ ldi r24, hh8(__dtors_end)
+ cpc r16, r24
+ brne .L__do_global_dtors_loop
+#else
+__do_global_dtors:
+ ldi r17, hi8(__dtors_end)
+ ldi r28, lo8(__dtors_start)
+ ldi r29, hi8(__dtors_start)
+ rjmp .L__do_global_dtors_start
+.L__do_global_dtors_loop:
+ mov_h r31, r29
+ mov_l r30, r28
+ XCALL __tablejump__
+ adiw r28, 2
+.L__do_global_dtors_start:
+ cpi r28, lo8(__dtors_end)
+ cpc r29, r17
+ brne .L__do_global_dtors_loop
+#endif /* defined(__AVR_HAVE_RAMPZ__) */
+#endif /* L_dtors */
+
+#ifdef L_tablejump_elpm
+ .global __tablejump_elpm__
+ .func __tablejump_elpm__
+__tablejump_elpm__:
+#if defined (__AVR_HAVE_ELPM__)
+#if defined (__AVR_HAVE_LPMX__)
+ elpm __tmp_reg__, Z+
+ elpm r31, Z
+ mov r30, __tmp_reg__
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ eijmp
+#else
+ ijmp
+#endif
+
+#else
+ elpm
+ adiw r30, 1
+ push r0
+ elpm
+ push r0
+#if defined (__AVR_HAVE_EIJMP_EICALL__)
+ in __tmp_reg__, __EIND__
+ push __tmp_reg__
+#endif
+ ret
+#endif
+#endif /* defined (__AVR_HAVE_ELPM__) */
+ .endfunc
+#endif /* defined (L_tablejump_elpm) */
+
diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md
new file mode 100755
index 000000000..9a3473bf8
--- /dev/null
+++ b/gcc/config/avr/predicates.md
@@ -0,0 +1,140 @@
+;; Predicate definitions for ATMEL AVR micro controllers.
+;; Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Registers from r0 to r15.
+(define_predicate "l_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) <= 15")))
+
+;; Registers from r16 to r31.
+(define_predicate "d_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) >= 16 && REGNO (op) <= 31")))
+
+(define_predicate "even_register_operand"
+ (and (match_code "reg")
+ (and (match_test "REGNO (op) <= 31")
+ (match_test "(REGNO (op) & 1) == 0"))))
+
+(define_predicate "odd_register_operand"
+ (and (match_code "reg")
+ (and (match_test "REGNO (op) <= 31")
+ (match_test "(REGNO (op) & 1) != 0"))))
+
+;; SP register.
+(define_predicate "stack_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == REG_SP")))
+
+;; Return true if OP is a valid address for lower half of I/O space.
+(define_predicate "low_io_address_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE((INTVAL (op)), 0x20, 0x3F)")))
+
+;; Return true if OP is a valid address for high half of I/O space.
+(define_predicate "high_io_address_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE((INTVAL (op)), 0x40, 0x5F)")))
+
+;; Return true if OP is a valid address of I/O space.
+(define_predicate "io_address_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE((INTVAL (op)), 0x20, (0x60 - GET_MODE_SIZE(mode)))")))
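+;; On the classic AVR cores the I/O registers are mapped into the data
+;; space at offset 0x20, so a data address in [0x20, 0x5F] can be turned
+;; into an IN/OUT operand (or SBI/CBI for the low half) by subtracting
+;; 0x20; for example, a hypothetical port at data address 0x38 would be
+;; I/O address 0x18.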
+
+;; Return 1 if OP is the zero constant for MODE.
+(define_predicate "const0_operand"
+ (and (match_code "const_int,const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Returns true if OP is either the constant zero or a register.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Returns 1 if OP is a SYMBOL_REF.
+(define_predicate "symbol_ref_operand"
+ (match_code "symbol_ref"))
+
+;; Return true if OP is a text segment reference.
+;; This is needed for program memory address expressions.
+(define_predicate "text_segment_operand"
+ (match_code "code_label,label_ref,symbol_ref,plus,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CODE_LABEL:
+ return true;
+ case LABEL_REF:
+ return true;
+ case SYMBOL_REF:
+ return SYMBOL_REF_FUNCTION_P (op);
+ case PLUS:
+ /* Assume canonical format of symbol + constant.
+ Fall through. */
+ case CONST:
+ return text_segment_operand (XEXP (op, 0), VOIDmode);
+ default:
+ return false;
+ }
+})
+
+;; Return true if OP is a constant that contains only one 1 in its
+;; binary representation.
+(define_predicate "single_one_operand"
+ (and (match_code "const_int")
+ (match_test "exact_log2(INTVAL (op) & GET_MODE_MASK (mode)) >= 0")))
+
+;; Return true if OP is a constant that contains only one 0 in its
+;; binary representation.
+(define_predicate "single_zero_operand"
+ (and (match_code "const_int")
+ (match_test "exact_log2(~INTVAL (op) & GET_MODE_MASK (mode)) >= 0")))
+
+;; True for small constant stack adjustments, in the range -6 .. 5.
+(define_predicate "avr_sp_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= -6 && INTVAL (op) <= 5")))
+
+;; True for EQ & NE
+(define_predicate "eqne_operator"
+ (match_code "eq,ne"))
+
+;; True for GE & LT
+(define_predicate "gelt_operator"
+ (match_code "ge,lt"))
+
+;; True for GT, GTU, LE & LEU
+(define_predicate "difficult_comparison_operator"
+ (match_code "gt,gtu,le,leu"))
+
+;; False for GT, GTU, LE & LEU
+(define_predicate "simple_comparison_operator"
+ (and (match_operand 0 "comparison_operator")
+ (not (match_code "gt,gtu,le,leu"))))
+
+;; Return true if OP is a valid call operand.
+(define_predicate "call_insn_operand"
+ (and (match_code "mem")
+ (ior (match_test "register_operand (XEXP (op, 0), mode)")
+ (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
+
+;; True for register that is pseudo register.
+(define_predicate "pseudo_register_operand"
+ (and (match_code "reg")
+ (match_test "!HARD_REGISTER_P (op)")))
diff --git a/gcc/config/avr/rtems.h b/gcc/config/avr/rtems.h
new file mode 100644
index 000000000..efd8aface
--- /dev/null
+++ b/gcc/config/avr/rtems.h
@@ -0,0 +1,28 @@
+/* Definitions for rtems targeting an AVR using ELF.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Ralf Corsepius (ralf.corsepius@rtems.org).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+do { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+} while (0)
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr
new file mode 100644
index 000000000..18769ebb2
--- /dev/null
+++ b/gcc/config/avr/t-avr
@@ -0,0 +1,225 @@
+# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+# 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+driver-avr.o: $(srcdir)/config/avr/driver-avr.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+avr-devices.o: $(srcdir)/config/avr/avr-devices.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+
+avr-c.o: $(srcdir)/config/avr/avr-c.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(C_COMMON_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+
+
+LIB1ASMSRC = avr/libgcc.S
+LIB1ASMFUNCS = \
+ _mulqi3 \
+ _mulhi3 \
+ _mulsi3 \
+ _udivmodqi4 \
+ _divmodqi4 \
+ _udivmodhi4 \
+ _divmodhi4 \
+ _udivmodsi4 \
+ _divmodsi4 \
+ _prologue \
+ _epilogue \
+ _exit \
+ _cleanup \
+ _tablejump \
+ _tablejump_elpm \
+ _copy_data \
+ _clear_bss \
+ _ctors \
+ _dtors
+
+# We do not have the DF type.
+# Most of the C functions in libgcc2 use almost all registers,
+# so use -mcall-prologues for smaller code size.
+TARGET_LIBGCC2_CFLAGS = -DDF=SF -Dinhibit_libc -mcall-prologues -Os
+
+fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/avr/t-avr
+ echo '#define FLOAT' > fp-bit.c
+ echo '#define FLOAT_ONLY' >> fp-bit.c
+ echo '#define CMPtype QItype' >> fp-bit.c
+ echo '#define DF SF' >> fp-bit.c
+ echo '#define DI SI' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#define SMALL_MACHINE' >> fp-bit.c
+ echo 'typedef int QItype __attribute__ ((mode (QI)));' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+FPBIT = fp-bit.c
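+# The rule above synthesizes a float-only fp-bit.c: together with -DDF=SF
+# in TARGET_LIBGCC2_CFLAGS this makes double 32 bits wide, so only the
+# single-precision soft-float routines are built for AVR.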
+
+MULTILIB_OPTIONS = mmcu=avr2/mmcu=avr25/mmcu=avr3/mmcu=avr31/mmcu=avr35/mmcu=avr4/mmcu=avr5/mmcu=avr51/mmcu=avr6
+MULTILIB_DIRNAMES = avr2 avr25 avr3 avr31 avr35 avr4 avr5 avr51 avr6
+
+# The many avr2 matches are not listed here - this is the default.
+MULTILIB_MATCHES = \
+ mmcu?avr25=mmcu?ata6289 \
+ mmcu?avr25=mmcu?attiny13 \
+ mmcu?avr25=mmcu?attiny13a \
+ mmcu?avr25=mmcu?attiny2313 \
+ mmcu?avr25=mmcu?attiny2313a \
+ mmcu?avr25=mmcu?attiny4313 \
+ mmcu?avr25=mmcu?attiny24 \
+ mmcu?avr25=mmcu?attiny24a \
+ mmcu?avr25=mmcu?attiny44 \
+ mmcu?avr25=mmcu?attiny44a \
+ mmcu?avr25=mmcu?attiny84 \
+ mmcu?avr25=mmcu?attiny84a \
+ mmcu?avr25=mmcu?attiny25 \
+ mmcu?avr25=mmcu?attiny45 \
+ mmcu?avr25=mmcu?attiny85 \
+ mmcu?avr25=mmcu?attiny261 \
+ mmcu?avr25=mmcu?attiny261a \
+ mmcu?avr25=mmcu?attiny461 \
+ mmcu?avr25=mmcu?attiny461a \
+ mmcu?avr25=mmcu?attiny861 \
+ mmcu?avr25=mmcu?attiny861a \
+ mmcu?avr25=mmcu?attiny43u \
+ mmcu?avr25=mmcu?attiny87 \
+ mmcu?avr25=mmcu?attiny48 \
+ mmcu?avr25=mmcu?attiny88 \
+ mmcu?avr25=mmcu?at86rf401 \
+ mmcu?avr3=mmcu?at43usb355 \
+ mmcu?avr3=mmcu?at76c711 \
+ mmcu?avr31=mmcu?atmega103 \
+ mmcu?avr31=mmcu?at43usb320 \
+ mmcu?avr35=mmcu?at90usb82 \
+ mmcu?avr35=mmcu?at90usb162 \
+ mmcu?avr35=mmcu?atmega8u2 \
+ mmcu?avr35=mmcu?atmega16u2 \
+ mmcu?avr35=mmcu?atmega32u2 \
+ mmcu?avr35=mmcu?attiny167 \
+ mmcu?avr4=mmcu?atmega48 \
+ mmcu?avr4=mmcu?atmega48a \
+ mmcu?avr4=mmcu?atmega48p \
+ mmcu?avr4=mmcu?atmega8 \
+ mmcu?avr4=mmcu?atmega8515 \
+ mmcu?avr4=mmcu?atmega8535 \
+ mmcu?avr4=mmcu?atmega88 \
+ mmcu?avr4=mmcu?atmega88a \
+ mmcu?avr4=mmcu?atmega88p \
+ mmcu?avr4=mmcu?atmega88pa \
+ mmcu?avr4=mmcu?atmega8hva \
+ mmcu?avr4=mmcu?at90pwm1 \
+ mmcu?avr4=mmcu?at90pwm2 \
+ mmcu?avr4=mmcu?at90pwm2b \
+ mmcu?avr4=mmcu?at90pwm3 \
+ mmcu?avr4=mmcu?at90pwm3b \
+ mmcu?avr4=mmcu?at90pwm81 \
+ mmcu?avr5=mmcu?atmega16 \
+ mmcu?avr5=mmcu?atmega16a \
+ mmcu?avr5=mmcu?atmega161 \
+ mmcu?avr5=mmcu?atmega162 \
+ mmcu?avr5=mmcu?atmega163 \
+ mmcu?avr5=mmcu?atmega164a \
+ mmcu?avr5=mmcu?atmega164p \
+ mmcu?avr5=mmcu?atmega165 \
+ mmcu?avr5=mmcu?atmega165a \
+ mmcu?avr5=mmcu?atmega165p \
+ mmcu?avr5=mmcu?atmega168 \
+ mmcu?avr5=mmcu?atmega168a \
+ mmcu?avr5=mmcu?atmega168p \
+ mmcu?avr5=mmcu?atmega169 \
+ mmcu?avr5=mmcu?atmega169a \
+ mmcu?avr5=mmcu?atmega169p \
+ mmcu?avr5=mmcu?atmega169pa \
+ mmcu?avr5=mmcu?atmega32 \
+ mmcu?avr5=mmcu?atmega323 \
+ mmcu?avr5=mmcu?atmega324a \
+ mmcu?avr5=mmcu?atmega324p \
+ mmcu?avr5=mmcu?atmega324pa \
+ mmcu?avr5=mmcu?atmega325 \
+ mmcu?avr5=mmcu?atmega325a \
+ mmcu?avr5=mmcu?atmega325p \
+ mmcu?avr5=mmcu?atmega3250 \
+ mmcu?avr5=mmcu?atmega3250a \
+ mmcu?avr5=mmcu?atmega3250p \
+ mmcu?avr5=mmcu?atmega328 \
+ mmcu?avr5=mmcu?atmega328p \
+ mmcu?avr5=mmcu?atmega329 \
+ mmcu?avr5=mmcu?atmega329a \
+ mmcu?avr5=mmcu?atmega329p \
+ mmcu?avr5=mmcu?atmega329pa \
+ mmcu?avr5=mmcu?atmega3290 \
+ mmcu?avr5=mmcu?atmega3290a \
+ mmcu?avr5=mmcu?atmega3290p \
+ mmcu?avr5=mmcu?atmega406 \
+ mmcu?avr5=mmcu?atmega64 \
+ mmcu?avr5=mmcu?atmega640 \
+ mmcu?avr5=mmcu?atmega644 \
+ mmcu?avr5=mmcu?atmega644a \
+ mmcu?avr5=mmcu?atmega644p \
+ mmcu?avr5=mmcu?atmega644pa \
+ mmcu?avr5=mmcu?atmega645 \
+ mmcu?avr5=mmcu?atmega645a \
+ mmcu?avr5=mmcu?atmega645p \
+ mmcu?avr5=mmcu?atmega6450 \
+ mmcu?avr5=mmcu?atmega6450a \
+ mmcu?avr5=mmcu?atmega6450p \
+ mmcu?avr5=mmcu?atmega649 \
+ mmcu?avr5=mmcu?atmega649a \
+ mmcu?avr5=mmcu?atmega649p \
+ mmcu?avr5=mmcu?atmega6490 \
+ mmcu?avr5=mmcu?atmega6490a \
+ mmcu?avr5=mmcu?atmega6490p \
+ mmcu?avr5=mmcu?atmega16hva \
+ mmcu?avr5=mmcu?atmega16hva2 \
+ mmcu?avr5=mmcu?atmega16hvb \
+ mmcu?avr5=mmcu?atmega32hvb \
+ mmcu?avr5=mmcu?atmega64hve \
+ mmcu?avr5=mmcu?at90can32 \
+ mmcu?avr5=mmcu?at90can64 \
+ mmcu?avr5=mmcu?at90pwm216 \
+ mmcu?avr5=mmcu?at90pwm316 \
+ mmcu?avr5=mmcu?atmega32c1 \
+ mmcu?avr5=mmcu?atmega64c1 \
+ mmcu?avr5=mmcu?atmega16m1 \
+ mmcu?avr5=mmcu?atmega32m1 \
+ mmcu?avr5=mmcu?atmega64m1 \
+ mmcu?avr5=mmcu?atmega16u4 \
+ mmcu?avr5=mmcu?atmega32u4 \
+ mmcu?avr5=mmcu?atmega32u6 \
+ mmcu?avr5=mmcu?at90scr100 \
+ mmcu?avr5=mmcu?at90usb646 \
+ mmcu?avr5=mmcu?at90usb647 \
+ mmcu?avr5=mmcu?at94k \
+ mmcu?avr5=mmcu?m3000 \
+ mmcu?avr51=mmcu?atmega128 \
+ mmcu?avr51=mmcu?atmega1280 \
+ mmcu?avr51=mmcu?atmega1281 \
+ mmcu?avr51=mmcu?atmega1284p \
+ mmcu?avr51=mmcu?atmega128rfa1 \
+ mmcu?avr51=mmcu?at90can128 \
+ mmcu?avr51=mmcu?at90usb1286 \
+ mmcu?avr51=mmcu?at90usb1287 \
+ mmcu?avr6=mmcu?atmega2560 \
+ mmcu?avr6=mmcu?atmega2561
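+# In MULTILIB_MATCHES, `?' stands in for `=' inside each entry, so
+# "mmcu?avr25=mmcu?attiny13" means -mmcu=attiny13 is built against the
+# avr25 multilib.  One way to inspect the mapping (illustrative command):
+#
+#   avr-gcc -mmcu=attiny13 -print-multi-directory   # prints "avr25"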
+
+MULTILIB_EXCEPTIONS =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/avr/t-rtems b/gcc/config/avr/t-rtems
new file mode 100644
index 000000000..a3ef8bd80
--- /dev/null
+++ b/gcc/config/avr/t-rtems
@@ -0,0 +1,3 @@
+# Multilibs for avr RTEMS targets.
+
+# At the moment, this is just a stub.
diff --git a/gcc/config/bfin/bfin-modes.def b/gcc/config/bfin/bfin-modes.def
new file mode 100644
index 000000000..27459cc13
--- /dev/null
+++ b/gcc/config/bfin/bfin-modes.def
@@ -0,0 +1,28 @@
+/* Definitions of target machine for GNU compiler, for Blackfin.
+ Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* PDImode for the 40-bit accumulators. */
+PARTIAL_INT_MODE (DI);
+
+/* Two of those - covering both accumulators for vector multiplications. */
+VECTOR_MODE (INT, PDI, 2);
+
+VECTOR_MODE (INT, HI, 2); /* V2HI */
+VECTOR_MODE (INT, SI, 2); /* V2SI - occasionally used. */
diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h
new file mode 100644
index 000000000..1e85e16ff
--- /dev/null
+++ b/gcc/config/bfin/bfin-protos.h
@@ -0,0 +1,122 @@
+/* Prototypes for Blackfin functions used in the md file & elsewhere.
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GNU CC.
+
+ GNU CC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GNU CC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Function prototypes that cannot exist in bfin.h due to dependency
+ complications. */
+#ifndef GCC_BFIN_PROTOS_H
+#define GCC_BFIN_PROTOS_H
+
+/* For the anomaly 05-00-0245 */
+#define WA_SPECULATIVE_LOADS 0x00000001
+#define ENABLE_WA_SPECULATIVE_LOADS \
+ (bfin_workarounds & WA_SPECULATIVE_LOADS)
+
+/* For the anomaly 05-00-0244 */
+#define WA_SPECULATIVE_SYNCS 0x00000002
+#define ENABLE_WA_SPECULATIVE_SYNCS \
+ (bfin_workarounds & WA_SPECULATIVE_SYNCS)
+
+/* For the anomaly 05-00-0371 */
+#define WA_RETS 0x00000004
+#define ENABLE_WA_RETS \
+ (bfin_workarounds & WA_RETS)
+
+/* For the anomaly 05-00-0426 */
+#define WA_INDIRECT_CALLS 0x00000008
+#define ENABLE_WA_INDIRECT_CALLS \
+ ((bfin_workarounds & WA_INDIRECT_CALLS) && !TARGET_ICPLB)
+
+#define WA_05000257 0x00000010
+#define ENABLE_WA_05000257 \
+ (bfin_workarounds & WA_05000257)
+
+#define WA_05000283 0x00000020
+#define ENABLE_WA_05000283 \
+ (bfin_workarounds & WA_05000283)
+
+#define WA_05000315 0x00000040
+#define ENABLE_WA_05000315 \
+ (bfin_workarounds & WA_05000315)
+
+/* For the anomaly 05-00-0312 */
+#define WA_LOAD_LCREGS 0x00000080
+#define ENABLE_WA_LOAD_LCREGS \
+ (bfin_workarounds & WA_LOAD_LCREGS)
+
+#define WA_05000074 0x00000100
+#define ENABLE_WA_05000074 \
+ (bfin_workarounds & WA_05000074)
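+/* Each silicon anomaly is assigned one bit in bfin_workarounds, and the
+   ENABLE_WA_* macros test that bit, sometimes combined with a target
+   flag (see ENABLE_WA_INDIRECT_CALLS above).  A typical hypothetical use
+   in the back end:
+
+     if (ENABLE_WA_RETS)
+       {
+         ... emit the instructions required by anomaly 05-00-0371 ...
+       }
+*/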
+
+#define Mmode enum machine_mode
+
+extern bool function_arg_regno_p (int);
+
+extern const char *output_load_immediate (rtx *);
+extern const char *output_casesi_internal (rtx *);
+extern char *bfin_asm_long (void);
+extern char *bfin_asm_short (void);
+extern int log2constp (unsigned HOST_WIDE_INT);
+
+extern bool bfin_legitimate_constant_p (rtx);
+extern int hard_regno_mode_ok (int, Mmode);
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
+extern HOST_WIDE_INT bfin_initial_elimination_offset (int, int);
+
+extern int effective_address_32bit_p (rtx, Mmode);
+extern int symbolic_reference_mentioned_p (rtx);
+extern rtx bfin_gen_compare (rtx, Mmode);
+extern bool expand_move (rtx *, Mmode);
+extern void bfin_expand_call (rtx, rtx, rtx, rtx, int);
+extern bool bfin_longcall_p (rtx, int);
+extern bool bfin_dsp_memref_p (rtx);
+extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx);
+
+extern int bfin_register_move_cost (enum machine_mode, enum reg_class,
+ enum reg_class);
+extern int bfin_memory_move_cost (enum machine_mode, enum reg_class, int in);
+extern enum reg_class secondary_input_reload_class (enum reg_class, Mmode,
+ rtx);
+extern enum reg_class secondary_output_reload_class (enum reg_class, Mmode,
+ rtx);
+extern char *section_asm_op_1 (SECT_ENUM_T);
+extern char *section_asm_op (SECT_ENUM_T);
+extern void print_operand (FILE *, rtx, char);
+extern void print_address_operand (FILE *, rtx);
+extern void split_di (rtx [], int, rtx [], rtx []);
+extern int split_load_immediate (rtx []);
+extern void emit_pic_move (rtx *, Mmode);
+extern void asm_conditional_branch (rtx, rtx *, int, int);
+extern rtx bfin_gen_compare (rtx, Mmode);
+
+extern unsigned bfin_local_alignment (tree, unsigned);
+extern rtx bfin_va_arg (tree, tree);
+
+extern void bfin_expand_prologue (void);
+extern void bfin_expand_epilogue (int, int, bool);
+extern int push_multiple_operation (rtx, Mmode);
+extern int pop_multiple_operation (rtx, Mmode);
+extern void output_push_multiple (rtx, rtx *);
+extern void output_pop_multiple (rtx, rtx *);
+extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int);
+extern rtx bfin_return_addr_rtx (int);
+extern void bfin_hardware_loop (void);
+#undef Mmode
+
+#endif
+
diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
new file mode 100644
index 000000000..60cd09eff
--- /dev/null
+++ b/gcc/config/bfin/bfin.c
@@ -0,0 +1,6695 @@
+/* The Blackfin code generation auxiliary output file.
+ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "insn-codes.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "tree.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "input.h"
+#include "target.h"
+#include "target-def.h"
+#include "expr.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "optabs.h"
+#include "ggc.h"
+#include "integrate.h"
+#include "cgraph.h"
+#include "langhooks.h"
+#include "bfin-protos.h"
+#include "tm-preds.h"
+#include "tm-constrs.h"
+#include "gt-bfin.h"
+#include "basic-block.h"
+#include "cfglayout.h"
+#include "timevar.h"
+#include "df.h"
+#include "sel-sched.h"
+
+/* A C structure for machine-specific, per-function data.
+ This is added to the cfun structure. */
+struct GTY(()) machine_function
+{
+ /* Set if we are notified by the doloop pass that a hardware loop
+ was created. */
+ int has_hardware_loops;
+
+ /* Set if we create a memcpy pattern that uses loop registers. */
+ int has_loopreg_clobber;
+};
+
+/* RTX for condition code flag register and RETS register */
+extern GTY(()) rtx bfin_cc_rtx;
+extern GTY(()) rtx bfin_rets_rtx;
+rtx bfin_cc_rtx, bfin_rets_rtx;
+
+int max_arg_registers = 0;
+
+/* Arrays used when emitting register names. */
+const char *short_reg_names[] = SHORT_REGISTER_NAMES;
+const char *high_reg_names[] = HIGH_REGISTER_NAMES;
+const char *dregs_pair_names[] = DREGS_PAIR_NAMES;
+const char *byte_reg_names[] = BYTE_REGISTER_NAMES;
+
+static int arg_regs[] = FUNCTION_ARG_REGISTERS;
+static int ret_regs[] = FUNCTION_RETURN_REGISTERS;
+
+/* Nonzero if -mshared-library-id was given. */
+static int bfin_lib_id_given;
+
+/* -mcpu support */
+bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN;
+
+/* -msi-revision support. There are two special values:
+ -1 -msi-revision=none.
+ 0xffff -msi-revision=any. */
+int bfin_si_revision;
+
+/* The workarounds enabled */
+unsigned int bfin_workarounds = 0;
+
+struct bfin_cpu
+{
+ const char *name;
+ bfin_cpu_t type;
+ int si_revision;
+ unsigned int workarounds;
+};
+
+struct bfin_cpu bfin_cpus[] =
+{
+ {"bf512", BFIN_CPU_BF512, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf514", BFIN_CPU_BF514, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf516", BFIN_CPU_BF516, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf518", BFIN_CPU_BF518, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+
+ {"bf522", BFIN_CPU_BF522, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf522", BFIN_CPU_BF522, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf522", BFIN_CPU_BF522, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf523", BFIN_CPU_BF523, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf523", BFIN_CPU_BF523, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf523", BFIN_CPU_BF523, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf524", BFIN_CPU_BF524, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf524", BFIN_CPU_BF524, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf524", BFIN_CPU_BF524, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf525", BFIN_CPU_BF525, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf525", BFIN_CPU_BF525, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf525", BFIN_CPU_BF525, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf526", BFIN_CPU_BF526, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf526", BFIN_CPU_BF526, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf526", BFIN_CPU_BF526, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf527", BFIN_CPU_BF527, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_05000074},
+ {"bf527", BFIN_CPU_BF527, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+ {"bf527", BFIN_CPU_BF527, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
+
+ {"bf531", BFIN_CPU_BF531, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf531", BFIN_CPU_BF531, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
+ {"bf531", BFIN_CPU_BF531, 0x0004,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf531", BFIN_CPU_BF531, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf532", BFIN_CPU_BF532, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf532", BFIN_CPU_BF532, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
+ {"bf532", BFIN_CPU_BF532, 0x0004,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf532", BFIN_CPU_BF532, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf533", BFIN_CPU_BF533, 0x0006,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf533", BFIN_CPU_BF533, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
+ | WA_LOAD_LCREGS | WA_05000074},
+ {"bf533", BFIN_CPU_BF533, 0x0004,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf533", BFIN_CPU_BF533, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf534", BFIN_CPU_BF534, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf534", BFIN_CPU_BF534, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf534", BFIN_CPU_BF534, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf536", BFIN_CPU_BF536, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf536", BFIN_CPU_BF536, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf536", BFIN_CPU_BF536, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf537", BFIN_CPU_BF537, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf537", BFIN_CPU_BF537, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf537", BFIN_CPU_BF537, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf538", BFIN_CPU_BF538, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf538", BFIN_CPU_BF538, 0x0004,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf538", BFIN_CPU_BF538, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
+ {"bf538", BFIN_CPU_BF538, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf539", BFIN_CPU_BF539, 0x0005,
+ WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf539", BFIN_CPU_BF539, 0x0004,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
+ {"bf539", BFIN_CPU_BF539, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
+ {"bf539", BFIN_CPU_BF539, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf542m", BFIN_CPU_BF542M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf542", BFIN_CPU_BF542, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf542", BFIN_CPU_BF542, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf542", BFIN_CPU_BF542, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf544m", BFIN_CPU_BF544M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf544", BFIN_CPU_BF544, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf544", BFIN_CPU_BF544, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf544", BFIN_CPU_BF544, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf547m", BFIN_CPU_BF547M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf547", BFIN_CPU_BF547, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf547", BFIN_CPU_BF547, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf547", BFIN_CPU_BF547, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf548m", BFIN_CPU_BF548M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf548", BFIN_CPU_BF548, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf548", BFIN_CPU_BF548, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf548", BFIN_CPU_BF548, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf549m", BFIN_CPU_BF549M, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+
+ {"bf549", BFIN_CPU_BF549, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf549", BFIN_CPU_BF549, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
+ {"bf549", BFIN_CPU_BF549, 0x0000,
+ WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS
+ | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
+ {"bf561", BFIN_CPU_BF561, 0x0003,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+ {"bf561", BFIN_CPU_BF561, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
+ | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
+ | WA_05000074},
+
+ {NULL, BFIN_CPU_UNKNOWN, 0, 0}
+};
+
+int splitting_for_sched, splitting_loops;
+
+static void
+bfin_globalize_label (FILE *stream, const char *name)
+{
+ fputs (".global ", stream);
+ assemble_name (stream, name);
+ fputc (';', stream);
+ fputc ('\n', stream);
+}
+
+static void
+output_file_start (void)
+{
+ FILE *file = asm_out_file;
+ int i;
+
+ fprintf (file, ".file \"%s\";\n", input_filename);
+
+ for (i = 0; arg_regs[i] >= 0; i++)
+ ;
+ max_arg_registers = i; /* how many argument registers are used */
+}
+
+/* Examine machine-dependent attributes of function type FUNTYPE and return its
+ type. See the definition of E_FUNKIND. */
+
+static e_funkind
+funkind (const_tree funtype)
+{
+ tree attrs = TYPE_ATTRIBUTES (funtype);
+ if (lookup_attribute ("interrupt_handler", attrs))
+ return INTERRUPT_HANDLER;
+ else if (lookup_attribute ("exception_handler", attrs))
+ return EXCPT_HANDLER;
+ else if (lookup_attribute ("nmi_handler", attrs))
+ return NMI_HANDLER;
+ else
+ return SUBROUTINE;
+}
+
+/* Legitimize PIC addresses. If the address is already position-independent,
+ we return ORIG. Newly generated position-independent addresses go into a
+ reg. This is REG if nonzero, otherwise we allocate register(s) as
+ necessary. PICREG is the register holding the pointer to the PIC offset
+ table. */
+
+static rtx
+legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
+{
+ rtx addr = orig;
+ rtx new_rtx = orig;
+
+ if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
+ {
+ int unspec;
+ rtx tmp;
+
+ if (TARGET_ID_SHARED_LIBRARY)
+ unspec = UNSPEC_MOVE_PIC;
+ else if (GET_CODE (addr) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (addr))
+ unspec = UNSPEC_FUNCDESC_GOT17M4;
+ else
+ unspec = UNSPEC_MOVE_FDPIC;
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
+ new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
+
+ emit_move_insn (reg, new_rtx);
+ if (picreg == pic_offset_table_rtx)
+ crtl->uses_pic_offset_table = 1;
+ return reg;
+ }
+
+ else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
+ {
+ rtx base;
+
+ if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+
+ if (XEXP (addr, 0) == picreg)
+ return orig;
+
+ if (reg == 0)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
+ addr = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg,
+ picreg);
+
+ if (GET_CODE (addr) == CONST_INT)
+ {
+ gcc_assert (! reload_in_progress && ! reload_completed);
+ addr = force_reg (Pmode, addr);
+ }
+
+ if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
+ addr = XEXP (addr, 1);
+ }
+
+ return gen_rtx_PLUS (Pmode, base, addr);
+ }
+
+ return new_rtx;
+}
+
+/* Stack frame layout. */
+
+/* For a given REGNO, determine whether it must be saved in the function
+ prologue. IS_INTHANDLER specifies whether we're generating a normal
+ prologue or an interrupt/exception one. */
+static bool
+must_save_p (bool is_inthandler, unsigned regno)
+{
+ if (D_REGNO_P (regno))
+ {
+ bool is_eh_return_reg = false;
+ if (crtl->calls_eh_return)
+ {
+ unsigned j;
+ for (j = 0; ; j++)
+ {
+ unsigned test = EH_RETURN_DATA_REGNO (j);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ is_eh_return_reg = true;
+ }
+ }
+
+ return (is_eh_return_reg
+ || (df_regs_ever_live_p (regno)
+ && !fixed_regs[regno]
+ && (is_inthandler || !call_used_regs[regno])));
+ }
+ else if (P_REGNO_P (regno))
+ {
+ return ((df_regs_ever_live_p (regno)
+ && !fixed_regs[regno]
+ && (is_inthandler || !call_used_regs[regno]))
+ || (is_inthandler
+ && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
+ && regno == REG_P5)
+ || (!TARGET_FDPIC
+ && regno == PIC_OFFSET_TABLE_REGNUM
+ && (crtl->uses_pic_offset_table
+ || (TARGET_ID_SHARED_LIBRARY && !current_function_is_leaf))));
+ }
+ else
+ return ((is_inthandler || !call_used_regs[regno])
+ && (df_regs_ever_live_p (regno)
+ || (!leaf_function_p () && call_used_regs[regno])));
+
+}
+
+/* Compute the number of DREGS to save with a push_multiple operation.
+ This could include registers that aren't modified in the function,
+ since push_multiple only takes a range of registers.
+ If IS_INTHANDLER, then everything that is live must be saved, even
+ if normally call-clobbered.
+ If CONSECUTIVE, return the number of registers we can save in one
+ instruction with a push/pop multiple instruction. */
+
+static int
+n_dregs_to_save (bool is_inthandler, bool consecutive)
+{
+ int count = 0;
+ unsigned i;
+
+ for (i = REG_R7 + 1; i-- != REG_R0;)
+ {
+ if (must_save_p (is_inthandler, i))
+ count++;
+ else if (consecutive)
+ return count;
+ }
+ return count;
+}
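+/* The loop above counts downward from R7 because the push-multiple
+   instruction can only save a contiguous range of registers ending at
+   the top of the bank; with CONSECUTIVE set it therefore stops at the
+   first register that need not be saved, and expand_prologue_reg_save
+   pushes any remaining registers individually.  */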
+
+/* Like n_dregs_to_save, but compute number of PREGS to save. */
+
+static int
+n_pregs_to_save (bool is_inthandler, bool consecutive)
+{
+ int count = 0;
+ unsigned i;
+
+ for (i = REG_P5 + 1; i-- != REG_P0;)
+ if (must_save_p (is_inthandler, i))
+ count++;
+ else if (consecutive)
+ return count;
+ return count;
+}
+
+/* Determine if we are going to save the frame pointer in the prologue. */
+
+static bool
+must_save_fp_p (void)
+{
+ return df_regs_ever_live_p (REG_FP);
+}
+
+/* Determine if we are going to save the RETS register. */
+static bool
+must_save_rets_p (void)
+{
+ return df_regs_ever_live_p (REG_RETS);
+}
+
+static bool
+stack_frame_needed_p (void)
+{
+ /* EH return puts a new return address into the frame using an
+ address relative to the frame pointer. */
+ if (crtl->calls_eh_return)
+ return true;
+ return frame_pointer_needed;
+}
+
+/* Emit code to save registers in the prologue. SAVEALL is nonzero if we
+ must save all registers; this is used for interrupt handlers.
+ SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing
+ this for an interrupt (or exception) handler. */
+
+static void
+expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
+{
+ rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
+ rtx predec = gen_rtx_MEM (SImode, predec1);
+ int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
+ int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
+ int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
+ int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
+ int dregno, pregno;
+ int total_consec = ndregs_consec + npregs_consec;
+ int i, d_to_save;
+
+ if (saveall || is_inthandler)
+ {
+ rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber
+ || (ENABLE_WA_05000257
+ && (dregno == REG_LC0 || dregno == REG_LC1)))
+ {
+ insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (total_consec != 0)
+ {
+ rtx insn;
+ rtx val = GEN_INT (-total_consec * 4);
+ rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2));
+
+ XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val),
+ UNSPEC_PUSH_MULTIPLE);
+ XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg,
+ gen_rtx_PLUS (Pmode,
+ spreg,
+ val));
+ RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1;
+ d_to_save = ndregs_consec;
+ dregno = REG_R7 + 1 - ndregs_consec;
+ pregno = REG_P5 + 1 - npregs_consec;
+ for (i = 0; i < total_consec; i++)
+ {
+ rtx memref = gen_rtx_MEM (word_mode,
+ gen_rtx_PLUS (Pmode, spreg,
+ GEN_INT (- i * 4 - 4)));
+ rtx subpat;
+ if (d_to_save > 0)
+ {
+ subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode,
+ dregno++));
+ d_to_save--;
+ }
+ else
+ {
+ subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode,
+ pregno++));
+ }
+ XVECEXP (pat, 0, i + 1) = subpat;
+ RTX_FRAME_RELATED_P (subpat) = 1;
+ }
+ insn = emit_insn (pat);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ for (dregno = REG_R0; ndregs != ndregs_consec; dregno++)
+ {
+ if (must_save_p (is_inthandler, dregno))
+ {
+ rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ ndregs--;
+ }
+ }
+ for (pregno = REG_P0; npregs != npregs_consec; pregno++)
+ {
+ if (must_save_p (is_inthandler, pregno))
+ {
+ rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ npregs--;
+ }
+ }
+ for (i = REG_P7 + 1; i < REG_CC; i++)
+ if (saveall
+ || (is_inthandler
+ && (df_regs_ever_live_p (i)
+ || (!leaf_function_p () && call_used_regs[i]))))
+ {
+ rtx insn;
+ if (i == REG_A0 || i == REG_A1)
+ insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1),
+ gen_rtx_REG (PDImode, i));
+ else
+ insn = emit_move_insn (predec, gen_rtx_REG (SImode, i));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to restore registers in the epilogue. SAVEALL is nonzero if we
+ must save all registers; this is used for interrupt handlers.
+ SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing
+ this for an interrupt (or exception) handler. */
+
+static void
+expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
+{
+ rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
+ rtx postinc = gen_rtx_MEM (SImode, postinc1);
+
+ int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
+ int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
+ int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
+ int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
+ int total_consec = ndregs_consec + npregs_consec;
+ int i, regno;
+ rtx insn;
+
+ /* A slightly crude technique to stop flow from trying to delete "dead"
+ insns. */
+ MEM_VOLATILE_P (postinc) = 1;
+
+ for (i = REG_CC - 1; i > REG_P7; i--)
+ if (saveall
+ || (is_inthandler
+ && (df_regs_ever_live_p (i)
+ || (!leaf_function_p () && call_used_regs[i]))))
+ {
+ if (i == REG_A0 || i == REG_A1)
+ {
+ rtx mem = gen_rtx_MEM (PDImode, postinc1);
+ MEM_VOLATILE_P (mem) = 1;
+ emit_move_insn (gen_rtx_REG (PDImode, i), mem);
+ }
+ else
+ emit_move_insn (gen_rtx_REG (SImode, i), postinc);
+ }
+
+ regno = REG_P5 - npregs_consec;
+ for (; npregs != npregs_consec; regno--)
+ {
+ if (must_save_p (is_inthandler, regno))
+ {
+ emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
+ npregs--;
+ }
+ }
+ regno = REG_R7 - ndregs_consec;
+ for (; ndregs != ndregs_consec; regno--)
+ {
+ if (must_save_p (is_inthandler, regno))
+ {
+ emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
+ ndregs--;
+ }
+ }
+
+ if (total_consec != 0)
+ {
+ rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1));
+ XVECEXP (pat, 0, 0)
+ = gen_rtx_SET (VOIDmode, spreg,
+ gen_rtx_PLUS (Pmode, spreg,
+ GEN_INT (total_consec * 4)));
+
+ if (npregs_consec > 0)
+ regno = REG_P5 + 1;
+ else
+ regno = REG_R7 + 1;
+
+ for (i = 0; i < total_consec; i++)
+ {
+ rtx addr = (i > 0
+ ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4))
+ : spreg);
+ rtx memref = gen_rtx_MEM (word_mode, addr);
+
+ regno--;
+ XVECEXP (pat, 0, i + 1)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref);
+
+ if (npregs_consec > 0)
+ {
+ if (--npregs_consec == 0)
+ regno = REG_R7 + 1;
+ }
+ }
+
+ insn = emit_insn (pat);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ if (saveall || is_inthandler)
+ {
+ for (regno = REG_LB1; regno >= REG_LT0; regno--)
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber
+ || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
+ emit_move_insn (gen_rtx_REG (SImode, regno), postinc);
+
+ emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
+ }
+}
+
+/* Perform any actions needed for a function that is receiving a
+ variable number of arguments.
+
+ CUM is as above.
+
+ MODE and TYPE are the mode and type of the current parameter.
+
+ PRETEND_SIZE is a variable that should be set to the amount of stack
+ that must be pushed by the prolog to pretend that our caller pushed
+ it.
+
+ Normally, this macro will push all remaining incoming registers on the
+ stack and set PRETEND_SIZE to the length of the registers pushed.
+
+ Blackfin specific:
+ - The VDSP C compiler manual (our ABI) says that a variable-args function
+ should save the R0, R1 and R2 registers in the stack.
+ - The caller will always leave space on the stack for the
+ arguments that are passed in registers, so we don't have
+ to leave any extra space.
+ - Now, the va_start pointer can access all arguments from the stack. */
+
+static void
+setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED, int *pretend_size,
+ int no_rtl)
+{
+ rtx mem;
+ int i;
+
+ if (no_rtl)
+ return;
+
+ /* The move for named arguments will be generated automatically by the
+ compiler. We need to generate the move rtx for the unnamed arguments
+ if they are in the first 3 words. We assume at least 1 named argument
+ exists, so we never generate [ARGP] = R0 here. */
+
+ for (i = cum->words + 1; i < max_arg_registers; i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (arg_pointer_rtx, (i * UNITS_PER_WORD)));
+ emit_move_insn (mem, gen_rtx_REG (Pmode, i));
+ }
+
+ *pretend_size = 0;
+}
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may
+ be accessed via the stack pointer) in functions that seem suitable. */
+
+static bool
+bfin_frame_pointer_required (void)
+{
+ e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
+
+ if (fkind != SUBROUTINE)
+ return true;
+
+ /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
+ so we have to override it for non-leaf functions. */
+ if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf)
+ return true;
+
+ return false;
+}
+
+/* Return the number of registers pushed during the prologue. */
+
+static int
+n_regs_saved_by_prologue (void)
+{
+ e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
+ bool is_inthandler = fkind != SUBROUTINE;
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE
+ || (is_inthandler && !current_function_is_leaf));
+ int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false);
+ int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false);
+ int n = ndregs + npregs;
+ int i;
+
+ if (all || stack_frame_needed_p ())
+ n += 2;
+ else
+ {
+ if (must_save_fp_p ())
+ n++;
+ if (must_save_rets_p ())
+ n++;
+ }
+
+ if (fkind != SUBROUTINE || all)
+ {
+ /* Increment once for ASTAT. */
+ n++;
+ if (! current_function_is_leaf
+ || cfun->machine->has_hardware_loops
+ || cfun->machine->has_loopreg_clobber)
+ {
+ n += 6;
+ }
+ }
+
+ if (fkind != SUBROUTINE)
+ {
+ /* RETE/X/N. */
+ if (lookup_attribute ("nesting", attrs))
+ n++;
+ }
+
+ for (i = REG_P7 + 1; i < REG_CC; i++)
+ if (all
+ || (fkind != SUBROUTINE
+ && (df_regs_ever_live_p (i)
+ || (!leaf_function_p () && call_used_regs[i]))))
+ n += i == REG_A0 || i == REG_A1 ? 2 : 1;
+
+ return n;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All other eliminations are valid. */
+
+static bool
+bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+bfin_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset = 0;
+
+ if (from == ARG_POINTER_REGNUM)
+ offset = n_regs_saved_by_prologue () * 4;
+
+ if (to == STACK_POINTER_REGNUM)
+ {
+ if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
+ offset += crtl->outgoing_args_size;
+ else if (crtl->outgoing_args_size)
+ offset += FIXED_STACK_AREA;
+
+ offset += get_frame_size ();
+ }
+
+ return offset;
+}
+
+/* Emit code to load a constant CONSTANT into register REG; setting
+ RTX_FRAME_RELATED_P on all insns we generate if RELATED is true.
+ Make sure that the insns we generate need not be split. */
+
+static void
+frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related)
+{
+ rtx insn;
+ rtx cst = GEN_INT (constant);
+
+ if (constant >= -32768 && constant < 65536)
+ insn = emit_move_insn (reg, cst);
+ else
+ {
+ /* We don't call split_load_immediate here, since dwarf2out.c can get
+ confused about some of the more clever sequences it can generate. */
+ insn = emit_insn (gen_movsi_high (reg, cst));
+ if (related)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_movsi_low (reg, reg, cst));
+ }
+ if (related)
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Generate efficient code to add a value to a P register.
+ Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero.
+ EPILOGUE_P is zero if this function is called for the prologue;
+ otherwise it is nonzero, and it is negative if this is for a
+ sibcall epilogue. */
+
+static void
+add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p)
+{
+ if (value == 0)
+ return;
+
+ /* Choose whether to use a sequence using a temporary register, or
+ a sequence with multiple adds. We can add a signed 7-bit value
+ in one instruction. */
+ if (value > 120 || value < -120)
+ {
+ rtx tmpreg;
+ rtx tmpreg2;
+ rtx insn;
+
+ tmpreg2 = NULL_RTX;
+
+ /* For prologue or normal epilogue, P1 can be safely used
+ as the temporary register. For sibcall epilogue, we try to find
+ a call used P register, which will be restored in epilogue.
+ If we cannot find such a P register, we have to use one I register
+ to help us. */
+
+ if (epilogue_p >= 0)
+ tmpreg = gen_rtx_REG (SImode, REG_P1);
+ else
+ {
+ int i;
+ for (i = REG_P0; i <= REG_P5; i++)
+ if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
+ || (!TARGET_FDPIC
+ && i == PIC_OFFSET_TABLE_REGNUM
+ && (crtl->uses_pic_offset_table
+ || (TARGET_ID_SHARED_LIBRARY
+ && ! current_function_is_leaf))))
+ break;
+ if (i <= REG_P5)
+ tmpreg = gen_rtx_REG (SImode, i);
+ else
+ {
+ tmpreg = gen_rtx_REG (SImode, REG_P1);
+ tmpreg2 = gen_rtx_REG (SImode, REG_I0);
+ emit_move_insn (tmpreg2, tmpreg);
+ }
+ }
+
+ if (frame)
+ frame_related_constant_load (tmpreg, value, TRUE);
+ else
+ insn = emit_move_insn (tmpreg, GEN_INT (value));
+
+ insn = emit_insn (gen_addsi3 (reg, reg, tmpreg));
+ if (frame)
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (tmpreg2 != NULL_RTX)
+ emit_move_insn (tmpreg, tmpreg2);
+ }
+ else
+ do
+ {
+ int size = value;
+ rtx insn;
+
+ if (size > 60)
+ size = 60;
+ else if (size < -60)
+ /* We could use -62, but that would leave the stack unaligned, so
+ it's no good. */
+ size = -60;
+
+ insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
+ if (frame)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ value -= size;
+ }
+ while (value != 0);
+}
+
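+/* For illustration only (an assumption about the emitted assembly, not
+ part of the original sources): a small delta becomes a chain of 7-bit
+ adds, e.g. for VALUE == 100:
+
+ SP += 60;
+ SP += 40;
+
+ while a large delta such as VALUE == -4096 goes through the temporary,
+ assuming P1 is free as described above:
+
+ P1 = -4096 (X);
+ SP = SP + P1; */
+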
+/* Generate a LINK insn for a frame sized FRAME_SIZE. If this constant
+ is too large, generate a sequence of insns that has the same effect.
+ SPREG contains (reg:SI REG_SP). */
+
+static void
+emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size)
+{
+ HOST_WIDE_INT link_size = frame_size;
+ rtx insn;
+ int i;
+
+ if (link_size > 262140)
+ link_size = 262140;
+
+ /* Use a LINK insn with as big a constant as possible, then subtract
+ any remaining size from the SP. */
+ insn = emit_insn (gen_link (GEN_INT (-8 - link_size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx set = XVECEXP (PATTERN (insn), 0, i);
+ gcc_assert (GET_CODE (set) == SET);
+ RTX_FRAME_RELATED_P (set) = 1;
+ }
+
+ frame_size -= link_size;
+
+ if (frame_size > 0)
+ {
+ /* Must use a call-clobbered PREG that isn't the static chain. */
+ rtx tmpreg = gen_rtx_REG (Pmode, REG_P1);
+
+ frame_related_constant_load (tmpreg, -frame_size, TRUE);
+ insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
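+/* Sketch of the LINK semantics relied on above (an illustration, not
+ part of the original sources): "LINK n;" behaves roughly like
+
+ [--SP] = RETS;
+ [--SP] = FP;
+ FP = SP;
+ SP += -n;
+
+ which is why 8 extra bytes are folded into the constant, and why the
+ cap is 262140 (0xFFFF words of 4 bytes), the largest frame a single
+ LINK is assumed to encode here. */
+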
+/* Return the number of bytes we must reserve for outgoing arguments
+ in the current function's stack frame. */
+
+static HOST_WIDE_INT
+arg_area_size (void)
+{
+ if (crtl->outgoing_args_size)
+ {
+ if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
+ return crtl->outgoing_args_size;
+ else
+ return FIXED_STACK_AREA;
+ }
+ return 0;
+}
+
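+/* Example (illustrative): a function whose calls need only 4 bytes of
+ stack arguments still reserves FIXED_STACK_AREA bytes, so the prologue
+ and bfin_initial_elimination_offset agree on the frame layout. */
+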
+/* Save RETS and FP, and allocate a stack frame. ALL is true if the
+ function must save all its registers (true only for certain interrupt
+ handlers). */
+
+static void
+do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all)
+{
+ frame_size += arg_area_size ();
+
+ if (all
+ || stack_frame_needed_p ()
+ || (must_save_rets_p () && must_save_fp_p ()))
+ emit_link_insn (spreg, frame_size);
+ else
+ {
+ if (must_save_rets_p ())
+ {
+ rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode, spreg)),
+ bfin_rets_rtx);
+ rtx insn = emit_insn (pat);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ if (must_save_fp_p ())
+ {
+ rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode, spreg)),
+ gen_rtx_REG (Pmode, REG_FP));
+ rtx insn = emit_insn (pat);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ add_to_reg (spreg, -frame_size, 1, 0);
+ }
+}
+
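+/* Design note (an observation, not from the original sources): LINK
+ always saves both RETS and FP, so it is only used when both must be
+ saved or a stack frame is needed anyway; otherwise we push just the
+ register we need and adjust SP directly. */
+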
+/* Like do_link, but used for epilogues to deallocate the stack frame.
+ EPILOGUE_P is zero if this function is called for the prologue;
+ otherwise it is nonzero, and it is negative if this is for a
+ sibcall epilogue. */
+
+static void
+do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
+{
+ frame_size += arg_area_size ();
+
+ if (stack_frame_needed_p ())
+ emit_insn (gen_unlink ());
+ else
+ {
+ rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));
+
+ add_to_reg (spreg, frame_size, 0, epilogue_p);
+ if (all || must_save_fp_p ())
+ {
+ rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
+ emit_move_insn (fpreg, postinc);
+ emit_use (fpreg);
+ }
+ if (all || must_save_rets_p ())
+ {
+ emit_move_insn (bfin_rets_rtx, postinc);
+ emit_use (bfin_rets_rtx);
+ }
+ }
+}
+
+/* Generate a prologue suitable for a function of kind FKIND. This is
+ called for interrupt and exception handler prologues.
+ SPREG contains (reg:SI REG_SP). */
+
+static void
+expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
+{
+ HOST_WIDE_INT frame_size = get_frame_size ();
+ rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
+ rtx predec = gen_rtx_MEM (SImode, predec1);
+ rtx insn;
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ tree kspisusp = lookup_attribute ("kspisusp", attrs);
+
+ if (kspisusp)
+ {
+ insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* We need space on the stack in case we need to save the argument
+ registers. */
+ if (fkind == EXCPT_HANDLER)
+ {
+ insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* If we're calling other functions, they won't save their call-clobbered
+ registers, so we must save everything here. */
+ if (!current_function_is_leaf)
+ all = true;
+ expand_prologue_reg_save (spreg, all, true);
+
+ if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
+ {
+ rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
+ rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
+ emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
+ emit_insn (gen_movsi_high (p5reg, chipid));
+ emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
+ emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
+ }
+
+ if (lookup_attribute ("nesting", attrs))
+ {
+ rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
+ insn = emit_move_insn (predec, srcreg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ do_link (spreg, frame_size, all);
+
+ if (fkind == EXCPT_HANDLER)
+ {
+ rtx r0reg = gen_rtx_REG (SImode, REG_R0);
+ rtx r1reg = gen_rtx_REG (SImode, REG_R1);
+ rtx r2reg = gen_rtx_REG (SImode, REG_R2);
+
+ emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT));
+ emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26)));
+ emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26)));
+ emit_move_insn (r1reg, spreg);
+ emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP));
+ emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8)));
+ }
+}
+
+/* Generate an epilogue suitable for a function of kind FKIND. This is
+ called for interrupt and exception handler epilogues.
+ SPREG contains (reg:SI REG_SP). */
+
+static void
+expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all)
+{
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
+ rtx postinc = gen_rtx_MEM (SImode, postinc1);
+
+ /* A slightly crude technique to stop flow from trying to delete "dead"
+ insns. */
+ MEM_VOLATILE_P (postinc) = 1;
+
+ do_unlink (spreg, get_frame_size (), all, 1);
+
+ if (lookup_attribute ("nesting", attrs))
+ {
+ rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
+ emit_move_insn (srcreg, postinc);
+ }
+
+ /* If we're calling other functions, they won't save their call-clobbered
+ registers, so we must save (and restore) everything here. */
+ if (!current_function_is_leaf)
+ all = true;
+
+ expand_epilogue_reg_restore (spreg, all, true);
+
+ /* Deallocate any space we left on the stack in case we needed to save the
+ argument registers. */
+ if (fkind == EXCPT_HANDLER)
+ emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));
+
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
+}
+
+/* Used while emitting the prologue to generate code to load the correct value
+ into the PIC register, which is passed in DEST. */
+
+static rtx
+bfin_load_pic_reg (rtx dest)
+{
+ struct cgraph_local_info *i = NULL;
+ rtx addr;
+
+ i = cgraph_local_info (current_function_decl);
+
+ /* Functions local to the translation unit don't need to reload the
+ pic reg, since the caller always passes a usable one. */
+ if (i && i->local)
+ return pic_offset_table_rtx;
+
+ if (bfin_lib_id_given)
+ addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4);
+ else
+ addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_LIBRARY_OFFSET));
+ emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr)));
+ return dest;
+}
+
+/* Generate RTL for the prologue of the current function. */
+
+void
+bfin_expand_prologue (void)
+{
+ HOST_WIDE_INT frame_size = get_frame_size ();
+ rtx spreg = gen_rtx_REG (Pmode, REG_SP);
+ e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
+ rtx pic_reg_loaded = NULL_RTX;
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;
+
+ if (fkind != SUBROUTINE)
+ {
+ expand_interrupt_handler_prologue (spreg, fkind, all);
+ return;
+ }
+
+ if (crtl->limit_stack
+ || (TARGET_STACK_CHECK_L1
+ && !DECL_NO_LIMIT_STACK (current_function_decl)))
+ {
+ HOST_WIDE_INT offset
+ = bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
+ STACK_POINTER_REGNUM);
+ rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
+ rtx p2reg = gen_rtx_REG (Pmode, REG_P2);
+
+ if (!lim)
+ {
+ emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode));
+ emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg));
+ lim = p2reg;
+ }
+ if (GET_CODE (lim) == SYMBOL_REF)
+ {
+ if (TARGET_ID_SHARED_LIBRARY)
+ {
+ rtx p1reg = gen_rtx_REG (Pmode, REG_P1);
+ rtx val;
+ pic_reg_loaded = bfin_load_pic_reg (p2reg);
+ val = legitimize_pic_address (stack_limit_rtx, p1reg,
+ pic_reg_loaded);
+ emit_move_insn (p1reg, val);
+ frame_related_constant_load (p2reg, offset, FALSE);
+ emit_insn (gen_addsi3 (p2reg, p2reg, p1reg));
+ lim = p2reg;
+ }
+ else
+ {
+ rtx limit = plus_constant (lim, offset);
+ emit_move_insn (p2reg, limit);
+ lim = p2reg;
+ }
+ }
+ else
+ {
+ if (lim != p2reg)
+ emit_move_insn (p2reg, lim);
+ add_to_reg (p2reg, offset, 0, 0);
+ lim = p2reg;
+ }
+ emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim));
+ emit_insn (gen_trapifcc ());
+ }
+ expand_prologue_reg_save (spreg, all, false);
+
+ do_link (spreg, frame_size, all);
+
+ if (TARGET_ID_SHARED_LIBRARY
+ && !TARGET_SEP_DATA
+ && (crtl->uses_pic_offset_table
+ || !current_function_is_leaf))
+ bfin_load_pic_reg (pic_offset_table_rtx);
+}
+
+/* Generate RTL for the epilogue of the current function. NEED_RETURN is zero
+ if this is for a sibcall. EH_RETURN is nonzero if we're expanding an
+ eh_return pattern. SIBCALL_P is true if this is a sibcall epilogue,
+ false otherwise. */
+
+void
+bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p)
+{
+ rtx spreg = gen_rtx_REG (Pmode, REG_SP);
+ e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
+ int e = sibcall_p ? -1 : 1;
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;
+
+ if (fkind != SUBROUTINE)
+ {
+ expand_interrupt_handler_epilogue (spreg, fkind, all);
+ return;
+ }
+
+ do_unlink (spreg, get_frame_size (), all, e);
+
+ expand_epilogue_reg_restore (spreg, all, false);
+
+ /* Omit the return insn if this is for a sibcall. */
+ if (! need_return)
+ return;
+
+ if (eh_return)
+ emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));
+
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
+}
+
+/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
+
+int
+bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+
+ if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE
+ && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ return 1;
+}
+
+/* Return the value of the return address for the frame COUNT steps up
+ from the current frame, after the prologue.
+ We punt for everything but the current frame by returning const0_rtx. */
+
+rtx
+bfin_return_addr_rtx (int count)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, REG_RETS);
+}
+
+static rtx
+bfin_delegitimize_address (rtx orig_x)
+{
+ rtx x = orig_x;
+
+ if (GET_CODE (x) != MEM)
+ return orig_x;
+
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == UNSPEC
+ && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC
+ && GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ return XVECEXP (XEXP (x, 1), 0, 0);
+
+ return orig_x;
+}
+
+/* This predicate is used to compute the length of a load/store insn.
+ OP is a MEM rtx; we return nonzero if its addressing mode requires a
+ 32-bit instruction. */
+
+int
+effective_address_32bit_p (rtx op, enum machine_mode mode)
+{
+ HOST_WIDE_INT offset;
+
+ mode = GET_MODE (op);
+ op = XEXP (op, 0);
+
+ if (GET_CODE (op) != PLUS)
+ {
+ gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC
+ || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC);
+ return 0;
+ }
+
+ if (GET_CODE (XEXP (op, 1)) == UNSPEC)
+ return 1;
+
+ offset = INTVAL (XEXP (op, 1));
+
+ /* All byte loads use a 16-bit offset. */
+ if (GET_MODE_SIZE (mode) == 1)
+ return 1;
+
+ if (GET_MODE_SIZE (mode) == 4)
+ {
+ /* Frame pointer relative loads can use a negative offset, all others
+ are restricted to a small positive one. */
+ if (XEXP (op, 0) == frame_pointer_rtx)
+ return offset < -128 || offset > 60;
+ return offset < 0 || offset > 60;
+ }
+
+ /* Must be HImode now. */
+ return offset < 0 || offset > 30;
+}
+
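+/* Examples for the rules above (illustrative): for SImode, [P0 + 60]
+ still fits the 16-bit form, while [P0 + 64] or a negative offset such
+ as [P0 - 4] needs the 32-bit form; FP-relative loads are special-cased,
+ so [FP - 128] remains 16 bits. */
+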
+/* Returns true if X is a memory reference using an I register. */
+bool
+bfin_dsp_memref_p (rtx x)
+{
+ if (! MEM_P (x))
+ return false;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC
+ || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC)
+ x = XEXP (x, 0);
+ return IREG_P (x);
+}
+
+/* Return cost of the memory address ADDR.
+ All addressing modes are equally cheap on the Blackfin. */
+
+static int
+bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
+{
+ return 1;
+}
+
+/* Subroutine of print_operand; used to print a memory reference X to FILE. */
+
+void
+print_address_operand (FILE *file, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case PLUS:
+ output_address (XEXP (x, 0));
+ fprintf (file, "+");
+ output_address (XEXP (x, 1));
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "--");
+ output_address (XEXP (x, 0));
+ break;
+ case POST_INC:
+ output_address (XEXP (x, 0));
+ fprintf (file, "++");
+ break;
+ case POST_DEC:
+ output_address (XEXP (x, 0));
+ fprintf (file, "--");
+ break;
+
+ default:
+ gcc_assert (GET_CODE (x) != MEM);
+ print_operand (file, x, 0);
+ break;
+ }
+}
+
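+/* Resulting assembler syntax (for illustration): a PLUS prints as
+ "P0+4", PRE_DEC as "--SP", POST_INC as "P0++" and POST_DEC as "P0--";
+ the MEM case of print_operand wraps these in the customary []. */
+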
+/* intp DImode support (added by Tony):
+ * -- Q: selects the low word
+ * -- R: selects the high word
+ */
+
+void
+print_operand (FILE *file, rtx x, char code)
+{
+ enum machine_mode mode;
+
+ if (code == '!')
+ {
+ if (GET_MODE (current_output_insn) == SImode)
+ fprintf (file, " ||");
+ else
+ fprintf (file, ";");
+ return;
+ }
+
+ mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case 'j':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fprintf (file, "e");
+ break;
+ case NE:
+ fprintf (file, "ne");
+ break;
+ case GT:
+ fprintf (file, "g");
+ break;
+ case LT:
+ fprintf (file, "l");
+ break;
+ case GE:
+ fprintf (file, "ge");
+ break;
+ case LE:
+ fprintf (file, "le");
+ break;
+ case GTU:
+ fprintf (file, "g");
+ break;
+ case LTU:
+ fprintf (file, "l");
+ break;
+ case GEU:
+ fprintf (file, "ge");
+ break;
+ case LEU:
+ fprintf (file, "le");
+ break;
+ default:
+ output_operand_lossage ("invalid %%j value");
+ }
+ break;
+
+ case 'J': /* reverse logic */
+ switch (GET_CODE(x))
+ {
+ case EQ:
+ fprintf (file, "ne");
+ break;
+ case NE:
+ fprintf (file, "e");
+ break;
+ case GT:
+ fprintf (file, "le");
+ break;
+ case LT:
+ fprintf (file, "ge");
+ break;
+ case GE:
+ fprintf (file, "l");
+ break;
+ case LE:
+ fprintf (file, "g");
+ break;
+ case GTU:
+ fprintf (file, "le");
+ break;
+ case LTU:
+ fprintf (file, "ge");
+ break;
+ case GEU:
+ fprintf (file, "l");
+ break;
+ case LEU:
+ fprintf (file, "g");
+ break;
+ default:
+ output_operand_lossage ("invalid %%J value");
+ }
+ break;
+
+ default:
+ switch (GET_CODE (x))
+ {
+ case REG:
+ if (code == 'h')
+ {
+ if (REGNO (x) < 32)
+ fprintf (file, "%s", short_reg_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'd')
+ {
+ if (REGNO (x) < 32)
+ fprintf (file, "%s", high_reg_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'w')
+ {
+ if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
+ fprintf (file, "%s.w", reg_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'x')
+ {
+ if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
+ fprintf (file, "%s.x", reg_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'v')
+ {
+ if (REGNO (x) == REG_A0)
+ fprintf (file, "AV0");
+ else if (REGNO (x) == REG_A1)
+ fprintf (file, "AV1");
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'D')
+ {
+ if (D_REGNO_P (REGNO (x)))
+ fprintf (file, "%s", dregs_pair_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'H')
+ {
+ if ((mode == DImode || mode == DFmode) && REG_P (x))
+ fprintf (file, "%s", reg_names[REGNO (x) + 1]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else if (code == 'T')
+ {
+ if (D_REGNO_P (REGNO (x)))
+ fprintf (file, "%s", byte_reg_names[REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ }
+ else
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ break;
+
+ case MEM:
+ fputc ('[', file);
+ x = XEXP (x,0);
+ print_address_operand (file, x);
+ fputc (']', file);
+ break;
+
+ case CONST_INT:
+ if (code == 'M')
+ {
+ switch (INTVAL (x))
+ {
+ case MACFLAG_NONE:
+ break;
+ case MACFLAG_FU:
+ fputs ("(FU)", file);
+ break;
+ case MACFLAG_T:
+ fputs ("(T)", file);
+ break;
+ case MACFLAG_TFU:
+ fputs ("(TFU)", file);
+ break;
+ case MACFLAG_W32:
+ fputs ("(W32)", file);
+ break;
+ case MACFLAG_IS:
+ fputs ("(IS)", file);
+ break;
+ case MACFLAG_IU:
+ fputs ("(IU)", file);
+ break;
+ case MACFLAG_IH:
+ fputs ("(IH)", file);
+ break;
+ case MACFLAG_M:
+ fputs ("(M)", file);
+ break;
+ case MACFLAG_IS_M:
+ fputs ("(IS,M)", file);
+ break;
+ case MACFLAG_ISS2:
+ fputs ("(ISS2)", file);
+ break;
+ case MACFLAG_S2RND:
+ fputs ("(S2RND)", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+ else if (code == 'b')
+ {
+ if (INTVAL (x) == 0)
+ fputs ("+=", file);
+ else if (INTVAL (x) == 1)
+ fputs ("-=", file);
+ else
+ gcc_unreachable ();
+ break;
+ }
+ /* Moves to half registers with d or h modifiers always use unsigned
+ constants. */
+ else if (code == 'd')
+ x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
+ else if (code == 'h')
+ x = GEN_INT (INTVAL (x) & 0xffff);
+ else if (code == 'N')
+ x = GEN_INT (-INTVAL (x));
+ else if (code == 'X')
+ x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
+ else if (code == 'Y')
+ x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x)));
+ else if (code == 'Z')
+ /* Used for LINK insns. */
+ x = GEN_INT (-8 - INTVAL (x));
+
+ /* fall through */
+
+ case SYMBOL_REF:
+ output_addr_const (file, x);
+ break;
+
+ case CONST_DOUBLE:
+ output_operand_lossage ("invalid const_double operand");
+ break;
+
+ case UNSPEC:
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_MOVE_PIC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOT");
+ break;
+
+ case UNSPEC_MOVE_FDPIC:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOT17M4");
+ break;
+
+ case UNSPEC_FUNCDESC_GOT17M4:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@FUNCDESC_GOT17M4");
+ break;
+
+ case UNSPEC_LIBRARY_OFFSET:
+ fprintf (file, "_current_shared_library_p5_offset_");
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ output_addr_const (file, x);
+ }
+ }
+}
+
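+/* Usage examples (illustrative): for a register operand, "%h0" prints
+ its entry from short_reg_names and "%d0" the one from high_reg_names,
+ while "%H0" on a DImode or DFmode value prints the register holding
+ the upper word (REGNO + 1). */
+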
+/* Argument support functions. */
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+ Per the VDSP C Compiler manual, our ABI passes the first 3 words
+ of arguments in R0, R1 and R2.
+*/
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED)
+{
+ static CUMULATIVE_ARGS zero_cum;
+
+ *cum = zero_cum;
+
+ /* Set up the number of registers to use for passing arguments. */
+
+ cum->nregs = max_arg_registers;
+ cum->arg_regs = arg_regs;
+
+ cum->call_cookie = CALL_NORMAL;
+ /* Check for a longcall attribute. */
+ if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))
+ cum->call_cookie |= CALL_SHORT;
+ else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)))
+ cum->call_cookie |= CALL_LONG;
+
+ return;
+}
+
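+/* Worked example of the rule above (hypothetical signature, for
+ illustration):
+
+ void f (int a, long long b, int c);
+
+ A is passed in R0, B occupies R1/R2, and C, the fourth argument word,
+ goes on the stack. */
+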
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+bfin_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int count, bytes, words;
+
+ bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ cum->words += words;
+ cum->nregs -= words;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->arg_regs = NULL;
+ }
+ else
+ {
+ for (count = 1; count <= words; count++)
+ cum->arg_regs++;
+ }
+
+ return;
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+bfin_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int bytes
+ = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+
+ if (mode == VOIDmode)
+ /* Compute operand 2 of the call insn. */
+ return GEN_INT (cum->call_cookie);
+
+ if (bytes == -1)
+ return NULL_RTX;
+
+ if (cum->nregs)
+ return gen_rtx_REG (mode, *(cum->arg_regs));
+
+ return NULL_RTX;
+}
+
+/* For an arg passed partly in registers and partly in memory,
+ this is the number of bytes passed in registers.
+ For args passed entirely in registers or entirely in memory, zero.
+
+ Per the VDSP C Compiler manual, our ABI passes the first 3 words in
+ registers, so an argument larger than the register words still
+ available is split between registers and the stack. */
+
+static int
+bfin_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ int bytes
+ = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ int bytes_left = cum->nregs * UNITS_PER_WORD;
+
+ if (bytes == -1)
+ return 0;
+
+ if (bytes_left == 0)
+ return 0;
+ if (bytes > bytes_left)
+ return bytes_left;
+ return 0;
+}
+
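+/* Example (illustrative): if two argument words are already in use and
+ the next argument is a long long (8 bytes), only one register word is
+ left, so this returns 4; the low word travels in R2 and the rest on
+ the stack. */
+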
+/* Variable sized types are passed by reference. */
+
+static bool
+bfin_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
+}
+
+/* Decide whether a type should be returned in memory (true)
+ or in a register (false). This implements the
+ TARGET_RETURN_IN_MEMORY hook. */
+
+static bool
+bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ int size = int_size_in_bytes (type);
+ return size > 2 * UNITS_PER_WORD || size == -1;
+}
+
+/* Return the register in which the address used to store a structure
+ value is passed to a function. */
+static rtx
+bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, REG_P0);
+}
+
+/* Return true if register number N may be used to pass function parameters. */
+
+bool
+function_arg_regno_p (int n)
+{
+ int i;
+ for (i = 0; arg_regs[i] != -1; i++)
+ if (n == arg_regs[i])
+ return true;
+ return false;
+}
+
+/* Returns 1 if OP contains a symbol reference. */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ register const char *fmt;
+ register int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+
+static bool
+bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+ tree exp ATTRIBUTE_UNUSED)
+{
+ struct cgraph_local_info *this_func, *called_func;
+ e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
+ if (fkind != SUBROUTINE)
+ return false;
+ if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA)
+ return true;
+
+ /* When compiling for ID shared libraries, we can't sibcall a local
+ function from a non-local function, because the local function thinks
+ it does not need to reload P5 in the prologue, but the sibcall will
+ pop P5 in the sibcall epilogue, and we end up with the wrong value
+ in P5. */
+
+ if (!decl)
+ /* Not enough information. */
+ return false;
+
+ this_func = cgraph_local_info (current_function_decl);
+ called_func = cgraph_local_info (decl);
+ return !called_func->local || this_func->local;
+}
+
+/* Write a template for a trampoline to F. */
+
+static void
+bfin_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_FDPIC)
+ {
+ fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */
+ fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */
+ fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */
+ fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */
+ fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */
+ fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */
+ fprintf (f, "\t.dw\t0xac4b\n"); /* p3 = [p1 + 4] */
+ fprintf (f, "\t.dw\t0x9149\n"); /* p1 = [p1] */
+ fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/
+ }
+ else
+ {
+ fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */
+ fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */
+ fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */
+ fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */
+ fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/
+ }
+}
+
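+/* Layout note (an illustration, not from the original sources): each
+ ".dd 0x0000e1xx" word above is a 16-bit load-immediate opcode followed
+ by a zero placeholder halfword; bfin_trampoline_init below patches the
+ placeholders (at byte offsets i + 2, i + 6, i + 10 and i + 14) with
+ the function address and the static chain value. */
+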
+/* Emit RTL insns to initialize the variable parts of a trampoline at
+ M_TRAMP. FNDECL is the target function. CHAIN_VALUE is an RTX for
+ the static chain value for the function. */
+
+static void
+bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
+ rtx t2 = copy_to_reg (chain_value);
+ rtx mem;
+ int i = 0;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ if (TARGET_FDPIC)
+ {
+ rtx a = force_reg (Pmode, plus_constant (XEXP (m_tramp, 0), 8));
+ mem = adjust_address (m_tramp, Pmode, 0);
+ emit_move_insn (mem, a);
+ i = 8;
+ }
+
+ mem = adjust_address (m_tramp, HImode, i + 2);
+ emit_move_insn (mem, gen_lowpart (HImode, t1));
+ emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
+ mem = adjust_address (m_tramp, HImode, i + 6);
+ emit_move_insn (mem, gen_lowpart (HImode, t1));
+
+ mem = adjust_address (m_tramp, HImode, i + 10);
+ emit_move_insn (mem, gen_lowpart (HImode, t2));
+ emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
+ mem = adjust_address (m_tramp, HImode, i + 14);
+ emit_move_insn (mem, gen_lowpart (HImode, t2));
+}
+
+/* Emit insns to move operands[1] into operands[0]. */
+
+void
+emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+
+ gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed));
+ if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ else
+ operands[1] = legitimize_pic_address (operands[1], temp,
+ TARGET_FDPIC ? OUR_FDPIC_REG
+ : pic_offset_table_rtx);
+}
+
+/* Expand a move operation in mode MODE. The operands are in OPERANDS.
+ Returns true if no further code must be generated, false if the caller
+ should generate an insn to move OPERANDS[1] to OPERANDS[0]. */
+
+bool
+expand_move (rtx *operands, enum machine_mode mode)
+{
+ rtx op = operands[1];
+ if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC)
+ && SYMBOLIC_CONST (op))
+ emit_pic_move (operands, mode);
+ else if (mode == SImode && GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && !bfin_legitimate_constant_p (op))
+ {
+ rtx dest = operands[0];
+ rtx op0, op1;
+ gcc_assert (!reload_in_progress && !reload_completed);
+ op = XEXP (op, 0);
+ op0 = force_reg (mode, XEXP (op, 0));
+ op1 = XEXP (op, 1);
+ if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode))
+ op1 = force_reg (mode, op1);
+ if (GET_CODE (dest) == MEM)
+ dest = gen_reg_rtx (mode);
+ emit_insn (gen_addsi3 (dest, op0, op1));
+ if (dest == operands[0])
+ return true;
+ operands[1] = dest;
+ }
+ /* Don't generate memory->memory or constant->memory moves; go
+ through a register instead. */
+ else if ((reload_in_progress | reload_completed) == 0
+ && GET_CODE (operands[0]) == MEM
+ && GET_CODE (operands[1]) != REG)
+ operands[1] = force_reg (mode, operands[1]);
+ return false;
+}
+
+/* Split one or more DImode RTL references into pairs of SImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+ /* simplify_subreg refuses to split volatile memory addresses,
+ but we still have to handle them. */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, SImode, 0);
+ hi_half[num] = adjust_address (op, SImode, 4);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 0);
+ hi_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 4);
+ }
+ }
+}
+
+bool
+bfin_longcall_p (rtx op, int call_cookie)
+{
+ gcc_assert (GET_CODE (op) == SYMBOL_REF);
+ if (SYMBOL_REF_WEAK (op))
+ return 1;
+ if (call_cookie & CALL_SHORT)
+ return 0;
+ if (call_cookie & CALL_LONG)
+ return 1;
+ if (TARGET_LONG_CALLS)
+ return 1;
+ return 0;
+}
+
+/* Expand a call instruction. FNADDR is the call target, RETVAL the return value.
+ COOKIE is a CONST_INT holding the call_cookie prepared by init_cumulative_args.
+ SIBCALL is nonzero if this is a sibling call. */
+
+void
+bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall)
+{
+ rtx use = NULL, call;
+ rtx callee = XEXP (fnaddr, 0);
+ int nelts = 3;
+ rtx pat;
+ rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
+ rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
+ int n;
+
+ /* In an untyped call, we can get NULL for operand 2. */
+ if (cookie == NULL_RTX)
+ cookie = const0_rtx;
+
+ /* Static functions and indirect calls don't need the pic register. */
+ if (!TARGET_FDPIC && flag_pic
+ && GET_CODE (callee) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (callee))
+ use_reg (&use, pic_offset_table_rtx);
+
+ if (TARGET_FDPIC)
+ {
+ int caller_in_sram, callee_in_sram;
+
+ /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */
+ caller_in_sram = callee_in_sram = 0;
+
+ if (lookup_attribute ("l1_text",
+ DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
+ caller_in_sram = 1;
+ else if (lookup_attribute ("l2",
+ DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
+ caller_in_sram = 2;
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee)))
+ {
+ if (lookup_attribute
+ ("l1_text",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
+ callee_in_sram = 1;
+ else if (lookup_attribute
+ ("l2",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
+ callee_in_sram = 2;
+ }
+
+ if (GET_CODE (callee) != SYMBOL_REF
+ || bfin_longcall_p (callee, INTVAL (cookie))
+ || (GET_CODE (callee) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (callee)
+ && TARGET_INLINE_PLT)
+ || caller_in_sram != callee_in_sram
+ || (caller_in_sram && callee_in_sram
+ && (GET_CODE (callee) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (callee))))
+ {
+ rtx addr = callee;
+ if (! address_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ fnaddr = gen_reg_rtx (SImode);
+ emit_insn (gen_load_funcdescsi (fnaddr, addr));
+ fnaddr = gen_rtx_MEM (Pmode, fnaddr);
+
+ picreg = gen_reg_rtx (SImode);
+ emit_insn (gen_load_funcdescsi (picreg,
+ plus_constant (addr, 4)));
+ }
+
+ nelts++;
+ }
+ else if ((!register_no_elim_operand (callee, Pmode)
+ && GET_CODE (callee) != SYMBOL_REF)
+ || (GET_CODE (callee) == SYMBOL_REF
+ && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY)
+ || bfin_longcall_p (callee, INTVAL (cookie)))))
+ {
+ callee = copy_to_mode_reg (Pmode, callee);
+ fnaddr = gen_rtx_MEM (Pmode, callee);
+ }
+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+
+ pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts));
+ n = 0;
+ XVECEXP (pat, 0, n++) = call;
+ if (TARGET_FDPIC)
+ XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
+ XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
+ if (sibcall)
+ XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode);
+ else
+ XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
+ call = emit_call_insn (pat);
+ if (use)
+ CALL_INSN_FUNCTION_USAGE (call) = use;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+int
+hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* Allow only dregs to store values of mode HI or QI. */
+ enum reg_class rclass = REGNO_REG_CLASS (regno);
+
+ if (mode == CCmode)
+ return 0;
+
+ if (mode == V2HImode)
+ return D_REGNO_P (regno);
+ if (rclass == CCREGS)
+ return mode == BImode;
+ if (mode == PDImode || mode == V2PDImode)
+ return regno == REG_A0 || regno == REG_A1;
+
+ /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
+ up with a bad register class (such as ALL_REGS) for DImode. */
+ if (mode == DImode)
+ return regno < REG_M3;
+
+ if (mode == SImode
+ && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno))
+ return 1;
+
+ return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+
+static bool
+bfin_vector_mode_supported_p (enum machine_mode mode)
+{
+ return mode == V2HImode;
+}
+
+/* Return the cost of moving data from a register in class CLASS1 to
+ one in class CLASS2. A cost of 2 is the default. */
+
+int
+bfin_register_move_cost (enum machine_mode mode,
+ enum reg_class class1, enum reg_class class2)
+{
+ /* These need secondary reloads, so they're more expensive. */
+ if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
+ || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
+ return 4;
+
+ /* If optimizing for size, always prefer reg-reg over reg-memory moves. */
+ if (optimize_size)
+ return 2;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ /* Discourage trying to use the accumulators. */
+ if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0)
+ || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1)
+ || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0)
+ || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1))
+ return 20;
+ }
+ return 2;
+}
+
+/* Return the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ ??? In theory L1 memory has single-cycle latency. We should add a switch
+ that tells the compiler whether we expect to use only L1 memory for the
+ program; it'll make the costs more accurate. */
+
+int
+bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class rclass,
+ int in ATTRIBUTE_UNUSED)
+{
+ /* Make memory accesses slightly more expensive than any register-register
+ move. Also, penalize non-DP registers, since they need secondary
+ reloads to load and store. */
+ if (! reg_class_subset_p (rclass, DPREGS))
+ return 10;
+
+ return 8;
+}
+
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch register. Return the class needed for the
+ scratch register. */
+
+static reg_class_t
+bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ /* If we have HImode or QImode, we can only use DREGS as secondary registers;
+ in most other cases we can also use PREGS. */
+ enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS;
+ enum reg_class x_class = NO_REGS;
+ enum rtx_code code = GET_CODE (x);
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ if (code == SUBREG)
+ x = SUBREG_REG (x), code = GET_CODE (x);
+ if (REG_P (x))
+ {
+ int regno = REGNO (x);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ regno = reg_renumber[regno];
+
+ if (regno == -1)
+ code = MEM;
+ else
+ x_class = REGNO_REG_CLASS (regno);
+ }
+
+ /* We can be asked to reload (plus (FP) (large_constant)) into a DREG.
+ This happens as a side effect of register elimination, and we need
+ a scratch register to do it. */
+ if (fp_plus_const_operand (x, mode))
+ {
+ rtx op2 = XEXP (x, 1);
+ int large_constant_p = ! satisfies_constraint_Ks7 (op2);
+
+ if (rclass == PREGS || rclass == PREGS_CLOBBERED)
+ return NO_REGS;
+ /* If destination is a DREG, we can do this without a scratch register
+ if the constant is valid for an add instruction. */
+ if ((rclass == DREGS || rclass == DPREGS)
+ && ! large_constant_p)
+ return NO_REGS;
+ /* Reloading to anything other than a DREG? Use a PREG scratch
+ register. */
+ sri->icode = CODE_FOR_reload_insi;
+ return NO_REGS;
+ }
+
+ /* Data can usually be moved freely between registers of most classes.
+ AREGS are an exception; they can only move to or from another register
+ in AREGS or one in DREGS. They can also be assigned the constant 0. */
+ if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
+ return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
+ || rclass == ODD_AREGS
+ ? NO_REGS : DREGS);
+
+ if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
+ {
+ if (code == MEM)
+ {
+ sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi;
+ return NO_REGS;
+ }
+
+ if (x != const0_rtx && x_class != DREGS)
+ {
+ return DREGS;
+ }
+ else
+ return NO_REGS;
+ }
+
+ /* CCREGS can only be moved from/to DREGS. */
+ if (rclass == CCREGS && x_class != DREGS)
+ return DREGS;
+ if (x_class == CCREGS && rclass != DREGS)
+ return DREGS;
+
+ /* All registers other than AREGS can load arbitrary constants. The only
+ case that remains is MEM. */
+ if (code == MEM)
+ if (! reg_class_subset_p (rclass, default_class))
+ return default_class;
+
+ return NO_REGS;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
+
+static bool
+bfin_class_likely_spilled_p (reg_class_t rclass)
+{
+ switch (rclass)
+ {
+ case PREGS_CLOBBERED:
+ case PROLOGUE_REGS:
+ case P0REGS:
+ case D0REGS:
+ case D1REGS:
+ case D2REGS:
+ case CCREGS:
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+bfin_handle_option (size_t code, const char *arg, int value)
+{
+ switch (code)
+ {
+ case OPT_mshared_library_id_:
+ if (value > MAX_LIBRARY_ID)
+ error ("-mshared-library-id=%s is not between 0 and %d",
+ arg, MAX_LIBRARY_ID);
+ bfin_lib_id_given = 1;
+ return true;
+
+ case OPT_mcpu_:
+ {
+ const char *p, *q;
+ int i;
+
+ i = 0;
+ while ((p = bfin_cpus[i].name) != NULL)
+ {
+ if (strncmp (arg, p, strlen (p)) == 0)
+ break;
+ i++;
+ }
+
+ if (p == NULL)
+ {
+ error ("-mcpu=%s is not valid", arg);
+ return false;
+ }
+
+ bfin_cpu_type = bfin_cpus[i].type;
+
+ q = arg + strlen (p);
+
+ if (*q == '\0')
+ {
+ bfin_si_revision = bfin_cpus[i].si_revision;
+ bfin_workarounds |= bfin_cpus[i].workarounds;
+ }
+ else if (strcmp (q, "-none") == 0)
+ bfin_si_revision = -1;
+ else if (strcmp (q, "-any") == 0)
+ {
+ bfin_si_revision = 0xffff;
+ while (bfin_cpus[i].type == bfin_cpu_type)
+ {
+ bfin_workarounds |= bfin_cpus[i].workarounds;
+ i++;
+ }
+ }
+ else
+ {
+ unsigned int si_major, si_minor;
+ int rev_len, n;
+
+ rev_len = strlen (q);
+
+ if (sscanf (q, "-%u.%u%n", &si_major, &si_minor, &n) != 2
+ || n != rev_len
+ || si_major > 0xff || si_minor > 0xff)
+ {
+ invalid_silicon_revision:
+ error ("-mcpu=%s has invalid silicon revision", arg);
+ return false;
+ }
+
+ bfin_si_revision = (si_major << 8) | si_minor;
+
+ while (bfin_cpus[i].type == bfin_cpu_type
+ && bfin_cpus[i].si_revision != bfin_si_revision)
+ i++;
+
+ if (bfin_cpus[i].type != bfin_cpu_type)
+ goto invalid_silicon_revision;
+
+ bfin_workarounds |= bfin_cpus[i].workarounds;
+ }
+
+ return true;
+ }
+
+ default:
+ return true;
+ }
+}
+
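+/* Example (illustrative): "-mcpu=bf532-0.5" selects the bf532 with
+ bfin_si_revision == 0x0005, while "-mcpu=bf532-any" ORs together the
+ workarounds of every listed bf532 revision. */
+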
+static struct machine_function *
+bfin_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Implement the TARGET_OPTION_OVERRIDE hook. */
+
+static void
+bfin_option_override (void)
+{
+ /* If processor type is not specified, enable all workarounds. */
+ if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
+ {
+ int i;
+
+ for (i = 0; bfin_cpus[i].name != NULL; i++)
+ bfin_workarounds |= bfin_cpus[i].workarounds;
+
+ bfin_si_revision = 0xffff;
+ }
+
+ if (bfin_csync_anomaly == 1)
+ bfin_workarounds |= WA_SPECULATIVE_SYNCS;
+ else if (bfin_csync_anomaly == 0)
+ bfin_workarounds &= ~WA_SPECULATIVE_SYNCS;
+
+ if (bfin_specld_anomaly == 1)
+ bfin_workarounds |= WA_SPECULATIVE_LOADS;
+ else if (bfin_specld_anomaly == 0)
+ bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
+
+ if (TARGET_OMIT_LEAF_FRAME_POINTER)
+ flag_omit_frame_pointer = 1;
+
+ /* Library identification */
+ if (bfin_lib_id_given && ! TARGET_ID_SHARED_LIBRARY)
+ error ("-mshared-library-id= specified without -mid-shared-library");
+
+ if (stack_limit_rtx && TARGET_STACK_CHECK_L1)
+ error ("can%'t use multiple stack checking methods together");
+
+ if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC)
+ error ("ID shared libraries and FD-PIC mode can%'t be used together");
+
+ /* Don't allow the user to specify -mid-shared-library and -msep-data
+ together, as it makes little sense from a user's point of view... */
+ if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
+ error ("cannot specify both -msep-data and -mid-shared-library");
+ /* ... internally, however, it's nearly the same. */
+ if (TARGET_SEP_DATA)
+ target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY;
+
+ if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0)
+ flag_pic = 1;
+
+ /* There is no single unaligned SI op for PIC code. Sometimes we
+ need to use ".4byte" and sometimes we need to use ".picptr".
+ See bfin_assemble_integer for details. */
+ if (TARGET_FDPIC)
+ targetm.asm_out.unaligned_op.si = 0;
+
+ /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries,
+ since we don't support it and it'll just break. */
+ if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
+ flag_pic = 0;
+
+ if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
+ error ("-mmulticore can only be used with BF561");
+
+ if (TARGET_COREA && !TARGET_MULTICORE)
+ error ("-mcorea should be used with -mmulticore");
+
+ if (TARGET_COREB && !TARGET_MULTICORE)
+ error ("-mcoreb should be used with -mmulticore");
+
+ if (TARGET_COREA && TARGET_COREB)
+ error ("-mcorea and -mcoreb can%'t be used together");
+
+ flag_schedule_insns = 0;
+
+ init_machine_status = bfin_init_machine_status;
+}
+
+/* Return the destination address of BRANCH.
+ We need to use this instead of get_attr_length, because the
+ cbranch_with_nops pattern conservatively sets its length to 6, and
+ we still prefer to use shorter sequences. */
+
+static int
+branch_dest (rtx branch)
+{
+ rtx dest;
+ int dest_uid;
+ rtx pat = PATTERN (branch);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ dest = SET_SRC (pat);
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ return INSN_ADDRESSES (dest_uid);
+}
+
+/* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates
+ it's a branch that's predicted taken. */
+
+static int
+cbranch_predicted_taken_p (rtx insn)
+{
+ rtx x = find_reg_note (insn, REG_BR_PROB, 0);
+
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ return pred_val >= REG_BR_PROB_BASE / 2;
+ }
+
+ return 0;
+}
+
+/* Templates for use by asm_conditional_branch. */
+
+static const char *ccbranch_templates[][3] = {
+ { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" },
+ { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" },
+ { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" },
+ { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" },
+};
+
+/* Output INSN, which is a conditional branch instruction with operands
+ OPERANDS.
+
+ We deal with the various forms of conditional branches that can be generated
+ by bfin_reorg to prevent the hardware from doing speculative loads, by
+ - emitting a sufficient number of nops, if N_NOPS is nonzero, or
+ - always emitting the branch as predicted taken, if PREDICT_TAKEN is true.
+ Either of these is only necessary if the branch is short, otherwise the
+ template we use ends in an unconditional jump which flushes the pipeline
+ anyway. */
+
+void
+asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken)
+{
+ int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+ /* Note: for sequences like "if cc jump; jump.[sl] offset", the offset
+ is measured from the start of the "if cc" insn rather than from the
+ jump, so the range for jump.s becomes (-4094, 4096) instead of
+ (-4096, 4094). */
+ int len = (offset >= -1024 && offset <= 1022 ? 0
+ : offset >= -4094 && offset <= 4096 ? 1
+ : 2);
+ int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn);
+ int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT);
+ output_asm_insn (ccbranch_templates[idx][len], operands);
+ gcc_assert (n_nops == 0 || !bp);
+ if (len == 0)
+ while (n_nops-- > 0)
+ output_asm_insn ("nop;", NULL);
+}
+
+/* Emit rtl for a comparison operation CMP in mode MODE. Operands have been
+ stored in bfin_compare_op0 and bfin_compare_op1 already. */
+
+rtx
+bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ enum rtx_code code1, code2;
+ rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
+ rtx tem = bfin_cc_rtx;
+ enum rtx_code code = GET_CODE (cmp);
+
+ /* If we have a BImode input, then we already have a compare result, and
+ do not need to emit another comparison. */
+ if (GET_MODE (op0) == BImode)
+ {
+ gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
+ tem = op0, code2 = code;
+ }
+ else
+ {
+ switch (code) {
+ /* bfin has these conditions */
+ case EQ:
+ case LT:
+ case LE:
+ case LEU:
+ case LTU:
+ code1 = code;
+ code2 = NE;
+ break;
+ default:
+ code1 = reverse_condition (code);
+ code2 = EQ;
+ break;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, tem,
+ gen_rtx_fmt_ee (code1, BImode, op0, op1)));
+ }
+
+ return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode));
+}
+
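+/* Example (illustrative): GT is not one of the directly supported
+ conditions, so it is reversed to LE (code1) and paired with an EQ test
+ (code2): CC is set to (op0 <= op1) and the returned comparison is
+ true when CC is clear. */
+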
+/* Return nonzero iff C, interpreted as a 32-bit constant, has exactly
+ one bit set. */
+
+int
+log2constp (unsigned HOST_WIDE_INT c)
+{
+ c &= 0xFFFFFFFF;
+ return c != 0 && (c & (c-1)) == 0;
+}
+
+/* Returns the number of consecutive least significant zeros in the binary
+ representation of *V.
+ We modify *V to contain the original value arithmetically shifted right by
+ the number of zeros. */
+
+static int
+shiftr_zero (HOST_WIDE_INT *v)
+{
+ unsigned HOST_WIDE_INT tmp = *v;
+ unsigned HOST_WIDE_INT sgn;
+ int n = 0;
+
+ if (tmp == 0)
+ return 0;
+
+ sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1));
+ while ((tmp & 0x1) == 0 && n <= 32)
+ {
+ tmp = (tmp >> 1) | sgn;
+ n++;
+ }
+ *v = tmp;
+ return n;
+}
+
+/* After reload, split the load of an immediate constant. OPERANDS are the
+ operands of the movsi_insn pattern which we are splitting. We return
+ nonzero if we emitted a sequence to load the constant, zero if we emitted
+ nothing because we want to use the splitter's default sequence. */
+
+int
+split_load_immediate (rtx operands[])
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ HOST_WIDE_INT tmp;
+ HOST_WIDE_INT shifted = val;
+ HOST_WIDE_INT shifted_compl = ~val;
+ int num_zero = shiftr_zero (&shifted);
+ int num_compl_zero = shiftr_zero (&shifted_compl);
+ unsigned int regno = REGNO (operands[0]);
+
+ /* This case takes care of single-bit set/clear constants, which we could
+ also implement with BITSET/BITCLR. */
+ if (num_zero
+ && shifted >= -32768 && shifted < 65536
+ && (D_REGNO_P (regno)
+ || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2)))
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (shifted)));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero)));
+ return 1;
+ }
+
+ tmp = val & 0xFFFF;
+ tmp |= -(tmp & 0x8000);
+
+ /* If high word has one bit set or clear, try to use a bit operation. */
+ if (D_REGNO_P (regno))
+ {
+ if (log2constp (val & 0xFFFF0000))
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF)));
+ emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000)));
+ return 1;
+ }
+ else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0)
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
+ emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF)));
+ }
+ }
+
+ if (D_REGNO_P (regno))
+ {
+ if (tmp >= -64 && tmp <= 63)
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
+ emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536)));
+ return 1;
+ }
+
+ if ((val & 0xFFFF0000) == 0)
+ {
+ emit_insn (gen_movsi (operands[0], const0_rtx));
+ emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
+ return 1;
+ }
+
+ if ((val & 0xFFFF0000) == 0xFFFF0000)
+ {
+ emit_insn (gen_movsi (operands[0], constm1_rtx));
+ emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
+ return 1;
+ }
+ }
+
+ /* Need DREGs for the remaining case. */
+ if (regno > REG_R7)
+ return 0;
+
+ if (optimize_size
+ && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
+ {
+ /* If optimizing for size, generate a sequence that has more instructions
+ but is shorter. */
+ emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl)));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0],
+ GEN_INT (num_compl_zero)));
+ emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
+ return 1;
+ }
+ return 0;
+}
+
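+/* Example splits (illustrative, assuming a D register destination and
+ this assembler syntax): 0x80000 == 1 << 19 becomes
+
+ R0 = 1 (X);
+ R0 <<= 19;
+
+ while 0x12340001 loads the small low word and patches the high half:
+
+ R0 = 1 (X);
+ R0.H = 0x1234; */
+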
+/* Return true if VALUE is a legitimate constant offset for a memory
+ operand of mode MODE. Return false if not. */
+
+static bool
+bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value)
+{
+ unsigned HOST_WIDE_INT v = value > 0 ? value : -value;
+ int sz = GET_MODE_SIZE (mode);
+ int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2;
+ /* The usual offsettable_memref machinery doesn't work so well for this
+ port, so we deal with the problem here. */
+ if (value > 0 && sz == 8)
+ v += 4;
+ return (v & ~(0x7fff << shift)) == 0;
+}
+
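+/* Examples for bfin_valid_add (illustrative): with SImode (shift == 2)
+ the test accepts offsets that are multiples of 4 with magnitude up to
+ 0x7fff << 2 == 131068, matching the [Preg + uimm17m4] form; an
+ unaligned offset such as 2 fails because bits 0-1 survive the mask. */
+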
+static bool
+bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode,
+ enum rtx_code outer_code)
+{
+ if (strict)
+ return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH);
+ else
+ return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
+}
+
+/* Recognize an RTL expression that is a valid memory address for an
+ instruction. The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ Blackfin addressing modes are as follows:
+
+ [preg]
+ [preg + imm16]
+
+ B [ Preg + uimm15 ]
+ W [ Preg + uimm16m2 ]
+ [ Preg + uimm17m4 ]
+
+ [preg++]
+ [preg--]
+ [--sp]
+*/
+
+static bool
+bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ switch (GET_CODE (x)) {
+ case REG:
+ if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM))
+ return true;
+ break;
+ case PLUS:
+ if (REG_P (XEXP (x, 0))
+ && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS)
+ && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode)
+ || (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && bfin_valid_add (mode, INTVAL (XEXP (x, 1))))))
+ return true;
+ break;
+ case POST_INC:
+ case POST_DEC:
+ if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
+ && REG_P (XEXP (x, 0))
+ && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC))
+ return true;
+ case PRE_DEC:
+ if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
+ && XEXP (x, 0) == stack_pointer_rtx
+ && REG_P (XEXP (x, 0))
+ && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC))
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Decide whether we can force certain constants to memory. If we
+ decide we can't, the caller should be able to cope with it in
+ another way. */
+
+static bool
+bfin_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED)
+{
+ /* We have only one class of non-legitimate constants, and our movsi
+ expander knows how to handle them. Dropping these constants into the
+ data section would only shift the problem - we'd still get relocs
+ outside the object, in the data section rather than the text section. */
+ return true;
+}
+
+/* Ensure that for any constant of the form symbol + offset, the offset
+ remains within the object. Any other constants are ok.
+ This ensures that flat binaries never have to deal with relocations
+ crossing section boundaries. */
+
+bool
+bfin_legitimate_constant_p (rtx x)
+{
+ rtx sym;
+ HOST_WIDE_INT offset;
+
+ if (GET_CODE (x) != CONST)
+ return true;
+
+ x = XEXP (x, 0);
+ gcc_assert (GET_CODE (x) == PLUS);
+
+ sym = XEXP (x, 0);
+ x = XEXP (x, 1);
+ if (GET_CODE (sym) != SYMBOL_REF
+ || GET_CODE (x) != CONST_INT)
+ return true;
+ offset = INTVAL (x);
+
+ if (SYMBOL_REF_DECL (sym) == 0)
+ return true;
+ if (offset < 0
+ || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym))))
+ return false;
+
+ return true;
+}
+
+static bool
+bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int *total, bool speed)
+{
+ enum rtx_code code = (enum rtx_code) code_i;
+ enum rtx_code outer_code = (enum rtx_code) outer_code_i;
+ int cost2 = COSTS_N_INSNS (1);
+ rtx op0, op1;
+
+ switch (code)
+ {
+ case CONST_INT:
+ if (outer_code == SET || outer_code == PLUS)
+ *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
+ else if (outer_code == AND)
+ *total = log2constp (~INTVAL (x)) ? 0 : cost2;
+ else if (outer_code == LE || outer_code == LT || outer_code == EQ)
+ *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2;
+ else if (outer_code == LEU || outer_code == LTU)
+ *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2;
+ else if (outer_code == MULT)
+ *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2;
+ else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2))
+ *total = 0;
+ else if (outer_code == ASHIFT || outer_code == ASHIFTRT
+ || outer_code == LSHIFTRT)
+ *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2;
+ else if (outer_code == IOR || outer_code == XOR)
+ *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2;
+ else
+ *total = cost2;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case PLUS:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_MODE (x) == SImode)
+ {
+ if (GET_CODE (op0) == MULT
+ && GET_CODE (XEXP (op0, 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
+ if (val == 2 || val == 4)
+ {
+ *total = cost2;
+ *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
+ *total += rtx_cost (op1, outer_code, speed);
+ return true;
+ }
+ }
+ *total = cost2;
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, SET, speed);
+#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
+ towards creating too many induction variables. */
+ if (!reg_or_7bit_operand (op1, SImode))
+ *total += rtx_cost (op1, SET, speed);
+#endif
+ }
+ else if (GET_MODE (x) == DImode)
+ {
+ *total = 6 * cost2;
+ if (GET_CODE (op1) != CONST_INT
+ || !satisfies_constraint_Ks7 (op1))
+ *total += rtx_cost (op1, PLUS, speed);
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, PLUS, speed);
+ }
+ return true;
+
+ case MINUS:
+ if (GET_MODE (x) == DImode)
+ *total = 6 * cost2;
+ else
+ *total = cost2;
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (GET_MODE (x) == DImode)
+ *total = 6 * cost2;
+ else
+ *total = cost2;
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, code, speed);
+
+ return true;
+
+ case IOR:
+ case AND:
+ case XOR:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */
+ if (code == IOR)
+ {
+ if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
+ || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
+ || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
+ || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
+ {
+ *total = cost2;
+ return true;
+ }
+ }
+
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, code, speed);
+
+ if (GET_MODE (x) == DImode)
+ {
+ *total = 2 * cost2;
+ return true;
+ }
+ *total = cost2;
+ if (GET_MODE (x) != SImode)
+ return true;
+
+ if (code == AND)
+ {
+ if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ }
+ else
+ {
+ if (! regorlog2_operand (XEXP (x, 1), SImode))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ }
+
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ if (outer_code == SET
+ && XEXP (x, 1) == const1_rtx
+ && GET_CODE (XEXP (x, 2)) == CONST_INT)
+ {
+ *total = 2 * cost2;
+ return true;
+ }
+ /* fall through */
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ *total = cost2;
+ return true;
+
+ case MULT:
+ {
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == GET_CODE (op1)
+ && (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND))
+ {
+ *total = COSTS_N_INSNS (1);
+ op0 = XEXP (op0, 0);
+ op1 = XEXP (op1, 0);
+ }
+ else if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (3);
+
+ if (GET_CODE (op0) != REG
+ && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
+ *total += rtx_cost (op0, MULT, speed);
+ if (GET_CODE (op1) != REG
+ && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
+ *total += rtx_cost (op1, MULT, speed);
+ }
+ return true;
+
+ case UDIV:
+ case UMOD:
+ *total = COSTS_N_INSNS (32);
+ return true;
+
+ case VEC_CONCAT:
+ case VEC_SELECT:
+ if (outer_code == SET)
+ *total = cost2;
+ return true;
+
+ default:
+ return false;
+ }
+}
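+/* Editor's illustration, not from the original sources: the SImode
+   expression
+     (plus:SI (mult:SI (reg:SI) (const_int 4)) (reg:SI))
+   is costed as a single insn above, matching the Blackfin scaled-add
+   form "p0 = p1 + (p2 << 2);", while the same addition in DImode is
+   costed at six insn units.  */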
+
+/* Used for communication between {push,pop}_multiple_operation (which
+ we use not only as a predicate) and the corresponding output functions. */
+static int first_preg_to_save, first_dreg_to_save;
+static int n_regs_to_save;
+
+int
+push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int lastdreg = 8, lastpreg = 6;
+ int i, group;
+
+ first_preg_to_save = lastpreg;
+ first_dreg_to_save = lastdreg;
+ for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++)
+ {
+ rtx t = XVECEXP (op, 0, i);
+ rtx src, dest;
+ int regno;
+
+ if (GET_CODE (t) != SET)
+ return 0;
+
+ src = SET_SRC (t);
+ dest = SET_DEST (t);
+ if (GET_CODE (dest) != MEM || ! REG_P (src))
+ return 0;
+ dest = XEXP (dest, 0);
+ if (GET_CODE (dest) != PLUS
+ || ! REG_P (XEXP (dest, 0))
+ || REGNO (XEXP (dest, 0)) != REG_SP
+ || GET_CODE (XEXP (dest, 1)) != CONST_INT
+ || INTVAL (XEXP (dest, 1)) != -i * 4)
+ return 0;
+
+ regno = REGNO (src);
+ if (group == 0)
+ {
+ if (D_REGNO_P (regno))
+ {
+ group = 1;
+ first_dreg_to_save = lastdreg = regno - REG_R0;
+ }
+ else if (regno >= REG_P0 && regno <= REG_P7)
+ {
+ group = 2;
+ first_preg_to_save = lastpreg = regno - REG_P0;
+ }
+ else
+ return 0;
+
+ continue;
+ }
+
+ if (group == 1)
+ {
+ if (regno >= REG_P0 && regno <= REG_P7)
+ {
+ group = 2;
+ first_preg_to_save = lastpreg = regno - REG_P0;
+ }
+ else if (regno != REG_R0 + lastdreg + 1)
+ return 0;
+ else
+ lastdreg++;
+ }
+ else if (group == 2)
+ {
+ if (regno != REG_P0 + lastpreg + 1)
+ return 0;
+ lastpreg++;
+ }
+ }
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
+ return 1;
+}
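+/* Editor's illustration, not from the original sources: the PARALLEL
+   elements checked above (1 .. n-2) for "[--sp] = ( r7:5, p5:3 );"
+   would be
+     (set (mem (plus (reg SP) (const_int -4)))  (reg R5))
+     (set (mem (plus (reg SP) (const_int -8)))  (reg R6))
+     (set (mem (plus (reg SP) (const_int -12))) (reg R7))
+     (set (mem (plus (reg SP) (const_int -16))) (reg P3))
+     (set (mem (plus (reg SP) (const_int -20))) (reg P4))
+     (set (mem (plus (reg SP) (const_int -24))) (reg P5))
+   giving first_dreg_to_save == 5, first_preg_to_save == 3 and
+   n_regs_to_save == 6.  */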
+
+int
+pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int lastdreg = 8, lastpreg = 6;
+ int i, group;
+
+ for (i = 1, group = 0; i < XVECLEN (op, 0); i++)
+ {
+ rtx t = XVECEXP (op, 0, i);
+ rtx src, dest;
+ int regno;
+
+ if (GET_CODE (t) != SET)
+ return 0;
+
+ src = SET_SRC (t);
+ dest = SET_DEST (t);
+ if (GET_CODE (src) != MEM || ! REG_P (dest))
+ return 0;
+ src = XEXP (src, 0);
+
+ if (i == 1)
+ {
+ if (! REG_P (src) || REGNO (src) != REG_SP)
+ return 0;
+ }
+ else if (GET_CODE (src) != PLUS
+ || ! REG_P (XEXP (src, 0))
+ || REGNO (XEXP (src, 0)) != REG_SP
+ || GET_CODE (XEXP (src, 1)) != CONST_INT
+ || INTVAL (XEXP (src, 1)) != (i - 1) * 4)
+ return 0;
+
+ regno = REGNO (dest);
+ if (group == 0)
+ {
+ if (regno == REG_R7)
+ {
+ group = 1;
+ lastdreg = 7;
+ }
+ else if (regno != REG_P0 + lastpreg - 1)
+ return 0;
+ else
+ lastpreg--;
+ }
+ else if (group == 1)
+ {
+ if (regno != REG_R0 + lastdreg - 1)
+ return 0;
+ else
+ lastdreg--;
+ }
+ }
+ first_dreg_to_save = lastdreg;
+ first_preg_to_save = lastpreg;
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
+ return 1;
+}
+
+/* Emit assembly code for one multi-register push described by INSN, with
+ operands in OPERANDS. */
+
+void
+output_push_multiple (rtx insn, rtx *operands)
+{
+ char buf[80];
+ int ok;
+
+ /* Validate the insn again, and compute first_[dp]reg_to_save. */
+ ok = push_multiple_operation (PATTERN (insn), VOIDmode);
+ gcc_assert (ok);
+
+ if (first_dreg_to_save == 8)
+ sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
+ else if (first_preg_to_save == 6)
+ sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save);
+ else
+ sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n",
+ first_dreg_to_save, first_preg_to_save);
+
+ output_asm_insn (buf, operands);
+}
+
+/* Emit assembly code for one multi-register pop described by INSN, with
+ operands in OPERANDS. */
+
+void
+output_pop_multiple (rtx insn, rtx *operands)
+{
+ char buf[80];
+ int ok;
+
+ /* Validate the insn again, and compute first_[dp]reg_to_save. */
+ ok = pop_multiple_operation (PATTERN (insn), VOIDmode);
+ gcc_assert (ok);
+
+ if (first_dreg_to_save == 8)
+ sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save);
+ else if (first_preg_to_save == 6)
+ sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save);
+ else
+ sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n",
+ first_dreg_to_save, first_preg_to_save);
+
+ output_asm_insn (buf, operands);
+}
+
+/* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */
+
+static void
+single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset)
+{
+ rtx scratch = gen_reg_rtx (mode);
+ rtx srcmem, dstmem;
+
+ srcmem = adjust_address_nv (src, mode, offset);
+ dstmem = adjust_address_nv (dst, mode, offset);
+ emit_move_insn (scratch, srcmem);
+ emit_move_insn (dstmem, scratch);
+}
+
+/* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with
+ alignment ALIGN_EXP. Return true if successful, false if we should fall
+ back on a different method. */
+
+bool
+bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
+{
+ rtx srcreg, destreg, countreg;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+ if (GET_CODE (count_exp) == CONST_INT)
+ {
+ count = INTVAL (count_exp);
+#if 0
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return false;
+#endif
+ }
+
+ /* If optimizing for size, only do single copies inline. */
+ if (optimize_size)
+ {
+ if (count == 2 && align < 2)
+ return false;
+ if (count == 4 && align < 4)
+ return false;
+ if (count != 1 && count != 2 && count != 4)
+ return false;
+ }
+ if (align < 2 && count != 1)
+ return false;
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ if (destreg != XEXP (dst, 0))
+ dst = replace_equiv_address_nv (dst, destreg);
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ if (srcreg != XEXP (src, 0))
+ src = replace_equiv_address_nv (src, srcreg);
+
+ if (count != 0 && align >= 2)
+ {
+ unsigned HOST_WIDE_INT offset = 0;
+
+ if (align >= 4)
+ {
+ if ((count & ~3) == 4)
+ {
+ single_move_for_movmem (dst, src, SImode, offset);
+ offset = 4;
+ }
+ else if (count & ~3)
+ {
+ HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1;
+ countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
+
+ emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
+ cfun->machine->has_loopreg_clobber = true;
+ }
+ if (count & 2)
+ {
+ single_move_for_movmem (dst, src, HImode, offset);
+ offset += 2;
+ }
+ }
+ else
+ {
+ if ((count & ~1) == 2)
+ {
+ single_move_for_movmem (dst, src, HImode, offset);
+ offset = 2;
+ }
+ else if (count & ~1)
+ {
+ HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1;
+ countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
+
+ emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
+ cfun->machine->has_loopreg_clobber = true;
+ }
+ }
+ if (count & 1)
+ {
+ single_move_for_movmem (dst, src, QImode, offset);
+ }
+ return true;
+ }
+ return false;
+}
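+/* Editor's illustration, not from the original sources: for a copy with
+   count == 23 and align >= 4, the code above emits a rep_movsi loop for
+   the 20 word-aligned bytes (countreg == (23 >> 2) - 1 == 4), leaving
+   destreg and srcreg advanced past the copied words; the remaining
+   "count & 2" and "count & 1" bytes are then copied with one HImode and
+   one QImode move at offsets 0 and 2 from the advanced pointers.  */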
+
+/* Compute the alignment for a local variable.
+   TYPE is the data type, and ALIGN is the alignment that
+   the object would ordinarily have.  The value returned here is used
+   instead of that alignment to align the object.  */
+
+unsigned
+bfin_local_alignment (tree type, unsigned align)
+{
+  /* Increasing alignment for (relatively) big types allows the builtin
+     memcpy to use 32-bit loads/stores.  */
+ if (TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32)
+ return 32;
+ return align;
+}
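+/* Editor's illustration, not from the original sources: a local
+   "char buf[12]" (96 bits, above the 8-bit threshold) is raised to
+   32-bit alignment so that a builtin memcpy involving it can use word
+   loads and stores, while a plain "char" keeps its ordinary
+   alignment.  */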
+
+/* Implement TARGET_SCHED_ISSUE_RATE. */
+
+static int
+bfin_issue_rate (void)
+{
+ return 3;
+}
+
+static int
+bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type dep_insn_type;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost. */
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ dep_insn_type = get_attr_type (dep_insn);
+
+ if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD)
+ {
+ rtx pat = PATTERN (dep_insn);
+ rtx dest, src;
+
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ dest = SET_DEST (pat);
+ src = SET_SRC (pat);
+ if (! ADDRESS_REGNO_P (REGNO (dest))
+ || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
+ return cost;
+ return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
+ }
+
+ return cost;
+}
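+/* Editor's illustration, not from the original sources: in the sequence
+     p0 = [p1];   (TYPE_MCLD, sets an address register)
+     r0 = [p0];   (uses P0 as an address)
+   the hook above raises the dependence cost by 3 (by 4 when the
+   producer is a TYPE_MOVE), modelling the extra latency before a newly
+   loaded address register can be used in an address.  */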
+
+/* This function acts like NEXT_INSN, but is aware of three-insn bundles and
+ skips all subsequent parallel instructions if INSN is the start of such
+ a group. */
+static rtx
+find_next_insn_start (rtx insn)
+{
+ if (GET_MODE (insn) == SImode)
+ {
+ while (GET_MODE (insn) != QImode)
+ insn = NEXT_INSN (insn);
+ }
+ return NEXT_INSN (insn);
+}
+
+/* This function acts like PREV_INSN, but is aware of three-insn bundles and
+   moves back to the first instruction of a bundle if the previous
+   instruction ends one.  */
+static rtx
+find_prev_insn_start (rtx insn)
+{
+ insn = PREV_INSN (insn);
+ gcc_assert (GET_MODE (insn) != SImode);
+ if (GET_MODE (insn) == QImode)
+ {
+ while (GET_MODE (PREV_INSN (insn)) == SImode)
+ insn = PREV_INSN (insn);
+ }
+ return insn;
+}
+
+/* Increment the counter for the number of hardware loops in the
+   current function.  */
+
+void
+bfin_hardware_loop (void)
+{
+ cfun->machine->has_hardware_loops++;
+}
+
+/* Maximum loop nesting depth. */
+#define MAX_LOOP_DEPTH 2
+
+/* Maximum size of a loop. */
+#define MAX_LOOP_LENGTH 2042
+
+/* Maximum distance of the LSETUP instruction from the loop start. */
+#define MAX_LSETUP_DISTANCE 30
+
+/* We need to keep a vector of loops.  */
+typedef struct loop_info_d *loop_info;
+DEF_VEC_P (loop_info);
+DEF_VEC_ALLOC_P (loop_info,heap);
+
+/* Information about a loop we have found (or are in the process of
+ finding). */
+struct GTY (()) loop_info_d
+{
+ /* loop number, for dumps */
+ int loop_no;
+
+ /* All edges that jump into and out of the loop. */
+ VEC(edge,gc) *incoming;
+
+ /* We can handle two cases: all incoming edges have the same destination
+ block, or all incoming edges have the same source block. These two
+ members are set to the common source or destination we found, or NULL
+ if different blocks were found. If both are NULL the loop can't be
+ optimized. */
+ basic_block incoming_src;
+ basic_block incoming_dest;
+
+ /* First block in the loop. This is the one branched to by the loop_end
+ insn. */
+ basic_block head;
+
+ /* Last block in the loop (the one with the loop_end insn). */
+ basic_block tail;
+
+ /* The successor block of the loop. This is the one the loop_end insn
+ falls into. */
+ basic_block successor;
+
+ /* The last instruction in the tail. */
+ rtx last_insn;
+
+ /* The loop_end insn. */
+ rtx loop_end;
+
+ /* The iteration register. */
+ rtx iter_reg;
+
+ /* The new label placed at the beginning of the loop. */
+ rtx start_label;
+
+ /* The new label placed at the end of the loop. */
+ rtx end_label;
+
+ /* The length of the loop. */
+ int length;
+
+ /* The nesting depth of the loop. */
+ int depth;
+
+ /* Nonzero if we can't optimize this loop. */
+ int bad;
+
+ /* True if we have visited this loop. */
+ int visited;
+
+ /* True if this loop body clobbers any of LC0, LT0, or LB0. */
+ int clobber_loop0;
+
+ /* True if this loop body clobbers any of LC1, LT1, or LB1. */
+ int clobber_loop1;
+
+ /* Next loop in the graph. */
+ struct loop_info_d *next;
+
+ /* Immediate outer loop of this loop. */
+ struct loop_info_d *outer;
+
+ /* Vector of blocks only within the loop, including those within
+ inner loops. */
+ VEC (basic_block,heap) *blocks;
+
+ /* Same information in a bitmap. */
+ bitmap block_bitmap;
+
+  /* Vector of inner loops within this loop.  */
+ VEC (loop_info,heap) *loops;
+};
+
+static void
+bfin_dump_loops (loop_info loops)
+{
+ loop_info loop;
+
+ for (loop = loops; loop; loop = loop->next)
+ {
+ loop_info i;
+ basic_block b;
+ unsigned ix;
+
+ fprintf (dump_file, ";; loop %d: ", loop->loop_no);
+ if (loop->bad)
+ fprintf (dump_file, "(bad) ");
+ fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth);
+
+ fprintf (dump_file, " blocks: [ ");
+ FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, b)
+ fprintf (dump_file, "%d ", b->index);
+ fprintf (dump_file, "] ");
+
+ fprintf (dump_file, " inner loops: [ ");
+ FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, i)
+ fprintf (dump_file, "%d ", i->loop_no);
+ fprintf (dump_file, "]\n");
+ }
+ fprintf (dump_file, "\n");
+}
+
+/* Return true if basic block BB is contained in LOOP (or one of its
+   inferiors), as recorded in the loop's block bitmap.  */
+
+static bool
+bfin_bb_in_loop (loop_info loop, basic_block bb)
+{
+ return bitmap_bit_p (loop->block_bitmap, bb->index);
+}
+
+/* Scan the blocks of LOOP (and its inferiors) looking for uses of
+   REG.  Return true if we find any.  Don't count the loop's loop_end
+   insn if it matches LOOP_END.  */
+
+static bool
+bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
+{
+ unsigned ix;
+ basic_block bb;
+
+ FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb)
+ {
+ rtx insn;
+
+ for (insn = BB_HEAD (bb);
+ insn != NEXT_INSN (BB_END (bb));
+ insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ continue;
+ if (insn == loop_end)
+ continue;
+ if (reg_mentioned_p (reg, PATTERN (insn)))
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Estimate the length of INSN conservatively. */
+
+static int
+length_for_loop (rtx insn)
+{
+ int length = 0;
+ if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
+ {
+ if (ENABLE_WA_SPECULATIVE_SYNCS)
+ length = 8;
+ else if (ENABLE_WA_SPECULATIVE_LOADS)
+ length = 6;
+ }
+ else if (LABEL_P (insn))
+ {
+ if (ENABLE_WA_SPECULATIVE_SYNCS)
+ length = 4;
+ }
+
+ if (NONDEBUG_INSN_P (insn))
+ length += get_attr_length (insn);
+
+ return length;
+}
+
+/* Optimize LOOP. */
+
+static void
+bfin_optimize_loop (loop_info loop)
+{
+ basic_block bb;
+ loop_info inner;
+ rtx insn, last_insn;
+ rtx loop_init, start_label, end_label;
+ rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
+ rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
+ rtx lc_reg, lt_reg, lb_reg;
+ rtx seq, seq_end;
+ int length;
+ unsigned ix;
+ int inner_depth = 0;
+
+ if (loop->visited)
+ return;
+
+ loop->visited = 1;
+
+ if (loop->bad)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
+ goto bad_loop;
+ }
+
+ /* Every loop contains in its list of inner loops every loop nested inside
+ it, even if there are intermediate loops. This works because we're doing
+ a depth-first search here and never visit a loop more than once. */
+ FOR_EACH_VEC_ELT (loop_info, loop->loops, ix, inner)
+ {
+ bfin_optimize_loop (inner);
+
+ if (!inner->bad && inner_depth < inner->depth)
+ {
+ inner_depth = inner->depth;
+
+ loop->clobber_loop0 |= inner->clobber_loop0;
+ loop->clobber_loop1 |= inner->clobber_loop1;
+ }
+ }
+
+ loop->depth = inner_depth + 1;
+ if (loop->depth > MAX_LOOP_DEPTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
+ goto bad_loop;
+ }
+
+ /* Get the loop iteration register. */
+ iter_reg = loop->iter_reg;
+
+ if (!REG_P (iter_reg))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d iteration count not in a register\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+ scratchreg = NULL_RTX;
+ scratch_init = iter_reg;
+ scratch_init_insn = NULL_RTX;
+ if (!PREG_P (iter_reg) && loop->incoming_src)
+ {
+ basic_block bb_in = loop->incoming_src;
+ int i;
+ for (i = REG_P0; i <= REG_P5; i++)
+ if ((df_regs_ever_live_p (i)
+ || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
+ && call_used_regs[i]))
+ && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
+ {
+ scratchreg = gen_rtx_REG (SImode, i);
+ break;
+ }
+ for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
+ insn = PREV_INSN (insn))
+ {
+ rtx set;
+ if (NOTE_P (insn) || BARRIER_P (insn))
+ continue;
+ set = single_set (insn);
+ if (set && rtx_equal_p (SET_DEST (set), iter_reg))
+ {
+ if (CONSTANT_P (SET_SRC (set)))
+ {
+ scratch_init = SET_SRC (set);
+ scratch_init_insn = insn;
+ }
+ break;
+ }
+ else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
+ break;
+ }
+ }
+
+ if (loop->incoming_src)
+ {
+ /* Make sure the predecessor is before the loop start label, as required by
+ the LSETUP instruction. */
+ length = 0;
+ insn = BB_END (loop->incoming_src);
+ /* If we have to insert the LSETUP before a jump, count that jump in the
+ length. */
+ if (VEC_length (edge, loop->incoming) > 1
+ || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
+ {
+ gcc_assert (JUMP_P (insn));
+ insn = PREV_INSN (insn);
+ }
+
+ for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
+ length += length_for_loop (insn);
+
+ if (!insn)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+
+ /* Account for the pop of a scratch register where necessary. */
+ if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
+ && ENABLE_WA_LOAD_LCREGS)
+ length += 2;
+
+ if (length > MAX_LSETUP_DISTANCE)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
+ goto bad_loop;
+ }
+ }
+
+ /* Check if start_label appears before loop_end and calculate the
+ offset between them. We calculate the length of instructions
+ conservatively. */
+ length = 0;
+ for (insn = loop->start_label;
+ insn && insn != loop->loop_end;
+ insn = NEXT_INSN (insn))
+ length += length_for_loop (insn);
+
+ if (!insn)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+
+ loop->length = length;
+ if (loop->length > MAX_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+ goto bad_loop;
+ }
+
+ /* Scan all the blocks to make sure they don't use iter_reg. */
+ if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
+ goto bad_loop;
+ }
+
+  /* Scan all the insns to see if the loop body clobbers
+     any hardware loop registers.  */
+
+ reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
+ reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
+ reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
+ reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
+ reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
+ reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
+
+ FOR_EACH_VEC_ELT (basic_block, loop->blocks, ix, bb)
+ {
+ rtx insn;
+
+ for (insn = BB_HEAD (bb);
+ insn != NEXT_INSN (BB_END (bb));
+ insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ if (reg_set_p (reg_lc0, insn)
+ || reg_set_p (reg_lt0, insn)
+ || reg_set_p (reg_lb0, insn))
+ loop->clobber_loop0 = 1;
+
+ if (reg_set_p (reg_lc1, insn)
+ || reg_set_p (reg_lt1, insn)
+ || reg_set_p (reg_lb1, insn))
+ loop->clobber_loop1 |= 1;
+ }
+ }
+
+ if ((loop->clobber_loop0 && loop->clobber_loop1)
+ || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
+ {
+ loop->depth = MAX_LOOP_DEPTH + 1;
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d no loop reg available\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+
+ /* There should be an instruction before the loop_end instruction
+ in the same basic block. And the instruction must not be
+ - JUMP
+ - CONDITIONAL BRANCH
+ - CALL
+ - CSYNC
+ - SSYNC
+ - Returns (RTS, RTN, etc.) */
+
+ bb = loop->tail;
+ last_insn = find_prev_insn_start (loop->loop_end);
+
+ while (1)
+ {
+ for (; last_insn != BB_HEAD (bb);
+ last_insn = find_prev_insn_start (last_insn))
+ if (NONDEBUG_INSN_P (last_insn))
+ break;
+
+ if (last_insn != BB_HEAD (bb))
+ break;
+
+ if (single_pred_p (bb)
+ && single_pred_edge (bb)->flags & EDGE_FALLTHRU
+ && single_pred (bb) != ENTRY_BLOCK_PTR)
+ {
+ bb = single_pred (bb);
+ last_insn = BB_END (bb);
+ continue;
+ }
+ else
+ {
+ last_insn = NULL_RTX;
+ break;
+ }
+ }
+
+ if (!last_insn)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has no last instruction\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+
+ if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has bad last instruction\n",
+ loop->loop_no);
+ goto bad_loop;
+ }
+ /* In all other cases, try to replace a bad last insn with a nop. */
+ else if (JUMP_P (last_insn)
+ || CALL_P (last_insn)
+ || get_attr_type (last_insn) == TYPE_SYNC
+ || get_attr_type (last_insn) == TYPE_CALL
+ || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
+ || recog_memoized (last_insn) == CODE_FOR_return_internal
+ || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (last_insn)) >= 0)
+ {
+ if (loop->length + 2 > MAX_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+ goto bad_loop;
+ }
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
+ loop->loop_no);
+
+ last_insn = emit_insn_after (gen_forced_nop (), last_insn);
+ }
+
+ loop->last_insn = last_insn;
+
+ /* The loop is good for replacement. */
+ start_label = loop->start_label;
+ end_label = gen_label_rtx ();
+ iter_reg = loop->iter_reg;
+
+ if (loop->depth == 1 && !loop->clobber_loop1)
+ {
+ lc_reg = reg_lc1;
+ lt_reg = reg_lt1;
+ lb_reg = reg_lb1;
+ loop->clobber_loop1 = 1;
+ }
+ else
+ {
+ lc_reg = reg_lc0;
+ lt_reg = reg_lt0;
+ lb_reg = reg_lb0;
+ loop->clobber_loop0 = 1;
+ }
+
+ loop->end_label = end_label;
+
+ /* Create a sequence containing the loop setup. */
+ start_sequence ();
+
+ /* LSETUP only accepts P registers. If we have one, we can use it,
+ otherwise there are several ways of working around the problem.
+ If we're not affected by anomaly 312, we can load the LC register
+ from any iteration register, and use LSETUP without initialization.
+ If we've found a P scratch register that's not live here, we can
+ instead copy the iter_reg into that and use an initializing LSETUP.
+ If all else fails, push and pop P0 and use it as a scratch. */
+ if (P_REGNO_P (REGNO (iter_reg)))
+ {
+ loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg, iter_reg);
+ seq_end = emit_insn (loop_init);
+ }
+ else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
+ {
+ emit_insn (gen_movsi (lc_reg, iter_reg));
+ loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg);
+ seq_end = emit_insn (loop_init);
+ }
+ else if (scratchreg != NULL_RTX)
+ {
+ emit_insn (gen_movsi (scratchreg, scratch_init));
+ loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg, scratchreg);
+ seq_end = emit_insn (loop_init);
+ if (scratch_init_insn != NULL_RTX)
+ delete_insn (scratch_init_insn);
+ }
+ else
+ {
+ rtx p0reg = gen_rtx_REG (SImode, REG_P0);
+ rtx push = gen_frame_mem (SImode,
+ gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
+ rtx pop = gen_frame_mem (SImode,
+ gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+ emit_insn (gen_movsi (push, p0reg));
+ emit_insn (gen_movsi (p0reg, scratch_init));
+ loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+ lb_reg, end_label,
+ lc_reg, p0reg);
+ emit_insn (loop_init);
+ seq_end = emit_insn (gen_movsi (p0reg, pop));
+ if (scratch_init_insn != NULL_RTX)
+ delete_insn (scratch_init_insn);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; replacing loop %d initializer with\n",
+ loop->loop_no);
+ print_rtl_single (dump_file, loop_init);
+ fprintf (dump_file, ";; replacing loop %d terminator with\n",
+ loop->loop_no);
+ print_rtl_single (dump_file, loop->loop_end);
+ }
+
+ /* If the loop isn't entered at the top, also create a jump to the entry
+ point. */
+ if (!loop->incoming_src && loop->head != loop->incoming_dest)
+ {
+ rtx label = BB_HEAD (loop->incoming_dest);
+ /* If we're jumping to the final basic block in the loop, and there's
+ only one cheap instruction before the end (typically an increment of
+ an induction variable), we can just emit a copy here instead of a
+ jump. */
+ if (loop->incoming_dest == loop->tail
+ && next_real_insn (label) == last_insn
+ && asm_noperands (last_insn) < 0
+ && GET_CODE (PATTERN (last_insn)) == SET)
+ {
+ seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
+ }
+ else
+ {
+ emit_jump_insn (gen_jump (label));
+ seq_end = emit_barrier ();
+ }
+ }
+
+ seq = get_insns ();
+ end_sequence ();
+
+ if (loop->incoming_src)
+ {
+ rtx prev = BB_END (loop->incoming_src);
+ if (VEC_length (edge, loop->incoming) > 1
+ || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
+ {
+ gcc_assert (JUMP_P (prev));
+ prev = PREV_INSN (prev);
+ }
+ emit_insn_after (seq, prev);
+ }
+ else
+ {
+ basic_block new_bb;
+ edge e;
+ edge_iterator ei;
+
+#ifdef ENABLE_CHECKING
+ if (loop->head != loop->incoming_dest)
+ {
+ /* We aren't entering the loop at the top. Since we've established
+ that the loop is entered only at one point, this means there
+ can't be fallthru edges into the head. Any such fallthru edges
+ would become invalid when we insert the new block, so verify
+ that this does not in fact happen. */
+ FOR_EACH_EDGE (e, ei, loop->head->preds)
+ gcc_assert (!(e->flags & EDGE_FALLTHRU));
+ }
+#endif
+
+ emit_insn_before (seq, BB_HEAD (loop->head));
+ seq = emit_label_before (gen_label_rtx (), seq);
+
+ new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
+ FOR_EACH_EDGE (e, ei, loop->incoming)
+ {
+ if (!(e->flags & EDGE_FALLTHRU)
+ || e->dest != loop->head)
+ redirect_edge_and_branch_force (e, new_bb);
+ else
+ redirect_edge_succ (e, new_bb);
+ }
+ e = make_edge (new_bb, loop->head, 0);
+ }
+
+ delete_insn (loop->loop_end);
+ /* Insert the loop end label before the last instruction of the loop. */
+ emit_label_before (loop->end_label, loop->last_insn);
+
+ return;
+
+ bad_loop:
+
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
+
+ loop->bad = 1;
+
+ if (DPREG_P (loop->iter_reg))
+ {
+      /* If loop->iter_reg is a DREG or PREG, we can split it here
+	 without a scratch register.  */
+ rtx insn, test;
+
+ emit_insn_before (gen_addsi3 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+
+ test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
+ insn = emit_jump_insn_before (gen_cbranchsi4 (test,
+ loop->iter_reg, const0_rtx,
+ loop->start_label),
+ loop->loop_end);
+
+ JUMP_LABEL (insn) = loop->start_label;
+ LABEL_NUSES (loop->start_label)++;
+ delete_insn (loop->loop_end);
+ }
+}
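+/* Editor's illustration, not from the original sources: the intended
+   transformation, assuming the trip count is in P1 and loop register
+   set 0 is used:
+     before:                        after:
+       L_start:                       LSETUP (L_start, L_end) LC0 = P1;
+         <body>                       L_start:
+         P1 += -1;                       <body>
+         CC = P1 == 0;                L_end:
+         if !CC jump L_start;            <last body insn>
+   The loop_end pattern is deleted, and the new end label is placed
+   before the loop's last real instruction.  */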
+
+/* Called from bfin_reorg_loops when a potential loop end is found.  LOOP is
+   a newly set up structure describing the loop; it is this function's
+   responsibility to fill in most of it.  TAIL_BB and TAIL_INSN point to the
+   loop_end insn and its enclosing basic block.  */
+
+static void
+bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn)
+{
+ unsigned dwork = 0;
+ basic_block bb;
+ VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20);
+
+ loop->tail = tail_bb;
+ loop->head = BRANCH_EDGE (tail_bb)->dest;
+ loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
+ loop->loop_end = tail_insn;
+ loop->last_insn = NULL_RTX;
+ loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1));
+ loop->depth = loop->length = 0;
+ loop->visited = 0;
+ loop->clobber_loop0 = loop->clobber_loop1 = 0;
+ loop->outer = NULL;
+ loop->loops = NULL;
+ loop->incoming = VEC_alloc (edge, gc, 2);
+ loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
+ loop->end_label = NULL_RTX;
+ loop->bad = 0;
+
+ VEC_safe_push (basic_block, heap, works, loop->head);
+
+ while (VEC_iterate (basic_block, works, dwork++, bb))
+ {
+ edge e;
+ edge_iterator ei;
+ if (bb == EXIT_BLOCK_PTR)
+ {
+ /* We've reached the exit block. The loop must be bad. */
+ if (dump_file)
+ fprintf (dump_file,
+ ";; Loop is bad - reached exit block while scanning\n");
+ loop->bad = 1;
+ break;
+ }
+
+ if (!bitmap_set_bit (loop->block_bitmap, bb->index))
+ continue;
+
+ /* We've not seen this block before. Add it to the loop's
+ list and then add each successor to the work list. */
+
+ VEC_safe_push (basic_block, heap, loop->blocks, bb);
+
+ if (bb != tail_bb)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
+ if (!REGNO_REG_SET_P (df_get_live_in (succ),
+ REGNO (loop->iter_reg)))
+ continue;
+ if (!VEC_space (basic_block, works, 1))
+ {
+ if (dwork)
+ {
+ VEC_block_remove (basic_block, works, 0, dwork);
+ dwork = 0;
+ }
+ else
+ VEC_reserve (basic_block, heap, works, 1);
+ }
+ VEC_quick_push (basic_block, works, succ);
+ }
+ }
+ }
+
+ /* Find the predecessor, and make sure nothing else jumps into this loop. */
+ if (!loop->bad)
+ {
+ int pass, retry;
+ FOR_EACH_VEC_ELT (basic_block, loop->blocks, dwork, bb)
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ basic_block pred = e->src;
+
+ if (!bfin_bb_in_loop (loop, pred))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
+ loop->loop_no, pred->index,
+ e->dest->index);
+ VEC_safe_push (edge, gc, loop->incoming, e);
+ }
+ }
+ }
+
+ for (pass = 0, retry = 1; retry && pass < 2; pass++)
+ {
+ edge e;
+ edge_iterator ei;
+ bool first = true;
+ retry = 0;
+
+ FOR_EACH_EDGE (e, ei, loop->incoming)
+ {
+ if (first)
+ {
+ loop->incoming_src = e->src;
+ loop->incoming_dest = e->dest;
+ first = false;
+ }
+ else
+ {
+ if (e->dest != loop->incoming_dest)
+ loop->incoming_dest = NULL;
+ if (e->src != loop->incoming_src)
+ loop->incoming_src = NULL;
+ }
+ if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
+ {
+ if (pass == 0)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; retrying loop %d with forwarder blocks\n",
+ loop->loop_no);
+ retry = 1;
+ break;
+ }
+ loop->bad = 1;
+ if (dump_file)
+ fprintf (dump_file,
+ ";; can't find suitable entry for loop %d\n",
+ loop->loop_no);
+ goto out;
+ }
+ }
+ if (retry)
+ {
+ retry = 0;
+ FOR_EACH_EDGE (e, ei, loop->incoming)
+ {
+ if (forwarder_block_p (e->src))
+ {
+ edge e2;
+ edge_iterator ei2;
+
+ if (dump_file)
+ fprintf (dump_file,
+ ";; Adding forwarder block %d to loop %d and retrying\n",
+ e->src->index, loop->loop_no);
+ VEC_safe_push (basic_block, heap, loop->blocks, e->src);
+ bitmap_set_bit (loop->block_bitmap, e->src->index);
+ FOR_EACH_EDGE (e2, ei2, e->src->preds)
+ VEC_safe_push (edge, gc, loop->incoming, e2);
+ VEC_unordered_remove (edge, loop->incoming, ei.index);
+ retry = 1;
+ break;
+ }
+ }
+ if (!retry)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; No forwarder blocks found\n");
+ loop->bad = 1;
+ }
+ }
+ }
+ }
+
+ out:
+ VEC_free (basic_block, heap, works);
+}
+
+/* Analyze the structure of the loops in the current function.  Use STACK
+   for bitmap allocations.  Returns a list of all loops found; candidates
+   that cannot become hardware loops are marked bad.  */
+static loop_info
+bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file)
+{
+ loop_info loops = NULL;
+ loop_info loop;
+ basic_block bb;
+ bitmap tmp_bitmap;
+ int nloops = 0;
+
+ /* Find all the possible loop tails. This means searching for every
+ loop_end instruction. For each one found, create a loop_info
+ structure and add the head block to the work list. */
+ FOR_EACH_BB (bb)
+ {
+ rtx tail = BB_END (bb);
+
+ while (GET_CODE (tail) == NOTE)
+ tail = PREV_INSN (tail);
+
+ bb->aux = NULL;
+
+ if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
+ {
+ rtx insn;
+ /* A possible loop end */
+
+ /* There's a degenerate case we can handle - an empty loop consisting
+ of only a back branch. Handle that by deleting the branch. */
+ insn = BB_HEAD (BRANCH_EDGE (bb)->dest);
+ if (next_real_insn (insn) == tail)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; degenerate loop ending at\n");
+ print_rtl_single (dump_file, tail);
+ }
+ delete_insn_and_edges (tail);
+ continue;
+ }
+
+ loop = XNEW (struct loop_info_d);
+ loop->next = loops;
+ loops = loop;
+ loop->loop_no = nloops++;
+ loop->blocks = VEC_alloc (basic_block, heap, 20);
+ loop->block_bitmap = BITMAP_ALLOC (stack);
+ bb->aux = loop;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; potential loop %d ending at\n",
+ loop->loop_no);
+ print_rtl_single (dump_file, tail);
+ }
+
+ bfin_discover_loop (loop, bb, tail);
+ }
+ }
+
+ tmp_bitmap = BITMAP_ALLOC (stack);
+ /* Compute loop nestings. */
+ for (loop = loops; loop; loop = loop->next)
+ {
+ loop_info other;
+ if (loop->bad)
+ continue;
+
+ for (other = loop->next; other; other = other->next)
+ {
+ if (other->bad)
+ continue;
+
+ bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap);
+ if (bitmap_empty_p (tmp_bitmap))
+ continue;
+ if (bitmap_equal_p (tmp_bitmap, other->block_bitmap))
+ {
+ other->outer = loop;
+ VEC_safe_push (loop_info, heap, loop->loops, other);
+ }
+ else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap))
+ {
+ loop->outer = other;
+ VEC_safe_push (loop_info, heap, other->loops, loop);
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; can't find suitable nesting for loops %d and %d\n",
+ loop->loop_no, other->loop_no);
+ loop->bad = other->bad = 1;
+ }
+ }
+ }
+ BITMAP_FREE (tmp_bitmap);
+
+ return loops;
+}
+
+/* Free up the loop structures in LOOPS. */
+static void
+free_loops (loop_info loops)
+{
+ while (loops)
+ {
+ loop_info loop = loops;
+ loops = loop->next;
+ VEC_free (loop_info, heap, loop->loops);
+ VEC_free (basic_block, heap, loop->blocks);
+ BITMAP_FREE (loop->block_bitmap);
+ XDELETE (loop);
+ }
+}
+
+#define BB_AUX_INDEX(BB) ((intptr_t)(BB)->aux)
+
+/* The taken-branch edge from the loop end can actually go forward. Since the
+ Blackfin's LSETUP instruction requires that the loop end be after the loop
+ start, try to reorder a loop's basic blocks when we find such a case. */
+static void
+bfin_reorder_loops (loop_info loops, FILE *dump_file)
+{
+ basic_block bb;
+ loop_info loop;
+
+ FOR_EACH_BB (bb)
+ bb->aux = NULL;
+ cfg_layout_initialize (0);
+
+ for (loop = loops; loop; loop = loop->next)
+ {
+ intptr_t index;
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (loop->bad)
+ continue;
+
+ /* Recreate an index for basic blocks that represents their order. */
+ for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0;
+ bb != EXIT_BLOCK_PTR;
+ bb = bb->next_bb, index++)
+ bb->aux = (PTR) index;
+
+ if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail))
+ continue;
+
+ FOR_EACH_EDGE (e, ei, loop->head->succs)
+ {
+ if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
+ && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
+ {
+ basic_block start_bb = e->dest;
+ basic_block start_prev_bb = start_bb->prev_bb;
+
+ if (dump_file)
+ fprintf (dump_file, ";; Moving block %d before block %d\n",
+ loop->head->index, start_bb->index);
+ loop->head->prev_bb->next_bb = loop->head->next_bb;
+ loop->head->next_bb->prev_bb = loop->head->prev_bb;
+
+ loop->head->prev_bb = start_prev_bb;
+ loop->head->next_bb = start_bb;
+ start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
+ break;
+ }
+ }
+ loops = loops->next;
+ }
+
+ FOR_EACH_BB (bb)
+ {
+ if (bb->next_bb != EXIT_BLOCK_PTR)
+ bb->aux = bb->next_bb;
+ else
+ bb->aux = NULL;
+ }
+ cfg_layout_finalize ();
+ df_analyze ();
+}
+
+/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
+ and tries to rewrite the RTL of these loops so that proper Blackfin
+ hardware loops are generated. */
+
+static void
+bfin_reorg_loops (FILE *dump_file)
+{
+ loop_info loops = NULL;
+ loop_info loop;
+ basic_block bb;
+ bitmap_obstack stack;
+
+ bitmap_obstack_initialize (&stack);
+
+ if (dump_file)
+ fprintf (dump_file, ";; Find loops, first pass\n\n");
+
+ loops = bfin_discover_loops (&stack, dump_file);
+
+ if (dump_file)
+ bfin_dump_loops (loops);
+
+ bfin_reorder_loops (loops, dump_file);
+ free_loops (loops);
+
+ if (dump_file)
+ fprintf (dump_file, ";; Find loops, second pass\n\n");
+
+ loops = bfin_discover_loops (&stack, dump_file);
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; All loops found:\n\n");
+ bfin_dump_loops (loops);
+ }
+
+ /* Now apply the optimizations. */
+ for (loop = loops; loop; loop = loop->next)
+ bfin_optimize_loop (loop);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; After hardware loops optimization:\n\n");
+ bfin_dump_loops (loops);
+ }
+
+ free_loops (loops);
+
+ if (dump_file)
+ print_rtl (dump_file, get_insns ());
+
+ FOR_EACH_BB (bb)
+ bb->aux = NULL;
+
+ splitting_loops = 1;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn = BB_END (bb);
+ if (!JUMP_P (insn))
+ continue;
+
+ try_split (PATTERN (insn), insn, 1);
+ }
+ splitting_loops = 0;
+}
+
+/* Try to bundle the three insns found in SLOT, emitting NOPs for empty
+   slots and marking the insns' modes for the output code.
+   Returns true if we modified the insn chain, false otherwise.  */
+static bool
+gen_one_bundle (rtx slot[3])
+{
+ gcc_assert (slot[1] != NULL_RTX);
+
+ /* Don't add extra NOPs if optimizing for size. */
+ if (optimize_size
+ && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
+ return false;
+
+ /* Verify that we really can do the multi-issue. */
+ if (slot[0])
+ {
+ rtx t = NEXT_INSN (slot[0]);
+ while (t != slot[1])
+ {
+ if (GET_CODE (t) != NOTE
+ || NOTE_KIND (t) != NOTE_INSN_DELETED)
+ return false;
+ t = NEXT_INSN (t);
+ }
+ }
+ if (slot[2])
+ {
+ rtx t = NEXT_INSN (slot[1]);
+ while (t != slot[2])
+ {
+ if (GET_CODE (t) != NOTE
+ || NOTE_KIND (t) != NOTE_INSN_DELETED)
+ return false;
+ t = NEXT_INSN (t);
+ }
+ }
+
+ if (slot[0] == NULL_RTX)
+ {
+ slot[0] = emit_insn_before (gen_mnop (), slot[1]);
+ df_insn_rescan (slot[0]);
+ }
+ if (slot[2] == NULL_RTX)
+ {
+ slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
+ df_insn_rescan (slot[2]);
+ }
+
+ /* Avoid line number information being printed inside one bundle. */
+ if (INSN_LOCATOR (slot[1])
+ && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0]))
+ INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]);
+ if (INSN_LOCATOR (slot[2])
+ && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0]))
+ INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]);
+
+ /* Terminate them with "|| " instead of ";" in the output. */
+ PUT_MODE (slot[0], SImode);
+ PUT_MODE (slot[1], SImode);
+ /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */
+ PUT_MODE (slot[2], QImode);
+ return true;
+}
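+/* Editor's illustration, not from the original sources: after this
+   function succeeds, the slot modes act as markers for the output
+   code - slot[0] and slot[1] are SImode (printed with a trailing "||")
+   and slot[2] is QImode (printed with ";"), producing e.g.
+     r0 = r1.l * r2.l || r3 = [p0++] || nop;  */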
+
+/* Go through all insns, and use the information generated during scheduling
+   to group instructions that can be issued simultaneously into bundles.  */
+
+static void
+bfin_gen_bundles (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next;
+ rtx slot[3];
+ int n_filled = 0;
+
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ for (insn = BB_HEAD (bb);; insn = next)
+ {
+ int at_end;
+ rtx delete_this = NULL_RTX;
+
+ if (NONDEBUG_INSN_P (insn))
+ {
+ enum attr_type type = get_attr_type (insn);
+
+ if (type == TYPE_STALL)
+ {
+ gcc_assert (n_filled == 0);
+ delete_this = insn;
+ }
+ else
+ {
+ if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM)
+ slot[0] = insn;
+ else if (slot[1] == NULL_RTX)
+ slot[1] = insn;
+ else
+ slot[2] = insn;
+ n_filled++;
+ }
+ }
+
+ next = NEXT_INSN (insn);
+ while (next && insn != BB_END (bb)
+ && !(INSN_P (next)
+ && GET_CODE (PATTERN (next)) != USE
+ && GET_CODE (PATTERN (next)) != CLOBBER))
+ {
+ insn = next;
+ next = NEXT_INSN (insn);
+ }
+
+ /* BB_END can change due to emitting extra NOPs, so check here. */
+ at_end = insn == BB_END (bb);
+ if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
+ {
+ if ((n_filled < 2
+ || !gen_one_bundle (slot))
+ && slot[0] != NULL_RTX)
+ {
+ rtx pat = PATTERN (slot[0]);
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == UNSPEC
+ && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
+ {
+ SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
+ INSN_CODE (slot[0]) = -1;
+ df_insn_rescan (slot[0]);
+ }
+ }
+ n_filled = 0;
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ }
+ if (delete_this != NULL_RTX)
+ delete_insn (delete_this);
+ if (at_end)
+ break;
+ }
+ }
+}
+
+/* Ensure that no var tracking notes are emitted in the middle of a
+ three-instruction bundle. */
+
+static void
+reorder_var_tracking_notes (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next;
+ rtx queue = NULL_RTX;
+ bool in_bundle = false;
+
+ for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
+ {
+ next = NEXT_INSN (insn);
+
+ if (INSN_P (insn))
+ {
+ /* Emit queued up notes at the last instruction of a bundle. */
+ if (GET_MODE (insn) == QImode)
+ {
+ while (queue)
+ {
+ rtx next_queue = PREV_INSN (queue);
+ PREV_INSN (NEXT_INSN (insn)) = queue;
+ NEXT_INSN (queue) = NEXT_INSN (insn);
+ NEXT_INSN (insn) = queue;
+ PREV_INSN (queue) = insn;
+ queue = next_queue;
+ }
+ in_bundle = false;
+ }
+ else if (GET_MODE (insn) == SImode)
+ in_bundle = true;
+ }
+ else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
+ {
+ if (in_bundle)
+ {
+ rtx prev = PREV_INSN (insn);
+ PREV_INSN (next) = prev;
+ NEXT_INSN (prev) = next;
+
+ PREV_INSN (insn) = queue;
+ queue = insn;
+ }
+ }
+ }
+ }
+}
+
+/* On some silicon revisions, functions shorter than a certain number of cycles
+ can cause unpredictable behaviour. Work around this by adding NOPs as
+ needed. */
+static void
+workaround_rts_anomaly (void)
+{
+ rtx insn, first_insn = NULL_RTX;
+ int cycles = 4;
+
+ if (! ENABLE_WA_RETS)
+ return;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (BARRIER_P (insn))
+ return;
+
+ if (NOTE_P (insn) || LABEL_P (insn))
+ continue;
+
+ if (first_insn == NULL_RTX)
+ first_insn = insn;
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (CALL_P (insn))
+ return;
+
+ if (JUMP_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_return_internal)
+ break;
+
+ /* Nothing to worry about for direct jumps. */
+ if (!any_condjump_p (insn))
+ return;
+ if (cycles <= 1)
+ return;
+ cycles--;
+ }
+ else if (INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ int this_cycles = 1;
+
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ if (push_multiple_operation (pat, VOIDmode)
+ || pop_multiple_operation (pat, VOIDmode))
+ this_cycles = n_regs_to_save;
+ }
+ else
+ {
+ int icode = recog_memoized (insn);
+
+ if (icode == CODE_FOR_link)
+ this_cycles = 4;
+ else if (icode == CODE_FOR_unlink)
+ this_cycles = 3;
+ else if (icode == CODE_FOR_mulsi3)
+ this_cycles = 5;
+ }
+ if (this_cycles >= cycles)
+ return;
+
+ cycles -= this_cycles;
+ }
+ }
+ while (cycles > 0)
+ {
+ emit_insn_before (gen_nop (), first_insn);
+ cycles--;
+ }
+}
+
+/* Return an insn type for INSN that can be used by the caller for anomaly
+ workarounds. This differs from plain get_attr_type in that it handles
+ SEQUENCEs. */
+
+static enum attr_type
+type_for_anomaly (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == SEQUENCE)
+ {
+ enum attr_type t;
+ t = get_attr_type (XVECEXP (pat, 0, 1));
+ if (t == TYPE_MCLD)
+ return t;
+ t = get_attr_type (XVECEXP (pat, 0, 2));
+ if (t == TYPE_MCLD)
+ return t;
+ return TYPE_MCST;
+ }
+ else
+ return get_attr_type (insn);
+}
+
+/* Return true iff the address found in MEM is based on the register
+ NP_REG and optionally has a positive offset. */
+static bool
+harmless_null_pointer_p (rtx mem, int np_reg)
+{
+ mem = XEXP (mem, 0);
+ if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC)
+ mem = XEXP (mem, 0);
+ if (REG_P (mem) && (int) REGNO (mem) == np_reg)
+ return true;
+ if (GET_CODE (mem) == PLUS
+ && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg)
+ {
+ mem = XEXP (mem, 1);
+ if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0)
+ return true;
+ }
+ return false;
+}
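+/* Editor's illustration, not from the original sources: with NP_REG set
+   to P0's number after a null pointer check, the addresses [p0], [p0++]
+   and [p0 + 4] are treated as harmless - they touch address zero or a
+   small positive offset from it - while [p0 - 4] and [p1] are not.  */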
+
+/* Return nonzero if INSN contains any loads that may trap. */
+
+static bool
+trapping_loads_p (rtx insn, int np_reg, bool after_np_branch)
+{
+ rtx mem = SET_SRC (single_set (insn));
+
+ if (!after_np_branch)
+ np_reg = -1;
+ return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg))
+ && may_trap_p (mem));
+}
+
+/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of
+ a three-insn bundle, see if one of them is a load and return that if so.
+ Return NULL_RTX if the insn does not contain loads. */
+static rtx
+find_load (rtx insn)
+{
+ if (!NONDEBUG_INSN_P (insn))
+ return NULL_RTX;
+ if (get_attr_type (insn) == TYPE_MCLD)
+ return insn;
+ if (GET_MODE (insn) != SImode)
+ return NULL_RTX;
+ do {
+ insn = NEXT_INSN (insn);
+ if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode)
+ && get_attr_type (insn) == TYPE_MCLD)
+ return insn;
+ } while (GET_MODE (insn) != QImode);
+ return NULL_RTX;
+}
+
+/* Determine whether PAT is an indirect call pattern. */
+static bool
+indirect_call_p (rtx pat)
+{
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET)
+ pat = SET_SRC (pat);
+ gcc_assert (GET_CODE (pat) == CALL);
+ pat = XEXP (pat, 0);
+ gcc_assert (GET_CODE (pat) == MEM);
+ pat = XEXP (pat, 0);
+
+ return REG_P (pat);
+}
+
+/* During workaround_speculation, track whether we're in the shadow of a
+ conditional branch that tests a P register for NULL. If so, we can omit
+ emitting NOPs if we see a load from that P register, since a speculative
+ access at address 0 isn't a problem, and the load is executed in all other
+ cases anyway.
+ Global for communication with note_np_check_stores through note_stores.
+ */
+int np_check_regno = -1;
+bool np_after_branch = false;
+
+/* Subroutine of workaround_speculation, called through note_stores. */
+static void
+note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
+{
+ if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno))
+ np_check_regno = -1;
+}
+
+static void
+workaround_speculation (void)
+{
+ rtx insn, next;
+ rtx last_condjump = NULL_RTX;
+ int cycles_since_jump = INT_MAX;
+ int delay_added = 0;
+
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = next)
+ {
+ rtx pat;
+ int delay_needed = 0;
+
+ next = find_next_insn_start (insn);
+
+ if (NOTE_P (insn) || BARRIER_P (insn))
+ continue;
+
+ if (LABEL_P (insn))
+ {
+ np_check_regno = -1;
+ continue;
+ }
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
+ continue;
+
+ if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0)
+ {
+ np_check_regno = -1;
+ continue;
+ }
+
+ if (JUMP_P (insn))
+ {
+ /* Is this a condjump based on a null pointer comparison we saw
+ earlier? */
+ if (np_check_regno != -1
+ && recog_memoized (insn) == CODE_FOR_cbranchbi4)
+ {
+ rtx op = XEXP (SET_SRC (PATTERN (insn)), 0);
+ gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE);
+ if (GET_CODE (op) == NE)
+ np_after_branch = true;
+ }
+ if (any_condjump_p (insn)
+ && ! cbranch_predicted_taken_p (insn))
+ {
+ last_condjump = insn;
+ delay_added = 0;
+ cycles_since_jump = 0;
+ }
+ else
+ cycles_since_jump = INT_MAX;
+ }
+ else if (CALL_P (insn))
+ {
+ np_check_regno = -1;
+ if (cycles_since_jump < INT_MAX)
+ cycles_since_jump++;
+ if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
+ {
+ delay_needed = 3;
+ }
+ }
+ else if (NONDEBUG_INSN_P (insn))
+ {
+ rtx load_insn = find_load (insn);
+ enum attr_type type = type_for_anomaly (insn);
+
+ if (cycles_since_jump < INT_MAX)
+ cycles_since_jump++;
+
+ /* Detect a comparison of a P register with zero. If we later
+ see a condjump based on it, we have found a null pointer
+ check. */
+ if (recog_memoized (insn) == CODE_FOR_compare_eq)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ if (REG_P (XEXP (src, 0))
+ && P_REGNO_P (REGNO (XEXP (src, 0)))
+ && XEXP (src, 1) == const0_rtx)
+ {
+ np_check_regno = REGNO (XEXP (src, 0));
+ np_after_branch = false;
+ }
+ else
+ np_check_regno = -1;
+ }
+
+ if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
+ {
+ if (trapping_loads_p (load_insn, np_check_regno,
+ np_after_branch))
+ delay_needed = 4;
+ }
+ else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
+ delay_needed = 3;
+
+ /* See if we need to forget about a null pointer comparison
+ we found earlier. */
+ if (recog_memoized (insn) != CODE_FOR_compare_eq)
+ {
+ note_stores (PATTERN (insn), note_np_check_stores, NULL);
+ if (np_check_regno != -1)
+ {
+ if (find_regno_note (insn, REG_INC, np_check_regno))
+ np_check_regno = -1;
+ }
+ }
+
+ }
+
+ if (delay_needed > cycles_since_jump
+ && (delay_needed - cycles_since_jump) > delay_added)
+ {
+ rtx pat1;
+ int num_clobbers;
+ rtx *op = recog_data.operand;
+
+ delay_needed -= cycles_since_jump;
+
+ extract_insn (last_condjump);
+ if (optimize_size)
+ {
+ pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
+ op[3]);
+ cycles_since_jump = INT_MAX;
+ }
+ else
+ {
+ /* Do not adjust cycles_since_jump in this case, so that
+ we'll increase the number of NOPs for a subsequent insn
+ if necessary. */
+ pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
+ GEN_INT (delay_needed));
+ delay_added = delay_needed;
+ }
+ PATTERN (last_condjump) = pat1;
+ INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
+ }
+ if (CALL_P (insn))
+ {
+ cycles_since_jump = INT_MAX;
+ delay_added = 0;
+ }
+ }
+
+ /* Second pass: for predicted-true branches, see if anything at the
+ branch destination needs extra nops. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ int cycles_since_jump;
+ if (JUMP_P (insn)
+ && any_condjump_p (insn)
+ && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
+ || cbranch_predicted_taken_p (insn)))
+ {
+ rtx target = JUMP_LABEL (insn);
+ rtx label = target;
+ rtx next_tgt;
+
+ cycles_since_jump = 0;
+ for (; target && cycles_since_jump < 3; target = next_tgt)
+ {
+ rtx pat;
+
+ next_tgt = find_next_insn_start (target);
+
+ if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
+ continue;
+
+ pat = PATTERN (target);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (NONDEBUG_INSN_P (target))
+ {
+ rtx load_insn = find_load (target);
+ enum attr_type type = type_for_anomaly (target);
+ int delay_needed = 0;
+ if (cycles_since_jump < INT_MAX)
+ cycles_since_jump++;
+
+ if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
+ {
+ if (trapping_loads_p (load_insn, -1, false))
+ delay_needed = 2;
+ }
+ else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
+ delay_needed = 2;
+
+ if (delay_needed > cycles_since_jump)
+ {
+ rtx prev = prev_real_insn (label);
+ delay_needed -= cycles_since_jump;
+ if (dump_file)
+ fprintf (dump_file, "Adding %d nops after %d\n",
+ delay_needed, INSN_UID (label));
+ if (JUMP_P (prev)
+ && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops)
+ {
+ rtx x;
+ HOST_WIDE_INT v;
+
+ if (dump_file)
+ fprintf (dump_file,
+ "Reducing nops on insn %d.\n",
+ INSN_UID (prev));
+ x = PATTERN (prev);
+ x = XVECEXP (x, 0, 1);
+ v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed;
+ XVECEXP (x, 0, 0) = GEN_INT (v);
+ }
+ while (delay_needed-- > 0)
+ emit_insn_after (gen_nop (), label);
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Called just before the final scheduling pass. If we need to insert NOPs
+ later on to work around speculative loads, insert special placeholder
+ insns that cause loads to be delayed for as many cycles as necessary
+ (and possible). This reduces the number of NOPs we need to add.
+ The dummy insns we generate are later removed by bfin_gen_bundles. */
+static void
+add_sched_insns_for_speculation (void)
+{
+ rtx insn;
+
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ {
+ if (any_condjump_p (insn)
+ && !cbranch_predicted_taken_p (insn))
+ {
+ rtx n = next_real_insn (insn);
+ emit_insn_before (gen_stall (GEN_INT (3)), n);
+ }
+ }
+ }
+
+ /* Second pass: for predicted-true branches, see if anything at the
+ branch destination needs extra nops. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (JUMP_P (insn)
+ && any_condjump_p (insn)
+ && (cbranch_predicted_taken_p (insn)))
+ {
+ rtx target = JUMP_LABEL (insn);
+ rtx next = next_real_insn (target);
+
+ if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
+ && get_attr_type (next) == TYPE_STALL)
+ continue;
+ emit_insn_before (gen_stall (GEN_INT (1)), next);
+ }
+ }
+}
+
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+ after conditional branches as needed.
+
+ The Blackfin is unusual in that a code sequence like
+ if cc jump label
+    r0 = [p0]
+ may speculatively perform the load even if the condition isn't true. This
+ happens for a branch that is predicted not taken, because the pipeline
+ isn't flushed or stalled, so the early stages of the following instructions,
+ which perform the memory reference, are allowed to execute before the
+ jump condition is evaluated.
+ Therefore, we must insert additional instructions in all places where this
+ could lead to incorrect behavior. The manual recommends CSYNC, while
+ VDSP seems to use NOPs (even though its corresponding compiler option is
+ named CSYNC).
+
+ When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+ When optimizing for size, we turn the branch into a predicted taken one.
+ This may be slower due to mispredicts, but saves code size. */
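+
+/* For illustration (the two-NOP count here is arbitrary; the real count
+   is computed by workaround_speculation above), when optimizing for
+   speed a sequence such as
+
+    if cc jump label
+    r0 = [p0]
+
+   is turned into
+
+    if cc jump label
+    nop; nop;
+    r0 = [p0]
+
+   while, when optimizing for size, the branch is instead emitted in its
+   predicted-taken form.  */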
+
+static void
+bfin_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (flag_schedule_insns_after_reload)
+ {
+ splitting_for_sched = 1;
+ split_all_insns ();
+ splitting_for_sched = 0;
+
+ add_sched_insns_for_speculation ();
+
+ timevar_push (TV_SCHED2);
+ if (flag_selective_scheduling2
+ && !maybe_skip_selective_scheduling ())
+ run_selective_scheduling ();
+ else
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
+ df_analyze ();
+
+ /* Doloop optimization */
+ if (cfun->machine->has_hardware_loops)
+ bfin_reorg_loops (dump_file);
+
+ workaround_speculation ();
+
+ if (flag_var_tracking)
+ {
+ timevar_push (TV_VAR_TRACKING);
+ variable_tracking_main ();
+ reorder_var_tracking_notes ();
+ timevar_pop (TV_VAR_TRACKING);
+ }
+
+ df_finish_pass (false);
+
+ workaround_rts_anomaly ();
+}
+
+/* Handle interrupt_handler, exception_handler and nmi_handler function
+ attributes; arguments as in struct attribute_spec.handler. */
+
+static tree
+handle_int_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree x = *node;
+ if (TREE_CODE (x) == FUNCTION_DECL)
+ x = TREE_TYPE (x);
+
+ if (TREE_CODE (x) != FUNCTION_TYPE)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (funkind (x) != SUBROUTINE)
+ error ("multiple function type attributes specified");
+
+ return NULL_TREE;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+
+static int
+bfin_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ e_funkind kind1, kind2;
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ kind1 = funkind (type1);
+ kind2 = funkind (type2);
+
+ if (kind1 != kind2)
+ return 0;
+
+ /* Check for mismatched modifiers */
+ if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ return 1;
+}
+
+/* Handle a "longcall" or "shortcall" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+bfin_handle_longcall_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0
+ && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node)))
+ || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0
+ && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
+ {
+ warning (OPT_Wattributes,
+ "can%'t apply both longcall and shortcall attributes to the same function");
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "l1_text" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args),
+ int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ {
+ error ("%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ /* The decl may have already been given a section attribute
+ from a previous declaration. Ensure they match. */
+ else if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ ".l1.text") != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl) = build_string (9, ".l1.text");
+
+ return NULL_TREE;
+}
+
+/* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args),
+ int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ {
+ error ("%qE attribute only applies to variables",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (current_function_decl != NULL_TREE
+ && !TREE_STATIC (decl))
+ {
+ error ("%qE attribute cannot be specified for local variables",
+ name);
+ *no_add_attrs = true;
+ }
+ else
+ {
+ const char *section_name;
+
+ if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0)
+ section_name = ".l1.data";
+ else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0)
+ section_name = ".l1.data.A";
+ else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0)
+ section_name = ".l1.data.B";
+ else
+ gcc_unreachable ();
+
+ /* The decl may have already been given a section attribute
+ from a previous declaration. Ensure they match. */
+ if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ section_name) != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl)
+ = build_string (strlen (section_name) + 1, section_name);
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */
+
+static tree
+bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name),
+ tree ARG_UNUSED (args), int ARG_UNUSED (flags),
+ bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ ".l2.text") != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl) = build_string (9, ".l2.text");
+ }
+ else if (TREE_CODE (decl) == VAR_DECL)
+ {
+ if (DECL_SECTION_NAME (decl) != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
+ ".l2.data") != 0)
+ {
+ error ("section of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ else
+ DECL_SECTION_NAME (decl) = build_string (9, ".l2.data");
+ }
+
+ return NULL_TREE;
+}
+
+/* Table of valid machine attributes. */
+static const struct attribute_spec bfin_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute },
+ { "exception_handler", 0, 0, false, true, true, handle_int_attribute },
+ { "nmi_handler", 0, 0, false, true, true, handle_int_attribute },
+ { "nesting", 0, 0, false, true, true, NULL },
+ { "kspisusp", 0, 0, false, true, true, NULL },
+ { "saveall", 0, 0, false, true, true, NULL },
+ { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
+ { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
+ { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute },
+ { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
+ { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
+ { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
+ { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
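+
+/* A usage sketch (user code, not part of this file); the section
+   placement matches the handlers above:
+
+     void isr (void) __attribute__ ((interrupt_handler, nesting));
+     void fast_fn (void) __attribute__ ((l1_text));    placed in .l1.text
+     int coeffs[64] __attribute__ ((l1_data_A));       placed in .l1.data.A  */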
+
+/* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to
+ tell the assembler to generate pointers to function descriptors in
+ some cases. */
+
+static bool
+bfin_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+ if (TARGET_FDPIC && size == UNITS_PER_WORD)
+ {
+ if (GET_CODE (value) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (value))
+ {
+ fputs ("\t.picptr\tfuncdesc(", asm_out_file);
+ output_addr_const (asm_out_file, value);
+ fputs (")\n", asm_out_file);
+ return true;
+ }
+ if (!aligned_p)
+ {
+ /* We've set the unaligned SI op to NULL, so we always have to
+ handle the unaligned case here. */
+ assemble_integer_with_op ("\t.4byte\t", value);
+ return true;
+ }
+ }
+ return default_assemble_integer (value, size, aligned_p);
+}
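+
+/* For example, with -mfdpic a word-sized initializer that names a
+   function, such as `void (*fp) (void) = foo;', is emitted by the code
+   above as `.picptr funcdesc(foo)', so that the linker builds a
+   function descriptor rather than a raw code address.  */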
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
+ tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ rtx xops[3];
+ /* The this parameter is passed as the first argument. */
+ rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
+
+ /* Adjust the this parameter by a fixed constant. */
+ if (delta)
+ {
+ xops[1] = this_rtx;
+ if (delta >= -64 && delta <= 63)
+ {
+ xops[0] = GEN_INT (delta);
+ output_asm_insn ("%1 += %0;", xops);
+ }
+ else if (delta >= -128 && delta < -64)
+ {
+ xops[0] = GEN_INT (delta + 64);
+ output_asm_insn ("%1 += -64; %1 += %0;", xops);
+ }
+ else if (delta > 63 && delta <= 126)
+ {
+ xops[0] = GEN_INT (delta - 63);
+ output_asm_insn ("%1 += 63; %1 += %0;", xops);
+ }
+ else
+ {
+ xops[0] = GEN_INT (delta);
+ output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops);
+ }
+ }
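+  /* (For example, delta == 100 falls into the 63 < delta <= 126 case
+     above and is emitted as "%1 += 63; %1 += 37;", since the
+     add-immediate form only accepts constants in the range -64..63.)  */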
+
+ /* Adjust the this parameter by a value stored in the vtable. */
+ if (vcall_offset)
+ {
+ rtx p2tmp = gen_rtx_REG (Pmode, REG_P2);
+ rtx tmp = gen_rtx_REG (Pmode, REG_R3);
+
+ xops[1] = tmp;
+ xops[2] = p2tmp;
+ output_asm_insn ("%2 = r0; %2 = [%2];", xops);
+
+ /* Adjust the this parameter. */
+ xops[0] = gen_rtx_MEM (Pmode, plus_constant (p2tmp, vcall_offset));
+ if (!memory_operand (xops[0], Pmode))
+ {
+ rtx tmp2 = gen_rtx_REG (Pmode, REG_P1);
+ xops[0] = GEN_INT (vcall_offset);
+ xops[1] = tmp2;
+ output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
+ xops[0] = gen_rtx_MEM (Pmode, p2tmp);
+ }
+ xops[2] = this_rtx;
+ output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
+ }
+
+ xops[0] = XEXP (DECL_RTL (function), 0);
+  /* (The "1 ||" makes this condition always true, so the direct jump
+     is always emitted.)  */
+  if (1 || !flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jump.l\t%P0", xops);
+}
+
+/* Codes for all the Blackfin builtins. */
+enum bfin_builtins
+{
+ BFIN_BUILTIN_CSYNC,
+ BFIN_BUILTIN_SSYNC,
+ BFIN_BUILTIN_ONES,
+ BFIN_BUILTIN_COMPOSE_2X16,
+ BFIN_BUILTIN_EXTRACTLO,
+ BFIN_BUILTIN_EXTRACTHI,
+
+ BFIN_BUILTIN_SSADD_2X16,
+ BFIN_BUILTIN_SSSUB_2X16,
+ BFIN_BUILTIN_SSADDSUB_2X16,
+ BFIN_BUILTIN_SSSUBADD_2X16,
+ BFIN_BUILTIN_MULT_2X16,
+ BFIN_BUILTIN_MULTR_2X16,
+ BFIN_BUILTIN_NEG_2X16,
+ BFIN_BUILTIN_ABS_2X16,
+ BFIN_BUILTIN_MIN_2X16,
+ BFIN_BUILTIN_MAX_2X16,
+
+ BFIN_BUILTIN_SSADD_1X16,
+ BFIN_BUILTIN_SSSUB_1X16,
+ BFIN_BUILTIN_MULT_1X16,
+ BFIN_BUILTIN_MULTR_1X16,
+ BFIN_BUILTIN_NORM_1X16,
+ BFIN_BUILTIN_NEG_1X16,
+ BFIN_BUILTIN_ABS_1X16,
+ BFIN_BUILTIN_MIN_1X16,
+ BFIN_BUILTIN_MAX_1X16,
+
+ BFIN_BUILTIN_SUM_2X16,
+ BFIN_BUILTIN_DIFFHL_2X16,
+ BFIN_BUILTIN_DIFFLH_2X16,
+
+ BFIN_BUILTIN_SSADD_1X32,
+ BFIN_BUILTIN_SSSUB_1X32,
+ BFIN_BUILTIN_NORM_1X32,
+ BFIN_BUILTIN_ROUND_1X32,
+ BFIN_BUILTIN_NEG_1X32,
+ BFIN_BUILTIN_ABS_1X32,
+ BFIN_BUILTIN_MIN_1X32,
+ BFIN_BUILTIN_MAX_1X32,
+ BFIN_BUILTIN_MULT_1X32,
+ BFIN_BUILTIN_MULT_1X32X32,
+ BFIN_BUILTIN_MULT_1X32X32NS,
+
+ BFIN_BUILTIN_MULHISILL,
+ BFIN_BUILTIN_MULHISILH,
+ BFIN_BUILTIN_MULHISIHL,
+ BFIN_BUILTIN_MULHISIHH,
+
+ BFIN_BUILTIN_LSHIFT_1X16,
+ BFIN_BUILTIN_LSHIFT_2X16,
+ BFIN_BUILTIN_SSASHIFT_1X16,
+ BFIN_BUILTIN_SSASHIFT_2X16,
+ BFIN_BUILTIN_SSASHIFT_1X32,
+
+ BFIN_BUILTIN_CPLX_MUL_16,
+ BFIN_BUILTIN_CPLX_MAC_16,
+ BFIN_BUILTIN_CPLX_MSU_16,
+
+ BFIN_BUILTIN_CPLX_MUL_16_S40,
+ BFIN_BUILTIN_CPLX_MAC_16_S40,
+ BFIN_BUILTIN_CPLX_MSU_16_S40,
+
+ BFIN_BUILTIN_CPLX_SQU,
+
+ BFIN_BUILTIN_LOADBYTES,
+
+ BFIN_BUILTIN_MAX
+};
+
+#define def_builtin(NAME, TYPE, CODE) \
+do { \
+ add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
+ NULL, NULL_TREE); \
+} while (0)
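+
+/* (The do { ... } while (0) wrapper makes the macro usable as a single
+   statement, e.g. in an unbraced if/else body, without changing its
+   expansion.)  */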
+
+/* Set up all builtin functions for this target. */
+static void
+bfin_init_builtins (void)
+{
+ tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
+ tree void_ftype_void
+ = build_function_type (void_type_node, void_list_node);
+ tree short_ftype_short
+ = build_function_type_list (short_integer_type_node, short_integer_type_node,
+ NULL_TREE);
+ tree short_ftype_int_int
+ = build_function_type_list (short_integer_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ tree int_ftype_int_int
+ = build_function_type_list (integer_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ tree int_ftype_int
+ = build_function_type_list (integer_type_node, integer_type_node,
+ NULL_TREE);
+ tree short_ftype_int
+ = build_function_type_list (short_integer_type_node, integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v2hi_v2hi
+ = build_function_type_list (integer_type_node, V2HI_type_node,
+ V2HI_type_node, NULL_TREE);
+ tree v2hi_ftype_v2hi_v2hi
+ = build_function_type_list (V2HI_type_node, V2HI_type_node,
+ V2HI_type_node, NULL_TREE);
+ tree v2hi_ftype_v2hi_v2hi_v2hi
+ = build_function_type_list (V2HI_type_node, V2HI_type_node,
+ V2HI_type_node, V2HI_type_node, NULL_TREE);
+ tree v2hi_ftype_int_int
+ = build_function_type_list (V2HI_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ tree v2hi_ftype_v2hi_int
+ = build_function_type_list (V2HI_type_node, V2HI_type_node,
+ integer_type_node, NULL_TREE);
+ tree int_ftype_short_short
+ = build_function_type_list (integer_type_node, short_integer_type_node,
+ short_integer_type_node, NULL_TREE);
+ tree v2hi_ftype_v2hi
+ = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
+ tree short_ftype_v2hi
+ = build_function_type_list (short_integer_type_node, V2HI_type_node,
+ NULL_TREE);
+ tree int_ftype_pint
+ = build_function_type_list (integer_type_node,
+ build_pointer_type (integer_type_node),
+ NULL_TREE);
+
+  /* Register all the Blackfin builtins defined above.  */
+ def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
+ def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
+
+ def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
+
+ def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
+ BFIN_BUILTIN_COMPOSE_2X16);
+ def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
+ BFIN_BUILTIN_EXTRACTHI);
+ def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi,
+ BFIN_BUILTIN_EXTRACTLO);
+
+ def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MIN_2X16);
+ def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MAX_2X16);
+
+ def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSADD_2X16);
+ def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSSUB_2X16);
+ def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSADDSUB_2X16);
+ def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSSUBADD_2X16);
+ def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULT_2X16);
+ def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULTR_2X16);
+ def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi,
+ BFIN_BUILTIN_NEG_2X16);
+ def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
+ BFIN_BUILTIN_ABS_2X16);
+
+ def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MIN_1X16);
+ def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MAX_1X16);
+
+ def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_SSADD_1X16);
+ def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_SSSUB_1X16);
+ def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MULT_1X16);
+ def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_MULTR_1X16);
+ def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short,
+ BFIN_BUILTIN_NEG_1X16);
+ def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short,
+ BFIN_BUILTIN_ABS_1X16);
+ def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int,
+ BFIN_BUILTIN_NORM_1X16);
+
+ def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi,
+ BFIN_BUILTIN_SUM_2X16);
+ def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi,
+ BFIN_BUILTIN_DIFFHL_2X16);
+ def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi,
+ BFIN_BUILTIN_DIFFLH_2X16);
+
+ def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULHISILL);
+ def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULHISIHL);
+ def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULHISILH);
+ def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_MULHISIHH);
+
+ def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_MIN_1X32);
+ def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_MAX_1X32);
+
+ def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_SSADD_1X32);
+ def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_SSSUB_1X32);
+ def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int,
+ BFIN_BUILTIN_NEG_1X32);
+ def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int,
+ BFIN_BUILTIN_ABS_1X32);
+ def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int,
+ BFIN_BUILTIN_NORM_1X32);
+ def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int,
+ BFIN_BUILTIN_ROUND_1X32);
+ def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short,
+ BFIN_BUILTIN_MULT_1X32);
+ def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int,
+ BFIN_BUILTIN_MULT_1X32X32);
+ def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int,
+ BFIN_BUILTIN_MULT_1X32X32NS);
+
+ /* Shifts. */
+ def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_SSASHIFT_1X16);
+ def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int,
+ BFIN_BUILTIN_SSASHIFT_2X16);
+ def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int,
+ BFIN_BUILTIN_LSHIFT_1X16);
+ def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int,
+ BFIN_BUILTIN_LSHIFT_2X16);
+ def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int,
+ BFIN_BUILTIN_SSASHIFT_1X32);
+
+ /* Complex numbers. */
+ def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSADD_2X16);
+ def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_SSSUB_2X16);
+ def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MUL_16);
+ def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MAC_16);
+ def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MSU_16);
+ def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MUL_16_S40);
+ def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MAC_16_S40);
+ def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
+ BFIN_BUILTIN_CPLX_MSU_16_S40);
+ def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
+ BFIN_BUILTIN_CPLX_SQU);
+
+ /* "Unaligned" load. */
+ def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
+ BFIN_BUILTIN_LOADBYTES);
+
+}
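+
+/* As a usage sketch (user code, not part of this file), the loadbytes
+   builtin registered above reads a 32-bit word through a possibly
+   unaligned pointer:
+
+     int
+     get_word (void *p)
+     {
+       return __builtin_bfin_loadbytes ((int *) p);
+     }  */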
+
+
+struct builtin_description
+{
+ const enum insn_code icode;
+ const char *const name;
+ const enum bfin_builtins code;
+ int macflag;
+};
+
+static const struct builtin_description bdesc_2arg[] =
+{
+ { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 },
+
+ { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 },
+ { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 },
+ { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 },
+ { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 },
+ { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 },
+
+ { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 },
+ { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 },
+ { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 },
+ { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 },
+
+ { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 },
+ { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 },
+ { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 },
+ { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 },
+
+ { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 },
+ { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 },
+ { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 },
+ { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 },
+ { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 },
+ { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 },
+
+ { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE },
+ { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
+ { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
+ { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
+ { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
+
+ { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
+ { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
+ { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
+ { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
+
+};
+
+static const struct builtin_description bdesc_1arg[] =
+{
+ { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
+
+ { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
+
+ { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
+ { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
+ { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
+
+ { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 },
+ { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 },
+ { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 },
+ { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 },
+
+ { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 },
+ { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 },
+ { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 },
+ { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 }
+};
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x != const0_rtx)
+ return x;
+ x = gen_reg_rtx (SImode);
+
+ emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
+ return gen_lowpart (mode, x);
+}
+
+/* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1
+ if this is a normal binary op, or one of the MACFLAG_xxx constants. */
+
+static rtx
+bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
+ int macflag)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode op0mode = GET_MODE (op0);
+ enum machine_mode op1mode = GET_MODE (op1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode)
+ {
+ op0mode = HImode;
+ op0 = gen_lowpart (HImode, op0);
+ }
+ if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode)
+ {
+ op1mode = HImode;
+ op1 = gen_lowpart (HImode, op1);
+ }
+ /* In case the insn wants input operands in modes different from
+ the result, abort. */
+ gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
+ && (op1mode == mode1 || op1mode == VOIDmode));
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (macflag == -1)
+ pat = GEN_FCN (icode) (target, op0, op1);
+ else
+ pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag));
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of bfin_expand_builtin to take care of unop insns. */
+
+static rtx
+bfin_expand_unop_builtin (enum insn_code icode, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode op0mode = GET_MODE (op0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if (op0mode == SImode && mode0 == HImode)
+ {
+ op0mode = HImode;
+ op0 = gen_lowpart (HImode, op0);
+ }
+ gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ size_t i;
+ enum insn_code icode;
+ const struct builtin_description *d;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0, arg1, arg2;
+ rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg;
+ enum machine_mode tmode, mode0;
+
+ switch (fcode)
+ {
+ case BFIN_BUILTIN_CSYNC:
+ emit_insn (gen_csync ());
+ return 0;
+ case BFIN_BUILTIN_SSYNC:
+ emit_insn (gen_ssync ());
+ return 0;
+
+ case BFIN_BUILTIN_DIFFHL_2X16:
+ case BFIN_BUILTIN_DIFFLH_2X16:
+ case BFIN_BUILTIN_SUM_2X16:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3
+ : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3
+ : CODE_FOR_ssaddhilov2hi3);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (target, op0, op0);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case BFIN_BUILTIN_MULT_1X32X32:
+ case BFIN_BUILTIN_MULT_1X32X32NS:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ if (! target
+ || !register_operand (target, SImode))
+ target = gen_reg_rtx (SImode);
+ if (! register_operand (op0, SImode))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (! register_operand (op1, SImode))
+ op1 = copy_to_mode_reg (SImode, op1);
+
+ a1reg = gen_rtx_REG (PDImode, REG_A1);
+ a0reg = gen_rtx_REG (PDImode, REG_A0);
+ tmp1 = gen_lowpart (V2HImode, op0);
+ tmp2 = gen_lowpart (V2HImode, op1);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, op0),
+ gen_lowpart (HImode, op1),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+
+ if (fcode == BFIN_BUILTIN_MULT_1X32X32)
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2,
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_NONE),
+ GEN_INT (MACFLAG_M)));
+ else
+ {
+ /* For saturating multiplication, there's exactly one special case
+ to be handled: multiplying the smallest negative value with
+ itself. Due to shift correction in fractional multiplies, this
+ can overflow. Iff this happens, OP2 will contain 1, which, when
+ added in 32 bits to the smallest negative, wraps to the largest
+ positive, which is the result we want. */
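+	  /* (Concretely: in 1.31 fractional format 0x80000000 represents
+	     -1.0, and -1.0 * -1.0 = +1.0 lies just outside the
+	     representable range, which is why this correction is
+	     needed.)  */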
+ op2 = gen_reg_rtx (V2HImode);
+ emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx));
+ emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC),
+ gen_lowpart (SImode, op2)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2,
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_NONE),
+ GEN_INT (MACFLAG_M)));
+ op2 = gen_reg_rtx (SImode);
+ emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC)));
+ }
+ emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1,
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_M)));
+ emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15)));
+ emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg));
+ if (fcode == BFIN_BUILTIN_MULT_1X32X32NS)
+ emit_insn (gen_addsi3 (target, target, op2));
+ return target;
+
+ case BFIN_BUILTIN_CPLX_MUL_16:
+ case BFIN_BUILTIN_CPLX_MUL_16_S40:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_macv2hi_parts;
+ tmode = insn_data[icode].operand[0].mode;
+
+ if (! target
+ || GET_MODE (target) != V2HImode
+ || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
+ target = gen_reg_rtx (tmode);
+ if (! register_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+ if (! register_operand (op1, GET_MODE (op1)))
+ op1 = copy_to_mode_reg (GET_MODE (op1), op1);
+
+ if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
+ emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, GEN_INT (MACFLAG_W32)));
+ else
+ emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, GEN_INT (MACFLAG_NONE)));
+ emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
+ const1_rtx, const1_rtx,
+ const0_rtx, accvec, const1_rtx, const0_rtx,
+ GEN_INT (MACFLAG_NONE), accvec));
+
+ return target;
+
+ case BFIN_BUILTIN_CPLX_MAC_16:
+ case BFIN_BUILTIN_CPLX_MSU_16:
+ case BFIN_BUILTIN_CPLX_MAC_16_S40:
+ case BFIN_BUILTIN_CPLX_MSU_16_S40:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_macv2hi_parts;
+ tmode = insn_data[icode].operand[0].mode;
+
+ if (! target
+ || GET_MODE (target) != V2HImode
+ || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
+ target = gen_reg_rtx (tmode);
+ if (! register_operand (op1, GET_MODE (op1)))
+ op1 = copy_to_mode_reg (GET_MODE (op1), op1);
+ if (! register_operand (op2, GET_MODE (op2)))
+ op2 = copy_to_mode_reg (GET_MODE (op2), op2);
+
+ tmp1 = gen_reg_rtx (SImode);
+ tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16)));
+ emit_move_insn (tmp2, gen_lowpart (SImode, op0));
+ emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
+ emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
+ if (fcode == BFIN_BUILTIN_CPLX_MAC_16
+ || fcode == BFIN_BUILTIN_CPLX_MSU_16)
+ emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, accvec, const0_rtx,
+ const0_rtx,
+ GEN_INT (MACFLAG_W32)));
+ else
+ emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
+ const0_rtx, const0_rtx,
+ const1_rtx, accvec, const0_rtx,
+ const0_rtx,
+ GEN_INT (MACFLAG_NONE)));
+ if (fcode == BFIN_BUILTIN_CPLX_MAC_16
+ || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
+ {
+ tmp1 = const1_rtx;
+ tmp2 = const0_rtx;
+ }
+ else
+ {
+ tmp1 = const0_rtx;
+ tmp2 = const1_rtx;
+ }
+ emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
+ const1_rtx, const1_rtx,
+ const0_rtx, accvec, tmp1, tmp2,
+ GEN_INT (MACFLAG_NONE), accvec));
+
+ return target;
+
+ case BFIN_BUILTIN_CPLX_SQU:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ accvec = gen_reg_rtx (V2PDImode);
+ icode = CODE_FOR_flag_mulv2hi;
+ tmp1 = gen_reg_rtx (V2HImode);
+ tmp2 = gen_reg_rtx (V2HImode);
+
+ if (! target
+ || GET_MODE (target) != V2HImode
+ || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
+ target = gen_reg_rtx (V2HImode);
+ if (! register_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+
+ emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
+
+ emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
+ const0_rtx, const1_rtx,
+ GEN_INT (MACFLAG_NONE)));
+
+ emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
+ const0_rtx));
+ emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
+ const0_rtx, const1_rtx));
+
+ return target;
+
+ default:
+ break;
+ }
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return bfin_expand_binop_builtin (d->icode, exp, target,
+ d->macflag);
+
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return bfin_expand_unop_builtin (d->icode, exp, target);
+
+ gcc_unreachable ();
+}
+
+static void
+bfin_conditional_register_usage (void)
+{
+ /* initialize condition code flag register rtx */
+ bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC);
+ bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS);
+ if (TARGET_FDPIC)
+ call_used_regs[FDPIC_REGNO] = 1;
+ if (!TARGET_FDPIC && flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+}
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS bfin_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN bfin_expand_builtin
+
+#undef TARGET_ASM_GLOBALIZE_LABEL
+#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START output_file_start
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE bfin_attribute_table
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS bfin_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST bfin_address_cost
+
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER bfin_assemble_integer
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST bfin_adjust_cost
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG bfin_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION bfin_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE bfin_option_override
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD bfin_secondary_reload
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE bfin_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init
+
+/* Passes after sched2 can break the helpful TImode annotations that
+ haifa-sched puts on every insn. Just do scheduling in reorg. */
+#undef TARGET_DELAY_SCHED2
+#define TARGET_DELAY_SCHED2 true
+
+/* Variable tracking should be run after all optimizations which
+ change order of insns. It also needs a valid CFG. */
+#undef TARGET_DELAY_VARTRACK
+#define TARGET_DELAY_VARTRACK true
+
+struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
new file mode 100644
index 000000000..c26b41cc5
--- /dev/null
+++ b/gcc/config/bfin/bfin.h
@@ -0,0 +1,1220 @@
+/* Definitions for the Blackfin port.
+ Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _BFIN_CONFIG
+#define _BFIN_CONFIG
+
+#define OBJECT_FORMAT_ELF
+
+#define BRT 1
+#define BRF 0
+
+/* CPU type. */
+typedef enum bfin_cpu_type
+{
+ BFIN_CPU_UNKNOWN,
+ BFIN_CPU_BF512,
+ BFIN_CPU_BF514,
+ BFIN_CPU_BF516,
+ BFIN_CPU_BF518,
+ BFIN_CPU_BF522,
+ BFIN_CPU_BF523,
+ BFIN_CPU_BF524,
+ BFIN_CPU_BF525,
+ BFIN_CPU_BF526,
+ BFIN_CPU_BF527,
+ BFIN_CPU_BF531,
+ BFIN_CPU_BF532,
+ BFIN_CPU_BF533,
+ BFIN_CPU_BF534,
+ BFIN_CPU_BF536,
+ BFIN_CPU_BF537,
+ BFIN_CPU_BF538,
+ BFIN_CPU_BF539,
+ BFIN_CPU_BF542,
+ BFIN_CPU_BF542M,
+ BFIN_CPU_BF544,
+ BFIN_CPU_BF544M,
+ BFIN_CPU_BF547,
+ BFIN_CPU_BF547M,
+ BFIN_CPU_BF548,
+ BFIN_CPU_BF548M,
+ BFIN_CPU_BF549,
+ BFIN_CPU_BF549M,
+ BFIN_CPU_BF561
+} bfin_cpu_t;
+
+/* Value of -mcpu= */
+extern bfin_cpu_t bfin_cpu_type;
+
+/* Value of -msi-revision= */
+extern int bfin_si_revision;
+
+extern unsigned int bfin_workarounds;
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION fprintf (stderr, " (BlackFin bfin)")
+
+/* Predefinition in the preprocessor for this target machine */
+#ifndef TARGET_CPU_CPP_BUILTINS
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("bfin"); \
+ builtin_define_std ("BFIN"); \
+ builtin_define ("__ADSPBLACKFIN__"); \
+ builtin_define ("__ADSPLPBLACKFIN__"); \
+ \
+ switch (bfin_cpu_type) \
+ { \
+ case BFIN_CPU_BF512: \
+ builtin_define ("__ADSPBF512__"); \
+ builtin_define ("__ADSPBF51x__"); \
+ break; \
+ case BFIN_CPU_BF514: \
+ builtin_define ("__ADSPBF514__"); \
+ builtin_define ("__ADSPBF51x__"); \
+ break; \
+ case BFIN_CPU_BF516: \
+ builtin_define ("__ADSPBF516__"); \
+ builtin_define ("__ADSPBF51x__"); \
+ break; \
+ case BFIN_CPU_BF518: \
+ builtin_define ("__ADSPBF518__"); \
+ builtin_define ("__ADSPBF51x__"); \
+ break; \
+ case BFIN_CPU_BF522: \
+ builtin_define ("__ADSPBF522__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF523: \
+ builtin_define ("__ADSPBF523__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF524: \
+ builtin_define ("__ADSPBF524__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF525: \
+ builtin_define ("__ADSPBF525__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF526: \
+ builtin_define ("__ADSPBF526__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF527: \
+ builtin_define ("__ADSPBF527__"); \
+ builtin_define ("__ADSPBF52x__"); \
+ break; \
+ case BFIN_CPU_BF531: \
+ builtin_define ("__ADSPBF531__"); \
+ break; \
+ case BFIN_CPU_BF532: \
+ builtin_define ("__ADSPBF532__"); \
+ break; \
+ case BFIN_CPU_BF533: \
+ builtin_define ("__ADSPBF533__"); \
+ break; \
+ case BFIN_CPU_BF534: \
+ builtin_define ("__ADSPBF534__"); \
+ break; \
+ case BFIN_CPU_BF536: \
+ builtin_define ("__ADSPBF536__"); \
+ break; \
+ case BFIN_CPU_BF537: \
+ builtin_define ("__ADSPBF537__"); \
+ break; \
+ case BFIN_CPU_BF538: \
+ builtin_define ("__ADSPBF538__"); \
+ break; \
+ case BFIN_CPU_BF539: \
+ builtin_define ("__ADSPBF539__"); \
+ break; \
+ case BFIN_CPU_BF542M: \
+ builtin_define ("__ADSPBF542M__"); \
+ case BFIN_CPU_BF542: \
+ builtin_define ("__ADSPBF542__"); \
+ builtin_define ("__ADSPBF54x__"); \
+ break; \
+ case BFIN_CPU_BF544M: \
+ builtin_define ("__ADSPBF544M__"); \
+ case BFIN_CPU_BF544: \
+ builtin_define ("__ADSPBF544__"); \
+ builtin_define ("__ADSPBF54x__"); \
+ break; \
+ case BFIN_CPU_BF547M: \
+ builtin_define ("__ADSPBF547M__"); \
+ case BFIN_CPU_BF547: \
+ builtin_define ("__ADSPBF547__"); \
+ builtin_define ("__ADSPBF54x__"); \
+ break; \
+ case BFIN_CPU_BF548M: \
+ builtin_define ("__ADSPBF548M__"); \
+ case BFIN_CPU_BF548: \
+ builtin_define ("__ADSPBF548__"); \
+ builtin_define ("__ADSPBF54x__"); \
+ break; \
+ case BFIN_CPU_BF549M: \
+ builtin_define ("__ADSPBF549M__"); \
+ case BFIN_CPU_BF549: \
+ builtin_define ("__ADSPBF549__"); \
+ builtin_define ("__ADSPBF54x__"); \
+ break; \
+ case BFIN_CPU_BF561: \
+ builtin_define ("__ADSPBF561__"); \
+ break; \
+ } \
+ \
+ if (bfin_si_revision != -1) \
+ { \
+ /* space of 0xnnnn and a NUL */ \
+ char *buf = XALLOCAVEC (char, 7); \
+ \
+ sprintf (buf, "0x%04x", bfin_si_revision); \
+ builtin_define_with_value ("__SILICON_REVISION__", buf, 0); \
+ } \
+ \
+ if (bfin_workarounds) \
+ builtin_define ("__WORKAROUNDS_ENABLED"); \
+ if (ENABLE_WA_SPECULATIVE_LOADS) \
+ builtin_define ("__WORKAROUND_SPECULATIVE_LOADS"); \
+ if (ENABLE_WA_SPECULATIVE_SYNCS) \
+ builtin_define ("__WORKAROUND_SPECULATIVE_SYNCS"); \
+ if (ENABLE_WA_INDIRECT_CALLS) \
+ builtin_define ("__WORKAROUND_INDIRECT_CALLS"); \
+ if (ENABLE_WA_RETS) \
+ builtin_define ("__WORKAROUND_RETS"); \
+ \
+ if (TARGET_FDPIC) \
+ { \
+ builtin_define ("__BFIN_FDPIC__"); \
+ builtin_define ("__FDPIC__"); \
+ } \
+ if (TARGET_ID_SHARED_LIBRARY \
+ && !TARGET_SEP_DATA) \
+ builtin_define ("__ID_SHARED_LIB__"); \
+ if (flag_no_builtin) \
+ builtin_define ("__NO_BUILTIN"); \
+ if (TARGET_MULTICORE) \
+ builtin_define ("__BFIN_MULTICORE"); \
+ if (TARGET_COREA) \
+ builtin_define ("__BFIN_COREA"); \
+ if (TARGET_COREB) \
+ builtin_define ("__BFIN_COREB"); \
+ if (TARGET_SDRAM) \
+ builtin_define ("__BFIN_SDRAM"); \
+ } \
+ while (0)
+#endif
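+
+/* For example, compiling with -mcpu=bf532-0.3 (the -mcpu option also
+   carries the silicon revision; the 0x0003 encoding of bfin_si_revision
+   is an assumption based on the 0x%04x format above) predefines
+   __ADSPBF532__ and __SILICON_REVISION__=0x0003 in addition to the
+   always-present bfin, BFIN and __ADSPBLACKFIN__ macros.  */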
+
+#define DRIVER_SELF_SPECS SUBTARGET_DRIVER_SELF_SPECS "\
+ %{mleaf-id-shared-library:%{!mid-shared-library:-mid-shared-library}} \
+ %{mfdpic:%{!fpic:%{!fpie:%{!fPIC:%{!fPIE:\
+ %{!fno-pic:%{!fno-pie:%{!fno-PIC:%{!fno-PIE:-fpie}}}}}}}}} \
+"
+#ifndef SUBTARGET_DRIVER_SELF_SPECS
+# define SUBTARGET_DRIVER_SELF_SPECS
+#endif
+
+#define LINK_GCC_C_SEQUENCE_SPEC "\
+ %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \
+"
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+ %{mno-fdpic:-mnopic} %{mfdpic}"
+
+#define LINK_SPEC "\
+%{h*} %{v:-V} \
+%{mfdpic:-melf32bfinfd -z text} \
+%{static:-dn -Bstatic} \
+%{shared:-G -Bdynamic} \
+%{symbolic:-Bsymbolic} \
+-init __init -fini __fini "
+
+/* Generate DSP instructions, like DSP halfword loads */
+#define TARGET_DSP (1)
+
+#define TARGET_DEFAULT 0
+
+/* Maximum number of library ids we permit */
+#define MAX_LIBRARY_ID 255
+
+extern const char *bfin_library_id_string;
+
+#define FUNCTION_MODE SImode
+#define Pmode SImode
+
+/* Store-condition-code instructions store 0 for false; this is the
+   value they store for true.  */
+#define STORE_FLAG_VALUE 1
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+#define STACK_PUSH_CODE PRE_DEC
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* We define a dummy ARGP register; the parameters start at offset 0 from
+ it. */
+#define FIRST_PARM_OFFSET(DECL) 0
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM REG_P6
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM REG_P7
+
+/* A dummy register that will be eliminated to either FP or SP. */
+#define ARG_POINTER_REGNUM REG_ARGP
+
+/* `PIC_OFFSET_TABLE_REGNUM'
+ The register number of the register used to address a table of
+ static data addresses in memory. In some cases this register is
+ defined by a processor's "application binary interface" (ABI).
+ When this macro is defined, RTL is generated for this register
+ once, as with the stack pointer and frame pointer registers. If
+ this macro is not defined, it is up to the machine-dependent files
+ to allocate such a register (if necessary). */
+#define PIC_OFFSET_TABLE_REGNUM (REG_P5)
+
+#define FDPIC_FPTR_REGNO REG_P1
+#define FDPIC_REGNO REG_P3
+#define OUR_FDPIC_REG get_hard_reg_initial_val (SImode, FDPIC_REGNO)
+
+/* A static chain register for nested functions. We need to use a
+ call-clobbered register for this. */
+#define STATIC_CHAIN_REGNUM REG_P2
+
+/* Define this if functions should assume that stack space has been
+ allocated for arguments even when their values are passed in
+ registers.
+
+ The value of this macro is the size, in bytes, of the area reserved for
+ arguments passed in registers.
+
+ This space can either be allocated by the caller or be a part of the
+ machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE'
+ says which. */
+#define FIXED_STACK_AREA 12
+#define REG_PARM_STACK_SPACE(FNDECL) FIXED_STACK_AREA
+
+/* Define this if the above stack space is to be considered part of the
+   space allocated by the caller.  */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+/* Define this if the maximum size of all the outgoing args is to be
+ accumulated and pushed during the prologue. The amount can be
+ found in the variable crtl->outgoing_args_size. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/*#define DATA_ALIGNMENT(TYPE, BASIC-ALIGN) for arrays.. */
+
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) bfin_local_alignment ((TYPE), (ALIGN))
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define TRAMPOLINE_SIZE (TARGET_FDPIC ? 30 : 18)
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+   On the Blackfin, the arg pointer and the frame pointer are the
+   eliminable registers: the arg pointer can be replaced by either the
+   frame pointer or the stack pointer, and the frame pointer can be
+   replaced by the stack pointer, depending upon the circumstances.  */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = bfin_initial_elimination_offset ((FROM), (TO)))
+
+/* This processor has
+   8 data registers for doing arithmetic
+   8 pointer registers for doing addressing, including
+ 1 stack pointer P6
+ 1 frame pointer P7
+ 4 sets of indexing registers (I0-3, B0-3, L0-3, M0-3)
+ 1 condition code flag register CC
+ 5 return address registers RETS/I/X/N/E
+ 1 arithmetic status register (ASTAT). */
+
+#define FIRST_PSEUDO_REGISTER 50
+
+#define D_REGNO_P(X) ((X) <= REG_R7)
+#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7)
+#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3)
+#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X))
+#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3)
+#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X)))
+#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X)))
+#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X)))
+#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X)))
+
+#define REGISTER_NAMES { \
+ "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \
+ "P0", "P1", "P2", "P3", "P4", "P5", "SP", "FP", \
+ "I0", "I1", "I2", "I3", "B0", "B1", "B2", "B3", \
+ "L0", "L1", "L2", "L3", "M0", "M1", "M2", "M3", \
+ "A0", "A1", \
+ "CC", \
+ "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \
+ "ARGP", \
+ "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \
+}
+
+#define SHORT_REGISTER_NAMES { \
+ "R0.L", "R1.L", "R2.L", "R3.L", "R4.L", "R5.L", "R6.L", "R7.L", \
+ "P0.L", "P1.L", "P2.L", "P3.L", "P4.L", "P5.L", "SP.L", "FP.L", \
+ "I0.L", "I1.L", "I2.L", "I3.L", "B0.L", "B1.L", "B2.L", "B3.L", \
+ "L0.L", "L1.L", "L2.L", "L3.L", "M0.L", "M1.L", "M2.L", "M3.L", }
+
+#define HIGH_REGISTER_NAMES { \
+ "R0.H", "R1.H", "R2.H", "R3.H", "R4.H", "R5.H", "R6.H", "R7.H", \
+ "P0.H", "P1.H", "P2.H", "P3.H", "P4.H", "P5.H", "SP.H", "FP.H", \
+ "I0.H", "I1.H", "I2.H", "I3.H", "B0.H", "B1.H", "B2.H", "B3.H", \
+ "L0.H", "L1.H", "L2.H", "L3.H", "M0.H", "M1.H", "M2.H", "M3.H", }
+
+#define DREGS_PAIR_NAMES { \
+ "R1:0.p", 0, "R3:2.p", 0, "R5:4.p", 0, "R7:6.p", 0, }
+
+#define BYTE_REGISTER_NAMES { \
+ "R0.B", "R1.B", "R2.B", "R3.B", "R4.B", "R5.B", "R6.B", "R7.B", }
+
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+
+#define FIXED_REGISTERS \
+/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \
+{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \
+/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, \
+/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \
+ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*lb0/1 */ \
+ 1, 1 \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \
+{ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, \
+/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*lb0/1 */ \
+ 1, 1 \
+}
+
+/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values. */
+
+#define REG_ALLOC_ORDER \
+{ REG_R0, REG_R1, REG_R2, REG_R3, REG_R7, REG_R6, REG_R5, REG_R4, \
+ REG_P2, REG_P1, REG_P0, REG_P5, REG_P4, REG_P3, REG_P6, REG_P7, \
+ REG_A0, REG_A1, \
+ REG_I0, REG_I1, REG_I2, REG_I3, REG_B0, REG_B1, REG_B2, REG_B3, \
+ REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \
+ REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE, \
+ REG_ASTAT, REG_SEQSTAT, REG_USP, \
+ REG_CC, REG_ARGP, \
+ REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1 \
+}
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+
+enum reg_class
+{
+ NO_REGS,
+ IREGS,
+ BREGS,
+ LREGS,
+ MREGS,
+ CIRCREGS, /* Circular buffering registers; Ix, Bx and Lx together form them. See Automatic Circular Buffering. */
+ DAGREGS,
+ EVEN_AREGS,
+ ODD_AREGS,
+ AREGS,
+ CCREGS,
+ EVEN_DREGS,
+ ODD_DREGS,
+ D0REGS,
+ D1REGS,
+ D2REGS,
+ D3REGS,
+ D4REGS,
+ D5REGS,
+ D6REGS,
+ D7REGS,
+ DREGS,
+ P0REGS,
+ FDPIC_REGS,
+ FDPIC_FPTR_REGS,
+ PREGS_CLOBBERED,
+ PREGS,
+ IPREGS,
+ DPREGS,
+ MOST_REGS,
+ LT_REGS,
+ LC_REGS,
+ LB_REGS,
+ PROLOGUE_REGS,
+ NON_A_CC_REGS,
+ ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int)LIM_REG_CLASSES)
+
+#define GENERAL_REGS DPREGS
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "IREGS", \
+ "BREGS", \
+ "LREGS", \
+ "MREGS", \
+ "CIRCREGS", \
+ "DAGREGS", \
+ "EVEN_AREGS", \
+ "ODD_AREGS", \
+ "AREGS", \
+ "CCREGS", \
+ "EVEN_DREGS", \
+ "ODD_DREGS", \
+ "D0REGS", \
+ "D1REGS", \
+ "D2REGS", \
+ "D3REGS", \
+ "D4REGS", \
+ "D5REGS", \
+ "D6REGS", \
+ "D7REGS", \
+ "DREGS", \
+ "P0REGS", \
+ "FDPIC_REGS", \
+ "FDPIC_FPTR_REGS", \
+ "PREGS_CLOBBERED", \
+ "PREGS", \
+ "IPREGS", \
+ "DPREGS", \
+ "MOST_REGS", \
+ "LT_REGS", \
+ "LC_REGS", \
+ "LB_REGS", \
+ "PROLOGUE_REGS", \
+ "NON_A_CC_REGS", \
+ "ALL_REGS" }
+
+/* An initializer containing the contents of the register classes, as integers
+ which are bit masks. The Nth integer specifies the contents of class N.
+ The way the integer MASK is interpreted is that register R is in the class
+ if `MASK & (1 << R)' is 1.
+
+ When the machine has more than 32 registers, an integer does not suffice.
+ Then the integers are replaced by sub-initializers, braced groupings
+ containing several integers. Each sub-initializer must be suitable as an
+ initializer for the type `HARD_REG_SET' which is defined in
+ `hard-reg-set.h'. */
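+
+/* Worked example: the IREGS class holds I0-I3, hard registers 16
+   through 19, so its low-word mask is
+   (1 << 16) | (1 << 17) | (1 << 18) | (1 << 19) == 0x000f0000.
+   Similarly REG_CC is register 34, i.e. bit 2 of the second word,
+   giving { 0, 0x4 } for CCREGS below.  */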
+
+/* NOTE: DSP registers, IREGS - AREGS, are not GENERAL_REGS. We use
+ MOST_REGS as the union of DPREGS and DAGREGS. */
+
+#define REG_CLASS_CONTENTS \
+ /* 31 - 0 63-32 */ \
+{ { 0x00000000, 0 }, /* NO_REGS */ \
+ { 0x000f0000, 0 }, /* IREGS */ \
+ { 0x00f00000, 0 }, /* BREGS */ \
+ { 0x0f000000, 0 }, /* LREGS */ \
+ { 0xf0000000, 0 }, /* MREGS */ \
+ { 0x0fff0000, 0 }, /* CIRCREGS */ \
+ { 0xffff0000, 0 }, /* DAGREGS */ \
+ { 0x00000000, 0x1 }, /* EVEN_AREGS */ \
+ { 0x00000000, 0x2 }, /* ODD_AREGS */ \
+ { 0x00000000, 0x3 }, /* AREGS */ \
+ { 0x00000000, 0x4 }, /* CCREGS */ \
+ { 0x00000055, 0 }, /* EVEN_DREGS */ \
+ { 0x000000aa, 0 }, /* ODD_DREGS */ \
+ { 0x00000001, 0 }, /* D0REGS */ \
+ { 0x00000002, 0 }, /* D1REGS */ \
+ { 0x00000004, 0 }, /* D2REGS */ \
+ { 0x00000008, 0 }, /* D3REGS */ \
+ { 0x00000010, 0 }, /* D4REGS */ \
+ { 0x00000020, 0 }, /* D5REGS */ \
+ { 0x00000040, 0 }, /* D6REGS */ \
+ { 0x00000080, 0 }, /* D7REGS */ \
+ { 0x000000ff, 0 }, /* DREGS */ \
+ { 0x00000100, 0x000 }, /* P0REGS */ \
+ { 0x00000800, 0x000 }, /* FDPIC_REGS */ \
+ { 0x00000200, 0x000 }, /* FDPIC_FPTR_REGS */ \
+ { 0x00004700, 0x800 }, /* PREGS_CLOBBERED */ \
+ { 0x0000ff00, 0x800 }, /* PREGS */ \
+ { 0x000fff00, 0x800 }, /* IPREGS */ \
+ { 0x0000ffff, 0x800 }, /* DPREGS */ \
+ { 0xffffffff, 0x800 }, /* MOST_REGS */\
+ { 0x00000000, 0x3000 }, /* LT_REGS */\
+ { 0x00000000, 0xc000 }, /* LC_REGS */\
+ { 0x00000000, 0x30000 }, /* LB_REGS */\
+ { 0x00000000, 0x3f7f8 }, /* PROLOGUE_REGS */\
+ { 0xffffffff, 0x3fff8 }, /* NON_A_CC_REGS */\
+ { 0xffffffff, 0x3ffff }} /* ALL_REGS */
+
+#define IREG_POSSIBLE_P(OUTER) \
+ ((OUTER) == POST_INC || (OUTER) == PRE_INC \
+ || (OUTER) == POST_DEC || (OUTER) == PRE_DEC \
+ || (OUTER) == MEM || (OUTER) == ADDRESS)
+
+#define MODE_CODE_BASE_REG_CLASS(MODE, OUTER, INDEX) \
+ ((MODE) == HImode && IREG_POSSIBLE_P (OUTER) ? IPREGS : PREGS)
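+
+/* For instance, an SImode access must use a P register as its base,
+   while a HImode access whose address appears inside a MEM or an
+   auto-increment such as POST_INC may use either a P or an I
+   register, hence IPREGS.  */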
+
+#define INDEX_REG_CLASS PREGS
+
+#define REGNO_OK_FOR_BASE_STRICT_P(X, MODE, OUTER, INDEX) \
+ (P_REGNO_P (X) || (X) == REG_ARGP \
+ || (IREG_POSSIBLE_P (OUTER) && (MODE) == HImode \
+ && I_REGNO_P (X)))
+
+#define REGNO_OK_FOR_BASE_NONSTRICT_P(X, MODE, OUTER, INDEX) \
+ ((X) >= FIRST_PSEUDO_REGISTER \
+ || REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX))
+
+#ifdef REG_OK_STRICT
+#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, OUTER, INDEX) \
+ REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX)
+#else
+#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, OUTER, INDEX) \
+ REGNO_OK_FOR_BASE_NONSTRICT_P (X, MODE, OUTER, INDEX)
+#endif
+
+#define REGNO_OK_FOR_INDEX_P(X) 0
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+((REGNO) == REG_R0 ? D0REGS \
+ : (REGNO) == REG_R1 ? D1REGS \
+ : (REGNO) == REG_R2 ? D2REGS \
+ : (REGNO) == REG_R3 ? D3REGS \
+ : (REGNO) == REG_R4 ? D4REGS \
+ : (REGNO) == REG_R5 ? D5REGS \
+ : (REGNO) == REG_R6 ? D6REGS \
+ : (REGNO) == REG_R7 ? D7REGS \
+ : (REGNO) == REG_P0 ? P0REGS \
+ : (REGNO) < REG_I0 ? PREGS \
+ : (REGNO) == REG_ARGP ? PREGS \
+ : (REGNO) >= REG_I0 && (REGNO) <= REG_I3 ? IREGS \
+ : (REGNO) >= REG_L0 && (REGNO) <= REG_L3 ? LREGS \
+ : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS \
+ : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS \
+ : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS \
+ : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS \
+ : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS \
+ : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS \
+ : (REGNO) == REG_CC ? CCREGS \
+ : (REGNO) >= REG_RETS ? PROLOGUE_REGS \
+ : NO_REGS)
+
+/* The following macro defines cover classes for the Integrated
+   Register Allocator.  Cover classes are a set of non-intersecting
+   register classes covering all hard registers used for register
+   allocation purposes.  Any move between two registers in a cover
+   class should be cheaper than a load or store of the registers.
+   The macro value is an array of register classes with
+   LIM_REG_CLASSES used as the end marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ MOST_REGS, AREGS, CCREGS, LIM_REG_CLASSES \
+}
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Do not allow values to be stored in REG_CC for any mode.  */
+/* Do not allow values to be stored in P registers unless the mode is SImode.  */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE))
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((MODE) == V2PDImode && (CLASS) == AREGS ? 2 \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((MODE) == PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 1 \
+ : (MODE) == V2PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 2 \
+ : CLASS_MAX_NREGS (GENERAL_REGS, MODE))
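+
+/* Worked example: a DImode value (8 bytes) needs (8 + 4 - 1) / 4 == 2
+   consecutive registers, while SImode fits in one.  Each accumulator
+   A0/A1 holds an entire PDImode value by itself, and the pair
+   together holds a V2PDImode value.  */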
+
+/* A C expression that is nonzero if hard register TO can be
+   considered for use as a rename register for register FROM.  */
+#define HARD_REGNO_RENAME_OK(FROM, TO) bfin_hard_regno_rename_ok (FROM, TO)
+
+/* A C expression that is nonzero if it is desirable to choose
+ register allocation so as to avoid move instructions between a
+ value of mode MODE1 and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R,
+ MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1,
+ MODE2)' must be zero. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) \
+ || ((GET_MODE_CLASS (MODE1) == MODE_INT \
+ || GET_MODE_CLASS (MODE1) == MODE_FLOAT) \
+ && (GET_MODE_CLASS (MODE2) == MODE_INT \
+ || GET_MODE_CLASS (MODE2) == MODE_FLOAT) \
+ && (MODE1) != BImode && (MODE2) != BImode \
+ && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
+ && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD))
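+
+/* For example, SImode and HImode are tieable, being integer modes no
+   wider than a word; BImode ties with nothing else, and DImode,
+   being wider than a word, ties only with itself.  */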
+
+/* `PREFERRED_RELOAD_CLASS (X, CLASS)'
+ A C expression that places additional restrictions on the register
+ class to use when it is necessary to copy value X into a register
+ in class CLASS. The value is a register class; perhaps CLASS, or
+ perhaps another, smaller class. */
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ (GET_CODE (X) == POST_INC \
+ || GET_CODE (X) == POST_DEC \
+ || GET_CODE (X) == PRE_DEC ? PREGS : (CLASS))
+
+/* Function Calling Conventions. */
+
+/* The type of the current function; normal functions are of type
+ SUBROUTINE. */
+typedef enum {
+ SUBROUTINE, INTERRUPT_HANDLER, EXCPT_HANDLER, NMI_HANDLER
+} e_funkind;
+#define FUNCTION_RETURN_REGISTERS { REG_RETS, REG_RETI, REG_RETX, REG_RETN }
+
+#define FUNCTION_ARG_REGISTERS { REG_R0, REG_R1, REG_R2, -1 }
+
+/* Flags for the call/call_value rtl operations set up by function_arg */
+#define CALL_NORMAL 0x00000000 /* no special processing */
+#define CALL_LONG 0x00000001 /* always call indirect */
+#define CALL_SHORT 0x00000002 /* always call by symbol */
+
+typedef struct {
+ int words; /* # words passed so far */
+ int nregs; /* # registers available for passing */
+ int *arg_regs; /* -1-terminated array of registers */
+ int call_cookie; /* Do special things for this call */
+} CUMULATIVE_ARGS;
+
+#define FUNCTION_ARG_REGNO_P(REGNO) function_arg_regno_p (REGNO)
+
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT, N_NAMED_ARGS) \
+ (init_cumulative_args (&CUM, FNTYPE, LIBNAME))
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0.
+*/
+
+#define VALUE_REGNO(MODE) (REG_R0)
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG (TYPE_MODE (VALTYPE), \
+ VALUE_REGNO(TYPE_MODE(VALTYPE)))
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, VALUE_REGNO(MODE))
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == REG_R0)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Before the prologue, the return address is in the RETS register. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, REG_RETS)
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) bfin_return_addr_rtx (COUNT)
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (REG_RETS)
+
+/* Call instructions don't modify the stack pointer on the Blackfin. */
+#define INCOMING_FRAME_SP_OFFSET 0
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, REG_P2)
+#define EH_RETURN_HANDLER_RTX \
+ gen_frame_mem (Pmode, plus_constant (frame_pointer_rtx, UNITS_PER_WORD))
+
+/* Addressing Modes */
+
+/* Nonzero if the constant value X is a legitimate general operand.
+   SYMBOL_REFs are not legitimate and will be put into the constant
+   pool.  See force_const_mem ().
+   If -mno-pool, all constants are legitimate.
+ */
+#define LEGITIMATE_CONSTANT_P(X) bfin_legitimate_constant_p (X)
+
+/* A number, the maximum number of registers that can appear in a
+ valid memory address. Note that it is up to you to specify a
+ value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P'
+ would ever accept. */
+#define MAX_REGS_PER_ADDRESS 1
+
+#define LEGITIMATE_MODE_FOR_AUTOINC_P(MODE) \
+ (GET_MODE_SIZE (MODE) <= 4 || (MODE) == PDImode)
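+
+/* Hence QImode through SImode, as well as the accumulator mode
+   PDImode, may use the auto-increment addressing modes declared
+   below, while DImode and DFmode may not.  */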
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_PRE_DECREMENT 1
+
+/* `LEGITIMATE_PIC_OPERAND_P (X)'
+ A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent
+ code. You can assume that X satisfies `CONSTANT_P', so you need
+ not check this. You can also assume FLAG_PIC is true, so you need
+ not check it either. You need not define this macro if all
+ constants (including `SYMBOL_REF') can be immediate operands when
+ generating position independent code. */
+#define LEGITIMATE_PIC_OPERAND_P(X) ! SYMBOLIC_CONST (X)
+
+#define SYMBOLIC_CONST(X) \
+(GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
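+
+/* For example, (symbol_ref "foo") and (const (plus (symbol_ref "foo")
+   (const_int 4))) are both symbolic, so neither is a legitimate PIC
+   immediate operand; a plain (const_int 4) is.  */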
+
+#define NOTICE_UPDATE_CC(EXPR, INSN) 0
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX UNITS_PER_WORD
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead. */
+
+#define MOVE_RATIO(speed) 5
+
+/* STORAGE LAYOUT: target machine storage layout
+   Define this macro as a C expression which is nonzero if accessing
+   less than a word of memory (i.e. a `char' or a `short') is no
+   faster than accessing a word of memory, i.e., if such accesses
+   require more than one instruction or if there is no difference in
+   cost between byte and (aligned) word loads.
+
+   When this macro is not defined, the compiler will access a field by
+   finding the smallest containing object; when it is defined, a
+   fullword load will be used if alignment permits.  Unless byte
+   accesses are faster than word accesses, using word accesses is
+   preferable since it may eliminate subsequent memory accesses if
+   subsequent accesses occur to other fields in the same word of the
+   structure, but to different bytes.  */
+#define SLOW_BYTE_ACCESS 0
+#define SLOW_SHORT_ACCESS 0
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if the most significant byte of a word is the lowest
+   numbered.  We can't access individual bytes, but if we could, it
+   would be in big-endian order.  */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if the most significant word of a multiword number is
+   the lowest numbered.  */
+#define WORDS_BIG_ENDIAN 0
+
+/* Number of bits in an addressable storage unit.  */
+#define BITS_PER_UNIT 8
+
+/* Width in bits of a "word", which is the contents of a machine register.
+ Note that this is not necessarily the width of data type `int';
+ if using 16-bit ints on a 68000, this would still be 32.
+ But on a machine with 16-bit registers, this would be 16. */
+#define BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE 32
+
+/* Allocation boundary (in *bits*) for storing pointers in memory. */
+#define POINTER_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* Define this if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Never define PCC_BITFIELD_TYPE_MATTERS; doing so really causes
+   alignment problems.  */
+
+#define UNITS_PER_FLOAT ((FLOAT_TYPE_SIZE + BITS_PER_UNIT - 1) / \
+ BITS_PER_UNIT)
+
+#define UNITS_PER_DOUBLE ((DOUBLE_TYPE_SIZE + BITS_PER_UNIT - 1) / \
+ BITS_PER_UNIT)
+
+
+/* The C type to use for `size_t'.  */
+#define SIZE_TYPE "long unsigned int"
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+#define FLOAT_TYPE_SIZE BITS_PER_WORD
+#define SHORT_TYPE_SIZE 16
+#define CHAR_TYPE_SIZE 8
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+/* Note: Fix this to depend on target switch. -- lev */
+
+/* Note: Try to implement double and force long double. -- tonyko
+ * #define __DOUBLES_ARE_FLOATS__
+ * #define DOUBLE_TYPE_SIZE FLOAT_TYPE_SIZE
+ * #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE
+ * #define DOUBLES_ARE_FLOATS 1
+ */
+
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* `PROMOTE_MODE (M, UNSIGNEDP, TYPE)'
+ A macro to update M and UNSIGNEDP when an object whose type is
+ TYPE and which has the specified mode and signedness is to be
+ stored in a register. This macro is only called when TYPE is a
+ scalar type.
+
+ On most RISC machines, which only have operations that operate on
+ a full register, define this macro to set M to `word_mode' if M is
+ an integer mode narrower than `BITS_PER_WORD'. In most cases,
+ only integer modes should be widened because wider-precision
+ floating-point operations are usually more expensive than their
+ narrower counterparts.
+
+ For most machines, the macro definition does not change UNSIGNEDP.
+ However, some machines have instructions that preferentially
+ handle either signed or unsigned quantities of certain modes. For
+ example, on the DEC Alpha, 32-bit loads from memory and 32-bit add
+ instructions sign-extend the result to 64 bits. On such machines,
+ set UNSIGNEDP according to which kind of extension is more
+ efficient.
+
+ Do not define this macro if it would never modify M.*/
+
+#define BFIN_PROMOTE_MODE_P(MODE) \
+ (!TARGET_DSP && GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD)
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (BFIN_PROMOTE_MODE_P(MODE)) \
+ { \
+ if (MODE == QImode) \
+ UNSIGNEDP = 1; \
+ else if (MODE == HImode) \
+ UNSIGNEDP = 0; \
+ (MODE) = SImode; \
+ }
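+
+/* For example, when BFIN_PROMOTE_MODE_P holds, a QImode value is
+   widened to SImode as unsigned (zero-extended), while a HImode
+   value is widened as signed (sign-extended).  */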
+
+/* Describing Relative Costs of Operations */
+
+/* Do not put function addresses into the constant pool.  */
+#define NO_FUNCTION_CSE 1
+
+/* A C expression for the cost of moving data from a register in class FROM to
+ one in class TO. The classes are expressed using the enumeration values
+ such as `GENERAL_REGS'. A value of 2 is the default; other values are
+ interpreted relative to that.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ bfin_register_move_cost ((MODE), (CLASS1), (CLASS2))
+
+/* A C expression for the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost. */
+
+#define MEMORY_MOVE_COST(MODE, CLASS, IN) \
+ bfin_memory_move_cost ((MODE), (CLASS), (IN))
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE SImode
+
+#define JUMP_TABLES_IN_TEXT_SECTION flag_pic
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified.
+#define WORD_REGISTER_OPERATIONS
+*/
+
+/* Evaluates to true if A and B are mac flags that can be used
+ together in a single multiply insn. That is the case if they are
+ both the same flag not involving M, or if one is a combination of
+ the other with M. */
+#define MACFLAGS_MATCH_P(A, B) \
+ ((A) == (B) \
+ || ((A) == MACFLAG_NONE && (B) == MACFLAG_M) \
+ || ((A) == MACFLAG_M && (B) == MACFLAG_NONE) \
+ || ((A) == MACFLAG_IS && (B) == MACFLAG_IS_M) \
+ || ((A) == MACFLAG_IS_M && (B) == MACFLAG_IS))
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+#define PRINT_OPERAND(FILE, RTX, CODE) print_operand (FILE, RTX, CODE)
+#define PRINT_OPERAND_ADDRESS(FILE, RTX) print_address_operand (FILE, RTX)
+
+typedef enum sections {
+ CODE_DIR,
+ DATA_DIR,
+ LAST_SECT_NM
+} SECT_ENUM_T;
+
+typedef enum directives {
+ LONG_CONST_DIR,
+ SHORT_CONST_DIR,
+ BYTE_CONST_DIR,
+ SPACE_DIR,
+ INIT_DIR,
+ LAST_DIR_NM
+} DIR_ENUM_T;
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) \
+ ((C) == ';' \
+ || ((C) == '|' && (STR)[1] == '|'))
+
+#define TEXT_SECTION_ASM_OP ".text;"
+#define DATA_SECTION_ASM_OP ".data;"
+
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+
+#define ASM_GLOBALIZE_LABEL1(FILE, NAME) \
+ do { fputs (".global ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputc (';',FILE); \
+ fputc ('\n',FILE); \
+ } while (0)
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \
+ do { \
+ fputs (".type ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputs (", STT_FUNC", FILE); \
+ fputc (';',FILE); \
+ fputc ('\n',FILE); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ } while (0)
+
+#define ASM_OUTPUT_LABEL(FILE, NAME) \
+ do { assemble_name (FILE, NAME); \
+ fputs (":\n",FILE); \
+ } while (0)
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ do { fprintf (FILE, "_%s", NAME); \
+ } while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+do { char __buf[256]; \
+ fprintf (FILE, "\t.dd\t"); \
+ ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \
+ assemble_name (FILE, __buf); \
+ fputc (';', FILE); \
+ fputc ('\n', FILE); \
+ } while (0)
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL)
+
+#define MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) \
+ do { \
+ char __buf[256]; \
+ fprintf (FILE, "\t.dd\t"); \
+ ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \
+ assemble_name (FILE, __buf); \
+ fputs (" - ", FILE); \
+ ASM_GENERATE_INTERNAL_LABEL (__buf, "L", REL); \
+ assemble_name (FILE, __buf); \
+ fputc (';', FILE); \
+ fputc ('\n', FILE); \
+ } while (0)
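+
+/* These emit jump-table entries of roughly the form ".dd L5;" and
+   ".dd L5 - L2;" respectively; the exact label spelling is whatever
+   ASM_GENERATE_INTERNAL_LABEL produces.  */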
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \
+ } while (0)
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ do { \
+ asm_output_skip (FILE, SIZE); \
+ } while (0)
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+do { \
+ switch_to_section (data_section); \
+ if ((SIZE) >= (unsigned int) 4 ) ASM_OUTPUT_ALIGN(FILE,2); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ fprintf (FILE, "%s %ld;\n", ASM_SPACE, \
+ (ROUNDED) > (unsigned int) 1 ? (ROUNDED) : 1); \
+} while (0)
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+ do { \
+ ASM_GLOBALIZE_LABEL1(FILE,NAME); \
+ ASM_OUTPUT_LOCAL (FILE, NAME, SIZE, ROUNDED); } while(0)
+
+#define ASM_COMMENT_START "//"
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ fprintf (FILE, "\tCALL __mcount;\n"); \
+ } while(0)
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) fprintf (FILE, "[SP--] = %s;\n", reg_names[REGNO])
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) fprintf (FILE, "%s = [SP++];\n", reg_names[REGNO])
+
+extern struct rtx_def *bfin_cc_rtx, *bfin_rets_rtx;
+
+/* This works for GAS and some other assemblers. */
+#define SET_ASM_OP ".set "
+
+/* DBX register number for a given compiler register number */
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#define SIZE_ASM_OP "\t.size\t"
+
+extern int splitting_for_sched, splitting_loops;
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) ((CHAR) == '!')
+
+#ifndef TARGET_SUPPORTS_SYNC_CALLS
+#define TARGET_SUPPORTS_SYNC_CALLS 0
+#endif
+
+#endif /* _BFIN_CONFIG */
diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md
new file mode 100644
index 000000000..3fac01ca5
--- /dev/null
+++ b/gcc/config/bfin/bfin.md
@@ -0,0 +1,4211 @@
+;;- Machine description for Blackfin for GNU compiler
+;; Copyright 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by Analog Devices.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+; operand punctuation marks:
+;
+; X -- integer value printed as log2
+; Y -- integer value printed as log2(~value) - for bitclear
+; h -- print half word register, low part
+; d -- print half word register, high part
+; D -- print operand as dregs pairs
+; w -- print operand as accumulator register word (a0w, a1w)
+; H -- high part of double mode operand
+; T -- byte register representation
+
+; constant operand classes
+;
+; J 2**N 5bit imm scaled
+; Ks7 -64 .. 63 signed 7bit imm
+; Ku5 0..31 unsigned 5bit imm
+; Ks4 -8 .. 7 signed 4bit imm
+; Ks3 -4 .. 3 signed 3bit imm
+; Ku3 0 .. 7 unsigned 3bit imm
+; Pn 0, 1, 2 constants 0, 1 or 2, corresponding to n
+;
+; register operands
+; d (r0..r7)
+; a (p0..p5,fp,sp)
+; e (a0, a1)
+; b (i0..i3)
+; f (m0..m3)
+; v (b0..b3)
+; c (i0..i3,m0..m3) CIRCREGS
+; C (CC) CCREGS
+; t (lt0,lt1)
+; k (lc0,lc1)
+; u (lb0,lb1)
+;
+
+;; Define constants for hard registers.
+
+(define_constants
+ [(REG_R0 0)
+ (REG_R1 1)
+ (REG_R2 2)
+ (REG_R3 3)
+ (REG_R4 4)
+ (REG_R5 5)
+ (REG_R6 6)
+ (REG_R7 7)
+
+ (REG_P0 8)
+ (REG_P1 9)
+ (REG_P2 10)
+ (REG_P3 11)
+ (REG_P4 12)
+ (REG_P5 13)
+ (REG_P6 14)
+ (REG_P7 15)
+
+ (REG_SP 14)
+ (REG_FP 15)
+
+ (REG_I0 16)
+ (REG_I1 17)
+ (REG_I2 18)
+ (REG_I3 19)
+
+ (REG_B0 20)
+ (REG_B1 21)
+ (REG_B2 22)
+ (REG_B3 23)
+
+ (REG_L0 24)
+ (REG_L1 25)
+ (REG_L2 26)
+ (REG_L3 27)
+
+ (REG_M0 28)
+ (REG_M1 29)
+ (REG_M2 30)
+ (REG_M3 31)
+
+ (REG_A0 32)
+ (REG_A1 33)
+
+ (REG_CC 34)
+ (REG_RETS 35)
+ (REG_RETI 36)
+ (REG_RETX 37)
+ (REG_RETN 38)
+ (REG_RETE 39)
+
+ (REG_ASTAT 40)
+ (REG_SEQSTAT 41)
+ (REG_USP 42)
+
+ (REG_ARGP 43)
+
+ (REG_LT0 44)
+ (REG_LT1 45)
+ (REG_LC0 46)
+ (REG_LC1 47)
+ (REG_LB0 48)
+ (REG_LB1 49)])
+
+;; Constants used in UNSPECs and UNSPEC_VOLATILEs.
+
+(define_constants
+ [(UNSPEC_CBRANCH_TAKEN 0)
+ (UNSPEC_CBRANCH_NOPS 1)
+ (UNSPEC_RETURN 2)
+ (UNSPEC_MOVE_PIC 3)
+ (UNSPEC_LIBRARY_OFFSET 4)
+ (UNSPEC_PUSH_MULTIPLE 5)
+ ;; Multiply or MAC with extra CONST_INT operand specifying the macflag
+ (UNSPEC_MUL_WITH_FLAG 6)
+ (UNSPEC_MAC_WITH_FLAG 7)
+ (UNSPEC_MOVE_FDPIC 8)
+ (UNSPEC_FUNCDESC_GOT17M4 9)
+ (UNSPEC_LSETUP_END 10)
+ ;; Distinguish a 32-bit version of an insn from a 16-bit version.
+ (UNSPEC_32BIT 11)
+ (UNSPEC_NOP 12)
+ (UNSPEC_ONES 13)
+ (UNSPEC_ATOMIC 14)])
+
+(define_constants
+ [(UNSPEC_VOLATILE_CSYNC 1)
+ (UNSPEC_VOLATILE_SSYNC 2)
+ (UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
+ (UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
+ (UNSPEC_VOLATILE_DUMMY 5)
+ (UNSPEC_VOLATILE_STALL 6)])
+
+(define_constants
+ [(MACFLAG_NONE 0)
+ (MACFLAG_T 1)
+ (MACFLAG_FU 2)
+ (MACFLAG_TFU 3)
+ (MACFLAG_IS 4)
+ (MACFLAG_IU 5)
+ (MACFLAG_W32 6)
+ (MACFLAG_M 7)
+ (MACFLAG_IS_M 8)
+ (MACFLAG_S2RND 9)
+ (MACFLAG_ISS2 10)
+ (MACFLAG_IH 11)])
+
+(define_attr "type"
+ "move,movcc,mvi,mcld,mcst,dsp32,dsp32shiftimm,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall"
+ (const_string "misc"))
+
+(define_attr "addrtype" "32bit,preg,spreg,ireg"
+ (cond [(and (eq_attr "type" "mcld")
+ (and (match_operand 0 "dp_register_operand" "")
+ (match_operand 1 "mem_p_address_operand" "")))
+ (const_string "preg")
+ (and (eq_attr "type" "mcld")
+ (and (match_operand 0 "dp_register_operand" "")
+ (match_operand 1 "mem_spfp_address_operand" "")))
+ (const_string "spreg")
+ (and (eq_attr "type" "mcld")
+ (and (match_operand 0 "dp_register_operand" "")
+ (match_operand 1 "mem_i_address_operand" "")))
+ (const_string "ireg")
+ (and (eq_attr "type" "mcst")
+ (and (match_operand 1 "dp_register_operand" "")
+ (match_operand 0 "mem_p_address_operand" "")))
+ (const_string "preg")
+ (and (eq_attr "type" "mcst")
+ (and (match_operand 1 "dp_register_operand" "")
+ (match_operand 0 "mem_spfp_address_operand" "")))
+ (const_string "spreg")
+ (and (eq_attr "type" "mcst")
+ (and (match_operand 1 "dp_register_operand" "")
+ (match_operand 0 "mem_i_address_operand" "")))
+ (const_string "ireg")]
+ (const_string "32bit")))
+
+(define_attr "storereg" "preg,other"
+ (cond [(and (eq_attr "type" "mcst")
+ (match_operand 1 "p_register_operand" ""))
+ (const_string "preg")]
+ (const_string "other")))
+
+;; Scheduling definitions
+
+(define_automaton "bfin")
+
+(define_cpu_unit "slot0" "bfin")
+(define_cpu_unit "slot1" "bfin")
+(define_cpu_unit "slot2" "bfin")
+
+;; Three units used to enforce parallel issue restrictions:
+;; only one of the 16-bit slots can use a P register in an address,
+;; and only one of them can be a store.
+(define_cpu_unit "store" "bfin")
+(define_cpu_unit "pregs" "bfin")
+
+;; A dummy unit used to delay scheduling of loads after a conditional
+;; branch.
+(define_cpu_unit "load" "bfin")
+
+;; A logical unit used to work around anomaly 05000074.
+(define_cpu_unit "anomaly_05000074" "bfin")
+
+(define_reservation "core" "slot0+slot1+slot2")
+
+(define_insn_reservation "alu" 1
+ (eq_attr "type" "move,movcc,mvi,alu0,shft,brcc,br,call,misc,sync,compare")
+ "core")
+
+(define_insn_reservation "imul" 3
+ (eq_attr "type" "mult")
+ "core*3")
+
+(define_insn_reservation "dsp32" 1
+ (eq_attr "type" "dsp32")
+ "slot0")
+
+(define_insn_reservation "dsp32shiftimm" 1
+ (and (eq_attr "type" "dsp32shiftimm")
+ (eq (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0)))
+ "slot0")
+
+(define_insn_reservation "dsp32shiftimm_anomaly_05000074" 1
+ (and (eq_attr "type" "dsp32shiftimm")
+ (ne (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0)))
+ "slot0+anomaly_05000074")
+
+(define_insn_reservation "load32" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit")))
+ "core+load")
+
+(define_insn_reservation "loadp" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg")))
+ "slot1+pregs+load")
+
+(define_insn_reservation "loadsp" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg")))
+ "slot1+pregs")
+
+(define_insn_reservation "loadi" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg")))
+ "(slot1|slot2)+load")
+
+(define_insn_reservation "store32" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcst") (eq_attr "addrtype" "32bit")))
+ "core")
+
+(define_insn_reservation "storep" 1
+ (and (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcst")
+ (ior (eq_attr "addrtype" "preg")
+ (eq_attr "addrtype" "spreg"))))
+ (ior (eq (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0))
+ (eq_attr "storereg" "other")))
+ "slot1+pregs+store")
+
+(define_insn_reservation "storep_anomaly_05000074" 1
+ (and (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcst")
+ (ior (eq_attr "addrtype" "preg")
+ (eq_attr "addrtype" "spreg"))))
+ (and (ne (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0))
+ (eq_attr "storereg" "preg")))
+ "slot1+anomaly_05000074+pregs+store")
+
+(define_insn_reservation "storei" 1
+ (and (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg")))
+ (ior (eq (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0))
+ (eq_attr "storereg" "other")))
+ "(slot1|slot2)+store")
+
+(define_insn_reservation "storei_anomaly_05000074" 1
+ (and (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg")))
+ (and (ne (symbol_ref "ENABLE_WA_05000074")
+ (const_int 0))
+ (eq_attr "storereg" "preg")))
+ "((slot1+anomaly_05000074)|slot2)+store")
+
+(define_insn_reservation "multi" 2
+ (eq_attr "seq_insns" "multi")
+ "core")
+
+(define_insn_reservation "load_stall1" 1
+ (and (eq_attr "type" "stall")
+ (match_operand 0 "const1_operand" ""))
+ "core+load*2")
+
+(define_insn_reservation "load_stall3" 1
+ (and (eq_attr "type" "stall")
+ (match_operand 0 "const3_operand" ""))
+ "core+load*4")
+
+(absence_set "slot0" "slot1,slot2")
+(absence_set "slot1" "slot2")
+
+;; Make sure genautomata knows about the maximum latency that can be produced
+;; by the adjust_cost function.
+(define_insn_reservation "dummy" 5
+ (eq_attr "type" "dummy")
+ "core")
+
+;; Operand and operator predicates
+
+(include "predicates.md")
+(include "constraints.md")
+
+;;; FRIO branches have been optimized for code density;
+;;; this comes at a slight cost of complexity when
+;;; a compiler needs to generate branches in the general
+;;; case. In order to generate the correct branching
+;;; mechanisms the compiler needs to keep track of instruction
+;;; lengths. The following table describes how to count instructions
+;;; for the FRIO architecture.
+;;;
+;;; unconditional br are 12-bit imm pcrelative branches *2
+;;; conditional br are 10-bit imm pcrelative branches *2
+;;; brcc 10-bit:
+;;; 1024 10-bit imm *2 is 2048 (-1024..1022)
+;;; br 12-bit :
+;;; 4096 12-bit imm *2 is 8192 (-4096..4094)
+;;; NOTE: For brcc we generate instruction sequences such as
+;;; if cc jmp; jump.[sl] offset
+;;; The offset of jump.[sl] is measured from the jump instruction, but
+;;; gcc calculates the length from the "if cc jmp" instruction;
+;;; furthermore, gcc takes the end address of the branch instruction
+;;; as (pc) for a forward branch.
+;;; Hence our range is (-4094, 4092) instead of (-4096, 4094) for a br.
+;;;
+;;; The way the (pc) rtx works in these calculations is somewhat odd;
+;;; for backward branches it's the address of the current instruction,
+;;; for forward branches it's the previously known address of the following
+;;; instruction - we have to take this into account by reducing the range
+;;; for a forward branch.
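+;;;
+;;; Worked example: a conditional branch has a 10-bit immediate counted
+;;; in 16-bit units, i.e. -1024..1022 bytes; since (pc) for a forward
+;;; branch is the end of the branch insn, the usable forward range in
+;;; the length attribute below is 1020 rather than 1022, and likewise
+;;; 4092 rather than 4094 for the 12-bit unconditional branch.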
+
+;; Lengths for type "mvi" insns are always defined by the instructions
+;; themselves.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "mcld")
+ (if_then_else (match_operand 1 "effective_address_32bit_p" "")
+ (const_int 4) (const_int 2))
+
+ (eq_attr "type" "mcst")
+ (if_then_else (match_operand 0 "effective_address_32bit_p" "")
+ (const_int 4) (const_int 2))
+
+ (eq_attr "type" "move") (const_int 2)
+
+ (eq_attr "type" "dsp32") (const_int 4)
+ (eq_attr "type" "dsp32shiftimm") (const_int 4)
+ (eq_attr "type" "call") (const_int 4)
+
+ (eq_attr "type" "br")
+ (if_then_else (and
+ (le (minus (match_dup 0) (pc)) (const_int 4092))
+ (ge (minus (match_dup 0) (pc)) (const_int -4096)))
+ (const_int 2)
+ (const_int 4))
+
+ (eq_attr "type" "brcc")
+ (cond [(and
+ (le (minus (match_dup 3) (pc)) (const_int 1020))
+ (ge (minus (match_dup 3) (pc)) (const_int -1024)))
+ (const_int 2)
+ (and
+ (le (minus (match_dup 3) (pc)) (const_int 4092))
+ (ge (minus (match_dup 3) (pc)) (const_int -4094)))
+ (const_int 4)]
+ (const_int 6))
+ ]
+
+ (const_int 2)))
+
+;; Classify the insns into those that are one instruction and those that
+;; are more than one in sequence.
+(define_attr "seq_insns" "single,multi"
+ (const_string "single"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "misc")
+ (set_attr "seq_insns" "multi")
+ (set_attr "length" "4")])
+
+;; Conditional moves
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
+ ""
+{
+ operands[1] = bfin_gen_compare (operands[1], SImode);
+})
+
+(define_insn "*movsicc_insn1"
+ [(set (match_operand:SI 0 "register_operand" "=da,da,da")
+ (if_then_else:SI
+ (eq:BI (match_operand:BI 3 "register_operand" "C,C,C")
+ (const_int 0))
+ (match_operand:SI 1 "register_operand" "da,0,da")
+ (match_operand:SI 2 "register_operand" "0,da,da")))]
+ ""
+ "@
+ if !cc %0 =%1; /* movsicc-1a */
+ if cc %0 =%2; /* movsicc-1b */
+ if !cc %0 =%1; if cc %0=%2; /* movsicc-1 */"
+ [(set_attr "length" "2,2,4")
+ (set_attr "type" "movcc")
+ (set_attr "seq_insns" "*,*,multi")])
+
+(define_insn "*movsicc_insn2"
+ [(set (match_operand:SI 0 "register_operand" "=da,da,da")
+ (if_then_else:SI
+ (ne:BI (match_operand:BI 3 "register_operand" "C,C,C")
+ (const_int 0))
+ (match_operand:SI 1 "register_operand" "0,da,da")
+ (match_operand:SI 2 "register_operand" "da,0,da")))]
+ ""
+ "@
+ if !cc %0 =%2; /* movsicc-2b */
+ if cc %0 =%1; /* movsicc-2a */
+ if cc %0 =%1; if !cc %0=%2; /* movsicc-1 */"
+ [(set_attr "length" "2,2,4")
+ (set_attr "type" "movcc")
+ (set_attr "seq_insns" "*,*,multi")])
+
+;; Insns to load HIGH and LO_SUM
+
+(define_insn "movsi_high"
+ [(set (match_operand:SI 0 "register_operand" "=x")
+ (high:SI (match_operand:SI 1 "immediate_operand" "i")))]
+ "reload_completed"
+ "%d0 = %d1;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+(define_insn "movstricthi_high"
+ [(set (match_operand:SI 0 "register_operand" "+x")
+ (ior:SI (and:SI (match_dup 0) (const_int 65535))
+ (match_operand:SI 1 "immediate_operand" "i")))]
+ "reload_completed"
+ "%d0 = %d1;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+(define_insn "movsi_low"
+ [(set (match_operand:SI 0 "register_operand" "=x")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "reload_completed"
+ "%h0 = %h2;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+(define_insn "movsi_high_pic"
+ [(set (match_operand:SI 0 "register_operand" "=x")
+ (high:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_MOVE_PIC)))]
+ ""
+ "%d0 = %1@GOT_LOW;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+(define_insn "movsi_low_pic"
+ [(set (match_operand:SI 0 "register_operand" "=x")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "0")
+ (unspec:SI [(match_operand:SI 2 "" "")]
+ UNSPEC_MOVE_PIC)))]
+ ""
+ "%h0 = %h2@GOT_HIGH;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+;;; Move instructions
+
+(define_insn_and_split "movdi_insn"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x,mx,r")
+ (match_operand:DI 1 "general_operand" "iFx,r,mx"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ rtx lo_half[2], hi_half[2];
+ split_di (operands, 2, lo_half, hi_half);
+
+ if (reg_overlap_mentioned_p (lo_half[0], hi_half[1]))
+ {
+ operands[2] = hi_half[0];
+ operands[3] = hi_half[1];
+ operands[4] = lo_half[0];
+ operands[5] = lo_half[1];
+ }
+ else
+ {
+ operands[2] = lo_half[0];
+ operands[3] = lo_half[1];
+ operands[4] = hi_half[0];
+ operands[5] = hi_half[1];
+ }
+})
+
+(define_insn "movbi"
+ [(set (match_operand:BI 0 "nonimmediate_operand" "=x,x,d,md,C,d,C,P1")
+ (match_operand:BI 1 "general_operand" "x,xKs3,md,d,d,C,P0,P1"))]
+
+ ""
+ "@
+ %0 = %1;
+ %0 = %1 (X);
+ %0 = B %1 (Z)%!
+ B %0 = %1;
+ CC = %1;
+ %0 = CC;
+ CC = R0 < R0;
+ CC = R0 == R0;"
+ [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,compare,compare")
+ (set_attr "length" "2,2,*,*,2,2,2,2")
+ (set_attr "seq_insns" "*,*,*,*,*,*,*,*")])
+
+(define_insn "movpdi"
+ [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e")
+ (match_operand:PDI 1 "general_operand" " e,e,>"))]
+ ""
+ "@
+ %0 = %1;
+ %0 = %x1; %0 = %w1;
+ %w0 = %1; %x0 = %1;"
+ [(set_attr "type" "move,mcst,mcld")
+ (set_attr "seq_insns" "*,multi,multi")])
+
+(define_insn "load_accumulator"
+ [(set (match_operand:PDI 0 "register_operand" "=e")
+ (sign_extend:PDI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = %1;"
+ [(set_attr "type" "move")])
+
+(define_insn_and_split "load_accumulator_pair"
+ [(set (match_operand:V2PDI 0 "register_operand" "=e")
+ (sign_extend:V2PDI (vec_concat:V2SI
+ (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (sign_extend:PDI (match_dup 1)))
+ (set (match_dup 4) (sign_extend:PDI (match_dup 2)))]
+{
+ operands[3] = gen_rtx_REG (PDImode, REGNO (operands[0]));
+ operands[4] = gen_rtx_REG (PDImode, REGNO (operands[0]) + 1);
+})
+
+(define_insn "*pushsi_insn"
+ [(set (mem:SI (pre_dec:SI (reg:SI REG_SP)))
+ (match_operand:SI 0 "register_operand" "xy"))]
+ ""
+ "[--SP] = %0;"
+ [(set_attr "type" "mcst")
+ (set_attr "addrtype" "32bit")
+ (set_attr "length" "2")])
+
+(define_insn "*popsi_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d,xy")
+ (mem:SI (post_inc:SI (reg:SI REG_SP))))]
+ ""
+ "%0 = [SP++]%!"
+ [(set_attr "type" "mcld")
+ (set_attr "addrtype" "preg,32bit")
+ (set_attr "length" "2")])
+
+;; The first alternative is used to make reload choose a limited register
+;; class when faced with a movsi_insn that had its input operand replaced
+;; with a PLUS. We generally require fewer secondary reloads this way.
+
+(define_insn "*movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x,da,y,da,x,x,x,da,mr")
+ (match_operand:SI 1 "general_operand" "da,x,y,da,xKs7,xKsh,xKuh,ix,mr,da"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "@
+ %0 = %1;
+ %0 = %1;
+ %0 = %1;
+ %0 = %1;
+ %0 = %1 (X);
+ %0 = %1 (X);
+ %0 = %1 (Z);
+ #
+ %0 = %1%!
+ %0 = %1%!"
+ [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst")
+ (set_attr "length" "2,2,2,2,2,4,4,*,*,*")])
+
+(define_insn "*movsi_insn32"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "d,P0")] UNSPEC_32BIT))]
+ ""
+ "@
+ %0 = ROT %1 BY 0%!
+ %0 = %0 -|- %0%!"
+ [(set_attr "type" "dsp32shiftimm,dsp32")])
+
+(define_split
+ [(set (match_operand:SI 0 "d_register_operand" "")
+ (const_int 0))]
+ "splitting_for_sched && !optimize_size"
+ [(set (match_dup 0) (unspec:SI [(const_int 0)] UNSPEC_32BIT))])
+
+(define_split
+ [(set (match_operand:SI 0 "d_register_operand" "")
+ (match_operand:SI 1 "d_register_operand" ""))]
+ "splitting_for_sched && !optimize_size"
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_32BIT))])
+
+(define_insn_and_split "*movv2hi_insn"
+ [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm")
+ (match_operand:V2HI 1 "general_operand" "i,di,md,d"))]
+
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "@
+ #
+ %0 = %1;
+ %0 = %1%!
+ %0 = %1%!"
+ "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
+ [(set (match_dup 0) (high:SI (match_dup 2)))
+ (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 3)))]
+{
+ HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+ intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+ operands[2] = operands[3] = GEN_INT (trunc_int_for_mode (intval, SImode));
+}
+ [(set_attr "type" "move,move,mcld,mcst")
+ (set_attr "length" "2,2,*,*")])
+
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=x,da,x,d,mr")
+ (match_operand:HI 1 "general_operand" "x,xKs7,xKsh,mr,d"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+{
+ static const char *templates[] = {
+ "%0 = %1;",
+ "%0 = %1 (X);",
+ "%0 = %1 (X);",
+ "%0 = W %1 (X)%!",
+ "W %0 = %1%!",
+ "%h0 = W %1%!",
+ "W %0 = %h1%!"
+ };
+ int alt = which_alternative;
+ rtx mem = (MEM_P (operands[0]) ? operands[0]
+ : MEM_P (operands[1]) ? operands[1] : NULL_RTX);
+ if (mem && bfin_dsp_memref_p (mem))
+ alt += 2;
+ return templates[alt];
+}
+ [(set_attr "type" "move,mvi,mvi,mcld,mcst")
+ (set_attr "length" "2,2,4,*,*")])
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=x,da,x,d,mr")
+ (match_operand:QI 1 "general_operand" "x,xKs7,xKsh,mr,d"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "@
+ %0 = %1;
+ %0 = %1 (X);
+ %0 = %1 (X);
+ %0 = B %1 (X)%!
+ B %0 = %1%!"
+ [(set_attr "type" "move,mvi,mvi,mcld,mcst")
+ (set_attr "length" "2,2,4,*,*")])
+
+(define_insn "*movsf_insn"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,da,mr")
+ (match_operand:SF 1 "general_operand" "x,Fx,mr,da"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "@
+ %0 = %1;
+ #
+ %0 = %1%!
+ %0 = %1%!"
+ [(set_attr "type" "move,*,mcld,mcst")])
+
+(define_insn_and_split "movdf_insn"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x,mx,r")
+ (match_operand:DF 1 "general_operand" "iFx,r,mx"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG"
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ rtx lo_half[2], hi_half[2];
+ split_di (operands, 2, lo_half, hi_half);
+
+ if (reg_overlap_mentioned_p (lo_half[0], hi_half[1]))
+ {
+ operands[2] = hi_half[0];
+ operands[3] = hi_half[1];
+ operands[4] = lo_half[0];
+ operands[5] = lo_half[1];
+ }
+ else
+ {
+ operands[2] = lo_half[0];
+ operands[3] = lo_half[1];
+ operands[4] = hi_half[0];
+ operands[5] = hi_half[1];
+ }
+})
+
+;; Storing halfwords.
+(define_insn "*movsi_insv"
+ [(set (zero_extract:SI (match_operand 0 "register_operand" "+d,x")
+ (const_int 16)
+ (const_int 16))
+ (match_operand:SI 1 "nonmemory_operand" "d,n"))]
+ ""
+ "@
+ %d0 = %h1 << 0%!
+ %d0 = %1;"
+ [(set_attr "type" "dsp32shiftimm,mvi")])
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))
+ (match_operand:SI 3 "nonmemory_operand" ""))]
+ ""
+{
+ if (INTVAL (operands[1]) != 16 || INTVAL (operands[2]) != 16)
+ FAIL;
+
+ /* From mips.md: insert_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! register_operand (operands[0], VOIDmode))
+ FAIL;
+})
+
+;; This is the main "hook" for PIC code. When generating
+;; PIC, movsi is responsible for determining when the source address
+;; needs PIC relocation and appropriately calling legitimize_pic_address
+;; to perform the actual relocation.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ if (expand_move (operands, SImode))
+ DONE;
+})
+
+(define_expand "movv2hi"
+ [(set (match_operand:V2HI 0 "nonimmediate_operand" "")
+ (match_operand:V2HI 1 "general_operand" ""))]
+ ""
+ "expand_move (operands, V2HImode);")
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "expand_move (operands, DImode);")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "expand_move (operands, SFmode);")
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "expand_move (operands, DFmode);")
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "expand_move (operands, HImode);")
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ " expand_move (operands, QImode); ")
+
+;; Some define_splits to break up SI/SFmode loads of immediate constants.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "symbolic_or_const_operand" ""))]
+ "reload_completed
+ /* Always split symbolic operands; split integer constants that are
+ too large for a single instruction. */
+ && (GET_CODE (operands[1]) != CONST_INT
+ || (INTVAL (operands[1]) < -32768
+ || INTVAL (operands[1]) >= 65536
+ || (INTVAL (operands[1]) >= 32768 && PREG_P (operands[0]))))"
+ [(set (match_dup 0) (high:SI (match_dup 1)))
+ (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))]
+{
+ if (GET_CODE (operands[1]) == CONST_INT
+ && split_load_immediate (operands))
+ DONE;
+ /* ??? Do something about TARGET_LOW_64K. */
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "immediate_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (high:SI (match_dup 3)))
+ (set (match_dup 2) (lo_sum:SI (match_dup 2) (match_dup 3)))]
+{
+ long values;
+ REAL_VALUE_TYPE value;
+
+ gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE);
+
+ REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (value, values);
+
+ operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0]));
+ operands[3] = GEN_INT (trunc_int_for_mode (values, SImode));
+ if (values >= -32768 && values < 65536)
+ {
+ emit_move_insn (operands[2], operands[3]);
+ DONE;
+ }
+ if (split_load_immediate (operands + 2))
+ DONE;
+})
+
+;; Sadly, this can't be a proper named movstrict pattern, since the compiler
+;; expects to be able to use registers for operand 1.
+;; Note that the asm instruction is defined by the manual to take an unsigned
+;; constant, but it doesn't matter to the assembler, and the compiler only
+;; deals with sign-extended constants. Hence "Ksh".
+(define_insn "movstricthi_1"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+x"))
+ (match_operand:HI 1 "immediate_operand" "Ksh"))]
+ ""
+ "%h0 = %1;"
+ [(set_attr "type" "mvi")
+ (set_attr "length" "4")])
+
+;; Sign and zero extensions
+
+(define_insn_and_split "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d, d")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))]
+ ""
+ "@
+ %0 = %h1 (X);
+ %0 = W %h1 (X)%!"
+ "reload_completed && bfin_dsp_memref_p (operands[1])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (HImode, operands[0]);
+}
+ [(set_attr "type" "alu0,mcld")])
+
+(define_insn_and_split "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d, d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))]
+ ""
+ "@
+ %0 = %h1 (Z);
+ %0 = W %h1 (Z)%!"
+ "reload_completed && bfin_dsp_memref_p (operands[1])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (HImode, operands[0]);
+}
+ [(set_attr "type" "alu0,mcld")])
+
+(define_insn "zero_extendbisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:BI 1 "nonimmediate_operand" "C")))]
+ ""
+ "%0 = %1;"
+ [(set_attr "type" "compare")])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d, d")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))]
+ ""
+ "@
+ %0 = B %1 (X)%!
+ %0 = %T1 (X);"
+ [(set_attr "type" "mcld,alu0")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d, d")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))]
+ ""
+ "@
+ %0 = B %1 (X)%!
+ %0 = %T1 (X);"
+ [(set_attr "type" "mcld,alu0")])
+
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d, d")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))]
+ ""
+ "@
+ %0 = B %1 (Z)%!
+ %0 = %T1 (Z);"
+ [(set_attr "type" "mcld,alu0")])
+
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d, d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))]
+ ""
+ "@
+ %0 = B %1 (Z)%!
+ %0 = %T1 (Z);"
+ [(set_attr "type" "mcld,alu0")])
+
+;; DImode logical operations
+
+(define_code_iterator any_logical [and ior xor])
+(define_code_attr optab [(and "and")
+ (ior "ior")
+ (xor "xor")])
+(define_code_attr op [(and "&")
+ (ior "|")
+ (xor "^")])
+(define_code_attr high_result [(and "0")
+ (ior "%H1")
+ (xor "%H1")])
+
+;; Keep this pattern around to avoid generating NO_CONFLICT blocks.
+(define_expand "<optab>di3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (any_logical:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "general_operand" "d")))]
+ ""
+{
+ rtx hi_half[3], lo_half[3];
+ enum insn_code icode = CODE_FOR_<optab>si3;
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ emit_clobber (operands[0]);
+ split_di (operands, 3, lo_half, hi_half);
+ if (!(*insn_data[icode].operand[2].predicate) (lo_half[2], SImode))
+ lo_half[2] = force_reg (SImode, lo_half[2]);
+ emit_insn (GEN_FCN (icode) (lo_half[0], lo_half[1], lo_half[2]));
+ if (!(*insn_data[icode].operand[2].predicate) (hi_half[2], SImode))
+ hi_half[2] = force_reg (SImode, hi_half[2]);
+ emit_insn (GEN_FCN (icode) (hi_half[0], hi_half[1], hi_half[2]));
+ DONE;
+})
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "%0 = %T1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = %h1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn_and_split "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ emit_move_insn (operands[2], operands[1]);
+})
+
+(define_insn_and_split "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+(define_insn_and_split "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+;; DImode arithmetic operations
+
+(define_insn "add_with_carry"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,d")
+ (match_operand:SI 2 "nonmemory_operand" "Ks7,d")))
+ (set (match_operand:BI 3 "register_operand" "=C,C")
+ (ltu:BI (not:SI (match_dup 1)) (match_dup 2)))]
+ ""
+ "@
+ %0 += %2; cc = ac0;
+ %0 = %1 + %2; cc = ac0;"
+ [(set_attr "type" "alu0")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn "sub_with_carry"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "register_operand" "%d")
+ (match_operand:SI 2 "nonmemory_operand" "d")))
+ (set (match_operand:BI 3 "register_operand" "=C")
+ (leu:BI (match_dup 2) (match_dup 1)))]
+ ""
+ "%0 = %1 - %2; cc = ac0;"
+ [(set_attr "type" "alu0")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (reg:CC 34))]
+ ""
+{
+ rtx xops[8];
+ xops[0] = gen_lowpart (SImode, operands[0]);
+ xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+ xops[2] = gen_lowpart (SImode, operands[1]);
+ xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+ xops[4] = gen_lowpart (SImode, operands[2]);
+ xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4);
+ xops[6] = gen_reg_rtx (SImode);
+ xops[7] = gen_rtx_REG (BImode, REG_CC);
+ if (!register_operand (xops[4], SImode)
+ && (GET_CODE (xops[4]) != CONST_INT
+ || !satisfies_constraint_Ks7 (xops[4])))
+ xops[4] = force_reg (SImode, xops[4]);
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ emit_clobber (operands[0]);
+ emit_insn (gen_add_with_carry (xops[0], xops[2], xops[4], xops[7]));
+ emit_insn (gen_movbisi (xops[6], xops[7]));
+ if (!register_operand (xops[5], SImode)
+ && (GET_CODE (xops[5]) != CONST_INT
+ || !satisfies_constraint_Ks7 (xops[5])))
+ xops[5] = force_reg (SImode, xops[5]);
+ if (xops[5] != const0_rtx)
+ emit_insn (gen_addsi3 (xops[1], xops[3], xops[5]));
+ else
+ emit_move_insn (xops[1], xops[3]);
+ emit_insn (gen_addsi3 (xops[1], xops[1], xops[6]));
+ DONE;
+})
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:CC 34))]
+ ""
+{
+ rtx xops[8];
+ xops[0] = gen_lowpart (SImode, operands[0]);
+ xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+ xops[2] = gen_lowpart (SImode, operands[1]);
+ xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+ xops[4] = gen_lowpart (SImode, operands[2]);
+ xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4);
+ xops[6] = gen_reg_rtx (SImode);
+ xops[7] = gen_rtx_REG (BImode, REG_CC);
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ emit_clobber (operands[0]);
+ emit_insn (gen_sub_with_carry (xops[0], xops[2], xops[4], xops[7]));
+ emit_insn (gen_notbi (xops[7], xops[7]));
+ emit_insn (gen_movbisi (xops[6], xops[7]));
+ emit_insn (gen_subsi3 (xops[1], xops[3], xops[5]));
+ emit_insn (gen_subsi3 (xops[1], xops[1], xops[6]));
+ DONE;
+})
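+
+;; A sketch of what adddi3 emits (register assignments are hypothetical):
+;; the low-half addition copies the carry flag into CC, the carry is then
+;; moved into a data register and folded into the high-half sum:
+;;   R0 = R2 + R4; cc = ac0;    /* low halves, carry -> CC */
+;;   R6 = CC;                   /* carry into a data register */
+;;   R1 = R3 + R5;              /* high halves */
+;;   R1 = R1 + R6;              /* fold in the carry */
+;; subdi3 is analogous, except that the no-borrow flag left in CC is
+;; inverted with notbi and the resulting borrow is subtracted from the
+;; high-half difference.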
+
+;; Combined shift/add instructions
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a,d")
+ (ashift:SI (plus:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "register_operand" "a,d"))
+ (match_operand:SI 3 "pos_scale_operand" "P1P2,P1P2")))]
+ ""
+ "%0 = (%0 + %2) << %3;" /* "shadd %0,%2,%3;" */
+ [(set_attr "type" "alu0")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (match_operand:SI 1 "register_operand" "a")
+ (mult:SI (match_operand:SI 2 "register_operand" "a")
+ (match_operand:SI 3 "scale_by_operand" "i"))))]
+ ""
+ "%0 = %1 + (%2 << %X3);"
+ [(set_attr "type" "alu0")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (match_operand:SI 1 "register_operand" "a")
+ (ashift:SI (match_operand:SI 2 "register_operand" "a")
+ (match_operand:SI 3 "pos_scale_operand" "i"))))]
+ ""
+ "%0 = %1 + (%2 << %3);"
+ [(set_attr "type" "alu0")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "a")
+ (match_operand:SI 2 "scale_by_operand" "i"))
+ (match_operand:SI 3 "register_operand" "a")))]
+ ""
+ "%0 = %3 + (%1 << %X2);"
+ [(set_attr "type" "alu0")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "a")
+ (match_operand:SI 2 "pos_scale_operand" "i"))
+ (match_operand:SI 3 "register_operand" "a")))]
+ ""
+ "%0 = %3 + (%1 << %2);"
+ [(set_attr "type" "alu0")])
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%d"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "d"))))]
+ ""
+ "%0 = %h1 * %h2 (IS)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%d"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "d"))))]
+ ""
+ "%0 = %h1 * %h2 (FU)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=W")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "W"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "W"))))]
+ ""
+ "%0 = %h2 * %h1 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+;; The processor also supports ireg += mreg or ireg -= mreg, but these
+;; are unusable unless we ensure that the corresponding lreg is zero.
+;; The same applies to the add/subtract constant versions involving
+;; iregs.
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=ad,a,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0, a,d")
+ (match_operand:SI 2 "reg_or_7bit_operand" "Ks7, a,d")))]
+ ""
+ "@
+ %0 += %2;
+ %0 = %1 + %2;
+ %0 = %1 + %2;"
+ [(set_attr "type" "alu0")
+ (set_attr "length" "2,2,2")])
+
+(define_insn "ssaddsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ss_plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 + %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=da,d,a")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,d,0")
+ (match_operand:SI 2 "reg_or_neg7bit_operand" "KN7,d,a")))]
+ ""
+{
+ static const char *const strings_subsi3[] = {
+ "%0 += -%2;",
+ "%0 = %1 - %2;",
+ "%0 -= %2;",
+ };
+
+  if (CONSTANT_P (operands[2]) && INTVAL (operands[2]) < 0)
+    {
+      rtx tmp_op = operands[2];
+      operands[2] = GEN_INT (-INTVAL (operands[2]));
+      output_asm_insn ("%0 += %2;", operands);
+      operands[2] = tmp_op;
+      return "";
+    }
+
+ return strings_subsi3[which_alternative];
+}
+ [(set_attr "type" "alu0")])
+
+(define_insn "sssubsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ss_minus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 - %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Accumulator addition
+
+(define_insn "addpdi3"
+ [(set (match_operand:PDI 0 "register_operand" "=A")
+ (ss_plus:PDI (match_operand:PDI 1 "register_operand" "%0")
+ (match_operand:PDI 2 "nonmemory_operand" "B")))]
+ ""
+ "A0 += A1%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sum_of_accumulators"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ss_truncate:SI
+ (ss_plus:PDI (match_operand:PDI 2 "register_operand" "1")
+ (match_operand:PDI 3 "register_operand" "B"))))
+ (set (match_operand:PDI 1 "register_operand" "=A")
+ (ss_plus:PDI (match_dup 2) (match_dup 3)))]
+ ""
+ "%0 = (A0 += A1)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "us_truncpdisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,W")
+ (us_truncate:SI (match_operand:PDI 1 "register_operand" "A,B")))]
+ ""
+ "%0 = %1 (FU)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Bit test instructions
+
+(define_insn "*not_bittst"
+ [(set (match_operand:BI 0 "register_operand" "=C")
+ (eq:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 1)
+ (match_operand:SI 2 "immediate_operand" "Ku5"))
+ (const_int 0)))]
+ ""
+ "cc = !BITTST (%1,%2);"
+ [(set_attr "type" "alu0")])
+
+(define_insn "*bittst"
+ [(set (match_operand:BI 0 "register_operand" "=C")
+ (ne:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 1)
+ (match_operand:SI 2 "immediate_operand" "Ku5"))
+ (const_int 0)))]
+ ""
+ "cc = BITTST (%1,%2);"
+ [(set_attr "type" "alu0")])
+
+(define_insn_and_split "*bit_extract"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 1)
+ (match_operand:SI 2 "immediate_operand" "Ku5")))
+ (clobber (reg:BI REG_CC))]
+ ""
+ "#"
+ ""
+ [(set (reg:BI REG_CC)
+ (ne:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (ne:SI (reg:BI REG_CC) (const_int 0)))])
+
+(define_insn_and_split "*not_bit_extract"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extract:SI (not:SI (match_operand:SI 1 "register_operand" "d"))
+ (const_int 1)
+ (match_operand:SI 2 "immediate_operand" "Ku5")))
+ (clobber (reg:BI REG_CC))]
+ ""
+ "#"
+ ""
+ [(set (reg:BI REG_CC)
+ (eq:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (ne:SI (reg:BI REG_CC) (const_int 0)))])
+
+(define_insn "*andsi_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,d,d,d")
+ (match_operand:SI 2 "rhs_andsi3_operand" "L,M1,M2,d")))]
+ ""
+ "@
+ BITCLR (%0,%Y2);
+ %0 = %T1 (Z);
+ %0 = %h1 (Z);
+ %0 = %1 & %2;"
+ [(set_attr "type" "alu0")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+{
+ if (highbits_operand (operands[2], SImode))
+ {
+ operands[2] = GEN_INT (exact_log2 (-INTVAL (operands[2])));
+ emit_insn (gen_ashrsi3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2]));
+ DONE;
+ }
+ if (! rhs_andsi3_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,d")
+ (match_operand:SI 2 "regorlog2_operand" "J,d")))]
+ ""
+ "@
+ BITSET (%0, %X2);
+ %0 = %1 | %2;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,d")
+ (match_operand:SI 2 "regorlog2_operand" "J,d")))]
+ ""
+ "@
+ BITTGL (%0, %X2);
+ %0 = %1 ^ %2;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "ones"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (unspec:HI [(match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_ONES))]
+ ""
+ "%h0 = ONES %1;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (smax:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ "%0 = max(%1,%2)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (smin:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ "%0 = min(%1,%2)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (abs:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = abs %1%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssabssi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ss_abs:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = abs %1%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = -%1;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "ssnegsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ss_neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = -%1 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (not:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "%0 = ~%1;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "signbitssi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (if_then_else:HI
+ (lt (match_operand:SI 1 "register_operand" "d") (const_int 0))
+ (clz:HI (not:SI (match_dup 1)))
+ (clz:HI (match_dup 1))))]
+ ""
+ "%h0 = signbits %1%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssroundsi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (truncate:HI
+ (lshiftrt:SI (ss_plus:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 32768))
+ (const_int 16))))]
+ ""
+ "%h0 = %1 (RND)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "smaxhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (smax:HI (match_operand:HI 1 "register_operand" "d")
+ (match_operand:HI 2 "register_operand" "d")))]
+ ""
+ "%0 = max(%1,%2) (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sminhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (smin:HI (match_operand:HI 1 "register_operand" "d")
+ (match_operand:HI 2 "register_operand" "d")))]
+ ""
+ "%0 = min(%1,%2) (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "abshi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (abs:HI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = abs %1 (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "neghi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (neg:HI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = -%1;"
+ [(set_attr "type" "alu0")])
+
+(define_insn "ssneghi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ss_neg:HI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = -%1 (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "signbitshi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (if_then_else:HI
+ (lt (match_operand:HI 1 "register_operand" "d") (const_int 0))
+ (clz:HI (not:HI (match_dup 1)))
+ (clz:HI (match_dup 1))))]
+ ""
+ "%h0 = signbits %h1%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ "%0 *= %2;"
+ [(set_attr "type" "mult")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_FU),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_addpdi3 (a0reg, a0reg, a1reg));
+ emit_insn (gen_us_truncpdisi2 (operands[0], a0reg));
+ }
+ else
+ {
+ rtx umulsi3_highpart_libfunc
+ = init_one_libfunc ("__umulsi3_highpart");
+
+ emit_library_call_value (umulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
+ DONE;
+})
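+
+;; In C terms the pattern above computes the high 32 bits of a widening
+;; unsigned multiply, roughly
+;;   uint32_t f (uint32_t a, uint32_t b)
+;;   { return (uint32_t) (((uint64_t) a * b) >> 32); }
+;; When optimizing for speed it is synthesized from 16x16-bit multiply-
+;; accumulate operations on the accumulators; when optimizing for size
+;; it becomes a call to __umulsi3_highpart.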
+
+(define_expand "smulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_IS),
+ GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
+ }
+ else
+ {
+ rtx smulsi3_highpart_libfunc
+ = init_one_libfunc ("__smulsi3_highpart");
+
+ emit_library_call_value (smulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
+ DONE;
+})
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31)
+ {
+ emit_insn (gen_movsi (operands[0], const0_rtx));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*ashlsi3_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,a,a,a")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,d,a,a,a")
+ (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1,P2,?P3P4")))]
+ ""
+ "@
+ %0 <<= %2;
+ %0 = %1 << %2%!
+ %0 = %1 + %1;
+ %0 = %1 << %2;
+ #"
+ "PREG_P (operands[0]) && INTVAL (operands[2]) > 2"
+ [(set (match_dup 0) (ashift:SI (match_dup 1) (const_int 2)))
+ (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))]
+ "operands[3] = GEN_INT (INTVAL (operands[2]) - 2);"
+ [(set_attr "type" "shft,dsp32shiftimm,shft,shft,*")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,d")
+ (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5")))]
+ ""
+ "@
+ %0 >>>= %2;
+ %0 = %1 >>> %2%!"
+ [(set_attr "type" "shft,dsp32shiftimm")])
+
+(define_insn "rotl16"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (rotate:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16)))]
+ ""
+ "%0 = PACK (%h1, %d1)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (rotate:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ ""
+{
+ if (INTVAL (operands[2]) != 16)
+ FAIL;
+})
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ ""
+{
+ if (INTVAL (operands[2]) != 16)
+ FAIL;
+ emit_insn (gen_rotl16 (operands[0], operands[1]));
+ DONE;
+})
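+
+;; Only rotates by exactly 16 are supported; any other amount FAILs and
+;; the middle end synthesizes the rotate some other way (typically from
+;; shifts).  In C terms,
+;;   uint32_t f (uint32_t x) { return (x << 16) | (x >> 16); }
+;; and its right-rotate twin both map onto the single PACK instruction
+;; of rotl16 above.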
+
+
+(define_insn "ror_one"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "d") (const_int 1))
+ (ashift:SI (zero_extend:SI (reg:BI REG_CC)) (const_int 31))))
+ (set (reg:BI REG_CC)
+ (zero_extract:BI (match_dup 1) (const_int 1) (const_int 0)))]
+ ""
+ "%0 = ROT %1 BY -1%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+(define_insn "rol_one"
+ [(set (match_operand:SI 0 "register_operand" "+d")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") (const_int 1))
+ (zero_extend:SI (reg:BI REG_CC))))
+ (set (reg:BI REG_CC)
+ (zero_extract:BI (match_dup 1) (const_int 31) (const_int 0)))]
+ ""
+ "%0 = ROT %1 BY 1%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ ""
+{
+ rtx lo_half[2], hi_half[2];
+
+ if (operands[2] != const1_rtx)
+ FAIL;
+ if (! rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_di (operands, 2, lo_half, hi_half);
+
+ emit_move_insn (bfin_cc_rtx, const0_rtx);
+ emit_insn (gen_ror_one (hi_half[0], hi_half[0]));
+ emit_insn (gen_ror_one (lo_half[0], lo_half[0]));
+ DONE;
+})
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ ""
+{
+ rtx lo_half[2], hi_half[2];
+
+ if (operands[2] != const1_rtx)
+ FAIL;
+ if (! rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_di (operands, 2, lo_half, hi_half);
+
+ emit_insn (gen_compare_lt (gen_rtx_REG (BImode, REG_CC),
+ hi_half[1], const0_rtx));
+ emit_insn (gen_ror_one (hi_half[0], hi_half[0]));
+ emit_insn (gen_ror_one (lo_half[0], lo_half[0]));
+ DONE;
+})
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ ""
+{
+ rtx lo_half[2], hi_half[2];
+
+ if (operands[2] != const1_rtx)
+ FAIL;
+ if (! rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_di (operands, 2, lo_half, hi_half);
+
+ emit_move_insn (bfin_cc_rtx, const0_rtx);
+ emit_insn (gen_rol_one (lo_half[0], lo_half[0]));
+ emit_insn (gen_rol_one (hi_half[0], hi_half[0]));
+ DONE;
+})
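+
+;; A sketch of the shift-by-one sequences built above, assuming the low
+;; and high words live in hypothetical registers R0 and R1.  lshrdi3
+;; emits
+;;   CC = 0;               /* the bit shifted in at the top */
+;;   R1 = ROT R1 BY -1;    /* high word; its old bit 0 lands in CC */
+;;   R0 = ROT R0 BY -1;    /* low word; CC enters at bit 31 */
+;; ashrdi3 initializes CC with the sign bit instead of zero, and ashldi3
+;; rotates left through CC, low word first.  Shift counts other than 1
+;; FAIL and are left to the generic code.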
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,a")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,d,a")
+ (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1P2")))]
+ ""
+ "@
+ %0 >>= %2;
+ %0 = %1 >> %2%!
+ %0 = %1 >> %2;"
+ [(set_attr "type" "shft,dsp32shiftimm,shft")])
+
+(define_insn "lshrpdi3"
+ [(set (match_operand:PDI 0 "register_operand" "=e")
+ (lshiftrt:PDI (match_operand:PDI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "Ku5")))]
+ ""
+ "%0 = %1 >> %2%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+(define_insn "ashrpdi3"
+ [(set (match_operand:PDI 0 "register_operand" "=e")
+ (ashiftrt:PDI (match_operand:PDI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "Ku5")))]
+ ""
+ "%0 = %1 >>> %2%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+;; A pattern to reload the equivalent of
+;; (set (Dreg) (plus (FP) (large_constant)))
+;; or
+;; (set (dagreg) (plus (FP) (arbitrary_constant)))
+;; using a scratch register.
+(define_expand "reload_insi"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=w")
+ (match_operand:SI 1 "fp_plus_const_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))])]
+ ""
+{
+ rtx fp_op = XEXP (operands[1], 0);
+ rtx const_op = XEXP (operands[1], 1);
+ rtx primary = operands[0];
+ rtx scratch = operands[2];
+
+ emit_move_insn (scratch, const_op);
+ emit_insn (gen_addsi3 (scratch, scratch, fp_op));
+ emit_move_insn (primary, scratch);
+ DONE;
+})
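+
+;; The expander above therefore turns, say,
+;;   (set (reg:SI Dreg) (plus:SI (reg:SI FP) (const_int 0x12345)))
+;; into the three-insn sequence
+;;   scratch = 0x12345;
+;;   scratch += FP;
+;;   Dreg = scratch;
+;; with the scratch living in an addressing register (constraint "=&a").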
+
+(define_mode_iterator AREG [PDI V2PDI])
+
+(define_insn "reload_in<mode>"
+ [(set (match_operand:AREG 0 "register_operand" "=e")
+ (match_operand:AREG 1 "memory_operand" "m"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ split_di (operands + 1, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %2;", xops);
+ output_asm_insn ("%w0 = %1;", xops);
+ output_asm_insn ("%1 = %3;", xops);
+ output_asm_insn ("%x0 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+(define_insn "reload_out<mode>"
+ [(set (match_operand:AREG 0 "memory_operand" "=m")
+ (match_operand:AREG 1 "register_operand" "e"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[1];
+ xops[1] = operands[2];
+ split_di (operands, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %w0;", xops);
+ output_asm_insn ("%2 = %1;", xops);
+ output_asm_insn ("%1 = %x0;", xops);
+ output_asm_insn ("%3 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+;; Jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+{
+ if (get_attr_length (insn) == 2)
+ return "jump.s %0;";
+ else
+ return "jump.l %0;";
+}
+ [(set_attr "type" "br")])
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "a"))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ /* In PIC mode, the table entries are stored PC relative.
+ Convert the relative address to an absolute address. */
+ if (flag_pic)
+ {
+ rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+
+ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0],
+ op1, NULL_RTX, 0, OPTAB_DIRECT);
+ }
+})
+
+(define_insn "*tablejump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+;; Hardware loop
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(parallel [(set (pc) (if_then_else
+ (ne (match_operand:SI 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+ (clobber (match_scratch:SI 5 ""))])]
+ ""
+{
+ /* The loop optimizer doesn't check the predicates... */
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+  /* Due to limitations in the hardware (an initial loop count of 0
+     does not loop 2^32 times), we must avoid generating a hardware
+     loop when we cannot rule out this case.  */
+ if (!flag_unsafe_loop_optimizations
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 0xFFFFFFFF)
+ FAIL;
+ bfin_hardware_loop ();
+})
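+
+;; As a sketch (not a guaranteed translation), a counted loop such as
+;;   for (i = n; i != 0; i--) body ();
+;; can become a zero-overhead hardware loop of the form
+;;   LSETUP (top, bottom) LCx = n;
+;; via the lsetup patterns below, provided the checks above pass, i.e.
+;; the counter is SImode and the maximum iteration count is known to be
+;; below 0xFFFFFFFF (or -funsafe-loop-optimizations promises as much).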
+
+(define_insn "loop_end"
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "+a*d,*b*v*f,m")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus (match_dup 0)
+ (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+ (clobber (match_scratch:SI 2 "=X,&r,&r"))]
+ ""
+ "@
+ /* loop end %0 %l1 */
+ #
+ #"
+ [(set_attr "length" "6,10,14")])
+
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus (match_dup 0)
+ (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "splitting_loops"
+ [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
+ (set (match_dup 0) (match_dup 2))
+ (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0)))
+ (set (pc)
+ (if_then_else (eq (reg:BI REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_insn "lsetup_with_autoinit"
+ [(set (match_operand:SI 0 "lt_register_operand" "=t")
+ (label_ref (match_operand 1 "" "")))
+ (set (match_operand:SI 2 "lb_register_operand" "=u")
+ (label_ref (match_operand 3 "" "")))
+ (set (match_operand:SI 4 "lc_register_operand" "=k")
+ (match_operand:SI 5 "register_operand" "a"))]
+ ""
+ "LSETUP (%1, %3) %4 = %5;"
+ [(set_attr "length" "4")])
+
+(define_insn "lsetup_without_autoinit"
+ [(set (match_operand:SI 0 "lt_register_operand" "=t")
+ (label_ref (match_operand 1 "" "")))
+ (set (match_operand:SI 2 "lb_register_operand" "=u")
+ (label_ref (match_operand 3 "" "")))
+ (use (match_operand:SI 4 "lc_register_operand" "k"))]
+ ""
+ "LSETUP (%1, %3) %4;"
+ [(set_attr "length" "4")])
+
+;; Call instructions.
+
+;; The explicit MEM inside the UNSPEC prevents the compiler from moving
+;; the load before a branch after a NULL test, or before a store that
+;; initializes a function descriptor.
+
+(define_insn_and_split "load_funcdescsi"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec_volatile:SI [(mem:SI (match_operand:SI 1 "address_operand" "p"))]
+ UNSPEC_VOLATILE_LOAD_FUNCDESC))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (mem:SI (match_dup 1)))])
+
+(define_expand "call"
+ [(parallel [(call (match_operand:SI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))])]
+ ""
+{
+ bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 0);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand:SI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (return)])]
+ ""
+{
+ bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 1);
+ DONE;
+})
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))])]
+ ""
+{
+ bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 0);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (return)])]
+ ""
+{
+ bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 1);
+ DONE;
+})
+
+(define_insn "*call_symbol_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand:SI 2 "register_operand" "Z"))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)
+ && GET_CODE (operands[0]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[0], INTVAL (operands[3]))"
+ "call %0;"
+ [(set_attr "type" "call")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_symbol_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand:SI 2 "register_operand" "Z"))
+ (use (match_operand 3 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)
+ && GET_CODE (operands[0]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[0], INTVAL (operands[3]))"
+ "jump.l %0;"
+ [(set_attr "type" "br")
+ (set_attr "length" "4")])
+
+(define_insn "*call_value_symbol_fdpic"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand:SI 3 "register_operand" "Z"))
+ (use (match_operand 4 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[1], INTVAL (operands[4]))"
+ "call %1;"
+ [(set_attr "type" "call")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_symbol_fdpic"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand:SI 3 "register_operand" "Z"))
+ (use (match_operand 4 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[1], INTVAL (operands[4]))"
+ "jump.l %1;"
+ [(set_attr "type" "br")
+ (set_attr "length" "4")])
+
+(define_insn "*call_insn_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand:SI 2 "register_operand" "Z"))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)"
+ "call (%0);"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*sibcall_insn_fdpic"
+ [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand:SI 2 "register_operand" "Z"))
+ (use (match_operand 3 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "jump (%0);"
+ [(set_attr "type" "br")
+ (set_attr "length" "2")])
+
+(define_insn "*call_value_insn_fdpic"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand:SI 3 "register_operand" "Z"))
+ (use (match_operand 4 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)"
+ "call (%1);"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*sibcall_value_insn_fdpic"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand:SI 3 "register_operand" "Z"))
+ (use (match_operand 4 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "jump (%1);"
+ [(set_attr "type" "br")
+ (set_attr "length" "2")])
+
+(define_insn "*call_symbol"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)
+ && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY)
+ && GET_CODE (operands[0]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[0], INTVAL (operands[2]))"
+ "call %0;"
+ [(set_attr "type" "call")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_symbol"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand 2 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)
+ && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY)
+ && GET_CODE (operands[0]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[0], INTVAL (operands[2]))"
+ "jump.l %0;"
+ [(set_attr "type" "br")
+ (set_attr "length" "4")])
+
+(define_insn "*call_value_symbol"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)
+ && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY)
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[1], INTVAL (operands[3]))"
+ "call %1;"
+ [(set_attr "type" "call")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_symbol"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand 3 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)
+ && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY)
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !bfin_longcall_p (operands[1], INTVAL (operands[3]))"
+ "jump.l %1;"
+ [(set_attr "type" "br")
+ (set_attr "length" "4")])
+
+(define_insn "*call_insn"
+ [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "a"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)"
+ "call (%0);"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*sibcall_insn"
+ [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "z"))
+ (match_operand 1 "general_operand" "g"))
+ (use (match_operand 2 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "jump (%0);"
+ [(set_attr "type" "br")
+ (set_attr "length" "2")])
+
+(define_insn "*call_value_insn"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "a"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI REG_RETS))]
+ "! SIBLING_CALL_P (insn)"
+ "call (%1);"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*sibcall_value_insn"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "z"))
+ (match_operand 2 "general_operand" "g")))
+ (use (match_operand 3 "" ""))
+ (return)]
+ "SIBLING_CALL_P (insn)"
+ "jump (%1);"
+ [(set_attr "type" "br")
+ (set_attr "length" "2")])
+
+;; Block move patterns
+
+;; We cheat. This copies one more word than operand 2 indicates.
+
+(define_insn "rep_movsi"
+ [(set (match_operand:SI 0 "register_operand" "=&a")
+ (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (ashift:SI (match_operand:SI 2 "register_operand" "a")
+ (const_int 2)))
+ (const_int 4)))
+ (set (match_operand:SI 1 "register_operand" "=&b")
+ (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1")
+ (ashift:SI (match_dup 2) (const_int 2)))
+ (const_int 4)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 2))
+ (clobber (match_scratch:HI 5 "=&d"))
+ (clobber (reg:SI REG_LT1))
+ (clobber (reg:SI REG_LC1))
+ (clobber (reg:SI REG_LB1))]
+ ""
+ "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;"
+ [(set_attr "type" "misc")
+ (set_attr "length" "16")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn "rep_movhi"
+ [(set (match_operand:SI 0 "register_operand" "=&a")
+ (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (ashift:SI (match_operand:SI 2 "register_operand" "a")
+ (const_int 1)))
+ (const_int 2)))
+ (set (match_operand:SI 1 "register_operand" "=&b")
+ (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1")
+ (ashift:SI (match_dup 2) (const_int 1)))
+ (const_int 2)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 2))
+ (clobber (match_scratch:HI 5 "=&d"))
+ (clobber (reg:SI REG_LT1))
+ (clobber (reg:SI REG_LC1))
+ (clobber (reg:SI REG_LB1))]
+ ""
+ "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;"
+ [(set_attr "type" "misc")
+ (set_attr "length" "16")
+ (set_attr "seq_insns" "multi")])
+
+(define_expand "movmemsi"
+ [(match_operand:BLK 0 "general_operand" "")
+ (match_operand:BLK 1 "general_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ ""
+{
+ if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ FAIL;
+})
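+
+;; So a small fixed-size copy such as
+;;   memcpy (dst, src, 32);
+;; may be expanded inline by bfin_expand_movmem, possibly via the
+;; rep_movsi/rep_movhi loops above; when the expander declines (FAIL),
+;; the middle end falls back to an ordinary memcpy call.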
+
+;; Conditional branch patterns
+;; The Blackfin has only a few condition codes: eq, lt, le, ltu, leu.
+
+(define_insn "compare_eq"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (eq:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))]
+ ""
+ "cc =%1==%2;"
+ [(set_attr "type" "compare")])
+
+(define_insn "compare_ne"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (ne:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))]
+ "0"
+ "cc =%1!=%2;"
+ [(set_attr "type" "compare")])
+
+(define_insn "compare_lt"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (lt:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))]
+ ""
+ "cc =%1<%2;"
+ [(set_attr "type" "compare")])
+
+(define_insn "compare_le"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (le:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))]
+ ""
+ "cc =%1<=%2;"
+ [(set_attr "type" "compare")])
+
+(define_insn "compare_leu"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (leu:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))]
+ ""
+ "cc =%1<=%2 (iu);"
+ [(set_attr "type" "compare")])
+
+(define_insn "compare_ltu"
+ [(set (match_operand:BI 0 "register_operand" "=C,C")
+ (ltu:BI (match_operand:SI 1 "register_operand" "d,a")
+ (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))]
+ ""
+ "cc =%1<%2 (iu);"
+ [(set_attr "type" "compare")])
+
+;; Same as above, but also AND CC with the overflow bit generated by the
+;; first multiplication.
+(define_insn "flag_mul_macv2hi_parts_acconly_andcc0"
+ [(set (match_operand:PDI 0 "register_operand" "=B,e,e")
+ (unspec:PDI [(vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d,d,d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 3 "register_operand" "d,d,d")
+ (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (match_operand 10 "const_int_operand" "PB,PA,PA")]
+ UNSPEC_MUL_WITH_FLAG))
+ (set (match_operand:PDI 1 "register_operand" "=B,e,e")
+ (unspec:PDI [(vec_select:HI
+ (match_dup 2)
+ (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (vec_select:HI
+ (match_dup 3)
+ (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (match_operand:PDI 8 "register_operand" "1,1,1")
+ (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1")
+ (match_operand 11 "const_int_operand" "PA,PB,PA")]
+ UNSPEC_MAC_WITH_FLAG))
+ (set (reg:BI REG_CC)
+ (and:BI (reg:BI REG_CC)
+ (unspec:BI [(vec_select:HI (match_dup 2) (parallel [(match_dup 4)]))
+ (vec_select:HI (match_dup 3) (parallel [(match_dup 6)]))
+ (match_dup 10)]
+ UNSPEC_MUL_WITH_FLAG)))]
+ "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))"
+{
+ rtx xops[6];
+ const char *templates[] = {
+ "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;",
+ "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;" };
+ int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1)
+ + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3));
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = operands[3];
+ xops[4] = operands[9];
+ xops[5] = which_alternative == 0 ? operands[10] : operands[11];
+ output_asm_insn (templates[alt], xops);
+ return "";
+}
+ [(set_attr "type" "misc")
+ (set_attr "length" "6")
+ (set_attr "seq_insns" "multi")])
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_const_int_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ rtx bi_compare = bfin_gen_compare (operands[0], SImode);
+ emit_jump_insn (gen_cbranchbi4 (bi_compare, bfin_cc_rtx, CONST0_RTX (BImode),
+ operands[3]));
+ DONE;
+})
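+
+;; Conditional branches are therefore always two steps: a compare that
+;; sets CC, followed by a branch on CC.  With hypothetical registers:
+;;   cc = R0 < R1;
+;;   if cc jump .Ltaken;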
+
+(define_insn "cbranchbi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bfin_bimode_comparison_operator"
+ [(match_operand:BI 1 "register_operand" "C")
+ (match_operand:BI 2 "immediate_operand" "P0")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ asm_conditional_branch (insn, operands, 0, 0);
+ return "";
+}
+ [(set_attr "type" "brcc")])
+
+;; Special cbranch patterns to deal with the speculative load problem - see
+;; bfin_reorg for details.
+
+(define_insn "cbranch_predicted_taken"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bfin_bimode_comparison_operator"
+ [(match_operand:BI 1 "register_operand" "C")
+ (match_operand:BI 2 "immediate_operand" "P0")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (unspec [(const_int 0)] UNSPEC_CBRANCH_TAKEN)]
+ ""
+{
+ asm_conditional_branch (insn, operands, 0, 1);
+ return "";
+}
+ [(set_attr "type" "brcc")])
+
+(define_insn "cbranch_with_nops"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bfin_bimode_comparison_operator"
+ [(match_operand:BI 1 "register_operand" "C")
+ (match_operand:BI 2 "immediate_operand" "P0")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (unspec [(match_operand 4 "immediate_operand" "")] UNSPEC_CBRANCH_NOPS)]
+ "reload_completed"
+{
+ asm_conditional_branch (insn, operands, INTVAL (operands[4]), 0);
+ return "";
+}
+ [(set_attr "type" "brcc")
+ (set_attr "length" "8")])
+
+;; setcc insns.
+
+(define_expand "cstorebi4"
+ [(set (match_dup 4)
+ (match_operator:BI 1 "bfin_bimode_comparison_operator"
+ [(match_operand:BI 2 "register_operand" "")
+ (match_operand:BI 3 "reg_or_const_int_operand" "")]))
+ (set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_dup 4) (const_int 0)))]
+ ""
+{
+ /* It could be expanded as a movbisi instruction, but the portable
+ alternative produces better code. */
+ if (GET_CODE (operands[1]) == NE)
+ FAIL;
+
+ operands[4] = bfin_cc_rtx;
+})
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "reg_or_const_int_operand" "")]))]
+ ""
+{
+ rtx bi_compare, test;
+
+ if (!bfin_direct_comparison_operator (operands[1], SImode))
+ {
+ if (!register_operand (operands[3], SImode)
+ || GET_CODE (operands[1]) == NE)
+ FAIL;
+ test = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
+ SImode, operands[3], operands[2]);
+ }
+ else
+ test = operands[1];
+
+ bi_compare = bfin_gen_compare (test, SImode);
+ gcc_assert (GET_CODE (bi_compare) == NE);
+ emit_insn (gen_movbisi (operands[0], bfin_cc_rtx));
+ DONE;
+})
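+
+;; Conditions without a direct instruction are handled by swapping the
+;; operands: e.g. R0 > R1 is evaluated as R1 < R0 (swap_condition above),
+;; after which the CC result is copied into the destination via movbisi.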
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop;")
+
+;; A nop that stays in place once emitted.
+(define_insn "forced_nop"
+ [(unspec [(const_int 0)] UNSPEC_NOP)]
+ ""
+ "nop;")
+
+(define_insn "mnop"
+ [(unspec [(const_int 0)] UNSPEC_32BIT)]
+ ""
+ "mnop%!"
+ [(set_attr "type" "dsp32")])
+
+;;;;;;;;;;;;;;;;;;;; CC2dreg ;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "movsibi"
+ [(set (match_operand:BI 0 "register_operand" "=C")
+ (ne:BI (match_operand:SI 1 "register_operand" "d")
+ (const_int 0)))]
+ ""
+ "CC = %1;"
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "movbisi"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ne:SI (match_operand:BI 1 "register_operand" "C")
+ (const_int 0)))]
+ ""
+ "#"
+ ""
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:BI 1 "register_operand" "")))]
+ "")
+
+(define_insn "notbi"
+ [(set (match_operand:BI 0 "register_operand" "=C")
+ (eq:BI (match_operand:BI 1 "register_operand" " 0")
+ (const_int 0)))]
+ ""
+ "%0 = ! %0;" /* NOT CC;" */
+ [(set_attr "type" "compare")])
+
+;; Vector and DSP insns
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 24))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "d")
+ (const_int 8))))]
+ ""
+ "%0 = ALIGN8(%1, %2)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "d")
+ (const_int 16))))]
+ ""
+ "%0 = ALIGN16(%1, %2)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 8))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "d")
+ (const_int 24))))]
+ ""
+ "%0 = ALIGN24(%1, %2)%!"
+ [(set_attr "type" "dsp32")])
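+
+;; The three ALIGN patterns above splice a 32-bit word out of a register
+;; pair; e.g. the first corresponds to the C expression
+;;   (a << 24) | (b >> 8)
+;; on uint32_t operands, which is the combining step of an unaligned
+;; word extraction.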
+
+;; Prologue and epilogue.
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "bfin_expand_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(const_int 1)]
+ ""
+ "bfin_expand_epilogue (1, 0, 0); DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(const_int 1)]
+ ""
+ "bfin_expand_epilogue (0, 0, 1); DONE;")
+
+(define_expand "eh_return"
+ [(use (match_operand:SI 0 "register_operand" ""))]
+ ""
+{
+ emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0]));
+ emit_jump_insn (gen_eh_return_internal ());
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn "eh_store_handler"
+ [(unspec_volatile [(match_operand:SI 1 "register_operand" "da")]
+ UNSPEC_VOLATILE_STORE_EH_HANDLER)
+ (clobber (match_operand:SI 0 "memory_operand" "=m"))]
+ ""
+ "%0 = %1%!"
+ [(set_attr "type" "mcst")])
+
+(define_insn_and_split "eh_return_internal"
+ [(eh_return)]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 1)]
+ "bfin_expand_epilogue (1, 1, 0); DONE;")
+
+(define_insn "link"
+ [(set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -4))) (reg:SI REG_RETS))
+ (set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:SI REG_FP))
+ (set (reg:SI REG_FP)
+ (plus:SI (reg:SI REG_SP) (const_int -8)))
+ (set (reg:SI REG_SP)
+ (plus:SI (reg:SI REG_SP) (match_operand:SI 0 "immediate_operand" "i")))]
+ ""
+ "LINK %Z0;"
+ [(set_attr "length" "4")])
+
+(define_insn "unlink"
+ [(set (reg:SI REG_FP) (mem:SI (reg:SI REG_FP)))
+ (set (reg:SI REG_RETS) (mem:SI (plus:SI (reg:SI REG_FP) (const_int 4))))
+ (set (reg:SI REG_SP) (plus:SI (reg:SI REG_FP) (const_int 8)))]
+ ""
+ "UNLINK;"
+ [(set_attr "length" "4")])
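+
+;; Reading the RTL above: LINK stores RETS at SP-4 and the old FP at
+;; SP-8, points FP at the new save area (SP-8), and adjusts SP by
+;; operand 0, all in one instruction; UNLINK restores FP, RETS and SP
+;; from the frame pointer.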
+
+;; This pattern is slightly clumsy. The stack adjust must be the final SET in
+;; the pattern, otherwise dwarf2out becomes very confused about which reg goes
+;; where on the stack, since it goes through all elements of the parallel in
+;; sequence.
+(define_insn "push_multiple"
+ [(match_parallel 0 "push_multiple_operation"
+ [(unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_PUSH_MULTIPLE)])]
+ ""
+{
+ output_push_multiple (insn, operands);
+ return "";
+})
+
+(define_insn "pop_multiple"
+ [(match_parallel 0 "pop_multiple_operation"
+ [(set (reg:SI REG_SP)
+ (plus:SI (reg:SI REG_SP) (match_operand:SI 1 "immediate_operand" "i")))])]
+ ""
+{
+ output_pop_multiple (insn, operands);
+ return "";
+})
+
+(define_insn "return_internal"
+ [(return)
+ (use (match_operand 0 "register_operand" ""))]
+ "reload_completed"
+{
+ switch (REGNO (operands[0]))
+ {
+ case REG_RETX:
+ return "rtx;";
+ case REG_RETN:
+ return "rtn;";
+ case REG_RETI:
+ return "rti;";
+ case REG_RETS:
+ return "rts;";
+ }
+ gcc_unreachable ();
+})
+
+;; When used at a location where CC contains 1, this causes a speculative
+;; load that is later cancelled.  It is used for certain workarounds in
+;; interrupt handler prologues.
+(define_insn "dummy_load"
+ [(unspec_volatile [(match_operand 0 "register_operand" "a")
+ (match_operand 1 "register_operand" "C")]
+ UNSPEC_VOLATILE_DUMMY)]
+ ""
+ "if cc jump 4;\n\tr7 = [%0];"
+ [(set_attr "type" "misc")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+;; A placeholder insn inserted before the final scheduling pass. It is used
+;; to improve scheduling of loads when workarounds for speculative loads are
+;; needed, by not placing them in the first few cycles after a conditional
+;; branch.
+(define_insn "stall"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")]
+ UNSPEC_VOLATILE_STALL)]
+ ""
+ ""
+ [(set_attr "type" "stall")])
+
+(define_insn "csync"
+ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)]
+ ""
+ "csync;"
+ [(set_attr "type" "sync")])
+
+(define_insn "ssync"
+ [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_SSYNC)]
+ ""
+ "ssync;"
+ [(set_attr "type" "sync")])
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 3))]
+ ""
+ "excpt 3;"
+ [(set_attr "type" "misc")
+ (set_attr "length" "2")])
+
+(define_insn "trapifcc"
+ [(trap_if (reg:BI REG_CC) (const_int 3))]
+ ""
+ "if !cc jump 4 (bp); excpt 3;"
+ [(set_attr "type" "misc")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+;;; Vector instructions
+
+;; First, all sorts of move variants
+
+(define_insn "movhiv2hi_low"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (match_operand:HI 2 "register_operand" "d")
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0")
+ (parallel [(const_int 1)]))))]
+ ""
+ "%h0 = %h2 << 0%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+(define_insn "movhiv2hi_high"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (match_operand:HI 2 "register_operand" "d")))]
+ ""
+ "%d0 = %h2 << 0%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+;; No earlyclobber on alternative two since our sequence ought to be safe.
+;; The order of operands is intentional to match the VDSP builtin (high word
+;; is passed first).
+(define_insn_and_split "composev2hi"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d")
+ (vec_concat:V2HI (match_operand:HI 2 "register_operand" "0,d")
+ (match_operand:HI 1 "register_operand" "d,d")))]
+ ""
+ "@
+ %d0 = %h1 << 0%!
+ #"
+ "reload_completed"
+ [(set (match_dup 0)
+ (vec_concat:V2HI
+ (vec_select:HI (match_dup 0) (parallel [(const_int 0)]))
+ (match_dup 1)))
+ (set (match_dup 0)
+ (vec_concat:V2HI
+ (match_dup 2)
+ (vec_select:HI (match_dup 0) (parallel [(const_int 1)]))))]
+ ""
+ [(set_attr "type" "dsp32shiftimm")])
+
+; Like composev2hi, but operating on elements of V2HI vectors.
+; Useful on its own, and as a combiner bridge for the multiply and
+; mac patterns.
+(define_insn "packv2hi"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d,d,d,d,d,d,d")
+ (vec_concat:V2HI (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "0,0,d,d,d,d,d,d")
+ (parallel [(match_operand 3 "const01_operand" "P0,P0,P0,P1,P0,P1,P0,P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d,d,0,0,d,d,d,d")
+ (parallel [(match_operand 4 "const01_operand" "P0,P1,P1,P1,P0,P0,P1,P1")]))))]
+ ""
+ "@
+ %d0 = %h2 << 0%!
+ %d0 = %d2 << 0%!
+ %h0 = %h1 << 0%!
+ %h0 = %d1 << 0%!
+ %0 = PACK (%h2,%h1)%!
+ %0 = PACK (%h2,%d1)%!
+ %0 = PACK (%d2,%h1)%!
+ %0 = PACK (%d2,%d1)%!"
+ [(set_attr "type" "dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32,dsp32,dsp32,dsp32")])
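+
+;; For instance, combining the two low halves,
+;;   v2hi r = { low_of (a), low_of (b) };   /* element 0 = low half */
+;; matches the fifth alternative and emits "%0 = PACK (%h2,%h1)", whose
+;; first operand supplies the high half of the result (low_of here is
+;; just illustrative pseudo-C).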
+
+(define_insn "movv2hi_hi"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,d,d")
+ (parallel [(match_operand 2 "const01_operand" "P0,P0,P1")])))]
+ ""
+ "@
+ /* optimized out */
+ %h0 = %h1 << 0%!
+ %h0 = %d1 << 0%!"
+ [(set_attr "type" "dsp32shiftimm")])
+
+(define_expand "movv2hi_hi_low"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ ""
+ "")
+
+(define_expand "movv2hi_hi_high"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "")
+ (parallel [(const_int 1)])))]
+ ""
+ "")
+
+;; Unusual arithmetic operations on 16-bit registers.
+
+(define_code_iterator sp_or_sm [ss_plus ss_minus])
+(define_code_attr spm_string [(ss_plus "+") (ss_minus "-")])
+(define_code_attr spm_name [(ss_plus "add") (ss_minus "sub")])
+
+(define_insn "ss<spm_name>hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (sp_or_sm:HI (match_operand:HI 1 "register_operand" "d")
+ (match_operand:HI 2 "register_operand" "d")))]
+ ""
+ "%h0 = %h1 <spm_string> %h2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ss<spm_name>hi3_parts"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (sp_or_sm:HI (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 <spm_string> %h2 (S)%!",
+ "%h0 = %d1 <spm_string> %h2 (S)%!",
+ "%h0 = %h1 <spm_string> %d2 (S)%!",
+ "%h0 = %d1 <spm_string> %d2 (S)%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ss<spm_name>hi3_low_parts"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (sp_or_sm:HI (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 3 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")])))))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h2 <spm_string> %h3 (S)%!",
+ "%h0 = %d2 <spm_string> %h3 (S)%!",
+ "%h0 = %h2 <spm_string> %d3 (S)%!",
+ "%h0 = %d2 <spm_string> %d3 (S)%!" };
+ int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ss<spm_name>hi3_high_parts"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (sp_or_sm:HI (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 3 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")])))
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "0")
+ (parallel [(const_int 1)]))))]
+ ""
+{
+ const char *templates[] = {
+ "%d0 = %h2 <spm_string> %h3 (S)%!",
+ "%d0 = %d2 <spm_string> %h3 (S)%!",
+ "%d0 = %h2 <spm_string> %d3 (S)%!",
+ "%d0 = %d2 <spm_string> %d3 (S)%!" };
+ int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+;; V2HI vector insns
+
+(define_insn "addv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (plus:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 +|+ %2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssaddv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 +|+ %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "subv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (minus:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 -|- %2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sssubv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = %1 -|- %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "addsubv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %1 +|- %2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "subaddv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %1 -|+ %2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssaddsubv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %1 +|- %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sssubaddv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (vec_concat:V2HI
+ (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %1 -|+ %2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sublohiv2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)]))))]
+ ""
+ "%h0 = %d1 - %h2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "subhilov2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)]))))]
+ ""
+ "%h0 = %h1 - %d2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sssublohiv2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)]))))]
+ ""
+ "%h0 = %d1 - %h2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sssubhilov2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)]))))]
+ ""
+ "%h0 = %h1 - %d2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "addlohiv2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)]))))]
+ ""
+ "%h0 = %d1 + %h2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "addhilov2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)]))))]
+ ""
+ "%h0 = %h1 + %d2%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssaddlohiv2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)]))))]
+ ""
+ "%h0 = %d1 + %h2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssaddhilov2hi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)]))))]
+ ""
+ "%h0 = %h1 + %d2 (S)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "sminv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (smin:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = MIN (%1, %2) (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "smaxv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (smax:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%0 = MAX (%1, %2) (V)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Multiplications.
+
+;; The Blackfin supports a wide range of multiplication variants, and we
+;; need many patterns to cover most of the hardware's abilities.
+;; There are a few simple patterns using MULT rtx codes, but most of them use
+;; an unspec with a const_int operand that determines which flag to use in the
+;; instruction.
+;; There are variants for single and parallel multiplications.
+;; There are variants which just use 16-bit lowparts as inputs, and variants
+;; which allow the user to choose just which halves to use as input values.
+;; There are variants which set D registers, variants which set accumulators,
+;; variants which set both, some of them optionally using the accumulators as
+;; inputs for multiply-accumulate operations.
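+;;
+;; A note on the templates below: a vec_select of element 0 prints with the
+;; %h (low half) operand modifier and element 1 with %d (high half), while
+;; %M prints the modifier string, e.g. "(IS)", chosen by the const_int
+;; macflag operand.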
+
+(define_insn "flag_mulhi"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "d")
+ (match_operand:HI 2 "register_operand" "d")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+ "%h0 = %h1 * %h2 %M3%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_mulhi_parts"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (unspec:HI [(vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (match_operand 5 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 * %h2 %M5%!",
+ "%h0 = %d1 * %h2 %M5%!",
+ "%h0 = %h1 * %d2 %M5%!",
+ "%h0 = %d1 * %d2 %M5%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_mulhisi"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:HI 1 "register_operand" "d")
+ (match_operand:HI 2 "register_operand" "d")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+ "%0 = %h1 * %h2 %M3%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_mulhisi_parts"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1")]))
+ (match_operand 5 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%0 = %h1 * %h2 %M5%!",
+ "%0 = %d1 * %h2 %M5%!",
+ "%0 = %h1 * %d2 %M5%!",
+ "%0 = %d1 * %d2 %M5%!" };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+;; Three alternatives here to cover all possible allocations:
+;; 0. mac flag is usable only for accumulator 1 - use A1 and odd DREG
+;; 1. mac flag is usable for accumulator 0 - use A0 and even DREG
+;; 2. mac flag is usable in any accumulator - use A1 and odd DREG
+;; Other patterns which don't have a DREG destination can collapse cases
+;; 1 and 2 into one.
+(define_insn "flag_machi"
+ [(set (match_operand:HI 0 "register_operand" "=W,D,W")
+ (unspec:HI [(match_operand:HI 2 "register_operand" "d,d,d")
+ (match_operand:HI 3 "register_operand" "d,d,d")
+ (match_operand 4 "register_operand" "1,1,1")
+ (match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")
+ (match_operand 6 "const_int_operand" "PB,PA,PA")]
+ UNSPEC_MAC_WITH_FLAG))
+ (set (match_operand:PDI 1 "register_operand" "=B,A,B")
+ (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3)
+ (match_dup 4) (match_dup 5)]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+ "%h0 = (%1 %b5 %h2 * %h3) %M6%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_machi_acconly"
+ [(set (match_operand:PDI 0 "register_operand" "=B,e")
+ (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d")
+ (match_operand:HI 2 "register_operand" "d,d")
+ (match_operand 3 "register_operand" "0,0")
+ (match_operand 4 "const01_operand" "P0P1,P0P1")
+ (match_operand 5 "const_int_operand" "PB,PA")]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+ "%0 %b4 %h1 * %h2 %M5%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_machi_parts_acconly"
+ [(set (match_operand:PDI 0 "register_operand" "=B,e")
+ (unspec:PDI [(vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d,d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1,P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d,d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1")]))
+ (match_operand:PDI 5 "register_operand" "0,0")
+ (match_operand 6 "const01_operand" "P0P1,P0P1")
+ (match_operand 7 "const_int_operand" "PB,PA")]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%0 %b6 %h1 * %h2 %M7%!",
+ "%0 %b6 %d1 * %h2 %M7%!",
+ "%0 %b6 %h1 * %d2 %M7%!",
+ "%0 %b6 %d1 * %d2 %M7%!"
+ };
+ int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1);
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_macinithi"
+ [(set (match_operand:HI 0 "register_operand" "=W,D,W")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "d,d,d")
+ (match_operand:HI 2 "register_operand" "d,d,d")
+ (match_operand 3 "const_int_operand" "PB,PA,PA")]
+ UNSPEC_MAC_WITH_FLAG))
+ (set (match_operand:PDI 4 "register_operand" "=B,A,B")
+ (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+ "%h0 = (%4 = %h1 * %h2) %M3%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_macinit1hi"
+ [(set (match_operand:PDI 0 "register_operand" "=B,e")
+ (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d")
+ (match_operand:HI 2 "register_operand" "d,d")
+ (match_operand 3 "const_int_operand" "PB,PA")]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+ "%0 = %h1 * %h2 %M3%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "mulv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (mult:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))]
+ ""
+ "%h0 = %h1 * %h2, %d0 = %d1 * %d2 (IS)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_mulv2hi"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand 3 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+ "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M3%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_mulv2hi_parts"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(vec_concat:V2HI
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_dup 1)
+ (parallel [(match_operand 4 "const01_operand" "P0P1")])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (vec_select:HI (match_dup 2)
+ (parallel [(match_operand 6 "const01_operand" "P0P1")])))
+ (match_operand 7 "const_int_operand" "n")]
+ UNSPEC_MUL_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = %h1 * %h2, %d0 = %h1 * %h2 %M7%!",
+ "%h0 = %d1 * %h2, %d0 = %h1 * %h2 %M7%!",
+ "%h0 = %h1 * %h2, %d0 = %d1 * %h2 %M7%!",
+ "%h0 = %d1 * %h2, %d0 = %d1 * %h2 %M7%!",
+ "%h0 = %h1 * %d2, %d0 = %h1 * %h2 %M7%!",
+ "%h0 = %d1 * %d2, %d0 = %h1 * %h2 %M7%!",
+ "%h0 = %h1 * %d2, %d0 = %d1 * %h2 %M7%!",
+ "%h0 = %d1 * %d2, %d0 = %d1 * %h2 %M7%!",
+ "%h0 = %h1 * %h2, %d0 = %h1 * %d2 %M7%!",
+ "%h0 = %d1 * %h2, %d0 = %h1 * %d2 %M7%!",
+ "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M7%!",
+ "%h0 = %d1 * %h2, %d0 = %d1 * %d2 %M7%!",
+ "%h0 = %h1 * %d2, %d0 = %h1 * %d2 %M7%!",
+ "%h0 = %d1 * %d2, %d0 = %h1 * %d2 %M7%!",
+ "%h0 = %h1 * %d2, %d0 = %d1 * %d2 %M7%!",
+ "%h0 = %d1 * %d2, %d0 = %d1 * %d2 %M7%!" };
+ int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3));
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+;; A slightly complicated pattern.
+;; Operand 0 is the halfword output; operand 11 is the accumulator output.
+;; Halfword inputs are operands 1 and 2; operands 3, 4, 5 and 6 specify which
+;; parts of these 2x16 bit registers to use.
+;; Operand 7 is the accumulator input.
+;; Operands 8/9 specify whether low/high parts are mac (0) or msu (1)
+;; Operand 10 is the macflag to be used.
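+;; In the templates, %b8 and %b9 print "+=" when the corresponding operand
+;; is 0 (mac) and "-=" when it is 1 (msu).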
+(define_insn "flag_macv2hi_parts"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(vec_concat:V2HI
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_dup 1)
+ (parallel [(match_operand 4 "const01_operand" "P0P1")])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (vec_select:HI (match_dup 2)
+ (parallel [(match_operand 6 "const01_operand" "P0P1")])))
+ (match_operand:V2PDI 7 "register_operand" "e")
+ (match_operand 8 "const01_operand" "P0P1")
+ (match_operand 9 "const01_operand" "P0P1")
+ (match_operand 10 "const_int_operand" "n")]
+ UNSPEC_MAC_WITH_FLAG))
+ (set (match_operand:V2PDI 11 "register_operand" "=e")
+ (unspec:V2PDI [(vec_concat:V2HI
+ (vec_select:HI (match_dup 1) (parallel [(match_dup 3)]))
+ (vec_select:HI (match_dup 1) (parallel [(match_dup 4)])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))
+ (vec_select:HI (match_dup 2) (parallel [(match_dup 6)])))
+ (match_dup 7) (match_dup 8) (match_dup 9) (match_dup 10)]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!",
+ "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!" };
+ int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3));
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_macv2hi_parts_acconly"
+ [(set (match_operand:V2PDI 0 "register_operand" "=e")
+ (unspec:V2PDI [(vec_concat:V2HI
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_dup 1)
+ (parallel [(match_operand 4 "const01_operand" "P0P1")])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (vec_select:HI (match_dup 2)
+ (parallel [(match_operand 6 "const01_operand" "P0P1")])))
+ (match_operand:V2PDI 7 "register_operand" "e")
+ (match_operand 8 "const01_operand" "P0P1")
+ (match_operand 9 "const01_operand" "P0P1")
+ (match_operand 10 "const_int_operand" "n")]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %h2 %M10%!",
+ "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %h2 %M10%!",
+ "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %h2 %M10%!",
+ "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %h2 %M10%!",
+ "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %h2 %M10%!",
+ "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %h2 %M10%!",
+ "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %h2 %M10%!",
+ "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %h2 %M10%!",
+ "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %d2 %M10%!",
+ "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %d2 %M10%!",
+ "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %d2 %M10%!",
+ "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %d2 %M10%!",
+ "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %d2 %M10%!",
+ "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %d2 %M10%!",
+ "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %d2 %M10%!",
+ "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %d2 %M10%!" };
+ int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3));
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+;; Same as above, but initializing the accumulators rather than accumulating
+;; into them, so a couple of operands become unnecessary.
+(define_insn "flag_macinitv2hi_parts"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(vec_concat:V2HI
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_dup 1)
+ (parallel [(match_operand 4 "const01_operand" "P0P1")])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (vec_select:HI (match_dup 2)
+ (parallel [(match_operand 6 "const01_operand" "P0P1")])))
+ (match_operand 7 "const_int_operand" "n")]
+ UNSPEC_MAC_WITH_FLAG))
+ (set (match_operand:V2PDI 8 "register_operand" "=e")
+ (unspec:V2PDI [(vec_concat:V2HI
+ (vec_select:HI (match_dup 1) (parallel [(match_dup 3)]))
+ (vec_select:HI (match_dup 1) (parallel [(match_dup 4)])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))
+ (vec_select:HI (match_dup 2) (parallel [(match_dup 6)])))
+ (match_dup 7)]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!",
+ "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!",
+ "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!",
+ "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!",
+ "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!",
+ "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!",
+ "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!",
+ "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!",
+ "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!",
+ "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!",
+ "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!",
+ "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!",
+ "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!",
+ "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!",
+ "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!",
+ "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!" };
+ int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3));
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+(define_insn "flag_macinit1v2hi_parts"
+ [(set (match_operand:V2PDI 0 "register_operand" "=e")
+ (unspec:V2PDI [(vec_concat:V2HI
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(match_operand 3 "const01_operand" "P0P1")]))
+ (vec_select:HI
+ (match_dup 1)
+ (parallel [(match_operand 4 "const01_operand" "P0P1")])))
+ (vec_concat:V2HI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(match_operand 5 "const01_operand" "P0P1")]))
+ (vec_select:HI (match_dup 2)
+ (parallel [(match_operand 6 "const01_operand" "P0P1")])))
+ (match_operand 7 "const_int_operand" "n")]
+ UNSPEC_MAC_WITH_FLAG))]
+ ""
+{
+ const char *templates[] = {
+ "A0 = %h1 * %h2, A1 = %h1 * %h2 %M7%!",
+ "A0 = %d1 * %h2, A1 = %h1 * %h2 %M7%!",
+ "A0 = %h1 * %h2, A1 = %d1 * %h2 %M7%!",
+ "A0 = %d1 * %h2, A1 = %d1 * %h2 %M7%!",
+ "A0 = %h1 * %d2, A1 = %h1 * %h2 %M7%!",
+ "A0 = %d1 * %d2, A1 = %h1 * %h2 %M7%!",
+ "A0 = %h1 * %d2, A1 = %d1 * %h2 %M7%!",
+ "A0 = %d1 * %d2, A1 = %d1 * %h2 %M7%!",
+ "A0 = %h1 * %h2, A1 = %h1 * %d2 %M7%!",
+ "A0 = %d1 * %h2, A1 = %h1 * %d2 %M7%!",
+ "A0 = %h1 * %h2, A1 = %d1 * %d2 %M7%!",
+ "A0 = %d1 * %h2, A1 = %d1 * %d2 %M7%!",
+ "A0 = %h1 * %d2, A1 = %h1 * %d2 %M7%!",
+ "A0 = %d1 * %d2, A1 = %h1 * %d2 %M7%!",
+ "A0 = %h1 * %d2, A1 = %d1 * %d2 %M7%!",
+ "A0 = %d1 * %d2, A1 = %d1 * %d2 %M7%!" };
+ int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1)
+ + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3));
+ return templates[alt];
+}
+ [(set_attr "type" "dsp32")])
+
+;; A mixture of multiply and multiply-accumulate for when we only want to
+;; initialize one part.
+(define_insn "flag_mul_macv2hi_parts_acconly"
+ [(set (match_operand:PDI 0 "register_operand" "=B,e,e")
+ (unspec:PDI [(vec_select:HI
+ (match_operand:V2HI 2 "register_operand" "d,d,d")
+ (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (vec_select:HI
+ (match_operand:V2HI 3 "register_operand" "d,d,d")
+ (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (match_operand 10 "const_int_operand" "PB,PA,PA")]
+ UNSPEC_MUL_WITH_FLAG))
+ (set (match_operand:PDI 1 "register_operand" "=B,e,e")
+ (unspec:PDI [(vec_select:HI
+ (match_dup 2)
+ (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (vec_select:HI
+ (match_dup 3)
+ (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")]))
+ (match_operand:PDI 8 "register_operand" "1,1,1")
+ (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1")
+ (match_operand 11 "const_int_operand" "PA,PB,PA")]
+ UNSPEC_MAC_WITH_FLAG))]
+ "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))"
+{
+ rtx xops[6];
+ const char *templates[] = {
+ "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5%!",
+ "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5%!",
+ "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5%!",
+ "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5%!",
+ "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5%!",
+ "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5%!",
+ "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5%!",
+ "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5%!",
+ "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5%!",
+ "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5%!",
+ "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5%!",
+ "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5%!",
+ "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5%!",
+ "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5%!",
+ "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5%!",
+ "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5%!" };
+ int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1)
+ + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3));
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = operands[3];
+ xops[4] = operands[9];
+ xops[5] = which_alternative == 0 ? operands[10] : operands[11];
+ output_asm_insn (templates[alt], xops);
+ return "";
+}
+ [(set_attr "type" "dsp32")])
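+
+;; The xops remapping above presents the templates with %0-%3 equal to
+;; operands 0-3, %b4 the mac/msu selector (operand 9), and %M5 the macflag
+;; (operand 10 for the first alternative, operand 11 for the others).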
+
+
+(define_code_iterator s_or_u [sign_extend zero_extend])
+(define_code_attr su_optab [(sign_extend "mul")
+ (zero_extend "umul")])
+(define_code_attr su_modifier [(sign_extend "IS")
+ (zero_extend "FU")])
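+
+;; With this iterator, each <su_optab>hisi_* pattern below expands twice:
+;; as mulhisi_* using sign_extend and the (IS) modifier, and as umulhisi_*
+;; using zero_extend and the (FU) modifier.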
+
+(define_insn "<su_optab>hisi_ll"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %h2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_lh"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_hl"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %d1 * %h2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_hh"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d")
+ (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Additional variants for signed * unsigned multiply.
+
+(define_insn "usmulhisi_ull"
+ [(set (match_operand:SI 0 "register_operand" "=W")
+ (mult:SI (zero_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h2 * %h1 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_ulh"
+ [(set (match_operand:SI 0 "register_operand" "=W")
+ (mult:SI (zero_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d2 * %h1 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_uhl"
+ [(set (match_operand:SI 0 "register_operand" "=W")
+ (mult:SI (zero_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h2 * %d1 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_uhh"
+ [(set (match_operand:SI 0 "register_operand" "=W")
+ (mult:SI (zero_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d")
+ (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d2 * %d1 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Parallel versions of these operations. First, normal signed or unsigned
+;; multiplies.
+
+(define_insn "<su_optab>hisi_ll_lh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %h1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_ll_hl"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %d1 * %h2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_ll_hh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %d1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_lh_hl"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %d1 * %h2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_lh_hh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %d1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "<su_optab>hisi_hl_hh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (s_or_u:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (s_or_u:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %h2, %3 = %d1 * %d2 (<su_modifier>)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Special signed * unsigned variants.
+
+(define_insn "usmulhisi_ll_lul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %h1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_ll_luh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %h1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_ll_hul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %d1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_ll_huh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %h2, %3 = %d1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_lh_lul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %h1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_lh_luh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %h1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_lh_hul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %d1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_lh_huh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %h1 * %d2, %3 = %d1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hl_lul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %d1 * %h2, %3 = %h1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hl_luh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %h2, %3 = %h1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hl_hul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %d1 * %h2, %3 = %d1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hl_huh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 0)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %h2, %3 = %d1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hh_lul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %d1 * %d2, %3 = %h1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hh_luh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %d2, %3 = %h1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hh_hul"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))]
+ ""
+ "%0 = %d1 * %d2, %3 = %d1 * %h2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "usmulhisi_hh_huh"
+ [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))
+ (sign_extend:SI
+ (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d")
+ (parallel [(const_int 1)])))))
+ (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7")
+ (mult:SI (sign_extend:SI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (zero_extend:SI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))]
+ ""
+ "%0 = %d1 * %d2, %3 = %d1 * %d2 (IS,M)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Vector neg/abs.
+
+(define_insn "ssnegv2hi2"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (ss_neg:V2HI (match_operand:V2HI 1 "register_operand" "d")))]
+ ""
+ "%0 = - %1 (V)%!"
+ [(set_attr "type" "dsp32")])
+
+(define_insn "ssabsv2hi2"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))]
+ ""
+ "%0 = ABS %1 (V)%!"
+ [(set_attr "type" "dsp32")])
+
+;; Shifts.
+
+(define_insn "ssashiftv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d,d")
+ (if_then_else:V2HI
+ (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0))
+ (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d")
+ (match_dup 2))
+ (ss_ashift:V2HI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ %0 = ASHIFT %1 BY %h2 (V, S)%!
+ %0 = %1 << %2 (V,S)%!
+ %0 = %1 >>> %N2 (V,S)%!"
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")])
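+
+;; In these shift patterns, a negative count selects the right-shift arm of
+;; the if_then_else; for the immediate alternatives, %N2 presumably prints
+;; the negated (positive) count used by the ">>>" and ">>" forms.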
+
+(define_insn "ssashifthi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (if_then_else:HI
+ (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0))
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d")
+ (match_dup 2))
+ (ss_ashift:HI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ %0 = ASHIFT %1 BY %h2 (V, S)%!
+ %0 = %1 << %2 (V,S)%!
+ %0 = %1 >>> %N2 (V,S)%!"
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")])
+
+(define_insn "ssashiftsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (if_then_else:SI
+ (lt (match_operand:HI 2 "reg_or_const_int_operand" "d,Ku5,Ks5") (const_int 0))
+ (ashiftrt:SI (match_operand:HI 1 "register_operand" "d,d,d")
+ (match_dup 2))
+ (ss_ashift:SI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ %0 = ASHIFT %1 BY %h2 (S)%!
+ %0 = %1 << %2 (S)%!
+ %0 = %1 >>> %N2 (S)%!"
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")])
+
+(define_insn "lshiftv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d,d")
+ (if_then_else:V2HI
+ (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0))
+ (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d")
+ (match_dup 2))
+ (ashift:V2HI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ %0 = LSHIFT %1 BY %h2 (V)%!
+ %0 = %1 << %2 (V)%!
+ %0 = %1 >> %N2 (V)%!"
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")])
+
+(define_insn "lshifthi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (if_then_else:HI
+ (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0))
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d")
+ (match_dup 2))
+ (ashift:HI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ %0 = LSHIFT %1 BY %h2 (V)%!
+ %0 = %1 << %2 (V)%!
+ %0 = %1 >> %N2 (V)%!"
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")])
+
+;; Load without alignment exception (masking off low bits).
+
+(define_insn "loadbytes"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mem:SI (and:SI (match_operand:SI 1 "register_operand" "b")
+ (const_int -4))))]
+ ""
+ "DISALGNEXCPT || %0 = [%1];"
+ [(set_attr "type" "mcld")
+ (set_attr "length" "8")])
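+
+;; Roughly the C equivalent (a sketch, not part of the port):
+;;   *(int *)((unsigned long)p & ~3UL)
+;; The low two address bits are masked off, so the 32-bit load can never
+;; raise an alignment exception.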
+
+(include "sync.md")
diff --git a/gcc/config/bfin/bfin.opt b/gcc/config/bfin/bfin.opt
new file mode 100644
index 000000000..c7a905602
--- /dev/null
+++ b/gcc/config/bfin/bfin.opt
@@ -0,0 +1,101 @@
+; Options for the Blackfin port of the compiler
+;
+; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+msim
+Target RejectNegative
+Use simulator runtime
+
+mcpu=
+Target RejectNegative Joined
+Specify the name of the target CPU
+
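+; An illustrative invocation (example only, not part of this file):
+;   bfin-elf-gcc -mcpu=bf537 -msim hello.c
+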
+momit-leaf-frame-pointer
+Target Report Mask(OMIT_LEAF_FRAME_POINTER)
+Omit frame pointer for leaf functions
+
+mlow64k
+Target Report Mask(LOW_64K)
+Program is entirely located in low 64k of memory
+
+mcsync-anomaly
+Target Report Var(bfin_csync_anomaly) Init(-1)
+Work around a hardware anomaly by adding a number of NOPs before a
+CSYNC or SSYNC instruction.
+
+mspecld-anomaly
+Target Report Var(bfin_specld_anomaly) Init(-1)
+Avoid speculative loads to work around a hardware anomaly.
+
+mid-shared-library
+Target Report Mask(ID_SHARED_LIBRARY)
+Enable ID-based shared library
+
+mleaf-id-shared-library
+Target Report Mask(LEAF_ID_SHARED_LIBRARY)
+Generate code that won't be linked against any other ID shared libraries,
+but may be used as a shared library.
+
+mshared-library-id=
+Target RejectNegative Joined UInteger Var(bfin_library_id)
+ID of shared library to build
+
+msep-data
+Target Report Mask(SEP_DATA)
+Enable separate data segment
+
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Avoid generating pc-relative calls; use indirection
+
+mfast-fp
+Target Report Mask(FAST_FP)
+Link with the fast floating-point library
+
+mfdpic
+Target Report Mask(FDPIC)
+Enable Function Descriptor PIC mode
+
+minline-plt
+Target Report Mask(INLINE_PLT)
+Enable inlining of PLT in function calls
+
+mstack-check-l1
+Target Report Mask(STACK_CHECK_L1)
+Do stack checking using bounds in L1 scratch memory
+
+mmulticore
+Target Report Mask(MULTICORE)
+Enable multicore support
+
+mcorea
+Target Report Mask(COREA)
+Build for Core A
+
+mcoreb
+Target Report Mask(COREB)
+Build for Core B
+
+msdram
+Target Report Mask(SDRAM)
+Build for SDRAM
+
+micplb
+Target Report Mask(ICPLB)
+Assume ICPLBs are enabled at runtime.
diff --git a/gcc/config/bfin/constraints.md b/gcc/config/bfin/constraints.md
new file mode 100644
index 000000000..fa9dcf143
--- /dev/null
+++ b/gcc/config/bfin/constraints.md
@@ -0,0 +1,225 @@
+;; Constraint definitions for Blackfin
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Analog Devices
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "a" "PREGS"
+ "A Pn register.")
+
+(define_register_constraint "d" "DREGS"
+ "A Rn register.")
+
+(define_register_constraint "z" "PREGS_CLOBBERED"
+ "A call clobbered Pn register.")
+
+(define_register_constraint "D" "EVEN_DREGS"
+ "An even-numbered Rn register.")
+
+(define_register_constraint "W" "ODD_DREGS"
+ "An odd-numbered Rn register.")
+
+(define_register_constraint "e" "AREGS"
+ "An accumulator register.")
+
+(define_register_constraint "A" "EVEN_AREGS"
+ "An even-numbered accumulator; A0.")
+
+(define_register_constraint "B" "ODD_AREGS"
+ "An odd-numbered accumulator; A1.")
+
+(define_register_constraint "b" "IREGS"
+ "An I register.")
+
+(define_register_constraint "v" "BREGS"
+ "A B register.")
+
+(define_register_constraint "f" "MREGS"
+ "An M register.")
+
+(define_register_constraint "c" "CIRCREGS"
+ "A register used for circular buffering, i.e. an I, B, or L register.")
+
+(define_register_constraint "C" "CCREGS"
+ "The CC register.")
+
+(define_register_constraint "t" "LT_REGS"
+ "LT0 or LT1.")
+
+(define_register_constraint "u" "LB_REGS"
+ "LB0 or LB1.")
+
+(define_register_constraint "k" "LC_REGS"
+ "LC0 or LC1.")
+
+(define_register_constraint "x" "MOST_REGS"
+ "Any R, P, B, M, I or L register.")
+
+(define_register_constraint "y" "PROLOGUE_REGS"
+ "Additional registers typically used only in prologues and epilogues:
+ RETS, RETN, RETI, RETX, RETE, ASTAT, SEQSTAT and USP.")
+
+(define_register_constraint "w" "NON_A_CC_REGS"
+ "Any register except accumulators or CC.")
+
+(define_register_constraint "Z" "FDPIC_REGS"
+ "@internal The FD-PIC GOT pointer; P3.")
+
+(define_register_constraint "Y" "FDPIC_FPTR_REGS"
+ "@internal The FD-PIC function pointer register; P1.")
+
+(define_register_constraint "q0" "D0REGS"
+ "The register R0.")
+
+(define_register_constraint "q1" "D1REGS"
+ "The register R1.")
+
+(define_register_constraint "q2" "D2REGS"
+ "The register R2.")
+
+(define_register_constraint "q3" "D3REGS"
+ "The register R3.")
+
+(define_register_constraint "q4" "D4REGS"
+ "The register R4.")
+
+(define_register_constraint "q5" "D5REGS"
+ "The register R5.")
+
+(define_register_constraint "q6" "D6REGS"
+ "The register R6.")
+
+(define_register_constraint "q7" "D7REGS"
+ "The register R7.")
+
+(define_register_constraint "qA" "P0REGS"
+ "The register P0.")
+
+;; Constant constraints.
+
+(define_constraint "J"
+ "A constant value of the form 2**N, where N is 5 bits wide."
+ (and (match_code "const_int")
+ (match_test "log2constp (ival)")))
+
+(define_constraint "Ks3"
+ "A signed 3 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -4 && ival <= 3")))
+
+(define_constraint "Ku3"
+ "An unsigned 3 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 7")))
+
+(define_constraint "Ks4"
+ "A signed 4 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -8 && ival <= 7")))
+
+(define_constraint "Ku4"
+ "An unsigned 4 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 15")))
+
+(define_constraint "Ks5"
+ "A signed 5 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -16 && ival <= 15")))
+
+(define_constraint "Ku5"
+ "An unsigned 5 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 31")))
+
+(define_constraint "Ks7"
+ "A signed 7 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -64 && ival <= 63")))
+
+(define_constraint "KN7"
+ "A constant that when negated is a signed 7 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -63 && ival <= 64")))
+
+(define_constraint "Ksh"
+ "A signed 16 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+(define_constraint "Kuh"
+ "An unsigned 16 bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+(define_constraint "L"
+ "A constant value of the form ~(2**N)."
+ (and (match_code "const_int")
+ (match_test "log2constp (~ival)")))
+
+(define_constraint "M1"
+ "An integer with the value 255."
+ (and (match_code "const_int")
+ (match_test "ival == 255")))
+
+(define_constraint "M2"
+ "An integer with the value 65535."
+ (and (match_code "const_int")
+ (match_test "ival == 65535")))
+
+(define_constraint "P0"
+ "An integer with the value 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "P1"
+ "An integer with the value 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "P2"
+ "An integer with the value 2."
+ (and (match_code "const_int")
+ (match_test "ival == 2")))
+
+(define_constraint "P3"
+ "An integer with the value 3."
+ (and (match_code "const_int")
+ (match_test "ival == 3")))
+
+(define_constraint "P4"
+ "An integer with the value 4."
+ (and (match_code "const_int")
+ (match_test "ival == 4")))
+
+(define_constraint "PA"
+ "An integer constant describing any macflag except variants involving M."
+ (and (match_code "const_int")
+ (match_test "ival != MACFLAG_M && ival != MACFLAG_IS_M")))
+
+(define_constraint "PB"
+ "An integer constant describing any macflag involving M."
+ (and (match_code "const_int")
+ (match_test "ival == MACFLAG_M || ival == MACFLAG_IS_M")))
+
+
+;; Extra constraints
+
+(define_constraint "Q"
+ "A SYMBOL_REF."
+ (match_code "symbol_ref"))
+
diff --git a/gcc/config/bfin/crti.s b/gcc/config/bfin/crti.s
new file mode 100644
index 000000000..b6f20fc9e
--- /dev/null
+++ b/gcc/config/bfin/crti.s
@@ -0,0 +1,59 @@
+/* Specialized code needed to support construction and destruction of
+ file-scope objects in C++ and Java code, and to support exception handling.
+ Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * This file just supplies function prologues for the .init and .fini
+ * sections. It is linked in before crtbegin.o.
+ */
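+
+/*
+ * A sketch of the resulting layout: crti.o contributes the __init and
+ * __fini prologues below, crtbegin.o and crtend.o contribute the
+ * section bodies, and crtn.o supplies the matching unlink/rts
+ * epilogues.
+ */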
+
+ .ident "GNU C crti.o"
+
+ .section .init
+ .globl __init
+ .type __init,@function
+__init:
+#if defined __ID_SHARED_LIB__
+ [--SP] = P5;
+#elif defined __BFIN_FDPIC__
+ [--SP] = P3;
+#endif
+ LINK 12;
+#if defined __ID_SHARED_LIB__
+ P5 = [P5 + _current_shared_library_p5_offset_]
+#endif
+ .section .fini
+ .globl __fini
+ .type __fini,@function
+__fini:
+#if defined __ID_SHARED_LIB__
+ [--SP] = P5;
+#elif defined __BFIN_FDPIC__
+ [--SP] = P3;
+#endif
+ LINK 12;
+#if defined __ID_SHARED_LIB__
+ P5 = [P5 + _current_shared_library_p5_offset_]
+#endif
diff --git a/gcc/config/bfin/crtlibid.s b/gcc/config/bfin/crtlibid.s
new file mode 100644
index 000000000..beab80938
--- /dev/null
+++ b/gcc/config/bfin/crtlibid.s
@@ -0,0 +1,29 @@
+/* Provide a weak definition of the library ID, for the benefit of certain
+ configure scripts.
+ Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .ident "GNU C crtlibid.o"
+
+.weak _current_shared_library_p5_offset_
+.set _current_shared_library_p5_offset_, 0
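+
+/* crti.o references this symbol under __ID_SHARED_LIB__; the weak zero
+   default keeps ordinary links (e.g. configure test programs) working,
+   while a real library-ID setup can override it. */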
diff --git a/gcc/config/bfin/crtn.s b/gcc/config/bfin/crtn.s
new file mode 100644
index 000000000..7fcd27bfa
--- /dev/null
+++ b/gcc/config/bfin/crtn.s
@@ -0,0 +1,50 @@
+/* Specialized code needed to support construction and destruction of
+ file-scope objects in C++ and Java code, and to support exception handling.
+ Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * This file supplies function epilogues for the .init and .fini sections.
+ * It is linked in after all other files.
+ */
+
+ .ident "GNU C crtn.o"
+
+ .section .init
+ unlink;
+#if defined __ID_SHARED_LIB__
+ P5 = [SP++];
+#elif defined __BFIN_FDPIC__
+ P3 = [SP++];
+#endif
+ rts;
+
+ .section .fini
+ unlink;
+#if defined __ID_SHARED_LIB__
+ P5 = [SP++];
+#elif defined __BFIN_FDPIC__
+ P3 = [SP++];
+#endif
+ rts;
diff --git a/gcc/config/bfin/elf.h b/gcc/config/bfin/elf.h
new file mode 100644
index 000000000..975212faa
--- /dev/null
+++ b/gcc/config/bfin/elf.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+%{msim:%{!shared:crt0%O%s}} \
+%{!msim:%{!mcpu=bf561*:%{!msdram:basiccrt%O%s} %{msdram:basiccrts%O%s};: \
+ %{!msdram:basiccrt561%O%s} %{msdram:basiccrt561s%O%s}} \
+ %{mcpu=bf561*:%{mmulticore:%{!mcorea:%{!mcoreb:basiccrt561b%O%s}}}}} \
+crti%O%s crtbegin%O%s crtlibid%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
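+
+/* A reminder on the spec language used above and below: %{S:X} emits X
+   when switch -S was given, %{!S:X} when it was not, and
+   %{A:X;B:Y;:Z} is a first-match chain with :Z as the default; %O is
+   the object-file suffix, and a trailing %s looks the file up on the
+   startfile search path. */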
+
+#undef LIB_SPEC
+#define LIB_SPEC "--start-group -lc %{msim:-lsim}%{!msim:-lnosys} --end-group \
+%{!T*:%{!msim:%{!msdram: \
+ %{mcpu=bf512*:-T bf512.ld%s}%{mcpu=bf514*:-T bf514.ld%s} \
+ %{mcpu=bf516*:-T bf516.ld%s}%{mcpu=bf518*:-T bf518.ld%s} \
+ %{mcpu=bf522*:-T bf522.ld%s}%{mcpu=bf523*:-T bf523.ld%s} \
+ %{mcpu=bf524*:-T bf524.ld%s}%{mcpu=bf525*:-T bf525.ld%s} \
+ %{mcpu=bf526*:-T bf526.ld%s}%{mcpu=bf527*:-T bf527.ld%s} \
+ %{mcpu=bf531*:-T bf531.ld%s}%{mcpu=bf532*:-T bf532.ld%s} \
+ %{mcpu=bf533*:-T bf533.ld%s}%{mcpu=bf534*:-T bf534.ld%s} \
+ %{mcpu=bf536*:-T bf536.ld%s}%{mcpu=bf537*:-T bf537.ld%s} \
+ %{mcpu=bf538*:-T bf538.ld%s}%{mcpu=bf539*:-T bf539.ld%s} \
+ %{mcpu=bf542*:-T bf542.ld%s}%{mcpu=bf544*:-T bf544.ld%s} \
+ %{mcpu=bf547*:-T bf547.ld%s}%{mcpu=bf548*:-T bf548.ld%s} \
+ %{mcpu=bf549*:-T bf549.ld%s} \
+ %{mcpu=bf561*:%{!mmulticore:-T bf561.ld%s} \
+ %{mmulticore:%{mcorea:-T bf561a.ld%s}} \
+ %{mmulticore:%{mcoreb:-T bf561b.ld%s}} \
+ %{mmulticore:%{!mcorea:%{!mcoreb:-T bf561m.ld%s}}}} \
+ %{!mcpu=*:%eno processor type specified for linking} \
+ %{!mcpu=bf561*:-T bfin-common-sc.ld%s} \
+ %{mcpu=bf561*:%{!mmulticore:-T bfin-common-sc.ld%s} \
+ %{mmulticore:-T bfin-common-mc.ld%s}}}}}"
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#ifdef __BFIN_FDPIC__
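+/* Note on the constant below: crti.s pushes P3 (the FDPIC GOT pointer)
+   and then does LINK 12, which leaves the saved P3 at SP + 20, so each
+   constructor/destructor call reloads it from there. */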
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+asm (SECTION_OP); \
+asm ("P3 = [SP + 20];\n\tcall " USER_LABEL_PREFIX #FUNC ";"); \
+asm (TEXT_SECTION_ASM_OP);
+#endif
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS \
+ "%{mfdpic:-msim} %{mid-shared-library:-msim}"
+
+#define NO_IMPLICIT_EXTERN_C
diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm
new file mode 100644
index 000000000..4e15ad230
--- /dev/null
+++ b/gcc/config/bfin/lib1funcs.asm
@@ -0,0 +1,146 @@
+/* libgcc functions for Blackfin.
+ Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Analog Devices.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_divsi3
+.text
+.align 2
+.global ___divsi3;
+.type ___divsi3, STT_FUNC;
+
+___divsi3:
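+ /* Record in R7 whether the operand signs differ, divide the absolute
+ values unsigned, then negate the quotient if they did. */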
+ [--SP] = RETS;
+ [--SP] = R7;
+
+ R2 = -R0;
+ CC = R0 < 0;
+ IF CC R0 = R2;
+ R7 = CC;
+
+ R2 = -R1;
+ CC = R1 < 0;
+ IF CC R1 = R2;
+ R2 = CC;
+ R7 = R7 ^ R2;
+
+ CALL ___udivsi3;
+
+ CC = R7;
+ R1 = -R0;
+ IF CC R0 = R1;
+
+ R7 = [SP++];
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_modsi3
+.align 2
+.global ___modsi3;
+.type ___modsi3, STT_FUNC;
+
+___modsi3:
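+ /* a % b == a - (a / b) * b; save the operands around the division. */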
+ [--SP] = RETS;
+ [--SP] = R0;
+ [--SP] = R1;
+ CALL ___divsi3;
+ R2 = [SP++];
+ R1 = [SP++];
+ R2 *= R0;
+ R0 = R1 - R2;
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_udivsi3
+.align 2
+.global ___udivsi3;
+.type ___udivsi3, STT_FUNC;
+
+___udivsi3:
+ P0 = 32;
+ LSETUP (0f, 1f) LC0 = P0;
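+ /* Zero-overhead hardware loop: the block from label 0: through label
+ 1: executes LC0 = 32 times with no explicit branch. */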
+ /* upper half of dividend */
+ R3 = 0;
+0:
+ /* The first time round in the loop we shift in garbage, but since we
+ perform 33 shifts, it doesn't matter. */
+ R0 = ROT R0 BY 1;
+ R3 = ROT R3 BY 1;
+ R2 = R3 - R1;
+ CC = R3 < R1 (IU);
+1:
+ /* Last instruction of the loop. */
+ IF ! CC R3 = R2;
+
+ /* Shift in the last bit. */
+ R0 = ROT R0 BY 1;
+ /* R0 is the result, R3 contains the remainder. */
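+ /* CC is set on iterations where the subtract is NOT applied, so the
+ bits rotated into R0 are the complement of the quotient; a single
+ final invert fixes all 32 bits. */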
+ R0 = ~ R0;
+ RTS;
+#endif
+
+#ifdef L_umodsi3
+.align 2
+.global ___umodsi3;
+.type ___umodsi3, STT_FUNC;
+
+___umodsi3:
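+ /* ___udivsi3 leaves the remainder in R3; just move it into R0. */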
+ [--SP] = RETS;
+ CALL ___udivsi3;
+ R0 = R3;
+ RETS = [SP++];
+ RTS;
+#endif
+
+#ifdef L_umulsi3_highpart
+.align 2
+.global ___umulsi3_highpart;
+.type ___umulsi3_highpart, STT_FUNC;
+
+___umulsi3_highpart:
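+ /* With 16-bit halves, R0 * R1 == (H0*H1 << 32) + ((H0*L1 + L0*H1) << 16)
+ + L0*L1. A0 collects the high partial product while A1 gathers the
+ carries out of the lower ones. */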
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
+ A1 += R0.L * R1.H (FU);
+ A1 = A1 >> 16;
+ A0 += A1;
+ R0 = A0 (FU);
+ RTS;
+#endif
+
+#ifdef L_smulsi3_highpart
+.align 2
+.global ___smulsi3_highpart;
+.type ___smulsi3_highpart, STT_FUNC;
+
+___smulsi3_highpart:
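+ /* Same partial-product scheme as the unsigned version, but the (IS,M)
+ cross products pair a signed high half with an unsigned low half, and
+ the arithmetic >>> 16 preserves the sign of the carries. */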
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
+ A1 += R1.H * R0.L (IS,M);
+ A1 = A1 >>> 16;
+ R0 = (A0 += A1);
+ RTS;
+#endif
diff --git a/gcc/config/bfin/libgcc-bfin.ver b/gcc/config/bfin/libgcc-bfin.ver
new file mode 100644
index 000000000..516d91f65
--- /dev/null
+++ b/gcc/config/bfin/libgcc-bfin.ver
@@ -0,0 +1,1914 @@
+# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+# 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+GCC_3.0 {
+ # libgcc1 integer symbols
+ ___absvsi2
+ ___addvsi3
+ ___ashlsi3
+ ___ashrsi3
+ ___divsi3
+ ___lshrsi3
+ ___modsi3
+ ___mulsi3
+ ___mulvsi3
+ ___negvsi2
+ ___subvsi3
+ ___udivsi3
+ ___umodsi3
+
+ # libgcc1 floating point symbols
+ ___addsf3
+ ___adddf3
+ ___addxf3
+ ___addtf3
+ ___divsf3
+ ___divdf3
+ ___divxf3
+ ___divtf3
+ ___eqsf2
+ ___eqdf2
+ ___eqxf2
+ ___eqtf2
+ ___extenddfxf2
+ ___extenddftf2
+ ___extendsfdf2
+ ___extendsfxf2
+ ___extendsftf2
+ ___fixsfsi
+ ___fixdfsi
+ ___fixxfsi
+ ___fixtfsi
+ ___floatsisf
+ ___floatsidf
+ ___floatsixf
+ ___floatsitf
+ ___gesf2
+ ___gedf2
+ ___gexf2
+ ___getf2
+ ___gtsf2
+ ___gtdf2
+ ___gtxf2
+ ___gttf2
+ ___lesf2
+ ___ledf2
+ ___lexf2
+ ___letf2
+ ___ltsf2
+ ___ltdf2
+ ___ltxf2
+ ___lttf2
+ ___mulsf3
+ ___muldf3
+ ___mulxf3
+ ___multf3
+ ___negsf2
+ ___negdf2
+ ___negxf2
+ ___negtf2
+ ___nesf2
+ ___nedf2
+ ___nexf2
+ ___netf2
+ ___subsf3
+ ___subdf3
+ ___subxf3
+ ___subtf3
+ ___truncdfsf2
+ ___truncxfsf2
+ ___trunctfsf2
+ ___truncxfdf2
+ ___trunctfdf2
+
+ # libgcc2 DImode arithmetic (for 32-bit targets).
+ ___absvdi2
+ ___addvdi3
+ ___ashldi3
+ ___ashrdi3
+ ___cmpdi2
+ ___divdi3
+ ___ffsdi2
+ ___fixdfdi
+ ___fixsfdi
+ ___fixtfdi
+ ___fixxfdi
+ ___fixunsdfdi
+ ___fixunsdfsi
+ ___fixunssfsi
+ ___fixunssfdi
+ ___fixunstfdi
+ ___fixunstfsi
+ ___fixunsxfdi
+ ___fixunsxfsi
+ ___floatdidf
+ ___floatdisf
+ ___floatdixf
+ ___floatditf
+ ___lshrdi3
+ ___moddi3
+ ___muldi3
+ ___mulvdi3
+ ___negdi2
+ ___negvdi2
+ ___subvdi3
+ ___ucmpdi2
+ ___udivdi3
+ ___udivmoddi4
+ ___umoddi3
+
+ # libgcc2 TImode arithmetic (for 64-bit targets).
+ ___ashlti3
+ ___ashrti3
+ ___cmpti2
+ ___divti3
+ ___ffsti2
+ ___fixdfti
+ ___fixsfti
+ ___fixtfti
+ ___fixxfti
+ ___lshrti3
+ ___modti3
+ ___multi3
+ ___negti2
+ ___ucmpti2
+ ___udivmodti4
+ ___udivti3
+ ___umodti3
+ ___fixunsdfti
+ ___fixunssfti
+ ___fixunstfti
+ ___fixunsxfti
+ ___floattidf
+ ___floattisf
+ ___floattixf
+ ___floattitf
+
+ # Used to deal with trampoline initialization on some platforms
+ ___clear_cache
+
+ # EH symbols
+ __Unwind_DeleteException
+ __Unwind_Find_FDE
+ __Unwind_ForcedUnwind
+ __Unwind_GetGR
+ __Unwind_GetIP
+ __Unwind_GetLanguageSpecificData
+ __Unwind_GetRegionStart
+ __Unwind_GetTextRelBase
+ __Unwind_GetDataRelBase
+ __Unwind_RaiseException
+ __Unwind_Resume
+ __Unwind_SetGR
+ __Unwind_SetIP
+ ___deregister_frame
+ ___deregister_frame_info
+ ___deregister_frame_info_bases
+ ___register_frame
+ ___register_frame_info
+ ___register_frame_info_bases
+ ___register_frame_info_table
+ ___register_frame_info_table_bases
+ ___register_frame_table
+
+ # SjLj EH symbols
+ __Unwind_SjLj_Register
+ __Unwind_SjLj_Unregister
+ __Unwind_SjLj_RaiseException
+ __Unwind_SjLj_ForcedUnwind
+ __Unwind_SjLj_Resume
+}
+
+%inherit GCC_3.3 GCC_3.0
+GCC_3.3 {
+ __Unwind_FindEnclosingFunction
+ __Unwind_GetCFA
+ __Unwind_Backtrace
+ __Unwind_Resume_or_Rethrow
+ __Unwind_SjLj_Resume_or_Rethrow
+}
+
+%inherit GCC_3.3.1 GCC_3.3
+GCC_3.3.1 {
+ ___gcc_personality_sj0
+ ___gcc_personality_v0
+}
+
+%inherit GCC_3.3.2 GCC_3.3.1
+GCC_3.3.2 {
+}
+%inherit GCC_3.3.4 GCC_3.3.2
+GCC_3.3.4 {
+ ___unorddf2
+ ___unordsf2
+}
+
+%inherit GCC_3.4 GCC_3.3.4
+GCC_3.4 {
+ # bit scanning and counting built-ins
+ ___clzsi2
+ ___clzdi2
+ ___clzti2
+ ___ctzsi2
+ ___ctzdi2
+ ___ctzti2
+ ___popcountsi2
+ ___popcountdi2
+ ___popcountti2
+ ___paritysi2
+ ___paritydi2
+ ___parityti2
+}
+
+%inherit GCC_3.4.2 GCC_3.4
+GCC_3.4.2 {
+ # Used to deal with trampoline initialization on some platforms
+ ___enable_execute_stack
+ ___trampoline_setup
+}
+
+%inherit GCC_3.4.4 GCC_3.4.2
+GCC_3.4.4 {
+ # libgcc2 TImode arithmetic (for 64-bit targets).
+ ___absvti2
+ ___addvti3
+ ___mulvti3
+ ___negvti2
+ ___subvti3
+}
+
+%inherit GCC_4.0.0 GCC_3.4.4
+GCC_4.0.0 {
+ # libgcc2 __builtin_powi helpers.
+ ___powisf2
+ ___powidf2
+ ___powixf2
+ ___powitf2
+
+ # C99-compliant complex arithmetic
+ ___divsc3
+ ___divdc3
+ ___divxc3
+ ___divtc3
+ ___mulsc3
+ ___muldc3
+ ___mulxc3
+ ___multc3
+}
+
+%inherit GCC_4.1.0 GCC_4.0.0
+GCC_4.1.0 {
+ ___smulsi3_highpart
+ ___umulsi3_highpart
+}
+
+%inherit GCC_4.2.0 GCC_4.1.0
+GCC_4.2.0 {
+ # unsigned-to-floating conversions
+ ___floatunsisf
+ ___floatunsidf
+ ___floatunsixf
+ ___floatunsitf
+ ___floatundidf
+ ___floatundisf
+ ___floatundixf
+ ___floatunditf
+ ___floatuntidf
+ ___floatuntisf
+ ___floatuntixf
+ ___floatuntitf
+ __Unwind_GetIPInfo
+}
+
+%inherit GCC_4.3.0 GCC_4.2.0
+GCC_4.3.0 {
+ # byte swapping routines
+ ___bswapsi2
+ ___bswapdi2
+ ___emutls_get_address
+ ___emutls_register_common
+ ___ffssi2
+ ___extendxftf2
+ ___trunctfxf2
+
+ # fixed-point routines
+ ___addqq3
+ ___addhq3
+ ___addsq3
+ ___adddq3
+ ___addtq3
+ ___adduqq3
+ ___adduhq3
+ ___addusq3
+ ___addudq3
+ ___addutq3
+ ___addha3
+ ___addsa3
+ ___addda3
+ ___addta3
+ ___adduha3
+ ___addusa3
+ ___adduda3
+ ___adduta3
+ ___ssaddqq3
+ ___ssaddhq3
+ ___ssaddsq3
+ ___ssadddq3
+ ___ssaddtq3
+ ___ssaddha3
+ ___ssaddsa3
+ ___ssaddda3
+ ___ssaddta3
+ ___usadduqq3
+ ___usadduhq3
+ ___usaddusq3
+ ___usaddudq3
+ ___usaddutq3
+ ___usadduha3
+ ___usaddusa3
+ ___usadduda3
+ ___usadduta3
+ ___subqq3
+ ___subhq3
+ ___subsq3
+ ___subdq3
+ ___subtq3
+ ___subuqq3
+ ___subuhq3
+ ___subusq3
+ ___subudq3
+ ___subutq3
+ ___subha3
+ ___subsa3
+ ___subda3
+ ___subta3
+ ___subuha3
+ ___subusa3
+ ___subuda3
+ ___subuta3
+ ___sssubqq3
+ ___sssubhq3
+ ___sssubsq3
+ ___sssubdq3
+ ___sssubtq3
+ ___sssubha3
+ ___sssubsa3
+ ___sssubda3
+ ___sssubta3
+ ___ussubuqq3
+ ___ussubuhq3
+ ___ussubusq3
+ ___ussubudq3
+ ___ussubutq3
+ ___ussubuha3
+ ___ussubusa3
+ ___ussubuda3
+ ___ussubuta3
+ ___mulqq3
+ ___mulhq3
+ ___mulsq3
+ ___muldq3
+ ___multq3
+ ___muluqq3
+ ___muluhq3
+ ___mulusq3
+ ___muludq3
+ ___mulutq3
+ ___mulha3
+ ___mulsa3
+ ___mulda3
+ ___multa3
+ ___muluha3
+ ___mulusa3
+ ___muluda3
+ ___muluta3
+ ___ssmulqq3
+ ___ssmulhq3
+ ___ssmulsq3
+ ___ssmuldq3
+ ___ssmultq3
+ ___ssmulha3
+ ___ssmulsa3
+ ___ssmulda3
+ ___ssmulta3
+ ___usmuluqq3
+ ___usmuluhq3
+ ___usmulusq3
+ ___usmuludq3
+ ___usmulutq3
+ ___usmuluha3
+ ___usmulusa3
+ ___usmuluda3
+ ___usmuluta3
+ ___divqq3
+ ___divhq3
+ ___divsq3
+ ___divdq3
+ ___divtq3
+ ___divha3
+ ___divsa3
+ ___divda3
+ ___divta3
+ ___udivuqq3
+ ___udivuhq3
+ ___udivusq3
+ ___udivudq3
+ ___udivutq3
+ ___udivuha3
+ ___udivusa3
+ ___udivuda3
+ ___udivuta3
+ ___ssdivqq3
+ ___ssdivhq3
+ ___ssdivsq3
+ ___ssdivdq3
+ ___ssdivtq3
+ ___ssdivha3
+ ___ssdivsa3
+ ___ssdivda3
+ ___ssdivta3
+ ___usdivuqq3
+ ___usdivuhq3
+ ___usdivusq3
+ ___usdivudq3
+ ___usdivutq3
+ ___usdivuha3
+ ___usdivusa3
+ ___usdivuda3
+ ___usdivuta3
+ ___negqq2
+ ___neghq2
+ ___negsq2
+ ___negdq2
+ ___negtq2
+ ___neguqq2
+ ___neguhq2
+ ___negusq2
+ ___negudq2
+ ___negutq2
+ ___negha2
+ ___negsa2
+ ___negda2
+ ___negta2
+ ___neguha2
+ ___negusa2
+ ___neguda2
+ ___neguta2
+ ___ssnegqq2
+ ___ssneghq2
+ ___ssnegsq2
+ ___ssnegdq2
+ ___ssnegtq2
+ ___ssnegha2
+ ___ssnegsa2
+ ___ssnegda2
+ ___ssnegta2
+ ___usneguqq2
+ ___usneguhq2
+ ___usnegusq2
+ ___usnegudq2
+ ___usnegutq2
+ ___usneguha2
+ ___usnegusa2
+ ___usneguda2
+ ___usneguta2
+ ___ashlqq3
+ ___ashlhq3
+ ___ashlsq3
+ ___ashldq3
+ ___ashltq3
+ ___ashluqq3
+ ___ashluhq3
+ ___ashlusq3
+ ___ashludq3
+ ___ashlutq3
+ ___ashlha3
+ ___ashlsa3
+ ___ashlda3
+ ___ashlta3
+ ___ashluha3
+ ___ashlusa3
+ ___ashluda3
+ ___ashluta3
+ ___ashrqq3
+ ___ashrhq3
+ ___ashrsq3
+ ___ashrdq3
+ ___ashrtq3
+ ___ashrha3
+ ___ashrsa3
+ ___ashrda3
+ ___ashrta3
+ ___lshruqq3
+ ___lshruhq3
+ ___lshrusq3
+ ___lshrudq3
+ ___lshrutq3
+ ___lshruha3
+ ___lshrusa3
+ ___lshruda3
+ ___lshruta3
+ ___ssashlqq3
+ ___ssashlhq3
+ ___ssashlsq3
+ ___ssashldq3
+ ___ssashltq3
+ ___ssashlha3
+ ___ssashlsa3
+ ___ssashlda3
+ ___ssashlta3
+ ___usashluqq3
+ ___usashluhq3
+ ___usashlusq3
+ ___usashludq3
+ ___usashlutq3
+ ___usashluha3
+ ___usashlusa3
+ ___usashluda3
+ ___usashluta3
+ ___cmpqq2
+ ___cmphq2
+ ___cmpsq2
+ ___cmpdq2
+ ___cmptq2
+ ___cmpuqq2
+ ___cmpuhq2
+ ___cmpusq2
+ ___cmpudq2
+ ___cmputq2
+ ___cmpha2
+ ___cmpsa2
+ ___cmpda2
+ ___cmpta2
+ ___cmpuha2
+ ___cmpusa2
+ ___cmpuda2
+ ___cmputa2
+ ___fractqqhq2
+ ___fractqqsq2
+ ___fractqqdq2
+ ___fractqqtq2
+ ___fractqqha
+ ___fractqqsa
+ ___fractqqda
+ ___fractqqta
+ ___fractqquqq
+ ___fractqquhq
+ ___fractqqusq
+ ___fractqqudq
+ ___fractqqutq
+ ___fractqquha
+ ___fractqqusa
+ ___fractqquda
+ ___fractqquta
+ ___fractqqqi
+ ___fractqqhi
+ ___fractqqsi
+ ___fractqqdi
+ ___fractqqti
+ ___fractqqsf
+ ___fractqqdf
+ ___fracthqqq2
+ ___fracthqsq2
+ ___fracthqdq2
+ ___fracthqtq2
+ ___fracthqha
+ ___fracthqsa
+ ___fracthqda
+ ___fracthqta
+ ___fracthquqq
+ ___fracthquhq
+ ___fracthqusq
+ ___fracthqudq
+ ___fracthqutq
+ ___fracthquha
+ ___fracthqusa
+ ___fracthquda
+ ___fracthquta
+ ___fracthqqi
+ ___fracthqhi
+ ___fracthqsi
+ ___fracthqdi
+ ___fracthqti
+ ___fracthqsf
+ ___fracthqdf
+ ___fractsqqq2
+ ___fractsqhq2
+ ___fractsqdq2
+ ___fractsqtq2
+ ___fractsqha
+ ___fractsqsa
+ ___fractsqda
+ ___fractsqta
+ ___fractsquqq
+ ___fractsquhq
+ ___fractsqusq
+ ___fractsqudq
+ ___fractsqutq
+ ___fractsquha
+ ___fractsqusa
+ ___fractsquda
+ ___fractsquta
+ ___fractsqqi
+ ___fractsqhi
+ ___fractsqsi
+ ___fractsqdi
+ ___fractsqti
+ ___fractsqsf
+ ___fractsqdf
+ ___fractdqqq2
+ ___fractdqhq2
+ ___fractdqsq2
+ ___fractdqtq2
+ ___fractdqha
+ ___fractdqsa
+ ___fractdqda
+ ___fractdqta
+ ___fractdquqq
+ ___fractdquhq
+ ___fractdqusq
+ ___fractdqudq
+ ___fractdqutq
+ ___fractdquha
+ ___fractdqusa
+ ___fractdquda
+ ___fractdquta
+ ___fractdqqi
+ ___fractdqhi
+ ___fractdqsi
+ ___fractdqdi
+ ___fractdqti
+ ___fractdqsf
+ ___fractdqdf
+ ___fracttqqq2
+ ___fracttqhq2
+ ___fracttqsq2
+ ___fracttqdq2
+ ___fracttqha
+ ___fracttqsa
+ ___fracttqda
+ ___fracttqta
+ ___fracttquqq
+ ___fracttquhq
+ ___fracttqusq
+ ___fracttqudq
+ ___fracttqutq
+ ___fracttquha
+ ___fracttqusa
+ ___fracttquda
+ ___fracttquta
+ ___fracttqqi
+ ___fracttqhi
+ ___fracttqsi
+ ___fracttqdi
+ ___fracttqti
+ ___fracttqsf
+ ___fracttqdf
+ ___fracthaqq
+ ___fracthahq
+ ___fracthasq
+ ___fracthadq
+ ___fracthatq
+ ___fracthasa2
+ ___fracthada2
+ ___fracthata2
+ ___fracthauqq
+ ___fracthauhq
+ ___fracthausq
+ ___fracthaudq
+ ___fracthautq
+ ___fracthauha
+ ___fracthausa
+ ___fracthauda
+ ___fracthauta
+ ___fracthaqi
+ ___fracthahi
+ ___fracthasi
+ ___fracthadi
+ ___fracthati
+ ___fracthasf
+ ___fracthadf
+ ___fractsaqq
+ ___fractsahq
+ ___fractsasq
+ ___fractsadq
+ ___fractsatq
+ ___fractsaha2
+ ___fractsada2
+ ___fractsata2
+ ___fractsauqq
+ ___fractsauhq
+ ___fractsausq
+ ___fractsaudq
+ ___fractsautq
+ ___fractsauha
+ ___fractsausa
+ ___fractsauda
+ ___fractsauta
+ ___fractsaqi
+ ___fractsahi
+ ___fractsasi
+ ___fractsadi
+ ___fractsati
+ ___fractsasf
+ ___fractsadf
+ ___fractdaqq
+ ___fractdahq
+ ___fractdasq
+ ___fractdadq
+ ___fractdatq
+ ___fractdaha2
+ ___fractdasa2
+ ___fractdata2
+ ___fractdauqq
+ ___fractdauhq
+ ___fractdausq
+ ___fractdaudq
+ ___fractdautq
+ ___fractdauha
+ ___fractdausa
+ ___fractdauda
+ ___fractdauta
+ ___fractdaqi
+ ___fractdahi
+ ___fractdasi
+ ___fractdadi
+ ___fractdati
+ ___fractdasf
+ ___fractdadf
+ ___fracttaqq
+ ___fracttahq
+ ___fracttasq
+ ___fracttadq
+ ___fracttatq
+ ___fracttaha2
+ ___fracttasa2
+ ___fracttada2
+ ___fracttauqq
+ ___fracttauhq
+ ___fracttausq
+ ___fracttaudq
+ ___fracttautq
+ ___fracttauha
+ ___fracttausa
+ ___fracttauda
+ ___fracttauta
+ ___fracttaqi
+ ___fracttahi
+ ___fracttasi
+ ___fracttadi
+ ___fracttati
+ ___fracttasf
+ ___fracttadf
+ ___fractuqqqq
+ ___fractuqqhq
+ ___fractuqqsq
+ ___fractuqqdq
+ ___fractuqqtq
+ ___fractuqqha
+ ___fractuqqsa
+ ___fractuqqda
+ ___fractuqqta
+ ___fractuqquhq2
+ ___fractuqqusq2
+ ___fractuqqudq2
+ ___fractuqqutq2
+ ___fractuqquha
+ ___fractuqqusa
+ ___fractuqquda
+ ___fractuqquta
+ ___fractuqqqi
+ ___fractuqqhi
+ ___fractuqqsi
+ ___fractuqqdi
+ ___fractuqqti
+ ___fractuqqsf
+ ___fractuqqdf
+ ___fractuhqqq
+ ___fractuhqhq
+ ___fractuhqsq
+ ___fractuhqdq
+ ___fractuhqtq
+ ___fractuhqha
+ ___fractuhqsa
+ ___fractuhqda
+ ___fractuhqta
+ ___fractuhquqq2
+ ___fractuhqusq2
+ ___fractuhqudq2
+ ___fractuhqutq2
+ ___fractuhquha
+ ___fractuhqusa
+ ___fractuhquda
+ ___fractuhquta
+ ___fractuhqqi
+ ___fractuhqhi
+ ___fractuhqsi
+ ___fractuhqdi
+ ___fractuhqti
+ ___fractuhqsf
+ ___fractuhqdf
+ ___fractusqqq
+ ___fractusqhq
+ ___fractusqsq
+ ___fractusqdq
+ ___fractusqtq
+ ___fractusqha
+ ___fractusqsa
+ ___fractusqda
+ ___fractusqta
+ ___fractusquqq2
+ ___fractusquhq2
+ ___fractusqudq2
+ ___fractusqutq2
+ ___fractusquha
+ ___fractusqusa
+ ___fractusquda
+ ___fractusquta
+ ___fractusqqi
+ ___fractusqhi
+ ___fractusqsi
+ ___fractusqdi
+ ___fractusqti
+ ___fractusqsf
+ ___fractusqdf
+ ___fractudqqq
+ ___fractudqhq
+ ___fractudqsq
+ ___fractudqdq
+ ___fractudqtq
+ ___fractudqha
+ ___fractudqsa
+ ___fractudqda
+ ___fractudqta
+ ___fractudquqq2
+ ___fractudquhq2
+ ___fractudqusq2
+ ___fractudqutq2
+ ___fractudquha
+ ___fractudqusa
+ ___fractudquda
+ ___fractudquta
+ ___fractudqqi
+ ___fractudqhi
+ ___fractudqsi
+ ___fractudqdi
+ ___fractudqti
+ ___fractudqsf
+ ___fractudqdf
+ ___fractutqqq
+ ___fractutqhq
+ ___fractutqsq
+ ___fractutqdq
+ ___fractutqtq
+ ___fractutqha
+ ___fractutqsa
+ ___fractutqda
+ ___fractutqta
+ ___fractutquqq2
+ ___fractutquhq2
+ ___fractutqusq2
+ ___fractutqudq2
+ ___fractutquha
+ ___fractutqusa
+ ___fractutquda
+ ___fractutquta
+ ___fractutqqi
+ ___fractutqhi
+ ___fractutqsi
+ ___fractutqdi
+ ___fractutqti
+ ___fractutqsf
+ ___fractutqdf
+ ___fractuhaqq
+ ___fractuhahq
+ ___fractuhasq
+ ___fractuhadq
+ ___fractuhatq
+ ___fractuhaha
+ ___fractuhasa
+ ___fractuhada
+ ___fractuhata
+ ___fractuhauqq
+ ___fractuhauhq
+ ___fractuhausq
+ ___fractuhaudq
+ ___fractuhautq
+ ___fractuhausa2
+ ___fractuhauda2
+ ___fractuhauta2
+ ___fractuhaqi
+ ___fractuhahi
+ ___fractuhasi
+ ___fractuhadi
+ ___fractuhati
+ ___fractuhasf
+ ___fractuhadf
+ ___fractusaqq
+ ___fractusahq
+ ___fractusasq
+ ___fractusadq
+ ___fractusatq
+ ___fractusaha
+ ___fractusasa
+ ___fractusada
+ ___fractusata
+ ___fractusauqq
+ ___fractusauhq
+ ___fractusausq
+ ___fractusaudq
+ ___fractusautq
+ ___fractusauha2
+ ___fractusauda2
+ ___fractusauta2
+ ___fractusaqi
+ ___fractusahi
+ ___fractusasi
+ ___fractusadi
+ ___fractusati
+ ___fractusasf
+ ___fractusadf
+ ___fractudaqq
+ ___fractudahq
+ ___fractudasq
+ ___fractudadq
+ ___fractudatq
+ ___fractudaha
+ ___fractudasa
+ ___fractudada
+ ___fractudata
+ ___fractudauqq
+ ___fractudauhq
+ ___fractudausq
+ ___fractudaudq
+ ___fractudautq
+ ___fractudauha2
+ ___fractudausa2
+ ___fractudauta2
+ ___fractudaqi
+ ___fractudahi
+ ___fractudasi
+ ___fractudadi
+ ___fractudati
+ ___fractudasf
+ ___fractudadf
+ ___fractutaqq
+ ___fractutahq
+ ___fractutasq
+ ___fractutadq
+ ___fractutatq
+ ___fractutaha
+ ___fractutasa
+ ___fractutada
+ ___fractutata
+ ___fractutauqq
+ ___fractutauhq
+ ___fractutausq
+ ___fractutaudq
+ ___fractutautq
+ ___fractutauha2
+ ___fractutausa2
+ ___fractutauda2
+ ___fractutaqi
+ ___fractutahi
+ ___fractutasi
+ ___fractutadi
+ ___fractutati
+ ___fractutasf
+ ___fractutadf
+ ___fractqiqq
+ ___fractqihq
+ ___fractqisq
+ ___fractqidq
+ ___fractqitq
+ ___fractqiha
+ ___fractqisa
+ ___fractqida
+ ___fractqita
+ ___fractqiuqq
+ ___fractqiuhq
+ ___fractqiusq
+ ___fractqiudq
+ ___fractqiutq
+ ___fractqiuha
+ ___fractqiusa
+ ___fractqiuda
+ ___fractqiuta
+ ___fracthiqq
+ ___fracthihq
+ ___fracthisq
+ ___fracthidq
+ ___fracthitq
+ ___fracthiha
+ ___fracthisa
+ ___fracthida
+ ___fracthita
+ ___fracthiuqq
+ ___fracthiuhq
+ ___fracthiusq
+ ___fracthiudq
+ ___fracthiutq
+ ___fracthiuha
+ ___fracthiusa
+ ___fracthiuda
+ ___fracthiuta
+ ___fractsiqq
+ ___fractsihq
+ ___fractsisq
+ ___fractsidq
+ ___fractsitq
+ ___fractsiha
+ ___fractsisa
+ ___fractsida
+ ___fractsita
+ ___fractsiuqq
+ ___fractsiuhq
+ ___fractsiusq
+ ___fractsiudq
+ ___fractsiutq
+ ___fractsiuha
+ ___fractsiusa
+ ___fractsiuda
+ ___fractsiuta
+ ___fractdiqq
+ ___fractdihq
+ ___fractdisq
+ ___fractdidq
+ ___fractditq
+ ___fractdiha
+ ___fractdisa
+ ___fractdida
+ ___fractdita
+ ___fractdiuqq
+ ___fractdiuhq
+ ___fractdiusq
+ ___fractdiudq
+ ___fractdiutq
+ ___fractdiuha
+ ___fractdiusa
+ ___fractdiuda
+ ___fractdiuta
+ ___fracttiqq
+ ___fracttihq
+ ___fracttisq
+ ___fracttidq
+ ___fracttitq
+ ___fracttiha
+ ___fracttisa
+ ___fracttida
+ ___fracttita
+ ___fracttiuqq
+ ___fracttiuhq
+ ___fracttiusq
+ ___fracttiudq
+ ___fracttiutq
+ ___fracttiuha
+ ___fracttiusa
+ ___fracttiuda
+ ___fracttiuta
+ ___fractsfqq
+ ___fractsfhq
+ ___fractsfsq
+ ___fractsfdq
+ ___fractsftq
+ ___fractsfha
+ ___fractsfsa
+ ___fractsfda
+ ___fractsfta
+ ___fractsfuqq
+ ___fractsfuhq
+ ___fractsfusq
+ ___fractsfudq
+ ___fractsfutq
+ ___fractsfuha
+ ___fractsfusa
+ ___fractsfuda
+ ___fractsfuta
+ ___fractdfqq
+ ___fractdfhq
+ ___fractdfsq
+ ___fractdfdq
+ ___fractdftq
+ ___fractdfha
+ ___fractdfsa
+ ___fractdfda
+ ___fractdfta
+ ___fractdfuqq
+ ___fractdfuhq
+ ___fractdfusq
+ ___fractdfudq
+ ___fractdfutq
+ ___fractdfuha
+ ___fractdfusa
+ ___fractdfuda
+ ___fractdfuta
+ ___satfractqqhq2
+ ___satfractqqsq2
+ ___satfractqqdq2
+ ___satfractqqtq2
+ ___satfractqqha
+ ___satfractqqsa
+ ___satfractqqda
+ ___satfractqqta
+ ___satfractqquqq
+ ___satfractqquhq
+ ___satfractqqusq
+ ___satfractqqudq
+ ___satfractqqutq
+ ___satfractqquha
+ ___satfractqqusa
+ ___satfractqquda
+ ___satfractqquta
+ ___satfracthqqq2
+ ___satfracthqsq2
+ ___satfracthqdq2
+ ___satfracthqtq2
+ ___satfracthqha
+ ___satfracthqsa
+ ___satfracthqda
+ ___satfracthqta
+ ___satfracthquqq
+ ___satfracthquhq
+ ___satfracthqusq
+ ___satfracthqudq
+ ___satfracthqutq
+ ___satfracthquha
+ ___satfracthqusa
+ ___satfracthquda
+ ___satfracthquta
+ ___satfractsqqq2
+ ___satfractsqhq2
+ ___satfractsqdq2
+ ___satfractsqtq2
+ ___satfractsqha
+ ___satfractsqsa
+ ___satfractsqda
+ ___satfractsqta
+ ___satfractsquqq
+ ___satfractsquhq
+ ___satfractsqusq
+ ___satfractsqudq
+ ___satfractsqutq
+ ___satfractsquha
+ ___satfractsqusa
+ ___satfractsquda
+ ___satfractsquta
+ ___satfractdqqq2
+ ___satfractdqhq2
+ ___satfractdqsq2
+ ___satfractdqtq2
+ ___satfractdqha
+ ___satfractdqsa
+ ___satfractdqda
+ ___satfractdqta
+ ___satfractdquqq
+ ___satfractdquhq
+ ___satfractdqusq
+ ___satfractdqudq
+ ___satfractdqutq
+ ___satfractdquha
+ ___satfractdqusa
+ ___satfractdquda
+ ___satfractdquta
+ ___satfracttqqq2
+ ___satfracttqhq2
+ ___satfracttqsq2
+ ___satfracttqdq2
+ ___satfracttqha
+ ___satfracttqsa
+ ___satfracttqda
+ ___satfracttqta
+ ___satfracttquqq
+ ___satfracttquhq
+ ___satfracttqusq
+ ___satfracttqudq
+ ___satfracttqutq
+ ___satfracttquha
+ ___satfracttqusa
+ ___satfracttquda
+ ___satfracttquta
+ ___satfracthaqq
+ ___satfracthahq
+ ___satfracthasq
+ ___satfracthadq
+ ___satfracthatq
+ ___satfracthasa2
+ ___satfracthada2
+ ___satfracthata2
+ ___satfracthauqq
+ ___satfracthauhq
+ ___satfracthausq
+ ___satfracthaudq
+ ___satfracthautq
+ ___satfracthauha
+ ___satfracthausa
+ ___satfracthauda
+ ___satfracthauta
+ ___satfractsaqq
+ ___satfractsahq
+ ___satfractsasq
+ ___satfractsadq
+ ___satfractsatq
+ ___satfractsaha2
+ ___satfractsada2
+ ___satfractsata2
+ ___satfractsauqq
+ ___satfractsauhq
+ ___satfractsausq
+ ___satfractsaudq
+ ___satfractsautq
+ ___satfractsauha
+ ___satfractsausa
+ ___satfractsauda
+ ___satfractsauta
+ ___satfractdaqq
+ ___satfractdahq
+ ___satfractdasq
+ ___satfractdadq
+ ___satfractdatq
+ ___satfractdaha2
+ ___satfractdasa2
+ ___satfractdata2
+ ___satfractdauqq
+ ___satfractdauhq
+ ___satfractdausq
+ ___satfractdaudq
+ ___satfractdautq
+ ___satfractdauha
+ ___satfractdausa
+ ___satfractdauda
+ ___satfractdauta
+ ___satfracttaqq
+ ___satfracttahq
+ ___satfracttasq
+ ___satfracttadq
+ ___satfracttatq
+ ___satfracttaha2
+ ___satfracttasa2
+ ___satfracttada2
+ ___satfracttauqq
+ ___satfracttauhq
+ ___satfracttausq
+ ___satfracttaudq
+ ___satfracttautq
+ ___satfracttauha
+ ___satfracttausa
+ ___satfracttauda
+ ___satfracttauta
+ ___satfractuqqqq
+ ___satfractuqqhq
+ ___satfractuqqsq
+ ___satfractuqqdq
+ ___satfractuqqtq
+ ___satfractuqqha
+ ___satfractuqqsa
+ ___satfractuqqda
+ ___satfractuqqta
+ ___satfractuqquhq2
+ ___satfractuqqusq2
+ ___satfractuqqudq2
+ ___satfractuqqutq2
+ ___satfractuqquha
+ ___satfractuqqusa
+ ___satfractuqquda
+ ___satfractuqquta
+ ___satfractuhqqq
+ ___satfractuhqhq
+ ___satfractuhqsq
+ ___satfractuhqdq
+ ___satfractuhqtq
+ ___satfractuhqha
+ ___satfractuhqsa
+ ___satfractuhqda
+ ___satfractuhqta
+ ___satfractuhquqq2
+ ___satfractuhqusq2
+ ___satfractuhqudq2
+ ___satfractuhqutq2
+ ___satfractuhquha
+ ___satfractuhqusa
+ ___satfractuhquda
+ ___satfractuhquta
+ ___satfractusqqq
+ ___satfractusqhq
+ ___satfractusqsq
+ ___satfractusqdq
+ ___satfractusqtq
+ ___satfractusqha
+ ___satfractusqsa
+ ___satfractusqda
+ ___satfractusqta
+ ___satfractusquqq2
+ ___satfractusquhq2
+ ___satfractusqudq2
+ ___satfractusqutq2
+ ___satfractusquha
+ ___satfractusqusa
+ ___satfractusquda
+ ___satfractusquta
+ ___satfractudqqq
+ ___satfractudqhq
+ ___satfractudqsq
+ ___satfractudqdq
+ ___satfractudqtq
+ ___satfractudqha
+ ___satfractudqsa
+ ___satfractudqda
+ ___satfractudqta
+ ___satfractudquqq2
+ ___satfractudquhq2
+ ___satfractudqusq2
+ ___satfractudqutq2
+ ___satfractudquha
+ ___satfractudqusa
+ ___satfractudquda
+ ___satfractudquta
+ ___satfractutqqq
+ ___satfractutqhq
+ ___satfractutqsq
+ ___satfractutqdq
+ ___satfractutqtq
+ ___satfractutqha
+ ___satfractutqsa
+ ___satfractutqda
+ ___satfractutqta
+ ___satfractutquqq2
+ ___satfractutquhq2
+ ___satfractutqusq2
+ ___satfractutqudq2
+ ___satfractutquha
+ ___satfractutqusa
+ ___satfractutquda
+ ___satfractutquta
+ ___satfractuhaqq
+ ___satfractuhahq
+ ___satfractuhasq
+ ___satfractuhadq
+ ___satfractuhatq
+ ___satfractuhaha
+ ___satfractuhasa
+ ___satfractuhada
+ ___satfractuhata
+ ___satfractuhauqq
+ ___satfractuhauhq
+ ___satfractuhausq
+ ___satfractuhaudq
+ ___satfractuhautq
+ ___satfractuhausa2
+ ___satfractuhauda2
+ ___satfractuhauta2
+ ___satfractusaqq
+ ___satfractusahq
+ ___satfractusasq
+ ___satfractusadq
+ ___satfractusatq
+ ___satfractusaha
+ ___satfractusasa
+ ___satfractusada
+ ___satfractusata
+ ___satfractusauqq
+ ___satfractusauhq
+ ___satfractusausq
+ ___satfractusaudq
+ ___satfractusautq
+ ___satfractusauha2
+ ___satfractusauda2
+ ___satfractusauta2
+ ___satfractudaqq
+ ___satfractudahq
+ ___satfractudasq
+ ___satfractudadq
+ ___satfractudatq
+ ___satfractudaha
+ ___satfractudasa
+ ___satfractudada
+ ___satfractudata
+ ___satfractudauqq
+ ___satfractudauhq
+ ___satfractudausq
+ ___satfractudaudq
+ ___satfractudautq
+ ___satfractudauha2
+ ___satfractudausa2
+ ___satfractudauta2
+ ___satfractutaqq
+ ___satfractutahq
+ ___satfractutasq
+ ___satfractutadq
+ ___satfractutatq
+ ___satfractutaha
+ ___satfractutasa
+ ___satfractutada
+ ___satfractutata
+ ___satfractutauqq
+ ___satfractutauhq
+ ___satfractutausq
+ ___satfractutaudq
+ ___satfractutautq
+ ___satfractutauha2
+ ___satfractutausa2
+ ___satfractutauda2
+ ___satfractqiqq
+ ___satfractqihq
+ ___satfractqisq
+ ___satfractqidq
+ ___satfractqitq
+ ___satfractqiha
+ ___satfractqisa
+ ___satfractqida
+ ___satfractqita
+ ___satfractqiuqq
+ ___satfractqiuhq
+ ___satfractqiusq
+ ___satfractqiudq
+ ___satfractqiutq
+ ___satfractqiuha
+ ___satfractqiusa
+ ___satfractqiuda
+ ___satfractqiuta
+ ___satfracthiqq
+ ___satfracthihq
+ ___satfracthisq
+ ___satfracthidq
+ ___satfracthitq
+ ___satfracthiha
+ ___satfracthisa
+ ___satfracthida
+ ___satfracthita
+ ___satfracthiuqq
+ ___satfracthiuhq
+ ___satfracthiusq
+ ___satfracthiudq
+ ___satfracthiutq
+ ___satfracthiuha
+ ___satfracthiusa
+ ___satfracthiuda
+ ___satfracthiuta
+ ___satfractsiqq
+ ___satfractsihq
+ ___satfractsisq
+ ___satfractsidq
+ ___satfractsitq
+ ___satfractsiha
+ ___satfractsisa
+ ___satfractsida
+ ___satfractsita
+ ___satfractsiuqq
+ ___satfractsiuhq
+ ___satfractsiusq
+ ___satfractsiudq
+ ___satfractsiutq
+ ___satfractsiuha
+ ___satfractsiusa
+ ___satfractsiuda
+ ___satfractsiuta
+ ___satfractdiqq
+ ___satfractdihq
+ ___satfractdisq
+ ___satfractdidq
+ ___satfractditq
+ ___satfractdiha
+ ___satfractdisa
+ ___satfractdida
+ ___satfractdita
+ ___satfractdiuqq
+ ___satfractdiuhq
+ ___satfractdiusq
+ ___satfractdiudq
+ ___satfractdiutq
+ ___satfractdiuha
+ ___satfractdiusa
+ ___satfractdiuda
+ ___satfractdiuta
+ ___satfracttiqq
+ ___satfracttihq
+ ___satfracttisq
+ ___satfracttidq
+ ___satfracttitq
+ ___satfracttiha
+ ___satfracttisa
+ ___satfracttida
+ ___satfracttita
+ ___satfracttiuqq
+ ___satfracttiuhq
+ ___satfracttiusq
+ ___satfracttiudq
+ ___satfracttiutq
+ ___satfracttiuha
+ ___satfracttiusa
+ ___satfracttiuda
+ ___satfracttiuta
+ ___satfractsfqq
+ ___satfractsfhq
+ ___satfractsfsq
+ ___satfractsfdq
+ ___satfractsftq
+ ___satfractsfha
+ ___satfractsfsa
+ ___satfractsfda
+ ___satfractsfta
+ ___satfractsfuqq
+ ___satfractsfuhq
+ ___satfractsfusq
+ ___satfractsfudq
+ ___satfractsfutq
+ ___satfractsfuha
+ ___satfractsfusa
+ ___satfractsfuda
+ ___satfractsfuta
+ ___satfractdfqq
+ ___satfractdfhq
+ ___satfractdfsq
+ ___satfractdfdq
+ ___satfractdftq
+ ___satfractdfha
+ ___satfractdfsa
+ ___satfractdfda
+ ___satfractdfta
+ ___satfractdfuqq
+ ___satfractdfuhq
+ ___satfractdfusq
+ ___satfractdfudq
+ ___satfractdfutq
+ ___satfractdfuha
+ ___satfractdfusa
+ ___satfractdfuda
+ ___satfractdfuta
+ ___fractunsqqqi
+ ___fractunsqqhi
+ ___fractunsqqsi
+ ___fractunsqqdi
+ ___fractunsqqti
+ ___fractunshqqi
+ ___fractunshqhi
+ ___fractunshqsi
+ ___fractunshqdi
+ ___fractunshqti
+ ___fractunssqqi
+ ___fractunssqhi
+ ___fractunssqsi
+ ___fractunssqdi
+ ___fractunssqti
+ ___fractunsdqqi
+ ___fractunsdqhi
+ ___fractunsdqsi
+ ___fractunsdqdi
+ ___fractunsdqti
+ ___fractunstqqi
+ ___fractunstqhi
+ ___fractunstqsi
+ ___fractunstqdi
+ ___fractunstqti
+ ___fractunshaqi
+ ___fractunshahi
+ ___fractunshasi
+ ___fractunshadi
+ ___fractunshati
+ ___fractunssaqi
+ ___fractunssahi
+ ___fractunssasi
+ ___fractunssadi
+ ___fractunssati
+ ___fractunsdaqi
+ ___fractunsdahi
+ ___fractunsdasi
+ ___fractunsdadi
+ ___fractunsdati
+ ___fractunstaqi
+ ___fractunstahi
+ ___fractunstasi
+ ___fractunstadi
+ ___fractunstati
+ ___fractunsuqqqi
+ ___fractunsuqqhi
+ ___fractunsuqqsi
+ ___fractunsuqqdi
+ ___fractunsuqqti
+ ___fractunsuhqqi
+ ___fractunsuhqhi
+ ___fractunsuhqsi
+ ___fractunsuhqdi
+ ___fractunsuhqti
+ ___fractunsusqqi
+ ___fractunsusqhi
+ ___fractunsusqsi
+ ___fractunsusqdi
+ ___fractunsusqti
+ ___fractunsudqqi
+ ___fractunsudqhi
+ ___fractunsudqsi
+ ___fractunsudqdi
+ ___fractunsudqti
+ ___fractunsutqqi
+ ___fractunsutqhi
+ ___fractunsutqsi
+ ___fractunsutqdi
+ ___fractunsutqti
+ ___fractunsuhaqi
+ ___fractunsuhahi
+ ___fractunsuhasi
+ ___fractunsuhadi
+ ___fractunsuhati
+ ___fractunsusaqi
+ ___fractunsusahi
+ ___fractunsusasi
+ ___fractunsusadi
+ ___fractunsusati
+ ___fractunsudaqi
+ ___fractunsudahi
+ ___fractunsudasi
+ ___fractunsudadi
+ ___fractunsudati
+ ___fractunsutaqi
+ ___fractunsutahi
+ ___fractunsutasi
+ ___fractunsutadi
+ ___fractunsutati
+ ___fractunsqiqq
+ ___fractunsqihq
+ ___fractunsqisq
+ ___fractunsqidq
+ ___fractunsqitq
+ ___fractunsqiha
+ ___fractunsqisa
+ ___fractunsqida
+ ___fractunsqita
+ ___fractunsqiuqq
+ ___fractunsqiuhq
+ ___fractunsqiusq
+ ___fractunsqiudq
+ ___fractunsqiutq
+ ___fractunsqiuha
+ ___fractunsqiusa
+ ___fractunsqiuda
+ ___fractunsqiuta
+ ___fractunshiqq
+ ___fractunshihq
+ ___fractunshisq
+ ___fractunshidq
+ ___fractunshitq
+ ___fractunshiha
+ ___fractunshisa
+ ___fractunshida
+ ___fractunshita
+ ___fractunshiuqq
+ ___fractunshiuhq
+ ___fractunshiusq
+ ___fractunshiudq
+ ___fractunshiutq
+ ___fractunshiuha
+ ___fractunshiusa
+ ___fractunshiuda
+ ___fractunshiuta
+ ___fractunssiqq
+ ___fractunssihq
+ ___fractunssisq
+ ___fractunssidq
+ ___fractunssitq
+ ___fractunssiha
+ ___fractunssisa
+ ___fractunssida
+ ___fractunssita
+ ___fractunssiuqq
+ ___fractunssiuhq
+ ___fractunssiusq
+ ___fractunssiudq
+ ___fractunssiutq
+ ___fractunssiuha
+ ___fractunssiusa
+ ___fractunssiuda
+ ___fractunssiuta
+ ___fractunsdiqq
+ ___fractunsdihq
+ ___fractunsdisq
+ ___fractunsdidq
+ ___fractunsditq
+ ___fractunsdiha
+ ___fractunsdisa
+ ___fractunsdida
+ ___fractunsdita
+ ___fractunsdiuqq
+ ___fractunsdiuhq
+ ___fractunsdiusq
+ ___fractunsdiudq
+ ___fractunsdiutq
+ ___fractunsdiuha
+ ___fractunsdiusa
+ ___fractunsdiuda
+ ___fractunsdiuta
+ ___fractunstiqq
+ ___fractunstihq
+ ___fractunstisq
+ ___fractunstidq
+ ___fractunstitq
+ ___fractunstiha
+ ___fractunstisa
+ ___fractunstida
+ ___fractunstita
+ ___fractunstiuqq
+ ___fractunstiuhq
+ ___fractunstiusq
+ ___fractunstiudq
+ ___fractunstiutq
+ ___fractunstiuha
+ ___fractunstiusa
+ ___fractunstiuda
+ ___fractunstiuta
+ ___satfractunsqiqq
+ ___satfractunsqihq
+ ___satfractunsqisq
+ ___satfractunsqidq
+ ___satfractunsqitq
+ ___satfractunsqiha
+ ___satfractunsqisa
+ ___satfractunsqida
+ ___satfractunsqita
+ ___satfractunsqiuqq
+ ___satfractunsqiuhq
+ ___satfractunsqiusq
+ ___satfractunsqiudq
+ ___satfractunsqiutq
+ ___satfractunsqiuha
+ ___satfractunsqiusa
+ ___satfractunsqiuda
+ ___satfractunsqiuta
+ ___satfractunshiqq
+ ___satfractunshihq
+ ___satfractunshisq
+ ___satfractunshidq
+ ___satfractunshitq
+ ___satfractunshiha
+ ___satfractunshisa
+ ___satfractunshida
+ ___satfractunshita
+ ___satfractunshiuqq
+ ___satfractunshiuhq
+ ___satfractunshiusq
+ ___satfractunshiudq
+ ___satfractunshiutq
+ ___satfractunshiuha
+ ___satfractunshiusa
+ ___satfractunshiuda
+ ___satfractunshiuta
+ ___satfractunssiqq
+ ___satfractunssihq
+ ___satfractunssisq
+ ___satfractunssidq
+ ___satfractunssitq
+ ___satfractunssiha
+ ___satfractunssisa
+ ___satfractunssida
+ ___satfractunssita
+ ___satfractunssiuqq
+ ___satfractunssiuhq
+ ___satfractunssiusq
+ ___satfractunssiudq
+ ___satfractunssiutq
+ ___satfractunssiuha
+ ___satfractunssiusa
+ ___satfractunssiuda
+ ___satfractunssiuta
+ ___satfractunsdiqq
+ ___satfractunsdihq
+ ___satfractunsdisq
+ ___satfractunsdidq
+ ___satfractunsditq
+ ___satfractunsdiha
+ ___satfractunsdisa
+ ___satfractunsdida
+ ___satfractunsdita
+ ___satfractunsdiuqq
+ ___satfractunsdiuhq
+ ___satfractunsdiusq
+ ___satfractunsdiudq
+ ___satfractunsdiutq
+ ___satfractunsdiuha
+ ___satfractunsdiusa
+ ___satfractunsdiuda
+ ___satfractunsdiuta
+ ___satfractunstiqq
+ ___satfractunstihq
+ ___satfractunstisq
+ ___satfractunstidq
+ ___satfractunstitq
+ ___satfractunstiha
+ ___satfractunstisa
+ ___satfractunstida
+ ___satfractunstita
+ ___satfractunstiuqq
+ ___satfractunstiuhq
+ ___satfractunstiusq
+ ___satfractunstiudq
+ ___satfractunstiutq
+ ___satfractunstiuha
+ ___satfractunstiusa
+ ___satfractunstiuda
+ ___satfractunstiuta
+}
+
+%inherit GCC_4.4.0 GCC_4.3.0
+GCC_4.4.0 {
+ ___sync_fetch_and_add_1
+ ___sync_fetch_and_sub_1
+ ___sync_fetch_and_or_1
+ ___sync_fetch_and_and_1
+ ___sync_fetch_and_xor_1
+ ___sync_fetch_and_nand_1
+ ___sync_add_and_fetch_1
+ ___sync_sub_and_fetch_1
+ ___sync_or_and_fetch_1
+ ___sync_and_and_fetch_1
+ ___sync_xor_and_fetch_1
+ ___sync_nand_and_fetch_1
+ ___sync_bool_compare_and_swap_1
+ ___sync_val_compare_and_swap_1
+ ___sync_lock_test_and_set_1
+
+ ___sync_fetch_and_add_2
+ ___sync_fetch_and_sub_2
+ ___sync_fetch_and_or_2
+ ___sync_fetch_and_and_2
+ ___sync_fetch_and_xor_2
+ ___sync_fetch_and_nand_2
+ ___sync_add_and_fetch_2
+ ___sync_sub_and_fetch_2
+ ___sync_or_and_fetch_2
+ ___sync_and_and_fetch_2
+ ___sync_xor_and_fetch_2
+ ___sync_nand_and_fetch_2
+ ___sync_bool_compare_and_swap_2
+ ___sync_val_compare_and_swap_2
+ ___sync_lock_test_and_set_2
+
+ ___sync_fetch_and_add_4
+ ___sync_fetch_and_sub_4
+ ___sync_fetch_and_or_4
+ ___sync_fetch_and_and_4
+ ___sync_fetch_and_xor_4
+ ___sync_fetch_and_nand_4
+ ___sync_add_and_fetch_4
+ ___sync_sub_and_fetch_4
+ ___sync_or_and_fetch_4
+ ___sync_and_and_fetch_4
+ ___sync_xor_and_fetch_4
+ ___sync_nand_and_fetch_4
+ ___sync_bool_compare_and_swap_4
+ ___sync_val_compare_and_swap_4
+ ___sync_lock_test_and_set_4
+
+ ___sync_fetch_and_add_8
+ ___sync_fetch_and_sub_8
+ ___sync_fetch_and_or_8
+ ___sync_fetch_and_and_8
+ ___sync_fetch_and_xor_8
+ ___sync_fetch_and_nand_8
+ ___sync_add_and_fetch_8
+ ___sync_sub_and_fetch_8
+ ___sync_or_and_fetch_8
+ ___sync_and_and_fetch_8
+ ___sync_xor_and_fetch_8
+ ___sync_nand_and_fetch_8
+ ___sync_bool_compare_and_swap_8
+ ___sync_val_compare_and_swap_8
+ ___sync_lock_test_and_set_8
+
+ ___sync_fetch_and_add_16
+ ___sync_fetch_and_sub_16
+ ___sync_fetch_and_or_16
+ ___sync_fetch_and_and_16
+ ___sync_fetch_and_xor_16
+ ___sync_fetch_and_nand_16
+ ___sync_add_and_fetch_16
+ ___sync_sub_and_fetch_16
+ ___sync_or_and_fetch_16
+ ___sync_and_and_fetch_16
+ ___sync_xor_and_fetch_16
+ ___sync_nand_and_fetch_16
+ ___sync_bool_compare_and_swap_16
+ ___sync_val_compare_and_swap_16
+ ___sync_lock_test_and_set_16
+
+ ___sync_synchronize
+}
+
+%inherit GCC_4.5.0 GCC_4.4.0
+GCC_4.5.0 {
+ ___unordxf2
+ ___unordtf2
+}
diff --git a/gcc/config/bfin/linux-unwind.h b/gcc/config/bfin/linux-unwind.h
new file mode 100644
index 000000000..15bb2f12b
--- /dev/null
+++ b/gcc/config/bfin/linux-unwind.h
@@ -0,0 +1,164 @@
+/* DWARF2 EH unwinding support for Blackfin.
+ Copyright (C) 2007, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR bfin_fallback_frame_state
+
+static _Unwind_Reason_Code
+bfin_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+
+ /* P0=__NR_rt_sigreturn (X); EXCPT 0x0; */
+ if (*(unsigned short *)pc == 0xe128
+ && *(unsigned short *)(pc + 2) == 0x00ad
+ && *(unsigned short *)(pc + 4) == 0x00a0)
+ {
+ struct rt_sigframe {
+ int sig;
+ siginfo_t *pinfo;
+ void *puc;
+ char retcode[8];
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The warning is legitimate, but harmless here: the pointer is only
+ used to access the register block the kernel wrote. */
+ sc = (struct sigcontext *)(void *)&rt_->uc.uc_mcontext.gregs;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->sc_usp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 14;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&sc->sc_r0 - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&sc->sc_r1 - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&sc->sc_r2 - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&sc->sc_r3 - new_cfa;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&sc->sc_r4 - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&sc->sc_r5 - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&sc->sc_r6 - new_cfa;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&sc->sc_r7 - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&sc->sc_p0 - new_cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&sc->sc_p1 - new_cfa;
+ fs->regs.reg[10].how = REG_SAVED_OFFSET;
+ fs->regs.reg[10].loc.offset = (long)&sc->sc_p2 - new_cfa;
+ fs->regs.reg[11].how = REG_SAVED_OFFSET;
+ fs->regs.reg[11].loc.offset = (long)&sc->sc_p3 - new_cfa;
+ fs->regs.reg[12].how = REG_SAVED_OFFSET;
+ fs->regs.reg[12].loc.offset = (long)&sc->sc_p4 - new_cfa;
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long)&sc->sc_p5 - new_cfa;
+
+ fs->regs.reg[15].how = REG_SAVED_OFFSET;
+ fs->regs.reg[15].loc.offset = (long)&sc->sc_fp - new_cfa;
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long)&sc->sc_i0 - new_cfa;
+ fs->regs.reg[17].how = REG_SAVED_OFFSET;
+ fs->regs.reg[17].loc.offset = (long)&sc->sc_i1 - new_cfa;
+ fs->regs.reg[18].how = REG_SAVED_OFFSET;
+ fs->regs.reg[18].loc.offset = (long)&sc->sc_i2 - new_cfa;
+ fs->regs.reg[19].how = REG_SAVED_OFFSET;
+ fs->regs.reg[19].loc.offset = (long)&sc->sc_i3 - new_cfa;
+ fs->regs.reg[20].how = REG_SAVED_OFFSET;
+ fs->regs.reg[20].loc.offset = (long)&sc->sc_b0 - new_cfa;
+ fs->regs.reg[21].how = REG_SAVED_OFFSET;
+ fs->regs.reg[21].loc.offset = (long)&sc->sc_b1 - new_cfa;
+ fs->regs.reg[22].how = REG_SAVED_OFFSET;
+ fs->regs.reg[22].loc.offset = (long)&sc->sc_b2 - new_cfa;
+ fs->regs.reg[23].how = REG_SAVED_OFFSET;
+ fs->regs.reg[23].loc.offset = (long)&sc->sc_b3 - new_cfa;
+ fs->regs.reg[24].how = REG_SAVED_OFFSET;
+ fs->regs.reg[24].loc.offset = (long)&sc->sc_l0 - new_cfa;
+ fs->regs.reg[25].how = REG_SAVED_OFFSET;
+ fs->regs.reg[25].loc.offset = (long)&sc->sc_l1 - new_cfa;
+ fs->regs.reg[26].how = REG_SAVED_OFFSET;
+ fs->regs.reg[26].loc.offset = (long)&sc->sc_l2 - new_cfa;
+ fs->regs.reg[27].how = REG_SAVED_OFFSET;
+ fs->regs.reg[27].loc.offset = (long)&sc->sc_l3 - new_cfa;
+ fs->regs.reg[28].how = REG_SAVED_OFFSET;
+ fs->regs.reg[28].loc.offset = (long)&sc->sc_m0 - new_cfa;
+ fs->regs.reg[29].how = REG_SAVED_OFFSET;
+ fs->regs.reg[29].loc.offset = (long)&sc->sc_m1 - new_cfa;
+ fs->regs.reg[30].how = REG_SAVED_OFFSET;
+ fs->regs.reg[30].loc.offset = (long)&sc->sc_m2 - new_cfa;
+ fs->regs.reg[31].how = REG_SAVED_OFFSET;
+ fs->regs.reg[31].loc.offset = (long)&sc->sc_m3 - new_cfa;
+ /* FIXME: Handle A0, A1, CC. */
+ fs->regs.reg[35].how = REG_SAVED_OFFSET;
+ fs->regs.reg[35].loc.offset = (long)&sc->sc_rets - new_cfa;
+ fs->regs.reg[36].how = REG_SAVED_OFFSET;
+ fs->regs.reg[36].loc.offset = (long)&sc->sc_pc - new_cfa;
+ fs->regs.reg[37].how = REG_SAVED_OFFSET;
+ fs->regs.reg[37].loc.offset = (long)&sc->sc_retx - new_cfa;
+
+ fs->regs.reg[40].how = REG_SAVED_OFFSET;
+ fs->regs.reg[40].loc.offset = (long)&sc->sc_astat - new_cfa;
+ fs->regs.reg[41].how = REG_SAVED_OFFSET;
+ fs->regs.reg[41].loc.offset = (long)&sc->sc_seqstat - new_cfa;
+
+ fs->regs.reg[44].how = REG_SAVED_OFFSET;
+ fs->regs.reg[44].loc.offset = (long)&sc->sc_lt0 - new_cfa;
+ fs->regs.reg[45].how = REG_SAVED_OFFSET;
+ fs->regs.reg[45].loc.offset = (long)&sc->sc_lt1 - new_cfa;
+ fs->regs.reg[46].how = REG_SAVED_OFFSET;
+ fs->regs.reg[46].loc.offset = (long)&sc->sc_lc0 - new_cfa;
+ fs->regs.reg[47].how = REG_SAVED_OFFSET;
+ fs->regs.reg[47].loc.offset = (long)&sc->sc_lc1 - new_cfa;
+ fs->regs.reg[48].how = REG_SAVED_OFFSET;
+ fs->regs.reg[48].loc.offset = (long)&sc->sc_lb0 - new_cfa;
+ fs->regs.reg[49].how = REG_SAVED_OFFSET;
+ fs->regs.reg[49].loc.offset = (long)&sc->sc_lb1 - new_cfa;
+ fs->retaddr_column = 35;
+
+ return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc */
diff --git a/gcc/config/bfin/linux.h b/gcc/config/bfin/linux.h
new file mode 100644
index 000000000..a75074592
--- /dev/null
+++ b/gcc/config/bfin/linux.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS \
+ "%{!mno-fdpic:-mfdpic} -micplb",
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} crtreloc.o%s \
+ crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
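+
+/* The %{pg|p|profile:...;pie:...;:...} chain above is first-match:
+   profiled links get gcrt1.o, -pie links get Scrt1.o, and everything
+   else gets crt1.o. */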
+
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %{mfast-fp:-lbffastfp} %G %L %{static:--end-group} \
+ %{!static:%{mfast-fp:-lbffastfp} %G}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{mfdpic: -m elf32bfinfd -z text} %{shared} %{pie} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -Bdynamic} \
+ %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /lib/ld-uClibc.so.0} \
+ %{static}} -init __init -fini __fini"
+
+#define MD_UNWIND_SUPPORT "config/bfin/linux-unwind.h"
+
+#undef TARGET_SUPPORTS_SYNC_CALLS
+#define TARGET_SUPPORTS_SYNC_CALLS 1
diff --git a/gcc/config/bfin/predicates.md b/gcc/config/bfin/predicates.md
new file mode 100644
index 000000000..84bf59195
--- /dev/null
+++ b/gcc/config/bfin/predicates.md
@@ -0,0 +1,241 @@
+;; Predicate definitions for the Blackfin.
+;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by Analog Devices.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return nonzero iff OP is one of the integer constants 1 or 2.
+(define_predicate "pos_scale_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
+
+;; Return nonzero iff OP is one of the integer constants 2 or 4.
+(define_predicate "scale_by_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2 || INTVAL (op) == 4")))
+
+;; Return nonzero if OP is a constant whose low bits are all zero and
+;; whose high bits are all ones.  Such a mask lets an AND be performed
+;; as a sequence of two shifts.  Don't return nonzero if the constant
+;; would be cheap to load anyway.
+(define_predicate "highbits_operand"
+  (and (match_code "const_int")
+       (match_test "log2constp (-INTVAL (op)) && !satisfies_constraint_Ks7 (op)")))
+(define_predicate "highbits_operand"
+ (and (match_code "const_int")
+ (match_test "log2constp (-INTVAL (op)) && !satisfies_constraint_Ks7 (op)")))
+
+;; Return nonzero if OP is suitable as a right-hand side operand for an
+;; andsi3 operation.
+(define_predicate "rhs_andsi3_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "log2constp (~INTVAL (op)) || INTVAL (op) == 255 || INTVAL (op) == 65535"))))
+
+;; Return nonzero if OP is a register or a constant with exactly one bit
+;; set.
+(define_predicate "regorlog2_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "log2constp (INTVAL (op))"))))
+
+;; Return nonzero if OP is a register or an integer constant.
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_code "const_int")))
+
+(define_predicate "const01_operand"
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx || op == const1_rtx")))
+
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+(define_predicate "const3_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 3")))
+
+(define_predicate "vec_shift_operand"
+ (ior (and (match_code "const_int")
+ (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15"))
+ (match_operand 0 "register_operand")))
+
+;; Like register_operand, but make sure that hard regs have a valid mode.
+(define_predicate "valid_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (REGNO (op) < FIRST_PSEUDO_REGISTER)
+ return HARD_REGNO_MODE_OK (REGNO (op), mode);
+ return 1;
+})
+
+;; Return nonzero if OP is a D register.
+(define_predicate "d_register_operand"
+ (and (match_code "reg")
+ (match_test "D_REGNO_P (REGNO (op))")))
+
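+;; Return nonzero if OP is a P register.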
+(define_predicate "p_register_operand"
+ (and (match_code "reg")
+ (match_test "P_REGNO_P (REGNO (op))")))
+
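+;; Return nonzero if OP is a D or P register.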
+(define_predicate "dp_register_operand"
+ (and (match_code "reg")
+ (match_test "D_REGNO_P (REGNO (op)) || P_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is a LC register.
+(define_predicate "lc_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1")))
+
+;; Return nonzero if OP is a LT register.
+(define_predicate "lt_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1")))
+
+;; Return nonzero if OP is a LB register.
+(define_predicate "lb_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1")))
+
+;; Return nonzero if OP is a register or a 7-bit signed constant.
+(define_predicate "reg_or_7bit_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Ks7 (op)"))))
+
+;; Return nonzero if OP is a register that is neither a D nor a P register.
+(define_predicate "nondp_register_operand"
+ (match_operand 0 "register_operand")
+{
+ unsigned int regno;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno));
+})
+
+;; Return nonzero if OP is a register that is neither a D nor a P register,
+;; or a MEM.
+(define_predicate "nondp_reg_or_memory_operand"
+ (ior (match_operand 0 "nondp_register_operand")
+ (match_operand 0 "memory_operand")))
+
+;; Return nonzero if OP is a register or, when negated, a 7-bit signed
+;; constant.
+(define_predicate "reg_or_neg7bit_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_KN7 (op)"))))
+
+;; Used for secondary reloads, this function returns 1 if OP is of the
+;; form (plus (fp) (const_int)).
+(define_predicate "fp_plus_const_operand"
+ (match_code "plus")
+{
+ rtx op1, op2;
+
+ op1 = XEXP (op, 0);
+ op2 = XEXP (op, 1);
+ return (REG_P (op1)
+ && (REGNO (op1) == FRAME_POINTER_REGNUM
+ || REGNO (op1) == STACK_POINTER_REGNUM)
+ && GET_CODE (op2) == CONST_INT);
+})
+
+;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref,
+;; possibly with an offset.
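+;; For example, a plain (symbol_ref "x") matches, and so does the offset
+;; form (const (plus (symbol_ref "x") (const_int 4))).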
+(define_predicate "symbolic_operand"
+ (ior (match_code "symbol_ref,label_ref")
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op,0)) == PLUS
+ && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF)
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT"))))
+
+;; Returns 1 if OP is a plain constant or matched by symbolic_operand.
+(define_predicate "symbolic_or_const_operand"
+ (ior (match_code "const_int,const_double")
+ (match_operand 0 "symbolic_operand")))
+
+;; Returns 1 if OP is a SYMBOL_REF.
+(define_predicate "symbol_ref_operand"
+ (match_code "symbol_ref"))
+
+;; True for any non-virtual or eliminable register. Used in places where
+;; instantiation of such a register may cause the pattern to not be recognized.
+(define_predicate "register_no_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return !(op == arg_pointer_rtx
+ || op == frame_pointer_rtx
+ || (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && REGNO (op) <= LAST_VIRTUAL_REGISTER));
+})
+
+;; Test for an operator valid in a BImode conditional branch
+(define_predicate "bfin_bimode_comparison_operator"
+ (match_code "eq,ne"))
+
+;; Test for an operator whose result is accessible with movbisi.
+(define_predicate "bfin_direct_comparison_operator"
+ (match_code "eq,lt,le,leu,ltu"))
+
+;; The following three predicates are used to compute the addrtype
+;; attribute. They return true if passed a memory address usable for a
+;; 16-bit load or store using a P register, SP/FP, or an I register,
+;; respectively. If none of them matches, we know we have a 32-bit
+;; instruction.
+;; We subdivide the P case into normal P registers and SP/FP because we
+;; can assume that speculative loads through SP and FP are no problem,
+;; which affects the anomaly workaround code.
+
+(define_predicate "mem_p_address_operand"
+ (match_code "mem")
+{
+ if (effective_address_32bit_p (op, mode))
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+ op = XEXP (op, 0);
+ gcc_assert (REG_P (op));
+ return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx;
+})
+
+(define_predicate "mem_spfp_address_operand"
+ (match_code "mem")
+{
+ if (effective_address_32bit_p (op, mode))
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+ op = XEXP (op, 0);
+ gcc_assert (REG_P (op));
+ return op == stack_pointer_rtx || op == frame_pointer_rtx;
+})
+
+(define_predicate "mem_i_address_operand"
+ (match_code "mem")
+{
+ if (effective_address_32bit_p (op, mode))
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+ op = XEXP (op, 0);
+ gcc_assert (REG_P (op));
+ return IREG_P (op);
+})
diff --git a/gcc/config/bfin/print-sysroot-suffix.sh b/gcc/config/bfin/print-sysroot-suffix.sh
new file mode 100644
index 000000000..c33ff47c3
--- /dev/null
+++ b/gcc/config/bfin/print-sysroot-suffix.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# Copyright (C) 2007 Free Software Foundation, Inc.
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# This script takes the following arguments:
+#
+# - the target sysroot
+# - the value of $(MULTILIB_MATCHES)
+# - the value of $(MULTILIB_OPTIONS)
+#
+# It uses these arguments to construct a definition of SYSROOT_SUFFIX_SPEC,
+# which it prints to the standard output. For each multilib directory FOO,
+# the script checks whether $sysroot has a subdirectory FOO; if so, it
+# uses /FOO for all compatible command-line options, and otherwise it adds
+# no suffix for /FOO's options. These suffixes are concatenated,
+# with one subspec for each space-separated entry in $(MULTILIB_OPTIONS).
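+#
+# For example (a sketch, not output from an actual run): with
+# MULTILIB_OPTIONS='mcpu=bf532-none', a single match mapping
+# mcpu?bf512-none onto mcpu?bf532-none, and a sysroot containing a
+# bf532-none/ subdirectory, the script would print something like:
+#
+#   #undef SYSROOT_SUFFIX_SPEC
+#   #define SYSROOT_SUFFIX_SPEC "%{mcpu=bf512-none|mcpu=bf532-none:/bf532-none}"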
+set -e
+sysroot=$1
+matches=$2
+options=$3
+
+# For each multilib option OPT, add to $substs a sed command of the
+# form "-e 's/OPT/OPT/'".
+substs=""
+for option in `echo "$options" | tr '/' ' '`
+do
+ substs="$substs -e 's/$option/$option/g'"
+done
+
+# For each ALIAS=CANONICAL entry in $MULTILIB_MATCHES, look for sed
+# arguments in $substs of the form "-e 's/CANONICAL/.../'". Replace
+# such entries with "-e 's/CANONICAL/ALIAS|.../'". Both the ALIAS and
+# CANONICAL parts of $MULTILIB_MATCHES use '?' to stand for '='.
+#
+# After this loop, a command of the form "echo FOO | eval sed $substs"
+# will replace a canonical option FOO with a %{...}-style spec pattern.
+for match in $matches
+do
+ canonical=`echo "$match" | sed -e 's/=.*//' -e 's/?/=/g'`
+ alias=`echo "$match" | sed -e 's/.*=//' -e 's/?/=/g'`
+ substs=`echo "$substs" | sed -e "s,s/$canonical/,&$alias|,"`
+done
+
+# Build up the final SYSROOT_SUFFIX_SPEC in $spec.
+spec=
+for combo in $options
+do
+ # See which option alternatives in $combo have their own sysroot
+ # directory. Create a subspec of the form "%{PAT1:/DIR1;...;PATn:DIRn}"
+ # from each such option OPTi, where DIRi is the directory associated
+ # with OPTi and PATi is the result of passing OPTi through $substs.
+ subspec=
+ for option in `echo "$combo" | tr '/' ' '`
+ do
+ dir=`echo "$option" | sed 's/mcpu=//'`
+ if test -d "$sysroot/$dir"; then
+ test -z "$subspec" || subspec="$subspec;"
+ subspec="$subspec"`echo "$option" | eval sed $substs`":/$dir"
+ fi
+ done
+ # Concatenate all the subspecs.
+ test -z "$subspec" || spec="$spec%{$subspec}"
+done
+if test -n "$spec"; then
+ echo "#undef SYSROOT_SUFFIX_SPEC"
+ echo "#define SYSROOT_SUFFIX_SPEC \"$spec\""
+fi
diff --git a/gcc/config/bfin/rtems.h b/gcc/config/bfin/rtems.h
new file mode 100644
index 000000000..6fa6ef10e
--- /dev/null
+++ b/gcc/config/bfin/rtems.h
@@ -0,0 +1,28 @@
+/* Definitions for RTEMS targeting a Blackfin
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Ralf Corsépius (ralf.corsepius@rtems.org).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS preprocessor built-ins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
diff --git a/gcc/config/bfin/sync.md b/gcc/config/bfin/sync.md
new file mode 100644
index 000000000..7025af497
--- /dev/null
+++ b/gcc/config/bfin/sync.md
@@ -0,0 +1,178 @@
+;; GCC machine description for Blackfin synchronization instructions.
+;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
+;; Contributed by Analog Devices.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_code_iterator FETCHOP [plus minus ior and xor])
+(define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "ior") (and "and") (xor "xor")])
+(define_code_attr fetchop_addr
+ [(plus "1072") (minus "1088") (ior "1104") (and "1120") (xor "1136")])
+
+(define_insn "sync_<fetchop_name>si_internal"
+ [(set (mem:SI (match_operand:SI 0 "register_operand" "qA"))
+ (unspec:SI
+ [(FETCHOP:SI (mem:SI (match_dup 0))
+ (match_operand:SI 1 "register_operand" "q0"))
+ (match_operand:SI 2 "register_no_elim_operand" "a")]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=q0"))
+ (clobber (match_scratch:SI 4 "=q1"))
+ (clobber (reg:SI REG_RETS))]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+ "call (%2);"
+ [(set_attr "type" "call")])
+
+(define_expand "sync_<fetchop_name>si"
+ [(parallel
+ [(set (match_operand:SI 0 "memory_operand" "+m")
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 0)
+ (match_operand:SI 1 "register_operand" "q0"))
+ (match_dup 2)]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:SI REG_RETS))])]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+{
+ if (!REG_P (XEXP (operands[0], 0)))
+ {
+ operands[0] = shallow_copy_rtx (operands[0]);
+ XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0));
+ }
+ operands[2] = force_reg (Pmode, GEN_INT (<fetchop_addr>));
+})
+
+(define_insn "sync_old_<fetchop_name>si_internal"
+ [(set (match_operand:SI 0 "register_operand" "=q1")
+ (mem:SI (match_operand:SI 1 "register_operand" "qA")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(FETCHOP:SI (mem:SI (match_dup 1))
+ (match_operand:SI 2 "register_operand" "q0"))
+ (match_operand:SI 3 "register_no_elim_operand" "a")]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 4 "=q0"))
+ (clobber (reg:SI REG_RETS))]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+ "call (%3);"
+ [(set_attr "type" "call")])
+
+(define_expand "sync_old_<fetchop_name>si"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 1)
+ (match_operand:SI 2 "register_operand" ""))
+ (match_dup 3)]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:SI REG_RETS))])]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+{
+ if (!REG_P (XEXP (operands[1], 0)))
+ {
+ operands[1] = shallow_copy_rtx (operands[1]);
+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
+ }
+ operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>));
+})
+
+(define_insn "sync_new_<fetchop_name>si_internal"
+ [(set (match_operand:SI 0 "register_operand" "=q0")
+ (unspec:SI
+ [(FETCHOP:SI
+ (mem:SI (match_operand:SI 1 "register_operand" "qA"))
+ (match_operand:SI 2 "register_operand" "q0"))
+ (match_operand:SI 3 "register_no_elim_operand" "a")]
+ UNSPEC_ATOMIC))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2))
+ (match_dup 3)]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 4 "=q1"))
+ (clobber (reg:SI REG_RETS))]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+ "call (%3);"
+ [(set_attr "type" "call")])
+
+(define_expand "sync_new_<fetchop_name>si"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI
+ [(FETCHOP:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "register_operand" ""))
+ (match_dup 3)]
+ UNSPEC_ATOMIC))
+ (set (match_dup 1)
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:SI REG_RETS))])]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+{
+ if (!REG_P (XEXP (operands[1], 0)))
+ {
+ operands[1] = shallow_copy_rtx (operands[1]);
+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
+ }
+ operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>));
+})
+
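+;; Compare-and-swap uses the same call mechanism; 1056 (0x420) below is
+;; presumably the address of the fixed compare-and-swap helper routine.
+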
+(define_insn "sync_compare_and_swapsi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=q0")
+ (mem:SI (match_operand:SI 1 "register_operand" "qA")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(mem:SI (match_dup 1))
+ (match_operand:SI 2 "register_operand" "q1")
+ (match_operand:SI 3 "register_operand" "q2")
+ (match_operand:SI 4 "register_no_elim_operand" "a")]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI REG_RETS))]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+ "call (%4);"
+ [(set_attr "type" "call")])
+
+(define_expand "sync_compare_and_swapsi"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:SI
+ [(match_dup 1)
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")
+ (match_dup 4)]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI REG_RETS))])]
+ "TARGET_SUPPORTS_SYNC_CALLS"
+{
+ if (!REG_P (XEXP (operands[1], 0)))
+ {
+ operands[1] = shallow_copy_rtx (operands[1]);
+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
+ }
+ operands[4] = force_reg (Pmode, GEN_INT (0x420));
+})
diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin
new file mode 100644
index 000000000..37b6871c1
--- /dev/null
+++ b/gcc/config/bfin/t-bfin
@@ -0,0 +1,43 @@
+# Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+## Target part of the Makefile
+
+LIB1ASMSRC = bfin/lib1funcs.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3
+
+EXTRA_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
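+# fp-bit.c and dp-bit.c provide the software floating-point routines;
+# defining FLOAT selects the single-precision variant.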
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/bfin/crti.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/bfin/crti.s
+
+$(T)crtn.o: $(srcdir)/config/bfin/crtn.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/bfin/crtn.s
diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf
new file mode 100644
index 000000000..39209f628
--- /dev/null
+++ b/gcc/config/bfin/t-bfin-elf
@@ -0,0 +1,81 @@
+# Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+## Target part of the Makefile
+
+LIB1ASMSRC = bfin/lib1funcs.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart
+LIB1ASMFUNCS += _smulsi3_highpart
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+CRTSTUFF_T_CFLAGS = -fpic
+TARGET_LIBGCC2_CFLAGS = -fpic
+
+MULTILIB_OPTIONS=mcpu=bf532-none
+MULTILIB_OPTIONS+=mid-shared-library/msep-data/mfdpic mleaf-id-shared-library
+MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mfdpic mleaf-id-shared-library
+
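+# In MULTILIB_MATCHES, '?' stands for '=': each entry below maps an alias
+# such as -mcpu=bf512-none onto the canonical bf532-none multilib.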
+MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none
+
+MULTILIB_EXCEPTIONS=mleaf-id-shared-library*
+MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library*
+MULTILIB_EXCEPTIONS+=*mfdpic/mleaf-id-shared-library*
+MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library*
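+
+# The exceptions above reflect that -mleaf-id-shared-library only makes
+# sense in combination with -mid-shared-library.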
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/bfin/crti.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/bfin/crti.s
+
+$(T)crtn.o: $(srcdir)/config/bfin/crtn.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/bfin/crtn.s
+
+$(T)crtlibid.o: $(srcdir)/config/bfin/crtlibid.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtlibid.o -x assembler-with-cpp \
+ $(srcdir)/config/bfin/crtlibid.s
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crti.o crtn.o crtlibid.o
diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux
new file mode 100644
index 000000000..f7ba95501
--- /dev/null
+++ b/gcc/config/bfin/t-bfin-linux
@@ -0,0 +1,72 @@
+# Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+## Target part of the Makefile
+
+LIB1ASMSRC = bfin/lib1funcs.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart
+LIB1ASMFUNCS += _smulsi3_highpart
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+CRTSTUFF_T_CFLAGS = -fpic
+TARGET_LIBGCC2_CFLAGS = -fpic
+
+MULTILIB_OPTIONS=mcpu=bf532-none
+MULTILIB_DIRNAMES=bf532-none
+
+MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none
+
+SHLIB_MAPFILES=$(srcdir)/config/bfin/libgcc-bfin.ver
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o
+
+# This rule uses MULTILIB_MATCHES to generate a definition of
+# SYSROOT_SUFFIX_SPEC.
+linux-sysroot-suffix.h: $(srcdir)/config/bfin/print-sysroot-suffix.sh
+ $(SHELL) $(srcdir)/config/bfin/print-sysroot-suffix.sh \
+ "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \
+ "$(MULTILIB_OPTIONS)" > $@
+
+generated_files += linux-sysroot-suffix.h
diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux
new file mode 100644
index 000000000..eb6d2253e
--- /dev/null
+++ b/gcc/config/bfin/t-bfin-uclinux
@@ -0,0 +1,72 @@
+# Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+## Target part of the Makefile
+
+LIB1ASMSRC = bfin/lib1funcs.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _umulsi3_highpart
+LIB1ASMFUNCS += _smulsi3_highpart
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+CRTSTUFF_T_CFLAGS = -fpic
+TARGET_LIBGCC2_CFLAGS = -fpic
+
+MULTILIB_OPTIONS=mcpu=bf532-none
+MULTILIB_OPTIONS+=mid-shared-library/msep-data mleaf-id-shared-library
+MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mleaf-id-shared-library
+
+MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none
+MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none
+
+MULTILIB_EXCEPTIONS=mleaf-id-shared-library*
+MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library*
+MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library*
+
+# Assemble startup files.
+$(T)crtlibid.o: $(srcdir)/config/bfin/crtlibid.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtlibid.o -x assembler-with-cpp \
+ $(srcdir)/config/bfin/crtlibid.s
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtlibid.o
diff --git a/gcc/config/bfin/t-rtems b/gcc/config/bfin/t-rtems
new file mode 100644
index 000000000..728ab1c4f
--- /dev/null
+++ b/gcc/config/bfin/t-rtems
@@ -0,0 +1,6 @@
+# Multilibs for bfin RTEMS targets.
+
+MULTILIB_OPTIONS =
+MULTILIB_DIRNAMES =
+MULTILIB_EXTRA_OPTS =
+MULTILIB_EXCEPTIONS =
diff --git a/gcc/config/bfin/uclinux.h b/gcc/config/bfin/uclinux.h
new file mode 100644
index 000000000..6001b2364
--- /dev/null
+++ b/gcc/config/bfin/uclinux.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: crt1%O%s} crti%O%s crtbegin%O%s crtlibid%O%s"
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#define MD_UNWIND_SUPPORT "config/bfin/linux-unwind.h"
+
+/* Like the definition in gcc.c, but for purposes of uClinux, every link is
+ static. */
+#define MFWRAP_SPEC " %{fmudflap|fmudflapth: \
+ --wrap=malloc --wrap=free --wrap=calloc --wrap=realloc\
+ --wrap=mmap --wrap=munmap --wrap=alloca\
+ %{fmudflapth: --wrap=pthread_create\
+}} %{fmudflap|fmudflapth: --wrap=main}"
+
+#undef TARGET_SUPPORTS_SYNC_CALLS
+#define TARGET_SUPPORTS_SYNC_CALLS 1
diff --git a/gcc/config/cris/arit.c b/gcc/config/cris/arit.c
new file mode 100644
index 000000000..32255f99d
--- /dev/null
+++ b/gcc/config/cris/arit.c
@@ -0,0 +1,304 @@
+/* Signed and unsigned multiplication, division and modulus for CRIS.
+ Contributed by Axis Communications.
+ Written by Hans-Peter Nilsson <hp@axis.se>, circa 1992.
+
+ Copyright (C) 1998, 1999, 2000, 2001, 2002,
+ 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Note that we provide prototypes for all "const" functions, to attach
+ the const attribute. This is necessary in GCC 2.7.2; adding the
+ attribute to the function *definition* is a syntax error.
+ This did not work with e.g. 2.1; back then, the return type had to
+ be "const". */
+
+#include "config.h"
+
+#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 3
+#define LZ(v) __builtin_clz (v)
+#endif
+
+
+#if defined (L_udivsi3) || defined (L_divsi3) || defined (L_umodsi3) \
+ || defined (L_modsi3)
+/* Result type of divmod worker function. */
+struct quot_rem
+ {
+ long quot;
+ long rem;
+ };
+
+/* This is the worker function for div and mod. It is inlined into the
+ respective library function. Parameter A must have bit 31 == 0. */
+
+static __inline__ struct quot_rem
+do_31div (unsigned long a, unsigned long b)
+ __attribute__ ((__const__, __always_inline__));
+
+static __inline__ struct quot_rem
+do_31div (unsigned long a, unsigned long b)
+{
+ /* Adjust operands and result if a is 31 bits. */
+ long extra = 0;
+ int quot_digits = 0;
+
+ if (b == 0)
+ {
+ struct quot_rem ret;
+ ret.quot = 0xffffffff;
+ ret.rem = 0xffffffff;
+ return ret;
+ }
+
+ if (a < b)
+ return (struct quot_rem) { 0, a };
+
+#ifdef LZ
+ if (b <= a)
+ {
+ quot_digits = LZ (b) - LZ (a);
+ quot_digits += (a >= (b << quot_digits));
+ b <<= quot_digits;
+ }
+#else
+ while (b <= a)
+ {
+ b <<= 1;
+ quot_digits++;
+ }
+#endif
+
+ /* Is a 31 bits wide? Note that bit 31 is handled by the caller. */
+ if (a & 0x40000000)
+ {
+ /* Then make b's highest bit at most 0x40000000, because it must have
+ been 0x80000000 to be one bit higher than a. */
+ b >>= 1;
+
+ /* Adjust a to be maximum 0x3fffffff, i.e. two upper bits zero. */
+ if (a >= b)
+ {
+ a -= b;
+ extra = 1 << (quot_digits - 1);
+ }
+ else
+ {
+ a -= b >> 1;
+
+ /* Remember that we adjusted a by subtracting b times a power of two. */
+ extra = 1 << quot_digits;
+ }
+
+ /* The number of quotient digits will be one less, because
+ we just adjusted b. */
+ quot_digits--;
+ }
+
+ /* Now do the division part. */
+
+ /* Subtract b and add ones to the right when a >= b
+ i.e. "a - (b - 1) == (a - b) + 1". */
+ b--;
+
+#define DS __asm__ ("dstep %2,%0" : "=r" (a) : "0" (a), "r" (b))
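+
+/* Each DS is one division step: "dstep" shifts a left one bit and
+ conditionally subtracts b, so quotient bits accumulate in the low end
+ of a while the remainder forms in the upper bits. The switch below
+ unrolls quot_digits such steps, Duff's-device style. */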
+
+ switch (quot_digits)
+ {
+ case 32: DS; case 31: DS; case 30: DS; case 29: DS;
+ case 28: DS; case 27: DS; case 26: DS; case 25: DS;
+ case 24: DS; case 23: DS; case 22: DS; case 21: DS;
+ case 20: DS; case 19: DS; case 18: DS; case 17: DS;
+ case 16: DS; case 15: DS; case 14: DS; case 13: DS;
+ case 12: DS; case 11: DS; case 10: DS; case 9: DS;
+ case 8: DS; case 7: DS; case 6: DS; case 5: DS;
+ case 4: DS; case 3: DS; case 2: DS; case 1: DS;
+ case 0:;
+ }
+
+ {
+ struct quot_rem ret;
+ ret.quot = (a & ((1 << quot_digits) - 1)) + extra;
+ ret.rem = a >> quot_digits;
+ return ret;
+ }
+}
+
+#ifdef L_udivsi3
+unsigned long
+__Udiv (unsigned long a, unsigned long b) __attribute__ ((__const__));
+
+unsigned long
+__Udiv (unsigned long a, unsigned long b)
+{
+ long extra = 0;
+
+ /* Adjust operands and result, if a and/or b is 32 bits. */
+ /* Effectively: b & 0x80000000. */
+ if ((long) b < 0)
+ return a >= b;
+
+ /* Effectively: a & 0x80000000. */
+ if ((long) a < 0)
+ {
+ int tmp = 0;
+
+ if (b == 0)
+ return 0xffffffff;
+#ifdef LZ
+ tmp = LZ (b);
+#else
+ for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
+ ;
+
+ tmp = 31 - tmp;
+#endif
+
+ if ((b << tmp) > a)
+ {
+ extra = 1 << (tmp - 1);
+ a -= b << (tmp - 1);
+ }
+ else
+ {
+ extra = 1 << tmp;
+ a -= b << tmp;
+ }
+ }
+
+ return do_31div (a, b).quot + extra;
+}
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+long
+__Div (long a, long b) __attribute__ ((__const__));
+
+long
+__Div (long a, long b)
+{
+ long extra = 0;
+ long sign = (b < 0) ? -1 : 1;
+
+ /* We need to handle a == -2147483648 as expected and must while
+ doing that avoid producing a sequence like "abs (a) < 0" as GCC
+ may optimize out the test. That sequence may not be obvious as
+ we call inline functions. Testing for a being negative and
+ handling (presumably much rarer than positive) enables us to get
+ a bit of optimization for an (accumulated) reduction of the
+ penalty of the 0x80000000 special-case. */
+ if (a < 0)
+ {
+ sign = -sign;
+
+ if ((a & 0x7fffffff) == 0)
+ {
+ /* We're at 0x80000000. Tread carefully. */
+ a -= b * sign;
+ extra = sign;
+ }
+ a = -a;
+ }
+
+ /* We knowingly penalize pre-v10 models by multiplication with the
+ sign. */
+ return sign * do_31div (a, __builtin_labs (b)).quot + extra;
+}
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+unsigned long
+__Umod (unsigned long a, unsigned long b) __attribute__ ((__const__));
+
+unsigned long
+__Umod (unsigned long a, unsigned long b)
+{
+ /* Adjust operands and result if a and/or b is 32 bits. */
+ if ((long) b < 0)
+ return a >= b ? a - b : a;
+
+ if ((long) a < 0)
+ {
+ int tmp = 0;
+
+ if (b == 0)
+ return a;
+#ifdef LZ
+ tmp = LZ (b);
+#else
+ for (tmp = 31; (((long) b & (1 << tmp)) == 0); tmp--)
+ ;
+ tmp = 31 - tmp;
+#endif
+
+ if ((b << tmp) > a)
+ {
+ a -= b << (tmp - 1);
+ }
+ else
+ {
+ a -= b << tmp;
+ }
+ }
+
+ return do_31div (a, b).rem;
+}
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+long
+__Mod (long a, long b) __attribute__ ((__const__));
+
+long
+__Mod (long a, long b)
+{
+ long sign = 1;
+
+ /* We need to handle a == -2147483648 as expected and must while
+ doing that avoid producing a sequence like "abs (a) < 0" as GCC
+ may optimize out the test. That sequence may not be obvious as
+ we call inline functions. Testing for a being negative and
+ handling (presumably much rarer than positive) enables us to get
+ a bit of optimization for an (accumulated) reduction of the
+ penalty of the 0x80000000 special-case. */
+ if (a < 0)
+ {
+ sign = -1;
+ if ((a & 0x7fffffff) == 0)
+ /* We're at 0x80000000. Tread carefully. */
+ a += __builtin_labs (b);
+ a = -a;
+ }
+
+ return sign * do_31div (a, __builtin_labs (b)).rem;
+}
+#endif /* L_modsi3 */
+#endif /* L_udivsi3 || L_divsi3 || L_umodsi3 || L_modsi3 */
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h
new file mode 100644
index 000000000..9718cf9a9
--- /dev/null
+++ b/gcc/config/cris/cris-protos.h
@@ -0,0 +1,68 @@
+/* Definitions for GCC. Part of the machine description for CRIS.
+ Copyright (C) 1998, 1999, 2000, 2001, 2004, 2005, 2006, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Axis Communications.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Prototypes for the CRIS port. */
+
+#if defined(FILE) || defined(stdin) || defined(stdout) || defined(getc) || defined(putc)
+#define STDIO_INCLUDED
+#endif
+
+extern bool cris_simple_epilogue (void);
+#ifdef RTX_CODE
+extern const char *cris_op_str (rtx);
+extern void cris_notice_update_cc (rtx, rtx);
+extern bool cris_reload_address_legitimized (rtx, enum machine_mode, int, int, int);
+extern int cris_side_effect_mode_ok (enum rtx_code, rtx *, int, int,
+ int, int, int);
+extern bool cris_cc0_user_requires_cmp (rtx);
+extern rtx cris_return_addr_rtx (int, rtx);
+extern rtx cris_split_movdx (rtx *);
+extern int cris_legitimate_pic_operand (rtx);
+extern enum cris_pic_symbol_type cris_pic_symbol_type_of (rtx);
+extern bool cris_valid_pic_const (rtx, bool);
+extern bool cris_store_multiple_op_p (rtx);
+extern bool cris_movem_load_rest_p (rtx, int);
+extern void cris_asm_output_symbol_ref (FILE *, rtx);
+extern bool cris_output_addr_const_extra (FILE *, rtx);
+extern int cris_cfun_uses_pic_table (void);
+extern void cris_asm_output_case_end (FILE *, int, rtx);
+extern rtx cris_gen_movem_load (rtx, rtx, int);
+extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
+extern void cris_expand_pic_call_address (rtx *);
+extern void cris_order_for_addsi3 (rtx *, int);
+#endif /* RTX_CODE */
+extern void cris_asm_output_label_ref (FILE *, char *);
+extern void cris_target_asm_named_section (const char *, unsigned int, tree);
+extern void cris_expand_prologue (void);
+extern void cris_expand_epilogue (void);
+extern void cris_expand_return (bool);
+extern bool cris_return_address_on_stack_for_return (void);
+extern bool cris_return_address_on_stack (void);
+extern void cris_pragma_expand_mul (struct cpp_reader *);
+
+/* Need one that returns an int; usable in expressions. */
+extern int cris_fatal (char *);
+
+extern int cris_initial_elimination_offset (int, int);
+
+extern void cris_init_expanders (void);
+
+extern bool cris_function_value_regno_p (const unsigned int);
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
new file mode 100644
index 000000000..aec7cba63
--- /dev/null
+++ b/gcc/config/cris/cris.c
@@ -0,0 +1,4132 @@
+/* Definitions for GCC. Part of the machine description for CRIS.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+ 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Axis Communications. Written by Hans-Peter Nilsson.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "except.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "debug.h"
+#include "output.h"
+#include "target.h"
+#include "target-def.h"
+#include "ggc.h"
+#include "optabs.h"
+#include "df.h"
+
+/* Usable when we have an amount to add or subtract, and want the
+ optimal size of the insn. */
+#define ADDITIVE_SIZE_MODIFIER(size) \
+ ((size) <= 63 ? "q" : (size) <= 255 ? "u.b" : (size) <= 65535 ? "u.w" : ".d")
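+
+/* For example, adding 40 can use the quick-immediate form ("addq"),
+ while adding 1000 needs a 16-bit immediate ("addu.w"). */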
+
+#define LOSE_AND_RETURN(msgid, x) \
+ do \
+ { \
+ cris_operand_lossage (msgid, x); \
+ return; \
+ } while (0)
+
+enum cris_retinsn_type
+ { CRIS_RETINSN_UNKNOWN = 0, CRIS_RETINSN_RET, CRIS_RETINSN_JUMP };
+
+/* Per-function machine data. */
+struct GTY(()) machine_function
+ {
+ int needs_return_address_on_stack;
+
+ /* This is the number of registers we save in the prologue due to
+ stdarg. */
+ int stdarg_regs;
+
+ enum cris_retinsn_type return_type;
+ };
+
+/* This little fix suppresses the 'u' or 's' when '%e' appears in an
+ assembly pattern. */
+static char cris_output_insn_is_bound = 0;
+
+/* In code for output macros, this is how we know whether e.g. a constant
+ goes in code or in a static initializer. */
+static int in_code = 0;
+
+/* Fix for reg_overlap_mentioned_p. */
+static int cris_reg_overlap_mentioned_p (rtx, rtx);
+
+static enum machine_mode cris_promote_function_mode (const_tree, enum machine_mode,
+ int *, const_tree, int);
+
+static void cris_print_base (rtx, FILE *);
+
+static void cris_print_index (rtx, FILE *);
+
+static void cris_output_addr_const (FILE *, rtx);
+
+static struct machine_function * cris_init_machine_status (void);
+
+static rtx cris_struct_value_rtx (tree, int);
+
+static void cris_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree type, int *, int);
+
+static int cris_initial_frame_pointer_offset (void);
+
+static void cris_operand_lossage (const char *, rtx);
+
+static int cris_reg_saved_in_regsave_area (unsigned int, bool);
+
+static void cris_print_operand (FILE *, rtx, int);
+
+static void cris_print_operand_address (FILE *, rtx);
+
+static bool cris_print_operand_punct_valid_p (unsigned char code);
+
+static void cris_conditional_register_usage (void);
+
+static void cris_asm_output_mi_thunk
+ (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+
+static void cris_file_start (void);
+static void cris_init_libfuncs (void);
+
+static int cris_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
+static int cris_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static bool cris_rtx_costs (rtx, int, int, int *, bool);
+static int cris_address_cost (rtx, bool);
+static bool cris_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int cris_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx cris_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx cris_function_incoming_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static void cris_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static tree cris_md_asm_clobbers (tree, tree, tree);
+
+static bool cris_handle_option (size_t, const char *, int);
+static void cris_option_override (void);
+
+static bool cris_frame_pointer_required (void);
+
+static void cris_asm_trampoline_template (FILE *);
+static void cris_trampoline_init (rtx, tree, rtx);
+
+static rtx cris_function_value(const_tree, const_tree, bool);
+static rtx cris_libcall_value (enum machine_mode, const_rtx);
+
+/* This is the parsed result of the "-max-stackframe=" option. If
+ it (still) is zero, then there was no such option given. */
+int cris_max_stackframe = 0;
+
+/* This is the parsed result of the "-march=" option, if given. */
+int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options cris_option_optimization_table[] =
+ {
+ { OPT_LEVELS_2_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.dword\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
+/* We need to define these, since the 2-byte, 4-byte and 8-byte ops are only
+ available in ELF. These "normal" pseudos do not have any alignment
+ constraints or side-effects. */
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND cris_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS cris_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P cris_print_operand_punct_valid_p
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE cris_conditional_register_usage
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK cris_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START cris_file_start
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS cris_init_libfuncs
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST cris_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST cris_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS cris_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST cris_address_cost
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE cris_promote_function_mode
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX cris_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS cris_setup_incoming_varargs
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE cris_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES cris_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG cris_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG cris_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | CRIS_SUBTARGET_DEFAULT)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION cris_handle_option
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED cris_frame_pointer_required
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE cris_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE cris_option_optimization_table
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE cris_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT cris_trampoline_init
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE cris_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE cris_libcall_value
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Helper for cris_load_multiple_op and cris_ret_movem_op. */
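+
+/* Roughly, a matching two-register load (pre-V32, where register numbers
+ descend) looks like:
+
+ (parallel [(set (reg:SI 1) (mem:SI (reg:SI b)))
+ (set (reg:SI 0) (mem:SI (plus:SI (reg:SI b)
+ (const_int 4))))]) */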
+
+bool
+cris_movem_load_rest_p (rtx op, int offs)
+{
+ unsigned int reg_count = XVECLEN (op, 0) - offs;
+ rtx src_addr;
+ int i;
+ rtx elt;
+ int setno;
+ int regno_dir = 1;
+ unsigned int regno = 0;
+
+ /* Perform a quick check so we don't blow up below. FIXME: Adjust for
+ other than (MEM reg). */
+ if (reg_count <= 1
+ || GET_CODE (XVECEXP (op, 0, offs)) != SET
+ || !REG_P (SET_DEST (XVECEXP (op, 0, offs)))
+ || !MEM_P (SET_SRC (XVECEXP (op, 0, offs))))
+ return false;
+
+ /* Check a possible post-inc indicator. */
+ if (GET_CODE (SET_SRC (XVECEXP (op, 0, offs + 1))) == PLUS)
+ {
+ rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 0);
+ rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 1);
+
+ reg_count--;
+
+ if (reg_count == 1
+ || !REG_P (reg)
+ || !REG_P (SET_DEST (XVECEXP (op, 0, offs + 1)))
+ || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, offs + 1)))
+ || !CONST_INT_P (inc)
+ || INTVAL (inc) != (HOST_WIDE_INT) reg_count * 4)
+ return false;
+ i = offs + 2;
+ }
+ else
+ i = offs + 1;
+
+ if (!TARGET_V32)
+ {
+ regno_dir = -1;
+ regno = reg_count - 1;
+ }
+
+ elt = XVECEXP (op, 0, offs);
+ src_addr = XEXP (SET_SRC (elt), 0);
+
+ if (GET_CODE (elt) != SET
+ || !REG_P (SET_DEST (elt))
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != regno
+ || !MEM_P (SET_SRC (elt))
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || !memory_address_p (SImode, src_addr))
+ return false;
+
+ for (setno = 1; i < XVECLEN (op, 0); setno++, i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ regno += regno_dir;
+
+ if (GET_CODE (elt) != SET
+ || !REG_P (SET_DEST (elt))
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != regno
+ || !MEM_P (SET_SRC (elt))
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != setno * 4)
+ return false;
+ }
+
+ return true;
+}
+
+/* Worker function for predicate for the parallel contents in a movem
+ to-memory. */
+
+bool
+cris_store_multiple_op_p (rtx op)
+{
+ int reg_count = XVECLEN (op, 0);
+ rtx dest;
+ rtx dest_addr;
+ rtx dest_base;
+ int i;
+ rtx elt;
+ int setno;
+ int regno_dir = 1;
+ int regno = 0;
+ int offset = 0;
+
+ /* Perform a quick check so we don't blow up below. FIXME: Adjust for
+ other than (MEM reg) and (MEM (PLUS reg const)). */
+ if (reg_count <= 1)
+ return false;
+
+ elt = XVECEXP (op, 0, 0);
+
+ if (GET_CODE (elt) != SET)
+ return false;
+
+ dest = SET_DEST (elt);
+
+ if (!REG_P (SET_SRC (elt)) || !MEM_P (dest))
+ return false;
+
+ dest_addr = XEXP (dest, 0);
+
+ /* Check a possible post-inc indicator. */
+ if (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS)
+ {
+ rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 0);
+ rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1);
+
+ reg_count--;
+
+ if (reg_count == 1
+ || !REG_P (reg)
+ || !REG_P (SET_DEST (XVECEXP (op, 0, 1)))
+ || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, 1)))
+ || !CONST_INT_P (inc)
+ /* Support increment by number of registers, and by the offset
+ of the destination, if it has the form (MEM (PLUS reg
+ offset)). */
+ || !((REG_P (dest_addr)
+ && REGNO (dest_addr) == REGNO (reg)
+ && INTVAL (inc) == (HOST_WIDE_INT) reg_count * 4)
+ || (GET_CODE (dest_addr) == PLUS
+ && REG_P (XEXP (dest_addr, 0))
+ && REGNO (XEXP (dest_addr, 0)) == REGNO (reg)
+ && CONST_INT_P (XEXP (dest_addr, 1))
+ && INTVAL (XEXP (dest_addr, 1)) == INTVAL (inc))))
+ return false;
+
+ i = 2;
+ }
+ else
+ i = 1;
+
+ if (!TARGET_V32)
+ {
+ regno_dir = -1;
+ regno = reg_count - 1;
+ }
+
+ if (GET_CODE (elt) != SET
+ || !REG_P (SET_SRC (elt))
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != (unsigned int) regno
+ || !MEM_P (SET_DEST (elt))
+ || GET_MODE (SET_DEST (elt)) != SImode)
+ return false;
+
+ if (REG_P (dest_addr))
+ {
+ dest_base = dest_addr;
+ offset = 0;
+ }
+ else if (GET_CODE (dest_addr) == PLUS
+ && REG_P (XEXP (dest_addr, 0))
+ && CONST_INT_P (XEXP (dest_addr, 1)))
+ {
+ dest_base = XEXP (dest_addr, 0);
+ offset = INTVAL (XEXP (dest_addr, 1));
+ }
+ else
+ return false;
+
+ for (setno = 1; i < XVECLEN (op, 0); setno++, i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ regno += regno_dir;
+
+ if (GET_CODE (elt) != SET
+ || !REG_P (SET_SRC (elt))
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != (unsigned int) regno
+ || !MEM_P (SET_DEST (elt))
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_base)
+ || !CONST_INT_P (XEXP (XEXP (SET_DEST (elt), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != setno * 4 + offset)
+ return false;
+ }
+
+ return true;
+}
+
+/* The TARGET_CONDITIONAL_REGISTER_USAGE worker. */
+
+static void
+cris_conditional_register_usage (void)
+{
+ /* FIXME: This isn't nice. We should be able to use that register for
+ something else if the PIC table isn't needed. */
+ if (flag_pic)
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM]
+ = call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+
+ /* Allow use of ACR (PC in pre-V32) and tweak order. */
+ if (TARGET_V32)
+ {
+ static const int reg_alloc_order_v32[] = REG_ALLOC_ORDER_V32;
+ unsigned int i;
+
+ fixed_regs[CRIS_ACR_REGNUM] = 0;
+
+ for (i = 0;
+ i < sizeof (reg_alloc_order_v32)/sizeof (reg_alloc_order_v32[0]);
+ i++)
+ reg_alloc_order[i] = reg_alloc_order_v32[i];
+ }
+
+ if (TARGET_HAS_MUL_INSNS)
+ fixed_regs[CRIS_MOF_REGNUM] = 0;
+
+ /* On early versions, we must use the 16-bit condition-code register,
+ which has another name. */
+ if (cris_cpu_version < 8)
+ reg_names[CRIS_CC0_REGNUM] = "ccr";
+}
+
+/* Return crtl->uses_pic_offset_table. For use in cris.md,
+ since some generated files do not include function.h. */
+
+int
+cris_cfun_uses_pic_table (void)
+{
+ return crtl->uses_pic_offset_table;
+}
+
+/* Given an rtx, return the text string corresponding to the CODE of X.
+ Intended for use in the assembly language output section of a
+ define_insn. */
+
+const char *
+cris_op_str (rtx x)
+{
+ cris_output_insn_is_bound = 0;
+ switch (GET_CODE (x))
+ {
+ case PLUS:
+ return "add";
+ break;
+
+ case MINUS:
+ return "sub";
+ break;
+
+ case MULT:
+ /* This function is for retrieving a part of an instruction name for
+ an operator, for immediate output. If that ever happens for
+ MULT, we need to apply TARGET_MUL_BUG in the caller. Make sure
+ we notice. */
+ internal_error ("MULT case in cris_op_str");
+ break;
+
+ case DIV:
+ return "div";
+ break;
+
+ case AND:
+ return "and";
+ break;
+
+ case IOR:
+ return "or";
+ break;
+
+ case XOR:
+ return "xor";
+ break;
+
+ case NOT:
+ return "not";
+ break;
+
+ case ASHIFT:
+ return "lsl";
+ break;
+
+ case LSHIFTRT:
+ return "lsr";
+ break;
+
+ case ASHIFTRT:
+ return "asr";
+ break;
+
+ case UMIN:
+ /* Used to control the sign/zero-extend character for the 'E' modifier.
+ BOUND has none. */
+ cris_output_insn_is_bound = 1;
+ return "bound";
+ break;
+
+ default:
+ return "Unknown operator";
+ break;
+ }
+}
+
+/* Emit an error message when we're in an asm, and a fatal error for
+ "normal" insns. Formatted output isn't easily implemented, since we
+ use output_operand_lossage to output the actual message and handle the
+ categorization of the error. */
+
+static void
+cris_operand_lossage (const char *msgid, rtx op)
+{
+ debug_rtx (op);
+ output_operand_lossage ("%s", msgid);
+}
+
+/* Print an index part of an address to file. */
+
+static void
+cris_print_index (rtx index, FILE *file)
+{
+ /* Make the index "additive" unless we'll output a negative number, in
+ which case the sign character is free (as in free beer). */
+ if (!CONST_INT_P (index) || INTVAL (index) >= 0)
+ putc ('+', file);
+
+ if (REG_P (index))
+ fprintf (file, "$%s.b", reg_names[REGNO (index)]);
+ else if (CONSTANT_P (index))
+ cris_output_addr_const (file, index);
+ else if (GET_CODE (index) == MULT)
+ {
+ fprintf (file, "$%s.",
+ reg_names[REGNO (XEXP (index, 0))]);
+
+ putc (INTVAL (XEXP (index, 1)) == 2 ? 'w' : 'd', file);
+ }
+ else if (GET_CODE (index) == SIGN_EXTEND && MEM_P (XEXP (index, 0)))
+ {
+ rtx inner = XEXP (index, 0);
+ rtx inner_inner = XEXP (inner, 0);
+
+ if (GET_CODE (inner_inner) == POST_INC)
+ {
+ fprintf (file, "[$%s+].",
+ reg_names[REGNO (XEXP (inner_inner, 0))]);
+ putc (GET_MODE (inner) == HImode ? 'w' : 'b', file);
+ }
+ else
+ {
+ fprintf (file, "[$%s].", reg_names[REGNO (inner_inner)]);
+
+ putc (GET_MODE (inner) == HImode ? 'w' : 'b', file);
+ }
+ }
+ else if (MEM_P (index))
+ {
+ rtx inner = XEXP (index, 0);
+ if (GET_CODE (inner) == POST_INC)
+ fprintf (file, "[$%s+].d", reg_names[REGNO (XEXP (inner, 0))]);
+ else
+ fprintf (file, "[$%s].d", reg_names[REGNO (inner)]);
+ }
+ else
+ cris_operand_lossage ("unexpected index-type in cris_print_index",
+ index);
+}
+
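+/* Examples of index output from the above, as a sketch with arbitrary
+ registers: a register index prints as "+$r5.b"; (const_int 42) as
+ "+42"; (mult (reg) (const_int 4)) as "+$r5.d"; and a sign-extended
+ HImode memory index with postincrement as "+[$r5+].w". */
+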
+/* Print a base rtx of an address to file. */
+
+static void
+cris_print_base (rtx base, FILE *file)
+{
+ if (REG_P (base))
+ fprintf (file, "$%s", reg_names[REGNO (base)]);
+ else if (GET_CODE (base) == POST_INC)
+ {
+ gcc_assert (REGNO (XEXP (base, 0)) != CRIS_ACR_REGNUM);
+ fprintf (file, "$%s+", reg_names[REGNO (XEXP (base, 0))]);
+ }
+ else
+ cris_operand_lossage ("unexpected base-type in cris_print_base",
+ base);
+}
+
+/* Usable as a guard in expressions. */
+
+int
+cris_fatal (char *arg)
+{
+ internal_error (arg);
+
+ /* We'll never get here; this is just to appease compilers. */
+ return 0;
+}
+
+/* Return nonzero if REGNO is an ordinary register that *needs* to be
+ saved together with other registers, possibly by a MOVEM instruction,
+ or is saved for target-independent reasons. There may be
+ target-dependent reasons to save the register anyway; this is just a
+ wrapper for a complicated conditional. */
+
+static int
+cris_reg_saved_in_regsave_area (unsigned int regno, bool got_really_used)
+{
+ return
+ (((df_regs_ever_live_p (regno)
+ && !call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM
+ && (got_really_used
+ /* It is saved anyway, if there would be a gap. */
+ || (flag_pic
+ && df_regs_ever_live_p (regno + 1)
+ && !call_used_regs[regno + 1]))))
+ && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && regno != CRIS_SRP_REGNUM)
+ || (crtl->calls_eh_return
+ && (regno == EH_RETURN_DATA_REGNO (0)
+ || regno == EH_RETURN_DATA_REGNO (1)
+ || regno == EH_RETURN_DATA_REGNO (2)
+ || regno == EH_RETURN_DATA_REGNO (3)));
+}
+
+/* The PRINT_OPERAND worker. */
+
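+/* A few examples of the modifiers handled below, as a sketch with
+ arbitrary operands: '%b' on (const_int 255) prints -1; '%p' on
+ (const_int 8) prints 3; '%z' on (const_int 300) prints "w"; '%H' on
+ a DImode value in registers starting at $r4 prints "$r5". */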
+static void
+cris_print_operand (FILE *file, rtx x, int code)
+{
+ rtx operand = x;
+
+ /* Size-strings corresponding to MULT expressions. */
+ static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" };
+
+ /* New code entries should just be added to the switch below. If
+ handling is finished, just return. If handling was just a
+ modification of the operand, the modified operand should be put in
+ "operand", and then do a break to let default handling
+ (zero-modifier) output the operand. */
+
+ switch (code)
+ {
+ case 'b':
+      /* Print the supplied unsigned integer as if it were signed
+	 and < 0, i.e. print 255 or 65535 as -1, 254, 65534 as -2, etc. */
+ if (!CONST_INT_P (x)
+ || !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (x), 'O'))
+ LOSE_AND_RETURN ("invalid operand for 'b' modifier", x);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (x)| (INTVAL (x) <= 255 ? ~255 : ~65535));
+ return;
+
+ case 'x':
+ /* Print assembler code for operator. */
+ fprintf (file, "%s", cris_op_str (operand));
+ return;
+
+ case 'o':
+ {
+ /* A movem modifier working on a parallel; output the register
+ name. */
+ int regno;
+
+ if (GET_CODE (x) != PARALLEL)
+ LOSE_AND_RETURN ("invalid operand for 'o' modifier", x);
+
+ /* The second item can be (set reg (plus reg const)) to denote a
+ postincrement. */
+ regno
+ = (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS
+ ? XVECLEN (x, 0) - 2
+ : XVECLEN (x, 0) - 1);
+
+ fprintf (file, "$%s", reg_names [regno]);
+ }
+ return;
+
+ case 'O':
+ {
+ /* A similar movem modifier; output the memory operand. */
+ rtx addr;
+
+ if (GET_CODE (x) != PARALLEL)
+ LOSE_AND_RETURN ("invalid operand for 'O' modifier", x);
+
+ /* The lowest mem operand is in the first item, but perhaps it
+ needs to be output as postincremented. */
+ addr = MEM_P (SET_SRC (XVECEXP (x, 0, 0)))
+ ? XEXP (SET_SRC (XVECEXP (x, 0, 0)), 0)
+ : XEXP (SET_DEST (XVECEXP (x, 0, 0)), 0);
+
+ /* The second item can be a (set reg (plus reg const)) to denote
+ a modification. */
+ if (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS)
+ {
+ /* It's a post-increment, if the address is a naked (reg). */
+ if (REG_P (addr))
+ addr = gen_rtx_POST_INC (SImode, addr);
+ else
+ {
+ /* Otherwise, it's a side-effect; RN=RN+M. */
+ fprintf (file, "[$%s=$%s%s%d]",
+ reg_names [REGNO (SET_DEST (XVECEXP (x, 0, 1)))],
+ reg_names [REGNO (XEXP (addr, 0))],
+ INTVAL (XEXP (addr, 1)) < 0 ? "" : "+",
+ (int) INTVAL (XEXP (addr, 1)));
+ return;
+ }
+ }
+ output_address (addr);
+ }
+ return;
+
+ case 'p':
+ /* Adjust a power of two to its log2. */
+      if (!CONST_INT_P (x) || exact_log2 (INTVAL (x)) < 0)
+ LOSE_AND_RETURN ("invalid operand for 'p' modifier", x);
+ fprintf (file, "%d", exact_log2 (INTVAL (x)));
+ return;
+
+ case 's':
+ /* For an integer, print 'b' or 'w' if <= 255 or <= 65535
+ respectively. This modifier also terminates the inhibiting
+ effects of the 'x' modifier. */
+ cris_output_insn_is_bound = 0;
+ if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
+ {
+ if (INTVAL (x) >= 0)
+ {
+ if (INTVAL (x) <= 255)
+ putc ('b', file);
+ else if (INTVAL (x) <= 65535)
+ putc ('w', file);
+ else
+ putc ('d', file);
+ }
+ else
+ putc ('d', file);
+ return;
+ }
+
+ /* For a non-integer, print the size of the operand. */
+ putc ((GET_MODE (x) == SImode || GET_MODE (x) == SFmode)
+ ? 'd' : GET_MODE (x) == HImode ? 'w'
+ : GET_MODE (x) == QImode ? 'b'
+ /* If none of the above, emit an erroneous size letter. */
+ : 'X',
+ file);
+ return;
+
+ case 'z':
+ /* Const_int: print b for -127 <= x <= 255,
+ w for -32768 <= x <= 65535, else die. */
+ if (!CONST_INT_P (x)
+ || INTVAL (x) < -32768 || INTVAL (x) > 65535)
+ LOSE_AND_RETURN ("invalid operand for 'z' modifier", x);
+ putc (INTVAL (x) >= -128 && INTVAL (x) <= 255 ? 'b' : 'w', file);
+ return;
+
+ case 'Z':
+ /* If this is a GOT-symbol, print the size-letter corresponding to
+ -fpic/-fPIC. For everything else, print "d". */
+ putc ((flag_pic == 1
+ && GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == UNSPEC
+ && XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREAD)
+ ? 'w' : 'd', file);
+ return;
+
+ case '#':
+ /* Output a 'nop' if there's nothing for the delay slot.
+ This method stolen from the sparc files. */
+ if (dbr_sequence_length () == 0)
+ fputs ("\n\tnop", file);
+ return;
+
+ case '!':
+ /* Output directive for alignment padded with "nop" insns.
+ Optimizing for size, it's plain 4-byte alignment, otherwise we
+ align the section to a cache-line (32 bytes) and skip at max 2
+ bytes, i.e. we skip if it's the last insn on a cache-line. The
+ latter is faster by a small amount (for two test-programs 99.6%
+ and 99.9%) and larger by a small amount (ditto 100.1% and
+ 100.2%). This is supposed to be the simplest yet performance-
+ wise least intrusive way to make sure the immediately following
+ (supposed) muls/mulu insn isn't located at the end of a
+ cache-line. */
+ if (TARGET_MUL_BUG)
+ fputs (optimize_size
+ ? ".p2alignw 2,0x050f\n\t"
+ : ".p2alignw 5,0x050f,2\n\t", file);
+ return;
+
+ case ':':
+ /* The PIC register. */
+ if (! flag_pic)
+ internal_error ("invalid use of ':' modifier");
+ fprintf (file, "$%s", reg_names [PIC_OFFSET_TABLE_REGNUM]);
+ return;
+
+ case 'H':
+ /* Print high (most significant) part of something. */
+ switch (GET_CODE (operand))
+ {
+ case CONST_INT:
+	  /* With 64-bit HOST_WIDE_INTs, the whole (DImode) value is
+	     kept here, so the high part may be other than 0 or -1. */
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (operand_subword (operand, 1, 0, DImode)));
+ return;
+
+ case CONST_DOUBLE:
+ /* High part of a long long constant. */
+ if (GET_MODE (operand) == VOIDmode)
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_HIGH (x));
+ return;
+ }
+ else
+ LOSE_AND_RETURN ("invalid operand for 'H' modifier", x);
+
+ case REG:
+ /* Print reg + 1. Check that there's not an attempt to print
+ high-parts of registers like stack-pointer or higher, except
+ for SRP (where the "high part" is MOF). */
+ if (REGNO (operand) > STACK_POINTER_REGNUM - 2
+ && (REGNO (operand) != CRIS_SRP_REGNUM
+ || CRIS_SRP_REGNUM + 1 != CRIS_MOF_REGNUM
+ || fixed_regs[CRIS_MOF_REGNUM] != 0))
+ LOSE_AND_RETURN ("bad register", operand);
+ fprintf (file, "$%s", reg_names[REGNO (operand) + 1]);
+ return;
+
+ case MEM:
+ /* Adjust memory address to high part. */
+ {
+ rtx adj_mem = operand;
+ int size
+ = GET_MODE_BITSIZE (GET_MODE (operand)) / BITS_PER_UNIT;
+
+ /* Adjust so we can use two SImode in DImode.
+ Calling adj_offsettable_operand will make sure it is an
+ offsettable address. Don't do this for a postincrement
+ though; it should remain as it was. */
+ if (GET_CODE (XEXP (adj_mem, 0)) != POST_INC)
+ adj_mem
+ = adjust_address (adj_mem, GET_MODE (adj_mem), size / 2);
+
+ output_address (XEXP (adj_mem, 0));
+ return;
+ }
+
+ default:
+ LOSE_AND_RETURN ("invalid operand for 'H' modifier", x);
+ }
+
+ case 'L':
+ /* Strip the MEM expression. */
+ operand = XEXP (operand, 0);
+ break;
+
+ case 'e':
+ /* Like 'E', but ignore state set by 'x'. FIXME: Use code
+ iterators and attributes in cris.md to avoid the need for %x
+ and %E (and %e) and state passed between those modifiers. */
+ cris_output_insn_is_bound = 0;
+ /* FALL THROUGH. */
+ case 'E':
+ /* Print 's' if operand is SIGN_EXTEND or 'u' if ZERO_EXTEND unless
+ cris_output_insn_is_bound is nonzero. */
+ if (GET_CODE (operand) != SIGN_EXTEND
+ && GET_CODE (operand) != ZERO_EXTEND
+ && !CONST_INT_P (operand))
+ LOSE_AND_RETURN ("invalid operand for 'e' modifier", x);
+
+ if (cris_output_insn_is_bound)
+ {
+ cris_output_insn_is_bound = 0;
+ return;
+ }
+
+ putc (GET_CODE (operand) == SIGN_EXTEND
+ || (CONST_INT_P (operand) && INTVAL (operand) < 0)
+ ? 's' : 'u', file);
+ return;
+
+ case 'm':
+ /* Print the size letter of the inner element. We can do it by
+ calling ourselves with the 's' modifier. */
+ if (GET_CODE (operand) != SIGN_EXTEND && GET_CODE (operand) != ZERO_EXTEND)
+ LOSE_AND_RETURN ("invalid operand for 'm' modifier", x);
+ cris_print_operand (file, XEXP (operand, 0), 's');
+ return;
+
+ case 'M':
+ /* Print the least significant part of operand. */
+ if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
+ return;
+ }
+ else if (HOST_BITS_PER_WIDE_INT > 32 && CONST_INT_P (operand))
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ INTVAL (x) & ((unsigned int) 0x7fffffff * 2 + 1));
+ return;
+ }
+ /* Otherwise the least significant part equals the normal part,
+ so handle it normally. */
+ break;
+
+ case 'A':
+ /* When emitting an add for the high part of a DImode constant, we
+ want to use addq for 0 and adds.w for -1. */
+ if (!CONST_INT_P (operand))
+ LOSE_AND_RETURN ("invalid operand for 'A' modifier", x);
+ fprintf (file, INTVAL (operand) < 0 ? "adds.w" : "addq");
+ return;
+
+ case 'd':
+ /* If this is a GOT symbol, force it to be emitted as :GOT and
+ :GOTPLT regardless of -fpic (i.e. not as :GOT16, :GOTPLT16).
+ Avoid making this too much of a special case. */
+ if (flag_pic == 1 && CONSTANT_P (operand))
+ {
+ int flag_pic_save = flag_pic;
+
+ flag_pic = 2;
+ cris_output_addr_const (file, operand);
+ flag_pic = flag_pic_save;
+ return;
+ }
+ break;
+
+ case 'D':
+      /* When emitting a sub for the high part of a DImode constant, we
+	 want to use subq for 0 and subs.w for -1. */
+ if (!CONST_INT_P (operand))
+ LOSE_AND_RETURN ("invalid operand for 'D' modifier", x);
+ fprintf (file, INTVAL (operand) < 0 ? "subs.w" : "subq");
+ return;
+
+ case 'S':
+ /* Print the operand as the index-part of an address.
+ Easiest way out is to use cris_print_index. */
+ cris_print_index (operand, file);
+ return;
+
+ case 'T':
+ /* Print the size letter for an operand to a MULT, which must be a
+ const_int with a suitable value. */
+ if (!CONST_INT_P (operand) || INTVAL (operand) > 4)
+ LOSE_AND_RETURN ("invalid operand for 'T' modifier", x);
+ fprintf (file, "%s", mults[INTVAL (operand)]);
+ return;
+
+ case 'u':
+ /* Print "u.w" if a GOT symbol and flag_pic == 1, else ".d". */
+ if (flag_pic == 1
+ && GET_CODE (operand) == CONST
+ && GET_CODE (XEXP (operand, 0)) == UNSPEC
+ && XINT (XEXP (operand, 0), 1) == CRIS_UNSPEC_GOTREAD)
+ fprintf (file, "u.w");
+ else
+ fprintf (file, ".d");
+ return;
+
+ case 0:
+ /* No code, print as usual. */
+ break;
+
+ default:
+ LOSE_AND_RETURN ("invalid operand modifier letter", x);
+ }
+
+ /* Print an operand as without a modifier letter. */
+ switch (GET_CODE (operand))
+ {
+ case REG:
+ if (REGNO (operand) > 15
+ && REGNO (operand) != CRIS_MOF_REGNUM
+ && REGNO (operand) != CRIS_SRP_REGNUM
+ && REGNO (operand) != CRIS_CC0_REGNUM)
+ internal_error ("internal error: bad register: %d", REGNO (operand));
+ fprintf (file, "$%s", reg_names[REGNO (operand)]);
+ return;
+
+ case MEM:
+ output_address (XEXP (operand, 0));
+ return;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (operand) == VOIDmode)
+ /* A long long constant. */
+ output_addr_const (file, operand);
+ else
+ {
+	  /* Only single precision is allowed as plain operands at the
+ moment. FIXME: REAL_VALUE_FROM_CONST_DOUBLE isn't
+ documented. */
+ REAL_VALUE_TYPE r;
+ long l;
+
+ /* FIXME: Perhaps check overflow of the "single". */
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ fprintf (file, "0x%lx", l);
+ }
+ return;
+
+ case UNSPEC:
+ /* Fall through. */
+ case CONST:
+ cris_output_addr_const (file, operand);
+ return;
+
+ case MULT:
+ case ASHIFT:
+ {
+ /* For a (MULT (reg X) const_int) we output "rX.S". */
+ int i = CONST_INT_P (XEXP (operand, 1))
+ ? INTVAL (XEXP (operand, 1)) : INTVAL (XEXP (operand, 0));
+ rtx reg = CONST_INT_P (XEXP (operand, 1))
+ ? XEXP (operand, 0) : XEXP (operand, 1);
+
+ if (!REG_P (reg)
+ || (!CONST_INT_P (XEXP (operand, 0))
+ && !CONST_INT_P (XEXP (operand, 1))))
+ LOSE_AND_RETURN ("unexpected multiplicative operand", x);
+
+ cris_print_base (reg, file);
+ fprintf (file, ".%c",
+ i == 0 || (i == 1 && GET_CODE (operand) == MULT) ? 'b'
+ : i == 4 ? 'd'
+ : (i == 2 && GET_CODE (operand) == MULT) || i == 1 ? 'w'
+ : 'd');
+ return;
+ }
+
+ default:
+ /* No need to handle all strange variants, let output_addr_const
+ do it for us. */
+ if (CONSTANT_P (operand))
+ {
+ cris_output_addr_const (file, operand);
+ return;
+ }
+
+ LOSE_AND_RETURN ("unexpected operand", x);
+ }
+}
+
+static bool
+cris_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '#' || code == '!' || code == ':');
+}
+
+/* The PRINT_OPERAND_ADDRESS worker. */
+
+static void
+cris_print_operand_address (FILE *file, rtx x)
+{
+ /* All these were inside MEM:s so output indirection characters. */
+ putc ('[', file);
+
+ if (CONSTANT_ADDRESS_P (x))
+ cris_output_addr_const (file, x);
+ else if (BASE_OR_AUTOINCR_P (x))
+ cris_print_base (x, file);
+ else if (GET_CODE (x) == PLUS)
+ {
+ rtx x1, x2;
+
+ x1 = XEXP (x, 0);
+ x2 = XEXP (x, 1);
+ if (BASE_P (x1))
+ {
+ cris_print_base (x1, file);
+ cris_print_index (x2, file);
+ }
+ else if (BASE_P (x2))
+ {
+ cris_print_base (x2, file);
+ cris_print_index (x1, file);
+ }
+ else
+ LOSE_AND_RETURN ("unrecognized address", x);
+ }
+ else if (MEM_P (x))
+ {
+ /* A DIP. Output more indirection characters. */
+ putc ('[', file);
+ cris_print_base (XEXP (x, 0), file);
+ putc (']', file);
+ }
+ else
+ LOSE_AND_RETURN ("unrecognized address", x);
+
+ putc (']', file);
+}
+
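+/* Output sketches for the address forms handled above (registers
+ arbitrary): a plain base prints as "[$r5]", a postincrement as
+ "[$r5+]", base plus constant as "[$r5+42]", base plus index as
+ "[$r5+$r6.b]", and a double-indirect (DIP) address as "[[$r7]]". */
+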
+/* The RETURN_ADDR_RTX worker.
+ We mark that the return address is used, either by EH or
+ __builtin_return_address, for use by the function prologue and
+ epilogue. FIXME: This isn't optimal; we just use the mark in the
+ prologue and epilogue to say that the return address is to be stored
+ in the stack frame. We could return SRP for leaf-functions and use the
+ initial-value machinery. */
+
+rtx
+cris_return_addr_rtx (int count, rtx frameaddr ATTRIBUTE_UNUSED)
+{
+ cfun->machine->needs_return_address_on_stack = 1;
+
+ /* The return-address is stored just above the saved frame-pointer (if
+ present). Apparently we can't eliminate from the frame-pointer in
+ that direction, so use the incoming args (maybe pretended) pointer. */
+ return count == 0
+ ? gen_rtx_MEM (Pmode, plus_constant (virtual_incoming_args_rtx, -4))
+ : NULL_RTX;
+}
+
+/* Accessor used in cris.md:return because cfun->machine isn't available
+ there. */
+
+bool
+cris_return_address_on_stack (void)
+{
+ return df_regs_ever_live_p (CRIS_SRP_REGNUM)
+ || cfun->machine->needs_return_address_on_stack;
+}
+
+/* Accessor used in cris.md:return because cfun->machine isn't available
+ there. */
+
+bool
+cris_return_address_on_stack_for_return (void)
+{
+ return cfun->machine->return_type == CRIS_RETINSN_RET ? false
+ : cris_return_address_on_stack ();
+}
+
+/* This used to be the INITIAL_FRAME_POINTER_OFFSET worker; now only
+ handles FP -> SP elimination offset. */
+
+static int
+cris_initial_frame_pointer_offset (void)
+{
+ int regno;
+
+ /* Initial offset is 0 if we don't have a frame pointer. */
+ int offs = 0;
+ bool got_really_used = false;
+
+ if (crtl->uses_pic_offset_table)
+ {
+ push_topmost_sequence ();
+ got_really_used
+ = reg_used_between_p (pic_offset_table_rtx, get_insns (),
+ NULL_RTX);
+ pop_topmost_sequence ();
+ }
+
+ /* And 4 for each register pushed. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (cris_reg_saved_in_regsave_area (regno, got_really_used))
+ offs += 4;
+
+ /* And then, last, we add the locals allocated. */
+ offs += get_frame_size ();
+
+ /* And more; the accumulated args size. */
+ offs += crtl->outgoing_args_size;
+
+ /* Then round it off, in case we use aligned stack. */
+ if (TARGET_STACK_ALIGN)
+ offs = TARGET_ALIGN_BY_32 ? (offs + 3) & ~3 : (offs + 1) & ~1;
+
+ return offs;
+}
+
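+/* A worked example of the calculation above, under assumed options:
+ with three registers saved (12 bytes), 10 bytes of locals, 8 bytes
+ of outgoing args and 32-bit stack alignment, the offset is
+ 12 + 10 + 8 = 30, rounded up by the final alignment step to 32. */
+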
+/* The INITIAL_ELIMINATION_OFFSET worker.
+ Calculate the difference between imaginary registers such as frame
+ pointer and the stack pointer. Used to eliminate the frame pointer
+ and imaginary arg pointer. */
+
+int
+cris_initial_elimination_offset (int fromreg, int toreg)
+{
+ int fp_sp_offset
+ = cris_initial_frame_pointer_offset ();
+
+  /* We should be able to use regs_ever_live and related prologue
+     information here; the alpha port manages to, so this one should too. */
+ bool return_address_on_stack = cris_return_address_on_stack ();
+
+ /* Here we act as if the frame-pointer were needed. */
+ int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0);
+
+ if (fromreg == ARG_POINTER_REGNUM
+ && toreg == FRAME_POINTER_REGNUM)
+ return ap_fp_offset;
+
+ /* Between the frame pointer and the stack are only "normal" stack
+ variables and saved registers. */
+ if (fromreg == FRAME_POINTER_REGNUM
+ && toreg == STACK_POINTER_REGNUM)
+ return fp_sp_offset;
+
+ /* We need to balance out the frame pointer here. */
+ if (fromreg == ARG_POINTER_REGNUM
+ && toreg == STACK_POINTER_REGNUM)
+ return ap_fp_offset + fp_sp_offset - 4;
+
+ gcc_unreachable ();
+}
+
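+/* A rough sketch of the frame layout assumed above, higher addresses
+ first:
+
+   incoming args                 <- arg pointer
+   return address (if on stack)
+   saved frame pointer           <- frame pointer
+   saved registers, locals,
+   outgoing args                 <- stack pointer
+
+ The arg-pointer-to-stack-pointer case subtracts 4 because the
+ frame-pointer save slot is not actually present when the frame
+ pointer itself is eliminated. */
+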
+/* Worker function for LEGITIMIZE_RELOAD_ADDRESS. */
+
+bool
+cris_reload_address_legitimized (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum ATTRIBUTE_UNUSED,
+ int itype,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ enum reload_type type = (enum reload_type) itype;
+ rtx op0, op1;
+ rtx *op1p;
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ if (TARGET_V32)
+ return false;
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ op1p = &XEXP (x, 1);
+
+ if (!REG_P (op1))
+ return false;
+
+ if (GET_CODE (op0) == SIGN_EXTEND && MEM_P (XEXP (op0, 0)))
+ {
+ rtx op00 = XEXP (op0, 0);
+ rtx op000 = XEXP (op00, 0);
+ rtx *op000p = &XEXP (op00, 0);
+
+ if ((GET_MODE (op00) == HImode || GET_MODE (op00) == QImode)
+ && (REG_P (op000)
+ || (GET_CODE (op000) == POST_INC && REG_P (XEXP (op000, 0)))))
+ {
+ bool something_reloaded = false;
+
+ if (GET_CODE (op000) == POST_INC
+ && REG_P (XEXP (op000, 0))
+ && REGNO (XEXP (op000, 0)) > CRIS_LAST_GENERAL_REGISTER)
+	    /* No, this gets too complicated and is too rare to be worth
+	       trying to improve on the general code here.
+	       As the return value is an all-or-nothing indicator, we
+	       punt on the other register too. */
+ return false;
+
+ if ((REG_P (op000)
+ && REGNO (op000) > CRIS_LAST_GENERAL_REGISTER))
+ {
+ /* The address of the inner mem is a pseudo or wrong
+ reg: reload that. */
+ push_reload (op000, NULL_RTX, op000p, NULL, GENERAL_REGS,
+ GET_MODE (x), VOIDmode, 0, 0, opnum, type);
+ something_reloaded = true;
+ }
+
+ if (REGNO (op1) > CRIS_LAST_GENERAL_REGISTER)
+ {
+ /* Base register is a pseudo or wrong reg: reload it. */
+ push_reload (op1, NULL_RTX, op1p, NULL, GENERAL_REGS,
+ GET_MODE (x), VOIDmode, 0, 0,
+ opnum, type);
+ something_reloaded = true;
+ }
+
+ gcc_assert (something_reloaded);
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Worker function for TARGET_REGISTER_MOVE_COST. */
+
+static int
+cris_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ if (!TARGET_V32)
+ {
+ /* Pretend that classes that we don't support are ALL_REGS, so
+ we give them the highest cost. */
+ if (from != SPECIAL_REGS && from != MOF_REGS
+ && from != GENERAL_REGS && from != GENNONACR_REGS)
+ from = ALL_REGS;
+
+ if (to != SPECIAL_REGS && to != MOF_REGS
+ && to != GENERAL_REGS && to != GENNONACR_REGS)
+ to = ALL_REGS;
+ }
+
+  /* Can't move to and from a SPECIAL_REGS register, so we have to say
+     their move cost within that class is higher. How about 7? That's 3
+     for a move to a GENERAL_REGS register, 3 for the move from the
+     GENERAL_REGS register, and 1 for the increased register pressure.
+     Also, it's higher than the memory move cost, as it should be.
+     We also do this for ALL_REGS, since we don't want that class to be
+     preferred (even to memory) at all where GENERAL_REGS doesn't fit.
+     Whenever it's about to be used, it's for SPECIAL_REGS. If we don't
+     present a higher cost for ALL_REGS than memory, a SPECIAL_REGS may be
+     used when a GENERAL_REGS should be used, even if there are call-saved
+     GENERAL_REGS left to allocate. This is because the fall-back when
+     the most preferred register class isn't available isn't the next
+     (or next good) wider register class, but the widest register
+     class. */
+
+ if ((reg_classes_intersect_p (from, SPECIAL_REGS)
+ && reg_classes_intersect_p (to, SPECIAL_REGS))
+ || from == ALL_REGS || to == ALL_REGS)
+ return 7;
+
+ if (reg_classes_intersect_p (from, SPECIAL_REGS)
+ || reg_classes_intersect_p (to, SPECIAL_REGS))
+ return 3;
+
+ return 2;
+}
+
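+/* In summary, a sketch of the costs returned above: moves where both
+ classes involve SPECIAL_REGS, or where either class is ALL_REGS,
+ cost 7; moves touching SPECIAL_REGS on one side only cost 3; all
+ other moves cost 2. */
+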
+/* Worker function for TARGET_MEMORY_MOVE_COST.
+
+ This isn't strictly correct for v0..3 in buswidth-8bit mode, but should
+ suffice. */
+
+static int
+cris_memory_move_cost (enum machine_mode mode,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ if (mode == QImode
+ || mode == HImode)
+ return 4;
+ else
+ return 6;
+}
+
+/* Worker for cris_notice_update_cc; handles the "normal" cases.
+ FIXME: this code is historical; its functionality should be
+ refactored to look at insn attributes and moved to
+ cris_notice_update_cc. Except, we better lose cc0 entirely. */
+
+static void
+cris_normal_notice_update_cc (rtx exp, rtx insn)
+{
+ /* "Normal" means, for:
+ (set (cc0) (...)):
+ CC is (...).
+
+ (set (reg) (...)):
+ CC is (reg) and (...) - unless (...) is 0 or reg is a special
+ register or (v32 and (...) is -32..-1), then CC does not change.
+ CC_NO_OVERFLOW unless (...) is reg or mem.
+
+ (set (mem) (...)):
+ CC does not change.
+
+ (set (pc) (...)):
+ CC does not change.
+
+ (parallel
+ (set (reg1) (mem (bdap/biap)))
+ (set (reg2) (bdap/biap))):
+ CC is (reg1) and (mem (reg2))
+
+ (parallel
+ (set (mem (bdap/biap)) (reg1)) [or 0]
+ (set (reg2) (bdap/biap))):
+ CC does not change.
+
+ (where reg and mem includes strict_low_parts variants thereof)
+
+ For all others, assume CC is clobbered.
+ Note that we do not have to care about setting CC_NO_OVERFLOW,
+ since the overflow flag is set to 0 (i.e. right) for
+ instructions where it does not have any sane sense, but where
+ other flags have meanings. (This includes shifts; the carry is
+ not set by them).
+
+ Note that there are other parallel constructs we could match,
+ but we don't do that yet. */
+
+ if (GET_CODE (exp) == SET)
+ {
+ /* FIXME: Check when this happens. It looks like we should
+ actually do a CC_STATUS_INIT here to be safe. */
+ if (SET_DEST (exp) == pc_rtx)
+ return;
+
+ /* Record CC0 changes, so we do not have to output multiple
+ test insns. */
+ if (SET_DEST (exp) == cc0_rtx)
+ {
+ CC_STATUS_INIT;
+
+ if (GET_CODE (SET_SRC (exp)) == COMPARE
+ && XEXP (SET_SRC (exp), 1) == const0_rtx)
+ cc_status.value1 = XEXP (SET_SRC (exp), 0);
+ else
+ cc_status.value1 = SET_SRC (exp);
+
+ /* Handle flags for the special btstq on one bit. */
+ if (GET_CODE (cc_status.value1) == ZERO_EXTRACT
+ && XEXP (cc_status.value1, 1) == const1_rtx)
+ {
+ if (CONST_INT_P (XEXP (cc_status.value1, 0)))
+ /* Using cmpq. */
+ cc_status.flags = CC_INVERTED;
+ else
+ /* A one-bit btstq. */
+ cc_status.flags = CC_Z_IN_NOT_N;
+ }
+
+ else if (GET_CODE (SET_SRC (exp)) == COMPARE)
+ {
+ if (!REG_P (XEXP (SET_SRC (exp), 0))
+ && XEXP (SET_SRC (exp), 1) != const0_rtx)
+ /* For some reason gcc will not canonicalize compare
+ operations, reversing the sign by itself if
+ operands are in wrong order. */
+ /* (But NOT inverted; eq is still eq.) */
+ cc_status.flags = CC_REVERSED;
+
+ /* This seems to be overlooked by gcc. FIXME: Check again.
+ FIXME: Is it really safe? */
+ cc_status.value2
+ = gen_rtx_MINUS (GET_MODE (SET_SRC (exp)),
+ XEXP (SET_SRC (exp), 0),
+ XEXP (SET_SRC (exp), 1));
+ }
+ return;
+ }
+ else if (REG_P (SET_DEST (exp))
+ || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART
+ && REG_P (XEXP (SET_DEST (exp), 0))))
+ {
+ /* A register is set; normally CC is set to show that no
+ test insn is needed. Catch the exceptions. */
+
+ /* If not to cc0, then no "set"s in non-natural mode give
+ ok cc0... */
+ if (GET_MODE_SIZE (GET_MODE (SET_DEST (exp))) > UNITS_PER_WORD
+ || GET_MODE_CLASS (GET_MODE (SET_DEST (exp))) == MODE_FLOAT)
+ {
+ /* ... except add:s and sub:s in DImode. */
+ if (GET_MODE (SET_DEST (exp)) == DImode
+ && (GET_CODE (SET_SRC (exp)) == PLUS
+ || GET_CODE (SET_SRC (exp)) == MINUS))
+ {
+ CC_STATUS_INIT;
+ cc_status.value1 = SET_DEST (exp);
+ cc_status.value2 = SET_SRC (exp);
+
+ if (cris_reg_overlap_mentioned_p (cc_status.value1,
+ cc_status.value2))
+ cc_status.value2 = 0;
+
+ /* Add and sub may set V, which gets us
+ unoptimizable results in "gt" and "le" condition
+ codes. */
+ cc_status.flags |= CC_NO_OVERFLOW;
+
+ return;
+ }
+ }
+ else if (SET_SRC (exp) == const0_rtx
+ || (REG_P (SET_SRC (exp))
+ && (REGNO (SET_SRC (exp))
+ > CRIS_LAST_GENERAL_REGISTER))
+ || (TARGET_V32
+ && GET_CODE (SET_SRC (exp)) == CONST_INT
+ && CRIS_CONST_OK_FOR_LETTER_P (INTVAL (SET_SRC (exp)),
+ 'I')))
+ {
+ /* There's no CC0 change for this case. Just check
+ for overlap. */
+ if (cc_status.value1
+ && modified_in_p (cc_status.value1, insn))
+ cc_status.value1 = 0;
+
+ if (cc_status.value2
+ && modified_in_p (cc_status.value2, insn))
+ cc_status.value2 = 0;
+
+ return;
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ cc_status.value1 = SET_DEST (exp);
+ cc_status.value2 = SET_SRC (exp);
+
+ if (cris_reg_overlap_mentioned_p (cc_status.value1,
+ cc_status.value2))
+ cc_status.value2 = 0;
+
+ /* Some operations may set V, which gets us
+ unoptimizable results in "gt" and "le" condition
+ codes. */
+ if (GET_CODE (SET_SRC (exp)) == PLUS
+ || GET_CODE (SET_SRC (exp)) == MINUS
+ || GET_CODE (SET_SRC (exp)) == NEG)
+ cc_status.flags |= CC_NO_OVERFLOW;
+
+ /* For V32, nothing with a register destination sets
+ C and V usefully. */
+ if (TARGET_V32)
+ cc_status.flags |= CC_NO_OVERFLOW;
+
+ return;
+ }
+ }
+ else if (MEM_P (SET_DEST (exp))
+ || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART
+ && MEM_P (XEXP (SET_DEST (exp), 0))))
+ {
+ /* When SET to MEM, then CC is not changed (except for
+ overlap). */
+ if (cc_status.value1
+ && modified_in_p (cc_status.value1, insn))
+ cc_status.value1 = 0;
+
+ if (cc_status.value2
+ && modified_in_p (cc_status.value2, insn))
+ cc_status.value2 = 0;
+
+ return;
+ }
+ }
+ else if (GET_CODE (exp) == PARALLEL)
+ {
+ if (GET_CODE (XVECEXP (exp, 0, 0)) == SET
+ && GET_CODE (XVECEXP (exp, 0, 1)) == SET
+ && REG_P (XEXP (XVECEXP (exp, 0, 1), 0)))
+ {
+ if (REG_P (XEXP (XVECEXP (exp, 0, 0), 0))
+ && MEM_P (XEXP (XVECEXP (exp, 0, 0), 1)))
+ {
+ CC_STATUS_INIT;
+
+ /* For "move.S [rx=ry+o],rz", say CC reflects
+ value1=rz and value2=[rx] */
+ cc_status.value1 = XEXP (XVECEXP (exp, 0, 0), 0);
+ cc_status.value2
+ = replace_equiv_address (XEXP (XVECEXP (exp, 0, 0), 1),
+ XEXP (XVECEXP (exp, 0, 1), 0));
+
+ /* Huh? A side-effect cannot change the destination
+ register. */
+ if (cris_reg_overlap_mentioned_p (cc_status.value1,
+ cc_status.value2))
+ internal_error ("internal error: sideeffect-insn affecting main effect");
+
+ /* For V32, moves to registers don't set C and V. */
+ if (TARGET_V32)
+ cc_status.flags |= CC_NO_OVERFLOW;
+ return;
+ }
+ else if ((REG_P (XEXP (XVECEXP (exp, 0, 0), 1))
+ || XEXP (XVECEXP (exp, 0, 0), 1) == const0_rtx)
+ && MEM_P (XEXP (XVECEXP (exp, 0, 0), 0)))
+ {
+ /* For "move.S rz,[rx=ry+o]" and "clear.S [rx=ry+o]",
+ say flags are not changed, except for overlap. */
+ if (cc_status.value1
+ && modified_in_p (cc_status.value1, insn))
+ cc_status.value1 = 0;
+
+ if (cc_status.value2
+ && modified_in_p (cc_status.value2, insn))
+ cc_status.value2 = 0;
+
+ return;
+ }
+ }
+ }
+
+ /* If we got here, the case wasn't covered by the code above. */
+ CC_STATUS_INIT;
+}
+
+/* This function looks into the pattern to see how this insn affects
+ condition codes.
+
+   Used when deciding whether test insns before a condition-code user,
+   such as an "scc" insn or a conditional branch, can be eliminated.
+   This includes checking whether the entities that cc was updated
+   from are changed by the operation.
+
+ Currently a jumble of the old peek-inside-the-insn and the newer
+ check-cc-attribute methods. */
+
+void
+cris_notice_update_cc (rtx exp, rtx insn)
+{
+ enum attr_cc attrval = get_attr_cc (insn);
+
+ /* Check if user specified "-mcc-init" as a bug-workaround. Remember
+ to still set CC_REVERSED as below, since that's required by some
+ compare insn alternatives. (FIXME: GCC should do this virtual
+ operand swap by itself.) A test-case that may otherwise fail is
+ gcc.c-torture/execute/20000217-1.c -O0 and -O1. */
+ if (TARGET_CCINIT)
+ {
+ CC_STATUS_INIT;
+
+ if (attrval == CC_REV)
+ cc_status.flags = CC_REVERSED;
+ return;
+ }
+
+ /* Slowly, we're converting to using attributes to control the setting
+ of condition-code status. */
+ switch (attrval)
+ {
+ case CC_NONE:
+ /* Even if it is "none", a setting may clobber a previous
+ cc-value, so check. */
+ if (GET_CODE (exp) == SET)
+ {
+ if (cc_status.value1
+ && modified_in_p (cc_status.value1, insn))
+ cc_status.value1 = 0;
+
+ if (cc_status.value2
+ && modified_in_p (cc_status.value2, insn))
+ cc_status.value2 = 0;
+ }
+ return;
+
+ case CC_CLOBBER:
+ CC_STATUS_INIT;
+ return;
+
+ case CC_REV:
+ case CC_NOOV32:
+ case CC_NORMAL:
+ cris_normal_notice_update_cc (exp, insn);
+
+ /* The "test" insn doesn't clear (carry and) overflow on V32. We
+ can change bge => bpl and blt => bmi by passing on to the cc0
+ user that V should not be considered; bgt and ble are taken
+ care of by other methods (see {tst,cmp}{si,hi,qi}). */
+ if (attrval == CC_NOOV32 && TARGET_V32)
+ cc_status.flags |= CC_NO_OVERFLOW;
+ return;
+
+ default:
+ internal_error ("unknown cc_attr value");
+ }
+
+ CC_STATUS_INIT;
+}
+
+/* Return != 0 if the return sequence for the current function is short,
+ like "ret" or "jump [sp+]". Prior to reloading, we can't tell if
+ registers must be saved, so return 0 then. */
+
+bool
+cris_simple_epilogue (void)
+{
+ unsigned int regno;
+ unsigned int reglimit = STACK_POINTER_REGNUM;
+ bool got_really_used = false;
+
+ if (! reload_completed
+ || frame_pointer_needed
+ || get_frame_size () != 0
+ || crtl->args.pretend_args_size
+ || crtl->args.size
+ || crtl->outgoing_args_size
+ || crtl->calls_eh_return
+
+ /* If we're not supposed to emit prologue and epilogue, we must
+ not emit return-type instructions. */
+ || !TARGET_PROLOGUE_EPILOGUE)
+ return false;
+
+ /* Can't return from stacked return address with v32. */
+ if (TARGET_V32 && cris_return_address_on_stack ())
+ return false;
+
+ if (crtl->uses_pic_offset_table)
+ {
+ push_topmost_sequence ();
+ got_really_used
+ = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX);
+ pop_topmost_sequence ();
+ }
+
+ /* No simple epilogue if there are saved registers. */
+ for (regno = 0; regno < reglimit; regno++)
+ if (cris_reg_saved_in_regsave_area (regno, got_really_used))
+ return false;
+
+ return true;
+}
+
+/* Expand a return insn (just one insn) marked as using SRP or stack
+ slot depending on parameter ON_STACK. */
+
+void
+cris_expand_return (bool on_stack)
+{
+ /* FIXME: emit a parallel with a USE for SRP or the stack-slot, to
+ tell "ret" from "jump [sp+]". Some, but not all, other parts of
+ GCC expect just (return) to do the right thing when optimizing, so
+ we do that until they're fixed. Currently, all return insns in a
+ function must be the same (not really a limiting factor) so we need
+ to check that it doesn't change half-way through. */
+ emit_jump_insn (gen_rtx_RETURN (VOIDmode));
+
+ CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack);
+ CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack);
+
+ cfun->machine->return_type
+ = on_stack ? CRIS_RETINSN_JUMP : CRIS_RETINSN_RET;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+cris_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ {
+ HOST_WIDE_INT val = INTVAL (x);
+ if (val == 0)
+ *total = 0;
+ else if (val < 32 && val >= -32)
+ *total = 1;
+	/* Values needing eight or 16 bits are a word and a cycle more
+	   expensive. */
+ else if (val <= 32767 && val >= -32768)
+ *total = 2;
+ /* A 32-bit constant (or very seldom, unsigned 16 bits) costs
+ another word. FIXME: This isn't linear to 16 bits. */
+ else
+ *total = 4;
+ return true;
+ }
+
+ case LABEL_REF:
+ *total = 6;
+ return true;
+
+ case CONST:
+ case SYMBOL_REF:
+ *total = 6;
+ return true;
+
+ case CONST_DOUBLE:
+ if (x != CONST0_RTX (GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x)))
+ *total = 12;
+ else
+ /* Make 0.0 cheap, else test-insns will not be used. */
+ *total = 0;
+ return true;
+
+ case MULT:
+ /* If we have one arm of an ADDI, make sure it gets the cost of
+ one insn, i.e. zero cost for this operand, and just the cost
+ of the PLUS, as the insn is created by combine from a PLUS
+ and an ASHIFT, and the MULT cost below would make the
+ combined value be larger than the separate insns. The insn
+ validity is checked elsewhere by combine.
+
+ FIXME: this case is a stop-gap for 4.3 and 4.4, this whole
+ function should be rewritten. */
+ if (outer_code == PLUS && BIAP_INDEX_P (x))
+ {
+ *total = 0;
+ return true;
+ }
+
+      /* Identify values that are not powers of two. Powers of 2 are
+         taken care of already and those values should not be changed. */
+      if (!CONST_INT_P (XEXP (x, 1))
+	  || exact_log2 (INTVAL (XEXP (x, 1))) < 0)
+ {
+ /* If we have a multiply insn, then the cost is between
+ 1 and 2 "fast" instructions. */
+ if (TARGET_HAS_MUL_INSNS)
+ {
+ *total = COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2;
+ return true;
+ }
+
+	  /* Estimate as 4 + 4 * number of bits, i.e. 132 for 32 bits. */
+ *total = COSTS_N_INSNS (132);
+ return true;
+ }
+ return false;
+
+ case UDIV:
+ case MOD:
+ case UMOD:
+ case DIV:
+      if (!CONST_INT_P (XEXP (x, 1))
+	  || exact_log2 (INTVAL (XEXP (x, 1))) < 0)
+ {
+	  /* Estimate this as 4 + 8 * number of bits, i.e. 260 for 32 bits. */
+ *total = COSTS_N_INSNS (260);
+ return true;
+ }
+ return false;
+
+ case AND:
+ if (CONST_INT_P (XEXP (x, 1))
+ /* Two constants may actually happen before optimization. */
+ && !CONST_INT_P (XEXP (x, 0))
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (XEXP (x, 1)), 'I'))
+ {
+ *total
+ = (rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, speed) + 2
+ + 2 * GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))));
+ return true;
+ }
+ return false;
+
+ case ZERO_EXTRACT:
+ if (outer_code != COMPARE)
+ return false;
+ /* fall through */
+
+ case ZERO_EXTEND: case SIGN_EXTEND:
+ *total = rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, speed);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* The ADDRESS_COST worker. */
+
+static int
+cris_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ /* The metric to use for the cost-macros is unclear.
+ The metric used here is (the number of cycles needed) / 2,
+ where we consider equal a cycle for a word of code and a cycle to
+ read memory. FIXME: Adding "+ 1" to all values would avoid
+ returning 0, as tree-ssa-loop-ivopts.c as of r128272 "normalizes"
+ 0 to 1, thereby giving equal costs to [rN + rM] and [rN].
+ Unfortunately(?) such a hack would expose other pessimizations,
+ at least with g++.dg/tree-ssa/ivopts-1.C, adding insns to the
+ loop there, without apparent reason. */
+
+ /* The cheapest addressing modes get 0, since nothing extra is needed. */
+ if (BASE_OR_AUTOINCR_P (x))
+ return 0;
+
+ /* An indirect mem must be a DIP. This means two bytes extra for code,
+ and 4 bytes extra for memory read, i.e. (2 + 4) / 2. */
+ if (MEM_P (x))
+ return (2 + 4) / 2;
+
+ /* Assume (2 + 4) / 2 for a single constant; a dword, since it needs
+ an extra DIP prefix and 4 bytes of constant in most cases. */
+ if (CONSTANT_P (x))
+ return (2 + 4) / 2;
+
+ /* Handle BIAP and BDAP prefixes. */
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx tem1 = XEXP (x, 0);
+ rtx tem2 = XEXP (x, 1);
+
+ /* Local extended canonicalization rule: the first operand must
+ be REG, unless it's an operation (MULT). */
+ if (!REG_P (tem1) && GET_CODE (tem1) != MULT)
+ tem1 = tem2, tem2 = XEXP (x, 0);
+
+ /* We'll "assume" we have canonical RTX now. */
+ gcc_assert (REG_P (tem1) || GET_CODE (tem1) == MULT);
+
+ /* A BIAP is 2 extra bytes for the prefix insn, nothing more. We
+ recognize the typical MULT which is always in tem1 because of
+ insn canonicalization. */
+ if ((GET_CODE (tem1) == MULT && BIAP_INDEX_P (tem1))
+ || REG_P (tem2))
+ return 2 / 2;
+
+ /* A BDAP (quick) is 2 extra bytes. Any constant operand to the
+ PLUS is always found in tem2. */
+ if (CONST_INT_P (tem2) && INTVAL (tem2) < 128 && INTVAL (tem2) >= -128)
+ return 2 / 2;
+
+ /* A BDAP -32768 .. 32767 is like BDAP quick, but with 2 extra
+ bytes. */
+ if (CONST_INT_P (tem2)
+ && CRIS_CONST_OK_FOR_LETTER_P (INTVAL (tem2), 'L'))
+ return (2 + 2) / 2;
+
+ /* A BDAP with some other constant is 2 bytes extra. */
+ if (CONSTANT_P (tem2))
+ return (2 + 2 + 2) / 2;
+
+ /* BDAP with something indirect should have a higher cost than
+ BIAP with register. FIXME: Should it cost like a MEM or more? */
+ return (2 + 2 + 2) / 2;
+ }
+
+ /* What else? Return a high cost. It matters only for valid
+ addressing modes. */
+ return 10;
+}
+
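+/* A summary sketch of the costs above, in CRIS assembly notation:
+ "[rN]" and "[rN+]" cost 0; an indirect "[[rN]]" or a plain constant
+ address costs 3; "[rN+rM.S]", "[rN+rM]" and a quick offset in
+ -128..127 cost 1; a 16-bit offset costs 2; other BDAP operands cost
+ 3; and anything unrecognized gets the catch-all 10. */
+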
+/* Check various objections to the side-effect. Used in the test-part
+ of an anonymous insn describing an insn with a possible side-effect.
+ Returns nonzero if the implied side-effect is ok.
+
+   code : PLUS or MULT.
+   ops : An array of rtx:es. The parameters lreg, rreg, rval,
+         multop and other_op below are indexes into this array,
+         or -1 if they are not applicable.
+   lreg : The register that gets assigned in the side-effect.
+   rreg : One register in the side-effect expression.
+   rval : The other register, or an int.
+   multop : An integer to multiply rval with.
+   other_op : One of the entities of the main effect,
+              whose mode we must consider. */
+
+int
+cris_side_effect_mode_ok (enum rtx_code code, rtx *ops,
+ int lreg, int rreg, int rval,
+ int multop, int other_op)
+{
+  /* Find what value to multiply with, for rx = ry + rz * n. */
+ int mult = multop < 0 ? 1 : INTVAL (ops[multop]);
+
+ rtx reg_rtx = ops[rreg];
+ rtx val_rtx = ops[rval];
+
+ /* The operands may be swapped. Canonicalize them in reg_rtx and
+ val_rtx, where reg_rtx always is a reg (for this constraint to
+ match). */
+ if (! BASE_P (reg_rtx))
+ reg_rtx = val_rtx, val_rtx = ops[rreg];
+
+ /* Don't forget to check that reg_rtx really is a reg. If it isn't,
+ we have no business. */
+ if (! BASE_P (reg_rtx))
+ return 0;
+
+ /* Don't do this when -mno-split. */
+ if (!TARGET_SIDE_EFFECT_PREFIXES)
+ return 0;
+
+ /* The mult expression may be hidden in lreg. FIXME: Add more
+ commentary about that. */
+ if (GET_CODE (val_rtx) == MULT)
+ {
+ mult = INTVAL (XEXP (val_rtx, 1));
+ val_rtx = XEXP (val_rtx, 0);
+ code = MULT;
+ }
+
+ /* First check the "other operand". */
+ if (other_op >= 0)
+ {
+ if (GET_MODE_SIZE (GET_MODE (ops[other_op])) > UNITS_PER_WORD)
+ return 0;
+
+ /* Check if the lvalue register is the same as the "other
+ operand". If so, the result is undefined and we shouldn't do
+ this. FIXME: Check again. */
+ if ((BASE_P (ops[lreg])
+ && BASE_P (ops[other_op])
+ && REGNO (ops[lreg]) == REGNO (ops[other_op]))
+ || rtx_equal_p (ops[other_op], ops[lreg]))
+ return 0;
+ }
+
+ /* Do not accept frame_pointer_rtx as any operand. */
+ if (ops[lreg] == frame_pointer_rtx || ops[rreg] == frame_pointer_rtx
+ || ops[rval] == frame_pointer_rtx
+ || (other_op >= 0 && ops[other_op] == frame_pointer_rtx))
+ return 0;
+
+ if (code == PLUS
+ && ! BASE_P (val_rtx))
+ {
+
+ /* Do not allow rx = rx + n if a normal add or sub with same size
+ would do. */
+ if (rtx_equal_p (ops[lreg], reg_rtx)
+ && CONST_INT_P (val_rtx)
+ && (INTVAL (val_rtx) <= 63 && INTVAL (val_rtx) >= -63))
+ return 0;
+
+ /* Check allowed cases, like [r(+)?].[bwd] and const. */
+ if (CONSTANT_P (val_rtx))
+ return 1;
+
+ if (MEM_P (val_rtx) && BASE_OR_AUTOINCR_P (XEXP (val_rtx, 0)))
+ return 1;
+
+ if (GET_CODE (val_rtx) == SIGN_EXTEND
+ && MEM_P (XEXP (val_rtx, 0))
+ && BASE_OR_AUTOINCR_P (XEXP (XEXP (val_rtx, 0), 0)))
+ return 1;
+
+ /* If we got here, it's not a valid addressing mode. */
+ return 0;
+ }
+ else if (code == MULT
+ || (code == PLUS && BASE_P (val_rtx)))
+ {
+ /* Do not allow rx = rx + ry.S, since it doesn't give better code. */
+ if (rtx_equal_p (ops[lreg], reg_rtx)
+ || (mult == 1 && rtx_equal_p (ops[lreg], val_rtx)))
+ return 0;
+
+ /* Do not allow bad multiply-values. */
+ if (mult != 1 && mult != 2 && mult != 4)
+ return 0;
+
+ /* Only allow r + ... */
+ if (! BASE_P (reg_rtx))
+ return 0;
+
+ /* If we got here, all seems ok.
+ (All checks need to be done above). */
+ return 1;
+ }
+
+ /* If we get here, the caller got its initial tests wrong. */
+ internal_error ("internal error: cris_side_effect_mode_ok with bad operands");
+}
+
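+/* For example (a sketch; registers arbitrary): the side-effect
+ assignment r1 = r2 + r3.d behind an insn output as
+ "move.d [$r1=$r2+$r3.d],$r0" is accepted above, while r1 = r1 + 4
+ is rejected, since a plain addq would do as well. */
+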
+/* Whether next_cc0_user of insn is LE or GT or requires a real compare
+ insn for other reasons. */
+
+bool
+cris_cc0_user_requires_cmp (rtx insn)
+{
+ rtx cc0_user = NULL;
+ rtx body;
+ rtx set;
+
+ gcc_assert (insn != NULL);
+
+ if (!TARGET_V32)
+ return false;
+
+ cc0_user = next_cc0_user (insn);
+ if (cc0_user == NULL)
+ return false;
+
+ body = PATTERN (cc0_user);
+ set = single_set (cc0_user);
+
+ /* Users can be sCC and bCC. */
+ if (JUMP_P (cc0_user)
+ && GET_CODE (body) == SET
+ && SET_DEST (body) == pc_rtx
+ && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE
+ && XEXP (XEXP (SET_SRC (body), 0), 0) == cc0_rtx)
+ {
+ return
+ GET_CODE (XEXP (SET_SRC (body), 0)) == GT
+ || GET_CODE (XEXP (SET_SRC (body), 0)) == LE;
+ }
+ else if (set)
+ {
+ return
+ GET_CODE (SET_SRC (body)) == GT
+ || GET_CODE (SET_SRC (body)) == LE;
+ }
+
+ gcc_unreachable ();
+}
+
+/* The function reg_overlap_mentioned_p in CVS (still as of 2001-05-16)
+ does not handle the case where the IN operand is strict_low_part; it
+ does handle it for X. Test-case in Axis-20010516. This function takes
+ care of that for THIS port. FIXME: strict_low_part is going away
+ anyway. */
+
+static int
+cris_reg_overlap_mentioned_p (rtx x, rtx in)
+{
+  /* The function reg_overlap_mentioned_p now handles when X is
+     strict_low_part, but not when IN is a STRICT_LOW_PART. */
+ if (GET_CODE (in) == STRICT_LOW_PART)
+ in = XEXP (in, 0);
+
+ return reg_overlap_mentioned_p (x, in);
+}
+
+/* The TARGET_ASM_NAMED_SECTION worker.
+ We just dispatch to the functions for ELF and a.out. */
+
+void
+cris_target_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ if (! TARGET_ELF)
+ default_no_named_section (name, flags, decl);
+ else
+ default_elf_asm_named_section (name, flags, decl);
+}
+
+/* Return TRUE iff X is a CONST valid for e.g. indexing.
+ ANY_OPERAND is 0 if X is in a CALL_P insn or movsi, 1
+ elsewhere. */
+
+bool
+cris_valid_pic_const (rtx x, bool any_operand)
+{
+ gcc_assert (flag_pic);
+
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return true;
+ default:
+ ;
+ }
+
+ if (GET_CODE (x) != CONST)
+ return false;
+
+ x = XEXP (x, 0);
+
+ /* Handle (const (plus (unspec .. UNSPEC_GOTREL) (const_int ...))). */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == UNSPEC
+ && (XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREL
+ || XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_PCREL)
+ && CONST_INT_P (XEXP (x, 1)))
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ /* A PCREL operand is only valid for call and movsi. */
+ case CRIS_UNSPEC_PLT_PCREL:
+ case CRIS_UNSPEC_PCREL:
+ return !any_operand;
+
+ case CRIS_UNSPEC_PLT_GOTREL:
+ case CRIS_UNSPEC_PLTGOTREAD:
+ case CRIS_UNSPEC_GOTREAD:
+ case CRIS_UNSPEC_GOTREL:
+ return true;
+ default:
+ gcc_unreachable ();
+ }
+
+ return cris_pic_symbol_type_of (x) == cris_no_symbol;
+}
+
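+/* For example, a sketch of what the above accepts:
+   (const (plus (unspec [(symbol_ref "x")] CRIS_UNSPEC_GOTREL)
+                (const_int 4)))
+ is valid in any operand position, while an UNSPEC wrapping
+ CRIS_UNSPEC_PCREL is valid only with ANY_OPERAND false, i.e. in
+ call and movsi operands. */
+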
+/* Helper function to find the right PIC-type symbol to generate,
+ given the original (non-PIC) representation. */
+
+enum cris_pic_symbol_type
+cris_pic_symbol_type_of (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ return SYMBOL_REF_LOCAL_P (x)
+ ? cris_rel_symbol : cris_got_symbol;
+
+ case LABEL_REF:
+ return cris_rel_symbol;
+
+ case CONST:
+ return cris_pic_symbol_type_of (XEXP (x, 0));
+
+ case PLUS:
+ case MINUS:
+ {
+ enum cris_pic_symbol_type t1 = cris_pic_symbol_type_of (XEXP (x, 0));
+ enum cris_pic_symbol_type t2 = cris_pic_symbol_type_of (XEXP (x, 1));
+
+ gcc_assert (t1 == cris_no_symbol || t2 == cris_no_symbol);
+
+	if (t1 == cris_got_symbol || t2 == cris_got_symbol)
+ return cris_got_symbol_needing_fixup;
+
+ return t1 != cris_no_symbol ? t1 : t2;
+ }
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return cris_no_symbol;
+
+ case UNSPEC:
+ /* Likely an offsettability-test attempting to add a constant to
+ a GOTREAD symbol, which can't be handled. */
+ return cris_invalid_pic_symbol;
+
+ default:
+ fatal_insn ("unrecognized supposed constant", x);
+ }
+
+ gcc_unreachable ();
+}
+
+/* The LEGITIMATE_PIC_OPERAND_P worker. */
+
+int
+cris_legitimate_pic_operand (rtx x)
+{
+ /* Symbols are not valid PIC operands as-is; just constants. */
+ return cris_valid_pic_const (x, true);
+}
+
+/* The ASM_OUTPUT_CASE_END worker. */
+
+void
+cris_asm_output_case_end (FILE *stream, int num, rtx table)
+{
+ if (TARGET_V32)
+ {
+ rtx whole_jump_insn = PATTERN (PREV_INSN (PREV_INSN (table)));
+
+ /* This can be a SEQUENCE, meaning the delay-slot of the jump is
+ filled. */
+ rtx parallel_jump
+ = (GET_CODE (whole_jump_insn) == SEQUENCE
+ ? PATTERN (XVECEXP (whole_jump_insn, 0, 0)) : whole_jump_insn);
+
+ asm_fprintf (stream,
+ "\t.word %LL%d-.%s\n",
+ CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (XVECEXP
+ (parallel_jump, 0, 0),
+ 1), 2), 0)),
+ (TARGET_PDEBUG ? "; default" : ""));
+ return;
+ }
+
+ asm_fprintf (stream,
+ "\t.word %LL%d-%LL%d%s\n",
+ CODE_LABEL_NUMBER (XEXP
+ (XEXP
+ (XEXP
+ (XVECEXP
+ (PATTERN
+ (PREV_INSN
+ (PREV_INSN (table))), 0, 0), 1),
+ 2), 0)),
+ num,
+ (TARGET_PDEBUG ? "; default" : ""));
+}
+
+/* TARGET_HANDLE_OPTION worker. We just store the values into local
+ variables here. Checks for correct semantics are in
+ cris_option_override. */
+
+static bool
+cris_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_metrax100:
+ target_flags
+ |= (MASK_SVINTO
+ + MASK_ETRAX4_ADD
+ + MASK_ALIGN_BY_32);
+ break;
+
+ case OPT_mno_etrax100:
+ target_flags
+ &= ~(MASK_SVINTO
+ + MASK_ETRAX4_ADD
+ + MASK_ALIGN_BY_32);
+ break;
+
+ case OPT_m32_bit:
+ case OPT_m32bit:
+ target_flags
+ |= (MASK_STACK_ALIGN
+ + MASK_CONST_ALIGN
+ + MASK_DATA_ALIGN
+ + MASK_ALIGN_BY_32);
+ break;
+
+ case OPT_m16_bit:
+ case OPT_m16bit:
+ target_flags
+ |= (MASK_STACK_ALIGN
+ + MASK_CONST_ALIGN
+ + MASK_DATA_ALIGN);
+ break;
+
+ case OPT_m8_bit:
+ case OPT_m8bit:
+ target_flags
+ &= ~(MASK_STACK_ALIGN
+ + MASK_CONST_ALIGN
+ + MASK_DATA_ALIGN);
+ break;
+
+ default:
+ break;
+ }
+
+ CRIS_SUBTARGET_HANDLE_OPTION(code, arg, value);
+
+ return true;
+}
+
+/* The TARGET_OPTION_OVERRIDE worker.
+ As is the norm, this also parses -mfoo=bar type parameters. */
+
+static void
+cris_option_override (void)
+{
+ if (cris_max_stackframe_str)
+ {
+ cris_max_stackframe = atoi (cris_max_stackframe_str);
+
+ /* Do some sanity checking. */
+ if (cris_max_stackframe < 0 || cris_max_stackframe > 0x20000000)
+ internal_error ("-max-stackframe=%d is not usable, not between 0 and %d",
+ cris_max_stackframe, 0x20000000);
+ }
+
+ /* Let "-metrax4" and "-metrax100" change the cpu version. */
+ if (TARGET_SVINTO && cris_cpu_version < CRIS_CPU_SVINTO)
+ cris_cpu_version = CRIS_CPU_SVINTO;
+ else if (TARGET_ETRAX4_ADD && cris_cpu_version < CRIS_CPU_ETRAX4)
+ cris_cpu_version = CRIS_CPU_ETRAX4;
+
+ /* Parse -march=... and its synonym, the deprecated -mcpu=... */
+ if (cris_cpu_str)
+ {
+ cris_cpu_version
+ = (*cris_cpu_str == 'v' ? atoi (cris_cpu_str + 1) : -1);
+
+ if (strcmp ("etrax4", cris_cpu_str) == 0)
+ cris_cpu_version = 3;
+
+ if (strcmp ("svinto", cris_cpu_str) == 0
+ || strcmp ("etrax100", cris_cpu_str) == 0)
+ cris_cpu_version = 8;
+
+ if (strcmp ("ng", cris_cpu_str) == 0
+ || strcmp ("etrax100lx", cris_cpu_str) == 0)
+ cris_cpu_version = 10;
+
+ if (cris_cpu_version < 0 || cris_cpu_version > 32)
+ error ("unknown CRIS version specification in -march= or -mcpu= : %s",
+ cris_cpu_str);
+
+ /* Set the target flags. */
+ if (cris_cpu_version >= CRIS_CPU_ETRAX4)
+ target_flags |= MASK_ETRAX4_ADD;
+
+ /* If this is Svinto or higher, align for 32 bit accesses. */
+ if (cris_cpu_version >= CRIS_CPU_SVINTO)
+ target_flags
+ |= (MASK_SVINTO | MASK_ALIGN_BY_32
+ | MASK_STACK_ALIGN | MASK_CONST_ALIGN
+ | MASK_DATA_ALIGN);
+
+ /* Note that we do not add new flags when it can be completely
+ described with a macro that uses -mcpu=X. So
+ TARGET_HAS_MUL_INSNS is (cris_cpu_version >= CRIS_CPU_NG). */
+ }
+
+ if (cris_tune_str)
+ {
+ int cris_tune
+ = (*cris_tune_str == 'v' ? atoi (cris_tune_str + 1) : -1);
+
+ if (strcmp ("etrax4", cris_tune_str) == 0)
+ cris_tune = 3;
+
+ if (strcmp ("svinto", cris_tune_str) == 0
+ || strcmp ("etrax100", cris_tune_str) == 0)
+ cris_tune = 8;
+
+ if (strcmp ("ng", cris_tune_str) == 0
+ || strcmp ("etrax100lx", cris_tune_str) == 0)
+ cris_tune = 10;
+
+ if (cris_tune < 0 || cris_tune > 32)
+ error ("unknown CRIS cpu version specification in -mtune= : %s",
+ cris_tune_str);
+
+ if (cris_tune >= CRIS_CPU_SVINTO)
+ /* We have currently nothing more to tune than alignment for
+ memory accesses. */
+ target_flags
+ |= (MASK_STACK_ALIGN | MASK_CONST_ALIGN
+ | MASK_DATA_ALIGN | MASK_ALIGN_BY_32);
+ }
+
+ if (cris_cpu_version >= CRIS_CPU_V32)
+ target_flags &= ~(MASK_SIDE_EFFECT_PREFIXES|MASK_MUL_BUG);
+
+ if (flag_pic)
+ {
+ /* Use error rather than warning, so invalid use is easily
+ detectable. Still change to the values we expect, to avoid
+ further errors. */
+ if (! TARGET_LINUX)
+ {
+ error ("-fPIC and -fpic are not supported in this configuration");
+ flag_pic = 0;
+ }
+
+ /* Turn off function CSE. We need to have the addresses reach the
+ call expanders to get PLT-marked, as they could otherwise be
+ compared against zero directly or indirectly. After visiting the
+ call expanders they will then be cse:ed, as the call expanders
+ force_reg the addresses, effectively forcing flag_no_function_cse
+ to 0. */
+ flag_no_function_cse = 1;
+ }
+
+ if (write_symbols == DWARF2_DEBUG && ! TARGET_ELF)
+ {
+ warning (0, "that particular -g option is invalid with -maout and -melinux");
+ write_symbols = DBX_DEBUG;
+ }
+
+ /* Set the per-function-data initializer. */
+ init_machine_status = cris_init_machine_status;
+}
+
+/* The TARGET_ASM_OUTPUT_MI_THUNK worker. */
+
+static void
+cris_asm_output_mi_thunk (FILE *stream,
+ tree thunkdecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree funcdecl)
+{
+ if (delta > 0)
+ fprintf (stream, "\tadd%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n",
+ ADDITIVE_SIZE_MODIFIER (delta), delta,
+ reg_names[CRIS_FIRST_ARG_REG]);
+ else if (delta < 0)
+ fprintf (stream, "\tsub%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n",
+ ADDITIVE_SIZE_MODIFIER (-delta), -delta,
+ reg_names[CRIS_FIRST_ARG_REG]);
+
+ if (flag_pic)
+ {
+ const char *name = XSTR (XEXP (DECL_RTL (funcdecl), 0), 0);
+
+ name = (* targetm.strip_name_encoding) (name);
+
+ if (TARGET_V32)
+ {
+ fprintf (stream, "\tba ");
+ assemble_name (stream, name);
+ fprintf (stream, "%s\n", CRIS_PLT_PCOFFSET_SUFFIX);
+ }
+ else
+ {
+ fprintf (stream, "add.d ");
+ assemble_name (stream, name);
+ fprintf (stream, "%s,$pc\n", CRIS_PLT_PCOFFSET_SUFFIX);
+ }
+ }
+ else
+ {
+ fprintf (stream, "jump ");
+ assemble_name (stream, XSTR (XEXP (DECL_RTL (funcdecl), 0), 0));
+ fprintf (stream, "\n");
+
+ if (TARGET_V32)
+ fprintf (stream, "\tnop\n");
+ }
+}
+
+/* Boilerplate emitted at start of file.
+
+ NO_APP *only at file start* means faster assembly. It also means
+ comments are not allowed. In some cases comments will be output
+ for debugging purposes. Make sure they are allowed then.
+
+ We want a .file directive only if TARGET_ELF. */
+static void
+cris_file_start (void)
+{
+ /* These expressions can vary at run time, so we cannot put
+ them into TARGET_INITIALIZER. */
+ targetm.asm_file_start_app_off = !(TARGET_PDEBUG || flag_print_asm_name);
+ targetm.asm_file_start_file_directive = TARGET_ELF;
+
+ default_file_start ();
+}
+
+/* Rename the function calls for integer multiply and divide. */
+static void
+cris_init_libfuncs (void)
+{
+ set_optab_libfunc (smul_optab, SImode, "__Mul");
+ set_optab_libfunc (sdiv_optab, SImode, "__Div");
+ set_optab_libfunc (udiv_optab, SImode, "__Udiv");
+ set_optab_libfunc (smod_optab, SImode, "__Mod");
+ set_optab_libfunc (umod_optab, SImode, "__Umod");
+}
+
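+/* Thus an SImode multiplication that would by default call __mulsi3
+ calls __Mul instead, and likewise a division calls __Div rather
+ than __divsi3 (a sketch, assuming the default libgcc names). */
+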
+/* The INIT_EXPANDERS worker sets the per-function-data initializer and
+ mark functions. */
+
+void
+cris_init_expanders (void)
+{
+ /* Nothing here at the moment. */
+}
+
+/* Zero initialization is OK for all current fields. */
+
+static struct machine_function *
+cris_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Split a 2 word move (DI or presumably DF) into component parts.
+ Originally a copy of gen_split_move_double in m32r.c. */
+
+rtx
+cris_split_movdx (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx val;
+
+ /* We used to have to handle (SUBREG (MEM)) here, but that should no
+ longer happen; after reload there are no SUBREGs any more, and we're
+ only called after reload. */
+ CRIS_ASSERT (GET_CODE (dest) != SUBREG && GET_CODE (src) != SUBREG);
+
+ start_sequence ();
+ if (REG_P (dest))
+ {
+ int dregno = REGNO (dest);
+
+ /* Reg-to-reg copy. */
+ if (REG_P (src))
+ {
+ int sregno = REGNO (src);
+
+ int reverse = (dregno == sregno + 1);
+
+	  /* We normally copy the low-numbered register first. However, if
+	     the first register of operand 0 is the same as the second
+	     register of operand 1, we must copy in the opposite order. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, reverse, TRUE, mode),
+ operand_subword (src, reverse, TRUE, mode)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, !reverse, TRUE, mode),
+ operand_subword (src, !reverse, TRUE, mode)));
+ }
+ /* Constant-to-reg copy. */
+ else if (CONST_INT_P (src) || GET_CODE (src) == CONST_DOUBLE)
+ {
+ rtx words[2];
+ split_double (src, &words[0], &words[1]);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 0, TRUE, mode),
+ words[0]));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 1, TRUE, mode),
+ words[1]));
+ }
+ /* Mem-to-reg copy. */
+ else if (MEM_P (src))
+ {
+ /* If the high-address word is used in the address, we must load it
+ last. Otherwise, load it first. */
+ rtx addr = XEXP (src, 0);
+ int reverse
+ = (refers_to_regno_p (dregno, dregno + 1, addr, NULL) != 0);
+
+ /* The original code implies that we can't do
+ move.x [rN+],rM move.x [rN],rM+1
+ when rN is dead, because of REG_NOTES damage. That is
+ consistent with what I've seen, so don't try it.
+
+ We have two different cases here; if the addr is POST_INC,
+ just pass it through, otherwise add constants. */
+
+ if (GET_CODE (addr) == POST_INC)
+ {
+ rtx mem;
+ rtx insn;
+
+ /* Whenever we emit insns with post-incremented
+ addresses ourselves, we must add a post-inc note
+ manually. */
+ mem = change_address (src, SImode, addr);
+ insn
+ = gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 0, TRUE, mode), mem);
+ insn = emit_insn (insn);
+ if (GET_CODE (XEXP (mem, 0)) == POST_INC)
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0),
+ REG_NOTES (insn));
+
+ mem = copy_rtx (mem);
+ insn
+ = gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 1, TRUE, mode), mem);
+ insn = emit_insn (insn);
+ if (GET_CODE (XEXP (mem, 0)) == POST_INC)
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0),
+ REG_NOTES (insn));
+ }
+ else
+ {
+ /* Make sure we don't get any other addresses with
+ embedded postincrements. They should be stopped in
+ GO_IF_LEGITIMATE_ADDRESS, but we're here for your
+ safety. */
+ if (side_effects_p (addr))
+ fatal_insn ("unexpected side-effects in address", addr);
+
+ emit_insn (gen_rtx_SET
+ (VOIDmode,
+ operand_subword (dest, reverse, TRUE, mode),
+ change_address
+ (src, SImode,
+ plus_constant (addr,
+ reverse * UNITS_PER_WORD))));
+ emit_insn (gen_rtx_SET
+ (VOIDmode,
+ operand_subword (dest, ! reverse, TRUE, mode),
+ change_address
+ (src, SImode,
+ plus_constant (addr,
+ (! reverse) *
+ UNITS_PER_WORD))));
+ }
+ }
+ else
+ internal_error ("unknown src");
+ }
+ /* Reg-to-mem copy or clear mem. */
+ else if (MEM_P (dest)
+ && (REG_P (src)
+ || src == const0_rtx
+ || src == CONST0_RTX (DFmode)))
+ {
+ rtx addr = XEXP (dest, 0);
+
+ if (GET_CODE (addr) == POST_INC)
+ {
+ rtx mem;
+ rtx insn;
+
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ mem = change_address (dest, SImode, addr);
+ insn
+ = gen_rtx_SET (VOIDmode,
+ mem, operand_subword (src, 0, TRUE, mode));
+ insn = emit_insn (insn);
+ if (GET_CODE (XEXP (mem, 0)) == POST_INC)
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0),
+ REG_NOTES (insn));
+
+ mem = copy_rtx (mem);
+ insn
+ = gen_rtx_SET (VOIDmode,
+ mem,
+ operand_subword (src, 1, TRUE, mode));
+ insn = emit_insn (insn);
+ if (GET_CODE (XEXP (mem, 0)) == POST_INC)
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0),
+ REG_NOTES (insn));
+ }
+ else
+ {
+ /* Make sure we don't get any other addresses with embedded
+ postincrements. They should be stopped in
+ GO_IF_LEGITIMATE_ADDRESS, but we're here for your safety. */
+ if (side_effects_p (addr))
+ fatal_insn ("unexpected side-effects in address", addr);
+
+ emit_insn (gen_rtx_SET
+ (VOIDmode,
+ change_address (dest, SImode, addr),
+ operand_subword (src, 0, TRUE, mode)));
+
+ emit_insn (gen_rtx_SET
+ (VOIDmode,
+ change_address (dest, SImode,
+ plus_constant (addr,
+ UNITS_PER_WORD)),
+ operand_subword (src, 1, TRUE, mode)));
+ }
+ }
+
+ else
+ internal_error ("unknown dest");
+
+ val = get_insns ();
+ end_sequence ();
+ return val;
+}
+
+/* The expander for the prologue pattern name. */
+
+void
+cris_expand_prologue (void)
+{
+ int regno;
+ int size = get_frame_size ();
+ /* Shorten the used name for readability. */
+ int cfoa_size = crtl->outgoing_args_size;
+ int last_movem_reg = -1;
+ int framesize = 0;
+ rtx mem, insn;
+ int return_address_on_stack = cris_return_address_on_stack ();
+ int got_really_used = false;
+ int n_movem_regs = 0;
+ int pretend = crtl->args.pretend_args_size;
+
+ /* Don't do anything if no prologues or epilogues are wanted. */
+ if (!TARGET_PROLOGUE_EPILOGUE)
+ return;
+
+ CRIS_ASSERT (size >= 0);
+
+ if (crtl->uses_pic_offset_table)
+ {
+ /* A reference may have been optimized out (like the abort () in
+ fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that
+ it's still used. */
+ push_topmost_sequence ();
+ got_really_used
+ = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX);
+ pop_topmost_sequence ();
+ }
+
+ /* Align the size to what's best for the CPU model. */
+ if (TARGET_STACK_ALIGN)
+ size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1;
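+  /* E.g. with 32-bit alignment, a 5-byte frame is rounded up to 8
+     bytes; with 16-bit alignment, to 6 bytes.  */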
+
+ if (pretend)
+ {
+      /* See also cris_setup_incoming_varargs where
+	 cfun->machine->stdarg_regs is set.  There are other setters of
+	 crtl->args.pretend_args_size than stdarg handling, like
+	 for an argument passed with parts in R13 and stack.  We must
+	 not store R13 into the pretend-area for that case, as GCC does
+	 that itself.  "Our" store would be marked as redundant and GCC
+	 would attempt to remove it, which would then be flagged as an
+	 internal error: trying to remove a frame-related insn.  */
+ int stdarg_regs = cfun->machine->stdarg_regs;
+
+ framesize += pretend;
+
+ for (regno = CRIS_FIRST_ARG_REG + CRIS_MAX_ARGS_IN_REGS - 1;
+ stdarg_regs > 0;
+ regno--, pretend -= 4, stdarg_regs--)
+ {
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -4)));
+	  /* FIXME: When emitting dwarf2 frame output, and unless
+	     asynchronous exceptions are enabled, make dwarf2 bundle
+	     together all stack adjustments like it does for registers
+	     between stack adjustments.  */
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+ insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno));
+
+ /* Note the absence of RTX_FRAME_RELATED_P on the above insn:
+ the value isn't restored, so we don't want to tell dwarf2
+ that it's been stored to stack, else EH handling info would
+ get confused. */
+ }
+
+ /* For other setters of crtl->args.pretend_args_size, we
+ just adjust the stack by leaving the remaining size in
+ "pretend", handled below. */
+ }
+
+ /* Save SRP if not a leaf function. */
+ if (return_address_on_stack)
+ {
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -4 - pretend)));
+ pretend = 0;
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ framesize += 4;
+ }
+
+ /* Set up the frame pointer, if needed. */
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -4 - pretend)));
+ pretend = 0;
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (mem, frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ framesize += 4;
+ }
+
+ /* Between frame-pointer and saved registers lie the area for local
+ variables. If we get here with "pretended" size remaining, count
+ it into the general stack size. */
+ size += pretend;
+
+ /* Get a contiguous sequence of registers, starting with R0, that need
+ to be saved. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ if (cris_reg_saved_in_regsave_area (regno, got_really_used))
+ {
+ n_movem_regs++;
+
+ /* Check if movem may be used for registers so far. */
+ if (regno == last_movem_reg + 1)
+ /* Yes, update next expected register. */
+ last_movem_reg = regno;
+ else
+ {
+ /* We cannot use movem for all registers. We have to flush
+ any movem:ed registers we got so far. */
+ if (last_movem_reg != -1)
+ {
+ int n_saved
+ = (n_movem_regs == 1) ? 1 : last_movem_reg + 1;
+
+		  /* It is a win to use a side-effect assignment for
+		     64 <= size <= 128.  But side-effect on movem was
+		     not usable for CRIS v0..3.  Also only do it if
+		     side-effect insns are allowed.  */
+ if ((last_movem_reg + 1) * 4 + size >= 64
+ && (last_movem_reg + 1) * 4 + size <= 128
+ && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1)
+ && TARGET_SIDE_EFFECT_PREFIXES)
+ {
+ mem
+ = gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ -(n_saved * 4 + size)));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn
+ = cris_emit_movem_store (mem, GEN_INT (n_saved),
+ -(n_saved * 4 + size),
+ true);
+ }
+ else
+ {
+ insn
+ = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -(n_saved * 4 + size)));
+ insn = emit_insn (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = cris_emit_movem_store (mem, GEN_INT (n_saved),
+ 0, true);
+ }
+
+ framesize += n_saved * 4 + size;
+ last_movem_reg = -1;
+ size = 0;
+ }
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -4 - size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ framesize += 4 + size;
+ size = 0;
+ }
+ }
+ }
+
+  /* Check afterwards whether we could movem all registers; this is the
+     normal case.  */
+ if (last_movem_reg != -1)
+ {
+ int n_saved
+ = (n_movem_regs == 1) ? 1 : last_movem_reg + 1;
+
+      /* Side-effect on movem was not usable for CRIS v0..3.  Also only
+	 do it if side-effect insns are allowed.  */
+ if ((last_movem_reg + 1) * 4 + size >= 64
+ && (last_movem_reg + 1) * 4 + size <= 128
+ && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1)
+ && TARGET_SIDE_EFFECT_PREFIXES)
+ {
+ mem
+ = gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ -(n_saved * 4 + size)));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = cris_emit_movem_store (mem, GEN_INT (n_saved),
+ -(n_saved * 4 + size), true);
+ }
+ else
+ {
+ insn
+ = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -(n_saved * 4 + size)));
+ insn = emit_insn (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ mem = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = cris_emit_movem_store (mem, GEN_INT (n_saved), 0, true);
+ }
+
+ framesize += n_saved * 4 + size;
+ /* We have to put outgoing argument space after regs. */
+ if (cfoa_size)
+ {
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -cfoa_size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ framesize += cfoa_size;
+ }
+ }
+ else if ((size + cfoa_size) > 0)
+ {
+ insn = emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -(cfoa_size + size))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ framesize += size + cfoa_size;
+ }
+
+ /* Set up the PIC register, if it is used. */
+ if (got_really_used)
+ {
+ rtx got
+ = gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), CRIS_UNSPEC_GOT);
+ emit_move_insn (pic_offset_table_rtx, got);
+
+ /* FIXME: This is a cover-up for flow2 messing up; it doesn't
+ follow exceptional paths and tries to delete the GOT load as
+ unused, if it isn't used on the non-exceptional paths. Other
+ ports have similar or other cover-ups, or plain bugs marking
+ the GOT register load as maybe-dead. To see this, remove the
+ line below and try libsupc++/vec.cc or a trivial
+ "static void y (); void x () {try {y ();} catch (...) {}}". */
+ emit_use (pic_offset_table_rtx);
+ }
+
+ if (cris_max_stackframe && framesize > cris_max_stackframe)
+ warning (0, "stackframe too big: %d bytes", framesize);
+}
+
+/* The expander for the epilogue pattern. */
+
+void
+cris_expand_epilogue (void)
+{
+ int regno;
+ int size = get_frame_size ();
+ int last_movem_reg = -1;
+ int argspace_offset = crtl->outgoing_args_size;
+ int pretend = crtl->args.pretend_args_size;
+ rtx mem;
+ bool return_address_on_stack = cris_return_address_on_stack ();
+ /* A reference may have been optimized out
+ (like the abort () in fde_split in unwind-dw2-fde.c, at least 3.2.1)
+ so check that it's still used. */
+ int got_really_used = false;
+ int n_movem_regs = 0;
+
+ if (!TARGET_PROLOGUE_EPILOGUE)
+ return;
+
+ if (crtl->uses_pic_offset_table)
+ {
+ /* A reference may have been optimized out (like the abort () in
+ fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that
+ it's still used. */
+ push_topmost_sequence ();
+ got_really_used
+ = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX);
+ pop_topmost_sequence ();
+ }
+
+ /* Align byte count of stack frame. */
+ if (TARGET_STACK_ALIGN)
+ size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1;
+
+ /* Check how many saved regs we can movem. They start at r0 and must
+ be contiguous. */
+ for (regno = 0;
+ regno < FIRST_PSEUDO_REGISTER;
+ regno++)
+ if (cris_reg_saved_in_regsave_area (regno, got_really_used))
+ {
+ n_movem_regs++;
+
+ if (regno == last_movem_reg + 1)
+ last_movem_reg = regno;
+ else
+ break;
+ }
+
+  /* If there was only one register that really needed to be restored
+     through movem, don't use movem.  */
+ if (n_movem_regs == 1)
+ last_movem_reg = -1;
+
+ /* Now emit "normal" move insns for all regs higher than the movem
+ regs. */
+ for (regno = FIRST_PSEUDO_REGISTER - 1;
+ regno > last_movem_reg;
+ regno--)
+ if (cris_reg_saved_in_regsave_area (regno, got_really_used))
+ {
+ rtx insn;
+
+ if (argspace_offset)
+ {
+ /* There is an area for outgoing parameters located before
+ the saved registers. We have to adjust for that. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ argspace_offset)));
+ /* Make sure we only do this once. */
+ argspace_offset = 0;
+ }
+
+ mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (gen_rtx_raw_REG (SImode, regno), mem);
+
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn));
+ }
+
+ /* If we have any movem-restore, do it now. */
+ if (last_movem_reg != -1)
+ {
+ rtx insn;
+
+ if (argspace_offset)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ argspace_offset)));
+ argspace_offset = 0;
+ }
+
+ mem = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn
+ = emit_insn (cris_gen_movem_load (mem,
+ GEN_INT (last_movem_reg + 1), 0));
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ if (side_effects_p (PATTERN (insn)))
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn));
+ }
+
+ /* If we don't clobber all of the allocated stack area (we've already
+ deallocated saved registers), GCC might want to schedule loads from
+ the stack to *after* the stack-pointer restore, which introduces an
+ interrupt race condition. This happened for the initial-value
+     SRP-restore for g++.dg/eh/registers1.C (noticed by inspection of
+     another failure for that test).  It also happened for the stack slot
+ for the return value in (one version of)
+ linux/fs/dcache.c:__d_lookup, at least with "-O2
+ -fno-omit-frame-pointer". */
+
+ /* Restore frame pointer if necessary. */
+ if (frame_pointer_needed)
+ {
+ rtx insn;
+
+ emit_insn (gen_cris_frame_deallocated_barrier ());
+
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (frame_pointer_rtx, mem);
+
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn));
+ }
+ else if ((size + argspace_offset) != 0)
+ {
+ emit_insn (gen_cris_frame_deallocated_barrier ());
+
+ /* If there was no frame-pointer to restore sp from, we must
+ explicitly deallocate local variables. */
+
+ /* Handle space for outgoing parameters that hasn't been handled
+ yet. */
+ size += argspace_offset;
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, size)));
+ }
+
+ /* If this function has no pushed register parameters
+ (stdargs/varargs), and if it is not a leaf function, then we have
+ the return address on the stack. */
+ if (return_address_on_stack && pretend == 0)
+ {
+ if (TARGET_V32 || crtl->calls_eh_return)
+ {
+ rtx mem;
+ rtx insn;
+ rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM);
+ mem = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (srpreg, mem);
+
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn));
+
+ if (crtl->calls_eh_return)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ gen_rtx_raw_REG (SImode,
+ CRIS_STACKADJ_REG)));
+ cris_expand_return (false);
+ }
+ else
+ cris_expand_return (true);
+
+ return;
+ }
+
+ /* If we pushed some register parameters, then adjust the stack for
+ them. */
+ if (pretend != 0)
+ {
+ /* If SRP is stored on the way, we need to restore it first. */
+ if (return_address_on_stack)
+ {
+ rtx mem;
+ rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM);
+ rtx insn;
+
+ mem = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (mem, get_frame_alias_set ());
+ insn = emit_move_insn (srpreg, mem);
+
+ /* Whenever we emit insns with post-incremented addresses
+ ourselves, we must add a post-inc note manually. */
+ REG_NOTES (insn)
+ = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn));
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, pretend)));
+ }
+
+ /* Perform the "physical" unwinding that the EH machinery calculated. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ gen_rtx_raw_REG (SImode,
+ CRIS_STACKADJ_REG)));
+ cris_expand_return (false);
+}
+
+/* Worker function for generating movem from mem for load_multiple. */
+
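+/* As an illustrative sketch (not from the original sources), for three
+   registers on pre-v32 with a post-incremented source, the emitted
+   parallel is roughly:
+     (parallel [(set (reg:SI 2) (mem:SI (reg src)))
+                (set (reg src) (plus (reg src) (const_int 12)))
+                (set (reg:SI 1) (mem:SI (plus (reg src) (const_int 4))))
+                (set (reg:SI 0) (mem:SI (plus (reg src) (const_int 8))))]).  */
+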
+rtx
+cris_gen_movem_load (rtx src, rtx nregs_rtx, int nprefix)
+{
+ int nregs = INTVAL (nregs_rtx);
+ rtvec vec;
+ int eltno = 1;
+ int i;
+ rtx srcreg = XEXP (src, 0);
+ unsigned int regno = nregs - 1;
+ int regno_inc = -1;
+
+ if (TARGET_V32)
+ {
+ regno = 0;
+ regno_inc = 1;
+ }
+
+ if (GET_CODE (srcreg) == POST_INC)
+ srcreg = XEXP (srcreg, 0);
+
+ CRIS_ASSERT (REG_P (srcreg));
+
+  /* Don't use movem for just one insn.  The insns are equivalent except
+     for the pipeline hazard (on v32); movem does not forward the loaded
+     registers so there's a three-cycle penalty for their use.  */
+ if (nregs == 1)
+ return gen_movsi (gen_rtx_REG (SImode, 0), src);
+
+ vec = rtvec_alloc (nprefix + nregs
+ + (GET_CODE (XEXP (src, 0)) == POST_INC));
+
+ if (GET_CODE (XEXP (src, 0)) == POST_INC)
+ {
+ RTVEC_ELT (vec, nprefix + 1)
+ = gen_rtx_SET (VOIDmode, srcreg, plus_constant (srcreg, nregs * 4));
+ eltno++;
+ }
+
+ src = replace_equiv_address (src, srcreg);
+ RTVEC_ELT (vec, nprefix)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), src);
+ regno += regno_inc;
+
+ for (i = 1; i < nregs; i++, eltno++)
+ {
+ RTVEC_ELT (vec, nprefix + eltno)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno),
+ adjust_address_nv (src, SImode, i * 4));
+ regno += regno_inc;
+ }
+
+ return gen_rtx_PARALLEL (VOIDmode, vec);
+}
+
+/* Worker function for generating movem to mem. If FRAME_RELATED, notes
+ are added that the dwarf2 machinery understands. */
+
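+/* Illustrative shape of the multi-register parallel (a sketch, not
+   taken from the original sources): element 0 stores the first register
+   at destreg+increment, element 1 adjusts destreg by the increment, and
+   the remaining elements store the other registers at successive word
+   offsets.  */
+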
+rtx
+cris_emit_movem_store (rtx dest, rtx nregs_rtx, int increment,
+ bool frame_related)
+{
+ int nregs = INTVAL (nregs_rtx);
+ rtvec vec;
+ int eltno = 1;
+ int i;
+ rtx insn;
+ rtx destreg = XEXP (dest, 0);
+ unsigned int regno = nregs - 1;
+ int regno_inc = -1;
+
+ if (TARGET_V32)
+ {
+ regno = 0;
+ regno_inc = 1;
+ }
+
+ if (GET_CODE (destreg) == POST_INC)
+ increment += nregs * 4;
+
+ if (GET_CODE (destreg) == POST_INC || GET_CODE (destreg) == PLUS)
+ destreg = XEXP (destreg, 0);
+
+ CRIS_ASSERT (REG_P (destreg));
+
+  /* Don't use movem for just one insn.  The insns are equivalent except
+     for the pipeline hazard (on v32); movem does not forward the loaded
+     registers so there's a three-cycle penalty for their use.  */
+ if (nregs == 1)
+ {
+ rtx mov = gen_rtx_SET (VOIDmode, dest, gen_rtx_REG (SImode, 0));
+
+ if (increment == 0)
+ {
+ insn = emit_insn (mov);
+ if (frame_related)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return insn;
+ }
+
+ /* If there was a request for a side-effect, create the ordinary
+ parallel. */
+ vec = rtvec_alloc (2);
+
+ RTVEC_ELT (vec, 0) = mov;
+ RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, destreg,
+ plus_constant (destreg, increment));
+ if (frame_related)
+ {
+ RTX_FRAME_RELATED_P (mov) = 1;
+ RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1;
+ }
+ }
+ else
+ {
+ vec = rtvec_alloc (nregs + (increment != 0 ? 1 : 0));
+ RTVEC_ELT (vec, 0)
+ = gen_rtx_SET (VOIDmode,
+ replace_equiv_address (dest,
+ plus_constant (destreg,
+ increment)),
+ gen_rtx_REG (SImode, regno));
+ regno += regno_inc;
+
+ /* The dwarf2 info wants this mark on each component in a parallel
+ that's part of the prologue (though it's optional on the first
+ component). */
+ if (frame_related)
+ RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 0)) = 1;
+
+ if (increment != 0)
+ {
+ RTVEC_ELT (vec, 1)
+ = gen_rtx_SET (VOIDmode, destreg,
+ plus_constant (destreg,
+ increment != 0
+ ? increment : nregs * 4));
+ eltno++;
+
+ if (frame_related)
+ RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1;
+
+ /* Don't call adjust_address_nv on a post-incremented address if
+ we can help it. */
+ if (GET_CODE (XEXP (dest, 0)) == POST_INC)
+ dest = replace_equiv_address (dest, destreg);
+ }
+
+ for (i = 1; i < nregs; i++, eltno++)
+ {
+ RTVEC_ELT (vec, eltno)
+ = gen_rtx_SET (VOIDmode, adjust_address_nv (dest, SImode, i * 4),
+ gen_rtx_REG (SImode, regno));
+ if (frame_related)
+ RTX_FRAME_RELATED_P (RTVEC_ELT (vec, eltno)) = 1;
+ regno += regno_inc;
+ }
+ }
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+
+ /* Because dwarf2out.c handles the insns in a parallel as a sequence,
+ we need to keep the stack adjustment separate, after the
+ MEM-setters. Else the stack-adjustment in the second component of
+ the parallel would be mishandled; the offsets for the SETs that
+ follow it would be wrong. We prepare for this by adding a
+ REG_FRAME_RELATED_EXPR with the MEM-setting parts in a SEQUENCE
+ followed by the increment. Note that we have FRAME_RELATED_P on
+ all the SETs, including the original stack adjustment SET in the
+ parallel. */
+ if (frame_related)
+ {
+ if (increment != 0)
+ {
+ rtx seq = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (nregs + 1));
+ XVECEXP (seq, 0, 0) = copy_rtx (XVECEXP (PATTERN (insn), 0, 0));
+ for (i = 1; i < nregs; i++)
+ XVECEXP (seq, 0, i)
+ = copy_rtx (XVECEXP (PATTERN (insn), 0, i + 1));
+ XVECEXP (seq, 0, nregs) = copy_rtx (XVECEXP (PATTERN (insn), 0, 1));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, seq);
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ return insn;
+}
+
+/* Worker function for expanding the address for PIC function calls. */
+
+void
+cris_expand_pic_call_address (rtx *opp)
+{
+ rtx op = *opp;
+
+ gcc_assert (MEM_P (op));
+ op = XEXP (op, 0);
+
+ /* It might be that code can be generated that jumps to 0 (or to a
+ specific address). Don't die on that. (There is a
+ testcase.) */
+ if (CONSTANT_ADDRESS_P (op) && !CONST_INT_P (op))
+ {
+ enum cris_pic_symbol_type t = cris_pic_symbol_type_of (op);
+
+ CRIS_ASSERT (can_create_pseudo_p ());
+
+ /* For local symbols (non-PLT), just get the plain symbol
+ reference into a register. For symbols that can be PLT, make
+ them PLT. */
+ if (t == cris_rel_symbol)
+ {
+	  /* For v32, we're fine as-is; just PICify the symbol.  Forcing
+	     it into a register caused a performance regression for 3.2.1,
+	     observable in __floatdidf and elsewhere in libgcc.  */
+ if (TARGET_V32)
+ {
+ rtx sym = GET_CODE (op) != CONST ? op : get_related_value (op);
+ HOST_WIDE_INT offs = get_integer_term (op);
+
+ /* We can't get calls to sym+N, N integer, can we? */
+ gcc_assert (offs == 0);
+
+ op = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym),
+ CRIS_UNSPEC_PCREL));
+ }
+ else
+ op = force_reg (Pmode, op);
+ }
+ else if (t == cris_got_symbol)
+ {
+ if (TARGET_AVOID_GOTPLT)
+ {
+ /* Change a "jsr sym" into (allocate register rM, rO)
+ "move.d (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_GOTREL)),rM"
+ "add.d rPIC,rM,rO", "jsr rO" for pre-v32 and
+ "jsr (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_PCREL))"
+ for v32. */
+ rtx tem, rm, ro;
+ gcc_assert (can_create_pseudo_p ());
+ crtl->uses_pic_offset_table = 1;
+ tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op),
+ TARGET_V32
+ ? CRIS_UNSPEC_PLT_PCREL
+ : CRIS_UNSPEC_PLT_GOTREL);
+ tem = gen_rtx_CONST (Pmode, tem);
+ if (TARGET_V32)
+ op = tem;
+ else
+ {
+ rm = gen_reg_rtx (Pmode);
+ emit_move_insn (rm, tem);
+ ro = gen_reg_rtx (Pmode);
+ if (expand_binop (Pmode, add_optab, rm,
+ pic_offset_table_rtx,
+ ro, 0, OPTAB_LIB_WIDEN) != ro)
+ internal_error ("expand_binop failed in movsi got");
+ op = ro;
+ }
+ }
+ else
+ {
+ /* Change a "jsr sym" into (allocate register rM, rO)
+ "move.d (const (unspec [sym] CRIS_UNSPEC_PLTGOTREAD)),rM"
+ "add.d rPIC,rM,rO" "jsr [rO]" with the memory access
+ marked as not trapping and not aliasing. No "move.d
+ [rO],rP" as that would invite to re-use of a value
+ that should not be reused. FIXME: Need a peephole2
+ for cases when this is cse:d from the call, to change
+ back to just get the PLT entry address, so we don't
+ resolve the same symbol over and over (the memory
+ access of the PLTGOT isn't constant). */
+ rtx tem, mem, rm, ro;
+
+ gcc_assert (can_create_pseudo_p ());
+ crtl->uses_pic_offset_table = 1;
+ tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op),
+ CRIS_UNSPEC_PLTGOTREAD);
+ rm = gen_reg_rtx (Pmode);
+ emit_move_insn (rm, gen_rtx_CONST (Pmode, tem));
+ ro = gen_reg_rtx (Pmode);
+ if (expand_binop (Pmode, add_optab, rm,
+ pic_offset_table_rtx,
+ ro, 0, OPTAB_LIB_WIDEN) != ro)
+ internal_error ("expand_binop failed in movsi got");
+ mem = gen_rtx_MEM (Pmode, ro);
+
+ /* This MEM doesn't alias anything. Whether it aliases
+ other same symbols is unimportant. */
+ set_mem_alias_set (mem, new_alias_set ());
+ MEM_NOTRAP_P (mem) = 1;
+ op = mem;
+ }
+ }
+ else
+ /* Can't possibly get a GOT-needing-fixup for a function-call,
+ right? */
+ fatal_insn ("unidentifiable call op", op);
+
+ *opp = replace_equiv_address (*opp, op);
+ }
+}
+
+/* Make sure operands are in the right order for an addsi3 insn as
+ generated by a define_split. Nothing but REG_P as the first
+ operand is recognized by addsi3 after reload. OPERANDS contains
+ the operands, with the first at OPERANDS[N] and the second at
+ OPERANDS[N+1]. */
+
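+/* E.g. (an illustrative case) the operand pair (const_int 4, reg) is
+   swapped to (reg, const_int 4), so that the register comes first.  */
+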
+void
+cris_order_for_addsi3 (rtx *operands, int n)
+{
+ if (!REG_P (operands[n]))
+ {
+ rtx tem = operands[n];
+ operands[n] = operands[n + 1];
+ operands[n + 1] = tem;
+ }
+}
+
+/* Used from within code, e.g. from PRINT_OPERAND and
+   PRINT_OPERAND_ADDRESS.  Macros used in output_addr_const need to emit
+   different things depending on whether a code operand or a constant is
+   emitted.  */
+
+static void
+cris_output_addr_const (FILE *file, rtx x)
+{
+ in_code++;
+ output_addr_const (file, x);
+ in_code--;
+}
+
+/* Worker function for ASM_OUTPUT_SYMBOL_REF. */
+
+void
+cris_asm_output_symbol_ref (FILE *file, rtx x)
+{
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (flag_pic && in_code > 0)
+ {
+ const char *origstr = XSTR (x, 0);
+ const char *str;
+ str = (* targetm.strip_name_encoding) (origstr);
+ assemble_name (file, str);
+
+ /* Sanity check. */
+ if (!TARGET_V32 && !crtl->uses_pic_offset_table)
+ output_operand_lossage ("PIC register isn't set up");
+ }
+ else
+ assemble_name (file, XSTR (x, 0));
+}
+
+/* Worker function for ASM_OUTPUT_LABEL_REF. */
+
+void
+cris_asm_output_label_ref (FILE *file, char *buf)
+{
+ if (flag_pic && in_code > 0)
+ {
+ assemble_name (file, buf);
+
+ /* Sanity check. */
+ if (!TARGET_V32 && !crtl->uses_pic_offset_table)
+ internal_error ("emitting PIC operand, but PIC register "
+ "isn%'t set up");
+ }
+ else
+ assemble_name (file, buf);
+}
+
+/* Worker function for OUTPUT_ADDR_CONST_EXTRA. */
+
+bool
+cris_output_addr_const_extra (FILE *file, rtx xconst)
+{
+ switch (GET_CODE (xconst))
+ {
+ rtx x;
+
+ case UNSPEC:
+ x = XVECEXP (xconst, 0, 0);
+ CRIS_ASSERT (GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == LABEL_REF
+ || GET_CODE (x) == CONST);
+ output_addr_const (file, x);
+ switch (XINT (xconst, 1))
+ {
+ case CRIS_UNSPEC_PCREL:
+ /* We only get this with -fpic/PIC to tell it apart from an
+ invalid symbol. We can't tell here, but it should only
+ be the operand of a call or movsi. */
+ gcc_assert (TARGET_V32 && flag_pic);
+ break;
+
+ case CRIS_UNSPEC_PLT_PCREL:
+ gcc_assert (TARGET_V32);
+ fprintf (file, ":PLT");
+ break;
+
+ case CRIS_UNSPEC_PLT_GOTREL:
+ gcc_assert (!TARGET_V32);
+ fprintf (file, ":PLTG");
+ break;
+
+ case CRIS_UNSPEC_GOTREL:
+ gcc_assert (!TARGET_V32);
+ fprintf (file, ":GOTOFF");
+ break;
+
+ case CRIS_UNSPEC_GOTREAD:
+ if (flag_pic == 1)
+ fprintf (file, ":GOT16");
+ else
+ fprintf (file, ":GOT");
+ break;
+
+ case CRIS_UNSPEC_PLTGOTREAD:
+ if (flag_pic == 1)
+ fprintf (file, CRIS_GOTPLT_SUFFIX "16");
+ else
+ fprintf (file, CRIS_GOTPLT_SUFFIX);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+cris_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, CRIS_STRUCT_VALUE_REGNUM);
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+cris_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_arg_size,
+ int second_time)
+{
+ if (ca->regs < CRIS_MAX_ARGS_IN_REGS)
+ {
+ int stdarg_regs = CRIS_MAX_ARGS_IN_REGS - ca->regs;
+ cfun->machine->stdarg_regs = stdarg_regs;
+ *pretend_arg_size = stdarg_regs * 4;
+ }
+
+ if (TARGET_PDEBUG)
+ fprintf (asm_out_file,
+ "\n; VA:: ANSI: %d args before, anon @ #%d, %dtime\n",
+ ca->regs, *pretend_arg_size, second_time);
+}
+
+/* Return true if TYPE must be passed by invisible reference.
+ For cris, we pass <= 8 bytes by value, others by reference. */
+
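+/* E.g. an 8-byte "long long" is passed by value, while a 12-byte
+   struct is passed by reference.  */
+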
+static bool
+cris_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return (targetm.calls.must_pass_in_stack (mode, type)
+ || CRIS_FUNCTION_ARG_SIZE (mode, type) > 8);
+}
+
+/* A combination of defining TARGET_PROMOTE_FUNCTION_MODE, promoting arguments
+ and *not* defining TARGET_PROMOTE_PROTOTYPES or PROMOTE_MODE gives the
+ best code size and speed for gcc, ipps and products in gcc-2.7.2. */
+
+enum machine_mode
+cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return)
+{
+ /* Defining PROMOTE_FUNCTION_RETURN in gcc-2.7.2 uncovered bug 981110 (even
+ when modifying TARGET_FUNCTION_VALUE to return the promoted mode).
+ Maybe pointless as of now, but let's keep the old behavior. */
+ if (for_return == 1)
+ return mode;
+ return CRIS_PROMOTED_MODE (mode, *punsignedp, type);
+}
+
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+static rtx
+cris_function_value(const_tree type,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (type), CRIS_FIRST_ARG_REG);
+}
+
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+static rtx
+cris_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG);
+}
+
+/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the
+ time being. */
+
+bool
+cris_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == CRIS_FIRST_ARG_REG);
+}
+
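+/* Worker function for TARGET_ARG_PARTIAL_BYTES.  As an illustrative
+   example: an 8-byte argument arriving when only r13 remains free gets
+   its first 4 bytes in r13 and the rest on the stack.  */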
+static int
+cris_arg_partial_bytes (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (ca->regs == CRIS_MAX_ARGS_IN_REGS - 1
+ && !targetm.calls.must_pass_in_stack (mode, type)
+ && CRIS_FUNCTION_ARG_SIZE (mode, type) > 4
+ && CRIS_FUNCTION_ARG_SIZE (mode, type) <= 8)
+ return UNITS_PER_WORD;
+ else
+ return 0;
+}
+
+static rtx
+cris_function_arg_1 (const CUMULATIVE_ARGS *ca,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named, bool incoming)
+{
+ if ((!incoming || named) && ca->regs < CRIS_MAX_ARGS_IN_REGS)
+ return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG + ca->regs);
+ else
+ return NULL_RTX;
+}
+
+/* Worker function for TARGET_FUNCTION_ARG.
+ The void_type_node is sent as a "closing" call. */
+
+static rtx
+cris_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return cris_function_arg_1 (ca, mode, type, named, false);
+}
+
+/* Worker function for TARGET_FUNCTION_INCOMING_ARG.
+
+   The difference between this and the previous function is that this
+   one checks that an argument is named, since incoming stdarg/varargs
+   arguments are pushed onto the stack, and we don't have to check
+   against the "closing" void_type_node TYPE parameter.  */
+
+static rtx
+cris_function_incoming_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return cris_function_arg_1 (ca, mode, type, named, true);
+}
+
+/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */
+
+static void
+cris_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ ca->regs += (3 + CRIS_FUNCTION_ARG_SIZE (mode, type)) / 4;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS. */
+
+static tree
+cris_md_asm_clobbers (tree outputs, tree inputs, tree in_clobbers)
+{
+ HARD_REG_SET mof_set;
+ tree clobbers;
+ tree t;
+
+ CLEAR_HARD_REG_SET (mof_set);
+ SET_HARD_REG_BIT (mof_set, CRIS_MOF_REGNUM);
+
+ /* For the time being, all asms clobber condition codes. Revisit when
+ there's a reasonable use for inputs/outputs that mention condition
+ codes. */
+ clobbers
+ = tree_cons (NULL_TREE,
+ build_string (strlen (reg_names[CRIS_CC0_REGNUM]),
+ reg_names[CRIS_CC0_REGNUM]),
+ in_clobbers);
+
+ for (t = outputs; t != NULL; t = TREE_CHAIN (t))
+ {
+ tree val = TREE_VALUE (t);
+
+ /* The constraint letter for the singleton register class of MOF
+ is 'h'. If it's mentioned in the constraints, the asm is
+ MOF-aware and adding it to the clobbers would cause it to have
+ impossible constraints. */
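+      /* E.g. a (hypothetical) user asm like asm ("" : "=h" (x)) handles
+	 MOF itself, so we must not also clobber MOF.  */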
+ if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))),
+ 'h') != NULL
+ || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE)
+ return clobbers;
+ }
+
+ for (t = inputs; t != NULL; t = TREE_CHAIN (t))
+ {
+ tree val = TREE_VALUE (t);
+
+ if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))),
+ 'h') != NULL
+ || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE)
+ return clobbers;
+ }
+
+ return tree_cons (NULL_TREE,
+ build_string (strlen (reg_names[CRIS_MOF_REGNUM]),
+ reg_names[CRIS_MOF_REGNUM]),
+ clobbers);
+}
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED.
+
+   Really only needed if the stack frame has variable length (alloca
+   or variable-sized local arguments (a GNU C extension)).  See PR39499
+   and PR38609 for the reason this isn't just 0.  */
+
+bool
+cris_frame_pointer_required (void)
+{
+ return !current_function_sp_is_unchanging;
+}
+
+/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
+
+ This looks too complicated, and it is. I assigned r7 to be the
+ static chain register, but it is call-saved, so we have to save it,
+ and come back to restore it after the call, so we have to save srp...
+   Anyway, trampolines are rare enough that we can cope with this
+   slight lack of elegance.
+ (Do not be tempted to "straighten up" whitespace in the asms; the
+ assembler #NO_APP state mandates strict spacing). */
+/* ??? See the i386 regparm=3 implementation that pushes the static
+ chain value to the stack in the trampoline, and uses a call-saved
+ register when called directly. */
+
+static void
+cris_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_V32)
+ {
+ /* This normally-unused nop insn acts as an instruction to
+ the simulator to flush its instruction cache. None of
+ the other instructions in the trampoline template suits
+ as a trigger for V32. The pc-relative addressing mode
+ works nicely as a trigger for V10.
+ FIXME: Have specific V32 template (possibly avoiding the
+ use of a special instruction). */
+ fprintf (f, "\tclearf x\n");
+ /* We have to use a register as an intermediate, choosing
+ semi-randomly R1 (which has to not be the STATIC_CHAIN_REGNUM),
+ so we can use it for address indirection and jsr target. */
+ fprintf (f, "\tmove $r1,$mof\n");
+ /* +4 */
+ fprintf (f, "\tmove.d 0,$r1\n");
+ fprintf (f, "\tmove.d $%s,[$r1]\n", reg_names[STATIC_CHAIN_REGNUM]);
+ fprintf (f, "\taddq 6,$r1\n");
+ fprintf (f, "\tmove $mof,[$r1]\n");
+ fprintf (f, "\taddq 6,$r1\n");
+ fprintf (f, "\tmove $srp,[$r1]\n");
+ /* +20 */
+ fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]);
+ /* +26 */
+ fprintf (f, "\tmove.d 0,$r1\n");
+ fprintf (f, "\tjsr $r1\n");
+ fprintf (f, "\tsetf\n");
+ /* +36 */
+ fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]);
+ /* +42 */
+ fprintf (f, "\tmove.d 0,$r1\n");
+ /* +48 */
+ fprintf (f, "\tmove.d 0,$r9\n");
+ fprintf (f, "\tjump $r9\n");
+ fprintf (f, "\tsetf\n");
+ }
+ else
+ {
+ fprintf (f, "\tmove.d $%s,[$pc+20]\n", reg_names[STATIC_CHAIN_REGNUM]);
+ fprintf (f, "\tmove $srp,[$pc+22]\n");
+ fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]);
+ fprintf (f, "\tjsr 0\n");
+ fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]);
+ fprintf (f, "\tjump 0\n");
+ }
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+
+static void
+cris_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx tramp = XEXP (m_tramp, 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ if (TARGET_V32)
+ {
+ mem = adjust_address (m_tramp, SImode, 6);
+ emit_move_insn (mem, plus_constant (tramp, 38));
+ mem = adjust_address (m_tramp, SImode, 22);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 28);
+ emit_move_insn (mem, fnaddr);
+ }
+ else
+ {
+ mem = adjust_address (m_tramp, SImode, 10);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 16);
+ emit_move_insn (mem, fnaddr);
+ }
+
+  /* Note that there is no need to do anything with the cache for
+     the sake of a trampoline.  */
+}
+
+
+#if 0
+/* Various small functions to replace macros.  Only called from a
+   debugger.  They might collide with gcc functions or system functions,
+   so they are only emitted when the '#if 0' above is changed to '#if 1'.  */
+
+enum rtx_code Get_code (rtx);
+
+enum rtx_code
+Get_code (rtx x)
+{
+ return GET_CODE (x);
+}
+
+const char *Get_mode (rtx);
+
+const char *
+Get_mode (rtx x)
+{
+ return GET_MODE_NAME (GET_MODE (x));
+}
+
+rtx Xexp (rtx, int);
+
+rtx
+Xexp (rtx x, int n)
+{
+ return XEXP (x, n);
+}
+
+rtx Xvecexp (rtx, int, int);
+
+rtx
+Xvecexp (rtx x, int n, int m)
+{
+ return XVECEXP (x, n, m);
+}
+
+int Get_rtx_len (rtx);
+
+int
+Get_rtx_len (rtx x)
+{
+ return GET_RTX_LENGTH (GET_CODE (x));
+}
+
+/* Use upper-case to distinguish from local variables that are sometimes
+ called next_insn and prev_insn. */
+
+rtx Next_insn (rtx);
+
+rtx
+Next_insn (rtx insn)
+{
+ return NEXT_INSN (insn);
+}
+
+rtx Prev_insn (rtx);
+
+rtx
+Prev_insn (rtx insn)
+{
+ return PREV_INSN (insn);
+}
+#endif
+
+#include "gt-cris.h"
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
new file mode 100644
index 000000000..0e69e6948
--- /dev/null
+++ b/gcc/config/cris/cris.h
@@ -0,0 +1,1335 @@
+/* Definitions for GCC. Part of the machine description for CRIS.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+ 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Axis Communications. Written by Hans-Peter Nilsson.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* After the first "Node:" comment comes all preprocessor directives and
+ attached declarations described in the info files, the "Using and
+ Porting GCC" manual (uapgcc), in the same order as found in the "Target
+ macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not
+ really, but needs an update anyway.
+
+ There is no generic copy-of-uapgcc comment, you'll have to see uapgcc
+ for that. If applicable, there is a CRIS-specific comment. The order
+ of macro definitions follow the order in the manual. Every section in
+ the manual (node in the info pages) has an introductory `Node:
+ <subchapter>' comment. If no macros are defined for a section, only
+ the section-comment is present. */
+
+/* Note that other header files (e.g. config/elfos.h, config/linux.h,
+ config/cris/linux.h and config/cris/aout.h) are responsible for lots of
+ settings not repeated below. This file contains general CRIS
+ definitions and definitions for the cris-*-elf subtarget. */
+
+/* We don't want to use gcc_assert for everything, as that can be
+ compiled out. */
+#define CRIS_ASSERT(x) \
+ do { if (!(x)) internal_error ("CRIS-port assertion failed: " #x); } while (0)
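+
+/* Typical use, as elsewhere in this port: CRIS_ASSERT (REG_P (x));
+   unlike gcc_assert, this cannot be compiled out.  */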
+
+/* Replacement for REG_P since it does not match SUBREGs. Happens for
+ testcase Axis-20000320 with gcc-2.9x. */
+#define REG_S_P(x) \
+ (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (XEXP (x, 0))))
+
+/* Last register in main register bank r0..r15. */
+#define CRIS_LAST_GENERAL_REGISTER 15
+
+/* Descriptions of registers used for arguments. */
+#define CRIS_FIRST_ARG_REG 10
+#define CRIS_MAX_ARGS_IN_REGS 4
+
+/* See also *_REGNUM constants in cris.md. */
+
+/* Most of the time, we need the index into the register-names array.
+ When passing debug-info, we need the real hardware register number. */
+#define CRIS_CANONICAL_SRP_REGNUM (16 + 11)
+#define CRIS_CANONICAL_MOF_REGNUM (16 + 7)
+/* We have CCR in all models including v10, but that's 16 bits, so let's
+ prefer the DCCR number, which is a DMA pointer in pre-v8, so we'll
+ never clash with it for GCC purposes. */
+#define CRIS_CANONICAL_CC0_REGNUM (16 + 13)
+
+/* When generating PIC, these suffixes are added to the names of non-local
+ functions when being output. Contrary to other ports, we have offsets
+ relative to the GOT, not the PC. We might implement PC-relative PLT
+ semantics later for the general case; they are used in some cases right
+ now, such as MI thunks. */
+#define CRIS_GOTPLT_SUFFIX ":GOTPLT"
+#define CRIS_PLT_GOTOFFSET_SUFFIX ":PLTG"
+#define CRIS_PLT_PCOFFSET_SUFFIX ":PLT"
+
+#define CRIS_FUNCTION_ARG_SIZE(MODE, TYPE) \
+ ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) \
+ : (unsigned) int_size_in_bytes (TYPE))
+
+/* Which CPU version this is. The parsed and adjusted cris_cpu_str. */
+extern int cris_cpu_version;
+
+/* Changing the order used to be necessary to put the fourth __make_dp
+ argument (a DImode parameter) in registers, to fit with the libfunc
+ parameter passing scheme used for intrinsic functions. FIXME: Check
+ performance and maybe remove definition from TARGET_LIBGCC2_CFLAGS now
+ that it isn't strictly necessary. We used to do this through
+   TARGET_LIBGCC2_CFLAGS, but that became increasingly difficult as the
+   parentheses (which needed quoting) traveled through several layers of
+   make and shell invocations.  */
+#ifdef IN_LIBGCC2
+#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c)
+#endif
+
+
+/* Node: Driver */
+
+/* Also provide canonical vN definitions when user specifies an alias.
+ Note that -melf overrides -maout. */
+
+#define CPP_SPEC \
+ "%{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}\
+ %{mtune=etrax4:-D__tune_v3 -D__CRIS_arch_tune=3}\
+ %{mtune=etrax100:-D__tune_v8 -D__CRIS_arch_tune=8}\
+ %{mtune=svinto:-D__tune_v8 -D__CRIS_arch_tune=8}\
+ %{mtune=etrax100lx:-D__tune_v10 -D__CRIS_arch_tune=10}\
+ %{mtune=ng:-D__tune_v10 -D__CRIS_arch_tune=10}}\
+ %{mcpu=*:-D__arch_%* %{mcpu=v*:-D__CRIS_arch_version=%*}\
+ %{mcpu=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\
+ %{mcpu=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\
+ %{mcpu=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\
+ %{mcpu=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\
+ %{mcpu=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\
+ %{march=*:-D__arch_%* %{march=v*:-D__CRIS_arch_version=%*}\
+ %{march=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\
+ %{march=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\
+ %{march=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\
+ %{march=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\
+ %{march=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\
+ %{metrax100:-D__arch__v8 -D__CRIS_arch_version=8}\
+ %{metrax4:-D__arch__v3 -D__CRIS_arch_version=3}\
+ %(cpp_subtarget)"
+
+/* For the cris-*-elf subtarget. */
+
+#define CRIS_DEFAULT_TUNE "10"
+#define CRIS_ARCH_CPP_DEFAULT
+#define CRIS_DEFAULT_ASM_ARCH_OPTION ""
+
+#ifdef TARGET_CPU_DEFAULT
+#if TARGET_CPU_DEFAULT != 32 && TARGET_CPU_DEFAULT != 10
+ #error "Due to '()'; e.g. '#define TARGET_CPU_DEFAULT (10)', stringize TARGET_CPU_DEFAULT isn't useful: update manually."
+#endif
+
+#if TARGET_CPU_DEFAULT == 32
+#undef CRIS_DEFAULT_TUNE
+#define CRIS_DEFAULT_TUNE "32"
+/* To enable use of "generic" cris-axis-elf binutils, always pass the
+ architecture option to GAS. (We don't do this for non-v32.) */
+#undef CRIS_DEFAULT_ASM_ARCH_OPTION
+#define CRIS_DEFAULT_ASM_ARCH_OPTION "--march=v32"
+#endif
+
+#undef CRIS_ARCH_CPP_DEFAULT
+#define CRIS_ARCH_CPP_DEFAULT \
+ "%{!march=*:\
+ %{!metrax*:\
+ %{!mcpu=*:\
+ %{!mtune=*:-D__tune_v" CRIS_DEFAULT_TUNE "}\
+ -D__arch_v"CRIS_DEFAULT_TUNE\
+ " -D__CRIS_arch_version=" CRIS_DEFAULT_TUNE "}}}"
+#endif
+
+#define CRIS_CPP_SUBTARGET_SPEC \
+ "%{mbest-lib-options:\
+ %{!moverride-best-lib-options:\
+ %{!march=*:%{!metrax*:%{!mcpu=*:\
+ -D__tune_v" CRIS_DEFAULT_TUNE \
+ " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}}}}"\
+ CRIS_ARCH_CPP_DEFAULT
+
+/* Override previous definitions (linux.h). */
+#undef CC1_SPEC
+#define CC1_SPEC \
+ "%{metrax4:-march=v3}\
+ %{metrax100:-march=v8}\
+ %(cc1_subtarget)"
+
+/* For the cris-*-elf subtarget. */
+#define CRIS_CC1_SUBTARGET_SPEC \
+ "-melf\
+ %{mbest-lib-options:\
+ %{!moverride-best-lib-options:\
+ %{!march=*:%{!mcpu=*:-mtune=v" CRIS_DEFAULT_TUNE\
+ " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}\
+ %{!finhibit-size-directive:\
+ %{!fno-function-sections: -ffunction-sections}\
+ %{!fno-data-sections: -fdata-sections}}}}"
+
+/* This adds to CC1_SPEC. */
+#define CC1PLUS_SPEC ""
+
+#ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION
+#define MAYBE_AS_NO_MUL_BUG_ABORT \
+ "%{mno-mul-bug-workaround:-no-mul-bug-abort} "
+#else
+#define MAYBE_AS_NO_MUL_BUG_ABORT
+#endif
+
+/* Override previous definitions (linux.h). */
+#undef ASM_SPEC
+#define ASM_SPEC \
+ MAYBE_AS_NO_MUL_BUG_ABORT \
+ "%(asm_subtarget)\
+ %{march=*:%{mcpu=*:%edo not specify both -march=... and -mcpu=...}}\
+ %{march=v32:--march=v32} %{mcpu=v32:--march=v32}"
+
+/* For the cris-*-elf subtarget. */
+#define CRIS_ASM_SUBTARGET_SPEC \
+ "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
+
+/* FIXME: We should propagate the -melf option to make the criself
+ "emulation" unless a linker script is provided (-T*), but I don't know
+ how to do that if either of -Ttext, -Tdata or -Tbss is given but no
+ linker script, as is usually the case. Leave it to the user for the
+ time being.
+
+ Note that -melf overrides -maout except that a.out-compiled libraries
+ are linked in (multilibbing). We'd need some %s-variant that
+ checked for existence of some specific file. */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{v:--verbose}\
+ %(link_subtarget)"
+
+/* For the cris-*-elf subtarget. */
+#define CRIS_LINK_SUBTARGET_SPEC \
+ "-mcriself\
+ %{sim2:%{!T*:-Tdata 0x4000000 -Tbss 0x8000000}}\
+ %{!r:%{O2|O3: --gc-sections}}"
+
+/* Which library to get. The simulator uses a different library for
+ the low-level syscalls (implementing the Linux syscall ABI instead
+ of direct-iron accesses). Default everything with the stub "nosys"
+ library. */
+/* Override previous definitions (linux.h). */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{sim*:--start-group -lc -lsyslinux --end-group}\
+ %{!sim*:%{g*:-lg}\
+ %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p} -lbsp}\
+ -lnosys"
+
+/* Linker startfile options; crt0 flavors.
+ We need to remove any previous definition (elfos.h). */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{sim*:crt1.o%s}%{!sim*:crt0.o%s}\
+ crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#define EXTRA_SPECS \
+ {"cpp_subtarget", CRIS_CPP_SUBTARGET_SPEC}, \
+ {"cc1_subtarget", CRIS_CC1_SUBTARGET_SPEC}, \
+ {"asm_subtarget", CRIS_ASM_SUBTARGET_SPEC}, \
+ {"link_subtarget", CRIS_LINK_SUBTARGET_SPEC}, \
+ CRIS_SUBTARGET_EXTRA_SPECS
+
+#define CRIS_SUBTARGET_EXTRA_SPECS
+
+
+/* Node: Run-time Target */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("cris"); \
+ builtin_define_std ("CRIS"); \
+ builtin_define_std ("GNU_CRIS"); \
+ builtin_define ("__CRIS_ABI_version=2"); \
+ builtin_assert ("cpu=cris"); \
+ builtin_assert ("machine=cris"); \
+ } \
+ while (0)
+
+/* Previously controlled by target_flags. */
+#define TARGET_ELF 1
+
+/* Previously controlled by target_flags. Note that this is *not* set
+ for -melinux. */
+#define TARGET_LINUX 0
+
+/* For the cris-*-elf subtarget. */
+#define CRIS_SUBTARGET_DEFAULT 0
+
+#define CRIS_CPU_BASE 0
+#define CRIS_CPU_ETRAX4 3 /* Just lz added. */
+#define CRIS_CPU_SVINTO 8 /* Added swap, jsrc & Co., 32-bit accesses. */
+#define CRIS_CPU_NG 10 /* Added mul[su]. */
+#define CRIS_CPU_V32 32 /* Major changes. */
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT CRIS_CPU_BASE
+#endif
+
+/* Default target_flags if no switches specified. */
+#ifndef TARGET_DEFAULT
+# if TARGET_CPU_DEFAULT == 32
+# define TARGET_DEFAULT \
+ (MASK_STACK_ALIGN \
+ + MASK_CONST_ALIGN + MASK_DATA_ALIGN \
+ + MASK_PROLOGUE_EPILOGUE)
+# else /* 10 */
+# define TARGET_DEFAULT \
+ (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \
+ + MASK_CONST_ALIGN + MASK_DATA_ALIGN \
+ + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG)
+# endif
+#endif
+
+/* Local, providing a default for cris_cpu_version. */
+#define CRIS_DEFAULT_CPU_VERSION TARGET_CPU_DEFAULT
+
+#define TARGET_HAS_MUL_INSNS (cris_cpu_version >= CRIS_CPU_NG)
+#define TARGET_HAS_LZ (cris_cpu_version >= CRIS_CPU_ETRAX4)
+#define TARGET_HAS_SWAP (cris_cpu_version >= CRIS_CPU_SVINTO)
+#define TARGET_V32 (cris_cpu_version >= CRIS_CPU_V32)
+
+#define CRIS_SUBTARGET_HANDLE_OPTION(x, y, z)
+
+/* Node: Storage Layout */
+
+#define BITS_BIG_ENDIAN 0
+
+#define BYTES_BIG_ENDIAN 0
+
+/* WORDS_BIG_ENDIAN is not defined in the hardware, but for consistency,
+ we use little-endianness, and we may also be able to use
+ post-increment on DImode indirect. */
+#define WORDS_BIG_ENDIAN 0
+
+#define UNITS_PER_WORD 4
+
+#define CRIS_PROMOTED_MODE(MODE, UNSIGNEDP, TYPE) \
+ (GET_MODE_CLASS (MODE) == MODE_INT && GET_MODE_SIZE (MODE) < 4) \
+ ? SImode : MODE
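+
+/* I.e. integer modes narrower than 4 bytes (QImode, HImode) are
+   promoted to SImode; everything else is left unchanged.  */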
+
+/* We will be using prototype promotion, so they will be 32 bit. */
+#define PARM_BOUNDARY 32
+
+/* Stack boundary is guided by -mstack-align, -mno-stack-align,
+ -malign.
+ Old comment: (2.1: still valid in 2.7.2?)
+ Note that to make this macro affect the alignment of stack
+ locals, a fix was required, and special precautions when handling
+ the stack pointer in various other macros (TARGET_ASM_FUNCTION_PROLOGUE
+ et al) were required. See file "function.c". If you would just define
+ this macro, it would only affect the builtin alloca and variable
+ local data (non-ANSI, non-K&R, Gnu C extension). */
+#define STACK_BOUNDARY \
+ (TARGET_STACK_ALIGN ? (TARGET_ALIGN_BY_32 ? 32 : 16) : 8)
+
+#define FUNCTION_BOUNDARY 16
+
+/* Do not change BIGGEST_ALIGNMENT (when optimizing), as it will affect
+ strange places, at least in 2.1. */
+#define BIGGEST_ALIGNMENT 8
+
+/* If -m16bit, -m16-bit, -malign or -mdata-align,
+ align everything to 16 bit. */
+#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \
+ (TARGET_DATA_ALIGN \
+ ? (TARGET_ALIGN_BY_32 \
+ ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \
+ : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN)
+
+/* Note that CONSTANT_ALIGNMENT has the effect of making gcc believe that
+   ALL references to constant stuff (in the code segment, like strings)
+   have this alignment.  That is a rather rushed assumption.  Luckily we
+   do not care about the "alignment" operand to builtin memcpy (the only
+   place where it counts), so it doesn't affect any bad spots.  */
+#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \
+ (TARGET_CONST_ALIGN \
+ ? (TARGET_ALIGN_BY_32 \
+ ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \
+ : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN)
+
+/* FIXME: Define LOCAL_ALIGNMENT for word and dword or arrays and
+ structures (if -mstack-align=), and check that it is good. */
+
+#define EMPTY_FIELD_BOUNDARY 8
+
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+#define STRICT_ALIGNMENT 0
+
+/* Remove any previous definition (elfos.h).
+ ??? If it wasn't for all the other stuff that affects layout of
+ structures and bit-fields, this could presumably cause incompatibility
+ with other GNU/Linux ports (i.e. elfos.h users). */
+#undef PCC_BITFIELD_TYPE_MATTERS
+
+/* This is only used for non-scalars.  Strange stuff happens to structs
+   (FIXME: What?) if we use anything larger than the largest actually used
+   datum size, so let's make it 32.  The type "long long" will still work
+   as usual.  We can still have DImode insns, but they will only be used
+   for scalar data (i.e. long long).  */
+#define MAX_FIXED_MODE_SIZE 32
+
+
+/* Node: Type Layout */
+
+/* Note that DOUBLE_TYPE_SIZE is not defined anymore, since the default
+ value gives a 64-bit double, which is what we now use. */
+
+/* For compatibility and historical reasons, a char should be signed. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Note that WCHAR_TYPE_SIZE is used in cexp.y,
+ where TARGET_SHORT is not available. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* Node: Register Basics */
+
+/* We count all 16 non-special registers, SRP, a faked argument
+ pointer register, MOF and CCR/DCCR. */
+#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1)
+
+/* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a
+ frame-pointer, but is not fixed. SRP is not included in general
+ registers and will not be used automatically. All other special
+ registers are fixed at the moment. The faked argument pointer register
+ is fixed too. */
+#define FIXED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0}
+
+/* Register r9 is used for structure-address, r10-r13 for parameters,
+   r10 and up for return values.  */
+#define CALL_USED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1}
+
+/* Node: Allocation Order */
+
+/* We need this on CRIS, because call-used regs should be used first
+   (so we don't need to push).  Else start using registers from r0 and up.
+   This preference is mainly because if we put call-used regs from r0
+   and up, then we can't use movem to push the rest (which have to be
+   saved if we use them, and movem has to start with r0).
+   Change here if you change which registers to use as call registers.
+
+ The actual need to explicitly prefer call-used registers improved the
+ situation a lot for 2.1, but might not actually be needed anymore.
+ Still, this order reflects what GCC should find out by itself, so it
+ probably does not hurt.
+
+ Order of preference: Call-used-regs first, then r0 and up, last fp &
+ sp & pc as fillers.
+ Call-used regs in opposite order, so they will cause less conflict if
+ a function has few args (<= 3) and it wants a scratch reg.
+ Use struct-return address first, since very few functions use
+ structure return values so it is likely to be available. */
+#define REG_ALLOC_ORDER \
+ {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19}
+
+/* Use MOF and ACR. Prefer ACR before any other register. Prefer MOF
+ then SRP after saved registers. The *after* is because they're only
+ useful for storage, not for things being computed, which is
+ apparently more common. */
+#define REG_ALLOC_ORDER_V32 \
+ {15, 9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 16, 14, 18, 19}
+
+
+/* Node: Values in Registers */
+
+/* The VOIDmode test is so we can omit mode on anonymous insns. FIXME:
+ Still needed in 2.9x, at least for Axis-20000319. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (MODE == VOIDmode \
+ ? 1 : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
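+
+/* As an illustration, assuming UNITS_PER_WORD is 4: SImode and
+ smaller modes take one register, while DImode (8 bytes) takes
+ (8 + 4 - 1) / 4 = 2 registers. */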
+
+/* CRIS permits all registers to hold all modes, with two exceptions:
+ the condition-code register can only hold CCmode, and MOF and ACR
+ cannot hold larger-than-register-size modes. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ (((MODE) == CCmode \
+ || (REGNO) != CRIS_CC0_REGNUM) \
+ && (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \
+ || ((REGNO) != CRIS_MOF_REGNUM && (REGNO) != CRIS_ACR_REGNUM)))
+
+/* Because CCmode isn't covered by the "narrower mode" statement in
+ tm.texi, we can still say all modes are tieable despite
+ HARD_REGNO_MODE_OK not always being 1. */
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+
+/* Node: Leaf Functions */
+/* (no definitions) */
+
+/* Node: Stack Registers */
+/* (no definitions) */
+
+
+/* Node: Register Classes */
+
+/* FIXME: A separate class for the return register would make sense.
+
+ We need a separate register class to handle register allocation for
+ ACR, since it can't be used for post-increment.
+
+ It's not obvious, but having subunions of all movable-between
+ register classes does really help register allocation. */
+enum reg_class
+ {
+ NO_REGS,
+ ACR_REGS, MOF_REGS, CC0_REGS, SPECIAL_REGS,
+ SPEC_ACR_REGS, GENNONACR_REGS,
+ SPEC_GENNONACR_REGS, GENERAL_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+ };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+ {"NO_REGS", \
+ "ACR_REGS", "MOF_REGS", "CC0_REGS", "SPECIAL_REGS", \
+ "SPEC_ACR_REGS", "GENNONACR_REGS", "SPEC_GENNONACR_REGS", \
+ "GENERAL_REGS", "ALL_REGS"}
+
+#define CRIS_SPECIAL_REGS_CONTENTS \
+ ((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM))
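+
+/* With the register numbers defined in cris.md below (SRP 16, MOF 17,
+ CC0 19), this evaluates to (1 << 16) | (1 << 17) | (1 << 19),
+ i.e. 0xb0000. */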
+
+/* Count in the faked argument register in GENERAL_REGS. Keep out SRP. */
+#define REG_CLASS_CONTENTS \
+ { \
+ {0}, \
+ {1 << CRIS_ACR_REGNUM}, \
+ {1 << CRIS_MOF_REGNUM}, \
+ {1 << CRIS_CC0_REGNUM}, \
+ {CRIS_SPECIAL_REGS_CONTENTS}, \
+ {CRIS_SPECIAL_REGS_CONTENTS \
+ | (1 << CRIS_ACR_REGNUM)}, \
+ {(0xffff | (1 << CRIS_AP_REGNUM)) \
+ & ~(1 << CRIS_ACR_REGNUM)}, \
+ {(0xffff | (1 << CRIS_AP_REGNUM) \
+ | CRIS_SPECIAL_REGS_CONTENTS) \
+ & ~(1 << CRIS_ACR_REGNUM)}, \
+ {0xffff | (1 << CRIS_AP_REGNUM)}, \
+ {0xffff | (1 << CRIS_AP_REGNUM) \
+ | CRIS_SPECIAL_REGS_CONTENTS} \
+ }
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) == CRIS_ACR_REGNUM ? ACR_REGS : \
+ (REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \
+ (REGNO) == CRIS_CC0_REGNUM ? CC0_REGS : \
+ (REGNO) == CRIS_SRP_REGNUM ? SPECIAL_REGS : \
+ GENERAL_REGS)
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define MODE_CODE_BASE_REG_CLASS(MODE, OCODE, ICODE) \
+ ((OCODE) != POST_INC ? BASE_REG_CLASS : GENNONACR_REGS)
+
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#define IRA_COVER_CLASSES { GENERAL_REGS, SPECIAL_REGS, LIM_REG_CLASSES }
+
+#define REG_CLASS_FROM_LETTER(C) \
+ ( \
+ (C) == 'a' ? ACR_REGS : \
+ (C) == 'b' ? GENNONACR_REGS : \
+ (C) == 'h' ? MOF_REGS : \
+ (C) == 'x' ? SPECIAL_REGS : \
+ (C) == 'c' ? CC0_REGS : \
+ NO_REGS \
+ )
+
+/* Since it uses reg_renumber, it is safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) <= CRIS_LAST_GENERAL_REGISTER \
+ || (REGNO) == ARG_POINTER_REGNUM \
+ || (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \
+ || (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM)
+
+/* REGNO_OK_FOR_BASE_P seems to be obsolete wrt. this one, but not yet
+ documented as such. */
+#define REGNO_MODE_CODE_OK_FOR_BASE_P(REGNO, MODE, OCODE, ICODE) \
+ (REGNO_OK_FOR_BASE_P (REGNO) \
+ && ((OCODE) != POST_INC \
+ || !((REGNO) == CRIS_ACR_REGNUM \
+ || (unsigned) reg_renumber[REGNO] == CRIS_ACR_REGNUM)))
+
+/* See REGNO_OK_FOR_BASE_P. */
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
+
+/* It seems like gcc (2.7.2 and 2.9x of 2000-03-22) may send "NO_REGS" as
+ the class for a constant (testcase: __Mul in arit.c). To avoid forcing
+ out a constant into the constant pool, we will trap this case and
+ return something a bit more sane. FIXME: Check if this is a bug.
+ Beware that we must not "override" classes that can be specified as
+ constraint letters, or else asm operands using them will fail when
+ they need to be reloaded. FIXME: Investigate whether that constitutes
+ a bug. */
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ ((CLASS) != ACR_REGS \
+ && (CLASS) != MOF_REGS \
+ && (CLASS) != CC0_REGS \
+ && (CLASS) != SPECIAL_REGS \
+ ? GENERAL_REGS : (CLASS))
+
+/* We can't move special registers to and from memory in modes smaller
+ than word_mode. We also can't move between special registers.
+ Luckily, -1, as returned by true_regnum for non-registers and
+ non-subregs, is valid as a parameter to our REGNO_REG_CLASS,
+ returning GENERAL_REGS, so we get the effect that any X that isn't a
+ special-register is treated as having a non-empty intersection with
+ GENERAL_REGS. */
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ ((((CLASS) == SPECIAL_REGS || (CLASS) == MOF_REGS) \
+ && ((GET_MODE_SIZE (MODE) < 4 && MEM_P (X)) \
+ || !reg_classes_intersect_p (REGNO_REG_CLASS (true_regnum (X)), \
+ GENERAL_REGS))) \
+ ? GENERAL_REGS : NO_REGS)
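+
+/* Concretely, this means that e.g. a QImode or HImode move between
+ memory and MOF or SRP goes via a general register, as does any move
+ between two special registers. */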
+
+/* FIXME: Fix regrename.c; it should check validity of replacements,
+ not just with a silly pass-specific macro. We may miss some
+ opportunities, but we must stop regrename from creating acr++. */
+#define HARD_REGNO_RENAME_OK(FROM, TO) ((TO) != CRIS_ACR_REGNUM)
+
+/* For CRIS, this is always the size of MODE in words,
+ since all registers are the same size. To use omitted modes in
+ patterns with reload constraints, you must say the widest size
+ which is allowed for VOIDmode.
+ FIXME: Does that still apply for gcc-2.9x? Keep poisoned until such
+ patterns are added back. News: 2001-03-16: Happens as early as the
+ underscore-test. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((MODE) == VOIDmode \
+ ? 1 /* + cris_fatal ("CLASS_MAX_NREGS with VOIDmode") */ \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* We are now out of letters; we could use ten more. This forces us to
+ use C-code in the 'md' file. FIXME: Use some EXTRA_CONSTRAINTS. */
+#define CRIS_CONST_OK_FOR_LETTER_P(VALUE, C) \
+ ( \
+ /* MOVEQ, CMPQ, ANDQ, ORQ. */ \
+ (C) == 'I' ? (VALUE) >= -32 && (VALUE) <= 31 : \
+ /* ADDQ, SUBQ. */ \
+ (C) == 'J' ? (VALUE) >= 0 && (VALUE) <= 63 : \
+ /* ASRQ, BTSTQ, LSRQ, LSLQ. */ \
+ (C) == 'K' ? (VALUE) >= 0 && (VALUE) <= 31 : \
+ /* A 16-bit signed number. */ \
+ (C) == 'L' ? (VALUE) >= -32768 && (VALUE) <= 32767 : \
+ /* The constant 0 for CLEAR. */ \
+ (C) == 'M' ? (VALUE) == 0 : \
+ /* A negative ADDQ or SUBQ. */ \
+ (C) == 'N' ? (VALUE) >= -63 && (VALUE) < 0 : \
+ /* Quickened ints, QI and HI. */ \
+ (C) == 'O' ? (VALUE) >= 0 && (VALUE) <= 65535 \
+ && ((VALUE) >= (65535-31) \
+ || ((VALUE) >= (255-31) \
+ && (VALUE) <= 255 )) : \
+ /* A 16-bit number signed *or* unsigned. */ \
+ (C) == 'P' ? (VALUE) >= -32768 && (VALUE) <= 65535 : \
+ 0)
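+
+/* A few worked examples: 31 satisfies 'I', 'J' and 'K'; 42 satisfies
+ 'J' but not 'I' or 'K'; -5 satisfies 'I' and 'N'; 240 satisfies 'O'
+ (it is within 255-31..255); 40000 satisfies 'P' but not 'L'. */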
+
+#define CONST_OK_FOR_CONSTRAINT_P(VALUE, C, S) \
+ ( \
+ ((C) != 'K' || (S)[1] == 'c') \
+ ? CRIS_CONST_OK_FOR_LETTER_P (VALUE, C) : \
+ ((C) == 'K' && (S)[1] == 'p') \
+ ? exact_log2 (VALUE) >= 0 : \
+ 0)
+
+#define CONSTRAINT_LEN(C, S) ((C) == 'K' ? 2 : DEFAULT_CONSTRAINT_LEN (C, S))
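+
+/* Thus "Kc" keeps the plain 'K' meaning (0..31), while "Kp" accepts
+ any exact power of two: 64 matches "Kp" since exact_log2 (64) is 6,
+ but 100 does not. */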
+
+/* It is really simple to make up a 0.0; it is the same as integer 0
+ in IEEE 754. */
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'G' && ((VALUE) == CONST0_RTX (DFmode) \
+ || (VALUE) == CONST0_RTX (SFmode)))
+
+/* We need this on CRIS to distinguish delay-slottable addressing modes. */
+#define EXTRA_CONSTRAINT(X, C) \
+ ( \
+ /* Slottable address mode? */ \
+ (C) == 'Q' ? EXTRA_CONSTRAINT_Q (X) : \
+ /* Operand to BDAP or BIAP? */ \
+ (C) == 'R' ? EXTRA_CONSTRAINT_R (X) : \
+ /* A local PIC symbol? */ \
+ (C) == 'S' ? EXTRA_CONSTRAINT_S (X) : \
+ /* A three-address addressing-mode? */ \
+ (C) == 'T' ? EXTRA_CONSTRAINT_T (X) : \
+ /* A PLT symbol? */ \
+ (C) == 'U' ? EXTRA_CONSTRAINT_U (X) : \
+ 0)
+
+#define EXTRA_MEMORY_CONSTRAINT(X, STR) ((X) == 'Q')
+
+#define EXTRA_CONSTRAINT_Q(X) \
+ ( \
+ /* Just an indirect register (happens to also be \
+ "all" slottable memory addressing modes not \
+ covered by other constraints, i.e. '>'). */ \
+ MEM_P (X) && BASE_P (XEXP (X, 0)) \
+ )
+
+#define EXTRA_CONSTRAINT_R(X) \
+ ( \
+ /* An operand to BDAP or BIAP: \
+ A BIAP; r.S? */ \
+ BIAP_INDEX_P (X) \
+ /* A [reg] or (int) [reg], maybe with post-increment. */ \
+ || BDAP_INDEX_P (X) \
+ || CONSTANT_INDEX_P (X) \
+ )
+
+#define EXTRA_CONSTRAINT_T(X) \
+ ( \
+ /* Memory three-address operand. All are indirect-memory: */ \
+ MEM_P (X) \
+ && ((MEM_P (XEXP (X, 0)) \
+ /* Double indirect: [[reg]] or [[reg+]]? */ \
+ && (BASE_OR_AUTOINCR_P (XEXP (XEXP (X, 0), 0)))) \
+ /* Just an explicit indirect reference: [const]? */ \
+ || CONSTANT_P (XEXP (X, 0)) \
+ /* Something that is indexed; [...+...]? */ \
+ || (GET_CODE (XEXP (X, 0)) == PLUS \
+ /* A BDAP constant: [reg+(8|16|32)bit offset]? */ \
+ && ((BASE_P (XEXP (XEXP (X, 0), 0)) \
+ && CONSTANT_INDEX_P (XEXP (XEXP (X, 0), 1))) \
+ /* A BDAP register: [reg+[reg(+)].S]? */ \
+ || (BASE_P (XEXP (XEXP (X, 0), 0)) \
+ && BDAP_INDEX_P(XEXP(XEXP(X, 0), 1))) \
+ /* Same, but with swapped arguments (no canonical \
+ ordering between e.g. REG and MEM as of LAST_UPDATED \
+ "Thu May 12 03:59:11 UTC 2005"). */ \
+ || (BASE_P (XEXP (XEXP (X, 0), 1)) \
+ && BDAP_INDEX_P (XEXP (XEXP (X, 0), 0))) \
+ /* A BIAP: [reg+reg.S] (MULT comes first). */ \
+ || (BASE_P (XEXP (XEXP (X, 0), 1)) \
+ && BIAP_INDEX_P (XEXP (XEXP (X, 0), 0)))))) \
+ )
+
+/* PIC-constructs for symbols. */
+#define EXTRA_CONSTRAINT_S(X) \
+ (flag_pic && GET_CODE (X) == CONST && cris_valid_pic_const (X, false))
+
+#define EXTRA_CONSTRAINT_U(X) \
+ (flag_pic \
+ && CONSTANT_P (X) \
+ && cris_nonmemory_operand_or_callable_symbol (X, VOIDmode))
+
+
+/* Node: Frame Layout */
+
+#define STACK_GROWS_DOWNWARD
+#define FRAME_GROWS_DOWNWARD 1
+
+/* It seems to be indicated in the code (at least 2.1) that this is
+ better as a constant, and best as 0. */
+#define STARTING_FRAME_OFFSET 0
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
+ cris_return_addr_rtx (COUNT, FRAMEADDR)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, CRIS_SRP_REGNUM)
+
+/* FIXME: Any __builtin_eh_return callers must not return anything and
+ there must not be collisions with incoming parameters. Luckily the
+ number of __builtin_eh_return callers is limited. For now return
+ parameter registers in reverse order and hope for the best. */
+#define EH_RETURN_DATA_REGNO(N) \
+ (IN_RANGE ((N), 0, 3) ? (CRIS_FIRST_ARG_REG + 3 - (N)) : INVALID_REGNUM)
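+
+/* Assuming CRIS_FIRST_ARG_REG is r10 (as the parameter-register
+ comment further up suggests), this maps N = 0..3 to r13..r10 and
+ anything higher to INVALID_REGNUM. */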
+
+/* Store the stack adjustment in the structure-return-address register. */
+#define CRIS_STACKADJ_REG CRIS_STRUCT_VALUE_REGNUM
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, CRIS_STACKADJ_REG)
+
+#define EH_RETURN_HANDLER_RTX \
+ cris_return_addr_rtx (0, NULL)
+
+#define INIT_EXPANDERS cris_init_expanders ()
+
+/* FIXME: Move this to right node (it's not documented properly yet). */
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (CRIS_SRP_REGNUM)
+
+/* FIXME: Move this to right node (it's not documented properly yet).
+ FIXME: Check what alignment we can assume regarding
+ TARGET_STACK_ALIGN and TARGET_ALIGN_BY_32. */
+#define DWARF_CIE_DATA_ALIGNMENT -1
+
+/* If we ever needed an exact mapping between canonical register
+ number and dwarf frame register, we would either need to include all
+ registers in the gcc description (with some marked fixed of course), or
+ an inverse mapping from dwarf register to gcc register. There is one
+ need in dwarf2out.c:expand_builtin_init_dwarf_reg_sizes. Right now, I
+ don't see that we need exact correspondence between DWARF *frame*
+ registers and DBX_REGISTER_NUMBER, so map them onto GCC registers. */
+#define DWARF_FRAME_REGNUM(REG) (REG)
+
+/* Node: Stack Checking */
+/* (no definitions) FIXME: Check. */
+
+/* Node: Frame Registers */
+
+#define STACK_POINTER_REGNUM CRIS_SP_REGNUM
+
+/* Register used for frame pointer. This is also the last of the saved
+ registers, when a frame pointer is not used. */
+#define FRAME_POINTER_REGNUM CRIS_FP_REGNUM
+
+/* A faked register; it is always eliminated. We need it to avoid
+ allocating stack slots for the return address and the frame pointer. */
+#define ARG_POINTER_REGNUM CRIS_AP_REGNUM
+
+#define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM
+
+
+/* Node: Elimination */
+
+#define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = cris_initial_elimination_offset (FROM, TO)
+
+
+/* Node: Stack Arguments */
+
+/* Since many parameters take up one register each in any case,
+ defining TARGET_PROMOTE_PROTOTYPES to always return true would
+ seem like a good idea, but measurements indicate that a combination
+ using PROMOTE_MODE is better. */
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+/* Node: Register Arguments */
+
+/* Contrary to what you'd believe, defining FUNCTION_ARG_CALLEE_COPIES
+ seems like a (small total) loss, at least for gcc-2.7.2 compiling and
+ running gcc-2.1 (small win in size, small loss running -- 100.1%),
+ and similarly for size for products (.1 .. .3% bloat, sometimes win).
+ Because it empirically tends to make slower code, it is not
+ defined. */
+
+/* This no longer *needs* to be a structure; but keeping it as such should
+ not hurt (and hacking the ABI is simpler). */
+#define CUMULATIVE_ARGS struct cum_args
+struct cum_args {int regs;};
+
+/* The regs member is an integer, the number of arguments that have
+ been passed in registers so far. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ ((CUM).regs = 0)
+
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ ((REGNO) >= CRIS_FIRST_ARG_REG \
+ && (REGNO) < CRIS_FIRST_ARG_REG + (CRIS_MAX_ARGS_IN_REGS))
+
+
+/* Node: Scalar Return */
+
+#define FUNCTION_VALUE_REGNO_P(N) cris_function_value_regno_p (N)
+
+
+
+/* Node: Aggregate Return */
+
+#define CRIS_STRUCT_VALUE_REGNUM ((CRIS_FIRST_ARG_REG) - 1)
+
+
+/* Node: Caller Saves */
+/* (no definitions) */
+
+/* Node: Function entry */
+
+/* See cris.c for TARGET_ASM_FUNCTION_PROLOGUE and
+ TARGET_ASM_FUNCTION_EPILOGUE. */
+
+/* Node: Profiling */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ error ("no FUNCTION_PROFILER for CRIS")
+
+/* FIXME: Some of the undefined macros might be mandatory. If so, fix
+ documentation. */
+
+
+/* Node: Trampolines */
+
+#define TRAMPOLINE_SIZE (TARGET_V32 ? 58 : 32)
+
+/* CRIS wants instructions on a word boundary. */
+#define TRAMPOLINE_ALIGNMENT 16
+
+/* Node: Library Calls */
+
+/* If you change this, you have to check whichever libraries and
+ systems use it. */
+#define TARGET_EDOM 33
+
+
+/* Node: Addressing Modes */
+
+#define HAVE_POST_INCREMENT 1
+
+/* Must be a compile-time constant, so we go with the highest value
+ among all CRIS variants. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* There are helper macros defined here which are used only in
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ Note that you *have to* reject invalid addressing modes for mode
+ MODE, even if the address is legal for normal addressing modes. You
+ cannot rely on the constraints to do this work; they can only be
+ used to double-check your intentions. One example is that you HAVE
+ TO reject (mem:DI (plus:SI (reg:SI x) (reg:SI y))) because for some
+ reason this cannot be reloaded. (You can of course argue that gcc
+ should have handled that.) FIXME: Strange. Check. */
+
+/* No symbol can be used as an index (or more correctly, as a base)
+ together with a register when generating PIC; the PIC register must
+ be there. */
+#define CONSTANT_INDEX_P(X) \
+ (CONSTANT_P (X) && (!flag_pic || cris_valid_pic_const (X, true)))
+
+/* True if X is a valid base register. */
+#define BASE_P(X) \
+ (REG_P (X) && REG_OK_FOR_BASE_P (X))
+
+/* True if X is a valid base register with or without autoincrement. */
+#define BASE_OR_AUTOINCR_P(X) \
+ (BASE_P (X) \
+ || (GET_CODE (X) == POST_INC \
+ && BASE_P (XEXP (X, 0)) \
+ && REGNO (XEXP (X, 0)) != CRIS_ACR_REGNUM))
+
+/* True if X is a valid (register) index for BDAP, i.e. [Rs].S or [Rs+].S. */
+#define BDAP_INDEX_P(X) \
+ ((MEM_P (X) && GET_MODE (X) == SImode \
+ && BASE_OR_AUTOINCR_P (XEXP (X, 0))) \
+ || (GET_CODE (X) == SIGN_EXTEND \
+ && MEM_P (XEXP (X, 0)) \
+ && (GET_MODE (XEXP (X, 0)) == HImode \
+ || GET_MODE (XEXP (X, 0)) == QImode) \
+ && BASE_OR_AUTOINCR_P (XEXP (XEXP (X, 0), 0))))
+
+/* True if X is a valid (register) index for BIAP, i.e. Rd.m. */
+#define BIAP_INDEX_P(X) \
+ ((BASE_P (X) && REG_OK_FOR_INDEX_P (X)) \
+ || (GET_CODE (X) == MULT \
+ && BASE_P (XEXP (X, 0)) \
+ && REG_OK_FOR_INDEX_P (XEXP (X, 0)) \
+ && CONST_INT_P (XEXP (X, 1)) \
+ && (INTVAL (XEXP (X, 1)) == 2 \
+ || INTVAL (XEXP (X, 1)) == 4)))
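+
+/* For instance, (mult:SI (reg:SI 3) (const_int 4)) is a valid BIAP
+ index, while (mem:SI (post_inc:SI (reg:SI 5))) and
+ (sign_extend:SI (mem:HI (reg:SI 5))) are valid BDAP indexes. */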
+
+/* A PIC operand looks like a normal symbol here. At output we dress it
+ in "[rPIC+symbol:GOT]" (global symbol) or "rPIC+symbol:GOTOFF" (local
+ symbol) so we exclude all addressing modes where we can't replace a
+ plain "symbol" with that. A global PIC symbol does not fit anywhere
+ here (but is thankfully a general_operand in itself). A local PIC
+ symbol is valid for the plain "symbol + offset" case. */
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+ { \
+ rtx x1, x2; \
+ if (BASE_OR_AUTOINCR_P (X)) \
+ goto ADDR; \
+ else if (TARGET_V32) \
+ /* Nothing else is valid then. */ \
+ ; \
+ else if (CONSTANT_INDEX_P (X)) \
+ goto ADDR; \
+ /* Indexed? */ \
+ else if (GET_CODE (X) == PLUS) \
+ { \
+ x1 = XEXP (X, 0); \
+ x2 = XEXP (X, 1); \
+ /* BDAP o, Rd. */ \
+ if ((BASE_P (x1) && CONSTANT_INDEX_P (x2)) \
+ || (BASE_P (x2) && CONSTANT_INDEX_P (x1)) \
+ /* BDAP Rs[+], Rd. */ \
+ || (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \
+ && ((BASE_P (x1) && BDAP_INDEX_P (x2)) \
+ || (BASE_P (x2) && BDAP_INDEX_P (x1)) \
+ /* BIAP.m Rs, Rd */ \
+ || (BASE_P (x1) && BIAP_INDEX_P (x2)) \
+ || (BASE_P (x2) && BIAP_INDEX_P (x1))))) \
+ goto ADDR; \
+ } \
+ else if (MEM_P (X)) \
+ { \
+ /* DIP (Rs). Reject [[reg+]] and [[reg]] for \
+ DImode (long long). */ \
+ if (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \
+ && (BASE_P (XEXP (X, 0)) \
+ || BASE_OR_AUTOINCR_P (XEXP (X, 0)))) \
+ goto ADDR; \
+ } \
+ }
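+
+/* To illustrate, on non-V32 the above accepts e.g. (reg:SI 5),
+ (post_inc:SI (reg:SI 5)), a constant address, the BDAP form
+ (plus:SI (reg:SI 5) (const_int 42)) and, for word-size and smaller
+ modes, BIAP forms like (plus:SI (mult:SI (reg:SI 3) (const_int 4))
+ (reg:SI 5)) and the DIP form (mem:SI (reg:SI 5)); V32 accepts only
+ the first two. */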
+
+#ifndef REG_OK_STRICT
+ /* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+# define REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) <= CRIS_LAST_GENERAL_REGISTER \
+ || REGNO (X) == ARG_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+#else
+ /* Nonzero if X is a hard reg that can be used as a base reg. */
+# define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#endif
+
+#ifndef REG_OK_STRICT
+ /* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+# define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+#else
+ /* Nonzero if X is a hard reg that can be used as an index. */
+# define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#endif
+
+/* Fix reloads known to cause suboptimal spilling. */
+#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, INDL, WIN) \
+ do \
+ { \
+ if (cris_reload_address_legitimized (X, MODE, OPNUM, TYPE, INDL)) \
+ goto WIN; \
+ } \
+ while (0)
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+
+/* Node: Condition Code */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) cris_notice_update_cc (EXP, INSN)
+
+/* FIXME: Maybe define CANONICALIZE_COMPARISON later, when playing with
+ optimizations. It is needed; currently we do this with instruction
+ patterns and NOTICE_UPDATE_CC. */
+
+
+/* Node: Costs */
+
+/* Regardless of the presence of delay slots, the default value of 1 for
+ BRANCH_COST is the best in the range (1, 2, 3), tested with gcc-2.7.2
+ with testcases ipps and gcc, giving smallest and fastest code. */
+
+#define SLOW_BYTE_ACCESS 0
+
+/* This is the threshold *below* which inline move sequences of
+ word-length sizes will be emitted. The "9" will translate to
+ (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions
+ (8 instruction sequences) or less. */
+#define MOVE_RATIO(speed) 9
+
+
+/* Node: Sections */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#define FORCE_EH_FRAME_INFO_IN_DATA_SECTION (! TARGET_ELF)
+
+/* The jump table is immediately connected to the preceding insn. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+
+/* Node: PIC */
+
+/* Helper type. */
+
+enum cris_pic_symbol_type
+ {
+ cris_no_symbol = 0,
+ cris_got_symbol = 1,
+ cris_rel_symbol = 2,
+ cris_got_symbol_needing_fixup = 3,
+ cris_invalid_pic_symbol = 4
+ };
+
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? CRIS_GOT_REGNUM : INVALID_REGNUM)
+
+#define LEGITIMATE_PIC_OPERAND_P(X) cris_legitimate_pic_operand (X)
+
+
+/* Node: File Framework */
+
+/* We don't want an .ident for gcc. To avoid that but still support
+ #ident, we override ASM_OUTPUT_IDENT and, since the gcc .ident is the
+ only use of IDENT_ASM_OP besides ASM_OUTPUT_IDENT, undef IDENT_ASM_OP
+ from elfos.h. */
+#undef IDENT_ASM_OP
+#undef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(FILE, NAME) \
+ fprintf (FILE, "%s\"%s\"\n", "\t.ident\t", NAME);
+
+#define ASM_APP_ON "#APP\n"
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+
+/* Node: Data Output */
+
+#define OUTPUT_ADDR_CONST_EXTRA(STREAM, X, FAIL) \
+ do { if (!cris_output_addr_const_extra (STREAM, X)) goto FAIL; } while (0)
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) (C) == '@'
+
+/* Node: Uninitialized Data */
+
+/* Remember to round off odd values if we want data alignment,
+ since we cannot do that with an .align directive.
+
+ Using .comm causes the space not to be reserved in .bss; instead it
+ is "reserved" by tricks with the symbol type. Not good if tools
+ other than binutils are used on the object files. Since
+ ".global ... .lcomm ..." works, we use that. Use .._ALIGNED_COMMON,
+ since gcc whines when we only have ..._COMMON, and we prefer to
+ whine ourselves; BIGGEST_ALIGNMENT is not the one to check. This is
+ done for a.out only. */
+/* FIXME: I suspect a bug in gcc with alignment. Do not warn until
+ investigated; it mucks up the testsuite results. */
+#define CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, LOCAL) \
+ do \
+ { \
+ int align_ = (ALIGN) / BITS_PER_UNIT; \
+ if (TARGET_DATA_ALIGN && TARGET_ALIGN_BY_32 && align_ < 4) \
+ align_ = 4; \
+ else if (TARGET_DATA_ALIGN && align_ < 2) \
+ align_ = 2; \
+ /* FIXME: Do we need this? */ \
+ else if (align_ < 1) \
+ align_ = 1; \
+ \
+ if (TARGET_ELF) \
+ { \
+ if (LOCAL) \
+ { \
+ fprintf ((FILE), "%s", LOCAL_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), "\n"); \
+ } \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ",%u,%u\n", (int)(SIZE), align_); \
+ } \
+ else \
+ { \
+ /* We can't tell a one-only or weak COMM from a "global \
+ COMM" so just make all non-locals weak. */ \
+ if (! (LOCAL)) \
+ ASM_WEAKEN_LABEL (FILE, NAME); \
+ fputs ("\t.lcomm ", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ",%u\n", \
+ ((int)(SIZE) + (align_ - 1)) & ~(align_ - 1)); \
+ } \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
+ CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 0)
+
+#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+ CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 1)
+
+/* Node: Label Output */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+#define SUPPORTS_WEAK 1
+
+#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYM) \
+ cris_asm_output_symbol_ref (STREAM, SYM)
+
+#define ASM_OUTPUT_LABEL_REF(STREAM, BUF) \
+ cris_asm_output_label_ref (STREAM, BUF)
+
+/* Remove any previous definition (elfos.h). */
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long) NUM)
+
+/* Node: Initialization */
+/* (no definitions) */
+
+/* Node: Macros for Initialization */
+/* (no definitions) */
+
+/* Node: Instruction Output */
+
+#define REGISTER_NAMES \
+ {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \
+ "r9", "r10", "r11", "r12", "r13", "sp", "acr", "srp", "mof", "faked_ap", "dccr"}
+
+#define ADDITIONAL_REGISTER_NAMES \
+ {{"r14", 14}, {"r15", 15}, {"pc", 15}}
+
+/* Output an empty line to illustrate the presence of the delay slot. */
+#define DBR_OUTPUT_SEQEND(FILE) \
+ fprintf (FILE, "\n")
+
+#define LOCAL_LABEL_PREFIX (TARGET_ELF ? "." : "")
+
+/* cppinit.c initializes a const array from this, so it must be
+ constant; we can't have it differ based on options. Luckily, the
+ prefix is always allowed, so let's have it on all GCC-generated code.
+ Note that we have this verbatim everywhere in the back-end, not using
+ %R or %s or such. */
+#define REGISTER_PREFIX "$"
+
+/* Remove any previous definition (elfos.h). */
+/* We use -fno-leading-underscore to remove it, when necessary. */
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+ fprintf (FILE, \
+ TARGET_V32 \
+ ? "\tsubq 4,$sp\n\tmove $%s,[$sp]\n" : "\tpush $%s\n", \
+ reg_names[REGNO])
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+ fprintf (FILE, "\tmove [$sp+],$%s\n", reg_names[REGNO])
+
+
+/* Node: Dispatch Tables */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ do \
+ { \
+ if (TARGET_V32) \
+ asm_fprintf (FILE, "\t.word %LL%d-.\n", VALUE); \
+ else \
+ asm_fprintf (FILE, "\t.word %LL%d-%LL%d\n", VALUE, REL); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ asm_fprintf (FILE, "\t.dword %LL%d\n", VALUE)
+
+/* Defined to also emit an .align in elfos.h. We don't want that. */
+#undef ASM_OUTPUT_CASE_LABEL
+
+/* Since the "bound" insn loads the comparison value if the compared<
+ value (register) is out of bounds (0..comparison value-1), we need
+ to output another case to catch it.
+ The way to find it is to look for the label_ref at the else-arm inside
+ the expanded casesi core-insn.
+ FIXME: Check this construct when changing to new version of gcc. */
+#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \
+ cris_asm_output_case_end (STREAM, NUM, TABLE)
+
+
+/* Node: Exception Region Output */
+/* (no definitions) */
+/* FIXME: Fill in with our own optimized layout. */
+
+/* Node: Alignment Output */
+
+#define ASM_OUTPUT_ALIGN(FILE, LOG) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
+
+
+/* Node: All Debuggers */
+
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ((REGNO) == CRIS_SRP_REGNUM ? CRIS_CANONICAL_SRP_REGNUM : \
+ (REGNO) == CRIS_MOF_REGNUM ? CRIS_CANONICAL_MOF_REGNUM : \
+ (REGNO) == CRIS_CC0_REGNUM ? CRIS_CANONICAL_CC0_REGNUM : \
+ (REGNO))
+
+/* FIXME: Investigate DEBUGGER_AUTO_OFFSET, DEBUGGER_ARG_OFFSET. */
+
+
+/* Node: DBX Options */
+
+/* Is this correct? Check later. */
+#define DBX_NO_XREFS
+
+#define DBX_CONTIN_LENGTH 0
+
+/* FIXME: Is this needed when we have 0 DBX_CONTIN_LENGTH? */
+#define DBX_CONTIN_CHAR '?'
+
+
+/* Node: DBX Hooks */
+/* (no definitions) */
+
+/* Node: File names and DBX */
+/* (no definitions) */
+
+
+/* Node: SDB and DWARF */
+/* (no definitions) */
+
+/* Node: Misc */
+
+/* A combination of the bound (umin) insn together with a
+ sign-extended add via the table to PC seems optimal.
+ If the table overflows, the assembler will take care of it.
+ Theoretically, in extreme cases (uncertain if they occur), an error
+ will be emitted, so FIXME: Check how large case-tables are emitted,
+ possibly add an option to emit SImode case-tables. */
+#define CASE_VECTOR_MODE HImode
+
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* FIXME: Investigate CASE_VECTOR_SHORTEN_MODE to make sure HImode is not
+ used when broken-.word could possibly fail (plus testcase). */
+
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* This is the number of bytes that can be moved in one
+ reasonably fast instruction sequence. For CRIS, this is two
+ instructions: mem => reg, reg => mem. */
+#define MOVE_MAX 4
+
+/* Maybe SHIFT_COUNT_TRUNCATED is safe to define? FIXME: Check later. */
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+
+#define Pmode SImode
+
+#define FUNCTION_MODE QImode
+
+#define NO_IMPLICIT_EXTERN_C
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
new file mode 100644
index 000000000..dd5d4940c
--- /dev/null
+++ b/gcc/config/cris/cris.md
@@ -0,0 +1,5110 @@
+;; GCC machine description for CRIS cpu cores.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+;; 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Axis Communications.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+
+;; See files "md.texi" and "rtl.def" for documentation on define_insn,
+;; match_*, et al.
+;;
+;; The function cris_notice_update_cc in cris.c handles condition code
+;; updates for most instructions, helped by the "cc" attribute.
+
+;; There are several instructions that are orthogonal in size, and it
+;; seems they could be matched by a single pattern without a specified
+;; size for the operand that is orthogonal. However, this did not work
+;; on gcc-2.7.2 (and probably not on gcc-2.8.1), because when a
+;; constant is substituted into an operand, the actual mode must be
+;; deduced from the pattern. There is reasonable hope that that has
+;; been fixed, so FIXME: try again.
+
+;; You will notice that three-operand alternatives ("=r", "r", "!To")
+;; are marked with a "!" constraint modifier to avoid being reloaded
+;; into. This is because gcc would otherwise prefer to use the constant
+;; pool and its offsettable address instead of reloading to an
+;; ("=r", "0", "i") alternative. Also, the constant-pool support was not
+;; only suboptimal but also buggy in 2.7.2, ??? maybe only in 2.6.3.
+
+;; All insns that look like (set (...) (plus (...) (reg:SI 8)))
+;; get problems when reloading r8 (frame pointer) to r14 + offs (stack
+;; pointer). Thus the instructions that get into trouble have specific
+;; checks against matching frame_pointer_rtx.
+;; ??? But it should be re-checked for gcc > 2.7.2
+;; FIXME: This changed some time ago (from 2000-03-16) for gcc-2.9x.
+
+;; FIXME: When PIC, all [rX=rY+S] could be enabled to match
+;; [rX=gotless_symbol].
+;; The movsi for a gotless symbol could be split (post reload).
+
+
+(define_constants
+ [
+ ;; PLT reference from call expansion: operand 0 is the address,
+ ;; the mode is VOIDmode. Always wrapped in CONST.
+ ;; The value is relative to the GOT.
+ (CRIS_UNSPEC_PLT_GOTREL 0)
+
+ ;; PLT reference from call expansion: operand 0 is the address,
+ ;; the mode is VOIDmode. Always wrapped in CONST.
+ ;; The value is relative to the PC. It's arch-dependent whether
+ ;; the offset counts from the start or the end of the current item.
+ (CRIS_UNSPEC_PLT_PCREL 1)
+
+ ;; The address of the global offset table as a source operand.
+ (CRIS_UNSPEC_GOT 2)
+
+ ;; The offset from the global offset table to the operand.
+ (CRIS_UNSPEC_GOTREL 3)
+
+ ;; The PC-relative offset to the operand. It's arch-dependent whether
+ ;; the offset counts from the start or the end of the current item.
+ (CRIS_UNSPEC_PCREL 4)
+
+ ;; The index into the global offset table of a symbol, while
+ ;; also generating a GOT entry for the symbol.
+ (CRIS_UNSPEC_GOTREAD 5)
+
+ ;; Similar to CRIS_UNSPEC_GOTREAD, but also generating a PLT entry.
+ (CRIS_UNSPEC_PLTGOTREAD 6)
+
+ ;; Condition for v32 casesi jump, since it needs to have if_then_else
+ ;; form with register as one branch and default label as other.
+ ;; Operand 0 is const_int 0.
+ (CRIS_UNSPEC_CASESI 7)
+
+ ;; Stack frame deallocation barrier.
+ (CRIS_UNSPEC_FRAME_DEALLOC 8)
+
+ ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1...
+ (CRIS_UNSPEC_SWAP_BITS 9)
+ ])
+
+;; Register numbers.
+(define_constants
+ [(CRIS_GOT_REGNUM 0)
+ (CRIS_STATIC_CHAIN_REGNUM 7)
+ (CRIS_FP_REGNUM 8)
+ (CRIS_SP_REGNUM 14)
+ (CRIS_ACR_REGNUM 15)
+ (CRIS_SRP_REGNUM 16)
+ (CRIS_MOF_REGNUM 17)
+ (CRIS_AP_REGNUM 18)
+ (CRIS_CC0_REGNUM 19)]
+)
+
+;; We need an attribute to define whether an instruction can be put in
+;; a branch-delay slot or not, and whether it has a delay slot.
+;;
+;; Branches and return instructions have a delay slot, and cannot
+;; themselves be put in a delay slot. This has changed *for short
+;; branches only* between architecture variants, but the possible win
+;; is presumed negligible compared to the added complexity of the machine
+;; description: one would have to add always-correct infrastructure to
+;; distinguish short branches.
+;;
+;; Whether an instruction can be put in a delay slot depends on the
+;; instruction (all short instructions except jumps and branches)
+;; and the addressing mode (must not be prefixed or referring to pc).
+;; In short, any "slottable" instruction must be 16 bit and not refer
+;; to pc, or alter it.
+;;
+;; The possible values are "yes", "no", "has_slot", "has_return_slot"
+;; and "has_call_slot".
+;; Yes/no tells whether the insn is slottable or not. Has_call_slot means
+;; that the insn is a call insn, which for CRIS v32 has a delay-slot.
+;; Of special concern is that no RTX_FRAME_RELATED insn must go in that
+;; call delay slot, as it's located in the address *after* the call insn,
+;; and the unwind machinery doesn't know about delay slots.
+;; Has_slot means that the insn is a branch insn (branch insns are
+;; not considered slottable, since that is generally true). Having the
+;; seemingly illogical value "has_slot" means we do not have to add
+;; another attribute just to say that an insn has a delay-slot, since it
+;; also implies that it is not slottable. Better names for the attribute
+;; were found to be longer and not add readability to the machine
+;; description.
+;; Has_return_slot is similar, for the return insn.
+;;
+;; The default that is defined here for this attribute is "no", not
+;; slottable, not having a delay-slot, so there's no need to worry about
+;; it being wrong for non-branch and return instructions.
+;; The default could depend on the kind of insn and the addressing
+;; mode, but that would need more attributes and hairier, more error
+;; prone code.
+;;
+;; There is an extra memory constraint, 'Q', which recognizes an indirect
+;; register. The constraints 'Q' and '>' together match all possible
+;; memory operands that are slottable.
+;; For other operands, you need to check if the insn has a valid
+;; "slottable" quick-immediate operand (the particular
+;; signedness-variation may match the constraints 'I' or 'J'), and
+;; include it in the constraint pattern for the slottable pattern.
+;; An alternative using
+;; only "r" constraints is most often slottable.
+
+(define_attr "slottable" "no,yes,has_slot,has_return_slot,has_call_slot"
+ (const_string "no"))
+
+;; We also need attributes to sanely determine the condition code
+;; state. See cris_notice_update_cc for how this is used.
+
+(define_attr "cc" "none,clobber,normal,noov32,rev" (const_string "normal"))
+
+;; At the moment, this attribute is just used to help bb-reorder do its
+;; work; the default 0 doesn't help it. Many insns have other lengths,
+;; though none are shorter.
+(define_attr "length" "" (const_int 2))
+
+;; A branch has one delay-slot. The instruction in the
+;; delay-slot is always executed, independent of whether the branch is
+;; taken or not. Note that besides setting "slottable" to "has_slot",
+;; there also has to be a "%#" at the end of a "delayed" instruction
+;; output pattern (for "jump" this means "ba %l0%#"), so print_operand can
+;; catch it and print a "nop" if necessary. This method was stolen from
+;; sparc.md.
+
+(define_delay (eq_attr "slottable" "has_slot")
+ [(eq_attr "slottable" "yes") (nil) (nil)])
+
+;; We can't put prologue insns in call-insn delay-slots when
+;; DWARF2 unwind info is emitted, because the unwinder matches the
+;; address after the insn. It must see the return address of a call at
+;; a position at least *one byte after* the insn, or it'll think that
+;; the insn hasn't been executed. If the insn is in a delay-slot of a
+;; call, it's just *exactly* after the insn.
+
+(define_delay (eq_attr "slottable" "has_call_slot")
+ [(and (eq_attr "slottable" "yes")
+ (ior (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)")
+ (const_int 0))
+ (eq (symbol_ref "flag_exceptions")
+ (const_int 0))))
+ (nil) (nil)])
+
+;; The insn in the return insn slot must not be the
+;; return-address-register restore. FIXME: Use has_slot and express
+;; as a parallel with a use of the return-address-register (currently
+;; only SRP). However, this requires a fair amount of fixing of tests
+;; for naked RETURN in the middle-end.
+(define_delay (eq_attr "slottable" "has_return_slot")
+ [(and (eq_attr "slottable" "yes")
+ (eq (symbol_ref "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)")
+ (const_int 0)))
+ (nil) (nil)])
+
+
+;; Iterator definitions.
+
+;; For the "usual" pattern size alternatives.
+(define_mode_iterator BWD [SI HI QI])
+(define_mode_iterator WD [SI HI])
+(define_mode_iterator BW [HI QI])
+(define_mode_attr S [(SI "HI") (HI "QI")])
+(define_mode_attr s [(SI "hi") (HI "qi")])
+(define_mode_attr m [(SI ".d") (HI ".w") (QI ".b")])
+(define_mode_attr mm [(SI ".w") (HI ".b")])
+(define_mode_attr nbitsm1 [(SI "31") (HI "15") (QI "7")])
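+
+;; For instance, a pattern written with the BW iterator and "cmp<m>"
+;; expands to an HImode variant emitting "cmp.w" and a QImode variant
+;; emitting "cmp.b".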
+
+;; For the sign_extend+zero_extend variants.
+(define_code_iterator szext [sign_extend zero_extend])
+(define_code_attr u [(sign_extend "") (zero_extend "u")])
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; For the shift variants.
+(define_code_iterator shift [ashiftrt lshiftrt ashift])
+(define_code_iterator shiftrt [ashiftrt lshiftrt])
+(define_code_attr shlr [(ashiftrt "ashr") (lshiftrt "lshr") (ashift "ashl")])
+(define_code_attr slr [(ashiftrt "asr") (lshiftrt "lsr") (ashift "lsl")])
+
+(define_code_iterator ncond [eq ne gtu ltu geu leu])
+(define_code_iterator ocond [gt le])
+(define_code_iterator rcond [lt ge])
+(define_code_attr CC [(eq "eq") (ne "ne") (gt "gt") (gtu "hi") (lt "lt")
+ (ltu "lo") (ge "ge") (geu "hs") (le "le") (leu "ls")])
+(define_code_attr rCC [(eq "ne") (ne "eq") (gt "le") (gtu "ls") (lt "ge")
+ (ltu "hs") (ge "lt") (geu "lo") (le "gt") (leu "hi")])
+(define_code_attr oCC [(lt "mi") (ge "pl")])
+(define_code_attr roCC [(lt "pl") (ge "mi")])
+
+;; Operand and operator predicates.
+
+(include "predicates.md")
+
+;; Test insns.
+
+;; No test insns with side-effect on the mem addressing.
+;;
+;; See the note below on cmp-insns with side-effects (or lack of them).
+
+;; Normal named test patterns from SI on.
+
+(define_insn "*tstsi"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "r,Q>,m")
+ (const_int 0)))]
+ ""
+{
+ if (which_alternative == 0 && TARGET_V32)
+ return "cmpq 0,%0";
+ return "test.d %0";
+}
+ [(set_attr "slottable" "yes,yes,no")])
+
+(define_insn "*tst<mode>_cmp"
+ [(set (cc0)
+ (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m")
+ (const_int 0)))]
+ "cris_cc0_user_requires_cmp (insn)"
+ "@
+ cmp<m> 0,%0
+ test<m> %0
+ test<m> %0"
+ [(set_attr "slottable" "no,yes,no")])
+
+(define_insn "*tst<mode>_non_cmp"
+ [(set (cc0)
+ (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m")
+ (const_int 0)))]
+ "!cris_cc0_user_requires_cmp (insn)"
+ "@
+ move<m> %0,%0
+ test<m> %0
+ test<m> %0"
+ [(set_attr "slottable" "yes,yes,no")
+ (set_attr "cc" "noov32,*,*")])
+
+;; It seems that the position of the sign-bit and the fact that 0.0 is
+;; all 0-bits would make "tstsf" a straightforward implementation;
+;; either "test.d" it for positive/negative or "btstq 30,r" it for
+;; zeroness.
+;;
+;; FIXME: Do that some time; check next_cc0_user to determine if
+;; zero or negative is tested for.
+
+;; Compare insns.
+
+;; We could optimize the sizes of the immediate operands for various
+;; cases, but that is not worth it because DImode sees very little use
+;; for anything but structure/block-mode things. Just do the obvious
+;; stuff for the straightforward constraint letters.
+
+(define_insn "*cmpdi_non_v32"
+ [(set (cc0)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "rm,r,r,r,r,r,r,o")
+ (match_operand:DI 1 "general_operand" "M,Kc,I,P,n,r,o,r")))]
+ "!TARGET_V32"
+ "@
+ test.d %M0\;ax\;test.d %H0
+ cmpq %1,%M0\;ax\;cmpq 0,%H0
+ cmpq %1,%M0\;ax\;cmpq -1,%H0
+ cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0
+ cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0
+ cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0
+ cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0
+ cmp.d %M0,%M1\;ax\;cmp.d %H0,%H1")
+
+(define_insn "*cmpdi_v32"
+ [(set (cc0)
+ (compare (match_operand:DI 0 "register_operand" "r,r,r,r,r")
+ (match_operand:DI 1 "nonmemory_operand" "Kc,I,P,n,r")))]
+ "TARGET_V32"
+ "@
+ cmpq %1,%M0\;ax\;cmpq 0,%H0
+ cmpq %1,%M0\;ax\;cmpq -1,%H0
+ cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0
+ cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0
+ cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0")
+
+;; Note that compare insns with side effect addressing mode (e.g.):
+;;
+;; cmp.S [rx=ry+i],rz;
+;; cmp.S [%3=%1+%2],%0
+;;
+;; are *not* usable for gcc since the reloader *does not accept*
+;; cc0-changing insns with side-effects other than setting the condition
+;; codes. The reason is that the reload stage *may* cause another insn to
+;; be output after the main instruction, in turn invalidating cc0 for the
+;; insn using the test. (This does not apply to the CRIS case, since a
+;; reload for output -- move to memory -- does not change the condition
+;; code. Unfortunately we have no way to describe that at the moment; I
+;; think code would improve on the order of one percent in speed.)
+
+;; We have cmps and cmpu (compare reg w. sign/zero extended mem).
+;; These are mostly useful for compares in SImode, using 8 or 16-bit
+;; constants, but sometimes gcc will find its way to use it for other
+;; (memory) operands. Avoid side-effect patterns, though (see above).
+
+(define_insn "*cmp_ext<mode>"
+ [(set (cc0)
+ (compare
+ (match_operand:SI 0 "register_operand" "r,r")
+ (match_operator:SI 2 "cris_extend_operator"
+ [(match_operand:BW 1 "memory_operand" "Q>,m")])))]
+ ""
+ "cmp%e2<m> %1,%0"
+ [(set_attr "slottable" "yes,no")])
+
+;; Swap operands; it seems the canonical look (if any) is not enforced.
+;;
+;; FIXME: Investigate that.
+
+(define_insn "*cmp_swapext<mode>"
+ [(set (cc0)
+ (compare
+ (match_operator:SI 2 "cris_extend_operator"
+ [(match_operand:BW 0 "memory_operand" "Q>,m")])
+ (match_operand:SI 1 "register_operand" "r,r")))]
+ ""
+ "cmp%e2<m> %0,%1"
+ [(set_attr "slottable" "yes,no")
+ (set_attr "cc" "rev")])
+
+;; The "normal" compare patterns, from SI on. Special-cases with zero
+;; are covered above.
+
+(define_insn "*cmpsi"
+ [(set (cc0)
+ (compare
+ (match_operand:SI 0 "nonimmediate_operand" "r,r,r, Q>,r,r,m")
+ (match_operand:SI 1 "general_operand" "I,r,Q>,r, P,g,r")))]
+ ""
+ "@
+ cmpq %1,%0
+ cmp.d %1,%0
+ cmp.d %1,%0
+ cmp.d %0,%1
+ cmp%e1.%z1 %1,%0
+ cmp.d %1,%0
+ cmp.d %0,%1"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no,no")
+ (set_attr "cc" "normal,normal,normal,rev,normal,normal,rev")])
+
+(define_insn "*cmp<mode>"
+ [(set (cc0)
+ (compare (match_operand:BW 0 "nonimmediate_operand" "r,r, Q>,r,m")
+ (match_operand:BW 1 "general_operand" "r,Q>,r, g,r")))]
+ ""
+ "@
+ cmp<m> %1,%0
+ cmp<m> %1,%0
+ cmp<m> %0,%1
+ cmp<m> %1,%0
+ cmp<m> %0,%1"
+ [(set_attr "slottable" "yes,yes,yes,no,no")
+ (set_attr "cc" "normal,normal,rev,normal,rev")])
+
+;; Pattern matching the BTST insn.
+;; It is useful for "if (i & val)" constructs, where val is an exact
+;; power of 2, or where val + 1 is a power of two (in which case we
+;; check for a bunch of zeros starting at bit 0).
+
+;; SImode. This mode is the only one needed, since gcc automatically
+;; extends subregs for lower-size modes. FIXME: Add testcase.
+(define_insn "*btst"
+ [(set (cc0)
+ (compare
+ (zero_extract:SI
+ (match_operand:SI 0 "nonmemory_operand" "r, r,r, r,r, r,Kp")
+ (match_operand:SI 1 "const_int_operand" "Kc,n,Kc,n,Kc,n,n")
+ (match_operand:SI 2 "nonmemory_operand" "M, M,Kc,n,r, r,r"))
+ (const_int 0)))]
+ ;; Either it is a single bit, or consecutive ones starting at 0.
+ ;; The btst ones depend on stuff in NOTICE_UPDATE_CC.
+ "CONST_INT_P (operands[1])
+ && (operands[1] == const1_rtx || operands[2] == const0_rtx)
+ && (REG_S_P (operands[0])
+ || (operands[1] == const1_rtx
+ && REG_S_P (operands[2])
+ && CONST_INT_P (operands[0])
+ && exact_log2 (INTVAL (operands[0])) >= 0))
+ && !TARGET_CCINIT"
+
+;; The next-to-last "&&" condition above should be caught by some kind of
+;; canonicalization in gcc, but we can easily help with it here.
+;; It results from expressions of the type
+;; "power_of_2_value & (1 << y)".
+;;
+;; Since there may be code with tests on bits (in constant position)
+;; beyond the size of a word, handle that by assuming those bits are 0.
+;; GCC should handle that, but it's a matter of easily-added belts while
+;; having suspenders.
+
+ "@
+ btstq (%1-1),%0
+ cmpq 0,%0
+ btstq %2,%0
+ clearf nz
+ btst %2,%0
+ clearf nz
+ cmpq %p0,%2"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Move insns.
+
+;; The whole mandatory movdi family is here; expander, "anonymous"
+;; recognizer and splitter. We're forced to have a movdi pattern,
+;; although GCC should be able to split it up itself. Normally it can,
+;; but if other insns have DI operands (as is the case here), reload
+;; must be able to generate or match a movdi. many testcases fail at
+;; -O3 or -fssa if we don't have this. FIXME: Fix GCC... See
+;; <URL:http://gcc.gnu.org/ml/gcc-patches/2000-04/msg00104.html>.
+;; However, a patch from Richard Kenner (similar to the cause of
+;; discussion at the URL above), indicates otherwise. See
+;; <URL:http://gcc.gnu.org/ml/gcc-patches/2000-04/msg00554.html>.
+;; The truth IMO has not been decided yet, so check from time to
+;; time by disabling the movdi patterns.
+
+;; To appease testcase gcc.c-torture/execute/920501-2.c (and others) at
+;; -O0, we need a movdi as a temporary measure. Here's how things fail:
+;; A cmpdi RTX needs reloading (global):
+;; (insn 185 326 186 (set (cc0)
+;; (compare (mem/f:DI (reg/v:SI 22) 0)
+;; (const_int 1 [0x1]))) 4 {cmpdi} (nil)
+;; (nil))
+;; Now, reg 22 is reloaded for input address, and the mem is also moved
+;; out of the instruction (into a register), since one of the operands
+;; must be a register. Reg 22 is reloaded (into reg 10), and the mem is
+;; moved out and synthesized in SImode parts (reg 9, reg 10 - should be ok
+;; wrt. overlap). The bad things happen with the synthesis in
+;; emit_move_insn_1; the location at which to substitute reg 10 is lost
+;; in two new RTXes, both still having reg 22. Later on, the left-over reg
+;; 22 is recognized to have an equivalent in memory which is substituted
+;; straight in, and we end up with an unrecognizable insn:
+;; (insn 325 324 326 (set (reg:SI 9 r9)
+;; (mem/f:SI (mem:SI (plus:SI (reg:SI 8 r8)
+;; (const_int -84 [0xffffffac])) 0) 0)) -1 (nil)
+;; (nil))
+;; which is the first part of the reloaded synthesized "movdi".
+;; The right thing would be to add equivalent replacement locations for
+;; insns with pseudos that need more reloading. The question is where.
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && operands[1] != const0_rtx
+ && (!TARGET_V32 || (!REG_P (operands[1]) && can_create_pseudo_p ())))
+ operands[1] = copy_to_mode_reg (DImode, operands[1]);
+
+ /* Some other ports (as of 2001-09-10 for example mcore and romp) also
+ prefer to split up constants early, like this. The testcase in
+ gcc.c-torture/execute/961213-1.c shows that CSE2 gets confused by the
+ resulting subreg sets when using the construct from mcore (as of FSF
+ CVS, version -r 1.5), and it believes that the high part (the last one
+ emitted) is the final value. */
+ if ((CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_DOUBLE)
+ && ! reload_completed
+ && ! reload_in_progress)
+ {
+ rtx insns;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ start_sequence ();
+ emit_move_insn (operand_subword (op0, 0, 1, DImode),
+ operand_subword (op1, 0, 1, DImode));
+ emit_move_insn (operand_subword (op0, 1, 1, DImode),
+ operand_subword (op1, 1, 1, DImode));
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn (insns);
+ DONE;
+ }
+})
+
+(define_insn_and_split "*movdi_insn_non_v32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rx,m")
+ (match_operand:DI 1 "general_operand" "rx,g,rxM"))]
+ "(register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)
+ || operands[1] == const0_rtx)
+ && !TARGET_V32"
+ "#"
+ "&& reload_completed"
+ [(match_dup 2)]
+ "operands[2] = cris_split_movdx (operands);")
+
+;; Overlapping (but non-identical) source memory address and destination
+;; register would be a compiler bug, so we don't have to specify that.
+(define_insn "*movdi_v32"
+ [(set
+ (match_operand:DI 0 "nonimmediate_operand" "=r,rx,&r,>, m,r,x,m")
+ (match_operand:DI 1 "general_operand" "rxi,r>,m, rx,r,m,m,x"))]
+ "TARGET_V32"
+{
+ switch (which_alternative)
+ {
+ /* FIXME: 1) Use autoincrement where possible. 2) Have peephole2,
+ particularly for cases where the address register is dead. */
+ case 5:
+ if (REGNO (operands[0]) == REGNO (XEXP (operands[1], 0)))
+ return "addq 4,%L1\;move.d %1,%H0\;subq 4,%L1\;move.d %1,%M0";
+ gcc_assert (REGNO (operands[0]) + 1 == REGNO (XEXP (operands[1], 0)));
+ return "move.d [%L1+],%M0\;move.d [%L1],%H0";
+ case 2:
+ /* We could do away with the addq if we knew the address-register
+ isn't ACR. If we knew the address-register is dead, we could do
+ away with the subq too. */
+ return "move.d [%L1],%M0\;addq 4,%L1\;move.d [%L1],%H0\;subq 4,%L1";
+ case 4:
+ return "move.d %M1,[%L0]\;addq 4,%L0\;move.d %H1,[%L0]\;subq 4,%L0";
+ case 6:
+ return "move [%L1],%M0\;addq 4,%L1\;move [%L1],%H0\;subq 4,%L1";
+ case 7:
+ return "move %M1,[%L0]\;addq 4,%L0\;move %H1,[%L0]\;subq 4,%L0";
+
+ default:
+ return "#";
+ }
+}
+ ;; The non-split cases clobber cc0 because of their adds and subs.
+ ;; Beware that NOTICE_UPDATE_CC is called before the forced split happens.
+ [(set_attr "cc" "*,*,clobber,*,clobber,clobber,*,*")])
+
+;; Much like "*movdi_insn_non_v32". Overlapping registers and constants
+;; are handled so much better in cris_split_movdx.
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_V32
+ && reload_completed
+ && (!MEM_P (operands[0]) || !REG_P (XEXP (operands[0], 0)))
+ && (!MEM_P (operands[1]) || !REG_P (XEXP (operands[1], 0)))"
+ [(match_dup 2)]
+ "operands[2] = cris_split_movdx (operands);")
+
+;; Side-effect patterns for move.S1 [rx=ry+rx.S2],rw
+;; and move.S1 [rx=ry+i],rz
+;; Then movs.S1 and movu.S1 for both modes.
+;;
+;; move.S1 [rx=ry+rz.S],rw, avoiding the cases where rx is ry or rw is rx
+;; FIXME: These could have anonymous mode for operand 0.
+;; FIXME: Special registers' alternatives too.
+
+(define_insn "*mov_side<mode>_biap"
+ [(set (match_operand:BW 0 "register_operand" "=r,r")
+ (mem:BW (plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "n,n"))
+ (match_operand:SI 3 "register_operand" "r,r"))))
+ (set (match_operand:SI 4 "register_operand" "=*3,r")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)"
+ "@
+ #
+ move<m> [%4=%3+%1%T2],%0")
+
+(define_insn "*mov_sidesisf_biap"
+ [(set (match_operand 0 "register_operand" "=r,r,x,x")
+ (mem (plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+ (match_operand:SI 2 "const_int_operand" "n,n,n,n"))
+ (match_operand:SI 3 "register_operand" "r,r,r,r"))))
+ (set (match_operand:SI 4 "register_operand" "=*3,r,*3,r")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD
+ && cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)"
+ "@
+ #
+ move.%s0 [%4=%3+%1%T2],%0
+ #
+ move [%4=%3+%1%T2],%0")
+
+;; move.S1 [rx=ry+i],rz
+;; avoiding move.S1 [ry=ry+i],rz
+;; and move.S1 [rz=ry+i],rz
+;; Note that "i" is allowed to be a register.
+
+(define_insn "*mov_side<mode>"
+ [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r")
+ (mem:BW
+ (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))))
+ (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) > 127
+ || INTVAL (operands[2]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "move<m> [%3=%2%S1],%0";
+ return "move<m> [%3=%1%S2],%0";
+})
+
+(define_insn "*mov_sidesisf"
+ [(set (match_operand 0 "register_operand" "=r,r,r,x,x,x,r,r,x,x")
+ (mem
+ (plus:SI
+ (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,r,r,r,R,R,R,R")
+ (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r>Rn,r,>Rn,r,r,r,r"))))
+ (set (match_operand:SI 3 "register_operand" "=*1,r,r,*1,r,r,*2,r,*2,r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD
+ && cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)"
+{
+ if ((which_alternative == 0
+ || which_alternative == 3
+ || which_alternative == 6
+ || which_alternative == 8)
+ && (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) > 127
+ || INTVAL (operands[2]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')))
+ return "#";
+ if (which_alternative < 3)
+ return "move.%s0 [%3=%1%S2],%0";
+ if (which_alternative == 7)
+ return "move.%s0 [%3=%2%S1],%0";
+ if (which_alternative == 9)
+ return "move [%3=%2%S1],%0";
+ return "move [%3=%1%S2],%0";
+})
+
+;; Other way around; move to memory.
+
+;; Note that the condition (which for side-effect patterns is usually
+;; a call to cris_side_effect_mode_ok) isn't consulted for register
+;; allocation preferences -- constraints are the method for that. The
+;; drawback is that we can't prevent register allocation from causing
+;; "move.s rw,[rx=ry+rz.S]" when rw==rx without also excluding rx==ry or
+;; rx==rz if we use an earlyclobber modifier for the constraint for rx.
+;; Instead of that, we recognize and split the cases where dangerous
+;; register combinations are spotted: where a register is set in the
+;; side-effect, and used in the main insn. We don't handle the case where
+;; the set in the main insn overlaps the set in the side-effect; that case
+;; must be handled in gcc. We handle just the case where the set in the
+;; side-effect overlaps the input operand of the main insn (i.e. just
+;; moves to memory).
+
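+;; An illustrative sketch (not from the sources): with rw and rx
+;; allocated to the same register, "move.d r4,[r4=r2+r3.d]" is such a
+;; dangerous combination; the splits below rewrite it into a plain
+;; store plus a separate address calculation.
+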
+;;
+;; move.s rz,[ry=rx+rw.S]
+
+(define_insn "*mov_side<mode>_biap_mem"
+ [(set (mem:BW (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "const_int_operand" "n,n,n"))
+ (match_operand:SI 2 "register_operand" "r,r,r")))
+ (match_operand:BW 3 "register_operand" "r,r,r"))
+ (set (match_operand:SI 4 "register_operand" "=*2,!3,r")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))]
+ "cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)"
+ "@
+ #
+ #
+ move<m> %3,[%4=%2+%0%T1]")
+
+(define_insn "*mov_sidesisf_biap_mem"
+ [(set (mem (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r,r,r,r,r,r")
+ (match_operand:SI 1 "const_int_operand" "n,n,n,n,n,n"))
+ (match_operand:SI 2 "register_operand" "r,r,r,r,r,r")))
+ (match_operand 3 "register_operand" "r,r,r,x,x,x"))
+ (set (match_operand:SI 4 "register_operand" "=*2,!3,r,*2,!3,r")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))]
+ "GET_MODE_SIZE (GET_MODE (operands[3])) == UNITS_PER_WORD
+ && cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)"
+ "@
+ #
+ #
+ move.%s3 %3,[%4=%2+%0%T1]
+ #
+ #
+ move %3,[%4=%2+%0%T1]")
+
+;; Split for the case above where we're out of luck with register
+;; allocation (again, the condition isn't checked for that), and we end up
+;; with the set in the side-effect getting the same register as the input
+;; register.
+
+(define_split
+ [(parallel
+ [(set (match_operator
+ 6 "cris_mem_op"
+ [(plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operand:SI 2 "register_operand" ""))])
+ (match_operand 3 "register_operand" ""))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))])]
+ "reload_completed && reg_overlap_mentioned_p (operands[4], operands[3])"
+ [(set (match_dup 5) (match_dup 3))
+ (set (match_dup 4) (match_dup 2))
+ (set (match_dup 4)
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 4)))]
+ "operands[5]
+ = replace_equiv_address (operands[6],
+ gen_rtx_PLUS (SImode,
+ gen_rtx_MULT (SImode,
+ operands[0],
+ operands[1]),
+ operands[2]));")
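+
+;; A rough sketch of the split result (illustrative only):
+;;   "move.d r4,[r4=r2+r3.d]"
+;; becomes something like
+;;   "move.d r4,[r2+r3.d]" ; "move.d r2,r4" ; "addi r3.d,r4"
+;; matching the three sets emitted above.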
+
+;; move.s rx,[ry=rz+i]
+;; FIXME: These could have anonymous mode for operand 2.
+
+;; QImode
+
+(define_insn "*mov_side<mode>_mem"
+ [(set (mem:BW
+ (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r,R,R,R")
+ (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn,r,r,r")))
+ (match_operand:BW 2 "register_operand" "r,r,r,r,r,r,r"))
+ (set (match_operand:SI 3 "register_operand" "=*0,!*2,r,r,*1,!*2,r")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)"
+{
+ if ((which_alternative == 0 || which_alternative == 4)
+ && (!CONST_INT_P (operands[1])
+ || INTVAL (operands[1]) > 127
+ || INTVAL (operands[1]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J')))
+ return "#";
+ if (which_alternative == 1 || which_alternative == 5)
+ return "#";
+ if (which_alternative == 6)
+ return "move.%s2 %2,[%3=%1%S0]";
+ return "move<m> %2,[%3=%0%S1]";
+})
+
+;; SImode
+
+(define_insn "*mov_sidesisf_mem"
+ [(set (mem
+ (plus:SI
+ (match_operand:SI
+ 0 "cris_bdap_operand"
+ "%r, r, r,r, r, r,r, R,R, R,R, R")
+ (match_operand:SI
+ 1 "cris_bdap_operand"
+ "r>Rn,r>Rn,r,>Rn,r>Rn,r,>Rn,r,r, r,r, r")))
+ (match_operand 2 "register_operand"
+ "r, r, r,r, x, x,x, r,r, r,x, x"))
+ (set (match_operand:SI 3 "register_operand"
+ "=*0,!2, r,r, *0, r,r, *1,!*2,r,*1,r")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "GET_MODE_SIZE (GET_MODE (operands[2])) == UNITS_PER_WORD
+ && cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)"
+{
+ if ((which_alternative == 0 || which_alternative == 4)
+ && (!CONST_INT_P (operands[1])
+ || INTVAL (operands[1]) > 127
+ || INTVAL (operands[1]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J')))
+ return "#";
+ if (which_alternative == 1
+ || which_alternative == 7
+ || which_alternative == 8
+ || which_alternative == 10)
+ return "#";
+ if (which_alternative < 4)
+ return "move.%s2 %2,[%3=%0%S1]";
+ if (which_alternative == 9)
+ return "move.%s2 %2,[%3=%1%S0]";
+ if (which_alternative == 11)
+ return "move %2,[%3=%1%S0]";
+ return "move %2,[%3=%0%S1]";
+})
+
+;; Like the biap case, a split where the set in the side-effect gets the
+;; same register as the input register to the main insn, since the
+;; condition isn't checked at register allocation.
+
+(define_split
+ [(parallel
+ [(set (match_operator
+ 4 "cris_mem_op"
+ [(plus:SI
+ (match_operand:SI 0 "cris_bdap_operand" "")
+ (match_operand:SI 1 "cris_bdap_operand" ""))])
+ (match_operand 2 "register_operand" ""))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (match_dup 0) (match_dup 1)))])]
+ "reload_completed && reg_overlap_mentioned_p (operands[3], operands[2])"
+ [(set (match_dup 4) (match_dup 2))
+ (set (match_dup 3) (match_dup 0))
+ (set (match_dup 3) (plus:SI (match_dup 3) (match_dup 1)))]
+ "")
+
+;; Clear memory side-effect patterns. It is hard to get to the mode if
+;; the MEM was anonymous, so there will be one for each mode.
+
+;; clear.[bwd] [ry=rx+rw.s2]
+
+(define_insn "*clear_side<mode>_biap"
+ [(set (mem:BWD (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "const_int_operand" "n,n"))
+ (match_operand:SI 2 "register_operand" "r,r")))
+ (const_int 0))
+ (set (match_operand:SI 3 "register_operand" "=*2,r")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))]
+ "cris_side_effect_mode_ok (MULT, operands, 3, 2, 0, 1, -1)"
+ "@
+ #
+ clear<m> [%3=%2+%0%T1]")
+
+;; clear.[bwd] [ry=rz+i]
+
+(define_insn "*clear_side<mode>"
+ [(set (mem:BWD
+ (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))
+ (const_int 0))
+ (set (match_operand:SI 2 "register_operand" "=*0,r,r,*1,r")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 2, 0, 1, -1, -1)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[1])
+ || INTVAL (operands[1]) > 127
+ || INTVAL (operands[1]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "clear<m> [%2=%1%S0]";
+ return "clear<m> [%2=%0%S1]";
+})
+
+;; Normal move patterns from SI on.
+
+(define_expand "movsi"
+ [(set
+ (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "cris_general_operand_or_symbol" ""))]
+ ""
+{
+ /* If the output goes to a MEM, make sure we have zero or a register as
+ input. */
+ if (MEM_P (operands[0])
+ && ! REG_S_P (operands[1])
+ && operands[1] != const0_rtx
+ && can_create_pseudo_p ())
+ operands[1] = force_reg (SImode, operands[1]);
+
+ /* If we're generating PIC and have an incoming symbol, validize it to a
+ general operand or something that will match a special pattern.
+
+ FIXME: Do we *have* to recognize anything that would normally be a
+ valid symbol? Can we exclude global PIC addresses with an added
+ offset? */
+ if (flag_pic
+ && CONSTANT_ADDRESS_P (operands[1])
+ && !cris_valid_pic_const (operands[1], false))
+ {
+ enum cris_pic_symbol_type t = cris_pic_symbol_type_of (operands[1]);
+
+ gcc_assert (t != cris_no_symbol);
+
+ if (! REG_S_P (operands[0]))
+ {
+ /* We must have a register as destination for what we're about to
+ do, and for the patterns we generate. */
+ CRIS_ASSERT (can_create_pseudo_p ());
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ else
+ {
+ /* FIXME: add a REG_EQUAL (or is it REG_EQUIV) note to the
+ destination register for the symbol. It might not be
+ worth it. Measure. */
+ crtl->uses_pic_offset_table = 1;
+ if (t == cris_rel_symbol)
+ {
+ /* Change a "move.d sym(+offs),rN" into (allocate register rM)
+ for pre-v32:
+ "move.d (const (plus (unspec [sym]
+ CRIS_UNSPEC_GOTREL) offs)),rM" "add.d rPIC,rM,rN"
+ and for v32:
+ "move.d (const (plus (unspec [sym]
+ CRIS_UNSPEC_PCREL) offs)),rN". */
+ rtx tem, rm, rn = operands[0];
+ rtx sym = GET_CODE (operands[1]) != CONST
+ ? operands[1] : get_related_value (operands[1]);
+ HOST_WIDE_INT offs = get_integer_term (operands[1]);
+
+ gcc_assert (can_create_pseudo_p ());
+
+ if (TARGET_V32)
+ {
+ tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym),
+ CRIS_UNSPEC_PCREL);
+ if (offs != 0)
+ tem = plus_constant (tem, offs);
+ rm = rn;
+ emit_move_insn (rm, gen_rtx_CONST (Pmode, tem));
+ }
+ else
+ {
+		      /* We still use GOT-relative addressing for
+ pre-v32. */
+ crtl->uses_pic_offset_table = 1;
+ tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym),
+ CRIS_UNSPEC_GOTREL);
+ if (offs != 0)
+ tem = plus_constant (tem, offs);
+ rm = gen_reg_rtx (Pmode);
+ emit_move_insn (rm, gen_rtx_CONST (Pmode, tem));
+ if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx,
+ rn, 0, OPTAB_LIB_WIDEN) != rn)
+ internal_error ("expand_binop failed in movsi gotrel");
+ }
+ DONE;
+ }
+ else if (t == cris_got_symbol)
+ {
+ /* Change a "move.d sym,rN" into (allocate register rM, rO)
+ "move.d (const (unspec [sym] CRIS_UNSPEC_GOTREAD)),rM"
+ "add.d rPIC,rM,rO", "move.d [rO],rN" with
+ the memory access marked as read-only. */
+ rtx tem, mem, rm, ro, rn = operands[0];
+ gcc_assert (can_create_pseudo_p ());
+ tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]),
+ CRIS_UNSPEC_GOTREAD);
+ rm = gen_reg_rtx (Pmode);
+ emit_move_insn (rm, gen_rtx_CONST (Pmode, tem));
+ ro = gen_reg_rtx (Pmode);
+ if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx,
+ ro, 0, OPTAB_LIB_WIDEN) != ro)
+ internal_error ("expand_binop failed in movsi got");
+ mem = gen_rtx_MEM (Pmode, ro);
+
+ /* This MEM doesn't alias anything. Whether it
+ aliases other same symbols is unimportant. */
+ set_mem_alias_set (mem, new_alias_set ());
+ MEM_NOTRAP_P (mem) = 1;
+
+ /* We can set the GOT memory read of a non-called symbol
+ to readonly, but not that of a call symbol, as those
+ are subject to lazy evaluation and usually have the value
+ changed from the first call to the second (but
+ constant thereafter). */
+ MEM_READONLY_P (mem) = 1;
+ emit_move_insn (rn, mem);
+ DONE;
+ }
+ else
+ {
+ /* We get here when we have to change something that would
+ be recognizable if it wasn't PIC. A ``sym'' is ok for
+ PIC symbols both with and without a GOT entry. And ``sym
+ + offset'' is ok for local symbols, so the only thing it
+	       could be is a global symbol with an offset.  Check and
+ abort if not. */
+ rtx reg = gen_reg_rtx (Pmode);
+ rtx sym = get_related_value (operands[1]);
+ HOST_WIDE_INT offs = get_integer_term (operands[1]);
+
+ gcc_assert (can_create_pseudo_p ()
+ && t == cris_got_symbol_needing_fixup
+ && sym != NULL_RTX && offs != 0);
+
+ emit_move_insn (reg, sym);
+ if (expand_binop (SImode, add_optab, reg,
+ GEN_INT (offs), operands[0], 0,
+ OPTAB_LIB_WIDEN) != operands[0])
+ internal_error ("expand_binop failed in movsi got+offs");
+ DONE;
+ }
+ }
+ }
+})
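+
+;; A condensed sketch of the expansions above (illustrative):
+;;   pre-v32 local sym:  rM <- (const (unspec [sym] GOTREL)) ;
+;;                       rN <- rPIC + rM
+;;   sym via GOT:        rM <- (const (unspec [sym] GOTREAD)) ;
+;;                       rO <- rPIC + rM ; rN <- [rO]
+;; with v32 using a pc-relative constant (PCREL) directly for the
+;; local case.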
+
+(define_insn "*movsi_got_load"
+ [(set (reg:SI CRIS_GOT_REGNUM) (unspec:SI [(const_int 0)] CRIS_UNSPEC_GOT))]
+ "flag_pic"
+{
+ return TARGET_V32
+ ? "lapc _GLOBAL_OFFSET_TABLE_,%:"
+ : "move.d $pc,%:\;sub.d .:GOTOFF,%:";
+}
+ [(set_attr "cc" "clobber")])
+
+(define_insn "*movsi_internal"
+ [(set
+ (match_operand:SI 0 "nonimmediate_operand"
+ "=r,r, r,Q>,r,Q>,g,r,r, r,g,rQ>,x, m,x")
+ (match_operand:SI 1 "cris_general_operand_or_pic_source"
+ "r,Q>,M,M, I,r, M,n,!S,g,r,x, rQ>,x,gi"))]
+ ;; Note that we prefer not to use the S alternative (if for some reason
+ ;; it competes with others) above, but g matches S.
+ ""
+{
+  /* Better to have a C switch here; it is worth it to optimize the size of
+ move insns. The alternative would be to try to find more constraint
+ letters. FIXME: Check again. It seems this could shrink a bit. */
+ switch (which_alternative)
+ {
+ case 9:
+ if (TARGET_V32)
+ {
+ if (!flag_pic
+ && (GET_CODE (operands[1]) == SYMBOL_REF
+ || GET_CODE (operands[1]) == LABEL_REF
+ || GET_CODE (operands[1]) == CONST))
+ {
+ /* FIXME: Express this through (set_attr cc none) instead,
+ since we can't express the ``none'' at this point. FIXME:
+	     Use lapc for everything except const_int and when the next cc0
+ user would want the flag setting. */
+ CC_STATUS_INIT;
+ return "lapc %1,%0";
+ }
+ if (flag_pic == 1
+ && GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == UNSPEC
+ && XINT (XEXP (operands[1], 0), 1) == CRIS_UNSPEC_GOTREAD)
+ return "movu.w %1,%0";
+ }
+ /* FALLTHROUGH */
+ case 0:
+ case 1:
+ case 5:
+ case 10:
+ return "move.d %1,%0";
+
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ return "move %d1,%0";
+
+ case 2:
+ case 3:
+ case 6:
+ return "clear.d %0";
+
+ /* Constants -32..31 except 0. */
+ case 4:
+ return "moveq %1,%0";
+
+ /* We can win a little on constants -32768..-33, 32..65535. */
+ case 7:
+ if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) < 65536)
+ {
+ if (INTVAL (operands[1]) < 256)
+ return "movu.b %1,%0";
+ return "movu.w %1,%0";
+ }
+ else if (INTVAL (operands[1]) >= -32768 && INTVAL (operands[1]) < 32768)
+ {
+ if (INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) < 128)
+ return "movs.b %1,%0";
+ return "movs.w %1,%0";
+ }
+ return "move.d %1,%0";
+
+ case 8:
+ {
+ rtx tem = operands[1];
+ gcc_assert (GET_CODE (tem) == CONST);
+ tem = XEXP (tem, 0);
+ if (GET_CODE (tem) == PLUS
+ && GET_CODE (XEXP (tem, 0)) == UNSPEC
+ && (XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL
+ || XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_PCREL)
+ && CONST_INT_P (XEXP (tem, 1)))
+ tem = XEXP (tem, 0);
+ gcc_assert (GET_CODE (tem) == UNSPEC);
+ switch (XINT (tem, 1))
+ {
+ case CRIS_UNSPEC_GOTREAD:
+ case CRIS_UNSPEC_PLTGOTREAD:
+ /* Using sign-extend mostly to be consistent with the
+ indexed addressing mode. */
+ if (flag_pic == 1)
+ return "movs.w %1,%0";
+ return "move.d %1,%0";
+
+ case CRIS_UNSPEC_GOTREL:
+ case CRIS_UNSPEC_PLT_GOTREL:
+ gcc_assert (!TARGET_V32);
+ return "move.d %1,%0";
+
+ case CRIS_UNSPEC_PCREL:
+ case CRIS_UNSPEC_PLT_PCREL:
+ gcc_assert (TARGET_V32);
+ return "lapc %1,%0";
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ default:
+ return "BOGUS: %1 to %0";
+ }
+}
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,no,yes,yes,no,no")
+ (set_attr "cc" "*,*,*,*,*,*,*,*,*,*,*,none,none,none,none")])
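+
+;; Size-selection sketch for constant input, per the cases above:
+;;   0 -> "clear.d", -32..31 -> "moveq", 32..255 -> "movu.b",
+;;   256..65535 -> "movu.w", -128..-33 -> "movs.b",
+;;   -32768..-129 -> "movs.w", anything else -> "move.d".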
+
+;; Extend operations with side-effect from mem to register, using
+;; MOVS/MOVU. These are from mem to register only.
+;;
+;; [rx=ry+rz.S]
+;;
+;; QImode to HImode
+;;
+;; FIXME: Can we omit extend to HImode, since GCC should truncate for
+;; HImode by itself? Perhaps use only anonymous modes?
+
+(define_insn "*ext_sideqihi_biap"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (match_operator:HI
+ 5 "cris_extend_operator"
+ [(mem:QI (plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "n,n"))
+ (match_operand:SI 3 "register_operand" "r,r")))]))
+ (set (match_operand:SI 4 "register_operand" "=*3,r")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)"
+ "@
+ #
+ mov%e5.%m5 [%4=%3+%1%T2],%0")
+
+(define_insn "*ext_side<mode>si_biap"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI
+ 5 "cris_extend_operator"
+ [(mem:BW (plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "n,n"))
+ (match_operand:SI 3 "register_operand" "r,r")))]))
+ (set (match_operand:SI 4 "register_operand" "=*3,r")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)"
+ "@
+ #
+ mov%e5<m> [%4=%3+%1%T2],%0")
+
+;; Same but [rx=ry+i]
+
+;; QImode to HImode
+
+(define_insn "*ext_sideqihi"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:HI
+ 4 "cris_extend_operator"
+ [(mem:QI (plus:SI
+ (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]))
+ (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) > 127
+ || INTVAL (operands[2]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "mov%e4.%m4 [%3=%2%S1],%0";
+ return "mov%e4.%m4 [%3=%1%S2],%0";
+})
+
+(define_insn "*ext_side<mode>si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:SI
+ 4 "cris_extend_operator"
+ [(mem:BW (plus:SI
+ (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]))
+ (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) > 127
+ || INTVAL (operands[2]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "mov%e4<m> [%3=%2%S1],%0";
+ return "mov%e4<m> [%3=%1%S2],%0";
+})
+
+;; FIXME: See movsi.
+
+(define_insn "movhi"
+ [(set
+ (match_operand:HI 0 "nonimmediate_operand" "=r,r, r,Q>,r,Q>,r,r,r,g,g,r,r,x")
+ (match_operand:HI 1 "general_operand" "r,Q>,M,M, I,r, L,O,n,M,r,g,x,r"))]
+ ""
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ case 5:
+ case 10:
+ case 11:
+ return "move.w %1,%0";
+ case 12:
+ case 13:
+ return "move %1,%0";
+ case 2:
+ case 3:
+ case 9:
+ return "clear.w %0";
+ case 4:
+ return "moveq %1,%0";
+ case 6:
+ case 8:
+ if (INTVAL (operands[1]) < 256 && INTVAL (operands[1]) >= -128)
+ {
+ if (INTVAL (operands[1]) > 0)
+ return "movu.b %1,%0";
+ return "movs.b %1,%0";
+ }
+ return "move.w %1,%0";
+ case 7:
+ return "movEq %b1,%0";
+ default:
+ return "BOGUS: %1 to %0";
+ }
+}
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,yes,no,no,no,no,yes,yes")
+ (set_attr "cc" "*,*,none,none,*,none,*,clobber,*,none,none,*,none,none")])
+
+(define_insn "movstricthi"
+ [(set
+ (strict_low_part
+ (match_operand:HI 0 "nonimmediate_operand" "+r,r, r,Q>,Q>,g,r,g"))
+ (match_operand:HI 1 "general_operand" "r,Q>,M,M, r, M,g,r"))]
+ ""
+ "@
+ move.w %1,%0
+ move.w %1,%0
+ clear.w %0
+ clear.w %0
+ move.w %1,%0
+ clear.w %0
+ move.w %1,%0
+ move.w %1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")])
+
+(define_expand "reload_in<mode>"
+ [(set (match_operand:BW 2 "register_operand" "=r")
+ (match_operand:BW 1 "memory_operand" "m"))
+ (set (match_operand:BW 0 "register_operand" "=x")
+ (match_dup 2))]
+ ""
+ "")
+
+(define_expand "reload_out<mode>"
+ [(set (match_operand:BW 2 "register_operand" "=&r")
+ (match_operand:BW 1 "register_operand" "x"))
+ (set (match_operand:BW 0 "memory_operand" "=m")
+ (match_dup 2))]
+ ""
+ "")
+
+(define_insn "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,r,g,g,r,r,r,x")
+ (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, I,M,r,O,g,x,r"))]
+ ""
+ "@
+ move.b %1,%0
+ move.b %1,%0
+ move.b %1,%0
+ clear.b %0
+ clear.b %0
+ moveq %1,%0
+ clear.b %0
+ move.b %1,%0
+ moveq %b1,%0
+ move.b %1,%0
+ move %1,%0
+ move %1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,yes,no,yes,yes")
+ (set_attr "cc" "*,*,*,*,*,*,*,*,clobber,*,none,none")])
+
+(define_insn "movstrictqi"
+ [(set (strict_low_part
+ (match_operand:QI 0 "nonimmediate_operand" "+r,Q>,r, r,Q>,g,g,r"))
+ (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, M,r,g"))]
+ ""
+ "@
+ move.b %1,%0
+ move.b %1,%0
+ move.b %1,%0
+ clear.b %0
+ clear.b %0
+ clear.b %0
+ move.b %1,%0
+ move.b %1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")])
+
+;; The valid "quick" bit-patterns are, except for 0.0, denormalized
+;; values REALLY close to 0, and some NaNs (I think; their exponent is
+;; all ones); the worthwhile one is "0.0".
+;; It will use clear, so we know ALL types of immediate 0 never change cc.
+
+(define_insn "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,g,g,r,r,x,Q>,m,x, x")
+ (match_operand:SF 1 "general_operand" "r,r, Q>,G,G, G,r,g,x,r,x, x,Q>,g"))]
+ ""
+ "@
+ move.d %1,%0
+ move.d %1,%0
+ move.d %1,%0
+ clear.d %0
+ clear.d %0
+ clear.d %0
+ move.d %1,%0
+ move.d %1,%0
+ move %1,%0
+ move %1,%0
+ move %1,%0
+ move %1,%0
+ move %1,%0
+ move %1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no,yes,yes,yes,no,yes,no")])
+
+;; Movem patterns. Primarily for use in function prologue and epilogue.
+;; The V32 variants have an ordering matching the expectations of the
+;; standard names "load_multiple" and "store_multiple"; pre-v32 movem
+;; stores R0 in the highest memory location.
+
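+;; An illustrative sketch of the pre-v32 layout: "movem r2,[r8]" would
+;; then put r2 at [r8] and r0 at [r8+8] (R0 highest), which is why
+;; these expanders are restricted to TARGET_V32.
+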
+(define_expand "load_multiple"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_V32"
+{
+ rtx indreg;
+
+ /* Apparently the predicate isn't checked, so we need to do so
+ manually. Once happened for libstdc++-v3 locale_facets.tcc. */
+ if (!MEM_P (operands[1]))
+ FAIL;
+
+ indreg = XEXP (operands[1], 0);
+
+ if (GET_CODE (indreg) == POST_INC)
+ indreg = XEXP (indreg, 0);
+ if (!REG_P (indreg)
+ || GET_CODE (operands[2]) != CONST_INT
+ || !REG_P (operands[0])
+ || REGNO (operands[0]) != 0
+ || INTVAL (operands[2]) > CRIS_SP_REGNUM
+ || (int) REGNO (indreg) < INTVAL (operands[2]))
+ FAIL;
+ emit_insn (cris_gen_movem_load (operands[1], operands[2], 0));
+ DONE;
+})
+
+(define_expand "store_multiple"
+ [(match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_V32"
+{
+ rtx indreg;
+
+ /* See load_multiple. */
+ if (!MEM_P (operands[0]))
+ FAIL;
+
+ indreg = XEXP (operands[0], 0);
+
+ if (GET_CODE (indreg) == POST_INC)
+ indreg = XEXP (indreg, 0);
+ if (!REG_P (indreg)
+ || GET_CODE (operands[2]) != CONST_INT
+ || !REG_P (operands[1])
+ || REGNO (operands[1]) != 0
+ || INTVAL (operands[2]) > CRIS_SP_REGNUM
+ || (int) REGNO (indreg) < INTVAL (operands[2]))
+ FAIL;
+ cris_emit_movem_store (operands[0], operands[2], 0, false);
+ DONE;
+})
+
+(define_insn "*cris_load_multiple"
+ [(match_parallel 0 "cris_load_multiple_op"
+ [(set (match_operand:SI 1 "register_operand" "=r,r")
+ (match_operand:SI 2 "memory_operand" "Q,m"))])]
+ ""
+ "movem %O0,%o0"
+ [(set_attr "cc" "none")
+ (set_attr "slottable" "yes,no")
+ ;; Not true, but setting the length to 0 causes return sequences (ret
+ ;; movem) to have the cost they had when (return) included the movem
+ ;; and reduces the performance penalty taken for needing to emit an
+ ;; epilogue (in turn copied by bb-reorder) instead of return patterns.
+ ;; FIXME: temporary change until all insn lengths are correctly
+ ;; described. FIXME: have better target control over bb-reorder.
+ (set_attr "length" "0")])
+
+(define_insn "*cris_store_multiple"
+ [(match_parallel 0 "cris_store_multiple_op"
+ [(set (match_operand:SI 2 "memory_operand" "=Q,m")
+ (match_operand:SI 1 "register_operand" "r,r"))])]
+ ""
+ "movem %o0,%O0"
+ [(set_attr "cc" "none")
+ (set_attr "slottable" "yes,no")])
+
+
+;; Sign- and zero-extend insns with standard names.
+;; Those for integer source operand are ordered with the widest source
+;; type first.
+
+;; Sign-extend.
+
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:SI 1 "general_operand" "g")))]
+ ""
+ "move.d %1,%M0\;smi %H0\;neg.d %H0,%H0")
+
+(define_insn "extend<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:BW 1 "general_operand" "g")))]
+ ""
+ "movs<m> %1,%M0\;smi %H0\;neg.d %H0,%H0")
+
+(define_insn "extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (sign_extend:SI (match_operand:BW 1 "general_operand" "r,Q>,g")))]
+ ""
+ "movs<m> %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+;; To do a byte->word extension, extend to dword, except that the top half
+;; of the register will be clobbered. FIXME: Perhaps this is not needed.
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (sign_extend:HI (match_operand:QI 1 "general_operand" "r,Q>,g")))]
+ ""
+ "movs.b %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+
+;; Zero-extend. The DImode ones are synthesized by gcc, so we don't
+;; specify them here.
+
+(define_insn "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI
+ (match_operand:BW 1 "nonimmediate_operand" "r,Q>,m")))]
+ ""
+ "movu<m> %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+;; Same comment as sign-extend QImode to HImode above applies.
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "r,Q>,m")))]
+ ""
+ "movu.b %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+;; All kinds of arithmetic and logical instructions.
+;;
+;; First, anonymous patterns to match addressing modes with
+;; side-effects.
+;;
+;; op.S [rx=ry+I],rz; (add, sub, or, and, bound).
+;;
+;; [rx=ry+rz.S]
+
+(define_insn "*op_side<mode>_biap"
+ [(set (match_operand:BWD 0 "register_operand" "=r,r")
+ (match_operator:BWD
+ 6 "cris_orthogonal_operator"
+ [(match_operand:BWD 1 "register_operand" "0,0")
+ (mem:BWD (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))]))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ %x6<m> [%5=%4+%2%T3],%0")
+
+;; [rx=ry+i] ([%4=%2+%3])
+
+(define_insn "*op_side<mode>"
+ [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:BWD
+ 5 "cris_orthogonal_operator"
+ [(match_operand:BWD 1 "register_operand" "0,0,0,0,0")
+ (mem:BWD (plus:SI
+ (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "%x5.%s0 [%4=%3%S2],%0";
+ return "%x5<m> [%4=%2%S3],%0";
+})
+
+;; To match all cases for commutative operations we may have to have the
+;; following pattern for add, or & and.  I do not really know, but it does
+;; not break anything.
+;;
+;; FIXME: This really ought to be checked.
+;;
+;; op.S [rx=ry+I],rz;
+;;
+;; [rx=ry+rz.S]
+
+(define_insn "*op_swap_side<mode>_biap"
+ [(set (match_operand:BWD 0 "register_operand" "=r,r")
+ (match_operator:BWD
+ 6 "cris_commutative_orth_op"
+ [(mem:BWD (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))
+ (match_operand:BWD 1 "register_operand" "0,0")]))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ %x6<m> [%5=%4+%2%T3],%0")
+
+;; [rx=ry+i] ([%4=%2+%3])
+;; FIXME: These could have anonymous mode for operand 0.
+
+;; QImode
+
+(define_insn "*op_swap_side<mode>"
+ [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:BWD
+ 5 "cris_commutative_orth_op"
+ [(mem:BWD
+ (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))
+ (match_operand:BWD 1 "register_operand" "0,0,0,0,0")]))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "%x5<m> [%4=%3%S2],%0";
+ return "%x5<m> [%4=%2%S3],%0";
+})
+
+;; Add operations, standard names.
+
+;; Note that for the 'P' constraint, the high part can be -1 or 0. We
+;; output the insn through the 'A' output modifier as "adds.w" and "addq",
+;; respectively.
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "general_operand")))]
+ ""
+{
+ if (MEM_P (operands[2]) && TARGET_V32)
+ operands[2] = force_reg (DImode, operands[2]);
+})
+
+(define_insn "*adddi3_non_v32"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,r")
+ (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))]
+ "!TARGET_V32"
+ "@
+ addq %2,%M0\;ax\;addq 0,%H0
+ subq %n2,%M0\;ax\;subq 0,%H0
+ add%e2.%z2 %2,%M0\;ax\;%A2 %H2,%H0
+ add.d %M2,%M0\;ax\;add.d %H2,%H0
+ add.d %M2,%M1,%M0\;ax\;add.d %H2,%H1,%H0")
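+
+;; (As we understand it, "ax" sets the X flag so that the following
+;; insn includes the carry; "add.d %M2,%M0\;ax\;add.d %H2,%H0" thus
+;; forms one 64-bit add out of two 32-bit adds.)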
+
+; There seems to be no use in allowing a memory operand for this one,
+; because we'd need a scratch register for incrementing the address.
+(define_insn "*adddi3_v32"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:DI 2 "nonmemory_operand" "J,N,P,r,n")))]
+ "TARGET_V32"
+ "@
+ addq %2,%M0\;addc 0,%H0
+ subq %n2,%M0\;ax\;subq 0,%H0
+ add%e2.%z2 %2,%M0\;addc %H2,%H0
+ add.d %M2,%M0\;addc %H2,%H0
+ add.d %M2,%M0\;addc %H2,%H0")
+
+(define_expand "add<mode>3"
+ [(set (match_operand:BWD 0 "register_operand")
+ (plus:BWD
+ (match_operand:BWD 1 "register_operand")
+ (match_operand:BWD 2 "general_operand")))]
+ ""
+ "")
+
+(define_insn "*addsi3_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r, r,r, r")
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "%0,0, 0,0,0,0, 0,r, r")
+ (match_operand:SI 2 "general_operand" "r,Q>,J,N,n,!S,g,!To,0")))]
+
+;; The last constraint is due to the fact that, after reload, the '%' is
+;; not honored, and canonicalization doesn't care about keeping the same
+;; register as the destination.  This will happen after insn splitting.
+;; gcc <= 2.7.2. FIXME: Check for gcc-2.9x
+
+ "!TARGET_V32"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "add.d %2,%0";
+ case 2:
+ return "addq %2,%0";
+ case 3:
+ return "subq %n2,%0";
+ case 4:
+ /* 'Known value', but not in -63..63.
+ Check if addu/subu may be used. */
+ if (INTVAL (operands[2]) > 0)
+ {
+ if (INTVAL (operands[2]) < 256)
+ return "addu.b %2,%0";
+ if (INTVAL (operands[2]) < 65536)
+ return "addu.w %2,%0";
+ }
+ else
+ {
+ if (INTVAL (operands[2]) >= -255)
+ return "subu.b %n2,%0";
+ if (INTVAL (operands[2]) >= -65535)
+ return "subu.w %n2,%0";
+ }
+ return "add.d %2,%0";
+ case 5:
+ {
+ rtx tem = operands[2];
+ gcc_assert (GET_CODE (tem) == CONST);
+ tem = XEXP (tem, 0);
+ if (GET_CODE (tem) == PLUS
+ && GET_CODE (XEXP (tem, 0)) == UNSPEC
+ /* We don't allow CRIS_UNSPEC_PCREL here; we can't have a
+ pc-relative operand in an add insn. */
+ && XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL
+ && CONST_INT_P (XEXP (tem, 1)))
+ tem = XEXP (tem, 0);
+ gcc_assert (GET_CODE (tem) == UNSPEC);
+ switch (XINT (tem, 1))
+ {
+ case CRIS_UNSPEC_GOTREAD:
+ case CRIS_UNSPEC_PLTGOTREAD:
+ /* Using sign-extend mostly to be consistent with the
+ indexed addressing mode. */
+ if (flag_pic == 1)
+ return "adds.w %2,%0";
+ return "add.d %2,%0";
+
+ case CRIS_UNSPEC_PLT_GOTREL:
+ case CRIS_UNSPEC_GOTREL:
+ return "add.d %2,%0";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ case 6:
+ return "add%u2 %2,%0";
+ case 7:
+ return "add.d %2,%1,%0";
+ case 8:
+ return "add.d %1,%0";
+ default:
+ return "BOGUS addsi %2+%1 to %0";
+ }
+}
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no,yes")])
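+
+;; Sketch of case 4 above (illustrative): adding 1000 emits
+;; "addu.w 1000,%0", adding -200 emits "subu.b 200,%0"; constants
+;; outside the unsigned byte/word ranges fall back to "add.d".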
+
+; FIXME: Check what's best: having the three-operand ACR alternative
+; before or after the corresponding-operand2 alternative. Check for
+; *all* insns. FIXME: constant constraint letter for -128..127.
+(define_insn "*addsi3_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,!a,r,r,r,!a")
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r, 0,0,0,r")
+ (match_operand:SI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,L,L, P,n,g,g")))]
+ "TARGET_V32"
+ "@
+ add.d %2,%0
+ addi %2.b,%1,%0
+ add.d %2,%0
+ addo.d %2,%1,%0
+ addq %2,%0
+ subq %n2,%0
+ addoq %2,%1,%0
+ adds.w %2,%0
+ addo %2,%1,%0
+ addu.w %2,%0
+ add.d %2,%0
+ add%u2 %2,%0
+ addo.%Z2 %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no,no,no,no,no")
+ (set_attr "cc" "*,none,*,none,*,*,none,*,none,*,*,*,none")])
+
+(define_insn "*addhi3_non_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r, r,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r")
+ (match_operand:HI 2 "general_operand" "r,Q>,J,N,g,!To")))]
+ "!TARGET_V32"
+ "@
+ add.w %2,%0
+ add.w %2,%0
+ addq %2,%0
+ subq %n2,%0
+ add.w %2,%0
+ add.w %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no")
+ (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")])
+
+(define_insn "*addhi3_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r, !a,r,!a, r,r,!a,r,!a")
+ (plus:HI
+ (match_operand:HI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r")
+ (match_operand:HI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,g,g")))]
+ "TARGET_V32"
+ "@
+ add.w %2,%0
+ addi %2.b,%1,%0
+ add.w %2,%0
+ addo.w %2,%1,%0
+ addq %2,%0
+ subq %n2,%0
+ addoq %2,%1,%0
+ add.w %2,%0
+ addo.w %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no")
+ (set_attr "cc" "*,none,*,none,clobber,clobber,none,*,none")])
+
+(define_insn "*addqi3_non_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,r, r,r,r,r,r")
+ (plus:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,0,0,r")
+ (match_operand:QI 2 "general_operand" "r,Q>,J,N,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ add.b %2,%0
+ add.b %2,%0
+ addq %2,%0
+ subq %n2,%0
+ subQ -%b2,%0
+ add.b %2,%0
+ add.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no")
+ (set_attr "cc" "normal,normal,clobber,clobber,clobber,normal,normal")])
+
+(define_insn "*addqi3_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,r,!a")
+ (plus:QI
+ (match_operand:QI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,0,r")
+ (match_operand:QI 2 "general_operand" "r,r, Q>,Q>,J,N,NJ,O,g,g")))]
+ "TARGET_V32"
+ "@
+ add.b %2,%0
+ addi %2.b,%1,%0
+ add.b %2,%0
+ addo.b %2,%1,%0
+ addq %2,%0
+ subq %n2,%0
+ addoq %2,%1,%0
+ subQ -%b2,%0
+ add.b %2,%0
+ addo.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,yes,no,no")
+ (set_attr "cc" "*,none,*,none,clobber,clobber,none,clobber,*,none")])
+
+;; Subtract.
+;;
+;; Note that because of insn canonicalization these will seldom, if ever,
+;; be used with a known constant as an operand: for example,
+;; (minus:SI (reg) (const_int 5)) is canonicalized to
+;; (plus:SI (reg) (const_int -5)) and matches the add patterns instead.
+
+;; Note that for the 'P' constraint, the high part can be -1 or 0. We
+;; output the insn through the 'D' output modifier as "subs.w" and "subq",
+;; respectively.
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (minus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "general_operand")))]
+ ""
+{
+ if (TARGET_V32 && MEM_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
+})
+
+(define_insn "*subdi3_non_v32"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0,r")
+ (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))]
+ "!TARGET_V32"
+ "@
+ subq %2,%M0\;ax\;subq 0,%H0
+ addq %n2,%M0\;ax\;addq 0,%H0
+ sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0
+ sub.d %M2,%M0\;ax\;sub.d %H2,%H0
+ sub.d %M2,%M1,%M0\;ax\;sub.d %H2,%H1,%H0")
+
+(define_insn "*subdi3_v32"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0")
+ (match_operand:DI 2 "nonmemory_operand" "J,N,P,r")))]
+ "TARGET_V32"
+ "@
+ subq %2,%M0\;ax\;subq 0,%H0
+ addq %n2,%M0\;ax\;addq 0,%H0
+ sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0
+ sub.d %M2,%M0\;ax\;sub.d %H2,%H0")
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:BWD 0 "register_operand")
+ (minus:BWD
+ (match_operand:BWD 1 "register_operand")
+ (match_operand:BWD 2 "general_operand")))]
+ ""
+ "")
+
+(define_insn "*subsi3_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r,r,r")
+ (minus:SI
+ (match_operand:SI 1 "register_operand" "0,0, 0,0,0,0,0,r")
+ (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g,!To")))]
+ "!TARGET_V32"
+
+;; This does not do the optimal: "addu.w 65535,r0" when %2 is negative.
+;; But then again, %2 should not be negative.
+
+ "@
+ sub.d %2,%0
+ sub.d %2,%0
+ subq %2,%0
+ addq %n2,%0
+ sub%e2.%z2 %2,%0
+ sub.d %2,%0
+ sub.d %2,%0
+ sub.d %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no")])
+
+(define_insn "*subsi3_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+ (minus:SI
+ (match_operand:SI 1 "register_operand" "0,0,0,0,0,0,0")
+ (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g")))]
+ "TARGET_V32"
+ "@
+ sub.d %2,%0
+ sub.d %2,%0
+ subq %2,%0
+ addq %n2,%0
+ sub%e2.%z2 %2,%0
+ sub.d %2,%0
+ sub.d %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no,no")])
+
+(define_insn "*sub<mode>3_nonv32"
+ [(set (match_operand:BW 0 "register_operand" "=r,r, r,r,r,r")
+ (minus:BW (match_operand:BW 1 "register_operand" "0,0, 0,0,0,r")
+ (match_operand:BW 2 "general_operand" "r,Q>,J,N,g,!To")))]
+ "!TARGET_V32"
+ "@
+ sub<m> %2,%0
+ sub<m> %2,%0
+ subq %2,%0
+ addq %n2,%0
+ sub<m> %2,%0
+ sub<m> %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no")
+ (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")])
+
+(define_insn "*sub<mode>3_v32"
+ [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r")
+ (minus:BW (match_operand:BW 1 "register_operand" "0,0,0,0,0")
+ (match_operand:BW 2 "general_operand" "r,Q>,J,N,g")))]
+ "TARGET_V32"
+ "@
+ sub<m> %2,%0
+ sub<m> %2,%0
+ subq %2,%0
+ addq %n2,%0
+ sub<m> %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no")
+ (set_attr "cc" "normal,normal,clobber,clobber,normal")])
+
+;; CRIS has some add/sub-with-sign/zero-extend instructions.
+;; Although these perform sign/zero-extension to SImode, they are
+;; equally applicable for the HImode case.
+;; FIXME: Check; GCC should handle the widening.
+;; Note that these must be located after the normal add/sub patterns,
+;; so as not to get constants into any less specific operands.
+;;
+;; Extend with add/sub and side-effect.
+;;
+;; ADDS/SUBS/ADDU/SUBU and BOUND, which needs a check for zero_extend
+;;
+;; adds/subs/addu/subu bound [rx=ry+rz.S]
+
+;; QImode to HImode
+;; FIXME: GCC should widen.
+
+(define_insn "*extopqihi_side_biap"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (match_operator:HI
+ 6 "cris_additive_operand_extend_operator"
+ [(match_operand:HI 1 "register_operand" "0,0")
+ (match_operator:HI
+ 7 "cris_extend_operator"
+ [(mem:QI (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))])]))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ %x6%e7.%m7 [%5=%4+%2%T3],%0")
+
+(define_insn "*extop<mode>si_side_biap"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI
+ 6 "cris_operand_extend_operator"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operator:SI
+ 7 "cris_extend_operator"
+ [(mem:BW (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))])]))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
+ && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ %x6%e7<m> [%5=%4+%2%T3],%0")
+
+
+;; [rx=ry+i]
+
+;; QImode to HImode
+
+(define_insn "*extopqihi_side"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:HI
+ 5 "cris_additive_operand_extend_operator"
+ [(match_operand:HI 1 "register_operand" "0,0,0,0,0")
+ (match_operator:HI
+ 6 "cris_extend_operator"
+ [(mem:QI
+ (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")
+ ))])]))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "%x5%E6.%m6 [%4=%3%S2],%0";
+ return "%x5%E6.%m6 [%4=%2%S3],%0";
+})
+
+(define_insn "*extop<mode>si_side"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:SI
+ 5 "cris_operand_extend_operator"
+ [(match_operand:SI 1 "register_operand" "0,0,0,0,0")
+ (match_operator:SI
+ 6 "cris_extend_operator"
+ [(mem:BW
+ (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")
+ ))])]))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
+ && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "%x5%E6<m> [%4=%3%S2],%0";
+ return "%x5%E6<m> [%4=%2%S3],%0";
+})
+
+
+;; As with op.S we may have to add a special pattern to match commuted
+;; operands to adds/addu and bound
+;;
+;; adds/addu/bound [rx=ry+rz.S]
+
+;; QImode to HImode
+;; FIXME: GCC should widen.
+
+(define_insn "*extopqihi_swap_side_biap"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI
+ (match_operator:HI
+ 6 "cris_extend_operator"
+ [(mem:QI (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))])
+ (match_operand:HI 1 "register_operand" "0,0")))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ add%e6.b [%5=%4+%2%T3],%0")
+
+(define_insn "*extop<mode>si_swap_side_biap"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI
+ 7 "cris_plus_or_bound_operator"
+ [(match_operator:SI
+ 6 "cris_extend_operator"
+ [(mem:BW (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 4 "register_operand" "r,r")))])
+ (match_operand:SI 1 "register_operand" "0,0")]))
+ (set (match_operand:SI 5 "register_operand" "=*4,r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
+ "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
+ && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+ "@
+ #
+ %x7%E6<m> [%5=%4+%2%T3],%0")
+
+;; [rx=ry+i]
+;; FIXME: GCC should widen.
+
+;; QImode to HImode
+
+(define_insn "*extopqihi_swap_side"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (plus:HI
+ (match_operator:HI
+ 5 "cris_extend_operator"
+ [(mem:QI (plus:SI
+ (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])
+ (match_operand:HI 1 "register_operand" "0,0,0,0,0")))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+ return "add%e5.b [%4=%3%S2],%0";
+ return "add%e5.b [%4=%2%S3],%0";
+})
+
+(define_insn "*extop<mode>si_swap_side"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (match_operator:SI
+ 6 "cris_plus_or_bound_operator"
+ [(match_operator:SI
+ 5 "cris_extend_operator"
+ [(mem:BW (plus:SI
+ (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R")
+ (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])
+ (match_operand:SI 1 "register_operand" "0,0,0,0,0")]))
+ (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))]
+ "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[5]) == ZERO_EXTEND)
+ && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+{
+ if ((which_alternative == 0 || which_alternative == 3)
+ && (!CONST_INT_P (operands[3])
+ || INTVAL (operands[3]) > 127
+ || INTVAL (operands[3]) < -128
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
+ || CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
+ return "#";
+ if (which_alternative == 4)
+    return "%x6%E5.%m5 [%4=%3%S2],%0";
+ return "%x6%E5<m> [%4=%2%S3],%0";
+})
+
+;; Extend versions (zero/sign) of normal add/sub (no side-effects).
+
+;; QImode to HImode
+;; FIXME: GCC should widen.
+
+(define_insn "*extopqihi_non_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
+ (match_operator:HI
+ 3 "cris_additive_operand_extend_operator"
+ [(match_operand:HI 1 "register_operand" "0,0,0,r")
+ (match_operator:HI
+ 4 "cris_extend_operator"
+ [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])]))]
+ "!TARGET_V32 && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)"
+ "@
+ %x3%E4.%m4 %2,%0
+ %x3%E4.%m4 %2,%0
+ %x3%E4.%m4 %2,%0
+ %x3%E4.%m4 %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,no,no")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*extopqihi_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (match_operator:HI
+ 3 "cris_additive_operand_extend_operator"
+ [(match_operand:HI 1 "register_operand" "0,0")
+ (match_operator:HI
+ 4 "cris_extend_operator"
+ [(match_operand:QI 2 "nonimmediate_operand" "r,m")])]))]
+ "TARGET_V32"
+ "%x3%e4.%m4 %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "clobber")])
+
+;; QImode to SImode
+
+(define_insn "*extop<mode>si_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (match_operator:SI
+ 3 "cris_operand_extend_operator"
+ [(match_operand:SI 1 "register_operand" "0,0,0,r")
+ (match_operator:SI
+ 4 "cris_extend_operator"
+ [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")])]))]
+ "!TARGET_V32
+ && (GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND)
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)"
+ "@
+ %x3%E4<m> %2,%0
+ %x3%E4<m> %2,%0
+ %x3%E4<m> %2,%0
+ %x3%E4<m> %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,no,no")])
+
+(define_insn "*extop<mode>si_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI
+ 3 "cris_additive_operand_extend_operator"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operator:SI
+ 4 "cris_extend_operator"
+ [(match_operand:BW 2 "nonimmediate_operand" "r,m")])]))]
+ "TARGET_V32"
+ "%x3%e4.%m4 %2,%0"
+ [(set_attr "slottable" "yes")])
+
+;; As with the side-effect patterns, we may have to have swapped operands for add.
+;; For commutative operands, these are the canonical forms.
+
+;; QImode to HImode
+
+(define_insn "*addxqihi_swap_non_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
+ (plus:HI
+ (match_operator:HI
+ 3 "cris_extend_operator"
+ [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])
+ (match_operand:HI 1 "register_operand" "0,0,0,r")))]
+ "!TARGET_V32 && operands[1] != frame_pointer_rtx"
+ "@
+ add%e3.b %2,%0
+ add%e3.b %2,%0
+ add%e3.b %2,%0
+ add%e3.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,no,no")
+ (set_attr "cc" "clobber")])
+
+;; A case for v32, to catch the "addo" insn in addition to "adds". We
+;; only care to match the canonical form; there should be no other.
+
+(define_insn "*addsbw_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,!a")
+ (plus:HI
+ (sign_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "r,m,m"))
+ (match_operand:HI 1 "register_operand" "0,0,r")))]
+ "TARGET_V32"
+ "@
+ adds.b %2,%0
+ adds.b %2,%0
+ addo.b %2,%1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "clobber,clobber,none")])
+
+(define_insn "*addubw_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI
+ (zero_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "r,m"))
+ (match_operand:HI 1 "register_operand" "0,0")))]
+ "TARGET_V32"
+ "addu.b %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*extop<mode>si_swap_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (match_operator:SI
+ 4 "cris_plus_or_bound_operator"
+ [(match_operator:SI
+ 3 "cris_extend_operator"
+ [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")])
+ (match_operand:SI 1 "register_operand" "0,0,0,r")]))]
+ "!TARGET_V32
+ && (GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND)
+ && operands[1] != frame_pointer_rtx"
+ "@
+ %x4%E3<m> %2,%0
+ %x4%E3<m> %2,%0
+ %x4%E3<m> %2,%0
+ %x4%E3<m> %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,no,no")])
+
+(define_insn "*adds<mode>_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,!a")
+ (plus:SI
+ (sign_extend:SI
+ (match_operand:BW 2 "nonimmediate_operand" "r,m,m"))
+ (match_operand:SI 1 "register_operand" "0,0,r")))]
+ "TARGET_V32"
+ "@
+ adds<m> %2,%0
+ adds<m> %2,%0
+ addo<m> %2,%1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "*,*,none")])
+
+(define_insn "*addu<mode>_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI
+ (zero_extend:SI
+ (match_operand:BW 2 "nonimmediate_operand" "r,m"))
+ (match_operand:SI 1 "register_operand" "0,0")))]
+ "TARGET_V32 && operands[1] != frame_pointer_rtx"
+ "addu<m> %2,%0"
+ [(set_attr "slottable" "yes")])
+
+(define_insn "*bound<mode>_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umin:SI
+ (zero_extend:SI
+ (match_operand:BW 2 "register_operand" "r"))
+ (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_V32 && operands[1] != frame_pointer_rtx"
+ "bound<m> %2,%0"
+ [(set_attr "slottable" "yes")])
+
+;; This is the special case when we use what corresponds to the
+;; instruction above in "casesi". Do *not* change it to use the generic
+;; pattern and "REG 15" as pc; I did that and it led to madness and
+;; maintenance problems: Instead of (as imagined) recognizing and removing
+;; or replacing this pattern with something simpler, other variant
+;; patterns were recognized or combined, including some prefix variants
+;; where the value in pc is not that of the next instruction (which means
+;; this instruction actually *is* special and *should* be marked as such).
+;; When switching from the "generic pattern match" approach to this simpler
+;; approach, there were insignificant differences in gcc, ipps and
+;; product code, somehow due to scratching reload behind the ear or
+;; something. Testcase "gcc" looked .01% slower and 4 bytes bigger;
+;; product code became .001% smaller but "looked better". The testcase
+;; "ipps" was just different at register allocation).
+;;
+;; Assumptions in the jump optimizer force us to use IF_THEN_ELSE in this
+;; pattern with the default-label as the else, with the "if" being
+;; index-is-less-than the max number of cases plus one. The default-label
+;; is attached to the end of the case-table at time of output.
+
+(define_insn "*casesi_adds_w"
+ [(set (pc)
+ (if_then_else
+ (ltu (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (plus:SI (sign_extend:SI
+ (mem:HI
+ (plus:SI (mult:SI (match_dup 0) (const_int 2))
+ (pc))))
+ (pc))
+ (label_ref (match_operand 2 "" ""))))
+ (use (label_ref (match_operand 3 "" "")))]
+ "!TARGET_V32 && operands[0] != frame_pointer_rtx"
+ "adds.w [$pc+%0.w],$pc"
+ [(set_attr "cc" "clobber")])
+
+;; For V32, we just have a jump, but we need to mark the table as used,
+;; and the jump insn must have the if_then_else form expected by core
+;; GCC. Since we don't want to prolong the lifetime of the original
+;; index value, we compare against "unspec 0". It's a pity we have to
+;; jump through hoops to get the default label in place and to keep the jump
+;; table around. FIXME: Look into it some time.
+
+(define_insn "*casesi_jump_v32"
+ [(set (pc)
+ (if_then_else
+ (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI)
+ (match_operand:SI 0 "const_int_operand" "n"))
+ (match_operand:SI 1 "register_operand" "r")
+ (label_ref (match_operand 2 "" ""))))
+ (use (label_ref (match_operand 3 "" "")))]
+ "TARGET_V32"
+ "jump %1%#"
+ [(set_attr "cc" "clobber")
+ (set_attr "slottable" "has_slot")])
+
+;; Multiply instructions.
+
+;; Sometimes powers of 2 (which are normally canonicalized to a
+;; left-shift) appear here, as a result of address reloading.
+;; As a special case, for values 3 and 5, we can match with an addi, so add those.
+;;
+;; FIXME: This may be unnecessary now.
+;; Explicitly named for convenience of having a gen_... function.
+
+(define_insn "addi_mul"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI
+ (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "operands[0] != frame_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 4 || INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5)"
+{
+ if (INTVAL (operands[2]) == 2)
+ return "lslq 1,%0";
+ else if (INTVAL (operands[2]) == 4)
+ return "lslq 2,%0";
+ else if (INTVAL (operands[2]) == 3)
+ return "addi %0.w,%0";
+ else if (INTVAL (operands[2]) == 5)
+ return "addi %0.d,%0";
+ return "BAD: adr_mulsi: %0=%1*%2";
+}
+[(set_attr "slottable" "yes")
+ ;; No flags are changed if this insn is "addi", but it does not seem
+ ;; worth the trouble to distinguish that from the lslq cases.
+ (set_attr "cc" "clobber")])
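+
+;; (The trick for 3 and 5: with operand 0 as both base and index,
+;; "addi %0.w,%0" computes %0 + 2*%0 = 3*%0 and "addi %0.d,%0"
+;; computes %0 + 4*%0 = 5*%0, .w and .d being the 2x and 4x index
+;; scalings.)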
+
+;; The addi insn as it is normally used.
+
+;; Make the ACR alternative taste bad enough that it is not chosen as a
+;; preference, to avoid spilling problems (unwind-dw2-fde.c at build).
+;; FIXME: Revisit for new register allocator.
+
+(define_insn "*addi"
+ [(set (match_operand:SI 0 "register_operand" "=r,!a")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (match_operand:SI 1 "register_operand" "0,r")))]
+ "operands[0] != frame_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && CONST_INT_P (operands[3])
+ && (INTVAL (operands[3]) == 1
+ || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)"
+ "@
+ addi %2%T3,%0
+ addi %2%T3,%1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "none")])
+
+;; The mstep instruction. Probably not useful by itself; it's too
+;; non-linear wrt. the other insns. We used to expand to it, so at least
+;; it's correct.
+
+(define_insn "mstep_shift"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (lt:SI (cc0) (const_int 0))
+ (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (match_operand:SI 2 "register_operand" "r"))
+ (ashift:SI (match_operand:SI 3 "register_operand" "0")
+ (const_int 1))))]
+ "!TARGET_V32"
+ "mstep %2,%0"
+ [(set_attr "slottable" "yes")])
+
+;; When illegitimate addresses are legitimized, sometimes gcc forgets
+;; to canonicalize the multiplications.
+;;
+;; FIXME: Check gcc > 2.7.2, remove and possibly fix in gcc.
+
+(define_insn "mstep_mul"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (lt:SI (cc0) (const_int 0))
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (match_operand:SI 2 "register_operand" "r"))
+ (mult:SI (match_operand:SI 3 "register_operand" "0")
+ (const_int 2))))]
+ "!TARGET_V32
+ && operands[0] != frame_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && operands[2] != frame_pointer_rtx
+ && operands[3] != frame_pointer_rtx"
+ "mstep %2,%0"
+ [(set_attr "slottable" "yes")])
+
+(define_insn "<u>mul<s><mode>3"
+ [(set (match_operand:WD 0 "register_operand" "=r")
+ (mult:WD
+ (szext:WD (match_operand:<S> 1 "register_operand" "%0"))
+ (szext:WD (match_operand:<S> 2 "register_operand" "r"))))
+ (clobber (match_scratch:SI 3 "=h"))]
+ "TARGET_HAS_MUL_INSNS"
+ "%!mul<su><mm> %2,%0"
+ [(set (attr "slottable")
+ (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+ (const_string "no")
+ (const_string "yes")))
+ ;; For umuls.[bwd] it's just N unusable here, but let's be safe.
+ ;; For muls.b, this really extends to SImode, so cc should be
+ ;; considered clobbered.
+ ;; For muls.w, it's just N unusable here, but let's be safe.
+ (set_attr "cc" "clobber")])
+
+;; Note that gcc does not make use of such a thing as umulqisi3. It gets
+;; confused and will erroneously use it instead of umulhisi3, failing (at
+;; least) gcc.c-torture/execute/arith-rand.c at all optimization levels.
+;; Inspection of optab code shows that there must be only one widening
+;; multiplication per mode widened to.
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (match_scratch:SI 3 "=h"))]
+ "TARGET_HAS_MUL_INSNS"
+ "%!muls.d %2,%0"
+ [(set (attr "slottable")
+ (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+ (const_string "no")
+ (const_string "yes")))
+ ;; Just N unusable here, but let's be safe.
+ (set_attr "cc" "clobber")])
+
+;; A few multiply variations.
+
+;; When needed, we can get the high 32 bits from the overflow
+;; register. We don't care to split and optimize these.
+;;
+;; Note that cc0 is still valid after the move-from-overflow-register
+;; insn; no special precautions need to be taken in cris_notice_update_cc.
+
+(define_insn "<u>mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI
+ (szext:DI (match_operand:SI 1 "register_operand" "%0"))
+ (szext:DI (match_operand:SI 2 "register_operand" "r"))))
+ (clobber (match_scratch:SI 3 "=h"))]
+ "TARGET_HAS_MUL_INSNS"
+ "%!mul<su>.d %2,%M0\;move $mof,%H0")
+
+;; These two patterns may be expressible by other means, perhaps by making
+;; [u]?mulsidi3 a define_expand.
+
+;; Due to register allocation braindamage, the clobber 1,2 alternatives
+;; cause a move into the clobbered register *before* the insn, then
+;; after the insn, mof is moved too, rather than the clobber assigned
+;; the last mof target. This became apparent when making MOF and SRP
+;; visible registers, with the necessary tweak to smulsi3_highpart.
+;; Because these patterns are used in division by constants, that damage
+;; is visible (ipps regression tests). Therefore the last two
+;; alternatives, "helping" reload to avoid an unnecessary move, but
+;; punished by force of one "?". Check code from "int d (int a) {return
+;; a / 1000;}" and unsigned. FIXME: Comment above was for 3.2, revisit.
+
+(define_insn "<su>mulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=h,h,?r,?r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (szext:DI (match_operand:SI 1 "register_operand" "r,r,0,r"))
+ (szext:DI (match_operand:SI 2 "register_operand" "r,r,r,0")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1,2,h,h"))]
+ "TARGET_HAS_MUL_INSNS"
+ "@
+ %!mul<su>.d %2,%1
+ %!mul<su>.d %1,%2
+ %!mul<su>.d %2,%1\;move $mof,%0
+ %!mul<su>.d %1,%2\;move $mof,%0"
+ [(set_attr "slottable" "yes,yes,no,no")
+ (set_attr "cc" "clobber")])
+
+;; Divide and modulus instructions. CRIS only has a step instruction.
+
+(define_insn "dstep_shift"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (geu:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:SI (ashift:SI (match_operand:SI 3 "register_operand" "0")
+ (const_int 1))
+ (match_operand:SI 4 "register_operand" "2"))
+ (ashift:SI (match_operand:SI 5 "register_operand" "0")
+ (const_int 1))))]
+ ""
+ "dstep %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Here's a variant with mult instead of ashift.
+;;
+;; FIXME: This should be investigated. Which one matches through combination?
+
+(define_insn "dstep_mul"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (geu:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (match_operand:SI 2 "register_operand" "r"))
+ (minus:SI (mult:SI (match_operand:SI 3 "register_operand" "0")
+ (const_int 2))
+ (match_operand:SI 4 "register_operand" "2"))
+ (mult:SI (match_operand:SI 5 "register_operand" "0")
+ (const_int 2))))]
+ "operands[0] != frame_pointer_rtx
+ && operands[1] != frame_pointer_rtx
+ && operands[2] != frame_pointer_rtx
+ && operands[3] != frame_pointer_rtx"
+ "dstep %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Logical operators.
+
+;; Bitwise "and".
+
+;; There is no use in defining "anddi3", because gcc can expand this by
+;; itself, and make reasonable code without interference.
+
+;; If the first operand is memory or a register and is the same as the
+;; second operand, and the third operand is -256 or -65536, we can use
+;; CLEAR instead. Or, if the first operand is a register, and the third
+;; operand is 255 or 65535, we can zero_extend.
+;; GCC isn't smart enough to recognize these cases (yet), and they seem
+;; to be common enough to be worthwhile.
+;; FIXME: This should be made obsolete.
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+{
+ if (! (CONST_INT_P (operands[2])
+ && (((INTVAL (operands[2]) == -256
+ || INTVAL (operands[2]) == -65536)
+ && rtx_equal_p (operands[1], operands[0]))
+ || ((INTVAL (operands[2]) == 255
+ || INTVAL (operands[2]) == 65535)
+ && REG_P (operands[0])))))
+ {
+ /* Make intermediate steps if operand0 is not a register or
+ operand1 is not a register, and hope that the reload pass will
+ make something useful out of it. Note that the operands are
+ *not* canonicalized. For the moment, I chicken out on this,
+ because all or most ports do not describe 'and' with
+ canonicalized operands, and I seem to remember magic in reload,
+ checking that operand1 has constraint '%0', in which case
+ operand0 and operand1 must have similar predicates.
+ FIXME: Investigate. */
+ rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+ rtx reg1 = operands[1];
+
+ if (! REG_P (reg1))
+ {
+ emit_move_insn (reg0, reg1);
+ reg1 = reg0;
+ }
+
+ emit_insn (gen_rtx_SET (SImode, reg0,
+ gen_rtx_AND (SImode, reg1, operands[2])));
+
+ /* Make sure we get the right *final* destination. */
+ if (! REG_P (operands[0]))
+ emit_move_insn (operands[0], reg0);
+
+ DONE;
+ }
+})
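+
+;; Illustration of the special cases (a sketch; exact output not
+;; verified here):
+;;   unsigned int lo (unsigned int x) { return x & 255; }   /* movu.b */
+;;   unsigned int nl (unsigned int x) { return x & ~255; }  /* clear.b */
+;; i.e. masking with 255/65535 is a zero-extending move, and masking
+;; with -256/-65536 clears the low part in place.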
+
+;; Some special cases of andsi3.
+
+(define_insn "*andsi_movu"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%r,Q,To")
+ (match_operand:SI 2 "const_int_operand" "n,n,n")))]
+ "(INTVAL (operands[2]) == 255 || INTVAL (operands[2]) == 65535)
+ && !side_effects_p (operands[1])"
+ "movu.%z2 %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+(define_insn "*andsi_clear"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,Q,Q,To,To")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "const_int_operand" "P,n,P,n,P,n")))]
+ "(INTVAL (operands[2]) == -65536 || INTVAL (operands[2]) == -256)
+ && !side_effects_p (operands[0])"
+ "@
+ cLear.b %0
+ cLear.w %0
+ cLear.b %0
+ cLear.w %0
+ cLear.b %0
+ cLear.w %0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no")
+ (set_attr "cc" "none")])
+
+;; This is a catch-all pattern, taking care of everything that was not
+;; matched in the insns above.
+;;
+;; Sidenote: the tightening from "nonimmediate_operand" to
+;; "register_operand" for operand 1 actually increased the register
+;; pressure (worse code). That will hopefully change with an
+;; improved reload pass.
+
+(define_insn "*expanded_andsi_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,r")
+ (match_operand:SI 2 "general_operand" "I,r,Q>,g,!To")))]
+ "!TARGET_V32"
+ "@
+ andq %2,%0
+ and.d %2,%0
+ and.d %2,%0
+ and.d %2,%0
+ and.d %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,no")])
+
+(define_insn "*expanded_andsi_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0")
+ (match_operand:SI 2 "general_operand" "I,r,Q>,g")))]
+ "TARGET_V32"
+ "@
+ andq %2,%0
+ and.d %2,%0
+ and.d %2,%0
+ and.d %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,no")
+ (set_attr "cc" "noov32")])
+
+;; For both QI and HI we may use the quick patterns. This results in
+;; useless condition codes, but they are used rarely enough for this to
+;; normally be a win (could check ahead for use of cc0, but seems to be
+;; more pain than win).
+
+;; FIXME: See note for andsi3
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+{
+ if (! (CONST_INT_P (operands[2])
+ && (((INTVAL (operands[2]) == -256
+ || INTVAL (operands[2]) == 65280)
+ && rtx_equal_p (operands[1], operands[0]))
+ || (INTVAL (operands[2]) == 255
+ && REG_P (operands[0])))))
+ {
+ /* See comment for andsi3. */
+ rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (HImode);
+ rtx reg1 = operands[1];
+
+ if (! REG_P (reg1))
+ {
+ emit_move_insn (reg0, reg1);
+ reg1 = reg0;
+ }
+
+ emit_insn (gen_rtx_SET (HImode, reg0,
+ gen_rtx_AND (HImode, reg1, operands[2])));
+
+ /* Make sure we get the right destination. */
+ if (! REG_P (operands[0]))
+ emit_move_insn (operands[0], reg0);
+
+ DONE;
+ }
+})
+
+;; Some fast andhi3 special cases.
+
+(define_insn "*andhi_movu"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "r,Q,To")
+ (const_int 255)))]
+ "!side_effects_p (operands[1])"
+ "mOvu.b %1,%0"
+ [(set_attr "slottable" "yes,yes,no")])
+
+(define_insn "*andhi_clear"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,Q,To")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "0,0,0")
+ (const_int -256)))]
+ "!side_effects_p (operands[0])"
+ "cLear.b %0"
+ [(set_attr "slottable" "yes,yes,no")
+ (set_attr "cc" "none")])
+
+;; Catch-all andhi3 pattern.
+
+(define_insn "*expanded_andhi_non_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r")
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r")
+ (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))]
+
+;; Sidenote: the tightening from "general_operand" to
+;; "register_operand" for operand 1 actually increased the register
+;; pressure (worse code). That will hopefully change with an
+;; improved reload pass.
+
+ "!TARGET_V32"
+ "@
+ andq %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ anDq %b2,%0
+ and.w %2,%0
+ and.w %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,yes,no,no")
+ (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")])
+
+(define_insn "*expanded_andhi_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r")
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))]
+ "TARGET_V32"
+ "@
+ andq %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ anDq %b2,%0
+ and.w %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,yes,no")
+ (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")])
+
+;; A strict_low_part pattern.
+
+(define_insn "*andhi_lowpart_non_v32"
+ [(set (strict_low_part
+ (match_operand:HI 0 "register_operand" "+r,r, r,r,r,r"))
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r")
+ (match_operand:HI 2 "general_operand" "r,Q>,L,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ and.w %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ anDq %b2,%0
+ and.w %2,%0
+ and.w %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,no,yes,no,no")
+ (set_attr "cc" "normal,normal,normal,clobber,normal,normal")])
+
+(define_insn "*andhi_lowpart_v32"
+ [(set (strict_low_part
+ (match_operand:HI 0 "register_operand" "+r,r,r,r,r"))
+ (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:HI 2 "general_operand" "r,Q>,L,O,g")))]
+ "TARGET_V32"
+ "@
+ and.w %2,%0
+ and.w %2,%0
+ and.w %2,%0
+ anDq %b2,%0
+ and.w %2,%0"
+ [(set_attr "slottable" "yes,yes,no,yes,no")
+ (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")])
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "register_operand")
+ (and:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "general_operand")))]
+ ""
+ "")
+
+(define_insn "*andqi3_non_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r")
+ (and:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r")
+ (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ andq %2,%0
+ and.b %2,%0
+ and.b %2,%0
+ andQ %b2,%0
+ and.b %2,%0
+ and.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no")
+ (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")])
+
+(define_insn "*andqi3_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r")
+ (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))]
+ "TARGET_V32"
+ "@
+ andq %2,%0
+ and.b %2,%0
+ and.b %2,%0
+ andQ %b2,%0
+ and.b %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no")
+ (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")])
+
+(define_insn "*andqi_lowpart_non_v32"
+ [(set (strict_low_part
+ (match_operand:QI 0 "register_operand" "+r,r, r,r,r"))
+ (and:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,r")
+ (match_operand:QI 2 "general_operand" "r,Q>,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ and.b %2,%0
+ and.b %2,%0
+ andQ %b2,%0
+ and.b %2,%0
+ and.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,no")
+ (set_attr "cc" "normal,normal,clobber,normal,normal")])
+
+(define_insn "*andqi_lowpart_v32"
+ [(set (strict_low_part
+ (match_operand:QI 0 "register_operand" "+r,r,r,r"))
+ (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0")
+ (match_operand:QI 2 "general_operand" "r,Q>,O,g")))]
+ "TARGET_V32"
+ "@
+ and.b %2,%0
+ and.b %2,%0
+ andQ %b2,%0
+ and.b %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,no")
+ (set_attr "cc" "noov32,noov32,clobber,noov32")])
+
+;; Bitwise or.
+
+;; Same comment as anddi3 applies here - no need for such a pattern.
+
+;; It seems there's no need to jump through hoops to get good code such as
+;; with andsi3.
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:BWD 0 "register_operand")
+ (ior:BWD (match_operand:BWD 1 "register_operand")
+ (match_operand:BWD 2 "general_operand")))]
+ ""
+ "")
+
+(define_insn "*iorsi3_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,0,r")
+ (match_operand:SI 2 "general_operand" "I, r,Q>,n,g,!To")))]
+ "!TARGET_V32"
+ "@
+ orq %2,%0
+ or.d %2,%0
+ or.d %2,%0
+ oR.%s2 %2,%0
+ or.d %2,%0
+ or.d %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,no,no")
+ (set_attr "cc" "normal,normal,normal,clobber,normal,normal")])
+
+(define_insn "*iorsi3_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:SI 2 "general_operand" "I,r,Q>,n,g")))]
+ "TARGET_V32"
+ "@
+ orq %2,%0
+ or.d %2,%0
+ or.d %2,%0
+ oR.%s2 %2,%0
+ or.d %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,no")
+ (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")])
+
+(define_insn "*iorhi3_non_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r")
+ (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r")
+ (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ orq %2,%0
+ or.w %2,%0
+ or.w %2,%0
+ or.w %2,%0
+ oRq %b2,%0
+ or.w %2,%0
+ or.w %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,yes,no,no")
+ (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")])
+
+(define_insn "*iorhi3_v32"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r")
+ (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))]
+ "TARGET_V32"
+ "@
+ orq %2,%0
+ or.w %2,%0
+ or.w %2,%0
+ or.w %2,%0
+ oRq %b2,%0
+ or.w %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,no,yes,no")
+ (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")])
+
+(define_insn "*iorqi3_non_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r")
+ (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r")
+ (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))]
+ "!TARGET_V32"
+ "@
+ orq %2,%0
+ or.b %2,%0
+ or.b %2,%0
+ orQ %b2,%0
+ or.b %2,%0
+ or.b %2,%1,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no,no")
+ (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")])
+
+(define_insn "*iorqi3_v32"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r")
+ (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))]
+ "TARGET_V32"
+ "@
+ orq %2,%0
+ or.b %2,%0
+ or.b %2,%0
+ orQ %b2,%0
+ or.b %2,%0"
+ [(set_attr "slottable" "yes,yes,yes,yes,no")
+ (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")])
+
+;; Exclusive-or
+
+;; See comment about "anddi3" for xordi3 - no need for such a pattern.
+;; FIXME: Do we really need the shorter variants?
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "xor %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:BW 0 "register_operand" "=r")
+ (xor:BW (match_operand:BW 1 "register_operand" "%0")
+ (match_operand:BW 2 "register_operand" "r")))]
+ ""
+ "xor %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "clobber")])
+
+;; Negation insns.
+
+;; Questionable use, here mostly as a (slightly usable) define_expand
+;; example.
+
+(define_expand "negsf2"
+ [(set (match_dup 2)
+ (match_dup 3))
+ (parallel [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1
+ "register_operand" "0")))
+ (use (match_dup 2))])]
+ ""
+{
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = GEN_INT (1 << 31);
+})
+
+(define_insn "*expanded_negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1 "register_operand" "0")))
+ (use (match_operand:SI 2 "register_operand" "r"))]
+ ""
+ "xor %2,%0"
+ [(set_attr "slottable" "yes")])
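+
+;; What the expansion computes, as a C sketch (assuming IEEE single
+;; precision; hypothetical helper):
+;;
+;;   #include <stdint.h>
+;;   #include <string.h>
+;;
+;;   float negsf (float x)
+;;   {
+;;     uint32_t u;
+;;     memcpy (&u, &x, sizeof u);
+;;     u ^= UINT32_C (1) << 31;   /* the "xor %2,%0" above */
+;;     memcpy (&x, &u, sizeof u);
+;;     return x;
+;;   }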
+
+;; No "negdi2" although we could make one up that may be faster than
+;; the one in libgcc.
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:BWD 0 "register_operand" "=r")
+ (neg:BWD (match_operand:BWD 1 "register_operand" "r")))]
+ ""
+ "neg<m> %1,%0"
+ [(set_attr "slottable" "yes")])
+
+;; One's complement insns.
+
+;; See comment on anddi3 - no need for a DImode pattern.
+;; See also xor comment.
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "0")))]
+ ""
+ "not %0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:BW 0 "register_operand" "=r")
+ (not:BW (match_operand:BW 1 "register_operand" "0")))]
+ ""
+ "not %0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "clobber")])
+
+;; Arithmetic/Logical shift right (and SI left).
+
+(define_insn "<shlr>si3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (shift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "Kcr")))]
+ ""
+{
+ if (REG_S_P (operands[2]))
+ return "<slr>.d %2,%0";
+
+ return "<slr>q %2,%0";
+}
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Since gcc gets lost, and forgets to zero-extend the source (or mask
+;; the destination) when it changes shifts of lower modes into SImode,
+;; it is better to implement these as expands with anonymous patterns
+;; instead of the more correct define_insns. This occurs when gcc
+;; thinks that it is better to widen to SImode and use an immediate
+;; shift count.
+
+;; FIXME: Is this legacy or still true for gcc >= 2.7.2?
+
+;; FIXME: Can't parametrize sign_extend and zero_extend (before
+;; mentioning "shiftrt"), so we need two patterns.
+(define_expand "ashr<mode>3"
+ [(set (match_dup 3)
+ (sign_extend:SI (match_operand:BW 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" "")))
+ (set (match_dup 5) (ashiftrt:SI (match_dup 3) (match_dup 4)))
+ (set (match_operand:BW 0 "general_operand" "")
+ (subreg:BW (match_dup 5) 0))]
+ ""
+{
+ int i;
+
+ for (i = 3; i < 6; i++)
+ operands[i] = gen_reg_rtx (SImode);
+})
+
+(define_expand "lshr<mode>3"
+ [(set (match_dup 3)
+ (zero_extend:SI (match_operand:BW 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" "")))
+ (set (match_dup 5) (lshiftrt:SI (match_dup 3) (match_dup 4)))
+ (set (match_operand:BW 0 "general_operand" "")
+ (subreg:BW (match_dup 5) 0))]
+ ""
+{
+ int i;
+
+ for (i = 3; i < 6; i++)
+ operands[i] = gen_reg_rtx (SImode);
+})
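+
+;; Illustration of the expansion above (a sketch): a narrow shift like
+;;
+;;   unsigned char f (unsigned char x, unsigned char n)
+;;   {
+;;     return x >> n;
+;;   }
+;;
+;; becomes zero-extensions of both x and n into SImode registers, an
+;; SImode lshiftrt and a subreg back to QImode, so the value is never
+;; shifted with undefined upper bits.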
+
+(define_insn "*expanded_<shlr><mode>"
+ [(set (match_operand:BW 0 "register_operand" "=r")
+ (shiftrt:BW (match_operand:BW 1 "register_operand" "0")
+ (match_operand:BW 2 "register_operand" "r")))]
+ ""
+ "<slr><m> %2,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+(define_insn "*<shlr><mode>_lowpart"
+ [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r"))
+ (shiftrt:BW (match_dup 0)
+ (match_operand:BW 1 "register_operand" "r")))]
+ ""
+ "<slr><m> %1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Arithmetic/logical shift left.
+
+;; For narrower modes than SI, we can use lslq although it makes cc
+;; unusable. The win is that we do not have to reload the shift-count
+;; into a register.
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:BW 0 "register_operand" "=r,r")
+ (ashift:BW (match_operand:BW 1 "register_operand" "0,0")
+ (match_operand:BW 2 "nonmemory_operand" "r,Kc")))]
+ ""
+{
+ return
+ (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > <nbitsm1>)
+ ? "moveq 0,%0"
+ : (CONSTANT_P (operands[2])
+ ? "lslq %2,%0" : "lsl<m> %2,%0");
+}
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32,clobber")])
+
+;; A strict_low_part matcher.
+
+(define_insn "*ashl<mode>_lowpart"
+ [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r"))
+ (ashift:BW (match_dup 0)
+ (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "lsl<m> %1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Various strange insns that gcc likes.
+
+;; Fortunately, it is simple to construct an abssf (although it may not
+;; be very much used in practice).
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0")))]
+ ""
+ "lslq 1,%0\;lsrq 1,%0")
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (abs:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "abs %1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; FIXME: GCC should be able to do these expansions itself.
+
+(define_expand "abs<mode>2"
+ [(set (match_dup 2)
+ (sign_extend:SI (match_operand:BW 1 "general_operand" "")))
+ (set (match_dup 3) (abs:SI (match_dup 2)))
+ (set (match_operand:BW 0 "register_operand" "")
+ (subreg:BW (match_dup 3) 0))]
+ ""
+ "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);")
+
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_HAS_LZ"
+ "lz %1,%0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+(define_insn "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_HAS_SWAP"
+ "swapwb %0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; This instruction swaps all bits in a register.
+;; That means that the most significant bit is put in the place
+;; of the least significant bit, and so on.
+
+(define_insn "cris_swap_bits"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")]
+ CRIS_UNSPEC_SWAP_BITS))]
+ "TARGET_HAS_SWAP"
+ "swapwbr %0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "noov32")])
+
+;; Implement ctz using two instructions, one for bit swap and one for clz.
+;; Defines a scratch register to avoid clobbering the input.
+
+(define_expand "ctzsi2"
+ [(set (match_dup 2)
+ (match_operand:SI 1 "register_operand"))
+ (set (match_dup 2)
+ (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS))
+ (set (match_operand:SI 0 "register_operand")
+ (clz:SI (match_dup 2)))]
+ "TARGET_HAS_LZ && TARGET_HAS_SWAP"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+;; Bound-insn. Defined to be the same as an unsigned minimum, which is an
+;; operation supported by gcc. Used in casesi, but it also appears
+;; now and then in normal code.
+
+(define_expand "uminsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (umin:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+{
+ if (MEM_P (operands[2]) && TARGET_V32)
+ operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "*uminsi3_non_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r, r,r")
+ (umin:SI (match_operand:SI 1 "register_operand" "%0,0, 0,r")
+ (match_operand:SI 2 "general_operand" "r,Q>,g,!To")))]
+ "!TARGET_V32"
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ /* Constant operands are zero-extended, so only 32-bit operands
+ may be negative. */
+ if (INTVAL (operands[2]) >= 0)
+ {
+ if (INTVAL (operands[2]) < 256)
+ return "bound.b %2,%0";
+
+ if (INTVAL (operands[2]) < 65536)
+ return "bound.w %2,%0";
+ }
+ }
+ else if (which_alternative == 3)
+ return "bound.d %2,%1,%0";
+
+ return "bound.d %2,%0";
+}
+ [(set_attr "slottable" "yes,yes,no,no")])
+
+(define_insn "*uminsi3_v32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (umin:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,i")))]
+ "TARGET_V32"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ /* Constant operands are zero-extended, so only 32-bit operands
+ may be negative. */
+ if (INTVAL (operands[2]) >= 0)
+ {
+ if (INTVAL (operands[2]) < 256)
+ return "bound.b %2,%0";
+
+ if (INTVAL (operands[2]) < 65536)
+ return "bound.w %2,%0";
+ }
+ }
+
+ return "bound.d %2,%0";
+}
+ [(set_attr "slottable" "yes,no")])
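+
+;; Illustration (a sketch): a plain C unsigned minimum maps onto bound,
+;;   unsigned int umin (unsigned int a, unsigned int b)
+;;   {
+;;     return a < b ? a : b;   /* bound.d */
+;;   }
+;; with the .b/.w forms used when the constant bound fits.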
+
+;; Jump and branch insns.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "ba %l0%#"
+ [(set_attr "slottable" "has_slot")])
+
+;; Testcase gcc.c-torture/compile/991213-3.c fails if we allow a constant
+;; here, since the insn is not recognized as an indirect jump by
+;; jmp_uses_reg_or_mem used by computed_jump_p. Perhaps it is a kludge to
+;; change from general_operand to nonimmediate_operand (at least the docs
+;; should be changed), but then again the pattern is called indirect_jump.
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand"))]
+ ""
+{
+ if (TARGET_V32 && MEM_P (operands[0]))
+ operands[0] = force_reg (SImode, operands[0]);
+})
+
+(define_insn "*indirect_jump_non_v32"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))]
+ "!TARGET_V32"
+ "jump %0")
+
+(define_insn "*indirect_jump_v32"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ "TARGET_V32"
+ "jump %0%#"
+ [(set_attr "slottable" "has_slot")])
+
+;; Return insn. Used whenever the epilogue is very simple, i.e. if it
+;; is only a single ret or jump [sp+]. No allocated stack space or
+;; saved registers are allowed.
+;; Note that for this pattern, although named, it is ok to check the
+;; context of the insn in the test, not only compiler switches.
+
+(define_expand "return"
+ [(return)]
+ "cris_simple_epilogue ()"
+ "cris_expand_return (cris_return_address_on_stack ()); DONE;")
+
+(define_insn "*return_expanded"
+ [(return)]
+ ""
+{
+ return cris_return_address_on_stack_for_return ()
+ ? "jump [$sp+]" : "ret%#";
+}
+ [(set (attr "slottable")
+ (if_then_else
+ (ne (symbol_ref
+ "(cris_return_address_on_stack_for_return ())")
+ (const_int 0))
+ (const_string "no")
+ (const_string "has_return_slot")))])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ "TARGET_PROLOGUE_EPILOGUE"
+ "cris_expand_prologue (); DONE;")
+
+;; Note that the (return) from the expander itself is always the last
+;; insn in the epilogue.
+(define_expand "epilogue"
+ [(const_int 0)]
+ "TARGET_PROLOGUE_EPILOGUE"
+ "cris_expand_epilogue (); DONE;")
+
+;; Conditional branches.
+
+(define_expand "cbranch<mode>4"
+ [(set (cc0) (compare
+ (match_operand:BWD 1 "nonimmediate_operand")
+ (match_operand:BWD 2 "general_operand")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cbranchdi4"
+ [(set (cc0)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ if (TARGET_V32 && !REG_P (operands[1]))
+ operands[1] = force_reg (DImode, operands[1]);
+ if (TARGET_V32 && MEM_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
+})
+
+;; We suffer from the same overflow-bit-gets-in-the-way problem as
+;; e.g. m68k, so we have to check whether the overflow bit is set on
+;; all "signed" conditions.
+
+(define_insn "b<ncond:code>"
+ [(set (pc)
+ (if_then_else (ncond (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "b<CC> %l0%#"
+ [(set_attr "slottable" "has_slot")])
+
+(define_insn "b<ocond:code>"
+ [(set (pc)
+ (if_then_else (ocond (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? 0 : "b<CC> %l0%#";
+}
+ [(set_attr "slottable" "has_slot")])
+
+(define_insn "b<rcond:code>"
+ [(set (pc)
+ (if_then_else (rcond (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? "b<oCC> %l0%#" : "b<CC> %l0%#";
+}
+ [(set_attr "slottable" "has_slot")])
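+
+;; For example (an assumption about the condition mappings, not
+;; verified here): signed "less than" is N xor V, so after an insn
+;; whose cc attribute leaves the V flag unusable, blt cannot be used
+;; and the N-flag variant bmi is emitted via <oCC> instead.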
+
+;; Reversed anonymous patterns to the ones above, as mandated.
+
+(define_insn "*b<ncond:code>_reversed"
+ [(set (pc)
+ (if_then_else (ncond (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "b<rCC> %l0%#"
+ [(set_attr "slottable" "has_slot")])
+
+(define_insn "*b<ocond:code>_reversed"
+ [(set (pc)
+ (if_then_else (ocond (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? 0 : "b<rCC> %l0%#";
+}
+ [(set_attr "slottable" "has_slot")])
+
+(define_insn "*b<rcond:code>_reversed"
+ [(set (pc)
+ (if_then_else (rcond (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? "b<roCC> %l0%#" : "b<rCC> %l0%#";
+}
+ [(set_attr "slottable" "has_slot")])
+
+;; Set on condition: sCC.
+
+(define_expand "cstoredi4"
+ [(set (cc0) (compare
+ (match_operand:DI 2 "nonimmediate_operand")
+ (match_operand:DI 3 "general_operand")))
+ (set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+{
+ if (TARGET_V32 && !REG_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
+ if (TARGET_V32 && MEM_P (operands[3]))
+ operands[3] = force_reg (DImode, operands[3]);
+})
+
+(define_expand "cstore<mode>4"
+ [(set (cc0) (compare
+ (match_operand:BWD 2 "nonimmediate_operand")
+ (match_operand:BWD 3 "general_operand")))
+ (set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+ "")
+
+;; Like bCC, we have to check the overflow bit for
+;; signed conditions.
+
+(define_insn "s<ncond:code>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ncond:SI (cc0) (const_int 0)))]
+ ""
+ "s<CC> %0"
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "none")])
+
+(define_insn "s<rcond:code>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rcond:SI (cc0) (const_int 0)))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? "s<oCC> %0" : "s<CC> %0";
+}
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "none")])
+
+(define_insn "s<ocond:code>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ocond:SI (cc0) (const_int 0)))]
+ ""
+{
+ return
+ (cc_prev_status.flags & CC_NO_OVERFLOW)
+ ? 0 : "s<CC> %0";
+}
+ [(set_attr "slottable" "yes")
+ (set_attr "cc" "none")])
+
+;; Call insns.
+
+;; We need to make these patterns "expand", since the real operand is
+;; hidden in a (mem:QI) inside operand[0] (call_value: operand[1]),
+;; and cannot be checked if it were a "normal" pattern.
+;; Note that "call" and "call_value" are *always* called with a
+;; mem-operand for operand 0 and 1 respective. What happens for combined
+;; instructions is a different issue.
+
+(define_expand "call"
+ [(parallel [(call (match_operand:QI 0 "cris_mem_call_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (clobber (reg:SI CRIS_SRP_REGNUM))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+ if (flag_pic)
+ cris_expand_pic_call_address (&operands[0]);
+})
+
+;; Accept *anything* as operand 1. Accept operands for operand 0 in
+;; order of preference.
+
+(define_insn "*expanded_call_non_v32"
+ [(call (mem:QI (match_operand:SI 0 "general_operand" "r,Q>,g"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI CRIS_SRP_REGNUM))]
+ "!TARGET_V32"
+ "jsr %0")
+
+(define_insn "*expanded_call_v32"
+ [(call
+ (mem:QI
+ (match_operand:SI 0 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI CRIS_SRP_REGNUM))]
+ "TARGET_V32"
+ "@
+ jsr %0%#
+ jsr %0%#
+ bsr %0%#
+ bsr %0%#"
+ [(set_attr "slottable" "has_call_slot")])
+
+;; Parallel when calculating and reusing address of indirect pointer
+;; with simple offset. (Makes most sense with PIC.) It looks a bit
+;; wrong not to have the clobber last, but that's the way combine
+;; generates it (except it doesn't look into the *inner* mem, so this
+;; just matches a peephole2). FIXME: investigate that.
+(define_insn "*expanded_call_side"
+ [(call (mem:QI
+ (mem:SI
+ (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r, r,r")
+ (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn"))))
+ (match_operand 2 "" ""))
+ (clobber (reg:SI CRIS_SRP_REGNUM))
+ (set (match_operand:SI 3 "register_operand" "=*0,r,r")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "!TARGET_AVOID_GOTPLT && !TARGET_V32"
+ "jsr [%3=%0%S1]")
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "cris_mem_call_operand" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI CRIS_SRP_REGNUM))])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+ if (flag_pic)
+ cris_expand_pic_call_address (&operands[1]);
+})
+
+;; Accept *anything* as operand 2. The validity other than "general" of
+;; operand 0 will be checked elsewhere. Accept operands for operand 1 in
+;; order of preference (Q includes r, but r is shorter, faster).
+;; We also accept a PLT symbol. We output it as [rPIC+sym:GOTPLT] rather
+;; than requiring getting rPIC + sym:PLT into a register.
+
+(define_insn "*expanded_call_value_non_v32"
+ [(set (match_operand 0 "nonimmediate_operand" "=g,g,g")
+ (call (mem:QI (match_operand:SI 1 "general_operand" "r,Q>,g"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI CRIS_SRP_REGNUM))]
+ "!TARGET_V32"
+ "Jsr %1"
+ [(set_attr "cc" "clobber")])
+
+;; See similar call special-case.
+(define_insn "*expanded_call_value_side"
+ [(set (match_operand 0 "nonimmediate_operand" "=g,g,g")
+ (call
+ (mem:QI
+ (mem:SI
+ (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r, r,r")
+ (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn"))))
+ (match_operand 3 "" "")))
+ (clobber (reg:SI CRIS_SRP_REGNUM))
+ (set (match_operand:SI 4 "register_operand" "=*1,r,r")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "!TARGET_AVOID_GOTPLT && !TARGET_V32"
+ "Jsr [%4=%1%S2]"
+ [(set_attr "cc" "clobber")])
+
+(define_insn "*expanded_call_value_v32"
+ [(set
+ (match_operand 0 "nonimmediate_operand" "=g,g,g,g")
+ (call
+ (mem:QI
+ (match_operand:SI 1 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 16))]
+ "TARGET_V32"
+ "@
+ Jsr %1%#
+ Jsr %1%#
+ Bsr %1%#
+ Bsr %1%#"
+ [(set_attr "cc" "clobber")
+ (set_attr "slottable" "has_call_slot")])
+
+;; Used in debugging. No use for the direct pattern; unfilled
+;; delayed-branches are taken care of by other means.
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "cc" "none")])
+
+;; We need to stop accesses to the stack after the memory is
+;; deallocated. Unfortunately, reorg doesn't look at naked clobbers,
+;; e.g. (insn ... (clobber (mem:BLK (stack_pointer_rtx)))) and we don't
+;; want to use a naked (unspec_volatile) as that would stop any
+;; scheduling in the epilogue. Hence we model it as a "real" insn that
+;; sets the memory in an unspecified manner. FIXME: Unfortunately it
+;; still has the effect of an unspec_volatile.
+(define_insn "cris_frame_deallocated_barrier"
+ [(set (mem:BLK (reg:SI CRIS_SP_REGNUM))
+ (unspec:BLK [(const_int 0)] CRIS_UNSPEC_FRAME_DEALLOC))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; We expand casesi so we can use "bound" and "add offset fetched from
+;; a table to pc" (adds.w [pc+%0.w],pc).
+
+;; Note: if you change the "parallel" (or add anything after it) in
+;; this expansion, you must change the macro ASM_OUTPUT_CASE_END
+;; accordingly, to add the default case at the end of the jump-table.
+
+(define_expand "cris_casesi_non_v32"
+ [(set (match_dup 5) (match_operand:SI 0 "general_operand" ""))
+ (set (match_dup 6)
+ (minus:SI (match_dup 5)
+ (match_operand:SI 1 "const_int_operand" "n")))
+ (set (match_dup 7)
+ (umin:SI (match_dup 6)
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ltu (match_dup 7) (match_dup 2))
+ (plus:SI (sign_extend:SI
+ (mem:HI
+ (plus:SI (mult:SI (match_dup 7) (const_int 2))
+ (pc))))
+ (pc))
+ (label_ref (match_operand 4 "" ""))))
+ (use (label_ref (match_operand 3 "" "")))])]
+ ""
+{
+ operands[2] = plus_constant (operands[2], 1);
+ operands[5] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] = gen_reg_rtx (SImode);
+})
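+
+;; A worked example with hypothetical values: for "switch (i)" with
+;; cases 5..8, operand 1 is 5 and operand 2 is 3 (adjusted to 4 in the
+;; C body above), so the expansion computes
+;;   t = i - 5;
+;;   t = umin (t, 4);
+;; and dispatches through the table when t < 4; the clamped value 4
+;; falls through to the default label.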
+
+;; FIXME: Check effect of not JUMP_TABLES_IN_TEXT_SECTION.
+(define_expand "cris_casesi_v32"
+ [(set (match_dup 5) (match_operand:SI 0 "general_operand"))
+ (set (match_dup 6)
+ (minus:SI (match_dup 5)
+ (match_operand:SI 1 "const_int_operand")))
+ (set (match_dup 7)
+ (umin:SI (match_dup 6)
+ (match_operand:SI 2 "const_int_operand")))
+ (set (match_dup 8) (match_dup 11))
+ (set (match_dup 9)
+ (plus:SI (mult:SI (match_dup 7) (const_int 2))
+ (match_dup 8)))
+ (set (match_dup 10)
+ (plus:SI (sign_extend:SI (mem:HI (match_dup 9)))
+ (match_dup 9)))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) (match_dup 2))
+ (match_dup 10)
+ (label_ref (match_operand 4 "" ""))))
+ (use (label_ref (match_dup 3)))])]
+ "TARGET_V32"
+{
+ int i;
+ rtx xlabel = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ for (i = 5; i <= 10; i++)
+ operands[i] = gen_reg_rtx (SImode);
+ operands[2] = plus_constant (operands[2], 1);
+
+ /* Don't forget to decorate labels too, for PIC. */
+ operands[11] = flag_pic
+ ? gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xlabel),
+ CRIS_UNSPEC_PCREL))
+ : xlabel;
+})
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "general_operand")
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")]
+ ""
+{
+ if (TARGET_V32)
+ emit_insn (gen_cris_casesi_v32 (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+ else
+ emit_insn (gen_cris_casesi_non_v32 (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+ DONE;
+})
+
+;; Split-patterns. Some of them have modes unspecified. This
+;; should always be ok, if for no other reason than that sparc.md
+;; has it as well.
+;;
+;; When register_operand is specified for an operand, we can get a
+;; subreg as well (Axis-990331), so don't just assume that REG_P is true
+;; for a register_operand and that REGNO can be used as is. It is best to
+;; guard with REG_P, unless it is worth it to adjust for the subreg case.
+
+;; op [rx + 0],ry,rz
+;; The index to rx is optimized into zero, and gone.
+
+;; First, recognize bound [rx],ry,rz; where [rx] is zero-extended,
+;; and add/sub [rx],ry,rz, with zero or sign-extend on [rx].
+;; Split this into:
+;; move ry,rz
+;; op [rx],rz
+;; Lose if rz=ry or rx=rz.
+;; Call this op-extend-split.
+;; Do not match for V32; the addo and addi shouldn't be split
+;; up.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_operand_extend_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operator
+ 3 "cris_extend_operator"
+ [(match_operand 2 "memory_operand" "")])]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 0)
+ (match_op_dup
+ 4 [(match_dup 0)
+ (match_op_dup 3 [(match_dup 2)])]))]
+ "")
+
+;; As op-extend-split, but recognize and split op [rz],ry,rz into
+;; ext [rz],rz
+;; op ry,rz
+;; Do this for plus or bound only, being commutative operations, since we
+;; have swapped the operands.
+;; Call this op-extend-split-rx=rz
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_plus_or_bound_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operator
+ 3 "cris_extend_operator"
+ [(match_operand 2 "memory_operand" "")])]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 2)]))
+ (set (match_dup 0)
+ (match_op_dup
+ 4 [(match_dup 0)
+ (match_dup 1)]))]
+ "")
+
+;; As the op-extend-split, but swapped operands, and only for
+;; plus or bound, being the commutative extend-operators. FIXME: Why is
+;; this needed? Is it?
+;; Call this op-extend-split-swapped
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_plus_or_bound_operator"
+ [(match_operator
+ 3 "cris_extend_operator"
+ [(match_operand 2 "memory_operand" "")])
+ (match_operand 1 "register_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 0)
+ (match_op_dup
+ 4 [(match_dup 0)
+ (match_op_dup 3 [(match_dup 2)])]))]
+ "")
+
+;; As op-extend-split-rx=rz, but swapped operands, only for plus or
+;; bound. Call this op-extend-split-swapped-rx=rz.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_plus_or_bound_operator"
+ [(match_operator
+ 3 "cris_extend_operator"
+ [(match_operand 2 "memory_operand" "")])
+ (match_operand 1 "register_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 2)]))
+ (set (match_dup 0)
+ (match_op_dup
+ 4 [(match_dup 0)
+ (match_dup 1)]))]
+ "")
+
+;; As op-extend-split, but the mem operand is not extended.
+;;
+;; op [rx],ry,rz changed into
+;; move ry,rz
+;; op [rx],rz
+;; lose if ry=rz or rx=rz
+;; Call this op-extend.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 3 "cris_orthogonal_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "memory_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 0)
+ (match_op_dup
+ 3 [(match_dup 0)
+ (match_dup 2)]))]
+ "")
+
+;; As op-extend-split-rx=rz, non-extended.
+;; Call this op-split-rx=rz
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 3 "cris_commutative_orth_op"
+ [(match_operand 2 "memory_operand" "")
+ (match_operand 1 "register_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0])
+ && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 0)
+ (match_op_dup
+ 3 [(match_dup 0)
+ (match_dup 2)]))]
+ "")
+
+;; As op-extend-split-swapped, nonextended.
+;; Call this op-split-swapped.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 3 "cris_commutative_orth_op"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "memory_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0]) && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (match_op_dup
+ 3 [(match_dup 0)
+ (match_dup 1)]))]
+ "")
+
+;; As op-extend-split-swapped-rx=rz, non-extended.
+;; Call this op-split-swapped-rx=rz.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 3 "cris_orthogonal_operator"
+ [(match_operand 2 "memory_operand" "")
+ (match_operand 1 "register_operand" "")]))]
+ "!TARGET_V32
+ && REG_P (operands[0]) && REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && REG_P (XEXP (operands[2], 0))
+ && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (match_op_dup
+ 3 [(match_dup 0)
+ (match_dup 1)]))]
+ "")
+
+;; Splits for all cases in side-effect insns where (possibly after reload
+;; and register allocation) rx and ry in [rx=ry+i] are equal.
+
+;; move.S1 [rx=rx+rz.S2],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_mem_op"
+ [(plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "register_operand" ""))]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))])]
+ "REG_P (operands[3]) && REG_P (operands[4])
+ && REGNO (operands[3]) == REGNO (operands[4])"
+ [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0) (match_dup 5))]
+ "operands[5] = replace_equiv_address (operands[6], operands[3]);")
+
+;; move.S1 [rx=rx+i],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 5 "cris_mem_op"
+ [(plus:SI (match_operand:SI 1 "cris_bdap_operand" "")
+ (match_operand:SI 2 "cris_bdap_operand" ""))]))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))])]
+ "(rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))"
+ [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (match_dup 4))]
+{
+ operands[4] = replace_equiv_address (operands[5], operands[3]);
+ cris_order_for_addsi3 (operands, 1);
+})
+
+;; move.S1 ry,[rx=rx+rz.S2]
+
+(define_split
+ [(parallel
+ [(set (match_operator
+ 6 "cris_mem_op"
+ [(plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operand:SI 2 "register_operand" ""))])
+ (match_operand 3 "register_operand" ""))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))])]
+ "REG_P (operands[2]) && REG_P (operands[4])
+ && REGNO (operands[4]) == REGNO (operands[2])"
+ [(set (match_dup 4) (plus:SI (mult:SI (match_dup 0) (match_dup 1))
+ (match_dup 2)))
+ (set (match_dup 5) (match_dup 3))]
+ "operands[5] = replace_equiv_address (operands[6], operands[4]);")
+
+;; move.S1 ry,[rx=rx+i]
+
+(define_split
+ [(parallel
+ [(set (match_operator
+ 6 "cris_mem_op"
+ [(plus:SI (match_operand:SI 0 "cris_bdap_operand" "")
+ (match_operand:SI 1 "cris_bdap_operand" ""))])
+ (match_operand 2 "register_operand" ""))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))])]
+ "(rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1]))"
+ [(set (match_dup 3) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 5) (match_dup 2))]
+{
+ operands[5] = replace_equiv_address (operands[6], operands[3]);
+ cris_order_for_addsi3 (operands, 0);
+})
+
+;; clear.[bwd] [rx=rx+rz.S2]
+
+(define_split
+ [(parallel
+ [(set (mem:BWD (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 0))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))])]
+ "REG_P (operands[2]) && REG_P (operands[3])
+ && REGNO (operands[3]) == REGNO (operands[2])"
+ [(set (match_dup 3) (plus:SI (mult:SI (match_dup 0) (match_dup 1))
+ (match_dup 2)))
+ (set (mem:BWD (match_dup 3)) (const_int 0))]
+ "")
+
+;; clear.[bwd] [rx=rx+i]
+
+(define_split
+ [(parallel
+ [(set (mem:BWD
+ (plus:SI (match_operand:SI 0 "cris_bdap_operand" "")
+ (match_operand:SI 1 "cris_bdap_operand" "")))
+ (const_int 0))
+ (set (match_operand:SI 2 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_dup 1)))])]
+ "(rtx_equal_p (operands[0], operands[2])
+ || rtx_equal_p (operands[2], operands[1]))"
+ [(set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (mem:BWD (match_dup 2)) (const_int 0))]
+ "cris_order_for_addsi3 (operands, 0);")
+
+;; mov(s|u).S1 [rx=rx+rz.S2],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 5 "cris_extend_operator"
+ [(mem (plus:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "register_operand" "")))]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))])]
+ "REG_P (operands[3])
+ && REG_P (operands[4])
+ && REGNO (operands[3]) == REGNO (operands[4])"
+ [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0) (match_op_dup 5 [(match_dup 6)]))]
+ "operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]);")
+
+;; mov(s|u).S1 [rx=rx+i],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_extend_operator"
+ [(mem (plus:SI
+ (match_operand:SI 1 "cris_bdap_operand" "")
+ (match_operand:SI 2 "cris_bdap_operand" "")))]))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))])]
+ "(rtx_equal_p (operands[1], operands[3])
+ || rtx_equal_p (operands[2], operands[3]))"
+ [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (match_op_dup 4 [(match_dup 5)]))]
+{
+ operands[5] = replace_equiv_address (XEXP (operands[4], 0), operands[3]);
+ cris_order_for_addsi3 (operands, 1);
+})
+
+;; op.S1 [rx=rx+i],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 5 "cris_orthogonal_operator"
+ [(match_operand 1 "register_operand" "")
+ (mem (plus:SI
+ (match_operand:SI 2 "cris_bdap_operand" "")
+ (match_operand:SI 3 "cris_bdap_operand" "")))]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))])]
+ "(rtx_equal_p (operands[4], operands[2])
+ || rtx_equal_p (operands[4], operands[3]))"
+ [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 6)]))]
+{
+ operands[6] = replace_equiv_address (XEXP (operands[5], 1), operands[4]);
+ cris_order_for_addsi3 (operands, 2);
+})
+
+;; op.S1 [rx=rx+rz.S2],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_orthogonal_operator"
+ [(match_operand 1 "register_operand" "")
+ (mem (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (match_operand:SI 4 "register_operand" "")))]))
+ (set (match_operand:SI 5 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))])]
+ "REG_P (operands[4])
+ && REG_P (operands[5])
+ && REGNO (operands[5]) == REGNO (operands[4])"
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3))
+ (match_dup 4)))
+ (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 7)]))]
+ "operands[7] = replace_equiv_address (XEXP (operands[6], 1), operands[5]);")
+
+;; op.S1 [rx=rx+rz.S2],ry (swapped)
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_commutative_orth_op"
+ [(mem (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (match_operand:SI 4 "register_operand" "")))
+ (match_operand 1 "register_operand" "")]))
+ (set (match_operand:SI 5 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))])]
+ "REG_P (operands[4])
+ && REG_P (operands[5])
+ && REGNO (operands[5]) == REGNO (operands[4])"
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3))
+ (match_dup 4)))
+ (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))]
+ "operands[7] = replace_equiv_address (XEXP (operands[6], 0), operands[5]);")
+
+;; op.S1 [rx=rx+i],ry (swapped)
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 5 "cris_commutative_orth_op"
+ [(mem
+ (plus:SI (match_operand:SI 2 "cris_bdap_operand" "")
+ (match_operand:SI 3 "cris_bdap_operand" "")))
+ (match_operand 1 "register_operand" "")]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))])]
+ "(rtx_equal_p (operands[4], operands[2])
+ || rtx_equal_p (operands[4], operands[3]))"
+ [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (match_op_dup 5 [(match_dup 6) (match_dup 1)]))]
+{
+ operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]);
+ cris_order_for_addsi3 (operands, 2);
+})
+
+;; op(s|u).S1 [rx=rx+rz.S2],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_operand_extend_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operator
+ 7 "cris_extend_operator"
+ [(mem (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (match_operand:SI 4 "register_operand" "")))])]))
+ (set (match_operand:SI 5 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))])]
+ "REG_P (operands[4])
+ && REG_P (operands[5])
+ && REGNO (operands[5]) == REGNO (operands[4])"
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3))
+ (match_dup 4)))
+ (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 8)]))]
+ "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[7]), GET_MODE (operands[7]),
+ replace_equiv_address (XEXP (operands[7], 0),
+ operands[5]));")
+
+;; op(s|u).S1 [rx=rx+i],ry
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 5 "cris_operand_extend_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operator
+ 6 "cris_extend_operator"
+ [(mem
+ (plus:SI (match_operand:SI 2 "cris_bdap_operand" "")
+ (match_operand:SI 3 "cris_bdap_operand" "")
+ ))])]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))])]
+ "(rtx_equal_p (operands[4], operands[2])
+ || rtx_equal_p (operands[4], operands[3]))"
+ [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 7)]))]
+{
+ operands[7] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]),
+ replace_equiv_address (XEXP (operands[6], 0),
+ operands[4]));
+ cris_order_for_addsi3 (operands, 2);
+})
+
+;; op(s|u).S1 [rx=rx+rz.S2],ry (swapped, plus or bound)
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 7 "cris_plus_or_bound_operator"
+ [(match_operator
+ 6 "cris_extend_operator"
+ [(mem (plus:SI
+ (mult:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (match_operand:SI 4 "register_operand" "")))])
+ (match_operand 1 "register_operand" "")]))
+ (set (match_operand:SI 5 "register_operand" "")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))])]
+ "REG_P (operands[4]) && REG_P (operands[5])
+ && REGNO (operands[5]) == REGNO (operands[4])"
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3))
+ (match_dup 4)))
+ (set (match_dup 0) (match_op_dup 6 [(match_dup 8) (match_dup 1)]))]
+ "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]),
+ replace_equiv_address (XEXP (operands[6], 0),
+ operands[5]));")
+
+;; op(s|u).S1 [rx=rx+i],ry (swapped, plus or bound)
+
+(define_split
+ [(parallel
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_plus_or_bound_operator"
+ [(match_operator
+ 5 "cris_extend_operator"
+ [(mem (plus:SI
+ (match_operand:SI 2 "cris_bdap_operand" "")
+ (match_operand:SI 3 "cris_bdap_operand" "")))])
+ (match_operand 1 "register_operand" "")]))
+ (set (match_operand:SI 4 "register_operand" "")
+ (plus:SI (match_dup 2)
+ (match_dup 3)))])]
+ "(rtx_equal_p (operands[4], operands[2])
+ || rtx_equal_p (operands[4], operands[3]))"
+ [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))]
+{
+ operands[7] = gen_rtx_fmt_e (GET_CODE (operands[5]), GET_MODE (operands[5]),
+ replace_equiv_address (XEXP (operands[5], 0),
+ operands[4]));
+ cris_order_for_addsi3 (operands, 2);
+})
+
+;; Splits for addressing prefixes that have no side-effects, so we can
+;; fill a delay slot. Never split if we lose something, though.
+
+;; If we have a
+;; move [indirect_ref],rx
+;; where indirect ref = {const, [r+], [r]}, it costs as much as
+;; move indirect_ref,rx
+;; move [rx],rx
+;; Take care not to allow indirect_ref = register.
+
+;; We're not allowed to generate copies of registers with different mode
+;; until after reload; copying pseudos upsets reload. Seen with CVS as
+;; of 2001-08-24: an ICE in cselib_invalidate_regno for _Unwind_Find_FDE
+;; in unwind-dw2-fde.c.
+
+(define_split ; indir_to_reg_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "indirect_operand" ""))]
+ "reload_completed
+ && REG_P (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (MEM_P (XEXP (operands[1], 0)) || CONSTANT_P (XEXP (operands[1], 0)))
+ && REGNO (operands[0]) < CRIS_LAST_GENERAL_REGISTER"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 0) (match_dup 3))]
+ "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0]));
+ operands[3] = replace_equiv_address (operands[1], operands[2]);
+ operands[4] = XEXP (operands[1], 0);")
+
+;; As the above, but for MOVS and MOVU.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 4 "cris_extend_operator"
+ [(match_operand 1 "indirect_operand" "")]))]
+ "reload_completed
+ && REG_P (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (MEM_P (XEXP (operands[1], 0))
+ || CONSTANT_P (XEXP (operands[1], 0)))"
+ [(set (match_dup 2) (match_dup 5))
+ (set (match_dup 0) (match_op_dup 4 [(match_dup 3)]))]
+ "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0]));
+ operands[3] = replace_equiv_address (XEXP (operands[4], 0), operands[2]);
+ operands[5] = XEXP (operands[1], 0);")
+
+;; Various peephole optimizations.
+;;
+;; Watch out: when you exchange one set of instructions for another, the
+;; condition-code setting must be the same, or you have to use CC_INIT or
+;; whatever is appropriate, in the pattern before you emit the
+;; assembly text. This is best done here, not in cris_notice_update_cc,
+;; to keep changes local to their cause.
+;;
+;; Do not add patterns that you do not know will be matched.
+;; Please also add a self-contained testcase.
+
+;; We have trouble with and-operations combined with shifts. Maybe
+;; something is broken in gcc? Or it could just be that bit-field insn
+;; expansion is a bit suboptimal when there are no extzv insns.
+;; Testcase for the following four peepholes: gcc.dg/cris-peep2-xsrand.c
+
+(define_peephole2 ; asrandb (peephole casesi+31)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand 2 "const_int_operand" "")))]
+ "INTVAL (operands[2]) > 31
+ && INTVAL (operands[2]) < 255
+ && INTVAL (operands[1]) > 23
+ /* Check that the and-operation enables us to use logical-shift. */
+ && (INTVAL (operands[2])
+ & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))]
+ ;; FIXME: CC0 is valid except for the M bit.
+{
+ operands[3] = gen_rtx_REG (QImode, REGNO (operands[0]));
+ operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+})
+
+(define_peephole2 ; asrandw (peephole casesi+32)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))]
+ "INTVAL (operands[2]) > 31
+ && INTVAL (operands[2]) < 65535
+ && INTVAL (operands[2]) != 255
+ && INTVAL (operands[1]) > 15
+ /* Check that the and-operation enables us to use logical-shift. */
+ && (INTVAL (operands[2])
+ & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))]
+ ;; FIXME: CC0 is valid except for the M bit.
+{
+ operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode));
+})
+
+(define_peephole2 ; lsrandb (peephole casesi+33)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))]
+ "INTVAL (operands[2]) > 31
+ && INTVAL (operands[2]) < 255
+ && INTVAL (operands[1]) > 23"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))]
+ ;; FIXME: CC0 is valid except for the M bit.
+{
+ operands[3] = gen_rtx_REG (QImode, REGNO (operands[0]));
+ operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+})
+
+(define_peephole2 ; lsrandw (peephole casesi+34)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))]
+ "INTVAL (operands[2]) > 31 && INTVAL (operands[2]) < 65535
+ && INTVAL (operands[2]) != 255
+ && INTVAL (operands[1]) > 15"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))]
+ ;; FIXME: CC0 is valid except for the M bit.
+{
+ operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode));
+})
+
+
+;; Change
+;; add.d n,rx
+;; move [rx],ry
+;; into
+;; move [rx=rx+n],ry
+;; when -128 <= n <= 127.
+;; This will reduce the size of the assembler code for n = [-128..127],
+;; and speed it up accordingly. Don't match if the previous insn is
+;; (set rx rz) because that combination is matched by another peephole.
+;; No stable test-case.
+
+(define_peephole2 ; moversideqi (peephole casesi+35)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand 3 "register_operand" "")
+ (match_operator 4 "cris_mem_op" [(match_dup 0)]))]
+ "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD
+ && REGNO (operands[3]) != REGNO (operands[0])
+ && (BASE_P (operands[1]) || BASE_P (operands[2]))
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128)
+ && TARGET_SIDE_EFFECT_PREFIXES"
+ [(parallel
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])]
+ ;; Checking the previous insn is a bit too awkward for the condition.
+{
+ rtx prev = prev_nonnote_insn (curr_insn);
+ if (prev != NULL_RTX)
+ {
+ rtx set = single_set (prev);
+ if (set != NULL_RTX
+ && REG_S_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) == REGNO (operands[0])
+ && REG_S_P (SET_SRC (set)))
+ FAIL;
+ }
+ operands[5]
+ = replace_equiv_address (operands[4],
+ gen_rtx_PLUS (SImode,
+ operands[1], operands[2]));
+})
+
+;; Vice versa: move ry,[rx=rx+n]
+
+(define_peephole2 ; movemsideqi (peephole casesi+36)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operator 3 "cris_mem_op" [(match_dup 0)])
+ (match_operand 4 "register_operand" ""))]
+ "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD
+ && REGNO (operands[4]) != REGNO (operands[0])
+ && (BASE_P (operands[1]) || BASE_P (operands[2]))
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128)
+ && TARGET_SIDE_EFFECT_PREFIXES"
+ [(parallel
+ [(set (match_dup 5) (match_dup 4))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])]
+ "operands[5]
+ = replace_equiv_address (operands[3],
+ gen_rtx_PLUS (SImode,
+ operands[1], operands[2]));")
+
+;; As above, change:
+;; add.d n,rx
+;; op.d [rx],ry
+;; into:
+;; op.d [rx=rx+n],ry
+;; Saves when n = [-128..127].
+;;
+;; Splitting and joining combinations for side-effect modes are slightly
+;; out of hand. They probably will not save the time they took to type
+;; in, not to mention the bugs that creep in. FIXME: Get rid of as many
+;; of the splits and peepholes as possible.
+;; No stable test-case.
+
+(define_peephole2 ; mover2side (peephole casesi+37)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand 3 "register_operand" "")
+ (match_operator 4 "cris_orthogonal_operator"
+ [(match_dup 3)
+ (match_operator
+ 5 "cris_mem_op" [(match_dup 0)])]))]
+ ;; FIXME: What about DFmode?
+ ;; Change to GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD?
+ "GET_MODE (operands[3]) != DImode
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'N')
+ && INTVAL (operands[2]) >= -128
+ && INTVAL (operands[2]) <= 127
+ && TARGET_SIDE_EFFECT_PREFIXES"
+ [(parallel
+ [(set (match_dup 3) (match_op_dup 4 [(match_dup 3) (match_dup 6)]))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])]
+ "operands[6]
+ = replace_equiv_address (operands[5],
+ gen_rtx_PLUS (SImode,
+ operands[1], operands[2]));")
+
+;; Sometimes, for some reason the pattern
+;; move x,rx
+;; add y,rx
+;; move [rx],rz
+;; will occur. Solve this, and likewise for the to-memory case.
+;; No stable test-case.
+
+(define_peephole2 ; moverside (peephole casesi+38)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "cris_bdap_biap_operand" ""))
+ (set (match_dup 0)
+ (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "")
+ (match_operand:SI 3 "cris_bdap_biap_operand" "")))
+ (set (match_operand 4 "register_operand" "")
+ (match_operator 5 "cris_mem_op" [(match_dup 0)]))]
+ "(rtx_equal_p (operands[2], operands[0])
+ || rtx_equal_p (operands[3], operands[0]))
+ && cris_side_effect_mode_ok (PLUS, operands, 0,
+ (REG_S_P (operands[1])
+ ? 1
+ : (rtx_equal_p (operands[2], operands[0])
+ ? 3 : 2)),
+ (! REG_S_P (operands[1])
+ ? 1
+ : (rtx_equal_p (operands[2], operands[0])
+ ? 3 : 2)),
+ -1, 4)"
+ [(parallel
+ [(set (match_dup 4) (match_dup 6))
+ (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])]
+{
+ rtx otherop
+ = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2];
+
+ /* Make sure we have canonical RTX so we match the insn pattern -
+ not a constant in the first operand. We also require the order
+ (plus reg mem) to match the final pattern. */
+ if (CONSTANT_P (otherop) || MEM_P (otherop))
+ {
+ operands[7] = operands[1];
+ operands[8] = otherop;
+ }
+ else
+ {
+ operands[7] = otherop;
+ operands[8] = operands[1];
+ }
+ operands[6]
+ = replace_equiv_address (operands[5],
+ gen_rtx_PLUS (SImode,
+ operands[7], operands[8]));
+})
+
+;; As above but to memory.
+;; FIXME: Split movemside and moverside into variants and prune
+;; the ones that don't trigger.
+;; No stable test-case.
+
+(define_peephole2 ; movemside (peephole casesi+39)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "cris_bdap_biap_operand" ""))
+ (set (match_dup 0)
+ (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "")
+ (match_operand:SI 3 "cris_bdap_biap_operand" "")))
+ (set (match_operator 4 "cris_mem_op" [(match_dup 0)])
+ (match_operand 5 "register_operand" ""))]
+ "(rtx_equal_p (operands[2], operands[0])
+ || rtx_equal_p (operands[3], operands[0]))
+ && cris_side_effect_mode_ok (PLUS, operands, 0,
+ (REG_S_P (operands[1])
+ ? 1
+ : (rtx_equal_p (operands[2], operands[0])
+ ? 3 : 2)),
+ (! REG_S_P (operands[1])
+ ? 1
+ : (rtx_equal_p (operands[2], operands[0])
+ ? 3 : 2)),
+ -1, 5)"
+ [(parallel
+ [(set (match_dup 6) (match_dup 5))
+ (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])]
+{
+ rtx otherop
+ = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2];
+
+ /* Make sure we have canonical RTX so we match the insn pattern -
+ not a constant in the first operand. We also require the order
+ (plus reg mem) to match the final pattern. */
+ if (CONSTANT_P (otherop) || MEM_P (otherop))
+ {
+ operands[7] = operands[1];
+ operands[8] = otherop;
+ }
+ else
+ {
+ operands[7] = otherop;
+ operands[8] = operands[1];
+ }
+ operands[6]
+ = replace_equiv_address (operands[4],
+ gen_rtx_PLUS (SImode,
+ operands[7], operands[8]));
+})
+
+;; Another spotted case of bad code:
+;; move rx,ry
+;; move [ry],ry
+;; No stable test-case.
+
+(define_peephole2 ; movei (peephole casesi+42)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operator 3 "cris_mem_op" [(match_dup 0)]))]
+ "REGNO (operands[0]) == REGNO (operands[2])
+ && (REGNO_REG_CLASS (REGNO (operands[0]))
+ == REGNO_REG_CLASS (REGNO (operands[1])))
+ && GET_MODE_SIZE (GET_MODE (operands[2])) <= UNITS_PER_WORD"
+ [(set (match_dup 2) (match_dup 4))]
+ "operands[4] = replace_equiv_address (operands[3], operands[1]);")
+
+;; move.d [r10+16],r9
+;; and.d r12,r9
+;; change to
+;; and.d [r10+16],r12,r9
+;; generalized over the operation, the size and the addressing mode.
+;; This seems to be the result of a quirk in register allocation that
+;; misses the three-operand cases when the operands have different
+;; predicates. Maybe it matters that the operation is commutative.
+;; This pattern helps that situation, but there's still the increased
+;; register pressure.
+;; Note that adding the noncommutative variant did not show any matches
+;; in ipps and cc1, so it's not here.
+;; No stable test-case.
+
+(define_peephole2 ; op3 (peephole casesi+44)
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator
+ 6 "cris_mem_op"
+ [(plus:SI
+ (match_operand:SI 1 "cris_bdap_biap_operand" "")
+ (match_operand:SI 2 "cris_bdap_biap_operand" ""))]))
+ (set (match_dup 0)
+ (match_operator
+ 5 "cris_commutative_orth_op"
+ [(match_operand 3 "register_operand" "")
+ (match_operand 4 "register_operand" "")]))]
+ "(rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[4], operands[0]))
+ && ! rtx_equal_p (operands[3], operands[4])
+ && (REG_S_P (operands[1]) || REG_S_P (operands[2]))
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD"
+ [(set (match_dup 0) (match_op_dup 5 [(match_dup 7) (match_dup 6)]))]
+ "operands[7]
+ = rtx_equal_p (operands[3], operands[0]) ? operands[4] : operands[3];")
+
+;; I cannot tell GCC (2.1, 2.7.2) how to optimally reload an instruction
+;; that looks like
+;; and.b some_byte,const,reg_32
+;; where reg_32 is the destination of the "three-address" code.
+;; It should be:
+;; movu.b some_byte,reg_32
+;; and.b const,reg_32
+;; but it turns into:
+;; move.b some_byte,reg_32
+;; and.d const,reg_32
+;; Fix it here.
+;; Testcases: gcc.dg/cris-peep2-andu1.c gcc.dg/cris-peep2-andu2.c
+
+(define_peephole2 ; andu (casesi+45)
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ;; Since the size of the memory access could be made different here,
+ ;; don't do this for a volatile memory access.
+ "REGNO (operands[2]) == REGNO (operands[0])
+ && INTVAL (operands[3]) <= 65535 && INTVAL (operands[3]) >= 0
+ && !CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'I')
+ && !side_effects_p (operands[1])
+ && (!REG_P (operands[1])
+ || REGNO (operands[1]) <= CRIS_LAST_GENERAL_REGISTER)"
+ ;; FIXME: CC0 valid except for M (i.e. CC_NOT_NEGATIVE).
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+{
+ enum machine_mode zmode = INTVAL (operands[3]) <= 255 ? QImode : HImode;
+ enum machine_mode amode
+ = CRIS_CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'O') ? SImode : zmode;
+ rtx op1
+ = (REG_S_P (operands[1])
+ ? gen_rtx_REG (zmode, REGNO (operands[1]))
+ : adjust_address (operands[1], zmode, 0));
+ operands[4]
+ = gen_rtx_ZERO_EXTEND (SImode, op1);
+ operands[5] = gen_rtx_REG (amode, REGNO (operands[0]));
+ operands[6]
+ = gen_rtx_AND (amode, gen_rtx_REG (amode, REGNO (operands[0])),
+ GEN_INT (trunc_int_for_mode (INTVAL (operands[3]),
+ amode == SImode
+ ? QImode : amode)));
+})
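+
+;; (A note on the preparation statement above, derived from the code as
+;; an aid to the reader, not new semantics: zmode is the narrowest mode
+;; holding the constant -- QImode for values up to 255, HImode
+;; otherwise -- while the and itself is widened back to SImode whenever
+;; the constant satisfies the CRIS 'O' constraint.)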
+
+;; Try to avoid GOTPLT reads escaping a call: transform them into
+;; PLT. Curiously (but thankfully), peepholes for instructions
+;; *without side-effects* that just feed a call (or call_value) are
+;; matched neither in a build nor in the test-suite, so those patterns
+;; are omitted.
+
+;; A "normal" move where we don't check the consumer.
+
+(define_peephole2 ; gotplt-to-plt
+ [(set
+ (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI
+ 1 "cris_mem_op"
+ [(plus:SI
+ (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_operand:SI 2 "cris_general_operand_or_symbol" "")]
+ CRIS_UNSPEC_PLTGOTREAD)))]))]
+ "flag_pic
+ && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true)
+ && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)"
+ [(set (match_dup 0) (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))]
+ "")
+
+;; And one set with a side-effect getting the PLTGOT offset.
+;; First call and call_value variants.
+
+(define_peephole2 ; gotplt-to-plt-side-call
+ [(parallel
+ [(set
+ (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI
+ 1 "cris_mem_op"
+ [(plus:SI
+ (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_operand:SI
+ 2 "cris_general_operand_or_symbol" "")]
+ CRIS_UNSPEC_PLTGOTREAD)))]))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])
+ (parallel [(call (mem:QI (match_dup 0))
+ (match_operand 4 "" ""))
+ (clobber (reg:SI CRIS_SRP_REGNUM))])]
+ "flag_pic
+ && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true)
+ && peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(call (mem:QI (match_dup 1))
+ (match_dup 4))
+ (clobber (reg:SI CRIS_SRP_REGNUM))
+ (set (match_dup 3)
+ (plus:SI (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_dup 2)]
+ CRIS_UNSPEC_PLTGOTREAD))))])]
+ "")
+
+(define_peephole2 ; gotplt-to-plt-side-call-value
+ [(parallel
+ [(set
+ (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI
+ 1 "cris_mem_op"
+ [(plus:SI
+ (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_operand:SI
+ 2 "cris_general_operand_or_symbol" "")]
+ CRIS_UNSPEC_PLTGOTREAD)))]))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])
+ (parallel [(set (match_operand 5 "" "")
+ (call (mem:QI (match_dup 0))
+ (match_operand 4 "" "")))
+ (clobber (reg:SI CRIS_SRP_REGNUM))])]
+ "flag_pic
+ && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true)
+ && peep2_reg_dead_p (2, operands[0])"
+ [(parallel [(set (match_dup 5)
+ (call (mem:QI (match_dup 1))
+ (match_dup 4)))
+ (clobber (reg:SI CRIS_SRP_REGNUM))
+ (set (match_dup 3)
+ (plus:SI (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_dup 2)]
+ CRIS_UNSPEC_PLTGOTREAD))))])]
+ "")
+
+(define_peephole2 ; gotplt-to-plt-side
+ [(parallel
+ [(set
+ (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI
+ 1 "cris_mem_op"
+ [(plus:SI
+ (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_operand:SI
+ 2 "cris_general_operand_or_symbol" "")]
+ CRIS_UNSPEC_PLTGOTREAD)))]))
+ (set (match_operand:SI 3 "register_operand" "")
+ (plus:SI (reg:SI CRIS_GOT_REGNUM)
+ (const:SI
+ (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])]
+ "flag_pic
+ && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true)
+ && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)"
+ [(set (match_dup 3)
+ (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD)))
+ (set (match_dup 3) (plus:SI (match_dup 3) (reg:SI CRIS_GOT_REGNUM)))
+ (set (match_dup 0)
+ (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))]
+ "")
+
+;; Local variables:
+;; mode:emacs-lisp
+;; comment-start: ";; "
+;; eval: (set-syntax-table (copy-sequence (syntax-table)))
+;; eval: (modify-syntax-entry ?[ "(]")
+;; eval: (modify-syntax-entry ?] ")[")
+;; eval: (modify-syntax-entry ?{ "(}")
+;; eval: (modify-syntax-entry ?} "){")
+;; eval: (setq indent-tabs-mode t)
+;; End:
diff --git a/gcc/config/cris/cris.opt b/gcc/config/cris/cris.opt
new file mode 100644
index 000000000..9caa48924
--- /dev/null
+++ b/gcc/config/cris/cris.opt
@@ -0,0 +1,190 @@
+; Options for the CRIS port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; TARGET_MUL_BUG: Whether or not to work around the multiplication
+; instruction hardware bug when generating code for models where
+; it may be present. From the trouble report for Etrax 100 LX:
+; "A multiply operation may cause incorrect cache behaviour
+; under some specific circumstances. The problem can occur if
+; the instruction following the multiply instruction causes a
+; cache miss, and multiply operand 1 (source operand) bits
+; [31:27] matches the logical mapping of the mode register
+; address (0xb0....), and bits [9:2] of operand 1 matches the
+; TLB register address (0x258-0x25f). There is such a mapping
+; in kernel mode or when the MMU is off. Normally there is no
+; such mapping in user mode, and the problem will therefore
+; probably not occur in Linux user mode programs."
+;
+; We have no sure-fire way to know from within GCC that we're
+; compiling a user program. For example, -fpic/PIC is used in
+; libgcc which is linked into the kernel. However, the
+; workaround option -mno-mul-bug can be safely used per-package
+; when compiling programs. The same goes for general user-only
+; libraries such as glibc, since there's no user-space
+; driver-like program that gets a mapping of I/O registers (all
+; on the same page, including the TLB registers).
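+;
+; As an informal illustration only -- not a check GCC performs -- the
+; two bit-field conditions quoted above, read literally, would both
+; hold for a multiply source operand when, in C terms:
+;
+;   int mul_op1_matches_bug_pattern (unsigned long op1)
+;   {
+;     /* Bits [31:27] equal to those of the 0xb0... mapping...  */
+;     return ((op1 >> 27) & 0x1f) == 0x16
+;            /* ...and bits [9:2] matching an address in 0x258-0x25f.  */
+;            && ((op1 >> 3) & 0x7f) == 0x4b;
+;   }
+;
+; (The function name is ours, for illustration.)  The workaround never
+; inspects operand values at run time; it avoids placing a multiply
+; last on a cache line (see the .p2alignw use in this port's
+; mulsi3.asm).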
+mmul-bug-workaround
+Target Report Mask(MUL_BUG)
+Work around bug in multiplication instruction
+
+; TARGET_ETRAX4_ADD: Instruction-set additions from Etrax 4 and up.
+; (Just "lz".)
+metrax4
+Target Report Mask(ETRAX4_ADD)
+Compile for ETRAX 4 (CRIS v3)
+
+; See cris_handle_option.
+metrax100
+Target Report RejectNegative
+Compile for ETRAX 100 (CRIS v8)
+
+; See cris_handle_option.
+mno-etrax100
+Target Report RejectNegative Undocumented
+
+mpdebug
+Target Report Mask(PDEBUG)
+Emit verbose debug information in assembly code
+
+; TARGET_CCINIT: Whether to use condition-codes generated by
+; insns other than the immediately preceding compare/test insn.
+; Used to check for errors in notice_update_cc.
+mcc-init
+Target Report Mask(CCINIT)
+Do not use condition codes from normal instructions
+
+; TARGET_SIDE_EFFECT_PREFIXES: Whether to use side-effect
+; patterns. Used to debug the [rx=ry+i] type patterns.
+mside-effects
+Target Report RejectNegative Mask(SIDE_EFFECT_PREFIXES) Undocumented
+
+mno-side-effects
+Target Report RejectNegative InverseMask(SIDE_EFFECT_PREFIXES)
+Do not emit addressing modes with side-effect assignment
+
+; TARGET_STACK_ALIGN: Whether to *keep* (not force) alignment of
+; stack at 16 (or 32, depending on TARGET_ALIGN_BY_32) bits.
+mstack-align
+Target Report RejectNegative Mask(STACK_ALIGN) Undocumented
+
+mno-stack-align
+Target Report RejectNegative InverseMask(STACK_ALIGN)
+Do not tune stack alignment
+
+; TARGET_DATA_ALIGN: Whether to do alignment on individual
+; modifiable objects.
+mdata-align
+Target Report RejectNegative Mask(DATA_ALIGN) Undocumented
+
+mno-data-align
+Target Report RejectNegative InverseMask(DATA_ALIGN)
+Do not tune writable data alignment
+
+; TARGET_CONST_ALIGN: Whether to do alignment on individual
+; non-modifiable objects.
+mconst-align
+Target Report RejectNegative Mask(CONST_ALIGN) Undocumented
+
+mno-const-align
+Target Report RejectNegative InverseMask(CONST_ALIGN)
+Do not tune code and read-only data alignment
+
+; See cris_handle_option.
+m32-bit
+Target Report RejectNegative Undocumented
+
+; See cris_handle_option.
+m32bit
+Target Report RejectNegative
+Align code and data to 32 bits
+
+; See cris_handle_option.
+m16-bit
+Target Report RejectNegative Undocumented
+
+; See cris_handle_option.
+m16bit
+Target Report RejectNegative Undocumented
+
+; See cris_handle_option.
+m8-bit
+Target Report RejectNegative Undocumented
+
+; See cris_handle_option.
+m8bit
+Target Report RejectNegative
+Don't align items in code or data
+
+; TARGET_PROLOGUE_EPILOGUE: Whether or not to omit function
+; prologue and epilogue.
+mprologue-epilogue
+Target Report RejectNegative Mask(PROLOGUE_EPILOGUE) Undocumented
+
+mno-prologue-epilogue
+Target Report RejectNegative InverseMask(PROLOGUE_EPILOGUE)
+Do not emit function prologue or epilogue
+
+; We have to handle this m-option here since we can't wash it
+; off in both CC1_SPEC and CC1PLUS_SPEC.
+
+mbest-lib-options
+Target Report RejectNegative
+Use the most feature-enabling options allowed by other options
+
+; FIXME: The following comment relates to gcc before cris.opt.
+; Check if it's still valid:
+; We must call it "override-" since calling it "no-" will cause
+; gcc.c to forget it, if there's a "later" -mbest-lib-options.
+; Kludgy, but needed for some multilibbed files.
+moverride-best-lib-options
+Target Report RejectNegative
+Override -mbest-lib-options
+
+mcpu=
+Target Report RejectNegative Joined Undocumented Var(cris_cpu_str)
+
+march=
+Target Report RejectNegative Joined Var(cris_cpu_str)
+-march=ARCH Generate code for the specified chip or CPU version
+
+mtune=
+Target Report RejectNegative Joined Var(cris_tune_str)
+-mtune=ARCH Tune alignment for the specified chip or CPU version
+
+mmax-stackframe=
+Target Report RejectNegative Joined Var(cris_max_stackframe_str)
+-mmax-stackframe=SIZE Warn when a stackframe is larger than the specified size
+
+max-stackframe=
+Target Report RejectNegative Joined Undocumented Var(cris_max_stackframe_str)
+
+; TARGET_SVINTO: Currently this just affects alignment. FIXME:
+; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here?
+; This and the others below could just as well be variables and
+; TARGET_* defines in cris.h.
+Mask(SVINTO)
+
+; TARGET_ALIGN_BY_32: Say that all alignment specifications say
+; to prefer 32 rather than 16 bits.
+Mask(ALIGN_BY_32)
+
+; TARGET_AVOID_GOTPLT is referred to in the .c and the .md so we
+; need to allocate the flag and macros here.
+Mask(AVOID_GOTPLT)
diff --git a/gcc/config/cris/cris_abi_symbol.c b/gcc/config/cris/cris_abi_symbol.c
new file mode 100644
index 000000000..db9db2cfe
--- /dev/null
+++ b/gcc/config/cris/cris_abi_symbol.c
@@ -0,0 +1,45 @@
+/* Define symbol to recognize CRIS ABI version 2, for a.out use.
+ Contributed by Axis Communications.
+ Written by Hans-Peter Nilsson <hp@axis.se>, c:a 1992.
+
+ Copyright (C) 2000, 2001, 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tm.h"
+
+#ifdef __AOUT__
+
+/* ELF support was not released before the ABI was changed, so we
+ restrict this awkwardness to a.out. This symbol is for gdb to
+ recognize, so it can debug both old and new programs successfully. */
+__asm__ (".global " CRIS_ABI_VERSION_SYMBOL_STRING);
+__asm__ (".set " CRIS_ABI_VERSION_SYMBOL_STRING ",0");
+
+#else /* not __AOUT__ */
+
+/* The file must not be empty (declaration/definition-wise) according to
+ ISO, IIRC. */
+extern int _Dummy;
+
+#endif /* not __AOUT__ */
diff --git a/gcc/config/cris/elf.opt b/gcc/config/cris/elf.opt
new file mode 100644
index 000000000..00ced56b0
--- /dev/null
+++ b/gcc/config/cris/elf.opt
@@ -0,0 +1,25 @@
+; ELF-specific options for the CRIS port of the compiler.
+
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+melf
+Target Report RejectNegative Undocumented
+
+sim
+Driver JoinedOrMissing
diff --git a/gcc/config/cris/libgcc.ver b/gcc/config/cris/libgcc.ver
new file mode 100644
index 000000000..e35de8310
--- /dev/null
+++ b/gcc/config/cris/libgcc.ver
@@ -0,0 +1,7 @@
+GCC_4.3 {
+ __Mul
+ __Div
+ __Udiv
+ __Mod
+ __Umod
+}
diff --git a/gcc/config/cris/linux.h b/gcc/config/cris/linux.h
new file mode 100644
index 000000000..bf2f5f9c9
--- /dev/null
+++ b/gcc/config/cris/linux.h
@@ -0,0 +1,151 @@
+/* Definitions for GCC. Part of the machine description for CRIS.
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Axis Communications. Written by Hans-Peter Nilsson.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* After the first "Node:" comment come all preprocessor directives and
+ attached declarations described in the info files, the "Using and
+ Porting GCC" manual (uapgcc), in the same order as found in the "Target
+ macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not
+ really, but needs an update anyway.
+
+ There is no generic copy-of-uapgcc comment, you'll have to see uapgcc
+ for that. If applicable, there is a CRIS-specific comment. The order
+ of macro definitions follows the order in the manual. Every section in
+ the manual (node in the info pages) has an introductory `Node:
+ <subchapter>' comment. If no macros are defined for a section, only
+ the section-comment is present. */
+
+/* This file defines the macros for cris-axis-linux-gnu that are not
+ covered by cris.h, elfos.h and (config/)linux.h. */
+
+/* Make sure we have a valid TARGET_CPU_DEFAULT, so we can assume it
+ and take shortcuts below. */
+#ifndef TARGET_CPU_DEFAULT
+#error "TARGET_CPU_DEFAULT not defined"
+#elif (TARGET_CPU_DEFAULT+0) != 10 && (TARGET_CPU_DEFAULT+0) != 32
+#error "TARGET_CPU_DEFAULT must be 10 or 32, or this file be updated"
+#endif
+
+/* Node: Instruction Output */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+/* Node: Driver */
+/* These macros are CRIS-specific, but used in target driver macros. */
+
+#undef CRIS_CPP_SUBTARGET_SPEC
+#if TARGET_CPU_DEFAULT == 32
+# define CRIS_CPP_SUBTARGET_SPEC \
+ "%{pthread:-D_REENTRANT}\
+ %{!march=*:%{!mcpu=*:-D__arch_v32 -D__CRIS_arch_version=32}}"
+#else
+# define CRIS_CPP_SUBTARGET_SPEC \
+ "%{pthread:-D_REENTRANT}\
+ %{!march=*:%{!mcpu=*:-D__arch_v10 -D__CRIS_arch_version=10}}"
+#endif
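+
+/* As a concrete reading of the spec above (not an exhaustive list):
+   with TARGET_CPU_DEFAULT == 32 and neither -march= nor -mcpu= given,
+   the preprocessor is handed -D__arch_v32 -D__CRIS_arch_version=32,
+   and -pthread additionally yields -D_REENTRANT.  */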
+
+#undef CRIS_CC1_SUBTARGET_SPEC
+#if TARGET_CPU_DEFAULT == 32
+# define CRIS_CC1_SUBTARGET_SPEC \
+ "%{!march=*:%{!mcpu=*:-march=v32}}"
+#define CRIS_SUBTARGET_DEFAULT_ARCH MASK_AVOID_GOTPLT
+#else
+# define CRIS_CC1_SUBTARGET_SPEC \
+ "%{!march=*:%{!mcpu=*:-march=v10}}"
+#define CRIS_SUBTARGET_DEFAULT_ARCH 0
+#endif
+
+#undef CRIS_ASM_SUBTARGET_SPEC
+#if TARGET_CPU_DEFAULT == 32
+# define CRIS_ASM_SUBTARGET_SPEC \
+ "--em=criself \
+ %{!march=*:%{!mcpu=*:--march=v32}} \
+ %{!fleading-underscore:--no-underscore}\
+ %{fPIC|fpic|fPIE|fpie: --pic}"
+#else
+# define CRIS_ASM_SUBTARGET_SPEC \
+ "--em=criself \
+ %{!march=*:%{!mcpu=*:--march=v10}} \
+ %{!fleading-underscore:--no-underscore}\
+ %{fPIC|fpic|fPIE|fpie: --pic}"
+#endif
+
+/* Previously controlled by target_flags. */
+#undef TARGET_LINUX
+#define TARGET_LINUX 1
+
+#undef CRIS_SUBTARGET_DEFAULT
+#define CRIS_SUBTARGET_DEFAULT \
+ (MASK_SVINTO \
+ + MASK_ETRAX4_ADD \
+ + MASK_ALIGN_BY_32 \
+ + CRIS_SUBTARGET_DEFAULT_ARCH)
+
+#undef CRIS_DEFAULT_CPU_VERSION
+#define CRIS_DEFAULT_CPU_VERSION CRIS_CPU_NG
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef CRIS_LINK_SUBTARGET_SPEC
+#define CRIS_LINK_SUBTARGET_SPEC \
+ "-mcrislinux\
+ %{shared} %{static}\
+ %{symbolic:-Bdynamic} %{static:-Bstatic}\
+ %{!shared:%{!static:\
+ %{rdynamic:-export-dynamic}\
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "}}\
+ %{!r:%{O2|O3: --gc-sections}}"
+
+
+/* Node: Run-time Target */
+
+/* For the cris-*-linux* subtarget. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ if (flag_leading_underscore <= 0) \
+ builtin_define ("__NO_UNDERSCORES__"); \
+ } \
+ while (0)
+
+/* Node: Type Layout */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* Node: Sections */
+
+/* GNU/Linux has crti and crtn and does not need the
+ CRT_CALL_STATIC_FUNCTION trick in cris.h. */
+#undef CRT_CALL_STATIC_FUNCTION
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/cris/linux.opt b/gcc/config/cris/linux.opt
new file mode 100644
index 000000000..a57c48d7c
--- /dev/null
+++ b/gcc/config/cris/linux.opt
@@ -0,0 +1,33 @@
+; GNU/Linux-specific options for the CRIS port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; Provide a legacy -mlinux option.
+mlinux
+Target Report RejectNegative Undocumented
+
+mno-gotplt
+Target Report RejectNegative Mask(AVOID_GOTPLT) MaskExists
+Together with -fpic and -fPIC, do not use GOTPLT references
+
+; There's a small added setup cost with using GOTPLT references
+; for the first (resolving) call, but it should in total be a win
+; both in code size and execution time.
+mgotplt
+Target Report RejectNegative InverseMask(AVOID_GOTPLT) Undocumented
diff --git a/gcc/config/cris/mulsi3.asm b/gcc/config/cris/mulsi3.asm
new file mode 100644
index 000000000..76dfb6346
--- /dev/null
+++ b/gcc/config/cris/mulsi3.asm
@@ -0,0 +1,255 @@
+;; Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; Under Section 7 of GPL version 3, you are granted additional
+;; permissions described in the GCC Runtime Library Exception, version
+;; 3.1, as published by the Free Software Foundation.
+;;
+;; You should have received a copy of the GNU General Public License and
+;; a copy of the GCC Runtime Library Exception along with this program;
+;; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; This code used to be expanded through interesting expansions in
+;; the machine description, compiled from this code:
+;;
+;; #ifdef L_mulsi3
+;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
+;;
+;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
+;; multiplication from the mstep instructions. The check for
+;; smaller-size multiplication pays off in the order of .5-10%;
+;; estimated median 1%, depending on application.
+;; FIXME: It can be further optimized if we go to assembler code, as
+;; gcc 2.7.2 adds a few unnecessary instructions and does not put the
+;; basic blocks in optimal order. */
+;; long
+;; __Mul (unsigned long a, unsigned long b)
+;; {
+;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
+;; /* In case other code is compiled without -march=v10, they will
+;; contain calls to __Mul, regardless of flags at link-time. The
+;; "else"-code below will work, but is unnecessarily slow. This
+;; sometimes cuts a few minutes off from simulation time by just
+;; returning a "mulu.d". */
+;; return a * b;
+;; #else
+;; unsigned long min;
+;;
+;; /* Get minimum via the bound insn. */
+;; min = a < b ? a : b;
+;;
+;; /* Can we omit computation of the high part? */
+;; if (min > 65535)
+;; /* No. Perform full multiplication. */
+;; return a * b;
+;; else
+;; {
+;; /* Check if both operands are within 16 bits. */
+;; unsigned long max;
+;;
+;; /* Get maximum, by knowing the minimum.
+;; This will partition a and b into max and min.
+;; This is not currently something GCC understands,
+;; so do this trick by asm. */
+;; __asm__ ("xor %1,%0\n\txor %2,%0"
+;; : "=r" (max)
+;; : "r" (b), "r" (a), "0" (min));
+;;
+;; if (max > 65535)
+;; /* Make GCC understand that only the low part of "min" will be
+;; used. */
+;; return max * (unsigned short) min;
+;; else
+;; /* Only the low parts of both operands are necessary. */
+;; return ((unsigned short) max) * (unsigned short) min;
+;; }
+;; #endif /* not __CRIS_arch_version >= 10 */
+;; }
+;; #endif /* L_mulsi3 */
+;;
+;; That approach was abandoned since the caveats outweighed the
+;; benefits. The expand-multiplication machinery is also removed, so you
+;; can't do this anymore.
+;;
+;; For doubters of there being any benefits, here were some: insensitivity to:
+;; - ABI changes (mostly for experimentation).
+;; - assembler syntax differences (mostly debug format).
+;; - insn scheduling issues.
+;; Most ABI experiments will presumably happen with arches with mul insns,
+;; so that argument doesn't really hold anymore, and it's unlikely that
+;; new arch variants will need insn scheduling while not having mul
+;; insns.
+
+;; ELF and a.out have different syntax for local labels: the "wrong"
+;; one may not be omitted from the object.
+#undef L
+#ifdef __AOUT__
+# define L(x) x
+#else
+# define L(x) .x
+#endif
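+;; For example, the label written "L(L3):" below thus assembles as
+;; "L3:" for a.out but as the local label ".L3:" for ELF.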
+
+ .global ___Mul
+ .type ___Mul,@function
+___Mul:
+#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
+;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
+;; "ret"), due to hardware bug. See documentation for -mmul-bug-workaround.
+;; Not worthwhile to conditionalize here.
+ .p2alignw 2,0x050f
+ mulu.d $r11,$r10
+ ret
+ nop
+#else
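+;; No multiply insn here, so synthesize the product with mstep
+;; sequences, mirroring the C version quoted above: a full product
+;; built from 16-bit partial products when both operands exceed 16
+;; bits, a single 32x16 sequence when only the larger one does, and a
+;; plain 16x16 sequence (at L(L5)) when neither does.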
+ move.d $r10,$r12
+ move.d $r11,$r9
+ bound.d $r12,$r9
+ cmpu.w 65535,$r9
+ bls L(L3)
+ move.d $r12,$r13
+
+ movu.w $r11,$r9
+ lslq 16,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ mstep $r9,$r13
+ clear.w $r10
+ test.d $r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ movu.w $r12,$r12
+ move.d $r11,$r9
+ clear.w $r9
+ test.d $r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ mstep $r12,$r9
+ add.w $r9,$r10
+ lslq 16,$r10
+ ret
+ add.d $r13,$r10
+
+L(L3):
+ move.d $r9,$r10
+ xor $r11,$r10
+ xor $r12,$r10
+ cmpu.w 65535,$r10
+ bls L(L5)
+ movu.w $r9,$r13
+
+ movu.w $r13,$r13
+ move.d $r10,$r9
+ lslq 16,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ mstep $r13,$r9
+ clear.w $r10
+ test.d $r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ lslq 16,$r10
+ ret
+ add.d $r9,$r10
+
+L(L5):
+ movu.w $r9,$r9
+ lslq 16,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ mstep $r9,$r10
+ ret
+ mstep $r9,$r10
+#endif
+L(Lfe1):
+ .size ___Mul,L(Lfe1)-___Mul
diff --git a/gcc/config/cris/predicates.md b/gcc/config/cris/predicates.md
new file mode 100644
index 000000000..edd16bbdd
--- /dev/null
+++ b/gcc/config/cris/predicates.md
@@ -0,0 +1,174 @@
+;; Operand and operator predicates for the GCC CRIS port.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Operator predicates.
+
+(define_predicate "cris_orthogonal_operator"
+ (match_code "plus, minus, ior, and, umin"))
+
+(define_predicate "cris_commutative_orth_op"
+ (match_code "plus, ior, and, umin"))
+
+;; By the name, you might think we should include MULT. We don't because
+;; it doesn't accept the same addressing modes as the others (only
+;; registers) and there's also the problem of handling TARGET_MUL_BUG.
+
+(define_predicate "cris_operand_extend_operator"
+ (match_code "plus, minus, umin"))
+
+(define_predicate "cris_additive_operand_extend_operator"
+ (match_code "plus, minus"))
+
+(define_predicate "cris_extend_operator"
+ (match_code "zero_extend, sign_extend"))
+
+(define_predicate "cris_plus_or_bound_operator"
+ (match_code "plus, umin"))
+
+;; Used as an operator to get a handle on already-known-valid MEM rtxes
+;; (no need to validate the address), where some address expression parts
+;; have their own match_operand.
+
+(define_predicate "cris_mem_op"
+ (match_code "mem"))
+
+(define_predicate "cris_load_multiple_op"
+ (and (match_code "parallel")
+ (match_test "cris_movem_load_rest_p (op, 0)")))
+
+(define_predicate "cris_store_multiple_op"
+ (and (match_code "parallel")
+ (match_test "cris_store_multiple_op_p (op)")))
+
+
+;; Operand helper predicates.
+
+(define_predicate "cris_bdap_const_operand"
+ (and (match_code "label_ref, symbol_ref, const_int, const_double, const")
+ (ior (not (match_test "flag_pic"))
+ (match_test "cris_valid_pic_const (op, true)"))))
+
+(define_predicate "cris_simple_address_operand"
+ (ior (match_operand:SI 0 "register_operand")
+ (and (match_code "post_inc")
+ (match_test "register_operand (XEXP (op, 0), Pmode)"))))
+
+(define_predicate "cris_simple_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "mem")
+ (match_test "cris_simple_address_operand (XEXP (op, 0),
+ Pmode)"))))
+
+;; The caller needs to use :SI.
+(define_predicate "cris_bdap_sign_extend_operand"
+; Disabled until <URL:http://gcc.gnu.org/ml/gcc-patches/2005-10/msg01376.html>
+; or <URL:http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00940.html> is committed.
+ (match_test "0"))
+; (and (match_code "sign_extend")
+; (and (match_test "MEM_P (XEXP (op, 0))")
+; (match_test "cris_simple_address_operand (XEXP (XEXP (op, 0), 0),
+; Pmode)"))))
+
+;; FIXME: Should not have to test for 1.
+(define_predicate "cris_scale_int_operand"
+ (and (match_code "const_int")
+ (ior (ior (match_test "op == GEN_INT (4)")
+ (match_test "op == const2_rtx"))
+ (match_test "op == const1_rtx"))))
+
+;; FIXME: Should be able to assume (reg int).
+(define_predicate "cris_biap_mult_operand"
+ (and (match_code "mult")
+ (ior (and (match_test "register_operand (XEXP (op, 0), Pmode)")
+ (match_test "cris_scale_int_operand (XEXP (op, 1), Pmode)"))
+ (and (match_test "cris_scale_int_operand (XEXP (op, 0), Pmode)")
+ (match_test "register_operand (XEXP (op, 1), Pmode)")))))
+
+
+;; Operand predicates.
+
+;; This checks a part of an address, the one that is not a plain register
+;; for an addressing mode using BDAP.
+;; Allowed operands are either:
+;; a) a register
+;; b) a CONST operand (but not a symbol when generating PIC)
+;; c) a [r] or [r+] in SImode, or sign-extend from HI or QI.
+
+(define_predicate "cris_bdap_operand"
+ (ior (match_operand 0 "cris_bdap_const_operand")
+ (ior (match_operand:SI 0 "cris_simple_operand")
+ (match_operand:SI 0 "cris_bdap_sign_extend_operand"))))
+
+;; This is similar to cris_bdap_operand:
+;; It checks a part of an address, the one that is not a plain register
+;; for an addressing mode using BDAP or BIAP.
+;; Allowed operands are either:
+;; a) a register
+;; b) a CONST operand (but not a symbol when generating PIC)
+;; c) a mult of (1, 2 or 4) and a register
+;; d) a [r] or [r+] in SImode, or sign-extend from HI or QI.
+
+(define_predicate "cris_bdap_biap_operand"
+ (ior (match_operand 0 "cris_bdap_operand")
+ (match_operand 0 "cris_biap_mult_operand")))
+
+;; Since with -fPIC, not all symbols are valid PIC symbols or indeed
+;; general_operands, we have to have a predicate that matches them for the
+;; "movsi" expander.
+;; FIXME: Can s/special_// when PR 20413 is fixed.
+
+(define_special_predicate "cris_general_operand_or_symbol"
+ (ior (match_operand 0 "general_operand")
+ (and (match_code "const, symbol_ref, label_ref")
+ ; The following test is actually just an assertion.
+ (match_test "cris_pic_symbol_type_of (op) != cris_no_symbol"))))
+
+;; A predicate for the anon movsi expansion, one that fits a PCREL
+;; operand as well as general_operand.
+
+(define_special_predicate "cris_general_operand_or_pic_source"
+ (ior (match_operand 0 "general_operand")
+ (and (match_test "flag_pic")
+ (match_test "cris_valid_pic_const (op, false)"))))
+
+;; Since a PLT symbol is not a general_operand, we have to have a
+;; predicate that matches it when we need it. We use this in the expanded
+;; "call" and "call_value" anonymous patterns.
+
+(define_predicate "cris_nonmemory_operand_or_callable_symbol"
+ (ior (match_operand 0 "nonmemory_operand")
+ (and (match_code "const")
+ (and
+ (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (ior
+ (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PLT_PCREL")
+ (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PCREL"))))))
+
+;; This matches a (MEM (general_operand)) or
+;; (MEM (cris_general_operand_or_symbol)). The second one isn't a valid
+;; memory_operand, so we need this predicate to recognize call
+;; destinations before we change them to a PLT operand (by wrapping in
+;; UNSPEC CRIS_UNSPEC_PLT).
+
+(define_predicate "cris_mem_call_operand"
+ (and (match_code "mem")
+ (ior (match_operand 0 "memory_operand")
+ (match_test "cris_general_operand_or_symbol (XEXP (op, 0),
+ Pmode)"))))
diff --git a/gcc/config/cris/t-cris b/gcc/config/cris/t-cris
new file mode 100644
index 000000000..1630acbcc
--- /dev/null
+++ b/gcc/config/cris/t-cris
@@ -0,0 +1,58 @@
+#
+# t-cris
+#
+# The Makefile fragment to include when compiling gcc et al for CRIS.
+#
+# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+#
+# The makefile macros etc. are included in the order found in the
+# section "Target Fragment" in the gcc info-files (or the paper copy) of
+# "Using and Porting GCC"
+
+LIB2FUNCS_EXTRA = _udivsi3.c _divsi3.c _umodsi3.c _modsi3.c
+CRIS_LIB1CSRC = $(srcdir)/config/cris/arit.c
+
+FPBIT = tmplibgcc_fp_bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' > dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+# Use another name to avoid confusing SUN make, if support for
+# it is reinstated elsewhere. The "tmplibgcc" prefix means
+# "make clean" will wipe it. We define a few L_ thingies
+# because we can't select them individually through FPBIT_FUNCS;
+# see above.
+tmplibgcc_fp_bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' > $@
+ echo '#define FLOAT' >> $@
+ cat $(srcdir)/config/fp-bit.c >> $@
+
+# The fixed-point arithmetic code is in one file, arit.c,
+# similar to libgcc2.c (or the old libgcc1.c). We need to
+# "split it up" with one file per define.
+$(LIB2FUNCS_EXTRA): $(CRIS_LIB1CSRC)
+ name=`echo $@ | sed -e 's,.*/,,' | sed -e 's,.c$$,,'`; \
+ echo "#define L$$name" > tmp-$@ \
+ && echo '#include "$<"' >> tmp-$@ \
+ && mv -f tmp-$@ $@
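+
+# For illustration only (not part of the fragment): the generated
+# _udivsi3.c then contains just
+#	#define L_udivsi3
+#	#include "<srcdir>/config/cris/arit.c"
+# so each LIB2FUNCS_EXTRA object compiles exactly one function out of
+# arit.c.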
+
+$(out_object_file): gt-cris.h
+gt-cris.h : s-gtype ; @true
diff --git a/gcc/config/cris/t-elfmulti b/gcc/config/cris/t-elfmulti
new file mode 100644
index 000000000..8d4dfea4e
--- /dev/null
+++ b/gcc/config/cris/t-elfmulti
@@ -0,0 +1,34 @@
+# Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/cris/mulsi3.asm
+MULTILIB_OPTIONS = march=v10/march=v32
+MULTILIB_DIRNAMES = v10 v32
+MULTILIB_MATCHES = \
+ march?v10=mcpu?etrax100lx \
+ march?v10=mcpu?ng \
+ march?v10=march?etrax100lx \
+ march?v10=march?ng \
+ march?v10=march?v11 \
+ march?v10=mcpu?v11 \
+ march?v10=mcpu?v10 \
+ march?v32=mcpu?v32
+MULTILIB_EXTRA_OPTS = mbest-lib-options
+INSTALL_LIBGCC = install-multilib
+LIBGCC = stmp-multilib
+CRTSTUFF_T_CFLAGS = $(LIBGCC2_CFLAGS) -moverride-best-lib-options
diff --git a/gcc/config/cris/t-linux b/gcc/config/cris/t-linux
new file mode 100644
index 000000000..96e861a42
--- /dev/null
+++ b/gcc/config/cris/t-linux
@@ -0,0 +1,9 @@
+TARGET_LIBGCC2_CFLAGS += -fPIC
+CRTSTUFF_T_CFLAGS_S = $(TARGET_LIBGCC2_CFLAGS)
+SHLIB_MAPFILES += $(srcdir)/config/cris/libgcc.ver
+
+# We *know* we have a limits.h in the glibc library, with extra
+# definitions needed for e.g. libgfortran.
+ifneq ($(inhibit_libc),true)
+LIMITS_H_TEST = :
+endif
diff --git a/gcc/config/crx/crx-protos.h b/gcc/config/crx/crx-protos.h
new file mode 100644
index 000000000..aeb4bdd59
--- /dev/null
+++ b/gcc/config/crx/crx-protos.h
@@ -0,0 +1,79 @@
+/* Prototypes for exported functions defined in crx.c
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_CRX_PROTOS_H
+#define GCC_CRX_PROTOS_H
+
+
+/* Register usage. */
+extern enum reg_class crx_regno_reg_class (int);
+extern int crx_hard_regno_mode_ok (int regno, enum machine_mode);
+#ifdef RTX_CODE
+extern enum reg_class crx_secondary_reload_class (enum reg_class, enum machine_mode, rtx);
+#endif /* RTX_CODE */
+
+/* Passing function arguments. */
+extern int crx_function_arg_regno_p (int);
+#ifdef TREE_CODE
+#ifdef RTX_CODE
+extern void crx_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
+#endif /* RTX_CODE */
+#endif /* TREE_CODE */
+
+#ifdef RTX_CODE
+/* Addressing Modes. */
+struct crx_address
+{
+ rtx base, index, disp, side_effect;
+ int scale;
+};
+
+enum crx_addrtype
+{
+ CRX_INVALID, CRX_REG_REL, CRX_POST_INC, CRX_SCALED_INDX, CRX_ABSOLUTE
+};
+
+extern enum crx_addrtype crx_decompose_address (rtx addr, struct crx_address *out);
+
+extern int crx_const_double_ok (rtx op);
+
+/* Instruction output. */
+extern void crx_print_operand (FILE *, rtx, int);
+extern void crx_print_operand_address (FILE *, rtx);
+
+/* Misc functions called from crx.md. */
+extern void crx_expand_movmem_single (rtx, rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT *);
+extern int crx_expand_movmem (rtx, rtx, rtx, rtx);
+#endif /* RTX_CODE */
+
+/* Routines to compute costs. */
+extern int crx_memory_move_cost (enum machine_mode, enum reg_class, int);
+
+/* Prologue/Epilogue functions. */
+extern int crx_initial_elimination_offset (int, int);
+extern char *crx_prepare_push_pop_string (int);
+extern void crx_expand_prologue (void);
+extern void crx_expand_epilogue (void);
+
+
+/* Handling the "interrupt" attribute */
+extern int crx_interrupt_function_p (void);
+
+#endif /* GCC_CRX_PROTOS_H */
diff --git a/gcc/config/crx/crx.c b/gcc/config/crx/crx.c
new file mode 100644
index 000000000..79d341c47
--- /dev/null
+++ b/gcc/config/crx/crx.c
@@ -0,0 +1,1466 @@
+/* Output routines for GCC for CRX.
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/*****************************************************************************/
+/* HEADER INCLUDES */
+/*****************************************************************************/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-codes.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+#include "basic-block.h"
+#include "df.h"
+#include "target.h"
+#include "target-def.h"
+
+/*****************************************************************************/
+/* DEFINITIONS */
+/*****************************************************************************/
+
+/* Highest register number used for passing parameters. */
+#define MAX_REG_FOR_PASSING_ARGS 6
+
+/* Lowest register number used for passing parameters. */
+#define MIN_REG_FOR_PASSING_ARGS 2
+
+/* The maximum number of registers the architecture's assembly supports in a
+ * single push/pop instruction. */
+#define MAX_COUNT 8
+
+/* True if the current function is a 'noreturn' function, i.e. its
+ * declaration is qualified as volatile. */
+#define FUNC_IS_NORETURN_P(decl) (TREE_THIS_VOLATILE (decl))
+
+/* The following macros are used in crx_decompose_address () */
+
+/* Returns the factor of a scaled index address or -1 if invalid. */
+#define SCALE_FOR_INDEX_P(X) \
+ (GET_CODE (X) == CONST_INT ? \
+ (INTVAL (X) == 1 ? 1 : \
+ INTVAL (X) == 2 ? 2 : \
+ INTVAL (X) == 4 ? 4 : \
+ INTVAL (X) == 8 ? 8 : \
+ -1) : \
+ -1)
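+
+/* For example (illustrative only), SCALE_FOR_INDEX_P applied to
+   GEN_INT (4) yields 4, while GEN_INT (3) or any non-CONST_INT rtx
+   yields -1. */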
+
+/* Nonzero if the rtx X is a signed const int of n bits */
+#define RTX_SIGNED_INT_FITS_N_BITS(X,n) \
+ ((GET_CODE (X) == CONST_INT \
+ && SIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0)
+
+/* Nonzero if the rtx X is an unsigned const int of n bits. */
+#define RTX_UNSIGNED_INT_FITS_N_BITS(X, n) \
+ ((GET_CODE (X) == CONST_INT \
+ && UNSIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0)
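+
+/* Worked examples for the two macros above: with GEN_INT (-2048) the
+   signed variant fits 12 bits (the minimum of a signed 12-bit field),
+   while the unsigned variant rejects it because -2048 < 0. */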
+
+/*****************************************************************************/
+/* STATIC VARIABLES */
+/*****************************************************************************/
+
+/* Nonzero if the last param processed is passed in a register. */
+static int last_parm_in_reg;
+
+/* Will hold the number of the last register the prologue saves, -1 if no
+ * register is saved. */
+static int last_reg_to_save;
+
+/* Indexed by register number; an element is 1 if that register needs to be
+ * saved in the current function, 0 otherwise. */
+static int save_regs[FIRST_PSEUDO_REGISTER];
+
+/* Number of bytes saved on the stack for non-scratch registers. */
+static int sum_regs = 0;
+
+/* Number of bytes saved on the stack for local variables. */
+static int local_vars_size;
+
+/* The sum of two sizes: the local variables and the padding for the saved
+ * registers. Used in expand_prologue () and expand_epilogue (). */
+static int size_for_adjusting_sp;
+
+/* In case of a POST_INC or POST_DEC memory reference, we must report the mode
+ * of the memory reference from PRINT_OPERAND to PRINT_OPERAND_ADDRESS. */
+static enum machine_mode output_memory_reference_mode;
+
+/*****************************************************************************/
+/* TARGETM FUNCTION PROTOTYPES */
+/*****************************************************************************/
+
+static bool crx_fixed_condition_code_regs (unsigned int *, unsigned int *);
+static rtx crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED);
+static bool crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED);
+static int crx_address_cost (rtx, bool);
+static bool crx_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool crx_can_eliminate (const int, const int);
+static rtx crx_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void crx_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+
+/*****************************************************************************/
+/* RTL VALIDITY */
+/*****************************************************************************/
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P crx_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE crx_can_eliminate
+
+/*****************************************************************************/
+/* STACK LAYOUT AND CALLING CONVENTIONS */
+/*****************************************************************************/
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS crx_fixed_condition_code_regs
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX crx_struct_value_rtx
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY crx_return_in_memory
+
+/*****************************************************************************/
+/* PASSING FUNCTION ARGUMENTS */
+/*****************************************************************************/
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG crx_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE crx_function_arg_advance
+
+/*****************************************************************************/
+/* RELATIVE COSTS OF OPERATIONS */
+/*****************************************************************************/
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST crx_address_cost
+
+/*****************************************************************************/
+/* TARGET-SPECIFIC USES OF `__attribute__' */
+/*****************************************************************************/
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE crx_attribute_table
+
+static const struct attribute_spec crx_attribute_table[] = {
+ /* ISRs have special prologue and epilogue requirements. */
+ {"interrupt", 0, 0, false, true, true, NULL},
+ {NULL, 0, 0, false, false, false, NULL}
+};
+
+/* Option handling. */
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE crx_option_optimization_table
+
+static const struct default_options crx_option_optimization_table[] =
+ {
+ /* Put each function in its own section so that PAGE-instruction
+ relaxation can do its best. */
+ { OPT_LEVELS_1_PLUS, OPT_ffunction_sections, NULL, 1 },
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize 'targetm' variable which contains pointers to functions and data
+ * relating to the target machine. */
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/*****************************************************************************/
+/* TARGET HOOK IMPLEMENTATIONS */
+/*****************************************************************************/
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+crx_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = CC_REGNUM;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+/* Implements hook TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+crx_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, CRX_STRUCT_VALUE_REGNUM);
+}
+
+/* Implements hook TARGET_RETURN_IN_MEMORY. */
+
+static bool
+crx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ if (TYPE_MODE (type) == BLKmode)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 8);
+ }
+ else
+ return false;
+}
+
+
+/*****************************************************************************/
+/* MACRO IMPLEMENTATIONS */
+/*****************************************************************************/
+
+/* STACK LAYOUT AND CALLING CONVENTIONS ROUTINES */
+/* --------------------------------------------- */
+
+/* Return nonzero if the current function being compiled is an interrupt
+ * function as specified by the "interrupt" attribute. */
+
+int
+crx_interrupt_function_p (void)
+{
+ tree attributes;
+
+ attributes = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ return lookup_attribute ("interrupt", attributes) != NULL_TREE;
+}
+
+/* Compute values for the array save_regs and the variable sum_regs. save_regs
+ * is indexed by register number; an entry is 1 if we need to save that
+ * register in the current function, 0 if not. sum_regs is the total size in
+ * bytes of the registers being saved. */
+
+static void
+crx_compute_save_regs (void)
+{
+ unsigned int regno;
+
+  /* Initialize here so that if the function is noreturn it stays -1. */
+ last_reg_to_save = -1;
+
+ /* No need to save any registers if the function never returns. */
+ if (FUNC_IS_NORETURN_P (current_function_decl))
+ return;
+
+ /* Initialize the number of bytes to be saved. */
+ sum_regs = 0;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ if (fixed_regs[regno])
+ {
+ save_regs[regno] = 0;
+ continue;
+ }
+
+      /* Decide whether this reg needs to be saved; interrupt functions
+       * have stricter requirements than ordinary ones. */
+ if (crx_interrupt_function_p ())
+ {
+ if (!current_function_is_leaf && call_used_regs[regno])
+	      /* This is a call-clobbered reg in a non-leaf interrupt routine -
+	       * save it for the sake of its callees. */
+ save_regs[regno] = 1;
+
+ else if (df_regs_ever_live_p (regno))
+ /* This reg is used - save it. */
+ save_regs[regno] = 1;
+ else
+	    /* This reg is not used - no need to save it. */
+ save_regs[regno] = 0;
+ }
+ else
+ {
+ /* If this reg is used and not call-used (except RA), save it. */
+ if (df_regs_ever_live_p (regno)
+ && (!call_used_regs[regno] || regno == RETURN_ADDRESS_REGNUM))
+ save_regs[regno] = 1;
+ else
+ save_regs[regno] = 0;
+ }
+ }
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (save_regs[regno] == 1)
+ {
+ last_reg_to_save = regno;
+ sum_regs += UNITS_PER_WORD;
+ }
+}
+
+/* Compute the size of the local area and the size to be adjusted by the
+ * prologue and epilogue. */
+
+static void
+crx_compute_frame (void)
+{
+ /* For aligning the local variables. */
+ int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+ int padding_locals;
+
+ /* Padding needed for each element of the frame. */
+ local_vars_size = get_frame_size ();
+
+ /* Align to the stack alignment. */
+ padding_locals = local_vars_size % stack_alignment;
+ if (padding_locals)
+ padding_locals = stack_alignment - padding_locals;
+
+ local_vars_size += padding_locals;
+
+ size_for_adjusting_sp = local_vars_size + (ACCUMULATE_OUTGOING_ARGS ?
+ crtl->outgoing_args_size : 0);
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+crx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
+/* Implements the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
+
+int
+crx_initial_elimination_offset (int from, int to)
+{
+ /* Compute this since we need to use sum_regs. */
+ crx_compute_save_regs ();
+
+ /* Compute this since we need to use local_vars_size. */
+ crx_compute_frame ();
+
+ if ((from) == FRAME_POINTER_REGNUM && (to) == STACK_POINTER_REGNUM)
+ return (ACCUMULATE_OUTGOING_ARGS ?
+ crtl->outgoing_args_size : 0);
+ else if ((from) == ARG_POINTER_REGNUM && (to) == FRAME_POINTER_REGNUM)
+ return (sum_regs + local_vars_size);
+ else if ((from) == ARG_POINTER_REGNUM && (to) == STACK_POINTER_REGNUM)
+ return (sum_regs + local_vars_size +
+ (ACCUMULATE_OUTGOING_ARGS ?
+ crtl->outgoing_args_size : 0));
+ else
+ abort ();
+}
+
+/* REGISTER USAGE */
+/* -------------- */
+
+/* Return the class number of the smallest class containing reg number REGNO.
+ * This could be a conditional expression or could index an array. */
+
+enum reg_class
+crx_regno_reg_class (int regno)
+{
+ if (regno >= 0 && regno < SP_REGNUM)
+ return NOSP_REGS;
+
+ if (regno == SP_REGNUM)
+ return GENERAL_REGS;
+
+ if (regno == LO_REGNUM)
+ return LO_REGS;
+ if (regno == HI_REGNUM)
+ return HI_REGS;
+
+ return NO_REGS;
+}
+
+/* Transfer between HILO_REGS and memory via secondary reloading. */
+
+enum reg_class
+crx_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED)
+{
+ if (reg_classes_intersect_p (rclass, HILO_REGS)
+ && true_regnum (x) == -1)
+ return GENERAL_REGS;
+
+ return NO_REGS;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+int
+crx_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* CC can only hold CCmode values. */
+ if (regno == CC_REGNUM)
+ return GET_MODE_CLASS (mode) == MODE_CC;
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return 0;
+ /* HILO registers can only hold SImode and DImode */
+ if (HILO_REGNO_P (regno))
+ return mode == SImode || mode == DImode;
+ return 1;
+}
+
+/* PASSING FUNCTION ARGUMENTS */
+/* -------------------------- */
+
+/* Return the number of registers needed to pass a parameter of type TYPE, if
+ * enough parameter registers are available; otherwise return 0. */
+
+static int
+enough_regs_for_param (CUMULATIVE_ARGS * cum, const_tree type,
+ enum machine_mode mode)
+{
+ int type_size;
+ int remaining_size;
+
+ if (mode != BLKmode)
+ type_size = GET_MODE_BITSIZE (mode);
+ else
+ type_size = int_size_in_bytes (type) * BITS_PER_UNIT;
+
+ remaining_size =
+ BITS_PER_WORD * (MAX_REG_FOR_PASSING_ARGS -
+ (MIN_REG_FOR_PASSING_ARGS + cum->ints) + 1);
+
+  /* Any parameter that is too big to pass in two registers is passed on
+   * the stack. */
+ if ((remaining_size >= type_size) && (type_size <= 2 * BITS_PER_WORD))
+ return (type_size + BITS_PER_WORD - 1) / BITS_PER_WORD;
+
+ return 0;
+}
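+
+/* A worked example of the above, given the CRX defaults (BITS_PER_WORD is
+   32 and the argument registers are r2..r6): with cum->ints == 0,
+   remaining_size is 32 * (6 - 2 + 1) = 160 bits, so a DImode argument of
+   64 bits gets (64 + 31) / 32 = 2 registers, while a 12-byte aggregate
+   (96 bits) yields 0 because it exceeds 2 * BITS_PER_WORD. */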
+
+/* Implements TARGET_FUNCTION_ARG. */
+
+static rtx
+crx_function_arg (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ last_parm_in_reg = 0;
+
+  /* function_arg () is called with this type just after all the args have had
+   * their registers assigned. The rtx it returns for this type is supposed to
+   * be passed to 'gen_call', but that is currently not implemented (see macro
+   * GEN_CALL). */
+ if (type == void_type_node)
+ return NULL_RTX;
+
+ if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0))
+ return NULL_RTX;
+
+ if (mode == BLKmode)
+ {
+      /* Allow structures that need trailing padding bytes to be passed to
+       * a function in registers. */
+ if (enough_regs_for_param (cum, type, mode) != 0)
+ {
+ last_parm_in_reg = 1;
+ return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints);
+ }
+ }
+
+ if (MIN_REG_FOR_PASSING_ARGS + cum->ints > MAX_REG_FOR_PASSING_ARGS)
+ return NULL_RTX;
+ else
+ {
+ if (enough_regs_for_param (cum, type, mode) != 0)
+ {
+ last_parm_in_reg = 1;
+ return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints);
+ }
+ }
+
+ return NULL_RTX;
+}
+
+/* Implements the macro INIT_CUMULATIVE_ARGS defined in crx.h. */
+
+void
+crx_init_cumulative_args (CUMULATIVE_ARGS * cum, tree fntype,
+ rtx libfunc ATTRIBUTE_UNUSED)
+{
+ tree param, next_param;
+
+ cum->ints = 0;
+
+  /* Determine whether this function takes variable arguments: if it does
+   * not, the argument type list ends with 'void_type_node'. Change here
+   * for a different varargs convention. */
+ for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
+ param != (tree) 0; param = next_param)
+ {
+ next_param = TREE_CHAIN (param);
+ if (next_param == (tree) 0 && TREE_VALUE (param) != void_type_node)
+ {
+ cum->ints = -1;
+ return;
+ }
+ }
+}
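+
+/* An illustrative reading of the loop above: for "int f (int, ...)" the
+   TYPE_ARG_TYPES chain does not end with void_type_node, so cum->ints is
+   set to -1 and crx_function_arg () then passes every argument on the
+   stack. */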
+
+/* Implements TARGET_FUNCTION_ARG_ADVANCE. */
+
+static void
+crx_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* l holds the number of registers required */
+ int l = GET_MODE_BITSIZE (mode) / BITS_PER_WORD;
+
+  /* If the parameter isn't passed in a register, don't advance cum. */
+ if (!last_parm_in_reg)
+ return;
+
+ if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0))
+ return;
+
+ if (mode == SImode || mode == HImode || mode == QImode || mode == DImode)
+ {
+ if (l <= 1)
+ cum->ints += 1;
+ else
+ cum->ints += l;
+ }
+ else if (mode == SFmode || mode == DFmode)
+ cum->ints += l;
+ else if ((mode) == BLKmode)
+ {
+ if ((l = enough_regs_for_param (cum, type, mode)) != 0)
+ cum->ints += l;
+ }
+
+}
+
+/* Implements the macro FUNCTION_ARG_REGNO_P defined in crx.h. Return nonzero
+ * if N is a register used for passing parameters. */
+
+int
+crx_function_arg_regno_p (int n)
+{
+ return (n <= MAX_REG_FOR_PASSING_ARGS && n >= MIN_REG_FOR_PASSING_ARGS);
+}
+
+/* ADDRESSING MODES */
+/* ---------------- */
+
+/* Implements the hook for TARGET_LEGITIMATE_ADDRESS_P defined in crx.h.
+ * The following addressing modes are supported on CRX:
+ *
+ * Relocations --> const | symbol_ref | label_ref
+ * Absolute address --> 32-bit absolute
+ * Post increment --> reg + 12-bit disp.
+ * Post modify --> reg + 12-bit disp.
+ * Register relative --> reg | 32-bit disp. + reg | 4 bit + reg
+ * Scaled index --> reg + reg | 22-bit disp. + reg + reg |
+ * 22-disp. + reg + reg + (2 | 4 | 8) */
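+
+/* As an illustration of crx_decompose_address () below (example rtxes,
+   not from the sources): (plus (mult (reg r1) (const_int 4)) (reg r2))
+   decomposes into CRX_SCALED_INDX with index r1, scale 4 and base r2,
+   while (post_modify (reg r3) (plus (reg r3) (const_int 8))) decomposes
+   into CRX_POST_INC with base r3 and disp 8. */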
+
+static rtx
+crx_addr_reg (rtx addr_reg)
+{
+ if (GET_MODE (addr_reg) != Pmode)
+ return NULL_RTX;
+
+ if (REG_P (addr_reg))
+ return addr_reg;
+ else if (GET_CODE (addr_reg) == SUBREG
+ && REG_P (SUBREG_REG (addr_reg))
+ && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (addr_reg)))
+ <= UNITS_PER_WORD))
+ return SUBREG_REG (addr_reg);
+ else
+ return NULL_RTX;
+}
+
+enum crx_addrtype
+crx_decompose_address (rtx addr, struct crx_address *out)
+{
+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+ rtx scale_rtx = NULL_RTX, side_effect = NULL_RTX;
+ int scale = -1;
+
+ enum crx_addrtype retval = CRX_INVALID;
+
+ switch (GET_CODE (addr))
+ {
+ case CONST_INT:
+ /* Absolute address (known at compile time) */
+ retval = CRX_ABSOLUTE;
+ disp = addr;
+ if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), GET_MODE_BITSIZE (Pmode)))
+ return CRX_INVALID;
+ break;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ /* Absolute address (known at link time) */
+ retval = CRX_ABSOLUTE;
+ disp = addr;
+ break;
+
+ case REG:
+ case SUBREG:
+ /* Register relative address */
+ retval = CRX_REG_REL;
+ base = addr;
+ break;
+
+ case PLUS:
+ switch (GET_CODE (XEXP (addr, 0)))
+ {
+ case REG:
+ case SUBREG:
+ if (REG_P (XEXP (addr, 1)))
+ {
+ /* Scaled index with scale = 1 and disp. = 0 */
+ retval = CRX_SCALED_INDX;
+ base = XEXP (addr, 1);
+ index = XEXP (addr, 0);
+ scale = 1;
+ }
+ else if (RTX_SIGNED_INT_FITS_N_BITS (XEXP (addr, 1), 28))
+ {
+ /* Register relative address and <= 28-bit disp. */
+ retval = CRX_REG_REL;
+ base = XEXP (addr, 0);
+ disp = XEXP (addr, 1);
+ }
+ else
+ return CRX_INVALID;
+ break;
+
+ case PLUS:
+ /* Scaled index and <= 22-bit disp. */
+ retval = CRX_SCALED_INDX;
+ base = XEXP (XEXP (addr, 0), 1);
+ disp = XEXP (addr, 1);
+ if (!RTX_SIGNED_INT_FITS_N_BITS (disp, 22))
+ return CRX_INVALID;
+ switch (GET_CODE (XEXP (XEXP (addr, 0), 0)))
+ {
+ case REG:
+	      /* Scaled index with scale = 1 and <= 22-bit disp. */
+ index = XEXP (XEXP (addr, 0), 0);
+ scale = 1;
+ break;
+
+ case MULT:
+ /* Scaled index with scale >= 0 and <= 22-bit disp. */
+ index = XEXP (XEXP (XEXP (addr, 0), 0), 0);
+ scale_rtx = XEXP (XEXP (XEXP (addr, 0), 0), 1);
+ if ((scale = SCALE_FOR_INDEX_P (scale_rtx)) == -1)
+ return CRX_INVALID;
+ break;
+
+ default:
+ return CRX_INVALID;
+ }
+ break;
+
+ case MULT:
+ /* Scaled index with scale >= 0 */
+ retval = CRX_SCALED_INDX;
+ base = XEXP (addr, 1);
+ index = XEXP (XEXP (addr, 0), 0);
+ scale_rtx = XEXP (XEXP (addr, 0), 1);
+ /* Scaled index with scale >= 0 and <= 22-bit disp. */
+ if ((scale = SCALE_FOR_INDEX_P (scale_rtx)) == -1)
+ return CRX_INVALID;
+ break;
+
+ default:
+ return CRX_INVALID;
+ }
+ break;
+
+ case POST_INC:
+ case POST_DEC:
+ /* Simple post-increment */
+ retval = CRX_POST_INC;
+ base = XEXP (addr, 0);
+ side_effect = addr;
+ break;
+
+ case POST_MODIFY:
+ /* Generic post-increment with <= 12-bit disp. */
+ retval = CRX_POST_INC;
+ base = XEXP (addr, 0);
+ side_effect = XEXP (addr, 1);
+ if (base != XEXP (side_effect, 0))
+ return CRX_INVALID;
+ switch (GET_CODE (side_effect))
+ {
+ case PLUS:
+ case MINUS:
+ disp = XEXP (side_effect, 1);
+ if (!RTX_SIGNED_INT_FITS_N_BITS (disp, 12))
+ return CRX_INVALID;
+ break;
+
+ default:
+ /* CRX only supports PLUS and MINUS */
+ return CRX_INVALID;
+ }
+ break;
+
+ default:
+ return CRX_INVALID;
+ }
+
+ if (base)
+ {
+ base = crx_addr_reg (base);
+ if (!base)
+ return CRX_INVALID;
+ }
+ if (index)
+ {
+ index = crx_addr_reg (index);
+ if (!index)
+ return CRX_INVALID;
+ }
+
+ out->base = base;
+ out->index = index;
+ out->disp = disp;
+ out->scale = scale;
+ out->side_effect = side_effect;
+
+ return retval;
+}
+
+bool
+crx_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx addr, bool strict)
+{
+ enum crx_addrtype addrtype;
+ struct crx_address address;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
+ GET_MODE_NAME (mode), strict);
+ debug_rtx (addr);
+ }
+
+ addrtype = crx_decompose_address (addr, &address);
+
+ if (addrtype == CRX_POST_INC && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return FALSE;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ const char *typestr;
+ switch (addrtype)
+ {
+ case CRX_INVALID:
+ typestr = "Invalid";
+ break;
+ case CRX_REG_REL:
+ typestr = "Register relative";
+ break;
+ case CRX_POST_INC:
+ typestr = "Post-increment";
+ break;
+ case CRX_SCALED_INDX:
+ typestr = "Scaled index";
+ break;
+ case CRX_ABSOLUTE:
+ typestr = "Absolute";
+ break;
+ default:
+ abort ();
+ }
+ fprintf (stderr, "CRX Address type: %s\n", typestr);
+ }
+
+ if (addrtype == CRX_INVALID)
+ return FALSE;
+
+ if (strict)
+ {
+ if (address.base && !REGNO_OK_FOR_BASE_P (REGNO (address.base)))
+ {
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr, "Base register not strict\n");
+ return FALSE;
+ }
+ if (address.index && !REGNO_OK_FOR_INDEX_P (REGNO (address.index)))
+ {
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr, "Index register not strict\n");
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+/* ROUTINES TO COMPUTE COSTS */
+/* ------------------------- */
+
+/* Return cost of the memory address x. */
+
+static int
+crx_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ enum crx_addrtype addrtype;
+ struct crx_address address;
+
+ int cost = 2;
+
+ addrtype = crx_decompose_address (addr, &address);
+
+ gcc_assert (addrtype != CRX_INVALID);
+
+ /* An absolute address causes a 3-word instruction */
+ if (addrtype == CRX_ABSOLUTE)
+    cost += 2;
+
+ /* Post-modifying addresses are more powerful. */
+ if (addrtype == CRX_POST_INC)
+    cost -= 2;
+
+ /* Attempt to minimize number of registers in the address. */
+ if (address.base)
+ cost++;
+
+ if (address.index && address.scale == 1)
+    cost += 5;
+
+ if (address.disp && !INT_CST4 (INTVAL (address.disp)))
+    cost += 2;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n======\nTARGET_ADDRESS_COST = %d\n", cost);
+ debug_rtx (addr);
+ }
+
+ return cost;
+}
+
+/* Return the cost of moving data of mode MODE between a register of class
+ * RCLASS and memory; IN is zero if the value is to be written to memory,
+ * nonzero if it is to be read in. This cost is relative to those in
+ * REGISTER_MOVE_COST. */
+
+int
+crx_memory_move_cost (enum machine_mode mode,
+ enum reg_class rclass ATTRIBUTE_UNUSED,
+ int in ATTRIBUTE_UNUSED)
+{
+ /* One LD or ST takes twice the time of a simple reg-reg move */
+ if (reg_classes_intersect_p (rclass, GENERAL_REGS))
+ {
+ /* printf ("GENERAL_REGS LD/ST = %d\n", 4 * HARD_REGNO_NREGS (0, mode));*/
+ return 4 * HARD_REGNO_NREGS (0, mode);
+ }
+ else if (reg_classes_intersect_p (rclass, HILO_REGS))
+ {
+ /* HILO to memory and vice versa */
+ /* printf ("HILO_REGS %s = %d\n", in ? "LD" : "ST",
+ (REGISTER_MOVE_COST (mode,
+ in ? GENERAL_REGS : HILO_REGS,
+ in ? HILO_REGS : GENERAL_REGS) + 4)
+ * HARD_REGNO_NREGS (0, mode)); */
+ return (REGISTER_MOVE_COST (mode,
+ in ? GENERAL_REGS : HILO_REGS,
+ in ? HILO_REGS : GENERAL_REGS) + 4)
+ * HARD_REGNO_NREGS (0, mode);
+ }
+ else /* default (like in i386) */
+ {
+ /* printf ("ANYREGS = 100\n"); */
+ return 100;
+ }
+}
+
+/* INSTRUCTION OUTPUT */
+/* ------------------ */
+
+/* Check if a const_double is ok for crx store-immediate instructions */
+
+int
+crx_const_double_ok (rtx op)
+{
+ if (GET_MODE (op) == DFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l[2];
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ return (UNSIGNED_INT_FITS_N_BITS (l[0], 4) &&
+ UNSIGNED_INT_FITS_N_BITS (l[1], 4)) ? 1 : 0;
+ }
+
+ if (GET_MODE (op) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ return UNSIGNED_INT_FITS_N_BITS (l, 4) ? 1 : 0;
+ }
+
+ return (UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_LOW (op), 4) &&
+ UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_HIGH (op), 4)) ? 1 : 0;
+}
+
+/* Implements the macro PRINT_OPERAND defined in crx.h. */
+
+void
+crx_print_operand (FILE * file, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'p' :
+ if (GET_CODE (x) == REG) {
+ if (GET_MODE (x) == DImode || GET_MODE (x) == DFmode)
+ {
+ int regno = REGNO (x);
+ if (regno + 1 >= SP_REGNUM) abort ();
+ fprintf (file, "{%s, %s}", reg_names[regno], reg_names[regno + 1]);
+ return;
+ }
+ else
+ {
+ if (REGNO (x) >= SP_REGNUM) abort ();
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ return;
+ }
+ }
+
+ case 'd' :
+ {
+ const char *crx_cmp_str;
+ switch (GET_CODE (x))
+	{ /* The MD uses compare (reg, reg or imm) but CRX cmp takes
+	   * (reg or imm, reg) -> swap all non-symmetric ops. */
+ case EQ : crx_cmp_str = "eq"; break;
+ case NE : crx_cmp_str = "ne"; break;
+ case GT : crx_cmp_str = "lt"; break;
+ case GTU : crx_cmp_str = "lo"; break;
+ case LT : crx_cmp_str = "gt"; break;
+ case LTU : crx_cmp_str = "hi"; break;
+ case GE : crx_cmp_str = "le"; break;
+ case GEU : crx_cmp_str = "ls"; break;
+ case LE : crx_cmp_str = "ge"; break;
+ case LEU : crx_cmp_str = "hs"; break;
+ default : abort ();
+ }
+ fprintf (file, "%s", crx_cmp_str);
+ return;
+ }
+
+ case 'H':
+ /* Print high part of a double precision value. */
+ switch (GET_CODE (x))
+ {
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == SFmode) abort ();
+ if (GET_MODE (x) == DFmode)
+ {
+ /* High part of a DF const. */
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+
+ fprintf (file, "$0x%lx", l[1]);
+ return;
+ }
+
+ /* -- Fallthrough to handle DI consts -- */
+
+ case CONST_INT:
+ {
+ rtx high, low;
+ split_double (x, &low, &high);
+ putc ('$', file);
+ output_addr_const (file, high);
+ return;
+ }
+
+ case REG:
+ if (REGNO (x) + 1 >= FIRST_PSEUDO_REGISTER) abort ();
+ fprintf (file, "%s", reg_names[REGNO (x) + 1]);
+ return;
+
+ case MEM:
+ /* Adjust memory address to high part. */
+ {
+ rtx adj_mem = x;
+ adj_mem = adjust_address (adj_mem, GET_MODE (adj_mem), 4);
+
+ output_memory_reference_mode = GET_MODE (adj_mem);
+ output_address (XEXP (adj_mem, 0));
+ return;
+ }
+
+ default:
+ abort ();
+ }
+
+ case 'L':
+ /* Print low part of a double precision value. */
+ switch (GET_CODE (x))
+ {
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == SFmode) abort ();
+ if (GET_MODE (x) == DFmode)
+ {
+	      /* Low part of a DF const. */
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+
+ fprintf (file, "$0x%lx", l[0]);
+ return;
+ }
+
+ /* -- Fallthrough to handle DI consts -- */
+
+ case CONST_INT:
+ {
+ rtx high, low;
+ split_double (x, &low, &high);
+ putc ('$', file);
+ output_addr_const (file, low);
+ return;
+ }
+
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ return;
+
+ case MEM:
+ output_memory_reference_mode = GET_MODE (x);
+ output_address (XEXP (x, 0));
+ return;
+
+ default:
+ abort ();
+ }
+
+ case 0 : /* default */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ return;
+
+ case MEM:
+ output_memory_reference_mode = GET_MODE (x);
+ output_address (XEXP (x, 0));
+ return;
+
+ case CONST_DOUBLE:
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ /* Always use H and L for double precision - see above */
+ gcc_assert (GET_MODE (x) == SFmode);
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ fprintf (file, "$0x%lx", l);
+ return;
+ }
+
+ default:
+ putc ('$', file);
+ output_addr_const (file, x);
+ return;
+ }
+
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ }
+
+ abort ();
+}
+
+/* Implements the macro PRINT_OPERAND_ADDRESS defined in crx.h. */
+
+void
+crx_print_operand_address (FILE * file, rtx addr)
+{
+ enum crx_addrtype addrtype;
+ struct crx_address address;
+
+ int offset;
+
+ addrtype = crx_decompose_address (addr, &address);
+
+ if (address.disp)
+ offset = INTVAL (address.disp);
+ else
+ offset = 0;
+
+ switch (addrtype)
+ {
+ case CRX_REG_REL:
+ fprintf (file, "%d(%s)", offset, reg_names[REGNO (address.base)]);
+ return;
+
+ case CRX_POST_INC:
+ switch (GET_CODE (address.side_effect))
+ {
+ case PLUS:
+ break;
+ case MINUS:
+ offset = -offset;
+ break;
+ case POST_INC:
+ offset = GET_MODE_SIZE (output_memory_reference_mode);
+ break;
+ case POST_DEC:
+ offset = -GET_MODE_SIZE (output_memory_reference_mode);
+ break;
+ default:
+ abort ();
+ }
+ fprintf (file, "%d(%s)+", offset, reg_names[REGNO (address.base)]);
+ return;
+
+ case CRX_SCALED_INDX:
+ fprintf (file, "%d(%s, %s, %d)", offset, reg_names[REGNO (address.base)],
+ reg_names[REGNO (address.index)], address.scale);
+ return;
+
+ case CRX_ABSOLUTE:
+ output_addr_const (file, address.disp);
+ return;
+
+ default:
+ abort ();
+ }
+}
+
+
+/*****************************************************************************/
+/* MACHINE DESCRIPTION HELPER-FUNCTIONS */
+/*****************************************************************************/
+
+void crx_expand_movmem_single (rtx src, rtx srcbase, rtx dst, rtx dstbase,
+ rtx tmp_reg, unsigned HOST_WIDE_INT *offset_p)
+{
+ rtx addr, mem;
+ unsigned HOST_WIDE_INT offset = *offset_p;
+
+ /* Load */
+ addr = plus_constant (src, offset);
+ mem = adjust_automodify_address (srcbase, SImode, addr, offset);
+ emit_move_insn (tmp_reg, mem);
+
+ /* Store */
+ addr = plus_constant (dst, offset);
+ mem = adjust_automodify_address (dstbase, SImode, addr, offset);
+ emit_move_insn (mem, tmp_reg);
+
+ *offset_p = offset + 4;
+}
+
+int
+crx_expand_movmem (rtx dstbase, rtx srcbase, rtx count_exp, rtx align_exp)
+{
+ unsigned HOST_WIDE_INT count = 0, offset, si_moves, i;
+ HOST_WIDE_INT align = 0;
+
+ rtx src, dst;
+ rtx tmp_reg;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ { /* Only if aligned */
+ align = INTVAL (align_exp);
+ if (align & 3)
+ return 0;
+ }
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ { /* No more than 16 SImode moves */
+ count = INTVAL (count_exp);
+ if (count > 64)
+ return 0;
+ }
+
+ tmp_reg = gen_reg_rtx (SImode);
+
+  /* Create pseudo registers for the src and dest pointers. */
+ dst = copy_to_mode_reg (Pmode, XEXP (dstbase, 0));
+ if (dst != XEXP (dstbase, 0))
+ dstbase = replace_equiv_address_nv (dstbase, dst);
+ src = copy_to_mode_reg (Pmode, XEXP (srcbase, 0));
+ if (src != XEXP (srcbase, 0))
+ srcbase = replace_equiv_address_nv (srcbase, src);
+
+ offset = 0;
+
+ /* Emit SImode moves */
+ si_moves = count >> 2;
+ for (i = 0; i < si_moves; i++)
+ crx_expand_movmem_single (src, srcbase, dst, dstbase, tmp_reg, &offset);
+
+ /* Special cases */
+ if (count & 3)
+ {
+ offset = count - 4;
+ crx_expand_movmem_single (src, srcbase, dst, dstbase, tmp_reg, &offset);
+ }
+
+ gcc_assert (offset == count);
+
+ return 1;
+}
+
+static void
+mpushpop_str (char *stringbuffer, const char *mnemonic, char *mask)
+{
+ if (strlen (mask) > 2 || crx_interrupt_function_p ()) /* needs 2-word instr. */
+ sprintf (stringbuffer, "\n\t%s\tsp, {%s}", mnemonic, mask);
+ else /* single word instruction */
+ sprintf (stringbuffer, "\n\t%s\t%s", mnemonic, mask);
+}
+
+/* Called from crx.md. The return value depends on the parameter push_or_pop:
+ * When push_or_pop is zero -> string for push instructions of prologue.
+ * When push_or_pop is nonzero -> string for pop/popret/retx in epilogue.
+ * Relies on the assumptions:
+ * 1. RA is the last register to be saved.
+ * 2. The maximal value of the counter is MAX_COUNT. */
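+
+/* For example (illustrative output), a function saving r7, r8 and ra
+   would get "push sp, {r7, r8, ra}" from this routine in the prologue
+   and "popret sp, {r7, r8, ra}" in the epilogue; the exact mask depends
+   on the contents of save_regs[]. */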
+
+char *
+crx_prepare_push_pop_string (int push_or_pop)
+{
+  /* j counts the registers being saved, making sure that no more than
+   * MAX_COUNT of them go into one push/pop instruction. */
+
+ /* For the register mask string */
+ static char mask_str[50];
+
+ /* i is the index of save_regs[], going from 0 until last_reg_to_save */
+ int i = 0;
+
+ int ra_in_bitmask = 0;
+
+ char *return_str;
+
+ /* For reversing on the push instructions if there are more than one. */
+ char *temp_str;
+
+ return_str = (char *) xmalloc (120);
+ temp_str = (char *) xmalloc (120);
+
+ /* Initialize */
+ memset (return_str, 0, 3);
+
+ while (i <= last_reg_to_save)
+ {
+ /* Prepare mask for one instruction. */
+ mask_str[0] = 0;
+
+ if (i <= SP_REGNUM)
+	{ /* Add regs until the instruction is full or the SP register is
+	   * reached. */
+ int j = 0;
+ while (j < MAX_COUNT && i <= SP_REGNUM)
+ {
+ if (save_regs[i])
+ {
+	      /* TODO: using ra_in_bitmask to detect the last pop is not
+	       * smart; it prevents things like: popret r5. */
+ if (i == RETURN_ADDRESS_REGNUM) ra_in_bitmask = 1;
+ if (j > 0) strcat (mask_str, ", ");
+ strcat (mask_str, reg_names[i]);
+ ++j;
+ }
+ ++i;
+ }
+ }
+ else
+ {
+ /* Handle hi/lo savings */
+ while (i <= last_reg_to_save)
+ {
+ if (save_regs[i])
+ {
+ strcat (mask_str, "lo, hi");
+ i = last_reg_to_save + 1;
+ break;
+ }
+ ++i;
+ }
+ }
+
+ if (strlen (mask_str) == 0) continue;
+
+ if (push_or_pop == 1)
+ {
+ if (crx_interrupt_function_p ())
+ mpushpop_str (temp_str, "popx", mask_str);
+ else
+ {
+ if (ra_in_bitmask)
+ {
+ mpushpop_str (temp_str, "popret", mask_str);
+ ra_in_bitmask = 0;
+ }
+ else mpushpop_str (temp_str, "pop", mask_str);
+ }
+
+ strcat (return_str, temp_str);
+ }
+ else
+ {
+	  /* push - We need to reverse the order of the instructions if
+	   * there is more than one (since the pops will not be reversed
+	   * in the epilogue). */
+ if (crx_interrupt_function_p ())
+ mpushpop_str (temp_str, "pushx", mask_str);
+ else
+ mpushpop_str (temp_str, "push", mask_str);
+ strcat (temp_str, return_str);
+ strcpy (strcat (return_str, "\t"), temp_str);
+ }
+
+ }
+
+ if (push_or_pop == 1)
+ {
+ /* pop */
+ if (crx_interrupt_function_p ())
+ strcat (return_str, "\n\tretx\n");
+
+ else if (!FUNC_IS_NORETURN_P (current_function_decl)
+ && !save_regs[RETURN_ADDRESS_REGNUM])
+ strcat (return_str, "\n\tjump\tra\n");
+ }
+
+ /* Skip the newline and the tab in the start of return_str. */
+ return_str += 2;
+ return return_str;
+}
+
+/* CompactRISC CRX Architecture stack layout:
+
+ 0 +---------------------
+ |
+ .
+ .
+ |
+ +==================== Sp(x)=Ap(x+1)
+ A | Args for functions
+ | | called by X and Dynamically
+ | | Dynamic allocations allocated and
+ | | (alloca, variable deallocated
+ Stack | length arrays).
+ grows +-------------------- Fp(x)
+ down| | Local variables of X
+ ward| +--------------------
+ | | Regs saved for X-1
+ | +==================== Sp(x-1)=Ap(x)
+ | Args for func X
+ | pushed by X-1
+ +-------------------- Fp(x-1)
+ |
+ |
+ V
+
+*/
+
+void
+crx_expand_prologue (void)
+{
+ crx_compute_frame ();
+ crx_compute_save_regs ();
+
+  /* If there is no need to push registers or to adjust sp, return. */
+ if (size_for_adjusting_sp + sum_regs == 0)
+ return;
+
+ if (last_reg_to_save != -1)
+ /* If there are registers to push. */
+ emit_insn (gen_push_for_prologue (GEN_INT (sum_regs)));
+
+ if (size_for_adjusting_sp > 0)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-size_for_adjusting_sp)));
+
+ if (frame_pointer_needed)
+ /* Initialize the frame pointer with the value of the stack pointer
+ * pointing now to the locals. */
+ emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+}
+
+/* Generate insns that update the stack for the local variables and the
+ * padding of the saved registers, and generate the appropriate return
+ * insn. */
+
+void
+crx_expand_epilogue (void)
+{
+  /* Nonzero if we need to return and pop only RA, which generates a
+   * different insn. This distinction exists for the peepholes that handle
+   * a call as the last statement in a function. */
+ int only_popret_RA = (save_regs[RETURN_ADDRESS_REGNUM]
+ && (sum_regs == UNITS_PER_WORD));
+
+ if (frame_pointer_needed)
+    /* Restore the stack pointer with the frame pointer's value. */
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+
+ if (size_for_adjusting_sp > 0)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (size_for_adjusting_sp)));
+
+ if (crx_interrupt_function_p ())
+ emit_jump_insn (gen_interrupt_return ());
+ else if (last_reg_to_save == -1)
+ /* Nothing to pop */
+ /* Don't output jump for interrupt routine, only retx. */
+ emit_jump_insn (gen_indirect_jump_return ());
+ else if (only_popret_RA)
+ emit_jump_insn (gen_popret_RA_return ());
+ else
+ emit_jump_insn (gen_pop_and_popret_return (GEN_INT (sum_regs)));
+}
diff --git a/gcc/config/crx/crx.h b/gcc/config/crx/crx.h
new file mode 100644
index 000000000..da6e263ca
--- /dev/null
+++ b/gcc/config/crx/crx.h
@@ -0,0 +1,478 @@
+/* Definitions of target machine for GNU compiler, for CRX.
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_CRX_H
+#define GCC_CRX_H
+
+/*****************************************************************************/
+/* CONTROLLING THE DRIVER */
+/*****************************************************************************/
+
+#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \
+ %{!fenforce-eh-specs:-fno-enforce-eh-specs} \
+ %{!fexceptions:-fno-exceptions} \
+ %{!fthreadsafe-statics:-fno-threadsafe-statics}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#undef MATH_LIBRARY
+#define MATH_LIBRARY ""
+
+/*****************************************************************************/
+/* RUN-TIME TARGET SPECIFICATION */
+/*****************************************************************************/
+
+#ifndef TARGET_CPU_CPP_BUILTINS
+#define TARGET_CPU_CPP_BUILTINS() \
+do { \
+ builtin_define("__CRX__"); \
+ builtin_define("__CR__"); \
+} while (0)
+#endif
+
+#define TARGET_VERSION fputs (" (CRX/ELF)", stderr);
+
+/*****************************************************************************/
+/* STORAGE LAYOUT */
+/*****************************************************************************/
+
+#define BITS_BIG_ENDIAN 0
+
+#define BYTES_BIG_ENDIAN 0
+
+#define WORDS_BIG_ENDIAN 0
+
+#define UNITS_PER_WORD 4
+
+#define POINTER_SIZE 32
+
+#define PARM_BOUNDARY 32
+
+#define STACK_BOUNDARY 32
+
+#define FUNCTION_BOUNDARY 32
+
+#define STRUCTURE_SIZE_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT 32
+
+/* In CRX arrays of chars are word-aligned, so strcpy() will be faster. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD \
+ ? (BITS_PER_WORD) : (ALIGN))
+
+/* In CRX strings are word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(CONSTANT, ALIGN) \
+ (TREE_CODE (CONSTANT) == STRING_CST && (ALIGN) < BITS_PER_WORD \
+ ? (BITS_PER_WORD) : (ALIGN))
+
+#define STRICT_ALIGNMENT 0
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/*****************************************************************************/
+/* LAYOUT OF SOURCE LANGUAGE DATA TYPES */
+/*****************************************************************************/
+
+#define INT_TYPE_SIZE 32
+
+#define SHORT_TYPE_SIZE 16
+
+#define LONG_TYPE_SIZE 32
+
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+
+#define DOUBLE_TYPE_SIZE 64
+
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+#define SIZE_TYPE "unsigned int"
+
+#define PTRDIFF_TYPE "int"
+
+/*****************************************************************************/
+/* REGISTER USAGE. */
+/*****************************************************************************/
+
+#define FIRST_PSEUDO_REGISTER 19
+
+/* On the CRX, the stack pointer (r15) and the condition code register
+   are fixed. */
+#define FIXED_REGISTERS \
+ { \
+ /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* r11 r12 r13 ra sp r16 r17 cc */ \
+ 0, 0, 0, 0, 1, 0, 0, 1 \
+ }
+
+/* On the CRX, calls clobber r0-r6 (the scratch registers), ra (the return
+ * address), sp (the stack pointer, which is fixed), lo, hi and cc. */
+#define CALL_USED_REGISTERS \
+ { \
+ /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 */ \
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, \
+ /* r11 r12 r13 ra sp r16 r17 cc */ \
+ 0, 0, 0, 1, 1, 1, 1, 1 \
+ }
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
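+
+/* Worked examples: with UNITS_PER_WORD == 4 this yields 1 register for
+   SImode (4 bytes) and 2 registers for DImode or DFmode (8 bytes). */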
+
+/* On the CRX architecture, HILO regs can only hold SI mode. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) crx_hard_regno_mode_ok(REGNO, MODE)
+
+/* So far no patterns for moving CCMODE data are available */
+#define AVOID_CCMODE_COPIES
+
+/* Interrupt functions can only use registers that have already been saved by
+ * the prologue, even if they would normally be call-clobbered. */
+#define HARD_REGNO_RENAME_OK(SRC, DEST) \
+ (!crx_interrupt_function_p () || df_regs_ever_live_p (DEST))
+
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+enum reg_class
+{
+ NO_REGS,
+ LO_REGS,
+ HI_REGS,
+ HILO_REGS,
+ NOSP_REGS,
+ GENERAL_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator. Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes. Any move between two registers of a cover class should be
+   cheaper than a load or store of those registers. The macro value is
+   an array of register classes with LIM_REG_CLASSES used as the end
+   marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, LIM_REG_CLASSES \
+}
+
+#define REG_CLASS_NAMES \
+ { \
+ "NO_REGS", \
+ "LO_REGS", \
+ "HI_REGS", \
+ "HILO_REGS", \
+ "NOSP_REGS", \
+ "GENERAL_REGS", \
+ "ALL_REGS" \
+ }
+
+#define REG_CLASS_CONTENTS \
+ { \
+ {0x00000000}, /* NO_REGS */ \
+ {0x00010000}, /* LO_REGS : 16 */ \
+ {0x00020000}, /* HI_REGS : 17 */ \
+ {0x00030000}, /* HILO_REGS : 16, 17 */ \
+ {0x00007fff}, /* NOSP_REGS : 0 - 14 */ \
+ {0x0000ffff}, /* GENERAL_REGS : 0 - 15 */ \
+ {0x0007ffff} /* ALL_REGS : 0 - 18 */ \
+ }
+
+#define REGNO_REG_CLASS(REGNO) crx_regno_reg_class(REGNO)
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#define REG_CLASS_FROM_LETTER(C) \
+ ((C) == 'b' ? NOSP_REGS : \
+ (C) == 'l' ? LO_REGS : \
+ (C) == 'h' ? HI_REGS : \
+ (C) == 'k' ? HILO_REGS : \
+ NO_REGS)
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) < 16 \
+ || (reg_renumber && (unsigned)reg_renumber[REGNO] < 16))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
+
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ crx_secondary_reload_class (CLASS, MODE, X)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD
+
+#define SIGNED_INT_FITS_N_BITS(imm, N) \
+ ((((imm) < ((long long)1<<((N)-1))) && ((imm) >= -((long long)1<<((N)-1)))) ? 1 : 0)
+
+#define UNSIGNED_INT_FITS_N_BITS(imm, N) \
+ (((imm) < ((long long)1<<(N)) && (imm) >= (long long)0) ? 1 : 0)
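+
+/* Worked examples: SIGNED_INT_FITS_N_BITS (-16, 5) and
+   UNSIGNED_INT_FITS_N_BITS (31, 5) are both 1, while
+   SIGNED_INT_FITS_N_BITS (16, 5) is 0, since a signed 5-bit field holds
+   -16..15. */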
+
+#define HILO_REGNO_P(regno) \
+ (reg_classes_intersect_p(REGNO_REG_CLASS(regno), HILO_REGS))
+
+#define INT_CST4(VALUE) \
+ (((VALUE) >= -1 && (VALUE) <= 4) || (VALUE) == -4 \
+ || (VALUE) == 7 || (VALUE) == 8 || (VALUE) == 16 || (VALUE) == 32 \
+ || (VALUE) == 20 || (VALUE) == 12 || (VALUE) == 48)
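+
+/* Worked examples: INT_CST4 (-4), INT_CST4 (3) and INT_CST4 (48) are
+   nonzero, while INT_CST4 (5) and INT_CST4 (-2) are 0. */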
+
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+ /* Legal const for store immediate instructions */ \
+ ((C) == 'I' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 3) : \
+ (C) == 'J' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 4) : \
+ (C) == 'K' ? UNSIGNED_INT_FITS_N_BITS(VALUE, 5) : \
+ (C) == 'L' ? INT_CST4(VALUE) : \
+ 0)
+
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'G' ? crx_const_double_ok (VALUE) : \
+ 0)
+
+/*****************************************************************************/
+/* STACK LAYOUT AND CALLING CONVENTIONS. */
+/*****************************************************************************/
+
+#define STACK_GROWS_DOWNWARD
+
+#define STARTING_FRAME_OFFSET 0
+
+#define STACK_POINTER_REGNUM 15
+
+#define FRAME_POINTER_REGNUM 13
+
+#define ARG_POINTER_REGNUM 12
+
+#define STATIC_CHAIN_REGNUM 1
+
+#define RETURN_ADDRESS_REGNUM 14
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define ELIMINABLE_REGS \
+ { \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
+ }
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do { \
+ (OFFSET) = crx_initial_elimination_offset ((FROM), (TO)); \
+ } while (0)
+
+/*****************************************************************************/
+/* PASSING FUNCTION ARGUMENTS */
+/*****************************************************************************/
+
+#define ACCUMULATE_OUTGOING_ARGS (TARGET_NO_PUSH_ARGS)
+
+#define PUSH_ARGS (!TARGET_NO_PUSH_ARGS)
+
+#define PUSH_ROUNDING(BYTES) (((BYTES) + 3) & ~3)
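+
+/* PUSH_ROUNDING rounds up to a multiple of 4, e.g. PUSH_ROUNDING (5) == 8
+   and PUSH_ROUNDING (8) == 8 (worked examples). */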
+
+#ifndef CUMULATIVE_ARGS
+struct cumulative_args
+{
+ int ints;
+};
+
+#define CUMULATIVE_ARGS struct cumulative_args
+#endif
+
+/* On the CRX architecture, varargs routines should receive their
+ * parameters on the stack. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ crx_init_cumulative_args(&(CUM), (FNTYPE), (LIBNAME))
+
+#define FUNCTION_ARG_REGNO_P(REGNO) crx_function_arg_regno_p(REGNO)
+
+/*****************************************************************************/
+/* RETURNING FUNCTION VALUE */
+/*****************************************************************************/
+
+/* On the CRX, the return value is in R0 */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG(TYPE_MODE (VALTYPE), 0)
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0)
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0)
+
+#define CRX_STRUCT_VALUE_REGNUM 0
+
+/*****************************************************************************/
+/* GENERATING CODE FOR PROFILING - NOT IMPLEMENTED */
+/*****************************************************************************/
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+{ \
+ sorry ("profiler support for CRX"); \
+}
+
+/*****************************************************************************/
+/* TRAMPOLINES FOR NESTED FUNCTIONS - NOT SUPPORTED */
+/*****************************************************************************/
+
+#define TRAMPOLINE_SIZE 32
+
+/*****************************************************************************/
+/* ADDRESSING MODES */
+/*****************************************************************************/
+
+#define CONSTANT_ADDRESS_P(X) \
+ (GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST \
+ || GET_CODE (X) == CONST_INT)
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_REG 0
+
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#else
+#define REG_OK_FOR_BASE_P(X) 1
+#define REG_OK_FOR_INDEX_P(X) 1
+#endif /* REG_OK_STRICT */
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+/*****************************************************************************/
+/* CONDITION CODE STATUS */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* RELATIVE COSTS OF OPERATIONS */
+/*****************************************************************************/
+
+#define MEMORY_MOVE_COST(MODE, CLASS, IN) crx_memory_move_cost(MODE, CLASS, IN)
+/* Moving to processor register flushes pipeline - thus asymmetric */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) ((TO != GENERAL_REGS) ? 8 : 2)
+/* Assume best case (branch predicted) */
+#define BRANCH_COST(speed_p, predictable_p) 2
+
+#define SLOW_BYTE_ACCESS 1
+
+/*****************************************************************************/
+/* DIVIDING THE OUTPUT INTO SECTIONS */
+/*****************************************************************************/
+
+#define TEXT_SECTION_ASM_OP "\t.section\t.text"
+
+#define DATA_SECTION_ASM_OP "\t.section\t.data"
+
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/*****************************************************************************/
+/* POSITION INDEPENDENT CODE */
+/*****************************************************************************/
+
+#define PIC_OFFSET_TABLE_REGNUM 12
+
+#define LEGITIMATE_PIC_OPERAND_P(X) 1
+
+/*****************************************************************************/
+/* ASSEMBLER FORMAT */
+/*****************************************************************************/
+
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ asm_fprintf (STREAM, "%U%s", (*targetm.strip_name_encoding) (NAME));
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/*****************************************************************************/
+/* INSTRUCTION OUTPUT */
+/*****************************************************************************/
+
+#define REGISTER_NAMES \
+ { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "ra", "sp", \
+ "lo", "hi", "cc" \
+ }
+
+#define PRINT_OPERAND(STREAM, X, CODE) \
+ crx_print_operand(STREAM, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(STREAM, ADDR) \
+ crx_print_operand_address(STREAM, ADDR)
+
+/*****************************************************************************/
+/* OUTPUT OF DISPATCH TABLES */
+/*****************************************************************************/
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ asm_fprintf ((STREAM), "\t.long\t.L%d\n", (VALUE))
+
+/*****************************************************************************/
+/* ALIGNMENT IN ASSEMBLER FILE */
+/*****************************************************************************/
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ asm_fprintf ((STREAM), "\t.align\t%d\n", 1 << (POWER))
+
+/*****************************************************************************/
+/* MISCELLANEOUS PARAMETERS */
+/*****************************************************************************/
+
+#define CASE_VECTOR_MODE Pmode
+
+#define MOVE_MAX 4
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define STORE_FLAG_VALUE 1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE QImode
+
+#endif /* ! GCC_CRX_H */
diff --git a/gcc/config/crx/crx.md b/gcc/config/crx/crx.md
new file mode 100644
index 000000000..229e345d3
--- /dev/null
+++ b/gcc/config/crx/crx.md
@@ -0,0 +1,899 @@
+;; GCC machine description for CRX.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2007
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register numbers
+
+(define_constants
+ [(SP_REGNUM 15) ; Stack pointer
+ (RA_REGNUM 14) ; Return address
+ (LO_REGNUM 16) ; LO register
+ (HI_REGNUM 17) ; HI register
+ (CC_REGNUM 18) ; Condition code register
+ ]
+)
+
+(define_attr "length" "" ( const_int 6 ))
+
+(define_asm_attributes
+ [(set_attr "length" "6")]
+)
+
+;; Predicates
+
+(define_predicate "u4bits_operand"
+ (match_code "const_int,const_double")
+ {
+ if (GET_CODE (op) == CONST_DOUBLE)
+ return crx_const_double_ok (op);
+ return (UNSIGNED_INT_FITS_N_BITS(INTVAL(op), 4)) ? 1 : 0;
+ }
+)
+
+(define_predicate "cst4_operand"
+ (and (match_code "const_int")
+ (match_test "INT_CST4(INTVAL(op))")))
+
+(define_predicate "reg_or_u4bits_operand"
+ (ior (match_operand 0 "u4bits_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "reg_or_cst4_operand"
+ (ior (match_operand 0 "cst4_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "reg_or_sym_operand"
+ (ior (match_code "symbol_ref")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "cc_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == CC_REGNUM")))
+
+(define_predicate "nosp_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "REGNO (op) != SP_REGNUM")))
+
+(define_predicate "store_operand"
+ (and (match_operand 0 "memory_operand")
+ (not (match_operand 0 "push_operand"))))
+
+;; Mode Macro Definitions
+
+(define_mode_iterator ALLMT [QI HI SI SF DI DF])
+(define_mode_iterator CRXMM [QI HI SI SF])
+(define_mode_iterator CRXIM [QI HI SI])
+(define_mode_iterator DIDFM [DI DF])
+(define_mode_iterator SISFM [SI SF])
+(define_mode_iterator SHORT [QI HI])
+
+(define_mode_attr tIsa [(QI "b") (HI "w") (SI "d") (SF "d")])
+(define_mode_attr lImmArith [(QI "4") (HI "4") (SI "6")])
+(define_mode_attr lImmRotl [(QI "2") (HI "2") (SI "4")])
+(define_mode_attr IJK [(QI "I") (HI "J") (SI "K")])
+(define_mode_attr iF [(QI "i") (HI "i") (SI "i") (DI "i") (SF "F") (DF "F")])
+(define_mode_attr JG [(QI "J") (HI "J") (SI "J") (DI "J") (SF "G") (DF "G")])
+; In HI or QI mode we push 4 bytes.
+(define_mode_attr pushCnstr [(QI "X") (HI "X") (SI "<") (SF "<") (DI "<") (DF "<")])
+(define_mode_attr tpush [(QI "") (HI "") (SI "") (SF "") (DI "sp, ") (DF "sp, ")])
+(define_mode_attr lpush [(QI "2") (HI "2") (SI "2") (SF "2") (DI "4") (DF "4")])
+
+
+;; Code Macro Definitions
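+;;
+;; Each code iterator below expands its pattern once per listed code; for
+;; example, the <sIsa>mulsidi3 pattern yields both mulsidi3 (sign_extend)
+;; and umulsidi3 (zero_extend).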
+
+(define_code_iterator sz_xtnd [sign_extend zero_extend])
+(define_code_attr sIsa [(sign_extend "") (zero_extend "u")])
+(define_code_attr sPat [(sign_extend "s") (zero_extend "u")])
+(define_code_attr szPat [(sign_extend "") (zero_extend "zero_")])
+(define_code_attr szIsa [(sign_extend "s") (zero_extend "z")])
+
+(define_code_iterator sh_oprnd [ashift ashiftrt lshiftrt])
+(define_code_attr shIsa [(ashift "ll") (ashiftrt "ra") (lshiftrt "rl")])
+(define_code_attr shPat [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr")])
+
+(define_code_iterator mima_oprnd [smax umax smin umin])
+(define_code_attr mimaIsa [(smax "maxs") (umax "maxu") (smin "mins") (umin "minu")])
+
+;; Addition Instructions
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0,0")
+ (match_operand:DI 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "addd\t%L2, %L1\;addcd\t%H2, %H1"
+ [(set_attr "length" "4,12")]
+)
+
+(define_insn "add<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (plus:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "add<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Subtract Instructions
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:DI 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "subd\t%L2, %L1\;subcd\t%H2, %H1"
+ [(set_attr "length" "4,12")]
+)
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (minus:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "sub<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Multiply Instructions
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (mult:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "mul<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Widening-multiplication Instructions
+
+(define_insn "<sIsa>mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (mult:DI (sz_xtnd:DI (match_operand:SI 1 "register_operand" "%r"))
+ (sz_xtnd:DI (match_operand:SI 2 "register_operand" "r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "mull<sPat>d\t%2, %1"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<sIsa>mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "%0"))
+ (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "mul<sPat>wd\t%2, %0"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<sIsa>mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sz_xtnd:HI (match_operand:QI 1 "register_operand" "%0"))
+ (sz_xtnd:HI (match_operand:QI 2 "register_operand" "r"))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "mul<sPat>bw\t%2, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Logical Instructions - and
+
+(define_insn "and<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (and:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "and<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Logical Instructions - or
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (ior:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "or<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Logical Instructions - xor
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (xor:CRXIM (match_operand:CRXIM 1 "register_operand" "%0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "xor<tIsa>\t%2, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+;; Sign and Zero Extend Instructions
+
+(define_insn "<szPat>extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "<szIsa>extwd\t%1, %0"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<szPat>extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sz_xtnd:SI (match_operand:QI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "<szIsa>extbd\t%1, %0"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<szPat>extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sz_xtnd:HI (match_operand:QI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "<szIsa>extbw\t%1, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Negation Instructions
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r")
+ (neg:CRXIM (match_operand:CRXIM 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "neg<tIsa>\t%1, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Absolute Instructions
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r")
+ (abs:CRXIM (match_operand:CRXIM 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "abs<tIsa>\t%1, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Max and Min Instructions
+
+(define_insn "<code><mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r")
+ (mima_oprnd:CRXIM (match_operand:CRXIM 1 "register_operand" "%0")
+ (match_operand:CRXIM 2 "register_operand" "r")))]
+ ""
+ "<mimaIsa><tIsa>\t%2, %0"
+ [(set_attr "length" "4")]
+)
+
+;; One's Complement
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r")
+ (not:CRXIM (match_operand:CRXIM 1 "register_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "xor<tIsa>\t$-1, %0"
+ [(set_attr "length" "2")]
+)
+
+;; Rotate Instructions
+
+(define_insn "rotl<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (rotate:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0")
+ (match_operand:CRXIM 2 "nonmemory_operand" "r,<IJK>")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ rotl<tIsa>\t%2, %0
+ rot<tIsa>\t%2, %0"
+ [(set_attr "length" "4,<lImmRotl>")]
+)
+
+(define_insn "rotr<mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r")
+ (rotatert:CRXIM (match_operand:CRXIM 1 "register_operand" "0")
+ (match_operand:CRXIM 2 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "rotr<tIsa>\t%2, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Arithmetic Left and Right Shift Instructions
+
+(define_insn "<shPat><mode>3"
+ [(set (match_operand:CRXIM 0 "register_operand" "=r,r")
+ (sh_oprnd:CRXIM (match_operand:CRXIM 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "r,<IJK>")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "s<shIsa><tIsa>\t%2, %0"
+ [(set_attr "length" "2,2")]
+)
+
+;; Bit Set Instructions
+
+(define_insn "extv"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ ""
+ {
+ static char buf[100];
+ int strpntr;
+ int size = INTVAL (operands[2]);
+ int pos = INTVAL (operands[3]);
+ strpntr = sprintf (buf, "ram\t$%d, $31, $%d, %%1, %%0\;",
+ BITS_PER_WORD - (size + pos), BITS_PER_WORD - size);
+ sprintf (buf + strpntr, "srad\t$%d, %%0", BITS_PER_WORD - size);
+ return buf;
+ }
+ [(set_attr "length" "6")]
+)
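+;; An illustrative C equivalent (an illustration only, assuming
+;; BITS_PER_WORD == 32): extracting SIZE bits at position POS as a signed
+;; value is ((int) (x << (32 - size - pos))) >> (32 - size), which is what
+;; the ram/srad pair above computes.
+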
+
+(define_insn "extzv"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ ""
+ {
+ static char buf[40];
+ int size = INTVAL (operands[2]);
+ int pos = INTVAL (operands[3]);
+ sprintf (buf, "ram\t$%d, $%d, $0, %%1, %%0",
+ (BITS_PER_WORD - pos) % BITS_PER_WORD, size - 1);
+ return buf;
+ }
+ [(set_attr "length" "4")]
+)
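+;; Illustrative C equivalent: the unsigned extract above is
+;; (x >> pos) & ((1u << size) - 1); the single ram instruction rotates the
+;; field down to bit 0 and masks it to SIZE bits.
+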
+
+(define_insn "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r"))]
+ ""
+ {
+ static char buf[40];
+ int size = INTVAL (operands[1]);
+ int pos = INTVAL (operands[2]);
+ sprintf (buf, "rim\t$%d, $%d, $%d, %%3, %%0",
+ pos, size + pos - 1, pos);
+ return buf;
+ }
+ [(set_attr "length" "4")]
+)
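+;; Illustrative C equivalent, with mask = (1u << size) - 1:
+;; x = (x & ~(mask << pos)) | ((y & mask) << pos); rim places %3 into
+;; bits POS .. POS+SIZE-1 of %0.
+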
+
+;; Move Instructions
+
+(define_expand "mov<mode>"
+ [(set (match_operand:ALLMT 0 "nonimmediate_operand" "")
+ (match_operand:ALLMT 1 "general_operand" ""))]
+ ""
+ {
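+    /* No mem-to-mem moves: when the destination is not a register, force
+       the source into one unless it is already valid for the store (a
+       non-SP register for a push; a register or a constant that fits in
+       4 unsigned bits otherwise).  */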
+ if (!(reload_in_progress || reload_completed))
+ {
+ if (!register_operand (operands[0], <MODE>mode))
+ {
+ if (push_operand (operands[0], <MODE>mode) ?
+ !nosp_reg_operand (operands[1], <MODE>mode) :
+ !reg_or_u4bits_operand (operands[1], <MODE>mode))
+ {
+ operands[1] = copy_to_mode_reg (<MODE>mode, operands[1]);
+ }
+ }
+ }
+ }
+)
+
+(define_insn "push<mode>_internal"
+ [(set (match_operand:ALLMT 0 "push_operand" "=<pushCnstr>")
+ (match_operand:ALLMT 1 "nosp_reg_operand" "b"))]
+ ""
+ "push\t<tpush>%p1"
+ [(set_attr "length" "<lpush>")]
+)
+
+(define_insn "mov<mode>_regs"
+ [(set (match_operand:SISFM 0 "register_operand" "=r, r, r, k")
+ (match_operand:SISFM 1 "nonmemory_operand" "r, <iF>, k, r"))]
+ ""
+ "@
+ movd\t%1, %0
+ movd\t%1, %0
+ mfpr\t%1, %0
+ mtpr\t%1, %0"
+ [(set_attr "length" "2,6,4,4")]
+)
+
+(define_insn "mov<mode>_regs"
+ [(set (match_operand:DIDFM 0 "register_operand" "=r, r, r, k")
+ (match_operand:DIDFM 1 "nonmemory_operand" "r, <iF>, k, r"))]
+ ""
+ {
+ switch (which_alternative)
+ {
+ case 0: if (REGNO (operands[0]) > REGNO (operands[1]))
+ return "movd\t%H1, %H0\;movd\t%L1, %L0";
+ else
+ return "movd\t%L1, %L0\;movd\t%H1, %H0";
+ case 1: return "movd\t%H1, %H0\;movd\t%L1, %L0";
+ case 2: return "mfpr\t%H1, %H0\;mfpr\t%L1, %L0";
+ case 3: return "mtpr\t%H1, %H0\;mtpr\t%L1, %L0";
+ default: gcc_unreachable ();
+ }
+ }
+ [(set_attr "length" "4,12,8,8")]
+)
+
+(define_insn "mov<mode>_regs" ; no HI/QI mode in HILO regs
+ [(set (match_operand:SHORT 0 "register_operand" "=r, r")
+ (match_operand:SHORT 1 "nonmemory_operand" "r, i"))]
+ ""
+ "mov<tIsa>\t%1, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+(define_insn "mov<mode>_load"
+ [(set (match_operand:CRXMM 0 "register_operand" "=r")
+ (match_operand:CRXMM 1 "memory_operand" "m"))]
+ ""
+ "load<tIsa>\t%1, %0"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "mov<mode>_load"
+ [(set (match_operand:DIDFM 0 "register_operand" "=r")
+ (match_operand:DIDFM 1 "memory_operand" "m"))]
+ ""
+ {
+ rtx first_dest_reg = gen_rtx_REG (SImode, REGNO (operands[0]));
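+    /* If the low-part destination register also appears in the address,
+       load the high part first so the address is not clobbered before
+       the second load.  */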
+ if (reg_overlap_mentioned_p (first_dest_reg, operands[1]))
+ return "loadd\t%H1, %H0\;loadd\t%L1, %L0";
+ return "loadd\t%L1, %L0\;loadd\t%H1, %H0";
+ }
+ [(set_attr "length" "12")]
+)
+
+(define_insn "mov<mode>_store"
+ [(set (match_operand:CRXMM 0 "store_operand" "=m, m")
+ (match_operand:CRXMM 1 "reg_or_u4bits_operand" "r, <JG>"))]
+ ""
+ "stor<tIsa>\t%1, %0"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "mov<mode>_store"
+ [(set (match_operand:DIDFM 0 "store_operand" "=m, m")
+ (match_operand:DIDFM 1 "reg_or_u4bits_operand" "r, <JG>"))]
+ ""
+ "stord\t%H1, %H0\;stord\t%L1, %L0"
+ [(set_attr "length" "12")]
+)
+
+;; Movmem Instruction
+
+(define_expand "movmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:SI 2 "nonmemory_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))]
+ ""
+ {
+ if (crx_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+ }
+)
+
+;; Compare and Branch Instructions
+
+(define_insn "cbranchcc4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:CC 1 "cc_reg_operand" "r")
+ (match_operand 2 "cst4_operand" "L")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ "b%d0\t%l3"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:CRXIM 1 "register_operand" "r")
+ (match_operand:CRXIM 2 "reg_or_cst4_operand" "rL")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "cmpb%d0<tIsa>\t%2, %1, %l3"
+ [(set_attr "length" "6")]
+)
+
+
+;; Scond Instructions
+
+(define_expand "cstore<mode>4"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:CRXIM 2 "register_operand" "")
+ (match_operand:CRXIM 3 "nonmemory_operand" "")))
+ (set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)]))]
+ ""
+ ""
+)
+
+(define_insn "cmp<mode>_internal"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:CRXIM 0 "register_operand" "r,r")
+ (match_operand:CRXIM 1 "nonmemory_operand" "r,i")))]
+ ""
+ "cmp<tIsa>\t%1, %0"
+ [(set_attr "length" "2,<lImmArith>")]
+)
+
+(define_insn "sCOND_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)]))]
+ ""
+ "s%d1\t%0"
+ [(set_attr "length" "2")]
+)
+
+;; Jumps and Branches
+
+(define_insn "indirect_jump_return"
+ [(parallel
+ [(set (pc)
+ (reg:SI RA_REGNUM))
+ (return)])
+ ]
+ "reload_completed"
+ "jump\tra"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "reg_or_sym_operand" "r,i"))]
+ ""
+ "@
+ jump\t%0
+ br\t%a0"
+ [(set_attr "length" "2,6")]
+)
+
+(define_insn "interrupt_return"
+ [(parallel
+ [(unspec_volatile [(const_int 0)] 0)
+ (return)])]
+ ""
+ {
+ return crx_prepare_push_pop_string (1);
+ }
+ [(set_attr "length" "14")]
+)
+
+(define_insn "jump_to_imm"
+ [(set (pc)
+ (match_operand 0 "immediate_operand" "i"))]
+ ""
+ "br\t%c0"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "br\t%l0"
+ [(set_attr "length" "6")]
+)
+
+;; Function Prologue and Epilogue
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ {
+ crx_expand_prologue ();
+ DONE;
+ }
+)
+
+(define_insn "push_for_prologue"
+ [(parallel
+ [(set (reg:SI SP_REGNUM)
+ (minus:SI (reg:SI SP_REGNUM)
+ (match_operand:SI 0 "immediate_operand" "i")))])]
+ "reload_completed"
+ {
+ return crx_prepare_push_pop_string (0);
+ }
+ [(set_attr "length" "4")]
+)
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ {
+ crx_expand_epilogue ();
+ DONE;
+ }
+)
+
+(define_insn "pop_and_popret_return"
+ [(parallel
+ [(set (reg:SI SP_REGNUM)
+ (plus:SI (reg:SI SP_REGNUM)
+ (match_operand:SI 0 "immediate_operand" "i")))
+ (use (reg:SI RA_REGNUM))
+ (return)])
+ ]
+ "reload_completed"
+ {
+ return crx_prepare_push_pop_string (1);
+ }
+ [(set_attr "length" "4")]
+)
+
+(define_insn "popret_RA_return"
+ [(parallel
+ [(use (reg:SI RA_REGNUM))
+ (return)])
+ ]
+ "reload_completed"
+ "popret\tra"
+ [(set_attr "length" "2")]
+)
+
+;; Table Jump
+
+(define_insn "tablejump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref:SI (match_operand 1 "" "" )))]
+ ""
+ "jump\t%0"
+ [(set_attr "length" "2")]
+)
+
+;; Call Instructions
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "" ""))]
+ ""
+ {
+ emit_call_insn (gen_crx_call (operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_expand "crx_call"
+ [(parallel
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RA_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "crx_call_insn_branch"
+ [(call (mem:QI (match_operand:SI 0 "immediate_operand" "i"))
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "register_operand" "+r"))]
+ ""
+ "bal\tra, %a0"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "crx_call_insn_jump"
+ [(call (mem:QI (match_operand:SI 0 "register_operand" "r"))
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "register_operand" "+r"))]
+ ""
+ "jal\t%0"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "crx_call_insn_jalid"
+ [(call (mem:QI (mem:SI (plus:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r"))))
+ (match_operand 2 "" ""))
+ (clobber (match_operand:SI 3 "register_operand" "+r"))]
+ ""
+ "jalid\t%0, %1"
+ [(set_attr "length" "4")]
+)
+
+;; Call Value Instructions
+
+(define_expand "call_value"
+ [(set (match_operand 0 "general_operand" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand 2 "" "")))]
+ ""
+ {
+ emit_call_insn (gen_crx_call_value (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+)
+
+(define_expand "crx_call_value"
+ [(parallel
+ [(set (match_operand 0 "general_operand" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RA_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "crx_call_value_insn_branch"
+ [(set (match_operand 0 "" "=g")
+ (call (mem:QI (match_operand:SI 1 "immediate_operand" "i"))
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "register_operand" "+r"))]
+ ""
+ "bal\tra, %a1"
+ [(set_attr "length" "6")]
+)
+
+(define_insn "crx_call_value_insn_jump"
+ [(set (match_operand 0 "" "=g")
+ (call (mem:QI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "register_operand" "+r"))]
+ ""
+ "jal\t%1"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "crx_call_value_insn_jalid"
+ [(set (match_operand 0 "" "=g")
+ (call (mem:QI (mem:SI (plus:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))))
+ (match_operand 3 "" "")))
+ (clobber (match_operand:SI 4 "register_operand" "+r"))]
+ ""
+ "jalid\t%0, %1"
+ [(set_attr "length" "4")]
+)
+
+;; Nop
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ ""
+)
+
+;; Multiply and Accumulate Instructions
+
+(define_insn "<sPat>madsidi3"
+ [(set (match_operand:DI 0 "register_operand" "+k")
+ (plus:DI
+ (mult:DI (sz_xtnd:DI (match_operand:SI 1 "register_operand" "%r"))
+ (sz_xtnd:DI (match_operand:SI 2 "register_operand" "r")))
+ (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_MAC"
+ "mac<sPat>d\t%2, %1"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<sPat>madhisi3"
+ [(set (match_operand:SI 0 "register_operand" "+l")
+ (plus:SI
+ (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "%r"))
+ (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r")))
+ (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_MAC"
+ "mac<sPat>w\t%2, %1"
+ [(set_attr "length" "4")]
+)
+
+(define_insn "<sPat>madqihi3"
+ [(set (match_operand:HI 0 "register_operand" "+l")
+ (plus:HI
+ (mult:HI (sz_xtnd:HI (match_operand:QI 1 "register_operand" "%r"))
+ (sz_xtnd:HI (match_operand:QI 2 "register_operand" "r")))
+ (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_MAC"
+ "mac<sPat>b\t%2, %1"
+ [(set_attr "length" "4")]
+)
+
+;; Loop Instructions
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+ {
+ if (INTVAL (operands[3]) > crx_loop_nesting)
+ FAIL;
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ emit_jump_insn (gen_doloop_end_si (operands[4], operands[0]));
+ break;
+ case HImode:
+ emit_jump_insn (gen_doloop_end_hi (operands[4], operands[0]));
+ break;
+ case QImode:
+ emit_jump_insn (gen_doloop_end_qi (operands[4], operands[0]));
+ break;
+ default:
+ FAIL;
+ }
+ DONE;
+ }
+)
+
+; CRX dbnz[bwd] is used explicitly (see above) but is also matched by the combiner.
+
+(define_insn "doloop_end_<mode>"
+ [(set (pc)
+ (if_then_else (ne (match_operand:CRXIM 1 "register_operand" "+r,!m")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_dup 1) (plus:CRXIM (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:CRXIM 2 "=X,r"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ dbnz<tIsa>\t%1, %l0
+ load<tIsa>\t%1, %2\;add<tIsa>\t$-1, %2\;stor<tIsa>\t%2, %1\;bne\t%l0"
+ [(set_attr "length" "6, 12")]
+)
diff --git a/gcc/config/crx/crx.opt b/gcc/config/crx/crx.opt
new file mode 100644
index 000000000..7ff0be0e3
--- /dev/null
+++ b/gcc/config/crx/crx.opt
@@ -0,0 +1,34 @@
+; Options for the National Semiconductor CRX port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mmac
+Target Report Mask(MAC)
+Support multiply accumulate instructions
+
+mno-push-args
+Target Report RejectNegative Mask(NO_PUSH_ARGS)
+Do not use push to store function arguments
+
+mloop-nesting=
+Common RejectNegative Joined UInteger Var(crx_loop_nesting) Init(12)
+Restrict doloop to the given nesting level
+
+mdebug-addr
+Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented
diff --git a/gcc/config/crx/t-crx b/gcc/config/crx/t-crx
new file mode 100644
index 000000000..8bb62c652
--- /dev/null
+++ b/gcc/config/crx/t-crx
@@ -0,0 +1,37 @@
+# CRX Target Makefile
+#
+# Copyright (C) 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# MinGW-specific compilation fixes
+USE_COLLECT2 =
+STMP_FIXINC =
+
+# Software emulation for integer div and mod
+LIB2FUNCS_EXTRA = $(srcdir)/config/udivmodsi4.c $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c
+
+# Build the floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
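+
+# fp-bit.c provides the double-precision routines by default; defining
+# FLOAT first selects the single-precision set instead.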
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
diff --git a/gcc/config/darwin-64.c b/gcc/config/darwin-64.c
new file mode 100644
index 000000000..a012e9dbc
--- /dev/null
+++ b/gcc/config/darwin-64.c
@@ -0,0 +1,72 @@
+/* Functions shipped in the ppc64 and x86_64 version of libgcc_s.1.dylib
+ in older Mac OS X versions, preserved for backwards compatibility.
+ Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#if defined (__ppc64__) || defined (__x86_64__)
+/* Many of these functions have probably never been used by anyone
+ anywhere on these targets, but it's hard to prove this, so they're defined
+ here. None are actually necessary, as demonstrated below by defining
+ each function using the operation it implements. */
+
+typedef long DI;
+typedef unsigned long uDI;
+typedef int SI;
+typedef unsigned int uSI;
+typedef int word_type __attribute__ ((mode (__word__)));
+
+DI __ashldi3 (DI x, word_type c);
+DI __ashrdi3 (DI x, word_type c);
+int __clzsi2 (uSI x);
+word_type __cmpdi2 (DI x, DI y);
+int __ctzsi2 (uSI x);
+DI __divdi3 (DI x, DI y);
+uDI __lshrdi3 (uDI x, word_type c);
+DI __moddi3 (DI x, DI y);
+DI __muldi3 (DI x, DI y);
+DI __negdi2 (DI x);
+int __paritysi2 (uSI x);
+int __popcountsi2 (uSI x);
+word_type __ucmpdi2 (uDI x, uDI y);
+uDI __udivdi3 (uDI x, uDI y);
+uDI __udivmoddi4 (uDI x, uDI y, uDI *r);
+uDI __umoddi3 (uDI x, uDI y);
+
+DI __ashldi3 (DI x, word_type c) { return x << c; }
+DI __ashrdi3 (DI x, word_type c) { return x >> c; }
+int __clzsi2 (uSI x) { return __builtin_clz (x); }
+word_type __cmpdi2 (DI x, DI y) { return x < y ? 0 : x == y ? 1 : 2; }
+int __ctzsi2 (uSI x) { return __builtin_ctz (x); }
+DI __divdi3 (DI x, DI y) { return x / y; }
+uDI __lshrdi3 (uDI x, word_type c) { return x >> c; }
+DI __moddi3 (DI x, DI y) { return x % y; }
+DI __muldi3 (DI x, DI y) { return x * y; }
+DI __negdi2 (DI x) { return -x; }
+int __paritysi2 (uSI x) { return __builtin_parity (x); }
+int __popcountsi2 (uSI x) { return __builtin_popcount (x); }
+word_type __ucmpdi2 (uDI x, uDI y) { return x < y ? 0 : x == y ? 1 : 2; }
+uDI __udivdi3 (uDI x, uDI y) { return x / y; }
+uDI __udivmoddi4 (uDI x, uDI y, uDI *r) { *r = x % y; return x / y; }
+uDI __umoddi3 (uDI x, uDI y) { return x % y; }
+
+#endif /* __ppc64__ || __x86_64__ */
diff --git a/gcc/config/darwin-c.c b/gcc/config/darwin-c.c
new file mode 100644
index 000000000..0c713ba7e
--- /dev/null
+++ b/gcc/config/darwin-c.c
@@ -0,0 +1,717 @@
+/* Darwin support needed only by C/C++ frontends.
+ Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "incpath.h"
+#include "c-family/c-common.h"
+#include "c-family/c-pragma.h"
+#include "c-family/c-format.h"
+#include "diagnostic-core.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "cppdefault.h"
+#include "prefix.h"
+#include "target.h"
+#include "target-def.h"
+
+/* Pragmas. */
+
+#define BAD(gmsgid) do { warning (OPT_Wpragmas, gmsgid); return; } while (0)
+#define BAD2(msgid, arg) do { warning (OPT_Wpragmas, msgid, arg); return; } while (0)
+
+static bool using_frameworks = false;
+
+static const char *find_subframework_header (cpp_reader *pfile, const char *header,
+ cpp_dir **dirp);
+
+typedef struct align_stack
+{
+ int alignment;
+ struct align_stack * prev;
+} align_stack;
+
+static struct align_stack * field_align_stack = NULL;
+
+/* Maintain a small stack of alignments. This is similar to pragma
+ pack's stack, but simpler. */
+
+static void
+push_field_alignment (int bit_alignment)
+{
+ align_stack *entry = XNEW (align_stack);
+
+ entry->alignment = maximum_field_alignment;
+ entry->prev = field_align_stack;
+ field_align_stack = entry;
+
+ maximum_field_alignment = bit_alignment;
+}
+
+static void
+pop_field_alignment (void)
+{
+ if (field_align_stack)
+ {
+ align_stack *entry = field_align_stack;
+
+ maximum_field_alignment = entry->alignment;
+ field_align_stack = entry->prev;
+ free (entry);
+ }
+ else
+ error ("too many #pragma options align=reset");
+}
+
+/* Handlers for Darwin-specific pragmas. */
+
+void
+darwin_pragma_ignore (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ /* Do nothing. */
+}
+
+/* #pragma options align={mac68k|power|reset} */
+
+void
+darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ const char *arg;
+ tree t, x;
+
+ if (pragma_lex (&t) != CPP_NAME)
+ BAD ("malformed '#pragma options', ignoring");
+ arg = IDENTIFIER_POINTER (t);
+ if (strcmp (arg, "align"))
+ BAD ("malformed '#pragma options', ignoring");
+ if (pragma_lex (&t) != CPP_EQ)
+ BAD ("malformed '#pragma options', ignoring");
+ if (pragma_lex (&t) != CPP_NAME)
+ BAD ("malformed '#pragma options', ignoring");
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of '#pragma options'");
+
+ arg = IDENTIFIER_POINTER (t);
+ if (!strcmp (arg, "mac68k"))
+ push_field_alignment (16);
+ else if (!strcmp (arg, "power"))
+ push_field_alignment (0);
+ else if (!strcmp (arg, "reset"))
+ pop_field_alignment ();
+ else
+ BAD ("malformed '#pragma options align={mac68k|power|reset}', ignoring");
+}
+
+/* #pragma unused ([var {, var}*]) */
+
+void
+darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ tree decl, x;
+ int tok;
+
+ if (pragma_lex (&x) != CPP_OPEN_PAREN)
+ BAD ("missing '(' after '#pragma unused', ignoring");
+
+ while (1)
+ {
+ tok = pragma_lex (&decl);
+ if (tok == CPP_NAME && decl)
+ {
+ tree local = lookup_name (decl);
+ if (local && (TREE_CODE (local) == PARM_DECL
+ || TREE_CODE (local) == VAR_DECL))
+ {
+ TREE_USED (local) = 1;
+ DECL_READ_P (local) = 1;
+ }
+ tok = pragma_lex (&x);
+ if (tok != CPP_COMMA)
+ break;
+ }
+ }
+
+ if (tok != CPP_CLOSE_PAREN)
+ BAD ("missing ')' after '#pragma unused', ignoring");
+
+ if (pragma_lex (&x) != CPP_EOF)
+ BAD ("junk at end of '#pragma unused'");
+}
+
+/* Parse the ms_struct pragma. */
+void
+darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ const char *arg;
+ tree t;
+
+ if (pragma_lex (&t) != CPP_NAME)
+ BAD ("malformed '#pragma ms_struct', ignoring");
+ arg = IDENTIFIER_POINTER (t);
+
+ if (!strcmp (arg, "on"))
+ darwin_ms_struct = true;
+ else if (!strcmp (arg, "off") || !strcmp (arg, "reset"))
+ darwin_ms_struct = false;
+ else
+ BAD ("malformed '#pragma ms_struct {on|off|reset}', ignoring");
+
+ if (pragma_lex (&t) != CPP_EOF)
+ BAD ("junk at end of '#pragma ms_struct'");
+}
+
+static struct frameworks_in_use {
+ size_t len;
+ const char *name;
+ cpp_dir* dir;
+} *frameworks_in_use;
+static int num_frameworks = 0;
+static int max_frameworks = 0;
+
+
+/* Remember which frameworks have been seen, so that we can ensure
+ that all uses of that framework come from the same framework. DIR
+ is the place where the named framework NAME, which is of length
+ LEN, was found. We copy the directory name from NAME, as it will be
+ freed by others. */
+
+static void
+add_framework (const char *name, size_t len, cpp_dir *dir)
+{
+ char *dir_name;
+ int i;
+ for (i = 0; i < num_frameworks; ++i)
+ {
+ if (len == frameworks_in_use[i].len
+ && strncmp (name, frameworks_in_use[i].name, len) == 0)
+ {
+ return;
+ }
+ }
+ if (i >= max_frameworks)
+ {
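+      /* Grow the table geometrically; the second statement bumps the
+         size to 1 when the table starts out empty.  */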
+ max_frameworks = i*2;
+ max_frameworks += i == 0;
+ frameworks_in_use = XRESIZEVEC (struct frameworks_in_use,
+ frameworks_in_use, max_frameworks);
+ }
+ dir_name = XNEWVEC (char, len + 1);
+ memcpy (dir_name, name, len);
+ dir_name[len] = '\0';
+ frameworks_in_use[num_frameworks].name = dir_name;
+ frameworks_in_use[num_frameworks].len = len;
+ frameworks_in_use[num_frameworks].dir = dir;
+ ++num_frameworks;
+}
+
+/* Recall if we have seen the named framework NAME before, and where
+ we saw it. NAME is LEN bytes long. The return value is the place
+ where it was seen before. */
+
+static struct cpp_dir*
+find_framework (const char *name, size_t len)
+{
+ int i;
+ for (i = 0; i < num_frameworks; ++i)
+ {
+ if (len == frameworks_in_use[i].len
+ && strncmp (name, frameworks_in_use[i].name, len) == 0)
+ {
+ return frameworks_in_use[i].dir;
+ }
+ }
+ return 0;
+}
+
+/* There are two directories in a framework that contain header files,
+ Headers and PrivateHeaders. We search Headers first as it is more
+ common to upgrade a header from PrivateHeaders to Headers and when
+   that is done, the old one might hang around and be out of date,
+ causing grief. */
+
+struct framework_header {const char * dirName; int dirNameLen; };
+static struct framework_header framework_header_dirs[] = {
+ { "Headers", 7 },
+ { "PrivateHeaders", 14 },
+ { NULL, 0 }
+};
+
+/* Returns a pointer to a malloced string that contains the real pathname
+ to the file, given the base name and the name. */
+
+static char *
+framework_construct_pathname (const char *fname, cpp_dir *dir)
+{
+ char *buf;
+ size_t fname_len, frname_len;
+ cpp_dir *fast_dir;
+ char *frname;
+ struct stat st;
+ int i;
+
+ /* Framework names must have a / in them. */
+ buf = strchr (fname, '/');
+ if (buf)
+ fname_len = buf - fname;
+ else
+ return 0;
+
+ fast_dir = find_framework (fname, fname_len);
+
+ /* Framework includes must all come from one framework. */
+ if (fast_dir && dir != fast_dir)
+ return 0;
+
+ frname = XNEWVEC (char, strlen (fname) + dir->len + 2
+ + strlen(".framework/") + strlen("PrivateHeaders"));
+ strncpy (&frname[0], dir->name, dir->len);
+ frname_len = dir->len;
+ if (frname_len && frname[frname_len-1] != '/')
+ frname[frname_len++] = '/';
+ strncpy (&frname[frname_len], fname, fname_len);
+ frname_len += fname_len;
+ strncpy (&frname[frname_len], ".framework/", strlen (".framework/"));
+ frname_len += strlen (".framework/");
+
+ if (fast_dir == 0)
+ {
+ frname[frname_len-1] = 0;
+ if (stat (frname, &st) == 0)
+ {
+ /* As soon as we find the first instance of the framework,
+ we stop and never use any later instance of that
+ framework. */
+ add_framework (fname, fname_len, dir);
+ }
+ else
+ {
+ /* If we can't find the parent directory, no point looking
+ further. */
+ free (frname);
+ return 0;
+ }
+ frname[frname_len-1] = '/';
+ }
+
+ /* Append framework_header_dirs and header file name */
+ for (i = 0; framework_header_dirs[i].dirName; i++)
+ {
+ strncpy (&frname[frname_len],
+ framework_header_dirs[i].dirName,
+ framework_header_dirs[i].dirNameLen);
+ strcpy (&frname[frname_len + framework_header_dirs[i].dirNameLen],
+ &fname[fname_len]);
+
+ if (stat (frname, &st) == 0)
+ return frname;
+ }
+
+ free (frname);
+ return 0;
+}
+
+/* Search for FNAME in sub-frameworks. pname is the context that we
+ wish to search in. Return the path the file was found at,
+ otherwise return 0. */
+
+static const char*
+find_subframework_file (const char *fname, const char *pname)
+{
+ char *sfrname;
+ const char *dot_framework = ".framework/";
+ char *bufptr;
+ int sfrname_len, i, fname_len;
+ struct cpp_dir *fast_dir;
+ static struct cpp_dir subframe_dir;
+ struct stat st;
+
+ bufptr = strchr (fname, '/');
+
+ /* Subframework files must have / in the name. */
+ if (bufptr == 0)
+ return 0;
+
+ fname_len = bufptr - fname;
+ fast_dir = find_framework (fname, fname_len);
+
+  /* A subframework header filename includes the parent framework name
+     and the header name, in the "CarbonCore/OSUtils.h" form.  If it
+     does not include a slash, it is not a subframework include. */
+ bufptr = strstr (pname, dot_framework);
+
+ /* If the parent header is not of any framework, then this header
+ cannot be part of any subframework. */
+ if (!bufptr)
+ return 0;
+
+ /* Now translate. For example, +- bufptr
+ fname = CarbonCore/OSUtils.h |
+ pname = /System/Library/Frameworks/Foundation.framework/Headers/Foundation.h
+ into
+ sfrname = /System/Library/Frameworks/Foundation.framework/Frameworks/CarbonCore.framework/Headers/OSUtils.h */
+
+ sfrname = XNEWVEC (char, strlen (pname) + strlen (fname) + 2 +
+ strlen ("Frameworks/") + strlen (".framework/")
+ + strlen ("PrivateHeaders"));
+
+ bufptr += strlen (dot_framework);
+
+ sfrname_len = bufptr - pname;
+
+ strncpy (&sfrname[0], pname, sfrname_len);
+
+ strncpy (&sfrname[sfrname_len], "Frameworks/", strlen ("Frameworks/"));
+ sfrname_len += strlen("Frameworks/");
+
+ strncpy (&sfrname[sfrname_len], fname, fname_len);
+ sfrname_len += fname_len;
+
+ strncpy (&sfrname[sfrname_len], ".framework/", strlen (".framework/"));
+ sfrname_len += strlen (".framework/");
+
+ /* Append framework_header_dirs and header file name */
+ for (i = 0; framework_header_dirs[i].dirName; i++)
+ {
+ strncpy (&sfrname[sfrname_len],
+ framework_header_dirs[i].dirName,
+ framework_header_dirs[i].dirNameLen);
+ strcpy (&sfrname[sfrname_len + framework_header_dirs[i].dirNameLen],
+ &fname[fname_len]);
+
+ if (stat (sfrname, &st) == 0)
+ {
+ if (fast_dir != &subframe_dir)
+ {
+ if (fast_dir)
+ warning (0, "subframework include %s conflicts with framework include",
+ fname);
+ else
+ add_framework (fname, fname_len, &subframe_dir);
+ }
+
+ return sfrname;
+ }
+ }
+ free (sfrname);
+
+ return 0;
+}
+
+/* Add PATH to the system includes. PATH must be malloc-ed and
+ NUL-terminated. System framework paths are C++ aware. */
+
+static void
+add_system_framework_path (char *path)
+{
+ int cxx_aware = 1;
+ cpp_dir *p;
+
+ p = XNEW (cpp_dir);
+ p->next = NULL;
+ p->name = path;
+ p->sysp = 1 + !cxx_aware;
+ p->construct = framework_construct_pathname;
+ using_frameworks = 1;
+
+ add_cpp_dir_path (p, SYSTEM);
+}
+
+/* Add PATH to the bracket includes. PATH must be malloc-ed and
+ NUL-terminated. */
+
+void
+add_framework_path (char *path)
+{
+ cpp_dir *p;
+
+ p = XNEW (cpp_dir);
+ p->next = NULL;
+ p->name = path;
+ p->sysp = 0;
+ p->construct = framework_construct_pathname;
+ using_frameworks = 1;
+
+ add_cpp_dir_path (p, BRACKET);
+}
+
+static const char *framework_defaults [] =
+ {
+ "/System/Library/Frameworks",
+ "/Library/Frameworks",
+ };
+
+/* Register the GNU objective-C runtime include path if STDINC. */
+
+void
+darwin_register_objc_includes (const char *sysroot, const char *iprefix,
+ int stdinc)
+{
+ const char *fname;
+ size_t len;
+ /* We do not do anything if we do not want the standard includes. */
+ if (!stdinc)
+ return;
+
+ fname = GCC_INCLUDE_DIR "-gnu-runtime";
+
+ /* Register the GNU OBJC runtime include path if we are compiling OBJC
+ with GNU-runtime. */
+
+ if (c_dialect_objc () && !flag_next_runtime)
+ {
+ char *str;
+	  /* See if our directory starts with the standard prefix.
+	     If so, "translate" it, i.e. replace /usr/local/lib/gcc... with
+	     IPREFIX, and search it first. */
+ if (iprefix && (len = cpp_GCC_INCLUDE_DIR_len) != 0 && !sysroot
+ && !strncmp (fname, cpp_GCC_INCLUDE_DIR, len))
+ {
+ str = concat (iprefix, fname + len, NULL);
+	      /* FIXME: wrap the headers for C++ awareness. */
+ add_path (str, SYSTEM, /*c++aware=*/false, false);
+ }
+
+ /* Should this directory start with the sysroot? */
+ if (sysroot)
+ str = concat (sysroot, fname, NULL);
+ else
+ str = update_path (fname, "");
+
+ add_path (str, SYSTEM, /*c++aware=*/false, false);
+ }
+}
+
+
+/* Register all the system framework paths if STDINC is true and setup
+ the missing_header callback for subframework searching if any
+ frameworks had been registered. */
+
+void
+darwin_register_frameworks (const char *sysroot,
+ const char *iprefix ATTRIBUTE_UNUSED, int stdinc)
+{
+ if (stdinc)
+ {
+ size_t i;
+
+ /* Setup default search path for frameworks. */
+ for (i=0; i<sizeof (framework_defaults)/sizeof(const char *); ++i)
+ {
+ char *str;
+ if (sysroot)
+ str = concat (sysroot, xstrdup (framework_defaults [i]), NULL);
+ else
+ str = xstrdup (framework_defaults[i]);
+ /* System Framework headers are cxx aware. */
+ add_system_framework_path (str);
+ }
+ }
+
+ if (using_frameworks)
+ cpp_get_callbacks (parse_in)->missing_header = find_subframework_header;
+}
+
+/* Search for HEADER in context dependent way. The return value is
+ the malloced name of a header to try and open, if any, or NULL
+ otherwise. This is called after normal header lookup processing
+ fails to find a header. We search each file in the include stack,
+ using FUNC, starting from the most deeply nested include and
+ finishing with the main input file. We stop searching when FUNC
+ returns nonzero. */
+
+static const char*
+find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp)
+{
+ const char *fname = header;
+ struct cpp_buffer *b;
+ const char *n;
+
+ for (b = cpp_get_buffer (pfile);
+ b && cpp_get_file (b) && cpp_get_path (cpp_get_file (b));
+ b = cpp_get_prev (b))
+ {
+ n = find_subframework_file (fname, cpp_get_path (cpp_get_file (b)));
+ if (n)
+ {
+ /* Logically, the place where we found the subframework is
+ the place where we found the Framework that contains the
+	     subframework. This is useful for tracking whether or not
+ we are in a system header. */
+ *dirp = cpp_get_dir (cpp_get_file (b));
+ return n;
+ }
+ }
+
+ return 0;
+}
+
+/* Return the value of darwin_macosx_version_min suitable for the
+ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro,
+ so '10.4.2' becomes 1040. The lowest digit is always zero.
+   Report an error if the version number can't be understood. */
+static const char *
+version_as_macro (void)
+{
+ static char result[] = "1000";
+
+ if (strncmp (darwin_macosx_version_min, "10.", 3) != 0)
+ goto fail;
+ if (! ISDIGIT (darwin_macosx_version_min[3]))
+ goto fail;
+ result[2] = darwin_macosx_version_min[3];
+ if (darwin_macosx_version_min[4] != '\0'
+ && darwin_macosx_version_min[4] != '.')
+ goto fail;
+
+ return result;
+
+ fail:
+ error ("unknown value %qs of -mmacosx-version-min",
+ darwin_macosx_version_min);
+ return "1000";
+}
+
+/* Define additional CPP flags for Darwin. */
+
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+
+void
+darwin_cpp_builtins (cpp_reader *pfile)
+{
+ builtin_define ("__MACH__");
+ builtin_define ("__APPLE__");
+
+ /* __APPLE_CC__ is defined as some old Apple include files expect it
+ to be defined and won't work if it isn't. */
+ builtin_define_with_value ("__APPLE_CC__", "1", false);
+
+ if (darwin_constant_cfstrings)
+ builtin_define ("__CONSTANT_CFSTRINGS__");
+
+ builtin_define_with_value ("__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__",
+ version_as_macro(), false);
+
+ /* Since we do not (at 4.6) support ObjC gc for the NeXT runtime, the
+ following will cause a syntax error if one tries to compile gc attributed
+ items. However, without this, NeXT system headers cannot be parsed
+ properly (on systems >= darwin 9). */
+ if (flag_objc_gc)
+ {
+ builtin_define ("__strong=__attribute__((objc_gc(strong)))");
+ builtin_define ("__weak=__attribute__((objc_gc(weak)))");
+ builtin_define ("__OBJC_GC__");
+ }
+ else
+ {
+ builtin_define ("__strong=");
+ builtin_define ("__weak=");
+ }
+
+ if (flag_objc_abi == 2)
+ builtin_define ("__OBJC2__");
+}
+
+/* Handle C family front-end options. */
+
+static bool
+handle_c_option (size_t code,
+ const char *arg,
+ int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ default:
+ /* Unrecognized options that we said we'd handle turn into
+ errors if not listed here. */
+ return false;
+
+ case OPT_iframework:
+ add_system_framework_path (xstrdup (arg));
+ break;
+
+ case OPT_fapple_kext:
+ ;
+ }
+
+ /* We recognized the option. */
+ return true;
+}
+
+#undef TARGET_HANDLE_C_OPTION
+#define TARGET_HANDLE_C_OPTION handle_c_option
+
+struct gcc_targetcm targetcm = TARGETCM_INITIALIZER;
+
+/* Allow ObjC* access to CFStrings. */
+tree
+darwin_objc_construct_string (tree str)
+{
+ if (!darwin_constant_cfstrings)
+ {
+ /* Even though we are not using CFStrings, place our literal
+ into the cfstring_htab hash table, so that the
+ darwin_constant_cfstring_p() function will see it. */
+ darwin_enter_string_into_cfstring_table (str);
+ /* Fall back to NSConstantString. */
+ return NULL_TREE;
+ }
+
+ return darwin_build_constant_cfstring (str);
+}
+
+/* The string ref type is created as CFStringRef by <CFBase.h>; therefore,
+   we must match it explicitly, since it's outside the gcc code. */
+
+bool
+darwin_cfstring_ref_p (const_tree strp)
+{
+ tree tn;
+ if (!strp || TREE_CODE (strp) != POINTER_TYPE)
+ return false;
+
+ tn = TYPE_NAME (strp);
+ if (tn)
+ tn = DECL_NAME (tn);
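+  /* Note that only the first 8 characters ("CFString") of the type name
+     are actually compared below.  */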
+ return (tn
+ && IDENTIFIER_POINTER (tn)
+ && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", 8));
+}
+
+/* At present the behavior of this is undefined and it does nothing. */
+void
+darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg),
+ tree ARG_UNUSED (args_list))
+{
+}
+
+/* The extra format types we recognize. */
+EXPORTED_CONST format_kind_info darwin_additional_format_types[] = {
+ { "CFString", NULL, NULL, NULL, NULL,
+ NULL, NULL,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0,
+ NULL, NULL
+ }
+};
diff --git a/gcc/config/darwin-crt2.c b/gcc/config/darwin-crt2.c
new file mode 100644
index 000000000..f4a584a8f
--- /dev/null
+++ b/gcc/config/darwin-crt2.c
@@ -0,0 +1,153 @@
+/* KeyMgr backwards-compatibility support for Darwin.
+ Copyright (C) 2001, 2002, 2004, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* It is incorrect to include config.h here, because this file is being
+ compiled for the target, and hence definitions concerning only the host
+ do not apply. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+
+/* This file doesn't do anything useful on non-powerpc targets, since they
+ don't have backwards compatibility anyway. */
+
+#ifdef __ppc__
+
+/* Homemade decls substituting for getsect.h and dyld.h, so cross
+ compilation works. */
+struct mach_header;
+extern char *getsectdatafromheader (struct mach_header *, const char *,
+ const char *, unsigned long *);
+extern void _dyld_register_func_for_add_image
+ (void (*) (struct mach_header *, unsigned long));
+extern void _dyld_register_func_for_remove_image
+ (void (*) (struct mach_header *, unsigned long));
+
+extern void __darwin_gcc3_preregister_frame_info (void);
+
+/* These are from "keymgr.h". */
+extern void _init_keymgr (void);
+extern void *_keymgr_get_and_lock_processwide_ptr (unsigned key);
+extern void _keymgr_set_and_unlock_processwide_ptr (unsigned key, void *ptr);
+
+extern void *__keymgr_global[];
+typedef struct _Sinfo_Node {
+ unsigned int size ; /*size of this node*/
+ unsigned short major_version ; /*API major version.*/
+ unsigned short minor_version ; /*API minor version.*/
+ } _Tinfo_Node ;
+
+/* KeyMgr 3.x is the first one supporting GCC3 stuff natively. */
+#define KEYMGR_API_MAJOR_GCC3 3
+/* ... with these keys. */
+#define KEYMGR_GCC3_LIVE_IMAGE_LIST 301 /* loaded images */
+#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */
+
+/* Node of KEYMGR_GCC3_LIVE_IMAGE_LIST. Info about each resident image. */
+struct live_images {
+ unsigned long this_size; /* sizeof (live_images) */
+ struct mach_header *mh; /* the image info */
+ unsigned long vm_slide;
+ void (*destructor)(struct live_images *); /* destructor for this */
+ struct live_images *next;
+ unsigned int examined_p;
+ void *fde;
+ void *object_info;
+ unsigned long info[2]; /* Future use. */
+};
+
+
+/* These routines are used only on Darwin versions before 10.2.
+ Later versions have equivalent code in the system.
+ Eventually, they might go away, although it might be a long time... */
+
+static void darwin_unwind_dyld_add_image_hook
+  (struct mach_header *mh, unsigned long slide);
+static void darwin_unwind_dyld_remove_image_hook
+ (struct mach_header *m, unsigned long s);
+extern void __darwin_gcc3_preregister_frame_info (void);
+
+static void
+darwin_unwind_dyld_add_image_hook (struct mach_header *mh, unsigned long slide)
+{
+ struct live_images *l = (struct live_images *)calloc (1, sizeof (*l));
+ l->mh = mh;
+ l->vm_slide = slide;
+ l->this_size = sizeof (*l);
+ l->next = (struct live_images *)
+ _keymgr_get_and_lock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST);
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST, l);
+}
+
+static void
+darwin_unwind_dyld_remove_image_hook (struct mach_header *m, unsigned long s)
+{
+ struct live_images *top, **lip, *destroy = NULL;
+
+ /* Look for it in the list of live images and delete it. */
+
+ top = (struct live_images *)
+ _keymgr_get_and_lock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST);
+ for (lip = &top; *lip != NULL; lip = &(*lip)->next)
+ {
+ if ((*lip)->mh == m && (*lip)->vm_slide == s)
+ {
+ destroy = *lip;
+ *lip = destroy->next; /* unlink DESTROY */
+
+ if (destroy->this_size != sizeof (*destroy)) /* sanity check */
+ abort ();
+
+ break;
+ }
+ }
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_GCC3_LIVE_IMAGE_LIST, top);
+
+ /* Now that we have unlinked this from the image list, toss it. */
+ if (destroy != NULL)
+ {
+ if (destroy->destructor != NULL)
+ (*destroy->destructor) (destroy);
+ free (destroy);
+ }
+}
+
+void
+__darwin_gcc3_preregister_frame_info (void)
+{
+ const _Tinfo_Node *info;
+ _init_keymgr ();
+ info = (_Tinfo_Node *)__keymgr_global[2];
+ if (info != NULL)
+ {
+ if (info->major_version >= KEYMGR_API_MAJOR_GCC3)
+ return;
+ /* Otherwise, use our own add_image_hooks. */
+ }
+
+ _dyld_register_func_for_add_image (darwin_unwind_dyld_add_image_hook);
+ _dyld_register_func_for_remove_image (darwin_unwind_dyld_remove_image_hook);
+}
+
+#endif /* __ppc__ */
diff --git a/gcc/config/darwin-crt3.c b/gcc/config/darwin-crt3.c
new file mode 100644
index 000000000..9b64f2aa8
--- /dev/null
+++ b/gcc/config/darwin-crt3.c
@@ -0,0 +1,532 @@
+/* __cxa_atexit backwards-compatibility support for Darwin.
+ Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Don't do anything if we are compiling for a kext multilib. */
+#ifdef __PIC__
+
+/* It is incorrect to include config.h here, because this file is being
+ compiled for the target, and hence definitions concerning only the host
+ do not apply. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+
+#include <dlfcn.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* This file works around two different problems.
+
+ The first problem is that there is no __cxa_atexit on Mac OS versions
+ before 10.4. It fixes this by providing a complete atexit and
+ __cxa_atexit emulation called from the regular atexit.
+
+ The second problem is that on all shipping versions of Mac OS,
+ __cxa_finalize and exit() don't work right: they don't run routines
+ that were registered while other atexit routines are running. This
+ is worked around by wrapping each atexit/__cxa_atexit routine with
+ our own routine which ensures that any __cxa_atexit calls while it
+ is running are honoured.
+
+ There are still problems which this does not solve. Before 10.4,
+ shared objects linked with previous compilers won't have their
+ atexit calls properly interleaved with code compiled with newer
+ compilers. Also, atexit routines registered from shared objects
+ linked with previous compilers won't get the bug fix. */
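+/* As an illustration of the second problem, take two hypothetical
+   routines A and B: if A is registered with atexit () and A itself
+   registers B while exit-time processing is underway, the system
+   __cxa_finalize never runs B.  The cxa_atexit_wrapper below re-scans
+   the list after each routine returns, so B runs too.  */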
+
+typedef int (*cxa_atexit_p)(void (*func) (void*), void* arg, const void* dso);
+typedef void (*cxa_finalize_p)(const void *dso);
+typedef int (*atexit_p)(void (*func)(void));
+
+/* These are from "keymgr.h". */
+extern void *_keymgr_get_and_lock_processwide_ptr (unsigned key);
+extern int _keymgr_get_and_lock_processwide_ptr_2 (unsigned, void **);
+extern int _keymgr_set_and_unlock_processwide_ptr (unsigned key, void *ptr);
+
+extern void *__keymgr_global[];
+typedef struct _Sinfo_Node {
+  unsigned int size;             /* Size of this node.  */
+  unsigned short major_version;  /* API major version.  */
+  unsigned short minor_version;  /* API minor version.  */
+} _Tinfo_Node;
+
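+/* On ppc this code may be running against a pre-10.4 keymgr whose
+   entry points do not return meaningful error codes, so the error
+   value is only honoured when the installed keymgr API is major
+   version 4 or later.  */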
+#ifdef __ppc__
+#define CHECK_KEYMGR_ERROR(e) \
+ (((_Tinfo_Node *)__keymgr_global[2])->major_version >= 4 ? (e) : 0)
+#else
+#define CHECK_KEYMGR_ERROR(e) (e)
+#endif
+
+/* Our globals are stored under this keymgr index. */
+#define KEYMGR_ATEXIT_LIST 14
+
+/* The different kinds of callback routines. */
+typedef void (*atexit_callback)(void);
+typedef void (*cxa_atexit_callback)(void *);
+
+/* This structure holds a routine to call. There may be extra fields
+ at the end of the structure that this code doesn't know about. */
+struct one_atexit_routine
+{
+ union {
+ atexit_callback ac;
+ cxa_atexit_callback cac;
+ } callback;
+ /* has_arg is 0/2/4 if 'ac' is live, 1/3/5 if 'cac' is live.
+ Higher numbers indicate a later version of the structure that this
+ code doesn't understand and will ignore. */
+ int has_arg;
+ void * arg;
+};
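+/* For example, the replacement atexit () at the end of this file
+   records a routine F as { .callback.ac = F, .has_arg = 0 }, while
+   __cxa_atexit (G, P, DSO) records { .callback.cac = G, .has_arg = 1,
+   .arg = P }.  */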
+
+struct atexit_routine_list
+{
+ struct atexit_routine_list * next;
+ struct one_atexit_routine r;
+};
+
+/* The various possibilities for status of atexit(). */
+enum atexit_status {
+ atexit_status_unknown = 0,
+ atexit_status_missing = 1,
+ atexit_status_broken = 2,
+ atexit_status_working = 16
+};
+
+struct keymgr_atexit_list
+{
+ /* Version of this list. This code knows only about version 0.
+ If the version is higher than 0, this code may add new atexit routines
+ but should not attempt to run the list. */
+ short version;
+ /* 1 if an atexit routine is currently being run by this code, 0
+ otherwise. */
+ char running_routines;
+ /* Holds a value from 'enum atexit_status'. */
+ unsigned char atexit_status;
+ /* The list of atexit and cxa_atexit routines registered. If
+ atexit_status_missing it contains all routines registered while
+ linked with this code. If atexit_status_broken it contains all
+ routines registered during cxa_finalize while linked with this
+ code. */
+ struct atexit_routine_list *l;
+ /* &__cxa_atexit; set if atexit_status >= atexit_status_broken. */
+ cxa_atexit_p cxa_atexit_f;
+ /* &__cxa_finalize; set if atexit_status >= atexit_status_broken. */
+ cxa_finalize_p cxa_finalize_f;
+ /* &atexit; set if atexit_status >= atexit_status_working
+ or atexit_status == atexit_status_missing. */
+ atexit_p atexit_f;
+};
+
+/* Return 0 if __cxa_atexit has the bug it has in Mac OS 10.4: it
+ fails to call routines registered while an atexit routine is
+ running. Return 1 if it works properly, and -1 if an error occurred. */
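+/* The check below registers cxa_atexit_check_1 and immediately runs
+   __cxa_finalize; cxa_atexit_check_1 registers cxa_atexit_check_2 from
+   inside that finalize call, and the result field records whether
+   cxa_atexit_check_2 was in fact run.  */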
+
+struct atexit_data
+{
+ int result;
+ cxa_atexit_p cxa_atexit;
+};
+
+static void cxa_atexit_check_2 (void *arg)
+{
+ ((struct atexit_data *)arg)->result = 1;
+}
+
+static void cxa_atexit_check_1 (void *arg)
+{
+ struct atexit_data * aed = arg;
+ if (aed->cxa_atexit (cxa_atexit_check_2, arg, arg) != 0)
+ aed->result = -1;
+}
+
+static int
+check_cxa_atexit (cxa_atexit_p cxa_atexit, cxa_finalize_p cxa_finalize)
+{
+ struct atexit_data aed = { 0, cxa_atexit };
+
+ /* We re-use &aed as the 'dso' parameter, since it's a unique address. */
+ if (cxa_atexit (cxa_atexit_check_1, &aed, &aed) != 0)
+ return -1;
+ cxa_finalize (&aed);
+ if (aed.result == 0)
+ {
+ /* Call __cxa_finalize again to make sure that cxa_atexit_check_2
+ is removed from the list before AED goes out of scope. */
+ cxa_finalize (&aed);
+ aed.result = 0;
+ }
+ return aed.result;
+}
+
+#ifdef __ppc__
+/* This comes from Csu. It works only before 10.4. The prototype has
+ been altered a bit to avoid casting. */
+extern int _dyld_func_lookup(const char *dyld_func_name,
+ void *address) __attribute__((visibility("hidden")));
+
+static void our_atexit (void);
+
+/* We're running on 10.3.9. Find the address of the system atexit()
+ function. So easy to say, so hard to do. */
+static atexit_p
+find_atexit_10_3 (void)
+{
+ unsigned int (*dyld_image_count_fn)(void);
+ const char *(*dyld_get_image_name_fn)(unsigned int image_index);
+ const void *(*dyld_get_image_header_fn)(unsigned int image_index);
+ const void *(*NSLookupSymbolInImage_fn)(const void *image,
+ const char *symbolName,
+ unsigned int options);
+ void *(*NSAddressOfSymbol_fn)(const void *symbol);
+ unsigned i, count;
+
+ /* Find some dyld functions. */
+ _dyld_func_lookup("__dyld_image_count", &dyld_image_count_fn);
+ _dyld_func_lookup("__dyld_get_image_name", &dyld_get_image_name_fn);
+ _dyld_func_lookup("__dyld_get_image_header", &dyld_get_image_header_fn);
+ _dyld_func_lookup("__dyld_NSLookupSymbolInImage", &NSLookupSymbolInImage_fn);
+ _dyld_func_lookup("__dyld_NSAddressOfSymbol", &NSAddressOfSymbol_fn);
+
+ /* If any of these don't exist, that's an error. */
+ if (! dyld_image_count_fn || ! dyld_get_image_name_fn
+ || ! dyld_get_image_header_fn || ! NSLookupSymbolInImage_fn
+ || ! NSAddressOfSymbol_fn)
+ return NULL;
+
+ count = dyld_image_count_fn ();
+ for (i = 0; i < count; i++)
+ {
+ const char * path = dyld_get_image_name_fn (i);
+ const void * image;
+ const void * symbol;
+
+ if (strcmp (path, "/usr/lib/libSystem.B.dylib") != 0)
+ continue;
+ image = dyld_get_image_header_fn (i);
+ if (! image)
+ return NULL;
+ /* '4' is NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR. */
+ symbol = NSLookupSymbolInImage_fn (image, "_atexit", 4);
+ if (! symbol)
+ return NULL;
+ return NSAddressOfSymbol_fn (symbol);
+ }
+ return NULL;
+}
+#endif
+
+/* Create (if necessary), find, lock, fill in, and return our globals.
+ Return NULL on error, in which case the globals will not be locked.
+ The caller should call keymgr_set_and_unlock. */
+static struct keymgr_atexit_list *
+get_globals (void)
+{
+ struct keymgr_atexit_list * r;
+
+#ifdef __ppc__
+ /* 10.3.9 doesn't have _keymgr_get_and_lock_processwide_ptr_2 so the
+ PPC side can't use it. On 10.4 this just means the error gets
+ reported a little later when
+ _keymgr_set_and_unlock_processwide_ptr finds that the key was
+ never locked. */
+ r = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST);
+#else
+ void * rr;
+ if (_keymgr_get_and_lock_processwide_ptr_2 (KEYMGR_ATEXIT_LIST, &rr))
+ return NULL;
+ r = rr;
+#endif
+
+ if (r == NULL)
+ {
+ r = calloc (sizeof (struct keymgr_atexit_list), 1);
+ if (! r)
+ return NULL;
+ }
+
+ if (r->atexit_status == atexit_status_unknown)
+ {
+ void *handle;
+
+ handle = dlopen ("/usr/lib/libSystem.B.dylib", RTLD_NOLOAD);
+ if (!handle)
+ {
+#ifdef __ppc__
+ r->atexit_status = atexit_status_missing;
+ r->atexit_f = find_atexit_10_3 ();
+ if (! r->atexit_f)
+ goto error;
+ if (r->atexit_f (our_atexit))
+ goto error;
+#else
+ goto error;
+#endif
+ }
+ else
+ {
+ int chk_result;
+
+ r->cxa_atexit_f = (cxa_atexit_p)dlsym (handle, "__cxa_atexit");
+ r->cxa_finalize_f = (cxa_finalize_p)dlsym (handle, "__cxa_finalize");
+ if (! r->cxa_atexit_f || ! r->cxa_finalize_f)
+ goto error;
+
+ chk_result = check_cxa_atexit (r->cxa_atexit_f, r->cxa_finalize_f);
+ if (chk_result == -1)
+ goto error;
+ else if (chk_result == 0)
+ r->atexit_status = atexit_status_broken;
+ else
+ {
+ r->atexit_f = (atexit_p)dlsym (handle, "atexit");
+ if (! r->atexit_f)
+ goto error;
+ r->atexit_status = atexit_status_working;
+ }
+ }
+ }
+
+ return r;
+
+ error:
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, r);
+ return NULL;
+}
+
+/* Add TO_ADD to the list G.  G may be NULL but is always the result
+   of calling _keymgr_get_and_lock_processwide_ptr and
+ so KEYMGR_ATEXIT_LIST is known to be locked; this routine is responsible
+ for unlocking it. */
+
+static int
+add_routine (struct keymgr_atexit_list * g,
+ const struct one_atexit_routine * to_add)
+{
+ struct atexit_routine_list * s
+ = malloc (sizeof (struct atexit_routine_list));
+ int result;
+
+ if (!s)
+ {
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+ return -1;
+ }
+ s->r = *to_add;
+ s->next = g->l;
+ g->l = s;
+ result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+ return CHECK_KEYMGR_ERROR (result) == 0 ? 0 : -1;
+}
+
+/* This runs the routines in G->L up to STOP. */
+static struct keymgr_atexit_list *
+run_routines (struct keymgr_atexit_list *g,
+ struct atexit_routine_list *stop)
+{
+ for (;;)
+ {
+ struct atexit_routine_list * cur = g->l;
+ if (! cur || cur == stop)
+ break;
+ g->l = cur->next;
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+
+ switch (cur->r.has_arg) {
+ case 0: case 2: case 4:
+ cur->r.callback.ac ();
+ break;
+ case 1: case 3: case 5:
+ cur->r.callback.cac (cur->r.arg);
+ break;
+ default:
+ /* Don't understand, so don't call it. */
+ break;
+ }
+ free (cur);
+
+ g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST);
+ if (! g)
+ break;
+ }
+ return g;
+}
+
+/* Call the routine described by ROUTINE_PARAM and then call any
+ routines added to KEYMGR_ATEXIT_LIST while that routine was
+ running, all with in_cxa_finalize set. */
+
+static void
+cxa_atexit_wrapper (void* routine_param)
+{
+ struct one_atexit_routine * routine = routine_param;
+ struct keymgr_atexit_list *g;
+ struct atexit_routine_list * base = NULL;
+ char prev_running = 0;
+
+ g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST);
+ if (g)
+ {
+ prev_running = g->running_routines;
+ g->running_routines = 1;
+ base = g->l;
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+ }
+
+ if (routine->has_arg)
+ routine->callback.cac (routine->arg);
+ else
+ routine->callback.ac ();
+
+ if (g)
+ g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST);
+ if (g)
+ g = run_routines (g, base);
+ if (g)
+ {
+ g->running_routines = prev_running;
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+ }
+}
+
+#ifdef __ppc__
+/* This code is used while running on 10.3.9, when __cxa_atexit doesn't
+ exist in the system library. 10.3.9 only supported regular PowerPC,
+ so this code isn't necessary on x86 or ppc64. */
+
+/* This routine is called from the system atexit(); it runs everything
+ registered on the KEYMGR_ATEXIT_LIST. */
+
+static void
+our_atexit (void)
+{
+ struct keymgr_atexit_list *g;
+ char prev_running;
+
+ g = _keymgr_get_and_lock_processwide_ptr (KEYMGR_ATEXIT_LIST);
+ if (! g || g->version != 0 || g->atexit_status != atexit_status_missing)
+ return;
+
+ prev_running = g->running_routines;
+ g->running_routines = 1;
+ g = run_routines (g, NULL);
+ if (! g)
+ return;
+ g->running_routines = prev_running;
+ _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+}
+#endif
+
+/* This is our wrapper around atexit and __cxa_atexit. It will return
+ nonzero if an error occurs, and otherwise:
+ - if in_cxa_finalize is set, or running on 10.3.9, add R to
+ KEYMGR_ATEXIT_LIST; or
+ - call the system __cxa_atexit to add cxa_atexit_wrapper with an argument
+ that indicates how cxa_atexit_wrapper should call R. */
+
+static int
+atexit_common (const struct one_atexit_routine *r, const void *dso)
+{
+ struct keymgr_atexit_list *g = get_globals ();
+
+ if (! g)
+ return -1;
+
+ if (g->running_routines || g->atexit_status == atexit_status_missing)
+ return add_routine (g, r);
+
+ if (g->atexit_status >= atexit_status_working)
+ {
+ int result;
+ if (r->has_arg)
+ {
+ cxa_atexit_p cxa_atexit = g->cxa_atexit_f;
+ result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST,
+ g);
+ if (CHECK_KEYMGR_ERROR (result))
+ return -1;
+ return cxa_atexit (r->callback.cac, r->arg, dso);
+ }
+ else
+ {
+ atexit_p atexit_f = g->atexit_f;
+ result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST,
+ g);
+ if (CHECK_KEYMGR_ERROR (result))
+ return -1;
+ return atexit_f (r->callback.ac);
+ }
+ }
+ else
+ {
+ cxa_atexit_p cxa_atexit = g->cxa_atexit_f;
+ struct one_atexit_routine *alloced;
+ int result;
+
+ result = _keymgr_set_and_unlock_processwide_ptr (KEYMGR_ATEXIT_LIST, g);
+ if (CHECK_KEYMGR_ERROR (result))
+ return -1;
+
+ alloced = malloc (sizeof (struct one_atexit_routine));
+ if (! alloced)
+ return -1;
+ *alloced = *r;
+ return cxa_atexit (cxa_atexit_wrapper, alloced, dso);
+ }
+}
+
+/* These are the actual replacement routines; they just funnel into
+ atexit_common. */
+
+int __cxa_atexit (cxa_atexit_callback func, void* arg,
+ const void* dso) __attribute__((visibility("hidden")));
+
+int
+__cxa_atexit (cxa_atexit_callback func, void* arg, const void* dso)
+{
+ struct one_atexit_routine r;
+ r.callback.cac = func;
+ r.has_arg = 1;
+ r.arg = arg;
+ return atexit_common (&r, dso);
+}
+
+int atexit (atexit_callback func) __attribute__((visibility("hidden")));
+
+/* Use __dso_handle to allow even bundles that call atexit() to be unloaded
+ on 10.4. */
+extern void __dso_handle;
+
+int
+atexit (atexit_callback func)
+{
+ struct one_atexit_routine r;
+ r.callback.ac = func;
+ r.has_arg = 0;
+ return atexit_common (&r, &__dso_handle);
+}
+
+#endif /* __PIC__ */
diff --git a/gcc/config/darwin-driver.c b/gcc/config/darwin-driver.c
new file mode 100644
index 000000000..1eb920106
--- /dev/null
+++ b/gcc/config/darwin-driver.c
@@ -0,0 +1,189 @@
+/* Additional functions for the GCC driver on Darwin native.
+ Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "gcc.h"
+#include "opts.h"
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#include <sys/sysctl.h>
+#include "xregex.h"
+
+/* When running on a Darwin system and using that system's headers and
+ libraries, default the -mmacosx-version-min flag to be the version
+ of the system on which the compiler is running. */
+
+static void
+darwin_default_min_version (unsigned int *decoded_options_count,
+ struct cl_decoded_option **decoded_options)
+{
+ const unsigned int argc = *decoded_options_count;
+ struct cl_decoded_option *const argv = *decoded_options;
+ unsigned int i;
+ char osversion[32];
+ size_t osversion_len = sizeof (osversion) - 1;
+ static int osversion_name[2] = { CTL_KERN, KERN_OSRELEASE };
+ char * version_p;
+ char * version_pend;
+ int major_vers;
+ char minor_vers[6];
+ static char new_flag[sizeof ("10.0.0") + 6];
+
+ /* If the command-line is empty, just return. */
+ if (argc <= 1)
+ return;
+
+ /* Don't do this if the user specified -mmacosx-version-min= or
+ -mno-macosx-version-min. */
+ for (i = 1; i < argc; i++)
+ if (argv[i].opt_index == OPT_mmacosx_version_min_)
+ return;
+
+ /* Retrieve the deployment target from the environment and insert
+ it as a flag. */
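+  /* For example, MACOSX_DEPLOYMENT_TARGET=10.5 in the environment acts
+     as if -mmacosx-version-min=10.5 had been given first on the
+     command line.  */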
+ {
+ const char * macosx_deployment_target;
+ macosx_deployment_target = getenv ("MACOSX_DEPLOYMENT_TARGET");
+ if (macosx_deployment_target
+ /* Apparently, an empty string for MACOSX_DEPLOYMENT_TARGET means
+ "use the default". Or, possibly "use 10.1". We choose
+ to ignore the environment variable, as if it was never set. */
+ && macosx_deployment_target[0])
+ {
+ ++*decoded_options_count;
+ *decoded_options = XNEWVEC (struct cl_decoded_option,
+ *decoded_options_count);
+ (*decoded_options)[0] = argv[0];
+ generate_option (OPT_mmacosx_version_min_, macosx_deployment_target,
+ 1, CL_DRIVER, &(*decoded_options)[1]);
+ memcpy (*decoded_options + 2, argv + 1,
+ (argc - 1) * sizeof (struct cl_decoded_option));
+ return;
+ }
+ }
+
+  /* Determine the version of the running OS.  If we can't, warn the
+     user, and do nothing.  */
+ if (sysctl (osversion_name, ARRAY_SIZE (osversion_name), osversion,
+ &osversion_len, NULL, 0) == -1)
+ {
+ warning (0, "sysctl for kern.osversion failed: %m");
+ return;
+ }
+
+  /* Try to parse the first two parts of the OS version number.  Warn
+     the user and return if it doesn't make sense.  */
+ if (! ISDIGIT (osversion[0]))
+ goto parse_failed;
+ major_vers = osversion[0] - '0';
+ version_p = osversion + 1;
+ if (ISDIGIT (*version_p))
+ major_vers = major_vers * 10 + (*version_p++ - '0');
+ if (major_vers > 4 + 9)
+ goto parse_failed;
+ if (*version_p++ != '.')
+ goto parse_failed;
+  version_pend = strchr (version_p, '.');
+ if (!version_pend)
+ goto parse_failed;
+ if (! ISDIGIT (*version_p))
+ goto parse_failed;
+  strncpy (minor_vers, version_p, version_pend - version_p);
+ minor_vers[version_pend - version_p] = '\0';
+
+ /* The major kernel version number is 4 plus the second OS version
+ component. */
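+  /* For example, kern.osrelease "8.11.0" gives major_vers 8 and
+     minor_vers "11", producing "10.4" (two components, for the old
+     linker), while "9.8.0" produces "10.5.8".  */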
+ if (major_vers - 4 <= 4)
+ /* On 10.4 and earlier, the old linker is used which does not
+ support three-component system versions. */
+ sprintf (new_flag, "10.%d", major_vers - 4);
+ else
+ sprintf (new_flag, "10.%d.%s", major_vers - 4,
+ minor_vers);
+
+ /* Add the new flag. */
+ ++*decoded_options_count;
+ *decoded_options = XNEWVEC (struct cl_decoded_option,
+ *decoded_options_count);
+ (*decoded_options)[0] = argv[0];
+ generate_option (OPT_mmacosx_version_min_, new_flag,
+ 1, CL_DRIVER, &(*decoded_options)[1]);
+ memcpy (*decoded_options + 2, argv + 1,
+ (argc - 1) * sizeof (struct cl_decoded_option));
+ return;
+
+ parse_failed:
+ warning (0, "couldn%'t understand kern.osversion %q.*s",
+ (int) osversion_len, osversion);
+ return;
+}
+
+#endif /* CROSS_DIRECTORY_STRUCTURE */
+
+/* Translate -filelist and -framework options in *DECODED_OPTIONS
+ (size *DECODED_OPTIONS_COUNT) to use -Xlinker so that they are
+ considered to be linker inputs in the case that no other inputs are
+ specified. Handling these options in DRIVER_SELF_SPECS does not
+ suffice because specs are too late to add linker inputs, and
+ handling them in LINK_SPEC does not suffice because the linker will
+ not be called if there are no other inputs. When native, also
+ default the -mmacosx-version-min flag. */
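+/* For example, "-framework Cocoa" is rewritten below into
+   "-Xlinker -framework -Xlinker Cocoa", so that the framework still
+   reaches the linker when it is the only thing on the command line.  */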
+
+void
+darwin_driver_init (unsigned int *decoded_options_count,
+ struct cl_decoded_option **decoded_options)
+{
+ unsigned int i;
+
+ for (i = 1; i < *decoded_options_count; i++)
+ {
+ if ((*decoded_options)[i].errors & CL_ERR_MISSING_ARG)
+ continue;
+ switch ((*decoded_options)[i].opt_index)
+ {
+ case OPT_filelist:
+ case OPT_framework:
+ ++*decoded_options_count;
+ *decoded_options = XRESIZEVEC (struct cl_decoded_option,
+ *decoded_options,
+ *decoded_options_count);
+ memmove (*decoded_options + i + 2,
+ *decoded_options + i + 1,
+ ((*decoded_options_count - i - 2)
+ * sizeof (struct cl_decoded_option)));
+ generate_option (OPT_Xlinker, (*decoded_options)[i].arg, 1,
+ CL_DRIVER, &(*decoded_options)[i + 1]);
+ generate_option (OPT_Xlinker,
+ (*decoded_options)[i].canonical_option[0], 1,
+ CL_DRIVER, &(*decoded_options)[i]);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+ darwin_default_min_version (decoded_options_count, decoded_options);
+#endif
+}
diff --git a/gcc/config/darwin-f.c b/gcc/config/darwin-f.c
new file mode 100644
index 000000000..24ed674d7
--- /dev/null
+++ b/gcc/config/darwin-f.c
@@ -0,0 +1,60 @@
+/* Darwin support needed only by Fortran frontends.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Daniel Franke.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Provide stubs for the hooks defined by darwin.h
+ TARGET_EXTRA_PRE_INCLUDES, TARGET_EXTRA_INCLUDES
+
+   As both gcc and gfortran link in incpath.o, we cannot
+   conditionally undefine said hooks if fortran is built.
+   However, we can define do-nothing stubs of said hooks, as
+   we are not interested in objc include files in Fortran.
+
+   The hooks' original purpose (see also darwin-c.c):
+ * darwin_register_objc_includes
+ Register the GNU objective-C runtime include path if STDINC.
+
+ * darwin_register_frameworks
+ Register all the system framework paths if STDINC is true and setup
+ the missing_header callback for subframework searching if any
+ frameworks had been registered. */
+
+
+#include "ansidecl.h"
+
+/* Prototypes for the functions below, to avoid the lengthy list of
+   includes that would otherwise be needed to declare them.  */
+void darwin_register_objc_includes (const char *, const char *, int);
+void darwin_register_frameworks (const char *, const char *, int);
+
+
+void
+darwin_register_objc_includes (const char *sysroot ATTRIBUTE_UNUSED,
+ const char *iprefix ATTRIBUTE_UNUSED,
+ int stdinc ATTRIBUTE_UNUSED)
+{
+}
+
+void
+darwin_register_frameworks (const char *sysroot ATTRIBUTE_UNUSED,
+ const char *iprefix ATTRIBUTE_UNUSED,
+ int stdinc ATTRIBUTE_UNUSED)
+{
+}
diff --git a/gcc/config/darwin-ppc-ldouble-patch.def b/gcc/config/darwin-ppc-ldouble-patch.def
new file mode 100644
index 000000000..ba5751e41
--- /dev/null
+++ b/gcc/config/darwin-ppc-ldouble-patch.def
@@ -0,0 +1,113 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+PATCH_BUILTIN (BUILT_IN_ACOSHL)
+PATCH_BUILTIN (BUILT_IN_ACOSL)
+PATCH_BUILTIN (BUILT_IN_ASINHL)
+PATCH_BUILTIN (BUILT_IN_ASINL)
+PATCH_BUILTIN (BUILT_IN_ATAN2L)
+PATCH_BUILTIN (BUILT_IN_ATANHL)
+PATCH_BUILTIN (BUILT_IN_ATANL)
+PATCH_BUILTIN (BUILT_IN_CABSL)
+PATCH_BUILTIN (BUILT_IN_CACOSHL)
+PATCH_BUILTIN (BUILT_IN_CACOSL)
+PATCH_BUILTIN (BUILT_IN_CARGL)
+PATCH_BUILTIN (BUILT_IN_CASINHL)
+PATCH_BUILTIN (BUILT_IN_CASINL)
+PATCH_BUILTIN (BUILT_IN_CATANHL)
+PATCH_BUILTIN (BUILT_IN_CATANL)
+PATCH_BUILTIN (BUILT_IN_CBRTL)
+PATCH_BUILTIN (BUILT_IN_CCOSHL)
+PATCH_BUILTIN (BUILT_IN_CCOSL)
+PATCH_BUILTIN (BUILT_IN_CEILL)
+PATCH_BUILTIN (BUILT_IN_CEXPL)
+PATCH_BUILTIN (BUILT_IN_CIMAGL)
+PATCH_BUILTIN (BUILT_IN_CLOGL)
+PATCH_BUILTIN (BUILT_IN_CONJL)
+PATCH_BUILTIN (BUILT_IN_COPYSIGNL)
+PATCH_BUILTIN (BUILT_IN_COSHL)
+PATCH_BUILTIN (BUILT_IN_COSL)
+PATCH_BUILTIN (BUILT_IN_CPOWL)
+PATCH_BUILTIN (BUILT_IN_CPROJL)
+PATCH_BUILTIN (BUILT_IN_CREALL)
+PATCH_BUILTIN (BUILT_IN_CSINHL)
+PATCH_BUILTIN (BUILT_IN_CSINL)
+PATCH_BUILTIN (BUILT_IN_CSQRTL)
+PATCH_BUILTIN (BUILT_IN_CTANHL)
+PATCH_BUILTIN (BUILT_IN_CTANL)
+PATCH_BUILTIN (BUILT_IN_ERFCL)
+PATCH_BUILTIN (BUILT_IN_ERFL)
+PATCH_BUILTIN (BUILT_IN_EXP2L)
+PATCH_BUILTIN (BUILT_IN_EXPL)
+PATCH_BUILTIN (BUILT_IN_EXPM1L)
+PATCH_BUILTIN (BUILT_IN_FABSL)
+PATCH_BUILTIN (BUILT_IN_FDIML)
+PATCH_BUILTIN (BUILT_IN_FLOORL)
+PATCH_BUILTIN (BUILT_IN_FMAL)
+PATCH_BUILTIN (BUILT_IN_FMAXL)
+PATCH_BUILTIN (BUILT_IN_FMINL)
+PATCH_BUILTIN (BUILT_IN_FMODL)
+PATCH_BUILTIN (BUILT_IN_FREXPL)
+PATCH_BUILTIN (BUILT_IN_HYPOTL)
+PATCH_BUILTIN (BUILT_IN_ILOGBL)
+PATCH_BUILTIN (BUILT_IN_LDEXPL)
+PATCH_BUILTIN (BUILT_IN_LGAMMAL)
+PATCH_BUILTIN (BUILT_IN_LLRINTL)
+PATCH_BUILTIN (BUILT_IN_LLROUNDL)
+PATCH_BUILTIN (BUILT_IN_LOG10L)
+PATCH_BUILTIN (BUILT_IN_LOG1PL)
+PATCH_BUILTIN (BUILT_IN_LOG2L)
+PATCH_BUILTIN (BUILT_IN_LOGBL)
+PATCH_BUILTIN (BUILT_IN_LOGL)
+PATCH_BUILTIN (BUILT_IN_LRINTL)
+PATCH_BUILTIN (BUILT_IN_LROUNDL)
+PATCH_BUILTIN (BUILT_IN_MODFL)
+PATCH_BUILTIN (BUILT_IN_NANL)
+PATCH_BUILTIN (BUILT_IN_NEARBYINTL)
+PATCH_BUILTIN (BUILT_IN_NEXTAFTERL)
+PATCH_BUILTIN (BUILT_IN_NEXTTOWARDL)
+PATCH_BUILTIN (BUILT_IN_POWL)
+PATCH_BUILTIN (BUILT_IN_REMAINDERL)
+PATCH_BUILTIN (BUILT_IN_REMQUOL)
+PATCH_BUILTIN (BUILT_IN_RINTL)
+PATCH_BUILTIN (BUILT_IN_ROUNDL)
+PATCH_BUILTIN (BUILT_IN_SCALBLNL)
+PATCH_BUILTIN (BUILT_IN_SCALBNL)
+PATCH_BUILTIN (BUILT_IN_SINHL)
+PATCH_BUILTIN (BUILT_IN_SINL)
+PATCH_BUILTIN (BUILT_IN_SQRTL)
+PATCH_BUILTIN (BUILT_IN_TANHL)
+PATCH_BUILTIN (BUILT_IN_TANL)
+PATCH_BUILTIN (BUILT_IN_TGAMMAL)
+PATCH_BUILTIN (BUILT_IN_TRUNCL)
+
+PATCH_BUILTIN_NO64 (BUILT_IN_VFPRINTF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VFSCANF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VPRINTF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VSCANF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VSNPRINTF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VSPRINTF)
+PATCH_BUILTIN_NO64 (BUILT_IN_VSSCANF)
+
+PATCH_BUILTIN_VARIADIC (BUILT_IN_FPRINTF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_FSCANF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_PRINTF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_SCANF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_SNPRINTF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_SPRINTF)
+PATCH_BUILTIN_VARIADIC (BUILT_IN_SSCANF)
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
new file mode 100644
index 000000000..4a9961119
--- /dev/null
+++ b/gcc/config/darwin-protos.h
@@ -0,0 +1,127 @@
+/* Prototypes.
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern void darwin_init_sections (void);
+extern int name_needs_quotes (const char *);
+
+extern void machopic_validate_stub_or_non_lazy_ptr (const char *);
+
+extern void machopic_output_function_base_name (FILE *);
+extern const char *machopic_indirection_name (rtx, bool);
+extern const char *machopic_mcount_stub_name (void);
+
+#ifdef RTX_CODE
+
+extern rtx machopic_gen_offset (rtx);
+extern int machopic_operand_p (rtx);
+extern int machopic_symbol_defined_p (rtx sym_ref);
+extern enum machopic_addr_class machopic_classify_symbol (rtx);
+
+extern rtx machopic_indirect_data_reference (rtx, rtx);
+extern rtx machopic_indirect_call_target (rtx);
+extern rtx machopic_legitimize_pic_address (rtx, enum machine_mode, rtx);
+
+extern void machopic_asm_out_constructor (rtx, int);
+extern void machopic_asm_out_destructor (rtx, int);
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+
+extern void machopic_define_symbol (rtx);
+extern void darwin_encode_section_info (tree, rtx, int);
+extern void darwin_set_default_type_attributes (tree);
+
+#endif /* TREE_CODE */
+
+extern void machopic_finish (FILE *);
+
+extern int machopic_reloc_rw_mask (void);
+extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT);
+extern section *machopic_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+
+extern section *darwin_function_section (tree, enum node_frequency, bool, bool);
+extern void darwin_function_switched_text_sections (FILE *, tree, bool);
+
+extern void darwin_unique_section (tree decl, int reloc);
+extern void darwin_asm_named_section (const char *, unsigned int, tree);
+extern void darwin_non_lazy_pcrel (FILE *, rtx);
+
+extern void darwin_emit_unwind_label (FILE *, tree, int, int);
+extern void darwin_emit_except_table_label (FILE *);
+
+extern void darwin_pragma_ignore (struct cpp_reader *);
+extern void darwin_pragma_options (struct cpp_reader *);
+extern void darwin_pragma_unused (struct cpp_reader *);
+extern void darwin_pragma_ms_struct (struct cpp_reader *);
+
+extern void darwin_file_start (void);
+extern void darwin_file_end (void);
+
+extern void darwin_asm_lto_start (void);
+extern void darwin_asm_lto_end (void);
+
+extern void darwin_mark_decl_preserved (const char *);
+
+extern tree darwin_handle_kext_attribute (tree *, tree, tree, int, bool *);
+extern tree darwin_handle_weak_import_attribute (tree *node, tree name,
+ tree args, int flags,
+ bool * no_add_attrs);
+extern void machopic_output_stub (FILE *, const char *, const char *);
+extern void darwin_globalize_label (FILE *, const char *);
+extern void darwin_assemble_visibility (tree, int);
+
+extern void darwin_asm_output_dwarf_delta (FILE *, int, const char *,
+ const char *);
+extern void darwin_asm_output_dwarf_offset (FILE *, int, const char *,
+ section *);
+
+extern void darwin_asm_declare_object_name (FILE *, const char *, tree);
+extern void darwin_asm_declare_constant_name (FILE *, const char *,
+ const_tree, HOST_WIDE_INT);
+
+extern void darwin_output_aligned_bss (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT, unsigned int);
+
+extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void darwin_asm_output_aligned_decl_common (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+
+extern bool darwin_binds_local_p (const_tree);
+extern void darwin_cpp_builtins (struct cpp_reader *);
+
+extern tree darwin_init_cfstring_builtins (unsigned);
+extern tree darwin_fold_builtin (tree, int, tree *, bool);
+extern tree darwin_objc_construct_string (tree);
+extern bool darwin_cfstring_p (tree);
+extern bool darwin_cfstring_ref_p (const_tree);
+extern void darwin_check_cfstring_format_arg (tree, tree);
+extern tree darwin_build_constant_cfstring (tree);
+extern void darwin_enter_string_into_cfstring_table (tree);
+
+extern void darwin_asm_output_anchor (rtx symbol);
+extern bool darwin_use_anchors_for_symbol_p (const_rtx symbol);
+extern bool darwin_kextabi_p (void);
+extern void darwin_override_options (void);
+extern void darwin_patch_builtins (void);
+extern void darwin_rename_builtins (void);
diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def
new file mode 100644
index 000000000..61b6f69b1
--- /dev/null
+++ b/gcc/config/darwin-sections.def
@@ -0,0 +1,195 @@
+/* Copyright (C) 2005, 2006, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Since Darwin's ld will not allow zero-sized objects, and gcc wants them,
+ we emit one byte (in darwin.c) when such an object is encountered.
+
+ This messes up section anchoring because the emitted byte is not counted
+ outside the port. To cope with this, we set aside sections for zero-sized
+ objects and disallow those sections from participating in section anchors
+ ("zobj_" sections, below).
+
+   Items that might be coalesced by the linker are likewise prevented
+   from participating (and those in mergeable sections are disallowed
+   in varasm.c).  */
+
+/* The .text section is generated in varasm.c.  */
+DEF_SECTION (text_coal_section, SECTION_CODE|SECTION_NO_ANCHOR,
+ ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", 0)
+
+DEF_SECTION (text_hot_section, SECTION_CODE,
+ ".section __TEXT,__text_hot,regular,pure_instructions", 0)
+DEF_SECTION (text_cold_section, SECTION_CODE,
+ ".section __TEXT,__text_cold,regular,pure_instructions", 0)
+DEF_SECTION (text_startup_section, SECTION_CODE,
+ ".section __TEXT,__text_startup,regular,pure_instructions", 0)
+DEF_SECTION (text_exit_section, SECTION_CODE,
+ ".section __TEXT,__text_exit,regular,pure_instructions", 0)
+
+DEF_SECTION (text_hot_coal_section, SECTION_CODE,
+ ".section __TEXT,__text_hot_coal,coalesced,pure_instructions", 0)
+DEF_SECTION (text_cold_coal_section, SECTION_CODE,
+ ".section __TEXT,__text_cold_coal,coalesced,pure_instructions", 0)
+DEF_SECTION (text_startup_coal_section, SECTION_CODE,
+ ".section __TEXT,__text_stt_coal,coalesced,pure_instructions", 0)
+DEF_SECTION (text_exit_coal_section, SECTION_CODE,
+ ".section __TEXT,__text_exit_coal,coalesced,pure_instructions", 0)
+
+/* const */
+DEF_SECTION (const_section, 0, ".const", 0)
+DEF_SECTION (const_coal_section, SECTION_NO_ANCHOR,
+ ".section __TEXT,__const_coal,coalesced", 0)
+/* Place to put zero-sized objects to avoid issues with section anchors. */
+DEF_SECTION (zobj_const_section, SECTION_NO_ANCHOR,
+ ".section\t__DATA,__zobj_const", 0)
+
+/* Writable data.  '.data' is handled in varasm.c.  */
+DEF_SECTION (static_data_section, SECTION_WRITE, ".static_data", 0)
+DEF_SECTION (data_coal_section, SECTION_WRITE|SECTION_NO_ANCHOR,
+ ".section __DATA,__datacoal_nt,coalesced", 0)
+/* Place to put zero-sized objects to avoid issues with section anchors. */
+DEF_SECTION (zobj_data_section, SECTION_WRITE|SECTION_NO_ANCHOR,
+ ".section\t__DATA,__zobj_data", 0)
+
+/* BSS - .lcomm / .zerofill __DATA,__bss sections cannot be switched to
+   explicitly (doing so will create an assembler error).  */
+DEF_SECTION (zobj_bss_section, SECTION_WRITE|SECTION_BSS|SECTION_NO_ANCHOR,
+ ".section\t__DATA,__zobj_bss", 0)
+
+/* const data */
+DEF_SECTION (const_data_section, 0, ".const_data", 0)
+DEF_SECTION (const_data_coal_section, SECTION_NO_ANCHOR,
+ ".section __DATA,__const_coal,coalesced", 0)
+/* Place to put zero-sized objects to avoid issues with section anchors. */
+DEF_SECTION (zobj_const_data_section, SECTION_NO_ANCHOR,
+ ".section\t__DATA,__zobj_const_data", 0)
+
+/* Strings and other literals. */
+DEF_SECTION (cstring_section, SECTION_MERGE | SECTION_STRINGS, ".cstring", 0)
+DEF_SECTION (literal4_section, SECTION_MERGE, ".literal4", 0)
+DEF_SECTION (literal8_section, SECTION_MERGE, ".literal8", 0)
+DEF_SECTION (literal16_section, SECTION_MERGE, ".literal16", 0)
+/* Unlike constant NSStrings, constant CFStrings do not live in the
+ __OBJC segment since they may also occur in pure C or C++ programs. */
+DEF_SECTION (cfstring_constant_object_section, 0,
+ ".section __DATA, __cfstring", 0)
+
+/* Module init, term, constructors & destructors. */
+DEF_SECTION (mod_init_section, 0, ".mod_init_func", 0)
+DEF_SECTION (mod_term_section, 0, ".mod_term_func", 0)
+DEF_SECTION (constructor_section, 0, ".constructor", 0)
+DEF_SECTION (destructor_section, 0, ".destructor", 0)
+
+/* Objective-C ABI=0 (Original version) sections. */
+DEF_SECTION (objc_class_section, 0, ".objc_class", 1)
+DEF_SECTION (objc_meta_class_section, 0, ".objc_meta_class", 1)
+DEF_SECTION (objc_category_section, 0, ".objc_category", 1)
+DEF_SECTION (objc_class_vars_section, 0, ".objc_class_vars", 1)
+DEF_SECTION (objc_instance_vars_section, 0, ".objc_instance_vars", 1)
+DEF_SECTION (objc_cls_meth_section, 0, ".objc_cls_meth", 1)
+DEF_SECTION (objc_inst_meth_section, 0, ".objc_inst_meth", 1)
+DEF_SECTION (objc_cat_cls_meth_section, 0, ".objc_cat_cls_meth", 1)
+DEF_SECTION (objc_cat_inst_meth_section, 0, ".objc_cat_inst_meth", 1)
+DEF_SECTION (objc_selector_refs_section, SECTION_MERGE, ".objc_message_refs", 1)
+DEF_SECTION (objc_selector_fixup_section, 0,
+ ".section __OBJC, __sel_fixup, regular, no_dead_strip", 1)
+DEF_SECTION (objc_symbols_section, 0, ".objc_symbols", 1)
+DEF_SECTION (objc_module_info_section, 0, ".objc_module_info", 1)
+DEF_SECTION (objc_protocol_section, 0, ".objc_protocol", 1)
+DEF_SECTION (objc_string_object_section, 0, ".objc_string_object", 1)
+DEF_SECTION (objc_constant_string_object_section, 0,
+ ".section __OBJC, __cstring_object, regular, no_dead_strip", 0)
+
+/* Fix-and-Continue image marker. */
+DEF_SECTION (objc_image_info_section, 0,
+ ".section __OBJC, __image_info, regular, no_dead_strip", 1)
+DEF_SECTION (objc_class_names_section, 0, ".objc_class_names", 1)
+DEF_SECTION (objc_meth_var_names_section, 0, ".objc_meth_var_names", 1)
+DEF_SECTION (objc_meth_var_types_section, 0, ".objc_meth_var_types", 1)
+DEF_SECTION (objc_cls_refs_section, SECTION_MERGE, ".objc_cls_refs", 1)
+
+/* Stubs and symbol indirection sections. */
+/* lazy symbol pointers. */
+DEF_SECTION (machopic_lazy_symbol_ptr_section, SECTION_NO_ANCHOR,
+ ".lazy_symbol_pointer", 0)
+DEF_SECTION (machopic_lazy_symbol_ptr2_section, SECTION_NO_ANCHOR,
+ ".section __DATA, __la_sym_ptr2,lazy_symbol_pointers", 0)
+DEF_SECTION (machopic_lazy_symbol_ptr3_section, SECTION_NO_ANCHOR,
+ ".section __DATA, __la_sym_ptr3,lazy_symbol_pointers", 0)
+/* non-lazy symbol pointers. */
+DEF_SECTION (machopic_nl_symbol_ptr_section, SECTION_NO_ANCHOR,
+ MACHOPIC_NL_SYMBOL_PTR_SECTION, 0)
+/* Symbol stubs. */
+DEF_SECTION (machopic_symbol_stub_section, SECTION_NO_ANCHOR,
+ ".symbol_stub", 0)
+DEF_SECTION (machopic_symbol_stub1_section, SECTION_NO_ANCHOR,
+ ".section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16", 0)
+/* PIC symbol stubs. */
+DEF_SECTION (machopic_picsymbol_stub_section, SECTION_NO_ANCHOR,
+ ".picsymbol_stub", 0)
+DEF_SECTION (machopic_picsymbol_stub1_section, SECTION_NO_ANCHOR,
+ ".section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32", 0)
+DEF_SECTION (machopic_picsymbol_stub2_section, SECTION_NO_ANCHOR,
+ ".section __TEXT,__picsymbolstub2,symbol_stubs,pure_instructions,25", 0)
+DEF_SECTION (machopic_picsymbol_stub3_section, SECTION_NO_ANCHOR,
+ ".section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5", 0)
+
+/* Exception-related. */
+DEF_SECTION (darwin_exception_section, SECTION_NO_ANCHOR,
+ ".section __DATA,__gcc_except_tab", 0)
+DEF_SECTION (darwin_eh_frame_section, SECTION_NO_ANCHOR,
+ ".section " EH_FRAME_SECTION_NAME ",__eh_frame"
+ EH_FRAME_SECTION_ATTR, 0)
+
+/* Sections for ObjC ABI=1 (ObjC 'V1' extensions) */
+DEF_SECTION (objc1_class_ext_section, 0,
+ ".section __OBJC, __class_ext, regular, no_dead_strip", 1)
+DEF_SECTION (objc1_prop_list_section, 0,
+ ".section __OBJC, __property, regular, no_dead_strip", 1)
+DEF_SECTION (objc1_protocol_ext_section, 0,
+ ".section __OBJC, __protocol_ext, regular, no_dead_strip", 1)
+
+/* Sections for ObjC ABI=2 (m64). */
+DEF_SECTION (objc2_message_refs_section, 0,
+ ".section __DATA, __objc_msgrefs, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_classdefs_section, 0, ".section __DATA, __objc_data", 1)
+DEF_SECTION (objc2_metadata_section, 0, ".section __DATA, __objc_const", 1)
+
+DEF_SECTION (objc2_classrefs_section, 0,
+ ".section __DATA, __objc_classrefs, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_classlist_section, 0,
+ ".section __DATA, __objc_classlist, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_categorylist_section, 0,
+ ".section __DATA, __objc_catlist, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_selector_refs_section, 0,
+ ".section __DATA, __objc_selrefs, literal_pointers, no_dead_strip", 1)
+DEF_SECTION (objc2_nonlazy_class_section, 0,
+ ".section __DATA, __objc_nlclslist, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_nonlazy_category_section, 0,
+ ".section __DATA, __objc_nlcatlist, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_protocollist_section, 0,
+ ".section __DATA, __objc_protolist, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_protocolrefs_section, 0,
+ ".section __DATA, __objc_protorefs, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_super_classrefs_section, 0,
+ ".section __DATA, __objc_superrefs, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_image_info_section, 0,
+ ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1)
+DEF_SECTION (objc2_constant_string_object_section, 0,
+ ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1)
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
new file mode 100644
index 000000000..3b065e5b9
--- /dev/null
+++ b/gcc/config/darwin.c
@@ -0,0 +1,3472 @@
+/* Functions for generic Darwin as target machine for GNU C compiler.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "reload.h"
+#include "function.h"
+#include "ggc.h"
+#include "langhooks.h"
+#include "target.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "toplev.h"
+#include "hashtab.h"
+#include "df.h"
+#include "debug.h"
+#include "obstack.h"
+#include "lto-streamer.h"
+
+/* Darwin supports a feature called fix-and-continue, which is used
+   for rapid turn-around debugging.  When code is compiled with the
+   -mfix-and-continue flag, two changes are made to the generated code
+   that allow the system to do things that it would normally not be
+   able to do easily.  These changes allow gdb to load a recompiled
+   version of a changed translation unit into a running program and
+   to replace the existing functions and methods of that translation
+   unit with the versions from the newly compiled translation unit.
+   The new functions access the existing static symbols from the old
+   translation unit, if the symbol existed in the unit to be replaced,
+   and from the new translation unit, otherwise.
+
+   The changes are to insert 5 nops at the beginning of all functions
+   and to use indirection to get at static symbols.  The 5 nops
+   are required by consumers of the generated code.  Currently, gdb
+   uses this to patch in a jump to the overriding function; this
+   allows all uses of the old name to forward to the replacement,
+   including existing function pointers and virtual methods.  See
+   rs6000_emit_prologue for the code that handles the nop insertions.
+
+   The added indirection allows gdb to redirect accesses to static
+   symbols from the newly loaded translation unit to the existing
+   symbol, if any.  @code{static} symbols are special and are handled by
+   setting the second word in the .non_lazy_symbol_pointer data
+   structure to the symbol.  See indirect_data for the code that handles
+   the extra indirection, and machopic_output_indirection and its use
+   of MACHO_SYMBOL_STATIC for the code that handles @code{static}
+   symbol indirection.  */
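+
+/* Schematically, with -mfix-and-continue a function is emitted as
+   (hypothetical assembly):
+       _foo:
+           nop ; nop ; nop ; nop ; nop
+           ...normal prologue and body...
+   leaving room for gdb to patch in a jump to a replacement version.  */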
+
+/* For darwin >= 9 (OSX 10.5) the linker is capable of making the necessary
+ branch islands and we no longer need to emit darwin stubs.
+ However, if we are generating code for earlier systems (or for use in the
+ kernel) the stubs might still be required, and this will be set true. */
+int darwin_emit_branch_islands = false;
+
+/* A flag to determine whether we are running c++ or obj-c++. This has to be
+ settable from non-c-family contexts too (i.e. we can't use the c_dialect_
+ functions). */
+int darwin_running_cxx;
+
+/* Some code-gen now depends on OS major version numbers (at least). */
+int generating_for_darwin_version;
+
+/* Section names. */
+section * darwin_sections[NUM_DARWIN_SECTIONS];
+
+/* While we transition to using `if' tests instead of ifdef'd code.  */
+#ifndef HAVE_lo_sum
+#define HAVE_lo_sum 0
+#define gen_macho_high(a,b) (a)
+#define gen_macho_low(a,b,c) (a)
+#endif
+
+/* True if we're setting __attribute__ ((ms_struct)). */
+int darwin_ms_struct = false;
+
+/* Earlier versions of the Darwin assembler do not recognize an
+   alignment field in .comm directives; this should be set for versions
+   that allow it.  */
+int emit_aligned_common = false;
+
+/* A get_unnamed_section callback used to switch to an ObjC section.
+ DIRECTIVE is as for output_section_asm_op. */
+
+static void
+output_objc_section_asm_op (const void *directive)
+{
+ static bool been_here = false;
+
+ /* The NeXT ObjC Runtime requires these sections to be present and in
+ order in the object. The code below implements this by emitting
+ a section header for each ObjC section the first time that an ObjC
+ section is requested. */
+ if (! been_here)
+ {
+ section *saved_in_section = in_section;
+ static const enum darwin_section_enum tomark[] =
+ {
+ /* written, cold -> hot */
+ objc_cat_cls_meth_section,
+ objc_cat_inst_meth_section,
+ objc_string_object_section,
+ objc_constant_string_object_section,
+ objc_selector_refs_section,
+ objc_selector_fixup_section,
+ objc_cls_refs_section,
+ objc_class_section,
+ objc_meta_class_section,
+ /* shared, hot -> cold */
+ objc_cls_meth_section,
+ objc_inst_meth_section,
+ objc_protocol_section,
+ objc_class_names_section,
+ objc_meth_var_types_section,
+ objc_meth_var_names_section,
+ objc_category_section,
+ objc_class_vars_section,
+ objc_instance_vars_section,
+ objc_module_info_section,
+ objc_symbols_section,
+ };
+ /* ABI=1 */
+ static const enum darwin_section_enum tomarkv1[] =
+ {
+ objc1_protocol_ext_section,
+ objc1_class_ext_section,
+ objc1_prop_list_section
+ } ;
+ /* ABI=2 */
+ static const enum darwin_section_enum tomarkv2[] =
+ {
+ objc2_message_refs_section,
+ objc2_classdefs_section,
+ objc2_metadata_section,
+ objc2_classrefs_section,
+ objc2_classlist_section,
+ objc2_categorylist_section,
+ objc2_selector_refs_section,
+ objc2_nonlazy_class_section,
+ objc2_nonlazy_category_section,
+ objc2_protocollist_section,
+ objc2_protocolrefs_section,
+ objc2_super_classrefs_section,
+ objc2_image_info_section,
+ objc2_constant_string_object_section
+ } ;
+ size_t i;
+
+ been_here = true;
+ if (flag_objc_abi < 2)
+ {
+ for (i = 0; i < ARRAY_SIZE (tomark); i++)
+ switch_to_section (darwin_sections[tomark[i]]);
+ if (flag_objc_abi == 1)
+ for (i = 0; i < ARRAY_SIZE (tomarkv1); i++)
+ switch_to_section (darwin_sections[tomarkv1[i]]);
+ }
+ else
+ for (i = 0; i < ARRAY_SIZE (tomarkv2); i++)
+ switch_to_section (darwin_sections[tomarkv2[i]]);
+ /* Make sure we don't get varasm.c out of sync with us. */
+ switch_to_section (saved_in_section);
+ }
+ output_section_asm_op (directive);
+}
+
+
+/* Private flag applied to disable section-anchors in a particular section. */
+#define SECTION_NO_ANCHOR SECTION_MACH_DEP
+
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+void
+darwin_init_sections (void)
+{
+#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) \
+ darwin_sections[NAME] = \
+ get_unnamed_section (FLAGS, (OBJC \
+ ? output_objc_section_asm_op \
+ : output_section_asm_op), \
+ "\t" DIRECTIVE);
+#include "config/darwin-sections.def"
+#undef DEF_SECTION
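+/* For instance, the const_section entry in darwin-sections.def
+   effectively expands here to:
+     darwin_sections[const_section]
+       = get_unnamed_section (0, output_section_asm_op, "\t.const");  */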
+
+ readonly_data_section = darwin_sections[const_section];
+ exception_section = darwin_sections[darwin_exception_section];
+ eh_frame_section = darwin_sections[darwin_eh_frame_section];
+}
+
+int
+name_needs_quotes (const char *name)
+{
+ int c;
+ while ((c = *name++) != '\0')
+ if (! ISIDNUM (c)
+ && c != '.' && c != '$' && c != '_' )
+ return 1;
+ return 0;
+}
+
+/* Return true if SYM_REF can be used without an indirection. */
+int
+machopic_symbol_defined_p (rtx sym_ref)
+{
+ if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_DEFINED)
+ return true;
+
+  /* If the symbol is local and is not extern to this file, then it
+     might be possible to declare the symbol as defined.  */
+ if (SYMBOL_REF_LOCAL_P (sym_ref) && ! SYMBOL_REF_EXTERNAL_P (sym_ref))
+ {
+ /* If the symbol references a variable and the variable is a
+ common symbol, then this symbol is not defined. */
+ if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_VARIABLE)
+ {
+ tree decl = SYMBOL_REF_DECL (sym_ref);
+ if (!decl)
+ return true;
+ if (DECL_COMMON (decl))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/* This module assumes that (const (symbol_ref "foo")) is a legal pic
+ reference, which will not be changed. */
+
+enum machopic_addr_class
+machopic_classify_symbol (rtx sym_ref)
+{
+ bool function_p;
+
+ function_p = SYMBOL_REF_FUNCTION_P (sym_ref);
+ if (machopic_symbol_defined_p (sym_ref))
+ return (function_p
+ ? MACHOPIC_DEFINED_FUNCTION : MACHOPIC_DEFINED_DATA);
+ else
+ return (function_p
+ ? MACHOPIC_UNDEFINED_FUNCTION : MACHOPIC_UNDEFINED_DATA);
+}
+
+#ifndef TARGET_FIX_AND_CONTINUE
+#define TARGET_FIX_AND_CONTINUE 0
+#endif
+
+/* Indicate when fix-and-continue style code generation is being used
+ and when a reference to data should be indirected so that it can be
+ rebound in a new translation unit to reference the original instance
+ of that data. Symbol names that are for code generation local to
+ the translation unit are bound to the new translation unit;
+ currently this means symbols that begin with L or _OBJC_;
+ otherwise, we indicate that an indirect reference should be made to
+ permit the runtime to rebind new instances of the translation unit
+ to the original instance of the data. */
+
+static int
+indirect_data (rtx sym_ref)
+{
+ int lprefix;
+ const char *name;
+
+ /* If we aren't generating fix-and-continue code, don't do anything
+ special. */
+ if (TARGET_FIX_AND_CONTINUE == 0)
+ return 0;
+
+  /* Otherwise, all symbols except those that begin with L or _OBJC_
+ are indirected. Symbols that begin with L and _OBJC_ are always
+ bound to the current translation unit as they are used for
+ generated local data of the translation unit. */
+
+ name = XSTR (sym_ref, 0);
+
+ lprefix = (((name[0] == '*' || name[0] == '&')
+ && (name[1] == 'L' || (name[1] == '"' && name[2] == 'L')))
+ || (strncmp (name, "_OBJC_", 6) == 0));
+
+ return ! lprefix;
+}
+
+static int
+machopic_data_defined_p (rtx sym_ref)
+{
+ if (indirect_data (sym_ref))
+ return 0;
+
+ switch (machopic_classify_symbol (sym_ref))
+ {
+ case MACHOPIC_DEFINED_DATA:
+ case MACHOPIC_DEFINED_FUNCTION:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+void
+machopic_define_symbol (rtx mem)
+{
+ rtx sym_ref;
+
+ gcc_assert (GET_CODE (mem) == MEM);
+ sym_ref = XEXP (mem, 0);
+ SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED;
+}
+
+/* Return either ORIG or:
+
+ (const:P (unspec:P [ORIG] UNSPEC_MACHOPIC_OFFSET))
+
+ depending on MACHO_DYNAMIC_NO_PIC_P. */
+rtx
+machopic_gen_offset (rtx orig)
+{
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ return orig;
+ else
+ {
+ /* Play games to avoid marking the function as needing pic if we
+ are being called as part of the cost-estimation process. */
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
+ crtl->uses_pic_offset_table = 1;
+ orig = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),
+ UNSPEC_MACHOPIC_OFFSET);
+ return gen_rtx_CONST (Pmode, orig);
+ }
+}
+
+static GTY(()) const char * function_base_func_name;
+static GTY(()) int current_pic_label_num;
+
+void
+machopic_output_function_base_name (FILE *file)
+{
+ const char *current_name;
+
+ /* If dynamic-no-pic is on, we should not get here. */
+ gcc_assert (!MACHO_DYNAMIC_NO_PIC_P);
+ /* When we are generating _get_pc thunks within stubs, there is no current
+ function. */
+ if (current_function_decl)
+ {
+ current_name =
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl));
+ if (function_base_func_name != current_name)
+ {
+ ++current_pic_label_num;
+ function_base_func_name = current_name;
+ }
+ }
+ else
+ {
+ ++current_pic_label_num;
+ function_base_func_name = "L_machopic_stub_dummy";
+ }
+ fprintf (file, "L%011d$pb", current_pic_label_num);
+}
+
+/* The suffix attached to non-lazy pointer symbols. */
+#define NON_LAZY_POINTER_SUFFIX "$non_lazy_ptr"
+/* The suffix attached to stub symbols. */
+#define STUB_SUFFIX "$stub"
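+/* With Darwin's user_label_prefix of "_", the names built below by
+   machopic_indirection_name for a symbol "foo" are therefore
+   "&L_foo$non_lazy_ptr" and "&L_foo$stub".  */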
+
+typedef struct GTY (()) machopic_indirection
+{
+ /* The SYMBOL_REF for the entity referenced. */
+ rtx symbol;
+ /* The name of the stub or non-lazy pointer. */
+ const char * ptr_name;
+ /* True iff this entry is for a stub (as opposed to a non-lazy
+ pointer). */
+ bool stub_p;
+  /* True iff this stub or non-lazy pointer has been referenced.  */
+ bool used;
+} machopic_indirection;
+
+/* A table mapping stub names and non-lazy pointer names to
+ SYMBOL_REFs for the stubbed-to and pointed-to entities. */
+
+static GTY ((param_is (struct machopic_indirection))) htab_t
+ machopic_indirections;
+
+/* Return a hash value for a SLOT in the indirections hash table. */
+
+static hashval_t
+machopic_indirection_hash (const void *slot)
+{
+ const machopic_indirection *p = (const machopic_indirection *) slot;
+ return htab_hash_string (p->ptr_name);
+}
+
+/* Returns true if the KEY is the same as that associated with
+ SLOT. */
+
+static int
+machopic_indirection_eq (const void *slot, const void *key)
+{
+ return strcmp (((const machopic_indirection *) slot)->ptr_name,
+ (const char *) key) == 0;
+}
+
+/* Return the name of the non-lazy pointer (if STUB_P is false) or
+   stub (if STUB_P is true) corresponding to the given name.  */
+
+const char *
+machopic_indirection_name (rtx sym_ref, bool stub_p)
+{
+ char *buffer;
+ const char *name = XSTR (sym_ref, 0);
+ size_t namelen = strlen (name);
+ machopic_indirection *p;
+ void ** slot;
+ bool needs_quotes;
+ const char *suffix;
+ const char *prefix = user_label_prefix;
+ const char *quote = "";
+ tree id;
+
+ id = maybe_get_identifier (name);
+ if (id)
+ {
+ tree id_orig = id;
+
+ while (IDENTIFIER_TRANSPARENT_ALIAS (id))
+ id = TREE_CHAIN (id);
+ if (id != id_orig)
+ {
+ name = IDENTIFIER_POINTER (id);
+ namelen = strlen (name);
+ }
+ }
+
+ if (name[0] == '*')
+ {
+ prefix = "";
+ ++name;
+ --namelen;
+ }
+
+ needs_quotes = name_needs_quotes (name);
+ if (needs_quotes)
+ {
+ quote = "\"";
+ }
+
+ if (stub_p)
+ suffix = STUB_SUFFIX;
+ else
+ suffix = NON_LAZY_POINTER_SUFFIX;
+
+ buffer = XALLOCAVEC (char, strlen ("&L")
+ + strlen (prefix)
+ + namelen
+ + strlen (suffix)
+ + 2 * strlen (quote)
+ + 1 /* '\0' */);
+
+ /* Construct the name of the non-lazy pointer or stub. */
+ sprintf (buffer, "&%sL%s%s%s%s", quote, prefix, name, suffix, quote);
+
+ if (!machopic_indirections)
+ machopic_indirections = htab_create_ggc (37,
+ machopic_indirection_hash,
+ machopic_indirection_eq,
+ /*htab_del=*/NULL);
+
+ slot = htab_find_slot_with_hash (machopic_indirections, buffer,
+ htab_hash_string (buffer), INSERT);
+ if (*slot)
+ {
+ p = (machopic_indirection *) *slot;
+ }
+ else
+ {
+ p = ggc_alloc_machopic_indirection ();
+ p->symbol = sym_ref;
+ p->ptr_name = xstrdup (buffer);
+ p->stub_p = stub_p;
+ p->used = false;
+ *slot = p;
+ }
+
+ return p->ptr_name;
+}
+
+/* Return the name of the stub for the mcount function. */
+
+const char*
+machopic_mcount_stub_name (void)
+{
+ rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount");
+ return machopic_indirection_name (symbol, /*stub_p=*/true);
+}
+
+/* If NAME is the name of a stub or a non-lazy pointer, mark the stub
+ or non-lazy pointer as used -- and mark the object to which the
+ pointer/stub refers as used as well, since the pointer/stub will
+ emit a reference to it. */
+
+void
+machopic_validate_stub_or_non_lazy_ptr (const char *name)
+{
+ machopic_indirection *p;
+
+ p = ((machopic_indirection *)
+ (htab_find_with_hash (machopic_indirections, name,
+ htab_hash_string (name))));
+ if (p && ! p->used)
+ {
+ const char *real_name;
+ tree id;
+
+ p->used = true;
+
+ /* Do what output_addr_const will do when we actually call it. */
+ if (SYMBOL_REF_DECL (p->symbol))
+ mark_decl_referenced (SYMBOL_REF_DECL (p->symbol));
+
+ real_name = targetm.strip_name_encoding (XSTR (p->symbol, 0));
+
+ id = maybe_get_identifier (real_name);
+ if (id)
+ mark_referenced (id);
+ }
+}
+
+/* Transform ORIG, which may be any data source, to the corresponding
+ source using indirections. */
+
+rtx
+machopic_indirect_data_reference (rtx orig, rtx reg)
+{
+ rtx ptr_ref = orig;
+
+ if (! MACHOPIC_INDIRECT)
+ return orig;
+
+ if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ int defined = machopic_data_defined_p (orig);
+
+ if (defined && MACHO_DYNAMIC_NO_PIC_P)
+ {
+ if (DARWIN_PPC)
+ {
+ /* Create a new register for CSE opportunities. */
+ rtx hi_reg = (!can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode));
+ emit_insn (gen_macho_high (hi_reg, orig));
+ emit_insn (gen_macho_low (reg, hi_reg, orig));
+ return reg;
+ }
+ else if (DARWIN_X86)
+ return orig;
+ else
+ /* Some other CPU -- write me! */
+ gcc_unreachable ();
+ }
+ else if (defined)
+ {
+ rtx offset = NULL;
+ if (DARWIN_PPC || HAVE_lo_sum)
+ offset = machopic_gen_offset (orig);
+
+ if (DARWIN_PPC)
+ {
+ rtx hi_sum_reg = (!can_create_pseudo_p ()
+ ? reg
+ : gen_reg_rtx (Pmode));
+
+ gcc_assert (reg);
+
+ emit_insn (gen_rtx_SET (Pmode, hi_sum_reg,
+ gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+ gen_rtx_HIGH (Pmode, offset))));
+ emit_insn (gen_rtx_SET (Pmode, reg,
+ gen_rtx_LO_SUM (Pmode, hi_sum_reg,
+ copy_rtx (offset))));
+
+ orig = reg;
+ }
+ else if (HAVE_lo_sum)
+ {
+ gcc_assert (reg);
+
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_HIGH (Pmode, offset)));
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_LO_SUM (Pmode, reg,
+ copy_rtx (offset))));
+ emit_use (pic_offset_table_rtx);
+
+ orig = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, reg);
+ }
+ return orig;
+ }
+
+ ptr_ref = (gen_rtx_SYMBOL_REF
+ (Pmode,
+ machopic_indirection_name (orig, /*stub_p=*/false)));
+
+ SYMBOL_REF_DATA (ptr_ref) = SYMBOL_REF_DATA (orig);
+
+ ptr_ref = gen_const_mem (Pmode, ptr_ref);
+ machopic_define_symbol (ptr_ref);
+
+ if (DARWIN_X86
+ && reg
+ && MACHO_DYNAMIC_NO_PIC_P)
+ {
+ emit_insn (gen_rtx_SET (Pmode, reg, ptr_ref));
+ ptr_ref = reg;
+ }
+
+ return ptr_ref;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ /* If "(const (plus ...", walk the PLUS and return that result.
+ PLUS processing (below) will restore the "(const ..." if
+ appropriate. */
+ if (GET_CODE (XEXP (orig, 0)) == PLUS)
+ return machopic_indirect_data_reference (XEXP (orig, 0), reg);
+ else
+ return orig;
+ }
+ else if (GET_CODE (orig) == MEM)
+ {
+ XEXP (ptr_ref, 0) =
+ machopic_indirect_data_reference (XEXP (orig, 0), reg);
+ return ptr_ref;
+ }
+ else if (GET_CODE (orig) == PLUS)
+ {
+ rtx base, result;
+ /* When the target is i386, this code prevents crashes due to the
+ compiler's ignorance of how to move the PIC base register to
+ other registers. (The reload phase sometimes introduces such
+ insns.) */
+ if (GET_CODE (XEXP (orig, 0)) == REG
+ && REGNO (XEXP (orig, 0)) == PIC_OFFSET_TABLE_REGNUM
+ /* Prevent the same register from being erroneously used
+ as both the base and index registers. */
+ && (DARWIN_X86 && (GET_CODE (XEXP (orig, 1)) == CONST))
+ && reg)
+ {
+ emit_move_insn (reg, XEXP (orig, 0));
+ XEXP (ptr_ref, 0) = reg;
+ return ptr_ref;
+ }
+
+ /* Legitimize both operands of the PLUS. */
+ base = machopic_indirect_data_reference (XEXP (orig, 0), reg);
+ orig = machopic_indirect_data_reference (XEXP (orig, 1),
+ (base == reg ? 0 : reg));
+ if (MACHOPIC_INDIRECT && (GET_CODE (orig) == CONST_INT))
+ result = plus_constant (base, INTVAL (orig));
+ else
+ result = gen_rtx_PLUS (Pmode, base, orig);
+
+ if (MACHOPIC_JUST_INDIRECT && GET_CODE (base) == MEM)
+ {
+ if (reg)
+ {
+ emit_move_insn (reg, result);
+ result = reg;
+ }
+ else
+ {
+ result = force_reg (GET_MODE (result), result);
+ }
+ }
+
+ return result;
+ }
+ return ptr_ref;
+}
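+
+/* For example (a sketch): under MACHOPIC_INDIRECT, a reference to a
+ symbol "foo" that is not defined in this module is rewritten from
+ (symbol_ref "foo") to roughly
+ (mem/u (symbol_ref "&L_foo$non_lazy_ptr")),
+ i.e. a load through the non-lazy pointer; a symbol defined here is
+ instead reached PIC-relatively via a HIGH/LO_SUM pair off the
+ picbase. */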
+
+/* Transform TARGET (a MEM), which is a function call target, to the
+ corresponding symbol_stub if necessary. Return a new MEM. */
+
+rtx
+machopic_indirect_call_target (rtx target)
+{
+ if (! darwin_emit_branch_islands)
+ return target;
+
+ if (GET_CODE (target) != MEM)
+ return target;
+
+ if (MACHOPIC_INDIRECT
+ && GET_CODE (XEXP (target, 0)) == SYMBOL_REF
+ && !(SYMBOL_REF_FLAGS (XEXP (target, 0))
+ & MACHO_SYMBOL_FLAG_DEFINED))
+ {
+ rtx sym_ref = XEXP (target, 0);
+ const char *stub_name = machopic_indirection_name (sym_ref,
+ /*stub_p=*/true);
+ enum machine_mode mode = GET_MODE (sym_ref);
+
+ XEXP (target, 0) = gen_rtx_SYMBOL_REF (mode, stub_name);
+ SYMBOL_REF_DATA (XEXP (target, 0)) = SYMBOL_REF_DATA (sym_ref);
+ MEM_READONLY_P (target) = 1;
+ MEM_NOTRAP_P (target) = 1;
+ }
+
+ return target;
+}
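+
+/* For example (a sketch): a call through (mem (symbol_ref "foo")),
+ where "foo" is not defined in this unit, becomes a call through
+ (mem (symbol_ref "&L_foo$stub")); the stub body itself is emitted
+ later by machopic_output_indirection. */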
+
+rtx
+machopic_legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+{
+ rtx pic_ref = orig;
+
+ if (! MACHOPIC_INDIRECT)
+ return orig;
+
+ /* First handle a simple SYMBOL_REF or LABEL_REF. */
+ if (GET_CODE (orig) == LABEL_REF
+ || GET_CODE (orig) == SYMBOL_REF)
+ {
+ /* addr(foo) = &func+(foo-func) */
+ orig = machopic_indirect_data_reference (orig, reg);
+
+ if (GET_CODE (orig) == PLUS
+ && GET_CODE (XEXP (orig, 0)) == REG)
+ {
+ if (reg == 0)
+ return force_reg (mode, orig);
+
+ emit_move_insn (reg, orig);
+ return reg;
+ }
+
+ if (GET_CODE (orig) == MEM)
+ {
+ if (reg == 0)
+ {
+ gcc_assert (!reload_in_progress);
+ reg = gen_reg_rtx (Pmode);
+ }
+
+#if HAVE_lo_sum
+ if (MACHO_DYNAMIC_NO_PIC_P
+ && (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (orig, 0)) == LABEL_REF))
+ {
+#if defined (TARGET_TOC) /* ppc */
+ rtx temp_reg = (!can_create_pseudo_p ()
+ ? reg :
+ gen_reg_rtx (Pmode));
+ rtx asym = XEXP (orig, 0);
+ rtx mem;
+
+ emit_insn (gen_macho_high (temp_reg, asym));
+ mem = gen_const_mem (GET_MODE (orig),
+ gen_rtx_LO_SUM (Pmode, temp_reg,
+ copy_rtx (asym)));
+ emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
+#else
+ /* Some other CPU -- write me! Right now there are no other
+ platforms that can use dynamic-no-pic. */
+ gcc_unreachable ();
+#endif
+ pic_ref = reg;
+ }
+ else
+ if (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (orig, 0)) == LABEL_REF)
+ {
+ rtx offset = machopic_gen_offset (XEXP (orig, 0));
+#if defined (TARGET_TOC) /* i.e., PowerPC */
+ /* Generating a new reg may expose opportunities for
+ common subexpression elimination. */
+ rtx hi_sum_reg = (!can_create_pseudo_p ()
+ ? reg
+ : gen_reg_rtx (Pmode));
+ rtx mem;
+ rtx insn;
+ rtx sum;
+
+ sum = gen_rtx_HIGH (Pmode, offset);
+ if (! MACHO_DYNAMIC_NO_PIC_P)
+ sum = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, sum);
+
+ emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, sum));
+
+ mem = gen_const_mem (GET_MODE (orig),
+ gen_rtx_LO_SUM (Pmode,
+ hi_sum_reg,
+ copy_rtx (offset)));
+ insn = emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
+ set_unique_reg_note (insn, REG_EQUAL, pic_ref);
+
+ pic_ref = reg;
+#else
+ emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
+
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_HIGH (Pmode,
+ gen_rtx_CONST (Pmode,
+ offset))));
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_LO_SUM (Pmode, reg,
+ gen_rtx_CONST (Pmode,
+ copy_rtx (offset)))));
+ pic_ref = gen_rtx_PLUS (Pmode,
+ pic_offset_table_rtx, reg);
+#endif
+ }
+ else
+#endif /* HAVE_lo_sum */
+ {
+ rtx pic = pic_offset_table_rtx;
+ if (GET_CODE (pic) != REG)
+ {
+ emit_move_insn (reg, pic);
+ pic = reg;
+ }
+#if 0
+ emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
+#endif
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (REGNO (pic), true);
+ pic_ref = gen_rtx_PLUS (Pmode, pic,
+ machopic_gen_offset (XEXP (orig, 0)));
+ }
+
+#if !defined (TARGET_TOC)
+ emit_move_insn (reg, pic_ref);
+ pic_ref = gen_const_mem (GET_MODE (orig), reg);
+#endif
+ }
+ else
+ {
+
+#if HAVE_lo_sum
+ if (GET_CODE (orig) == SYMBOL_REF
+ || GET_CODE (orig) == LABEL_REF)
+ {
+ rtx offset = machopic_gen_offset (orig);
+#if defined (TARGET_TOC) /* i.e., PowerPC */
+ rtx hi_sum_reg;
+
+ if (reg == 0)
+ {
+ gcc_assert (!reload_in_progress);
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ hi_sum_reg = reg;
+
+ emit_insn (gen_rtx_SET (Pmode, hi_sum_reg,
+ (MACHO_DYNAMIC_NO_PIC_P)
+ ? gen_rtx_HIGH (Pmode, offset)
+ : gen_rtx_PLUS (Pmode,
+ pic_offset_table_rtx,
+ gen_rtx_HIGH (Pmode,
+ offset))));
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_LO_SUM (Pmode,
+ hi_sum_reg,
+ copy_rtx (offset))));
+ pic_ref = reg;
+#else
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_HIGH (Pmode, offset)));
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_LO_SUM (Pmode, reg,
+ copy_rtx (offset))));
+ pic_ref = gen_rtx_PLUS (Pmode,
+ pic_offset_table_rtx, reg);
+#endif
+ }
+ else
+#endif /* HAVE_lo_sum */
+ {
+ if (REG_P (orig)
+ || GET_CODE (orig) == SUBREG)
+ {
+ return orig;
+ }
+ else
+ {
+ rtx pic = pic_offset_table_rtx;
+ if (GET_CODE (pic) != REG)
+ {
+ emit_move_insn (reg, pic);
+ pic = reg;
+ }
+#if 0
+ emit_use (pic_offset_table_rtx);
+#endif
+ if (reload_in_progress)
+ df_set_regs_ever_live (REGNO (pic), true);
+ pic_ref = gen_rtx_PLUS (Pmode,
+ pic,
+ machopic_gen_offset (orig));
+ }
+ }
+ }
+
+ if (GET_CODE (pic_ref) != REG)
+ {
+ if (reg != 0)
+ {
+ emit_move_insn (reg, pic_ref);
+ return reg;
+ }
+ else
+ {
+ return force_reg (mode, pic_ref);
+ }
+ }
+ else
+ {
+ return pic_ref;
+ }
+ }
+
+ else if (GET_CODE (orig) == SYMBOL_REF)
+ return orig;
+
+ else if (GET_CODE (orig) == PLUS
+ && (GET_CODE (XEXP (orig, 0)) == MEM
+ || GET_CODE (XEXP (orig, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (orig, 0)) == LABEL_REF)
+ && XEXP (orig, 0) != pic_offset_table_rtx
+ && GET_CODE (XEXP (orig, 1)) != REG)
+
+ {
+ rtx base;
+ int is_complex = (GET_CODE (XEXP (orig, 0)) == MEM);
+
+ base = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg);
+ orig = machopic_legitimize_pic_address (XEXP (orig, 1),
+ Pmode, (base == reg ? 0 : reg));
+ if (GET_CODE (orig) == CONST_INT)
+ {
+ pic_ref = plus_constant (base, INTVAL (orig));
+ is_complex = 1;
+ }
+ else
+ pic_ref = gen_rtx_PLUS (Pmode, base, orig);
+
+ if (reg && is_complex)
+ {
+ emit_move_insn (reg, pic_ref);
+ pic_ref = reg;
+ }
+ /* Likewise, should we set special REG_NOTEs here? */
+ }
+
+ else if (GET_CODE (orig) == CONST)
+ {
+ return machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg);
+ }
+
+ else if (GET_CODE (orig) == MEM
+ && GET_CODE (XEXP (orig, 0)) == SYMBOL_REF)
+ {
+ rtx addr = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg);
+ addr = replace_equiv_address (orig, addr);
+ emit_move_insn (reg, addr);
+ pic_ref = reg;
+ }
+
+ return pic_ref;
+}
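+
+/* For example (a sketch, PowerPC-flavoured, assuming r31 holds the
+ picbase): legitimizing the address of a defined symbol "bar" emits
+ roughly
+ addis rT,r31,ha16(_bar-L00000000001$pb)
+ la rD,lo16(_bar-L00000000001$pb)(rT)
+ where the symbol-minus-picbase offset is the UNSPEC produced by
+ machopic_gen_offset. */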
+
+/* Output the stub or non-lazy pointer in *SLOT, if it has been used.
+ DATA is the FILE* for assembly output. Called from
+ htab_traverse. */
+
+static int
+machopic_output_indirection (void **slot, void *data)
+{
+ machopic_indirection *p = *((machopic_indirection **) slot);
+ FILE *asm_out_file = (FILE *) data;
+ rtx symbol;
+ const char *sym_name;
+ const char *ptr_name;
+
+ if (!p->used)
+ return 1;
+
+ symbol = p->symbol;
+ sym_name = XSTR (symbol, 0);
+ ptr_name = p->ptr_name;
+
+ if (p->stub_p)
+ {
+ char *sym;
+ char *stub;
+ tree id;
+
+ id = maybe_get_identifier (sym_name);
+ if (id)
+ {
+ tree id_orig = id;
+
+ while (IDENTIFIER_TRANSPARENT_ALIAS (id))
+ id = TREE_CHAIN (id);
+ if (id != id_orig)
+ sym_name = IDENTIFIER_POINTER (id);
+ }
+
+ sym = XALLOCAVEC (char, strlen (sym_name) + 2);
+ if (sym_name[0] == '*' || sym_name[0] == '&')
+ strcpy (sym, sym_name + 1);
+ else if (sym_name[0] == '-' || sym_name[0] == '+')
+ strcpy (sym, sym_name);
+ else
+ sprintf (sym, "%s%s", user_label_prefix, sym_name);
+
+ stub = XALLOCAVEC (char, strlen (ptr_name) + 2);
+ if (ptr_name[0] == '*' || ptr_name[0] == '&')
+ strcpy (stub, ptr_name + 1);
+ else
+ sprintf (stub, "%s%s", user_label_prefix, ptr_name);
+
+ machopic_output_stub (asm_out_file, sym, stub);
+ }
+ else if (! indirect_data (symbol)
+ && (machopic_symbol_defined_p (symbol)
+ || SYMBOL_REF_LOCAL_P (symbol)))
+ {
+ switch_to_section (data_section);
+ assemble_align (GET_MODE_ALIGNMENT (Pmode));
+ assemble_label (asm_out_file, ptr_name);
+ assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name),
+ GET_MODE_SIZE (Pmode),
+ GET_MODE_ALIGNMENT (Pmode), 1);
+ }
+ else
+ {
+ rtx init = const0_rtx;
+
+ switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]);
+
+ /* Mach-O symbols are passed around in code through indirect
+ references and the original symbol_ref hasn't passed through
+ the generic handling and reference-catching in
+ output_operand, so we need to manually mark weak references
+ as such. */
+ if (SYMBOL_REF_WEAK (symbol))
+ {
+ tree decl = SYMBOL_REF_DECL (symbol);
+ gcc_assert (DECL_P (decl));
+
+ if (decl != NULL_TREE
+ && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
+ /* Handle only actual external-only definitions, not
+ e.g. extern inline code or variables for which
+ storage has been allocated. */
+ && !TREE_STATIC (decl))
+ {
+ fputs ("\t.weak_reference ", asm_out_file);
+ assemble_name (asm_out_file, sym_name);
+ fputc ('\n', asm_out_file);
+ }
+ }
+
+ assemble_name (asm_out_file, ptr_name);
+ fprintf (asm_out_file, ":\n");
+
+ fprintf (asm_out_file, "\t.indirect_symbol ");
+ assemble_name (asm_out_file, sym_name);
+ fprintf (asm_out_file, "\n");
+
+ /* Variables that are marked with MACHO_SYMBOL_STATIC need to
+ have their symbol name instead of 0 in the second entry of
+ the non-lazy symbol pointer data structure when they are
+ defined. This allows the runtime to rebind newer instances
+ of the translation unit with the original instance of the
+ symbol. */
+
+ if ((SYMBOL_REF_FLAGS (symbol) & MACHO_SYMBOL_STATIC)
+ && machopic_symbol_defined_p (symbol))
+ init = gen_rtx_SYMBOL_REF (Pmode, sym_name);
+
+ assemble_integer (init, GET_MODE_SIZE (Pmode),
+ GET_MODE_ALIGNMENT (Pmode), 1);
+ }
+
+ return 1;
+}
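+
+/* For example (a sketch, assuming user_label_prefix "_"): for an
+ undefined symbol "foo" whose non-lazy pointer was used, the final
+ branch above emits
+ L_foo$non_lazy_ptr:
+ .indirect_symbol _foo
+ .long 0
+ (.quad on 64-bit targets); for a symbol defined in this unit, the
+ pointer goes to the data section and is initialized with the
+ symbol's address directly. */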
+
+void
+machopic_finish (FILE *asm_out_file)
+{
+ if (machopic_indirections)
+ htab_traverse_noresize (machopic_indirections,
+ machopic_output_indirection,
+ asm_out_file);
+}
+
+int
+machopic_operand_p (rtx op)
+{
+ if (MACHOPIC_JUST_INDIRECT)
+ return (GET_CODE (op) == SYMBOL_REF
+ && machopic_symbol_defined_p (op));
+ else
+ return (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == UNSPEC
+ && XINT (XEXP (op, 0), 1) == UNSPEC_MACHOPIC_OFFSET);
+}
+
+/* This function records whether a given name corresponds to a defined
+ or undefined function or variable, for machopic_classify_ident to
+ use later. */
+
+void
+darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED)
+{
+ rtx sym_ref;
+
+ /* Do the standard encoding things first. */
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) != FUNCTION_DECL && TREE_CODE (decl) != VAR_DECL)
+ return;
+
+ sym_ref = XEXP (rtl, 0);
+ if (TREE_CODE (decl) == VAR_DECL)
+ SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_VARIABLE;
+
+ if (!DECL_EXTERNAL (decl)
+ && (!TREE_PUBLIC (decl) || !DECL_WEAK (decl))
+ && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (decl))
+ && ((TREE_STATIC (decl)
+ && (!DECL_COMMON (decl) || !TREE_PUBLIC (decl)))
+ || (!DECL_COMMON (decl) && DECL_INITIAL (decl)
+ && DECL_INITIAL (decl) != error_mark_node)))
+ SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED;
+
+ if (! TREE_PUBLIC (decl))
+ SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_STATIC;
+}
+
+void
+darwin_mark_decl_preserved (const char *name)
+{
+ fprintf (asm_out_file, "\t.no_dead_strip ");
+ assemble_name (asm_out_file, name);
+ fputc ('\n', asm_out_file);
+}
+
+static section *
+darwin_rodata_section (int weak, bool zsize)
+{
+ return (weak
+ ? darwin_sections[const_coal_section]
+ : (zsize ? darwin_sections[zobj_const_section]
+ : darwin_sections[const_section]));
+}
+
+static section *
+darwin_mergeable_string_section (tree exp,
+ unsigned HOST_WIDE_INT align)
+{
+ /* Darwin's ld expects to see non-writable string literals in the .cstring
+ section. Later versions of ld check and complain when CFStrings are
+ enabled. Therefore we shall force the strings into .cstring since we
+ don't support writable ones anyway. */
+ if ((darwin_constant_cfstrings || flag_merge_constants)
+ && TREE_CODE (exp) == STRING_CST
+ && TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE
+ && align <= 256
+ && (int_size_in_bytes (TREE_TYPE (exp))
+ == TREE_STRING_LENGTH (exp))
+ && ((size_t) TREE_STRING_LENGTH (exp)
+ == strlen (TREE_STRING_POINTER (exp)) + 1))
+ return darwin_sections[cstring_section];
+
+ if (DARWIN_SECTION_ANCHORS && flag_section_anchors
+ && TREE_CODE (exp) == STRING_CST
+ && TREE_STRING_LENGTH (exp) == 0)
+ return darwin_sections[zobj_const_section];
+
+ return readonly_data_section;
+}
+
+#ifndef HAVE_GAS_LITERAL16
+#define HAVE_GAS_LITERAL16 0
+#endif
+
+static section *
+darwin_mergeable_constant_section (tree exp,
+ unsigned HOST_WIDE_INT align,
+ bool zsize)
+{
+ enum machine_mode mode = DECL_MODE (exp);
+ unsigned int modesize = GET_MODE_BITSIZE (mode);
+
+ if (DARWIN_SECTION_ANCHORS
+ && flag_section_anchors
+ && zsize)
+ return darwin_sections[zobj_const_section];
+
+ if (flag_merge_constants
+ && mode != VOIDmode
+ && mode != BLKmode
+ && modesize <= align
+ && align >= 8
+ && align <= 256
+ && (align & (align -1)) == 0)
+ {
+ tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp));
+
+ if (TREE_CODE (size) == INTEGER_CST
+ && TREE_INT_CST_LOW (size) == 4
+ && TREE_INT_CST_HIGH (size) == 0)
+ return darwin_sections[literal4_section];
+ else if (TREE_CODE (size) == INTEGER_CST
+ && TREE_INT_CST_LOW (size) == 8
+ && TREE_INT_CST_HIGH (size) == 0)
+ return darwin_sections[literal8_section];
+ else if (HAVE_GAS_LITERAL16
+ && TARGET_64BIT
+ && TREE_CODE (size) == INTEGER_CST
+ && TREE_INT_CST_LOW (size) == 16
+ && TREE_INT_CST_HIGH (size) == 0)
+ return darwin_sections[literal16_section];
+ else
+ return readonly_data_section;
+ }
+
+ return readonly_data_section;
+}
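+
+/* For example (a sketch): with constant merging enabled, an 8-byte
+ double constant aligned to at least 64 bits is placed in the
+ literal8 section; a 16-byte constant additionally needs a
+ literal16-capable assembler and a 64-bit target. Everything else
+ falls back to readonly_data_section. */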
+
+int
+machopic_reloc_rw_mask (void)
+{
+ return MACHOPIC_INDIRECT ? 3 : 0;
+}
+
+/* We have to deal with ObjC/C++ metadata section placement in the common
+ code, since it will also be called from LTO.
+
+ Return metadata attributes, if present (searching for ABI=2 first)
+ Return NULL_TREE if no such attributes are found. */
+
+static tree
+is_objc_metadata (tree decl)
+{
+ if (DECL_P (decl)
+ && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL)
+ && DECL_ATTRIBUTES (decl))
+ {
+ tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl));
+ if (meta)
+ return meta;
+ meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl));
+ if (meta)
+ return meta;
+ }
+ return NULL_TREE;
+}
+
+/* Return the section required for Objective C ABI 2 metadata. */
+static section *
+darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
+{
+ const char *p;
+ tree ident = TREE_VALUE (meta);
+ gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
+ p = IDENTIFIER_POINTER (ident);
+
+ /* If we are in LTO, then we don't know the state of flag_next_runtime
+ or flag_objc_abi when the code was generated. We set these from the
+ meta-data - which is needed to deal with const string constructors. */
+
+ flag_next_runtime = 1;
+ flag_objc_abi = 2;
+
+ if (base == data_section)
+ base = darwin_sections[objc2_metadata_section];
+
+ /* Most of the OBJC2 meta-data ends up in the base section, so check
+ it first. */
+ if (!strncmp (p, "V2_BASE", 7))
+ return base;
+ else if (!strncmp (p, "V2_STRG", 7))
+ return darwin_sections[cstring_section];
+
+ else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7))
+ return darwin_sections[objc2_classdefs_section];
+ else if (!strncmp (p, "V2_MREF", 7))
+ return darwin_sections[objc2_message_refs_section];
+ else if (!strncmp (p, "V2_CLRF", 7))
+ return darwin_sections[objc2_classrefs_section];
+ else if (!strncmp (p, "V2_SURF", 7))
+ return darwin_sections[objc2_super_classrefs_section];
+ else if (!strncmp (p, "V2_NLCL", 7))
+ return darwin_sections[objc2_nonlazy_class_section];
+ else if (!strncmp (p, "V2_CLAB", 7))
+ return darwin_sections[objc2_classlist_section];
+ else if (!strncmp (p, "V2_SRFS", 7))
+ return darwin_sections[objc2_selector_refs_section];
+ else if (!strncmp (p, "V2_NLCA", 7))
+ return darwin_sections[objc2_nonlazy_category_section];
+ else if (!strncmp (p, "V2_CALA", 7))
+ return darwin_sections[objc2_categorylist_section];
+
+ else if (!strncmp (p, "V2_PLST", 7))
+ return darwin_sections[objc2_protocollist_section];
+ else if (!strncmp (p, "V2_PRFS", 7))
+ return darwin_sections[objc2_protocolrefs_section];
+
+ else if (!strncmp (p, "V2_INFO", 7))
+ return darwin_sections[objc2_image_info_section];
+
+ else if (!strncmp (p, "V2_EHTY", 7))
+ return darwin_sections[data_coal_section];
+
+ else if (!strncmp (p, "V2_CSTR", 7))
+ return darwin_sections[objc2_constant_string_object_section];
+
+ /* Not recognized, default. */
+ return base;
+}
+
+/* Return the section required for Objective C ABI 0/1 metadata. */
+static section *
+darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
+{
+ const char *p;
+ tree ident = TREE_VALUE (meta);
+ gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
+ p = IDENTIFIER_POINTER (ident);
+
+ /* If we are in LTO, then we don't know the state of flag_next_runtime
+ or flag_objc_abi when the code was generated. We set these from the
+ meta-data - which is needed to deal with const string constructors. */
+ flag_next_runtime = 1;
+ if (!global_options_set.x_flag_objc_abi)
+ flag_objc_abi = 1;
+
+ /* String sections first, since there are lots of strings. */
+ if (!strncmp (p, "V1_STRG", 7))
+ return darwin_sections[cstring_section];
+ else if (!strncmp (p, "V1_CLSN", 7))
+ return darwin_sections[objc_class_names_section];
+ else if (!strncmp (p, "V1_METN", 7))
+ return darwin_sections[objc_meth_var_names_section];
+ else if (!strncmp (p, "V1_METT", 7))
+ return darwin_sections[objc_meth_var_types_section];
+
+ else if (!strncmp (p, "V1_CLAS", 7))
+ return darwin_sections[objc_class_section];
+ else if (!strncmp (p, "V1_META", 7))
+ return darwin_sections[objc_meta_class_section];
+ else if (!strncmp (p, "V1_CATG", 7))
+ return darwin_sections[objc_category_section];
+ else if (!strncmp (p, "V1_PROT", 7))
+ return darwin_sections[objc_protocol_section];
+
+ else if (!strncmp (p, "V1_CLCV", 7))
+ return darwin_sections[objc_class_vars_section];
+ else if (!strncmp (p, "V1_CLIV", 7))
+ return darwin_sections[objc_instance_vars_section];
+
+ else if (!strncmp (p, "V1_CLCM", 7))
+ return darwin_sections[objc_cls_meth_section];
+ else if (!strncmp (p, "V1_CLIM", 7))
+ return darwin_sections[objc_inst_meth_section];
+ else if (!strncmp (p, "V1_CACM", 7))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (p, "V1_CAIM", 7))
+ return darwin_sections[objc_cat_inst_meth_section];
+ else if (!strncmp (p, "V1_PNSM", 7))
+ return darwin_sections[objc_cat_inst_meth_section];
+ else if (!strncmp (p, "V1_PCLM", 7))
+ return darwin_sections[objc_cat_cls_meth_section];
+
+ else if (!strncmp (p, "V1_CLPR", 7))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (p, "V1_CAPR", 7))
+ return darwin_sections[objc_category_section]; /* ??? CHECK me. */
+
+ else if (!strncmp (p, "V1_PRFS", 7))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (p, "V1_CLRF", 7))
+ return darwin_sections[objc_cls_refs_section];
+ else if (!strncmp (p, "V1_SRFS", 7))
+ return darwin_sections[objc_selector_refs_section];
+
+ else if (!strncmp (p, "V1_MODU", 7))
+ return darwin_sections[objc_module_info_section];
+ else if (!strncmp (p, "V1_SYMT", 7))
+ return darwin_sections[objc_symbols_section];
+ else if (!strncmp (p, "V1_INFO", 7))
+ return darwin_sections[objc_image_info_section];
+
+ else if (!strncmp (p, "V1_PLST", 7))
+ return darwin_sections[objc1_prop_list_section];
+ else if (!strncmp (p, "V1_PEXT", 7))
+ return darwin_sections[objc1_protocol_ext_section];
+ else if (!strncmp (p, "V1_CEXT", 7))
+ return darwin_sections[objc1_class_ext_section];
+
+ else if (!strncmp (p, "V2_CSTR", 7))
+ return darwin_sections[objc_constant_string_object_section];
+
+ return base;
+}
+
+section *
+machopic_select_section (tree decl,
+ int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ bool zsize, one, weak, ro;
+ section *base_section = NULL;
+
+ weak = (DECL_P (decl)
+ && DECL_WEAK (decl)
+ && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl)));
+
+ zsize = (DECL_P (decl)
+ && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL)
+ && tree_low_cst (DECL_SIZE_UNIT (decl), 1) == 0);
+
+ one = DECL_P (decl)
+ && TREE_CODE (decl) == VAR_DECL
+ && DECL_ONE_ONLY (decl);
+
+ ro = TREE_READONLY (decl) || TREE_CONSTANT (decl);
+
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_TEXT:
+ gcc_unreachable ();
+ break;
+
+ case SECCAT_RODATA:
+ case SECCAT_SRODATA:
+ base_section = darwin_rodata_section (weak, zsize);
+ break;
+
+ case SECCAT_RODATA_MERGE_STR:
+ base_section = darwin_mergeable_string_section (decl, align);
+ break;
+
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align);
+ break;
+
+ case SECCAT_RODATA_MERGE_CONST:
+ base_section = darwin_mergeable_constant_section (decl, align, zsize);
+ break;
+
+ case SECCAT_DATA:
+ case SECCAT_DATA_REL:
+ case SECCAT_DATA_REL_LOCAL:
+ case SECCAT_DATA_REL_RO:
+ case SECCAT_DATA_REL_RO_LOCAL:
+ case SECCAT_SDATA:
+ case SECCAT_TDATA:
+ if (weak || one)
+ {
+ if (ro)
+ base_section = darwin_sections[const_data_coal_section];
+ else
+ base_section = darwin_sections[data_coal_section];
+ }
+ else if (DARWIN_SECTION_ANCHORS
+ && flag_section_anchors
+ && zsize)
+ {
+ /* If we're doing section anchors, then punt zero-sized objects into
+ their own sections so that they don't interfere with offset
+ computation for the remaining vars. This does not need to be done
+ for stuff in mergeable sections, since these are ineligible for
+ anchors. */
+ if (ro)
+ base_section = darwin_sections[zobj_const_data_section];
+ else
+ base_section = darwin_sections[zobj_data_section];
+ }
+ else if (ro)
+ base_section = darwin_sections[const_data_section];
+ else
+ base_section = data_section;
+ break;
+ case SECCAT_BSS:
+ case SECCAT_SBSS:
+ case SECCAT_TBSS:
+ if (weak || one)
+ base_section = darwin_sections[data_coal_section];
+ else
+ {
+ if (!TREE_PUBLIC (decl))
+ base_section = lcomm_section;
+ else if (bss_noswitch_section)
+ base_section = bss_noswitch_section;
+ else
+ base_section = data_section;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Darwin's weird special cases.
+ a) ObjC meta-data. */
+ if (DECL_P (decl)
+ && (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == CONST_DECL)
+ && DECL_ATTRIBUTES (decl))
+ {
+ tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl));
+ if (meta)
+ return darwin_objc2_section (decl, meta, base_section);
+ meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl));
+ if (meta)
+ return darwin_objc1_section (decl, meta, base_section);
+ meta = lookup_attribute ("OBJC1METG", DECL_ATTRIBUTES (decl));
+ if (meta)
+ return base_section; /* GNU runtime is happy with it all in one pot. */
+ }
+
+ /* b) Constant string objects. */
+ if (TREE_CODE (decl) == CONSTRUCTOR
+ && TREE_TYPE (decl)
+ && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE
+ && TYPE_NAME (TREE_TYPE (decl)))
+ {
+ tree name = TYPE_NAME (TREE_TYPE (decl));
+ if (TREE_CODE (name) == TYPE_DECL)
+ name = DECL_NAME (name);
+
+ /* FIXME: This is unsatisfactory for LTO, since it relies on other
+ metadata determining the source FE. */
+ if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_ObjCString"))
+ {
+ if (flag_next_runtime)
+ {
+ if (flag_objc_abi == 2)
+ return darwin_sections[objc2_constant_string_object_section];
+ else
+ return darwin_sections[objc_constant_string_object_section];
+ }
+ else
+ return darwin_sections[objc_string_object_section];
+ }
+ else if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_CFString"))
+ return darwin_sections[cfstring_constant_object_section];
+ else
+ return base_section;
+ }
+ /* c) legacy meta-data selection. */
+ else if (TREE_CODE (decl) == VAR_DECL
+ && DECL_NAME (decl)
+ && TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE
+ && IDENTIFIER_POINTER (DECL_NAME (decl))
+ && flag_next_runtime
+ && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), "_OBJC_", 6))
+ {
+ const char *name = IDENTIFIER_POINTER (DECL_NAME (decl));
+ static bool warned_objc_46 = false;
+ /* We shall assert that zero-sized objects are an error in ObjC
+ meta-data. */
+ gcc_assert (tree_low_cst (DECL_SIZE_UNIT (decl), 1) != 0);
+
+ /* ??? This mechanism for determining the metadata section is
+ broken when LTO is in use, since the frontend that generated
+ the data is not identified. We will keep the capability for
+ the short term - in case any non-Objective-C programs are using
+ it to place data in specified sections. */
+ if (!warned_objc_46)
+ {
+ location_t loc = DECL_SOURCE_LOCATION (decl);
+ warning_at (loc, 0, "the use of _OBJC_-prefixed variable names"
+ " to select meta-data sections is deprecated at 4.6"
+ " and will be removed in 4.7");
+ warned_objc_46 = true;
+ }
+
+ if (!strncmp (name, "_OBJC_CLASS_METHODS_", 20))
+ return darwin_sections[objc_cls_meth_section];
+ else if (!strncmp (name, "_OBJC_INSTANCE_METHODS_", 23))
+ return darwin_sections[objc_inst_meth_section];
+ else if (!strncmp (name, "_OBJC_CATEGORY_CLASS_METHODS_", 29))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (name, "_OBJC_CATEGORY_INSTANCE_METHODS_", 32))
+ return darwin_sections[objc_cat_inst_meth_section];
+ else if (!strncmp (name, "_OBJC_CLASS_VARIABLES_", 22))
+ return darwin_sections[objc_class_vars_section];
+ else if (!strncmp (name, "_OBJC_INSTANCE_VARIABLES_", 25))
+ return darwin_sections[objc_instance_vars_section];
+ else if (!strncmp (name, "_OBJC_CLASS_PROTOCOLS_", 22))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (name, "_OBJC_CLASS_NAME_", 17))
+ return darwin_sections[objc_class_names_section];
+ else if (!strncmp (name, "_OBJC_METH_VAR_NAME_", 20))
+ return darwin_sections[objc_meth_var_names_section];
+ else if (!strncmp (name, "_OBJC_METH_VAR_TYPE_", 20))
+ return darwin_sections[objc_meth_var_types_section];
+ else if (!strncmp (name, "_OBJC_CLASS_REFERENCES", 22))
+ return darwin_sections[objc_cls_refs_section];
+ else if (!strncmp (name, "_OBJC_CLASS_", 12))
+ return darwin_sections[objc_class_section];
+ else if (!strncmp (name, "_OBJC_METACLASS_", 16))
+ return darwin_sections[objc_meta_class_section];
+ else if (!strncmp (name, "_OBJC_CATEGORY_", 15))
+ return darwin_sections[objc_category_section];
+ else if (!strncmp (name, "_OBJC_SELECTOR_REFERENCES", 25))
+ return darwin_sections[objc_selector_refs_section];
+ else if (!strncmp (name, "_OBJC_SELECTOR_FIXUP", 20))
+ return darwin_sections[objc_selector_fixup_section];
+ else if (!strncmp (name, "_OBJC_SYMBOLS", 13))
+ return darwin_sections[objc_symbols_section];
+ else if (!strncmp (name, "_OBJC_MODULES", 13))
+ return darwin_sections[objc_module_info_section];
+ else if (!strncmp (name, "_OBJC_IMAGE_INFO", 16))
+ return darwin_sections[objc_image_info_section];
+ else if (!strncmp (name, "_OBJC_PROTOCOL_INSTANCE_METHODS_", 32))
+ return darwin_sections[objc_cat_inst_meth_section];
+ else if (!strncmp (name, "_OBJC_PROTOCOL_CLASS_METHODS_", 29))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (name, "_OBJC_PROTOCOL_REFS_", 20))
+ return darwin_sections[objc_cat_cls_meth_section];
+ else if (!strncmp (name, "_OBJC_PROTOCOL_", 15))
+ return darwin_sections[objc_protocol_section];
+ else
+ return base_section;
+ }
+
+ return base_section;
+}
+
+/* This can be called with address expressions as "rtx".
+ They must go in "const". */
+
+section *
+machopic_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_SIZE (mode) == 8
+ && (GET_CODE (x) == CONST_INT
+ || GET_CODE (x) == CONST_DOUBLE))
+ return darwin_sections[literal8_section];
+ else if (GET_MODE_SIZE (mode) == 4
+ && (GET_CODE (x) == CONST_INT
+ || GET_CODE (x) == CONST_DOUBLE))
+ return darwin_sections[literal4_section];
+ else if (HAVE_GAS_LITERAL16
+ && TARGET_64BIT
+ && GET_MODE_SIZE (mode) == 16
+ && (GET_CODE (x) == CONST_INT
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_VECTOR))
+ return darwin_sections[literal16_section];
+ else if (MACHOPIC_INDIRECT
+ && (GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == CONST
+ || GET_CODE (x) == LABEL_REF))
+ return darwin_sections[const_data_section];
+ else
+ return darwin_sections[const_section];
+}
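+
+/* For example (a sketch): a DImode CONST_DOUBLE is placed in
+ literal8 and an SImode CONST_INT in literal4, while under
+ MACHOPIC_INDIRECT a bare SYMBOL_REF -- which will need a
+ relocation -- goes to const_data rather than the plain const
+ section. */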
+
+void
+machopic_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ if (MACHOPIC_INDIRECT)
+ switch_to_section (darwin_sections[mod_init_section]);
+ else
+ switch_to_section (darwin_sections[constructor_section]);
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+
+ if (! MACHOPIC_INDIRECT)
+ fprintf (asm_out_file, ".reference .constructors_used\n");
+}
+
+void
+machopic_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ if (MACHOPIC_INDIRECT)
+ switch_to_section (darwin_sections[mod_term_section]);
+ else
+ switch_to_section (darwin_sections[destructor_section]);
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+
+ if (! MACHOPIC_INDIRECT)
+ fprintf (asm_out_file, ".reference .destructors_used\n");
+}
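+
+/* For example (a sketch, 32-bit): registering a constructor "ctor"
+ under MACHOPIC_INDIRECT switches to the mod-init section and emits
+ roughly
+ .align 2
+ .long _ctor
+ whereas the non-indirect case uses the constructor section and also
+ emits ".reference .constructors_used". */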
+
+void
+darwin_globalize_label (FILE *stream, const char *name)
+{
+ if (strncmp (name, "_OBJC_", 6) != 0)
+ default_globalize_label (stream, name);
+}
+
+/* This routine returns non-zero if 'name' starts with the special
+ Objective-C anonymous file-scope static name. It accommodates
+ C++'s mangling of such symbols (in this case the symbols have the
+ form _ZL{d}*_OBJC_*, where d = digit). */
+
+int
+darwin_label_is_anonymous_local_objc_name (const char *name)
+{
+ const unsigned char *p = (const unsigned char *) name;
+ if (*p != '_')
+ return 0;
+ if (p[1] == 'Z' && p[2] == 'L')
+ {
+ p += 3;
+ while (*p >= '0' && *p <= '9')
+ p++;
+ }
+ return (!strncmp ((const char *)p, "_OBJC_", 6));
+}
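+
+/* For example: "_OBJC_CLASS_Foo" matches directly, and so does the
+ C++-mangled "_ZL12_OBJC_CLASS_Foo" (the digits after "_ZL" are
+ skipped); "OBJC_Foo" fails the leading-underscore test and "_Zfoo"
+ fails the final comparison. */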
+
+/* LTO support for Mach-O.
+
+ This version uses three mach-o sections to encapsulate the (unlimited
+ number of) lto sections.
+
+ __GNU_LTO, __lto_sections contains the concatenated GNU LTO section data.
+ __GNU_LTO, __section_names contains the GNU LTO section names.
+ __GNU_LTO, __section_index contains an array of values that index these.
+
+ Indexed thus:
+ <section offset from the start of __GNU_LTO, __lto_sections>,
+ <section length>
+ <name offset from the start of __GNU_LTO, __section_names>,
+ <name length>.
+
+ At present, for both m32 and m64 Mach-O files, each of these fields is
+ represented by a uint32_t. This is because, AFAICT, a Mach-O object
+ cannot exceed 4GB, since the section_64 offset field (see below) is
+ 32 bits.
+
+ uint32_t offset;
+ "offset An integer specifying the offset to this section in the file." */
+
+/* Count lto section numbers. */
+static unsigned int lto_section_num = 0;
+
+/* A vector of information about LTO sections, at present, we only have
+ the name. TODO: see if we can get the data length somehow. */
+typedef struct GTY (()) darwin_lto_section_e {
+ const char *sectname;
+} darwin_lto_section_e;
+DEF_VEC_O(darwin_lto_section_e);
+DEF_VEC_ALLOC_O(darwin_lto_section_e, gc);
+
+static GTY (()) VEC (darwin_lto_section_e, gc) * lto_section_names;
+
+/* Segment for LTO data. */
+#define LTO_SEGMENT_NAME "__GNU_LTO"
+
+/* Section wrapper scheme (used here to wrap the unlimited number of LTO
+ sections into three Mach-O ones).
+ NOTE: These names MUST be kept in sync with those in
+ libiberty/simple-object-mach-o. */
+#define LTO_SECTS_SECTION "__wrapper_sects"
+#define LTO_NAMES_SECTION "__wrapper_names"
+#define LTO_INDEX_SECTION "__wrapper_index"
+
+/* File to temporarily store LTO data. This is appended to asm_out_file
+ in darwin_end_file. */
+static FILE *lto_asm_out_file, *saved_asm_out_file;
+static char *lto_asm_out_name;
+
+/* Prepare asm_out_file for LTO output. For darwin, this means hiding
+ asm_out_file and switching to an alternative output file. */
+void
+darwin_asm_lto_start (void)
+{
+ gcc_assert (! saved_asm_out_file);
+ saved_asm_out_file = asm_out_file;
+ if (! lto_asm_out_name)
+ lto_asm_out_name = make_temp_file (".lto.s");
+ lto_asm_out_file = fopen (lto_asm_out_name, "a");
+ if (lto_asm_out_file == NULL)
+ fatal_error ("failed to open temporary file %s for LTO output",
+ lto_asm_out_name);
+ asm_out_file = lto_asm_out_file;
+}
+
+/* Restore asm_out_file. */
+void
+darwin_asm_lto_end (void)
+{
+ gcc_assert (saved_asm_out_file);
+ fclose (lto_asm_out_file);
+ asm_out_file = saved_asm_out_file;
+ saved_asm_out_file = NULL;
+}
+
+static void
+darwin_asm_dwarf_section (const char *name, unsigned int flags, tree decl);
+
+/* Called for the TARGET_ASM_NAMED_SECTION hook. */
+
+void
+darwin_asm_named_section (const char *name,
+ unsigned int flags,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ /* LTO sections go in a special section that encapsulates the (unlimited)
+ number of GNU LTO sections within a single mach-o one. */
+ if (strncmp (name, LTO_SECTION_NAME_PREFIX,
+ strlen (LTO_SECTION_NAME_PREFIX)) == 0)
+ {
+ darwin_lto_section_e e;
+ /* We expect certain flags to be set... */
+ gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED))
+ == (SECTION_DEBUG | SECTION_NAMED));
+
+ /* Switch to our combined section. */
+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n",
+ LTO_SEGMENT_NAME, LTO_SECTS_SECTION);
+ /* Output a label for the start of this sub-section. */
+ fprintf (asm_out_file, "L_GNU_LTO%d:\t;# %s\n",
+ lto_section_num, name);
+ /* We have to jump through hoops to get the values of the intra-section
+ offsets... */
+ fprintf (asm_out_file, "\t.set L$gnu$lto$offs%d,L_GNU_LTO%d-L_GNU_LTO0\n",
+ lto_section_num, lto_section_num);
+ fprintf (asm_out_file,
+ "\t.set L$gnu$lto$size%d,L_GNU_LTO%d-L_GNU_LTO%d\n",
+ lto_section_num, lto_section_num+1, lto_section_num);
+ lto_section_num++;
+ e.sectname = xstrdup (name);
+ /* Keep the names, we'll need to make a table later.
+ TODO: check that we do not revisit sections, that would break
+ the assumption of how this is done. */
+ if (lto_section_names == NULL)
+ lto_section_names = VEC_alloc (darwin_lto_section_e, gc, 16);
+ VEC_safe_push (darwin_lto_section_e, gc, lto_section_names, &e);
+ }
+ else if (strncmp (name, "__DWARF,", 8) == 0)
+ darwin_asm_dwarf_section (name, flags, decl);
+ else
+ fprintf (asm_out_file, "\t.section %s\n", name);
+}
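+
+/* For example (a sketch): the first GNU LTO section, say
+ ".gnu.lto_foo", is emitted as
+ .section __GNU_LTO,__wrapper_sects,regular,debug
+ L_GNU_LTO0: ;# .gnu.lto_foo
+ .set L$gnu$lto$offs0,L_GNU_LTO0-L_GNU_LTO0
+ .set L$gnu$lto$size0,L_GNU_LTO1-L_GNU_LTO0
+ and its name is queued so that the __wrapper_names and
+ __wrapper_index tables can be written out at the end of the
+ file. */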
+
+void
+darwin_unique_section (tree decl ATTRIBUTE_UNUSED, int reloc ATTRIBUTE_UNUSED)
+{
+ /* Darwin does not use unique sections. */
+}
+
+/* Handle __attribute__ ((apple_kext_compatibility)).
+ This only applies to darwin kexts for 2.95 compatibility -- it shrinks the
+ vtable for classes with this attribute (and their descendants) by not
+ outputting the new 3.0 nondeleting destructor. This means that such
+ objects CANNOT be allocated on the stack or as globals UNLESS they have
+ a completely empty `operator delete'.
+ Luckily, this fits in with the Darwin kext model.
+
+ This attribute also disables gcc3's potential overlaying of derived
+ class data members on the padding at the end of the base class. */
+
+tree
+darwin_handle_kext_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ /* APPLE KEXT stuff -- only applies with pure static C++ code. */
+ if (! TARGET_KEXTABI)
+ {
+ warning (0, "%qE 2.95 vtable-compatibility attribute applies "
+ "only when compiling a kext", name);
+
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (*node) != RECORD_TYPE)
+ {
+ warning (0, "%qE 2.95 vtable-compatibility attribute applies "
+ "only to C++ classes", name);
+
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "weak_import" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+tree
+darwin_handle_weak_import_attribute (tree *node, tree name,
+ tree ARG_UNUSED (args),
+ int ARG_UNUSED (flags),
+ bool * no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL && TREE_CODE (*node) != VAR_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+ else
+ declare_weak (*node);
+
+ return NULL_TREE;
+}
+
+/* Emit a label for an FDE, making it global and/or weak if appropriate.
+ The third parameter is nonzero if this is for exception handling.
+ The fourth parameter is nonzero if this is just a placeholder for an
+ FDE that we are omitting. */
+
+void
+darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty)
+{
+ char *lab;
+ char buf[32];
+ static int invok_count = 0;
+ static tree last_fun_decl = NULL_TREE;
+
+ /* We use the linker to emit the .eh labels for Darwin 9 and above. */
+ if (! for_eh || generating_for_darwin_version >= 9)
+ return;
+
+ /* FIXME: This only works when the eh for all sections of a function is
+ emitted at the same time. If that changes, we would need to use a lookup
+ table of some form to determine what to do. Also, we should emit the
+ unadorned label for the partition containing the public label for a
+ function. This is of limited use, probably, since we do not currently
+ enable partitioning. */
+ strcpy (buf, ".eh");
+ if (decl && TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ if (decl == last_fun_decl)
+ {
+ invok_count++;
+ snprintf (buf, 31, "$$part$$%d.eh", invok_count);
+ }
+ else
+ {
+ last_fun_decl = decl;
+ invok_count = 0;
+ }
+ }
+
+ lab = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), buf, NULL);
+
+ if (TREE_PUBLIC (decl))
+ {
+ targetm.asm_out.globalize_label (file, lab);
+ if (DECL_VISIBILITY (decl) == VISIBILITY_HIDDEN)
+ {
+ fputs ("\t.private_extern ", file);
+ assemble_name (file, lab);
+ fputc ('\n', file);
+ }
+ }
+
+ if (DECL_WEAK (decl))
+ {
+ fputs ("\t.weak_definition ", file);
+ assemble_name (file, lab);
+ fputc ('\n', file);
+ }
+
+ assemble_name (file, lab);
+ if (empty)
+ {
+ fputs (" = 0\n", file);
+
+ /* Mark the absolute .eh and .eh1 style labels as needed to
+ ensure that we don't dead code strip them and keep such
+ labels from another instantiation point until we can fix this
+ properly with group comdat support. */
+ darwin_mark_decl_preserved (lab);
+ }
+ else
+ fputs (":\n", file);
+
+ free (lab);
+}
+
+static GTY(()) unsigned long except_table_label_num;
+
+void
+darwin_emit_except_table_label (FILE *file)
+{
+ char section_start_label[30];
+
+ ASM_GENERATE_INTERNAL_LABEL (section_start_label, "GCC_except_table",
+ except_table_label_num++);
+ ASM_OUTPUT_LABEL (file, section_start_label);
+}
+
+/* Generate a PC-relative reference to a Mach-O non-lazy-symbol. */
+
+void
+darwin_non_lazy_pcrel (FILE *file, rtx addr)
+{
+ const char *nlp_name;
+
+ gcc_assert (GET_CODE (addr) == SYMBOL_REF);
+
+ nlp_name = machopic_indirection_name (addr, /*stub_p=*/false);
+ fputs ("\t.long\t", file);
+ ASM_OUTPUT_LABELREF (file, nlp_name);
+ fputs ("-.", file);
+}
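+
+/* For example: for the symbol "foo" this emits
+ ".long L_foo$non_lazy_ptr-.", a reference relative to the current
+ output position. */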
+
+/* If this is uncommented, details of each allocation will be printed
+ in the asm right before the actual code. WARNING - this will cause
+ some test-suite failures (since the printout will contain items that
+ some tests are not expecting) -- so don't leave it on by default (it
+ bloats the asm too). */
+/*#define DEBUG_DARWIN_MEM_ALLOCATORS*/
+
+/* The first two of these routines are ostensibly just intended to put
+ names into the asm. However, they are both hijacked in order to ensure
+ that zero-sized items do not make their way into the output. Consequently,
+ we also need to make these participate in provisions for dealing with
+ such items in section anchors. */
+
+/* The implementation of ASM_DECLARE_OBJECT_NAME. */
+/* The RTTI data (e.g., __ti4name) is common and public (and static),
+ but it does need to be referenced via indirect PIC data pointers.
+ The machopic_define_symbol calls are telling the machopic subsystem
+ that the name *is* defined in this module, so it doesn't need to
+ make them indirect. */
+void
+darwin_asm_declare_object_name (FILE *file,
+ const char *nam, tree decl)
+{
+ const char *xname = nam;
+ unsigned HOST_WIDE_INT size;
+ bool local_def, weak;
+
+ weak = (DECL_P (decl)
+ && DECL_WEAK (decl)
+ && !lookup_attribute ("weak_import",
+ DECL_ATTRIBUTES (decl)));
+
+ local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl)
+ && (!DECL_COMMON (decl)
+ || !TREE_PUBLIC (decl)));
+
+ if (GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
+ xname = IDENTIFIER_POINTER (DECL_NAME (decl));
+
+ if (local_def)
+ {
+ (* targetm.encode_section_info) (decl, DECL_RTL (decl), false);
+ if (!weak)
+ machopic_define_symbol (DECL_RTL (decl));
+ }
+
+ size = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+
+#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
+fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d"
+ " stat %d com %d pub %d t-const %d t-ro %d init %lx\n",
+ xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"),
+ (unsigned long long)size, DECL_ALIGN (decl), local_def,
+ DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl),
+ TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl),
+ (unsigned long)DECL_INITIAL (decl));
+#endif
+
+ /* Darwin needs help to support local zero-sized objects.
+ They must be made at least one byte, and the containing section must
+ be marked as unsuitable for section-anchors (see storage allocators
+ below).
+
+ For non-zero-sized objects this output is handled by varasm.c. */
+ if (!size)
+ {
+ unsigned int l2align = 0;
+
+ /* The align must be honored, even for zero-sized. */
+ if (DECL_ALIGN (decl))
+ {
+ l2align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT);
+ fprintf (file, "\t.align\t%u\n", l2align);
+ }
+
+ ASM_OUTPUT_LABEL (file, xname);
+ size = 1;
+ fprintf (file, "\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+
+ /* Check that we've correctly picked up the zero-sized item and placed it
+ properly. */
+ gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors)
+ || (in_section
+ && (in_section->common.flags & SECTION_NO_ANCHOR)));
+ }
+ else
+ ASM_OUTPUT_LABEL (file, xname);
+}
+
+/* The implementation of ASM_DECLARE_CONSTANT_NAME. */
+void
+darwin_asm_declare_constant_name (FILE *file, const char *name,
+ const_tree exp ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size)
+{
+ assemble_label (file, name);
+ /* As for other items, we need at least one byte. */
+ if (!size)
+ {
+ fputs ("\t.space\t1\n", file);
+ /* Check that we've correctly picked up the zero-sized item and placed it
+ properly. */
+ gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors)
+ || (in_section
+ && (in_section->common.flags & SECTION_NO_ANCHOR)));
+ }
+}
+
+/* Darwin storage allocators.
+
+ Zerofill sections are desirable for large blank data since, otherwise, these
+ data bloat objects (PR33210).
+
+ However, section anchors don't work in .zerofill sections (one cannot switch
+ to a zerofill section). Ergo, for Darwin targets using section anchors we need
+ to put (at least some) data into 'normal' switchable sections.
+
+ Here we set a relatively arbitrary value for the size of an object to trigger
+ zerofill when section anchors are enabled (anything bigger than a page for
+ current Darwin implementations). FIXME: there ought to be some objective way
+ to make this choice.
+
+ When section anchors are off this is ignored anyway. */
+
+#define BYTES_ZFILL 4096
+
+/* Emit a chunk of data for items coalesced by the linker. */
+static void
+darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ /* Since the sections used here are coalesced, they will not be
+ eligible for section anchors, and therefore we don't need to break
+ that out. */
+ if (TREE_READONLY (decl) || TREE_CONSTANT (decl))
+ switch_to_section (darwin_sections[const_data_coal_section]);
+ else
+ switch_to_section (darwin_sections[data_coal_section]);
+
+ /* To be consistent, we'll allow darwin_asm_declare_object_name to assemble
+ the align info for zero-sized items... but do it here otherwise. */
+ if (size && align)
+ fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT));
+
+ if (TREE_PUBLIC (decl))
+ darwin_globalize_label (fp, name);
+
+ /* ... and we let it deal with outputting one byte of zero for them too. */
+ darwin_asm_declare_object_name (fp, name, decl);
+ if (size)
+ assemble_zeros (size);
+}
+
+/* Emit a chunk of data for ObjC meta-data that got placed in BSS erroneously. */
+static void
+darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align, tree meta)
+{
+ section *ocs = data_section;
+
+ if (TREE_PURPOSE (meta) == get_identifier("OBJC2META"))
+ ocs = darwin_objc2_section (decl, meta, ocs);
+ else
+ ocs = darwin_objc1_section (decl, meta, ocs);
+
+ switch_to_section (ocs);
+
+ /* We shall declare that zero-sized meta-data are not valid (yet). */
+ gcc_assert (size);
+ fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT));
+
+ /* ... and we let it deal with outputting one byte of zero for them too. */
+ darwin_asm_declare_object_name (fp, name, decl);
+ assemble_zeros (size);
+}
+
+/* This routine emits 'local' storage:
+
+ When Section Anchors are off this routine emits .zerofill commands in
+ sections named for their alignment.
+
+ When Section Anchors are on, smaller (non-zero-sized) items are placed in
+ the .static_data section so that the section anchoring system can see them.
+ Larger items are still placed in .zerofill sections, addressing PR33210.
+ The routine has no checking - it is all assumed to be done by the caller.
+*/
+static void
+darwin_emit_local_bss (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int l2align)
+{
+ /* FIXME: We have a fudge to make this work with Java even when the
+ target does not use section anchors -- Java seems to need at least
+ one small item in a non-zerofill segment. */
+ if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
+ || (size && size <= 2))
+ {
+ /* Put smaller objects in _static_data, where the section anchors system
+ can get them.
+ However, if they are zero-sized punt them to yet a different section
+ (that is not allowed to participate in anchoring). */
+ if (!size)
+ {
+ fputs ("\t.section\t__DATA,__zobj_bss\n", fp);
+ in_section = darwin_sections[zobj_bss_section];
+ size = 1;
+ }
+ else
+ {
+ fputs ("\t.static_data\n", fp);
+ in_section = darwin_sections[static_data_section];
+ }
+
+ if (l2align)
+ fprintf (fp, "\t.align\t%u\n", l2align);
+
+ assemble_name (fp, name);
+ fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ }
+ else
+ {
+ /* When we are on a non-section anchor target, we can get zero-sized
+ items here. However, all we need to do is to bump them to one byte
+ and the section alignment will take care of the rest. */
+ char secnam[64];
+ unsigned int flags;
+ snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"),
+ (unsigned) l2align);
+ /* We can't anchor (yet, if ever) in zerofill sections, because we can't
+ switch to them and emit a label. */
+ flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
+ in_section = get_section (secnam, flags, NULL);
+ fprintf (fp, "\t.zerofill %s,", secnam);
+ assemble_name (fp, name);
+ if (!size)
+ size = 1;
+
+ if (l2align)
+ fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+ size, (unsigned) l2align);
+ else
+ fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ }
+
+ (*targetm.encode_section_info) (decl, DECL_RTL (decl), false);
+ /* This is defined as a file-scope var, so we know to notify machopic. */
+ machopic_define_symbol (DECL_RTL (decl));
+}
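+
+/* For example (a sketch): a local 8kB zero-initialized buffer "buf"
+ with 32-byte alignment, on a target without section anchors, comes
+ out as
+ .zerofill __DATA,__bss5,_buf,8192,5
+ while a 16-byte local on a section-anchor target lands in
+ .static_data as "_buf:" followed by ".space 16". */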
+
+/* Emit a chunk of common. */
+static void
+darwin_emit_common (FILE *fp, const char *name,
+ unsigned HOST_WIDE_INT size, unsigned int align)
+{
+ unsigned HOST_WIDE_INT rounded;
+ unsigned int l2align;
+
+ /* Earlier systems complain if the alignment exceeds the page size.
+ The magic number is 4096 * 8 - hard-coded for legacy systems. */
+ if (!emit_aligned_common && (align > 32768UL))
+ align = 4096UL; /* In units. */
+ else
+ align /= BITS_PER_UNIT;
+
+ /* Make sure we have a meaningful align. */
+ if (!align)
+ align = 1;
+
+ /* For earlier toolchains, we need to emit the var as a rounded size to
+ tell ld the alignment. */
+ if (size < align)
+ rounded = align;
+ else
+ rounded = (size + (align-1)) & ~(align-1);
+
+ l2align = floor_log2 (align);
+ gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT);
+
+ in_section = comm_section;
+ /* We mustn't allow multiple public symbols to share an address when using
+ the normal OSX toolchain. */
+ if (!size)
+ {
+ /* Put at least one byte. */
+ size = 1;
+ /* This section can no longer participate in section anchoring. */
+ comm_section->common.flags |= SECTION_NO_ANCHOR;
+ }
+
+ fputs ("\t.comm\t", fp);
+ assemble_name (fp, name);
+ fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED,
+ emit_aligned_common?size:rounded);
+ if (l2align && emit_aligned_common)
+ fprintf (fp, ",%u", l2align);
+ fputs ("\n", fp);
+}
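+
+/* For example (a sketch): a 10-byte public common "x" with 8-byte
+ alignment is emitted as ".comm _x,16" for older toolchains (the
+ size is rounded up so that ld infers the alignment) or as
+ ".comm _x,10,3" when aligned common is supported. */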
+
+/* Output a var which is all zero - into aligned BSS sections, common, lcomm
+ or coalescable data sections (for weak or comdat) as appropriate. */
+
+void
+darwin_output_aligned_bss (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size, unsigned int align)
+{
+ unsigned int l2align;
+ bool one, pub, weak;
+ tree meta;
+
+ pub = TREE_PUBLIC (decl);
+ one = DECL_ONE_ONLY (decl);
+ weak = (DECL_P (decl)
+ && DECL_WEAK (decl)
+ && !lookup_attribute ("weak_import",
+ DECL_ATTRIBUTES (decl)));
+
+#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
+fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d"
+ " pub %d weak %d one %d init %lx\n",
+ name, (long long)size, (int)align, TREE_READONLY (decl),
+ TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl),
+ pub, weak, one, (unsigned long)DECL_INITIAL (decl));
+#endif
+
+ /* ObjC metadata can get put in BSS because varasm.c decides it's BSS
+ before the target has a chance to comment. */
+ if ((meta = is_objc_metadata (decl)))
+ {
+ darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta);
+ return;
+ }
+
+ /* Check that any initializer is valid. */
+ gcc_assert ((DECL_INITIAL (decl) == NULL)
+ || (DECL_INITIAL (decl) == error_mark_node)
+ || initializer_zerop (DECL_INITIAL (decl)));
+
+ gcc_assert (DECL_SECTION_NAME (decl) == NULL);
+ gcc_assert (!DECL_COMMON (decl));
+
+ /* Pick up the correct alignment. */
+ if (!size || !align)
+ align = DECL_ALIGN (decl);
+
+ l2align = floor_log2 (align / BITS_PER_UNIT);
+ gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT);
+
+ last_assemble_variable_decl = decl;
+
+ /* We would rather not have to check this here - but it seems that we might
+ be passed a decl that should be in coalesced space. */
+ if (one || weak)
+ {
+ /* Weak or COMDAT objects are put in mergeable sections. */
+ darwin_emit_weak_or_comdat (fp, decl, name, size,
+ DECL_ALIGN (decl));
+ return;
+ }
+
+ /* If this is not public, then emit according to local rules. */
+ if (!pub)
+ {
+ darwin_emit_local_bss (fp, decl, name, size, l2align);
+ return;
+ }
+
+ /* So we have a public symbol (small item fudge for Java, see above). */
+ if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL)
+ || (size && size <= 2))
+ {
+ /* Put smaller objects in data, where the section anchors system can get
+ them. However, if they are zero-sized punt them to yet a different
+ section (that is not allowed to participate in anchoring). */
+ if (!size)
+ {
+ fputs ("\t.section\t__DATA,__zobj_data\n", fp);
+ in_section = darwin_sections[zobj_data_section];
+ size = 1;
+ }
+ else
+ {
+ fputs ("\t.data\n", fp);
+ in_section = data_section;
+ }
+
+ if (l2align)
+ fprintf (fp, "\t.align\t%u\n", l2align);
+
+ assemble_name (fp, name);
+ fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ }
+ else
+ {
+ char secnam[64];
+ unsigned int flags;
+ /* When we are on a non-section anchor target, we can get zero-sized
+ items here. However, all we need to do is to bump them to one byte
+ and the section alignment will take care of the rest. */
+ snprintf (secnam, 64, "__DATA,__%spu_bss%u", (size ? "" : "zo_"), l2align);
+
+ /* We can't anchor in zerofill sections, because we can't switch
+ to them and emit a label. */
+ flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR;
+ in_section = get_section (secnam, flags, NULL);
+ fprintf (fp, "\t.zerofill %s,", secnam);
+ assemble_name (fp, name);
+ if (!size)
+ size = 1;
+
+ if (l2align)
+ fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align);
+ else
+ fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+ }
+ (* targetm.encode_section_info) (decl, DECL_RTL (decl), false);
+}
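+
+/* For illustration (hypothetical symbol): a public, zero-initialized
+ 4096-byte array "_big" with 32-byte alignment lands in a zerofill section:
+	.zerofill __DATA,__pu_bss5,_big,4096,5
+ Zero-sized items get the "__zo_" section name and are bumped to one byte,
+ as the code above shows. */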
+
+/* Output a chunk of common, with alignment specified (where the target
+ supports this). */
+void
+darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ unsigned int l2align;
+ bool one, weak;
+ tree meta;
+
+ /* No corresponding var. */
+ if (decl == NULL)
+ {
+#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
+fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align);
+#endif
+ darwin_emit_common (fp, name, size, align);
+ return;
+ }
+
+ one = DECL_ONE_ONLY (decl);
+ weak = (DECL_P (decl)
+ && DECL_WEAK (decl)
+ && !lookup_attribute ("weak_import",
+ DECL_ATTRIBUTES (decl)));
+
+#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
+fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d"
+ " weak %d one %d init %lx\n",
+ name, (long long)size, (int)align, TREE_READONLY (decl),
+ TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl),
+ TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl));
+#endif
+
+ /* ObjC metadata can get put in BSS because varasm.c decides it's BSS
+ before the target has a chance to comment. */
+ if ((meta = is_objc_metadata (decl)))
+ {
+ darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta);
+ return;
+ }
+
+ /* We shouldn't be messing with this if the decl has a section name. */
+ gcc_assert (DECL_SECTION_NAME (decl) == NULL);
+
+ /* We would rather not have to check this here - but it seems that we might
+ be passed a decl that should be in coalesced space. */
+ if (one || weak)
+ {
+ /* Weak or COMDAT objects are put in mergeable sections. */
+ darwin_emit_weak_or_comdat (fp, decl, name, size,
+ DECL_ALIGN (decl));
+ return;
+ }
+
+ /* We should only get here for DECL_COMMON, with a zero init (and, in
+ principle, only for public symbols too - although we deal with local
+ ones below). */
+
+ /* Check the initializer is OK. */
+ gcc_assert (DECL_COMMON (decl)
+ && ((DECL_INITIAL (decl) == NULL)
+ || (DECL_INITIAL (decl) == error_mark_node)
+ || initializer_zerop (DECL_INITIAL (decl))));
+
+ last_assemble_variable_decl = decl;
+
+ if (!size || !align)
+ align = DECL_ALIGN (decl);
+
+ l2align = floor_log2 (align / BITS_PER_UNIT);
+ /* Check that we aren't asking for more alignment than the platform allows. */
+ gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT);
+
+ if (TREE_PUBLIC (decl) != 0)
+ darwin_emit_common (fp, name, size, align);
+ else
+ darwin_emit_local_bss (fp, decl, name, size, l2align);
+}
+
+/* Output a chunk of BSS with alignment specified. */
+void
+darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ unsigned long l2align;
+ bool one, weak;
+ tree meta;
+
+ one = DECL_ONE_ONLY (decl);
+ weak = (DECL_P (decl)
+ && DECL_WEAK (decl)
+ && !lookup_attribute ("weak_import",
+ DECL_ATTRIBUTES (decl)));
+
+#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
+fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d"
+ " weak %d init %lx\n",
+ name, (long long)size, (int)align, TREE_READONLY (decl),
+ TREE_CONSTANT (decl), TREE_STATIC (decl), one, TREE_PUBLIC (decl),
+ weak , (unsigned long)DECL_INITIAL (decl));
+#endif
+
+ /* ObjC metadata can get put in BSS because varasm.c decides it's BSS
+ before the target has a chance to comment. */
+ if ((meta = is_objc_metadata (decl)))
+ {
+ darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta);
+ return;
+ }
+
+ /* We shouldn't be messing with this if the decl has a section name. */
+ gcc_assert (DECL_SECTION_NAME (decl) == NULL);
+
+ /* We would rather not have to check this here - but it seems that we might
+ be passed a decl that should be in coalesced space. */
+ if (one || weak)
+ {
+ /* Weak or COMDAT objects are put in mergeable sections. */
+ darwin_emit_weak_or_comdat (fp, decl, name, size,
+ DECL_ALIGN (decl));
+ return;
+ }
+
+ /* .. and it should be suitable for placement in local memory. */
+ gcc_assert (!TREE_PUBLIC (decl) && !DECL_COMMON (decl));
+ /* .. and any initializer must be all-zero. */
+ gcc_assert ((DECL_INITIAL (decl) == NULL)
+ || (DECL_INITIAL (decl) == error_mark_node)
+ || initializer_zerop (DECL_INITIAL (decl)));
+
+ last_assemble_variable_decl = decl;
+
+ if (!size || !align)
+ align = DECL_ALIGN (decl);
+
+ l2align = floor_log2 (align / BITS_PER_UNIT);
+ gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT);
+
+ darwin_emit_local_bss (fp, decl, name, size, l2align);
+}
+
+/* Emit an assembler directive to set visibility for a symbol. The
+ only supported visibilities are VISIBILITY_DEFAULT and
+ VISIBILITY_HIDDEN; the latter corresponds to Darwin's "private
+ extern". There is no MACH-O equivalent of ELF's
+ VISIBILITY_INTERNAL or VISIBILITY_PROTECTED. */
+
+void
+darwin_assemble_visibility (tree decl, int vis)
+{
+ if (vis == VISIBILITY_DEFAULT)
+ ;
+ else if (vis == VISIBILITY_HIDDEN)
+ {
+ fputs ("\t.private_extern ", asm_out_file);
+ assemble_name (asm_out_file,
+ (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
+ fputs ("\n", asm_out_file);
+ }
+ else
+ warning (OPT_Wattributes, "internal and protected visibility attributes "
+ "not supported in this configuration; ignored");
+}
+
+/* A VEC used by darwin_asm_dwarf_section to record which __DWARF sections
+ have been used. Maybe a hash table would be better here - but the
+ intention is that this is a very short list (fewer than 16 items) and
+ each entry should (ideally, eventually) only be presented once.
+
+ A structure to hold a dwarf debug section used entry. */
+
+typedef struct GTY(()) dwarf_sect_used_entry {
+ const char *name;
+ unsigned count;
+}
+dwarf_sect_used_entry;
+
+DEF_VEC_O(dwarf_sect_used_entry);
+DEF_VEC_ALLOC_O(dwarf_sect_used_entry, gc);
+
+/* A list of used __DWARF sections. */
+static GTY (()) VEC (dwarf_sect_used_entry, gc) * dwarf_sect_names_table;
+
+/* This is called when we are asked to assemble a named section and the
+ name begins with __DWARF,. We keep a list of the section names (without
+ the __DWARF, prefix) and use this to emit our required start label on the
+ first switch to each section. */
+
+static void
+darwin_asm_dwarf_section (const char *name, unsigned int flags,
+ tree ARG_UNUSED (decl))
+{
+ unsigned i;
+ int namelen;
+ const char * sname;
+ dwarf_sect_used_entry *ref;
+ bool found = false;
+ gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED))
+ == (SECTION_DEBUG | SECTION_NAMED));
+ /* We know that the name starts with "__DWARF,". */
+ sname = name + 8;
+ namelen = strchr (sname, ',') - sname;
+ gcc_assert (namelen);
+ if (dwarf_sect_names_table == NULL)
+ dwarf_sect_names_table = VEC_alloc (dwarf_sect_used_entry, gc, 16);
+ else
+ for (i = 0;
+ VEC_iterate (dwarf_sect_used_entry, dwarf_sect_names_table, i, ref);
+ i++)
+ {
+ if (!ref)
+ break;
+ if (!strcmp (ref->name, sname))
+ {
+ found = true;
+ ref->count++;
+ break;
+ }
+ }
+
+ fprintf (asm_out_file, "\t.section %s\n", name);
+ if (!found)
+ {
+ dwarf_sect_used_entry e;
+ fprintf (asm_out_file, "Lsection%.*s:\n", namelen, sname);
+ e.count = 1;
+ e.name = xstrdup (sname);
+ VEC_safe_push (dwarf_sect_used_entry, gc, dwarf_sect_names_table, &e);
+ }
+}
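+
+/* For illustration: the first switch to "__DWARF,__debug_info,regular,debug"
+ emits the section directive followed by the start label
+	Lsection__debug_info:
+ subsequent switches emit only the directive, since the name is by then
+ recorded in dwarf_sect_names_table. */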
+
+/* Output a difference of two labels that will be an assembly time
+ constant if the two labels are local. (.long lab1-lab2 will be
+ very different if lab1 is at the boundary between two sections; it
+ will be relocated according to the second section, not the first,
+ so one ends up with a difference between labels in different
+ sections, which is bad in the dwarf2 eh context for instance.) */
+
+static int darwin_dwarf_label_counter;
+
+void
+darwin_asm_output_dwarf_delta (FILE *file, int size,
+ const char *lab1, const char *lab2)
+{
+ int islocaldiff = (lab1[0] == '*' && lab1[1] == 'L'
+ && lab2[0] == '*' && lab2[1] == 'L');
+ const char *directive = (size == 8 ? ".quad" : ".long");
+
+ if (islocaldiff)
+ fprintf (file, "\t.set L$set$%d,", darwin_dwarf_label_counter);
+ else
+ fprintf (file, "\t%s\t", directive);
+
+ assemble_name_raw (file, lab1);
+ fprintf (file, "-");
+ assemble_name_raw (file, lab2);
+ if (islocaldiff)
+ fprintf (file, "\n\t%s L$set$%d", directive, darwin_dwarf_label_counter++);
+}
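+
+/* For illustration (hypothetical labels): a 4-byte delta between the local
+ labels *LFE1 and *LFB1 comes out as
+	.set L$set$0,LFE1-LFB1
+	.long L$set$0
+ whereas a delta involving a non-local label is emitted directly as
+ ".long lab1-lab2". */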
+
+/* Output an offset in a DWARF section on Darwin. On Darwin, DWARF section
+ offsets are not represented using relocs in .o files; either the
+ section never leaves the .o file, or the linker or other tool is
+ responsible for parsing the DWARF and updating the offsets. */
+
+void
+darwin_asm_output_dwarf_offset (FILE *file, int size, const char * lab,
+ section *base)
+{
+ char sname[64];
+ int namelen;
+
+ gcc_assert (base->common.flags & SECTION_NAMED);
+ gcc_assert (strncmp (base->named.name, "__DWARF,", 8) == 0);
+ gcc_assert (strchr (base->named.name + 8, ','));
+
+ namelen = strchr (base->named.name + 8, ',') - (base->named.name + 8);
+ sprintf (sname, "*Lsection%.*s", namelen, base->named.name + 8);
+ darwin_asm_output_dwarf_delta (file, size, lab, sname);
+}
+
+/* Called from within the TARGET_ASM_FILE_START hook for each target. */
+
+void
+darwin_file_start (void)
+{
+ /* Nothing to do. */
+}
+
+/* Called for the TARGET_ASM_FILE_END hook.
+ Emit the Mach-O pic indirection data, the lto data and, finally, a flag
+ to tell the linker that it can break the file object into sections and
+ move those around for efficiency. */
+
+void
+darwin_file_end (void)
+{
+ machopic_finish (asm_out_file);
+ if (strcmp (lang_hooks.name, "GNU C++") == 0)
+ {
+ switch_to_section (darwin_sections[constructor_section]);
+ switch_to_section (darwin_sections[destructor_section]);
+ ASM_OUTPUT_ALIGN (asm_out_file, 1);
+ }
+
+ /* If there was LTO assembler output, append it to asm_out_file. */
+ if (lto_asm_out_name)
+ {
+ int n;
+ char *buf, *lto_asm_txt;
+
+ /* Shouldn't be here if we failed to switch back. */
+ gcc_assert (! saved_asm_out_file);
+
+ lto_asm_out_file = fopen (lto_asm_out_name, "r");
+ if (lto_asm_out_file == NULL)
+ fatal_error ("failed to open temporary file %s with LTO output",
+ lto_asm_out_name);
+ fseek (lto_asm_out_file, 0, SEEK_END);
+ n = ftell (lto_asm_out_file);
+ if (n > 0)
+ {
+ fseek (lto_asm_out_file, 0, SEEK_SET);
+ lto_asm_txt = buf = (char *) xmalloc (n + 1);
+ while (fgets (lto_asm_txt, n, lto_asm_out_file))
+ fputs (lto_asm_txt, asm_out_file);
+ /* Put a termination label. */
+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n",
+ LTO_SEGMENT_NAME, LTO_SECTS_SECTION);
+ fprintf (asm_out_file, "L_GNU_LTO%d:\t;# end of lto\n",
+ lto_section_num);
+ /* Make sure our termination label stays in this section. */
+ fputs ("\t.space\t1\n", asm_out_file);
+ }
+
+ /* Remove the temporary file. */
+ fclose (lto_asm_out_file);
+ unlink_if_ordinary (lto_asm_out_name);
+ free (lto_asm_out_name);
+ }
+
+ /* Output the names and indices. */
+ if (lto_section_names && VEC_length (darwin_lto_section_e, lto_section_names))
+ {
+ int count;
+ darwin_lto_section_e *ref;
+ /* For now, we'll make the offsets 4 bytes and unaligned - we'll fix
+ the latter up ourselves. */
+ const char *op = integer_asm_op (4, 0);
+
+ /* Emit the names. */
+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n",
+ LTO_SEGMENT_NAME, LTO_NAMES_SECTION);
+ FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref)
+ {
+ fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\n", count);
+ /* We have to jump through hoops to get the values of the intra-section
+ offsets... */
+ fprintf (asm_out_file,
+ "\t.set L$gnu$lto$noff%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME0\n",
+ count, count);
+ fprintf (asm_out_file,
+ "\t.set L$gnu$lto$nsiz%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME%d\n",
+ count, count+1, count);
+ fprintf (asm_out_file, "\t.asciz\t\"%s\"\n", ref->sectname);
+ }
+ fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\t;# end\n", lto_section_num);
+ /* Make sure our termination label stays in this section. */
+ fputs ("\t.space\t1\n", asm_out_file);
+
+ /* Emit the Index. */
+ fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n",
+ LTO_SEGMENT_NAME, LTO_INDEX_SECTION);
+ fputs ("\t.align\t2\n", asm_out_file);
+ fputs ("# Section offset, Section length, Name offset, Name length\n",
+ asm_out_file);
+ FOR_EACH_VEC_ELT (darwin_lto_section_e, lto_section_names, count, ref)
+ {
+ fprintf (asm_out_file, "%s L$gnu$lto$offs%d\t;# %s\n",
+ op, count, ref->sectname);
+ fprintf (asm_out_file, "%s L$gnu$lto$size%d\n", op, count);
+ fprintf (asm_out_file, "%s L$gnu$lto$noff%d\n", op, count);
+ fprintf (asm_out_file, "%s L$gnu$lto$nsiz%d\n", op, count);
+ }
+ }
+
+ /* If we have section anchors, then we must prevent the linker from
+ re-arranging data. */
+ if (!DARWIN_SECTION_ANCHORS || !flag_section_anchors)
+ fprintf (asm_out_file, "\t.subsections_via_symbols\n");
+}
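+
+/* A sketch of the LTO metadata this produces (hypothetical, two sections):
+ the names section holds NUL-terminated section names, each preceded by an
+ L_GNU_LTO_NAMEn label, with L$gnu$lto$noffN / L$gnu$lto$nsizN .set to each
+ name's offset and size; the index section then emits four 4-byte values
+ per LTO section: section offset, section length, name offset and name
+ length. */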
+
+/* TODO: Add a language hook for identifying if a decl is a vtable. */
+#define DARWIN_VTABLE_P(DECL) 0
+
+/* Cross-module name binding. Darwin does not support overriding
+ functions at dynamic-link time, except for vtables in kexts. */
+
+bool
+darwin_binds_local_p (const_tree decl)
+{
+ return default_binds_local_p_1 (decl,
+ TARGET_KEXTABI && DARWIN_VTABLE_P (decl));
+}
+
+/* Darwin's implementation of TARGET_ASM_OUTPUT_ANCHOR. Define the
+ anchor relative to ".", the current section position. We cannot use
+ the default one because ASM_OUTPUT_DEF is wrong for Darwin. */
+void
+darwin_asm_output_anchor (rtx symbol)
+{
+ fprintf (asm_out_file, "\t.set\t");
+ assemble_name (asm_out_file, XSTR (symbol, 0));
+ fprintf (asm_out_file, ", . + " HOST_WIDE_INT_PRINT_DEC "\n",
+ SYMBOL_REF_BLOCK_OFFSET (symbol));
+}
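+
+/* For illustration (hypothetical symbol): a variable "_v" placed 16 bytes
+ into an anchored block is emitted as
+	.set _v, . + 16
+ i.e. defined relative to the current section position rather than via
+ ASM_OUTPUT_DEF. */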
+
+/* Disable section anchoring on any section containing a zero-sized
+ object. */
+bool
+darwin_use_anchors_for_symbol_p (const_rtx symbol)
+{
+ if (DARWIN_SECTION_ANCHORS && flag_section_anchors)
+ {
+ section *sect;
+ /* If the section contains a zero-sized object it's ineligible. */
+ sect = SYMBOL_REF_BLOCK (symbol)->sect;
+ /* This should have the effect of disabling anchors for vars that follow
+ any zero-sized one, in a given section. */
+ if (sect->common.flags & SECTION_NO_ANCHOR)
+ return false;
+
+ /* Also check the normal reasons for suppressing. */
+ return default_use_anchors_for_symbol_p (symbol);
+ }
+ else
+ return false;
+}
+
+/* Set the Darwin-specific attributes on TYPE. */
+void
+darwin_set_default_type_attributes (tree type)
+{
+ if (darwin_ms_struct
+ && TREE_CODE (type) == RECORD_TYPE)
+ TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("ms_struct"),
+ NULL_TREE,
+ TYPE_ATTRIBUTES (type));
+}
+
+/* True iff we're generating code for loadable kernel extensions. */
+
+bool
+darwin_kextabi_p (void)
+{
+ return flag_apple_kext;
+}
+
+void
+darwin_override_options (void)
+{
+ /* Keep track of which (major) version we're generating code for. */
+ if (darwin_macosx_version_min)
+ {
+ if (strverscmp (darwin_macosx_version_min, "10.6") >= 0)
+ generating_for_darwin_version = 10;
+ else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0)
+ generating_for_darwin_version = 9;
+
+ /* Earlier versions are not specifically accounted for, until required. */
+ }
+
+ /* Don't emit DWARF3/4 unless specifically selected. This is a
+ workaround for tool bugs. */
+ if (!global_options_set.x_dwarf_strict)
+ dwarf_strict = 1;
+
+ /* Do not allow unwind tables to be generated by default for m32;
+ -fnon-call-exceptions will override this, regardless of what we do. */
+ if (generating_for_darwin_version < 10
+ && !global_options_set.x_flag_asynchronous_unwind_tables
+ && !TARGET_64BIT)
+ global_options.x_flag_asynchronous_unwind_tables = 0;
+
+ /* Disable -freorder-blocks-and-partition when unwind tables are being
+ emitted for Darwin < 9 (OS X 10.5).
+ The strategy is: unless the user has specifically set or unset an unwind
+ flag, we switch off -freorder-blocks-and-partition whenever unwind tables
+ will be generated. If the user specifically sets such flags, we assume
+ they know why. */
+ if (generating_for_darwin_version < 9
+ && global_options_set.x_flag_reorder_blocks_and_partition
+ && ((global_options.x_flag_exceptions /* User, c++, java */
+ && !global_options_set.x_flag_exceptions) /* User specified... */
+ || (global_options.x_flag_unwind_tables
+ && !global_options_set.x_flag_unwind_tables)
+ || (global_options.x_flag_non_call_exceptions
+ && !global_options_set.x_flag_non_call_exceptions)
+ || (global_options.x_flag_asynchronous_unwind_tables
+ && !global_options_set.x_flag_asynchronous_unwind_tables)))
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not work with exceptions "
+ "on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
+ if (flag_mkernel || flag_apple_kext)
+ {
+ /* -mkernel implies -fapple-kext for C++. */
+ if (strcmp (lang_hooks.name, "GNU C++") == 0)
+ flag_apple_kext = 1;
+
+ flag_no_common = 1;
+
+ /* No EH in kexts. */
+ flag_exceptions = 0;
+ /* No -fnon-call-exceptions data in kexts. */
+ flag_non_call_exceptions = 0;
+ /* ... so no unwind tables either. */
+ flag_unwind_tables = 0;
+ flag_asynchronous_unwind_tables = 0;
+ /* We still need to emit branch islands for kernel context. */
+ darwin_emit_branch_islands = true;
+ }
+
+ if (flag_var_tracking
+ && generating_for_darwin_version >= 9
+ && (flag_gtoggle ? (debug_info_level == DINFO_LEVEL_NONE)
+ : (debug_info_level >= DINFO_LEVEL_NORMAL))
+ && write_symbols == DWARF2_DEBUG)
+ flag_var_tracking_uninit = 1;
+
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ {
+ if (flag_pic)
+ warning (0, "-mdynamic-no-pic overrides -fpic or -fPIC");
+ flag_pic = 0;
+ }
+ else if (flag_pic == 1)
+ {
+ /* Darwin's -fpic is -fPIC. */
+ flag_pic = 2;
+ }
+
+ /* It is assumed that branch island stubs are needed for earlier systems. */
+ if (generating_for_darwin_version < 9)
+ darwin_emit_branch_islands = true;
+ else
+ emit_aligned_common = true; /* Later systems can support aligned common. */
+
+ /* The c_dialect...() macros are not available to us here. */
+ darwin_running_cxx = (strstr (lang_hooks.name, "C++") != 0);
+}
+
+/* Add $LDBL128 suffix to long double builtins. */
+
+static void
+darwin_patch_builtin (int fncode)
+{
+ tree fn = built_in_decls[fncode];
+ tree sym;
+ char *newname;
+
+ if (!fn)
+ return;
+
+ sym = DECL_ASSEMBLER_NAME (fn);
+ newname = ACONCAT (("_", IDENTIFIER_POINTER (sym), "$LDBL128", NULL));
+
+ set_user_assembler_name (fn, newname);
+
+ fn = implicit_built_in_decls[fncode];
+ if (fn)
+ set_user_assembler_name (fn, newname);
+}
+
+void
+darwin_patch_builtins (void)
+{
+ if (LONG_DOUBLE_TYPE_SIZE != 128)
+ return;
+
+#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_NO64(fncode) \
+ if (!TARGET_64BIT) \
+ darwin_patch_builtin (fncode);
+#define PATCH_BUILTIN_VARIADIC(fncode) \
+ if (!TARGET_64BIT \
+ && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \
+ darwin_patch_builtin (fncode);
+#include "darwin-ppc-ldouble-patch.def"
+#undef PATCH_BUILTIN
+#undef PATCH_BUILTIN_NO64
+#undef PATCH_BUILTIN_VARIADIC
+}
+
+/* CFStrings implementation. */
+static GTY(()) tree cfstring_class_reference = NULL_TREE;
+static GTY(()) tree cfstring_type_node = NULL_TREE;
+static GTY(()) tree ccfstring_type_node = NULL_TREE;
+static GTY(()) tree pccfstring_type_node = NULL_TREE;
+static GTY(()) tree pcint_type_node = NULL_TREE;
+static GTY(()) tree pcchar_type_node = NULL_TREE;
+
+static enum built_in_function darwin_builtin_cfstring;
+
+/* Store all constructed constant CFStrings in a hash table so that
+ they get uniqued properly. */
+
+typedef struct GTY (()) cfstring_descriptor {
+ /* The string literal. */
+ tree literal;
+ /* The resulting constant CFString. */
+ tree constructor;
+} cfstring_descriptor;
+
+static GTY ((param_is (struct cfstring_descriptor))) htab_t cfstring_htab;
+
+static hashval_t cfstring_hash (const void *);
+static int cfstring_eq (const void *, const void *);
+
+static tree
+add_builtin_field_decl (tree type, const char *name, tree **chain)
+{
+ tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier (name), type);
+
+ if (*chain != NULL)
+ **chain = field;
+ *chain = &DECL_CHAIN (field);
+
+ return field;
+}
+
+tree
+darwin_init_cfstring_builtins (unsigned builtin_cfstring)
+{
+ tree cfsfun, fields, pccfstring_ftype_pcchar;
+ tree *chain = NULL;
+
+ darwin_builtin_cfstring =
+ (enum built_in_function) builtin_cfstring;
+
+ /* struct __builtin_CFString {
+ const int *isa; (will point at
+ int flags; __CFConstantStringClassReference)
+ const char *str;
+ long length;
+ }; */
+
+ pcint_type_node = build_pointer_type
+ (build_qualified_type (integer_type_node, TYPE_QUAL_CONST));
+
+ pcchar_type_node = build_pointer_type
+ (build_qualified_type (char_type_node, TYPE_QUAL_CONST));
+
+ cfstring_type_node = (*lang_hooks.types.make_type) (RECORD_TYPE);
+
+ /* Have to build the fields backwards for finish_builtin_struct. */
+ fields = add_builtin_field_decl (long_integer_type_node, "length", &chain);
+ add_builtin_field_decl (pcchar_type_node, "str", &chain);
+ add_builtin_field_decl (integer_type_node, "flags", &chain);
+ add_builtin_field_decl (pcint_type_node, "isa", &chain);
+ finish_builtin_struct (cfstring_type_node, "__builtin_CFString",
+ fields, NULL_TREE);
+
+ /* const struct __builtin_CFString *
+ __builtin___CFStringMakeConstantString (const char *); */
+
+ ccfstring_type_node = build_qualified_type
+ (cfstring_type_node, TYPE_QUAL_CONST);
+ pccfstring_type_node = build_pointer_type (ccfstring_type_node);
+ pccfstring_ftype_pcchar = build_function_type_list
+ (pccfstring_type_node, pcchar_type_node, NULL_TREE);
+
+ cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+ get_identifier ("__builtin___CFStringMakeConstantString"),
+ pccfstring_ftype_pcchar);
+
+ TREE_PUBLIC (cfsfun) = 1;
+ DECL_EXTERNAL (cfsfun) = 1;
+ DECL_ARTIFICIAL (cfsfun) = 1;
+ /* Make a lang-specific node - dup_lang_specific_decl makes a new node
+ in place of the existing, which may be NULL. */
+ DECL_LANG_SPECIFIC (cfsfun) = NULL;
+ (*lang_hooks.dup_lang_specific_decl) (cfsfun);
+ DECL_BUILT_IN_CLASS (cfsfun) = BUILT_IN_MD;
+ DECL_FUNCTION_CODE (cfsfun) = darwin_builtin_cfstring;
+ lang_hooks.builtin_function (cfsfun);
+
+ /* extern int __CFConstantStringClassReference[]; */
+ cfstring_class_reference = build_decl (BUILTINS_LOCATION, VAR_DECL,
+ get_identifier ("__CFConstantStringClassReference"),
+ build_array_type (integer_type_node, NULL_TREE));
+
+ TREE_PUBLIC (cfstring_class_reference) = 1;
+ DECL_ARTIFICIAL (cfstring_class_reference) = 1;
+ (*lang_hooks.decls.pushdecl) (cfstring_class_reference);
+ DECL_EXTERNAL (cfstring_class_reference) = 1;
+ rest_of_decl_compilation (cfstring_class_reference, 0, 0);
+
+ /* Initialize the hash table used to hold the constant CFString objects. */
+ cfstring_htab = htab_create_ggc (31, cfstring_hash, cfstring_eq, NULL);
+
+ return cfstring_type_node;
+}
+
+tree
+darwin_fold_builtin (tree fndecl, int n_args, tree *argp,
+ bool ARG_UNUSED (ignore))
+{
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ if (fcode == darwin_builtin_cfstring)
+ {
+ if (!darwin_constant_cfstrings)
+ {
+ error ("built-in function %qD requires the"
+ " %<-mconstant-cfstrings%> flag", fndecl);
+ return error_mark_node;
+ }
+
+ if (n_args != 1)
+ {
+ error ("built-in function %qD takes one argument only", fndecl);
+ return error_mark_node;
+ }
+
+ return darwin_build_constant_cfstring (*argp);
+ }
+
+ return NULL_TREE;
+}
+
+void
+darwin_rename_builtins (void)
+{
+ /* The system ___divdc3 routine in libSystem on darwin10 is not
+ accurate to 1ulp, ours is, so we avoid ever using the system name
+ for this routine and instead install a non-conflicting name that
+ is accurate.
+
+ When -ffast-math or -funsafe-math-optimizations is given, we can
+ use the faster version. */
+ if (!flag_unsafe_math_optimizations)
+ {
+ int dcode = (BUILT_IN_COMPLEX_DIV_MIN
+ + DCmode - MIN_MODE_COMPLEX_FLOAT);
+ tree fn = built_in_decls[dcode];
+ /* Fortran and C call TARGET_INIT_BUILTINS and
+ TARGET_INIT_LIBFUNCS at different times, so we have to put a
+ call into each to ensure that at least one of them is called
+ after build_common_builtin_nodes. A better fix is to add a
+ new hook to run after build_common_builtin_nodes runs. */
+ if (fn)
+ set_user_assembler_name (fn, "___ieee_divdc3");
+ fn = implicit_built_in_decls[dcode];
+ if (fn)
+ set_user_assembler_name (fn, "___ieee_divdc3");
+ }
+}
+
+static hashval_t
+cfstring_hash (const void *ptr)
+{
+ tree str = ((const struct cfstring_descriptor *)ptr)->literal;
+ const unsigned char *p = (const unsigned char *) TREE_STRING_POINTER (str);
+ int i, len = TREE_STRING_LENGTH (str);
+ hashval_t h = len;
+
+ for (i = 0; i < len; i++)
+ h = ((h * 613) + p[i]);
+
+ return h;
+}
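+
+/* The above is a simple multiplicative rolling hash over every byte of the
+ literal (embedded NULs included): h is seeded with the string length,
+ then updated as h = h * 613 + p[i] for each byte. */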
+
+static int
+cfstring_eq (const void *ptr1, const void *ptr2)
+{
+ tree str1 = ((const struct cfstring_descriptor *)ptr1)->literal;
+ tree str2 = ((const struct cfstring_descriptor *)ptr2)->literal;
+ int len1 = TREE_STRING_LENGTH (str1);
+
+ return (len1 == TREE_STRING_LENGTH (str2)
+ && !memcmp (TREE_STRING_POINTER (str1), TREE_STRING_POINTER (str2),
+ len1));
+}
+
+tree
+darwin_build_constant_cfstring (tree str)
+{
+ struct cfstring_descriptor *desc, key;
+ void **loc;
+ tree addr;
+
+ if (!str)
+ {
+ error ("CFString literal is missing");
+ return error_mark_node;
+ }
+
+ STRIP_NOPS (str);
+
+ if (TREE_CODE (str) == ADDR_EXPR)
+ str = TREE_OPERAND (str, 0);
+
+ if (TREE_CODE (str) != STRING_CST)
+ {
+ error ("CFString literal expression is not a string constant");
+ return error_mark_node;
+ }
+
+ /* Perhaps we already constructed a constant CFString just like this one? */
+ key.literal = str;
+ loc = htab_find_slot (cfstring_htab, &key, INSERT);
+ desc = (struct cfstring_descriptor *) *loc;
+
+ if (!desc)
+ {
+ tree var, constructor, field;
+ VEC(constructor_elt,gc) *v = NULL;
+ int length = TREE_STRING_LENGTH (str) - 1;
+
+ if (darwin_warn_nonportable_cfstrings)
+ {
+ const char *s = TREE_STRING_POINTER (str);
+ int l = 0;
+
+ for (l = 0; l < length; l++)
+ if (!s[l] || !isascii (s[l]))
+ {
+ warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal",
+ s[l] ? "non-ASCII character" : "embedded NUL");
+ break;
+ }
+ }
+
+ *loc = desc = ggc_alloc_cleared_cfstring_descriptor ();
+ desc->literal = str;
+
+ /* isa *. */
+ field = TYPE_FIELDS (ccfstring_type_node);
+ CONSTRUCTOR_APPEND_ELT(v, NULL_TREE,
+ build1 (ADDR_EXPR, TREE_TYPE (field),
+ cfstring_class_reference));
+ /* flags */
+ field = DECL_CHAIN (field);
+ CONSTRUCTOR_APPEND_ELT(v, NULL_TREE,
+ build_int_cst (TREE_TYPE (field), 0x000007c8));
+ /* string *. */
+ field = DECL_CHAIN (field);
+ CONSTRUCTOR_APPEND_ELT(v, NULL_TREE,
+ build1 (ADDR_EXPR, TREE_TYPE (field), str));
+ /* length */
+ field = DECL_CHAIN (field);
+ CONSTRUCTOR_APPEND_ELT(v, NULL_TREE,
+ build_int_cst (TREE_TYPE (field), length));
+
+ constructor = build_constructor (ccfstring_type_node, v);
+ TREE_READONLY (constructor) = 1;
+ TREE_CONSTANT (constructor) = 1;
+ TREE_STATIC (constructor) = 1;
+
+ /* Fromage: The C++ flavor of 'build_unary_op' expects constructor nodes
+ to have the TREE_HAS_CONSTRUCTOR (...) bit set. However, this file is
+ being built without any knowledge of C++ tree accessors; hence, we shall
+ use the generic accessor that TREE_HAS_CONSTRUCTOR actually maps to! */
+ if (darwin_running_cxx)
+ TREE_LANG_FLAG_4 (constructor) = 1; /* TREE_HAS_CONSTRUCTOR */
+
+ /* Create an anonymous global variable for this CFString. */
+ var = build_decl (input_location, CONST_DECL,
+ NULL, TREE_TYPE (constructor));
+ DECL_ARTIFICIAL (var) = 1;
+ TREE_STATIC (var) = 1;
+ DECL_INITIAL (var) = constructor;
+ /* FIXME: This should use a translation_unit_decl to indicate file scope. */
+ DECL_CONTEXT (var) = NULL_TREE;
+ desc->constructor = var;
+ }
+
+ addr = build1 (ADDR_EXPR, pccfstring_type_node, desc->constructor);
+ TREE_CONSTANT (addr) = 1;
+
+ return addr;
+}
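+
+/* For illustration (hypothetical literal): @"abc" is uniqued through
+ cfstring_htab and lowered to a static object equivalent to
+	{ &__CFConstantStringClassReference, 0x000007c8, "abc", 3 }
+ matching the isa/flags/str/length fields of struct __builtin_CFString;
+ the returned tree is the constant address of that object. */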
+
+bool
+darwin_cfstring_p (tree str)
+{
+ struct cfstring_descriptor key;
+ void **loc;
+
+ if (!str)
+ return false;
+
+ STRIP_NOPS (str);
+
+ if (TREE_CODE (str) == ADDR_EXPR)
+ str = TREE_OPERAND (str, 0);
+
+ if (TREE_CODE (str) != STRING_CST)
+ return false;
+
+ key.literal = str;
+ loc = htab_find_slot (cfstring_htab, &key, NO_INSERT);
+
+ if (loc)
+ return true;
+
+ return false;
+}
+
+void
+darwin_enter_string_into_cfstring_table (tree str)
+{
+ struct cfstring_descriptor key;
+ void **loc;
+
+ key.literal = str;
+ loc = htab_find_slot (cfstring_htab, &key, INSERT);
+
+ if (!*loc)
+ {
+ *loc = ggc_alloc_cleared_cfstring_descriptor ();
+ ((struct cfstring_descriptor *)*loc)->literal = str;
+ }
+}
+
+/* Choose named function section based on its frequency. */
+
+section *
+darwin_function_section (tree decl, enum node_frequency freq,
+ bool startup, bool exit)
+{
+ /* Decide if we need to put this in a coalescable section. */
+ bool weak = (decl
+ && DECL_WEAK (decl)
+ && (!DECL_ATTRIBUTES (decl)
+ || !lookup_attribute ("weak_import",
+ DECL_ATTRIBUTES (decl))));
+
+ /* If there is a specified section name, we should not be trying to
+ override. */
+ if (decl && DECL_SECTION_NAME (decl) != NULL_TREE)
+ return get_named_section (decl, NULL, 0);
+
+ /* Default when there is no function re-ordering. */
+ if (!flag_reorder_functions)
+ return (weak)
+ ? darwin_sections[text_coal_section]
+ : text_section;
+
+ /* Startup code should go to startup subsection unless it is
+ unlikely executed (this happens especially with function splitting
+ where we can split away unnecessary parts of static constructors). */
+ if (startup && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ return (weak)
+ ? darwin_sections[text_startup_coal_section]
+ : darwin_sections[text_startup_section];
+
+ /* Similarly for exit. */
+ if (exit && freq != NODE_FREQUENCY_UNLIKELY_EXECUTED)
+ return (weak)
+ ? darwin_sections[text_exit_coal_section]
+ : darwin_sections[text_exit_section];
+
+ /* Group cold functions together, similarly for hot code. */
+ switch (freq)
+ {
+ case NODE_FREQUENCY_UNLIKELY_EXECUTED:
+ return (weak)
+ ? darwin_sections[text_cold_coal_section]
+ : darwin_sections[text_cold_section];
+ case NODE_FREQUENCY_HOT:
+ return (weak)
+ ? darwin_sections[text_hot_coal_section]
+ : darwin_sections[text_hot_section];
+ default:
+ return (weak)
+ ? darwin_sections[text_coal_section]
+ : text_section;
+ }
+}
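+
+/* In summary, assuming no explicit section name: with -freorder-functions,
+ startup and exit code go to the text_startup/text_exit sections, unlikely
+ executed code to text_cold, hot code to text_hot, and anything else to
+ plain text - with a coalesced ("_coal") variant of each for weak
+ functions. */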
+
+/* When a function is partitioned between sections, we need to insert a label
+ at the start of each new chunk - so that it may become a valid 'atom' for
+ eh and debug purposes. Without this the linker will emit warnings if one
+ tries to add line location information (since the switched fragment will
+ be anonymous). */
+
+void
+darwin_function_switched_text_sections (FILE *fp, tree decl, bool new_is_cold)
+{
+ char buf[128];
+ snprintf (buf, 128, "%s%s", new_is_cold ? "__cold_sect_of_" : "__hot_sect_of_",
+ IDENTIFIER_POINTER (DECL_NAME (decl)));
+ /* Make sure we pick up all the relevant quotes etc. */
+ assemble_name_raw (fp, (const char *) buf);
+ fputs (":\n", fp);
+}
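+
+/* For illustration (hypothetical function): when the cold part of "foo" is
+ split away, the fragment opens with the label
+	__cold_sect_of_foo:
+ giving the linker a named atom to which eh and line information can be
+ attached. */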
+
+#include "gt-darwin.h"
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
new file mode 100644
index 000000000..0526d851f
--- /dev/null
+++ b/gcc/config/darwin.h
@@ -0,0 +1,990 @@
+/* Target definitions for Darwin (Mac OS X) systems.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef CONFIG_DARWIN_H
+#define CONFIG_DARWIN_H
+
+/* The definitions in this file are common to all processor types
+ running Darwin, which is the kernel for Mac OS X. Darwin is
+ basically a BSD user layer laid over a Mach kernel, then evolved
+ for many years (at NeXT) in parallel with other Unix systems. So
+ while the runtime is a somewhat idiosyncratic Mach-based thing,
+ other definitions look like they would for a BSD variant. */
+
+/* Although NeXT ran on many different architectures, as of Jan 2001
+ the only supported Darwin targets are PowerPC and x86. */
+
+/* One of Darwin's NeXT legacies is the Mach-O format, which is partly
+ like a.out and partly like COFF, with additional features like
+ multi-architecture binary support. */
+
+#define DARWIN_X86 0
+#define DARWIN_PPC 0
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* Suppress g++ attempt to link in the math library automatically. */
+#define MATH_LIBRARY ""
+
+/* We have atexit. */
+
+#define HAVE_ATEXIT
+
+/* Define an empty body for the function do_global_dtors() in libgcc2.c. */
+
+#define DO_GLOBAL_DTORS_BODY
+
+/* The string value for __SIZE_TYPE__. */
+
+#ifndef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+#endif
+
+/* Type used for ptrdiff_t, as a string used in a declaration. */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* wchar_t is int. */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long long int"
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long long int"
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "short int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long long int"
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "short unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* Default to using the NeXT-style runtime, since that's what is
+ pre-installed on Darwin systems. */
+
+#define NEXT_OBJC_RUNTIME
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* True if pragma ms_struct is in effect. */
+extern GTY(()) int darwin_ms_struct;
+
+#define DRIVER_SELF_SPECS \
+ "%{gfull:-g -fno-eliminate-unused-debug-symbols} %<gfull", \
+ "%{gused:-g -feliminate-unused-debug-symbols} %<gused", \
+ "%{fapple-kext|mkernel:-static}", \
+ "%{shared:-Zdynamiclib} %<shared"
+
+#define DARWIN_CC1_SPEC \
+ "%{findirect-virtual-calls: -fapple-kext} %<findirect-virtual-calls " \
+ "%{fterminated-vtables: -fapple-kext} %<fterminated-vtables " \
+ "%<filelist* %<framework*"
+
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+ do { \
+ darwin_override_options (); \
+ } while (0)
+
+#define SUBTARGET_C_COMMON_OVERRIDE_OPTIONS do { \
+ /* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */ \
+ if (!global_options_set.x_flag_objc_abi) \
+ global_options.x_flag_objc_abi \
+ = (flag_next_runtime && TARGET_64BIT) ? 2 : 0; \
+ /* Objective-C family ABI 2 is only valid for next/m64 at present. */ \
+ if (global_options_set.x_flag_objc_abi && flag_next_runtime) \
+ if (TARGET_64BIT && global_options.x_flag_objc_abi < 2) \
+ error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 is only" \
+ " supported on %<-m64%> targets for" \
+ " %<-fnext-runtime%>"); \
+ /* Sort out ObjC exceptions: If the runtime is NeXT we default to \
+ sjlj for m32 only. */ \
+ if (!global_options_set.x_flag_objc_sjlj_exceptions) \
+ global_options.x_flag_objc_sjlj_exceptions = \
+ flag_next_runtime && !TARGET_64BIT; \
+ if (flag_mkernel || flag_apple_kext) \
+ { \
+ if (flag_use_cxa_atexit == 2) \
+ flag_use_cxa_atexit = 0; \
+ /* kexts should always be built without the coalesced sections \
+ because the kernel loader doesn't grok such sections. */ \
+ flag_weak = 0; \
+ /* No RTTI in kexts. */ \
+ flag_rtti = 0; \
+ } \
+ } while (0)
+
+/* Machine dependent cpp options. Don't add more options here, add
+ them to darwin_cpp_builtins in darwin-c.c. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{static:%{!dynamic:-D__STATIC__}}%{!static:-D__DYNAMIC__}" \
+ " %{pthread:-D_REENTRANT}"
+
+/* This is mostly a clone of the standard LINK_COMMAND_SPEC, plus
+ precomp, libtool, and fat build additions.
+
+ In general, random Darwin linker flags should go into LINK_SPEC
+ instead of LINK_COMMAND_SPEC. The command spec is better for
+ specifying the handling of options understood by generic Unix
+ linkers, and for positional arguments like libraries. */
+
+#define LINK_COMMAND_SPEC_A \
+ "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
+ %(linker) \
+ %{flto*:%<fcompare-debug*} \
+ %{flto*} \
+ %l %X %{s} %{t} %{Z} %{u*} \
+ %{e*} %{r} \
+ %{o*}%{!o:-o a.out} \
+ %{!nostdlib:%{!nostartfiles:%S}} \
+ %{L*} %(link_libgcc) %o %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} \
+ %{fopenmp|ftree-parallelize-loops=*: \
+ %{static|static-libgcc|static-libstdc++|static-libgfortran: libgomp.a%s; : -lgomp } } \
+ %{!nostdlib:%{!nodefaultlibs:\
+ %(link_ssp) %(link_gcc_c_sequence)\
+ }}\
+ %{!nostdlib:%{!nostartfiles:%E}} %{T*} %{F*} }}}}}}}"
+
+#define DSYMUTIL "\ndsymutil"
+
+#define DSYMUTIL_SPEC \
+ "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
+ %{v} \
+ %{gdwarf-2:%{!gstabs*:%{!g0: -idsym}}}\
+ %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm: \
+ %{gdwarf-2:%{!gstabs*:%{!g0: -dsym}}}}}}}}}}}"
+
+#define LINK_COMMAND_SPEC LINK_COMMAND_SPEC_A DSYMUTIL_SPEC
+
+/* Tell collect2 to run dsymutil for us as necessary. */
+#define COLLECT_RUN_DSYMUTIL 1
+
+/* We only want one instance of %G, since libSystem (Darwin's -lc) does not depend
+ on libgcc. */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC "%G %L"
+
+#ifdef TARGET_SYSTEM_ROOT
+#define LINK_SYSROOT_SPEC \
+ "%{isysroot*:-syslibroot %*;:-syslibroot " TARGET_SYSTEM_ROOT "}"
+#else
+#define LINK_SYSROOT_SPEC "%{isysroot*:-syslibroot %*}"
+#endif
+
+/* Please keep the random linker options in alphabetical order (modulo
+ 'Z' and 'no' prefixes). Note that options taking arguments may appear
+ multiple times on a command line with different arguments each time,
+ so put a * after their names so all of them get passed. */
+#define LINK_SPEC \
+ "%{static}%{!static:-dynamic} \
+ %:remove-outfile(-ldl) \
+ %:remove-outfile(-lm) \
+ %:remove-outfile(-lpthread) \
+ %{fgnu-runtime: %{static|static-libgcc: \
+ %:replace-outfile(-lobjc libobjc-gnu.a%s); \
+ :%:replace-outfile(-lobjc -lobjc-gnu ) } }\
+ %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\
+ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\
+ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\
+ %{!Zdynamiclib: \
+ %{Zforce_cpusubtype_ALL:-arch %(darwin_arch) -force_cpusubtype_ALL} \
+ %{!Zforce_cpusubtype_ALL:-arch %(darwin_subarch)} \
+ %{Zbundle:-bundle} \
+ %{Zbundle_loader*:-bundle_loader %*} \
+ %{client_name*} \
+ %{compatibility_version*:%e-compatibility_version only allowed with -dynamiclib\
+} \
+ %{current_version*:%e-current_version only allowed with -dynamiclib} \
+ %{Zforce_flat_namespace:-force_flat_namespace} \
+ %{Zinstall_name*:%e-install_name only allowed with -dynamiclib} \
+ %{keep_private_externs} \
+ %{private_bundle} \
+ } \
+ %{Zdynamiclib: -dylib \
+ %{Zbundle:%e-bundle not allowed with -dynamiclib} \
+ %{Zbundle_loader*:%e-bundle_loader not allowed with -dynamiclib} \
+ %{client_name*:%e-client_name not allowed with -dynamiclib} \
+ %{compatibility_version*:-dylib_compatibility_version %*} \
+ %{current_version*:-dylib_current_version %*} \
+ %{Zforce_cpusubtype_ALL:-arch %(darwin_arch)} \
+ %{!Zforce_cpusubtype_ALL: -arch %(darwin_subarch)} \
+ %{Zforce_flat_namespace:%e-force_flat_namespace not allowed with -dynamiclib} \
+ %{Zinstall_name*:-dylib_install_name %*} \
+ %{keep_private_externs:%e-keep_private_externs not allowed with -dynamiclib} \
+ %{private_bundle:%e-private_bundle not allowed with -dynamiclib} \
+ } \
+ %{Zall_load:-all_load} \
+ %{Zallowable_client*:-allowable_client %*} \
+ %{Zbind_at_load:-bind_at_load} \
+ %{Zarch_errors_fatal:-arch_errors_fatal} \
+ %{Zdead_strip:-dead_strip} \
+ %{Zno_dead_strip_inits_and_terms:-no_dead_strip_inits_and_terms} \
+ %{Zdylib_file*:-dylib_file %*} \
+ %{Zdynamic:-dynamic}\
+ %{Zexported_symbols_list*:-exported_symbols_list %*} \
+ %{Zflat_namespace:-flat_namespace} \
+ %{headerpad_max_install_names} \
+ %{Zimage_base*:-image_base %*} \
+ %{Zinit*:-init %*} \
+ %{!mmacosx-version-min=*:-macosx_version_min %(darwin_minversion)} \
+ %{mmacosx-version-min=*:-macosx_version_min %*} \
+ %{nomultidefs} \
+ %{Zmulti_module:-multi_module} %{Zsingle_module:-single_module} \
+ %{Zmultiply_defined*:-multiply_defined %*} \
+ %{!Zmultiply_defined*:%{shared-libgcc: \
+ %:version-compare(< 10.5 mmacosx-version-min= -multiply_defined) \
+ %:version-compare(< 10.5 mmacosx-version-min= suppress)}} \
+ %{Zmultiplydefinedunused*:-multiply_defined_unused %*} \
+ %{fpie:-pie} \
+ %{prebind} %{noprebind} %{nofixprebinding} %{prebind_all_twolevel_modules} \
+ %{read_only_relocs} \
+ %{sectcreate*} %{sectorder*} %{seg1addr*} %{segprot*} \
+ %{Zsegaddr*:-segaddr %*} \
+ %{Zsegs_read_only_addr*:-segs_read_only_addr %*} \
+ %{Zsegs_read_write_addr*:-segs_read_write_addr %*} \
+ %{Zseg_addr_table*: -seg_addr_table %*} \
+ %{Zfn_seg_addr_table_filename*:-seg_addr_table_filename %*} \
+ %{sub_library*} %{sub_umbrella*} \
+ " LINK_SYSROOT_SPEC " \
+ %{twolevel_namespace} %{twolevel_namespace_hints} \
+ %{Zumbrella*: -umbrella %*} \
+ %{undefined*} \
+ %{Zunexported_symbols_list*:-unexported_symbols_list %*} \
+ %{Zweak_reference_mismatches*:-weak_reference_mismatches %*} \
+ %{!Zweak_reference_mismatches*:-weak_reference_mismatches non-weak} \
+ %{X} \
+ %{y*} \
+ %{w} \
+ %{pagezero_size*} %{segs_read_*} %{seglinkedit} %{noseglinkedit} \
+ %{sectalign*} %{sectobjectsymbols*} %{segcreate*} %{whyload} \
+ %{whatsloaded} %{dylinker_install_name*} \
+ %{dylinker} %{Mach} "
+
+
+/* Machine dependent libraries. */
+
+#define LIB_SPEC "%{!static:-lSystem}"
+
+/* Support -mmacosx-version-min by supplying different (stub) libgcc_s.dylib
+ libraries to link against, and by not linking against libgcc_s on
+ earlier-than-10.3.9.
+
+ Note that by default, -lgcc_eh is not linked against! This is
+ because in a future version of Darwin the EH frame information may
+ be in a new format, or the fallback routine might be changed; if
+ you want to explicitly link against the static version of those
+ routines, because you know you don't need to unwind through system
+ libraries, you need to explicitly say -static-libgcc.
+
+ If it is linked against, it has to be before -lgcc, because it may
+ need symbols from -lgcc. */
+#undef REAL_LIBGCC_SPEC
+#define REAL_LIBGCC_SPEC \
+ "%{static-libgcc|static: -lgcc_eh -lgcc; \
+ shared-libgcc|fexceptions|fgnu-runtime: \
+ %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_s.10.4) \
+ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \
+ %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \
+ %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \
+ -lgcc ; \
+ :%:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \
+ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \
+ %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \
+ %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \
+ -lgcc }"
+
+/* We specify crt0.o as -lcrt0.o so that ld will search the library path.
+
+ crt3.o provides __cxa_atexit on systems that don't have it. Since
+ it's only used with C++, which requires passing -shared-libgcc, key
+ off that to avoid unnecessarily adding a destructor to every
+ powerpc program built. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{Zdynamiclib: %(darwin_dylib1) } \
+ %{!Zdynamiclib:%{Zbundle:%{!static:-lbundle1.o}} \
+ %{!Zbundle:%{pg:%{static:-lgcrt0.o} \
+ %{!static:%{object:-lgcrt0.o} \
+ %{!object:%{preload:-lgcrt0.o} \
+ %{!preload:-lgcrt1.o %(darwin_crt2)}}}} \
+ %{!pg:%{static:-lcrt0.o} \
+ %{!static:%{object:-lcrt0.o} \
+ %{!object:%{preload:-lcrt0.o} \
+ %{!preload: %(darwin_crt1) \
+ %(darwin_crt2)}}}}}} \
+ %{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}"
+
+/* The native Darwin linker doesn't necessarily place files in the order
+ that they're specified on the link line. Thus, it is pointless
+ to put anything in ENDFILE_SPEC. */
+/* #define ENDFILE_SPEC "" */
+
+#define DARWIN_EXTRA_SPECS \
+ { "darwin_crt1", DARWIN_CRT1_SPEC }, \
+ { "darwin_dylib1", DARWIN_DYLIB1_SPEC }, \
+ { "darwin_minversion", DARWIN_MINVERSION_SPEC },
+
+#define DARWIN_DYLIB1_SPEC \
+ "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \
+ %:version-compare(>= 10.5 mmacosx-version-min= -ldylib1.10.5.o)"
+
+#define DARWIN_CRT1_SPEC \
+ "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \
+ %:version-compare(>= 10.5 mmacosx-version-min= -lcrt1.10.5.o)"
+
+/* Default Darwin ASM_SPEC, very simple. */
+#define ASM_SPEC "-arch %(darwin_arch) \
+ %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \
+ %{static}"
+
+/* We still allow output of STABS. */
+
+#define DBX_DEBUGGING_INFO 1
+
+#define DWARF2_DEBUGGING_INFO 1
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+#define DEBUG_FRAME_SECTION "__DWARF,__debug_frame,regular,debug"
+#define DEBUG_INFO_SECTION "__DWARF,__debug_info,regular,debug"
+#define DEBUG_ABBREV_SECTION "__DWARF,__debug_abbrev,regular,debug"
+#define DEBUG_ARANGES_SECTION "__DWARF,__debug_aranges,regular,debug"
+#define DEBUG_MACINFO_SECTION "__DWARF,__debug_macinfo,regular,debug"
+#define DEBUG_LINE_SECTION "__DWARF,__debug_line,regular,debug"
+#define DEBUG_LOC_SECTION "__DWARF,__debug_loc,regular,debug"
+#define DEBUG_PUBNAMES_SECTION "__DWARF,__debug_pubnames,regular,debug"
+#define DEBUG_PUBTYPES_SECTION "__DWARF,__debug_pubtypes,regular,debug"
+#define DEBUG_STR_SECTION "__DWARF,__debug_str,regular,debug"
+#define DEBUG_RANGES_SECTION "__DWARF,__debug_ranges,regular,debug"
+
+#define TARGET_WANT_DEBUG_PUB_SECTIONS true
+
+/* When generating stabs debugging, use N_BINCL entries. */
+
+#define DBX_USE_BINCL
+
+/* There is no limit to the length of stabs strings. */
+
+#define DBX_CONTIN_LENGTH 0
+
+/* gdb needs a null N_SO at the end of each file for scattered loading. */
+
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+/* GCC's definition of 'one_only' is the same as its definition of 'weak'. */
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+
+/* Mach-O supports 'weak imports', and 'weak definitions' in coalesced
+ sections. machopic_select_section ensures that weak variables go in
+ coalesced sections. Weak aliases (or any other kind of aliases) are
+ not supported. Weak symbols that aren't visible outside the .s file
+ are not supported. */
+#define ASM_WEAKEN_DECL(FILE, DECL, NAME, ALIAS) \
+ do { \
+ if (ALIAS) \
+ { \
+ warning (0, "alias definitions not supported in Mach-O; ignored"); \
+ break; \
+ } \
+ \
+ if (! DECL_EXTERNAL (DECL) && TREE_PUBLIC (DECL)) \
+ targetm.asm_out.globalize_label (FILE, NAME); \
+ if (DECL_EXTERNAL (DECL)) \
+ fputs ("\t.weak_reference ", FILE); \
+ else if (lookup_attribute ("weak_import", DECL_ATTRIBUTES (DECL))) \
+ break; \
+ else if (TREE_PUBLIC (DECL)) \
+ fputs ("\t.weak_definition ", FILE); \
+ else \
+ break; \
+ assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+ } while (0)
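+
+/* For illustration (hypothetical symbol): a public weak definition "_sym"
+ produces ".globl _sym" followed by ".weak_definition _sym", while an
+ external weak declaration produces ".weak_reference _sym". */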
+
+/* Darwin has the pthread routines in libSystem, which every program
+ links to, so there's no need for weakness for that. */
+#define GTHREAD_USE_WEAK 0
+
+/* The Darwin linker imposes two limitations on common symbols: they
+ can't have hidden visibility, and they can't appear in dylibs. As
+ a consequence, we should never use common symbols to represent
+ vague linkage. */
+#undef USE_COMMON_FOR_ONE_ONLY
+#define USE_COMMON_FOR_ONE_ONLY 0
+
+/* The Darwin linker doesn't want coalesced symbols to appear in
+ a static archive's table of contents. */
+#undef TARGET_WEAK_NOT_IN_ARCHIVE_TOC
+#define TARGET_WEAK_NOT_IN_ARCHIVE_TOC 1
+
+/* On Darwin, we don't (at the time of writing) have linkonce sections
+ with names, so it's safe to make the class data not comdat. */
+#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT hook_bool_void_false
+
+/* For efficiency, on Darwin the RTTI information that is always
+ emitted in the standard C++ library should not be COMDAT. */
+#define TARGET_CXX_LIBRARY_RTTI_COMDAT hook_bool_void_false
+
+/* We make exception information linkonce. */
+#undef TARGET_USES_WEAK_UNWIND_INFO
+#define TARGET_USES_WEAK_UNWIND_INFO 1
+
+/* We need to use a nonlocal label for the start of an EH frame: the
+ Darwin linker requires that a coalesced section start with a label.
+ Unfortunately, it also requires that 'debug' sections don't contain
+ labels. */
+#undef FRAME_BEGIN_LABEL
+#define FRAME_BEGIN_LABEL (for_eh ? "EH_frame" : "Lframe")
+
+/* Emit a label for the FDE corresponding to DECL. EMPTY means
+ emit a label for an empty FDE. */
+#define TARGET_ASM_EMIT_UNWIND_LABEL darwin_emit_unwind_label
+
+/* Emit a label to separate the exception table. */
+#define TARGET_ASM_EMIT_EXCEPT_TABLE_LABEL darwin_emit_except_table_label
+
+/* Our profiling scheme doesn't use LP labels and counter words. */
+
+#define NO_PROFILE_COUNTERS 1
+
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP
+
+#undef INVOKE__main
+
+#define TARGET_ASM_CONSTRUCTOR machopic_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR machopic_asm_out_destructor
+
+/* Always prefix with an underscore. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* A dummy symbol that will be replaced with the function base name. */
+#define MACHOPIC_FUNCTION_BASE_NAME "<pic base>"
+
+/* Don't output a .file directive. That is only used by the assembler for
+ error reporting. */
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END darwin_file_end
+
+/* Because Mach-O relocations have a counter from 1 to 255 for the
+ section number they apply to, it is necessary to output all
+ normal sections before the LTO sections, to make sure that the
+ sections that may have relocations always have a section number
+ smaller than 255. */
+#undef TARGET_ASM_LTO_START
+#define TARGET_ASM_LTO_START darwin_asm_lto_start
+#undef TARGET_ASM_LTO_END
+#define TARGET_ASM_LTO_END darwin_asm_lto_end
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", SIZE)
+
+/* Give ObjC methods pretty symbol names. */
+
+#undef OBJC_GEN_METHOD_LABEL
+#define OBJC_GEN_METHOD_LABEL(BUF,IS_INST,CLASS_NAME,CAT_NAME,SEL_NAME,NUM) \
+ do { if (CAT_NAME) \
+ sprintf (BUF, "%c[%s(%s) %s]", (IS_INST) ? '-' : '+', \
+ (CLASS_NAME), (CAT_NAME), (SEL_NAME)); \
+ else \
+ sprintf (BUF, "%c[%s %s]", (IS_INST) ? '-' : '+', \
+ (CLASS_NAME), (SEL_NAME)); \
+ } while (0)
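+
+/* For illustration (hypothetical method): an instance method "bar" of class
+ "Foo" in category "Cat" is labelled "-[Foo(Cat) bar]"; with no category it
+ is "-[Foo bar]"; class methods use '+' in place of '-'. */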
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ darwin_asm_declare_object_name ((FILE), (NAME), (DECL))
+
+/* The RTTI data (e.g., __ti4name) is common and public (and static),
+ but it does need to be referenced via indirect PIC data pointers.
+ The machopic_define_symbol calls are telling the machopic subsystem
+ that the name *is* defined in this module, so it doesn't need to
+ make them indirect. */
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do { \
+ const char *xname = NAME; \
+ if (GET_CODE (XEXP (DECL_RTL (DECL), 0)) != SYMBOL_REF) \
+ xname = IDENTIFIER_POINTER (DECL_NAME (DECL)); \
+ if (! DECL_WEAK (DECL) \
+ && ((TREE_STATIC (DECL) \
+ && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \
+ || DECL_INITIAL (DECL))) \
+ machopic_define_symbol (DECL_RTL (DECL)); \
+ if ((TREE_STATIC (DECL) \
+ && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \
+ || DECL_INITIAL (DECL)) \
+ (* targetm.encode_section_info) (DECL, DECL_RTL (DECL), false); \
+ ASM_OUTPUT_FUNCTION_LABEL (FILE, xname, DECL); \
+ } while (0)
+
+#undef TARGET_ASM_DECLARE_CONSTANT_NAME
+#define TARGET_ASM_DECLARE_CONSTANT_NAME darwin_asm_declare_constant_name
+
+/* Wrap new method names in quotes so the assembler doesn't gag.
+ Make Objective-C internal symbols local; in doing this, we need
+ to accommodate the name mangling done by C++ on file-scope locals. */
+
+int darwin_label_is_anonymous_local_objc_name (const char *name);
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ do { \
+ const char *xname = (NAME); \
+ if (! strcmp (xname, MACHOPIC_FUNCTION_BASE_NAME)) \
+ machopic_output_function_base_name(FILE); \
+ else if (xname[0] == '&' || xname[0] == '*') \
+ { \
+ int len = strlen (xname); \
+ if (len > 6 && !strcmp ("$stub", xname + len - 5)) \
+ machopic_validate_stub_or_non_lazy_ptr (xname); \
+ else if (len > 7 && !strcmp ("$stub\"", xname + len - 6)) \
+ machopic_validate_stub_or_non_lazy_ptr (xname); \
+ else if (len > 14 && !strcmp ("$non_lazy_ptr", xname + len - 13)) \
+ machopic_validate_stub_or_non_lazy_ptr (xname); \
+ else if (len > 15 && !strcmp ("$non_lazy_ptr\"", xname + len - 14)) \
+ machopic_validate_stub_or_non_lazy_ptr (xname); \
+ if (xname[1] != '"' && name_needs_quotes (&xname[1])) \
+ fprintf (FILE, "\"%s\"", &xname[1]); \
+ else \
+ fputs (&xname[1], FILE); \
+ } \
+ else if (xname[0] == '+' || xname[0] == '-') \
+ fprintf (FILE, "\"%s\"", xname); \
+ else if (darwin_label_is_anonymous_local_objc_name (xname)) \
+ fprintf (FILE, "L%s", xname); \
+ else if (!strncmp (xname, ".objc_class_name_", 17)) \
+ fprintf (FILE, "%s", xname); \
+ else if (xname[0] != '"' && name_needs_quotes (xname)) \
+ fprintf (FILE, "\"%s\"", xname); \
+ else \
+ asm_fprintf (FILE, "%U%s", xname); \
+ } while (0)
+
+/* Output before executable code. */
+#undef TEXT_SECTION_ASM_OP
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* Output before writable data. */
+
+#undef DATA_SECTION_ASM_OP
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#undef ALIGN_ASM_OP
+#define ALIGN_ASM_OP ".align"
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t%s\t%d\n", ALIGN_ASM_OP, (LOG))
+
+/* The maximum alignment which the object file format can support in
+ bits. For Mach-O, this is 2^15 bytes. */
+
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (0x8000 * 8)
+
+#define L2_MAX_OFILE_ALIGNMENT 15
+
+/* These are the three variants that emit referenced blank space. */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ darwin_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+ darwin_asm_output_aligned_decl_local \
+ ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
+ darwin_asm_output_aligned_decl_common \
+ ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* The generic version; archs should override where required. */
+#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".non_lazy_symbol_pointer"
+
+/* Declare the section variables. */
+#ifndef USED_FOR_TARGET
+enum darwin_section_enum {
+#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) NAME,
+#include "darwin-sections.def"
+#undef DEF_SECTION
+ NUM_DARWIN_SECTIONS
+};
+extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS];
+#endif
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION machopic_select_section
+
+#undef TARGET_ASM_FUNCTION_SECTION
+#define TARGET_ASM_FUNCTION_SECTION darwin_function_section
+
+#undef TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS
+#define TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS \
+ darwin_function_switched_text_sections
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION machopic_select_rtx_section
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION darwin_unique_section
+#undef TARGET_ASM_FUNCTION_RODATA_SECTION
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK machopic_reloc_rw_mask
+
+
+#define ASM_DECLARE_UNRESOLVED_REFERENCE(FILE,NAME) \
+ do { \
+ if (FILE) { \
+ if (MACHOPIC_INDIRECT) \
+ fprintf (FILE, "\t.lazy_reference "); \
+ else \
+ fprintf (FILE, "\t.reference "); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, "\n"); \
+ } \
+ } while (0)
+
+#define ASM_DECLARE_CLASS_REFERENCE(FILE,NAME) \
+ do { \
+ if (FILE) { \
+ fprintf (FILE, "\t"); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, "=0\n"); \
+ (*targetm.asm_out.globalize_label) (FILE, NAME); \
+ } \
+ } while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+#define TARGET_ASM_GLOBALIZE_LABEL darwin_globalize_label
+
+/* Emit an assembler directive to set visibility for a symbol. Used
+ to support visibility attribute and Darwin's private extern
+ feature. */
+#undef TARGET_ASM_ASSEMBLE_VISIBILITY
+#define TARGET_ASM_ASSEMBLE_VISIBILITY darwin_assemble_visibility
+
+/* Extra attributes for Darwin. */
+#define SUBTARGET_ATTRIBUTE_TABLE \
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ \
+ { "apple_kext_compatibility", 0, 0, false, true, false, \
+ darwin_handle_kext_attribute }, \
+ { "weak_import", 0, 0, true, false, false, \
+ darwin_handle_weak_import_attribute }
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM))
+
+#undef TARGET_ASM_MARK_DECL_PRESERVED
+#define TARGET_ASM_MARK_DECL_PRESERVED darwin_mark_decl_preserved
+
+/* Set on a symbol with SYMBOL_FLAG_FUNCTION or
+ MACHO_SYMBOL_FLAG_VARIABLE to indicate that the function or
+ variable has been defined in this translation unit.
+ When porting Mach-O to new architectures you need to make
+ sure these aren't clobbered by the backend. */
+
+#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_MACH_DEP)
+#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_MACH_DEP) << 1)
+
+/* Set on a symbol to indicate when fix-and-continue style code
+ generation is being used and the symbol refers to a static symbol
+ that should be rebound from new instances of a translation unit to
+ the original instance of the data. */
+
+#define MACHO_SYMBOL_STATIC ((SYMBOL_FLAG_MACH_DEP) << 2)
+
+/* Symbolic names for various things we might know about a symbol. */
+
+enum machopic_addr_class {
+ MACHOPIC_UNDEFINED,
+ MACHOPIC_DEFINED_DATA,
+ MACHOPIC_UNDEFINED_DATA,
+ MACHOPIC_DEFINED_FUNCTION,
+ MACHOPIC_UNDEFINED_FUNCTION
+};
+
+/* Macros defining the various PIC cases. */
+
+#undef MACHO_DYNAMIC_NO_PIC_P
+#define MACHO_DYNAMIC_NO_PIC_P (TARGET_MACHO_DYNAMIC_NO_PIC)
+#undef MACHOPIC_INDIRECT
+#define MACHOPIC_INDIRECT (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
+#define MACHOPIC_JUST_INDIRECT (MACHO_DYNAMIC_NO_PIC_P)
+#undef MACHOPIC_PURE
+#define MACHOPIC_PURE (flag_pic && ! MACHO_DYNAMIC_NO_PIC_P)
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO darwin_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding
+
+#define GEN_BINDER_NAME_FOR_STUB(BUF,STUB,STUB_LENGTH) \
+ do { \
+ const char *const stub_ = (STUB); \
+ char *buffer_ = (BUF); \
+ strcpy (buffer_, stub_); \
+ if (stub_[0] == '"') \
+ { \
+ strcpy (buffer_ + (STUB_LENGTH) - 1, "_binder\""); \
+ } \
+ else \
+ { \
+ strcpy (buffer_ + (STUB_LENGTH), "_binder"); \
+ } \
+ } while (0)
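+
+/* Illustrative expansions (assuming STUB_LENGTH == strlen (STUB)):
+   "foo$stub" becomes "foo$stub_binder", while the pre-quoted
+   "\"foo$stub\"" becomes "\"foo$stub_binder\"", the closing quote
+   being re-appended after "_binder".  */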
+
+#define GEN_SYMBOL_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \
+ do { \
+ const char *const symbol_ = (SYMBOL); \
+ char *buffer_ = (BUF); \
+ if (name_needs_quotes (symbol_) && symbol_[0] != '"') \
+ { \
+ sprintf (buffer_, "\"%s\"", symbol_); \
+ } \
+ else \
+ { \
+ strcpy (buffer_, symbol_); \
+ } \
+ } while (0)
+
+/* Given a symbol name string, create the lazy pointer version
+ of the symbol name. */
+
+#define GEN_LAZY_PTR_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \
+ do { \
+ const char *symbol_ = (SYMBOL); \
+ char *buffer_ = (BUF); \
+ if (symbol_[0] == '"') \
+ { \
+ strcpy (buffer_, "\"L"); \
+ strcpy (buffer_ + 2, symbol_ + 1); \
+ strcpy (buffer_ + (SYMBOL_LENGTH), "$lazy_ptr\""); \
+ } \
+ else if (name_needs_quotes (symbol_)) \
+ { \
+ strcpy (buffer_, "\"L"); \
+ strcpy (buffer_ + 2, symbol_); \
+ strcpy (buffer_ + (SYMBOL_LENGTH) + 2, "$lazy_ptr\""); \
+ } \
+ else \
+ { \
+ strcpy (buffer_, "L"); \
+ strcpy (buffer_ + 1, symbol_); \
+ strcpy (buffer_ + (SYMBOL_LENGTH) + 1, "$lazy_ptr"); \
+ } \
+ } while (0)
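+
+/* Illustrative usage sketch (hypothetical caller, not part of this
+   interface): for an unquoted input,
+
+     const char *name = "_foo";
+     char buf[sizeof ("L_foo$lazy_ptr")];
+     GEN_LAZY_PTR_NAME_FOR_SYMBOL (buf, name, strlen (name));
+
+   leaves "L_foo$lazy_ptr" in buf; a pre-quoted "\"x y\"" would give
+   "\"Lx y$lazy_ptr\"", with the quotes preserved around the result.  */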
+
+#define EH_FRAME_SECTION_NAME "__TEXT"
+#define EH_FRAME_SECTION_ATTR ",coalesced,no_toc+strip_static_syms+live_support"
+
+/* Java runtime class list. */
+#define JCR_SECTION_NAME "__DATA,jcr,regular,no_dead_strip"
+
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (((CODE) == 2 && (GLOBAL) == 1) \
+ ? (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4) : \
+ ((CODE) == 1 || (GLOBAL) == 0) ? DW_EH_PE_pcrel : DW_EH_PE_absptr)
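+
+/* For example: ASM_PREFERRED_EH_DATA_FORMAT (2, 1), a global symbol
+   reference, selects DW_EH_PE_pcrel | DW_EH_PE_indirect
+   | DW_EH_PE_sdata4 (the case matched by
+   ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX below), while (0, 1) matches
+   neither arm and falls back to DW_EH_PE_absptr.  */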
+
+#define ASM_OUTPUT_DWARF_DELTA(FILE,SIZE,LABEL1,LABEL2) \
+ darwin_asm_output_dwarf_delta (FILE, SIZE, LABEL1, LABEL2)
+
+#define ASM_OUTPUT_DWARF_OFFSET(FILE,SIZE,LABEL,BASE) \
+ darwin_asm_output_dwarf_offset (FILE, SIZE, LABEL, BASE)
+
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(ASM_OUT_FILE, ENCODING, SIZE, ADDR, DONE) \
+ if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) { \
+ darwin_non_lazy_pcrel (ASM_OUT_FILE, ADDR); \
+ goto DONE; \
+ }
+
+/* Experimentally, putting jump tables in the text section is faster
+   on SPEC.  It is also needed for correctness when functions are
+   coalesced.  */
+
+#ifndef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+#endif
+
+#define TARGET_TERMINATE_DW2_EH_FRAME_INFO false
+
+#define TARGET_ASM_INIT_SECTIONS darwin_init_sections
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION darwin_asm_named_section
+
+#define DARWIN_REGISTER_TARGET_PRAGMAS() \
+ do { \
+ if (!flag_preprocess_only) \
+ cpp_register_pragma (parse_in, NULL, "mark", \
+ darwin_pragma_ignore, false); \
+ c_register_pragma (0, "options", darwin_pragma_options); \
+ c_register_pragma (0, "segment", darwin_pragma_ignore); \
+ c_register_pragma (0, "unused", darwin_pragma_unused); \
+ c_register_pragma (0, "ms_struct", darwin_pragma_ms_struct); \
+ } while (0)
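+
+/* Illustrative use of one registered pragma (assuming Apple's
+   documented syntax for "#pragma unused"; the callback/ctx names are
+   hypothetical):
+
+     void
+     callback (int fd, void *ctx)
+     {
+     #pragma unused (ctx, fd)
+     }
+*/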
+
+#undef ASM_APP_ON
+#define ASM_APP_ON ""
+#undef ASM_APP_OFF
+#define ASM_APP_OFF ""
+
+void darwin_register_frameworks (const char *, const char *, int);
+void darwin_register_objc_includes (const char *, const char *, int);
+#define TARGET_EXTRA_PRE_INCLUDES darwin_register_objc_includes
+#define TARGET_EXTRA_INCLUDES darwin_register_frameworks
+
+void add_framework_path (char *);
+#define TARGET_OPTF add_framework_path
+
+#define TARGET_POSIX_IO
+
+/* All new versions of Darwin have C99 functions. */
+
+#define TARGET_C99_FUNCTIONS 1
+
+#define WINT_TYPE "int"
+
+/* Every program on Darwin links against libSystem, which contains the
+   pthread routines, so there is no need to link them in explicitly
+   when doing threaded work.  */
+
+#undef GOMP_SELF_SPECS
+#define GOMP_SELF_SPECS ""
+
+/* Darwin disables section anchors by default.
+   They should be enabled for each architecture that supports them.  */
+#define TARGET_ASM_OUTPUT_ANCHOR NULL
+#define DARWIN_SECTION_ANCHORS 0
+
+/* Attempt to turn on execute permission for the stack. This may be
+ used by TARGET_TRAMPOLINE_INIT if the target needs it (that is,
+ if the target machine can change execute permissions on a page).
+
+ There is no way to query the execute permission of the stack, so
+ we always issue the mprotect() call.
+
+ Unfortunately it is not possible to make this namespace-clean.
+
+ Also note that no errors should be emitted by this code; it is
+ considered dangerous for library calls to send messages to
+ stdout/stderr. */
+
+#define ENABLE_EXECUTE_STACK \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ extern int mprotect (void *, size_t, int); \
+ extern int getpagesize (void); \
+ static int size; \
+ static long mask; \
+ \
+ char *page, *end; \
+ \
+ if (size == 0) \
+ { \
+ size = getpagesize(); \
+ mask = ~((long) size - 1); \
+ } \
+ \
+ page = (char *) (((long) addr) & mask); \
+ end = (char *) ((((long) (addr + (TARGET_64BIT ? 48 : 40))) & mask) + size); \
+ \
+ /* 7 == PROT_READ | PROT_WRITE | PROT_EXEC */ \
+ (void) mprotect (page, end - page, 7); \
+}
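+
+/* Worked example (illustrative): with getpagesize () == 4096 and
+   addr == 0x1ffc on a 32-bit target, the 40-byte trampoline straddles
+   a page boundary; page == 0x1000 and end == 0x3000, so both pages
+   are given PROT_READ | PROT_WRITE | PROT_EXEC.  */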
+
+/* For Apple KEXTs, we make the constructors return this to match gcc
+ 2.95. */
+#define TARGET_CXX_CDTOR_RETURNS_THIS (darwin_kextabi_p)
+#define TARGET_KEXTABI flag_apple_kext
+
+/* We have target-specific builtins. */
+#define TARGET_FOLD_BUILTIN darwin_fold_builtin
+
+#define TARGET_OBJC_CONSTRUCT_STRING_OBJECT \
+ darwin_objc_construct_string
+
+#define TARGET_STRING_OBJECT_REF_TYPE_P \
+ darwin_cfstring_ref_p
+
+#define TARGET_N_FORMAT_TYPES 1
+#define TARGET_FORMAT_TYPES darwin_additional_format_types
+
+#define TARGET_CHECK_STRING_OBJECT_FORMAT_ARG \
+ darwin_check_cfstring_format_arg
+
+#define TARGET_HAS_TARGETCM 1
+
+#ifndef USED_FOR_TARGET
+extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **);
+#define GCC_DRIVER_HOST_INITIALIZATION \
+ darwin_driver_init (&decoded_options_count, &decoded_options)
+#endif
+
+/* The Apple assembler and linker do not support constructor priorities. */
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY 0
+
+#endif /* CONFIG_DARWIN_H */
diff --git a/gcc/config/darwin.opt b/gcc/config/darwin.opt
new file mode 100644
index 000000000..a31ee074c
--- /dev/null
+++ b/gcc/config/darwin.opt
@@ -0,0 +1,390 @@
+; Processor-independent options for Darwin.
+
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; Various linker options have a -Z prefix added so that they can reach
+; specs processing without interference.  Note that when an option
+; name has a prefix matching another option name that also takes an
+; argument, its -Z mapping needs a different prefix; otherwise a '*'
+; after the shorter option would also match the longer one.
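+;
+; For instance, "seg_addr_table_filename" below is mapped to
+; Zfn_seg_addr_table_filename rather than Zseg_addr_table_filename, so
+; that its prefix no longer collides with the separate-argument option
+; Zseg_addr_table.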
+
+all_load
+Driver Alias(Zall_load)
+
+allowable_client
+Driver Separate Alias(Zallowable_client)
+
+arch_errors_fatal
+Driver Alias(Zarch_errors_fatal)
+
+bind_at_load
+Driver Alias(Zbind_at_load)
+
+bundle
+Driver Alias(Zbundle)
+
+bundle_loader
+Driver Separate Alias(Zbundle_loader)
+
+dead_strip
+Driver Alias(Zdead_strip)
+
+dependency-file
+C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs)
+
+dylib_file
+Driver Separate Alias(Zdylib_file)
+
+dylinker
+Driver
+
+dynamic
+Driver Alias(Zdynamic)
+
+dynamiclib
+Driver Alias(Zdynamiclib)
+
+exported_symbols_list
+Driver Separate Alias(Zexported_symbols_list)
+
+filelist
+Driver RejectNegative Separate
+
+findirect-virtual-calls
+Driver RejectNegative
+
+flat_namespace
+Driver RejectNegative Alias(Zflat_namespace)
+
+force_cpusubtype_ALL
+Driver RejectNegative Alias(Zforce_cpusubtype_ALL)
+
+force_flat_namespace
+Driver RejectNegative Alias(Zforce_flat_namespace)
+
+framework
+Driver RejectNegative Separate
+
+fterminated-vtables
+Driver RejectNegative
+
+gfull
+Driver
+
+gused
+Driver
+
+headerpad_max_install_names
+Driver
+
+image_base
+Driver Separate Alias(Zimage_base)
+
+init
+Driver Separate Alias(Zinit)
+
+install_name
+Driver Separate Alias(Zinstall_name)
+
+keep_private_externs
+Driver
+
+mconstant-cfstrings
+Target Report Var(darwin_constant_cfstrings) Init(1)
+Generate compile-time CFString objects
+
+multi_module
+Driver RejectNegative Alias(Zmulti_module)
+
+multiply_defined
+Driver RejectNegative Separate Alias(Zmultiply_defined)
+
+multiply_defined_unused
+Driver RejectNegative Separate Alias(Zmultiplydefinedunused)
+
+no_dead_strip_inits_and_terms
+Driver Alias(Zno_dead_strip_inits_and_terms)
+
+nofixprebinding
+Driver
+
+nomultidefs
+Driver
+
+noprebind
+Driver
+
+noseglinkedit
+Driver
+
+object
+Driver
+
+prebind
+Driver
+
+prebind_all_twolevel_modules
+Driver
+
+preload
+Driver
+
+private_bundle
+Driver
+
+pthread
+Driver
+
+seg_addr_table
+Driver Separate Alias(Zseg_addr_table)
+
+seg_addr_table_filename
+Driver Separate Alias(Zfn_seg_addr_table_filename)
+
+segaddr
+Driver Separate Args(2) Alias(Zsegaddr)
+
+seglinkedit
+Driver
+
+segs_read_only_addr
+Driver Separate Alias(Zsegs_read_only_addr)
+
+segs_read_write_addr
+Driver Separate Alias(Zsegs_read_write_addr)
+
+single_module
+Driver Alias(Zsingle_module)
+
+twolevel_namespace
+Driver
+
+twolevel_namespace_hints
+Driver
+
+umbrella
+Driver Separate Alias(Zumbrella)
+
+unexported_symbols_list
+Driver Separate Alias(Zunexported_symbols_list)
+
+weak_reference_mismatches
+Driver Separate Alias(Zweak_reference_mismatches)
+
+whatsloaded
+Driver
+
+whyload
+Driver
+
+y
+Driver Joined
+
+Mach
+Driver
+
+Wnonportable-cfstrings
+Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning
+Warn if constant CFString objects contain non-portable characters
+
+; Use new-style PIC stubs if this is true; x86 only so far.
+matt-stubs
+Target Report Var(darwin_macho_att_stub) Init(1)
+Generate AT&T-style stubs for Mach-O
+
+mdynamic-no-pic
+Target Common Report Mask(MACHO_DYNAMIC_NO_PIC)
+Generate code suitable for executables (NOT shared libs)
+
+mfix-and-continue
+Target Report Var(darwin_fix_and_continue)
+Generate code suitable for fast turn-around debugging
+
+; The Init here is for the convenience of GCC developers, so that cc1
+; and cc1plus don't crash if no -mmacosx-version-min is passed.  The
+; driver will always pass a -mmacosx-version-min, so in normal use the
+; Init is never used.  It is useful to set it to the OS version on
+; which people usually debug.
+mmacosx-version-min=
+Target Joined Report Var(darwin_macosx_version_min) Init("10.6")
+The earliest Mac OS X version on which this program will run
+
+mone-byte-bool
+Target RejectNegative Report Var(darwin_one_byte_bool)
+Set sizeof(bool) to 1
+
+fapple-kext
+Target Report C++ Var(flag_apple_kext)
+Generate code for Darwin loadable kernel extensions
+
+mkernel
+Target Report Var(flag_mkernel)
+Generate code for the kernel or loadable kernel extensions
+
+iframework
+Target RejectNegative C ObjC C++ ObjC++ Joined Separate
+-iframework <dir> Add <dir> to the end of the system framework include path
+
+X
+Driver
+
+Zall_load
+Driver
+
+Zallowable_client
+Driver Separate
+
+Zarch_errors_fatal
+Driver
+
+Zbind_at_load
+Driver
+
+Zbundle
+Driver
+
+Zbundle_loader
+Driver Separate
+
+Zdead_strip
+Driver
+
+Zdylib_file
+Driver Separate
+
+Zdynamic
+Driver
+
+Zdynamiclib
+Driver
+
+Zexported_symbols_list
+Driver Separate
+
+Zfn_seg_addr_table_filename
+Driver Separate
+
+Zflat_namespace
+Driver
+
+Zforce_cpusubtype_ALL
+Driver
+
+Zforce_flat_namespace
+Driver
+
+Zimage_base
+Driver Separate
+
+Zinit
+Driver Separate
+
+Zinstall_name
+Driver Separate
+
+Zmulti_module
+Driver
+
+Zmultiply_defined
+Driver Separate
+
+Zmultiplydefinedunused
+Driver Separate
+
+Zno_dead_strip_inits_and_terms
+Driver
+
+Zseg_addr_table
+Driver Separate
+
+Zsegaddr
+Driver Separate Args(2)
+
+Zsegs_read_only_addr
+Driver Separate
+
+Zsegs_read_write_addr
+Driver Separate
+
+Zsingle_module
+Driver
+
+Zumbrella
+Driver Separate
+
+Zunexported_symbols_list
+Driver Separate
+
+Zweak_reference_mismatches
+Driver Separate
+
+client_name
+Driver Separate
+
+compatibility_version
+Driver Separate
+
+current_version
+Driver Separate
+
+dylinker_install_name
+Driver Separate
+
+pagezero_size
+Driver Separate
+
+read_only_relocs
+Driver Separate
+
+sectalign
+Driver Separate Args(3)
+
+sectcreate
+Driver Separate Args(3)
+
+sectobjectsymbols
+Driver Separate Args(2)
+
+sectorder
+Driver Separate Args(3)
+
+seg1addr
+Driver Separate
+
+segcreate
+Driver Separate Args(3)
+
+segprot
+Driver Separate Args(3)
+
+segs_read_only_addr
+Driver Separate
+
+segs_read_write_addr
+Driver Separate
+
+sub_library
+Driver Separate
+
+sub_umbrella
+Driver Separate
+
+undefined
+Driver Separate
diff --git a/gcc/config/darwin10.h b/gcc/config/darwin10.h
new file mode 100644
index 000000000..5c205945c
--- /dev/null
+++ b/gcc/config/darwin10.h
@@ -0,0 +1,32 @@
+/* Target definitions for Darwin (Mac OS X) systems.
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Jack Howarth <howarth@bromo.med.uc.edu>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!static: -lSystem }"
+
+/* Fix PR41260 by passing -no_compact_unwind on darwin10 and later
+   until the unwinder in libSystem is fixed to digest the new epilogue
+   unwinding notes.
+
+   Fix PR47558 by linking against libSystem ahead of libgcc_ext.  */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+"%:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) \
+ %{!static:%{!static-libgcc: \
+ %:version-compare(>= 10.6 mmacosx-version-min= -lSystem) } } %G %L"
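+
+/* Illustrative expansion (assuming -mmacosx-version-min=10.6 and
+   dynamic linking): both version-compare tests succeed, so the link
+   line gains "-no_compact_unwind -lSystem" ahead of %G %L, i.e.
+   libSystem is searched before libgcc, per the PR47558 note above.  */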
diff --git a/gcc/config/darwin9.h b/gcc/config/darwin9.h
new file mode 100644
index 000000000..828118372
--- /dev/null
+++ b/gcc/config/darwin9.h
@@ -0,0 +1,50 @@
+/* Target definitions for Darwin (Mac OS X) systems.
+ Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Apple Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Prefer DWARF2. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#define DARWIN_PREFER_DWARF
+
+/* Since DWARF2 is default, conditions for running dsymutil are different. */
+#undef DSYMUTIL_SPEC
+#define DSYMUTIL_SPEC \
+ "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
+ %{v} \
+ %{g*:%{!gstabs*:%{!g0: -idsym}}}\
+ %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.f|.f90|.f95|.f03|.f77|.for|.F|.F90|.F95|.F03: \
+ %{g*:%{!gstabs*:%{!g0: -dsym}}}}}}}}}}}"
+
+/* Tell collect2 to run dsymutil for us as necessary. */
+#define COLLECT_RUN_DSYMUTIL 1
+
+/* libSystem contains unwind information for signal frames. */
+#define DARWIN_LIBSYSTEM_HAS_UNWIND
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do { \
+ unsigned HOST_WIDE_INT _new_size = (SIZE); \
+ fprintf ((FILE), "\t.comm "); \
+ assemble_name ((FILE), (NAME)); \
+ if (_new_size == 0) _new_size = 1; \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ _new_size, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \
+ } while (0)
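+
+/* Worked example (illustrative): for NAME "_buf", SIZE 0 and ALIGN 64
+   (in bits), the size is bumped to 1 and floor_log2 (64 / BITS_PER_UNIT)
+   is 3, so the macro emits "\t.comm _buf,1,3".  */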
diff --git a/gcc/config/dbx.h b/gcc/config/dbx.h
new file mode 100644
index 000000000..0f86e164c
--- /dev/null
+++ b/gcc/config/dbx.h
@@ -0,0 +1,27 @@
+/* Prefer DBX (stabs) debugging information.
+ Copyright (C) 1996, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This file causes gcc to prefer using DBX (stabs) debugging
+ information. The configure script will add a #include of this file
+ to tm.h when --with-stabs is used for certain targets. */
+
+#define DBX_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
diff --git a/gcc/config/dbxcoff.h b/gcc/config/dbxcoff.h
new file mode 100644
index 000000000..a2b36c0d6
--- /dev/null
+++ b/gcc/config/dbxcoff.h
@@ -0,0 +1,62 @@
+/* Definitions needed when using stabs embedded in COFF sections.
+ Copyright (C) 1996, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This file may be included by any COFF target which wishes to
+ support -gstabs generating stabs in sections, as produced by gas
+ and understood by gdb. */
+
+/* Output DBX (stabs) debugging information if doing -gstabs. */
+
+#define DBX_DEBUGGING_INFO 1
+
+/* Generate SDB debugging information by default. */
+
+#ifndef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG
+#endif
+
+/* Be function-relative for block and source line stab directives. */
+
+#define DBX_BLOCKS_FUNCTION_RELATIVE 1
+
+/* ... but, to make this work, functions must appear prior to line info.  */
+
+#define DBX_FUNCTION_FIRST
+
+/* Generate a blank trailing N_SO to mark the end of the .o file, since
+ we can't depend upon the linker to mark .o file boundaries with
+ embedded stabs. */
+
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+/* Like block addresses, stabs line numbers are relative to the
+ current function. */
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+
+/* When generating stabs debugging, use N_BINCL entries. */
+
+#undef DBX_USE_BINCL
+#define DBX_USE_BINCL
+
+/* There is no limit to the length of stabs strings. */
+
+#ifndef DBX_CONTIN_LENGTH
+#define DBX_CONTIN_LENGTH 0
+#endif
diff --git a/gcc/config/dbxelf.h b/gcc/config/dbxelf.h
new file mode 100644
index 000000000..8d3c26507
--- /dev/null
+++ b/gcc/config/dbxelf.h
@@ -0,0 +1,68 @@
+/* Definitions needed when using stabs embedded in ELF sections.
+ Copyright (C) 1999, 2004, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file may be included by any ELF target which wishes to
+ support -gstabs generating stabs in sections, as produced by gas
+ and understood by gdb. */
+
+#ifndef GCC_DBX_ELF_H
+#define GCC_DBX_ELF_H
+
+/* Output DBX (stabs) debugging information if doing -gstabs. */
+
+#define DBX_DEBUGGING_INFO 1
+
+/* Make LBRAC and RBRAC addresses relative to the start of the
+ function. The native Solaris stabs debugging format works this
+ way, gdb expects it, and it reduces the number of relocation
+ entries... */
+
+#define DBX_BLOCKS_FUNCTION_RELATIVE 1
+
+/* ... but, to make this work, functions must appear prior to line info. */
+
+#define DBX_FUNCTION_FIRST
+
+/* When generating stabs debugging, use N_BINCL entries. */
+
+#define DBX_USE_BINCL
+
+/* There is no limit to the length of stabs strings. */
+
+#ifndef DBX_CONTIN_LENGTH
+#define DBX_CONTIN_LENGTH 0
+#endif
+
+/* Like block addresses, stabs line numbers are relative to the
+ current function. */
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+
+/* Generate a blank trailing N_SO to mark the end of the .o file, since
+ we can't depend upon the linker to mark .o file boundaries with
+ embedded stabs. */
+
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+#endif /* ! GCC_DBX_ELF_H */
diff --git a/gcc/config/dfp-bit.c b/gcc/config/dfp-bit.c
new file mode 100644
index 000000000..19f2fdae6
--- /dev/null
+++ b/gcc/config/dfp-bit.c
@@ -0,0 +1,680 @@
+/* This is a software decimal floating point library.
+ Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This implements IEEE 754 decimal floating point arithmetic, but
+ does not provide a mechanism for setting the rounding mode, or for
+ generating or handling exceptions. Conversions between decimal
+ floating point types and other types depend on C library functions.
+
+ Contributed by Ben Elliston <bje@au.ibm.com>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+/* FIXME: compile with -std=gnu99 to get these from stdlib.h */
+extern float strtof (const char *, char **);
+extern long double strtold (const char *, char **);
+#include <string.h>
+#include <limits.h>
+
+#include "config/dfp-bit.h"
+
+/* Forward declarations. */
+#if WIDTH == 32 || WIDTH_TO == 32
+void __host_to_ieee_32 (_Decimal32 in, decimal32 *out);
+void __ieee_to_host_32 (decimal32 in, _Decimal32 *out);
+#endif
+#if WIDTH == 64 || WIDTH_TO == 64
+void __host_to_ieee_64 (_Decimal64 in, decimal64 *out);
+void __ieee_to_host_64 (decimal64 in, _Decimal64 *out);
+#endif
+#if WIDTH == 128 || WIDTH_TO == 128
+void __host_to_ieee_128 (_Decimal128 in, decimal128 *out);
+void __ieee_to_host_128 (decimal128 in, _Decimal128 *out);
+#endif
+
+/* A pointer to a binary decFloat operation. */
+typedef decFloat* (*dfp_binary_func)
+ (decFloat *, const decFloat *, const decFloat *, decContext *);
+
+/* Binary operations. */
+
+/* Use a decFloat (decDouble or decQuad) function to perform a DFP
+ binary operation. */
+static inline decFloat
+dfp_binary_op (dfp_binary_func op, decFloat arg_a, decFloat arg_b)
+{
+ decFloat result;
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+
+ /* Perform the operation. */
+ op (&result, &arg_a, &arg_b, &context);
+
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ {
+ /* decNumber exception flags we care about here. */
+ int ieee_flags;
+ int dec_flags = DEC_IEEE_854_Division_by_zero | DEC_IEEE_854_Inexact
+ | DEC_IEEE_854_Invalid_operation | DEC_IEEE_854_Overflow
+ | DEC_IEEE_854_Underflow;
+ dec_flags &= context.status;
+ ieee_flags = DFP_IEEE_FLAGS (dec_flags);
+ if (ieee_flags != 0)
+ DFP_HANDLE_EXCEPTIONS (ieee_flags);
+ }
+
+ return result;
+}
+
+#if WIDTH == 32
+/* The decNumber package doesn't provide arithmetic for decSingle (32 bits);
+ convert to decDouble, use the operation for that, and convert back. */
+static inline _Decimal32
+d32_binary_op (dfp_binary_func op, _Decimal32 arg_a, _Decimal32 arg_b)
+{
+ union { _Decimal32 c; decSingle f; } a32, b32, res32;
+ decDouble a, b, res;
+ decContext context;
+
+ /* Widen the operands and perform the operation. */
+ a32.c = arg_a;
+ b32.c = arg_b;
+ decSingleToWider (&a32.f, &a);
+ decSingleToWider (&b32.f, &b);
+ res = dfp_binary_op (op, a, b);
+
+ /* Narrow the result, which might result in an underflow or overflow. */
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+ decSingleFromWider (&res32.f, &res, &context);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ {
+ /* decNumber exception flags we care about here. */
+ int ieee_flags;
+ int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Overflow
+ | DEC_IEEE_854_Underflow;
+ dec_flags &= context.status;
+ ieee_flags = DFP_IEEE_FLAGS (dec_flags);
+ if (ieee_flags != 0)
+ DFP_HANDLE_EXCEPTIONS (ieee_flags);
+ }
+
+ return res32.c;
+}
+#else
+/* decFloat operations are supported for decDouble (64 bits) and
+ decQuad (128 bits). The bit patterns for the types are the same. */
+static inline DFP_C_TYPE
+dnn_binary_op (dfp_binary_func op, DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ union { DFP_C_TYPE c; decFloat f; } a, b, result;
+
+ a.c = arg_a;
+ b.c = arg_b;
+ result.f = dfp_binary_op (op, a.f, b.f);
+ return result.c;
+}
+#endif
+
+/* Comparison operations. */
+
+/* Use a decFloat (decDouble or decQuad) function to perform a DFP
+ comparison. */
+static inline CMPtype
+dfp_compare_op (dfp_binary_func op, decFloat arg_a, decFloat arg_b)
+{
+ decContext context;
+ decFloat res;
+ int result;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+
+ /* Perform the comparison. */
+ op (&res, &arg_a, &arg_b, &context);
+
+ if (DEC_FLOAT_IS_SIGNED (&res))
+ result = -1;
+ else if (DEC_FLOAT_IS_ZERO (&res))
+ result = 0;
+ else if (DEC_FLOAT_IS_NAN (&res))
+ result = -2;
+ else
+ result = 1;
+
+ return (CMPtype) result;
+}
+
+#if WIDTH == 32
+/* The decNumber package doesn't provide comparisons for decSingle (32 bits);
+ convert to decDouble, use the operation for that, and convert back. */
+static inline CMPtype
+d32_compare_op (dfp_binary_func op, _Decimal32 arg_a, _Decimal32 arg_b)
+{
+ union { _Decimal32 c; decSingle f; } a32, b32;
+ decDouble a, b;
+
+ a32.c = arg_a;
+ b32.c = arg_b;
+ decSingleToWider (&a32.f, &a);
+ decSingleToWider (&b32.f, &b);
+ return dfp_compare_op (op, a, b);
+}
+#else
+/* decFloat comparisons are supported for decDouble (64 bits) and
+ decQuad (128 bits). The bit patterns for the types are the same. */
+static inline CMPtype
+dnn_compare_op (dfp_binary_func op, DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ union { DFP_C_TYPE c; decFloat f; } a, b;
+
+ a.c = arg_a;
+ b.c = arg_b;
+ return dfp_compare_op (op, a.f, b.f);
+}
+#endif
+
+#if defined(L_conv_sd)
+void
+__host_to_ieee_32 (_Decimal32 in, decimal32 *out)
+{
+ memcpy (out, &in, 4);
+}
+
+void
+__ieee_to_host_32 (decimal32 in, _Decimal32 *out)
+{
+ memcpy (out, &in, 4);
+}
+#endif /* L_conv_sd */
+
+#if defined(L_conv_dd)
+void
+__host_to_ieee_64 (_Decimal64 in, decimal64 *out)
+{
+ memcpy (out, &in, 8);
+}
+
+void
+__ieee_to_host_64 (decimal64 in, _Decimal64 *out)
+{
+ memcpy (out, &in, 8);
+}
+#endif /* L_conv_dd */
+
+#if defined(L_conv_td)
+void
+__host_to_ieee_128 (_Decimal128 in, decimal128 *out)
+{
+ memcpy (out, &in, 16);
+}
+
+void
+__ieee_to_host_128 (decimal128 in, _Decimal128 *out)
+{
+ memcpy (out, &in, 16);
+}
+#endif /* L_conv_td */
+
+#if defined(L_addsub_sd) || defined(L_addsub_dd) || defined(L_addsub_td)
+DFP_C_TYPE
+DFP_ADD (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ return DFP_BINARY_OP (DEC_FLOAT_ADD, arg_a, arg_b);
+}
+
+DFP_C_TYPE
+DFP_SUB (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ return DFP_BINARY_OP (DEC_FLOAT_SUBTRACT, arg_a, arg_b);
+}
+#endif /* L_addsub */
+
+#if defined(L_mul_sd) || defined(L_mul_dd) || defined(L_mul_td)
+DFP_C_TYPE
+DFP_MULTIPLY (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ return DFP_BINARY_OP (DEC_FLOAT_MULTIPLY, arg_a, arg_b);
+}
+#endif /* L_mul */
+
+#if defined(L_div_sd) || defined(L_div_dd) || defined(L_div_td)
+DFP_C_TYPE
+DFP_DIVIDE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ return DFP_BINARY_OP (DEC_FLOAT_DIVIDE, arg_a, arg_b);
+}
+#endif /* L_div */
+
+#if defined (L_eq_sd) || defined (L_eq_dd) || defined (L_eq_td)
+CMPtype
+DFP_EQ (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ CMPtype stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For EQ return zero for true, nonzero for false. */
+ return stat != 0;
+}
+#endif /* L_eq */
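+
+/* Usage note (illustrative of the usual libgcc lowering): a source
+   comparison "a == b" on _Decimal64 operands becomes a call such as
+   __dpd_eqdd2 (a, b) followed by a test against zero, which is why
+   DFP_EQ returns zero for equality and nonzero otherwise.  */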
+
+#if defined (L_ne_sd) || defined (L_ne_dd) || defined (L_ne_td)
+CMPtype
+DFP_NE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ int stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For NE return zero for true, nonzero for false. */
+ if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */
+ return 1;
+ return stat != 0;
+}
+#endif /* L_ne */
+
+#if defined (L_lt_sd) || defined (L_lt_dd) || defined (L_lt_td)
+CMPtype
+DFP_LT (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ int stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For LT return -1 (<0) for true, 1 for false. */
+ return (stat == -1) ? -1 : 1;
+}
+#endif /* L_lt */
+
+#if defined (L_gt_sd) || defined (L_gt_dd) || defined (L_gt_td)
+CMPtype
+DFP_GT (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ int stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For GT return 1 (>0) for true, -1 for false. */
+ return (stat == 1) ? 1 : -1;
+}
+#endif
+
+#if defined (L_le_sd) || defined (L_le_dd) || defined (L_le_td)
+CMPtype
+DFP_LE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ int stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For LE return 0 (<= 0) for true, 1 for false. */
+ if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */
+ return 1;
+ return stat == 1;
+}
+#endif /* L_le */
+
+#if defined (L_ge_sd) || defined (L_ge_dd) || defined (L_ge_td)
+CMPtype
+DFP_GE (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ int stat;
+ stat = DFP_COMPARE_OP (DEC_FLOAT_COMPARE, arg_a, arg_b);
+ /* For GE return 1 (>=0) for true, -1 for false. */
+ if (__builtin_expect (stat == -2, 0)) /* An operand is NaN. */
+ return -1;
+ return (stat != -1) ? 1 : -1;
+}
+#endif /* L_ge */
+
+#define BUFMAX 128
+
+/* Check for floating point exceptions that are relevant for conversions
+ between decimal float values and handle them. */
+static inline void
+dfp_conversion_exceptions (const int status)
+{
+ /* decNumber exception flags we care about here. */
+ int ieee_flags;
+ int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Invalid_operation
+ | DEC_IEEE_854_Overflow;
+ dec_flags &= status;
+ ieee_flags = DFP_IEEE_FLAGS (dec_flags);
+ if (ieee_flags != 0)
+ DFP_HANDLE_EXCEPTIONS (ieee_flags);
+}
+
+#if defined (L_sd_to_dd)
+/* Use decNumber to convert directly from _Decimal32 to _Decimal64. */
+_Decimal64
+DFP_TO_DFP (_Decimal32 f_from)
+{
+ union { _Decimal32 c; decSingle f; } from;
+ union { _Decimal64 c; decDouble f; } to;
+
+ from.c = f_from;
+ to.f = *decSingleToWider (&from.f, &to.f);
+ return to.c;
+}
+#endif
+
+#if defined (L_sd_to_td)
+/* Use decNumber to convert directly from _Decimal32 to _Decimal128. */
+_Decimal128
+DFP_TO_DFP (_Decimal32 f_from)
+{
+ union { _Decimal32 c; decSingle f; } from;
+ union { _Decimal128 c; decQuad f; } to;
+ decDouble temp;
+
+ from.c = f_from;
+ temp = *decSingleToWider (&from.f, &temp);
+ to.f = *decDoubleToWider (&temp, &to.f);
+ return to.c;
+}
+#endif
+
+#if defined (L_dd_to_td)
+/* Use decNumber to convert directly from _Decimal64 to _Decimal128. */
+_Decimal128
+DFP_TO_DFP (_Decimal64 f_from)
+{
+ union { _Decimal64 c; decDouble f; } from;
+ union { _Decimal128 c; decQuad f; } to;
+
+ from.c = f_from;
+ to.f = *decDoubleToWider (&from.f, &to.f);
+ return to.c;
+}
+#endif
+
+#if defined (L_dd_to_sd)
+/* Use decNumber to convert directly from _Decimal64 to _Decimal32. */
+_Decimal32
+DFP_TO_DFP (_Decimal64 f_from)
+{
+ union { _Decimal32 c; decSingle f; } to;
+ union { _Decimal64 c; decDouble f; } from;
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+ from.c = f_from;
+ to.f = *decSingleFromWider (&to.f, &from.f, &context);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return to.c;
+}
+#endif
+
+#if defined (L_td_to_sd)
+/* Use decNumber to convert directly from _Decimal128 to _Decimal32. */
+_Decimal32
+DFP_TO_DFP (_Decimal128 f_from)
+{
+ union { _Decimal32 c; decSingle f; } to;
+ union { _Decimal128 c; decQuad f; } from;
+ decDouble temp;
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+ from.c = f_from;
+ temp = *decDoubleFromWider (&temp, &from.f, &context);
+ to.f = *decSingleFromWider (&to.f, &temp, &context);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return to.c;
+}
+#endif
+
+#if defined (L_td_to_dd)
+/* Use decNumber to convert directly from _Decimal128 to _Decimal64. */
+_Decimal64
+DFP_TO_DFP (_Decimal128 f_from)
+{
+ union { _Decimal64 c; decDouble f; } to;
+ union { _Decimal128 c; decQuad f; } from;
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+ from.c = f_from;
+ to.f = *decDoubleFromWider (&to.f, &from.f, &context);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return to.c;
+}
+#endif
+
+#if defined (L_dd_to_si) || defined (L_td_to_si) \
+ || defined (L_dd_to_usi) || defined (L_td_to_usi)
+/* Use decNumber to convert directly from decimal float to integer types. */
+INT_TYPE
+DFP_TO_INT (DFP_C_TYPE x)
+{
+ union { DFP_C_TYPE c; decFloat f; } u;
+ decContext context;
+ INT_TYPE i;
+
+ decContextDefault (&context, DEC_INIT_DECIMAL128);
+ context.round = DEC_ROUND_DOWN;
+ u.c = x;
+ i = DEC_FLOAT_TO_INT (&u.f, &context, context.round);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return i;
+}
+#endif
+
+#if defined (L_sd_to_si) || defined (L_sd_to_usi)
+/* Use decNumber to convert directly from decimal float to integer types. */
+INT_TYPE
+DFP_TO_INT (_Decimal32 x)
+{
+ union { _Decimal32 c; decSingle f; } u32;
+ decDouble f64;
+ decContext context;
+ INT_TYPE i;
+
+ decContextDefault (&context, DEC_INIT_DECIMAL128);
+ context.round = DEC_ROUND_DOWN;
+ u32.c = x;
+ f64 = *decSingleToWider (&u32.f, &f64);
+ i = DEC_FLOAT_TO_INT (&f64, &context, context.round);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return i;
+}
+#endif
+
+#if defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \
+ || defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi)
+/* decNumber doesn't provide support for conversions to 64-bit integer
+ types, so do it the hard way. */
+INT_TYPE
+DFP_TO_INT (DFP_C_TYPE x)
+{
+ /* decNumber's decimal* types have the same format as C's _Decimal*
+ types, but they have different calling conventions. */
+
+ /* TODO: Decimal float to integer conversions should raise FE_INVALID
+ if the result value does not fit into the result type. */
+
+ IEEE_TYPE s;
+ char buf[BUFMAX];
+ char *pos;
+ decNumber qval, n1, n2;
+ decContext context;
+
+ /* Use a large context to avoid losing precision. */
+ decContextDefault (&context, DEC_INIT_DECIMAL128);
+ /* Need non-default rounding mode here. */
+ context.round = DEC_ROUND_DOWN;
+
+ HOST_TO_IEEE (x, &s);
+ TO_INTERNAL (&s, &n1);
+ /* Rescale if the exponent is less than zero. */
+ decNumberToIntegralValue (&n2, &n1, &context);
+ /* Get a value to use for the quantize call. */
+ decNumberFromString (&qval, "1.", &context);
+ /* Force the exponent to zero. */
+ decNumberQuantize (&n1, &n2, &qval, &context);
+ /* Get a string, which at this point will not include an exponent. */
+ decNumberToString (&n1, buf);
+ /* Ignore the fractional part. */
+ pos = strchr (buf, '.');
+ if (pos)
+ *pos = 0;
+ /* Use a C library function to convert to the integral type. */
+ return STR_TO_INT (buf, NULL, 10);
+}
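+
+/* Illustrative walk-through: for x == 123.987, rounding toward zero
+   gives the integral value 123; quantizing against "1." pins the
+   exponent at zero, decNumberToString then yields "123" with neither
+   exponent nor fraction, and STR_TO_INT returns 123.  */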
+#endif
+
+#if defined (L_si_to_dd) || defined (L_si_to_td) \
+ || defined (L_usi_to_dd) || defined (L_usi_to_td)
+/* Use decNumber to convert directly from integer to decimal float types. */
+DFP_C_TYPE
+INT_TO_DFP (INT_TYPE i)
+{
+ union { DFP_C_TYPE c; decFloat f; } u;
+
+ u.f = *DEC_FLOAT_FROM_INT (&u.f, i);
+ return u.c;
+}
+#endif
+
+#if defined (L_si_to_sd) || defined (L_usi_to_sd)
+/* Use decNumber to convert directly from integer to decimal float types. */
+_Decimal32
+INT_TO_DFP (INT_TYPE i)
+{
+ union { _Decimal32 c; decSingle f; } u32;
+ decDouble f64;
+ decContext context;
+
+ decContextDefault (&context, DEC_INIT_DECIMAL128);
+ f64 = *DEC_FLOAT_FROM_INT (&f64, i);
+ u32.f = *decSingleFromWider (&u32.f, &f64, &context);
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+ return u32.c;
+}
+#endif
+
+#if defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td) \
+ || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td)
+/* decNumber doesn't provide support for conversions from 64-bit integer
+ types, so do it the hard way. */
+DFP_C_TYPE
+INT_TO_DFP (INT_TYPE i)
+{
+ DFP_C_TYPE f;
+ IEEE_TYPE s;
+ char buf[BUFMAX];
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+
+ /* Use a C library function to get a floating point string. */
+ sprintf (buf, INT_FMT ".", CAST_FOR_FMT(i));
+ /* Convert from the floating point string to a decimal* type. */
+ FROM_STRING (&s, buf, &context);
+ IEEE_TO_HOST (s, &f);
+
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ dfp_conversion_exceptions (context.status);
+
+ return f;
+}
+#endif
+
+#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \
+ || defined (L_sd_to_df) || defined (L_dd_to_df) || defined (L_td_to_df) \
+ || ((defined (L_sd_to_xf) || defined (L_dd_to_xf) || defined (L_td_to_xf)) \
+ && LONG_DOUBLE_HAS_XF_MODE) \
+ || ((defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf)) \
+ && LONG_DOUBLE_HAS_TF_MODE)
+BFP_TYPE
+DFP_TO_BFP (DFP_C_TYPE f)
+{
+ IEEE_TYPE s;
+ char buf[BUFMAX];
+
+ HOST_TO_IEEE (f, &s);
+ /* Write the value to a string. */
+ TO_STRING (&s, buf);
+ /* Read it as the binary floating point type and return that. */
+ return STR_TO_BFP (buf, NULL);
+}
+#endif
+
+#if defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td) \
+ || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td) \
+ || ((defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td)) \
+ && LONG_DOUBLE_HAS_XF_MODE) \
+ || ((defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td)) \
+ && LONG_DOUBLE_HAS_TF_MODE)
+DFP_C_TYPE
+BFP_TO_DFP (BFP_TYPE x)
+{
+ DFP_C_TYPE f;
+ IEEE_TYPE s;
+ char buf[BUFMAX];
+ decContext context;
+
+ decContextDefault (&context, CONTEXT_INIT);
+ DFP_INIT_ROUNDMODE (context.round);
+
+ /* Use a C library function to write the floating point value to a string. */
+ sprintf (buf, BFP_FMT, (BFP_VIA_TYPE) x);
+
+ /* Convert from the floating point string to a decimal* type. */
+ FROM_STRING (&s, buf, &context);
+ IEEE_TO_HOST (s, &f);
+
+ if (DFP_EXCEPTIONS_ENABLED && context.status != 0)
+ {
+ /* decNumber exception flags we care about here. */
+ int ieee_flags;
+ int dec_flags = DEC_IEEE_854_Inexact | DEC_IEEE_854_Invalid_operation
+ | DEC_IEEE_854_Overflow | DEC_IEEE_854_Underflow;
+ dec_flags &= context.status;
+ ieee_flags = DFP_IEEE_FLAGS (dec_flags);
+ if (ieee_flags != 0)
+ DFP_HANDLE_EXCEPTIONS (ieee_flags);
+ }
+
+ return f;
+}
+#endif
+
+#if defined (L_unord_sd) || defined (L_unord_dd) || defined (L_unord_td)
+CMPtype
+DFP_UNORD (DFP_C_TYPE arg_a, DFP_C_TYPE arg_b)
+{
+ decNumber arg1, arg2;
+ IEEE_TYPE a, b;
+
+ HOST_TO_IEEE (arg_a, &a);
+ HOST_TO_IEEE (arg_b, &b);
+ TO_INTERNAL (&a, &arg1);
+ TO_INTERNAL (&b, &arg2);
+ return (decNumberIsNaN (&arg1) || decNumberIsNaN (&arg2));
+}
+#endif /* L_unord_sd || L_unord_dd || L_unord_td */
diff --git a/gcc/config/dfp-bit.h b/gcc/config/dfp-bit.h
new file mode 100644
index 000000000..45b79086b
--- /dev/null
+++ b/gcc/config/dfp-bit.h
@@ -0,0 +1,626 @@
+/* Header file for dfp-bit.c.
+ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _DFPBIT_H
+#define _DFPBIT_H
+
+#include <float.h>
+#include <fenv.h>
+#include <decRound.h>
+#include <decExcept.h>
+#include "tconfig.h"
+#include "coretypes.h"
+#include "tm.h"
+
+#ifndef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE LONG_DOUBLE_TYPE_SIZE
+#endif
+
+/* We need to know the size of long double that the C library supports.
+ Don't use LIBGCC2_HAS_XF_MODE or LIBGCC2_HAS_TF_MODE here because
+ some targets set both of those. */
+
+#define LONG_DOUBLE_HAS_XF_MODE \
+ (BITS_PER_UNIT == 8 && LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 80)
+
+#define LONG_DOUBLE_HAS_TF_MODE \
+ (BITS_PER_UNIT == 8 && LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 128)
+
+/* Depending on WIDTH, define a number of macros:
+
+ DFP_C_TYPE: type of the arguments to the libgcc functions;
+ (eg _Decimal32)
+
+ IEEE_TYPE: the corresponding (encoded) IEEE754 type;
+ (eg decimal32)
+
+ TO_INTERNAL: the name of the decNumber function to convert an
+ encoded value into the decNumber internal representation;
+
+ TO_ENCODED: the name of the decNumber function to convert an
+ internally represented decNumber into the encoded
+ representation.
+
+ FROM_STRING: the name of the decNumber function to read an
+ encoded value from a string.
+
+ TO_STRING: the name of the decNumber function to write an
+ encoded value to a string. */
+
+#if WIDTH == 32
+#define DFP_C_TYPE _Decimal32
+#define IEEE_TYPE decimal32
+#define HOST_TO_IEEE __host_to_ieee_32
+#define IEEE_TO_HOST __ieee_to_host_32
+#define TO_INTERNAL __decimal32ToNumber
+#define TO_ENCODED __decimal32FromNumber
+#define FROM_STRING __decimal32FromString
+#define TO_STRING __decimal32ToString
+#elif WIDTH == 64
+#define DFP_C_TYPE _Decimal64
+#define IEEE_TYPE decimal64
+#define HOST_TO_IEEE __host_to_ieee_64
+#define IEEE_TO_HOST __ieee_to_host_64
+#define TO_INTERNAL __decimal64ToNumber
+#define TO_ENCODED __decimal64FromNumber
+#define FROM_STRING __decimal64FromString
+#define TO_STRING __decimal64ToString
+#elif WIDTH == 128
+#define DFP_C_TYPE _Decimal128
+#define IEEE_TYPE decimal128
+#define HOST_TO_IEEE __host_to_ieee_128
+#define IEEE_TO_HOST __ieee_to_host_128
+#define TO_INTERNAL __decimal128ToNumber
+#define TO_ENCODED __decimal128FromNumber
+#define FROM_STRING __decimal128FromString
+#define TO_STRING __decimal128ToString
+#else
+#error invalid decimal float word width
+#endif
+
+/* We define __DEC_EVAL_METHOD__ to 2, saying that we evaluate all
+ operations and constants to the range and precision of the _Decimal128
+ type. Make it so. */
+#if WIDTH == 32
+#define CONTEXT_INIT DEC_INIT_DECIMAL32
+#elif WIDTH == 64
+#define CONTEXT_INIT DEC_INIT_DECIMAL64
+#elif WIDTH == 128
+#define CONTEXT_INIT DEC_INIT_DECIMAL128
+#endif
+
+#ifndef DFP_INIT_ROUNDMODE
+#define DFP_INIT_ROUNDMODE(A) A = DEC_ROUND_HALF_EVEN
+#endif
+
+#ifdef DFP_EXCEPTIONS_ENABLED
+/* Return IEEE exception flags based on decNumber status flags.  */
+#define DFP_IEEE_FLAGS(DEC_FLAGS) __extension__ \
+({int _fe_flags = 0; \
+  if (((DEC_FLAGS) & DEC_IEEE_854_Division_by_zero) != 0) \
+    _fe_flags |= FE_DIVBYZERO; \
+  if (((DEC_FLAGS) & DEC_IEEE_854_Inexact) != 0) \
+    _fe_flags |= FE_INEXACT; \
+  if (((DEC_FLAGS) & DEC_IEEE_854_Invalid_operation) != 0) \
+    _fe_flags |= FE_INVALID; \
+  if (((DEC_FLAGS) & DEC_IEEE_854_Overflow) != 0) \
+    _fe_flags |= FE_OVERFLOW; \
+  if (((DEC_FLAGS) & DEC_IEEE_854_Underflow) != 0) \
+    _fe_flags |= FE_UNDERFLOW; \
+  _fe_flags; })
+#else
+#define DFP_EXCEPTIONS_ENABLED 0
+#define DFP_IEEE_FLAGS(A) 0
+#define DFP_HANDLE_EXCEPTIONS(A) do {} while (0)
+#endif
+
+/* Conversions between different decimal float types use WIDTH_TO to
+ determine additional macros to define. */
+
+#if defined (L_dd_to_sd) || defined (L_td_to_sd)
+#define WIDTH_TO 32
+#elif defined (L_sd_to_dd) || defined (L_td_to_dd)
+#define WIDTH_TO 64
+#elif defined (L_sd_to_td) || defined (L_dd_to_td)
+#define WIDTH_TO 128
+#endif
+
+/* If WIDTH_TO is defined, define additional macros:
+
+ DFP_C_TYPE_TO: type of the result of dfp to dfp conversion.
+
+ IEEE_TYPE_TO: the corresponding (encoded) IEEE754 type.
+
+ TO_ENCODED_TO: the name of the decNumber function to convert an
+ internally represented decNumber into the encoded representation
+ for the destination. */
+
+#if WIDTH_TO == 32
+#define DFP_C_TYPE_TO _Decimal32
+#define IEEE_TYPE_TO decimal32
+#define TO_ENCODED_TO __decimal32FromNumber
+#define IEEE_TO_HOST_TO __ieee_to_host_32
+#elif WIDTH_TO == 64
+#define DFP_C_TYPE_TO _Decimal64
+#define IEEE_TYPE_TO decimal64
+#define TO_ENCODED_TO __decimal64FromNumber
+#define IEEE_TO_HOST_TO __ieee_to_host_64
+#elif WIDTH_TO == 128
+#define DFP_C_TYPE_TO _Decimal128
+#define IEEE_TYPE_TO decimal128
+#define TO_ENCODED_TO __decimal128FromNumber
+#define IEEE_TO_HOST_TO __ieee_to_host_128
+#endif
+
+/* Conversions between decimal float types and integral types use INT_KIND
+ to determine the data type and C functions to use. */
+
+#if defined (L_sd_to_si) || defined (L_dd_to_si) || defined (L_td_to_si) \
+ || defined (L_si_to_sd) || defined (L_si_to_dd) || defined (L_si_to_td)
+#define INT_KIND 1
+#elif defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \
+ || defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td)
+#define INT_KIND 2
+#elif defined (L_sd_to_usi) || defined (L_dd_to_usi) || defined (L_td_to_usi) \
+ || defined (L_usi_to_sd) || defined (L_usi_to_dd) || defined (L_usi_to_td)
+#define INT_KIND 3
+#elif defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi) \
+ || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td)
+#define INT_KIND 4
+#endif
+
+/* If INT_KIND is defined, define additional macros:
+
+ INT_TYPE: The integer data type.
+
+ INT_FMT: The format string for writing the integer to a string.
+
+   CAST_FOR_FMT: Cast a variable of INT_TYPE to a C type for sprintf.
+   This works for ILP32 and LP64, but not for other type-size models.
+
+ STR_TO_INT: The function to read the integer from a string. */
+
+#if INT_KIND == 1
+#define INT_TYPE SItype
+#define INT_FMT "%d"
+#define CAST_FOR_FMT(A) (int)A
+#define STR_TO_INT strtol
+#elif INT_KIND == 2
+#define INT_TYPE DItype
+#define INT_FMT "%lld"
+#define CAST_FOR_FMT(A) (long long)A
+#define STR_TO_INT strtoll
+#elif INT_KIND == 3
+#define INT_TYPE USItype
+#define INT_FMT "%u"
+#define CAST_FOR_FMT(A) (unsigned int)A
+#define STR_TO_INT strtoul
+#elif INT_KIND == 4
+#define INT_TYPE UDItype
+#define INT_FMT "%llu"
+#define CAST_FOR_FMT(A) (unsigned long long)A
+#define STR_TO_INT strtoull
+#endif
+
+/* Conversions between decimal float types and binary float types use
+ BFP_KIND to determine the data type and C functions to use. */
+
+#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \
+ || defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td)
+#define BFP_KIND 1
+#elif defined (L_sd_to_df) || defined (L_dd_to_df ) || defined (L_td_to_df) \
+ || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td)
+#define BFP_KIND 2
+#elif defined (L_sd_to_xf) || defined (L_dd_to_xf ) || defined (L_td_to_xf) \
+ || defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td)
+#define BFP_KIND 3
+#elif defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf) \
+ || defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td)
+#define BFP_KIND 4
+#endif
+
+/* If BFP_KIND is defined, define additional macros:
+
+ BFP_TYPE: The binary floating point data type.
+
+ BFP_FMT: The format string for writing the value to a string.
+ The number of decimal digits printed is
+ ceil (nbits / log2 (10.) + 1)
+ as described in David Matula's CACM 19(3) 716-723 June 1968 paper.
+
+ BFP_VIA_TYPE: Type to which to cast a variable of BPF_TYPE for a
+ call to sprintf.
+
+ STR_TO_BFP: The function to read the value from a string. */
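+
+/* Worked out for the widths used below (illustrative): a 24-bit
+   mantissa gives ceil (24 / log2 (10.) + 1) == 9, hence "%.9e";
+   53 bits give 17 ("%.17e"); 64 bits give 21 ("%.21Le"); and the
+   106- and 113-bit long double cases give 33 and 36 ("%.33Le" and
+   "%.36Le").  */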
+
+#if BFP_KIND == 1
+#define BFP_TYPE SFtype
+#define BFP_FMT "%.9e"
+#define BFP_VIA_TYPE double
+#define STR_TO_BFP strtof
+
+#elif BFP_KIND == 2
+#define BFP_TYPE DFtype
+#define BFP_FMT "%.17e"
+#define BFP_VIA_TYPE double
+#define STR_TO_BFP strtod
+
+#elif BFP_KIND == 3
+#if LONG_DOUBLE_HAS_XF_MODE
+#define BFP_TYPE XFtype
+#define BFP_FMT "%.21Le"
+#define BFP_VIA_TYPE long double
+#define STR_TO_BFP strtold
+#endif /* LONG_DOUBLE_HAS_XF_MODE */
+
+#elif BFP_KIND == 4
+#if LONG_DOUBLE_HAS_TF_MODE
+#define BFP_TYPE TFtype
+#if LDBL_MANT_DIG == 106
+#define BFP_FMT "%.33Le"
+#elif LDBL_MANT_DIG == 113
+#define BFP_FMT "%.36Le"
+#else
+#error "unknown long double size, cannot define BFP_FMT"
+#endif /* LDBL_MANT_DIG */
+#define STR_TO_BFP strtold
+#define BFP_VIA_TYPE long double
+#endif /* LONG_DOUBLE_HAS_TF_MODE */
+
+#endif /* BFP_KIND */
+
+#if WIDTH == 128 || WIDTH_TO == 128
+#include "decimal128.h"
+#include "decQuad.h"
+#endif
+#if WIDTH == 64 || WIDTH_TO == 64
+#include "decimal64.h"
+#include "decDouble.h"
+#endif
+#if WIDTH == 32 || WIDTH_TO == 32
+#include "decimal32.h"
+#include "decSingle.h"
+#endif
+#include "decNumber.h"
+
+/* Names of arithmetic functions. */
+
+#if ENABLE_DECIMAL_BID_FORMAT
+#define DPD_BID_NAME(DPD,BID) BID
+#else
+#define DPD_BID_NAME(DPD,BID) DPD
+#endif
+
+#if WIDTH == 32
+#define DFP_ADD DPD_BID_NAME(__dpd_addsd3,__bid_addsd3)
+#define DFP_SUB DPD_BID_NAME(__dpd_subsd3,__bid_subsd3)
+#define DFP_MULTIPLY DPD_BID_NAME(__dpd_mulsd3,__bid_mulsd3)
+#define DFP_DIVIDE DPD_BID_NAME(__dpd_divsd3,__bid_divsd3)
+#define DFP_EQ DPD_BID_NAME(__dpd_eqsd2,__bid_eqsd2)
+#define DFP_NE DPD_BID_NAME(__dpd_nesd2,__bid_nesd2)
+#define DFP_LT DPD_BID_NAME(__dpd_ltsd2,__bid_ltsd2)
+#define DFP_GT DPD_BID_NAME(__dpd_gtsd2,__bid_gtsd2)
+#define DFP_LE DPD_BID_NAME(__dpd_lesd2,__bid_lesd2)
+#define DFP_GE DPD_BID_NAME(__dpd_gesd2,__bid_gesd2)
+#define DFP_UNORD DPD_BID_NAME(__dpd_unordsd2,__bid_unordsd2)
+#elif WIDTH == 64
+#define DFP_ADD DPD_BID_NAME(__dpd_adddd3,__bid_adddd3)
+#define DFP_SUB DPD_BID_NAME(__dpd_subdd3,__bid_subdd3)
+#define DFP_MULTIPLY DPD_BID_NAME(__dpd_muldd3,__bid_muldd3)
+#define DFP_DIVIDE DPD_BID_NAME(__dpd_divdd3,__bid_divdd3)
+#define DFP_EQ DPD_BID_NAME(__dpd_eqdd2,__bid_eqdd2)
+#define DFP_NE DPD_BID_NAME(__dpd_nedd2,__bid_nedd2)
+#define DFP_LT DPD_BID_NAME(__dpd_ltdd2,__bid_ltdd2)
+#define DFP_GT DPD_BID_NAME(__dpd_gtdd2,__bid_gtdd2)
+#define DFP_LE DPD_BID_NAME(__dpd_ledd2,__bid_ledd2)
+#define DFP_GE DPD_BID_NAME(__dpd_gedd2,__bid_gedd2)
+#define DFP_UNORD DPD_BID_NAME(__dpd_unorddd2,__bid_unorddd2)
+#elif WIDTH == 128
+#define DFP_ADD DPD_BID_NAME(__dpd_addtd3,__bid_addtd3)
+#define DFP_SUB DPD_BID_NAME(__dpd_subtd3,__bid_subtd3)
+#define DFP_MULTIPLY DPD_BID_NAME(__dpd_multd3,__bid_multd3)
+#define DFP_DIVIDE DPD_BID_NAME(__dpd_divtd3,__bid_divtd3)
+#define DFP_EQ DPD_BID_NAME(__dpd_eqtd2,__bid_eqtd2)
+#define DFP_NE DPD_BID_NAME(__dpd_netd2,__bid_netd2)
+#define DFP_LT DPD_BID_NAME(__dpd_lttd2,__bid_lttd2)
+#define DFP_GT DPD_BID_NAME(__dpd_gttd2,__bid_gttd2)
+#define DFP_LE DPD_BID_NAME(__dpd_letd2,__bid_letd2)
+#define DFP_GE DPD_BID_NAME(__dpd_getd2,__bid_getd2)
+#define DFP_UNORD DPD_BID_NAME(__dpd_unordtd2,__bid_unordtd2)
+#endif
+
+/* Names of decNumber functions for DPD arithmetic. */
+
+#if WIDTH == 32
+#define decFloat decDouble
+#define DFP_BINARY_OP d32_binary_op
+#define DFP_COMPARE_OP d32_compare_op
+#define DEC_FLOAT_ADD decDoubleAdd
+#define DEC_FLOAT_SUBTRACT decDoubleSubtract
+#define DEC_FLOAT_MULTIPLY decDoubleMultiply
+#define DEC_FLOAT_DIVIDE decDoubleDivide
+#define DEC_FLOAT_COMPARE decDoubleCompare
+#define DEC_FLOAT_IS_ZERO decDoubleIsZero
+#define DEC_FLOAT_IS_NAN decDoubleIsNaN
+#define DEC_FLOAT_IS_SIGNED decDoubleIsSigned
+#elif WIDTH == 64
+#define DFP_BINARY_OP dnn_binary_op
+#define DFP_COMPARE_OP dnn_compare_op
+#define decFloat decDouble
+#define DEC_FLOAT_ADD decDoubleAdd
+#define DEC_FLOAT_SUBTRACT decDoubleSubtract
+#define DEC_FLOAT_MULTIPLY decDoubleMultiply
+#define DEC_FLOAT_DIVIDE decDoubleDivide
+#define DEC_FLOAT_COMPARE decDoubleCompare
+#define DEC_FLOAT_IS_ZERO decDoubleIsZero
+#define DEC_FLOAT_IS_NAN decDoubleIsNaN
+#define DEC_FLOAT_IS_SIGNED decDoubleIsSigned
+#elif WIDTH == 128
+#define DFP_BINARY_OP dnn_binary_op
+#define DFP_COMPARE_OP dnn_compare_op
+#define decFloat decQuad
+#define DEC_FLOAT_ADD decQuadAdd
+#define DEC_FLOAT_SUBTRACT decQuadSubtract
+#define DEC_FLOAT_MULTIPLY decQuadMultiply
+#define DEC_FLOAT_DIVIDE decQuadDivide
+#define DEC_FLOAT_COMPARE decQuadCompare
+#define DEC_FLOAT_IS_ZERO decQuadIsZero
+#define DEC_FLOAT_IS_NAN decQuadIsNaN
+#define DEC_FLOAT_IS_SIGNED decQuadIsSigned
+#endif
+
+/* Names of functions to convert between different decimal float types. */
+
+#if WIDTH == 32
+#if WIDTH_TO == 64
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendsddd2,__bid_extendsddd2)
+#elif WIDTH_TO == 128
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendsdtd2,__bid_extendsdtd2)
+#endif
+#elif WIDTH == 64
+#if WIDTH_TO == 32
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_truncddsd2,__bid_truncddsd2)
+#elif WIDTH_TO == 128
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_extendddtd2,__bid_extendddtd2)
+#endif
+#elif WIDTH == 128
+#if WIDTH_TO == 32
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_trunctdsd2,__bid_trunctdsd2)
+#elif WIDTH_TO == 64
+#define DFP_TO_DFP DPD_BID_NAME(__dpd_trunctddd2,__bid_trunctddd2)
+#endif
+#endif
+
+/* Names of functions to convert between decimal float and integers. */
+
+#if WIDTH == 32
+#if INT_KIND == 1
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsisd,__bid_floatsisd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixsdsi,__bid_fixsdsi)
+#define DEC_FLOAT_FROM_INT decDoubleFromInt32
+#define DEC_FLOAT_TO_INT decDoubleToInt32
+#elif INT_KIND == 2
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatdisd,__bid_floatdisd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixsddi,__bid_fixsddi)
+#elif INT_KIND == 3
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssisd,__bid_floatunssisd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunssdsi,__bid_fixunssdsi)
+#define DEC_FLOAT_FROM_INT decDoubleFromUInt32
+#define DEC_FLOAT_TO_INT decDoubleToUInt32
+#elif INT_KIND == 4
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsdisd,__bid_floatunsdisd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunssddi,__bid_fixunssddi)
+#endif
+#elif WIDTH == 64
+#define decFloat decDouble
+#if INT_KIND == 1
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsidd,__bid_floatsidd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixddsi,__bid_fixddsi)
+#define DEC_FLOAT_FROM_INT decDoubleFromInt32
+#define DEC_FLOAT_TO_INT decDoubleToInt32
+#elif INT_KIND == 2
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatdidd,__bid_floatdidd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixdddi,__bid_fixdddi)
+#elif INT_KIND == 3
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssidd,__bid_floatunssidd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunsddsi,__bid_fixunsddsi)
+#define DEC_FLOAT_FROM_INT decDoubleFromUInt32
+#define DEC_FLOAT_TO_INT decDoubleToUInt32
+#elif INT_KIND == 4
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsdidd,__bid_floatunsdidd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunsdddi,__bid_fixunsdddi)
+#endif
+#elif WIDTH == 128
+#define decFloat decQuad
+#if INT_KIND == 1
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatsitd,__bid_floatsitd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixtdsi,__bid_fixtdsi)
+#define DEC_FLOAT_FROM_INT decQuadFromInt32
+#define DEC_FLOAT_TO_INT decQuadToInt32
+#elif INT_KIND == 2
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatditd,__bid_floatditd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixtddi,__bid_fixtddi)
+#elif INT_KIND == 3
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunssitd,__bid_floatunssitd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunstdsi,__bid_fixunstdsi)
+#define DEC_FLOAT_FROM_INT decQuadFromUInt32
+#define DEC_FLOAT_TO_INT decQuadToUInt32
+#elif INT_KIND == 4
+#define INT_TO_DFP DPD_BID_NAME(__dpd_floatunsditd,__bid_floatunsditd)
+#define DFP_TO_INT DPD_BID_NAME(__dpd_fixunstddi,__bid_fixunstddi)
+#endif
+#endif
+
+/* Names of functions to convert between decimal float and binary float. */
+
+#if WIDTH == 32
+#if BFP_KIND == 1
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsfsd,__bid_extendsfsd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncsdsf,__bid_truncsdsf)
+#elif BFP_KIND == 2
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncdfsd,__bid_truncdfsd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsddf,__bid_extendsddf)
+#elif BFP_KIND == 3
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncxfsd,__bid_truncxfsd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsdxf,__bid_extendsdxf)
+#elif BFP_KIND == 4
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_trunctfsd,__bid_trunctfsd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendsdtf,__bid_extendsdtf)
+#endif /* BFP_KIND */
+
+#elif WIDTH == 64
+#if BFP_KIND == 1
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsfdd,__bid_extendsfdd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncddsf,__bid_truncddsf)
+#elif BFP_KIND == 2
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extenddfdd,__bid_extenddfdd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_truncdddf,__bid_truncdddf)
+#elif BFP_KIND == 3
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_truncxfdd,__bid_truncxfdd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendddxf,__bid_extendddxf)
+#elif BFP_KIND == 4
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_trunctfdd,__bid_trunctfdd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_extendddtf,__bid_extendddtf)
+#endif /* BFP_KIND */
+
+#elif WIDTH == 128
+#if BFP_KIND == 1
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendsftd,__bid_extendsftd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdsf,__bid_trunctdsf)
+#elif BFP_KIND == 2
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extenddftd,__bid_extenddftd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctddf,__bid_trunctddf)
+#elif BFP_KIND == 3
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendxftd,__bid_extendxftd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdxf,__bid_trunctdxf)
+#elif BFP_KIND == 4
+#define BFP_TO_DFP DPD_BID_NAME(__dpd_extendtftd,__bid_extendtftd)
+#define DFP_TO_BFP DPD_BID_NAME(__dpd_trunctdtf,__bid_trunctdtf)
+#endif /* BFP_KIND */
+
+#endif /* WIDTH */
+
+/* Some handy typedefs. */
+
+typedef float SFtype __attribute__ ((mode (SF)));
+typedef float DFtype __attribute__ ((mode (DF)));
+#if LONG_DOUBLE_HAS_XF_MODE
+typedef float XFtype __attribute__ ((mode (XF)));
+#endif /* LONG_DOUBLE_HAS_XF_MODE */
+#if LONG_DOUBLE_HAS_TF_MODE
+typedef float TFtype __attribute__ ((mode (TF)));
+#endif /* LONG_DOUBLE_HAS_TF_MODE */
+
+typedef int SItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+
+/* The type of the result of a decimal float comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+
+typedef int CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+
+/* Prototypes. */
+
+#if defined (L_mul_sd) || defined (L_mul_dd) || defined (L_mul_td)
+extern DFP_C_TYPE DFP_MULTIPLY (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_div_sd) || defined (L_div_dd) || defined (L_div_td)
+extern DFP_C_TYPE DFP_DIVIDE (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_addsub_sd) || defined (L_addsub_dd) || defined (L_addsub_td)
+extern DFP_C_TYPE DFP_ADD (DFP_C_TYPE, DFP_C_TYPE);
+extern DFP_C_TYPE DFP_SUB (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_eq_sd) || defined (L_eq_dd) || defined (L_eq_td)
+extern CMPtype DFP_EQ (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_ne_sd) || defined (L_ne_dd) || defined (L_ne_td)
+extern CMPtype DFP_NE (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_lt_sd) || defined (L_lt_dd) || defined (L_lt_td)
+extern CMPtype DFP_LT (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_gt_sd) || defined (L_gt_dd) || defined (L_gt_td)
+extern CMPtype DFP_GT (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_le_sd) || defined (L_le_dd) || defined (L_le_td)
+extern CMPtype DFP_LE (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_ge_sd) || defined (L_ge_dd) || defined (L_ge_td)
+extern CMPtype DFP_GE (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_unord_sd) || defined (L_unord_dd) || defined (L_unord_td)
+extern CMPtype DFP_UNORD (DFP_C_TYPE, DFP_C_TYPE);
+#endif
+
+#if defined (L_sd_to_dd) || defined (L_sd_to_td) || defined (L_dd_to_sd) \
+ || defined (L_dd_to_td) || defined (L_td_to_sd) || defined (L_td_to_dd)
+extern DFP_C_TYPE_TO DFP_TO_DFP (DFP_C_TYPE);
+#endif
+
+#if defined (L_sd_to_si) || defined (L_dd_to_si) || defined (L_td_to_si) \
+ || defined (L_sd_to_di) || defined (L_dd_to_di) || defined (L_td_to_di) \
+ || defined (L_sd_to_usi) || defined (L_dd_to_usi) || defined (L_td_to_usi) \
+ || defined (L_sd_to_udi) || defined (L_dd_to_udi) || defined (L_td_to_udi)
+extern INT_TYPE DFP_TO_INT (DFP_C_TYPE);
+#endif
+
+#if defined (L_si_to_sd) || defined (L_si_to_dd) || defined (L_si_to_td) \
+ || defined (L_di_to_sd) || defined (L_di_to_dd) || defined (L_di_to_td) \
+ || defined (L_usi_to_sd) || defined (L_usi_to_dd) || defined (L_usi_to_td) \
+ || defined (L_udi_to_sd) || defined (L_udi_to_dd) || defined (L_udi_to_td)
+extern DFP_C_TYPE INT_TO_DFP (INT_TYPE);
+#endif
+
+#if defined (L_sd_to_sf) || defined (L_dd_to_sf) || defined (L_td_to_sf) \
+ || defined (L_sd_to_df) || defined (L_dd_to_df) || defined (L_td_to_df) \
+ || ((defined (L_sd_to_xf) || defined (L_dd_to_xf) || defined (L_td_to_xf)) \
+ && LONG_DOUBLE_HAS_XF_MODE) \
+ || ((defined (L_sd_to_tf) || defined (L_dd_to_tf) || defined (L_td_to_tf)) \
+ && LONG_DOUBLE_HAS_TF_MODE)
+extern BFP_TYPE DFP_TO_BFP (DFP_C_TYPE);
+#endif
+
+#if defined (L_sf_to_sd) || defined (L_sf_to_dd) || defined (L_sf_to_td) \
+ || defined (L_df_to_sd) || defined (L_df_to_dd) || defined (L_df_to_td) \
+ || ((defined (L_xf_to_sd) || defined (L_xf_to_dd) || defined (L_xf_to_td)) \
+ && LONG_DOUBLE_HAS_XF_MODE) \
+ || ((defined (L_tf_to_sd) || defined (L_tf_to_dd) || defined (L_tf_to_td)) \
+ && LONG_DOUBLE_HAS_TF_MODE)
+extern DFP_C_TYPE BFP_TO_DFP (BFP_TYPE);
+#endif
+
+#endif /* _DFPBIT_H */
diff --git a/gcc/config/divmod.c b/gcc/config/divmod.c
new file mode 100644
index 000000000..c227b99cc
--- /dev/null
+++ b/gcc/config/divmod.c
@@ -0,0 +1,73 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+long udivmodsi4 ();
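+
+/* udivmodsi4 is expected to be provided elsewhere for the target; per
+   the usual libgcc convention its third argument selects the result:
+   0 returns the quotient, nonzero the remainder.  The callers below
+   rely on that.  */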
+
+long
+__divsi3 (long a, long b)
+{
+ int neg = 0;
+ long res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = !neg;
+ }
+
+ if (b < 0)
+ {
+ b = -b;
+ neg = !neg;
+ }
+
+ res = udivmodsi4 (a, b, 0);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+
+long
+__modsi3 (long a, long b)
+{
+ int neg = 0;
+ long res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = 1;
+ }
+
+ if (b < 0)
+ b = -b;
+
+ res = udivmodsi4 (a, b, 1);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
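+
+/* A worked example of the sign handling above: __divsi3 (-7, 2) negates
+   a, computes udivmodsi4 (7, 2, 0) == 3, and returns -3, while
+   __modsi3 (-7, 2) returns -1.  This matches C's truncating division,
+   where the remainder takes the sign of the dividend.  */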
diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h
new file mode 100644
index 000000000..e483216ef
--- /dev/null
+++ b/gcc/config/elfos.h
@@ -0,0 +1,531 @@
+/* elfos.h -- operating system specific defines to be used when
+ targeting GCC for some generic ELF system
+ Copyright (C) 1991, 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2007, 2009, 2010 Free Software Foundation, Inc.
+ Based on svr4.h contributed by Ron Guilmette (rfg@netcom.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OBJFMT_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__ELF__"); \
+ } \
+ while (0)
+
+/* Define a symbol indicating that we are using elfos.h.
+ Some CPU specific configuration files use this. */
+#define USING_ELFOS_H
+
+/* The prefix to add to user-visible assembler symbols.
+
+ For ELF systems the convention is *not* to prepend a leading
+ underscore onto user-level symbol names. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+/* The biggest alignment supported by ELF in bits. 32-bit ELF
+ supports section alignment up to (0x80000000 * 8), while
+ 64-bit ELF supports (0x8000000000000000 * 8). If this macro
+ is not defined, the default is the largest alignment supported
+ by 32-bit ELF and representable on a 32-bit host. Use this
+ macro to limit the alignment which can be specified using
+ the `__attribute__ ((aligned (N)))' construct. */
+#ifndef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8)
+#endif
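+
+/* The default above is ((1 << 28) * 8) bits, i.e. an alignment of 2^28
+   bytes (256 MiB); as a bit count it is 2^31, which still fits in a
+   32-bit unsigned host integer.  */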
+
+/* Use periods rather than dollar signs in special g++ assembler names. */
+
+#define NO_DOLLAR_IN_LABEL
+
+/* Writing `int' for a bit-field forces int alignment for the structure. */
+
+#ifndef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS 1
+#endif
+
+/* All ELF targets can support DWARF-2. */
+
+#define DWARF2_DEBUGGING_INFO 1
+
+/* The GNU tools operate better with dwarf2, and it is required by some
+   psABIs.  Since we don't have any native tools to be compatible with,
+ default to dwarf2. */
+
+#ifndef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+
+/* All SVR4 targets use the ELF object file format. */
+#define OBJECT_FORMAT_ELF
+
+
+/* Output #ident as a .ident. */
+
+#define ASM_OUTPUT_IDENT(FILE, NAME) \
+ fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME);
+
+#define IDENT_ASM_OP "\t.ident\t"
+
+#undef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+
+/* Most svr4 assemblers want a .file directive at the beginning of
+ their input file. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+/* This is how to allocate empty space in some section. The .zero
+ pseudo-op is used for this on most svr4 assemblers. */
+
+#define SKIP_ASM_OP "\t.zero\t"
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE, SIZE) \
+ fprintf ((FILE), "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ SKIP_ASM_OP, (SIZE))
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'.
+
+ For most svr4 systems, the convention is that any symbol which begins
+ with a period is not put into the linker symbol table by the assembler. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ do \
+ { \
+ sprintf (LABEL, "*.%s%u", PREFIX, (unsigned) (NUM)); \
+ } \
+ while (0)
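+
+/* For example, ASM_GENERATE_INTERNAL_LABEL (buf, "LC", 0) stores
+   "*.LC0"; the leading '*' tells assemble_name to emit the rest of the
+   name verbatim instead of prepending USER_LABEL_PREFIX.  */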
+
+/* Output the label which precedes a jumptable.  Note that for all svr4
+   systems where we actually generate jumptables (which is to say every
+   svr4 target except i386, where we use casesi instead) we put the
+   jumptables into the .rodata section and since other stuff could have
+   been put into the .rodata section prior to any given jumptable, we
+   have to make sure that the location counter for the .rodata section
+   gets properly re-aligned prior to the actual beginning of the jump
+   table.  */
+
+#undef ALIGN_ASM_OP
+#define ALIGN_ASM_OP "\t.align\t"
+
+#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \
+ ASM_OUTPUT_ALIGN ((FILE), 2);
+#endif
+
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \
+ do \
+ { \
+ ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \
+ (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \
+ } \
+ while (0)
+
+/* The standard SVR4 assembler seems to require that certain builtin
+ library routines (e.g. .udiv) be explicitly declared as .globl
+ in each assembly file where they are referenced. */
+
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+ (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0))
+
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object. Under SVR4,
+ the linker seems to want the alignment of data objects
+ to depend on their types. We do exactly that here. */
+
+#define COMMON_ASM_OP "\t.comm\t"
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ while (0)
+
+/* This says how to output assembler code to declare an
+ uninitialized internal linkage data object. Under SVR4,
+ the linker seems to want the alignment of data objects
+ to depend on their types. We do exactly that here. */
+
+#define LOCAL_ASM_OP "\t.local\t"
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ fprintf ((FILE), "%s", LOCAL_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), "\n"); \
+ ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \
+ } \
+ while (0)
+
+/* This is the pseudo-op used to generate a contiguous sequence of byte
+ values from a double-quoted string WITHOUT HAVING A TERMINATING NUL
+ AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */
+
+#undef ASCII_DATA_ASM_OP
+#define ASCII_DATA_ASM_OP "\t.ascii\t"
+
+/* Support a read-only data section. */
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata"
+
+/* On svr4, we *do* have support for the .init and .fini sections, and we
+ can put stuff in there to be executed before and after `main'. We let
+ crtstuff.c and other files know this by defining the following symbols.
+ The definitions say how to change sections to the .init and .fini
+ sections. This is the same for all known svr4 assemblers. */
+
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
+
+/* Output assembly directive to move to the beginning of current section. */
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+# define ASM_SECTION_START_OP "\t.subsection\t-1"
+# define ASM_OUTPUT_SECTION_START(FILE) \
+ fprintf ((FILE), "%s\n", ASM_SECTION_START_OP)
+#endif
+
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION default_elf_select_rtx_section
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION default_elf_select_section
+#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS
+#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS true
+
+/* Define the strings used for the special svr4 .type and .size directives.
+ These strings generally do not vary from one system running svr4 to
+   another, but if a given system (e.g. m88k running SVR4) needs to use
+ different pseudo-op names for these, they may be overridden in the
+ file which includes this one. */
+
+#define TYPE_ASM_OP "\t.type\t"
+#define SIZE_ASM_OP "\t.size\t"
+
+/* This is how we tell the assembler that a symbol is weak. */
+
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do \
+ { \
+ fputs ("\t.weak\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. Different svr4 assemblers
+ expect various different forms for this operand. The one given here
+ is just a default. You may need to override it in your machine-
+ specific tm.h file (depending upon the particulars of your assembler). */
+
+#define TYPE_OPERAND_FMT "@%s"
+
+/* Write the extra assembler code needed to declare a function's result.
+ Most svr4 assemblers don't require any special declaration of the
+ result value, but there are exceptions. */
+
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
+
+/* These macros generate the special .type and .size directives which
+ are used to set the corresponding fields of the linker symbol table
+ entries in an ELF object file under SVR4. These macros also output
+ the starting labels for the relevant functions/objects. */
+
+/* Write the extra assembler code needed to declare a function properly.
+ Some svr4 assemblers need to also have something extra said about the
+ function's return value. We allow for that here. */
+
+#ifndef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \
+ } \
+ while (0)
+#endif
+
+/* Write the extra assembler code needed to declare an object properly. */
+
+#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT
+#define USE_GNU_UNIQUE_OBJECT 1
+#else
+#define USE_GNU_UNIQUE_OBJECT 0
+#endif
+
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ \
+ /* For template static data member instantiations or \
+ inline fn local statics and their guard variables, use \
+ gnu_unique_object so that they will be combined even under \
+ RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \
+ vtables and other read-only artificial decls. */ \
+ if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \
+ && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \
+ else \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
+
+/* Output the size directive for a decl in rest_of_decl_compilation
+ in the case where we did not do so before the initializer.
+ Once we find the error_mark_node, we know that the value of
+ size_directive_output was set
+ by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END)\
+ do \
+ { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ \
+ if (!flag_inhibit_size_directive \
+ && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } \
+ while (0)
+
+/* This is how to declare the size of a function. */
+#ifndef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do \
+ { \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ } \
+ while (0)
+#endif
+
+/* A table of byte codes used by the ASM_OUTPUT_ASCII and
+   ASM_OUTPUT_LIMITED_STRING macros.  Each byte in the table
+   corresponds to a particular byte value [0..255].  For any
+   given byte value, if the value in the corresponding table
+   position is zero, the given character can be output directly.
+   If the table value is 1, the byte must be output as a \ooo
+   octal escape.  If the table value is anything else, then the
+   byte value should be output as a \ followed by the value
+   in the table.  Note that we can use standard UN*X escape
+   sequences for many control characters, but we don't use
+   \a to represent BEL because some svr4 assemblers (e.g. on
+   the i386) don't know about that.  Also, we don't use \v
+   since some versions of gas, such as 2.2, did not accept it. */
+
+#define ESCAPES \
+"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
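+
+/* Reading the table above: ESCAPES['\n'] is 'n', so a newline is emitted
+   as the two characters \n; ESCAPES[1] is 1, so byte 0x01 is emitted as
+   the octal escape \001; ESCAPES['a'] is 0, so 'a' is emitted as is.  */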
+
+/* Some svr4 assemblers have a limit on the number of characters which
+ can appear in the operand of a .string directive. If your assembler
+ has such a limitation, you should define STRING_LIMIT to reflect that
+ limit. Note that at least some svr4 assemblers have a limit on the
+ actual number of bytes in the double-quoted string, and that they
+ count each character in an escape sequence as one byte. Thus, an
+ escape sequence like \377 would count as four bytes.
+
+ If your target assembler doesn't support the .string directive, you
+   should define this to zero.  */
+
+#define STRING_LIMIT ((unsigned) 256)
+
+#define STRING_ASM_OP "\t.string\t"
+
+/* The routine used to output NUL terminated strings. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable, especially for targets like the i386
+ (where the only alternative is to output character sequences as
+ comma separated lists of numbers). */
+
+#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \
+ do \
+ { \
+ register const unsigned char *_limited_str = \
+ (const unsigned char *) (STR); \
+ register unsigned ch; \
+ \
+ fprintf ((FILE), "%s\"", STRING_ASM_OP); \
+ \
+ for (; (ch = *_limited_str); _limited_str++) \
+ { \
+ register int escape; \
+ \
+ switch (escape = ESCAPES[ch]) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ break; \
+ } \
+ } \
+ \
+ fprintf ((FILE), "\"\n"); \
+ } \
+ while (0)
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ const unsigned char *last_null = NULL; \
+ unsigned bytes_in_chunk = 0; \
+ \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ \
+ if (bytes_in_chunk >= 60) \
+ { \
+ fprintf ((FILE), "\"\n"); \
+ bytes_in_chunk = 0; \
+ } \
+ \
+ if (_ascii_bytes > last_null) \
+ { \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ last_null = p; \
+ } \
+ else \
+ p = last_null; \
+ \
+ if (p < limit && (p - _ascii_bytes) <= (long)STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fprintf ((FILE), "\"\n"); \
+ bytes_in_chunk = 0; \
+ } \
+ \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ register int escape; \
+ register unsigned ch; \
+ \
+ if (bytes_in_chunk == 0) \
+ fprintf ((FILE), "%s\"", ASCII_DATA_ASM_OP); \
+ \
+ switch (escape = ESCAPES[ch = *_ascii_bytes]) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ bytes_in_chunk++; \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ bytes_in_chunk += 4; \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ bytes_in_chunk += 2; \
+ break; \
+ } \
+ } \
+ } \
+ \
+ if (bytes_in_chunk > 0) \
+ fprintf ((FILE), "\"\n"); \
+ } \
+ while (0)
+
+/* Allow the use of the -frecord-gcc-switches switch via the
+ elf_record_gcc_switches function defined in varasm.c. */
+#undef TARGET_ASM_RECORD_GCC_SWITCHES
+#define TARGET_ASM_RECORD_GCC_SWITCHES elf_record_gcc_switches
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM
+ any text necessary for declaring the name of an external symbol
+ named NAME which is referenced in this compilation but not defined.
+ It is needed to properly support non-default visibility. */
+
+#ifndef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ default_elf_asm_output_external (FILE, DECL, NAME)
+#endif
diff --git a/gcc/config/fixed-bit.c b/gcc/config/fixed-bit.c
new file mode 100644
index 000000000..40ac2e29f
--- /dev/null
+++ b/gcc/config/fixed-bit.c
@@ -0,0 +1,1216 @@
+/* This is a software fixed-point library.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This implements fixed-point arithmetic.
+
+ Contributed by Chao-ying Fu <fu@mips.com>. */
+
+/* To use this file, we need to define one of the following:
+ QQ_MODE, UQQ_MODE, HQ_MODE, UHQ_MODE, SQ_MODE, USQ_MODE, DQ_MODE, UDQ_MODE,
+ TQ_MODE, UTQ_MODE, HA_MODE, UHA_MODE, SA_MODE, USA_MODE, DA_MODE, UDA_MODE,
+ TA_MODE, UTA_MODE.
+ Then, all operators for this machine mode will be created.
+
+ Or, we need to define FROM_* TO_* for conversions from one mode to another
+ mode. The mode could be one of the following:
+ Fract: QQ, UQQ, HQ, UHQ, SQ, USQ, DQ, UDQ, TQ, UTQ
+ Accum: HA, UHA, SA, USA, DA, UDA, TA, UTA
+ Signed integer: QI, HI, SI, DI, TI
+ Unsigned integer: UQI, UHI, USI, UDI, UTI
+ Floating-point: SF, DF
+   E.g., if we define FROM_QQ and TO_SI, the conversion from QQ to SI is
+ generated. */
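+
+/* As an illustrative sketch only (the real flags come from the libgcc
+   makefile fragments, so the exact spelling is an assumption): compiling
+   just the saturating QQ-mode addition might look like
+
+     gcc -DQQ_MODE -DL_ssadd -c fixed-bit.c
+
+   so that only the FIXED_SSADD definition below is emitted.  */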
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+#ifndef MIN_UNITS_PER_WORD
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+#endif
+
+#include "config/fixed-bit.h"
+
+#if defined(FIXED_ADD) && defined(L_add)
+FIXED_C_TYPE
+FIXED_ADD (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x + y;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_ADD */
+
+#if defined(FIXED_SSADD) && defined(L_ssadd)
+FIXED_C_TYPE
+FIXED_SSADD (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x + y;
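+  /* If the addends have the same sign bit but the sum's sign bit
+     differs, the addition overflowed: saturate to the maximum value
+     when x is non-negative, or to the minimum value otherwise.  */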
+ if ((((x ^ y) >> I_F_BITS) & 1) == 0)
+ {
+ if (((z ^ x) >> I_F_BITS) & 1)
+ {
+ z = 1;
+ z = z << I_F_BITS;
+ if (x >= 0)
+ z--;
+ }
+ }
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_SSADD */
+
+#if defined(FIXED_USADD) && defined(L_usadd)
+FIXED_C_TYPE
+FIXED_USADD (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x + y;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ if (z < x || z < y) /* max */
+ {
+ z = -1;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ }
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_USADD */
+
+#if defined(FIXED_SUB) && defined(L_sub)
+FIXED_C_TYPE
+FIXED_SUB (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x - y;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_SUB */
+
+#if defined(FIXED_SSSUB) && defined(L_sssub)
+FIXED_C_TYPE
+FIXED_SSSUB (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x - y;
+ if (((x ^ y) >> I_F_BITS) & 1)
+ {
+ if (((z ^ x) >> I_F_BITS) & 1)
+ {
+ z = 1;
+ z = z << I_F_BITS;
+ if (x >= 0)
+ z--;
+ }
+ }
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_SSSUB */
+
+#if defined(FIXED_USSUB) && defined(L_ussub)
+FIXED_C_TYPE
+FIXED_USSUB (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ z = x - y;
+ if (x < y)
+ z = 0;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_USSUB */
+
+#if defined(FIXED_SATURATE1) && defined(L_saturate1)
+void
+FIXED_SATURATE1 (DINT_C_TYPE *a)
+{
+ DINT_C_TYPE max, min;
+ max = (DINT_C_TYPE)1 << I_F_BITS;
+ max = max - 1;
+#if MODE_UNSIGNED == 0
+ min = (DINT_C_TYPE)1 << (2 * FIXED_WIDTH - 1);
+ min = min >> (2 * FIXED_WIDTH - 1 - I_F_BITS);
+#else
+ min = 0;
+#endif
+ if (*a > max)
+ *a = max;
+ else if (*a < min)
+ *a = min;
+}
+#endif /* FIXED_SATURATE1 */
+
+#if defined(FIXED_SATURATE2) && defined(L_saturate2)
+void
+FIXED_SATURATE2 (INT_C_TYPE *high, INT_C_TYPE *low)
+{
+ INT_C_TYPE r_max, s_max, r_min, s_min;
+ r_max = 0;
+#if (MODE_UNSIGNED == 0) || HAVE_PADDING_BITS
+ s_max = (INT_C_TYPE)1 << I_F_BITS;
+ s_max = s_max - 1;
+#else
+ s_max = -1;
+#endif
+#if MODE_UNSIGNED == 0
+ r_min = -1;
+ s_min = (INT_C_TYPE)1 << (FIXED_WIDTH - 1);
+ s_min = s_min >> (FIXED_WIDTH - 1 - I_F_BITS);
+#else
+ r_min = 0;
+ s_min = 0;
+#endif
+
+ if (*high > r_max
+ || (*high == r_max && (UINT_C_TYPE)(*low) > (UINT_C_TYPE)s_max))
+ {
+ *high = r_max;
+ *low = s_max;
+ }
+ else if (*high < r_min ||
+ (*high == r_min && (UINT_C_TYPE)(*low) < (UINT_C_TYPE)s_min))
+ {
+ *high = r_min;
+ *low = s_min;
+ }
+}
+#endif /* FIXED_SATURATE2 */
+
+#if defined(FIXED_MULHELPER) && defined(L_mulhelper)
+FIXED_C_TYPE
+FIXED_MULHELPER (FIXED_C_TYPE a, FIXED_C_TYPE b, word_type satp)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y;
+
+#if defined (DINT_C_TYPE)
+ INT_C_TYPE z;
+ DINT_C_TYPE dx, dy, dz;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ dx = (DINT_C_TYPE) x;
+ dy = (DINT_C_TYPE) y;
+ dz = dx * dy;
+ /* Round the result by adding (1 << (FBITS -1)). */
+ dz += ((DINT_C_TYPE) 1 << (FBITS - 1));
+ dz = dz >> FBITS;
+ if (satp)
+ FIXED_SATURATE1 (&dz);
+
+ z = (INT_C_TYPE) dz;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+
+#else /* No DINT_C_TYPE */
+ /* The result of multiplication expands to two INT_C_TYPE. */
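+  /* Schoolbook multiplication: each operand is split into high and low
+     halves, so with B the weight of a high half,
+       (a_hi*B + a_lo) * (b_hi*B + b_lo)
+         = a_hi*b_hi*B*B + (a_hi*b_lo + a_lo*b_hi)*B + a_lo*b_lo,
+     and the four partial products are accumulated into the double-width
+     result pair {r, s} below.  */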
+ INTunion aa, bb;
+ INTunion a_high, a_low, b_high, b_low;
+ INTunion high_high, high_low, low_high, low_low;
+ INTunion r, s, temp1, temp2;
+ INT_C_TYPE carry = 0;
+ INT_C_TYPE z;
+
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+
+ /* Decompose a and b. */
+ aa.ll = x;
+ bb.ll = y;
+
+ a_high.s.low = aa.s.high;
+ a_high.s.high = 0;
+ a_low.s.low = aa.s.low;
+ a_low.s.high = 0;
+ b_high.s.low = bb.s.high;
+ b_high.s.high = 0;
+ b_low.s.low = bb.s.low;
+ b_low.s.high = 0;
+
+ /* Perform four multiplications. */
+ low_low.ll = a_low.ll * b_low.ll;
+ low_high.ll = a_low.ll * b_high.ll;
+ high_low.ll = a_high.ll * b_low.ll;
+ high_high.ll = a_high.ll * b_high.ll;
+
+ /* Accumulate four results to {r, s}. */
+ temp1.s.high = high_low.s.low;
+ temp1.s.low = 0;
+ s.ll = low_low.ll + temp1.ll;
+ if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) low_low.ll
+ || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll)
+ carry ++; /* Carry. */
+ temp1.ll = s.ll;
+ temp2.s.high = low_high.s.low;
+ temp2.s.low = 0;
+ s.ll = temp1.ll + temp2.ll;
+ if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll
+ || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp2.ll)
+ carry ++; /* Carry. */
+
+ temp1.s.low = high_low.s.high;
+ temp1.s.high = 0;
+ r.ll = high_high.ll + temp1.ll;
+ temp1.s.low = low_high.s.high;
+ temp1.s.high = 0;
+ r.ll = r.ll + temp1.ll + carry;
+
+#if MODE_UNSIGNED == 0
+ /* For signed types, we need to add neg(y) to r, if x < 0. */
+ if (x < 0)
+ r.ll = r.ll - y;
+ /* We need to add neg(x) to r, if y < 0. */
+ if (y < 0)
+ r.ll = r.ll - x;
+#endif
+
+ /* Round the result by adding (1 << (FBITS -1)). */
+ temp1.ll = s.ll;
+ s.ll += ((INT_C_TYPE) 1 << (FBITS -1));
+ if ((UINT_C_TYPE) s.ll < (UINT_C_TYPE) temp1.ll
+ || (UINT_C_TYPE) s.ll < (UINT_C_TYPE) ((INT_C_TYPE) 1 << (FBITS -1)))
+ r.ll += 1;
+
+ /* Shift right the result by FBITS. */
+#if FBITS == FIXED_WIDTH
+ /* This happens only for unsigned types without any padding bits.
+ So, it is safe to set r.ll to 0 as it is logically shifted right. */
+ s.ll = r.ll;
+ r.ll = 0;
+#else
+ s.ll = ((UINT_C_TYPE)s.ll) >> FBITS;
+ temp1.ll = r.ll << (FIXED_WIDTH - FBITS);
+ s.ll = s.ll | temp1.ll;
+ r.ll = r.ll >> FBITS;
+#endif
+
+ if (satp)
+ FIXED_SATURATE2 (&r.ll, &s.ll);
+
+ z = (INT_C_TYPE) s.ll;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+#endif
+}
+#endif /* FIXED_MULHELPER */
+
+#if defined(FIXED_MUL) && defined(L_mul)
+FIXED_C_TYPE
+FIXED_MUL (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_MULHELPER (a, b, 0);
+}
+#endif /* FIXED_MUL */
+
+#if defined(FIXED_SSMUL) && defined(L_ssmul)
+FIXED_C_TYPE
+FIXED_SSMUL (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_MULHELPER (a, b, 1);
+}
+#endif /* FIXED_SSMUL */
+
+#if defined(FIXED_USMUL) && defined(L_usmul)
+FIXED_C_TYPE
+FIXED_USMUL (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_MULHELPER (a, b, 1);
+}
+#endif /* FIXED_USMUL */
+
+#if defined(FIXED_DIVHELPER) && defined(L_divhelper)
+FIXED_C_TYPE
+FIXED_DIVHELPER (FIXED_C_TYPE a, FIXED_C_TYPE b, word_type satp)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y;
+ INT_C_TYPE z;
+
+#if defined (DINT_C_TYPE)
+ DINT_C_TYPE dx, dy, dz;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ dx = (DINT_C_TYPE) x;
+ dy = (DINT_C_TYPE) y;
+ dx = dx << FBITS;
+ dz = dx / dy;
+ if (satp)
+ FIXED_SATURATE1 (&dz);
+ z = (INT_C_TYPE) dz;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+
+#else /* No DINT_C_TYPE */
+ INT_C_TYPE pos_a, pos_b, r, s;
+ INT_C_TYPE quo_r, quo_s, mod, temp;
+ word_type i;
+#if MODE_UNSIGNED == 0
+ word_type num_of_neg = 0;
+#endif
+
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+ pos_a = x;
+ pos_b = y;
+
+#if MODE_UNSIGNED == 0
+ /* If a < 0, negate a. */
+ if (pos_a < 0)
+ {
+ pos_a = -pos_a;
+ num_of_neg ++;
+ }
+ /* If b < 0, negate b. */
+ if (pos_b < 0)
+ {
+ pos_b = -pos_b;
+ num_of_neg ++;
+ }
+#endif
+
+ /* Left shift pos_a to {r, s} by FBITS. */
+#if FBITS == FIXED_WIDTH
+ /* This happens only for unsigned types without any padding bits. */
+ r = pos_a;
+ s = 0;
+#else
+ s = pos_a << FBITS;
+ r = pos_a >> (FIXED_WIDTH - FBITS);
+#endif
+
+ /* Unsigned divide r by pos_b to quo_r. The remainder is in mod. */
+ quo_r = (UINT_C_TYPE)r / (UINT_C_TYPE)pos_b;
+ mod = (UINT_C_TYPE)r % (UINT_C_TYPE)pos_b;
+ quo_s = 0;
+
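+  /* Produce the FIXED_WIDTH fractional quotient bits one per iteration
+     by long division on the double-width dividend {r, s}.  */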
+ for (i = 0; i < FIXED_WIDTH; i++)
+ {
+ /* Record the leftmost bit of mod. */
+      word_type leftmost_mod = (mod >> (FIXED_WIDTH - 1)) & 1;
+ /* Shift left mod by 1 bit. */
+ mod = mod << 1;
+ /* Test the leftmost bit of s to add to mod. */
+ if ((s >> (FIXED_WIDTH - 1)) & 1)
+ mod ++;
+ /* Shift left quo_s by 1 bit. */
+ quo_s = quo_s << 1;
+ /* Try to calculate (mod - pos_b). */
+ temp = mod - pos_b;
+      if (leftmost_mod || (UINT_C_TYPE)mod >= (UINT_C_TYPE)pos_b)
+ {
+ quo_s ++;
+ mod = temp;
+ }
+ /* Shift left s by 1 bit. */
+ s = s << 1;
+ }
+
+#if MODE_UNSIGNED == 0
+ if (num_of_neg == 1)
+ {
+ quo_s = -quo_s;
+ if (quo_s == 0)
+ quo_r = -quo_r;
+ else
+ quo_r = ~quo_r;
+ }
+#endif
+ if (satp)
+ FIXED_SATURATE2 (&quo_r, &quo_s);
+ z = quo_s;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+#endif
+}
+#endif /* FIXED_DIVHELPER */
+
+#if defined(FIXED_DIV) && defined(L_div)
+FIXED_C_TYPE
+FIXED_DIV (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_DIVHELPER (a, b, 0);
+}
+#endif /* FIXED_DIV */
+
+
+#if defined(FIXED_UDIV) && defined(L_udiv)
+FIXED_C_TYPE
+FIXED_UDIV (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_DIVHELPER (a, b, 0);
+}
+#endif /* FIXED_UDIV */
+
+#if defined(FIXED_SSDIV) && defined(L_ssdiv)
+FIXED_C_TYPE
+FIXED_SSDIV (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_DIVHELPER (a, b, 1);
+}
+#endif /* FIXED_SSDIV */
+
+#if defined(FIXED_USDIV) && defined(L_usdiv)
+FIXED_C_TYPE
+FIXED_USDIV (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ return FIXED_DIVHELPER (a, b, 1);
+}
+#endif /* FIXED_USDIV */
+
+#if defined(FIXED_NEG) && defined(L_neg)
+FIXED_C_TYPE
+FIXED_NEG (FIXED_C_TYPE a)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ z = -x;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_NEG */
+
+#if defined(FIXED_SSNEG) && defined(L_ssneg)
+FIXED_C_TYPE
+FIXED_SSNEG (FIXED_C_TYPE a)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, y, z;
+ memcpy (&y, &a, FIXED_SIZE);
+ x = 0;
+ z = x - y;
+ if (((x ^ y) >> I_F_BITS) & 1)
+ {
+ if (((z ^ x) >> I_F_BITS) & 1)
+ {
+ z = 1;
+ z = z << I_F_BITS;
+ if (x >= 0)
+ z--;
+ }
+ }
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_SSNEG */
+
+#if defined(FIXED_USNEG) && defined(L_usneg)
+FIXED_C_TYPE
+FIXED_USNEG (FIXED_C_TYPE a __attribute__ ((__unused__)))
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE z;
+ z = 0;
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_USNEG */
+
+#if defined(FIXED_ASHLHELPER) && defined(L_ashlhelper)
+FIXED_C_TYPE
+FIXED_ASHLHELPER (FIXED_C_TYPE a, word_type b, word_type satp)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, z;
+
+#if defined (DINT_C_TYPE)
+ DINT_C_TYPE dx, dz;
+ memcpy (&x, &a, FIXED_SIZE);
+ dx = (DINT_C_TYPE) x;
+ if (b >= FIXED_WIDTH)
+ dz = dx << FIXED_WIDTH;
+ else
+ dz = dx << b;
+ if (satp)
+ FIXED_SATURATE1 (&dz);
+ z = (INT_C_TYPE) dz;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+
+#else /* No DINT_C_TYPE */
+ INT_C_TYPE r, s;
+ memcpy (&x, &a, FIXED_SIZE);
+ /* We need to shift left x by b bits to {r, s}. */
+ if (b >= FIXED_WIDTH)
+ {
+      /* All of x shifts into the high word; keep x itself there so the
+         saturation test below sees the correct sign and zero-ness
+         (mirroring the DINT_C_TYPE path, which caps the shift).  */
+      r = x;
+ s = 0;
+ }
+ else
+ {
+ s = x << b;
+ r = x >> (FIXED_WIDTH - b);
+ }
+ if (satp)
+ FIXED_SATURATE2 (&r, &s);
+ z = s;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+#endif
+}
+#endif /* FIXED_ASHLHELPER */
+
+#if defined(FIXED_ASHL) && defined(L_ashl)
+FIXED_C_TYPE
+FIXED_ASHL (FIXED_C_TYPE a, word_type b)
+{
+ return FIXED_ASHLHELPER (a, b, 0);
+}
+#endif /* FIXED_ASHL */
+
+#if defined(FIXED_ASHR) && defined(L_ashr)
+FIXED_C_TYPE
+FIXED_ASHR (FIXED_C_TYPE a, word_type b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ z = x >> b;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_ASHR */
+
+#if defined(FIXED_LSHR) && defined(L_lshr)
+FIXED_C_TYPE
+FIXED_LSHR (FIXED_C_TYPE a, word_type b)
+{
+ FIXED_C_TYPE c;
+ INT_C_TYPE x, z;
+ memcpy (&x, &a, FIXED_SIZE);
+ z = x >> b;
+#if HAVE_PADDING_BITS
+ z = z << PADDING_BITS;
+ z = z >> PADDING_BITS;
+#endif
+ memcpy (&c, &z, FIXED_SIZE);
+ return c;
+}
+#endif /* FIXED_LSHR */
+
+#if defined(FIXED_SSASHL) && defined(L_ssashl)
+FIXED_C_TYPE
+FIXED_SSASHL (FIXED_C_TYPE a, word_type b)
+{
+ return FIXED_ASHLHELPER (a, b, 1);
+}
+#endif /* FIXED_SSASHL */
+
+#if defined(FIXED_USASHL) && defined(L_usashl)
+FIXED_C_TYPE
+FIXED_USASHL (FIXED_C_TYPE a, word_type b)
+{
+ return FIXED_ASHLHELPER (a, b, 1);
+}
+#endif /* FIXED_USASHL */
+
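+/* Three-way comparison.  The 0/1/2 result encoding (less than, equal,
+   greater than) is the same convention libgcc's __cmpdi2/__ucmpdi2 use.  */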
+#if defined(FIXED_CMP) && defined(L_cmp)
+word_type
+FIXED_CMP (FIXED_C_TYPE a, FIXED_C_TYPE b)
+{
+ INT_C_TYPE x, y;
+ memcpy (&x, &a, FIXED_SIZE);
+ memcpy (&y, &b, FIXED_SIZE);
+
+ if (x < y)
+ return 0;
+ else if (x > y)
+ return 2;
+
+ return 1;
+}
+#endif /* FIXED_CMP */
+
+/* Fixed -> Fixed. */
+#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+FRACT (FROM_FIXED_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ FROM_INT_C_TYPE x;
+ TO_INT_C_TYPE z;
+ int shift_amount;
+ memcpy (&x, &a, FROM_FIXED_SIZE);
+#if TO_FBITS > FROM_FBITS /* Need left shift. */
+ shift_amount = TO_FBITS - FROM_FBITS;
+ z = (TO_INT_C_TYPE) x;
+ z = z << shift_amount;
+#else /* TO_FBITS <= FROM_FBITS. Need right Shift. */
+ shift_amount = FROM_FBITS - TO_FBITS;
+ x = x >> shift_amount;
+ z = (TO_INT_C_TYPE) x;
+#endif /* TO_FBITS > FROM_FBITS */
+
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* FRACT && FROM_TYPE == 4 && TO_TYPE == 4 */
+
+/* Fixed -> Fixed with saturation. */
+#if defined(SATFRACT) && defined(L_satfract) && FROM_TYPE == 4 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+SATFRACT (FROM_FIXED_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ TO_INT_C_TYPE z;
+ FROM_INT_C_TYPE x;
+#if FROM_MODE_UNSIGNED == 0
+ BIG_SINT_C_TYPE high, low;
+ BIG_SINT_C_TYPE max_high, max_low;
+ BIG_SINT_C_TYPE min_high, min_low;
+#else
+ BIG_UINT_C_TYPE high, low;
+ BIG_UINT_C_TYPE max_high, max_low;
+ BIG_UINT_C_TYPE min_high, min_low;
+#endif
+#if TO_FBITS > FROM_FBITS
+ BIG_UINT_C_TYPE utemp;
+#endif
+#if TO_MODE_UNSIGNED == 0
+ BIG_SINT_C_TYPE stemp;
+#endif
+#if TO_FBITS != FROM_FBITS
+ int shift_amount;
+#endif
+ memcpy (&x, &a, FROM_FIXED_SIZE);
+
+ /* Step 1. We need to store x to {high, low}. */
+#if FROM_MODE_UNSIGNED == 0
+ low = (BIG_SINT_C_TYPE) x;
+ if (x < 0)
+ high = -1;
+ else
+ high = 0;
+#else
+ low = (BIG_UINT_C_TYPE) x;
+ high = 0;
+#endif
+
+ /* Step 2. We need to shift {high, low}. */
+#if TO_FBITS > FROM_FBITS /* Left shift. */
+ shift_amount = TO_FBITS - FROM_FBITS;
+ utemp = (BIG_UINT_C_TYPE) low;
+ utemp = utemp >> (BIG_WIDTH - shift_amount);
+ high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp;
+ low = low << shift_amount;
+#elif TO_FBITS < FROM_FBITS /* Right shift. */
+ shift_amount = FROM_FBITS - TO_FBITS;
+ low = low >> shift_amount;
+#endif
+
+ /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */
+ max_high = 0;
+#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS
+ max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS;
+ max_low = max_low - 1;
+#else
+ max_low = -1;
+#endif
+
+#if TO_MODE_UNSIGNED == 0
+ min_high = -1;
+ stemp = (BIG_SINT_C_TYPE)1 << (BIG_WIDTH - 1);
+ stemp = stemp >> (BIG_WIDTH - 1 - TO_I_F_BITS);
+ min_low = stemp;
+#else
+ min_high = 0;
+ min_low = 0;
+#endif
+
+#if FROM_MODE_UNSIGNED == 0 && TO_MODE_UNSIGNED == 0
+ /* Signed -> Signed. */
+ if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+ else if ((BIG_SINT_C_TYPE) high < (BIG_SINT_C_TYPE) min_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) min_high
+ && (BIG_UINT_C_TYPE) low < (BIG_UINT_C_TYPE) min_low))
+ low = min_low; /* Minimum. */
+#elif FROM_MODE_UNSIGNED == 1 && TO_MODE_UNSIGNED == 1
+  /* Unsigned -> Unsigned. */
+ if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high
+ || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#elif FROM_MODE_UNSIGNED == 0 && TO_MODE_UNSIGNED == 1
+ /* Signed -> Unsigned. */
+ if (x < 0)
+ low = 0; /* Minimum. */
+ else if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high
+ || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#elif FROM_MODE_UNSIGNED == 1 && TO_MODE_UNSIGNED == 0
+ /* Unsigned -> Signed. */
+ if ((BIG_SINT_C_TYPE) high < 0)
+ low = max_low; /* Maximum. */
+ else if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#endif
+
+ /* Step 4. Store the result. */
+ z = (TO_INT_C_TYPE) low;
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(SATFRACT) && FROM_TYPE == 4 && TO_TYPE == 4 */
+
+/* Fixed -> Int. */
+#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 1
+TO_INT_C_TYPE
+FRACT (FROM_FIXED_C_TYPE a)
+{
+ FROM_INT_C_TYPE x;
+ TO_INT_C_TYPE z;
+ FROM_INT_C_TYPE i = 0;
+ memcpy (&x, &a, FROM_FIXED_SIZE);
+
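+  /* The arithmetic right shift below rounds toward minus infinity; when
+     x is negative and has any nonzero fraction bits, add 1 afterwards so
+     the overall conversion truncates toward zero.  */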
+#if FROM_MODE_UNSIGNED == 0
+ if (x < 0)
+ {
+#if FROM_FIXED_WIDTH == FROM_FBITS
+ if (x != 0)
+ i = 1;
+#else
+ if (((FROM_INT_C_TYPE)(x << (FROM_FIXED_WIDTH - FROM_FBITS))) != 0)
+ i = 1;
+#endif
+ }
+#endif
+
+#if FROM_FIXED_WIDTH == FROM_FBITS
+ x = 0;
+#else
+ x = x >> FROM_FBITS;
+#endif
+ x = x + i;
+ z = (TO_INT_C_TYPE) x;
+ return z;
+}
+#endif /* defined(FRACT) && FROM_TYPE == 4 && TO_TYPE == 1 */
+
+/* Fixed -> Unsigned int. */
+#if defined(FRACTUNS) && defined(L_fractuns) && FROM_TYPE == 4 && TO_TYPE == 2
+TO_INT_C_TYPE
+FRACTUNS (FROM_FIXED_C_TYPE a)
+{
+ FROM_INT_C_TYPE x;
+ TO_INT_C_TYPE z;
+ FROM_INT_C_TYPE i = 0;
+ memcpy (&x, &a, FROM_FIXED_SIZE);
+
+#if FROM_MODE_UNSIGNED == 0
+ if (x < 0)
+ {
+#if FROM_FIXED_WIDTH == FROM_FBITS
+ if (x != 0)
+ i = 1;
+#else
+ if (((FROM_INT_C_TYPE)(x << (FROM_FIXED_WIDTH - FROM_FBITS))) != 0)
+ i = 1;
+#endif
+ }
+#endif
+
+#if FROM_FIXED_WIDTH == FROM_FBITS
+ x = 0;
+#else
+ x = x >> FROM_FBITS;
+#endif
+ x = x + i;
+ z = (TO_INT_C_TYPE) x;
+ return z;
+}
+#endif /* defined(FRACTUNS) && FROM_TYPE == 4 && TO_TYPE == 2 */
+
+/* Int -> Fixed. */
+#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 1 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+FRACT (FROM_INT_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ TO_INT_C_TYPE z;
+ z = (TO_INT_C_TYPE) a;
+#if TO_FIXED_WIDTH == TO_FBITS
+ z = 0;
+#else
+ z = z << TO_FBITS;
+#endif
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(FRACT) && FROM_TYPE == 1 && TO_TYPE == 4 */
+
+/* Signed int -> Fixed with saturation. */
+#if defined(SATFRACT) && defined(L_satfract) &&FROM_TYPE == 1 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+SATFRACT (FROM_INT_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ TO_INT_C_TYPE z;
+ FROM_INT_C_TYPE x = a;
+ BIG_SINT_C_TYPE high, low;
+ BIG_SINT_C_TYPE max_high, max_low;
+ BIG_SINT_C_TYPE min_high, min_low;
+#if TO_MODE_UNSIGNED == 0
+ BIG_SINT_C_TYPE stemp;
+#endif
+#if BIG_WIDTH != TO_FBITS
+ BIG_UINT_C_TYPE utemp;
+ int shift_amount;
+#endif
+
+ /* Step 1. We need to store x to {high, low}. */
+ low = (BIG_SINT_C_TYPE) x;
+ if (x < 0)
+ high = -1;
+ else
+ high = 0;
+
+ /* Step 2. We need to left shift {high, low}. */
+#if BIG_WIDTH == TO_FBITS
+ high = low;
+ low = 0;
+#else
+ shift_amount = TO_FBITS;
+ utemp = (BIG_UINT_C_TYPE) low;
+ utemp = utemp >> (BIG_WIDTH - shift_amount);
+ high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp;
+ low = low << shift_amount;
+#endif
+
+ /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */
+ max_high = 0;
+#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS
+ max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS;
+ max_low = max_low - 1;
+#else
+ max_low = -1;
+#endif
+
+#if TO_MODE_UNSIGNED == 0
+ min_high = -1;
+ stemp = (BIG_SINT_C_TYPE)1 << (BIG_WIDTH - 1);
+ stemp = stemp >> (BIG_WIDTH - 1 - TO_I_F_BITS);
+ min_low = stemp;
+#else
+ min_high = 0;
+ min_low = 0;
+#endif
+
+#if TO_MODE_UNSIGNED == 0
+ /* Signed -> Signed. */
+ if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+ else if ((BIG_SINT_C_TYPE) high < (BIG_SINT_C_TYPE) min_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) min_high
+ && (BIG_UINT_C_TYPE) low < (BIG_UINT_C_TYPE) min_low))
+ low = min_low; /* Minimum. */
+#else
+ /* Signed -> Unsigned. */
+ if (x < 0)
+ low = 0; /* Minimum. */
+ else if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high
+ || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#endif
+
+ /* Step 4. Store the result. */
+ z = (TO_INT_C_TYPE) low;
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(SATFRACT) && FROM_TYPE == 1 && TO_TYPE == 4 */
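Steps 1-3 above emulate a double-width value with the {high, low} pair, because BIG_*_C_TYPE may be no wider than the operands. Collapsing the pair into one genuinely wider integer, the clamp reduces to the following sketch for a QQ-like signed 8-bit fract (I_F_BITS == 7), where any integer input of 1 or more saturates to the largest representable value:

#include <stdio.h>

int main (void)
{
  long long shifted = 1LL << 7;      /* int 1 scaled by 2^FBITS */
  long long max = (1LL << 7) - 1;    /* raw 127, about 0.9921875 */
  long long min = -(1LL << 7);       /* raw -128, i.e. -1.0 */
  if (shifted > max) shifted = max;
  if (shifted < min) shifted = min;
  printf ("%lld\n", shifted);        /* prints 127 */
  return 0;
}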
+
+/* Unsigned int -> Fixed. */
+#if defined(FRACTUNS) && defined(L_fractuns) && FROM_TYPE == 2 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+FRACTUNS (FROM_INT_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ TO_INT_C_TYPE z;
+ z = (TO_INT_C_TYPE) a;
+#if TO_FIXED_WIDTH == TO_FBITS
+ z = 0;
+#else
+ z = z << TO_FBITS;
+#endif
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(FRACTUNS) && FROM_TYPE == 2 && TO_TYPE == 4 */
+
+/* Unsigned int -> Fixed with saturation. */
+#if defined(SATFRACTUNS) && defined(L_satfractuns) && FROM_TYPE == 2 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+SATFRACTUNS (FROM_INT_C_TYPE a)
+{
+ TO_FIXED_C_TYPE c;
+ TO_INT_C_TYPE z;
+ FROM_INT_C_TYPE x = a;
+ BIG_UINT_C_TYPE high, low;
+ BIG_UINT_C_TYPE max_high, max_low;
+#if BIG_WIDTH != TO_FBITS
+ BIG_UINT_C_TYPE utemp;
+ int shift_amount;
+#endif
+
+ /* Step 1. We need to store x into {high, low}. */
+ low = (BIG_UINT_C_TYPE) x;
+ high = 0;
+
+ /* Step 2. We need to left shift {high, low}. */
+#if BIG_WIDTH == TO_FBITS
+ high = low;
+ low = 0;
+#else
+ shift_amount = TO_FBITS;
+ utemp = (BIG_UINT_C_TYPE) low;
+ utemp = utemp >> (BIG_WIDTH - shift_amount);
+ high = ((BIG_UINT_C_TYPE)(high << shift_amount)) | utemp;
+ low = low << shift_amount;
+#endif
+
+ /* Step 3. Compare {high, low} with max and min of TO_FIXED_C_TYPE. */
+ max_high = 0;
+#if BIG_WIDTH > TO_FIXED_WIDTH || TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS
+ max_low = (BIG_UINT_C_TYPE)1 << TO_I_F_BITS;
+ max_low = max_low - 1;
+#else
+ max_low = -1;
+#endif
+
+#if TO_MODE_UNSIGNED == 1
+ /* Unsigned -> Unsigned. */
+ if ((BIG_UINT_C_TYPE) high > (BIG_UINT_C_TYPE) max_high
+ || ((BIG_UINT_C_TYPE) high == (BIG_UINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#else
+ /* Unsigned -> Signed. */
+ if ((BIG_SINT_C_TYPE) high < 0)
+ low = max_low; /* Maximum. */
+ else if ((BIG_SINT_C_TYPE) high > (BIG_SINT_C_TYPE) max_high
+ || ((BIG_SINT_C_TYPE) high == (BIG_SINT_C_TYPE) max_high
+ && (BIG_UINT_C_TYPE) low > (BIG_UINT_C_TYPE) max_low))
+ low = max_low; /* Maximum. */
+#endif
+
+ /* Step 4. Store the result. */
+ z = (TO_INT_C_TYPE) low;
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(SATFRACTUNS) && FROM_TYPE == 2 && TO_TYPE == 4 */
+
+/* Fixed -> Float. */
+#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 4 && TO_TYPE == 3
+TO_FLOAT_C_TYPE
+FRACT (FROM_FIXED_C_TYPE a)
+{
+ FROM_INT_C_TYPE x;
+ TO_FLOAT_C_TYPE z;
+ memcpy (&x, &a, FROM_FIXED_SIZE);
+ z = (TO_FLOAT_C_TYPE) x;
+ z = z / BASE;
+ return z;
+}
+#endif /* defined(FRACT) && FROM_TYPE == 4 && TO_TYPE == 3 */
+
+/* Float -> Fixed. */
+#if defined(FRACT) && defined(L_fract) && FROM_TYPE == 3 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+FRACT (FROM_FLOAT_C_TYPE a)
+{
+ FROM_FLOAT_C_TYPE temp;
+ TO_INT_C_TYPE z;
+ TO_FIXED_C_TYPE c;
+
+ temp = a * BASE;
+ z = (TO_INT_C_TYPE) temp;
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(FRACT) && FROM_TYPE == 3 && TO_TYPE == 4 */
+
+/* Float -> Fixed with saturation. */
+#if defined(SATFRACT) && defined(L_satfract) && FROM_TYPE == 3 && TO_TYPE == 4
+TO_FIXED_C_TYPE
+SATFRACT (FROM_FLOAT_C_TYPE a)
+{
+ FROM_FLOAT_C_TYPE temp;
+ TO_INT_C_TYPE z;
+ TO_FIXED_C_TYPE c;
+
+ if (a >= FIXED_MAX)
+ {
+#if TO_MODE_UNSIGNED == 0 || TO_HAVE_PADDING_BITS
+ z = (TO_INT_C_TYPE)1 << TO_I_F_BITS;
+ z = z - 1;
+#else
+ z = -1;
+#endif
+ }
+ else if (a <= FIXED_MIN)
+ {
+#if TO_MODE_UNSIGNED == 0
+ z = (TO_INT_C_TYPE)1 << TO_I_F_BITS;
+#else
+ z = 0;
+#endif
+ }
+ else
+ {
+ temp = a * BASE;
+ z = (TO_INT_C_TYPE) temp;
+ }
+
+#if TO_HAVE_PADDING_BITS
+ z = z << TO_PADDING_BITS;
+ z = z >> TO_PADDING_BITS;
+#endif
+ memcpy (&c, &z, TO_FIXED_SIZE);
+ return c;
+}
+#endif /* defined(SATFRACT) && FROM_TYPE == 3 && TO_TYPE == 4 */
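As a worked case of the saturation bounds, take a QQ-like signed 8-bit fract destination (TO_IBITS == 0, TO_FBITS == 7): FIXED_MAX is 0x1.0p0 - 0x1.0p-7, about 0.9921875, and FIXED_MIN is -1.0, so converting 1.0f clamps to raw 127. A standalone sketch of that path:

#include <stdio.h>

int main (void)
{
  float a = 1.0f;
  signed char z;
  if (a >= 0x1.0p0f - 0x1.0p-7f)
    z = (1 << 7) - 1;                  /* saturate to 127 (0.9921875) */
  else if (a <= -0x1.0p0f)
    z = -(1 << 7);                     /* saturate to -128 (-1.0) */
  else
    z = (signed char) (a * 0x1.0p7f); /* scale by 2^FBITS, truncate */
  printf ("%d\n", z);                  /* prints 127 */
  return 0;
}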
+
diff --git a/gcc/config/fixed-bit.h b/gcc/config/fixed-bit.h
new file mode 100644
index 000000000..562772d71
--- /dev/null
+++ b/gcc/config/fixed-bit.h
@@ -0,0 +1,1273 @@
+/* This is a software fixed-point library.
+ Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _FIXED_BIT_H
+#define _FIXED_BIT_H
+
+/* To use this file we need to define one of the following:
+ QQ_MODE, UQQ_MODE, HQ_MODE, UHQ_MODE, SQ_MODE, USQ_MODE, DQ_MODE, UDQ_MODE,
+ TQ_MODE, UTQ_MODE, HA_MODE, UHA_MODE, SA_MODE, USA_MODE, DA_MODE, UDA_MODE,
+ TA_MODE, UTA_MODE.
+ Then, all operators for this machine mode will be created.
+
+ Or, we need to define FROM_* TO_* for conversions from one mode to another
+ mode. The mode could be one of the following:
+ Fract: QQ, UQQ, HQ, UHQ, SQ, USQ, DQ, UDQ, TQ, UTQ
+ Accum: HA, UHA, SA, USA, DA, UDA, TA, UTA
+ Signed integer: QI, HI, SI, DI, TI
+ Unsigned integer: UQI, UHI, USI, UDI, UTI
+ Floating-point: SF, DF
+ Ex: If we define FROM_QQ and TO_SI, the conversion from QQ to SI is
+ generated. */
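In the libgcc build, each object file is produced by compiling the fixed-point source once per mode or conversion with the corresponding macros predefined. Schematically (the exact rules and object names come from the libgcc makefiles, so treat these invocations as illustrative only):

cc -DQQ_MODE -DL_add -c fixed-bit.c -o _addQQ.o               # the QQ add routine
cc -DFROM_QQ -DTO_SI -DL_fract -c fixed-bit.c -o _fractQQSI.o # QQ -> SI conversion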
+
+#ifndef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE LONG_DOUBLE_TYPE_SIZE
+#endif
+
+#ifndef LIBGCC2_HAS_SF_MODE
+#define LIBGCC2_HAS_SF_MODE (BITS_PER_UNIT == 8)
+#endif
+
+#ifndef LIBGCC2_HAS_DF_MODE
+#define LIBGCC2_HAS_DF_MODE \
+ (BITS_PER_UNIT == 8 \
+ && (__SIZEOF_DOUBLE__ * __CHAR_BIT__ == 64 \
+ || LIBGCC2_LONG_DOUBLE_TYPE_SIZE == 64))
+#endif
+
+typedef int QItype __attribute__ ((mode (QI)));
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef int HItype __attribute__ ((mode (HI)));
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef _Fract QQtype __attribute__ ((mode (QQ)));
+typedef unsigned _Fract UQQtype __attribute__ ((mode (UQQ)));
+typedef _Fract HQtype __attribute__ ((mode (HQ)));
+typedef unsigned _Fract UHQtype __attribute__ ((mode (UHQ)));
+typedef _Fract HAtype __attribute__ ((mode (HA)));
+typedef unsigned _Fract UHAtype __attribute__ ((mode (UHA)));
+#define HAVE_QQ 1
+#define HAVE_UQQ 1
+#define HAVE_HQ 1
+#define HAVE_UHQ 1
+#define HAVE_HA 1
+#define HAVE_UHA 1
+#define HAVE_QI 1
+#define HAVE_UQI 1
+#define HAVE_HI 1
+#define HAVE_UHI 1
+#if MIN_UNITS_PER_WORD > 1
+/* These typedefs are usually forbidden on DSPs with UNITS_PER_WORD 1. */
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef _Fract SQtype __attribute__ ((mode (SQ)));
+typedef unsigned _Fract USQtype __attribute__ ((mode (USQ)));
+typedef _Fract SAtype __attribute__ ((mode (SA)));
+typedef unsigned _Fract USAtype __attribute__ ((mode (USA)));
+#define HAVE_SQ 1
+#define HAVE_USQ 1
+#define HAVE_SA 1
+#define HAVE_USA 1
+#define HAVE_SI 1
+#define HAVE_USI 1
+#if LONG_LONG_TYPE_SIZE > 32
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 2. */
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+typedef _Fract DQtype __attribute__ ((mode (DQ)));
+typedef unsigned _Fract UDQtype __attribute__ ((mode (UDQ)));
+typedef _Fract DAtype __attribute__ ((mode (DA)));
+typedef unsigned _Fract UDAtype __attribute__ ((mode (UDA)));
+#define HAVE_DQ 1
+#define HAVE_UDQ 1
+#define HAVE_DA 1
+#define HAVE_UDA 1
+#define HAVE_DI 1
+#define HAVE_UDI 1
+#if MIN_UNITS_PER_WORD > 4
+/* These typedefs are usually forbidden on archs with UNITS_PER_WORD 4. */
+typedef int TItype __attribute__ ((mode (TI)));
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef _Fract TQtype __attribute__ ((mode (TQ)));
+typedef unsigned _Fract UTQtype __attribute__ ((mode (UTQ)));
+typedef _Fract TAtype __attribute__ ((mode (TA)));
+typedef unsigned _Fract UTAtype __attribute__ ((mode (UTA)));
+#define HAVE_TQ 1
+#define HAVE_UTQ 1
+#define HAVE_TA 1
+#define HAVE_UTA 1
+#define HAVE_TI 1
+#define HAVE_UTI 1
+#endif
+#endif
+#endif
+
+#if LIBGCC2_HAS_SF_MODE
+typedef float SFtype __attribute__ ((mode (SF)));
+#define HAVE_SF 1
+#endif
+#if LIBGCC2_HAS_DF_MODE
+typedef float DFtype __attribute__ ((mode (DF)));
+#define HAVE_DF 1
+#endif
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+/* Based on modes, we create many defines. */
+
+#if defined (QQ_MODE) && (HAVE_QQ == 1)
+#define FIXED_SIZE 1 /* in bytes. */
+#define INT_C_TYPE QItype
+#define UINT_C_TYPE UQItype
+#define DINT_C_TYPE HItype
+#define DUINT_C_TYPE UHItype
+#define MODE_NAME QQ
+#define MODE_NAME_S qq
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UQQ_MODE) && (HAVE_UQQ == 1)
+#define FIXED_SIZE 1 /* in bytes. */
+#define INT_C_TYPE UQItype
+#define UINT_C_TYPE UQItype
+#define DINT_C_TYPE UHItype
+#define DUINT_C_TYPE UHItype
+#define MODE_NAME UQQ
+#define MODE_NAME_S uqq
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (HQ_MODE) && (HAVE_HQ == 1)
+#define FIXED_SIZE 2 /* in bytes. */
+#define INT_C_TYPE HItype
+#define UINT_C_TYPE UHItype
+
+#if HAVE_SI == 1
+#define DINT_C_TYPE SItype
+#define DUINT_C_TYPE USItype
+#else
+#define HINT_C_TYPE QItype
+#define HUINT_C_TYPE UQItype
+#endif
+
+#define MODE_NAME HQ
+#define MODE_NAME_S hq
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UHQ_MODE) && (HAVE_UHQ == 1)
+#define FIXED_SIZE 2 /* in bytes. */
+#define INT_C_TYPE UHItype
+#define UINT_C_TYPE UHItype
+
+#if HAVE_SI == 1
+#define DINT_C_TYPE USItype
+#define DUINT_C_TYPE USItype
+#else
+#define HINT_C_TYPE UQItype
+#define HUINT_C_TYPE UQItype
+#endif
+
+#define MODE_NAME UHQ
+#define MODE_NAME_S uhq
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (SQ_MODE) && (HAVE_SQ == 1)
+#define FIXED_SIZE 4 /* in bytes. */
+#define INT_C_TYPE SItype
+#define UINT_C_TYPE USItype
+
+#if HAVE_DI == 1
+#define DINT_C_TYPE DItype
+#define DUINT_C_TYPE UDItype
+#else
+#define HINT_C_TYPE HItype
+#define HUINT_C_TYPE UHItype
+#endif
+
+#define MODE_NAME SQ
+#define MODE_NAME_S sq
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (USQ_MODE) && (HAVE_USQ == 1)
+#define FIXED_SIZE 4 /* in bytes. */
+#define INT_C_TYPE USItype
+#define UINT_C_TYPE USItype
+
+#if HAVE_DI == 1
+#define DINT_C_TYPE UDItype
+#define DUINT_C_TYPE UDItype
+#else
+#define HINT_C_TYPE UHItype
+#define HUINT_C_TYPE UHItype
+#endif
+
+#define MODE_NAME USQ
+#define MODE_NAME_S usq
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (DQ_MODE) && (HAVE_DQ == 1)
+#define FIXED_SIZE 8 /* in bytes. */
+#define INT_C_TYPE DItype
+#define UINT_C_TYPE UDItype
+
+#if HAVE_TI == 1
+#define DINT_C_TYPE TItype
+#define DUINT_C_TYPE UTItype
+#else
+#define HINT_C_TYPE SItype
+#define HUINT_C_TYPE USItype
+#endif
+
+#define MODE_NAME DQ
+#define MODE_NAME_S dq
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UDQ_MODE) && (HAVE_UDQ == 1)
+#define FIXED_SIZE 8 /* in bytes. */
+#define INT_C_TYPE UDItype
+#define UINT_C_TYPE UDItype
+
+#if HAVE_TI == 1
+#define DINT_C_TYPE UTItype
+#define DUINT_C_TYPE UTItype
+#else
+#define HINT_C_TYPE USItype
+#define HUINT_C_TYPE USItype
+#endif
+
+#define MODE_NAME UDQ
+#define MODE_NAME_S udq
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (TQ_MODE) && (HAVE_TQ == 1)
+#define FIXED_SIZE 16 /* in bytes. */
+#define INT_C_TYPE TItype
+#define UINT_C_TYPE UTItype
+#define HINT_C_TYPE DItype
+#define HUINT_C_TYPE UDItype
+#define MODE_NAME TQ
+#define MODE_NAME_S tq
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UTQ_MODE) && (HAVE_UTQ == 1)
+#define FIXED_SIZE 16 /* in bytes. */
+#define INT_C_TYPE UTItype
+#define UINT_C_TYPE UTItype
+#define HINT_C_TYPE UDItype
+#define HUINT_C_TYPE UDItype
+#define MODE_NAME UTQ
+#define MODE_NAME_S utq
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (HA_MODE) && (HAVE_HA == 1)
+#define FIXED_SIZE 2 /* in bytes. */
+#define INT_C_TYPE HItype
+#define UINT_C_TYPE UHItype
+
+#if HAVE_SI == 1
+#define DINT_C_TYPE SItype
+#define DUINT_C_TYPE USItype
+#else
+#define HINT_C_TYPE QItype
+#define HUINT_C_TYPE UQItype
+#endif
+
+#define MODE_NAME HA
+#define MODE_NAME_S ha
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UHA_MODE) && (HAVE_UHA == 1)
+#define FIXED_SIZE 2 /* in bytes. */
+#define INT_C_TYPE UHItype
+#define UINT_C_TYPE UHItype
+
+#if HAVE_SI == 1
+#define DINT_C_TYPE USItype
+#define DUINT_C_TYPE USItype
+#else
+#define HINT_C_TYPE UQItype
+#define HUINT_C_TYPE UQItype
+#endif
+
+#define MODE_NAME UHA
+#define MODE_NAME_S uha
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (SA_MODE) && (HAVE_SA == 1)
+#define FIXED_SIZE 4 /* in bytes. */
+#define INT_C_TYPE SItype
+#define UINT_C_TYPE USItype
+
+#if HAVE_DI == 1
+#define DINT_C_TYPE DItype
+#define DUINT_C_TYPE UDItype
+#else
+#define HINT_C_TYPE HItype
+#define HUINT_C_TYPE UHItype
+#endif
+
+#define MODE_NAME SA
+#define MODE_NAME_S sa
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (USA_MODE) && (HAVE_USA == 1)
+#define FIXED_SIZE 4 /* in bytes. */
+#define INT_C_TYPE USItype
+#define UINT_C_TYPE USItype
+
+#if HAVE_DI == 1
+#define DINT_C_TYPE UDItype
+#define DUINT_C_TYPE UDItype
+#else
+#define HINT_C_TYPE UHItype
+#define HUINT_C_TYPE UHItype
+#endif
+
+#define MODE_NAME USA
+#define MODE_NAME_S usa
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (DA_MODE) && (HAVE_DA == 1)
+#define FIXED_SIZE 8 /* in bytes. */
+#define INT_C_TYPE DItype
+#define UINT_C_TYPE UDItype
+
+#if HAVE_TI == 1
+#define DINT_C_TYPE TItype
+#define DUINT_C_TYPE UTItype
+#else
+#define HINT_C_TYPE SItype
+#define HUINT_C_TYPE USItype
+#endif
+
+#define MODE_NAME DA
+#define MODE_NAME_S da
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UDA_MODE) && (HAVE_UDA == 1)
+#define FIXED_SIZE 8 /* in bytes. */
+#define INT_C_TYPE UDItype
+#define UINT_C_TYPE UDItype
+
+#if HAVE_TI == 1
+#define DINT_C_TYPE UTItype
+#define DUINT_C_TYPE UTItype
+#else
+#define HINT_C_TYPE USItype
+#define HUINT_C_TYPE USItype
+#endif
+
+#define MODE_NAME UDA
+#define MODE_NAME_S uda
+#define MODE_UNSIGNED 1
+#endif
+
+#if defined (TA_MODE) && (HAVE_TA == 1)
+#define FIXED_SIZE 16 /* in bytes. */
+#define INT_C_TYPE TItype
+#define UINT_C_TYPE UTItype
+#define HINT_C_TYPE DItype
+#define HUINT_C_TYPE UDItype
+#define MODE_NAME TA
+#define MODE_NAME_S ta
+#define MODE_UNSIGNED 0
+#endif
+
+#if defined (UTA_MODE) && (HAVE_UTA == 1)
+#define FIXED_SIZE 16 /* in bytes. */
+#define INT_C_TYPE UTItype
+#define UINT_C_TYPE UTItype
+#define HINT_C_TYPE UDItype
+#define HUINT_C_TYPE UDItype
+#define MODE_NAME UTA
+#define MODE_NAME_S uta
+#define MODE_UNSIGNED 1
+#endif
+
+/* The following defines are based on the previous defines. */
+
+#if defined (HINT_C_TYPE)
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+ struct INTstruct {HINT_C_TYPE high, low;};
+#else
+ struct INTstruct {HINT_C_TYPE low, high;};
+#endif
+
+typedef union
+{
+ struct INTstruct s;
+ INT_C_TYPE ll;
+} INTunion;
+#endif
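When no double-width integer type exists, INTunion lets the helpers address a value as a {high, low} pair in memory order. A sketch with concrete 8/16-bit types on a little-endian target (the struct member order flips on big-endian, per the #if above):

#include <stdio.h>

struct INTstruct { signed char low, high; };   /* little-endian layout */
typedef union { struct INTstruct s; short ll; } INTunion;

int main (void)
{
  INTunion u;
  u.ll = 0x1234;
  printf ("high=0x%02x low=0x%02x\n",          /* high=0x12 low=0x34 */
          (unsigned char) u.s.high, (unsigned char) u.s.low);
  return 0;
}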
+
+#define FIXED_WIDTH (FIXED_SIZE * BITS_PER_UNIT) /* in bits. */
+#define FIXED_C_TYPE1(NAME) NAME ## type
+#define FIXED_C_TYPE2(NAME) FIXED_C_TYPE1(NAME)
+#define FIXED_C_TYPE FIXED_C_TYPE2(MODE_NAME)
+#define FBITS1(NAME) __ ## NAME ## _FBIT__
+#define FBITS2(NAME) FBITS1(NAME)
+#define FBITS FBITS2(MODE_NAME)
+#define IBITS1(NAME) __ ## NAME ## _IBIT__
+#define IBITS2(NAME) IBITS1(NAME)
+#define IBITS IBITS2(MODE_NAME)
+#define I_F_BITS (FBITS + IBITS)
+
+#define FIXED_OP(OP,MODE,NUM) OP ## MODE ## NUM
+
+#define FIXED_SATURATE1_TEMP(NAME) FIXED_OP(__saturate1,NAME,)
+#define FIXED_SATURATE2_TEMP(NAME) FIXED_OP(__saturate2,NAME,)
+#define FIXED_MULHELPER_TEMP(NAME) FIXED_OP(__mulhelper,NAME,)
+#define FIXED_DIVHELPER_TEMP(NAME) FIXED_OP(__divhelper,NAME,)
+#define FIXED_ASHLHELPER_TEMP(NAME) FIXED_OP(__ashlhelper,NAME,)
+#define FIXED_ADD_TEMP(NAME) FIXED_OP(__add,NAME,3)
+#define FIXED_SSADD_TEMP(NAME) FIXED_OP(__ssadd,NAME,3)
+#define FIXED_USADD_TEMP(NAME) FIXED_OP(__usadd,NAME,3)
+#define FIXED_SUB_TEMP(NAME) FIXED_OP(__sub,NAME,3)
+#define FIXED_SSSUB_TEMP(NAME) FIXED_OP(__sssub,NAME,3)
+#define FIXED_USSUB_TEMP(NAME) FIXED_OP(__ussub,NAME,3)
+#define FIXED_MUL_TEMP(NAME) FIXED_OP(__mul,NAME,3)
+#define FIXED_SSMUL_TEMP(NAME) FIXED_OP(__ssmul,NAME,3)
+#define FIXED_USMUL_TEMP(NAME) FIXED_OP(__usmul,NAME,3)
+#define FIXED_DIV_TEMP(NAME) FIXED_OP(__div,NAME,3)
+#define FIXED_UDIV_TEMP(NAME) FIXED_OP(__udiv,NAME,3)
+#define FIXED_SSDIV_TEMP(NAME) FIXED_OP(__ssdiv,NAME,3)
+#define FIXED_USDIV_TEMP(NAME) FIXED_OP(__usdiv,NAME,3)
+#define FIXED_NEG_TEMP(NAME) FIXED_OP(__neg,NAME,2)
+#define FIXED_SSNEG_TEMP(NAME) FIXED_OP(__ssneg,NAME,2)
+#define FIXED_USNEG_TEMP(NAME) FIXED_OP(__usneg,NAME,2)
+#define FIXED_ASHL_TEMP(NAME) FIXED_OP(__ashl,NAME,3)
+#define FIXED_ASHR_TEMP(NAME) FIXED_OP(__ashr,NAME,3)
+#define FIXED_LSHR_TEMP(NAME) FIXED_OP(__lshr,NAME,3)
+#define FIXED_SSASHL_TEMP(NAME) FIXED_OP(__ssashl,NAME,3)
+#define FIXED_USASHL_TEMP(NAME) FIXED_OP(__usashl,NAME,3)
+#define FIXED_CMP_TEMP(NAME) FIXED_OP(__cmp,NAME,2)
+
+#if defined (MODE_NAME)
+#if defined (DINT_C_TYPE)
+#define FIXED_SATURATE1 FIXED_SATURATE1_TEMP(MODE_NAME_S)
+#else
+#define FIXED_SATURATE2 FIXED_SATURATE2_TEMP(MODE_NAME_S)
+#endif
+#define FIXED_MULHELPER FIXED_MULHELPER_TEMP(MODE_NAME_S)
+#define FIXED_DIVHELPER FIXED_DIVHELPER_TEMP(MODE_NAME_S)
+#define FIXED_ASHLHELPER FIXED_ASHLHELPER_TEMP(MODE_NAME_S)
+#define FIXED_ADD FIXED_ADD_TEMP(MODE_NAME_S)
+#define FIXED_SUB FIXED_SUB_TEMP(MODE_NAME_S)
+#define FIXED_MUL FIXED_MUL_TEMP(MODE_NAME_S)
+#define FIXED_NEG FIXED_NEG_TEMP(MODE_NAME_S)
+#define FIXED_ASHL FIXED_ASHL_TEMP(MODE_NAME_S)
+#define FIXED_CMP FIXED_CMP_TEMP(MODE_NAME_S)
+
+/* The following functions are for all fixed-point modes. */
+#if defined (DINT_C_TYPE)
+extern void FIXED_SATURATE1 (DINT_C_TYPE *);
+#else
+extern void FIXED_SATURATE2 (INT_C_TYPE *, INT_C_TYPE *);
+#endif
+extern FIXED_C_TYPE FIXED_MULHELPER (FIXED_C_TYPE, FIXED_C_TYPE, word_type);
+extern FIXED_C_TYPE FIXED_DIVHELPER (FIXED_C_TYPE, FIXED_C_TYPE, word_type);
+extern FIXED_C_TYPE FIXED_ASHLHELPER (FIXED_C_TYPE, word_type, word_type);
+extern FIXED_C_TYPE FIXED_ADD (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SUB (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_MUL (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_NEG (FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_ASHL (FIXED_C_TYPE, word_type);
+extern word_type FIXED_CMP (FIXED_C_TYPE, FIXED_C_TYPE);
+#endif
+
+#if MODE_UNSIGNED == 0 /* Signed types. */
+#define PADDING_BITS (FIXED_WIDTH - 1 - I_F_BITS)
+#define NONPADDING_BITS (1 + I_F_BITS)
+
+#if defined (MODE_NAME)
+#define FIXED_DIV FIXED_DIV_TEMP(MODE_NAME_S)
+#define FIXED_ASHR FIXED_ASHR_TEMP(MODE_NAME_S)
+#define FIXED_SSADD FIXED_SSADD_TEMP(MODE_NAME_S)
+#define FIXED_SSSUB FIXED_SSSUB_TEMP(MODE_NAME_S)
+#define FIXED_SSMUL FIXED_SSMUL_TEMP(MODE_NAME_S)
+#define FIXED_SSDIV FIXED_SSDIV_TEMP(MODE_NAME_S)
+#define FIXED_SSNEG FIXED_SSNEG_TEMP(MODE_NAME_S)
+#define FIXED_SSASHL FIXED_SSASHL_TEMP(MODE_NAME_S)
+
+/* The following functions are for signed fixed-point modes. */
+extern FIXED_C_TYPE FIXED_DIV (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_ASHR (FIXED_C_TYPE, word_type);
+extern FIXED_C_TYPE FIXED_SSADD (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SSSUB (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SSMUL (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SSDIV (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SSNEG (FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_SSASHL (FIXED_C_TYPE, word_type);
+#endif
+
+#else /* Unsigned types. */
+#define PADDING_BITS (FIXED_WIDTH - I_F_BITS)
+#define NONPADDING_BITS (I_F_BITS)
+
+#if defined (MODE_NAME)
+#define FIXED_UDIV FIXED_UDIV_TEMP(MODE_NAME_S)
+#define FIXED_LSHR FIXED_LSHR_TEMP(MODE_NAME_S)
+#define FIXED_USDIV FIXED_USDIV_TEMP(MODE_NAME_S)
+#define FIXED_USADD FIXED_USADD_TEMP(MODE_NAME_S)
+#define FIXED_USSUB FIXED_USSUB_TEMP(MODE_NAME_S)
+#define FIXED_USMUL FIXED_USMUL_TEMP(MODE_NAME_S)
+#define FIXED_USNEG FIXED_USNEG_TEMP(MODE_NAME_S)
+#define FIXED_USASHL FIXED_USASHL_TEMP(MODE_NAME_S)
+
+/* The following functions are for unsigned fixed-point modes. */
+extern FIXED_C_TYPE FIXED_UDIV (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_LSHR (FIXED_C_TYPE, word_type);
+extern FIXED_C_TYPE FIXED_USADD (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_USSUB (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_USMUL (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_USDIV (FIXED_C_TYPE, FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_USNEG (FIXED_C_TYPE);
+extern FIXED_C_TYPE FIXED_USASHL (FIXED_C_TYPE, word_type);
+#endif
+
+#endif /* End of testing MODE_UNSIGNED. */
+
+/* This define checks whether this mode has any padding bits. */
+#define HAVE_PADDING_BITS (PADDING_BITS > 0)
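For instance, on a typical target where the 16-bit signed accum HA has __HA_IBIT__ == 8 and __HA_FBIT__ == 7, PADDING_BITS is 16 - 1 - (8 + 7) = 0; a target that instead packed, say, a signed fract with 7 fractional bits into a 16-bit container would get 16 - 1 - 7 = 8 padding bits, and the shift-left/shift-right pairs in the conversion routines exist precisely to sign-extend results through such bits.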
+
+/* ------------------------------------------------------------------------ */
+/* The following defines are for conversions. */
+
+#if defined (FROM_QI) && HAVE_QI == 1
+#define FROM_TYPE 1 /* Signed integer. */
+#define FROM_INT_C_TYPE QItype
+#define FROM_SINT_C_TYPE QItype
+#define FROM_UINT_C_TYPE UQItype
+#define FROM_MODE_NAME_S qi
+#define FROM_INT_SIZE 1 /* in bytes. */
+
+#elif defined (FROM_HI) && HAVE_HI == 1
+#define FROM_TYPE 1 /* Signed integer. */
+#define FROM_INT_C_TYPE HItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_NAME_S hi
+#define FROM_INT_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_SI) && HAVE_SI == 1
+#define FROM_TYPE 1 /* Signed integer. */
+#define FROM_INT_C_TYPE SItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_NAME_S si
+#define FROM_INT_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_DI) && HAVE_DI == 1
+#define FROM_TYPE 1 /* Signed integer. */
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_NAME_S di
+#define FROM_INT_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_TI) && HAVE_TI == 1
+#define FROM_TYPE 1 /* Signed integer. */
+#define FROM_INT_C_TYPE TItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_NAME_S ti
+#define FROM_INT_SIZE 16 /* in bytes. */
+
+#elif defined (FROM_UQI) && HAVE_UQI == 1
+#define FROM_TYPE 2 /* Unsigned integer. */
+#define FROM_INT_C_TYPE QItype
+#define FROM_SINT_C_TYPE QItype
+#define FROM_UINT_C_TYPE UQItype
+#define FROM_MODE_NAME_S qi
+#define FROM_INT_SIZE 1 /* in bytes. */
+
+#elif defined (FROM_UHI) && HAVE_UHI == 1
+#define FROM_TYPE 2 /* Unsigned integer. */
+#define FROM_INT_C_TYPE UHItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_NAME_S hi
+#define FROM_INT_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_USI) && HAVE_USI == 1
+#define FROM_TYPE 2 /* Unsigned integer. */
+#define FROM_INT_C_TYPE USItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_NAME_S si
+#define FROM_INT_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_UDI) && HAVE_UDI == 1
+#define FROM_TYPE 2 /* Unsigned integer. */
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_NAME_S di
+#define FROM_INT_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_UTI) && HAVE_UTI == 1
+#define FROM_TYPE 2 /* Unsigned integer. */
+#define FROM_INT_C_TYPE UTItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_NAME_S ti
+#define FROM_INT_SIZE 16 /* in bytes. */
+
+#elif defined (FROM_SF) && HAVE_SF == 1
+#define FROM_TYPE 3 /* Floating-point. */
+#define FROM_FLOAT_C_TYPE SFtype
+#define FROM_MODE_NAME_S sf
+
+#elif defined (FROM_DF) && HAVE_DF == 1
+#define FROM_TYPE 3 /* Floating-point. */
+#define FROM_FLOAT_C_TYPE DFtype
+#define FROM_MODE_NAME_S df
+
+#elif defined (FROM_QQ) && HAVE_QQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME QQ
+#define FROM_MODE_NAME_S qq
+#define FROM_INT_C_TYPE QItype
+#define FROM_SINT_C_TYPE QItype
+#define FROM_UINT_C_TYPE UQItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 1 /* in bytes. */
+
+#elif defined (FROM_HQ) && HAVE_HQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME HQ
+#define FROM_MODE_NAME_S hq
+#define FROM_INT_C_TYPE HItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_SQ) && HAVE_SQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME SQ
+#define FROM_MODE_NAME_S sq
+#define FROM_INT_C_TYPE SItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_DQ) && HAVE_DQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME DQ
+#define FROM_MODE_NAME_S dq
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_TQ) && HAVE_TQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME TQ
+#define FROM_MODE_NAME_S tq
+#define FROM_INT_C_TYPE TItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (FROM_UQQ) && HAVE_UQQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UQQ
+#define FROM_MODE_NAME_S uqq
+#define FROM_INT_C_TYPE UQItype
+#define FROM_SINT_C_TYPE QItype
+#define FROM_UINT_C_TYPE UQItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 1 /* in bytes. */
+
+#elif defined (FROM_UHQ) && HAVE_UHQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UHQ
+#define FROM_MODE_NAME_S uhq
+#define FROM_INT_C_TYPE UHItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_USQ) && HAVE_USQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME USQ
+#define FROM_MODE_NAME_S usq
+#define FROM_INT_C_TYPE USItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_UDQ) && HAVE_UDQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UDQ
+#define FROM_MODE_NAME_S udq
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_UTQ) && HAVE_UTQ == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UTQ
+#define FROM_MODE_NAME_S utq
+#define FROM_INT_C_TYPE UTItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (FROM_HA) && HAVE_HA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME HA
+#define FROM_MODE_NAME_S ha
+#define FROM_INT_C_TYPE HItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_SA) && HAVE_SA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME SA
+#define FROM_MODE_NAME_S sa
+#define FROM_INT_C_TYPE SItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_DA) && HAVE_DA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME DA
+#define FROM_MODE_NAME_S da
+#define FROM_INT_C_TYPE DItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_TA) && HAVE_TA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME TA
+#define FROM_MODE_NAME_S ta
+#define FROM_INT_C_TYPE TItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_UNSIGNED 0
+#define FROM_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (FROM_UHA) && HAVE_UHA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UHA
+#define FROM_MODE_NAME_S uha
+#define FROM_INT_C_TYPE UHItype
+#define FROM_SINT_C_TYPE HItype
+#define FROM_UINT_C_TYPE UHItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (FROM_USA) && HAVE_USA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME USA
+#define FROM_MODE_NAME_S usa
+#define FROM_INT_C_TYPE USItype
+#define FROM_SINT_C_TYPE SItype
+#define FROM_UINT_C_TYPE USItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (FROM_UDA) && HAVE_UDA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UDA
+#define FROM_MODE_NAME_S uda
+#define FROM_INT_C_TYPE UDItype
+#define FROM_SINT_C_TYPE DItype
+#define FROM_UINT_C_TYPE UDItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (FROM_UTA) && HAVE_UTA == 1
+#define FROM_TYPE 4 /* Fixed-point. */
+#define FROM_MODE_NAME UTA
+#define FROM_MODE_NAME_S uta
+#define FROM_INT_C_TYPE UTItype
+#define FROM_SINT_C_TYPE TItype
+#define FROM_UINT_C_TYPE UTItype
+#define FROM_MODE_UNSIGNED 1
+#define FROM_FIXED_SIZE 16 /* in bytes. */
+
+#endif
+
+#if defined (TO_QI) && HAVE_QI == 1 && !defined (FROM_QI)
+#define TO_TYPE 1 /* Signed integer. */
+#define TO_INT_C_TYPE QItype
+#define TO_SINT_C_TYPE QItype
+#define TO_UINT_C_TYPE UQItype
+#define TO_MODE_NAME_S qi
+
+#elif defined (TO_HI) && HAVE_HI == 1 && !defined (FROM_HI)
+#define TO_TYPE 1 /* Signed integer. */
+#define TO_INT_C_TYPE HItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_NAME_S hi
+
+#elif defined (TO_SI) && HAVE_SI == 1 && !defined (FROM_SI)
+#define TO_TYPE 1 /* Signed integer. */
+#define TO_INT_C_TYPE SItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_NAME_S si
+
+#elif defined (TO_DI) && HAVE_DI == 1 && !defined (FROM_DI)
+#define TO_TYPE 1 /* Signed integer. */
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_NAME_S di
+
+#elif defined (TO_TI) && HAVE_TI == 1 && !defined (FROM_TI)
+#define TO_TYPE 1 /* Signed integer. */
+#define TO_INT_C_TYPE TItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_NAME_S ti
+
+#elif defined (TO_UQI) && HAVE_UQI == 1 && !defined (FROM_UQI)
+#define TO_TYPE 2 /* Unsigned integer. */
+#define TO_INT_C_TYPE UQItype
+#define TO_SINT_C_TYPE QItype
+#define TO_UINT_C_TYPE UQItype
+#define TO_MODE_NAME_S qi
+
+#elif defined (TO_UHI) && HAVE_UHI == 1 && !defined (FROM_UHI)
+#define TO_TYPE 2 /* Unsigned integer. */
+#define TO_INT_C_TYPE UHItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_NAME_S hi
+
+#elif defined (TO_USI) && HAVE_USI == 1 && !defined (FROM_USI)
+#define TO_TYPE 2 /* Unsigned integer. */
+#define TO_INT_C_TYPE USItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_NAME_S si
+
+#elif defined (TO_UDI) && HAVE_UDI == 1 && !defined (FROM_UDI)
+#define TO_TYPE 2 /* Unsigned integer. */
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_NAME_S di
+
+#elif defined (TO_UTI) && HAVE_UTI == 1 && !defined (FROM_UTI)
+#define TO_TYPE 2 /* Unsigned integer. */
+#define TO_INT_C_TYPE UTItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_NAME_S ti
+
+#elif defined (TO_SF) && HAVE_SF == 1 && !defined (FROM_SF)
+#define TO_TYPE 3 /* Floating-point. */
+#define TO_FLOAT_C_TYPE SFtype
+#define TO_MODE_NAME_S sf
+
+#elif defined (TO_DF) && HAVE_DF == 1 && !defined (FROM_DF)
+#define TO_TYPE 3 /* Floating-point. */
+#define TO_FLOAT_C_TYPE DFtype
+#define TO_MODE_NAME_S df
+
+#elif defined (TO_QQ) && HAVE_QQ == 1 && !defined (FROM_QQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME QQ
+#define TO_MODE_NAME_S qq
+#define TO_INT_C_TYPE QItype
+#define TO_SINT_C_TYPE QItype
+#define TO_UINT_C_TYPE UQItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 1 /* in bytes. */
+
+#elif defined (TO_HQ) && HAVE_HQ == 1 && !defined (FROM_HQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME HQ
+#define TO_MODE_NAME_S hq
+#define TO_INT_C_TYPE HItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (TO_SQ) && HAVE_SQ == 1 && !defined (FROM_SQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME SQ
+#define TO_MODE_NAME_S sq
+#define TO_INT_C_TYPE SItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (TO_DQ) && HAVE_DQ == 1 && !defined (FROM_DQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME DQ
+#define TO_MODE_NAME_S dq
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (TO_TQ) && HAVE_TQ == 1 && !defined (FROM_TQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME TQ
+#define TO_MODE_NAME_S tq
+#define TO_INT_C_TYPE TItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (TO_UQQ) && HAVE_UQQ == 1 && !defined (FROM_UQQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UQQ
+#define TO_MODE_NAME_S uqq
+#define TO_INT_C_TYPE UQItype
+#define TO_SINT_C_TYPE QItype
+#define TO_UINT_C_TYPE UQItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 1 /* in bytes. */
+
+#elif defined (TO_UHQ) && HAVE_UHQ == 1 && !defined (FROM_UHQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UHQ
+#define TO_MODE_NAME_S uhq
+#define TO_INT_C_TYPE UHItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (TO_USQ) && HAVE_USQ == 1 && !defined (FROM_USQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME USQ
+#define TO_MODE_NAME_S usq
+#define TO_INT_C_TYPE USItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (TO_UDQ) && HAVE_UDQ == 1 && !defined (FROM_UDQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UDQ
+#define TO_MODE_NAME_S udq
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (TO_UTQ) && HAVE_UTQ == 1 && !defined (FROM_UTQ)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UTQ
+#define TO_MODE_NAME_S utq
+#define TO_INT_C_TYPE UTItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (TO_HA) && HAVE_HA == 1 && !defined (FROM_HA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME HA
+#define TO_MODE_NAME_S ha
+#define TO_INT_C_TYPE HItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (TO_SA) && HAVE_SA == 1 && !defined (FROM_SA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME SA
+#define TO_MODE_NAME_S sa
+#define TO_INT_C_TYPE SItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (TO_DA) && HAVE_DA == 1 && !defined (FROM_DA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME DA
+#define TO_MODE_NAME_S da
+#define TO_INT_C_TYPE DItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (TO_TA) && HAVE_TA == 1 && !defined (FROM_TA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME TA
+#define TO_MODE_NAME_S ta
+#define TO_INT_C_TYPE TItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_UNSIGNED 0
+#define TO_FIXED_SIZE 16 /* in bytes. */
+
+#elif defined (TO_UHA) && HAVE_UHA == 1 && !defined (FROM_UHA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UHA
+#define TO_MODE_NAME_S uha
+#define TO_INT_C_TYPE UHItype
+#define TO_SINT_C_TYPE HItype
+#define TO_UINT_C_TYPE UHItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 2 /* in bytes. */
+
+#elif defined (TO_USA) && HAVE_USA == 1 && !defined (FROM_USA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME USA
+#define TO_MODE_NAME_S usa
+#define TO_INT_C_TYPE USItype
+#define TO_SINT_C_TYPE SItype
+#define TO_UINT_C_TYPE USItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 4 /* in bytes. */
+
+#elif defined (TO_UDA) && HAVE_UDA == 1 && !defined (FROM_UDA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UDA
+#define TO_MODE_NAME_S uda
+#define TO_INT_C_TYPE UDItype
+#define TO_SINT_C_TYPE DItype
+#define TO_UINT_C_TYPE UDItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 8 /* in bytes. */
+
+#elif defined (TO_UTA) && HAVE_UTA == 1 && !defined (FROM_UTA)
+#define TO_TYPE 4 /* Fixed-point. */
+#define TO_MODE_NAME UTA
+#define TO_MODE_NAME_S uta
+#define TO_INT_C_TYPE UTItype
+#define TO_SINT_C_TYPE TItype
+#define TO_UINT_C_TYPE UTItype
+#define TO_MODE_UNSIGNED 1
+#define TO_FIXED_SIZE 16 /* in bytes. */
+
+#endif
+
+#if defined (FROM_MODE_NAME_S) && defined (TO_MODE_NAME_S)
+
+#if FROM_TYPE == 1 /* Signed integer. */
+#define FROM_INT_WIDTH (FROM_INT_SIZE * BITS_PER_UNIT)
+#endif
+
+#if FROM_TYPE == 2 /* Unsigned integer. */
+#define FROM_INT_WIDTH (FROM_INT_SIZE * BITS_PER_UNIT)
+#endif
+
+#if FROM_TYPE == 4 /* Fixed-point. */
+#define FROM_FIXED_C_TYPE FIXED_C_TYPE2(FROM_MODE_NAME)
+#define FROM_FBITS FBITS2(FROM_MODE_NAME)
+#define FROM_FIXED_WIDTH (FROM_FIXED_SIZE * BITS_PER_UNIT)
+#define FROM_IBITS IBITS2(FROM_MODE_NAME)
+#define FROM_I_F_BITS (FROM_FBITS + FROM_IBITS)
+
+#if FROM_MODE_UNSIGNED == 0 /* Signed types. */
+#define FROM_PADDING_BITS (FROM_FIXED_WIDTH - 1 - FROM_I_F_BITS)
+#define FROM_NONPADDING_BITS (1 + FROM_I_F_BITS)
+#else /* Unsigned types. */
+#define FROM_PADDING_BITS (FROM_FIXED_WIDTH - FROM_I_F_BITS)
+#define FROM_NONPADDING_BITS (FROM_I_F_BITS)
+#endif
+#define FROM_HAVE_PADDING_BITS (FROM_PADDING_BITS > 0)
+#endif /* FROM_TYPE == 4 */
+
+#if TO_TYPE == 4 /* Fixed-point. */
+#define TO_FIXED_C_TYPE FIXED_C_TYPE2(TO_MODE_NAME)
+#define TO_FBITS FBITS2(TO_MODE_NAME)
+#define TO_FIXED_WIDTH (TO_FIXED_SIZE * BITS_PER_UNIT)
+#define TO_IBITS IBITS2(TO_MODE_NAME)
+#define TO_I_F_BITS (TO_FBITS + TO_IBITS)
+
+#if TO_MODE_UNSIGNED == 0 /* Signed types. */
+#define TO_PADDING_BITS (TO_FIXED_WIDTH - 1 - TO_I_F_BITS)
+#define TO_NONPADDING_BITS (1 + TO_I_F_BITS)
+#else /* Unsigned types. */
+#define TO_PADDING_BITS (TO_FIXED_WIDTH - TO_I_F_BITS)
+#define TO_NONPADDING_BITS (TO_I_F_BITS)
+#endif
+#define TO_HAVE_PADDING_BITS (TO_PADDING_BITS > 0)
+#endif /* TO_TYPE == 4 */
+
+#define FIXED_CONVERT_OP(OP,FROM,TO) OP ## FROM ## TO
+#define FIXED_CONVERT_OP2(OP,FROM,TO) OP ## FROM ## TO ## 2
+#define FRACT_TEMP(N1,N2) FIXED_CONVERT_OP(__fract,N1,N2)
+#define FRACT2_TEMP(N1,N2) FIXED_CONVERT_OP2(__fract,N1,N2)
+#define SATFRACT_TEMP(N1,N2) FIXED_CONVERT_OP(__satfract,N1,N2)
+#define SATFRACT2_TEMP(N1,N2) FIXED_CONVERT_OP2(__satfract,N1,N2)
+#define FRACTUNS_TEMP(N1,N2) FIXED_CONVERT_OP(__fractuns,N1,N2)
+#define SATFRACTUNS_TEMP(N1,N2) FIXED_CONVERT_OP(__satfractuns,N1,N2)
+
+/* Define conversions from fixed-point to fixed-point. */
+#if FROM_TYPE == 4 && TO_TYPE == 4
+
+#if FROM_FIXED_SIZE > TO_FIXED_SIZE
+#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE
+#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE
+#define BIG_WIDTH FROM_FIXED_WIDTH
+#else
+#define BIG_SINT_C_TYPE TO_SINT_C_TYPE
+#define BIG_UINT_C_TYPE TO_UINT_C_TYPE
+#define BIG_WIDTH TO_FIXED_WIDTH
+#endif
+
+/* Check if FROM* and TO* are in the same machine class. */
+#if ((FROM_MODE_UNSIGNED == TO_MODE_UNSIGNED) \
+ && ((FROM_IBITS == 0) == (TO_IBITS == 0)))
+/* Same modes: append '2' to conversion function names. */
+#define FRACT FRACT2_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#define SATFRACT SATFRACT2_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#else
+/* Different modes: don't append '2' to conversion function names. */
+#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#endif
+
+extern TO_FIXED_C_TYPE FRACT (FROM_FIXED_C_TYPE);
+extern TO_FIXED_C_TYPE SATFRACT (FROM_FIXED_C_TYPE);
+#endif /* FROM_TYPE == 4 && TO_TYPE == 4 */
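For example, converting between two signed fracts such as QQ and HQ falls in the same-class branch and names the routine __fractqqhq2, while QQ to the signed accum HA takes the other branch and names it __fractqqha. The resulting declarations then look like:

extern HQtype __fractqqhq2 (QQtype);   /* same machine class: '2' appended */
extern HAtype __fractqqha (QQtype);    /* fract -> accum: no '2' */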
+
+/* Define conversions from fixed-point to signed integer. */
+#if FROM_TYPE == 4 && TO_TYPE == 1
+#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_INT_C_TYPE FRACT (FROM_FIXED_C_TYPE);
+#endif /* FROM_TYPE == 4 && TO_TYPE == 1 */
+
+/* Define conversions from fixed-point to unsigned integer. */
+#if FROM_TYPE == 4 && TO_TYPE == 2
+#define FRACTUNS FRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_INT_C_TYPE FRACTUNS (FROM_FIXED_C_TYPE);
+#endif /* FROM_TYPE == 4 && TO_TYPE == 2 */
+
+/* Define conversions from fixed-point to floating-point. */
+#if FROM_TYPE == 4 && TO_TYPE == 3
+#define BASE1(NUM) 0x1.0p ## NUM
+#define BASE2(NUM) BASE1(NUM)
+#define BASE BASE2(FROM_FBITS)
+#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_FLOAT_C_TYPE FRACT (FROM_FIXED_C_TYPE);
+#endif /* FROM_TYPE == 4 && TO_TYPE == 3 */
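As a worked case: with FROM_QQ on a target where __QQ_FBIT__ is 7, BASE expands to 0x1.0p7, i.e. 128.0, so the function above reduces to z = x / 128.0 on the raw 8-bit value; raw 64 therefore converts to 0.5.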
+
+/* Define conversions from signed integer to fixed-point. */
+#if FROM_TYPE == 1 && TO_TYPE == 4
+
+#if FROM_INT_SIZE > TO_FIXED_SIZE
+#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE
+#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE
+#define BIG_WIDTH FROM_INT_WIDTH
+#else
+#define BIG_SINT_C_TYPE TO_SINT_C_TYPE
+#define BIG_UINT_C_TYPE TO_UINT_C_TYPE
+#define BIG_WIDTH TO_FIXED_WIDTH
+#endif
+
+#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_FIXED_C_TYPE FRACT (FROM_INT_C_TYPE);
+extern TO_FIXED_C_TYPE SATFRACT (FROM_INT_C_TYPE);
+#endif /* FROM_TYPE == 1 && TO_TYPE == 4 */
+
+/* Define conversions from unsigned integer to fixed-point. */
+#if FROM_TYPE == 2 && TO_TYPE == 4
+
+#if FROM_INT_SIZE > TO_FIXED_SIZE
+#define BIG_SINT_C_TYPE FROM_SINT_C_TYPE
+#define BIG_UINT_C_TYPE FROM_UINT_C_TYPE
+#define BIG_WIDTH FROM_INT_WIDTH
+#else
+#define BIG_SINT_C_TYPE TO_SINT_C_TYPE
+#define BIG_UINT_C_TYPE TO_UINT_C_TYPE
+#define BIG_WIDTH TO_FIXED_WIDTH
+#endif
+
+#define FRACTUNS FRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#define SATFRACTUNS SATFRACTUNS_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_FIXED_C_TYPE FRACTUNS (FROM_INT_C_TYPE);
+extern TO_FIXED_C_TYPE SATFRACTUNS (FROM_INT_C_TYPE);
+#endif /* FROM_TYPE == 2 && TO_TYPE == 4 */
+
+/* Define conversions from floating-point to fixed-point. */
+#if FROM_TYPE == 3 && TO_TYPE == 4
+
+#define BASE1(NUM) (0x1.0p ## NUM)
+#define BASE2(NUM) BASE1(NUM)
+#define BASE BASE2(TO_FBITS)
+
+#define FIXED_MAX1(NUM1,NUM2) (0x1.0p ## NUM1 - 0x1.0p- ## NUM2)
+#define FIXED_MAX2(NUM1,NUM2) FIXED_MAX1(NUM1,NUM2)
+#define FIXED_MAX FIXED_MAX2(TO_IBITS,TO_FBITS)
+
+#define FIXED_MIN1(NUM) (-0x1.0p ## NUM)
+#define FIXED_MIN2(NUM) FIXED_MIN1(NUM)
+#if TO_MODE_UNSIGNED == 0
+#define FIXED_MIN FIXED_MIN2(TO_IBITS)
+#else
+#define FIXED_MIN 0.0
+#endif
+
+#define FRACT FRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+#define SATFRACT SATFRACT_TEMP(FROM_MODE_NAME_S,TO_MODE_NAME_S)
+extern TO_FIXED_C_TYPE FRACT (FROM_FLOAT_C_TYPE);
+extern TO_FIXED_C_TYPE SATFRACT (FROM_FLOAT_C_TYPE);
+#endif /* FROM_TYPE == 3 && TO_TYPE == 4 */
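Concretely, assuming a signed accum destination with TO_IBITS == 8 and TO_FBITS == 7, FIXED_MAX expands to (0x1.0p8 - 0x1.0p-7), about 255.992, and FIXED_MIN to -0x1.0p8 == -256.0; for an unsigned destination FIXED_MIN is simply 0.0, matching the #if above.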
+
+#endif /* defined (FROM_MODE_NAME_S) && defined (TO_MODE_NAME_S) */
+
+#endif /* _FIXED_BIT_H */
diff --git a/gcc/config/flat.h b/gcc/config/flat.h
new file mode 100644
index 000000000..9c9ae751d
--- /dev/null
+++ b/gcc/config/flat.h
@@ -0,0 +1,22 @@
+/* Defines to be used for targets that support flat executables.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This macro applies on top of OBJECT_FORMAT_ELF and indicates that
+ we want to support both flat and ELF output. */
+#define OBJECT_FORMAT_FLAT
diff --git a/gcc/config/floatunsidf.c b/gcc/config/floatunsidf.c
new file mode 100644
index 000000000..ff2811250
--- /dev/null
+++ b/gcc/config/floatunsidf.c
@@ -0,0 +1,15 @@
+/* Public domain. */
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef float DFtype __attribute__ ((mode (DF)));
+
+DFtype
+__floatunsidf (USItype u)
+{
+ SItype s = (SItype) u;
+ DFtype r = (DFtype) s;
+ if (s < 0)
+ r += (DFtype)2.0 * (DFtype) ((USItype) 1
+ << (sizeof (USItype) * __CHAR_BIT__ - 1));
+ return r;
+}
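The correction works because reinterpreting u as signed subtracts 2^32 exactly when the top bit is set: s == u - 2^32 there, so adding 2.0 * 2^31 == 2^32 restores u, and the result is exact since every 32-bit integer fits in DFmode's 53-bit significand. A quick standalone check, assuming GCC's modular unsigned-to-signed conversion:

#include <stdio.h>

int main (void)
{
  unsigned int u = 0xFFFFFFFFu;
  int s = (int) u;                        /* -1 after reinterpretation */
  double r = (double) s + 4294967296.0;   /* add 2^32 back */
  printf ("%.1f\n", r);                   /* 4294967295.0, exact */
  return 0;
}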
diff --git a/gcc/config/floatunsisf.c b/gcc/config/floatunsisf.c
new file mode 100644
index 000000000..11d4aa78c
--- /dev/null
+++ b/gcc/config/floatunsisf.c
@@ -0,0 +1,18 @@
+/* Public domain. */
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef float SFtype __attribute__ ((mode (SF)));
+
+SFtype
+__floatunsisf (USItype u)
+{
+ SItype s = (SItype) u;
+ if (s < 0)
+ {
+ /* As in expand_float, compute (u & 1) | (u >> 1) to ensure
+ correct rounding if a nonzero bit is shifted out. */
+ return (SFtype) 2.0 * (SFtype) (SItype) ((u & 1) | (u >> 1));
+ }
+ else
+ return (SFtype) s;
+}
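The sticky bit matters in halfway cases. Take u = 0x80000081 (2^31 + 129): u >> 1 is 2^30 + 64, exactly halfway between the SFmode neighbours 2^30 and 2^30 + 128, so on its own it would round to even (2^30) and doubling would give 2^31, while correct rounding of u is 2^31 + 256. OR-ing in the discarded low bit gives 2^30 + 65, which rounds up to 2^30 + 128 and doubles to the correct 2^31 + 256.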
diff --git a/gcc/config/floatunsitf.c b/gcc/config/floatunsitf.c
new file mode 100644
index 000000000..955d67666
--- /dev/null
+++ b/gcc/config/floatunsitf.c
@@ -0,0 +1,15 @@
+/* Public domain. */
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef float TFtype __attribute__ ((mode (TF)));
+
+TFtype
+__floatunsitf (USItype u)
+{
+ SItype s = (SItype) u;
+ TFtype r = (TFtype) s;
+ if (s < 0)
+ r += (TFtype)2.0 * (TFtype) ((USItype) 1
+ << (sizeof (USItype) * __CHAR_BIT__ - 1));
+ return r;
+}
diff --git a/gcc/config/floatunsixf.c b/gcc/config/floatunsixf.c
new file mode 100644
index 000000000..52511688d
--- /dev/null
+++ b/gcc/config/floatunsixf.c
@@ -0,0 +1,15 @@
+/* Public domain. */
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef float XFtype __attribute__ ((mode (XF)));
+
+XFtype
+__floatunsixf (USItype u)
+{
+ SItype s = (SItype) u;
+ XFtype r = (XFtype) s;
+ if (s < 0)
+ r += (XFtype)2.0 * (XFtype) ((USItype) 1
+ << (sizeof (USItype) * __CHAR_BIT__ - 1));
+ return r;
+}
diff --git a/gcc/config/fp-bit.c b/gcc/config/fp-bit.c
new file mode 100644
index 000000000..82d924ec2
--- /dev/null
+++ b/gcc/config/fp-bit.c
@@ -0,0 +1,1657 @@
+/* This is a software floating point library which can be used
+ for targets without hardware floating point.
+ Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This implements IEEE 754 format arithmetic, but does not provide a
+ mechanism for setting the rounding mode, or for generating or handling
+ exceptions.
+
+ The original code by Steve Chamberlain, hacked by Mark Eichin and Jim
+ Wilson, all of Cygnus Support. */
+
+/* The intended way to use this file is to make two copies, add `#define FLOAT'
+ to one copy, then compile both copies and add them to libgcc.a. */
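Schematically, that build step looks like the following (the real copies are generated by the libgcc target fragments, so the file and object names here are illustrative only):

cc -c fp-bit.c -o dp-bit.o            # double (DFmode) routines
cc -DFLOAT -c fp-bit.c -o fp-bit.o    # float (SFmode) routines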
+
+#include "tconfig.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "config/fp-bit.h"
+
+/* The following macros can be defined to change the behavior of this file:
+ FLOAT: Implement a `float', aka SFmode, fp library. If this is not
+ defined, then this file implements a `double', aka DFmode, fp library.
+ FLOAT_ONLY: Used with FLOAT, to implement a `float' only library, i.e.
+ don't include float->double conversion which requires the double library.
+ This is useful only for machines which can't support doubles, e.g. some
+ 8-bit processors.
+ CMPtype: Specify the type that floating point compares should return.
+ This defaults to SItype, aka int.
+ _DEBUG_BITFLOAT: This makes debugging the code a little easier, by adding
+ two integers to the FLO_union_type.
+ NO_DENORMALS: Disable handling of denormals.
+ NO_NANS: Disable NaN and infinity handling.
+ SMALL_MACHINE: Useful when operations on QIs and HIs are faster
+ than on an SI. */
+
+/* We don't currently support extended floats (long doubles) on machines
+ without hardware to deal with them.
+
+ These stubs are just to keep the linker from complaining about unresolved
+ references which can be pulled in from libio & libstdc++, even if the
+ user isn't using long doubles. However, they may generate an unresolved
+ external reference to abort if abort is not otherwise used, and the stubs
+ are referenced from within libc, since libgcc goes before and after the
+ system library. */
+
+#ifdef DECLARE_LIBRARY_RENAMES
+ DECLARE_LIBRARY_RENAMES
+#endif
+
+#ifdef EXTENDED_FLOAT_STUBS
+extern void abort (void);
+void __extendsfxf2 (void) { abort(); }
+void __extenddfxf2 (void) { abort(); }
+void __truncxfdf2 (void) { abort(); }
+void __truncxfsf2 (void) { abort(); }
+void __fixxfsi (void) { abort(); }
+void __floatsixf (void) { abort(); }
+void __addxf3 (void) { abort(); }
+void __subxf3 (void) { abort(); }
+void __mulxf3 (void) { abort(); }
+void __divxf3 (void) { abort(); }
+void __negxf2 (void) { abort(); }
+void __eqxf2 (void) { abort(); }
+void __nexf2 (void) { abort(); }
+void __gtxf2 (void) { abort(); }
+void __gexf2 (void) { abort(); }
+void __lexf2 (void) { abort(); }
+void __ltxf2 (void) { abort(); }
+
+void __extendsftf2 (void) { abort(); }
+void __extenddftf2 (void) { abort(); }
+void __trunctfdf2 (void) { abort(); }
+void __trunctfsf2 (void) { abort(); }
+void __fixtfsi (void) { abort(); }
+void __floatsitf (void) { abort(); }
+void __addtf3 (void) { abort(); }
+void __subtf3 (void) { abort(); }
+void __multf3 (void) { abort(); }
+void __divtf3 (void) { abort(); }
+void __negtf2 (void) { abort(); }
+void __eqtf2 (void) { abort(); }
+void __netf2 (void) { abort(); }
+void __gttf2 (void) { abort(); }
+void __getf2 (void) { abort(); }
+void __letf2 (void) { abort(); }
+void __lttf2 (void) { abort(); }
+#else /* !EXTENDED_FLOAT_STUBS, rest of file */
+
+/* IEEE "special" number predicates */
+
+#ifdef NO_NANS
+
+#define nan() 0
+#define isnan(x) 0
+#define isinf(x) 0
+#else
+
+#if defined L_thenan_sf
+const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_df
+const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_tf
+const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined TFLOAT
+extern const fp_number_type __thenan_tf;
+#elif defined FLOAT
+extern const fp_number_type __thenan_sf;
+#else
+extern const fp_number_type __thenan_df;
+#endif
+
+INLINE
+static const fp_number_type *
+makenan (void)
+{
+#ifdef TFLOAT
+ return & __thenan_tf;
+#elif defined FLOAT
+ return & __thenan_sf;
+#else
+ return & __thenan_df;
+#endif
+}
+
+INLINE
+static int
+isnan (const fp_number_type *x)
+{
+ return __builtin_expect (x->class == CLASS_SNAN || x->class == CLASS_QNAN,
+ 0);
+}
+
+INLINE
+static int
+isinf (const fp_number_type * x)
+{
+ return __builtin_expect (x->class == CLASS_INFINITY, 0);
+}
+
+#endif /* NO_NANS */
+
+INLINE
+static int
+iszero (const fp_number_type * x)
+{
+ return x->class == CLASS_ZERO;
+}
+
+INLINE
+static void
+flip_sign ( fp_number_type * x)
+{
+ x->sign = !x->sign;
+}
+
+/* Count leading zeroes in N. */
+INLINE
+static int
+clzusi (USItype n)
+{
+ extern int __clzsi2 (USItype);
+ if (sizeof (USItype) == sizeof (unsigned int))
+ return __builtin_clz (n);
+ else if (sizeof (USItype) == sizeof (unsigned long))
+ return __builtin_clzl (n);
+ else if (sizeof (USItype) == sizeof (unsigned long long))
+ return __builtin_clzll (n);
+ else
+ return __clzsi2 (n);
+}
+
+extern FLO_type pack_d (const fp_number_type * );
+
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
+FLO_type
+pack_d (const fp_number_type *src)
+{
+ FLO_union_type dst;
+ fractype fraction = src->fraction.ll; /* wasn't unsigned before? */
+ int sign = src->sign;
+ int exp = 0;
+
+ if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
+ {
+ /* We can't represent these values accurately. By using the
+ largest possible magnitude, we guarantee that the conversion
+ of infinity is at least as big as any finite number. */
+ exp = EXPMAX;
+ fraction = ((fractype) 1 << FRACBITS) - 1;
+ }
+ else if (isnan (src))
+ {
+ exp = EXPMAX;
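+ /* Note: the `|| 1` makes this test unconditional, so every incoming
+ NaN, signaling or quiet, is packed as a quiet NaN. */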
+ if (src->class == CLASS_QNAN || 1)
+ {
+#ifdef QUIET_NAN_NEGATED
+ fraction |= QUIET_NAN - 1;
+#else
+ fraction |= QUIET_NAN;
+#endif
+ }
+ }
+ else if (isinf (src))
+ {
+ exp = EXPMAX;
+ fraction = 0;
+ }
+ else if (iszero (src))
+ {
+ exp = 0;
+ fraction = 0;
+ }
+ else if (fraction == 0)
+ {
+ exp = 0;
+ }
+ else
+ {
+ if (__builtin_expect (src->normal_exp < NORMAL_EXPMIN, 0))
+ {
+#ifdef NO_DENORMALS
+ /* Go straight to a zero representation if denormals are not
+ supported. The denormal handling would be harmless but
+ is unnecessary. */
+ exp = 0;
+ fraction = 0;
+#else /* NO_DENORMALS */
+ /* This number's exponent is too low to fit into the bits
+ available in the number, so we'll store 0 in the exponent and
+ shift the fraction to the right to make up for it. */
+
+ int shift = NORMAL_EXPMIN - src->normal_exp;
+
+ exp = 0;
+
+ if (shift > FRAC_NBITS - NGARDS)
+ {
+ /* No point shifting, since it's more than 64 out. */
+ fraction = 0;
+ }
+ else
+ {
+ int lowbit = (fraction & (((fractype)1 << shift) - 1)) ? 1 : 0;
+ fraction = (fraction >> shift) | lowbit;
+ }
+ if ((fraction & GARDMASK) == GARDMSB)
+ {
+ if ((fraction & (1 << NGARDS)))
+ fraction += GARDROUND + 1;
+ }
+ else
+ {
+ /* Add to the guards to round up. */
+ fraction += GARDROUND;
+ }
+ /* Perhaps the rounding means we now need to change the
+ exponent, because the fraction is no longer denormal. */
+ if (fraction >= IMPLICIT_1)
+ {
+ exp += 1;
+ }
+ fraction >>= NGARDS;
+#endif /* NO_DENORMALS */
+ }
+ else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+ && __builtin_expect (src->normal_exp > EXPBIAS, 0))
+ {
+ exp = EXPMAX;
+ fraction = 0;
+ }
+ else
+ {
+ exp = src->normal_exp + EXPBIAS;
+ if (!ROUND_TOWARDS_ZERO)
+ {
+ /* If the guard bits are all zero except the topmost one, we're
+ exactly half way between two numbers; choose the one which makes
+ the lsb of the answer 0 (round to even). */
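+ /* For example, with the SF parameters (NGARDS == 7, GARDMASK == 0x7f,
+ GARDMSB == 0x40, GARDROUND == 0x3f): a guard field of 0x41..0x7f
+ carries into the lsb when GARDROUND is added (round up), 0x00..0x3f
+ does not (round down), and exactly 0x40 is the halfway case, where
+ GARDROUND + 1 is added only if the lsb is already 1, forcing it even. */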
+ if ((fraction & GARDMASK) == GARDMSB)
+ {
+ if (fraction & (1 << NGARDS))
+ fraction += GARDROUND + 1;
+ }
+ else
+ {
+ /* Add a one to the guards to round up */
+ fraction += GARDROUND;
+ }
+ if (fraction >= IMPLICIT_2)
+ {
+ fraction >>= 1;
+ exp += 1;
+ }
+ }
+ fraction >>= NGARDS;
+
+ if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
+ {
+ /* Saturate on overflow. */
+ exp = EXPMAX;
+ fraction = ((fractype) 1 << FRACBITS) - 1;
+ }
+ }
+ }
+
+ /* We previously used bitfields to store the number, but this doesn't
+ handle little/big endian systems conveniently, so use shifts and
+ masks */
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+ dst.bits.fraction = fraction;
+ dst.bits.exp = exp;
+ dst.bits.sign = sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+ halffractype high, low, unity;
+ int lowsign, lowexp;
+
+ unity = (halffractype) 1 << HALFFRACBITS;
+
+ /* Set HIGH to the high double's significand, masking out the implicit 1.
+ Set LOW to the low double's full significand. */
+ high = (fraction >> (FRACBITS - HALFFRACBITS)) & (unity - 1);
+ low = fraction & (unity * 2 - 1);
+
+ /* Get the initial sign and exponent of the low double. */
+ lowexp = exp - HALFFRACBITS - 1;
+ lowsign = sign;
+
+ /* HIGH should be rounded like a normal double, making |LOW| <=
+ 0.5 ULP of HIGH. Assume round-to-nearest. */
+ if (exp < EXPMAX)
+ if (low > unity || (low == unity && (high & 1) == 1))
+ {
+ /* Round HIGH up and adjust LOW to match. */
+ high++;
+ if (high == unity)
+ {
+ /* May make it infinite, but that's OK. */
+ high = 0;
+ exp++;
+ }
+ low = unity * 2 - low;
+ lowsign ^= 1;
+ }
+
+ high |= (halffractype) exp << HALFFRACBITS;
+ high |= (halffractype) sign << (HALFFRACBITS + EXPBITS);
+
+ if (exp == EXPMAX || exp == 0 || low == 0)
+ low = 0;
+ else
+ {
+ while (lowexp > 0 && low < unity)
+ {
+ low <<= 1;
+ lowexp--;
+ }
+
+ if (lowexp <= 0)
+ {
+ halffractype roundmsb, round;
+ int shift;
+
+ shift = 1 - lowexp;
+ roundmsb = (1 << (shift - 1));
+ round = low & ((roundmsb << 1) - 1);
+
+ low >>= shift;
+ lowexp = 0;
+
+ if (round > roundmsb || (round == roundmsb && (low & 1) == 1))
+ {
+ low++;
+ if (low == unity)
+ /* LOW rounds up to the smallest normal number. */
+ lowexp++;
+ }
+ }
+
+ low &= unity - 1;
+ low |= (halffractype) lowexp << HALFFRACBITS;
+ low |= (halffractype) lowsign << (HALFFRACBITS + EXPBITS);
+ }
+ dst.value_raw = ((fractype) high << HALFSHIFT) | low;
+ }
+# else
+ dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1);
+ dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS;
+ dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS + EXPBITS);
+# endif
+#endif
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+#ifdef TFLOAT
+ {
+ qrtrfractype tmp1 = dst.words[0];
+ qrtrfractype tmp2 = dst.words[1];
+ dst.words[0] = dst.words[3];
+ dst.words[1] = dst.words[2];
+ dst.words[2] = tmp2;
+ dst.words[3] = tmp1;
+ }
+#else
+ {
+ halffractype tmp = dst.words[0];
+ dst.words[0] = dst.words[1];
+ dst.words[1] = tmp;
+ }
+#endif
+#endif
+
+ return dst.value;
+}
+#endif
+
+#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf)
+void
+unpack_d (FLO_union_type * src, fp_number_type * dst)
+{
+ /* We previously used bitfields to store the number, but this doesn't
+ handle little/big endian systems conveniently, so use shifts and
+ masks */
+ fractype fraction;
+ int exp;
+ int sign;
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+ FLO_union_type swapped;
+
+#ifdef TFLOAT
+ swapped.words[0] = src->words[3];
+ swapped.words[1] = src->words[2];
+ swapped.words[2] = src->words[1];
+ swapped.words[3] = src->words[0];
+#else
+ swapped.words[0] = src->words[1];
+ swapped.words[1] = src->words[0];
+#endif
+ src = &swapped;
+#endif
+
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+ fraction = src->bits.fraction;
+ exp = src->bits.exp;
+ sign = src->bits.sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+ halffractype high, low;
+
+ high = src->value_raw >> HALFSHIFT;
+ low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1);
+
+ fraction = high & ((((fractype)1) << HALFFRACBITS) - 1);
+ fraction <<= FRACBITS - HALFFRACBITS;
+ exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+ sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1;
+
+ if (exp != EXPMAX && exp != 0 && low != 0)
+ {
+ int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+ int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1;
+ int shift;
+ fractype xlow;
+
+ xlow = low & ((((fractype)1) << HALFFRACBITS) - 1);
+ if (lowexp)
+ xlow |= (((halffractype)1) << HALFFRACBITS);
+ else
+ lowexp = 1;
+ shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp);
+ if (shift > 0)
+ xlow <<= shift;
+ else if (shift < 0)
+ xlow >>= -shift;
+ if (sign == lowsign)
+ fraction += xlow;
+ else if (fraction >= xlow)
+ fraction -= xlow;
+ else
+ {
+ /* The high part is a power of two but the full number is lower.
+ This code will leave the implicit 1 in FRACTION, but we'd
+ have added that below anyway. */
+ fraction = (((fractype) 1 << FRACBITS) - xlow) << 1;
+ exp--;
+ }
+ }
+ }
+# else
+ fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1);
+ exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1);
+ sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1;
+# endif
+#endif
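+ /* In the generic shift-and-mask path above, an SF value decomposes
+ as: bit 31 the sign, bits 30..23 the biased exponent (EXPBITS == 8),
+ and bits 22..0 the fraction (FRACBITS == 23). */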
+
+ dst->sign = sign;
+ if (exp == 0)
+ {
+ /* Hmm. Looks like 0 */
+ if (fraction == 0
+#ifdef NO_DENORMALS
+ || 1
+#endif
+ )
+ {
+ /* tastes like zero */
+ dst->class = CLASS_ZERO;
+ }
+ else
+ {
+ /* Zero exponent with nonzero fraction - it's denormalized,
+ so there isn't a leading implicit one - we'll shift it so
+ it gets one. */
+ dst->normal_exp = exp - EXPBIAS + 1;
+ fraction <<= NGARDS;
+
+ dst->class = CLASS_NUMBER;
+#if 1
+ while (fraction < IMPLICIT_1)
+ {
+ fraction <<= 1;
+ dst->normal_exp--;
+ }
+#endif
+ dst->fraction.ll = fraction;
+ }
+ }
+ else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+ && __builtin_expect (exp == EXPMAX, 0))
+ {
+ /* Huge exponent. */
+ if (fraction == 0)
+ {
+ /* Attached to a zero fraction - means infinity */
+ dst->class = CLASS_INFINITY;
+ }
+ else
+ {
+ /* Nonzero fraction, means nan */
+#ifdef QUIET_NAN_NEGATED
+ if ((fraction & QUIET_NAN) == 0)
+#else
+ if (fraction & QUIET_NAN)
+#endif
+ {
+ dst->class = CLASS_QNAN;
+ }
+ else
+ {
+ dst->class = CLASS_SNAN;
+ }
+ /* Keep the fraction part as the nan number */
+ dst->fraction.ll = fraction;
+ }
+ }
+ else
+ {
+ /* Nothing strange about this number */
+ dst->normal_exp = exp - EXPBIAS;
+ dst->class = CLASS_NUMBER;
+ dst->fraction.ll = (fraction << NGARDS) | IMPLICIT_1;
+ }
+}
+#endif /* L_unpack_df || L_unpack_sf */
+
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
+static const fp_number_type *
+_fpadd_parts (fp_number_type * a,
+ fp_number_type * b,
+ fp_number_type * tmp)
+{
+ intfrac tfraction;
+
+ /* Put commonly used fields in local variables. */
+ int a_normal_exp;
+ int b_normal_exp;
+ fractype a_fraction;
+ fractype b_fraction;
+
+ if (isnan (a))
+ {
+ return a;
+ }
+ if (isnan (b))
+ {
+ return b;
+ }
+ if (isinf (a))
+ {
+ /* Adding infinities with opposite signs yields a NaN. */
+ if (isinf (b) && a->sign != b->sign)
+ return makenan ();
+ return a;
+ }
+ if (isinf (b))
+ {
+ return b;
+ }
+ if (iszero (b))
+ {
+ if (iszero (a))
+ {
+ *tmp = *a;
+ tmp->sign = a->sign & b->sign;
+ return tmp;
+ }
+ return a;
+ }
+ if (iszero (a))
+ {
+ return b;
+ }
+
+ /* Got two numbers. Shift the one with the smaller exponent right
+ and increment its exponent until the exponents match. */
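+ /* Note that despite its name, LSHIFT shifts right, ORing anything
+ shifted out back in as a sticky bit; e.g. aligning 1.5*2^3 with
+ 1.25*2^0 shifts the latter's fraction right by 3 and raises its
+ exponent to 3 before the fractions are combined. */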
+ {
+ int diff;
+ int sdiff;
+
+ a_normal_exp = a->normal_exp;
+ b_normal_exp = b->normal_exp;
+ a_fraction = a->fraction.ll;
+ b_fraction = b->fraction.ll;
+
+ diff = a_normal_exp - b_normal_exp;
+ sdiff = diff;
+
+ if (diff < 0)
+ diff = -diff;
+ if (diff < FRAC_NBITS)
+ {
+ if (sdiff > 0)
+ {
+ b_normal_exp += diff;
+ LSHIFT (b_fraction, diff);
+ }
+ else if (sdiff < 0)
+ {
+ a_normal_exp += diff;
+ LSHIFT (a_fraction, diff);
+ }
+ }
+ else
+ {
+ /* The exponents differ by at least FRAC_NBITS, so the smaller
+ operand is insignificant; keep the bigger one. */
+ if (a_normal_exp > b_normal_exp)
+ {
+ b_normal_exp = a_normal_exp;
+ b_fraction = 0;
+ }
+ else
+ {
+ a_normal_exp = b_normal_exp;
+ a_fraction = 0;
+ }
+ }
+ }
+
+ if (a->sign != b->sign)
+ {
+ if (a->sign)
+ {
+ tfraction = -a_fraction + b_fraction;
+ }
+ else
+ {
+ tfraction = a_fraction - b_fraction;
+ }
+ if (tfraction >= 0)
+ {
+ tmp->sign = 0;
+ tmp->normal_exp = a_normal_exp;
+ tmp->fraction.ll = tfraction;
+ }
+ else
+ {
+ tmp->sign = 1;
+ tmp->normal_exp = a_normal_exp;
+ tmp->fraction.ll = -tfraction;
+ }
+ /* and renormalize it */
+
+ while (tmp->fraction.ll < IMPLICIT_1 && tmp->fraction.ll)
+ {
+ tmp->fraction.ll <<= 1;
+ tmp->normal_exp--;
+ }
+ }
+ else
+ {
+ tmp->sign = a->sign;
+ tmp->normal_exp = a_normal_exp;
+ tmp->fraction.ll = a_fraction + b_fraction;
+ }
+ tmp->class = CLASS_NUMBER;
+ /* Now that the fractions are added, we may have to shift down to
+ renormalize the number. */
+
+ if (tmp->fraction.ll >= IMPLICIT_2)
+ {
+ LSHIFT (tmp->fraction.ll, 1);
+ tmp->normal_exp++;
+ }
+ return tmp;
+}
+
+FLO_type
+add (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ fp_number_type tmp;
+ const fp_number_type *res;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ res = _fpadd_parts (&a, &b, &tmp);
+
+ return pack_d (res);
+}
+
+FLO_type
+sub (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ fp_number_type tmp;
+ const fp_number_type *res;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ b.sign ^= 1;
+
+ res = _fpadd_parts (&a, &b, &tmp);
+
+ return pack_d (res);
+}
+#endif /* L_addsub_sf || L_addsub_df */
+
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
+static inline __attribute__ ((__always_inline__)) const fp_number_type *
+_fpmul_parts ( fp_number_type * a,
+ fp_number_type * b,
+ fp_number_type * tmp)
+{
+ fractype low = 0;
+ fractype high = 0;
+
+ if (isnan (a))
+ {
+ a->sign = a->sign != b->sign;
+ return a;
+ }
+ if (isnan (b))
+ {
+ b->sign = a->sign != b->sign;
+ return b;
+ }
+ if (isinf (a))
+ {
+ if (iszero (b))
+ return makenan ();
+ a->sign = a->sign != b->sign;
+ return a;
+ }
+ if (isinf (b))
+ {
+ if (iszero (a))
+ {
+ return makenan ();
+ }
+ b->sign = a->sign != b->sign;
+ return b;
+ }
+ if (iszero (a))
+ {
+ a->sign = a->sign != b->sign;
+ return a;
+ }
+ if (iszero (b))
+ {
+ b->sign = a->sign != b->sign;
+ return b;
+ }
+
+ /* Calculate the mantissa by multiplying both numbers to get a
+ twice-as-wide number. */
+ {
+#if defined(NO_DI_MODE) || defined(TFLOAT)
+ {
+ fractype x = a->fraction.ll;
+ fractype ylow = b->fraction.ll;
+ fractype yhigh = 0;
+ int bit;
+
+ /* ??? This multiplies one bit at a time. Optimize. */
+ for (bit = 0; bit < FRAC_NBITS; bit++)
+ {
+ int carry;
+
+ if (x & 1)
+ {
+ carry = (low += ylow) < ylow;
+ high += yhigh + carry;
+ }
+ yhigh <<= 1;
+ if (ylow & FRACHIGH)
+ {
+ yhigh |= 1;
+ }
+ ylow <<= 1;
+ x >>= 1;
+ }
+ }
+#elif defined(FLOAT)
+ /* Multiplying two USIs to get a UDI, we're safe. */
+ {
+ UDItype answer = (UDItype)a->fraction.ll * (UDItype)b->fraction.ll;
+
+ high = answer >> BITS_PER_SI;
+ low = answer;
+ }
+#else
+ /* fractype is DImode, but we need the result to be twice as wide.
+ Assuming a widening multiply from DImode to TImode is not
+ available, build one by hand. */
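+ /* Split each DImode fraction into 32-bit halves, n = nh*2^32 + nl
+ and m = mh*2^32 + ml; then n*m = pp_hh*2^64 + (pp_hl + pp_lh)*2^32
+ + pp_ll, with carries out of the middle sum accumulated in RES2. */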
+ {
+ USItype nl = a->fraction.ll;
+ USItype nh = a->fraction.ll >> BITS_PER_SI;
+ USItype ml = b->fraction.ll;
+ USItype mh = b->fraction.ll >> BITS_PER_SI;
+ UDItype pp_ll = (UDItype) ml * nl;
+ UDItype pp_hl = (UDItype) mh * nl;
+ UDItype pp_lh = (UDItype) ml * nh;
+ UDItype pp_hh = (UDItype) mh * nh;
+ UDItype res2 = 0;
+ UDItype res0 = 0;
+ UDItype ps_hh__ = pp_hl + pp_lh;
+ if (ps_hh__ < pp_hl)
+ res2 += (UDItype)1 << BITS_PER_SI;
+ pp_hl = (UDItype)(USItype)ps_hh__ << BITS_PER_SI;
+ res0 = pp_ll + pp_hl;
+ if (res0 < pp_ll)
+ res2++;
+ res2 += (ps_hh__ >> BITS_PER_SI) + pp_hh;
+ high = res2;
+ low = res0;
+ }
+#endif
+ }
+
+ tmp->normal_exp = a->normal_exp + b->normal_exp
+ + FRAC_NBITS - (FRACBITS + NGARDS);
+ tmp->sign = a->sign != b->sign;
+ while (high >= IMPLICIT_2)
+ {
+ tmp->normal_exp++;
+ if (high & 1)
+ {
+ low >>= 1;
+ low |= FRACHIGH;
+ }
+ high >>= 1;
+ }
+ while (high < IMPLICIT_1)
+ {
+ tmp->normal_exp--;
+
+ high <<= 1;
+ if (low & FRACHIGH)
+ high |= 1;
+ low <<= 1;
+ }
+
+ if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
+ {
+ if (high & (1 << NGARDS))
+ {
+ /* Because we're half way, we would round to even by adding
+ GARDROUND + 1, except that's also done in the packing
+ function, and rounding twice will lose precision and cause
+ the result to be too far off. Example: 32-bit floats with
+ bit patterns 0xfff * 0x3f800400 ~= 0xfff (less than 0.5ulp
+ off), not 0x1000 (more than 0.5ulp off). */
+ }
+ else if (low)
+ {
+ /* We're further than half way by a small amount corresponding
+ to the bits set in "low". Knowing that, we round here and
+ not in pack_d, because there we don't have "low" available
+ anymore. */
+ high += GARDROUND + 1;
+
+ /* Avoid further rounding in pack_d. */
+ high &= ~(fractype) GARDMASK;
+ }
+ }
+ tmp->fraction.ll = high;
+ tmp->class = CLASS_NUMBER;
+ return tmp;
+}
+
+FLO_type
+multiply (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ fp_number_type tmp;
+ const fp_number_type *res;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ res = _fpmul_parts (&a, &b, &tmp);
+
+ return pack_d (res);
+}
+#endif /* L_mul_sf || L_mul_df || L_mul_tf */
+
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
+static inline __attribute__ ((__always_inline__)) const fp_number_type *
+_fpdiv_parts (fp_number_type * a,
+ fp_number_type * b)
+{
+ fractype bit;
+ fractype numerator;
+ fractype denominator;
+ fractype quotient;
+
+ if (isnan (a))
+ {
+ return a;
+ }
+ if (isnan (b))
+ {
+ return b;
+ }
+
+ a->sign = a->sign ^ b->sign;
+
+ if (isinf (a) || iszero (a))
+ {
+ if (a->class == b->class)
+ return makenan ();
+ return a;
+ }
+
+ if (isinf (b))
+ {
+ a->fraction.ll = 0;
+ a->normal_exp = 0;
+ return a;
+ }
+ if (iszero (b))
+ {
+ a->class = CLASS_INFINITY;
+ return a;
+ }
+
+ /* Calculate the quotient mantissa by long division, one bit at a
+ time. */
+ {
+ /* quotient =
+ ( numerator / denominator) * 2^(numerator exponent - denominator exponent)
+ */
+
+ a->normal_exp = a->normal_exp - b->normal_exp;
+ numerator = a->fraction.ll;
+ denominator = b->fraction.ll;
+
+ if (numerator < denominator)
+ {
+ /* Fraction will be less than 1.0 */
+ numerator *= 2;
+ a->normal_exp--;
+ }
+ bit = IMPLICIT_1;
+ quotient = 0;
+ /* ??? This divides one bit at a time. Optimize. */
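+ /* Shift-and-subtract long division: the pre-shift above puts
+ numerator/denominator in [1, 2), so the first quotient bit lands
+ on IMPLICIT_1 and each iteration produces one further bit, down
+ through the guard bits. */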
+ while (bit)
+ {
+ if (numerator >= denominator)
+ {
+ quotient |= bit;
+ numerator -= denominator;
+ }
+ bit >>= 1;
+ numerator *= 2;
+ }
+
+ if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
+ {
+ if (quotient & (1 << NGARDS))
+ {
+ /* Because we're half way, we would round to even by adding
+ GARDROUND + 1, except that's also done in the packing
+ function, and rounding twice will lose precision and cause
+ the result to be too far off. */
+ }
+ else if (numerator)
+ {
+ /* We're further than half way by the small amount
+ corresponding to the bits set in "numerator". Knowing
+ that, we round here and not in pack_d, because there we
+ don't have "numerator" available anymore. */
+ quotient += GARDROUND + 1;
+
+ /* Avoid further rounding in pack_d. */
+ quotient &= ~(fractype) GARDMASK;
+ }
+ }
+
+ a->fraction.ll = quotient;
+ return (a);
+ }
+}
+
+FLO_type
+divide (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ const fp_number_type *res;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ res = _fpdiv_parts (&a, &b);
+
+ return pack_d (res);
+}
+#endif /* L_div_sf || L_div_df */
+
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+ || defined(L_fpcmp_parts_tf)
+/* __fpcmp_parts returns a three-way comparison against 0; thus
+ a<b -> -1
+ a==b -> 0
+ a>b -> +1
+ */
+
+int
+__fpcmp_parts (fp_number_type * a, fp_number_type * b)
+{
+#if 0
+ /* either nan -> unordered. Must be checked outside of this routine. */
+ if (isnan (a) && isnan (b))
+ {
+ return 1; /* still unordered! */
+ }
+#endif
+
+ if (isnan (a) || isnan (b))
+ {
+ return 1; /* how to indicate unordered compare? */
+ }
+ if (isinf (a) && isinf (b))
+ {
+ /* +inf > -inf, but +inf != +inf */
+ /* b \a| +inf(0)| -inf(1)
+ ______\+--------+--------
+ +inf(0)| a==b(0)| a<b(-1)
+ -------+--------+--------
+ -inf(1)| a>b(1) | a==b(0)
+ -------+--------+--------
+ So since unordered must be nonzero, just line up the columns...
+ */
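+ /* E.g. a == +inf (sign 0), b == -inf (sign 1) gives 1 - 0 == 1,
+ i.e. a > b; equal infinities give 0. */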
+ return b->sign - a->sign;
+ }
+ /* but not both... */
+ if (isinf (a))
+ {
+ return a->sign ? -1 : 1;
+ }
+ if (isinf (b))
+ {
+ return b->sign ? 1 : -1;
+ }
+ if (iszero (a) && iszero (b))
+ {
+ return 0;
+ }
+ if (iszero (a))
+ {
+ return b->sign ? 1 : -1;
+ }
+ if (iszero (b))
+ {
+ return a->sign ? -1 : 1;
+ }
+ /* now both are "normal". */
+ if (a->sign != b->sign)
+ {
+ /* opposite signs */
+ return a->sign ? -1 : 1;
+ }
+ /* same sign; exponents? */
+ if (a->normal_exp > b->normal_exp)
+ {
+ return a->sign ? -1 : 1;
+ }
+ if (a->normal_exp < b->normal_exp)
+ {
+ return a->sign ? 1 : -1;
+ }
+ /* same exponents; check size. */
+ if (a->fraction.ll > b->fraction.ll)
+ {
+ return a->sign ? -1 : 1;
+ }
+ if (a->fraction.ll < b->fraction.ll)
+ {
+ return a->sign ? 1 : -1;
+ }
+ /* after all that, they're equal. */
+ return 0;
+}
+#endif
+
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
+CMPtype
+compare (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ return __fpcmp_parts (&a, &b);
+}
+#endif /* L_compare_sf || L_compare_df */
+
+/* These should be optimized for their specific tasks someday. */
+
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
+CMPtype
+_eq_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return 1; /* false, truth == 0 */
+
+ return __fpcmp_parts (&a, &b) ;
+}
+#endif /* L_eq_sf || L_eq_df */
+
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
+CMPtype
+_ne_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return 1; /* true, truth != 0 */
+
+ return __fpcmp_parts (&a, &b) ;
+}
+#endif /* L_ne_sf || L_ne_df */
+
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
+CMPtype
+_gt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return -1; /* false, truth > 0 */
+
+ return __fpcmp_parts (&a, &b);
+}
+#endif /* L_gt_sf || L_gt_df */
+
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
+CMPtype
+_ge_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return -1; /* false, truth >= 0 */
+ return __fpcmp_parts (&a, &b) ;
+}
+#endif /* L_ge_sf || L_ge_df */
+
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
+CMPtype
+_lt_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return 1; /* false, truth < 0 */
+
+ return __fpcmp_parts (&a, &b);
+}
+#endif /* L_lt_sf || L_lt_df */
+
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
+CMPtype
+_le_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ if (isnan (&a) || isnan (&b))
+ return 1; /* false, truth <= 0 */
+
+ return __fpcmp_parts (&a, &b) ;
+}
+#endif /* L_le_sf || L_le_df */
+
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
+CMPtype
+_unord_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+ fp_number_type a;
+ fp_number_type b;
+ FLO_union_type au, bu;
+
+ au.value = arg_a;
+ bu.value = arg_b;
+
+ unpack_d (&au, &a);
+ unpack_d (&bu, &b);
+
+ return (isnan (&a) || isnan (&b));
+}
+#endif /* L_unord_sf || L_unord_df */
+
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
+FLO_type
+si_to_float (SItype arg_a)
+{
+ fp_number_type in;
+
+ in.class = CLASS_NUMBER;
+ in.sign = arg_a < 0;
+ if (!arg_a)
+ {
+ in.class = CLASS_ZERO;
+ }
+ else
+ {
+ USItype uarg;
+ int shift;
+ in.normal_exp = FRACBITS + NGARDS;
+ if (in.sign)
+ {
+ /* Special case for minint, since there is no positive
+ integer representation for it. */
+ if (arg_a == (- MAX_SI_INT - 1))
+ {
+ return (FLO_type)(- MAX_SI_INT - 1);
+ }
+ uarg = (-arg_a);
+ }
+ else
+ uarg = arg_a;
+
+ in.fraction.ll = uarg;
+ shift = clzusi (uarg) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+ if (shift > 0)
+ {
+ in.fraction.ll <<= shift;
+ in.normal_exp -= shift;
+ }
+ }
+ return pack_d (&in);
+}
+#endif /* L_si_to_sf || L_si_to_df */
+
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
+FLO_type
+usi_to_float (USItype arg_a)
+{
+ fp_number_type in;
+
+ in.sign = 0;
+ if (!arg_a)
+ {
+ in.class = CLASS_ZERO;
+ }
+ else
+ {
+ int shift;
+ in.class = CLASS_NUMBER;
+ in.normal_exp = FRACBITS + NGARDS;
+ in.fraction.ll = arg_a;
+
+ shift = clzusi (arg_a) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+ if (shift < 0)
+ {
+ fractype guard = in.fraction.ll & (((fractype)1 << -shift) - 1);
+ in.fraction.ll >>= -shift;
+ in.fraction.ll |= (guard != 0);
+ in.normal_exp -= shift;
+ }
+ else if (shift > 0)
+ {
+ in.fraction.ll <<= shift;
+ in.normal_exp -= shift;
+ }
+ }
+ return pack_d (&in);
+}
+#endif
+
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
+SItype
+float_to_si (FLO_type arg_a)
+{
+ fp_number_type a;
+ SItype tmp;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &a);
+
+ if (iszero (&a))
+ return 0;
+ if (isnan (&a))
+ return 0;
+ /* get reasonable MAX_SI_INT... */
+ if (isinf (&a))
+ return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
+ /* it is a number, but a small one */
+ if (a.normal_exp < 0)
+ return 0;
+ if (a.normal_exp > BITS_PER_SI - 2)
+ return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
+ tmp = a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp);
+ return a.sign ? (-tmp) : (tmp);
+}
+#endif /* L_sf_to_si || L_df_to_si */
+
+#if defined(L_tf_to_usi)
+USItype
+float_to_usi (FLO_type arg_a)
+{
+ fp_number_type a;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &a);
+
+ if (iszero (&a))
+ return 0;
+ if (isnan (&a))
+ return 0;
+ /* it is a negative number */
+ if (a.sign)
+ return 0;
+ /* get reasonable MAX_USI_INT... */
+ if (isinf (&a))
+ return MAX_USI_INT;
+ /* it is a number, but a small one */
+ if (a.normal_exp < 0)
+ return 0;
+ if (a.normal_exp > BITS_PER_SI - 1)
+ return MAX_USI_INT;
+ else if (a.normal_exp > (FRACBITS + NGARDS))
+ return a.fraction.ll << (a.normal_exp - (FRACBITS + NGARDS));
+ else
+ return a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp);
+}
+#endif /* L_tf_to_usi */
+
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
+FLO_type
+negate (FLO_type arg_a)
+{
+ fp_number_type a;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &a);
+
+ flip_sign (&a);
+ return pack_d (&a);
+}
+#endif /* L_negate_sf || L_negate_df */
+
+#ifdef FLOAT
+
+#if defined(L_make_sf)
+SFtype
+__make_fp(fp_class_type class,
+ unsigned int sign,
+ int exp,
+ USItype frac)
+{
+ fp_number_type in;
+
+ in.class = class;
+ in.sign = sign;
+ in.normal_exp = exp;
+ in.fraction.ll = frac;
+ return pack_d (&in);
+}
+#endif /* L_make_sf */
+
+#ifndef FLOAT_ONLY
+
+/* This enables one to build an fp library that supports float but not double.
+ Otherwise, we would get an undefined reference to __make_dp.
+ This is needed for some 8-bit ports that can't handle well values that
+ are 8-bytes in size, so we just don't support double for them at all. */
+
+#if defined(L_sf_to_df)
+DFtype
+sf_to_df (SFtype arg_a)
+{
+ fp_number_type in;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ return __make_dp (in.class, in.sign, in.normal_exp,
+ ((UDItype) in.fraction.ll) << F_D_BITOFF);
+}
+#endif /* L_sf_to_df */
+
+#if defined(L_sf_to_tf) && defined(TMODES)
+TFtype
+sf_to_tf (SFtype arg_a)
+{
+ fp_number_type in;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ return __make_tp (in.class, in.sign, in.normal_exp,
+ ((UTItype) in.fraction.ll) << F_T_BITOFF);
+}
+#endif /* L_sf_to_tf */
+
+#endif /* ! FLOAT_ONLY */
+#endif /* FLOAT */
+
+#ifndef FLOAT
+
+extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
+
+#if defined(L_make_df)
+DFtype
+__make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac)
+{
+ fp_number_type in;
+
+ in.class = class;
+ in.sign = sign;
+ in.normal_exp = exp;
+ in.fraction.ll = frac;
+ return pack_d (&in);
+}
+#endif /* L_make_df */
+
+#if defined(L_df_to_sf)
+SFtype
+df_to_sf (DFtype arg_a)
+{
+ fp_number_type in;
+ USItype sffrac;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ sffrac = in.fraction.ll >> F_D_BITOFF;
+
+ /* We set the lowest guard bit in SFFRAC if we discarded any non
+ zero bits. */
+ if ((in.fraction.ll & (((USItype) 1 << F_D_BITOFF) - 1)) != 0)
+ sffrac |= 1;
+
+ return __make_fp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_df_to_sf */
+
+#if defined(L_df_to_tf) && defined(TMODES) \
+ && !defined(FLOAT) && !defined(TFLOAT)
+TFtype
+df_to_tf (DFtype arg_a)
+{
+ fp_number_type in;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ return __make_tp (in.class, in.sign, in.normal_exp,
+ ((UTItype) in.fraction.ll) << D_T_BITOFF);
+}
+#endif /* L_df_to_tf */
+
+#ifdef TFLOAT
+#if defined(L_make_tf)
+TFtype
+__make_tp(fp_class_type class,
+ unsigned int sign,
+ int exp,
+ UTItype frac)
+{
+ fp_number_type in;
+
+ in.class = class;
+ in.sign = sign;
+ in.normal_exp = exp;
+ in.fraction.ll = frac;
+ return pack_d (&in);
+}
+#endif /* L_make_tf */
+
+#if defined(L_tf_to_df)
+DFtype
+tf_to_df (TFtype arg_a)
+{
+ fp_number_type in;
+ UDItype sffrac;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ sffrac = in.fraction.ll >> D_T_BITOFF;
+
+ /* We set the lowest guard bit in SFFRAC if we discarded any non
+ zero bits. */
+ if ((in.fraction.ll & (((UTItype) 1 << D_T_BITOFF) - 1)) != 0)
+ sffrac |= 1;
+
+ return __make_dp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_df */
+
+#if defined(L_tf_to_sf)
+SFtype
+tf_to_sf (TFtype arg_a)
+{
+ fp_number_type in;
+ USItype sffrac;
+ FLO_union_type au;
+
+ au.value = arg_a;
+ unpack_d (&au, &in);
+
+ sffrac = in.fraction.ll >> F_T_BITOFF;
+
+ /* We set the lowest guard bit in SFFRAC if we discarded any non
+ zero bits. */
+ if ((in.fraction.ll & (((UTItype) 1 << F_T_BITOFF) - 1)) != 0)
+ sffrac |= 1;
+
+ return __make_fp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_sf */
+#endif /* TFLOAT */
+
+#endif /* ! FLOAT */
+#endif /* !EXTENDED_FLOAT_STUBS */
diff --git a/gcc/config/fp-bit.h b/gcc/config/fp-bit.h
new file mode 100644
index 000000000..f0b07e91f
--- /dev/null
+++ b/gcc/config/fp-bit.h
@@ -0,0 +1,499 @@
+/* Header file for fp-bit.c. */
+/* Copyright (C) 2000, 2002, 2003, 2006, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_FP_BIT_H
+#define GCC_FP_BIT_H
+
+/* Defining FINE_GRAINED_LIBRARIES allows one to select which routines
+ from this file are compiled via additional -D options.
+
+ This avoids the need to pull in the entire fp emulation library
+ when only a small number of functions are needed.
+
+ If FINE_GRAINED_LIBRARIES is not defined, then compile every
+ suitable routine. */
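+/* For example, a target makefile might build just the single-float
+ multiply routine by compiling fp-bit.c with something like
+ -DFLOAT -DFINE_GRAINED_LIBRARIES -DL_mul_sf; the exact option set
+ is chosen by each target's makefile fragment. */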
+#ifndef FINE_GRAINED_LIBRARIES
+#define L_pack_df
+#define L_unpack_df
+#define L_pack_sf
+#define L_unpack_sf
+#define L_addsub_sf
+#define L_addsub_df
+#define L_mul_sf
+#define L_mul_df
+#define L_div_sf
+#define L_div_df
+#define L_fpcmp_parts_sf
+#define L_fpcmp_parts_df
+#define L_compare_sf
+#define L_compare_df
+#define L_eq_sf
+#define L_eq_df
+#define L_ne_sf
+#define L_ne_df
+#define L_gt_sf
+#define L_gt_df
+#define L_ge_sf
+#define L_ge_df
+#define L_lt_sf
+#define L_lt_df
+#define L_le_sf
+#define L_le_df
+#define L_unord_sf
+#define L_unord_df
+#define L_usi_to_sf
+#define L_usi_to_df
+#define L_si_to_sf
+#define L_si_to_df
+#define L_sf_to_si
+#define L_df_to_si
+#define L_f_to_usi
+#define L_df_to_usi
+#define L_negate_sf
+#define L_negate_df
+#define L_make_sf
+#define L_make_df
+#define L_sf_to_df
+#define L_df_to_sf
+#ifdef FLOAT
+#define L_thenan_sf
+#else
+#define L_thenan_df
+#endif
+#endif /* ! FINE_GRAINED_LIBRARIES */
+
+#if __LDBL_MANT_DIG__ == 113 || __LDBL_MANT_DIG__ == 106
+# if defined(TFLOAT) || defined(L_sf_to_tf) || defined(L_df_to_tf)
+# define TMODES
+# endif
+#endif
+
+typedef float SFtype __attribute__ ((mode (SF)));
+typedef float DFtype __attribute__ ((mode (DF)));
+#ifdef TMODES
+typedef float TFtype __attribute__ ((mode (TF)));
+#endif
+
+typedef int HItype __attribute__ ((mode (HI)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+#ifdef TMODES
+typedef int TItype __attribute__ ((mode (TI)));
+#endif
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+#ifndef CMPtype
+typedef int CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#endif
+
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#ifdef TMODES
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+#endif
+
+#define MAX_USI_INT (~(USItype)0)
+#define MAX_SI_INT ((SItype) (MAX_USI_INT >> 1))
+#define BITS_PER_SI (4 * BITS_PER_UNIT)
+#ifdef TMODES
+#define MAX_UDI_INT (~(UDItype)0)
+#define MAX_DI_INT ((DItype) (MAX_UDI_INT >> 1))
+#define BITS_PER_DI (8 * BITS_PER_UNIT)
+#endif
+
+#ifdef FLOAT_ONLY
+#define NO_DI_MODE
+#endif
+
+#ifdef TFLOAT
+# ifndef TMODES
+# error "TFLOAT requires long double to have 113 bits of mantissa"
+# endif
+
+# define PREFIXFPDP tp
+# define PREFIXSFDF tf
+# define NGARDS 10L /* Is this right? */
+# define GARDROUND 0x1ff
+# define GARDMASK 0x3ff
+# define GARDMSB 0x200
+# define FRAC_NBITS 128
+
+# if __LDBL_MANT_DIG__ == 113 /* IEEE quad */
+# define EXPBITS 15
+# define EXPBIAS 16383
+# define EXPMAX (0x7fff)
+# define QUIET_NAN ((TItype)0x8 << 108)
+# define FRACHIGH ((TItype)0x8 << 124)
+# define FRACHIGH2 ((TItype)0xc << 124)
+# define FRACBITS 112
+# endif
+
+# if __LDBL_MANT_DIG__ == 106 /* IBM extended (double+double) */
+# define EXPBITS 11
+# define EXPBIAS 1023
+# define EXPMAX (0x7ff)
+# define QUIET_NAN ((TItype)0x8 << (48 + 64))
+# define FRACHIGH ((TItype)0x8 << 124)
+# define FRACHIGH2 ((TItype)0xc << 124)
+# define FRACBITS 105
+# define HALFFRACBITS 52
+# define HALFSHIFT 64
+# endif
+
+# define pack_d __pack_t
+# define unpack_d __unpack_t
+# define __fpcmp_parts __fpcmp_parts_t
+ typedef UTItype fractype;
+ typedef UDItype halffractype;
+ typedef USItype qrtrfractype;
+#define qrtrfractype qrtrfractype
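+/* The identity #define above lets "#ifdef qrtrfractype" detect that
+ the type exists when laying out FLO_union_type below. */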
+ typedef TFtype FLO_type;
+ typedef TItype intfrac;
+#elif defined FLOAT
+# define NGARDS 7L
+# define GARDROUND 0x3f
+# define GARDMASK 0x7f
+# define GARDMSB 0x40
+# define EXPBITS 8
+# define EXPBIAS 127
+# define FRACBITS 23
+# define EXPMAX (0xff)
+# define QUIET_NAN 0x100000L
+# define FRAC_NBITS 32
+# define FRACHIGH 0x80000000L
+# define FRACHIGH2 0xc0000000L
+# define pack_d __pack_f
+# define unpack_d __unpack_f
+# define __fpcmp_parts __fpcmp_parts_f
+ typedef USItype fractype;
+ typedef UHItype halffractype;
+ typedef SFtype FLO_type;
+ typedef SItype intfrac;
+
+#else
+# define PREFIXFPDP dp
+# define PREFIXSFDF df
+# define NGARDS 8L
+# define GARDROUND 0x7f
+# define GARDMASK 0xff
+# define GARDMSB 0x80
+# define EXPBITS 11
+# define EXPBIAS 1023
+# define FRACBITS 52
+# define EXPMAX (0x7ff)
+# define QUIET_NAN 0x8000000000000LL
+# define FRAC_NBITS 64
+# define FRACHIGH 0x8000000000000000LL
+# define FRACHIGH2 0xc000000000000000LL
+# define pack_d __pack_d
+# define unpack_d __unpack_d
+# define __fpcmp_parts __fpcmp_parts_d
+ typedef UDItype fractype;
+ typedef USItype halffractype;
+ typedef DFtype FLO_type;
+ typedef DItype intfrac;
+#endif /* FLOAT */
+
+#ifdef TFLOAT
+# define add __addtf3
+# define sub __subtf3
+# define multiply __multf3
+# define divide __divtf3
+# define compare __cmptf2
+# define _eq_f2 __eqtf2
+# define _ne_f2 __netf2
+# define _gt_f2 __gttf2
+# define _ge_f2 __getf2
+# define _lt_f2 __lttf2
+# define _le_f2 __letf2
+# define _unord_f2 __unordtf2
+# define usi_to_float __floatunsitf
+# define si_to_float __floatsitf
+# define float_to_si __fixtfsi
+# define float_to_usi __fixunstfsi
+# define negate __negtf2
+# define tf_to_sf __trunctfsf2
+# define tf_to_df __trunctfdf2
+#elif defined FLOAT
+# define add __addsf3
+# define sub __subsf3
+# define multiply __mulsf3
+# define divide __divsf3
+# define compare __cmpsf2
+# define _eq_f2 __eqsf2
+# define _ne_f2 __nesf2
+# define _gt_f2 __gtsf2
+# define _ge_f2 __gesf2
+# define _lt_f2 __ltsf2
+# define _le_f2 __lesf2
+# define _unord_f2 __unordsf2
+# define usi_to_float __floatunsisf
+# define si_to_float __floatsisf
+# define float_to_si __fixsfsi
+# define float_to_usi __fixunssfsi
+# define negate __negsf2
+# define sf_to_df __extendsfdf2
+# define sf_to_tf __extendsftf2
+#else
+# define add __adddf3
+# define sub __subdf3
+# define multiply __muldf3
+# define divide __divdf3
+# define compare __cmpdf2
+# define _eq_f2 __eqdf2
+# define _ne_f2 __nedf2
+# define _gt_f2 __gtdf2
+# define _ge_f2 __gedf2
+# define _lt_f2 __ltdf2
+# define _le_f2 __ledf2
+# define _unord_f2 __unorddf2
+# define usi_to_float __floatunsidf
+# define si_to_float __floatsidf
+# define float_to_si __fixdfsi
+# define float_to_usi __fixunsdfsi
+# define negate __negdf2
+# define df_to_sf __truncdfsf2
+# define df_to_tf __extenddftf2
+#endif /* FLOAT */
+
+#ifndef INLINE
+#define INLINE __inline__
+#endif
+
+/* Preserve the sticky-bit when shifting fractions to the right. */
+#define LSHIFT(a, s) { a = (a >> s) | !!(a & (((fractype) 1 << s) - 1)); }
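+/* E.g. LSHIFT (a, 2) with a == 0b1011 leaves a == 0b11: the 1 shifted
+ out survives as the sticky low-order bit. */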
+
+/* numeric parameters */
+/* F_D_BITOFF is the number of bits offset between the MSB of the mantissa
+ of a float and of a double. Assumes there are only two float types.
+ (double::FRACBITS+double::NGARDS-(float::FRACBITS+float::NGARDS))
+ */
+#define F_D_BITOFF (52+8-(23+7))
+
+#ifdef TMODES
+# define F_T_BITOFF (__LDBL_MANT_DIG__-1+10-(23+7))
+# define D_T_BITOFF (__LDBL_MANT_DIG__-1+10-(52+8))
+#endif
+
+
+#define NORMAL_EXPMIN (-(EXPBIAS)+1)
+#define IMPLICIT_1 ((fractype)1<<(FRACBITS+NGARDS))
+#define IMPLICIT_2 ((fractype)1<<(FRACBITS+1+NGARDS))
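+/* With the SF parameters (FRACBITS == 23, NGARDS == 7), IMPLICIT_1 is
+ 1 << 30 and IMPLICIT_2 is 1 << 31: the hidden leading one sits just
+ above the 23 fraction bits and 7 guard bits. */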
+
+/* common types */
+
+typedef enum
+{
+ CLASS_SNAN,
+ CLASS_QNAN,
+ CLASS_ZERO,
+ CLASS_NUMBER,
+ CLASS_INFINITY
+} fp_class_type;
+
+typedef struct
+{
+#ifdef SMALL_MACHINE
+ char class;
+ unsigned char sign;
+ short normal_exp;
+#else
+ fp_class_type class;
+ unsigned int sign;
+ int normal_exp;
+#endif
+
+ union
+ {
+ fractype ll;
+ halffractype l[2];
+ } fraction;
+} fp_number_type;
+
+typedef union
+{
+ FLO_type value;
+ fractype value_raw;
+
+#ifndef FLOAT
+# ifdef qrtrfractype
+ qrtrfractype qwords[4];
+# else
+ halffractype words[2];
+# endif
+#endif
+
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+ struct
+ {
+ fractype fraction:FRACBITS __attribute__ ((packed));
+ unsigned int exp:EXPBITS __attribute__ ((packed));
+ unsigned int sign:1 __attribute__ ((packed));
+ }
+ bits;
+#endif
+
+#ifdef _DEBUG_BITFLOAT
+ struct
+ {
+ unsigned int sign:1 __attribute__ ((packed));
+ unsigned int exp:EXPBITS __attribute__ ((packed));
+ fractype fraction:FRACBITS __attribute__ ((packed));
+ }
+ bits_big_endian;
+
+ struct
+ {
+ fractype fraction:FRACBITS __attribute__ ((packed));
+ unsigned int exp:EXPBITS __attribute__ ((packed));
+ unsigned int sign:1 __attribute__ ((packed));
+ }
+ bits_little_endian;
+#endif
+}
+FLO_union_type;
+
+/* Prototypes. */
+
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
+extern FLO_type pack_d (const fp_number_type *);
+#endif
+
+extern void unpack_d (FLO_union_type *, fp_number_type *);
+
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
+extern FLO_type add (FLO_type, FLO_type);
+extern FLO_type sub (FLO_type, FLO_type);
+#endif
+
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
+extern FLO_type multiply (FLO_type, FLO_type);
+#endif
+
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
+extern FLO_type divide (FLO_type, FLO_type);
+#endif
+
+extern int __fpcmp_parts (fp_number_type *, fp_number_type *);
+
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
+extern CMPtype compare (FLO_type, FLO_type);
+#endif
+
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
+extern CMPtype _eq_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
+extern CMPtype _ne_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
+extern CMPtype _gt_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
+extern CMPtype _ge_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
+extern CMPtype _lt_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
+extern CMPtype _le_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
+extern CMPtype _unord_f2 (FLO_type, FLO_type);
+#endif
+
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
+extern FLO_type si_to_float (SItype);
+#endif
+
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
+extern SItype float_to_si (FLO_type);
+#endif
+
+#if defined(L_tf_to_usi)
+extern USItype float_to_usi (FLO_type);
+#endif
+
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
+extern FLO_type usi_to_float (USItype);
+#endif
+
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
+extern FLO_type negate (FLO_type);
+#endif
+
+#ifdef FLOAT
+#if defined(L_make_sf)
+extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
+#endif
+#ifndef FLOAT_ONLY
+extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
+#if defined(L_sf_to_df)
+extern DFtype sf_to_df (SFtype);
+#endif
+#if defined(L_sf_to_tf) && defined(TMODES)
+extern TFtype sf_to_tf (SFtype);
+#endif
+#endif /* ! FLOAT_ONLY */
+#endif /* FLOAT */
+
+#ifndef FLOAT
+extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
+#if defined(L_make_df)
+extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
+#endif
+#if defined(L_df_to_sf)
+extern SFtype df_to_sf (DFtype);
+#endif
+#if defined(L_df_to_tf) && defined(TMODES)
+extern TFtype df_to_tf (DFtype);
+#endif
+#endif /* ! FLOAT */
+
+#ifdef TMODES
+extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
+extern TFtype __make_tp (fp_class_type, unsigned int, int, UTItype);
+#ifdef TFLOAT
+#if defined(L_tf_to_sf)
+extern SFtype tf_to_sf (TFtype);
+#endif
+#if defined(L_tf_to_df)
+extern DFtype tf_to_df (TFtype);
+#endif
+#if defined(L_di_to_tf)
+extern TFtype di_to_df (DItype);
+#endif
+#endif /* TFLOAT */
+#endif /* TMODES */
+
+#endif /* ! GCC_FP_BIT_H */
diff --git a/gcc/config/fr30/constraints.md b/gcc/config/fr30/constraints.md
new file mode 100644
index 000000000..2fb049159
--- /dev/null
+++ b/gcc/config/fr30/constraints.md
@@ -0,0 +1,72 @@
+;; Constraint definitions for the FR30.
+;; Copyright (C) 2011
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+(define_register_constraint "d" "MULTIPLY_64_REG"
+ "The MDH,MDL register pair as used by MUL and MULU.")
+
+(define_register_constraint "e" "MULTIPLY_32_REG"
+ "The MDL register as used by MULH and MULUH.")
+
+(define_register_constraint "h" "HIGH_REGS"
+ "Registers 8 through 15.")
+
+(define_register_constraint "l" "LOW_REGS"
+ "Registers 0 through 7.")
+
+(define_register_constraint "a" "ALL_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "An integer in the range 0 to 15."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 15)")))
+
+(define_constraint "J"
+ "An integer in the range -16 to -1."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -16, -1)")))
+
+(define_constraint "K"
+ "An integer in the range 16 to 31."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 16, 31)")))
+
+(define_constraint "L"
+ "An integer in the range 0 to 255."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 255)")))
+
+(define_constraint "M"
+ "An integer in the range 0 to 1048575."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 1048575)")))
+
+(define_constraint "P"
+ "An integer in the range -256 to 255."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -256, 255)")))
+
+;; Extra constraints.
+(define_constraint "Q"
+ "@internal"
+ (and (match_code "mem")
+ (match_code "symbol_ref" "0")))
diff --git a/gcc/config/fr30/crti.asm b/gcc/config/fr30/crti.asm
new file mode 100644
index 000000000..4ce61231b
--- /dev/null
+++ b/gcc/config/fr30/crti.asm
@@ -0,0 +1,61 @@
+# crti.s for ELF
+
+# Copyright (C) 1992, 1998, 1999, 2008, 2009 Free Software Foundation, Inc.
+# Written By David Vinayak Henkel-Wallace, June 1992
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+ .section ".init"
+ .global _init
+ .type _init,#function
+ .align 4
+_init:
+ st rp, @-r15
+ enter #4
+
+ # These nops are here to align the end of this code with a 16 byte
+ # boundary. The linker will start inserting code into the .init
+ # section at such a boundary.
+
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+
+ .section ".fini"
+ .global _fini
+ .type _fini,#function
+ .align 4
+_fini:
+ st rp, @-r15
+ enter #4
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/gcc/config/fr30/crtn.asm b/gcc/config/fr30/crtn.asm
new file mode 100644
index 000000000..ac2712186
--- /dev/null
+++ b/gcc/config/fr30/crtn.asm
@@ -0,0 +1,44 @@
+# crtn.asm for ELF
+
+# Copyright (C) 1992, 1999, 2008, 2009 Free Software Foundation, Inc.
+# Written By David Vinayak Henkel-Wallace, June 1992
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+ .section ".init"
+ .align 4
+
+ leave
+ ld @r15+,rp
+ ret
+
+
+ .section ".fini"
+ .align 4
+
+ leave
+ ld @r15+,rp
+ ret
+
+# Th-th-th-that is all folks!
diff --git a/gcc/config/fr30/fr30-protos.h b/gcc/config/fr30/fr30-protos.h
new file mode 100644
index 000000000..45bb3444e
--- /dev/null
+++ b/gcc/config/fr30/fr30-protos.h
@@ -0,0 +1,35 @@
+/* Prototypes for fr30.c functions used in the md file & elsewhere.
+ Copyright (C) 1999, 2000, 2002, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern void fr30_expand_prologue (void);
+extern void fr30_expand_epilogue (void);
+extern unsigned int fr30_compute_frame_size (int, int);
+
+#ifdef RTX_CODE
+extern int fr30_check_multiple_regs (rtx *, int, int);
+extern void fr30_print_operand (FILE *, rtx, int);
+extern void fr30_print_operand_address (FILE *, rtx);
+extern rtx fr30_move_double (rtx *);
+#ifdef HAVE_MACHINE_MODES
+#define Mmode enum machine_mode
+extern int fr30_const_double_is_zero (rtx);
+#undef Mmode
+#endif /* HAVE_MACHINE_MODES */
+#endif /* RTX_CODE */
diff --git a/gcc/config/fr30/fr30.c b/gcc/config/fr30/fr30.c
new file mode 100644
index 000000000..74585b5dc
--- /dev/null
+++ b/gcc/config/fr30/fr30.c
@@ -0,0 +1,1066 @@
+/* FR30 specific functions.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2009,
+ 2010 Free Software Foundation, Inc.
+ Contributed by Cygnus Solutions.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/*{{{ Includes */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "tree.h"
+#include "output.h"
+#include "expr.h"
+#include "obstack.h"
+#include "except.h"
+#include "function.h"
+#include "df.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+
+/*}}}*/
+/*{{{ Function Prologues & Epilogues */
+
+/* The FR30 stack looks like this:
+
+ Before call After call
+ FP ->| | | |
+ +-----------------------+ +-----------------------+ high
+ | | | | memory
+ | local variables, | | local variables, |
+ | reg save area, etc. | | reg save area, etc. |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | args to the func that | | args to this func. |
+ | is being called that | | |
+ SP ->| do not fit in regs | | |
+ +-----------------------+ +-----------------------+
+ | args that used to be | \
+ | in regs; only created | | pretend_size
+ AP-> | for vararg funcs | /
+ +-----------------------+
+ | | \
+ | register save area | |
+ | | |
+ +-----------------------+ | reg_size
+ | return address | |
+ +-----------------------+ |
+ FP ->| previous frame ptr | /
+ +-----------------------+
+ | | \
+ | local variables | | var_size
+ | | /
+ +-----------------------+
+ | | \
+ low | room for args to | |
+ memory | other funcs called | | args_size
+ | from this one | |
+ SP ->| | /
+ +-----------------------+
+
+ Note, AP is a fake hard register. It will be eliminated in favor of
+ SP or FP as appropriate.
+
+ Note, some or all of the stack sections above may be omitted if they
+ are not needed. */
+
+/* Structure to be filled in by fr30_compute_frame_size() with register
+ save masks, and offsets for the current function. */
+struct fr30_frame_info
+{
+ unsigned int total_size; /* # Bytes that the entire frame takes up. */
+ unsigned int pretend_size; /* # Bytes we push and pretend caller did. */
+ unsigned int args_size; /* # Bytes that outgoing arguments take up. */
+ unsigned int reg_size; /* # Bytes needed to store regs. */
+ unsigned int var_size; /* # Bytes that variables take up. */
+ unsigned int frame_size; /* # Bytes in current frame. */
+ unsigned int gmask; /* Mask of saved registers. */
+ unsigned int save_fp; /* Nonzero if frame pointer must be saved. */
+ unsigned int save_rp; /* Nonzero if return pointer must be saved. */
+ int initialised; /* Nonzero if frame size already calculated. */
+};
+
+/* Current frame information calculated by fr30_compute_frame_size(). */
+static struct fr30_frame_info current_frame_info;
+
+/* Zero structure to initialize current_frame_info. */
+static struct fr30_frame_info zero_frame_info;
+
+static void fr30_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static bool fr30_must_pass_in_stack (enum machine_mode, const_tree);
+static int fr30_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx fr30_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void fr30_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool fr30_frame_pointer_required (void);
+static rtx fr30_function_value (const_tree, const_tree, bool);
+static rtx fr30_libcall_value (enum machine_mode, const_rtx);
+static bool fr30_function_value_regno_p (const unsigned int);
+static bool fr30_can_eliminate (const int, const int);
+static void fr30_asm_trampoline_template (FILE *);
+static void fr30_trampoline_init (rtx, tree, rtx);
+static int fr30_num_arg_regs (enum machine_mode, const_tree);
+
+#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
+#define RETURN_POINTER_MASK (1 << (RETURN_POINTER_REGNUM))
+
+/* Tell prologue and epilogue if register REGNO should be saved / restored.
+ The return address and frame pointer are treated separately.
+ Don't consider them here. */
+#define MUST_SAVE_REGISTER(regno) \
+ ( (regno) != RETURN_POINTER_REGNUM \
+ && (regno) != FRAME_POINTER_REGNUM \
+ && df_regs_ever_live_p (regno) \
+ && ! call_used_regs [regno] )
+
+#define MUST_SAVE_FRAME_POINTER (df_regs_ever_live_p (FRAME_POINTER_REGNUM) || frame_pointer_needed)
+#define MUST_SAVE_RETURN_POINTER (df_regs_ever_live_p (RETURN_POINTER_REGNUM) || crtl->profile)
+
+#if UNITS_PER_WORD == 4
+#define WORD_ALIGN(SIZE) (((SIZE) + 3) & ~3)
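+/* E.g. WORD_ALIGN (5) == 8 and WORD_ALIGN (8) == 8. */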
+#endif
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options fr30_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES fr30_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG fr30_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE fr30_function_arg_advance
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE fr30_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE fr30_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P fr30_function_value_regno_p
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS fr30_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK fr30_must_pass_in_stack
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED fr30_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE fr30_can_eliminate
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE fr30_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT fr30_trampoline_init
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE fr30_option_optimization_table
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+fr30_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == FRAME_POINTER_REGNUM || ! frame_pointer_needed);
+}
+
+/* Returns the offset in bytes between FROM_REG and TO_REG
+ for the current function. As a side effect it fills in the
+ current_frame_info structure, if the data is available. */
+unsigned int
+fr30_compute_frame_size (int from_reg, int to_reg)
+{
+ int regno;
+ unsigned int return_value;
+ unsigned int var_size;
+ unsigned int args_size;
+ unsigned int pretend_size;
+ unsigned int reg_size;
+ unsigned int gmask;
+
+ var_size = WORD_ALIGN (get_frame_size ());
+ args_size = WORD_ALIGN (crtl->outgoing_args_size);
+ pretend_size = crtl->args.pretend_args_size;
+
+ reg_size = 0;
+ gmask = 0;
+
+ /* Calculate space needed for registers. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
+ {
+ if (MUST_SAVE_REGISTER (regno))
+ {
+ reg_size += UNITS_PER_WORD;
+ gmask |= 1 << regno;
+ }
+ }
+
+ current_frame_info.save_fp = MUST_SAVE_FRAME_POINTER;
+ current_frame_info.save_rp = MUST_SAVE_RETURN_POINTER;
+
+ reg_size += (current_frame_info.save_fp + current_frame_info.save_rp)
+ * UNITS_PER_WORD;
+
+ /* Save computed information. */
+ current_frame_info.pretend_size = pretend_size;
+ current_frame_info.var_size = var_size;
+ current_frame_info.args_size = args_size;
+ current_frame_info.reg_size = reg_size;
+ current_frame_info.frame_size = args_size + var_size;
+ current_frame_info.total_size = args_size + var_size + reg_size + pretend_size;
+ current_frame_info.gmask = gmask;
+ current_frame_info.initialised = reload_completed;
+
+ /* Calculate the required distance. */
+ return_value = 0;
+
+ if (to_reg == STACK_POINTER_REGNUM)
+ return_value += args_size + var_size;
+
+ if (from_reg == ARG_POINTER_REGNUM)
+ return_value += reg_size;
+
+ return return_value;
+}
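+
+/* For example, a function with 8 bytes of locals, no outgoing argument
+   area and two call-saved registers to preserve (reg_size == 8) yields
+   a distance of 8 from the frame pointer to the stack pointer, 8 from
+   the arg pointer to the frame pointer, and 16 from the arg pointer to
+   the stack pointer, matching the ELIMINABLE_REGS pairs in fr30.h.  */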
+
+/* Called after register allocation to add any instructions needed for the
+ prologue. Using a prologue insn is favored compared to putting all of the
+ instructions in output_function_prologue(), since it allows the scheduler
+ to intermix instructions with the saves of the caller saved registers. In
+ some cases, it might be necessary to emit a barrier instruction as the last
+ insn to prevent such scheduling. */
+
+void
+fr30_expand_prologue (void)
+{
+ int regno;
+ rtx insn;
+
+ if (! current_frame_info.initialised)
+ fr30_compute_frame_size (0, 0);
+
+  /* This case shouldn't happen.  Catch it now.  */
+ gcc_assert (current_frame_info.total_size || !current_frame_info.gmask);
+
+ /* Allocate space for register arguments if this is a variadic function. */
+ if (current_frame_info.pretend_size)
+ {
+ int regs_to_save = current_frame_info.pretend_size / UNITS_PER_WORD;
+
+ /* Push argument registers into the pretend arg area. */
+ for (regno = FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS; regno --, regs_to_save --;)
+ {
+ insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (current_frame_info.gmask)
+ {
+ /* Save any needed call-saved regs. */
+ for (regno = STACK_POINTER_REGNUM; regno--;)
+ {
+ if ((current_frame_info.gmask & (1 << regno)) != 0)
+ {
+ insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+ }
+
+ /* Save return address if necessary. */
+ if (current_frame_info.save_rp)
+ {
+ insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode,
+ RETURN_POINTER_REGNUM)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Save old frame pointer and create new one, if necessary. */
+ if (current_frame_info.save_fp)
+ {
+ if (current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD))
+ {
+ int enter_size = current_frame_info.frame_size + UNITS_PER_WORD;
+ rtx pattern;
+
+ insn = emit_insn (gen_enter_func (GEN_INT (enter_size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ pattern = PATTERN (insn);
+
+ /* Also mark all 3 subexpressions as RTX_FRAME_RELATED_P. */
+ if (GET_CODE (pattern) == PARALLEL)
+ {
+ int x;
+ for (x = XVECLEN (pattern, 0); x--;)
+ {
+ rtx part = XVECEXP (pattern, 0, x);
+
+ /* One of the insns in the ENTER pattern updates the
+ frame pointer. If we do not actually need the frame
+ pointer in this function then this is a side effect
+ rather than a desired effect, so we do not mark that
+ insn as being related to the frame set up. Doing this
+ allows us to compile the crash66.C test file in the
+ G++ testsuite. */
+ if (! frame_pointer_needed
+ && GET_CODE (part) == SET
+ && SET_DEST (part) == hard_frame_pointer_rtx)
+ RTX_FRAME_RELATED_P (part) = 0;
+ else
+ RTX_FRAME_RELATED_P (part) = 1;
+ }
+ }
+ }
+ else
+ {
+ insn = emit_insn (gen_movsi_push (frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+ }
+
+ /* Allocate the stack frame. */
+ if (current_frame_info.frame_size == 0)
+ ; /* Nothing to do. */
+ else if (current_frame_info.save_fp
+ && current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD))
+ ; /* Nothing to do. */
+ else if (current_frame_info.frame_size <= 512)
+ {
+ insn = emit_insn (gen_add_to_stack
+ (GEN_INT (- (signed) current_frame_info.frame_size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+ insn = emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (crtl->profile)
+ emit_insn (gen_blockage ());
+}
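+
+/* As a rough sketch (assuming the FR30 pre-decrement push syntax
+   "st Ri, @-r15" used by the movsi_push pattern), a function that
+   saves r8 and needs a 256 byte frame would get a prologue along
+   the lines of:
+
+       st    r8, @-r15        ; push the call-saved register
+       enter #260             ; push fp, set fp = sp, allocate the frame
+
+   while frames too big for ENTER's 10 bit displacement fall back to
+   loading the size into r0 (PROLOGUE_TMP_REGNUM) and subtracting it
+   from the stack pointer explicitly.  */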
+
+/* Called after register allocation to add any instructions needed for the
+ epilogue. Using an epilogue insn is favored compared to putting all of the
+ instructions in output_function_epilogue(), since it allows the scheduler
+ to intermix instructions with the restores of the caller saved registers.
+ In some cases, it might be necessary to emit a barrier instruction as the
+ first insn to prevent such scheduling. */
+void
+fr30_expand_epilogue (void)
+{
+ int regno;
+
+  /* Perform the inverse operations of the prologue.  */
+ gcc_assert (current_frame_info.initialised);
+
+ /* Pop local variables and arguments off the stack.
+ If frame_pointer_needed is TRUE then the frame pointer register
+ has actually been used as a frame pointer, and we can recover
+ the stack pointer from it, otherwise we must unwind the stack
+ manually. */
+ if (current_frame_info.frame_size > 0)
+ {
+ if (current_frame_info.save_fp && frame_pointer_needed)
+ {
+ emit_insn (gen_leave_func ());
+ current_frame_info.save_fp = 0;
+ }
+ else if (current_frame_info.frame_size <= 508)
+ emit_insn (gen_add_to_stack
+ (GEN_INT (current_frame_info.frame_size)));
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+ emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size)));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+ }
+ }
+
+ if (current_frame_info.save_fp)
+ emit_insn (gen_movsi_pop (frame_pointer_rtx));
+
+ /* Pop all the registers that were pushed. */
+ if (current_frame_info.save_rp)
+ emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, RETURN_POINTER_REGNUM)));
+
+ for (regno = 0; regno < STACK_POINTER_REGNUM; regno ++)
+ if (current_frame_info.gmask & (1 << regno))
+ emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno)));
+
+ if (current_frame_info.pretend_size)
+ emit_insn (gen_add_to_stack (GEN_INT (current_frame_info.pretend_size)));
+
+ /* Reset state info for each function. */
+ current_frame_info = zero_frame_info;
+
+ emit_jump_insn (gen_return_from_func ());
+}
+
+/* Do any needed setup for a variadic function. We must create a register
+ parameter block, and then copy any anonymous arguments, plus the last
+   named argument, from registers into memory.  The copying is actually
+   done in fr30_expand_prologue ().
+
+ ARG_REGS_USED_SO_FAR has *not* been updated for the last named argument
+ which has type TYPE and mode MODE, and we rely on this fact. */
+void
+fr30_setup_incoming_varargs (CUMULATIVE_ARGS *arg_regs_used_so_far,
+ enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ int size;
+
+ /* All BLKmode values are passed by reference. */
+ gcc_assert (mode != BLKmode);
+
+ /* ??? This run-time test as well as the code inside the if
+ statement is probably unnecessary. */
+ if (targetm.calls.strict_argument_naming (arg_regs_used_so_far))
+ /* If TARGET_STRICT_ARGUMENT_NAMING returns true, then the last named
+ arg must not be treated as an anonymous arg. */
+    *arg_regs_used_so_far += fr30_num_arg_regs (mode, type);
+
+ size = FR30_NUM_ARG_REGS - (* arg_regs_used_so_far);
+
+ if (size <= 0)
+ return;
+
+ * pretend_size = (size * UNITS_PER_WORD);
+}
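+
+/* For example (with the default, non-strict argument naming), a
+   function "int f (int fmt, ...)" reaches this hook with
+   *arg_regs_used_so_far still 0, so *pretend_size becomes 16 and
+   fr30_expand_prologue pushes r7 down to r4 into the pretend
+   argument area.  */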
+
+/*}}}*/
+/*{{{ Printing operands */
+
+/* Print a memory address as an operand to reference that memory location. */
+
+void
+fr30_print_operand_address (FILE *stream, rtx address)
+{
+ switch (GET_CODE (address))
+ {
+ case SYMBOL_REF:
+ output_addr_const (stream, address);
+ break;
+
+ default:
+ fprintf (stderr, "code = %x\n", GET_CODE (address));
+ debug_rtx (address);
+ output_operand_lossage ("fr30_print_operand_address: unhandled address");
+ break;
+ }
+}
+
+/* Print an operand. */
+
+void
+fr30_print_operand (FILE *file, rtx x, int code)
+{
+ rtx x0;
+
+ switch (code)
+ {
+ case '#':
+ /* Output a :D if this instruction is delayed. */
+ if (dbr_sequence_length () != 0)
+ fputs (":D", file);
+ return;
+
+ case 'p':
+ /* Compute the register name of the second register in a hi/lo
+ register pair. */
+ if (GET_CODE (x) != REG)
+ output_operand_lossage ("fr30_print_operand: unrecognized %%p code");
+ else
+ fprintf (file, "r%d", REGNO (x) + 1);
+ return;
+
+ case 'b':
+ /* Convert GCC's comparison operators into FR30 comparison codes. */
+ switch (GET_CODE (x))
+ {
+ case EQ: fprintf (file, "eq"); break;
+ case NE: fprintf (file, "ne"); break;
+ case LT: fprintf (file, "lt"); break;
+ case LE: fprintf (file, "le"); break;
+ case GT: fprintf (file, "gt"); break;
+ case GE: fprintf (file, "ge"); break;
+ case LTU: fprintf (file, "c"); break;
+ case LEU: fprintf (file, "ls"); break;
+ case GTU: fprintf (file, "hi"); break;
+ case GEU: fprintf (file, "nc"); break;
+ default:
+ output_operand_lossage ("fr30_print_operand: unrecognized %%b code");
+ break;
+ }
+ return;
+
+ case 'B':
+      /* Convert GCC's comparison operators into the complementary FR30
+ comparison codes. */
+ switch (GET_CODE (x))
+ {
+ case EQ: fprintf (file, "ne"); break;
+ case NE: fprintf (file, "eq"); break;
+ case LT: fprintf (file, "ge"); break;
+ case LE: fprintf (file, "gt"); break;
+ case GT: fprintf (file, "le"); break;
+ case GE: fprintf (file, "lt"); break;
+ case LTU: fprintf (file, "nc"); break;
+ case LEU: fprintf (file, "hi"); break;
+ case GTU: fprintf (file, "ls"); break;
+ case GEU: fprintf (file, "c"); break;
+ default:
+ output_operand_lossage ("fr30_print_operand: unrecognized %%B code");
+ break;
+ }
+ return;
+
+ case 'A':
+ /* Print a signed byte value as an unsigned value. */
+ if (GET_CODE (x) != CONST_INT)
+ output_operand_lossage ("fr30_print_operand: invalid operand to %%A code");
+ else
+ {
+ HOST_WIDE_INT val;
+
+ val = INTVAL (x);
+
+ val &= 0xff;
+
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
+ }
+ return;
+
+ case 'x':
+ if (GET_CODE (x) != CONST_INT
+ || INTVAL (x) < 16
+ || INTVAL (x) > 32)
+ output_operand_lossage ("fr30_print_operand: invalid %%x code");
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) - 16);
+ return;
+
+ case 'F':
+ if (GET_CODE (x) != CONST_DOUBLE)
+ output_operand_lossage ("fr30_print_operand: invalid %%F code");
+ else
+ {
+ char str[30];
+
+ real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (str), 0, 1);
+ fputs (str, file);
+ }
+ return;
+
+ case 0:
+ /* Handled below. */
+ break;
+
+ default:
+ fprintf (stderr, "unknown code = %x\n", code);
+ output_operand_lossage ("fr30_print_operand: unknown code");
+ return;
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names [REGNO (x)], file);
+ break;
+
+ case MEM:
+ x0 = XEXP (x,0);
+
+ switch (GET_CODE (x0))
+ {
+ case REG:
+ gcc_assert ((unsigned) REGNO (x0) < ARRAY_SIZE (reg_names));
+ fprintf (file, "@%s", reg_names [REGNO (x0)]);
+ break;
+
+ case PLUS:
+ if (GET_CODE (XEXP (x0, 0)) != REG
+ || REGNO (XEXP (x0, 0)) < FRAME_POINTER_REGNUM
+ || REGNO (XEXP (x0, 0)) > STACK_POINTER_REGNUM
+ || GET_CODE (XEXP (x0, 1)) != CONST_INT)
+ {
+ fprintf (stderr, "bad INDEXed address:");
+ debug_rtx (x);
+ output_operand_lossage ("fr30_print_operand: unhandled MEM");
+ }
+ else if (REGNO (XEXP (x0, 0)) == FRAME_POINTER_REGNUM)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (x0, 1));
+ if (val < -(1 << 9) || val > ((1 << 9) - 4))
+ {
+ fprintf (stderr, "frame INDEX out of range:");
+ debug_rtx (x);
+ output_operand_lossage ("fr30_print_operand: unhandled MEM");
+ }
+ fprintf (file, "@(r14, #" HOST_WIDE_INT_PRINT_DEC ")", val);
+ }
+ else
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (x0, 1));
+ if (val < 0 || val > ((1 << 6) - 4))
+ {
+ fprintf (stderr, "stack INDEX out of range:");
+ debug_rtx (x);
+ output_operand_lossage ("fr30_print_operand: unhandled MEM");
+ }
+ fprintf (file, "@(r15, #" HOST_WIDE_INT_PRINT_DEC ")", val);
+ }
+ break;
+
+ case SYMBOL_REF:
+ output_address (x0);
+ break;
+
+ default:
+ fprintf (stderr, "bad MEM code = %x\n", GET_CODE (x0));
+ debug_rtx (x);
+ output_operand_lossage ("fr30_print_operand: unhandled MEM");
+ break;
+ }
+ break;
+
+ case CONST_DOUBLE :
+ /* We handle SFmode constants here as output_addr_const doesn't. */
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE d;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, x);
+ REAL_VALUE_TO_TARGET_SINGLE (d, l);
+ fprintf (file, "0x%08lx", l);
+ break;
+ }
+
+ /* Fall through. Let output_addr_const deal with it. */
+ default:
+ output_addr_const (file, x);
+ break;
+ }
+
+ return;
+}
+
+/*}}}*/
+
+/* Implements TARGET_FUNCTION_VALUE. */
+
+static rtx
+fr30_function_value (const_tree valtype,
+                     const_tree fntype_or_decl ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM);
+}
+
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+fr30_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */
+
+static bool
+fr30_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == RETURN_VALUE_REGNUM);
+}
+
+/*{{{ Function arguments */
+
+/* Return true if we should pass an argument on the stack rather than
+ in registers. */
+
+static bool
+fr30_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (mode == BLKmode)
+ return true;
+ if (type == NULL)
+ return false;
+ return AGGREGATE_TYPE_P (type);
+}
+
+/* Compute the number of word sized registers needed to hold a
+ function argument of mode INT_MODE and tree type TYPE. */
+static int
+fr30_num_arg_regs (enum machine_mode mode, const_tree type)
+{
+ int size;
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
+/* Returns the number of bytes of a parameter of machine mode MODE and
+   tree type TYPE (which may be NULL if the type is not known) that are
+   passed in registers, with the remainder of the parameter passed on
+   the stack.  If the argument fits entirely in the argument registers,
+   or entirely on the stack, then 0 is returned.
+   CUM is the number of argument registers already used by earlier
+   parameters to the function.  */
+
+static int
+fr30_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named)
+{
+ /* Unnamed arguments, i.e. those that are prototyped as ...
+ are always passed on the stack.
+ Also check here to see if all the argument registers are full. */
+ if (named == 0 || *cum >= FR30_NUM_ARG_REGS)
+ return 0;
+
+ /* Work out how many argument registers would be needed if this
+ parameter were to be passed entirely in registers. If there
+ are sufficient argument registers available (or if no registers
+ are needed because the parameter must be passed on the stack)
+ then return zero, as this parameter does not require partial
+     register, partial stack space.  */
+ if (*cum + fr30_num_arg_regs (mode, type) <= FR30_NUM_ARG_REGS)
+ return 0;
+
+ return (FR30_NUM_ARG_REGS - *cum) * UNITS_PER_WORD;
+}
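+
+/* Worked example: with three argument registers already used
+   (*CUM == 3), a named DImode argument needs two registers, so it is
+   split and (4 - 3) * 4 == 4 bytes of it travel in r7 while the rest
+   goes on the stack.  */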
+
+static rtx
+fr30_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (!named
+ || fr30_must_pass_in_stack (mode, type)
+ || *cum >= FR30_NUM_ARG_REGS)
+ return NULL_RTX;
+ else
+ return gen_rtx_REG (mode, *cum + FIRST_ARG_REGNUM);
+}
+
+/* A C statement (sans semicolon) to update the summarizer variable CUM to
+ advance past an argument in the argument list. The values MODE, TYPE and
+ NAMED describe that argument. Once this is done, the variable CUM is
+ suitable for analyzing the *following* argument with `FUNCTION_ARG', etc.
+
+ This macro need not do anything if the argument in question was passed on
+ the stack. The compiler knows how to track the amount of stack space used
+ for arguments without any special help. */
+static void
+fr30_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ *cum += named * fr30_num_arg_regs (mode, type);
+}
+
+/*}}}*/
+/*{{{ Operand predicates */
+
+#ifndef Mmode
+#define Mmode enum machine_mode
+#endif
+
+/* Returns nonzero iff the NUM_OPERANDS registers in the OPERANDS array
+   are all in descending order (DESCENDING nonzero) or all in ascending
+   order (DESCENDING zero).  */
+int
+fr30_check_multiple_regs (rtx *operands, int num_operands, int descending)
+{
+ if (descending)
+ {
+ unsigned int prev_regno = 0;
+
+ while (num_operands --)
+ {
+ if (GET_CODE (operands [num_operands]) != REG)
+ return 0;
+
+ if (REGNO (operands [num_operands]) < prev_regno)
+ return 0;
+
+ prev_regno = REGNO (operands [num_operands]);
+ }
+ }
+ else
+ {
+ unsigned int prev_regno = CONDITION_CODE_REGNUM;
+
+ while (num_operands --)
+ {
+ if (GET_CODE (operands [num_operands]) != REG)
+ return 0;
+
+ if (REGNO (operands [num_operands]) > prev_regno)
+ return 0;
+
+ prev_regno = REGNO (operands [num_operands]);
+ }
+ }
+
+ return 1;
+}
+
+int
+fr30_const_double_is_zero (rtx operand)
+{
+ REAL_VALUE_TYPE d;
+
+ if (operand == NULL || GET_CODE (operand) != CONST_DOUBLE)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operand);
+
+ return REAL_VALUES_EQUAL (d, dconst0);
+}
+
+/*}}}*/
+/*{{{ Instruction Output Routines */
+
+/* Output a double word move.
+ It must be REG<-REG, REG<-MEM, MEM<-REG or REG<-CONST.
+ On the FR30 we are constrained by the fact that it does not
+   support offsettable addresses, and so we have to load the
+   address of the second word into the second destination register
+ before we can use it. */
+
+rtx
+fr30_move_double (rtx * operands)
+{
+ rtx src = operands[1];
+ rtx dest = operands[0];
+ enum rtx_code src_code = GET_CODE (src);
+ enum rtx_code dest_code = GET_CODE (dest);
+ enum machine_mode mode = GET_MODE (dest);
+ rtx val;
+
+ start_sequence ();
+
+ if (dest_code == REG)
+ {
+ if (src_code == REG)
+ {
+ int reverse = (REGNO (dest) == REGNO (src) + 1);
+
+ /* We normally copy the low-numbered register first. However, if
+ the first register of operand 0 is the same as the second register
+ of operand 1, we must copy in the opposite order. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, reverse, TRUE, mode),
+ operand_subword (src, reverse, TRUE, mode)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, !reverse, TRUE, mode),
+ operand_subword (src, !reverse, TRUE, mode)));
+ }
+ else if (src_code == MEM)
+ {
+ rtx addr = XEXP (src, 0);
+ rtx dest0 = operand_subword (dest, 0, TRUE, mode);
+ rtx dest1 = operand_subword (dest, 1, TRUE, mode);
+ rtx new_mem;
+
+ gcc_assert (GET_CODE (addr) == REG);
+
+ /* Copy the address before clobbering it. See PR 34174. */
+ emit_insn (gen_rtx_SET (SImode, dest1, addr));
+ emit_insn (gen_rtx_SET (VOIDmode, dest0,
+ adjust_address (src, SImode, 0)));
+ emit_insn (gen_rtx_SET (SImode, dest1,
+ plus_constant (dest1, UNITS_PER_WORD)));
+
+ new_mem = gen_rtx_MEM (SImode, dest1);
+ MEM_COPY_ATTRIBUTES (new_mem, src);
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest1, new_mem));
+ }
+ else if (src_code == CONST_INT || src_code == CONST_DOUBLE)
+ {
+ rtx words[2];
+ split_double (src, &words[0], &words[1]);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 0, TRUE, mode),
+ words[0]));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 1, TRUE, mode),
+ words[1]));
+ }
+ }
+ else if (src_code == REG && dest_code == MEM)
+ {
+ rtx addr = XEXP (dest, 0);
+ rtx src0;
+ rtx src1;
+
+ gcc_assert (GET_CODE (addr) == REG);
+
+ src0 = operand_subword (src, 0, TRUE, mode);
+ src1 = operand_subword (src, 1, TRUE, mode);
+
+ emit_move_insn (adjust_address (dest, SImode, 0), src0);
+
+ if (REGNO (addr) == STACK_POINTER_REGNUM
+ || REGNO (addr) == FRAME_POINTER_REGNUM)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ adjust_address (dest, SImode, UNITS_PER_WORD),
+ src1));
+ else
+ {
+ rtx new_mem;
+ rtx scratch_reg_r0 = gen_rtx_REG (SImode, 0);
+
+ /* We need a scratch register to hold the value of 'address + 4'.
+ We use r0 for this purpose. It is used for example for long
+ jumps and is already marked to not be used by normal register
+ allocation. */
+ emit_insn (gen_movsi_internal (scratch_reg_r0, addr));
+ emit_insn (gen_addsi_small_int (scratch_reg_r0, scratch_reg_r0,
+ GEN_INT (UNITS_PER_WORD)));
+ new_mem = gen_rtx_MEM (SImode, scratch_reg_r0);
+ MEM_COPY_ATTRIBUTES (new_mem, dest);
+ emit_move_insn (new_mem, src1);
+ emit_insn (gen_blockage ());
+ }
+ }
+ else
+ /* This should have been prevented by the constraints on movdi_insn. */
+ gcc_unreachable ();
+
+ val = get_insns ();
+ end_sequence ();
+
+ return val;
+}
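+
+/* An illustrative expansion for a DImode REG <- MEM move with
+   destination r4/r5 and the address in r6 (mnemonics are indicative
+   only; the exact output comes from the movsi and addsi patterns in
+   fr30.md):
+
+       mov  r6, r5        ; copy the address before clobbering it
+       ld   @r6, r4       ; load the first word
+       addn #4, r5        ; advance the copied address
+       ld   @r5, r5       ; load the second word over the copy
+*/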
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+bool
+fr30_frame_pointer_required (void)
+{
+ return (flag_omit_frame_pointer == 0 || crtl->args.pretend_args_size > 0);
+}
+
+/*}}}*/
+/*{{{ Trampoline Output Routines */
+
+/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
+ On the FR30, the trampoline is:
+
+ nop
+ ldi:32 STATIC, r12
+ nop
+ ldi:32 FUNCTION, r0
+ jmp @r0
+
+ The no-ops are to guarantee that the static chain and final
+   target are 32-bit aligned within the trampoline.  That allows us to
+ initialize those locations with simple SImode stores. The alternative
+ would be to use HImode stores. */
+
+static void
+fr30_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\tnop\n");
+ fprintf (f, "\tldi:32\t#0, %s\n", reg_names [STATIC_CHAIN_REGNUM]);
+ fprintf (f, "\tnop\n");
+ fprintf (f, "\tldi:32\t#0, %s\n", reg_names [COMPILER_SCRATCH_REGISTER]);
+ fprintf (f, "\tjmp\t@%s\n", reg_names [COMPILER_SCRATCH_REGISTER]);
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+
+static void
+fr30_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, 4);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 12);
+ emit_move_insn (mem, fnaddr);
+}
+
+/*}}}*/
+/* Local Variables: */
+/* folded-file: t */
+/* End: */
diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h
new file mode 100644
index 000000000..d20d7fb79
--- /dev/null
+++ b/gcc/config/fr30/fr30.h
@@ -0,0 +1,871 @@
+/*{{{ Comment. */
+
+/* Definitions of FR30 target.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Cygnus Solutions.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*}}}*/
+/*{{{ Run-time target specifications. */
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+/* Define this to be a string constant containing `-D' options to define the
+ predefined macros that identify this machine and system. These macros will
+ be predefined unless the `-ansi' option is specified. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("fr30"); \
+ builtin_assert ("machine=fr30"); \
+ } \
+ while (0)
+
+#define TARGET_VERSION fprintf (stderr, " (fr30)");
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+
+/* Include the OS stub library, so that the code can be simulated.
+ This is not the right way to do this. Ideally this kind of thing
+ should be done in the linker script - but I have not worked out how
+ to specify the location of a linker script in a gcc command line yet... */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!mno-lsim:-lsim} crtend.o%s crtn.o%s"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V} \
+ %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
+
+/*}}}*/
+/*{{{ Storage Layout. */
+
+#define BITS_BIG_ENDIAN 1
+
+#define BYTES_BIG_ENDIAN 1
+
+#define WORDS_BIG_ENDIAN 1
+
+#define UNITS_PER_WORD 4
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ do \
+ { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode; \
+ } \
+ while (0)
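+
+/* So, for example, a `char' or `short' scalar is computed and passed
+   in a full 32-bit register: QImode and HImode values are widened to
+   SImode here.  */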
+
+#define PARM_BOUNDARY 32
+
+#define STACK_BOUNDARY 32
+
+#define FUNCTION_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT 32
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define STRICT_ALIGNMENT 1
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/*}}}*/
+/*{{{ Layout of Source Language Data Types. */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/*}}}*/
+/*{{{ REGISTER BASICS. */
+
+/* Number of hardware registers known to the compiler. They receive numbers 0
+ through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number
+ really is assigned the number `FIRST_PSEUDO_REGISTER'. */
+#define FIRST_PSEUDO_REGISTER 21
+
+/* Fixed register assignments: */
+
+/* Here we do a BAD THING - reserve a register for use by the machine
+   description file.  There are too many places in the compiler where it
+ assumes that it can issue a branch or jump instruction without
+ providing a scratch register for it, and reload just cannot cope, so
+ we keep a register back for these situations. */
+#define COMPILER_SCRATCH_REGISTER 0
+
+/* The register that contains the result of a function call. */
+#define RETURN_VALUE_REGNUM 4
+
+/* The first register that can contain the arguments to a function. */
+#define FIRST_ARG_REGNUM 4
+
+/* A call-used register that can be used during the function prologue. */
+#define PROLOGUE_TMP_REGNUM COMPILER_SCRATCH_REGISTER
+
+/* Register numbers used for passing a function's static chain pointer. If
+ register windows are used, the register number as seen by the called
+ function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as
+ seen by the calling function is `STATIC_CHAIN_REGNUM'. If these registers
+ are the same, `STATIC_CHAIN_INCOMING_REGNUM' need not be defined.
+
+ The static chain register need not be a fixed register.
+
+ If the static chain is passed in memory, these macros should not be defined;
+ instead, the next two macros should be defined. */
+#define STATIC_CHAIN_REGNUM 12
+/* #define STATIC_CHAIN_INCOMING_REGNUM */
+
+/* An FR30 specific hardware register. */
+#define ACCUMULATOR_REGNUM 13
+
+/* The register number of the frame pointer register, which is used to access
+ automatic variables in the stack frame. On some machines, the hardware
+ determines which register this is. On other machines, you can choose any
+ register you wish for this purpose. */
+#define FRAME_POINTER_REGNUM 14
+
+/* The register number of the stack pointer register, which must also be a
+ fixed register according to `FIXED_REGISTERS'. On most machines, the
+ hardware determines which register this is. */
+#define STACK_POINTER_REGNUM 15
+
+/* The following are fake hard registers that describe some of the dedicated
+ registers on the FR30. */
+#define CONDITION_CODE_REGNUM 16
+#define RETURN_POINTER_REGNUM 17
+#define MD_HIGH_REGNUM 18
+#define MD_LOW_REGNUM 19
+
+/* An initializer that says which registers are used for fixed purposes all
+ throughout the compiled code and are therefore not available for general
+ allocation. These would include the stack pointer, the frame pointer
+ (except on machines where that can be used as a general register when no
+ frame pointer is needed), the program counter on machines where that is
+ considered one of the addressable registers, and any other numbered register
+ with a standard use.
+
+ This information is expressed as a sequence of numbers, separated by commas
+ and surrounded by braces. The Nth number is 1 if register N is fixed, 0
+ otherwise.
+
+ The table initialized from this macro, and the table initialized by the
+ following one, may be overridden at run time either automatically, by the
+ actions of the macro `TARGET_CONDITIONAL_REGISTER_USAGE', or by the user
+ with the command options `-ffixed-REG', `-fcall-used-REG' and
+ `-fcall-saved-REG'. */
+#define FIXED_REGISTERS \
+ { 1, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ \
+ 0, 0, 0, 0, 0, 0, 0, 1, /* 8 - 15 */ \
+ 1, 1, 1, 1, 1 } /* 16 - 20 */
+
+/* XXX - MDL and MDH set as fixed for now - this is until I can get the
+ mul patterns working. */
+
+/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in
+ general) by function calls as well as for fixed registers. This macro
+ therefore identifies the registers that are not available for general
+ allocation of values that must live across function calls.
+
+ If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically
+ saves it on function entry and restores it on function exit, if the register
+ is used within the function. */
+#define CALL_USED_REGISTERS \
+ { 1, 1, 1, 1, 1, 1, 1, 1, /* 0 - 7 */ \
+ 0, 0, 0, 0, 1, 1, 0, 1, /* 8 - 15 */ \
+ 1, 1, 1, 1, 1 } /* 16 - 20 */
+
+/* A C initializer containing the assembler's names for the machine registers,
+ each one as a C string constant. This is what translates register numbers
+ in the compiler into assembler language. */
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "ac", "fp", "sp", \
+ "cc", "rp", "mdh", "mdl", "ap" \
+}
+
+/* If defined, a C initializer for an array of structures containing a name and
+ a register number. This macro defines additional names for hard registers,
+ thus allowing the `asm' option in declarations to refer to registers using
+ alternate names. */
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ {"r13", 13}, {"r14", 14}, {"r15", 15}, {"usp", 15}, {"ps", 16}\
+}
+
+/*}}}*/
+/*{{{ How Values Fit in Registers. */
+
+/* A C expression for the number of consecutive hard registers, starting at
+ register number REGNO, required to hold a value of mode MODE. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
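+
+/* For example, SImode and SFmode values occupy one register, while
+   DImode and DFmode values occupy two consecutive registers.  */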
+
+/* A C expression that is nonzero if it is permissible to store a value of mode
+ MODE in hard register number REGNO (or in several registers starting with
+ that one). */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+/* A C expression that is nonzero if it is desirable to choose register
+ allocation so as to avoid move instructions between a value of mode MODE1
+ and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are
+ ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be
+ zero. */
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+/*}}}*/
+/*{{{ Register Classes. */
+
+/* An enumeral type that must be defined with all the register class names as
+ enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last
+ register class, followed by one more enumeral value, `LIM_REG_CLASSES',
+ which is not a register class but rather tells how many classes there are.
+
+ Each register class has a number, which is the value of casting the class
+ name to type `int'. The number serves as an index in many of the tables
+ described below. */
+enum reg_class
+{
+ NO_REGS,
+ MULTIPLY_32_REG, /* the MDL register as used by the MULH, MULUH insns */
+ MULTIPLY_64_REG, /* the MDH,MDL register pair as used by MUL and MULU */
+ LOW_REGS, /* registers 0 through 7 */
+ HIGH_REGS, /* registers 8 through 15 */
+ REAL_REGS, /* i.e. all the general hardware registers on the FR30 */
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define GENERAL_REGS REAL_REGS
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define IRA_COVER_CLASSES \
+{ \
+ REAL_REGS, MULTIPLY_64_REG, LIM_REG_CLASSES \
+}
+
+/* An initializer containing the names of the register classes as C string
+ constants. These names are used in writing some of the debugging dumps. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "MULTIPLY_32_REG", \
+ "MULTIPLY_64_REG", \
+ "LOW_REGS", \
+ "HIGH_REGS", \
+ "REAL_REGS", \
+ "ALL_REGS" \
+ }
+
+/* An initializer containing the contents of the register classes, as integers
+ which are bit masks. The Nth integer specifies the contents of class N.
+ The way the integer MASK is interpreted is that register R is in the class
+ if `MASK & (1 << R)' is 1.
+
+ When the machine has more than 32 registers, an integer does not suffice.
+ Then the integers are replaced by sub-initializers, braced groupings
+ containing several integers. Each sub-initializer must be suitable as an
+ initializer for the type `HARD_REG_SET' which is defined in
+ `hard-reg-set.h'. */
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0 }, \
+ { 1 << MD_LOW_REGNUM }, \
+ { (1 << MD_LOW_REGNUM) | (1 << MD_HIGH_REGNUM) }, \
+ { (1 << 8) - 1 }, \
+ { ((1 << 8) - 1) << 8 }, \
+ { (1 << CONDITION_CODE_REGNUM) - 1 }, \
+ { (1 << FIRST_PSEUDO_REGISTER) - 1 } \
+}
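+
+/* For reference, the masks above work out to: LOW_REGS 0x000000ff,
+   HIGH_REGS 0x0000ff00, REAL_REGS 0x0000ffff, MULTIPLY_32_REG
+   0x00080000, MULTIPLY_64_REG 0x000c0000 and ALL_REGS 0x001fffff.  */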
+
+/* A C expression whose value is a register class containing hard register
+ REGNO. In general there is more than one such class; choose a class which
+ is "minimal", meaning that no smaller class also contains the register. */
+#define REGNO_REG_CLASS(REGNO) \
+ ( (REGNO) < 8 ? LOW_REGS \
+ : (REGNO) < CONDITION_CODE_REGNUM ? HIGH_REGS \
+ : (REGNO) == MD_LOW_REGNUM ? MULTIPLY_32_REG \
+ : (REGNO) == MD_HIGH_REGNUM ? MULTIPLY_64_REG \
+ : ALL_REGS)
+
+/* A macro whose definition is the name of the class to which a valid base
+ register must belong. A base register is one used in an address which is
+ the register value plus a displacement. */
+#define BASE_REG_CLASS REAL_REGS
+
+/* A macro whose definition is the name of the class to which a valid index
+ register must belong. An index register is one used in an address where its
+ value is either multiplied by a scale factor or added to another register
+ (as well as added to a displacement). */
+#define INDEX_REG_CLASS REAL_REGS
+
+/* A C expression which is nonzero if register number NUM is suitable for use
+ as a base register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard register. */
+#define REGNO_OK_FOR_BASE_P(NUM) 1
+
+/* A C expression which is nonzero if register number NUM is suitable for use
+ as an index register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard register.
+
+ The difference between an index register and a base register is that the
+ index register may be scaled. If an address involves the sum of two
+ registers, neither one of them scaled, then either one may be labeled the
+ "base" and the other the "index"; but whichever labeling is used must fit
+ the machine's constraints of which registers may serve in each capacity.
+ The compiler will try both labelings, looking for one that is valid, and
+ will reload one or both registers only if neither labeling works. */
+#define REGNO_OK_FOR_INDEX_P(NUM) 1
+
+/* A C expression for the maximum number of consecutive registers of
+ class CLASS needed to hold a value of mode MODE.
+
+ This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value
+ of the macro `CLASS_MAX_NREGS (CLASS, MODE)' should be the maximum value of
+ `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class CLASS.
+
+ This macro helps control the handling of multiple-word values in
+ the reload pass. */
+#define CLASS_MAX_NREGS(CLASS, MODE) HARD_REGNO_NREGS (0, MODE)
+
+/*}}}*/
+/*{{{ Basic Stack Layout. */
+
+/* Define this macro if pushing a word onto the stack moves the stack pointer
+ to a smaller address. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this macro to a nonzero value if the addresses of local variable slots
+ are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset from the frame pointer to the first local variable slot to be
+ allocated.
+
+ If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the
+ first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by
+ adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */
+/* #define STARTING_FRAME_OFFSET -4 */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset from the stack pointer register to the first location at which
+ outgoing arguments are placed. If not specified, the default value of zero
+ is used. This is the proper value for most machines.
+
+ If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first
+ location at which outgoing arguments are placed. */
+#define STACK_POINTER_OFFSET 0
+
+/* Offset from the argument pointer register to the first argument's address.
+ On some machines it may depend on the data type of the function.
+
+ If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first
+ argument's address. */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the location of the incoming
+ return address at the beginning of any function, before the prologue. This
+ RTL is either a `REG', indicating that the return value is saved in `REG',
+ or a `MEM' representing a location in the stack.
+
+ You only need to define this macro if you want to support call frame
+ debugging information like that provided by DWARF 2. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM)
+
+/*}}}*/
+/*{{{ Register That Address the Stack Frame. */
+
+/* The register number of the arg pointer register, which is used to access the
+ function's argument list. On some machines, this is the same as the frame
+ pointer register. On some machines, the hardware determines which register
+ this is. On other machines, you can choose any register you wish for this
+ purpose. If this is not the same register as the frame pointer register,
+ then you must mark it as a fixed register according to `FIXED_REGISTERS', or
+ arrange to be able to eliminate it. */
+#define ARG_POINTER_REGNUM 20
+
+/*}}}*/
+/*{{{ Eliminating the Frame Pointer and the Arg Pointer. */
+
+/* If defined, this macro specifies a table of register pairs used to eliminate
+ unneeded registers that point into the stack frame. If it is not defined,
+ the only elimination attempted by the compiler is to replace references to
+ the frame pointer with references to the stack pointer.
+
+ The definition of this macro is a list of structure initializations, each of
+ which specifies an original and replacement register.
+
+ On some machines, the position of the argument pointer is not known until
+ the compilation is completed. In such a case, a separate hard register must
+ be used for the argument pointer. This register can be eliminated by
+ replacing it with either the frame pointer or the argument pointer,
+ depending on whether or not the frame pointer has been eliminated.
+
+ In this case, you might specify:
+ #define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+ Note that the elimination of the argument pointer with the stack pointer is
+ specified first since that is the preferred elimination. */
+
+#define ELIMINABLE_REGS \
+{ \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
+}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the
+ initial difference between the specified pair of registers. This macro must
+ be defined if `ELIMINABLE_REGS' is defined. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = fr30_compute_frame_size (FROM, TO)
+
+/*}}}*/
+/*{{{ Passing Function Arguments on the Stack. */
+
+/* If defined, the maximum amount of space required for outgoing arguments will
+ be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed onto the
+ stack for each call; instead, the function prologue should increase the
+ stack frame size by this amount.
+
+ Defining both `PUSH_ROUNDING' and `ACCUMULATE_OUTGOING_ARGS' is not
+ proper. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/*}}}*/
+/*{{{ Function Arguments in Registers. */
+
+/* The number of registers assigned to holding function arguments.  */
+
+#define FR30_NUM_ARG_REGS 4
+
+/* A C type for declaring a variable that is used as the first argument of
+ `FUNCTION_ARG' and other related values. For some target machines, the type
+ `int' suffices and can hold the number of bytes of argument so far.
+
+ There is no need to record in `CUMULATIVE_ARGS' anything about the arguments
+ that have been passed on the stack. The compiler has other variables to
+ keep track of that. For target machines on which all arguments are passed
+ on the stack, there is no need to store anything in `CUMULATIVE_ARGS';
+ however, the data structure must exist and should not be empty, so use
+ `int'. */
+/* On the FR30 this value is an accumulating count of the number of argument
+   registers that have been filled with argument values, as opposed to, say,
+ the number of bytes of argument accumulated so far. */
+#define CUMULATIVE_ARGS int
+
+/* A C statement (sans semicolon) for initializing the variable CUM for the
+ state at the beginning of the argument list. The variable has type
+ `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type
+ of the function which will receive the args, or 0 if the args are to a
+ compiler support library function. The value of INDIRECT is nonzero when
+ processing an indirect call, for example a call through a function pointer.
+ The value of INDIRECT is zero for a call to an explicitly named function, a
+ library function call, or when `INIT_CUMULATIVE_ARGS' is used to find
+ arguments for the function being compiled.
+
+ When processing a call to a compiler support library function, LIBNAME
+ identifies which one. It is a `symbol_ref' rtx which contains the name of
+ the function, as a string. LIBNAME is 0 when an ordinary C function call is
+ being processed. Thus, each time this macro is called, either LIBNAME or
+ FNTYPE is nonzero, but never both of them at once. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM) = 0
+
+/* A C expression that is nonzero if REGNO is the number of a hard register in
+ which function arguments are sometimes passed. This does *not* include
+ implicit arguments such as the static chain and the structure-value address.
+ On many machines, no registers can be used for this purpose since all
+ function arguments are pushed on the stack. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ ((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) < FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS))
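+
+/* With FIRST_ARG_REGNUM == 4 and FR30_NUM_ARG_REGS == 4 this accepts
+   exactly r4 through r7, the registers used for argument passing.  */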
+
+/*}}}*/
+/*{{{ How Large Values are Returned. */
+
+/* Define this macro to be 1 if all structure and union return values must be
+ in memory. Since this results in slower code, this should be defined only
+ if needed for compatibility with other compilers or with an ABI. If you
+ define this macro to be 0, then the conventions used for structure and union
+ return values are decided by the `TARGET_RETURN_IN_MEMORY' macro.
+
+ If not defined, this defaults to the value 1. */
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/*}}}*/
+/*{{{ Generating Code for Profiling. */
+
+/* A C statement or compound statement to output to FILE some assembler code to
+ call the profiling subroutine `mcount'. Before calling, the assembler code
+ must load the address of a counter variable into a register where `mcount'
+ expects to find the address. The name of this variable is `LP' followed by
+ the number LABELNO, so you would generate the name using `LP%d' in a
+ `fprintf'.
+
+ The details of how the address should be passed to `mcount' are determined
+ by your operating system environment, not by GCC. To figure them out,
+ compile a small program for profiling using the system's installed C
+ compiler and look at the assembler code that results. */
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ fprintf (FILE, "\t mov rp, r1\n" ); \
+ fprintf (FILE, "\t ldi:32 mcount, r0\n" ); \
+ fprintf (FILE, "\t call @r0\n" ); \
+ fprintf (FILE, ".word\tLP%d\n", LABELNO); \
+}
+
+/*}}}*/
+/*{{{ Trampolines for Nested Functions. */
+
+/* A C expression for the size in bytes of the trampoline, as an integer. */
+#define TRAMPOLINE_SIZE 18
+
+/* We want the trampoline to be aligned on a 32-bit boundary, so that we can
+   make sure the locations of the static chain and target function within
+   the trampoline are also aligned on a 32-bit boundary.  */
+#define TRAMPOLINE_ALIGNMENT 32
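+
+/* Byte layout of the trampoline emitted by fr30_asm_trampoline_template,
+   assuming the usual FR30 encodings of 2 bytes for nop/jmp and 6 bytes
+   for ldi:32 (consistent with TRAMPOLINE_SIZE of 18):
+
+       0:  nop
+       2:  ldi:32  #<static chain>, r12   ; word at offset 4 is patched
+       8:  nop
+      10:  ldi:32  #<function>, r0        ; word at offset 12 is patched
+      16:  jmp     @r0
+
+   which is why fr30_trampoline_init in fr30.c stores the chain and the
+   function address at offsets 4 and 12.  */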
+
+/*}}}*/
+/*{{{ Addressing Modes. */
+
+/* A number, the maximum number of registers that can appear in a valid memory
+ address. Note that it is up to you to specify a value equal to the maximum
+ number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* A C compound statement with a conditional `goto LABEL;' executed if X (an
+ RTX) is a legitimate memory address on the target machine for a memory
+ operand of mode MODE. */
+
+/* On the FR30 we only have one real addressing mode - an address in a
+ register. There are three special cases however:
+
+ * indexed addressing using small positive offsets from the stack pointer
+
+ * indexed addressing using small signed offsets from the frame pointer
+
+ * register plus register addressing using R13 as the base register.
+
+ At the moment we only support the first two of these special cases. */
+
+#ifdef REG_OK_STRICT
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+ do \
+ { \
+ if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
+ goto LABEL; \
+ if (GET_CODE (X) == PLUS \
+ && ((MODE) == SImode || (MODE) == SFmode) \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \
+ goto LABEL; \
+ if (GET_CODE (X) == PLUS \
+ && ((MODE) == SImode || (MODE) == SFmode) \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \
+ goto LABEL; \
+ } \
+ while (0)
+#else
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+ do \
+ { \
+ if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
+ goto LABEL; \
+ if (GET_CODE (X) == PLUS \
+ && ((MODE) == SImode || (MODE) == SFmode) \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \
+ goto LABEL; \
+ if (GET_CODE (X) == PLUS \
+ && ((MODE) == SImode || (MODE) == SFmode) \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && (REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \
+ || REGNO (XEXP (X, 0)) == ARG_POINTER_REGNUM) \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \
+ goto LABEL; \
+ } \
+ while (0)
+#endif
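+
+/* Examples of addresses the macro above accepts: a plain register such
+   as @r3 for any mode; stack pointer relative SImode/SFmode accesses
+   @(r15, #0) .. @(r15, #60); and frame pointer relative accesses
+   @(r14, #-512) .. @(r14, #508).  */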
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as a base register. For hard registers, it should always accept those
+ which the hardware permits and reject the others. Whether the macro accepts
+ or rejects pseudo registers must be controlled by `REG_OK_STRICT' as
+ described above. This usually requires two variant definitions, of which
+ `REG_OK_STRICT' controls the one actually used. */
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) (((unsigned) REGNO (X)) <= STACK_POINTER_REGNUM)
+#else
+#define REG_OK_FOR_BASE_P(X) 1
+#endif
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as an index register.
+
+ The difference between an index register and a base register is that the
+ index register may be scaled. If an address involves the sum of two
+ registers, neither one of them scaled, then either one may be labeled the
+ "base" and the other the "index"; but whichever labeling is used must fit
+ the machine's constraints of which registers may serve in each capacity.
+ The compiler will try both labelings, looking for one that is valid, and
+ will reload one or both registers only if neither labeling works. */
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+
+/* A C expression that is nonzero if X is a legitimate constant for an
+ immediate operand on the target machine. You can assume that X satisfies
+ `CONSTANT_P', so you need not check this. In fact, `1' is a suitable
+ definition for this macro on machines where anything `CONSTANT_P' is valid. */
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+/*}}}*/
+/*{{{ Describing Relative Costs of Operations */
+
+/* Define this macro as a C expression which is nonzero if accessing less than
+ a word of memory (i.e. a `char' or a `short') is no faster than accessing a
+   word of memory, i.e., if such accesses require more than one instruction or if
+ there is no difference in cost between byte and (aligned) word loads.
+
+ When this macro is not defined, the compiler will access a field by finding
+ the smallest containing object; when it is defined, a fullword load will be
+   used if alignment permits.  Unless byte accesses are faster than word
+   accesses, using word accesses is preferable, since it may eliminate
+   subsequent memory accesses if subsequent accesses occur to other fields
+   in the same word of the structure, but to different bytes.  */
+#define SLOW_BYTE_ACCESS 1
+
+/*}}}*/
+/*{{{ Dividing the output into sections. */
+
+/* A C expression whose value is a string containing the assembler operation
+ that should precede instructions and read-only data. Normally `".text"' is
+ right. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* A C expression whose value is a string containing the assembler operation to
+ identify the following data as writable initialized data. Normally
+ `".data"' is right. */
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+/*}}}*/
+/*{{{ The Overall Framework of an Assembler File. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at the
+ end of the line. */
+#define ASM_COMMENT_START ";"
+
+/* A C string constant for text to be output before each `asm' statement or
+ group of consecutive ones. Normally this is `"#APP"', which is a comment
+ that has no effect on most assemblers but tells the GNU assembler that it
+ must check the lines that follow for all valid assembler constructs. */
+#define ASM_APP_ON "#APP\n"
+
+/* A C string constant for text to be output after each `asm' statement or
+ group of consecutive ones. Normally this is `"#NO_APP"', which tells the
+ GNU assembler to resume making the time-saving assumptions that are valid
+ for ordinary compiler output. */
+#define ASM_APP_OFF "#NO_APP\n"
+
+/*}}}*/
+/*{{{ Output and Generation of Labels. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+/*}}}*/
+/*{{{ Output of Assembler Instructions. */
+
+/* A C compound statement to output to stdio stream STREAM the assembler syntax
+ for an instruction operand X. X is an RTL expression.
+
+ CODE is a value that can be used to specify one of several ways of printing
+ the operand. It is used when identical operands must be printed differently
+ depending on the context. CODE comes from the `%' specification that was
+ used to request printing of the operand. If the specification was just
+ `%DIGIT' then CODE is 0; if the specification was `%LTR DIGIT' then CODE is
+ the ASCII code for LTR.
+
+ If X is a register, this macro should print the register's name. The names
+ can be found in an array `reg_names' whose type is `char *[]'. `reg_names'
+ is initialized from `REGISTER_NAMES'.
+
+ When the machine description has a specification `%PUNCT' (a `%' followed by
+ a punctuation character), this macro is called with a null pointer for X and
+ the punctuation character for CODE. */
+#define PRINT_OPERAND(STREAM, X, CODE) fr30_print_operand (STREAM, X, CODE)
+
+/* A C expression which evaluates to true if CODE is a valid punctuation
+ character for use in the `PRINT_OPERAND' macro. If
+ `PRINT_OPERAND_PUNCT_VALID_P' is not defined, it means that no punctuation
+ characters (except for the standard one, `%') are used in this way. */
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#')
+
+/* A C compound statement to output to stdio stream STREAM the assembler syntax
+ for an instruction operand that is a memory reference whose address is X. X
+ is an RTL expression. */
+
+#define PRINT_OPERAND_ADDRESS(STREAM, X) fr30_print_operand_address (STREAM, X)
+
+#define REGISTER_PREFIX "%"
+#define LOCAL_LABEL_PREFIX "."
+#define USER_LABEL_PREFIX ""
+#define IMMEDIATE_PREFIX ""
+
+/*}}}*/
+/*{{{ Output of Dispatch Tables. */
+
+/* This macro should be provided on machines where the addresses in a dispatch
+ table are relative to the table's own address.
+
+ The definition should be a C statement to output to the stdio stream STREAM
+ an assembler pseudo-instruction to generate a difference between two labels.
+ VALUE and REL are the numbers of two internal labels. The definitions of
+ these labels are output using `(*targetm.asm_out.internal_label)', and they must be
+ printed in the same way here. For example,
+
+ fprintf (STREAM, "\t.word L%d-L%d\n", VALUE, REL) */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL)
+
+/* This macro should be provided on machines where the addresses in a dispatch
+ table are absolute.
+
+ The definition should be a C statement to output to the stdio stream STREAM
+ an assembler pseudo-instruction to generate a reference to a label. VALUE
+ is the number of an internal label whose definition is output using
+ `(*targetm.asm_out.internal_label)'. For example,
+
+ fprintf (STREAM, "\t.word L%d\n", VALUE) */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+fprintf (STREAM, "\t.word .L%d\n", VALUE)
+
+/*}}}*/
+/*{{{ Assembler Commands for Alignment. */
+
+/* A C statement to output to the stdio stream STREAM an assembler command to
+ advance the location counter to a multiple of 2 to the POWER bytes. POWER
+ will be a C expression of type `int'. */
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ fprintf ((STREAM), "\t.p2align %d\n", (POWER))
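+
+/* For example, ASM_OUTPUT_ALIGN (stream, 3) emits "\t.p2align 3\n",
+   aligning the location counter to an 8 (2**3) byte boundary. */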
+
+/*}}}*/
+/*{{{ Miscellaneous Parameters. */
+
+/* An alias for a machine mode name. This is the machine mode that elements of
+ a jump-table should have. */
+#define CASE_VECTOR_MODE SImode
+
+/* The maximum number of bytes that a single instruction can move quickly from
+ memory to memory. */
+#define MOVE_MAX 8
+
+/* A C expression which is nonzero if on this machine it is safe to "convert"
+ an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller
+ than INPREC) by merely operating on it as if it had only OUTPREC bits.
+
+ On many machines, this expression can be 1.
+
+ When `TRULY_NOOP_TRUNCATION' returns 1 for a pair of sizes for modes for
+ which `MODES_TIEABLE_P' is 0, suboptimal code can result. If this is the
+ case, making `TRULY_NOOP_TRUNCATION' return 0 in such cases may improve
+ things. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* An alias for the machine mode for pointers. On most machines, define this
+ to be the integer mode corresponding to the width of a hardware pointer;
+ `SImode' on 32-bit machines or `DImode' on 64-bit machines. On some machines
+ you must define this to be one of the partial integer modes, such as
+ `PSImode'.
+
+ The width of `Pmode' must be at least as large as the value of
+ `POINTER_SIZE'. If it is not equal, you must define the macro
+ `POINTERS_EXTEND_UNSIGNED' to specify how pointers are extended to `Pmode'. */
+#define Pmode SImode
+
+/* An alias for the machine mode used for memory references to functions being
+ called, in `call' RTL expressions. On most machines this should be
+ `QImode'. */
+#define FUNCTION_MODE QImode
+
+/* If cross-compiling, don't require stdio.h etc to build libgcc.a. */
+#if defined CROSS_DIRECTORY_STRUCTURE && ! defined inhibit_libc
+#define inhibit_libc
+#endif
+
+/*}}}*/
+
+/* Local Variables: */
+/* folded-file: t */
+/* End: */
diff --git a/gcc/config/fr30/fr30.md b/gcc/config/fr30/fr30.md
new file mode 100644
index 000000000..6b3559983
--- /dev/null
+++ b/gcc/config/fr30/fr30.md
@@ -0,0 +1,1268 @@
+;; FR30 machine description.
+;; Copyright (C) 1998, 1999, 2000, 2002, 2004, 2005, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Cygnus Solutions.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;;{{{ Attributes
+
+(define_attr "length" "" (const_int 2))
+
+;; Used to distinguish between small memory model and big memory model targets.
+
+(define_attr "size" "small,big"
+ (const (if_then_else (symbol_ref "TARGET_SMALL_MODEL")
+ (const_string "small")
+ (const_string "big"))))
+
+
+;; Define an attribute to be used by the delay slot code.
+;; An instruction by default is considered to be 'delayable'
+;; that is, it can be placed into a delay slot, but it is not
+;; itself a delayed branch type instruction. An instruction
+;; whose type is 'delayed' is one which has a delay slot, and
+;; an instruction whose delay_type is 'other' is one which does
+;; not have a delay slot, nor can it be placed into a delay slot.
+
+(define_attr "delay_type" "delayable,delayed,other" (const_string "delayable"))
+
+;;}}}
+;;{{{ Delay Slot Specifications
+
+(define_delay (eq_attr "delay_type" "delayed")
+ [(and (eq_attr "delay_type" "delayable")
+ (eq_attr "length" "2"))
+ (nil)
+ (nil)]
+)
+
+(include "predicates.md")
+(include "constraints.md")
+
+;;}}}
+;;{{{ Moves
+
+;;{{{ Comment
+
+;; Wrap moves in define_expand to prevent memory->memory moves from being
+;; generated at the RTL level; this produces better code on most machines,
+;; since most machines cannot perform mem->mem moves directly.
+
+;; If operand 0 is a `subreg' with mode M of a register whose own mode is wider
+;; than M, the effect of this instruction is to store the specified value in
+;; the part of the register that corresponds to mode M. The effect on the rest
+;; of the register is undefined.
+
+;; This class of patterns is special in several ways. First of all, each of
+;; these names *must* be defined, because there is no other way to copy a datum
+;; from one place to another.
+
+;; Second, these patterns are not used solely in the RTL generation pass. Even
+;; the reload pass can generate move insns to copy values from stack slots into
+;; temporary registers. When it does so, one of the operands is a hard
+;; register and the other is an operand that can need to be reloaded into a
+;; register.
+
+;; Therefore, when given such a pair of operands, the pattern must
+;; generate RTL which needs no reloading and needs no temporary
+;; registers--no registers other than the operands. For example, if
+;; you support the pattern with a `define_expand', then in such a
+;; case the `define_expand' mustn't call `force_reg' or any other such
+;; function which might generate new pseudo registers.
+
+;; This requirement exists even for subword modes on a RISC machine
+;; where fetching those modes from memory normally requires several
+;; insns and some temporary registers. Look in `spur.md' to see how
+;; the requirement can be satisfied.
+
+;; During reload a memory reference with an invalid address may be passed as an
+;; operand. Such an address will be replaced with a valid address later in the
+;; reload pass. In this case, nothing may be done with the address except to
+;; use it as it stands. If it is copied, it will not be replaced with a valid
+;; address. No attempt should be made to make such an address into a valid
+;; address and no routine (such as `change_address') that will do so may be
+;; called. Note that `general_operand' will fail when applied to such an
+;; address.
+;;
+;; The global variable `reload_in_progress' (which must be explicitly declared
+;; if required) can be used to determine whether such special handling is
+;; required.
+;;
+;; The variety of operands that have reloads depends on the rest of
+;; the machine description, but typically on a RISC machine these can
+;; only be pseudo registers that did not get hard registers, while on
+;; other machines explicit memory references will get optional
+;; reloads.
+;;
+;; If a scratch register is required to move an object to or from memory, it
+;; can be allocated using `gen_reg_rtx' prior to reload. But this is
+;; impossible during and after reload. If there are cases needing scratch
+;; registers after reload, you must define `SECONDARY_INPUT_RELOAD_CLASS' and
+;; perhaps also `SECONDARY_OUTPUT_RELOAD_CLASS' to detect them, and provide
+;; patterns `reload_inM' or `reload_outM' to handle them.
+
+;; The constraints on a `moveM' must permit moving any hard register to any
+;; other hard register provided that `HARD_REGNO_MODE_OK' permits mode M in
+;; both registers and `REGISTER_MOVE_COST' applied to their classes returns a
+;; value of 2.
+
+;; It is obligatory to support floating point `moveM' instructions
+;; into and out of any registers that can hold fixed point values,
+;; because unions and structures (which have modes `SImode' or
+;; `DImode') can be in those registers and they may have floating
+;; point members.
+
+;; There may also be a need to support fixed point `moveM' instructions in and
+;; out of floating point registers. Unfortunately, I have forgotten why this
+;; was so, and I don't know whether it is still true. If `HARD_REGNO_MODE_OK'
+;; rejects fixed point values in floating point registers, then the constraints
+;; of the fixed point `moveM' instructions must be designed to avoid ever
+;; trying to reload into a floating point register.
+
+;;}}}
+;;{{{ Push and Pop
+
+;; Push a register onto the stack
+(define_insn "movsi_push"
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "register_operand" "a"))]
+ ""
+ "st %0, @-r15"
+)
+
+;; Pop a register off the stack
+(define_insn "movsi_pop"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mem:SI (post_inc:SI (reg:SI 15))))]
+ ""
+ "ld @r15+, %0"
+)
+
+;;}}}
+;;{{{ 1 Byte Moves
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (!reload_in_progress
+ && !reload_completed
+ && GET_CODE (operands[0]) == MEM
+ && (GET_CODE (operands[1]) == MEM
+ || immediate_operand (operands[1], QImode)))
+ operands[1] = copy_to_mode_reg (QImode, operands[1]);
+}")
+
+(define_insn "movqi_unsigned_register_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
+ ""
+ "ldub %1, %0"
+)
+
+(define_expand "movqi_signed_register_load"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))]
+ ""
+ "
+ emit_insn (gen_movqi_unsigned_register_load (operands[0], operands[1]));
+ emit_insn (gen_extendqisi2 (operands[0], operands[0]));
+ DONE;
+ "
+)
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,red,m,r")
+ (match_operand:QI 1 "general_operand" "i,red,r,rm"))]
+ ""
+ "@
+ ldi:8\\t#%A1, %0
+ mov \\t%1, %0
+ stb \\t%1, %0
+ ldub \\t%1, %0"
+)
+
+;;}}}
+;;{{{ 2 Byte Moves
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (!reload_in_progress
+ && !reload_completed
+ && GET_CODE (operands[0]) == MEM
+ && (GET_CODE (operands[1]) == MEM
+ || immediate_operand (operands[1], HImode)))
+ operands[1] = copy_to_mode_reg (HImode, operands[1]);
+}")
+
+(define_insn "movhi_unsigned_register_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
+ ""
+ "lduh %1, %0"
+)
+
+(define_expand "movhi_signed_register_load"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "memory_operand" "")))]
+ ""
+ "
+ emit_insn (gen_movhi_unsigned_register_load (operands[0], operands[1]));
+ emit_insn (gen_extendhisi2 (operands[0], operands[0]));
+ DONE;
+ "
+)
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,red,m,r")
+ (match_operand:HI 1 "general_operand" "L,M,n,red,r,rm"))]
+ ""
+ "@
+ ldi:8 \\t#%1, %0
+ ldi:20\\t#%1, %0
+ ldi:32\\t#%1, %0
+ mov \\t%1, %0
+ sth \\t%1, %0
+ lduh \\t%1, %0"
+ [(set_attr "length" "*,4,6,*,*,*")]
+)
+
+;;}}}
+;;{{{ 4 Byte Moves
+
+;; If the destination is a MEM and the source is a
+;; MEM or a CONST_INT, move the source into a register.
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "{
+ if (!reload_in_progress
+ && !reload_completed
+ && GET_CODE(operands[0]) == MEM
+ && (GET_CODE (operands[1]) == MEM
+ || immediate_operand (operands[1], SImode)))
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ }"
+)
+
+;; We can do some clever tricks when loading certain immediate
+;; values. We implement these tricks as define_splits, rather
+;; than putting the code into the define_expand "movsi" above,
+;; because if we put them there, they will be evaluated at RTL
+;; generation time and then the combiner pass will come along
+;; and replace the multiple insns that have been generated with
+;; the original, slower, load insns. (The combiner pass only
+;; cares about reducing the number of instructions; it does not
+;; care about instruction lengths or speeds.) Splits are
+;; evaluated after the combine pass and before the scheduling
+;; passes, so they are the perfect place to put this
+;; intelligence.
+;;
+;; XXX we probably ought to implement these for QI and HI mode
+;; loads as well.
+
+;; If we are loading a small negative constant we can save space
+;; and time by loading the positive value and then sign extending it.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "INTVAL (operands[1]) <= -1 && INTVAL (operands[1]) >= -128
+ && (GET_CODE (operands[0]) != SUBREG
+ || SCALAR_INT_MODE_P (GET_MODE (XEXP (operands[0], 0))))"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+ "{
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+ }"
+)
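+
+;; For example, loading the constant -5 should now split into the two-byte
+;; pair "ldi:8 #251" and "extsb" on the destination register, instead of a
+;; single six-byte ldi:32.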
+
+;; If we are loading a large negative constant, one which does
+;; not have any of its bottom 24 bits set, then we can save time
+;; and space by loading the byte value and shifting it into place.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "(INTVAL (operands[1]) < 0) && ((INTVAL (operands[1]) & 0x00ffffff) == 0)"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 24)))
+ (clobber (reg:CC 16))])]
+ "{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ operands[2] = GEN_INT (val >> 24);
+ }"
+)
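+
+;; For example, loading 0xff000000 (val >> 24 == -1) becomes a load of -1,
+;; which the split above can in turn shrink, followed by a 24-bit left
+;; shift, avoiding the six-byte ldi:32.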
+
+;; If we are loading a large positive constant, one which has bits
+;; in the top byte set, but whose set bits all lie within an 8 bit
+;; range, then we can save time and space by loading the byte value
+;; and shifting it into place.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "(INTVAL (operands[1]) > 0x00ffffff)
+ && ((INTVAL (operands[1]) >> exact_log2 (INTVAL (operands[1]) & (- INTVAL (operands[1])))) < 0x100)"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC 16))])]
+ "{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ int shift = exact_log2 (val & ( - val));
+ operands[2] = GEN_INT (val >> shift);
+ operands[3] = GEN_INT (shift);
+ }"
+)
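+
+;; For example, loading 0x0ff00000 gives shift == 20 and a byte value of
+;; 0xff, so the constant is built with a two-byte "ldi:8 #255" followed by
+;; a 20-bit left shift rather than a six-byte ldi:32.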
+
+;; When TARGET_SMALL_MODEL is defined we assume that all symbolic
+;; values are addresses which will fit in 20 bits.
+
+(define_insn "movsi_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,red,V,r,m")
+ (match_operand:SI 1 "general_operand" "L,M,n,i,rde,r,rm,r"))]
+ ""
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0: return \"ldi:8 \\t#%1, %0\";
+ case 1: return \"ldi:20\\t#%1, %0\";
+ case 2: return \"ldi:32\\t#%1, %0\";
+ case 3: if (TARGET_SMALL_MODEL)
+ return \"ldi:20\\t%1, %0\";
+ else
+ return \"ldi:32\\t%1, %0\";
+ case 4: return \"mov \\t%1, %0\";
+ case 5: return \"st \\t%1, %0\";
+ case 6: return \"ld \\t%1, %0\";
+ case 7: return \"st \\t%1, %0\";
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 4)
+ (eq_attr "alternative" "2") (const_int 6)
+ (eq_attr "alternative" "3")
+ (if_then_else (eq_attr "size" "small")
+ (const_int 4)
+ (const_int 6))]
+ (const_int 2)))]
+)
+
+;;}}}
+;;{{{ 8 Byte Moves
+
+;; Note - the FR30 does not have an 8 byte load/store instruction
+;; but we have to support this pattern because some other patterns
+;; (e.g. mulsidi3) can produce a DImode result.
+;; (This code is stolen from the M32R port.)
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DImode, operands[1]);
+ "
+)
+
+;; We use an insn and a split so that we can generate
+;; RTL rather than text from fr30_move_double().
+
+(define_insn "*movdi_insn"
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,m,r")
+ (match_operand:DI 1 "di_operand" "r,m,r,nF"))]
+ "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)"
+ "#"
+ [(set_attr "length" "4,8,12,12")]
+)
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_di_operand" "")
+ (match_operand:DI 1 "di_operand" ""))]
+ "reload_completed"
+ [(match_dup 2)]
+ "operands[2] = fr30_move_double (operands);"
+)
+
+;;}}}
+;;{{{ Load & Store Multiple Registers
+
+;; The load multiple and store multiple patterns are implemented
+;; as peepholes because the only time they are expected to occur
+;; is during function prologues and epilogues.
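+;;
+;; For example, the four consecutive pushes emitted to save r8-r11 in a
+;; prologue should collapse into a single "stm1 (%r8, %r9, %r10, %r11)".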
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 2 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 3 "high_register_operand" "h"))]
+ "fr30_check_multiple_regs (operands, 4, 1)"
+ "stm1 (%0, %1, %2, %3)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 2 "high_register_operand" "h"))]
+ "fr30_check_multiple_regs (operands, 3, 1)"
+ "stm1 (%0, %1, %2)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "high_register_operand" "h"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "high_register_operand" "h"))]
+ "fr30_check_multiple_regs (operands, 2, 1)"
+ "stm1 (%0, %1)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (match_operand:SI 0 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 1 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 2 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 3 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))]
+ "fr30_check_multiple_regs (operands, 4, 0)"
+ "ldm1 (%0, %1, %2, %3)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (match_operand:SI 0 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 1 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 2 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))]
+ "fr30_check_multiple_regs (operands, 3, 0)"
+ "ldm1 (%0, %1, %2)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (match_operand:SI 0 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))
+ (set (match_operand:SI 1 "high_register_operand" "h")
+ (mem:SI (post_inc:SI (reg:SI 15))))]
+ "fr30_check_multiple_regs (operands, 2, 0)"
+ "ldm1 (%0, %1)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 2 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 3 "low_register_operand" "l"))]
+ "fr30_check_multiple_regs (operands, 4, 1)"
+ "stm0 (%0, %1, %2, %3)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 2 "low_register_operand" "l"))]
+ "fr30_check_multiple_regs (operands, 3, 1)"
+ "stm0 (%0, %1, %2)"
+ [(set_attr "delay_type" "other")]
+)
+
+(define_peephole
+ [(set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 0 "low_register_operand" "l"))
+ (set (mem:SI (pre_dec:SI (reg:SI 15)))
+ (match_operand:SI 1 "low_register_operand" "l"))]
+ "fr30_check_multiple_regs (operands, 2, 1)"
+ "stm0 (%0, %1)"
+ [(set_attr "delay_type" "other")]
+)
+
+;;}}}
+;;{{{ Floating Point Moves
+
+;; Note - Patterns for SF mode moves are compulsory, but
+;; patterns for DF are optional, as GCC can synthesize them.
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "{
+ if (!reload_in_progress && !reload_completed
+ && memory_operand (operands[0], SFmode)
+ && memory_operand (operands[1], SFmode))
+ operands[1] = copy_to_mode_reg (SFmode, operands[1]);
+ }"
+)
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,red,m,r")
+ (match_operand:SF 1 "general_operand" "Fn,i,rde,r,rm"))]
+ ""
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0: return \"ldi:32\\t%1, %0\";
+ case 1: if (TARGET_SMALL_MODEL)
+ return \"ldi:20\\t%1, %0\";
+ else
+ return \"ldi:32\\t%1, %0\";
+ case 2: return \"mov \\t%1, %0\";
+ case 3: return \"st \\t%1, %0\";
+ case 4: return \"ld \\t%1, %0\";
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set (attr "length") (cond [(eq_attr "alternative" "0") (const_int 6)
+ (eq_attr "alternative" "1")
+ (if_then_else (eq_attr "size" "small")
+ (const_int 4)
+ (const_int 6))]
+ (const_int 2)))]
+)
+
+(define_insn "*movsf_constant_store"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (match_operand:SF 1 "immediate_operand" "F"))]
+ ""
+ "*
+ {
+ const char * ldi_instr;
+ const char * tmp_reg;
+ static char buffer[100];
+
+ ldi_instr = fr30_const_double_is_zero (operands[1]) ? \"ldi:8\" : \"ldi:32\";
+
+ tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+ sprintf (buffer, \"%s\\t#%%1, %s\\t;\\n\\tst\\t%s, %%0\\t; Created by movsf_constant_store\",
+ ldi_instr, tmp_reg, tmp_reg);
+
+ return buffer;
+ }"
+ [(set_attr "length" "8")]
+)
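+
+;; For example, a store of the constant 0.0 assembles to a two-byte
+;; "ldi:8 #0" into the scratch register r0 followed by the "st"; any other
+;; SFmode constant needs the six-byte "ldi:32" to build its bit pattern.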
+
+;;}}}
+
+;;}}}
+;;{{{ Conversions
+
+;; Signed conversions from a smaller integer to a larger integer
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "extsb %0"
+)
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "extsh %0"
+)
+
+;; Unsigned conversions from a smaller integer to a larger integer
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "extub %0"
+)
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "extuh %0"
+)
+
+;;}}}
+;;{{{ Arithmetic
+
+;;{{{ Addition
+
+;; This is a special pattern just for adjusting the stack size.
+(define_insn "add_to_stack"
+ [(set (reg:SI 15)
+ (plus:SI (reg:SI 15)
+ (match_operand:SI 0 "stack_add_operand" "i")))]
+ ""
+ "addsp %0"
+)
+
+;; We need some trickery to handle the addition of large
+;; (i.e. outside +/- 16) constants, because reload assumes
+;; that it can generate add instructions with arbitrarily
+;; sized constants.
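+;;
+;; For example, "r4 += 1000" cannot use the small (-16 .. 15) immediate
+;; range of addn/addn2, so it is routed through addsi_big_int below, which
+;; materializes the constant in a register first.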
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "{
+ if ( GET_CODE (operands[2]) == REG
+ || GET_CODE (operands[2]) == SUBREG)
+ emit_insn (gen_addsi_regs (operands[0], operands[1], operands[2]));
+ else if (GET_CODE (operands[2]) != CONST_INT)
+ emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+ else if (INTVAL (operands[2]) >= -16
+ && INTVAL (operands[2]) <= 15
+ && (!REG_P (operands[1])
+ || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+ || REGNO (operands[1]) == STACK_POINTER_REGNUM))
+ emit_insn (gen_addsi_small_int (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+ DONE;
+ }"
+)
+
+(define_insn "addsi_regs"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "addn %2, %0"
+)
+
+;; Do not allow an eliminable register in the source register. It
+;; might be eliminated in favor of the stack pointer, probably
+;; increasing the offset, and so rendering the instruction illegal.
+(define_insn "addsi_small_int"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "add_immediate_operand" "I,J")))]
+ "!REG_P (operands[1])
+ || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+ || REGNO (operands[1]) == STACK_POINTER_REGNUM"
+ "@
+ addn %2, %0
+ addn2 %2, %0"
+)
+
+(define_expand "addsi_big_int"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ ""
+ "{
+ /* Cope with the possibility that ops 0 and 1 are the same register. */
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (reload_in_progress || reload_completed)
+ {
+ rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+ emit_insn (gen_movsi (reg, operands[2]));
+ emit_insn (gen_addsi_regs (operands[0], operands[0], reg));
+ }
+ else
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_addsi_regs (operands[0], operands[0], operands[2]));
+ }
+ }
+ else
+ {
+ emit_insn (gen_movsi (operands[0], operands[2]));
+ emit_insn (gen_addsi_regs (operands[0], operands[0], operands[1]));
+ }
+ DONE;
+ }"
+)
+
+(define_insn "*addsi_for_reload"
+ [(set (match_operand:SI 0 "register_operand" "=&r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r")
+ (match_operand:SI 2 "immediate_operand" "L,M,n")))]
+ "reload_in_progress || reload_completed"
+ "@
+ ldi:8\\t#%2, %0 \\n\\taddn\\t%1, %0
+ ldi:20\\t#%2, %0 \\n\\taddn\\t%1, %0
+ ldi:32\\t#%2, %0 \\n\\taddn\\t%1, %0"
+ [(set_attr "length" "4,6,8")]
+)
+
+;;}}}
+;;{{{ Subtraction
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "subn %2, %0"
+)
+
+;;}}}
+;;{{{ Multiplication
+
+;; Signed multiplication producing 64-bit results from 32-bit inputs
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+ (clobber (reg:CC 16))]
+ ""
+ "mul %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+ [(set_attr "length" "6")]
+)
+
+;; Unsigned multiplication producing 64-bit results from 32-bit inputs
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+ (clobber (reg:CC 16))]
+ ""
+ "mulu %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+ [(set_attr "length" "6")]
+)
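+
+;; In the two widening multiplies above, the hardware leaves the 64-bit
+;; product in the special registers mdh (high word) and mdl (low word);
+;; "%p0" names the second register of the DImode destination pair, which
+;; receives the low word (see fr30_print_operand).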
+
+;; Signed multiplication producing 32-bit result from 16-bit inputs
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+ (clobber (reg:CC 16))]
+ ""
+ "mulh %2, %1\\n\\tmov\\tmdl, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Unsigned multiplication producing 32-bit result from 16-bit inputs
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+ (clobber (reg:CC 16))]
+ ""
+ "muluh %2, %1\\n\\tmov\\tmdl, %0"
+ [(set_attr "length" "4")]
+)
+
+;; Signed multiplication producing 32-bit result from 32-bit inputs
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:CC 16))]
+ ""
+ "mul %2, %1\\n\\tmov\\tmdl, %0"
+ [(set_attr "length" "4")]
+)
+
+;;}}}
+;;}}}
+;;{{{ Shifts
+
+;; Arithmetic Shift Left
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+ (clobber (reg:CC 16))]
+ ""
+ "@
+ lsl %2, %0
+ lsl %2, %0
+ lsl2 %x2, %0"
+)
+
+;; Arithmetic Shift Right
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+ (clobber (reg:CC 16))]
+ ""
+ "@
+ asr %2, %0
+ asr %2, %0
+ asr2 %x2, %0"
+)
+
+;; Logical Shift Right
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+ (clobber (reg:CC 16))]
+ ""
+ "@
+ lsr %2, %0
+ lsr %2, %0
+ lsr2 %x2, %0"
+)
+
+;;}}}
+;;{{{ Logical Operations
+
+;; Logical AND, 32-bit integers
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (reg:CC 16))]
+ ""
+ "and %1, %0"
+)
+
+;; Inclusive OR, 32-bit integers
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (reg:CC 16))]
+ ""
+ "or %1, %0"
+)
+
+;; Exclusive OR, 32-bit integers
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (reg:CC 16))]
+ ""
+ "eor %1, %0"
+)
+
+;; One's complement, 32-bit integers
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+ "{
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (reload_in_progress || reload_completed)
+ {
+ rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+ emit_insn (gen_movsi (reg, constm1_rtx));
+ emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+ }
+ else
+ {
+ rtx reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_movsi (reg, constm1_rtx));
+ emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+ }
+ }
+ else
+ {
+ emit_insn (gen_movsi_internal (operands[0], constm1_rtx));
+ emit_insn (gen_xorsi3 (operands[0], operands[1], operands[0]));
+ }
+ DONE;
+ }"
+)
+
+;;}}}
+;;{{{ Comparisons
+
+;; The actual comparisons, generated by the cbranch and/or cstore expanders
+
+(define_insn "*cmpsi_internal"
+ [(set (reg:CC 16)
+ (compare:CC (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+ ""
+ "@
+ cmp %1, %0
+ cmp %1, %0
+ cmp2 %1, %0"
+)
+
+;;}}}
+;;{{{ Branches
+
+;; Define_expands called by the machine independent part of the compiler
+;; to allocate a new comparison register
+
+(define_expand "cbranchsi4"
+ [(set (reg:CC 16)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator:CC 0 "ordered_comparison_operator"
+ [(reg:CC 16) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ ""
+)
+
+
+;; Actual branches. We must allow for the (label_ref) and the (pc) to be
+;; swapped. If they are swapped, it reverses the sense of the branch.
+
+;; This pattern matches the (branch-if-true) branches generated above.
+;; It generates two different instruction sequences depending upon how
+;; far away the destination is.
+
+;; The calculation for the instruction length is derived as follows:
+;; The branch instruction has a 9-bit signed displacement so we have
+;; this inequality for the displacement:
+;;
+;; -256 <= pc < 256
+;; or
+;; -256 + 256 <= pc + 256 < 256 + 256
+;; i.e.
+;; 0 <= pc + 256 < 512
+;;
+;; if we consider the displacement as an unsigned value, then negative
+;; displacements become very large positive displacements, and the
+;; inequality becomes:
+;;
+;; pc + 256 < 512
+;;
+;; In order to allow for the fact that the real branch instruction works
+;; from pc + 2, we increase the offset to 258.
+;;
+;; Note - we do not have to worry about whether the branch is delayed or
+;; not, as branch shortening happens after delay slot reorganization.
+
+(define_insn "*branch_true"
+ [(set (pc)
+ (if_then_else (match_operator:CC 0 "comparison_operator"
+ [(reg:CC 16)
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "*
+ {
+ if (get_attr_length (insn) == 2)
+ return \"b%b0%#\\t%l1\";
+ else
+ {
+ static char buffer [100];
+ const char * tmp_reg;
+ const char * ldi_insn;
+
+ tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+ ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\";
+
+ /* The code produced here is, for say the EQ case:
+
+ Bne 1f
+ LDI <label>, r0
+ JMP r0
+ 1: */
+
+ sprintf (buffer,
+ \"b%%B0\\t1f\\t;\\n\\t%s\\t%%l1, %s\\t;\\n\\tjmp%%#\\t@%s\\t;\\n1:\",
+ ldi_insn, tmp_reg, tmp_reg);
+
+ return buffer;
+ }
+ }"
+ [(set (attr "length") (if_then_else
+ (ltu
+ (plus
+ (minus
+ (match_dup 1)
+ (pc))
+ (const_int 254))
+ (const_int 506))
+ (const_int 2)
+ (if_then_else (eq_attr "size" "small")
+ (const_int 8)
+ (const_int 10))))
+ (set_attr "delay_type" "delayed")]
+)
+
+
+;; This pattern is a duplicate of the previous one, except that the
+;; branch occurs if the test is false, so the %B operator is used.
+(define_insn "*branch_false"
+ [(set (pc)
+ (if_then_else (match_operator:CC 0 "comparison_operator"
+ [(reg:CC 16)
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "*
+ {
+ if (get_attr_length (insn) == 2)
+ return \"b%B0%#\\t%l1 \";
+ else
+ {
+ static char buffer [100];
+ const char * tmp_reg;
+ const char * ldi_insn;
+
+ tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+ ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\";
+
+ sprintf (buffer,
+ \"b%%b0\\t1f\\t;\\n\\t%s\\t%%l1, %s\\t;\\n\\tjmp%%#\\t@%s\\t;\\n1:\",
+ ldi_insn, tmp_reg, tmp_reg);
+
+ return buffer;
+ }
+ }"
+ [(set (attr "length") (if_then_else (ltu (plus (minus (match_dup 1) (pc))
+ (const_int 254))
+ (const_int 506))
+ (const_int 2)
+ (if_then_else (eq_attr "size" "small")
+ (const_int 8)
+ (const_int 10))))
+ (set_attr "delay_type" "delayed")]
+)
+
+;;}}}
+;;{{{ Calls & Jumps
+
+;; Subroutine call instruction returning no value. Operand 0 is the function
+;; to call; operand 1 is the number of bytes of arguments pushed (in mode
+;; `SImode', except it is normally a `const_int'); operand 2 is the number of
+;; registers used as operands.
+
+(define_insn "call"
+ [(call (match_operand 0 "call_operand" "Qm")
+ (match_operand 1 "" "g"))
+ (clobber (reg:SI 17))]
+ ""
+ "call%#\\t%0"
+ [(set_attr "delay_type" "delayed")]
+)
+
+;; Subroutine call instruction returning a value. Operand 0 is the hard
+;; register in which the value is returned. There are three more operands, the
+;; same as the three operands of the `call' instruction (but with numbers
+;; increased by one).
+
+;; Subroutines that return `BLKmode' objects use the `call' insn.
+
+(define_insn "call_value"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (match_operand 1 "call_operand" "Qm")
+ (match_operand 2 "" "g")))
+ (clobber (reg:SI 17))]
+ ""
+ "call%#\\t%1"
+ [(set_attr "delay_type" "delayed")]
+)
+
+;; Normal unconditional jump.
+;; For a description of the computation of the length
+;; attribute see the branch patterns above.
+;;
+;; Although this instruction really clobbers r0, flow
+;; relies on jump being simplejump_p in several places,
+;; and as r0 is fixed this doesn't change anything.
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+ {
+ if (get_attr_length (insn) == 2)
+ return \"bra%#\\t%0\";
+ else
+ {
+ static char buffer [100];
+ const char * tmp_reg;
+ const char * ldi_insn;
+
+ tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+ ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\";
+
+ sprintf (buffer, \"%s\\t%%0, %s\\t;\\n\\tjmp%%#\\t@%s\\t;\",
+ ldi_insn, tmp_reg, tmp_reg);
+
+ return buffer;
+ }
+ }"
+ [(set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 254))
+ (const_int 506))
+ (const_int 2)
+ (if_then_else (eq_attr "size" "small")
+ (const_int 6)
+ (const_int 8))))
+ (set_attr "delay_type" "delayed")]
+)
+
+;; Indirect jump through a register
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "r"))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (XEXP (operands[0], 0)) != PLUS"
+ "jmp%#\\t@%0"
+ [(set_attr "delay_type" "delayed")]
+)
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp%#\\t@%0"
+ [(set_attr "delay_type" "delayed")]
+)
+
+;;}}}
+;;{{{ Function Prologues and Epilogues
+
+;; Called after register allocation to add any instructions needed for the
+;; prologue. Using a prologue insn is favored compared to putting all of the
+;; instructions in output_function_prologue(), since it allows the scheduler
+;; to intermix instructions with the saves of the caller saved registers. In
+;; some cases, it might be necessary to emit a barrier instruction as the last
+;; insn to prevent such scheduling.
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+ "{
+ fr30_expand_prologue ();
+ DONE;
+ }"
+)
+
+;; Called after register allocation to add any instructions needed for the
+;; epilogue. Using an epilogue insn is favored compared to putting all of the
+;; instructions in output_function_epilogue(), since it allows the scheduler
+;; to intermix instructions with the restores of the caller saved registers.
+;; In some cases, it might be necessary to emit a barrier instruction as the
+;; first insn to prevent such scheduling.
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "{
+ fr30_expand_epilogue ();
+ DONE;
+ }"
+)
+
+(define_insn "return_from_func"
+ [(return)
+ (use (reg:SI 17))]
+ "reload_completed"
+ "ret%#"
+ [(set_attr "delay_type" "delayed")]
+)
+
+(define_insn "leave_func"
+ [(set (reg:SI 15) (plus:SI (reg:SI 14) (const_int 4)))
+ (set (reg:SI 14) (mem:SI (minus:SI (reg:SI 15) (const_int 4))))]
+ "reload_completed"
+ "leave"
+)
+
+(define_expand "enter_func"
+ [(parallel
+ [(set (mem:SI (minus:SI (match_dup 1)
+ (const_int 4)))
+ (match_dup 2))
+ (set (match_dup 2)
+ (minus:SI (match_dup 1)
+ (const_int 4)))
+ (set (match_dup 1)
+ (minus:SI (match_dup 1)
+ (match_operand:SI 0 "immediate_operand")))]
+ )]
+ ""
+{
+ operands[1] = stack_pointer_rtx;
+ operands[2] = hard_frame_pointer_rtx;
+})
+
+(define_insn "*enter_func"
+ [(set (mem:SI (minus:SI (reg:SI 15)
+ (const_int 4)))
+ (reg:SI 14))
+ (set (reg:SI 14)
+ (minus:SI (reg:SI 15)
+ (const_int 4)))
+ (set (reg:SI 15)
+ (minus:SI (reg:SI 15)
+ (match_operand 0 "immediate_operand" "i")))]
+ "reload_completed"
+ "enter #%0"
+ [(set_attr "delay_type" "other")]
+)
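+
+;; Read together, the pattern above is what "enter #n" does: save the old
+;; frame pointer at SP - 4, point FP (r14) at that save slot, and drop SP
+;; (r15) by the full frame size n.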
+
+;;}}}
+;;{{{ Miscellaneous
+
+;; No operation, needed in case the user uses -g but not -O.
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+)
+
+;; Pseudo instruction that prevents the scheduler from moving code above this
+;; point.
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ ""
+ [(set_attr "length" "0")]
+)
+;;}}}
+
+;; Local Variables:
+;; mode: md
+;; folded-file: t
+;; End:
diff --git a/gcc/config/fr30/fr30.opt b/gcc/config/fr30/fr30.opt
new file mode 100644
index 000000000..da6148a6d
--- /dev/null
+++ b/gcc/config/fr30/fr30.opt
@@ -0,0 +1,27 @@
+; Options for the FR30 port of the compiler.
+
+; Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+msmall-model
+Target Report Mask(SMALL_MODEL)
+Assume small address space
+
+mno-lsim
+Target RejectNegative
+Assume that run-time support has been provided, so omit -lsim from the linker command line
diff --git a/gcc/config/fr30/lib1funcs.asm b/gcc/config/fr30/lib1funcs.asm
new file mode 100644
index 000000000..7c6345312
--- /dev/null
+++ b/gcc/config/fr30/lib1funcs.asm
@@ -0,0 +1,115 @@
+/* libgcc routines for the FR30.
+ Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .macro FUNC_START name
+ .text
+ .globl __\name
+ .type __\name, @function
+__\name:
+ .endm
+
+ .macro FUNC_END name
+ .size __\name, . - __\name
+ .endm
+
+ .macro DIV_BODY reg number
+ .if \number
+ DIV_BODY \reg, "\number - 1"
+ div1 \reg
+ .endif
+ .endm
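+
+;; DIV_BODY is a recursive macro: "DIV_BODY r5 32" expands to 32 copies of
+;; "div1 r5", one hardware division step per bit of the 32-bit quotient.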
+
+#ifdef L_udivsi3
+FUNC_START udivsi3
+	;; Perform an unsigned division of r4 / r5 and place the result in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0u r5
+ DIV_BODY r5 32
+ mov mdl, r4
+ ret
+FUNC_END udivsi3
+#endif /* L_udivsi3 */
+
+#ifdef L_divsi3
+FUNC_START divsi3
+	;; Perform a signed division of r4 / r5 and place the result in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0s r5
+ DIV_BODY r5 32
+ div2 r5
+ div3
+ div4s
+ mov mdl, r4
+ ret
+FUNC_END divsi3
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+FUNC_START umodsi3
+	;; Perform an unsigned division of r4 / r5 and place the remainder in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0u r5
+ DIV_BODY r5 32
+ mov mdh, r4
+ ret
+FUNC_END umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+FUNC_START modsi3
+	;; Perform a signed division of r4 / r5 and place the remainder in r4.
+ ;; Does not handle overflow yet...
+ mov r4, mdl
+ div0s r5
+ DIV_BODY r5 32
+ div2 r5
+ div3
+ div4s
+ mov mdh, r4
+ ret
+FUNC_END modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_negsi2
+FUNC_START negsi2
+ ldi:8 #0, r0
+ sub r4, r0
+ mov r0, r4
+ ret
+FUNC_END negsi2
+#endif /* L_negsi2 */
+
+#ifdef L_one_cmplsi2
+FUNC_START one_cmplsi2
+ ldi:8 #0xff, r0
+ extsb r0
+ eor r0, r4
+ ret
+FUNC_END one_cmplsi2
+#endif /* L_one_cmplsi2 */
+
+
diff --git a/gcc/config/fr30/predicates.md b/gcc/config/fr30/predicates.md
new file mode 100644
index 000000000..9467b287d
--- /dev/null
+++ b/gcc/config/fr30/predicates.md
@@ -0,0 +1,123 @@
+;; Predicate definitions for FR30.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Returns true if OP is an integer value suitable for use in an
+;; ADDSP instruction.
+
+(define_predicate "stack_add_operand"
+ (match_code "const_int")
+{
+ return
+ (GET_CODE (op) == CONST_INT
+ && INTVAL (op) >= -512
+ && INTVAL (op) <= 508
+ && ((INTVAL (op) & 3) == 0));
+})
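+
+;; (ADDSP encodes a signed 8-bit immediate scaled by 4, which is why the
+;; test above requires a multiple of 4 in the range -512 .. 508.)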
+
+;; Returns true if OP is a hard register in the range 8 - 15.
+
+(define_predicate "high_register_operand"
+ (match_code "reg")
+{
+ return
+ (GET_CODE (op) == REG
+ && REGNO (op) <= 15
+ && REGNO (op) >= 8);
+})
+
+;; Returns true if OP is a hard register in the range 0 - 7.
+
+(define_predicate "low_register_operand"
+ (match_code "reg")
+{
+ return
+ (GET_CODE (op) == REG
+ && REGNO (op) <= 7);
+})
+
+;; Returns true if OP is suitable for use in a CALL insn.
+
+(define_predicate "call_operand"
+ (match_code "mem")
+{
+ return (GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == REG));
+})
+
+;; Returns TRUE if OP is a valid operand of a DImode operation.
+
+(define_predicate "di_operand"
+ (match_code "const_int,const_double,reg,mem")
+{
+ if (register_operand (op, mode))
+ return TRUE;
+
+ if (mode != VOIDmode && GET_MODE (op) != VOIDmode && GET_MODE (op) != DImode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ switch (GET_CODE (op))
+ {
+ case CONST_DOUBLE:
+ case CONST_INT:
+ return TRUE;
+
+ case MEM:
+ return memory_address_p (DImode, XEXP (op, 0));
+
+ default:
+ return FALSE;
+ }
+})
+
+;; Returns TRUE if OP is a DImode register or MEM.
+
+(define_predicate "nonimmediate_di_operand"
+ (match_code "reg,mem")
+{
+ if (register_operand (op, mode))
+ return TRUE;
+
+ if (mode != VOIDmode && GET_MODE (op) != VOIDmode && GET_MODE (op) != DImode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) == MEM)
+ return memory_address_p (DImode, XEXP (op, 0));
+
+ return FALSE;
+})
+
+;; Returns true if OP is an integer value suitable for use in an ADD
+;; or ADD2 instruction, or if it is a register.
+
+(define_predicate "add_immediate_operand"
+ (match_code "reg,const_int")
+{
+ return
+ (GET_CODE (op) == REG
+ || (GET_CODE (op) == CONST_INT
+ && INTVAL (op) >= -16
+ && INTVAL (op) <= 15));
+})
diff --git a/gcc/config/fr30/t-fr30 b/gcc/config/fr30/t-fr30
new file mode 100644
index 000000000..027029223
--- /dev/null
+++ b/gcc/config/fr30/t-fr30
@@ -0,0 +1,56 @@
+# Copyright (C) 1999, 2001, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = fr30/lib1funcs.asm
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/fr30/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -o $(T)crti.o -x assembler $(srcdir)/config/fr30/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/fr30/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -o $(T)crtn.o -x assembler $(srcdir)/config/fr30/crtn.asm
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+# If any special flags are necessary when building libgcc2 put them here.
+#
+# TARGET_LIBGCC2_CFLAGS
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
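+
+# Both rules copy the generic $(srcdir)/config/fp-bit.c; prepending
+# "#define FLOAT" selects the single-precision (SFmode) routines, while
+# the plain copy builds the double-precision (DFmode) ones.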
+
+# Enable the following if multilibs are needed.
+# See gcc/genmultilib, gcc/gcc.texi and gcc/tm.texi for a
+# description of the options and their values.
+#
+# MULTILIB_OPTIONS =
+# MULTILIB_DIRNAMES =
+# MULTILIB_MATCHES =
+# MULTILIB_EXCEPTIONS =
+# MULTILIB_EXTRA_OPTS =
+#
+# LIBGCC = stmp-multilib
+# INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/freebsd-nthr.h b/gcc/config/freebsd-nthr.h
new file mode 100644
index 000000000..6554fddfe
--- /dev/null
+++ b/gcc/config/freebsd-nthr.h
@@ -0,0 +1,21 @@
+/* FreeBSD configuration setting for FreeBSD systems.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ Contributed by Loren J. Rittle <ljrittle@acm.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define FBSD_NO_THREADS
diff --git a/gcc/config/freebsd-spec.h b/gcc/config/freebsd-spec.h
new file mode 100644
index 000000000..d8b338226
--- /dev/null
+++ b/gcc/config/freebsd-spec.h
@@ -0,0 +1,144 @@
+/* Base configuration file for all FreeBSD targets.
+ Copyright (C) 1999, 2000, 2001, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Common FreeBSD configuration.
+ All FreeBSD architectures should include this file, which will specify
+ their commonalities.
+ Adapted from gcc/config/freebsd.h by
+ David O'Brien <obrien@FreeBSD.org>
+ Loren J. Rittle <ljrittle@acm.org>. */
+
+
+/* In case we need to know. */
+#define USING_CONFIG_FREEBSD_SPEC 1
+
+#define FBSD_TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_with_int_value ("__FreeBSD__", FBSD_MAJOR); \
+ builtin_define_std ("unix"); \
+ builtin_define ("__KPRINTF_ATTRIBUTE__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=FreeBSD"); \
+ FBSD_TARGET_CPU_CPP_BUILTINS(); \
+ } \
+ while (0)
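+
+/* For example, when FBSD_MAJOR is 9 the macro above predefines
+   __FreeBSD__=9 and "unix", and asserts system=unix, system=bsd and
+   system=FreeBSD before running the per-CPU hook below. */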
+
+/* Define the default FreeBSD-specific per-CPU hook code. */
+#define FBSD_TARGET_CPU_CPP_BUILTINS() do {} while (0)
+
+/* Provide a CPP_SPEC appropriate for FreeBSD. We just deal with the GCC
+ option `-posix', and PIC issues. */
+
+#define FBSD_CPP_SPEC " \
+ %(cpp_cpu) \
+ %(cpp_arch) \
+ %{posix:-D_POSIX_SOURCE}"
+
+/* Provide a STARTFILE_SPEC appropriate for FreeBSD. Here we add
+ the magical crtbegin.o file (see crtstuff.c) which provides part
+ of the support for getting C++ file-scope static object constructed
+ before entering `main'. */
+
+#define FBSD_STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
+ %{!p:%{profile:gcrt1.o%s} \
+ %{!profile:crt1.o%s}}}} \
+ crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+/* Provide an ENDFILE_SPEC appropriate for FreeBSD. Here we tack on
+ the magical crtend.o file (see crtstuff.c) which provides part of
+ the support for getting C++ file-scope static objects constructed
+ before entering `main', followed by a normal "finalizer" file,
+ `crtn.o'. */
+
+#define FBSD_ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+/* Provide a LIB_SPEC appropriate for FreeBSD as configured and as
+ required by the user-land thread model. Before __FreeBSD_version
+ 500016, select the appropriate libc, depending on whether we're
+ doing profiling or need threads support. At __FreeBSD_version
+ 500016 and later, when threads support is requested include both
+ -lc and the threading lib instead of only -lc_r. To make matters
+ interesting, we can't actually use __FreeBSD_version provided by
+ <osreldate.h> directly since it breaks cross-compiling. As a final
+ twist, make it a hard error if -pthread is provided on the command
+ line and gcc was configured with --disable-threads (this will help
+ avoid bug reports from users complaining about threading when they
+ misconfigured the gcc bootstrap but are later consulting FreeBSD
+ manual pages that refer to the mythical -pthread option). */
+
+/* Provide a LIB_SPEC appropriate for FreeBSD. Just select the appropriate
+ libc, depending on whether we're doing profiling or need threads support.
+ (Similar to the default, except no -lg and no -p.) */
+
+#ifdef FBSD_NO_THREADS
+#define FBSD_LIB_SPEC " \
+ %{pthread: %eThe -pthread option is only supported on FreeBSD when gcc \
+is built with the --enable-threads configure-time option.} \
+ %{!shared: \
+ %{!pg: -lc} \
+ %{pg: -lc_p} \
+ }"
+#else
+#if FBSD_MAJOR < 5
+#define FBSD_LIB_SPEC " \
+ %{!shared: \
+ %{!pg: \
+ %{!pthread:-lc} \
+ %{pthread:-lc_r}} \
+ %{pg: \
+ %{!pthread:-lc_p} \
+ %{pthread:-lc_r_p}} \
+ }"
+#else
+#define FBSD_LIB_SPEC " \
+ %{!shared: \
+ %{!pg: %{pthread:-lpthread} -lc} \
+ %{pg: %{pthread:-lpthread_p} -lc_p} \
+ } \
+ %{shared: \
+ %{pthread:-lpthread} -lc \
+ }"
+#endif
+#endif
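+
+/* So, on FreeBSD 5 and later, a plain link pulls in just -lc, "gcc
+   -pthread" links -lpthread followed by -lc, and profiling (-pg) selects
+   the _p variants of both. */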
+
+#if FBSD_MAJOR < 6
+#define FBSD_DYNAMIC_LINKER "/usr/libexec/ld-elf.so.1"
+#else
+#define FBSD_DYNAMIC_LINKER "/libexec/ld-elf.so.1"
+#endif
+
+#if defined(HAVE_LD_EH_FRAME_HDR)
+#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
diff --git a/gcc/config/freebsd-stdint.h b/gcc/config/freebsd-stdint.h
new file mode 100644
index 000000000..2719e41b3
--- /dev/null
+++ b/gcc/config/freebsd-stdint.h
@@ -0,0 +1,56 @@
+/* Definitions for <stdint.h> types for FreeBSD systems.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Gerald Pfeifer <gerald@pfeifer.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_FAST8_TYPE "int"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_FAST8_TYPE "unsigned int"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
+#define UINTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
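+
+/* The net effect (a sketch): on an LP64 FreeBSD target, where
+ LONG_TYPE_SIZE == 64, int64_t is "long int" and intptr_t is a 64-bit
+ "long int"; on a 32-bit target the 64-bit types fall back to
+ "long long int" and intptr_t to plain "int". */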
diff --git a/gcc/config/freebsd.h b/gcc/config/freebsd.h
new file mode 100644
index 000000000..51caad294
--- /dev/null
+++ b/gcc/config/freebsd.h
@@ -0,0 +1,78 @@
+/* Base configuration file for all FreeBSD targets.
+ Copyright (C) 1999, 2000, 2001, 2007, 2008, 2009,
+ 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Common FreeBSD configuration.
+ All FreeBSD architectures should include this file, which will specify
+ their commonalities.
+ Adapted from gcc/config/i386/freebsd-elf.h by
+ David O'Brien <obrien@FreeBSD.org>.
+ Further work by David O'Brien <obrien@FreeBSD.org> and
+ Loren J. Rittle <ljrittle@acm.org>. */
+
+
+/* In case we need to know. */
+#define USING_CONFIG_FREEBSD 1
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() FBSD_TARGET_OS_CPP_BUILTINS()
+
+#undef CPP_SPEC
+#define CPP_SPEC FBSD_CPP_SPEC
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC FBSD_STARTFILE_SPEC
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC FBSD_ENDFILE_SPEC
+
+#undef LIB_SPEC
+#define LIB_SPEC FBSD_LIB_SPEC
+
+/************************[ Target stuff ]***********************************/
+
+/* All FreeBSD Architectures support the ELF object file format. */
+#undef OBJECT_FORMAT_ELF
+#define OBJECT_FORMAT_ELF
+
+/* Don't assume anything about the header files. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C 1
+
+/* Follow FreeBSD's standard headers (<sys/_types.h> etc...). */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+#define MATH_LIBRARY_PROFILE "m_p"
+
+/* Code generation parameters. */
+
+/* Use periods rather than dollar signs in special g++ assembler names.
+ This ensures the configuration knows our system correctly so we can link
+ with libraries compiled with the native cc. */
+#undef NO_DOLLAR_IN_LABEL
+
+/* Used by libgcc2.c. We support file locking with fcntl / F_SETLKW.
+ This enables the test coverage code to use file locking when exiting a
+ program, which avoids race conditions if the program has forked. */
+#define TARGET_POSIX_IO
diff --git a/gcc/config/freebsd.opt b/gcc/config/freebsd.opt
new file mode 100644
index 000000000..2cffdb67b
--- /dev/null
+++ b/gcc/config/freebsd.opt
@@ -0,0 +1,65 @@
+; FreeBSD options.
+
+; Copyright (C) 2010, 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+assert
+Driver Separate
+
+assert=
+Driver JoinedOrMissing
+
+defsym
+Driver Separate
+
+defsym=
+Driver JoinedOrMissing
+
+posix
+Driver
+
+profile
+Driver
+
+pthread
+Driver
+
+rdynamic
+Driver
+
+rpath-link
+Driver Separate
+
+rpath-link=
+Driver JoinedOrMissing
+
+rpath=
+Driver JoinedOrMissing
+
+soname
+Driver Separate
+
+soname=
+Driver JoinedOrMissing
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/frv/cmovd.c b/gcc/config/frv/cmovd.c
new file mode 100644
index 000000000..e46070aac
--- /dev/null
+++ b/gcc/config/frv/cmovd.c
@@ -0,0 +1,51 @@
+/* Move double-word library function.
+ Copyright (C) 2000, 2003, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software ; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+void
+__cmovd (long long *dest, const long long *src, unsigned len)
+{
+ unsigned i;
+ unsigned num = len >> 3;
+ unsigned xlen = len & ~7;
+ char *dest_byte = (char *)dest;
+ const char *src_byte = (const char *)src;
+
+ if (dest_byte < src_byte || dest_byte > src_byte+len)
+ {
+ for (i = 0; i < num; i++)
+ dest[i] = src[i];
+
+ while (len > xlen)
+ {
+ dest_byte[xlen] = src_byte[xlen];
+ xlen++;
+ }
+ }
+ /* Otherwise copy backward one byte at a time so overlapping regions
+ are handled correctly, as memmove would. */
+ else
+ {
+ while (len-- > 0)
+ dest_byte[len] = src_byte[len];
+ }
+}
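+
+/* Usage sketch (illustrative only, guarded out of the build): __cmovd
+ behaves like memmove for doubleword data, so overlapping moves are
+ safe. The companion __cmovh and __cmovw helpers follow the same
+ pattern for half-words and words. */
+#if 0
+static void
+example_cmovd (void)
+{
+ long long buf[4] = { 1, 2, 3, 4 };
+ /* Overlapping move: shift the first three elements up one slot. */
+ __cmovd (&buf[1], &buf[0], 3 * sizeof (long long));
+ /* buf is now { 1, 1, 2, 3 }. */
+}
+#endif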
diff --git a/gcc/config/frv/cmovh.c b/gcc/config/frv/cmovh.c
new file mode 100644
index 000000000..6b0901d95
--- /dev/null
+++ b/gcc/config/frv/cmovh.c
@@ -0,0 +1,47 @@
+/* Move half-word library function.
+ Copyright (C) 2000, 2003, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software ; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+void
+__cmovh (short *dest, const short *src, unsigned len)
+{
+ unsigned i;
+ unsigned num = len >> 1;
+ char *dest_byte = (char *)dest;
+ const char *src_byte = (const char *)src;
+
+ /* Copy forward when the destination does not overlap the remainder of
+ the source; otherwise copy backward, byte by byte. */
+ if (dest_byte < src_byte || dest_byte > src_byte+len)
+ {
+ for (i = 0; i < num; i++)
+ dest[i] = src[i];
+
+ if ((len & 1) != 0)
+ dest_byte[len-1] = src_byte[len-1];
+ }
+ else
+ {
+ while (len-- > 0)
+ dest_byte[len] = src_byte[len];
+ }
+}
diff --git a/gcc/config/frv/cmovw.c b/gcc/config/frv/cmovw.c
new file mode 100644
index 000000000..f27db75aa
--- /dev/null
+++ b/gcc/config/frv/cmovw.c
@@ -0,0 +1,51 @@
+/* Move word library function.
+ Copyright (C) 2000, 2003, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software ; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+void
+__cmovw (int *dest, const int *src, unsigned len)
+{
+ unsigned i;
+ unsigned num = len >> 2;
+ unsigned xlen = len & ~3;
+ char *dest_byte = (char *)dest;
+ const char *src_byte = (const char *)src;
+
+ /* Copy forward when the destination does not overlap the remainder of
+ the source; otherwise copy backward, byte by byte. */
+ if (dest_byte < src_byte || dest_byte > src_byte+len)
+ {
+ for (i = 0; i < num; i++)
+ dest[i] = src[i];
+
+ while (len > xlen)
+ {
+ dest_byte[xlen] = src_byte[xlen];
+ xlen++;
+ }
+ }
+ else
+ {
+ while (len-- > 0)
+ dest_byte[len] = src_byte[len];
+ }
+}
diff --git a/gcc/config/frv/constraints.md b/gcc/config/frv/constraints.md
new file mode 100644
index 000000000..792706b03
--- /dev/null
+++ b/gcc/config/frv/constraints.md
@@ -0,0 +1,174 @@
+;; Constraint definitions for FRV.
+;; Copyright (C) 2001 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+(define_register_constraint "a" "ACC_REGS"
+ "@internal")
+
+(define_register_constraint "b" "EVEN_ACC_REGS"
+ "@internal")
+
+(define_register_constraint "c" "CC_REGS"
+ "@internal")
+
+(define_register_constraint "d" "GPR_REGS"
+ "@internal")
+
+(define_register_constraint "e" "EVEN_REGS"
+ "@internal")
+
+(define_register_constraint "f" "FPR_REGS"
+ "@internal")
+
+(define_register_constraint "h" "FEVEN_REGS"
+ "@internal")
+
+(define_register_constraint "l" "LR_REG"
+ "@internal")
+
+(define_register_constraint "q" "QUAD_REGS"
+ "@internal")
+
+(define_register_constraint "t" "ICC_REGS"
+ "@internal")
+
+(define_register_constraint "u" "FCC_REGS"
+ "@internal")
+
+(define_register_constraint "v" "ICR_REGS"
+ "@internal")
+
+(define_register_constraint "w" "FCR_REGS"
+ "@internal")
+
+(define_register_constraint "x" "QUAD_FPR_REGS"
+ "@internal")
+
+(define_register_constraint "y" "LCR_REG"
+ "@internal")
+
+(define_register_constraint "z" "SPR_REGS"
+ "@internal")
+
+(define_register_constraint "A" "QUAD_ACC_REGS"
+ "@internal")
+
+(define_register_constraint "B" "ACCG_REGS"
+ "@internal")
+
+(define_register_constraint "C" "CR_REGS"
+ "@internal")
+
+(define_register_constraint "D89" "GR89_REGS"
+ "@internal")
+
+(define_register_constraint "D09" "GR9_REGS"
+ "@internal")
+
+(define_register_constraint "D08" "GR8_REGS"
+ "@internal")
+
+(define_register_constraint "D14" "FDPIC_FPTR_REGS"
+ "@internal")
+
+(define_register_constraint "D15" "FDPIC_REGS"
+ "@internal")
+
+(define_register_constraint "W" "FDPIC_CALL_REGS"
+ "@internal")
+
+(define_register_constraint "Z" "FDPIC_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "A signed 6-bit immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32, 31)")))
+
+(define_constraint "J"
+ "A signed 10-bit immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -512, 511)")))
+
+(define_constraint "K"
+ "@internal"
+ ;; Unused.
+ (and (match_code "const_int")
+ (match_test "0")))
+
+(define_constraint "L"
+ "A signed 16-bit immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32768, 32767)")))
+
+(define_constraint "M"
+ "An unsigned 16-bit immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 65535)")))
+
+(define_constraint "N"
+ "A signed 12-bit immediate that is negative."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -2048, -1)")))
+
+(define_constraint "O"
+ "Zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "P"
+ "A signed 12-bit immediate that is positive."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 1, 2047)")))
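+
+;; Illustrative use (a sketch, not taken from frv.md): a pattern that
+;; accepts either a GPR or a signed 12-bit immediate for an operand
+;; could combine the register and integer constraints above as
+;; (match_operand:SI 2 "gpr_or_int12_operand" "dNOP")
+;; where "N", "O" and "P" together cover the range [-2048, 2047].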
+
+;; Floating-point constraints.
+(define_constraint "G"
+ "Floating-point zero."
+ (and (match_code "const_double")
+ (ior (and (match_test "mode == VOIDmode")
+ (match_test "hval == 0 && lval == 0"))
+ (and (match_test "mode == SFmode || mode == DFmode")
+ (match_test "op == CONST0_RTX (mode)")))))
+
+(define_constraint "H"
+ "@internal"
+ ;; Unused.
+ (and (match_code "const_double")
+ (match_test "0")))
+
+(define_constraint "Q"
+ "12-bit relocations."
+ (match_test "got12_operand (op, mode)"))
+
+(define_memory_constraint "R"
+ "Double word memory ops that take one instruction."
+ (match_test "dbl_memory_one_insn_operand (op, mode)"))
+
+(define_constraint "S"
+ "SYMBOL_REF."
+ (match_test "CONSTANT_P (op) && call_operand (op, VOIDmode)"))
+
+(define_memory_constraint "T"
+ "Double word memory ops that take two instructions."
+ (match_test "dbl_memory_two_insn_operand (op, mode)"))
+
+(define_memory_constraint "U"
+ "Memory operand for conditional execution."
+ (match_test "condexec_memory_operand (op, mode)"))
diff --git a/gcc/config/frv/frv-asm.h b/gcc/config/frv/frv-asm.h
new file mode 100644
index 000000000..c36440df8
--- /dev/null
+++ b/gcc/config/frv/frv-asm.h
@@ -0,0 +1,48 @@
+/* Assembler Support.
+ Copyright (C) 2000, 2007 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software ; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* P(INSN): Emit INSN.P for VLIW machines, otherwise emit plain INSN.
+ P2(INSN): Emit INSN.P on the FR500 and above, otherwise emit plain INSN. */
+#ifdef __FRV_VLIW__
+#ifdef __STDC__
+#define P(A) A.p
+#else
+#define P(A) A/**/.p
+#endif
+#if __FRV_VLIW__ > 2
+#define P2(A) P(A)
+#else
+#define P2(A) A
+#endif
+#else
+#define P(A) A
+#define P2(A) A
+#endif
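+
+/* Illustrative expansion (a sketch): with __FRV_VLIW__ defined,
+ P(add) gr4,gr5,gr6
+ expands to "add.p gr4,gr5,gr6", marking the instruction as packed
+ with its successor; without __FRV_VLIW__ it emits a plain "add".
+ P2 does the same only when __FRV_VLIW__ > 2 (FR500 and above). */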
+
+/* Add underscore if necessary to external name. */
+#ifdef __FRV_UNDERSCORE__
+#ifdef __STDC__
+#define EXT(NAME) _##NAME
+#else
+#define EXT(NAME) _/**/NAME
+#endif
+#else
+#define EXT(NAME) NAME
+#endif
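+
+/* For example (illustrative), EXT(memcpy) yields "_memcpy" when
+ __FRV_UNDERSCORE__ is defined and plain "memcpy" otherwise, so the
+ same assembly source works under either naming convention. */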
diff --git a/gcc/config/frv/frv-modes.def b/gcc/config/frv/frv-modes.def
new file mode 100644
index 000000000..ca0094a05
--- /dev/null
+++ b/gcc/config/frv/frv-modes.def
@@ -0,0 +1,34 @@
+/* Definitions of target machine for GNU compiler for FRV.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* On the FRV, the CC modes used are:
+
+ CCmode set ICCs from comparing signed integers
+ CC_UNSmode set ICCs from comparing unsigned integers
+ CC_NZmode set ICCs for comparisons that just need the Z and N flags
+ CC_FPmode set FCCs from comparing floating point
+ CC_CCRmode set CCRs to do conditional execution */
+
+CC_MODE (CC_UNS);
+CC_MODE (CC_NZ);
+CC_MODE (CC_FP);
+CC_MODE (CC_CCR);
+
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
diff --git a/gcc/config/frv/frv-protos.h b/gcc/config/frv/frv-protos.h
new file mode 100644
index 000000000..04a3f9242
--- /dev/null
+++ b/gcc/config/frv/frv-protos.h
@@ -0,0 +1,195 @@
+/* Frv prototypes.
+ Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* CPU type. This must be identical to the cpu enumeration in frv.md. */
+typedef enum frv_cpu
+{
+ FRV_CPU_GENERIC,
+ FRV_CPU_FR550,
+ FRV_CPU_FR500,
+ FRV_CPU_FR450,
+ FRV_CPU_FR405,
+ FRV_CPU_FR400,
+ FRV_CPU_FR300,
+ FRV_CPU_SIMPLE,
+ FRV_CPU_TOMCAT
+} frv_cpu_t;
+
+extern frv_cpu_t frv_cpu_type; /* value of -mcpu= */
+
+/* Define functions defined in frv.c */
+extern void frv_expand_prologue (void);
+extern void frv_expand_epilogue (bool);
+extern frv_stack_t *frv_stack_info (void);
+extern void frv_debug_stack (frv_stack_t *);
+extern int frv_initial_elimination_offset (int, int);
+
+#ifdef RTX_CODE
+extern int frv_legitimate_address_p_1 (enum machine_mode, rtx,
+ int, int, int);
+extern rtx frv_find_base_term (rtx);
+
+#ifdef TREE_CODE
+extern void frv_init_cumulative_args (CUMULATIVE_ARGS *, tree,
+ rtx, tree, int);
+
+extern bool frv_function_value_regno_p (const unsigned int);
+#endif /* TREE_CODE */
+
+extern int frv_expand_block_move (rtx *);
+extern int frv_expand_block_clear (rtx *);
+extern rtx frv_dynamic_chain_address (rtx);
+extern rtx frv_return_addr_rtx (int, rtx);
+extern rtx frv_index_memory (rtx, enum machine_mode, int);
+extern const char *frv_asm_output_opcode
+ (FILE *, const char *);
+extern void frv_final_prescan_insn (rtx, rtx *, int);
+extern void frv_emit_move (enum machine_mode, rtx, rtx);
+extern int frv_emit_movsi (rtx, rtx);
+extern const char *output_move_single (rtx *, rtx);
+extern const char *output_move_double (rtx *, rtx);
+extern const char *output_condmove_single
+ (rtx *, rtx);
+extern int frv_emit_cond_branch (rtx *);
+extern int frv_emit_scc (rtx *);
+extern rtx frv_split_scc (rtx, rtx, rtx, rtx, HOST_WIDE_INT);
+extern int frv_emit_cond_move (rtx, rtx, rtx, rtx);
+extern rtx frv_split_cond_move (rtx *);
+extern rtx frv_split_minmax (rtx *);
+extern rtx frv_split_abs (rtx *);
+extern void frv_split_double_load (rtx, rtx);
+extern void frv_split_double_store (rtx, rtx);
+#ifdef BB_HEAD
+extern void frv_ifcvt_init_extra_fields (ce_if_block_t *);
+extern void frv_ifcvt_modify_tests (ce_if_block_t *, rtx *, rtx *);
+extern void frv_ifcvt_modify_multiple_tests
+ (ce_if_block_t *, basic_block,
+ rtx *, rtx *);
+extern rtx frv_ifcvt_modify_insn (ce_if_block_t *, rtx, rtx);
+extern void frv_ifcvt_modify_final (ce_if_block_t *);
+extern void frv_ifcvt_modify_cancel (ce_if_block_t *);
+#endif
+extern enum reg_class frv_secondary_reload_class
+ (enum reg_class,
+ enum machine_mode, rtx);
+extern int frv_hard_regno_mode_ok (int, enum machine_mode);
+extern int frv_hard_regno_nregs (int, enum machine_mode);
+extern int frv_class_max_nregs (enum reg_class rclass,
+ enum machine_mode mode);
+extern int frv_legitimate_constant_p (rtx);
+extern enum machine_mode frv_select_cc_mode (enum rtx_code, rtx, rtx);
+#endif /* RTX_CODE */
+
+extern int frv_trampoline_size (void);
+extern int direct_return_p (void);
+extern int frv_issue_rate (void);
+extern int frv_acc_group (rtx);
+
+#ifdef TREE_CODE
+extern int frv_adjust_field_align (tree, int);
+#endif
+
+#ifdef RTX_CODE
+extern int integer_register_operand (rtx, enum machine_mode);
+extern int frv_load_operand (rtx, enum machine_mode);
+extern int gpr_or_fpr_operand (rtx, enum machine_mode);
+extern int gpr_no_subreg_operand (rtx, enum machine_mode);
+extern int gpr_or_int6_operand (rtx, enum machine_mode);
+extern int fpr_or_int6_operand (rtx, enum machine_mode);
+extern int gpr_or_int_operand (rtx, enum machine_mode);
+extern int gpr_or_int12_operand (rtx, enum machine_mode);
+extern int gpr_fpr_or_int12_operand (rtx, enum machine_mode);
+extern int gpr_or_int10_operand (rtx, enum machine_mode);
+extern int move_source_operand (rtx, enum machine_mode);
+extern int move_destination_operand (rtx, enum machine_mode);
+extern int condexec_source_operand (rtx, enum machine_mode);
+extern int condexec_dest_operand (rtx, enum machine_mode);
+extern int lr_operand (rtx, enum machine_mode);
+extern int gpr_or_memory_operand (rtx, enum machine_mode);
+extern int fpr_or_memory_operand (rtx, enum machine_mode);
+extern int reg_or_0_operand (rtx, enum machine_mode);
+extern int fcc_operand (rtx, enum machine_mode);
+extern int icc_operand (rtx, enum machine_mode);
+extern int cc_operand (rtx, enum machine_mode);
+extern int fcr_operand (rtx, enum machine_mode);
+extern int icr_operand (rtx, enum machine_mode);
+extern int cr_operand (rtx, enum machine_mode);
+extern int call_operand (rtx, enum machine_mode);
+extern int fpr_operand (rtx, enum machine_mode);
+extern int even_reg_operand (rtx, enum machine_mode);
+extern int odd_reg_operand (rtx, enum machine_mode);
+extern int even_gpr_operand (rtx, enum machine_mode);
+extern int odd_gpr_operand (rtx, enum machine_mode);
+extern int quad_fpr_operand (rtx, enum machine_mode);
+extern int even_fpr_operand (rtx, enum machine_mode);
+extern int odd_fpr_operand (rtx, enum machine_mode);
+extern int dbl_memory_one_insn_operand (rtx, enum machine_mode);
+extern int dbl_memory_two_insn_operand (rtx, enum machine_mode);
+extern int int12_operand (rtx, enum machine_mode);
+extern int int6_operand (rtx, enum machine_mode);
+extern int int5_operand (rtx, enum machine_mode);
+extern int uint5_operand (rtx, enum machine_mode);
+extern int uint4_operand (rtx, enum machine_mode);
+extern int uint1_operand (rtx, enum machine_mode);
+extern int int_2word_operand (rtx, enum machine_mode);
+extern int pic_register_operand (rtx, enum machine_mode);
+extern int pic_symbolic_operand (rtx, enum machine_mode);
+extern int small_data_register_operand (rtx, enum machine_mode);
+extern int small_data_symbolic_operand (rtx, enum machine_mode);
+extern int upper_int16_operand (rtx, enum machine_mode);
+extern int uint16_operand (rtx, enum machine_mode);
+extern int symbolic_operand (rtx, enum machine_mode);
+extern int relational_operator (rtx, enum machine_mode);
+extern int signed_relational_operator (rtx, enum machine_mode);
+extern int unsigned_relational_operator (rtx, enum machine_mode);
+extern int float_relational_operator (rtx, enum machine_mode);
+extern int ccr_eqne_operator (rtx, enum machine_mode);
+extern int minmax_operator (rtx, enum machine_mode);
+extern int condexec_si_binary_operator (rtx, enum machine_mode);
+extern int condexec_si_media_operator (rtx, enum machine_mode);
+extern int condexec_si_divide_operator (rtx, enum machine_mode);
+extern int condexec_si_unary_operator (rtx, enum machine_mode);
+extern int condexec_sf_conv_operator (rtx, enum machine_mode);
+extern int condexec_sf_add_operator (rtx, enum machine_mode);
+extern int condexec_memory_operand (rtx, enum machine_mode);
+extern int intop_compare_operator (rtx, enum machine_mode);
+extern int acc_operand (rtx, enum machine_mode);
+extern int even_acc_operand (rtx, enum machine_mode);
+extern int quad_acc_operand (rtx, enum machine_mode);
+extern int accg_operand (rtx, enum machine_mode);
+extern rtx frv_matching_accg_for_acc (rtx);
+extern void frv_expand_fdpic_call (rtx *, bool, bool);
+extern rtx frv_gen_GPsym2reg (rtx, rtx);
+extern int frv_legitimate_memory_operand (rtx, enum machine_mode, int);
+
+/* Information about a relocation unspec. SYMBOL is the relocation symbol
+ (a SYMBOL_REF or LABEL_REF), RELOC is the type of relocation and OFFSET
+ is the constant addend. */
+struct frv_unspec {
+ rtx symbol;
+ int reloc;
+ HOST_WIDE_INT offset;
+};
+
+extern bool frv_const_unspec_p (rtx, struct frv_unspec *);
+
+#endif
+
diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c
new file mode 100644
index 000000000..229b9feb7
--- /dev/null
+++ b/gcc/config/frv/frv.c
@@ -0,0 +1,9680 @@
+/* Copyright (C) 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007,
+ 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "reload.h"
+#include "expr.h"
+#include "obstack.h"
+#include "except.h"
+#include "function.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+#include "basic-block.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "targhooks.h"
+#include "integrate.h"
+#include "langhooks.h"
+#include "df.h"
+
+#ifndef FRV_INLINE
+#define FRV_INLINE inline
+#endif
+
+/* The maximum number of distinct NOP patterns. There are three:
+ nop, fnop and mnop. */
+#define NUM_NOP_PATTERNS 3
+
+/* Classification of instructions and units: integer, floating-point/media,
+ branch and control. */
+enum frv_insn_group { GROUP_I, GROUP_FM, GROUP_B, GROUP_C, NUM_GROUPS };
+
+/* The DFA names of the units, in packet order. */
+static const char *const frv_unit_names[] =
+{
+ "c",
+ "i0", "f0",
+ "i1", "f1",
+ "i2", "f2",
+ "i3", "f3",
+ "b0", "b1"
+};
+
+/* The classification of each unit in frv_unit_names[]. */
+static const enum frv_insn_group frv_unit_groups[ARRAY_SIZE (frv_unit_names)] =
+{
+ GROUP_C,
+ GROUP_I, GROUP_FM,
+ GROUP_I, GROUP_FM,
+ GROUP_I, GROUP_FM,
+ GROUP_I, GROUP_FM,
+ GROUP_B, GROUP_B
+};
+
+/* Return the DFA unit code associated with the Nth unit of integer
+ or floating-point group GROUP. */
+#define NTH_UNIT(GROUP, N) frv_unit_codes[(GROUP) + (N) * 2 + 1]
+
+/* Return the number of integer or floating-point unit UNIT
+ (1 for I1, 2 for F2, etc.). */
+#define UNIT_NUMBER(UNIT) (((UNIT) - 1) / 2)
+
+/* The DFA unit number for each unit in frv_unit_names[]. */
+static int frv_unit_codes[ARRAY_SIZE (frv_unit_names)];
+
+/* FRV_TYPE_TO_UNIT[T] is the last unit in frv_unit_names[] that can issue
+ an instruction of type T. The value is ARRAY_SIZE (frv_unit_names) if
+ no instruction of type T has been seen. */
+static unsigned int frv_type_to_unit[TYPE_UNKNOWN + 1];
+
+/* An array of dummy nop INSNs, one for each type of nop that the
+ target supports. */
+static GTY(()) rtx frv_nops[NUM_NOP_PATTERNS];
+
+/* The number of nop instructions in frv_nops[]. */
+static unsigned int frv_num_nops;
+
+/* The type of access. FRV_IO_UNKNOWN means the access can be either
+ a read or a write. */
+enum frv_io_type { FRV_IO_UNKNOWN, FRV_IO_READ, FRV_IO_WRITE };
+
+/* Information about one __builtin_read or __builtin_write access, or
+ the combination of several such accesses. The most general value
+ is all-zeros (an unknown access to an unknown address). */
+struct frv_io {
+ enum frv_io_type type;
+
+ /* The constant address being accessed, or zero if not known. */
+ HOST_WIDE_INT const_address;
+
+ /* The run-time address, as used in operand 0 of the membar pattern. */
+ rtx var_address;
+};
+
+/* Return true if instruction INSN should be packed with the following
+ instruction. */
+#define PACKING_FLAG_P(INSN) (GET_MODE (INSN) == TImode)
+
+/* Set the value of PACKING_FLAG_P(INSN). */
+#define SET_PACKING_FLAG(INSN) PUT_MODE (INSN, TImode)
+#define CLEAR_PACKING_FLAG(INSN) PUT_MODE (INSN, VOIDmode)
+
+/* Loop with REG set to each hard register in rtx X. */
+#define FOR_EACH_REGNO(REG, X) \
+ for (REG = REGNO (X); \
+ REG < REGNO (X) + HARD_REGNO_NREGS (REGNO (X), GET_MODE (X)); \
+ REG++)
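+
+/* Illustrative use of FOR_EACH_REGNO (a sketch, not from this file):
+
+ unsigned int regno;
+ FOR_EACH_REGNO (regno, reg)
+ fixed_regs[regno] = 1;
+
+ visits every hard register occupied by the possibly multi-word
+ register rtx REG. */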
+
+/* This structure contains machine specific function data. */
+struct GTY(()) machine_function
+{
+ /* True if we have created an rtx that relies on the stack frame. */
+ int frame_needed;
+
+ /* True if this function contains at least one __builtin_{read,write}*. */
+ bool has_membar_p;
+};
+
+/* Temporary register allocation support structure. */
+typedef struct frv_tmp_reg_struct
+ {
+ HARD_REG_SET regs; /* possible registers to allocate */
+ int next_reg[N_REG_CLASSES]; /* next register to allocate per class */
+ }
+frv_tmp_reg_t;
+
+/* Register state information for VLIW re-packing phase. */
+#define REGSTATE_CC_MASK 0x07 /* Mask to isolate CCn for cond exec */
+#define REGSTATE_MODIFIED 0x08 /* reg modified in current VLIW insn */
+#define REGSTATE_IF_TRUE 0x10 /* reg modified in cond exec true */
+#define REGSTATE_IF_FALSE 0x20 /* reg modified in cond exec false */
+
+#define REGSTATE_IF_EITHER (REGSTATE_IF_TRUE | REGSTATE_IF_FALSE)
+
+typedef unsigned char regstate_t;
+
+/* Used in frv_frame_accessor_t to indicate the direction of a register-to-
+ memory move. */
+enum frv_stack_op
+{
+ FRV_LOAD,
+ FRV_STORE
+};
+
+/* Information required by frv_frame_access. */
+typedef struct
+{
+ /* This field is FRV_LOAD if registers are to be loaded from the stack and
+ FRV_STORE if they should be stored onto the stack. FRV_STORE implies
+ the move is being done by the prologue code while FRV_LOAD implies it
+ is being done by the epilogue. */
+ enum frv_stack_op op;
+
+ /* The base register to use when accessing the stack. This may be the
+ frame pointer, stack pointer, or a temporary. The choice of register
+ depends on which part of the frame is being accessed and how big the
+ frame is. */
+ rtx base;
+
+ /* The offset of BASE from the bottom of the current frame, in bytes. */
+ int base_offset;
+} frv_frame_accessor_t;
+
+/* Conditional execution support gathered together in one structure. */
+typedef struct
+ {
+ /* Linked list of insns to add if the conditional execution conversion was
+ successful. Each link points to an EXPR_LIST which points to the pattern
+ of the insn to add, and the insn to be inserted before. */
+ rtx added_insns_list;
+
+ /* Identify which registers are safe to allocate for if conversions to
+ conditional execution. We keep the last allocated register in the
+ register classes between COND_EXEC statements. This will mean we allocate
+ different registers for each different COND_EXEC group if we can. This
+ might allow the scheduler to intermix two different COND_EXEC sections. */
+ frv_tmp_reg_t tmp_reg;
+
+ /* For nested IFs, identify which CC registers are used outside of being
+ set via a compare insn and used via a check insn. This will allow us to
+ know if we can rewrite the register to use a different register that will
+ be paired with the CR register controlling the nested IF-THEN blocks. */
+ HARD_REG_SET nested_cc_ok_rewrite;
+
+ /* Temporary registers allocated to hold constants during conditional
+ execution. */
+ rtx scratch_regs[FIRST_PSEUDO_REGISTER];
+
+ /* Current number of temp registers available. */
+ int cur_scratch_regs;
+
+ /* Number of nested conditional execution blocks. */
+ int num_nested_cond_exec;
+
+ /* Map of insns that set up constants in scratch registers. */
+ bitmap scratch_insns_bitmap;
+
+ /* Conditional execution test register (CC0..CC7). */
+ rtx cr_reg;
+
+ /* Conditional execution compare register that is paired with cr_reg, so that
+ nested compares can be done. The csubcc and caddcc instructions don't
+ have enough bits to specify both a CC register to be set and a CR register
+ to do the test on, so the same bit number is used for both. Needless to
+ say, this is rather inconvenient for GCC. */
+ rtx nested_cc_reg;
+
+ /* Extra CR registers used for &&, ||. */
+ rtx extra_int_cr;
+ rtx extra_fp_cr;
+
+ /* Previous CR used in nested if, to make sure we are dealing with the same
+ nested if as the previous statement. */
+ rtx last_nested_if_cr;
+ }
+frv_ifcvt_t;
+
+static /* GTY(()) */ frv_ifcvt_t frv_ifcvt;
+
+/* Map register number to smallest register class. */
+enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
+
+/* Cached value of frv_stack_info. */
+static frv_stack_t *frv_stack_cache = (frv_stack_t *)0;
+
+/* -mcpu= support */
+frv_cpu_t frv_cpu_type = CPU_TYPE; /* value of -mcpu= */
+
+/* Forward references */
+
+static bool frv_handle_option (size_t, const char *, int);
+static void frv_option_override (void);
+static bool frv_legitimate_address_p (enum machine_mode, rtx, bool);
+static int frv_default_flags_for_cpu (void);
+static int frv_string_begins_with (const_tree, const char *);
+static FRV_INLINE bool frv_small_data_reloc_p (rtx, int);
+static void frv_print_operand (FILE *, rtx, int);
+static void frv_print_operand_address (FILE *, rtx);
+static bool frv_print_operand_punct_valid_p (unsigned char code);
+static void frv_print_operand_memory_reference_reg
+ (FILE *, rtx);
+static void frv_print_operand_memory_reference (FILE *, rtx, int);
+static int frv_print_operand_jump_hint (rtx);
+static const char *comparison_string (enum rtx_code, rtx);
+static rtx frv_function_value (const_tree, const_tree,
+ bool);
+static rtx frv_libcall_value (enum machine_mode,
+ const_rtx);
+static FRV_INLINE int frv_regno_ok_for_base_p (int, int);
+static rtx single_set_pattern (rtx);
+static int frv_function_contains_far_jump (void);
+static rtx frv_alloc_temp_reg (frv_tmp_reg_t *,
+ enum reg_class,
+ enum machine_mode,
+ int, int);
+static rtx frv_frame_offset_rtx (int);
+static rtx frv_frame_mem (enum machine_mode, rtx, int);
+static rtx frv_dwarf_store (rtx, int);
+static void frv_frame_insn (rtx, rtx);
+static void frv_frame_access (frv_frame_accessor_t*,
+ rtx, int);
+static void frv_frame_access_multi (frv_frame_accessor_t*,
+ frv_stack_t *, int);
+static void frv_frame_access_standard_regs (enum frv_stack_op,
+ frv_stack_t *);
+static struct machine_function *frv_init_machine_status (void);
+static rtx frv_int_to_acc (enum insn_code, int, rtx);
+static enum machine_mode frv_matching_accg_mode (enum machine_mode);
+static rtx frv_read_argument (tree, unsigned int);
+static rtx frv_read_iacc_argument (enum machine_mode, tree, unsigned int);
+static int frv_check_constant_argument (enum insn_code, int, rtx);
+static rtx frv_legitimize_target (enum insn_code, rtx);
+static rtx frv_legitimize_argument (enum insn_code, int, rtx);
+static rtx frv_legitimize_tls_address (rtx, enum tls_model);
+static rtx frv_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx frv_expand_set_builtin (enum insn_code, tree, rtx);
+static rtx frv_expand_unop_builtin (enum insn_code, tree, rtx);
+static rtx frv_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx frv_expand_cut_builtin (enum insn_code, tree, rtx);
+static rtx frv_expand_binopimm_builtin (enum insn_code, tree, rtx);
+static rtx frv_expand_voidbinop_builtin (enum insn_code, tree);
+static rtx frv_expand_int_void2arg (enum insn_code, tree);
+static rtx frv_expand_prefetches (enum insn_code, tree);
+static rtx frv_expand_voidtriop_builtin (enum insn_code, tree);
+static rtx frv_expand_voidaccop_builtin (enum insn_code, tree);
+static rtx frv_expand_mclracc_builtin (tree);
+static rtx frv_expand_mrdacc_builtin (enum insn_code, tree);
+static rtx frv_expand_mwtacc_builtin (enum insn_code, tree);
+static rtx frv_expand_noargs_builtin (enum insn_code);
+static void frv_split_iacc_move (rtx, rtx);
+static rtx frv_emit_comparison (enum rtx_code, rtx, rtx);
+static int frv_clear_registers_used (rtx *, void *);
+static void frv_ifcvt_add_insn (rtx, rtx, int);
+static rtx frv_ifcvt_rewrite_mem (rtx, enum machine_mode, rtx);
+static rtx frv_ifcvt_load_value (rtx, rtx);
+static int frv_acc_group_1 (rtx *, void *);
+static unsigned int frv_insn_unit (rtx);
+static bool frv_issues_to_branch_unit_p (rtx);
+static int frv_cond_flags (rtx);
+static bool frv_regstate_conflict_p (regstate_t, regstate_t);
+static int frv_registers_conflict_p_1 (rtx *, void *);
+static bool frv_registers_conflict_p (rtx);
+static void frv_registers_update_1 (rtx, const_rtx, void *);
+static void frv_registers_update (rtx);
+static void frv_start_packet (void);
+static void frv_start_packet_block (void);
+static void frv_finish_packet (void (*) (void));
+static bool frv_pack_insn_p (rtx);
+static void frv_add_insn_to_packet (rtx);
+static void frv_insert_nop_in_packet (rtx);
+static bool frv_for_each_packet (void (*) (void));
+static bool frv_sort_insn_group_1 (enum frv_insn_group,
+ unsigned int, unsigned int,
+ unsigned int, unsigned int,
+ state_t);
+static int frv_compare_insns (const void *, const void *);
+static void frv_sort_insn_group (enum frv_insn_group);
+static void frv_reorder_packet (void);
+static void frv_fill_unused_units (enum frv_insn_group);
+static void frv_align_label (void);
+static void frv_reorg_packet (void);
+static void frv_register_nop (rtx);
+static void frv_reorg (void);
+static void frv_pack_insns (void);
+static void frv_function_prologue (FILE *, HOST_WIDE_INT);
+static void frv_function_epilogue (FILE *, HOST_WIDE_INT);
+static bool frv_assemble_integer (rtx, unsigned, int);
+static void frv_init_builtins (void);
+static rtx frv_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void frv_init_libfuncs (void);
+static bool frv_in_small_data_p (const_tree);
+static void frv_asm_output_mi_thunk
+ (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+static void frv_setup_incoming_varargs (CUMULATIVE_ARGS *,
+ enum machine_mode,
+ tree, int *, int);
+static rtx frv_expand_builtin_saveregs (void);
+static void frv_expand_builtin_va_start (tree, rtx);
+static bool frv_rtx_costs (rtx, int, int, int*, bool);
+static int frv_register_move_cost (enum machine_mode,
+ reg_class_t, reg_class_t);
+static int frv_memory_move_cost (enum machine_mode,
+ reg_class_t, bool);
+static void frv_asm_out_constructor (rtx, int);
+static void frv_asm_out_destructor (rtx, int);
+static bool frv_function_symbol_referenced_p (rtx);
+static bool frv_cannot_force_const_mem (rtx);
+static const char *unspec_got_name (int);
+static void frv_output_const_unspec (FILE *,
+ const struct frv_unspec *);
+static bool frv_function_ok_for_sibcall (tree, tree);
+static rtx frv_struct_value_rtx (tree, int);
+static bool frv_must_pass_in_stack (enum machine_mode mode, const_tree type);
+static int frv_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx frv_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx frv_function_incoming_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void frv_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int frv_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void frv_output_dwarf_dtprel (FILE *, int, rtx)
+ ATTRIBUTE_UNUSED;
+static reg_class_t frv_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ secondary_reload_info *);
+static bool frv_frame_pointer_required (void);
+static bool frv_can_eliminate (const int, const int);
+static void frv_conditional_register_usage (void);
+static void frv_trampoline_init (rtx, tree, rtx);
+static bool frv_class_likely_spilled_p (reg_class_t);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options frv_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Allow us to easily change the default for -malloc-cc. */
+#ifndef DEFAULT_NO_ALLOC_CC
+#define MASK_DEFAULT_ALLOC_CC MASK_ALLOC_CC
+#else
+#define MASK_DEFAULT_ALLOC_CC 0
+#endif
+
+/* Initialize the GCC target structure. */
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND frv_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS frv_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P frv_print_operand_punct_valid_p
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE frv_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE frv_function_epilogue
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER frv_assemble_integer
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (MASK_DEFAULT_ALLOC_CC \
+ | MASK_COND_MOVE \
+ | MASK_SCC \
+ | MASK_COND_EXEC \
+ | MASK_VLIW_BRANCH \
+ | MASK_MULTI_CE \
+ | MASK_NESTED_CE)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION frv_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE frv_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE frv_option_optimization_table
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS frv_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN frv_expand_builtin
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS frv_init_libfuncs
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P frv_in_small_data_p
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST frv_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST frv_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS frv_rtx_costs
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR frv_asm_out_constructor
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR frv_asm_out_destructor
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK frv_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE frv_issue_rate
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS frv_legitimize_address
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL frv_function_ok_for_sibcall
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM frv_cannot_force_const_mem
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX frv_struct_value_rtx
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK frv_must_pass_in_stack
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES frv_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG frv_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG frv_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE frv_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY frv_function_arg_boundary
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS frv_expand_builtin_saveregs
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS frv_setup_incoming_varargs
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG frv_reorg
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START frv_expand_builtin_va_start
+
+#if HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL frv_output_dwarf_dtprel
+#endif
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P frv_class_likely_spilled_p
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD frv_secondary_reload
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P frv_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED frv_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE frv_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE frv_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT frv_trampoline_init
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE frv_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE frv_libcall_value
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#define FRV_SYMBOL_REF_TLS_P(RTX) \
+ (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0)
+
+
+/* Any function call that satisfies the machine-independent
+ requirements is eligible on FR-V. */
+
+static bool
+frv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+ tree exp ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
+/* Return true if SYMBOL is a small data symbol and relocation RELOC
+ can be used to access it directly in a load or store. */
+
+static FRV_INLINE bool
+frv_small_data_reloc_p (rtx symbol, int reloc)
+{
+ return (GET_CODE (symbol) == SYMBOL_REF
+ && SYMBOL_REF_SMALL_P (symbol)
+ && (!TARGET_FDPIC || flag_pic == 1)
+ && (reloc == R_FRV_GOTOFF12 || reloc == R_FRV_GPREL12));
+}
+
+/* Return true if X is a valid relocation unspec. If it is, fill in UNSPEC
+ appropriately. */
+
+bool
+frv_const_unspec_p (rtx x, struct frv_unspec *unspec)
+{
+ if (GET_CODE (x) == CONST)
+ {
+ unspec->offset = 0;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unspec->offset += INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOT)
+ {
+ unspec->symbol = XVECEXP (x, 0, 0);
+ unspec->reloc = INTVAL (XVECEXP (x, 0, 1));
+
+ if (unspec->offset == 0)
+ return true;
+
+ if (frv_small_data_reloc_p (unspec->symbol, unspec->reloc)
+ && unspec->offset > 0
+ && unspec->offset < g_switch_value)
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Decide whether we can force certain constants to memory. If we
+ decide we can't, the caller should be able to cope with it in
+ another way.
+
+ We never allow constants to be forced into memory for TARGET_FDPIC.
+ This is necessary for several reasons:
+
+ 1. Since LEGITIMATE_CONSTANT_P rejects constant pool addresses, the
+ target-independent code will try to force them into the constant
+ pool, thus leading to infinite recursion.
+
+ 2. We can never introduce new constant pool references during reload.
+ Any such reference would require use of the pseudo FDPIC register.
+
+ 3. We can't represent a constant added to a function pointer (which is
+ not the same as a pointer to a function+constant).
+
+ 4. In many cases, it's more efficient to calculate the constant in-line. */
+
+static bool
+frv_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED)
+{
+ return TARGET_FDPIC;
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+frv_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mcpu_:
+ if (strcmp (arg, "simple") == 0)
+ frv_cpu_type = FRV_CPU_SIMPLE;
+ else if (strcmp (arg, "tomcat") == 0)
+ frv_cpu_type = FRV_CPU_TOMCAT;
+ else if (strcmp (arg, "fr550") == 0)
+ frv_cpu_type = FRV_CPU_FR550;
+ else if (strcmp (arg, "fr500") == 0)
+ frv_cpu_type = FRV_CPU_FR500;
+ else if (strcmp (arg, "fr450") == 0)
+ frv_cpu_type = FRV_CPU_FR450;
+ else if (strcmp (arg, "fr405") == 0)
+ frv_cpu_type = FRV_CPU_FR405;
+ else if (strcmp (arg, "fr400") == 0)
+ frv_cpu_type = FRV_CPU_FR400;
+ else if (strcmp (arg, "fr300") == 0)
+ frv_cpu_type = FRV_CPU_FR300;
+ else if (strcmp (arg, "frv") == 0)
+ frv_cpu_type = FRV_CPU_GENERIC;
+ else
+ return false;
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+static int
+frv_default_flags_for_cpu (void)
+{
+ switch (frv_cpu_type)
+ {
+ case FRV_CPU_GENERIC:
+ return MASK_DEFAULT_FRV;
+
+ case FRV_CPU_FR550:
+ return MASK_DEFAULT_FR550;
+
+ case FRV_CPU_FR500:
+ case FRV_CPU_TOMCAT:
+ return MASK_DEFAULT_FR500;
+
+ case FRV_CPU_FR450:
+ return MASK_DEFAULT_FR450;
+
+ case FRV_CPU_FR405:
+ case FRV_CPU_FR400:
+ return MASK_DEFAULT_FR400;
+
+ case FRV_CPU_FR300:
+ case FRV_CPU_SIMPLE:
+ return MASK_DEFAULT_SIMPLE;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+
+static void
+frv_option_override (void)
+{
+ int regno;
+ unsigned int i;
+
+ target_flags |= (frv_default_flags_for_cpu () & ~target_flags_explicit);
+
+ /* -mlibrary-pic sets -fPIC and -G0 and also suppresses warnings from the
+ linker about linking pic and non-pic code. */
+ if (TARGET_LIBPIC)
+ {
+ if (!flag_pic) /* -fPIC */
+ flag_pic = 2;
+
+ if (!global_options_set.x_g_switch_value) /* -G0 */
+ {
+ g_switch_value = 0;
+ }
+ }
+
+ /* A C expression whose value is a register class containing hard
+ register REGNO. In general there is more than one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ enum reg_class rclass;
+
+ if (GPR_P (regno))
+ {
+ int gpr_reg = regno - GPR_FIRST;
+
+ if (gpr_reg == GR8_REG)
+ rclass = GR8_REGS;
+
+ else if (gpr_reg == GR9_REG)
+ rclass = GR9_REGS;
+
+ else if (gpr_reg == GR14_REG)
+ rclass = FDPIC_FPTR_REGS;
+
+ else if (gpr_reg == FDPIC_REGNO)
+ rclass = FDPIC_REGS;
+
+ else if ((gpr_reg & 3) == 0)
+ rclass = QUAD_REGS;
+
+ else if ((gpr_reg & 1) == 0)
+ rclass = EVEN_REGS;
+
+ else
+ rclass = GPR_REGS;
+ }
+
+ else if (FPR_P (regno))
+ {
+ int fpr_reg = regno - GPR_FIRST;
+ if ((fpr_reg & 3) == 0)
+ rclass = QUAD_FPR_REGS;
+
+ else if ((fpr_reg & 1) == 0)
+ rclass = FEVEN_REGS;
+
+ else
+ rclass = FPR_REGS;
+ }
+
+ else if (regno == LR_REGNO)
+ rclass = LR_REG;
+
+ else if (regno == LCR_REGNO)
+ rclass = LCR_REG;
+
+ else if (ICC_P (regno))
+ rclass = ICC_REGS;
+
+ else if (FCC_P (regno))
+ rclass = FCC_REGS;
+
+ else if (ICR_P (regno))
+ rclass = ICR_REGS;
+
+ else if (FCR_P (regno))
+ rclass = FCR_REGS;
+
+ else if (ACC_P (regno))
+ {
+ int r = regno - ACC_FIRST;
+ if ((r & 3) == 0)
+ rclass = QUAD_ACC_REGS;
+ else if ((r & 1) == 0)
+ rclass = EVEN_ACC_REGS;
+ else
+ rclass = ACC_REGS;
+ }
+
+ else if (ACCG_P (regno))
+ rclass = ACCG_REGS;
+
+ else
+ rclass = NO_REGS;
+
+ regno_reg_class[regno] = rclass;
+ }
+
+ /* Check for small data option */
+ if (!global_options_set.x_g_switch_value && !TARGET_LIBPIC)
+ g_switch_value = SDATA_DEFAULT_SIZE;
+
+ /* There is no single unaligned SI op for PIC code. Sometimes we
+ need to use ".4byte" and sometimes we need to use ".picptr".
+ See frv_assemble_integer for details. */
+ if (flag_pic || TARGET_FDPIC)
+ targetm.asm_out.unaligned_op.si = 0;
+
+ if ((target_flags_explicit & MASK_LINKED_FP) == 0)
+ target_flags |= MASK_LINKED_FP;
+
+ if ((target_flags_explicit & MASK_OPTIMIZE_MEMBAR) == 0)
+ target_flags |= MASK_OPTIMIZE_MEMBAR;
+
+ for (i = 0; i < ARRAY_SIZE (frv_unit_names); i++)
+ frv_unit_codes[i] = get_cpu_unit_code (frv_unit_names[i]);
+
+ for (i = 0; i < ARRAY_SIZE (frv_type_to_unit); i++)
+ frv_type_to_unit[i] = ARRAY_SIZE (frv_unit_codes);
+
+ init_machine_status = frv_init_machine_status;
+}
+
+
+/* Return true if NAME (a STRING_CST node) begins with PREFIX. */
+
+static int
+frv_string_begins_with (const_tree name, const char *prefix)
+{
+ const int prefix_len = strlen (prefix);
+
+ /* Remember: NAME's length includes the null terminator. */
+ return (TREE_STRING_LENGTH (name) > prefix_len
+ && strncmp (TREE_STRING_POINTER (name), prefix, prefix_len) == 0);
+}
+
+/* Zero or more C statements that may conditionally modify two variables
+ `fixed_regs' and `call_used_regs' (both of type `char []') after they have
+ been initialized from the two preceding macros.
+
+ This is necessary in case the fixed or call-clobbered registers depend on
+ target flags.
+
+ You need not define this macro if it has no work to do.
+
+ If the usage of an entire class of registers depends on the target flags,
+ you may indicate this to GCC by using this macro to modify `fixed_regs' and
+ `call_used_regs' to 1 for each of the registers in the classes which should
+ not be used by GCC. Also define the macro `REG_CLASS_FROM_LETTER' to return
+ `NO_REGS' if it is called with a letter for a class that shouldn't be used.
+
+ (However, if this class is not included in `GENERAL_REGS' and all of the
+ insn patterns whose constraints permit this class are controlled by target
+ switches, then GCC will automatically avoid using these registers when the
+ target switches are opposed to them.) */
+
+static void
+frv_conditional_register_usage (void)
+{
+ int i;
+
+ for (i = GPR_FIRST + NUM_GPRS; i <= GPR_LAST; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ for (i = FPR_FIRST + NUM_FPRS; i <= FPR_LAST; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ /* Reserve the registers used for conditional execution. At present, we need
+ 1 ICC and 1 ICR register. */
+ fixed_regs[ICC_TEMP] = call_used_regs[ICC_TEMP] = 1;
+ fixed_regs[ICR_TEMP] = call_used_regs[ICR_TEMP] = 1;
+
+ if (TARGET_FIXED_CC)
+ {
+ fixed_regs[ICC_FIRST] = call_used_regs[ICC_FIRST] = 1;
+ fixed_regs[FCC_FIRST] = call_used_regs[FCC_FIRST] = 1;
+ fixed_regs[ICR_FIRST] = call_used_regs[ICR_FIRST] = 1;
+ fixed_regs[FCR_FIRST] = call_used_regs[FCR_FIRST] = 1;
+ }
+
+ if (TARGET_FDPIC)
+ fixed_regs[GPR_FIRST + 16] = fixed_regs[GPR_FIRST + 17] =
+ call_used_regs[GPR_FIRST + 16] = call_used_regs[GPR_FIRST + 17] = 0;
+
+#if 0
+ /* If -fpic, SDA_BASE_REG is the PIC register. */
+ if (g_switch_value == 0 && !flag_pic)
+ fixed_regs[SDA_BASE_REG] = call_used_regs[SDA_BASE_REG] = 0;
+
+ if (!flag_pic)
+ fixed_regs[PIC_REGNO] = call_used_regs[PIC_REGNO] = 0;
+#endif
+}
+
+
+/*
+ * Compute the stack frame layout
+ *
+ * Register setup:
+ * +---------------+-----------------------+-----------------------+
+ * |Register |type |caller-save/callee-save|
+ * +---------------+-----------------------+-----------------------+
+ * |GR0 |Zero register | - |
+ * |GR1 |Stack pointer(SP) | - |
+ * |GR2 |Frame pointer(FP) | - |
+ * |GR3 |Hidden parameter | caller save |
+ * |GR4-GR7 | - | caller save |
+ * |GR8-GR13 |Argument register | caller save |
+ * |GR14-GR15 | - | caller save |
+ * |GR16-GR31 | - | callee save |
+ * |GR32-GR47 | - | caller save |
+ * |GR48-GR63 | - | callee save |
+ * |FR0-FR15 | - | caller save |
+ * |FR16-FR31 | - | callee save |
+ * |FR32-FR47 | - | caller save |
+ * |FR48-FR63 | - | callee save |
+ * +---------------+-----------------------+-----------------------+
+ *
+ * Stack frame setup:
+ * Low
+ * SP-> |-----------------------------------|
+ * | Argument area |
+ * |-----------------------------------|
+ * | Register save area |
+ * |-----------------------------------|
+ * | Local variable save area |
+ * FP-> |-----------------------------------|
+ * | Old FP |
+ * |-----------------------------------|
+ * | Hidden parameter save area |
+ * |-----------------------------------|
+ * | Return address(LR) storage area |
+ * |-----------------------------------|
+ * | Padding for alignment |
+ * |-----------------------------------|
+ * | Register argument area |
+ * OLD SP-> |-----------------------------------|
+ * | Parameter area |
+ * |-----------------------------------|
+ * High
+ *
+ * Argument area/Parameter area:
+ *
+ * When a function is called, this area is used for argument transfer. When
+ * the arguments are set up by the caller function, the area is referred to
+ * as the argument area. When the arguments are referenced by the callee
+ * function, it is referred to as the parameter area. The area is allocated
+ * when not all arguments can be placed in the argument registers at the
+ * time of the call.
+ *
+ * Register save area:
+ *
+ * This area holds the registers that must be preserved for the caller
+ * function. It is not allocated when no registers need to be saved.
+ *
+ * Local variable save area:
+ *
+ * This is the area for local variables and temporary variables.
+ *
+ * Old FP:
+ *
+ * This area stores the FP value of the caller function.
+ *
+ * Hidden parameter save area:
+ *
+ * This area stores the start address of the return value storage
+ * area for a struct/union return function.
+ * When a struct/union is used as the return value, the caller
+ * function stores the return value storage area start address in
+ * register GR3 and passes it to the callee function.
+ * The callee function interprets the address stored in GR3
+ * as the return value storage area start address.
+ * When register GR3 needs to be saved into memory, the callee
+ * function saves it in the hidden parameter save area. This
+ * area is not allocated when the save operation is not needed.
+ *
+ * Return address(LR) storage area:
+ *
+ * This area saves the LR. The LR holds the return address to the caller
+ * function.
+ *
+ * Argument register area:
+ *
+ * This area saves the argument registers. It is not allocated when the
+ * save operation is not needed.
+ *
+ * Argument:
+ *
+ * Up to six words of arguments are passed in registers GR8 to GR13 and
+ * delivered to the callee function. When a struct/union return function is
+ * called, the return value area address is stored in register GR3.
+ * Arguments that do not fit in the argument registers are passed in the
+ * stack argument area. When an 8-byte argument is passed in registers, it
+ * is split in two and placed in a pair of registers. When argument
+ * registers must be saved to memory, the callee function allocates a
+ * contiguous argument register save area in the parameter area, large
+ * enough to cover the argument registers being saved. If the function has
+ * a variable number of arguments, it saves all argument registers in the
+ * argument register save area.
+ *
+ * Argument Extension Format:
+ *
+ * When an argument is stored in the stack, its type is converted to an
+ * extended type according to the table below. The caller function releases
+ * the argument area after the callee function returns.
+ *
+ * +-----------------------+---------------+------------------------+
+ * | Argument Type |Extended Type |Stack Storage Size(byte)|
+ * +-----------------------+---------------+------------------------+
+ * |char |int | 4 |
+ * |signed char |int | 4 |
+ * |unsigned char |int | 4 |
+ * |[signed] short int |int | 4 |
+ * |unsigned short int |int | 4 |
+ * |[signed] int |No extension | 4 |
+ * |unsigned int |No extension | 4 |
+ * |[signed] long int |No extension | 4 |
+ * |unsigned long int |No extension | 4 |
+ * |[signed] long long int |No extension | 8 |
+ * |unsigned long long int |No extension | 8 |
+ * |float |double | 8 |
+ * |double |No extension | 8 |
+ * |long double |No extension | 8 |
+ * |pointer |No extension | 4 |
+ * |struct/union |- | 4 (*1) |
+ * +-----------------------+---------------+------------------------+
+ *
+ * When a struct/union is to be delivered as an argument, the caller copies it
+ * to the local variable area and delivers the address of that area.
+ *
+ * Return Value:
+ *
+ * +-------------------------------+----------------------+
+ * |Return Value Type |Return Value Interface|
+ * +-------------------------------+----------------------+
+ * |void |None |
+ * |[signed|unsigned] char |GR8 |
+ * |[signed|unsigned] short int |GR8 |
+ * |[signed|unsigned] int |GR8 |
+ * |[signed|unsigned] long int |GR8 |
+ * |pointer |GR8 |
+ * |[signed|unsigned] long long int|GR8 & GR9 |
+ * |float |GR8 |
+ * |double |GR8 & GR9 |
+ * |long double |GR8 & GR9 |
+ * |struct/union |(*1) |
+ * +-------------------------------+----------------------+
+ *
+ * When a struct/union is used as the return value, the caller function stores
+ * the start address of the return value storage area into GR3 and then passes
+ * it to the callee function. The callee function interprets GR3 as the start
+ * address of the return value storage area. When this address needs to be
+ * saved in memory, the callee function secures the hidden parameter save area
+ * and saves the address in that area.
+ */
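+
+/* A small worked example of the conventions above (ours, not from the
+   FRV documentation; the exact register assignment is our reading of
+   the tables):
+
+     long long f (char a, long long b);
+
+   'a' is extended to int and passed in GR8, 'b' is split across the
+   next two argument registers, and the 64-bit result comes back in
+   GR8 and GR9.  */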
+
+frv_stack_t *
+frv_stack_info (void)
+{
+ static frv_stack_t info, zero_info;
+ frv_stack_t *info_ptr = &info;
+ tree fndecl = current_function_decl;
+ int varargs_p = 0;
+ tree cur_arg;
+ tree next_arg;
+ int range;
+ int alignment;
+ int offset;
+
+ /* If we've already calculated the values and reload is complete,
+ just return now. */
+ if (frv_stack_cache)
+ return frv_stack_cache;
+
+ /* Zero all fields. */
+ info = zero_info;
+
+ /* Set up the register range information. */
+ info_ptr->regs[STACK_REGS_GPR].name = "gpr";
+ info_ptr->regs[STACK_REGS_GPR].first = LAST_ARG_REGNUM + 1;
+ info_ptr->regs[STACK_REGS_GPR].last = GPR_LAST;
+ info_ptr->regs[STACK_REGS_GPR].dword_p = TRUE;
+
+ info_ptr->regs[STACK_REGS_FPR].name = "fpr";
+ info_ptr->regs[STACK_REGS_FPR].first = FPR_FIRST;
+ info_ptr->regs[STACK_REGS_FPR].last = FPR_LAST;
+ info_ptr->regs[STACK_REGS_FPR].dword_p = TRUE;
+
+ info_ptr->regs[STACK_REGS_LR].name = "lr";
+ info_ptr->regs[STACK_REGS_LR].first = LR_REGNO;
+ info_ptr->regs[STACK_REGS_LR].last = LR_REGNO;
+ info_ptr->regs[STACK_REGS_LR].special_p = 1;
+
+ info_ptr->regs[STACK_REGS_CC].name = "cc";
+ info_ptr->regs[STACK_REGS_CC].first = CC_FIRST;
+ info_ptr->regs[STACK_REGS_CC].last = CC_LAST;
+ info_ptr->regs[STACK_REGS_CC].field_p = TRUE;
+
+ info_ptr->regs[STACK_REGS_LCR].name = "lcr";
+ info_ptr->regs[STACK_REGS_LCR].first = LCR_REGNO;
+ info_ptr->regs[STACK_REGS_LCR].last = LCR_REGNO;
+
+ info_ptr->regs[STACK_REGS_STDARG].name = "stdarg";
+ info_ptr->regs[STACK_REGS_STDARG].first = FIRST_ARG_REGNUM;
+ info_ptr->regs[STACK_REGS_STDARG].last = LAST_ARG_REGNUM;
+ info_ptr->regs[STACK_REGS_STDARG].dword_p = 1;
+ info_ptr->regs[STACK_REGS_STDARG].special_p = 1;
+
+ info_ptr->regs[STACK_REGS_STRUCT].name = "struct";
+ info_ptr->regs[STACK_REGS_STRUCT].first = FRV_STRUCT_VALUE_REGNUM;
+ info_ptr->regs[STACK_REGS_STRUCT].last = FRV_STRUCT_VALUE_REGNUM;
+ info_ptr->regs[STACK_REGS_STRUCT].special_p = 1;
+
+ info_ptr->regs[STACK_REGS_FP].name = "fp";
+ info_ptr->regs[STACK_REGS_FP].first = FRAME_POINTER_REGNUM;
+ info_ptr->regs[STACK_REGS_FP].last = FRAME_POINTER_REGNUM;
+ info_ptr->regs[STACK_REGS_FP].special_p = 1;
+
+ /* Determine if this is a stdarg function. If so, allocate space to store
+ the 6 arguments. */
+ if (cfun->stdarg)
+ varargs_p = 1;
+
+ else
+ {
+ /* Find the last argument, and see if it is __builtin_va_alist. */
+ for (cur_arg = DECL_ARGUMENTS (fndecl); cur_arg != (tree)0; cur_arg = next_arg)
+ {
+ next_arg = DECL_CHAIN (cur_arg);
+ if (next_arg == (tree)0)
+ {
+ if (DECL_NAME (cur_arg)
+ && !strcmp (IDENTIFIER_POINTER (DECL_NAME (cur_arg)), "__builtin_va_alist"))
+ varargs_p = 1;
+
+ break;
+ }
+ }
+ }
+
+ /* Iterate over all of the register ranges. */
+ for (range = 0; range < STACK_REGS_MAX; range++)
+ {
+ frv_stack_regs_t *reg_ptr = &(info_ptr->regs[range]);
+ int first = reg_ptr->first;
+ int last = reg_ptr->last;
+ int size_1word = 0;
+ int size_2words = 0;
+ int regno;
+
+ /* Calculate which registers need to be saved & save area size. */
+ switch (range)
+ {
+ default:
+ for (regno = first; regno <= last; regno++)
+ {
+ if ((df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ || (crtl->calls_eh_return
+ && (regno >= FIRST_EH_REGNUM && regno <= LAST_EH_REGNUM))
+ || (!TARGET_FDPIC && flag_pic
+ && crtl->uses_pic_offset_table && regno == PIC_REGNO))
+ {
+ info_ptr->save_p[regno] = REG_SAVE_1WORD;
+ size_1word += UNITS_PER_WORD;
+ }
+ }
+ break;
+
+ /* Calculate whether we need to create a frame after everything else
+ has been processed. */
+ case STACK_REGS_FP:
+ break;
+
+ case STACK_REGS_LR:
+ if (df_regs_ever_live_p (LR_REGNO)
+ || profile_flag
+ /* This is set for __builtin_return_address, etc. */
+ || cfun->machine->frame_needed
+ || (TARGET_LINKED_FP && frame_pointer_needed)
+ || (!TARGET_FDPIC && flag_pic
+ && crtl->uses_pic_offset_table))
+ {
+ info_ptr->save_p[LR_REGNO] = REG_SAVE_1WORD;
+ size_1word += UNITS_PER_WORD;
+ }
+ break;
+
+ case STACK_REGS_STDARG:
+ if (varargs_p)
+ {
+	      /* If this is a stdarg function with a non-variadic
+ argument split between registers and the stack,
+ adjust the saved registers downward. */
+ last -= (ADDR_ALIGN (crtl->args.pretend_args_size, UNITS_PER_WORD)
+ / UNITS_PER_WORD);
+
+ for (regno = first; regno <= last; regno++)
+ {
+ info_ptr->save_p[regno] = REG_SAVE_1WORD;
+ size_1word += UNITS_PER_WORD;
+ }
+
+ info_ptr->stdarg_size = size_1word;
+ }
+ break;
+
+ case STACK_REGS_STRUCT:
+ if (cfun->returns_struct)
+ {
+ info_ptr->save_p[FRV_STRUCT_VALUE_REGNUM] = REG_SAVE_1WORD;
+ size_1word += UNITS_PER_WORD;
+ }
+ break;
+ }
+
+
+ if (size_1word)
+ {
+ /* If this is a field, it only takes one word. */
+ if (reg_ptr->field_p)
+ size_1word = UNITS_PER_WORD;
+
+ /* Determine which register pairs can be saved together. */
+ else if (reg_ptr->dword_p && TARGET_DWORD)
+ {
+ for (regno = first; regno < last; regno += 2)
+ {
+ if (info_ptr->save_p[regno] && info_ptr->save_p[regno+1])
+ {
+ size_2words += 2 * UNITS_PER_WORD;
+ size_1word -= 2 * UNITS_PER_WORD;
+ info_ptr->save_p[regno] = REG_SAVE_2WORDS;
+ info_ptr->save_p[regno+1] = REG_SAVE_NO_SAVE;
+ }
+ }
+ }
+
+ reg_ptr->size_1word = size_1word;
+ reg_ptr->size_2words = size_2words;
+
+ if (! reg_ptr->special_p)
+ {
+ info_ptr->regs_size_1word += size_1word;
+ info_ptr->regs_size_2words += size_2words;
+ }
+ }
+ }
+
+  /* Set up the sizes of each field in the frame body, making the sizes
+     of each be divisible by the size of a dword if dword operations might
+     be used, or the size of a word otherwise.  */
+  alignment = (TARGET_DWORD ? 2 * UNITS_PER_WORD : UNITS_PER_WORD);
+
+ info_ptr->parameter_size = ADDR_ALIGN (crtl->outgoing_args_size, alignment);
+ info_ptr->regs_size = ADDR_ALIGN (info_ptr->regs_size_2words
+ + info_ptr->regs_size_1word,
+ alignment);
+ info_ptr->vars_size = ADDR_ALIGN (get_frame_size (), alignment);
+
+ info_ptr->pretend_size = crtl->args.pretend_args_size;
+
+ /* Work out the size of the frame, excluding the header. Both the frame
+ body and register parameter area will be dword-aligned. */
+ info_ptr->total_size
+ = (ADDR_ALIGN (info_ptr->parameter_size
+ + info_ptr->regs_size
+ + info_ptr->vars_size,
+ 2 * UNITS_PER_WORD)
+ + ADDR_ALIGN (info_ptr->pretend_size
+ + info_ptr->stdarg_size,
+ 2 * UNITS_PER_WORD));
+
+ /* See if we need to create a frame at all, if so add header area. */
+ if (info_ptr->total_size > 0
+ || frame_pointer_needed
+ || info_ptr->regs[STACK_REGS_LR].size_1word > 0
+ || info_ptr->regs[STACK_REGS_STRUCT].size_1word > 0)
+ {
+ offset = info_ptr->parameter_size;
+ info_ptr->header_size = 4 * UNITS_PER_WORD;
+ info_ptr->total_size += 4 * UNITS_PER_WORD;
+
+ /* Calculate the offsets to save normal register pairs. */
+ for (range = 0; range < STACK_REGS_MAX; range++)
+ {
+ frv_stack_regs_t *reg_ptr = &(info_ptr->regs[range]);
+ if (! reg_ptr->special_p)
+ {
+ int first = reg_ptr->first;
+ int last = reg_ptr->last;
+ int regno;
+
+ for (regno = first; regno <= last; regno++)
+ if (info_ptr->save_p[regno] == REG_SAVE_2WORDS
+ && regno != FRAME_POINTER_REGNUM
+ && (regno < FIRST_ARG_REGNUM
+ || regno > LAST_ARG_REGNUM))
+ {
+ info_ptr->reg_offset[regno] = offset;
+ offset += 2 * UNITS_PER_WORD;
+ }
+ }
+ }
+
+ /* Calculate the offsets to save normal single registers. */
+ for (range = 0; range < STACK_REGS_MAX; range++)
+ {
+ frv_stack_regs_t *reg_ptr = &(info_ptr->regs[range]);
+ if (! reg_ptr->special_p)
+ {
+ int first = reg_ptr->first;
+ int last = reg_ptr->last;
+ int regno;
+
+ for (regno = first; regno <= last; regno++)
+ if (info_ptr->save_p[regno] == REG_SAVE_1WORD
+ && regno != FRAME_POINTER_REGNUM
+ && (regno < FIRST_ARG_REGNUM
+ || regno > LAST_ARG_REGNUM))
+ {
+ info_ptr->reg_offset[regno] = offset;
+ offset += UNITS_PER_WORD;
+ }
+ }
+ }
+
+ /* Calculate the offset to save the local variables at. */
+ offset = ADDR_ALIGN (offset, alignment);
+ if (info_ptr->vars_size)
+ {
+ info_ptr->vars_offset = offset;
+ offset += info_ptr->vars_size;
+ }
+
+ /* Align header to a dword-boundary. */
+ offset = ADDR_ALIGN (offset, 2 * UNITS_PER_WORD);
+
+ /* Calculate the offsets in the fixed frame. */
+ info_ptr->save_p[FRAME_POINTER_REGNUM] = REG_SAVE_1WORD;
+ info_ptr->reg_offset[FRAME_POINTER_REGNUM] = offset;
+ info_ptr->regs[STACK_REGS_FP].size_1word = UNITS_PER_WORD;
+
+ info_ptr->save_p[LR_REGNO] = REG_SAVE_1WORD;
+ info_ptr->reg_offset[LR_REGNO] = offset + 2*UNITS_PER_WORD;
+ info_ptr->regs[STACK_REGS_LR].size_1word = UNITS_PER_WORD;
+
+ if (cfun->returns_struct)
+ {
+ info_ptr->save_p[FRV_STRUCT_VALUE_REGNUM] = REG_SAVE_1WORD;
+ info_ptr->reg_offset[FRV_STRUCT_VALUE_REGNUM] = offset + UNITS_PER_WORD;
+ info_ptr->regs[STACK_REGS_STRUCT].size_1word = UNITS_PER_WORD;
+ }
+
+      /* Calculate the offsets at which to store the arguments passed in
+	 registers for stdarg functions.  Register pairs come first and the
+	 single register, if any, comes last.  The register save area starts
+	 on a dword boundary.  */
+ if (info_ptr->stdarg_size)
+ {
+ int first = info_ptr->regs[STACK_REGS_STDARG].first;
+ int last = info_ptr->regs[STACK_REGS_STDARG].last;
+ int regno;
+
+ /* Skip the header. */
+ offset += 4 * UNITS_PER_WORD;
+ for (regno = first; regno <= last; regno++)
+ {
+ if (info_ptr->save_p[regno] == REG_SAVE_2WORDS)
+ {
+ info_ptr->reg_offset[regno] = offset;
+ offset += 2 * UNITS_PER_WORD;
+ }
+ else if (info_ptr->save_p[regno] == REG_SAVE_1WORD)
+ {
+ info_ptr->reg_offset[regno] = offset;
+ offset += UNITS_PER_WORD;
+ }
+ }
+ }
+ }
+
+ if (reload_completed)
+ frv_stack_cache = info_ptr;
+
+ return info_ptr;
+}
+
+
+/* Print the information about the frv stack offsets, etc. when debugging. */
+
+void
+frv_debug_stack (frv_stack_t *info)
+{
+ int range;
+
+ if (!info)
+ info = frv_stack_info ();
+
+ fprintf (stderr, "\nStack information for function %s:\n",
+ ((current_function_decl && DECL_NAME (current_function_decl))
+ ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
+ : "<unknown>"));
+
+ fprintf (stderr, "\ttotal_size\t= %6d\n", info->total_size);
+ fprintf (stderr, "\tvars_size\t= %6d\n", info->vars_size);
+ fprintf (stderr, "\tparam_size\t= %6d\n", info->parameter_size);
+ fprintf (stderr, "\tregs_size\t= %6d, 1w = %3d, 2w = %3d\n",
+ info->regs_size, info->regs_size_1word, info->regs_size_2words);
+
+ fprintf (stderr, "\theader_size\t= %6d\n", info->header_size);
+ fprintf (stderr, "\tpretend_size\t= %6d\n", info->pretend_size);
+ fprintf (stderr, "\tvars_offset\t= %6d\n", info->vars_offset);
+ fprintf (stderr, "\tregs_offset\t= %6d\n", info->regs_offset);
+
+ for (range = 0; range < STACK_REGS_MAX; range++)
+ {
+ frv_stack_regs_t *regs = &(info->regs[range]);
+ if ((regs->size_1word + regs->size_2words) > 0)
+ {
+ int first = regs->first;
+ int last = regs->last;
+ int regno;
+
+ fprintf (stderr, "\t%s\tsize\t= %6d, 1w = %3d, 2w = %3d, save =",
+ regs->name, regs->size_1word + regs->size_2words,
+ regs->size_1word, regs->size_2words);
+
+ for (regno = first; regno <= last; regno++)
+ {
+ if (info->save_p[regno] == REG_SAVE_1WORD)
+ fprintf (stderr, " %s (%d)", reg_names[regno],
+ info->reg_offset[regno]);
+
+ else if (info->save_p[regno] == REG_SAVE_2WORDS)
+ fprintf (stderr, " %s-%s (%d)", reg_names[regno],
+ reg_names[regno+1], info->reg_offset[regno]);
+ }
+
+ fputc ('\n', stderr);
+ }
+ }
+
+ fflush (stderr);
+}
+
+
+
+
+/* Used during final to control the packing of insns. The value is
+ 1 if the current instruction should be packed with the next one,
+ 0 if it shouldn't or -1 if packing is disabled altogether. */
+
+static int frv_insn_packing_flag;
+
+/* True if the current function contains a far jump. */
+
+static int
+frv_function_contains_far_jump (void)
+{
+ rtx insn = get_insns ();
+ while (insn != NULL
+ && !(GET_CODE (insn) == JUMP_INSN
+ /* Ignore tablejump patterns. */
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+ && get_attr_far_jump (insn) == FAR_JUMP_YES))
+ insn = NEXT_INSN (insn);
+ return (insn != NULL);
+}
+
+/* For the FRV, this function makes sure that a function with far jumps
+ will return correctly. It also does the VLIW packing. */
+
+static void
+frv_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* If no frame was created, check whether the function uses a call
+ instruction to implement a far jump. If so, save the link in gr3 and
+ replace all returns to LR with returns to GR3. GR3 is used because it
+     is call-clobbered, because it is not available to the register allocator,
+ and because all functions that take a hidden argument pointer will have
+ a stack frame. */
+ if (frv_stack_info ()->total_size == 0 && frv_function_contains_far_jump ())
+ {
+ rtx insn;
+
+ /* Just to check that the above comment is true. */
+ gcc_assert (!df_regs_ever_live_p (GPR_FIRST + 3));
+
+ /* Generate the instruction that saves the link register. */
+ fprintf (file, "\tmovsg lr,gr3\n");
+
+ /* Replace the LR with GR3 in *return_internal patterns. The insn
+ will now return using jmpl @(gr3,0) rather than bralr. We cannot
+ simply emit a different assembly directive because bralr and jmpl
+ execute in different units. */
+ for (insn = get_insns(); insn != NULL; insn = NEXT_INSN (insn))
+ if (GET_CODE (insn) == JUMP_INSN)
+ {
+ rtx pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == PARALLEL
+ && XVECLEN (pattern, 0) >= 2
+ && GET_CODE (XVECEXP (pattern, 0, 0)) == RETURN
+ && GET_CODE (XVECEXP (pattern, 0, 1)) == USE)
+ {
+ rtx address = XEXP (XVECEXP (pattern, 0, 1), 0);
+ if (GET_CODE (address) == REG && REGNO (address) == LR_REGNO)
+ SET_REGNO (address, GPR_FIRST + 3);
+ }
+ }
+ }
+
+ frv_pack_insns ();
+
+ /* Allow the garbage collector to free the nops created by frv_reorg. */
+ memset (frv_nops, 0, sizeof (frv_nops));
+}
+
+
+/* Return the next available temporary register in a given class. */
+
+static rtx
+frv_alloc_temp_reg (
+ frv_tmp_reg_t *info, /* which registers are available */
+ enum reg_class rclass, /* register class desired */
+ enum machine_mode mode, /* mode to allocate register with */
+ int mark_as_used, /* register not available after allocation */
+ int no_abort) /* return NULL instead of aborting */
+{
+ int regno = info->next_reg[ (int)rclass ];
+ int orig_regno = regno;
+ HARD_REG_SET *reg_in_class = &reg_class_contents[ (int)rclass ];
+ int i, nr;
+
+ for (;;)
+ {
+ if (TEST_HARD_REG_BIT (*reg_in_class, regno)
+ && TEST_HARD_REG_BIT (info->regs, regno))
+ break;
+
+ if (++regno >= FIRST_PSEUDO_REGISTER)
+ regno = 0;
+ if (regno == orig_regno)
+ {
+ gcc_assert (no_abort);
+ return NULL_RTX;
+ }
+ }
+
+ nr = HARD_REGNO_NREGS (regno, mode);
+ info->next_reg[ (int)rclass ] = regno + nr;
+
+ if (mark_as_used)
+ for (i = 0; i < nr; i++)
+ CLEAR_HARD_REG_BIT (info->regs, regno+i);
+
+ return gen_rtx_REG (mode, regno);
+}
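+
+/* A typical call (illustrative only; GPR_REGS is the FRV general
+   register class and TMP a caller-owned frv_tmp_reg_t) might be
+
+     frv_alloc_temp_reg (&tmp, GPR_REGS, SImode, TRUE, FALSE)
+
+   which returns the next free GPR as an SImode REG and marks it as
+   used for subsequent allocations.  */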
+
+
+/* Return an rtx with the value OFFSET, which will either be a register or a
+ signed 12-bit integer. It can be used as the second operand in an "add"
+ instruction, or as the index in a load or store.
+
+ The function returns a constant rtx if OFFSET is small enough, otherwise
+ it loads the constant into register OFFSET_REGNO and returns that. */
+static rtx
+frv_frame_offset_rtx (int offset)
+{
+ rtx offset_rtx = GEN_INT (offset);
+ if (IN_RANGE (offset, -2048, 2047))
+ return offset_rtx;
+ else
+ {
+ rtx reg_rtx = gen_rtx_REG (SImode, OFFSET_REGNO);
+ if (IN_RANGE (offset, -32768, 32767))
+ emit_insn (gen_movsi (reg_rtx, offset_rtx));
+ else
+ {
+ emit_insn (gen_movsi_high (reg_rtx, offset_rtx));
+ emit_insn (gen_movsi_lo_sum (reg_rtx, offset_rtx));
+ }
+ return reg_rtx;
+ }
+}
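+
+/* For example (illustrative only): frv_frame_offset_rtx (40) returns
+   (const_int 40) unchanged; an offset of 5000 fits in 16 bits and is
+   loaded into OFFSET_REGNO with a single movsi; an offset of 100000
+   needs the movsi_high/movsi_lo_sum pair.  */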
+
+/* Generate (mem:MODE (plus:Pmode BASE (frv_frame_offset OFFSET))).  The
+   prologue and epilogue use such expressions to access the stack.  */
+static rtx
+frv_frame_mem (enum machine_mode mode, rtx base, int offset)
+{
+ return gen_rtx_MEM (mode, gen_rtx_PLUS (Pmode,
+ base,
+ frv_frame_offset_rtx (offset)));
+}
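+
+/* For example, frv_frame_mem (SImode, stack_pointer_rtx, 40) yields
+   (mem:SI (plus:SI (reg sp) (const_int 40))), assuming Pmode is SImode
+   and the offset fits in the signed 12-bit range (illustrative
+   only).  */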
+
+/* Generate a frame-related expression:
+
+ (set REG (mem (plus (sp) (const_int OFFSET)))).
+
+ Such expressions are used in FRAME_RELATED_EXPR notes for more complex
+ instructions. Marking the expressions as frame-related is superfluous if
+ the note contains just a single set. But if the note contains a PARALLEL
+ or SEQUENCE that has several sets, each set must be individually marked
+ as frame-related. */
+static rtx
+frv_dwarf_store (rtx reg, int offset)
+{
+ rtx set = gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (reg),
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ reg);
+ RTX_FRAME_RELATED_P (set) = 1;
+ return set;
+}
+
+/* Emit a frame-related instruction whose pattern is PATTERN. The
+ instruction is the last in a sequence that cumulatively performs the
+ operation described by DWARF_PATTERN. The instruction is marked as
+ frame-related and has a REG_FRAME_RELATED_EXPR note containing
+ DWARF_PATTERN. */
+static void
+frv_frame_insn (rtx pattern, rtx dwarf_pattern)
+{
+ rtx insn = emit_insn (pattern);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ dwarf_pattern,
+ REG_NOTES (insn));
+}
+
+/* Emit instructions that transfer REG to or from the memory location (sp +
+ STACK_OFFSET). The register is stored in memory if ACCESSOR->OP is
+ FRV_STORE and loaded if it is FRV_LOAD. Only the prologue uses this
+ function to store registers and only the epilogue uses it to load them.
+
+ The caller sets up ACCESSOR so that BASE is equal to (sp + BASE_OFFSET).
+ The generated instruction will use BASE as its base register. BASE may
+ simply be the stack pointer, but if several accesses are being made to a
+ region far away from the stack pointer, it may be more efficient to set
+ up a temporary instead.
+
+ Store instructions will be frame-related and will be annotated with the
+ overall effect of the store. Load instructions will be followed by a
+ (use) to prevent later optimizations from zapping them.
+
+ The function takes care of the moves to and from SPRs, using TEMP_REGNO
+ as a temporary in such cases. */
+static void
+frv_frame_access (frv_frame_accessor_t *accessor, rtx reg, int stack_offset)
+{
+ enum machine_mode mode = GET_MODE (reg);
+ rtx mem = frv_frame_mem (mode,
+ accessor->base,
+ stack_offset - accessor->base_offset);
+
+ if (accessor->op == FRV_LOAD)
+ {
+ if (SPR_P (REGNO (reg)))
+ {
+ rtx temp = gen_rtx_REG (mode, TEMP_REGNO);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, mem));
+ emit_insn (gen_rtx_SET (VOIDmode, reg, temp));
+ }
+ else
+ {
+ /* We cannot use reg+reg addressing for DImode access. */
+ if (mode == DImode
+ && GET_CODE (XEXP (mem, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (mem, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (mem, 0), 1)) == REG)
+ {
+ rtx temp = gen_rtx_REG (SImode, TEMP_REGNO);
+
+ emit_move_insn (temp,
+ gen_rtx_PLUS (SImode, XEXP (XEXP (mem, 0), 0),
+ XEXP (XEXP (mem, 0), 1)));
+ mem = gen_rtx_MEM (DImode, temp);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
+ }
+ emit_use (reg);
+ }
+ else
+ {
+ if (SPR_P (REGNO (reg)))
+ {
+ rtx temp = gen_rtx_REG (mode, TEMP_REGNO);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, reg));
+ frv_frame_insn (gen_rtx_SET (Pmode, mem, temp),
+ frv_dwarf_store (reg, stack_offset));
+ }
+ else if (mode == DImode)
+ {
+ /* For DImode saves, the dwarf2 version needs to be a SEQUENCE
+ with a separate save for each register. */
+ rtx reg1 = gen_rtx_REG (SImode, REGNO (reg));
+ rtx reg2 = gen_rtx_REG (SImode, REGNO (reg) + 1);
+ rtx set1 = frv_dwarf_store (reg1, stack_offset);
+ rtx set2 = frv_dwarf_store (reg2, stack_offset + 4);
+
+ /* Also we cannot use reg+reg addressing. */
+ if (GET_CODE (XEXP (mem, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (mem, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (mem, 0), 1)) == REG)
+ {
+ rtx temp = gen_rtx_REG (SImode, TEMP_REGNO);
+ emit_move_insn (temp,
+ gen_rtx_PLUS (SImode, XEXP (XEXP (mem, 0), 0),
+ XEXP (XEXP (mem, 0), 1)));
+ mem = gen_rtx_MEM (DImode, temp);
+ }
+
+ frv_frame_insn (gen_rtx_SET (Pmode, mem, reg),
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, set1, set2)));
+ }
+ else
+ frv_frame_insn (gen_rtx_SET (Pmode, mem, reg),
+ frv_dwarf_store (reg, stack_offset));
+ }
+}
+
+/* A function that uses frv_frame_access to transfer a group of registers to
+ or from the stack. ACCESSOR is passed directly to frv_frame_access, INFO
+ is the stack information generated by frv_stack_info, and REG_SET is the
+ number of the register set to transfer. */
+static void
+frv_frame_access_multi (frv_frame_accessor_t *accessor,
+ frv_stack_t *info,
+ int reg_set)
+{
+ frv_stack_regs_t *regs_info;
+ int regno;
+
+ regs_info = &info->regs[reg_set];
+ for (regno = regs_info->first; regno <= regs_info->last; regno++)
+ if (info->save_p[regno])
+ frv_frame_access (accessor,
+ info->save_p[regno] == REG_SAVE_2WORDS
+ ? gen_rtx_REG (DImode, regno)
+ : gen_rtx_REG (SImode, regno),
+ info->reg_offset[regno]);
+}
+
+/* Save or restore callee-saved registers that are kept outside the frame
+ header. The function saves the registers if OP is FRV_STORE and restores
+ them if OP is FRV_LOAD. INFO is the stack information generated by
+ frv_stack_info. */
+static void
+frv_frame_access_standard_regs (enum frv_stack_op op, frv_stack_t *info)
+{
+ frv_frame_accessor_t accessor;
+
+ accessor.op = op;
+ accessor.base = stack_pointer_rtx;
+ accessor.base_offset = 0;
+ frv_frame_access_multi (&accessor, info, STACK_REGS_GPR);
+ frv_frame_access_multi (&accessor, info, STACK_REGS_FPR);
+ frv_frame_access_multi (&accessor, info, STACK_REGS_LCR);
+}
+
+
+/* Called after register allocation to add any instructions needed for the
+   prologue.  Using a prologue insn is preferred to putting all of the
+   instructions in the TARGET_ASM_FUNCTION_PROLOGUE target hook, since
+   it allows the scheduler to intermix instructions with the saves of
+   the call-saved registers.  In some cases, it might be necessary
+   to emit a barrier instruction as the last insn to prevent such
+   scheduling.
+
+ Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
+ so that the debug info generation code can handle them properly. */
+void
+frv_expand_prologue (void)
+{
+ frv_stack_t *info = frv_stack_info ();
+ rtx sp = stack_pointer_rtx;
+ rtx fp = frame_pointer_rtx;
+ frv_frame_accessor_t accessor;
+
+ if (TARGET_DEBUG_STACK)
+ frv_debug_stack (info);
+
+ if (info->total_size == 0)
+ return;
+
+ /* We're interested in three areas of the frame here:
+
+ A: the register save area
+ B: the old FP
+ C: the header after B
+
+ If the frame pointer isn't used, we'll have to set up A, B and C
+ using the stack pointer. If the frame pointer is used, we'll access
+ them as follows:
+
+ A: set up using sp
+ B: set up using sp or a temporary (see below)
+ C: set up using fp
+
+ We set up B using the stack pointer if the frame is small enough.
+ Otherwise, it's more efficient to copy the old stack pointer into a
+ temporary and use that.
+
+ Note that it's important to make sure the prologue and epilogue use the
+ same registers to access A and C, since doing otherwise will confuse
+ the aliasing code. */
+
+ /* Set up ACCESSOR for accessing region B above. If the frame pointer
+ isn't used, the same method will serve for C. */
+ accessor.op = FRV_STORE;
+ if (frame_pointer_needed && info->total_size > 2048)
+ {
+ accessor.base = gen_rtx_REG (Pmode, OLD_SP_REGNO);
+ accessor.base_offset = info->total_size;
+ emit_insn (gen_movsi (accessor.base, sp));
+ }
+ else
+ {
+ accessor.base = stack_pointer_rtx;
+ accessor.base_offset = 0;
+ }
+
+ /* Allocate the stack space. */
+ {
+ rtx asm_offset = frv_frame_offset_rtx (-info->total_size);
+ rtx dwarf_offset = GEN_INT (-info->total_size);
+
+ frv_frame_insn (gen_stack_adjust (sp, sp, asm_offset),
+ gen_rtx_SET (Pmode,
+ sp,
+ gen_rtx_PLUS (Pmode, sp, dwarf_offset)));
+ }
+
+ /* If the frame pointer is needed, store the old one at (sp + FP_OFFSET)
+ and point the new one to that location. */
+ if (frame_pointer_needed)
+ {
+ int fp_offset = info->reg_offset[FRAME_POINTER_REGNUM];
+
+ /* ASM_SRC and DWARF_SRC both point to the frame header. ASM_SRC is
+ based on ACCESSOR.BASE but DWARF_SRC is always based on the stack
+ pointer. */
+ rtx asm_src = plus_constant (accessor.base,
+ fp_offset - accessor.base_offset);
+ rtx dwarf_src = plus_constant (sp, fp_offset);
+
+ /* Store the old frame pointer at (sp + FP_OFFSET). */
+ frv_frame_access (&accessor, fp, fp_offset);
+
+ /* Set up the new frame pointer. */
+ frv_frame_insn (gen_rtx_SET (VOIDmode, fp, asm_src),
+ gen_rtx_SET (VOIDmode, fp, dwarf_src));
+
+ /* Access region C from the frame pointer. */
+ accessor.base = fp;
+ accessor.base_offset = fp_offset;
+ }
+
+ /* Set up region C. */
+ frv_frame_access_multi (&accessor, info, STACK_REGS_STRUCT);
+ frv_frame_access_multi (&accessor, info, STACK_REGS_LR);
+ frv_frame_access_multi (&accessor, info, STACK_REGS_STDARG);
+
+ /* Set up region A. */
+ frv_frame_access_standard_regs (FRV_STORE, info);
+
+ /* If this is a varargs/stdarg function, issue a blockage to prevent the
+ scheduler from moving loads before the stores saving the registers. */
+ if (info->stdarg_size > 0)
+ emit_insn (gen_blockage ());
+
+ /* Set up pic register/small data register for this function. */
+ if (!TARGET_FDPIC && flag_pic && crtl->uses_pic_offset_table)
+ emit_insn (gen_pic_prologue (gen_rtx_REG (Pmode, PIC_REGNO),
+ gen_rtx_REG (Pmode, LR_REGNO),
+ gen_rtx_REG (SImode, OFFSET_REGNO)));
+}
+
+
+/* Under frv, all of the work is done via frv_expand_epilogue, but
+ this function provides a convenient place to do cleanup. */
+
+static void
+frv_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ frv_stack_cache = (frv_stack_t *)0;
+
+ /* Zap last used registers for conditional execution. */
+ memset (&frv_ifcvt.tmp_reg, 0, sizeof (frv_ifcvt.tmp_reg));
+
+ /* Release the bitmap of created insns. */
+ BITMAP_FREE (frv_ifcvt.scratch_insns_bitmap);
+}
+
+
+/* Called after register allocation to add any instructions needed for the
+   epilogue.  Using an epilogue insn is preferred to putting all of the
+   instructions in the TARGET_ASM_FUNCTION_EPILOGUE target hook, since
+   it allows the scheduler to intermix instructions with the restores of
+   the call-saved registers.  In some cases, it might be necessary
+   to emit a barrier instruction as the last insn to prevent such
+   scheduling.  */
+
+void
+frv_expand_epilogue (bool emit_return)
+{
+ frv_stack_t *info = frv_stack_info ();
+ rtx fp = frame_pointer_rtx;
+ rtx sp = stack_pointer_rtx;
+ rtx return_addr;
+ int fp_offset;
+
+ fp_offset = info->reg_offset[FRAME_POINTER_REGNUM];
+
+ /* Restore the stack pointer to its original value if alloca or the like
+ is used. */
+ if (! current_function_sp_is_unchanging)
+ emit_insn (gen_addsi3 (sp, fp, frv_frame_offset_rtx (-fp_offset)));
+
+ /* Restore the callee-saved registers that were used in this function. */
+ frv_frame_access_standard_regs (FRV_LOAD, info);
+
+ /* Set RETURN_ADDR to the address we should return to. Set it to NULL if
+ no return instruction should be emitted. */
+ if (info->save_p[LR_REGNO])
+ {
+ int lr_offset;
+ rtx mem;
+
+ /* Use the same method to access the link register's slot as we did in
+ the prologue. In other words, use the frame pointer if available,
+ otherwise use the stack pointer.
+
+ LR_OFFSET is the offset of the link register's slot from the start
+ of the frame and MEM is a memory rtx for it. */
+ lr_offset = info->reg_offset[LR_REGNO];
+ if (frame_pointer_needed)
+ mem = frv_frame_mem (Pmode, fp, lr_offset - fp_offset);
+ else
+ mem = frv_frame_mem (Pmode, sp, lr_offset);
+
+ /* Load the old link register into a GPR. */
+ return_addr = gen_rtx_REG (Pmode, TEMP_REGNO);
+ emit_insn (gen_rtx_SET (VOIDmode, return_addr, mem));
+ }
+ else
+ return_addr = gen_rtx_REG (Pmode, LR_REGNO);
+
+ /* Restore the old frame pointer. Emit a USE afterwards to make sure
+ the load is preserved. */
+ if (frame_pointer_needed)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, fp, gen_rtx_MEM (Pmode, fp)));
+ emit_use (fp);
+ }
+
+ /* Deallocate the stack frame. */
+ if (info->total_size != 0)
+ {
+ rtx offset = frv_frame_offset_rtx (info->total_size);
+ emit_insn (gen_stack_adjust (sp, sp, offset));
+ }
+
+ /* If this function uses eh_return, add the final stack adjustment now. */
+ if (crtl->calls_eh_return)
+ emit_insn (gen_stack_adjust (sp, sp, EH_RETURN_STACKADJ_RTX));
+
+ if (emit_return)
+ emit_jump_insn (gen_epilogue_return (return_addr));
+ else
+ {
+ rtx lr = return_addr;
+
+ if (REGNO (return_addr) != LR_REGNO)
+ {
+ lr = gen_rtx_REG (Pmode, LR_REGNO);
+ emit_move_insn (lr, return_addr);
+ }
+
+ emit_use (lr);
+ }
+}
+
+
+/* Worker function for TARGET_ASM_OUTPUT_MI_THUNK. */
+
+static void
+frv_asm_output_mi_thunk (FILE *file,
+ tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ const char *name_func = XSTR (XEXP (DECL_RTL (function), 0), 0);
+ const char *name_arg0 = reg_names[FIRST_ARG_REGNUM];
+ const char *name_jmp = reg_names[JUMP_REGNO];
+ const char *parallel = (frv_issue_rate () > 1 ? ".p" : "");
+
+ /* Do the add using an addi if possible. */
+ if (IN_RANGE (delta, -2048, 2047))
+ fprintf (file, "\taddi %s,#%d,%s\n", name_arg0, (int) delta, name_arg0);
+ else
+ {
+ const char *const name_add = reg_names[TEMP_REGNO];
+ fprintf (file, "\tsethi%s #hi(" HOST_WIDE_INT_PRINT_DEC "),%s\n",
+ parallel, delta, name_add);
+ fprintf (file, "\tsetlo #lo(" HOST_WIDE_INT_PRINT_DEC "),%s\n",
+ delta, name_add);
+ fprintf (file, "\tadd %s,%s,%s\n", name_add, name_arg0, name_arg0);
+ }
+
+ if (TARGET_FDPIC)
+ {
+ const char *name_pic = reg_names[FDPIC_REGNO];
+ name_jmp = reg_names[FDPIC_FPTR_REGNO];
+
+ if (flag_pic != 1)
+ {
+ fprintf (file, "\tsethi%s #gotofffuncdeschi(", parallel);
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_jmp);
+
+ fprintf (file, "\tsetlo #gotofffuncdesclo(");
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_jmp);
+
+ fprintf (file, "\tldd @(%s,%s), %s\n", name_jmp, name_pic, name_jmp);
+ }
+ else
+ {
+ fprintf (file, "\tlddo @(%s,#gotofffuncdesc12(", name_pic);
+ assemble_name (file, name_func);
+ fprintf (file, "\t)), %s\n", name_jmp);
+ }
+ }
+ else if (!flag_pic)
+ {
+ fprintf (file, "\tsethi%s #hi(", parallel);
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_jmp);
+
+ fprintf (file, "\tsetlo #lo(");
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_jmp);
+ }
+ else
+ {
+ /* Use JUMP_REGNO as a temporary PIC register. */
+ const char *name_lr = reg_names[LR_REGNO];
+ const char *name_gppic = name_jmp;
+ const char *name_tmp = reg_names[TEMP_REGNO];
+
+ fprintf (file, "\tmovsg %s,%s\n", name_lr, name_tmp);
+ fprintf (file, "\tcall 1f\n");
+ fprintf (file, "1:\tmovsg %s,%s\n", name_lr, name_gppic);
+ fprintf (file, "\tmovgs %s,%s\n", name_tmp, name_lr);
+ fprintf (file, "\tsethi%s #gprelhi(1b),%s\n", parallel, name_tmp);
+ fprintf (file, "\tsetlo #gprello(1b),%s\n", name_tmp);
+ fprintf (file, "\tsub %s,%s,%s\n", name_gppic, name_tmp, name_gppic);
+
+ fprintf (file, "\tsethi%s #gprelhi(", parallel);
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_tmp);
+
+ fprintf (file, "\tsetlo #gprello(");
+ assemble_name (file, name_func);
+ fprintf (file, "),%s\n", name_tmp);
+
+ fprintf (file, "\tadd %s,%s,%s\n", name_gppic, name_tmp, name_jmp);
+ }
+
+ /* Jump to the function address. */
+ fprintf (file, "\tjmpl @(%s,%s)\n", name_jmp, reg_names[GPR_FIRST+0]);
+}
+
+
+
+/* On frv, create a frame whenever we need to create stack. */
+
+static bool
+frv_frame_pointer_required (void)
+{
+  /* If we are forgoing the usual linkage requirements, we only need
+     a frame pointer if the stack pointer might change.  */
+ if (!TARGET_LINKED_FP)
+ return !current_function_sp_is_unchanging;
+
+ if (! current_function_is_leaf)
+ return true;
+
+ if (get_frame_size () != 0)
+ return true;
+
+ if (cfun->stdarg)
+ return true;
+
+ if (!current_function_sp_is_unchanging)
+ return true;
+
+ if (!TARGET_FDPIC && flag_pic && crtl->uses_pic_offset_table)
+ return true;
+
+ if (profile_flag)
+ return true;
+
+ if (cfun->machine->frame_needed)
+ return true;
+
+ return false;
+}
+
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+frv_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the
+ initial difference between the specified pair of registers. This macro must
+ be defined if `ELIMINABLE_REGS' is defined. */
+
+/* See frv_stack_info for more details on the frv stack frame. */
+
+int
+frv_initial_elimination_offset (int from, int to)
+{
+ frv_stack_t *info = frv_stack_info ();
+ int ret = 0;
+
+ if (to == STACK_POINTER_REGNUM && from == ARG_POINTER_REGNUM)
+ ret = info->total_size - info->pretend_size;
+
+ else if (to == STACK_POINTER_REGNUM && from == FRAME_POINTER_REGNUM)
+ ret = info->reg_offset[FRAME_POINTER_REGNUM];
+
+ else if (to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM)
+ ret = (info->total_size
+ - info->reg_offset[FRAME_POINTER_REGNUM]
+ - info->pretend_size);
+
+ else
+ gcc_unreachable ();
+
+ if (TARGET_DEBUG_STACK)
+ fprintf (stderr, "Eliminate %s to %s by adding %d\n",
+ reg_names [from], reg_names[to], ret);
+
+ return ret;
+}
+
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+frv_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_size,
+ int second_time)
+{
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr,
+ "setup_vararg: words = %2d, mode = %4s, pretend_size = %d, second_time = %d\n",
+ *cum, GET_MODE_NAME (mode), *pretend_size, second_time);
+}
+
+
+/* Worker function for TARGET_EXPAND_BUILTIN_SAVEREGS. */
+
+static rtx
+frv_expand_builtin_saveregs (void)
+{
+ int offset = UNITS_PER_WORD * FRV_NUM_ARG_REGS;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "expand_builtin_saveregs: offset from ap = %d\n",
+ offset);
+
+ return gen_rtx_PLUS (Pmode, virtual_incoming_args_rtx, GEN_INT (- offset));
+}
+
+
+/* Expand __builtin_va_start to implement the va_start macro.  */
+
+static void
+frv_expand_builtin_va_start (tree valist, rtx nextarg)
+{
+ tree t;
+ int num = crtl->args.info - FIRST_ARG_REGNUM - FRV_NUM_ARG_REGS;
+
+ nextarg = gen_rtx_PLUS (Pmode, virtual_incoming_args_rtx,
+ GEN_INT (UNITS_PER_WORD * num));
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_start: args_info = %d, num = %d\n",
+ crtl->args.info, num);
+
+ debug_rtx (nextarg);
+ }
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist,
+ fold_convert (TREE_TYPE (valist),
+ make_tree (sizetype, nextarg)));
+ TREE_SIDE_EFFECTS (t) = 1;
+
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+
+/* Expand a block move operation, and return 1 if successful. Return 0
+ if we should let the compiler generate normal code.
+
+ operands[0] is the destination
+ operands[1] is the source
+ operands[2] is the length
+ operands[3] is the alignment */
+
+/* Maximum number of loads to do before doing the stores.  */
+#ifndef MAX_MOVE_REG
+#define MAX_MOVE_REG 4
+#endif
+
+/* Maximum number of total loads to do. */
+#ifndef TOTAL_MOVE_REG
+#define TOTAL_MOVE_REG 8
+#endif
+
+int
+frv_expand_block_move (rtx operands[])
+{
+ rtx orig_dest = operands[0];
+ rtx orig_src = operands[1];
+ rtx bytes_rtx = operands[2];
+ rtx align_rtx = operands[3];
+ int constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ int align;
+ int bytes;
+ int offset;
+ int num_reg;
+ int i;
+ rtx src_reg;
+ rtx dest_reg;
+ rtx src_addr;
+ rtx dest_addr;
+ rtx src_mem;
+ rtx dest_mem;
+ rtx tmp_reg;
+ rtx stores[MAX_MOVE_REG];
+ int move_bytes;
+ enum machine_mode mode;
+
+ /* If this is not a fixed size move, just call memcpy. */
+ if (! constp)
+ return FALSE;
+
+ /* This should be a fixed size alignment. */
+ gcc_assert (GET_CODE (align_rtx) == CONST_INT);
+
+ align = INTVAL (align_rtx);
+
+ /* Anything to move? */
+ bytes = INTVAL (bytes_rtx);
+ if (bytes <= 0)
+ return TRUE;
+
+  /* Don't support really large moves.  */
+ if (bytes > TOTAL_MOVE_REG*align)
+ return FALSE;
+
+ /* Move the address into scratch registers. */
+ dest_reg = copy_addr_to_reg (XEXP (orig_dest, 0));
+ src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
+
+ num_reg = offset = 0;
+ for ( ; bytes > 0; (bytes -= move_bytes), (offset += move_bytes))
+ {
+ /* Calculate the correct offset for src/dest. */
+ if (offset == 0)
+ {
+ src_addr = src_reg;
+ dest_addr = dest_reg;
+ }
+ else
+ {
+ src_addr = plus_constant (src_reg, offset);
+ dest_addr = plus_constant (dest_reg, offset);
+ }
+
+ /* Generate the appropriate load and store, saving the stores
+ for later. */
+ if (bytes >= 4 && align >= 4)
+ mode = SImode;
+ else if (bytes >= 2 && align >= 2)
+ mode = HImode;
+ else
+ mode = QImode;
+
+ move_bytes = GET_MODE_SIZE (mode);
+ tmp_reg = gen_reg_rtx (mode);
+ src_mem = change_address (orig_src, mode, src_addr);
+ dest_mem = change_address (orig_dest, mode, dest_addr);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, src_mem));
+ stores[num_reg++] = gen_rtx_SET (VOIDmode, dest_mem, tmp_reg);
+
+ if (num_reg >= MAX_MOVE_REG)
+ {
+ for (i = 0; i < num_reg; i++)
+ emit_insn (stores[i]);
+ num_reg = 0;
+ }
+ }
+
+ for (i = 0; i < num_reg; i++)
+ emit_insn (stores[i]);
+
+ return TRUE;
+}
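+
+/* For example (illustrative only): a 16-byte copy with 4-byte
+   alignment emits four SImode loads followed by four SImode stores,
+   while a 40-byte copy with the same alignment exceeds
+   TOTAL_MOVE_REG * align and is left to the normal memcpy path.  */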
+
+
+/* Expand a block clear operation, and return 1 if successful. Return 0
+ if we should let the compiler generate normal code.
+
+ operands[0] is the destination
+ operands[1] is the length
+ operands[3] is the alignment */
+
+int
+frv_expand_block_clear (rtx operands[])
+{
+ rtx orig_dest = operands[0];
+ rtx bytes_rtx = operands[1];
+ rtx align_rtx = operands[3];
+ int constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ int align;
+ int bytes;
+ int offset;
+ rtx dest_reg;
+ rtx dest_addr;
+ rtx dest_mem;
+ int clear_bytes;
+ enum machine_mode mode;
+
+  /* If this is not a fixed size clear, just call memset.  */
+ if (! constp)
+ return FALSE;
+
+ /* This should be a fixed size alignment. */
+ gcc_assert (GET_CODE (align_rtx) == CONST_INT);
+
+ align = INTVAL (align_rtx);
+
+ /* Anything to move? */
+ bytes = INTVAL (bytes_rtx);
+ if (bytes <= 0)
+ return TRUE;
+
+  /* Don't support really large clears.  */
+ if (bytes > TOTAL_MOVE_REG*align)
+ return FALSE;
+
+ /* Move the address into a scratch register. */
+ dest_reg = copy_addr_to_reg (XEXP (orig_dest, 0));
+
+ offset = 0;
+ for ( ; bytes > 0; (bytes -= clear_bytes), (offset += clear_bytes))
+ {
+ /* Calculate the correct offset for src/dest. */
+ dest_addr = ((offset == 0)
+ ? dest_reg
+ : plus_constant (dest_reg, offset));
+
+ /* Generate the appropriate store of gr0. */
+ if (bytes >= 4 && align >= 4)
+ mode = SImode;
+ else if (bytes >= 2 && align >= 2)
+ mode = HImode;
+ else
+ mode = QImode;
+
+ clear_bytes = GET_MODE_SIZE (mode);
+ dest_mem = change_address (orig_dest, mode, dest_addr);
+ emit_insn (gen_rtx_SET (VOIDmode, dest_mem, const0_rtx));
+ }
+
+ return TRUE;
+}
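+
+/* For example (illustrative only): clearing 6 bytes with 2-byte
+   alignment emits three HImode stores of gr0, while with 4-byte
+   alignment it emits one SImode store followed by one HImode
+   store.  */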
+
+
+/* The following variable holds the operands of the insn currently being
+   output, for use when printing operand modifiers.  */
+
+static rtx *frv_insn_operands;
+
+/* The following function appends the assembler code suffix .p to the
+   insn mnemonic when necessary.  */
+
+const char *
+frv_asm_output_opcode (FILE *f, const char *ptr)
+{
+ int c;
+
+ if (frv_insn_packing_flag <= 0)
+ return ptr;
+
+ for (; *ptr && *ptr != ' ' && *ptr != '\t';)
+ {
+ c = *ptr++;
+ if (c == '%' && ((*ptr >= 'a' && *ptr <= 'z')
+ || (*ptr >= 'A' && *ptr <= 'Z')))
+ {
+ int letter = *ptr++;
+
+ c = atoi (ptr);
+ frv_print_operand (f, frv_insn_operands [c], letter);
+ while ((c = *ptr) >= '0' && c <= '9')
+ ptr++;
+ }
+ else
+ fputc (c, f);
+ }
+
+ fprintf (f, ".p");
+
+ return ptr;
+}
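+
+/* For example (illustrative only): with the packing flag set, the
+   mnemonic of the template "add %1,%2,%0" is emitted as "add.p", and
+   the rest of the template is printed as usual from the returned
+   pointer.  */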
+
+/* Set up the packing bit for the current output insn. Note that this
+ function is not called for asm insns. */
+
+void
+frv_final_prescan_insn (rtx insn, rtx *opvec,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ if (INSN_P (insn))
+ {
+ if (frv_insn_packing_flag >= 0)
+ {
+ frv_insn_operands = opvec;
+ frv_insn_packing_flag = PACKING_FLAG_P (insn);
+ }
+ else if (recog_memoized (insn) >= 0
+ && get_attr_acc_group (insn) == ACC_GROUP_ODD)
+ /* Packing optimizations have been disabled, but INSN can only
+ be issued in M1. Insert an mnop in M0. */
+ fprintf (asm_out_file, "\tmnop.p\n");
+ }
+}
+
+
+
+/* A C expression whose value is RTL representing the address in a stack frame
+ where the pointer to the caller's frame is stored. Assume that FRAMEADDR is
+ an RTL expression for the address of the stack frame itself.
+
+ If you don't define this macro, the default is to return the value of
+ FRAMEADDR--that is, the stack frame address is also the address of the stack
+ word that points to the previous frame. */
+
+/* The default is correct, but we need to make sure the frame gets created. */
+rtx
+frv_dynamic_chain_address (rtx frame)
+{
+ cfun->machine->frame_needed = 1;
+ return frame;
+}
+
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame, after the
+ prologue. FRAMEADDR is the frame pointer of the COUNT frame, or the frame
+ pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' is
+ defined.
+
+ The value of the expression must always be the correct address when COUNT is
+   zero, but may be `NULL_RTX' if there is no way to determine the return
+ address of other frames. */
+
+rtx
+frv_return_addr_rtx (int count, rtx frame)
+{
+ if (count != 0)
+ return const0_rtx;
+ cfun->machine->frame_needed = 1;
+ return gen_rtx_MEM (Pmode, plus_constant (frame, 8));
+}
+
+/* Given a memory reference MEMREF, interpret the referenced memory as
+ an array of MODE values, and return a reference to the element
+ specified by INDEX. Assume that any pre-modification implicit in
+ MEMREF has already happened.
+
+ MEMREF must be a legitimate operand for modes larger than SImode.
+ frv_legitimate_address_p forbids register+register addresses, which
+ this function cannot handle. */
+rtx
+frv_index_memory (rtx memref, enum machine_mode mode, int index)
+{
+ rtx base = XEXP (memref, 0);
+ if (GET_CODE (base) == PRE_MODIFY)
+ base = XEXP (base, 0);
+ return change_address (memref, mode,
+ plus_constant (base, index * GET_MODE_SIZE (mode)));
+}
+
+
+/* Print a memory address as an operand to reference that memory location. */
+static void
+frv_print_operand_address (FILE * stream, rtx x)
+{
+ if (GET_CODE (x) == MEM)
+ x = XEXP (x, 0);
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names [ REGNO (x)], stream);
+ return;
+
+ case CONST_INT:
+ fprintf (stream, "%ld", (long) INTVAL (x));
+ return;
+
+ case SYMBOL_REF:
+ assemble_name (stream, XSTR (x, 0));
+ return;
+
+ case LABEL_REF:
+ case CONST:
+ output_addr_const (stream, x);
+ return;
+
+ case PLUS:
+ /* Poorly constructed asm statements can trigger this alternative.
+ See gcc/testsuite/gcc.dg/asm-4.c for an example. */
+ frv_print_operand_memory_reference (stream, x, 0);
+ return;
+
+ default:
+ break;
+ }
+
+ fatal_insn ("bad insn to frv_print_operand_address:", x);
+}
+
+
+static void
+frv_print_operand_memory_reference_reg (FILE * stream, rtx x)
+{
+ int regno = true_regnum (x);
+ if (GPR_P (regno))
+ fputs (reg_names[regno], stream);
+ else
+ fatal_insn ("bad register to frv_print_operand_memory_reference_reg:", x);
+}
+
+/* Print a memory reference suitable for the ld/st instructions. */
+
+static void
+frv_print_operand_memory_reference (FILE * stream, rtx x, int addr_offset)
+{
+ struct frv_unspec unspec;
+ rtx x0 = NULL_RTX;
+ rtx x1 = NULL_RTX;
+
+ switch (GET_CODE (x))
+ {
+ case SUBREG:
+ case REG:
+ x0 = x;
+ break;
+
+ case PRE_MODIFY: /* (pre_modify (reg) (plus (reg) (reg))) */
+ x0 = XEXP (x, 0);
+ x1 = XEXP (XEXP (x, 1), 1);
+ break;
+
+ case CONST_INT:
+ x1 = x;
+ break;
+
+ case PLUS:
+ x0 = XEXP (x, 0);
+ x1 = XEXP (x, 1);
+ if (GET_CODE (x0) == CONST_INT)
+ {
+ x0 = XEXP (x, 1);
+ x1 = XEXP (x, 0);
+ }
+ break;
+
+ default:
+ fatal_insn ("bad insn to frv_print_operand_memory_reference:", x);
+ break;
+
+ }
+
+ if (addr_offset)
+ {
+ if (!x1)
+ x1 = const0_rtx;
+ else if (GET_CODE (x1) != CONST_INT)
+ fatal_insn ("bad insn to frv_print_operand_memory_reference:", x);
+ }
+
+ fputs ("@(", stream);
+ if (!x0)
+ fputs (reg_names[GPR_R0], stream);
+ else if (GET_CODE (x0) == REG || GET_CODE (x0) == SUBREG)
+ frv_print_operand_memory_reference_reg (stream, x0);
+ else
+ fatal_insn ("bad insn to frv_print_operand_memory_reference:", x);
+
+ fputs (",", stream);
+ if (!x1)
+ fputs (reg_names [GPR_R0], stream);
+
+ else
+ {
+ switch (GET_CODE (x1))
+ {
+ case SUBREG:
+ case REG:
+ frv_print_operand_memory_reference_reg (stream, x1);
+ break;
+
+ case CONST_INT:
+ fprintf (stream, "%ld", (long) (INTVAL (x1) + addr_offset));
+ break;
+
+ case CONST:
+ if (!frv_const_unspec_p (x1, &unspec))
+ fatal_insn ("bad insn to frv_print_operand_memory_reference:", x1);
+ frv_output_const_unspec (stream, &unspec);
+ break;
+
+ default:
+ fatal_insn ("bad insn to frv_print_operand_memory_reference:", x);
+ }
+ }
+
+ fputs (")", stream);
+}
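+
+/* For example (illustrative only): (plus (reg gr5) (const_int 8))
+   prints as "@(gr5,8)", and a lone register prints as "@(gr5,gr0)",
+   with gr0 supplying a zero index.  */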
+
+
+/* Return 2 for likely branches and 0 for non-likely branches.  */
+
+#define FRV_JUMP_LIKELY 2
+#define FRV_JUMP_NOT_LIKELY 0
+
+static int
+frv_print_operand_jump_hint (rtx insn)
+{
+ rtx note;
+ rtx labelref;
+ int ret;
+ HOST_WIDE_INT prob = -1;
+ enum { UNKNOWN, BACKWARD, FORWARD } jump_type = UNKNOWN;
+
+ gcc_assert (GET_CODE (insn) == JUMP_INSN);
+
+ /* Assume any non-conditional jump is likely. */
+ if (! any_condjump_p (insn))
+ ret = FRV_JUMP_LIKELY;
+
+ else
+ {
+ labelref = condjump_label (insn);
+ if (labelref)
+ {
+ rtx label = XEXP (labelref, 0);
+ jump_type = (insn_current_address > INSN_ADDRESSES (INSN_UID (label))
+ ? BACKWARD
+ : FORWARD);
+ }
+
+ note = find_reg_note (insn, REG_BR_PROB, 0);
+ if (!note)
+ ret = ((jump_type == BACKWARD) ? FRV_JUMP_LIKELY : FRV_JUMP_NOT_LIKELY);
+
+ else
+ {
+ prob = INTVAL (XEXP (note, 0));
+ ret = ((prob >= (REG_BR_PROB_BASE / 2))
+ ? FRV_JUMP_LIKELY
+ : FRV_JUMP_NOT_LIKELY);
+ }
+ }
+
+#if 0
+ if (TARGET_DEBUG)
+ {
+ char *direction;
+
+ switch (jump_type)
+ {
+ default:
+ case UNKNOWN: direction = "unknown jump direction"; break;
+ case BACKWARD: direction = "jump backward"; break;
+ case FORWARD: direction = "jump forward"; break;
+ }
+
+ fprintf (stderr,
+ "%s: uid %ld, %s, probability = %ld, max prob. = %ld, hint = %d\n",
+ IDENTIFIER_POINTER (DECL_NAME (current_function_decl)),
+ (long)INSN_UID (insn), direction, (long)prob,
+ (long)REG_BR_PROB_BASE, ret);
+ }
+#endif
+
+ return ret;
+}
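+
+/* For example (illustrative only): an unconditional jump is always
+   hinted likely (2); a conditional branch with a REG_BR_PROB note of
+   at least REG_BR_PROB_BASE / 2 is hinted likely; without a note,
+   backward branches are assumed likely and forward branches are
+   not.  */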
+
+
+/* Return the comparison operator to use for CODE given that the ICC
+ register is OP0. */
+
+static const char *
+comparison_string (enum rtx_code code, rtx op0)
+{
+ bool is_nz_p = GET_MODE (op0) == CC_NZmode;
+ switch (code)
+ {
+ default: output_operand_lossage ("bad condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case LT: return is_nz_p ? "n" : "lt";
+ case LE: return "le";
+ case GT: return "gt";
+ case GE: return is_nz_p ? "p" : "ge";
+ case LTU: return is_nz_p ? "no" : "c";
+ case LEU: return is_nz_p ? "eq" : "ls";
+ case GTU: return is_nz_p ? "ne" : "hi";
+ case GEU: return is_nz_p ? "ra" : "nc";
+ }
+}
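+
+/* For example (illustrative only): GE against a CC_NZmode register
+   prints as "p", while GE against an ordinary ICC register prints
+   as "ge".  */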
+
+/* Print an operand to an assembler instruction.
+
+ `%' followed by a letter and a digit says to output an operand in an
+ alternate fashion. Four letters have standard, built-in meanings
+ described below. The hook `TARGET_PRINT_OPERAND' can define
+ additional letters with nonstandard meanings.
+
+ `%cDIGIT' can be used to substitute an operand that is a constant value
+ without the syntax that normally indicates an immediate operand.
+
+ `%nDIGIT' is like `%cDIGIT' except that the value of the constant is negated
+ before printing.
+
+ `%aDIGIT' can be used to substitute an operand as if it were a memory
+ reference, with the actual operand treated as the address. This may be
+ useful when outputting a "load address" instruction, because often the
+ assembler syntax for such an instruction requires you to write the operand
+ as if it were a memory reference.
+
+ `%lDIGIT' is used to substitute a `label_ref' into a jump instruction.
+
+ `%=' outputs a number which is unique to each instruction in the entire
+ compilation. This is useful for making local labels to be referred to more
+ than once in a single template that generates multiple assembler
+ instructions.
+
+ `%' followed by a punctuation character specifies a substitution that
+ does not use an operand. Only one case is standard: `%%' outputs a
+ `%' into the assembler code. Other nonstandard cases can be defined
+ in the `TARGET_PRINT_OPERAND' hook. You must also define which
+ punctuation characters are valid with the
+ `TARGET_PRINT_OPERAND_PUNCT_VALID_P' hook. */
+
+static void
+frv_print_operand (FILE * file, rtx x, int code)
+{
+ struct frv_unspec unspec;
+ HOST_WIDE_INT value;
+ int offset;
+
+ if (code != 0 && !ISALPHA (code))
+ value = 0;
+
+ else if (GET_CODE (x) == CONST_INT)
+ value = INTVAL (x);
+
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE rv;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ value = l;
+ }
+
+ else if (GET_MODE (x) == VOIDmode)
+ value = CONST_DOUBLE_LOW (x);
+
+ else
+ fatal_insn ("bad insn in frv_print_operand, bad const_double", x);
+ }
+
+ else
+ value = 0;
+
+ switch (code)
+ {
+
+ case '.':
+ /* Output r0. */
+ fputs (reg_names[GPR_R0], file);
+ break;
+
+ case '#':
+ fprintf (file, "%d", frv_print_operand_jump_hint (current_output_insn));
+ break;
+
+ case '@':
+ /* Output small data area base register (gr16). */
+ fputs (reg_names[SDA_BASE_REG], file);
+ break;
+
+ case '~':
+ /* Output pic register (gr17). */
+ fputs (reg_names[PIC_REGNO], file);
+ break;
+
+ case '*':
+ /* Output the temporary integer CCR register. */
+ fputs (reg_names[ICR_TEMP], file);
+ break;
+
+ case '&':
+ /* Output the temporary integer CC register. */
+ fputs (reg_names[ICC_TEMP], file);
+ break;
+
+ /* case 'a': print an address. */
+
+ case 'C':
+ /* Print appropriate test for integer branch false operation. */
+ fputs (comparison_string (reverse_condition (GET_CODE (x)),
+ XEXP (x, 0)), file);
+ break;
+
+ case 'c':
+ /* Print appropriate test for integer branch true operation. */
+ fputs (comparison_string (GET_CODE (x), XEXP (x, 0)), file);
+ break;
+
+ case 'e':
+ /* Print 1 for a NE and 0 for an EQ to give the final argument
+ for a conditional instruction. */
+ if (GET_CODE (x) == NE)
+ fputs ("1", file);
+
+ else if (GET_CODE (x) == EQ)
+ fputs ("0", file);
+
+ else
+ fatal_insn ("bad insn to frv_print_operand, 'e' modifier:", x);
+ break;
+
+ case 'F':
+ /* Print appropriate test for floating point branch false operation. */
+ switch (GET_CODE (x))
+ {
+ default:
+ fatal_insn ("bad insn to frv_print_operand, 'F' modifier:", x);
+
+ case EQ: fputs ("ne", file); break;
+ case NE: fputs ("eq", file); break;
+ case LT: fputs ("uge", file); break;
+ case LE: fputs ("ug", file); break;
+ case GT: fputs ("ule", file); break;
+ case GE: fputs ("ul", file); break;
+ }
+ break;
+
+ case 'f':
+ /* Print appropriate test for floating point branch true operation. */
+ switch (GET_CODE (x))
+ {
+ default:
+ fatal_insn ("bad insn to frv_print_operand, 'f' modifier:", x);
+
+ case EQ: fputs ("eq", file); break;
+ case NE: fputs ("ne", file); break;
+ case LT: fputs ("lt", file); break;
+ case LE: fputs ("le", file); break;
+ case GT: fputs ("gt", file); break;
+ case GE: fputs ("ge", file); break;
+ }
+ break;
+
+ case 'g':
+ /* Print appropriate GOT function. */
+ if (GET_CODE (x) != CONST_INT)
+ fatal_insn ("bad insn to frv_print_operand, 'g' modifier:", x);
+ fputs (unspec_got_name (INTVAL (x)), file);
+ break;
+
+ case 'I':
+ /* Print 'i' if the operand is a constant, or is a memory reference that
+ adds a constant. */
+ if (GET_CODE (x) == MEM)
+ x = ((GET_CODE (XEXP (x, 0)) == PLUS)
+ ? XEXP (XEXP (x, 0), 1)
+ : XEXP (x, 0));
+ else if (GET_CODE (x) == PLUS)
+ x = XEXP (x, 1);
+
+ switch (GET_CODE (x))
+ {
+ default:
+ break;
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case CONST:
+ fputs ("i", file);
+ break;
+ }
+ break;
+
+ case 'i':
+ /* For jump instructions, print 'i' if the operand is a constant or
+ is an expression that adds a constant. */
+ if (GET_CODE (x) == CONST_INT)
+ fputs ("i", file);
+
+ else
+ {
+ if (GET_CODE (x) == CONST_INT
+ || (GET_CODE (x) == PLUS
+ && (GET_CODE (XEXP (x, 1)) == CONST_INT
+ || GET_CODE (XEXP (x, 0)) == CONST_INT)))
+ fputs ("i", file);
+ }
+ break;
+
+ case 'L':
+      /* Print the register holding the low word of a double-word
+	 register pair. */
+      if (GET_CODE (x) == REG)
+	fputs (reg_names[REGNO (x) + 1], file);
+ else
+ fatal_insn ("bad insn to frv_print_operand, 'L' modifier:", x);
+ break;
+
+ /* case 'l': print a LABEL_REF. */
+
+ case 'M':
+ case 'N':
+      /* Print a memory reference for ld/st/jmp; %N prints a memory reference
+	 for the second word of double-word memory operations. */
+ offset = (code == 'M') ? 0 : UNITS_PER_WORD;
+ switch (GET_CODE (x))
+ {
+ default:
+ fatal_insn ("bad insn to frv_print_operand, 'M/N' modifier:", x);
+
+ case MEM:
+ frv_print_operand_memory_reference (file, XEXP (x, 0), offset);
+ break;
+
+ case REG:
+ case SUBREG:
+ case CONST_INT:
+ case PLUS:
+ case SYMBOL_REF:
+ frv_print_operand_memory_reference (file, x, offset);
+ break;
+ }
+ break;
+
+ case 'O':
+      /* Print the assembler mnemonic for an arithmetic operation. */
+ switch (GET_CODE (x))
+ {
+ default:
+ fatal_insn ("bad insn to frv_print_operand, 'O' modifier:", x);
+
+ case PLUS: fputs ("add", file); break;
+ case MINUS: fputs ("sub", file); break;
+ case AND: fputs ("and", file); break;
+ case IOR: fputs ("or", file); break;
+ case XOR: fputs ("xor", file); break;
+ case ASHIFT: fputs ("sll", file); break;
+ case ASHIFTRT: fputs ("sra", file); break;
+ case LSHIFTRT: fputs ("srl", file); break;
+ }
+ break;
+
+ /* case 'n': negate and print a constant int. */
+
+ case 'P':
+ /* Print PIC label using operand as the number. */
+ if (GET_CODE (x) != CONST_INT)
+ fatal_insn ("bad insn to frv_print_operand, P modifier:", x);
+
+ fprintf (file, ".LCF%ld", (long)INTVAL (x));
+ break;
+
+ case 'U':
+      /* Print 'u' if the operand is an updating load/store. */
+ if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+ fputs ("u", file);
+ break;
+
+ case 'z':
+ /* If value is 0, print gr0, otherwise it must be a register. */
+ if (GET_CODE (x) == CONST_INT && INTVAL (x) == 0)
+ fputs (reg_names[GPR_R0], file);
+
+ else if (GET_CODE (x) == REG)
+ fputs (reg_names [REGNO (x)], file);
+
+ else
+ fatal_insn ("bad insn in frv_print_operand, z case", x);
+ break;
+
+ case 'x':
+ /* Print constant in hex. */
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ {
+ fprintf (file, "%s0x%.4lx", IMMEDIATE_PREFIX, (long) value);
+ break;
+ }
+
+ /* Fall through. */
+
+ case '\0':
+ if (GET_CODE (x) == REG)
+ fputs (reg_names [REGNO (x)], file);
+
+ else if (GET_CODE (x) == CONST_INT
+ || GET_CODE (x) == CONST_DOUBLE)
+ fprintf (file, "%s%ld", IMMEDIATE_PREFIX, (long) value);
+
+ else if (frv_const_unspec_p (x, &unspec))
+ frv_output_const_unspec (file, &unspec);
+
+ else if (GET_CODE (x) == MEM)
+ frv_print_operand_address (file, XEXP (x, 0));
+
+ else if (CONSTANT_ADDRESS_P (x))
+ frv_print_operand_address (file, x);
+
+ else
+ fatal_insn ("bad insn in frv_print_operand, 0 case", x);
+
+ break;
+
+ default:
+ fatal_insn ("frv_print_operand: unknown code", x);
+ break;
+ }
+
+ return;
+}
+
+static bool
+frv_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '.' || code == '#' || code == '@' || code == '~'
+ || code == '*' || code == '&');
+}
+
+
+/* A C statement (sans semicolon) for initializing the variable CUM for the
+ state at the beginning of the argument list. The variable has type
+ `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type
+ of the function which will receive the args, or 0 if the args are to a
+ compiler support library function. The value of INDIRECT is nonzero when
+ processing an indirect call, for example a call through a function pointer.
+ The value of INDIRECT is zero for a call to an explicitly named function, a
+ library function call, or when `INIT_CUMULATIVE_ARGS' is used to find
+ arguments for the function being compiled.
+
+ When processing a call to a compiler support library function, LIBNAME
+ identifies which one. It is a `symbol_ref' rtx which contains the name of
+ the function, as a string. LIBNAME is 0 when an ordinary C function call is
+ being processed. Thus, each time this macro is called, either LIBNAME or
+ FNTYPE is nonzero, but never both of them at once. */
+
+void
+frv_init_cumulative_args (CUMULATIVE_ARGS *cum,
+ tree fntype,
+ rtx libname,
+ tree fndecl,
+ int incoming)
+{
+ *cum = FIRST_ARG_REGNUM;
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "\ninit_cumulative_args:");
+ if (!fndecl && fntype)
+ fputs (" indirect", stderr);
+
+ if (incoming)
+ fputs (" incoming", stderr);
+
+ if (fntype)
+ {
+ tree ret_type = TREE_TYPE (fntype);
+ fprintf (stderr, " return=%s,",
+ tree_code_name[ (int)TREE_CODE (ret_type) ]);
+ }
+
+ if (libname && GET_CODE (libname) == SYMBOL_REF)
+ fprintf (stderr, " libname=%s", XSTR (libname, 0));
+
+ if (cfun->returns_struct)
+ fprintf (stderr, " return-struct");
+
+ putc ('\n', stderr);
+ }
+}
+
+
+/* Return true if we should pass an argument on the stack rather than
+ in registers. */
+
+static bool
+frv_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (mode == BLKmode)
+ return true;
+ if (type == NULL)
+ return false;
+ return AGGREGATE_TYPE_P (type);
+}
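+
+/* For example, an argument of type "struct { int a, b; }" satisfies
+   AGGREGATE_TYPE_P and is therefore always passed on the stack, while a
+   scalar "long long" (DImode) may still be passed in registers.  */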
+
+/* If defined, a C expression that gives the alignment boundary, in bits, of an
+ argument with the specified mode and type. If it is not defined,
+ `PARM_BOUNDARY' is used for all arguments. */
+
+static unsigned int
+frv_function_arg_boundary (enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ return BITS_PER_WORD;
+}
+
+static rtx
+frv_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED, bool named,
+ bool incoming ATTRIBUTE_UNUSED)
+{
+ enum machine_mode xmode = (mode == BLKmode) ? SImode : mode;
+ int arg_num = *cum;
+ rtx ret;
+ const char *debstr;
+
+ /* Return a marker for use in the call instruction. */
+ if (xmode == VOIDmode)
+ {
+ ret = const0_rtx;
+ debstr = "<0>";
+ }
+
+ else if (arg_num <= LAST_ARG_REGNUM)
+ {
+ ret = gen_rtx_REG (xmode, arg_num);
+ debstr = reg_names[arg_num];
+ }
+
+ else
+ {
+ ret = NULL_RTX;
+ debstr = "memory";
+ }
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr,
+ "function_arg: words = %2d, mode = %4s, named = %d, size = %3d, arg = %s\n",
+ arg_num, GET_MODE_NAME (mode), named, GET_MODE_SIZE (mode), debstr);
+
+ return ret;
+}
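+
+/* For example, with *cum == FIRST_ARG_REGNUM an SImode argument is
+   returned as a REG for that register (gr8 under the usual FRV
+   convention), whereas once the argument number exceeds LAST_ARG_REGNUM
+   the function returns NULL_RTX and the argument goes in memory.  */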
+
+static rtx
+frv_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return frv_function_arg_1 (cum, mode, type, named, false);
+}
+
+static rtx
+frv_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return frv_function_arg_1 (cum, mode, type, named, true);
+}
+
+
+/* A C statement (sans semicolon) to update the summarizer variable CUM to
+ advance past an argument in the argument list. The values MODE, TYPE and
+ NAMED describe that argument. Once this is done, the variable CUM is
+ suitable for analyzing the *following* argument with `FUNCTION_ARG', etc.
+
+ This macro need not do anything if the argument in question was passed on
+ the stack. The compiler knows how to track the amount of stack space used
+ for arguments without any special help. */
+
+static void
+frv_function_arg_advance (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named)
+{
+ enum machine_mode xmode = (mode == BLKmode) ? SImode : mode;
+ int bytes = GET_MODE_SIZE (xmode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ int arg_num = *cum;
+
+ *cum = arg_num + words;
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr,
+ "function_adv: words = %2d, mode = %4s, named = %d, size = %3d\n",
+ arg_num, GET_MODE_NAME (mode), named, words * UNITS_PER_WORD);
+}
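+
+/* Worked example: advancing past a DImode argument (two words) with
+   *cum == 8 (gr8, assuming the usual argument registers) leaves
+   *cum == 10, so the next argument would start at gr10.  */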
+
+
+/* A C expression for the number of words, at the beginning of an argument,
+   that must be put in registers.  The value must be zero for arguments that
+   are passed entirely in registers or that are entirely pushed on the stack.
+ passed entirely in registers or that are entirely pushed on the stack.
+
+ On some machines, certain arguments must be passed partially in registers
+ and partially in memory. On these machines, typically the first N words of
+ arguments are passed in registers, and the rest on the stack. If a
+ multi-word argument (a `double' or a structure) crosses that boundary, its
+ first few words must be passed in registers and the rest must be pushed.
+ This macro tells the compiler when this occurs, and how many of the words
+ should go in registers.
+
+ `FUNCTION_ARG' for these arguments should return the first register to be
+ used by the caller for this argument; likewise `FUNCTION_INCOMING_ARG', for
+ the called function. */
+
+static int
+frv_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED)
+{
+ enum machine_mode xmode = (mode == BLKmode) ? SImode : mode;
+ int bytes = GET_MODE_SIZE (xmode);
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ int arg_num = *cum;
+ int ret;
+
+ ret = ((arg_num <= LAST_ARG_REGNUM && arg_num + words > LAST_ARG_REGNUM+1)
+ ? LAST_ARG_REGNUM - arg_num + 1
+ : 0);
+ ret *= UNITS_PER_WORD;
+
+ if (TARGET_DEBUG_ARG && ret)
+ fprintf (stderr, "frv_arg_partial_bytes: %d\n", ret);
+
+ return ret;
+}
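+
+/* Worked example (assuming argument registers gr8..gr13): a DImode
+   argument whose first word lands in the last argument register has
+   arg_num == LAST_ARG_REGNUM and words == 2, so the expression above
+   yields one word, i.e. UNITS_PER_WORD partial bytes; the first word
+   is passed in the register and the second on the stack.  */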
+
+
+/* Implements TARGET_FUNCTION_VALUE. */
+
+static rtx
+frv_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM);
+}
+
+
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+frv_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+
+/* Implements FUNCTION_VALUE_REGNO_P. */
+
+bool
+frv_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == RETURN_VALUE_REGNUM);
+}
+
+/* Return true if a register is ok to use as a base or index register. */
+
+static FRV_INLINE int
+frv_regno_ok_for_base_p (int regno, int strict_p)
+{
+ if (GPR_P (regno))
+ return TRUE;
+
+ if (strict_p)
+ return (reg_renumber[regno] >= 0 && GPR_P (reg_renumber[regno]));
+
+ if (regno == ARG_POINTER_REGNUM)
+ return TRUE;
+
+ return (regno >= FIRST_PSEUDO_REGISTER);
+}
+
+
+/* A C compound statement with a conditional `goto LABEL;' executed if X (an
+ RTX) is a legitimate memory address on the target machine for a memory
+ operand of mode MODE.
+
+ It usually pays to define several simpler macros to serve as subroutines for
+ this one. Otherwise it may be too complicated to understand.
+
+ This macro must exist in two variants: a strict variant and a non-strict
+ one. The strict variant is used in the reload pass. It must be defined so
+ that any pseudo-register that has not been allocated a hard register is
+ considered a memory reference. In contexts where some kind of register is
+ required, a pseudo-register with no hard register must be rejected.
+
+ The non-strict variant is used in other passes. It must be defined to
+ accept all pseudo-registers in every context where some kind of register is
+ required.
+
+ Compiler source files that want to use the strict variant of this macro
+ define the macro `REG_OK_STRICT'. You should use an `#ifdef REG_OK_STRICT'
+ conditional to define the strict variant in that case and the non-strict
+ variant otherwise.
+
+ Normally, constant addresses which are the sum of a `symbol_ref' and an
+ integer are stored inside a `const' RTX to mark them as constant.
+ Therefore, there is no need to recognize such sums specifically as
+ legitimate addresses. Normally you would simply recognize any `const' as
+ legitimate.
+
+ Usually `TARGET_PRINT_OPERAND_ADDRESS' is not prepared to handle
+ constant sums that are not marked with `const'. It assumes that a
+ naked `plus' indicates indexing. If so, then you *must* reject such
+ naked constant sums as illegitimate addresses, so that none of them
+ will be given to `TARGET_PRINT_OPERAND_ADDRESS'. */
+
+int
+frv_legitimate_address_p_1 (enum machine_mode mode,
+ rtx x,
+ int strict_p,
+ int condexec_p,
+ int allow_double_reg_p)
+{
+ rtx x0, x1;
+ int ret = 0;
+ HOST_WIDE_INT value;
+ unsigned regno0;
+
+ if (FRV_SYMBOL_REF_TLS_P (x))
+ return 0;
+
+ switch (GET_CODE (x))
+ {
+ default:
+ break;
+
+ case SUBREG:
+ x = SUBREG_REG (x);
+ if (GET_CODE (x) != REG)
+ break;
+
+ /* Fall through. */
+
+ case REG:
+ ret = frv_regno_ok_for_base_p (REGNO (x), strict_p);
+ break;
+
+ case PRE_MODIFY:
+ x0 = XEXP (x, 0);
+ x1 = XEXP (x, 1);
+ if (GET_CODE (x0) != REG
+ || ! frv_regno_ok_for_base_p (REGNO (x0), strict_p)
+ || GET_CODE (x1) != PLUS
+ || ! rtx_equal_p (x0, XEXP (x1, 0))
+ || GET_CODE (XEXP (x1, 1)) != REG
+ || ! frv_regno_ok_for_base_p (REGNO (XEXP (x1, 1)), strict_p))
+ break;
+
+ ret = 1;
+ break;
+
+ case CONST_INT:
+ /* 12-bit immediate */
+ if (condexec_p)
+ ret = FALSE;
+ else
+ {
+ ret = IN_RANGE (INTVAL (x), -2048, 2047);
+
+ /* If we can't use load/store double operations, make sure we can
+ address the second word. */
+ if (ret && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ ret = IN_RANGE (INTVAL (x) + GET_MODE_SIZE (mode) - 1,
+ -2048, 2047);
+ }
+ break;
+
+ case PLUS:
+ x0 = XEXP (x, 0);
+ x1 = XEXP (x, 1);
+
+ if (GET_CODE (x0) == SUBREG)
+ x0 = SUBREG_REG (x0);
+
+ if (GET_CODE (x0) != REG)
+ break;
+
+ regno0 = REGNO (x0);
+ if (!frv_regno_ok_for_base_p (regno0, strict_p))
+ break;
+
+ switch (GET_CODE (x1))
+ {
+ default:
+ break;
+
+ case SUBREG:
+ x1 = SUBREG_REG (x1);
+ if (GET_CODE (x1) != REG)
+ break;
+
+ /* Fall through. */
+
+ case REG:
+ /* Do not allow reg+reg addressing for modes > 1 word if we
+ can't depend on having move double instructions. */
+ if (!allow_double_reg_p && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ ret = FALSE;
+ else
+ ret = frv_regno_ok_for_base_p (REGNO (x1), strict_p);
+ break;
+
+ case CONST_INT:
+ /* 12-bit immediate */
+ if (condexec_p)
+ ret = FALSE;
+ else
+ {
+ value = INTVAL (x1);
+ ret = IN_RANGE (value, -2048, 2047);
+
+ /* If we can't use load/store double operations, make sure we can
+ address the second word. */
+ if (ret && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ ret = IN_RANGE (value + GET_MODE_SIZE (mode) - 1, -2048, 2047);
+ }
+ break;
+
+ case CONST:
+ if (!condexec_p && got12_operand (x1, VOIDmode))
+ ret = TRUE;
+ break;
+
+ }
+ break;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== legitimate_address_p, mode = %s, result = %d, addresses are %sstrict%s\n",
+ GET_MODE_NAME (mode), ret, (strict_p) ? "" : "not ",
+ (condexec_p) ? ", inside conditional code" : "");
+ debug_rtx (x);
+ }
+
+ return ret;
+}
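+
+/* For illustration, the checks above accept addresses such as
+   (reg gr4) and (plus (reg gr4) (const_int 2047)), but reject
+   (plus (reg gr4) (const_int 4096)) because the offset does not fit
+   the signed 12-bit immediate field, and reject any constant offset
+   at all inside conditionally executed code.  */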
+
+bool
+frv_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ return frv_legitimate_address_p_1 (mode, x, strict_p, FALSE, FALSE);
+}
+
+/* Given an ADDR, generate code to inline the PLT. */
+static rtx
+gen_inlined_tls_plt (rtx addr)
+{
+ rtx retval, dest;
+ rtx picreg = get_hard_reg_initial_val (Pmode, FDPIC_REG);
+
+ dest = gen_reg_rtx (DImode);
+
+ if (flag_pic == 1)
+ {
+ /*
+ -fpic version:
+
+ lddi.p @(gr15, #gottlsdesc12(ADDR)), gr8
+ calll #gettlsoff(ADDR)@(gr8, gr0)
+ */
+ emit_insn (gen_tls_lddi (dest, addr, picreg));
+ }
+ else
+ {
+ /*
+ -fPIC version:
+
+ sethi.p #gottlsdeschi(ADDR), gr8
+ setlo #gottlsdesclo(ADDR), gr8
+ ldd #tlsdesc(ADDR)@(gr15, gr8), gr8
+ calll #gettlsoff(ADDR)@(gr8, gr0)
+ */
+ rtx reguse = gen_reg_rtx (Pmode);
+ emit_insn (gen_tlsoff_hilo (reguse, addr, GEN_INT (R_FRV_GOTTLSDESCHI)));
+ emit_insn (gen_tls_tlsdesc_ldd (dest, picreg, reguse, addr));
+ }
+
+ retval = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_indirect_call (retval, addr, dest, picreg));
+ return retval;
+}
+
+/* Emit a TLSMOFF or TLSMOFF12 offset, depending on -mTLS. Returns
+ the destination address. */
+static rtx
+gen_tlsmoff (rtx addr, rtx reg)
+{
+ rtx dest = gen_reg_rtx (Pmode);
+
+ if (TARGET_BIG_TLS)
+ {
+ /* sethi.p #tlsmoffhi(x), grA
+ setlo #tlsmofflo(x), grA
+ */
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_tlsoff_hilo (dest, addr,
+ GEN_INT (R_FRV_TLSMOFFHI)));
+ dest = gen_rtx_PLUS (Pmode, dest, reg);
+ }
+ else
+ {
+ /* addi grB, #tlsmoff12(x), grC
+ -or-
+ ld/st @(grB, #tlsmoff12(x)), grC
+ */
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_symGOTOFF2reg_i (dest, addr, reg,
+ GEN_INT (R_FRV_TLSMOFF12)));
+ }
+ return dest;
+}
+
+/* Generate code for a TLS address. */
+static rtx
+frv_legitimize_tls_address (rtx addr, enum tls_model model)
+{
+ rtx dest, tp = gen_rtx_REG (Pmode, 29);
+ rtx picreg = get_hard_reg_initial_val (Pmode, 15);
+
+ switch (model)
+ {
+ case TLS_MODEL_INITIAL_EXEC:
+ if (flag_pic == 1)
+ {
+ /* -fpic version.
+ ldi @(gr15, #gottlsoff12(x)), gr5
+ */
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_load_gottlsoff12 (dest, addr, picreg));
+ dest = gen_rtx_PLUS (Pmode, tp, dest);
+ }
+ else
+ {
+ /* -fPIC or anything else.
+
+ sethi.p #gottlsoffhi(x), gr14
+ setlo #gottlsofflo(x), gr14
+ ld #tlsoff(x)@(gr15, gr14), gr9
+ */
+ rtx tmp = gen_reg_rtx (Pmode);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_tlsoff_hilo (tmp, addr,
+ GEN_INT (R_FRV_GOTTLSOFF_HI)));
+
+ emit_insn (gen_tls_tlsoff_ld (dest, picreg, tmp, addr));
+ dest = gen_rtx_PLUS (Pmode, tp, dest);
+ }
+ break;
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ {
+ rtx reg, retval;
+
+ if (TARGET_INLINE_PLT)
+ retval = gen_inlined_tls_plt (GEN_INT (0));
+ else
+ {
+ /* call #gettlsoff(0) */
+ retval = gen_reg_rtx (Pmode);
+ emit_insn (gen_call_gettlsoff (retval, GEN_INT (0), picreg));
+ }
+
+ reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_PLUS (Pmode,
+ retval, tp)));
+
+ dest = gen_tlsmoff (addr, reg);
+
+ /*
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_tlsoff_hilo (dest, addr,
+ GEN_INT (R_FRV_TLSMOFFHI)));
+ dest = gen_rtx_PLUS (Pmode, dest, reg);
+ */
+ break;
+ }
+ case TLS_MODEL_LOCAL_EXEC:
+ dest = gen_tlsmoff (addr, gen_rtx_REG (Pmode, 29));
+ break;
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ {
+ rtx retval;
+
+ if (TARGET_INLINE_PLT)
+ retval = gen_inlined_tls_plt (addr);
+ else
+ {
+ /* call #gettlsoff(x) */
+ retval = gen_reg_rtx (Pmode);
+ emit_insn (gen_call_gettlsoff (retval, addr, picreg));
+ }
+ dest = gen_rtx_PLUS (Pmode, retval, tp);
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
+
+rtx
+frv_legitimize_address (rtx x,
+ rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
+ if (model != 0)
+ return frv_legitimize_tls_address (x, model);
+ }
+
+ return x;
+}
+
+/* Test whether a local function descriptor is canonical, i.e.,
+ whether we can use FUNCDESC_GOTOFF to compute the address of the
+ function. */
+
+static bool
+frv_local_funcdesc_p (rtx fnx)
+{
+ tree fn;
+ enum symbol_visibility vis;
+ bool ret;
+
+ if (! SYMBOL_REF_LOCAL_P (fnx))
+ return FALSE;
+
+ fn = SYMBOL_REF_DECL (fnx);
+
+ if (! fn)
+ return FALSE;
+
+ vis = DECL_VISIBILITY (fn);
+
+ if (vis == VISIBILITY_PROTECTED)
+ /* Private function descriptors for protected functions are not
+ canonical. Temporarily change the visibility to global. */
+ vis = VISIBILITY_DEFAULT;
+ else if (flag_shlib)
+ /* If we're already compiling for a shared library (that, unlike
+ executables, can't assume that the existence of a definition
+ implies local binding), we can skip the re-testing. */
+ return TRUE;
+
+ ret = default_binds_local_p_1 (fn, flag_pic);
+
+ DECL_VISIBILITY (fn) = vis;
+
+ return ret;
+}
+
+/* Load the _gp symbol into DEST. SRC is supposed to be the FDPIC
+ register. */
+
+rtx
+frv_gen_GPsym2reg (rtx dest, rtx src)
+{
+ tree gp = get_identifier ("_gp");
+ rtx gp_sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (gp));
+
+ return gen_symGOT2reg (dest, gp_sym, src, GEN_INT (R_FRV_GOT12));
+}
+
+static const char *
+unspec_got_name (int i)
+{
+ switch (i)
+ {
+ case R_FRV_GOT12: return "got12";
+ case R_FRV_GOTHI: return "gothi";
+ case R_FRV_GOTLO: return "gotlo";
+ case R_FRV_FUNCDESC: return "funcdesc";
+ case R_FRV_FUNCDESC_GOT12: return "gotfuncdesc12";
+ case R_FRV_FUNCDESC_GOTHI: return "gotfuncdeschi";
+ case R_FRV_FUNCDESC_GOTLO: return "gotfuncdesclo";
+ case R_FRV_FUNCDESC_VALUE: return "funcdescvalue";
+ case R_FRV_FUNCDESC_GOTOFF12: return "gotofffuncdesc12";
+ case R_FRV_FUNCDESC_GOTOFFHI: return "gotofffuncdeschi";
+ case R_FRV_FUNCDESC_GOTOFFLO: return "gotofffuncdesclo";
+ case R_FRV_GOTOFF12: return "gotoff12";
+ case R_FRV_GOTOFFHI: return "gotoffhi";
+ case R_FRV_GOTOFFLO: return "gotofflo";
+ case R_FRV_GPREL12: return "gprel12";
+ case R_FRV_GPRELHI: return "gprelhi";
+ case R_FRV_GPRELLO: return "gprello";
+ case R_FRV_GOTTLSOFF_HI: return "gottlsoffhi";
+ case R_FRV_GOTTLSOFF_LO: return "gottlsofflo";
+ case R_FRV_TLSMOFFHI: return "tlsmoffhi";
+ case R_FRV_TLSMOFFLO: return "tlsmofflo";
+ case R_FRV_TLSMOFF12: return "tlsmoff12";
+ case R_FRV_TLSDESCHI: return "tlsdeschi";
+ case R_FRV_TLSDESCLO: return "tlsdesclo";
+ case R_FRV_GOTTLSDESCHI: return "gottlsdeschi";
+ case R_FRV_GOTTLSDESCLO: return "gottlsdesclo";
+ default: gcc_unreachable ();
+ }
+}
+
+/* Write the assembler syntax for UNSPEC to STREAM. Note that any offset
+ is added inside the relocation operator. */
+
+static void
+frv_output_const_unspec (FILE *stream, const struct frv_unspec *unspec)
+{
+ fprintf (stream, "#%s(", unspec_got_name (unspec->reloc));
+ output_addr_const (stream, plus_constant (unspec->symbol, unspec->offset));
+ fputs (")", stream);
+}
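+
+/* For example, an unspec with relocation R_FRV_GPREL12, symbol "foo"
+   and offset 4 is written as "#gprel12(foo+4)".  */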
+
+/* Implement FIND_BASE_TERM. See whether ORIG_X represents #gprel12(foo)
+ or #gotoff12(foo) for some small data symbol foo. If so, return foo,
+ otherwise return ORIG_X. */
+
+rtx
+frv_find_base_term (rtx x)
+{
+ struct frv_unspec unspec;
+
+ if (frv_const_unspec_p (x, &unspec)
+ && frv_small_data_reloc_p (unspec.symbol, unspec.reloc))
+ return plus_constant (unspec.symbol, unspec.offset);
+
+ return x;
+}
+
+/* Return 1 if the operand is a valid FRV address.  CONDEXEC_P is true if
+ the operand is used by a predicated instruction. */
+
+int
+frv_legitimate_memory_operand (rtx op, enum machine_mode mode, int condexec_p)
+{
+ return ((GET_MODE (op) == mode || mode == VOIDmode)
+ && GET_CODE (op) == MEM
+ && frv_legitimate_address_p_1 (mode, XEXP (op, 0),
+ reload_completed, condexec_p, FALSE));
+}
+
+void
+frv_expand_fdpic_call (rtx *operands, bool ret_value, bool sibcall)
+{
+ rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+ rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REG);
+  rtx c, rvrtx = 0;
+ rtx addr;
+
+ if (ret_value)
+ {
+ rvrtx = operands[0];
+      operands++;
+ }
+
+ addr = XEXP (operands[0], 0);
+
+ /* Inline PLTs if we're optimizing for speed. We'd like to inline
+ any calls that would involve a PLT, but can't tell, since we
+ don't know whether an extern function is going to be provided by
+ a separate translation unit or imported from a separate module.
+ When compiling for shared libraries, if the function has default
+ visibility, we assume it's overridable, so we inline the PLT, but
+ for executables, we don't really have a way to make a good
+ decision: a function is as likely to be imported from a shared
+ library as it is to be defined in the executable itself. We
+ assume executables will get global functions defined locally,
+ whereas shared libraries will have them potentially overridden,
+ so we only inline PLTs when compiling for shared libraries.
+
+ In order to mark a function as local to a shared library, any
+ non-default visibility attribute suffices. Unfortunately,
+ there's no simple way to tag a function declaration as ``in a
+ different module'', which we could then use to trigger PLT
+ inlining on executables. There's -minline-plt, but it affects
+ all external functions, so one would have to also mark function
+ declarations available in the same module with non-default
+ visibility, which is advantageous in itself. */
+ if (GET_CODE (addr) == SYMBOL_REF
+ && ((!SYMBOL_REF_LOCAL_P (addr) && TARGET_INLINE_PLT)
+ || sibcall))
+ {
+ rtx x, dest;
+ dest = gen_reg_rtx (SImode);
+ if (flag_pic != 1)
+ x = gen_symGOTOFF2reg_hilo (dest, addr, OUR_FDPIC_REG,
+ GEN_INT (R_FRV_FUNCDESC_GOTOFF12));
+ else
+ x = gen_symGOTOFF2reg (dest, addr, OUR_FDPIC_REG,
+ GEN_INT (R_FRV_FUNCDESC_GOTOFF12));
+ emit_insn (x);
+ crtl->uses_pic_offset_table = TRUE;
+ addr = dest;
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF)
+ {
+ /* These are always either local, or handled through a local
+ PLT. */
+ if (ret_value)
+ c = gen_call_value_fdpicsi (rvrtx, addr, operands[1],
+ operands[2], picreg, lr);
+ else
+ c = gen_call_fdpicsi (addr, operands[1], operands[2], picreg, lr);
+ emit_call_insn (c);
+ return;
+ }
+ else if (! ldd_address_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ picreg = gen_reg_rtx (DImode);
+ emit_insn (gen_movdi_ldd (picreg, addr));
+
+ if (sibcall && ret_value)
+ c = gen_sibcall_value_fdpicdi (rvrtx, picreg, const0_rtx);
+ else if (sibcall)
+ c = gen_sibcall_fdpicdi (picreg, const0_rtx);
+ else if (ret_value)
+ c = gen_call_value_fdpicdi (rvrtx, picreg, const0_rtx, lr);
+ else
+ c = gen_call_fdpicdi (picreg, const0_rtx, lr);
+ emit_call_insn (c);
+}
+
+/* Look for a SYMBOL_REF of a function in an rtx. We always want to
+ process these separately from any offsets, such that we add any
+ offsets to the function descriptor (the actual pointer), not to the
+ function address. */
+
+static bool
+frv_function_symbol_referenced_p (rtx x)
+{
+ const char *format;
+ int length;
+ int j;
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ return SYMBOL_REF_FUNCTION_P (x);
+
+ length = GET_RTX_LENGTH (GET_CODE (x));
+ format = GET_RTX_FORMAT (GET_CODE (x));
+
+ for (j = 0; j < length; ++j)
+ {
+ switch (format[j])
+ {
+ case 'e':
+ if (frv_function_symbol_referenced_p (XEXP (x, j)))
+ return TRUE;
+ break;
+
+ case 'V':
+ case 'E':
+ if (XVEC (x, j) != 0)
+ {
+ int k;
+ for (k = 0; k < XVECLEN (x, j); ++k)
+ if (frv_function_symbol_referenced_p (XVECEXP (x, j, k)))
+ return TRUE;
+ }
+ break;
+
+ default:
+ /* Nothing to do. */
+ break;
+ }
+ }
+
+ return FALSE;
+}
+
+/* Return true if the memory operand can be used in a conditionally
+   executed (predicated) instruction. */
+
+int
+condexec_memory_operand (rtx op, enum machine_mode mode)
+{
+ enum machine_mode op_mode = GET_MODE (op);
+ rtx addr;
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (op_mode)
+ {
+ default:
+ return FALSE;
+
+ case QImode:
+ case HImode:
+ case SImode:
+ case SFmode:
+ break;
+ }
+
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ addr = XEXP (op, 0);
+ return frv_legitimate_address_p_1 (mode, addr, reload_completed, TRUE, FALSE);
+}
+
+/* Return true if the bare return instruction can be used outside of the
+   epilogue code.  For FRV, we only do it if there was no stack allocation. */
+
+int
+direct_return_p (void)
+{
+ frv_stack_t *info;
+
+ if (!reload_completed)
+ return FALSE;
+
+ info = frv_stack_info ();
+ return (info->total_size == 0);
+}
+
+
+void
+frv_emit_move (enum machine_mode mode, rtx dest, rtx src)
+{
+ if (GET_CODE (src) == SYMBOL_REF)
+ {
+ enum tls_model model = SYMBOL_REF_TLS_MODEL (src);
+ if (model != 0)
+ src = frv_legitimize_tls_address (src, model);
+ }
+
+ switch (mode)
+ {
+ case SImode:
+ if (frv_emit_movsi (dest, src))
+ return;
+ break;
+
+ case QImode:
+ case HImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ if (!reload_in_progress
+ && !reload_completed
+ && !register_operand (dest, mode)
+ && !reg_or_0_operand (src, mode))
+ src = copy_to_mode_reg (mode, src);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, src));
+}
+
+/* Emit code to handle a MOVSI, adding in the small data register or pic
+ register if needed to load up addresses. Return TRUE if the appropriate
+ instructions are emitted. */
+
+int
+frv_emit_movsi (rtx dest, rtx src)
+{
+ int base_regno = -1;
+ int unspec = 0;
+ rtx sym = src;
+ struct frv_unspec old_unspec;
+
+ if (!reload_in_progress
+ && !reload_completed
+ && !register_operand (dest, SImode)
+ && (!reg_or_0_operand (src, SImode)
+ /* Virtual registers will almost always be replaced by an
+ add instruction, so expose this to CSE by copying to
+ an intermediate register. */
+ || (GET_CODE (src) == REG
+ && IN_RANGE (REGNO (src),
+ FIRST_VIRTUAL_REGISTER,
+ LAST_VIRTUAL_POINTER_REGISTER))))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest, copy_to_mode_reg (SImode, src)));
+ return TRUE;
+ }
+
+ /* Explicitly add in the PIC or small data register if needed. */
+ switch (GET_CODE (src))
+ {
+ default:
+ break;
+
+ case LABEL_REF:
+ handle_label:
+ if (TARGET_FDPIC)
+ {
+ /* Using GPREL12, we use a single GOT entry for all symbols
+ in read-only sections, but trade sequences such as:
+
+ sethi #gothi(label), gr#
+ setlo #gotlo(label), gr#
+ ld @(gr15,gr#), gr#
+
+ for
+
+ ld @(gr15,#got12(_gp)), gr#
+ sethi #gprelhi(label), gr##
+ setlo #gprello(label), gr##
+ add gr#, gr##, gr##
+
+ We may often be able to share gr# for multiple
+ computations of GPREL addresses, and we may often fold
+ the final add into the pair of registers of a load or
+ store instruction, so it's often profitable. Even when
+ optimizing for size, we're trading a GOT entry for an
+ additional instruction, which trades GOT space
+ (read-write) for code size (read-only, shareable), as
+ long as the symbol is not used in more than two different
+ locations.
+
+ With -fpie/-fpic, we'd be trading a single load for a
+ sequence of 4 instructions, because the offset of the
+ label can't be assumed to be addressable with 12 bits, so
+ we don't do this. */
+ if (TARGET_GPREL_RO)
+ unspec = R_FRV_GPREL12;
+ else
+ unspec = R_FRV_GOT12;
+ }
+ else if (flag_pic)
+ base_regno = PIC_REGNO;
+
+ break;
+
+ case CONST:
+ if (frv_const_unspec_p (src, &old_unspec))
+ break;
+
+ if (TARGET_FDPIC && frv_function_symbol_referenced_p (XEXP (src, 0)))
+ {
+ handle_whatever:
+ src = force_reg (GET_MODE (XEXP (src, 0)), XEXP (src, 0));
+ emit_move_insn (dest, src);
+ return TRUE;
+ }
+ else
+ {
+ sym = XEXP (sym, 0);
+ if (GET_CODE (sym) == PLUS
+ && GET_CODE (XEXP (sym, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (sym, 1)) == CONST_INT)
+ sym = XEXP (sym, 0);
+ if (GET_CODE (sym) == SYMBOL_REF)
+ goto handle_sym;
+ else if (GET_CODE (sym) == LABEL_REF)
+ goto handle_label;
+ else
+ goto handle_whatever;
+ }
+ break;
+
+ case SYMBOL_REF:
+ handle_sym:
+ if (TARGET_FDPIC)
+ {
+ enum tls_model model = SYMBOL_REF_TLS_MODEL (sym);
+
+ if (model != 0)
+ {
+ src = frv_legitimize_tls_address (src, model);
+ emit_move_insn (dest, src);
+ return TRUE;
+ }
+
+ if (SYMBOL_REF_FUNCTION_P (sym))
+ {
+ if (frv_local_funcdesc_p (sym))
+ unspec = R_FRV_FUNCDESC_GOTOFF12;
+ else
+ unspec = R_FRV_FUNCDESC_GOT12;
+ }
+ else
+ {
+ if (CONSTANT_POOL_ADDRESS_P (sym))
+ switch (GET_CODE (get_pool_constant (sym)))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (flag_pic)
+ {
+ unspec = R_FRV_GOTOFF12;
+ break;
+ }
+ /* Fall through. */
+ default:
+ if (TARGET_GPREL_RO)
+ unspec = R_FRV_GPREL12;
+ else
+ unspec = R_FRV_GOT12;
+ break;
+ }
+ else if (SYMBOL_REF_LOCAL_P (sym)
+ && !SYMBOL_REF_EXTERNAL_P (sym)
+ && SYMBOL_REF_DECL (sym)
+ && (!DECL_P (SYMBOL_REF_DECL (sym))
+ || !DECL_COMMON (SYMBOL_REF_DECL (sym))))
+ {
+ tree decl = SYMBOL_REF_DECL (sym);
+ tree init = TREE_CODE (decl) == VAR_DECL
+ ? DECL_INITIAL (decl)
+ : TREE_CODE (decl) == CONSTRUCTOR
+ ? decl : 0;
+ int reloc = 0;
+ bool named_section, readonly;
+
+ if (init && init != error_mark_node)
+ reloc = compute_reloc_for_constant (init);
+
+ named_section = TREE_CODE (decl) == VAR_DECL
+ && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
+ readonly = decl_readonly_section (decl, reloc);
+
+ if (named_section)
+ unspec = R_FRV_GOT12;
+ else if (!readonly)
+ unspec = R_FRV_GOTOFF12;
+ else if (readonly && TARGET_GPREL_RO)
+ unspec = R_FRV_GPREL12;
+ else
+ unspec = R_FRV_GOT12;
+ }
+ else
+ unspec = R_FRV_GOT12;
+ }
+ }
+
+ else if (SYMBOL_REF_SMALL_P (sym))
+ base_regno = SDA_BASE_REG;
+
+ else if (flag_pic)
+ base_regno = PIC_REGNO;
+
+ break;
+ }
+
+ if (base_regno >= 0)
+ {
+ if (GET_CODE (sym) == SYMBOL_REF && SYMBOL_REF_SMALL_P (sym))
+ emit_insn (gen_symGOTOFF2reg (dest, src,
+ gen_rtx_REG (Pmode, base_regno),
+ GEN_INT (R_FRV_GPREL12)));
+ else
+ emit_insn (gen_symGOTOFF2reg_hilo (dest, src,
+ gen_rtx_REG (Pmode, base_regno),
+ GEN_INT (R_FRV_GPREL12)));
+ if (base_regno == PIC_REGNO)
+ crtl->uses_pic_offset_table = TRUE;
+ return TRUE;
+ }
+
+ if (unspec)
+ {
+ rtx x;
+
+ /* Since OUR_FDPIC_REG is a pseudo register, we can't safely introduce
+ new uses of it once reload has begun. */
+ gcc_assert (!reload_in_progress && !reload_completed);
+
+ switch (unspec)
+ {
+ case R_FRV_GOTOFF12:
+ if (!frv_small_data_reloc_p (sym, unspec))
+ x = gen_symGOTOFF2reg_hilo (dest, src, OUR_FDPIC_REG,
+ GEN_INT (unspec));
+ else
+ x = gen_symGOTOFF2reg (dest, src, OUR_FDPIC_REG, GEN_INT (unspec));
+ break;
+ case R_FRV_GPREL12:
+ if (!frv_small_data_reloc_p (sym, unspec))
+ x = gen_symGPREL2reg_hilo (dest, src, OUR_FDPIC_REG,
+ GEN_INT (unspec));
+ else
+ x = gen_symGPREL2reg (dest, src, OUR_FDPIC_REG, GEN_INT (unspec));
+ break;
+ case R_FRV_FUNCDESC_GOTOFF12:
+ if (flag_pic != 1)
+ x = gen_symGOTOFF2reg_hilo (dest, src, OUR_FDPIC_REG,
+ GEN_INT (unspec));
+ else
+ x = gen_symGOTOFF2reg (dest, src, OUR_FDPIC_REG, GEN_INT (unspec));
+ break;
+ default:
+ if (flag_pic != 1)
+ x = gen_symGOT2reg_hilo (dest, src, OUR_FDPIC_REG,
+ GEN_INT (unspec));
+ else
+ x = gen_symGOT2reg (dest, src, OUR_FDPIC_REG, GEN_INT (unspec));
+ break;
+ }
+ emit_insn (x);
+ crtl->uses_pic_offset_table = TRUE;
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/* Return a string to output a single word move. */
+
+const char *
+output_move_single (rtx operands[], rtx insn)
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ if (GET_CODE (dest) == REG)
+ {
+ int dest_regno = REGNO (dest);
+ enum machine_mode mode = GET_MODE (dest);
+
+ if (GPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* gpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "mov %1, %0";
+
+ else if (FPR_P (src_regno))
+ return "movfg %1, %0";
+
+ else if (SPR_P (src_regno))
+ return "movsg %1, %0";
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* gpr <- memory */
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "ldsb%I1%U1 %M1,%0";
+
+ case HImode:
+ return "ldsh%I1%U1 %M1,%0";
+
+ case SImode:
+ case SFmode:
+ return "ld%I1%U1 %M1, %0";
+ }
+ }
+
+ else if (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE)
+ {
+ /* gpr <- integer/floating constant */
+ HOST_WIDE_INT value;
+
+ if (GET_CODE (src) == CONST_INT)
+ value = INTVAL (src);
+
+ else if (mode == SFmode)
+ {
+ REAL_VALUE_TYPE rv;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, src);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ value = l;
+ }
+
+ else
+ value = CONST_DOUBLE_LOW (src);
+
+ if (IN_RANGE (value, -32768, 32767))
+ return "setlos %1, %0";
+
+ return "#";
+ }
+
+ else if (GET_CODE (src) == SYMBOL_REF
+ || GET_CODE (src) == LABEL_REF
+ || GET_CODE (src) == CONST)
+ {
+ return "#";
+ }
+ }
+
+ else if (FPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* fpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "movgf %1, %0";
+
+ else if (FPR_P (src_regno))
+ {
+ if (TARGET_HARD_FLOAT)
+ return "fmovs %1, %0";
+ else
+ return "mor %1, %1, %0";
+ }
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* fpr <- memory */
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "ldbf%I1%U1 %M1,%0";
+
+ case HImode:
+ return "ldhf%I1%U1 %M1,%0";
+
+ case SImode:
+ case SFmode:
+ return "ldf%I1%U1 %M1, %0";
+ }
+ }
+
+ else if (ZERO_P (src))
+ return "movgf %., %0";
+ }
+
+ else if (SPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* spr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "movgs %1, %0";
+ }
+ else if (ZERO_P (src))
+ return "movgs %., %0";
+ }
+ }
+
+ else if (GET_CODE (dest) == MEM)
+ {
+ if (GET_CODE (src) == REG)
+ {
+ int src_regno = REGNO (src);
+ enum machine_mode mode = GET_MODE (dest);
+
+ if (GPR_P (src_regno))
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "stb%I0%U0 %1, %M0";
+
+ case HImode:
+ return "sth%I0%U0 %1, %M0";
+
+ case SImode:
+ case SFmode:
+ return "st%I0%U0 %1, %M0";
+ }
+ }
+
+ else if (FPR_P (src_regno))
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "stbf%I0%U0 %1, %M0";
+
+ case HImode:
+ return "sthf%I0%U0 %1, %M0";
+
+ case SImode:
+ case SFmode:
+ return "stf%I0%U0 %1, %M0";
+ }
+ }
+ }
+
+ else if (ZERO_P (src))
+ {
+ switch (GET_MODE (dest))
+ {
+ default:
+ break;
+
+ case QImode:
+ return "stb%I0%U0 %., %M0";
+
+ case HImode:
+ return "sth%I0%U0 %., %M0";
+
+ case SImode:
+ case SFmode:
+ return "st%I0%U0 %., %M0";
+ }
+ }
+ }
+
+ fatal_insn ("bad output_move_single operand", insn);
+ return "";
+}
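+
+/* In summary: "mov"/"movfg"/"movsg" handle register-to-register moves,
+   the ld/st variants handle memory, "setlos" handles constants in the
+   signed 16-bit range, and returning "#" tells the output machinery
+   that the move must be split into multiple instructions.  */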
+
+
+/* Return a string to output a double word move. */
+
+const char *
+output_move_double (rtx operands[], rtx insn)
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+
+ if (GET_CODE (dest) == REG)
+ {
+ int dest_regno = REGNO (dest);
+
+ if (GPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* gpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "#";
+
+ else if (FPR_P (src_regno))
+ {
+ if (((dest_regno - GPR_FIRST) & 1) == 0
+ && ((src_regno - FPR_FIRST) & 1) == 0)
+ return "movfgd %1, %0";
+
+ return "#";
+ }
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* gpr <- memory */
+ if (dbl_memory_one_insn_operand (src, mode))
+ return "ldd%I1%U1 %M1, %0";
+
+ return "#";
+ }
+
+ else if (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE)
+ return "#";
+ }
+
+ else if (FPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* fpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ {
+ if (((dest_regno - FPR_FIRST) & 1) == 0
+ && ((src_regno - GPR_FIRST) & 1) == 0)
+ return "movgfd %1, %0";
+
+ return "#";
+ }
+
+ else if (FPR_P (src_regno))
+ {
+ if (TARGET_DOUBLE
+ && ((dest_regno - FPR_FIRST) & 1) == 0
+ && ((src_regno - FPR_FIRST) & 1) == 0)
+ return "fmovd %1, %0";
+
+ return "#";
+ }
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* fpr <- memory */
+ if (dbl_memory_one_insn_operand (src, mode))
+ return "lddf%I1%U1 %M1, %0";
+
+ return "#";
+ }
+
+ else if (ZERO_P (src))
+ return "#";
+ }
+ }
+
+ else if (GET_CODE (dest) == MEM)
+ {
+ if (GET_CODE (src) == REG)
+ {
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ {
+ if (((src_regno - GPR_FIRST) & 1) == 0
+ && dbl_memory_one_insn_operand (dest, mode))
+ return "std%I0%U0 %1, %M0";
+
+ return "#";
+ }
+
+ if (FPR_P (src_regno))
+ {
+ if (((src_regno - FPR_FIRST) & 1) == 0
+ && dbl_memory_one_insn_operand (dest, mode))
+ return "stdf%I0%U0 %1, %M0";
+
+ return "#";
+ }
+ }
+
+ else if (ZERO_P (src))
+ {
+ if (dbl_memory_one_insn_operand (dest, mode))
+ return "std%I0%U0 %., %M0";
+
+ return "#";
+ }
+ }
+
+ fatal_insn ("bad output_move_double operand", insn);
+ return "";
+}
+
+
+/* Return a string to output a single word conditional move.
+ Operand0 -- EQ/NE of ccr register and 0
+ Operand1 -- CCR register
+ Operand2 -- destination
+ Operand3 -- source */
+
+const char *
+output_condmove_single (rtx operands[], rtx insn)
+{
+ rtx dest = operands[2];
+ rtx src = operands[3];
+
+ if (GET_CODE (dest) == REG)
+ {
+ int dest_regno = REGNO (dest);
+ enum machine_mode mode = GET_MODE (dest);
+
+ if (GPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* gpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "cmov %z3, %2, %1, %e0";
+
+ else if (FPR_P (src_regno))
+ return "cmovfg %3, %2, %1, %e0";
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* gpr <- memory */
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "cldsb%I3%U3 %M3, %2, %1, %e0";
+
+ case HImode:
+ return "cldsh%I3%U3 %M3, %2, %1, %e0";
+
+ case SImode:
+ case SFmode:
+ return "cld%I3%U3 %M3, %2, %1, %e0";
+ }
+ }
+
+ else if (ZERO_P (src))
+ return "cmov %., %2, %1, %e0";
+ }
+
+ else if (FPR_P (dest_regno))
+ {
+ if (GET_CODE (src) == REG)
+ {
+ /* fpr <- some sort of register */
+ int src_regno = REGNO (src);
+
+ if (GPR_P (src_regno))
+ return "cmovgf %3, %2, %1, %e0";
+
+ else if (FPR_P (src_regno))
+ {
+ if (TARGET_HARD_FLOAT)
+ return "cfmovs %3,%2,%1,%e0";
+ else
+ return "cmor %3, %3, %2, %1, %e0";
+ }
+ }
+
+ else if (GET_CODE (src) == MEM)
+ {
+ /* fpr <- memory */
+ if (mode == SImode || mode == SFmode)
+ return "cldf%I3%U3 %M3, %2, %1, %e0";
+ }
+
+ else if (ZERO_P (src))
+ return "cmovgf %., %2, %1, %e0";
+ }
+ }
+
+ else if (GET_CODE (dest) == MEM)
+ {
+ if (GET_CODE (src) == REG)
+ {
+ int src_regno = REGNO (src);
+ enum machine_mode mode = GET_MODE (dest);
+
+ if (GPR_P (src_regno))
+ {
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "cstb%I2%U2 %3, %M2, %1, %e0";
+
+ case HImode:
+ return "csth%I2%U2 %3, %M2, %1, %e0";
+
+ case SImode:
+ case SFmode:
+ return "cst%I2%U2 %3, %M2, %1, %e0";
+ }
+ }
+
+ else if (FPR_P (src_regno) && (mode == SImode || mode == SFmode))
+ return "cstf%I2%U2 %3, %M2, %1, %e0";
+ }
+
+ else if (ZERO_P (src))
+ {
+ enum machine_mode mode = GET_MODE (dest);
+ switch (mode)
+ {
+ default:
+ break;
+
+ case QImode:
+ return "cstb%I2%U2 %., %M2, %1, %e0";
+
+ case HImode:
+ return "csth%I2%U2 %., %M2, %1, %e0";
+
+ case SImode:
+ case SFmode:
+ return "cst%I2%U2 %., %M2, %1, %e0";
+ }
+ }
+ }
+
+ fatal_insn ("bad output_condmove_single operand", insn);
+ return "";
+}
+
+
+/* Emit the appropriate code to do a comparison, returning the register in
+   which the comparison was done. */
+
+static rtx
+frv_emit_comparison (enum rtx_code test, rtx op0, rtx op1)
+{
+ enum machine_mode cc_mode;
+ rtx cc_reg;
+
+ /* Floating point doesn't have comparison against a constant. */
+ if (GET_MODE (op0) == CC_FPmode && GET_CODE (op1) != REG)
+ op1 = force_reg (GET_MODE (op0), op1);
+
+ /* Possibly disable using anything but a fixed register in order to work
+ around cse moving comparisons past function calls. */
+ cc_mode = SELECT_CC_MODE (test, op0, op1);
+ cc_reg = ((TARGET_ALLOC_CC)
+ ? gen_reg_rtx (cc_mode)
+ : gen_rtx_REG (cc_mode,
+ (cc_mode == CC_FPmode) ? FCC_FIRST : ICC_FIRST));
+
+ emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
+ gen_rtx_COMPARE (cc_mode, op0, op1)));
+
+ return cc_reg;
+}
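+
+/* The emitted RTL always has the shape
+     (set (reg cc) (compare op0 op1))
+   where the mode of CC comes from SELECT_CC_MODE; under TARGET_ALLOC_CC
+   the register is a fresh pseudo rather than a fixed ICC/FCC register.  */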
+
+
+/* Emit code for a conditional branch.
+ XXX: I originally wanted to add a clobber of a CCR register to use in
+ conditional execution, but that confuses the rest of the compiler. */
+
+int
+frv_emit_cond_branch (rtx operands[])
+{
+ rtx test_rtx;
+ rtx label_ref;
+ rtx if_else;
+ enum rtx_code test = GET_CODE (operands[0]);
+ rtx cc_reg = frv_emit_comparison (test, operands[1], operands[2]);
+ enum machine_mode cc_mode = GET_MODE (cc_reg);
+
+ /* Branches generate:
+ (set (pc)
+ (if_then_else (<test>, <cc_reg>, (const_int 0))
+ (label_ref <branch_label>)
+ (pc))) */
+ label_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ test_rtx = gen_rtx_fmt_ee (test, cc_mode, cc_reg, const0_rtx);
+ if_else = gen_rtx_IF_THEN_ELSE (cc_mode, test_rtx, label_ref, pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_else));
+ return TRUE;
+}
+
+
+/* Emit code to set a gpr to 1/0 based on a comparison. */
+
+int
+frv_emit_scc (rtx operands[])
+{
+ rtx set;
+ rtx test_rtx;
+ rtx clobber;
+ rtx cr_reg;
+ enum rtx_code test = GET_CODE (operands[1]);
+ rtx cc_reg = frv_emit_comparison (test, operands[2], operands[3]);
+
+ /* SCC instructions generate:
+ (parallel [(set <target> (<test>, <cc_reg>, (const_int 0))
+ (clobber (<ccr_reg>))]) */
+ test_rtx = gen_rtx_fmt_ee (test, SImode, cc_reg, const0_rtx);
+ set = gen_rtx_SET (VOIDmode, operands[0], test_rtx);
+
+ cr_reg = ((TARGET_ALLOC_CC)
+ ? gen_reg_rtx (CC_CCRmode)
+ : gen_rtx_REG (CC_CCRmode,
+ ((GET_MODE (cc_reg) == CC_FPmode)
+ ? FCR_FIRST
+ : ICR_FIRST)));
+
+ clobber = gen_rtx_CLOBBER (VOIDmode, cr_reg);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ return TRUE;
+}
+
+
+/* Split a SCC instruction into component parts, returning a SEQUENCE to hold
+ the separate insns. */
+
+rtx
+frv_split_scc (rtx dest, rtx test, rtx cc_reg, rtx cr_reg, HOST_WIDE_INT value)
+{
+ rtx ret;
+
+ start_sequence ();
+
+ /* Set the appropriate CCR bit. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cr_reg,
+ gen_rtx_fmt_ee (GET_CODE (test),
+ GET_MODE (cr_reg),
+ cc_reg,
+ const0_rtx)));
+
+ /* Move the value into the destination. */
+ emit_move_insn (dest, GEN_INT (value));
+
+  /* Move 0 into the destination if the test failed. */
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (GET_MODE (cr_reg),
+ cr_reg,
+ const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, const0_rtx)));
+
+ /* Finish up, return sequence. */
+ ret = get_insns ();
+ end_sequence ();
+ return ret;
+}
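+
+/* Sketch of the emitted sequence, e.g. for VALUE == 1:
+     cr_reg = (<test> cc_reg 0)		; set the CCR bit
+     dest   = 1				; assume the test held
+     if (cr_reg == 0) dest = 0		; otherwise clear the result  */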
+
+
+/* Emit the code for a conditional move; return TRUE if we could do the
+ move. */
+
+int
+frv_emit_cond_move (rtx dest, rtx test_rtx, rtx src1, rtx src2)
+{
+ rtx set;
+ rtx clobber_cc;
+ rtx test2;
+ rtx cr_reg;
+ rtx if_rtx;
+ enum rtx_code test = GET_CODE (test_rtx);
+ rtx cc_reg = frv_emit_comparison (test,
+ XEXP (test_rtx, 0), XEXP (test_rtx, 1));
+ enum machine_mode cc_mode = GET_MODE (cc_reg);
+
+ /* Conditional move instructions generate:
+ (parallel [(set <target>
+ (if_then_else (<test> <cc_reg> (const_int 0))
+ <src1>
+ <src2>))
+ (clobber (<ccr_reg>))]) */
+
+ /* Handle various cases of conditional move involving two constants. */
+ if (GET_CODE (src1) == CONST_INT && GET_CODE (src2) == CONST_INT)
+ {
+ HOST_WIDE_INT value1 = INTVAL (src1);
+ HOST_WIDE_INT value2 = INTVAL (src2);
+
+ /* Having 0 as one of the constants can be done by loading the other
+ constant, and optionally moving in gr0. */
+ if (value1 == 0 || value2 == 0)
+ ;
+
+ /* If the first value is within an addi range and also the difference
+ between the two fits in an addi's range, load up the difference, then
+ conditionally move in 0, and then unconditionally add the first
+ value. */
+ else if (IN_RANGE (value1, -2048, 2047)
+ && IN_RANGE (value2 - value1, -2048, 2047))
+ ;
+
+ /* If neither condition holds, just force the constant into a
+ register. */
+ else
+ {
+ src1 = force_reg (GET_MODE (dest), src1);
+ src2 = force_reg (GET_MODE (dest), src2);
+ }
+ }
+
+  /* If one value is a register, ensure the other value is either 0 or a
+ register. */
+ else
+ {
+ if (GET_CODE (src1) == CONST_INT && INTVAL (src1) != 0)
+ src1 = force_reg (GET_MODE (dest), src1);
+
+ if (GET_CODE (src2) == CONST_INT && INTVAL (src2) != 0)
+ src2 = force_reg (GET_MODE (dest), src2);
+ }
+
+ test2 = gen_rtx_fmt_ee (test, cc_mode, cc_reg, const0_rtx);
+ if_rtx = gen_rtx_IF_THEN_ELSE (GET_MODE (dest), test2, src1, src2);
+
+ set = gen_rtx_SET (VOIDmode, dest, if_rtx);
+
+ cr_reg = ((TARGET_ALLOC_CC)
+ ? gen_reg_rtx (CC_CCRmode)
+ : gen_rtx_REG (CC_CCRmode,
+ (cc_mode == CC_FPmode) ? FCR_FIRST : ICR_FIRST));
+
+ clobber_cc = gen_rtx_CLOBBER (VOIDmode, cr_reg);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber_cc)));
+ return TRUE;
+}
+
+
+/* Split a conditional move into constituent parts, returning a SEQUENCE
+ containing all of the insns. */
+
+rtx
+frv_split_cond_move (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx test = operands[1];
+ rtx cc_reg = operands[2];
+ rtx src1 = operands[3];
+ rtx src2 = operands[4];
+ rtx cr_reg = operands[5];
+ rtx ret;
+ enum machine_mode cr_mode = GET_MODE (cr_reg);
+
+ start_sequence ();
+
+ /* Set the appropriate CCR bit. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cr_reg,
+ gen_rtx_fmt_ee (GET_CODE (test),
+ GET_MODE (cr_reg),
+ cc_reg,
+ const0_rtx)));
+
+ /* Handle various cases of conditional move involving two constants. */
+ if (GET_CODE (src1) == CONST_INT && GET_CODE (src2) == CONST_INT)
+ {
+ HOST_WIDE_INT value1 = INTVAL (src1);
+ HOST_WIDE_INT value2 = INTVAL (src2);
+
+ /* Having 0 as one of the constants can be done by loading the other
+ constant, and optionally moving in gr0. */
+ if (value1 == 0)
+ {
+ emit_move_insn (dest, src2);
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (cr_mode, cr_reg,
+ const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src1)));
+ }
+
+ else if (value2 == 0)
+ {
+ emit_move_insn (dest, src1);
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (cr_mode, cr_reg,
+ const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src2)));
+ }
+
+ /* If the first value is within an addi range and also the difference
+ between the two fits in an addi's range, load up the difference, then
+ conditionally move in 0, and then unconditionally add the first
+ value. */
+ else if (IN_RANGE (value1, -2048, 2047)
+ && IN_RANGE (value2 - value1, -2048, 2047))
+ {
+ rtx dest_si = ((GET_MODE (dest) == SImode)
+ ? dest
+ : gen_rtx_SUBREG (SImode, dest, 0));
+
+ emit_move_insn (dest_si, GEN_INT (value2 - value1));
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (cr_mode, cr_reg,
+ const0_rtx),
+ gen_rtx_SET (VOIDmode, dest_si,
+ const0_rtx)));
+ emit_insn (gen_addsi3 (dest_si, dest_si, src1));
+ }
+
+ else
+ gcc_unreachable ();
+ }
+ else
+ {
+ /* Emit the conditional move for the test being true if needed. */
+ if (! rtx_equal_p (dest, src1))
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (cr_mode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src1)));
+
+ /* Emit the conditional move for the test being false if needed. */
+ if (! rtx_equal_p (dest, src2))
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (cr_mode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src2)));
+ }
+
+ /* Finish up, return sequence. */
+ ret = get_insns ();
+ end_sequence ();
+ return ret;
+}
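+
+/* Worked example of the addi-range case above: for "dest = test ? 5 : 17"
+   we load 12 (17 - 5) into DEST, conditionally zero it when the test
+   holds, then unconditionally add 5, producing 5 or 17 without needing
+   an extra register.  */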
+
+
+/* Split (set DEST SOURCE), where DEST is a double register and SOURCE is a
+ memory location that is not known to be dword-aligned. */
+void
+frv_split_double_load (rtx dest, rtx source)
+{
+ int regno = REGNO (dest);
+ rtx dest1 = gen_highpart (SImode, dest);
+ rtx dest2 = gen_lowpart (SImode, dest);
+ rtx address = XEXP (source, 0);
+
+ /* If the address is pre-modified, load the lower-numbered register
+ first, then load the other register using an integer offset from
+ the modified base register. This order should always be safe,
+ since the pre-modification cannot affect the same registers as the
+ load does.
+
+ The situation for other loads is more complicated. Loading one
+ of the registers could affect the value of ADDRESS, so we must
+ be careful which order we do them in. */
+ if (GET_CODE (address) == PRE_MODIFY
+ || ! refers_to_regno_p (regno, regno + 1, address, NULL))
+ {
+ /* It is safe to load the lower-numbered register first. */
+ emit_move_insn (dest1, change_address (source, SImode, NULL));
+ emit_move_insn (dest2, frv_index_memory (source, SImode, 1));
+ }
+ else
+ {
+ /* ADDRESS is not pre-modified and the address depends on the
+ lower-numbered register. Load the higher-numbered register
+ first. */
+ emit_move_insn (dest2, frv_index_memory (source, SImode, 1));
+ emit_move_insn (dest1, change_address (source, SImode, NULL));
+ }
+}
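+
+/* For example, when loading the pair gr4/gr5 from an address that uses
+   gr4 as its base, the second word is loaded into gr5 first and the
+   first word into gr4 afterwards, so the base is not clobbered until
+   it is no longer needed.  */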
+
+/* Split (set DEST SOURCE), where DEST refers to a dword memory location
+ and SOURCE is either a double register or the constant zero. */
+void
+frv_split_double_store (rtx dest, rtx source)
+{
+ rtx dest1 = change_address (dest, SImode, NULL);
+ rtx dest2 = frv_index_memory (dest, SImode, 1);
+ if (ZERO_P (source))
+ {
+ emit_move_insn (dest1, CONST0_RTX (SImode));
+ emit_move_insn (dest2, CONST0_RTX (SImode));
+ }
+ else
+ {
+ emit_move_insn (dest1, gen_highpart (SImode, source));
+ emit_move_insn (dest2, gen_lowpart (SImode, source));
+ }
+}
+
+
+/* Split a min/max operation returning a SEQUENCE containing all of the
+ insns. */
+
+rtx
+frv_split_minmax (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx minmax = operands[1];
+ rtx src1 = operands[2];
+ rtx src2 = operands[3];
+ rtx cc_reg = operands[4];
+ rtx cr_reg = operands[5];
+ rtx ret;
+ enum rtx_code test_code;
+ enum machine_mode cr_mode = GET_MODE (cr_reg);
+
+ start_sequence ();
+
+ /* Figure out which test to use. */
+ switch (GET_CODE (minmax))
+ {
+ default:
+ gcc_unreachable ();
+
+ case SMIN: test_code = LT; break;
+ case SMAX: test_code = GT; break;
+ case UMIN: test_code = LTU; break;
+ case UMAX: test_code = GTU; break;
+ }
+
+ /* Issue the compare instruction. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cc_reg,
+ gen_rtx_COMPARE (GET_MODE (cc_reg),
+ src1, src2)));
+
+ /* Set the appropriate CCR bit. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cr_reg,
+ gen_rtx_fmt_ee (test_code,
+ GET_MODE (cr_reg),
+ cc_reg,
+ const0_rtx)));
+
+  /* If we are taking the min/max of a nonzero constant, load that first, and
+ then do a conditional move of the other value. */
+ if (GET_CODE (src2) == CONST_INT && INTVAL (src2) != 0)
+ {
+ gcc_assert (!rtx_equal_p (dest, src1));
+
+ emit_move_insn (dest, src2);
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (cr_mode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src1)));
+ }
+
+ /* Otherwise, do each half of the move. */
+ else
+ {
+ /* Emit the conditional move for the test being true if needed. */
+ if (! rtx_equal_p (dest, src1))
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (cr_mode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src1)));
+
+ /* Emit the conditional move for the test being false if needed. */
+ if (! rtx_equal_p (dest, src2))
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (cr_mode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src2)));
+ }
+
+ /* Finish up, return sequence. */
+ ret = get_insns ();
+ end_sequence ();
+ return ret;
+}
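+
+/* Sketch of the emitted sequence for "dest = min (src1, src2)" with
+   both operands in registers:
+     cc = compare (src1, src2)
+     cr = (lt cc 0)
+     if (cr != 0) dest = src1
+     if (cr == 0) dest = src2  */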
+
+
+/* Split an integer abs operation returning a SEQUENCE containing all of the
+ insns. */
+
+rtx
+frv_split_abs (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx cc_reg = operands[2];
+ rtx cr_reg = operands[3];
+ rtx ret;
+
+ start_sequence ();
+
+ /* Issue the compare < 0 instruction. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cc_reg,
+ gen_rtx_COMPARE (CCmode, src, const0_rtx)));
+
+ /* Set the appropriate CCR bit. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cr_reg,
+ gen_rtx_fmt_ee (LT, CC_CCRmode, cc_reg, const0_rtx)));
+
+ /* Emit the conditional negate if the value is negative. */
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (CC_CCRmode, cr_reg, const0_rtx),
+ gen_negsi2 (dest, src)));
+
+ /* Emit the conditional move for the test being false if needed. */
+ if (! rtx_equal_p (dest, src))
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (CC_CCRmode, cr_reg, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, src)));
+
+ /* Finish up, return sequence. */
+ ret = get_insns ();
+ end_sequence ();
+ return ret;
+}
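+
+/* The resulting sequence for "dest = abs (src)" is:
+     cc = compare (src, 0)
+     cr = (lt cc 0)
+     if (cr != 0) dest = -src
+     if (cr == 0) dest = src	; omitted when dest and src are equal  */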
+
+
+/* An internal function called by for_each_rtx to clear, in a hard-reg set,
+   each register used in an insn. */
+
+static int
+frv_clear_registers_used (rtx *ptr, void *data)
+{
+ if (GET_CODE (*ptr) == REG)
+ {
+ int regno = REGNO (*ptr);
+ HARD_REG_SET *p_regs = (HARD_REG_SET *)data;
+
+ if (regno < FIRST_PSEUDO_REGISTER)
+ {
+ int reg_max = regno + HARD_REGNO_NREGS (regno, GET_MODE (*ptr));
+
+ while (regno < reg_max)
+ {
+ CLEAR_HARD_REG_BIT (*p_regs, regno);
+ regno++;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+/* Initialize the extra fields provided by IFCVT_EXTRA_FIELDS. */
+
+/* On the FR-V, we don't have any extra fields per se, but it is a useful
+ hook to initialize the static storage. */
+void
+frv_ifcvt_init_extra_fields (ce_if_block_t *ce_info ATTRIBUTE_UNUSED)
+{
+ frv_ifcvt.added_insns_list = NULL_RTX;
+ frv_ifcvt.cur_scratch_regs = 0;
+ frv_ifcvt.num_nested_cond_exec = 0;
+ frv_ifcvt.cr_reg = NULL_RTX;
+ frv_ifcvt.nested_cc_reg = NULL_RTX;
+ frv_ifcvt.extra_int_cr = NULL_RTX;
+ frv_ifcvt.extra_fp_cr = NULL_RTX;
+ frv_ifcvt.last_nested_if_cr = NULL_RTX;
+}
+
+
+/* Internal function to add a potential insn to the list of insns to be inserted
+ if the conditional execution conversion is successful. */
+
+static void
+frv_ifcvt_add_insn (rtx pattern, rtx insn, int before_p)
+{
+ rtx link = alloc_EXPR_LIST (VOIDmode, pattern, insn);
+
+ link->jump = before_p; /* Mark to add this before or after insn. */
+ frv_ifcvt.added_insns_list = alloc_EXPR_LIST (VOIDmode, link,
+ frv_ifcvt.added_insns_list);
+
+ if (TARGET_DEBUG_COND_EXEC)
+ {
+ fprintf (stderr,
+ "\n:::::::::: frv_ifcvt_add_insn: add the following %s insn %d:\n",
+ (before_p) ? "before" : "after",
+ (int)INSN_UID (insn));
+
+ debug_rtx (pattern);
+ }
+}
+
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO, possibly updating the tests in TRUE_EXPR and
+ FALSE_EXPR for converting if-then and if-then-else code to conditional
+ instructions. Set either TRUE_EXPR or FALSE_EXPR to a null pointer if the
+ tests cannot be converted. */
+
+void
+frv_ifcvt_modify_tests (ce_if_block_t *ce_info, rtx *p_true, rtx *p_false)
+{
+ basic_block test_bb = ce_info->test_bb; /* test basic block */
+ basic_block then_bb = ce_info->then_bb; /* THEN */
+ basic_block else_bb = ce_info->else_bb; /* ELSE or NULL */
+ basic_block join_bb = ce_info->join_bb; /* join block or NULL */
+ rtx true_expr = *p_true;
+ rtx cr;
+ rtx cc;
+ rtx nested_cc;
+ enum machine_mode mode = GET_MODE (true_expr);
+ int j;
+ basic_block *bb;
+ int num_bb;
+ frv_tmp_reg_t *tmp_reg = &frv_ifcvt.tmp_reg;
+ rtx check_insn;
+ rtx sub_cond_exec_reg;
+ enum rtx_code code;
+ enum rtx_code code_true;
+ enum rtx_code code_false;
+ enum reg_class cc_class;
+ enum reg_class cr_class;
+ int cc_first;
+ int cc_last;
+ reg_set_iterator rsi;
+
+ /* Make sure we are only dealing with hard registers. Also honor the
+ -mno-cond-exec and -mno-nested-cond-exec switches if applicable. */
+ if (!reload_completed || !TARGET_COND_EXEC
+ || (!TARGET_NESTED_CE && ce_info->pass > 1))
+ goto fail;
+
+ /* Figure out which registers we can allocate for our own purposes. Only
+ consider registers that are not preserved across function calls and are
+ not fixed. However, allow the ICC/ICR temporary registers to be allocated
+ if we did not need to use them in reloading other registers. */
+ memset (&tmp_reg->regs, 0, sizeof (tmp_reg->regs));
+ COPY_HARD_REG_SET (tmp_reg->regs, call_used_reg_set);
+ AND_COMPL_HARD_REG_SET (tmp_reg->regs, fixed_reg_set);
+ SET_HARD_REG_BIT (tmp_reg->regs, ICC_TEMP);
+ SET_HARD_REG_BIT (tmp_reg->regs, ICR_TEMP);
+
+ /* If this is a nested IF, we need to discover whether the CC registers that
+ are set/used inside of the block are used anywhere else. If not, we can
+ change them to be the CC register that is paired with the CR register that
+ controls the outermost IF block. */
+ if (ce_info->pass > 1)
+ {
+ CLEAR_HARD_REG_SET (frv_ifcvt.nested_cc_ok_rewrite);
+ for (j = CC_FIRST; j <= CC_LAST; j++)
+ if (TEST_HARD_REG_BIT (tmp_reg->regs, j))
+ {
+ if (REGNO_REG_SET_P (df_get_live_in (then_bb), j))
+ continue;
+
+ if (else_bb
+ && REGNO_REG_SET_P (df_get_live_in (else_bb), j))
+ continue;
+
+ if (join_bb
+ && REGNO_REG_SET_P (df_get_live_in (join_bb), j))
+ continue;
+
+ SET_HARD_REG_BIT (frv_ifcvt.nested_cc_ok_rewrite, j);
+ }
+ }
+
+ for (j = 0; j < frv_ifcvt.cur_scratch_regs; j++)
+ frv_ifcvt.scratch_regs[j] = NULL_RTX;
+
+ frv_ifcvt.added_insns_list = NULL_RTX;
+ frv_ifcvt.cur_scratch_regs = 0;
+
+ bb = (basic_block *) alloca ((2 + ce_info->num_multiple_test_blocks)
+ * sizeof (basic_block));
+
+ if (join_bb)
+ {
+ unsigned int regno;
+
+ /* Remove anything live at the beginning of the join block from being
+ available for allocation. */
+ EXECUTE_IF_SET_IN_REG_SET (df_get_live_in (join_bb), 0, regno, rsi)
+ {
+ if (regno < FIRST_PSEUDO_REGISTER)
+ CLEAR_HARD_REG_BIT (tmp_reg->regs, regno);
+ }
+ }
+
+ /* Add in all of the blocks in multiple &&/|| blocks to be scanned. */
+ num_bb = 0;
+ if (ce_info->num_multiple_test_blocks)
+ {
+ basic_block multiple_test_bb = ce_info->last_test_bb;
+
+ while (multiple_test_bb != test_bb)
+ {
+ bb[num_bb++] = multiple_test_bb;
+ multiple_test_bb = EDGE_PRED (multiple_test_bb, 0)->src;
+ }
+ }
+
+ /* Add in the THEN and ELSE blocks to be scanned. */
+ bb[num_bb++] = then_bb;
+ if (else_bb)
+ bb[num_bb++] = else_bb;
+
+ sub_cond_exec_reg = NULL_RTX;
+ frv_ifcvt.num_nested_cond_exec = 0;
+
+ /* Scan all of the blocks for registers that must not be allocated. */
+ for (j = 0; j < num_bb; j++)
+ {
+ rtx last_insn = BB_END (bb[j]);
+ rtx insn = BB_HEAD (bb[j]);
+ unsigned int regno;
+
+ if (dump_file)
+ fprintf (dump_file, "Scanning %s block %d, start %d, end %d\n",
+ (bb[j] == else_bb) ? "else" : ((bb[j] == then_bb) ? "then" : "test"),
+ (int) bb[j]->index,
+ (int) INSN_UID (BB_HEAD (bb[j])),
+ (int) INSN_UID (BB_END (bb[j])));
+
+ /* Anything live at the beginning of the block is obviously unavailable
+ for allocation. */
+ EXECUTE_IF_SET_IN_REG_SET (df_get_live_in (bb[j]), 0, regno, rsi)
+ {
+ if (regno < FIRST_PSEUDO_REGISTER)
+ CLEAR_HARD_REG_BIT (tmp_reg->regs, regno);
+ }
+
+ /* Loop through the insns in the block. */
+ for (;;)
+ {
+ /* Mark any new registers that are created as being unavailable for
+ allocation. Also see if the CC register used in nested IFs can be
+ reallocated. */
+ if (INSN_P (insn))
+ {
+ rtx pattern;
+ rtx set;
+ int skip_nested_if = FALSE;
+
+ for_each_rtx (&PATTERN (insn), frv_clear_registers_used,
+ (void *)&tmp_reg->regs);
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == COND_EXEC)
+ {
+ rtx reg = XEXP (COND_EXEC_TEST (pattern), 0);
+
+ if (reg != sub_cond_exec_reg)
+ {
+ sub_cond_exec_reg = reg;
+ frv_ifcvt.num_nested_cond_exec++;
+ }
+ }
+
+ set = single_set_pattern (pattern);
+ if (set)
+ {
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+
+ if (GET_CODE (dest) == REG)
+ {
+ int regno = REGNO (dest);
+ enum rtx_code src_code = GET_CODE (src);
+
+ if (CC_P (regno) && src_code == COMPARE)
+ skip_nested_if = TRUE;
+
+ else if (CR_P (regno)
+ && (src_code == IF_THEN_ELSE
+ || COMPARISON_P (src)))
+ skip_nested_if = TRUE;
+ }
+ }
+
+ if (! skip_nested_if)
+ for_each_rtx (&PATTERN (insn), frv_clear_registers_used,
+ (void *)&frv_ifcvt.nested_cc_ok_rewrite);
+ }
+
+ if (insn == last_insn)
+ break;
+
+ insn = NEXT_INSN (insn);
+ }
+ }
+
+ /* If this is a nested if, rewrite the CC registers that are available to
+ include the ones that can be rewritten, to increase the chance of being
+ able to allocate a paired CC/CR register combination. */
+ if (ce_info->pass > 1)
+ {
+ for (j = CC_FIRST; j <= CC_LAST; j++)
+ if (TEST_HARD_REG_BIT (frv_ifcvt.nested_cc_ok_rewrite, j))
+ SET_HARD_REG_BIT (tmp_reg->regs, j);
+ else
+ CLEAR_HARD_REG_BIT (tmp_reg->regs, j);
+ }
+
+ if (dump_file)
+ {
+ int num_gprs = 0;
+ fprintf (dump_file, "Available GPRs: ");
+
+ for (j = GPR_FIRST; j <= GPR_LAST; j++)
+ if (TEST_HARD_REG_BIT (tmp_reg->regs, j))
+ {
+ fprintf (dump_file, " %d [%s]", j, reg_names[j]);
+ if (++num_gprs > GPR_TEMP_NUM+2)
+ break;
+ }
+
+ fprintf (dump_file, "%s\nAvailable CRs: ",
+ (num_gprs > GPR_TEMP_NUM+2) ? " ..." : "");
+
+ for (j = CR_FIRST; j <= CR_LAST; j++)
+ if (TEST_HARD_REG_BIT (tmp_reg->regs, j))
+ fprintf (dump_file, " %d [%s]", j, reg_names[j]);
+
+ fputs ("\n", dump_file);
+
+ if (ce_info->pass > 1)
+ {
+ fprintf (dump_file, "Modifiable CCs: ");
+ for (j = CC_FIRST; j <= CC_LAST; j++)
+ if (TEST_HARD_REG_BIT (tmp_reg->regs, j))
+ fprintf (dump_file, " %d [%s]", j, reg_names[j]);
+
+ fprintf (dump_file, "\n%d nested COND_EXEC statements\n",
+ frv_ifcvt.num_nested_cond_exec);
+ }
+ }
+
+ /* Allocate the appropriate temporary condition code register. Try to
+ allocate the ICR/FCR register that corresponds to the ICC/FCC register so
+ that conditional cmp's can be done. */
+ if (mode == CCmode || mode == CC_UNSmode || mode == CC_NZmode)
+ {
+ cr_class = ICR_REGS;
+ cc_class = ICC_REGS;
+ cc_first = ICC_FIRST;
+ cc_last = ICC_LAST;
+ }
+ else if (mode == CC_FPmode)
+ {
+ cr_class = FCR_REGS;
+ cc_class = FCC_REGS;
+ cc_first = FCC_FIRST;
+ cc_last = FCC_LAST;
+ }
+ else
+ {
+ cc_first = cc_last = 0;
+ cr_class = cc_class = NO_REGS;
+ }
+
+ cc = XEXP (true_expr, 0);
+ nested_cc = cr = NULL_RTX;
+ if (cc_class != NO_REGS)
+ {
+ /* For nested IFs and &&/||, see if we can find a CC and CR register pair
+ so we can execute a csubcc/caddcc/cfcmps instruction. */
+ int cc_regno;
+
+ for (cc_regno = cc_first; cc_regno <= cc_last; cc_regno++)
+ {
+ int cr_regno = cc_regno - CC_FIRST + CR_FIRST;
+
+ if (TEST_HARD_REG_BIT (frv_ifcvt.tmp_reg.regs, cc_regno)
+ && TEST_HARD_REG_BIT (frv_ifcvt.tmp_reg.regs, cr_regno))
+ {
+ frv_ifcvt.tmp_reg.next_reg[ (int)cr_class ] = cr_regno;
+ cr = frv_alloc_temp_reg (tmp_reg, cr_class, CC_CCRmode, TRUE,
+ TRUE);
+
+ frv_ifcvt.tmp_reg.next_reg[ (int)cc_class ] = cc_regno;
+ nested_cc = frv_alloc_temp_reg (tmp_reg, cc_class, CCmode,
+ TRUE, TRUE);
+ break;
+ }
+ }
+ }
+
+ if (! cr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Could not allocate a CR temporary register\n");
+
+ goto fail;
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "Will use %s for conditional execution, %s for nested comparisons\n",
+ reg_names[ REGNO (cr)],
+ (nested_cc) ? reg_names[ REGNO (nested_cc) ] : "<none>");
+
+ /* Set the CCR bit. Note for integer tests, we reverse the condition so that
+ in an IF-THEN-ELSE sequence, we are testing the TRUE case against the CCR
+ bit being true. We don't do this for floating point, because of NaNs. */
+ code = GET_CODE (true_expr);
+ if (GET_MODE (cc) != CC_FPmode)
+ {
+ code = reverse_condition (code);
+ code_true = EQ;
+ code_false = NE;
+ }
+ else
+ {
+ code_true = NE;
+ code_false = EQ;
+ }
+
+ check_insn = gen_rtx_SET (VOIDmode, cr,
+ gen_rtx_fmt_ee (code, CC_CCRmode, cc, const0_rtx));
+
+ /* Record the check insn to be inserted later. */
+ frv_ifcvt_add_insn (check_insn, BB_END (test_bb), TRUE);
+
+ /* Update the tests. */
+ frv_ifcvt.cr_reg = cr;
+ frv_ifcvt.nested_cc_reg = nested_cc;
+ *p_true = gen_rtx_fmt_ee (code_true, CC_CCRmode, cr, const0_rtx);
+ *p_false = gen_rtx_fmt_ee (code_false, CC_CCRmode, cr, const0_rtx);
+ return;
+
+ /* Fail, don't do this conditional execution. */
+ fail:
+ *p_true = NULL_RTX;
+ *p_false = NULL_RTX;
+ if (dump_file)
+ fprintf (dump_file, "Disabling this conditional execution.\n");
+
+ return;
+}
+
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO, for the basic block BB, possibly updating the tests in
+ TRUE_EXPR and FALSE_EXPR for converting the && and || parts of if-then or
+ if-then-else code to conditional instructions. Set either TRUE_EXPR or
+ FALSE_EXPR to a null pointer if the tests cannot be converted. */
+
+/* p_true and p_false are given expressions of the form:
+
+ (and (eq:CC_CCR (reg:CC_CCR)
+ (const_int 0))
+ (eq:CC (reg:CC)
+ (const_int 0))) */
+
+void
+frv_ifcvt_modify_multiple_tests (ce_if_block_t *ce_info,
+ basic_block bb,
+ rtx *p_true,
+ rtx *p_false)
+{
+ rtx old_true = XEXP (*p_true, 0);
+ rtx old_false = XEXP (*p_false, 0);
+ rtx true_expr = XEXP (*p_true, 1);
+ rtx false_expr = XEXP (*p_false, 1);
+ rtx test_expr;
+ rtx old_test;
+ rtx cr = XEXP (old_true, 0);
+ rtx check_insn;
+ rtx new_cr = NULL_RTX;
+ rtx *p_new_cr = (rtx *)0;
+ rtx if_else;
+ rtx compare;
+ rtx cc;
+ enum reg_class cr_class;
+ enum machine_mode mode = GET_MODE (true_expr);
+ rtx (*logical_func)(rtx, rtx, rtx);
+
+ if (TARGET_DEBUG_COND_EXEC)
+ {
+ fprintf (stderr,
+ "\n:::::::::: frv_ifcvt_modify_multiple_tests, before modification for %s\ntrue insn:\n",
+ ce_info->and_and_p ? "&&" : "||");
+
+ debug_rtx (*p_true);
+
+ fputs ("\nfalse insn:\n", stderr);
+ debug_rtx (*p_false);
+ }
+
+ if (!TARGET_MULTI_CE)
+ goto fail;
+
+ if (GET_CODE (cr) != REG)
+ goto fail;
+
+ if (mode == CCmode || mode == CC_UNSmode || mode == CC_NZmode)
+ {
+ cr_class = ICR_REGS;
+ p_new_cr = &frv_ifcvt.extra_int_cr;
+ }
+ else if (mode == CC_FPmode)
+ {
+ cr_class = FCR_REGS;
+ p_new_cr = &frv_ifcvt.extra_fp_cr;
+ }
+ else
+ goto fail;
+
+ /* Allocate a temp CR, reusing a previously allocated temp CR if we have 3 or
+ more &&/|| tests. */
+ new_cr = *p_new_cr;
+ if (! new_cr)
+ {
+ new_cr = *p_new_cr = frv_alloc_temp_reg (&frv_ifcvt.tmp_reg, cr_class,
+ CC_CCRmode, TRUE, TRUE);
+ if (! new_cr)
+ goto fail;
+ }
+
+ if (ce_info->and_and_p)
+ {
+ old_test = old_false;
+ test_expr = true_expr;
+ logical_func = (GET_CODE (old_true) == EQ) ? gen_andcr : gen_andncr;
+ *p_true = gen_rtx_NE (CC_CCRmode, cr, const0_rtx);
+ *p_false = gen_rtx_EQ (CC_CCRmode, cr, const0_rtx);
+ }
+ else
+ {
+ old_test = old_false;
+ test_expr = false_expr;
+ logical_func = (GET_CODE (old_false) == EQ) ? gen_orcr : gen_orncr;
+ *p_true = gen_rtx_EQ (CC_CCRmode, cr, const0_rtx);
+ *p_false = gen_rtx_NE (CC_CCRmode, cr, const0_rtx);
+ }
+
+ /* First add the andcr/andncr/orcr/orncr, which will be added after the
+ conditional check instruction, due to frv_ifcvt_add_insn being a LIFO
+ stack. */
+ frv_ifcvt_add_insn ((*logical_func) (cr, cr, new_cr), BB_END (bb), TRUE);
+
+ /* Now add the conditional check insn. */
+ cc = XEXP (test_expr, 0);
+ compare = gen_rtx_fmt_ee (GET_CODE (test_expr), CC_CCRmode, cc, const0_rtx);
+ if_else = gen_rtx_IF_THEN_ELSE (CC_CCRmode, old_test, compare, const0_rtx);
+
+ check_insn = gen_rtx_SET (VOIDmode, new_cr, if_else);
+
+ /* Add the new check insn to the list of check insns that need to be
+ inserted. */
+ frv_ifcvt_add_insn (check_insn, BB_END (bb), TRUE);
+
+ if (TARGET_DEBUG_COND_EXEC)
+ {
+ fputs ("\n:::::::::: frv_ifcvt_modify_multiple_tests, after modification\ntrue insn:\n",
+ stderr);
+
+ debug_rtx (*p_true);
+
+ fputs ("\nfalse insn:\n", stderr);
+ debug_rtx (*p_false);
+ }
+
+ return;
+
+ fail:
+ *p_true = *p_false = NULL_RTX;
+
+ /* If we allocated a CR register, release it. */
+ if (new_cr)
+ {
+ CLEAR_HARD_REG_BIT (frv_ifcvt.tmp_reg.regs, REGNO (new_cr));
+ *p_new_cr = NULL_RTX;
+ }
+
+ if (TARGET_DEBUG_COND_EXEC)
+ fputs ("\n:::::::::: frv_ifcvt_modify_multiple_tests, failed.\n", stderr);
+
+ return;
+}
+
+
+/* Return a register which will be loaded with a value if an IF block is
+ converted to conditional execution. This is used to rewrite instructions
+ that use constants to ones that just use registers. */
+
+static rtx
+frv_ifcvt_load_value (rtx value, rtx insn ATTRIBUTE_UNUSED)
+{
+ int num_alloc = frv_ifcvt.cur_scratch_regs;
+ int i;
+ rtx reg;
+
+ /* We know gr0 == 0, so replace any errant uses. */
+ if (value == const0_rtx)
+ return gen_rtx_REG (SImode, GPR_FIRST);
+
+ /* First search all registers currently loaded to see if we have an
+ applicable constant. */
+ if (CONSTANT_P (value)
+ || (GET_CODE (value) == REG && REGNO (value) == LR_REGNO))
+ {
+ for (i = 0; i < num_alloc; i++)
+ {
+ if (rtx_equal_p (SET_SRC (frv_ifcvt.scratch_regs[i]), value))
+ return SET_DEST (frv_ifcvt.scratch_regs[i]);
+ }
+ }
+
+ /* Have we exhausted the number of registers available? */
+ if (num_alloc >= GPR_TEMP_NUM)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Too many temporary registers allocated\n");
+
+ return NULL_RTX;
+ }
+
+ /* Allocate the new register. */
+ reg = frv_alloc_temp_reg (&frv_ifcvt.tmp_reg, GPR_REGS, SImode, TRUE, TRUE);
+ if (! reg)
+ {
+ if (dump_file)
+ fputs ("Could not find a scratch register\n", dump_file);
+
+ return NULL_RTX;
+ }
+
+ frv_ifcvt.cur_scratch_regs++;
+ frv_ifcvt.scratch_regs[num_alloc] = gen_rtx_SET (VOIDmode, reg, value);
+
+ if (dump_file)
+ {
+ if (GET_CODE (value) == CONST_INT)
+ fprintf (dump_file, "Register %s will hold %ld\n",
+ reg_names[ REGNO (reg)], (long)INTVAL (value));
+
+ else if (GET_CODE (value) == REG && REGNO (value) == LR_REGNO)
+ fprintf (dump_file, "Register %s will hold LR\n",
+ reg_names[ REGNO (reg)]);
+
+ else
+ fprintf (dump_file, "Register %s will hold a saved value\n",
+ reg_names[ REGNO (reg)]);
+ }
+
+ return reg;
+}
+
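+/* For illustration: a first call such as frv_ifcvt_load_value (GEN_INT (5),
+ insn) reserves a scratch GPR and records (set scratch (const_int 5)) in
+ frv_ifcvt.scratch_regs; a later request for the same constant returns the
+ same register, and const0_rtx is simply rewritten to gr0, which always
+ holds zero. The scratch register name is a placeholder. */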
+
+/* Update a MEM used in conditional code that might contain an offset to put
+ the offset into a scratch register, so that the conditional load/store
+ operations can be used. This function returns the original pointer if the
+ MEM is valid to use in conditional code, NULL if we can't load up the offset
+ into a temporary register, or the new MEM if we were successful. */
+
+static rtx
+frv_ifcvt_rewrite_mem (rtx mem, enum machine_mode mode, rtx insn)
+{
+ rtx addr = XEXP (mem, 0);
+
+ if (!frv_legitimate_address_p_1 (mode, addr, reload_completed, TRUE, FALSE))
+ {
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx addr_op0 = XEXP (addr, 0);
+ rtx addr_op1 = XEXP (addr, 1);
+
+ if (GET_CODE (addr_op0) == REG && CONSTANT_P (addr_op1))
+ {
+ rtx reg = frv_ifcvt_load_value (addr_op1, insn);
+ if (!reg)
+ return NULL_RTX;
+
+ addr = gen_rtx_PLUS (Pmode, addr_op0, reg);
+ }
+
+ else
+ return NULL_RTX;
+ }
+
+ else if (CONSTANT_P (addr))
+ addr = frv_ifcvt_load_value (addr, insn);
+
+ else
+ return NULL_RTX;
+
+ if (addr == NULL_RTX)
+ return NULL_RTX;
+
+ else if (XEXP (mem, 0) != addr)
+ return change_address (mem, mode, addr);
+ }
+
+ return mem;
+}
+
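+/* For illustration: if (mem (plus (reg gr10) (const_int 0x12345))) is not
+ legitimate in conditional code, the routine above loads the offset into a
+ scratch register R via frv_ifcvt_load_value and returns
+ (mem (plus (reg gr10) (reg R))); a bare constant address is likewise
+ replaced by a scratch register. The register names and offset here are
+ made up for the example. */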
+
+/* Given a PATTERN, return a SET expression if this PATTERN has only a single
+ SET, possibly conditionally executed. It may also have CLOBBERs and USEs. */
+
+static rtx
+single_set_pattern (rtx pattern)
+{
+ rtx set;
+ int i;
+
+ if (GET_CODE (pattern) == COND_EXEC)
+ pattern = COND_EXEC_CODE (pattern);
+
+ if (GET_CODE (pattern) == SET)
+ return pattern;
+
+ else if (GET_CODE (pattern) == PARALLEL)
+ {
+ for (i = 0, set = 0; i < XVECLEN (pattern, 0); i++)
+ {
+ rtx sub = XVECEXP (pattern, 0, i);
+
+ switch (GET_CODE (sub))
+ {
+ case USE:
+ case CLOBBER:
+ break;
+
+ case SET:
+ if (set)
+ return 0;
+ else
+ set = sub;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ return set;
+ }
+
+ return 0;
+}
+
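+/* For illustration: given a pattern such as
+
+ (parallel [(set (reg:SI r) (plus:SI (reg:SI a) (reg:SI b)))
+ (clobber (reg:CC cc))])
+
+ single_set_pattern returns the inner SET, while a PARALLEL containing two
+ SETs, or any element other than SET/USE/CLOBBER, yields 0. */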
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO with the new PATTERN in INSN. If PATTERN is a null
+ pointer after the IFCVT_MODIFY_INSN macro executes, it is assumed that that
+ insn cannot be converted to be executed conditionally. */
+
+rtx
+frv_ifcvt_modify_insn (ce_if_block_t *ce_info,
+ rtx pattern,
+ rtx insn)
+{
+ rtx orig_ce_pattern = pattern;
+ rtx set;
+ rtx op0;
+ rtx op1;
+ rtx test;
+
+ gcc_assert (GET_CODE (pattern) == COND_EXEC);
+
+ test = COND_EXEC_TEST (pattern);
+ if (GET_CODE (test) == AND)
+ {
+ rtx cr = frv_ifcvt.cr_reg;
+ rtx test_reg;
+
+ op0 = XEXP (test, 0);
+ if (! rtx_equal_p (cr, XEXP (op0, 0)))
+ goto fail;
+
+ op1 = XEXP (test, 1);
+ test_reg = XEXP (op1, 0);
+ if (GET_CODE (test_reg) != REG)
+ goto fail;
+
+ /* Is this the first nested if block in this sequence? If so, generate
+ an andcr or andncr. */
+ if (! frv_ifcvt.last_nested_if_cr)
+ {
+ rtx and_op;
+
+ frv_ifcvt.last_nested_if_cr = test_reg;
+ if (GET_CODE (op0) == NE)
+ and_op = gen_andcr (test_reg, cr, test_reg);
+ else
+ and_op = gen_andncr (test_reg, cr, test_reg);
+
+ frv_ifcvt_add_insn (and_op, insn, TRUE);
+ }
+
+ /* If this isn't the first statement in the nested if sequence, see if we
+ are dealing with the same register. */
+ else if (! rtx_equal_p (test_reg, frv_ifcvt.last_nested_if_cr))
+ goto fail;
+
+ COND_EXEC_TEST (pattern) = test = op1;
+ }
+
+ /* If this isn't a nested if, reset state variables. */
+ else
+ {
+ frv_ifcvt.last_nested_if_cr = NULL_RTX;
+ }
+
+ set = single_set_pattern (pattern);
+ if (set)
+ {
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ enum machine_mode mode = GET_MODE (dest);
+
+ /* Check for normal binary operators. */
+ if (mode == SImode && ARITHMETIC_P (src))
+ {
+ op0 = XEXP (src, 0);
+ op1 = XEXP (src, 1);
+
+ if (integer_register_operand (op0, SImode) && CONSTANT_P (op1))
+ {
+ op1 = frv_ifcvt_load_value (op1, insn);
+ if (op1)
+ COND_EXEC_CODE (pattern)
+ = gen_rtx_SET (VOIDmode, dest, gen_rtx_fmt_ee (GET_CODE (src),
+ GET_MODE (src),
+ op0, op1));
+ else
+ goto fail;
+ }
+ }
+
+ /* For multiplication by a constant, we need to handle the sign extension
+ correctly. Add a USE of the value after the multiply to prevent flow
+ from cratering because only one register out of the two was used. */
+ else if (mode == DImode && GET_CODE (src) == MULT)
+ {
+ op0 = XEXP (src, 0);
+ op1 = XEXP (src, 1);
+ if (GET_CODE (op0) == SIGN_EXTEND && GET_CODE (op1) == CONST_INT)
+ {
+ op1 = frv_ifcvt_load_value (op1, insn);
+ if (op1)
+ {
+ op1 = gen_rtx_SIGN_EXTEND (DImode, op1);
+ COND_EXEC_CODE (pattern)
+ = gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_MULT (DImode, op0, op1));
+ }
+ else
+ goto fail;
+ }
+
+ frv_ifcvt_add_insn (gen_use (dest), insn, FALSE);
+ }
+
+ /* If we are just loading a constant created for a nested conditional
+ execution statement, just load the constant without any conditional
+ execution, since we know that the constant will not interfere with any
+ other registers. */
+ else if (frv_ifcvt.scratch_insns_bitmap
+ && bitmap_bit_p (frv_ifcvt.scratch_insns_bitmap,
+ INSN_UID (insn))
+ && REG_P (SET_DEST (set))
+ /* We must not unconditionally set a scratch reg chosen
+ for a nested if-converted block if its incoming
+ value from the TEST block (or the result of the THEN
+ branch) could/should propagate to the JOIN block.
+ It suffices to test whether the register is live at
+ the JOIN point: if it's live there, we can infer
+ that we set it in the former JOIN block of the
+ nested if-converted block (otherwise it wouldn't
+ have been available as a scratch register), and it
+ is either propagated through or set in the other
+ conditional block. It's probably not worth trying
+ to catch the latter case, and it could actually
+ limit scheduling of the combined block quite
+ severely. */
+ && ce_info->join_bb
+ && ! (REGNO_REG_SET_P (df_get_live_in (ce_info->join_bb),
+ REGNO (SET_DEST (set))))
+ /* Similarly, we must not unconditionally set a reg
+ used as scratch in the THEN branch if the same reg
+ is live in the ELSE branch. */
+ && (! ce_info->else_bb
+ || BLOCK_FOR_INSN (insn) == ce_info->else_bb
+ || ! (REGNO_REG_SET_P (df_get_live_in (ce_info->else_bb),
+ REGNO (SET_DEST (set))))))
+ pattern = set;
+
+ else if (mode == QImode || mode == HImode || mode == SImode
+ || mode == SFmode)
+ {
+ int changed_p = FALSE;
+
+ /* Check for just loading up a constant. */
+ if (CONSTANT_P (src) && integer_register_operand (dest, mode))
+ {
+ src = frv_ifcvt_load_value (src, insn);
+ if (!src)
+ goto fail;
+
+ changed_p = TRUE;
+ }
+
+ /* See if we need to fix up stores. */
+ if (GET_CODE (dest) == MEM)
+ {
+ rtx new_mem = frv_ifcvt_rewrite_mem (dest, mode, insn);
+
+ if (!new_mem)
+ goto fail;
+
+ else if (new_mem != dest)
+ {
+ changed_p = TRUE;
+ dest = new_mem;
+ }
+ }
+
+ /* See if we need to fix up loads. */
+ if (GET_CODE (src) == MEM)
+ {
+ rtx new_mem = frv_ifcvt_rewrite_mem (src, mode, insn);
+
+ if (!new_mem)
+ goto fail;
+
+ else if (new_mem != src)
+ {
+ changed_p = TRUE;
+ src = new_mem;
+ }
+ }
+
+ /* If either src or destination changed, redo SET. */
+ if (changed_p)
+ COND_EXEC_CODE (pattern) = gen_rtx_SET (VOIDmode, dest, src);
+ }
+
+ /* Rewrite a nested set cccr in terms of IF_THEN_ELSE. Also deal with
+ rewriting the CC register to be the same as the paired CC/CR register
+ for nested ifs. */
+ else if (mode == CC_CCRmode && COMPARISON_P (src))
+ {
+ int regno = REGNO (XEXP (src, 0));
+ rtx if_else;
+
+ if (ce_info->pass > 1
+ && regno != (int)REGNO (frv_ifcvt.nested_cc_reg)
+ && TEST_HARD_REG_BIT (frv_ifcvt.nested_cc_ok_rewrite, regno))
+ {
+ src = gen_rtx_fmt_ee (GET_CODE (src),
+ CC_CCRmode,
+ frv_ifcvt.nested_cc_reg,
+ XEXP (src, 1));
+ }
+
+ if_else = gen_rtx_IF_THEN_ELSE (CC_CCRmode, test, src, const0_rtx);
+ pattern = gen_rtx_SET (VOIDmode, dest, if_else);
+ }
+
+ /* Remap a nested compare instruction to use the paired CC/CR reg. */
+ else if (ce_info->pass > 1
+ && GET_CODE (dest) == REG
+ && CC_P (REGNO (dest))
+ && REGNO (dest) != REGNO (frv_ifcvt.nested_cc_reg)
+ && TEST_HARD_REG_BIT (frv_ifcvt.nested_cc_ok_rewrite,
+ REGNO (dest))
+ && GET_CODE (src) == COMPARE)
+ {
+ PUT_MODE (frv_ifcvt.nested_cc_reg, GET_MODE (dest));
+ COND_EXEC_CODE (pattern)
+ = gen_rtx_SET (VOIDmode, frv_ifcvt.nested_cc_reg, copy_rtx (src));
+ }
+ }
+
+ if (TARGET_DEBUG_COND_EXEC)
+ {
+ rtx orig_pattern = PATTERN (insn);
+
+ PATTERN (insn) = pattern;
+ fprintf (stderr,
+ "\n:::::::::: frv_ifcvt_modify_insn: pass = %d, insn after modification:\n",
+ ce_info->pass);
+
+ debug_rtx (insn);
+ PATTERN (insn) = orig_pattern;
+ }
+
+ return pattern;
+
+ fail:
+ if (TARGET_DEBUG_COND_EXEC)
+ {
+ rtx orig_pattern = PATTERN (insn);
+
+ PATTERN (insn) = orig_ce_pattern;
+ fprintf (stderr,
+ "\n:::::::::: frv_ifcvt_modify_insn: pass = %d, insn could not be modified:\n",
+ ce_info->pass);
+
+ debug_rtx (insn);
+ PATTERN (insn) = orig_pattern;
+ }
+
+ return NULL_RTX;
+}
+
+
+/* A C expression to perform any final machine dependent modifications in
+ converting code to conditional execution in the code described by the
+ conditional if information CE_INFO. */
+
+void
+frv_ifcvt_modify_final (ce_if_block_t *ce_info ATTRIBUTE_UNUSED)
+{
+ rtx existing_insn;
+ rtx check_insn;
+ rtx p = frv_ifcvt.added_insns_list;
+ int i;
+
+ /* Loop inserting the check insns. The last check insn is the first test,
+ and is the appropriate place to insert constants. */
+ gcc_assert (p);
+
+ do
+ {
+ rtx check_and_insert_insns = XEXP (p, 0);
+ rtx old_p = p;
+
+ check_insn = XEXP (check_and_insert_insns, 0);
+ existing_insn = XEXP (check_and_insert_insns, 1);
+ p = XEXP (p, 1);
+
+ /* The jump bit is used to say that the new insn is to be inserted BEFORE
+ the existing insn, otherwise it is to be inserted AFTER. */
+ if (check_and_insert_insns->jump)
+ {
+ emit_insn_before (check_insn, existing_insn);
+ check_and_insert_insns->jump = 0;
+ }
+ else
+ emit_insn_after (check_insn, existing_insn);
+
+ free_EXPR_LIST_node (check_and_insert_insns);
+ free_EXPR_LIST_node (old_p);
+ }
+ while (p != NULL_RTX);
+
+ /* Load up any constants needed into temp gprs. */
+ for (i = 0; i < frv_ifcvt.cur_scratch_regs; i++)
+ {
+ rtx insn = emit_insn_before (frv_ifcvt.scratch_regs[i], existing_insn);
+ if (! frv_ifcvt.scratch_insns_bitmap)
+ frv_ifcvt.scratch_insns_bitmap = BITMAP_ALLOC (NULL);
+ bitmap_set_bit (frv_ifcvt.scratch_insns_bitmap, INSN_UID (insn));
+ frv_ifcvt.scratch_regs[i] = NULL_RTX;
+ }
+
+ frv_ifcvt.added_insns_list = NULL_RTX;
+ frv_ifcvt.cur_scratch_regs = 0;
+}
+
+
+/* A C expression to cancel any machine dependent modifications in converting
+ code to conditional execution in the code described by the conditional if
+ information CE_INFO. */
+
+void
+frv_ifcvt_modify_cancel (ce_if_block_t *ce_info ATTRIBUTE_UNUSED)
+{
+ int i;
+ rtx p = frv_ifcvt.added_insns_list;
+
+ /* Loop freeing up the EXPR_LIST's allocated. */
+ while (p != NULL_RTX)
+ {
+ rtx check_and_jump = XEXP (p, 0);
+ rtx old_p = p;
+
+ p = XEXP (p, 1);
+ free_EXPR_LIST_node (check_and_jump);
+ free_EXPR_LIST_node (old_p);
+ }
+
+ /* Release any temporary gprs allocated. */
+ for (i = 0; i < frv_ifcvt.cur_scratch_regs; i++)
+ frv_ifcvt.scratch_regs[i] = NULL_RTX;
+
+ frv_ifcvt.added_insns_list = NULL_RTX;
+ frv_ifcvt.cur_scratch_regs = 0;
+ return;
+}
+
+/* A C expression for the size in bytes of the trampoline, as an integer.
+ The template is:
+
+ setlo #0, <jmp_reg>
+ setlo #0, <static_chain>
+ sethi #0, <jmp_reg>
+ sethi #0, <static_chain>
+ jmpl @(gr0,<jmp_reg>) */
+
+int
+frv_trampoline_size (void)
+{
+ if (TARGET_FDPIC)
+ /* Allocate room for the function descriptor and the lddi
+ instruction. */
+ return 8 + 6 * 4;
+ return 5 /* instructions */ * 4 /* instruction size. */;
+}
+
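+/* For example, following the arithmetic above: with -mfdpic the trampoline
+ occupies an 8-byte function descriptor plus six 4-byte instructions, i.e.
+ 32 bytes in total; otherwise it is the five 4-byte instructions of the
+ template, i.e. 20 bytes. */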
+
+/* A C statement to initialize the variable parts of a trampoline. ADDR is an
+ RTX for the address of the trampoline; FNADDR is an RTX for the address of
+ the nested function; STATIC_CHAIN is an RTX for the static chain value that
+ should be passed to the function when it is called.
+
+ The template is:
+
+ setlo #0, <jmp_reg>
+ setlo #0, <static_chain>
+ sethi #0, <jmp_reg>
+ sethi #0, <static_chain>
+ jmpl @(gr0,<jmp_reg>) */
+
+static void
+frv_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
+{
+ rtx addr = XEXP (m_tramp, 0);
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx sc_reg = force_reg (Pmode, static_chain);
+
+ emit_library_call (gen_rtx_SYMBOL_REF (SImode, "__trampoline_setup"),
+ LCT_NORMAL, VOIDmode, 4,
+ addr, Pmode,
+ GEN_INT (frv_trampoline_size ()), SImode,
+ fnaddr, Pmode,
+ sc_reg, Pmode);
+}
+
+
+/* Many machines have some registers that cannot be copied directly to or from
+ memory or even from other types of registers. An example is the `MQ'
+ register, which on most machines, can only be copied to or from general
+ registers, but not memory. Some machines allow copying all registers to and
+ from memory, but require a scratch register for stores to some memory
+ locations (e.g., those with symbolic address on the RT, and those with
+ certain symbolic address on the SPARC when compiling PIC). In some cases,
+ both an intermediate and a scratch register are required.
+
+ You should define these macros to indicate to the reload phase that it may
+ need to allocate at least one register for a reload in addition to the
+ register to contain the data. Specifically, if copying X to a register
+ RCLASS in MODE requires an intermediate register, you should define
+ `SECONDARY_INPUT_RELOAD_CLASS' to return the largest register class all of
+ whose registers can be used as intermediate registers or scratch registers.
+
+ If copying a register RCLASS in MODE to X requires an intermediate or scratch
+ register, `SECONDARY_OUTPUT_RELOAD_CLASS' should be defined to return the
+ largest register class required. If the requirements for input and output
+ reloads are the same, the macro `SECONDARY_RELOAD_CLASS' should be used
+ instead of defining both macros identically.
+
+ The values returned by these macros are often `GENERAL_REGS'. Return
+ `NO_REGS' if no spare register is needed; i.e., if X can be directly copied
+ to or from a register of RCLASS in MODE without requiring a scratch register.
+ Do not define this macro if it would always return `NO_REGS'.
+
+ If a scratch register is required (either with or without an intermediate
+ register), you should define patterns for `reload_inM' or `reload_outM', as
+ required. These patterns, which will normally be implemented with a
+ `define_expand', should be similar to the `movM' patterns, except that
+ operand 2 is the scratch register.
+
+ Define constraints for the reload register and scratch register that contain
+ a single register class. If the original reload register (whose class is
+ RCLASS) can meet the constraint given in the pattern, the value returned by
+ these macros is used for the class of the scratch register. Otherwise, two
+ additional reload registers are required. Their classes are obtained from
+ the constraints in the insn pattern.
+
+ X might be a pseudo-register or a `subreg' of a pseudo-register, which could
+ either be in a hard register or in memory. Use `true_regnum' to find out;
+ it will return -1 if the pseudo is in memory and the hard register number if
+ it is in a register.
+
+ These macros should not be used in the case where a particular class of
+ registers can only be copied to memory and not to another class of
+ registers. In that case, secondary reload registers are not needed and
+ would not be helpful. Instead, a stack location must be used to perform the
+ copy and the `movM' pattern should use memory as an intermediate storage.
+ This case often occurs between floating-point and general registers. */
+
+enum reg_class
+frv_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ enum reg_class ret;
+
+ switch (rclass)
+ {
+ default:
+ ret = NO_REGS;
+ break;
+
+ /* Accumulators/Accumulator guard registers need to go through floating
+ point registers. */
+ case QUAD_REGS:
+ case EVEN_REGS:
+ case GPR_REGS:
+ ret = NO_REGS;
+ if (x && GET_CODE (x) == REG)
+ {
+ int regno = REGNO (x);
+
+ if (ACC_P (regno) || ACCG_P (regno))
+ ret = FPR_REGS;
+ }
+ break;
+
+ /* Nonzero constants should be loaded into an FPR through a GPR. */
+ case QUAD_FPR_REGS:
+ case FEVEN_REGS:
+ case FPR_REGS:
+ if (x && CONSTANT_P (x) && !ZERO_P (x))
+ ret = GPR_REGS;
+ else
+ ret = NO_REGS;
+ break;
+
+ /* All of these types need gpr registers. */
+ case ICC_REGS:
+ case FCC_REGS:
+ case CC_REGS:
+ case ICR_REGS:
+ case FCR_REGS:
+ case CR_REGS:
+ case LCR_REG:
+ case LR_REG:
+ ret = GPR_REGS;
+ break;
+
+ /* The accumulators need fpr registers. */
+ case ACC_REGS:
+ case EVEN_ACC_REGS:
+ case QUAD_ACC_REGS:
+ case ACCG_REGS:
+ ret = FPR_REGS;
+ break;
+ }
+
+ return ret;
+}
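+
+/* For illustration: reloading an accumulator into a GPR_REGS reload
+ register returns FPR_REGS above, so the value is copied through an FPR;
+ loading a nonzero constant into an FPR_REGS reload register returns
+ GPR_REGS, so the constant is built in a GPR first. */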
+
+/* This hook exists to catch the case where secondary_reload_class() is
+ called from init_reg_autoinc() in regclass.c - before the reload optabs
+ have been initialised. */
+
+static reg_class_t
+frv_secondary_reload (bool in_p, rtx x, reg_class_t reload_class_i,
+ enum machine_mode reload_mode,
+ secondary_reload_info * sri)
+{
+ enum reg_class rclass = NO_REGS;
+ enum reg_class reload_class = (enum reg_class) reload_class_i;
+
+ if (sri->prev_sri && sri->prev_sri->t_icode != CODE_FOR_nothing)
+ {
+ sri->icode = sri->prev_sri->t_icode;
+ return NO_REGS;
+ }
+
+ rclass = frv_secondary_reload_class (reload_class, reload_mode, x);
+
+ if (rclass != NO_REGS)
+ {
+ enum insn_code icode
+ = direct_optab_handler (in_p ? reload_in_optab : reload_out_optab,
+ reload_mode);
+ if (icode == 0)
+ {
+ /* This happens when the reload_[in|out]_optabs have
+ not been initialised. */
+ sri->t_icode = CODE_FOR_nothing;
+ return rclass;
+ }
+ }
+
+ /* Fall back to the default secondary reload handler. */
+ return default_secondary_reload (in_p, x, reload_class, reload_mode, sri);
+}
+
+/* Worker function for TARGET_CLASS_LIKELY_SPILLED_P. */
+
+static bool
+frv_class_likely_spilled_p (reg_class_t rclass)
+{
+ switch (rclass)
+ {
+ default:
+ break;
+
+ case GR8_REGS:
+ case GR9_REGS:
+ case GR89_REGS:
+ case FDPIC_FPTR_REGS:
+ case FDPIC_REGS:
+ case ICC_REGS:
+ case FCC_REGS:
+ case CC_REGS:
+ case ICR_REGS:
+ case FCR_REGS:
+ case CR_REGS:
+ case LCR_REG:
+ case LR_REG:
+ case SPR_REGS:
+ case QUAD_ACC_REGS:
+ case EVEN_ACC_REGS:
+ case ACC_REGS:
+ case ACCG_REGS:
+ return true;
+ }
+
+ return false;
+}
+
+
+/* An expression for the alignment of a structure field FIELD if the
+ alignment computed in the usual way is COMPUTED. GCC uses this
+ value instead of the value in `BIGGEST_ALIGNMENT' or
+ `BIGGEST_FIELD_ALIGNMENT', if defined, for structure fields only. */
+
+/* The definition type of the bit field data is either char, short, long or
+ long long. The maximum bit size is the number of bits of its own type.
+
+ The bit field data is assigned to a storage unit that has an adequate size
+ for bit field data retention and is located at the smallest address.
+
+ Consecutive bit field data are packed at consecutive bits having the same
+ storage unit, with regard to the type, beginning with the MSB and continuing
+ toward the LSB.
+
+ If a field to be assigned lies over a bit field type boundary, its
+ assignment is completed by aligning it with a boundary suitable for the
+ type.
+
+ When a bit field having a bit length of 0 is declared, it is forcibly
+ assigned to the next storage unit.
+
+ e.g.)
+ struct {
+ int a:2;
+ int b:6;
+ char c:4;
+ int d:10;
+ int :0;
+ int f:2;
+ } x;
+
+ +0 +1 +2 +3
+ &x 00000000 00000000 00000000 00000000
+ MLM----L
+ a b
+ &x+4 00000000 00000000 00000000 00000000
+ M--L
+ c
+ &x+8 00000000 00000000 00000000 00000000
+ M----------L
+ d
+ &x+12 00000000 00000000 00000000 00000000
+ ML
+ f
+*/
+
+int
+frv_adjust_field_align (tree field, int computed)
+{
+ /* Make sure that the bitfield is not wider than the type. */
+ if (DECL_BIT_FIELD (field)
+ && !DECL_ARTIFICIAL (field))
+ {
+ tree parent = DECL_CONTEXT (field);
+ tree prev = NULL_TREE;
+ tree cur;
+
+ for (cur = TYPE_FIELDS (parent); cur && cur != field; cur = DECL_CHAIN (cur))
+ {
+ if (TREE_CODE (cur) != FIELD_DECL)
+ continue;
+
+ prev = cur;
+ }
+
+ gcc_assert (cur);
+
+ /* If this isn't a :0 field and if the previous element is also a
+ bitfield, see if the type is different; if so, we will need to align
+ the bit-field to the next boundary. */
+ if (prev
+ && ! DECL_PACKED (field)
+ && ! integer_zerop (DECL_SIZE (field))
+ && DECL_BIT_FIELD_TYPE (field) != DECL_BIT_FIELD_TYPE (prev))
+ {
+ int prev_align = TYPE_ALIGN (TREE_TYPE (prev));
+ int cur_align = TYPE_ALIGN (TREE_TYPE (field));
+ computed = (prev_align > cur_align) ? prev_align : cur_align;
+ }
+ }
+
+ return computed;
+}
+
+
+/* A C expression that is nonzero if it is permissible to store a value of mode
+ MODE in hard register number REGNO (or in several registers starting with
+ that one). For a machine where all registers are equivalent, a suitable
+ definition is
+
+ #define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+ It is not necessary for this macro to check for the numbers of fixed
+ registers, because the allocation mechanism considers them to be always
+ occupied.
+
+ On some machines, double-precision values must be kept in even/odd register
+ pairs. The way to implement that is to define this macro to reject odd
+ register numbers for such modes.
+
+ The minimum requirement for a mode to be OK in a register is that the
+ `movMODE' instruction pattern support moves between the register and any
+ other hard register for which the mode is OK; and that moving a value into
+ the register and back out not alter it.
+
+ Since the same instruction used to move `SImode' will work for all narrower
+ integer modes, it is not necessary on any machine for `HARD_REGNO_MODE_OK'
+ to distinguish between these modes, provided you define patterns `movhi',
+ etc., to take advantage of this. This is useful because of the interaction
+ between `HARD_REGNO_MODE_OK' and `MODES_TIEABLE_P'; it is very desirable for
+ all integer modes to be tieable.
+
+ Many machines have special registers for floating point arithmetic. Often
+ people assume that floating point machine modes are allowed only in floating
+ point registers. This is not true. Any registers that can hold integers
+ can safely *hold* a floating point machine mode, whether or not floating
+ arithmetic can be done on it in those registers. Integer move instructions
+ can be used to move the values.
+
+ On some machines, though, the converse is true: fixed-point machine modes
+ may not go in floating registers. This is true if the floating registers
+ normalize any value stored in them, because storing a non-floating value
+ there would garble it. In this case, `HARD_REGNO_MODE_OK' should reject
+ fixed-point machine modes in floating registers. But if the floating
+ registers do not automatically normalize, if you can store any bit pattern
+ in one and retrieve it unchanged without a trap, then any machine mode may
+ go in a floating register, so you can define this macro to say so.
+
+ The primary significance of special floating registers is rather that they
+ are the registers acceptable in floating point arithmetic instructions.
+ However, this is of no concern to `HARD_REGNO_MODE_OK'. You handle it by
+ writing the proper constraints for those instructions.
+
+ On some machines, the floating registers are especially slow to access, so
+ that it is better to store a value in a stack frame than in such a register
+ if floating point arithmetic is not being done. As long as the floating
+ registers are not in class `GENERAL_REGS', they will not be used unless some
+ pattern's constraint asks for one. */
+
+int
+frv_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ int base;
+ int mask;
+
+ switch (mode)
+ {
+ case CCmode:
+ case CC_UNSmode:
+ case CC_NZmode:
+ return ICC_P (regno) || GPR_P (regno);
+
+ case CC_CCRmode:
+ return CR_P (regno) || GPR_P (regno);
+
+ case CC_FPmode:
+ return FCC_P (regno) || GPR_P (regno);
+
+ default:
+ break;
+ }
+
+ /* Set BASE to the first register in REGNO's class. Set MASK to the
+ bits that must be clear in (REGNO - BASE) for the register to be
+ well-aligned. */
+ if (INTEGRAL_MODE_P (mode) || FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ {
+ if (ACCG_P (regno))
+ {
+ /* ACCGs store one byte. Two-byte quantities must start in
+ even-numbered registers, four-byte ones in registers whose
+ numbers are divisible by four, and so on. */
+ base = ACCG_FIRST;
+ mask = GET_MODE_SIZE (mode) - 1;
+ }
+ else
+ {
+ /* The other registers store one word. */
+ if (GPR_P (regno) || regno == AP_FIRST)
+ base = GPR_FIRST;
+
+ else if (FPR_P (regno))
+ base = FPR_FIRST;
+
+ else if (ACC_P (regno))
+ base = ACC_FIRST;
+
+ else if (SPR_P (regno))
+ return mode == SImode;
+
+ /* Fill in the table. */
+ else
+ return 0;
+
+ /* Anything smaller than an SI is OK in any word-sized register. */
+ if (GET_MODE_SIZE (mode) < 4)
+ return 1;
+
+ mask = (GET_MODE_SIZE (mode) / 4) - 1;
+ }
+ return (((regno - base) & mask) == 0);
+ }
+
+ return 0;
+}
+
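+/* For example, under the alignment rule above a DImode value (8 bytes) in
+ the word-sized registers has mask = 8/4 - 1 = 1 and so must start in an
+ even-numbered register, while an HImode value (2 bytes) in the byte-sized
+ ACCG registers likewise has mask = 1 and must start in an even-numbered
+ guard register. */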
+
+/* A C expression for the number of consecutive hard registers, starting at
+ register number REGNO, required to hold a value of mode MODE.
+
+ On a machine where all registers are exactly one word, a suitable definition
+ of this macro is
+
+ #define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) \
+ / UNITS_PER_WORD) */
+
+/* On the FRV, make the CC_FP mode take 3 words in the integer registers, so
+ that we can build the appropriate instructions to properly reload the
+ values. Also, make the byte-sized accumulator guards use one guard
+ for each byte. */
+
+int
+frv_hard_regno_nregs (int regno, enum machine_mode mode)
+{
+ if (ACCG_P (regno))
+ return GET_MODE_SIZE (mode);
+ else
+ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
+
+/* A C expression for the maximum number of consecutive registers of
+ class RCLASS needed to hold a value of mode MODE.
+
+ This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value
+ of the macro `CLASS_MAX_NREGS (RCLASS, MODE)' should be the maximum value of
+ `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class RCLASS.
+
+ This macro helps control the handling of multiple-word values in
+ the reload pass.
+
+ This declaration is required. */
+
+int
+frv_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
+{
+ if (rclass == ACCG_REGS)
+ /* An N-byte value requires N accumulator guards. */
+ return GET_MODE_SIZE (mode);
+ else
+ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
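+/* For example, assuming 4-byte words: a DImode value needs 8 ACCG_REGS
+ guards (one per byte) but only 2 registers of any word-sized class. */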
+
+/* A C expression that is nonzero if X is a legitimate constant for an
+ immediate operand on the target machine. You can assume that X satisfies
+ `CONSTANT_P', so you need not check this. In fact, `1' is a suitable
+ definition for this macro on machines where anything `CONSTANT_P' is valid. */
+
+int
+frv_legitimate_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* frv_cannot_force_const_mem always returns true for FDPIC. This
+ means that the move expanders will be expected to deal with most
+ kinds of constant, regardless of what we return here.
+
+ However, among its other duties, LEGITIMATE_CONSTANT_P decides whether
+ a constant can be entered into reg_equiv_constant[]. If we return true,
+ reload can create new instances of the constant whenever it likes.
+
+ The idea is therefore to accept as many constants as possible (to give
+ reload more freedom) while rejecting constants that can only be created
+ at certain times. In particular, anything with a symbolic component will
+ require use of the pseudo FDPIC register, which is only available before
+ reload. */
+ if (TARGET_FDPIC)
+ return LEGITIMATE_PIC_OPERAND_P (x);
+
+ /* All of the integer constants are ok. */
+ if (GET_CODE (x) != CONST_DOUBLE)
+ return TRUE;
+
+ /* double integer constants are ok. */
+ if (mode == VOIDmode || mode == DImode)
+ return TRUE;
+
+ /* 0 is always ok. */
+ if (x == CONST0_RTX (mode))
+ return TRUE;
+
+ /* If floating point is just emulated, allow any constant, since it will be
+ constructed in the GPRs. */
+ if (!TARGET_HAS_FPRS)
+ return TRUE;
+
+ if (mode == DFmode && !TARGET_DOUBLE)
+ return TRUE;
+
+ /* Otherwise store the constant away and do a load. */
+ return FALSE;
+}
+
+/* Implement SELECT_CC_MODE. Choose CC_FP for floating-point comparisons,
+ CC_NZ for comparisons against zero in which a single Z or N flag test
+ is enough, CC_UNS for other unsigned comparisons, and CC for other
+ signed comparisons. */
+
+enum machine_mode
+frv_select_cc_mode (enum rtx_code code, rtx x, rtx y)
+{
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ return CC_FPmode;
+
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ case LT:
+ case GE:
+ return y == const0_rtx ? CC_NZmode : CCmode;
+
+ case GTU:
+ case GEU:
+ case LTU:
+ case LEU:
+ return y == const0_rtx ? CC_NZmode : CC_UNSmode;
+
+ default:
+ return CCmode;
+ }
+}
+
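+/* For illustration: a comparison (lt x (const_int 0)) selects CC_NZmode,
+ (ltu x y) with nonzero Y selects CC_UNSmode, (eq x y) with nonzero Y
+ selects CCmode, and any floating-point comparison selects CC_FPmode. */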
+
+/* Worker function for TARGET_REGISTER_MOVE_COST. */
+
+#define HIGH_COST 40
+#define MEDIUM_COST 3
+#define LOW_COST 1
+
+static int
+frv_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ switch (from)
+ {
+ default:
+ break;
+
+ case QUAD_REGS:
+ case EVEN_REGS:
+ case GPR_REGS:
+ switch (to)
+ {
+ default:
+ break;
+
+ case QUAD_REGS:
+ case EVEN_REGS:
+ case GPR_REGS:
+ return LOW_COST;
+
+ case FEVEN_REGS:
+ case FPR_REGS:
+ return LOW_COST;
+
+ case LCR_REG:
+ case LR_REG:
+ case SPR_REGS:
+ return LOW_COST;
+ }
+
+ case FEVEN_REGS:
+ case FPR_REGS:
+ switch (to)
+ {
+ default:
+ break;
+
+ case QUAD_REGS:
+ case EVEN_REGS:
+ case GPR_REGS:
+ case ACC_REGS:
+ case EVEN_ACC_REGS:
+ case QUAD_ACC_REGS:
+ case ACCG_REGS:
+ return MEDIUM_COST;
+
+ case FEVEN_REGS:
+ case FPR_REGS:
+ return LOW_COST;
+ }
+
+ case LCR_REG:
+ case LR_REG:
+ case SPR_REGS:
+ switch (to)
+ {
+ default:
+ break;
+
+ case QUAD_REGS:
+ case EVEN_REGS:
+ case GPR_REGS:
+ return MEDIUM_COST;
+ }
+
+ case ACC_REGS:
+ case EVEN_ACC_REGS:
+ case QUAD_ACC_REGS:
+ case ACCG_REGS:
+ switch (to)
+ {
+ default:
+ break;
+
+ case FEVEN_REGS:
+ case FPR_REGS:
+ return MEDIUM_COST;
+
+ }
+ }
+
+ return HIGH_COST;
+}
+
+/* Worker function for TARGET_MEMORY_MOVE_COST. */
+
+static int
+frv_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ return 4;
+}
+
+
+/* Implementation of TARGET_ASM_INTEGER. In the FRV case we need to
+ use ".picptr" to generate safe relocations for PIC code. We also
+ need a fixup entry for aligned (non-debugging) code. */
+
+static bool
+frv_assemble_integer (rtx value, unsigned int size, int aligned_p)
+{
+ if ((flag_pic || TARGET_FDPIC) && size == UNITS_PER_WORD)
+ {
+ if (GET_CODE (value) == CONST
+ || GET_CODE (value) == SYMBOL_REF
+ || GET_CODE (value) == LABEL_REF)
+ {
+ if (TARGET_FDPIC && GET_CODE (value) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (value))
+ {
+ fputs ("\t.picptr\tfuncdesc(", asm_out_file);
+ output_addr_const (asm_out_file, value);
+ fputs (")\n", asm_out_file);
+ return true;
+ }
+ else if (TARGET_FDPIC && GET_CODE (value) == CONST
+ && frv_function_symbol_referenced_p (value))
+ return false;
+ if (aligned_p && !TARGET_FDPIC)
+ {
+ static int label_num = 0;
+ char buf[256];
+ const char *p;
+
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", label_num++);
+ p = (* targetm.strip_name_encoding) (buf);
+
+ fprintf (asm_out_file, "%s:\n", p);
+ fprintf (asm_out_file, "%s\n", FIXUP_SECTION_ASM_OP);
+ fprintf (asm_out_file, "\t.picptr\t%s\n", p);
+ fprintf (asm_out_file, "\t.previous\n");
+ }
+ assemble_integer_with_op ("\t.picptr\t", value);
+ return true;
+ }
+ if (!aligned_p)
+ {
+ /* We've set the unaligned SI op to NULL, so we always have to
+ handle the unaligned case here. */
+ assemble_integer_with_op ("\t.4byte\t", value);
+ return true;
+ }
+ }
+ return default_assemble_integer (value, size, aligned_p);
+}
+
+/* Function to set up the backend function structure. */
+
+static struct machine_function *
+frv_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Implement TARGET_SCHED_ISSUE_RATE. */
+
+int
+frv_issue_rate (void)
+{
+ if (!TARGET_PACK)
+ return 1;
+
+ switch (frv_cpu_type)
+ {
+ default:
+ case FRV_CPU_FR300:
+ case FRV_CPU_SIMPLE:
+ return 1;
+
+ case FRV_CPU_FR400:
+ case FRV_CPU_FR405:
+ case FRV_CPU_FR450:
+ return 2;
+
+ case FRV_CPU_GENERIC:
+ case FRV_CPU_FR500:
+ case FRV_CPU_TOMCAT:
+ return 4;
+
+ case FRV_CPU_FR550:
+ return 8;
+ }
+}
+
+/* A for_each_rtx callback. If X refers to an accumulator, return
+ ACC_GROUP_ODD if bit 2 of the register number is set and
+ ACC_GROUP_EVEN if it is clear. Return 0 (ACC_GROUP_NONE)
+ otherwise. */
+
+static int
+frv_acc_group_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (REG_P (*x))
+ {
+ if (ACC_P (REGNO (*x)))
+ return (REGNO (*x) - ACC_FIRST) & 4 ? ACC_GROUP_ODD : ACC_GROUP_EVEN;
+ if (ACCG_P (REGNO (*x)))
+ return (REGNO (*x) - ACCG_FIRST) & 4 ? ACC_GROUP_ODD : ACC_GROUP_EVEN;
+ }
+ return 0;
+}
+
+/* Return the value of INSN's acc_group attribute. */
+
+int
+frv_acc_group (rtx insn)
+{
+ /* This distinction only applies to the FR550 packing constraints. */
+ if (frv_cpu_type != FRV_CPU_FR550)
+ return ACC_GROUP_NONE;
+ return for_each_rtx (&PATTERN (insn), frv_acc_group_1, 0);
+}
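+
+/* For illustration, assuming the usual ACC0-based naming: ACC0..ACC3 and
+ ACCG0..ACCG3 fall in ACC_GROUP_EVEN while ACC4..ACC7 and ACCG4..ACCG7
+ fall in ACC_GROUP_ODD; for_each_rtx stops at the first accumulator
+ reference it finds in the pattern. */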
+
+/* Return the index of the DFA unit in FRV_UNIT_NAMES[] that instruction
+ INSN will try to claim first. Since this value depends only on the
+ type attribute, we can cache the results in FRV_TYPE_TO_UNIT[]. */
+
+static unsigned int
+frv_insn_unit (rtx insn)
+{
+ enum attr_type type;
+
+ type = get_attr_type (insn);
+ if (frv_type_to_unit[type] == ARRAY_SIZE (frv_unit_codes))
+ {
+ /* We haven't seen this type of instruction before. */
+ state_t state;
+ unsigned int unit;
+
+ /* Issue the instruction on its own to see which unit it prefers. */
+ state = alloca (state_size ());
+ state_reset (state);
+ state_transition (state, insn);
+
+ /* Find out which unit was taken. */
+ for (unit = 0; unit < ARRAY_SIZE (frv_unit_codes); unit++)
+ if (cpu_unit_reservation_p (state, frv_unit_codes[unit]))
+ break;
+
+ gcc_assert (unit != ARRAY_SIZE (frv_unit_codes));
+
+ frv_type_to_unit[type] = unit;
+ }
+ return frv_type_to_unit[type];
+}
+
+/* Return true if INSN issues to a branch unit. */
+
+static bool
+frv_issues_to_branch_unit_p (rtx insn)
+{
+ return frv_unit_groups[frv_insn_unit (insn)] == GROUP_B;
+}
+
+/* The instructions in the packet, partitioned into groups. */
+struct frv_packet_group {
+ /* How many instructions in the packet belong to this group. */
+ unsigned int num_insns;
+
+ /* A list of the instructions that belong to this group, in the order
+ they appear in the rtl stream. */
+ rtx insns[ARRAY_SIZE (frv_unit_codes)];
+
+ /* The contents of INSNS after they have been sorted into the correct
+ assembly-language order. Element X issues to unit X. The list may
+ contain extra nops. */
+ rtx sorted[ARRAY_SIZE (frv_unit_codes)];
+
+ /* The member of frv_nops[] to use in sorted[]. */
+ rtx nop;
+};
+
+/* The current state of the packing pass, implemented by frv_pack_insns. */
+static struct {
+ /* The state of the pipeline DFA. */
+ state_t dfa_state;
+
+ /* Which hardware registers are set within the current packet,
+ and the conditions under which they are set. */
+ regstate_t regstate[FIRST_PSEUDO_REGISTER];
+
+ /* The memory locations that have been modified so far in this
+ packet. MEM is the memref and COND is the regstate_t condition
+ under which it is set. */
+ struct {
+ rtx mem;
+ regstate_t cond;
+ } mems[2];
+
+ /* The number of valid entries in MEMS. The value is larger than
+ ARRAY_SIZE (mems) if there were too many mems to record. */
+ unsigned int num_mems;
+
+ /* The maximum number of instructions that can be packed together. */
+ unsigned int issue_rate;
+
+ /* The instructions in the packet, partitioned into groups. */
+ struct frv_packet_group groups[NUM_GROUPS];
+
+ /* The instructions that make up the current packet. */
+ rtx insns[ARRAY_SIZE (frv_unit_codes)];
+ unsigned int num_insns;
+} frv_packet;
+
+/* Return the regstate_t flags for the given COND_EXEC condition.
+ Abort if the condition isn't in the right form. */
+
+static int
+frv_cond_flags (rtx cond)
+{
+ gcc_assert ((GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
+ && GET_CODE (XEXP (cond, 0)) == REG
+ && CR_P (REGNO (XEXP (cond, 0)))
+ && XEXP (cond, 1) == const0_rtx);
+ return ((REGNO (XEXP (cond, 0)) - CR_FIRST)
+ | (GET_CODE (cond) == NE
+ ? REGSTATE_IF_TRUE
+ : REGSTATE_IF_FALSE));
+}
+
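+/* For example, a condition (ne (reg:CC_CCR cr) (const_int 0)) on the
+ fourth CR register yields (3 | REGSTATE_IF_TRUE), and the EQ form of the
+ same test yields (3 | REGSTATE_IF_FALSE). */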
+
+/* Return true if something accessed under condition COND2 can
+ conflict with something written under condition COND1. */
+
+static bool
+frv_regstate_conflict_p (regstate_t cond1, regstate_t cond2)
+{
+ /* If either reference was unconditional, we have a conflict. */
+ if ((cond1 & REGSTATE_IF_EITHER) == 0
+ || (cond2 & REGSTATE_IF_EITHER) == 0)
+ return true;
+
+ /* The references might conflict if they were controlled by
+ different CRs. */
+ if ((cond1 & REGSTATE_CC_MASK) != (cond2 & REGSTATE_CC_MASK))
+ return true;
+
+ /* They definitely conflict if they are controlled by the
+ same condition. */
+ if ((cond1 & cond2 & REGSTATE_IF_EITHER) != 0)
+ return true;
+
+ return false;
+}
+
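+/* For illustration: an access under (CR3, IF_TRUE) does not conflict with
+ a write under (CR3, IF_FALSE), since exactly one of the two executes; it
+ does conflict with an unconditional access, with an access controlled by
+ a different CR, and with another access under (CR3, IF_TRUE). */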
+
+/* A for_each_rtx callback. Return 1 if *X depends on an instruction in
+ the current packet. DATA points to a regstate_t that describes the
+ condition under which *X might be set or used. */
+
+static int
+frv_registers_conflict_p_1 (rtx *x, void *data)
+{
+ unsigned int regno, i;
+ regstate_t cond;
+
+ cond = *(regstate_t *) data;
+
+ if (GET_CODE (*x) == REG)
+ FOR_EACH_REGNO (regno, *x)
+ if ((frv_packet.regstate[regno] & REGSTATE_MODIFIED) != 0)
+ if (frv_regstate_conflict_p (frv_packet.regstate[regno], cond))
+ return 1;
+
+ if (GET_CODE (*x) == MEM)
+ {
+ /* If we ran out of memory slots, assume a conflict. */
+ if (frv_packet.num_mems > ARRAY_SIZE (frv_packet.mems))
+ return 1;
+
+ /* Check for output or true dependencies with earlier MEMs. */
+ for (i = 0; i < frv_packet.num_mems; i++)
+ if (frv_regstate_conflict_p (frv_packet.mems[i].cond, cond))
+ {
+ if (true_dependence (frv_packet.mems[i].mem, VOIDmode,
+ *x, rtx_varies_p))
+ return 1;
+
+ if (output_dependence (frv_packet.mems[i].mem, *x))
+ return 1;
+ }
+ }
+
+ /* The return values of calls aren't significant: they describe
+ the effect of the call as a whole, not of the insn itself. */
+ if (GET_CODE (*x) == SET && GET_CODE (SET_SRC (*x)) == CALL)
+ {
+ if (for_each_rtx (&SET_SRC (*x), frv_registers_conflict_p_1, data))
+ return 1;
+ return -1;
+ }
+
+ /* Check subexpressions. */
+ return 0;
+}
+
+
+/* Return true if something in X might depend on an instruction
+ in the current packet. */
+
+static bool
+frv_registers_conflict_p (rtx x)
+{
+ regstate_t flags;
+
+ flags = 0;
+ if (GET_CODE (x) == COND_EXEC)
+ {
+ if (for_each_rtx (&XEXP (x, 0), frv_registers_conflict_p_1, &flags))
+ return true;
+
+ flags |= frv_cond_flags (XEXP (x, 0));
+ x = XEXP (x, 1);
+ }
+ return for_each_rtx (&x, frv_registers_conflict_p_1, &flags);
+}
+
+
+/* A note_stores callback. DATA points to the regstate_t condition
+ under which X is modified. Update FRV_PACKET accordingly. */
+
+static void
+frv_registers_update_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ unsigned int regno;
+
+ if (GET_CODE (x) == REG)
+ FOR_EACH_REGNO (regno, x)
+ frv_packet.regstate[regno] |= *(regstate_t *) data;
+
+ if (GET_CODE (x) == MEM)
+ {
+ if (frv_packet.num_mems < ARRAY_SIZE (frv_packet.mems))
+ {
+ frv_packet.mems[frv_packet.num_mems].mem = x;
+ frv_packet.mems[frv_packet.num_mems].cond = *(regstate_t *) data;
+ }
+ frv_packet.num_mems++;
+ }
+}
+
+
+/* Update the register state information for an instruction whose
+ body is X. */
+
+static void
+frv_registers_update (rtx x)
+{
+ regstate_t flags;
+
+ flags = REGSTATE_MODIFIED;
+ if (GET_CODE (x) == COND_EXEC)
+ {
+ flags |= frv_cond_flags (XEXP (x, 0));
+ x = XEXP (x, 1);
+ }
+ note_stores (x, frv_registers_update_1, &flags);
+}
+
+
+/* Initialize frv_packet for the start of a new packet. */
+
+static void
+frv_start_packet (void)
+{
+ enum frv_insn_group group;
+
+ memset (frv_packet.regstate, 0, sizeof (frv_packet.regstate));
+ frv_packet.num_mems = 0;
+ frv_packet.num_insns = 0;
+ for (group = GROUP_I; group < NUM_GROUPS;
+ group = (enum frv_insn_group) (group + 1))
+ frv_packet.groups[group].num_insns = 0;
+}
+
+
+/* Likewise for the start of a new basic block. */
+
+static void
+frv_start_packet_block (void)
+{
+ state_reset (frv_packet.dfa_state);
+ frv_start_packet ();
+}
+
+
+/* Finish the current packet, if any, and start a new one. Call
+ HANDLE_PACKET with FRV_PACKET describing the completed packet. */
+
+static void
+frv_finish_packet (void (*handle_packet) (void))
+{
+ if (frv_packet.num_insns > 0)
+ {
+ handle_packet ();
+ state_transition (frv_packet.dfa_state, 0);
+ frv_start_packet ();
+ }
+}
+
+
+/* Return true if INSN can be added to the current packet. Update
+ the DFA state on success. */
+
+static bool
+frv_pack_insn_p (rtx insn)
+{
+ /* See if the packet is already as long as it can be. */
+ if (frv_packet.num_insns == frv_packet.issue_rate)
+ return false;
+
+ /* If the scheduler thought that an instruction should start a packet,
+ it's usually a good idea to believe it. It knows much more about
+ the latencies than we do.
+
+ There are some exceptions though:
+
+ - Conditional instructions are scheduled on the assumption that
+ they will be executed. This is usually a good thing, since it
+ tends to avoid unnecessary stalls in the conditional code.
+ But we want to pack conditional instructions as tightly as
+ possible, in order to optimize the case where they aren't
+ executed.
+
+ - The scheduler will always put branches on their own, even
+ if there's no real dependency.
+
+ - There's no point putting a call in its own packet unless
+ we have to. */
+ if (frv_packet.num_insns > 0
+ && GET_CODE (insn) == INSN
+ && GET_MODE (insn) == TImode
+ && GET_CODE (PATTERN (insn)) != COND_EXEC)
+ return false;
+
+ /* Check for register conflicts. Don't do this for setlo since any
+ conflict will be with the partnering sethi, with which it can
+ be packed. */
+ if (get_attr_type (insn) != TYPE_SETLO)
+ if (frv_registers_conflict_p (PATTERN (insn)))
+ return false;
+
+ return state_transition (frv_packet.dfa_state, insn) < 0;
+}
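+
+/* Editor's note (not part of the upstream source): the DFA convention
+   relied on above and throughout this file is that
+
+     state_transition (state, insn) < 0
+
+   means INSN can issue in the current cycle (advancing STATE
+   accordingly), while
+
+     state_transition (state, 0)
+
+   advances STATE to the start of the next cycle.  */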
+
+
+/* Add instruction INSN to the current packet. */
+
+static void
+frv_add_insn_to_packet (rtx insn)
+{
+ struct frv_packet_group *packet_group;
+
+ packet_group = &frv_packet.groups[frv_unit_groups[frv_insn_unit (insn)]];
+ packet_group->insns[packet_group->num_insns++] = insn;
+ frv_packet.insns[frv_packet.num_insns++] = insn;
+
+ frv_registers_update (PATTERN (insn));
+}
+
+
+/* Insert INSN (a member of frv_nops[]) into the current packet. If the
+ packet ends in a branch or call, insert the nop before it, otherwise
+ add to the end. */
+
+static void
+frv_insert_nop_in_packet (rtx insn)
+{
+ struct frv_packet_group *packet_group;
+ rtx last;
+
+ packet_group = &frv_packet.groups[frv_unit_groups[frv_insn_unit (insn)]];
+ last = frv_packet.insns[frv_packet.num_insns - 1];
+ if (GET_CODE (last) != INSN)
+ {
+ insn = emit_insn_before (PATTERN (insn), last);
+ frv_packet.insns[frv_packet.num_insns - 1] = insn;
+ frv_packet.insns[frv_packet.num_insns++] = last;
+ }
+ else
+ {
+ insn = emit_insn_after (PATTERN (insn), last);
+ frv_packet.insns[frv_packet.num_insns++] = insn;
+ }
+ packet_group->insns[packet_group->num_insns++] = insn;
+}
+
+
+/* If packing is enabled, divide the instructions into packets and
+ return true. Call HANDLE_PACKET for each complete packet. */
+
+static bool
+frv_for_each_packet (void (*handle_packet) (void))
+{
+ rtx insn, next_insn;
+
+ frv_packet.issue_rate = frv_issue_rate ();
+
+ /* Early exit if we don't want to pack insns. */
+ if (!optimize
+ || !flag_schedule_insns_after_reload
+ || !TARGET_VLIW_BRANCH
+ || frv_packet.issue_rate == 1)
+ return false;
+
+ /* Set up the initial packing state. */
+ dfa_start ();
+ frv_packet.dfa_state = alloca (state_size ());
+
+ frv_start_packet_block ();
+ for (insn = get_insns (); insn != 0; insn = next_insn)
+ {
+ enum rtx_code code;
+ bool eh_insn_p;
+
+ code = GET_CODE (insn);
+ next_insn = NEXT_INSN (insn);
+
+ if (code == CODE_LABEL)
+ {
+ frv_finish_packet (handle_packet);
+ frv_start_packet_block ();
+ }
+
+ if (INSN_P (insn))
+ switch (GET_CODE (PATTERN (insn)))
+ {
+ case USE:
+ case CLOBBER:
+ case ADDR_VEC:
+ case ADDR_DIFF_VEC:
+ break;
+
+ default:
+ /* Calls mustn't be packed on a TOMCAT. */
+ if (GET_CODE (insn) == CALL_INSN && frv_cpu_type == FRV_CPU_TOMCAT)
+ frv_finish_packet (handle_packet);
+
+ /* Since the last instruction in a packet determines the EH
+ region, any exception-throwing instruction must come at
+ the end of the reordered packet.  Insns that issue to a
+ branch unit are bound to come last; for others it's
+ too hard to predict. */
+ eh_insn_p = (find_reg_note (insn, REG_EH_REGION, NULL) != NULL);
+ if (eh_insn_p && !frv_issues_to_branch_unit_p (insn))
+ frv_finish_packet (handle_packet);
+
+ /* Finish the current packet if we can't add INSN to it.
+ Simulate cycles until INSN is ready to issue. */
+ if (!frv_pack_insn_p (insn))
+ {
+ frv_finish_packet (handle_packet);
+ while (!frv_pack_insn_p (insn))
+ state_transition (frv_packet.dfa_state, 0);
+ }
+
+ /* Add the instruction to the packet. */
+ frv_add_insn_to_packet (insn);
+
+ /* Calls and jumps end a packet, as do insns that throw
+ an exception. */
+ if (code == CALL_INSN || code == JUMP_INSN || eh_insn_p)
+ frv_finish_packet (handle_packet);
+ break;
+ }
+ }
+ frv_finish_packet (handle_packet);
+ dfa_finish ();
+ return true;
+}
+
+/* Subroutine of frv_sort_insn_group. We are trying to sort
+ frv_packet.groups[GROUP].sorted[0...NUM_INSNS-1] into assembly
+ language order. We have already picked a new position for
+ frv_packet.groups[GROUP].sorted[X] if bit X of ISSUED is set.
+ These instructions will occupy elements [0, LOWER_SLOT) and
+ [UPPER_SLOT, NUM_INSNS) of the final (sorted) array. STATE is
+ the DFA state after issuing these instructions.
+
+ Try filling elements [LOWER_SLOT, UPPER_SLOT) with every permutation
+ of the unused instructions. Return true if one such permutation gives
+ a valid ordering, leaving the successful permutation in sorted[].
+ Do not modify sorted[] until a valid permutation is found. */
+
+static bool
+frv_sort_insn_group_1 (enum frv_insn_group group,
+ unsigned int lower_slot, unsigned int upper_slot,
+ unsigned int issued, unsigned int num_insns,
+ state_t state)
+{
+ struct frv_packet_group *packet_group;
+ unsigned int i;
+ state_t test_state;
+ size_t dfa_size;
+ rtx insn;
+
+ /* Early success if we've filled all the slots. */
+ if (lower_slot == upper_slot)
+ return true;
+
+ packet_group = &frv_packet.groups[group];
+ dfa_size = state_size ();
+ test_state = alloca (dfa_size);
+
+ /* Try issuing each unused instruction. */
+ for (i = num_insns - 1; i + 1 != 0; i--)
+ if (~issued & (1 << i))
+ {
+ insn = packet_group->sorted[i];
+ memcpy (test_state, state, dfa_size);
+ if (state_transition (test_state, insn) < 0
+ && cpu_unit_reservation_p (test_state,
+ NTH_UNIT (group, upper_slot - 1))
+ && frv_sort_insn_group_1 (group, lower_slot, upper_slot - 1,
+ issued | (1 << i), num_insns,
+ test_state))
+ {
+ packet_group->sorted[upper_slot - 1] = insn;
+ return true;
+ }
+ }
+
+ return false;
+}
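+
+/* Editor's worked example (not part of the upstream source): with
+   NUM_INSNS == 3, LOWER_SLOT == 0 and UPPER_SLOT == 3, the first call
+   tries each unissued insn in slot 2; a candidate that the DFA accepts
+   on unit NTH_UNIT (GROUP, 2) triggers a recursive call with
+   ISSUED |= (1 << i) and UPPER_SLOT == 2, and so on until
+   LOWER_SLOT == UPPER_SLOT.  sorted[] is written only on the way back
+   out of a fully successful recursion, as promised above.  */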
+
+/* Compare two instructions by their frv_insn_unit. */
+
+static int
+frv_compare_insns (const void *first, const void *second)
+{
+ const rtx *const insn1 = (rtx const *) first,
+ *const insn2 = (rtx const *) second;
+ return frv_insn_unit (*insn1) - frv_insn_unit (*insn2);
+}
+
+/* Copy frv_packet.groups[GROUP].insns[] to frv_packet.groups[GROUP].sorted[]
+ and sort it into assembly language order. See frv.md for a description of
+ the algorithm. */
+
+static void
+frv_sort_insn_group (enum frv_insn_group group)
+{
+ struct frv_packet_group *packet_group;
+ unsigned int first, i, nop, max_unit, num_slots;
+ state_t state, test_state;
+ size_t dfa_size;
+
+ packet_group = &frv_packet.groups[group];
+
+ /* Assume no nop is needed. */
+ packet_group->nop = 0;
+
+ if (packet_group->num_insns == 0)
+ return;
+
+ /* Copy insns[] to sorted[]. */
+ memcpy (packet_group->sorted, packet_group->insns,
+ sizeof (rtx) * packet_group->num_insns);
+
+ /* Sort sorted[] by the unit that each insn tries to take first. */
+ if (packet_group->num_insns > 1)
+ qsort (packet_group->sorted, packet_group->num_insns,
+ sizeof (rtx), frv_compare_insns);
+
+ /* That's always enough for branch and control insns. */
+ if (group == GROUP_B || group == GROUP_C)
+ return;
+
+ dfa_size = state_size ();
+ state = alloca (dfa_size);
+ test_state = alloca (dfa_size);
+
+ /* Find the highest FIRST such that sorted[0...FIRST-1] can issue
+ consecutively and such that the DFA takes unit X when sorted[X]
+ is added. Set STATE to the new DFA state. */
+ state_reset (test_state);
+ for (first = 0; first < packet_group->num_insns; first++)
+ {
+ memcpy (state, test_state, dfa_size);
+ if (state_transition (test_state, packet_group->sorted[first]) >= 0
+ || !cpu_unit_reservation_p (test_state, NTH_UNIT (group, first)))
+ break;
+ }
+
+ /* If all the instructions issued in ascending order, we're done. */
+ if (first == packet_group->num_insns)
+ return;
+
+ /* Add nops to the end of sorted[] and try each permutation until
+ we find one that works. */
+ for (nop = 0; nop < frv_num_nops; nop++)
+ {
+ max_unit = frv_insn_unit (frv_nops[nop]);
+ if (frv_unit_groups[max_unit] == group)
+ {
+ packet_group->nop = frv_nops[nop];
+ num_slots = UNIT_NUMBER (max_unit) + 1;
+ for (i = packet_group->num_insns; i < num_slots; i++)
+ packet_group->sorted[i] = frv_nops[nop];
+ if (frv_sort_insn_group_1 (group, first, num_slots,
+ (1 << first) - 1, num_slots, state))
+ return;
+ }
+ }
+ gcc_unreachable ();
+}
+
+/* Sort the current packet into assembly-language order. Set packing
+ flags as appropriate. */
+
+static void
+frv_reorder_packet (void)
+{
+ unsigned int cursor[NUM_GROUPS];
+ rtx insns[ARRAY_SIZE (frv_unit_groups)];
+ unsigned int unit, to, from;
+ enum frv_insn_group group;
+ struct frv_packet_group *packet_group;
+
+ /* First sort each group individually. */
+ for (group = GROUP_I; group < NUM_GROUPS;
+ group = (enum frv_insn_group) (group + 1))
+ {
+ cursor[group] = 0;
+ frv_sort_insn_group (group);
+ }
+
+ /* Go through the unit template and try to add an instruction from
+ that unit's group. */
+ to = 0;
+ for (unit = 0; unit < ARRAY_SIZE (frv_unit_groups); unit++)
+ {
+ group = frv_unit_groups[unit];
+ packet_group = &frv_packet.groups[group];
+ if (cursor[group] < packet_group->num_insns)
+ {
+ /* frv_reorg should have added nops for us. */
+ gcc_assert (packet_group->sorted[cursor[group]]
+ != packet_group->nop);
+ insns[to++] = packet_group->sorted[cursor[group]++];
+ }
+ }
+
+ gcc_assert (to == frv_packet.num_insns);
+
+ /* Clear the last instruction's packing flag, thus marking the end of
+ a packet. Reorder the other instructions relative to it. */
+ CLEAR_PACKING_FLAG (insns[to - 1]);
+ for (from = 0; from < to - 1; from++)
+ {
+ remove_insn (insns[from]);
+ add_insn_before (insns[from], insns[to - 1], NULL);
+ SET_PACKING_FLAG (insns[from]);
+ }
+}
+
+
+/* Divide instructions into packets. Reorder the contents of each
+ packet so that they are in the correct assembly-language order.
+
+ Since this pass can change the raw meaning of the rtl stream, it must
+ only be called at the last minute, just before the instructions are
+ written out. */
+
+static void
+frv_pack_insns (void)
+{
+ if (frv_for_each_packet (frv_reorder_packet))
+ frv_insn_packing_flag = 0;
+ else
+ frv_insn_packing_flag = -1;
+}
+
+/* See whether we need to add nops to group GROUP in order to
+ make a valid packet. */
+
+static void
+frv_fill_unused_units (enum frv_insn_group group)
+{
+ unsigned int non_nops, nops, i;
+ struct frv_packet_group *packet_group;
+
+ packet_group = &frv_packet.groups[group];
+
+ /* Sort the instructions into assembly-language order.
+ Use nops to fill slots that are otherwise unused. */
+ frv_sort_insn_group (group);
+
+ /* See how many nops are needed before the final useful instruction. */
+ i = nops = 0;
+ for (non_nops = 0; non_nops < packet_group->num_insns; non_nops++)
+ while (packet_group->sorted[i++] == packet_group->nop)
+ nops++;
+
+ /* Insert that many nops into the instruction stream. */
+ while (nops-- > 0)
+ frv_insert_nop_in_packet (packet_group->nop);
+}
+
+/* Return true if accesses IO1 and IO2 refer to the same doubleword. */
+
+static bool
+frv_same_doubleword_p (const struct frv_io *io1, const struct frv_io *io2)
+{
+ if (io1->const_address != 0 && io2->const_address != 0)
+ return io1->const_address == io2->const_address;
+
+ if (io1->var_address != 0 && io2->var_address != 0)
+ return rtx_equal_p (io1->var_address, io2->var_address);
+
+ return false;
+}
+
+/* Return true if operations IO1 and IO2 are guaranteed to complete
+ in order. */
+
+static bool
+frv_io_fixed_order_p (const struct frv_io *io1, const struct frv_io *io2)
+{
+ /* The order of writes is always preserved. */
+ if (io1->type == FRV_IO_WRITE && io2->type == FRV_IO_WRITE)
+ return true;
+
+ /* The order of reads isn't preserved. */
+ if (io1->type != FRV_IO_WRITE && io2->type != FRV_IO_WRITE)
+ return false;
+
+ /* One operation is a write and the other is (or could be) a read.
+ The order is only guaranteed if the accesses are to the same
+ doubleword. */
+ return frv_same_doubleword_p (io1, io2);
+}
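+
+/* Editor's summary of the three cases above (not part of the upstream
+   source):
+
+     IO1 and IO2 both writes          -> order guaranteed
+     neither IO1 nor IO2 a write      -> order not guaranteed
+     one write, one (possible) read   -> guaranteed only when both
+                                         access the same doubleword  */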
+
+/* Generalize I/O operation X so that it covers both X and Y. */
+
+static void
+frv_io_union (struct frv_io *x, const struct frv_io *y)
+{
+ if (x->type != y->type)
+ x->type = FRV_IO_UNKNOWN;
+ if (!frv_same_doubleword_p (x, y))
+ {
+ x->const_address = 0;
+ x->var_address = 0;
+ }
+}
+
+/* Fill IO with information about the load or store associated with
+ membar instruction INSN. */
+
+static void
+frv_extract_membar (struct frv_io *io, rtx insn)
+{
+ extract_insn (insn);
+ io->type = (enum frv_io_type) INTVAL (recog_data.operand[2]);
+ io->const_address = INTVAL (recog_data.operand[1]);
+ io->var_address = XEXP (recog_data.operand[0], 0);
+}
+
+/* A note_stores callback for which DATA points to an rtx. Nullify *DATA
+ if X is a register and *DATA depends on X. */
+
+static void
+frv_io_check_address (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ rtx *other = (rtx *) data;
+
+ if (REG_P (x) && *other != 0 && reg_overlap_mentioned_p (x, *other))
+ *other = 0;
+}
+
+/* A note_stores callback for which DATA points to a HARD_REG_SET.
+ Remove every modified register from the set. */
+
+static void
+frv_io_handle_set (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ HARD_REG_SET *set = (HARD_REG_SET *) data;
+ unsigned int regno;
+
+ if (REG_P (x))
+ FOR_EACH_REGNO (regno, x)
+ CLEAR_HARD_REG_BIT (*set, regno);
+}
+
+/* A for_each_rtx callback for which DATA points to a HARD_REG_SET.
+ Add every register in *X to the set. */
+
+static int
+frv_io_handle_use_1 (rtx *x, void *data)
+{
+ HARD_REG_SET *set = (HARD_REG_SET *) data;
+ unsigned int regno;
+
+ if (REG_P (*x))
+ FOR_EACH_REGNO (regno, *x)
+ SET_HARD_REG_BIT (*set, regno);
+
+ return 0;
+}
+
+/* A note_uses callback that applies frv_io_handle_use_1 to an
+ entire rhs value. */
+
+static void
+frv_io_handle_use (rtx *x, void *data)
+{
+ for_each_rtx (x, frv_io_handle_use_1, data);
+}
+
+/* Go through block BB looking for membars to remove. There are two
+ cases where intra-block analysis is enough:
+
+ - a membar is redundant if it occurs between two consecutive I/O
+ operations and if those operations are guaranteed to complete
+ in order.
+
+ - a membar for a __builtin_read is redundant if the result is
+ used before the next I/O operation is issued.
+
+ If the last membar in the block could not be removed, and there
+ are guaranteed to be no I/O operations between that membar and
+ the end of the block, store the membar in *LAST_MEMBAR, otherwise
+ store null.
+
+ Describe the block's first I/O operation in *NEXT_IO. Describe
+ an unknown operation if the block doesn't do any I/O. */
+
+static void
+frv_optimize_membar_local (basic_block bb, struct frv_io *next_io,
+ rtx *last_membar)
+{
+ HARD_REG_SET used_regs;
+ rtx next_membar, set, insn;
+ bool next_is_end_p;
+
+ /* NEXT_IO is the next I/O operation to be performed after the current
+ instruction. It starts off as being an unknown operation. */
+ memset (next_io, 0, sizeof (*next_io));
+
+ /* NEXT_IS_END_P is true if NEXT_IO describes the end of the block. */
+ next_is_end_p = true;
+
+ /* If the current instruction is a __builtin_read or __builtin_write,
+ NEXT_MEMBAR is the membar instruction associated with it. NEXT_MEMBAR
+ is null if the membar has already been deleted.
+
+ Note that the initialization here should only be needed to
+ suppress warnings. */
+ next_membar = 0;
+
+ /* USED_REGS is the set of registers that are used before the
+ next I/O instruction. */
+ CLEAR_HARD_REG_SET (used_regs);
+
+ for (insn = BB_END (bb); insn != BB_HEAD (bb); insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ /* We can't predict what a call will do to volatile memory. */
+ memset (next_io, 0, sizeof (struct frv_io));
+ next_is_end_p = false;
+ CLEAR_HARD_REG_SET (used_regs);
+ }
+ else if (INSN_P (insn))
+ switch (recog_memoized (insn))
+ {
+ case CODE_FOR_optional_membar_qi:
+ case CODE_FOR_optional_membar_hi:
+ case CODE_FOR_optional_membar_si:
+ case CODE_FOR_optional_membar_di:
+ next_membar = insn;
+ if (next_is_end_p)
+ {
+ /* Local information isn't enough to decide whether this
+ membar is needed. Stash it away for later. */
+ *last_membar = insn;
+ frv_extract_membar (next_io, insn);
+ next_is_end_p = false;
+ }
+ else
+ {
+ /* Check whether the I/O operation before INSN could be
+ reordered with the one described by NEXT_IO.  If it can't
+ be, INSN will not be needed.  */
+ struct frv_io prev_io;
+
+ frv_extract_membar (&prev_io, insn);
+ if (frv_io_fixed_order_p (&prev_io, next_io))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; [Local] Removing membar %d since order"
+ " of accesses is guaranteed\n",
+ INSN_UID (next_membar));
+
+ insn = NEXT_INSN (insn);
+ delete_insn (next_membar);
+ next_membar = 0;
+ }
+ *next_io = prev_io;
+ }
+ break;
+
+ default:
+ /* Invalidate NEXT_IO's address if it depends on something that
+ is clobbered by INSN. */
+ if (next_io->var_address)
+ note_stores (PATTERN (insn), frv_io_check_address,
+ &next_io->var_address);
+
+ /* If the next membar is associated with a __builtin_read,
+ see if INSN reads from that address. If it does, and if
+ the destination register is used before the next I/O access,
+ there is no need for the membar. */
+ set = PATTERN (insn);
+ if (next_io->type == FRV_IO_READ
+ && next_io->var_address != 0
+ && next_membar != 0
+ && GET_CODE (set) == SET
+ && GET_CODE (SET_DEST (set)) == REG
+ && TEST_HARD_REG_BIT (used_regs, REGNO (SET_DEST (set))))
+ {
+ rtx src;
+
+ src = SET_SRC (set);
+ if (GET_CODE (src) == ZERO_EXTEND)
+ src = XEXP (src, 0);
+
+ if (GET_CODE (src) == MEM
+ && rtx_equal_p (XEXP (src, 0), next_io->var_address))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; [Local] Removing membar %d since the target"
+ " of %d is used before the I/O operation\n",
+ INSN_UID (next_membar), INSN_UID (insn));
+
+ if (next_membar == *last_membar)
+ *last_membar = 0;
+
+ delete_insn (next_membar);
+ next_membar = 0;
+ }
+ }
+
+ /* If INSN has volatile references, forget about any registers
+ that are used after it. Otherwise forget about uses that
+ are (or might be) defined by INSN. */
+ if (volatile_refs_p (PATTERN (insn)))
+ CLEAR_HARD_REG_SET (used_regs);
+ else
+ note_stores (PATTERN (insn), frv_io_handle_set, &used_regs);
+
+ note_uses (&PATTERN (insn), frv_io_handle_use, &used_regs);
+ break;
+ }
+}
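+
+/* Editor's example of the first local case above (not part of the
+   upstream source): in a block containing
+
+     __builtin_write32 (p, x);
+     __builtin_write32 (q, y);
+
+   both accesses are writes, so frv_io_fixed_order_p returns true and
+   the membar emitted for the first write is deleted; the membar of the
+   second write survives (and may still be removed globally below).  */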
+
+/* See if MEMBAR, the last membar instruction in BB, can be removed.
+ FIRST_IO[X] describes the first I/O operation performed by basic block X.  */
+
+static void
+frv_optimize_membar_global (basic_block bb, struct frv_io *first_io,
+ rtx membar)
+{
+ struct frv_io this_io, next_io;
+ edge succ;
+ edge_iterator ei;
+
+ /* We need to keep the membar if there is an edge to the exit block. */
+ FOR_EACH_EDGE (succ, ei, bb->succs)
+ if (succ->dest == EXIT_BLOCK_PTR)
+ return;
+
+ /* Work out the union of all successor blocks. */
+ ei = ei_start (bb->succs);
+ ei_cond (ei, &succ);
+ next_io = first_io[succ->dest->index];
+ ei = ei_start (bb->succs);
+ if (ei_cond (ei, &succ))
+ {
+ for (ei_next (&ei); ei_cond (ei, &succ); ei_next (&ei))
+ frv_io_union (&next_io, &first_io[succ->dest->index]);
+ }
+ else
+ gcc_unreachable ();
+
+ frv_extract_membar (&this_io, membar);
+ if (frv_io_fixed_order_p (&this_io, &next_io))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; [Global] Removing membar %d since order of accesses"
+ " is guaranteed\n", INSN_UID (membar));
+
+ delete_insn (membar);
+ }
+}
+
+/* Remove redundant membars from the current function. */
+
+static void
+frv_optimize_membar (void)
+{
+ basic_block bb;
+ struct frv_io *first_io;
+ rtx *last_membar;
+
+ compute_bb_for_insn ();
+ first_io = XCNEWVEC (struct frv_io, last_basic_block);
+ last_membar = XCNEWVEC (rtx, last_basic_block);
+
+ FOR_EACH_BB (bb)
+ frv_optimize_membar_local (bb, &first_io[bb->index],
+ &last_membar[bb->index]);
+
+ FOR_EACH_BB (bb)
+ if (last_membar[bb->index] != 0)
+ frv_optimize_membar_global (bb, first_io, last_membar[bb->index]);
+
+ free (first_io);
+ free (last_membar);
+}
+
+/* Used by frv_reorg to keep track of the current packet's address. */
+static unsigned int frv_packet_address;
+
+/* If the current packet falls through to a label, try to pad the packet
+ with nops in order to fit the label's alignment requirements. */
+
+static void
+frv_align_label (void)
+{
+ unsigned int alignment, target, nop;
+ rtx x, last, barrier, label;
+
+ /* Walk forward to the start of the next packet. Set ALIGNMENT to the
+ maximum alignment of that packet, LABEL to the last label between
+ the packets, and BARRIER to the last barrier. */
+ last = frv_packet.insns[frv_packet.num_insns - 1];
+ label = barrier = 0;
+ alignment = 4;
+ for (x = NEXT_INSN (last); x != 0 && !INSN_P (x); x = NEXT_INSN (x))
+ {
+ if (LABEL_P (x))
+ {
+ unsigned int subalign = 1 << label_to_alignment (x);
+ alignment = MAX (alignment, subalign);
+ label = x;
+ }
+ if (BARRIER_P (x))
+ barrier = x;
+ }
+
+ /* If -malign-labels, and the packet falls through to an unaligned
+ label, try introducing a nop to align that label to 8 bytes. */
+ if (TARGET_ALIGN_LABELS
+ && label != 0
+ && barrier == 0
+ && frv_packet.num_insns < frv_packet.issue_rate)
+ alignment = MAX (alignment, 8);
+
+ /* Advance the address to the end of the current packet. */
+ frv_packet_address += frv_packet.num_insns * 4;
+
+ /* Work out the target address, after alignment. */
+ target = (frv_packet_address + alignment - 1) & -alignment;
+
+ /* If the packet falls through to the label, try to find an efficient
+ padding sequence. */
+ if (barrier == 0)
+ {
+ /* First try adding nops to the current packet. */
+ for (nop = 0; nop < frv_num_nops; nop++)
+ while (frv_packet_address < target && frv_pack_insn_p (frv_nops[nop]))
+ {
+ frv_insert_nop_in_packet (frv_nops[nop]);
+ frv_packet_address += 4;
+ }
+
+ /* If we still haven't reached the target, add some new packets that
+ contain only nops. If there are two types of nop, insert an
+ alternating sequence of frv_nops[0] and frv_nops[1], which will
+ lead to packets like:
+
+ nop.p
+ mnop.p/fnop.p
+ nop.p
+ mnop/fnop
+
+ etc. Just emit frv_nops[0] if that's the only nop we have. */
+ last = frv_packet.insns[frv_packet.num_insns - 1];
+ nop = 0;
+ while (frv_packet_address < target)
+ {
+ last = emit_insn_after (PATTERN (frv_nops[nop]), last);
+ frv_packet_address += 4;
+ if (frv_num_nops > 1)
+ nop ^= 1;
+ }
+ }
+
+ frv_packet_address = target;
+}
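+
+/* Editor's worked example of the alignment arithmetic above (not part
+   of the upstream source): with frv_packet_address == 20 and
+   alignment == 8,
+
+     target = (20 + 8 - 1) & -8 = 27 & ~7 = 24
+
+   so one 4-byte nop pads the stream from 20 up to 24.  */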
+
+/* Subroutine of frv_reorg, called after each packet has been constructed
+ in frv_packet. */
+
+static void
+frv_reorg_packet (void)
+{
+ frv_fill_unused_units (GROUP_I);
+ frv_fill_unused_units (GROUP_FM);
+ frv_align_label ();
+}
+
+/* Add an instruction with pattern NOP to frv_nops[]. */
+
+static void
+frv_register_nop (rtx nop)
+{
+ nop = make_insn_raw (nop);
+ NEXT_INSN (nop) = 0;
+ PREV_INSN (nop) = 0;
+ frv_nops[frv_num_nops++] = nop;
+}
+
+/* Implement TARGET_MACHINE_DEPENDENT_REORG. Divide the instructions
+ into packets and check whether we need to insert nops in order to
+ fulfill the processor's issue requirements. Also, if the user has
+ requested a certain alignment for a label, try to meet that alignment
+ by inserting nops in the previous packet. */
+
+static void
+frv_reorg (void)
+{
+ if (optimize > 0 && TARGET_OPTIMIZE_MEMBAR && cfun->machine->has_membar_p)
+ frv_optimize_membar ();
+
+ frv_num_nops = 0;
+ frv_register_nop (gen_nop ());
+ if (TARGET_MEDIA)
+ frv_register_nop (gen_mnop ());
+ if (TARGET_HARD_FLOAT)
+ frv_register_nop (gen_fnop ());
+
+ /* Estimate the length of each branch. Although this may change after
+ we've inserted nops, it will only do so in big functions. */
+ shorten_branches (get_insns ());
+
+ frv_packet_address = 0;
+ frv_for_each_packet (frv_reorg_packet);
+}
+
+#define def_builtin(name, type, code) \
+ add_builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL)
+
+struct builtin_description
+{
+ enum insn_code icode;
+ const char *name;
+ enum frv_builtins code;
+ enum rtx_code comparison;
+ unsigned int flag;
+};
+
+/* Media intrinsics that take a single, constant argument. */
+
+static struct builtin_description bdesc_set[] =
+{
+ { CODE_FOR_mhdsets, "__MHDSETS", FRV_BUILTIN_MHDSETS, UNKNOWN, 0 }
+};
+
+/* Media intrinsics that take just one argument. */
+
+static struct builtin_description bdesc_1arg[] =
+{
+ { CODE_FOR_mnot, "__MNOT", FRV_BUILTIN_MNOT, UNKNOWN, 0 },
+ { CODE_FOR_munpackh, "__MUNPACKH", FRV_BUILTIN_MUNPACKH, UNKNOWN, 0 },
+ { CODE_FOR_mbtoh, "__MBTOH", FRV_BUILTIN_MBTOH, UNKNOWN, 0 },
+ { CODE_FOR_mhtob, "__MHTOB", FRV_BUILTIN_MHTOB, UNKNOWN, 0},
+ { CODE_FOR_mabshs, "__MABSHS", FRV_BUILTIN_MABSHS, UNKNOWN, 0 },
+ { CODE_FOR_scutss, "__SCUTSS", FRV_BUILTIN_SCUTSS, UNKNOWN, 0 }
+};
+
+/* Media intrinsics that take two arguments. */
+
+static struct builtin_description bdesc_2arg[] =
+{
+ { CODE_FOR_mand, "__MAND", FRV_BUILTIN_MAND, UNKNOWN, 0},
+ { CODE_FOR_mor, "__MOR", FRV_BUILTIN_MOR, UNKNOWN, 0},
+ { CODE_FOR_mxor, "__MXOR", FRV_BUILTIN_MXOR, UNKNOWN, 0},
+ { CODE_FOR_maveh, "__MAVEH", FRV_BUILTIN_MAVEH, UNKNOWN, 0},
+ { CODE_FOR_msaths, "__MSATHS", FRV_BUILTIN_MSATHS, UNKNOWN, 0},
+ { CODE_FOR_msathu, "__MSATHU", FRV_BUILTIN_MSATHU, UNKNOWN, 0},
+ { CODE_FOR_maddhss, "__MADDHSS", FRV_BUILTIN_MADDHSS, UNKNOWN, 0},
+ { CODE_FOR_maddhus, "__MADDHUS", FRV_BUILTIN_MADDHUS, UNKNOWN, 0},
+ { CODE_FOR_msubhss, "__MSUBHSS", FRV_BUILTIN_MSUBHSS, UNKNOWN, 0},
+ { CODE_FOR_msubhus, "__MSUBHUS", FRV_BUILTIN_MSUBHUS, UNKNOWN, 0},
+ { CODE_FOR_mqaddhss, "__MQADDHSS", FRV_BUILTIN_MQADDHSS, UNKNOWN, 0},
+ { CODE_FOR_mqaddhus, "__MQADDHUS", FRV_BUILTIN_MQADDHUS, UNKNOWN, 0},
+ { CODE_FOR_mqsubhss, "__MQSUBHSS", FRV_BUILTIN_MQSUBHSS, UNKNOWN, 0},
+ { CODE_FOR_mqsubhus, "__MQSUBHUS", FRV_BUILTIN_MQSUBHUS, UNKNOWN, 0},
+ { CODE_FOR_mpackh, "__MPACKH", FRV_BUILTIN_MPACKH, UNKNOWN, 0},
+ { CODE_FOR_mcop1, "__Mcop1", FRV_BUILTIN_MCOP1, UNKNOWN, 0},
+ { CODE_FOR_mcop2, "__Mcop2", FRV_BUILTIN_MCOP2, UNKNOWN, 0},
+ { CODE_FOR_mwcut, "__MWCUT", FRV_BUILTIN_MWCUT, UNKNOWN, 0},
+ { CODE_FOR_mqsaths, "__MQSATHS", FRV_BUILTIN_MQSATHS, UNKNOWN, 0},
+ { CODE_FOR_mqlclrhs, "__MQLCLRHS", FRV_BUILTIN_MQLCLRHS, UNKNOWN, 0},
+ { CODE_FOR_mqlmths, "__MQLMTHS", FRV_BUILTIN_MQLMTHS, UNKNOWN, 0},
+ { CODE_FOR_smul, "__SMUL", FRV_BUILTIN_SMUL, UNKNOWN, 0},
+ { CODE_FOR_umul, "__UMUL", FRV_BUILTIN_UMUL, UNKNOWN, 0},
+ { CODE_FOR_addss, "__ADDSS", FRV_BUILTIN_ADDSS, UNKNOWN, 0},
+ { CODE_FOR_subss, "__SUBSS", FRV_BUILTIN_SUBSS, UNKNOWN, 0},
+ { CODE_FOR_slass, "__SLASS", FRV_BUILTIN_SLASS, UNKNOWN, 0},
+ { CODE_FOR_scan, "__SCAN", FRV_BUILTIN_SCAN, UNKNOWN, 0}
+};
+
+/* Integer intrinsics that take two arguments and have no return value. */
+
+static struct builtin_description bdesc_int_void2arg[] =
+{
+ { CODE_FOR_smass, "__SMASS", FRV_BUILTIN_SMASS, UNKNOWN, 0},
+ { CODE_FOR_smsss, "__SMSSS", FRV_BUILTIN_SMSSS, UNKNOWN, 0},
+ { CODE_FOR_smu, "__SMU", FRV_BUILTIN_SMU, UNKNOWN, 0}
+};
+
+static struct builtin_description bdesc_prefetches[] =
+{
+ { CODE_FOR_frv_prefetch0, "__data_prefetch0", FRV_BUILTIN_PREFETCH0, UNKNOWN,
+ 0},
+ { CODE_FOR_frv_prefetch, "__data_prefetch", FRV_BUILTIN_PREFETCH, UNKNOWN, 0}
+};
+
+/* Media intrinsics that take two arguments, the first being an ACC number. */
+
+static struct builtin_description bdesc_cut[] =
+{
+ { CODE_FOR_mcut, "__MCUT", FRV_BUILTIN_MCUT, UNKNOWN, 0},
+ { CODE_FOR_mcutss, "__MCUTSS", FRV_BUILTIN_MCUTSS, UNKNOWN, 0},
+ { CODE_FOR_mdcutssi, "__MDCUTSSI", FRV_BUILTIN_MDCUTSSI, UNKNOWN, 0}
+};
+
+/* Two-argument media intrinsics with an immediate second argument. */
+
+static struct builtin_description bdesc_2argimm[] =
+{
+ { CODE_FOR_mrotli, "__MROTLI", FRV_BUILTIN_MROTLI, UNKNOWN, 0},
+ { CODE_FOR_mrotri, "__MROTRI", FRV_BUILTIN_MROTRI, UNKNOWN, 0},
+ { CODE_FOR_msllhi, "__MSLLHI", FRV_BUILTIN_MSLLHI, UNKNOWN, 0},
+ { CODE_FOR_msrlhi, "__MSRLHI", FRV_BUILTIN_MSRLHI, UNKNOWN, 0},
+ { CODE_FOR_msrahi, "__MSRAHI", FRV_BUILTIN_MSRAHI, UNKNOWN, 0},
+ { CODE_FOR_mexpdhw, "__MEXPDHW", FRV_BUILTIN_MEXPDHW, UNKNOWN, 0},
+ { CODE_FOR_mexpdhd, "__MEXPDHD", FRV_BUILTIN_MEXPDHD, UNKNOWN, 0},
+ { CODE_FOR_mdrotli, "__MDROTLI", FRV_BUILTIN_MDROTLI, UNKNOWN, 0},
+ { CODE_FOR_mcplhi, "__MCPLHI", FRV_BUILTIN_MCPLHI, UNKNOWN, 0},
+ { CODE_FOR_mcpli, "__MCPLI", FRV_BUILTIN_MCPLI, UNKNOWN, 0},
+ { CODE_FOR_mhsetlos, "__MHSETLOS", FRV_BUILTIN_MHSETLOS, UNKNOWN, 0},
+ { CODE_FOR_mhsetloh, "__MHSETLOH", FRV_BUILTIN_MHSETLOH, UNKNOWN, 0},
+ { CODE_FOR_mhsethis, "__MHSETHIS", FRV_BUILTIN_MHSETHIS, UNKNOWN, 0},
+ { CODE_FOR_mhsethih, "__MHSETHIH", FRV_BUILTIN_MHSETHIH, UNKNOWN, 0},
+ { CODE_FOR_mhdseth, "__MHDSETH", FRV_BUILTIN_MHDSETH, UNKNOWN, 0},
+ { CODE_FOR_mqsllhi, "__MQSLLHI", FRV_BUILTIN_MQSLLHI, UNKNOWN, 0},
+ { CODE_FOR_mqsrahi, "__MQSRAHI", FRV_BUILTIN_MQSRAHI, UNKNOWN, 0}
+};
+
+/* Media intrinsics that take two arguments and return void, the first argument
+ being a pointer to 4 words in memory. */
+
+static struct builtin_description bdesc_void2arg[] =
+{
+ { CODE_FOR_mdunpackh, "__MDUNPACKH", FRV_BUILTIN_MDUNPACKH, UNKNOWN, 0},
+ { CODE_FOR_mbtohe, "__MBTOHE", FRV_BUILTIN_MBTOHE, UNKNOWN, 0},
+};
+
+/* Media intrinsics that take three arguments, the first being a const_int that
+ denotes an accumulator, and that return void. */
+
+static struct builtin_description bdesc_void3arg[] =
+{
+ { CODE_FOR_mcpxrs, "__MCPXRS", FRV_BUILTIN_MCPXRS, UNKNOWN, 0},
+ { CODE_FOR_mcpxru, "__MCPXRU", FRV_BUILTIN_MCPXRU, UNKNOWN, 0},
+ { CODE_FOR_mcpxis, "__MCPXIS", FRV_BUILTIN_MCPXIS, UNKNOWN, 0},
+ { CODE_FOR_mcpxiu, "__MCPXIU", FRV_BUILTIN_MCPXIU, UNKNOWN, 0},
+ { CODE_FOR_mmulhs, "__MMULHS", FRV_BUILTIN_MMULHS, UNKNOWN, 0},
+ { CODE_FOR_mmulhu, "__MMULHU", FRV_BUILTIN_MMULHU, UNKNOWN, 0},
+ { CODE_FOR_mmulxhs, "__MMULXHS", FRV_BUILTIN_MMULXHS, UNKNOWN, 0},
+ { CODE_FOR_mmulxhu, "__MMULXHU", FRV_BUILTIN_MMULXHU, UNKNOWN, 0},
+ { CODE_FOR_mmachs, "__MMACHS", FRV_BUILTIN_MMACHS, UNKNOWN, 0},
+ { CODE_FOR_mmachu, "__MMACHU", FRV_BUILTIN_MMACHU, UNKNOWN, 0},
+ { CODE_FOR_mmrdhs, "__MMRDHS", FRV_BUILTIN_MMRDHS, UNKNOWN, 0},
+ { CODE_FOR_mmrdhu, "__MMRDHU", FRV_BUILTIN_MMRDHU, UNKNOWN, 0},
+ { CODE_FOR_mqcpxrs, "__MQCPXRS", FRV_BUILTIN_MQCPXRS, UNKNOWN, 0},
+ { CODE_FOR_mqcpxru, "__MQCPXRU", FRV_BUILTIN_MQCPXRU, UNKNOWN, 0},
+ { CODE_FOR_mqcpxis, "__MQCPXIS", FRV_BUILTIN_MQCPXIS, UNKNOWN, 0},
+ { CODE_FOR_mqcpxiu, "__MQCPXIU", FRV_BUILTIN_MQCPXIU, UNKNOWN, 0},
+ { CODE_FOR_mqmulhs, "__MQMULHS", FRV_BUILTIN_MQMULHS, UNKNOWN, 0},
+ { CODE_FOR_mqmulhu, "__MQMULHU", FRV_BUILTIN_MQMULHU, UNKNOWN, 0},
+ { CODE_FOR_mqmulxhs, "__MQMULXHS", FRV_BUILTIN_MQMULXHS, UNKNOWN, 0},
+ { CODE_FOR_mqmulxhu, "__MQMULXHU", FRV_BUILTIN_MQMULXHU, UNKNOWN, 0},
+ { CODE_FOR_mqmachs, "__MQMACHS", FRV_BUILTIN_MQMACHS, UNKNOWN, 0},
+ { CODE_FOR_mqmachu, "__MQMACHU", FRV_BUILTIN_MQMACHU, UNKNOWN, 0},
+ { CODE_FOR_mqxmachs, "__MQXMACHS", FRV_BUILTIN_MQXMACHS, UNKNOWN, 0},
+ { CODE_FOR_mqxmacxhs, "__MQXMACXHS", FRV_BUILTIN_MQXMACXHS, UNKNOWN, 0},
+ { CODE_FOR_mqmacxhs, "__MQMACXHS", FRV_BUILTIN_MQMACXHS, UNKNOWN, 0}
+};
+
+/* Media intrinsics that take two accumulator numbers as arguments and
+ return void. */
+
+static struct builtin_description bdesc_voidacc[] =
+{
+ { CODE_FOR_maddaccs, "__MADDACCS", FRV_BUILTIN_MADDACCS, UNKNOWN, 0},
+ { CODE_FOR_msubaccs, "__MSUBACCS", FRV_BUILTIN_MSUBACCS, UNKNOWN, 0},
+ { CODE_FOR_masaccs, "__MASACCS", FRV_BUILTIN_MASACCS, UNKNOWN, 0},
+ { CODE_FOR_mdaddaccs, "__MDADDACCS", FRV_BUILTIN_MDADDACCS, UNKNOWN, 0},
+ { CODE_FOR_mdsubaccs, "__MDSUBACCS", FRV_BUILTIN_MDSUBACCS, UNKNOWN, 0},
+ { CODE_FOR_mdasaccs, "__MDASACCS", FRV_BUILTIN_MDASACCS, UNKNOWN, 0}
+};
+
+/* Intrinsics that load a value and then issue a MEMBAR. The load is
+ a normal move and the ICODE is for the membar. */
+
+static struct builtin_description bdesc_loads[] =
+{
+ { CODE_FOR_optional_membar_qi, "__builtin_read8",
+ FRV_BUILTIN_READ8, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_hi, "__builtin_read16",
+ FRV_BUILTIN_READ16, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_si, "__builtin_read32",
+ FRV_BUILTIN_READ32, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_di, "__builtin_read64",
+ FRV_BUILTIN_READ64, UNKNOWN, 0}
+};
+
+/* Likewise stores. */
+
+static struct builtin_description bdesc_stores[] =
+{
+ { CODE_FOR_optional_membar_qi, "__builtin_write8",
+ FRV_BUILTIN_WRITE8, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_hi, "__builtin_write16",
+ FRV_BUILTIN_WRITE16, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_si, "__builtin_write32",
+ FRV_BUILTIN_WRITE32, UNKNOWN, 0},
+ { CODE_FOR_optional_membar_di, "__builtin_write64",
+ FRV_BUILTIN_WRITE64, UNKNOWN, 0},
+};
+
+/* Initialize media builtins. */
+
+static void
+frv_init_builtins (void)
+{
+ tree endlink = void_list_node;
+ tree accumulator = integer_type_node;
+ tree integer = integer_type_node;
+ tree voidt = void_type_node;
+ tree uhalf = short_unsigned_type_node;
+ tree sword1 = long_integer_type_node;
+ tree uword1 = long_unsigned_type_node;
+ tree sword2 = long_long_integer_type_node;
+ tree uword2 = long_long_unsigned_type_node;
+ tree uword4 = build_pointer_type (uword1);
+ tree vptr = build_pointer_type (build_type_variant (void_type_node, 0, 1));
+ tree ubyte = unsigned_char_type_node;
+ tree iacc = integer_type_node;
+
+#define UNARY(RET, T1) \
+ build_function_type (RET, tree_cons (NULL_TREE, T1, endlink))
+
+#define BINARY(RET, T1, T2) \
+ build_function_type (RET, tree_cons (NULL_TREE, T1, \
+ tree_cons (NULL_TREE, T2, endlink)))
+
+#define TRINARY(RET, T1, T2, T3) \
+ build_function_type (RET, tree_cons (NULL_TREE, T1, \
+ tree_cons (NULL_TREE, T2, \
+ tree_cons (NULL_TREE, T3, endlink))))
+
+#define QUAD(RET, T1, T2, T3, T4) \
+ build_function_type (RET, tree_cons (NULL_TREE, T1, \
+ tree_cons (NULL_TREE, T2, \
+ tree_cons (NULL_TREE, T3, \
+ tree_cons (NULL_TREE, T4, endlink)))))
+
+ tree void_ftype_void = build_function_type (voidt, endlink);
+
+ tree void_ftype_acc = UNARY (voidt, accumulator);
+ tree void_ftype_uw4_uw1 = BINARY (voidt, uword4, uword1);
+ tree void_ftype_uw4_uw2 = BINARY (voidt, uword4, uword2);
+ tree void_ftype_acc_uw1 = BINARY (voidt, accumulator, uword1);
+ tree void_ftype_acc_acc = BINARY (voidt, accumulator, accumulator);
+ tree void_ftype_acc_uw1_uw1 = TRINARY (voidt, accumulator, uword1, uword1);
+ tree void_ftype_acc_sw1_sw1 = TRINARY (voidt, accumulator, sword1, sword1);
+ tree void_ftype_acc_uw2_uw2 = TRINARY (voidt, accumulator, uword2, uword2);
+ tree void_ftype_acc_sw2_sw2 = TRINARY (voidt, accumulator, sword2, sword2);
+
+ tree uw1_ftype_uw1 = UNARY (uword1, uword1);
+ tree uw1_ftype_sw1 = UNARY (uword1, sword1);
+ tree uw1_ftype_uw2 = UNARY (uword1, uword2);
+ tree uw1_ftype_acc = UNARY (uword1, accumulator);
+ tree uw1_ftype_uh_uh = BINARY (uword1, uhalf, uhalf);
+ tree uw1_ftype_uw1_uw1 = BINARY (uword1, uword1, uword1);
+ tree uw1_ftype_uw1_int = BINARY (uword1, uword1, integer);
+ tree uw1_ftype_acc_uw1 = BINARY (uword1, accumulator, uword1);
+ tree uw1_ftype_acc_sw1 = BINARY (uword1, accumulator, sword1);
+ tree uw1_ftype_uw2_uw1 = BINARY (uword1, uword2, uword1);
+ tree uw1_ftype_uw2_int = BINARY (uword1, uword2, integer);
+
+ tree sw1_ftype_int = UNARY (sword1, integer);
+ tree sw1_ftype_sw1_sw1 = BINARY (sword1, sword1, sword1);
+ tree sw1_ftype_sw1_int = BINARY (sword1, sword1, integer);
+
+ tree uw2_ftype_uw1 = UNARY (uword2, uword1);
+ tree uw2_ftype_uw1_int = BINARY (uword2, uword1, integer);
+ tree uw2_ftype_uw2_uw2 = BINARY (uword2, uword2, uword2);
+ tree uw2_ftype_uw2_int = BINARY (uword2, uword2, integer);
+ tree uw2_ftype_acc_int = BINARY (uword2, accumulator, integer);
+ tree uw2_ftype_uh_uh_uh_uh = QUAD (uword2, uhalf, uhalf, uhalf, uhalf);
+
+ tree sw2_ftype_sw2_sw2 = BINARY (sword2, sword2, sword2);
+ tree sw2_ftype_sw2_int = BINARY (sword2, sword2, integer);
+ tree uw2_ftype_uw1_uw1 = BINARY (uword2, uword1, uword1);
+ tree sw2_ftype_sw1_sw1 = BINARY (sword2, sword1, sword1);
+ tree void_ftype_sw1_sw1 = BINARY (voidt, sword1, sword1);
+ tree void_ftype_iacc_sw2 = BINARY (voidt, iacc, sword2);
+ tree void_ftype_iacc_sw1 = BINARY (voidt, iacc, sword1);
+ tree sw1_ftype_sw1 = UNARY (sword1, sword1);
+ tree sw2_ftype_iacc = UNARY (sword2, iacc);
+ tree sw1_ftype_iacc = UNARY (sword1, iacc);
+ tree void_ftype_ptr = UNARY (voidt, const_ptr_type_node);
+ tree uw1_ftype_vptr = UNARY (uword1, vptr);
+ tree uw2_ftype_vptr = UNARY (uword2, vptr);
+ tree void_ftype_vptr_ub = BINARY (voidt, vptr, ubyte);
+ tree void_ftype_vptr_uh = BINARY (voidt, vptr, uhalf);
+ tree void_ftype_vptr_uw1 = BINARY (voidt, vptr, uword1);
+ tree void_ftype_vptr_uw2 = BINARY (voidt, vptr, uword2);
+
+ def_builtin ("__MAND", uw1_ftype_uw1_uw1, FRV_BUILTIN_MAND);
+ def_builtin ("__MOR", uw1_ftype_uw1_uw1, FRV_BUILTIN_MOR);
+ def_builtin ("__MXOR", uw1_ftype_uw1_uw1, FRV_BUILTIN_MXOR);
+ def_builtin ("__MNOT", uw1_ftype_uw1, FRV_BUILTIN_MNOT);
+ def_builtin ("__MROTLI", uw1_ftype_uw1_int, FRV_BUILTIN_MROTLI);
+ def_builtin ("__MROTRI", uw1_ftype_uw1_int, FRV_BUILTIN_MROTRI);
+ def_builtin ("__MWCUT", uw1_ftype_uw2_uw1, FRV_BUILTIN_MWCUT);
+ def_builtin ("__MAVEH", uw1_ftype_uw1_uw1, FRV_BUILTIN_MAVEH);
+ def_builtin ("__MSLLHI", uw1_ftype_uw1_int, FRV_BUILTIN_MSLLHI);
+ def_builtin ("__MSRLHI", uw1_ftype_uw1_int, FRV_BUILTIN_MSRLHI);
+ def_builtin ("__MSRAHI", sw1_ftype_sw1_int, FRV_BUILTIN_MSRAHI);
+ def_builtin ("__MSATHS", sw1_ftype_sw1_sw1, FRV_BUILTIN_MSATHS);
+ def_builtin ("__MSATHU", uw1_ftype_uw1_uw1, FRV_BUILTIN_MSATHU);
+ def_builtin ("__MADDHSS", sw1_ftype_sw1_sw1, FRV_BUILTIN_MADDHSS);
+ def_builtin ("__MADDHUS", uw1_ftype_uw1_uw1, FRV_BUILTIN_MADDHUS);
+ def_builtin ("__MSUBHSS", sw1_ftype_sw1_sw1, FRV_BUILTIN_MSUBHSS);
+ def_builtin ("__MSUBHUS", uw1_ftype_uw1_uw1, FRV_BUILTIN_MSUBHUS);
+ def_builtin ("__MMULHS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MMULHS);
+ def_builtin ("__MMULHU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MMULHU);
+ def_builtin ("__MMULXHS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MMULXHS);
+ def_builtin ("__MMULXHU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MMULXHU);
+ def_builtin ("__MMACHS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MMACHS);
+ def_builtin ("__MMACHU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MMACHU);
+ def_builtin ("__MMRDHS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MMRDHS);
+ def_builtin ("__MMRDHU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MMRDHU);
+ def_builtin ("__MQADDHSS", sw2_ftype_sw2_sw2, FRV_BUILTIN_MQADDHSS);
+ def_builtin ("__MQADDHUS", uw2_ftype_uw2_uw2, FRV_BUILTIN_MQADDHUS);
+ def_builtin ("__MQSUBHSS", sw2_ftype_sw2_sw2, FRV_BUILTIN_MQSUBHSS);
+ def_builtin ("__MQSUBHUS", uw2_ftype_uw2_uw2, FRV_BUILTIN_MQSUBHUS);
+ def_builtin ("__MQMULHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQMULHS);
+ def_builtin ("__MQMULHU", void_ftype_acc_uw2_uw2, FRV_BUILTIN_MQMULHU);
+ def_builtin ("__MQMULXHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQMULXHS);
+ def_builtin ("__MQMULXHU", void_ftype_acc_uw2_uw2, FRV_BUILTIN_MQMULXHU);
+ def_builtin ("__MQMACHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQMACHS);
+ def_builtin ("__MQMACHU", void_ftype_acc_uw2_uw2, FRV_BUILTIN_MQMACHU);
+ def_builtin ("__MCPXRS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MCPXRS);
+ def_builtin ("__MCPXRU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MCPXRU);
+ def_builtin ("__MCPXIS", void_ftype_acc_sw1_sw1, FRV_BUILTIN_MCPXIS);
+ def_builtin ("__MCPXIU", void_ftype_acc_uw1_uw1, FRV_BUILTIN_MCPXIU);
+ def_builtin ("__MQCPXRS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQCPXRS);
+ def_builtin ("__MQCPXRU", void_ftype_acc_uw2_uw2, FRV_BUILTIN_MQCPXRU);
+ def_builtin ("__MQCPXIS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQCPXIS);
+ def_builtin ("__MQCPXIU", void_ftype_acc_uw2_uw2, FRV_BUILTIN_MQCPXIU);
+ def_builtin ("__MCUT", uw1_ftype_acc_uw1, FRV_BUILTIN_MCUT);
+ def_builtin ("__MCUTSS", uw1_ftype_acc_sw1, FRV_BUILTIN_MCUTSS);
+ def_builtin ("__MEXPDHW", uw1_ftype_uw1_int, FRV_BUILTIN_MEXPDHW);
+ def_builtin ("__MEXPDHD", uw2_ftype_uw1_int, FRV_BUILTIN_MEXPDHD);
+ def_builtin ("__MPACKH", uw1_ftype_uh_uh, FRV_BUILTIN_MPACKH);
+ def_builtin ("__MUNPACKH", uw2_ftype_uw1, FRV_BUILTIN_MUNPACKH);
+ def_builtin ("__MDPACKH", uw2_ftype_uh_uh_uh_uh, FRV_BUILTIN_MDPACKH);
+ def_builtin ("__MDUNPACKH", void_ftype_uw4_uw2, FRV_BUILTIN_MDUNPACKH);
+ def_builtin ("__MBTOH", uw2_ftype_uw1, FRV_BUILTIN_MBTOH);
+ def_builtin ("__MHTOB", uw1_ftype_uw2, FRV_BUILTIN_MHTOB);
+ def_builtin ("__MBTOHE", void_ftype_uw4_uw1, FRV_BUILTIN_MBTOHE);
+ def_builtin ("__MCLRACC", void_ftype_acc, FRV_BUILTIN_MCLRACC);
+ def_builtin ("__MCLRACCA", void_ftype_void, FRV_BUILTIN_MCLRACCA);
+ def_builtin ("__MRDACC", uw1_ftype_acc, FRV_BUILTIN_MRDACC);
+ def_builtin ("__MRDACCG", uw1_ftype_acc, FRV_BUILTIN_MRDACCG);
+ def_builtin ("__MWTACC", void_ftype_acc_uw1, FRV_BUILTIN_MWTACC);
+ def_builtin ("__MWTACCG", void_ftype_acc_uw1, FRV_BUILTIN_MWTACCG);
+ def_builtin ("__Mcop1", uw1_ftype_uw1_uw1, FRV_BUILTIN_MCOP1);
+ def_builtin ("__Mcop2", uw1_ftype_uw1_uw1, FRV_BUILTIN_MCOP2);
+ def_builtin ("__MTRAP", void_ftype_void, FRV_BUILTIN_MTRAP);
+ def_builtin ("__MQXMACHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQXMACHS);
+ def_builtin ("__MQXMACXHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQXMACXHS);
+ def_builtin ("__MQMACXHS", void_ftype_acc_sw2_sw2, FRV_BUILTIN_MQMACXHS);
+ def_builtin ("__MADDACCS", void_ftype_acc_acc, FRV_BUILTIN_MADDACCS);
+ def_builtin ("__MSUBACCS", void_ftype_acc_acc, FRV_BUILTIN_MSUBACCS);
+ def_builtin ("__MASACCS", void_ftype_acc_acc, FRV_BUILTIN_MASACCS);
+ def_builtin ("__MDADDACCS", void_ftype_acc_acc, FRV_BUILTIN_MDADDACCS);
+ def_builtin ("__MDSUBACCS", void_ftype_acc_acc, FRV_BUILTIN_MDSUBACCS);
+ def_builtin ("__MDASACCS", void_ftype_acc_acc, FRV_BUILTIN_MDASACCS);
+ def_builtin ("__MABSHS", uw1_ftype_sw1, FRV_BUILTIN_MABSHS);
+ def_builtin ("__MDROTLI", uw2_ftype_uw2_int, FRV_BUILTIN_MDROTLI);
+ def_builtin ("__MCPLHI", uw1_ftype_uw2_int, FRV_BUILTIN_MCPLHI);
+ def_builtin ("__MCPLI", uw1_ftype_uw2_int, FRV_BUILTIN_MCPLI);
+ def_builtin ("__MDCUTSSI", uw2_ftype_acc_int, FRV_BUILTIN_MDCUTSSI);
+ def_builtin ("__MQSATHS", sw2_ftype_sw2_sw2, FRV_BUILTIN_MQSATHS);
+ def_builtin ("__MHSETLOS", sw1_ftype_sw1_int, FRV_BUILTIN_MHSETLOS);
+ def_builtin ("__MHSETHIS", sw1_ftype_sw1_int, FRV_BUILTIN_MHSETHIS);
+ def_builtin ("__MHDSETS", sw1_ftype_int, FRV_BUILTIN_MHDSETS);
+ def_builtin ("__MHSETLOH", uw1_ftype_uw1_int, FRV_BUILTIN_MHSETLOH);
+ def_builtin ("__MHSETHIH", uw1_ftype_uw1_int, FRV_BUILTIN_MHSETHIH);
+ def_builtin ("__MHDSETH", uw1_ftype_uw1_int, FRV_BUILTIN_MHDSETH);
+ def_builtin ("__MQLCLRHS", sw2_ftype_sw2_sw2, FRV_BUILTIN_MQLCLRHS);
+ def_builtin ("__MQLMTHS", sw2_ftype_sw2_sw2, FRV_BUILTIN_MQLMTHS);
+ def_builtin ("__MQSLLHI", uw2_ftype_uw2_int, FRV_BUILTIN_MQSLLHI);
+ def_builtin ("__MQSRAHI", sw2_ftype_sw2_int, FRV_BUILTIN_MQSRAHI);
+ def_builtin ("__SMUL", sw2_ftype_sw1_sw1, FRV_BUILTIN_SMUL);
+ def_builtin ("__UMUL", uw2_ftype_uw1_uw1, FRV_BUILTIN_UMUL);
+ def_builtin ("__SMASS", void_ftype_sw1_sw1, FRV_BUILTIN_SMASS);
+ def_builtin ("__SMSSS", void_ftype_sw1_sw1, FRV_BUILTIN_SMSSS);
+ def_builtin ("__SMU", void_ftype_sw1_sw1, FRV_BUILTIN_SMU);
+ def_builtin ("__ADDSS", sw1_ftype_sw1_sw1, FRV_BUILTIN_ADDSS);
+ def_builtin ("__SUBSS", sw1_ftype_sw1_sw1, FRV_BUILTIN_SUBSS);
+ def_builtin ("__SLASS", sw1_ftype_sw1_sw1, FRV_BUILTIN_SLASS);
+ def_builtin ("__SCAN", sw1_ftype_sw1_sw1, FRV_BUILTIN_SCAN);
+ def_builtin ("__SCUTSS", sw1_ftype_sw1, FRV_BUILTIN_SCUTSS);
+ def_builtin ("__IACCreadll", sw2_ftype_iacc, FRV_BUILTIN_IACCreadll);
+ def_builtin ("__IACCreadl", sw1_ftype_iacc, FRV_BUILTIN_IACCreadl);
+ def_builtin ("__IACCsetll", void_ftype_iacc_sw2, FRV_BUILTIN_IACCsetll);
+ def_builtin ("__IACCsetl", void_ftype_iacc_sw1, FRV_BUILTIN_IACCsetl);
+ def_builtin ("__data_prefetch0", void_ftype_ptr, FRV_BUILTIN_PREFETCH0);
+ def_builtin ("__data_prefetch", void_ftype_ptr, FRV_BUILTIN_PREFETCH);
+ def_builtin ("__builtin_read8", uw1_ftype_vptr, FRV_BUILTIN_READ8);
+ def_builtin ("__builtin_read16", uw1_ftype_vptr, FRV_BUILTIN_READ16);
+ def_builtin ("__builtin_read32", uw1_ftype_vptr, FRV_BUILTIN_READ32);
+ def_builtin ("__builtin_read64", uw2_ftype_vptr, FRV_BUILTIN_READ64);
+
+ def_builtin ("__builtin_write8", void_ftype_vptr_ub, FRV_BUILTIN_WRITE8);
+ def_builtin ("__builtin_write16", void_ftype_vptr_uh, FRV_BUILTIN_WRITE16);
+ def_builtin ("__builtin_write32", void_ftype_vptr_uw1, FRV_BUILTIN_WRITE32);
+ def_builtin ("__builtin_write64", void_ftype_vptr_uw2, FRV_BUILTIN_WRITE64);
+
+#undef UNARY
+#undef BINARY
+#undef TRINARY
+#undef QUAD
+}
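+
+/* Editor's user-level sketch of the bindings above (not part of the
+   upstream source; variable names are hypothetical):
+
+     unsigned long a, b, r;
+     long x, y;
+     r = __MAND (a, b);         media AND; expands via CODE_FOR_mand
+     __MMULHS (2, x, y);        signed multiply into accumulator 2
+
+   Accumulator arguments such as the 2 above must be constant integers;
+   frv_int_to_acc below reports an error otherwise.  */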
+
+/* Set the names for various arithmetic operations according to the
+ FRV ABI. */
+static void
+frv_init_libfuncs (void)
+{
+ set_optab_libfunc (smod_optab, SImode, "__modi");
+ set_optab_libfunc (umod_optab, SImode, "__umodi");
+
+ set_optab_libfunc (add_optab, DImode, "__addll");
+ set_optab_libfunc (sub_optab, DImode, "__subll");
+ set_optab_libfunc (smul_optab, DImode, "__mulll");
+ set_optab_libfunc (sdiv_optab, DImode, "__divll");
+ set_optab_libfunc (smod_optab, DImode, "__modll");
+ set_optab_libfunc (umod_optab, DImode, "__umodll");
+ set_optab_libfunc (and_optab, DImode, "__andll");
+ set_optab_libfunc (ior_optab, DImode, "__orll");
+ set_optab_libfunc (xor_optab, DImode, "__xorll");
+ set_optab_libfunc (one_cmpl_optab, DImode, "__notll");
+
+ set_optab_libfunc (add_optab, SFmode, "__addf");
+ set_optab_libfunc (sub_optab, SFmode, "__subf");
+ set_optab_libfunc (smul_optab, SFmode, "__mulf");
+ set_optab_libfunc (sdiv_optab, SFmode, "__divf");
+
+ set_optab_libfunc (add_optab, DFmode, "__addd");
+ set_optab_libfunc (sub_optab, DFmode, "__subd");
+ set_optab_libfunc (smul_optab, DFmode, "__muld");
+ set_optab_libfunc (sdiv_optab, DFmode, "__divd");
+
+ set_conv_libfunc (sext_optab, DFmode, SFmode, "__ftod");
+ set_conv_libfunc (trunc_optab, SFmode, DFmode, "__dtof");
+
+ set_conv_libfunc (sfix_optab, SImode, SFmode, "__ftoi");
+ set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
+ set_conv_libfunc (sfix_optab, SImode, DFmode, "__dtoi");
+ set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
+
+ set_conv_libfunc (ufix_optab, SImode, SFmode, "__ftoui");
+ set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
+ set_conv_libfunc (ufix_optab, SImode, DFmode, "__dtoui");
+ set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
+
+ set_conv_libfunc (sfloat_optab, SFmode, SImode, "__itof");
+ set_conv_libfunc (sfloat_optab, SFmode, DImode, "__lltof");
+ set_conv_libfunc (sfloat_optab, DFmode, SImode, "__itod");
+ set_conv_libfunc (sfloat_optab, DFmode, DImode, "__lltod");
+}
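+
+/* Editor's note (not part of the upstream source): the effect of the
+   renaming above is that, for example, a 64-bit signed division
+
+     long long q = x / y;
+
+   calls `__divll' rather than libgcc's default `__divdi3', matching
+   the FRV ABI names.  */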
+
+/* Convert an integer constant to an accumulator register. ICODE is the
+ code of the target instruction, OPNUM is the number of the
+ accumulator operand and OPVAL is the constant integer. Try both
+ ACC and ACCG registers; only report an error if neither fits the
+ instruction. */
+
+static rtx
+frv_int_to_acc (enum insn_code icode, int opnum, rtx opval)
+{
+ rtx reg;
+ int i;
+
+ /* ACCs and ACCGs are implicit global registers if media intrinsics
+ are being used.  We set this up lazily to avoid creating lots of
+ unnecessary call_insn rtl in non-media code. */
+ for (i = 0; i <= ACC_MASK; i++)
+ if ((i & ACC_MASK) == i)
+ global_regs[i + ACC_FIRST] = global_regs[i + ACCG_FIRST] = 1;
+
+ if (GET_CODE (opval) != CONST_INT)
+ {
+ error ("accumulator is not a constant integer");
+ return NULL_RTX;
+ }
+ if ((INTVAL (opval) & ~ACC_MASK) != 0)
+ {
+ error ("accumulator number is out of bounds");
+ return NULL_RTX;
+ }
+
+ reg = gen_rtx_REG (insn_data[icode].operand[opnum].mode,
+ ACC_FIRST + INTVAL (opval));
+ if (! (*insn_data[icode].operand[opnum].predicate) (reg, VOIDmode))
+ SET_REGNO (reg, ACCG_FIRST + INTVAL (opval));
+
+ if (! (*insn_data[icode].operand[opnum].predicate) (reg, VOIDmode))
+ {
+ error ("inappropriate accumulator for %qs", insn_data[icode].name);
+ return NULL_RTX;
+ }
+ return reg;
+}
+
+/* If an ACC rtx has mode MODE, return the mode that the matching ACCG
+ should have. */
+
+static enum machine_mode
+frv_matching_accg_mode (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V4SImode:
+ return V4QImode;
+
+ case DImode:
+ return HImode;
+
+ case SImode:
+ return QImode;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Given that a __builtin_read or __builtin_write function is accessing
+ address ADDRESS, return the value that should be used as operand 1
+ of the membar. */
+
+static rtx
+frv_io_address_cookie (rtx address)
+{
+ return (GET_CODE (address) == CONST_INT
+ ? GEN_INT (INTVAL (address) / 8 * 8)
+ : const0_rtx);
+}
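+
+/* Editor's worked example (not part of the upstream source): for a
+   constant address 0x1006, INTVAL / 8 * 8 rounds down to the start of
+   the containing doubleword, giving a cookie of 0x1000; non-constant
+   addresses get the conservative cookie 0.  */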
+
+/* Return the accumulator guard that should be paired with accumulator
+ register ACC. The mode of the returned register is in the same
+ class as ACC, but is four times smaller. */
+
+rtx
+frv_matching_accg_for_acc (rtx acc)
+{
+ return gen_rtx_REG (frv_matching_accg_mode (GET_MODE (acc)),
+ REGNO (acc) - ACC_FIRST + ACCG_FIRST);
+}
+
+/* Read the argument given by INDEX from the call expression EXP.
+ Return the value as an rtx. */
+
+static rtx
+frv_read_argument (tree exp, unsigned int index)
+{
+ return expand_normal (CALL_EXPR_ARG (exp, index));
+}
+
+/* Like frv_read_argument, but interpret the argument as the number
+ of an IACC register and return a (reg:MODE ...) rtx for it. */
+
+static rtx
+frv_read_iacc_argument (enum machine_mode mode, tree call,
+ unsigned int index)
+{
+ int i, regno;
+ rtx op;
+
+ op = frv_read_argument (call, index);
+ if (GET_CODE (op) != CONST_INT
+ || INTVAL (op) < 0
+ || INTVAL (op) > IACC_LAST - IACC_FIRST
+ || ((INTVAL (op) * 4) & (GET_MODE_SIZE (mode) - 1)) != 0)
+ {
+ error ("invalid IACC argument");
+ op = const0_rtx;
+ }
+
+ /* IACCs are implicit global registers.  We set this up lazily to
+ avoid creating lots of unnecessary call_insn rtl when IACCs aren't
+ being used. */
+ regno = INTVAL (op) + IACC_FIRST;
+ for (i = 0; i < HARD_REGNO_NREGS (regno, mode); i++)
+ global_regs[regno + i] = 1;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
+ The instruction should require a constant operand of some sort. The
+ function prints an error if OPVAL is not valid. */
+
+static int
+frv_check_constant_argument (enum insn_code icode, int opnum, rtx opval)
+{
+ if (GET_CODE (opval) != CONST_INT)
+ {
+ error ("%qs expects a constant argument", insn_data[icode].name);
+ return FALSE;
+ }
+ if (! (*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
+ {
+ error ("constant argument out of range for %qs", insn_data[icode].name);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/* Return a legitimate rtx for instruction ICODE's return value. Use TARGET
+ if it's not null, has the right mode, and satisfies operand 0's
+ predicate. */
+
+static rtx
+frv_legitimize_target (enum insn_code icode, rtx target)
+{
+ enum machine_mode mode = insn_data[icode].operand[0].mode;
+
+ if (! target
+ || GET_MODE (target) != mode
+ || ! (*insn_data[icode].operand[0].predicate) (target, mode))
+ return gen_reg_rtx (mode);
+ else
+ return target;
+}
+
+/* Given that ARG is being passed as operand OPNUM to instruction ICODE,
+ check whether ARG satisfies the operand's constraints. If it doesn't,
+ copy ARG to a temporary register and return that. Otherwise return ARG
+ itself. */
+
+static rtx
+frv_legitimize_argument (enum insn_code icode, int opnum, rtx arg)
+{
+ enum machine_mode mode = insn_data[icode].operand[opnum].mode;
+
+ if ((*insn_data[icode].operand[opnum].predicate) (arg, mode))
+ return arg;
+ else
+ return copy_to_mode_reg (mode, arg);
+}
+
+/* Return a volatile memory reference of mode MODE whose address is ARG. */
+
+static rtx
+frv_volatile_memref (enum machine_mode mode, rtx arg)
+{
+ rtx mem;
+
+ mem = gen_rtx_MEM (mode, memory_address (mode, arg));
+ MEM_VOLATILE_P (mem) = 1;
+ return mem;
+}
+
+/* Expand builtins that take a single, constant argument. At the moment,
+ only MHDSETS falls into this category. */
+
+static rtx
+frv_expand_set_builtin (enum insn_code icode, tree call, rtx target)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+
+ if (! frv_check_constant_argument (icode, 1, op0))
+ return NULL_RTX;
+
+ target = frv_legitimize_target (icode, target);
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand builtins that take one operand. */
+
+static rtx
+frv_expand_unop_builtin (enum insn_code icode, tree call, rtx target)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+
+ target = frv_legitimize_target (icode, target);
+ op0 = frv_legitimize_argument (icode, 1, op0);
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand builtins that take two operands. */
+
+static rtx
+frv_expand_binop_builtin (enum insn_code icode, tree call, rtx target)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+
+ target = frv_legitimize_target (icode, target);
+ op0 = frv_legitimize_argument (icode, 1, op0);
+ op1 = frv_legitimize_argument (icode, 2, op1);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand cut-style builtins, which take two explicit operands plus an
+ implicit ACCG operand.  */
+
+static rtx
+frv_expand_cut_builtin (enum insn_code icode, tree call, rtx target)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+ rtx op2;
+
+ target = frv_legitimize_target (icode, target);
+ op0 = frv_int_to_acc (icode, 1, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ if (icode == CODE_FOR_mdcutssi || GET_CODE (op1) == CONST_INT)
+ {
+ if (! frv_check_constant_argument (icode, 2, op1))
+ return NULL_RTX;
+ }
+ else
+ op1 = frv_legitimize_argument (icode, 2, op1);
+
+ op2 = frv_matching_accg_for_acc (op0);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand builtins that take two operands and the second is immediate. */
+
+static rtx
+frv_expand_binopimm_builtin (enum insn_code icode, tree call, rtx target)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+
+ if (! frv_check_constant_argument (icode, 2, op1))
+ return NULL_RTX;
+
+ target = frv_legitimize_target (icode, target);
+ op0 = frv_legitimize_argument (icode, 1, op0);
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand builtins that take two operands, the first being a pointer to
+ ints, and that return void.  */
+
+static rtx
+frv_expand_voidbinop_builtin (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+ enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+ rtx addr;
+
+ if (GET_CODE (op0) != MEM)
+ {
+ rtx reg = op0;
+
+ if (! offsettable_address_p (0, mode0, op0))
+ {
+ reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, reg, op0));
+ }
+
+ op0 = gen_rtx_MEM (SImode, reg);
+ }
+
+ addr = XEXP (op0, 0);
+ if (! offsettable_address_p (0, mode0, addr))
+ addr = copy_to_mode_reg (Pmode, addr);
+
+ op0 = change_address (op0, V4SImode, addr);
+ op1 = frv_legitimize_argument (icode, 1, op1);
+ pat = GEN_FCN (icode) (op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return 0;
+}
+
+/* Expand builtins that take two long operands and return void. */
+
+static rtx
+frv_expand_int_void2arg (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+
+  op0 = frv_legitimize_argument (icode, 0, op0);
+ op1 = frv_legitimize_argument (icode, 1, op1);
+ pat = GEN_FCN (icode) (op0, op1);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return NULL_RTX;
+}
+
+/* Expand prefetch builtins. These take a single address as argument. */
+
+static rtx
+frv_expand_prefetches (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+
+ pat = GEN_FCN (icode) (force_reg (Pmode, op0));
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return 0;
+}
+
+/* Expand builtins that take three operands and return void. The first
+   argument must be a constant that describes a pair or quad of accumulators.  A
+ fourth argument is created that is the accumulator guard register that
+ corresponds to the accumulator. */
+
+static rtx
+frv_expand_voidtriop_builtin (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+ rtx op2 = frv_read_argument (call, 2);
+ rtx op3;
+
+ op0 = frv_int_to_acc (icode, 0, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ op1 = frv_legitimize_argument (icode, 1, op1);
+ op2 = frv_legitimize_argument (icode, 2, op2);
+ op3 = frv_matching_accg_for_acc (op0);
+ pat = GEN_FCN (icode) (op0, op1, op2, op3);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return NULL_RTX;
+}
+
+/* Expand builtins that perform accumulator-to-accumulator operations.
+ These builtins take two accumulator numbers as argument and return
+ void. */
+
+static rtx
+frv_expand_voidaccop_builtin (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+ rtx op2;
+ rtx op3;
+
+ op0 = frv_int_to_acc (icode, 0, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ op1 = frv_int_to_acc (icode, 1, op1);
+ if (! op1)
+ return NULL_RTX;
+
+ op2 = frv_matching_accg_for_acc (op0);
+ op3 = frv_matching_accg_for_acc (op1);
+ pat = GEN_FCN (icode) (op0, op1, op2, op3);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return NULL_RTX;
+}
+
+/* Expand a __builtin_read* function. ICODE is the instruction code for the
+ membar and TARGET_MODE is the mode that the loaded value should have. */
+
+static rtx
+frv_expand_load_builtin (enum insn_code icode, enum machine_mode target_mode,
+ tree call, rtx target)
+{
+ rtx op0 = frv_read_argument (call, 0);
+ rtx cookie = frv_io_address_cookie (op0);
+
+ if (target == 0 || !REG_P (target))
+ target = gen_reg_rtx (target_mode);
+ op0 = frv_volatile_memref (insn_data[icode].operand[0].mode, op0);
+ convert_move (target, op0, 1);
+ emit_insn (GEN_FCN (icode) (copy_rtx (op0), cookie, GEN_INT (FRV_IO_READ)));
+ cfun->machine->has_membar_p = 1;
+ return target;
+}
+
+/* Likewise __builtin_write* functions. */
+
+static rtx
+frv_expand_store_builtin (enum insn_code icode, tree call)
+{
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+ rtx cookie = frv_io_address_cookie (op0);
+
+ op0 = frv_volatile_memref (insn_data[icode].operand[0].mode, op0);
+ convert_move (op0, force_reg (insn_data[icode].operand[0].mode, op1), 1);
+ emit_insn (GEN_FCN (icode) (copy_rtx (op0), cookie, GEN_INT (FRV_IO_WRITE)));
+ cfun->machine->has_membar_p = 1;
+ return NULL_RTX;
+}
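+
+/* A source-level sketch of the read/write builtins (illustrative only;
+   'port' stands for a hypothetical void * device address).  Each access is
+   accompanied by the ordering membar emitted above:
+
+     unsigned int status = __builtin_read32 (port);
+     __builtin_write32 (port, status | 1);  */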
+
+/* Expand the MDPACKH builtin.  It takes four unsigned short arguments, each
+   of which supplies the low halfword of one word of the two double-word
+   input registers.
+ CALL is the tree for the call and TARGET, if nonnull, suggests a good place
+ to put the return value. */
+
+static rtx
+frv_expand_mdpackh_builtin (tree call, rtx target)
+{
+ enum insn_code icode = CODE_FOR_mdpackh;
+ rtx pat, op0, op1;
+ rtx arg1 = frv_read_argument (call, 0);
+ rtx arg2 = frv_read_argument (call, 1);
+ rtx arg3 = frv_read_argument (call, 2);
+ rtx arg4 = frv_read_argument (call, 3);
+
+ target = frv_legitimize_target (icode, target);
+ op0 = gen_reg_rtx (DImode);
+ op1 = gen_reg_rtx (DImode);
+
+ /* The high half of each word is not explicitly initialized, so indicate
+ that the input operands are not live before this point. */
+ emit_clobber (op0);
+ emit_clobber (op1);
+
+ /* Move each argument into the low half of its associated input word. */
+ emit_move_insn (simplify_gen_subreg (HImode, op0, DImode, 2), arg1);
+ emit_move_insn (simplify_gen_subreg (HImode, op0, DImode, 6), arg2);
+ emit_move_insn (simplify_gen_subreg (HImode, op1, DImode, 2), arg3);
+ emit_move_insn (simplify_gen_subreg (HImode, op1, DImode, 6), arg4);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
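+
+/* A source-level sketch, using the GCC manual's FRV type abbreviations
+   ('pack4' is a hypothetical wrapper name):
+
+     uw2 pack4 (uh a, uh b, uh c, uh d)
+     {
+       return __MDPACKH (a, b, c, d);
+     }  */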
+
+/* Expand the MCLRACC builtin. This builtin takes a single accumulator
+ number as argument. */
+
+static rtx
+frv_expand_mclracc_builtin (tree call)
+{
+ enum insn_code icode = CODE_FOR_mclracc;
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+
+ op0 = frv_int_to_acc (icode, 0, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ pat = GEN_FCN (icode) (op0);
+ if (pat)
+ emit_insn (pat);
+
+ return NULL_RTX;
+}
+
+/* Expand builtins that take no arguments. */
+
+static rtx
+frv_expand_noargs_builtin (enum insn_code icode)
+{
+ rtx pat = GEN_FCN (icode) (const0_rtx);
+ if (pat)
+ emit_insn (pat);
+
+ return NULL_RTX;
+}
+
+/* Expand MRDACC and MRDACCG. These builtins take a single accumulator
+ number or accumulator guard number as argument and return an SI integer. */
+
+static rtx
+frv_expand_mrdacc_builtin (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx target = gen_reg_rtx (SImode);
+ rtx op0 = frv_read_argument (call, 0);
+
+ op0 = frv_int_to_acc (icode, 1, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand MWTACC and MWTACCG. These builtins take an accumulator or
+ accumulator guard as their first argument and an SImode value as their
+ second. */
+
+static rtx
+frv_expand_mwtacc_builtin (enum insn_code icode, tree call)
+{
+ rtx pat;
+ rtx op0 = frv_read_argument (call, 0);
+ rtx op1 = frv_read_argument (call, 1);
+
+ op0 = frv_int_to_acc (icode, 0, op0);
+ if (! op0)
+ return NULL_RTX;
+
+ op1 = frv_legitimize_argument (icode, 1, op1);
+ pat = GEN_FCN (icode) (op0, op1);
+ if (pat)
+ emit_insn (pat);
+
+ return NULL_RTX;
+}
+
+/* Emit a move from SRC to DEST in SImode chunks. This can be used
+ to move DImode values into and out of IACC0. */
+
+static void
+frv_split_iacc_move (rtx dest, rtx src)
+{
+ enum machine_mode inner;
+ int i;
+
+ inner = GET_MODE (dest);
+ for (i = 0; i < GET_MODE_SIZE (inner); i += GET_MODE_SIZE (SImode))
+ emit_move_insn (simplify_gen_subreg (SImode, dest, inner, i),
+ simplify_gen_subreg (SImode, src, inner, i));
+}
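+
+/* For a DImode move this emits two SImode moves, one per 32-bit half
+   (subreg byte offsets 0 and 4), i.e. roughly:
+
+     (set (subreg:SI (reg:DI iacc0) 0) (subreg:SI (reg:DI src) 0))
+     (set (subreg:SI (reg:DI iacc0) 4) (subreg:SI (reg:DI src) 4))  */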
+
+/* Expand builtins. */
+
+static rtx
+frv_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned fcode = (unsigned)DECL_FUNCTION_CODE (fndecl);
+ unsigned i;
+ struct builtin_description *d;
+
+ if (fcode < FRV_BUILTIN_FIRST_NONMEDIA && !TARGET_MEDIA)
+ {
+ error ("media functions are not available unless -mmedia is used");
+ return NULL_RTX;
+ }
+
+ switch (fcode)
+ {
+ case FRV_BUILTIN_MCOP1:
+ case FRV_BUILTIN_MCOP2:
+ case FRV_BUILTIN_MDUNPACKH:
+ case FRV_BUILTIN_MBTOHE:
+ if (! TARGET_MEDIA_REV1)
+ {
+ error ("this media function is only available on the fr500");
+ return NULL_RTX;
+ }
+ break;
+
+ case FRV_BUILTIN_MQXMACHS:
+ case FRV_BUILTIN_MQXMACXHS:
+ case FRV_BUILTIN_MQMACXHS:
+ case FRV_BUILTIN_MADDACCS:
+ case FRV_BUILTIN_MSUBACCS:
+ case FRV_BUILTIN_MASACCS:
+ case FRV_BUILTIN_MDADDACCS:
+ case FRV_BUILTIN_MDSUBACCS:
+ case FRV_BUILTIN_MDASACCS:
+ case FRV_BUILTIN_MABSHS:
+ case FRV_BUILTIN_MDROTLI:
+ case FRV_BUILTIN_MCPLHI:
+ case FRV_BUILTIN_MCPLI:
+ case FRV_BUILTIN_MDCUTSSI:
+ case FRV_BUILTIN_MQSATHS:
+ case FRV_BUILTIN_MHSETLOS:
+ case FRV_BUILTIN_MHSETLOH:
+ case FRV_BUILTIN_MHSETHIS:
+ case FRV_BUILTIN_MHSETHIH:
+ case FRV_BUILTIN_MHDSETS:
+ case FRV_BUILTIN_MHDSETH:
+ if (! TARGET_MEDIA_REV2)
+ {
+ error ("this media function is only available on the fr400"
+ " and fr550");
+ return NULL_RTX;
+ }
+ break;
+
+ case FRV_BUILTIN_SMASS:
+ case FRV_BUILTIN_SMSSS:
+ case FRV_BUILTIN_SMU:
+ case FRV_BUILTIN_ADDSS:
+ case FRV_BUILTIN_SUBSS:
+ case FRV_BUILTIN_SLASS:
+ case FRV_BUILTIN_SCUTSS:
+ case FRV_BUILTIN_IACCreadll:
+ case FRV_BUILTIN_IACCreadl:
+ case FRV_BUILTIN_IACCsetll:
+ case FRV_BUILTIN_IACCsetl:
+ if (!TARGET_FR405_BUILTINS)
+ {
+ error ("this builtin function is only available"
+ " on the fr405 and fr450");
+ return NULL_RTX;
+ }
+ break;
+
+ case FRV_BUILTIN_PREFETCH:
+ if (!TARGET_FR500_FR550_BUILTINS)
+ {
+ error ("this builtin function is only available on the fr500"
+ " and fr550");
+ return NULL_RTX;
+ }
+ break;
+
+ case FRV_BUILTIN_MQLCLRHS:
+ case FRV_BUILTIN_MQLMTHS:
+ case FRV_BUILTIN_MQSLLHI:
+ case FRV_BUILTIN_MQSRAHI:
+ if (!TARGET_MEDIA_FR450)
+ {
+ error ("this builtin function is only available on the fr450");
+ return NULL_RTX;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Expand unique builtins. */
+
+ switch (fcode)
+ {
+ case FRV_BUILTIN_MTRAP:
+ return frv_expand_noargs_builtin (CODE_FOR_mtrap);
+
+ case FRV_BUILTIN_MCLRACC:
+ return frv_expand_mclracc_builtin (exp);
+
+ case FRV_BUILTIN_MCLRACCA:
+ if (TARGET_ACC_8)
+ return frv_expand_noargs_builtin (CODE_FOR_mclracca8);
+ else
+ return frv_expand_noargs_builtin (CODE_FOR_mclracca4);
+
+ case FRV_BUILTIN_MRDACC:
+ return frv_expand_mrdacc_builtin (CODE_FOR_mrdacc, exp);
+
+ case FRV_BUILTIN_MRDACCG:
+ return frv_expand_mrdacc_builtin (CODE_FOR_mrdaccg, exp);
+
+ case FRV_BUILTIN_MWTACC:
+ return frv_expand_mwtacc_builtin (CODE_FOR_mwtacc, exp);
+
+ case FRV_BUILTIN_MWTACCG:
+ return frv_expand_mwtacc_builtin (CODE_FOR_mwtaccg, exp);
+
+ case FRV_BUILTIN_MDPACKH:
+ return frv_expand_mdpackh_builtin (exp, target);
+
+ case FRV_BUILTIN_IACCreadll:
+ {
+ rtx src = frv_read_iacc_argument (DImode, exp, 0);
+ if (target == 0 || !REG_P (target))
+ target = gen_reg_rtx (DImode);
+ frv_split_iacc_move (target, src);
+ return target;
+ }
+
+ case FRV_BUILTIN_IACCreadl:
+ return frv_read_iacc_argument (SImode, exp, 0);
+
+ case FRV_BUILTIN_IACCsetll:
+ {
+ rtx dest = frv_read_iacc_argument (DImode, exp, 0);
+ rtx src = frv_read_argument (exp, 1);
+ frv_split_iacc_move (dest, force_reg (DImode, src));
+ return 0;
+ }
+
+ case FRV_BUILTIN_IACCsetl:
+ {
+ rtx dest = frv_read_iacc_argument (SImode, exp, 0);
+ rtx src = frv_read_argument (exp, 1);
+ emit_move_insn (dest, force_reg (SImode, src));
+ return 0;
+ }
+
+ default:
+ break;
+ }
+
+ /* Expand groups of builtins. */
+
+ for (i = 0, d = bdesc_set; i < ARRAY_SIZE (bdesc_set); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_set_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_unop_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_binop_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_cut; i < ARRAY_SIZE (bdesc_cut); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_cut_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_2argimm; i < ARRAY_SIZE (bdesc_2argimm); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_binopimm_builtin (d->icode, exp, target);
+
+ for (i = 0, d = bdesc_void2arg; i < ARRAY_SIZE (bdesc_void2arg); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_voidbinop_builtin (d->icode, exp);
+
+ for (i = 0, d = bdesc_void3arg; i < ARRAY_SIZE (bdesc_void3arg); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_voidtriop_builtin (d->icode, exp);
+
+ for (i = 0, d = bdesc_voidacc; i < ARRAY_SIZE (bdesc_voidacc); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_voidaccop_builtin (d->icode, exp);
+
+ for (i = 0, d = bdesc_int_void2arg;
+ i < ARRAY_SIZE (bdesc_int_void2arg); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_int_void2arg (d->icode, exp);
+
+ for (i = 0, d = bdesc_prefetches;
+ i < ARRAY_SIZE (bdesc_prefetches); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_prefetches (d->icode, exp);
+
+ for (i = 0, d = bdesc_loads; i < ARRAY_SIZE (bdesc_loads); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_load_builtin (d->icode, TYPE_MODE (TREE_TYPE (exp)),
+ exp, target);
+
+ for (i = 0, d = bdesc_stores; i < ARRAY_SIZE (bdesc_stores); i++, d++)
+ if (d->code == fcode)
+ return frv_expand_store_builtin (d->icode, exp);
+
+ return 0;
+}
+
+static bool
+frv_in_small_data_p (const_tree decl)
+{
+ HOST_WIDE_INT size;
+ const_tree section_name;
+
+ /* Don't apply the -G flag to internal compiler structures. We
+ should leave such structures in the main data section, partly
+ for efficiency and partly because the size of some of them
+ (such as C++ typeinfos) is not known until later. */
+ if (TREE_CODE (decl) != VAR_DECL || DECL_ARTIFICIAL (decl))
+ return false;
+
+ /* If we already know which section the decl should be in, see if
+ it's a small data section. */
+ section_name = DECL_SECTION_NAME (decl);
+ if (section_name)
+ {
+ gcc_assert (TREE_CODE (section_name) == STRING_CST);
+ if (frv_string_begins_with (section_name, ".sdata"))
+ return true;
+ if (frv_string_begins_with (section_name, ".sbss"))
+ return true;
+ return false;
+ }
+
+ size = int_size_in_bytes (TREE_TYPE (decl));
+ if (size > 0 && size <= g_switch_value)
+ return true;
+
+ return false;
+}
+
+static bool
+frv_rtx_costs (rtx x,
+               int code,
+               int outer_code,
+ int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ if (outer_code == MEM)
+ {
+ /* Don't differentiate between memory addresses. All the ones
+ we accept have equal cost. */
+ *total = COSTS_N_INSNS (0);
+ return true;
+ }
+
+ switch (code)
+ {
+ case CONST_INT:
+ /* Make 12-bit integers really cheap. */
+ if (IN_RANGE (INTVAL (x), -2048, 2047))
+ {
+ *total = 0;
+ return true;
+ }
+ /* Fall through. */
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case NOT:
+ case NEG:
+ case COMPARE:
+ if (GET_MODE (x) == SImode)
+ *total = COSTS_N_INSNS (1);
+ else if (GET_MODE (x) == DImode)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case MULT:
+ if (GET_MODE (x) == SImode)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (6); /* guess */
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (18);
+ return true;
+
+ case MEM:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static void
+frv_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ switch_to_section (ctors_section);
+ assemble_align (POINTER_SIZE);
+ if (TARGET_FDPIC)
+ {
+ int ok = frv_assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, 1);
+
+ gcc_assert (ok);
+ return;
+ }
+ assemble_integer_with_op ("\t.picptr\t", symbol);
+}
+
+static void
+frv_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
+{
+ switch_to_section (dtors_section);
+ assemble_align (POINTER_SIZE);
+ if (TARGET_FDPIC)
+ {
+ int ok = frv_assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, 1);
+
+ gcc_assert (ok);
+ return;
+ }
+ assemble_integer_with_op ("\t.picptr\t", symbol);
+}
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+frv_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, FRV_STRUCT_VALUE_REGNUM);
+}
+
+#define TLS_BIAS (2048 - 16)
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void
+frv_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ gcc_assert (size == 4);
+ fputs ("\t.picptr\ttlsmoff(", file);
+ /* We want the unbiased TLS offset, so add the bias to the
+ expression, such that the implicit biasing cancels out. */
+ output_addr_const (file, plus_constant (x, TLS_BIAS));
+ fputs (")", file);
+}
+
+#include "gt-frv.h"
diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h
new file mode 100644
index 000000000..299a85676
--- /dev/null
+++ b/gcc/config/frv/frv.h
@@ -0,0 +1,2188 @@
+/* Target macros for the FRV port of GCC.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009,
+ 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Red Hat Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __FRV_H__
+#define __FRV_H__
+
+/* Frv general purpose macros. */
+/* Align an address. */
+#define ADDR_ALIGN(addr,align) (((addr) + (align) - 1) & ~((align) - 1))
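+/* For example, ADDR_ALIGN (13, 8) is (13 + 7) & ~7 == 16; ALIGN is assumed
+   to be a power of two.  */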
+
+/* Driver configuration. */
+
+/* -fpic and -fPIC used to imply the -mlibrary-pic multilib, but with
+ FDPIC which multilib to use depends on whether FDPIC is in use or
+ not. The trick we use is to introduce -multilib-library-pic as a
+ pseudo-flag that selects the library-pic multilib, and map fpic
+ and fPIC to it only if fdpic is not selected. Also, if fdpic is
+ selected and no PIC/PIE options are present, we imply -fPIE.
+ Otherwise, if -fpic or -fPIC are enabled and we're optimizing for
+ speed, or if we have -On with n>=3, enable inlining of PLTs. As
+ for -mgprel-ro, we want to enable it by default, but not for -fpic or
+ -fpie. */
+
+#define DRIVER_SELF_SPECS SUBTARGET_DRIVER_SELF_SPECS \
+"%{mno-pack:\
+ %{!mhard-float:-msoft-float}\
+ %{!mmedia:-mno-media}}\
+ %{!mfdpic:%{fpic|fPIC: -multilib-library-pic}}\
+ %{mfdpic:%{!fpic:%{!fpie:%{!fPIC:%{!fPIE:\
+ %{!fno-pic:%{!fno-pie:%{!fno-PIC:%{!fno-PIE:-fPIE}}}}}}}} \
+ %{!mno-inline-plt:%{O*:%{!O0:%{!Os:%{fpic|fPIC:-minline-plt} \
+ %{!fpic:%{!fPIC:%{!O:%{!O1:%{!O2:-minline-plt}}}}}}}}} \
+ %{!mno-gprel-ro:%{!fpic:%{!fpie:-mgprel-ro}}}} \
+"
+#ifndef SUBTARGET_DRIVER_SELF_SPECS
+# define SUBTARGET_DRIVER_SELF_SPECS
+#endif
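+
+/* Worked examples of the specs above (illustrative): '-mfdpic' with no
+   PIC/PIE options adds '-fPIE'; '-fpic' without '-mfdpic' adds
+   '-multilib-library-pic'; and under '-mfdpic', '-O2 -fpic' also enables
+   '-minline-plt', whereas non-PIC code only gets it at -O3 or above.  */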
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{G*} \
+%{mtomcat-stats} \
+%{!mno-eflags: \
+ %{mcpu=*} \
+ %{mgpr-*} %{mfpr-*} \
+ %{msoft-float} %{mhard-float} \
+ %{mdword} %{mno-dword} \
+ %{mdouble} %{mno-double} \
+ %{mmedia} %{mno-media} \
+ %{mmuladd} %{mno-muladd} \
+ %{mpack} %{mno-pack} \
+ %{mno-fdpic:-mnopic} %{mfdpic} \
+ %{fpic|fpie: -mpic} %{fPIC|fPIE: -mPIC} %{mlibrary-pic}}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0%O%s frvbegin%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "frvend%O%s"
+
+
+#define MASK_DEFAULT_FRV \
+ (MASK_MEDIA \
+ | MASK_DOUBLE \
+ | MASK_MULADD \
+ | MASK_DWORD \
+ | MASK_PACK)
+
+#define MASK_DEFAULT_FR500 \
+ (MASK_MEDIA | MASK_DWORD | MASK_PACK)
+
+#define MASK_DEFAULT_FR550 \
+ (MASK_MEDIA | MASK_DWORD | MASK_PACK)
+
+#define MASK_DEFAULT_FR450 \
+ (MASK_GPR_32 \
+ | MASK_FPR_32 \
+ | MASK_MEDIA \
+ | MASK_SOFT_FLOAT \
+ | MASK_DWORD \
+ | MASK_PACK)
+
+#define MASK_DEFAULT_FR400 \
+ (MASK_GPR_32 \
+ | MASK_FPR_32 \
+ | MASK_MEDIA \
+ | MASK_ACC_4 \
+ | MASK_SOFT_FLOAT \
+ | MASK_DWORD \
+ | MASK_PACK)
+
+#define MASK_DEFAULT_SIMPLE \
+ (MASK_GPR_32 | MASK_SOFT_FLOAT)
+
+/* A C string constant that tells the GCC driver program options to pass to
+ `cc1'. It can also specify how to translate options you give to GCC into
+ options for GCC to pass to the `cc1'.
+
+ Do not define this macro if it does not need to do anything. */
+/* For ABI compliance, we need to put bss data into the normal data section. */
+#define CC1_SPEC "%{G*}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%{h*} %{v:-V} \
+%{mfdpic:-melf32frvfd -z text} \
+%{static:-dn -Bstatic} \
+%{shared:-Bdynamic} \
+%{symbolic:-Bsymbolic} \
+%{G*}"
+
+#undef LIB_SPEC
+#define LIB_SPEC "--start-group -lc -lsim --end-group"
+
+#ifndef CPU_TYPE
+#define CPU_TYPE FRV_CPU_FR500
+#endif
+
+/* Run-time target specifications */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ int issue_rate; \
+ \
+ builtin_define ("__frv__"); \
+ builtin_assert ("machine=frv"); \
+ \
+ issue_rate = frv_issue_rate (); \
+ if (issue_rate > 1) \
+ builtin_define_with_int_value ("__FRV_VLIW__", issue_rate); \
+ builtin_define_with_int_value ("__FRV_GPR__", NUM_GPRS); \
+ builtin_define_with_int_value ("__FRV_FPR__", NUM_FPRS); \
+ builtin_define_with_int_value ("__FRV_ACC__", NUM_ACCS); \
+ \
+ switch (frv_cpu_type) \
+ { \
+ case FRV_CPU_GENERIC: \
+ builtin_define ("__CPU_GENERIC__"); \
+ break; \
+ case FRV_CPU_FR550: \
+ builtin_define ("__CPU_FR550__"); \
+ break; \
+ case FRV_CPU_FR500: \
+ case FRV_CPU_TOMCAT: \
+ builtin_define ("__CPU_FR500__"); \
+ break; \
+ case FRV_CPU_FR450: \
+ builtin_define ("__CPU_FR450__"); \
+ break; \
+ case FRV_CPU_FR405: \
+ builtin_define ("__CPU_FR405__"); \
+ break; \
+ case FRV_CPU_FR400: \
+ builtin_define ("__CPU_FR400__"); \
+ break; \
+ case FRV_CPU_FR300: \
+ case FRV_CPU_SIMPLE: \
+ builtin_define ("__CPU_FR300__"); \
+ break; \
+ } \
+ \
+ if (TARGET_HARD_FLOAT) \
+ builtin_define ("__FRV_HARD_FLOAT__"); \
+ if (TARGET_DWORD) \
+ builtin_define ("__FRV_DWORD__"); \
+ if (TARGET_FDPIC) \
+ builtin_define ("__FRV_FDPIC__"); \
+ if (flag_leading_underscore > 0) \
+ builtin_define ("__FRV_UNDERSCORE__"); \
+ } \
+ while (0)
+
+
+#define TARGET_HAS_FPRS (TARGET_HARD_FLOAT || TARGET_MEDIA)
+
+#define NUM_GPRS (TARGET_GPR_32 ? 32 : 64)
+#define NUM_FPRS (!TARGET_HAS_FPRS ? 0 : TARGET_FPR_32 ? 32 : 64)
+#define NUM_ACCS (!TARGET_MEDIA ? 0 : TARGET_ACC_4 ? 4 : 8)
+
+/* X is a valid accumulator number if (X & ACC_MASK) == X. */
+#define ACC_MASK \
+ (!TARGET_MEDIA ? 0 \
+ : TARGET_ACC_4 ? 3 \
+ : frv_cpu_type == FRV_CPU_FR450 ? 11 \
+ : 7)
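+
+/* For example, the FR450 mask of 11 (binary 1011) accepts accumulator
+   numbers 0-3 and 8-11; any X with bit 2 set fails (X & ACC_MASK) == X.  */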
+
+/* Macros to identify the blend of media instructions available. Revision 1
+ is the one found on the FR500. Revision 2 includes the changes made for
+ the FR400.
+
+ Treat the generic processor as a revision 1 machine for now, for
+ compatibility with earlier releases. */
+
+#define TARGET_MEDIA_REV1 \
+ (TARGET_MEDIA \
+ && (frv_cpu_type == FRV_CPU_GENERIC \
+ || frv_cpu_type == FRV_CPU_FR500))
+
+#define TARGET_MEDIA_REV2 \
+ (TARGET_MEDIA \
+ && (frv_cpu_type == FRV_CPU_FR400 \
+ || frv_cpu_type == FRV_CPU_FR405 \
+ || frv_cpu_type == FRV_CPU_FR450 \
+ || frv_cpu_type == FRV_CPU_FR550))
+
+#define TARGET_MEDIA_FR450 \
+ (frv_cpu_type == FRV_CPU_FR450)
+
+#define TARGET_FR500_FR550_BUILTINS \
+ (frv_cpu_type == FRV_CPU_FR500 \
+ || frv_cpu_type == FRV_CPU_FR550)
+
+#define TARGET_FR405_BUILTINS \
+ (frv_cpu_type == FRV_CPU_FR405 \
+ || frv_cpu_type == FRV_CPU_FR450)
+
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+/* This macro is a C statement to print on `stderr' a string describing the
+ particular machine description choice. Every machine description should
+ define `TARGET_VERSION'. For example:
+
+ #ifdef MOTOROLA
+ #define TARGET_VERSION \
+ fprintf (stderr, " (68k, Motorola syntax)");
+ #else
+ #define TARGET_VERSION \
+ fprintf (stderr, " (68k, MIT syntax)");
+ #endif */
+#define TARGET_VERSION fprintf (stderr, _(" (frv)"))
+
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) (TARGET_ALIGN_LABELS ? 3 : 0)
+
+/* Small Data Area Support. */
+/* Maximum size of variables that go in .sdata/.sbss.
+ The -msdata=foo switch also controls how small variables are handled. */
+#ifndef SDATA_DEFAULT_SIZE
+#define SDATA_DEFAULT_SIZE 8
+#endif
+
+
+/* Storage Layout */
+
+/* Define this macro to have the value 1 if the most significant bit in a byte
+ has the lowest number; otherwise define it to have the value zero. This
+ means that bit-field instructions count from the most significant bit. If
+ the machine has no bit-field instructions, then this must still be defined,
+ but it doesn't matter which value it is defined to. This macro need not be
+ a constant.
+
+ This macro does not affect the way structure fields are packed into bytes or
+ words; that is controlled by `BYTES_BIG_ENDIAN'. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this macro to have the value 1 if the most significant byte in a word
+ has the lowest number. This macro need not be a constant. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this macro to have the value 1 if, in a multiword object, the most
+ significant word has the lowest number. This applies to both memory
+ locations and registers; GCC fundamentally assumes that the order of
+ words in memory is the same as the order in registers. This macro need not
+ be a constant. */
+#define WORDS_BIG_ENDIAN 1
+
+/* Number of storage units in a word; normally 4. */
+#define UNITS_PER_WORD 4
+
+/* A macro to update MODE and UNSIGNEDP when an object whose type is TYPE and
+ which has the specified mode and signedness is to be stored in a register.
+ This macro is only called when TYPE is a scalar type.
+
+ On most RISC machines, which only have operations that operate on a full
+ register, define this macro to set M to `word_mode' if M is an integer mode
+ narrower than `BITS_PER_WORD'. In most cases, only integer modes should be
+ widened because wider-precision floating-point operations are usually more
+ expensive than their narrower counterparts.
+
+ For most machines, the macro definition does not change UNSIGNEDP. However,
+   some machines have instructions that preferentially handle either signed or
+ unsigned quantities of certain modes. For example, on the DEC Alpha, 32-bit
+ loads from memory and 32-bit add instructions sign-extend the result to 64
+ bits. On such machines, set UNSIGNEDP according to which kind of extension
+ is more efficient.
+
+ Do not define this macro if it would never modify MODE. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ do \
+ { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode; \
+ } \
+ while (0)
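+
+/* For example, a 'short' (HImode) scalar is widened and held in an SImode
+   register; only the mode changes, since UNSIGNEDP is left untouched.  */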
+
+/* Normal alignment required for function parameters on the stack, in bits.
+ All stack parameters receive at least this much alignment regardless of data
+ type. On most machines, this is the same as the size of an integer. */
+#define PARM_BOUNDARY 32
+
+/* Define this macro if you wish to preserve a certain alignment for the stack
+ pointer. The definition is a C expression for the desired alignment
+ (measured in bits).
+
+ If `PUSH_ROUNDING' is not defined, the stack will always be aligned to the
+ specified boundary. If `PUSH_ROUNDING' is defined and specifies a less
+ strict alignment than `STACK_BOUNDARY', the stack may be momentarily
+ unaligned while pushing arguments. */
+#define STACK_BOUNDARY 64
+
+/* Alignment required for a function entry point, in bits. */
+#define FUNCTION_BOUNDARY 128
+
+/* Biggest alignment that any data type can require on this machine,
+ in bits. */
+#define BIGGEST_ALIGNMENT 64
+
+/* @@@ A hack, needed because libobjc wants to use ADJUST_FIELD_ALIGN for
+ some reason. */
+#ifdef IN_TARGET_LIBS
+#define BIGGEST_FIELD_ALIGNMENT 64
+#else
+/* An expression for the alignment of a structure field FIELD if the
+ alignment computed in the usual way is COMPUTED. GCC uses this
+ value instead of the value in `BIGGEST_ALIGNMENT' or
+ `BIGGEST_FIELD_ALIGNMENT', if defined, for structure fields only. */
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ frv_adjust_field_align (FIELD, COMPUTED)
+#endif
+
+/* If defined, a C expression to compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that the object
+ would ordinarily have. The value of this macro is used instead of that
+ alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size data to make
+ it all fit in fewer cache lines. Another is to cause character arrays to be
+ word-aligned so that `strcpy' calls that copy constants to character arrays
+ can be done inline. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* If defined, a C expression to compute the alignment given to a constant that
+ is being placed in memory. CONSTANT is the constant and ALIGN is the
+ alignment that the object would ordinarily have. The value of this macro is
+ used instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string constants
+ to be word aligned so that `strcpy' calls that copy constants can be done
+ inline. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* Define this macro to be the value 1 if instructions will fail to work if
+ given data not on the nominal alignment. If instructions will merely go
+ slower in that case, define this macro as 0. */
+#define STRICT_ALIGNMENT 1
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+
+/* Layout of Source Language Data Types. */
+
+#define CHAR_TYPE_SIZE 8
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* An expression whose value is 1 or 0, according to whether the type `char'
+ should be signed or unsigned by default. The user can always override this
+ default with the options `-fsigned-char' and `-funsigned-char'. */
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+
+/* General purpose registers. */
+#define GPR_FIRST 0 /* First gpr */
+#define GPR_LAST (GPR_FIRST + 63) /* Last gpr */
+#define GPR_R0 GPR_FIRST /* R0, constant 0 */
+#define GPR_FP (GPR_FIRST + 2) /* Frame pointer */
+#define GPR_SP (GPR_FIRST + 1) /* Stack pointer */
+ /* small data register */
+#define SDA_BASE_REG ((unsigned)(TARGET_FDPIC ? -1 : flag_pic ? PIC_REGNO : (GPR_FIRST + 16)))
+#define PIC_REGNO (GPR_FIRST + (TARGET_FDPIC?15:17)) /* PIC register. */
+#define FDPIC_FPTR_REGNO (GPR_FIRST + 14) /* uClinux PIC function pointer register. */
+#define FDPIC_REGNO (GPR_FIRST + 15) /* uClinux PIC register. */
+
+#define HARD_REGNO_RENAME_OK(from,to) (TARGET_FDPIC ? ((to) != FDPIC_REG) : 1)
+
+#define OUR_FDPIC_REG get_hard_reg_initial_val (SImode, FDPIC_REGNO)
+
+#define FPR_FIRST 64 /* First FP reg */
+#define FPR_LAST 127 /* Last FP reg */
+
+#define GPR_TEMP_NUM frv_condexec_temps /* # gprs to reserve for temps */
+
+/* We reserve the last CR and CCR in each category to be used as a reload
+ register to reload the CR/CCR registers. This is a kludge. */
+#define CC_FIRST 128 /* First ICC/FCC reg */
+#define CC_LAST 135 /* Last ICC/FCC reg */
+#define ICC_FIRST (CC_FIRST + 4) /* First ICC reg */
+#define ICC_LAST (CC_FIRST + 7) /* Last ICC reg */
+#define ICC_TEMP (CC_FIRST + 7) /* Temporary ICC reg */
+#define FCC_FIRST (CC_FIRST) /* First FCC reg */
+#define FCC_LAST (CC_FIRST + 3) /* Last FCC reg */
+
+/* Amount by which to shift the CCR register right so that the field for a
+   given ICC or FCC register lands in the bottom 4 bits.  */
+#define CC_SHIFT_RIGHT(REGNO) (((REGNO) - CC_FIRST) << 2)
+
+/* Mask to isolate a single ICC/FCC value. */
+#define CC_MASK 0xf
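+
+/* For example, icc0 is hard register 132, so CC_SHIFT_RIGHT (132) is
+   (132 - 128) << 2 == 16; shifting the CCR right by 16 and masking with
+   CC_MASK isolates the icc0 field.  */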
+
+/* Masks to isolate the various bits in an ICC field. */
+#define ICC_MASK_N 0x8 /* negative */
+#define ICC_MASK_Z 0x4 /* zero */
+#define ICC_MASK_V 0x2 /* overflow */
+#define ICC_MASK_C 0x1 /* carry */
+
+/* Mask to isolate the N/Z flags in an ICC. */
+#define ICC_MASK_NZ (ICC_MASK_N | ICC_MASK_Z)
+
+/* Mask to isolate the Z/C flags in an ICC. */
+#define ICC_MASK_ZC (ICC_MASK_Z | ICC_MASK_C)
+
+/* Masks to isolate the various bits in a FCC field. */
+#define FCC_MASK_E 0x8 /* equal */
+#define FCC_MASK_L 0x4 /* less than */
+#define FCC_MASK_G 0x2 /* greater than */
+#define FCC_MASK_U 0x1 /* unordered */
+
+/* For CCR registers, the machine wants CR4..CR7 to be used for integer
+ code and CR0..CR3 to be used for floating point. */
+#define CR_FIRST 136 /* First CCR */
+#define CR_LAST 143 /* Last CCR */
+#define CR_NUM (CR_LAST-CR_FIRST+1) /* # of CCRs (8) */
+#define ICR_FIRST (CR_FIRST + 4) /* First integer CCR */
+#define ICR_LAST (CR_FIRST + 7) /* Last integer CCR */
+#define ICR_TEMP ICR_LAST /* Temp integer CCR */
+#define FCR_FIRST (CR_FIRST + 0) /* First float CCR */
+#define FCR_LAST (CR_FIRST + 3) /* Last float CCR */
+
+/* Amount by which to shift the CCCR special purpose register right so that
+   the field for a given CR register lands in the bottom 2 bits.  */
+#define CR_SHIFT_RIGHT(REGNO) (((REGNO) - CR_FIRST) << 1)
+
+/* Mask to isolate a single CR value. */
+#define CR_MASK 0x3
+
+#define ACC_FIRST 144 /* First acc register */
+#define ACC_LAST 155 /* Last acc register */
+
+#define ACCG_FIRST 156 /* First accg register */
+#define ACCG_LAST 167 /* Last accg register */
+
+#define AP_FIRST 168 /* fake argument pointer */
+
+#define SPR_FIRST 169
+#define SPR_LAST 172
+#define LR_REGNO (SPR_FIRST)
+#define LCR_REGNO (SPR_FIRST + 1)
+#define IACC_FIRST (SPR_FIRST + 2)
+#define IACC_LAST (SPR_FIRST + 3)
+
+#define GPR_P(R) IN_RANGE (R, GPR_FIRST, GPR_LAST)
+#define GPR_OR_AP_P(R) (GPR_P (R) || (R) == ARG_POINTER_REGNUM)
+#define FPR_P(R) IN_RANGE (R, FPR_FIRST, FPR_LAST)
+#define CC_P(R) IN_RANGE (R, CC_FIRST, CC_LAST)
+#define ICC_P(R) IN_RANGE (R, ICC_FIRST, ICC_LAST)
+#define FCC_P(R) IN_RANGE (R, FCC_FIRST, FCC_LAST)
+#define CR_P(R) IN_RANGE (R, CR_FIRST, CR_LAST)
+#define ICR_P(R) IN_RANGE (R, ICR_FIRST, ICR_LAST)
+#define FCR_P(R) IN_RANGE (R, FCR_FIRST, FCR_LAST)
+#define ACC_P(R) IN_RANGE (R, ACC_FIRST, ACC_LAST)
+#define ACCG_P(R) IN_RANGE (R, ACCG_FIRST, ACCG_LAST)
+#define SPR_P(R) IN_RANGE (R, SPR_FIRST, SPR_LAST)
+
+#define GPR_OR_PSEUDO_P(R) (GPR_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define FPR_OR_PSEUDO_P(R) (FPR_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define GPR_AP_OR_PSEUDO_P(R) (GPR_OR_AP_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define CC_OR_PSEUDO_P(R) (CC_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define ICC_OR_PSEUDO_P(R) (ICC_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define FCC_OR_PSEUDO_P(R) (FCC_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define CR_OR_PSEUDO_P(R) (CR_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define ICR_OR_PSEUDO_P(R) (ICR_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define FCR_OR_PSEUDO_P(R) (FCR_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define ACC_OR_PSEUDO_P(R) (ACC_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+#define ACCG_OR_PSEUDO_P(R) (ACCG_P (R) || (R) >= FIRST_PSEUDO_REGISTER)
+
+#define MAX_STACK_IMMEDIATE_OFFSET 2047
+
+
+/* Register Basics. */
+
+/* Number of hardware registers known to the compiler. They receive numbers 0
+ through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number
+ really is assigned the number `FIRST_PSEUDO_REGISTER'. */
+#define FIRST_PSEUDO_REGISTER (SPR_LAST + 1)
+
+/* The first/last register that can contain the arguments to a function. */
+#define FIRST_ARG_REGNUM (GPR_FIRST + 8)
+#define LAST_ARG_REGNUM (FIRST_ARG_REGNUM + FRV_NUM_ARG_REGS - 1)
+
+/* Registers used by the exception handling functions. These should be
+ registers that are not otherwise used by the calling sequence. */
+#define FIRST_EH_REGNUM 14
+#define LAST_EH_REGNUM 15
+
+/* Scratch registers used in the prologue, epilogue and thunks.
+ OFFSET_REGNO is for loading constant addends that are too big for a
+ single instruction. TEMP_REGNO is used for transferring SPRs to and from
+ the stack, and various other activities. */
+#define OFFSET_REGNO 4
+#define TEMP_REGNO 5
+
+/* Registers used in the prologue. OLD_SP_REGNO is the old stack pointer,
+ which is sometimes used to set up the frame pointer. */
+#define OLD_SP_REGNO 6
+
+/* Registers used in the epilogue. STACKADJ_REGNO stores the exception
+ handler's stack adjustment. */
+#define STACKADJ_REGNO 6
+
+/* Registers used in thunks.  JUMP_REGNO is used for loading the target
+   address.  */
+#define JUMP_REGNO 6
+
+#define EH_RETURN_DATA_REGNO(N) ((N) <= (LAST_EH_REGNUM - FIRST_EH_REGNUM)? \
+ (N) + FIRST_EH_REGNUM : INVALID_REGNUM)
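+
+/* With the values above, EH_RETURN_DATA_REGNO (0) is gr14,
+   EH_RETURN_DATA_REGNO (1) is gr15, and any larger N gives INVALID_REGNUM.  */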
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, STACKADJ_REGNO)
+#define EH_RETURN_HANDLER_RTX RETURN_ADDR_RTX (0, frame_pointer_rtx)
+
+#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNO)
+
+/* An initializer that says which registers are used for fixed purposes all
+ throughout the compiled code and are therefore not available for general
+ allocation. These would include the stack pointer, the frame pointer
+ (except on machines where that can be used as a general register when no
+ frame pointer is needed), the program counter on machines where that is
+ considered one of the addressable registers, and any other numbered register
+ with a standard use.
+
+ This information is expressed as a sequence of numbers, separated by commas
+ and surrounded by braces. The Nth number is 1 if register N is fixed, 0
+ otherwise.
+
+ The table initialized from this macro, and the table initialized by the
+ following one, may be overridden at run time either automatically, by the
+ actions of the macro `CONDITIONAL_REGISTER_USAGE', or by the user with the
+ command options `-ffixed-REG', `-fcall-used-REG' and `-fcall-saved-REG'. */
+
+/* gr0 -- Hard Zero
+ gr1 -- Stack Pointer
+ gr2 -- Frame Pointer
+ gr3 -- Hidden Parameter
+ gr16 -- Small Data reserved
+ gr17 -- Pic reserved
+ gr28 -- OS reserved
+ gr29 -- OS reserved
+ gr30 -- OS reserved
+ gr31 -- OS reserved
+ cr3 -- reserved to reload FCC registers.
+ cr7 -- reserved to reload ICC registers. */
+#define FIXED_REGISTERS \
+{ /* Integer Registers */ \
+ 1, 1, 1, 1, 0, 0, 0, 0, /* 000-007, gr0 - gr7 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 008-015, gr8 - gr15 */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, /* 016-023, gr16 - gr23 */ \
+ 0, 0, 0, 0, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 032-039, gr32 - gr39 */ \
+  0, 0, 0, 0, 0, 0, 0, 0, /* 040-047, gr40 - gr47 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 048-055, gr48 - gr55 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 056-063, gr56 - gr63 */ \
+ /* Float Registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 064-071, fr0 - fr7 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 072-079, fr8 - fr15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 080-087, fr16 - fr23 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 088-095, fr24 - fr31 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 096-103, fr32 - fr39 */ \
+  0, 0, 0, 0, 0, 0, 0, 0, /* 104-111, fr40 - fr47 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 112-119, fr48 - fr55 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 120-127, fr56 - fr63 */ \
+ /* Condition Code Registers */ \
+ 0, 0, 0, 0, /* 128-131, fcc0 - fcc3 */ \
+ 0, 0, 0, 1, /* 132-135, icc0 - icc3 */ \
+ /* Conditional execution Registers (CCR) */ \
+ 0, 0, 0, 0, 0, 0, 0, 1, /* 136-143, cr0 - cr7 */ \
+ /* Accumulators */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 144-151, acc0 - acc7 */ \
+ 1, 1, 1, 1, /* 152-155, acc8 - acc11 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 156-163, accg0 - accg7 */ \
+ 1, 1, 1, 1, /* 164-167, accg8 - accg11 */ \
+ /* Other registers */ \
+ 1, /* 168, AP - fake arg ptr */ \
+  1, /* 169, LR  - Link register */ \
+  0, /* 170, LCR - Loop count reg */ \
+ 1, 1 /* 171-172, iacc0 */ \
+}
+
+/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in
+ general) by function calls as well as for fixed registers. This macro
+ therefore identifies the registers that are not available for general
+ allocation of values that must live across function calls.
+
+ If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically
+ saves it on function entry and restores it on function exit, if the register
+ is used within the function. */
+#define CALL_USED_REGISTERS \
+{ /* Integer Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 000-007, gr0 - gr7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 008-015, gr8 - gr15 */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, /* 016-023, gr16 - gr23 */ \
+ 0, 0, 0, 0, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 032-039, gr32 - gr39 */ \
+  1, 1, 1, 1, 1, 1, 1, 1, /* 040-047, gr40 - gr47 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 048-055, gr48 - gr55 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 056-063, gr56 - gr63 */ \
+ /* Float Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 064-071, fr0 - fr7 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 072-079, fr8 - fr15 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 080-087, fr16 - fr23 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 088-095, fr24 - fr31 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 096-103, fr32 - fr39 */ \
+  1, 1, 1, 1, 1, 1, 1, 1, /* 104-111, fr40 - fr47 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 112-119, fr48 - fr55 */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 120-127, fr56 - fr63 */ \
+ /* Condition Code Registers */ \
+ 1, 1, 1, 1, /* 128-131, fcc0 - fcc3 */ \
+ 1, 1, 1, 1, /* 132-135, icc0 - icc3 */ \
+ /* Conditional execution Registers (CCR) */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 136-143, cr0 - cr7 */ \
+ /* Accumulators */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 144-151, acc0 - acc7 */ \
+ 1, 1, 1, 1, /* 152-155, acc8 - acc11 */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, /* 156-163, accg0 - accg7 */ \
+ 1, 1, 1, 1, /* 164-167, accg8 - accg11 */ \
+ /* Other registers */ \
+ 1, /* 168, AP - fake arg ptr */ \
+  1, /* 169, LR  - Link register */ \
+ 1, /* 170, LCR - Loop count reg */ \
+ 1, 1 /* 171-172, iacc0 */ \
+}
+
+
+/* Order of allocation of registers. */
+
+/* If defined, an initializer for a vector of integers, containing the numbers
+ of hard registers in the order in which GCC should prefer to use them
+ (from most preferred to least).
+
+ If this macro is not defined, registers are used lowest numbered first (all
+ else being equal).
+
+ One use of this macro is on machines where the highest numbered registers
+ must always be saved and the save-multiple-registers instruction supports
+ only sequences of consecutive registers. On such machines, define
+ `REG_ALLOC_ORDER' to be an initializer that lists the highest numbered
+ allocatable register first. */
+
+/* On the FRV, allocate GR16 and GR17 after other saved registers so that we
+ have a better chance of allocating 2 registers at a time and can use the
+ double word load/store instructions in the prologue. */
+#define REG_ALLOC_ORDER \
+{ \
+ /* volatile registers */ \
+ GPR_FIRST + 4, GPR_FIRST + 5, GPR_FIRST + 6, GPR_FIRST + 7, \
+ GPR_FIRST + 8, GPR_FIRST + 9, GPR_FIRST + 10, GPR_FIRST + 11, \
+ GPR_FIRST + 12, GPR_FIRST + 13, GPR_FIRST + 14, GPR_FIRST + 15, \
+ GPR_FIRST + 32, GPR_FIRST + 33, GPR_FIRST + 34, GPR_FIRST + 35, \
+ GPR_FIRST + 36, GPR_FIRST + 37, GPR_FIRST + 38, GPR_FIRST + 39, \
+ GPR_FIRST + 40, GPR_FIRST + 41, GPR_FIRST + 42, GPR_FIRST + 43, \
+ GPR_FIRST + 44, GPR_FIRST + 45, GPR_FIRST + 46, GPR_FIRST + 47, \
+ \
+ FPR_FIRST + 0, FPR_FIRST + 1, FPR_FIRST + 2, FPR_FIRST + 3, \
+ FPR_FIRST + 4, FPR_FIRST + 5, FPR_FIRST + 6, FPR_FIRST + 7, \
+ FPR_FIRST + 8, FPR_FIRST + 9, FPR_FIRST + 10, FPR_FIRST + 11, \
+ FPR_FIRST + 12, FPR_FIRST + 13, FPR_FIRST + 14, FPR_FIRST + 15, \
+ FPR_FIRST + 32, FPR_FIRST + 33, FPR_FIRST + 34, FPR_FIRST + 35, \
+ FPR_FIRST + 36, FPR_FIRST + 37, FPR_FIRST + 38, FPR_FIRST + 39, \
+ FPR_FIRST + 40, FPR_FIRST + 41, FPR_FIRST + 42, FPR_FIRST + 43, \
+ FPR_FIRST + 44, FPR_FIRST + 45, FPR_FIRST + 46, FPR_FIRST + 47, \
+ \
+ ICC_FIRST + 0, ICC_FIRST + 1, ICC_FIRST + 2, ICC_FIRST + 3, \
+ FCC_FIRST + 0, FCC_FIRST + 1, FCC_FIRST + 2, FCC_FIRST + 3, \
+ CR_FIRST + 0, CR_FIRST + 1, CR_FIRST + 2, CR_FIRST + 3, \
+ CR_FIRST + 4, CR_FIRST + 5, CR_FIRST + 6, CR_FIRST + 7, \
+ \
+ /* saved registers */ \
+ GPR_FIRST + 18, GPR_FIRST + 19, \
+ GPR_FIRST + 20, GPR_FIRST + 21, GPR_FIRST + 22, GPR_FIRST + 23, \
+ GPR_FIRST + 24, GPR_FIRST + 25, GPR_FIRST + 26, GPR_FIRST + 27, \
+ GPR_FIRST + 48, GPR_FIRST + 49, GPR_FIRST + 50, GPR_FIRST + 51, \
+ GPR_FIRST + 52, GPR_FIRST + 53, GPR_FIRST + 54, GPR_FIRST + 55, \
+ GPR_FIRST + 56, GPR_FIRST + 57, GPR_FIRST + 58, GPR_FIRST + 59, \
+ GPR_FIRST + 60, GPR_FIRST + 61, GPR_FIRST + 62, GPR_FIRST + 63, \
+ GPR_FIRST + 16, GPR_FIRST + 17, \
+ \
+ FPR_FIRST + 16, FPR_FIRST + 17, FPR_FIRST + 18, FPR_FIRST + 19, \
+ FPR_FIRST + 20, FPR_FIRST + 21, FPR_FIRST + 22, FPR_FIRST + 23, \
+ FPR_FIRST + 24, FPR_FIRST + 25, FPR_FIRST + 26, FPR_FIRST + 27, \
+ FPR_FIRST + 28, FPR_FIRST + 29, FPR_FIRST + 30, FPR_FIRST + 31, \
+ FPR_FIRST + 48, FPR_FIRST + 49, FPR_FIRST + 50, FPR_FIRST + 51, \
+ FPR_FIRST + 52, FPR_FIRST + 53, FPR_FIRST + 54, FPR_FIRST + 55, \
+ FPR_FIRST + 56, FPR_FIRST + 57, FPR_FIRST + 58, FPR_FIRST + 59, \
+ FPR_FIRST + 60, FPR_FIRST + 61, FPR_FIRST + 62, FPR_FIRST + 63, \
+ \
+ /* special or fixed registers */ \
+ GPR_FIRST + 0, GPR_FIRST + 1, GPR_FIRST + 2, GPR_FIRST + 3, \
+ GPR_FIRST + 28, GPR_FIRST + 29, GPR_FIRST + 30, GPR_FIRST + 31, \
+ ACC_FIRST + 0, ACC_FIRST + 1, ACC_FIRST + 2, ACC_FIRST + 3, \
+ ACC_FIRST + 4, ACC_FIRST + 5, ACC_FIRST + 6, ACC_FIRST + 7, \
+ ACC_FIRST + 8, ACC_FIRST + 9, ACC_FIRST + 10, ACC_FIRST + 11, \
+ ACCG_FIRST + 0, ACCG_FIRST + 1, ACCG_FIRST + 2, ACCG_FIRST + 3, \
+ ACCG_FIRST + 4, ACCG_FIRST + 5, ACCG_FIRST + 6, ACCG_FIRST + 7, \
+ ACCG_FIRST + 8, ACCG_FIRST + 9, ACCG_FIRST + 10, ACCG_FIRST + 11, \
+ AP_FIRST, LR_REGNO, LCR_REGNO, \
+ IACC_FIRST + 0, IACC_FIRST + 1 \
+}
+
+
+/* How Values Fit in Registers. */
+
+/* A C expression for the number of consecutive hard registers, starting at
+ register number REGNO, required to hold a value of mode MODE.
+
+ On a machine where all registers are exactly one word, a suitable definition
+ of this macro is
+
+ #define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) \
+      / UNITS_PER_WORD) */
+
+/* On the FRV, make the CC modes take 3 words in the integer registers, so that
+ we can build the appropriate instructions to properly reload the values. */
+#define HARD_REGNO_NREGS(REGNO, MODE) frv_hard_regno_nregs (REGNO, MODE)
+
+/* A C expression that is nonzero if it is permissible to store a value of mode
+ MODE in hard register number REGNO (or in several registers starting with
+ that one). For a machine where all registers are equivalent, a suitable
+ definition is
+
+ #define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+ It is not necessary for this macro to check for the numbers of fixed
+ registers, because the allocation mechanism considers them to be always
+ occupied.
+
+ On some machines, double-precision values must be kept in even/odd register
+ pairs. The way to implement that is to define this macro to reject odd
+ register numbers for such modes.
+
+ The minimum requirement for a mode to be OK in a register is that the
+ `movMODE' instruction pattern support moves between the register and any
+ other hard register for which the mode is OK; and that moving a value into
+ the register and back out not alter it.
+
+ Since the same instruction used to move `SImode' will work for all narrower
+ integer modes, it is not necessary on any machine for `HARD_REGNO_MODE_OK'
+ to distinguish between these modes, provided you define patterns `movhi',
+ etc., to take advantage of this. This is useful because of the interaction
+ between `HARD_REGNO_MODE_OK' and `MODES_TIEABLE_P'; it is very desirable for
+ all integer modes to be tieable.
+
+ Many machines have special registers for floating point arithmetic. Often
+ people assume that floating point machine modes are allowed only in floating
+ point registers. This is not true. Any registers that can hold integers
+ can safely *hold* a floating point machine mode, whether or not floating
+ arithmetic can be done on it in those registers. Integer move instructions
+ can be used to move the values.
+
+ On some machines, though, the converse is true: fixed-point machine modes
+ may not go in floating registers. This is true if the floating registers
+ normalize any value stored in them, because storing a non-floating value
+ there would garble it. In this case, `HARD_REGNO_MODE_OK' should reject
+ fixed-point machine modes in floating registers. But if the floating
+ registers do not automatically normalize, if you can store any bit pattern
+ in one and retrieve it unchanged without a trap, then any machine mode may
+ go in a floating register, so you can define this macro to say so.
+
+ The primary significance of special floating registers is rather that they
+ are the registers acceptable in floating point arithmetic instructions.
+ However, this is of no concern to `HARD_REGNO_MODE_OK'. You handle it by
+ writing the proper constraints for those instructions.
+
+ On some machines, the floating registers are especially slow to access, so
+ that it is better to store a value in a stack frame than in such a register
+ if floating point arithmetic is not being done. As long as the floating
+ registers are not in class `GENERAL_REGS', they will not be used unless some
+ pattern's constraint asks for one. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) frv_hard_regno_mode_ok (REGNO, MODE)
+
+/* A C expression that is nonzero if it is desirable to choose register
+ allocation so as to avoid move instructions between a value of mode MODE1
+ and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are
+ ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be
+ zero. */
+#define MODES_TIEABLE_P(MODE1, MODE2) ((MODE1) == (MODE2))
+
+/* Define this macro if the compiler should avoid copies to/from CCmode
+   registers.  You should only define this macro if support for copying to/from
+ CCmode is incomplete. */
+#define AVOID_CCMODE_COPIES
+
+
+/* Register Classes. */
+
+/* An enumeral type that must be defined with all the register class names as
+ enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last
+ register class, followed by one more enumeral value, `LIM_REG_CLASSES',
+ which is not a register class but rather tells how many classes there are.
+
+ Each register class has a number, which is the value of casting the class
+ name to type `int'. The number serves as an index in many of the tables
+ described below. */
+enum reg_class
+{
+ NO_REGS,
+ ICC_REGS,
+ FCC_REGS,
+ CC_REGS,
+ ICR_REGS,
+ FCR_REGS,
+ CR_REGS,
+ LCR_REG,
+ LR_REG,
+ GR8_REGS,
+ GR9_REGS,
+ GR89_REGS,
+ FDPIC_REGS,
+ FDPIC_FPTR_REGS,
+ FDPIC_CALL_REGS,
+ SPR_REGS,
+ QUAD_ACC_REGS,
+ EVEN_ACC_REGS,
+ ACC_REGS,
+ ACCG_REGS,
+ QUAD_FPR_REGS,
+ FEVEN_REGS,
+ FPR_REGS,
+ QUAD_REGS,
+ EVEN_REGS,
+ GPR_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define GENERAL_REGS GPR_REGS
+
+/* The number of distinct register classes, defined as follows:
+
+ #define N_REG_CLASSES (int) LIM_REG_CLASSES */
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+/* An initializer containing the names of the register classes as C string
+ constants. These names are used in writing some of the debugging dumps. */
+#define REG_CLASS_NAMES { \
+ "NO_REGS", \
+ "ICC_REGS", \
+ "FCC_REGS", \
+ "CC_REGS", \
+ "ICR_REGS", \
+ "FCR_REGS", \
+ "CR_REGS", \
+ "LCR_REG", \
+ "LR_REG", \
+ "GR8_REGS", \
+ "GR9_REGS", \
+ "GR89_REGS", \
+ "FDPIC_REGS", \
+ "FDPIC_FPTR_REGS", \
+ "FDPIC_CALL_REGS", \
+ "SPR_REGS", \
+ "QUAD_ACC_REGS", \
+ "EVEN_ACC_REGS", \
+ "ACC_REGS", \
+ "ACCG_REGS", \
+ "QUAD_FPR_REGS", \
+ "FEVEN_REGS", \
+ "FPR_REGS", \
+ "QUAD_REGS", \
+ "EVEN_REGS", \
+ "GPR_REGS", \
+ "ALL_REGS" \
+}
+
+/* An initializer containing the contents of the register classes, as integers
+ which are bit masks. The Nth integer specifies the contents of class N.
+ The way the integer MASK is interpreted is that register R is in the class
+ if `MASK & (1 << R)' is 1.
+
+ When the machine has more than 32 registers, an integer does not suffice.
+ Then the integers are replaced by sub-initializers, braced groupings
+ containing several integers. Each sub-initializer must be suitable as an
+ initializer for the type `HARD_REG_SET' which is defined in
+ `hard-reg-set.h'. */
+#define REG_CLASS_CONTENTS \
+{ /* gr0-gr31 gr32-gr63 fr0-fr31 fr32-fr63 cc/ccr/acc ap/spr */ \
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* NO_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x000000f0,0x0}, /* ICC_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0000000f,0x0}, /* FCC_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x000000ff,0x0}, /* CC_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0000f000,0x0}, /* ICR_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000f00,0x0}, /* FCR_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0000ff00,0x0}, /* CR_REGS */\
+  { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x400}, /* LCR_REG */\
+  { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x200}, /* LR_REG */\
+ { 0x00000100,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* GR8_REGS */\
+ { 0x00000200,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* GR9_REGS */\
+ { 0x00000300,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* GR89_REGS */\
+ { 0x00008000,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* FDPIC_REGS */\
+ { 0x00004000,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* FDPIC_FPTR_REGS */\
+ { 0x0000c000,0x00000000,0x00000000,0x00000000,0x00000000,0x0}, /* FDPIC_CALL_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x1e00}, /* SPR_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0fff0000,0x0}, /* QUAD_ACC */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0fff0000,0x0}, /* EVEN_ACC */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0x0fff0000,0x0}, /* ACC_REGS */\
+ { 0x00000000,0x00000000,0x00000000,0x00000000,0xf0000000,0xff}, /* ACCG_REGS*/\
+ { 0x00000000,0x00000000,0xffffffff,0xffffffff,0x00000000,0x0}, /* QUAD_FPR */\
+ { 0x00000000,0x00000000,0xffffffff,0xffffffff,0x00000000,0x0}, /* FEVEN_REG*/\
+ { 0x00000000,0x00000000,0xffffffff,0xffffffff,0x00000000,0x0}, /* FPR_REGS */\
+ { 0x0ffffffc,0xffffffff,0x00000000,0x00000000,0x00000000,0x0}, /* QUAD_REGS*/\
+ { 0xfffffffc,0xffffffff,0x00000000,0x00000000,0x00000000,0x0}, /* EVEN_REGS*/\
+ { 0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,0x100}, /* GPR_REGS */\
+ { 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x1fff}, /* ALL_REGS */\
+}
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of the registers.  The macro value is an
+   array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GPR_REGS, FPR_REGS, ACC_REGS, ICR_REGS, FCR_REGS, ICC_REGS, FCC_REGS, \
+ ACCG_REGS, SPR_REGS, \
+ LIM_REG_CLASSES \
+}
+
+/* A C expression whose value is a register class containing hard register
+ REGNO. In general there is more than one such class; choose a class which
+ is "minimal", meaning that no smaller class also contains the register. */
+
+extern enum reg_class regno_reg_class[];
+#define REGNO_REG_CLASS(REGNO) regno_reg_class [REGNO]
+
+/* A macro whose definition is the name of the class to which a valid base
+ register must belong. A base register is one used in an address which is
+ the register value plus a displacement. */
+#define BASE_REG_CLASS GPR_REGS
+
+/* A macro whose definition is the name of the class to which a valid index
+ register must belong. An index register is one used in an address where its
+ value is either multiplied by a scale factor or added to another register
+ (as well as added to a displacement). */
+#define INDEX_REG_CLASS GPR_REGS
+
+/* A C expression which is nonzero if register number NUM is suitable for use
+ as a base register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard register. */
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ ((NUM) < FIRST_PSEUDO_REGISTER \
+ ? GPR_P (NUM) \
+ : (reg_renumber [NUM] >= 0 && GPR_P (reg_renumber [NUM])))
+
+/* A C expression which is nonzero if register number NUM is suitable for use
+ as an index register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard register.
+
+ The difference between an index register and a base register is that the
+ index register may be scaled. If an address involves the sum of two
+ registers, neither one of them scaled, then either one may be labeled the
+ "base" and the other the "index"; but whichever labeling is used must fit
+ the machine's constraints of which registers may serve in each capacity.
+ The compiler will try both labelings, looking for one that is valid, and
+ will reload one or both registers only if neither labeling works. */
+#define REGNO_OK_FOR_INDEX_P(NUM) \
+ ((NUM) < FIRST_PSEUDO_REGISTER \
+ ? GPR_P (NUM) \
+ : (reg_renumber [NUM] >= 0 && GPR_P (reg_renumber [NUM])))
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ frv_secondary_reload_class (CLASS, MODE, X)
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ frv_secondary_reload_class (CLASS, MODE, X)
+
+/* A C expression for the maximum number of consecutive registers of
+ class CLASS needed to hold a value of mode MODE.
+
+ This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value
+ of the macro `CLASS_MAX_NREGS (CLASS, MODE)' should be the maximum value of
+ `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class CLASS.
+
+ This macro helps control the handling of multiple-word values in
+ the reload pass.
+
+ This declaration is required. */
+#define CLASS_MAX_NREGS(CLASS, MODE) frv_class_max_nregs (CLASS, MODE)
+
+#define ZERO_P(x) (x == CONST0_RTX (GET_MODE (x)))
+
+
+/* Basic Stack Layout. */
+
+/* Structure to describe information about a saved range of registers.  */
+
+typedef struct frv_stack_regs {
+ const char * name; /* name of the register ranges */
+ int first; /* first register in the range */
+ int last; /* last register in the range */
+ int size_1word; /* # of bytes to be stored via 1 word stores */
+ int size_2words; /* # of bytes to be stored via 2 word stores */
+ unsigned char field_p; /* true if the registers are a single SPR */
+ unsigned char dword_p; /* true if we can do dword stores */
+ unsigned char special_p; /* true if the regs have a fixed save loc. */
+} frv_stack_regs_t;
+
+/* Register ranges to look into saving. */
+#define STACK_REGS_GPR 0 /* Gprs (normally gr16..gr31, gr48..gr63) */
+#define STACK_REGS_FPR 1 /* Fprs (normally fr16..fr31, fr48..fr63) */
+#define STACK_REGS_LR 2 /* LR register */
+#define STACK_REGS_CC 3 /* CCrs (normally not saved) */
+#define STACK_REGS_LCR 5 /* lcr register */
+#define STACK_REGS_STDARG 6 /* stdarg registers */
+#define STACK_REGS_STRUCT 7 /* structure return (gr3) */
+#define STACK_REGS_FP 8 /* FP register */
+#define STACK_REGS_MAX 9 /* # of register ranges */
+
+/* Values for save_p field. */
+#define REG_SAVE_NO_SAVE 0 /* register not saved */
+#define REG_SAVE_1WORD 1 /* save the register */
+#define REG_SAVE_2WORDS 2 /* save register and register+1 */
+
+/* Structure used to define the frv stack. */
+
+typedef struct frv_stack {
+ int total_size; /* total bytes allocated for stack */
+ int vars_size; /* variable save area size */
+ int parameter_size; /* outgoing parameter size */
+ int stdarg_size; /* size of regs needed to be saved for stdarg */
+ int regs_size; /* size of the saved registers */
+ int regs_size_1word; /* # of bytes to be stored via 1 word stores */
+ int regs_size_2words; /* # of bytes to be stored via 2 word stores */
+ int header_size; /* size of the old FP, struct ret., LR save */
+ int pretend_size; /* size of pretend args */
+ int vars_offset; /* offset to save local variables from new SP*/
+ int regs_offset; /* offset to save registers from new SP */
+ /* register range information */
+ frv_stack_regs_t regs[STACK_REGS_MAX];
+ /* offset to store each register */
+ int reg_offset[FIRST_PSEUDO_REGISTER];
+ /* whether to save register (& reg+1) */
+ unsigned char save_p[FIRST_PSEUDO_REGISTER];
+} frv_stack_t;
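+
+/* Illustrative sketch (an assumption drawn from the field names above, not a
+   statement of the actual layout): a frame described by frv_stack_t can be
+   pictured, from higher to lower addresses, roughly as
+
+     pretend args                             (pretend_size)
+     header: old FP, struct return, LR save   (header_size)
+     saved registers                          (regs_size, regs_offset from new SP)
+     local variables                          (vars_size, vars_offset from new SP)
+     outgoing parameters                      (parameter_size)
+     new SP                                   (total_size bytes allocated in all)
+
+   The authoritative computation lives in the stack-info code in frv.c.  */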
+
+/* Define this macro if pushing a word onto the stack moves the stack pointer
+ to a smaller address. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this macro to a nonzero value if the addresses of local variable
+   slots are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset from the frame pointer to the first local variable slot to be
+ allocated.
+
+ If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the
+ first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by
+ adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset from the stack pointer register to the first location at which
+ outgoing arguments are placed. If not specified, the default value of zero
+ is used. This is the proper value for most machines.
+
+ If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first
+ location at which outgoing arguments are placed. */
+#define STACK_POINTER_OFFSET 0
+
+/* Offset from the argument pointer register to the first argument's address.
+ On some machines it may depend on the data type of the function.
+
+ If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first
+ argument's address. */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the address in a stack frame
+ where the pointer to the caller's frame is stored. Assume that FRAMEADDR is
+ an RTL expression for the address of the stack frame itself.
+
+ If you don't define this macro, the default is to return the value of
+ FRAMEADDR--that is, the stack frame address is also the address of the stack
+ word that points to the previous frame. */
+#define DYNAMIC_CHAIN_ADDRESS(FRAMEADDR) frv_dynamic_chain_address (FRAMEADDR)
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame, after the
+ prologue. FRAMEADDR is the frame pointer of the COUNT frame, or the frame
+ pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' is
+ defined.
+
+ The value of the expression must always be the correct address when COUNT is
+   zero, but may be `NULL_RTX' if there is no way to determine the return
+ address of other frames. */
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) frv_return_addr_rtx (COUNT, FRAMEADDR)
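+
+/* Illustration (not part of the original header): with COUNT == 0 this macro
+   is what ultimately services __builtin_return_address, e.g.
+
+     void *ret_addr (void)
+     {
+       return __builtin_return_address (0);
+     }
+
+   For nonzero COUNT, frv_return_addr_rtx may legitimately yield NULL_RTX, in
+   which case the builtin's result is unspecified.  */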
+
+#define RETURN_POINTER_REGNUM LR_REGNO
+
+/* A C expression whose value is RTL representing the location of the incoming
+ return address at the beginning of any function, before the prologue. This
+ RTL is either a `REG', indicating that the return value is saved in `REG',
+ or a `MEM' representing a location in the stack.
+
+ You only need to define this macro if you want to support call frame
+ debugging information like that provided by DWARF 2. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM)
+
+
+/* Registers That Address the Stack Frame.  */
+
+/* The register number of the stack pointer register, which must also be a
+ fixed register according to `FIXED_REGISTERS'. On most machines, the
+ hardware determines which register this is. */
+#define STACK_POINTER_REGNUM (GPR_FIRST + 1)
+
+/* The register number of the frame pointer register, which is used to access
+ automatic variables in the stack frame. On some machines, the hardware
+ determines which register this is. On other machines, you can choose any
+ register you wish for this purpose. */
+#define FRAME_POINTER_REGNUM (GPR_FIRST + 2)
+
+/* The register number of the arg pointer register, which is used to access the
+ function's argument list. On some machines, this is the same as the frame
+ pointer register. On some machines, the hardware determines which register
+ this is. On other machines, you can choose any register you wish for this
+ purpose. If this is not the same register as the frame pointer register,
+ then you must mark it as a fixed register according to `FIXED_REGISTERS', or
+ arrange to be able to eliminate it. */
+
+/* On frv this is a fake register that is eliminated in
+   favor of either the frame pointer or the stack pointer.  */
+#define ARG_POINTER_REGNUM AP_FIRST
+
+/* Register numbers used for passing a function's static chain pointer. If
+ register windows are used, the register number as seen by the called
+ function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as
+ seen by the calling function is `STATIC_CHAIN_REGNUM'. If these registers
+ are the same, `STATIC_CHAIN_INCOMING_REGNUM' need not be defined.
+
+ The static chain register need not be a fixed register.
+
+ If the static chain is passed in memory, these macros should not be defined;
+ instead, the next two macros should be defined. */
+#define STATIC_CHAIN_REGNUM (GPR_FIRST + 7)
+#define STATIC_CHAIN_INCOMING_REGNUM (GPR_FIRST + 7)
+
+
+/* Eliminating the Frame Pointer and the Arg Pointer. */
+
+/* If defined, this macro specifies a table of register pairs used to eliminate
+ unneeded registers that point into the stack frame. If it is not defined,
+ the only elimination attempted by the compiler is to replace references to
+ the frame pointer with references to the stack pointer.
+
+ The definition of this macro is a list of structure initializations, each of
+ which specifies an original and replacement register.
+
+ On some machines, the position of the argument pointer is not known until
+ the compilation is completed. In such a case, a separate hard register must
+ be used for the argument pointer. This register can be eliminated by
+   replacing it with either the frame pointer or the stack pointer,
+ depending on whether or not the frame pointer has been eliminated.
+
+ In this case, you might specify:
+ #define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+ Note that the elimination of the argument pointer with the stack pointer is
+ specified first since that is the preferred elimination. */
+
+#define ELIMINABLE_REGS \
+{ \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
+}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the
+ initial difference between the specified pair of registers. This macro must
+ be defined if `ELIMINABLE_REGS' is defined. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = frv_initial_elimination_offset (FROM, TO)
+
+
+/* Passing Function Arguments on the Stack. */
+
+/* If defined, the maximum amount of space required for outgoing arguments will
+ be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed onto the
+ stack for each call; instead, the function prologue should increase the
+ stack frame size by this amount.
+
+ Defining both `PUSH_ROUNDING' and `ACCUMULATE_OUTGOING_ARGS' is not
+ proper. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+/* The number of registers used to hold function arguments.  */
+
+#define FRV_NUM_ARG_REGS 6
+
+/* A C type for declaring a variable that is used as the first argument of
+ `FUNCTION_ARG' and other related values. For some target machines, the type
+ `int' suffices and can hold the number of bytes of argument so far.
+
+ There is no need to record in `CUMULATIVE_ARGS' anything about the arguments
+ that have been passed on the stack. The compiler has other variables to
+ keep track of that. For target machines on which all arguments are passed
+ on the stack, there is no need to store anything in `CUMULATIVE_ARGS';
+ however, the data structure must exist and should not be empty, so use
+ `int'. */
+#define CUMULATIVE_ARGS int
+
+/* A C statement (sans semicolon) for initializing the variable CUM for the
+ state at the beginning of the argument list. The variable has type
+ `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type
+ of the function which will receive the args, or 0 if the args are to a
+ compiler support library function. The value of INDIRECT is nonzero when
+ processing an indirect call, for example a call through a function pointer.
+ The value of INDIRECT is zero for a call to an explicitly named function, a
+ library function call, or when `INIT_CUMULATIVE_ARGS' is used to find
+ arguments for the function being compiled.
+
+ When processing a call to a compiler support library function, LIBNAME
+ identifies which one. It is a `symbol_ref' rtx which contains the name of
+ the function, as a string. LIBNAME is 0 when an ordinary C function call is
+ being processed. Thus, each time this macro is called, either LIBNAME or
+ FNTYPE is nonzero, but never both of them at once. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ frv_init_cumulative_args (&CUM, FNTYPE, LIBNAME, FNDECL, FALSE)
+
+/* Like `INIT_CUMULATIVE_ARGS' but overrides it for the purposes of finding the
+ arguments for the function being compiled. If this macro is undefined,
+ `INIT_CUMULATIVE_ARGS' is used instead.
+
+ The value passed for LIBNAME is always 0, since library routines with
+ special calling conventions are never compiled with GCC. The argument
+ LIBNAME exists for symmetry with `INIT_CUMULATIVE_ARGS'. */
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \
+ frv_init_cumulative_args (&CUM, FNTYPE, LIBNAME, NULL, TRUE)
+
+/* A C expression that is nonzero if REGNO is the number of a hard register in
+ which function arguments are sometimes passed. This does *not* include
+ implicit arguments such as the static chain and the structure-value address.
+ On many machines, no registers can be used for this purpose since all
+ function arguments are pushed on the stack. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ ((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) <= LAST_ARG_REGNUM))
+
+
+/* How Scalar Function Values are Returned. */
+
+/* The number of the hard register that is used to return a scalar value from a
+ function call. */
+#define RETURN_VALUE_REGNUM (GPR_FIRST + 8)
+
+#define FUNCTION_VALUE_REGNO_P(REGNO) frv_function_value_regno_p (REGNO)
+
+
+/* How Large Values are Returned. */
+
+/* The number of the register that is used to pass the structure
+ value address. */
+#define FRV_STRUCT_VALUE_REGNUM (GPR_FIRST + 3)
+
+
+/* Function Entry and Exit. */
+
+/* Define this macro as a C expression that is nonzero if the return
+ instruction or the function epilogue ignores the value of the stack pointer;
+ in other words, if it is safe to delete an instruction to adjust the stack
+ pointer before a return from the function.
+
+ Note that this macro's value is relevant only for functions for which frame
+ pointers are maintained. It is never safe to delete a final stack
+ adjustment in a function that has no frame pointer, and the compiler knows
+ this regardless of `EXIT_IGNORE_STACK'. */
+#define EXIT_IGNORE_STACK 1
+
+/* Generating Code for Profiling. */
+
+/* A C statement or compound statement to output to FILE some assembler code to
+ call the profiling subroutine `mcount'. Before calling, the assembler code
+ must load the address of a counter variable into a register where `mcount'
+ expects to find the address. The name of this variable is `LP' followed by
+ the number LABELNO, so you would generate the name using `LP%d' in a
+ `fprintf'.
+
+ The details of how the address should be passed to `mcount' are determined
+ by your operating system environment, not by GCC. To figure them out,
+ compile a small program for profiling using the system's installed C
+ compiler and look at the assembler code that results.
+
+   This declaration must be present, but it can simply abort if profiling
+   is not implemented.  */
+
+#define FUNCTION_PROFILER(FILE, LABELNO)
+
+/* Trampolines for Nested Functions. */
+
+/* A C expression for the size in bytes of the trampoline, as an integer. */
+#define TRAMPOLINE_SIZE frv_trampoline_size ()
+
+/* Alignment required for trampolines, in bits.
+
+ If you don't define this macro, the value of `BIGGEST_ALIGNMENT' is used for
+ aligning trampolines. */
+#define TRAMPOLINE_ALIGNMENT (TARGET_FDPIC ? 64 : 32)
+
+/* Define this macro if trampolines need a special subroutine to do their work.
+ The macro should expand to a series of `asm' statements which will be
+ compiled with GCC. They go in a library function named
+ `__transfer_from_trampoline'.
+
+ If you need to avoid executing the ordinary prologue code of a compiled C
+ function when you jump to the subroutine, you can do so by placing a special
+ label of your own in the assembler code. Use one `asm' statement to
+ generate an assembler label, and another to make the label global. Then
+ trampolines can use that label to jump directly to your special assembler
+ code. */
+
+#ifdef __FRV_UNDERSCORE__
+#define TRAMPOLINE_TEMPLATE_NAME "___trampoline_template"
+#else
+#define TRAMPOLINE_TEMPLATE_NAME "__trampoline_template"
+#endif
+
+#define Twrite _write
+
+#if ! __FRV_FDPIC__
+#define TRANSFER_FROM_TRAMPOLINE \
+extern int Twrite (int, const void *, unsigned); \
+ \
+void \
+__trampoline_setup (short * addr, int size, int fnaddr, int sc) \
+{ \
+ extern short __trampoline_template[]; \
+ short * to = addr; \
+ short * from = &__trampoline_template[0]; \
+ int i; \
+ \
+ if (size < 20) \
+ { \
+ Twrite (2, "__trampoline_setup bad size\n", \
+ sizeof ("__trampoline_setup bad size\n") - 1); \
+ exit (-1); \
+ } \
+ \
+ to[0] = from[0]; \
+ to[1] = (short)(fnaddr); \
+ to[2] = from[2]; \
+ to[3] = (short)(sc); \
+ to[4] = from[4]; \
+ to[5] = (short)(fnaddr >> 16); \
+ to[6] = from[6]; \
+ to[7] = (short)(sc >> 16); \
+ to[8] = from[8]; \
+ to[9] = from[9]; \
+ \
+ for (i = 0; i < 20; i++) \
+ __asm__ volatile ("dcf @(%0,%1)\n\tici @(%0,%1)" :: "r" (to), "r" (i)); \
+} \
+ \
+__asm__("\n" \
+ "\t.globl " TRAMPOLINE_TEMPLATE_NAME "\n" \
+ "\t.text\n" \
+ TRAMPOLINE_TEMPLATE_NAME ":\n" \
+ "\tsetlos #0, gr6\n" /* jump register */ \
+ "\tsetlos #0, gr7\n" /* static chain */ \
+ "\tsethi #0, gr6\n" \
+ "\tsethi #0, gr7\n" \
+ "\tjmpl @(gr0,gr6)\n");
+#else
+#define TRANSFER_FROM_TRAMPOLINE \
+extern int Twrite (int, const void *, unsigned); \
+ \
+void \
+__trampoline_setup (addr, size, fnaddr, sc) \
+ short * addr; \
+ int size; \
+ int fnaddr; \
+ int sc; \
+{ \
+ extern short __trampoline_template[]; \
+ short * from = &__trampoline_template[0]; \
+ int i; \
+ short **desc = (short **)addr; \
+ short * to = addr + 4; \
+ \
+ if (size != 32) \
+ { \
+ Twrite (2, "__trampoline_setup bad size\n", \
+ sizeof ("__trampoline_setup bad size\n") - 1); \
+ exit (-1); \
+ } \
+ \
+ /* Create a function descriptor with the address of the code below \
+ and NULL as the FDPIC value. We don't need the real GOT value \
+ here, since we don't use it, so we use NULL, that is just as \
+ good. */ \
+ desc[0] = to; \
+ desc[1] = NULL; \
+ size -= 8; \
+ \
+ to[0] = from[0]; \
+ to[1] = (short)(fnaddr); \
+ to[2] = from[2]; \
+ to[3] = (short)(sc); \
+ to[4] = from[4]; \
+ to[5] = (short)(fnaddr >> 16); \
+ to[6] = from[6]; \
+ to[7] = (short)(sc >> 16); \
+ to[8] = from[8]; \
+ to[9] = from[9]; \
+ to[10] = from[10]; \
+ to[11] = from[11]; \
+ \
+ for (i = 0; i < size; i++) \
+ __asm__ volatile ("dcf @(%0,%1)\n\tici @(%0,%1)" :: "r" (to), "r" (i)); \
+} \
+ \
+__asm__("\n" \
+ "\t.globl " TRAMPOLINE_TEMPLATE_NAME "\n" \
+ "\t.text\n" \
+ TRAMPOLINE_TEMPLATE_NAME ":\n" \
+ "\tsetlos #0, gr6\n" /* Jump register. */ \
+ "\tsetlos #0, gr7\n" /* Static chain. */ \
+ "\tsethi #0, gr6\n" \
+ "\tsethi #0, gr7\n" \
+ "\tldd @(gr6,gr0),gr14\n" \
+ "\tjmpl @(gr14,gr0)\n" \
+ );
+#endif
+
+
+/* Addressing Modes. */
+
+/* A number, the maximum number of registers that can appear in a valid memory
+ address. Note that it is up to you to specify a value equal to the maximum
+ number that `TARGET_LEGITIMATE_ADDRESS_P' would ever accept. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as a base register. For hard registers, it should always accept those
+ which the hardware permits and reject the others. Whether the macro accepts
+ or rejects pseudo registers must be controlled by `REG_OK_STRICT' as
+ described above. This usually requires two variant definitions, of which
+ `REG_OK_STRICT' controls the one actually used. */
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) GPR_P (REGNO (X))
+#else
+#define REG_OK_FOR_BASE_P(X) GPR_AP_OR_PSEUDO_P (REGNO (X))
+#endif
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as an index register.
+
+ The difference between an index register and a base register is that the
+ index register may be scaled. If an address involves the sum of two
+ registers, neither one of them scaled, then either one may be labeled the
+ "base" and the other the "index"; but whichever labeling is used must fit
+ the machine's constraints of which registers may serve in each capacity.
+ The compiler will try both labelings, looking for one that is valid, and
+ will reload one or both registers only if neither labeling works. */
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+
+#define FIND_BASE_TERM frv_find_base_term
+
+/* A C expression that is nonzero if X is a legitimate constant for an
+ immediate operand on the target machine. You can assume that X satisfies
+ `CONSTANT_P', so you need not check this. In fact, `1' is a suitable
+ definition for this macro on machines where anything `CONSTANT_P' is valid. */
+#define LEGITIMATE_CONSTANT_P(X) frv_legitimate_constant_p (X)
+
+/* The load-and-update commands allow pre-modification in addresses.
+ The index has to be in a register. */
+#define HAVE_PRE_MODIFY_REG 1
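+
+/* For example (illustrative only), this allows a memory reference such as
+   (mem:SI (pre_modify (reg:SI gr4) (plus:SI (reg:SI gr4) (reg:SI gr5)))),
+   where the base register is updated by a register index before the access. */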
+
+
+/* We define extra CC modes in frv-modes.def so we need a selector. */
+
+#define SELECT_CC_MODE frv_select_cc_mode
+
+/* A C expression whose value is one if it is always safe to reverse a
+ comparison whose mode is MODE. If `SELECT_CC_MODE' can ever return MODE for
+ a floating-point inequality comparison, then `REVERSIBLE_CC_MODE (MODE)'
+ must be zero.
+
+   You need not define this macro if it would always return zero or if the
+ floating-point format is anything other than `IEEE_FLOAT_FORMAT'. For
+ example, here is the definition used on the SPARC, where floating-point
+ inequality comparisons are always given `CCFPEmode':
+
+ #define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPEmode) */
+
+/* On frv, don't consider floating point comparisons to be reversible. In
+ theory, fp equality comparisons can be reversible. */
+#define REVERSIBLE_CC_MODE(MODE) \
+ ((MODE) == CCmode || (MODE) == CC_UNSmode || (MODE) == CC_NZmode)
+
+/* Frv CCR_MODE's are not reversible. */
+#define REVERSE_CONDEXEC_PREDICATES_P(x,y) 0
+
+
+/* Describing Relative Costs of Operations. */
+
+/* A C expression for the cost of a branch instruction. A value of 1 is the
+ default; other values are interpreted relative to that. */
+#define BRANCH_COST(speed_p, predictable_p) frv_branch_cost_int
+
+/* Define this macro as a C expression which is nonzero if accessing less than
+   a word of memory (i.e. a `char' or a `short') is no faster than accessing a
+   word of memory, i.e., if such accesses require more than one instruction or
+   if there is no difference in cost between byte and (aligned) word loads.
+
+   When this macro is not defined, the compiler will access a field by finding
+   the smallest containing object; when it is defined, a fullword load will be
+   used if alignment permits.  Unless byte accesses are faster than word
+   accesses, using word accesses is preferable since it may eliminate a
+   subsequent memory access if later accesses occur to other fields in the
+   same word of the structure, but to different bytes.  */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant function
+ address than to call an address kept in a register. */
+#define NO_FUNCTION_CSE
+
+
+/* Dividing the output into sections. */
+
+/* A C expression whose value is a string containing the assembler operation
+ that should precede instructions and read-only data. Normally `".text"' is
+ right. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* A C expression whose value is a string containing the assembler operation to
+ identify the following data as writable initialized data. Normally
+ `".data"' is right. */
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#define BSS_SECTION_ASM_OP "\t.section .bss,\"aw\""
+
+/* Short Data Support */
+#define SDATA_SECTION_ASM_OP "\t.section .sdata,\"aw\""
+
+#undef INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section .init,\"ax\""
+#define FINI_SECTION_ASM_OP "\t.section .fini,\"ax\""
+
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"a\""
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"a\""
+
+/* A C expression whose value is a string containing the assembler operation to
+   switch to the fixup section that records all initialized pointers in a -fpic
+   program so they can be changed at program startup time if the program is
+   loaded at a different address than it was linked for.  */
+#define FIXUP_SECTION_ASM_OP "\t.section .rofixup,\"a\""
+
+/* Position Independent Code. */
+
+/* A C expression that is nonzero if X is a legitimate immediate operand on the
+ target machine when generating position independent code. You can assume
+ that X satisfies `CONSTANT_P', so you need not check this. You can also
+ assume FLAG_PIC is true, so you need not check it either. You need not
+ define this macro if all constants (including `SYMBOL_REF') can be immediate
+ operands when generating position independent code. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ ( GET_CODE (X) == CONST_INT \
+ || GET_CODE (X) == CONST_DOUBLE \
+ || (GET_CODE (X) == HIGH && GET_CODE (XEXP (X, 0)) == CONST_INT) \
+   || got12_operand (X, VOIDmode))
+
+
+/* The Overall Framework of an Assembler File. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at the
+ end of the line. */
+#define ASM_COMMENT_START ";"
+
+/* A C string constant for text to be output before each `asm' statement or
+ group of consecutive ones. Normally this is `"#APP"', which is a comment
+ that has no effect on most assemblers but tells the GNU assembler that it
+ must check the lines that follow for all valid assembler constructs. */
+#define ASM_APP_ON "#APP\n"
+
+/* A C string constant for text to be output after each `asm' statement or
+ group of consecutive ones. Normally this is `"#NO_APP"', which tells the
+ GNU assembler to resume making the time-saving assumptions that are valid
+ for ordinary compiler output. */
+#define ASM_APP_OFF "#NO_APP\n"
+
+
+/* Output of Data. */
+
+/* This is how to output a label to dwarf/dwarf2. */
+#define ASM_OUTPUT_DWARF_ADDR(STREAM, LABEL) \
+do { \
+ fprintf (STREAM, "\t.picptr\t"); \
+ assemble_name (STREAM, LABEL); \
+} while (0)
+
+/* Whether to emit the gas-specific dwarf2 line number support.  */
+#define DWARF2_ASM_LINE_DEBUG_INFO (TARGET_DEBUG_LOC)
+
+/* Output of Uninitialized Variables. */
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM the
+ assembler definition of a local-common-label named NAME whose size is SIZE
+ bytes. The variable ROUNDED is the size rounded up to whatever alignment
+ the caller wants.
+
+ Use the expression `assemble_name (STREAM, NAME)' to output the name itself;
+ before and after that, output the additional assembler syntax for defining
+ the name, and a newline.
+
+ This macro controls how the assembler definitions of uninitialized static
+ variables are output. */
+#undef ASM_OUTPUT_LOCAL
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+
+/* This is for final.c, because it is used by ASM_DECLARE_OBJECT_NAME. */
+extern int size_directive_output;
+
+/* Like `ASM_OUTPUT_ALIGNED_LOCAL' except that it takes an additional
+   parameter: the DECL of the variable to be output, if there is one.
+ This macro can be called with DECL == NULL_TREE. If you define
+ this macro, it is used in place of `ASM_OUTPUT_LOCAL' and
+ `ASM_OUTPUT_ALIGNED_LOCAL', and gives you more flexibility in
+ handling the destination of the variable. */
+#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGN) \
+do { \
+ if ((SIZE) > 0 && (SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ switch_to_section (get_named_section (NULL, ".sbss", 0)); \
+ else \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_ALIGN (STREAM, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \
+ ASM_DECLARE_OBJECT_NAME (STREAM, NAME, DECL); \
+ ASM_OUTPUT_SKIP (STREAM, (SIZE) ? (SIZE) : 1); \
+} while (0)
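+
+/* Illustration (an approximation, not captured compiler output): for
+
+     static int counter;
+
+   and a -G threshold that places 4-byte objects in .sbss, this macro would
+   emit roughly
+
+     .section .sbss
+     .p2align 2
+     counter:
+     .zero 4
+
+   plus whatever ASM_DECLARE_OBJECT_NAME adds (e.g. .type/.size directives). */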
+
+
+/* Output and Generation of Labels. */
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM the
+ assembler definition of a label named NAME. Use the expression
+ `assemble_name (STREAM, NAME)' to output the name itself; before and after
+ that, output the additional assembler syntax for defining the name, and a
+ newline. */
+#define ASM_OUTPUT_LABEL(STREAM, NAME) \
+do { \
+ assemble_name (STREAM, NAME); \
+ fputs (":\n", STREAM); \
+} while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+do { \
+ sprintf (LABEL, "*.%s%ld", PREFIX, (long)NUM); \
+} while (0)
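+
+/* For example, PREFIX "L" and NUM 42 yield "*.L42"; by GCC convention the
+   leading `*' means the rest of the name is emitted verbatim, without
+   USER_LABEL_PREFIX.  */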
+
+
+/* Macros Controlling Initialization Routines. */
+
+#undef INIT_SECTION_ASM_OP
+
+/* If defined, `main' will call `__main' despite the presence of
+ `INIT_SECTION_ASM_OP'. This macro should be defined for systems where the
+ init section is not actually run automatically, but is still useful for
+ collecting the lists of constructors and destructors. */
+#define INVOKE__main
+
+/* Output of Assembler Instructions. */
+
+/* A C initializer containing the assembler's names for the machine registers,
+ each one as a C string constant. This is what translates register numbers
+ in the compiler into assembler language. */
+#define REGISTER_NAMES \
+{ \
+ "gr0", "sp", "fp", "gr3", "gr4", "gr5", "gr6", "gr7", \
+ "gr8", "gr9", "gr10", "gr11", "gr12", "gr13", "gr14", "gr15", \
+ "gr16", "gr17", "gr18", "gr19", "gr20", "gr21", "gr22", "gr23", \
+ "gr24", "gr25", "gr26", "gr27", "gr28", "gr29", "gr30", "gr31", \
+ "gr32", "gr33", "gr34", "gr35", "gr36", "gr37", "gr38", "gr39", \
+ "gr40", "gr41", "gr42", "gr43", "gr44", "gr45", "gr46", "gr47", \
+ "gr48", "gr49", "gr50", "gr51", "gr52", "gr53", "gr54", "gr55", \
+ "gr56", "gr57", "gr58", "gr59", "gr60", "gr61", "gr62", "gr63", \
+ \
+ "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \
+ "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", \
+ "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", \
+ "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31", \
+ "fr32", "fr33", "fr34", "fr35", "fr36", "fr37", "fr38", "fr39", \
+ "fr40", "fr41", "fr42", "fr43", "fr44", "fr45", "fr46", "fr47", \
+ "fr48", "fr49", "fr50", "fr51", "fr52", "fr53", "fr54", "fr55", \
+ "fr56", "fr57", "fr58", "fr59", "fr60", "fr61", "fr62", "fr63", \
+ \
+ "fcc0", "fcc1", "fcc2", "fcc3", "icc0", "icc1", "icc2", "icc3", \
+ "cc0", "cc1", "cc2", "cc3", "cc4", "cc5", "cc6", "cc7", \
+ "acc0", "acc1", "acc2", "acc3", "acc4", "acc5", "acc6", "acc7", \
+ "acc8", "acc9", "acc10", "acc11", \
+ "accg0","accg1","accg2","accg3","accg4","accg5","accg6","accg7", \
+ "accg8", "accg9", "accg10", "accg11", \
+ "ap", "lr", "lcr", "iacc0h", "iacc0l" \
+}
+
+/* Define this macro if you are using an unusual assembler that
+ requires different names for the machine instructions.
+
+ The definition is a C statement or statements which output an
+ assembler instruction opcode to the stdio stream STREAM. The
+ macro-operand PTR is a variable of type `char *' which points to
+ the opcode name in its "internal" form--the form that is written
+ in the machine description. The definition should output the
+ opcode name to STREAM, performing any translation you desire, and
+ increment the variable PTR to point at the end of the opcode so
+ that it will not be output twice.
+
+ In fact, your macro definition may process less than the entire
+ opcode name, or more than the opcode name; but if you want to
+ process text that includes `%'-sequences to substitute operands,
+ you must take care of the substitution yourself. Just be sure to
+ increment PTR over whatever text should not be output normally.
+
+ If you need to look at the operand values, they can be found as the
+ elements of `recog_operand'.
+
+ If the macro definition does nothing, the instruction is output in
+ the usual way. */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR)\
+ (PTR) = frv_asm_output_opcode (STREAM, PTR)
+
+/* If defined, a C statement to be executed just prior to the output
+ of assembler code for INSN, to modify the extracted operands so
+ they will be output differently.
+
+ Here the argument OPVEC is the vector containing the operands
+ extracted from INSN, and NOPERANDS is the number of elements of
+ the vector which contain meaningful data for this insn. The
+ contents of this vector are what will be used to convert the insn
+ template into assembler code, so you can change the assembler
+ output by changing the contents of the vector.
+
+ This macro is useful when various assembler syntaxes share a single
+ file of instruction patterns; by defining this macro differently,
+ you can cause a large class of instructions to be output
+ differently (such as with rearranged operands). Naturally,
+ variations in assembler syntax affecting individual insn patterns
+ ought to be handled by writing conditional output routines in
+ those patterns.
+
+ If this macro is not defined, it is equivalent to a null statement. */
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS)\
+ frv_final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+#define REGISTER_PREFIX ""
+#define LOCAL_LABEL_PREFIX "."
+#define IMMEDIATE_PREFIX "#"
+
+
+/* Output of dispatch tables. */
+
+/* This macro should be provided on machines where the addresses in a dispatch
+ table are relative to the table's own address.
+
+ The definition should be a C statement to output to the stdio stream STREAM
+ an assembler pseudo-instruction to generate a difference between two labels.
+ VALUE and REL are the numbers of two internal labels. The definitions of
+ these labels are output using `(*targetm.asm_out.internal_label)', and they must be
+ printed in the same way here. For example,
+
+ fprintf (STREAM, "\t.word L%d-L%d\n", VALUE, REL) */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL)
+
+/* This macro should be provided on machines where the addresses in a dispatch
+ table are absolute.
+
+ The definition should be a C statement to output to the stdio stream STREAM
+ an assembler pseudo-instruction to generate a reference to a label. VALUE
+ is the number of an internal label whose definition is output using
+ `(*targetm.asm_out.internal_label)'. For example,
+
+ fprintf (STREAM, "\t.word L%d\n", VALUE) */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+fprintf (STREAM, "\t.word .L%d\n", VALUE)
+
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+/* Assembler Commands for Exception Regions. */
+
+/* Define this macro to 0 if your target supports DWARF 2 frame unwind
+ information, but it does not yet work with exception handling. Otherwise,
+ if your target supports this information (if it defines
+ `INCOMING_RETURN_ADDR_RTX' and either `UNALIGNED_INT_ASM_OP' or
+ `OBJECT_FORMAT_ELF'), GCC will provide a default definition of 1.
+
+ If this macro is defined to 1, the DWARF 2 unwinder will be the default
+ exception handling mechanism; otherwise, setjmp/longjmp will be used by
+ default.
+
+ If this macro is defined to anything, the DWARF 2 unwinder will be used
+ instead of inline unwinders and __unwind_function in the non-setjmp case. */
+#define DWARF2_UNWIND_INFO 1
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNO)
+
+/* Assembler Commands for Alignment. */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \
+  fprintf (STREAM, "\t.zero\t%u\n", (unsigned)(NBYTES))
+
+/* A C statement to output to the stdio stream STREAM an assembler command to
+ advance the location counter to a multiple of 2 to the POWER bytes. POWER
+ will be a C expression of type `int'. */
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ fprintf ((STREAM), "\t.p2align %d\n", (POWER))
+
+/* Inside the text section, align with unpacked nops rather than zeros. */
+#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, POWER) \
+ fprintf ((STREAM), "\t.p2alignl %d,0x80880000\n", (POWER))
+
+/* Macros Affecting all Debug Formats. */
+
+/* A C expression that returns the DBX register number for the compiler
+ register number REGNO. In simple cases, the value of this expression may be
+ REGNO itself. But sometimes there are some registers that the compiler
+ knows about and DBX does not, or vice versa. In such cases, some register
+ may need to have one number in the compiler and another for DBX.
+
+ If two registers have consecutive numbers inside GCC, and they can be
+ used as a pair to hold a multiword value, then they *must* have consecutive
+ numbers after renumbering with `DBX_REGISTER_NUMBER'. Otherwise, debuggers
+ will be unable to access such a pair, because they expect register pairs to
+ be consecutive in their own numbering scheme.
+
+   If you find yourself defining `DBX_REGISTER_NUMBER' in a way that does not
+ preserve register pairs, then what you must do instead is redefine the
+ actual register numbering scheme.
+
+ This declaration is required. */
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Miscellaneous Parameters. */
+
+/* An alias for a machine mode name. This is the machine mode that elements of
+ a jump-table should have. */
+#define CASE_VECTOR_MODE SImode
+
+/* Define this macro if operations between registers with integral mode smaller
+ than a word are always performed on the entire register. Most RISC machines
+ have this property and most CISC machines do not. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define this macro to be a C expression indicating when insns that read
+ memory in MODE, an integral mode narrower than a word, set the bits outside
+ of MODE to be either the sign-extension or the zero-extension of the data
+ read. Return `SIGN_EXTEND' for values of MODE for which the insn
+ sign-extends, `ZERO_EXTEND' for which it zero-extends, and `UNKNOWN' for other
+ modes.
+
+ This macro is not called with MODE non-integral or with a width greater than
+ or equal to `BITS_PER_WORD', so you may return any value in this case. Do
+ not define this macro if it would always return `UNKNOWN'. On machines where
+ this macro is defined, you will normally define it as the constant
+ `SIGN_EXTEND' or `ZERO_EXTEND'. */
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* The maximum number of bytes that a single instruction can move quickly from
+ memory to memory. */
+#define MOVE_MAX 8
+
+/* A C expression which is nonzero if on this machine it is safe to "convert"
+ an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller
+ than INPREC) by merely operating on it as if it had only OUTPREC bits.
+
+ On many machines, this expression can be 1.
+
+ When `TRULY_NOOP_TRUNCATION' returns 1 for a pair of sizes for modes for
+ which `MODES_TIEABLE_P' is 0, suboptimal code can result. If this is the
+ case, making `TRULY_NOOP_TRUNCATION' return 0 in such cases may improve
+ things. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* An alias for the machine mode for pointers. On most machines, define this
+ to be the integer mode corresponding to the width of a hardware pointer;
+   `SImode' on 32-bit machines or `DImode' on 64-bit machines.  On some machines
+ you must define this to be one of the partial integer modes, such as
+ `PSImode'.
+
+ The width of `Pmode' must be at least as large as the value of
+ `POINTER_SIZE'. If it is not equal, you must define the macro
+ `POINTERS_EXTEND_UNSIGNED' to specify how pointers are extended to `Pmode'. */
+#define Pmode SImode
+
+/* An alias for the machine mode used for memory references to functions being
+ called, in `call' RTL expressions. On most machines this should be
+ `QImode'. */
+#define FUNCTION_MODE QImode
+
+/* A C expression for the maximum number of instructions to execute via
+ conditional execution instructions instead of a branch. A value of
+ BRANCH_COST+1 is the default if the machine does not use
+ cc0, and 1 if it does use cc0. */
+#define MAX_CONDITIONAL_EXECUTE frv_condexec_insns
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO, possibly updating the tests in TRUE_EXPR, and
+ FALSE_EXPR for converting if-then and if-then-else code to conditional
+ instructions. Set either TRUE_EXPR or FALSE_EXPR to a null pointer if the
+ tests cannot be converted. */
+#define IFCVT_MODIFY_TESTS(CE_INFO, TRUE_EXPR, FALSE_EXPR) \
+frv_ifcvt_modify_tests (CE_INFO, &TRUE_EXPR, &FALSE_EXPR)
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO, for the basic block BB, possibly updating the tests in
+ TRUE_EXPR, and FALSE_EXPR for converting the && and || parts of if-then or
+ if-then-else code to conditional instructions. OLD_TRUE and OLD_FALSE are
+ the previous tests. Set either TRUE_EXPR or FALSE_EXPR to a null pointer if
+ the tests cannot be converted. */
+#define IFCVT_MODIFY_MULTIPLE_TESTS(CE_INFO, BB, TRUE_EXPR, FALSE_EXPR) \
+frv_ifcvt_modify_multiple_tests (CE_INFO, BB, &TRUE_EXPR, &FALSE_EXPR)
+
+/* A C expression to modify the code described by the conditional if
+ information CE_INFO with the new PATTERN in INSN. If PATTERN is a null
+   pointer after the IFCVT_MODIFY_INSN macro executes, it is assumed that the
+   insn cannot be converted to be executed conditionally.  */
+#define IFCVT_MODIFY_INSN(CE_INFO, PATTERN, INSN) \
+(PATTERN) = frv_ifcvt_modify_insn (CE_INFO, PATTERN, INSN)
+
+/* A C expression to perform any final machine dependent modifications in
+ converting code to conditional execution in the code described by the
+ conditional if information CE_INFO. */
+#define IFCVT_MODIFY_FINAL(CE_INFO) frv_ifcvt_modify_final (CE_INFO)
+
+/* A C expression to cancel any machine dependent modifications in converting
+ code to conditional execution in the code described by the conditional if
+ information CE_INFO. */
+#define IFCVT_MODIFY_CANCEL(CE_INFO) frv_ifcvt_modify_cancel (CE_INFO)
+
+/* Initialize the extra fields provided by IFCVT_EXTRA_FIELDS. */
+#define IFCVT_INIT_EXTRA_FIELDS(CE_INFO) frv_ifcvt_init_extra_fields (CE_INFO)
+
+/* Defining the following macro keeps the 2nd jump optimization (after
+   the 2nd insn scheduling) minimal.  It must be defined when the start
+   cycle marks of insns (TImode is used for this) are used for VLIW insn
+   packing, because some jump optimizations invalidate such marks.  The
+   marks are corrected for some (minimal) optimizations.  ??? The macro
+   is probably temporary.  A final solution could be to run the 2nd jump
+   optimizations before the 2nd instruction scheduling, or to correct
+   the marks for all jump optimizations -- although some jump
+   optimizations are actually deoptimizations for VLIW (super-scalar)
+   processors.  */
+
+#define MINIMAL_SECOND_JUMP_OPTIMIZATION
+
+
+/* If the following macro is defined and nonzero and deterministic
+   finite state automata are used for pipeline hazard recognition, we
+   will try to exchange insns in the ready queue to improve the
+   schedule.  The larger the macro value, the more attempts will be
+   made.  */
+#define FIRST_CYCLE_MULTIPASS_SCHEDULING 1
+
+/* The following macro is used only when the value of
+   FIRST_CYCLE_MULTIPASS_SCHEDULING is nonzero.  The larger the macro
+   value, the more attempts will be made to choose a better schedule.
+   If the macro value is zero or negative, there will be no multi-pass
+   scheduling.  */
+#define FIRST_CYCLE_MULTIPASS_SCHEDULING_LOOKAHEAD frv_sched_lookahead
+
+enum frv_builtins
+{
+ FRV_BUILTIN_MAND,
+ FRV_BUILTIN_MOR,
+ FRV_BUILTIN_MXOR,
+ FRV_BUILTIN_MNOT,
+ FRV_BUILTIN_MAVEH,
+ FRV_BUILTIN_MSATHS,
+ FRV_BUILTIN_MSATHU,
+ FRV_BUILTIN_MADDHSS,
+ FRV_BUILTIN_MADDHUS,
+ FRV_BUILTIN_MSUBHSS,
+ FRV_BUILTIN_MSUBHUS,
+ FRV_BUILTIN_MPACKH,
+ FRV_BUILTIN_MQADDHSS,
+ FRV_BUILTIN_MQADDHUS,
+ FRV_BUILTIN_MQSUBHSS,
+ FRV_BUILTIN_MQSUBHUS,
+ FRV_BUILTIN_MUNPACKH,
+ FRV_BUILTIN_MDPACKH,
+ FRV_BUILTIN_MBTOH,
+ FRV_BUILTIN_MHTOB,
+ FRV_BUILTIN_MCOP1,
+ FRV_BUILTIN_MCOP2,
+ FRV_BUILTIN_MROTLI,
+ FRV_BUILTIN_MROTRI,
+ FRV_BUILTIN_MWCUT,
+ FRV_BUILTIN_MSLLHI,
+ FRV_BUILTIN_MSRLHI,
+ FRV_BUILTIN_MSRAHI,
+ FRV_BUILTIN_MEXPDHW,
+ FRV_BUILTIN_MEXPDHD,
+ FRV_BUILTIN_MMULHS,
+ FRV_BUILTIN_MMULHU,
+ FRV_BUILTIN_MMULXHS,
+ FRV_BUILTIN_MMULXHU,
+ FRV_BUILTIN_MMACHS,
+ FRV_BUILTIN_MMACHU,
+ FRV_BUILTIN_MMRDHS,
+ FRV_BUILTIN_MMRDHU,
+ FRV_BUILTIN_MQMULHS,
+ FRV_BUILTIN_MQMULHU,
+ FRV_BUILTIN_MQMULXHU,
+ FRV_BUILTIN_MQMULXHS,
+ FRV_BUILTIN_MQMACHS,
+ FRV_BUILTIN_MQMACHU,
+ FRV_BUILTIN_MCPXRS,
+ FRV_BUILTIN_MCPXRU,
+ FRV_BUILTIN_MCPXIS,
+ FRV_BUILTIN_MCPXIU,
+ FRV_BUILTIN_MQCPXRS,
+ FRV_BUILTIN_MQCPXRU,
+ FRV_BUILTIN_MQCPXIS,
+ FRV_BUILTIN_MQCPXIU,
+ FRV_BUILTIN_MCUT,
+ FRV_BUILTIN_MCUTSS,
+ FRV_BUILTIN_MWTACC,
+ FRV_BUILTIN_MWTACCG,
+ FRV_BUILTIN_MRDACC,
+ FRV_BUILTIN_MRDACCG,
+ FRV_BUILTIN_MTRAP,
+ FRV_BUILTIN_MCLRACC,
+ FRV_BUILTIN_MCLRACCA,
+ FRV_BUILTIN_MDUNPACKH,
+ FRV_BUILTIN_MBTOHE,
+ FRV_BUILTIN_MQXMACHS,
+ FRV_BUILTIN_MQXMACXHS,
+ FRV_BUILTIN_MQMACXHS,
+ FRV_BUILTIN_MADDACCS,
+ FRV_BUILTIN_MSUBACCS,
+ FRV_BUILTIN_MASACCS,
+ FRV_BUILTIN_MDADDACCS,
+ FRV_BUILTIN_MDSUBACCS,
+ FRV_BUILTIN_MDASACCS,
+ FRV_BUILTIN_MABSHS,
+ FRV_BUILTIN_MDROTLI,
+ FRV_BUILTIN_MCPLHI,
+ FRV_BUILTIN_MCPLI,
+ FRV_BUILTIN_MDCUTSSI,
+ FRV_BUILTIN_MQSATHS,
+ FRV_BUILTIN_MQLCLRHS,
+ FRV_BUILTIN_MQLMTHS,
+ FRV_BUILTIN_MQSLLHI,
+ FRV_BUILTIN_MQSRAHI,
+ FRV_BUILTIN_MHSETLOS,
+ FRV_BUILTIN_MHSETLOH,
+ FRV_BUILTIN_MHSETHIS,
+ FRV_BUILTIN_MHSETHIH,
+ FRV_BUILTIN_MHDSETS,
+ FRV_BUILTIN_MHDSETH,
+ FRV_BUILTIN_SMUL,
+ FRV_BUILTIN_UMUL,
+ FRV_BUILTIN_PREFETCH0,
+ FRV_BUILTIN_PREFETCH,
+ FRV_BUILTIN_SMASS,
+ FRV_BUILTIN_SMSSS,
+ FRV_BUILTIN_SMU,
+ FRV_BUILTIN_SCUTSS,
+ FRV_BUILTIN_ADDSS,
+ FRV_BUILTIN_SUBSS,
+ FRV_BUILTIN_SLASS,
+ FRV_BUILTIN_IACCreadll,
+ FRV_BUILTIN_IACCreadl,
+ FRV_BUILTIN_IACCsetll,
+ FRV_BUILTIN_IACCsetl,
+ FRV_BUILTIN_SCAN,
+ FRV_BUILTIN_READ8,
+ FRV_BUILTIN_READ16,
+ FRV_BUILTIN_READ32,
+ FRV_BUILTIN_READ64,
+ FRV_BUILTIN_WRITE8,
+ FRV_BUILTIN_WRITE16,
+ FRV_BUILTIN_WRITE32,
+ FRV_BUILTIN_WRITE64
+};
+#define FRV_BUILTIN_FIRST_NONMEDIA FRV_BUILTIN_SMUL
+
+/* Enable prototypes on the call rtl functions. */
+#define MD_CALL_PROTOTYPES 1
+
+#define CPU_UNITS_QUERY 1
+
+#ifdef __FRV_FDPIC__
+#define CRT_GET_RFIB_DATA(dbase) \
+ ({ extern void *_GLOBAL_OFFSET_TABLE_; (dbase) = &_GLOBAL_OFFSET_TABLE_; })
+#endif
+
+#endif /* __FRV_H__ */
diff --git a/gcc/config/frv/frv.md b/gcc/config/frv/frv.md
new file mode 100644
index 000000000..cc4a49141
--- /dev/null
+++ b/gcc/config/frv/frv.md
@@ -0,0 +1,8022 @@
+;; Frv Machine Description
+;; Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Unspec's used
+;; ::
+;; ::::::::::::::::::::
+
+;; GOT constants must go 12/HI/LO for the splitter to work
+
+(define_constants
+ [(UNSPEC_BLOCKAGE 0)
+ (UNSPEC_CC_TO_GPR 1)
+ (UNSPEC_GPR_TO_CC 2)
+ (UNSPEC_PIC_PROLOGUE 3)
+ (UNSPEC_CR_LOGIC 4)
+ (UNSPEC_STACK_ADJUST 5)
+ (UNSPEC_EH_RETURN_EPILOGUE 6)
+ (UNSPEC_GOT 7)
+ (UNSPEC_LDD 8)
+ (UNSPEC_OPTIONAL_MEMBAR 9)
+
+ (UNSPEC_GETTLSOFF 200)
+ (UNSPEC_TLS_LOAD_GOTTLSOFF12 201)
+ (UNSPEC_TLS_INDIRECT_CALL 202)
+ (UNSPEC_TLS_TLSDESC_LDD 203)
+ (UNSPEC_TLS_TLSDESC_LDD_AUX 204)
+ (UNSPEC_TLS_TLSOFF_LD 205)
+ (UNSPEC_TLS_LDDI 206)
+ (UNSPEC_TLSOFF_HILO 207)
+
+ (R_FRV_GOT12 11)
+ (R_FRV_GOTHI 12)
+ (R_FRV_GOTLO 13)
+ (R_FRV_FUNCDESC 14)
+ (R_FRV_FUNCDESC_GOT12 15)
+ (R_FRV_FUNCDESC_GOTHI 16)
+ (R_FRV_FUNCDESC_GOTLO 17)
+ (R_FRV_FUNCDESC_VALUE 18)
+ (R_FRV_FUNCDESC_GOTOFF12 19)
+ (R_FRV_FUNCDESC_GOTOFFHI 20)
+ (R_FRV_FUNCDESC_GOTOFFLO 21)
+ (R_FRV_GOTOFF12 22)
+ (R_FRV_GOTOFFHI 23)
+ (R_FRV_GOTOFFLO 24)
+ (R_FRV_GPREL12 25)
+ (R_FRV_GPRELHI 26)
+ (R_FRV_GPRELLO 27)
+ (R_FRV_GOTTLSOFF_HI 28)
+ (R_FRV_GOTTLSOFF_LO 29)
+ (R_FRV_TLSMOFFHI 30)
+ (R_FRV_TLSMOFFLO 31)
+ (R_FRV_TLSMOFF12 32)
+ (R_FRV_TLSDESCHI 33)
+ (R_FRV_TLSDESCLO 34)
+ (R_FRV_GOTTLSDESCHI 35)
+ (R_FRV_GOTTLSDESCLO 36)
+
+ (GR8_REG 8)
+ (GR9_REG 9)
+ (GR14_REG 14)
+ ;; LR_REG conflicts with definition in frv.h
+ (LRREG 169)
+ (FDPIC_REG 15)
+ ])
+
+(define_mode_iterator IMODE [QI HI SI DI])
+(define_mode_attr IMODEsuffix [(QI "b") (HI "h") (SI "") (DI "d")])
+(define_mode_attr BREADsuffix [(QI "ub") (HI "uh") (SI "") (DI "d")])
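+
+;; Illustration (not a pattern of this port): given the iterator and mode
+;; attributes above, a single template such as
+;;
+;;   (define_insn "hypothetical_read<IMODEsuffix>"
+;;     [(set (match_operand:IMODE 0 "register_operand" "=r")
+;;           (match_operand:IMODE 1 "memory_operand" "m"))]
+;;     ...)
+;;
+;; expands into four patterns -- "hypothetical_readb", "hypothetical_readh",
+;; "hypothetical_read" and "hypothetical_readd" -- one per mode in IMODE,
+;; with each <IMODEsuffix> replaced by the matching attribute value.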
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Constraints
+;; ::
+;; ::::::::::::::::::::
+
+;; Standard Constraints
+;;
+;; `m' A memory operand is allowed, with any kind of address that the
+;; machine supports in general.
+;;
+;; `o' A memory operand is allowed, but only if the address is
+;; "offsettable". This means that adding a small integer (actually, the
+;; width in bytes of the operand, as determined by its machine mode) may be
+;; added to the address and the result is also a valid memory address.
+;;
+;; `V' A memory operand that is not offsettable. In other words,
+;; anything that would fit the `m' constraint but not the `o' constraint.
+;;
+;; `<' A memory operand with autodecrement addressing (either
+;; predecrement or postdecrement) is allowed.
+;;
+;; `>' A memory operand with autoincrement addressing (either
+;; preincrement or postincrement) is allowed.
+;;
+;; `r' A register operand is allowed provided that it is in a general
+;; register.
+;;
+;; `d', `a', `f', ...
+;; Other letters can be defined in machine-dependent fashion to stand for
+;; particular classes of registers. `d', `a' and `f' are defined on the
+;; 68000/68020 to stand for data, address and floating point registers.
+;;
+;; `i' An immediate integer operand (one with constant value) is allowed.
+;; This includes symbolic constants whose values will be known only at
+;; assembly time.
+;;
+;; `n' An immediate integer operand with a known numeric value is allowed.
+;; Many systems cannot support assembly-time constants for operands less
+;; than a word wide. Constraints for these operands should use `n' rather
+;; than `i'.
+;;
+;; 'I' First machine-dependent integer constant (6-bit signed ints).
+;; 'J' Second machine-dependent integer constant (10-bit signed ints).
+;; 'K' Third machine-dependent integer constant (-2048).
+;; 'L' Fourth machine-dependent integer constant (16-bit signed ints).
+;; 'M' Fifth machine-dependent integer constant (16-bit unsigned ints).
+;; 'N' Sixth machine-dependent integer constant (-2047..-1).
+;; 'O' Seventh machine-dependent integer constant (zero).
+;; 'P' Eighth machine-dependent integer constant (1..2047).
+;;
+;; Other letters in the range `I' through `P' may be defined in a
+;; machine-dependent fashion to permit immediate integer operands with
+;; explicit integer values in specified ranges. For example, on the 68000,
+;; `I' is defined to stand for the range of values 1 to 8. This is the
+;; range permitted as a shift count in the shift instructions.
+;;
+;; `E' An immediate floating operand (expression code `const_double') is
+;; allowed, but only if the target floating point format is the same as
+;; that of the host machine (on which the compiler is running).
+;;
+;; `F' An immediate floating operand (expression code `const_double') is
+;; allowed.
+;;
+;; 'G' First machine-dependent const_double.
+;; 'H' Second machine-dependent const_double.
+;;
+;; `s' An immediate integer operand whose value is not an explicit
+;; integer is allowed.
+;;
+;; This might appear strange; if an insn allows a constant operand with a
+;; value not known at compile time, it certainly must allow any known
+;; value. So why use `s' instead of `i'? Sometimes it allows better code
+;; to be generated.
+;;
+;; For example, on the 68000 in a fullword instruction it is possible to
+;; use an immediate operand; but if the immediate value is between -128 and
+;; 127, better code results from loading the value into a register and
+;; using the register. This is because the load into the register can be
+;; done with a `moveq' instruction. We arrange for this to happen by
+;; defining the letter `K' to mean "any integer outside the range -128 to
+;; 127", and then specifying `Ks' in the operand constraints.
+;;
+;; `g' Any register, memory or immediate integer operand is allowed,
+;; except for registers that are not general registers.
+;;
+;; `X' Any operand whatsoever is allowed, even if it does not satisfy
+;; `general_operand'. This is normally used in the constraint of a
+;; `match_scratch' when certain alternatives will not actually require a
+;; scratch register.
+;;
+;; `0' Match operand 0.
+;; `1' Match operand 1.
+;; `2' Match operand 2.
+;; `3' Match operand 3.
+;; `4' Match operand 4.
+;; `5' Match operand 5.
+;; `6' Match operand 6.
+;; `7' Match operand 7.
+;; `8' Match operand 8.
+;; `9' Match operand 9.
+;;
+;; An operand that matches the specified operand number is allowed. If a
+;; digit is used together with letters within the same alternative, the
+;; digit should come last.
+;;
+;; This is called a "matching constraint" and what it really means is that
+;; the assembler has only a single operand that fills two roles considered
+;; separate in the RTL insn. For example, an add insn has two input
+;; operands and one output operand in the RTL, but on most CISC machines an
+;; add instruction really has only two operands, one of them an
+;; input-output operand:
+;;
+;; addl #35,r12
+;;
+;; Matching constraints are used in these circumstances. More precisely,
+;; the two operands that match must include one input-only operand and one
+;; output-only operand. Moreover, the digit must be a smaller number than
+;; the number of the operand that uses it in the constraint.
+;;
+;; For operands to match in a particular case usually means that they are
+;; identical-looking RTL expressions. But in a few special cases specific
+;; kinds of dissimilarity are allowed. For example, `*x' as an input
+;; operand will match `*x++' as an output operand. For proper results in
+;; such cases, the output template should always use the output-operand's
+;; number when printing the operand.
+;;
+;; `p' An operand that is a valid memory address is allowed. This is for
+;; "load address" and "push address" instructions.
+;;
+;; `p' in the constraint must be accompanied by `address_operand' as the
+;; predicate in the `match_operand'. This predicate interprets the mode
+;; specified in the `match_operand' as the mode of the memory reference for
+;; which the address would be valid.
+;;
+;; `Q'  First non-constant, non-register machine-dependent operand
+;; `R'  Second non-constant, non-register machine-dependent operand
+;; `S'  Third non-constant, non-register machine-dependent operand
+;; `T'  Fourth non-constant, non-register machine-dependent operand
+;; `U'  Fifth non-constant, non-register machine-dependent operand
+;;
+;; Letters in the range `Q' through `U' may be defined in a
+;; machine-dependent fashion to stand for arbitrary operand types. The
+;; machine description macro `EXTRA_CONSTRAINT' is passed the operand as
+;; its first argument and the constraint letter as its second argument.
+;;
+;; A typical use for this would be to distinguish certain types of memory
+;; references that affect other insn operands.
+;;
+;; Do not define these constraint letters to accept register references
+;; (`reg'); the reload pass does not expect this and would not handle it
+;; properly.
+
+;; Multiple Alternative Constraints
+;; `?' Disparage slightly the alternative that the `?' appears in, as a
+;; choice when no alternative applies exactly. The compiler regards this
+;; alternative as one unit more costly for each `?' that appears in it.
+;;
+;; `!' Disparage severely the alternative that the `!' appears in. This
+;; alternative can still be used if it fits without reloading, but if
+;; reloading is needed, some other alternative will be used.
+
+;; Constraint modifiers
+;; `=' Means that this operand is write-only for this instruction: the
+;; previous value is discarded and replaced by output data.
+;;
+;; `+' Means that this operand is both read and written by the
+;; instruction.
+;;
+;; When the compiler fixes up the operands to satisfy the constraints, it
+;; needs to know which operands are inputs to the instruction and which are
+;; outputs from it. `=' identifies an output; `+' identifies an operand
+;; that is both input and output; all other operands are assumed to be
+;; input only.
+;;
+;; `&' Means (in a particular alternative) that this operand is written
+;; before the instruction is finished using the input operands. Therefore,
+;; this operand may not lie in a register that is used as an input operand
+;; or as part of any memory address.
+;;
+;; `&' applies only to the alternative in which it is written. In
+;; constraints with multiple alternatives, sometimes one alternative
+;; requires `&' while others do not.
+;;
+;; `&' does not obviate the need to write `='.
+;;
+;; `%' Declares the instruction to be commutative for this operand and the
+;; following operand. This means that the compiler may interchange the two
+;; operands if that is the cheapest way to make all operands fit the
+;; constraints. This is often used in patterns for addition instructions
+;; that really have only two operands: the result must go in one of the
+;; arguments.
+;;
+;; `#' Says that all following characters, up to the next comma, are to be
+;; ignored as a constraint. They are significant only for choosing
+;; register preferences.
+;;
+;; `*' Says that the following character should be ignored when choosing
+;; register preferences. `*' has no effect on the meaning of the
+;; constraint as a constraint, and no effect on reloading.
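+
+;; A hedged sketch combining several of these modifiers (hypothetical
+;; pattern, not from this port):
+;;
+;; (define_insn "*example"
+;;   [(set (match_operand:SI 0 "register_operand" "=&r")
+;;         (plus:SI (match_operand:SI 1 "register_operand" "%r")
+;;                  (match_operand:SI 2 "register_operand" "r")))]
+;;   ""
+;;   "...")
+;;
+;; Operand 0 is a write-only (`=') early-clobber (`&') output, so it is
+;; never assigned the same register as either input, and `%' lets the
+;; compiler swap operands 1 and 2 if that helps satisfy the constraints.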
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Attributes
+;; ::
+;; ::::::::::::::::::::
+
+;; The `define_attr' expression is used to define each attribute required by
+;; the target machine. It looks like:
+;;
+;; (define_attr NAME LIST-OF-VALUES DEFAULT)
+
+;; NAME is a string specifying the name of the attribute being defined.
+
+;; LIST-OF-VALUES is either a string that specifies a comma-separated list of
+;; values that can be assigned to the attribute, or a null string to indicate
+;; that the attribute takes numeric values.
+
+;; DEFAULT is an attribute expression that gives the value of this attribute
+;; for insns that match patterns whose definition does not include an explicit
+;; value for this attribute.
+
+;; For each defined attribute, a number of definitions are written to the
+;; `insn-attr.h' file. For cases where an explicit set of values is specified
+;; for an attribute, the following are defined:
+
+;; * A `#define' is written for the symbol `HAVE_ATTR_NAME'.
+;;
+;; * An enumerated type is defined for `attr_NAME' with elements of the
+;; form `UPPER-NAME_UPPER-VALUE' where the attribute name and value are first
+;; converted to upper case.
+;;
+;; * A function `get_attr_NAME' is defined that is passed an insn and
+;; returns the attribute value for that insn.
+
+;; For example, if the following is present in the `md' file:
+;;
+;; (define_attr "type" "branch,fp,load,store,arith" ...)
+;;
+;; the following lines will be written to the file `insn-attr.h'.
+;;
+;; #define HAVE_ATTR_type
+;; enum attr_type {TYPE_BRANCH, TYPE_FP, TYPE_LOAD, TYPE_STORE, TYPE_ARITH};
+;; extern enum attr_type get_attr_type ();
+
+;; If the attribute takes numeric values, no `enum' type will be defined and
+;; the function to obtain the attribute's value will return `int'.
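+
+;; For instance, the numeric "length" attribute defined just below should
+;; produce declarations along these lines (a sketch, not verbatim output):
+;;
+;; #define HAVE_ATTR_length
+;; extern int get_attr_length ();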
+
+(define_attr "length" "" (const_int 4))
+
+;; Processor type -- this attribute must exactly match the processor_type
+;; enumeration in frv-protos.h.
+
+(define_attr "cpu" "generic,fr550,fr500,fr450,fr405,fr400,fr300,simple,tomcat"
+ (const (symbol_ref "(enum attr_cpu) frv_cpu_type")))
+
+;; Attribute is "yes" for branches and jumps that span too great a distance
+;; to be implemented in the most natural way. Such instructions will use
+;; a call instruction in some way.
+
+(define_attr "far_jump" "yes,no" (const_string "no"))
+
+;; Instruction type
+;; "unknown" must come last.
+(define_attr "type"
+ "int,sethi,setlo,mul,div,gload,gstore,fload,fstore,movfg,movgf,macc,scan,cut,branch,jump,jumpl,call,spr,trap,fnop,fsconv,fsadd,fscmp,fsmul,fsmadd,fsdiv,sqrt_single,fdconv,fdadd,fdcmp,fdmul,fdmadd,fddiv,sqrt_double,mnop,mlogic,maveh,msath,maddh,mqaddh,mpackh,munpackh,mdpackh,mbhconv,mrot,mshift,mexpdhw,mexpdhd,mwcut,mmulh,mmulxh,mmach,mmrdh,mqmulh,mqmulxh,mqmach,mcpx,mqcpx,mcut,mclracc,mclracca,mdunpackh,mbhconve,mrdacc,mwtacc,maddacc,mdaddacc,mabsh,mdrot,mcpl,mdcut,mqsath,mqlimh,mqshift,mset,ccr,multi,load_or_call,unknown"
+ (const_string "unknown"))
+
+(define_attr "acc_group" "none,even,odd"
+ (symbol_ref "(enum attr_acc_group) frv_acc_group (insn)"))
+
+;; Scheduling and Packing Overview
+;; -------------------------------
+;;
+;; FR-V instructions are divided into five groups: integer, floating-point,
+;; media, branch and control. Each group is associated with a separate set
+;; of processing units, the number and behavior of which depend on the
+;; target processor. Integer units have names like I0 and I1, floating-point
+;; units have names like F0 and F1, and so on.
+;;
+;; Each member of the FR-V family has its own restrictions on which
+;; instructions can issue to which units. For example, some processors
+;; allow loads to issue to I0 or I1 while others only allow them to issue
+;; to I0. As well as these processor-specific restrictions, there is a
+;; general rule that an instruction can only issue to unit X + 1 if an
+;; instruction in the same packet issued to unit X.
+;;
+;; Sometimes the only way to honor these restrictions is by adding nops
+;; to a packet. For example, on the fr550, media instructions that access
+;; ACC4-7 can only issue to M1 or M3. It is therefore only possible to
+;; execute these instructions by packing them with something that issues
+;; to M0. When no useful M0 instruction exists, an "mnop" can be used
+;; instead.
+;;
+;; Having decided which instructions should issue to which units, the packet
+;; should be ordered according to the following template:
+;;
+;; I0 F0/M0 I1 F1/M1 .... B0 B1 ...
+;;
+;; Note that VLIW packets execute strictly in parallel. Every instruction
+;; in the packet will stall until all input operands are ready. These
+;; operands are then read simultaneously before any registers are modified.
+;; This means that it's OK to have write-after-read hazards between
+;; instructions in the same packet, even if the write is listed earlier
+;; than the read.
+;;
+;; Three gcc passes are involved in generating VLIW packets:
+;;
+;; (1) The scheduler. This pass uses the standard scheduling code and
+;; behaves in much the same way as it would for a superscalar RISC
+;; architecture.
+;;
+;; (2) frv_reorg. This pass inserts nops into packets in order to meet
+;; the processor's issue requirements. It also has code to optimize
+;; the type of padding used to align labels.
+;;
+;; (3) frv_pack_insns. The final packing phase, which puts the
+;; instructions into assembly language order according to the
+;; "I0 F0/M0 ..." template above.
+;;
+;; In the ideal case, these three passes will agree on which instructions
+;; should be packed together, but this won't always happen. In particular:
+;;
+;; (a) (2) might not pack predicated instructions in the same way as (1).
+;; The scheduler tries to schedule predicated instructions for the
+;; worst case, assuming the predicate is true. However, if we have
+;; something like a predicated load, it isn't always possible to
+;; fill the load delay with useful instructions. (2) should then
+;; pack the user of the loaded value as aggressively as possible,
+;; in order to optimize the case when the predicate is false.
+;; See frv_pack_insn_p for more details.
+;;
+;; (b) The final shorten_branches pass runs between (2) and (3).
+;; Since (2) inserts nops, it is possible that some branches
+;; that were thought to be in range during (2) turn out to be
+;; out-of-range in (3).
+;;
+;; All three passes use DFAs to model issue restrictions. The main
+;; question that the DFAs are supposed to answer is simply: can these
+;; instructions be packed together? The DFAs are not responsible for
+;; assigning instructions to execution units; that's the job of
+;; frv_sort_insn_group, see below for details.
+;;
+;; To get the best results, the DFAs should try to allow packets to
+;; be built in every possible order. This gives the scheduler more
+;; flexibility, removing the need for things like multipass lookahead.
+;; It also means we can take more advantage of inter-packet dependencies.
+;;
+;; For example, suppose we're compiling for the fr400 and we have:
+;;
+;; addi gr4,#1,gr5
+;; ldi @(gr6,gr0),gr4
+;;
+;; We can pack these instructions together by assigning the load to I0 and
+;; the addition to I1. However, because of the anti dependence between the
+;; two instructions, the scheduler must schedule the addition first.
+;; We should generally get better schedules if the DFA allows both
+;; (ldi, addi) and (addi, ldi), leaving the final packing pass to
+;; reorder the packet where appropriate.
+;;
+;; Almost all integer instructions can issue to any unit in the range I0
+;; to Ix, where the value of "x" depends on the type of instruction and
+;; on the target processor. The rules for other instruction groups are
+;; usually similar.
+;;
+;; When the restrictions are as regular as this, we can get the desired
+;; behavior by claiming the DFA unit associated with the highest unused
+;; execution unit. For example, if an instruction can issue to I0 or I1,
+;; the DFA first tries to take the DFA unit associated with I1, and will
+;; only take I0's unit if I1 isn't free. (Note that, as mentioned above,
+;; the DFA does not assign instructions to units. An instruction that
+;; claims DFA unit I1 will not necessarily issue to I1 in the final packet.)
+;;
+;; There are some cases, such as the fr550 media restriction mentioned
+;; above, where the rule is not as simple as "any unit between 0 and X".
+;; Even so, allocating higher units first brings us close to the ideal.
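+;;
+;; This is why the reservations below list the highest unit first --
+;; "i1|i0" rather than "i0|i1" -- so that the DFA tries the
+;; higher-numbered unit before falling back on a lower one.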
+;;
+;; Having divided instructions into packets, passes (2) and (3) must
+;; assign instructions to specific execution units. They do this using
+;; the following algorithm:
+;;
+;; 1. Partition the instructions into groups (integer, float/media, etc.)
+;;
+;; 2. For each group of instructions:
+;;
+;; (a) Issue each instruction in the reset DFA state and use the
+;; DFA cpu_unit_query interface to find out which unit it picks
+;; first.
+;;
+;; (b) Sort the instructions into ascending order of picked units.
+;; Instructions that pick I1 first come after those that pick
+;; I0 first, and so on. Let S be the sorted sequence and S[i]
+;; be the ith element of it (counting from zero).
+;;
+;; (c) If this is the control or branch group, goto (i)
+;;
+;; (d) Find the largest L such that S[0]...S[L-1] can be issued
+;; consecutively from the reset state and such that the DFA
+;; claims unit X when S[X] is added. Let D be the DFA state
+;; after instructions S[0]...S[L-1] have been issued.
+;;
+;; (e) If L is the length of S, goto (i)
+;;
+;; (f) Let U be the number of units belonging to this group and #S be
+;; the length of S. Create a new sequence S' by concatenating
+;; S[L]...S[#S-1] and (U - #S) nops.
+;;
+;; (g) For each permutation S'' of S', try issuing S'' from last to
+;; first, starting with state D. See if the DFA claims unit
+;; X + L when each S''[X] is added. If so, set S to the
+;; concatenation of S[0]...S[L-1] and S'', then goto (i).
+;;
+;; (h) If (g) found no permutation, abort.
+;;
+;; (i) S is now the sorted sequence for this group, meaning that S[X]
+;; issues to unit X. Trim any unwanted nops from the end of S.
+;;
+;; The sequence calculated by (b) is trivially correct for control
+;; instructions since they can't be packed. It is also correct for branch
+;; instructions due to their simple issue requirements. For integer and
+;; floating-point/media instructions, the sequence calculated by (b) is
+;; often the correct answer; the rest of the algorithm is optimized for
+;; the case in which it is correct.
+;;
+;; If there were no irregularities in the issue restrictions then step
+;; (d) would not be needed. It is mainly there to cope with the fr550
+;; integer restrictions, where a store can issue to I1, but only if a store
+;; also issues to I0. (Note that if a packet has two stores, they will be
+;; at the beginning of the sequence calculated by (b).) It also copes
+;; with fr400 M-2 instructions, which must issue to M0, and which cannot
+;; be issued together with an mnop in M1.
+;;
+;; Step (g) is the main one for integer and float/media instructions.
+;; The first permutation it tries is S' itself (because, as noted above,
+;; the sequence calculated by (b) is often correct). If S' doesn't work,
+;; the implementation tries varying the beginning of the sequence first.
+;; Thus the nops towards the end of the sequence will only move to lower
+;; positions if absolutely necessary.
+;;
+;; The algorithm is theoretically exponential in the number of instructions
+;; in a group, although it's only O(n log(n)) if the sequence calculated by
+;; (b) is acceptable. In practice, the algorithm completes quickly even
+;; in the rare cases where (g) needs to try other permutations.
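+;;
+;; A hypothetical trace of the steps above (not from a real run):
+;; suppose an integer group holds insns A, B and C whose first picks in
+;; (a) are I1, I0 and I1.  Step (b) sorts them into S = (B, A, C).  If
+;; the DFA claims I0 for S[0] and I1 for S[1] but refuses I2 when S[2]
+;; is added, step (d) stops with L = 2, and step (g) permutes the
+;; remaining insns and nops until the DFA claims unit X + 2 for each
+;; element S''[X].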
+(define_automaton "integer, float_media, branch, control, idiv, div")
+
+;; The main issue units. Note that not all units are available on
+;; all processors.
+(define_query_cpu_unit "i0,i1,i2,i3" "integer")
+(define_query_cpu_unit "f0,f1,f2,f3" "float_media")
+(define_query_cpu_unit "b0,b1" "branch")
+(define_query_cpu_unit "c" "control")
+
+;; Division units.
+(define_cpu_unit "idiv1,idiv2" "idiv")
+(define_cpu_unit "div1,div2,root" "div")
+
+;; Control instructions cannot be packed with others.
+(define_reservation "control" "i0+i1+i2+i3+f0+f1+f2+f3+b0+b1")
+
+;; Generic reservation for control insns
+(define_insn_reservation "control" 1
+ (eq_attr "type" "trap,spr,unknown,multi")
+ "c + control")
+
+;; Reservation for relaxable calls to gettlsoff.
+(define_insn_reservation "load_or_call" 3
+ (eq_attr "type" "load_or_call")
+ "c + control")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Generic/FR500 scheduler description
+;; ::
+;; ::::::::::::::::::::
+
+;; Integer insns
+;; Synthetic units used to describe issue restrictions.
+(define_automaton "fr500_integer")
+(define_cpu_unit "fr500_load0,fr500_load1,fr500_store0" "fr500_integer")
+(exclusion_set "fr500_load0,fr500_load1" "fr500_store0")
+
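+;; A define_bypass overrides the producer's normal latency for a
+;; specific consumer.  The zero-cycle bypass below reflects the fact
+;; that a setlo need not wait for the sethi that writes the other half
+;; of the same register.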
+(define_bypass 0 "fr500_i1_sethi" "fr500_i1_setlo")
+(define_insn_reservation "fr500_i1_sethi" 1
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "sethi"))
+ "i1|i0")
+
+(define_insn_reservation "fr500_i1_setlo" 1
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "setlo"))
+ "i1|i0")
+
+(define_insn_reservation "fr500_i1_int" 1
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "int"))
+ "i1|i0")
+
+(define_insn_reservation "fr500_i1_mul" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mul"))
+ "i1|i0")
+
+(define_insn_reservation "fr500_i1_div" 19
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "div"))
+ "(i1|i0),(idiv1*18|idiv2*18)")
+
+(define_insn_reservation "fr500_i2" 4
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "gload,fload"))
+ "(i1|i0) + (fr500_load0|fr500_load1)")
+
+(define_insn_reservation "fr500_i3" 0
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "gstore,fstore"))
+ "i0 + fr500_store0")
+
+(define_insn_reservation "fr500_i4" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "movgf,movfg"))
+ "i0")
+
+(define_insn_reservation "fr500_i5" 0
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "jumpl"))
+ "i0")
+
+;;
+;; Branch-instructions
+;;
+(define_insn_reservation "fr500_branch" 0
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "jump,branch,ccr"))
+ "b1|b0")
+
+(define_insn_reservation "fr500_call" 0
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "call"))
+ "b0")
+
+;; Floating point insns. The default latencies are for non-media
+;; instructions; media instructions incur an extra cycle.
+
+(define_bypass 4 "fr500_farith" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_insn_reservation "fr500_farith" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "fnop,fsconv,fsadd,fsmul,fsmadd,fdconv,fdadd,fdmul,fdmadd"))
+ "(f1|f0)")
+
+(define_insn_reservation "fr500_fcmp" 4
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "fscmp,fdcmp"))
+ "(f1|f0)")
+
+(define_bypass 11 "fr500_fdiv" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_insn_reservation "fr500_fdiv" 10
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "fsdiv,fddiv"))
+ "(f1|f0),(div1*9 | div2*9)")
+
+(define_bypass 16 "fr500_froot" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_insn_reservation "fr500_froot" 15
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "sqrt_single,sqrt_double"))
+ "(f1|f0) + root*15")
+
+;; Media insns. Conflict table is as follows:
+;;
+;;         M1   M2   M3   M4   M5   M6
+;;   M1    -    -    -    -    -    -
+;;   M2    -    -    -    -    X    X
+;;   M3    -    -    -    -    X    X
+;;   M4    -    -    -    -    -    X
+;;   M5    -    X    X    -    X    X
+;;   M6    -    X    X    X    X    X
+;;
+;; where X indicates an invalid combination.
+;;
+;; Target registers are as follows:
+;;
+;; M1 : FPRs
+;; M2 : FPRs
+;; M3 : ACCs
+;; M4 : ACCs
+;; M5 : FPRs
+;; M6 : ACCs
+;;
+;; The default FPR latencies are for integer instructions.
+;; Floating-point instructions need one cycle more and media
+;; instructions need one cycle less.
+(define_automaton "fr500_media")
+(define_cpu_unit "fr500_m2_0,fr500_m2_1" "fr500_media")
+(define_cpu_unit "fr500_m3_0,fr500_m3_1" "fr500_media")
+(define_cpu_unit "fr500_m4_0,fr500_m4_1" "fr500_media")
+(define_cpu_unit "fr500_m5" "fr500_media")
+(define_cpu_unit "fr500_m6" "fr500_media")
+
+(exclusion_set "fr500_m5,fr500_m6" "fr500_m2_0,fr500_m2_1,
+ fr500_m3_0,fr500_m3_1")
+(exclusion_set "fr500_m6" "fr500_m4_0,fr500_m4_1,fr500_m5")
+
+(define_bypass 2 "fr500_m1" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_bypass 4 "fr500_m1" "fr500_farith,fr500_fcmp,fr500_fdiv,fr500_froot")
+(define_insn_reservation "fr500_m1" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mnop,mlogic,maveh,msath,maddh,mqaddh"))
+ "(f1|f0)")
+
+(define_bypass 2 "fr500_m2" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_bypass 4 "fr500_m2" "fr500_farith,fr500_fcmp,fr500_fdiv,fr500_froot")
+(define_insn_reservation "fr500_m2" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mrdacc,mpackh,munpackh,mbhconv,mrot,mshift,mexpdhw,mexpdhd,mwcut,mcut,mdunpackh,mbhconve"))
+ "(f1|f0) + (fr500_m2_0|fr500_m2_1)")
+
+(define_bypass 1 "fr500_m3" "fr500_m4")
+(define_insn_reservation "fr500_m3" 2
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mclracc,mwtacc"))
+ "(f1|f0) + (fr500_m3_0|fr500_m3_1)")
+
+(define_bypass 1 "fr500_m4" "fr500_m4")
+(define_insn_reservation "fr500_m4" 2
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mmulh,mmulxh,mmach,mmrdh,mqmulh,mqmulxh,mqmach,mcpx,mqcpx"))
+ "(f1|f0) + (fr500_m4_0|fr500_m4_1)")
+
+(define_bypass 2 "fr500_m5" "fr500_m1,fr500_m2,fr500_m3,
+ fr500_m4,fr500_m5,fr500_m6")
+(define_bypass 4 "fr500_m5" "fr500_farith,fr500_fcmp,fr500_fdiv,fr500_froot")
+(define_insn_reservation "fr500_m5" 3
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mdpackh"))
+ "(f1|f0) + fr500_m5")
+
+(define_bypass 1 "fr500_m6" "fr500_m4")
+(define_insn_reservation "fr500_m6" 2
+ (and (eq_attr "cpu" "generic,fr500,tomcat")
+ (eq_attr "type" "mclracca"))
+ "(f1|f0) + fr500_m6")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: FR400 scheduler description
+;; ::
+;; ::::::::::::::::::::
+
+;; Category 2 media instructions use both media units, but can be packed
+;; with non-media instructions. Use fr400_m1unit to claim the M1 unit
+;; without claiming a slot.
+
+;; Name    Class  Units  Latency
+;; ====    =====  =====  =======
+;; int     I1     I0/I1  1
+;; sethi   I1     I0/I1  0       -- does not interfere with setlo
+;; setlo   I1     I0/I1  1
+;; mul     I1     I0     3  (*)
+;; div     I1     I0     20 (*)
+;; gload   I2     I0     4  (*)
+;; fload   I2     I0     4       -- only 3 if read by a media insn
+;; gstore  I3     I0     0       -- provides no result
+;; fstore  I3     I0     0       -- provides no result
+;; movfg   I4     I0     3  (*)
+;; movgf   I4     I0     3  (*)
+;; jumpl   I5     I0     0       -- provides no result
+;;
+;; (*) The results of these instructions can be read one cycle earlier
+;; than indicated. The penalty given is for instructions with write-after-
+;; write dependencies.
+
+;; The FR400 can only do loads and stores in I0, so there's no danger
+;; of memory unit collision in the same packet.  There's only one
+;; divide unit too.
+
+(define_automaton "fr400_integer")
+(define_cpu_unit "fr400_mul" "fr400_integer")
+
+(define_insn_reservation "fr400_i1_int" 1
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "int"))
+ "i1|i0")
+
+(define_bypass 0 "fr400_i1_sethi" "fr400_i1_setlo")
+(define_insn_reservation "fr400_i1_sethi" 1
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "sethi"))
+ "i1|i0")
+
+(define_insn_reservation "fr400_i1_setlo" 1
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "setlo"))
+ "i1|i0")
+
+;; 3 is the worst case (write-after-write hazard).
+(define_insn_reservation "fr400_i1_mul" 3
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mul"))
+ "i0 + fr400_mul")
+
+(define_insn_reservation "fr450_i1_mul" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mul"))
+ "i0 + fr400_mul")
+
+(define_bypass 1 "fr400_i1_macc" "fr400_i1_macc")
+(define_insn_reservation "fr400_i1_macc" 2
+ (and (eq_attr "cpu" "fr405,fr450")
+ (eq_attr "type" "macc"))
+ "(i0|i1) + fr400_mul")
+
+(define_insn_reservation "fr400_i1_scan" 1
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "scan"))
+ "i0")
+
+(define_insn_reservation "fr400_i1_cut" 2
+ (and (eq_attr "cpu" "fr405,fr450")
+ (eq_attr "type" "cut"))
+ "i0 + fr400_mul")
+
+;; 20 is for a write-after-write hazard.
+(define_insn_reservation "fr400_i1_div" 20
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "div"))
+ "i0 + idiv1*19")
+
+(define_insn_reservation "fr450_i1_div" 19
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "div"))
+ "i0 + idiv1*19")
+
+;; 4 is for a write-after-write hazard.
+(define_insn_reservation "fr400_i2" 4
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "gload,fload"))
+ "i0")
+
+(define_insn_reservation "fr450_i2_gload" 3
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "gload"))
+ "i0")
+
+;; 4 is for a write-after-write hazard.
+(define_insn_reservation "fr450_i2_fload" 4
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "fload"))
+ "i0")
+
+(define_insn_reservation "fr400_i3" 0
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "gstore,fstore"))
+ "i0")
+
+;; 3 is for a write-after-write hazard.
+(define_insn_reservation "fr400_i4" 3
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "movfg,movgf"))
+ "i0")
+
+(define_insn_reservation "fr450_i4_movfg" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "movfg"))
+ "i0")
+
+;; 3 is for a write-after-write hazard.
+(define_insn_reservation "fr450_i4_movgf" 3
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "movgf"))
+ "i0")
+
+(define_insn_reservation "fr400_i5" 0
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "jumpl"))
+ "i0")
+
+;; The bypass between FPR loads and media instructions, described above.
+
+(define_bypass 3
+ "fr400_i2"
+ "fr400_m1_1,fr400_m1_2,\
+ fr400_m2_1,fr400_m2_2,\
+ fr400_m3_1,fr400_m3_2,\
+ fr400_m4_1,fr400_m4_2,\
+ fr400_m5")
+
+;; The branch instructions all use the B unit and produce no result.
+
+(define_insn_reservation "fr400_b" 0
+ (and (eq_attr "cpu" "fr400,fr405,fr450")
+ (eq_attr "type" "jump,branch,ccr,call"))
+ "b0")
+
+;; FP->FP moves are marked as "fsconv" instructions in the define_insns
+;; below, but are implemented on the FR400 using "mlogic" instructions.
+;; It's easier to classify "fsconv" as an "m1:1" instruction than to provide
+;; separate define_insns for the FR400.
+
+;; M1 instructions store their results in FPRs. Any instruction can read
+;; the result in the following cycle, so no penalty occurs.
+
+(define_automaton "fr400_media")
+(define_cpu_unit "fr400_m1a,fr400_m1b,fr400_m2a" "fr400_media")
+(exclusion_set "fr400_m1a,fr400_m1b" "fr400_m2a")
+
+(define_reservation "fr400_m1" "(f1|f0) + (fr400_m1a|fr400_m1b)")
+(define_reservation "fr400_m2" "f0 + fr400_m2a")
+
+(define_insn_reservation "fr400_m1_1" 1
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "fsconv,mnop,mlogic,maveh,msath,maddh,mabsh,mset"))
+ "fr400_m1")
+
+(define_insn_reservation "fr400_m1_2" 1
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mqaddh,mqsath,mqlimh,mqshift"))
+ "fr400_m2")
+
+;; M2 instructions store their results in accumulators, which are read
+;; by M2 or M4 media commands. M2 instructions can read the results in
+;; the following cycle, but M4 instructions must wait a cycle more.
+
+(define_bypass 1
+ "fr400_m2_1,fr400_m2_2"
+ "fr400_m2_1,fr400_m2_2")
+
+(define_insn_reservation "fr400_m2_1" 2
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mmulh,mmulxh,mmach,mmrdh,mcpx,maddacc"))
+ "fr400_m1")
+
+(define_insn_reservation "fr400_m2_2" 2
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mqmulh,mqmulxh,mqmach,mqcpx,mdaddacc"))
+ "fr400_m2")
+
+;; For our purposes, there seems to be little real difference between
+;; M1 and M3 instructions. Keep them separate anyway in case the distinction
+;; is needed later.
+
+(define_insn_reservation "fr400_m3_1" 1
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mpackh,mrot,mshift,mexpdhw"))
+ "fr400_m1")
+
+(define_insn_reservation "fr400_m3_2" 1
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "munpackh,mdpackh,mbhconv,mexpdhd,mwcut,mdrot,mcpl"))
+ "fr400_m2")
+
+;; M4 instructions write to accumulators or FPRs. MOVFG and STF
+;; instructions can read an FPR result in the following cycle, but
+;; M-unit instructions must wait a cycle more for either kind of result.
+
+(define_bypass 1 "fr400_m4_1,fr400_m4_2" "fr400_i3,fr400_i4")
+
+(define_insn_reservation "fr400_m4_1" 2
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mrdacc,mcut,mclracc"))
+ "fr400_m1")
+
+(define_insn_reservation "fr400_m4_2" 2
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mclracca,mdcut"))
+ "fr400_m2")
+
+;; M5 instructions always incur a 1-cycle penalty.
+
+(define_insn_reservation "fr400_m5" 2
+ (and (eq_attr "cpu" "fr400,fr405")
+ (eq_attr "type" "mwtacc"))
+ "fr400_m2")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: FR450 media scheduler description
+;; ::
+;; ::::::::::::::::::::
+
+;; The FR451 media restrictions are similar to the FR400's, but not as
+;; strict and not as regular. There are 6 categories with the following
+;; restrictions:
+;;
+;; M1
+;; M-1 M-2 M-3 M-4 M-5 M-6
+;; M-1: x x x
+;; M-2: x x x x x x
+;; M0 M-3: x x x
+;; M-4: x x x x
+;; M-5: x x x
+;; M-6: x x x x x x
+;;
+;; where "x" indicates a conflict.
+;;
+;; There is no difference between M-1 and M-3 as far as issue
+;; restrictions are concerned, so they are combined as "m13".
+
+;; Units for odd-numbered categories. There can be two of these
+;; in a packet.
+(define_cpu_unit "fr450_m13a,fr450_m13b" "float_media")
+(define_cpu_unit "fr450_m5a,fr450_m5b" "float_media")
+
+;; Units for even-numbered categories. There can only be one per packet.
+(define_cpu_unit "fr450_m2a,fr450_m4a,fr450_m6a" "float_media")
+
+;; Enforce the restriction matrix above.
+(exclusion_set "fr450_m2a,fr450_m4a,fr450_m6a" "fr450_m13a,fr450_m13b")
+(exclusion_set "fr450_m2a,fr450_m6a" "fr450_m5a,fr450_m5b")
+(exclusion_set "fr450_m4a,fr450_m6a" "fr450_m2a")
+
+(define_reservation "fr450_m13" "(f1|f0) + (fr450_m13a|fr450_m13b)")
+(define_reservation "fr450_m2" "f0 + fr450_m2a")
+(define_reservation "fr450_m4" "f0 + fr450_m4a")
+(define_reservation "fr450_m5" "(f1|f0) + (fr450_m5a|fr450_m5b)")
+(define_reservation "fr450_m6" "(f0|f1) + fr450_m6a")
+
+;; MD-1, MD-3 and MD-8 instructions are the same as far as scheduling
+;; is concerned.  The inputs and outputs are FPRs.
+;; Instructions that have 32-bit inputs and outputs belong to M-1 while
+;; the rest belong to M-2.
+;;
+;; ??? Arithmetic shifts (MD-6) have an extra cycle latency, but we don't
+;; make the distinction between them and logical shifts.
+(define_insn_reservation "fr450_md138_1" 1
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "fsconv,mnop,mlogic,maveh,msath,maddh,mabsh,mset,
+ mrot,mshift,mexpdhw,mpackh"))
+ "fr450_m13")
+
+(define_insn_reservation "fr450_md138_2" 1
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mqaddh,mqsath,mqlimh,
+ mdrot,mwcut,mqshift,mexpdhd,
+ munpackh,mdpackh,mbhconv,mcpl"))
+ "fr450_m2")
+
+;; MD-2 instructions. These take FPR or ACC inputs and produce an ACC output.
+;; Instructions that write to double ACCs belong to M-3 while those that write
+;; to quad ACCs belong to M-4.
+(define_insn_reservation "fr450_md2_3" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mmulh,mmach,mcpx,mmulxh,mmrdh,maddacc"))
+ "fr450_m13")
+
+(define_insn_reservation "fr450_md2_4" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mqmulh,mqmach,mqcpx,mqmulxh,mdaddacc"))
+ "fr450_m4")
+
+;; Another MD-2 instruction can use the result on the following cycle.
+(define_bypass 1 "fr450_md2_3,fr450_md2_4" "fr450_md2_3,fr450_md2_4")
+
+;; MD-4 instructions that write to ACCs.
+(define_insn_reservation "fr450_md4_3" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mclracc"))
+ "fr450_m13")
+
+(define_insn_reservation "fr450_md4_4" 3
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mclracca"))
+ "fr450_m4")
+
+;; MD-4 instructions that write to FPRs.
+(define_insn_reservation "fr450_md4_1" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mcut"))
+ "fr450_m13")
+
+(define_insn_reservation "fr450_md4_5" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mrdacc"))
+ "fr450_m5")
+
+(define_insn_reservation "fr450_md4_6" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mdcut"))
+ "fr450_m6")
+
+;; Integer instructions can read the FPR result of an MD-4 instruction on
+;; the following cycle.
+(define_bypass 1 "fr450_md4_1,fr450_md4_5,fr450_md4_6"
+ "fr400_i3,fr450_i4_movfg")
+
+;; MD-5 instructions, which belong to M-3, take FPR inputs and
+;; write to ACCs.
+(define_insn_reservation "fr450_md5_3" 2
+ (and (eq_attr "cpu" "fr450")
+ (eq_attr "type" "mwtacc"))
+ "fr450_m13")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: FR550 scheduler description
+;; ::
+;; ::::::::::::::::::::
+
+;; Prevent loads and stores from being issued in the same packet.
+;; These units must go into the generic "integer" automaton because
+;; of the constraints on fr550_store0 and fr550_store1.
+(define_cpu_unit "fr550_load0,fr550_load1" "integer")
+(define_cpu_unit "fr550_store0,fr550_store1" "integer")
+(exclusion_set "fr550_load0,fr550_load1" "fr550_store0,fr550_store1")
+
+;; A store can only issue to I1 if one has also been issued to I0.
+(presence_set "fr550_store1" "fr550_store0")
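+;; (presence_set allows fr550_store1 to be reserved only when
+;; fr550_store0 is also reserved, enforcing the comment above.)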
+
+(define_bypass 0 "fr550_sethi" "fr550_setlo")
+(define_insn_reservation "fr550_sethi" 1
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "sethi"))
+ "i3|i2|i1|i0")
+
+(define_insn_reservation "fr550_setlo" 1
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "setlo"))
+ "i3|i2|i1|i0")
+
+(define_insn_reservation "fr550_int" 1
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "int"))
+ "i3|i2|i1|i0")
+
+(define_insn_reservation "fr550_mul" 2
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mul"))
+ "i1|i0")
+
+(define_insn_reservation "fr550_div" 19
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "div"))
+ "(i1|i0),(idiv1*18 | idiv2*18)")
+
+(define_insn_reservation "fr550_load" 3
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "gload,fload"))
+ "(i1|i0)+(fr550_load0|fr550_load1)")
+
+;; We can only issue a store to I1 if one was also issued to I0.
+;; This means that, as far as frv_reorder_packet is concerned,
+;; the instruction has the same priority as an I0-only instruction.
+(define_insn_reservation "fr550_store" 1
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "gstore,fstore"))
+ "(i0+fr550_store0)|(i1+fr550_store1)")
+
+(define_insn_reservation "fr550_transfer" 2
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "movgf,movfg"))
+ "i0")
+
+(define_insn_reservation "fr550_jumpl" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "jumpl"))
+ "i0")
+
+(define_cpu_unit "fr550_ccr0,fr550_ccr1" "float_media")
+
+(define_insn_reservation "fr550_branch" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "jump,branch"))
+ "b1|b0")
+
+(define_insn_reservation "fr550_ccr" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "ccr"))
+ "(b1|b0) + (fr550_ccr1|fr550_ccr0)")
+
+(define_insn_reservation "fr550_call" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "call"))
+ "b0")
+
+(define_automaton "fr550_float_media")
+(define_cpu_unit "fr550_add0,fr550_add1" "fr550_float_media")
+
+;; There are three possible combinations of floating-point/media instructions:
+;;
+;; - one media and one float
+;; - up to four float, no media
+;; - up to four media, no float
+(define_cpu_unit "fr550_f0,fr550_f1,fr550_f2,fr550_f3" "fr550_float_media")
+(define_cpu_unit "fr550_m0,fr550_m1,fr550_m2,fr550_m3" "fr550_float_media")
+(exclusion_set "fr550_f1,fr550_f2,fr550_f3" "fr550_m1,fr550_m2,fr550_m3")
+(exclusion_set "fr550_m0" "fr550_f1,fr550_f2,fr550_f3")
+;; FIXME: This next exclusion set should be defined as well, so that we do
+;; not get a packet containing multiple media instructions plus a single
+;; floating point instruction. At the moment we can get away with not
+;; defining it because gcc does not seem to generate such packets.
+;;
+;; If we do enable the exclusion, however, the insertion of fnop insns into
+;; a packet containing media instructions will stop working, because the
+;; fnop insn counts as a floating point instruction. The correct solution
+;; is to fix the reservation for the fnop insn so that it does not have the
+;; same restrictions as ordinary floating point insns.
+;;(exclusion_set "fr550_f0" "fr550_m1,fr550_m2,fr550_m3")
+
+(define_reservation "fr550_float" "fr550_f0|fr550_f1|fr550_f2|fr550_f3")
+(define_reservation "fr550_media" "fr550_m0|fr550_m1|fr550_m2|fr550_m3")
+
+(define_insn_reservation "fr550_f1" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "fnop"))
+ "(f3|f2|f1|f0) + fr550_float")
+
+(define_insn_reservation "fr550_f2" 3
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "fsconv,fsadd,fscmp"))
+ "(f3|f2|f1|f0) + (fr550_add0|fr550_add1) + fr550_float")
+
+(define_insn_reservation "fr550_f3_mul" 3
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "fsmul"))
+ "(f1|f0) + fr550_float")
+
+(define_insn_reservation "fr550_f3_div" 10
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "fsdiv"))
+ "(f1|f0) + fr550_float")
+
+(define_insn_reservation "fr550_f3_sqrt" 15
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "sqrt_single"))
+ "(f1|f0) + fr550_float")
+
+;; Synthetic units for enforcing media issue restrictions. Certain types
+;; of insn in M2 conflict with certain types in M0:
+;;
+;;                     M2
+;;          MNOP  MALU  MSFT  MMAC  MSET
+;;    MNOP   -     -     x     -     -
+;;    MALU   -     x     x     -     -
+;; M0 MSFT   -     -     x     -     x
+;;    MMAC   -     -     x     x     -
+;;    MSET   -     -     x     -     -
+;;
+;; where "x" indicates a conflict. The same restrictions apply to
+;; M3 and M1.
+;;
+;; In addition -- and this is the awkward bit! -- instructions that
+;; access ACC0-3 can only issue to M0 or M2. Those that access ACC4-7
+;; can only issue to M1 or M3. We refer to such instructions as "even"
+;; and "odd" respectively.
+(define_cpu_unit "fr550_malu0,fr550_malu1" "float_media")
+(define_cpu_unit "fr550_malu2,fr550_malu3" "float_media")
+(define_cpu_unit "fr550_msft0,fr550_msft1" "float_media")
+(define_cpu_unit "fr550_mmac0,fr550_mmac1" "float_media")
+(define_cpu_unit "fr550_mmac2,fr550_mmac3" "float_media")
+(define_cpu_unit "fr550_mset0,fr550_mset1" "float_media")
+(define_cpu_unit "fr550_mset2,fr550_mset3" "float_media")
+
+(exclusion_set "fr550_malu0" "fr550_malu2")
+(exclusion_set "fr550_malu1" "fr550_malu3")
+
+(exclusion_set "fr550_msft0" "fr550_mset2")
+(exclusion_set "fr550_msft1" "fr550_mset3")
+
+(exclusion_set "fr550_mmac0" "fr550_mmac2")
+(exclusion_set "fr550_mmac1" "fr550_mmac3")
+
+;; If an MSFT or MMAC instruction issues to a unit other than M0, we may
+;; need to insert some nops. In the worst case, the packet will end up
+;; having 4 integer instructions and 4 media instructions, leaving no
+;; room for any branch instructions that the DFA might have accepted.
+;;
+;; This doesn't matter for JUMP_INSNs and CALL_INSNs because they are
+;; always the last instructions to be passed to the DFA, and could be
+;; pushed out to a separate packet once the nops have been added.
+;; However, it does cause problems for ccr instructions since they
+;; can occur anywhere in the unordered packet.
+(exclusion_set "fr550_msft1,fr550_mmac1,fr550_mmac2,fr550_mmac3"
+ "fr550_ccr0,fr550_ccr1")
+
+(define_reservation "fr550_malu"
+ "(f3 + fr550_malu3) | (f2 + fr550_malu2)
+ | (f1 + fr550_malu1) | (f0 + fr550_malu0)")
+
+(define_reservation "fr550_msft_even"
+ "f0 + fr550_msft0")
+
+(define_reservation "fr550_msft_odd"
+ "f1 + fr550_msft1")
+
+(define_reservation "fr550_msft_either"
+ "(f1 + fr550_msft1) | (f0 + fr550_msft0)")
+
+(define_reservation "fr550_mmac_even"
+ "(f2 + fr550_mmac2) | (f0 + fr550_mmac0)")
+
+(define_reservation "fr550_mmac_odd"
+ "(f3 + fr550_mmac3) | (f1 + fr550_mmac1)")
+
+(define_reservation "fr550_mset"
+ "(f3 + fr550_mset3) | (f2 + fr550_mset2)
+ | (f1 + fr550_mset1) | (f0 + fr550_mset0)")
+
+(define_insn_reservation "fr550_mnop" 0
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mnop"))
+ "fr550_media + (f3|f2|f1|f0)")
+
+(define_insn_reservation "fr550_malu" 2
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mlogic,maveh,msath,mabsh,maddh,mqaddh,mqsath"))
+ "fr550_media + fr550_malu")
+
+;; These insns only operate on FPRs and so don't need to be classified
+;; as even/odd.
+(define_insn_reservation "fr550_msft_1_either" 2
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mrot,mwcut,mshift,mexpdhw,mexpdhd,mpackh,
+ munpackh,mdpackh,mbhconv,mdrot,mcpl"))
+ "fr550_media + fr550_msft_either")
+
+;; These insns read from ACC0-3.
+(define_insn_reservation "fr550_msft_1_even" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mcut,mrdacc,mdcut")
+ (eq_attr "acc_group" "even")))
+ "fr550_media + fr550_msft_even")
+
+;; These insns read from ACC4-7.
+(define_insn_reservation "fr550_msft_1_odd" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mcut,mrdacc,mdcut")
+ (eq_attr "acc_group" "odd")))
+ "fr550_media + fr550_msft_odd")
+
+;; MCLRACC with A=1 can issue to either M0 or M1.
+(define_insn_reservation "fr550_msft_2_either" 2
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mclracca"))
+ "fr550_media + fr550_msft_either")
+
+;; These insns write to ACC0-3.
+(define_insn_reservation "fr550_msft_2_even" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mclracc,mwtacc")
+ (eq_attr "acc_group" "even")))
+ "fr550_media + fr550_msft_even")
+
+;; These insns write to ACC4-7.
+(define_insn_reservation "fr550_msft_2_odd" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mclracc,mwtacc")
+ (eq_attr "acc_group" "odd")))
+ "fr550_media + fr550_msft_odd")
+
+;; These insns read from and write to ACC0-3.
+(define_insn_reservation "fr550_mmac_even" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mmulh,mmulxh,mmach,mmrdh,mqmulh,mqmulxh,mqmach,
+ maddacc,mdaddacc,mcpx,mqcpx")
+ (eq_attr "acc_group" "even")))
+ "fr550_media + fr550_mmac_even")
+
+;; These insns read from and write to ACC4-7.
+(define_insn_reservation "fr550_mmac_odd" 2
+ (and (eq_attr "cpu" "fr550")
+ (and (eq_attr "type" "mmulh,mmulxh,mmach,mmrdh,mqmulh,mqmulxh,mqmach,
+ maddacc,mdaddacc,mcpx,mqcpx")
+ (eq_attr "acc_group" "odd")))
+ "fr550_media + fr550_mmac_odd")
+
+(define_insn_reservation "fr550_mset" 1
+ (and (eq_attr "cpu" "fr550")
+ (eq_attr "type" "mset"))
+ "fr550_media + fr550_mset")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Simple/FR300 scheduler description
+;; ::
+;; ::::::::::::::::::::
+
+;; FR300 or simple processor.  To describe it as a single-issue
+;; processor, we use the control unit.
+
+(define_insn_reservation "fr300_lat1" 1
+ (and (eq_attr "cpu" "fr300,simple")
+ (eq_attr "type" "!gload,fload,movfg,movgf"))
+ "c + control")
+
+(define_insn_reservation "fr300_lat2" 2
+ (and (eq_attr "cpu" "fr300,simple")
+ (eq_attr "type" "gload,fload,movfg,movgf"))
+ "c + control")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Delay Slots
+;; ::
+;; ::::::::::::::::::::
+
+;; The insn attribute mechanism can be used to specify the requirements for
+;; delay slots, if any, on a target machine. An instruction is said to require
+;; a "delay slot" if some instructions that are physically after the
+;; instruction are executed as if they were located before it. Classic
+;; examples are branch and call instructions, which often execute the following
+;; instruction before the branch or call is performed.
+
+;; On some machines, conditional branch instructions can optionally "annul"
+;; instructions in the delay slot. This means that the instruction will not be
+;; executed for certain branch outcomes. Both instructions that annul if the
+;; branch is true and instructions that annul if the branch is false are
+;; supported.
+
+;; Delay slot scheduling differs from instruction scheduling in that
+;; determining whether an instruction needs a delay slot is dependent only
+;; on the type of instruction being generated, not on data flow between the
+;; instructions. See the next section for a discussion of data-dependent
+;; instruction scheduling.
+
+;; The requirement of an insn needing one or more delay slots is indicated via
+;; the `define_delay' expression. It has the following form:
+;;
+;; (define_delay TEST
+;; [DELAY-1 ANNUL-TRUE-1 ANNUL-FALSE-1
+;; DELAY-2 ANNUL-TRUE-2 ANNUL-FALSE-2
+;; ...])
+
+;; TEST is an attribute test that indicates whether this `define_delay' applies
+;; to a particular insn. If so, the number of required delay slots is
+;; determined by the length of the vector specified as the second argument. An
+;; insn placed in delay slot N must satisfy attribute test DELAY-N.
+;; ANNUL-TRUE-N is an attribute test that specifies which insns may be annulled
+;; if the branch is true. Similarly, ANNUL-FALSE-N specifies which insns in
+;; the delay slot may be annulled if the branch is false. If annulling is not
+;; supported for that delay slot, `(nil)' should be coded.
+
+;; For example, in the common case where branch and call insns require a single
+;; delay slot, which may contain any insn other than a branch or call, the
+;; following would be placed in the `md' file:
+
+;; (define_delay (eq_attr "type" "branch,call")
+;; [(eq_attr "type" "!branch,call") (nil) (nil)])
+
+;; Multiple `define_delay' expressions may be specified. In this case, each
+;; such expression specifies different delay slot requirements and there must
+;; be no insn for which tests in two `define_delay' expressions are both true.
+
+;; For example, if we have a machine that requires one delay slot for branches
+;; but two for calls, no delay slot can contain a branch or call insn, and any
+;; valid insn in the delay slot for the branch can be annulled if the branch is
+;; true, we might represent this as follows:
+
+;; (define_delay (eq_attr "type" "branch")
+;; [(eq_attr "type" "!branch,call")
+;; (eq_attr "type" "!branch,call")
+;; (nil)])
+;;
+;; (define_delay (eq_attr "type" "call")
+;; [(eq_attr "type" "!branch,call") (nil) (nil)
+;; (eq_attr "type" "!branch,call") (nil) (nil)])
+
+;; Note - it is the backend's responsibility to fill any unfilled delay slots
+;; at assembler generation time. This is usually done by adding a special print
+;; operand to the delayed instruction, and then in the PRINT_OPERAND function
+;; calling dbr_sequence_length() to determine how many delay slots were filled.
+;; For example:
+;;
+;; --------------<machine>.md-----------------
+;; (define_insn "call"
+;; [(call (match_operand 0 "memory_operand" "m")
+;; (match_operand 1 "" ""))]
+;; ""
+;; "call_delayed %0,%1,%2%#"
+;; [(set_attr "length" "4")
+;; (set_attr "type" "call")])
+;;
+;; -------------<machine>.h-------------------
+;; #define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#')
+;;
+;; ------------<machine>.c------------------
+;; void
+;; machine_print_operand (FILE *file, rtx x, int code)
+;; {
+;;   switch (code)
+;;     {
+;;     case '#':
+;;       /* Emit a nop only if no insn was found to fill the slot.  */
+;;       if (dbr_sequence_length () == 0)
+;;         fputs ("\n\tnop", file);
+;;       return;
+;;     default:
+;;       break;
+;;     }
+;; }
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Notes on Patterns
+;; ::
+;; ::::::::::::::::::::
+
+;; If you need to construct a sequence of assembler instructions in order
+;; to implement a pattern, be sure to escape any backslashes and double quotes
+;; that you use, e.g.:
+;;
+;; (define_insn "an example"
+;; [(some rtl)]
+;; ""
+;; "*
+;; { static char buffer [100];
+;; sprintf (buffer, \"insn \\t %d\", REGNO (operands[1]));
+;; return buffer;
+;; }"
+;; )
+;;
+;; Also, if there is more than one instruction, they can be separated
+;; by \\;, which is a space-saving synonym for \\n\\t:
+;;
+;; (define_insn "another example"
+;; [(some rtl)]
+;; ""
+;; "*
+;; { static char buffer [100];
+;; sprintf (buffer, \"insn1 \\t %d\\;insn2 \\t %%1\",
+;; REGNO (operands[1]));
+;; return buffer;
+;; }"
+;; )
+;;
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Moves
+;; ::
+;; ::::::::::::::::::::
+
+;; Wrap moves in define_expand to prevent memory->memory moves from being
+;; generated at the RTL level; this gives better code on the many machines
+;; that cannot do mem->mem moves directly.
+
+;; If operand 0 is a `subreg' with mode M of a register whose own mode is wider
+;; than M, the effect of this instruction is to store the specified value in
+;; the part of the register that corresponds to mode M. The effect on the rest
+;; of the register is undefined.
+
+;; This class of patterns is special in several ways. First of all, each of
+;; these names *must* be defined, because there is no other way to copy a datum
+;; from one place to another.
+
+;; Second, these patterns are not used solely in the RTL generation pass. Even
+;; the reload pass can generate move insns to copy values from stack slots into
+;; temporary registers. When it does so, one of the operands is a hard
+;; register and the other is an operand that can need to be reloaded into a
+;; register.
+
+;; Therefore, when given such a pair of operands, the pattern must
+;; generate RTL which needs no reloading and needs no temporary
+;; registers--no registers other than the operands. For example, if
+;; you support the pattern with a `define_expand', then in such a
+;; case the `define_expand' mustn't call `force_reg' or any other such
+;; function which might generate new pseudo registers.
+
+;; This requirement exists even for subword modes on a RISC machine
+;; where fetching those modes from memory normally requires several
+;; insns and some temporary registers. Look in `spur.md' to see how
+;; the requirement can be satisfied.
+
+;; During reload a memory reference with an invalid address may be passed as an
+;; operand. Such an address will be replaced with a valid address later in the
+;; reload pass. In this case, nothing may be done with the address except to
+;; use it as it stands. If it is copied, it will not be replaced with a valid
+;; address. No attempt should be made to make such an address into a valid
+;; address and no routine (such as `change_address') that will do so may be
+;; called. Note that `general_operand' will fail when applied to such an
+;; address.
+;;
+;; The global variable `reload_in_progress' (which must be explicitly declared
+;; if required) can be used to determine whether such special handling is
+;; required.
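+;;
+;; A minimal sketch, assuming a move expander (the operand checks are
+;; illustrative), of how this test is typically used:
+;;
+;;   if (!reload_in_progress && !reload_completed
+;;       && !reg_or_0_operand (operands[1], SImode))
+;;     operands[1] = force_reg (SImode, operands[1]);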
+;;
+;; The variety of operands that have reloads depends on the rest of
+;; the machine description, but typically on a RISC machine these can
+;; only be pseudo registers that did not get hard registers, while on
+;; other machines explicit memory references will get optional
+;; reloads.
+;;
+;; If a scratch register is required to move an object to or from memory, it
+;; can be allocated using `gen_reg_rtx' prior to reload. But this is
+;; impossible during and after reload. If there are cases needing scratch
+;; registers after reload, you must define `SECONDARY_INPUT_RELOAD_CLASS' and
+;; perhaps also `SECONDARY_OUTPUT_RELOAD_CLASS' to detect them, and provide
+;; patterns `reload_inM' or `reload_outM' to handle them.
+
+;; The constraints on a `moveM' must permit moving any hard register to any
+;; other hard register provided that `HARD_REGNO_MODE_OK' permits mode M in
+;; both registers and `REGISTER_MOVE_COST' applied to their classes returns a
+;; value of 2.
+
+;; It is obligatory to support floating point `moveM' instructions
+;; into and out of any registers that can hold fixed point values,
+;; because unions and structures (which have modes `SImode' or
+;; `DImode') can be in those registers and they may have floating
+;; point members.
+
+;; There may also be a need to support fixed point `moveM' instructions in and
+;; out of floating point registers. Unfortunately, I have forgotten why this
+;; was so, and I don't know whether it is still true. If `HARD_REGNO_MODE_OK'
+;; rejects fixed point values in floating point registers, then the constraints
+;; of the fixed point `moveM' instructions must be designed to avoid ever
+;; trying to reload into a floating point register.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "{ frv_emit_move (QImode, operands[0], operands[1]); DONE; }")
+
+(define_insn "*movqi_load"
+ [(set (match_operand:QI 0 "register_operand" "=d,f")
+ (match_operand:QI 1 "frv_load_operand" "m,m"))]
+ ""
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload,fload")])
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "move_destination_operand" "=d,d,m,m,?f,?f,?d,?m,f,d,f")
+ (match_operand:QI 1 "move_source_operand" "L,d,d,O, d, f, f, f,GO,!m,!m"))]
+ "register_operand(operands[0], QImode) || reg_or_0_operand (operands[1], QImode)"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,int,gstore,gstore,movgf,fsconv,movfg,fstore,movgf,gload,fload")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "{ frv_emit_move (HImode, operands[0], operands[1]); DONE; }")
+
+(define_insn "*movhi_load"
+ [(set (match_operand:HI 0 "register_operand" "=d,f")
+ (match_operand:HI 1 "frv_load_operand" "m,m"))]
+ ""
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload,fload")])
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "move_destination_operand" "=d,d,d,m,m,?f,?f,?d,?m,f,d,f")
+ (match_operand:HI 1 "move_source_operand" "L,n,d,d,O, d, f, f, f,GO,!m,!m"))]
+ "register_operand(operands[0], HImode) || reg_or_0_operand (operands[1], HImode)"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4,8,4,4,4,4,4,4,4,4,4,4")
+ (set_attr "type" "int,multi,int,gstore,gstore,movgf,fsconv,movfg,fstore,movgf,gload,fload")])
+
+;; Split 2 word load of constants into sethi/setlo instructions
+(define_split
+ [(set (match_operand:HI 0 "integer_register_operand" "")
+ (match_operand:HI 1 "int_2word_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (high:HI (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum:HI (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+(define_insn "movhi_high"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d")
+ (high:HI (match_operand:HI 1 "int_2word_operand" "i")))]
+ ""
+ "sethi #hi(%1), %0"
+ [(set_attr "type" "sethi")
+ (set_attr "length" "4")])
+
+(define_insn "movhi_lo_sum"
+ [(set (match_operand:HI 0 "integer_register_operand" "+d")
+ (lo_sum:HI (match_dup 0)
+ (match_operand:HI 1 "int_2word_operand" "i")))]
+ ""
+ "setlo #lo(%1), %0"
+ [(set_attr "type" "setlo")
+ (set_attr "length" "4")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "move_destination_operand" "")
+ (match_operand:SI 1 "move_source_operand" ""))]
+ ""
+ "{ frv_emit_move (SImode, operands[0], operands[1]); DONE; }")
+
+;; Note - it is best to only have one movsi pattern and to handle
+;; all the various contingencies by the use of alternatives. This
+;; gives reload the greatest amount of flexibility (since reload will
+;; only choose amongst alternatives for a selected insn, it will not
+;; replace the insn with another one).
+
+;; Unfortunately, we do have to separate out load-type moves from the rest,
+;; and only allow memory source operands in the former. If we do memory and
+;; constant loads in a single pattern, reload will be tempted to force
+;; constants into memory when the destination is a floating-point register.
+;; That may make a function use a PIC pointer when it didn't before, and we
+;; cannot change PIC usage (and hence stack layout) so late in the game.
+;; The resulting sequences for loading constants into FPRs are preferable
+;; even when we're not generating PIC code.
+
+;; However, if we don't accept input from memory at all in the generic
+;; movsi pattern, reloads for asm instructions that reference pseudos
+;; that end up assigned to memory will fail to match, because we
+;; recognize them right after they're emitted, and we don't
+;; re-recognize them again after the substitution for memory. So keep
+;; a memory constraint available, just make sure reload won't be
+;; tempted to use it.
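+;; (That is the purpose of the severely disparaged `!m' source
+;; alternatives in the "*movsi_internal" pattern below.)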
+;;
+
+
+(define_insn "*movsi_load"
+ [(set (match_operand:SI 0 "register_operand" "=d,f")
+ (match_operand:SI 1 "frv_load_operand" "m,m"))]
+ ""
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload,fload")])
+
+(define_insn "*movsi_got"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (match_operand:SI 1 "got12_operand" ""))]
+ ""
+ "addi gr0, %1, %0"
+ [(set_attr "type" "int")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_high_got"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (high:SI (match_operand:SI 1 "const_unspec_operand" "")))]
+ ""
+ "sethi %1, %0"
+ [(set_attr "type" "sethi")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_lo_sum_got"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (lo_sum:SI (match_operand:SI 1 "integer_register_operand" "0")
+ (match_operand:SI 2 "const_unspec_operand" "")))]
+ ""
+ "setlo %2, %0"
+ [(set_attr "type" "setlo")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_internal"
+ [(set (match_operand:SI 0 "move_destination_operand" "=d,d,d,m,m,z,d,d,f,f,m,?f,?z,d,f")
+ (match_operand:SI 1 "move_source_operand" "L,n,d,d,O,d,z,f,d,f,f,GO,GO,!m,!m"))]
+ "register_operand (operands[0], SImode) || reg_or_0_operand (operands[1], SImode)"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4,8,4,4,4,4,4,4,4,4,4,4,4,4,4")
+ (set_attr "type" "int,multi,int,gstore,gstore,spr,spr,movfg,movgf,fsconv,fstore,movgf,spr,gload,fload")])
+
+;; Split 2 word load of constants into sethi/setlo instructions
+(define_insn_and_split "*movsi_2word"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (match_operand:SI 1 "int_2word_operand" "i"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0)
+ (high:SI (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum:SI (match_dup 0)
+ (match_dup 1)))]
+ ""
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")])
+
+(define_insn "movsi_high"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (high:SI (match_operand:SI 1 "int_2word_operand" "i")))]
+ ""
+ "sethi #hi(%1), %0"
+ [(set_attr "type" "sethi")
+ (set_attr "length" "4")])
+
+(define_insn "movsi_lo_sum"
+ [(set (match_operand:SI 0 "integer_register_operand" "+d")
+ (lo_sum:SI (match_dup 0)
+ (match_operand:SI 1 "int_2word_operand" "i")))]
+ ""
+ "setlo #lo(%1), %0"
+ [(set_attr "type" "setlo")
+ (set_attr "length" "4")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "{ frv_emit_move (DImode, operands[0], operands[1]); DONE; }")
+
+(define_insn "*movdi_double"
+ [(set (match_operand:DI 0 "move_destination_operand" "=e,?h,??d,??f,R,?R,??m,??m,e,?h,??d,??f,?e,??d,?h,??f,R,m,e,??d,e,??d,?h,??f")
+ (match_operand:DI 1 "move_source_operand" " e,h,d,f,e,h,d,f,R,R,m,m,h,f,e,d,GO,GO,GO,GO,nF,nF,GO,GO"))]
+ "TARGET_DOUBLE
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "* return output_move_double (operands, insn);"
+ [(set_attr "length" "8,4,8,8,4,4,8,8,4,4,8,8,4,8,4,8,4,8,8,8,16,16,8,8")
+ (set_attr "type" "multi,fdconv,multi,multi,gstore,fstore,gstore,fstore,gload,fload,gload,fload,movfg,movfg,movgf,movgf,gstore,gstore,multi,multi,multi,multi,movgf,movgf")])
+
+(define_insn "*movdi_nodouble"
+ [(set (match_operand:DI 0 "move_destination_operand" "=e,?h,??d,??f,R,?R,??m,??m,e,?h,??d,??f,?e,??d,?h,??f,R,m,e,??d,e,??d,?h,??f")
+ (match_operand:DI 1 "move_source_operand" " e,h,d,f,e,h,d,f,R,R,m,m,h,f,e,d,GO,GO,GO,GO,nF,nF,GO,GO"))]
+ "!TARGET_DOUBLE
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ "* return output_move_double (operands, insn);"
+ [(set_attr "length" "8,8,8,8,4,4,8,8,4,4,8,8,8,8,8,8,4,8,8,8,16,16,8,8")
+ (set_attr "type" "multi,multi,multi,multi,gstore,fstore,gstore,fstore,gload,fload,gload,fload,movfg,movfg,movgf,movgf,gstore,gstore,multi,multi,multi,multi,movgf,movgf")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "dbl_memory_two_insn_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_load (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DI 0 "odd_reg_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_load (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DI 0 "dbl_memory_two_insn_operand" "")
+ (match_operand:DI 1 "reg_or_0_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_store (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "odd_reg_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_store (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "reload_completed
+ && (odd_reg_operand (operands[0], DImode)
+ || odd_reg_operand (operands[1], DImode)
+ || (integer_register_operand (operands[0], DImode)
+ && integer_register_operand (operands[1], DImode))
+ || (!TARGET_DOUBLE
+ && fpr_operand (operands[0], DImode)
+ && fpr_operand (operands[1], DImode)))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op0_low = gen_lowpart (SImode, op0);
+ rtx op0_high = gen_highpart (SImode, op0);
+ rtx op1 = operands[1];
+ rtx op1_low = gen_lowpart (SImode, op1);
+ rtx op1_high = gen_highpart (SImode, op1);
+
+ /* We normally copy the low-numbered register first. However, if the first
+ register of operand 0 is the same as the second register of operand 1, we
+ must copy in the opposite order. */
+
+ if (REGNO (op0_high) == REGNO (op1_low))
+ {
+ operands[2] = op0_low;
+ operands[3] = op0_high;
+ operands[4] = op1_low;
+ operands[5] = op1_high;
+ }
+ else
+ {
+ operands[2] = op0_high;
+ operands[3] = op0_low;
+ operands[4] = op1_high;
+ operands[5] = op1_low;
+ }
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_int_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ operands[2] = gen_highpart (SImode, op0);
+ operands[3] = gen_lowpart (SImode, op0);
+ if (HOST_BITS_PER_WIDE_INT <= 32)
+ {
+ operands[4] = GEN_INT ((INTVAL (op1) < 0) ? -1 : 0);
+ operands[5] = op1;
+ }
+ else
+ {
+ operands[4] = gen_int_mode ((INTVAL (op1) >> 16) >> 16, SImode);
+ operands[5] = gen_int_mode (INTVAL (op1), SImode);
+ }
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_double_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ operands[2] = gen_highpart (SImode, op0);
+ operands[3] = gen_lowpart (SImode, op0);
+ operands[4] = GEN_INT (CONST_DOUBLE_HIGH (op1));
+ operands[5] = GEN_INT (CONST_DOUBLE_LOW (op1));
+}")
+
+;; Floating Point Moves
+;;
+;; Note - Patterns for SF mode moves are compulsory, but
+;; patterns for DF are optional, as GCC can synthesize them.
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "{ frv_emit_move (SFmode, operands[0], operands[1]); DONE; }")
+
+(define_split
+ [(set (match_operand:SF 0 "integer_register_operand" "")
+ (match_operand:SF 1 "int_2word_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (high:SF (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum:SF (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+(define_insn "*movsf_load_has_fprs"
+ [(set (match_operand:SF 0 "register_operand" "=f,d")
+ (match_operand:SF 1 "frv_load_operand" "m,m"))]
+ "TARGET_HAS_FPRS"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "fload,gload")])
+
+(define_insn "*movsf_internal_has_fprs"
+ [(set (match_operand:SF 0 "move_destination_operand" "=f,f,m,m,?f,?d,?d,m,?d")
+ (match_operand:SF 1 "move_source_operand" "f,OG,f,OG,d,f,d,d,F"))]
+ "TARGET_HAS_FPRS
+ && (register_operand (operands[0], SFmode) || reg_or_0_operand (operands[1], SFmode))"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4,4,4,4,4,4,4,4,8")
+ (set_attr "type" "fsconv,movgf,fstore,gstore,movgf,movfg,int,gstore,multi")])
+
+;; If the target has no FPRs at all, everything goes through GPRs, since the
+;; floating-point operations will all be emulated anyway
+(define_insn "*movsf_internal_no_fprs"
+ [(set (match_operand:SF 0 "move_destination_operand" "=d,d,m,d,d")
+ (match_operand:SF 1 "move_source_operand" " d,OG,dOG,m,F"))]
+ "!TARGET_HAS_FPRS
+ && (register_operand (operands[0], SFmode) || reg_or_0_operand (operands[1], SFmode))"
+ "* return output_move_single (operands, insn);"
+ [(set_attr "length" "4,4,4,4,8")
+ (set_attr "type" "int,int,gstore,gload,multi")])
+
+(define_insn "movsf_high"
+ [(set (match_operand:SF 0 "integer_register_operand" "=d")
+ (high:SF (match_operand:SF 1 "int_2word_operand" "i")))]
+ ""
+ "sethi #hi(%1), %0"
+ [(set_attr "type" "sethi")
+ (set_attr "length" "4")])
+
+(define_insn "movsf_lo_sum"
+ [(set (match_operand:SF 0 "integer_register_operand" "+d")
+ (lo_sum:SF (match_dup 0)
+ (match_operand:SF 1 "int_2word_operand" "i")))]
+ ""
+ "setlo #lo(%1), %0"
+ [(set_attr "type" "setlo")
+ (set_attr "length" "4")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "{ frv_emit_move (DFmode, operands[0], operands[1]); DONE; }")
+
+(define_insn "*movdf_double"
+ [(set (match_operand:DF 0 "move_destination_operand" "=h,?e,??f,??d,R,?R,??m,??m,h,?e,??f,??d,?h,??f,?e,??d,R,m,h,??f,e,??d,e,??d")
+ (match_operand:DF 1 "move_source_operand" " h,e,f,d,h,e,f,d,R,R,m,m,e,d,h,f,GO,GO,GO,GO,GO,GO,F,F"))]
+ "TARGET_DOUBLE
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ "* return output_move_double (operands, insn);"
+ [(set_attr "length" "4,8,8,8,4,4,8,8,4,4,8,8,4,8,4,8,4,8,8,8,8,8,16,16")
+ (set_attr "type" "fdconv,multi,multi,multi,fstore,gstore,fstore,gstore,fload,gload,fload,gload,movgf,movgf,movfg,movfg,gstore,gstore,movgf,movgf,multi,multi,multi,multi")])
+
+;; If we don't support the double instructions, prefer GPRs over FPRs, since
+;; the operations will all be emulated anyway
+(define_insn "*movdf_nodouble"
+ [(set (match_operand:DF 0 "move_destination_operand" "=e,?h,??d,??f,R,?R,??m,??m,e,?h,??d,??f,?e,??d,?h,??f,R,m,e,??d,e,??d,?h,??f")
+ (match_operand:DF 1 "move_source_operand" " e,h,d,f,e,h,d,f,R,R,m,m,h,f,e,d,GO,GO,GO,GO,nF,nF,GO,GO"))]
+ "!TARGET_DOUBLE
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ "* return output_move_double (operands, insn);"
+ [(set_attr "length" "8,8,8,8,4,4,8,8,4,4,8,8,8,8,8,8,4,8,8,8,16,16,8,8")
+ (set_attr "type" "multi,multi,multi,multi,gstore,fstore,gstore,fstore,gload,fload,gload,fload,movfg,movfg,movgf,movgf,gstore,gstore,multi,multi,multi,multi,movgf,movgf")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "dbl_memory_two_insn_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_load (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DF 0 "odd_reg_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_load (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DF 0 "dbl_memory_two_insn_operand" "")
+ (match_operand:DF 1 "reg_or_0_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_store (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "odd_reg_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "frv_split_double_store (operands[0], operands[1]);")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))]
+ "reload_completed
+ && (odd_reg_operand (operands[0], DFmode)
+ || odd_reg_operand (operands[1], DFmode)
+ || (integer_register_operand (operands[0], DFmode)
+ && integer_register_operand (operands[1], DFmode))
+ || (!TARGET_DOUBLE
+ && fpr_operand (operands[0], DFmode)
+ && fpr_operand (operands[1], DFmode)))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op0_low = gen_lowpart (SImode, op0);
+ rtx op0_high = gen_highpart (SImode, op0);
+ rtx op1 = operands[1];
+ rtx op1_low = gen_lowpart (SImode, op1);
+ rtx op1_high = gen_highpart (SImode, op1);
+
+ /* We normally copy the low-numbered register first. However, if the first
+ register of operand 0 is the same as the second register of operand 1, we
+ must copy in the opposite order. */
+
+ if (REGNO (op0_high) == REGNO (op1_low))
+ {
+ operands[2] = op0_low;
+ operands[3] = op0_high;
+ operands[4] = op1_low;
+ operands[5] = op1_high;
+ }
+ else
+ {
+ operands[2] = op0_high;
+ operands[3] = op0_low;
+ operands[4] = op1_high;
+ operands[5] = op1_low;
+ }
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "const_int_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ operands[2] = gen_highpart (SImode, op0);
+ operands[3] = gen_lowpart (SImode, op0);
+ if (HOST_BITS_PER_WIDE_INT <= 32)
+ {
+ operands[4] = GEN_INT ((INTVAL (op1) < 0) ? -1 : 0);
+ operands[5] = op1;
+ }
+ else
+ {
+ operands[4] = GEN_INT (((((unsigned HOST_WIDE_INT)INTVAL (op1) >> 16)
+ >> 16) ^ ((unsigned HOST_WIDE_INT)1 << 31))
+ - ((unsigned HOST_WIDE_INT)1 << 31));
+ operands[5] = GEN_INT (trunc_int_for_mode (INTVAL (op1), SImode));
+ }
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "const_double_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ REAL_VALUE_TYPE rv;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op1);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+
+ operands[2] = gen_highpart (SImode, op0);
+ operands[3] = gen_lowpart (SImode, op0);
+ operands[4] = GEN_INT (l[0]);
+ operands[5] = GEN_INT (l[1]);
+}")
+
+;; String/block move insn.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ "
+{
+ if (frv_expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; String/block set insn.
+;; Argument 0 is the destination
+;; Argument 1 is the length
+;; Argument 2 is the byte value -- ignore any value but zero
+;; Argument 3 is the alignment
+
+(define_expand "setmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand 2 "" ""))
+ (use (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ "
+{
+ /* If the value to set is not zero, use the library routine. */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (frv_expand_block_clear (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+
+;; The "membar" part of a __builtin_read* or __builtin_write* function.
+;; Operand 0 is a volatile reference to the memory that the function reads
+;; or writes. Operand 1 is the address being accessed, or zero if the
+;; address isn't a known constant. Operand 2 describes the __builtin
+;; function (either FRV_IO_READ or FRV_IO_WRITE).
+(define_insn "optional_membar_<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "=m")
+ (unspec:IMODE [(match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ UNSPEC_OPTIONAL_MEMBAR))]
+ ""
+ "membar"
+ [(set_attr "length" "4")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Reload CC registers
+;; ::
+;; ::::::::::::::::::::
+
+;; Use as a define_expand so that cse/gcse/combine can't accidentally
+;; create movcc insns.
+
+(define_expand "movcc"
+ [(parallel [(set (match_operand:CC 0 "move_destination_operand" "")
+ (match_operand:CC 1 "move_source_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "
+{
+ if (! reload_in_progress && ! reload_completed)
+ FAIL;
+
+ operands[2] = gen_rtx_REG (CC_CCRmode, ICR_TEMP);
+}")
+
+(define_insn "*internal_movcc"
+ [(set (match_operand:CC 0 "move_destination_operand" "=t,d,d,m,d")
+ (match_operand:CC 1 "move_source_operand" "d,d,m,d,t"))
+ (clobber (match_scratch:CC_CCR 2 "=X,X,X,X,&v"))]
+ "reload_in_progress || reload_completed"
+ "@
+ cmpi %1, #0, %0
+ mov %1, %0
+ ld%I1%U1 %M1, %0
+ st%I0%U0 %1, %M0
+ #"
+ [(set_attr "length" "4,4,4,4,20")
+ (set_attr "type" "int,int,gload,gstore,multi")])
+
+;; To move an ICC value to a GPR for a signed comparison, we create a value
+;; that, when compared to 0, sets the N and Z flags appropriately (we don't
+;; care about the V and C flags, since these comparisons are signed).
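+;;
+;; In C-like pseudocode, the split below emits (a sketch of the sequence
+;; actually generated):
+;;
+;;	icr  = (icc < 0);
+;;	dest = 1;
+;;	if (icr) dest = -dest;	/* negative -> -1 */
+;;	icr  = (icc == 0);
+;;	if (icr) dest = 0;	/* zero -> 0 */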
+
+(define_split
+ [(set (match_operand:CC 0 "integer_register_operand" "")
+ (match_operand:CC 1 "icc_operand" ""))
+ (clobber (match_operand:CC_CCR 2 "icr_operand" ""))]
+ "reload_in_progress || reload_completed"
+ [(match_dup 3)]
+ "
+{
+ rtx dest = simplify_gen_subreg (SImode, operands[0], CCmode, 0);
+ rtx icc = operands[1];
+ rtx icr = operands[2];
+
+ start_sequence ();
+
+ emit_insn (gen_rtx_SET (VOIDmode, icr,
+ gen_rtx_LT (CC_CCRmode, icc, const0_rtx)));
+
+ emit_insn (gen_movsi (dest, const1_rtx));
+
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_NEG (SImode, dest))));
+
+ emit_insn (gen_rtx_SET (VOIDmode, icr,
+ gen_rtx_EQ (CC_CCRmode, icc, const0_rtx)));
+
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, const0_rtx)));
+
+ operands[3] = get_insns ();
+ end_sequence ();
+}")
+
+;; Reload CC_UNSmode for unsigned integer comparisons
+;; Use define_expand so that cse/gcse/combine can't create movcc_uns insns
+
+(define_expand "movcc_uns"
+ [(parallel [(set (match_operand:CC_UNS 0 "move_destination_operand" "")
+ (match_operand:CC_UNS 1 "move_source_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "
+{
+ if (! reload_in_progress && ! reload_completed)
+ FAIL;
+ operands[2] = gen_rtx_REG (CC_CCRmode, ICR_TEMP);
+}")
+
+(define_insn "*internal_movcc_uns"
+ [(set (match_operand:CC_UNS 0 "move_destination_operand" "=t,d,d,m,d")
+ (match_operand:CC_UNS 1 "move_source_operand" "d,d,m,d,t"))
+ (clobber (match_scratch:CC_CCR 2 "=X,X,X,X,&v"))]
+ "reload_in_progress || reload_completed"
+ "@
+ cmpi %1, #1, %0
+ mov %1, %0
+ ld%I1%U1 %M1, %0
+ st%I0%U0 %1, %M0
+ #"
+ [(set_attr "length" "4,4,4,4,20")
+ (set_attr "type" "int,int,gload,gstore,multi")])
+
+;; To move an ICC value to a GPR for an unsigned comparison, we create a value
+;; that, when compared to 1, sets the Z, V, and C flags appropriately (we don't
+;; care about the N flag, since these comparisons are unsigned).
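+;;
+;; In C-like pseudocode, the split below emits (a sketch of the sequence
+;; actually generated; <u and >u denote unsigned comparisons of the ICC):
+;;
+;;	icr  = (icc >u 0);
+;;	dest = 1;
+;;	if (icr) dest = dest + dest;	/* above -> 2 */
+;;	icr  = (icc <u 0);
+;;	if (icr) dest = 0;		/* below -> 0 */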
+
+(define_split
+ [(set (match_operand:CC_UNS 0 "integer_register_operand" "")
+ (match_operand:CC_UNS 1 "icc_operand" ""))
+ (clobber (match_operand:CC_CCR 2 "icr_operand" ""))]
+ "reload_in_progress || reload_completed"
+ [(match_dup 3)]
+ "
+{
+ rtx dest = simplify_gen_subreg (SImode, operands[0], CC_UNSmode, 0);
+ rtx icc = operands[1];
+ rtx icr = operands[2];
+
+ start_sequence ();
+
+ emit_insn (gen_rtx_SET (VOIDmode, icr,
+ gen_rtx_GTU (CC_CCRmode, icc, const0_rtx)));
+
+ emit_insn (gen_movsi (dest, const1_rtx));
+
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
+ gen_addsi3 (dest, dest, dest)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, icr,
+ gen_rtx_LTU (CC_CCRmode, icc, const0_rtx)));
+
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
+ gen_rtx_SET (VOIDmode, dest, const0_rtx)));
+
+ operands[3] = get_insns ();
+ end_sequence ();
+}")
+
+;; Reload CC_NZmode. This is mostly the same as the CCmode and CC_UNSmode
+;; handling, but it uses different sequences for moving between GPRs and ICCs.
+
+(define_expand "movcc_nz"
+ [(parallel [(set (match_operand:CC_NZ 0 "move_destination_operand" "")
+ (match_operand:CC_NZ 1 "move_source_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "
+{
+ if (!reload_in_progress && !reload_completed)
+ FAIL;
+ operands[2] = gen_rtx_REG (CC_CCRmode, ICR_TEMP);
+}")
+
+(define_insn "*internal_movcc_nz"
+ [(set (match_operand:CC_NZ 0 "move_destination_operand" "=t,d,d,m,d")
+ (match_operand:CC_NZ 1 "move_source_operand" "d,d,m,d,t"))
+ (clobber (match_scratch:CC_CCR 2 "=X,X,X,X,&v"))]
+ "reload_in_progress || reload_completed"
+ "@
+ cmpi %1, #0, %0
+ mov %1, %0
+ ld%I1%U1 %M1, %0
+ st%I0%U0 %1, %M0
+ #"
+ [(set_attr "length" "4,4,4,4,20")
+ (set_attr "type" "int,int,gload,gstore,multi")])
+
+;; Set the destination to a value that, when compared with zero, will
+;; restore the value of the Z and N flags. The values of the other
+;; flags don't matter. The sequence is:
+;;
+;; setlos op0,#-1
+;; ckp op1,op2
+;; csub gr0,op0,op0,op2
+;; ckeq op1,op2
+;; cmov gr0,op0,op2
+(define_split
+ [(set (match_operand:CC_NZ 0 "integer_register_operand" "")
+ (match_operand:CC_NZ 1 "icc_operand" ""))
+ (clobber (match_operand:CC_CCR 2 "icr_operand" ""))]
+ "reload_in_progress || reload_completed"
+ [(set (match_dup 3)
+ (const_int -1))
+ (set (match_dup 2)
+ (ge:CC_CCR (match_dup 1)
+ (const_int 0)))
+ (cond_exec (ne:CC_CCR (match_dup 2)
+ (const_int 0))
+ (set (match_dup 3)
+ (neg:SI (match_dup 3))))
+ (set (match_dup 2)
+ (eq:CC_CCR (match_dup 1)
+ (const_int 0)))
+ (cond_exec (ne:CC_CCR (match_dup 2)
+ (const_int 0))
+ (set (match_dup 3) (const_int 0)))]
+ "operands[3] = simplify_gen_subreg (SImode, operands[0], CC_NZmode, 0);")
+
+;; Reload CC_FPmode for floating point comparisons.
+;; We use a define_expand here so that cse/gcse/combine can't accidentally
+;; create movcc insns. If this were a named define_insn, we would not be able
+;; to make it conditional on reload.
+
+(define_expand "movcc_fp"
+ [(set (match_operand:CC_FP 0 "movcc_fp_destination_operand" "")
+ (match_operand:CC_FP 1 "move_source_operand" ""))]
+ "TARGET_HAS_FPRS"
+ "
+{
+ if (! reload_in_progress && ! reload_completed)
+ FAIL;
+}")
+
+(define_insn "*movcc_fp_internal"
+ [(set (match_operand:CC_FP 0 "movcc_fp_destination_operand" "=d,d,d,m")
+ (match_operand:CC_FP 1 "move_source_operand" "u,d,m,d"))]
+ "TARGET_HAS_FPRS && (reload_in_progress || reload_completed)"
+ "@
+ #
+ mov %1, %0
+ ld%I1%U1 %M1, %0
+ st%I0%U0 %1, %M0"
+ [(set_attr "length" "12,4,4,4")
+ (set_attr "type" "multi,int,gload,gstore")])
+
+
+(define_expand "reload_incc_fp"
+ [(match_operand:CC_FP 0 "fcc_operand" "=u")
+ (match_operand:CC_FP 1 "gpr_or_memory_operand_with_scratch" "m")
+ (match_operand:TI 2 "integer_register_operand" "=&d")]
+ "TARGET_HAS_FPRS"
+ "
+{
+ rtx cc_op2 = simplify_gen_subreg (CC_FPmode, operands[2], TImode, 0);
+ rtx int_op2 = simplify_gen_subreg (SImode, operands[2], TImode, 0);
+ rtx temp1 = simplify_gen_subreg (SImode, operands[2], TImode, 4);
+ rtx temp2 = simplify_gen_subreg (SImode, operands[2], TImode, 8);
+ int shift = CC_SHIFT_RIGHT (REGNO (operands[0]));
+ HOST_WIDE_INT mask;
+
+ if (!gpr_or_memory_operand (operands[1], CC_FPmode))
+ {
+ rtx addr;
+ rtx temp3 = simplify_gen_subreg (SImode, operands[2], TImode, 12);
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ addr = XEXP (operands[1], 0);
+
+ gcc_assert (GET_CODE (addr) == PLUS);
+
+ emit_move_insn (temp3, XEXP (addr, 1));
+
+ operands[1] = replace_equiv_address (operands[1],
+ gen_rtx_PLUS (GET_MODE (addr),
+ XEXP (addr, 0),
+ temp3));
+ }
+
+ emit_insn (gen_movcc_fp (cc_op2, operands[1]));
+ if (shift)
+ emit_insn (gen_ashlsi3 (int_op2, int_op2, GEN_INT (shift)));
+
+ mask = ~ ((HOST_WIDE_INT)CC_MASK << shift);
+ emit_insn (gen_movsi (temp1, GEN_INT (mask)));
+ emit_insn (gen_update_fcc (operands[0], int_op2, temp1, temp2));
+ DONE;
+}")
+
+(define_expand "reload_outcc_fp"
+ [(set (match_operand:CC_FP 2 "integer_register_operand" "=&d")
+ (match_operand:CC_FP 1 "fcc_operand" "u"))
+ (set (match_operand:CC_FP 0 "memory_operand" "=m")
+ (match_dup 2))]
+ "TARGET_HAS_FPRS"
+ "")
+
+;; Convert an FCC value to a GPR
+(define_insn "read_fcc"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:CC_FP 1 "fcc_operand" "u")]
+ UNSPEC_CC_TO_GPR))]
+ "TARGET_HAS_FPRS"
+ "movsg ccr, %0"
+ [(set_attr "type" "spr")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:CC_FP 0 "integer_register_operand" "")
+ (match_operand:CC_FP 1 "fcc_operand" ""))]
+ "reload_completed && TARGET_HAS_FPRS"
+ [(match_dup 2)]
+ "
+{
+ rtx int_op0 = simplify_gen_subreg (SImode, operands[0], CC_FPmode, 0);
+ int shift = CC_SHIFT_RIGHT (REGNO (operands[1]));
+
+ start_sequence ();
+
+ emit_insn (gen_read_fcc (int_op0, operands[1]));
+ if (shift)
+ emit_insn (gen_lshrsi3 (int_op0, int_op0, GEN_INT (shift)));
+
+ emit_insn (gen_andsi3 (int_op0, int_op0, GEN_INT (CC_MASK)));
+
+ operands[2] = get_insns ();
+ end_sequence ();
+}")
+
+;; Move a GPR value to an FCC.
+;; Operand 0 = FCC
+;; Operand 1 = reloaded value, shifted appropriately
+;; Operand 2 = mask that clears this FCC's field in the CCR
+;; Operand 3 = temporary used to load/store the CCR
+(define_insn "update_fcc"
+ [(set (match_operand:CC_FP 0 "fcc_operand" "=u")
+ (unspec:CC_FP [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_GPR_TO_CC))
+ (clobber (match_operand:SI 3 "integer_register_operand" "=&d"))]
+ "TARGET_HAS_FPRS"
+ "movsg ccr, %3\;and %2, %3, %3\;or %1, %3, %3\;movgs %3, ccr"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+;; Reload CC_CCRmode for conditional execution registers
+(define_insn "movcc_ccr"
+ [(set (match_operand:CC_CCR 0 "move_destination_operand" "=d,d,d,m,v,?w,C,d")
+ (match_operand:CC_CCR 1 "move_source_operand" "C,d,m,d,n,n,C,L"))]
+ ""
+ "@
+ #
+ mov %1, %0
+ ld%I1%U1 %M1, %0
+ st%I0%U0 %1, %M0
+ #
+ #
+ orcr %1, %1, %0
+ setlos #%1, %0"
+ [(set_attr "length" "8,4,4,4,8,12,4,4")
+ (set_attr "type" "multi,int,gload,gstore,multi,multi,ccr,int")])
+
+(define_expand "reload_incc_ccr"
+ [(match_operand:CC_CCR 0 "cr_operand" "=C")
+ (match_operand:CC_CCR 1 "memory_operand" "m")
+ (match_operand:CC_CCR 2 "integer_register_operand" "=&d")]
+ ""
+ "
+{
+ rtx icc = gen_rtx_REG (CCmode, ICC_TEMP);
+ rtx int_op2 = simplify_gen_subreg (SImode, operands[2], CC_CCRmode, 0);
+ rtx icr = (ICR_P (REGNO (operands[0]))
+ ? operands[0] : gen_rtx_REG (CC_CCRmode, ICR_TEMP));
+
+ emit_insn (gen_movcc_ccr (operands[2], operands[1]));
+ emit_insn (gen_cmpsi_cc (icc, int_op2, const0_rtx));
+ emit_insn (gen_movcc_ccr (icr, gen_rtx_NE (CC_CCRmode, icc, const0_rtx)));
+
+ if (! ICR_P (REGNO (operands[0])))
+ emit_insn (gen_movcc_ccr (operands[0], icr));
+
+ DONE;
+}")
+
+(define_expand "reload_outcc_ccr"
+ [(set (match_operand:CC_CCR 2 "integer_register_operand" "=&d")
+ (match_operand:CC_CCR 1 "cr_operand" "C"))
+ (set (match_operand:CC_CCR 0 "memory_operand" "=m")
+ (match_dup 2))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:CC_CCR 0 "integer_register_operand" "")
+ (match_operand:CC_CCR 1 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 2)]
+ "
+{
+ rtx int_op0 = simplify_gen_subreg (SImode, operands[0], CC_CCRmode, 0);
+
+ start_sequence ();
+ emit_move_insn (operands[0], const1_rtx);
+ emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+ gen_rtx_EQ (CC_CCRmode,
+ operands[1],
+ const0_rtx),
+ gen_rtx_SET (VOIDmode, int_op0,
+ const0_rtx)));
+
+ operands[2] = get_insns ();
+ end_sequence ();
+}")
+
+(define_split
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (match_operand:CC_CCR 1 "const_int_operand" ""))]
+ "reload_completed"
+ [(match_dup 2)]
+ "
+{
+ rtx icc = gen_rtx_REG (CCmode, ICC_TEMP);
+ rtx r0 = gen_rtx_REG (SImode, GPR_FIRST);
+ rtx icr = (ICR_P (REGNO (operands[0]))
+ ? operands[0] : gen_rtx_REG (CC_CCRmode, ICR_TEMP));
+
+ start_sequence ();
+
+ emit_insn (gen_cmpsi_cc (icc, r0, const0_rtx));
+
+ emit_insn (gen_movcc_ccr (icr,
+ gen_rtx_fmt_ee (((INTVAL (operands[1]) == 0)
+ ? EQ : NE), CC_CCRmode,
+ r0, const0_rtx)));
+
+ if (! ICR_P (REGNO (operands[0])))
+ emit_insn (gen_movcc_ccr (operands[0], icr));
+
+ operands[2] = get_insns ();
+ end_sequence ();
+}")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conversions
+;; ::
+;; ::::::::::::::::::::
+
+;; Signed conversions from a smaller integer to a larger integer.
+;;
+;; These operations are optional; if they are not present, GCC will
+;; synthesize them itself. Even though the FRV does not provide these
+;; instructions, we define them so that a load and a sign extension can
+;; be collapsed together.
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d")
+ (sign_extend:HI (match_operand:QI 1 "gpr_or_memory_operand" "d,m")))]
+ ""
+ "@
+ #
+ ldsb%I1%U1 %M1,%0"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "multi,gload")])
+
+(define_split
+ [(set (match_operand:HI 0 "integer_register_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "integer_register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx op0 = gen_lowpart (SImode, operands[0]);
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx shift = GEN_INT (24);
+
+ operands[2] = gen_ashlsi3 (op0, op1, shift);
+ operands[3] = gen_ashrsi3 (op0, op0, shift);
+}")
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (sign_extend:SI (match_operand:QI 1 "gpr_or_memory_operand" "d,m")))]
+ ""
+ "@
+ #
+ ldsb%I1%U1 %M1,%0"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "multi,gload")])
+
+(define_split
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "integer_register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx op0 = gen_lowpart (SImode, operands[0]);
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx shift = GEN_INT (24);
+
+ operands[2] = gen_ashlsi3 (op0, op1, shift);
+ operands[3] = gen_ashrsi3 (op0, op0, shift);
+}")
+
+;;(define_insn "extendqidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (sign_extend:DI (match_operand:QI 1 "general_operand" "g")))]
+;; ""
+;; "extendqihi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (sign_extend:SI (match_operand:HI 1 "gpr_or_memory_operand" "d,m")))]
+ ""
+ "@
+ #
+ ldsh%I1%U1 %M1,%0"
+ [(set_attr "length" "8,4")
+ (set_attr "type" "multi,gload")])
+
+(define_split
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "integer_register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx op0 = gen_lowpart (SImode, operands[0]);
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx shift = GEN_INT (16);
+
+ operands[2] = gen_ashlsi3 (op0, op1, shift);
+ operands[3] = gen_ashrsi3 (op0, op0, shift);
+}")
+
+;;(define_insn "extendhidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (sign_extend:DI (match_operand:HI 1 "general_operand" "g")))]
+;; ""
+;; "extendhihi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "extendsidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (sign_extend:DI (match_operand:SI 1 "general_operand" "g")))]
+;; ""
+;; "extendsidi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+;; Unsigned conversions from a smaller integer to a larger integer
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d,d")
+ (zero_extend:HI
+ (match_operand:QI 1 "gpr_or_memory_operand" "d,L,m")))]
+ ""
+ "@
+ andi %1,#0xff,%0
+ setlos %1,%0
+ ldub%I1%U1 %M1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,int,gload")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,d")
+ (zero_extend:SI
+ (match_operand:QI 1 "gpr_or_memory_operand" "d,L,m")))]
+ ""
+ "@
+ andi %1,#0xff,%0
+ setlos %1,%0
+ ldub%I1%U1 %M1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,int,gload")])
+
+;;(define_insn "zero_extendqidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (zero_extend:DI (match_operand:QI 1 "general_operand" "g")))]
+;; ""
+;; "zero_extendqihi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+;; Do not set the type of the sethi here to "sethi": the scheduler would then
+;; treat the sethi as taking 0 cycles, a special case intended only to let a
+;; sethi/setlo pair issue in the same VLIW instruction.
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (zero_extend:SI (match_operand:HI 1 "gpr_or_memory_operand" "0,m")))]
+ ""
+ "@
+ sethi #hi(#0),%0
+ lduh%I1%U1 %M1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,gload")])
+
+;;(define_insn "zero_extendhidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (zero_extend:DI (match_operand:HI 1 "general_operand" "g")))]
+;; ""
+;; "zero_extendhihi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "zero_extendsidi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (zero_extend:DI (match_operand:SI 1 "general_operand" "g")))]
+;; ""
+;; "zero_extendsidi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;;; Convert between floating point types of different sizes.
+;;
+;;(define_insn "extendsfdf2"
+;; [(set (match_operand:DF 0 "register_operand" "=r")
+;; (float_extend:DF (match_operand:SF 1 "register_operand" "r")))]
+;; ""
+;; "extendsfdf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "truncdfsf2"
+;; [(set (match_operand:SF 0 "register_operand" "=r")
+;; (float_truncate:SF (match_operand:DF 1 "register_operand" "r")))]
+;; ""
+;; "truncdfsf2 %0,%1"
+;; [(set_attr "length" "4")])
+
+;;;; Convert between signed integer types and floating point.
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (float:SF (match_operand:SI 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fitos %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "fpr_operand" "=h")
+ (float:DF (match_operand:SI 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fitod %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdconv")])
+
+;;(define_insn "floatdisf2"
+;; [(set (match_operand:SF 0 "register_operand" "=r")
+;; (float:SF (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "floatdisf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "floatdidf2"
+;; [(set (match_operand:DF 0 "register_operand" "=r")
+;; (float:DF (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "floatdidf2 %0,%1"
+;; [(set_attr "length" "4")])
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (fix:SI (match_operand:SF 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fstoi %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (fix:SI (match_operand:DF 1 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fdtoi %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdconv")])
+
+;;(define_insn "fix_truncsfdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (fix:DI (match_operand:SF 1 "register_operand" "r")))]
+;; ""
+;; "fix_truncsfdi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "fix_truncdfdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (fix:DI (match_operand:DF 1 "register_operand" "r")))]
+;; ""
+;; "fix_truncdfdi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;;; Convert between unsigned integer types and floating point.
+;;
+;;(define_insn "floatunssisf2"
+;; [(set (match_operand:SF 0 "register_operand" "=r")
+;; (unsigned_float:SF (match_operand:SI 1 "register_operand" "r")))]
+;; ""
+;; "floatunssisf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "floatunssidf2"
+;; [(set (match_operand:DF 0 "register_operand" "=r")
+;; (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))]
+;; ""
+;; "floatunssidf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "floatunsdisf2"
+;; [(set (match_operand:SF 0 "register_operand" "=r")
+;; (unsigned_float:SF (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "floatunsdisf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "floatunsdidf2"
+;; [(set (match_operand:DF 0 "register_operand" "=r")
+;; (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "floatunsdidf2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "fixuns_truncsfsi2"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (unsigned_fix:SI (match_operand:SF 1 "register_operand" "r")))]
+;; ""
+;; "fixuns_truncsfsi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "fixuns_truncdfsi2"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (unsigned_fix:SI (match_operand:DF 1 "register_operand" "r")))]
+;; ""
+;; "fixuns_truncdfsi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "fixuns_truncsfdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (unsigned_fix:DI (match_operand:SF 1 "register_operand" "r")))]
+;; ""
+;; "fixuns_truncsfdi2 %0,%1"
+;; [(set_attr "length" "4")])
+;;
+;;(define_insn "fixuns_truncdfdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (unsigned_fix:DI (match_operand:DF 1 "register_operand" "r")))]
+;; ""
+;; "fixuns_truncdfdi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (plus:SI (match_operand:SI 1 "integer_register_operand" "%d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "dNOPQ")))]
+ ""
+ "add%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Subtraction. No need to worry about constants, since the compiler
+;; canonicalizes them into addsi3's. We disallow SUBREGs here to work around
+;; a combine bug that merges the 32x32->upper-32-bit multiply (which uses a
+;; SUBREG) with a minus that shows up in modulus by constants.
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "gpr_no_subreg_operand" "d")
+ (match_operand:SI 2 "gpr_no_subreg_operand" "d")))]
+ ""
+ "sub %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Signed multiplication producing 64-bit results from 32-bit inputs.
+;; Note that the FRV doesn't have a 32x32->32-bit multiply; the compiler
+;; does the 32x32->64-bit multiply and uses the bottom word.
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "integer_register_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "integer_register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "gpr_or_int12_operand" ""))))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_mulsidi3_const (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*mulsidi3_reg"
+ [(set (match_operand:DI 0 "even_gpr_operand" "=e")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "integer_register_operand" "%d"))
+ (sign_extend:DI (match_operand:SI 2 "integer_register_operand" "d"))))]
+ ""
+ "smul %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+(define_insn "mulsidi3_const"
+ [(set (match_operand:DI 0 "even_gpr_operand" "=e")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "integer_register_operand" "d"))
+ (match_operand:SI 2 "int12_operand" "NOP")))]
+ ""
+ "smuli %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+;; Unsigned multiplication producing 64-bit results from 32-bit inputs
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "even_gpr_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "integer_register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "gpr_or_int12_operand" ""))))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_umulsidi3_const (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*mulsidi3_reg"
+ [(set (match_operand:DI 0 "even_gpr_operand" "=e")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "integer_register_operand" "%d"))
+ (zero_extend:DI (match_operand:SI 2 "integer_register_operand" "d"))))]
+ ""
+ "umul %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+(define_insn "umulsidi3_const"
+ [(set (match_operand:DI 0 "even_gpr_operand" "=e")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "integer_register_operand" "d"))
+ (match_operand:SI 2 "int12_operand" "NOP")))]
+ ""
+ "umuli %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+;; Signed Division
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (div:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "d,NOP")))]
+ ""
+ "sdiv%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "div")])
+
+;; Unsigned Division
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (udiv:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "d,NOP")))]
+ ""
+ "udiv%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "div")])
+
+;; Negation
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "integer_register_operand" "d")))]
+ ""
+ "sub %.,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Find first one bit
+;; (define_insn "ffssi2"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (ffs:SI (match_operand:SI 1 "register_operand" "r")))]
+;; ""
+;; "ffssi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+(define_insn_and_split "adddi3"
+ [(set (match_operand:DI 0 "integer_register_operand" "=&e,e")
+ (plus:DI (match_operand:DI 1 "integer_register_operand" "%e,0")
+ (match_operand:DI 2 "gpr_or_int10_operand" "eJ,eJ")))
+ (clobber (match_scratch:CC 3 "=t,t"))]
+ ""
+ "#"
+ "reload_completed"
+ [(match_dup 4)
+ (match_dup 5)]
+ "
+{
+ rtx parts[3][2];
+ int op, part;
+
+ for (op = 0; op < 3; op++)
+ for (part = 0; part < 2; part++)
+ parts[op][part] = simplify_gen_subreg (SImode, operands[op],
+ DImode, part * UNITS_PER_WORD);
+
+ operands[4] = gen_adddi3_lower (parts[0][1], parts[1][1], parts[2][1],
+ operands[3]);
+ operands[5] = gen_adddi3_upper (parts[0][0], parts[1][0], parts[2][0],
+ copy_rtx (operands[3]));
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")])
+
+;; Subtraction. No need to worry about constants, since the compiler
+;; canonicalizes them into adddi3's.
+(define_insn_and_split "subdi3"
+ [(set (match_operand:DI 0 "integer_register_operand" "=&e,e,e")
+ (minus:DI (match_operand:DI 1 "integer_register_operand" "e,0,e")
+ (match_operand:DI 2 "integer_register_operand" "e,e,0")))
+ (clobber (match_scratch:CC 3 "=t,t,t"))]
+ ""
+ "#"
+ "reload_completed"
+ [(match_dup 4)
+ (match_dup 5)]
+ "
+{
+ rtx op0_high = gen_highpart (SImode, operands[0]);
+ rtx op1_high = gen_highpart (SImode, operands[1]);
+ rtx op2_high = gen_highpart (SImode, operands[2]);
+ rtx op0_low = gen_lowpart (SImode, operands[0]);
+ rtx op1_low = gen_lowpart (SImode, operands[1]);
+ rtx op2_low = gen_lowpart (SImode, operands[2]);
+ rtx op3 = operands[3];
+
+ operands[4] = gen_subdi3_lower (op0_low, op1_low, op2_low, op3);
+ operands[5] = gen_subdi3_upper (op0_high, op1_high, op2_high, op3);
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")])
+
+;; Patterns for adddi3/subdi3 after splitting
+(define_insn "adddi3_lower"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (plus:SI (match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "gpr_or_int10_operand" "dJ")))
+ (set (match_operand:CC 3 "icc_operand" "=t")
+ (compare:CC (plus:SI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))]
+ ""
+ "add%I2cc %1,%2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "adddi3_upper"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (plus:SI (match_operand:SI 1 "integer_register_operand" "d")
+ (plus:SI (match_operand:SI 2 "gpr_or_int10_operand" "dJ")
+ (match_operand:CC 3 "icc_operand" "t"))))]
+ ""
+ "addx%I2 %1,%2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "subdi3_lower"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")))
+ (set (match_operand:CC 3 "icc_operand" "=t")
+ (compare:CC (plus:SI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))]
+ ""
+ "subcc %1,%2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "subdi3_upper"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "integer_register_operand" "d")
+ (minus:SI (match_operand:SI 2 "integer_register_operand" "d")
+ (match_operand:CC 3 "icc_operand" "t"))))]
+ ""
+ "subx %1,%2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn_and_split "negdi2"
+ [(set (match_operand:DI 0 "integer_register_operand" "=&e,e")
+ (neg:DI (match_operand:DI 1 "integer_register_operand" "e,0")))
+ (clobber (match_scratch:CC 2 "=t,t"))]
+ ""
+ "#"
+ "reload_completed"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx op0_high = gen_highpart (SImode, operands[0]);
+ rtx op1_high = gen_rtx_REG (SImode, GPR_FIRST);
+ rtx op2_high = gen_highpart (SImode, operands[1]);
+ rtx op0_low = gen_lowpart (SImode, operands[0]);
+ rtx op1_low = op1_high;
+ rtx op2_low = gen_lowpart (SImode, operands[1]);
+ rtx op3 = operands[2];
+
+ operands[3] = gen_subdi3_lower (op0_low, op1_low, op2_low, op3);
+ operands[4] = gen_subdi3_upper (op0_high, op1_high, op2_high, op3);
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")])
+
+;; Multiplication (same size)
+;; (define_insn "muldi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (mult:DI (match_operand:DI 1 "register_operand" "%r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "muldi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Signed Division
+;; (define_insn "divdi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (div:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "divdi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Unsigned Division
+;; (define_insn "udivdi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (udiv:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "udivdi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Negation
+;; (define_insn "negdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "negdi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+;; Find first one bit
+;; (define_insn "ffsdi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (ffs:DI (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "ffsdi2 %0,%1"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit floating point arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fpr_operand" "%f")
+ (match_operand:SF 2 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fadds %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsadd")])
+
+;; Subtraction
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fpr_operand" "f")
+ (match_operand:SF 2 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fsubs %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsadd")])
+
+;; Multiplication
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fpr_operand" "%f")
+ (match_operand:SF 2 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fmuls %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsmul")])
+
+;; Multiplication with addition/subtraction
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fpr_operand" "f")
+ (match_operand:SF 2 "fpr_operand" "f")
+ (match_operand:SF 3 "fpr_operand" "0")))]
+ "TARGET_HARD_FLOAT && TARGET_MULADD"
+ "fmadds %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsmadd")])
+
+(define_insn "fmssf4"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fpr_operand" "f")
+ (match_operand:SF 2 "fpr_operand" "f")
+ (neg:SF (match_operand:SF 3 "fpr_operand" "0"))))]
+ "TARGET_HARD_FLOAT && TARGET_MULADD"
+ "fmsubs %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsmadd")])
+
+;; Division
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (div:SF (match_operand:SF 1 "fpr_operand" "f")
+ (match_operand:SF 2 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fdivs %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsdiv")])
+
+;; Negation
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fnegs %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+;; Absolute value
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fabss %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+;; Square root
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "fpr_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fsqrts %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "sqrt_single")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit floating point arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (plus:DF (match_operand:DF 1 "fpr_operand" "%h")
+ (match_operand:DF 2 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "faddd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdadd")])
+
+;; Subtraction
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (minus:DF (match_operand:DF 1 "fpr_operand" "h")
+ (match_operand:DF 2 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fsubd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdadd")])
+
+;; Multiplication
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (mult:DF (match_operand:DF 1 "fpr_operand" "%h")
+ (match_operand:DF 2 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fmuld %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdmul")])
+
+;; Multiplication with addition/subtraction
+(define_insn "*muladddf4"
+ [(set (match_operand:DF 0 "fpr_operand" "=f")
+ (plus:DF (mult:DF (match_operand:DF 1 "fpr_operand" "%f")
+ (match_operand:DF 2 "fpr_operand" "f"))
+ (match_operand:DF 3 "fpr_operand" "0")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE && TARGET_MULADD"
+ "fmaddd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdmadd")])
+
+(define_insn "*mulsubdf4"
+ [(set (match_operand:DF 0 "fpr_operand" "=f")
+ (minus:DF (mult:DF (match_operand:DF 1 "fpr_operand" "%f")
+ (match_operand:DF 2 "fpr_operand" "f"))
+ (match_operand:DF 3 "fpr_operand" "0")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE && TARGET_MULADD"
+ "fmsubd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdmadd")])
+
+;; Division
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (div:DF (match_operand:DF 1 "fpr_operand" "h")
+ (match_operand:DF 2 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fdivd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fddiv")])
+
+;; Negation
+(define_insn "negdf2"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (neg:DF (match_operand:DF 1 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fnegd %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdconv")])
+
+;; Absolute value
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (abs:DF (match_operand:DF 1 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fabsd %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdconv")])
+
+;; Square root
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "even_fpr_operand" "=h")
+ (sqrt:DF (match_operand:DF 1 "fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fsqrtd %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "sqrt_double")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+;; Arithmetic Shift Left
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (ashift:SI (match_operand:SI 1 "integer_register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "d,NOP")))]
+ ""
+ "sll%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Arithmetic Shift Right
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (ashiftrt:SI (match_operand:SI 1 "integer_register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "d,NOP")))]
+ ""
+ "sra%I2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Logical Shift Right
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (lshiftrt:SI (match_operand:SI 1 "integer_register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "d,NOP")))]
+ ""
+ "srl%I2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Rotate Left
+;; (define_insn "rotlsi3"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (rotate:SI (match_operand:SI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "rotlsi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Rotate Right
+;; (define_insn "rotrsi3"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (rotatert:SI (match_operand:SI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "rotrsi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+;; Arithmetic Shift Left
+;; (define_insn "ashldi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (ashift:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "ashldi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Arithmetic Shift Right
+;; (define_insn "ashrdi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "ashrdi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Logical Shift Right
+;; (define_insn "lshrdi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "lshrdi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Rotate Left
+;; (define_insn "rotldi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (rotate:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "rotldi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Rotate Right
+;; (define_insn "rotrdi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (rotatert:DI (match_operand:DI 1 "register_operand" "r")
+;; (match_operand:SI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "rotrdi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-Bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+;; Logical AND, 32-bit integers
+(define_insn "andsi3_media"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "=d,f")
+ (and:SI (match_operand:SI 1 "gpr_or_fpr_operand" "%d,f")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "dNOP,f")))]
+ "TARGET_MEDIA"
+ "@
+ and%I2 %1, %2, %0
+ mand %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,mlogic")])
+
+(define_insn "andsi3_nomedia"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (and:SI (match_operand:SI 1 "integer_register_operand" "%d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "dNOP")))]
+ "!TARGET_MEDIA"
+ "and%I2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "")
+ (and:SI (match_operand:SI 1 "gpr_or_fpr_operand" "")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "")))]
+ ""
+ "")
+
+;; Inclusive OR, 32-bit integers
+(define_insn "iorsi3_media"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "=d,f")
+ (ior:SI (match_operand:SI 1 "gpr_or_fpr_operand" "%d,f")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "dNOP,f")))]
+ "TARGET_MEDIA"
+ "@
+ or%I2 %1, %2, %0
+ mor %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,mlogic")])
+
+(define_insn "iorsi3_nomedia"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (ior:SI (match_operand:SI 1 "integer_register_operand" "%d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "dNOP")))]
+ "!TARGET_MEDIA"
+ "or%I2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "")
+ (ior:SI (match_operand:SI 1 "gpr_or_fpr_operand" "")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "")))]
+ ""
+ "")
+
+;; Exclusive OR, 32-bit integers
+(define_insn "xorsi3_media"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "=d,f")
+ (xor:SI (match_operand:SI 1 "gpr_or_fpr_operand" "%d,f")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "dNOP,f")))]
+ "TARGET_MEDIA"
+ "@
+ xor%I2 %1, %2, %0
+ mxor %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,mlogic")])
+
+(define_insn "xorsi3_nomedia"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (xor:SI (match_operand:SI 1 "integer_register_operand" "%d")
+ (match_operand:SI 2 "gpr_or_int12_operand" "dNOP")))]
+ "!TARGET_MEDIA"
+ "xor%I2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "")
+ (xor:SI (match_operand:SI 1 "gpr_or_fpr_operand" "")
+ (match_operand:SI 2 "gpr_fpr_or_int12_operand" "")))]
+ ""
+ "")
+
+;; One's complement, 32-bit integers
+(define_insn "one_cmplsi2_media"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "=d,f")
+ (not:SI (match_operand:SI 1 "gpr_or_fpr_operand" "d,f")))]
+ "TARGET_MEDIA"
+ "@
+ not %1, %0
+ mnot %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,mlogic")])
+
+(define_insn "one_cmplsi2_nomedia"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (not:SI (match_operand:SI 1 "integer_register_operand" "d")))]
+ "!TARGET_MEDIA"
+ "not %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "gpr_or_fpr_operand" "")
+ (not:SI (match_operand:SI 1 "gpr_or_fpr_operand" "")))]
+ ""
+ "")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-Bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+;; Logical AND, 64-bit integers
+;; (define_insn "anddi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (and:DI (match_operand:DI 1 "register_operand" "%r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "anddi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Inclusive OR, 64-bit integers
+;; (define_insn "iordi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (ior:DI (match_operand:DI 1 "register_operand" "%r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "iordi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; Exclusive OR, 64-bit integers
+;; (define_insn "xordi3"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (xor:DI (match_operand:DI 1 "register_operand" "%r")
+;; (match_operand:DI 2 "nonmemory_operand" "ri")))]
+;; ""
+;; "xordi3 %0,%1,%2"
+;; [(set_attr "length" "4")])
+
+;; One's complement, 64-bit integers
+;; (define_insn "one_cmpldi2"
+;; [(set (match_operand:DI 0 "register_operand" "=r")
+;; (not:DI (match_operand:DI 1 "register_operand" "r")))]
+;; ""
+;; "notdi3 %0,%1"
+;; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Combination of integer operation with comparison
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "*combo_intop_compare1"
+ [(set (match_operand:CC_NZ 0 "icc_operand" "=t")
+ (compare:CC_NZ
+ (match_operator:SI 1 "intop_compare_operator"
+ [(match_operand:SI 2 "integer_register_operand" "d")
+ (match_operand:SI 3 "gpr_or_int10_operand" "dJ")])
+ (const_int 0)))]
+ ""
+ "%O1%I3cc %2, %3, %., %0"
+ [(set_attr "type" "int")
+ (set_attr "length" "4")])
+
+(define_insn "*combo_intop_compare2"
+ [(set (match_operand:CC_NZ 0 "icc_operand" "=t")
+ (compare:CC_NZ
+ (match_operator:SI 1 "intop_compare_operator"
+ [(match_operand:SI 2 "integer_register_operand" "d")
+ (match_operand:SI 3 "gpr_or_int10_operand" "dJ")])
+ (const_int 0)))
+ (set (match_operand:SI 4 "integer_register_operand" "=d")
+ (match_operator:SI 5 "intop_compare_operator"
+ [(match_dup 2)
+ (match_dup 3)]))]
+ "GET_CODE (operands[1]) == GET_CODE (operands[5])"
+ "%O1%I3cc %2, %3, %4, %0"
+ [(set_attr "type" "int")
+ (set_attr "length" "4")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Comparisons
+;; ::
+;; ::::::::::::::::::::
+
+;; The comparisons are generated by the branch and/or scc operations
+
+(define_insn "cmpsi_cc"
+ [(set (match_operand:CC 0 "icc_operand" "=t,t")
+ (compare:CC (match_operand:SI 1 "integer_register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int10_operand" "d,J")))]
+ ""
+ "cmp%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cmpsi_cc_uns"
+ [(set (match_operand:CC_UNS 0 "icc_operand" "=t,t")
+ (compare:CC_UNS (match_operand:SI 1 "integer_register_operand" "d,d")
+ (match_operand:SI 2 "gpr_or_int10_operand" "d,J")))]
+ ""
+ "cmp%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; The only requirement for a CC_NZmode GPR or memory value is that
+;; comparing it against zero must set the Z and N flags appropriately.
+;; The source operand is therefore a valid CC_NZmode value.
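+;;
+;; As a sketch of why the non-ICC alternatives below are safe: after the
+;; `mov %1, %0' alternative the GPR simply holds the SImode value, so a
+;; later compare of that GPR against zero produces exactly the N and Z
+;; results that `cmpi %1, #0, %0' would have produced directly; the same
+;; argument applies to the value stored by the memory alternative.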
+(define_insn "*cmpsi_cc_nz"
+ [(set (match_operand:CC_NZ 0 "nonimmediate_operand" "=t,d,m")
+ (compare:CC_NZ (match_operand:SI 1 "integer_register_operand" "d,d,d")
+ (const_int 0)))]
+ ""
+ "@
+ cmpi %1, #0, %0
+ mov %1, %0
+ st%I0%U0 %1, %M0"
+ [(set_attr "length" "4,4,4")
+ (set_attr "type" "int,int,gstore")])
+
+(define_insn "*cmpsf_cc_fp"
+ [(set (match_operand:CC_FP 0 "fcc_operand" "=u")
+ (compare:CC_FP (match_operand:SF 1 "fpr_operand" "f")
+ (match_operand:SF 2 "fpr_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "fcmps %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fscmp")])
+
+(define_insn "*cmpdf_cc_fp"
+ [(set (match_operand:CC_FP 0 "fcc_operand" "=u")
+ (compare:CC_FP (match_operand:DF 1 "even_fpr_operand" "h")
+ (match_operand:DF 2 "even_fpr_operand" "h")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "fcmpd %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fdcmp")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Branches
+;; ::
+;; ::::::::::::::::::::
+
+;; Define_expands called by the machine independent part of the compiler
+;; to allocate a new comparison register.
+
+(define_expand "cbranchdf4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DF 1 "fpr_operand" "")
+ (match_operand:DF 2 "fpr_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ { if (frv_emit_cond_branch (operands)) DONE; gcc_unreachable (); })
+
+(define_expand "cbranchsf4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SF 1 "fpr_operand" "")
+ (match_operand:SF 2 "fpr_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_HARD_FLOAT"
+ { if (frv_emit_cond_branch (operands)) DONE; gcc_unreachable (); })
+
+(define_expand "cbranchsi4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "integer_register_operand" "")
+ (match_operand:SI 2 "gpr_or_int10_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ { if (frv_emit_cond_branch (operands)) DONE; gcc_unreachable (); })
+
+;; Actual branches. We must allow for the (label_ref) and the (pc) to be
+;; swapped. If they are swapped, it reverses the sense of the branch.
+;;
+;; Note - unlike the define_expands above, these patterns can be amalgamated
+;; into one pattern for branch-if-true and one for branch-if-false.  This does
+;; require an operand modifier to select the correct branch mnemonic.
+;;
+;; If a fixed condition code register is being used (as opposed to, say,
+;; using cc0), then the expands could look like this:
+;;
+;; (define_insn "*branch_true"
+;; [(set (pc)
+;; (if_then_else (match_operator:CC 0 "comparison_operator"
+;; [(reg:CC <number_of_CC_register>)
+;; (const_int 0)])
+;; (label_ref (match_operand 1 "" ""))
+;; (pc)))]
+;; ""
+;; "b%B0 %1"
+;; [(set_attr "length" "4")]
+;; )
+;;
+;; In the above example the %B is a directive to frv_print_operand()
+;; to decode and print the correct branch mnemonic.
+
+(define_insn "*branch_int_true"
+ [(set (pc)
+ (if_then_else (match_operator 0 "integer_relational_operator"
+ [(match_operand 1 "icc_operand" "t")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 4)
+ return \"b%c0 %1,%#,%l2\";
+ else
+ return \"b%C0 %1,%#,1f\;call %l2\\n1:\";
+}"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
+ (le (minus (match_dup 2) (pc)) (const_int 32764)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "no")
+ (const_string "yes")))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "branch")
+ (const_string "multi")))])
+
+(define_insn "*branch_int_false"
+ [(set (pc)
+ (if_then_else (match_operator 0 "integer_relational_operator"
+ [(match_operand 1 "icc_operand" "t")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 4)
+ return \"b%C0 %1,%#,%l2\";
+ else
+ return \"b%c0 %1,%#,1f\;call %l2\\n1:\";
+}"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
+ (le (minus (match_dup 2) (pc)) (const_int 32764)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "no")
+ (const_string "yes")))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "branch")
+ (const_string "multi")))])
+
+(define_insn "*branch_fp_true"
+ [(set (pc)
+ (if_then_else (match_operator:CC_FP 0 "float_relational_operator"
+ [(match_operand 1 "fcc_operand" "u")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 4)
+ return \"fb%f0 %1,%#,%l2\";
+ else
+ return \"fb%F0 %1,%#,1f\;call %l2\\n1:\";
+}"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
+ (le (minus (match_dup 2) (pc)) (const_int 32764)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "no")
+ (const_string "yes")))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "branch")
+ (const_string "multi")))])
+
+(define_insn "*branch_fp_false"
+ [(set (pc)
+ (if_then_else (match_operator:CC_FP 0 "float_relational_operator"
+ [(match_operand 1 "fcc_operand" "u")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 4)
+ return \"fb%F0 %1,%#,%l2\";
+ else
+ return \"fb%f0 %1,%#,1f\;call %l2\\n1:\";
+}"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
+ (le (minus (match_dup 2) (pc)) (const_int 32764)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "no")
+ (const_string "yes")))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "branch")
+ (const_string "multi")))])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Set flag operations
+;; ::
+;; ::::::::::::::::::::
+
+;; Define_expands called by the machine independent part of the compiler
+;; to allocate a new comparison register
+
+(define_expand "cstoredf4"
+ [(use (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:DF 2 "fpr_operand")
+ (match_operand:DF 3 "fpr_operand")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ { if (frv_emit_scc (operands)) DONE; else FAIL; })
+
+(define_expand "cstoresf4"
+ [(use (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SF 2 "fpr_operand")
+ (match_operand:SF 3 "fpr_operand")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ "TARGET_HARD_FLOAT"
+ { if (frv_emit_scc (operands)) DONE; else FAIL; })
+
+(define_expand "cstoresi4"
+ [(use (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "integer_register_operand")
+ (match_operand:SI 3 "gpr_or_int10_operand")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ ""
+ { if (frv_emit_scc (operands)) DONE; else FAIL; })
+
+(define_insn "*scc_int"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (match_operator:SI 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t")
+ (const_int 0)]))
+ (clobber (match_operand:CC_CCR 3 "icr_operand" "=v"))]
+ ""
+ "#"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi")])
+
+(define_insn "*scc_float"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (match_operator:SI 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u")
+ (const_int 0)]))
+ (clobber (match_operand:CC_CCR 3 "fcr_operand" "=w"))]
+ ""
+ "#"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi")])
+
+;; XXX -- add reload_completed to the splits, because register allocation
+;; currently isn't ready to see cond_exec packets.
+(define_split
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (match_operator:SI 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)]))
+ (clobber (match_operand 3 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 4)]
+ "operands[4] = frv_split_scc (operands[0], operands[1], operands[2],
+ operands[3], (HOST_WIDE_INT) 1);")
+
+(define_insn "*scc_neg1_int"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (neg:SI (match_operator:SI 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t")
+ (const_int 0)])))
+ (clobber (match_operand:CC_CCR 3 "icr_operand" "=v"))]
+ ""
+ "#"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi")])
+
+(define_insn "*scc_neg1_float"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (neg:SI (match_operator:SI 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u")
+ (const_int 0)])))
+ (clobber (match_operand:CC_CCR 3 "fcr_operand" "=w"))]
+ ""
+ "#"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (neg:SI (match_operator:SI 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)])))
+ (clobber (match_operand 3 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 4)]
+ "operands[4] = frv_split_scc (operands[0], operands[1], operands[2],
+ operands[3], (HOST_WIDE_INT) -1);")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conditionally executed instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Convert ICC/FCC comparison into CCR bits so we can do conditional execution
+(define_insn "*ck_signed"
+ [(set (match_operand:CC_CCR 0 "icr_operand" "=v")
+ (match_operator:CC_CCR 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t")
+ (const_int 0)]))]
+ ""
+ "ck%c1 %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "ccr")])
+
+(define_insn "*fck_float"
+ [(set (match_operand:CC_CCR 0 "fcr_operand" "=w")
+ (match_operator:CC_CCR 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u")
+ (const_int 0)]))]
+ "TARGET_HAS_FPRS"
+ "fck%c1 %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "ccr")])
+
+;; Conditionally convert ICC/FCC comparison into CCR bits to provide && and ||
+;; tests in conditional execution
+(define_insn "cond_exec_ck"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "=v,w")
+ (if_then_else:CC_CCR (match_operator 1 "ccr_eqne_operator"
+ [(match_operand 2 "cr_operand" "C,C")
+ (const_int 0)])
+ (match_operator 3 "relational_operator"
+ [(match_operand 4 "cc_operand" "t,u")
+ (const_int 0)])
+ (const_int 0)))]
+ ""
+ "@
+ cck%c3 %4, %0, %2, %e1
+ cfck%f3 %4, %0, %2, %e1"
+ [(set_attr "length" "4")
+ (set_attr "type" "ccr")])
+
+;; Conditionally set a register to either 0 or another register
+(define_insn "*cond_exec_movqi"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C,C,C,C,C,C")
+ (const_int 0)])
+ (set (match_operand:QI 2 "condexec_dest_operand" "=d,d,U,?f,?f,?d")
+ (match_operand:QI 3 "condexec_source_operand" "dO,U,dO,f,d,f")))]
+ "register_operand(operands[2], QImode) || reg_or_0_operand (operands[3], QImode)"
+ "* return output_condmove_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,gload,gstore,fsconv,movgf,movfg")])
+
+(define_insn "*cond_exec_movhi"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C,C,C,C,C,C")
+ (const_int 0)])
+ (set (match_operand:HI 2 "condexec_dest_operand" "=d,d,U,?f,?f,?d")
+ (match_operand:HI 3 "condexec_source_operand" "dO,U,dO,f,d,f")))]
+ "register_operand(operands[2], HImode) || reg_or_0_operand (operands[3], HImode)"
+ "* return output_condmove_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,gload,gstore,fsconv,movgf,movfg")])
+
+(define_insn "*cond_exec_movsi"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C,C,C,C,C,C,C,C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "condexec_dest_operand" "=d,d,U,?f,?f,?d,?f,?m")
+ (match_operand:SI 3 "condexec_source_operand" "dO,U,dO,f,d,f,m,f")))]
+ "register_operand(operands[2], SImode) || reg_or_0_operand (operands[3], SImode)"
+ "* return output_condmove_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,gload,gstore,fsconv,movgf,movfg,fload,fstore")])
+
+
+(define_insn "*cond_exec_movsf_has_fprs"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C,C,C,C,C,C,C,C,C,C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "condexec_dest_operand" "=f,?d,?d,?f,f,f,?d,U,?U,U")
+ (match_operand:SF 3 "condexec_source_operand" "f,d,f,d,G,U,U,f,d,G")))]
+ "TARGET_HAS_FPRS"
+ "* return output_condmove_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv,int,movgf,movfg,movgf,fload,gload,fstore,gstore,gstore")])
+
+(define_insn "*cond_exec_movsf_no_fprs"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C,C,C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "condexec_dest_operand" "=d,d,U")
+ (match_operand:SF 3 "condexec_source_operand" "d,U,dG")))]
+ "! TARGET_HAS_FPRS"
+ "* return output_condmove_single (operands, insn);"
+ [(set_attr "length" "4")
+ (set_attr "type" "int,gload,gstore")])
+
+(define_insn "*cond_exec_si_binary1"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "integer_register_operand" "=d")
+ (match_operator:SI 3 "condexec_si_binary_operator"
+ [(match_operand:SI 4 "integer_register_operand" "d")
+ (match_operand:SI 5 "integer_register_operand" "d")])))]
+ ""
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS: return \"cadd %4, %z5, %2, %1, %e0\";
+ case MINUS: return \"csub %4, %z5, %2, %1, %e0\";
+ case AND: return \"cand %4, %z5, %2, %1, %e0\";
+ case IOR: return \"cor %4, %z5, %2, %1, %e0\";
+ case XOR: return \"cxor %4, %z5, %2, %1, %e0\";
+ case ASHIFT: return \"csll %4, %z5, %2, %1, %e0\";
+ case ASHIFTRT: return \"csra %4, %z5, %2, %1, %e0\";
+ case LSHIFTRT: return \"csrl %4, %z5, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cond_exec_si_binary2"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (match_operator:SI 3 "condexec_si_media_operator"
+ [(match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:SI 5 "fpr_operand" "f")])))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case AND: return \"cmand %4, %5, %2, %1, %e0\";
+ case IOR: return \"cmor %4, %5, %2, %1, %e0\";
+ case XOR: return \"cmxor %4, %5, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
+
+;; Note, flow does not (currently) know how to handle an operation that uses
+;; only part of the hard registers allocated for a multiregister value, such as
+;; DImode here when the user is only interested in the lower 32 bits.  So we
+;; emit a USE of the entire register after the csmul instruction so that flow
+;; won't get confused.  See frv_ifcvt_modify_insn for more details.
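+;;
+;; A sketch of the shape involved (register numbers are made up):
+;;
+;;   (cond_exec (ne (reg:CC_CCR cr3) (const_int 0))
+;;              (set (reg:DI gr8)
+;;                   (mult:DI (sign_extend:DI (reg:SI gr4))
+;;                            (sign_extend:DI (reg:SI gr5)))))
+;;   (use (reg:DI gr8))
+;;
+;; The trailing USE keeps the whole 64-bit register pair live as far as
+;; flow is concerned.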
+
+(define_insn "*cond_exec_si_smul"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:DI 2 "even_gpr_operand" "=e")
+ (mult:DI (sign_extend:DI (match_operand:SI 3 "integer_register_operand" "%d"))
+ (sign_extend:DI (match_operand:SI 4 "integer_register_operand" "d")))))]
+ ""
+ "csmul %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+(define_insn "*cond_exec_si_divide"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "integer_register_operand" "=d")
+ (match_operator:SI 3 "condexec_si_divide_operator"
+ [(match_operand:SI 4 "integer_register_operand" "d")
+ (match_operand:SI 5 "integer_register_operand" "d")])))]
+ ""
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case DIV: return \"csdiv %4, %z5, %2, %1, %e0\";
+ case UDIV: return \"cudiv %4, %z5, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "div")])
+
+(define_insn "*cond_exec_si_unary1"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "integer_register_operand" "=d")
+ (match_operator:SI 3 "condexec_si_unary_operator"
+ [(match_operand:SI 4 "integer_register_operand" "d")])))]
+ ""
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case NOT: return \"cnot %4, %2, %1, %e0\";
+ case NEG: return \"csub %., %4, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cond_exec_si_unary2"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (not:SI (match_operand:SI 3 "fpr_operand" "f"))))]
+ "TARGET_MEDIA"
+ "cmnot %3, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
+
+(define_insn "*cond_exec_cmpsi_cc"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:CC 2 "icc_operand" "=t")
+ (compare:CC (match_operand:SI 3 "integer_register_operand" "d")
+ (match_operand:SI 4 "reg_or_0_operand" "dO"))))]
+ "reload_completed
+ && REGNO (operands[1]) == REGNO (operands[2]) - ICC_FIRST + ICR_FIRST"
+ "ccmp %3, %z4, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cond_exec_cmpsi_cc_uns"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:CC_UNS 2 "icc_operand" "=t")
+ (compare:CC_UNS (match_operand:SI 3 "integer_register_operand" "d")
+ (match_operand:SI 4 "reg_or_0_operand" "dO"))))]
+ "reload_completed
+ && REGNO (operands[1]) == REGNO (operands[2]) - ICC_FIRST + ICR_FIRST"
+ "ccmp %3, %z4, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cond_exec_cmpsi_cc_nz"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:CC_NZ 2 "icc_operand" "=t")
+ (compare:CC_NZ (match_operand:SI 3 "integer_register_operand" "d")
+ (const_int 0))))]
+ "reload_completed
+ && REGNO (operands[1]) == REGNO (operands[2]) - ICC_FIRST + ICR_FIRST"
+ "ccmp %3, %., %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "*cond_exec_sf_conv"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "fpr_operand" "=f")
+ (match_operator:SF 3 "condexec_sf_conv_operator"
+ [(match_operand:SF 4 "fpr_operand" "f")])))]
+ "TARGET_HARD_FLOAT"
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case ABS: return \"cfabss %4, %2, %1, %e0\";
+ case NEG: return \"cfnegs %4, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+(define_insn "*cond_exec_sf_add"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "fpr_operand" "=f")
+ (match_operator:SF 3 "condexec_sf_add_operator"
+ [(match_operand:SF 4 "fpr_operand" "f")
+ (match_operand:SF 5 "fpr_operand" "f")])))]
+ "TARGET_HARD_FLOAT"
+ "*
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS: return \"cfadds %4, %5, %2, %1, %e0\";
+ case MINUS: return \"cfsubs %4, %5, %2, %1, %e0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsadd")])
+
+(define_insn "*cond_exec_sf_mul"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "fpr_operand" "=f")
+ (mult:SF (match_operand:SF 3 "fpr_operand" "f")
+ (match_operand:SF 4 "fpr_operand" "f"))))]
+ "TARGET_HARD_FLOAT"
+ "cfmuls %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsmul")])
+
+(define_insn "*cond_exec_sf_div"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "fpr_operand" "=f")
+ (div:SF (match_operand:SF 3 "fpr_operand" "f")
+ (match_operand:SF 4 "fpr_operand" "f"))))]
+ "TARGET_HARD_FLOAT"
+ "cfdivs %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsdiv")])
+
+(define_insn "*cond_exec_sf_sqrt"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SF 2 "fpr_operand" "=f")
+ (sqrt:SF (match_operand:SF 3 "fpr_operand" "f"))))]
+ "TARGET_HARD_FLOAT"
+ "cfsqrts %3, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsdiv")])
+
+(define_insn "*cond_exec_cmpsi_cc_fp"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:CC_FP 2 "fcc_operand" "=u")
+ (compare:CC_FP (match_operand:SF 3 "fpr_operand" "f")
+ (match_operand:SF 4 "fpr_operand" "f"))))]
+ "reload_completed && TARGET_HARD_FLOAT
+ && REGNO (operands[1]) == REGNO (operands[2]) - FCC_FIRST + FCR_FIRST"
+ "cfcmps %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fsconv")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Logical operations on CR registers
+;; ::
+;; ::::::::::::::::::::
+
+;; We use UNSPEC to encode andcr/iorcr/etc. rather than the normal RTL
+;; operations, since the RTL operations only have an idea of TRUE and FALSE,
+;; while the CRs have TRUE, FALSE, and UNDEFINED.
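+;;
+;; The const_int inside the UNSPEC selects the operation; the mapping,
+;; as implemented by the *logical_cr pattern below, is:
+;;
+;;   0 andcr    1 orcr    2 xorcr    3 nandcr    4 norcr
+;;   5 andncr   6 orncr   7 nandncr  8 norncr    9 notcr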
+
+(define_expand "andcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 0)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "orcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 1)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "xorcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 2)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "nandcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 3)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "norcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 4)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "andncr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 5)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "orncr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 6)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "nandncr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 7)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "norncr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_operand:CC_CCR 2 "cr_operand" "")
+ (const_int 8)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_expand "notcr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "")
+ (match_dup 1)
+ (const_int 9)] UNSPEC_CR_LOGIC))]
+ ""
+ "")
+
+(define_insn "*logical_cr"
+ [(set (match_operand:CC_CCR 0 "cr_operand" "=C")
+ (unspec:CC_CCR [(match_operand:CC_CCR 1 "cr_operand" "C")
+ (match_operand:CC_CCR 2 "cr_operand" "C")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_CR_LOGIC))]
+ ""
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case 0: return \"andcr %1, %2, %0\";
+ case 1: return \"orcr %1, %2, %0\";
+ case 2: return \"xorcr %1, %2, %0\";
+ case 3: return \"nandcr %1, %2, %0\";
+ case 4: return \"norcr %1, %2, %0\";
+ case 5: return \"andncr %1, %2, %0\";
+ case 6: return \"orncr %1, %2, %0\";
+ case 7: return \"nandncr %1, %2, %0\";
+ case 8: return \"norncr %1, %2, %0\";
+ case 9: return \"notcr %1, %0\";
+ }
+
+ fatal_insn (\"logical_cr\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "ccr")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conditional move instructions
+;; ::
+;; ::::::::::::::::::::
+
+
+;; - conditional moves based on floating-point comparisons require
+;; TARGET_HARD_FLOAT, because an FPU is required to do the comparison.
+
+;; - conditional moves between FPRs based on integer comparisons
+;; require TARGET_HAS_FPRS.
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "integer_register_operand" "")
+ (if_then_else:QI (match_operand 1 "" "")
+ (match_operand:QI 2 "gpr_or_int_operand" "")
+ (match_operand:QI 3 "gpr_or_int_operand" "")))]
+ "TARGET_COND_MOVE"
+ "
+{
+ if (!frv_emit_cond_move (operands[0], operands[1], operands[2], operands[3]))
+ FAIL;
+
+ DONE;
+}")
+
+(define_insn "*movqicc_internal1_int"
+ [(set (match_operand:QI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:QI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t")
+ (const_int 0)])
+ (match_operand:QI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:QI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ ""
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movqicc_internal1_float"
+ [(set (match_operand:QI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:QI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u")
+ (const_int 0)])
+ (match_operand:QI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:QI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w"))]
+ "TARGET_HARD_FLOAT"
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movqicc_internal2_int"
+ [(set (match_operand:QI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:QI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t,t,t")
+ (const_int 0)])
+ (match_operand:QI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:QI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v,v,v"))]
+ "(INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movqicc_internal2_float"
+ [(set (match_operand:QI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:QI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u,u,u")
+ (const_int 0)])
+ (match_operand:QI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:QI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w,w,w"))]
+ "TARGET_HARD_FLOAT
+ && (INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:QI 0 "integer_register_operand" "")
+ (if_then_else:QI (match_operator 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)])
+ (match_operand:QI 3 "gpr_or_int_operand" "")
+ (match_operand:QI 4 "gpr_or_int_operand" "")))
+ (clobber (match_operand:CC_CCR 5 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_cond_move (operands);")
+
+(define_expand "movhicc"
+ [(set (match_operand:HI 0 "integer_register_operand" "")
+ (if_then_else:HI (match_operand 1 "" "")
+ (match_operand:HI 2 "gpr_or_int_operand" "")
+ (match_operand:HI 3 "gpr_or_int_operand" "")))]
+ "TARGET_COND_MOVE"
+ "
+{
+ if (!frv_emit_cond_move (operands[0], operands[1], operands[2], operands[3]))
+ FAIL;
+
+ DONE;
+}")
+
+(define_insn "*movhicc_internal1_int"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:HI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t")
+ (const_int 0)])
+ (match_operand:HI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:HI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ ""
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movhicc_internal1_float"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:HI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u")
+ (const_int 0)])
+ (match_operand:HI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:HI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w"))]
+ "TARGET_HARD_FLOAT"
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movhicc_internal2_int"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:HI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t,t,t")
+ (const_int 0)])
+ (match_operand:HI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:HI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v,v,v"))]
+ "(INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movhicc_internal2_float"
+ [(set (match_operand:HI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:HI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u,u,u")
+ (const_int 0)])
+ (match_operand:HI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:HI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w,w,w"))]
+ "TARGET_HARD_FLOAT
+ && (INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:HI 0 "integer_register_operand" "")
+ (if_then_else:HI (match_operator 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)])
+ (match_operand:HI 3 "gpr_or_int_operand" "")
+ (match_operand:HI 4 "gpr_or_int_operand" "")))
+ (clobber (match_operand:CC_CCR 5 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_cond_move (operands);")
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (if_then_else:SI (match_operand 1 "" "")
+ (match_operand:SI 2 "gpr_or_int_operand" "")
+ (match_operand:SI 3 "gpr_or_int_operand" "")))]
+ "TARGET_COND_MOVE"
+ "
+{
+ if (!frv_emit_cond_move (operands[0], operands[1], operands[2], operands[3]))
+ FAIL;
+
+ DONE;
+}")
+
+(define_insn "*movsicc_internal1_int"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:SI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t")
+ (const_int 0)])
+ (match_operand:SI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:SI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ ""
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movsicc_internal1_float"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:SI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u")
+ (const_int 0)])
+ (match_operand:SI 3 "reg_or_0_operand" "0,dO,dO")
+ (match_operand:SI 4 "reg_or_0_operand" "dO,0,dO")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w"))]
+ "TARGET_HARD_FLOAT"
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movsicc_internal2_int"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:SI (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t,t,t")
+ (const_int 0)])
+ (match_operand:SI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:SI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v,v,v"))]
+ "(INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movsicc_internal2_float"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,d,d,d")
+ (if_then_else:SI (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u,u,u")
+ (const_int 0)])
+ (match_operand:SI 3 "const_int_operand" "O,O,L,n,n")
+ (match_operand:SI 4 "const_int_operand" "L,n,O,O,n")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w,w,w"))]
+ "TARGET_HARD_FLOAT
+ && (INTVAL (operands[3]) == 0
+ || INTVAL (operands[4]) == 0
+ || (IN_RANGE (INTVAL (operands[3]), -2048, 2047)
+ && IN_RANGE (INTVAL (operands[4]) - INTVAL (operands[3]), -2048, 2047)))"
+ "#"
+ [(set_attr "length" "8,12,8,12,12")
+ (set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:SI 0 "integer_register_operand" "")
+ (if_then_else:SI (match_operator 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)])
+ (match_operand:SI 3 "gpr_or_int_operand" "")
+ (match_operand:SI 4 "gpr_or_int_operand" "")))
+ (clobber (match_operand:CC_CCR 5 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_cond_move (operands);")
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "" "")
+ (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
+ "TARGET_COND_MOVE"
+ "
+{
+ if (!frv_emit_cond_move (operands[0], operands[1], operands[2], operands[3]))
+ FAIL;
+
+ DONE;
+}")
+
+(define_insn "*movsfcc_has_fprs_int"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,f,?f,?f,?d")
+ (if_then_else:SF (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t,t,t,t")
+ (const_int 0)])
+ (match_operand:SF 3 "register_operand" "0,f,f,f,d,fd")
+ (match_operand:SF 4 "register_operand" "f,0,f,d,fd,fd")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v,v,v,v"))]
+ "TARGET_HAS_FPRS"
+ "#"
+ [(set_attr "length" "8,8,12,12,12,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movsfcc_hardfloat_float"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,f,?f,?f,?d")
+ (if_then_else:SF (match_operator:CC_FP 1 "float_relational_operator"
+ [(match_operand:CC_FP 2 "fcc_operand" "u,u,u,u,u,u")
+ (const_int 0)])
+ (match_operand:SF 3 "register_operand" "0,f,f,f,d,fd")
+ (match_operand:SF 4 "register_operand" "f,0,f,d,fd,fd")))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w,w,w,w"))]
+ "TARGET_HARD_FLOAT"
+ "#"
+ [(set_attr "length" "8,8,12,12,12,12")
+ (set_attr "type" "multi")])
+
+(define_insn "*movsfcc_no_fprs_int"
+ [(set (match_operand:SF 0 "integer_register_operand" "=d,d,d")
+ (if_then_else:SF (match_operator 1 "integer_relational_operator"
+ [(match_operand 2 "icc_operand" "t,t,t")
+ (const_int 0)])
+ (match_operand:SF 3 "integer_register_operand" "0,d,d")
+ (match_operand:SF 4 "integer_register_operand" "d,0,d")))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ "! TARGET_HAS_FPRS"
+ "#"
+ [(set_attr "length" "8,8,12")
+ (set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operator 1 "relational_operator"
+ [(match_operand 2 "cc_operand" "")
+ (const_int 0)])
+ (match_operand:SF 3 "register_operand" "")
+ (match_operand:SF 4 "register_operand" "")))
+ (clobber (match_operand:CC_CCR 5 "cr_operand" ""))]
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_cond_move (operands);")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Minimum, maximum, and integer absolute value
+;; ::
+;; ::::::::::::::::::::
+
+;; These 'instructions' are provided to give the compiler a slightly better
+;; nudge at register allocation than it would get if it constructed the
+;; operations from basic building blocks (each pattern indicates that it
+;; prefers one of the operands to be the same as the destination).  It also
+;; helps the earlier passes of the compiler by not breaking things into
+;; small basic blocks.
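+;;
+;; For example, smaxsi3 on registers initially expands to a single parallel
+;; of roughly this shape (a sketch; the clobbered scratches are fresh
+;; CCmode and CC_CCRmode pseudos):
+;;
+;;   (parallel [(set (reg:SI d0) (smax:SI (reg:SI d1) (reg:SI d2)))
+;;              (clobber (reg:CC icc0))
+;;              (clobber (reg:CC_CCR icr0))])
+;;
+;; which frv_split_minmax only breaks apart after reload.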
+
+(define_expand "abssi2"
+ [(parallel [(set (match_operand:SI 0 "integer_register_operand" "")
+ (abs:SI (match_operand:SI 1 "integer_register_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))])]
+ "TARGET_COND_MOVE"
+ "
+{
+ operands[2] = gen_reg_rtx (CCmode);
+ operands[3] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_insn_and_split "*abssi2_internal"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d")
+ (abs:SI (match_operand:SI 1 "integer_register_operand" "0,d")))
+ (clobber (match_operand:CC 2 "icc_operand" "=t,t"))
+ (clobber (match_operand:CC_CCR 3 "icr_operand" "=v,v"))]
+ "TARGET_COND_MOVE"
+ "#"
+ "reload_completed"
+ [(match_dup 4)]
+ "operands[4] = frv_split_abs (operands);"
+ [(set_attr "length" "12,16")
+ (set_attr "type" "multi")])
+
+(define_expand "sminsi3"
+ [(parallel [(set (match_operand:SI 0 "integer_register_operand" "")
+ (smin:SI (match_operand:SI 1 "integer_register_operand" "")
+ (match_operand:SI 2 "gpr_or_int10_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE"
+ "
+{
+ operands[3] = gen_reg_rtx (CCmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_expand "smaxsi3"
+ [(parallel [(set (match_operand:SI 0 "integer_register_operand" "")
+ (smax:SI (match_operand:SI 1 "integer_register_operand" "")
+ (match_operand:SI 2 "gpr_or_int10_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE"
+ "
+{
+ operands[3] = gen_reg_rtx (CCmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_insn_and_split "*minmax_si_signed"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,&d")
+ (match_operator:SI 1 "minmax_operator"
+ [(match_operand:SI 2 "integer_register_operand" "%0,dO,d")
+ (match_operand:SI 3 "gpr_or_int10_operand" "dO,0,dJ")]))
+ (clobber (match_operand:CC 4 "icc_operand" "=t,t,t"))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ "TARGET_COND_MOVE"
+ "#"
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_minmax (operands);"
+ [(set_attr "length" "12,12,16")
+ (set_attr "type" "multi")])
+
+(define_expand "uminsi3"
+ [(parallel [(set (match_operand:SI 0 "integer_register_operand" "")
+ (umin:SI (match_operand:SI 1 "integer_register_operand" "")
+ (match_operand:SI 2 "gpr_or_int10_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_UNSmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_expand "umaxsi3"
+ [(parallel [(set (match_operand:SI 0 "integer_register_operand" "")
+ (umax:SI (match_operand:SI 1 "integer_register_operand" "")
+ (match_operand:SI 2 "gpr_or_int10_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_UNSmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_insn_and_split "*minmax_si_unsigned"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d,d,&d")
+ (match_operator:SI 1 "minmax_operator"
+ [(match_operand:SI 2 "integer_register_operand" "%0,dO,d")
+ (match_operand:SI 3 "gpr_or_int10_operand" "dO,0,dJ")]))
+ (clobber (match_operand:CC_UNS 4 "icc_operand" "=t,t,t"))
+ (clobber (match_operand:CC_CCR 5 "icr_operand" "=v,v,v"))]
+ "TARGET_COND_MOVE"
+ "#"
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_minmax (operands);"
+ [(set_attr "length" "12,12,16")
+ (set_attr "type" "multi")])
+
+(define_expand "sminsf3"
+ [(parallel [(set (match_operand:SF 0 "fpr_operand" "")
+ (smin:SF (match_operand:SF 1 "fpr_operand" "")
+ (match_operand:SF 2 "fpr_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_FPmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_expand "smaxsf3"
+ [(parallel [(set (match_operand:SF 0 "fpr_operand" "")
+ (smax:SF (match_operand:SF 1 "fpr_operand" "")
+ (match_operand:SF 2 "fpr_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_FPmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_insn_and_split "*minmax_sf"
+ [(set (match_operand:SF 0 "fpr_operand" "=f,f,f")
+ (match_operator:SF 1 "minmax_operator"
+ [(match_operand:SF 2 "fpr_operand" "%0,f,f")
+ (match_operand:SF 3 "fpr_operand" "f,0,f")]))
+ (clobber (match_operand:CC_FP 4 "fcc_operand" "=u,u,u"))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w"))]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT"
+ "#"
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_minmax (operands);"
+ [(set_attr "length" "12,12,16")
+ (set_attr "type" "multi")])
+
+(define_expand "smindf3"
+ [(parallel [(set (match_operand:DF 0 "fpr_operand" "")
+ (smin:DF (match_operand:DF 1 "fpr_operand" "")
+ (match_operand:DF 2 "fpr_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_FPmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_expand "smaxdf3"
+ [(parallel [(set (match_operand:DF 0 "fpr_operand" "")
+ (smax:DF (match_operand:DF 1 "fpr_operand" "")
+ (match_operand:DF 2 "fpr_operand" "")))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "
+{
+ operands[3] = gen_reg_rtx (CC_FPmode);
+ operands[4] = gen_reg_rtx (CC_CCRmode);
+}")
+
+(define_insn_and_split "*minmax_df"
+ [(set (match_operand:DF 0 "fpr_operand" "=f,f,f")
+ (match_operator:DF 1 "minmax_operator"
+ [(match_operand:DF 2 "fpr_operand" "%0,f,f")
+ (match_operand:DF 3 "fpr_operand" "f,0,f")]))
+ (clobber (match_operand:CC_FP 4 "fcc_operand" "=u,u,u"))
+ (clobber (match_operand:CC_CCR 5 "fcr_operand" "=w,w,w"))]
+ "TARGET_COND_MOVE && TARGET_HARD_FLOAT && TARGET_DOUBLE"
+ "#"
+ "reload_completed"
+ [(match_dup 6)]
+ "operands[6] = frv_split_minmax (operands);"
+ [(set_attr "length" "12,12,16")
+ (set_attr "type" "multi")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Call and branch instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Subroutine call instruction returning no value. Operand 0 is the function
+;; to call; operand 1 is the number of bytes of arguments pushed (in mode
+;; `SImode', except it is normally a `const_int'); operand 2 is the number of
+;; registers used as operands.
+
+;; On most machines, operand 2 is not actually stored into the RTL pattern. It
+;; is supplied for the sake of some RISC machines which need to put this
+;; information into the assembler code; they can put it in the RTL instead of
+;; operand 1.
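+;;
+;; For a plain non-FDPIC call the expander below therefore emits the
+;; call_internal pattern, roughly (a sketch; "foo" is a made-up callee
+;; and the argument-byte count is shown as zero):
+;;
+;;   (parallel [(call (mem:QI (symbol_ref "foo")) (const_int 0))
+;;              (use (const_int 0))
+;;              (clobber (reg:SI lr))])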
+
+(define_expand "call"
+ [(use (match_operand:QI 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))]
+ ""
+ "
+{
+ rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+ rtx addr;
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ addr = XEXP (operands[0], 0);
+ if (! call_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ if (! operands[2])
+ operands[2] = const0_rtx;
+
+ if (TARGET_FDPIC)
+ frv_expand_fdpic_call (operands, false, false);
+ else
+ emit_call_insn (gen_call_internal (addr, operands[1], operands[2], lr));
+
+ DONE;
+}")
+
+(define_insn "call_internal"
+ [(call (mem:QI (match_operand:SI 0 "call_operand" "S,dNOP"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (match_operand:SI 3 "lr_operand" "=l,l"))]
+ "! TARGET_FDPIC"
+ "@
+ call %0
+ call%i0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "call,jumpl")])
+
+;; The odd use of GR0 within the UNSPEC below prevents CSEing or
+;; hoisting function descriptor loads out of loops.  This is almost
+;; never desirable, since if we preserve the function descriptor in a
+;; pair of registers, it takes two insns to move it to gr14/gr15, and
+;; if it's on the stack, we just waste space with the store, since
+;; we'll have to load it back from memory anyway.  And, in the worst
+;; case, we may end up reusing a function descriptor still pointing at
+;; a PLT entry instead of at the resolved function, which means going
+;; through the resolver for every call that uses the outdated value.
+;; Bad!
+
+;; The explicit MEM inside the UNSPEC prevents the compiler from moving
+;; the load ahead of the branch of a NULL test, or ahead of a store
+;; that initializes a function descriptor.
+
+(define_insn "movdi_ldd"
+ [(set (match_operand:DI 0 "fdpic_fptr_operand" "=e")
+ (unspec:DI [(mem:DI (match_operand:SI 1 "ldd_address_operand" "p"))
+ (reg:SI 0)] UNSPEC_LDD))]
+ ""
+ "ldd%I1 %M1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload")])
+
+(define_insn "call_fdpicdi"
+ [(call (mem:QI (match_operand:DI 0 "fdpic_fptr_operand" "W"))
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "lr_operand" "=l"))]
+ "TARGET_FDPIC"
+ "call%i0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+(define_insn "call_fdpicsi"
+ [(call (mem:QI (match_operand:SI 0 "call_operand" "S,dNOP"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand:SI 3 "fdpic_operand" "Z,Z"))
+ (clobber (match_operand:SI 4 "lr_operand" "=l,l"))]
+ "TARGET_FDPIC"
+ "@
+ call %0
+ call%i0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "call,jumpl")])
+
+(define_expand "sibcall"
+ [(use (match_operand:QI 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))]
+ ""
+ "
+{
+ rtx addr;
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ addr = XEXP (operands[0], 0);
+ if (! sibcall_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ if (! operands[2])
+ operands[2] = const0_rtx;
+
+ if (TARGET_FDPIC)
+ frv_expand_fdpic_call (operands, false, true);
+ else
+ emit_call_insn (gen_sibcall_internal (addr, operands[1], operands[2]));
+
+ DONE;
+}")
+
+;; It might seem that these sibcall patterns are missing references to
+;; LR, but they're not necessary because sibcall_epilogue will make
+;; sure LR is restored, and having LR here will set
+;; regs_ever_used[REG_LR], forcing it to be saved on the stack, and
+;; then restored in sibcalls and regular return code paths, even if
+;; the function becomes a leaf function after tail-call elimination.
+
+;; We must not use a call-saved register here.  `W' limits us to gr14
+;; or gr15, but since we're almost running out of constraint letters,
+;; and most other call-clobbered registers are often used for
+;; argument-passing, this will do.
+(define_insn "sibcall_internal"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_operand" "WNOP"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (return)]
+ "! TARGET_FDPIC"
+ "jmp%i0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+(define_insn "sibcall_fdpicdi"
+ [(call (mem:QI (match_operand:DI 0 "fdpic_fptr_operand" "W"))
+ (match_operand 1 "" ""))
+ (return)]
+ "TARGET_FDPIC"
+ "jmp%i0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+
+;; Subroutine call instruction returning a value. Operand 0 is the hard
+;; register in which the value is returned. There are three more operands, the
+;; same as the three operands of the `call' instruction (but with numbers
+;; increased by one).
+
+;; Subroutines that return `BLKmode' objects use the `call' insn.
+
+(define_expand "call_value"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand:QI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+ "
+{
+ rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+ rtx addr;
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ addr = XEXP (operands[1], 0);
+ if (! call_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ if (! operands[3])
+ operands[3] = const0_rtx;
+
+ if (TARGET_FDPIC)
+ frv_expand_fdpic_call (operands, true, false);
+ else
+ emit_call_insn (gen_call_value_internal (operands[0], addr, operands[2],
+ operands[3], lr));
+
+ DONE;
+}")
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=d,d")
+ (call (mem:QI (match_operand:SI 1 "call_operand" "S,dNOP"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (match_operand:SI 4 "lr_operand" "=l,l"))]
+ "! TARGET_FDPIC"
+ "@
+ call %1
+ call%i1l %M1"
+ [(set_attr "length" "4")
+ (set_attr "type" "call,jumpl")])
+
+(define_insn "call_value_fdpicdi"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:QI (match_operand:DI 1 "fdpic_fptr_operand" "W"))
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "lr_operand" "=l"))]
+ "TARGET_FDPIC"
+ "call%i1l %M1"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+(define_insn "call_value_fdpicsi"
+ [(set (match_operand 0 "register_operand" "=d,d")
+ (call (mem:QI (match_operand:SI 1 "call_operand" "S,dNOP"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (use (match_operand:SI 4 "fdpic_operand" "Z,Z"))
+ (clobber (match_operand:SI 5 "lr_operand" "=l,l"))]
+ "TARGET_FDPIC"
+ "@
+ call %1
+ call%i1l %M1"
+ [(set_attr "length" "4")
+ (set_attr "type" "call,jumpl")])
+
+(define_expand "sibcall_value"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand:QI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+ "
+{
+ rtx addr;
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ addr = XEXP (operands[1], 0);
+ if (! sibcall_operand (addr, Pmode))
+ addr = force_reg (Pmode, addr);
+
+ if (! operands[3])
+ operands[3] = const0_rtx;
+
+ if (TARGET_FDPIC)
+ frv_expand_fdpic_call (operands, true, true);
+ else
+ emit_call_insn (gen_sibcall_value_internal (operands[0], addr, operands[2],
+ operands[3]));
+ DONE;
+}")
+
+(define_insn "sibcall_value_internal"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:QI (match_operand:SI 1 "sibcall_operand" "WNOP"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (return)]
+ "! TARGET_FDPIC"
+ "jmp%i1l %M1"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+(define_insn "sibcall_value_fdpicdi"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem:QI (match_operand:DI 1 "fdpic_fptr_operand" "W"))
+ (match_operand 2 "" "")))
+ (return)]
+ "TARGET_FDPIC"
+ "jmp%i1l %M1"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+;; return instruction generated instead of a jump to the epilogue
+(define_expand "return"
+ [(parallel [(return)
+ (use (match_dup 0))
+ (use (const_int 1))])]
+ "direct_return_p ()"
+ "
+{
+ operands[0] = gen_rtx_REG (Pmode, LR_REGNO);
+}")
+
+;; return instruction generated by the epilogue
+(define_expand "epilogue_return"
+ [(parallel [(return)
+ (use (match_operand:SI 0 "register_operand" ""))
+ (use (const_int 0))])]
+ ""
+ "")
+
+(define_insn "*return_internal"
+ [(return)
+ (use (match_operand:SI 0 "register_operand" "l,d"))
+ (use (match_operand:SI 1 "immediate_operand" "n,n"))]
+ ""
+ "@
+ ret
+ jmpl @(%0,%.)"
+ [(set_attr "length" "4")
+ (set_attr "type" "jump,jumpl")])
+
+(define_insn "*return_true"
+ [(set (pc)
+ (if_then_else (match_operator 0 "integer_relational_operator"
+ [(match_operand 1 "icc_operand" "t")
+ (const_int 0)])
+ (return)
+ (pc)))]
+ "direct_return_p ()"
+ "b%c0lr %1,%#"
+ [(set_attr "length" "4")
+ (set_attr "type" "jump")])
+
+(define_insn "*return_false"
+ [(set (pc)
+ (if_then_else (match_operator 0 "integer_relational_operator"
+ [(match_operand 1 "icc_operand" "t")
+ (const_int 0)])
+ (pc)
+ (return)))]
+ "direct_return_p ()"
+ "b%C0lr %1,%#"
+ [(set_attr "length" "4")
+ (set_attr "type" "jump")])
+
+;; A version of addsi3 for deallocating stack space at the end of the
+;; epilogue. The addition is done in parallel with an (unspec_volatile),
+;; which represents the clobbering of the deallocated space.
+(define_insn "stack_adjust"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "general_operand" "dNOP")))
+ (unspec_volatile [(const_int 0)] UNSPEC_STACK_ADJUST)]
+ ""
+ "add%I2 %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+;; Normal unconditional jump
+
+;; Use the "call" instruction for long branches, but prefer to use "bra" for
+;; short ones since it does not force us to save the link register.
+
+;; This define_insn uses the branch-shortening code to decide which
+;; instruction it emits. Since the main branch-shortening interface is
+;; through get_attr_length(), the two alternatives must be given different
+;; lengths. Here we pretend that the far jump is 8 rather than 4 bytes
+;; long, though both alternatives are really the same size.
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 4)
+ return \"bra %l0\";
+ else
+ return \"call %l0\";
+}"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int -32768))
+ (le (minus (match_dup 0) (pc)) (const_int 32764)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_jump")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "no")
+ (const_string "yes")))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "4")
+ (const_string "jump")
+ (const_string "call")))])
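+
+;; For example, a label 100 bytes ahead satisfies
+;; -32768 <= 100 <= 32764, so get_attr_length returns 4 and "bra" is
+;; emitted; a label 40000 bytes away fails the test, the length becomes
+;; 8, and the "call" form is used instead.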
+
+;; Indirect jump through a register
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "d,l"))]
+ ""
+ "@
+ jmpl @(%0,%.)
+ bralr"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl,branch")])
+
+;; Instruction to jump to a variable address. This is a low-level capability
+;; which can be used to implement a dispatch table when there is no `casesi'
+;; pattern. Either the 'casesi' pattern or the 'tablejump' pattern, or both,
+;; MUST be present in this file.
+
+;; This pattern requires two operands: the address or offset, and a label which
+;; should immediately precede the jump table. If the macro
+;; `CASE_VECTOR_PC_RELATIVE' is defined then the first operand is an offset
+;; which counts from the address of the table; otherwise, it is an absolute
+;; address to jump to. In either case, the first operand has mode `Pmode'.
+
+;; The `tablejump' insn is always the last insn before the jump table it uses.
+;; Its assembler code normally has no need to use the second operand, but you
+;; should incorporate it in the RTL pattern so that the jump optimizer will not
+;; delete the table as unreachable code.
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:SI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ "!flag_pic"
+ "")
+
+(define_insn "tablejump_insn"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp%I0l %M0"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
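+
+;; The tablejump expansion above yields RTL of roughly this shape (a
+;; sketch; operand 0 may be any address, not only a register):
+;;
+;;   (parallel [(set (pc) (reg:SI idx))
+;;              (use (label_ref L))])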
+
+;; Implement switch statements when generating PIC code. Switches are
+;; implemented by `tablejump' when not using -fpic.
+
+;; Emit code here to do the range checking and make the index zero based.
+;; operand 0 is the index
+;; operand 1 is the lower bound
+;; operand 2 is the range of indices (highest - lowest + 1)
+;; operand 3 is the label that precedes the table itself
+;; operand 4 is the fall through label
+
+(define_expand "casesi"
+ [(use (match_operand:SI 0 "integer_register_operand" ""))
+ (use (match_operand:SI 1 "const_int_operand" ""))
+ (use (match_operand:SI 2 "const_int_operand" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ "flag_pic"
+ "
+{
+ rtx indx;
+ rtx scale;
+ rtx low = operands[1];
+ rtx range = operands[2];
+ rtx table = operands[3];
+ rtx treg;
+ rtx fail = operands[4];
+ rtx mem;
+ rtx reg2;
+ rtx reg3;
+
+ gcc_assert (GET_CODE (operands[1]) == CONST_INT);
+
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ /* If we can't generate an immediate instruction, promote to register. */
+ if (! IN_RANGE (INTVAL (range), -2048, 2047))
+ range = force_reg (SImode, range);
+
+ /* If low bound is 0, we don't have to subtract it. */
+ if (INTVAL (operands[1]) == 0)
+ indx = operands[0];
+ else
+ {
+ indx = gen_reg_rtx (SImode);
+ if (IN_RANGE (INTVAL (low), -2047, 2048))
+ emit_insn (gen_addsi3 (indx, operands[0], GEN_INT (- INTVAL (low))));
+ else
+ emit_insn (gen_subsi3 (indx, operands[0], force_reg (SImode, low)));
+ }
+
+ /* Do an unsigned comparison (in the proper mode) between the index
+ expression and the value which represents the length of the range.
+ Since we just finished subtracting the lower bound of the range
+ from the index expression, this comparison allows us to simultaneously
+ check that the original index expression value is both greater than
+ or equal to the minimum value of the range and less than or equal to
+ the maximum value of the range. */
+
+ emit_cmp_and_jump_insns (indx, range, GTU, NULL_RTX, SImode, 1, fail);
+
+ /* Move the table address to a register. */
+ treg = gen_reg_rtx (Pmode);
+ emit_insn (gen_movsi (treg, gen_rtx_LABEL_REF (VOIDmode, table)));
+
+  /* Scale (index - low) by the word size.  */
+ scale = gen_reg_rtx (SImode);
+ emit_insn (gen_ashlsi3 (scale, indx, const2_rtx));
+
+ /* Load the address, add the start of the table back in,
+ and jump to it. */
+ mem = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, scale, treg));
+ reg2 = gen_reg_rtx (SImode);
+ reg3 = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (reg2, mem));
+ emit_insn (gen_addsi3 (reg3, reg2, treg));
+ emit_jump_insn (gen_tablejump_insn (reg3, table));
+ DONE;
+}")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Prologue and Epilogue instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Called after register allocation to add any instructions needed for the
+;; prologue.  Using a prologue insn is preferred to putting all of the
+;; instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler
+;; to intermix instructions with the saves of the call-saved registers.  In
+;; some cases, it might be necessary to emit a barrier instruction as the last
+;; insn to prevent such scheduling.
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "
+{
+ frv_expand_prologue ();
+ DONE;
+}")
+
+;; Called after register allocation to add any instructions needed for the
+;; epilogue.  Using an epilogue insn is preferred to putting all of the
+;; instructions in the FUNCTION_EPILOGUE macro, since it allows the scheduler
+;; to intermix instructions with the restores of the call-saved registers.
+;; In some cases, it might be necessary to emit a barrier instruction as the
+;; first insn to prevent such scheduling.
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+ "
+{
+ frv_expand_epilogue (true);
+ DONE;
+}")
+
+;; This pattern, if defined, emits RTL for exit from a function without the
+;; final branch back to the calling function.  It is emitted before any
+;; sibling-call (tail-call) site.
+;;
+;; The sibcall_epilogue pattern must not clobber any arguments used for
+;; parameter passing or any stack slots for arguments passed to the current
+;; function.
+(define_expand "sibcall_epilogue"
+ [(const_int 3)]
+ ""
+ "
+{
+ frv_expand_epilogue (false);
+ DONE;
+}")
+
+;; Set up the pic register to hold the address of the pic table
+(define_insn "pic_prologue"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec_volatile:SI [(const_int 0)] UNSPEC_PIC_PROLOGUE))
+ (clobber (match_operand:SI 1 "lr_operand" "=l"))
+ (clobber (match_operand:SI 2 "integer_register_operand" "=d"))]
+ ""
+ "*
+{
+ static int frv_pic_labelno = 0;
+
+ operands[3] = GEN_INT (frv_pic_labelno++);
+ return \"call %P3\\n%P3:\;movsg %1, %0\;sethi #gprelhi(%P3), %2\;setlo #gprello(%P3), %2\;sub %0,%2,%0\";
+}"
+ [(set_attr "length" "16")
+ (set_attr "type" "multi")])
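+
+;; Reading of the output template above: the "call" to the immediately
+;; following label leaves that label's address in LR; "movsg" copies LR
+;; into operand 0; "sethi"/"setlo" build the label's GP-relative offset
+;; in operand 2; the final "sub" then leaves the PIC base address in
+;; operand 0.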
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Miscellaneous instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; No operation, needed in case the user uses -g but not -O.
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "fnop"
+ [(const_int 1)]
+ ""
+ "fnop"
+ [(set_attr "length" "4")
+ (set_attr "type" "fnop")])
+
+(define_insn "mnop"
+ [(const_int 2)]
+ ""
+ "mnop"
+ [(set_attr "length" "4")
+ (set_attr "type" "mnop")])
+
+;; Pseudo instruction that prevents the scheduler from moving code above this
+;; point.  Note that type "unknown" is used to ensure that a VLIW packet is
+;; not continued past this point.
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
+ ""
+ "# blockage"
+ [(set_attr "length" "0")
+ (set_attr "type" "unknown")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Media instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Unimplemented instructions:
+;; - MCMPSH, MCMPUH
+
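+;; Most of the media patterns below share one idiom: a named define_expand
+;; stores an FRV_BUILTIN_* code in a trailing const_int operand, and an
+;; anonymous define_insn switches on that code to pick the assembler
+;; mnemonic, letting several builtins share a single insn pattern.
+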
+(define_constants
+ [(UNSPEC_MLOGIC 100)
+ (UNSPEC_MNOT 101)
+ (UNSPEC_MAVEH 102)
+ (UNSPEC_MSATH 103)
+ (UNSPEC_MADDH 104)
+ (UNSPEC_MQADDH 105)
+ (UNSPEC_MPACKH 106)
+ (UNSPEC_MUNPACKH 107)
+ (UNSPEC_MDPACKH 108)
+ (UNSPEC_MBTOH 109)
+ (UNSPEC_MHTOB 110)
+ (UNSPEC_MROT 111)
+ (UNSPEC_MSHIFT 112)
+ (UNSPEC_MEXPDHW 113)
+ (UNSPEC_MEXPDHD 114)
+ (UNSPEC_MWCUT 115)
+ (UNSPEC_MMULH 116)
+ (UNSPEC_MMULXH 117)
+ (UNSPEC_MMACH 118)
+ (UNSPEC_MMRDH 119)
+ (UNSPEC_MQMULH 120)
+ (UNSPEC_MQMULXH 121)
+ (UNSPEC_MQMACH 122)
+ (UNSPEC_MCPX 123)
+ (UNSPEC_MQCPX 124)
+ (UNSPEC_MCUT 125)
+ (UNSPEC_MRDACC 126)
+ (UNSPEC_MRDACCG 127)
+ (UNSPEC_MWTACC 128)
+ (UNSPEC_MWTACCG 129)
+ (UNSPEC_MTRAP 130)
+ (UNSPEC_MCLRACC 131)
+ (UNSPEC_MCLRACCA 132)
+ (UNSPEC_MCOP1 133)
+ (UNSPEC_MCOP2 134)
+ (UNSPEC_MDUNPACKH 135)
+ (UNSPEC_MDUNPACKH_INTERNAL 136)
+ (UNSPEC_MBTOHE 137)
+ (UNSPEC_MBTOHE_INTERNAL 138)
+ (UNSPEC_MQMACH2 139)
+ (UNSPEC_MADDACC 140)
+ (UNSPEC_MDADDACC 141)
+ (UNSPEC_MABSHS 142)
+ (UNSPEC_MDROTLI 143)
+ (UNSPEC_MCPLHI 144)
+ (UNSPEC_MCPLI 145)
+ (UNSPEC_MDCUTSSI 146)
+ (UNSPEC_MQSATHS 147)
+ (UNSPEC_MHSETLOS 148)
+ (UNSPEC_MHSETLOH 149)
+ (UNSPEC_MHSETHIS 150)
+ (UNSPEC_MHSETHIH 151)
+ (UNSPEC_MHDSETS 152)
+ (UNSPEC_MHDSETH 153)
+ (UNSPEC_MQLCLRHS 154)
+ (UNSPEC_MQLMTHS 155)
+ (UNSPEC_MQSLLHI 156)
+ (UNSPEC_MQSRAHI 157)
+ (UNSPEC_MASACCS 158)
+ (UNSPEC_MDASACCS 159)
+])
+
+;; Logic operations: type "mlogic"
+
+(define_expand "mand"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "fpr_operand" "")
+ (match_dup 3)]
+ UNSPEC_MLOGIC))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MAND);")
+
+(define_expand "mor"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "fpr_operand" "")
+ (match_dup 3)]
+ UNSPEC_MLOGIC))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MOR);")
+
+(define_expand "mxor"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "fpr_operand" "")
+ (match_dup 3)]
+ UNSPEC_MLOGIC))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MXOR);")
+
+(define_insn "*mlogic"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MLOGIC))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MAND: return \"mand %1, %2, %0\";
+ case FRV_BUILTIN_MOR: return \"mor %1, %2, %0\";
+ case FRV_BUILTIN_MXOR: return \"mxor %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mlogic\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
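+
+;; For example, the documented FRV intrinsic __MAND maps onto the "mand"
+;; expander above; a sketch (types per the "FRV Built-in Functions"
+;; documentation, where uw1 is an unsigned word):
+;;
+;;   unsigned int f (unsigned int a, unsigned int b)
+;;   {
+;;     return __MAND (a, b);   /* one "mand %1, %2, %0" instruction */
+;;   }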
+
+(define_insn "*cond_exec_mlogic"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MLOGIC)))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MAND: return \"cmand %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MOR: return \"cmor %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MXOR: return \"cmxor %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mlogic\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
+
+;; Logical not: type "mlogic"
+
+(define_insn "mnot"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")] UNSPEC_MNOT))]
+ "TARGET_MEDIA"
+ "mnot %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
+
+(define_insn "*cond_exec_mnot"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 3 "fpr_operand" "f")] UNSPEC_MNOT)))]
+ "TARGET_MEDIA"
+ "cmnot %3, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mlogic")])
+
+;; Dual average (halfword): type "maveh"
+
+(define_insn "maveh"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")]
+ UNSPEC_MAVEH))]
+ "TARGET_MEDIA"
+ "maveh %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "maveh")])
+
+;; Dual saturation (halfword): type "msath"
+
+(define_expand "msaths"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MSATH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSATHS);")
+
+(define_expand "msathu"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MSATH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSATHU);")
+
+(define_insn "*msath"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MSATH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MSATHS: return \"msaths %1, %2, %0\";
+ case FRV_BUILTIN_MSATHU: return \"msathu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, msath\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "msath")])
+
+;; Dual addition/subtraction with saturation (halfword): type "maddh"
+
+(define_expand "maddhss"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MADDHSS);")
+
+(define_expand "maddhus"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MADDHUS);")
+
+(define_expand "msubhss"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSUBHSS);")
+
+(define_expand "msubhus"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 3)]
+ UNSPEC_MADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSUBHUS);")
+
+(define_insn "*maddh"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MADDH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MADDHSS: return \"maddhss %1, %2, %0\";
+ case FRV_BUILTIN_MADDHUS: return \"maddhus %1, %2, %0\";
+ case FRV_BUILTIN_MSUBHSS: return \"msubhss %1, %2, %0\";
+ case FRV_BUILTIN_MSUBHUS: return \"msubhus %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, maddh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "maddh")])
+
+(define_insn "*cond_exec_maddh"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MADDH)))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MADDHSS: return \"cmaddhss %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MADDHUS: return \"cmaddhus %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MSUBHSS: return \"cmsubhss %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MSUBHUS: return \"cmsubhus %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_maddh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "maddh")])
+
+;; Quad addition/subtraction with saturation (halfword): type "mqaddh"
+
+(define_expand "mqaddhss"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 3)]
+ UNSPEC_MQADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MQADDHSS);")
+
+(define_expand "mqaddhus"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 3)]
+ UNSPEC_MQADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MQADDHUS);")
+
+(define_expand "mqsubhss"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 3)]
+ UNSPEC_MQADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MQSUBHSS);")
+
+(define_expand "mqsubhus"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 3)]
+ UNSPEC_MQADDH))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MQSUBHUS);")
+
+(define_insn "*mqaddh"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MQADDH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQADDHSS: return \"mqaddhss %1, %2, %0\";
+ case FRV_BUILTIN_MQADDHUS: return \"mqaddhus %1, %2, %0\";
+ case FRV_BUILTIN_MQSUBHSS: return \"mqsubhss %1, %2, %0\";
+ case FRV_BUILTIN_MQSUBHUS: return \"mqsubhus %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqaddh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqaddh")])
+
+(define_insn "*cond_exec_mqaddh"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:DI 2 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 3 "even_fpr_operand" "h")
+ (match_operand:DI 4 "even_fpr_operand" "h")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MQADDH)))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQADDHSS: return \"cmqaddhss %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MQADDHUS: return \"cmqaddhus %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MQSUBHSS: return \"cmqsubhss %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MQSUBHUS: return \"cmqsubhus %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mqaddh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqaddh")])
+
+;; Pack halfword: type "mpackh"
+
+(define_insn "mpackh"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:HI 1 "fpr_operand" "f")
+ (match_operand:HI 2 "fpr_operand" "f")]
+ UNSPEC_MPACKH))]
+ "TARGET_MEDIA"
+ "mpackh %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mpackh")])
+
+;; Unpack halfword: type "munpackh"
+
+(define_insn "munpackh"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")]
+ UNSPEC_MUNPACKH))]
+ "TARGET_MEDIA"
+ "munpackh %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "munpackh")])
+
+;; Dual pack halfword: type "mdpackh"
+
+(define_insn "mdpackh"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")]
+ UNSPEC_MDPACKH))]
+ "TARGET_MEDIA"
+ "mdpackh %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdpackh")])
+
+;; Byte-halfword conversion: type "mbhconv"
+
+(define_insn "mbtoh"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")]
+ UNSPEC_MBTOH))]
+ "TARGET_MEDIA"
+ "mbtoh %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mbhconv")])
+
+(define_insn "*cond_exec_mbtoh"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:DI 2 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:SI 3 "fpr_operand" "f")]
+ UNSPEC_MBTOH)))]
+ "TARGET_MEDIA"
+ "cmbtoh %3, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mbhconv")])
+
+(define_insn "mhtob"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:DI 1 "even_fpr_operand" "h")]
+ UNSPEC_MHTOB))]
+ "TARGET_MEDIA"
+ "mhtob %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mbhconv")])
+
+(define_insn "*cond_exec_mhtob"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:DI 3 "even_fpr_operand" "h")]
+ UNSPEC_MHTOB)))]
+ "TARGET_MEDIA"
+ "cmhtob %3, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mbhconv")])
+
+;; Rotate: type "mrot"
+
+(define_expand "mrotli"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "uint5_operand" "")
+ (match_dup 3)]
+ UNSPEC_MROT))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MROTLI);")
+
+(define_expand "mrotri"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "uint5_operand" "")
+ (match_dup 3)]
+ UNSPEC_MROT))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MROTRI);")
+
+(define_insn "*mrot"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "uint5_operand" "I")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MROT))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MROTLI: return \"mrotli %1, %2, %0\";
+ case FRV_BUILTIN_MROTRI: return \"mrotri %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mrot\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mrot")])
+
+;; Dual shift halfword: type "mshift"
+
+(define_expand "msllhi"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "uint4_operand" "")
+ (match_dup 3)]
+ UNSPEC_MSHIFT))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSLLHI);")
+
+(define_expand "msrlhi"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "uint4_operand" "")
+ (match_dup 3)]
+ UNSPEC_MSHIFT))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSRLHI);")
+
+(define_expand "msrahi"
+ [(set (match_operand:SI 0 "fpr_operand" "")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "")
+ (match_operand:SI 2 "uint4_operand" "")
+ (match_dup 3)]
+ UNSPEC_MSHIFT))]
+ "TARGET_MEDIA"
+ "operands[3] = GEN_INT (FRV_BUILTIN_MSRAHI);")
+
+(define_insn "*mshift"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "uint4_operand" "I")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MSHIFT))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MSLLHI: return \"msllhi %1, %2, %0\";
+ case FRV_BUILTIN_MSRLHI: return \"msrlhi %1, %2, %0\";
+ case FRV_BUILTIN_MSRAHI: return \"msrahi %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mshift\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mshift")])
+
+;; Expand halfword to word: type "mexpdhw"
+
+(define_insn "mexpdhw"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "uint1_operand" "I")]
+ UNSPEC_MEXPDHW))]
+ "TARGET_MEDIA"
+ "mexpdhw %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mexpdhw")])
+
+(define_insn "*cond_exec_mexpdhw"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:SI 2 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "uint1_operand" "I")]
+ UNSPEC_MEXPDHW)))]
+ "TARGET_MEDIA"
+ "cmexpdhw %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mexpdhw")])
+
+;; Expand halfword to double: type "mexpdhd"
+
+(define_insn "mexpdhd"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "uint1_operand" "I")]
+ UNSPEC_MEXPDHD))]
+ "TARGET_MEDIA"
+ "mexpdhd %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mexpdhd")])
+
+(define_insn "*cond_exec_mexpdhd"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (set (match_operand:DI 2 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "uint1_operand" "I")]
+ UNSPEC_MEXPDHD)))]
+ "TARGET_MEDIA"
+ "cmexpdhd %3, %4, %2, %1, %e0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mexpdhd")])
+
+;; FR cut: type "mwcut"
+
+(define_insn "mwcut"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_or_int6_operand" "fI")]
+ UNSPEC_MWCUT))]
+ "TARGET_MEDIA"
+ "mwcut%i2 %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mwcut")])
+
+;; Dual multiplication (halfword): type "mmulh"
+
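+;; In the accumulator patterns that follow, the second "set" in each
+;; parallel models the write to the matching accumulator-guard register
+;; (ACCG); the (unspec [(const_int 0)]) carries no value, it merely keeps
+;; the guard register's liveness and ordering visible to the optimizers.
+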
+(define_expand "mmulhs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MMULH))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMULHS);")
+
+(define_expand "mmulhu"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MMULH))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMULHU);")
+
+(define_insn "*mmulh"
+ [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MMULH))
+ (set (match_operand:HI 4 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMULHS: return \"mmulhs %1, %2, %0\";
+ case FRV_BUILTIN_MMULHU: return \"mmulhu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mmulh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmulh")])
+
+(define_insn "*cond_exec_mmulh"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (parallel [(set (match_operand:DI 2 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MMULH))
+ (set (match_operand:HI 6 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULH))]))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMULHS: return \"cmmulhs %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MMULHU: return \"cmmulhu %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mmulh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmulh")])
+
+;; Dual cross multiplication (halfword): type "mmulxh"
+
+(define_expand "mmulxhs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MMULXH))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULXH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMULXHS);")
+
+(define_expand "mmulxhu"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MMULXH))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULXH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMULXHU);")
+
+(define_insn "*mmulxh"
+ [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MMULXH))
+ (set (match_operand:HI 4 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MMULXH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMULXHS: return \"mmulxhs %1, %2, %0\";
+ case FRV_BUILTIN_MMULXHU: return \"mmulxhu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mmulxh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmulxh")])
+
+;; Dual product-sum (halfword): type "mmach"
+
+(define_expand "mmachs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MMACH))
+ (set (match_dup 3)
+ (unspec:HI [(const_int 0)] UNSPEC_MMACH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMACHS);")
+
+(define_expand "mmachu"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MMACH))
+ (set (match_dup 3)
+ (unspec:HI [(const_int 0)] UNSPEC_MMACH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMACHU);")
+
+(define_insn "*mmach"
+ [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MMACH))
+ (set (match_dup 3) (unspec:HI [(const_int 0)] UNSPEC_MMACH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMACHS: return \"mmachs %1, %2, %0\";
+ case FRV_BUILTIN_MMACHU: return \"mmachu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mmach\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmach")])
+
+(define_insn "*cond_exec_mmach"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (parallel [(set (match_operand:DI 2 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 2)
+ (match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:HI 5 "accg_operand" "+B")
+ (match_operand:SI 6 "const_int_operand" "n")]
+ UNSPEC_MMACH))
+ (set (match_dup 5)
+ (unspec:HI [(const_int 0)] UNSPEC_MMACH))]))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[6]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMACHS: return \"cmmachs %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MMACHU: return \"cmmachu %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mmach\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmach")])
+
+;; Dual product-difference: type "mmrdh"
+
+(define_expand "mmrdhs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MMRDH))
+ (set (match_dup 3)
+ (unspec:HI [(const_int 0)] UNSPEC_MMRDH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMRDHS);")
+
+(define_expand "mmrdhu"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MMRDH))
+ (set (match_dup 3)
+ (unspec:HI [(const_int 0)] UNSPEC_MMRDH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MMRDHU);")
+
+(define_insn "*mmrdh"
+ [(set (match_operand:DI 0 "even_acc_operand" "+b")
+ (unspec:DI [(match_dup 0)
+ (match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:HI 3 "accg_operand" "+B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MMRDH))
+ (set (match_dup 3)
+ (unspec:HI [(const_int 0)] UNSPEC_MMRDH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MMRDHS: return \"mmrdhs %1, %2, %0\";
+ case FRV_BUILTIN_MMRDHU: return \"mmrdhu %1, %2, %0\";
+ }
+
+  fatal_insn (\"Bad media insn, mmrdh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mmrdh")])
+
+;; Quad multiply (halfword): type "mqmulh"
+
+(define_expand "mqmulhs"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 4)]
+ UNSPEC_MQMULH))
+ (set (match_operand:V4QI 3 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMULHS);")
+
+(define_expand "mqmulhu"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 4)]
+ UNSPEC_MQMULH))
+ (set (match_operand:V4QI 3 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMULHU);")
+
+(define_insn "*mqmulh"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MQMULH))
+ (set (match_operand:V4QI 4 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQMULHS: return \"mqmulhs %1, %2, %0\";
+ case FRV_BUILTIN_MQMULHU: return \"mqmulhu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqmulh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmulh")])
+
+(define_insn "*cond_exec_mqmulh"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (parallel [(set (match_operand:V4SI 2 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 3 "even_fpr_operand" "h")
+ (match_operand:DI 4 "even_fpr_operand" "h")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MQMULH))
+ (set (match_operand:V4QI 6 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULH))]))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQMULHS: return \"cmqmulhs %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MQMULHU: return \"cmqmulhu %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mqmulh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmulh")])
+
+;; Quad cross multiply (halfword): type "mqmulxh"
+
+(define_expand "mqmulxhs"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 4)]
+ UNSPEC_MQMULXH))
+ (set (match_operand:V4QI 3 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULXH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMULXHS);")
+
+(define_expand "mqmulxhu"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_dup 4)]
+ UNSPEC_MQMULXH))
+ (set (match_operand:V4QI 3 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULXH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMULXHU);")
+
+(define_insn "*mqmulxh"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MQMULXH))
+ (set (match_operand:V4QI 4 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMULXH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQMULXHS: return \"mqmulxhs %1, %2, %0\";
+ case FRV_BUILTIN_MQMULXHU: return \"mqmulxhu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqmulxh\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmulxh")])
+
+;; Quad product-sum (halfword): type "mqmach"
+
+(define_expand "mqmachs"
+ [(parallel [(set (match_operand:V4SI 0 "even_acc_operand" "+A")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:V4QI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MQMACH))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMACHS);")
+
+(define_expand "mqmachu"
+ [(parallel [(set (match_operand:V4SI 0 "even_acc_operand" "+A")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:V4QI 3 "accg_operand" "+B")
+ (match_dup 4)]
+ UNSPEC_MQMACH))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMACHU);")
+
+(define_insn "*mqmach"
+ [(set (match_operand:V4SI 0 "even_acc_operand" "+A")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:V4QI 3 "accg_operand" "+B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MQMACH))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQMACHS: return \"mqmachs %1, %2, %0\";
+ case FRV_BUILTIN_MQMACHU: return \"mqmachu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqmach\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmach")])
+
+(define_insn "*cond_exec_mqmach"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (parallel [(set (match_operand:V4SI 2 "even_acc_operand" "+A")
+ (unspec:V4SI [(match_dup 2)
+ (match_operand:DI 3 "even_fpr_operand" "h")
+ (match_operand:DI 4 "even_fpr_operand" "h")
+ (match_operand:V4QI 5 "accg_operand" "+B")
+ (match_operand:SI 6 "const_int_operand" "n")]
+ UNSPEC_MQMACH))
+ (set (match_dup 5)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH))]))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[6]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQMACHS: return \"cmqmachs %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MQMACHU: return \"cmqmachu %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mqmach\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmach")])
+
+;; Dual complex number product-sum (halfword): type "mcpx"
+
+(define_expand "mcpxrs"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 3 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCPXRS);")
+
+(define_expand "mcpxru"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 3 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCPXRU);")
+
+(define_expand "mcpxis"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 3 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCPXIS);")
+
+(define_expand "mcpxiu"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 3 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCPXIU);")
+
+(define_insn "*mcpx"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 4 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))])]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MCPXRS: return \"mcpxrs %1, %2, %0\";
+ case FRV_BUILTIN_MCPXRU: return \"mcpxru %1, %2, %0\";
+ case FRV_BUILTIN_MCPXIS: return \"mcpxis %1, %2, %0\";
+ case FRV_BUILTIN_MCPXIU: return \"mcpxiu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mcpx\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mcpx")])
+
+(define_insn "*cond_exec_mcpx"
+ [(cond_exec
+ (match_operator 0 "ccr_eqne_operator"
+ [(match_operand 1 "cr_operand" "C")
+ (const_int 0)])
+ (parallel [(set (match_operand:SI 2 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 3 "fpr_operand" "f")
+ (match_operand:SI 4 "fpr_operand" "f")
+ (match_operand:SI 5 "const_int_operand" "n")]
+ UNSPEC_MCPX))
+ (set (match_operand:QI 6 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCPX))]))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[5]))
+ {
+ default: break;
+ case FRV_BUILTIN_MCPXRS: return \"cmcpxrs %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MCPXRU: return \"cmcpxru %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MCPXIS: return \"cmcpxis %3, %4, %2, %1, %e0\";
+ case FRV_BUILTIN_MCPXIU: return \"cmcpxiu %3, %4, %2, %1, %e0\";
+ }
+
+ fatal_insn (\"Bad media insn, cond_exec_mcpx\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mcpx")])
+
+;; Quad complex number product-sum (halfword): type "mqcpx"
+
+(define_expand "mqcpxrs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:DI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MQCPX))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MQCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQCPXRS);")
+
+(define_expand "mqcpxru"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:DI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MQCPX))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MQCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQCPXRU);")
+
+(define_expand "mqcpxis"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:DI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MQCPX))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MQCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQCPXIS);")
+
+(define_expand "mqcpxiu"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:DI 2 "fpr_operand" "f")
+ (match_dup 4)]
+ UNSPEC_MQCPX))
+ (set (match_operand:HI 3 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MQCPX))])]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQCPXIU);")
+
+(define_insn "*mqcpx"
+ [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "fpr_operand" "f")
+ (match_operand:DI 2 "fpr_operand" "f")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_MQCPX))
+ (set (match_operand:HI 4 "accg_operand" "=B")
+ (unspec:HI [(const_int 0)] UNSPEC_MQCPX))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[3]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQCPXRS: return \"mqcpxrs %1, %2, %0\";
+ case FRV_BUILTIN_MQCPXRU: return \"mqcpxru %1, %2, %0\";
+ case FRV_BUILTIN_MQCPXIS: return \"mqcpxis %1, %2, %0\";
+ case FRV_BUILTIN_MQCPXIU: return \"mqcpxiu %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqcpx\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqcpx")])
+
+;; Cut: type "mcut"
+
+(define_expand "mcut"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "acc_operand" "a")
+ (match_operand:SI 2 "fpr_or_int6_operand" "fI")
+ (match_operand:QI 3 "accg_operand" "B")
+ (match_dup 4)]
+ UNSPEC_MCUT))]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCUT);")
+
+(define_expand "mcutss"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "acc_operand" "a")
+ (match_operand:SI 2 "fpr_or_int6_operand" "fI")
+ (match_operand:QI 3 "accg_operand" "B")
+ (match_dup 4)]
+ UNSPEC_MCUT))]
+ "TARGET_MEDIA"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MCUTSS);")
+
+(define_insn "*mcut"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "acc_operand" "a")
+ (match_operand:SI 2 "fpr_or_int6_operand" "fI")
+ (match_operand:QI 3 "accg_operand" "B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MCUT))]
+ "TARGET_MEDIA"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MCUT: return \"mcut%i2 %1, %2, %0\";
+ case FRV_BUILTIN_MCUTSS: return \"mcutss%i2 %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mcut\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mcut")])
+
+;; Accumulator read: type "mrdacc"
+
+(define_insn "mrdacc"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "acc_operand" "a")] UNSPEC_MRDACC))]
+ "TARGET_MEDIA"
+ "mrdacc %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mrdacc")])
+
+(define_insn "mrdaccg"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:QI 1 "accg_operand" "B")] UNSPEC_MRDACCG))]
+ "TARGET_MEDIA"
+ "mrdaccg %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mrdacc")])
+
+;; Accumulator write: type "mwtacc"
+
+(define_insn "mwtacc"
+ [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")] UNSPEC_MWTACC))]
+ "TARGET_MEDIA"
+ "mwtacc %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mwtacc")])
+
+(define_insn "mwtaccg"
+ [(set (match_operand:QI 0 "accg_operand" "=B")
+ (unspec:QI [(match_operand:SI 1 "fpr_operand" "f")] UNSPEC_MWTACCG))]
+ "TARGET_MEDIA"
+ "mwtaccg %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mwtacc")])
+
+;; Trap: This one executes on the control unit, not the media units.
+
+(define_insn "mtrap"
+ [(unspec_volatile [(const_int 0)] UNSPEC_MTRAP)]
+ "TARGET_MEDIA"
+ "mtrap"
+ [(set_attr "length" "4")
+ (set_attr "type" "trap")])
+
+;; Clear single accumulator: type "mclracc"
+
+(define_insn "mclracc_internal"
+ [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_MCLRACC))
+ (set (match_operand:QI 1 "accg_operand" "=B")
+ (unspec:QI [(const_int 0)] UNSPEC_MCLRACC))]
+ "TARGET_MEDIA"
+ "mclracc %0,#0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mclracc")])
+
+(define_expand "mclracc"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_MCLRACC))
+ (set (match_dup 1)
+ (unspec:QI [(const_int 0)] UNSPEC_MCLRACC))])]
+ "TARGET_MEDIA"
+ "
+{
+ if (GET_CODE (operands[0]) != REG || !ACC_P (REGNO (operands[0])))
+ FAIL;
+
+ operands[1] = frv_matching_accg_for_acc (operands[0]);
+}")
+
+;; Clear all accumulators: type "mclracca"
+
+(define_insn "mclracca8_internal"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "=b")
+ (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_operand:V4SI 1 "quad_acc_operand" "=b")
+ (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_operand:V4QI 2 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_operand:V4QI 3 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))]
+ "TARGET_MEDIA && TARGET_ACC_8"
+ "mclracc acc0,#1"
+ [(set_attr "length" "4")
+ (set_attr "type" "mclracca")])
+
+(define_insn "mclracca4_internal"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "=b")
+ (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_operand:V4QI 1 "accg_operand" "=B")
+ (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))]
+ "TARGET_MEDIA && TARGET_ACC_4"
+ "mclracc acc0,#1"
+ [(set_attr "length" "4")
+ (set_attr "type" "mclracca")])
+
+(define_expand "mclracca8"
+ [(parallel [(set (match_dup 0) (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_dup 1) (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_dup 2) (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_dup 3) (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))])]
+ "TARGET_MEDIA && TARGET_ACC_8"
+ "
+{
+ operands[0] = gen_rtx_REG (V4SImode, ACC_FIRST);
+ operands[1] = gen_rtx_REG (V4SImode, ACC_FIRST + (~3 & ACC_MASK));
+ operands[2] = gen_rtx_REG (V4QImode, ACCG_FIRST);
+ operands[3] = gen_rtx_REG (V4QImode, ACCG_FIRST + (~3 & ACC_MASK));
+}")
+
+(define_expand "mclracca4"
+ [(parallel [(set (match_dup 0) (unspec:V4SI [(const_int 0)] UNSPEC_MCLRACCA))
+ (set (match_dup 1) (unspec:V4QI [(const_int 0)] UNSPEC_MCLRACCA))])]
+ "TARGET_MEDIA && TARGET_ACC_4"
+ "
+{
+ operands[0] = gen_rtx_REG (V4SImode, ACC_FIRST);
+ operands[1] = gen_rtx_REG (V4QImode, ACCG_FIRST);
+}")
+
+(define_insn "mcop1"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")] UNSPEC_MCOP1))]
+ "TARGET_MEDIA_REV1"
+ "mcop1 %1, %2, %0"
+ [(set_attr "length" "4")
+;; TODO: the scheduling class of this insn is unknown; "multi" is used as
+;; a conservative default.
+ (set_attr "type" "multi")])
+
+(define_insn "mcop2"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")
+ (match_operand:SI 2 "fpr_operand" "f")] UNSPEC_MCOP2))]
+ "TARGET_MEDIA_REV1"
+ "mcop2 %1, %2, %0"
+ [(set_attr "length" "4")
+;; TODO: the scheduling class of this insn is unknown; "multi" is used as
+;; a conservative default.
+ (set_attr "type" "multi")])
+
+(define_insn "*mdunpackh_internal"
+ [(set (match_operand:V4SI 0 "quad_fpr_operand" "=x")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")]
+ UNSPEC_MDUNPACKH_INTERNAL))]
+ "TARGET_MEDIA_REV1"
+ "mdunpackh %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdunpackh")])
+
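+;; The memory form below splits after reload: first compute the unpacked
+;; V4SI value into a quad-FPR scratch via the internal pattern, then
+;; store it with two DImode moves (offsets 0 and 8 of the destination).
+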
+(define_insn_and_split "mdunpackh"
+ [(set (match_operand:V4SI 0 "memory_operand" "=o")
+ (unspec:V4SI [(match_operand:DI 1 "even_fpr_operand" "h")]
+ UNSPEC_MDUNPACKH))
+ (clobber (match_scratch:V4SI 2 "=x"))]
+ "TARGET_MEDIA_REV1"
+ "#"
+ "reload_completed"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_dup 1)] UNSPEC_MDUNPACKH_INTERNAL))
+ (set (match_dup 3)
+ (match_dup 4))
+ (set (match_dup 5)
+ (match_dup 6))]
+ "
+{
+ operands[3] = change_address (operands[0], DImode, NULL_RTX);
+ operands[4] = gen_rtx_REG (DImode, REGNO (operands[2]));
+ operands[5] = frv_index_memory (operands[0], DImode, 1);
+ operands[6] = gen_rtx_REG (DImode, REGNO (operands[2])+2);
+}"
+ [(set_attr "length" "20")
+ (set_attr "type" "multi")])
+
+(define_insn "*mbtohe_internal"
+ [(set (match_operand:V4SI 0 "quad_fpr_operand" "=x")
+ (unspec:V4SI [(match_operand:SI 1 "fpr_operand" "f")]
+ UNSPEC_MBTOHE_INTERNAL))]
+ "TARGET_MEDIA_REV1"
+ "mbtohe %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mbhconve")])
+
+(define_insn_and_split "mbtohe"
+ [(set (match_operand:V4SI 0 "memory_operand" "=o")
+ (unspec:V4SI [(match_operand:SI 1 "fpr_operand" "f")]
+ UNSPEC_MBTOHE))
+ (clobber (match_scratch:V4SI 2 "=x"))]
+ "TARGET_MEDIA_REV1"
+ "#"
+ "reload_completed"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_dup 1)] UNSPEC_MBTOHE_INTERNAL))
+ (set (match_dup 3)
+ (match_dup 4))
+ (set (match_dup 5)
+ (match_dup 6))]
+ "
+{
+ operands[3] = change_address (operands[0], DImode, NULL_RTX);
+ operands[4] = gen_rtx_REG (DImode, REGNO (operands[2]));
+ operands[5] = frv_index_memory (operands[0], DImode, 1);
+ operands[6] = gen_rtx_REG (DImode, REGNO (operands[2])+2);
+}"
+ [(set_attr "length" "20")
+ (set_attr "type" "multi")])
+
+;; Quad product-sum (halfword) instructions, found only on the FR400:
+;; type "mqmach"
+
+(define_expand "mqxmachs"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "")
+ (match_operand:DI 2 "even_fpr_operand" "")
+ (match_operand:V4QI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MQMACH2))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH2))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQXMACHS);")
+
+(define_expand "mqxmacxhs"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "")
+ (match_operand:DI 2 "even_fpr_operand" "")
+ (match_operand:V4QI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MQMACH2))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH2))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQXMACXHS);")
+
+(define_expand "mqmacxhs"
+ [(parallel [(set (match_operand:V4SI 0 "quad_acc_operand" "")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "")
+ (match_operand:DI 2 "even_fpr_operand" "")
+ (match_operand:V4QI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MQMACH2))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH2))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MQMACXHS);")
+
+(define_insn "*mqmach2"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "+A")
+ (unspec:V4SI [(match_dup 0)
+ (match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")
+ (match_operand:V4QI 3 "accg_operand" "+B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MQMACH2))
+ (set (match_dup 3)
+ (unspec:V4QI [(const_int 0)] UNSPEC_MQMACH2))]
+ "TARGET_MEDIA_REV2"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MQXMACHS: return \"mqxmachs %1, %2, %0\";
+ case FRV_BUILTIN_MQXMACXHS: return \"mqxmacxhs %1, %2, %0\";
+ case FRV_BUILTIN_MQMACXHS: return \"mqmacxhs %1, %2, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mqmach2\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqmach")])
+
+;; Accumulator addition/subtraction: type "maddacc"
+
+(define_expand "maddaccs"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "")
+ (unspec:SI [(match_operand:DI 1 "even_acc_operand" "")]
+ UNSPEC_MADDACC))
+ (set (match_operand:QI 2 "accg_operand" "")
+ (unspec:QI [(match_operand:HI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MADDACC))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MADDACCS);")
+
+(define_expand "msubaccs"
+ [(parallel [(set (match_operand:SI 0 "acc_operand" "")
+ (unspec:SI [(match_operand:DI 1 "even_acc_operand" "")]
+ UNSPEC_MADDACC))
+ (set (match_operand:QI 2 "accg_operand" "")
+ (unspec:QI [(match_operand:HI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MADDACC))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MSUBACCS);")
+
+(define_insn "masaccs"
+ [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:DI 1 "even_acc_operand" "b")]
+ UNSPEC_MASACCS))
+ (set (match_operand:HI 2 "accg_operand" "=B")
+ (unspec:HI [(match_operand:HI 3 "accg_operand" "B")]
+ UNSPEC_MASACCS))]
+ "TARGET_MEDIA_REV2"
+ "masaccs %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "maddacc")])
+
+(define_insn "*maddacc"
+ [(set (match_operand:SI 0 "acc_operand" "=a")
+ (unspec:SI [(match_operand:DI 1 "even_acc_operand" "b")]
+ UNSPEC_MADDACC))
+ (set (match_operand:QI 2 "accg_operand" "=B")
+ (unspec:QI [(match_operand:HI 3 "accg_operand" "B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MADDACC))]
+ "TARGET_MEDIA_REV2"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MADDACCS: return \"maddaccs %1, %0\";
+ case FRV_BUILTIN_MSUBACCS: return \"msubaccs %1, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, maddacc\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "maddacc")])
+
+;; Dual accumulator addition/subtraction: type "mdaddacc"
+
+(define_expand "mdaddaccs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "")
+ (unspec:DI [(match_operand:V4SI 1 "quad_acc_operand" "")]
+ UNSPEC_MDADDACC))
+ (set (match_operand:HI 2 "accg_operand" "")
+ (unspec:HI [(match_operand:V4QI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MDADDACC))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MDADDACCS);")
+
+(define_expand "mdsubaccs"
+ [(parallel [(set (match_operand:DI 0 "even_acc_operand" "")
+ (unspec:DI [(match_operand:V4SI 1 "quad_acc_operand" "")]
+ UNSPEC_MDADDACC))
+ (set (match_operand:HI 2 "accg_operand" "")
+ (unspec:HI [(match_operand:V4QI 3 "accg_operand" "")
+ (match_dup 4)]
+ UNSPEC_MDADDACC))])]
+ "TARGET_MEDIA_REV2"
+ "operands[4] = GEN_INT (FRV_BUILTIN_MDSUBACCS);")
+
+(define_insn "mdasaccs"
+ [(set (match_operand:V4SI 0 "quad_acc_operand" "=A")
+ (unspec:V4SI [(match_operand:V4SI 1 "quad_acc_operand" "A")]
+ UNSPEC_MDASACCS))
+ (set (match_operand:V4QI 2 "accg_operand" "=B")
+ (unspec:V4QI [(match_operand:V4QI 3 "accg_operand" "B")]
+ UNSPEC_MDASACCS))]
+ "TARGET_MEDIA_REV2"
+ "mdasaccs %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdaddacc")])
+
+(define_insn "*mdaddacc"
+ [(set (match_operand:DI 0 "even_acc_operand" "=b")
+ (unspec:DI [(match_operand:V4SI 1 "quad_acc_operand" "A")]
+ UNSPEC_MDADDACC))
+ (set (match_operand:HI 2 "accg_operand" "=B")
+ (unspec:HI [(match_operand:V4QI 3 "accg_operand" "B")
+ (match_operand:SI 4 "const_int_operand" "n")]
+ UNSPEC_MDADDACC))]
+ "TARGET_MEDIA_REV2"
+ "*
+{
+ switch (INTVAL (operands[4]))
+ {
+ default: break;
+ case FRV_BUILTIN_MDADDACCS: return \"mdaddaccs %1, %0\";
+ case FRV_BUILTIN_MDSUBACCS: return \"mdsubaccs %1, %0\";
+ }
+
+ fatal_insn (\"Bad media insn, mdaddacc\", insn);
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdaddacc")])
+
+;; Dual absolute (halfword): type "mabsh"
+
+(define_insn "mabshs"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "f")] UNSPEC_MABSHS))]
+ "TARGET_MEDIA_REV2"
+ "mabshs %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mabsh")])
+
+;; Dual rotate: type "mdrot"
+
+(define_insn "mdrotli"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:SI 2 "uint5_operand" "I")]
+ UNSPEC_MDROTLI))]
+ "TARGET_MEDIA_REV2"
+ "mdrotli %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdrot")])
+
+;; Dual coupling (concatenation): type "mcpl"
+
+(define_insn "mcplhi"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:DI 1 "fpr_operand" "h")
+ (match_operand:SI 2 "uint4_operand" "I")]
+ UNSPEC_MCPLHI))]
+ "TARGET_MEDIA_REV2"
+ "mcplhi %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mcpl")])
+
+(define_insn "mcpli"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:DI 1 "fpr_operand" "h")
+ (match_operand:SI 2 "uint5_operand" "I")]
+ UNSPEC_MCPLI))]
+ "TARGET_MEDIA_REV2"
+ "mcpli %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mcpl")])
+
+;; Dual cut: type "mdcut"
+
+(define_insn "mdcutssi"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_acc_operand" "b")
+ (match_operand:SI 2 "int6_operand" "I")
+ (match_operand:HI 3 "accg_operand" "B")]
+ UNSPEC_MDCUTSSI))]
+ "TARGET_MEDIA_REV2"
+ "mdcutssi %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mdcut")])
+
+;; Quad saturate (halfword): type "mqsath"
+
+(define_insn "mqsaths"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")]
+ UNSPEC_MQSATHS))]
+ "TARGET_MEDIA_REV2"
+ "mqsaths %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqsath")])
+
+;; Quad limit instructions: type "mqlimh"
+
+(define_insn "mqlclrhs"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")]
+ UNSPEC_MQLCLRHS))]
+ "TARGET_MEDIA_FR450"
+ "mqlclrhs %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqlimh")])
+
+(define_insn "mqlmths"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:DI 2 "even_fpr_operand" "h")]
+ UNSPEC_MQLMTHS))]
+ "TARGET_MEDIA_FR450"
+ "mqlmths %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqlimh")])
+
+(define_insn "mqsllhi"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:SI 2 "int6_operand" "I")]
+ UNSPEC_MQSLLHI))]
+ "TARGET_MEDIA_FR450"
+ "mqsllhi %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqshift")])
+
+(define_insn "mqsrahi"
+ [(set (match_operand:DI 0 "even_fpr_operand" "=h")
+ (unspec:DI [(match_operand:DI 1 "even_fpr_operand" "h")
+ (match_operand:SI 2 "int6_operand" "I")]
+ UNSPEC_MQSRAHI))]
+ "TARGET_MEDIA_FR450"
+ "mqsrahi %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mqshift")])
+
+;; Set hi/lo instructions: type "mset"
+
+(define_insn "mhsetlos"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "0")
+ (match_operand:SI 2 "int12_operand" "NOP")]
+ UNSPEC_MHSETLOS))]
+ "TARGET_MEDIA_REV2"
+ "mhsetlos %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+(define_insn "mhsetloh"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "0")
+ (match_operand:SI 2 "int5_operand" "I")]
+ UNSPEC_MHSETLOH))]
+ "TARGET_MEDIA_REV2"
+ "mhsetloh %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+(define_insn "mhsethis"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "0")
+ (match_operand:SI 2 "int12_operand" "NOP")]
+ UNSPEC_MHSETHIS))]
+ "TARGET_MEDIA_REV2"
+ "mhsethis %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+(define_insn "mhsethih"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "0")
+ (match_operand:SI 2 "int5_operand" "I")]
+ UNSPEC_MHSETHIH))]
+ "TARGET_MEDIA_REV2"
+ "mhsethih %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+(define_insn "mhdsets"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "int12_operand" "NOP")]
+ UNSPEC_MHDSETS))]
+ "TARGET_MEDIA_REV2"
+ "mhdsets %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+(define_insn "mhdseth"
+ [(set (match_operand:SI 0 "fpr_operand" "=f")
+ (unspec:SI [(match_operand:SI 1 "fpr_operand" "0")
+ (match_operand:SI 2 "int5_operand" "I")]
+ UNSPEC_MHDSETH))]
+ "TARGET_MEDIA_REV2"
+ "mhdseth %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mset")])
+
+;;-----------------------------------------------------------------------------
+
+(define_expand "symGOT2reg"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")
+ (match_operand:SI 3 "" "")]
+ ""
+ "
+{
+ rtx insn;
+
+ insn = emit_insn (gen_symGOT2reg_i (operands[0], operands[1], operands[2], operands[3]));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+}")
+
+(define_expand "symGOT2reg_i"
+ [(set (match_operand:SI 0 "" "")
+ (mem:SI (plus:SI (match_operand:SI 2 "" "")
+ (const:SI (unspec:SI [(match_operand:SI 1 "" "")
+ (match_operand:SI 3 "" "")]
+ UNSPEC_GOT)))))]
+ ""
+ "")
+
+(define_expand "symGOT2reg_hilo"
+ [(set (match_dup 6)
+ (high:SI (const:SI (unspec:SI [(match_operand:SI 1 "" "")
+ (match_dup 4)] UNSPEC_GOT))))
+ (set (match_dup 5)
+ (lo_sum:SI (match_dup 6)
+ (const:SI (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")]
+ UNSPEC_GOT))))
+ (set (match_operand:SI 0 "" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (match_operand:SI 2 "" ""))))
+ ]
+ ""
+ "
+{
+ if (!can_create_pseudo_p ())
+ operands[6] = operands[5] = operands[0];
+ else
+ {
+ operands[6] = gen_reg_rtx (SImode);
+ operands[5] = gen_reg_rtx (SImode);
+ }
+
+ operands[4] = GEN_INT (INTVAL (operands[3]) + 1);
+ operands[3] = GEN_INT (INTVAL (operands[3]) + 2);
+}")
+
+(define_expand "symGOTOFF2reg_hilo"
+ [(set (match_dup 6)
+ (high:SI (const:SI (unspec:SI [(match_operand:SI 1 "" "")
+ (match_dup 4)] UNSPEC_GOT))))
+ (set (match_dup 5)
+ (lo_sum:SI (match_dup 6)
+ (const:SI (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")]
+ UNSPEC_GOT))))
+ (set (match_operand:SI 0 "" "")
+ (plus:SI (match_dup 5)
+ (match_operand:SI 2 "" "")))
+ ]
+ ""
+ "
+{
+ if (!can_create_pseudo_p ())
+ operands[6] = operands[5] = operands[0];
+ else
+ {
+ operands[6] = gen_reg_rtx (SImode);
+ operands[5] = gen_reg_rtx (SImode);
+ }
+
+ operands[4] = GEN_INT (INTVAL (operands[3]) + 1);
+ operands[3] = GEN_INT (INTVAL (operands[3]) + 2);
+}")
+
+(define_expand "symGOTOFF2reg"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")
+ (match_operand:SI 3 "" "")]
+ ""
+ "
+{
+ rtx insn = emit_insn (gen_symGOTOFF2reg_i (operands[0], operands[1], operands[2], operands[3]));
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+}")
+
+(define_expand "symGOTOFF2reg_i"
+ [(set (match_operand:SI 0 "" "")
+ (plus:SI (match_operand:SI 2 "" "")
+ (const:SI
+ (unspec:SI [(match_operand:SI 1 "" "")
+ (match_operand:SI 3 "" "")]
+ UNSPEC_GOT))))]
+ ""
+ "")
+
+(define_expand "symGPREL2reg"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")
+ (match_operand:SI 3 "" "")
+ (match_dup 4)]
+ ""
+ "
+{
+ rtx insn;
+
+ if (!can_create_pseudo_p ())
+ operands[4] = operands[0];
+ else
+ operands[4] = gen_reg_rtx (SImode);
+
+ emit_insn (frv_gen_GPsym2reg (operands[4], operands[2]));
+
+ insn = emit_insn (gen_symGOTOFF2reg_i (operands[0], operands[1],
+ operands[4], operands[3]));
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+}")
+
+(define_expand "symGPREL2reg_hilo"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")
+ (match_operand:SI 3 "" "")
+ (match_dup 4)]
+ ""
+ "
+{
+ rtx insn;
+
+ if (!can_create_pseudo_p ())
+ {
+ emit_insn (gen_symGOT2reg (operands[0], operands[1], operands[2],
+ GEN_INT (R_FRV_GOT12)));
+ DONE;
+ }
+
+ operands[4] = gen_reg_rtx (SImode);
+
+ emit_insn (frv_gen_GPsym2reg (operands[4], operands[2]));
+
+ insn = emit_insn (gen_symGOTOFF2reg_hilo (operands[0], operands[1],
+ operands[4], operands[3]));
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+}")
+
+(define_constants
+ [
+ (UNSPEC_SMUL 154)
+ (UNSPEC_UMUL 155)
+ (UNSPEC_SMU 156)
+ (UNSPEC_ADDSS 157)
+ (UNSPEC_SUBSS 158)
+ (UNSPEC_SLASS 159)
+ (UNSPEC_SCAN 160)
+ (UNSPEC_INTSS 161)
+ (UNSPEC_SCUTSS 162)
+ (UNSPEC_PREFETCH0 163)
+ (UNSPEC_PREFETCH 164)
+ (UNSPEC_IACCreadll 165)
+ (UNSPEC_IACCreadl 166)
+ (UNSPEC_IACCsetll 167)
+ (UNSPEC_IACCsetl 168)
+ (UNSPEC_SMASS 169)
+ (UNSPEC_SMSSS 170)
+ (UNSPEC_IMUL 171)
+
+ (IACC0_REG 171)
+])
+
+(define_insn "smul"
+ [(set (match_operand:DI 0 "integer_register_operand" "=d")
+ (unspec:DI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_SMUL))]
+ ""
+ "smul %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+(define_insn "umul"
+ [(set (match_operand:DI 0 "integer_register_operand" "=d")
+ (unspec:DI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_UMUL))]
+ ""
+ "umul %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "mul")])
+
+(define_insn "smass"
+ [(set (reg:DI IACC0_REG)
+ (unspec:DI [(match_operand:SI 0 "integer_register_operand" "d")
+ (match_operand:SI 1 "integer_register_operand" "d")
+ (reg:DI IACC0_REG)]
+ UNSPEC_SMASS))]
+ "TARGET_FR405_BUILTINS"
+ "smass %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "macc")])
+
+(define_insn "smsss"
+ [(set (reg:DI IACC0_REG)
+ (unspec:DI [(match_operand:SI 0 "integer_register_operand" "d")
+ (match_operand:SI 1 "integer_register_operand" "d")
+ (reg:DI IACC0_REG)]
+ UNSPEC_SMSSS))]
+ "TARGET_FR405_BUILTINS"
+ "smsss %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "macc")])
+
+(define_insn "smu"
+ [(set (reg:DI IACC0_REG)
+ (unspec:DI [(match_operand:SI 0 "integer_register_operand" "d")
+ (match_operand:SI 1 "integer_register_operand" "d")]
+ UNSPEC_SMU))]
+ "TARGET_FR405_BUILTINS"
+ "smu %1, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "macc")])
+
+(define_insn "addss"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_ADDSS))]
+ "TARGET_FR405_BUILTINS"
+ "addss %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "subss"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_SUBSS))]
+ "TARGET_FR405_BUILTINS"
+ "subss %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "slass"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_SLASS))]
+ "TARGET_FR405_BUILTINS"
+ "slass %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "int")])
+
+(define_insn "scan"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "integer_register_operand" "d")
+ (match_operand:SI 2 "integer_register_operand" "d")]
+ UNSPEC_SCAN))]
+ ""
+ "scan %1, %2, %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "scan")])
+
+(define_insn "scutss"
+ [(set (match_operand:SI 0 "integer_register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "integer_register_operand" "d")
+ (reg:DI IACC0_REG)]
+ UNSPEC_SCUTSS))]
+ "TARGET_FR405_BUILTINS"
+ "scutss %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "type" "cut")])
+
+(define_insn "frv_prefetch0"
+ [(prefetch (unspec:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_PREFETCH0)
+ (const_int 0)
+ (const_int 0))]
+ ""
+ "dcpl %0, gr0, #0"
+ [(set_attr "length" "4")])
+
+(define_insn "frv_prefetch"
+ [(prefetch (unspec:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_PREFETCH)
+ (const_int 0)
+ (const_int 0))]
+ "TARGET_FR500_FR550_BUILTINS"
+ "nop.p\\n\\tnldub @(%0, gr0), gr0"
+ [(set_attr "length" "8")])
+
+;; TLS patterns
+
+(define_insn "call_gettlsoff"
+ [(set (match_operand:SI 0 "register_operand" "=D09")
+ (unspec:SI
+ [(match_operand:SI 1 "symbolic_operand" "")]
+ UNSPEC_GETTLSOFF))
+ (clobber (reg:SI GR8_REG))
+ (clobber (reg:SI LRREG))
+ (use (match_operand:SI 2 "register_operand" "D15"))]
+ "HAVE_AS_TLS"
+ "call #gettlsoff(%a1)"
+ [(set_attr "length" "4")
+ (set_attr "type" "load_or_call")])
+
+;; We have to expand this like a libcall (it sort of actually is)
+;; because otherwise sched may move, for example, an insn that sets up
+;; GR8 for a subsequent call before the *tls_indirect_call insn, and
+;; then reload won't be able to fix things up.
+(define_expand "tls_indirect_call"
+ [(set (reg:DI GR8_REG)
+ (match_operand:DI 2 "register_operand" ""))
+ (parallel
+ [(set (reg:SI GR9_REG)
+ (unspec:SI
+ [(match_operand:SI 1 "symbolic_operand" "")
+ (reg:DI GR8_REG)]
+ UNSPEC_TLS_INDIRECT_CALL))
+ (clobber (reg:SI GR8_REG))
+ (clobber (reg:SI LRREG))
+ (use (match_operand:SI 3 "register_operand" ""))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (reg:SI GR9_REG))]
+ "HAVE_AS_TLS")
+
+(define_insn "*tls_indirect_call"
+ [(set (reg:SI GR9_REG)
+ (unspec:SI
+ [(match_operand:SI 0 "symbolic_operand" "")
+ (reg:DI GR8_REG)]
+ UNSPEC_TLS_INDIRECT_CALL))
+ (clobber (reg:SI GR8_REG))
+ (clobber (reg:SI LRREG))
+ ;; If there was a way to represent the fact that we don't need GR9
+ ;; or GR15 to be set before this instruction (it could be in
+ ;; parallel), we could use it here. This change wouldn't apply to
+  ;; call_gettlsoff, though, since the linker may turn the latter
+ ;; into ldi @(gr15,offset),gr9.
+ (use (match_operand:SI 1 "register_operand" "D15"))]
+ "HAVE_AS_TLS"
+ "calll #gettlsoff(%a0)@(gr8,gr0)"
+ [(set_attr "length" "4")
+ (set_attr "type" "jumpl")])
+
+(define_insn "tls_load_gottlsoff12"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_TLS_LOAD_GOTTLSOFF12))]
+ "HAVE_AS_TLS"
+ "ldi @(%2, #gottlsoff12(%1)), %0"
+ [(set_attr "length" "4")])
+
+(define_expand "tlsoff_hilo"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (const:SI (unspec:SI
+ [(match_operand:SI 1 "symbolic_operand" "")
+ (match_operand:SI 2 "immediate_operand" "n")]
+ UNSPEC_GOT))))
+ (set (match_dup 0)
+ (lo_sum:SI (match_dup 0)
+ (const:SI (unspec:SI [(match_dup 1)
+ (match_dup 3)] UNSPEC_GOT))))]
+ ""
+ "
+{
+ operands[3] = GEN_INT (INTVAL (operands[2]) + 1);
+}")
+
+;; Just like movdi_ldd, but with relaxation annotations.
+(define_insn "tls_tlsdesc_ldd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(mem:DI (unspec:SI
+ [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "symbolic_operand" "")]
+ UNSPEC_TLS_TLSDESC_LDD_AUX))]
+ UNSPEC_TLS_TLSDESC_LDD))]
+ ""
+ "ldd #tlsdesc(%a3)@(%1,%2), %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload")])
+
+(define_insn "tls_tlsoff_ld"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (unspec:SI
+ [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "symbolic_operand" "")]
+ UNSPEC_TLS_TLSOFF_LD)))]
+ ""
+ "ld #tlsoff(%a3)@(%1,%2), %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload")])
+
+(define_insn "tls_lddi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:SI 1 "symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_TLS_LDDI))]
+ ""
+ "lddi @(%2, #gottlsdesc12(%a1)), %0"
+ [(set_attr "length" "4")
+ (set_attr "type" "gload")])
diff --git a/gcc/config/frv/frv.opt b/gcc/config/frv/frv.opt
new file mode 100644
index 000000000..f44de1ff9
--- /dev/null
+++ b/gcc/config/frv/frv.opt
@@ -0,0 +1,199 @@
+; Options for the FR-V port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+macc-4
+Target Report RejectNegative Mask(ACC_4)
+Use 4 media accumulators
+
+macc-8
+Target Report RejectNegative InverseMask(ACC_4, ACC_8)
+Use 8 media accumulators
+
+malign-labels
+Target Report Mask(ALIGN_LABELS)
+Enable label alignment optimizations
+
+malloc-cc
+Target Report RejectNegative Mask(ALLOC_CC)
+Dynamically allocate cc registers
+
+; We used to default the branch cost to 2, but it was changed to 1 to avoid
+; generating SCC instructions and or/and-ing them together, and then doing the
+; branch on the result, which collectively generate much worse code.
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(frv_branch_cost_int) Init(1)
+Set the cost of branches
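+; As a sketch of the trade-off (illustrative, not generated code): with a
+; higher branch cost, "if (a == 0 && b == 0)" would be compiled to two scc
+; instructions whose results are and-ed together and then branched on,
+; instead of two cheap conditional branches, and the former sequence is
+; both longer and slower on this port.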
+
+mcond-exec
+Target Report Mask(COND_EXEC)
+Enable conditional execution other than moves/scc
+
+mcond-exec-insns=
+Target RejectNegative Joined UInteger Var(frv_condexec_insns) Init(8)
+Change the maximum length of conditionally-executed sequences
+
+mcond-exec-temps=
+Target RejectNegative Joined UInteger Var(frv_condexec_temps) Init(4)
+Change the number of temporary registers that are available to conditionally-executed sequences
+
+mcond-move
+Target Report Mask(COND_MOVE)
+Enable conditional moves
+
+mcpu=
+Target RejectNegative Joined
+Set the target CPU type
+
+mdebug
+Target Undocumented Var(TARGET_DEBUG)
+
+mdebug-arg
+Target Undocumented Var(TARGET_DEBUG_ARG)
+
+mdebug-addr
+Target Undocumented Var(TARGET_DEBUG_ADDR)
+
+mdebug-cond-exec
+Target Undocumented Var(TARGET_DEBUG_COND_EXEC)
+
+mdebug-loc
+Target Undocumented Var(TARGET_DEBUG_LOC)
+
+mdebug-stack
+Target Undocumented Var(TARGET_DEBUG_STACK)
+
+mdouble
+Target Report Mask(DOUBLE)
+Use fp double instructions
+
+mdword
+Target Report Mask(DWORD)
+Change the ABI to allow double word insns
+
+mfdpic
+Target Report Mask(FDPIC)
+Enable Function Descriptor PIC mode
+
+mfixed-cc
+Target Report RejectNegative InverseMask(ALLOC_CC, FIXED_CC)
+Just use icc0/fcc0
+
+mfpr-32
+Target Report RejectNegative Mask(FPR_32)
+Only use 32 FPRs
+
+mfpr-64
+Target Report RejectNegative InverseMask(FPR_32, FPR_64)
+Use 64 FPRs
+
+mgpr-32
+Target Report RejectNegative Mask(GPR_32)
+Only use 32 GPRs
+
+mgpr-64
+Target Report RejectNegative InverseMask(GPR_32, GPR_64)
+Use 64 GPRs
+
+mgprel-ro
+Target Report Mask(GPREL_RO)
+Enable use of GPREL for read-only data in FDPIC
+
+mhard-float
+Target Report RejectNegative InverseMask(SOFT_FLOAT, HARD_FLOAT)
+Use hardware floating point
+
+minline-plt
+Target Report Mask(INLINE_PLT)
+Enable inlining of PLT in function calls
+
+mlibrary-pic
+Target Report Mask(LIBPIC)
+Enable PIC support for building libraries
+
+mlinked-fp
+Target Report Mask(LINKED_FP)
+Follow the EABI linkage requirements
+
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Disallow direct calls to global functions
+
+mmedia
+Target Report Mask(MEDIA)
+Use media instructions
+
+mmuladd
+Target Report Mask(MULADD)
+Use multiply add/subtract instructions
+
+mmulti-cond-exec
+Target Report Mask(MULTI_CE)
+Enable optimizing &&/|| in conditional execution
+
+mnested-cond-exec
+Target Report Mask(NESTED_CE)
+Enable nested conditional execution optimizations
+
+; Not used by the compiler proper.
+mno-eflags
+Target RejectNegative
+Do not mark ABI switches in e_flags
+
+moptimize-membar
+Target Report Mask(OPTIMIZE_MEMBAR)
+Remove redundant membars
+
+mpack
+Target Report Mask(PACK)
+Pack VLIW instructions
+
+mscc
+Target Report Mask(SCC)
+Enable setting GPRs to the result of comparisons
+
+msched-lookahead=
+Target RejectNegative Joined UInteger Var(frv_sched_lookahead) Init(4)
+Change the amount of scheduler lookahead
+
+msoft-float
+Target Report RejectNegative Mask(SOFT_FLOAT)
+Use software floating point
+
+mTLS
+Target Report RejectNegative Mask(BIG_TLS)
+Assume a large TLS segment
+
+mtls
+Target Report RejectNegative InverseMask(BIG_TLS)
+Do not assume a large TLS segment
+
+; Not used by the compiler proper.
+mtomcat-stats
+Target
+Cause gas to print tomcat statistics
+
+; Not used by the compiler proper.
+multilib-library-pic
+Target RejectNegative
+Link with the library-pic libraries
+
+mvliw-branch
+Target Report Mask(VLIW_BRANCH)
+Allow branches to be packed with other instructions
diff --git a/gcc/config/frv/frvbegin.c b/gcc/config/frv/frvbegin.c
new file mode 100644
index 000000000..963ebd091
--- /dev/null
+++ b/gcc/config/frv/frvbegin.c
@@ -0,0 +1,157 @@
+/* Frv initialization file linked before all user modules
+ Copyright (C) 1999, 2000, 2003, 2004, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>.
+
+ This file was originally taken from the file crtstuff.c in the
+ main compiler directory, and simplified. */
+
+#include "defaults.h"
+#include <stddef.h>
+#include "unwind-dw2-fde.h"
+#include "gbl-ctors.h"
+
+/* Shorthand for file-local definitions.  */
+#define STATIC static
+
+#ifdef __FRV_UNDERSCORE__
+#define UNDERSCORE "_"
+#else
+#define UNDERSCORE ""
+#endif
+
+#define INIT_SECTION_NEG_ONE(SECTION, FLAGS, NAME) \
+__asm__ (".section " SECTION "," FLAGS "\n\t" \
+ ".globl " UNDERSCORE NAME "\n\t" \
+ ".type " UNDERSCORE NAME ",@object\n\t" \
+ ".p2align 2\n" \
+ UNDERSCORE NAME ":\n\t" \
+ ".word -1\n\t" \
+ ".previous")
+
+#define INIT_SECTION(SECTION, FLAGS, NAME) \
+__asm__ (".section " SECTION "," FLAGS "\n\t" \
+ ".globl " UNDERSCORE NAME "\n\t" \
+ ".type " UNDERSCORE NAME ",@object\n\t" \
+ ".p2align 2\n" \
+ UNDERSCORE NAME ":\n\t" \
+ ".previous")
+
+/* Beginning of .ctor/.dtor sections that provide a list of constructors and
+ destructors to run. */
+
+INIT_SECTION_NEG_ONE (".ctors", "\"aw\"", "__CTOR_LIST__");
+INIT_SECTION_NEG_ONE (".dtors", "\"aw\"", "__DTOR_LIST__");
+
+/* Beginning of .eh_frame section that provides all of the exception handling
+ tables. */
+
+INIT_SECTION (".eh_frame", "\"aw\"", "__EH_FRAME_BEGIN__");
+
+#if ! __FRV_FDPIC__
+/* In FDPIC, the linker itself generates this. */
+/* Beginning of .rofixup section that provides a list of pointers that we
+ need to adjust. */
+
+INIT_SECTION (".rofixup", "\"a\"", "__ROFIXUP_LIST__");
+#endif /* __FRV_FDPIC__ */
+
+extern void __frv_register_eh(void) __attribute__((__constructor__));
+extern void __frv_deregister_eh(void) __attribute__((__destructor__));
+
+extern func_ptr __EH_FRAME_BEGIN__[];
+
+/* Register the exception handling table as the first constructor. */
+void
+__frv_register_eh (void)
+{
+ static struct object object;
+ if (__register_frame_info)
+ __register_frame_info (__EH_FRAME_BEGIN__, &object);
+}
+
+/* Note, do not declare __{,de}register_frame_info weak as it seems
+ to interfere with the pic support. */
+
+/* Unregister the exception handling table as a destructor.  */
+void
+__frv_deregister_eh (void)
+{
+ static int completed = 0;
+
+ if (completed)
+ return;
+
+ if (__deregister_frame_info)
+ __deregister_frame_info (__EH_FRAME_BEGIN__);
+
+ completed = 1;
+}
+
+/* Run the global destructors. */
+void
+__do_global_dtors (void)
+{
+ static func_ptr *p = __DTOR_LIST__ + 1;
+ while (*p)
+ {
+ p++;
+ (*(p-1)) ();
+ }
+}
+
+/* Run the global constructors. */
+void
+__do_global_ctors (void)
+{
+ unsigned long nptrs = (unsigned long) __CTOR_LIST__[0];
+ unsigned i;
+
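+  /* A leading count of -1 (planted by INIT_SECTION_NEG_ONE above) means
+     the list is null-terminated rather than counted, so scan forward to
+     find its length.  */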
+ if (nptrs == (unsigned long)-1)
+ for (nptrs = 0; __CTOR_LIST__[nptrs + 1] != 0; nptrs++);
+
+ for (i = nptrs; i >= 1; i--)
+ __CTOR_LIST__[i] ();
+
+ atexit (__do_global_dtors);
+}
+
+/* Subroutine called automatically by `main'.
+ Compiling a global function named `main'
+ produces an automatic call to this function at the beginning.
+
+ For many systems, this routine calls __do_global_ctors.
+ For systems which support a .init section we use the .init section
+ to run __do_global_ctors, so we need not do anything here. */
+
+void
+__main (void)
+{
+ /* Support recursive calls to `main': run initializers just once. */
+ static int initialized;
+ if (! initialized)
+ {
+ initialized = 1;
+ __do_global_ctors ();
+ }
+}
diff --git a/gcc/config/frv/frvend.c b/gcc/config/frv/frvend.c
new file mode 100644
index 000000000..04b880df3
--- /dev/null
+++ b/gcc/config/frv/frvend.c
@@ -0,0 +1,70 @@
+/* Frv initialization file linked after all user modules
+ Copyright (C) 1999, 2000, 2003, 2004, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "defaults.h"
+#include <stddef.h>
+#include "unwind-dw2-fde.h"
+
+#ifdef __FRV_UNDERSCORE__
+#define UNDERSCORE "_"
+#else
+#define UNDERSCORE ""
+#endif
+
+#define FINI_SECTION_ZERO(SECTION, FLAGS, NAME) \
+__asm__ (".section " SECTION "," FLAGS "\n\t" \
+ ".globl " UNDERSCORE NAME "\n\t" \
+ ".type " UNDERSCORE NAME ",@object\n\t" \
+ ".p2align 2\n" \
+ UNDERSCORE NAME ":\n\t" \
+ ".word 0\n\t" \
+ ".previous")
+
+#define FINI_SECTION(SECTION, FLAGS, NAME) \
+__asm__ (".section " SECTION "," FLAGS "\n\t" \
+ ".globl " UNDERSCORE NAME "\n\t" \
+ ".type " UNDERSCORE NAME ",@object\n\t" \
+ ".p2align 2\n" \
+ UNDERSCORE NAME ":\n\t" \
+ ".previous")
+
+/* End of .ctor/.dtor sections that provide a list of constructors and
+ destructors to run. */
+
+FINI_SECTION_ZERO (".ctors", "\"aw\"", "__CTOR_END__");
+FINI_SECTION_ZERO (".dtors", "\"aw\"", "__DTOR_END__");
+
+/* End of .eh_frame section that provides all of the exception handling
+ tables. */
+
+FINI_SECTION_ZERO (".eh_frame", "\"aw\"", "__FRAME_END__");
+
+#if ! __FRV_FDPIC__
+/* In FDPIC, the linker itself generates this. */
+/* End of .rofixup section that provides a list of pointers that we
+ need to adjust. */
+
+FINI_SECTION (".rofixup", "\"a\"", "__ROFIXUP_END__");
+#endif /* __FRV_FDPIC__ */
diff --git a/gcc/config/frv/lib1funcs.asm b/gcc/config/frv/lib1funcs.asm
new file mode 100644
index 000000000..d1ffcab61
--- /dev/null
+++ b/gcc/config/frv/lib1funcs.asm
@@ -0,0 +1,269 @@
+/* Library functions.
+ Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <frv-asm.h>
+
+
+#ifdef L_cmpll
+/* icc0 = __cmpll (long long a, long long b) */
+
+ .globl EXT(__cmpll)
+ .type EXT(__cmpll),@function
+ .text
+ .p2align 4
+EXT(__cmpll):
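+	/* Compare the high words first; if they are equal (cc4, set by
+	   ckeq), conditionally compare the low words so that icc0 ends up
+	   describing the full 64-bit comparison.  */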
+ cmp gr8, gr10, icc0
+ ckeq icc0, cc4
+ P(ccmp) gr9, gr11, cc4, 1
+ ret
+.Lend:
+ .size EXT(__cmpll),.Lend-EXT(__cmpll)
+#endif /* L_cmpll */
+
+#ifdef L_cmpf
+/* icc0 = __cmpf (float a, float b) */
+/* Note: because this function returns its result in ICC0, it cannot
+   handle NaNs.  */
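+/* Equivalent C sketch of what both paths below compute:
+     int t = (a < b) ? -1 : (a == b) ? 0 : 1;
+   with icc0 then set as if by comparing t against zero, so it can be
+   tested with the usual signed branch conditions.  */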
+
+ .globl EXT(__cmpf)
+ .type EXT(__cmpf),@function
+ .text
+ .p2align 4
+EXT(__cmpf):
+#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */
+ movgf gr8, fr0
+ P(movgf) gr9, fr1
+ setlos #1, gr8
+ fcmps fr0, fr1, fcc0
+ P(fcklt) fcc0, cc0
+ fckeq fcc0, cc1
+ csub gr0, gr8, gr8, cc0, 1
+ cmov gr0, gr8, cc1, 1
+ cmpi gr8, 0, icc0
+ ret
+#else /* no floating point instructions available */
+ movsg lr, gr4
+ addi sp, #-16, sp
+ sti gr4, @(sp, 8)
+ st fp, @(sp, gr0)
+ mov sp, fp
+ call EXT(__cmpsf2)
+ cmpi gr8, #0, icc0
+ ldi @(sp, 8), gr4
+ movgs gr4, lr
+ ld @(sp,gr0), fp
+ addi sp, #16, sp
+ ret
+#endif
+.Lend:
+ .size EXT(__cmpf),.Lend-EXT(__cmpf)
+#endif
+
+#ifdef L_cmpd
+/* icc0 = __cmpd (double a, double b) */
+/* Note: because this function returns its result in ICC0, it cannot
+   handle NaNs.  */
+
+ .globl EXT(__cmpd)
+ .type EXT(__cmpd),@function
+ .text
+ .p2align 4
+EXT(__cmpd):
+ movsg lr, gr4
+ addi sp, #-16, sp
+ sti gr4, @(sp, 8)
+ st fp, @(sp, gr0)
+ mov sp, fp
+ call EXT(__cmpdf2)
+ cmpi gr8, #0, icc0
+ ldi @(sp, 8), gr4
+ movgs gr4, lr
+ ld @(sp,gr0), fp
+ addi sp, #16, sp
+ ret
+.Lend:
+ .size EXT(__cmpd),.Lend-EXT(__cmpd)
+#endif
+
+#ifdef L_addll
+/* gr8,gr9 = __addll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__addll)
+ .type EXT(__addll),@function
+ .text
+	.p2align 4
+EXT(__addll):
+ addcc gr9, gr11, gr9, icc0
+ addx gr8, gr10, gr8, icc0
+ ret
+.Lend:
+ .size EXT(__addll),.Lend-EXT(__addll)
+#endif
+
+#ifdef L_subll
+/* gr8,gr9 = __subll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__subll)
+ .type EXT(__subll),@function
+ .text
+ .p2align 4
+EXT(__subll):
+ subcc gr9, gr11, gr9, icc0
+ subx gr8, gr10, gr8, icc0
+ ret
+.Lend:
+ .size EXT(__subll),.Lend-EXT(__subll)
+#endif
+
+#ifdef L_andll
+/* gr8,gr9 = __andll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__andll)
+ .type EXT(__andll),@function
+ .text
+ .p2align 4
+EXT(__andll):
+ P(and) gr9, gr11, gr9
+ P2(and) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__andll),.Lend-EXT(__andll)
+#endif
+
+#ifdef L_orll
+/* gr8,gr9 = __orll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__orll)
+ .type EXT(__orll),@function
+ .text
+ .p2align 4
+EXT(__orll):
+ P(or) gr9, gr11, gr9
+ P2(or) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__orll),.Lend-EXT(__orll)
+#endif
+
+#ifdef L_xorll
+/* gr8,gr9 = __xorll (long long a, long long b) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__xorll)
+ .type EXT(__xorll),@function
+ .text
+ .p2align 4
+EXT(__xorll):
+ P(xor) gr9, gr11, gr9
+ P2(xor) gr8, gr10, gr8
+ ret
+.Lend:
+ .size EXT(__xorll),.Lend-EXT(__xorll)
+#endif
+
+#ifdef L_notll
+/* gr8,gr9 = __notll (long long a) */
+/* Note, gcc will never call this function, but it is present in case an
+ ABI program calls it. */
+
+ .globl EXT(__notll)
+ .type EXT(__notll),@function
+ .text
+ .p2align 4
+EXT(__notll):
+ P(not) gr9, gr9
+ P2(not) gr8, gr8
+ ret
+.Lend:
+ .size EXT(__notll),.Lend-EXT(__notll)
+#endif
+
+#ifdef L_cmov
+/* (void) __cmov (char *dest, const char *src, size_t len) */
+/*
+ * void __cmov (char *dest, const char *src, size_t len)
+ * {
+ * size_t i;
+ *
+ * if (dest < src || dest > src+len)
+ * {
+ * for (i = 0; i < len; i++)
+ * dest[i] = src[i];
+ * }
+ * else
+ * {
+ * while (len-- > 0)
+ * dest[len] = src[len];
+ * }
+ * }
+ */
+
+ .globl EXT(__cmov)
+ .type EXT(__cmov),@function
+ .text
+ .p2align 4
+EXT(__cmov):
+ P(cmp) gr8, gr9, icc0
+ add gr9, gr10, gr4
+ P(cmp) gr8, gr4, icc1
+ bc icc0, 0, .Lfwd
+ bls icc1, 0, .Lback
+.Lfwd:
+ /* move bytes in a forward direction */
+ P(setlos) #0, gr5
+ cmp gr0, gr10, icc0
+ P(subi) gr9, #1, gr9
+ P2(subi) gr8, #1, gr8
+ bnc icc0, 0, .Lret
+.Lfloop:
+ /* forward byte move loop */
+ addi gr5, #1, gr5
+ P(ldsb) @(gr9, gr5), gr4
+ cmp gr5, gr10, icc0
+ P(stb) gr4, @(gr8, gr5)
+ bc icc0, 0, .Lfloop
+ ret
+.Lbloop:
+ /* backward byte move loop body */
+ ldsb @(gr9,gr10),gr4
+ stb gr4,@(gr8,gr10)
+.Lback:
+ P(cmpi) gr10, #0, icc0
+ addi gr10, #-1, gr10
+ bne icc0, 0, .Lbloop
+.Lret:
+ ret
+.Lend:
+ .size EXT(__cmov),.Lend-EXT(__cmov)
+#endif
diff --git a/gcc/config/frv/libgcc-frv.ver b/gcc/config/frv/libgcc-frv.ver
new file mode 100644
index 000000000..6e27b4f9b
--- /dev/null
+++ b/gcc/config/frv/libgcc-frv.ver
@@ -0,0 +1,73 @@
+# Copyright (C) 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+GCC_3.4 {
+ # frv abi symbol names
+ __ftod
+ __ftoi
+ __ftoui
+ __dtoi
+ __ftoui
+ __dtoui
+ __ftoll
+ __dtoll
+ __ftoull
+ __dtoull
+ __itof
+ __lltof
+ __dtof
+ __itod
+ __lltof
+ __lltod
+ __addd
+ __subd
+ __muld
+ __divd
+ __addf
+ __subf
+ __mulf
+ __divf
+ __sllll
+ __srlll
+ __srall
+ __addll
+ __subll
+ __mulll
+ __umulll
+ __divll
+ __udivll
+ __modll
+ __umodll
+ __cmpll
+ __cmpf
+ __cmpd
+ __andll
+ __orll
+ __xorll
+ __notll
+ __cmov
+ __cmovd
+ __cmovh
+ __cmovw
+ __modi
+ __uitod
+ __uitof
+ __ulltod
+ __ulltof
+ __umodi
+}
diff --git a/gcc/config/frv/linux.h b/gcc/config/frv/linux.h
new file mode 100644
index 000000000..4f11618b4
--- /dev/null
+++ b/gcc/config/frv/linux.h
@@ -0,0 +1,75 @@
+/* Target macros for the FRV Linux port of GCC.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __FRV_LINUX_H__
+#define __FRV_LINUX_H__
+
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#define SUBTARGET_DRIVER_SELF_SPECS \
+ "%{!mno-fdpic:-mfdpic}",
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{mfdpic: -m elf32frvfd -z text} %{shared} %{pie} \
+ %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static}}"
+
+/* Support for compile-time default CPU. */
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }
+
+/* Define OS-specific predefined preprocessor macros. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__gnu_linux__"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=linux"); \
+ } while (0)
+
+#define HAS_INIT_SECTION 1
+#define INIT_SECTION_ASM_OP "\t.section .init,\"ax\""
+#define FINI_SECTION_ASM_OP "\t.section .fini,\"ax\""
+
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+asm (SECTION_OP); \
+asm ("ldi.p @(fp,4), gr15 ! call " #FUNC); \
+asm (TEXT_SECTION_ASM_OP);
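+/* Sketch of the intent (the FDPIC details live in frv.c): the ldi.p
+   reloads gr15, the FDPIC GOT pointer, from the frame before branching
+   to FUNC, since the called fragment expects gr15 to be valid.  */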
+
+#undef INVOKE__main
+
+#undef Twrite
+#define Twrite __write
+
+#endif /* __FRV_LINUX_H__ */
diff --git a/gcc/config/frv/modi.c b/gcc/config/frv/modi.c
new file mode 100644
index 000000000..d5a91fc0f
--- /dev/null
+++ b/gcc/config/frv/modi.c
@@ -0,0 +1,4 @@
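+/* 32-bit signed modulo helper for the FR-V ABI, exported as __modi
+   (see libgcc-frv.ver).  */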
+int __modi (int a, int b)
+{
+ return a % b;
+}
diff --git a/gcc/config/frv/predicates.md b/gcc/config/frv/predicates.md
new file mode 100644
index 000000000..aeef3d823
--- /dev/null
+++ b/gcc/config/frv/predicates.md
@@ -0,0 +1,1543 @@
+;; Predicate definitions for Frv.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if operand is a GPR register.
+
+(define_predicate "integer_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return GPR_AP_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return 1 if OP is a memory operand, or will be turned into one by
+;; reload.
+
+(define_predicate "frv_load_operand"
+ (match_code "reg,subreg,mem")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (reload_in_progress)
+ {
+ rtx tmp = op;
+ if (GET_CODE (tmp) == SUBREG)
+ tmp = SUBREG_REG (tmp);
+ if (GET_CODE (tmp) == REG
+ && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
+ op = reg_equiv_memory_loc[REGNO (tmp)];
+ }
+
+ return op && memory_operand (op, mode);
+})
+
+;; Return true if operand is a GPR register.  Do not allow SUBREGs
+;; here, in order to prevent a combine bug.
+
+(define_predicate "gpr_no_subreg_operand"
+ (match_code "reg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return GPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return 1 if operand is a GPR register or a FPR register.
+
+(define_predicate "gpr_or_fpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (GPR_P (regno) || FPR_P (regno) || regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return 1 if operand is a GPR register or 12-bit signed immediate.
+
+(define_predicate "gpr_or_int12_operand"
+ (match_code "reg,subreg,const_int,const")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return IN_RANGE (INTVAL (op), -2048, 2047);
+
+ if (got12_operand (op, mode))
+ return true;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return GPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return 1 if operand is a GPR register, an FPR register, or a 12-bit
+;; signed immediate.
+
+(define_predicate "gpr_fpr_or_int12_operand"
+ (match_code "reg,subreg,const_int")
+{
+ int regno;
+
+ if (GET_CODE (op) == CONST_INT)
+ return IN_RANGE (INTVAL (op), -2048, 2047);
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (GPR_P (regno) || FPR_P (regno) || regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return 1 if operand is a GPR register or a 10-bit signed immediate.
+
+(define_predicate "gpr_or_int10_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return IN_RANGE (INTVAL (op), -512, 511);
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return GPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return 1 if operand is a GPR register or an integer immediate.
+
+(define_predicate "gpr_or_int_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return TRUE;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return GPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return true if operand is something that can be an input for a move
+;; operation.
+
+(define_predicate "move_source_operand"
+ (match_code "reg,subreg,const_int,mem,const_double,const,symbol_ref,label_ref")
+{
+ rtx subreg;
+ enum rtx_code code;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return immediate_operand (op, mode);
+
+ case SUBREG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ subreg = SUBREG_REG (op);
+ code = GET_CODE (subreg);
+ if (code == MEM)
+ return frv_legitimate_address_p_1 (mode, XEXP (subreg, 0),
+ reload_completed, FALSE, FALSE);
+
+ return (code == REG);
+
+ case REG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ return TRUE;
+
+ case MEM:
+ return frv_legitimate_memory_operand (op, mode, FALSE);
+ }
+
+ return FALSE;
+})
+
+;; Return true if operand is something that can be an output for a
+;; move operation.
+
+(define_predicate "move_destination_operand"
+ (match_code "reg,subreg,mem")
+{
+ rtx subreg;
+ enum rtx_code code;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case SUBREG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ subreg = SUBREG_REG (op);
+ code = GET_CODE (subreg);
+ if (code == MEM)
+ return frv_legitimate_address_p_1 (mode, XEXP (subreg, 0),
+ reload_completed, FALSE, FALSE);
+
+ return (code == REG);
+
+ case REG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ return TRUE;
+
+ case MEM:
+ return frv_legitimate_memory_operand (op, mode, FALSE);
+ }
+
+ return FALSE;
+})
+
+;; Return true if the operand is a valid destination for a movcc_fp
+;; instruction. This means rejecting fcc_operands, since we need
+;; scratch registers to write to them.
+
+(define_predicate "movcc_fp_destination_operand"
+ (match_code "reg,subreg,mem")
+{
+ if (fcc_operand (op, mode))
+ return FALSE;
+
+ return move_destination_operand (op, mode);
+})
+
+;; Return true if operand is something that can be an input for a
+;; conditional move operation.
+
+(define_predicate "condexec_source_operand"
+ (match_code "reg,subreg,const_int,mem,const_double")
+{
+ rtx subreg;
+ enum rtx_code code;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return ZERO_P (op);
+
+ case SUBREG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ subreg = SUBREG_REG (op);
+ code = GET_CODE (subreg);
+ if (code == MEM)
+ return frv_legitimate_address_p_1 (mode, XEXP (subreg, 0),
+ reload_completed, TRUE, FALSE);
+
+ return (code == REG);
+
+ case REG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ return TRUE;
+
+ case MEM:
+ return frv_legitimate_memory_operand (op, mode, TRUE);
+ }
+
+ return FALSE;
+})
+
+;; Return true if operand is something that can be an output for a
+;; conditional move operation.
+
+(define_predicate "condexec_dest_operand"
+ (match_code "reg,subreg,mem")
+{
+ rtx subreg;
+ enum rtx_code code;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case SUBREG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ subreg = SUBREG_REG (op);
+ code = GET_CODE (subreg);
+ if (code == MEM)
+ return frv_legitimate_address_p_1 (mode, XEXP (subreg, 0),
+ reload_completed, TRUE, FALSE);
+
+ return (code == REG);
+
+ case REG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ return TRUE;
+
+ case MEM:
+ return frv_legitimate_memory_operand (op, mode, TRUE);
+ }
+
+ return FALSE;
+})
+
+;; Return true if operand is a register of any flavor or a 0 of the
+;; appropriate type.
+
+(define_predicate "reg_or_0_operand"
+ (match_code "reg,subreg,const_int,const_double")
+{
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case REG:
+ case SUBREG:
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ return register_operand (op, mode);
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return ZERO_P (op);
+ }
+
+ return FALSE;
+})
+
+;; Return true if operand is the link register.
+
+(define_predicate "lr_operand"
+ (match_code "reg")
+{
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (REGNO (op) != LR_REGNO && REGNO (op) < FIRST_PSEUDO_REGISTER)
+ return FALSE;
+
+ return TRUE;
+})
+
+;; Return true if operand is a gpr register or a valid memory operand.
+
+(define_predicate "gpr_or_memory_operand"
+ (match_code "reg,subreg,mem")
+{
+ return (integer_register_operand (op, mode)
+ || frv_legitimate_memory_operand (op, mode, FALSE));
+})
+
+;; Return true if operand is a gpr register, a valid memory operand,
+;; or a memory operand that can be made valid using an additional gpr
+;; register.
+
+(define_predicate "gpr_or_memory_operand_with_scratch"
+ (match_code "reg,subreg,mem")
+{
+ rtx addr;
+
+ if (gpr_or_memory_operand (op, mode))
+ return TRUE;
+
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ if (GET_MODE (op) != mode)
+ return FALSE;
+
+ addr = XEXP (op, 0);
+
+ if (GET_CODE (addr) != PLUS)
+ return FALSE;
+
+ if (!integer_register_operand (XEXP (addr, 0), Pmode))
+ return FALSE;
+
+ if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ return FALSE;
+
+ return TRUE;
+})
+
+;; Return true if operand is an fpr register or a valid memory
+;; operand.
+
+(define_predicate "fpr_or_memory_operand"
+ (match_code "reg,subreg,mem")
+{
+ return (fpr_operand (op, mode)
+ || frv_legitimate_memory_operand (op, mode, FALSE));
+})
+
+;; Return 1 if operand is a 12-bit signed immediate.
+
+(define_predicate "int12_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ return IN_RANGE (INTVAL (op), -2048, 2047);
+})
+
+;; Return 1 if operand is an integer constant that takes two
+;; instructions to load and can be split into sethi/setlo
+;; instructions.
+
+(define_predicate "int_2word_operand"
+ (match_code "const_int,const_double,symbol_ref,label_ref,const")
+{
+ HOST_WIDE_INT value;
+ REAL_VALUE_TYPE rv;
+ long l;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ break;
+
+ case LABEL_REF:
+ if (TARGET_FDPIC)
+ return FALSE;
+
+ return (flag_pic == 0);
+
+ case CONST:
+ if (flag_pic || TARGET_FDPIC)
+ return FALSE;
+
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
+ op = XEXP (op, 0);
+ return GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF;
+
+ case SYMBOL_REF:
+ if (TARGET_FDPIC)
+ return FALSE;
+
+ /* small data references are already 1 word */
+ return (flag_pic == 0) && (! SYMBOL_REF_SMALL_P (op));
+
+ case CONST_INT:
+ return ! IN_RANGE (INTVAL (op), -32768, 32767);
+
+ case CONST_DOUBLE:
+ if (GET_MODE (op) == SFmode)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ value = l;
+ return ! IN_RANGE (value, -32768, 32767);
+ }
+ else if (GET_MODE (op) == VOIDmode)
+ {
+ value = CONST_DOUBLE_LOW (op);
+ return ! IN_RANGE (value, -32768, 32767);
+ }
+ break;
+ }
+
+ return FALSE;
+})
+
+;; Return true if operand is the uClinux PIC register.
+
+(define_predicate "fdpic_operand"
+ (match_code "reg")
+{
+ if (!TARGET_FDPIC)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (REGNO (op) != FDPIC_REGNO && REGNO (op) < FIRST_PSEUDO_REGISTER)
+ return FALSE;
+
+ return TRUE;
+})
+
+;; Return true if operand is the FDPIC function-pointer register.
+
+(define_predicate "fdpic_fptr_operand"
+ (match_code "reg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+ if (GET_CODE (op) != REG)
+ return FALSE;
+ if (REGNO (op) != FDPIC_FPTR_REGNO && REGNO (op) < FIRST_PSEUDO_REGISTER)
+ return FALSE;
+ return TRUE;
+})
+
+;; An address operand that may use a pair of registers, an addressing
+;; mode that we reject in general.
+
+(define_predicate "ldd_address_operand"
+ (match_code "reg,subreg,plus")
+{
+ if (GET_MODE (op) != mode && GET_MODE (op) != VOIDmode)
+ return FALSE;
+
+ return frv_legitimate_address_p_1 (DImode, op, reload_completed, FALSE, TRUE);
+})
+
+;; Return true if operand is a const unspec using one of the 12-bit
+;; relocations: GOT12, GOTOFF12, FUNCDESC_GOT12, FUNCDESC_GOTOFF12,
+;; GPREL12 or TLSMOFF12.
+
+(define_predicate "got12_operand"
+ (match_code "const")
+{
+ struct frv_unspec unspec;
+
+ if (frv_const_unspec_p (op, &unspec))
+ switch (unspec.reloc)
+ {
+ case R_FRV_GOT12:
+ case R_FRV_GOTOFF12:
+ case R_FRV_FUNCDESC_GOT12:
+ case R_FRV_FUNCDESC_GOTOFF12:
+ case R_FRV_GPREL12:
+ case R_FRV_TLSMOFF12:
+ return true;
+ }
+ return false;
+})
+
+;; Return true if OP is a valid const-unspec expression.
+
+(define_predicate "const_unspec_operand"
+ (match_code "const")
+{
+ struct frv_unspec unspec;
+
+ return frv_const_unspec_p (op, &unspec);
+})
+
+;; Return true if operand is an icc register.
+
+(define_predicate "icc_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ return ICC_OR_PSEUDO_P (regno);
+})
+
+;; Return true if operand is an fcc register.
+
+(define_predicate "fcc_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ return FCC_OR_PSEUDO_P (regno);
+})
+
+;; Return true if operand is either an fcc or icc register.
+
+(define_predicate "cc_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (CC_OR_PSEUDO_P (regno))
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return true if operand is an integer CCR register.
+
+(define_predicate "icr_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ return ICR_OR_PSEUDO_P (regno);
+})
+
+;; Return true if operand is an fcr (floating-point CCR) register.
+
+(define_predicate "fcr_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ return FCR_OR_PSEUDO_P (regno);
+})
+
+;; Return true if operand is either an fcr or icr (CCR) register.
+
+(define_predicate "cr_operand"
+ (match_code "reg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (CR_OR_PSEUDO_P (regno))
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return true if operand is a FPR register.
+
+(define_predicate "fpr_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return FPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return true if operand is an even GPR or FPR register.
+
+(define_predicate "even_reg_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ if (GPR_P (regno))
+ return (((regno - GPR_FIRST) & 1) == 0);
+
+ if (FPR_P (regno))
+ return (((regno - FPR_FIRST) & 1) == 0);
+
+ return FALSE;
+})
+
+;; Return true if operand is an odd GPR or FPR register.
+
+(define_predicate "odd_reg_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ /* Assume that reload will give us an even register. */
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return FALSE;
+
+ if (GPR_P (regno))
+ return (((regno - GPR_FIRST) & 1) != 0);
+
+ if (FPR_P (regno))
+ return (((regno - FPR_FIRST) & 1) != 0);
+
+ return FALSE;
+})
+
+;; Return true if operand is an even GPR register.
+
+(define_predicate "even_gpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ if (! GPR_P (regno))
+ return FALSE;
+
+ return (((regno - GPR_FIRST) & 1) == 0);
+})
+
+;; Return true if operand is an odd GPR register.
+
+(define_predicate "odd_gpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ /* Assume that reload will give us an even register. */
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return FALSE;
+
+ if (! GPR_P (regno))
+ return FALSE;
+
+ return (((regno - GPR_FIRST) & 1) != 0);
+})
+
+;; Return true if operand is a quad-aligned FPR register.
+
+(define_predicate "quad_fpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ if (! FPR_P (regno))
+ return FALSE;
+
+ return (((regno - FPR_FIRST) & 3) == 0);
+})
+
+;; Return true if operand is an even FPR register.
+
+(define_predicate "even_fpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return TRUE;
+
+ if (! FPR_P (regno))
+ return FALSE;
+
+ return (((regno - FPR_FIRST) & 1) == 0);
+})
+
+;; Return true if operand is an odd FPR register.
+
+(define_predicate "odd_fpr_operand"
+ (match_code "reg,subreg")
+{
+ int regno;
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ regno = REGNO (op);
+ /* Assume that reload will give us an even register. */
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return FALSE;
+
+ if (! FPR_P (regno))
+ return FALSE;
+
+ return (((regno - FPR_FIRST) & 1) != 0);
+})
+
+;; Return true if operand is a 2-word memory operand that can be loaded
+;; or stored in a single instruction.  We assume the stack and frame
+;; pointers are suitably aligned, as are variables in the small data
+;; area.  FIXME -- at some point we should recognize other globals and
+;; statics.  We can't assume that any old pointer is aligned, given
+;; that arguments could be passed on an odd word on the stack and the
+;; address taken and passed through to another function.
+
+(define_predicate "dbl_memory_one_insn_operand"
+ (match_code "mem")
+{
+ rtx addr;
+ rtx addr_reg;
+
+ if (! TARGET_DWORD)
+ return FALSE;
+
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ if (mode != VOIDmode && GET_MODE_SIZE (mode) != 2*UNITS_PER_WORD)
+ return FALSE;
+
+ addr = XEXP (op, 0);
+ if (GET_CODE (addr) == REG)
+ addr_reg = addr;
+
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addr0 = XEXP (addr, 0);
+ rtx addr1 = XEXP (addr, 1);
+
+ if (GET_CODE (addr0) != REG)
+ return FALSE;
+
+ if (got12_operand (addr1, VOIDmode))
+ return TRUE;
+
+ if (GET_CODE (addr1) != CONST_INT)
+ return FALSE;
+
+ if ((INTVAL (addr1) & 7) != 0)
+ return FALSE;
+
+ addr_reg = addr0;
+ }
+
+ else
+ return FALSE;
+
+ if (addr_reg == frame_pointer_rtx || addr_reg == stack_pointer_rtx)
+ return TRUE;
+
+ return FALSE;
+})
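+
+;; For illustration (hypothetical RTL): with TARGET_DWORD,
+;; (mem:DI (plus:SI (reg:SI sp) (const_int 8))) is accepted, since the
+;; stack pointer is assumed suitably aligned and the offset is a
+;; multiple of 8; an offset of 4 would fail the alignment check.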
+
+;; Return true if operand is a 2-word memory operand that needs two
+;; instructions to load or store.
+
+(define_predicate "dbl_memory_two_insn_operand"
+ (match_code "mem")
+{
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ if (mode != VOIDmode && GET_MODE_SIZE (mode) != 2*UNITS_PER_WORD)
+ return FALSE;
+
+ if (! TARGET_DWORD)
+ return TRUE;
+
+ return ! dbl_memory_one_insn_operand (op, mode);
+})
+
+;; Return true if operand is an address suitable for a call.
+
+(define_predicate "call_operand"
+ (match_code "reg,subreg,const_int,const,symbol_ref")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode && GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return !TARGET_LONG_CALLS || SYMBOL_REF_LOCAL_P (op);
+
+ /* Note this doesn't allow reg+reg or reg+imm12 addressing (which should
+ never occur anyway), but it keeps reload from mishandling a call
+ through a pointer in a function that calls vfork/setjmp, etc., where
+ all of the registers must be flushed to the stack.  */
+ return gpr_or_int12_operand (op, mode);
+})
+
+;; Return true if operand is an address suitable for a sibling call.
+
+(define_predicate "sibcall_operand"
+ (match_code "reg,subreg,const_int,const")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode && GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ /* Note this doesn't allow reg+reg or reg+imm12 addressing (which should
+ never occur anyway), but it keeps reload from mishandling a call
+ through a pointer in a function that calls vfork/setjmp, etc., where
+ all of the registers must be flushed to the stack.  */
+ return gpr_or_int12_operand (op, mode);
+})
+
+;; Return 1 if operand is an integer constant with the bottom 16 bits
+;; clear.
+
+(define_predicate "upper_int16_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ return ((INTVAL (op) & 0xffff) == 0);
+})
+
+;; Return 1 if operand is a 16-bit unsigned immediate.
+
+(define_predicate "uint16_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ return IN_RANGE (INTVAL (op), 0, 0xffff);
+})
+
+;; Return 1 if OP is a SYMBOL_REF, a CONST_INT or a symbol-plus-offset CONST.
+
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,const,const_int")
+{
+ enum rtx_code c = GET_CODE (op);
+
+ if (c == CONST)
+ {
+ /* Allow (const:SI (plus:SI (symbol_ref) (const_int))). */
+ return GET_MODE (op) == SImode
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT;
+ }
+
+ return c == SYMBOL_REF || c == CONST_INT;
+})
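+
+;; For illustration (hypothetical RTL):
+;; (const:SI (plus:SI (symbol_ref:SI "x") (const_int 4))) is accepted,
+;; as are a bare SYMBOL_REF and a CONST_INT; any other CONST form is
+;; rejected.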
+
+;; Return true if operator is an integer or floating-point relational
+;; operator.
+
+(define_predicate "relational_operator"
+ (match_code "eq,ne,le,lt,ge,gt,leu,ltu,geu,gtu")
+{
+ return (integer_relational_operator (op, mode)
+ || float_relational_operator (op, mode));
+})
+
+;; Return true if OP is a relational operator suitable for CCmode,
+;; CC_UNSmode or CC_NZmode.
+
+(define_predicate "integer_relational_operator"
+ (match_code "eq,ne,le,lt,ge,gt,leu,ltu,geu,gtu")
+{
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return FALSE;
+
+ /* The allowable relations depend on the mode of the ICC register. */
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case EQ:
+ case NE:
+ case LT:
+ case GE:
+ return (GET_MODE (XEXP (op, 0)) == CC_NZmode
+ || GET_MODE (XEXP (op, 0)) == CCmode);
+
+ case LE:
+ case GT:
+ return GET_MODE (XEXP (op, 0)) == CCmode;
+
+ case GTU:
+ case GEU:
+ case LTU:
+ case LEU:
+ return (GET_MODE (XEXP (op, 0)) == CC_NZmode
+ || GET_MODE (XEXP (op, 0)) == CC_UNSmode);
+ }
+})
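+
+;; For illustration (hypothetical RTL): (gt (reg:CC icc0) (const_int 0))
+;; is accepted, but (gt (reg:CC_NZ icc0) (const_int 0)) is not, because
+;; CC_NZmode records only the negative and zero flags.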
+
+;; Return true if operator is a floating point relational operator.
+
+(define_predicate "float_relational_operator"
+ (match_code "eq,ne,le,lt,ge,gt")
+{
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case EQ: case NE:
+ case LE: case LT:
+ case GE: case GT:
+#if 0
+ case UEQ: case UNE:
+ case ULE: case ULT:
+ case UGE: case UGT:
+ case ORDERED:
+ case UNORDERED:
+#endif
+ return GET_MODE (XEXP (op, 0)) == CC_FPmode;
+ }
+})
+
+;; Return true if operator is EQ/NE of a conditional execution
+;; register.
+
+(define_predicate "ccr_eqne_operator"
+ (match_code "eq,ne")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+ rtx op0;
+ rtx op1;
+ int regno;
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case EQ:
+ case NE:
+ break;
+ }
+
+ op1 = XEXP (op, 1);
+ if (op1 != const0_rtx)
+ return FALSE;
+
+ op0 = XEXP (op, 0);
+ if (GET_CODE (op0) != REG)
+ return FALSE;
+
+ regno = REGNO (op0);
+ if (op_mode == CC_CCRmode && CR_OR_PSEUDO_P (regno))
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return true if operator is a minimum or maximum operator (both
+;; signed and unsigned).
+
+(define_predicate "minmax_operator"
+ (match_code "smin,smax,umin,umax")
+{
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case SMIN:
+ case SMAX:
+ case UMIN:
+ case UMAX:
+ break;
+ }
+
+ return TRUE;
+})
+
+;; Return true if operator is an integer binary operator that can be
+;; executed conditionally and takes one cycle.
+
+(define_predicate "condexec_si_binary_operator"
+ (match_code "plus,minus,and,ior,xor,ashift,ashiftrt,lshiftrt")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ return TRUE;
+ }
+})
+
+;; Return true if operator is an integer binary operator that can be
+;; executed conditionally by a media instruction.
+
+(define_predicate "condexec_si_media_operator"
+ (match_code "and,ior,xor")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case AND:
+ case IOR:
+ case XOR:
+ return TRUE;
+ }
+})
+
+;; Return true if operator is an integer division operator that can be
+;; executed conditionally.
+
+(define_predicate "condexec_si_divide_operator"
+ (match_code "div,udiv")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case DIV:
+ case UDIV:
+ return TRUE;
+ }
+})
+
+;; Return true if operator is an integer unary operator that can be
+;; executed conditionally.
+
+(define_predicate "condexec_si_unary_operator"
+ (match_code "not,neg")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case NEG:
+ case NOT:
+ return TRUE;
+ }
+})
+
+;; Return true if operator is an addition or subtraction
+;; expression. Such expressions can be evaluated conditionally by
+;; floating-point instructions.
+
+(define_predicate "condexec_sf_add_operator"
+ (match_code "plus,minus")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case PLUS:
+ case MINUS:
+ return TRUE;
+ }
+})
+
+;; Return true if operator is a conversion-type expression that can be
+;; evaluated conditionally by floating-point instructions.
+
+(define_predicate "condexec_sf_conv_operator"
+ (match_code "abs,neg")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (mode != VOIDmode && op_mode != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case NEG:
+ case ABS:
+ return TRUE;
+ }
+})
+
+;; Return true if OP is an integer binary operator that can be
+;; combined with a (set ... (compare:CC_NZ ...)) pattern.
+
+(define_predicate "intop_compare_operator"
+ (match_code "plus,minus,and,ior,xor,ashift,ashiftrt,lshiftrt")
+{
+ if (mode != VOIDmode && GET_MODE (op) != mode)
+ return FALSE;
+
+ switch (GET_CODE (op))
+ {
+ default:
+ return FALSE;
+
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ return GET_MODE (op) == SImode;
+ }
+})
+
+;; Return 1 if operand is an FPR register or a 6-bit signed immediate.
+
+(define_predicate "fpr_or_int6_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return IN_RANGE (INTVAL (op), -32, 31);
+
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (GET_CODE (SUBREG_REG (op)) != REG)
+ return register_operand (op, mode);
+
+ op = SUBREG_REG (op);
+ }
+
+ if (GET_CODE (op) != REG)
+ return FALSE;
+
+ return FPR_OR_PSEUDO_P (REGNO (op));
+})
+
+;; Return 1 if operand is a 6-bit signed immediate.
+
+(define_predicate "int6_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) != CONST_INT)
+ return FALSE;
+
+ return IN_RANGE (INTVAL (op), -32, 31);
+})
+
+;; Return 1 if operand is a 5-bit signed immediate.
+
+(define_predicate "int5_operand"
+ (match_code "const_int")
+{
+ return GET_CODE (op) == CONST_INT && IN_RANGE (INTVAL (op), -16, 15);
+})
+
+;; Return 1 if operand is a 5-bit unsigned immediate.
+
+(define_predicate "uint5_operand"
+ (match_code "const_int")
+{
+ return GET_CODE (op) == CONST_INT && IN_RANGE (INTVAL (op), 0, 31);
+})
+
+;; Return 1 if operand is a 4-bit unsigned immediate.
+
+(define_predicate "uint4_operand"
+ (match_code "const_int")
+{
+ return GET_CODE (op) == CONST_INT && IN_RANGE (INTVAL (op), 0, 15);
+})
+
+;; Return 1 if operand is a 1-bit unsigned immediate (0 or 1).
+
+(define_predicate "uint1_operand"
+ (match_code "const_int")
+{
+ return GET_CODE (op) == CONST_INT && IN_RANGE (INTVAL (op), 0, 1);
+})
+
+;; Return 1 if operand is a valid ACC register number.
+
+(define_predicate "acc_operand"
+ (match_code "reg,subreg")
+{
+ return ((mode == VOIDmode || mode == GET_MODE (op))
+ && REG_P (op) && ACC_P (REGNO (op))
+ && ((REGNO (op) - ACC_FIRST) & ~ACC_MASK) == 0);
+})
+
+;; Return 1 if operand is a valid even ACC register number.
+
+(define_predicate "even_acc_operand"
+ (match_code "reg,subreg")
+{
+ return acc_operand (op, mode) && ((REGNO (op) - ACC_FIRST) & 1) == 0;
+})
+
+;; Return 1 if operand is a valid quad-aligned ACC register number.
+
+(define_predicate "quad_acc_operand"
+ (match_code "reg,subreg")
+{
+ return acc_operand (op, mode) && ((REGNO (op) - ACC_FIRST) & 3) == 0;
+})
+
+;; Return 1 if operand is a valid ACCG register number.
+
+(define_predicate "accg_operand"
+ (match_code "reg,subreg")
+{
+ return ((mode == VOIDmode || mode == GET_MODE (op))
+ && REG_P (op) && ACCG_P (REGNO (op))
+ && ((REGNO (op) - ACCG_FIRST) & ~ACC_MASK) == 0);
+})
diff --git a/gcc/config/frv/t-frv b/gcc/config/frv/t-frv
new file mode 100644
index 000000000..0c58bb167
--- /dev/null
+++ b/gcc/config/frv/t-frv
@@ -0,0 +1,113 @@
+# Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Name of assembly file containing libgcc1 functions.
+# This entry must be present, but it can be empty if the target does
+# not need any assembler functions to support its code generation.
+#
+# Alternatively if assembler functions *are* needed then define the
+# entries below:
+CROSS_LIBGCC1 = libgcc1-asm.a
+LIB1ASMSRC = frv/lib1funcs.asm
+LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov
+LIB2FUNCS_EXTRA = cmovh.c cmovw.c cmovd.c modi.c umodi.c uitof.c uitod.c ulltof.c ulltod.c
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+# If any special flags are necessary when building libgcc2 put them here.
+TARGET_LIBGCC2_CFLAGS =
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#include "config/frv/frv-abi.h"' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#include "config/frv/frv-abi.h"' > dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+cmovh.c: $(srcdir)/config/frv/cmovh.c
+ $(LN_S) $(srcdir)/config/frv/cmovh.c .
+
+cmovw.c: $(srcdir)/config/frv/cmovw.c
+ $(LN_S) $(srcdir)/config/frv/cmovw.c .
+
+cmovd.c: $(srcdir)/config/frv/cmovd.c
+ $(LN_S) $(srcdir)/config/frv/cmovd.c .
+
+modi.c: $(srcdir)/config/frv/modi.c
+ $(LN_S) $(srcdir)/config/frv/modi.c .
+
+umodi.c: $(srcdir)/config/frv/umodi.c
+ $(LN_S) $(srcdir)/config/frv/umodi.c .
+
+uitof.c: $(srcdir)/config/frv/uitof.c
+ $(LN_S) $(srcdir)/config/frv/uitof.c .
+
+uitod.c: $(srcdir)/config/frv/uitod.c
+ $(LN_S) $(srcdir)/config/frv/uitod.c .
+
+ulltof.c: $(srcdir)/config/frv/ulltof.c
+ $(LN_S) $(srcdir)/config/frv/ulltof.c .
+
+ulltod.c: $(srcdir)/config/frv/ulltod.c
+ $(LN_S) $(srcdir)/config/frv/ulltod.c .
+
+# Build frvbegin.o and frvend.o
+EXTRA_MULTILIB_PARTS=frvbegin.o frvend.o
+
+# Compile two additional files that are linked with every program
+# linked using GCC on systems using COFF or ELF, for the sake of C++
+# constructors.
+
+FRVSTUFF_CFLAGS = $(TARGET_LIBGCC2_CFLAGS)
+
+$(T)frvbegin$(objext): $(srcdir)/config/frv/frvbegin.c $(GCC_PASSES) \
+ $(CONFIG_H) defaults.h unwind-dw2-fde.h gbl-ctors.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) $(FRVSTUFF_CFLAGS) \
+ -c $(srcdir)/config/frv/frvbegin.c -o $(T)frvbegin$(objext)
+
+$(T)frvend$(objext): $(srcdir)/config/frv/frvend.c $(GCC_PASSES) \
+ $(CONFIG_H) defaults.h unwind-dw2-fde.h gbl-ctors.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) $(FRVSTUFF_CFLAGS) \
+ -c $(srcdir)/config/frv/frvend.c -o $(T)frvend$(objext)
+
+# Enable the following if multilibs are needed.
+# See gcc/genmultilib, gcc/gcc.texi and gcc/tm.texi for a
+# description of the options and their values.
+#
+#MULTILIB_OPTIONS = mcpu=fr500/mcpu=tomcat/mcpu=simple/mcpu=frv msoft-float mdword/mno-dword
+#MULTILIB_DIRNAMES = fr500 tomcat simple frv nof dw no-dw
+#MULTILIB_MATCHES = mcpu?simple=mcpu?fr300 mno-double=mcpu?fr500 mcpu?frv=mdouble
+#MULTILIB_EXCEPTIONS = *mcpu=simple/*msoft-float* *mcpu=frv/*msoft-float*
+#MULTILIB_EXTRA_OPTS = mlibrary-pic
+
+MULTILIB_OPTIONS = mcpu=fr400/mcpu=fr550 mno-pack mlibrary-pic/mfdpic
+MULTILIB_DIRNAMES = fr400 fr550 unpacked pic fdpic
+MULTILIB_MATCHES = mcpu?simple=mcpu?fr300 \
+ mlibrary-pic=multilib-library-pic \
+ mcpu?fr400=mcpu?fr405 mcpu?fr400=mcpu?fr450
+MULTILIB_EXCEPTIONS = mcpu=frv/mno-pack* mcpu=simple/mno-pack*
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_HEADERS = $(srcdir)/config/frv/frv-asm.h
diff --git a/gcc/config/frv/t-linux b/gcc/config/frv/t-linux
new file mode 100644
index 000000000..13eb5369e
--- /dev/null
+++ b/gcc/config/frv/t-linux
@@ -0,0 +1,33 @@
+# Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We don't want multilibs.
+MULTILIB_OPTIONS=
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES=
+MULTILIB_EXCEPTIONS=
+MULTILIB_EXTRA_OPTS=
+
+# We don't use frvbegin.o or frvend.o.
+EXTRA_MULTILIB_PARTS =
+
+CRTSTUFF_T_CFLAGS = -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \
+ $(srcdir)/config/frv/libgcc-frv.ver
diff --git a/gcc/config/frv/uitod.c b/gcc/config/frv/uitod.c
new file mode 100644
index 000000000..14290ab6b
--- /dev/null
+++ b/gcc/config/frv/uitod.c
@@ -0,0 +1,4 @@
+double __uitod (unsigned int a)
+{
+ return a;
+}
diff --git a/gcc/config/frv/uitof.c b/gcc/config/frv/uitof.c
new file mode 100644
index 000000000..059bc7c74
--- /dev/null
+++ b/gcc/config/frv/uitof.c
@@ -0,0 +1,4 @@
+float __uitof (unsigned int a)
+{
+ return a;
+}
diff --git a/gcc/config/frv/ulltod.c b/gcc/config/frv/ulltod.c
new file mode 100644
index 000000000..e6bee1208
--- /dev/null
+++ b/gcc/config/frv/ulltod.c
@@ -0,0 +1,4 @@
+double __ulltod (unsigned long long a)
+{
+ return a;
+}
diff --git a/gcc/config/frv/ulltof.c b/gcc/config/frv/ulltof.c
new file mode 100644
index 000000000..29cdfd4d2
--- /dev/null
+++ b/gcc/config/frv/ulltof.c
@@ -0,0 +1,4 @@
+float __ulltof (unsigned long long a)
+{
+ return a;
+}
diff --git a/gcc/config/frv/umodi.c b/gcc/config/frv/umodi.c
new file mode 100644
index 000000000..4ffe5ad81
--- /dev/null
+++ b/gcc/config/frv/umodi.c
@@ -0,0 +1,4 @@
+unsigned int __umodi (unsigned int a, unsigned int b)
+{
+ return a % b;
+}
diff --git a/gcc/config/fused-madd.opt b/gcc/config/fused-madd.opt
new file mode 100644
index 000000000..b1214cbb4
--- /dev/null
+++ b/gcc/config/fused-madd.opt
@@ -0,0 +1,25 @@
+; -mfused-madd option (some targets only).
+;
+; Copyright (C) 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mfused-madd
+Target Undocumented Alias(ffp-contract=, fast, off) Warn(%<-mfused-madd%> is deprecated; use %<-ffp-contract=%> instead)
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/g.opt b/gcc/config/g.opt
new file mode 100644
index 000000000..b0e30348d
--- /dev/null
+++ b/gcc/config/g.opt
@@ -0,0 +1,30 @@
+; -G option (small data, some targets only).
+
+; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+G
+Target Joined Separate UInteger Var(g_switch_value)
+-G<number> Put global and static data smaller than <number> bytes into a special section (on some targets)
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/glibc-stdint.h b/gcc/config/glibc-stdint.h
new file mode 100644
index 000000000..4f8fe07a1
--- /dev/null
+++ b/gcc/config/glibc-stdint.h
@@ -0,0 +1,55 @@
+/* Definitions for <stdint.h> types on systems using GNU libc or uClibc.
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
+#define INT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
+#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
+#define UINT_FAST32_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
+#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
+#define UINTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
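+
+/* Illustrative consequence of the definitions above: with
+ LONG_TYPE_SIZE == 32, int64_t is "long long int" and intptr_t is
+ "int"; with LONG_TYPE_SIZE == 64, both map onto "long int".  */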
diff --git a/gcc/config/gnu-user.h b/gcc/config/gnu-user.h
new file mode 100644
index 000000000..1c4e71d1b
--- /dev/null
+++ b/gcc/config/gnu-user.h
@@ -0,0 +1,97 @@
+/* Definitions for systems using, at least optionally, a GNU
+ (glibc-based) userspace or other userspace with libc derived from
+ glibc (e.g. uClibc) or for which similar specs are appropriate.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2003, 2004, 2005, 2006,
+ 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu (hjl@lucon.org).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Provide a STARTFILE_SPEC appropriate for GNU userspace.  Here we add
+ the GNU userspace magical crtbegin.o file (see crtstuff.c) which
+ provides part of the support for getting C++ file-scope static
+ objects constructed before entering `main'.  */
+
+#if defined HAVE_LD_PIE
+#define GNU_USER_TARGET_STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define GNU_USER_TARGET_STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC
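+
+/* Illustrative readings of the spec above: -static selects crt1.o,
+ crti.o and crtbeginT.o; -pie (when HAVE_LD_PIE) selects Scrt1.o,
+ crti.o and crtbeginS.o; a plain dynamic executable gets crt1.o,
+ crti.o and crtbegin.o.  */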
+
+/* Provide an ENDFILE_SPEC appropriate for GNU userspace.  Here we tack on
+ the GNU userspace magical crtend.o file (see crtstuff.c) which
+ provides part of the support for getting C++ file-scope static
+ objects constructed before entering `main', followed by a normal
+ GNU userspace "finalizer" file, `crtn.o'.  */
+
+#define GNU_USER_TARGET_ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC
+
+/* This is for -profile to use -lc_p instead of -lc. */
+#define GNU_USER_TARGET_CC1_SPEC "%{profile:-p}"
+#ifndef CC1_SPEC
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC
+#endif
+
+/* The GNU C++ standard library requires that these macros be defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+#define GNU_USER_TARGET_LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}}"
+#undef LIB_SPEC
+#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
+
+#if defined(HAVE_LD_EH_FRAME_HDR)
+#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif
+
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
+
+#define TARGET_POSIX_IO
diff --git a/gcc/config/gnu-user.opt b/gcc/config/gnu-user.opt
new file mode 100644
index 000000000..7624fdfd5
--- /dev/null
+++ b/gcc/config/gnu-user.opt
@@ -0,0 +1,39 @@
+; Options for systems using gnu-user.h.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+posix
+Driver
+
+profile
+Driver
+
+pthread
+Driver
+
+rdynamic
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/gnu.h b/gcc/config/gnu.h
new file mode 100644
index 000000000..fcf6ebcd7
--- /dev/null
+++ b/gcc/config/gnu.h
@@ -0,0 +1,46 @@
+/* Configuration common to all targets running the GNU system. */
+
+/*
+Copyright (C) 1994, 1995, 1997, 1998, 1999, 2002, 2003, 2004, 2007, 2008, 2011
+Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Provide GCC options for standard feature-test macros. */
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+
+/* Default C library spec. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{pthread:-lpthread} %{pg|p|profile:-lc_p;:-lc}"
+
+/* Standard include directory. In GNU, "/usr" is a four-letter word. */
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/include"
+
+#undef LINUX_TARGET_OS_CPP_BUILTINS
+#define LINUX_TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__gnu_hurd__"); \
+ builtin_define ("__GNU__"); \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("MACH"); \
+ builtin_assert ("system=gnu"); \
+ builtin_assert ("system=mach"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } while (0)
diff --git a/gcc/config/h8300/clzhi2.c b/gcc/config/h8300/clzhi2.c
new file mode 100644
index 000000000..54db7b9c5
--- /dev/null
+++ b/gcc/config/h8300/clzhi2.c
@@ -0,0 +1,35 @@
+/* The implementation of __clzhi2.
+ Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+int __clzhi2 (unsigned short x);
+
+int
+__clzhi2 (unsigned short x)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ if (x & ((unsigned short) 1 << (15 - i)))
+ break;
+ return i;
+}
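+
+/* For example (illustrative): __clzhi2 (0x0010) returns 11, since bit 4
+ is the most significant set bit and the eleven bits above it are
+ clear.  Note that this implementation returns 16 for x == 0.  */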
diff --git a/gcc/config/h8300/crti.asm b/gcc/config/h8300/crti.asm
new file mode 100644
index 000000000..e457409a1
--- /dev/null
+++ b/gcc/config/h8300/crti.asm
@@ -0,0 +1,63 @@
+/* Copyright (C) 2001, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The code in sections .init and .fini is supposed to be a single
+ regular function. The function in .init is called directly from
+ start in crt0.asm.  The function in .fini is registered with atexit()
+ in crt0.asm too.
+
+ crti.asm contributes the prologue of a function to these sections,
+ and crtn.asm supplies the epilogue.  STARTFILE_SPEC should list
+ crti.o before any other object files that might add code to .init
+ or .fini sections, and ENDFILE_SPEC should list crtn.o after any
+ such object files. */
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+ .h8300hn
+#else
+ .h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+ .h8300sn
+#else
+ .h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+ .h8300sxn
+#else
+ .h8300sx
+#endif
+#endif
+
+ .section .init
+ .global __init
+__init:
+ .section .fini
+ .global __fini
+__fini:
diff --git a/gcc/config/h8300/crtn.asm b/gcc/config/h8300/crtn.asm
new file mode 100644
index 000000000..3115fcbc3
--- /dev/null
+++ b/gcc/config/h8300/crtn.asm
@@ -0,0 +1,53 @@
+/* Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+ This file was adapted from glibc sources.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* See an explanation about .init and .fini in crti.asm. */
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+ .h8300hn
+#else
+ .h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+ .h8300sn
+#else
+ .h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+ .h8300sxn
+#else
+ .h8300sx
+#endif
+#endif
+ .section .init
+ rts
+
+ .section .fini
+ rts
diff --git a/gcc/config/h8300/ctzhi2.c b/gcc/config/h8300/ctzhi2.c
new file mode 100644
index 000000000..ba6f8e908
--- /dev/null
+++ b/gcc/config/h8300/ctzhi2.c
@@ -0,0 +1,35 @@
+/* The implementation of __ctzhi2.
+ Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+int __ctzhi2 (unsigned short x);
+
+int
+__ctzhi2 (unsigned short x)
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ if (x & ((unsigned short) 1 << i))
+ break;
+ return i;
+}
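+
+/* For example (illustrative): __ctzhi2 (0x0010) returns 4, since bit 4
+ is the least significant set bit.  This implementation returns 16 for
+ x == 0.  */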
diff --git a/gcc/config/h8300/elf.h b/gcc/config/h8300/elf.h
new file mode 100644
index 000000000..693381d8b
--- /dev/null
+++ b/gcc/config/h8300/elf.h
@@ -0,0 +1,44 @@
+/* Definitions of target machine for GNU compiler.
+ Renesas H8/300 version generating elf
+ Copyright (C) 2001, 2002, 2003, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com),
+ Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_H8300_ELF_H
+#define GCC_H8300_ELF_H
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s %{pg:gcrtn.o%s}%{!pg:crtn.o%s}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\
+ %{pg:gcrti.o%s}%{!pg:crti.o%s} \
+ crtbegin.o%s"
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mh:%{mn:-m h8300hnelf}} %{mh:%{!mn:-m h8300helf}} %{ms:%{mn:-m h8300snelf}} %{ms:%{!mn:-m h8300self}} %{msx:%{mn:-m h8300sxnelf;:-m h8300sxelf}}"
+
+#endif /* h8300/elf.h */
diff --git a/gcc/config/h8300/fixunssfsi.c b/gcc/config/h8300/fixunssfsi.c
new file mode 100644
index 000000000..2fe62b7a1
--- /dev/null
+++ b/gcc/config/h8300/fixunssfsi.c
@@ -0,0 +1,41 @@
+/* More subroutines needed by GCC output code on some machines. */
+/* Compile this one with gcc. */
+/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The libgcc2.c implementation gets confused by our type setup and creates
+ a directly recursive call, so we provide our own implementation.  For
+ the H8/300 it's in lib1funcs.asm; for the H8/300H and H8S it's here.  */
+
+#ifndef __H8300__
+long __fixunssfsi (float a);
+
+long
+__fixunssfsi (float a)
+{
+ if (a >= (float) 32768L)
+ return (long) (a - 32768L) + 32768L;
+ return (long) a;
+}
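+
+/* Illustrative arithmetic: for a = 40000.0f the first branch computes
+ (long) (40000.0f - 32768) + 32768 = 7232 + 32768 = 40000, avoiding a
+ direct signed conversion of a value at or above 32768.  */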
+#endif
diff --git a/gcc/config/h8300/genmova.sh b/gcc/config/h8300/genmova.sh
new file mode 100644
index 000000000..59f0b4629
--- /dev/null
+++ b/gcc/config/h8300/genmova.sh
@@ -0,0 +1,198 @@
+#!/bin/sh
+# Generate mova.md, a file containing patterns that can be implemented
+# using the h8sx mova instruction.
+
+# Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+echo ";; -*- buffer-read-only: t -*-"
+echo ";; Generated automatically from genmova.sh"
+echo ";; Copyright (C) 2004, 2009 Free Software Foundation, Inc."
+echo ";;"
+echo ";; This file is part of GCC."
+echo ";;"
+echo ";; GCC is free software; you can redistribute it and/or modify"
+echo ";; it under the terms of the GNU General Public License as published by"
+echo ";; the Free Software Foundation; either version 3, or (at your option)"
+echo ";; any later version."
+echo ";;"
+echo ";; GCC is distributed in the hope that it will be useful,"
+echo ";; but WITHOUT ANY WARRANTY; without even the implied warranty of"
+echo ";; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
+echo ";; GNU General Public License for more details."
+echo ";;"
+echo ";; You should have received a copy of the GNU General Public License"
+echo ";; along with GCC; see the file COPYING3. If not see"
+echo ";; <http://www.gnu.org/licenses/>."
+
+# Loop over modes for the source operand (the index). Only 8-bit and
+# 16-bit indices are allowed.
+for s in QI HI; do
+
+ # Set $src to the operand syntax for this size of index.
+ case $s in
+ QI) src=%X1.b;;
+ HI) src=%T1.w;;
+ esac
+
+ # A match_operand for the source.
+ operand="(match_operand:$s 1 \"h8300_dst_operand\" \"0,rQ\")"
+
+ # Loop over the destination register's mode. The QI and HI versions use
+ # the same instructions as the SI ones; they just ignore the upper bits
+ # of the result.
+ for d in QI HI SI; do
+
+ # If the destination is larger than the source, include a
+ # zero_extend/plus pattern. We could also match zero extensions
+ # of memory without the plus, but it's not any smaller or faster
+ # than separate insns.
+ case $d:$s in
+ SI:QI | SI:HI | HI:QI)
+ cat <<EOF
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r,r")
+ (plus:$d (zero_extend:$d $operand)
+ (match_operand:$d 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/b.l @(%o2,$src),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+EOF
+ ;;
+ esac
+
+ # Loop over the shift amount.
+ for shift in 1 2; do
+ case $shift in
+ 1) opsize=w mult=2;;
+ 2) opsize=l mult=4;;
+ esac
+
+ # Calculate the mask of bits that will be nonzero after the source
+ # has been extended and shifted.
+ case $s:$shift in
+ QI:1) mask=510;;
+ QI:2) mask=1020;;
+ HI:1) mask=131070;;
+ HI:2) mask=262140;;
+ esac
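+
+ # For example (illustrative): a zero-extended QI index shifted left
+ # by 2 yields multiples of 4 up to 255 * 4, hence mask=1020
+ # (0xff << 2); similarly 131070 is 0xffff << 1.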
+
+ # There doesn't seem to be a well-established canonical form for
+ # some of the patterns we need. Emit both shift and multiplication
+ # patterns.
+ for form in mult ashift; do
+ case $form in
+ mult) amount=$mult;;
+ ashift) amount=$shift;;
+ esac
+
+ case $d:$s in
+ # If the source and destination are the same size, we can treat
+ # mova as a sort of multiply-add instruction.
+ QI:QI | HI:HI)
+ cat <<EOF
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r,r")
+ (plus:$d ($form:$d $operand
+ (const_int $amount))
+ (match_operand:$d 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/$opsize.l @(%o2,$src),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+EOF
+ ;;
+
+ # Handle the cases where the source is smaller than the
+ # destination. Sometimes combine will keep the extension,
+ # sometimes it will use an AND.
+ SI:QI | SI:HI | HI:QI)
+
+ # Emit the forms that use zero_extend.
+ cat <<EOF
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r,r")
+ ($form:$d (zero_extend:$d $operand)
+ (const_int $amount)))]
+ "TARGET_H8300SX"
+ "mova/$opsize.l @(0,$src),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r,r")
+ (plus:$d ($form:$d (zero_extend:$d $operand)
+ (const_int $amount))
+ (match_operand:$d 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/$opsize.l @(%o2,$src),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+EOF
+
+ # Now emit the forms that use AND. When the index is a register,
+ # these forms are effectively $d-mode operations: the index will
+ # be a $d-mode REG or SUBREG. When the index is a memory
+ # location, we will have a paradoxical subreg such as:
+ #
+ # (and:SI (mult:SI (subreg:SI (mem:QI ...) 0)
+ # (const_int 4))
+ # (const_int 1020))
+ #
+ # Match the two cases separately: a $d-mode register_operand
+ # or a $d-mode subreg of an $s-mode memory_operand. Match the
+ # memory form first since register_operand accepts mem subregs
+ # before reload.
+ memory="(match_operand:$s 1 \"memory_operand\" \"m\")"
+ memory="(subreg:$d $memory 0)"
+ register="(match_operand:$d 1 \"register_operand\" \"0\")"
+ for paradoxical in "$memory" "$register"; do
+ cat <<EOF
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r")
+ (and:$d ($form:$d $paradoxical
+ (const_int $amount))
+ (const_int $mask)))]
+ "TARGET_H8300SX"
+ "mova/$opsize.l @(0,$src),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:$d 0 "register_operand" "=r")
+ (plus:$d (and:$d ($form:$d $paradoxical
+ (const_int $amount))
+ (const_int $mask))
+ (match_operand:$d 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/$opsize.l @(%o2,$src),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+EOF
+ done
+ ;;
+ esac
+ done
+ done
+ done
+done
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
new file mode 100644
index 000000000..1211c6389
--- /dev/null
+++ b/gcc/config/h8300/h8300-protos.h
@@ -0,0 +1,122 @@
+/* Definitions of target machine for GNU compiler.
+ Renesas H8/300 version
+ Copyright (C) 2000, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com),
+ Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_H8300_PROTOS_H
+#define GCC_H8300_PROTOS_H
+
+/* Declarations for functions used in insn-output.c. */
+#ifdef RTX_CODE
+extern unsigned int compute_mov_length (rtx *);
+extern const char *output_plussi (rtx *);
+extern unsigned int compute_plussi_length (rtx *);
+extern const char *output_a_shift (rtx *);
+extern unsigned int compute_a_shift_length (rtx, rtx *);
+extern const char *output_a_rotate (enum rtx_code, rtx *);
+extern unsigned int compute_a_rotate_length (rtx *);
+extern const char *output_simode_bld (int, rtx[]);
+extern void print_operand_address (FILE *, rtx);
+extern void print_operand (FILE *, rtx, int);
+extern void final_prescan_insn (rtx, rtx *, int);
+extern int h8300_expand_movsi (rtx[]);
+extern void notice_update_cc (rtx, rtx);
+extern const char *output_logical_op (enum machine_mode, rtx *);
+extern unsigned int compute_logical_op_length (enum machine_mode,
+ rtx *);
+#ifdef HAVE_ATTR_cc
+extern enum attr_cc compute_plussi_cc (rtx *);
+extern enum attr_cc compute_a_shift_cc (rtx, rtx *);
+extern enum attr_cc compute_logical_op_cc (enum machine_mode, rtx *);
+#endif
+extern void h8300_expand_branch (rtx[]);
+extern void h8300_expand_store (rtx[]);
+extern bool expand_a_shift (enum machine_mode, enum rtx_code, rtx[]);
+extern int h8300_shift_needs_scratch_p (int, enum machine_mode);
+extern int expand_a_rotate (rtx[]);
+extern int fix_bit_operand (rtx *, enum rtx_code);
+extern int h8300_adjust_insn_length (rtx, int);
+extern void split_adds_subs (enum machine_mode, rtx[]);
+
+extern int h8300_eightbit_constant_address_p (rtx);
+extern int h8300_tiny_constant_address_p (rtx);
+extern int byte_accesses_mergeable_p (rtx, rtx);
+extern int same_cmp_preceding_p (rtx);
+extern int same_cmp_following_p (rtx);
+
+extern int h8300_legitimate_constant_p (rtx);
+
+/* Used in builtins.c */
+extern rtx h8300_return_addr_rtx (int, rtx);
+
+/* Classifies an h8sx shift operation.
+
+ H8SX_SHIFT_NONE
+ The shift cannot be done in a single instruction.
+
+ H8SX_SHIFT_UNARY
+ The shift is effectively a unary operation. The instruction will
+ allow any sort of destination operand and have a format similar
+ to neg and not. This is true of certain power-of-2 shifts.
+
+ H8SX_SHIFT_BINARY
+ The shift is a binary operation. The destination must be a
+ register and the source can be a register or a constant. */
+enum h8sx_shift_type {
+ H8SX_SHIFT_NONE,
+ H8SX_SHIFT_UNARY,
+ H8SX_SHIFT_BINARY
+};
+
+extern enum h8sx_shift_type h8sx_classify_shift (enum machine_mode, enum rtx_code, rtx);
+extern int h8300_ldm_stm_parallel (rtvec, int, int);
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+extern int h8300_funcvec_function_p (tree);
+extern int h8300_eightbit_data_p (tree);
+extern int h8300_tiny_data_p (tree);
+#endif /* TREE_CODE */
+
+extern int h8300_can_use_return_insn_p (void);
+extern void h8300_expand_prologue (void);
+extern void h8300_expand_epilogue (void);
+extern int h8300_current_function_interrupt_function_p (void);
+extern int h8300_initial_elimination_offset (int, int);
+extern int h8300_regs_ok_for_stm (int, rtx[]);
+extern int h8300_hard_regno_rename_ok (unsigned int, unsigned int);
+extern int h8300_hard_regno_nregs (int, enum machine_mode);
+extern int h8300_hard_regno_mode_ok (int, enum machine_mode);
+
+struct cpp_reader;
+extern void h8300_pr_interrupt (struct cpp_reader *);
+extern void h8300_pr_saveall (struct cpp_reader *);
+extern enum reg_class h8300_reg_class_from_letter (int);
+extern rtx h8300_get_index (rtx, enum machine_mode, int *);
+extern unsigned int h8300_insn_length_from_table (rtx, rtx *);
+extern const char * output_h8sx_shift (rtx *, int, int);
+extern bool h8300_operands_match_p (rtx *);
+extern bool h8sx_mergeable_memrefs_p (rtx, rtx);
+extern bool h8sx_emit_movmd (rtx, rtx, rtx, HOST_WIDE_INT);
+extern void h8300_swap_into_er6 (rtx);
+extern void h8300_swap_out_of_er6 (rtx);
+
+#endif /* ! GCC_H8300_PROTOS_H */
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
new file mode 100644
index 000000000..55acfff5c
--- /dev/null
+++ b/gcc/config/h8300/h8300.c
@@ -0,0 +1,5980 @@
+/* Subroutines for insn-output.c for Renesas H8/300.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com),
+ Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "expr.h"
+#include "function.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+#include "c-family/c-pragma.h" /* ??? */
+#include "tm_p.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+/* Classifies an h8300_src_operand or h8300_dst_operand.
+
+ H8OP_IMMEDIATE
+ A constant operand of some sort.
+
+ H8OP_REGISTER
+ An ordinary register.
+
+ H8OP_MEM_ABSOLUTE
+ A memory reference with a constant address.
+
+ H8OP_MEM_BASE
+ A memory reference with a register as its address.
+
+ H8OP_MEM_COMPLEX
+ Some other kind of memory reference. */
+enum h8300_operand_class
+{
+ H8OP_IMMEDIATE,
+ H8OP_REGISTER,
+ H8OP_MEM_ABSOLUTE,
+ H8OP_MEM_BASE,
+ H8OP_MEM_COMPLEX,
+ NUM_H8OPS
+};
+
+/* For a general two-operand instruction, element [X][Y] gives
+ the length of the opcode fields when the first operand has class
+ (X + 1) and the second has class Y. */
+typedef unsigned char h8300_length_table[NUM_H8OPS - 1][NUM_H8OPS];
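+/* For example, addb_length_table[H8OP_REGISTER - 1][H8OP_IMMEDIATE],
+ defined further below, gives the opcode length of a byte add with a
+ register destination and an immediate source. */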
+
+/* Forward declarations. */
+static const char *byte_reg (rtx, int);
+static int h8300_interrupt_function_p (tree);
+static int h8300_saveall_function_p (tree);
+static int h8300_monitor_function_p (tree);
+static int h8300_os_task_function_p (tree);
+static void h8300_emit_stack_adjustment (int, HOST_WIDE_INT, bool);
+static HOST_WIDE_INT round_frame_size (HOST_WIDE_INT);
+static unsigned int compute_saved_regs (void);
+static void push (int);
+static void pop (int);
+static const char *cond_string (enum rtx_code);
+static unsigned int h8300_asm_insn_count (const char *);
+static tree h8300_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree h8300_handle_eightbit_data_attribute (tree *, tree, tree, int, bool *);
+static tree h8300_handle_tiny_data_attribute (tree *, tree, tree, int, bool *);
+#ifndef OBJECT_FORMAT_ELF
+static void h8300_asm_named_section (const char *, unsigned int, tree);
+#endif
+static int h8300_and_costs (rtx);
+static int h8300_shift_costs (rtx);
+static void h8300_push_pop (int, int, bool, bool);
+static int h8300_stack_offset_p (rtx, int);
+static int h8300_ldm_stm_regno (rtx, int, int, int);
+static void h8300_reorg (void);
+static unsigned int h8300_constant_length (rtx);
+static unsigned int h8300_displacement_length (rtx, int);
+static unsigned int h8300_classify_operand (rtx, int, enum h8300_operand_class *);
+static unsigned int h8300_length_from_table (rtx, rtx, const h8300_length_table *);
+static unsigned int h8300_unary_length (rtx);
+static unsigned int h8300_short_immediate_length (rtx);
+static unsigned int h8300_bitfield_length (rtx, rtx);
+static unsigned int h8300_binary_length (rtx, const h8300_length_table *);
+static bool h8300_short_move_mem_p (rtx, enum rtx_code);
+static unsigned int h8300_move_length (rtx *, const h8300_length_table *);
+static bool h8300_hard_regno_scratch_ok (unsigned int);
+
+/* CPU_TYPE says what CPU we're compiling for. */
+int cpu_type;
+
+/* True if a #pragma interrupt has been seen for the current function. */
+static int pragma_interrupt;
+
+/* True if a #pragma saveall has been seen for the current function. */
+static int pragma_saveall;
+
+static const char *const names_big[] =
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" };
+
+static const char *const names_extended[] =
+{ "er0", "er1", "er2", "er3", "er4", "er5", "er6", "er7" };
+
+static const char *const names_upper_extended[] =
+{ "e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7" };
+
+/* Points to one of the above. */
+/* ??? The above could be put in an array indexed by CPU_TYPE. */
+const char * const *h8_reg_names;
+
+/* Various operations needed by the following, indexed by CPU_TYPE. */
+
+const char *h8_push_op, *h8_pop_op, *h8_mov_op;
+
+/* Value of MOVE_RATIO. */
+int h8300_move_ratio;
+
+/* See below, where shifts are handled, for an explanation of this enum. */
+
+enum shift_alg
+{
+ SHIFT_INLINE,
+ SHIFT_ROT_AND,
+ SHIFT_SPECIAL,
+ SHIFT_LOOP
+};
+
+/* Symbols of the various shifts which can be used as indices. */
+
+enum shift_type
+{
+ SHIFT_ASHIFT, SHIFT_LSHIFTRT, SHIFT_ASHIFTRT
+};
+
+/* Macros to keep the shift algorithm tables small. */
+#define INL SHIFT_INLINE
+#define ROT SHIFT_ROT_AND
+#define LOP SHIFT_LOOP
+#define SPC SHIFT_SPECIAL
+
+/* The shift algorithms for each machine, mode, shift type, and shift
+ count are defined below. The three tables below correspond to
+ QImode, HImode, and SImode, respectively. Each table is organized
+ by, in the order of indices, machine, shift type, and shift count. */
+
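+/* For example, shift_alg_qi[H8_300H][SHIFT_ASHIFT][5] is SHIFT_ROT_AND:
+ a QImode left shift by 5 on the H8/300H is done by rotating and
+ masking rather than in a loop. */
+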
+static enum shift_alg shift_alg_qi[3][3][8] = {
+ {
+ /* TARGET_H8300 */
+ /* 0 1 2 3 4 5 6 7 */
+ { INL, INL, INL, INL, INL, ROT, ROT, ROT }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, LOP, LOP, SPC } /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300H */
+ /* 0 1 2 3 4 5 6 7 */
+ { INL, INL, INL, INL, INL, ROT, ROT, ROT }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, LOP, LOP, SPC } /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300S */
+ /* 0 1 2 3 4 5 6 7 */
+ { INL, INL, INL, INL, INL, INL, ROT, ROT }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, INL, ROT, ROT }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, INL, INL, SPC } /* SHIFT_ASHIFTRT */
+ }
+};
+
+static enum shift_alg shift_alg_hi[3][3][16] = {
+ {
+ /* TARGET_H8300 */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ { INL, INL, INL, INL, INL, INL, INL, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300H */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ { INL, INL, INL, INL, INL, INL, INL, SPC,
+ SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, INL, INL, SPC,
+ SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, INL, INL, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300S */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ SPC, SPC, SPC, SPC, SPC, ROT, ROT, ROT }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ SPC, SPC, SPC, SPC, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFTRT */
+ }
+};
+
+static enum shift_alg shift_alg_si[3][3][32] = {
+ {
+ /* TARGET_H8300 */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ /* 16 17 18 19 20 21 22 23 */
+ /* 24 25 26 27 28 29 30 31 */
+ { INL, INL, INL, LOP, LOP, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, SPC, SPC, SPC, LOP, LOP, LOP,
+ SPC, SPC, SPC, SPC, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, LOP, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, SPC, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, SPC, SPC, SPC, LOP, LOP, SPC }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, LOP, LOP, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, LOP, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, SPC, LOP, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300H */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ /* 16 17 18 19 20 21 22 23 */
+ /* 24 25 26 27 28 29 30 31 */
+ { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, SPC, SPC, LOP, LOP, LOP, LOP,
+ SPC, LOP, LOP, LOP, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */
+ },
+ {
+ /* TARGET_H8300S */
+ /* 0 1 2 3 4 5 6 7 */
+ /* 8 9 10 11 12 13 14 15 */
+ /* 16 17 18 19 20 21 22 23 */
+ /* 24 25 26 27 28 29 30 31 */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ INL, INL, INL, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, LOP, LOP,
+ SPC, SPC, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_ASHIFT */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ INL, INL, INL, LOP, LOP, LOP, LOP, SPC,
+ SPC, SPC, SPC, SPC, SPC, SPC, LOP, LOP,
+ SPC, SPC, LOP, LOP, SPC, SPC, SPC, SPC }, /* SHIFT_LSHIFTRT */
+ { INL, INL, INL, INL, INL, INL, INL, INL,
+ INL, INL, INL, LOP, LOP, LOP, LOP, LOP,
+ SPC, SPC, SPC, SPC, SPC, SPC, LOP, LOP,
+ SPC, SPC, LOP, LOP, LOP, LOP, LOP, SPC }, /* SHIFT_ASHIFTRT */
+ }
+};
+
+#undef INL
+#undef ROT
+#undef LOP
+#undef SPC
+
+enum h8_cpu
+{
+ H8_300,
+ H8_300H,
+ H8_S
+};
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options h8300_option_optimization_table[] =
+ {
+ /* Basic block reordering is only beneficial on targets with cache
+ and/or variable-cycle branches where (cycle count taken !=
+ cycle count not taken). */
+ { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize various cpu specific globals at start up. */
+
+static void
+h8300_option_override (void)
+{
+ static const char *const h8_push_ops[2] = { "push" , "push.l" };
+ static const char *const h8_pop_ops[2] = { "pop" , "pop.l" };
+ static const char *const h8_mov_ops[2] = { "mov.w", "mov.l" };
+
+ if (TARGET_H8300)
+ {
+ cpu_type = (int) CPU_H8300;
+ h8_reg_names = names_big;
+ }
+ else
+ {
+ /* For this we treat the H8/300H and H8S the same. */
+ cpu_type = (int) CPU_H8300H;
+ h8_reg_names = names_extended;
+ }
+ h8_push_op = h8_push_ops[cpu_type];
+ h8_pop_op = h8_pop_ops[cpu_type];
+ h8_mov_op = h8_mov_ops[cpu_type];
+
+ if (!TARGET_H8300S && TARGET_MAC)
+ {
+ error ("-ms2600 is used without -ms");
+ target_flags |= MASK_H8300S_1;
+ }
+
+ if (TARGET_H8300 && TARGET_NORMAL_MODE)
+ {
+ error ("-mn is used without -mh or -ms");
+ target_flags ^= MASK_NORMAL_MODE;
+ }
+
+ /* Some of the shifts are optimized for speed by default.
+ See http://gcc.gnu.org/ml/gcc-patches/2002-07/msg01858.html
+ If optimizing for size, change shift_alg for those shifts to
+ SHIFT_LOOP. */
+ if (optimize_size)
+ {
+ /* H8/300 */
+ shift_alg_hi[H8_300][SHIFT_ASHIFT][5] = SHIFT_LOOP;
+ shift_alg_hi[H8_300][SHIFT_ASHIFT][6] = SHIFT_LOOP;
+ shift_alg_hi[H8_300][SHIFT_ASHIFT][13] = SHIFT_LOOP;
+ shift_alg_hi[H8_300][SHIFT_ASHIFT][14] = SHIFT_LOOP;
+
+ shift_alg_hi[H8_300][SHIFT_LSHIFTRT][13] = SHIFT_LOOP;
+ shift_alg_hi[H8_300][SHIFT_LSHIFTRT][14] = SHIFT_LOOP;
+
+ shift_alg_hi[H8_300][SHIFT_ASHIFTRT][13] = SHIFT_LOOP;
+ shift_alg_hi[H8_300][SHIFT_ASHIFTRT][14] = SHIFT_LOOP;
+
+ /* H8/300H */
+ shift_alg_hi[H8_300H][SHIFT_ASHIFT][5] = SHIFT_LOOP;
+ shift_alg_hi[H8_300H][SHIFT_ASHIFT][6] = SHIFT_LOOP;
+
+ shift_alg_hi[H8_300H][SHIFT_LSHIFTRT][5] = SHIFT_LOOP;
+ shift_alg_hi[H8_300H][SHIFT_LSHIFTRT][6] = SHIFT_LOOP;
+
+ shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][5] = SHIFT_LOOP;
+ shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][6] = SHIFT_LOOP;
+ shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][13] = SHIFT_LOOP;
+ shift_alg_hi[H8_300H][SHIFT_ASHIFTRT][14] = SHIFT_LOOP;
+
+ /* H8S */
+ shift_alg_hi[H8_S][SHIFT_ASHIFTRT][14] = SHIFT_LOOP;
+ }
+
+ /* Work out a value for MOVE_RATIO. */
+ if (!TARGET_H8300SX)
+ {
+ /* Memory-memory moves are quite expensive without the
+ h8sx instructions. */
+ h8300_move_ratio = 3;
+ }
+ else if (flag_omit_frame_pointer)
+ {
+ /* movmd sequences are fairly cheap when er6 isn't fixed. They can
+ sometimes be as short as two individual memory-to-memory moves,
+ but since they use all the call-saved registers, it seems better
+ to allow up to three moves here. */
+ h8300_move_ratio = 4;
+ }
+ else if (optimize_size)
+ {
+ /* In this case we don't use movmd sequences since they tend
+ to be longer than calls to memcpy(). Memory-to-memory
+ moves are cheaper than for !TARGET_H8300SX, so it makes
+ sense to have a slightly higher threshold. */
+ h8300_move_ratio = 4;
+ }
+ else
+ {
+ /* We use movmd sequences for some moves since it can be quicker
+ than calling memcpy(). The sequences will need to save and
+ restore er6 though, so bump up the cost. */
+ h8300_move_ratio = 6;
+ }
+
+ /* This target defaults to strict volatile bitfields. */
+ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
+ flag_strict_volatile_bitfields = 1;
+}
+
+/* Implement REG_CLASS_FROM_LETTER.
+
+ Some patterns need to use er6 as a scratch register. This is
+ difficult to arrange since er6 is the frame pointer and usually
+ can't be spilled.
+
+ Such patterns should define two alternatives, one which allows only
+ er6 and one which allows any general register. The former alternative
+ should have a 'd' constraint while the latter should be disparaged and
+ use 'D'.
+
+ Normally, 'd' maps to DESTINATION_REGS and 'D' maps to GENERAL_REGS.
+ However, there are cases where they should be NO_REGS:
+
+ - 'd' should be NO_REGS when reloading a function that uses the
+ frame pointer. In this case, DESTINATION_REGS won't contain any
+ spillable registers, so the first alternative can't be used.
+
+ - -fno-omit-frame-pointer means that the frame pointer will
+ always be in use. It's therefore better to map 'd' to NO_REGS
+ before reload so that the register allocator will pick the second
+ alternative.
+
+ - we would like 'D' to be NO_REGS when the frame pointer isn't
+ live, but the frame pointer may turn out to be needed after
+ we start reload, and then we may have already decided we don't
+ have a choice, so we can't do that. Forcing the register
+ allocator to use er6 if possible might produce better code for
+ small functions: it's more efficient to save and restore er6 in
+ the prologue & epilogue than to do it in a define_split.
+ Hopefully disparaging 'D' will have a similar effect, without
+ forcing a reload failure if the frame pointer is found to be
+ needed too late. */
+
+enum reg_class
+h8300_reg_class_from_letter (int c)
+{
+ switch (c)
+ {
+ case 'a':
+ return MAC_REGS;
+
+ case 'c':
+ return COUNTER_REGS;
+
+ case 'd':
+ if (!flag_omit_frame_pointer && !reload_completed)
+ return NO_REGS;
+ if (frame_pointer_needed && reload_in_progress)
+ return NO_REGS;
+ return DESTINATION_REGS;
+
+ case 'D':
+ /* The meaning of a constraint shouldn't change dynamically, so
+ we can't make this NO_REGS. */
+ return GENERAL_REGS;
+
+ case 'f':
+ return SOURCE_REGS;
+
+ default:
+ return NO_REGS;
+ }
+}
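+
+/* As a sketch of the convention described above, a pattern alternative
+ list such as "d,?D" (a hypothetical example) would offer er6 first
+ and a disparaged general-register fallback second. */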
+
+/* Return the byte register name for a register rtx X. B should be 0
+ if you want a lower byte register. B should be 1 if you want an
+ upper byte register. */
+
+static const char *
+byte_reg (rtx x, int b)
+{
+ static const char *const names_small[] = {
+ "r0l", "r0h", "r1l", "r1h", "r2l", "r2h", "r3l", "r3h",
+ "r4l", "r4h", "r5l", "r5h", "r6l", "r6h", "r7l", "r7h"
+ };
+
+ gcc_assert (REG_P (x));
+
+ return names_small[REGNO (x) * 2 + b];
+}
+
+/* REGNO must be saved/restored across calls if this macro is true. */
+
+#define WORD_REG_USED(regno) \
+ (regno < SP_REG \
+ /* No need to save registers if this function will not return. */ \
+ && ! TREE_THIS_VOLATILE (current_function_decl) \
+ && (h8300_saveall_function_p (current_function_decl) \
+ /* Save any call saved register that was used. */ \
+ || (df_regs_ever_live_p (regno) && !call_used_regs[regno]) \
+ /* Save the frame pointer if it was used. */ \
+ || (regno == HARD_FRAME_POINTER_REGNUM && df_regs_ever_live_p (regno)) \
+ /* Save any register used in an interrupt handler. */ \
+ || (h8300_current_function_interrupt_function_p () \
+ && df_regs_ever_live_p (regno)) \
+ /* Save call clobbered registers in non-leaf interrupt \
+ handlers. */ \
+ || (h8300_current_function_interrupt_function_p () \
+ && call_used_regs[regno] \
+ && !current_function_is_leaf)))
+
+/* We use this to wrap all emitted insns in the prologue. */
+static rtx
+F (rtx x, bool set_it)
+{
+ if (set_it)
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Mark all the subexpressions of the PARALLEL rtx PAR as
+ frame-related. Return PAR.
+
+ dwarf2out.c:dwarf2out_frame_debug_expr ignores sub-expressions of a
+ PARALLEL rtx other than the first if they do not have the
+ FRAME_RELATED flag set on them. */
+static rtx
+Fpa (rtx par)
+{
+ int len = XVECLEN (par, 0);
+ int i;
+
+ for (i = 0; i < len; i++)
+ F (XVECEXP (par, 0, i), true);
+
+ return par;
+}
+
+/* Emit insns to adjust the stack pointer by SIGN * SIZE bytes and,
+ when IN_PROLOGUE is true, mark them as frame-related. */
+
+static void
+h8300_emit_stack_adjustment (int sign, HOST_WIDE_INT size, bool in_prologue)
+{
+ /* If the frame size is 0, we don't have anything to do. */
+ if (size == 0)
+ return;
+
+ /* H8/300 cannot add/subtract a large constant with a single
+ instruction. If a temporary register is available, load the
+ constant to it and then do the addition. */
+ if (TARGET_H8300
+ && size > 4
+ && !h8300_current_function_interrupt_function_p ()
+ && !(cfun->static_chain_decl != NULL && sign < 0))
+ {
+ rtx r3 = gen_rtx_REG (Pmode, 3);
+ F (emit_insn (gen_movhi (r3, GEN_INT (sign * size))), in_prologue);
+ F (emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, r3)), in_prologue);
+ }
+ else
+ {
+ /* The stack adjustment made here is further optimized by the
+ splitter. In the case of the H8/300, the splitter always splits the
+ addition emitted here to make the adjustment interrupt-safe.
+ FIXME: We don't always tag those, because we don't know what
+ the splitter will do. */
+ if (Pmode == HImode)
+ {
+ rtx x = emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (sign * size)));
+ if (size < 4)
+ F (x, in_prologue);
+ }
+ else
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (sign * size))), in_prologue);
+ }
+}
+
+/* Round up frame size SIZE. */
+
+static HOST_WIDE_INT
+round_frame_size (HOST_WIDE_INT size)
+{
+ return ((size + STACK_BOUNDARY / BITS_PER_UNIT - 1)
+ & -STACK_BOUNDARY / BITS_PER_UNIT);
+}
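+
+/* For instance, assuming the usual 16-bit STACK_BOUNDARY (2 bytes), a
+ 5-byte frame rounds up to 6: (5 + 1) & -2. */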
+
+/* Compute which registers to push/pop.
+ Return a bit vector of registers. */
+
+static unsigned int
+compute_saved_regs (void)
+{
+ unsigned int saved_regs = 0;
+ int regno;
+
+ /* Construct a bit vector of registers to be pushed/popped. */
+ for (regno = 0; regno <= HARD_FRAME_POINTER_REGNUM; regno++)
+ {
+ if (WORD_REG_USED (regno))
+ saved_regs |= 1 << regno;
+ }
+
+ /* Don't push/pop the frame pointer as it is treated separately. */
+ if (frame_pointer_needed)
+ saved_regs &= ~(1 << HARD_FRAME_POINTER_REGNUM);
+
+ return saved_regs;
+}
+
+/* Emit an insn to push register RN. */
+
+static void
+push (int rn)
+{
+ rtx reg = gen_rtx_REG (word_mode, rn);
+ rtx x;
+
+ if (TARGET_H8300)
+ x = gen_push_h8300 (reg);
+ else if (!TARGET_NORMAL_MODE)
+ x = gen_push_h8300hs_advanced (reg);
+ else
+ x = gen_push_h8300hs_normal (reg);
+ x = F (emit_insn (x), true);
+ add_reg_note (x, REG_INC, stack_pointer_rtx);
+}
+
+/* Emit an insn to pop register RN. */
+
+static void
+pop (int rn)
+{
+ rtx reg = gen_rtx_REG (word_mode, rn);
+ rtx x;
+
+ if (TARGET_H8300)
+ x = gen_pop_h8300 (reg);
+ else if (!TARGET_NORMAL_MODE)
+ x = gen_pop_h8300hs_advanced (reg);
+ else
+ x = gen_pop_h8300hs_normal (reg);
+ x = emit_insn (x);
+ add_reg_note (x, REG_INC, stack_pointer_rtx);
+}
+
+/* Emit an instruction to push or pop NREGS consecutive registers
+ starting at register REGNO. POP_P selects a pop rather than a
+ push and RETURN_P is true if the instruction should return.
+
+ It must be possible to do the requested operation in a single
+ instruction. If NREGS == 1 && !RETURN_P, use a normal push
+ or pop insn. Otherwise emit a parallel of the form:
+
+ (parallel
+ [(return) ;; if RETURN_P
+ (save or restore REGNO)
+ (save or restore REGNO + 1)
+ ...
+ (save or restore REGNO + NREGS - 1)
+ (set sp (plus sp (const_int adjust)))] */
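+
+/* For example, pushing ER4..ER6 (REGNO == 4, NREGS == 3, !POP_P,
+ !RETURN_P) produces a parallel of stores to @(-4,sp), @(-8,sp) and
+ @(-12,sp) followed by (set sp (plus sp (const_int -12))). */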
+
+static void
+h8300_push_pop (int regno, int nregs, bool pop_p, bool return_p)
+{
+ int i, j;
+ rtvec vec;
+ rtx sp, offset, x;
+
+ /* See whether we can use a simple push or pop. */
+ if (!return_p && nregs == 1)
+ {
+ if (pop_p)
+ pop (regno);
+ else
+ push (regno);
+ return;
+ }
+
+ /* We need one element for the return insn, if present, one for each
+ register, and one for stack adjustment. */
+ vec = rtvec_alloc ((return_p ? 1 : 0) + nregs + 1);
+ sp = stack_pointer_rtx;
+ i = 0;
+
+ /* Add the return instruction. */
+ if (return_p)
+ {
+ RTVEC_ELT (vec, i) = gen_rtx_RETURN (VOIDmode);
+ i++;
+ }
+
+ /* Add the register moves. */
+ for (j = 0; j < nregs; j++)
+ {
+ rtx lhs, rhs;
+
+ if (pop_p)
+ {
+ /* Register REGNO + NREGS - 1 is popped first. Before the
+ stack adjustment, its slot is at address @sp. */
+ lhs = gen_rtx_REG (SImode, regno + j);
+ rhs = gen_rtx_MEM (SImode, plus_constant (sp, (nregs - j - 1) * 4));
+ }
+ else
+ {
+ /* Register REGNO is pushed first and will be stored at @(-4,sp). */
+ lhs = gen_rtx_MEM (SImode, plus_constant (sp, (j + 1) * -4));
+ rhs = gen_rtx_REG (SImode, regno + j);
+ }
+ RTVEC_ELT (vec, i + j) = gen_rtx_SET (VOIDmode, lhs, rhs);
+ }
+
+ /* Add the stack adjustment. */
+ offset = GEN_INT ((pop_p ? nregs : -nregs) * 4);
+ RTVEC_ELT (vec, i + j) = gen_rtx_SET (VOIDmode, sp,
+ gen_rtx_PLUS (Pmode, sp, offset));
+
+ x = gen_rtx_PARALLEL (VOIDmode, vec);
+ if (!pop_p)
+ x = Fpa (x);
+
+ if (return_p)
+ emit_jump_insn (x);
+ else
+ emit_insn (x);
+}
+
+/* Return true if X has the value sp + OFFSET. */
+
+static int
+h8300_stack_offset_p (rtx x, int offset)
+{
+ if (offset == 0)
+ return x == stack_pointer_rtx;
+
+ return (GET_CODE (x) == PLUS
+ && XEXP (x, 0) == stack_pointer_rtx
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) == offset);
+}
+
+/* A subroutine of h8300_ldm_stm_parallel. X is one pattern in
+ something that may be an ldm or stm instruction. If it fits
+ the required template, return the register it loads or stores,
+ otherwise return -1.
+
+ LOAD_P is true if X should be a load, false if it should be a store.
+ NREGS is the number of registers that the whole instruction is expected
+ to load or store. INDEX is the index of the register that X should
+ load or store, relative to the lowest-numbered register. */
+
+static int
+h8300_ldm_stm_regno (rtx x, int load_p, int index, int nregs)
+{
+ int regindex, memindex, offset;
+
+ if (load_p)
+ regindex = 0, memindex = 1, offset = (nregs - index - 1) * 4;
+ else
+ memindex = 0, regindex = 1, offset = (index + 1) * -4;
+
+ if (GET_CODE (x) == SET
+ && GET_CODE (XEXP (x, regindex)) == REG
+ && GET_CODE (XEXP (x, memindex)) == MEM
+ && h8300_stack_offset_p (XEXP (XEXP (x, memindex), 0), offset))
+ return REGNO (XEXP (x, regindex));
+
+ return -1;
+}
+
+/* Return true if the elements of VEC starting at FIRST describe an
+ ldm or stm instruction (LOAD_P says which). */
+
+int
+h8300_ldm_stm_parallel (rtvec vec, int load_p, int first)
+{
+ rtx last;
+ int nregs, i, regno, adjust;
+
+ /* There must be a stack adjustment, a register move, and at least one
+ other operation (a return or another register move). */
+ if (GET_NUM_ELEM (vec) < 3)
+ return false;
+
+ /* Get the range of registers to be pushed or popped. */
+ nregs = GET_NUM_ELEM (vec) - first - 1;
+ regno = h8300_ldm_stm_regno (RTVEC_ELT (vec, first), load_p, 0, nregs);
+
+ /* Check that the call to h8300_ldm_stm_regno succeeded and
+ that we're only dealing with GPRs. */
+ if (regno < 0 || regno + nregs > 8)
+ return false;
+
+ /* 2-register h8s instructions must start with an even-numbered register.
+ 3- and 4-register instructions must start with er0 or er4. */
+ if (!TARGET_H8300SX)
+ {
+ if ((regno & 1) != 0)
+ return false;
+ if (nregs > 2 && (regno & 3) != 0)
+ return false;
+ }
+
+ /* Check the other loads or stores. */
+ for (i = 1; i < nregs; i++)
+ if (h8300_ldm_stm_regno (RTVEC_ELT (vec, first + i), load_p, i, nregs)
+ != regno + i)
+ return false;
+
+ /* Check the stack adjustment. */
+ last = RTVEC_ELT (vec, first + nregs);
+ adjust = (load_p ? nregs : -nregs) * 4;
+ return (GET_CODE (last) == SET
+ && SET_DEST (last) == stack_pointer_rtx
+ && h8300_stack_offset_p (SET_SRC (last), adjust));
+}
+
+/* This is what the stack looks like after the prolog of
+ a function with a frame has been set up:
+
+ <args>
+ PC
+ FP <- fp
+ <locals>
+ <saved registers> <- sp
+
+ This is what the stack looks like after the prolog of
+ a function which doesn't have a frame:
+
+ <args>
+ PC
+ <locals>
+ <saved registers> <- sp
+*/
+
+/* Generate RTL code for the function prologue. */
+
+void
+h8300_expand_prologue (void)
+{
+ int regno;
+ int saved_regs;
+ int n_regs;
+
+ /* If the current function has the OS_Task attribute set, then
+ we have a naked prologue. */
+ if (h8300_os_task_function_p (current_function_decl))
+ return;
+
+ if (h8300_monitor_function_p (current_function_decl))
+ /* My understanding of monitor functions is they act just like
+ interrupt functions, except the prologue must mask
+ interrupts. */
+ emit_insn (gen_monitor_prologue ());
+
+ if (frame_pointer_needed)
+ {
+ /* Push fp. */
+ push (HARD_FRAME_POINTER_REGNUM);
+ F (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx), true);
+ }
+
+ /* Push the rest of the registers in ascending order. */
+ saved_regs = compute_saved_regs ();
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno += n_regs)
+ {
+ n_regs = 1;
+ if (saved_regs & (1 << regno))
+ {
+ if (TARGET_H8300S)
+ {
+ /* See how many registers we can push at the same time. */
+ if ((!TARGET_H8300SX || (regno & 3) == 0)
+ && ((saved_regs >> regno) & 0x0f) == 0x0f)
+ n_regs = 4;
+
+ else if ((!TARGET_H8300SX || (regno & 3) == 0)
+ && ((saved_regs >> regno) & 0x07) == 0x07)
+ n_regs = 3;
+
+ else if ((!TARGET_H8300SX || (regno & 1) == 0)
+ && ((saved_regs >> regno) & 0x03) == 0x03)
+ n_regs = 2;
+ }
+
+ h8300_push_pop (regno, n_regs, false, false);
+ }
+ }
+
+ /* Leave room for locals. */
+ h8300_emit_stack_adjustment (-1, round_frame_size (get_frame_size ()), true);
+}
+
+/* Return nonzero if we can use "rts" for the function currently being
+ compiled. */
+
+int
+h8300_can_use_return_insn_p (void)
+{
+ return (reload_completed
+ && !frame_pointer_needed
+ && get_frame_size () == 0
+ && compute_saved_regs () == 0);
+}
+
+/* Generate RTL code for the function epilogue. */
+
+void
+h8300_expand_epilogue (void)
+{
+ int regno;
+ int saved_regs;
+ int n_regs;
+ HOST_WIDE_INT frame_size;
+ bool returned_p;
+
+ if (h8300_os_task_function_p (current_function_decl))
+ /* OS_Task epilogues are nearly naked -- they just have an
+ rts instruction. */
+ return;
+
+ frame_size = round_frame_size (get_frame_size ());
+ returned_p = false;
+
+ /* Deallocate locals. */
+ h8300_emit_stack_adjustment (1, frame_size, false);
+
+ /* Pop the saved registers in descending order. */
+ saved_regs = compute_saved_regs ();
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno -= n_regs)
+ {
+ n_regs = 1;
+ if (saved_regs & (1 << regno))
+ {
+ if (TARGET_H8300S)
+ {
+ /* See how many registers we can pop at the same time. */
+ if ((TARGET_H8300SX || (regno & 3) == 3)
+ && ((saved_regs << 3 >> regno) & 0x0f) == 0x0f)
+ n_regs = 4;
+
+ else if ((TARGET_H8300SX || (regno & 3) == 2)
+ && ((saved_regs << 2 >> regno) & 0x07) == 0x07)
+ n_regs = 3;
+
+ else if ((TARGET_H8300SX || (regno & 1) == 1)
+ && ((saved_regs << 1 >> regno) & 0x03) == 0x03)
+ n_regs = 2;
+ }
+
+ /* See if this pop would be the last insn before the return.
+ If so, use rte/l or rts/l instead of pop or ldm.l. */
+ if (TARGET_H8300SX
+ && !frame_pointer_needed
+ && frame_size == 0
+ && (saved_regs & ((1 << (regno - n_regs + 1)) - 1)) == 0)
+ returned_p = true;
+
+ h8300_push_pop (regno - n_regs + 1, n_regs, true, returned_p);
+ }
+ }
+
+ /* Pop frame pointer if we had one. */
+ if (frame_pointer_needed)
+ {
+ if (TARGET_H8300SX)
+ returned_p = true;
+ h8300_push_pop (HARD_FRAME_POINTER_REGNUM, 1, true, returned_p);
+ }
+
+ if (!returned_p)
+ emit_jump_insn (gen_rtx_RETURN (VOIDmode));
+}
+
+/* Return nonzero if the current function is an interrupt
+ function. */
+
+int
+h8300_current_function_interrupt_function_p (void)
+{
+ return (h8300_interrupt_function_p (current_function_decl)
+ || h8300_monitor_function_p (current_function_decl));
+}
+
+/* Output assembly code for the start of the file. */
+
+static void
+h8300_file_start (void)
+{
+ default_file_start ();
+
+ if (TARGET_H8300H)
+ fputs (TARGET_NORMAL_MODE ? "\t.h8300hn\n" : "\t.h8300h\n", asm_out_file);
+ else if (TARGET_H8300SX)
+ fputs (TARGET_NORMAL_MODE ? "\t.h8300sxn\n" : "\t.h8300sx\n", asm_out_file);
+ else if (TARGET_H8300S)
+ fputs (TARGET_NORMAL_MODE ? "\t.h8300sn\n" : "\t.h8300s\n", asm_out_file);
+}
+
+/* Output assembly language code for the end of file. */
+
+static void
+h8300_file_end (void)
+{
+ fputs ("\t.end\n", asm_out_file);
+}
+
+/* Split an add of a small constant into a sequence of adds/subs
+ insns, using the largest amounts the target supports first. */
+
+void
+split_adds_subs (enum machine_mode mode, rtx *operands)
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ rtx reg = operands[0];
+ HOST_WIDE_INT sign = 1;
+ HOST_WIDE_INT amount;
+ rtx (*gen_add) (rtx, rtx, rtx);
+
+ /* Force VAL to be positive so that we do not have to consider the
+ sign. */
+ if (val < 0)
+ {
+ val = -val;
+ sign = -1;
+ }
+
+ switch (mode)
+ {
+ case HImode:
+ gen_add = gen_addhi3;
+ break;
+
+ case SImode:
+ gen_add = gen_addsi3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Try different amounts in descending order. */
+ for (amount = (TARGET_H8300H || TARGET_H8300S) ? 4 : 2;
+ amount > 0;
+ amount /= 2)
+ {
+ for (; val >= amount; val -= amount)
+ emit_insn (gen_add (reg, reg, GEN_INT (sign * amount)));
+ }
+
+ return;
+}
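+
+/* As an illustration, adding #7 to an HImode register on the H8/300H
+ emits three insns here: adds of 4, 2 and 1, each via gen_addhi3. */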
+
+/* Handle machine specific pragmas for compatibility with existing
+ compilers for the H8/300.
+
+ pragma saveall generates prologue/epilogue code which saves and
+ restores all the registers on function entry.
+
+ pragma interrupt saves and restores all registers, and exits with
+ an rte instruction rather than an rts. A pointer to a function
+ with this attribute may be safely used in an interrupt vector. */
+
+void
+h8300_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ pragma_interrupt = 1;
+}
+
+void
+h8300_pr_saveall (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ pragma_saveall = 1;
+}
+
+/* If the next function argument with MODE and TYPE is to be passed in
+ a register, return a reg RTX for the hard register in which to pass
+ the argument. CUM represents the state after the last argument.
+ If the argument is to be pushed, NULL_RTX is returned.
+
+ On the H8/300 all normal args are pushed, unless -mquickcall is
+ given, in which case the first 3 arguments are passed in registers. */
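+
+/* For instance, under -mquickcall the first word-sized argument would
+ land in register 0 (r0), since CUM->nbytes is still zero at that
+ point; a sketch assuming word-aligned argument slots. */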
+
+static rtx
+h8300_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ static const char *const hand_list[] = {
+ "__main",
+ "__cmpsi2",
+ "__divhi3",
+ "__modhi3",
+ "__udivhi3",
+ "__umodhi3",
+ "__divsi3",
+ "__modsi3",
+ "__udivsi3",
+ "__umodsi3",
+ "__mulhi3",
+ "__mulsi3",
+ "__reg_memcpy",
+ "__reg_memset",
+ "__ucmpsi2",
+ 0,
+ };
+
+ rtx result = NULL_RTX;
+ const char *fname;
+ int regpass = 0;
+
+ /* Never pass unnamed arguments in registers. */
+ if (!named)
+ return NULL_RTX;
+
+ /* Pass 3 regs worth of data in regs when the user asked on the command line. */
+ if (TARGET_QUICKCALL)
+ regpass = 3;
+
+ /* If calling hand-written assembler, use 4 regs of args. */
+ if (cum->libcall)
+ {
+ const char * const *p;
+
+ fname = XSTR (cum->libcall, 0);
+
+ /* See if this libcall is one of the hand coded ones. */
+ for (p = hand_list; *p && strcmp (*p, fname) != 0; p++)
+ ;
+
+ if (*p)
+ regpass = 4;
+ }
+
+ if (regpass)
+ {
+ int size;
+
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (size + cum->nbytes <= regpass * UNITS_PER_WORD
+ && cum->nbytes / UNITS_PER_WORD <= 3)
+ result = gen_rtx_REG (mode, cum->nbytes / UNITS_PER_WORD);
+ }
+
+ return result;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+h8300_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ cum->nbytes += (mode != BLKmode
+ ? (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) & -UNITS_PER_WORD
+ : (int_size_in_bytes (type) + UNITS_PER_WORD - 1) & -UNITS_PER_WORD);
+}
+
+
+/* Compute the cost of an and insn. */
+
+static int
+h8300_and_costs (rtx x)
+{
+ rtx operands[4];
+
+ if (GET_MODE (x) == QImode)
+ return 1;
+
+ if (GET_MODE (x) != HImode
+ && GET_MODE (x) != SImode)
+ return 100;
+
+ operands[0] = NULL;
+ operands[1] = XEXP (x, 0);
+ operands[2] = XEXP (x, 1);
+ operands[3] = x;
+ return compute_logical_op_length (GET_MODE (x), operands) / 2;
+}
+
+/* Compute the cost of a shift insn. */
+
+static int
+h8300_shift_costs (rtx x)
+{
+ rtx operands[4];
+
+ if (GET_MODE (x) != QImode
+ && GET_MODE (x) != HImode
+ && GET_MODE (x) != SImode)
+ return 100;
+
+ operands[0] = NULL;
+ operands[1] = NULL;
+ operands[2] = XEXP (x, 1);
+ operands[3] = x;
+ return compute_a_shift_length (NULL, operands) / 2;
+}
+
+/* Worker function for TARGET_RTX_COSTS. */
+
+static bool
+h8300_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+{
+ if (TARGET_H8300SX && outer_code == MEM)
+ {
+ /* Estimate the number of execution states needed to calculate
+ the address. */
+ if (register_operand (x, VOIDmode)
+ || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == POST_DEC
+ || CONSTANT_P (x))
+ *total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
+ switch (code)
+ {
+ case CONST_INT:
+ {
+ HOST_WIDE_INT n = INTVAL (x);
+
+ if (TARGET_H8300SX)
+ {
+ /* Constant operands need the same number of processor
+ states as register operands. Although we could try to
+ use a size-based cost for !speed, the lack of
+ a mode makes the results very unpredictable. */
+ *total = 0;
+ return true;
+ }
+ if (-4 <= n && n <= 4)
+ {
+ switch ((int) n)
+ {
+ case 0:
+ *total = 0;
+ return true;
+ case 1:
+ case 2:
+ case -1:
+ case -2:
+ *total = 0 + (outer_code == SET);
+ return true;
+ case 4:
+ case -4:
+ if (TARGET_H8300H || TARGET_H8300S)
+ *total = 0 + (outer_code == SET);
+ else
+ *total = 1;
+ return true;
+ }
+ }
+ *total = 1;
+ return true;
+ }
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_H8300SX)
+ {
+ /* See comment for CONST_INT. */
+ *total = 0;
+ return true;
+ }
+ *total = 3;
+ return true;
+
+ case CONST_DOUBLE:
+ *total = 20;
+ return true;
+
+ case COMPARE:
+ if (XEXP (x, 1) == const0_rtx)
+ *total = 0;
+ return false;
+
+ case AND:
+ if (!h8300_dst_operand (XEXP (x, 0), VOIDmode)
+ || !h8300_src_operand (XEXP (x, 1), VOIDmode))
+ return false;
+ *total = COSTS_N_INSNS (h8300_and_costs (x));
+ return true;
+
+ /* We say that MOD and DIV are so expensive because otherwise we'll
+ generate some really horrible code for division by a power of two. */
+ case MOD:
+ case DIV:
+ case UMOD:
+ case UDIV:
+ if (TARGET_H8300SX)
+ switch (GET_MODE (x))
+ {
+ case QImode:
+ case HImode:
+ *total = COSTS_N_INSNS (!speed ? 4 : 10);
+ return false;
+
+ case SImode:
+ *total = COSTS_N_INSNS (!speed ? 4 : 18);
+ return false;
+
+ default:
+ break;
+ }
+ *total = COSTS_N_INSNS (12);
+ return true;
+
+ case MULT:
+ if (TARGET_H8300SX)
+ switch (GET_MODE (x))
+ {
+ case QImode:
+ case HImode:
+ *total = COSTS_N_INSNS (2);
+ return false;
+
+ case SImode:
+ *total = COSTS_N_INSNS (5);
+ return false;
+
+ default:
+ break;
+ }
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (h8sx_binary_shift_operator (x, VOIDmode))
+ {
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+ else if (h8sx_unary_shift_operator (x, VOIDmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (h8300_shift_costs (x));
+ return true;
+
+ case ROTATE:
+ case ROTATERT:
+ if (GET_MODE (x) == HImode)
+ *total = 2;
+ else
+ *total = 8;
+ return true;
+
+ default:
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+}
+
+/* Documentation for the machine specific operand escapes:
+
+ 'E' like s but negative.
+ 'F' like t but negative.
+ 'G' print the low byte of the negated constant
+ 'R' print operand as a byte:8 address if appropriate, else fall back to
+ 'X' handling.
+ 'S' print operand as a long word
+ 'T' print operand as a word
+ 'V' find the set bit, and print its number.
+ 'W' find the clear bit, and print its number.
+ 'X' print operand as a byte
+ 'Y' print either l or h depending on whether last 'Z' operand < 8 or >= 8.
+ If this operand isn't a register, fall back to 'R' handling.
+ 'Z' print int & 7.
+ 'c' print the opcode corresponding to rtl
+ 'e' first word of a 32-bit value - the most significant half if
+ reg, the word at the lowest address if mem, the most
+ significant word if const
+ 'f' second word of a 32-bit value - the least significant half if
+ reg, the word at +2 if mem, the least significant word if const
+ 'j' print operand as condition code.
+ 'k' print operand as reverse condition code.
+ 'm' convert an integer operand to a size suffix (.b, .w or .l)
+ 'o' print an integer without a leading '#'
+ 's' print as low byte of 16-bit value
+ 't' print as high byte of 16-bit value
+ 'w' print as low byte of 32-bit value
+ 'x' print as 2nd byte of 32-bit value
+ 'y' print as 3rd byte of 32-bit value
+ 'z' print as msb of 32-bit value
+*/
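+
+/* Two small worked examples of the escapes above, as handled by the
+ code below: '%Z' on the constant 10 prints "#2" (10 & 7), and '%V'
+ on the constant 8 prints "#3" (the index of its set bit). */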
+
+/* Return assembly language string which identifies a comparison type. */
+
+static const char *
+cond_string (enum rtx_code code)
+{
+ switch (code)
+ {
+ case NE:
+ return "ne";
+ case EQ:
+ return "eq";
+ case GE:
+ return "ge";
+ case GT:
+ return "gt";
+ case LE:
+ return "le";
+ case LT:
+ return "lt";
+ case GEU:
+ return "hs";
+ case GTU:
+ return "hi";
+ case LEU:
+ return "ls";
+ case LTU:
+ return "lo";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Print operand X using operand code CODE to assembly language output file
+ FILE. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ /* This is used for communication between codes V,W,Z and Y. */
+ static int bitint;
+
+ switch (code)
+ {
+ case 'E':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (file, "%sl", names_big[REGNO (x)]);
+ break;
+ case CONST_INT:
+ fprintf (file, "#%ld", (-INTVAL (x)) & 0xff);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'F':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (file, "%sh", names_big[REGNO (x)]);
+ break;
+ case CONST_INT:
+ fprintf (file, "#%ld", ((-INTVAL (x)) & 0xff00) >> 8);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'G':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, "#%ld", 0xff & (-INTVAL (x)));
+ break;
+ case 'S':
+ if (GET_CODE (x) == REG)
+ fprintf (file, "%s", names_extended[REGNO (x)]);
+ else
+ goto def;
+ break;
+ case 'T':
+ if (GET_CODE (x) == REG)
+ fprintf (file, "%s", names_big[REGNO (x)]);
+ else
+ goto def;
+ break;
+ case 'V':
+ bitint = (INTVAL (x) & 0xffff);
+ if ((exact_log2 ((bitint >> 8) & 0xff)) == -1)
+ bitint = exact_log2 (bitint & 0xff);
+ else
+ bitint = exact_log2 ((bitint >> 8) & 0xff);
+ gcc_assert (bitint >= 0);
+ fprintf (file, "#%d", bitint);
+ break;
+ case 'W':
+ bitint = ((~INTVAL (x)) & 0xffff);
+ if ((exact_log2 ((bitint >> 8) & 0xff)) == -1 )
+ bitint = exact_log2 (bitint & 0xff);
+ else
+ bitint = (exact_log2 ((bitint >> 8) & 0xff));
+ gcc_assert (bitint >= 0);
+ fprintf (file, "#%d", bitint);
+ break;
+ case 'R':
+ case 'X':
+ if (GET_CODE (x) == REG)
+ fprintf (file, "%s", byte_reg (x, 0));
+ else
+ goto def;
+ break;
+ case 'Y':
+ gcc_assert (bitint >= 0);
+ if (GET_CODE (x) == REG)
+ fprintf (file, "%s%c", names_big[REGNO (x)], bitint > 7 ? 'h' : 'l');
+ else
+ print_operand (file, x, 'R');
+ bitint = -1;
+ break;
+ case 'Z':
+ bitint = INTVAL (x);
+ fprintf (file, "#%d", bitint & 7);
+ break;
+ case 'c':
+ switch (GET_CODE (x))
+ {
+ case IOR:
+ fprintf (file, "or");
+ break;
+ case XOR:
+ fprintf (file, "xor");
+ break;
+ case AND:
+ fprintf (file, "and");
+ break;
+ default:
+ break;
+ }
+ break;
+ case 'e':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ if (TARGET_H8300)
+ fprintf (file, "%s", names_big[REGNO (x)]);
+ else
+ fprintf (file, "%s", names_upper_extended[REGNO (x)]);
+ break;
+ case MEM:
+ print_operand (file, x, 0);
+ break;
+ case CONST_INT:
+ fprintf (file, "#%ld", ((INTVAL (x) >> 16) & 0xffff));
+ break;
+ case CONST_DOUBLE:
+ {
+ long val;
+ REAL_VALUE_TYPE rv;
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, "#%ld", ((val >> 16) & 0xffff));
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ break;
+ }
+ break;
+ case 'f':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ if (TARGET_H8300)
+ fprintf (file, "%s", names_big[REGNO (x) + 1]);
+ else
+ fprintf (file, "%s", names_big[REGNO (x)]);
+ break;
+ case MEM:
+ x = adjust_address (x, HImode, 2);
+ print_operand (file, x, 0);
+ break;
+ case CONST_INT:
+ fprintf (file, "#%ld", INTVAL (x) & 0xffff);
+ break;
+ case CONST_DOUBLE:
+ {
+ long val;
+ REAL_VALUE_TYPE rv;
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, "#%ld", (val & 0xffff));
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'j':
+ fputs (cond_string (GET_CODE (x)), file);
+ break;
+ case 'k':
+ fputs (cond_string (reverse_condition (GET_CODE (x))), file);
+ break;
+ case 'm':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ switch (INTVAL (x))
+ {
+ case 1:
+ fputs (".b", file);
+ break;
+
+ case 2:
+ fputs (".w", file);
+ break;
+
+ case 4:
+ fputs (".l", file);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'o':
+ print_operand_address (file, x);
+ break;
+ case 's':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", (INTVAL (x)) & 0xff);
+ else
+ fprintf (file, "%s", byte_reg (x, 0));
+ break;
+ case 't':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", (INTVAL (x) >> 8) & 0xff);
+ else
+ fprintf (file, "%s", byte_reg (x, 1));
+ break;
+ case 'w':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", INTVAL (x) & 0xff);
+ else
+ fprintf (file, "%s",
+ byte_reg (x, TARGET_H8300 ? 2 : 0));
+ break;
+ case 'x':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", (INTVAL (x) >> 8) & 0xff);
+ else
+ fprintf (file, "%s",
+ byte_reg (x, TARGET_H8300 ? 3 : 1));
+ break;
+ case 'y':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", (INTVAL (x) >> 16) & 0xff);
+ else
+ fprintf (file, "%s", byte_reg (x, 0));
+ break;
+ case 'z':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "#%ld", (INTVAL (x) >> 24) & 0xff);
+ else
+ fprintf (file, "%s", byte_reg (x, 1));
+ break;
+
+ default:
+ def:
+ switch (GET_CODE (x))
+ {
+ case REG:
+ switch (GET_MODE (x))
+ {
+ case QImode:
+#if 0 /* Is it asm ("mov.b %0,r2l", ...) */
+ fprintf (file, "%s", byte_reg (x, 0));
+#else /* ... or is it asm ("mov.b %0l,r2l", ...) */
+ fprintf (file, "%s", names_big[REGNO (x)]);
+#endif
+ break;
+ case HImode:
+ fprintf (file, "%s", names_big[REGNO (x)]);
+ break;
+ case SImode:
+ case SFmode:
+ fprintf (file, "%s", names_extended[REGNO (x)]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case MEM:
+ {
+ rtx addr = XEXP (x, 0);
+
+ fprintf (file, "@");
+ output_address (addr);
+
+ /* Add a length suffix to constant addresses. Although this
+ is often unnecessary, it helps to avoid ambiguity in the
+ syntax of mova. If we wrote an insn like:
+
+ mova/w.l @(1,@foo.b),er0
+
+ then .b would be considered part of the symbol name.
+ Adding a length after foo will avoid this. */
+ if (CONSTANT_P (addr))
+ switch (code)
+ {
+ case 'R':
+ /* Used for mov.b and bit operations. */
+ if (h8300_eightbit_constant_address_p (addr))
+ {
+ fprintf (file, ":8");
+ break;
+ }
+
+ /* Fall through. We should not get here if we are
+ processing bit operations on H8/300 or H8/300H
+ because 'U' constraint does not allow bit
+ operations on the tiny area on these machines. */
+
+ case 'X':
+ case 'T':
+ case 'S':
+ if (h8300_constant_length (addr) == 2)
+ fprintf (file, ":16");
+ else
+ fprintf (file, ":32");
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ fprintf (file, "#");
+ print_operand_address (file, x);
+ break;
+ case CONST_DOUBLE:
+ {
+ long val;
+ REAL_VALUE_TYPE rv;
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, "#%ld", val);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+/* Output assembly language output for the address ADDR to FILE. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ rtx index;
+ int size;
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "%s", h8_reg_names[REGNO (addr)]);
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "-%s", h8_reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case POST_INC:
+ fprintf (file, "%s+", h8_reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case PRE_INC:
+ fprintf (file, "+%s", h8_reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case POST_DEC:
+ fprintf (file, "%s-", h8_reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case PLUS:
+ fprintf (file, "(");
+
+ index = h8300_get_index (XEXP (addr, 0), VOIDmode, &size);
+ if (GET_CODE (index) == REG)
+ {
+ /* reg,foo */
+ print_operand_address (file, XEXP (addr, 1));
+ fprintf (file, ",");
+ switch (size)
+ {
+ case 0:
+ print_operand_address (file, index);
+ break;
+
+ case 1:
+ print_operand (file, index, 'X');
+ fputs (".b", file);
+ break;
+
+ case 2:
+ print_operand (file, index, 'T');
+ fputs (".w", file);
+ break;
+
+ case 4:
+ print_operand (file, index, 'S');
+ fputs (".l", file);
+ break;
+ }
+ /* print_operand_address (file, XEXP (addr, 0)); */
+ }
+ else
+ {
+ /* foo+k */
+ print_operand_address (file, XEXP (addr, 0));
+ fprintf (file, "+");
+ print_operand_address (file, XEXP (addr, 1));
+ }
+ fprintf (file, ")");
+ break;
+
+ case CONST_INT:
+ {
+ /* Since the H8/300 only has 16-bit pointers, negative values are also
+ those >= 32768. This happens for example with a pointer minus a
+ constant. We don't want to turn (char *p - 2) into
+ (char *p + 65534) because loop unrolling can build upon this
+ (i.e., char *p + 131068). */
+ int n = INTVAL (addr);
+ if (TARGET_H8300)
+ n = (int) (short) n;
+ fprintf (file, "%d", n);
+ break;
+ }
+
+ default:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+/* Output all insn addresses and their sizes into the assembly language
+ output file. This is helpful for debugging whether the length attributes
+ in the md file are correct. This is not meant to be a user selectable
+ option. */
+
+void
+final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
+ int num_operands ATTRIBUTE_UNUSED)
+{
+ /* This holds the last insn address. */
+ static int last_insn_address = 0;
+
+ const int uid = INSN_UID (insn);
+
+ if (TARGET_ADDRESSES)
+ {
+ fprintf (asm_out_file, "; 0x%x %d\n", INSN_ADDRESSES (uid),
+ INSN_ADDRESSES (uid) - last_insn_address);
+ last_insn_address = INSN_ADDRESSES (uid);
+ }
+}
+
+/* Prepare for an SI sized move. */
+
+int
+h8300_expand_movsi (rtx operands[])
+{
+ rtx src = operands[1];
+ rtx dst = operands[0];
+ if (!reload_in_progress && !reload_completed)
+ {
+ if (!register_operand (dst, GET_MODE (dst)))
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (dst));
+ emit_move_insn (tmp, src);
+ operands[1] = tmp;
+ }
+ }
+ return 0;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is allowed.
+ Frame pointer elimination is automatically handled.
+
+ For the h8300, if frame pointer elimination is being done, we would like to
+ convert ap and rp into sp, not fp.
+
+ All other eliminations are valid. */
+
+static bool
+h8300_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
+/* Conditionally modify register usage based on target flags. */
+
+static void
+h8300_conditional_register_usage (void)
+{
+ if (!TARGET_MAC)
+ fixed_regs[MAC_REG] = call_used_regs[MAC_REG] = 1;
+}
+
+/* Function for INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET).
+ Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
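+
+/* For example, eliminating the argument pointer into the stack pointer
+ must step over the locals, the saved registers and the pushed return
+ address, hence the pc_size + saved_regs_size + frame_size case
+ below. */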
+
+int
+h8300_initial_elimination_offset (int from, int to)
+{
+ /* The number of bytes that the return address takes on the stack. */
+ int pc_size = POINTER_SIZE / BITS_PER_UNIT;
+
+ /* The number of bytes that the saved frame pointer takes on the stack. */
+ int fp_size = frame_pointer_needed * UNITS_PER_WORD;
+
+ /* The number of bytes that the saved registers, excluding the frame
+ pointer, take on the stack. */
+ int saved_regs_size = 0;
+
+ /* The number of bytes that the locals take on the stack. */
+ int frame_size = round_frame_size (get_frame_size ());
+
+ int regno;
+
+ for (regno = 0; regno <= HARD_FRAME_POINTER_REGNUM; regno++)
+ if (WORD_REG_USED (regno))
+ saved_regs_size += UNITS_PER_WORD;
+
+ /* Adjust saved_regs_size because the above loop took the frame
+ pointer into account. */
+ saved_regs_size -= fp_size;
+
+ switch (to)
+ {
+ case HARD_FRAME_POINTER_REGNUM:
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ return pc_size + fp_size;
+ case RETURN_ADDRESS_POINTER_REGNUM:
+ return fp_size;
+ case FRAME_POINTER_REGNUM:
+ return -saved_regs_size;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case STACK_POINTER_REGNUM:
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ return pc_size + saved_regs_size + frame_size;
+ case RETURN_ADDRESS_POINTER_REGNUM:
+ return saved_regs_size + frame_size;
+ case FRAME_POINTER_REGNUM:
+ return frame_size;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_unreachable ();
+}
+
+/* Worker function for RETURN_ADDR_RTX. */
+
+rtx
+h8300_return_addr_rtx (int count, rtx frame)
+{
+ rtx ret;
+
+ if (count == 0)
+ ret = gen_rtx_MEM (Pmode,
+ gen_rtx_REG (Pmode, RETURN_ADDRESS_POINTER_REGNUM));
+ else if (flag_omit_frame_pointer)
+ return (rtx) 0;
+ else
+ ret = gen_rtx_MEM (Pmode,
+ memory_address (Pmode,
+ plus_constant (frame, UNITS_PER_WORD)));
+ set_mem_alias_set (ret, get_frame_alias_set ());
+ return ret;
+}
+
+/* Update the condition code from the insn. */
+
+void
+notice_update_cc (rtx body, rtx insn)
+{
+ rtx set;
+
+ switch (get_attr_cc (insn))
+ {
+ case CC_NONE:
+ /* Insn does not affect CC at all. */
+ break;
+
+ case CC_NONE_0HIT:
+ /* Insn does not change CC, but the 0'th operand has been changed. */
+ if (cc_status.value1 != 0
+ && reg_overlap_mentioned_p (recog_data.operand[0], cc_status.value1))
+ cc_status.value1 = 0;
+ if (cc_status.value2 != 0
+ && reg_overlap_mentioned_p (recog_data.operand[0], cc_status.value2))
+ cc_status.value2 = 0;
+ break;
+
+ case CC_SET_ZN:
+ /* Insn sets the Z,N flags of CC to recog_data.operand[0].
+ The V flag is unusable. The C flag may or may not be known but
+ that's ok because alter_cond will change tests to use EQ/NE. */
+ CC_STATUS_INIT;
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE | CC_NO_CARRY;
+ set = single_set (insn);
+ cc_status.value1 = SET_SRC (set);
+ if (SET_DEST (set) != cc0_rtx)
+ cc_status.value2 = SET_DEST (set);
+ break;
+
+ case CC_SET_ZNV:
+ /* Insn sets the Z,N,V flags of CC to recog_data.operand[0].
+ The C flag may or may not be known but that's ok because
+ alter_cond will change tests to use EQ/NE. */
+ CC_STATUS_INIT;
+ cc_status.flags |= CC_NO_CARRY;
+ set = single_set (insn);
+ cc_status.value1 = SET_SRC (set);
+ if (SET_DEST (set) != cc0_rtx)
+ {
+ /* If the destination is STRICT_LOW_PART, strip off
+ STRICT_LOW_PART. */
+ if (GET_CODE (SET_DEST (set)) == STRICT_LOW_PART)
+ cc_status.value2 = XEXP (SET_DEST (set), 0);
+ else
+ cc_status.value2 = SET_DEST (set);
+ }
+ break;
+
+ case CC_COMPARE:
+ /* The insn is a compare instruction. */
+ CC_STATUS_INIT;
+ cc_status.value1 = SET_SRC (body);
+ break;
+
+ case CC_CLOBBER:
+ /* Insn doesn't leave CC in a usable state. */
+ CC_STATUS_INIT;
+ break;
+ }
+}
+
+/* Given that X occurs in an address of the form (plus X constant),
+ return the part of X that is expected to be a register. There are
+ four kinds of addressing mode to recognize:
+
+ @(dd,Rn)
+ @(dd,RnL.b)
+ @(dd,Rn.w)
+ @(dd,ERn.l)
+
+ If SIZE is nonnull, and the address is one of the last three forms,
+ set *SIZE to the index multiplication factor. Set it to 0 for
+ plain @(dd,Rn) addresses.
+
+ MODE is the mode of the value being accessed. It can be VOIDmode
+ if the address is known to be valid, but its mode is unknown. */
+
+rtx
+h8300_get_index (rtx x, enum machine_mode mode, int *size)
+{
+ int dummy, factor;
+
+ if (size == 0)
+ size = &dummy;
+
+ factor = (mode == VOIDmode ? 0 : GET_MODE_SIZE (mode));
+ if (TARGET_H8300SX
+ && factor <= 4
+ && (mode == VOIDmode
+ || GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_FLOAT))
+ {
+ if (factor <= 1 && GET_CODE (x) == ZERO_EXTEND)
+ {
+ /* When accessing byte-sized values, the index can be
+ a zero-extended QImode or HImode register. */
+ *size = GET_MODE_SIZE (GET_MODE (XEXP (x, 0)));
+ return XEXP (x, 0);
+ }
+ else
+ {
+ /* We're looking for addresses of the form:
+
+ (mult X I)
+ or (mult (zero_extend X) I)
+
+ where I is the size of the operand being accessed.
+ The canonical form of the second expression is:
+
+ (and (mult (subreg X) I) J)
+
+ where J == GET_MODE_MASK (GET_MODE (X)) * I. */
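+ /* For instance, a word-sized index held in an HImode register
+ canonicalizes to (and (mult (subreg X) 2) 0x1fffe), where
+ 0x1fffe == 0xffff * 2, and SIZE is set to 2. */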
+ rtx index;
+
+ if (GET_CODE (x) == AND
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (factor == 0
+ || INTVAL (XEXP (x, 1)) == 0xff * factor
+ || INTVAL (XEXP (x, 1)) == 0xffff * factor))
+ {
+ index = XEXP (x, 0);
+ *size = (INTVAL (XEXP (x, 1)) >= 0xffff ? 2 : 1);
+ }
+ else
+ {
+ index = x;
+ *size = 4;
+ }
+
+ if (GET_CODE (index) == MULT
+ && GET_CODE (XEXP (index, 1)) == CONST_INT
+ && (factor == 0 || factor == INTVAL (XEXP (index, 1))))
+ return XEXP (index, 0);
+ }
+ }
+ *size = 0;
+ return x;
+}
+
+static const h8300_length_table addb_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 2, 4, 4, 4 }, /* add.b xx,Rd */
+ { 4, 4, 4, 4, 6 }, /* add.b xx,@aa */
+ { 4, 4, 4, 4, 6 }, /* add.b xx,@Rd */
+ { 6, 4, 4, 4, 6 } /* add.b xx,@xx */
+};
+
+static const h8300_length_table addw_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 2, 4, 4, 4 }, /* add.w xx,Rd */
+ { 4, 4, 4, 4, 6 }, /* add.w xx,@aa */
+ { 4, 4, 4, 4, 6 }, /* add.w xx,@Rd */
+ { 4, 4, 4, 4, 6 } /* add.w xx,@xx */
+};
+
+static const h8300_length_table addl_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 2, 4, 4, 4 }, /* add.l xx,Rd */
+ { 4, 4, 6, 6, 6 }, /* add.l xx,@aa */
+ { 4, 4, 6, 6, 6 }, /* add.l xx,@Rd */
+ { 4, 4, 6, 6, 6 } /* add.l xx,@xx */
+};
+
+#define logicb_length_table addb_length_table
+#define logicw_length_table addw_length_table
+
+static const h8300_length_table logicl_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 4, 4, 4, 4 }, /* and.l xx,Rd */
+ { 4, 4, 6, 6, 6 }, /* and.l xx,@aa */
+ { 4, 4, 6, 6, 6 }, /* and.l xx,@Rd */
+ { 4, 4, 6, 6, 6 } /* and.l xx,@xx */
+};
+
+static const h8300_length_table movb_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 2, 2, 2, 4 }, /* mov.b xx,Rd */
+ { 4, 2, 4, 4, 4 }, /* mov.b xx,@aa */
+ { 4, 2, 4, 4, 4 }, /* mov.b xx,@Rd */
+ { 4, 4, 4, 4, 4 } /* mov.b xx,@xx */
+};
+
+#define movw_length_table movb_length_table
+
+static const h8300_length_table movl_length_table =
+{
+ /* #xx Rs @aa @Rs @xx */
+ { 2, 2, 4, 4, 4 }, /* mov.l xx,Rd */
+ { 4, 4, 4, 4, 4 }, /* mov.l xx,@aa */
+ { 4, 4, 4, 4, 4 }, /* mov.l xx,@Rd */
+ { 4, 4, 4, 4, 4 } /* mov.l xx,@xx */
+};
+
+/* Return the size of the given address or displacement constant. */
+
+static unsigned int
+h8300_constant_length (rtx constant)
+{
+ /* Check for (@d:16,Reg). */
+ if (GET_CODE (constant) == CONST_INT
+ && IN_RANGE (INTVAL (constant), -0x8000, 0x7fff))
+ return 2;
+
+ /* Check for (@d:16,Reg) in cases where the displacement is
+ an absolute address. */
+ if (Pmode == HImode || h8300_tiny_constant_address_p (constant))
+ return 2;
+
+ return 4;
+}
+
+/* Return the size of a displacement field in address ADDR, which should
+ have the form (plus X constant). SIZE is the number of bytes being
+ accessed. */
+
+static unsigned int
+h8300_displacement_length (rtx addr, int size)
+{
+ rtx offset;
+
+ offset = XEXP (addr, 1);
+
+ /* Check for @(d:2,Reg). */
+ if (register_operand (XEXP (addr, 0), VOIDmode)
+ && GET_CODE (offset) == CONST_INT
+ && (INTVAL (offset) == size
+ || INTVAL (offset) == size * 2
+ || INTVAL (offset) == size * 3))
+ return 0;
+
+ return h8300_constant_length (offset);
+}
+
+/* Store the class of operand OP in *OPCLASS and return the length of any
+ extra operand fields. SIZE is the number of bytes in OP. OPCLASS
+ can be null if only the length is needed. */
+
+static unsigned int
+h8300_classify_operand (rtx op, int size, enum h8300_operand_class *opclass)
+{
+ enum h8300_operand_class dummy;
+
+ if (opclass == 0)
+ opclass = &dummy;
+
+ if (CONSTANT_P (op))
+ {
+ *opclass = H8OP_IMMEDIATE;
+
+ /* Byte-sized immediates are stored in the opcode fields. */
+ if (size == 1)
+ return 0;
+
+ /* If this is a 32-bit instruction, see whether the constant
+ will fit into a 16-bit immediate field. */
+ if (TARGET_H8300SX
+ && size == 4
+ && GET_CODE (op) == CONST_INT
+ && IN_RANGE (INTVAL (op), 0, 0xffff))
+ return 2;
+
+ return size;
+ }
+ else if (GET_CODE (op) == MEM)
+ {
+ op = XEXP (op, 0);
+ if (CONSTANT_P (op))
+ {
+ *opclass = H8OP_MEM_ABSOLUTE;
+ return h8300_constant_length (op);
+ }
+ else if (GET_CODE (op) == PLUS && CONSTANT_P (XEXP (op, 1)))
+ {
+ *opclass = H8OP_MEM_COMPLEX;
+ return h8300_displacement_length (op, size);
+ }
+ else if (GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+ {
+ *opclass = H8OP_MEM_COMPLEX;
+ return 0;
+ }
+ else if (register_operand (op, VOIDmode))
+ {
+ *opclass = H8OP_MEM_BASE;
+ return 0;
+ }
+ }
+ gcc_assert (register_operand (op, VOIDmode));
+ *opclass = H8OP_REGISTER;
+ return 0;
+}
+
+/* Return the length of the instruction described by TABLE given that
+ its operands are OP1 and OP2. OP1 must be an h8300_dst_operand
+ and OP2 must be an h8300_src_operand. */
+
+static unsigned int
+h8300_length_from_table (rtx op1, rtx op2, const h8300_length_table *table)
+{
+ enum h8300_operand_class op1_class, op2_class;
+ unsigned int size, immediate_length;
+
+ size = GET_MODE_SIZE (GET_MODE (op1));
+ immediate_length = (h8300_classify_operand (op1, size, &op1_class)
+ + h8300_classify_operand (op2, size, &op2_class));
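+  /* OP1 is a destination, so it can never be an immediate; the "- 1"
+     below compensates for the length tables having no row for
+     H8OP_IMMEDIATE operands.  */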
+ return immediate_length + (*table)[op1_class - 1][op2_class];
+}
+
+/* Return the length of a unary instruction such as neg or not given that
+ its operand is OP. */
+
+unsigned int
+h8300_unary_length (rtx op)
+{
+ enum h8300_operand_class opclass;
+ unsigned int size, operand_length;
+
+ size = GET_MODE_SIZE (GET_MODE (op));
+ operand_length = h8300_classify_operand (op, size, &opclass);
+ switch (opclass)
+ {
+ case H8OP_REGISTER:
+ return 2;
+
+ case H8OP_MEM_BASE:
+ return (size == 4 ? 6 : 4);
+
+ case H8OP_MEM_ABSOLUTE:
+ return operand_length + (size == 4 ? 6 : 4);
+
+ case H8OP_MEM_COMPLEX:
+ return operand_length + 6;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Likewise short immediate instructions such as add.w #xx:3,OP. */
+
+static unsigned int
+h8300_short_immediate_length (rtx op)
+{
+ enum h8300_operand_class opclass;
+ unsigned int size, operand_length;
+
+ size = GET_MODE_SIZE (GET_MODE (op));
+ operand_length = h8300_classify_operand (op, size, &opclass);
+
+ switch (opclass)
+ {
+ case H8OP_REGISTER:
+ return 2;
+
+ case H8OP_MEM_BASE:
+ case H8OP_MEM_ABSOLUTE:
+ case H8OP_MEM_COMPLEX:
+ return 4 + operand_length;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Likewise bitfield load and store instructions. */
+
+static unsigned int
+h8300_bitfield_length (rtx op, rtx op2)
+{
+ enum h8300_operand_class opclass;
+ unsigned int size, operand_length;
+
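+  /* A bitfield insn has one register operand and one memory operand;
+     if OP is the register, analyze the memory operand OP2 instead.  */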
+ if (GET_CODE (op) == REG)
+ op = op2;
+ gcc_assert (GET_CODE (op) != REG);
+
+ size = GET_MODE_SIZE (GET_MODE (op));
+ operand_length = h8300_classify_operand (op, size, &opclass);
+
+ switch (opclass)
+ {
+ case H8OP_MEM_BASE:
+ case H8OP_MEM_ABSOLUTE:
+ case H8OP_MEM_COMPLEX:
+ return 4 + operand_length;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Calculate the length of general binary instruction INSN using TABLE. */
+
+static unsigned int
+h8300_binary_length (rtx insn, const h8300_length_table *table)
+{
+ rtx set;
+
+ set = single_set (insn);
+ gcc_assert (set);
+
+ if (BINARY_P (SET_SRC (set)))
+ return h8300_length_from_table (XEXP (SET_SRC (set), 0),
+ XEXP (SET_SRC (set), 1), table);
+ else
+ {
+ gcc_assert (GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == RTX_TERNARY);
+ return h8300_length_from_table (XEXP (XEXP (SET_SRC (set), 1), 0),
+ XEXP (XEXP (SET_SRC (set), 1), 1),
+ table);
+ }
+}
+
+/* Subroutine of h8300_move_length.  Return true if OP is a 1- or 2-byte
+ memory reference and either (1) it has the form @(d:16,Rn) or
+ (2) its address has the code given by INC_CODE. */
+
+static bool
+h8300_short_move_mem_p (rtx op, enum rtx_code inc_code)
+{
+ rtx addr;
+ unsigned int size;
+
+ if (GET_CODE (op) != MEM)
+ return false;
+
+ addr = XEXP (op, 0);
+ size = GET_MODE_SIZE (GET_MODE (op));
+ if (size != 1 && size != 2)
+ return false;
+
+ return (GET_CODE (addr) == inc_code
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && h8300_displacement_length (addr, size) == 2));
+}
+
+/* Calculate the length of move instruction INSN using the given length
+ table. Although the tables are correct for most cases, there is some
+ irregularity in the length of mov.b and mov.w. The following forms:
+
+ mov @ERs+, Rd
+ mov @(d:16,ERs), Rd
+ mov Rs, @-ERd
+ mov Rs, @(d:16,ERd)
+
+ are two bytes shorter than most other "mov Rs, @complex" or
+ "mov @complex,Rd" combinations. */
+
+static unsigned int
+h8300_move_length (rtx *operands, const h8300_length_table *table)
+{
+ unsigned int size;
+
+ size = h8300_length_from_table (operands[0], operands[1], table);
+ if (REG_P (operands[0]) && h8300_short_move_mem_p (operands[1], POST_INC))
+ size -= 2;
+ if (REG_P (operands[1]) && h8300_short_move_mem_p (operands[0], PRE_DEC))
+ size -= 2;
+ return size;
+}
+
+/* Return the length of a mova instruction with the given operands.
+ DEST is the register destination, SRC is the source address and
+ OFFSET is the 16-bit or 32-bit displacement. */
+
+static unsigned int
+h8300_mova_length (rtx dest, rtx src, rtx offset)
+{
+ unsigned int size;
+
+ size = (2
+ + h8300_constant_length (offset)
+ + h8300_classify_operand (src, GET_MODE_SIZE (GET_MODE (src)), 0));
+ if (!REG_P (dest) || !REG_P (src) || REGNO (src) != REGNO (dest))
+ size += 2;
+ return size;
+}
+
+/* Compute the length of INSN based on its length_table attribute.
+ OPERANDS is the array of its operands. */
+
+unsigned int
+h8300_insn_length_from_table (rtx insn, rtx * operands)
+{
+ switch (get_attr_length_table (insn))
+ {
+ case LENGTH_TABLE_NONE:
+ gcc_unreachable ();
+
+ case LENGTH_TABLE_ADDB:
+ return h8300_binary_length (insn, &addb_length_table);
+
+ case LENGTH_TABLE_ADDW:
+ return h8300_binary_length (insn, &addw_length_table);
+
+ case LENGTH_TABLE_ADDL:
+ return h8300_binary_length (insn, &addl_length_table);
+
+ case LENGTH_TABLE_LOGICB:
+ return h8300_binary_length (insn, &logicb_length_table);
+
+ case LENGTH_TABLE_MOVB:
+ return h8300_move_length (operands, &movb_length_table);
+
+ case LENGTH_TABLE_MOVW:
+ return h8300_move_length (operands, &movw_length_table);
+
+ case LENGTH_TABLE_MOVL:
+ return h8300_move_length (operands, &movl_length_table);
+
+ case LENGTH_TABLE_MOVA:
+ return h8300_mova_length (operands[0], operands[1], operands[2]);
+
+ case LENGTH_TABLE_MOVA_ZERO:
+ return h8300_mova_length (operands[0], operands[1], const0_rtx);
+
+ case LENGTH_TABLE_UNARY:
+ return h8300_unary_length (operands[0]);
+
+ case LENGTH_TABLE_MOV_IMM4:
+ return 2 + h8300_classify_operand (operands[0], 0, 0);
+
+ case LENGTH_TABLE_SHORT_IMMEDIATE:
+ return h8300_short_immediate_length (operands[0]);
+
+ case LENGTH_TABLE_BITFIELD:
+ return h8300_bitfield_length (operands[0], operands[1]);
+
+ case LENGTH_TABLE_BITBRANCH:
+ return h8300_bitfield_length (operands[1], operands[2]) - 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if LHS and RHS are memory references that can be mapped
+ to the same h8sx assembly operand. LHS appears as the destination of
+ an instruction and RHS appears as a source.
+
+ Three cases are allowed:
+
+ - RHS is @+Rn or @-Rn, LHS is @Rn
+ - RHS is @Rn, LHS is @Rn+ or @Rn-
+ - RHS and LHS have the same address and neither has side effects. */
+
+bool
+h8sx_mergeable_memrefs_p (rtx lhs, rtx rhs)
+{
+ if (GET_CODE (rhs) == MEM && GET_CODE (lhs) == MEM)
+ {
+ rhs = XEXP (rhs, 0);
+ lhs = XEXP (lhs, 0);
+
+ if (GET_CODE (rhs) == PRE_INC || GET_CODE (rhs) == PRE_DEC)
+ return rtx_equal_p (XEXP (rhs, 0), lhs);
+
+ if (GET_CODE (lhs) == POST_INC || GET_CODE (lhs) == POST_DEC)
+ return rtx_equal_p (rhs, XEXP (lhs, 0));
+
+ if (rtx_equal_p (rhs, lhs))
+ return true;
+ }
+ return false;
+}
+
+/* Return true if OPERANDS[1] can be mapped to the same assembly
+ operand as OPERANDS[0]. */
+
+bool
+h8300_operands_match_p (rtx *operands)
+{
+ if (register_operand (operands[0], VOIDmode)
+ && register_operand (operands[1], VOIDmode))
+ return true;
+
+ if (h8sx_mergeable_memrefs_p (operands[0], operands[1]))
+ return true;
+
+ return false;
+}
+
+/* Try using movmd to move LENGTH bytes from memory region SRC to memory
+ region DEST. The two regions do not overlap and have the common
+ alignment given by ALIGNMENT. Return true on success.
+
+ Using movmd for variable-length moves seems to involve some
+ complex trade-offs. For instance:
+
+ - Preparing for a movmd instruction is similar to preparing
+ for a memcpy. The main difference is that the arguments
+ are moved into er4, er5 and er6 rather than er0, er1 and er2.
+
+ - Since movmd clobbers the frame pointer, we need to save
+ and restore it somehow when frame_pointer_needed. This can
+ sometimes make movmd sequences longer than calls to memcpy().
+
+ - The counter register is 16 bits, so the instruction is only
+ suitable for variable-length moves when sizeof (size_t) == 2.
+ That's only true in normal mode.
+
+ - We will often lack static alignment information. Falling back
+ on movmd.b would likely be slower than calling memcpy(), at least
+ for big moves.
+
+ This function therefore only uses movmd when the length is a
+ known constant, and only then if -fomit-frame-pointer is in
+ effect or if we're not optimizing for size.
+
+ At the moment the function uses movmd for all in-range constants,
+ but it might be better to fall back on memcpy() for large moves
+ if ALIGNMENT == 1. */
+
+bool
+h8sx_emit_movmd (rtx dest, rtx src, rtx length,
+ HOST_WIDE_INT alignment)
+{
+ if (!flag_omit_frame_pointer && optimize_size)
+ return false;
+
+ if (GET_CODE (length) == CONST_INT)
+ {
+ rtx dest_reg, src_reg, first_dest, first_src;
+ HOST_WIDE_INT n;
+ int factor;
+
+ /* Use movmd.l if the alignment allows it, otherwise fall back
+ on movmd.b. */
+ factor = (alignment >= 2 ? 4 : 1);
+
+ /* Make sure the length is within range. We can handle counter
+ values up to 65536, although HImode truncation will make
+ the count appear negative in rtl dumps. */
+ n = INTVAL (length);
+ if (n <= 0 || n / factor > 65536)
+ return false;
+
+ /* Create temporary registers for the source and destination
+ pointers. Initialize them to the start of each region. */
+ dest_reg = copy_addr_to_reg (XEXP (dest, 0));
+ src_reg = copy_addr_to_reg (XEXP (src, 0));
+
+ /* Create references to the movmd source and destination blocks. */
+ first_dest = replace_equiv_address (dest, dest_reg);
+ first_src = replace_equiv_address (src, src_reg);
+
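+      /* N & -FACTOR rounds N down to a multiple of FACTOR (a power of
+	 two), which is the number of bytes that the movmd insn itself
+	 will copy.  */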
+ set_mem_size (first_dest, GEN_INT (n & -factor));
+ set_mem_size (first_src, GEN_INT (n & -factor));
+
+ length = copy_to_mode_reg (HImode, gen_int_mode (n / factor, HImode));
+ emit_insn (gen_movmd (first_dest, first_src, length, GEN_INT (factor)));
+
+ if ((n & -factor) != n)
+ {
+ /* Move SRC and DEST past the region we just copied.
+ This is done to update the memory attributes. */
+ dest = adjust_address (dest, BLKmode, n & -factor);
+ src = adjust_address (src, BLKmode, n & -factor);
+
+ /* Replace the addresses with the source and destination
+ registers, which movmd has left with the right values. */
+ dest = replace_equiv_address (dest, dest_reg);
+ src = replace_equiv_address (src, src_reg);
+
+ /* Mop up the left-over bytes. */
+ if (n & 2)
+ emit_move_insn (adjust_address (dest, HImode, 0),
+ adjust_address (src, HImode, 0));
+ if (n & 1)
+ emit_move_insn (adjust_address (dest, QImode, n & 2),
+ adjust_address (src, QImode, n & 2));
+ }
+ return true;
+ }
+ return false;
+}
+
+/* Move ADDR into er6 after pushing its old value onto the stack. */
+
+void
+h8300_swap_into_er6 (rtx addr)
+{
+ push (HARD_FRAME_POINTER_REGNUM);
+ emit_move_insn (hard_frame_pointer_rtx, addr);
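+  /* If ADDR is the stack pointer, compensate for the push above,
+     which has just moved the stack pointer down by one word.  */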
+ if (REGNO (addr) == SP_REG)
+ emit_move_insn (hard_frame_pointer_rtx,
+ plus_constant (hard_frame_pointer_rtx,
+ GET_MODE_SIZE (word_mode)));
+}
+
+/* Move the current value of er6 into ADDR and pop its old value
+ from the stack. */
+
+void
+h8300_swap_out_of_er6 (rtx addr)
+{
+ if (REGNO (addr) != SP_REG)
+ emit_move_insn (addr, hard_frame_pointer_rtx);
+ pop (HARD_FRAME_POINTER_REGNUM);
+}
+
+/* Return the length of a mov instruction.  */
+
+unsigned int
+compute_mov_length (rtx *operands)
+{
+ /* If the mov instruction involves a memory operand, we compute the
+ length, assuming the largest addressing mode is used, and then
+ adjust later in the function. Otherwise, we compute and return
+ the exact length in one step. */
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx addr;
+
+ if (GET_CODE (src) == MEM)
+ addr = XEXP (src, 0);
+ else if (GET_CODE (dest) == MEM)
+ addr = XEXP (dest, 0);
+ else
+ addr = NULL_RTX;
+
+ if (TARGET_H8300)
+ {
+ unsigned int base_length;
+
+ switch (mode)
+ {
+ case QImode:
+ if (addr == NULL_RTX)
+ return 2;
+
+ /* The eightbit addressing is available only in QImode, so
+ go ahead and take care of it. */
+ if (h8300_eightbit_constant_address_p (addr))
+ return 2;
+
+ base_length = 4;
+ break;
+
+ case HImode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ return 2;
+
+ if (src == const0_rtx)
+ return 2;
+
+ return 4;
+ }
+
+ base_length = 4;
+ break;
+
+ case SImode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ return 4;
+
+ if (GET_CODE (src) == CONST_INT)
+ {
+ if (src == const0_rtx)
+ return 4;
+
+ if ((INTVAL (src) & 0xffff) == 0)
+ return 6;
+
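+		  /* If the two 16-bit halves are identical, the
+		     constant can be loaded with one 4-byte immediate
+		     mov.w and copied to the other half with a 2-byte
+		     register move, for 6 bytes in total.  */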
+ if ((INTVAL (src) & 0xffff)
+ == ((INTVAL (src) >> 16) & 0xffff))
+ return 6;
+ }
+ return 8;
+ }
+
+ base_length = 8;
+ break;
+
+ case SFmode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ return 4;
+
+ if (CONST_DOUBLE_OK_FOR_LETTER_P (src, 'G'))
+ return 4;
+
+ return 8;
+ }
+
+ base_length = 8;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Adjust the length based on the addressing mode used.
+ Specifically, we subtract the difference between the actual
+ length and the longest one, which is @(d:16,Rs). For SImode
+ and SFmode, we double the adjustment because two mov.w are
+ used to do the job. */
+
+ /* @Rs+ and @-Rd are 2 bytes shorter than the longest. */
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC)
+ {
+ if (mode == QImode || mode == HImode)
+ return base_length - 2;
+ else
+ /* In SImode and SFmode, we use two mov.w instructions, so
+ double the adjustment. */
+ return base_length - 4;
+ }
+
+ /* @Rs and @Rd are 2 bytes shorter than the longest. Note that
+ in SImode and SFmode, the second mov.w involves an address
+ with displacement, namely @(2,Rs) or @(2,Rd), so we subtract
+ only 2 bytes. */
+ if (GET_CODE (addr) == REG)
+ return base_length - 2;
+
+ return base_length;
+ }
+ else
+ {
+ unsigned int base_length;
+
+ switch (mode)
+ {
+ case QImode:
+ if (addr == NULL_RTX)
+ return 2;
+
+ /* The eightbit addressing is available only in QImode, so
+ go ahead and take care of it. */
+ if (h8300_eightbit_constant_address_p (addr))
+ return 2;
+
+ base_length = 8;
+ break;
+
+ case HImode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ return 2;
+
+ if (src == const0_rtx)
+ return 2;
+
+ return 4;
+ }
+
+ base_length = 8;
+ break;
+
+ case SImode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ {
+ if (REGNO (src) == MAC_REG || REGNO (dest) == MAC_REG)
+ return 4;
+ else
+ return 2;
+ }
+
+ if (GET_CODE (src) == CONST_INT)
+ {
+ int val = INTVAL (src);
+
+ if (val == 0)
+ return 2;
+
+ if (val == (val & 0x00ff) || val == (val & 0xff00))
+ return 4;
+
+ switch (val & 0xffffffff)
+ {
+ case 0xffffffff:
+ case 0xfffffffe:
+ case 0xfffffffc:
+ case 0x0000ffff:
+ case 0x0000fffe:
+ case 0xffff0000:
+ case 0xfffe0000:
+ case 0x00010000:
+ case 0x00020000:
+ return 4;
+ }
+ }
+ return 6;
+ }
+
+ base_length = 10;
+ break;
+
+ case SFmode:
+ if (addr == NULL_RTX)
+ {
+ if (REG_P (src))
+ return 2;
+
+ if (CONST_DOUBLE_OK_FOR_LETTER_P (src, 'G'))
+ return 2;
+
+ return 6;
+ }
+
+ base_length = 10;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Adjust the length based on the addressing mode used.
+ Specifically, we subtract the difference between the actual
+ length and the longest one, which is @(d:24,ERs). */
+
+ /* @ERs+ and @-ERd are 6 bytes shorter than the longest. */
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC)
+ return base_length - 6;
+
+ /* @ERs and @ERd are 6 bytes shorter than the longest. */
+ if (GET_CODE (addr) == REG)
+ return base_length - 6;
+
+ /* @(d:16,ERs) and @(d:16,ERd) are 4 bytes shorter than the
+ longest. */
+ if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && INTVAL (XEXP (addr, 1)) > -32768
+ && INTVAL (XEXP (addr, 1)) < 32767)
+ return base_length - 4;
+
+ /* @aa:16 is 4 bytes shorter than the longest. */
+ if (h8300_tiny_constant_address_p (addr))
+ return base_length - 4;
+
+ /* @aa:24 is 2 bytes shorter than the longest. */
+ if (CONSTANT_P (addr))
+ return base_length - 2;
+
+ return base_length;
+ }
+}
+
+/* Output an addition insn. */
+
+const char *
+output_plussi (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ gcc_assert (mode == SImode);
+
+ if (TARGET_H8300)
+ {
+ if (GET_CODE (operands[2]) == REG)
+ return "add.w\t%f2,%f0\n\taddx\t%y2,%y0\n\taddx\t%z2,%z0";
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT n = INTVAL (operands[2]);
+
+ if ((n & 0xffffff) == 0)
+ return "add\t%z2,%z0";
+ if ((n & 0xffff) == 0)
+ return "add\t%y2,%y0\n\taddx\t%z2,%z0";
+ if ((n & 0xff) == 0)
+ return "add\t%x2,%x0\n\taddx\t%y2,%y0\n\taddx\t%z2,%z0";
+ }
+
+ return "add\t%w2,%w0\n\taddx\t%x2,%x0\n\taddx\t%y2,%y0\n\taddx\t%z2,%z0";
+ }
+ else
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && register_operand (operands[1], VOIDmode))
+ {
+ HOST_WIDE_INT intval = INTVAL (operands[2]);
+
+ if (TARGET_H8300SX && (intval >= 1 && intval <= 7))
+ return "add.l\t%S2,%S0";
+ if (TARGET_H8300SX && (intval >= -7 && intval <= -1))
+ return "sub.l\t%G2,%S0";
+
+ /* See if we can finish with 2 bytes. */
+
+ switch ((unsigned int) intval & 0xffffffff)
+ {
+ case 0x00000001:
+ case 0x00000002:
+ case 0x00000004:
+ return "adds\t%2,%S0";
+
+ case 0xffffffff:
+ case 0xfffffffe:
+ case 0xfffffffc:
+ return "subs\t%G2,%S0";
+
+ case 0x00010000:
+ case 0x00020000:
+ operands[2] = GEN_INT (intval >> 16);
+ return "inc.w\t%2,%e0";
+
+ case 0xffff0000:
+ case 0xfffe0000:
+ operands[2] = GEN_INT (intval >> 16);
+ return "dec.w\t%G2,%e0";
+ }
+
+ /* See if we can finish with 4 bytes. */
+ if ((intval & 0xffff) == 0)
+ {
+ operands[2] = GEN_INT (intval >> 16);
+ return "add.w\t%2,%e0";
+ }
+ }
+
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub.l\t%S2,%S0";
+ }
+ return "add.l\t%S2,%S0";
+ }
+}
+
+/* ??? It would be much easier to add the h8sx stuff if a single function
+ classified the addition as either inc/dec, adds/subs, add.w or add.l. */
+/* Compute the length of an addition insn. */
+
+unsigned int
+compute_plussi_length (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ gcc_assert (mode == SImode);
+
+ if (TARGET_H8300)
+ {
+ if (GET_CODE (operands[2]) == REG)
+ return 6;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT n = INTVAL (operands[2]);
+
+ if ((n & 0xffffff) == 0)
+ return 2;
+ if ((n & 0xffff) == 0)
+ return 4;
+ if ((n & 0xff) == 0)
+ return 6;
+ }
+
+ return 8;
+ }
+ else
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && register_operand (operands[1], VOIDmode))
+ {
+ HOST_WIDE_INT intval = INTVAL (operands[2]);
+
+ if (TARGET_H8300SX && (intval >= 1 && intval <= 7))
+ return 2;
+ if (TARGET_H8300SX && (intval >= -7 && intval <= -1))
+ return 2;
+
+ /* See if we can finish with 2 bytes. */
+
+ switch ((unsigned int) intval & 0xffffffff)
+ {
+ case 0x00000001:
+ case 0x00000002:
+ case 0x00000004:
+ return 2;
+
+ case 0xffffffff:
+ case 0xfffffffe:
+ case 0xfffffffc:
+ return 2;
+
+ case 0x00010000:
+ case 0x00020000:
+ return 2;
+
+ case 0xffff0000:
+ case 0xfffe0000:
+ return 2;
+ }
+
+ /* See if we can finish with 4 bytes. */
+ if ((intval & 0xffff) == 0)
+ return 4;
+ }
+
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0)
+ return h8300_length_from_table (operands[0],
+ GEN_INT (-INTVAL (operands[2])),
+ &addl_length_table);
+ else
+ return h8300_length_from_table (operands[0], operands[2],
+ &addl_length_table);
+ }
+}
+
+/* Compute which flag bits are valid after an addition insn. */
+
+enum attr_cc
+compute_plussi_cc (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ gcc_assert (mode == SImode);
+
+ if (TARGET_H8300)
+ {
+ return CC_CLOBBER;
+ }
+ else
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && register_operand (operands[1], VOIDmode))
+ {
+ HOST_WIDE_INT intval = INTVAL (operands[2]);
+
+ if (TARGET_H8300SX && (intval >= 1 && intval <= 7))
+ return CC_SET_ZN;
+ if (TARGET_H8300SX && (intval >= -7 && intval <= -1))
+ return CC_SET_ZN;
+
+ /* See if we can finish with 2 bytes. */
+
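+	  /* adds/subs do not modify the condition codes at all, while
+	     inc.w/dec.w set them from only the upper word, which says
+	     nothing about the full SImode result; hence CC_CLOBBER.  */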
+ switch ((unsigned int) intval & 0xffffffff)
+ {
+ case 0x00000001:
+ case 0x00000002:
+ case 0x00000004:
+ return CC_NONE_0HIT;
+
+ case 0xffffffff:
+ case 0xfffffffe:
+ case 0xfffffffc:
+ return CC_NONE_0HIT;
+
+ case 0x00010000:
+ case 0x00020000:
+ return CC_CLOBBER;
+
+ case 0xffff0000:
+ case 0xfffe0000:
+ return CC_CLOBBER;
+ }
+
+ /* See if we can finish with 4 bytes. */
+ if ((intval & 0xffff) == 0)
+ return CC_CLOBBER;
+ }
+
+ return CC_SET_ZN;
+ }
+}
+
+/* Output a logical insn. */
+
+const char *
+output_logical_op (enum machine_mode mode, rtx *operands)
+{
+ /* Figure out the logical op that we need to perform. */
+ enum rtx_code code = GET_CODE (operands[3]);
+ /* Pretend that every byte is affected if both operands are registers. */
+ const unsigned HOST_WIDE_INT intval =
+ (unsigned HOST_WIDE_INT) ((GET_CODE (operands[2]) == CONST_INT)
+ /* Always use the full instruction if the
+ first operand is in memory. It is better
+ to use define_splits to generate the shorter
+ sequence where valid. */
+ && register_operand (operands[1], VOIDmode)
+ ? INTVAL (operands[2]) : 0x55555555);
+ /* The determinant of the algorithm. If we perform an AND, 0
+ affects a bit. Otherwise, 1 affects a bit. */
+ const unsigned HOST_WIDE_INT det = (code != AND) ? intval : ~intval;
+ /* Break up DET into pieces. */
+ const unsigned HOST_WIDE_INT b0 = (det >> 0) & 0xff;
+ const unsigned HOST_WIDE_INT b1 = (det >> 8) & 0xff;
+ const unsigned HOST_WIDE_INT b2 = (det >> 16) & 0xff;
+ const unsigned HOST_WIDE_INT b3 = (det >> 24) & 0xff;
+ const unsigned HOST_WIDE_INT w0 = (det >> 0) & 0xffff;
+ const unsigned HOST_WIDE_INT w1 = (det >> 16) & 0xffff;
+ int lower_half_easy_p = 0;
+ int upper_half_easy_p = 0;
+ /* The name of an insn. */
+ const char *opname;
+ char insn_buf[100];
+
+ switch (code)
+ {
+ case AND:
+ opname = "and";
+ break;
+ case IOR:
+ opname = "or";
+ break;
+ case XOR:
+ opname = "xor";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (mode)
+ {
+ case HImode:
+ /* First, see if we can finish with one insn. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && b0 != 0
+ && b1 != 0)
+ {
+ sprintf (insn_buf, "%s.w\t%%T2,%%T0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ else
+ {
+ /* Take care of the lower byte. */
+ if (b0 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%s2,%%s0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ /* Take care of the upper byte. */
+ if (b1 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%t2,%%t0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ }
+ break;
+ case SImode:
+ if (TARGET_H8300H || TARGET_H8300S)
+ {
+ /* Determine if the lower half can be taken care of in no more
+ than two bytes. */
+ lower_half_easy_p = (b0 == 0
+ || b1 == 0
+ || (code != IOR && w0 == 0xffff));
+
+ /* Determine if the upper half can be taken care of in no more
+ than two bytes. */
+ upper_half_easy_p = ((code != IOR && w1 == 0xffff)
+ || (code == AND && w1 == 0xff00));
+ }
+
+ /* Check if doing everything with one insn is no worse than
+ using multiple insns. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && w0 != 0 && w1 != 0
+ && !(lower_half_easy_p && upper_half_easy_p)
+ && !(code == IOR && w1 == 0xffff
+ && (w0 & 0x8000) != 0 && lower_half_easy_p))
+ {
+ sprintf (insn_buf, "%s.l\t%%S2,%%S0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ else
+ {
+ /* Take care of the lower and upper words individually. For
+ each word, we try different methods in the order of
+
+ 1) the special insn (in case of AND or XOR),
+ 2) the word-wise insn, and
+ 3) The byte-wise insn. */
+ if (w0 == 0xffff
+ && (TARGET_H8300 ? (code == AND) : (code != IOR)))
+ output_asm_insn ((code == AND)
+ ? "sub.w\t%f0,%f0" : "not.w\t%f0",
+ operands);
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && (b0 != 0)
+ && (b1 != 0))
+ {
+ sprintf (insn_buf, "%s.w\t%%f2,%%f0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ else
+ {
+ if (b0 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%w2,%%w0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ if (b1 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%x2,%%x0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ }
+
+ if ((w1 == 0xffff)
+ && (TARGET_H8300 ? (code == AND) : (code != IOR)))
+ output_asm_insn ((code == AND)
+ ? "sub.w\t%e0,%e0" : "not.w\t%e0",
+ operands);
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && code == IOR
+ && w1 == 0xffff
+ && (w0 & 0x8000) != 0)
+ {
+ output_asm_insn ("exts.l\t%S0", operands);
+ }
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && code == AND
+ && w1 == 0xff00)
+ {
+ output_asm_insn ("extu.w\t%e0", operands);
+ }
+ else if (TARGET_H8300H || TARGET_H8300S)
+ {
+ if (w1 != 0)
+ {
+ sprintf (insn_buf, "%s.w\t%%e2,%%e0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ }
+ else
+ {
+ if (b2 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%y2,%%y0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ if (b3 != 0)
+ {
+ sprintf (insn_buf, "%s\t%%z2,%%z0", opname);
+ output_asm_insn (insn_buf, operands);
+ }
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
+
+/* Compute the length of a logical insn. */
+
+unsigned int
+compute_logical_op_length (enum machine_mode mode, rtx *operands)
+{
+ /* Figure out the logical op that we need to perform. */
+ enum rtx_code code = GET_CODE (operands[3]);
+ /* Pretend that every byte is affected if both operands are registers. */
+ const unsigned HOST_WIDE_INT intval =
+ (unsigned HOST_WIDE_INT) ((GET_CODE (operands[2]) == CONST_INT)
+ /* Always use the full instruction if the
+ first operand is in memory. It is better
+ to use define_splits to generate the shorter
+ sequence where valid. */
+ && register_operand (operands[1], VOIDmode)
+ ? INTVAL (operands[2]) : 0x55555555);
+ /* The determinant of the algorithm. If we perform an AND, 0
+ affects a bit. Otherwise, 1 affects a bit. */
+ const unsigned HOST_WIDE_INT det = (code != AND) ? intval : ~intval;
+ /* Break up DET into pieces. */
+ const unsigned HOST_WIDE_INT b0 = (det >> 0) & 0xff;
+ const unsigned HOST_WIDE_INT b1 = (det >> 8) & 0xff;
+ const unsigned HOST_WIDE_INT b2 = (det >> 16) & 0xff;
+ const unsigned HOST_WIDE_INT b3 = (det >> 24) & 0xff;
+ const unsigned HOST_WIDE_INT w0 = (det >> 0) & 0xffff;
+ const unsigned HOST_WIDE_INT w1 = (det >> 16) & 0xffff;
+ int lower_half_easy_p = 0;
+ int upper_half_easy_p = 0;
+ /* Insn length. */
+ unsigned int length = 0;
+
+ switch (mode)
+ {
+ case HImode:
+ /* First, see if we can finish with one insn. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && b0 != 0
+ && b1 != 0)
+ {
+ length = h8300_length_from_table (operands[1], operands[2],
+ &logicw_length_table);
+ }
+ else
+ {
+ /* Take care of the lower byte. */
+ if (b0 != 0)
+ length += 2;
+
+ /* Take care of the upper byte. */
+ if (b1 != 0)
+ length += 2;
+ }
+ break;
+ case SImode:
+ if (TARGET_H8300H || TARGET_H8300S)
+ {
+ /* Determine if the lower half can be taken care of in no more
+ than two bytes. */
+ lower_half_easy_p = (b0 == 0
+ || b1 == 0
+ || (code != IOR && w0 == 0xffff));
+
+ /* Determine if the upper half can be taken care of in no more
+ than two bytes. */
+ upper_half_easy_p = ((code != IOR && w1 == 0xffff)
+ || (code == AND && w1 == 0xff00));
+ }
+
+ /* Check if doing everything with one insn is no worse than
+ using multiple insns. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && w0 != 0 && w1 != 0
+ && !(lower_half_easy_p && upper_half_easy_p)
+ && !(code == IOR && w1 == 0xffff
+ && (w0 & 0x8000) != 0 && lower_half_easy_p))
+ {
+ length = h8300_length_from_table (operands[1], operands[2],
+ &logicl_length_table);
+ }
+ else
+ {
+ /* Take care of the lower and upper words individually. For
+ each word, we try different methods in the order of
+
+ 1) the special insn (in case of AND or XOR),
+ 2) the word-wise insn, and
+ 3) The byte-wise insn. */
+ if (w0 == 0xffff
+ && (TARGET_H8300 ? (code == AND) : (code != IOR)))
+ {
+ length += 2;
+ }
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && (b0 != 0)
+ && (b1 != 0))
+ {
+ length += 4;
+ }
+ else
+ {
+ if (b0 != 0)
+ length += 2;
+
+ if (b1 != 0)
+ length += 2;
+ }
+
+ if (w1 == 0xffff
+ && (TARGET_H8300 ? (code == AND) : (code != IOR)))
+ {
+ length += 2;
+ }
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && code == IOR
+ && w1 == 0xffff
+ && (w0 & 0x8000) != 0)
+ {
+ length += 2;
+ }
+ else if ((TARGET_H8300H || TARGET_H8300S)
+ && code == AND
+ && w1 == 0xff00)
+ {
+ length += 2;
+ }
+ else if (TARGET_H8300H || TARGET_H8300S)
+ {
+ if (w1 != 0)
+ length += 4;
+ }
+ else
+ {
+ if (b2 != 0)
+ length += 2;
+
+ if (b3 != 0)
+ length += 2;
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return length;
+}
+
+/* Compute which flag bits are valid after a logical insn. */
+
+enum attr_cc
+compute_logical_op_cc (enum machine_mode mode, rtx *operands)
+{
+ /* Figure out the logical op that we need to perform. */
+ enum rtx_code code = GET_CODE (operands[3]);
+ /* Pretend that every byte is affected if both operands are registers. */
+ const unsigned HOST_WIDE_INT intval =
+ (unsigned HOST_WIDE_INT) ((GET_CODE (operands[2]) == CONST_INT)
+ /* Always use the full instruction if the
+ first operand is in memory. It is better
+ to use define_splits to generate the shorter
+ sequence where valid. */
+ && register_operand (operands[1], VOIDmode)
+ ? INTVAL (operands[2]) : 0x55555555);
+ /* The determinant of the algorithm. If we perform an AND, 0
+ affects a bit. Otherwise, 1 affects a bit. */
+ const unsigned HOST_WIDE_INT det = (code != AND) ? intval : ~intval;
+ /* Break up DET into pieces. */
+ const unsigned HOST_WIDE_INT b0 = (det >> 0) & 0xff;
+ const unsigned HOST_WIDE_INT b1 = (det >> 8) & 0xff;
+ const unsigned HOST_WIDE_INT w0 = (det >> 0) & 0xffff;
+ const unsigned HOST_WIDE_INT w1 = (det >> 16) & 0xffff;
+ int lower_half_easy_p = 0;
+ int upper_half_easy_p = 0;
+ /* Condition code. */
+ enum attr_cc cc = CC_CLOBBER;
+
+ switch (mode)
+ {
+ case HImode:
+ /* First, see if we can finish with one insn. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && b0 != 0
+ && b1 != 0)
+ {
+ cc = CC_SET_ZNV;
+ }
+ break;
+ case SImode:
+ if (TARGET_H8300H || TARGET_H8300S)
+ {
+ /* Determine if the lower half can be taken care of in no more
+ than two bytes. */
+ lower_half_easy_p = (b0 == 0
+ || b1 == 0
+ || (code != IOR && w0 == 0xffff));
+
+ /* Determine if the upper half can be taken care of in no more
+ than two bytes. */
+ upper_half_easy_p = ((code != IOR && w1 == 0xffff)
+ || (code == AND && w1 == 0xff00));
+ }
+
+ /* Check if doing everything with one insn is no worse than
+ using multiple insns. */
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && w0 != 0 && w1 != 0
+ && !(lower_half_easy_p && upper_half_easy_p)
+ && !(code == IOR && w1 == 0xffff
+ && (w0 & 0x8000) != 0 && lower_half_easy_p))
+ {
+ cc = CC_SET_ZNV;
+ }
+ else
+ {
+ if ((TARGET_H8300H || TARGET_H8300S)
+ && code == IOR
+ && w1 == 0xffff
+ && (w0 & 0x8000) != 0)
+ {
+ cc = CC_SET_ZNV;
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return cc;
+}
+
+/* Expand a conditional branch. */
+
+void
+h8300_expand_branch (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx label = operands[3];
+ rtx tmp;
+
+ tmp = gen_rtx_COMPARE (VOIDmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, cc0_rtx, tmp));
+
+ tmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+}
+
+
+/* Expand a conditional store. */
+
+void
+h8300_expand_store (rtx operands[])
+{
+ rtx dest = operands[0];
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+ rtx tmp;
+
+ tmp = gen_rtx_COMPARE (VOIDmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, cc0_rtx, tmp));
+
+ tmp = gen_rtx_fmt_ee (code, GET_MODE (dest), cc0_rtx, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+}
+
+/* Shifts.
+
+ We devote a fair bit of code to getting efficient shifts since we
+ can only shift one bit at a time on the H8/300 and H8/300H and only
+ one or two bits at a time on the H8S.
+
+ All shift code falls into one of the following ways of
+ implementation:
+
+ o SHIFT_INLINE: Emit straight line code for the shift; this is used
+ when a straight line shift is about the same size or smaller than
+ a loop.
+
+ o SHIFT_ROT_AND: Rotate the value the opposite direction, then mask
+ off the bits we don't need. This is used when only a few of the
+ bits in the original value will survive in the shifted value.
+
+ o SHIFT_SPECIAL: Often it's possible to move a byte or a word to
+ simulate a shift by 8, 16, or 24 bits. Once moved, a few inline
+ shifts can be added if the shift count is slightly more than 8 or
+ 16. This case also includes other oddballs that are not worth
+ explaining here.
+
+ o SHIFT_LOOP: Emit a loop using one (or two on H8S) bit shifts.
+
+ For each shift count, we try to use code that has no trade-off
+ between code size and speed whenever possible.
+
+ If the trade-off is unavoidable, we try to be reasonable.
+   Specifically, if the fastest version is only one instruction longer
+   than the shortest version, we take the fastest version.  We also
+   provide the user a way to switch back to the shortest version
+   with -Os.
+
+ For the details of the shift algorithms for various shift counts,
+ refer to shift_alg_[qhs]i. */
+
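+/* As an example of SHIFT_ROT_AND: a QImode logical shift right by 6
+   can be done as two one-bit left rotates followed by masking with
+   #0x03; rotating left twice is equivalent to rotating right by six,
+   and only the top two bits of the original value survive the
+   shift.  */
+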
+/* Classify a shift with the given mode and code. OP is the shift amount. */
+
+enum h8sx_shift_type
+h8sx_classify_shift (enum machine_mode mode, enum rtx_code code, rtx op)
+{
+ if (!TARGET_H8300SX)
+ return H8SX_SHIFT_NONE;
+
+ switch (code)
+ {
+ case ASHIFT:
+ case LSHIFTRT:
+ /* Check for variable shifts (shll Rs,Rd and shlr Rs,Rd). */
+ if (GET_CODE (op) != CONST_INT)
+ return H8SX_SHIFT_BINARY;
+
+ /* Reject out-of-range shift amounts. */
+ if (INTVAL (op) <= 0 || INTVAL (op) >= GET_MODE_BITSIZE (mode))
+ return H8SX_SHIFT_NONE;
+
+ /* Power-of-2 shifts are effectively unary operations. */
+ if (exact_log2 (INTVAL (op)) >= 0)
+ return H8SX_SHIFT_UNARY;
+
+ return H8SX_SHIFT_BINARY;
+
+ case ASHIFTRT:
+ if (op == const1_rtx || op == const2_rtx)
+ return H8SX_SHIFT_UNARY;
+ return H8SX_SHIFT_NONE;
+
+ case ROTATE:
+ if (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) == 1
+ || INTVAL (op) == 2
+ || INTVAL (op) == GET_MODE_BITSIZE (mode) - 2
+ || INTVAL (op) == GET_MODE_BITSIZE (mode) - 1))
+ return H8SX_SHIFT_UNARY;
+ return H8SX_SHIFT_NONE;
+
+ default:
+ return H8SX_SHIFT_NONE;
+ }
+}
+
+/* Return the asm template for a single h8sx shift instruction.
+ OPERANDS[0] and OPERANDS[1] are the destination, OPERANDS[2]
+ is the source and OPERANDS[3] is the shift. SUFFIX is the
+ size suffix ('b', 'w' or 'l') and OPTYPE is the print_operand
+ prefix for the destination operand. */
+
+const char *
+output_h8sx_shift (rtx *operands, int suffix, int optype)
+{
+ static char buffer[16];
+ const char *stem;
+
+ switch (GET_CODE (operands[3]))
+ {
+ case ASHIFT:
+ stem = "shll";
+ break;
+
+ case ASHIFTRT:
+ stem = "shar";
+ break;
+
+ case LSHIFTRT:
+ stem = "shlr";
+ break;
+
+ case ROTATE:
+ stem = "rotl";
+ if (INTVAL (operands[2]) > 2)
+ {
+ /* This is really a right rotate. */
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[0]))
+ - INTVAL (operands[2]));
+ stem = "rotr";
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ if (operands[2] == const1_rtx)
+ sprintf (buffer, "%s.%c\t%%%c0", stem, suffix, optype);
+ else
+ sprintf (buffer, "%s.%c\t%%X2,%%%c0", stem, suffix, optype);
+ return buffer;
+}
+
+/* Emit code to do shifts. */
+
+bool
+expand_a_shift (enum machine_mode mode, enum rtx_code code, rtx operands[])
+{
+ switch (h8sx_classify_shift (mode, code, operands[2]))
+ {
+ case H8SX_SHIFT_BINARY:
+ operands[1] = force_reg (mode, operands[1]);
+ return false;
+
+ case H8SX_SHIFT_UNARY:
+ return false;
+
+ case H8SX_SHIFT_NONE:
+ break;
+ }
+
+ emit_move_insn (copy_rtx (operands[0]), operands[1]);
+
+ /* Need a loop to get all the bits we want - we generate the
+ code at emit time, but need to allocate a scratch reg now. */
+
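+
+  /* The pattern emitted below has the form
+
+	(parallel [(set OP0 (CODE:MODE OP0 OP2))
+		   (clobber (scratch:QI))])
+
+     where the QImode scratch is expected to hold the loop counter.  */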
+ emit_insn (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_fmt_ee (code, mode,
+ copy_rtx (operands[0]), operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (QImode)))));
+ return true;
+}
+
+/* Symbols of the various modes which can be used as indices. */
+
+enum shift_mode
+{
+ QIshift, HIshift, SIshift
+};
+
+/* For single bit shift insns, record assembler and what bits of the
+ condition code are valid afterwards (represented as various CC_FOO
+ bits, 0 means CC isn't left in a usable state). */
+
+struct shift_insn
+{
+ const char *const assembler;
+ const enum attr_cc cc_valid;
+};
+
+/* Assembler instruction shift table.
+
+ These tables are used to look up the basic shifts.
+ They are indexed by cpu, shift_type, and mode. */
+
+static const struct shift_insn shift_one[2][3][3] =
+{
+/* H8/300 */
+ {
+/* SHIFT_ASHIFT */
+ {
+ { "shll\t%X0", CC_SET_ZNV },
+ { "add.w\t%T0,%T0", CC_SET_ZN },
+ { "add.w\t%f0,%f0\n\taddx\t%y0,%y0\n\taddx\t%z0,%z0", CC_CLOBBER }
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ { "shlr\t%X0", CC_SET_ZNV },
+ { "shlr\t%t0\n\trotxr\t%s0", CC_CLOBBER },
+ { "shlr\t%z0\n\trotxr\t%y0\n\trotxr\t%x0\n\trotxr\t%w0", CC_CLOBBER }
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ { "shar\t%X0", CC_SET_ZNV },
+ { "shar\t%t0\n\trotxr\t%s0", CC_CLOBBER },
+ { "shar\t%z0\n\trotxr\t%y0\n\trotxr\t%x0\n\trotxr\t%w0", CC_CLOBBER }
+ }
+ },
+/* H8/300H */
+ {
+/* SHIFT_ASHIFT */
+ {
+ { "shll.b\t%X0", CC_SET_ZNV },
+ { "shll.w\t%T0", CC_SET_ZNV },
+ { "shll.l\t%S0", CC_SET_ZNV }
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ { "shlr.b\t%X0", CC_SET_ZNV },
+ { "shlr.w\t%T0", CC_SET_ZNV },
+ { "shlr.l\t%S0", CC_SET_ZNV }
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ { "shar.b\t%X0", CC_SET_ZNV },
+ { "shar.w\t%T0", CC_SET_ZNV },
+ { "shar.l\t%S0", CC_SET_ZNV }
+ }
+ }
+};
+
+static const struct shift_insn shift_two[3][3] =
+{
+/* SHIFT_ASHIFT */
+ {
+ { "shll.b\t#2,%X0", CC_SET_ZNV },
+ { "shll.w\t#2,%T0", CC_SET_ZNV },
+ { "shll.l\t#2,%S0", CC_SET_ZNV }
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ { "shlr.b\t#2,%X0", CC_SET_ZNV },
+ { "shlr.w\t#2,%T0", CC_SET_ZNV },
+ { "shlr.l\t#2,%S0", CC_SET_ZNV }
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ { "shar.b\t#2,%X0", CC_SET_ZNV },
+ { "shar.w\t#2,%T0", CC_SET_ZNV },
+ { "shar.l\t#2,%S0", CC_SET_ZNV }
+ }
+};
+
+/* Rotates are organized by which shift they'll be used in implementing.
+ There's no need to record whether the cc is valid afterwards because
+ it is the AND insn that will decide this. */
+
+static const char *const rotate_one[2][3][3] =
+{
+/* H8/300 */
+ {
+/* SHIFT_ASHIFT */
+ {
+ "rotr\t%X0",
+ "shlr\t%t0\n\trotxr\t%s0\n\tbst\t#7,%t0",
+ 0
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ "rotl\t%X0",
+ "shll\t%s0\n\trotxl\t%t0\n\tbst\t#0,%s0",
+ 0
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ "rotl\t%X0",
+ "shll\t%s0\n\trotxl\t%t0\n\tbst\t#0,%s0",
+ 0
+ }
+ },
+/* H8/300H */
+ {
+/* SHIFT_ASHIFT */
+ {
+ "rotr.b\t%X0",
+ "rotr.w\t%T0",
+ "rotr.l\t%S0"
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ "rotl.b\t%X0",
+ "rotl.w\t%T0",
+ "rotl.l\t%S0"
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ "rotl.b\t%X0",
+ "rotl.w\t%T0",
+ "rotl.l\t%S0"
+ }
+ }
+};
+
+static const char *const rotate_two[3][3] =
+{
+/* SHIFT_ASHIFT */
+ {
+ "rotr.b\t#2,%X0",
+ "rotr.w\t#2,%T0",
+ "rotr.l\t#2,%S0"
+ },
+/* SHIFT_LSHIFTRT */
+ {
+ "rotl.b\t#2,%X0",
+ "rotl.w\t#2,%T0",
+ "rotl.l\t#2,%S0"
+ },
+/* SHIFT_ASHIFTRT */
+ {
+ "rotl.b\t#2,%X0",
+ "rotl.w\t#2,%T0",
+ "rotl.l\t#2,%S0"
+ }
+};
+
+struct shift_info {
+ /* Shift algorithm. */
+ enum shift_alg alg;
+
+ /* The number of bits to be shifted by shift1 and shift2. Valid
+ when ALG is SHIFT_SPECIAL. */
+ unsigned int remainder;
+
+ /* Special insn for a shift. Valid when ALG is SHIFT_SPECIAL. */
+ const char *special;
+
+ /* Insn for a one-bit shift. Valid when ALG is either SHIFT_INLINE
+ or SHIFT_SPECIAL, and REMAINDER is nonzero. */
+ const char *shift1;
+
+ /* Insn for a two-bit shift. Valid when ALG is either SHIFT_INLINE
+ or SHIFT_SPECIAL, and REMAINDER is nonzero. */
+ const char *shift2;
+
+ /* CC status for SHIFT_INLINE. */
+ enum attr_cc cc_inline;
+
+ /* CC status for SHIFT_SPECIAL. */
+ enum attr_cc cc_special;
+};
+
+static void get_shift_alg (enum shift_type,
+ enum shift_mode, unsigned int,
+ struct shift_info *);
+
+/* Given SHIFT_TYPE, SHIFT_MODE, and shift count COUNT, determine the
+ best algorithm for doing the shift. The assembler code is stored
+ in the pointers in INFO. We achieve the maximum efficiency in most
+   cases when !TARGET_H8300.  In the TARGET_H8300 case, SImode shifts
+   in particular still leave a lot of room for optimization.
+
+ We first determine the strategy of the shift algorithm by a table
+ lookup. If that tells us to use a hand crafted assembly code, we
+ go into the big switch statement to find what that is. Otherwise,
+ we resort to a generic way, such as inlining. In either case, the
+ result is returned through INFO. */
+
+static void
+get_shift_alg (enum shift_type shift_type, enum shift_mode shift_mode,
+ unsigned int count, struct shift_info *info)
+{
+ enum h8_cpu cpu;
+
+ /* Find the target CPU. */
+ if (TARGET_H8300)
+ cpu = H8_300;
+ else if (TARGET_H8300H)
+ cpu = H8_300H;
+ else
+ cpu = H8_S;
+
+ /* Find the shift algorithm. */
+ info->alg = SHIFT_LOOP;
+ switch (shift_mode)
+ {
+ case QIshift:
+ if (count < GET_MODE_BITSIZE (QImode))
+ info->alg = shift_alg_qi[cpu][shift_type][count];
+ break;
+
+ case HIshift:
+ if (count < GET_MODE_BITSIZE (HImode))
+ info->alg = shift_alg_hi[cpu][shift_type][count];
+ break;
+
+ case SIshift:
+ if (count < GET_MODE_BITSIZE (SImode))
+ info->alg = shift_alg_si[cpu][shift_type][count];
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Fill in INFO. Return unless we have SHIFT_SPECIAL. */
+ switch (info->alg)
+ {
+ case SHIFT_INLINE:
+ info->remainder = count;
+ /* Fall through. */
+
+ case SHIFT_LOOP:
+ /* It is up to the caller to know that looping clobbers cc. */
+ info->shift1 = shift_one[cpu_type][shift_type][shift_mode].assembler;
+ info->shift2 = shift_two[shift_type][shift_mode].assembler;
+ info->cc_inline = shift_one[cpu_type][shift_type][shift_mode].cc_valid;
+ goto end;
+
+ case SHIFT_ROT_AND:
+ info->shift1 = rotate_one[cpu_type][shift_type][shift_mode];
+ info->shift2 = rotate_two[shift_type][shift_mode];
+ info->cc_inline = CC_CLOBBER;
+ goto end;
+
+ case SHIFT_SPECIAL:
+ /* REMAINDER is 0 for most cases, so initialize it to 0. */
+ info->remainder = 0;
+ info->shift1 = shift_one[cpu_type][shift_type][shift_mode].assembler;
+ info->shift2 = shift_two[shift_type][shift_mode].assembler;
+ info->cc_inline = shift_one[cpu_type][shift_type][shift_mode].cc_valid;
+ info->cc_special = CC_CLOBBER;
+ break;
+ }
+
+ /* Here we only deal with SHIFT_SPECIAL. */
+ switch (shift_mode)
+ {
+ case QIshift:
+ /* For ASHIFTRT by 7 bits, the sign bit is simply replicated
+ through the entire value. */
+ gcc_assert (shift_type == SHIFT_ASHIFTRT && count == 7);
+ info->special = "shll\t%X0\n\tsubx\t%X0,%X0";
+ goto end;
+
+ case HIshift:
+ if (count == 7)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ if (TARGET_H8300)
+ info->special = "shar.b\t%t0\n\tmov.b\t%s0,%t0\n\trotxr.b\t%t0\n\trotr.b\t%s0\n\tand.b\t#0x80,%s0";
+ else
+ info->special = "shar.b\t%t0\n\tmov.b\t%s0,%t0\n\trotxr.w\t%T0\n\tand.b\t#0x80,%s0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300)
+ info->special = "shal.b\t%s0\n\tmov.b\t%t0,%s0\n\trotxl.b\t%s0\n\trotl.b\t%t0\n\tand.b\t#0x01,%t0";
+ else
+ info->special = "shal.b\t%s0\n\tmov.b\t%t0,%s0\n\trotxl.w\t%T0\n\tand.b\t#0x01,%t0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "shal.b\t%s0\n\tmov.b\t%t0,%s0\n\trotxl.b\t%s0\n\tsubx\t%t0,%t0";
+ goto end;
+ }
+ }
+ else if ((8 <= count && count <= 13)
+ || (TARGET_H8300S && count == 14))
+ {
+ info->remainder = count - 8;
+
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.b\t%s0,%t0\n\tsub.b\t%s0,%s0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300)
+ {
+ info->special = "mov.b\t%t0,%s0\n\tsub.b\t%t0,%t0";
+ info->shift1 = "shlr.b\t%s0";
+ info->cc_inline = CC_SET_ZNV;
+ }
+ else
+ {
+ info->special = "mov.b\t%t0,%s0\n\textu.w\t%T0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ goto end;
+ case SHIFT_ASHIFTRT:
+ if (TARGET_H8300)
+ {
+ info->special = "mov.b\t%t0,%s0\n\tbld\t#7,%s0\n\tsubx\t%t0,%t0";
+ info->shift1 = "shar.b\t%s0";
+ }
+ else
+ {
+ info->special = "mov.b\t%t0,%s0\n\texts.w\t%T0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ goto end;
+ }
+ }
+ else if (count == 14)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ if (TARGET_H8300)
+ info->special = "mov.b\t%s0,%t0\n\trotr.b\t%t0\n\trotr.b\t%t0\n\tand.b\t#0xC0,%t0\n\tsub.b\t%s0,%s0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300)
+ info->special = "mov.b\t%t0,%s0\n\trotl.b\t%s0\n\trotl.b\t%s0\n\tand.b\t#3,%s0\n\tsub.b\t%t0,%t0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ if (TARGET_H8300)
+ info->special = "mov.b\t%t0,%s0\n\tshll.b\t%s0\n\tsubx.b\t%t0,%t0\n\tshll.b\t%s0\n\tmov.b\t%t0,%s0\n\tbst.b\t#0,%s0";
+ else if (TARGET_H8300H)
+ {
+ info->special = "shll.b\t%t0\n\tsubx.b\t%s0,%s0\n\tshll.b\t%t0\n\trotxl.b\t%s0\n\texts.w\t%T0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ else /* TARGET_H8300S */
+ gcc_unreachable ();
+ goto end;
+ }
+ }
+ else if (count == 15)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "bld\t#0,%s0\n\txor\t%s0,%s0\n\txor\t%t0,%t0\n\tbst\t#7,%t0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "bld\t#7,%t0\n\txor\t%s0,%s0\n\txor\t%t0,%t0\n\tbst\t#0,%s0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "shll\t%t0\n\tsubx\t%t0,%t0\n\tmov.b\t%t0,%s0";
+ goto end;
+ }
+ }
+ gcc_unreachable ();
+
+ case SIshift:
+ if (TARGET_H8300 && 8 <= count && count <= 9)
+ {
+ info->remainder = count - 8;
+
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.b\t%y0,%z0\n\tmov.b\t%x0,%y0\n\tmov.b\t%w0,%x0\n\tsub.b\t%w0,%w0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "mov.b\t%x0,%w0\n\tmov.b\t%y0,%x0\n\tmov.b\t%z0,%y0\n\tsub.b\t%z0,%z0";
+ info->shift1 = "shlr\t%y0\n\trotxr\t%x0\n\trotxr\t%w0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "mov.b\t%x0,%w0\n\tmov.b\t%y0,%x0\n\tmov.b\t%z0,%y0\n\tshll\t%z0\n\tsubx\t%z0,%z0";
+ goto end;
+ }
+ }
+ else if (count == 8 && !TARGET_H8300)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.w\t%e0,%f4\n\tmov.b\t%s4,%t4\n\tmov.b\t%t0,%s4\n\tmov.b\t%s0,%t0\n\tsub.b\t%s0,%s0\n\tmov.w\t%f4,%e0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "mov.w\t%e0,%f4\n\tmov.b\t%t0,%s0\n\tmov.b\t%s4,%t0\n\tmov.b\t%t4,%s4\n\textu.w\t%f4\n\tmov.w\t%f4,%e0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "mov.w\t%e0,%f4\n\tmov.b\t%t0,%s0\n\tmov.b\t%s4,%t0\n\tmov.b\t%t4,%s4\n\texts.w\t%f4\n\tmov.w\t%f4,%e0";
+ goto end;
+ }
+ }
+ else if (count == 15 && TARGET_H8300)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ gcc_unreachable ();
+ case SHIFT_LSHIFTRT:
+ info->special = "bld\t#7,%z0\n\tmov.w\t%e0,%f0\n\txor\t%y0,%y0\n\txor\t%z0,%z0\n\trotxl\t%w0\n\trotxl\t%x0\n\trotxl\t%y0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "bld\t#7,%z0\n\tmov.w\t%e0,%f0\n\trotxl\t%w0\n\trotxl\t%x0\n\tsubx\t%y0,%y0\n\tsubx\t%z0,%z0";
+ goto end;
+ }
+ }
+ else if (count == 15 && !TARGET_H8300)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "shlr.w\t%e0\n\tmov.w\t%f0,%e0\n\txor.w\t%f0,%f0\n\trotxr.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "shll.w\t%f0\n\tmov.w\t%e0,%f0\n\txor.w\t%e0,%e0\n\trotxl.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ case SHIFT_ASHIFTRT:
+ gcc_unreachable ();
+ }
+ }
+ else if ((TARGET_H8300 && 16 <= count && count <= 20)
+ || (TARGET_H8300H && 16 <= count && count <= 19)
+ || (TARGET_H8300S && 16 <= count && count <= 21))
+ {
+ info->remainder = count - 16;
+
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.w\t%f0,%e0\n\tsub.w\t%f0,%f0";
+ if (TARGET_H8300)
+ info->shift1 = "add.w\t%e0,%e0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300)
+ {
+ info->special = "mov.w\t%e0,%f0\n\tsub.w\t%e0,%e0";
+ info->shift1 = "shlr\t%x0\n\trotxr\t%w0";
+ }
+ else
+ {
+ info->special = "mov.w\t%e0,%f0\n\textu.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ goto end;
+ case SHIFT_ASHIFTRT:
+ if (TARGET_H8300)
+ {
+ info->special = "mov.w\t%e0,%f0\n\tshll\t%z0\n\tsubx\t%z0,%z0\n\tmov.b\t%z0,%y0";
+ info->shift1 = "shar\t%x0\n\trotxr\t%w0";
+ }
+ else
+ {
+ info->special = "mov.w\t%e0,%f0\n\texts.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ goto end;
+ }
+ }
+ else if (TARGET_H8300 && 24 <= count && count <= 28)
+ {
+ info->remainder = count - 24;
+
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.b\t%w0,%z0\n\tsub.b\t%y0,%y0\n\tsub.w\t%f0,%f0";
+ info->shift1 = "shll.b\t%z0";
+ info->cc_inline = CC_SET_ZNV;
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "mov.b\t%z0,%w0\n\tsub.b\t%x0,%x0\n\tsub.w\t%e0,%e0";
+ info->shift1 = "shlr.b\t%w0";
+ info->cc_inline = CC_SET_ZNV;
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "mov.b\t%z0,%w0\n\tbld\t#7,%w0\n\tsubx\t%x0,%x0\n\tsubx\t%x0,%x0\n\tsubx\t%x0,%x0";
+ info->shift1 = "shar.b\t%w0";
+ info->cc_inline = CC_SET_ZNV;
+ goto end;
+ }
+ }
+ else if ((TARGET_H8300H && count == 24)
+ || (TARGET_H8300S && 24 <= count && count <= 25))
+ {
+ info->remainder = count - 24;
+
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "mov.b\t%s0,%t0\n\tsub.b\t%s0,%s0\n\tmov.w\t%f0,%e0\n\tsub.w\t%f0,%f0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "mov.w\t%e0,%f0\n\tmov.b\t%t0,%s0\n\textu.w\t%f0\n\textu.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "mov.w\t%e0,%f0\n\tmov.b\t%t0,%s0\n\texts.w\t%f0\n\texts.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ }
+ }
+ else if (!TARGET_H8300 && count == 28)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ if (TARGET_H8300H)
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\tsub.w\t%f0,%f0";
+ else
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t#2,%S0\n\trotr.l\t#2,%S0\n\tsub.w\t%f0,%f0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300H)
+ {
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\textu.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ else
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t#2,%S0\n\trotl.l\t#2,%S0\n\textu.l\t%S0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ gcc_unreachable ();
+ }
+ }
+ else if (!TARGET_H8300 && count == 29)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ if (TARGET_H8300H)
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\tsub.w\t%f0,%f0";
+ else
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t#2,%S0\n\trotr.l\t%S0\n\tsub.w\t%f0,%f0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300H)
+ {
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\textu.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ else
+ {
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t#2,%S0\n\trotl.l\t%S0\n\textu.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ }
+ goto end;
+ case SHIFT_ASHIFTRT:
+ gcc_unreachable ();
+ }
+ }
+ else if (!TARGET_H8300 && count == 30)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ if (TARGET_H8300H)
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t%S0\n\trotr.l\t%S0\n\tsub.w\t%f0,%f0";
+ else
+ info->special = "sub.w\t%e0,%e0\n\trotr.l\t#2,%S0\n\tsub.w\t%f0,%f0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ if (TARGET_H8300H)
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t%S0\n\trotl.l\t%S0\n\textu.l\t%S0";
+ else
+ info->special = "sub.w\t%f0,%f0\n\trotl.l\t#2,%S0\n\textu.l\t%S0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ gcc_unreachable ();
+ }
+ }
+ else if (count == 31)
+ {
+ if (TARGET_H8300)
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "sub.w\t%e0,%e0\n\tshlr\t%w0\n\tmov.w\t%e0,%f0\n\trotxr\t%z0";
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "sub.w\t%f0,%f0\n\tshll\t%z0\n\tmov.w\t%f0,%e0\n\trotxl\t%w0";
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "shll\t%z0\n\tsubx\t%w0,%w0\n\tmov.b\t%w0,%x0\n\tmov.w\t%f0,%e0";
+ goto end;
+ }
+ }
+ else
+ {
+ switch (shift_type)
+ {
+ case SHIFT_ASHIFT:
+ info->special = "shlr.l\t%S0\n\txor.l\t%S0,%S0\n\trotxr.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ case SHIFT_LSHIFTRT:
+ info->special = "shll.l\t%S0\n\txor.l\t%S0,%S0\n\trotxl.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ case SHIFT_ASHIFTRT:
+ info->special = "shll\t%e0\n\tsubx\t%w0,%w0\n\texts.w\t%T0\n\texts.l\t%S0";
+ info->cc_special = CC_SET_ZNV;
+ goto end;
+ }
+ }
+ }
+ gcc_unreachable ();
+
+ default:
+ gcc_unreachable ();
+ }
+
+ end:
+ if (!TARGET_H8300S)
+ info->shift2 = NULL;
+}
+
+/* Given COUNT and MODE of a shift, return 1 if a scratch reg may be
+ needed for some shift with COUNT and MODE. Return 0 otherwise. */
+
+int
+h8300_shift_needs_scratch_p (int count, enum machine_mode mode)
+{
+ enum h8_cpu cpu;
+ int a, lr, ar;
+
+ if (GET_MODE_BITSIZE (mode) <= count)
+ return 1;
+
+ /* Find out the target CPU. */
+ if (TARGET_H8300)
+ cpu = H8_300;
+ else if (TARGET_H8300H)
+ cpu = H8_300H;
+ else
+ cpu = H8_S;
+
+ /* Find the shift algorithm. */
+ switch (mode)
+ {
+ case QImode:
+ a = shift_alg_qi[cpu][SHIFT_ASHIFT][count];
+ lr = shift_alg_qi[cpu][SHIFT_LSHIFTRT][count];
+ ar = shift_alg_qi[cpu][SHIFT_ASHIFTRT][count];
+ break;
+
+ case HImode:
+ a = shift_alg_hi[cpu][SHIFT_ASHIFT][count];
+ lr = shift_alg_hi[cpu][SHIFT_LSHIFTRT][count];
+ ar = shift_alg_hi[cpu][SHIFT_ASHIFTRT][count];
+ break;
+
+ case SImode:
+ a = shift_alg_si[cpu][SHIFT_ASHIFT][count];
+ lr = shift_alg_si[cpu][SHIFT_LSHIFTRT][count];
+ ar = shift_alg_si[cpu][SHIFT_ASHIFTRT][count];
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+  /* On the H8/300H, an SImode shift by 8 needs a scratch register. */
+ return (a == SHIFT_LOOP || lr == SHIFT_LOOP || ar == SHIFT_LOOP
+ || (TARGET_H8300H && mode == SImode && count == 8));
+}
+
+/* Output the assembler code for doing shifts. */
+
+const char *
+output_a_shift (rtx *operands)
+{
+ static int loopend_lab;
+ rtx shift = operands[3];
+ enum machine_mode mode = GET_MODE (shift);
+ enum rtx_code code = GET_CODE (shift);
+ enum shift_type shift_type;
+ enum shift_mode shift_mode;
+ struct shift_info info;
+ int n;
+
+ loopend_lab++;
+
+ switch (mode)
+ {
+ case QImode:
+ shift_mode = QIshift;
+ break;
+ case HImode:
+ shift_mode = HIshift;
+ break;
+ case SImode:
+ shift_mode = SIshift;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case ASHIFTRT:
+ shift_type = SHIFT_ASHIFTRT;
+ break;
+ case LSHIFTRT:
+ shift_type = SHIFT_LSHIFTRT;
+ break;
+ case ASHIFT:
+ shift_type = SHIFT_ASHIFT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* This case must be taken care of by one of the two splitters
+ that convert a variable shift into a loop. */
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ n = INTVAL (operands[2]);
+
+ /* If the count is negative, make it 0. */
+ if (n < 0)
+ n = 0;
+ /* If the count is too big, truncate it.
+ ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to
+ do the intuitive thing. */
+ else if ((unsigned int) n > GET_MODE_BITSIZE (mode))
+ n = GET_MODE_BITSIZE (mode);
+
+ get_shift_alg (shift_type, shift_mode, n, &info);
+
+ switch (info.alg)
+ {
+ case SHIFT_SPECIAL:
+ output_asm_insn (info.special, operands);
+ /* Fall through. */
+
+ case SHIFT_INLINE:
+ n = info.remainder;
+
+ /* Emit two bit shifts first. */
+ if (info.shift2 != NULL)
+ {
+ for (; n > 1; n -= 2)
+ output_asm_insn (info.shift2, operands);
+ }
+
+ /* Now emit one bit shifts for any residual. */
+ for (; n > 0; n--)
+ output_asm_insn (info.shift1, operands);
+ return "";
+
+ case SHIFT_ROT_AND:
+ {
+ int m = GET_MODE_BITSIZE (mode) - n;
+ const int mask = (shift_type == SHIFT_ASHIFT
+ ? ((1 << m) - 1) << n
+ : (1 << m) - 1);
+ char insn_buf[200];
+
+ /* Not all possibilities of rotate are supported. They shouldn't
+ be generated, but let's watch for 'em. */
+ gcc_assert (info.shift1);
+
+ /* Emit two bit rotates first. */
+ if (info.shift2 != NULL)
+ {
+ for (; m > 1; m -= 2)
+ output_asm_insn (info.shift2, operands);
+ }
+
+ /* Now single bit rotates for any residual. */
+ for (; m > 0; m--)
+ output_asm_insn (info.shift1, operands);
+
+ /* Now mask off the high bits. */
+ switch (mode)
+ {
+ case QImode:
+ sprintf (insn_buf, "and\t#%d,%%X0", mask);
+ break;
+
+ case HImode:
+ gcc_assert (TARGET_H8300H || TARGET_H8300S);
+ sprintf (insn_buf, "and.w\t#%d,%%T0", mask);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ output_asm_insn (insn_buf, operands);
+ return "";
+ }
+
+ case SHIFT_LOOP:
+ /* A loop to shift by a "large" constant value.
+ If we have shift-by-2 insns, use them. */
+ if (info.shift2 != NULL)
+ {
+ fprintf (asm_out_file, "\tmov.b #%d,%sl\n", n / 2,
+ names_big[REGNO (operands[4])]);
+ fprintf (asm_out_file, ".Llt%d:\n", loopend_lab);
+ output_asm_insn (info.shift2, operands);
+ output_asm_insn ("add #0xff,%X4", operands);
+ fprintf (asm_out_file, "\tbne .Llt%d\n", loopend_lab);
+ if (n % 2)
+ output_asm_insn (info.shift1, operands);
+ }
+ else
+ {
+ fprintf (asm_out_file, "\tmov.b #%d,%sl\n", n,
+ names_big[REGNO (operands[4])]);
+ fprintf (asm_out_file, ".Llt%d:\n", loopend_lab);
+ output_asm_insn (info.shift1, operands);
+ output_asm_insn ("add #0xff,%X4", operands);
+ fprintf (asm_out_file, "\tbne .Llt%d\n", loopend_lab);
+ }
+ return "";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Count the number of assembly instructions in a string TEMPL. */
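+/* For example, the template "shll\t%S0\n\tshll\t%S0" yields a count
+   of 2: one more than the number of embedded newlines. */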
+
+static unsigned int
+h8300_asm_insn_count (const char *templ)
+{
+ unsigned int count = 1;
+
+ for (; *templ; templ++)
+ if (*templ == '\n')
+ count++;
+
+ return count;
+}
+
+/* Compute the length of a shift insn. */
+
+unsigned int
+compute_a_shift_length (rtx insn ATTRIBUTE_UNUSED, rtx *operands)
+{
+ rtx shift = operands[3];
+ enum machine_mode mode = GET_MODE (shift);
+ enum rtx_code code = GET_CODE (shift);
+ enum shift_type shift_type;
+ enum shift_mode shift_mode;
+ struct shift_info info;
+ unsigned int wlength = 0;
+
+ switch (mode)
+ {
+ case QImode:
+ shift_mode = QIshift;
+ break;
+ case HImode:
+ shift_mode = HIshift;
+ break;
+ case SImode:
+ shift_mode = SIshift;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case ASHIFTRT:
+ shift_type = SHIFT_ASHIFTRT;
+ break;
+ case LSHIFTRT:
+ shift_type = SHIFT_LSHIFTRT;
+ break;
+ case ASHIFT:
+ shift_type = SHIFT_ASHIFT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ /* Get the assembler code to do one shift. */
+ get_shift_alg (shift_type, shift_mode, 1, &info);
+
+ return (4 + h8300_asm_insn_count (info.shift1)) * 2;
+ }
+ else
+ {
+ int n = INTVAL (operands[2]);
+
+ /* If the count is negative, make it 0. */
+ if (n < 0)
+ n = 0;
+ /* If the count is too big, truncate it.
+ ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to
+ do the intuitive thing. */
+ else if ((unsigned int) n > GET_MODE_BITSIZE (mode))
+ n = GET_MODE_BITSIZE (mode);
+
+ get_shift_alg (shift_type, shift_mode, n, &info);
+
+ switch (info.alg)
+ {
+ case SHIFT_SPECIAL:
+ wlength += h8300_asm_insn_count (info.special);
+
+ /* Every assembly instruction used in SHIFT_SPECIAL case
+ takes 2 bytes except xor.l, which takes 4 bytes, so if we
+ see xor.l, we just pretend that xor.l counts as two insns
+ so that the insn length will be computed correctly. */
+ if (strstr (info.special, "xor.l") != NULL)
+ wlength++;
+
+ /* Fall through. */
+
+ case SHIFT_INLINE:
+ n = info.remainder;
+
+ if (info.shift2 != NULL)
+ {
+ wlength += h8300_asm_insn_count (info.shift2) * (n / 2);
+ n = n % 2;
+ }
+
+ wlength += h8300_asm_insn_count (info.shift1) * n;
+
+ return 2 * wlength;
+
+ case SHIFT_ROT_AND:
+ {
+ int m = GET_MODE_BITSIZE (mode) - n;
+
+ /* Not all possibilities of rotate are supported. They shouldn't
+ be generated, but let's watch for 'em. */
+ gcc_assert (info.shift1);
+
+ if (info.shift2 != NULL)
+ {
+ wlength += h8300_asm_insn_count (info.shift2) * (m / 2);
+ m = m % 2;
+ }
+
+ wlength += h8300_asm_insn_count (info.shift1) * m;
+
+ /* Now mask off the high bits. */
+ switch (mode)
+ {
+ case QImode:
+ wlength += 1;
+ break;
+ case HImode:
+ wlength += 2;
+ break;
+ case SImode:
+ gcc_assert (!TARGET_H8300);
+ wlength += 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return 2 * wlength;
+ }
+
+ case SHIFT_LOOP:
+ /* A loop to shift by a "large" constant value.
+ If we have shift-by-2 insns, use them. */
+ if (info.shift2 != NULL)
+ {
+ wlength += 3 + h8300_asm_insn_count (info.shift2);
+ if (n % 2)
+ wlength += h8300_asm_insn_count (info.shift1);
+ }
+ else
+ {
+ wlength += 3 + h8300_asm_insn_count (info.shift1);
+ }
+ return 2 * wlength;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+}
+
+/* Compute which flag bits are valid after a shift insn. */
+
+enum attr_cc
+compute_a_shift_cc (rtx insn ATTRIBUTE_UNUSED, rtx *operands)
+{
+ rtx shift = operands[3];
+ enum machine_mode mode = GET_MODE (shift);
+ enum rtx_code code = GET_CODE (shift);
+ enum shift_type shift_type;
+ enum shift_mode shift_mode;
+ struct shift_info info;
+ int n;
+
+ switch (mode)
+ {
+ case QImode:
+ shift_mode = QIshift;
+ break;
+ case HImode:
+ shift_mode = HIshift;
+ break;
+ case SImode:
+ shift_mode = SIshift;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case ASHIFTRT:
+ shift_type = SHIFT_ASHIFTRT;
+ break;
+ case LSHIFTRT:
+ shift_type = SHIFT_LSHIFTRT;
+ break;
+ case ASHIFT:
+ shift_type = SHIFT_ASHIFT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* This case must be taken care of by one of the two splitters
+ that convert a variable shift into a loop. */
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ n = INTVAL (operands[2]);
+
+ /* If the count is negative, make it 0. */
+ if (n < 0)
+ n = 0;
+ /* If the count is too big, truncate it.
+ ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to
+ do the intuitive thing. */
+ else if ((unsigned int) n > GET_MODE_BITSIZE (mode))
+ n = GET_MODE_BITSIZE (mode);
+
+ get_shift_alg (shift_type, shift_mode, n, &info);
+
+ switch (info.alg)
+ {
+ case SHIFT_SPECIAL:
+ if (info.remainder == 0)
+ return info.cc_special;
+
+ /* Fall through. */
+
+ case SHIFT_INLINE:
+ return info.cc_inline;
+
+ case SHIFT_ROT_AND:
+ /* This case always ends with an and instruction. */
+ return CC_SET_ZNV;
+
+ case SHIFT_LOOP:
+ /* A loop to shift by a "large" constant value.
+ If we have shift-by-2 insns, use them. */
+ if (info.shift2 != NULL)
+ {
+ if (n % 2)
+ return info.cc_inline;
+ }
+ return CC_CLOBBER;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A rotation by a non-constant will cause a loop to be generated, in
+   which a rotation by one bit is used.  A rotation by a constant,
+   including the one in the loop, will be taken care of by
+   output_a_rotate () at insn emission time. */
+
+int
+expand_a_rotate (rtx operands[])
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+ rtx rotate_amount = operands[2];
+ enum machine_mode mode = GET_MODE (dst);
+
+ if (h8sx_classify_shift (mode, ROTATE, rotate_amount) == H8SX_SHIFT_UNARY)
+    return 0;
+
+ /* We rotate in place. */
+ emit_move_insn (dst, src);
+
+ if (GET_CODE (rotate_amount) != CONST_INT)
+ {
+ rtx counter = gen_reg_rtx (QImode);
+ rtx start_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+
+ /* If the rotate amount is less than or equal to 0,
+ we go out of the loop. */
+ emit_cmp_and_jump_insns (rotate_amount, const0_rtx, LE, NULL_RTX,
+ QImode, 0, end_label);
+
+ /* Initialize the loop counter. */
+ emit_move_insn (counter, rotate_amount);
+
+ emit_label (start_label);
+
+ /* Rotate by one bit. */
+ switch (mode)
+ {
+ case QImode:
+ emit_insn (gen_rotlqi3_1 (dst, dst, const1_rtx));
+ break;
+ case HImode:
+ emit_insn (gen_rotlhi3_1 (dst, dst, const1_rtx));
+ break;
+ case SImode:
+ emit_insn (gen_rotlsi3_1 (dst, dst, const1_rtx));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Decrement the counter by 1. */
+ emit_insn (gen_addqi3 (counter, counter, constm1_rtx));
+
+ /* If the loop counter is nonzero, we go back to the beginning
+ of the loop. */
+ emit_cmp_and_jump_insns (counter, const0_rtx, NE, NULL_RTX, QImode, 1,
+ start_label);
+
+ emit_label (end_label);
+ }
+ else
+ {
+ /* Rotate by AMOUNT bits. */
+ switch (mode)
+ {
+ case QImode:
+ emit_insn (gen_rotlqi3_1 (dst, dst, rotate_amount));
+ break;
+ case HImode:
+ emit_insn (gen_rotlhi3_1 (dst, dst, rotate_amount));
+ break;
+ case SImode:
+ emit_insn (gen_rotlsi3_1 (dst, dst, rotate_amount));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return 1;
+}
+
+/* Output a rotate insn. */
+
+const char *
+output_a_rotate (enum rtx_code code, rtx *operands)
+{
+ rtx dst = operands[0];
+ rtx rotate_amount = operands[2];
+ enum shift_mode rotate_mode;
+ enum shift_type rotate_type;
+ const char *insn_buf;
+ int bits;
+ int amount;
+ enum machine_mode mode = GET_MODE (dst);
+
+ gcc_assert (GET_CODE (rotate_amount) == CONST_INT);
+
+ switch (mode)
+ {
+ case QImode:
+ rotate_mode = QIshift;
+ break;
+ case HImode:
+ rotate_mode = HIshift;
+ break;
+ case SImode:
+ rotate_mode = SIshift;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case ROTATERT:
+ rotate_type = SHIFT_ASHIFT;
+ break;
+ case ROTATE:
+ rotate_type = SHIFT_LSHIFTRT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ amount = INTVAL (rotate_amount);
+
+ /* Clean up AMOUNT. */
+ if (amount < 0)
+ amount = 0;
+ if ((unsigned int) amount > GET_MODE_BITSIZE (mode))
+ amount = GET_MODE_BITSIZE (mode);
+
+  /* Determine the faster direction.  After this phase, AMOUNT will be
+     at most half of GET_MODE_BITSIZE (mode). */
+ if ((unsigned int) amount > GET_MODE_BITSIZE (mode) / (unsigned) 2)
+ {
+ /* Flip the direction. */
+ amount = GET_MODE_BITSIZE (mode) - amount;
+ rotate_type =
+ (rotate_type == SHIFT_ASHIFT) ? SHIFT_LSHIFTRT : SHIFT_ASHIFT;
+ }
+
+  /* See if a byte swap (in HImode) or a word swap (in SImode) can
+     speed up the rotation. */
+ if ((mode == HImode && TARGET_H8300 && amount >= 5)
+ || (mode == HImode && TARGET_H8300H && amount >= 6)
+ || (mode == HImode && TARGET_H8300S && amount == 8)
+ || (mode == SImode && TARGET_H8300H && amount >= 10)
+ || (mode == SImode && TARGET_H8300S && amount >= 13))
+ {
+ switch (mode)
+ {
+ case HImode:
+ /* This code works on any family. */
+ insn_buf = "xor.b\t%s0,%t0\n\txor.b\t%t0,%s0\n\txor.b\t%s0,%t0";
+ output_asm_insn (insn_buf, operands);
+ break;
+
+ case SImode:
+ /* This code works on the H8/300H and H8S. */
+ insn_buf = "xor.w\t%e0,%f0\n\txor.w\t%f0,%e0\n\txor.w\t%e0,%f0";
+ output_asm_insn (insn_buf, operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Adjust AMOUNT and flip the direction. */
+ amount = GET_MODE_BITSIZE (mode) / 2 - amount;
+ rotate_type =
+ (rotate_type == SHIFT_ASHIFT) ? SHIFT_LSHIFTRT : SHIFT_ASHIFT;
+ }
+
+ /* Output rotate insns. */
+ for (bits = TARGET_H8300S ? 2 : 1; bits > 0; bits /= 2)
+ {
+ if (bits == 2)
+ insn_buf = rotate_two[rotate_type][rotate_mode];
+ else
+ insn_buf = rotate_one[cpu_type][rotate_type][rotate_mode];
+
+ for (; amount >= bits; amount -= bits)
+ output_asm_insn (insn_buf, operands);
+ }
+
+ return "";
+}
+
+/* Compute the length of a rotate insn. */
+
+unsigned int
+compute_a_rotate_length (rtx *operands)
+{
+ rtx src = operands[1];
+ rtx amount_rtx = operands[2];
+ enum machine_mode mode = GET_MODE (src);
+ int amount;
+ unsigned int length = 0;
+
+ gcc_assert (GET_CODE (amount_rtx) == CONST_INT);
+
+ amount = INTVAL (amount_rtx);
+
+ /* Clean up AMOUNT. */
+ if (amount < 0)
+ amount = 0;
+ if ((unsigned int) amount > GET_MODE_BITSIZE (mode))
+ amount = GET_MODE_BITSIZE (mode);
+
+  /* Determine the faster direction.  After this phase, AMOUNT
+     will be at most half of GET_MODE_BITSIZE (mode). */
+ if ((unsigned int) amount > GET_MODE_BITSIZE (mode) / (unsigned) 2)
+ /* Flip the direction. */
+ amount = GET_MODE_BITSIZE (mode) - amount;
+
+  /* See if a byte swap (in HImode) or a word swap (in SImode) can
+     speed up the rotation. */
+ if ((mode == HImode && TARGET_H8300 && amount >= 5)
+ || (mode == HImode && TARGET_H8300H && amount >= 6)
+ || (mode == HImode && TARGET_H8300S && amount == 8)
+ || (mode == SImode && TARGET_H8300H && amount >= 10)
+ || (mode == SImode && TARGET_H8300S && amount >= 13))
+ {
+ /* Adjust AMOUNT and flip the direction. */
+ amount = GET_MODE_BITSIZE (mode) / 2 - amount;
+ length += 6;
+ }
+
+ /* We use 2-bit rotations on the H8S. */
+ if (TARGET_H8300S)
+ amount = amount / 2 + amount % 2;
+
+  /* On the H8/300, an HImode rotate by one bit takes three insns,
+     i.e. 6 bytes of code; every other case costs 2 bytes per step. */
+ length += amount * ((TARGET_H8300 && mode == HImode) ? 6 : 2);
+
+ return length;
+}
+
+/* Fix the operands of a gen_xxx so that it can become a
+   bit-manipulation insn. */
+
+int
+fix_bit_operand (rtx *operands, enum rtx_code code)
+{
+ /* The bit_operand predicate accepts any memory during RTL generation, but
+ only 'U' memory afterwards, so if this is a MEM operand, we must force
+ it to be valid for 'U' by reloading the address. */
+
+ if (code == AND
+ ? single_zero_operand (operands[2], QImode)
+ : single_one_operand (operands[2], QImode))
+ {
+ /* OK to have a memory dest. */
+ if (GET_CODE (operands[0]) == MEM
+ && !OK_FOR_U (operands[0]))
+ {
+ rtx mem = gen_rtx_MEM (GET_MODE (operands[0]),
+ copy_to_mode_reg (Pmode,
+ XEXP (operands[0], 0)));
+ MEM_COPY_ATTRIBUTES (mem, operands[0]);
+ operands[0] = mem;
+ }
+
+      if (GET_CODE (operands[1]) == MEM
+	  && !OK_FOR_U (operands[1]))
+	{
+	  rtx mem = gen_rtx_MEM (GET_MODE (operands[1]),
+				 copy_to_mode_reg (Pmode,
+						   XEXP (operands[1], 0)));
+	  MEM_COPY_ATTRIBUTES (mem, operands[1]);
+	  operands[1] = mem;
+	}
+ return 0;
+ }
+
+  /* The destination and source operands must be registers. */
+
+ operands[1] = force_reg (QImode, operands[1]);
+ {
+ rtx res = gen_reg_rtx (QImode);
+ switch (code)
+ {
+ case AND:
+ emit_insn (gen_andqi3_1 (res, operands[1], operands[2]));
+ break;
+ case IOR:
+ emit_insn (gen_iorqi3_1 (res, operands[1], operands[2]));
+ break;
+ case XOR:
+ emit_insn (gen_xorqi3_1 (res, operands[1], operands[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn (gen_movqi (operands[0], res));
+ }
+ return 1;
+}
+
+/* Return nonzero if FUNC is an interrupt function as specified
+   by the "interrupt_handler" attribute. */
+
+static int
+h8300_interrupt_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is a saveall function as specified by the
+ "saveall" attribute. */
+
+static int
+h8300_saveall_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("saveall", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is an OS_Task function as specified
+ by the "OS_Task" attribute. */
+
+static int
+h8300_os_task_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("OS_Task", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is a monitor function as specified
+ by the "monitor" attribute. */
+
+static int
+h8300_monitor_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("monitor", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if FUNC is a function that should be called
+ through the function vector. */
+
+int
+h8300_funcvec_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("function_vector", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if DECL is a variable that's in the eight bit
+ data area. */
+
+int
+h8300_eightbit_data_p (tree decl)
+{
+ tree a;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return 0;
+
+ a = lookup_attribute ("eightbit_data", DECL_ATTRIBUTES (decl));
+ return a != NULL_TREE;
+}
+
+/* Return nonzero if DECL is a variable that's in the tiny
+ data area. */
+
+int
+h8300_tiny_data_p (tree decl)
+{
+ tree a;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return 0;
+
+ a = lookup_attribute ("tiny_data", DECL_ATTRIBUTES (decl));
+ return a != NULL_TREE;
+}
+
+/* Generate an 'interrupt_handler' attribute for decls. We convert
+ all the pragmas to corresponding attributes. */
+
+static void
+h8300_insert_attributes (tree node, tree *attributes)
+{
+ if (TREE_CODE (node) == FUNCTION_DECL)
+ {
+ if (pragma_interrupt)
+ {
+ pragma_interrupt = 0;
+
+ /* Add an 'interrupt_handler' attribute. */
+ *attributes = tree_cons (get_identifier ("interrupt_handler"),
+ NULL, *attributes);
+ }
+
+      if (pragma_saveall)
+	{
+	  pragma_saveall = 0;
+
+	  /* Add a 'saveall' attribute. */
+	  *attributes = tree_cons (get_identifier ("saveall"),
+				   NULL, *attributes);
+ }
+ }
+}
+
+/* Supported attributes:
+
+ interrupt_handler: output a prologue and epilogue suitable for an
+ interrupt handler.
+
+ saveall: output a prologue and epilogue that saves and restores
+ all registers except the stack pointer.
+
+ function_vector: This function should be called through the
+ function vector.
+
+ eightbit_data: This variable lives in the 8-bit data area and can
+ be referenced with 8-bit absolute memory addresses.
+
+ tiny_data: This variable lives in the tiny data area and can be
+ referenced with 16-bit absolute memory references. */
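+
+/* As an illustrative sketch (hypothetical user code, not part of this
+   file), the attributes above would typically be applied as follows:
+
+     void isr (void) __attribute__ ((interrupt_handler));
+     void dump (void) __attribute__ ((saveall));
+     char flags __attribute__ ((eightbit_data));
+     int lut[16] __attribute__ ((tiny_data));  */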
+
+static const struct attribute_spec h8300_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt_handler", 0, 0, true, false, false, h8300_handle_fndecl_attribute },
+ { "saveall", 0, 0, true, false, false, h8300_handle_fndecl_attribute },
+ { "OS_Task", 0, 0, true, false, false, h8300_handle_fndecl_attribute },
+ { "monitor", 0, 0, true, false, false, h8300_handle_fndecl_attribute },
+ { "function_vector", 0, 0, true, false, false, h8300_handle_fndecl_attribute },
+ { "eightbit_data", 0, 0, true, false, false, h8300_handle_eightbit_data_attribute },
+ { "tiny_data", 0, 0, true, false, false, h8300_handle_tiny_data_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+
+/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+ struct attribute_spec.handler. */
+static tree
+h8300_handle_fndecl_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "eightbit_data" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+h8300_handle_eightbit_data_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ {
+ DECL_SECTION_NAME (decl) = build_string (7, ".eight");
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "tiny_data" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+h8300_handle_tiny_data_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ {
+ DECL_SECTION_NAME (decl) = build_string (6, ".tiny");
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Mark function vectors and various small data objects. */
+
+static void
+h8300_encode_section_info (tree decl, rtx rtl, int first)
+{
+ int extra_flags = 0;
+
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && h8300_funcvec_function_p (decl))
+ extra_flags = SYMBOL_FLAG_FUNCVEC_FUNCTION;
+ else if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
+ {
+ if (h8300_eightbit_data_p (decl))
+ extra_flags = SYMBOL_FLAG_EIGHTBIT_DATA;
+ else if (first && h8300_tiny_data_p (decl))
+ extra_flags = SYMBOL_FLAG_TINY_DATA;
+ }
+
+ if (extra_flags)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= extra_flags;
+}
+
+/* Output a single-bit extraction. */
+
+const char *
+output_simode_bld (int bild, rtx operands[])
+{
+ if (TARGET_H8300)
+ {
+ /* Clear the destination register. */
+ output_asm_insn ("sub.w\t%e0,%e0\n\tsub.w\t%f0,%f0", operands);
+
+ /* Now output the bit load or bit inverse load, and store it in
+ the destination. */
+ if (bild)
+ output_asm_insn ("bild\t%Z2,%Y1", operands);
+ else
+ output_asm_insn ("bld\t%Z2,%Y1", operands);
+
+ output_asm_insn ("bst\t#0,%w0", operands);
+ }
+ else
+ {
+ /* Determine if we can clear the destination first. */
+ int clear_first = (REG_P (operands[0]) && REG_P (operands[1])
+ && REGNO (operands[0]) != REGNO (operands[1]));
+
+ if (clear_first)
+ output_asm_insn ("sub.l\t%S0,%S0", operands);
+
+ /* Output the bit load or bit inverse load. */
+ if (bild)
+ output_asm_insn ("bild\t%Z2,%Y1", operands);
+ else
+ output_asm_insn ("bld\t%Z2,%Y1", operands);
+
+ if (!clear_first)
+ output_asm_insn ("xor.l\t%S0,%S0", operands);
+
+ /* Perform the bit store. */
+ output_asm_insn ("rotxl.l\t%S0", operands);
+ }
+
+ /* All done. */
+ return "";
+}
+
+/* Delayed-branch scheduling is more effective if we have some idea
+ how long each instruction will be. Use a shorten_branches pass
+ to get an initial estimate. */
+
+static void
+h8300_reorg (void)
+{
+ if (flag_delayed_branch)
+ shorten_branches (get_insns ());
+}
+
+#ifndef OBJECT_FORMAT_ELF
+static void
+h8300_asm_named_section (const char *name, unsigned int flags ATTRIBUTE_UNUSED,
+ tree decl)
+{
+ /* ??? Perhaps we should be using default_coff_asm_named_section. */
+ fprintf (asm_out_file, "\t.section %s\n", name);
+}
+#endif /* ! OBJECT_FORMAT_ELF */
+
+/* Nonzero if X is a constant address suitable as an 8-bit absolute,
+ which is a special case of the 'R' operand. */
+
+int
+h8300_eightbit_constant_address_p (rtx x)
+{
+ /* The ranges of the 8-bit area. */
+ const unsigned HOST_WIDE_INT n1 = trunc_int_for_mode (0xff00, HImode);
+ const unsigned HOST_WIDE_INT n2 = trunc_int_for_mode (0xffff, HImode);
+ const unsigned HOST_WIDE_INT h1 = trunc_int_for_mode (0x00ffff00, SImode);
+ const unsigned HOST_WIDE_INT h2 = trunc_int_for_mode (0x00ffffff, SImode);
+ const unsigned HOST_WIDE_INT s1 = trunc_int_for_mode (0xffffff00, SImode);
+ const unsigned HOST_WIDE_INT s2 = trunc_int_for_mode (0xffffffff, SImode);
+
+ unsigned HOST_WIDE_INT addr;
+
+ /* We accept symbols declared with eightbit_data. */
+ if (GET_CODE (x) == SYMBOL_REF)
+ return (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_EIGHTBIT_DATA) != 0;
+
+ if (GET_CODE (x) != CONST_INT)
+ return 0;
+
+ addr = INTVAL (x);
+
+ return (0
+ || ((TARGET_H8300 || TARGET_NORMAL_MODE) && IN_RANGE (addr, n1, n2))
+ || (TARGET_H8300H && IN_RANGE (addr, h1, h2))
+ || (TARGET_H8300S && IN_RANGE (addr, s1, s2)));
+}
+
+/* Nonzero if X is a constant address suitable as a 16-bit absolute
+ on H8/300H and H8S. */
+
+int
+h8300_tiny_constant_address_p (rtx x)
+{
+ /* The ranges of the 16-bit area. */
+ const unsigned HOST_WIDE_INT h1 = trunc_int_for_mode (0x00000000, SImode);
+ const unsigned HOST_WIDE_INT h2 = trunc_int_for_mode (0x00007fff, SImode);
+ const unsigned HOST_WIDE_INT h3 = trunc_int_for_mode (0x00ff8000, SImode);
+ const unsigned HOST_WIDE_INT h4 = trunc_int_for_mode (0x00ffffff, SImode);
+ const unsigned HOST_WIDE_INT s1 = trunc_int_for_mode (0x00000000, SImode);
+ const unsigned HOST_WIDE_INT s2 = trunc_int_for_mode (0x00007fff, SImode);
+ const unsigned HOST_WIDE_INT s3 = trunc_int_for_mode (0xffff8000, SImode);
+ const unsigned HOST_WIDE_INT s4 = trunc_int_for_mode (0xffffffff, SImode);
+
+ unsigned HOST_WIDE_INT addr;
+
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ /* In the normal mode, any symbol fits in the 16-bit absolute
+ address range. We also accept symbols declared with
+ tiny_data. */
+ return (TARGET_NORMAL_MODE
+ || (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_TINY_DATA) != 0);
+
+ case CONST_INT:
+ addr = INTVAL (x);
+ return (TARGET_NORMAL_MODE
+ || (TARGET_H8300H
+ && (IN_RANGE (addr, h1, h2) || IN_RANGE (addr, h3, h4)))
+ || (TARGET_H8300S
+ && (IN_RANGE (addr, s1, s2) || IN_RANGE (addr, s3, s4))));
+
+ case CONST:
+ return TARGET_NORMAL_MODE;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return nonzero if ADDR1 and ADDR2 point to consecutive memory
+ locations that can be accessed as a 16-bit word. */
+
+int
+byte_accesses_mergeable_p (rtx addr1, rtx addr2)
+{
+ HOST_WIDE_INT offset1, offset2;
+ rtx reg1, reg2;
+
+ if (REG_P (addr1))
+ {
+ reg1 = addr1;
+ offset1 = 0;
+ }
+ else if (GET_CODE (addr1) == PLUS
+ && REG_P (XEXP (addr1, 0))
+ && GET_CODE (XEXP (addr1, 1)) == CONST_INT)
+ {
+ reg1 = XEXP (addr1, 0);
+ offset1 = INTVAL (XEXP (addr1, 1));
+ }
+ else
+ return 0;
+
+ if (REG_P (addr2))
+ {
+ reg2 = addr2;
+ offset2 = 0;
+ }
+ else if (GET_CODE (addr2) == PLUS
+ && REG_P (XEXP (addr2, 0))
+ && GET_CODE (XEXP (addr2, 1)) == CONST_INT)
+ {
+ reg2 = XEXP (addr2, 0);
+ offset2 = INTVAL (XEXP (addr2, 1));
+ }
+ else
+ return 0;
+
+ if (((reg1 == stack_pointer_rtx && reg2 == stack_pointer_rtx)
+ || (reg1 == frame_pointer_rtx && reg2 == frame_pointer_rtx))
+ && offset1 % 2 == 0
+ && offset1 + 1 == offset2)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if we have the same comparison insn as I3 two insns
+ before I3. I3 is assumed to be a comparison insn. */
+
+int
+same_cmp_preceding_p (rtx i3)
+{
+ rtx i1, i2;
+
+ /* Make sure we have a sequence of three insns. */
+ i2 = prev_nonnote_insn (i3);
+ if (i2 == NULL_RTX)
+ return 0;
+ i1 = prev_nonnote_insn (i2);
+ if (i1 == NULL_RTX)
+ return 0;
+
+ return (INSN_P (i1) && rtx_equal_p (PATTERN (i1), PATTERN (i3))
+ && any_condjump_p (i2) && onlyjump_p (i2));
+}
+
+/* Return nonzero if we have the same comparison insn as I1 two insns
+ after I1. I1 is assumed to be a comparison insn. */
+
+int
+same_cmp_following_p (rtx i1)
+{
+ rtx i2, i3;
+
+ /* Make sure we have a sequence of three insns. */
+ i2 = next_nonnote_insn (i1);
+ if (i2 == NULL_RTX)
+ return 0;
+ i3 = next_nonnote_insn (i2);
+ if (i3 == NULL_RTX)
+ return 0;
+
+ return (INSN_P (i3) && rtx_equal_p (PATTERN (i1), PATTERN (i3))
+ && any_condjump_p (i2) && onlyjump_p (i2));
+}
+
+/* Return nonzero if OPERANDS are valid for stm (or ldm) that pushes
+ (or pops) N registers. OPERANDS are assumed to be an array of
+ registers. */
+
+int
+h8300_regs_ok_for_stm (int n, rtx operands[])
+{
+ switch (n)
+ {
+ case 2:
+ return ((REGNO (operands[0]) == 0 && REGNO (operands[1]) == 1)
+ || (REGNO (operands[0]) == 2 && REGNO (operands[1]) == 3)
+ || (REGNO (operands[0]) == 4 && REGNO (operands[1]) == 5));
+ case 3:
+ return ((REGNO (operands[0]) == 0
+ && REGNO (operands[1]) == 1
+ && REGNO (operands[2]) == 2)
+ || (REGNO (operands[0]) == 4
+ && REGNO (operands[1]) == 5
+ && REGNO (operands[2]) == 6));
+
+ case 4:
+ return (REGNO (operands[0]) == 0
+ && REGNO (operands[1]) == 1
+ && REGNO (operands[2]) == 2
+ && REGNO (operands[3]) == 3);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
+
+int
+h8300_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+
+ if (h8300_current_function_interrupt_function_p ()
+ && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ return 1;
+}
+
+/* Returns true if register REGNO is safe to be allocated as a scratch
+ register in the current function. */
+
+static bool
+h8300_hard_regno_scratch_ok (unsigned int regno)
+{
+ if (h8300_current_function_interrupt_function_p ()
+ && ! WORD_REG_USED (regno))
+ return false;
+
+ return true;
+}
+
+
+/* Return nonzero if X is a legitimate constant. */
+
+int
+h8300_legitimate_constant_p (rtx x ATTRIBUTE_UNUSED)
+{
+ return 1;
+}
+
+/* Return nonzero if X is a REG or SUBREG suitable as a base register. */
+
+static int
+h8300_rtx_ok_for_base_p (rtx x, int strict)
+{
+ /* Strip off SUBREG if any. */
+ if (GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (REG_P (x)
+ && (strict
+ ? REG_OK_FOR_BASE_STRICT_P (x)
+ : REG_OK_FOR_BASE_NONSTRICT_P (x)));
+}
+
+/* Return nonzero if X is a legitimate address.  On the H8/300, a
+ legitimate address has the form REG, REG+CONSTANT_ADDRESS or
+ CONSTANT_ADDRESS. */
+
+static bool
+h8300_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+  /* Register indirect addresses like @er0 are always valid. */
+ if (h8300_rtx_ok_for_base_p (x, strict))
+ return 1;
+
+ if (CONSTANT_ADDRESS_P (x))
+ return 1;
+
+ if (TARGET_H8300SX
+ && ( GET_CODE (x) == PRE_INC
+ || GET_CODE (x) == PRE_DEC
+ || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == POST_DEC)
+ && h8300_rtx_ok_for_base_p (XEXP (x, 0), strict))
+ return 1;
+
+ if (GET_CODE (x) == PLUS
+ && CONSTANT_ADDRESS_P (XEXP (x, 1))
+ && h8300_rtx_ok_for_base_p (h8300_get_index (XEXP (x, 0),
+ mode, 0), strict))
+ return 1;
+
+ return 0;
+}
+
+/* Worker function for HARD_REGNO_NREGS.
+
+   We pretend the MAC register is 32 bits wide -- we don't have any
+   data types on the H8 series wider than 32 bits. */
+
+int
+h8300_hard_regno_nregs (int regno ATTRIBUTE_UNUSED, enum machine_mode mode)
+{
+ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
+/* Worker function for HARD_REGNO_MODE_OK. */
+
+int
+h8300_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ if (TARGET_H8300)
+ /* If an even reg, then anything goes. Otherwise the mode must be
+ QI or HI. */
+ return ((regno & 1) == 0) || (mode == HImode) || (mode == QImode);
+ else
+ /* MAC register can only be of SImode. Otherwise, anything
+ goes. */
+ return regno == MAC_REG ? mode == SImode : 1;
+}
+
+/* Perform target dependent optabs initialization. */
+static void
+h8300_init_libfuncs (void)
+{
+ set_optab_libfunc (smul_optab, HImode, "__mulhi3");
+ set_optab_libfunc (sdiv_optab, HImode, "__divhi3");
+ set_optab_libfunc (udiv_optab, HImode, "__udivhi3");
+ set_optab_libfunc (smod_optab, HImode, "__modhi3");
+ set_optab_libfunc (umod_optab, HImode, "__umodhi3");
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+h8300_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return (TYPE_MODE (type) == BLKmode
+ || GET_MODE_SIZE (TYPE_MODE (type)) > (TARGET_H8300 ? 4 : 8));
+}
+
+/* We emit the entire trampoline here. Depending on the pointer size,
+ we use a different trampoline.
+
+ Pmode == HImode
+ vvvv context
+ 1 0000 7903xxxx mov.w #0x1234,r3
+ 2 0004 5A00xxxx jmp @0x1234
+ ^^^^ function
+
+ Pmode == SImode
+ vvvvvvvv context
+ 2 0000 7A03xxxxxxxx mov.l #0x12345678,er3
+ 3 0006 5Axxxxxx jmp @0x123456
+ ^^^^^^ function
+*/
+
+static void
+h8300_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ if (Pmode == HImode)
+ {
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT (0x7903));
+ mem = adjust_address (m_tramp, Pmode, 2);
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, HImode, 4);
+ emit_move_insn (mem, GEN_INT (0x5a00));
+ mem = adjust_address (m_tramp, Pmode, 6);
+ emit_move_insn (mem, fnaddr);
+ }
+ else
+ {
+ rtx tem;
+
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT (0x7a03));
+ mem = adjust_address (m_tramp, Pmode, 2);
+ emit_move_insn (mem, cxt);
+
+ tem = copy_to_reg (fnaddr);
+ emit_insn (gen_andsi3 (tem, tem, GEN_INT (0x00ffffff)));
+ emit_insn (gen_iorsi3 (tem, tem, GEN_INT (0x5a000000)));
+ mem = adjust_address (m_tramp, SImode, 6);
+ emit_move_insn (mem, tem);
+ }
+}
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE h8300_attribute_table
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START h8300_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END h8300_file_end
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO h8300_encode_section_info
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES h8300_insert_attributes
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS h8300_rtx_costs
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS h8300_init_libfuncs
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY h8300_return_in_memory
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG h8300_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE h8300_function_arg_advance
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG h8300_reorg
+
+#undef TARGET_HARD_REGNO_SCRATCH_OK
+#define TARGET_HARD_REGNO_SCRATCH_OK h8300_hard_regno_scratch_ok
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P h8300_legitimate_address_p
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE h8300_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE h8300_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT h8300_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE h8300_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE h8300_option_optimization_table
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
new file mode 100644
index 000000000..589b70f89
--- /dev/null
+++ b/gcc/config/h8300/h8300.h
@@ -0,0 +1,1071 @@
+/* Definitions of target machine for GNU compiler.
+ Renesas H8/300 (generic)
+   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com),
+ Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_H8300_H
+#define GCC_H8300_H
+
+/* Which CPU to compile for.
+ We use int for CPU_TYPE to avoid lots of casts. */
+#if 0 /* defined in insn-attr.h, here for documentation */
+enum attr_cpu { CPU_H8300, CPU_H8300H };
+#endif
+extern int cpu_type;
+
+/* Various globals defined in h8300.c. */
+
+extern const char *h8_push_op, *h8_pop_op, *h8_mov_op;
+extern const char * const *h8_reg_names;
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ if (TARGET_H8300H) \
+ { \
+ builtin_define ("__H8300H__"); \
+ builtin_assert ("cpu=h8300h"); \
+ builtin_assert ("machine=h8300h"); \
+ if (TARGET_NORMAL_MODE) \
+ { \
+ builtin_define ("__NORMAL_MODE__"); \
+ } \
+ } \
+ else if (TARGET_H8300SX) \
+ { \
+ builtin_define ("__H8300SX__"); \
+ if (TARGET_NORMAL_MODE) \
+ { \
+ builtin_define ("__NORMAL_MODE__"); \
+ } \
+ } \
+ else if (TARGET_H8300S) \
+ { \
+ builtin_define ("__H8300S__"); \
+ builtin_assert ("cpu=h8300s"); \
+ builtin_assert ("machine=h8300s"); \
+ if (TARGET_NORMAL_MODE) \
+ { \
+ builtin_define ("__NORMAL_MODE__"); \
+ } \
+ } \
+ else \
+ { \
+ builtin_define ("__H8300__"); \
+ builtin_assert ("cpu=h8300"); \
+ builtin_assert ("machine=h8300"); \
+ } \
+ } \
+ while (0)
+
+#define LINK_SPEC "%{mh:%{mn:-m h8300hn}} %{mh:%{!mn:-m h8300h}} %{ms:%{mn:-m h8300sn}} %{ms:%{!mn:-m h8300s}}"
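+
+/* Reading the spec above, for illustration: "-mh -mn" passes
+   "-m h8300hn" to the linker, while "-ms" alone passes "-m h8300s". */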
+
+#define LIB_SPEC "%{mrelax:-relax} %{g:-lg} %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}"
+
+/* Print subsidiary information on the compiler version in use. */
+
+#define TARGET_VERSION fprintf (stderr, " (Renesas H8/300)");
+
+/* Macros used in the machine description to test the flags. */
+
+/* Select between the H8/300 and H8/300H CPUs. */
+#define TARGET_H8300 (! TARGET_H8300H && ! TARGET_H8300S)
+#define TARGET_H8300S (TARGET_H8300S_1 || TARGET_H8300SX)
+/* Some multiply instructions are not available in all H8SX variants.
+ Use this macro instead of TARGET_H8300SX to indicate this, even
+ though we don't actually generate different code for now. */
+#define TARGET_H8300SXMUL TARGET_H8300SX
+
+#ifdef IN_LIBGCC2
+#undef TARGET_H8300H
+#undef TARGET_H8300S
+#undef TARGET_NORMAL_MODE
+/* If compiling libgcc2, make these compile-time constants based on
+   which flags we are actually compiling with. */
+#ifdef __H8300H__
+#define TARGET_H8300H 1
+#else
+#define TARGET_H8300H 0
+#endif
+#ifdef __H8300S__
+#define TARGET_H8300S 1
+#else
+#define TARGET_H8300S 0
+#endif
+#ifdef __NORMAL_MODE__
+#define TARGET_NORMAL_MODE 1
+#else
+#define TARGET_NORMAL_MODE 0
+#endif
+#endif /* IN_LIBGCC2 */
+
+/* Default target_flags if no switches specified. */
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_QUICKCALL)
+#endif
+
+/* We want dwarf2 info available to gdb. */
+#define DWARF2_DEBUGGING_INFO 1
+
+/* The return address is pushed on the stack. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_MEM (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM))
+#define INCOMING_FRAME_SP_OFFSET (POINTER_SIZE / 8)
+
+#define DWARF_CIE_DATA_ALIGNMENT 2
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap.
+
+ Calls through a register are cheaper than calls to named
+ functions; however, the register pressure this causes makes
+   CSEing of function addresses generally a loss. */
+#define NO_FUNCTION_CSE
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields.
+ This is not true on the H8/300. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is true on the H8/300. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN 1
+
+#define MAX_BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_H8300H || TARGET_H8300S ? 4 : 2)
+#define MIN_UNITS_PER_WORD 2
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE (TARGET_INT32 ? 32 : 16)
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 32
+#define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE
+
+#define MAX_FIXED_MODE_SIZE 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_H8300H || TARGET_H8300S ? 32 : 16)
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 16
+
+/* Alignment of field after `int : 0' in a structure. */
+/* One can argue this should be 32 for -mint32, but since 32-bit ints only
+ need 16-bit alignment, this is left as is so that -mint32 doesn't change
+ structure layouts. */
+#define EMPTY_FIELD_BOUNDARY 16
+
+/* No data type wants to be aligned rounder than this.
+ 32-bit values are aligned as such on the H8/300H and H8S for speed. */
+#define BIGGEST_ALIGNMENT \
+(((TARGET_H8300H || TARGET_H8300S) && ! TARGET_ALIGN_300) ? 32 : 16)
+
+/* The stack goes in 16/32 bit lumps. */
+#define STACK_BOUNDARY (TARGET_H8300 ? 16 : 32)
+
+/* Define this if move instructions will actually fail to work
+ when given unaligned data. */
+/* On the H8/300, longs can be aligned on halfword boundaries, but not
+ byte boundaries. */
+#define STRICT_ALIGNMENT 1
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ Reg 9 does not correspond to any hardware register, but instead
+ appears in the RTL as an argument pointer prior to reload, and is
+ eliminated during reloading in favor of either the stack or frame
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 12
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+
+#define FIXED_REGISTERS \
+/* r0 r1 r2 r3 r4 r5 r6 r7 mac ap rap fp */ \
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you
+ like.
+
+ H8 destroys r0,r1,r2,r3. */
+
+#define CALL_USED_REGISTERS \
+/* r0 r1 r2 r3 r4 r5 r6 r7 mac ap rap fp */ \
+ { 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1 }
+
+#define REG_ALLOC_ORDER \
+/* r0 r1 r2 r3 r4 r5 r6 r7 mac ap rap fp */ \
+ { 2, 3, 0, 1, 4, 5, 6, 8, 7, 9, 10, 11 }
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ h8300_hard_regno_nregs ((REGNO), (MODE))
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ h8300_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) \
+ || (((MODE1) == QImode || (MODE1) == HImode \
+ || ((TARGET_H8300H || TARGET_H8300S) && (MODE1) == SImode)) \
+ && ((MODE2) == QImode || (MODE2) == HImode \
+ || ((TARGET_H8300H || TARGET_H8300S) && (MODE2) == SImode))))
+
+/* A C expression that is nonzero if hard register NEW_REG can be
+ considered for use as a rename register for OLD_REG register */
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ h8300_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* H8/300 pc is not overloaded on a register. */
+
+/*#define PC_REGNUM 15*/
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REG
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM HFP_REG
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM FP_REG
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM AP_REG
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM SC_REG
+
+/* Fake register that holds the address on the stack of the
+ current function's return address. */
+#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is already the frame pointer of the COUNT frame, assuming
+ a stack layout with the frame pointer as the first saved register. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) h8300_return_addr_rtx ((COUNT), (FRAME))
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+enum reg_class {
+ NO_REGS, COUNTER_REGS, SOURCE_REGS, DESTINATION_REGS,
+ GENERAL_REGS, MAC_REGS, ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "COUNTER_REGS", "SOURCE_REGS", "DESTINATION_REGS", \
+ "GENERAL_REGS", "MAC_REGS", "ALL_REGS", "LIM_REGS" }
+
+/* The following macro defines cover classes for the Integrated
+   Register Allocator.  Cover classes are a set of non-intersecting
+   register classes covering all hard registers used for register
+   allocation purposes.  Any move between two registers of a cover
+   class should be cheaper than a load or store of the registers.
+   The macro value is an array of register classes with
+   LIM_REG_CLASSES used as the end marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, MAC_REGS, LIM_REG_CLASSES \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ {0}, /* No regs */ \
+ {0x010}, /* COUNTER_REGS */ \
+ {0x020}, /* SOURCE_REGS */ \
+ {0x040}, /* DESTINATION_REGS */ \
+ {0xeff}, /* GENERAL_REGS */ \
+ {0x100}, /* MAC_REGS */ \
+ {0xfff}, /* ALL_REGS */ \
+}
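+
+/* Reading the masks above against the register numbering (r0-r7 are
+   regs 0-7, mac is reg 8): 0x010 selects r4 (the counter register),
+   0x020 selects r5, 0x040 selects r6, 0x100 selects mac, and 0xeff
+   covers every register except mac. */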
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) == MAC_REG ? MAC_REGS \
+ : (REGNO) == COUNTER_REG ? COUNTER_REGS \
+ : (REGNO) == SOURCE_REG ? SOURCE_REGS \
+ : (REGNO) == DESTINATION_REG ? DESTINATION_REGS \
+ : GENERAL_REGS)
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS (TARGET_H8300SX ? GENERAL_REGS : NO_REGS)
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Get reg_class from a letter such as appears in the machine description.
+
+ 'a' is the MAC register. */
+
+#define REG_CLASS_FROM_LETTER(C) (h8300_reg_class_from_letter (C))
+
+/* The letters I, J, K, L, M, N, O, P in a register constraint string
+ can be used to stand for particular ranges of immediate operands.
+ This macro defines what the ranges are.
+ C is the letter, and VALUE is a constant value.
+ Return 1 if VALUE is in the range specified by C. */
+
+#define CONST_OK_FOR_I(VALUE) ((VALUE) == 0)
+#define CONST_OK_FOR_J(VALUE) (((VALUE) & 0xff) == 0)
+#define CONST_OK_FOR_L(VALUE) \
+ (TARGET_H8300H || TARGET_H8300S \
+ ? (VALUE) == 1 || (VALUE) == 2 || (VALUE) == 4 \
+ : (VALUE) == 1 || (VALUE) == 2)
+#define CONST_OK_FOR_M(VALUE) \
+ ((VALUE) == 1 || (VALUE) == 2)
+#define CONST_OK_FOR_N(VALUE) \
+ (TARGET_H8300H || TARGET_H8300S \
+ ? (VALUE) == -1 || (VALUE) == -2 || (VALUE) == -4 \
+ : (VALUE) == -1 || (VALUE) == -2)
+#define CONST_OK_FOR_O(VALUE) \
+ ((VALUE) == -1 || (VALUE) == -2)
+
+/* Multi-letter constraints for constants always start with P (simply
+   because it was the only letter left in the range).  New constraints
+   for constants should be added here. */
+#define CONST_OK_FOR_Ppositive(VALUE, NBITS) \
+ ((VALUE) > 0 && (VALUE) < (1 << (NBITS)))
+#define CONST_OK_FOR_Pnegative(VALUE, NBITS) \
+ ((VALUE) < 0 && (VALUE) > -(1 << (NBITS)))
+#define CONST_OK_FOR_P(VALUE, STR) \
+ ((STR)[1] >= '1' && (STR)[1] <= '9' && (STR)[2] == '<' \
+ ? (((STR)[3] == '0' || ((STR)[3] == 'X' && TARGET_H8300SX)) \
+ && CONST_OK_FOR_Pnegative ((VALUE), (STR)[1] - '0')) \
+ : ((STR)[1] >= '1' && (STR)[1] <= '9' && (STR)[2] == '>') \
+ ? (((STR)[3] == '0' || ((STR)[3] == 'X' && TARGET_H8300SX)) \
+ && CONST_OK_FOR_Ppositive ((VALUE), (STR)[1] - '0')) \
+ : 0)
+#define CONSTRAINT_LEN_FOR_P(STR) \
+ ((((STR)[1] >= '1' && (STR)[1] <= '9') \
+ && ((STR)[2] == '<' || (STR)[2] == '>') \
+ && ((STR)[3] == 'X' || (STR)[3] == '0')) ? 4 \
+ : 0)
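+
+/* A worked example of the encoding above: the constraint "P3>0"
+   accepts positive constants that fit in 3 bits (1..7), "P3<0"
+   accepts the corresponding negatives (-7..-1), and the trailing
+   'X' forms are accepted only on the H8SX. */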
+
+#define CONST_OK_FOR_CONSTRAINT_P(VALUE, C, STR) \
+ ((C) == 'P' ? CONST_OK_FOR_P ((VALUE), (STR)) \
+ : CONST_OK_FOR_LETTER_P ((VALUE), (C)))
+
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'I' ? CONST_OK_FOR_I (VALUE) : \
+ (C) == 'J' ? CONST_OK_FOR_J (VALUE) : \
+ (C) == 'L' ? CONST_OK_FOR_L (VALUE) : \
+ (C) == 'M' ? CONST_OK_FOR_M (VALUE) : \
+ (C) == 'N' ? CONST_OK_FOR_N (VALUE) : \
+ (C) == 'O' ? CONST_OK_FOR_O (VALUE) : \
+ 0)
+
+/* Similar, but for floating constants, and defining letters G and H.
+ Here VALUE is the CONST_DOUBLE rtx itself.
+
+ `G' is a floating-point zero. */
+
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'G' ? (VALUE) == CONST0_RTX (SFmode) \
+ : 0)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+/* On the H8, this is the size of MODE in words. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
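+
+/* For instance, on the H8/300 (UNITS_PER_WORD == 2) an SImode value
+   needs (4 + 2 - 1) / 2 == 2 registers, while on the H8/300H
+   (UNITS_PER_WORD == 4) it fits in a single register. */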
+
+/* Any SI register-to-register move may need to be reloaded,
+ so define REGISTER_MOVE_COST to be > 2 so that reload never
+ shortcuts. */
+
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ (CLASS1 == MAC_REGS || CLASS2 == MAC_REGS ? 6 : 3)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+
+ On the H8/300, @-sp really pushes a byte if you ask it to - but that's
+ dangerous, so we claim that it always pushes a word, then we catch
+ the mov.b rx,@-sp and turn it into a mov.w rx,@-sp on output.
+
+ On the H8/300H, we simplify TARGET_QUICKCALL by setting this to 4
+ and doing a similar thing. */
+
+#define PUSH_ROUNDING(BYTES) \
+ (((BYTES) + PARM_BOUNDARY / 8 - 1) & -PARM_BOUNDARY / 8)
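+
+/* Worked example: with PARM_BOUNDARY == 32 (H8/300H), pushing a
+   single byte rounds to (1 + 4 - 1) & -4 == 4 bytes of stack. */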
+
+/* Offset of first parameter from the argument pointer register value. */
+/* Equal to the size of the saved fp + pc, even if an fp isn't saved,
+   since the value is used before we know whether the fp is saved. */
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ We have three registers that can be eliminated on the h8300.
+ First, the frame pointer register can often be eliminated in favor
+ of the stack pointer register. Secondly, the argument pointer
+ register and the return address pointer register are always
+ eliminated; they are replaced with either the stack or frame
+ pointer. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = h8300_initial_elimination_offset ((FROM), (TO)))
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0.
+
+ On the H8 the return value is in R0/R1. */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG (TYPE_MODE (VALTYPE), R0_REG)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+/* On the H8 the return value is in R0/R1. */
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (MODE, R0_REG)
+
+/* 1 if N is a possible register number for a function value.
+ On the H8, R0 is the only register thus used. */
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == R0_REG)
+
+/* Define this if PCC uses the nonreentrant convention for returning
+ structure and union values. */
+
+/*#define PCC_STATIC_STRUCT_RETURN*/
+
+/* 1 if N is a possible register number for function argument passing.
+   On the H8, r0 through r2 are used this way when TARGET_QUICKCALL is
+   in effect; otherwise no registers are.  */
+
+#define FUNCTION_ARG_REGNO_P(N) (TARGET_QUICKCALL ? (N) < 3 : 0)
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+   On the H8/300, this is a two-item struct: the first member is the
+   number of bytes scanned so far and the second is the rtx of the
+   called library function, if any.  */
+
+#define CUMULATIVE_ARGS struct cum_arg
+struct cum_arg
+{
+ int nbytes;
+ struct rtx_def *libcall;
+};
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On the H8/300, the offset starts at 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM).nbytes = 0, (CUM).libcall = LIBNAME)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fprintf (FILE, "\t%s\t#LP%d,%s\n\tjsr @mcount\n", \
+ h8_mov_op, (LABELNO), h8_reg_names[0]);
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 0
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE ((Pmode == HImode) ? 8 : 12)
+
+/* Addressing modes, and classification of registers for them. */
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_DECREMENT TARGET_H8300SX
+#define HAVE_PRE_INCREMENT TARGET_H8300SX
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(regno) 0
+
+#define REGNO_OK_FOR_BASE_P(regno) \
+  (((regno) < FIRST_PSEUDO_REGISTER && (regno) != MAC_REG) \
+   || reg_renumber[(regno)] >= 0)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 1
+
+/* 1 if X is an rtx for a constant that is a valid address. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || (GET_CODE (X) == CONST_INT \
+ /* We handle signed and unsigned offsets here. */ \
+ && INTVAL (X) > (TARGET_H8300 ? -0x10000 : -0x1000000) \
+ && INTVAL (X) < (TARGET_H8300 ? 0x10000 : 0x1000000)) \
+ || (GET_CODE (X) == HIGH || GET_CODE (X) == CONST))
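+
+/* For example, on the H8/300 (const_int 0x8000) passes the test above
+   while (const_int 0x20000) does not; on the larger parts the window
+   widens to (-0x1000000, 0x1000000).  */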
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) (h8300_legitimate_constant_p (X))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+/* Non-strict versions. */
+#define REG_OK_FOR_INDEX_NONSTRICT_P(X) 0
+/* Don't use REGNO_OK_FOR_BASE_P here because it uses reg_renumber. */
+#define REG_OK_FOR_BASE_NONSTRICT_P(X) \
+ (REGNO (X) >= FIRST_PSEUDO_REGISTER || REGNO (X) != MAC_REG)
+
+/* Strict versions. */
+#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#ifndef REG_OK_STRICT
+
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P (X)
+
+#else
+
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P (X)
+
+#endif
+
+/* Extra constraints. */
+
+#define OK_FOR_Q(OP) \
+ (TARGET_H8300SX && memory_operand ((OP), VOIDmode))
+
+#define OK_FOR_R(OP) \
+ (GET_CODE (OP) == CONST_INT \
+ ? !h8300_shift_needs_scratch_p (INTVAL (OP), QImode) \
+ : 0)
+
+#define OK_FOR_S(OP) \
+ (GET_CODE (OP) == CONST_INT \
+ ? !h8300_shift_needs_scratch_p (INTVAL (OP), HImode) \
+ : 0)
+
+#define OK_FOR_T(OP) \
+ (GET_CODE (OP) == CONST_INT \
+ ? !h8300_shift_needs_scratch_p (INTVAL (OP), SImode) \
+ : 0)
+
+/* 'U' if valid for a bset destination;
+   i.e. a register, register indirect, or the eightbit memory region
+   (a SYMBOL_REF with its SYMBOL_REF_FLAG set).
+
+ On the H8S 'U' can also be a 16bit or 32bit absolute. */
+#define OK_FOR_U(OP) \
+ ((GET_CODE (OP) == REG && REG_OK_FOR_BASE_P (OP)) \
+ || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == REG \
+ && REG_OK_FOR_BASE_P (XEXP (OP, 0))) \
+ || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == SYMBOL_REF \
+ && TARGET_H8300S) \
+ || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == CONST \
+ && GET_CODE (XEXP (XEXP (OP, 0), 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (XEXP (OP, 0), 0), 0)) == SYMBOL_REF \
+ && GET_CODE (XEXP (XEXP (XEXP (OP, 0), 0), 1)) == CONST_INT \
+ && (TARGET_H8300S \
+ || SYMBOL_REF_FLAG (XEXP (XEXP (XEXP (OP, 0), 0), 0)))) \
+ || (GET_CODE (OP) == MEM \
+ && h8300_eightbit_constant_address_p (XEXP (OP, 0))) \
+ || (GET_CODE (OP) == MEM && (TARGET_H8300S || TARGET_H8300SX) \
+ && GET_CODE (XEXP (OP, 0)) == CONST_INT))
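+
+/* So, for example, both (reg) and (mem (reg)) are accepted, and a MEM
+   whose address is a CONST_INT is accepted when the address lies in
+   the eightbit area, or unconditionally on the H8S and H8SX.  */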
+
+/* Multi-letter constraints starting with W are to be used for
+   operands that require a memory operand, i.e., that are never used
+   along with register constraints (see EXTRA_MEMORY_CONSTRAINTS).  */
+
+#define OK_FOR_WU(OP) \
+ (GET_CODE (OP) == MEM && OK_FOR_U (OP))
+
+#define OK_FOR_W(OP, STR) \
+ ((STR)[1] == 'U' ? OK_FOR_WU (OP) \
+ : 0)
+
+#define CONSTRAINT_LEN_FOR_W(STR) \
+ ((STR)[1] == 'U' ? 2 \
+ : 0)
+
+/* Multi-letter constraints starting with Y are to be used for operands
+   that are constant immediates and have a single 1 bit or a single 0
+   bit in their binary representation.  */
+
+#define OK_FOR_Y2(OP) \
+ ((GET_CODE (OP) == CONST_INT) && (exact_log2 (INTVAL (OP) & 0xff) != -1))
+
+#define OK_FOR_Y0(OP) \
+ ((GET_CODE (OP) == CONST_INT) && (exact_log2 (~INTVAL (OP) & 0xff) != -1))
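+
+/* For instance, (const_int 0x10) satisfies OK_FOR_Y2 (a single 1 bit
+   in the low byte) and (const_int 0xef) satisfies OK_FOR_Y0 (a single
+   0 bit), which presumably suits single-bit instructions such as bset
+   and bclr.  */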
+
+#define OK_FOR_Y(OP, STR) \
+ ((STR)[1] == '2' ? OK_FOR_Y2 (OP) \
+ : (STR)[1] == '0' ? OK_FOR_Y0 (OP) \
+ : 0)
+
+#define CONSTRAINT_LEN_FOR_Y(STR) \
+ ((STR)[1] == '2' ? 2 \
+ : (STR)[1] == '0' ? 2 \
+ : 0)
+
+#define OK_FOR_Z(OP) \
+ (TARGET_H8300SX \
+ && GET_CODE (OP) == MEM \
+ && CONSTANT_P (XEXP ((OP), 0)))
+
+#define EXTRA_CONSTRAINT_STR(OP, C, STR) \
+ ((C) == 'Q' ? OK_FOR_Q (OP) : \
+ (C) == 'R' ? OK_FOR_R (OP) : \
+ (C) == 'S' ? OK_FOR_S (OP) : \
+ (C) == 'T' ? OK_FOR_T (OP) : \
+ (C) == 'U' ? OK_FOR_U (OP) : \
+ (C) == 'W' ? OK_FOR_W ((OP), (STR)) : \
+ (C) == 'Y' ? OK_FOR_Y ((OP), (STR)) : \
+ (C) == 'Z' ? OK_FOR_Z (OP) : \
+ 0)
+
+#define CONSTRAINT_LEN(C, STR) \
+ ((C) == 'P' ? CONSTRAINT_LEN_FOR_P (STR) \
+ : (C) == 'W' ? CONSTRAINT_LEN_FOR_W (STR) \
+ : (C) == 'Y' ? CONSTRAINT_LEN_FOR_Y (STR) \
+ : DEFAULT_CONSTRAINT_LEN ((C), (STR)))
+
+/* Experiments suggest that it's better not to add 'Q' or 'U' here.  No
+   patterns need it for correctness (no patterns use 'Q' and 'U'
+   without also providing a register alternative).  And defining it
+   will mean that a spilled pseudo could be replaced by its frame
+   location in several consecutive insns.
+
+   Instead, it seems to be better to force pseudos to be reloaded
+   into registers and then use peepholes to recombine insns when
+   beneficial.
+
+   Unfortunately, for WU (unlike plain U, which matches regs as well),
+   we must require a memory address.  In fact, all multi-letter
+   constraints starting with W are supposed to have this property, so
+   we just test for W here.  */
+#define EXTRA_MEMORY_CONSTRAINT(C, STR) \
+ ((C) == 'W')
+
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+
+ On the H8/300, the predecrement and postincrement address depend thus
+ (the amount of decrement or increment being the length of the operand). */
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \
+ if (GET_CODE (ADDR) == PLUS \
+ && h8300_get_index (XEXP (ADDR, 0), VOIDmode, 0) != XEXP (ADDR, 0)) \
+ goto LABEL;
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE Pmode
+
+/* Define this as 1 if `char' should by default be signed; else as 0.
+
+ On the H8/300, sign extension is expensive, so we'll say that chars
+ are unsigned. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX (TARGET_H8300H || TARGET_H8300S ? 4 : 2)
+#define MAX_MOVE_MAX 4
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS TARGET_SLOWBYTE
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+/* #define SHIFT_COUNT_TRUNCATED */
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode \
+ ((TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE ? SImode : HImode)
+
+/* ANSI C types.
+ We use longs for the H8/300H and the H8S because ints can be 16 or 32.
+ GCC requires SIZE_TYPE to be the same size as pointers. */
+#define SIZE_TYPE \
+ (TARGET_H8300 || TARGET_NORMAL_MODE ? TARGET_INT32 ? "short unsigned int" : "unsigned int" : "long unsigned int")
+#define PTRDIFF_TYPE \
+ (TARGET_H8300 || TARGET_NORMAL_MODE ? TARGET_INT32 ? "short int" : "int" : "long int")
+
+#define POINTER_SIZE \
+ ((TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE ? 32 : 16)
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* Return the length of JUMP's delay slot insn (0 if it has none).
+ If JUMP is a delayed branch, NEXT_INSN (PREV_INSN (JUMP)) will
+ be the containing SEQUENCE, not JUMP itself. */
+#define DELAY_SLOT_LENGTH(JUMP) \
+ (NEXT_INSN (PREV_INSN (JUMP)) == JUMP ? 0 : 2)
+
+#define BRANCH_COST(speed_p, predictable_p) 0
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+/* Here we define machine-dependent flags and fields in cc_status
+ (see `conditions.h'). No extra ones are needed for the h8300. */
+
+/* Store in cc_status the expressions
+ that the condition codes will describe
+ after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) notice_update_cc (EXP, INSN)
+
+/* The add insns don't set overflow in a usable way. */
+#define CC_OVERFLOW_UNUSABLE 01000
+/* The mov,and,or,xor insns don't set carry.  That's OK though, as the
+   Z bit is all we need when doing unsigned comparisons on the result of
+   these insns (such comparisons are always against 0).  Conveniently,
+   conditions.h already has CC_NO_OVERFLOW defined for this purpose, so
+   rename it to something more understandable.  */
+#define CC_NO_CARRY CC_NO_OVERFLOW
+
+/* Control the assembler format that we output. */
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "; #APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "; #NO_APP\n"
+
+#define FILE_ASM_OP "\t.file\n"
+
+/* The assembler op to get a word, 2 bytes for the H8/300, 4 for H8/300H. */
+#define ASM_WORD_OP \
+ (TARGET_H8300 || TARGET_NORMAL_MODE ? "\t.word\t" : "\t.long\t")
+
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+#define DATA_SECTION_ASM_OP "\t.section .data"
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+#undef DO_GLOBAL_CTORS_BODY
+#define DO_GLOBAL_CTORS_BODY \
+{ \
+ extern func_ptr __ctors[]; \
+ extern func_ptr __ctors_end[]; \
+ func_ptr *p; \
+ for (p = __ctors_end; p > __ctors; ) \
+ { \
+ (*--p)(); \
+ } \
+}
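+
+/* Note that the loop above invokes constructors from the end of the
+   table backwards, while DO_GLOBAL_DTORS_BODY below walks forwards,
+   so destructors run in the reverse order of construction.  */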
+
+#undef DO_GLOBAL_DTORS_BODY
+#define DO_GLOBAL_DTORS_BODY \
+{ \
+ extern func_ptr __dtors[]; \
+ extern func_ptr __dtors_end[]; \
+ func_ptr *p; \
+ for (p = __dtors; p < __dtors_end; p++) \
+ { \
+ (*p)(); \
+ } \
+}
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "sp", "mac", "ap", "rap", "fp" }
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ {"er0", 0}, {"er1", 1}, {"er2", 2}, {"er3", 3}, {"er4", 4}, \
+ {"er5", 5}, {"er6", 6}, {"er7", 7}, {"r7", 7} }
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ ASM_OUTPUT_LABEL (FILE, NAME)
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'.
+
+ N.B.: The h8300.md branch_true and branch_false patterns also know
+ how to generate internal labels. */
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ sprintf (LABEL, "*.%s%lu", PREFIX, (unsigned long)(NUM))
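+
+/* For instance, PREFIX "L" and NUM 42 yield the string "*.L42"; the
+   leading '*' tells assemble_name to emit the rest verbatim, without
+   prepending USER_LABEL_PREFIX.  */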
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+ fprintf (FILE, "\t%s\t%s\n", h8_push_op, h8_reg_names[REGNO])
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+ fprintf (FILE, "\t%s\t%s\n", h8_pop_op, h8_reg_names[REGNO])
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "%s.L%d\n", ASM_WORD_OP, VALUE)
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "%s.L%d-.L%d\n", ASM_WORD_OP, VALUE, REL)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE, LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
+
+#define ASM_OUTPUT_SKIP(FILE, SIZE) \
+ fprintf (FILE, "\t.space %d\n", (int)(SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%lu\n", (unsigned long)(SIZE)))
+
+/* This says how to output the assembler to define a global
+ uninitialized but not common symbol.
+ Try to use asm_output_bss to implement this macro. */
+
+#define ASM_OUTPUT_BSS(FILE, DECL, NAME, SIZE, ROUNDED) \
+ asm_output_bss ((FILE), (DECL), (NAME), (SIZE), (ROUNDED))
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (int)(SIZE)))
+
+#define ASM_PN_FORMAT "%s___%lu"
+
+/* Print an instruction operand X on file FILE.
+ Look in h8300.c for details. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '#')
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+/* Print a memory operand whose address is X, on file FILE.
+ This uses a function in h8300.c. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* H8300 specific pragmas. */
+#define REGISTER_TARGET_PRAGMAS() \
+ do \
+ { \
+ c_register_pragma (0, "saveall", h8300_pr_saveall); \
+ c_register_pragma (0, "interrupt", h8300_pr_interrupt); \
+ } \
+ while (0)
+
+#define FINAL_PRESCAN_INSN(insn, operand, nop) \
+ final_prescan_insn (insn, operand, nop)
+
+extern int h8300_move_ratio;
+#define MOVE_RATIO(speed) h8300_move_ratio
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_FLAG_EIGHTBIT_DATA (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_FLAG_TINY_DATA (SYMBOL_FLAG_MACH_DEP << 2)
+
+#endif /* ! GCC_H8300_H */
diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
new file mode 100644
index 000000000..21ab39102
--- /dev/null
+++ b/gcc/config/h8300/h8300.md
@@ -0,0 +1,6267 @@
+;; GCC machine description for Renesas H8/300
+;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+
+;; Contributed by Steve Chamberlain (sac@cygnus.com),
+;; Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; We compute the exact length of each instruction most of the time.
+;; In some cases, most notably bit operations that may involve memory
+;; operands, the lengths in this file are "worst case".
+
+;; On the H8/300H and H8S, adds/subs operate on the 32bit "er"
+;; registers. Right now GCC doesn't expose the "e" half to the
+;; compiler, so using adds/subs for addhi and subhi is safe.  Long
+;; term, we want to expose the "e" half to the compiler (gives us 8
+;; more 16bit registers). At that point addhi and subhi can't use
+;; adds/subs.
+
+;; There's currently no way to have an insv/extzv expander for the H8/300H
+;; because word_mode is different for the H8/300 and H8/300H.
+
+;; Shifts/rotates by small constants should be handled by special
+;; patterns so we get the length and cc status correct.
+
+;; Bitfield operations no longer accept memory operands. We need
+;; to add variants which operate on memory back to the MD.
+
+;; ??? Implement remaining bit ops available on the h8300
+
+;; ----------------------------------------------------------------------
+;; CONSTANTS
+;; ----------------------------------------------------------------------
+
+(define_constants
+ [(UNSPEC_INCDEC 0)
+ (UNSPEC_MONITOR 1)])
+
+(define_constants
+ [(UNSPEC_MOVMD 100)
+ (UNSPEC_STPCPY 101)])
+
+(define_constants
+ [(R0_REG 0)
+ (SC_REG 3)
+ (COUNTER_REG 4)
+ (SOURCE_REG 5)
+ (DESTINATION_REG 6)
+ (HFP_REG 6)
+ (SP_REG 7)
+ (MAC_REG 8)
+ (AP_REG 9)
+ (RAP_REG 10)
+ (FP_REG 11)])
+
+;; ----------------------------------------------------------------------
+;; ATTRIBUTES
+;; ----------------------------------------------------------------------
+
+(define_attr "cpu" "h8300,h8300h"
+ (const (symbol_ref "cpu_type")))
+
+(define_attr "type" "branch,arith,bitbranch,call"
+ (const_string "arith"))
+
+(define_attr "length_table" "none,addb,addw,addl,logicb,movb,movw,movl,mova_zero,mova,unary,mov_imm4,short_immediate,bitfield,bitbranch"
+ (const_string "none"))
+
+;; The size of instructions in bytes.
+
+(define_attr "length" ""
+ (cond [(eq_attr "type" "branch")
+ ;; In a forward delayed branch, (pc) represents the end of the
+ ;; delay sequence, not the end of the branch itself.
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (le (plus (minus (match_dup 0) (pc))
+ (symbol_ref "DELAY_SLOT_LENGTH (insn)"))
+ (const_int 125)))
+ (const_int 2)
+ (if_then_else (and (eq_attr "cpu" "h8300h")
+ (and (ge (minus (pc) (match_dup 0))
+ (const_int -32000))
+ (le (minus (pc) (match_dup 0))
+ (const_int 32000))))
+ (const_int 4)
+ (const_int 6)))
+ (eq_attr "type" "bitbranch")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (le (minus (match_dup 0) (pc))
+ (const_int 126)))
+ (plus
+ (symbol_ref "h8300_insn_length_from_table (insn, operands)")
+ (const_int 2))
+ (if_then_else
+ (and (eq_attr "cpu" "h8300h")
+ (and (ge (minus (pc) (match_dup 0))
+ (const_int -32000))
+ (le (minus (pc) (match_dup 0))
+ (const_int 32000))))
+ (plus
+ (symbol_ref "h8300_insn_length_from_table (insn, operands)")
+ (const_int 4))
+ (plus
+ (symbol_ref "h8300_insn_length_from_table (insn, operands)")
+ (const_int 6))))
+ (eq_attr "length_table" "!none")
+ (symbol_ref "h8300_insn_length_from_table (insn, operands)")]
+ (const_int 200)))
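+
+;; In other words, a "branch" is assumed to take 2 bytes when its
+;; target is within the 8-bit displacement range, 4 bytes for a
+;; 16-bit displacement on the H8/300H, and 6 bytes otherwise;
+;; "bitbranch" adds the length of the bit insn itself on top.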
+
+;; Condition code settings.
+;;
+;; none - insn does not affect cc
+;; none_0hit - insn does not affect cc but it does modify operand 0
+;; This attribute is used to keep track of when operand 0 changes.
+;; See the description of NOTICE_UPDATE_CC for more info.
+;; set_znv - insn sets z,n,v to usable values (like a tst insn); c is unknown.
+;; set_zn - insn sets z,n to usable values; v,c are unknown.
+;; compare - compare instruction
+;; clobber - value of cc is unknown
+
+(define_attr "cc" "none,none_0hit,set_znv,set_zn,compare,clobber"
+ (const_string "clobber"))
+
+;; Type of delay slot. NONE means the instruction has no delay slot.
+;; JUMP means it is an unconditional jump that (if short enough)
+;; could be implemented using bra/s.
+(define_attr "delay_slot" "none,jump"
+ (const_string "none"))
+
+;; "yes" if the instruction can be put into a delay slot. It's not
+;; entirely clear that jsr is not valid in delay slots, but it
+;; definitely doesn't have the effect of causing the called function
+;; to return to the target of the delayed branch.
+(define_attr "can_delay" "no,yes"
+ (cond [(eq_attr "type" "branch,bitbranch,call")
+ (const_string "no")
+ (geu (symbol_ref "get_attr_length (insn)") (const_int 2))
+ (const_string "no")]
+ (const_string "yes")))
+
+;; Only allow jumps to have a delay slot if we think they might
+;; be short enough. This is just an optimization: we don't know
+;; for certain whether they will be or not.
+(define_delay (and (eq_attr "delay_slot" "jump")
+ (eq (symbol_ref "get_attr_length (insn)") (const_int 2)))
+ [(eq_attr "can_delay" "yes")
+ (nil)
+ (nil)])
+
+;; Provide the maximum length of an assembly instruction in an asm
+;; statement. The maximum length of 14 bytes is achieved on H8SX.
+
+(define_asm_attributes
+ [(set (attr "length")
+ (cond [(ne (symbol_ref "TARGET_H8300") (const_int 0)) (const_int 4)
+ (ne (symbol_ref "TARGET_H8300H") (const_int 0)) (const_int 10)
+ (ne (symbol_ref "TARGET_H8300S") (const_int 0)) (const_int 10)]
+ (const_int 14)))])
+
+(include "predicates.md")
+
+;; ----------------------------------------------------------------------
+;; MOVE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; movqi
+
+(define_insn "*movqi_h8300"
+ [(set (match_operand:QI 0 "general_operand_dst" "=r,r ,<,r,r,m")
+ (match_operand:QI 1 "general_operand_src" " I,r>,r,n,m,r"))]
+ "TARGET_H8300
+ && (register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ sub.b %X0,%X0
+ mov.b %R1,%X0
+ mov.b %X1,%R0
+ mov.b %R1,%X0
+ mov.b %R1,%X0
+ mov.b %X1,%R0"
+ [(set_attr "length" "2,2,2,2,4,4")
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,set_znv,set_znv")])
+
+(define_insn "*movqi_h8300hs"
+ [(set (match_operand:QI 0 "general_operand_dst" "=r,r ,<,r,r,m")
+ (match_operand:QI 1 "general_operand_src" " I,r>,r,n,m,r"))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX
+ && (register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ sub.b %X0,%X0
+ mov.b %R1,%X0
+ mov.b %X1,%R0
+ mov.b %R1,%X0
+ mov.b %R1,%X0
+ mov.b %X1,%R0"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))
+ (set_attr "cc" "set_zn,set_znv,set_znv,clobber,set_znv,set_znv")])
+
+(define_insn "*movqi_h8sx"
+ [(set (match_operand:QI 0 "general_operand_dst" "=Z,rQ")
+ (match_operand:QI 1 "general_operand_src" "P4>X,rQi"))]
+ "TARGET_H8300SX"
+ "@
+ mov.b %X1:4,%X0
+ mov.b %X1,%X0"
+ [(set_attr "length_table" "mov_imm4,movb")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand_dst" "")
+ (match_operand:QI 1 "general_operand_src" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register. */
+ if (!TARGET_H8300SX
+ && !register_operand (operand0, QImode)
+ && !register_operand (operand1, QImode))
+ {
+ operands[1] = copy_to_mode_reg (QImode, operand1);
+ }
+}")
+
+(define_insn "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "general_operand_dst" "+r,r"))
+ (match_operand:QI 1 "general_operand_src" "I,rmi>"))]
+ ""
+ "@
+ sub.b %X0,%X0
+ mov.b %X1,%X0"
+ [(set_attr "length" "2,*")
+ (set_attr "length_table" "*,movb")
+ (set_attr "cc" "set_zn,set_znv")])
+
+;; movhi
+
+(define_insn "*movhi_h8300"
+ [(set (match_operand:HI 0 "general_operand_dst" "=r,r,<,r,r,m")
+ (match_operand:HI 1 "general_operand_src" "I,r>,r,i,m,r"))]
+ "TARGET_H8300
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))
+ && !(GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+ && GET_CODE (operands[1]) == REG
+ && REGNO (XEXP (XEXP (operands[0], 0), 0)) == REGNO (operands[1]))"
+ "@
+ sub.w %T0,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,set_znv,set_znv")])
+
+(define_insn "*movhi_h8300hs"
+ [(set (match_operand:HI 0 "general_operand_dst" "=r,r,<,r,r,m")
+ (match_operand:HI 1 "general_operand_src" "I,r>,r,i,m,r"))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "@
+ sub.w %T0,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,set_znv,set_znv")])
+
+(define_insn "*movhi_h8sx"
+ [(set (match_operand:HI 0 "general_operand_dst" "=r,r,Z,Q,rQ")
+ (match_operand:HI 1 "general_operand_src" "I,P3>X,P4>X,IP8>X,rQi"))]
+ "TARGET_H8300SX"
+ "@
+ sub.w %T0,%T0
+ mov.w %T1:3,%T0
+ mov.w %T1:4,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0"
+ [(set_attr "length_table" "*,*,mov_imm4,short_immediate,movw")
+ (set_attr "length" "2,2,*,*,*")
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,set_znv")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand_dst" "")
+ (match_operand:HI 1 "general_operand_src" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, HImode)
+ && !register_operand (operand0, HImode))
+ {
+ operands[1] = copy_to_mode_reg (HImode, operand1);
+ }
+}")
+
+(define_insn "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "general_operand_dst" "+r,r,r"))
+ (match_operand:HI 1 "general_operand_src" "I,P3>X,rmi"))]
+ ""
+ "@
+ sub.w %T0,%T0
+ mov.w %T1,%T0
+ mov.w %T1,%T0"
+ [(set_attr "length" "2,2,*")
+ (set_attr "length_table" "*,*,movw")
+ (set_attr "cc" "set_zn,set_znv,set_znv")])
+
+;; movsi
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand_dst" "")
+ (match_operand:SI 1 "general_operand_src" ""))]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ {
+ if (h8300_expand_movsi (operands))
+ DONE;
+ }
+ else if (!TARGET_H8300SX)
+ {
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, SImode)
+ && !register_operand (operand0, SImode))
+ {
+ operands[1] = copy_to_mode_reg (SImode, operand1);
+ }
+ }
+}")
+
+(define_insn "*movsi_h8300"
+ [(set (match_operand:SI 0 "general_operand_dst" "=r,r,r,o,<,r")
+ (match_operand:SI 1 "general_operand_src" "I,r,io,r,r,>"))]
+ "TARGET_H8300
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "*
+{
+ unsigned int rn = -1;
+ switch (which_alternative)
+ {
+ case 0:
+ return \"sub.w %e0,%e0\;sub.w %f0,%f0\";
+ case 1:
+ if (REGNO (operands[0]) < REGNO (operands[1]))
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+ else
+ return \"mov.w %f1,%f0\;mov.w %e1,%e0\";
+ case 2:
+ /* Make sure we don't trample the register we index with. */
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ rtx inside = XEXP (operands[1], 0);
+ if (REG_P (inside))
+ {
+ rn = REGNO (inside);
+ }
+ else if (GET_CODE (inside) == PLUS)
+ {
+ rtx lhs = XEXP (inside, 0);
+ rtx rhs = XEXP (inside, 1);
+ if (REG_P (lhs)) rn = REGNO (lhs);
+ if (REG_P (rhs)) rn = REGNO (rhs);
+ }
+ }
+ if (rn == REGNO (operands[0]))
+ {
+ /* Move the second word first. */
+ return \"mov.w %f1,%f0\;mov.w %e1,%e0\";
+ }
+ else
+ {
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ /* If either half is zero, use sub.w to clear that
+ half. */
+ if ((INTVAL (operands[1]) & 0xffff) == 0)
+ return \"mov.w %e1,%e0\;sub.w %f0,%f0\";
+ if (((INTVAL (operands[1]) >> 16) & 0xffff) == 0)
+ return \"sub.w %e0,%e0\;mov.w %f1,%f0\";
+ /* If the upper half and the lower half are the same,
+ copy one half to the other. */
+ if ((INTVAL (operands[1]) & 0xffff)
+ == ((INTVAL (operands[1]) >> 16) & 0xffff))
+ return \"mov.w\\t%e1,%e0\;mov.w\\t%e0,%f0\";
+ }
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+ }
+ case 3:
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+ case 4:
+ return \"mov.w %f1,%T0\;mov.w %e1,%T0\";
+ case 5:
+ return \"mov.w %T1,%e0\;mov.w %T1,%f0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))])
+
+(define_insn "*movsi_h8300hs"
+ [(set (match_operand:SI 0 "general_operand_dst" "=r,r,r,<,r,r,m,*a,*a,r")
+ (match_operand:SI 1 "general_operand_src" "I,r,i,r,>,m,r,I,r,*a"))]
+ "(TARGET_H8300S || TARGET_H8300H) && !TARGET_H8300SX
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))
+ && !(GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+ && GET_CODE (operands[1]) == REG
+ && REGNO (XEXP (XEXP (operands[0], 0), 0)) == REGNO (operands[1]))"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return \"sub.l %S0,%S0\";
+ case 7:
+ return \"clrmac\";
+ case 8:
+ return \"clrmac\;ldmac %1,macl\";
+ case 9:
+ return \"stmac macl,%0\";
+ default:
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ int val = INTVAL (operands[1]);
+
+ /* Look for constants which can be made by adding an 8-bit
+ number to zero in one of the two low bytes. */
+ if (val == (val & 0xff))
+ {
+ operands[1] = GEN_INT ((char) val & 0xff);
+ return \"sub.l\\t%S0,%S0\;add.b\\t%1,%w0\";
+ }
+
+ if (val == (val & 0xff00))
+ {
+ operands[1] = GEN_INT ((char) (val >> 8) & 0xff);
+ return \"sub.l\\t%S0,%S0\;add.b\\t%1,%x0\";
+ }
+
+	  /* Look for constants that can be formed by applying subs,
+	     inc.w or dec.w to a zeroed register.  */
+ switch (val & 0xffffffff)
+ {
+ case 0xffffffff:
+ return \"sub.l\\t%S0,%S0\;subs\\t#1,%S0\";
+ case 0xfffffffe:
+ return \"sub.l\\t%S0,%S0\;subs\\t#2,%S0\";
+ case 0xfffffffc:
+ return \"sub.l\\t%S0,%S0\;subs\\t#4,%S0\";
+
+ case 0x0000ffff:
+ return \"sub.l\\t%S0,%S0\;dec.w\\t#1,%f0\";
+ case 0x0000fffe:
+ return \"sub.l\\t%S0,%S0\;dec.w\\t#2,%f0\";
+
+ case 0xffff0000:
+ return \"sub.l\\t%S0,%S0\;dec.w\\t#1,%e0\";
+ case 0xfffe0000:
+ return \"sub.l\\t%S0,%S0\;dec.w\\t#2,%e0\";
+
+ case 0x00010000:
+ return \"sub.l\\t%S0,%S0\;inc.w\\t#1,%e0\";
+ case 0x00020000:
+ return \"sub.l\\t%S0,%S0\;inc.w\\t#2,%e0\";
+ }
+ }
+ }
+ return \"mov.l %S1,%S0\";
+}"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))
+ (set_attr "cc" "set_zn,set_znv,clobber,set_znv,set_znv,set_znv,set_znv,none_0hit,none_0hit,set_znv")])
+
+(define_insn "*movsi_h8sx"
+ [(set (match_operand:SI 0 "general_operand_dst" "=r,r,Q,rQ,*a,*a,r")
+ (match_operand:SI 1 "general_operand_src" "I,P3>X,IP8>X,rQi,I,r,*a"))]
+ "TARGET_H8300SX"
+ "@
+ sub.l %S0,%S0
+ mov.l %S1:3,%S0
+ mov.l %S1,%S0
+ mov.l %S1,%S0
+ clrmac
+ clrmac\;ldmac %1,macl
+ stmac macl,%0"
+ [(set_attr "length_table" "*,*,short_immediate,movl,*,*,*")
+ (set_attr "length" "2,2,*,*,2,6,4")
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,none_0hit,none_0hit,set_znv")])
+
+(define_insn "*movsf_h8sx"
+ [(set (match_operand:SF 0 "general_operand_dst" "=r,rQ")
+ (match_operand:SF 1 "general_operand_src" "G,rQi"))]
+ "TARGET_H8300SX"
+ "@
+ sub.l %S0,%S0
+ mov.l %S1,%S0"
+ [(set_attr "length" "2,*")
+ (set_attr "length_table" "*,movl")
+ (set_attr "cc" "set_zn,set_znv")])
+
+;; Implement block moves using movmd. Defining movmemsi allows the full
+;; range of constant lengths (up to 0x40000 bytes when using movmd.l).
+;; See h8sx_emit_movmd for details.
+(define_expand "movmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))]
+ "TARGET_H8300SX"
+ {
+ if (h8sx_emit_movmd (operands[0], operands[1], operands[2],
+ INTVAL (operands[3])))
+ DONE;
+ else
+ FAIL;
+ })
+
+;; Expander for generating movmd insns. Operand 0 is the destination
+;; memory region, operand 1 is the source, operand 2 is the counter
+;; register and operand 3 is the chunk size (1, 2 or 4).
+(define_expand "movmd"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (unspec [(match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 3 "const_int_operand" "")] UNSPEC_MOVMD)
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (set (match_dup 2)
+ (const_int 0))])]
+ "TARGET_H8300SX"
+ {
+ operands[4] = copy_rtx (XEXP (operands[0], 0));
+ operands[5] = copy_rtx (XEXP (operands[1], 0));
+ })
+
+
+;; This is a difficult instruction to reload since operand 0 must be the
+;; frame pointer. See h8300_reg_class_from_letter for an explanation.
+(define_insn "movmd_internal_normal"
+ [(set (mem:BLK (match_operand:HI 3 "register_operand" "0,r"))
+ (mem:BLK (match_operand:HI 4 "register_operand" "1,1")))
+ (unspec [(match_operand:HI 5 "register_operand" "2,2")
+ (match_operand:HI 6 "const_int_operand" "n,n")] UNSPEC_MOVMD)
+ (clobber (match_operand:HI 0 "register_operand" "=d,??D"))
+ (clobber (match_operand:HI 1 "register_operand" "=f,f"))
+ (set (match_operand:HI 2 "register_operand" "=c,c")
+ (const_int 0))]
+ "TARGET_H8300SX && TARGET_NORMAL_MODE"
+ "@
+ movmd%m6
+ #"
+ [(set_attr "length" "2,14")
+ (set_attr "can_delay" "no")
+ (set_attr "cc" "none,clobber")])
+
+(define_insn "movmd_internal"
+ [(set (mem:BLK (match_operand:SI 3 "register_operand" "0,r"))
+ (mem:BLK (match_operand:SI 4 "register_operand" "1,1")))
+ (unspec [(match_operand:HI 5 "register_operand" "2,2")
+ (match_operand:HI 6 "const_int_operand" "n,n")] UNSPEC_MOVMD)
+ (clobber (match_operand:SI 0 "register_operand" "=d,??D"))
+ (clobber (match_operand:SI 1 "register_operand" "=f,f"))
+ (set (match_operand:HI 2 "register_operand" "=c,c")
+ (const_int 0))]
+ "TARGET_H8300SX && !TARGET_NORMAL_MODE"
+ "@
+ movmd%m6
+ #"
+ [(set_attr "length" "2,14")
+ (set_attr "can_delay" "no")
+ (set_attr "cc" "none,clobber")])
+
+;; Split the above instruction if the destination register isn't er6.
+;; We need a sequence like:
+;;
+;; mov.l er6,@-er7
+;; mov.l <dest>,er6
+;; movmd.sz
+;; mov.l er6,<dest>
+;; mov.l @er7+,er6
+;;
+;; where <dest> is the current destination register (operand 4).
+;; The fourth instruction will be deleted if <dest> dies here.
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (unspec [(match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 3 "const_int_operand" "")] UNSPEC_MOVMD)
+ (clobber (match_operand:HI 4 "register_operand" ""))
+ (clobber (match_operand:HI 5 "register_operand" ""))
+ (set (match_dup 2)
+ (const_int 0))]
+ "TARGET_H8300SX && TARGET_NORMAL_MODE
+ && reload_completed
+ && REGNO (operands[4]) != DESTINATION_REG"
+ [(const_int 0)]
+ {
+ rtx dest;
+
+ h8300_swap_into_er6 (XEXP (operands[0], 0));
+ dest = replace_equiv_address (operands[0], hard_frame_pointer_rtx);
+ emit_insn (gen_movmd (dest, operands[1], operands[2], operands[3]));
+ h8300_swap_out_of_er6 (operands[4]);
+ DONE;
+ })
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (unspec [(match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 3 "const_int_operand" "")] UNSPEC_MOVMD)
+ (clobber (match_operand:SI 4 "register_operand" ""))
+ (clobber (match_operand:SI 5 "register_operand" ""))
+ (set (match_dup 2)
+ (const_int 0))]
+ "TARGET_H8300SX && !TARGET_NORMAL_MODE
+ && reload_completed
+ && REGNO (operands[4]) != DESTINATION_REG"
+ [(const_int 0)]
+ {
+ rtx dest;
+
+ h8300_swap_into_er6 (XEXP (operands[0], 0));
+ dest = replace_equiv_address (operands[0], hard_frame_pointer_rtx);
+ emit_insn (gen_movmd (dest, operands[1], operands[2], operands[3]));
+ h8300_swap_out_of_er6 (operands[4]);
+ DONE;
+ })
+
+;; Expand a call to stpcpy() using movsd. Operand 0 should point to
+;; the final character, but movsd leaves it pointing to the character
+;; after that.
+(define_expand "movstr"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:BLK 2 "memory_operand" ""))]
+ "TARGET_H8300SX"
+ {
+ operands[1] = replace_equiv_address
+ (operands[1], copy_to_mode_reg (Pmode, XEXP (operands[1], 0)));
+ operands[2] = replace_equiv_address
+ (operands[2], copy_to_mode_reg (Pmode, XEXP (operands[2], 0)));
+ emit_insn (gen_movsd (operands[1], operands[2], gen_reg_rtx (Pmode)));
+ emit_insn (gen_add3_insn (operands[0],
+ XEXP (operands[1], 0),
+ constm1_rtx));
+ DONE;
+ })
+
+;; Expander for generating a movsd instruction. Operand 0 is the
+;; destination string, operand 1 is the source string and operand 2
+;; is a scratch register.
+(define_expand "movsd"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (unspec:BLK [(match_operand:BLK 1 "memory_operand" "")]
+ UNSPEC_STPCPY))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (match_operand 2 "register_operand" ""))])]
+ "TARGET_H8300SX"
+ {
+ operands[3] = copy_rtx (XEXP (operands[0], 0));
+ operands[4] = copy_rtx (XEXP (operands[1], 0));
+ })
+
+;; See comments above movmd_internal().
+(define_insn "stpcpy_internal_normal"
+ [(set (mem:BLK (match_operand:HI 3 "register_operand" "0,r"))
+ (unspec:BLK [(mem:BLK (match_operand:HI 4 "register_operand" "1,1"))]
+ UNSPEC_STPCPY))
+ (clobber (match_operand:HI 0 "register_operand" "=d,??D"))
+ (clobber (match_operand:HI 1 "register_operand" "=f,f"))
+ (clobber (match_operand:HI 2 "register_operand" "=c,c"))]
+ "TARGET_H8300SX && TARGET_NORMAL_MODE"
+ "@
+ \n1:\tmovsd\t2f\;bra\t1b\n2:
+ #"
+ [(set_attr "length" "6,18")
+ (set_attr "cc" "none,clobber")])
+
+(define_insn "stpcpy_internal"
+ [(set (mem:BLK (match_operand:SI 3 "register_operand" "0,r"))
+ (unspec:BLK [(mem:BLK (match_operand:SI 4 "register_operand" "1,1"))]
+ UNSPEC_STPCPY))
+ (clobber (match_operand:SI 0 "register_operand" "=d,??D"))
+ (clobber (match_operand:SI 1 "register_operand" "=f,f"))
+ (clobber (match_operand:SI 2 "register_operand" "=c,c"))]
+ "TARGET_H8300SX && !TARGET_NORMAL_MODE"
+ "@
+ \n1:\tmovsd\t2f\;bra\t1b\n2:
+ #"
+ [(set_attr "length" "6,18")
+ (set_attr "cc" "none,clobber")])
+
+;; Split the above instruction if the destination isn't er6. This works
+;; in the same way as the movmd splitter.
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (unspec:BLK [(match_operand:BLK 1 "memory_operand" "")] UNSPEC_STPCPY))
+ (clobber (match_operand:HI 2 "register_operand" ""))
+ (clobber (match_operand:HI 3 "register_operand" ""))
+ (clobber (match_operand:HI 4 "register_operand" ""))]
+ "TARGET_H8300SX && TARGET_NORMAL_MODE
+ && reload_completed
+ && REGNO (operands[2]) != DESTINATION_REG"
+ [(const_int 0)]
+ {
+ rtx dest;
+
+ h8300_swap_into_er6 (XEXP (operands[0], 0));
+ dest = replace_equiv_address (operands[0], hard_frame_pointer_rtx);
+ emit_insn (gen_movsd (dest, operands[1], operands[4]));
+ h8300_swap_out_of_er6 (operands[2]);
+ DONE;
+ })
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (unspec:BLK [(match_operand:BLK 1 "memory_operand" "")] UNSPEC_STPCPY))
+ (clobber (match_operand:SI 2 "register_operand" ""))
+ (clobber (match_operand:SI 3 "register_operand" ""))
+ (clobber (match_operand:SI 4 "register_operand" ""))]
+ "TARGET_H8300SX && !TARGET_NORMAL_MODE
+ && reload_completed
+ && REGNO (operands[2]) != DESTINATION_REG"
+ [(const_int 0)]
+ {
+ rtx dest;
+
+ h8300_swap_into_er6 (XEXP (operands[0], 0));
+ dest = replace_equiv_address (operands[0], hard_frame_pointer_rtx);
+ emit_insn (gen_movsd (dest, operands[1], operands[4]));
+ h8300_swap_out_of_er6 (operands[2]);
+ DONE;
+ })
+
+(include "mova.md")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand_dst" "")
+ (match_operand:SF 1 "general_operand_src" ""))]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ {
+ if (h8300_expand_movsi (operands))
+ DONE;
+ }
+ else if (!TARGET_H8300SX)
+ {
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, SFmode)
+ && !register_operand (operand0, SFmode))
+ {
+ operands[1] = copy_to_mode_reg (SFmode, operand1);
+ }
+ }
+}")
+
+(define_insn "*movsf_h8300"
+ [(set (match_operand:SF 0 "general_operand_dst" "=r,r,r,o,<,r")
+ (match_operand:SF 1 "general_operand_src" "G,r,io,r,r,>"))]
+ "TARGET_H8300
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ "*
+{
+ /* Copy of the movsi stuff. */
+ unsigned int rn = -1;
+ switch (which_alternative)
+ {
+ case 0:
+ return \"sub.w %e0,%e0\;sub.w %f0,%f0\";
+ case 1:
+ if (REGNO (operands[0]) < REGNO (operands[1]))
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+ else
+ return \"mov.w %f1,%f0\;mov.w %e1,%e0\";
+ case 2:
+ /* Make sure we don't trample the register we index with. */
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ rtx inside = XEXP (operands[1], 0);
+ if (REG_P (inside))
+ {
+ rn = REGNO (inside);
+ }
+ else if (GET_CODE (inside) == PLUS)
+ {
+ rtx lhs = XEXP (inside, 0);
+ rtx rhs = XEXP (inside, 1);
+ if (REG_P (lhs)) rn = REGNO (lhs);
+ if (REG_P (rhs)) rn = REGNO (rhs);
+ }
+ }
+ if (rn == REGNO (operands[0]))
+ /* Move the second word first. */
+ return \"mov.w %f1,%f0\;mov.w %e1,%e0\";
+ else
+ /* Move the first word first. */
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+
+ case 3:
+ return \"mov.w %e1,%e0\;mov.w %f1,%f0\";
+ case 4:
+ return \"mov.w %f1,%T0\;mov.w %e1,%T0\";
+ case 5:
+ return \"mov.w %T1,%e0\;mov.w %T1,%f0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))])
+
+(define_insn "*movsf_h8300hs"
+ [(set (match_operand:SF 0 "general_operand_dst" "=r,r,r,m,<,r")
+ (match_operand:SF 1 "general_operand_src" "G,r,im,r,r,>"))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ "@
+ sub.l %S0,%S0
+ mov.l %S1,%S0
+ mov.l %S1,%S0
+ mov.l %S1,%S0
+ mov.l %S1,%S0
+ mov.l %S1,%S0"
+ [(set (attr "length")
+ (symbol_ref "compute_mov_length (operands)"))
+ (set_attr "cc" "set_zn,set_znv,set_znv,set_znv,set_znv,set_znv")])
+
+;; ----------------------------------------------------------------------
+;; PUSH INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "pushqi1_h8300"
+ [(set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG) (const_int -2)))
+ (set (mem:QI (plus:HI (reg:HI SP_REG) (const_int -1)))
+ (match_operand:QI 0 "register_operand" "r"))]
+ "TARGET_H8300
+ && operands[0] != stack_pointer_rtx"
+ "mov.w\\t%T0,@-r7"
+ [(set_attr "length" "2")])
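+
+;; Note how this matches PUSH_ROUNDING in h8300.h: a byte push is
+;; emitted as a word push (mov.w ...,@-r7) so that the stack pointer
+;; stays even.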
+
+(define_insn "pushqi1_h8300hs_advanced"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:QI (plus:SI (reg:SI SP_REG) (const_int -3)))
+ (match_operand:QI 0 "register_operand" "r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && operands[0] != stack_pointer_rtx"
+ "mov.l\\t%S0,@-er7"
+ [(set_attr "length" "4")])
+
+(define_insn "pushqi1_h8300hs_normal"
+ [(set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (set (mem:QI (plus:HI (reg:HI SP_REG) (const_int -3)))
+ (match_operand:QI 0 "register_operand" "r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && operands[0] != stack_pointer_rtx"
+ "mov.l\\t%S0,@-er7"
+ [(set_attr "length" "4")])
+
+(define_expand "pushqi1"
+ [(match_operand:QI 0 "register_operand" "")]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ emit_insn (gen_pushqi1_h8300 (operands[0]));
+ else if (!TARGET_NORMAL_MODE)
+ emit_insn (gen_pushqi1_h8300hs_advanced (operands[0]));
+ else
+ emit_insn (gen_pushqi1_h8300hs_normal (operands[0]));
+ DONE;
+}")
+
+(define_expand "pushhi1_h8300"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:HI 0 "register_operand" ""))]
+ "TARGET_H8300
+ && operands[0] != stack_pointer_rtx"
+ "")
+
+(define_insn "pushhi1_h8300hs_advanced"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:HI (plus:SI (reg:SI SP_REG) (const_int -2)))
+ (match_operand:HI 0 "register_operand" "r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && operands[0] != stack_pointer_rtx"
+ "mov.l\\t%S0,@-er7"
+ [(set_attr "length" "4")])
+
+(define_insn "pushhi1_h8300hs_normal"
+ [(set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (set (mem:HI (plus:HI (reg:HI SP_REG) (const_int -2)))
+ (match_operand:HI 0 "register_operand" "r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && operands[0] != stack_pointer_rtx"
+ "mov.l\\t%S0,@-er7"
+ [(set_attr "length" "4")])
+
+(define_expand "pushhi1"
+ [(match_operand:HI 0 "register_operand" "")]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ emit_insn (gen_pushhi1_h8300 (operands[0]));
+ else if (!TARGET_NORMAL_MODE)
+ emit_insn (gen_pushhi1_h8300hs_advanced (operands[0]));
+ else
+ emit_insn (gen_pushhi1_h8300hs_normal (operands[0]));
+ DONE;
+}")
+
+;; ----------------------------------------------------------------------
+;; TEST INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn ""
+ [(set (cc0) (compare
+ (zero_extract:QI (match_operand:QI 0 "bit_memory_operand" "r,U")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "n,n"))
+ (const_int 0)))]
+ "TARGET_H8300"
+ "btst %Z1,%Y0"
+ [(set_attr "length" "2,4")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_insn ""
+ [(set (cc0) (compare
+ (zero_extract:HI (match_operand:HI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "TARGET_H8300"
+ "btst %Z1,%Y0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn_and_split "*tst_extzv_1_n"
+ [(set (cc0) (compare
+ (zero_extract:SI (match_operand:QI 0 "general_operand_src" "r,U,mn>")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "n,n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 2 "=X,X,&r"))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "@
+ btst\\t%Z1,%Y0
+ btst\\t%Z1,%Y0
+ #"
+ "&& reload_completed
+ && !OK_FOR_U (operands[0])"
+ [(set (match_dup 2)
+ (match_dup 0))
+ (parallel [(set (cc0) (compare (zero_extract:SI (match_dup 2)
+ (const_int 1)
+ (match_dup 1))
+ (const_int 0)))
+ (clobber (scratch:QI))])]
+ ""
+ [(set_attr "length" "2,8,10")
+ (set_attr "cc" "set_zn,set_zn,set_zn")])
+
+(define_insn ""
+ [(set (cc0) (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) <= 15"
+ "btst %Z1,%Y0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn_and_split "*tstsi_upper_bit"
+ [(set (cc0) (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) >= 16"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (ior:SI (and:SI (match_dup 2)
+ (const_int -65536))
+ (lshiftrt:SI (match_dup 0)
+ (const_int 16))))
+ (set (cc0) (compare (zero_extract:SI (match_dup 2)
+ (const_int 1)
+ (match_dup 3))
+ (const_int 0)))]
+ "operands[3] = GEN_INT (INTVAL (operands[1]) - 16);")
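+
+;; The point is that btst only reaches bits 0-15 directly (see the
+;; pattern above), so testing bit 16 or higher first shifts the upper
+;; half into a scratch register and retests there.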
+
+(define_insn "*tstsi_variable_bit"
+ [(set (cc0) (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 7)))
+ (const_int 0)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "btst %w1,%w0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn_and_split "*tstsi_variable_bit_qi"
+ [(set (cc0)
+ (compare
+ (zero_extract:SI (zero_extend:SI (match_operand:QI 0 "general_operand_src" "r,U,mn>"))
+ (const_int 1)
+ (and:SI (match_operand:SI 1 "register_operand" "r,r,r")
+ (const_int 7)))
+ (const_int 0)))
+ (clobber (match_scratch:QI 2 "=X,X,&r"))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "@
+ btst\\t%w1,%X0
+ btst\\t%w1,%X0
+ #"
+ "&& reload_completed
+ && !OK_FOR_U (operands[0])"
+ [(set (match_dup 2)
+ (match_dup 0))
+ (parallel [(set (cc0) (compare (zero_extract:SI (zero_extend:SI (match_dup 2))
+ (const_int 1)
+ (and:SI (match_dup 1)
+ (const_int 7)))
+ (const_int 0)))
+ (clobber (scratch:QI))])]
+ ""
+ [(set_attr "length" "2,8,10")
+ (set_attr "cc" "set_zn,set_zn,set_zn")])
+
+(define_insn "*tstqi"
+ [(set (cc0) (compare (match_operand:QI 0 "register_operand" "r")
+ (const_int 0)))]
+ ""
+ "mov.b %X0,%X0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*tsthi"
+ [(set (cc0) (compare (match_operand:HI 0 "register_operand" "r")
+ (const_int 0)))]
+ ""
+ "mov.w %T0,%T0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*tsthi_upper"
+ [(set (cc0) (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (const_int -256))
+ (const_int 0)))]
+ ""
+ "mov.b %t0,%t0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*tstsi"
+ [(set (cc0) (compare (match_operand:SI 0 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.l %S0,%S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*tstsi_upper"
+ [(set (cc0) (compare (and:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int -65536))
+ (const_int 0)))]
+ ""
+ "mov.w %e0,%e0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*cmpqi"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (match_operand:QI 1 "h8300_src_operand" "rQi")))]
+ ""
+ "cmp.b %X1,%X0"
+ [(set_attr "length_table" "addb")
+ (set_attr "cc" "compare")])
+
+(define_insn "*cmphi_h8300_znvc"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "r")
+ (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_H8300"
+ "cmp.w %T1,%T0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "compare")])
+
+(define_insn "*cmphi_h8300hs_znvc"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
+ (match_operand:HI 1 "h8300_src_operand" "P3>X,rQi")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (!TARGET_H8300SX)
+ return \"cmp.w %T1,%T0\";
+ else
+ return \"cmp.w %T1:3,%T0\";
+ case 1:
+ return \"cmp.w %T1,%T0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "length_table" "short_immediate,addw")
+ (set_attr "cc" "compare,compare")])
+
+(define_insn "cmpsi"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
+ (match_operand:SI 1 "h8300_src_operand" "P3>X,rQi")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (!TARGET_H8300SX)
+ return \"cmp.l %S1,%S0\";
+ else
+ return \"cmp.l %S1:3,%S0\";
+ case 1:
+ return \"cmp.l %S1,%S0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "2,*")
+ (set_attr "length_table" "*,addl")
+ (set_attr "cc" "compare,compare")])
+
+;; ----------------------------------------------------------------------
+;; ADD INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "addqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (plus:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*addqi3"
+ [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
+ (plus:QI (match_operand:QI 1 "h8300_dst_operand" "%0")
+ (match_operand:QI 2 "h8300_src_operand" "rQi")))]
+ "h8300_operands_match_p (operands)"
+ "add.b %X2,%X0"
+ [(set_attr "length_table" "addb")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*addhi3_h8300"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:HI 2 "h8300_src_operand" "L,N,J,n,r")))]
+ "TARGET_H8300"
+ "@
+ adds %2,%T0
+ subs %G2,%T0
+ add.b %t2,%t0
+ add.b %s2,%s0\;addx %t2,%t0
+ add.w %T2,%T0"
+ [(set_attr "length" "2,2,2,4,2")
+ (set_attr "cc" "none_0hit,none_0hit,clobber,clobber,set_zn")])
+
+;; This splitter is very important to make the stack adjustment
+;; interrupt-safe. The combination of add.b and addx is unsafe!
+;;
+;; We apply this split after the peephole2 pass so that we won't end
+;; up creating too many adds/subs when a scratch register is
+;; available, which is actually a common case because stack
+;; deallocation tends to happen immediately after a function call.
+
+(define_split
+ [(set (match_operand:HI 0 "stack_pointer_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand 1 "const_int_gt_2_operand" "")))]
+ "TARGET_H8300 && epilogue_completed"
+ [(const_int 0)]
+ "split_adds_subs (HImode, operands); DONE;")
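+
+;; As a sketch: an sp adjustment of +6 can become three "adds #2,r7"
+;; instructions, each of which updates the pointer atomically;
+;; split_adds_subs chooses the exact sequence.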
+
+(define_peephole2
+ [(match_scratch:HI 2 "r")
+ (set (match_operand:HI 0 "stack_pointer_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_ge_8_operand" "")))]
+ "TARGET_H8300"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_dup 2)))]
+ "")
+
+(define_insn "*addhi3_h8300hs"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0")
+ (match_operand:HI 2 "h8300_src_operand" "L,N,J,n,r")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX"
+ "@
+ adds %2,%S0
+ subs %G2,%S0
+ add.b %t2,%t0
+ add.w %T2,%T0
+ add.w %T2,%T0"
+ [(set_attr "length" "2,2,2,4,2")
+ (set_attr "cc" "none_0hit,none_0hit,clobber,set_zn,set_zn")])
+
+(define_insn "*addhi3_incdec"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "incdec_operand" "M,O")]
+ UNSPEC_INCDEC))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "@
+ inc.w %2,%T0
+ dec.w %G2,%T0"
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_insn "*addhi3_h8sx"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rU,rU,r,rQ")
+ (plus:HI (match_operand:HI 1 "h8300_dst_operand" "%0,0,0,0")
+ (match_operand:HI 2 "h8300_src_operand" "P3>X,P3<X,J,rQi")))]
+ "TARGET_H8300SX && h8300_operands_match_p (operands)"
+ "@
+ add.w %T2:3,%T0
+ sub.w %G2:3,%T0
+ add.b %t2,%t0
+ add.w %T2,%T0"
+ [(set_attr "length_table" "short_immediate,short_immediate,*,addw")
+ (set_attr "length" "*,*,2,*")
+ (set_attr "cc" "set_zn")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "two_insn_adds_subs_operand" "")))]
+ ""
+ [(const_int 0)]
+ "split_adds_subs (HImode, operands); DONE;")
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*addsi_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "h8300_src_operand" "n,r")))]
+ "TARGET_H8300"
+ "* return output_plussi (operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_plussi_length (operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_plussi_cc (operands)"))])
+
+(define_insn "*addsi_h8300hs"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ,rQ")
+ (plus:SI (match_operand:SI 1 "h8300_dst_operand" "%0,0")
+ (match_operand:SI 2 "h8300_src_operand" "i,rQ")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "* return output_plussi (operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_plussi_length (operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_plussi_cc (operands)"))])
+
+(define_insn "*addsi3_incdec"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "incdec_operand" "M,O")]
+ UNSPEC_INCDEC))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "@
+ inc.l %2,%S0
+ dec.l %G2,%S0"
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "two_insn_adds_subs_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(const_int 0)]
+ "split_adds_subs (SImode, operands); DONE;")
+
+;; ----------------------------------------------------------------------
+;; SUBTRACT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "subqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (minus:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*subqi3"
+ [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
+ (minus:QI (match_operand:QI 1 "h8300_dst_operand" "0")
+ (match_operand:QI 2 "h8300_dst_operand" "rQ")))]
+ "h8300_operands_match_p (operands)"
+ "sub.b %X2,%X0"
+ [(set_attr "length_table" "addb")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "subhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*subhi3_h8300"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "h8300_src_operand" "r,n")))]
+ "TARGET_H8300"
+ "@
+ sub.w %T2,%T0
+ add.b %E2,%s0\;addx %F2,%t0"
+ [(set_attr "length" "2,4")
+ (set_attr "cc" "set_zn,clobber")])
+
+(define_insn "*subhi3_h8300hs"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ,rQ")
+ (minus:HI (match_operand:HI 1 "h8300_dst_operand" "0,0")
+ (match_operand:HI 2 "h8300_src_operand" "rQ,i")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "@
+ sub.w %T2,%T0
+ sub.w %T2,%T0"
+ [(set_attr "length_table" "addw")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")))]
+ ""
+{
+ if (TARGET_H8300)
+ operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "*subsi3_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_H8300"
+ "sub.w %f2,%f0\;subx %y2,%y0\;subx %z2,%z0"
+ [(set_attr "length" "6")])
+
+(define_insn "*subsi3_h8300hs"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ,rQ")
+ (minus:SI (match_operand:SI 1 "h8300_dst_operand" "0,0")
+ (match_operand:SI 2 "h8300_src_operand" "rQ,i")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "@
+ sub.l %S2,%S0
+ sub.l %S2,%S0"
+ [(set_attr "length_table" "addl")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; MULTIPLY INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; Note that the H8/300 can only handle umulqihi3.
+
+(define_expand "mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" ""))
+ ;; intentionally-mismatched modes
+ (match_operand:QI 2 "reg_or_nibble_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "
+{
+ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_SIGN_EXTEND (HImode, operands[2]);
+}")
+
+(define_insn "*mulqihi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "%0"))
+ (match_operand:QI 2 "nibble_operand" "IP4>X")))]
+ "TARGET_H8300SX"
+ "mulxs.b %X2,%T0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "*mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "%0"))
+ (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mulxs.b %X2,%T0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+ ;; intentionally-mismatched modes
+ (match_operand:HI 2 "reg_or_nibble_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "
+{
+ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_SIGN_EXTEND (SImode, operands[2]);
+}")
+
+(define_insn "*mulhisi3_const"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+ (match_operand:SI 2 "nibble_operand" "IP4>X")))]
+ "TARGET_H8300SX"
+ "mulxs.w %T2,%S0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "*mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mulxs.w %T2,%S0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "umulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" ""))
+ ;; intentionally-mismatched modes
+ (match_operand:QI 2 "reg_or_nibble_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "
+{
+ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_ZERO_EXTEND (HImode, operands[2]);
+}")
+
+(define_insn "*umulqihi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0"))
+ (match_operand:QI 2 "nibble_operand" "IP4>X")))]
+ "TARGET_H8300SX"
+ "mulxu.b %X2,%T0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "*umulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0"))
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+ ""
+ "mulxu.b %X2,%T0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none_0hit")])
+
+(define_expand "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+ ;; intentionally-mismatched modes
+ (match_operand:HI 2 "reg_or_nibble_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "
+{
+ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
+}")
+
+(define_insn "*umulhisi3_const"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+ (match_operand:SI 2 "nibble_operand" "IP4>X")))]
+ "TARGET_H8300SX"
+ "mulxu.w %T2,%S0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "*umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mulxu.w %T2,%S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none_0hit")])
+
+;; We could have used mulu.[wl] here, but mulu.[wl] is only available
+;; on an H8SX with a multiplier, whereas muls.w seems to be available
+;; on all H8SX variants.
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (match_operand:HI 1 "register_operand" "%0")
+ (match_operand:HI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "muls.w\\t%T2,%T0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "muls.l\\t%S2,%S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (sign_extend:DI (match_operand:SI 2 "reg_or_nibble_operand" "r IP4>X")))
+ (const_int 32))))]
+ "TARGET_H8300SXMUL"
+ "muls/u.l\\t%S2,%S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (ashiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (zero_extend:DI (match_operand:SI 2 "reg_or_nibble_operand" "r IP4>X")))
+ (const_int 32))))]
+ "TARGET_H8300SX"
+ "mulu/u.l\\t%S2,%S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none_0hit")])
+
+;; This is a "bridge" instruction.  Combine can't cram enough insns
+;; together to create a MAC instruction directly, but it can create
+;; this instruction, which then allows combine to create the real
+;; MAC insn.
+;;
+;; Unfortunately, if combine doesn't create a MAC instruction, this
+;; insn must still generate correct, reasonably efficient code.  Egad.
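+;;
+;; An illustrative source shape that ends up here (names made up):
+;;
+;;     acc += (long) *p++ * (long) *q++;
+;;
+;; The product alone matches this clrmac/mac pattern; combine can then
+;; fold the addition into the pattern below, which emits a bare mac.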
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mult:SI
+ (sign_extend:SI
+ (mem:HI (post_inc:SI (match_operand:SI 1 "register_operand" "r"))))
+ (sign_extend:SI
+ (mem:HI (post_inc:SI (match_operand:SI 2 "register_operand" "r"))))))]
+ "TARGET_MAC"
+ "clrmac\;mac @%2+,@%1+"
+ [(set_attr "length" "6")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (mult:SI
+ (sign_extend:SI (mem:HI
+ (post_inc:SI (match_operand:SI 1 "register_operand" "r"))))
+ (sign_extend:SI (mem:HI
+ (post_inc:SI (match_operand:SI 2 "register_operand" "r")))))
+ (match_operand:SI 3 "register_operand" "0")))]
+ "TARGET_MAC"
+ "mac @%2+,@%1+"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+;; ----------------------------------------------------------------------
+;; DIVIDE/MOD INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "udivhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (udiv:HI
+ (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "divu.w\\t%T2,%T0"
+ [(set_attr "length" "2")])
+
+(define_insn "divhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (div:HI
+ (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "divs.w\\t%T2,%T0"
+ [(set_attr "length" "2")])
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "divu.l\\t%S2,%S0"
+ [(set_attr "length" "2")])
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_nibble_operand" "r IP4>X")))]
+ "TARGET_H8300SX"
+ "divs.l\\t%S2,%S0"
+ [(set_attr "length" "2")])
+
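+;; The hardware pairs quotient and remainder in a single register:
+;; divxu.b divides a 16-bit dividend by an 8-bit divisor, leaving the
+;; quotient in the low byte and the remainder in the high byte (the
+;; word-sized and signed variants behave analogously).  The patterns
+;; below therefore emit the extra mov only when the remainder operand
+;; is actually live.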
+(define_insn "udivmodqi4"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (truncate:QI
+ (udiv:HI
+ (match_operand:HI 1 "register_operand" "0")
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "r")))))
+ (set (match_operand:QI 3 "register_operand" "=r")
+ (truncate:QI
+ (umod:HI
+ (match_dup 1)
+ (zero_extend:HI (match_dup 2)))))]
+ ""
+ "*
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return \"divxu.b\\t%X2,%T0\";
+ else
+ return \"divxu.b\\t%X2,%T0\;mov.b\\t%t0,%s3\";
+}"
+ [(set_attr "length" "4")])
+
+(define_insn "divmodqi4"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (truncate:QI
+ (div:HI
+ (match_operand:HI 1 "register_operand" "0")
+ (sign_extend:HI (match_operand:QI 2 "register_operand" "r")))))
+ (set (match_operand:QI 3 "register_operand" "=r")
+ (truncate:QI
+ (mod:HI
+ (match_dup 1)
+ (sign_extend:HI (match_dup 2)))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "*
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return \"divxs.b\\t%X2,%T0\";
+ else
+ return \"divxs.b\\t%X2,%T0\;mov.b\\t%t0,%s3\";
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "udivmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (truncate:HI
+ (udiv:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r")))))
+ (set (match_operand:HI 3 "register_operand" "=r")
+ (truncate:HI
+ (umod:SI
+ (match_dup 1)
+ (zero_extend:SI (match_dup 2)))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "*
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return \"divxu.w\\t%T2,%S0\";
+ else
+ return \"divxu.w\\t%T2,%S0\;mov.w\\t%e0,%f3\";
+}"
+ [(set_attr "length" "4")])
+
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (truncate:HI
+ (div:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r")))))
+ (set (match_operand:HI 3 "register_operand" "=r")
+ (truncate:HI
+ (mod:SI
+ (match_dup 1)
+ (sign_extend:SI (match_dup 2)))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "*
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return \"divxs.w\\t%T2,%S0\";
+ else
+ return \"divxs.w\\t%T2,%S0\;mov.w\\t%e0,%f3\";
+}"
+ [(set_attr "length" "6")])
+
+;; ----------------------------------------------------------------------
+;; AND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+(define_insn "bclrqi_msx"
+ [(set (match_operand:QI 0 "bit_register_indirect_operand" "=WU")
+ (and:QI (match_operand:QI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:QI 2 "single_zero_operand" "Y0")))]
+ "TARGET_H8300SX"
+ "bclr\\t%W2,%0"
+ [(set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=U")
+ (and:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_zero_operand" "Y0")))]
+ "TARGET_H8300SX"
+ [(set (match_dup 0)
+ (and:QI (match_dup 1)
+ (match_dup 2)))]
+{
+ if (abs (INTVAL (operands[2])) > 0xFF)
+ {
+ operands[0] = adjust_address (operands[0], QImode, 0);
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 8);
+ }
+ else
+ {
+ operands[0] = adjust_address (operands[0], QImode, 1);
+ operands[1] = adjust_address (operands[1], QImode, 1);
+ }
+})
+
+(define_insn "bclrhi_msx"
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=m")
+ (and:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_zero_operand" "Y0")))]
+ "TARGET_H8300SX"
+ "bclr\\t%W2,%0"
+ [(set_attr "length" "8")])
+
+(define_insn "*andqi3_2"
+ [(set (match_operand:QI 0 "bit_operand" "=rQ,r")
+ (and:QI (match_operand:QI 1 "bit_operand" "%0,WU")
+ (match_operand:QI 2 "h8300_src_operand" "rQi,IP1>X")))]
+ "TARGET_H8300SX"
+ "@
+ and %X2,%X0
+ bfld %2,%1,%R0"
+ [(set_attr "length" "*,8")
+ (set_attr "length_table" "logicb,*")
+ (set_attr "cc" "set_znv,none_0hit")])
+
+(define_insn "andqi3_1"
+ [(set (match_operand:QI 0 "bit_operand" "=r,U")
+ (and:QI (match_operand:QI 1 "bit_operand" "%0,0")
+ (match_operand:QI 2 "h8300_src_operand" "rn,n")))]
+ "register_operand (operands[0], QImode)
+ || single_zero_operand (operands[2], QImode)"
+ "@
+ and %X2,%X0
+ bclr %W2,%R0"
+ [(set_attr "length" "2,8")
+ (set_attr "cc" "set_znv,none_0hit")])
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (and:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*andorqi3"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (ior:QI (and:QI (match_operand:QI 2 "register_operand" "r")
+ (match_operand:QI 3 "single_one_operand" "n"))
+ (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "bld\\t%V3,%X2\;bor\\t%V3,%X0\;bst\\t%V3,%X0"
+ [(set_attr "length" "6")])
+
+(define_insn "*andorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (and:HI (match_operand:HI 2 "register_operand" "r")
+ (match_operand:HI 3 "single_one_operand" "n"))
+ (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "*
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 0xffff);
+ if (INTVAL (operands[3]) > 128)
+ {
+ operands[3] = GEN_INT (INTVAL (operands[3]) >> 8);
+ return \"bld\\t%V3,%t2\;bor\\t%V3,%t0\;bst\\t%V3,%t0\";
+ }
+ return \"bld\\t%V3,%s2\;bor\\t%V3,%s0\;bst\\t%V3,%s0\";
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "*andorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "single_one_operand" "n"))
+ (match_operand:SI 1 "register_operand" "0")))]
+ "(INTVAL (operands[3]) & 0xffff) != 0"
+ "*
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 0xffff);
+ if (INTVAL (operands[3]) > 128)
+ {
+ operands[3] = GEN_INT (INTVAL (operands[3]) >> 8);
+ return \"bld\\t%V3,%x2\;bor\\t%V3,%x0\;bst\\t%V3,%x0\";
+ }
+ return \"bld\\t%V3,%w2\;bor\\t%V3,%w0\;bst\\t%V3,%w0\";
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "*andorsi3_shift_8"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (ashift:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 8))
+ (const_int 65280))
+ (match_operand:SI 1 "register_operand" "0")))]
+ ""
+ "or.b\\t%w2,%x0"
+ [(set_attr "length" "2")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+;; ----------------------------------------------------------------------
+;; OR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+(define_insn "bsetqi_msx"
+ [(set (match_operand:QI 0 "bit_register_indirect_operand" "=WU")
+ (ior:QI (match_operand:QI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:QI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ "bset\\t%V2,%0"
+ [(set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=U")
+ (ior:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ [(set (match_dup 0)
+ (ior:QI (match_dup 1)
+ (match_dup 2)))]
+{
+ if (abs (INTVAL (operands[2])) > 0xFF)
+ {
+ operands[0] = adjust_address (operands[0], QImode, 0);
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 8);
+ }
+ else
+ {
+ operands[0] = adjust_address (operands[0], QImode, 1);
+ operands[1] = adjust_address (operands[1], QImode, 1);
+ }
+})
+
+(define_insn "bsethi_msx"
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=m")
+ (ior:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ "bset\\t%V2,%0"
+ [(set_attr "length" "8")])
+
+(define_insn "iorqi3_1"
+ [(set (match_operand:QI 0 "bit_operand" "=rQ,U")
+ (ior:QI (match_operand:QI 1 "bit_operand" "%0,0")
+ (match_operand:QI 2 "h8300_src_operand" "rQi,n")))]
+ "TARGET_H8300SX || register_operand (operands[0], QImode)
+ || single_one_operand (operands[2], QImode)"
+ "@
+ or\\t%X2,%X0
+ bset\\t%V2,%R0"
+ [(set_attr "length" "*,8")
+ (set_attr "length_table" "logicb,*")
+ (set_attr "cc" "set_znv,none_0hit")])
+
+(define_expand "iorqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ior:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_expand "iorhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ior:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+;; ----------------------------------------------------------------------
+;; XOR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+(define_insn "bnotqi_msx"
+ [(set (match_operand:QI 0 "bit_register_indirect_operand" "=WU")
+ (xor:QI (match_operand:QI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:QI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ "bnot\\t%V2,%0"
+ [(set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=U")
+ (xor:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ [(set (match_dup 0)
+ (xor:QI (match_dup 1)
+ (match_dup 2)))]
+{
+ if (abs (INTVAL (operands[2])) > 0xFF)
+ {
+ operands[0] = adjust_address (operands[0], QImode, 0);
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 8);
+ }
+ else
+ {
+ operands[0] = adjust_address (operands[0], QImode, 1);
+ operands[1] = adjust_address (operands[1], QImode, 1);
+ }
+})
+
+(define_insn "bnothi_msx"
+ [(set (match_operand:HI 0 "bit_register_indirect_operand" "=m")
+ (xor:HI (match_operand:HI 1 "bit_register_indirect_operand" "%0")
+ (match_operand:HI 2 "single_one_operand" "Y2")))]
+ "TARGET_H8300SX"
+ "bnot\\t%V2,%0"
+ [(set_attr "length" "8")])
+
+(define_insn "xorqi3_1"
+ [(set (match_operand:QI 0 "bit_operand" "=r,U")
+ (xor:QI (match_operand:QI 1 "bit_operand" "%0,0")
+ (match_operand:QI 2 "h8300_src_operand" "rQi,n")))]
+ "TARGET_H8300SX || register_operand (operands[0], QImode)
+ || single_one_operand (operands[2], QImode)"
+ "@
+ xor\\t%X2,%X0
+ bnot\\t%V2,%R0"
+ [(set_attr "length" "*,8")
+ (set_attr "length_table" "logicb,*")
+ (set_attr "cc" "set_znv,none_0hit")])
+
+(define_expand "xorqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (xor:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_expand "xorhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (xor:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")))]
+ ""
+ "")
+
+;; ----------------------------------------------------------------------
+;; {AND,IOR,XOR}{HI3,SI3} PATTERNS
+;; ----------------------------------------------------------------------
+
+;; We need a separate pattern here because machines other than the
+;; original H8/300 don't have to split the 16-bit operand into a pair
+;; of high/low instructions, so we can accept literal addresses, which
+;; on the H8/300 have to be loaded into a register first.
+(define_insn "*logicalhi3_sn"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:HI 3 "bit_operator"
+ [(match_operand:HI 1 "h8300_dst_operand" "%0")
+ (match_operand:HI 2 "h8300_src_operand" "rQi")]))]
+ "(TARGET_H8300S || TARGET_H8300H) && h8300_operands_match_p (operands)"
+ "* return output_logical_op (HImode, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_logical_op_length (HImode, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_logical_op_cc (HImode, operands)"))])
+
+(define_insn "*logicalsi3_sn"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:SI 3 "bit_operator"
+ [(match_operand:SI 1 "h8300_dst_operand" "%0")
+ (match_operand:SI 2 "h8300_src_operand" "rQi")]))]
+ "(TARGET_H8300S || TARGET_H8300H) && h8300_operands_match_p (operands)"
+ "* return output_logical_op (SImode, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_logical_op_length (SImode, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_logical_op_cc (SImode, operands)"))])
+
+(define_insn "*logicalhi3"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:HI 3 "bit_operator"
+ [(match_operand:HI 1 "h8300_dst_operand" "%0")
+ (match_operand:HI 2 "h8300_src_operand" "rQi")]))]
+ "h8300_operands_match_p (operands)"
+ "* return output_logical_op (HImode, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_logical_op_length (HImode, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_logical_op_cc (HImode, operands)"))])
+
+(define_insn "*logicalsi3"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:SI 3 "bit_operator"
+ [(match_operand:SI 1 "h8300_dst_operand" "%0")
+ (match_operand:SI 2 "h8300_src_operand" "rQi")]))]
+ "h8300_operands_match_p (operands)"
+ "* return output_logical_op (SImode, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_logical_op_length (SImode, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_logical_op_cc (SImode, operands)"))])
+
+;; ----------------------------------------------------------------------
+;; NEGATION INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "negqi2"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (neg:QI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*negqi2"
+ [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
+ (neg:QI (match_operand:QI 1 "h8300_dst_operand" "0")))]
+ ""
+ "neg %X0"
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "neghi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (neg:HI (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ {
+ emit_insn (gen_neghi2_h8300 (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
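+;; The original H8/300 only has a byte-wide neg instruction, so wider
+;; negations are synthesized from the two's-complement identity
+;; -x = ~x + 1, as in the expander below.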
+(define_expand "neghi2_h8300"
+ [(set (match_dup 2)
+ (not:HI (match_operand:HI 1 "register_operand" "")))
+ (set (match_dup 2) (plus:HI (match_dup 2) (const_int 1)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (match_dup 2))]
+ ""
+ "operands[2] = gen_reg_rtx (HImode);")
+
+(define_insn "*neghi2_h8300hs"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
+ (neg:HI (match_operand:HI 1 "h8300_dst_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "neg.w %T0"
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (neg:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_H8300)
+ {
+ emit_insn (gen_negsi2_h8300 (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_expand "negsi2_h8300"
+ [(set (match_dup 2)
+ (not:SI (match_operand:SI 1 "register_operand" "")))
+ (set (match_dup 2) (plus:SI (match_dup 2) (const_int 1)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (match_dup 2))]
+ ""
+ "operands[2] = gen_reg_rtx (SImode);")
+
+(define_insn "*negsi2_h8300hs"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
+ (neg:SI (match_operand:SI 1 "h8300_dst_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "neg.l %S0"
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_zn")])
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (match_operand:SF 1 "register_operand" "")))]
+ ""
+ "")
+
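+;; These are software floating point operations: negating an SFmode
+;; value only requires flipping the IEEE sign bit, i.e. bit 7 of the
+;; most significant byte (H8/300) or bit 15 of the most significant
+;; word (H8/300H and H8S).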
+(define_insn "*negsf2_h8300"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_H8300"
+ "xor.b\\t#128,%z0"
+ [(set_attr "length" "2")])
+
+(define_insn "*negsf2_h8300hs"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "xor.w\\t#32768,%e0"
+ [(set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------
+;; ABSOLUTE VALUE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
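+;; Likewise a software float operation: absolute value just clears the
+;; IEEE sign bit by ANDing the most significant byte or word with 127
+;; or 32767 respectively.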
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (abs:SF (match_operand:SF 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*abssf2_h8300"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_H8300"
+ "and.b\\t#127,%z0"
+ [(set_attr "length" "2")])
+
+(define_insn "*abssf2_h8300hs"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "and.w\\t#32767,%e0"
+ [(set_attr "length" "4")])
+
+;; ----------------------------------------------------------------------
+;; NOT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "one_cmplqi2"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (not:QI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*one_cmplqi2"
+ [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
+ (not:QI (match_operand:QI 1 "h8300_dst_operand" "0")))]
+ ""
+ "not %X0"
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "one_cmplhi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (not:HI (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*one_cmplhi2_h8300"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (not:HI (match_operand:HI 1 "register_operand" "0")))]
+ "TARGET_H8300"
+ "not %s0\;not %t0"
+ [(set_attr "length" "4")])
+
+(define_insn "*one_cmplhi2_h8300hs"
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
+ (not:HI (match_operand:HI 1 "h8300_dst_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "not.w %T0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length_table" "unary")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*one_cmplsi2_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_H8300"
+ "not %w0\;not %x0\;not %y0\;not %z0"
+ [(set_attr "length" "8")])
+
+(define_insn "*one_cmplsi2_h8300hs"
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
+ (not:SI (match_operand:SI 1 "h8300_dst_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S) && h8300_operands_match_p (operands)"
+ "not.l %S0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length_table" "unary")])
+
+;; ----------------------------------------------------------------------
+;; JUMP INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; Conditional jump instructions
+
+(define_expand "cbranchqi4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:QI 1 "h8300_dst_operand" "")
+ (match_operand:QI 2 "h8300_src_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ "h8300_expand_branch (operands); DONE;")
+
+(define_expand "cbranchhi4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:HI 1 "h8300_dst_operand" "")
+ (match_operand:HI 2 "h8300_src_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ "
+{
+  /* Force operand 2 into a register if we're compiling
+     for the H8/300.  */
+ if ((GET_CODE (operands[2]) != REG && operands[2] != const0_rtx)
+ && TARGET_H8300)
+ operands[2] = force_reg (HImode, operands[2]);
+ h8300_expand_branch (operands); DONE;
+}")
+
+(define_expand "cbranchsi4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "h8300_dst_operand" "")
+ (match_operand:SI 2 "h8300_src_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "h8300_expand_branch (operands); DONE;")
+
+(define_insn "branch_true"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0
+ && (GET_CODE (operands[1]) == GT
+ || GET_CODE (operands[1]) == GE
+ || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == LT))
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ if (get_attr_length (insn) == 2)
+ return \"b%j1 %l0\";
+ else if (get_attr_length (insn) == 4)
+ return \"b%j1 %l0:16\";
+ else
+ return \"b%k1 .Lh8BR%=\;jmp @%l0\\n.Lh8BR%=:\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "cc" "none")])
+
+(define_insn "branch_false"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0
+ && (GET_CODE (operands[1]) == GT
+ || GET_CODE (operands[1]) == GE
+ || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == LT))
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ if (get_attr_length (insn) == 2)
+ return \"b%k1 %l0\";
+ else if (get_attr_length (insn) == 4)
+ return \"b%k1 %l0:16\";
+ else
+ return \"b%j1 .Lh8BR%=\;jmp @%l0\\n.Lh8BR%=:\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "cc" "none")])
+
+(define_insn "*brabc"
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract (match_operand:QI 1 "bit_memory_operand" "WU")
+ (const_int 1)
+ (match_operand:QI 2 "immediate_operand" "n"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_H8300SX"
+ "*
+{
+ switch (get_attr_length (insn)
+ - h8300_insn_length_from_table (insn, operands))
+ {
+ case 2:
+ return \"bra/bc %2,%R1,%l0\";
+
+ case 4:
+ return \"bra/bc %2,%R1,%l0:16\";
+
+ default:
+ return \"bra/bs %2,%R1,.Lh8BR%=\;jmp @%l0\\n.Lh8BR%=:\";
+ }
+}"
+ [(set_attr "type" "bitbranch")
+ (set_attr "length_table" "bitbranch")
+ (set_attr "cc" "none")])
+
+(define_insn "*brabs"
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract (match_operand:QI 1 "bit_memory_operand" "WU")
+ (const_int 1)
+ (match_operand:QI 2 "immediate_operand" "n"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_H8300SX"
+ "*
+{
+ switch (get_attr_length (insn)
+ - h8300_insn_length_from_table (insn, operands))
+ {
+ case 2:
+ return \"bra/bs %2,%R1,%l0\";
+
+ case 4:
+ return \"bra/bs %2,%R1,%l0:16\";
+
+ default:
+ return \"bra/bc %2,%R1,.Lh8BR%=\;jmp @%l0\\n.Lh8BR%=:\";
+ }
+}"
+ [(set_attr "type" "bitbranch")
+ (set_attr "length_table" "bitbranch")
+ (set_attr "cc" "none")])
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ if (final_sequence != 0)
+ {
+ if (get_attr_length (insn) == 2)
+ return \"bra/s %l0\";
+ else
+ {
+ /* The branch isn't short enough to use bra/s. Output the
+ branch and delay slot in their normal order.
+
+ If this is a backward branch, it will now be branching two
+ bytes further than previously thought. The length-based
+ test for bra vs. jump is very conservative though, so the
+ branch will still be within range. */
+ rtvec vec;
+ int seen;
+
+ vec = XVEC (final_sequence, 0);
+ final_sequence = 0;
+ final_scan_insn (RTVEC_ELT (vec, 1), asm_out_file, optimize, 1, & seen);
+ final_scan_insn (RTVEC_ELT (vec, 0), asm_out_file, optimize, 1, & seen);
+ INSN_DELETED_P (RTVEC_ELT (vec, 1)) = 1;
+ return \"\";
+ }
+ }
+ else if (get_attr_length (insn) == 2)
+ return \"bra %l0\";
+ else if (get_attr_length (insn) == 4)
+ return \"bra %l0:16\";
+ else
+ return \"jmp @%l0\";
+}"
+ [(set_attr "type" "branch")
+ (set (attr "delay_slot")
+ (if_then_else (ne (symbol_ref "TARGET_H8300SX") (const_int 0))
+ (const_string "jump")
+ (const_string "none")))
+ (set_attr "cc" "none")])
+
+;; This is a define_expand because pointers may be either 16 or 32 bits.
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "register_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+ "")
+
+(define_insn "*tablejump_h8300"
+ [(set (pc) (match_operand:HI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_H8300"
+ "jmp @%0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+(define_insn "*tablejump_h8300hs_advanced"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE"
+ "jmp @%0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+(define_insn "*tablejump_h8300hs_normal"
+ [(set (pc) (match_operand:HI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "(TARGET_H8300H || TARGET_H8300S) && TARGET_NORMAL_MODE"
+ "jmp @%S0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+;; This is a define_expand because pointers may be either 16 or 32 bits.
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "jump_address_operand" ""))]
+ ""
+ "")
+
+(define_insn "*indirect_jump_h8300"
+ [(set (pc) (match_operand:HI 0 "jump_address_operand" "Vr"))]
+ "TARGET_H8300"
+ "jmp @%0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+(define_insn "*indirect_jump_h8300hs_advanced"
+ [(set (pc) (match_operand:SI 0 "jump_address_operand" "Vr"))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE"
+ "jmp @%0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+(define_insn "*indirect_jump_h8300hs_normal"
+ [(set (pc) (match_operand:HI 0 "jump_address_operand" "Vr"))]
+ "(TARGET_H8300H || TARGET_H8300S) && TARGET_NORMAL_MODE"
+ "jmp @%S0"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+;; Call subroutine with no return value.
+
+;; ??? Even though we use HImode here, this works on the H8/300H and H8S.
+
+(define_insn "call"
+ [(call (match_operand:QI 0 "call_insn_operand" "or")
+ (match_operand:HI 1 "general_operand" "g"))]
+ ""
+ "*
+{
+ if (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && SYMBOL_REF_FLAG (XEXP (operands[0], 0)))
+ return \"jsr\\t@%0:8\";
+ else
+ return \"jsr\\t%0\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (if_then_else (match_operand:QI 0 "small_call_insn_operand" "")
+ (const_int 2)
+ (const_int 4)))])
+
+;; Call subroutine, returning value in operand 0
+;; (which must be a hard register).
+
+;; ??? Even though we use HImode here, this works on the H8/300H and H8S.
+
+(define_insn "call_value"
+ [(set (match_operand 0 "" "=r")
+ (call (match_operand:QI 1 "call_insn_operand" "or")
+ (match_operand:HI 2 "general_operand" "g")))]
+ ""
+ "*
+{
+ if (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && SYMBOL_REF_FLAG (XEXP (operands[1], 0)))
+ return \"jsr\\t@%1:8\";
+ else
+ return \"jsr\\t%1\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+	(if_then_else (match_operand:QI 1 "small_call_insn_operand" "")
+ (const_int 2)
+ (const_int 4)))])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "cc" "none")
+ (set_attr "length" "2")])
+
+;; ----------------------------------------------------------------------
+;; PROLOGUE/EPILOGUE-RELATED INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "push_h8300"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:HI 0 "register_operand" ""))]
+ "TARGET_H8300"
+ "")
+
+(define_expand "push_h8300hs_advanced"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))]
+ "TARGET_H8300H && TARGET_H8300S && !TARGET_NORMAL_MODE"
+ "")
+
+(define_expand "push_h8300hs_normal"
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))]
+ "TARGET_H8300H && TARGET_H8300S && TARGET_NORMAL_MODE"
+ "")
+
+(define_expand "pop_h8300"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mem:HI (post_inc:HI (reg:HI SP_REG))))]
+ "TARGET_H8300"
+ "")
+
+(define_expand "pop_h8300hs_advanced"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (post_inc:SI (reg:SI SP_REG))))]
+ "TARGET_H8300H && TARGET_H8300S && !TARGET_NORMAL_MODE"
+ "")
+
+(define_expand "pop_h8300hs_normal"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (post_inc:HI (reg:HI SP_REG))))]
+ "TARGET_H8300H && TARGET_H8300S && TARGET_NORMAL_MODE"
+ "")
+
+(define_insn "ldm_h8300sx"
+ [(match_parallel 0 "h8300_ldm_parallel"
+ [(set (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))])]
+ "TARGET_H8300S"
+ {
+ operands[3] = SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2));
+ return "ldm.l\t@er7+,%S1-%S3";
+ }
+ [(set_attr "cc" "none")
+ (set_attr "length" "4")])
+
+(define_insn "stm_h8300sx"
+ [(match_parallel 0 "h8300_stm_parallel"
+ [(set (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "register_operand" ""))])]
+ "TARGET_H8300S"
+ {
+ operands[3] = SET_SRC (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2));
+ return "stm.l\t%S2-%S3,@-er7";
+ }
+ [(set_attr "cc" "none")
+ (set_attr "length" "4")])
+
+(define_insn "return_h8sx"
+ [(match_parallel 0 "h8300_return_parallel"
+ [(return)
+ (set (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))])]
+ "TARGET_H8300SX"
+ {
+ operands[3] = SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2));
+ if (h8300_current_function_interrupt_function_p ())
+ return "rte/l\t%S1-%S3";
+ else
+ return "rts/l\t%S1-%S3";
+ }
+ [(set_attr "cc" "none")
+ (set_attr "can_delay" "no")
+ (set_attr "length" "2")])
+
+(define_expand "return"
+ [(return)]
+ "h8300_can_use_return_insn_p ()"
+ "")
+
+(define_insn "*return_1"
+ [(return)]
+ "reload_completed"
+ "*
+{
+ if (h8300_current_function_interrupt_function_p ())
+ return \"rte\";
+ else
+ return \"rts\";
+}"
+ [(set_attr "cc" "none")
+ (set_attr "can_delay" "no")
+ (set_attr "length" "2")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "h8300_expand_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "h8300_expand_epilogue (); DONE;")
+
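+;; Prologue for functions that must run with interrupts masked: save
+;; the caller's CCR into its slot in the stack frame (plus EXR on the
+;; H8S) and then set the I bit with orc #128,ccr.  The exact frame
+;; offsets used are visible in the template below.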
+(define_insn "monitor_prologue"
+ [(unspec_volatile [(const_int 0)] UNSPEC_MONITOR)]
+ ""
+ "*
+{
+  if (TARGET_H8300)
+    return \"subs\\t#2,r7\;mov.w\\tr0,@-r7\;stc\\tccr,r0l\;mov.b\\tr0l,@(2,r7)\;mov.w\\t@r7+,r0\;orc\\t#128,ccr\";
+  else if (TARGET_H8300H)
+    return \"mov.l\\ter0,@-er7\;stc\\tccr,r0l\;mov.b\\tr0l,@(4,er7)\;mov.l\\t@er7+,er0\;orc\\t#128,ccr\";
+  else if (TARGET_H8300S)
+    return \"stc\\texr,@-er7\;mov.l\\ter0,@-er7\;stc\\tccr,r0l\;mov.b\\tr0l,@(6,er7)\;mov.l\\t@er7+,er0\;orc\\t#128,ccr\";
+ gcc_unreachable ();
+}"
+ [(set_attr "length" "20")])
+
+;; ----------------------------------------------------------------------
+;; EXTEND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "general_operand_src" "")))]
+ ""
+ "")
+
+(define_insn "*zero_extendqihi2_h8300"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300"
+ "@
+ mov.b #0,%t0
+ #"
+ [(set_attr "length" "2,10")])
+
+(define_insn "*zero_extendqihi2_h8300hs"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "@
+ extu.w %T0
+ #"
+ [(set_attr "length" "2,10")
+ (set_attr "cc" "set_znv,set_znv")])
+
+;; Split the zero extension of a general operand (actually a memory
+;; operand) into a load of the operand and the actual zero extension
+;; so that 1) the length will be accurate, and 2) the zero extensions
+;; appearing at the end of basic blocks may be merged.
+
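+;; After the split, a zero extension from memory such as
+;; (zero_extend:HI (mem:QI ...)) becomes a plain byte load followed by
+;; a register-only extension, e.g. (symbol and register here are
+;; placeholders):
+;;
+;;     mov.b  @_foo,r0l
+;;     extu.w r0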
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "general_operand_src" "")))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (set (match_dup 0)
+ (zero_extend:HI (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));")
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "general_operand_src" "")))]
+ ""
+ {
+ if (TARGET_H8300SX)
+ operands[1] = force_reg (QImode, operands[1]);
+ })
+
+(define_insn "*zero_extendqisi2_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300"
+ "@
+ mov.b #0,%x0\;sub.w %e0,%e0
+ mov.b %R1,%w0\;mov.b #0,%x0\;sub.w %e0,%e0"
+ [(set_attr "length" "4,8")])
+
+(define_insn "*zero_extendqisi2_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX"
+ "#")
+
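+;; The two splits below are complementary: when the destination
+;; overlaps the source we must extend in place, one step at a time
+;; (byte load, extend to HI, extend to SI); when it doesn't, it is
+;; cheaper to clear the whole destination first and then drop the
+;; source byte into its low part.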
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "general_operand_src" "")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX
+ && reg_overlap_mentioned_p (operands[0], operands[1])
+ && reload_completed"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (set (match_dup 3)
+ (zero_extend:HI (match_dup 2)))
+ (set (match_dup 0)
+ (zero_extend:SI (match_dup 3)))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);
+ operands[3] = gen_lowpart (HImode, operands[0]);")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "general_operand_src" "")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && reload_completed"
+ [(set (match_dup 0)
+ (const_int 0))
+ (set (strict_low_part (match_dup 2))
+ (match_dup 1))]
+ "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));")
+
+(define_insn "*zero_extendqisi2_h8sx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "0")))]
+ "TARGET_H8300SX"
+ "extu.l\t#2,%0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "")
+
+;; %e prints the high part of a CONST_INT, not the low part. Arggh.
+(define_insn "*zero_extendhisi2_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI (match_operand:HI 1 "general_operand_src" "0,i,g>")))]
+ "TARGET_H8300"
+ "@
+ sub.w %e0,%e0
+ mov.w %f1,%f0\;sub.w %e0,%e0
+ mov.w %e1,%f0\;sub.w %e0,%e0"
+ [(set_attr "length" "2,4,6")])
+
+(define_insn "*zero_extendhisi2_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "extu.l %S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*extendqihi2_h8300"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300"
+ "@
+ bld #7,%s0\;subx %t0,%t0
+ mov.b %R1,%s0\;bld #7,%s0\;subx %t0,%t0"
+ [(set_attr "length" "4,8")])
+
+(define_insn "*extendqihi2_h8300hs"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "exts.w %T0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*extendqisi2_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300"
+ "@
+ bld #7,%w0\;subx %x0,%x0\;subx %y0,%y0\;subx %z0,%z0
+ mov.b %R1,%w0\;bld #7,%w0\;subx %x0,%x0\;subx %y0,%y0\;subx %z0,%z0"
+ [(set_attr "length" "8,12")])
+
+;; The following pattern is needed because without the pattern, the
+;; combiner would split (sign_extend:SI (reg:QI)) into two 24-bit
+;; shifts, one ashift and one ashiftrt.
+
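+;; Without it, the extension would be canonicalized into something
+;; like (a sketch, not dumped from the compiler):
+;;
+;;     (ashiftrt:SI (ashift:SI (reg:SI 0) (const_int 24))
+;;                  (const_int 24))
+;;
+;; and each 24-bit shift would then be expanded by the shift code.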
+(define_insn_and_split "*extendqisi2_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_H8300SX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:HI (match_dup 1)))
+ (set (match_dup 0)
+ (sign_extend:SI (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+(define_insn "*extendqisi2_h8sx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))]
+ "TARGET_H8300SX"
+ "exts.l\t#2,%0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*extendhisi2_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_operand_src" "0,g>")))]
+ "TARGET_H8300"
+ "@
+ bld #7,%x0\;subx %y0,%y0\;subx %z0,%z0
+ mov.w %T1,%f0\;bld #7,%x0\;subx %y0,%y0\;subx %z0,%z0"
+ [(set_attr "length" "6,10")])
+
+(define_insn "*extendhisi2_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "exts.l %S0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_znv")])
+
+;; ----------------------------------------------------------------------
+;; SHIFTS
+;; ----------------------------------------------------------------------
+;;
+;; We make some attempt at providing really efficient shifting.  One example
+;; is doing an 8-bit shift of a 16-bit value by moving a byte reg into the
+;; other reg and moving 0 into the former reg.
+;;
+;; We also try to achieve this in a uniform way.  That is, we don't try to
+;; achieve this both in rtl and at insn emit time.  Ideally, we'd use rtl, as
+;; that would give the optimizer more cracks at the code.  However, we wish
+;; to do things like optimizing shifting the sign bit to bit 0 by rotating
+;; the other way.  There is rtl to handle this (rotate + and), but the
+;; H8/300 doesn't handle 16-bit rotates.  Also, if we emit complicated rtl,
+;; combine may not be able to detect cases it can optimize.
+;;
+;; For these and other fuzzy reasons, I've decided to go the less pretty but
+;; easier "do it at insn emit time" route.
+
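+;; As an illustration of the first example above (register names are
+;; placeholders): shifting a 16-bit register left by 8 is emitted as a
+;; byte move plus a byte clear instead of eight single-bit shifts:
+;;
+;;     mov.b  r0l,r0h
+;;     sub.b  r0l,r0l
+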
+;; QI BIT SHIFTS
+
+(define_expand "ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ashift:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (QImode, ASHIFT, operands)) DONE;")
+
+(define_expand "ashrqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (QImode, ASHIFTRT, operands)) DONE;")
+
+(define_expand "lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (QImode, LSHIFTRT, operands)) DONE;")
+
+(define_insn ""
+ [(set (match_operand:QI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:QI 3 "h8sx_unary_shift_operator"
+ [(match_operand:QI 1 "h8300_dst_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "")]))]
+ "h8300_operands_match_p (operands)"
+ { return output_h8sx_shift (operands, 'b', 'X'); }
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_znv")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (match_operator:QI 3 "h8sx_binary_shift_operator"
+ [(match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "r P3>X")]))]
+ ""
+ { return output_h8sx_shift (operands, 'b', 'X'); }
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*shiftqi"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (match_operator:QI 3 "nshift_operator"
+ [ (match_operand:QI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "R,rn")]))
+ (clobber (match_scratch:QI 4 "=X,&r"))]
+ ""
+ "* return output_a_shift (operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_shift_length (insn, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_a_shift_cc (insn, operands)"))])
+
+;; HI BIT SHIFTS
+
+(define_expand "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ashift:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (HImode, ASHIFT, operands)) DONE;")
+
+(define_expand "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (HImode, LSHIFTRT, operands)) DONE;")
+
+(define_expand "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (HImode, ASHIFTRT, operands)) DONE;")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:HI 3 "h8sx_unary_shift_operator"
+ [(match_operand:HI 1 "h8300_dst_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "")]))]
+ "h8300_operands_match_p (operands)"
+ { return output_h8sx_shift (operands, 'w', 'T'); }
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_znv")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 3 "h8sx_binary_shift_operator"
+ [(match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "r P4>X")]))]
+ ""
+ { return output_h8sx_shift (operands, 'w', 'T'); }
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*shifthi"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (match_operator:HI 3 "nshift_operator"
+ [ (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "S,rn")]))
+ (clobber (match_scratch:QI 4 "=X,&r"))]
+ ""
+ "* return output_a_shift (operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_shift_length (insn, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_a_shift_cc (insn, operands)"))])
+
+;; SI BIT SHIFTS
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (SImode, ASHIFT, operands)) DONE;")
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (SImode, LSHIFTRT, operands)) DONE;")
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_shift (SImode, ASHIFTRT, operands)) DONE;")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "h8300_dst_operand" "=rQ")
+ (match_operator:SI 3 "h8sx_unary_shift_operator"
+ [(match_operand:SI 1 "h8300_dst_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "")]))]
+ "h8300_operands_match_p (operands)"
+ { return output_h8sx_shift (operands, 'l', 'S'); }
+ [(set_attr "length_table" "unary")
+ (set_attr "cc" "set_znv")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 3 "h8sx_binary_shift_operator"
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "r P5>X")]))]
+ ""
+ { return output_h8sx_shift (operands, 'l', 'S'); }
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "*shiftsi"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI 3 "nshift_operator"
+ [ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "T,rn")]))
+ (clobber (match_scratch:QI 4 "=X,&r"))]
+ ""
+ "* return output_a_shift (operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_shift_length (insn, operands)"))
+ (set (attr "cc")
+ (symbol_ref "compute_a_shift_cc (insn, operands)"))])
+
+;; Split a variable shift into a loop. If the register containing
+;; the shift count dies, we use it as the loop counter directly;
+;; otherwise (second split below) we copy it into the scratch
+;; register first.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 2 "nshift_operator"
+ [(match_dup 0)
+ (match_operand:QI 1 "register_operand" "")]))
+ (clobber (match_operand:QI 3 "register_operand" ""))]
+ "epilogue_completed
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))"
+ [(set (cc0) (compare (match_dup 1)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (le (cc0) (const_int 0))
+ (label_ref (match_dup 5))
+ (pc)))
+ (match_dup 4)
+ (parallel
+ [(set (match_dup 0)
+ (match_op_dup 2 [(match_dup 0) (const_int 1)]))
+ (clobber (scratch:QI))])
+ (set (match_dup 1)
+ (plus:QI (match_dup 1) (const_int -1)))
+ (set (cc0) (compare (match_dup 1)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (ne (cc0) (const_int 0))
+ (label_ref (match_dup 4))
+ (pc)))
+ (match_dup 5)]
+ "operands[4] = gen_label_rtx ();
+ operands[5] = gen_label_rtx ();")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 2 "nshift_operator"
+ [(match_dup 0)
+ (match_operand:QI 1 "register_operand" "")]))
+ (clobber (match_operand:QI 3 "register_operand" ""))]
+ "epilogue_completed
+ && !find_regno_note (insn, REG_DEAD, REGNO (operands[1]))"
+ [(set (match_dup 3)
+ (match_dup 1))
+ (set (cc0) (compare (match_dup 3)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (le (cc0) (const_int 0))
+ (label_ref (match_dup 5))
+ (pc)))
+ (match_dup 4)
+ (parallel
+ [(set (match_dup 0)
+ (match_op_dup 2 [(match_dup 0) (const_int 1)]))
+ (clobber (scratch:QI))])
+ (set (match_dup 3)
+ (plus:QI (match_dup 3) (const_int -1)))
+ (set (cc0) (compare (match_dup 3)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (ne (cc0) (const_int 0))
+ (label_ref (match_dup 4))
+ (pc)))
+ (match_dup 5)]
+ "operands[4] = gen_label_rtx ();
+ operands[5] = gen_label_rtx ();")
+
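+;; As a sketch (H8/300H mnemonics, registers assumed: value in r0l,
+;; count in r1l), the loop produced by the two splits above is roughly:
+;;
+;;         mov.b   r1l,r1l         ; test the count
+;;         ble     .Lend           ; counts <= 0 shift nothing
+;; .Lloop: shll.b  r0l             ; one bit per iteration
+;;         dec.b   r1l
+;;         bne     .Lloop
+;; .Lend:
+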
+;; ----------------------------------------------------------------------
+;; ROTATIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "rotlqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (rotate:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_rotate (operands)) DONE;")
+
+(define_insn "rotlqi3_1"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (rotate:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "")))]
+ ""
+ "* return output_a_rotate (ROTATE, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_rotate_length (operands)"))])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (rotate:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "if (expand_a_rotate (operands)) DONE;")
+
+(define_insn "rotlhi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (rotate:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "")))]
+ ""
+ "* return output_a_rotate (ROTATE, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_rotate_length (operands)"))])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (rotate:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "if (expand_a_rotate (operands)) DONE;")
+
+(define_insn "rotlsi3_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "immediate_operand" "")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "* return output_a_rotate (ROTATE, operands);"
+ [(set (attr "length")
+ (symbol_ref "compute_a_rotate_length (operands)"))])
+
+;; -----------------------------------------------------------------
+;; BIT FIELDS
+;; -----------------------------------------------------------------
+;; The H8/300 has given 1/8th of its opcode space to bitfield
+;; instructions, so let's use them as well as we can.
+
+;; You'll never believe all these patterns perform one basic action --
+;; load a bit from the source, optionally invert the bit, then store it
+;; in the destination (which is known to be zero).
+;;
+;; Combine obviously needs some work to identify this situation and
+;; canonicalize the form better.
+
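+;; A typical source of these patterns is C code such as
+;;
+;;     x = (y >> 5) & 1;
+;;
+;; which combine can turn into a single-bit zero_extract (a sketch;
+;; whether it does depends on the surrounding code).
+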
+;;
+;; Normal loads with a 16-bit destination.
+;;
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (zero_extract:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:HI 2 "immediate_operand" "n")))]
+ "TARGET_H8300"
+ "sub.w %0,%0\;bld %Z2,%Y1\;bst #0,%X0"
+ [(set_attr "length" "6")])
+
+;;
+;; Inverted loads with a 16-bit destination.
+;;
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (zero_extract:HI (xor:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 3 "const_int_operand" "n"))
+ (const_int 1)
+ (match_operand:HI 2 "const_int_operand" "n")))]
+ "(TARGET_H8300 || TARGET_H8300SX)
+ && (1 << INTVAL (operands[2])) == INTVAL (operands[3])"
+ "sub.w %0,%0\;bild %Z2,%Y1\;bst #0,%X0"
+ [(set_attr "length" "8")])
+
+;;
+;; Normal loads with a 32-bit destination.
+;;
+
+(define_insn "*extzv_1_r_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (zero_extract:SI (match_operand:HI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n")))]
+ "TARGET_H8300
+ && INTVAL (operands[2]) < 16"
+ "* return output_simode_bld (0, operands);"
+ [(set_attr "length" "8")])
+
+(define_insn "*extzv_1_r_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "?0,r")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n,n")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[2]) < 16"
+ "* return output_simode_bld (0, operands);"
+ [(set_attr "cc" "set_znv,set_znv")
+ (set_attr "length" "8,6")])
+
+;;
+;; Inverted loads with a 32-bit destination.
+;;
+
+(define_insn "*extzv_1_r_inv_h8300"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (zero_extract:SI (xor:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 3 "const_int_operand" "n"))
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n")))]
+ "TARGET_H8300
+ && INTVAL (operands[2]) < 16
+ && (1 << INTVAL (operands[2])) == INTVAL (operands[3])"
+ "* return output_simode_bld (1, operands);"
+ [(set_attr "length" "8")])
+
+(define_insn "*extzv_1_r_inv_h8300hs"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (xor:SI (match_operand:SI 1 "register_operand" "?0,r")
+ (match_operand 3 "const_int_operand" "n,n"))
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "n,n")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[2]) < 16
+ && (1 << INTVAL (operands[2])) == INTVAL (operands[3])"
+ "* return output_simode_bld (1, operands);"
+ [(set_attr "cc" "set_znv,set_znv")
+ (set_attr "length" "8,6")])
+
+(define_expand "insv"
+ [(set (zero_extract:HI (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" ""))
+ (match_operand:HI 3 "general_operand" ""))]
+ "TARGET_H8300 || TARGET_H8300SX"
+ "
+{
+ if (TARGET_H8300SX)
+ {
+ if (GET_CODE (operands[1]) == CONST_INT
+ && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[1]) <= 8
+ && INTVAL (operands[2]) >= 0
+ && INTVAL (operands[1]) + INTVAL (operands[2]) <= 8
+ && memory_operand (operands[0], GET_MODE (operands[0])))
+ {
+ /* If the source operand is zero, it's better to use AND rather
+ than BFST. Likewise OR if the operand is all ones. */
+ if (GET_CODE (operands[3]) == CONST_INT)
+ {
+ HOST_WIDE_INT mask = (1 << INTVAL (operands[1])) - 1;
+ if ((INTVAL (operands[3]) & mask) == 0)
+ FAIL;
+ if ((INTVAL (operands[3]) & mask) == mask)
+ FAIL;
+ }
+ if (! bit_memory_operand (operands[0], GET_MODE (operands[0])))
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+ operands[0] =
+ replace_equiv_address (operands[0],
+ force_reg (Pmode,
+ XEXP (operands[0], 0)));
+ }
+ operands[3] = gen_lowpart (QImode, operands[3]);
+ if (! operands[3])
+ FAIL;
+ if (! register_operand (operands[3], QImode))
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+ operands[3] = force_reg (QImode, operands[3]);
+ }
+ emit_insn (gen_bfst (adjust_address (operands[0], QImode, 0),
+ operands[3], operands[1], operands[2]));
+ DONE;
+ }
+
+ FAIL;
+ }
+
+ /* We only have single-bit bit-field instructions. */
+ if (INTVAL (operands[1]) != 1)
+ FAIL;
+
+ /* For now, we don't allow memory operands. */
+ if (GET_CODE (operands[0]) == MEM
+ || GET_CODE (operands[3]) == MEM)
+ FAIL;
+
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = force_reg (HImode, operands[3]);
+}")
+
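+;; For illustration (an assumed, typical source): a one-bit bit-field
+;; store such as
+;;
+;;     struct { unsigned int f:1; } s;  s.f = y;
+;;
+;; reaches this expander as an insv of width 1.
+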
+(define_insn ""
+ [(set (zero_extract:HI (match_operand:HI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:HI 1 "immediate_operand" "n"))
+ (match_operand:HI 2 "register_operand" "r"))]
+ ""
+ "bld #0,%R2\;bst %Z1,%Y0 ; i1"
+ [(set_attr "length" "4")])
+
+(define_expand "extzv"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extract:HI (match_operand:HI 1 "bit_operand" "")
+ (match_operand:HI 2 "general_operand" "")
+ (match_operand:HI 3 "general_operand" "")))]
+ "TARGET_H8300 || TARGET_H8300SX"
+ "
+{
+ if (TARGET_H8300SX)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && GET_CODE (operands[3]) == CONST_INT
+ && INTVAL (operands[2]) <= 8
+ && INTVAL (operands[3]) >= 0
+ && INTVAL (operands[2]) + INTVAL (operands[3]) <= 8
+ && memory_operand (operands[1], QImode))
+ {
+ rtx temp;
+
+ /* Optimize the case where we're extracting into a paradoxical
+ subreg. It's only necessary to extend to the inner reg. */
+ if (GET_CODE (operands[0]) == SUBREG
+ && subreg_lowpart_p (operands[0])
+ && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))
+ < GET_MODE_SIZE (GET_MODE (operands[0])))
+ && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0])))
+ == MODE_INT))
+ operands[0] = SUBREG_REG (operands[0]);
+
+ if (!can_create_pseudo_p ())
+ temp = gen_lowpart (QImode, operands[0]);
+ else
+ temp = gen_reg_rtx (QImode);
+ if (! temp)
+ FAIL;
+ if (! bit_memory_operand (operands[1], QImode))
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+ operands[1] =
+ replace_equiv_address (operands[1],
+ force_reg (Pmode,
+ XEXP (operands[1], 0)));
+ }
+ emit_insn (gen_bfld (temp, operands[1], operands[2], operands[3]));
+ convert_move (operands[0], temp, 1);
+ DONE;
+ }
+ FAIL;
+ }
+
+ /* We only have single-bit bit-field instructions. */
+ if (INTVAL (operands[2]) != 1)
+ FAIL;
+
+ /* For now, we don't allow memory operands. */
+ if (GET_CODE (operands[1]) == MEM)
+ FAIL;
+}")
+
+;; BAND, BOR, and BXOR patterns
+
+(define_insn ""
+ [(set (match_operand:HI 0 "bit_operand" "=Ur")
+ (match_operator:HI 4 "bit_operator"
+ [(zero_extract:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:HI 2 "immediate_operand" "n"))
+ (match_operand:HI 3 "bit_operand" "0")]))]
+ ""
+ "bld %Z2,%Y1\;b%c4 #0,%R0\;bst #0,%R0; bl1"
+ [(set_attr "length" "6")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "bit_operand" "=Ur")
+ (match_operator:HI 5 "bit_operator"
+ [(zero_extract:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:HI 2 "immediate_operand" "n"))
+ (zero_extract:HI (match_operand:HI 3 "register_operand" "r")
+ (const_int 1)
+ (match_operand:HI 4 "immediate_operand" "n"))]))]
+ ""
+ "bld %Z2,%Y1\;b%c5 %Z4,%Y3\;bst #0,%R0; bl3"
+ [(set_attr "length" "6")])
+
+(define_insn "bfld"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (zero_extract:QI (match_operand:QI 1 "bit_memory_operand" "WU")
+ (match_operand:QI 2 "immediate_operand" "n")
+ (match_operand:QI 3 "immediate_operand" "n")))]
+ "TARGET_H8300SX && INTVAL (operands[2]) + INTVAL (operands[3]) <= 8"
+ "*
+{
+ operands[2] = GEN_INT ((1 << (INTVAL (operands[2]) + INTVAL (operands[3])))
+ - (1 << INTVAL (operands[3])));
+ return \"bfld %2,%1,%R0\";
+}"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "bitfield")])
+
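+;; bfld (and bfst below) wants a bit mask rather than a (width,
+;; position) pair; (1 << (width + pos)) - (1 << pos) is a run of
+;; `width' one bits starting at bit `pos'.  For example, width 3 at
+;; position 2 gives 0x20 - 0x04 = 0x1c.
+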
+(define_insn "bfst"
+ [(set (zero_extract:QI (match_operand:QI 0 "bit_memory_operand" "+WU")
+ (match_operand:QI 2 "immediate_operand" "n")
+ (match_operand:QI 3 "immediate_operand" "n"))
+ (match_operand:QI 1 "register_operand" "r"))]
+ "TARGET_H8300SX && INTVAL (operands[2]) + INTVAL (operands[3]) <= 8"
+ "*
+{
+ operands[2] = GEN_INT ((1 << (INTVAL (operands[2]) + INTVAL (operands[3])))
+ - (1 << INTVAL (operands[3])));
+ return \"bfst %R1,%2,%0\";
+}"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "bitfield")])
+
+(define_expand "cstoreqi4"
+ [(use (match_operator 1 "eqne_operator"
+ [(match_operand:QI 2 "h8300_dst_operand" "")
+ (match_operand:QI 3 "h8300_src_operand" "")]))
+ (clobber (match_operand:HI 0 "register_operand"))]
+ "TARGET_H8300SX"
+ "h8300_expand_store (operands); DONE;")
+
+(define_expand "cstorehi4"
+ [(use (match_operator 1 "eqne_operator"
+ [(match_operand:HI 2 "h8300_dst_operand" "")
+ (match_operand:HI 3 "h8300_src_operand" "")]))
+ (clobber (match_operand:HI 0 "register_operand"))]
+ "TARGET_H8300SX"
+ "h8300_expand_store (operands); DONE;")
+
+(define_expand "cstoresi4"
+ [(use (match_operator 1 "eqne_operator"
+ [(match_operand:SI 2 "h8300_dst_operand" "")
+ (match_operand:SI 3 "h8300_src_operand" "")]))
+ (clobber (match_operand:HI 0 "register_operand"))]
+ "TARGET_H8300SX"
+ "h8300_expand_store (operands); DONE;")
+
+(define_insn "*bstzhireg"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "eqne_operator" [(cc0) (const_int 0)]))]
+ "TARGET_H8300SX"
+ "mulu.w #0,%T0\;b%k1 .Lh8BR%=\;inc.w #1,%T0\\n.Lh8BR%=:"
+ [(set_attr "cc" "clobber")])
+
+(define_insn_and_split "*cmpstz"
+ [(set (zero_extract:QI
+ (match_operand:QI 0 "bit_memory_operand" "+WU,+WU")
+ (const_int 1)
+ (match_operand:QI 1 "immediate_operand" "n,n"))
+ (match_operator:QI
+ 2 "eqne_operator"
+ [(match_operand 3 "h8300_dst_operand" "r,rQ")
+ (match_operand 4 "h8300_src_operand" "I,rQi")]))]
+ "TARGET_H8300SX
+ && (GET_MODE (operands[3]) == GET_MODE (operands[4])
+ || GET_CODE (operands[4]) == CONST_INT)
+ && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[3])) <= 4"
+ "#"
+ "reload_completed"
+ [(set (cc0) (match_dup 5))
+ (set (zero_extract:QI (match_dup 0) (const_int 1) (match_dup 1))
+ (match_op_dup:QI 2 [(cc0) (const_int 0)]))]
+ "operands[5] = gen_rtx_COMPARE (VOIDmode, operands[3], operands[4]);"
+ [(set_attr "cc" "set_znv,compare")])
+
+(define_insn "*bstz"
+ [(set (zero_extract:QI (match_operand:QI 0 "bit_memory_operand" "+WU")
+ (const_int 1)
+ (match_operand:QI 1 "immediate_operand" "n"))
+ (eq:QI (cc0) (const_int 0)))]
+ "TARGET_H8300SX && reload_completed"
+ "bstz %1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "unary")])
+
+(define_insn "*bistz"
+ [(set (zero_extract:QI (match_operand:QI 0 "bit_memory_operand" "+WU")
+ (const_int 1)
+ (match_operand:QI 1 "immediate_operand" "n"))
+ (ne:QI (cc0) (const_int 0)))]
+ "TARGET_H8300SX && reload_completed"
+ "bistz %1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "unary")])
+
+(define_insn_and_split "*cmpcondbset"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=WU,WU")
+ (if_then_else:QI
+ (match_operator
+ 1 "eqne_operator"
+ [(match_operand 2 "h8300_dst_operand" "r,rQ")
+ (match_operand 3 "h8300_src_operand" "I,rQi")])
+ (ior:QI
+ (match_operand:QI 4 "bit_memory_operand" "0,0")
+ (match_operand:QI 5 "single_one_operand" "n,n"))
+ (match_dup 4)))]
+ "TARGET_H8300SX"
+ "#"
+ "reload_completed"
+ [(set (cc0) (match_dup 6))
+ (set (match_dup 0)
+ (if_then_else:QI
+ (match_op_dup 1 [(cc0) (const_int 0)])
+ (ior:QI (match_dup 4) (match_dup 5)) (match_dup 4)))]
+ "operands[6] = gen_rtx_COMPARE (VOIDmode, operands[2], operands[3]);"
+ [(set_attr "cc" "set_znv,compare")])
+
+(define_insn "*condbset"
+ [(set (match_operand:QI 0 "bit_memory_operand" "=WU")
+ (if_then_else:QI
+ (match_operator:QI 2 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (ior:QI
+ (match_operand:QI 3 "bit_memory_operand" "0")
+ (match_operand:QI 1 "single_one_operand" "n"))
+ (match_dup 3)))]
+ "TARGET_H8300SX && reload_completed"
+ "bset/%j2\t%V1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "logicb")])
+
+(define_insn_and_split "*cmpcondbclr"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=WU,WU")
+ (if_then_else:QI
+ (match_operator
+ 1 "eqne_operator"
+ [(match_operand 2 "h8300_dst_operand" "r,rQ")
+ (match_operand 3 "h8300_src_operand" "I,rQi")])
+ (and:QI
+ (match_operand:QI 4 "bit_memory_operand" "0,0")
+ (match_operand:QI 5 "single_zero_operand" "n,n"))
+ (match_dup 4)))]
+ "TARGET_H8300SX"
+ "#"
+ "reload_completed"
+ [(set (cc0) (match_dup 6))
+ (set (match_dup 0)
+ (if_then_else:QI
+ (match_op_dup 1 [(cc0) (const_int 0)])
+ (and:QI (match_dup 4) (match_dup 5)) (match_dup 4)))]
+ "operands[6] = gen_rtx_COMPARE (VOIDmode, operands[2], operands[3]);"
+ [(set_attr "cc" "set_znv,compare")])
+
+(define_insn "*condbclr"
+ [(set (match_operand:QI 0 "bit_memory_operand" "=WU")
+ (if_then_else:QI
+ (match_operator:QI 2 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (and:QI
+ (match_operand:QI 3 "bit_memory_operand" "0")
+ (match_operand:QI 1 "single_zero_operand" "n"))
+ (match_dup 3)))]
+ "TARGET_H8300SX && reload_completed"
+ "bclr/%j2\t%W1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "logicb")])
+
+(define_insn_and_split "*cmpcondbsetreg"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=WU,WU")
+ (if_then_else:QI
+ (match_operator
+ 1 "eqne_operator"
+ [(match_operand 2 "h8300_dst_operand" "r,rQ")
+ (match_operand 3 "h8300_src_operand" "I,rQi")])
+ (ior:QI
+ (match_operand:QI 4 "bit_memory_operand" "0,0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 5 "register_operand" "r,r")))
+ (match_dup 4)))]
+ "TARGET_H8300SX"
+ "#"
+ "reload_completed"
+ [(set (cc0) (match_dup 6))
+ (set (match_dup 0)
+ (if_then_else:QI
+ (match_op_dup 1 [(cc0) (const_int 0)])
+ (ior:QI (match_dup 4)
+ (ashift:QI (const_int 1)
+ (match_operand:QI 5 "register_operand" "r,r")))
+ (match_dup 4)))]
+ "operands[6] = gen_rtx_COMPARE (VOIDmode, operands[2], operands[3]);"
+ [(set_attr "cc" "set_znv,compare")])
+
+(define_insn "*condbsetreg"
+ [(set (match_operand:QI 0 "bit_memory_operand" "=WU")
+ (if_then_else:QI
+ (match_operator:QI 2 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (ior:QI
+ (match_operand:QI 3 "bit_memory_operand" "0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 1 "register_operand" "r")))
+ (match_dup 3)))]
+ "TARGET_H8300SX && reload_completed"
+ "bset/%j2\t%R1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "logicb")])
+
+(define_insn_and_split "*cmpcondbclrreg"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=WU,WU")
+ (if_then_else:QI
+ (match_operator
+ 1 "eqne_operator"
+ [(match_operand 2 "h8300_dst_operand" "r,rQ")
+ (match_operand 3 "h8300_src_operand" "I,rQi")])
+ (and:QI
+ (match_operand:QI 4 "bit_memory_operand" "0,0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 5 "register_operand" "r,r")))
+ (match_dup 4)))]
+ "TARGET_H8300SX"
+ "#"
+ "reload_completed"
+ [(set (cc0) (match_dup 6))
+ (set (match_dup 0)
+ (if_then_else:QI
+ (match_op_dup 1 [(cc0) (const_int 0)])
+ (and:QI (match_dup 4)
+ (ashift:QI (const_int 1)
+ (match_operand:QI 5 "register_operand" "r,r")))
+ (match_dup 4)))]
+ "operands[6] = gen_rtx_COMPARE (VOIDmode, operands[2], operands[3]);"
+ [(set_attr "cc" "set_znv,compare")])
+
+(define_insn "*condbclrreg"
+ [(set (match_operand:QI 0 "bit_memory_operand" "=WU")
+ (if_then_else:QI
+ (match_operator:QI 2 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (and:QI
+ (match_operand:QI 3 "bit_memory_operand" "0")
+ (ashift:QI (const_int 1)
+ (match_operand:QI 1 "register_operand" "r")))
+ (match_dup 3)))]
+ "TARGET_H8300SX && reload_completed"
+ "bclr/%j2\t%R1,%0"
+ [(set_attr "cc" "none_0hit")
+ (set_attr "length_table" "logicb")])
+
+
+;; -----------------------------------------------------------------
+;; COMBINE PATTERNS
+;; -----------------------------------------------------------------
+
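+;; A reminder of the operand modifiers used heavily below (see
+;; print_operand in h8300.c for the authoritative list): for er0,
+;; %S0 prints "er0", %T0 "r0", %e0/%f0 the high/low words, and
+;; %x0/%w0 the high/low bytes of the low word (r0h/r0l).
+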
+;; insv:SI
+
+(define_insn "*insv_si_1_n"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (match_operand:SI 2 "register_operand" "r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) < 16"
+ "bld\\t#0,%w2\;bst\\t%Z1,%Y0"
+ [(set_attr "length" "4")])
+
+(define_insn "*insv_si_1_n_lshiftrt"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) < 16
+ && INTVAL (operands[3]) < 16"
+ "bld\\t%Z3,%Y2\;bst\\t%Z1,%Y0"
+ [(set_attr "length" "4")])
+
+(define_insn "*insv_si_1_n_lshiftrt_16"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 16)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) < 16"
+ "rotr.w\\t%e2\;rotl.w\\t%e2\;bst\\t%Z1,%Y0"
+ [(set_attr "length" "6")])
+
+(define_insn "*insv_si_8_8"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SI 1 "register_operand" "r"))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.b\\t%w1,%x0"
+ [(set_attr "length" "2")])
+
+(define_insn "*insv_si_8_8_lshiftrt_8"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (const_int 8)
+ (const_int 8))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 8)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.b\\t%x1,%x0"
+ [(set_attr "length" "2")])
+
+;; extzv:SI
+
+(define_insn "*extzv_8_8"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "?0,r")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "@
+ mov.b\\t%x1,%w0\;extu.w\\t%f0\;extu.l\\t%S0
+ sub.l\\t%S0,%S0\;mov.b\\t%x1,%w0"
+ [(set_attr "cc" "set_znv,clobber")
+ (set_attr "length" "6,4")])
+
+(define_insn "*extzv_8_16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 8)
+ (const_int 16)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.w\\t%e1,%f0\;extu.w\\t%f0\;extu.l\\t%S0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length" "6")])
+
+(define_insn "*extzv_16_8"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 16)
+ (const_int 8)))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_H8300H"
+ "mov.w\\t%e1,%f2\;mov.b\\t%x1,%w0\;mov.b\\t%w2,%x0\;extu.l\\t%S0"
+ [(set_attr "length" "8")
+ (set_attr "cc" "set_znv")])
+
+;; Extract the exponent of a float.
+
+(define_insn_and_split "*extzv_8_23"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 8)
+ (const_int 23)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (const_int 1)))
+ (clobber (scratch:QI))])
+ (parallel [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 0)
+ (const_int 24)))
+ (clobber (scratch:QI))])]
+ "")
+
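+;; Sanity check for the split above: an IEEE single-precision value
+;; keeps its exponent in bits 23..30, so shifting left by 1 and then
+;; logically right by 24 isolates it.  E.g. 1.0f = 0x3f800000 ->
+;; 0x7f000000 -> 0x7f (biased exponent 127).
+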
+;; and:SI
+
+;; ((SImode) HImode) << 15, i.e. (A << 15) & 0x7fff8000.
+
+(define_insn_and_split "*twoshifts_l16_r1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 15))
+ (const_int 2147450880)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (const_int 16)))
+ (clobber (scratch:QI))])
+ (parallel [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 0)
+ (const_int 1)))
+ (clobber (scratch:QI))])]
+ "")
+
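+;; The constant 2147450880 is 0x7fff8000, so the pattern above is
+;; (A << 15) & 0x7fff8000.  The split does it as a 16-bit left shift
+;; (which the port implements as a word move) followed by one
+;; single-bit right shift, instead of a long run of small shifts.
+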
+;; Transform (SImode << B) & 0xffff into (SImode) (HImode << B).
+
+(define_insn_and_split "*andsi3_ashift_n_lower"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "const_int_operand" "S,n"))
+ (match_operand:SI 3 "const_int_operand" "n,n")))
+ (clobber (match_scratch:QI 4 "=X,&r"))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[2]) <= 15
+ && INTVAL (operands[3]) == ((-1 << INTVAL (operands[2])) & 0xffff)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 5)
+ (ashift:HI (match_dup 5)
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (zero_extend:SI (match_dup 5)))]
+ "operands[5] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+;; Accept (A >> 30) & 2 and the like.
+
+(define_insn "*andsi3_lshiftrt_n_sb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "single_one_operand" "n")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && exact_log2 (INTVAL (operands[3])) < 16
+ && INTVAL (operands[2]) + exact_log2 (INTVAL (operands[3])) == 31"
+ "*
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return \"shll.l\\t%S0\;xor.l\\t%S0,%S0\;bst\\t%Z3,%Y0\";
+}"
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "*andsi3_lshiftrt_9_sb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 9))
+ (const_int 4194304)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (and:SI (lshiftrt:SI (match_dup 0)
+ (const_int 25))
+ (const_int 64)))
+ (parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (const_int 16)))
+ (clobber (scratch:QI))])]
+ "")
+
+;; plus:SI
+
+(define_insn "*addsi3_upper"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 65536))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "add.w\\t%f1,%e0"
+ [(set_attr "length" "2")])
+
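+;; (mult A 65536) is just A << 16 here, so the insn above adds the low
+;; word of operand 1 straight into the high word of the destination.
+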
+(define_insn "*addsi3_lshiftrt_16_zexthi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 16))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "0"))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "add.w\\t%e1,%f0\;xor.w\\t%e0,%e0\;rotxl.w\\t%e0"
+ [(set_attr "length" "6")])
+
+(define_insn_and_split "*addsi3_and_r_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (cc0) (compare (zero_extract:SI (match_dup 1)
+ (const_int 1)
+ (const_int 0))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (eq (cc0)
+ (const_int 0))
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 2)
+ (plus:SI (match_dup 2)
+ (const_int 1)))
+ (match_dup 3)]
+ "operands[3] = gen_label_rtx ();")
+
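+;; In C terms, "dst += (a & 1);" becomes a bit test plus a conditional
+;; increment; the next pattern handles "dst += (~a & 1);" the same way
+;; with the branch sense inverted.
+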
+(define_insn_and_split "*addsi3_and_not_r_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (not:SI (match_operand:SI 1 "register_operand" "r"))
+ (const_int 1))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (cc0) (compare (zero_extract:SI (match_dup 1)
+ (const_int 1)
+ (const_int 0))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (ne (cc0)
+ (const_int 0))
+ (label_ref (match_dup 3))
+ (pc)))
+ (set (match_dup 2)
+ (plus:SI (match_dup 2)
+ (const_int 1)))
+ (match_dup 3)]
+ "operands[3] = gen_label_rtx ();")
+
+;; [ix]or:HI
+
+(define_insn "*ixorhi3_zext"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "iorxor_operator"
+ [(zero_extend:HI (match_operand:QI 2 "register_operand" "r"))
+ (match_operand:HI 3 "register_operand" "0")]))]
+ ""
+ "%c1.b\\t%X2,%s0"
+ [(set_attr "length" "2")])
+
+;; [ix]or:SI
+
+(define_insn "*ixorsi3_zext_qi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "iorxor_operator"
+ [(zero_extend:SI (match_operand:QI 2 "register_operand" "r"))
+ (match_operand:SI 3 "register_operand" "0")]))]
+ ""
+ "%c1.b\\t%X2,%w0"
+ [(set_attr "length" "2")])
+
+(define_insn "*ixorsi3_zext_hi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "iorxor_operator"
+ [(zero_extend:SI (match_operand:HI 2 "register_operand" "r"))
+ (match_operand:SI 3 "register_operand" "0")]))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "%c1.w\\t%T2,%f0"
+ [(set_attr "length" "2")])
+
+(define_insn "*ixorsi3_ashift_16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "iorxor_operator"
+ [(ashift:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))
+ (match_operand:SI 3 "register_operand" "0")]))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "%c1.w\\t%f2,%e0"
+ [(set_attr "length" "2")])
+
+(define_insn "*ixorsi3_lshiftrt_16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "iorxor_operator"
+ [(lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))
+ (match_operand:SI 3 "register_operand" "0")]))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "%c1.w\\t%e2,%f0"
+ [(set_attr "length" "2")])
+
+;; ior:HI
+
+(define_insn "*iorhi3_ashift_8"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (ashift:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 8))
+ (match_operand:HI 2 "register_operand" "0")))]
+ ""
+ "or.b\\t%s1,%t0"
+ [(set_attr "length" "2")])
+
+(define_insn "*iorhi3_lshiftrt_8"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (lshiftrt:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 8))
+ (match_operand:HI 2 "register_operand" "0")))]
+ ""
+ "or.b\\t%t1,%s0"
+ [(set_attr "length" "2")])
+
+(define_insn "*iorhi3_two_qi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "0"))
+ (ashift:HI (match_operand:HI 2 "register_operand" "r")
+ (const_int 8))))]
+ ""
+ "mov.b\\t%s2,%t0"
+ [(set_attr "length" "2")])
+
+(define_insn "*iorhi3_two_qi_mem"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (ior:HI (zero_extend:HI (match_operand:QI 1 "memory_operand" "m"))
+ (ashift:HI (subreg:HI (match_operand:QI 2 "memory_operand" "m") 0)
+ (const_int 8))))]
+ ""
+ "mov.b\\t%X2,%t0\;mov.b\\t%X1,%s0"
+ [(set_attr "length" "16")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ior:HI (zero_extend:HI (match_operand:QI 1 "memory_operand" ""))
+ (ashift:HI (subreg:HI (match_operand:QI 2 "memory_operand" "") 0)
+ (const_int 8))))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && reload_completed
+ && byte_accesses_mergeable_p (XEXP (operands[2], 0), XEXP (operands[1], 0))"
+ [(set (match_dup 0)
+ (match_dup 3))]
+ "operands[3] = gen_rtx_MEM (HImode, XEXP (operands[2], 0));")
+
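+;; That is, two adjacent byte loads combined into a word become one
+;; HImode load from the lower address; big-endian layout puts that
+;; byte in the upper half, matching the shift by 8.
+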
+;; ior:SI
+
+(define_insn "*iorsi3_two_hi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))
+ (ashift:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.w\\t%f2,%e0"
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*iorsi3_two_qi_zext"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (ior:SI (zero_extend:SI (match_operand:QI 1 "memory_operand" "m"))
+ (and:SI (ashift:SI (subreg:SI (match_operand:QI 2 "memory_operand" "m") 0)
+ (const_int 8))
+ (const_int 65280))))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (ior:HI (zero_extend:HI (match_dup 1))
+ (ashift:HI (subreg:HI (match_dup 2) 0)
+ (const_int 8))))
+ (set (match_dup 0)
+ (zero_extend:SI (match_dup 3)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+(define_insn "*iorsi3_e2f"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int -65536))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 16))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.w\\t%e2,%f0"
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*iorsi3_two_qi_sext"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (zero_extend:SI (match_operand:QI 1 "register_operand" "0"))
+ (ashift:SI (sign_extend:SI (match_operand:QI 2 "register_operand" "r"))
+ (const_int 8))))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (ior:HI (zero_extend:HI (match_dup 1))
+ (ashift:HI (match_dup 4)
+ (const_int 8))))
+ (set (match_dup 0)
+ (sign_extend:SI (match_dup 3)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[4] = gen_rtx_REG (HImode, REGNO (operands[2]));")
+
+(define_insn "*iorsi3_w"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0,0")
+ (const_int -256))
+ (zero_extend:SI (match_operand:QI 2 "general_operand_src" "r,g>"))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.b\\t%X2,%w0"
+ [(set_attr "length" "2,8")])
+
+(define_insn "*iorsi3_ashift_31"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 31))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "rotxl.l\\t%S0\;bor\\t#0,%w1\;rotxr.l\\t%S0"
+ [(set_attr "length" "6")
+ (set_attr "cc" "set_znv")])
+
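+;; The trick above: rotate the destination left through carry, OR bit 0
+;; of operand 1 into the carry, then rotate back; the carry re-enters
+;; as bit 31, leaving the other 31 bits untouched.
+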
+(define_insn "*iorsi3_and_ashift"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "single_one_operand" "n"))
+ (match_operand:SI 4 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && (INTVAL (operands[3]) & ~0xffff) == 0"
+ "*
+{
+ rtx srcpos = GEN_INT (exact_log2 (INTVAL (operands[3]))
+ - INTVAL (operands[2]));
+ rtx dstpos = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ operands[2] = srcpos;
+ operands[3] = dstpos;
+ return \"bld\\t%Z2,%Y1\;bor\\t%Z3,%Y0\;bst\\t%Z3,%Y0\";
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "*iorsi3_and_lshiftrt"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "single_one_operand" "n"))
+ (match_operand:SI 4 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && ((INTVAL (operands[3]) << INTVAL (operands[2])) & ~0xffff) == 0"
+ "*
+{
+ rtx srcpos = GEN_INT (exact_log2 (INTVAL (operands[3]))
+ + INTVAL (operands[2]));
+ rtx dstpos = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ operands[2] = srcpos;
+ operands[3] = dstpos;
+ return \"bld\\t%Z2,%Y1\;bor\\t%Z3,%Y0\;bst\\t%Z3,%Y0\";
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "*iorsi3_zero_extract"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[2]) < 16"
+ "bld\\t%Z2,%Y1\;bor\\t#0,%w0\;bst\\t#0,%w0"
+ [(set_attr "length" "6")])
+
+(define_insn "*iorsi3_and_lshiftrt_n_sb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 30))
+ (const_int 2))
+ (match_operand:SI 2 "register_operand" "0")))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "rotl.l\\t%S1\;rotr.l\\t%S1\;bor\\t#1,%w0\;bst\\t#1,%w0"
+ [(set_attr "length" "8")])
+
+(define_insn "*iorsi3_and_lshiftrt_9_sb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 9))
+ (const_int 4194304))
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (match_scratch:HI 3 "=&r"))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "*
+{
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return \"shll.l\\t%S1\;xor.w\\t%T3,%T3\;bst\\t#6,%s3\;or.w\\t%T3,%e0\";
+ else
+ return \"rotl.l\\t%S1\;rotr.l\\t%S1\;xor.w\\t%T3,%T3\;bst\\t#6,%s3\;or.w\\t%T3,%e0\";
+}"
+ [(set_attr "length" "10")])
+
+;; Used to OR the exponent of a float.
+
+(define_insn "*iorsi3_shift"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 23))
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 23))
+ (match_dup 0)))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && epilogue_completed
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
+ && REGNO (operands[0]) != REGNO (operands[1])"
+ [(parallel [(set (match_dup 3)
+ (ashift:HI (match_dup 3)
+ (const_int 7)))
+ (clobber (scratch:QI))])
+ (set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 1)
+ (const_int 16))
+ (match_dup 0)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[1]));")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 23))
+ (match_dup 0)))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && epilogue_completed
+ && !(find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
+ && REGNO (operands[0]) != REGNO (operands[1]))"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (parallel [(set (match_dup 3)
+ (ashift:HI (match_dup 3)
+ (const_int 7)))
+ (clobber (scratch:QI))])
+ (set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 2)
+ (const_int 16))
+ (match_dup 0)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[2]));")
+
+(define_insn "*iorsi2_and_1_lshiftrt_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (lshiftrt:SI (match_dup 1)
+ (const_int 1))))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "shlr.l\\t%S0\;bor\\t#0,%w0\;bst\\t#0,%w0"
+ [(set_attr "length" "6")])
+
+(define_insn_and_split "*iorsi3_ashift_16_ashift_24"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 16))
+ (ashift:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 24))))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (ior:HI (ashift:HI (match_dup 4)
+ (const_int 8))
+ (match_dup 3)))
+ (parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (const_int 16)))
+ (clobber (scratch:QI))])]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[4] = gen_rtx_REG (HImode, REGNO (operands[2]));")
+
+(define_insn_and_split "*iorsi3_ashift_16_ashift_24_mem"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (ior:SI (and:SI (ashift:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 16))
+ (const_int 16711680))
+ (ashift:SI (subreg:SI (match_operand:QI 2 "memory_operand" "m") 0)
+ (const_int 24))))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (ior:HI (zero_extend:HI (match_dup 1))
+ (ashift:HI (subreg:HI (match_dup 2) 0)
+ (const_int 8))))
+ (parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (const_int 16)))
+ (clobber (scratch:QI))])]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+;; Used to add the exponent of a float.
+
+(define_insn "*addsi3_shift"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 8388608))
+ (match_operand:SI 2 "register_operand" "0")))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 8388608))
+ (match_dup 0)))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && epilogue_completed
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
+ && REGNO (operands[0]) != REGNO (operands[1])"
+ [(parallel [(set (match_dup 3)
+ (ashift:HI (match_dup 3)
+ (const_int 7)))
+ (clobber (scratch:QI))])
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 1)
+ (const_int 65536))
+ (match_dup 0)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[1]));")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 8388608))
+ (match_dup 0)))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && epilogue_completed
+ && !(find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
+ && REGNO (operands[0]) != REGNO (operands[1]))"
+ [(set (match_dup 2)
+ (match_dup 1))
+ (parallel [(set (match_dup 3)
+ (ashift:HI (match_dup 3)
+ (const_int 7)))
+ (clobber (scratch:QI))])
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 2)
+ (const_int 65536))
+ (match_dup 0)))]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[2]));")
+
+;; ashift:SI
+
+(define_insn_and_split "*ashiftsi_sextqi_7"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (sign_extend:SI (match_operand:QI 1 "register_operand" "0"))
+ (const_int 7)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 2)
+ (ashift:HI (match_dup 2)
+ (const_int 8)))
+ (clobber (scratch:QI))])
+ (set (match_dup 0)
+ (sign_extend:SI (match_dup 2)))
+ (parallel [(set (match_dup 0)
+ (ashiftrt:SI (match_dup 0)
+ (const_int 1)))
+ (clobber (scratch:QI))])]
+ "operands[2] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+;; Storing part of an HImode value into a QImode destination.
+
+(define_insn ""
+ [(set (match_operand:QI 0 "general_operand_dst" "=rm<")
+ (subreg:QI (lshiftrt:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 8)) 1))]
+ ""
+ "mov.b\\t%t1,%R0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length" "8")])
+
+;; Storing part of an SImode value into a QImode destination.
+
+(define_insn ""
+ [(set (match_operand:QI 0 "general_operand_dst" "=rm<")
+ (subreg:QI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 8)) 3))]
+ ""
+ "mov.b\\t%x1,%R0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "general_operand_dst" "=rm<")
+ (subreg:QI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 16)) 3))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.w\\t%e1,%f2\;mov.b\\t%w2,%R0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length" "10")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "general_operand_dst" "=rm<")
+ (subreg:QI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 24)) 3))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_H8300H || TARGET_H8300S"
+ "mov.w\\t%e1,%f2\;mov.b\\t%x2,%R0"
+ [(set_attr "cc" "set_znv")
+ (set_attr "length" "10")])
+
+(define_insn_and_split ""
+ [(set (pc)
+ (if_then_else (eq (zero_extract:SI (subreg:SI (match_operand:QI 0 "register_operand" "") 0)
+ (const_int 1)
+ (const_int 7))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "#"
+ ""
+ [(set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (ge (cc0)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+(define_insn_and_split ""
+ [(set (pc)
+ (if_then_else (ne (zero_extract:SI (subreg:SI (match_operand:QI 0 "register_operand" "") 0)
+ (const_int 1)
+ (const_int 7))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "#"
+ ""
+ [(set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (lt (cc0)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
+
+;; -----------------------------------------------------------------
+;; PEEPHOLE PATTERNS
+;; -----------------------------------------------------------------
+
+;; Convert (A >> B) & C to (A & 255) >> B if C == 255 >> B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (lshiftrt:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_operand:HI 2 "" ""))])
+ (set (match_dup 0)
+ (and:HI (match_dup 0)
+ (match_operand:HI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (255 >> INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:HI (match_dup 0)
+ (const_int 255)))
+ (parallel
+ [(set (match_dup 0)
+ (lshiftrt:HI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
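+;; Worked example: with B == 2 and C == 63 (== 255 >> 2), both forms
+;; keep bits 2..7 of A, but masking first uses the cheap byte-wide
+;; constant 255.  The same reasoning applies to the ashift and SImode
+;; variants below.
+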
+;; Convert (A << B) & C to (A & 255) << B if C == 255 << B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (ashift:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_operand:HI 2 "" ""))])
+ (set (match_dup 0)
+ (and:HI (match_dup 0)
+ (match_operand:HI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (255 << INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:HI (match_dup 0)
+ (const_int 255)))
+ (parallel
+ [(set (match_dup 0)
+ (ashift:HI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
+;; Convert (A >> B) & C to (A & 255) >> B if C == 255 >> B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (clobber (match_operand:SI 2 "" ""))])
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (255 >> INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 255)))
+ (parallel
+ [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
+;; Convert (A << B) & C to (A & 255) << B if C == 255 << B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (clobber (match_operand:SI 2 "" ""))])
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (255 << INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 255)))
+ (parallel
+ [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
+;; Convert (A >> B) & C to (A & 65535) >> B if C == 65535 >> B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (clobber (match_operand:SI 2 "" ""))])
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (65535 >> INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 65535)))
+ (parallel
+ [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
+;; Convert (A << B) & C to (A & 65535) << B if C == 65535 << B.
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (clobber (match_operand:SI 2 "" ""))])
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "INTVAL (operands[3]) == (65535 << INTVAL (operands[1]))"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 65535)))
+ (parallel
+ [(set (match_dup 0)
+ (ashift:SI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "")
+
+;; Convert a QImode push into an SImode push so that the
+;; define_peephole2 below can cram multiple pushes into one stm.l.
+
+(define_peephole2
+ [(parallel [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:QI (plus:SI (reg:SI SP_REG) (const_int -3)))
+ (match_operand:QI 0 "register_operand" ""))])]
+ "TARGET_H8300S && !TARGET_NORMAL_MODE && REGNO (operands[0]) != SP_REG"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_dup 0))]
+ "operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
+(define_peephole2
+ [(parallel [(set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (set (mem:QI (plus:HI (reg:HI SP_REG) (const_int -3)))
+ (match_operand:QI 0 "register_operand" ""))])]
+ "TARGET_H8300S && TARGET_NORMAL_MODE && REGNO (operands[0]) != SP_REG"
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_dup 0))]
+ "operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
+;; Convert a HImode push into an SImode push so that the
+;; define_peephole2 below can cram multiple pushes into one stm.l.
+
+(define_peephole2
+ [(parallel [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:HI (plus:SI (reg:SI SP_REG) (const_int -2)))
+ (match_operand:HI 0 "register_operand" ""))])]
+ "TARGET_H8300S && !TARGET_NORMAL_MODE && REGNO (operands[0]) != SP_REG"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_dup 0))]
+ "operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
+(define_peephole2
+ [(parallel [(set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (set (mem:HI (plus:HI (reg:HI SP_REG) (const_int -2)))
+ (match_operand:HI 0 "register_operand" ""))])]
+ "TARGET_H8300S && TARGET_NORMAL_MODE && REGNO (operands[0]) != SP_REG"
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_dup 0))]
+ "operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
+;; Cram four pushes into stm.l.
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 2 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 3 "register_operand" ""))]
+ "TARGET_H8300S && !TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[3])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && REGNO (operands[2]) == REGNO (operands[0]) + 2
+ && REGNO (operands[3]) == REGNO (operands[0]) + 3
+ && (TARGET_H8300SX || REGNO (operands[0]) == 0))"
+ [(parallel [(set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -12)))
+ (match_dup 2))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -16)))
+ (match_dup 3))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -16)))])]
+ "")
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 2 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 3 "register_operand" ""))]
+ "TARGET_H8300S && TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[3])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && REGNO (operands[2]) == REGNO (operands[0]) + 2
+ && REGNO (operands[3]) == REGNO (operands[0]) + 3
+ && (TARGET_H8300SX || REGNO (operands[0]) == 0))"
+ [(parallel [(set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -12)))
+ (match_dup 2))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -16)))
+ (match_dup 3))
+ (set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG)
+ (const_int -16)))])]
+ "")
+
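+;; Net effect (register numbers assumed): four consecutive pushes of
+;; er0..er3 become one
+;;
+;;     stm.l   er0-er3,@-er7
+;;
+;; via the stm patterns defined elsewhere in this file.
+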
+;; Cram three pushes into stm.l.
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_H8300S && !TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[2])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && REGNO (operands[2]) == REGNO (operands[0]) + 2
+ && (TARGET_H8300SX || (REGNO (operands[0]) & 3) == 0))"
+ [(parallel [(set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -12)))
+ (match_dup 2))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -12)))])]
+ "")
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_H8300S && TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[2])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && REGNO (operands[2]) == REGNO (operands[0]) + 2
+ && (TARGET_H8300SX || (REGNO (operands[0]) & 3) == 0))"
+ [(parallel [(set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -12)))
+ (match_dup 2))
+ (set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG)
+ (const_int -12)))])]
+ "")
+
+;; Cram two pushes into stm.l.
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_H8300S && !TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[1])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && (TARGET_H8300SX || (REGNO (operands[0]) & 1) == 0))"
+ [(parallel [(set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -8)))])]
+ "")
+
+(define_peephole2
+ [(set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 0 "register_operand" ""))
+ (set (mem:SI (pre_dec:HI (reg:HI SP_REG)))
+ (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_H8300S && TARGET_NORMAL_MODE
+ && (REGNO_REG_CLASS (REGNO (operands[1])) == GENERAL_REGS
+ && REGNO (operands[1]) == REGNO (operands[0]) + 1
+ && (TARGET_H8300SX || (REGNO (operands[0]) & 1) == 0))"
+ [(parallel [(set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (mem:SI (plus:HI (reg:HI SP_REG) (const_int -8)))
+ (match_dup 1))
+ (set (reg:HI SP_REG)
+ (plus:HI (reg:HI SP_REG)
+ (const_int -8)))])]
+ "")
+
+;; Turn
+;;
+;; mov.w #2,r0
+;; add.w r7,r0 (6 bytes)
+;;
+;; into
+;;
+;; mov.w r7,r0
+;; adds #2,r0 (4 bytes)
+
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_operand:HI 2 "register_operand" "")))]
+ "REG_P (operands[0]) && REG_P (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && (CONST_OK_FOR_J (INTVAL (operands[1]))
+ || CONST_OK_FOR_L (INTVAL (operands[1]))
+ || CONST_OK_FOR_N (INTVAL (operands[1])))"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+;; Turn
+;;
+;; sub.l er0,er0
+;; add.b #4,r0l
+;; add.l er7,er0 (6 bytes)
+;;
+;; into
+;;
+;; mov.l er7,er0
+;; adds #4,er0 (4 bytes)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && REG_P (operands[0]) && REG_P (operands[2])
+ && REGNO (operands[0]) != REGNO (operands[2])
+ && (CONST_OK_FOR_L (INTVAL (operands[1]))
+ || CONST_OK_FOR_N (INTVAL (operands[1])))"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+;; Turn
+;;
+;; mov.l er7,er0
+;; add.l #10,er0 (takes 8 bytes)
+;;
+;; into
+;;
+;; sub.l er0,er0
+;; add.b #10,r0l
+;; add.l er7,er0 (takes 6 bytes)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && REG_P (operands[0]) && REG_P (operands[1])
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && !CONST_OK_FOR_L (INTVAL (operands[2]))
+ && !CONST_OK_FOR_N (INTVAL (operands[2]))
+ && ((INTVAL (operands[2]) & 0xff) == INTVAL (operands[2])
+ || (INTVAL (operands[2]) & 0xff00) == INTVAL (operands[2])
+ || INTVAL (operands[2]) == 0xffff
+ || INTVAL (operands[2]) == 0xfffe)"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+;; Turn
+;;
+;; subs #1,er4
+;; mov.w r4,r4
+;; bne .L2028
+;;
+;; into
+;;
+;; dec.w #1,r4
+;; bne .L2028
+
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand 1 "incdec_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (unspec:HI [(match_dup 0)
+ (match_dup 1)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+;; The SImode version of the previous pattern.
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_operand 1 "incdec_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 0)
+ (match_dup 1)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+(define_peephole2
+ [(parallel [(set (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "register_operand" "")
+ (const_int 1)
+ (const_int 7))
+ (const_int 0)))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (match_operator 1 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ [(set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[3] = ((GET_CODE (operands[1]) == EQ)
+ ? gen_rtx_GE (VOIDmode, cc0_rtx, const0_rtx)
+ : gen_rtx_LT (VOIDmode, cc0_rtx, const0_rtx));")
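+
+;; Bit 7 of a QImode value is its sign bit, so testing that bit is a
+;; sign test on the whole byte: eq (bit clear) becomes ge and ne (bit
+;; set) becomes lt, which is what operands[3] is rebuilt to above.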
+
+;; The next three peephole2's will try to transform
+;;
+;; mov.b A,r0l (or mov.l A,er0)
+;; and.l #CST,er0
+;;
+;; into
+;;
+;; sub.l er0,er0
+;; mov.b A,r0l
+;; and.b #CST,r0l (if CST is not 255)
+
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand" "")
+ (match_operand:QI 1 "general_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "")
+ (and:SI (match_dup 2)
+ (const_int 255)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !reg_overlap_mentioned_p (operands[2], operands[1])
+ && REGNO (operands[0]) == REGNO (operands[2])"
+ [(set (match_dup 2)
+ (const_int 0))
+ (set (strict_low_part (match_dup 0))
+ (match_dup 1))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "general_operand" ""))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 255)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !(GET_CODE (operands[1]) == MEM && !offsettable_memref_p (operands[1]))
+ && !(GET_CODE (operands[1]) == MEM && MEM_VOLATILE_P (operands[1]))"
+ [(set (match_dup 0)
+ (const_int 0))
+ (set (strict_low_part (match_dup 2))
+ (match_dup 3))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);
+ operands[3] = gen_lowpart (QImode, operands[1]);")
+
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "")
+ (and:SI (match_dup 2)
+ (match_operand:SI 3 "const_int_qi_operand" "")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && (GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == SImode)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && !reg_overlap_mentioned_p (operands[2], operands[1])
+ && !(GET_MODE (operands[1]) != QImode
+ && GET_CODE (operands[1]) == MEM
+ && !offsettable_memref_p (operands[1]))
+ && !(GET_MODE (operands[1]) != QImode
+ && GET_CODE (operands[1]) == MEM
+ && MEM_VOLATILE_P (operands[1]))"
+ [(set (match_dup 2)
+ (const_int 0))
+ (set (strict_low_part (match_dup 4))
+ (match_dup 5))
+ (set (match_dup 2)
+ (and:SI (match_dup 2)
+ (match_dup 6)))]
+ "operands[4] = gen_lowpart (QImode, operands[0]);
+ operands[5] = gen_lowpart (QImode, operands[1]);
+ operands[6] = GEN_INT (~0xff | INTVAL (operands[3]));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (const_int 65280)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0)
+ (const_int 0))
+ (set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (lshiftrt:SI (match_dup 1)
+ (const_int 8)))]
+ "")
+
+;; If a load of mem:SI is followed by an AND that turns off the upper
+;; half, then we can load mem:HI instead.
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand" "")))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !MEM_VOLATILE_P (operands[1])
+ && offsettable_memref_p (operands[1])
+ && (INTVAL (operands[2]) & ~0xffff) == 0
+ && INTVAL (operands[2]) != 255"
+ [(set (match_dup 3)
+ (match_dup 4))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_dup 2)))]
+ "operands[3] = gen_lowpart (HImode, operands[0]);
+ operands[4] = gen_lowpart (HImode, operands[1]);")
+
+;; Convert a compare directly on memory into a move into a scratch
+;; register followed by a register compare, when a scratch is available.
+
+(define_peephole2
+ [(match_scratch:QI 1 "r")
+ (set (cc0)
+ (compare (match_operand:QI 0 "memory_operand" "")
+ (const_int 0)))]
+ ""
+ [(set (match_dup 1)
+ (match_dup 0))
+ (set (cc0) (compare (match_dup 1)
+ (const_int 0)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:HI 1 "r")
+ (set (cc0)
+ (compare (match_operand:HI 0 "memory_operand" "")
+ (const_int 0)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ [(set (match_dup 1)
+ (match_dup 0))
+ (set (cc0) (compare (match_dup 1)
+ (const_int 0)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 1 "r")
+ (set (cc0)
+ (compare (match_operand:SI 0 "memory_operand" "")
+ (const_int 0)))]
+ "(TARGET_H8300H || TARGET_H8300S)"
+ [(set (match_dup 1)
+ (match_dup 0))
+ (set (cc0) (compare (match_dup 1)
+ (const_int 0)))]
+ "")
+
+
+;; (compare (reg:HI) (const_int)) takes 4 bytes, so we try to achieve
+;; the equivalent with shorter sequences. Here is the summary. Cases
+;; are grouped for each define_peephole2.
+;;
+;; reg const_int use insn
+;; --------------------------------------------------------
+;; dead -2 eq/ne inc.l
+;; dead -1 eq/ne inc.l
+;; dead 1 eq/ne dec.l
+;; dead 2 eq/ne dec.l
+;;
+;; dead 1 ge/lt shar.l
+;; dead 3 (H8S) ge/lt shar.l
+;;
+;; dead 1 geu/ltu shar.l
+;; dead 3 (H8S) geu/ltu shar.l
+;;
+;; ---- 255 ge/lt mov.b
+;;
+;; ---- 255 geu/ltu mov.b
+
+;; Transform
+;;
+;; cmp.w #1,r0
+;; bne .L1
+;;
+;; into
+;;
+;; dec.w #1,r0
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "incdec_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) != 0
+ && peep2_reg_dead_p (1, operands[0])"
+ [(set (match_dup 0)
+ (unspec:HI [(match_dup 0)
+ (match_dup 4)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = GEN_INT (- INTVAL (operands[1]));")
+
+;; Transform
+;;
+;; cmp.w #1,r0
+;; bgt .L1
+;;
+;; into
+;;
+;; shar.w r0
+;; bgt .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))"
+ [(parallel [(set (match_dup 0)
+ (ashiftrt:HI (match_dup 0)
+ (match_dup 4)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 2)
+ (label_ref (match_dup 3))
+ (pc)))]
+ "operands[4] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));")
+
+;; Transform
+;;
+;; cmp.w #1,r0
+;; bhi .L1
+;;
+;; into
+;;
+;; shar.w r0
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))"
+ [(parallel [(set (match_dup 0)
+ (ashiftrt:HI (match_dup 0)
+ (match_dup 4)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 5)
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[4] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[2]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
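+
+;; For the unsigned case, x > 1 holds exactly when (x >> 1) != 0
+;; (and x > 3 when (x >> 2) != 0), so gtu maps to ne and leu to eq,
+;; as operands[5] is built above.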
+
+;; Transform
+;;
+;; cmp.w #255,r0
+;; bgt .L1
+;;
+;; into
+;;
+;; mov.b r0h,r0h
+;; bgt .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "")
+ (const_int 255)))
+ (set (pc)
+ (if_then_else (match_operator 1 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (cc0) (compare (and:HI (match_dup 0)
+ (const_int -256))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+;; Transform
+;;
+;; cmp.w #255,r0
+;; bhi .L1
+;;
+;; into
+;;
+;; mov.b r0h,r0h
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:HI 0 "register_operand" "")
+ (const_int 255)))
+ (set (pc)
+ (if_then_else (match_operator 1 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (cc0) (compare (and:HI (match_dup 0)
+ (const_int -256))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 3)
+ (label_ref (match_dup 2))
+ (pc)))]
+{
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
+
+;; (compare (reg:SI) (const_int)) takes 6 bytes, so we try to achieve
+;; the equivalent with shorter sequences. Here is the summary. Cases
+;; are grouped for each define_peephole2.
+;;
+;; reg const_int use insn
+;; --------------------------------------------------------
+;; live -2 eq/ne copy and inc.l
+;; live -1 eq/ne copy and inc.l
+;; live 1 eq/ne copy and dec.l
+;; live 2 eq/ne copy and dec.l
+;;
+;; dead -2 eq/ne inc.l
+;; dead -1 eq/ne inc.l
+;; dead 1 eq/ne dec.l
+;; dead 2 eq/ne dec.l
+;;
+;; dead -131072 eq/ne inc.w and test
+;; dead -65536 eq/ne inc.w and test
+;; dead 65536 eq/ne dec.w and test
+;; dead 131072 eq/ne dec.w and test
+;;
+;; dead 0x000000?? except 1 and 2 eq/ne xor.b and test
+;; dead 0x0000??00 eq/ne xor.b and test
+;; dead 0x0000ffff eq/ne not.w and test
+;;
+;; dead 0xffffff?? except -1 and -2 eq/ne xor.b and not.l
+;; dead 0xffff??ff eq/ne xor.b and not.l
+;; dead 0x40000000 (H8S) eq/ne rotl.l and dec.l
+;; dead 0x80000000 eq/ne rotl.l and dec.l
+;;
+;; live 1 ge/lt copy and shar.l
+;; live 3 (H8S) ge/lt copy and shar.l
+;;
+;; live 1 geu/ltu copy and shar.l
+;; live 3 (H8S) geu/ltu copy and shar.l
+;;
+;; dead 1 ge/lt shar.l
+;; dead 3 (H8S) ge/lt shar.l
+;;
+;; dead 1 geu/ltu shar.l
+;; dead 3 (H8S) geu/ltu shar.l
+;;
+;; dead 3 (H8/300H) ge/lt and.b and test
+;; dead 7 ge/lt and.b and test
+;; dead 15 ge/lt and.b and test
+;; dead 31 ge/lt and.b and test
+;; dead 63 ge/lt and.b and test
+;; dead 127 ge/lt and.b and test
+;; dead 255 ge/lt and.b and test
+;;
+;; dead 3 (H8/300H) geu/ltu and.b and test
+;; dead 7 geu/ltu and.b and test
+;; dead 15 geu/ltu and.b and test
+;; dead 31 geu/ltu and.b and test
+;; dead 63 geu/ltu and.b and test
+;; dead 127 geu/ltu and.b and test
+;; dead 255 geu/ltu and.b and test
+;;
+;; ---- 65535 ge/lt mov.w
+;;
+;; ---- 65535 geu/ltu mov.w
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; beq .L1
+;;
+;; into
+;;
+;; dec.l #1,er0
+;; beq .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "incdec_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) != 0
+ && peep2_reg_dead_p (1, operands[0])"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 0)
+ (match_dup 4)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = GEN_INT (- INTVAL (operands[1]));")
+
+;; Transform
+;;
+;; cmp.l #65536,er0
+;; beq .L1
+;;
+;; into
+;;
+;; dec.w #1,e0
+;; beq .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == -131072
+ || INTVAL (operands[1]) == -65536
+ || INTVAL (operands[1]) == 65536
+ || INTVAL (operands[1]) == 131072)"
+ [(set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_dup 4)))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = GEN_INT (- INTVAL (operands[1]));")
+
+;; Transform
+;;
+;; cmp.l #100,er0
+;; beq .L1
+;;
+;; into
+;;
+;; xor.b #100,er0
+;; mov.l er0,er0
+;; beq .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && ((INTVAL (operands[1]) & 0x00ff) == INTVAL (operands[1])
+ || (INTVAL (operands[1]) & 0xff00) == INTVAL (operands[1])
+ || INTVAL (operands[1]) == 0x0000ffff)
+ && INTVAL (operands[1]) != 0
+ && INTVAL (operands[1]) != 1
+ && INTVAL (operands[1]) != 2"
+ [(set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (match_dup 1)))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+;; Transform
+;;
+;; cmp.l #-100,er0
+;; beq .L1
+;;
+;; into
+;;
+;; xor.b #99,er0
+;; not.l er0
+;; beq .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && ((INTVAL (operands[1]) | 0x00ff) == -1
+ || (INTVAL (operands[1]) | 0xff00) == -1)
+ && INTVAL (operands[1]) != -1
+ && INTVAL (operands[1]) != -2"
+ [(set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (match_dup 4)))
+ (set (match_dup 0)
+ (not:SI (match_dup 0)))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = GEN_INT (INTVAL (operands[1]) ^ -1);")
+
+;; Transform
+;;
+;; cmp.l #-2147483648,er0
+;; beq .L1
+;;
+;; into
+;;
+;; rotl.l er0
+;; dec.l #1,er0
+;; beq .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == -2147483647 - 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 1073741824))"
+ [(set (match_dup 0)
+ (rotate:SI (match_dup 0)
+ (match_dup 4)))
+ (set (match_dup 0)
+ (unspec:SI [(match_dup 0)
+ (const_int -1)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = GEN_INT (INTVAL (operands[1]) == -2147483647 - 1 ? 1 : 2);")
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; bgt .L1
+;;
+;; into
+;;
+;; mov.l er0,er1
+;; shar.l er1
+;; bgt .L1
+
+;; We avoid this transformation if we see more than one copy of the
+;; same compare insn immediately before this one.
+
+(define_peephole2
+ [(match_scratch:SI 4 "r")
+ (set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))
+ && !same_cmp_preceding_p (insn)"
+ [(set (match_dup 4)
+ (match_dup 0))
+ (parallel [(set (match_dup 4)
+ (ashiftrt:SI (match_dup 4)
+ (match_dup 5)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 2)
+ (label_ref (match_dup 3))
+ (pc)))]
+ "operands[5] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));")
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; bhi .L1
+;;
+;; into
+;;
+;; mov.l er0,er1
+;; shar.l er1
+;; bne .L1
+
+;; We avoid this transformation if we see more than one copy of the
+;; same compare insn immediately before this one.
+
+(define_peephole2
+ [(match_scratch:SI 4 "r")
+ (set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && !peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))
+ && !same_cmp_preceding_p (insn)"
+ [(set (match_dup 4)
+ (match_dup 0))
+ (parallel [(set (match_dup 4)
+ (ashiftrt:SI (match_dup 4)
+ (match_dup 5)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 6)
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[5] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));
+ operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[2]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; bgt .L1
+;;
+;; into
+;;
+;; shar.l er0
+;; bgt .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))"
+ [(parallel [(set (match_dup 0)
+ (ashiftrt:SI (match_dup 0)
+ (match_dup 4)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 2)
+ (label_ref (match_dup 3))
+ (pc)))]
+ "operands[4] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));")
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; bhi .L1
+;;
+;; into
+;;
+;; shar.l er0
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && (INTVAL (operands[1]) == 1
+ || (TARGET_H8300S && INTVAL (operands[1]) == 3))"
+ [(parallel [(set (match_dup 0)
+ (ashiftrt:SI (match_dup 0)
+ (match_dup 4)))
+ (clobber (scratch:QI))])
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 5)
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[4] = GEN_INT (exact_log2 (INTVAL (operands[1]) + 1));
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[2]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
+
+;; Transform
+;;
+;; cmp.l #15,er0
+;; bgt .L1
+;;
+;; into
+;;
+;; and #240,r0l
+;; mov.l er0,er0
+;; bgt .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && ((TARGET_H8300H && INTVAL (operands[1]) == 3)
+ || INTVAL (operands[1]) == 7
+ || INTVAL (operands[1]) == 15
+ || INTVAL (operands[1]) == 31
+ || INTVAL (operands[1]) == 63
+ || INTVAL (operands[1]) == 127
+ || INTVAL (operands[1]) == 255)"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_dup 4)))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 2)
+ (label_ref (match_dup 3))
+ (pc)))]
+ "operands[4] = GEN_INT (~INTVAL (operands[1]));")
+
+;; Transform
+;;
+;; cmp.l #15,er0
+;; bhi .L1
+;;
+;; into
+;;
+;; and #240,r0l
+;; mov.l er0,er0
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && peep2_reg_dead_p (1, operands[0])
+ && ((TARGET_H8300H && INTVAL (operands[1]) == 3)
+ || INTVAL (operands[1]) == 7
+ || INTVAL (operands[1]) == 15
+ || INTVAL (operands[1]) == 31
+ || INTVAL (operands[1]) == 63
+ || INTVAL (operands[1]) == 127
+ || INTVAL (operands[1]) == 255)"
+ [(set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_dup 4)))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 5)
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[4] = GEN_INT (~INTVAL (operands[1]));
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[2]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
+
+;; Transform
+;;
+;; cmp.l #65535,er0
+;; bgt .L1
+;;
+;; into
+;;
+;; mov.l e0,e0
+;; bgt .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (const_int 65535)))
+ (set (pc)
+ (if_then_else (match_operator 1 "gtle_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (cc0) (compare (and:SI (match_dup 0)
+ (const_int -65536))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+;; Transform
+;;
+;; cmp.l #65535,er0
+;; bhi .L1
+;;
+;; into
+;;
+;; mov.l e0,e0
+;; bne .L1
+
+(define_peephole2
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (const_int 65535)))
+ (set (pc)
+ (if_then_else (match_operator 1 "gtuleu_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_H8300H || TARGET_H8300S"
+ [(set (cc0) (compare (and:SI (match_dup 0)
+ (const_int -65536))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_dup 3)
+ (label_ref (match_dup 2))
+ (pc)))]
+{
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == GTU ? NE : EQ,
+ VOIDmode,
+ cc0_rtx,
+ const0_rtx);
+})
+
+;; Transform
+;;
+;; cmp.l #1,er0
+;; beq .L1
+;;
+;; into
+;;
+;; mov.l er0,er1
+;; dec.l #1,er1
+;; beq .L1
+
+;; We avoid this transformation if we see more than one copy of the
+;; same compare insn.
+
+(define_peephole2
+ [(match_scratch:SI 4 "r")
+ (set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "incdec_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "(TARGET_H8300H || TARGET_H8300S)
+ && INTVAL (operands[1]) != 0
+ && !peep2_reg_dead_p (1, operands[0])
+ && !same_cmp_following_p (insn)"
+ [(set (match_dup 4)
+ (match_dup 0))
+ (set (match_dup 4)
+ (unspec:SI [(match_dup 4)
+ (match_dup 5)]
+ UNSPEC_INCDEC))
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[5] = GEN_INT (- INTVAL (operands[1]));")
+
+;; Narrow the mode of the test if possible.
+
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_qi_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 4)
+ (and:QI (match_dup 4)
+ (match_dup 5)))
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = gen_rtx_REG (QImode, REGNO (operands[0]));
+ operands[5] = gen_int_mode (INTVAL (operands[1]), QImode);")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_qi_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 4)
+ (and:QI (match_dup 4)
+ (match_dup 5)))
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = gen_rtx_REG (QImode, REGNO (operands[0]));
+ operands[5] = gen_int_mode (INTVAL (operands[1]), QImode);")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_hi_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 4)
+ (and:HI (match_dup 4)
+ (match_dup 5)))
+ (set (cc0) (compare (match_dup 4)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 3 [(cc0) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "operands[4] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ operands[5] = gen_int_mode (INTVAL (operands[1]), HImode);")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_qi_operand" "")))
+ (set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_qi_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 4 "eqne_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "peep2_reg_dead_p (3, operands[0])
+ && (~INTVAL (operands[1]) & INTVAL (operands[2])) == 0"
+ [(set (match_dup 5)
+ (and:QI (match_dup 5)
+ (match_dup 6)))
+ (set (match_dup 5)
+ (xor:QI (match_dup 5)
+ (match_dup 7)))
+ (set (cc0) (compare (match_dup 5)
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 4 [(cc0) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+ "operands[5] = gen_rtx_REG (QImode, REGNO (operands[0]));
+ operands[6] = gen_int_mode (INTVAL (operands[1]), QImode);
+ operands[7] = gen_int_mode (INTVAL (operands[2]), QImode);")
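+
+;; In each case the register dies and the mask (what the
+;; const_int_qi_operand/const_int_hi_operand predicates presumably
+;; guarantee) keeps only low-byte or low-word bits, so the AND and
+;; the test can both be done in the narrower mode.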
+
+;; These trigger right at the end of allocation of locals in the
+;; prologue (and possibly at other places).
+
+;; stack adjustment of -4, generate one push
+;;
+;; before : 6 bytes, 10 clocks
+;; after : 4 bytes, 10 clocks
+
+(define_peephole2
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -4)))
+ (set (mem:SI (reg:SI SP_REG))
+ (match_operand:SI 0 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE
+ && REGNO (operands[0]) != SP_REG"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_dup 0))]
+ "")
+
+;; stack adjustment of -12, generate one push
+;;
+;; before : 10 bytes, 14 clocks
+;; after : 8 bytes, 14 clocks
+
+(define_peephole2
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -12)))
+ (set (mem:SI (reg:SI SP_REG))
+ (match_operand:SI 0 "register_operand" ""))]
+ "(TARGET_H8300H || TARGET_H8300S) && !TARGET_NORMAL_MODE
+ && REGNO (operands[0]) != SP_REG"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -4)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int -4)))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_dup 0))]
+ "")
+
+;; Transform
+;;
+;; mov dst,reg
+;; op src,reg
+;; mov reg,dst
+;;
+;; into
+;;
+;; op src,dst
+;;
+;; if "reg" dies at the end of the sequence.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "h8sx_binary_memory_operator"
+ [(match_dup 0)
+ (match_operand 3 "h8300_src_operand" "")]))
+ (set (match_operand 4 "memory_operand" "")
+ (match_dup 0))]
+ "0 /* Disable because it breaks compiling fp-bit.c. */
+ && TARGET_H8300SX
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])
+ && !reg_overlap_mentioned_p (operands[0], operands[4])
+ && h8sx_mergeable_memrefs_p (operands[4], operands[1])"
+ [(set (match_dup 4)
+ (match_dup 5))]
+ {
+ operands[5] = shallow_copy_rtx (operands[2]);
+ XEXP (operands[5], 0) = operands[1];
+ })
+
+;; Transform
+;;
+;; mov src,reg
+;; op reg,dst
+;;
+;; into
+;;
+;; op src,dst
+;;
+;; if "reg" dies in the second insn.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "h8300_src_operand" ""))
+ (set (match_operand 2 "h8300_dst_operand" "")
+ (match_operator 3 "h8sx_binary_memory_operator"
+ [(match_operand 4 "h8300_dst_operand" "")
+ (match_dup 0)]))]
+ "0 /* Disable because it breaks compiling fp-bit.c. */
+ && TARGET_H8300SX
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[4])"
+ [(set (match_dup 2)
+ (match_dup 5))]
+ {
+ operands[5] = shallow_copy_rtx (operands[3]);
+ XEXP (operands[5], 1) = operands[1];
+ })
+
+;; Transform
+;;
+;; mov dst,reg
+;; op reg
+;; mov reg,dst
+;;
+;; into
+;;
+;; op dst
+;;
+;; if "reg" dies at the end of the sequence.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "h8sx_unary_memory_operator"
+ [(match_dup 0)]))
+ (set (match_operand 3 "memory_operand" "")
+ (match_dup 0))]
+ "TARGET_H8300SX
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])
+ && h8sx_mergeable_memrefs_p (operands[3], operands[1])"
+ [(set (match_dup 3)
+ (match_dup 4))]
+ {
+ operands[4] = shallow_copy_rtx (operands[2]);
+ XEXP (operands[4], 0) = operands[1];
+ })
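+
+;; In these rewrites shallow_copy_rtx duplicates only the top-level
+;; operator rtx, so patching one XEXP of the copy substitutes the
+;; original memory operand for the register without disturbing the
+;; matched insn.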
+
+;; Transform
+;;
+;; mov src1,reg
+;; cmp reg,src2
+;;
+;; into
+;;
+;; cmp src1,src2
+;;
+;; if "reg" dies in the comparison.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "h8300_dst_operand" ""))
+ (set (cc0)
+ (compare (match_dup 0)
+ (match_operand 2 "h8300_src_operand" "")))]
+ "TARGET_H8300SX
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && operands[2] != const0_rtx"
+ [(set (cc0)
+ (compare (match_dup 1)
+ (match_dup 2)))])
+
+;; Likewise for the second operand.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "h8300_src_operand" ""))
+ (set (cc0)
+ (compare (match_operand 2 "h8300_dst_operand" "")
+ (match_dup 0)))]
+ "TARGET_H8300SX
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (cc0)
+ (compare (match_dup 2)
+ (match_dup 1)))])
+
+;; Combine two moves.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "h8300_src_operand" ""))
+ (set (match_operand 2 "h8300_dst_operand" "")
+ (match_dup 0))]
+ "TARGET_H8300SX
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 2)
+ (match_dup 1))])
diff --git a/gcc/config/h8300/h8300.opt b/gcc/config/h8300/h8300.opt
new file mode 100644
index 000000000..989375e15
--- /dev/null
+++ b/gcc/config/h8300/h8300.opt
@@ -0,0 +1,62 @@
+; Options for the Renesas H8/300 port of the compiler
+;
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+ms
+Target Mask(H8300S_1)
+Generate H8S code
+
+msx
+Target Mask(H8300SX)
+Generate H8SX code
+
+ms2600
+Target Mask(MAC)
+Generate H8S/2600 code
+
+mint32
+Target RejectNegative Mask(INT32)
+Make integers 32 bits wide
+
+maddresses
+Target Undocumented RejectNegative Mask(ADDRESSES)
+
+mquickcall
+Target Mask(QUICKCALL)
+Use registers for argument passing
+
+mslowbyte
+Target RejectNegative Mask(SLOWBYTE)
+Consider access to byte sized memory slow
+
+mrelax
+Target RejectNegative Mask(RELAX)
+Enable linker relaxing
+
+mh
+Target Mask(H8300H)
+Generate H8/300H code
+
+mn
+Target Mask(NORMAL_MODE)
+Enable the normal mode
+
+malign-300
+Target RejectNegative Mask(ALIGN_300)
+Use H8/300 alignment rules
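+
+; As an illustrative example (not part of this file), invoking
+; "gcc -msx -mint32" selects H8SX code generation with 32-bit ints.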
diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm
new file mode 100644
index 000000000..1b75b7326
--- /dev/null
+++ b/gcc/config/h8300/lib1funcs.asm
@@ -0,0 +1,838 @@
+;; libgcc routines for the Renesas H8/300 CPU.
+;; Contributed by Steve Chamberlain <sac@cygnus.com>
+;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
+
+/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Assembler register definitions. */
+
+#define A0 r0
+#define A0L r0l
+#define A0H r0h
+
+#define A1 r1
+#define A1L r1l
+#define A1H r1h
+
+#define A2 r2
+#define A2L r2l
+#define A2H r2h
+
+#define A3 r3
+#define A3L r3l
+#define A3H r3h
+
+#define S0 r4
+#define S0L r4l
+#define S0H r4h
+
+#define S1 r5
+#define S1L r5l
+#define S1H r5h
+
+#define S2 r6
+#define S2L r6l
+#define S2H r6h
+
+#ifdef __H8300__
+#define PUSHP push
+#define POPP pop
+
+#define A0P r0
+#define A1P r1
+#define A2P r2
+#define A3P r3
+#define S0P r4
+#define S1P r5
+#define S2P r6
+#endif
+
+#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
+#define PUSHP push.l
+#define POPP pop.l
+
+#define A0P er0
+#define A1P er1
+#define A2P er2
+#define A3P er3
+#define S0P er4
+#define S1P er5
+#define S2P er6
+
+#define A0E e0
+#define A1E e1
+#define A2E e2
+#define A3E e3
+#endif
+
+#ifdef __H8300H__
+#ifdef __NORMAL_MODE__
+ .h8300hn
+#else
+ .h8300h
+#endif
+#endif
+
+#ifdef __H8300S__
+#ifdef __NORMAL_MODE__
+ .h8300sn
+#else
+ .h8300s
+#endif
+#endif
+#ifdef __H8300SX__
+#ifdef __NORMAL_MODE__
+ .h8300sxn
+#else
+ .h8300sx
+#endif
+#endif
+
+#ifdef L_cmpsi2
+#ifdef __H8300__
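+; Signed SImode compare: returns 0 for A < B, 1 for A == B and 2 for
+; A > B (the usual libgcc __cmpsi2 convention); A arrives in A0/A1
+; and B in A2/A3.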
+ .section .text
+ .align 2
+ .global ___cmpsi2
+___cmpsi2:
+ cmp.w A0,A2
+ bne .L2
+ cmp.w A1,A3
+ bne .L4
+ mov.w #1,A0
+ rts
+.L2:
+ bgt .L5
+.L3:
+ mov.w #2,A0
+ rts
+.L4:
+ bls .L3
+.L5:
+ sub.w A0,A0
+ rts
+ .end
+#endif
+#endif /* L_cmpsi2 */
+
+#ifdef L_ucmpsi2
+#ifdef __H8300__
+ .section .text
+ .align 2
+ .global ___ucmpsi2
+___ucmpsi2:
+ cmp.w A0,A2
+ bne .L2
+ cmp.w A1,A3
+ bne .L4
+ mov.w #1,A0
+ rts
+.L2:
+ bhi .L5
+.L3:
+ mov.w #2,A0
+ rts
+.L4:
+ bls .L3
+.L5:
+ sub.w A0,A0
+ rts
+ .end
+#endif
+#endif /* L_ucmpsi2 */
+
+#ifdef L_divhi3
+
+;; HImode divides for the H8/300.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+; general purpose normalize routine
+;
+; divisor in A0
+; dividend in A1
+; turns both into +ve numbers, and records in A2L the sign that the
+; answer should have
+
+#ifdef __H8300__
+ .section .text
+ .align 2
+divnorm:
+ or A0H,A0H ; is divisor > 0
+ stc ccr,A2L
+ bge _lab1
+ not A0H ; no - then make it +ve
+ not A0L
+ adds #1,A0
+_lab1: or A1H,A1H ; look at dividend
+ bge _lab2
+ not A1H ; it is -ve, make it positive
+ not A1L
+ adds #1,A1
+ xor #0x8,A2L; and toggle sign of result
+_lab2: rts
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ or A0H,A0H ; is divisor > 0
+ stc ccr,A2L
+ bge _lab7
+ not A0H ; no - then make it +ve
+ not A0L
+ adds #1,A0
+_lab7: or A1H,A1H ; look at dividend
+ bge _lab8
+ not A1H ; it is -ve, make it positive
+ not A1L
+ adds #1,A1
+_lab8: rts
+
+; A0=A0/A1 signed
+
+ .global ___divhi3
+___divhi3:
+ bsr divnorm
+ bsr ___udivhi3
+negans: btst #3,A2L ; should answer be negative ?
+ beq _lab4
+ not A0H ; yes, so make it so
+ not A0L
+ adds #1,A0
+_lab4: rts
+
+; A0=A0%A1 signed
+
+ .global ___modhi3
+___modhi3:
+ bsr modnorm
+ bsr ___udivhi3
+ mov A3,A0
+ bra negans
+
+; A0=A0%A1 unsigned
+
+ .global ___umodhi3
+___umodhi3:
+ bsr ___udivhi3
+ mov A3,A0
+ rts
+
+; A0=A0/A1 unsigned
+; A3=A0%A1 unsigned
+; A2H trashed
+; D high 8 bits of denom
+; d low 8 bits of denom
+; N high 8 bits of num
+; n low 8 bits of num
+; M high 8 bits of mod
+; m low 8 bits of mod
+; Q high 8 bits of quot
+; q low 8 bits of quot
+; P preserve
+
+; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
+; see how to partition up the expression.
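+;
+; A hedged C sketch of the easy case, where the whole denominator
+; fits in one byte (illustrative only; each division below is one
+; divxu, a 16/8 -> 8 quotient + 8 remainder divide):
+;
+; unsigned short udivhi (unsigned short n, unsigned char d)
+; {
+; unsigned char qh = (n >> 8) / d; /* first divxu */
+; unsigned char r = (n >> 8) % d; /* its remainder */
+; unsigned char ql = (r * 256 + (n & 0xff)) / d; /* second divxu */
+; return qh * 256 + ql;
+; }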
+
+ .global ___udivhi3
+___udivhi3:
+ ; A0 A1 A2 A3
+ ; Nn Dd P
+ sub.w A3,A3 ; Nn Dd xP 00
+ or A1H,A1H
+ bne divlongway
+ or A0H,A0H
+ beq _lab6
+
+; we know that D == 0 and N is != 0
+ mov.b A0H,A3L ; Nn Dd xP 0N
+ divxu A1L,A3 ; MQ
+ mov.b A3L,A0H ; Q
+; dealt with N, do n
+_lab6: mov.b A0L,A3L ; n
+ divxu A1L,A3 ; mq
+ mov.b A3L,A0L ; Qq
+ mov.b A3H,A3L ; m
+ mov.b #0x0,A3H ; Qq 0m
+ rts
+
+; D != 0 - the denominator does not fit in eight bits, so we have to
+; loop around to get the result.
+
+divlongway:
+ mov.b A0H,A3L ; Nn Dd xP 0N
+ mov.b #0x0,A0H ; high byte of answer has to be zero
+ mov.b #0x8,A2H ; 8
+div8: add.b A0L,A0L ; n*=2
+ rotxl A3L ; Make remainder bigger
+ rotxl A3H
+ sub.w A1,A3 ; Q-=N
+ bhs setbit ; set a bit ?
+ add.w A1,A3 ; no : too far , Q+=N
+
+ dec A2H
+ bne div8 ; next bit
+ rts
+
+setbit: inc A0L ; do insert bit
+ dec A2H
+ bne div8 ; next bit
+ rts
+
+#endif /* __H8300__ */
+#endif /* L_divhi3 */
+
+#ifdef L_divsi3
+
+;; 4 byte integer divides for the H8/300.
+;;
+;; We have one routine which does all the work and lots of
+;; little ones which prepare the args and massage the sign.
+;; We bunch all of this into one object file since there are several
+;; "supporting routines".
+
+ .section .text
+ .align 2
+
+; Put the absolute values of the SIs into r0/r1 and r2/r3, and leave
+; the sign the result should have in bit 3 of r6l.
+; This function is here to keep branch displacements small.
+
+#ifdef __H8300__
+
+divnorm:
+ mov.b A0H,A0H ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge postive
+
+ ; negate arg
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+postive:
+ mov.b A2H,A2H ; is the denominator -ve
+ bge postive2
+ not A2L
+ not A2H
+ not A3L
+ not A3H
+ add.b #1,A3L
+ addx #0,A3H
+ addx #0,A2L
+ addx #0,A2H
+ xor.b #0x08,S2L ; toggle the result sign
+postive2:
+ rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ mov.b A0H,A0H ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge mpostive
+
+ ; negate arg
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+mpostive:
+ mov.b A2H,A2H ; is the denominator -ve
+ bge mpostive2
+ not A2L
+ not A2H
+ not A3L
+ not A3H
+ add.b #1,A3L
+ addx #0,A3H
+ addx #0,A2L
+ addx #0,A2H
+mpostive2:
+ rts
+
+#else /* __H8300H__ */
+
+divnorm:
+ mov.l A0P,A0P ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge postive
+
+ neg.l A0P ; negate arg
+
+postive:
+ mov.l A1P,A1P ; is the denominator -ve
+ bge postive2
+
+ neg.l A1P ; negate arg
+ xor.b #0x08,S2L ; toggle the result sign
+
+postive2:
+ rts
+
+;; Basically the same, except that the sign of the divisor determines
+;; the sign.
+modnorm:
+ mov.l A0P,A0P ; is the numerator -ve
+ stc ccr,S2L ; keep the sign in bit 3 of S2L
+ bge mpostive
+
+ neg.l A0P ; negate arg
+
+mpostive:
+ mov.l A1P,A1P ; is the denominator -ve
+ bge mpostive2
+
+ neg.l A1P ; negate arg
+
+mpostive2:
+ rts
+
+#endif
+
+; numerator in A0/A1
+; denominator in A2/A3
+ .global ___modsi3
+___modsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr modnorm
+ bsr divmodsi4
+ mov S0,A0
+ mov S1,A1
+ bra exitdiv
+#else
+ PUSHP S2P
+ bsr modnorm
+ bsr ___udivsi3
+ mov.l er3,er0
+ bra exitdiv
+#endif
+
+ ;; The H8/300H and H8S version of ___udivsi3 is defined later in
+ ;; the file.
+#ifdef __H8300__
+ .global ___udivsi3
+___udivsi3:
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr divmodsi4
+ bra reti
+#endif
+
+ .global ___umodsi3
+___umodsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ bsr divmodsi4
+ mov S0,A0
+ mov S1,A1
+ bra reti
+#else
+ bsr ___udivsi3
+ mov.l er3,er0
+ rts
+#endif
+
+ .global ___divsi3
+___divsi3:
+#ifdef __H8300__
+ PUSHP S2P
+ PUSHP S0P
+ PUSHP S1P
+ jsr divnorm
+ jsr divmodsi4
+#else
+ PUSHP S2P
+ jsr divnorm
+ bsr ___udivsi3
+#endif
+
+ ; examine what the sign should be
+exitdiv:
+ btst #3,S2L
+ beq reti
+
+ ; should be -ve
+#ifdef __H8300__
+ not A0H
+ not A1H
+ not A0L
+ not A1L
+
+ add #1,A1L
+ addx #0,A1H
+ addx #0,A0L
+ addx #0,A0H
+#else /* __H8300H__ */
+ neg.l A0P
+#endif
+
+reti:
+#ifdef __H8300__
+ POPP S1P
+ POPP S0P
+#endif
+ POPP S2P
+ rts
+
+ ; takes A0/A1 numerator (A0P for H8/300H)
+ ; A2/A3 denominator (A1P for H8/300H)
+ ; returns A0/A1 quotient (A0P for H8/300H)
+ ; S0/S1 remainder (S0P for H8/300H)
+ ; trashes S2H
+
+#ifdef __H8300__
+
+divmodsi4:
+ sub.w S0,S0 ; zero play area
+ mov.w S0,S1
+ mov.b A2H,S2H
+ or A2L,S2H
+ or A3H,S2H
+ bne DenHighNonZero
+ mov.b A0H,A0H
+ bne NumByte0Zero
+ mov.b A0L,A0L
+ bne NumByte1Zero
+ mov.b A1H,A1H
+ bne NumByte2Zero
+ bra NumByte3Zero
+NumByte0Zero:
+ mov.b A0H,S1L
+ divxu A3L,S1
+ mov.b S1L,A0H
+NumByte1Zero:
+ mov.b A0L,S1L
+ divxu A3L,S1
+ mov.b S1L,A0L
+NumByte2Zero:
+ mov.b A1H,S1L
+ divxu A3L,S1
+ mov.b S1L,A1H
+NumByte3Zero:
+ mov.b A1L,S1L
+ divxu A3L,S1
+ mov.b S1L,A1L
+
+ mov.b S1H,S1L
+ mov.b #0x0,S1H
+ rts
+
+; have to do the divide by shift and test
+DenHighNonZero:
+ mov.b A0H,S1L
+ mov.b A0L,A0H
+ mov.b A1H,A0L
+ mov.b A1L,A1H
+
+ mov.b #0,A1L
+ mov.b #24,S2H ; only do 24 iterations
+
+nextbit:
+ add.w A1,A1 ; double the answer guess
+ rotxl A0L
+ rotxl A0H
+
+ rotxl S1L ; double remainder
+ rotxl S1H
+ rotxl S0L
+ rotxl S0H
+ sub.w A3,S1 ; does it all fit
+ subx A2L,S0L
+ subx A2H,S0H
+ bhs setone
+
+ add.w A3,S1 ; no, restore mistake
+ addx A2L,S0L
+ addx A2H,S0H
+
+ dec S2H
+ bne nextbit
+ rts
+
+setone:
+ inc A1L
+ dec S2H
+ bne nextbit
+ rts
+
+#else /* __H8300H__ */
+
+ ;; This function also computes the remainder and stores it in er3.
+ .global ___udivsi3
+___udivsi3:
+ mov.w A1E,A1E ; denominator top word 0?
+ bne DenHighNonZero
+
+ ; do it the easy way, see page 107 in manual
+ mov.w A0E,A2
+ extu.l A2P
+ divxu.w A1,A2P
+ mov.w A2E,A0E
+ divxu.w A1,A0P
+ mov.w A0E,A3
+ mov.w A2,A0E
+ extu.l A3P
+ rts
+
+ ; er0 = er0 / er1
+ ; er3 = er0 % er1
+ ; trashes er1 er2
+ ; expects er1 >= 2^16
+DenHighNonZero:
+ mov.l er0,er3
+ mov.l er1,er2
+#ifdef __H8300H__
+divmod_L21:
+ shlr.l er0
+ shlr.l er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+#else
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ beq divmod_L22A
+divmod_L21:
+ shlr.l #2,er0
+divmod_L22:
+ shlr.l #2,er2 ; make divisor < 2^16
+ mov.w e2,e2
+ bne divmod_L21
+divmod_L22A:
+ rotxl.w r2
+ bcs divmod_L23
+ shlr.l er0
+ bra divmod_L24
+divmod_L23:
+ rotxr.w r2
+ shlr.l #2,er0
+divmod_L24:
+#endif
+ ;; At this point,
+ ;; er0 contains shifted dividend
+ ;; er1 contains divisor
+ ;; er2 contains shifted divisor
+ ;; er3 contains dividend, later remainder
+ divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
+ extu.l er0
+ beq divmod_L25
+ subs #1,er0 ; er0 = AQ - 1
+ mov.w e1,r2
+ mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
+ sub.w r2,e3 ; dividend - 65536 * er2
+ mov.w r1,r2
+ mulxu.w r0,er2 ; compute er3 = remainder (tentative)
+ sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
+divmod_L25:
+ cmp.l er1,er3 ; is divisor < remainder?
+ blo divmod_L26
+ adds #1,er0
+ sub.l er1,er3 ; correct the remainder
+divmod_L26:
+ rts
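+
+ ;; A hedged C sketch of the estimate-and-correct scheme above
+ ;; (illustrative only):
+ ;;
+ ;; unsigned long udiv (unsigned long n, unsigned long d)
+ ;; { /* assumes d >= 0x10000 */
+ ;; unsigned long n2 = n, d2 = d;
+ ;; while (d2 >> 16) { n2 >>= 1; d2 >>= 1; }
+ ;; unsigned long q = n2 / d2; /* the approximate quotient AQ */
+ ;; if (q) q--; /* back off to AQ - 1 */
+ ;; unsigned long r = n - q * d;
+ ;; if (r >= d) { q++; r -= d; } /* at most one correction */
+ ;; return q; /* r is the remainder (er3) */
+ ;; }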
+
+#endif
+#endif /* L_divsi3 */
+
+#ifdef L_mulhi3
+
+;; HImode multiply.
+; The H8/300 only has an 8*8->16 multiply.
+; The answer is the same as:
+;
+; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
+; (we can ignore A1.h * A0.h because that will all fall off the top)
+; A0 in
+; A1 in
+; A0 answer
+
+#ifdef __H8300__
+ .section .text
+ .align 2
+ .global ___mulhi3
+___mulhi3:
+ mov.b A1L,A2L ; A2l gets srcb.l
+ mulxu A0L,A2 ; A2 gets first sub product
+
+ mov.b A0H,A3L ; prepare for
+ mulxu A1L,A3 ; second sub product
+
+ add.b A3L,A2H ; sum first two terms
+
+ mov.b A1H,A3L ; third sub product
+ mulxu A0L,A3
+
+ add.b A3L,A2H ; almost there
+ mov.w A2,A0 ; that is
+ rts
+
+#endif
+#endif /* L_mulhi3 */
+
+#ifdef L_mulsi3
+
+;; SImode multiply.
+;;
+;; I think that shift and add may be sufficient for this. Using the
+;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
+;; the inner loop uses maybe 20 cycles + overhead, but terminates
+;; quickly on small args.
+;;
+;; A0/A1 src_a
+;; A2/A3 src_b
+;;
+;; while (a)
+;; {
+;; if (a & 1)
+;; r += b;
+;; a >>= 1;
+;; b <<= 1;
+;; }
+
+ .section .text
+ .align 2
+
+#ifdef __H8300__
+
+ .global ___mulsi3
+___mulsi3:
+ PUSHP S0P
+ PUSHP S1P
+
+ sub.w S0,S0
+ sub.w S1,S1
+
+ ; while (a)
+_top: mov.w A0,A0
+ bne _more
+ mov.w A1,A1
+ beq _done
+_more: ; if (a & 1)
+ bld #0,A1L
+ bcc _nobit
+ ; r += b
+ add.w A3,S1
+ addx A2L,S0L
+ addx A2H,S0H
+_nobit:
+ ; a >>= 1
+ shlr A0H
+ rotxr A0L
+ rotxr A1H
+ rotxr A1L
+
+ ; b <<= 1
+ add.w A3,A3
+ addx A2L,A2L
+ addx A2H,A2H
+ bra _top
+
+_done:
+ mov.w S0,A0
+ mov.w S1,A1
+ POPP S1P
+ POPP S0P
+ rts
+
+#else /* __H8300H__ */
+
+;
+; mulsi3 for H8/300H - based on Renesas SH implementation
+;
+; by Toshiyasu Morita
+;
+; Old code:
+;
+; 16b * 16b = 372 states (worst case)
+; 32b * 32b = 724 states (worst case)
+;
+; New code:
+;
+; 16b * 16b = 48 states
+; 16b * 32b = 72 states
+; 32b * 32b = 92 states
+;
+
+ .global ___mulsi3
+___mulsi3:
+ mov.w r1,r2 ; ( 2 states) b * d
+ mulxu r0,er2 ; (22 states)
+
+ mov.w e0,r3 ; ( 2 states) a * d
+ beq L_skip1 ; ( 4 states)
+ mulxu r1,er3 ; (22 states)
+ add.w r3,e2 ; ( 2 states)
+
+L_skip1:
+ mov.w e1,r3 ; ( 2 states) c * b
+ beq L_skip2 ; ( 4 states)
+ mulxu r0,er3 ; (22 states)
+ add.w r3,e2 ; ( 2 states)
+
+L_skip2:
+ mov.l er2,er0 ; ( 2 states)
+ rts ; (10 states)
+
+#endif
+#endif /* L_mulsi3 */
+#ifdef L_fixunssfsi_asm
+/* For the h8300 we use asm to save some bytes, to
+ allow more programs to fit into the tiny address
+ space. For the H8/300H and H8S, the C version is good enough. */
+#ifdef __H8300__
+/* We still treat NaNs differently from libgcc2.c, but then, the
+ behavior is undefined anyway. */
+ .global ___fixunssfsi
+___fixunssfsi:
+ cmp.b #0x4f,r0h
+ bge Large_num
+ jmp @___fixsfsi
+Large_num:
+ bhi L_huge_num
+ xor.b #0x80,A0L
+ bmi L_shift8
+L_huge_num:
+ mov.w #65535,A0
+ mov.w A0,A1
+ rts
+L_shift8:
+ mov.b A0L,A0H
+ mov.b A1H,A0L
+ mov.b A1L,A1H
+ mov.b #0,A1L
+ rts
+#endif
+#endif /* L_fixunssfsi_asm */
diff --git a/gcc/config/h8300/mova.md b/gcc/config/h8300/mova.md
new file mode 100644
index 000000000..f6348f3d3
--- /dev/null
+++ b/gcc/config/h8300/mova.md
@@ -0,0 +1,858 @@
+;; -*- buffer-read-only: t -*-
+;; Generated automatically from genmova.sh
+;; Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (plus:QI (mult:QI (match_operand:QI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 2))
+ (match_operand:QI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (plus:QI (ashift:QI (match_operand:QI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 1))
+ (match_operand:QI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (plus:QI (mult:QI (match_operand:QI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 4))
+ (match_operand:QI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (plus:QI (ashift:QI (match_operand:QI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 2))
+ (match_operand:QI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/b.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (mult:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (mult:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 510))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (mult:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (mult:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 510))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ashift:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (ashift:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (ashift:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 510))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 510))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (mult:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (mult:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 1020))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (mult:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (mult:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 1020))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ashift:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (ashift:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (ashift:HI (subreg:HI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 1020))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (and:HI (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 1020))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/b.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 510))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 510))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (ashift:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 510))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 510)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 510))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 1020))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 1020))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (ashift:SI (zero_extend:SI (match_operand:QI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (subreg:SI (match_operand:QI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 1020))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 1020)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%X1.b),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 1020))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%X1.b),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (mult:HI (match_operand:HI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 2))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (ashift:HI (match_operand:HI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 1))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (mult:HI (match_operand:HI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 4))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (ashift:HI (match_operand:HI 1 "h8300_dst_operand" "0,rQ")
+ (const_int 2))
+ (match_operand:HI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/b.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 131070)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 131070))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 131070)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 131070))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (ashift:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 1))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 131070)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 1))
+ (const_int 131070))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 131070)))]
+ "TARGET_H8300SX"
+ "mova/w.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 1))
+ (const_int 131070))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/w.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 4))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 262140)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 4))
+ (const_int 262140))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 262140)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (mult:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 4))
+ (const_int 262140))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (ashift:SI (zero_extend:SI (match_operand:HI 1 "h8300_dst_operand" "0,rQ"))
+ (const_int 2))
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 262140)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (subreg:SI (match_operand:HI 1 "memory_operand" "m") 0)
+ (const_int 2))
+ (const_int 262140))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 262140)))]
+ "TARGET_H8300SX"
+ "mova/l.l @(0,%T1.w),%S0"
+ [(set_attr "length_table" "mova_zero")
+ (set_attr "cc" "none")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 2))
+ (const_int 262140))
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_H8300SX"
+ "mova/l.l @(%o2,%T1.w),%S0"
+ [(set_attr "length_table" "mova")
+ (set_attr "cc" "none")])
+
diff --git a/gcc/config/h8300/parityhi2.c b/gcc/config/h8300/parityhi2.c
new file mode 100644
index 000000000..d58cb89b5
--- /dev/null
+++ b/gcc/config/h8300/parityhi2.c
@@ -0,0 +1,36 @@
+/* The implementation of __parityhi2.
+ Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+int __parityhi2 (unsigned short x);
+
+int
+__parityhi2 (unsigned short x)
+{
+ int i;
+ int count = 0;
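+ /* Count the set bits one at a time; the parity is the low bit of
+ the population count. */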
+ for (i = 0; i < 16; i++)
+ if (x & ((unsigned short) 1 << i))
+ count++;
+ return count & 1;
+}
diff --git a/gcc/config/h8300/popcounthi2.c b/gcc/config/h8300/popcounthi2.c
new file mode 100644
index 000000000..47be193b3
--- /dev/null
+++ b/gcc/config/h8300/popcounthi2.c
@@ -0,0 +1,36 @@
+/* The implementation of __popcounthi2.
+ Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+int __popcounthi2 (unsigned short x);
+
+int
+__popcounthi2 (unsigned short x)
+{
+ int i;
+ int count = 0;
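+ /* Test each of the 16 bits in turn and count those that are set. */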
+ for (i = 0; i < 16; i++)
+ if (x & ((unsigned short) 1 << i))
+ count++;
+ return count;
+}
diff --git a/gcc/config/h8300/predicates.md b/gcc/config/h8300/predicates.md
new file mode 100644
index 000000000..895698b82
--- /dev/null
+++ b/gcc/config/h8300/predicates.md
@@ -0,0 +1,493 @@
+;; Predicate definitions for Renesas H8/300.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if OP is a valid source operand for an integer move
+;; instruction.
+
+(define_predicate "general_operand_src"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,mem")
+{
+ if (GET_MODE (op) == mode
+ && GET_CODE (op) == MEM
+ && GET_CODE (XEXP (op, 0)) == POST_INC)
+ return 1;
+ return general_operand (op, mode);
+})
+
+;; Return true if OP is a valid destination operand for an integer
+;; move instruction.
+
+(define_predicate "general_operand_dst"
+ (match_code "subreg,reg,mem")
+{
+ if (GET_MODE (op) == mode
+ && GET_CODE (op) == MEM
+ && GET_CODE (XEXP (op, 0)) == PRE_DEC)
+ return 1;
+ return general_operand (op, mode);
+})
+
+;; Return true if OP is a suitable second operand for a general
+;; arithmetic insn such as "add".
+
+(define_predicate "h8300_src_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,mem")
+{
+ if (TARGET_H8300SX)
+ return general_operand (op, mode);
+ return nonmemory_operand (op, mode);
+})
+
+;; Return true if OP is a suitable first operand for a general
+;; arithmetic insn such as "add".
+
+(define_predicate "h8300_dst_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (TARGET_H8300SX)
+ return nonimmediate_operand (op, mode);
+ return register_operand (op, mode);
+})
+
+;; Check that an operand is an unsigned 4-bit constant (H8SX only).
+
+(define_predicate "nibble_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && TARGET_H8300SX
+ && INTVAL (op) >= 0 && INTVAL (op) <= 15);
+})
+
+;; Check that an operand is either a register or an unsigned 4-bit
+;; constant.
+
+(define_predicate "reg_or_nibble_operand"
+ (match_code "const_int,subreg,reg")
+{
+ return (nibble_operand (op, mode) || register_operand (op, mode));
+})
+
+;; Return true if X is a shift operation of type H8SX_SHIFT_UNARY.
+
+(define_predicate "h8sx_unary_shift_operator"
+ (match_code "ashiftrt,lshiftrt,ashift,rotate")
+{
+ return (BINARY_P (op) && NON_COMMUTATIVE_P (op)
+ && (h8sx_classify_shift (GET_MODE (op), GET_CODE (op), XEXP (op, 1))
+ == H8SX_SHIFT_UNARY));
+})
+
+;; Likewise H8SX_SHIFT_BINARY.
+
+(define_predicate "h8sx_binary_shift_operator"
+ (match_code "ashiftrt,lshiftrt,ashift")
+{
+ return (BINARY_P (op) && NON_COMMUTATIVE_P (op)
+ && (h8sx_classify_shift (GET_MODE (op), GET_CODE (op), XEXP (op, 1))
+ == H8SX_SHIFT_BINARY));
+})
+
+;; Return true if OP is a binary operator in which it would be safe to
+;; replace register operands with memory operands.
+
+(define_predicate "h8sx_binary_memory_operator"
+ (match_code "plus,minus,and,ior,xor,ashift,ashiftrt,lshiftrt,rotate")
+{
+ if (!TARGET_H8300SX)
+ return false;
+
+ if (GET_MODE (op) != QImode
+ && GET_MODE (op) != HImode
+ && GET_MODE (op) != SImode)
+ return false;
+
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ return true;
+
+ default:
+ return h8sx_unary_shift_operator (op, mode);
+ }
+})
+
+;; Like h8sx_binary_memory_operator, but applies to unary operators.
+
+(define_predicate "h8sx_unary_memory_operator"
+ (match_code "neg,not")
+{
+ if (!TARGET_H8300SX)
+ return false;
+
+ if (GET_MODE (op) != QImode
+ && GET_MODE (op) != HImode
+ && GET_MODE (op) != SImode)
+ return false;
+
+ switch (GET_CODE (op))
+ {
+ case NEG:
+ case NOT:
+ return true;
+
+ default:
+ return false;
+ }
+})
+
+;; Return true if X is an ldm.l pattern. X is known to be parallel.
+
+(define_predicate "h8300_ldm_parallel"
+ (match_code "parallel")
+{
+ return h8300_ldm_stm_parallel (XVEC (op, 0), 1, 0);
+})
+
+;; Likewise stm.l.
+
+(define_predicate "h8300_stm_parallel"
+ (match_code "parallel")
+{
+ return h8300_ldm_stm_parallel (XVEC (op, 0), 0, 0);
+})
+
+;; Likewise rts/l and rte/l. Note that the .md pattern will check for
+;; the return so there's no need to do that here.
+
+(define_predicate "h8300_return_parallel"
+ (match_code "parallel")
+{
+ return h8300_ldm_stm_parallel (XVEC (op, 0), 1, 1);
+})
+
+;; Return true if OP is a constant that contains only one 1 in its
+;; binary representation.
+
+(define_predicate "single_one_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ {
+ /* We really need to do this masking because 0x80 in QImode is
+ represented as -128 for example. */
+ if (exact_log2 (INTVAL (op) & GET_MODE_MASK (mode)) >= 0)
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Return true if OP is a constant that contains only one 0 in its
+;; binary representation.
+
+(define_predicate "single_zero_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ {
+ /* We really need to do this masking because 0x80 in QImode is
+ represented as -128 for example. */
+ if (exact_log2 (~INTVAL (op) & GET_MODE_MASK (mode)) >= 0)
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Return true if OP is a valid call operand.
+
+(define_predicate "call_insn_operand"
+ (match_code "mem")
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx inside = XEXP (op, 0);
+ if (register_operand (inside, Pmode))
+ return 1;
+ if (CONSTANT_ADDRESS_P (inside))
+ return 1;
+ }
+ return 0;
+})
+
+;; Return true if OP is a valid call operand, and OP represents an
+;; operand for a small call (4 bytes instead of 6 bytes).
+
+(define_predicate "small_call_insn_operand"
+ (match_code "mem")
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx inside = XEXP (op, 0);
+
+ /* Register indirect is a small call. */
+ if (register_operand (inside, Pmode))
+ return 1;
+
+ /* A call through the function vector is a small call too. */
+ if (GET_CODE (inside) == SYMBOL_REF
+ && (SYMBOL_REF_FLAGS (inside) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ return 1;
+ }
+ /* Otherwise it's a large call. */
+ return 0;
+})
+
+;; Return true if OP is a valid jump operand.
+
+(define_predicate "jump_address_operand"
+ (match_code "reg,mem")
+{
+ if (GET_CODE (op) == REG)
+ return mode == Pmode;
+
+ if (GET_CODE (op) == MEM)
+ {
+ rtx inside = XEXP (op, 0);
+ if (register_operand (inside, Pmode))
+ return 1;
+ if (CONSTANT_ADDRESS_P (inside))
+ return 1;
+ }
+ return 0;
+})
+
+;; Return 1 if an addition/subtraction of a constant integer can be
+;; transformed into two consecutive adds/subs that are faster than the
+;; straightforward way. Otherwise, return 0.
+
+(define_predicate "two_insn_adds_subs_operand"
+ (match_code "const_int")
+{
+ if (TARGET_H8300SX)
+ return 0;
+
+ if (GET_CODE (op) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (op);
+
+ /* Force VALUE to be positive so that we do not have to consider
+ the negative case. */
+ if (value < 0)
+ value = -value;
+ if (TARGET_H8300H || TARGET_H8300S)
+ {
+ /* A constant addition/subtraction takes 2 states in QImode,
+ 4 states in HImode, and 6 states in SImode. Thus, the
+ only case we can win is when SImode is used, in which
+ case, two adds/subs are used, taking 4 states. */
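+ /* Each add/sub step can handle 1, 2 or 4 directly, so the
+ splittable constants are 3, 5, 6 and 8. */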
+ if (mode == SImode
+ && (value == 2 + 1
+ || value == 4 + 1
+ || value == 4 + 2
+ || value == 4 + 4))
+ return 1;
+ }
+ else
+ {
+ /* We do not profit directly by splitting addition or
+ subtraction of 3 and 4. However, since these are
+ implemented as a sequence of adds or subs, they do not
+ clobber (cc0) unlike a sequence of add.b and add.x. */
+ if (mode == HImode
+ && (value == 2 + 1
+ || value == 2 + 2))
+ return 1;
+ }
+ }
+
+ return 0;
+})
+
+;; Recognize valid operands for bit-field instructions.
+
+(define_predicate "bit_operand"
+ (match_code "reg,subreg,mem")
+{
+ /* We can accept any nonimmediate operand, except that MEM operands must
+ be limited to those that use addresses valid for the 'U' constraint. */
+ if (!nonimmediate_operand (op, mode) && !OK_FOR_U (op))
+ return 0;
+
+ /* H8SX accepts pretty much anything here. */
+ if (TARGET_H8300SX)
+ return 1;
+
+ /* Accept any mem during RTL generation. Otherwise, the code that does
+ insv and extzv will think that we cannot handle memory. However,
+ to avoid reload problems, we only accept 'U' MEM operands after RTL
+ generation. This means that any named pattern which uses this predicate
+ must force its operands to match 'U' before emitting RTL. */
+
+ if (GET_CODE (op) == REG)
+ return 1;
+ if (GET_CODE (op) == SUBREG)
+ return 1;
+ return (GET_CODE (op) == MEM
+ && OK_FOR_U (op));
+})
+
+;; Return nonzero if OP is a MEM suitable for bit manipulation insns.
+
+(define_predicate "bit_memory_operand"
+ (match_code "mem")
+{
+ return (GET_CODE (op) == MEM
+ && OK_FOR_U (op));
+})
+
+;; Return nonzero if OP is a register-indirect or constant-address
+;; memory operand suitable for bit manipulation insns.
+
+(define_predicate "bit_register_indirect_operand"
+ (match_code "mem")
+{
+ return (GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == REG
+ || GET_CODE (XEXP (op, 0)) == CONST_INT));
+})
+
+;; Return nonzero if X is a stack pointer.
+
+(define_predicate "stack_pointer_operand"
+ (match_code "reg")
+{
+ return op == stack_pointer_rtx;
+})
+
+;; Return nonzero if X is a constant whose absolute value is greater
+;; than 2.
+
+(define_predicate "const_int_gt_2_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && abs (INTVAL (op)) > 2);
+})
+
+;; Return nonzero if X is a constant whose absolute value is no
+;; smaller than 8.
+
+(define_predicate "const_int_ge_8_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && abs (INTVAL (op)) >= 8);
+})
+
+;; Return nonzero if X is a constant expressible in QImode.
+
+(define_predicate "const_int_qi_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) & 0xff) == INTVAL (op));
+})
+
+;; Return nonzero if X is a constant expressible in HImode.
+
+(define_predicate "const_int_hi_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) & 0xffff) == INTVAL (op));
+})
+
+;; Return nonzero if X is a constant suitable for inc/dec.
+
+(define_predicate "incdec_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && (CONST_OK_FOR_M (INTVAL (op))
+ || CONST_OK_FOR_O (INTVAL (op))));
+})
+
+;; Recognize valid operators for bit instructions.
+
+(define_predicate "bit_operator"
+ (match_code "xor,and,ior")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == XOR
+ || code == AND
+ || code == IOR);
+})
+
+;; Return nonzero if OP is a shift operator.
+
+(define_predicate "nshift_operator"
+ (match_code "ashiftrt,lshiftrt,ashift")
+{
+ switch (GET_CODE (op))
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ASHIFT:
+ return 1;
+
+ default:
+ return 0;
+ }
+})
+
+;; Return nonzero if X is either EQ or NE.
+
+(define_predicate "eqne_operator"
+ (match_code "eq,ne")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == EQ || code == NE);
+})
+
+;; Return nonzero if X is either GT or LE.
+
+(define_predicate "gtle_operator"
+ (match_code "gt,le,gtu,leu")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == GT || code == LE);
+})
+
+;; Return nonzero if X is either GTU or LEU.
+
+(define_predicate "gtuleu_operator"
+ (match_code "gtu,leu")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == GTU || code == LEU);
+})
+
+;; Return nonzero if X is either IOR or XOR.
+
+(define_predicate "iorxor_operator"
+ (match_code "ior,xor")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == IOR || code == XOR);
+})
diff --git a/gcc/config/h8300/rtems.h b/gcc/config/h8300/rtems.h
new file mode 100644
index 000000000..6d053d63b
--- /dev/null
+++ b/gcc/config/h8300/rtems.h
@@ -0,0 +1,29 @@
+/* Definitions for RTEMS targeting an H8.
+ Copyright (C) 1996, 1997, 2000, 2002, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS preprocessor built-ins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("h8300"); \
+ builtin_define ("__rtems__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
diff --git a/gcc/config/h8300/t-elf b/gcc/config/h8300/t-elf
new file mode 100644
index 000000000..c1f1dac32
--- /dev/null
+++ b/gcc/config/h8300/t-elf
@@ -0,0 +1,6 @@
+EXTRA_MULTILIB_PARTS= crti.o crtn.o crtbegin.o crtend.o
+
+$(T)crti.o: $(srcdir)/config/h8300/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/h8300/crti.asm
+$(T)crtn.o: $(srcdir)/config/h8300/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/h8300/crtn.asm
diff --git a/gcc/config/h8300/t-h8300 b/gcc/config/h8300/t-h8300
new file mode 100644
index 000000000..570157887
--- /dev/null
+++ b/gcc/config/h8300/t-h8300
@@ -0,0 +1,62 @@
+# Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, 2003,
+# 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = h8300/lib1funcs.asm
+LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \
+ _fixunssfsi_asm
+
+LIB2FUNCS_EXTRA = \
+ $(srcdir)/config/h8300/clzhi2.c \
+ $(srcdir)/config/h8300/ctzhi2.c \
+ $(srcdir)/config/h8300/parityhi2.c \
+ $(srcdir)/config/h8300/popcounthi2.c \
+ $(srcdir)/config/h8300/fixunssfsi.c
+
+# We do not have DF type, so fake out the libgcc2 compilation.
+TARGET_LIBGCC2_CFLAGS = -DDF=SF
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#define FLOAT_ONLY' >> fp-bit.c
+ echo '#define SMALL_MACHINE' >> fp-bit.c
+ echo '#ifdef __H8300__' >> fp-bit.c
+ echo '#define CMPtype HItype' >> fp-bit.c
+ echo '#else' >> fp-bit.c
+ echo '#define CMPtype SItype' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+MULTILIB_OPTIONS = mh/ms/msx mn mint32
+MULTILIB_DIRNAMES = h8300h h8300s h8sx normal int32
+MULTILIB_EXCEPTIONS = mint32 mn mn/mint32
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+s-config s-conditions s-flags s-codes s-constants s-emit s-recog \
+s-opinit s-extract s-peep s-attr s-attrtab s-output: \
+ $(srcdir)/config/h8300/mova.md
+
+$(srcdir)/config/h8300/mova.md: $(srcdir)/config/h8300/genmova.sh
+ $(SHELL) $(srcdir)/config/h8300/genmova.sh \
+ > $(srcdir)/config/h8300/mova.md
diff --git a/gcc/config/h8300/t-rtems b/gcc/config/h8300/t-rtems
new file mode 100644
index 000000000..0d76437f8
--- /dev/null
+++ b/gcc/config/h8300/t-rtems
@@ -0,0 +1,7 @@
+# Custom multilibs for RTEMS
+
+# -mn is not applicable to RTEMS (-mn implies a 16-bit void *)
+
+MULTILIB_OPTIONS = mh/ms/msx mint32
+MULTILIB_DIRNAMES = h8300h h8300s h8sx int32
+MULTILIB_EXCEPTIONS = mint32
diff --git a/gcc/config/host-darwin.c b/gcc/config/host-darwin.c
new file mode 100644
index 000000000..4d5a5512e
--- /dev/null
+++ b/gcc/config/host-darwin.c
@@ -0,0 +1,77 @@
+/* Darwin host-specific hook definitions.
+ Copyright (C) 2003, 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "config/host-darwin.h"
+
+/* Yes, this is really supposed to work. */
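+/* The PCH is later mapped over this fixed, page-aligned 1 GB buffer,
+ so its load address is stable from run to run. */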
+static char pch_address_space[1024*1024*1024] __attribute__((aligned (4096)));
+
+/* Return the address of the PCH address space, if the PCH will fit in it. */
+
+void *
+darwin_gt_pch_get_address (size_t sz, int fd ATTRIBUTE_UNUSED)
+{
+ if (sz <= sizeof (pch_address_space))
+ return pch_address_space;
+ else
+ return NULL;
+}
+
+/* Check ADDR and SZ for validity, and deallocate (using munmap) that part of
+ pch_address_space beyond SZ. */
+
+int
+darwin_gt_pch_use_address (void *addr, size_t sz, int fd, size_t off)
+{
+ const size_t pagesize = getpagesize();
+ void *mmap_result;
+ int ret;
+
+ gcc_assert ((size_t)pch_address_space % pagesize == 0
+ && sizeof (pch_address_space) % pagesize == 0);
+
+ ret = (addr == pch_address_space && sz <= sizeof (pch_address_space));
+ if (! ret)
+ sz = 0;
+
+ /* Round the size up to a whole number of pages. Normally this is a no-op. */
+ sz = (sz + pagesize - 1) / pagesize * pagesize;
+
+ if (munmap (pch_address_space + sz, sizeof (pch_address_space) - sz) != 0)
+ fatal_error ("couldn%'t unmap pch_address_space: %m");
+
+ if (ret)
+ {
+ mmap_result = mmap (addr, sz,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
+ fd, off);
+
+ /* The file might not be mmap-able. */
+ ret = mmap_result != (void *) MAP_FAILED;
+
+ /* Sanity check for broken MAP_FIXED. */
+ gcc_assert (!ret || mmap_result == addr);
+ }
+
+ return ret;
+}
diff --git a/gcc/config/host-darwin.h b/gcc/config/host-darwin.h
new file mode 100644
index 000000000..0cde7f329
--- /dev/null
+++ b/gcc/config/host-darwin.h
@@ -0,0 +1,27 @@
+/* Darwin host-specific hook definitions.
+ Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+extern void * darwin_gt_pch_get_address (size_t sz, int fd);
+extern int darwin_gt_pch_use_address (void *addr, size_t sz, int fd,
+ size_t off);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS darwin_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS darwin_gt_pch_use_address
diff --git a/gcc/config/host-hpux.c b/gcc/config/host-hpux.c
new file mode 100644
index 000000000..00adfcf9b
--- /dev/null
+++ b/gcc/config/host-hpux.c
@@ -0,0 +1,129 @@
+/* HP-UX host-specific hook definitions.
+ Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+
+static void *hpux_gt_pch_get_address (size_t, int);
+static int hpux_gt_pch_use_address (void *, size_t, int, size_t);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS hpux_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS hpux_gt_pch_use_address
+
+/* For various ports, try to guess a fixed spot in the vm space
+ that's probably free. */
+#if (defined(__hppa__) || defined(__ia64__)) && defined(__LP64__)
+# define TRY_EMPTY_VM_SPACE 0x8000000000000000
+#elif defined(__hppa__) || defined(__ia64__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#else
+# define TRY_EMPTY_VM_SPACE 0
+#endif
+
+/* Determine a location where we might be able to reliably allocate
+ SIZE bytes. FD is the PCH file, though we should return with the
+ file unmapped. */
+
+static void *
+hpux_gt_pch_get_address (size_t size, int fd)
+{
+ void *addr;
+
+ addr = mmap ((void *)TRY_EMPTY_VM_SPACE, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+
+ /* If we failed the map, that means there's *no* free space. */
+ if (addr == (void *) MAP_FAILED)
+ return NULL;
+ /* Unmap the area before returning. */
+ munmap (addr, size);
+
+ return addr;
+}
+
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+ mapping the data at BASE, -1 if we couldn't.
+
+ It's not possible to reliably mmap a file using MAP_PRIVATE to
+ a specific START address on either HP-UX or Linux. First we see
+ if mmap with MAP_PRIVATE works. If it does, we are off to the
+ races. If it doesn't, we try an anonymous private mmap since the
+ kernel is more likely to honor the BASE address in anonymous maps.
+ We then copy the data to the anonymous private map. This assumes
+ of course that we don't need to change the data in the PCH file
+ after it is created.
+
+ This approach obviously causes a performance penalty but there is
+ little else we can do given the current PCH implementation. */
+
+static int
+hpux_gt_pch_use_address (void *base, size_t size, int fd, size_t offset)
+{
+ void *addr;
+
+ /* We're called with size == 0 if we're not planning to load a PCH
+ file at all. This allows the hook to free any static space that
+ we might have allocated at link time. */
+ if (size == 0)
+ return -1;
+
+ /* Try to map the file with MAP_PRIVATE. */
+ addr = mmap (base, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
+
+ if (addr == base)
+ return 1;
+
+ if (addr != (void *) MAP_FAILED)
+ munmap (addr, size);
+
+ /* Try to make an anonymous private mmap at the desired location. */
+ addr = mmap (base, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (addr != base)
+ {
+ if (addr != (void *) MAP_FAILED)
+ munmap (addr, size);
+ return -1;
+ }
+
+ if (lseek (fd, offset, SEEK_SET) == (off_t)-1)
+ return -1;
+
+ while (size)
+ {
+ ssize_t nbytes;
+
+ nbytes = read (fd, base, MIN (size, SSIZE_MAX));
+ if (nbytes <= 0)
+ return -1;
+ base = (char *) base + nbytes;
+ size -= nbytes;
+ }
+
+ return 1;
+}
+
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/host-linux.c b/gcc/config/host-linux.c
new file mode 100644
index 000000000..ec6105577
--- /dev/null
+++ b/gcc/config/host-linux.c
@@ -0,0 +1,220 @@
+/* Linux host-specific hook definitions.
+ Copyright (C) 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+
+
+/* Linux has a feature called exec-shield-randomize that perturbs the
+ address of non-fixed mapped segments by a (relatively) small amount.
+ The feature is intended to make it harder to attack the system with
+ buffer overflow attacks, since every invocation of a program will
+ have its libraries and data segments at slightly different addresses.
+
+ This feature causes us problems with PCH because it makes it that
+ much harder to acquire a stable location at which to map our PCH
+ data file.
+
+ [ The feature causes other points of non-determinism within the
+ compiler as well, so we'd *really* like to be able to have the
+ driver disable exec-shield-randomize for the process group, but
+ that isn't possible at present. ]
+
+ We're going to try several things:
+
+ * Select an architecture specific address as "likely" and see
+ if that's free. For our 64-bit hosts, we can easily choose
+ an address in Never Never Land.
+
+ * If exec-shield-randomize is disabled, then just use the
+ address chosen by mmap in step one.
+
+ * If exec-shield-randomize is enabled, then temporarily allocate
+ 32M of memory as a buffer, then allocate PCH memory, then
+ free the buffer. The theory here is that the perturbation is
+ no more than 16M, and so by allocating our buffer larger than
+ that we make it considerably more likely that the address will
+ be free when we want to load the data back.
+*/
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS linux_gt_pch_get_address
+
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS linux_gt_pch_use_address
+
+/* For various ports, try to guess a fixed spot in the vm space
+ that's probably free. */
+#if defined(__alpha)
+# define TRY_EMPTY_VM_SPACE 0x10000000000
+#elif defined(__ia64)
+# define TRY_EMPTY_VM_SPACE 0x2000000100000000
+#elif defined(__x86_64)
+# define TRY_EMPTY_VM_SPACE 0x1000000000
+#elif defined(__i386)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#elif defined(__powerpc__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#elif defined(__s390x__)
+# define TRY_EMPTY_VM_SPACE 0x8000000000
+#elif defined(__s390__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#elif defined(__sparc__) && defined(__LP64__)
+# define TRY_EMPTY_VM_SPACE 0x8000000000
+#elif defined(__sparc__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#elif defined(__mc68000__)
+# define TRY_EMPTY_VM_SPACE 0x40000000
+#elif defined(__ARM_EABI__)
+# define TRY_EMPTY_VM_SPACE 0x60000000
+#else
+# define TRY_EMPTY_VM_SPACE 0
+#endif
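+
+/* A TRY_EMPTY_VM_SPACE of zero simply lets the kernel choose an
+ address in the initial probe mmap below. */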
+
+/* Determine a location where we might be able to reliably allocate SIZE
+ bytes. FD is the PCH file, though we should return with the file
+ unmapped. */
+
+static void *
+linux_gt_pch_get_address (size_t size, int fd)
+{
+ size_t buffer_size = 32 * 1024 * 1024;
+ void *addr, *buffer;
+ FILE *f;
+ bool randomize_on;
+
+ addr = mmap ((void *)TRY_EMPTY_VM_SPACE, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+
+ /* If we failed the map, that means there's *no* free space. */
+ if (addr == (void *) MAP_FAILED)
+ return NULL;
+ /* Unmap the area before returning. */
+ munmap (addr, size);
+
+ /* If we got the exact area we requested, then that's great. */
+ if (TRY_EMPTY_VM_SPACE && addr == (void *) TRY_EMPTY_VM_SPACE)
+ return addr;
+
+ /* If we didn't, then we need to look to see if virtual address
+ randomization is on. That is recorded in
+ kernel.randomize_va_space. An older implementation used
+ kernel.exec-shield-randomize. */
+ f = fopen ("/proc/sys/kernel/randomize_va_space", "r");
+ if (f == NULL)
+ f = fopen ("/proc/sys/kernel/exec-shield-randomize", "r");
+ randomize_on = false;
+ if (f != NULL)
+ {
+ char buf[100];
+ size_t c;
+
+ c = fread (buf, 1, sizeof buf - 1, f);
+ if (c > 0)
+ {
+ buf[c] = '\0';
+ randomize_on = (atoi (buf) > 0);
+ }
+ fclose (f);
+ }
+
+ /* If it isn't, then accept the address that mmap selected as fine. */
+ if (!randomize_on)
+ return addr;
+
+ /* Otherwise, we need to try again with buffer space. */
+ buffer = mmap (0, buffer_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ addr = mmap (0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (buffer != (void *) MAP_FAILED)
+ munmap (buffer, buffer_size);
+ if (addr == (void *) MAP_FAILED)
+ return NULL;
+ munmap (addr, size);
+
+ return addr;
+}
+
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+ mapping the data at BASE, -1 if we couldn't.
+
+ It's not possible to reliably mmap a file using MAP_PRIVATE to
+ a specific START address on either HP-UX or Linux. First we see
+ if mmap with MAP_PRIVATE works. If it does, we are off to the
+ races. If it doesn't, we try an anonymous private mmap since the
+ kernel is more likely to honor the BASE address in anonymous maps.
+ We then copy the data to the anonymous private map. This assumes
+ of course that we don't need to change the data in the PCH file
+ after it is created.
+
+ This approach obviously causes a performance penalty but there is
+ little else we can do given the current PCH implementation. */
+
+static int
+linux_gt_pch_use_address (void *base, size_t size, int fd, size_t offset)
+{
+ void *addr;
+
+ /* We're called with size == 0 if we're not planning to load a PCH
+ file at all. This allows the hook to free any static space that
+ we might have allocated at link time. */
+ if (size == 0)
+ return -1;
+
+ /* Try to map the file with MAP_PRIVATE. */
+ addr = mmap (base, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
+
+ if (addr == base)
+ return 1;
+
+ if (addr != (void *) MAP_FAILED)
+ munmap (addr, size);
+
+ /* Try to make an anonymous private mmap at the desired location. */
+ addr = mmap (base, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (addr != base)
+ {
+ if (addr != (void *) MAP_FAILED)
+ munmap (addr, size);
+ return -1;
+ }
+
+ if (lseek (fd, offset, SEEK_SET) == (off_t)-1)
+ return -1;
+
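+  /* Copy the PCH contents into the anonymous mapping; read may return
+     short counts, so loop, transferring at most SSIZE_MAX bytes per call.  */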
+ while (size)
+ {
+ ssize_t nbytes;
+
+ nbytes = read (fd, base, MIN (size, SSIZE_MAX));
+ if (nbytes <= 0)
+ return -1;
+ base = (char *) base + nbytes;
+ size -= nbytes;
+ }
+
+ return 1;
+}
+
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/host-solaris.c b/gcc/config/host-solaris.c
new file mode 100644
index 000000000..12eab3c61
--- /dev/null
+++ b/gcc/config/host-solaris.c
@@ -0,0 +1,125 @@
+/* Solaris host-specific hook definitions.
+ Copyright (C) 2004, 2007, 2008, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS sol_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS sol_gt_pch_use_address
+
+/* Before Solaris 11, the mmap ADDR parameter is mostly ignored without
+ MAP_FIXED set. Before we give up, search the desired address space with
+ mincore to see if the space is really free. */
+
+static void *
+mmap_fixed (void *addr, size_t len, int prot, int flags, int fd, off_t off)
+{
+ void *base;
+
+ base = mmap ((caddr_t) addr, len, prot, flags, fd, off);
+
+ if (base != addr)
+ {
+ size_t page_size = getpagesize();
+ char one_byte;
+ size_t i;
+
+ if (base != (void *) MAP_FAILED)
+ munmap ((caddr_t) base, len);
+
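+      /* Probe every page in the requested range; mincore failing with
+	 ENOMEM means that page is unmapped, so if the loop runs to the
+	 end the whole range is free and MAP_FIXED is safe.  */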
+ errno = 0;
+ for (i = 0; i < len; i += page_size)
+ if (mincore ((char *)addr + i, page_size, (char *) &one_byte) == -1
+ && errno == ENOMEM)
+ continue; /* The page is not mapped. */
+ else
+ break;
+
+ if (i >= len)
+ base = mmap ((caddr_t) addr, len, prot, flags | MAP_FIXED, fd, off);
+ }
+
+ return base;
+}
+
+/* For various ports, try to guess a fixed spot in the vm space
+ that's probably free. Based on McDougall, Mauro, Solaris Internals, 2nd
+ ed., p.460-461, fig. 9-3, 9-4, 9-5. */
+#if defined(__sparcv9__)
+/* This is low to avoid the VA hole on UltraSPARC I/II.  */
+# define TRY_EMPTY_VM_SPACE 0x70000000000
+#elif defined(__sparc__)
+# define TRY_EMPTY_VM_SPACE 0x80000000
+#elif defined(__x86_64__)
+# define TRY_EMPTY_VM_SPACE 0x8000000000000000
+#elif defined(__i386__)
+# define TRY_EMPTY_VM_SPACE 0xB0000000
+#else
+# define TRY_EMPTY_VM_SPACE 0
+#endif
+
+/* Determine a location where we might be able to reliably allocate
+ SIZE bytes. FD is the PCH file, though we should return with the
+ file unmapped. */
+
+static void *
+sol_gt_pch_get_address (size_t size, int fd)
+{
+ void *addr;
+
+ addr = mmap_fixed ((caddr_t) TRY_EMPTY_VM_SPACE, size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+ /* If we failed the map, that means there's *no* free space. */
+ if (addr == (void *) MAP_FAILED)
+ return NULL;
+ /* Unmap the area before returning. */
+ munmap ((caddr_t) addr, size);
+
+ return addr;
+}
+
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+ mapping the data at BASE, -1 if we couldn't. */
+
+static int
+sol_gt_pch_use_address (void *base, size_t size, int fd, size_t offset)
+{
+ void *addr;
+
+ /* We're called with size == 0 if we're not planning to load a PCH
+ file at all. This allows the hook to free any static space that
+ we might have allocated at link time. */
+ if (size == 0)
+ return -1;
+
+ addr = mmap_fixed ((caddr_t) base, size,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
+
+ return addr == base ? 1 : -1;
+}
+
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/hpux-stdint.h b/gcc/config/hpux-stdint.h
new file mode 100644
index 000000000..b06813b47
--- /dev/null
+++ b/gcc/config/hpux-stdint.h
@@ -0,0 +1,34 @@
+
+/* These should be correct for ia64-hp-hpux11.23. */
+
+#define SIG_ATOMIC_TYPE "unsigned int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_FAST8_TYPE "int"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_FAST8_TYPE "unsigned int"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
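+
+/* For example (editorial note, not part of the upstream header): in LP64
+   mode LONG_TYPE_SIZE is 64, so INT64_TYPE above resolves to "long int"
+   and int64_t is defined as long.  */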
diff --git a/gcc/config/hpux11.opt b/gcc/config/hpux11.opt
new file mode 100644
index 000000000..a7541477f
--- /dev/null
+++ b/gcc/config/hpux11.opt
@@ -0,0 +1,33 @@
+; HP-UX 11 options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+mt
+Target RejectNegative
+
+pthread
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/i386/abmintrin.h b/gcc/config/i386/abmintrin.h
new file mode 100644
index 000000000..9d87f5745
--- /dev/null
+++ b/gcc/config/i386/abmintrin.h
@@ -0,0 +1,55 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <abmintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __ABM__
+# error "ABM instruction set not enabled"
+#endif /* __ABM__ */
+
+#ifndef _ABMINTRIN_H_INCLUDED
+#define _ABMINTRIN_H_INCLUDED
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt16 (unsigned short __X)
+{
+ return __builtin_clzs (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt (unsigned int __X)
+{
+ return __builtin_clz (__X);
+}
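+
+/* Usage sketch (editorial example, not upstream code): with -mabm in
+   effect, __lzcnt (0x00010000u) evaluates to 15, the number of leading
+   zero bits in its 32-bit argument.  */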
+
+#ifdef __x86_64__
+extern __inline unsigned long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt64 (unsigned long __X)
+{
+ return __builtin_clzl (__X);
+}
+#endif
+
+#endif /* _ABMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h
new file mode 100644
index 000000000..3647b3193
--- /dev/null
+++ b/gcc/config/i386/ammintrin.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the AMD Programmers
+   Manual Update, version 2.x.  */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
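+
+/* (Editorial note: the two routines above emit the SSE4A non-temporal
+   stores MOVNTSD/MOVNTSS, which write to memory while bypassing the
+   cache hierarchy; useful for write-once output buffers.)  */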
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
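+/* (Editorial note: when not optimizing, inline-function arguments are not
+   folded to constants, so the macro form below keeps the immediate
+   operands literal for the builtin.)  */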
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
+}
+#else
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
+}
+#else
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md
new file mode 100644
index 000000000..2896a154d
--- /dev/null
+++ b/gcc/config/i386/athlon.md
@@ -0,0 +1,1187 @@
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006,
+;; 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; AMD Athlon Scheduling
+;;
+;; The Athlon contains three pipelined FP units, three integer units and
+;; three address generation units.
+;;
+;; The predecode logic determines instruction boundaries within the 64-byte
+;; cache line, so the cache-line-straddling problem of the K6 might be an
+;; issue here as well, but it is not noted in the documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath decoder
+;; are available; they can decode three DirectPath instructions or one
+;; VectorPath instruction per cycle.
+;; Decoded macro instructions are then passed to the 72-entry instruction
+;; control unit, which passes them on to the specialized integer (18-entry)
+;; and FP (36-entry) schedulers.
+;;
+;; The load/store queue unit is not attached to the schedulers but
+;; communicates with all the execution units separately instead.
+
+(define_attr "athlon_decode" "direct,vector,double"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF")))
+ (const_string "vector")]
+ (const_string "direct")))
+
+(define_attr "amdfam10_decode" "direct,vector,double"
+ (const_string "direct"))
+;;
+;; decode0 decode1 decode2
+;; \ | /
+;; instruction control unit (72 entry scheduler)
+;; | |
+;; integer scheduler (18) stack map
+;; / | | | | \ stack rename
+;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler
+;; | agu0 | agu1 agu2 register file
+;; | \ | | / | | |
+;; \ /\ | / fadd fmul fstore
+;; \ / \ | / fadd fmul fstore
+;; imul load/store (2x) fadd fmul fstore
+
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that a double decoded instruction may take 2 cycles
+;; to decode when decoder2, and then decoder0 in the next cycle, are
+;; used (this is needed to allow a throughput of 1.5 double decoded
+;; instructions per cycle).
+;;
+;; In order to avoid a dependence between the reservation of the decoder
+;; and other units, we model the decoder as a two-stage, fully pipelined
+;; unit where only a double decoded instruction may occupy the unit in the
+;; first cycle.  With this scheme, however, two double instructions can be
+;; issued in cycle 0.
+;;
+;; Avoid this by using a presence set requiring decoder0 to be allocated
+;; too.  Vector decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that with a specialized vector decoder unit and an exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+ (athlon-decode0 | athlon-decode1
+ | athlon-decode2)")
+;; Double instructions behave like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+ | (nothing,(athlon-decode0 + athlon-decode1))
+ | (nothing,(athlon-decode1 + athlon-decode2)))")
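+;; (Editorial example: a double decoded insn thus either takes decode2 in
+;; cycle 0 and decode0 in cycle 1, or a pair of plain decoders in cycle 1,
+;; which yields the 1.5 instructions/cycle throughput mentioned above.)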
+
+;; Modeling the AGU and IEU units results in extremely large automata,
+;; and in our approximation they are hardly ever filled.  Only the IEU
+;; unit can be, as the issue rate is 3 and the AGU unit is always used
+;; first in the insn reservations.  Skip those models.
+
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing")
+
+(define_cpu_unit "athlon-mult" "athlon_mult")
+
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+ (athlon-load0 | athlon-load1),nothing")
+;; 128bit SSE instructions issue two loads at once
+(define_reservation "athlon-load2" "athlon-agu,
+ (athlon-load0 + athlon-load1),nothing")
+
+(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
+;; 128bit SSE instructions issue two stores at once
+(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
+
+
+;; The FP operations start to execute at stage 12 in the pipeline, while
+;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
+;; Compensate for the difference for Athlon, because it results in
+;; significantly smaller automata.
+(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
+(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
+
+
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
+
+;; Vector operations usually consume many of the pipes.
+(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
+
+
+;; Jump instructions are executed in the branch unit, completely transparently to us.
+(define_insn_reservation "athlon_branch" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "ibr"))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_call" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "call,callv"))
+ "athlon-vector,athlon-ieu")
+(define_insn_reservation "athlon_call_amdfam10" 0
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "call,callv"))
+ "athlon-double,athlon-ieu")
+
+;; The latency of the push operation is 3 cycles, but the ESP value is
+;; available earlier.
+(define_insn_reservation "athlon_push" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "push"))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-vector,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_pop_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_pop_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "pop"))
+ "athlon-direct,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "leave"))
+ "athlon-vector,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave_k8" 3
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "leave"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+
+;; Lea executes in the AGU unit with a latency of 2 cycles.
+(define_insn_reservation "athlon_lea" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
+;; Lea executes in the AGU unit with a latency of 1 cycle on AMDFAM10.
+(define_insn_reservation "athlon_lea_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
+
+;; Mul executes in a special multiplier unit attached to IEU0.
+(define_insn_reservation "athlon_imul" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load,both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+
+;; Idiv cannot execute in parallel with other instructions.  Treating it
+;; as a short latency vector instruction is a good approximation that keeps
+;; the scheduler from trying too hard to hide its latency by overlapping it
+;; with other instructions.
+;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
+;; of other code.
+;; Use the same heuristics for amdfam10 as for K8 with idiv.
+
+(define_insn_reservation "athlon_idiv" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
+(define_insn_reservation "athlon_idiv_mem" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
+;; The parallelism of string instructions is not documented.  Model it the
+;; same way as idiv to create smaller automata.  This probably does not
+;; matter much.
+;; Use the same heuristics for amdfam10 as for K8 with idiv.
+(define_insn_reservation "athlon_str" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "athlon-vector,athlon-load,athlon-ieu0*6")
+
+(define_insn_reservation "athlon_idirect" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_idirect_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+
+(define_insn_reservation "athlon_idirect_loadmov" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load")
+
+(define_insn_reservation "athlon_idirect_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+
+(define_insn_reservation "athlon_idirect_movstore" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "athlon-direct,athlon-agu,athlon-store")
+
+(define_insn_reservation "athlon_idirect_both" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+
+(define_insn_reservation "athlon_ivector_both" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+
+(define_insn_reservation "athlon_idirect_store" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+
+(define_insn_reservation "athlon_ivector_store" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
+
+;; Athlon floating point unit
+(define_insn_reservation "athlon_fldxf" 12
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2,athlon-fvector*9")
+(define_insn_reservation "athlon_fldxf_k8" 13
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
+;; Assume superforwarding takes place, so the effective latency of the fany op is 0.
+(define_insn_reservation "athlon_fld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_fld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+
+(define_insn_reservation "athlon_fstxf" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
+(define_insn_reservation "athlon_fstxf_k8" 8
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
+(define_insn_reservation "athlon_fst" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fst_k8" 2
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fist" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fistp,fisttp"))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fmov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fmov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fop"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fmul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fsgn"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load_k8" 13
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_k8" 11
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fpspc" 100
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fpspc"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fcmov" 7
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load_k8" 17
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_k8" 15
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+;; fcomi is vector decoded but uses only one pipe.
+(define_insn_reservation "athlon_fcomi_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcomi_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "type" "fcmp")))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fcmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+;; Never seen by the scheduler because we still don't do post reg-stack
+;; scheduling.
+;(define_insn_reservation "athlon_fxch" 2
+; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+; (eq_attr "type" "fxch"))
+; "athlon-direct,athlon-fpsched,athlon-fany")
+
+;; Athlon handles MMX operations in the FPU unit with shorter latencies.
+
+(define_insn_reservation "athlon_movlpd_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_movlpd_load_k8" 2
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_movsd_load_generic64" 2
+ (and (eq_attr "cpu" "generic64")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
+(define_insn_reservation "athlon_movaps_load_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
+(define_insn_reservation "athlon_movss_load" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,(athlon-fany*2)")
+(define_insn_reservation "athlon_movss_load_k8" 1
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
+(define_insn_reservation "athlon_mmxsseld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_mmxsseld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
+;; loads generated are direct path, have a latency of 2 and do not use any
+;; FP execution units.  There are no separate entries for movlpx/movhpx
+;; loads, which are direct path, have a latency of 4 and use the FADD/FMUL
+;; FP execution units, as they will not be generated.
+(define_insn_reservation "athlon_sseld_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8")
+;; On AMDFAM10 MMX data loads generated are direct path, have a latency of 4
+;; and can use any FP execution unit.
+(define_insn_reservation "athlon_mmxld_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8, athlon-fany")
+(define_insn_reservation "athlon_mmxssest" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_short" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+;; On AMDFAM10 all double, single and integer packed SSEx data stores
+;; generated are double path, have a latency of 2 and use the FSTORE FP
+;; execution unit.  There are no separate entries for movupx/movdqu, which
+;; are vector path, have a latency of 3 and use the FSTORE*2 FP execution
+;; unit, as they will not be generated.
+(define_insn_reservation "athlon_ssest_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
+;; data stores generated are direct path, have a latency of 2 and use
+;; the FSTORE FP execution unit.
+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_movaps_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
+(define_insn_reservation "athlon_movaps" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
+(define_insn_reservation "athlon_mmxssemov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmov,ssemov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "unit" "mmx")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "unit" "mmx"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well.  For scalar
+;; operations the latency is the same as for i387 operations.
+
+(define_insn_reservation "athlon_sselog_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
+(define_insn_reservation "athlon_sselog" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-vector,athlon-fpsched,athlon-fmul*2")
+(define_insn_reservation "athlon_sselog_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-double,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_sselog_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
+
+;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
+(define_insn_reservation "athlon_ssecmp_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "mode" "SF,DF,DI,TI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssecmp"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssecmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "ssecomi"))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
+ (eq_attr "type" "ssecomi"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "mode" "SF,DF,DI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sseadd"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sseadd"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+
+;; Conversions behave very irregularly and the scheduling is critical here.
+;; Take each instruction separately.  Assume that the mode is always set to
+;; the destination one and athlon_decode is set to the K8 versions.
+
+;; cvtss2sd
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (eq_attr "mode" "DF"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (eq_attr "mode" "DF"))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
+;; cvtps2pd.  Model the same way as the other double decoded FP conversions.
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
+;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
+;; cvtsi2sd has throughput 1 and is executed in the store unit with latency of 6
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2ss mem, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fpload,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2ss reg, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
+;; ??? Why is it faster than cvtsd2ss?
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector*2")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
+;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
+;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
+
+
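+;; Reading aid: in the reservation strings a comma advances one cycle,
+;; "+" claims units in the same cycle, "|" takes either alternative, and
+;; "unit*N" holds a unit for N consecutive cycles.  So, for instance,
+;; "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)" uses a
+;; double-decode slot, then the K8 FP load port, then the add/multiply
+;; and store units together in the following cycle.
+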
+(define_insn_reservation "athlon_ssemul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssemul"))
+ "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssemul"))
+ "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssemul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+;; divsd timings. divss is faster
+(define_insn_reservation "athlon_ssediv_load" 20
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv_load_k8" 22
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv" 20
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector_load" 39
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector" 39
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_k8" 39
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssediv"))
+ "athlon-double,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssediv"))
+ "athlon-direct,athlon-fmul*17")
+(define_insn_reservation "athlon_sseins_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseins")
+ (eq_attr "mode" "TI")))
+ "athlon-vector,athlon-fpsched,athlon-faddmul")
diff --git a/gcc/config/i386/atom.md b/gcc/config/i386/atom.md
new file mode 100644
index 000000000..3c2b95758
--- /dev/null
+++ b/gcc/config/i386/atom.md
@@ -0,0 +1,796 @@
+;; Atom Scheduling
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Atom is an in-order core with two integer pipelines.
+
+
+(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
+ (const_string "other"))
+
+(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
+ (const_string "other"))
+
+(define_automaton "atom")
+
+;; Atom has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "atom-port-0,atom-port-1" "atom")
+
+;; EU: Execution Unit
+;; Atom EUs are connected to port 0 or port 1.
+
+(define_cpu_unit "atom-eu-0, atom-eu-1,
+ atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
+ "atom")
+
+;; Some EUs have duplicated copies and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
+
+;;; Some instructions are dual-pipe execution and need both ports
+;;; Complex multi-op macro-instructions need both ports and all EUs
+(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
+(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
+ atom-imul-1 + atom-imul-2 + atom-imul-3 +
+ atom-imul-4)")
+
+;;; Most simple instructions have 1 cycle latency.  Some of them
+;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
+(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
+(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
+
+;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput
+(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
+
+;;; fmul insns can have 4 or 5 cycles of latency
+(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
+(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
+
+;;; fadd can have 5 cycles of latency depending on instruction form
+(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
+
+;;; imul insns have 5 cycles of latency
+(define_reservation "atom-imul-32"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
+ atom-port-0")
+;;; imul instruction excludes other non-FP instructions.
+(exclusion_set "atom-eu-0, atom-eu-1"
+ "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
+
+;;; Dual-execution instructions can have 1, 2, 4 or 5 cycles of latency
+;;; depending on instruction form
+(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
+(define_reservation "atom-dual-2c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
+(define_reservation "atom-dual-5c"
+ "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
+
+;;; Complex macro-instructions have varying latency and use both ports.
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
+
+(define_insn_reservation "atom_other" 9
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "!jeu")))
+ "atom-complex, atom-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation "atom_other_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "jeu")))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_multi" 9
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "multi"))
+ "atom-complex, atom-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation "atom_alu" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Normal alu insns without carry, memory variant
+(define_insn_reservation "atom_alu_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "0"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as adc/sbb
+(define_insn_reservation "atom_alu_carry" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+;; Alu insn consuming CF, such as adc/sbb, memory variant
+(define_insn_reservation "atom_alu_carry_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "1"))))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_alu1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_negnot_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_imov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32
+(define_insn_reservation "atom_imovx" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation "atom_imovx_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "atom-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation "atom_imovx_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation "atom_imovx_2_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "atom-simple-0")
+
+;; 16<-8
+(define_insn_reservation "atom_imovx_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imovx")
+ (and (match_operand:HI 0 "register_operand")
+ (match_operand:QI 1 "general_operand"))))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_lea" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "!HI")))
+ "atom-simple-either")
+
+;; lea with a 16-bit address is a complex insn
+(define_insn_reservation "atom_lea_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "HI")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_incdec" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_incdec_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; simple shift instructions use the SHIFT EU, no memory operand
+(define_insn_reservation "atom_ishift" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; simple shift instructions use the SHIFT EU, memory operand
+(define_insn_reservation "atom_ishift_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+ "atom-simple-0")
+
+;; DF shift (prefixed with 0f) is a complex insn with a latency of 7 cycles
+(define_insn_reservation "atom_ishift_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift")
+ (eq_attr "prefix_0f" "1")))
+ "atom-complex, atom-all-eu*6")
+
+(define_insn_reservation "atom_ishift1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_ishift1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_rotate1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_imul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+(define_insn_reservation "atom_imul_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+ "atom-imul-32")
+
+;; latency set to 10 for the common 64x64 imul
+(define_insn_reservation "atom_imul_3" 10
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "!SI")))
+ "atom-complex, atom-all-eu*9")
+
+(define_insn_reservation "atom_idiv" 65
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "idiv"))
+ "atom-complex, atom-all-eu*32, nothing*32")
+
+(define_insn_reservation "atom_icmp" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmp_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_test_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ibr" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "!load")))
+ "atom-simple-1")
+
+;; complex if the jump target is loaded from memory
+(define_insn_reservation "atom_ibr_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_setcc" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "!store")))
+ "atom-simple-either")
+
+;; 2-cycle complex insn if the target is in memory
+(define_insn_reservation "atom_setcc_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "store")))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_icmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_icmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_push" 2
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "push"))
+ "atom-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "DI")))
+ "atom-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation "atom_pop_2" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "!DI")))
+ "atom-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "atom_call" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "call"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_callv" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "callv"))
+ "atom-dual-1c")
+
+(define_insn_reservation "atom_leave" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "leave"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_str" 3
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "str"))
+ "atom-complex, atom-all-eu*2")
+
+(define_insn_reservation "atom_sselog" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_sselog1" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "none")))
+ "atom-simple-0")
+
+(define_insn_reservation "atom_sselog1_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "!none")))
+ "atom-simple-0")
+
+;; not pmadd, not psad
+(define_insn_reservation "atom_sseiadd" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "!simul")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-simple-either")
+
+;; pmadd, psad and 64-bit
+(define_insn_reservation "atom_sseiadd_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "DI")))))
+ "atom-fmul-4c")
+
+;; pmadd, psad and 128-bit
+(define_insn_reservation "atom_sseiadd_3" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "TI")))))
+ "atom-fmul-5c")
+
+;; if paddq (64-bit op), phadd/phsub
+(define_insn_reservation "atom_sseiadd_4" 6
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseiadd")
+ (ior (match_operand:V2DI 0 "register_operand")
+ (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*5")
+
+;; if immediate op.
+(define_insn_reservation "atom_sseishft" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "!sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "atom-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation "atom_sseishft_2" 1
+ (and (eq_attr "cpu" "atom")
+ (ior (eq_attr "type" "sseishft1")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "sishuf")
+ (match_operand 2 "immediate_operand")))))
+ "atom-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation "atom_sseishft_3" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseishft")
+ (not (match_operand 2 "immediate_operand"))))
+ "atom-complex, atom-all-eu")
+
+(define_insn_reservation "atom_sseimul" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "sseimul"))
+ "atom-simple-0")
+
+;; rcpss or rsqrtss
+(define_insn_reservation "atom_sse" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+ "atom-fmul-4c")
+
+;; movshdup, movsldup.  Consider retyping these as sseishft
+(define_insn_reservation "atom_sse_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "movdup")))
+ "atom-simple-0")
+
+;; lfence
+(define_insn_reservation "atom_sse_3" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "lfence")))
+ "atom-simple-either")
+
+;; sfence, clflush, mfence, prefetch
+(define_insn_reservation "atom_sse_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (eq_attr "atom_sse_attr" "fence")
+ (eq_attr "atom_sse_attr" "prefetch"))))
+ "atom-simple-0")
+
+;; rcpps, rsqrtps, sqrt, ldmxcsr
+(define_insn_reservation "atom_sse_5" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sse")
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+ (eq_attr "atom_sse_attr" "mxcsr"))
+ (and (eq_attr "atom_sse_attr" "rcp")
+ (eq_attr "mode" "V4SF")))))
+ "atom-complex, atom-all-eu*6")
+
+;; xmm->xmm
+(define_insn_reservation "atom_ssemov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
+ "atom-simple-either")
+
+;; reg->xmm
+(define_insn_reservation "atom_ssemov_2" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
+ "atom-simple-0")
+
+;; xmm->reg
+(define_insn_reservation "atom_ssemov_3" 3
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
+ "atom-eu-0-3-1")
+
+;; mov mem
+(define_insn_reservation "atom_ssemov_4" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+ "atom-simple-0")
+
+;; movu mem
+(define_insn_reservation "atom_ssemov_5" 2
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemov")
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+ "atom-complex, atom-all-eu")
+
+;; no memory simple
+(define_insn_reservation "atom_sseadd" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-fadd-5c")
+
+;; memory simple
+(define_insn_reservation "atom_sseadd_mem" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "atom-dual-5c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation "atom_sseadd_3" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseadd")
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+ "atom-complex, atom-all-eu*7")
+
+;; Except dppd/dpps
+(define_insn_reservation "atom_ssemul" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "!SF")))
+ "atom-fmul-5c")
+
+;; Except dppd/dpps, 4 cycles if mulss
+(define_insn_reservation "atom_ssemul_2" 4
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF")))
+ "atom-fmul-4c")
+
+(define_insn_reservation "atom_ssecmp" 1
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecmp"))
+ "atom-simple-either")
+
+(define_insn_reservation "atom_ssecomi" 10
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssecomi"))
+ "atom-complex, atom-all-eu*9")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand")))))
+ "atom-fadd-5c")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "atom_ssecvt_2" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "memory_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand")))))
+ "atom-dual-5c")
+
+;; otherwise, 7 cycles average for cvtss2sd
+(define_insn_reservation "atom_ssecvt_3" 7
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "ssecvt")
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
+ "atom-complex, atom-all-eu*6")
+
+;; memory and cvtsi2sd
+(define_insn_reservation "atom_sseicvt" 5
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand"))))
+ "atom-dual-5c")
+
+;; otherwise, 8 cycles average for cvtsd2si
+(define_insn_reservation "atom_sseicvt_2" 8
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "sseicvt")
+ (not (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand")))))
+ "atom-complex, atom-all-eu*7")
+
+(define_insn_reservation "atom_ssediv" 62
+ (and (eq_attr "cpu" "atom")
+ (eq_attr "type" "ssediv"))
+ "atom-complex, atom-all-eu*12, nothing*49")
+
+;; simple for fmov
+(define_insn_reservation "atom_fmov" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none")))
+ "atom-simple-either")
+
+;; simple for fmov, memory variant
+(define_insn_reservation "atom_fmov_mem" 1
+ (and (eq_attr "cpu" "atom")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "!none")))
+ "atom-simple-either")
+
+;; Define bypass here
+
+;; There will be no stall from lea to non-mem EX insns
+(define_bypass 0 "atom_lea"
+ "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec, atom_setcc, atom_icmov, atom_pop")
+
+(define_bypass 0 "atom_lea"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "!ix86_agi_dependent")
+
+;; There will be a 3-cycle stall from EX insns to AGEN insns (lea)
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_lea")
+
+;; There will be a 3-cycle stall from EX insns to insns that need address calculation
+(define_bypass 4 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_agi_dependent")
+
+;; Stall from imul to lea is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
+
+;; Stall from imul to memory address is 8 cycles.
+(define_bypass 9 "atom_imul, atom_imul_mem"
+ "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
+ atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
+ atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
+ atom_test_mem, atom_icmov_mem, atom_sselog_mem,
+ atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
+ "ix86_agi_dependent")
+
+;; There will be a 0-cycle stall from cmp/test to jcc
+
+;; There will be a 1-cycle stall from a flag producer to cmov and adc/sbb
+(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
+ atom_alu1, atom_negnot, atom_incdec, atom_ishift,
+ atom_ishift1, atom_rotate, atom_rotate1"
+ "atom_icmov, atom_alu_carry")
+
+;; lea to shift count stall is 2 cycles
+(define_bypass 3 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
+
+;; lea to shift source stall is 1 cycle
+(define_bypass 2 "atom_lea"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
+ "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count stall is 1 cycle
+(define_bypass 2 "atom_alu_carry,
+ atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
+ atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
+ atom_rotate1, atom_setcc, atom_icmov, atom_pop,
+ atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
+ atom_imovx_mem, atom_imovx_2_mem,
+ atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
+ "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
+ atom_ishift_mem, atom_ishift1_mem,
+ atom_rotate_mem, atom_rotate1_mem"
+ "ix86_dep_by_shift_count")
diff --git a/gcc/config/i386/att.h b/gcc/config/i386/att.h
new file mode 100644
index 000000000..c16b2f922
--- /dev/null
+++ b/gcc/config/i386/att.h
@@ -0,0 +1,92 @@
+/* Definitions for AT&T assembler syntax for the Intel 80386.
+ Copyright (C) 1988, 1996, 2000, 2001, 2002, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Define the syntax of instructions and addresses. */
+
+/* Prefix for internally generated assembler labels. */
+#define LPREFIX ".L"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_BYTE "\t.byte\t"
+#define ASM_SHORT "\t.value\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t" /* Should not be used for 32-bit compilation. */
+
+/* How to output an ASCII string constant. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, PTR, SIZE) \
+do \
+{ size_t i = 0, limit = (SIZE); \
+ while (i < limit) \
+ { if (i%10 == 0) { if (i!=0) putc ('\n', (FILE)); \
+ fputs (ASM_BYTE, (FILE)); } \
+ else putc (',', (FILE)); \
+ fprintf ((FILE), "0x%x", ((PTR)[i++] & 0377)) ;} \
+ putc ('\n', (FILE)); \
+} while (0)
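+
+/* For example, for the two-byte string "Hi" the macro above emits
+ "\t.byte\t0x48,0x69\n", starting a fresh .byte directive after every
+ ten bytes. */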
+
+/* Output at beginning of assembler file. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", 1<<(LOG))
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf ((FILE), "\t.set .,.+%u\n", (int)(SIZE))
+
+/* Can't use ASM_OUTPUT_SKIP in text section; it doesn't leave 0s. */
+
+#define ASM_NO_SKIP_IN_TEXT 1
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* The prefix to add for compiler private assembler symbols. */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string BUF
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), LOCAL_LABEL_PREFIX "%s%ld", (PREFIX), (long)(NUMBER))
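+
+/* E.g. ASM_GENERATE_INTERNAL_LABEL (buf, "LC", 0) stores ".LC0" in BUF,
+ using the "." LOCAL_LABEL_PREFIX defined above. */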
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
new file mode 100644
index 000000000..6d4213dc8
--- /dev/null
+++ b/gcc/config/i386/avxintrin.h
@@ -0,0 +1,1426 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 11.0. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v4df __attribute__ ((__vector_size__ (32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m256 __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef double __m256d __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+
+/* Compare predicates for scalar and packed compare intrinsics. */
+
+/* Equal (ordered, non-signaling) */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling) */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling) */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling) */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling) */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling) */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling) */
+#define _CMP_NLE_US 0x06
+/* Ordered (non-signaling) */
+#define _CMP_ORD_Q 0x07
+/* Equal (unordered, non-signaling) */
+#define _CMP_EQ_UQ 0x08
+/* Not-greater-than-or-equal (unordered, signaling) */
+#define _CMP_NGE_US 0x09
+/* Not-greater-than (unordered, signaling) */
+#define _CMP_NGT_US 0x0a
+/* False (ordered, non-signaling) */
+#define _CMP_FALSE_OQ 0x0b
+/* Not-equal (ordered, non-signaling) */
+#define _CMP_NEQ_OQ 0x0c
+/* Greater-than-or-equal (ordered, signaling) */
+#define _CMP_GE_OS 0x0d
+/* Greater-than (ordered, signaling) */
+#define _CMP_GT_OS 0x0e
+/* True (unordered, non-signaling) */
+#define _CMP_TRUE_UQ 0x0f
+/* Equal (ordered, signaling) */
+#define _CMP_EQ_OS 0x10
+/* Less-than (ordered, non-signaling) */
+#define _CMP_LT_OQ 0x11
+/* Less-than-or-equal (ordered, non-signaling) */
+#define _CMP_LE_OQ 0x12
+/* Unordered (signaling) */
+#define _CMP_UNORD_S 0x13
+/* Not-equal (unordered, signaling) */
+#define _CMP_NEQ_US 0x14
+/* Not-less-than (unordered, non-signaling) */
+#define _CMP_NLT_UQ 0x15
+/* Not-less-than-or-equal (unordered, non-signaling) */
+#define _CMP_NLE_UQ 0x16
+/* Ordered (signaling) */
+#define _CMP_ORD_S 0x17
+/* Equal (unordered, signaling) */
+#define _CMP_EQ_US 0x18
+/* Not-greater-than-or-equal (unordered, non-signaling) */
+#define _CMP_NGE_UQ 0x19
+/* Not-greater-than (unordered, non-signaling) */
+#define _CMP_NGT_UQ 0x1a
+/* False (ordered, signaling) */
+#define _CMP_FALSE_OS 0x1b
+/* Not-equal (ordered, signaling) */
+#define _CMP_NEQ_OS 0x1c
+/* Greater-than-or-equal (ordered, non-signaling) */
+#define _CMP_GE_OQ 0x1d
+/* Greater-than (ordered, non-signaling) */
+#define _CMP_GT_OQ 0x1e
+/* True (unordered, signaling) */
+#define _CMP_TRUE_US 0x1f
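+
+/* A usage sketch: each predicate above is passed as the immediate
+ argument of the vcmp intrinsics defined later in this file, yielding
+ an all-ones mask in every element where the predicate holds and
+ all-zeros elsewhere, e.g.
+
+ __m256d lt = _mm256_cmp_pd (a, b, _CMP_LT_OQ); */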
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+/* Double/single precision floating point blend instructions - select
+ data from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
+{
+ return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_pd(X, Y, M) \
+ ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(M)))
+
+#define _mm256_blend_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
+{
+ return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ (__v4df)__M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
+{
+ return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ (__v8sf)__M);
+}
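+
+/* Usage sketch: in the immediate forms bit i of the mask selects element
+ i from __Y when set and from __X when clear, so _mm256_blend_pd (x, y,
+ 0x5), say, takes elements 0 and 2 from y and elements 1 and 3 from x;
+ in the variable forms the sign bit of each element of __M makes the
+ same choice. */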
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_dp_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
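+
+/* Usage sketch: within each 128-bit lane the high four mask bits choose
+ which element products enter the sum and the low four bits choose
+ which result elements receive it (the rest are zeroed); a mask of
+ 0xf1, for instance, sums all four products of a lane into its
+ element 0. */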
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
+{
+ return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
+ __mask);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
+{
+ return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
+ __mask);
+}
+#else
+#define _mm256_shuffle_pd(A, B, N) \
+ ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(N)))
+
+#define _mm256_shuffle_ps(A, B, N) \
+ ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(N)))
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
+ __P);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
+ __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ps(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm256_cmp_pd(X, Y, P) \
+ ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P)))
+
+#define _mm256_cmp_ps(X, Y, P) \
+ ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_pd (__m128i __A)
+{
+ return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ps (__m256i __A)
+{
+ return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_ps (__m256d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_pd (__m128 __A)
+{
+ return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_pd (__m256d __X, const int __N)
+{
+ return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_ps (__m256 __X, const int __N)
+{
+ return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_si256 (__m256i __X, const int __N)
+{
+ return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi32 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ return _mm_extract_epi32 (__Y, __N % 4);
+}
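+
+/* E.g. _mm256_extract_epi32 (x, 5) pulls 128-bit half 1 (5 >> 2) out of
+ x and then element 1 (5 % 4) out of that half; the narrower extracts
+ below follow the same pattern. */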
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi16 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ return _mm_extract_epi16 (__Y, __N % 8);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi8 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ return _mm_extract_epi8 (__Y, __N % 16);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi64 (__m256i __X, const int __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ return _mm_extract_epi64 (__Y, __N % 2);
+}
+#endif
+#else
+#define _mm256_extractf128_pd(X, N) \
+ ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_ps(X, N) \
+ ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_si256(X, N) \
+ ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
+ (int)(N)))
+
+#define _mm256_extract_epi32(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ _mm_extract_epi32 (__Y, (N) % 4); \
+ }))
+
+#define _mm256_extract_epi16(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ _mm_extract_epi16 (__Y, (N) % 8); \
+ }))
+
+#define _mm256_extract_epi8(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ _mm_extract_epi8 (__Y, (N) % 16); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_extract_epi64(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ _mm_extract_epi64 (__Y, (N) % 2); \
+ }))
+#endif
+#endif
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroall (void)
+{
+ __builtin_ia32_vzeroall ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroupper (void)
+{
+ __builtin_ia32_vzeroupper ();
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_pd (__m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
+ (__v2di)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_pd (__m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
+ (__v4di)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_ps (__m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
+ (__v4si)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_ps (__m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
+ (__v8si)__C);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_pd (__m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_pd (__m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_ps (__m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_ps (__m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
+}
+#else
+#define _mm_permute_pd(X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
+
+#define _mm256_permute_pd(X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
+
+#define _mm_permute_ps(X, C) \
+ ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
+
+#define _mm256_permute_ps(X, C) \
+ ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
+{
+ return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
+{
+ return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __C);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
+{
+ return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
+ (__v8si)__Y,
+ __C);
+}
+#else
+#define _mm256_permute2f128_pd(X, Y, C) \
+ ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_ps(X, Y, C) \
+ ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_si256(X, Y, C) \
+ ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), \
+ (int)(C)))
+#endif
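+
+/* Usage sketch (illustrative only): control bits 1:0 select the
+   128-bit half written to the low result lane (0/1 from X, 2/3 from Y)
+   and bits 5:4 select the high lane, so
+
+     __m256d x = _mm256_set_pd (4.0, 3.0, 2.0, 1.0);
+     __m256d y = _mm256_permute2f128_pd (x, x, 0x01);
+
+   swaps the halves of x = { 1, 2, 3, 4 }, giving y = { 3, 4, 1, 2 }.  */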
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_ss (float const *__X)
+{
+ return (__m128) __builtin_ia32_vbroadcastss (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_sd (double const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ss (float const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastss256 (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_pd (__m128d const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ps (__m128 const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
+}
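+
+/* Usage sketch (illustrative only): the broadcast intrinsics replicate
+   a scalar or a 128-bit block loaded from memory across the whole
+   destination, so
+
+     static const float f = 3.0f;
+     __m256 v = _mm256_broadcast_ss (&f);
+
+   yields v = { 3, 3, 3, 3, 3, 3, 3, 3 }.  */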
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
+{
+ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
+ (__v2df)__Y,
+ __O);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
+{
+ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
+ (__v4sf)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
+{
+ return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
+ (__v4si)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
+}
+
+#ifdef __x86_64__
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
+}
+#endif
+#else
+#define _mm256_insertf128_pd(X, Y, O) \
+ ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_ps(X, Y, O) \
+ ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_si256(X, Y, O) \
+ ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
+ (__v4si)(__m128i)(Y), \
+ (int)(O)))
+
+#define _mm256_insert_epi32(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
+ }))
+
+#define _mm256_insert_epi16(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
+ }))
+
+#define _mm256_insert_epi8(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_insert_epi64(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
+ }))
+#endif
+#endif
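+
+/* Usage sketch (illustrative only): element 5 of an epi32 vector lives
+   in 128-bit lane 5 >> 2 == 1 at position 5 % 4 == 1, so
+
+     __m256i v = _mm256_setzero_si256 ();
+     v = _mm256_insert_epi32 (v, 42, 5);
+
+   sets element 5 of v to 42 and leaves the other elements zero.  */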
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_pd (double const *__P)
+{
+ return *(__m256d *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_pd (double *__P, __m256d __A)
+{
+ *(__m256d *)__P = __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ps (float const *__P)
+{
+ return *(__m256 *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ps (float *__P, __m256 __A)
+{
+ *(__m256 *)__P = __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_pd (double const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_pd (double *__P, __m256d __A)
+{
+ __builtin_ia32_storeupd256 (__P, (__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ps (float const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_storeups256 (__P, (__v8sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_si256 (__m256i const *__P)
+{
+ return *__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_si256 (__m256i *__P, __m256i __A)
+{
+ *__P = __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_si256 (__m256i *__P, __m256i __A)
+{
+ __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
+}
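+
+/* Usage sketch (illustrative only): the plain load/store forms require
+   32-byte alignment, while the *u forms accept any address.
+
+     double a[5] __attribute__ ((aligned (32))) = { 0., 1., 2., 3., 4. };
+     __m256d v = _mm256_load_pd (a);
+     __m256d w = _mm256_loadu_pd (a + 1);
+
+   Here v = { 0, 1, 2, 3 } and w = { 1, 2, 3, 4 }.  */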
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_pd (double const *__P, __m128i __M)
+{
+ return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
+ (__v2di)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
+{
+ __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_pd (double const *__P, __m256i __M)
+{
+ return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
+ (__v4di)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
+{
+ __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_ps (float const *__P, __m128i __M)
+{
+ return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
+ (__v4si)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
+{
+ __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_ps (float const *__P, __m256i __M)
+{
+ return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
+ (__v8si)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
+{
+ __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
+}
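+
+/* Usage sketch (illustrative only): the sign bit of each mask element
+   decides whether the corresponding element is loaded (stores work the
+   same way), so
+
+     double a[4] = { 1.0, 2.0, 3.0, 4.0 };
+     __m256i m = _mm256_set_epi64x (0, -1, 0, -1);
+     __m256d v = _mm256_maskload_pd (a, m);
+
+   loads elements 0 and 2 only, giving v = { 1.0, 0.0, 3.0, 0.0 }.  */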
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movehdup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_moveldup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movedup_pd (__m256d __X)
+{
+ return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lddqu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_si256 (__m256i *__A, __m256i __B)
+{
+ __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_pd (double *__A, __m256d __B)
+{
+ __builtin_ia32_movntpd256 (__A, (__v4df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_movntps256 (__P, (__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_pd (__m256d __V, const int __M)
+{
+ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_ps (__m256 __V, const int __M)
+{
+ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+}
+#else
+#define _mm256_round_pd(V, M) \
+ ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
+
+#define _mm256_round_ps(V, M) \
+ ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
+#endif
+
+#define _mm256_ceil_pd(V) _mm256_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
+#define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
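+
+/* Usage sketch (illustrative only): _MM_FROUND_FLOOR rounds toward
+   negative infinity, so
+
+     __m256d v = _mm256_set1_pd (-1.5);
+     __m256d f = _mm256_floor_pd (v);
+
+   sets every element of f to -2.0.  */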
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
+}
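+
+/* Usage sketch (illustrative only): _mm256_testz_si256 (a, b) returns
+   1 iff (a & b) is all zeros, which makes a cheap early-out test.
+
+     __m256i a = _mm256_set1_epi32 (0x0F);
+     __m256i b = _mm256_set1_epi32 (0xF0);
+     int z = _mm256_testz_si256 (a, b);
+
+   Here z == 1, since the two vectors share no set bits.  */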
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_pd (__m256d __A)
+{
+ return __builtin_ia32_movmskpd256 ((__v4df)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_ps (__m256 __A)
+{
+ return __builtin_ia32_movmskps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_pd (void)
+{
+ return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_ps (void)
+{
+ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_si256 (void)
+{
+ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
+}
+
+/* Create the vector [A B C D]. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m256d){ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return __extension__ (__m256){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return __extension__ (__m256i)(__v16hi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m256i)(__v32qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+ return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_ps (float __A)
+{
+ return __extension__ (__m256){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi32 (int __A)
+{
+ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi16 (short __A)
+{
+ return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi8 (char __A)
+{
+ return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi64x (long long __A)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
+}
+
+/* Create vectors with elements in the reverse order of the
+   _mm256_set_XXX functions.  */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_pd (double __A, double __B, double __C, double __D)
+{
+ return _mm256_set_pd (__D, __C, __B, __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19,
+ __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27,
+ __q28, __q29, __q30, __q31);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return _mm256_set_epi64x (__D, __C, __B, __A);
+}
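+
+/* Usage sketch (illustrative only): _mm256_set_pd lists elements from
+   the highest index down, while _mm256_setr_pd lists them in memory
+   order, so
+
+     __m256d a = _mm256_set_pd (4.0, 3.0, 2.0, 1.0);
+     __m256d b = _mm256_setr_pd (1.0, 2.0, 3.0, 4.0);
+
+   produce identical vectors whose element 0 is 1.0 and element 3 is
+   4.0.  */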
+
+/* Casts between various SP, DP, INT vector types.  Note that these do
+   not convert values; they just change the type.  */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ps (__m256d __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_si256 (__m256d __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_pd (__m256 __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_si256(__m256 __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ps (__m256i __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_pd (__m256i __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd256_pd128 (__m256d __A)
+{
+ return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps256_ps128 (__m256 __A)
+{
+ return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_si128 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
+}
+
+/* When a cast is done from a 128-bit to a 256-bit type, the low 128
+   bits of the 256-bit result contain the source parameter value and
+   the upper 128 bits of the result are undefined.  These intrinsics
+   shouldn't generate any extra moves.  */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd128_pd256 (__m128d __A)
+{
+ return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps128_ps256 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi128_si256 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
+}
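+
+/* Usage sketch (illustrative only): the casts above compile to no
+   instructions; after widening, only the low 128 bits are defined.
+
+     __m128d lo = _mm_set1_pd (1.0);
+     __m256d w = _mm256_castpd128_pd256 (lo);
+     __m128d back = _mm256_castpd256_pd128 (w);
+
+   Here back equals lo, while the upper half of w must not be used.  */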
diff --git a/gcc/config/i386/avxmath.h b/gcc/config/i386/avxmath.h
new file mode 100644
index 000000000..997842b10
--- /dev/null
+++ b/gcc/config/i386/avxmath.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT FPMATH_SSE
+
+#undef TARGET_SUBTARGET_ISA_DEFAULT
+#define TARGET_SUBTARGET_ISA_DEFAULT \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2 \
+ | OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3 \
+ | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2 \
+ | OPTION_MASK_ISA_AVX)
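+
+/* With these defaults the subtarget enables MMX through AVX without
+   explicit -m flags and uses SSE floating-point math by default, the
+   equivalent of -mfpmath=sse.  */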
+
diff --git a/gcc/config/i386/bdver1.md b/gcc/config/i386/bdver1.md
new file mode 100644
index 000000000..3cde476b5
--- /dev/null
+++ b/gcc/config/i386/bdver1.md
@@ -0,0 +1,796 @@
+;; Copyright (C) 2010, Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; AMD bdver1 Scheduling
+;;
+;; The bdver1 contains four pipelined FP units, two integer units and
+;; two address generation units.
+;;
+;; The predecode logic determines instruction boundaries within the
+;; 64-byte cache line, so the cache-line-straddling problem of the K6
+;; might be an issue here as well, though it is not noted in the
+;; documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath
+;; decoder are available.  They can decode three DirectPath
+;; instructions or one VectorPath instruction per cycle.
+;;
+;; The load/store queue unit is not attached to the schedulers; it
+;; instead communicates with all the execution units separately.
+
+
+(define_attr "bdver1_decode" "direct,vector,double"
+ (const_string "direct"))
+
+(define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp")
+
+(define_cpu_unit "bdver1-decode0" "bdver1")
+(define_cpu_unit "bdver1-decode1" "bdver1")
+(define_cpu_unit "bdver1-decode2" "bdver1")
+(define_cpu_unit "bdver1-decodev" "bdver1")
+
+;; Model the fact that a double-decoded instruction may take 2 cycles
+;; to decode when decoder2 is used together with decoder0 in the next
+;; cycle (this is needed to allow a throughput of 1.5 double-decoded
+;; instructions per cycle).
+;;
+;; In order to avoid a dependence between the reservation of the
+;; decoder and the other units, we model the decoder as a two-stage,
+;; fully pipelined unit in which only a double-decoded instruction may
+;; occupy the unit in the first cycle.  With this scheme, however, two
+;; double instructions could be issued in cycle 0.
+;;
+;; Avoid this by using a presence set that requires decoder0 to be
+;; allocated too.  Vector-decoded instructions then can't be issued
+;; when modeled as consuming decoder0+decoder1+decoder2; we solve that
+;; with a specialized vector decoder unit and an exclusion set.
+(presence_set "bdver1-decode2" "bdver1-decode0")
+(exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2")
+
+(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
+(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
+(define_reservation "bdver1-direct" "nothing,
+ (bdver1-decode0 | bdver1-decode1
+ | bdver1-decode2)")
+;; Double instructions behave like two direct instructions.
+(define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
+ | (nothing,(bdver1-decode0 + bdver1-decode1))
+ | (nothing,(bdver1-decode1 + bdver1-decode2)))")
+
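+;; For example, under the first alternative above a double-decoded
+;; instruction occupies bdver1-decode2 in one cycle and bdver1-decode0
+;; in the next, which is how the two-cycle decode described earlier is
+;; expressed.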
+
+(define_cpu_unit "bdver1-ieu0" "bdver1_int")
+(define_cpu_unit "bdver1-ieu1" "bdver1_int")
+(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")
+
+(define_cpu_unit "bdver1-agu0" "bdver1_int")
+(define_cpu_unit "bdver1-agu1" "bdver1_int")
+(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")
+
+(define_cpu_unit "bdver1-mult" "bdver1_mult")
+
+(define_cpu_unit "bdver1-load0" "bdver1_load")
+(define_cpu_unit "bdver1-load1" "bdver1_load")
+(define_reservation "bdver1-load" "bdver1-agu,
+ (bdver1-load0 | bdver1-load1),nothing")
+;; 128bit SSE instructions issue two loads at once.
+(define_reservation "bdver1-load2" "bdver1-agu,
+ (bdver1-load0 + bdver1-load1),nothing")
+
+(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
+;; 128bit SSE instructions issue two stores at once.
+(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
+
+;; The FP operations start to execute at stage 12 in the pipeline,
+;; while integer operations start to execute at stage 9 for Athlon and
+;; stage 11 for K8.  Compensate for the difference on Athlon because it
+;; results in significantly smaller automata.
+;; NOTE: the above information was just copied from athlon.md and has
+;; not actually been verified for bdver1.
+(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
+(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")
+
+;; Four FP units.
+(define_cpu_unit "bdver1-ffma0" "bdver1_fp")
+(define_cpu_unit "bdver1-ffma1" "bdver1_fp")
+(define_cpu_unit "bdver1-fmal0" "bdver1_fp")
+(define_cpu_unit "bdver1-fmal1" "bdver1_fp")
+
+(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
+(define_reservation "bdver1-fcvt" "bdver1-ffma0")
+(define_reservation "bdver1-fmma" "bdver1-ffma0")
+(define_reservation "bdver1-fxbar" "bdver1-ffma1")
+(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
+(define_reservation "bdver1-fsto" "bdver1-fmal1")
+
+;; Vector operations usually consume many of the pipes.
+(define_reservation "bdver1-fvector" "(bdver1-ffma0 + bdver1-ffma1
+ + bdver1-fmal0 + bdver1-fmal1)")
+
+;; Jump instructions are executed in the branch unit, completely transparently to us.
+(define_insn_reservation "bdver1_call" 0
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "call,callv"))
+ "bdver1-double,bdver1-agu,bdver1-ieu")
+;; PUSH mem is double path.
+(define_insn_reservation "bdver1_push" 1
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "push"))
+ "bdver1-direct,bdver1-agu,bdver1-store")
+;; POP r16/mem are double path.
+(define_insn_reservation "bdver1_pop" 1
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "pop"))
+ "bdver1-direct,(bdver1-ieu+bdver1-load)")
+;; LEAVE: no latency info so far; assume the same as amdfam10.
+(define_insn_reservation "bdver1_leave" 3
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "leave"))
+ "bdver1-vector,(bdver1-ieu+bdver1-load)")
+;; LEA executes in the AGU unit with 1-cycle latency on BDVER1.
+(define_insn_reservation "bdver1_lea" 1
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "lea"))
+ "bdver1-direct,bdver1-agu,nothing")
+
+;; MUL executes in a special multiplier unit attached to IEU1.
+(define_insn_reservation "bdver1_imul_DI" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none,unknown"))))
+ "bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1")
+(define_insn_reservation "bdver1_imul" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1")
+(define_insn_reservation "bdver1_imul_mem_DI" 10
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load,both"))))
+ "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu")
+(define_insn_reservation "bdver1_imul_mem" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu")
+
+;; IDIV cannot execute in parallel with other instructions.  Treating
+;; it as a short-latency vector instruction is a good approximation
+;; that keeps the scheduler from trying too hard to hide its latency by
+;; overlapping it with other instructions.
+;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
+;; of the other code.
+(define_insn_reservation "bdver1_idiv" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")
+
+(define_insn_reservation "bdver1_idiv_mem" 10
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")
+
+;; The parallelism of string instructions is not documented.  Model
+;; them the same way as IDIV to create smaller automata; this probably
+;; does not matter much.  We use the same heuristics for bdver1 as for
+;; amdfam10 and K8 with IDIV.
+(define_insn_reservation "bdver1_str" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "bdver1-vector,bdver1-load,bdver1-ieu0*6")
+
+;; Integer instructions.
+(define_insn_reservation "bdver1_idirect" 1
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "bdver1-direct,bdver1-ieu")
+(define_insn_reservation "bdver1_ivector" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "bdver1-vector,bdver1-ieu,bdver1-ieu")
+(define_insn_reservation "bdver1_idirect_loadmov" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-load")
+(define_insn_reservation "bdver1_idirect_load" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-load,bdver1-ieu")
+(define_insn_reservation "bdver1_ivector_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
+(define_insn_reservation "bdver1_idirect_movstore" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "bdver1-direct,bdver1-agu,bdver1-store")
+(define_insn_reservation "bdver1_idirect_both" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "bdver1-direct,bdver1-load,
+ bdver1-ieu,bdver1-store,
+ bdver1-store")
+(define_insn_reservation "bdver1_ivector_both" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "bdver1-vector,bdver1-load,
+ bdver1-ieu,
+ bdver1-ieu,
+ bdver1-store")
+(define_insn_reservation "bdver1_idirect_store" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "bdver1-direct,(bdver1-ieu+bdver1-agu),
+ bdver1-store")
+(define_insn_reservation "bdver1_ivector_store" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
+ bdver1-store")
+
+;; BDVER1 floating point units.
+(define_insn_reservation "bdver1_fldxf" 13
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
+(define_insn_reservation "bdver1_fld" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_fstxf" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
+(define_insn_reservation "bdver1_fst" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
+(define_insn_reservation "bdver1_fist" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fistp,fisttp"))
+ "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
+(define_insn_reservation "bdver1_fmov_bdver1" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fmov"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fadd_load" 10
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_fadd" 6
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fop"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fmul_load" 10
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "bdver1-double,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_fmul" 6
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fmul"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fsgn" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fsgn"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fdiv_load" 46
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_fdiv" 42
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fdiv"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fpspc_load" 103
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "bdver1-vector,bdver1-fpload,bdver1-fvector")
+(define_insn_reservation "bdver1_fpspc" 100
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fpspc")
+	    (eq_attr "memory" "none")))
+			 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
+(define_insn_reservation "bdver1_fcmov_load" 17
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "bdver1-vector,bdver1-fpload,bdver1-fvector")
+(define_insn_reservation "bdver1_fcmov" 15
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fcmov"))
+ "bdver1-vector,bdver1-fpsched,bdver1-fvector")
+(define_insn_reservation "bdver1_fcomi_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "bdver1_decode" "double")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
+(define_insn_reservation "bdver1_fcomi" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "bdver1_decode" "double")
+ (eq_attr "type" "fcmp")))
+ "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
+(define_insn_reservation "bdver1_fcom_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_fcom" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fcmp"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_fxch" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "fxch"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+
+;; SSE loads.
+(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "prefix" "vex")
+ (and (eq_attr "movu" "1")
+ (and (eq_attr "mode" "V4SF,V2DF")
+ (eq_attr "memory" "load"))))))
+ "bdver1-direct,bdver1-fpload")
+(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "1")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (eq_attr "memory" "load")))))
+ "bdver1-double,bdver1-fpload")
+(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "1")
+ (and (eq_attr "mode" "V4SF,V2DF")
+ (eq_attr "memory" "load")))))
+ "bdver1-direct,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_ssevector_avx128_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "prefix" "vex")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "bdver1-direct,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_ssevector_avx256_load" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_ssevector_sse128_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload")
+(define_insn_reservation "bdver1_ssescalar_movq_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "prefix" "vex")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "bdver1-direct,bdver1-fpload")
+(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload, bdver1-ffma")
+(define_insn_reservation "bdver1_mmxsse_load" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload, bdver1-fmal")
+
+;; SSE stores.
+(define_insn_reservation "bdver1_sse_store_avx256" 5
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (eq_attr "memory" "store,both"))))
+ "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
+(define_insn_reservation "bdver1_sse_store" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
+(define_insn_reservation "bdver1_mmxsse_store_short" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
+
+;; Register moves.
+(define_insn_reservation "bdver1_ssevector_avx256" 3
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V8SF,V4DF,OI")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,bdver1-fmal")
+(define_insn_reservation "bdver1_movss_movsd" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_mmxssemov" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "none")))
+ "bdver1-direct,bdver1-fpsched,bdver1-fmal")
+;; SSE logical operations.
+(define_insn_reservation "bdver1_sselog_load_256" 7
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sselog,sselog1")
+ (and (eq_attr "mode" "V8SF")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_sselog_256" 3
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "mode" "V8SF")))
+ "bdver1-double,bdver1-fpsched,bdver1-fmal")
+(define_insn_reservation "bdver1_sselog_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-fxbar")
+(define_insn_reservation "bdver1_sselog" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "sselog,sselog1"))
+ "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
+
+;; PCMP actually executes in FMAL.
+(define_insn_reservation "bdver1_ssecmp_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_ssecmp" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "ssecmp"))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_ssecomi_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
+(define_insn_reservation "bdver1_ssecomi" 2
+ (and (eq_attr "cpu" "bdver1")
+ (eq_attr "type" "ssecomi"))
+ "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
+
+;; Conversions behave very irregularly, and the scheduling is critical
+;; here.  Take each instruction separately.
+
+;; 256 bit conversion.
+(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (ior (ior (match_operand:V4DF 0 "register_operand")
+ (ior (match_operand:V8SF 0 "register_operand")
+ (match_operand:V8SI 0 "register_operand")))
+ (ior (match_operand:V4DF 1 "nonimmediate_operand")
+ (ior (match_operand:V8SF 1 "nonimmediate_operand")
+ (match_operand:V8SI 1 "nonimmediate_operand")))))))
+ "bdver1-vector,bdver1-fpload,bdver1-fvector")
+(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (ior (ior (match_operand:V4DF 0 "register_operand")
+ (ior (match_operand:V8SF 0 "register_operand")
+ (match_operand:V8SI 0 "register_operand")))
+ (ior (match_operand:V4DF 1 "nonimmediate_operand")
+ (ior (match_operand:V8SF 1 "nonimmediate_operand")
+ (match_operand:V8SI 1 "nonimmediate_operand")))))))
+ "bdver1-vector,bdver1-fpsched,bdver1-fvector")
+;; CVTSS2SD, CVTSD2SS.
+(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload,bdver1-fcvt")
+(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
+;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
+(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload,bdver1-fcvt")
+(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
+;; CVTPD2PS.
+(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2DF 1 "nonimmediate_operand")))))
+ "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
+(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2DF 1 "nonimmediate_operand")))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
+;; CVTPI2PS, CVTDQ2PS.
+(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V4SF 0 "register_operand")
+ (ior (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand"))))))
+ "bdver1-direct,bdver1-fpload,bdver1-fcvt")
+(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V4SF 0 "register_operand")
+ (ior (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand"))))))
+ "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
+;; CVTDQ2PD.
+(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand")))))
+ "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
+(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand")))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
+;; CVTPS2PD, CVTPI2PD.
+(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V2DF 0 "register_operand")
+ (ior (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))))))
+ "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
+(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+	    (and (eq_attr "memory" "none")
+ (and (match_operand:V2DF 0 "register_operand")
+ (ior (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
+;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
+(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
+(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
+;; CVTPD2PI, CVTTPD2PI.
+(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V2DF 1 "nonimmediate_operand")
+ (match_operand:V2SI 0 "register_operand")))))
+ "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
+(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V2DF 1 "nonimmediate_operand")
+ (match_operand:V2SI 0 "register_operand")))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
+;; CVTPD2DQ, CVTTPD2DQ.
+(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V2DF 1 "nonimmediate_operand")
+ (match_operand:V4SI 0 "register_operand")))))
+ "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
+(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V2DF 1 "nonimmediate_operand")
+ (match_operand:V4SI 0 "register_operand")))))
+ "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
+;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
+(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "load")
+ (and (match_operand:V4SF 1 "nonimmediate_operand")
+ (ior (match_operand: V2SI 0 "register_operand")
+ (match_operand: V4SI 0 "register_operand"))))))
+ "bdver1-direct,bdver1-fpload,bdver1-fcvt")
+(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "memory" "none")
+ (and (match_operand:V4SF 1 "nonimmediate_operand")
+ (ior (match_operand: V2SI 0 "register_operand")
+ (match_operand: V4SI 0 "register_operand"))))))
+ "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
+
+;; SSE MUL, ADD, and MULADD.
+(define_insn_reservation "bdver1_ssemuladd_load_256" 11
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_ssemuladd_256" 7
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (and (eq_attr "mode" "V8SF,V4DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_ssemuladd_load" 10
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-ffma")
+(define_insn_reservation "bdver1_ssemuladd" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
+ (eq_attr "memory" "none")))
+ "bdver1-direct,bdver1-fpsched,bdver1-ffma")
+(define_insn_reservation "bdver1_sseimul_load" 8
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseimul")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-fmma")
+(define_insn_reservation "bdver1_sseimul" 4
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseimul")
+ (eq_attr "memory" "none")))
+ "bdver1-direct,bdver1-fpsched,bdver1-fmma")
+(define_insn_reservation "bdver1_sseiadd_load" 6
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseiadd")
+ (eq_attr "memory" "load")))
+ "bdver1-direct,bdver1-fpload,bdver1-fmal")
+(define_insn_reservation "bdver1_sseiadd" 2
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseiadd")
+ (eq_attr "memory" "none")))
+ "bdver1-direct,bdver1-fpsched,bdver1-fmal")
+
+;; SSE DIV: no throughput information (assume same as amdfam10).
+(define_insn_reservation "bdver1_ssediv_double_load_256" 31
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "V4DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_double_256" 27
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "V4DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_single_load_256" 28
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "V8SF")
+ (eq_attr "memory" "load"))))
+ "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_single_256" 24
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "V8SF")
+ (eq_attr "memory" "none"))))
+ "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_double_load" 31
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_double" 27
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "memory" "none"))))
+ "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_single_load" 28
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "memory" "load"))))
+ "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+(define_insn_reservation "bdver1_ssediv_single" 24
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "memory" "none"))))
+ "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
+
+(define_insn_reservation "bdver1_sseins" 3
+ (and (eq_attr "cpu" "bdver1")
+ (and (eq_attr "type" "sseins")
+ (eq_attr "mode" "TI")))
+ "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
+
diff --git a/gcc/config/i386/biarch64.h b/gcc/config/i386/biarch64.h
new file mode 100644
index 000000000..629ec980d
--- /dev/null
+++ b/gcc/config/i386/biarch64.h
@@ -0,0 +1,29 @@
+/* Make configure-generated files produce a biarch compiler defaulting
+ to 64-bit mode. This file must be included first; the OS-specific
+ file is included later to override otherwise-wrong defaults.
+ Copyright (C) 2001, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_64BIT_DEFAULT OPTION_MASK_ISA_64BIT
+#define TARGET_BI_ARCH 1
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
new file mode 100644
index 000000000..225f2ecbe
--- /dev/null
+++ b/gcc/config/i386/bmiintrin.h
@@ -0,0 +1,145 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __BMI__
+# error "BMI instruction set not enabled"
+#endif /* __BMI__ */
+
+#ifndef _BMIINTRIN_H_INCLUDED
+#define _BMIINTRIN_H_INCLUDED
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u16 (unsigned short __X)
+{
+ return __builtin_clzs (__X);
+}
+
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u16 (unsigned short __X)
+{
+ return __builtin_ctzs (__X);
+}
+
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u32 (unsigned int __X, unsigned int __Y)
+{
+ unsigned int tmp = ~(__X) & (__Y);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u32 (unsigned int __X, unsigned int __Y)
+{
+ return __builtin_ia32_bextr_u32 (__X, __Y);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) & (-(__X));
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) ^ (__X - 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) & (__X - 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u32 (unsigned int __X)
+{
+ return __builtin_clz (__X);
+}
+
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u32 (unsigned int __X)
+{
+ return __builtin_ctz (__X);
+}
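+
+/* For illustration, with __X = 0x18 (binary 11000) the helpers above
+ compute:
+
+ __blsi_u32 (0x18) == 0x08 lowest set bit isolated
+ __blsmsk_u32 (0x18) == 0x0f mask up to and including that bit
+ __blsr_u32 (0x18) == 0x10 lowest set bit cleared
+ __tzcnt_u32 (0x18) == 3 number of trailing zero bits
+
+ mirroring the BLSI, BLSMSK, BLSR and TZCNT instructions. */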
+
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ unsigned long long tmp = ~(__X) & (__Y);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextr_u64 (unsigned long long __X, unsigned long long __Y)
+{
+ return __builtin_ia32_bextr_u64 (__X, __Y);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsi_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) & (-(__X));
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsmsk_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) ^ (__X - 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsr_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) & (__X - 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lzcnt_u64 (unsigned long long __X)
+{
+ return __builtin_clzll (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzcnt_u64 (unsigned long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+
+#endif /* __x86_64__ */
+
+#endif /* _BMIINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/bmmintrin.h b/gcc/config/i386/bmmintrin.h
new file mode 100644
index 000000000..91d4e7742
--- /dev/null
+++ b/gcc/config/i386/bmmintrin.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _BMMINTRIN_H_INCLUDED
+#define _BMMINTRIN_H_INCLUDED
+
+# error "SSE5 instruction set removed from compiler"
+
+#endif /* _BMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/bsd.h b/gcc/config/i386/bsd.h
new file mode 100644
index 000000000..e408ccdb0
--- /dev/null
+++ b/gcc/config/i386/bsd.h
@@ -0,0 +1,100 @@
+/* Definitions for BSD assembler syntax for Intel 386
+ (actually AT&T syntax for insns and operands,
+ adapted to BSD conventions for symbol names and debugging.)
+ Copyright (C) 1988, 1996, 2000, 2002, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use the Sequent Symmetry assembler syntax. */
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* Prefix for internally generated assembler labels. If we aren't using
+ underscores, we are using prefix `.'s to identify labels that should
+ be ignored, as in `i386/gas.h' --karl@cs.umb.edu */
+
+#define LPREFIX "L"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_BYTE "\t.byte\t"
+#define ASM_SHORT "\t.word\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t" /* Should not be used for 32bit compilation. */
+
+/* This was suggested, but it shouldn't be right for DBX output. -- RMS
+ #define ASM_OUTPUT_SOURCE_FILENAME(FILE, NAME) */
+
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+#ifdef HAVE_GAS_LCOMM_WITH_ALIGNMENT
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGNMENT) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u,%u\n", (int)(SIZE), (int)(ALIGNMENT) / BITS_PER_UNIT))
+#endif
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", (LOG))
+
+/* This is how to store into the string BUF
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), "*%s%ld", (PREFIX), (long)(NUMBER))
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* Sequent has some changes in the format of DBX symbols. */
+#define DBX_NO_XREFS 1
+
+/* Don't split DBX symbols into continuations. */
+#define DBX_CONTIN_LENGTH 0
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
new file mode 100644
index 000000000..6233b79ec
--- /dev/null
+++ b/gcc/config/i386/constraints.md
@@ -0,0 +1,175 @@
+;; Constraint definitions for IA-32 and x86-64.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;; Unused letters:
+;;; B H T W
+;;; h jk vw
+
+;; Integer register constraints.
+;; It is not necessary to define 'r' here.
+(define_register_constraint "R" "LEGACY_REGS"
+ "Legacy register---the eight integer registers available on all
+ i386 processors (@code{a}, @code{b}, @code{c}, @code{d},
+ @code{si}, @code{di}, @code{bp}, @code{sp}).")
+
+(define_register_constraint "q" "TARGET_64BIT ? GENERAL_REGS : Q_REGS"
+ "Any register accessible as @code{@var{r}l}. In 32-bit mode, @code{a},
+ @code{b}, @code{c}, and @code{d}; in 64-bit mode, any integer register.")
+
+(define_register_constraint "Q" "Q_REGS"
+ "Any register accessible as @code{@var{r}h}: @code{a}, @code{b},
+ @code{c}, and @code{d}.")
+
+(define_register_constraint "l" "INDEX_REGS"
+ "@internal Any register that can be used as the index in a base+index
+ memory access: that is, any general register except the stack pointer.")
+
+(define_register_constraint "a" "AREG"
+ "The @code{a} register.")
+
+(define_register_constraint "b" "BREG"
+ "The @code{b} register.")
+
+(define_register_constraint "c" "CREG"
+ "The @code{c} register.")
+
+(define_register_constraint "d" "DREG"
+ "The @code{d} register.")
+
+(define_register_constraint "S" "SIREG"
+ "The @code{si} register.")
+
+(define_register_constraint "D" "DIREG"
+ "The @code{di} register.")
+
+(define_register_constraint "A" "AD_REGS"
+ "The @code{a} and @code{d} registers, as a pair (for instructions
+ that return half the result in one and half in the other).")
+
+(define_register_constraint "U" "CLOBBERED_REGS"
+ "The call-clobbered integer registers.")
+
+;; Floating-point register constraints.
+(define_register_constraint "f"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FLOAT_REGS : NO_REGS"
+ "Any 80387 floating-point (stack) register.")
+
+(define_register_constraint "t"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_TOP_REG : NO_REGS"
+ "Top of 80387 floating-point stack (@code{%st(0)}).")
+
+(define_register_constraint "u"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
+ "Second from top of 80387 floating-point stack (@code{%st(1)}).")
+
+;; Vector registers (also used for plain floating point nowadays).
+(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
+ "Any MMX register.")
+
+(define_register_constraint "x" "TARGET_SSE ? SSE_REGS : NO_REGS"
+ "Any SSE register.")
+
+;; We use the Y prefix to denote any number of conditional register sets:
+;; z First SSE register.
+;; 2 SSE2 enabled
+;; i SSE2 inter-unit moves enabled
+;; m MMX inter-unit moves enabled
+
+(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
+ "First SSE register (@code{%xmm0}).")
+
+(define_register_constraint "Y2" "TARGET_SSE2 ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 is enabled.")
+
+(define_register_constraint "Yi"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.")
+
+(define_register_constraint "Ym"
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves are enabled.")
+
+(define_constraint "z"
+ "@internal Constant call address operand."
+ (match_operand 0 "constant_call_address_operand"))
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 31)")))
+
+(define_constraint "J"
+ "Integer constant in the range 0 @dots{} 63, for 64-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 63)")))
+
+(define_constraint "K"
+ "Signed 8-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -128, 127)")))
+
+(define_constraint "L"
+ "@code{0xFF} or @code{0xFFFF}, for andsi as a zero-extending move."
+ (and (match_code "const_int")
+ (match_test "ival == 0xFF || ival == 0xFFFF")))
+
+(define_constraint "M"
+ "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 3)")))
+
+(define_constraint "N"
+ "Unsigned 8-bit integer constant (for @code{in} and @code{out}
+ instructions)."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 255)")))
+
+(define_constraint "O"
+ "@internal Integer constant in the range 0 @dots{} 127, for 128-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 127)")))
+
+;; Floating-point constant constraints.
+;; We allow constants even if TARGET_80387 isn't set, because the
+;; stack register converter may need to load 0.0 into the function
+;; value register (top of stack).
+(define_constraint "G"
+ "Standard 80387 floating point constant."
+ (and (match_code "const_double")
+ (match_test "standard_80387_constant_p (op) > 0")))
+
+;; This can theoretically be any mode's CONST0_RTX.
+(define_constraint "C"
+ "Standard SSE floating point constant."
+ (match_test "standard_sse_constant_p (op)"))
+
+;; Constant-or-symbol-reference constraints.
+
+(define_constraint "e"
+ "32-bit signed integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in sign-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_immediate_operand"))
+
+(define_constraint "Z"
+ "32-bit unsigned integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in zero-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_zext_immediate_operand"))
diff --git a/gcc/config/i386/core2.md b/gcc/config/i386/core2.md
new file mode 100644
index 000000000..d154cdc07
--- /dev/null
+++ b/gcc/config/i386/core2.md
@@ -0,0 +1,691 @@
+;; Scheduling for Core 2 and derived processors.
+;; Copyright (C) 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; The scheduling description in this file is based on the one in ppro.md,
+;; with additional information obtained from
+;;
+;; "How to optimize for the Pentium family of microprocessors",
+;; by Agner Fog, PhD.
+;;
+;; The major difference from the P6 pipeline is one extra decoder, and
+;; one extra execute unit. Due to micro-op fusion, many insns no longer
+;; need to be decoded in decoder 0, but can be handled by all of them.
+
+;; The core2_idiv, core2_fdiv and core2_ssediv automata are used to
+;; model issue latencies of idiv, fdiv and ssediv type insns.
+(define_automaton "core2_decoder,core2_core,core2_idiv,core2_fdiv,core2_ssediv,core2_load,core2_store")
+
+;; The CPU domain, used for Core i7 bypass latencies
+(define_attr "i7_domain" "int,float,simd"
+ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
+ (const_string "float")
+ (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
+ (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
+ (const_string "float")
+ (eq_attr "mode" "SI")
+ (const_string "int")]
+ (const_string "simd"))
+ (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+ (const_string "simd")]
+ (const_string "int")))
+
+;; Decoding rules are analogous to the Pentium Pro's:
+;; - an instruction with 1 uop can be decoded by any of the four
+;; decoders in one cycle.
+;; - an instruction with up to 4 uops can be decoded only by decoder 0,
+;; but still in only one cycle.
+;; - a complex (microcode) instruction can also only be decoded by
+;; decoder 0, and this takes an unspecified number of cycles.
+;;
+;; The goal is to schedule such that we have a few-one-one uops sequence
+;; in each cycle, to decode as many instructions per cycle as possible.
+(define_cpu_unit "c2_decoder0" "core2_decoder")
+(define_cpu_unit "c2_decoder1" "core2_decoder")
+(define_cpu_unit "c2_decoder2" "core2_decoder")
+(define_cpu_unit "c2_decoder3" "core2_decoder")
+
+;; We first wish to find an instruction for c2_decoder0, so exclude
+;; c2_decoder1, c2_decoder2 and c2_decoder3 from being reserved until
+;; c2_decoder0 is reserved.
+(presence_set "c2_decoder1" "c2_decoder0")
+(presence_set "c2_decoder2" "c2_decoder0")
+(presence_set "c2_decoder3" "c2_decoder0")
+
+;; Most instructions can be decoded on any of the four decoders.
+(define_reservation "c2_decodern" "(c2_decoder0|c2_decoder1|c2_decoder2|c2_decoder3)")
+
+;; The out-of-order core has six pipelines. These are similar to the
+;; Pentium Pro's five pipelines. Port 2 is responsible for memory loads,
+;; port 3 for store address calculations, port 4 for memory stores, and
+;; ports 0, 1 and 5 for everything else.
+
+(define_cpu_unit "c2_p0,c2_p1,c2_p5" "core2_core")
+(define_cpu_unit "c2_p2" "core2_load")
+(define_cpu_unit "c2_p3,c2_p4" "core2_store")
+(define_cpu_unit "c2_idiv" "core2_idiv")
+(define_cpu_unit "c2_fdiv" "core2_fdiv")
+(define_cpu_unit "c2_ssediv" "core2_ssediv")
+
+;; Only the irregular instructions have to be modeled here. A load
+;; increases the latency by 2 or 3, or by nothing if the manual gives
+;; a latency already. Store latencies are not accounted for.
+;;
+;; The simple instructions follow a very regular pattern of 1 uop per
+;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
+;; on port 4 and port 3. These instructions are modelled at the bottom
+;; of this file.
+;;
+;; For microcoded instructions we don't know how many uops are produced.
+;; These instructions are the "complex" ones in the Intel manuals. All
+;; we _do_ know is that they typically produce four or more uops, so
+;; they can only be decoded on c2_decoder0. Modelling their latencies
+;; doesn't make sense because we don't know how these instructions are
+;; executed in the core. So we just model that they can only be decoded
+;; on decoder 0, and say that it takes a little while before the result
+;; is available.
+(define_insn_reservation "c2_complex_insn" 6
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "other,multi,str"))
+ "c2_decoder0")
+
+(define_insn_reservation "c2_call" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "call,callv"))
+ "c2_decoder0")
+
+;; imov with memory operands does not use the integer units.
+;; imovx always decodes to one uop, and also doesn't use the integer
+;; units if it has memory operands.
+(define_insn_reservation "c2_imov" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imov,imovx")))
+ "c2_decodern,(c2_p0|c2_p1|c2_p5)")
+
+(define_insn_reservation "c2_imov_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imov,imovx")))
+ "c2_decodern,c2_p2")
+
+(define_insn_reservation "c2_imov_store" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "imov")))
+ "c2_decodern,c2_p4+c2_p3")
+
+(define_insn_reservation "c2_icmov" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "icmov")))
+ "c2_decoder0,(c2_p0|c2_p1|c2_p5)*2")
+
+(define_insn_reservation "c2_icmov_load" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "icmov")))
+ "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5)*2")
+
+(define_insn_reservation "c2_push_reg" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "push")))
+ "c2_decodern,c2_p4+c2_p3")
+
+(define_insn_reservation "c2_push_mem" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "push")))
+ "c2_decoder0,c2_p2,c2_p4+c2_p3")
+
+;; lea executes on port 0 with latency one and throughput 1.
+(define_insn_reservation "c2_lea" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "lea")))
+ "c2_decodern,c2_p0")
+
+;; Shift and rotate decode as two uops which can go to port 0 or 5.
+;; The load and store units need to be reserved when memory operands
+;; are involved.
+(define_insn_reservation "c2_shift_rotate" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "c2_decodern,(c2_p0|c2_p5)")
+
+(define_insn_reservation "c2_shift_rotate_mem" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "c2_decoder0,c2_p2,(c2_p0|c2_p5),c2_p4+c2_p3")
+
+;; See comments in ppro.md for the corresponding reservation.
+(define_insn_reservation "c2_branch" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ibr")))
+ "c2_decodern,c2_p5")
+
+;; ??? Indirect branches probably have worse latency than this.
+(define_insn_reservation "c2_indirect_branch" 6
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ibr")))
+ "c2_decoder0,c2_p2+c2_p5")
+
+(define_insn_reservation "c2_leave" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "leave"))
+ "c2_decoder0,c2_p2+(c2_p0|c2_p1),(c2_p0|c2_p1)")
+
+;; mul and imul with two/three operands only execute on port 1 for HImode
+;; and SImode, port 0 for DImode.
+(define_insn_reservation "c2_imul_hisi" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI,SI")
+ (eq_attr "type" "imul"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_imul_hisi_mem" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "HI,SI")
+ (eq_attr "type" "imul"))))
+ "c2_decoder0,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_imul_di" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "imul"))))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_imul_di_mem" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "imul"))))
+ "c2_decoder0,c2_p2+c2_p0")
+
+;; div and idiv are very similar, so we model them the same.
+;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
+;; These issue latencies are modelled via the c2_idiv automaton.
+(define_insn_reservation "c2_idiv_QI" 19
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,(c2_p0+c2_idiv)*2,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")
+
+(define_insn_reservation "c2_idiv_QI_load" 19
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")
+
+(define_insn_reservation "c2_idiv_HI" 23
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*17")
+
+(define_insn_reservation "c2_idiv_HI_load" 23
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*18")
+
+(define_insn_reservation "c2_idiv_SI" 39
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*33")
+
+(define_insn_reservation "c2_idiv_SI_load" 39
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*34")
+
+;; x87 floating point operations.
+
+(define_insn_reservation "c2_fxch" 0
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "fxch"))
+ "c2_decodern")
+
+(define_insn_reservation "c2_fop" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "fop")))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_fop_load" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fop")))
+ "c2_decoder0,c2_p2+c2_p1,c2_p1")
+
+(define_insn_reservation "c2_fop_store" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fop")))
+ "c2_decoder0,c2_p0,c2_p0,c2_p0+c2_p4+c2_p3")
+
+(define_insn_reservation "c2_fop_both" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "fop")))
+ "c2_decoder0,c2_p2+c2_p0,c2_p0+c2_p4+c2_p3")
+
+(define_insn_reservation "c2_fsgn" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "fsgn"))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_fistp" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "fistp"))
+ "c2_decoder0,c2_p0*2,c2_p4+c2_p3")
+
+(define_insn_reservation "c2_fcmov" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (eq_attr "type" "fcmov"))
+ "c2_decoder0,c2_p0*2")
+
+(define_insn_reservation "c2_fcmp" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fcmp")))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_fcmp_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fcmp")))
+ "c2_decoder0,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_fmov" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmov")))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_fmov_load" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "c2_decodern,c2_p2")
+
+(define_insn_reservation "c2_fmov_XF_load" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "c2_decoder0,(c2_p2+c2_p0)*2")
+
+(define_insn_reservation "c2_fmov_store" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "c2_decodern,c2_p3+c2_p4")
+
+(define_insn_reservation "c2_fmov_XF_store" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "c2_decoder0,(c2_p3+c2_p4),(c2_p3+c2_p4)")
+
+;; fmul executes on port 0 with latency 5. It has issue latency 2,
+;; but we don't model this.
+(define_insn_reservation "c2_fmul" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmul")))
+ "c2_decoder0,c2_p0*2")
+
+(define_insn_reservation "c2_fmul_load" 6
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fmul")))
+ "c2_decoder0,c2_p2+c2_p0,c2_p0")
+
+;; fdiv latencies depend on the mode of the operands. XFmode gives
+;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
+;; Division by a power of 2 takes only 9 cycles, but we cannot model
+;; that. Throughput is equal to latency - 1, which we model using the
+;; c2_fdiv automaton.
+(define_insn_reservation "c2_fdiv_SF" 18
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*16")
+
+(define_insn_reservation "c2_fdiv_SF_load" 19
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*16")
+
+(define_insn_reservation "c2_fdiv_DF" 32
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*30")
+
+(define_insn_reservation "c2_fdiv_DF_load" 33
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*30")
+
+(define_insn_reservation "c2_fdiv_XF" 38
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*36")
+
+(define_insn_reservation "c2_fdiv_XF_load" 39
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*36")
+
+;; MMX instructions.
+
+(define_insn_reservation "c2_mmx_add" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxadd,sseiadd")))
+ "c2_decodern,c2_p0|c2_p5")
+
+(define_insn_reservation "c2_mmx_add_load" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxadd,sseiadd")))
+ "c2_decodern,c2_p2+c2_p0|c2_p5")
+
+(define_insn_reservation "c2_mmx_shft" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxshft")))
+ "c2_decodern,c2_p0|c2_p5")
+
+(define_insn_reservation "c2_mmx_shft_load" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxshft")))
+ "c2_decoder0,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_mmx_sse_shft" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "type" "sseishft")
+ (eq_attr "length_immediate" "!0"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_mmx_sse_shft_load" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "type" "sseishft")
+ (eq_attr "length_immediate" "!0"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_mmx_sse_shft1" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "type" "sseishft")
+ (eq_attr "length_immediate" "0"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_mmx_sse_shft1_load" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "type" "sseishft")
+ (eq_attr "length_immediate" "0"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_mmx_mul" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul,sseimul")))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_mmx_mul_load" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul,sseimul")))
+ "c2_decoder0,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_sse_mmxcvt" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "mmxcvt")))
+ "c2_decodern,c2_p1")
+
+;; FIXME: These are Pentium III only, but we cannot tell here if
+;; we're generating code for PentiumPro/Pentium II or Pentium III
+;; (define_insn_reservation "c2_sse_mmxshft" 2
+;; (and (eq_attr "cpu" "core2,corei7")
+;; (and (eq_attr "mode" "TI")
+;; (eq_attr "type" "mmxshft")))
+;; "c2_decodern,c2_p0")
+
+;; The sfence instruction.
+(define_insn_reservation "c2_sse_sfence" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "unknown")
+ (eq_attr "type" "sse")))
+ "c2_decoder0,c2_p4+c2_p3")
+
+;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
+(define_insn_reservation "c2_sse_SFDF" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "type" "sse")))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_sse_V4SF" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sse")))
+ "c2_decoder0,c2_p1*2")
+
+(define_insn_reservation "c2_sse_addcmp" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_sse_addcmp_load" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "sseadd,ssecmp,ssecomi")))
+ "c2_decodern,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_sse_mul_SF" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "type" "ssemul"))))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_sse_mul_SF_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "type" "ssemul"))))
+ "c2_decodern,c2_p2+c2_p0")
+
+(define_insn_reservation "c2_sse_mul_DF" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "type" "ssemul"))))
+ "c2_decodern,c2_p0")
+
+(define_insn_reservation "c2_sse_mul_DF_load" 5
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "type" "ssemul"))))
+ "c2_decodern,c2_p2+c2_p0")
+
+(define_insn_reservation "c2_sse_div_SF" 18
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "type" "ssediv"))))
+ "c2_decodern,c2_p0,c2_ssediv*17")
+
+(define_insn_reservation "c2_sse_div_SF_load" 18
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF,V4SF")
+ (eq_attr "type" "ssediv"))))
+ "c2_decodern,(c2_p2+c2_p0),c2_ssediv*17")
+
+(define_insn_reservation "c2_sse_div_DF" 32
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "type" "ssediv"))))
+ "c2_decodern,c2_p0,c2_ssediv*31")
+
+(define_insn_reservation "c2_sse_div_DF_load" 32
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF,V2DF")
+ (eq_attr "type" "ssediv"))))
+ "c2_decodern,(c2_p2+c2_p0),c2_ssediv*31")
+
+;; FIXME: these have limited throughput
+(define_insn_reservation "c2_sse_icvt_SF" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_sse_icvt_SF_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decodern,c2_p2+c2_p1")
+
+(define_insn_reservation "c2_sse_icvt_DF" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decoder0,c2_p0+c2_p1")
+
+(define_insn_reservation "c2_sse_icvt_DF_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decoder0,(c2_p2+c2_p1)")
+
+(define_insn_reservation "c2_sse_icvt_SI" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decodern,c2_p1")
+
+(define_insn_reservation "c2_sse_icvt_SI_load" 3
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "sseicvt"))))
+ "c2_decodern,(c2_p2+c2_p1)")
+
+(define_insn_reservation "c2_sse_mov" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ssemov")))
+ "c2_decodern,(c2_p0|c2_p1|c2_p5)")
+
+(define_insn_reservation "c2_sse_mov_load" 2
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "ssemov")))
+ "c2_decodern,c2_p2")
+
+(define_insn_reservation "c2_sse_mov_store" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "ssemov")))
+ "c2_decodern,c2_p4+c2_p3")
+
+;; All other instructions are modelled as simple instructions.
+;; We have already modelled all i387 floating point instructions, so all
+;; other instructions execute on either port 0, 1 or 5. This includes
+;; the ALU units, and the MMX units.
+;;
+;; reg-reg instructions produce 1 uop so they can be decoded on any of
+;; the four decoders. Loads benefit from micro-op fusion and can be
+;; treated in the same way.
+(define_insn_reservation "c2_insn" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
+ "c2_decodern,(c2_p0|c2_p1|c2_p5)")
+
+(define_insn_reservation "c2_insn_load" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
+ "c2_decodern,c2_p2,(c2_p0|c2_p1|c2_p5)")
+
+;; register-memory instructions have three uops, so they have to be
+;; decoded on c2_decoder0.
+(define_insn_reservation "c2_insn_store" 1
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
+ "c2_decoder0,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")
+
+;; read-modify-store instructions produce 4 uops so they have to be
+;; decoded on c2_decoder0 as well.
+(define_insn_reservation "c2_insn_both" 4
+ (and (eq_attr "cpu" "core2,corei7")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
+ "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
new file mode 100644
index 000000000..3c3f47b00
--- /dev/null
+++ b/gcc/config/i386/cpuid.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* %ecx */
+#define bit_SSE3 (1 << 0)
+#define bit_PCLMUL (1 << 1)
+#define bit_SSSE3 (1 << 9)
+#define bit_FMA (1 << 12)
+#define bit_CMPXCHG16B (1 << 13)
+#define bit_SSE4_1 (1 << 19)
+#define bit_SSE4_2 (1 << 20)
+#define bit_MOVBE (1 << 22)
+#define bit_POPCNT (1 << 23)
+#define bit_AES (1 << 25)
+#define bit_XSAVE (1 << 26)
+#define bit_OSXSAVE (1 << 27)
+#define bit_AVX (1 << 28)
+#define bit_F16C (1 << 29)
+#define bit_RDRND (1 << 30)
+
+/* %edx */
+#define bit_CMPXCHG8B (1 << 8)
+#define bit_CMOV (1 << 15)
+#define bit_MMX (1 << 23)
+#define bit_FXSAVE (1 << 24)
+#define bit_SSE (1 << 25)
+#define bit_SSE2 (1 << 26)
+
+/* Extended Features */
+/* %ecx */
+#define bit_LAHF_LM (1 << 0)
+#define bit_ABM (1 << 5)
+#define bit_SSE4a (1 << 6)
+#define bit_XOP (1 << 11)
+#define bit_LWP (1 << 15)
+#define bit_FMA4 (1 << 16)
+#define bit_TBM (1 << 21)
+
+/* %edx */
+#define bit_MMXEXT (1 << 22)
+#define bit_LM (1 << 29)
+#define bit_3DNOWP (1 << 30)
+#define bit_3DNOW (1 << 31)
+
+/* Extended Features (%eax == 7) */
+#define bit_FSGSBASE (1 << 0)
+#define bit_BMI (1 << 3)
+
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register. */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#else
+/* Host GCCs older than 3.0 did not support Intel asm syntax
+ or dialect alternatives in i386 code. */
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
+#else
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
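+
+/* For illustration, the structured extended-feature bits (leaf 7) are
+ read with the count form:
+
+ unsigned int a, b, c, d;
+ __cpuid_count (7, 0, a, b, c, d);
+ int have_bmi = (b & bit_BMI) != 0;
+*/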
+
+/* Return the highest supported input value for the cpuid instruction.
+ ext can be either 0x0 or 0x80000000 to request the highest supported
+ value for basic or extended cpuid information, respectively. Returns
+ 0 if cpuid is not supported, otherwise whatever cpuid returns in the
+ eax register. If sig is non-null, the first four bytes of the
+ signature (as found in the ebx register) are stored in the location
+ it points to. */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+ unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+ /* See if we can use cpuid. On AMD64 we always can. */
+#if __GNUC__ >= 3
+ __asm__ ("pushf{l|d}\n\t"
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t"
+ "mov{l}\t{%0, %1|%1, %0}\n\t"
+ "xor{l}\t{%2, %0|%0, %2}\n\t"
+ "push{l}\t%0\n\t"
+ "popf{l|d}\n\t"
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t"
+ "popf{l|d}\n\t"
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#else
+/* Host GCCs older than 3.0 did not support Intel asm syntax
+ or dialect alternatives in i386 code. */
+ __asm__ ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "movl\t%0, %1\n\t"
+ "xorl\t%2, %0\n\t"
+ "pushl\t%0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "popfl\n\t"
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#endif
+
+ if (!((__eax ^ __ebx) & 0x00200000))
+ return 0;
+#endif
+
+ /* Host supports cpuid. Return highest supported cpuid input value. */
+ __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+ if (__sig)
+ *__sig = __ebx;
+
+ return __eax;
+}
+
+/* Return cpuid data for requested cpuid level, as found in returned
+ eax, ebx, ecx and edx registers. The function checks if cpuid is
+ supported and returns 1 for valid cpuid information or 0 for
+ unsupported cpuid level. All pointers are required to be non-null. */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+ unsigned int *__eax, unsigned int *__ebx,
+ unsigned int *__ecx, unsigned int *__edx)
+{
+ unsigned int __ext = __level & 0x80000000;
+
+ if (__get_cpuid_max (__ext, 0) < __level)
+ return 0;
+
+ __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+ return 1;
+}
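+
+/* For illustration, a caller can test for SSE2 support like so:
+
+ unsigned int eax, ebx, ecx, edx;
+ int have_sse2 = __get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ && (edx & bit_SSE2) != 0;
+*/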
diff --git a/gcc/config/i386/cross-stdarg.h b/gcc/config/i386/cross-stdarg.h
new file mode 100644
index 000000000..7139ffa74
--- /dev/null
+++ b/gcc/config/i386/cross-stdarg.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __CROSS_STDARG_H_INCLUDED
+#define __CROSS_STDARG_H_INCLUDED
+
+/* Make sure that for non x64 targets cross builtins are defined. */
+#ifndef __x86_64__
+/* Call abi ms_abi. */
+#define __builtin_ms_va_list __builtin_va_list
+#define __builtin_ms_va_copy __builtin_va_copy
+#define __builtin_ms_va_start __builtin_va_start
+#define __builtin_ms_va_end __builtin_va_end
+
+/* Call abi sysv_abi. */
+#define __builtin_sysv_va_list __builtin_va_list
+#define __builtin_sysv_va_copy __builtin_va_copy
+#define __builtin_sysv_va_start __builtin_va_start
+#define __builtin_sysv_va_end __builtin_va_end
+#endif
+
+#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
+#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
+#define __ms_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __ms_va_end(__v) __builtin_ms_va_end(__v)
+
+#define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s)
+#define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l)
+#define __sysv_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __sysv_va_end(__v) __builtin_sysv_va_end(__v)
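+
+/* For illustration (on a 64-bit target; sum_ints is only a sketch, not
+ a function defined here), an ms_abi variadic function could be:
+
+ int __attribute__ ((ms_abi))
+ sum_ints (int count, ...)
+ {
+ __builtin_ms_va_list ap;
+ int i, s = 0;
+ __ms_va_start (ap, count);
+ for (i = 0; i < count; i++)
+ s += __ms_va_arg (ap, int);
+ __ms_va_end (ap);
+ return s;
+ }
+*/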
+
+#ifndef __GNUC_SYSV_VA_LIST
+#define __GNUC_SYSV_VA_LIST
+ typedef __builtin_sysv_va_list __gnuc_sysv_va_list;
+#endif
+
+#ifndef _SYSV_VA_LIST_DEFINED
+#define _SYSV_VA_LIST_DEFINED
+ typedef __gnuc_sysv_va_list sysv_va_list;
+#endif
+
+#ifndef __GNUC_MS_VA_LIST
+#define __GNUC_MS_VA_LIST
+ typedef __builtin_ms_va_list __gnuc_ms_va_list;
+#endif
+
+#ifndef _MS_VA_LIST_DEFINED
+#define _MS_VA_LIST_DEFINED
+ typedef __gnuc_ms_va_list ms_va_list;
+#endif
+
+#endif /* __CROSS_STDARG_H_INCLUDED */
diff --git a/gcc/config/i386/crtdll.h b/gcc/config/i386/crtdll.h
new file mode 100644
index 000000000..1e5cefd62
--- /dev/null
+++ b/gcc/config/i386/crtdll.h
@@ -0,0 +1,42 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using GNU tools and the Windows32 API Library.
+ This variant uses CRTDLL.DLL instead of MSVCRTDLL.DLL.
+ Copyright (C) 1998, 1999, 2000, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef EXTRA_OS_CPP_BUILTINS
+#define EXTRA_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__CRTDLL__"); \
+ builtin_define ("__MINGW32__"); \
+ builtin_define ("_WIN32"); \
+ builtin_define_std ("WIN32"); \
+ builtin_define_std ("WINNT"); \
+ } \
+ while (0)
+
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC \
+ "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lcoldname -libmingwex -lcrtdll"
+
+/* Specify a different entry point when linking a DLL. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{shared|mdll:dllcrt1%O%s} \
+ %{!shared:%{!mdll:crt1%O%s}} %{pg:gcrt1%O%s}"
+
diff --git a/gcc/config/i386/crtfastmath.c b/gcc/config/i386/crtfastmath.c
new file mode 100644
index 000000000..1c1ce2c78
--- /dev/null
+++ b/gcc/config/i386/crtfastmath.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
+#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
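+
+/* For example, the power-up MXCSR value 0x1f80 (all exceptions masked)
+ becomes 0x1f80 | MXCSR_DAZ | MXCSR_FTZ == 0x9fc0 once both bits are
+ set below. */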
+
+#ifndef __x86_64__
+/* All 64-bit targets have SSE and DAZ;
+ only check them explicitly for 32-bit ones. */
+#include "cpuid.h"
+#endif
+
+static void __attribute__((constructor))
+#ifndef __x86_64__
+/* The i386 ABI only requires 4-byte stack alignment, so this is necessary
+ to make sure the fxsave struct gets correct alignment.
+ See PR27537 and PR28621. */
+__attribute__ ((force_align_arg_pointer))
+#endif
+set_fast_math (void)
+{
+#ifndef __x86_64__
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return;
+
+ if (edx & bit_SSE)
+ {
+ unsigned int mxcsr = __builtin_ia32_stmxcsr ();
+
+ mxcsr |= MXCSR_FTZ;
+
+ if (edx & bit_FXSAVE)
+ {
+ /* Check if DAZ is available. */
+ struct
+ {
+ unsigned short int cwd;
+ unsigned short int swd;
+ unsigned short int twd;
+ unsigned short int fop;
+ long int fip;
+ long int fcs;
+ long int foo;
+ long int fos;
+ long int mxcsr;
+ long int mxcsr_mask;
+ long int st_space[32];
+ long int xmm_space[32];
+ long int padding[56];
+ } __attribute__ ((aligned (16))) fxsave;
+
+ __builtin_memset (&fxsave, 0, sizeof (fxsave));
+
+ asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));
+
+ if (fxsave.mxcsr_mask & MXCSR_DAZ)
+ mxcsr |= MXCSR_DAZ;
+ }
+
+ __builtin_ia32_ldmxcsr (mxcsr);
+ }
+#else
+ unsigned int mxcsr = __builtin_ia32_stmxcsr ();
+ mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
+ __builtin_ia32_ldmxcsr (mxcsr);
+#endif
+}
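As a hedged aside (not part of the imported sources): once the constructor above has set FTZ and DAZ in the MXCSR, SSE arithmetic flushes subnormal inputs and results to zero. On a target where float math goes through SSE (always the case for x86_64), a program such as the following prints a tiny subnormal value normally, but 0 once crtfastmath.o is linked in, e.g. via -ffast-math:

#include <stdio.h>
#include <float.h>

int main (void)
{
  volatile float tiny = FLT_MIN;   /* smallest normalized float */
  volatile float r = tiny / 16.0f; /* subnormal result when FTZ is off */

  /* With crtfastmath.o linked (gcc -ffast-math demo.c), the SSE unit
     flushes r to zero instead.  */
  printf ("%g\n", (double) r);
  return 0;
}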
diff --git a/gcc/config/i386/crtprec.c b/gcc/config/i386/crtprec.c
new file mode 100644
index 000000000..4f42a8fa1
--- /dev/null
+++ b/gcc/config/i386/crtprec.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#if __PREC == 32
+ #define X87CW (0 << 8) /* Single precision (24 bits) */
+#elif __PREC == 64
+ #define X87CW (2 << 8) /* Double precision (53 bits) */
+#elif __PREC == 80
+ #define X87CW (3 << 8) /* Extended precision (64 bits) */
+#else
+ #error "Wrong precision requested."
+#endif
+
+#define X87CW_PCMASK (3 << 8)
+
+static void __attribute__((constructor))
+set_precision (void)
+{
+ unsigned short int cwd;
+
+ asm volatile ("fstcw\t%0" : "=m" (cwd));
+
+ cwd &= ~X87CW_PCMASK;
+ cwd |= X87CW;
+
+ asm volatile ("fldcw\t%0" : : "m" (cwd));
+}
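A hedged companion sketch (illustrative, not part of the sources): the effect of set_precision can be observed by reading the control word back. Bits 8-9 hold the precision-control field that crtprec sets to 0, 2 or 3 for 24-, 53- and 64-bit significands; linking crtprec32.o, crtprec64.o or crtprec80.o (for instance via -mpc32/-mpc64/-mpc80 on targets whose ENDFILE_SPEC lists them, as Darwin's does later in this patch) changes the printed value.

#include <stdio.h>

int main (void)
{
  unsigned short int cwd;

  /* Read the x87 control word, the same register set_precision writes.  */
  asm volatile ("fstcw\t%0" : "=m" (cwd));

  /* Bits 8-9: 0 = 24-bit, 2 = 53-bit, 3 = 64-bit significand.  */
  printf ("x87 precision-control field: %u\n", (cwd >> 8) & 3);
  return 0;
}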
diff --git a/gcc/config/i386/cygming-crtbegin.c b/gcc/config/i386/cygming-crtbegin.c
new file mode 100644
index 000000000..fc36cce25
--- /dev/null
+++ b/gcc/config/i386/cygming-crtbegin.c
@@ -0,0 +1,135 @@
+/* crtbegin object for windows32 targets.
+ Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Danny Smith <dannysmith@users.sourceforge.net>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Target machine header files require this define. */
+#define IN_LIBGCC2
+
+#include "auto-host.h"
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "unwind-dw2-fde.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#ifndef LIBGCC_SONAME
+#define LIBGCC_SONAME "libgcc_s.dll"
+#endif
+
+#ifndef LIBGCJ_SONAME
+#define LIBGCJ_SONAME "libgcj_s.dll"
+#endif
+
+
+/* Make the declarations weak.  This is critical for
+   _Jv_RegisterClasses because it lives in libgcj.a.  */
+extern void __register_frame_info (const void *, struct object *)
+ TARGET_ATTRIBUTE_WEAK;
+extern void *__deregister_frame_info (const void *)
+ TARGET_ATTRIBUTE_WEAK;
+extern void _Jv_RegisterClasses (const void *) TARGET_ATTRIBUTE_WEAK;
+
+#if defined(HAVE_LD_RO_RW_SECTION_MIXING)
+# define EH_FRAME_SECTION_CONST const
+#else
+# define EH_FRAME_SECTION_CONST
+#endif
+
+/* Stick a label at the beginning of the frame unwind info so we can
+ register/deregister it with the exception handling library code. */
+#if DWARF2_UNWIND_INFO
+static EH_FRAME_SECTION_CONST char __EH_FRAME_BEGIN__[]
+ __attribute__((used, section(EH_FRAME_SECTION_NAME), aligned(4)))
+ = { };
+
+static struct object obj;
+#endif
+
+#if TARGET_USE_JCR_SECTION
+static void *__JCR_LIST__[]
+ __attribute__ ((used, section(JCR_SECTION_NAME), aligned(4)))
+ = { };
+#endif
+
+/* Pull in references from libgcc.a(unwind-dw2-fde.o) in the
+ startfile. These are referenced by a ctor and dtor in crtend.o. */
+extern void __gcc_register_frame (void);
+extern void __gcc_deregister_frame (void);
+
+void
+__gcc_register_frame (void)
+{
+#if DWARF2_UNWIND_INFO
+/* Weak undefined symbols won't be pulled in from dlls; hence
+   we first test if the dll is already loaded and, if so,
+   get the symbol's address at run-time.  If the dll is not loaded,
+   fall back to weak linkage to the static archive.  */
+
+ void (*register_frame_fn) (const void *, struct object *);
+ HANDLE h = GetModuleHandle (LIBGCC_SONAME);
+ if (h)
+ register_frame_fn = (void (*) (const void *, struct object *))
+ GetProcAddress (h, "__register_frame_info");
+ else
+ register_frame_fn = __register_frame_info;
+ if (register_frame_fn)
+ register_frame_fn (__EH_FRAME_BEGIN__, &obj);
+#endif
+
+#if TARGET_USE_JCR_SECTION
+ if (__JCR_LIST__[0])
+ {
+ void (*register_class_fn) (const void *);
+ HANDLE h = GetModuleHandle (LIBGCJ_SONAME);
+ if (h)
+ register_class_fn = (void (*) (const void *))
+ GetProcAddress (h, "_Jv_RegisterClasses");
+ else
+ register_class_fn = _Jv_RegisterClasses;
+
+ if (register_class_fn)
+ register_class_fn (__JCR_LIST__);
+ }
+#endif
+}
+
+void
+__gcc_deregister_frame (void)
+{
+#if DWARF2_UNWIND_INFO
+ void * (*deregister_frame_fn) (const void *);
+ HANDLE h = GetModuleHandle (LIBGCC_SONAME);
+ if (h)
+ deregister_frame_fn = (void* (*) (const void *))
+ GetProcAddress (h, "__deregister_frame_info");
+ else
+ deregister_frame_fn = __deregister_frame_info;
+ if (deregister_frame_fn)
+ deregister_frame_fn (__EH_FRAME_BEGIN__);
+#endif
+}
diff --git a/gcc/config/i386/cygming-crtend.c b/gcc/config/i386/cygming-crtend.c
new file mode 100644
index 000000000..8545420b2
--- /dev/null
+++ b/gcc/config/i386/cygming-crtend.c
@@ -0,0 +1,88 @@
+/* crtend object for windows32 targets.
+ Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Danny Smith <dannysmith@users.sourceforge.net>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Target machine header files require this define. */
+#define IN_LIBGCC2
+
+/* auto-host.h is needed by cygming.h for HAVE_GAS_WEAK and here
+ for HAVE_LD_RO_RW_SECTION_MIXING. */
+#include "auto-host.h"
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "unwind-dw2-fde.h"
+
+#if defined(HAVE_LD_RO_RW_SECTION_MIXING)
+# define EH_FRAME_SECTION_CONST const
+#else
+# define EH_FRAME_SECTION_CONST
+#endif
+
+#if DWARF2_UNWIND_INFO
+/* Terminate the frame unwind info section with a 0 as a sentinel;
+ this would be the 'length' field in a real FDE. */
+
+static EH_FRAME_SECTION_CONST int __FRAME_END__[]
+ __attribute__ ((used, section(EH_FRAME_SECTION_NAME),
+ aligned(4)))
+ = { 0 };
+#endif
+
+#if TARGET_USE_JCR_SECTION
+/* Null terminate the .jcr section array. */
+static void *__JCR_END__[1]
+ __attribute__ ((used, section(JCR_SECTION_NAME),
+ aligned(sizeof(void *))))
+ = { 0 };
+#endif
+
+extern void __gcc_register_frame (void);
+extern void __gcc_deregister_frame (void);
+
+static void register_frame_ctor (void) __attribute__ ((constructor (0)));
+
+static void
+register_frame_ctor (void)
+{
+ __gcc_register_frame ();
+#if DEFAULT_USE_CXA_ATEXIT
+ /* If we use the __cxa_atexit method to register C++ dtors
+ at object construction, also use atexit to register eh frame
+ info cleanup. */
+ atexit (__gcc_deregister_frame);
+#endif
+}
+
+#if !DEFAULT_USE_CXA_ATEXIT
+static void deregister_frame_dtor (void) __attribute__ ((destructor (0)));
+
+static void
+deregister_frame_dtor (void)
+{
+ __gcc_deregister_frame ();
+}
+#endif
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
new file mode 100644
index 000000000..9c32dda2e
--- /dev/null
+++ b/gcc/config/i386/cygming.h
@@ -0,0 +1,478 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using a Unix style C library and tools.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define DBX_DEBUGGING_INFO 1
+#define SDB_DEBUGGING_INFO 1
+#if TARGET_64BIT_DEFAULT || defined (HAVE_GAS_PE_SECREL32_RELOC)
+#define DWARF2_DEBUGGING_INFO 1
+#endif
+
+#undef PREFERRED_DEBUGGING_TYPE
+#if (DWARF2_DEBUGGING_INFO)
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#else
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+#endif
+
+#undef TARGET_SEH
+#define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables)
+
+/* Win64 with SEH cannot represent DRAP stack frames. Disable its use.
+ Force the use of different mechanisms to allocate aligned local data. */
+#undef MAX_STACK_ALIGNMENT
+#define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT)
+
+/* Support hooks for SEH. */
+#undef TARGET_ASM_UNWIND_EMIT
+#define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit
+#undef TARGET_ASM_UNWIND_EMIT_BEFORE_INSN
+#define TARGET_ASM_UNWIND_EMIT_BEFORE_INSN false
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE i386_pe_seh_end_prologue
+#define SUBTARGET_ASM_UNWIND_INIT i386_pe_seh_init
+
+#undef DEFAULT_ABI
+#define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
+
+#if ! defined (USE_MINGW64_LEADING_UNDERSCORES)
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX (TARGET_64BIT ? "" : "_")
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX (TARGET_64BIT ? "." : "")
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), "*%s%s%ld", LOCAL_LABEL_PREFIX, \
+ (PREFIX), (long)(NUMBER))
+
+#undef LPREFIX
+#define LPREFIX (TARGET_64BIT ? ".L" : "L")
+
+#endif
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : (write_symbols == DWARF2_DEBUG \
+ ? svr4_dbx_register_map[n] : dbx_register_map[n]))
+
+/* Map gcc register number to DWARF 2 CFA column number. For 32 bit
+ target, always use the svr4_dbx_register_map for DWARF .eh_frame
+ even if we don't use DWARF .debug_frame. */
+#undef DWARF_FRAME_REGNUM
+#define DWARF_FRAME_REGNUM(n) \
+ (TARGET_64BIT ? dbx64_register_map[(n)] \
+ : svr4_dbx_register_map[(n)])
+
+/* The MS_ABI changes the set of call-used registers. */
+#undef DWARF_FRAME_REGISTERS
+#define DWARF_FRAME_REGISTERS (TARGET_64BIT ? 33 : 17)
+
+#ifdef HAVE_GAS_PE_SECREL32_RELOC
+/* Use section-relative relocations for debugging offsets.  Other
+   targets fake this by putting the section VMA at 0, but PE won't
+   allow that.  */
+#define ASM_OUTPUT_DWARF_OFFSET(FILE, SIZE, LABEL, SECTION) \
+ do { \
+ switch (SIZE) \
+ { \
+ case 4: \
+ fputs ("\t.secrel32\t", FILE); \
+ assemble_name (FILE, LABEL); \
+ break; \
+ case 8: \
+ /* This is a hack. There is no 64-bit section relative \
+ relocation. However, the COFF format also does not \
+ support 64-bit file offsets; 64-bit applications are \
+ limited to 32-bits of code+data in any one module. \
+ Fake the 64-bit offset by zero-extending it. */ \
+ fputs ("\t.secrel32\t", FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs ("\n\t.long\t0", FILE); \
+ break; \
+ default: \
+ gcc_unreachable (); \
+ } \
+ } while (0)
+#endif
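A hedged re-rendering of the zero-extension trick described above (the label name Ldebug0 is made up): for SIZE == 4 the macro emits one .secrel32 directive, and for SIZE == 8 it emits the same 32-bit section-relative relocation followed by a literal zero for the high half.

#include <stdio.h>

/* Illustrative stand-in for the fprintf logic in the macro above.  */
static void
emit_offset (FILE *f, int size, const char *label)
{
  fputs ("\t.secrel32\t", f);
  fputs (label, f);
  if (size == 8)
    fputs ("\n\t.long\t0", f);  /* zero-extend: fake the high 32 bits */
  fputc ('\n', f);
}

int main (void)
{
  emit_offset (stdout, 4, "Ldebug0");
  emit_offset (stdout, 8, "Ldebug0");
  return 0;
}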
+
+#define TARGET_EXECUTABLE_SUFFIX ".exe"
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ if (!TARGET_64BIT) \
+ builtin_define ("_X86_=1"); \
+ if (TARGET_SEH) \
+ builtin_define ("__SEH__"); \
+ builtin_assert ("system=winnt"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__fastcall=__attribute__((__fastcall__))"); \
+ builtin_define ("__thiscall=__attribute__((__thiscall__))"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ if (!flag_iso) \
+ { \
+ builtin_define ("_stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("_fastcall=__attribute__((__fastcall__))"); \
+ builtin_define ("_thiscall=__attribute__((__thiscall__))"); \
+ builtin_define ("_cdecl=__attribute__((__cdecl__))"); \
+ } \
+ /* Even though linkonce works with static libs, this is needed \
+ to compare typeinfo symbols across dll boundaries. */ \
+ builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \
+ builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \
+ EXTRA_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+/* Get tree.c to declare a target-specific specialization of
+ merge_decl_attributes. */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+/* This macro defines names of additional specifications to put in the specs
+   that can be used in various specifications like CC1_SPEC.  Its definition
+   is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name, and a string constant that is used by the GCC driver
+   program.
+
+   Do not define this macro if it does not need to do anything.  */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "mingw_include_path", DEFAULT_TARGET_MACHINE }
+
+#undef MATH_LIBRARY
+#define MATH_LIBRARY ""
+
+#define SIZE_TYPE (TARGET_64BIT ? "long long unsigned int" : "unsigned int")
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long long int" : "int")
+
+#define WCHAR_TYPE_SIZE 16
+#define WCHAR_TYPE "short unsigned int"
+
+/* Windows64 continues to use a 32-bit long type. */
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 32
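The type defines above pin down the LLP64 model used by 64-bit Windows: size_t and pointers widen to 64 bits while long stays at 32. A small sanity check (an illustrative sketch, not part of the sources):

#include <stdio.h>
#include <stddef.h>

int main (void)
{
  /* On 64-bit MinGW (LLP64) this prints "4 8 8"; on a typical
     64-bit LP64 Unix it prints "8 8 8".  */
  printf ("%u %u %u\n",
          (unsigned) sizeof (long),
          (unsigned) sizeof (void *),
          (unsigned) sizeof (size_t));
  return 0;
}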
+
+union tree_node;
+#define TREE union tree_node *
+
+#define drectve_section() \
+ (fprintf (asm_out_file, "\t.section .drectve\n"), \
+ in_section = NULL)
+
+/* Older versions of gas don't handle 'r' as data.
+ Explicitly set data flag with 'd'. */
+#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata,\"dr\""
+
+/* Don't allow flag_pic to propagate since gas may produce invalid code
+ otherwise. */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && flag_pic != 1) \
+ { \
+ if (flag_pic > 1) \
+ warning (0, \
+ "-fPIC ignored for target (all code is position independent)"\
+ ); \
+ flag_pic = 1; \
+ } \
+ else if (!TARGET_64BIT && flag_pic) \
+ { \
+ warning (0, "-f%s ignored for target (all code is position independent)",\
+ (flag_pic > 1) ? "PIC" : "pic"); \
+ flag_pic = 0; \
+ } \
+} while (0) \
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in).
+
+   On i386 running Windows NT, modify the assembler name with a suffix
+   consisting of an atsign (@) followed by a string of digits that represents
+   the number of bytes of arguments passed to the function, if it has the
+   attribute STDCALL.
+
+ In addition, we must mark dll symbols specially. Definitions of
+ dllexport'd objects install some info in the .drectve section.
+ References to dllimport'd objects are fetched indirectly via
+ _imp__. If both are declared, dllexport overrides. This is also
+ needed to implement one-only vtables: they go into their own
+ section and we need to set DECL_SECTION_NAME so we do that here.
+ Note that we can be called twice on the same decl. */
+
+#define SUBTARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info
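A hedged example of the @-suffix decoration just described (32-bit only; the function name is made up). Two 4-byte arguments give 8 bytes popped by the callee, so the decorated assembler name combines USER_LABEL_PREFIX with "@8":

/* Illustrative translation unit for a 32-bit mingw/cygwin target.  */
int __attribute__ ((__stdcall__))
add2 (int a, int b)
{
  /* Emitted symbol: _add2@8 -- leading underscore from USER_LABEL_PREFIX
     plus the argument-byte count appended after '@'.  */
  return a + b;
}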
+
+/* Output a common block. */
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON \
+ i386_pe_asm_output_aligned_decl_common
+
+/* Output the label for an initialized variable. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+do { \
+ i386_pe_maybe_record_exported_symbol (DECL, NAME, 1); \
+ ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
+} while (0)
+
+/* Output a reference to a label. Fastcall function symbols
+ keep their '@' prefix, while other symbols are prefixed
+ with user_label_prefix. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+do { \
+ if ((NAME)[0] != FASTCALL_PREFIX) \
+ fputs (user_label_prefix, (STREAM)); \
+ fputs ((NAME), (STREAM)); \
+} while (0)
+
+/* This does much the same in memory rather than to a stream. */
+#undef TARGET_MANGLE_ASSEMBLER_NAME
+#define TARGET_MANGLE_ASSEMBLER_NAME i386_pe_mangle_assembler_name
+
+
+/* Emit code to check the stack when allocating more than 4000
+ bytes in one go. */
+#define CHECK_STACK_LIMIT 4000
+
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+   returns float values in the 387, and needs stack probes.
+   We also align doubles to 64 bits for MSVC default compatibility.  */
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS \
+ | MASK_STACK_PROBE | MASK_ALIGN_DOUBLE)
+
+#undef TARGET_SUBTARGET64_DEFAULT
+#define TARGET_SUBTARGET64_DEFAULT \
+ MASK_128BIT_LONG_DOUBLE
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", 1<<(LOG))
+
+/* Windows uses explicit import from shared libraries. */
+#define MULTIPLE_SYMBOL_SPACES 1
+
+#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+
+#define SUPPORTS_ONE_ONLY 1
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION i386_pe_asm_named_section
+
+/* Select attributes for named sections. */
+#define TARGET_SECTION_TYPE_FLAGS i386_pe_section_type_flags
+
+/* Write the extra assembler code needed to declare a function
+ properly. If we are generating SDB debugging information, this
+ will happen automatically, so we only need to handle other cases. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ i386_pe_start_function (FILE, NAME, DECL)
+
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE,NAME,DECL) \
+ i386_pe_end_function (FILE, NAME, DECL)
+
+/* Add an external function to the list of functions to be declared at
+ the end of the file. */
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ do \
+ { \
+ if (TREE_CODE (DECL) == FUNCTION_DECL) \
+ i386_pe_record_external_function ((DECL), (NAME)); \
+ } \
+ while (0)
+
+/* Declare the type properly for any external libcall. */
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+ i386_pe_declare_function_type (FILE, XSTR (FUN, 0), 1)
+
+/* This says how to put a global symbol in the BSS section.  */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+/* Output function declarations at the end of the file. */
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END i386_pe_file_end
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START " #"
+
+#ifndef DWARF2_UNWIND_INFO
+/* If configured with --disable-sjlj-exceptions, use DWARF2, else
+ default to SJLJ. */
+#if (defined (CONFIG_SJLJ_EXCEPTIONS) && !CONFIG_SJLJ_EXCEPTIONS)
+/* The logic of this #if must be kept synchronised with the logic
+ for selecting the tmake_eh_file fragment in config.gcc. */
+#define DWARF2_UNWIND_INFO 1
+/* If multilib is selected, break the build, as SJLJ is required.  */
+#if defined (TARGET_BI_ARCH)
+#error Only SJLJ exceptions are supported for the 64-bit Windows and 32-bit based multilib version of GCC.
+#endif
+#else
+#define DWARF2_UNWIND_INFO 0
+#endif
+#endif
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#undef PROFILE_HOOK
+#define PROFILE_HOOK(LABEL) \
+ if (MAIN_NAME_P (DECL_NAME (current_function_decl))) \
+ { \
+ emit_call_insn (gen_rtx_CALL (VOIDmode, \
+ gen_rtx_MEM (FUNCTION_MODE, \
+ gen_rtx_SYMBOL_REF (Pmode, "_monstartup")), \
+ const0_rtx)); \
+ }
+
+/* Java Native Interface (JNI) methods on Win32 are invoked using the
+ stdcall calling convention. */
+#undef MODIFY_JNI_METHOD_CALL
+#define MODIFY_JNI_METHOD_CALL(MDECL) \
+ build_type_attribute_variant ((MDECL), \
+ build_tree_list (get_identifier ("stdcall"), \
+ NULL))
+
+/* For Win32 ABI compatibility */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* MSVC returns aggregate types of up to 8 bytes via registers.
+ See i386.c:ix86_return_in_memory. */
+#undef MS_AGGREGATE_RETURN
+#define MS_AGGREGATE_RETURN 1
+
+/* Biggest alignment supported by the object file format of this
+ machine. Use this macro to limit the alignment which can be
+ specified using the `__attribute__ ((aligned (N)))' construct. If
+ not defined, the default value is `BIGGEST_ALIGNMENT'. */
+/* IMAGE_SCN_ALIGN_8192BYTES is the largest section alignment flag
+   specified in the PECOFF60 spec.  The native MS compiler also limits
+   user-specified alignment to 8192 bytes.  */
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (8192 * 8)
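Note that MAX_OFILE_ALIGNMENT is measured in bits, hence 8192 * 8 for an 8192-byte ceiling. A hedged illustration of the user-visible effect (variable names are made up):

/* Accepted: exactly the PECOFF limit.  */
char ok_buf[16] __attribute__ ((aligned (8192)));

/* Rejected at compile time on this target, since 16384 bytes exceeds
   MAX_OFILE_ALIGNMENT / 8:

   char too_big[16] __attribute__ ((aligned (16384)));  */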
+
+/* The BIGGEST_FIELD_ALIGNMENT macro is used directly by libobjc.  There, we
+   align internal doubles in structures on dword boundaries.  Otherwise,
+   vector modes are supported via ADJUST_FIELD_ALIGN, defined in i386.h.  */
+#ifdef IN_TARGET_LIBS
+#undef BIGGEST_FIELD_ALIGNMENT
+#define BIGGEST_FIELD_ALIGNMENT 64
+#endif
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#undef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS 1
+#define GROUP_BITFIELDS_BY_ALIGN TYPE_NATIVE(rec)
+
+/* Enable alias attribute support. */
+#ifndef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+#endif
+
+/* This implements the `alias' attribute, keeping any stdcall or
+ fastcall decoration. */
+#undef ASM_OUTPUT_DEF_FROM_DECLS
+#define ASM_OUTPUT_DEF_FROM_DECLS(STREAM, DECL, TARGET) \
+ do \
+ { \
+ const char *alias \
+ = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \
+ i386_pe_maybe_record_exported_symbol (DECL, alias, 0); \
+ if (TREE_CODE (DECL) == FUNCTION_DECL) \
+ i386_pe_declare_function_type (STREAM, alias, \
+ TREE_PUBLIC (DECL)); \
+ ASM_OUTPUT_DEF (STREAM, alias, IDENTIFIER_POINTER (TARGET)); \
+ } while (0)
+
+/* GNU as supports weak symbols on PECOFF. */
+#ifdef HAVE_GAS_WEAK
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do \
+ { \
+ fputs ("\t.weak\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
+#endif /* HAVE_GAS_WEAK */
+
+/* FIXME: SUPPORTS_WEAK && TARGET_HAVE_NAMED_SECTIONS is true,
+ but for .jcr section to work we also need crtbegin and crtend
+ objects. */
+#define TARGET_USE_JCR_SECTION 1
+
+/* Decide whether it is safe to use a local alias for a virtual function
+ when constructing thunks. */
+#undef TARGET_USE_LOCAL_THUNK_ALIAS_P
+#define TARGET_USE_LOCAL_THUNK_ALIAS_P(DECL) (!DECL_ONE_ONLY (DECL))
+
+#define SUBTARGET_ATTRIBUTE_TABLE \
+ { "selectany", 0, 0, true, false, false, ix86_handle_selectany_attribute }
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+
+/* mcount() does not need a counter variable. */
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+#define TARGET_VALID_DLLIMPORT_ATTRIBUTE_P i386_pe_valid_dllimport_attribute_p
+#define TARGET_CXX_ADJUST_CLASS_AT_DEFINITION i386_pe_adjust_class_at_definition
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_pe_mangle_decl_assembler_name
+
+#undef TARGET_ASM_ASSEMBLE_VISIBILITY
+#define TARGET_ASM_ASSEMBLE_VISIBILITY i386_pe_assemble_visibility
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+#undef TREE
+
+#ifndef BUFSIZ
+# undef FILE
+#endif
diff --git a/gcc/config/i386/cygming.opt b/gcc/config/i386/cygming.opt
new file mode 100644
index 000000000..0fb325bde
--- /dev/null
+++ b/gcc/config/i386/cygming.opt
@@ -0,0 +1,54 @@
+; Cygwin- and MinGW-specific options.
+
+; Copyright (C) 2005, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mconsole
+Target RejectNegative
+Create console application
+
+mdll
+Target RejectNegative
+Generate code for a DLL
+
+mnop-fun-dllimport
+Target Report Var(TARGET_NOP_FUN_DLLIMPORT)
+Ignore dllimport for functions
+
+mthreads
+Target RejectNegative
+Use MinGW-specific thread support
+
+mwin32
+Target
+Set Windows defines
+
+mwindows
+Target
+Create GUI application
+
+mpe-aligned-commons
+Target Var(use_pe_aligned_common) Init(HAVE_GAS_ALIGNED_COMM)
+Use the GNU extension to the PE format for aligned common data
+
+muse-libstdc-wrappers
+Target Condition({defined (USE_CYGWIN_LIBSTDCXX_WRAPPERS)})
+Compile code that relies on Cygwin DLL wrappers to support C++ operator new/delete replacement
+
+posix
+Driver
diff --git a/gcc/config/i386/cygwin-stdint.h b/gcc/config/i386/cygwin-stdint.h
new file mode 100644
index 000000000..df865f717
--- /dev/null
+++ b/gcc/config/i386/cygwin-stdint.h
@@ -0,0 +1,62 @@
+/* Definitions for <stdint.h> types on systems using Cygwin.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* Exact-width integer types */
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long long int"
+
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+/* Minimum-width integer types */
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long long int"
+
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+/* Fastest minimum-width integer types */
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long long int"
+
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+/* Integer types capable of holding object pointers */
+
+#define INTPTR_TYPE "int"
+#define UINTPTR_TYPE "unsigned int"
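A hedged compile-time check of the mappings above (illustrative only): each chosen type must have the width its <stdint.h> name promises, and intptr_t, defined here as plain int, must be wide enough for a pointer on 32-bit Cygwin.

#include <stdint.h>

/* Each array has length 1 when the condition holds and an invalid
   negative length otherwise, so any mismatch breaks the build.  */
typedef char check_int32 [sizeof (int32_t)  == 4 ? 1 : -1];
typedef char check_int64 [sizeof (int64_t)  == 8 ? 1 : -1];
typedef char check_intptr[sizeof (intptr_t) == sizeof (void *) ? 1 : -1];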
+
diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm
new file mode 100644
index 000000000..8f9c48685
--- /dev/null
+++ b/gcc/config/i386/cygwin.asm
@@ -0,0 +1,188 @@
+/* Stuff needed for libgcc on win32.
+ *
+ * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010
+ * Free Software Foundation, Inc.
+ * Written By Steve Chamberlain
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "auto-host.h"
+
+#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE
+ .cfi_sections .debug_frame
+# define cfi_startproc() .cfi_startproc
+# define cfi_endproc() .cfi_endproc
+# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X
+# define cfi_def_cfa_register(X) .cfi_def_cfa_register X
+# define cfi_register(D,S) .cfi_register D, S
+# ifdef _WIN64
+# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0
+# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X
+# else
+# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0
+# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X
+# endif
+#else
+# define cfi_startproc()
+# define cfi_endproc()
+# define cfi_adjust_cfa_offset(X)
+# define cfi_def_cfa_register(X)
+# define cfi_register(D,S)
+# define cfi_push(X)
+# define cfi_pop(X)
+#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */
+
+#ifdef L_chkstk
+/* Function prologue calls __chkstk to probe the stack when allocating more
+   than CHECK_STACK_LIMIT bytes in one go.  Touching the stack at 4K
+   increments is necessary to ensure that the guard pages used by the OS
+   virtual memory manager are allocated in the correct sequence.  */
+
+ .global ___chkstk
+ .global __alloca
+#ifdef _WIN64
+/* __alloca is a normal function call, which uses %rcx as the argument. */
+ cfi_startproc()
+__alloca:
+ movq %rcx, %rax
+ /* FALLTHRU */
+
+/* ___chkstk is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering the 4 integer argument registers, %rcx, %rdx,
+ %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */
+ .align 4
+___chkstk:
+ popq %r11 /* pop return address */
+ cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */
+ cfi_register(%rip, %r11)
+ movq %rsp, %r10
+ cmpq $0x1000, %rax /* > 4k ?*/
+ jb 2f
+
+1: subq $0x1000, %r10 /* yes, move pointer down 4k*/
+ orl $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja 1b /* and do it again */
+
+2: subq %rax, %r10
+ movq %rsp, %rax /* hold CFA until return */
+ cfi_def_cfa_register(%rax)
+ orl $0x0, (%r10) /* less than 4k, just peek here */
+ movq %r10, %rsp /* decrement stack */
+
+ /* Push the return value back. Doing this instead of just
+ jumping to %r11 preserves the cached call-return stack
+ used by most modern processors. */
+ pushq %r11
+ ret
+ cfi_endproc()
+#else
+ cfi_startproc()
+___chkstk:
+__alloca:
+ pushl %ecx /* save temp */
+ cfi_push(%eax)
+ leal 8(%esp), %ecx /* point past return addr */
+ cmpl $0x1000, %eax /* > 4k ?*/
+ jb 2f
+
+1: subl $0x1000, %ecx /* yes, move pointer down 4k*/
+ orl $0x0, (%ecx) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja 1b /* and do it again */
+
+2: subl %eax, %ecx
+ orl $0x0, (%ecx) /* less than 4k, just peek here */
+ movl %esp, %eax /* save current stack pointer */
+ cfi_def_cfa_register(%eax)
+ movl %ecx, %esp /* decrement stack */
+ movl (%eax), %ecx /* recover saved temp */
+
+ /* Copy the return register. Doing this instead of just jumping to
+ the address preserves the cached call-return stack used by most
+ modern processors. */
+ pushl 4(%eax)
+ ret
+ cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk */
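The probe loops above are easier to follow in C. A hedged sketch (illustrative only; the real code must run in the prologue and manipulate the stack pointer itself, which is why it is written in assembly):

/* Touch one word per 4K page, top-down, so each OS guard page is
   faulted in before the next one is reached.  */
static void
probe_stack (volatile char *sp, unsigned long size)
{
  while (size > 0x1000)
    {
      sp -= 0x1000;
      *sp |= 0;        /* probe this page */
      size -= 0x1000;
    }
  sp -= size;
  *sp |= 0;            /* final probe below the allocation */
}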
+
+#ifdef L_chkstk_ms
+/* ___chkstk_ms is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering any registers. Unlike ___chkstk, it just probes the
+ stack and does no stack allocation. */
+ .global ___chkstk_ms
+#ifdef _WIN64
+ cfi_startproc()
+___chkstk_ms:
+ pushq %rcx /* save temps */
+ cfi_push(%rcx)
+ pushq %rax
+ cfi_push(%rax)
+ cmpq $0x1000, %rax /* > 4k ?*/
+ leaq 24(%rsp), %rcx /* point past return addr */
+ jb 2f
+
+1: subq $0x1000, %rcx /* yes, move pointer down 4k */
+ orq $0x0, (%rcx) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja 1b /* and do it again */
+
+2: subq %rax, %rcx
+ orq $0x0, (%rcx) /* less than 4k, just peek here */
+
+ popq %rax
+ cfi_pop(%rax)
+ popq %rcx
+ cfi_pop(%rcx)
+ ret
+ cfi_endproc()
+#else
+ cfi_startproc()
+___chkstk_ms:
+ pushl %ecx /* save temp */
+ cfi_push(%ecx)
+ pushl %eax
+ cfi_push(%eax)
+ cmpl $0x1000, %eax /* > 4k ?*/
+ leal 12(%esp), %ecx /* point past return addr */
+ jb 2f
+
+1: subl $0x1000, %ecx /* yes, move pointer down 4k*/
+ orl $0x0, (%ecx) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja 1b /* and do it again */
+
+2: subl %eax, %ecx
+ orl $0x0, (%ecx) /* less than 4k, just peek here */
+
+ popl %eax
+ cfi_pop(%eax)
+ popl %ecx
+ cfi_pop(%ecx)
+ ret
+ cfi_endproc()
+#endif /* _WIN64 */
+#endif /* L_chkstk_ms */
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
new file mode 100644
index 000000000..f8daeecec
--- /dev/null
+++ b/gcc/config/i386/cygwin.h
@@ -0,0 +1,142 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using a Unix style C library and tools.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fprintf (stderr, " (x86 Cygwin)");
+
+#define EXTRA_OS_CPP_BUILTINS() /* Nothing. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE} \
+ -D__CYGWIN32__ -D__CYGWIN__ %{!ansi:-Dunix} -D__unix__ -D__unix \
+ %{mwin32:-DWIN32 -D_WIN32 -D__WIN32 -D__WIN32__ %{!ansi:-DWINNT}} \
+ %{!nostdinc:%{!mno-win32:-idirafter ../include/w32api%s -idirafter ../../include/w32api%s}}\
+"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+ %{!shared: %{!mdll: crt0%O%s \
+ %{pg:gcrt0%O%s}}}\
+ crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\
+ crtend.o%s"
+
+/* Normally, -lgcc is not needed since everything in it is in the DLL, but we
+ want to allow things to be added to it when installing new versions of
+ GCC without making a new CYGWIN.DLL, so we leave it. Profiling is handled
+ by calling the init function from main. */
+
+#ifdef ENABLE_SHARED_LIBGCC
+#define SHARED_LIBGCC_SPEC " \
+ %{static|static-libgcc:-lgcc -lgcc_eh} \
+ %{!static: \
+ %{!static-libgcc: \
+ %{!shared: \
+ %{!shared-libgcc:-lgcc -lgcc_eh} \
+ %{shared-libgcc:-lgcc_s -lgcc} \
+ } \
+ %{shared:-lgcc_s -lgcc} \
+ } \
+ } "
+#else
+#define SHARED_LIBGCC_SPEC " -lgcc "
+#endif
+
+#undef REAL_LIBGCC_SPEC
+#define REAL_LIBGCC_SPEC SHARED_LIBGCC_SPEC
+
+/* We have to link dynamically to get at the system DLLs.  All of libc, libm
+   and the Unix stuff is in cygwin.dll.  The import library is called
+   'libcygwin.a'.  For Windows applications, include more libraries, but
+   always include kernel32.  We'd like to specify the windows subsystem to
+   ld, but that doesn't work just yet.  */
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+ %{pg:-lgmon} \
+ -lcygwin \
+ %{mwindows:-lgdi32 -lcomdlg32} \
+ -ladvapi32 -lshell32 -luser32 -lkernel32"
+
+/* To implement C++ function replacement, we always wrap the C++
+   malloc-like operators.  See N2800 #17.6.4.6 [replacement.functions].  */
+#define CXX_WRAP_SPEC_LIST " \
+ --wrap _Znwj \
+ --wrap _Znaj \
+ --wrap _ZdlPv \
+ --wrap _ZdaPv \
+ --wrap _ZnwjRKSt9nothrow_t \
+ --wrap _ZnajRKSt9nothrow_t \
+ --wrap _ZdlPvRKSt9nothrow_t \
+ --wrap _ZdaPvRKSt9nothrow_t \
+"
+
+#if defined (USE_CYGWIN_LIBSTDCXX_WRAPPERS)
+
+#if USE_CYGWIN_LIBSTDCXX_WRAPPERS
+/* Default on; only an explicit -mno option disables it.  */
+#define CXX_WRAP_SPEC_OPT "!mno-use-libstdc-wrappers"
+#else
+/* Default off; only an explicit -m option enables it.  */
+#define CXX_WRAP_SPEC_OPT "muse-libstdc-wrappers"
+#endif
+
+#define CXX_WRAP_SPEC "%{" CXX_WRAP_SPEC_OPT ":" CXX_WRAP_SPEC_LIST "}"
+
+#else /* !defined (USE_CYGWIN_LIBSTDCXX_WRAPPERS) */
+
+#define CXX_WRAP_SPEC ""
+
+#endif /* ?defined (USE_CYGWIN_LIBSTDCXX_WRAPPERS) */
+
+#define LINK_SPEC "\
+ %{mwindows:--subsystem windows} \
+ %{mconsole:--subsystem console} \
+ " CXX_WRAP_SPEC " \
+ %{shared: %{mdll: %eshared and mdll are not compatible}} \
+ %{shared: --shared} %{mdll:--dll} \
+ %{static:-Bstatic} %{!static:-Bdynamic} \
+ %{shared|mdll: --enable-auto-image-base -e __cygwin_dll_entry@12} \
+ --dll-search-prefix=cyg -tsaware"
+
+/* Binutils does not handle weak symbols from dlls correctly. For now,
+ do not use them unnecessarily in gthr-posix.h. */
+#define GTHREAD_USE_WEAK 0
+
+/* Every program on cygwin links against cygwin1.dll which contains
+ the pthread routines. There is no need to explicitly link them
+ and the -pthread flag is not recognized. */
+#undef GOMP_SELF_SPECS
+#define GOMP_SELF_SPECS ""
+
+/* This matches SHLIB_SONAME and SHLIB_SOVERSION in t-cygwin. */
+#if DWARF2_UNWIND_INFO
+#define LIBGCC_EH_EXTN ""
+#else
+#define LIBGCC_EH_EXTN "-sjlj"
+#endif
+#define LIBGCC_SONAME "cyggcc_s" LIBGCC_EH_EXTN "-1.dll"
+
+/* We should find a way to not have to update this manually. */
+#define LIBGCJ_SONAME "cyggcj" /*LIBGCC_EH_EXTN*/ "-12.dll"
+
diff --git a/gcc/config/i386/darwin-libgcc.10.4.ver b/gcc/config/i386/darwin-libgcc.10.4.ver
new file mode 100644
index 000000000..67f5e239c
--- /dev/null
+++ b/gcc/config/i386/darwin-libgcc.10.4.ver
@@ -0,0 +1,98 @@
+# Copyright (C) 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc/config/i386/darwin-libgcc.10.5.ver b/gcc/config/i386/darwin-libgcc.10.5.ver
new file mode 100644
index 000000000..eeec9fbfc
--- /dev/null
+++ b/gcc/config/i386/darwin-libgcc.10.5.ver
@@ -0,0 +1,102 @@
+# Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetIPInfo
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___floatundidf
+___floatundisf
+___floatundixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
new file mode 100644
index 000000000..08b6c5253
--- /dev/null
+++ b/gcc/config/i386/darwin.h
@@ -0,0 +1,323 @@
+/* Target definitions for x86 running Darwin.
+ Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Enable Mach-O bits in generic x86 code. */
+#undef TARGET_MACHO
+#define TARGET_MACHO 1
+
+#undef DARWIN_X86
+#define DARWIN_X86 1
+
+#define TARGET_VERSION fprintf (stderr, " (i686 Darwin)");
+
+#undef TARGET_64BIT
+#define TARGET_64BIT OPTION_ISA_64BIT
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+/* Size of the Obj-C jump buffer. */
+#define OBJC_JBLEN ((TARGET_64BIT) ? ((9 * 2) + 3 + 16) : (18))
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ darwin_cpp_builtins (pfile); \
+ } \
+ while (0)
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Generate branch island stubs if this is true.  */
+extern int darwin_emit_branch_islands;
+
+#undef TARGET_MACHO_BRANCH_ISLANDS
+#define TARGET_MACHO_BRANCH_ISLANDS darwin_emit_branch_islands
+
+/* For compatibility with OSX system tools, use the new style of pic stub
+ if this is set. */
+#undef MACHOPIC_ATT_STUB
+#define MACHOPIC_ATT_STUB (darwin_macho_att_stub)
+
+#undef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD 64
+
+#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
+
+#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
+
+/* On Darwin, the stack is 128-bit aligned at the point of every call.
+ Failure to ensure this will lead to a crash in the system libraries
+ or dynamic loader. */
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY \
+ ((profile_flag || (TARGET_64BIT && ix86_abi == MS_ABI)) \
+ ? 128 : BITS_PER_WORD)
+
+#undef MAIN_STACK_BOUNDARY
+#define MAIN_STACK_BOUNDARY 128
+
+/* Since we'll never want a stack boundary less aligned than 128 bits,
+   we need the extra work here; otherwise bits of gcc get very grumpy
+   when we ask for lower alignment.  We could just reject values less
+   than 128 bits for Darwin, but it's easier to up the alignment if
+   it's below the minimum.  */
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY \
+ MAX (128, ix86_preferred_stack_boundary)
+
+/* We want -fPIC by default, unless we're using -static to compile for
+ the kernel or some such. */
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) \
+ %{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \
+ %{!mmacosx-version-min=*:-mmacosx-version-min=%(darwin_minversion)} \
+ %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }} " \
+ DARWIN_CC1_SPEC
+
+#undef ASM_SPEC
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL \
+ %{static}"
+
+#define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}"
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+/* Determine a minimum version based on compiler options. */
+#define DARWIN_MINVERSION_SPEC \
+ "%{!m64|fgnu-runtime:10.4; \
+ ,objective-c|,objc-cpp-output:10.5; \
+ ,objective-c-header:10.5; \
+ ,objective-c++|,objective-c++-cpp-output:10.5; \
+ ,objective-c++-header|,objc++-cpp-output:10.5; \
+ :10.4}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s}"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
+
+/* The Darwin assembler mostly follows AT&T syntax. */
+#undef ASSEMBLER_DIALECT
+#define ASSEMBLER_DIALECT ASM_ATT
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't. This macro controls what to do: by default, don't
+ print %cl. */
+
+#define SHIFT_DOUBLE_OMITS_COUNT 0
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END darwin_file_end
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* String containing the assembler's comment-starter. */
+
+#define ASM_COMMENT_START "#"
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+   and returns float values in the 387.  */
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE)
+
+/* For Darwin we want to target specific processor features as a minimum,
+   but these unfortunately don't correspond to a specific processor.  */
+#undef TARGET_SUBTARGET32_ISA_DEFAULT
+#define TARGET_SUBTARGET32_ISA_DEFAULT (OPTION_MASK_ISA_MMX \
+ | OPTION_MASK_ISA_SSE \
+ | OPTION_MASK_ISA_SSE2 \
+ | OPTION_MASK_ISA_SSE3)
+
+#undef TARGET_SUBTARGET64_ISA_DEFAULT
+#define TARGET_SUBTARGET64_ISA_DEFAULT TARGET_SUBTARGET32_ISA_DEFAULT
+
+#undef GOT_SYMBOL_NAME
+#define GOT_SYMBOL_NAME MACHOPIC_FUNCTION_BASE_NAME
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+#define LPREFIX "L"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_BYTE "\t.byte\t"
+#define ASM_SHORT "\t.word\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t"
+
+#define SUBTARGET_ENCODE_SECTION_INFO darwin_encode_section_info
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { if ((LOG) != 0) \
+ { \
+ if (in_section == text_section) \
+ fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \
+ else \
+ fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \
+ } \
+ } while (0)
+
+/* Darwin profiling -- call mcount. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ if (TARGET_MACHO_BRANCH_ISLANDS \
+ && MACHOPIC_INDIRECT && !TARGET_64BIT) \
+ { \
+ const char *name = machopic_mcount_stub_name (); \
+ fprintf (FILE, "\tcall %s\n", name+1); /* skip '&' */ \
+ machopic_validate_stub_or_non_lazy_ptr (name); \
+ } \
+ else fprintf (FILE, "\tcall mcount\n"); \
+ } while (0)
+
+#define C_COMMON_OVERRIDE_OPTIONS \
+ do { \
+ SUBTARGET_C_COMMON_OVERRIDE_OPTIONS; \
+ } while (0)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && MACHO_DYNAMIC_NO_PIC_P) \
+ target_flags &= ~MASK_MACHO_DYNAMIC_NO_PIC; \
+} while (0)
+
+/* Darwin on x86_64 uses DWARF 2 by default.  Pre-Darwin 9, 32-bit
+   compiles default to stabs+; Darwin 9 and later default to DWARF 2.  */
+#ifndef DARWIN_PREFER_DWARF
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE (TARGET_64BIT ? DWARF2_DEBUG : DBX_DEBUG)
+#endif
+
+/* Darwin uses the standard DWARF register numbers but the default
+ register numbers for STABS. Fortunately for 64-bit code the
+ default and the standard are the same. */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : write_symbols == DWARF2_DEBUG ? svr4_dbx_register_map[n] \
+ : dbx_register_map[n])
+
+/* Unfortunately, the 32-bit EH information also doesn't use the standard
+ DWARF register numbers. */
+#define DWARF2_FRAME_REG_OUT(n, for_eh) \
+ (! (for_eh) || write_symbols != DWARF2_DEBUG || TARGET_64BIT ? (n) \
+ : (n) == 5 ? 4 \
+ : (n) == 4 ? 5 \
+ : (n) >= 11 && (n) <= 18 ? (n) + 1 \
+ : (n))
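Concretely (a restatement of the mapping above): GCC's registers 4 (esp) and 5 (ebp) swap places in the 32-bit EH encoding, registers 11 through 18 shift up by one, and everything else passes through unchanged.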
+
+#undef REGISTER_SUBTARGET_PRAGMAS
+#define REGISTER_SUBTARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS()
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes
+
+/* For 64-bit, we need to add 4 because @GOTPCREL is relative to the
+ end of the instruction, but without the 4 we'd only have the right
+ address for the start of the instruction. */
+#undef ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ if (TARGET_64BIT) \
+ { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_pcrel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs ("+4@GOTPCREL", FILE); \
+ goto DONE; \
+ } \
+ } \
+ else \
+ { \
+ if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) \
+ { \
+ darwin_non_lazy_pcrel (FILE, ADDR); \
+ goto DONE; \
+ } \
+ }
+
+/* This needs to move since i386 uses the first flag and other flags are
+ used in Mach-O. */
+#undef MACHO_SYMBOL_FLAG_VARIABLE
+#define MACHO_SYMBOL_FLAG_VARIABLE ((SYMBOL_FLAG_MACH_DEP) << 3)
+
+#undef MACHOPIC_NL_SYMBOL_PTR_SECTION
+#define MACHOPIC_NL_SYMBOL_PTR_SECTION \
+ ".section __IMPORT,__pointers,non_lazy_symbol_pointers"
+
+#define SUBTARGET32_DEFAULT_CPU "i686"
+
+#undef SUBTARGET_INIT_BUILTINS
+#define SUBTARGET_INIT_BUILTINS \
+do { \
+ ix86_builtins[(int) IX86_BUILTIN_CFSTRING] \
+ = darwin_init_cfstring_builtins ((unsigned) (IX86_BUILTIN_CFSTRING)); \
+ darwin_rename_builtins (); \
+} while(0)
+
+/* The system ___divdc3 routine in libSystem on darwin10 is not
+   accurate to 1 ulp, but ours is, so we avoid ever using the system name
+   for this routine and instead install a non-conflicting name that is
+   accurate.  See darwin_rename_builtins.  */
+#ifdef L_divdc3
+#define DECLARE_LIBRARY_RENAMES \
+ asm(".text; ___divdc3: jmp ___ieee_divdc3 ; .globl ___divdc3");
+#endif
diff --git a/gcc/config/i386/darwin64.h b/gcc/config/i386/darwin64.h
new file mode 100644
index 000000000..9562faa90
--- /dev/null
+++ b/gcc/config/i386/darwin64.h
@@ -0,0 +1,35 @@
+/* Target definitions for x86_64 running Darwin.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (x86_64 Darwin)");
+
+#undef DARWIN_ARCH_SPEC
+#define DARWIN_ARCH_SPEC "%{m32:i386;:x86_64}"
+
+#undef DARWIN_SUBARCH_SPEC
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
diff --git a/gcc/config/i386/djgpp-stdint.h b/gcc/config/i386/djgpp-stdint.h
new file mode 100644
index 000000000..8fd3a2565
--- /dev/null
+++ b/gcc/config/i386/djgpp-stdint.h
@@ -0,0 +1,62 @@
+/* Definitions for <stdint.h> types on systems using DJGPP.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* Exact-width integer types */
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "signed short int"
+#define INT32_TYPE "signed long int"
+#define INT64_TYPE "signed long long int"
+
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "long unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+/* Minimum-width integer types */
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "signed short int"
+#define INT_LEAST32_TYPE "signed int"
+#define INT_LEAST64_TYPE "signed long long int"
+
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+/* Fastest minimum-width integer types */
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "signed int"
+#define INT_FAST32_TYPE "signed int"
+#define INT_FAST64_TYPE "long long signed int"
+
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+/* Integer types capable of holding object pointers */
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
+
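+/* Editorial illustration, not part of the imported source: DJGPP is an
+   ILP32 target, so "int" and "long" are both 32 bits wide and the
+   choices above yield the usual exact widths.  A compile-time
+   self-check, assuming a C11 compiler:  */
+#if 0
+#include <limits.h>
+_Static_assert (sizeof (signed char) * CHAR_BIT == 8, "int8_t width");
+_Static_assert (sizeof (short) * CHAR_BIT == 16, "int16_t width");
+_Static_assert (sizeof (long) * CHAR_BIT == 32, "int32_t width");
+_Static_assert (sizeof (long long) * CHAR_BIT == 64, "int64_t width");
+_Static_assert (sizeof (void *) == sizeof (long), "intptr_t width");
+#endif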
diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h
new file mode 100644
index 000000000..34a15facb
--- /dev/null
+++ b/gcc/config/i386/djgpp.h
@@ -0,0 +1,182 @@
+/* Configuration for an i386 running MS-DOS with DJGPP.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2007,
+ 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Support generation of DWARF2 debugging info. */
+#define DWARF2_DEBUGGING_INFO 1
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Define the name of the .data section. */
+#undef DATA_SECTION_ASM_OP
+#define DATA_SECTION_ASM_OP "\t.section .data"
+
+/* Define the name of the .ident op. */
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP "\t.ident\t"
+
+/* Enable alias attribute support. */
+#ifndef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+#endif
+
+/* Define the name of the .text section. */
+#undef TEXT_SECTION_ASM_OP
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+
+/* Define standard DJGPP installation paths. */
+/* We override the default /usr or /usr/local part with /dev/env/DJDIR, */
+/* which points to the actual DJGPP installation directory. */
+
+/* Standard include directory */
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/dev/env/DJDIR/include/"
+
+/* Search for as.exe and ld.exe in DJGPP's binary directory. */
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/dev/env/DJDIR/bin/"
+
+/* Standard DJGPP library and startup files */
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/dev/env/DJDIR/lib/"
+
+/* Correctly handle absolute filename detection in cp/xref.c */
+#define FILE_NAME_ABSOLUTE_P(NAME) \
+ (((NAME)[0] == '/') || ((NAME)[0] == '\\') || \
+ (((NAME)[0] >= 'A') && ((NAME)[0] <= 'z') && ((NAME)[1] == ':')))
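+
+/* Editorial illustration, not part of the imported source: this treats
+   "/foo", "\\foo" and "c:/foo" as absolute.  Note that the raw range
+   'A'..'z' also admits the few ASCII punctuation characters between
+   'Z' and 'a', though real DOS drive letters never hit that case.
+
+	FILE_NAME_ABSOLUTE_P ("c:/djgpp/bin")	-> 1
+	FILE_NAME_ABSOLUTE_P ("include/sys")	-> 0  */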
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("MSDOS"); \
+ builtin_define_std ("GO32"); \
+ builtin_assert ("system=msdos"); \
+ } \
+ while (0)
+
+/* Include <sys/version.h> so __DJGPP__ and __DJGPP_MINOR__ are defined. */
+#undef CPP_SPEC
+#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \
+ -imacros %s../include/sys/version.h"
+
+/* We need to override link_command_spec in gcc.c to support -Tdjgpp.djl.
+   This cannot be done in LINK_SPEC, as LINK_SPEC is processed before the
+   library search directories are known to the linker; doing it here also
+   avoids problems when the specs file is not available.  An alternative,
+   suggested by Robert Hoehne, is to use SUBTARGET_EXTRA_SPECS instead.
+*/
+
+#undef LINK_COMMAND_SPEC
+#define LINK_COMMAND_SPEC \
+"%{!fsyntax-only: \
+%{!c:%{!M:%{!MM:%{!E:%{!S:%(linker) %l %X %{o*} %{e*} %{N} %{n} \
+\t%{r} %{s} %{t} %{u*} %{z} %{Z}\
+\t%{!nostdlib:%{!nostartfiles:%S}}\
+\t%{static:} %{L*} %D %o\
+\t%{!nostdlib:%{!nodefaultlibs:%G %L %G}}\
+\t%{!nostdlib:%{!nostartfiles:%E}}\
+\t-Tdjgpp.djl %{T*}}}}}}}\n\
+%{!c:%{!M:%{!MM:%{!E:%{!S:stubify %{v} %{o*:%*} %{!o*:a.out} }}}}}"
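+
+/* Editorial illustration, not part of the imported source: for a plain
+   "gcc hello.o" this spec expands to roughly
+
+	ld ... crt0.o hello.o -lgcc -lc -lgcc -Tdjgpp.djl
+	stubify a.out
+
+   i.e. the usual link using the DJGPP linker script, followed by the
+   stubifier that prepends the DOS stub to the COFF image.  */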
+
+/* Always just link in 'libc.a'. */
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+/* Pick the right startup code depending on the -pg flag. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s}"
+
+/* Make sure that gcc will not look for .h files in /usr/local/include
+ unless user explicitly requests it. */
+#undef LOCAL_INCLUDE_DIR
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) fprintf ((FILE), "\t.p2align %d\n", LOG)
+
+/* This is how to output a global symbol in the BSS section. */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* This is how to tell assembler that a symbol is weak */
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+
+/* djgpp automatically calls its own version of __main, so don't define one
+ in libgcc, nor call one in main(). */
+#define HAS_INIT_SECTION
+
+/* Definitions for types and sizes.  Wide characters are 16 bits wide so
+   Win32 compiler add-ons will be wide-character compatible. */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
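+/* Editorial illustration, not part of the imported source: with the
+   definitions above,
+
+	sizeof (wchar_t)   == 2   (16-bit, for Win32 tool interop)
+	sizeof (size_t)    == 4   (long unsigned int, ILP32)
+	sizeof (ptrdiff_t) == 4   (int, ILP32)
+
+   so the literal L"ab" occupies 6 bytes including the final L'\0'.  */
+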
+/* Used to be defined in xm-djgpp.h, but moved here for cross-compilers. */
+#define LIBSTDCXX "stdcxx"
+
+#define TARGET_VERSION fprintf (stderr, " (80386, MS-DOS DJGPP)");
+
+/* Warn that -mbnu210 is now obsolete. */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do \
+ { \
+ if (TARGET_BNU210) \
+ { \
+ warning (0, "-mbnu210 is ignored (option is obsolete)"); \
+ } \
+ } \
+while (0)
+
+/* Support for C++ templates. */
+#undef MAKE_DECL_ONE_ONLY
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
diff --git a/gcc/config/i386/djgpp.opt b/gcc/config/i386/djgpp.opt
new file mode 100644
index 000000000..7e4affca9
--- /dev/null
+++ b/gcc/config/i386/djgpp.opt
@@ -0,0 +1,28 @@
+; DJGPP-specific options.
+
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; -mbnu210 is now ignored and obsolete.  It was used to enable support
+;; for weak symbols and for .gnu.linkonce sections.
+mbnu210
+Target Var(TARGET_BNU210)
+Ignored (obsolete)
+
+posix
+Driver
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
new file mode 100644
index 000000000..69128c58c
--- /dev/null
+++ b/gcc/config/i386/driver-i386.c
@@ -0,0 +1,769 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2006-2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+const char *host_detect_local_cpu (int argc, const char **argv);
+
+#ifdef __GNUC__
+#include "cpuid.h"
+
+struct cache_desc
+{
+ unsigned sizekb;
+ unsigned assoc;
+ unsigned line;
+};
+
+/* Returns command line parameters that describe the size and cache line
+   size of the processor caches.  */
+
+static char *
+describe_cache (struct cache_desc level1, struct cache_desc level2)
+{
+ char size[100], line[100], size2[100];
+
+ /* At the moment, gcc does not use the information
+ about the associativity of the cache. */
+
+ snprintf (size, sizeof (size),
+ "--param l1-cache-size=%u ", level1.sizekb);
+ snprintf (line, sizeof (line),
+ "--param l1-cache-line-size=%u ", level1.line);
+
+ snprintf (size2, sizeof (size2),
+ "--param l2-cache-size=%u ", level2.sizekb);
+
+ return concat (size, line, size2, NULL);
+}
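+
+/* Editorial illustration, not part of the imported source: for a CPU
+   with a 32 KB L1 (64-byte lines) and a 2048 KB L2, the function above
+   returns
+
+     "--param l1-cache-size=32 --param l1-cache-line-size=64 --param l2-cache-size=2048 "
+
+   which the driver splices into the command line next to -mtune.  */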
+
+/* Detect L2 cache parameters using CPUID extended function 0x80000006. */
+
+static void
+detect_l2_cache (struct cache_desc *level2)
+{
+ unsigned eax, ebx, ecx, edx;
+ unsigned assoc;
+
+ __cpuid (0x80000006, eax, ebx, ecx, edx);
+
+ level2->sizekb = (ecx >> 16) & 0xffff;
+ level2->line = ecx & 0xff;
+
+ assoc = (ecx >> 12) & 0xf;
+ if (assoc == 6)
+ assoc = 8;
+ else if (assoc == 8)
+ assoc = 16;
+ else if (assoc >= 0xa && assoc <= 0xc)
+ assoc = 32 + (assoc - 0xa) * 16;
+ else if (assoc >= 0xd && assoc <= 0xe)
+ assoc = 96 + (assoc - 0xd) * 32;
+
+ level2->assoc = assoc;
+}
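+
+/* Editorial illustration, not part of the imported source: a worked
+   decode of CPUID 0x80000006.  If ECX == 0x02006140, then
+
+	sizekb = (ECX >> 16) & 0xffff = 0x0200 = 512 (KB)
+	line   =  ECX        & 0xff   = 0x40   = 64 (bytes)
+	assoc  = (ECX >> 12) & 0xf    = 0x6    -> 8-way
+
+   since in AMD's encoding the associativity field value 6 means
+   8-way.  */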
+
+/* Returns the description of caches for an AMD processor. */
+
+static const char *
+detect_caches_amd (unsigned max_ext_level)
+{
+ unsigned eax, ebx, ecx, edx;
+
+ struct cache_desc level1, level2 = {0, 0, 0};
+
+ if (max_ext_level < 0x80000005)
+ return "";
+
+ __cpuid (0x80000005, eax, ebx, ecx, edx);
+
+ level1.sizekb = (ecx >> 24) & 0xff;
+ level1.assoc = (ecx >> 16) & 0xff;
+ level1.line = ecx & 0xff;
+
+ if (max_ext_level >= 0x80000006)
+ detect_l2_cache (&level2);
+
+ return describe_cache (level1, level2);
+}
+
+/* Decodes the size, the associativity and the cache line size of
+ L1/L2 caches of an Intel processor. Values are based on
+ "Intel Processor Identification and the CPUID Instruction"
+ [Application Note 485], revision -032, December 2007. */
+
+static void
+decode_caches_intel (unsigned reg, bool xeon_mp,
+ struct cache_desc *level1, struct cache_desc *level2)
+{
+ int i;
+
+ for (i = 24; i >= 0; i -= 8)
+ switch ((reg >> i) & 0xff)
+ {
+ case 0x0a:
+ level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
+ break;
+ case 0x0c:
+ level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
+ break;
+ case 0x2c:
+ level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
+ break;
+ case 0x39:
+ level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x3a:
+ level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
+ break;
+ case 0x3b:
+ level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
+ break;
+ case 0x3c:
+ level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x3d:
+ level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
+ break;
+ case 0x3e:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x41:
+ level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x42:
+ level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x43:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x44:
+ level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x45:
+ level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x49:
+ if (xeon_mp)
+ break;
+ level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
+ break;
+ case 0x4e:
+ level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
+ break;
+ case 0x60:
+ level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
+ break;
+ case 0x66:
+ level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x67:
+ level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x68:
+ level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x78:
+ level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x79:
+ level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7a:
+ level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7b:
+ level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7c:
+ level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7d:
+ level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7f:
+ level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
+ break;
+ case 0x82:
+ level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x83:
+ level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x84:
+ level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x85:
+ level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x86:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x87:
+      case 0x87:
+	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
+	break;
+ default:
+ break;
+ }
+}
+
+/* Detect cache parameters using CPUID function 2. */
+
+static void
+detect_caches_cpuid2 (bool xeon_mp,
+ struct cache_desc *level1, struct cache_desc *level2)
+{
+ unsigned regs[4];
+ int nreps, i;
+
+ __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
+
+ nreps = regs[0] & 0x0f;
+ regs[0] &= ~0x0f;
+
+ while (--nreps >= 0)
+ {
+ for (i = 0; i < 4; i++)
+ if (regs[i] && !((regs[i] >> 31) & 1))
+ decode_caches_intel (regs[i], xeon_mp, level1, level2);
+
+ if (nreps)
+ __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
+ }
+}
+
+/* Detect cache parameters using CPUID function 4. This
+ method doesn't require hardcoded tables. */
+
+enum cache_type
+{
+ CACHE_END = 0,
+ CACHE_DATA = 1,
+ CACHE_INST = 2,
+ CACHE_UNIFIED = 3
+};
+
+static void
+detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
+ struct cache_desc *level3)
+{
+ struct cache_desc *cache;
+
+ unsigned eax, ebx, ecx, edx;
+ int count;
+
+ for (count = 0;; count++)
+ {
+ __cpuid_count(4, count, eax, ebx, ecx, edx);
+ switch (eax & 0x1f)
+ {
+ case CACHE_END:
+ return;
+ case CACHE_DATA:
+ case CACHE_UNIFIED:
+ {
+ switch ((eax >> 5) & 0x07)
+ {
+ case 1:
+ cache = level1;
+ break;
+ case 2:
+ cache = level2;
+ break;
+ case 3:
+ cache = level3;
+ break;
+ default:
+ cache = NULL;
+ }
+
+ if (cache)
+ {
+ unsigned sets = ecx + 1;
+ unsigned part = ((ebx >> 12) & 0x03ff) + 1;
+
+ cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
+ cache->line = (ebx & 0x0fff) + 1;
+
+ cache->sizekb = (cache->assoc * part
+ * cache->line * sets) / 1024;
+ }
+	  }
+	  break;
+ default:
+ break;
+ }
+ }
+}
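+
+/* Editorial illustration, not part of the imported source: a worked
+   CPUID.4 decode for a 32 KB, 8-way, 64-byte-line L1 data cache:
+
+	EBX = (ways-1)<<22 | (partitions-1)<<12 | (linesize-1)
+	    = 7<<22 | 0<<12 | 63,	ECX = sets-1 = 63
+
+	assoc  = ((EBX >> 22) & 0x3ff) + 1 = 8
+	line   = ( EBX        & 0xfff) + 1 = 64
+	sizekb = (8 * 1 * 64 * 64) / 1024  = 32  */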
+
+/* Returns the description of caches for an Intel processor. */
+
+static const char *
+detect_caches_intel (bool xeon_mp, unsigned max_level,
+ unsigned max_ext_level, unsigned *l2sizekb)
+{
+ struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
+
+ if (max_level >= 4)
+ detect_caches_cpuid4 (&level1, &level2, &level3);
+ else if (max_level >= 2)
+ detect_caches_cpuid2 (xeon_mp, &level1, &level2);
+ else
+ return "";
+
+ if (level1.sizekb == 0)
+ return "";
+
+  /* Let the L3 replace the L2.  This assumes inclusive caches and a
+     single-threaded program for now.  */
+ if (level3.sizekb)
+ level2 = level3;
+
+  /* Some Intel CPUs also report AMD-style L2 cache info.  Try this
+     method if the other methods fail to provide the L2 cache
+     parameters.  */
+ if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
+ detect_l2_cache (&level2);
+
+ *l2sizekb = level2.sizekb;
+
+ return describe_cache (level1, level2);
+}
+
+enum vendor_signatures
+{
+ SIG_INTEL = 0x756e6547 /* Genu */,
+ SIG_AMD = 0x68747541 /* Auth */
+};
+
+enum processor_signatures
+{
+ SIG_GEODE = 0x646f6547 /* Geod */
+};
+
+/* This will be called by the spec parser in gcc.c when it sees
+   a %:local_cpu_detect(args) construct.  Currently it will be called
+   with either "arch" or "tune" as argument depending on whether
+   -march=native or -mtune=native is to be substituted.
+
+   It returns a string containing new command line parameters to be
+   put at the place of the above two options, depending on which CPU
+   this is executed on.  E.g. "-march=k8" on an AMD64 machine
+   for -march=native.
+
+   ARGC and ARGV are set depending on the actual arguments given
+   in the spec.  */
+
+const char *host_detect_local_cpu (int argc, const char **argv)
+{
+ enum processor_type processor = PROCESSOR_I386;
+ const char *cpu = "i386";
+
+ const char *cache = "";
+ const char *options = "";
+
+ unsigned int eax, ebx, ecx, edx;
+
+ unsigned int max_level, ext_level;
+
+ unsigned int vendor;
+ unsigned int model, family;
+
+ unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
+ unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
+
+ /* Extended features */
+ unsigned int has_lahf_lm = 0, has_sse4a = 0;
+ unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
+ unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
+ unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
+ unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
+ unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
+ unsigned int has_bmi = 0, has_tbm = 0;
+ unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
+ unsigned int has_osxsave = 0;
+
+ bool arch;
+
+ unsigned int l2sizekb = 0;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = !strcmp (argv[0], "arch");
+
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+ max_level = __get_cpuid_max (0, &vendor);
+ if (max_level < 1)
+ goto done;
+
+ __cpuid (1, eax, ebx, ecx, edx);
+
+ model = (eax >> 4) & 0x0f;
+ family = (eax >> 8) & 0x0f;
+ if (vendor == SIG_INTEL)
+ {
+ unsigned int extended_model, extended_family;
+
+ extended_model = (eax >> 12) & 0xf0;
+ extended_family = (eax >> 20) & 0xff;
+ if (family == 0x0f)
+ {
+ family += extended_family;
+ model += extended_model;
+ }
+ else if (family == 0x06)
+ model += extended_model;
+ }
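+
+  /* Editorial illustration, not part of the imported source: for a
+     Sandy Bridge CPUID.1 signature EAX = 0x000206a7,
+
+	model          = (EAX >> 4)  & 0x0f = 0x0a
+	family         = (EAX >> 8)  & 0x0f = 0x06
+	extended_model = (EAX >> 12) & 0xf0 = 0x20
+
+     so family 6 yields model 0x0a + 0x20 = 0x2a, matching the
+     Sandy Bridge ("corei7-avx") case below.  */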
+
+ has_sse3 = ecx & bit_SSE3;
+ has_ssse3 = ecx & bit_SSSE3;
+ has_sse4_1 = ecx & bit_SSE4_1;
+ has_sse4_2 = ecx & bit_SSE4_2;
+ has_avx = ecx & bit_AVX;
+ has_osxsave = ecx & bit_OSXSAVE;
+ has_cmpxchg16b = ecx & bit_CMPXCHG16B;
+ has_movbe = ecx & bit_MOVBE;
+ has_popcnt = ecx & bit_POPCNT;
+ has_aes = ecx & bit_AES;
+ has_pclmul = ecx & bit_PCLMUL;
+ has_fma = ecx & bit_FMA;
+ has_f16c = ecx & bit_F16C;
+ has_rdrnd = ecx & bit_RDRND;
+
+ has_cmpxchg8b = edx & bit_CMPXCHG8B;
+ has_cmov = edx & bit_CMOV;
+ has_mmx = edx & bit_MMX;
+ has_sse = edx & bit_SSE;
+ has_sse2 = edx & bit_SSE2;
+
+ if (max_level >= 7)
+ {
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ has_fsgsbase = ebx & bit_FSGSBASE;
+ }
+
+ /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
+#define XCR_XFEATURE_ENABLED_MASK 0x0
+#define XSTATE_FP 0x1
+#define XSTATE_SSE 0x2
+#define XSTATE_YMM 0x4
+ if (has_osxsave)
+ asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
+ : "=a" (eax), "=d" (edx)
+ : "c" (XCR_XFEATURE_ENABLED_MASK));
+
+ /* Check if SSE and YMM states are supported. */
+ if (!has_osxsave
+ || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM))
+ {
+ has_avx = 0;
+ has_fma = 0;
+ has_fma4 = 0;
+ has_xop = 0;
+ }
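+
+  /* Editorial illustration, not part of the imported source: the inline
+     asm above is XGETBV (0F 01 D0) with ECX = 0, reading XCR0.  AVX is
+     usable only when the OS preserves both the SSE state (bit 1, 0x2)
+     and the YMM state (bit 2, 0x4), i.e. (XCR0 & 0x6) == 0x6; otherwise
+     the AVX-family feature bits are cleared here even if CPUID reported
+     them.  */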
+
+ /* Check cpuid level of extended features. */
+ __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+ if (ext_level > 0x80000000)
+ {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ has_lahf_lm = ecx & bit_LAHF_LM;
+ has_sse4a = ecx & bit_SSE4a;
+ has_abm = ecx & bit_ABM;
+ has_lwp = ecx & bit_LWP;
+ has_fma4 = ecx & bit_FMA4;
+ has_xop = ecx & bit_XOP;
+ has_tbm = ecx & bit_TBM;
+
+ has_longmode = edx & bit_LM;
+ has_3dnowp = edx & bit_3DNOWP;
+ has_3dnow = edx & bit_3DNOW;
+
+      /* Leaf 7 requires the sub-leaf number in ECX, so use
+	 __cpuid_count rather than plain __cpuid.  */
+      __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ has_bmi = ebx & bit_BMI;
+ }
+
+ if (!arch)
+ {
+ if (vendor == SIG_AMD)
+ cache = detect_caches_amd (ext_level);
+ else if (vendor == SIG_INTEL)
+ {
+ bool xeon_mp = (family == 15 && model == 6);
+ cache = detect_caches_intel (xeon_mp, max_level,
+ ext_level, &l2sizekb);
+ }
+ }
+
+ if (vendor == SIG_AMD)
+ {
+ unsigned int name;
+
+ /* Detect geode processor by its processor signature. */
+ if (ext_level > 0x80000001)
+ __cpuid (0x80000002, name, ebx, ecx, edx);
+ else
+ name = 0;
+
+ if (name == SIG_GEODE)
+ processor = PROCESSOR_GEODE;
+ else if (has_xop)
+ processor = PROCESSOR_BDVER1;
+ else if (has_sse4a && has_ssse3)
+ processor = PROCESSOR_BTVER1;
+ else if (has_sse4a)
+ processor = PROCESSOR_AMDFAM10;
+ else if (has_sse2 || has_longmode)
+ processor = PROCESSOR_K8;
+ else if (has_3dnowp && family == 6)
+ processor = PROCESSOR_ATHLON;
+ else if (has_mmx)
+ processor = PROCESSOR_K6;
+ else
+ processor = PROCESSOR_PENTIUM;
+ }
+ else
+ {
+ switch (family)
+ {
+ case 4:
+ processor = PROCESSOR_I486;
+ break;
+ case 5:
+ processor = PROCESSOR_PENTIUM;
+ break;
+ case 6:
+ processor = PROCESSOR_PENTIUMPRO;
+ break;
+ case 15:
+ processor = PROCESSOR_PENTIUM4;
+ break;
+ default:
+ /* We have no idea. */
+ processor = PROCESSOR_GENERIC32;
+ }
+ }
+
+ switch (processor)
+ {
+ case PROCESSOR_I386:
+ /* Default. */
+ break;
+ case PROCESSOR_I486:
+ cpu = "i486";
+ break;
+ case PROCESSOR_PENTIUM:
+ if (arch && has_mmx)
+ cpu = "pentium-mmx";
+ else
+ cpu = "pentium";
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ switch (model)
+ {
+ case 0x1c:
+ case 0x26:
+ /* Atom. */
+ cpu = "atom";
+ break;
+ case 0x1a:
+ case 0x1e:
+ case 0x1f:
+ case 0x2e:
+ /* Nehalem. */
+ cpu = "corei7";
+ break;
+ case 0x25:
+ case 0x2c:
+ case 0x2f:
+ /* Westmere. */
+ cpu = "corei7";
+ break;
+ case 0x2a:
+ /* Sandy Bridge. */
+ cpu = "corei7-avx";
+ break;
+ case 0x17:
+ case 0x1d:
+ /* Penryn. */
+ cpu = "core2";
+ break;
+ case 0x0f:
+ /* Merom. */
+ cpu = "core2";
+ break;
+ default:
+ if (arch)
+ {
+		  /* This is an unknown family 0x6 CPU.  */
+ if (has_avx)
+ /* Assume Sandy Bridge. */
+ cpu = "corei7-avx";
+ else if (has_sse4_2)
+ /* Assume Core i7. */
+ cpu = "corei7";
+ else if (has_ssse3)
+ {
+ if (has_movbe)
+ /* Assume Atom. */
+ cpu = "atom";
+ else
+ /* Assume Core 2. */
+ cpu = "core2";
+ }
+ else if (has_sse3)
+ /* It is Core Duo. */
+ cpu = "pentium-m";
+ else if (has_sse2)
+ /* It is Pentium M. */
+ cpu = "pentium-m";
+ else if (has_sse)
+ /* It is Pentium III. */
+ cpu = "pentium3";
+ else if (has_mmx)
+ /* It is Pentium II. */
+ cpu = "pentium2";
+ else
+ /* Default to Pentium Pro. */
+ cpu = "pentiumpro";
+ }
+ else
+ /* For -mtune, we default to -mtune=generic. */
+ cpu = "generic";
+ break;
+ }
+ break;
+ case PROCESSOR_PENTIUM4:
+ if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else
+ cpu = "pentium4";
+ break;
+ case PROCESSOR_GEODE:
+ cpu = "geode";
+ break;
+ case PROCESSOR_K6:
+ if (arch && has_3dnow)
+ cpu = "k6-3";
+ else
+ cpu = "k6";
+ break;
+ case PROCESSOR_ATHLON:
+ if (arch && has_sse)
+ cpu = "athlon-4";
+ else
+ cpu = "athlon";
+ break;
+ case PROCESSOR_K8:
+ if (arch && has_sse3)
+ cpu = "k8-sse3";
+ else
+ cpu = "k8";
+ break;
+ case PROCESSOR_AMDFAM10:
+ cpu = "amdfam10";
+ break;
+ case PROCESSOR_BDVER1:
+ cpu = "bdver1";
+ break;
+ case PROCESSOR_BTVER1:
+ cpu = "btver1";
+ break;
+
+ default:
+ /* Use something reasonable. */
+ if (arch)
+ {
+ if (has_ssse3)
+ cpu = "core2";
+ else if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else if (has_sse2)
+ cpu = "pentium4";
+ else if (has_cmov)
+ cpu = "pentiumpro";
+ else if (has_mmx)
+ cpu = "pentium-mmx";
+ else if (has_cmpxchg8b)
+ cpu = "pentium";
+ }
+ else
+ cpu = "generic";
+ }
+
+ if (arch)
+ {
+ const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
+ const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
+ const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
+ const char *ase = has_aes ? " -maes" : " -mno-aes";
+ const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
+ const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
+ const char *abm = has_abm ? " -mabm" : " -mno-abm";
+ const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
+ const char *fma = has_fma ? " -mfma" : " -mno-fma";
+ const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
+ const char *xop = has_xop ? " -mxop" : " -mno-xop";
+ const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
+ const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
+ const char *avx = has_avx ? " -mavx" : " -mno-avx";
+ const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
+ const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
+ const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
+ const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
+ const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
+
+ options = concat (options, cx16, sahf, movbe, ase, pclmul,
+ popcnt, abm, lwp, fma, fma4, xop, bmi, tbm,
+ avx, sse4_2, sse4_1, rdrnd, f16c, fsgsbase, NULL);
+ }
+
+done:
+ return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
+}
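+
+/* Editorial illustration, not part of the imported source: on a Sandy
+   Bridge host the two substitutions would come back as roughly
+
+	arch:  -march=corei7-avx -mcx16 -msahf -mno-movbe -maes ...
+	tune:  --param l1-cache-size=32 --param l1-cache-line-size=64
+	       --param l2-cache-size=8192 -mtune=corei7-avx
+
+   i.e. cache parameters are probed only for "tune", and the per-feature
+   -mFOO/-mno-FOO switches are appended only for "arch".  */
+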
+#else
+
+/* If we are not compiling with GCC then the driver will just ignore
+   -march=native and -mtune=native, leaving it to the newly built
+   compiler to generate code for its default target.  */
+
+const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED,
+ const char **argv ATTRIBUTE_UNUSED)
+{
+ return NULL;
+}
+#endif /* __GNUC__ */
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
new file mode 100644
index 000000000..fe4cd6aba
--- /dev/null
+++ b/gcc/config/i386/emmintrin.h
@@ -0,0 +1,1513 @@
+/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _EMMINTRIN_H_INCLUDED
+#define _EMMINTRIN_H_INCLUDED
+
+#ifndef __SSE2__
+# error "SSE2 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE header files.  */
+#include <xmmintrin.h>
+
+/* SSE2 */
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Create a selector for use with the SHUFPD instruction. */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
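+
+/* Editorial illustration, not part of the imported source: the selector
+   packs two one-bit lane indices, so
+
+	_MM_SHUFFLE2 (0, 1) == (0 << 1) | 1 == 1
+
+   With SHUFPD, bit 0 picks the element of the first operand for lane 0
+   and bit 1 picks the element of the second operand for lane 1, so
+   selector 1 on a single vector swaps its two halves, as _mm_loadr_pd
+   below uses it.  */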
+
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_sd (double __F)
+{
+ return __extension__ (__m128d){ __F, 0.0 };
+}
+
+/* Create a vector with both elements equal to F. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pd (double __F)
+{
+ return __extension__ (__m128d){ __F, __F };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd1 (double __F)
+{
+ return _mm_set1_pd (__F);
+}
+
+/* Create a vector with the lower value X and upper value W. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __X, __W };
+}
+
+/* Create a vector with the lower value W and upper value X. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __W, __X };
+}
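+
+/* Editorial illustration, not part of the imported source: _mm_set_pd
+   takes its arguments high element first, _mm_setr_pd in memory order:
+
+	_mm_set_pd  (1.0, 2.0)	->  element 0 = 2.0, element 1 = 1.0
+	_mm_setr_pd (1.0, 2.0)	->  element 0 = 1.0, element 1 = 2.0
+
+   so _mm_store_pd then writes {2.0, 1.0} and {1.0, 2.0} respectively
+   to ascending addresses.  */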
+
+/* Create a vector of zeros. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_pd (void)
+{
+ return __extension__ (__m128d){ 0.0, 0.0 };
+}
+
+/* Sets the low DPFP value of A from the low value of B. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* Load two DPFP values from P. The address must be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd (double const *__P)
+{
+ return *(__m128d *)__P;
+}
+
+/* Load two DPFP values from P. The address need not be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_pd (double const *__P)
+{
+ return __builtin_ia32_loadupd (__P);
+}
+
+/* Create a vector with both elements equal to *P.  */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_pd (double const *__P)
+{
+ return _mm_set1_pd (*__P);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_sd (double const *__P)
+{
+ return _mm_set_sd (*__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd1 (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+/* Load two DPFP values in reverse order. The address must be aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_pd (double const *__P)
+{
+ __m128d __tmp = _mm_load_pd (__P);
+ return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
+}
+
+/* Store two DPFP values. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd (double *__P, __m128d __A)
+{
+ *(__m128d *)__P = __A;
+}
+
+/* Store two DPFP values. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+ __builtin_ia32_storeupd (__P, __A);
+}
+
+/* Stores the lower DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_sd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_f64 (__m128d __A)
+{
+ return __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pd (double *__P, __m128d __A)
+{
+ _mm_store_sd (__P, __A);
+}
+
+/* Stores the upper DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
+}
+
+/* Store the lower DPFP value across two words.
+ The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd1 (double *__P, __m128d __A)
+{
+ _mm_store1_pd (__P, __A);
+}
+
+/* Store two DPFP values in reverse order. The address must be aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si32 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_pd (__m128d __A)
+{
+ return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (B[0]), A[1]}.  */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_sd (__m128d __A, __m128d __B)
+{
+ __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+ return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmplesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnlesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* Create a vector of Qi, where i is the element number. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64 (__m64 __q1, __m64 __q0)
+{
+ return _mm_set_epi64x ((long long)__q1, (long long)__q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
+ short __q3, short __q2, short __q1, short __q0)
+{
+ return __extension__ (__m128i)(__v8hi){
+ __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m128i)(__v16qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+/* Set all of the elements of the vector to A. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64x (long long __A)
+{
+ return _mm_set_epi64x (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi32 (int __A)
+{
+ return _mm_set_epi32 (__A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi16 (short __A)
+{
+ return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi8 (char __A)
+{
+ return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+/* Create a vector of Qi, where i is the element number.
+ The parameter order is reversed from the _mm_set_epi* functions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi64 (__m64 __q0, __m64 __q1)
+{
+ return _mm_set_epi64 (__q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
+{
+ return _mm_set_epi32 (__q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
+ short __q4, short __q5, short __q6, short __q7)
+{
+ return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
+ char __q04, char __q05, char __q06, char __q07,
+ char __q08, char __q09, char __q10, char __q11,
+ char __q12, char __q13, char __q14, char __q15)
+{
+ return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
+}
+
+/* Load 128 bits of integer data.  _mm_load_si128 requires a 16-byte
+   aligned address, _mm_loadu_si128 does not, and _mm_loadl_epi64
+   loads element 0 from *P and sets the rest to zero.  */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_si128 (__m128i const *__P)
+{
+ return *__P;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_epi64 (__m128i const *__P)
+{
+ return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_si128 (__m128i *__P, __m128i __B)
+{
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_epi64 (__m128i *__P, __m128i __B)
+{
+ *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movpi64_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 ((__m64)0LL, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi64 (__m128i __A)
+{
+ return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
+}
+
+/* Create a vector of zeros. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si128 (void)
+{
+ return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_pd (__m128i __A)
+{
+ return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_ps (__m128i __A)
+{
+ return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_ps (__m128d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_pd (__m64 __A)
+{
+ return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pd (__m128 __A)
+{
+ return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_ss (__m128 __A, __m128d __B)
+{
+ return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_sd (__m128d __A, int __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sd (__m128d __A, __m128 __B)
+{
+ return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
+{
+ return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
+}
+#else
+#define _mm_shuffle_pd(A, B, N) \
+ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(N)))
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pd (__m128d __A)
+{
+ return __builtin_ia32_movmskpd ((__v2df)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_su32 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
+}
+#else
+#define _mm_srli_si128(A, N) \
+ ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
+#define _mm_slli_si128(A, N) \
+ ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
+#endif
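+
+/* Usage sketch (values illustrative): the shift count __N is a byte
+   count at the intrinsic level, while the builtins above take a bit
+   count, hence the "* 8".
+
+     __m128i v = _mm_set_epi32 (4, 3, 2, 1);
+     __m128i w = _mm_srli_si128 (v, 4);  // lanes become {2,3,4,0}, low first
+*/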
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi16 (__m128i const __A, int const __N)
+{
+ return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
+}
+#else
+#define _mm_extract_epi16(A, N) \
+ ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
+ (int)(D), (int)(N)))
+#endif
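+
+/* Usage sketch (values illustrative): __N must be a compile-time
+   constant lane index in the range 0..7.
+
+     __m128i v = _mm_set_epi16 (7, 6, 5, 4, 3, 2, 1, 0);
+     int x = _mm_extract_epi16 (v, 3);   // x == 3
+     v = _mm_insert_epi16 (v, 42, 0);    // low lane becomes 42
+*/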
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_epi8 (__m128i __A)
+{
+ return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflehi_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflelo_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi32 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
+}
+#else
+#define _mm_shufflehi_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shufflelo_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shuffle_epi32(A, N) \
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
+#endif
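+
+/* The shuffle mask packs one two-bit source-lane selector per
+   destination lane, lowest destination lane in the lowest bits.
+   Usage sketch (values illustrative):
+
+     __m128i v = _mm_set_epi32 (3, 2, 1, 0);
+     __m128i r = _mm_shuffle_epi32 (v, 0x1B);  // 00 01 10 11: reverses lanes
+*/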
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+{
+ __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si32 (int *__A, int __B)
+{
+ __builtin_ia32_movnti (__A, __B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si128 (__m128i *__A, __m128i __B)
+{
+ __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pd (double *__A, __m128d __B)
+{
+ __builtin_ia32_movntpd (__A, (__v2df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clflush (void const *__A)
+{
+ __builtin_ia32_clflush (__A);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lfence (void)
+{
+ __builtin_ia32_lfence ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mfence (void)
+{
+ __builtin_ia32_mfence ();
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si128 (int __A)
+{
+ return _mm_set_epi32 (0, 0, 0, __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+#endif
+
+/* Casts between various SP, DP, INT vector types.  Note that these do no
+   conversion of values; they just change the type.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
+
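+/* A minimal usage sketch of the integer intrinsics above (illustrative;
+   p and q are hypothetical pointers to 16-byte blocks): the sum of
+   absolute byte differences of two blocks.
+
+     __m128i a = _mm_loadu_si128 ((__m128i const *) p);
+     __m128i b = _mm_loadu_si128 ((__m128i const *) q);
+     __m128i s = _mm_sad_epu8 (a, b);  // two 64-bit partial sums
+     int sum = _mm_cvtsi128_si32 (s)
+             + _mm_cvtsi128_si32 (_mm_srli_si128 (s, 8));
+*/
+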
+#endif /* __SSE2__ */
+
+#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h
new file mode 100644
index 000000000..ae30bfec9
--- /dev/null
+++ b/gcc/config/i386/fma4intrin.h
@@ -0,0 +1,236 @@
+/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _FMA4INTRIN_H_INCLUDED
+#define _FMA4INTRIN_H_INCLUDED
+
+#ifndef __FMA4__
+# error "FMA4 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
+#include <ammintrin.h>
+
+/* 128b Floating point multiply/add type instructions. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
+}
+
+/* 256b Floating point multiply/add type instructions. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
+}
+
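+/* Usage sketch (values illustrative): a fused multiply-add over four
+   packed floats; requires compiling with -mfma4.
+
+     __m128 a = _mm_set1_ps (2.0f);
+     __m128 b = _mm_set1_ps (3.0f);
+     __m128 c = _mm_set1_ps (1.0f);
+     __m128 r = _mm_macc_ps (a, b, c);  // each element: 2*3 + 1 == 7
+*/
+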
+#endif
+
+#endif
diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h
new file mode 100644
index 000000000..6d2c559e7
--- /dev/null
+++ b/gcc/config/i386/freebsd.h
@@ -0,0 +1,152 @@
+/* Definitions for Intel 386 running FreeBSD with ELF format
+ Copyright (C) 1996, 2000, 2002, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu.
+ Adapted from GNU/Linux version by John Polstra.
+ Continued development by David O'Brien <obrien@freebsd.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (i386 FreeBSD/ELF)");
+
+/* Override the default comment-starter of "/". */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+/* Tell final.c that we don't need a label passed to mcount. */
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME ".mcount"
+
+/* Make gcc agree with <machine/ansi.h>. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (TARGET_64BIT ? 32 : BITS_PER_WORD)
+
+#undef SUBTARGET_EXTRA_SPECS /* i386.h bogusly defines it. */
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+/* Provide a STARTFILE_SPEC appropriate for FreeBSD.  Here we add
+   the magical crtbegin.o file (see crtstuff.c) which provides part
+   of the support for getting C++ file-scope static objects constructed
+   before entering `main'.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
+ %{!p:%{profile:gcrt1.o%s} \
+ %{!profile:crt1.o%s}}}} \
+ crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+/* Provide an ENDFILE_SPEC appropriate for FreeBSD.  Here we tack on
+   the magical crtend.o file (see crtstuff.c) which provides part of
+   the support for getting C++ file-scope static objects constructed
+   before entering `main', followed by a normal "finalizer" file,
+   `crtn.o'.  */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+/* Provide a LINK_SPEC appropriate for FreeBSD.  Here we provide support
+   for the special GCC options -static and -shared, which allow us to
+   link things in one of three modes (dynamic, static, or shared) by
+   applying the appropriate combinations of options at link-time.
+
+   When the -shared link option is used, a final link is not being
+   done.  */
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#undef ASM_OUTPUT_MAX_SKIP_ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* Don't default to pcc-struct-return; we want to retain compatibility with
+   older gcc versions, AND pcc-struct-return is nonreentrant
+   (even though the SVR4 ABI for the i386 says that records and unions are
+   returned in memory).  */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* FreeBSD sets the rounding precision of the FPU to 53 bits. Let the
+ compiler get the contents of <float.h> and std::numeric_limits correct. */
+#undef TARGET_96_ROUND_53_LONG_DOUBLE
+#define TARGET_96_ROUND_53_LONG_DOUBLE (!TARGET_64BIT)
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+/* Support for i386 has been removed from FreeBSD 6.0 onward. */
+#if FBSD_MAJOR >= 6
+#define SUBTARGET32_DEFAULT_CPU "i486"
+#endif
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
diff --git a/gcc/config/i386/freebsd64.h b/gcc/config/i386/freebsd64.h
new file mode 100644
index 000000000..8b5b149b0
--- /dev/null
+++ b/gcc/config/i386/freebsd64.h
@@ -0,0 +1,46 @@
+/* Definitions for AMD x86-64 running FreeBSD with ELF format
+ Copyright (C) 2002, 2004, 2007, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by David O'Brien <obrien@FreeBSD.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/x86-64 ELF)");
+
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) %{profile:-p}"
+
+/* Provide a LINK_SPEC appropriate for the FreeBSD/x86-64 ELF target.
+ This is a copy of LINK_SPEC from <i386/freebsd.h> tweaked for
+ the x86-64 target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{m32:-m elf_i386_fbsd} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
diff --git a/gcc/config/i386/gas.h b/gcc/config/i386/gas.h
new file mode 100644
index 000000000..4c7c9d1ac
--- /dev/null
+++ b/gcc/config/i386/gas.h
@@ -0,0 +1,124 @@
+/* Definitions for Intel 386 using GAS.
+ Copyright (C) 1988, 1993, 1994, 1996, 2002, 2004, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Note that i386/seq-gas.h is a GAS configuration that does not use this
+ file. */
+
+/* Use the bsd assembler syntax. */
+/* we need to do this because gas is really a bsd style assembler,
+ * and so doesn't work well with these att-isms:
+ *
+ * ASM_OUTPUT_SKIP is .set .,.+N, which isn't implemented in gas
+ * ASM_OUTPUT_LOCAL is done with .set .,.+N, but that can't be
+ * used to define bss static space
+ *
+ * Next is the question of whether to use underscores.  RMS didn't
+ * like this idea at first, but since it is now obvious that we
+ * need this separate tm file for use with gas, at least to get
+ * dbx debugging info, I think we should also switch to underscores.
+ * We can keep i386v for real att style output, and the few
+ * people who want both forms will have to compile twice.
+ */
+
+/* these come from i386/bsd.h, but are specific to sequent */
+#undef DBX_NO_XREFS
+#undef DBX_CONTIN_LENGTH
+
+/* Ask for COFF symbols. */
+
+#define SDB_DEBUGGING_INFO 1
+
+/* Output #ident as a .ident. */
+
+#define ASM_OUTPUT_IDENT(FILE, NAME) fprintf (FILE, "\t.ident \"%s\"\n", NAME);
+
+/* In the past there was confusion as to what the argument to .align was
+ in GAS. For the last several years the rule has been this: for a.out
+ file formats that argument is LOG, and for all other file formats the
+ argument is 1<<LOG.
+
+ However, GAS now has .p2align and .balign pseudo-ops so to remove any
+ doubt or guess work, and since this file is used for both a.out and other
+ file formats, we use one of them. */
+
+#ifdef HAVE_GAS_BALIGN_AND_P2ALIGN
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
+#endif
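+
+/* For example, ASM_OUTPUT_ALIGN (file, 3) emits "\t.balign 8".  */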
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+# define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) {\
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
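+
+/* For example, ASM_OUTPUT_MAX_SKIP_ALIGN (file, 4, 7) emits
+   "\t.p2align 4,,7": align to a 16-byte boundary, but only if that
+   takes at most 7 bytes of padding.  */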
+
+/* A C statement or statements which output an assembler instruction
+ opcode to the stdio stream STREAM. The macro-operand PTR is a
+ variable of type `char *' which points to the opcode name in its
+ "internal" form--the form that is written in the machine description.
+
+ GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
+ So use `repe' instead. */
+
+#undef ASM_OUTPUT_OPCODE
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+{ \
+ if ((PTR)[0] == 'r' \
+ && (PTR)[1] == 'e' \
+ && (PTR)[2] == 'p') \
+ { \
+ if ((PTR)[3] == 'z') \
+ { \
+ fputs ("repe", (STREAM)); \
+ (PTR) += 4; \
+ } \
+ else if ((PTR)[3] == 'n' && (PTR)[4] == 'z') \
+ { \
+ fputs ("repne", (STREAM)); \
+ (PTR) += 5; \
+ } \
+ } \
+ else \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR)); \
+}
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't.
+
+ GAS requires the %cl argument, so override i386/unix.h. */
+
+#undef SHIFT_DOUBLE_OMITS_COUNT
+#define SHIFT_DOUBLE_OMITS_COUNT 0
+
+/* The comment-starter string as GAS expects it. */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
diff --git a/gcc/config/i386/geode.md b/gcc/config/i386/geode.md
new file mode 100644
index 000000000..c063e58b9
--- /dev/null
+++ b/gcc/config/i386/geode.md
@@ -0,0 +1,152 @@
+;; Geode Scheduling
+;; Copyright (C) 2006, 2007
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The Geode architecture is a single-issue processor.
+;;
+;; This description is based on data from the following documents:
+;;
+;; "AMD Geode GX Processor Data Book"
+;; Advanced Micro Devices, Inc., Aug 2005.
+;;
+;; "AMD Geode LX Processor Data Book"
+;; Advanced Micro Devices, Inc., Jan 2006.
+;;
+;;
+;; CPU execution units of the Geode:
+;;
+;; issue describes the issue pipeline.
+;; alu describes the Integer unit
+;; fpu describes the FP unit
+;;
+;; The FP unit is an out-of-order execution unit with register renaming.
+;; There is also a memory management unit and an execution pipeline for
+;; load/store operations.  We ignore these, and the difference between
+;; insns using memory and registers.
+
+(define_automaton "geode")
+
+(define_cpu_unit "geode_issue,geode_alu,geode_fpu" "geode")
+
+(define_insn_reservation "alu" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "alu,alu1,negnot,icmp,lea,test,imov,imovx,icmov,incdec,setcc"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "shift" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "imul" 7
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "imul"))
+ "geode_issue,geode_alu*7")
+
+(define_insn_reservation "idiv" 40
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "idiv"))
+ "geode_issue,geode_alu*40")
+
+;; The branch unit.
+(define_insn_reservation "call" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "call,callv"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "geode_branch" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "ibr"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "geode_pop_push" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "pop,push"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "geode_leave" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "leave"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "geode_load_str" 4
+ (and (eq_attr "cpu" "geode")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ "geode_issue,geode_alu*4")
+
+(define_insn_reservation "geode_store_str" 2
+ (and (eq_attr "cpu" "geode")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "store")))
+ "geode_issue,geode_alu*2")
+
+;; Be optimistic
+(define_insn_reservation "geode_unknown" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "multi,other"))
+ "geode_issue,geode_alu")
+
+;; FPU
+
+(define_insn_reservation "geode_fop" 6
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fop,fcmp"))
+ "geode_issue,geode_fpu*6")
+
+(define_insn_reservation "geode_fsimple" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fmov,fcmov,fsgn,fxch"))
+ "geode_issue,geode_fpu")
+
+(define_insn_reservation "geode_fist" 4
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fistp,fisttp"))
+ "geode_issue,geode_fpu*4")
+
+(define_insn_reservation "geode_fmul" 10
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fmul"))
+ "geode_issue,geode_fpu*10")
+
+(define_insn_reservation "geode_fdiv" 47
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fdiv"))
+ "geode_issue,geode_fpu*47")
+
+;; We use minimal latency (fsin) here
+(define_insn_reservation "geode_fpspc" 54
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fpspc"))
+ "geode_issue,geode_fpu*54")
+
+(define_insn_reservation "geode_frndint" 12
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "frndint"))
+ "geode_issue,geode_fpu*12")
+
+(define_insn_reservation "geode_mmxmov" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "mmxmov"))
+ "geode_issue,geode_fpu")
+
+(define_insn_reservation "geode_mmx" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "mmx,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"))
+ "geode_issue,geode_fpu*2")
diff --git a/gcc/config/i386/gmm_malloc.h b/gcc/config/i386/gmm_malloc.h
new file mode 100644
index 000000000..7a7e84069
--- /dev/null
+++ b/gcc/config/i386/gmm_malloc.h
@@ -0,0 +1,74 @@
+/* Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+#include <errno.h>
+
+static __inline__ void*
+_mm_malloc (size_t size, size_t align)
+{
+ void * malloc_ptr;
+ void * aligned_ptr;
+
+ /* Error if align is not a power of two. */
+ if (align & (align - 1))
+ {
+ errno = EINVAL;
+ return ((void*) 0);
+ }
+
+ if (size == 0)
+ return ((void *) 0);
+
+ /* Assume malloc'd pointer is aligned at least to sizeof (void*).
+ If necessary, add another sizeof (void*) to store the value
+ returned by malloc. Effectively this enforces a minimum alignment
+ of 2 * sizeof (void*). */
+ if (align < 2 * sizeof (void *))
+ align = 2 * sizeof (void *);
+
+ malloc_ptr = malloc (size + align);
+ if (!malloc_ptr)
+ return ((void *) 0);
+
+ /* Align the pointer; we have at least sizeof (void *) space
+ below the malloc'd ptr. */
+ aligned_ptr = (void *) (((size_t) malloc_ptr + align)
+ & ~((size_t) (align) - 1));
+
+ /* Store the original pointer just below aligned_ptr. */
+ ((void **) aligned_ptr) [-1] = malloc_ptr;
+
+ return aligned_ptr;
+}
+
+static __inline__ void
+_mm_free (void * aligned_ptr)
+{
+ if (aligned_ptr)
+ free (((void **) aligned_ptr) [-1]);
+}
+
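+/* Usage sketch (illustrative): a 64-byte-aligned buffer of 1024 floats.
+
+     float *buf = (float *) _mm_malloc (1024 * sizeof (float), 64);
+     if (buf)
+       {
+         ... use buf ...
+         _mm_free (buf);
+       }
+*/
+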
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/gcc/config/i386/gmon-sol2.c b/gcc/config/i386/gmon-sol2.c
new file mode 100644
index 000000000..44bbb4448
--- /dev/null
+++ b/gcc/config/i386/gmon-sol2.c
@@ -0,0 +1,459 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. [rescinded 22 July 1999]
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a modified gmon.c by J.W.Hawtin <oolon@ankh.org>,
+ * 14/8/96, based on the original gmon.c in GCC and the hacked
+ * Solaris 2 SPARC version (config/sparc/gmon-sol.c) by Mark Eichin,
+ * to do process profiling on Solaris 2.x x86.
+ *
+ * It must be used in conjunction with sol2-gc1.asm, which is used to start
+ * and stop process monitoring.
+ *
+ * Differences.
+ *
+ * On Solaris 2, _mcount is called by library functions, not mcount, so
+ * support has been added for both.
+ *
+ * Also, the prototype for profil() is different.
+ *
+ * Solaris 2 does not seem to have char *minbrk, which allows setting
+ * the minimum SBRK region, so this code has been removed; let's pray
+ * malloc does not mess it up.
+ *
+ * Notes
+ *
+ * This code could easily be integrated with the original gmon.c and perhaps
+ * should be.
+ */
+#include "tconfig.h"
+#include "tsystem.h"
+#include <fcntl.h> /* for creat() */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+static void moncontrol (int);
+extern void monstartup (char *, char *);
+extern void _mcleanup (void);
+extern void internal_mcount (
+#ifdef __x86_64__
+ char *, unsigned short *
+#else
+ void
+#endif
+ );
+
+
+struct phdr {
+ char *lpc;
+ char *hpc;
+ int ncnt;
+};
+
+
+#define HISTFRACTION 2
+#define HISTCOUNTER unsigned short
+#define HASHFRACTION 1
+#define ARCDENSITY 2
+#define MINARCS 50
+#define BASEADDRESS 0x8000000 /* On Solaris 2 X86 all executables start here
+ and not at 0 */
+
+struct tostruct {
+ char *selfpc;
+ long count;
+ unsigned short link;
+};
+
+struct rawarc {
+ unsigned long raw_frompc;
+ unsigned long raw_selfpc;
+ long raw_count;
+};
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
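+/* For example, ROUNDDOWN (13, 4) == 12 and ROUNDUP (13, 4) == 16.  */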
+
+/* char *minbrk; */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ intptr_t;
+
+ /*
+ * froms is actually a bunch of unsigned shorts indexing tos
+ */
+static int profiling = 3;
+static unsigned short *froms;
+static struct tostruct *tos = 0;
+static long tolimit = 0;
+static char *s_lowpc = 0;
+static char *s_highpc = 0;
+static size_t s_textsize = 0;
+
+static int ssiz;
+static char *sbuf;
+static int s_scale;
+ /* see profil(2) where this is described (incorrectly) */
+#define SCALE_1_TO_1 0x10000L
+
+#define MSG "No space for profiling buffer(s)\n"
+
+extern int errno;
+
+extern void *sbrk (intptr_t);
+
+void
+monstartup(char *lowpc, char *highpc)
+{
+ size_t monsize;
+ char *buffer;
+ register size_t o;
+
+ /*
+ * round lowpc and highpc to multiples of the density we're using
+ * so the rest of the scaling (here and in gprof) stays in ints.
+ */
+ lowpc = (char *)
+ ROUNDDOWN((size_t)lowpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_lowpc = lowpc;
+ highpc = (char *)
+ ROUNDUP((size_t)highpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_highpc = highpc;
+ s_textsize = highpc - lowpc;
+ monsize = (s_textsize / HISTFRACTION) + sizeof(struct phdr);
+ buffer = (char *) sbrk( monsize );
+ if ( buffer == (char *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ return;
+ }
+ froms = (unsigned short *) sbrk( s_textsize / HASHFRACTION );
+ if ( froms == (unsigned short *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ return;
+ }
+ tolimit = s_textsize * ARCDENSITY / 100;
+ if ( tolimit < MINARCS ) {
+ tolimit = MINARCS;
+ } else if ( tolimit > 65534 ) {
+ tolimit = 65534;
+ }
+ tos = (struct tostruct *) sbrk( tolimit * sizeof( struct tostruct ) );
+ if ( tos == (struct tostruct *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ tos = 0;
+ return;
+ }
+/* minbrk = (char *) sbrk(0);*/
+ tos[0].link = 0;
+ sbuf = buffer;
+ ssiz = monsize;
+ ( (struct phdr *) buffer ) -> lpc = lowpc;
+ ( (struct phdr *) buffer ) -> hpc = highpc;
+ ( (struct phdr *) buffer ) -> ncnt = ssiz;
+ monsize -= sizeof(struct phdr);
+ if ( monsize <= 0 )
+ return;
+ o = highpc - lowpc;
+ if( monsize < o )
+#ifndef hp300
+ s_scale = ( (float) monsize / o ) * SCALE_1_TO_1;
+#else /* avoid floating point */
+ {
+ int quot = o / monsize;
+
+ if (quot >= 0x10000)
+ s_scale = 1;
+ else if (quot >= 0x100)
+ s_scale = 0x10000 / quot;
+ else if (o >= 0x800000)
+ s_scale = 0x1000000 / (o / (monsize >> 8));
+ else
+ s_scale = 0x1000000 / ((o << 8) / monsize);
+ }
+#endif
+ else
+ s_scale = SCALE_1_TO_1;
+ moncontrol(1);
+}
+
+void
+_mcleanup (void)
+{
+ int fd;
+ int fromindex;
+ int endfrom;
+ char *frompc;
+ int toindex;
+ struct rawarc rawarc;
+
+ moncontrol(0);
+ fd = creat( "gmon.out" , 0666 );
+ if ( fd < 0 ) {
+ perror( "mcount: gmon.out" );
+ return;
+ }
+# ifdef DEBUG
+ fprintf( stderr , "[mcleanup] sbuf %#x ssiz %d\n" , sbuf , ssiz );
+# endif /* DEBUG */
+
+ write( fd , sbuf , ssiz );
+ endfrom = s_textsize / (HASHFRACTION * sizeof(*froms));
+ for ( fromindex = 0 ; fromindex < endfrom ; fromindex++ ) {
+ if ( froms[fromindex] == 0 ) {
+ continue;
+ }
+ frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms));
+ for (toindex=froms[fromindex]; toindex!=0; toindex=tos[toindex].link) {
+# ifdef DEBUG
+ fprintf( stderr ,
+ "[mcleanup] frompc %#x selfpc %#x count %d\n" ,
+ frompc , tos[toindex].selfpc , tos[toindex].count );
+# endif /* DEBUG */
+ rawarc.raw_frompc = (unsigned long) frompc;
+ rawarc.raw_selfpc = (unsigned long) tos[toindex].selfpc;
+ rawarc.raw_count = tos[toindex].count;
+ write( fd , &rawarc , sizeof rawarc );
+ }
+ }
+ close( fd );
+}
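+
+/* Illustrative sketch, not part of GCC: gmon.out as written above is a
+   struct phdr header, then (ncnt - sizeof (struct phdr)) bytes of
+   histogram counters, then a sequence of struct rawarc records.  A
+   minimal reader under those assumptions (the hypothetical
+   dump_gmon_out below is not a function of this file):  */
+#if 0
+static void
+dump_gmon_out (void)
+{
+  int fd = open ("gmon.out", 0 /* O_RDONLY */);
+  struct phdr h;
+  struct rawarc a;
+
+  read (fd, &h, sizeof h);                          /* histogram header */
+  lseek (fd, h.ncnt - sizeof h, 1 /* SEEK_CUR */);  /* skip counters */
+  while (read (fd, &a, sizeof a) == sizeof a)       /* one arc per record */
+    printf ("%#lx -> %#lx: %ld\n",
+            a.raw_frompc, a.raw_selfpc, a.raw_count);
+  close (fd);
+}
+#endif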
+
+#ifdef __x86_64__
+/* See GLIBC for additional information about this technique. */
+asm(".globl _mcount\n"
+ "\t.type\t_mcount, @function\n"
+ "_mcount:\n"
+ /* The compiler calls _mcount after the prologue, and does not
+ save any of the registers. Therefore we must preserve all
+ seven registers which may contain function arguments. */
+ "\tsubq\t$0x38,%rsp\n"
+ "\tmovq\t%rax,(%rsp)\n"
+ "\tmovq\t%rcx,0x08(%rsp)\n"
+ "\tmovq\t%rdx,0x10(%rsp)\n"
+ "\tmovq\t%rsi,0x18(%rsp)\n"
+ "\tmovq\t%rdi,0x20(%rsp)\n"
+ "\tmovq\t%r8,0x28(%rsp)\n"
+ "\tmovq\t%r9,0x30(%rsp)\n"
+ /* Get SELFPC (pushed by the call to this function) and
+       FROMPCINDEX (via the frame pointer).  */
+ "\tmovq\t0x38(%rsp),%rdi\n"
+ "\tmovq\t0x8(%rbp),%rsi\n"
+ "\tcall\tinternal_mcount\n"
+ /* Restore the saved registers. */
+ "\tmovq\t0x30(%rsp),%r9\n"
+ "\tmovq\t0x28(%rsp),%r8\n"
+ "\tmovq\t0x20(%rsp),%rdi\n"
+ "\tmovq\t0x18(%rsp),%rsi\n"
+ "\tmovq\t0x10(%rsp),%rdx\n"
+ "\tmovq\t0x08(%rsp),%rcx\n"
+ "\tmovq\t(%rsp),%rax\n"
+ "\taddq\t$0x38,%rsp\n"
+ "\tretq\n"
+ );
+#else
+/* Solaris 2 libraries use _mcount. */
+asm(".globl _mcount; _mcount: jmp internal_mcount");
+/* This is for compatibility with old versions of gcc which used mcount. */
+asm(".globl mcount; mcount: jmp internal_mcount");
+#endif
+
+void
+internal_mcount (
+#ifdef __x86_64__
+ char *selfpc,
+ unsigned short *frompcindex
+#else
+ void
+#endif
+ )
+{
+#ifndef __x86_64__
+ register char *selfpc;
+ register unsigned short *frompcindex;
+#endif
+ register struct tostruct *top;
+ register struct tostruct *prevtop;
+ register long toindex;
+ static char already_setup;
+
+#ifndef __x86_64__
+ /*
+ * find the return address for mcount,
+ * and the return address for mcount's caller.
+ */
+
+ /* selfpc = pc pushed by mcount call.
+ This identifies the function that was just entered. */
+ selfpc = (void *) __builtin_return_address (0);
+ /* frompcindex = pc in preceding frame.
+ This identifies the caller of the function just entered. */
+ frompcindex = (void *) __builtin_return_address (1);
+#endif
+
+ if(!already_setup) {
+ extern char etext[];
+ already_setup = 1;
+#ifdef __x86_64__
+ monstartup(0, etext);
+#else
+ monstartup((char*)0x08040000, etext);
+#endif
+#ifdef USE_ONEXIT
+ on_exit(_mcleanup, 0);
+#else
+ atexit(_mcleanup);
+#endif
+ }
+ /*
+ * check that we are profiling
+ * and that we aren't recursively invoked.
+ */
+ if (profiling) {
+ goto out;
+ }
+ profiling++;
+ /*
+ * check that frompcindex is a reasonable pc value.
+ * for example: signal catchers get called from the stack,
+ * not from text space. too bad.
+ */
+ frompcindex = (unsigned short *)((long)frompcindex - (long)s_lowpc);
+ if ((unsigned long)frompcindex > s_textsize) {
+ goto done;
+ }
+ frompcindex =
+ &froms[((long)frompcindex) / (HASHFRACTION * sizeof(*froms))];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ *frompcindex = toindex;
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = 0;
+ goto done;
+ }
+ top = &tos[toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count++;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * prevtop points to previous top.
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and none of the chain
+ * had top->selfpc == selfpc.
+ * so we allocate a new tostruct
+ * and link it to the head of the chain.
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ prevtop = top;
+ top = &tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * increment its count
+ * move it to the head of the chain.
+ */
+ top->count++;
+ toindex = prevtop->link;
+ prevtop->link = top->link;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+
+ }
+done:
+ profiling--;
+ /* and fall through */
+out:
+ return; /* normal return restores saved registers */
+
+overflow:
+ profiling++; /* halt further profiling */
+# define TOLIMIT "mcount: tos overflow\n"
+ write(2, TOLIMIT, sizeof(TOLIMIT));
+ goto out;
+}
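+
+/* A short walk-through of the tables used above: a caller pc maps to a
+   froms[] slot via (frompc - s_lowpc) / (HASHFRACTION * sizeof (*froms));
+   the slot holds the index of the head of a chain of tostruct entries
+   in tos[], one per distinct callee reached from that slot, each
+   carrying the callee pc and a call count.  tos[0].link serves as the
+   bump allocator for new entries, bounded by tolimit.  */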
+
+/*
+ * Control profiling
+ * profiling is what mcount checks to see if
+ * all the data structures are ready.
+ */
+static void
+moncontrol(int mode)
+{
+ if (mode)
+ {
+ /* start */
+ profil((unsigned short *)(sbuf + sizeof(struct phdr)),
+ ssiz - sizeof(struct phdr),
+ (size_t)s_lowpc, s_scale);
+
+ profiling = 0;
+ } else {
+ /* stop */
+ profil((unsigned short *)0, 0, 0, 0);
+ profiling = 3;
+ }
+}
diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
new file mode 100644
index 000000000..ce37683b4
--- /dev/null
+++ b/gcc/config/i386/gnu.h
@@ -0,0 +1,56 @@
+/* Configuration for an i386 running GNU with ELF as the target machine. */
+
+/*
+Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+2005, 2007, 2008, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (i386 GNU)");
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{pthread:-D_REENTRANT} %{posix:-D_POSIX_SOURCE}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu)"
+
+#undef STARTFILE_SPEC
+#if defined HAVE_LD_PIE
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt0.o%s;pie:Scrt1.o%s;static:crt0.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt0.o%s;static:crt0.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+/* FIXME: Is a Hurd-specific fallback mechanism necessary? */
+#undef MD_UNWIND_SUPPORT
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* Not supported yet. */
+#undef TARGET_THREAD_SSP_OFFSET
+#endif
diff --git a/gcc/config/i386/gstabs.h b/gcc/config/i386/gstabs.h
new file mode 100644
index 000000000..e9a621871
--- /dev/null
+++ b/gcc/config/i386/gstabs.h
@@ -0,0 +1,7 @@
+/* We do not want to output SDB debugging information. */
+
+#undef SDB_DEBUGGING_INFO
+
+/* We want to output DBX debugging information. */
+
+#define DBX_DEBUGGING_INFO 1
diff --git a/gcc/config/i386/gthr-win32.c b/gcc/config/i386/gthr-win32.c
new file mode 100644
index 000000000..46ecb0d4b
--- /dev/null
+++ b/gcc/config/i386/gthr-win32.c
@@ -0,0 +1,260 @@
+/* Implementation of W32-specific threads compatibility routines for
+ libgcc2. */
+
+/* Copyright (C) 1999, 2000, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Mumit Khan <khan@xraylith.wisc.edu>.
+ Modified and moved to separate file by Danny Smith
+ <dannysmith@users.sourceforge.net>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <windows.h>
+#ifndef __GTHREAD_HIDE_WIN32API
+# define __GTHREAD_HIDE_WIN32API 1
+#endif
+#undef __GTHREAD_I486_INLINE_LOCK_PRIMITIVES
+#define __GTHREAD_I486_INLINE_LOCK_PRIMITIVES
+#include <gthr-win32.h>
+
+/* Windows32 threads specific definitions. The windows32 threading model
+   does not map well onto GCC's pthread-inspired threading model, and so
+   there are caveats one needs to be aware of.
+
+ 1. The destructor supplied to __gthread_key_create is ignored for
+ generic x86-win32 ports. This will certainly cause memory leaks
+ due to unreclaimed eh contexts (sizeof (eh_context) is at least
+ 24 bytes for x86 currently).
+
+ This memory leak may be significant for long-running applications
+ that make heavy use of C++ EH.
+
+      However, the Mingw runtime (version 0.3 or newer) provides a
+      mechanism to emulate pthreads key dtors; the runtime provides a
+      special DLL, linked in if the -mthreads option is specified, that
+      runs the dtors in the reverse order of registration when each
+      thread exits. If the -mthreads option is not given, a stub is
+      linked in instead of the DLL, which results in a memory leak.
+      Other x86-win32 ports can of course use the same technique to
+      avoid the leak.
+
+   2. The error codes returned are not POSIX-like, and are cast into
+      ints. This may cause incorrect error returns due to truncation of
+      values on hardware where sizeof (DWORD) > sizeof (int).
+
+ 3. We are currently using a special mutex instead of the Critical
+ Sections, since Win9x does not support TryEnterCriticalSection
+ (while NT does).
+
+ The basic framework should work well enough. In the long term, GCC
+ needs to use Structured Exception Handling on Windows32. */
+
+int
+__gthr_win32_once (__gthread_once_t *once, void (*func) (void))
+{
+ if (once == NULL || func == NULL)
+ return EINVAL;
+
+ if (! once->done)
+ {
+ if (InterlockedIncrement (&(once->started)) == 0)
+ {
+ (*func) ();
+ once->done = TRUE;
+ }
+ else
+ {
+ /* Another thread is currently executing the code, so wait for it
+ to finish; yield the CPU in the meantime. If performance
+ does become an issue, the solution is to use an Event that
+ we wait on here (and set above), but that implies a place to
+ create the event before this routine is called. */
+ while (! once->done)
+ Sleep (0);
+ }
+ }
+ return 0;
+}
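+
+/* Illustrative usage sketch (not part of this file): callers reach the
+   routine above through the __gthread_once wrapper in gthr-win32.h,
+   roughly as follows.  */
+#if 0
+static __gthread_once_t init_once = __GTHREAD_ONCE_INIT;
+static void init_state (void) { /* runs exactly once */ }
+
+void
+use_state (void)
+{
+  __gthread_once (&init_once, init_state);
+  /* State is now initialized, no matter which thread got here first.  */
+}
+#endif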
+
+/* Windows32 thread local keys don't support destructors; this leads to
+ leaks, especially in threaded applications making extensive use of
+   C++ EH. Mingw uses a thread-support DLL to work around this problem. */
+
+int
+__gthr_win32_key_create (__gthread_key_t *key,
+ void (*dtor) (void *) __attribute__((unused)))
+{
+ int status = 0;
+ DWORD tls_index = TlsAlloc ();
+ if (tls_index != 0xFFFFFFFF)
+ {
+ *key = tls_index;
+#ifdef MINGW32_SUPPORTS_MT_EH
+ /* Mingw runtime will run the dtors in reverse order for each thread
+ when the thread exits. */
+ status = __mingwthr_key_dtor (*key, dtor);
+#endif
+ }
+ else
+ status = (int) GetLastError ();
+ return status;
+}
+
+int
+__gthr_win32_key_delete (__gthread_key_t key)
+{
+ return (TlsFree (key) != 0) ? 0 : (int) GetLastError ();
+}
+
+void *
+__gthr_win32_getspecific (__gthread_key_t key)
+{
+ DWORD lasterror;
+ void *ptr;
+ lasterror = GetLastError();
+ ptr = TlsGetValue(key);
+ SetLastError( lasterror );
+ return ptr;
+}
+
+int
+__gthr_win32_setspecific (__gthread_key_t key, const void *ptr)
+{
+ if (TlsSetValue (key, CONST_CAST2(void *, const void *, ptr)) != 0)
+ return 0;
+ else
+ return GetLastError ();
+}
+
+void
+__gthr_win32_mutex_init_function (__gthread_mutex_t *mutex)
+{
+ mutex->counter = -1;
+ mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL);
+}
+
+void
+__gthr_win32_mutex_destroy (__gthread_mutex_t *mutex)
+{
+ CloseHandle ((HANDLE) mutex->sema);
+}
+
+int
+__gthr_win32_mutex_lock (__gthread_mutex_t *mutex)
+{
+ if (InterlockedIncrement (&mutex->counter) == 0 ||
+ WaitForSingleObject (mutex->sema, INFINITE) == WAIT_OBJECT_0)
+ return 0;
+ else
+ {
+ /* WaitForSingleObject returns WAIT_FAILED, and we can only do
+ some best-effort cleanup here. */
+ InterlockedDecrement (&mutex->counter);
+ return 1;
+ }
+}
+
+int
+__gthr_win32_mutex_trylock (__gthread_mutex_t *mutex)
+{
+ if (__GTHR_W32_InterlockedCompareExchange (&mutex->counter, 0, -1) < 0)
+ return 0;
+ else
+ return 1;
+}
+
+int
+__gthr_win32_mutex_unlock (__gthread_mutex_t *mutex)
+{
+ if (InterlockedDecrement (&mutex->counter) >= 0)
+ return ReleaseSemaphore (mutex->sema, 1, NULL) ? 0 : 1;
+ else
+ return 0;
+}
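+
+/* The counter protocol used above, step by step: counter starts at -1;
+   the first locker's InterlockedIncrement yields 0 and takes the mutex
+   without touching the semaphore.  A second locker's increment yields
+   1, so it blocks on the semaphore; the unlocker's decrement then
+   yields >= 0 (waiters remain) and releases the semaphore to wake
+   exactly one of them.  With no waiters the decrement yields -1 and no
+   kernel call is made on unlock.  */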
+
+void
+__gthr_win32_recursive_mutex_init_function (__gthread_recursive_mutex_t *mutex)
+{
+ mutex->counter = -1;
+ mutex->depth = 0;
+ mutex->owner = 0;
+ mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL);
+}
+
+int
+__gthr_win32_recursive_mutex_lock (__gthread_recursive_mutex_t *mutex)
+{
+ DWORD me = GetCurrentThreadId();
+ if (InterlockedIncrement (&mutex->counter) == 0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else if (mutex->owner == me)
+ {
+ InterlockedDecrement (&mutex->counter);
+ ++(mutex->depth);
+ }
+ else if (WaitForSingleObject (mutex->sema, INFINITE) == WAIT_OBJECT_0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else
+ {
+ /* WaitForSingleObject returns WAIT_FAILED, and we can only do
+ some best-effort cleanup here. */
+ InterlockedDecrement (&mutex->counter);
+ return 1;
+ }
+ return 0;
+}
+
+int
+__gthr_win32_recursive_mutex_trylock (__gthread_recursive_mutex_t *mutex)
+{
+ DWORD me = GetCurrentThreadId();
+ if (__GTHR_W32_InterlockedCompareExchange (&mutex->counter, 0, -1) < 0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else if (mutex->owner == me)
+ ++(mutex->depth);
+ else
+ return 1;
+
+ return 0;
+}
+
+int
+__gthr_win32_recursive_mutex_unlock (__gthread_recursive_mutex_t *mutex)
+{
+ --(mutex->depth);
+ if (mutex->depth == 0)
+ {
+ mutex->owner = 0;
+
+ if (InterlockedDecrement (&mutex->counter) >= 0)
+ return ReleaseSemaphore (mutex->sema, 1, NULL) ? 0 : 1;
+ }
+
+ return 0;
+}
diff --git a/gcc/config/i386/host-cygwin.c b/gcc/config/i386/host-cygwin.c
new file mode 100644
index 000000000..7d975af43
--- /dev/null
+++ b/gcc/config/i386/host-cygwin.c
@@ -0,0 +1,78 @@
+/* Cygwin host-specific hook definitions.
+ Copyright (C) 2004, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "diagnostic.h"
+
+static void * cygwin_gt_pch_get_address (size_t, int fd);
+static size_t cygwin_gt_pch_alloc_granularity (void);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS cygwin_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY
+#define HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY cygwin_gt_pch_alloc_granularity
+
+/* Granularity for reserving address space. */
+static const size_t va_granularity = 0x10000;
+
+/* Return the alignment required for allocating virtual memory. */
+static size_t
+cygwin_gt_pch_alloc_granularity (void)
+{
+ return va_granularity;
+}
+
+/* Identify an address that's likely to be free in a subsequent invocation
+ of the compiler. The area should be able to hold SIZE bytes. FD is an
+ open file descriptor if the host would like to probe with mmap. */
+static void *
+cygwin_gt_pch_get_address (size_t sz, int fd)
+{
+ void *base;
+ off_t p = lseek(fd, 0, SEEK_CUR);
+
+ if (p == (off_t) -1)
+ fatal_error ("can%'t get position in PCH file: %m");
+
+ /* Cygwin requires that the underlying file be at least
+ as large as the requested mapping. */
+ if ((size_t) p < sz)
+ {
+ if ( ftruncate (fd, sz) == -1 )
+ fatal_error ("can%'t extend PCH file: %m");
+ }
+
+ base = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+ if (base == MAP_FAILED)
+ base = NULL;
+ else
+ munmap (base, sz);
+
+ if (lseek (fd, p, SEEK_SET) == (off_t) -1 )
+ fatal_error ("can%'t set position in PCH file: %m");
+
+ return base;
+}
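+
+/* The probe above works by actually mapping the PCH file at a
+   kernel-chosen address and immediately unmapping it: the address that
+   mmap picked is likely to be free again in the next compiler
+   invocation, which is all the PCH machinery needs.  */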
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/i386/host-i386-darwin.c b/gcc/config/i386/host-i386-darwin.c
new file mode 100644
index 000000000..03a19aa4c
--- /dev/null
+++ b/gcc/config/i386/host-i386-darwin.c
@@ -0,0 +1,30 @@
+/* i386-darwin host-specific hook definitions.
+ Copyright (C) 2003, 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for x86 hosts; this file exists just
+ to include config/host-darwin.h. */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/i386/host-mingw32.c b/gcc/config/i386/host-mingw32.c
new file mode 100644
index 000000000..c224b2807
--- /dev/null
+++ b/gcc/config/i386/host-mingw32.c
@@ -0,0 +1,179 @@
+/* mingw32 host-specific hook definitions.
+ Copyright (C) 2004, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "diagnostic.h"
+
+
+#define WIN32_LEAN_AND_MEAN /* Not so important if we have windows.h.gch. */
+#include <windows.h>
+
+static void * mingw32_gt_pch_get_address (size_t, int);
+static int mingw32_gt_pch_use_address (void *, size_t, int, size_t);
+static size_t mingw32_gt_pch_alloc_granularity (void);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS mingw32_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS mingw32_gt_pch_use_address
+#undef HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY
+#define HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY mingw32_gt_pch_alloc_granularity
+
+static inline void w32_error(const char*, const char*, int, const char*);
+
+/* FIXME: Is this big enough? */
+static const size_t pch_VA_max_size = 128 * 1024 * 1024;
+
+/* Granularity for reserving address space. */
+static const size_t va_granularity = 0x10000;
+
+/* Print out the GetLastError() translation. */
+static inline void
+w32_error (const char* function, const char* file, int line,
+ const char* my_msg)
+{
+ LPSTR w32_msgbuf;
+ FormatMessageA (FORMAT_MESSAGE_ALLOCATE_BUFFER
+ | FORMAT_MESSAGE_FROM_SYSTEM
+ | FORMAT_MESSAGE_IGNORE_INSERTS
+ | FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, GetLastError(),
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ (LPSTR) &w32_msgbuf, 0, NULL);
+ fprintf(stderr, "internal error in %s, at %s:%d: %s: %s\n",
+ function, trim_filename (file), line, my_msg, w32_msgbuf);
+ LocalFree ((HLOCAL)w32_msgbuf);
+}
+
+/* Granularity for reserving address space. */
+static size_t mingw32_gt_pch_alloc_granularity (void)
+{
+ return va_granularity;
+}
+
+/* Identify an address that's likely to be free in a subsequent invocation
+ of the compiler. The area should be able to hold SIZE bytes. FD is an
+ open file descriptor if the host would like to probe with mmap. */
+
+static void *
+mingw32_gt_pch_get_address (size_t size, int fd ATTRIBUTE_UNUSED)
+{
+ void* res;
+ size = (size + va_granularity - 1) & ~(va_granularity - 1);
+ if (size > pch_VA_max_size)
+ return NULL;
+
+ /* FIXME: We let system determine base by setting first arg to NULL.
+ Allocating at top of available address space avoids unnecessary
+ fragmentation of "ordinary" (malloc's) address space but may not
+     be safe with delayed loading of system dlls.  Preferred addresses
+     for NT system dlls are in the 0x70000000 to 0x78000000 range.
+ If we allocate at bottom we need to reserve the address as early
+ as possible and at the same point in each invocation. */
+
+ res = VirtualAlloc (NULL, pch_VA_max_size,
+ MEM_RESERVE | MEM_TOP_DOWN,
+ PAGE_NOACCESS);
+ if (!res)
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "VirtualAlloc");
+ else
+ /* We do not need the address space for now, so free it. */
+ VirtualFree (res, 0, MEM_RELEASE);
+
+ return res;
+}
+
+/* ADDR is an address returned by gt_pch_get_address. Attempt to allocate
+ SIZE bytes at the same address and load it with the data from FD at
+ OFFSET. Return -1 if we couldn't allocate memory at ADDR, return 0
+ if the memory is allocated but the data not loaded, return 1 if done. */
+
+static int
+mingw32_gt_pch_use_address (void *addr, size_t size, int fd,
+ size_t offset)
+{
+ void * mmap_addr;
+ HANDLE mmap_handle;
+
+  /* Apparently, MS Vista puts unnamed file mapping objects into the
+     Global namespace when running an application in a Terminal Server
+     session.  This causes failures since, by default, applications
+     don't get SeCreateGlobalPrivilege.  We don't need global memory
+     sharing, so we explicitly put the object into the Local namespace.
+
+     If multiple concurrent GCC processes are using PCH functionality,
+     MapViewOfFileEx returns an "Access Denied" error.  So we ensure the
+     session-wide mapping name is unique by appending the process ID.  */
+
+#define OBJECT_NAME_FMT "Local\\MinGWGCCPCH-"
+
+ char* object_name = NULL;
+  /* However, the documentation for CreateFileMapping says that on NT4
+     and earlier, backslashes are invalid in the object name.  So, we
+     need to check whether we are on Windows 2000 or higher.  */
+ OSVERSIONINFO version_info;
+ version_info.dwOSVersionInfoSize = sizeof (version_info);
+
+ if (size == 0)
+ return 0;
+
+  /* Offset must also be a multiple of the allocation granularity for
+     this to work.  We can't change the offset.  */
+ if ((offset & (va_granularity - 1)) != 0 || size > pch_VA_max_size)
+ return -1;
+
+
+ /* Determine the version of Windows we are running on and use a
+ uniquely-named local object if running > 4. */
+ GetVersionEx (&version_info);
+ if (version_info.dwMajorVersion > 4)
+ {
+ char local_object_name [sizeof (OBJECT_NAME_FMT)
+ + sizeof (DWORD) * 2];
+ snprintf (local_object_name, sizeof (local_object_name),
+ OBJECT_NAME_FMT "%lx", GetCurrentProcessId());
+ object_name = local_object_name;
+ }
+
+ mmap_handle = CreateFileMappingA ((HANDLE) _get_osfhandle (fd), NULL,
+ PAGE_WRITECOPY | SEC_COMMIT, 0, 0,
+ object_name);
+
+ if (mmap_handle == NULL)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "CreateFileMapping");
+ return -1;
+ }
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, addr);
+ if (mmap_addr != addr)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return -1;
+ }
+
+ return 1;
+}
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/i386/i386-builtin-types.awk b/gcc/config/i386/i386-builtin-types.awk
new file mode 100644
index 000000000..7b016f44c
--- /dev/null
+++ b/gcc/config/i386/i386-builtin-types.awk
@@ -0,0 +1,280 @@
+# Copyright (C) 2009 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Generates compressed tables for types for i386 builtin functions.
+
+function do_error(string) {
+ print FILENAME ":" FNR ": " string > "/dev/stderr"
+ errors = 1
+}
+
+function check_type(string) {
+ if (!(string in type_hash))
+ do_error("undefined type code " string)
+}
+
+# We can significantly reduce the size of the read-only tables
+# by forcing the compiler to use a smaller implementation type
+# for the enumerations.
+function attribute_mode(count) {
+ # ??? Except that we get strange "comparison always false" warnings
+ # for comparisons between different elements of the enumeration.
+ # print "#ifdef __GNUC__"
+ # if (count < 256)
+ # print " __attribute__((__mode__(__QI__)))"
+ # else
+ # print " __attribute__((__mode__(__HI__)))"
+ # print "#endif"
+}
+
+BEGIN {
+ FS = "[() \t,]+"
+
+ prim_defs = 0
+ vect_defs = 0
+ ptr_defs = 0
+ cptr_defs = 0
+ func_defs = 0
+ func_args = 0
+ alias_defs = 0
+}
+
+# Skip blank lines or comments.
+/^[ \t]*(#|$)/ {
+ next
+}
+
+$1 == "DEF_PRIMITIVE_TYPE" {
+ if (NF == 4) {
+ type_hash[$2] = 1
+ prim_name[prim_defs] = $2
+ prim_base[prim_defs] = $3
+ prim_defs++
+ } else
+ do_error("DEF_PRIMITIVE_TYPE expected 2 arguments")
+ next
+}
+
+$1 == "DEF_VECTOR_TYPE" {
+ if (NF == 4 || NF == 5) {
+ check_type($3)
+ type_hash[$2] = 1
+ vect_name[vect_defs] = $2
+ vect_base[vect_defs] = $3
+ vect_mode[vect_defs] = (NF == 5 ? $4 : $2)
+ vect_defs++
+ } else
+ do_error("DEF_VECTOR_TYPE expected 2 arguments")
+ next
+}
+
+$1 == "DEF_POINTER_TYPE" {
+ if (NF == 4) {
+ check_type($3)
+ type_hash[$2] = 1
+ ptr_name[ptr_defs] = $2
+ ptr_base[ptr_defs] = $3
+ ptr_defs++
+ } else if (NF == 5) {
+ check_type($3)
+ if ($4 == "CONST") {
+ type_hash[$2] = 1
+ cptr_name[cptr_defs] = $2
+ cptr_base[cptr_defs] = $3
+ cptr_defs++
+ } else
+ do_error("invalid qualifier \"" $4 "\"")
+ }
+ else
+ do_error("DEF_POINTER_TYPE expected 2 or 3 arguments")
+ next
+}
+
+$1 == "DEF_FUNCTION_TYPE" {
+ func_start[func_defs] = func_args
+ for (i = 2; i < NF; ++i) {
+ check_type($i)
+ func_types[func_args++] = $i
+ }
+
+ if (NF < 3)
+ do_error("DEF_FUNCTION_TYPE expected at least 1 argument")
+ else if (NF == 3)
+ name = $2 "_FTYPE_VOID"
+ else {
+ name = $2 "_FTYPE"
+ for (i = 3; i < NF; ++i)
+ name = name "_" $i
+ }
+ func_hash[name] = 1
+ func_name[func_defs++] = name
+ next
+}
+
+$1 == "DEF_FUNCTION_TYPE_ALIAS" {
+ if (NF == 4) {
+ if ($2 in func_hash) {
+ alias_base[alias_defs] = $2
+ alias_name[alias_defs] = $2 "_" $3
+ alias_defs++
+ } else
+ do_error("undefined function code " $2)
+ } else
+ do_error("DEF_FUNCTION_TYPE_ALIAS expected 2 arguments")
+ next
+}
+
+{
+ do_error("unknown directive \"" $1 "\"");
+}
+
+END {
+ if (errors)
+ exit 1
+
+ print "/* This file is auto-generated by i386-builtin-types.awk. */\n"
+
+ # This first enumeration contains all of the non-function types.
+ print "enum ix86_builtin_type {"
+ for (i = 0; i < prim_defs; ++i)
+ print " IX86_BT_" prim_name[i] ","
+ print " IX86_BT_LAST_PRIM = IX86_BT_" prim_name[i-1] ","
+ for (i = 0; i < vect_defs; ++i)
+ print " IX86_BT_" vect_name[i] ","
+ print " IX86_BT_LAST_VECT = IX86_BT_" vect_name[i-1] ","
+ for (i = 0; i < ptr_defs; ++i)
+ print " IX86_BT_" ptr_name[i] ","
+ print " IX86_BT_LAST_PTR = IX86_BT_" ptr_name[i-1] ","
+ for (i = 0; i < cptr_defs; ++i)
+ print " IX86_BT_" cptr_name[i] ","
+ print " IX86_BT_LAST_CPTR = IX86_BT_" cptr_name[i-1] "\n}"
+ attribute_mode(prim_defs + vect_defs + ptr_defs + cptr_defs)
+ print ";\n\n"
+
+ # We can't tabularize the initialization of the primitives, since
+ # at least one of them is created via a local variable. That's ok,
+ # just create a nice big macro to do all the work.
+ print "#define DEFINE_BUILTIN_PRIMITIVE_TYPES \\"
+ for (i = 0; i < prim_defs; ++i) {
+ printf " ix86_builtin_type_tab[(int)IX86_BT_" prim_name[i] \
+ "] = " prim_base[i]
+ if (i < prim_defs - 1)
+ print ", \\"
+ }
+ print "\n\n"
+
+ # The vector types are defined via two tables defining the real
+ # machine mode and the builtin primitive type. We use two tables
+ # rather than a structure to avoid structure padding and save space.
+ print "static const enum machine_mode ix86_builtin_type_vect_mode[] = {"
+ for (i = 0; i < vect_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 6 == 0)
+ printf ",\n "
+ else
+ printf ", "
+ printf vect_mode[i] "mode"
+ }
+ print "\n};\n\n"
+
+ print "static const enum ix86_builtin_type " \
+ "ix86_builtin_type_vect_base[] = {"
+ for (i = 0; i < vect_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 4 == 0)
+ printf ",\n "
+ else
+ printf ", "
+ printf "IX86_BT_" vect_base[i]
+ }
+ print "\n};\n\n"
+
+ # The pointer types are defined via a single table defining the
+ # builtin primitive type. The const-ness of the pointer is taken
+ # from the enumeration value > IX86_BT_LAST_PTR.
+ print "static const enum ix86_builtin_type " \
+ "ix86_builtin_type_ptr_base[] = {"
+ for (i = 0; i < ptr_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 4 == 0)
+ printf "\n "
+ printf " IX86_BT_" ptr_base[i] ","
+ }
+ print "\n /* pointer-to-constant defs start here */"
+ for (i = 0; i < cptr_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 4 == 0)
+ printf ",\n "
+ else
+ printf ", "
+ printf "IX86_BT_" cptr_base[i]
+ }
+ print "\n};\n\n"
+
+ # This second enumeration contains all of the function types.
+ print "enum ix86_builtin_func_type {"
+ for (i = 0; i < func_defs; ++i)
+ print " " func_name[i] ","
+ print " IX86_BT_LAST_FUNC = " func_name[i-1] ","
+ for (i = 0; i < alias_defs; ++i)
+ print " " alias_name[i] ","
+ print " IX86_BT_LAST_ALIAS = " alias_name[i-1] "\n}"
+ attribute_mode(func_defs + alias_defs)
+ print ";\n\n"
+
+ # The function types are defined via two tables. The first contains
+    # ranges consisting of the function's return type, followed by all of
+ # the function argument types. The ranges for all of the builtin
+ # functions are smooshed together in the same array. The second array
+ # contains, for each builtin, the index of the function's return type
+ # within the first array.
+ print "static const enum ix86_builtin_type ix86_builtin_func_args[] = {"
+ for (i = 0; i < func_args; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 4 == 0)
+ printf ",\n "
+ else
+ printf ", "
+ printf "IX86_BT_" func_types[i]
+ }
+ print "\n};\n\n"
+
+ print "static const unsigned short ix86_builtin_func_start[] = {"
+ for (i = 0; i < func_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else if (i % 10 == 0)
+ printf "\n "
+ printf " " func_start[i] ","
+ }
+ print " " func_args "\n};\n\n"
+
+ print "static const enum ix86_builtin_func_type " \
+ "ix86_builtin_func_alias_base[] = {"
+ for (i = 0; i < alias_defs; ++i) {
+ if (i == 0)
+ printf " "
+ else
+ printf ",\n "
+ printf alias_base[i]
+ }
+ print "\n};"
+}
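+
+# Illustrative example (assumed input, not drawn verbatim from this
+# tree): given
+#   DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
+#   DEF_VECTOR_TYPE (V4SF, FLOAT)
+#   DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
+# the script emits, among other things,
+#   enum ix86_builtin_type { IX86_BT_FLOAT, ..., IX86_BT_V4SF, ... };
+#   enum ix86_builtin_func_type { V4SF_FTYPE_V4SF_V4SF, ... };
+# plus the ix86_builtin_func_args / ix86_builtin_func_start tables that
+# spell out the return and argument types for each function enum.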
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
new file mode 100644
index 000000000..05a7f5468
--- /dev/null
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -0,0 +1,420 @@
+# This file provides a declarative way of describing the types that
+# are used when declaring ix86 builtin functions. It is processed
+# with i386-builtin-types.awk to produce C code.
+#
+# DEF_PRIMITIVE_TYPE (ENUM, TYPE)
+#
+# The ENUM is an identifier indicating which type is being defined.
+# TYPE is a variable that represents the type.
+# ??? Note that the awk program expects a single token for TYPE.
+# At present, that's all that's required; revisit if it turns out
+# that we need more than that.
+#
+# DEF_VECTOR_TYPE (ENUM, TYPE [, MODE])
+#
+# This describes a vector type. ENUM is an identifier as above.
+# TYPE is the enumeral for the inner type which should of course
+# name a type of the proper inner mode. If present, MODE is the
+# machine mode, else the machine mode should be the same as ENUM.
+#
+# DEF_POINTER_TYPE (ENUM, TYPE [, CONST])
+#
+# This describes a pointer type. ENUM is an identifier as above;
+# TYPE is the enumeral for the type pointed to. An optional third
+# argument is the keyword CONST, which defines this to be a pointer to
+# a constant type.
+#
+# DEF_FUNCTION_TYPE (RETURN, ARGN*)
+#
+# This describes a function type. The return type and the arguments
+# are the enumerals defined above. The enumeration name for the
+# function is formed by RETURN ## _FTYPE_ ## ARG1 ## _ ## ARG2 ...
+#
+# DEF_FUNCTION_TYPE_ALIAS (ENUM, SUFFIX)
+#
+# This defines an enumeration ENUM ## _ ## SUFFIX and arranges for
+# the function type to be copied from ENUM. This is used to control
+# how the expanders treat the function.
+#
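+# For example, DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF) below defines the
+# enumeration V4SF_FTYPE_V4SF_V4SF, a function returning V4SF and
+# taking two V4SF arguments, while DEF_FUNCTION_TYPE (VOID) defines
+# VOID_FTYPE_VOID.
+#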
+
+DEF_PRIMITIVE_TYPE (VOID, void_type_node)
+DEF_PRIMITIVE_TYPE (CHAR, char_type_node)
+DEF_PRIMITIVE_TYPE (UCHAR, unsigned_char_type_node)
+# ??? Logically this should be intQI_type_node, but that maps to "signed char"
+# which is a different type than "char" even if "char" is signed. This must
+# match the usage in emmintrin.h and changing this would change name mangling
+# and so is not advisable.
+DEF_PRIMITIVE_TYPE (QI, char_type_node)
+DEF_PRIMITIVE_TYPE (HI, intHI_type_node)
+DEF_PRIMITIVE_TYPE (SI, intSI_type_node)
+# ??? Logically this should be intDI_type_node, but that maps to "long"
+# with 64-bit, and that's not how the emmintrin.h is written. Again,
+# changing this would change name mangling.
+DEF_PRIMITIVE_TYPE (DI, long_long_integer_type_node)
+DEF_PRIMITIVE_TYPE (UQI, unsigned_intQI_type_node)
+DEF_PRIMITIVE_TYPE (UHI, unsigned_intHI_type_node)
+DEF_PRIMITIVE_TYPE (USI, unsigned_intSI_type_node)
+DEF_PRIMITIVE_TYPE (UDI, long_long_unsigned_type_node)
+# ??? Some of the types below should use the mode types above.
+DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
+DEF_PRIMITIVE_TYPE (INT, integer_type_node)
+DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (UNSIGNED, unsigned_type_node)
+DEF_PRIMITIVE_TYPE (LONGLONG, long_long_integer_type_node)
+DEF_PRIMITIVE_TYPE (ULONGLONG, long_long_unsigned_type_node)
+DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node)
+DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node)
+DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node)
+DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node)
+DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
+DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
+DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
+DEF_PRIMITIVE_TYPE (FLOAT128, float128_type_node)
+
+# MMX vectors
+DEF_VECTOR_TYPE (V2SF, FLOAT)
+DEF_VECTOR_TYPE (V1DI, DI)
+DEF_VECTOR_TYPE (V2SI, SI)
+DEF_VECTOR_TYPE (V4HI, HI)
+DEF_VECTOR_TYPE (V8QI, QI)
+
+# SSE vectors
+DEF_VECTOR_TYPE (V2DF, DOUBLE)
+DEF_VECTOR_TYPE (V4SF, FLOAT)
+DEF_VECTOR_TYPE (V2DI, DI)
+DEF_VECTOR_TYPE (V4SI, SI)
+DEF_VECTOR_TYPE (V8HI, HI)
+DEF_VECTOR_TYPE (V16QI, QI)
+DEF_VECTOR_TYPE (V2UDI, UDI, V2DI)
+DEF_VECTOR_TYPE (V4USI, USI, V4SI)
+DEF_VECTOR_TYPE (V8UHI, UHI, V8HI)
+DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
+
+# AVX vectors
+DEF_VECTOR_TYPE (V4DF, DOUBLE)
+DEF_VECTOR_TYPE (V8SF, FLOAT)
+DEF_VECTOR_TYPE (V4DI, DI)
+DEF_VECTOR_TYPE (V8SI, SI)
+DEF_VECTOR_TYPE (V16HI, HI)
+DEF_VECTOR_TYPE (V32QI, QI)
+
+
+DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
+DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
+DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST)
+DEF_POINTER_TYPE (PCHAR, CHAR)
+DEF_POINTER_TYPE (PCVOID, VOID, CONST)
+DEF_POINTER_TYPE (PVOID, VOID)
+DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
+DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PUSHORT, USHORT)
+DEF_POINTER_TYPE (PINT, INT)
+DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
+DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
+
+DEF_POINTER_TYPE (PV2DF, V2DF)
+DEF_POINTER_TYPE (PV2DI, V2DI)
+DEF_POINTER_TYPE (PV2SF, V2SF)
+DEF_POINTER_TYPE (PV4DF, V4DF)
+DEF_POINTER_TYPE (PV4DI, V4DI)
+DEF_POINTER_TYPE (PV4SF, V4SF)
+DEF_POINTER_TYPE (PV8SF, V8SF)
+
+DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
+DEF_POINTER_TYPE (PCV2SF, V2SF, CONST)
+DEF_POINTER_TYPE (PCV4DF, V4DF, CONST)
+DEF_POINTER_TYPE (PCV4SF, V4SF, CONST)
+DEF_POINTER_TYPE (PCV8SF, V8SF, CONST)
+
+DEF_FUNCTION_TYPE (FLOAT128)
+DEF_FUNCTION_TYPE (UINT64)
+DEF_FUNCTION_TYPE (UNSIGNED)
+DEF_FUNCTION_TYPE (VOID)
+DEF_FUNCTION_TYPE (PVOID)
+
+DEF_FUNCTION_TYPE (FLOAT, FLOAT)
+DEF_FUNCTION_TYPE (FLOAT128, FLOAT128)
+DEF_FUNCTION_TYPE (INT, INT)
+DEF_FUNCTION_TYPE (INT, V16QI)
+DEF_FUNCTION_TYPE (INT, V2DF)
+DEF_FUNCTION_TYPE (INT, V4DF)
+DEF_FUNCTION_TYPE (INT, V4SF)
+DEF_FUNCTION_TYPE (INT, V8QI)
+DEF_FUNCTION_TYPE (INT, V8SF)
+DEF_FUNCTION_TYPE (INT64, INT64)
+DEF_FUNCTION_TYPE (INT64, V2DF)
+DEF_FUNCTION_TYPE (INT64, V4SF)
+DEF_FUNCTION_TYPE (UINT64, INT)
+DEF_FUNCTION_TYPE (UINT16, UINT16)
+DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
+DEF_FUNCTION_TYPE (V16QI, PCCHAR)
+DEF_FUNCTION_TYPE (V16QI, V16QI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE)
+DEF_FUNCTION_TYPE (V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DF, V2SI)
+DEF_FUNCTION_TYPE (V2DF, V4DF)
+DEF_FUNCTION_TYPE (V2DF, V4SF)
+DEF_FUNCTION_TYPE (V2DF, V4SI)
+DEF_FUNCTION_TYPE (V2DI, PV2DI)
+DEF_FUNCTION_TYPE (V2DI, V16QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI)
+DEF_FUNCTION_TYPE (V2DI, V4SI)
+DEF_FUNCTION_TYPE (V2DI, V8HI)
+DEF_FUNCTION_TYPE (V2SF, V2SF)
+DEF_FUNCTION_TYPE (V2SF, V2SI)
+DEF_FUNCTION_TYPE (V2SI, V2DF)
+DEF_FUNCTION_TYPE (V2SI, V2SF)
+DEF_FUNCTION_TYPE (V2SI, V2SI)
+DEF_FUNCTION_TYPE (V2SI, V4SF)
+DEF_FUNCTION_TYPE (V32QI, PCCHAR)
+DEF_FUNCTION_TYPE (V4DF, PCDOUBLE)
+DEF_FUNCTION_TYPE (V4DF, PCV2DF)
+DEF_FUNCTION_TYPE (V4DF, V2DF)
+DEF_FUNCTION_TYPE (V4DF, V4DF)
+DEF_FUNCTION_TYPE (V4DF, V4SF)
+DEF_FUNCTION_TYPE (V4DF, V4SI)
+DEF_FUNCTION_TYPE (V4HI, V4HI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
+DEF_FUNCTION_TYPE (V4SF, V2DF)
+DEF_FUNCTION_TYPE (V4SF, V4DF)
+DEF_FUNCTION_TYPE (V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, V4SI)
+DEF_FUNCTION_TYPE (V4SF, V8SF)
+DEF_FUNCTION_TYPE (V4SF, V8HI)
+DEF_FUNCTION_TYPE (V4SI, V16QI)
+DEF_FUNCTION_TYPE (V4SI, V2DF)
+DEF_FUNCTION_TYPE (V4SI, V4DF)
+DEF_FUNCTION_TYPE (V4SI, V4SF)
+DEF_FUNCTION_TYPE (V4SI, V4SI)
+DEF_FUNCTION_TYPE (V4SI, V8HI)
+DEF_FUNCTION_TYPE (V4SI, V8SI)
+DEF_FUNCTION_TYPE (V8HI, V16QI)
+DEF_FUNCTION_TYPE (V8HI, V8HI)
+DEF_FUNCTION_TYPE (V8QI, V8QI)
+DEF_FUNCTION_TYPE (V8SF, PCFLOAT)
+DEF_FUNCTION_TYPE (V8SF, PCV4SF)
+DEF_FUNCTION_TYPE (V8SF, V4SF)
+DEF_FUNCTION_TYPE (V8SF, V8SF)
+DEF_FUNCTION_TYPE (V8SF, V8SI)
+DEF_FUNCTION_TYPE (V8SF, V8HI)
+DEF_FUNCTION_TYPE (V8SI, V4SI)
+DEF_FUNCTION_TYPE (V8SI, V8SF)
+DEF_FUNCTION_TYPE (VOID, PCVOID)
+DEF_FUNCTION_TYPE (VOID, PVOID)
+DEF_FUNCTION_TYPE (VOID, UINT64)
+DEF_FUNCTION_TYPE (VOID, UNSIGNED)
+DEF_FUNCTION_TYPE (INT, PUSHORT)
+DEF_FUNCTION_TYPE (INT, PUNSIGNED)
+DEF_FUNCTION_TYPE (INT, PULONGLONG)
+
+DEF_FUNCTION_TYPE (DI, V2DI, INT)
+DEF_FUNCTION_TYPE (DOUBLE, V2DF, INT)
+DEF_FUNCTION_TYPE (FLOAT, V4SF, INT)
+DEF_FUNCTION_TYPE (FLOAT128, FLOAT128, FLOAT128)
+DEF_FUNCTION_TYPE (HI, V4HI, INT)
+DEF_FUNCTION_TYPE (HI, V8HI, INT)
+DEF_FUNCTION_TYPE (INT, V2DF, V2DF)
+DEF_FUNCTION_TYPE (INT, V2DI, V2DI)
+DEF_FUNCTION_TYPE (INT, V4DF, V4DF)
+DEF_FUNCTION_TYPE (INT, V4DI, V4DI)
+DEF_FUNCTION_TYPE (INT, V4SF, V4SF)
+DEF_FUNCTION_TYPE (INT, V8SF, V8SF)
+DEF_FUNCTION_TYPE (QI, V16QI, INT)
+DEF_FUNCTION_TYPE (QI, V8QI, INT)
+DEF_FUNCTION_TYPE (SI, V2SI, INT)
+DEF_FUNCTION_TYPE (SI, V4SI, INT)
+DEF_FUNCTION_TYPE (UINT, UINT, UCHAR)
+DEF_FUNCTION_TYPE (UINT, UINT, UINT)
+DEF_FUNCTION_TYPE (UINT, UINT, USHORT)
+DEF_FUNCTION_TYPE (UINT16, UINT16, INT)
+DEF_FUNCTION_TYPE (UINT64, UINT64, UINT64)
+DEF_FUNCTION_TYPE (UINT8, UINT8, INT)
+DEF_FUNCTION_TYPE (V16QI, V16QI, SI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V16QI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V1DI, V1DI, SI)
+DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI)
+DEF_FUNCTION_TYPE (V1DI, V2SI, V2SI)
+DEF_FUNCTION_TYPE (V1DI, V8QI, V8QI)
+DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE)
+DEF_FUNCTION_TYPE (V2DF, V2DF, SI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF)
+DEF_FUNCTION_TYPE (V2DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V2DI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V2DI, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
+DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
+DEF_FUNCTION_TYPE (V2SI, INT, INT)
+DEF_FUNCTION_TYPE (V2SI, V2SF, V2SF)
+DEF_FUNCTION_TYPE (V2SI, V2SI, SI)
+DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI)
+DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI)
+DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI)
+DEF_FUNCTION_TYPE (V4HI, V2SI, V2SI)
+DEF_FUNCTION_TYPE (V4HI, V4HI, INT)
+DEF_FUNCTION_TYPE (V4HI, V4HI, SI)
+DEF_FUNCTION_TYPE (V4HI, V4HI, V4HI)
+DEF_FUNCTION_TYPE (V4HI, V8QI, V8QI)
+DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, DI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCV2SF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, SI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2SI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI)
+DEF_FUNCTION_TYPE (V4SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V4SI, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V4SI, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, SI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V4SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, INT)
+DEF_FUNCTION_TYPE (V8HI, V8HI, SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V8HI, V8SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V4SF, INT)
+DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI)
+DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI)
+DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF)
+DEF_FUNCTION_TYPE (VOID, PINT, INT)
+DEF_FUNCTION_TYPE (VOID, PULONGLONG, ULONGLONG)
+DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI)
+DEF_FUNCTION_TYPE (VOID, PV2SF, V4SF)
+DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI)
+DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
+
+DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
+DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT)
+DEF_FUNCTION_TYPE (UCHAR, UINT64, UINT, UINT)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, QI, INT)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, UINT, UINT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V2DI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, HI, INT)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V4SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED)
+DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF)
+DEF_FUNCTION_TYPE (VOID, PV4DF, V4DI, V4DF)
+DEF_FUNCTION_TYPE (VOID, PV4SF, V4SI, V4SF)
+DEF_FUNCTION_TYPE (VOID, PV8SF, V8SI, V8SF)
+DEF_FUNCTION_TYPE (VOID, UINT, UINT, UINT)
+DEF_FUNCTION_TYPE (VOID, UINT64, UINT, UINT)
+DEF_FUNCTION_TYPE (VOID, V16QI, V16QI, PCHAR)
+DEF_FUNCTION_TYPE (VOID, V8QI, V8QI, PCHAR)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI)
+DEF_FUNCTION_TYPE (V2UDI, V2UDI, V2UDI, V2UDI)
+DEF_FUNCTION_TYPE (V4USI, V4USI, V4USI, V4USI)
+DEF_FUNCTION_TYPE (V8UHI, V8UHI, V8UHI, V8UHI)
+DEF_FUNCTION_TYPE (V16UQI, V16UQI, V16UQI, V16UQI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI)
+
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
+DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
+
+DEF_FUNCTION_TYPE (INT, V16QI, INT, V16QI, INT, INT)
+DEF_FUNCTION_TYPE (V16QI, V16QI, INT, V16QI, INT, INT)
+
+DEF_FUNCTION_TYPE (V8QI, QI, QI, QI, QI, QI, QI, QI, QI)
+
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST)
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST)
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4DF_V4DF, PTEST)
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4DI_V4DI, PTEST)
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V4SF_V4SF, PTEST)
+DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V8SF_V8SF, PTEST)
+
+DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, VEC_MERGE)
+DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, VEC_MERGE)
+
+DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V2SI_FTYPE_V2SI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4HI_FTYPE_V4HI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_V1DI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V2SI_FTYPE_V2SI_V2SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4HI_FTYPE_V4HI_V4HI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, COUNT)
+DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, COUNT)
+
+DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF_V2DF, SWAP)
+DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, SWAP)
+
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_V1DI_INT, CONVERT)
+
+DEF_FUNCTION_TYPE_ALIAS (V16QI_FTYPE_V16QI_V16QI, CMP)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, CMP)
+DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, CMP)
+DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, CMP)
+
+DEF_FUNCTION_TYPE_ALIAS (V16QI_FTYPE_V16QI_V16QI, TF)
+DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF_V2DF, TF)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI, TF)
+DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, TF)
+DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SI_V4SI, TF)
+DEF_FUNCTION_TYPE_ALIAS (V8HI_FTYPE_V8HI_V8HI, TF)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
new file mode 100644
index 000000000..149735133
--- /dev/null
+++ b/gcc/config/i386/i386-c.c
@@ -0,0 +1,401 @@
+/* Subroutines used for macro/preprocessor support on the ia-32.
+ Copyright (C) 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "flags.h"
+#include "c-family/c-common.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "cpplib.h"
+#include "c-family/c-pragma.h"
+
+static bool ix86_pragma_target_parse (tree, tree);
+static void ix86_target_macros_internal
+ (int, enum processor_type, enum processor_type, enum fpmath_unit,
+ void (*def_or_undef) (cpp_reader *, const char *));
+
+
+/* Internal function to either define or undef the appropriate system
+ macros. */
+static void
+ix86_target_macros_internal (int isa_flag,
+ enum processor_type arch,
+ enum processor_type tune,
+ enum fpmath_unit fpmath,
+ void (*def_or_undef) (cpp_reader *,
+ const char *))
+{
+  /* For some of the k6/pentium variants there weren't separate ISA bits to
+     identify which tune/arch flag was passed, so figure it out here.  */
+ size_t arch_len = strlen (ix86_arch_string);
+ size_t tune_len = strlen (ix86_tune_string);
+ int last_arch_char = ix86_arch_string[arch_len - 1];
+ int last_tune_char = ix86_tune_string[tune_len - 1];
+
+ /* Built-ins based on -march=. */
+ switch (arch)
+ {
+ case PROCESSOR_I386:
+ break;
+ case PROCESSOR_I486:
+ def_or_undef (parse_in, "__i486");
+ def_or_undef (parse_in, "__i486__");
+ break;
+ case PROCESSOR_PENTIUM:
+ def_or_undef (parse_in, "__i586");
+ def_or_undef (parse_in, "__i586__");
+ def_or_undef (parse_in, "__pentium");
+ def_or_undef (parse_in, "__pentium__");
+ if (isa_flag & OPTION_MASK_ISA_MMX)
+ def_or_undef (parse_in, "__pentium_mmx__");
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ def_or_undef (parse_in, "__i686");
+ def_or_undef (parse_in, "__i686__");
+ def_or_undef (parse_in, "__pentiumpro");
+ def_or_undef (parse_in, "__pentiumpro__");
+ break;
+ case PROCESSOR_GEODE:
+ def_or_undef (parse_in, "__geode");
+ def_or_undef (parse_in, "__geode__");
+ break;
+ case PROCESSOR_K6:
+ def_or_undef (parse_in, "__k6");
+ def_or_undef (parse_in, "__k6__");
+ if (last_arch_char == '2')
+ def_or_undef (parse_in, "__k6_2__");
+ else if (last_arch_char == '3')
+ def_or_undef (parse_in, "__k6_3__");
+ else if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__k6_3__");
+ break;
+ case PROCESSOR_ATHLON:
+ def_or_undef (parse_in, "__athlon");
+ def_or_undef (parse_in, "__athlon__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__athlon_sse__");
+ break;
+ case PROCESSOR_K8:
+ def_or_undef (parse_in, "__k8");
+ def_or_undef (parse_in, "__k8__");
+ break;
+ case PROCESSOR_AMDFAM10:
+ def_or_undef (parse_in, "__amdfam10");
+ def_or_undef (parse_in, "__amdfam10__");
+ break;
+ case PROCESSOR_BDVER1:
+ def_or_undef (parse_in, "__bdver1");
+ def_or_undef (parse_in, "__bdver1__");
+ break;
+ case PROCESSOR_BTVER1:
+ def_or_undef (parse_in, "__btver1");
+ def_or_undef (parse_in, "__btver1__");
+ break;
+ case PROCESSOR_PENTIUM4:
+ def_or_undef (parse_in, "__pentium4");
+ def_or_undef (parse_in, "__pentium4__");
+ break;
+ case PROCESSOR_NOCONA:
+ def_or_undef (parse_in, "__nocona");
+ def_or_undef (parse_in, "__nocona__");
+ break;
+ case PROCESSOR_CORE2_32:
+ case PROCESSOR_CORE2_64:
+ def_or_undef (parse_in, "__core2");
+ def_or_undef (parse_in, "__core2__");
+ break;
+ case PROCESSOR_COREI7_32:
+ case PROCESSOR_COREI7_64:
+ def_or_undef (parse_in, "__corei7");
+ def_or_undef (parse_in, "__corei7__");
+ break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__atom");
+ def_or_undef (parse_in, "__atom__");
+ break;
+ /* use PROCESSOR_max to not set/unset the arch macro. */
+ case PROCESSOR_max:
+ break;
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ gcc_unreachable ();
+ }
+
+ /* Built-ins based on -mtune=. */
+ switch (tune)
+ {
+ case PROCESSOR_I386:
+ def_or_undef (parse_in, "__tune_i386__");
+ break;
+ case PROCESSOR_I486:
+ def_or_undef (parse_in, "__tune_i486__");
+ break;
+ case PROCESSOR_PENTIUM:
+ def_or_undef (parse_in, "__tune_i586__");
+ def_or_undef (parse_in, "__tune_pentium__");
+ if (last_tune_char == 'x')
+ def_or_undef (parse_in, "__tune_pentium_mmx__");
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ def_or_undef (parse_in, "__tune_i686__");
+ def_or_undef (parse_in, "__tune_pentiumpro__");
+ switch (last_tune_char)
+ {
+ case '3':
+ def_or_undef (parse_in, "__tune_pentium3__");
+ /* FALLTHRU */
+ case '2':
+ def_or_undef (parse_in, "__tune_pentium2__");
+ break;
+ }
+ break;
+ case PROCESSOR_GEODE:
+ def_or_undef (parse_in, "__tune_geode__");
+ break;
+ case PROCESSOR_K6:
+ def_or_undef (parse_in, "__tune_k6__");
+ if (last_tune_char == '2')
+ def_or_undef (parse_in, "__tune_k6_2__");
+ else if (last_tune_char == '3')
+ def_or_undef (parse_in, "__tune_k6_3__");
+ else if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__tune_k6_3__");
+ break;
+ case PROCESSOR_ATHLON:
+ def_or_undef (parse_in, "__tune_athlon__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__tune_athlon_sse__");
+ break;
+ case PROCESSOR_K8:
+ def_or_undef (parse_in, "__tune_k8__");
+ break;
+ case PROCESSOR_AMDFAM10:
+ def_or_undef (parse_in, "__tune_amdfam10__");
+ break;
+ case PROCESSOR_BDVER1:
+ def_or_undef (parse_in, "__tune_bdver1__");
+ break;
+ case PROCESSOR_BTVER1:
+ def_or_undef (parse_in, "__tune_btver1__");
+ break;
+ case PROCESSOR_PENTIUM4:
+ def_or_undef (parse_in, "__tune_pentium4__");
+ break;
+ case PROCESSOR_NOCONA:
+ def_or_undef (parse_in, "__tune_nocona__");
+ break;
+ case PROCESSOR_CORE2_32:
+ case PROCESSOR_CORE2_64:
+ def_or_undef (parse_in, "__tune_core2__");
+ break;
+ case PROCESSOR_COREI7_32:
+ case PROCESSOR_COREI7_64:
+ def_or_undef (parse_in, "__tune_corei7__");
+ break;
+ case PROCESSOR_ATOM:
+ def_or_undef (parse_in, "__tune_atom__");
+ break;
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ break;
+ /* use PROCESSOR_max to not set/unset the tune macro. */
+ case PROCESSOR_max:
+ break;
+ }
+
+ if (isa_flag & OPTION_MASK_ISA_MMX)
+ def_or_undef (parse_in, "__MMX__");
+ if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__3dNOW__");
+ if (isa_flag & OPTION_MASK_ISA_3DNOW_A)
+ def_or_undef (parse_in, "__3dNOW_A__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__SSE__");
+ if (isa_flag & OPTION_MASK_ISA_SSE2)
+ def_or_undef (parse_in, "__SSE2__");
+ if (isa_flag & OPTION_MASK_ISA_SSE3)
+ def_or_undef (parse_in, "__SSE3__");
+ if (isa_flag & OPTION_MASK_ISA_SSSE3)
+ def_or_undef (parse_in, "__SSSE3__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4_1)
+ def_or_undef (parse_in, "__SSE4_1__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4_2)
+ def_or_undef (parse_in, "__SSE4_2__");
+ if (isa_flag & OPTION_MASK_ISA_AES)
+ def_or_undef (parse_in, "__AES__");
+ if (isa_flag & OPTION_MASK_ISA_PCLMUL)
+ def_or_undef (parse_in, "__PCLMUL__");
+ if (isa_flag & OPTION_MASK_ISA_AVX)
+ def_or_undef (parse_in, "__AVX__");
+ if (isa_flag & OPTION_MASK_ISA_FMA)
+ def_or_undef (parse_in, "__FMA__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4A)
+ def_or_undef (parse_in, "__SSE4A__");
+ if (isa_flag & OPTION_MASK_ISA_FMA4)
+ def_or_undef (parse_in, "__FMA4__");
+ if (isa_flag & OPTION_MASK_ISA_XOP)
+ def_or_undef (parse_in, "__XOP__");
+ if (isa_flag & OPTION_MASK_ISA_LWP)
+ def_or_undef (parse_in, "__LWP__");
+ if (isa_flag & OPTION_MASK_ISA_ABM)
+ def_or_undef (parse_in, "__ABM__");
+ if (isa_flag & OPTION_MASK_ISA_BMI)
+ def_or_undef (parse_in, "__BMI__");
+ if (isa_flag & OPTION_MASK_ISA_TBM)
+ def_or_undef (parse_in, "__TBM__");
+ if (isa_flag & OPTION_MASK_ISA_POPCNT)
+ def_or_undef (parse_in, "__POPCNT__");
+ if (isa_flag & OPTION_MASK_ISA_FSGSBASE)
+ def_or_undef (parse_in, "__FSGSBASE__");
+ if (isa_flag & OPTION_MASK_ISA_RDRND)
+ def_or_undef (parse_in, "__RDRND__");
+ if (isa_flag & OPTION_MASK_ISA_F16C)
+ def_or_undef (parse_in, "__F16C__");
+ if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
+ def_or_undef (parse_in, "__SSE_MATH__");
+ if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
+ def_or_undef (parse_in, "__SSE2_MATH__");
+}
+
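+/* Illustrative sketch, not from the original sources: user code can key
+   off the macros defined above.  Compiling with e.g. -march=corei7 makes
+   both __corei7__ and __SSE4_2__ visible to the preprocessor:
+
+     #ifdef __SSE4_2__
+     # include <nmmintrin.h>      (SSE4.2 intrinsics)
+     #endif
+
+   The same macros are undefined again when a later #pragma GCC target
+   switches to an ISA without the feature.  */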
+
+/* Hook to validate the current #pragma GCC target and set the state, and
+ update the macros based on what was changed. If ARGS is NULL, then
+ POP_TARGET is used to reset the options. */
+
+static bool
+ix86_pragma_target_parse (tree args, tree pop_target)
+{
+ tree prev_tree = build_target_option_node ();
+ tree cur_tree;
+ struct cl_target_option *prev_opt;
+ struct cl_target_option *cur_opt;
+ int prev_isa;
+ int cur_isa;
+ int diff_isa;
+ enum processor_type prev_arch;
+ enum processor_type prev_tune;
+ enum processor_type cur_arch;
+ enum processor_type cur_tune;
+
+ if (! args)
+ {
+ cur_tree = ((pop_target)
+ ? pop_target
+ : target_option_default_node);
+ cl_target_option_restore (&global_options,
+ TREE_TARGET_OPTION (cur_tree));
+ }
+ else
+ {
+ cur_tree = ix86_valid_target_attribute_tree (args);
+ if (!cur_tree)
+ return false;
+ }
+
+ target_option_current_node = cur_tree;
+
+ /* Figure out the previous/current isa, arch, tune and the differences. */
+ prev_opt = TREE_TARGET_OPTION (prev_tree);
+ cur_opt = TREE_TARGET_OPTION (cur_tree);
+ prev_isa = prev_opt->x_ix86_isa_flags;
+ cur_isa = cur_opt->x_ix86_isa_flags;
+ diff_isa = (prev_isa ^ cur_isa);
+ prev_arch = (enum processor_type) prev_opt->arch;
+ prev_tune = (enum processor_type) prev_opt->tune;
+ cur_arch = (enum processor_type) cur_opt->arch;
+ cur_tune = (enum processor_type) cur_opt->tune;
+
+ /* If the same processor is used for both previous and current options, don't
+ change the macros. */
+ if (cur_arch == prev_arch)
+ cur_arch = prev_arch = PROCESSOR_max;
+
+ if (cur_tune == prev_tune)
+ cur_tune = prev_tune = PROCESSOR_max;
+
+ /* Undef all of the macros that are no longer current.  */
+ ix86_target_macros_internal (prev_isa & diff_isa,
+ prev_arch,
+ prev_tune,
+ (enum fpmath_unit) prev_opt->fpmath,
+ cpp_undef);
+
+ /* Define all of the macros for new options that were just turned on. */
+ ix86_target_macros_internal (cur_isa & diff_isa,
+ cur_arch,
+ cur_tune,
+ (enum fpmath_unit) cur_opt->fpmath,
+ cpp_define);
+
+ return true;
+}
+
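+/* For example (an illustrative sketch, not from the original sources),
+   the hook above is what keeps the ISA macros in sync with the pragma,
+   so the #ifdef below holds between push/pop even if SSE4.1 was not
+   enabled on the command line:
+
+     #pragma GCC push_options
+     #pragma GCC target ("sse4.1")
+     #ifdef __SSE4_1__
+       ...code compiled with SSE4.1 enabled...
+     #endif
+     #pragma GCC pop_options
+*/
+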
+/* Function to tell the preprocessor about the defines for the current target. */
+
+void
+ix86_target_macros (void)
+{
+ /* 32/64-bit won't change with target specific options, so do the assert and
+ builtin_define_std calls here. */
+ if (TARGET_64BIT)
+ {
+ cpp_assert (parse_in, "cpu=x86_64");
+ cpp_assert (parse_in, "machine=x86_64");
+ cpp_define (parse_in, "__amd64");
+ cpp_define (parse_in, "__amd64__");
+ cpp_define (parse_in, "__x86_64");
+ cpp_define (parse_in, "__x86_64__");
+ }
+ else
+ {
+ cpp_assert (parse_in, "cpu=i386");
+ cpp_assert (parse_in, "machine=i386");
+ builtin_define_std ("i386");
+ }
+
+ ix86_target_macros_internal (ix86_isa_flags,
+ ix86_arch,
+ ix86_tune,
+ ix86_fpmath,
+ cpp_define);
+}
+
+
+/* Register target pragmas.  We need to add the hook for parsing #pragma GCC
+ target here rather than in i386.c since it will pull in various preprocessor
+ functions, and those are not present in languages like Fortran without a
+ preprocessor.  */
+
+void
+ix86_register_pragmas (void)
+{
+ /* Update pragma hook to allow parsing #pragma GCC target. */
+ targetm.target_option.pragma_parse = ix86_pragma_target_parse;
+
+#ifdef REGISTER_SUBTARGET_PRAGMAS
+ REGISTER_SUBTARGET_PRAGMAS ();
+#endif
+}
diff --git a/gcc/config/i386/i386-interix.h b/gcc/config/i386/i386-interix.h
new file mode 100644
index 000000000..a2f579a1c
--- /dev/null
+++ b/gcc/config/i386/i386-interix.h
@@ -0,0 +1,357 @@
+/* Target definitions for GCC for Intel 80386 running Interix
+ Parts Copyright (C) 1991, 1999, 2000, 2002, 2003, 2004, 2007, 2008, 2009,
+ 2010 Free Software Foundation, Inc.
+
+ Parts:
+ by Douglas B. Rupp (drupp@cs.washington.edu).
+ by Ron Guilmette (rfg@netcom.com).
+ by Donn Terry (donn@softway.com).
+ by Mumit Khan (khan@xraylith.wisc.edu).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The rest must follow. */
+
+#define DBX_DEBUGGING_INFO 1
+#define SDB_DEBUGGING_INFO 1
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+ returns float values in the 387, and needs stack probes.
+ We also align doubles to 64 bits for MSVC default compatibility,
+ and lay out bitfields MSVC-compatibly by default, too.  */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE | \
+ MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
+
+#undef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_i486
+
+#define WCHAR_TYPE_SIZE 16
+#define WCHAR_TYPE "short unsigned int"
+
+/* WinNT (and thus Interix) uses unsigned int.  */
+#define SIZE_TYPE "unsigned int"
+
+#define ASM_LOAD_ADDR(loc, reg) " leal " #loc "," #reg "\n"
+
+#define TARGET_DECLSPEC 1
+
+/* cpp handles __STDC__ */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__INTERIX"); \
+ builtin_define ("__OPENNT"); \
+ builtin_define ("_M_IX86=300"); \
+ builtin_define ("_X86_=1"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=interix"); \
+ if (preprocessing_asm_p ()) \
+ builtin_define_std ("LANGUAGE_ASSEMBLY"); \
+ else \
+ { \
+ builtin_define_std ("LANGUAGE_C"); \
+ if (c_dialect_cxx ()) \
+ builtin_define_std ("LANGUAGE_C_PLUS_PLUS"); \
+ if (c_dialect_objc ()) \
+ builtin_define_std ("LANGUAGE_OBJECTIVE_C"); \
+ } \
+ } \
+ while (0)
+
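+/* Illustrative consequence of the builtins above (not from the original
+   sources): MSVC-style declarations preprocess into GNU attribute
+   syntax, e.g.
+
+     int __stdcall f (int);
+
+   becomes
+
+     int __attribute__((__stdcall__)) f (int);  */
+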
+#undef CPP_SPEC
+/* Write out the correct language type definition for the header files.
+ Unless we have assembler language, write out the symbols for C.
+ mieee is an Alpha-specific variant; cross-pollination is a bad idea.  */
+#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \
+-isystem %$INTERIX_ROOT/usr/include"
+
+#define TARGET_VERSION fprintf (stderr, " (i386 Interix)");
+
+/* The global __fltused is necessary to cause the printf/scanf routines
+ for outputting/inputting floating point numbers to be loaded. Since this
+ is kind of hard to detect, we just do it all the time. */
+#undef X86_FILE_START_FLTUSED
+#define X86_FILE_START_FLTUSED 1
+
+/* A table of byte codes used by the ASM_OUTPUT_ASCII and
+ ASM_OUTPUT_LIMITED_STRING macros.  Each byte in the table
+ corresponds to a particular byte value [0..255].  For any
+ given byte value, if the value in the corresponding table
+ position is zero, the given character can be output directly.
+ If the table value is 1, the byte must be output as a \ooo
+ octal escape.  If the table's value is anything else, then the
+ byte value should be output as a \ followed by the value
+ in the table.  Note that we can use standard UN*X escape
+ sequences for many control characters, but we don't use
+ \a to represent BEL because some svr4 assemblers (e.g. on
+ the i386) don't know about that.  Also, we don't use \v
+ since some versions of gas, such as 2.2, did not accept it.  */
+
+#define ESCAPES \
+"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
+
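+/* A sketch of the intended lookup (illustrative only):
+
+     ESCAPES['\n']  is 'n':  emit the two characters \n
+     ESCAPES[1]     is 1:    emit the octal escape \001
+     ESCAPES['A']   is 0:    emit the byte directly  */
+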
+/* Some svr4 assemblers have a limit on the number of characters which
+ can appear in the operand of a .string directive. If your assembler
+ has such a limitation, you should define STRING_LIMIT to reflect that
+ limit. Note that at least some svr4 assemblers have a limit on the
+ actual number of bytes in the double-quoted string, and that they
+ count each character in an escape sequence as one byte. Thus, an
+ escape sequence like \377 would count as four bytes.
+
+ If your target assembler doesn't support the .string directive, you
+ should define this to zero.
+*/
+
+#define STRING_LIMIT ((unsigned) 256)
+
+#define STRING_ASM_OP "\t.string\t"
+
+/* The routine used to output NUL terminated strings. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable, especially for targets like the i386
+ (where the only alternative is to output character sequences as
+ comma separated lists of numbers). */
+
+#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \
+ do \
+ { \
+ const unsigned char *_limited_str = \
+ (const unsigned char *) (STR); \
+ unsigned ch; \
+ fprintf ((FILE), "%s\"", STRING_ASM_OP); \
+ for (; (ch = *_limited_str); _limited_str++) \
+ { \
+ int escape = ESCAPES[ch]; \
+ switch (escape) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ break; \
+ } \
+ } \
+ fprintf ((FILE), "\"\n"); \
+ } \
+ while (0)
+
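+/* Sample of the resulting assembly (illustrative): for a string
+   containing "hi" followed by a newline, the macro above emits
+
+     .string "hi\n"
+
+   with the newline escaped via the ESCAPES table.  */
+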
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ unsigned bytes_in_chunk = 0; \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ if (bytes_in_chunk >= 64) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ if (bytes_in_chunk == 0) \
+ fputs (ASM_BYTE, (FILE)); \
+ else \
+ fputc (',', (FILE)); \
+ fprintf ((FILE), "0x%02x", *_ascii_bytes); \
+ bytes_in_chunk += 5; \
+ } \
+ } \
+ if (bytes_in_chunk > 0) \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
+
+/* Emit code to check the stack when allocating more than 0x1000
+ (4096) bytes in one go.  */
+
+#define CHECK_STACK_LIMIT 0x1000
+
+/* The following are OSF linker (not gld) specific; we don't want them.  */
+#undef HAS_INIT_SECTION
+#undef LD_INIT_SWITCH
+#undef LD_FINI_SWITCH
+
+/* The following are needed for us to be able to use winnt.c, but are not
+ otherwise meaningful to Interix. (The functions that use these are
+ never called because we don't do DLLs.) */
+#define TARGET_NOP_FUN_DLLIMPORT 1
+#define drectve_section() /* nothing */
+
+/* Objective-C has its own packing rules...
+ Objc tries to parallel the code in stor-layout.c at runtime
+ (see libobjc/encoding.c).  This (compile-time) packing info isn't
+ available at runtime, so it's hopeless to try.
+
+ And if the user tries to set the flag for objc, give an error
+ so the user has some clue.  */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (strcmp (lang_hooks.name, "GNU Objective-C") == 0) \
+ { \
+ if ((target_flags & MASK_MS_BITFIELD_LAYOUT) != 0 \
+ && (target_flags_explicit & MASK_MS_BITFIELD_LAYOUT) != 0) \
+ { \
+ error ("ms-bitfields not supported for objc"); \
+ } \
+ target_flags &= ~MASK_MS_BITFIELD_LAYOUT; \
+ } \
+} while (0)
+
+#define EH_FRAME_IN_DATA_SECTION
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rdata,\"r\""
+
+/* The MS compilers take alignment as a number of bytes, so we do as well.  */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
+
+/* The linker will take care of the constructor lists, and defining them
+ here causes problems with ld -r (specifically -rU).  */
+#define CTOR_LISTS_DEFINED_EXTERNALLY 1
+
+#define SET_ASM_OP "\t.set\t"
+/* Output a definition (implements alias) */
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+do \
+{ \
+ fputs (SET_ASM_OP, (FILE)); \
+ assemble_name (FILE, LABEL1); \
+ fputc (',', (FILE)); \
+ assemble_name (FILE, LABEL2); \
+ fputc ('\n', (FILE)); \
+ } \
+while (0)
+
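+/* For instance (illustrative), ASM_OUTPUT_DEF (file, "foo", "bar")
+   emits
+
+     .set  foo,bar
+
+   defining foo as an alias for bar.  */
+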
+#define HOST_PTR_AS_INT unsigned long
+
+/* The following two flags are usually "off" for i386, because some non-gnu
+ tools (for the i386) don't handle them.  However, we don't have that
+ problem, so we turn them on.  */
+
+/* Forward references to tags are allowed. */
+#define SDB_ALLOW_FORWARD_REFERENCES
+
+/* Unknown tags are also allowed. */
+#define SDB_ALLOW_UNKNOWN_REFERENCES
+
+/* The integer half of this list needs to be constant.  However, there's
+ a lot of disagreement about what the floating point adjustments should
+ be.  We pick one that works with gdb.  (The underlying problem is
+ what to do about the segment registers.  Since we have access to them
+ from /proc, we'll allow them to be accessed in gdb, even though the
+ gcc compiler can't generate them.  There's some evidence that
+ MSVC does, but possibly only for certain special "canned" sequences.)  */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+(TARGET_64BIT ? dbx64_register_map[n] \
+ : (n) == 0 ? 0 \
+ : (n) == 1 ? 2 \
+ : (n) == 2 ? 1 \
+ : (n) == 3 ? 3 \
+ : (n) == 4 ? 6 \
+ : (n) == 5 ? 7 \
+ : (n) == 6 ? 5 \
+ : (n) == 7 ? 4 \
+ : ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG) ? (n)+8 \
+ : (-1))
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in). */
+
+#define SUBTARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING i386_pe_strip_name_encoding_full
+
+#if 0
+/* Turn this back on when the linker is updated to handle grouped
+ .data$ sections correctly. See corresponding note in i386/interix.c.
+ MK. */
+
+/* Interix uses explicit import from shared libraries. */
+#define MULTIPLE_SYMBOL_SPACES 1
+
+extern void i386_pe_unique_section (tree, int);
+#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+
+#define SUPPORTS_ONE_ONLY 1
+#endif /* 0 */
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_pe_asm_named_section
+
+/* DWARF2 Unwinding doesn't work with exception handling yet. */
+#define DWARF2_UNWIND_INFO 0
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* MSVC returns structs of up to 8 bytes via registers. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ (TYPE_MODE (TYPE) == BLKmode \
+ || (AGGREGATE_TYPE_P (TYPE) && int_size_in_bytes (TYPE) > 8 ))
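+
+/* Worked example (illustrative, not from the original sources): with
+   DEFAULT_PCC_STRUCT_RETURN 0 and the rule above,
+
+     struct s8  { int a, b;    };    8 bytes: returned in registers
+     struct s12 { int a, b, c; };   12 bytes: returned in memory
+
+   which matches the MSVC convention noted above.  */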
diff --git a/gcc/config/i386/i386-interix3.h b/gcc/config/i386/i386-interix3.h
new file mode 100644
index 000000000..abd202c91
--- /dev/null
+++ b/gcc/config/i386/i386-interix3.h
@@ -0,0 +1,23 @@
+/* Target definitions for GCC for Intel 80386 running Interix V3.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
new file mode 100644
index 000000000..c1e82cc6d
--- /dev/null
+++ b/gcc/config/i386/i386-modes.def
@@ -0,0 +1,91 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 2002, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The x86_64 ABI specifies both XF and TF modes.
+ XFmode is __float80, the IEEE extended format; TFmode is __float128,
+ the IEEE quad format.  */
+
+FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* In ILP32 mode, XFmode has size 12 and alignment 4.
+ In LP64 mode, XFmode has size and alignment 16. */
+ADJUST_FLOAT_FORMAT (XF, (TARGET_128BIT_LONG_DOUBLE
+ ? &ieee_extended_intel_128_format
+ : TARGET_96_ROUND_53_LONG_DOUBLE
+ ? &ieee_extended_intel_96_round_53_format
+ : &ieee_extended_intel_96_format));
+ADJUST_BYTESIZE (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 12);
+ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
+
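+/* Concretely (an illustrative summary): with the defaults this yields
+
+     ILP32: sizeof (long double) == 12, __alignof__ (long double) == 4
+     LP64:  sizeof (long double) == 16, __alignof__ (long double) == 16
+
+   and -m128bit-long-double selects the 16-byte layout on ia32 too.  */
+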
+/* Add any extra modes needed to represent the condition code.
+
+ For the i386, we need separate modes when floating-point
+ equality comparisons are being done.
+
+ Add CCNO to indicate comparisons against zero that require the
+ Overflow flag to be unset.  The sign bit test is used instead,
+ and thus can be used to form "a&b>0"-type tests.
+
+ Add CCGC to indicate comparisons against zero that allow
+ unspecified garbage in the Carry flag.  This mode is used
+ by inc/dec instructions.
+
+ Add CCGOC to indicate comparisons against zero that allow
+ unspecified garbage in the Carry and Overflow flags.  This
+ mode is used to simulate comparisons of (a-b) and (a+b)
+ against zero using sub/cmp/add operations.
+
+ Add CCA to indicate that only the Above flag is valid.
+ Add CCC to indicate that only the Carry flag is valid.
+ Add CCO to indicate that only the Overflow flag is valid.
+ Add CCS to indicate that only the Sign flag is valid.
+ Add CCZ to indicate that only the Zero flag is valid.  */
+
+CC_MODE (CCGC);
+CC_MODE (CCGOC);
+CC_MODE (CCNO);
+CC_MODE (CCA);
+CC_MODE (CCC);
+CC_MODE (CCO);
+CC_MODE (CCS);
+CC_MODE (CCZ);
+CC_MODE (CCFP);
+CC_MODE (CCFPU);
+
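+/* A minimal sketch of why CCGOCmode exists (illustrative): to test the
+   sign of a computed value a - b,
+
+     subl %ebx, %eax       sets SF; Carry and Overflow are don't-cares
+     js   .Lnegative
+
+   the flags of the subtraction itself can be reused, which is exactly
+   the "garbage in Carry and Overflow" contract CCGOCmode expresses.  */
+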
+/* Vector modes. Note that VEC_CONCAT patterns require vector
+ sizes twice as big as implemented in hardware. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
+VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
+VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */
+VECTOR_MODE (INT, TI, 1); /* V1TI */
+VECTOR_MODE (INT, DI, 1); /* V1DI */
+VECTOR_MODE (INT, SI, 1); /* V1SI */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+
+INT_MODE (OI, 32);
+
+/* The symbol Pmode stands for one of the above machine modes (usually SImode).
+ The tm.h file specifies which one. It is not a distinct mode. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
new file mode 100644
index 000000000..d4513fa8e
--- /dev/null
+++ b/gcc/config/i386/i386-protos.h
@@ -0,0 +1,292 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Functions in i386.c */
+extern bool ix86_target_stack_probe (void);
+extern bool ix86_can_use_return_insn_p (void);
+extern void ix86_setup_frame_addresses (void);
+
+extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
+extern void ix86_expand_prologue (void);
+extern void ix86_expand_epilogue (int);
+extern void ix86_expand_split_stack_prologue (void);
+
+extern void ix86_output_addr_vec_elt (FILE *, int);
+extern void ix86_output_addr_diff_elt (FILE *, int, int);
+
+extern enum calling_abi ix86_cfun_abi (void);
+extern enum calling_abi ix86_function_type_abi (const_tree);
+
+#ifdef RTX_CODE
+extern int standard_80387_constant_p (rtx);
+extern const char *standard_80387_constant_opcode (rtx);
+extern rtx standard_80387_constant_rtx (int);
+extern int standard_sse_constant_p (rtx);
+extern const char *standard_sse_constant_opcode (rtx, rtx);
+extern bool symbolic_reference_mentioned_p (rtx);
+extern bool extended_reg_mentioned_p (rtx);
+extern bool x86_extended_QIreg_mentioned_p (rtx);
+extern bool x86_extended_reg_mentioned_p (rtx);
+extern bool x86_maybe_negate_const_int (rtx *, enum machine_mode);
+extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
+
+extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
+extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
+
+extern bool ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
+extern bool ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
+extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
+
+extern bool legitimate_constant_p (rtx);
+extern bool constant_address_p (rtx);
+extern bool legitimate_pic_operand_p (rtx);
+extern bool legitimate_pic_address_disp_p (rtx);
+extern bool ix86_legitimize_reload_address (rtx, enum machine_mode,
+ int, int, int);
+extern void print_reg (rtx, int, FILE*);
+extern void ix86_print_operand (FILE *, rtx, int);
+
+extern void split_double_mode (enum machine_mode, rtx[], int, rtx[], rtx[]);
+
+extern const char *output_set_got (rtx, rtx);
+extern const char *output_387_binary_op (rtx, rtx*);
+extern const char *output_387_reg_move (rtx, rtx*);
+extern const char *output_fix_trunc (rtx, rtx*, int);
+extern const char *output_fp_compare (rtx, rtx*, int, int);
+extern const char *output_adjust_stack_and_probe (rtx);
+extern const char *output_probe_stack_range (rtx, rtx);
+
+extern void ix86_expand_clear (rtx);
+extern void ix86_expand_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
+extern void ix86_expand_push (enum machine_mode, rtx);
+extern rtx ix86_fixup_binary_operands (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_expand_binary_operator (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern bool ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern bool ix86_lea_for_add_ok (rtx, rtx[]);
+extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
+extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
+extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
+extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
+extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
+extern void ix86_split_convert_uns_si_sse (rtx[]);
+extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
+extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
+extern enum ix86_fpcmp_strategy ix86_fp_comparison_strategy (enum rtx_code);
+extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+extern void ix86_expand_copysign (rtx []);
+extern void ix86_split_copysign_const (rtx []);
+extern void ix86_split_copysign_var (rtx []);
+extern bool ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern bool ix86_match_ccmode (rtx, enum machine_mode);
+extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
+extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx);
+extern bool ix86_expand_int_movcc (rtx[]);
+extern bool ix86_expand_fp_movcc (rtx[]);
+extern bool ix86_expand_fp_vcond (rtx[]);
+extern bool ix86_expand_int_vcond (rtx[]);
+extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
+extern bool ix86_expand_int_addcc (rtx[]);
+extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void ix86_split_call_vzeroupper (rtx, rtx);
+extern void x86_initialize_trampoline (rtx, rtx, rtx);
+extern rtx ix86_zero_extend_to_Pmode (rtx);
+extern void ix86_split_long_move (rtx[]);
+extern void ix86_split_ashl (rtx *, rtx, enum machine_mode);
+extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
+extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
+extern rtx ix86_find_base_term (rtx);
+extern bool ix86_check_movabs (rtx, int);
+extern void ix86_split_idivmod (enum machine_mode, rtx[], bool);
+
+extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
+extern int ix86_attr_length_immediate_default (rtx, int);
+extern int ix86_attr_length_address_default (rtx);
+extern int ix86_attr_length_vex_default (rtx, int, int);
+
+extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
+
+extern rtx ix86_libcall_value (enum machine_mode);
+extern bool ix86_function_arg_regno_p (int);
+extern void ix86_asm_output_function_label (FILE *, const char *, tree);
+extern rtx ix86_force_to_memory (enum machine_mode, rtx);
+extern void ix86_free_from_memory (enum machine_mode);
+extern void ix86_call_abi_override (const_tree);
+extern int ix86_reg_parm_stack_space (const_tree);
+
+extern void ix86_split_fp_branch (enum rtx_code code, rtx, rtx,
+ rtx, rtx, rtx, rtx);
+extern bool ix86_hard_regno_mode_ok (int, enum machine_mode);
+extern bool ix86_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern bool ix86_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode, int);
+extern bool ix86_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode, enum reg_class);
+extern int ix86_mode_needed (int, rtx);
+extern void emit_i387_cw_initialization (int);
+extern void x86_order_regs_for_local_alloc (void);
+extern void x86_function_profiler (FILE *, int);
+extern void x86_emit_floatuns (rtx [2]);
+extern void ix86_emit_fp_unordered_jump (rtx);
+
+extern void ix86_emit_i387_log1p (rtx, rtx);
+extern void ix86_emit_swdivsf (rtx, rtx, rtx, enum machine_mode);
+extern void ix86_emit_swsqrtsf (rtx, rtx, enum machine_mode, bool);
+
+extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
+
+extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
+extern void ix86_expand_floorceil (rtx, rtx, bool);
+extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
+extern void ix86_expand_round (rtx, rtx);
+extern void ix86_expand_rounddf_32 (rtx, rtx);
+extern void ix86_expand_trunc (rtx, rtx);
+extern void ix86_expand_truncdf_32 (rtx, rtx);
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+#endif /* TREE_CODE */
+
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+extern int ix86_data_alignment (tree, int);
+extern unsigned int ix86_local_alignment (tree, enum machine_mode,
+ unsigned int);
+extern unsigned int ix86_minimum_alignment (tree, enum machine_mode,
+ unsigned int);
+extern int ix86_constant_alignment (tree, int);
+extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *);
+extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *);
+extern int x86_field_alignment (tree, int);
+extern tree ix86_valid_target_attribute_tree (tree);
+#endif
+
+extern rtx ix86_tls_get_addr (void);
+extern rtx ix86_tls_module_base (void);
+
+extern void ix86_expand_vector_init (bool, rtx, rtx);
+extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
+extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
+
+extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
+
+/* In i386-c.c */
+extern void ix86_target_macros (void);
+extern void ix86_register_pragmas (void);
+
+/* In winnt.c */
+extern void i386_pe_unique_section (tree, int);
+extern void i386_pe_declare_function_type (FILE *, const char *, int);
+extern void i386_pe_record_external_function (tree, const char *);
+extern void i386_pe_maybe_record_exported_symbol (tree, const char *, int);
+extern void i386_pe_encode_section_info (tree, rtx, int);
+extern bool i386_pe_binds_local_p (const_tree);
+extern const char *i386_pe_strip_name_encoding_full (const char *);
+extern bool i386_pe_valid_dllimport_attribute_p (const_tree);
+extern unsigned int i386_pe_section_type_flags (tree, const char *, int);
+extern void i386_pe_asm_named_section (const char *, unsigned int, tree);
+extern void i386_pe_asm_output_aligned_decl_common (FILE *, tree,
+ const char *,
+ HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern void i386_pe_file_end (void);
+extern void i386_pe_start_function (FILE *, const char *, tree);
+extern void i386_pe_end_function (FILE *, const char *, tree);
+extern void i386_pe_assemble_visibility (tree, int);
+extern tree i386_pe_mangle_decl_assembler_name (tree, tree);
+extern tree i386_pe_mangle_assembler_name (const char *);
+
+extern void i386_pe_seh_init (FILE *);
+extern void i386_pe_seh_end_prologue (FILE *);
+extern void i386_pe_seh_unwind_emit (FILE *, rtx);
+
+/* In winnt-cxx.c and winnt-stubs.c */
+extern void i386_pe_adjust_class_at_definition (tree);
+extern bool i386_pe_type_dllimport_p (tree);
+extern bool i386_pe_type_dllexport_p (tree);
+
+extern rtx maybe_get_pool_constant (rtx);
+
+extern char internal_label_prefix[16];
+extern int internal_label_prefix_len;
+
+enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS };
+struct ix86_address
+{
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
+};
+
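+/* Example decomposition (illustrative): for the operand
+   4(%ebx,%ecx,2), i.e. (plus (plus (mult ecx 2) ebx) (const_int 4)),
+   ix86_decompose_address below fills in base = ebx, index = ecx,
+   scale = 2, disp = (const_int 4) and seg = SEG_DEFAULT.  */
+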
+extern int ix86_decompose_address (rtx, struct ix86_address *);
+extern int memory_address_length (rtx addr);
+extern void x86_output_aligned_bss (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT, int);
+extern void x86_elf_aligned_common (FILE *, const char *,
+ unsigned HOST_WIDE_INT, int);
+
+#ifdef RTX_CODE
+extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
+ enum rtx_code *, enum rtx_code *);
+extern enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
+extern rtx construct_plt_address (rtx);
+#endif
+extern int asm_preferred_eh_data_format (int, int);
+
+#ifdef HAVE_ATTR_cpu
+extern enum attr_cpu ix86_schedule;
+#endif
+
+extern const char * ix86_output_call_insn (rtx insn, rtx call_op, int addr_op);
+
+#ifdef RTX_CODE
+/* Target data for multipass lookahead scheduling.
+ Currently used for Core 2/i7 tuning. */
+struct ix86_first_cycle_multipass_data_
+{
+ /* The length (in bytes) of ifetch block in this solution. */
+ int ifetch_block_len;
+ /* Number of instructions in ifetch block in this solution. */
+ int ifetch_block_n_insns;
+ /* Bitmap to remember changes to ready_try for backtracking. */
+ sbitmap ready_try_change;
+ /* Size of the bitmap. */
+ int ready_try_change_size;
+};
+# define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T \
+ struct ix86_first_cycle_multipass_data_
+#endif /* RTX_CODE */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
new file mode 100644
index 000000000..c43d3ed57
--- /dev/null
+++ b/gcc/config/i386/i386.c
@@ -0,0 +1,35376 @@
+/* Subroutines used for code generation on IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-codes.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "cgraph.h"
+#include "gimple.h"
+#include "dwarf2.h"
+#include "df.h"
+#include "tm-constrs.h"
+#include "params.h"
+#include "cselib.h"
+#include "debug.h"
+#include "dwarf2out.h"
+#include "sched-int.h"
+#include "sbitmap.h"
+#include "fibheap.h"
+
+enum upper_128bits_state
+{
+ unknown = 0,
+ unused,
+ used
+};
+
+typedef struct block_info_def
+{
+ /* State of the upper 128bits of AVX registers at exit. */
+ enum upper_128bits_state state;
+ /* TRUE if state of the upper 128bits of AVX registers is unchanged
+ in this block. */
+ bool unchanged;
+ /* TRUE if block has been processed. */
+ bool processed;
+ /* TRUE if block has been scanned. */
+ bool scanned;
+ /* Previous state of the upper 128bits of AVX registers at entry. */
+ enum upper_128bits_state prev;
+} *block_info;
+
+#define BLOCK_INFO(B) ((block_info) (B)->aux)
+
+enum call_avx256_state
+{
+ /* Callee returns 256bit AVX register. */
+ callee_return_avx256 = -1,
+ /* Callee returns and passes 256bit AVX register. */
+ callee_return_pass_avx256,
+ /* Callee passes 256bit AVX register. */
+ callee_pass_avx256,
+ /* Callee neither returns nor passes a 256bit AVX register, or no
+ 256bit AVX register is used in the function return.  */
+ call_no_avx256,
+ /* vzeroupper intrinsic. */
+ vzeroupper_intrinsic
+};
+
+/* Check if a 256bit AVX register is referenced in stores. */
+
+static void
+check_avx256_stores (rtx dest, const_rtx set, void *data)
+{
+ if ((REG_P (dest)
+ && VALID_AVX256_REG_MODE (GET_MODE (dest)))
+ || (GET_CODE (set) == SET
+ && REG_P (SET_SRC (set))
+ && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
+ {
+ enum upper_128bits_state *state
+ = (enum upper_128bits_state *) data;
+ *state = used;
+ }
+}
+
+/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
+ in basic block BB. Delete it if upper 128bit AVX registers are
+ unused. If it isn't deleted, move it to just before a jump insn.
+
+ STATE is state of the upper 128bits of AVX registers at entry. */
+
+static void
+move_or_delete_vzeroupper_2 (basic_block bb,
+ enum upper_128bits_state state)
+{
+ rtx insn, bb_end;
+ rtx vzeroupper_insn = NULL_RTX;
+ rtx pat;
+ int avx256;
+ bool unchanged;
+
+ if (BLOCK_INFO (bb)->unchanged)
+ {
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
+ bb->index, state);
+
+ BLOCK_INFO (bb)->state = state;
+ return;
+ }
+
+ if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
+ {
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
+ bb->index, BLOCK_INFO (bb)->state);
+ return;
+ }
+
+ BLOCK_INFO (bb)->prev = state;
+
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
+ bb->index, state);
+
+ unchanged = true;
+
+ /* BB_END changes when it is deleted. */
+ bb_end = BB_END (bb);
+ insn = BB_HEAD (bb);
+ while (insn != bb_end)
+ {
+ insn = NEXT_INSN (insn);
+
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* Move vzeroupper before jump/call. */
+ if (JUMP_P (insn) || CALL_P (insn))
+ {
+ if (!vzeroupper_insn)
+ continue;
+
+ if (PREV_INSN (insn) != vzeroupper_insn)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Move vzeroupper after:\n");
+ print_rtl_single (dump_file, PREV_INSN (insn));
+ fprintf (dump_file, "before:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
+ PREV_INSN (insn));
+ }
+ vzeroupper_insn = NULL_RTX;
+ continue;
+ }
+
+ pat = PATTERN (insn);
+
+ /* Check insn for vzeroupper intrinsic. */
+ if (GET_CODE (pat) == UNSPEC_VOLATILE
+ && XINT (pat, 1) == UNSPECV_VZEROUPPER)
+ {
+ if (dump_file)
+ {
+ /* Found vzeroupper intrinsic. */
+ fprintf (dump_file, "Found vzeroupper:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ }
+ else
+ {
+ /* Check insn for vzeroall intrinsic. */
+ if (GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
+ && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
+ {
+ state = unused;
+ unchanged = false;
+
+ /* Delete pending vzeroupper insertion. */
+ if (vzeroupper_insn)
+ {
+ delete_insn (vzeroupper_insn);
+ vzeroupper_insn = NULL_RTX;
+ }
+ }
+ else if (state != used)
+ {
+ note_stores (pat, check_avx256_stores, &state);
+ if (state == used)
+ unchanged = false;
+ }
+ continue;
+ }
+
+ /* Process vzeroupper intrinsic. */
+ avx256 = INTVAL (XVECEXP (pat, 0, 0));
+
+ if (state == unused)
+ {
+ /* Since the upper 128bits are cleared, callee must not pass
+ 256bit AVX register. We only need to check if callee
+ returns 256bit AVX register. */
+ if (avx256 == callee_return_avx256)
+ {
+ state = used;
+ unchanged = false;
+ }
+
+ /* Remove unnecessary vzeroupper since upper 128bits are
+ cleared. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "Delete redundant vzeroupper:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ delete_insn (insn);
+ }
+ else
+ {
+ /* Set state to UNUSED if callee doesn't return 256bit AVX
+ register. */
+ if (avx256 != callee_return_pass_avx256)
+ state = unused;
+
+ if (avx256 == callee_return_pass_avx256
+ || avx256 == callee_pass_avx256)
+ {
+ /* Must remove vzeroupper since callee passes in 256bit
+ AVX register. */
+ if (dump_file)
+ {
+ fprintf (dump_file, "Delete callee pass vzeroupper:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ delete_insn (insn);
+ }
+ else
+ {
+ vzeroupper_insn = insn;
+ unchanged = false;
+ }
+ }
+ }
+
+ BLOCK_INFO (bb)->state = state;
+ BLOCK_INFO (bb)->unchanged = unchanged;
+ BLOCK_INFO (bb)->scanned = true;
+
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
+ bb->index, unchanged ? "unchanged" : "changed",
+ state);
+}
+
+/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
+ in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
+ as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
+ state is changed.  */
+
+static bool
+move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
+{
+ edge e;
+ edge_iterator ei;
+ enum upper_128bits_state state, old_state, new_state;
+ bool seen_unknown;
+
+ if (dump_file)
+ fprintf (dump_file, " Process [bb %i]: status: %d\n",
+ block->index, BLOCK_INFO (block)->processed);
+
+ if (BLOCK_INFO (block)->processed)
+ return false;
+
+ state = unused;
+
+ /* Check all predecessor edges of this block. */
+ seen_unknown = false;
+ FOR_EACH_EDGE (e, ei, block->preds)
+ {
+ if (e->src == block)
+ continue;
+ switch (BLOCK_INFO (e->src)->state)
+ {
+ case unknown:
+ if (!unknown_is_unused)
+ seen_unknown = true;
+ /* FALLTHRU */
+ case unused:
+ break;
+ case used:
+ state = used;
+ goto done;
+ }
+ }
+
+ if (seen_unknown)
+ state = unknown;
+
+done:
+ old_state = BLOCK_INFO (block)->state;
+ move_or_delete_vzeroupper_2 (block, state);
+ new_state = BLOCK_INFO (block)->state;
+
+ if (state != unknown || new_state == used)
+ BLOCK_INFO (block)->processed = true;
+
+ /* Need to rescan if the upper 128bits of AVX registers are changed
+ to USED at exit. */
+ if (new_state != old_state)
+ {
+ if (new_state == used)
+ cfun->machine->rescan_vzeroupper_p = 1;
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Go through the instruction stream looking for vzeroupper. Delete
+ it if upper 128bit AVX registers are unused. If it isn't deleted,
+ move it to just before a jump insn. */
+
+static void
+move_or_delete_vzeroupper (void)
+{
+ edge e;
+ edge_iterator ei;
+ basic_block bb;
+ fibheap_t worklist, pending, fibheap_swap;
+ sbitmap visited, in_worklist, in_pending, sbitmap_swap;
+ int *bb_order;
+ int *rc_order;
+ int i;
+
+ /* Set up block info for each basic block. */
+ alloc_aux_for_blocks (sizeof (struct block_info_def));
+
+ /* Process outgoing edges of entry point. */
+ if (dump_file)
+ fprintf (dump_file, "Process outgoing edges of entry point\n");
+
+ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
+ {
+ move_or_delete_vzeroupper_2 (e->dest,
+ cfun->machine->caller_pass_avx256_p
+ ? used : unused);
+ BLOCK_INFO (e->dest)->processed = true;
+ }
+
+ /* Compute reverse completion order of depth first search of the CFG
+ so that the data-flow runs faster. */
+ rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
+ bb_order = XNEWVEC (int, last_basic_block);
+ pre_and_rev_post_order_compute (NULL, rc_order, false);
+ for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
+ bb_order[rc_order[i]] = i;
+ free (rc_order);
+
+ worklist = fibheap_new ();
+ pending = fibheap_new ();
+ visited = sbitmap_alloc (last_basic_block);
+ in_worklist = sbitmap_alloc (last_basic_block);
+ in_pending = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (in_worklist);
+
+ /* Don't check outgoing edges of entry point. */
+ sbitmap_ones (in_pending);
+ FOR_EACH_BB (bb)
+ if (BLOCK_INFO (bb)->processed)
+ RESET_BIT (in_pending, bb->index);
+ else
+ {
+ move_or_delete_vzeroupper_1 (bb, false);
+ fibheap_insert (pending, bb_order[bb->index], bb);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "Check remaining basic blocks\n");
+
+ while (!fibheap_empty (pending))
+ {
+ fibheap_swap = pending;
+ pending = worklist;
+ worklist = fibheap_swap;
+ sbitmap_swap = in_pending;
+ in_pending = in_worklist;
+ in_worklist = sbitmap_swap;
+
+ sbitmap_zero (visited);
+
+ cfun->machine->rescan_vzeroupper_p = 0;
+
+ while (!fibheap_empty (worklist))
+ {
+ bb = (basic_block) fibheap_extract_min (worklist);
+ RESET_BIT (in_worklist, bb->index);
+ gcc_assert (!TEST_BIT (visited, bb->index));
+ if (!TEST_BIT (visited, bb->index))
+ {
+ edge_iterator ei;
+
+ SET_BIT (visited, bb->index);
+
+ if (move_or_delete_vzeroupper_1 (bb, false))
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (e->dest == EXIT_BLOCK_PTR
+ || BLOCK_INFO (e->dest)->processed)
+ continue;
+
+ if (TEST_BIT (visited, e->dest->index))
+ {
+ if (!TEST_BIT (in_pending, e->dest->index))
+ {
+ /* Send E->DEST to next round. */
+ SET_BIT (in_pending, e->dest->index);
+ fibheap_insert (pending,
+ bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ else if (!TEST_BIT (in_worklist, e->dest->index))
+ {
+ /* Add E->DEST to current round. */
+ SET_BIT (in_worklist, e->dest->index);
+ fibheap_insert (worklist, bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ }
+ }
+
+ if (!cfun->machine->rescan_vzeroupper_p)
+ break;
+ }
+
+ free (bb_order);
+ fibheap_delete (worklist);
+ fibheap_delete (pending);
+ sbitmap_free (visited);
+ sbitmap_free (in_worklist);
+ sbitmap_free (in_pending);
+
+ if (dump_file)
+ fprintf (dump_file, "Process remaining basic blocks\n");
+
+ FOR_EACH_BB (bb)
+ move_or_delete_vzeroupper_1 (bb, true);
+
+ free_aux_for_blocks ();
+}
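+
+/* Motivating example for this pass (illustrative, not from the original
+   sources): a function such as
+
+     #include <immintrin.h>
+     __m256 f (__m256 a, __m256 b)
+     {
+       return _mm256_add_ps (a, b);
+     }
+
+   dirties the upper 128 bits of the AVX registers, so a vzeroupper is
+   normally required before later legacy-SSE code to avoid the AVX/SSE
+   transition penalty.  The pass above deletes the vzeroupper when the
+   upper bits are provably unused on every incoming path, and otherwise
+   sinks it to just before the jump or call.  */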
+
+static rtx legitimize_dllimport_symbol (rtx, bool);
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT (-1)
+#endif
+
+/* Return index of given mode in mult and division cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
+
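+/* Usage sketch (illustrative): MODE_INDEX (SImode) == 2, so the SImode
+   multiply cost is the third entry of each "cost of starting multiply"
+   array in the tables below.  */
+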
+/* Processor costs (relative to an add) */
+/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
+#define COSTS_N_BYTES(N) ((N) * 2)
+
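+/* So, on the common scale assumed above, COSTS_N_BYTES (2) == 4
+   == COSTS_N_INSNS (1): a two-byte addition counts the same as one
+   insn, which lets the size table below be compared against the
+   speed tables.  */
+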
+#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
+
+const
+struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+ COSTS_N_BYTES (2), /* cost of an add instruction */
+ COSTS_N_BYTES (3), /* cost of a lea instruction */
+ COSTS_N_BYTES (2), /* variable shift costs */
+ COSTS_N_BYTES (3), /* constant shift costs */
+ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* cost of movsx */
+ COSTS_N_BYTES (3), /* cost of movzx */
+ 0, /* "large" insn */
+ 2, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, /* cost of moving SSE register */
+ {3, 3, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {3, 3, 3}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_BYTES (2), /* cost of FMUL instruction. */
+ COSTS_N_BYTES (2), /* cost of FDIV instruction. */
+ COSTS_N_BYTES (2), /* cost of FABS instruction. */
+ COSTS_N_BYTES (2), /* cost of FCHS instruction. */
+ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Processor costs (relative to an add) */
+static const
+struct processor_costs i386_cost = { /* 386 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs i486_cost = { /* 486 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
+ 1, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 4, /* size of l1 cache. 486 has 8kB cache
+ shared for code and data, so 4kB is
+ not really precise. */
+ 4, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentium_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentiumpro_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
+ (we ensure the alignment). For small blocks an inline loop is still a
+ noticeable win; for bigger blocks either rep movsl or rep movsb is the
+ way to go. Rep movsb apparently has a more expensive startup time in the
+ CPU, but after 4K the difference is down in the noise. (See the sketch
+ after this table for how these entries are consulted.) */
+ {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
+ {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_4_byte, {{1024, unrolled_loop},
+ {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
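+
+/* Illustrative sketch (not part of the upstream source): how a stringop_algs
+ table such as the ones embedded in these cost structures is consulted.
+ Each cost table carries one entry for memcpy and one for memset, and each
+ entry pairs a 32-bit variant with a 64-bit variant (DUMMY_STRINGOP_ALGS
+ fills the 64-bit slot on 32-bit-only CPUs). The {max, alg} pairs are
+ scanned in order; max == -1 terminates the list and names the fallback
+ algorithm. Field names assume the stringop_algs layout declared in
+ i386.h. */
+#if 0 /* Example only, never compiled. */
+static enum stringop_alg
+pick_stringop_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
+{
+  int i;
+  for (i = 0; algs->size[i].max != -1; i++)
+    if (size <= algs->size[i].max)
+      return algs->size[i].alg;
+  return algs->size[i].alg; /* The -1 sentinel supplies the fallback. */
+}
+#endif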
+
+static const
+struct processor_costs geode_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (2), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 1, /* cost for loading QImode using movzbl */
+ {1, 1, 1}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {1, 1, 1}, /* cost of storing integer registers */
+ 1, /* cost of reg,reg fld/fst */
+ {1, 1, 1}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 6, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 1, /* cost of moving MMX register */
+ {1, 1}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {1, 1}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 1, /* cost of moving SSE register */
+ {1, 1, 1}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {1, 1, 1}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 1, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 128, /* size of l2 cache. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (11), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (47), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs k6_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 6, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 32, /* size of l2 cache. Some models
+ have an integrated l2 cache, but
+ optimizing for K6 is not important
+ enough to worry about that. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs athlon_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ /* For some reason, Athlon deals better with the REP prefix (relative to
+ loops) than K8 does. Alignment becomes important after 8 bytes for
+ memcpy and 128 bytes for memset. */
+ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs k8_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (leaving the number entirely
+ unlimited is probably not a good idea either, as prefetches also take
+ some time to execute). */
+ 100, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ /* K8 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do non-temporal accesses and beat inline code considerably. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
+
+struct processor_costs amdfam10_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3 (1/1 1/1)
+ MOVD reg32, xmmreg Double FADD 3 (1/1 1/1) */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (leaving the number entirely
+ unlimited is probably not a good idea either, as prefetches also take
+ some time to execute). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+
+ /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
+ for very small blocks it is better to use a loop. For large blocks, a
+ libcall can do non-temporal accesses and beat inline code considerably. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+struct processor_costs bdver1_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3 (1/1 1/1)
+ MOVD reg32, xmmreg Double FADD 3 (1/1 1/1) */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (leaving the number entirely
+ unlimited is probably not a good idea either, as prefetches also take
+ some time to execute). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+
+ /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
+ for very small blocks it is better to use a loop. For large blocks, a
+ libcall can do non-temporal accesses and beat inline code considerably. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+struct processor_costs btver1_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3 (1/1 1/1)
+ MOVD reg32, xmmreg Double FADD 3 (1/1 1/1) */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+
+ /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
+ for very small blocks it is better to use a loop. For large blocks, a
+ libcall can do non-temporal accesses and beat inline code considerably. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentium4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 10, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
+ {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs nocona_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 3, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 6, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {12, 12, 12}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 8, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 1024, /* size of l2 cache. */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
+ {100000, unrolled_loop}, {-1, libcall}}}},
+ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
+ {-1, libcall}}},
+ {libcall, {{24, loop}, {64, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Generic64 should produce code tuned for Nocona and K8. */
+static const
+struct processor_costs generic64_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration, lea is 2 cycles or more. With
+ this cost, however, our current implementation of synth_mult results in
+ the use of unnecessary temporary registers, causing regressions on
+ several SPECfp benchmarks. */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
+ value is increased to the perhaps more appropriate value of 5. */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {DUMMY_STRINGOP_ALGS,
+ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {DUMMY_STRINGOP_ALGS,
+ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
+ Athlon and K8. */
+static const
+struct processor_costs generic32_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+const struct processor_costs *ix86_cost = &pentium_cost;
+
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
+#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
+#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
+#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
+#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
+#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
+#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
+#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
+#define m_ATOM (1<<PROCESSOR_ATOM)
+
+#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_K6_GEODE (m_K6 | m_GEODE)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
+#define m_BDVER1 (1<<PROCESSOR_BDVER1)
+#define m_BTVER1 (1<<PROCESSOR_BTVER1)
+#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
+
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+
+/* Generic instruction choice should be a common subset of the supported
+ CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
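+
+/* Illustrative sketch (not part of the upstream source): each m_* macro
+ above is a single bit keyed by enum processor_type, so testing whether
+ a mask applies to a given processor is one AND, e.g.: */
+#if 0 /* Example only, never compiled. */
+static inline int
+mask_has_processor_p (unsigned int feature_mask, enum processor_type proc)
+{
+  return (feature_mask & (1U << proc)) != 0;
+}
+#endif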
+
+/* Feature tests against the various tunings. */
+unsigned char ix86_tune_features[X86_TUNE_LAST];
+
+/* Feature tests against the various tunings, used to create
+ ix86_tune_features based on the processor mask. */
+static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
+ /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
+ negatively, so enabling it for Generic64 seems like a good code-size
+ tradeoff. We can't enable it for 32-bit generic because it does not
+ work well with PPro-based chips. */
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
+
+ /* X86_TUNE_PUSH_MEMORY */
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
+ | m_NOCONA | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_ZERO_EXTEND_WITH_AND */
+ m_486 | m_PENT,
+
+ /* X86_TUNE_UNROLL_STRLEN */
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
+ | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_DEEP_BRANCH_PREDICTION */
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
+ | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
+ on simulation results, but after the P4 shipped no performance benefit
+ was observed from them, and they increase code size. As a result,
+ icc never generates branch hints. */
+ 0,
+
+ /* X86_TUNE_DOUBLE_WITH_ADD */
+ ~m_386,
+
+ /* X86_TUNE_USE_SAHF */
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
+ | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_MOVX: Zero-extend integer registers to avoid
+ partial dependencies. */
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
+ | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
+
+ /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
+ register stalls on the Generic32 compilation setting as well. However,
+ in the current implementation partial register stalls are not eliminated
+ very well - they can be introduced via subregs synthesized by combine
+ and can occur in caller/callee saving sequences. Because this option
+ pays back little on PPro-based chips and conflicts with the partial-reg
+ dependencies used by Athlon/P4-based chips, it is better to leave it off
+ for generic32 for now. */
+ m_PPRO,
+
+ /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
+ m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_USE_HIMODE_FIOP */
+ m_386 | m_486 | m_K6_GEODE,
+
+ /* X86_TUNE_USE_SIMODE_FIOP */
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
+
+ /* X86_TUNE_USE_MOV0 */
+ m_K6,
+
+ /* X86_TUNE_USE_CLTD */
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
+
+ /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
+ m_PENT4,
+
+ /* X86_TUNE_SPLIT_LONG_MOVES */
+ m_PPRO,
+
+ /* X86_TUNE_READ_MODIFY_WRITE */
+ ~m_PENT,
+
+ /* X86_TUNE_READ_MODIFY */
+ ~(m_PENT | m_PPRO),
+
+ /* X86_TUNE_PROMOTE_QIMODE */
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
+ | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
+
+ /* X86_TUNE_FAST_PREFIX */
+ ~(m_PENT | m_486 | m_386),
+
+ /* X86_TUNE_SINGLE_STRINGOP */
+ m_386 | m_PENT4 | m_NOCONA,
+
+ /* X86_TUNE_QIMODE_MATH */
+ ~0,
+
+ /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
+ register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
+ might be considered for Generic32 if our scheme for avoiding partial
+ stalls were more effective. */
+ ~m_PPRO,
+
+ /* X86_TUNE_PROMOTE_QI_REGS */
+ 0,
+
+ /* X86_TUNE_PROMOTE_HI_REGS */
+ m_PPRO,
+
+ /* X86_TUNE_SINGLE_POP: Enable if a single pop insn is preferred
+ over esp addition. */
+ m_386 | m_486 | m_PENT | m_PPRO,
+
+ /* X86_TUNE_DOUBLE_POP: Enable if a double pop insn is preferred
+ over esp addition. */
+ m_PENT,
+
+ /* X86_TUNE_SINGLE_PUSH: Enable if a single push insn is preferred
+ over esp subtraction. */
+ m_386 | m_486 | m_PENT | m_K6_GEODE,
+
+ /* X86_TUNE_DOUBLE_PUSH: Enable if a double push insn is preferred
+ over esp subtraction. */
+ m_PENT | m_K6_GEODE,
+
+ /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
+ for DFmode copies */
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
+ | m_GENERIC | m_GEODE),
+
+ /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
+ conflict here between PPro/Pentium4-based chips, which treat 128-bit
+ SSE registers as single units, and K8-based chips, which divide SSE
+ registers into two 64-bit halves. This knob promotes all store
+ destinations to 128 bits to allow register renaming on 128-bit SSE
+ units, but usually results in one extra microop on 64-bit SSE units.
+ Experimental results show that disabling this option on P4 brings over
+ 20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
+ regression that can be partly masked by careful scheduling of moves. */
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
+ | m_AMDFAM10 | m_BDVER1,
+
+ /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
+ m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
+
+ /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
+ m_BDVER1 | m_COREI7,
+
+ /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
+ m_BDVER1,
+
+ /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
+ are resolved on SSE register parts instead of whole registers, so we may
+ maintain just the lower part of scalar values in the proper format,
+ leaving the upper part undefined. */
+ m_ATHLON_K8,
+
+ /* X86_TUNE_SSE_TYPELESS_STORES */
+ m_AMD_MULTIPLE,
+
+ /* X86_TUNE_SSE_LOAD0_BY_PXOR */
+ m_PPRO | m_PENT4 | m_NOCONA,
+
+ /* X86_TUNE_MEMORY_MISMATCH_STALL */
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_PROLOGUE_USING_MOVE */
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_EPILOGUE_USING_MOVE */
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_SHIFT1 */
+ ~m_486,
+
+ /* X86_TUNE_USE_FFREEP */
+ m_AMD_MULTIPLE,
+
+ /* X86_TUNE_INTER_UNIT_MOVES */
+ ~(m_AMD_MULTIPLE | m_GENERIC),
+
+ /* X86_TUNE_INTER_UNIT_CONVERSIONS */
+ ~(m_AMDFAM10 | m_BDVER1),
+
+ /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
+ than 4 branch instructions in a 16-byte window. */
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
+ | m_GENERIC,
+
+ /* X86_TUNE_SCHEDULE */
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
+ | m_GENERIC,
+
+ /* X86_TUNE_USE_BT */
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_USE_INCDEC */
+ ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
+
+ /* X86_TUNE_PAD_RETURNS */
+ m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
+ m_ATOM,
+
+ /* X86_TUNE_EXT_80387_CONSTANTS */
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
+ | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_SHORTEN_X87_SSE */
+ ~m_K8,
+
+ /* X86_TUNE_AVOID_VECTOR_DECODE */
+ m_K8 | m_CORE2I7_64 | m_GENERIC64,
+
+ /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
+ HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
+ faster. */
+ ~(m_386 | m_486),
+
+ /* X86_TUNE_SLOW_IMUL_IMM32_MEM: IMUL of a 32-bit constant and memory takes
+ the vector path on AMD machines. */
+ m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+
+ /* X86_TUNE_SLOW_IMUL_IMM8: IMUL of an 8-bit constant takes the vector path
+ on AMD machines. */
+ m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+
+ /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
+ than via MOV. */
+ m_PENT,
+
+ /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
+ though XOR is one byte longer. */
+ m_PENT,
+
+ /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
+ operand that cannot be represented using a modRM byte. The XOR
+ replacement is long-decoded, so this split helps here as well. */
+ m_K6,
+
+ /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
+ from FP to FP. */
+ m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
+
+ /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
+ from integer to FP. */
+ m_AMDFAM10,
+
+ /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
+ with a subsequent conditional jump instruction into a single
+ compare-and-branch uop. */
+ m_BDVER1,
+
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
+ will impact LEA instruction selection. */
+ m_ATOM,
+
+ /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
+ instructions. */
+ ~m_ATOM,
+
+ /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
+ the auto-vectorizer. */
+ m_BDVER1,
+};
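+
+/* Sketch (illustrative, mirroring what the option-override code computes):
+ ix86_tune_features[] is derived from the masks above by testing every
+ entry against the processor selected by -mtune, roughly:
+
+   unsigned int tune_mask = 1U << ix86_tune;
+   for (i = 0; i < X86_TUNE_LAST; i++)
+     ix86_tune_features[i]
+       = (initial_ix86_tune_features[i] & tune_mask) != 0;  */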
+
+/* Feature tests against the various architecture variations. */
+unsigned char ix86_arch_features[X86_ARCH_LAST];
+
+/* Feature tests against the various architecture variations, used to create
+ ix86_arch_features based on the processor mask. */
+static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
+ /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
+ ~(m_386 | m_486 | m_PENT | m_K6),
+
+ /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
+ ~m_386,
+
+ /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
+ ~(m_386 | m_486),
+
+ /* X86_ARCH_XADD: Exchange and add was added for 80486. */
+ ~m_386,
+
+ /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
+ ~m_386,
+};
+
+static const unsigned int x86_accumulate_outgoing_args
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
+ | m_GENERIC;
+
+static const unsigned int x86_arch_always_fancy_math_387
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
+ | m_NOCONA | m_CORE2I7 | m_GENERIC;
+
+static const unsigned int x86_avx256_split_unaligned_load
+ = m_COREI7 | m_GENERIC;
+
+static const unsigned int x86_avx256_split_unaligned_store
+ = m_COREI7 | m_BDVER1 | m_GENERIC;
+
+static enum stringop_alg stringop_alg = no_stringop;
+
+/* If the average insn count for a single function invocation is
+ lower than this constant, emit fast (but longer) prologue and
+ epilogue code. */
+#define FAST_PROLOGUE_INSN_COUNT 20
+
+/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
+static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
+static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
+static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+ REGNO. Used by REGNO_REG_CLASS in i386.h. */
+
+enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
+{
+ /* ax, dx, cx, bx */
+ AREG, DREG, CREG, BREG,
+ /* si, di, bp, sp */
+ SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
+ /* FP registers */
+ FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
+ FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
+ /* arg pointer */
+ NON_Q_REGS,
+ /* flags, fpsr, fpcr, frame */
+ NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
+ /* SSE registers */
+ SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+ /* MMX registers */
+ MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
+ MMX_REGS, MMX_REGS,
+ /* REX registers */
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ /* SSE REX registers */
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+};
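+
+/* Usage note (illustrative): REGNO_REG_CLASS in i386.h simply indexes this
+ array, so e.g. REGNO_REG_CLASS (0) is AREG for %eax, while registers
+ whose QImode parts are not addressable in 32-bit mode (si, di, bp, sp
+ and the REX registers) fall into NON_Q_REGS. */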
+
+/* The "default" register map used in 32-bit mode. */
+
+int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
+ 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
+
+/* The "default" register map used in 64-bit mode. */
+
+int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
+ 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
+ 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
+ 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
+ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+};
+
+/* Define the register numbers to be used in Dwarf debugging information.
+ The SVR4 reference port C compiler uses the following register numbers
+ in its Dwarf output code:
+ 0 for %eax (gcc regno = 0)
+ 1 for %ecx (gcc regno = 2)
+ 2 for %edx (gcc regno = 1)
+ 3 for %ebx (gcc regno = 3)
+ 4 for %esp (gcc regno = 7)
+ 5 for %ebp (gcc regno = 6)
+ 6 for %esi (gcc regno = 4)
+ 7 for %edi (gcc regno = 5)
+ The following three DWARF register numbers are never generated by
+ the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
+ believes these numbers have these meanings.
+ 8 for %eip (no gcc equivalent)
+ 9 for %eflags (gcc regno = 17)
+ 10 for %trapno (no gcc equivalent)
+ It is not at all clear how we should number the FP stack registers
+ for the x86 architecture. If the version of SDB on x86/svr4 were
+ a bit less brain dead with respect to floating-point then we would
+ have a precedent to follow with respect to DWARF register numbers
+ for x86 FP registers, but the SDB on x86/svr4 is so completely
+ broken with respect to FP registers that it is hardly worth thinking
+ of it as something to strive for compatibility with.
+ The version of x86/svr4 SDB I have at the moment does (partially)
+ seem to believe that DWARF register number 11 is associated with
+ the x86 register %st(0), but that's about all. Higher DWARF
+ register numbers don't seem to be associated with anything in
+ particular, and even for DWARF regno 11, SDB only seems to
+ understand that it should say that a variable lives in %st(0) (when
+ asked via an `=' command) if we said it was in DWARF regno 11,
+ but SDB still prints garbage when asked for the value of the
+ variable in question (via a `/' command).
+ (Also note that the labels SDB prints for various FP stack regs
+ when doing an `x' command are all wrong.)
+ Note that these problems generally don't affect the native SVR4
+ C compiler because it doesn't allow the use of -O with -g and
+ because when it is *not* optimizing, it allocates a memory
+ location for each floating-point variable, and the memory
+ location is what gets described in the DWARF AT_location
+ attribute for the variable in question.
+ Regardless of the severe mental illness of the x86/svr4 SDB, we
+ do something sensible here and we use the following DWARF
+ register numbers. Note that these are all stack-top-relative
+ numbers.
+ 11 for %st(0) (gcc regno = 8)
+ 12 for %st(1) (gcc regno = 9)
+ 13 for %st(2) (gcc regno = 10)
+ 14 for %st(3) (gcc regno = 11)
+ 15 for %st(4) (gcc regno = 12)
+ 16 for %st(5) (gcc regno = 13)
+ 17 for %st(6) (gcc regno = 14)
+ 18 for %st(7) (gcc regno = 15)
+*/
+int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
+ 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
+ -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
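+
+/* Worked example (read off the table above): GCC register number 7 is
+ %esp and svr4_dbx_register_map[7] == 4, matching the SVR4 numbering in
+ the comment (DWARF register 4 for %esp); the FP stack registers, GCC
+ regnos 8-15, map to DWARF registers 11-18. */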
+
+/* Define parameter passing and return registers. */
+
+static int const x86_64_int_parameter_registers[6] =
+{
+ DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
+};
+
+static int const x86_64_ms_abi_int_parameter_registers[4] =
+{
+ CX_REG, DX_REG, R8_REG, R9_REG
+};
+
+static int const x86_64_int_return_registers[4] =
+{
+ AX_REG, DX_REG, DI_REG, SI_REG
+};
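+
+/* For example, for a call f (a, b, c) taking three integer arguments,
+   the SysV ABI passes them in %rdi, %rsi and %rdx (the first three
+   entries of x86_64_int_parameter_registers), while the Microsoft
+   ABI passes them in %rcx, %rdx and %r8.  */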
+
+/* Define the structure for the machine field in struct function. */
+
+struct GTY(()) stack_local_entry {
+ unsigned short mode;
+ unsigned short n;
+ rtx rtl;
+ struct stack_local_entry *next;
+};
+
+/* Structure describing stack frame layout.
+ Stack grows downward:
+
+ [arguments]
+ <- ARG_POINTER
+ saved pc
+
+ saved static chain if ix86_static_chain_on_stack
+
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+ <- regs_save_offset
+ [padding0]
+
+ [saved SSE regs]
+ <- sse_regs_save_offset
+ [padding1] |
+ | <- FRAME_POINTER
+ [va_arg registers] |
+ |
+ [frame] |
+ |
+ [padding2] | = to_allocate
+ <- STACK_POINTER
+ */
+struct ix86_frame
+{
+ int nsseregs;
+ int nregs;
+ int va_arg_size;
+ int red_zone_size;
+ int outgoing_arguments_size;
+ HOST_WIDE_INT frame;
+
+ /* The offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT hard_frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+ HOST_WIDE_INT hfp_save_offset;
+ HOST_WIDE_INT reg_save_offset;
+ HOST_WIDE_INT sse_reg_save_offset;
+
+ /* When save_regs_using_mov is set, emit prologue using
+ move instead of push instructions. */
+ bool save_regs_using_mov;
+};
+
+/* Code model option. */
+enum cmodel ix86_cmodel;
+/* Asm dialect. */
+enum asm_dialect ix86_asm_dialect = ASM_ATT;
+/* TLS dialects. */
+enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
+
+/* Which unit we are generating floating point math for. */
+enum fpmath_unit ix86_fpmath;
+
+/* Which cpu are we scheduling for. */
+enum attr_cpu ix86_schedule;
+
+/* Which cpu are we optimizing for. */
+enum processor_type ix86_tune;
+
+/* Which instruction set architecture to use. */
+enum processor_type ix86_arch;
+
+/* True if the SSE prefetch instruction is not a NOP.  */
+int x86_prefetch_sse;
+
+/* ix86_regparm_string as a number.  */
+static int ix86_regparm;
+
+/* The -mstackrealign option.  */
+static const char ix86_force_align_arg_pointer_string[]
+ = "force_align_arg_pointer";
+
+static rtx (*ix86_gen_leave) (void);
+static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
+static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
+static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
+static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
+static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
+static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
+
+/* Preferred alignment for stack boundary in bits. */
+unsigned int ix86_preferred_stack_boundary;
+
+/* Alignment for incoming stack boundary in bits, as specified on the
+ command line. */
+static unsigned int ix86_user_incoming_stack_boundary;
+
+/* Default alignment for incoming stack boundary in bits. */
+static unsigned int ix86_default_incoming_stack_boundary;
+
+/* Alignment for incoming stack boundary in bits. */
+unsigned int ix86_incoming_stack_boundary;
+
+/* The ABI used by the target.  */
+enum calling_abi ix86_abi;
+
+/* Values 1-5: see jump.c */
+int ix86_branch_cost;
+
+/* Calling-ABI-specific va_list type nodes.  */
+static GTY(()) tree sysv_va_list_type_node;
+static GTY(()) tree ms_va_list_type_node;
+
+/* Variables which are this size or smaller are put in the data/bss
+ or ldata/lbss sections. */
+
+int ix86_section_threshold = 65536;
+
+/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
+char internal_label_prefix[16];
+int internal_label_prefix_len;
+
+/* Fence to use after loop using movnt. */
+tree x86_mfence;
+
+/* Register class used for passing a given 64-bit part of an argument.
+ These represent classes as documented by the psABI, with the exception
+ of the SSESF and SSEDF classes, which are basically the SSE class:
+ GCC just uses an SFmode or DFmode move instead of a DImode move to
+ avoid reformatting penalties.
+
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (the upper half then contains only padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
+
+/* Table of constants used by fldpi, fldln2, etc.  */
+static REAL_VALUE_TYPE ext_80387_constants_table [5];
+static bool ext_80387_constants_init = false;
+
+
+static struct machine_function * ix86_init_machine_status (void);
+static rtx ix86_function_value (const_tree, const_tree, bool);
+static bool ix86_function_value_regno_p (const unsigned int);
+static unsigned int ix86_function_arg_boundary (enum machine_mode,
+ const_tree);
+static rtx ix86_static_chain (const_tree, bool);
+static int ix86_function_regparm (const_tree, const_tree);
+static void ix86_compute_frame_layout (struct ix86_frame *);
+static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
+ rtx, rtx, int);
+static void ix86_add_new_builtins (int);
+static rtx ix86_expand_vec_perm_builtin (tree);
+static tree ix86_canonical_va_list_type (tree);
+static void predict_jump (int);
+static unsigned int split_stack_prologue_scratch_regno (void);
+static bool i386_asm_output_addr_const_extra (FILE *, rtx);
+
+enum ix86_function_specific_strings
+{
+ IX86_FUNCTION_SPECIFIC_ARCH,
+ IX86_FUNCTION_SPECIFIC_TUNE,
+ IX86_FUNCTION_SPECIFIC_FPMATH,
+ IX86_FUNCTION_SPECIFIC_MAX
+};
+
+static char *ix86_target_string (int, int, const char *, const char *,
+ const char *, bool);
+static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
+static void ix86_function_specific_save (struct cl_target_option *);
+static void ix86_function_specific_restore (struct cl_target_option *);
+static void ix86_function_specific_print (FILE *, int,
+ struct cl_target_option *);
+static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
+static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
+static bool ix86_can_inline_p (tree, tree);
+static void ix86_set_current_function (tree);
+static unsigned int ix86_minimum_incoming_stack_boundary (bool);
+
+static enum calling_abi ix86_function_abi (const_tree);
+
+
+#ifndef SUBTARGET32_DEFAULT_CPU
+#define SUBTARGET32_DEFAULT_CPU "i386"
+#endif
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
+/* Whether -mtune= or -march= were specified.  */
+static int ix86_tune_defaulted;
+static int ix86_arch_specified;
+
+/* A mask of ix86_isa_flags that includes bit X if X
+ was set or cleared on the command line. */
+static int ix86_isa_flags_explicit;
+
+/* Define a set of ISAs which are available when a given ISA is
+ enabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+#define OPTION_MASK_ISA_AVX_SET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
+#define OPTION_MASK_ISA_FMA_SET \
+ (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+ as -msse4.2. */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_FMA4_SET \
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
+ | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_XOP_SET \
+ (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
+#define OPTION_MASK_ISA_LWP_SET \
+ OPTION_MASK_ISA_LWP
+
+/* AES and PCLMUL need SSE2 because they use XMM registers.  */
+#define OPTION_MASK_ISA_AES_SET \
+ (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_PCLMUL_SET \
+ (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
+
+#define OPTION_MASK_ISA_ABM_SET \
+ (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
+
+#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
+#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
+#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
+#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
+
+#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_SET \
+ (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
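+
+/* As a worked example, OPTION_MASK_ISA_SSE3_SET expands to
+   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE),
+   so enabling SSE3 on the command line transitively enables the ISAs
+   it depends on.  */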
+
+/* Define a set of ISAs which aren't available when a given ISA is
+ disabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_UNSET \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
+
+#define OPTION_MASK_ISA_SSE_UNSET \
+ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
+#define OPTION_MASK_ISA_SSE2_UNSET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
+#define OPTION_MASK_ISA_SSE3_UNSET \
+ (OPTION_MASK_ISA_SSE3 \
+ | OPTION_MASK_ISA_SSSE3_UNSET \
+ | OPTION_MASK_ISA_SSE4A_UNSET)
+#define OPTION_MASK_ISA_SSSE3_UNSET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
+#define OPTION_MASK_ISA_SSE4_1_UNSET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
+#define OPTION_MASK_ISA_AVX_UNSET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
+ | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
+#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
+ as -mno-sse4.1. */
+#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
+
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
+
+#define OPTION_MASK_ISA_FMA4_UNSET \
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
+#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
+
+#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
+#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
+#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
+#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
+#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
+#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
+#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
+
+#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
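+
+/* As a worked example, OPTION_MASK_ISA_SSE4_1_UNSET expands to
+   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX
+    | OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP
+    | OPTION_MASK_ISA_F16C),
+   so disabling SSE4.1 also disables every ISA that requires it.  */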
+
+/* Vectorization library interface and handlers. */
+static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
+
+static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
+static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
+
+/* Processor target table, indexed by processor number.  */
+struct ptt
+{
+ const struct processor_costs *cost; /* Processor costs */
+ const int align_loop; /* Default alignments. */
+ const int align_loop_max_skip;
+ const int align_jump;
+ const int align_jump_max_skip;
+ const int align_func;
+};
+
+static const struct ptt processor_target_table[PROCESSOR_max] =
+{
+ {&i386_cost, 4, 3, 4, 3, 4},
+ {&i486_cost, 16, 15, 16, 15, 16},
+ {&pentium_cost, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 16, 15, 16, 10, 16},
+ {&geode_cost, 0, 0, 0, 0, 0},
+ {&k6_cost, 32, 7, 32, 7, 32},
+ {&athlon_cost, 16, 7, 16, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0},
+ {&k8_cost, 16, 7, 16, 7, 16},
+ {&nocona_cost, 0, 0, 0, 0, 0},
+ /* Core 2 32-bit. */
+ {&generic32_cost, 16, 10, 16, 10, 16},
+ /* Core 2 64-bit. */
+ {&generic64_cost, 16, 10, 16, 10, 16},
+ /* Core i7 32-bit. */
+ {&generic32_cost, 16, 10, 16, 10, 16},
+ /* Core i7 64-bit. */
+ {&generic64_cost, 16, 10, 16, 10, 16},
+ {&generic32_cost, 16, 7, 16, 7, 16},
+ {&generic64_cost, 16, 10, 16, 10, 16},
+ {&amdfam10_cost, 32, 24, 32, 7, 32},
+ {&bdver1_cost, 32, 24, 32, 7, 32},
+ {&btver1_cost, 32, 24, 32, 7, 32},
+ {&atom_cost, 16, 7, 16, 7, 16}
+};
+
+static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
+{
+ "generic",
+ "i386",
+ "i486",
+ "pentium",
+ "pentium-mmx",
+ "pentiumpro",
+ "pentium2",
+ "pentium3",
+ "pentium4",
+ "pentium-m",
+ "prescott",
+ "nocona",
+ "core2",
+ "corei7",
+ "atom",
+ "geode",
+ "k6",
+ "k6-2",
+ "k6-3",
+ "athlon",
+ "athlon-4",
+ "k8",
+ "amdfam10",
+ "bdver1",
+ "btver1"
+};
+
+/* Return true if a red-zone is in use. */
+
+static inline bool
+ix86_using_red_zone (void)
+{
+ return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
+}
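+
+/* (Background: in the SysV x86-64 ABI the red zone is the 128-byte area
+   below the stack pointer that leaf functions may use without adjusting
+   %rsp; the Microsoft x64 ABI defines no such area, hence the check
+   above.)  */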
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ case OPT_mmmx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
+ }
+ return true;
+
+ case OPT_m3dnow:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
+ }
+ return true;
+
+ case OPT_m3dnowa:
+ return false;
+
+ case OPT_msse:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
+ }
+ return true;
+
+ case OPT_msse2:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
+ }
+ return true;
+
+ case OPT_msse3:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
+ }
+ return true;
+
+ case OPT_mssse3:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
+ }
+ return true;
+
+ case OPT_msse4_1:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
+ }
+ return true;
+
+ case OPT_msse4_2:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
+ }
+ return true;
+
+ case OPT_mavx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
+ }
+ return true;
+
+ case OPT_mfma:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+ }
+ return true;
+
+ case OPT_msse4:
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
+ return true;
+
+ case OPT_mno_sse4:
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
+ return true;
+
+ case OPT_msse4a:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
+ }
+ return true;
+
+ case OPT_mfma4:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
+ }
+ return true;
+
+ case OPT_mxop:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
+ }
+ return true;
+
+ case OPT_mlwp:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
+ }
+ return true;
+
+ case OPT_mabm:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
+ }
+ return true;
+
+ case OPT_mbmi:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
+ }
+ return true;
+
+ case OPT_mtbm:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
+ }
+ return true;
+
+ case OPT_mpopcnt:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
+ }
+ return true;
+
+ case OPT_msahf:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
+ }
+ return true;
+
+ case OPT_mcx16:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
+ }
+ return true;
+
+ case OPT_mmovbe:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
+ }
+ return true;
+
+ case OPT_mcrc32:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
+ }
+ return true;
+
+ case OPT_maes:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
+ }
+ return true;
+
+ case OPT_mpclmul:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
+ }
+ return true;
+
+ case OPT_mfsgsbase:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
+ }
+ return true;
+
+ case OPT_mrdrnd:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
+ }
+ return true;
+
+ case OPT_mf16c:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+}
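+
+/* A minimal sketch, illustrative only and not used by the compiler, of
+   how the SET/UNSET masks above combine in this handler:  */
+#if 0
+static void
+example_isa_flag_updates (void)
+{
+  int flags = 0;
+
+  /* "-msse4.1" also enables SSSE3, SSE3, SSE2 and SSE.  */
+  flags |= OPTION_MASK_ISA_SSE4_1_SET;
+
+  /* "-mno-sse2" also disables SSE3, SSSE3, SSE4.x, SSE4A, AVX and
+     everything built on them.  */
+  flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
+}
+#endif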
+
+/* Return a string that documents the current -m options. The caller is
+ responsible for freeing the string. */
+
+static char *
+ix86_target_string (int isa, int flags, const char *arch, const char *tune,
+ const char *fpmath, bool add_nl_p)
+{
+ struct ix86_target_opts
+ {
+ const char *option; /* option string */
+ int mask; /* isa mask options */
+ };
+
+ /* This table is ordered so that options like -msse4.2, which imply
+ preceding options, are matched first. */
+ static struct ix86_target_opts isa_opts[] =
+ {
+ { "-m64", OPTION_MASK_ISA_64BIT },
+ { "-mfma4", OPTION_MASK_ISA_FMA4 },
+ { "-mfma", OPTION_MASK_ISA_FMA },
+ { "-mxop", OPTION_MASK_ISA_XOP },
+ { "-mlwp", OPTION_MASK_ISA_LWP },
+ { "-msse4a", OPTION_MASK_ISA_SSE4A },
+ { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
+ { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
+ { "-mssse3", OPTION_MASK_ISA_SSSE3 },
+ { "-msse3", OPTION_MASK_ISA_SSE3 },
+ { "-msse2", OPTION_MASK_ISA_SSE2 },
+ { "-msse", OPTION_MASK_ISA_SSE },
+ { "-m3dnow", OPTION_MASK_ISA_3DNOW },
+ { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
+ { "-mmmx", OPTION_MASK_ISA_MMX },
+ { "-mabm", OPTION_MASK_ISA_ABM },
+ { "-mbmi", OPTION_MASK_ISA_BMI },
+ { "-mtbm", OPTION_MASK_ISA_TBM },
+ { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
+ { "-mmovbe", OPTION_MASK_ISA_MOVBE },
+ { "-mcrc32", OPTION_MASK_ISA_CRC32 },
+ { "-maes", OPTION_MASK_ISA_AES },
+ { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
+ { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
+ { "-mrdrnd", OPTION_MASK_ISA_RDRND },
+ { "-mf16c", OPTION_MASK_ISA_F16C },
+ };
+
+ /* Flag options. */
+ static struct ix86_target_opts flag_opts[] =
+ {
+ { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
+ { "-m80387", MASK_80387 },
+ { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
+ { "-malign-double", MASK_ALIGN_DOUBLE },
+ { "-mcld", MASK_CLD },
+ { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
+ { "-mieee-fp", MASK_IEEE_FP },
+ { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
+ { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
+ { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
+ { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
+ { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
+ { "-mno-push-args", MASK_NO_PUSH_ARGS },
+ { "-mno-red-zone", MASK_NO_RED_ZONE },
+ { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
+ { "-mrecip", MASK_RECIP },
+ { "-mrtd", MASK_RTD },
+ { "-msseregparm", MASK_SSEREGPARM },
+ { "-mstack-arg-probe", MASK_STACK_PROBE },
+ { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
+ { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
+ { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
+ { "-mvzeroupper", MASK_VZEROUPPER },
+ { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
+ { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
+ { "-mprefer-avx128", MASK_PREFER_AVX128},
+ };
+
+ const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
+
+ char isa_other[40];
+ char target_other[40];
+ unsigned num = 0;
+ unsigned i, j;
+ char *ret;
+ char *ptr;
+ size_t len;
+ size_t line_len;
+ size_t sep_len;
+
+ memset (opts, '\0', sizeof (opts));
+
+ /* Add -march= option. */
+ if (arch)
+ {
+ opts[num][0] = "-march=";
+ opts[num++][1] = arch;
+ }
+
+ /* Add -mtune= option. */
+ if (tune)
+ {
+ opts[num][0] = "-mtune=";
+ opts[num++][1] = tune;
+ }
+
+ /* Pick out the enabled ISA options. */
+ for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
+ {
+ if ((isa & isa_opts[i].mask) != 0)
+ {
+ opts[num++][0] = isa_opts[i].option;
+ isa &= ~ isa_opts[i].mask;
+ }
+ }
+
+ if (isa && add_nl_p)
+ {
+ opts[num++][0] = isa_other;
+ sprintf (isa_other, "(other isa: %#x)", isa);
+ }
+
+ /* Add flag options. */
+ for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
+ {
+ if ((flags & flag_opts[i].mask) != 0)
+ {
+ opts[num++][0] = flag_opts[i].option;
+ flags &= ~ flag_opts[i].mask;
+ }
+ }
+
+ if (flags && add_nl_p)
+ {
+ opts[num++][0] = target_other;
+ sprintf (target_other, "(other flags: %#x)", flags);
+ }
+
+ /* Add -fpmath= option. */
+ if (fpmath)
+ {
+ opts[num][0] = "-mfpmath=";
+ opts[num++][1] = fpmath;
+ }
+
+ /* Any options? */
+ if (num == 0)
+ return NULL;
+
+ gcc_assert (num < ARRAY_SIZE (opts));
+
+ /* Size the string. */
+ len = 0;
+ sep_len = (add_nl_p) ? 3 : 1;
+ for (i = 0; i < num; i++)
+ {
+ len += sep_len;
+ for (j = 0; j < 2; j++)
+ if (opts[i][j])
+ len += strlen (opts[i][j]);
+ }
+
+ /* Build the string. */
+ ret = ptr = (char *) xmalloc (len);
+ line_len = 0;
+
+ for (i = 0; i < num; i++)
+ {
+ size_t len2[2];
+
+ for (j = 0; j < 2; j++)
+ len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
+
+ if (i != 0)
+ {
+ *ptr++ = ' ';
+ line_len++;
+
+ if (add_nl_p && line_len + len2[0] + len2[1] > 70)
+ {
+ *ptr++ = '\\';
+ *ptr++ = '\n';
+ line_len = 0;
+ }
+ }
+
+ for (j = 0; j < 2; j++)
+ if (opts[i][j])
+ {
+ memcpy (ptr, opts[i][j], len2[j]);
+ ptr += len2[j];
+ line_len += len2[j];
+ }
+ }
+
+ *ptr = '\0';
+ gcc_assert (ret + len >= ptr);
+
+ return ret;
+}
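+
+/* A minimal usage sketch for ix86_target_string; the argument strings
+   here are illustrative placeholders, and ix86_debug_options below does
+   essentially this with the real option state:  */
+#if 0
+static void
+example_print_target_string (void)
+{
+  char *s = ix86_target_string (ix86_isa_flags, target_flags,
+                                "core2", "generic", "sse", true);
+  if (s)
+    {
+      fprintf (stderr, "%s\n", s);
+      free (s);
+    }
+}
+#endif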
+
+/* Return TRUE if software prefetching is beneficial for the
+ given CPU. */
+
+static bool
+software_prefetching_beneficial_p (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_GEODE:
+ case PROCESSOR_K6:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_BTVER1:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if profiling code should be emitted before the prologue,
+ and false otherwise.
+ Note: for x86, unsupported "hotfix" (-mfentry) combinations are
+ rejected with sorry () in ix86_option_override_internal. */
+static bool
+ix86_profile_before_prologue (void)
+{
+ return flag_fentry != 0;
+}
+
+/* Function that is callable from the debugger to print the current
+ options. */
+void
+ix86_debug_options (void)
+{
+ char *opts = ix86_target_string (ix86_isa_flags, target_flags,
+ ix86_arch_string, ix86_tune_string,
+ ix86_fpmath_string, true);
+
+ if (opts)
+ {
+ fprintf (stderr, "%s\n\n", opts);
+ free (opts);
+ }
+ else
+ fputs ("<no options>\n\n", stderr);
+
+ return;
+}
+
+/* Override various settings based on options. If MAIN_ARGS_P, the
+ options are from the command line, otherwise they are from
+ attributes. */
+
+static void
+ix86_option_override_internal (bool main_args_p)
+{
+ int i;
+ unsigned int ix86_arch_mask, ix86_tune_mask;
+ const bool ix86_tune_specified = (ix86_tune_string != NULL);
+ const char *prefix;
+ const char *suffix;
+ const char *sw;
+
+ /* Comes from final.c -- no real reason to change it. */
+#define MAX_CODE_ALIGN 16
+
+ enum pta_flags
+ {
+ PTA_SSE = 1 << 0,
+ PTA_SSE2 = 1 << 1,
+ PTA_SSE3 = 1 << 2,
+ PTA_MMX = 1 << 3,
+ PTA_PREFETCH_SSE = 1 << 4,
+ PTA_3DNOW = 1 << 5,
+ PTA_3DNOW_A = 1 << 6,
+ PTA_64BIT = 1 << 7,
+ PTA_SSSE3 = 1 << 8,
+ PTA_CX16 = 1 << 9,
+ PTA_POPCNT = 1 << 10,
+ PTA_ABM = 1 << 11,
+ PTA_SSE4A = 1 << 12,
+ PTA_NO_SAHF = 1 << 13,
+ PTA_SSE4_1 = 1 << 14,
+ PTA_SSE4_2 = 1 << 15,
+ PTA_AES = 1 << 16,
+ PTA_PCLMUL = 1 << 17,
+ PTA_AVX = 1 << 18,
+ PTA_FMA = 1 << 19,
+ PTA_MOVBE = 1 << 20,
+ PTA_FMA4 = 1 << 21,
+ PTA_XOP = 1 << 22,
+ PTA_LWP = 1 << 23,
+ PTA_FSGSBASE = 1 << 24,
+ PTA_RDRND = 1 << 25,
+ PTA_F16C = 1 << 26,
+ PTA_BMI = 1 << 27,
+ PTA_TBM = 1 << 28
+ /* If this reaches 32, the flags field of struct pta below must be widened. */
+ };
+
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ const enum attr_cpu schedule;
+ const unsigned /*enum pta_flags*/ flags;
+ }
+ const processor_alias_table[] =
+ {
+ {"i386", PROCESSOR_I386, CPU_NONE, 0},
+ {"i486", PROCESSOR_I486, CPU_NONE, 0},
+ {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"prescott", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
+ {"nocona", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_CX16 | PTA_NO_SAHF},
+ {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16},
+ {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
+ {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
+ {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C},
+ {"atom", PROCESSOR_ATOM, CPU_ATOM,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
+ {"geode", PROCESSOR_GEODE, CPU_GEODE,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"x86-64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"opteron", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"opteron-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon-fx", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
+ | PTA_XOP | PTA_LWP},
+ {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
+ {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
+ 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
+ PTA_64BIT /* flags are only used for -march switch. */ },
+ };
+
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+
+ /* Set up prefix/suffix so the error messages refer to either the command
+ line argument, or the attribute(target). */
+ if (main_args_p)
+ {
+ prefix = "-m";
+ suffix = "";
+ sw = "switch";
+ }
+ else
+ {
+ prefix = "option(\"";
+ suffix = "\")";
+ sw = "attribute";
+ }
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
+ SUBSUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* -fPIC is the default for x86_64 Darwin (Mach-O) targets. */
+ if (TARGET_MACHO && TARGET_64BIT)
+ flag_pic = 2;
+
+ /* Need to check -mtune=generic first. */
+ if (ix86_tune_string)
+ {
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "i686")
+ /* As special support for cross compilers we read -mtune=native
+ as -mtune=generic. With native compilers we won't see the
+ -mtune=native, as it was changed by the driver. */
+ || !strcmp (ix86_tune_string, "native"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ /* If this call is for setting the option attribute, allow the
+ generic32/generic64 that was previously set. */
+ else if (!main_args_p
+ && (!strcmp (ix86_tune_string, "generic32")
+ || !strcmp (ix86_tune_string, "generic64")))
+ ;
+ else if (!strncmp (ix86_tune_string, "generic", 7))
+ error ("bad value (%s) for %stune=%s %s",
+ ix86_tune_string, prefix, suffix, sw);
+ else if (!strcmp (ix86_tune_string, "x86-64"))
+ warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
+ "%stune=k8%s or %stune=generic%s instead as appropriate",
+ prefix, suffix, prefix, suffix, prefix, suffix);
+ }
+ else
+ {
+ if (ix86_arch_string)
+ ix86_tune_string = ix86_arch_string;
+ if (!ix86_tune_string)
+ {
+ ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
+ ix86_tune_defaulted = 1;
+ }
+
+ /* ix86_tune_string is set to ix86_arch_string or defaulted. We
+ need to use a sensible tune option. */
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "x86-64")
+ || !strcmp (ix86_tune_string, "i686"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ }
+
+ if (ix86_stringop_string)
+ {
+ if (!strcmp (ix86_stringop_string, "rep_byte"))
+ stringop_alg = rep_prefix_1_byte;
+ else if (!strcmp (ix86_stringop_string, "libcall"))
+ stringop_alg = libcall;
+ else if (!strcmp (ix86_stringop_string, "rep_4byte"))
+ stringop_alg = rep_prefix_4_byte;
+ else if (!strcmp (ix86_stringop_string, "rep_8byte")
+ && TARGET_64BIT)
+ /* rep; movq isn't available in 32-bit code. */
+ stringop_alg = rep_prefix_8_byte;
+ else if (!strcmp (ix86_stringop_string, "byte_loop"))
+ stringop_alg = loop_1_byte;
+ else if (!strcmp (ix86_stringop_string, "loop"))
+ stringop_alg = loop;
+ else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
+ stringop_alg = unrolled_loop;
+ else
+ error ("bad value (%s) for %sstringop-strategy=%s %s",
+ ix86_stringop_string, prefix, suffix, sw);
+ }
+
+ if (!ix86_arch_string)
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
+ else
+ ix86_arch_specified = 1;
+
+ /* Validate -mabi= value. */
+ if (ix86_abi_string)
+ {
+ if (strcmp (ix86_abi_string, "sysv") == 0)
+ ix86_abi = SYSV_ABI;
+ else if (strcmp (ix86_abi_string, "ms") == 0)
+ ix86_abi = MS_ABI;
+ else
+ error ("unknown ABI (%s) for %sabi=%s %s",
+ ix86_abi_string, prefix, suffix, sw);
+ }
+ else
+ ix86_abi = DEFAULT_ABI;
+
+ if (ix86_cmodel_string != 0)
+ {
+ if (!strcmp (ix86_cmodel_string, "small"))
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else if (!strcmp (ix86_cmodel_string, "medium"))
+ ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
+ else if (!strcmp (ix86_cmodel_string, "large"))
+ ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
+ else if (flag_pic)
+ error ("code model %s does not support PIC mode", ix86_cmodel_string);
+ else if (!strcmp (ix86_cmodel_string, "32"))
+ ix86_cmodel = CM_32;
+ else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
+ ix86_cmodel = CM_KERNEL;
+ else
+ error ("bad value (%s) for %scmodel=%s %s",
+ ix86_cmodel_string, prefix, suffix, sw);
+ }
+ else
+ {
+ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
+ use of rip-relative addressing. This eliminates fixups that
+ would otherwise be needed if this object is to be placed in a
+ DLL, and is essentially just as efficient as direct addressing. */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
+ else if (TARGET_64BIT)
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else
+ ix86_cmodel = CM_32;
+ }
+ if (ix86_asm_string != 0)
+ {
+ if (! TARGET_MACHO
+ && !strcmp (ix86_asm_string, "intel"))
+ ix86_asm_dialect = ASM_INTEL;
+ else if (!strcmp (ix86_asm_string, "att"))
+ ix86_asm_dialect = ASM_ATT;
+ else
+ error ("bad value (%s) for %sasm=%s %s",
+ ix86_asm_string, prefix, suffix, sw);
+ }
+ if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
+ error ("code model %qs not supported in the %s bit mode",
+ ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
+ if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
+ sorry ("%i-bit mode not compiled in",
+ (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
+ {
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_arch = processor_alias_table[i].processor;
+ /* Default cpu tuning to the architecture. */
+ ix86_tune = ix86_arch;
+
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+
+ if (processor_alias_table[i].flags & PTA_MMX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX;
+ if (processor_alias_table[i].flags & PTA_3DNOW
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
+ if (processor_alias_table[i].flags & PTA_3DNOW_A
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
+ if (processor_alias_table[i].flags & PTA_SSE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE;
+ if (processor_alias_table[i].flags & PTA_SSE2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
+ if (processor_alias_table[i].flags & PTA_SSE3
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
+ if (processor_alias_table[i].flags & PTA_SSSE3
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
+ if (processor_alias_table[i].flags & PTA_SSE4_1
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
+ if (processor_alias_table[i].flags & PTA_SSE4_2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_AVX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX;
+ if (processor_alias_table[i].flags & PTA_FMA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA;
+ if (processor_alias_table[i].flags & PTA_SSE4A
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
+ if (processor_alias_table[i].flags & PTA_FMA4
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
+ if (processor_alias_table[i].flags & PTA_XOP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP;
+ if (processor_alias_table[i].flags & PTA_LWP
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP;
+ if (processor_alias_table[i].flags & PTA_ABM
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
+ ix86_isa_flags |= OPTION_MASK_ISA_ABM;
+ if (processor_alias_table[i].flags & PTA_BMI
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
+ ix86_isa_flags |= OPTION_MASK_ISA_BMI;
+ if (processor_alias_table[i].flags & PTA_TBM
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
+ ix86_isa_flags |= OPTION_MASK_ISA_TBM;
+ if (processor_alias_table[i].flags & PTA_CX16
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
+ ix86_isa_flags |= OPTION_MASK_ISA_CX16;
+ if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
+ if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
+ ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
+ if (processor_alias_table[i].flags & PTA_MOVBE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
+ if (processor_alias_table[i].flags & PTA_AES
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
+ ix86_isa_flags |= OPTION_MASK_ISA_AES;
+ if (processor_alias_table[i].flags & PTA_PCLMUL
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
+ ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
+ if (processor_alias_table[i].flags & PTA_FSGSBASE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
+ ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
+ if (processor_alias_table[i].flags & PTA_RDRND
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
+ ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
+ if (processor_alias_table[i].flags & PTA_F16C
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
+ ix86_isa_flags |= OPTION_MASK_ISA_F16C;
+ if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
+ x86_prefetch_sse = true;
+
+ break;
+ }
+
+ if (!strcmp (ix86_arch_string, "generic"))
+ error ("generic CPU can be used only for %stune=%s %s",
+ prefix, suffix, sw);
+ else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
+ error ("bad value (%s) for %sarch=%s %s",
+ ix86_arch_string, prefix, suffix, sw);
+
+ ix86_arch_mask = 1u << ix86_arch;
+ for (i = 0; i < X86_ARCH_LAST; ++i)
+ ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
+ {
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_tune = processor_alias_table[i].processor;
+ if (TARGET_64BIT)
+ {
+ if (!(processor_alias_table[i].flags & PTA_64BIT))
+ {
+ if (ix86_tune_defaulted)
+ {
+ ix86_tune_string = "x86-64";
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string,
+ processor_alias_table[i].name))
+ break;
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_tune = processor_alias_table[i].processor;
+ }
+ else
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ }
+ }
+ else
+ {
+ /* Adjust tuning when compiling for 32-bit ABI. */
+ switch (ix86_tune)
+ {
+ case PROCESSOR_GENERIC64:
+ ix86_tune = PROCESSOR_GENERIC32;
+ ix86_schedule = CPU_PENTIUMPRO;
+ break;
+
+ case PROCESSOR_CORE2_64:
+ ix86_tune = PROCESSOR_CORE2_32;
+ break;
+
+ case PROCESSOR_COREI7_64:
+ ix86_tune = PROCESSOR_COREI7_32;
+ break;
+
+ default:
+ break;
+ }
+ }
+ /* Intel CPUs have always interpreted SSE prefetch instructions as
+ NOPs; so, we can enable SSE prefetch instructions even when
+ -mtune (rather than -march) points us to a processor that has them.
+ However, the VIA C3 gives a SIGILL, so we only do that for i686 and
+ higher processors. */
+ if (TARGET_CMOV
+ && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
+ x86_prefetch_sse = true;
+ break;
+ }
+
+ if (ix86_tune_specified && i == pta_size)
+ error ("bad value (%s) for %stune=%s %s",
+ ix86_tune_string, prefix, suffix, sw);
+
+ ix86_tune_mask = 1u << ix86_tune;
+ for (i = 0; i < X86_TUNE_LAST; ++i)
+ ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+
+#ifndef USE_IX86_FRAME_POINTER
+#define USE_IX86_FRAME_POINTER 0
+#endif
+
+#ifndef USE_X86_64_FRAME_POINTER
+#define USE_X86_64_FRAME_POINTER 0
+#endif
+
+ /* Set the default values for switches whose default depends on TARGET_64BIT
+ in case they weren't overwritten by command line options. */
+ if (TARGET_64BIT)
+ {
+ if (optimize > 1 && !global_options_set.x_flag_zee)
+ flag_zee = 1;
+ if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
+ flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = 0;
+ }
+ else
+ {
+ if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
+ flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
+ }
+
+ if (optimize_size)
+ ix86_cost = &ix86_size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+
+ /* Arrange to set up i386_stack_locals for all functions. */
+ init_machine_status = ix86_init_machine_status;
+
+ /* Validate -mregparm= value. */
+ if (ix86_regparm_string)
+ {
+ if (TARGET_64BIT)
+ warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
+ i = atoi (ix86_regparm_string);
+ if (i < 0 || i > REGPARM_MAX)
+ error ("%sregparm=%d%s is not between 0 and %d",
+ prefix, i, suffix, REGPARM_MAX);
+ else
+ ix86_regparm = i;
+ }
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
+
+ /* If the user has provided any of the -malign-* options,
+ warn and use that value only if -falign-* is not set.
+ Remove this code in GCC 3.2 or later. */
+ if (ix86_align_loops_string)
+ {
+ warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
+ prefix, suffix, suffix);
+ if (align_loops == 0)
+ {
+ i = atoi (ix86_align_loops_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_loops = 1 << i;
+ }
+ }
+
+ if (ix86_align_jumps_string)
+ {
+ warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
+ prefix, suffix, suffix);
+ if (align_jumps == 0)
+ {
+ i = atoi (ix86_align_jumps_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_jumps = 1 << i;
+ }
+ }
+
+ if (ix86_align_funcs_string)
+ {
+ warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
+ prefix, suffix, suffix);
+ if (align_functions == 0)
+ {
+ i = atoi (ix86_align_funcs_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_functions = 1 << i;
+ }
+ }
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ if (align_functions == 0)
+ {
+ align_functions = processor_target_table[ix86_tune].align_func;
+ }
+
+ /* Validate -mbranch-cost= value, or provide default. */
+ ix86_branch_cost = ix86_cost->branch_cost;
+ if (ix86_branch_cost_string)
+ {
+ i = atoi (ix86_branch_cost_string);
+ if (i < 0 || i > 5)
+ error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
+ else
+ ix86_branch_cost = i;
+ }
+ if (ix86_section_threshold_string)
+ {
+ i = atoi (ix86_section_threshold_string);
+ if (i < 0)
+ error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
+ else
+ ix86_section_threshold = i;
+ }
+
+ if (ix86_tls_dialect_string)
+ {
+ if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU;
+ else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU2;
+ else
+ error ("bad value (%s) for %stls-dialect=%s %s",
+ ix86_tls_dialect_string, prefix, suffix, sw);
+ }
+
+ if (ix87_precision_string)
+ {
+ i = atoi (ix87_precision_string);
+ if (i != 32 && i != 64 && i != 80)
+ error ("pc%d is not valid precision setting (32, 64 or 80)", i);
+ }
+
+ if (TARGET_64BIT)
+ {
+ target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
+
+ /* Enable by default the SSE and MMX builtins. Do allow the user to
+ explicitly disable any of these. In particular, disabling SSE and
+ MMX for kernel code is extremely useful. */
+ if (!ix86_arch_specified)
+ ix86_isa_flags
+ |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
+ | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
+
+ if (TARGET_RTD)
+ warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
+ }
+ else
+ {
+ target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
+
+ if (!ix86_arch_specified)
+ ix86_isa_flags
+ |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
+
+ /* The i386 ABI does not specify a red zone. It still makes sense to
+ use one when the programmer takes care to keep the stack from being
+ clobbered. */
+ if (!(target_flags_explicit & MASK_NO_RED_ZONE))
+ target_flags |= MASK_NO_RED_ZONE;
+ }
+
+ /* Keep nonleaf frame pointers. */
+ if (flag_omit_frame_pointer)
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ else if (TARGET_OMIT_LEAF_FRAME_POINTER)
+ flag_omit_frame_pointer = 1;
+
+ /* If we're doing fast math, we don't care about comparison order
+ wrt NaNs. This lets us use a shorter comparison sequence. */
+ if (flag_finite_math_only)
+ target_flags &= ~MASK_IEEE_FP;
+
+ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
+ since the insns won't need emulation. */
+ if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
+ target_flags &= ~MASK_NO_FANCY_MATH_387;
+
+ /* Likewise, if the target doesn't have a 387, or we've specified
+ software floating point, don't use 387 inline intrinsics. */
+ if (!TARGET_80387)
+ target_flags |= MASK_NO_FANCY_MATH_387;
+
+ /* Turn on MMX builtins for -msse. */
+ if (TARGET_SSE)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
+ x86_prefetch_sse = true;
+ }
+
+ /* Turn on popcnt instruction for -msse4.2 or -mabm. */
+ if (TARGET_SSE4_2 || TARGET_ABM)
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
+
+ /* Validate -mpreferred-stack-boundary= value or default it to
+ PREFERRED_STACK_BOUNDARY_DEFAULT. */
+ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
+ if (ix86_preferred_stack_boundary_string)
+ {
+ int min = (TARGET_64BIT ? 4 : 2);
+ int max = (TARGET_SEH ? 4 : 12);
+
+ i = atoi (ix86_preferred_stack_boundary_string);
+ if (i < min || i > max)
+ {
+ if (min == max)
+ error ("%spreferred-stack-boundary%s is not supported "
+ "for this target", prefix, suffix);
+ else
+ error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
+ prefix, i, suffix, min, max);
+ }
+ else
+ ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
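+
+ /* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * 8
+ = 128 bits, i.e. the 16-byte stack alignment mandated by the
+ x86-64 psABI. */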
+
+ /* Set the default value for -mstackrealign. */
+ if (ix86_force_align_arg_pointer == -1)
+ ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
+
+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+ /* Validate -mincoming-stack-boundary= value or default it to
+ MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
+ ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
+ if (ix86_incoming_stack_boundary_string)
+ {
+ i = atoi (ix86_incoming_stack_boundary_string);
+ if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ error ("-mincoming-stack-boundary=%d is not between %d and 12",
+ i, TARGET_64BIT ? 4 : 2);
+ else
+ {
+ ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ ix86_incoming_stack_boundary
+ = ix86_user_incoming_stack_boundary;
+ }
+ }
+
+ /* Accept -msseregparm only if at least SSE support is enabled. */
+ if (TARGET_SSEREGPARM
+ && ! TARGET_SSE)
+ error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
+
+ ix86_fpmath = TARGET_FPMATH_DEFAULT;
+ if (ix86_fpmath_string != 0)
+ {
+ if (! strcmp (ix86_fpmath_string, "387"))
+ ix86_fpmath = FPMATH_387;
+ else if (! strcmp (ix86_fpmath_string, "sse"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else if (! strcmp (ix86_fpmath_string, "387,sse")
+ || ! strcmp (ix86_fpmath_string, "387+sse")
+ || ! strcmp (ix86_fpmath_string, "sse,387")
+ || ! strcmp (ix86_fpmath_string, "sse+387")
+ || ! strcmp (ix86_fpmath_string, "both"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else if (!TARGET_80387)
+ {
+ warning (0, "387 instruction set disabled, using SSE arithmetics");
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else
+ ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
+ }
+ else
+ error ("bad value (%s) for %sfpmath=%s %s",
+ ix86_fpmath_string, prefix, suffix, sw);
+ }
+
+ /* If the i387 is disabled, then do not return values in it. */
+ if (!TARGET_80387)
+ target_flags &= ~MASK_FLOAT_RETURNS;
+
+ /* Use an external vectorized library for vectorizing intrinsics. */
+ if (ix86_veclibabi_string)
+ {
+ if (strcmp (ix86_veclibabi_string, "svml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_svml;
+ else if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_acml;
+ else
+ error ("unknown vectorization library ABI type (%s) for "
+ "%sveclibabi=%s %s", ix86_veclibabi_string,
+ prefix, suffix, sw);
+ }
+
+ if ((!USE_IX86_FRAME_POINTER
+ || (x86_accumulate_outgoing_args & ix86_tune_mask))
+ && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ && !optimize_size)
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+
+ /* ??? Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or %saccumulate-outgoing-args%s for correctness",
+ prefix, suffix);
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+ /* If stack probes are required, the space used for large function
+ arguments on the stack must also be probed, so enable
+ -maccumulate-outgoing-args so this happens in the prologue. */
+ if (TARGET_STACK_PROBE
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "stack probing requires %saccumulate-outgoing-args%s "
+ "for correctness", prefix, suffix);
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
+ {
+ char *p;
+ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
+ p = strchr (internal_label_prefix, 'X');
+ internal_label_prefix_len = p - internal_label_prefix;
+ *p = '\0';
+ }
+
+  /* When a scheduling description is not available, disable the scheduler
+     pass so it won't slow down the compilation and make x87 code slower. */
+ if (!TARGET_SCHEDULE)
+ flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ ix86_cost->simultaneous_prefetches,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+  /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
+ if (flag_prefetch_loop_arrays < 0
+ && HAVE_prefetch
+ && optimize >= 3
+ && software_prefetching_beneficial_p ())
+ flag_prefetch_loop_arrays = 1;
+
+ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
+ can be optimized to ap = __builtin_next_arg (0). */
+ if (!TARGET_64BIT && !flag_split_stack)
+ targetm.expand_builtin_va_start = NULL;
+
+ if (TARGET_64BIT)
+ {
+ ix86_gen_leave = gen_leave_rex64;
+ ix86_gen_add3 = gen_adddi3;
+ ix86_gen_sub3 = gen_subdi3;
+ ix86_gen_sub3_carry = gen_subdi3_carry;
+ ix86_gen_one_cmpl2 = gen_one_cmpldi2;
+ ix86_gen_monitor = gen_sse3_monitor64;
+ ix86_gen_andsp = gen_anddi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
+ }
+ else
+ {
+ ix86_gen_leave = gen_leave;
+ ix86_gen_add3 = gen_addsi3;
+ ix86_gen_sub3 = gen_subsi3;
+ ix86_gen_sub3_carry = gen_subsi3_carry;
+ ix86_gen_one_cmpl2 = gen_one_cmplsi2;
+ ix86_gen_monitor = gen_sse3_monitor;
+ ix86_gen_andsp = gen_andsi3;
+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
+ ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
+ }
+
+#ifdef USE_IX86_CLD
+ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
+ if (!TARGET_64BIT)
+ target_flags |= MASK_CLD & ~target_flags_explicit;
+#endif
+
+ if (!TARGET_64BIT && flag_pic)
+ {
+ if (flag_fentry > 0)
+ sorry ("-mfentry isn%'t supported for 32-bit in combination "
+ "with -fpic");
+ flag_fentry = 0;
+ }
+ else if (TARGET_SEH)
+ {
+ if (flag_fentry == 0)
+ sorry ("-mno-fentry isn%'t compatible with SEH");
+ flag_fentry = 1;
+ }
+ else if (flag_fentry < 0)
+ {
+#if defined(PROFILE_BEFORE_PROLOGUE)
+ flag_fentry = 1;
+#else
+ flag_fentry = 0;
+#endif
+ }
+
+  /* Save the initial options in case the user uses function-specific options.  */
+ if (main_args_p)
+ target_option_default_node = target_option_current_node
+ = build_target_option_node ();
+
+ if (TARGET_AVX)
+ {
+      /* When not optimizing for size, enable the vzeroupper optimization
+	 for TARGET_AVX with -fexpensive-optimizations and split 32-byte
+	 AVX unaligned loads/stores. */
+ if (!optimize_size)
+ {
+ if (flag_expensive_optimizations
+ && !(target_flags_explicit & MASK_VZEROUPPER))
+ target_flags |= MASK_VZEROUPPER;
+ if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+ if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
+ target_flags |= MASK_PREFER_AVX128;
+ }
+ }
+ else
+ {
+ /* Disable vzeroupper pass if TARGET_AVX is disabled. */
+ target_flags &= ~MASK_VZEROUPPER;
+ }
+}
+
+/* Return TRUE if VAL is passed in a register with a 256-bit AVX mode. */
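+/* For example (illustrative), a V8SF return value in a single AVX
+   register, or a PARALLEL containing such a register, counts as 256-bit
+   AVX state for the vzeroupper bookkeeping in init_cumulative_args.  */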
+
+static bool
+function_pass_avx256_p (const_rtx val)
+{
+ if (!val)
+ return false;
+
+ if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
+ return true;
+
+ if (GET_CODE (val) == PARALLEL)
+ {
+ int i;
+ rtx r;
+
+ for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
+ {
+ r = XVECEXP (val, 0, i);
+ if (GET_CODE (r) == EXPR_LIST
+ && XEXP (r, 0)
+ && REG_P (XEXP (r, 0))
+ && (GET_MODE (XEXP (r, 0)) == OImode
+ || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Implement the TARGET_OPTION_OVERRIDE hook. */
+
+static void
+ix86_option_override (void)
+{
+ ix86_option_override_internal (true);
+}
+
+/* Update register usage after having seen the compiler flags. */
+
+static void
+ix86_conditional_register_usage (void)
+{
+ int i;
+ unsigned int j;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (fixed_regs[i] > 1)
+ fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
+ if (call_used_regs[i] > 1)
+ call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
+ }
+
+ /* The PIC register, if it exists, is fixed. */
+ j = PIC_OFFSET_TABLE_REGNUM;
+ if (j != INVALID_REGNUM)
+ fixed_regs[j] = call_used_regs[j] = 1;
+
+ /* The MS_ABI changes the set of call-used registers. */
+ if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
+ {
+ call_used_regs[SI_REG] = 0;
+ call_used_regs[DI_REG] = 0;
+ call_used_regs[XMM6_REG] = 0;
+ call_used_regs[XMM7_REG] = 0;
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ call_used_regs[i] = 0;
+ }
+
+ /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
+ other call-clobbered regs for 64-bit. */
+ if (TARGET_64BIT)
+ {
+ CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
+ && call_used_regs[i])
+ SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
+ }
+
+ /* If MMX is disabled, squash the registers. */
+ if (! TARGET_MMX)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+ /* If SSE is disabled, squash the registers. */
+ if (! TARGET_SSE)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+ /* If the FPU is disabled, squash the registers. */
+ if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+ /* If 32-bit, squash the 64-bit registers. */
+ if (! TARGET_64BIT)
+ {
+ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
+ reg_names[i] = "";
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ reg_names[i] = "";
+ }
+}
+
+
+/* Save the current options.  */
+
+static void
+ix86_function_specific_save (struct cl_target_option *ptr)
+{
+ ptr->arch = ix86_arch;
+ ptr->schedule = ix86_schedule;
+ ptr->tune = ix86_tune;
+ ptr->fpmath = ix86_fpmath;
+ ptr->branch_cost = ix86_branch_cost;
+ ptr->tune_defaulted = ix86_tune_defaulted;
+ ptr->arch_specified = ix86_arch_specified;
+ ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
+ ptr->ix86_target_flags_explicit = target_flags_explicit;
+
+ /* The fields are char but the variables are not; make sure the
+ values fit in the fields. */
+ gcc_assert (ptr->arch == ix86_arch);
+ gcc_assert (ptr->schedule == ix86_schedule);
+ gcc_assert (ptr->tune == ix86_tune);
+ gcc_assert (ptr->fpmath == ix86_fpmath);
+ gcc_assert (ptr->branch_cost == ix86_branch_cost);
+}
+
+/* Restore the current options.  */
+
+static void
+ix86_function_specific_restore (struct cl_target_option *ptr)
+{
+ enum processor_type old_tune = ix86_tune;
+ enum processor_type old_arch = ix86_arch;
+ unsigned int ix86_arch_mask, ix86_tune_mask;
+ int i;
+
+ ix86_arch = (enum processor_type) ptr->arch;
+ ix86_schedule = (enum attr_cpu) ptr->schedule;
+ ix86_tune = (enum processor_type) ptr->tune;
+ ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
+ ix86_branch_cost = ptr->branch_cost;
+ ix86_tune_defaulted = ptr->tune_defaulted;
+ ix86_arch_specified = ptr->arch_specified;
+ ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
+ target_flags_explicit = ptr->ix86_target_flags_explicit;
+
+  /* Recreate the arch feature tests if the arch changed.  */
+ if (old_arch != ix86_arch)
+ {
+ ix86_arch_mask = 1u << ix86_arch;
+ for (i = 0; i < X86_ARCH_LAST; ++i)
+ ix86_arch_features[i]
+ = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
+ }
+
+  /* Recreate the tune optimization tests.  */
+ if (old_tune != ix86_tune)
+ {
+ ix86_tune_mask = 1u << ix86_tune;
+ for (i = 0; i < X86_TUNE_LAST; ++i)
+ ix86_tune_features[i]
+ = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+ }
+}
+
+/* Print the current options.  */
+
+static void
+ix86_function_specific_print (FILE *file, int indent,
+ struct cl_target_option *ptr)
+{
+ char *target_string
+ = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
+ NULL, NULL, NULL, false);
+
+ fprintf (file, "%*sarch = %d (%s)\n",
+ indent, "",
+ ptr->arch,
+ ((ptr->arch < TARGET_CPU_DEFAULT_max)
+ ? cpu_names[ptr->arch]
+ : "<unknown>"));
+
+ fprintf (file, "%*stune = %d (%s)\n",
+ indent, "",
+ ptr->tune,
+ ((ptr->tune < TARGET_CPU_DEFAULT_max)
+ ? cpu_names[ptr->tune]
+ : "<unknown>"));
+
+ fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
+ (ptr->fpmath & FPMATH_387) ? ", 387" : "",
+ (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
+ fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
+
+ if (target_string)
+ {
+ fprintf (file, "%*s%s\n", indent, "", target_string);
+ free (target_string);
+ }
+}
+
+
+/* Inner function to process the attribute((target(...))); it takes an
+   argument and sets the current options from that argument. If we have a
+   list, recursively go over the list. */
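+/* For example (illustrative):
+     __attribute__((target ("sse4.2,no-3dnow,arch=core2"))) int f (void);
+   is parsed as three comma-separated options: an ISA enable ("sse4.2"),
+   an ISA disable (the "no-" prefix) and a string option ("arch=").  */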
+
+static bool
+ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
+{
+ char *next_optstr;
+ bool ret = true;
+
+#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
+#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
+#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
+#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
+
+ enum ix86_opt_type
+ {
+ ix86_opt_unknown,
+ ix86_opt_yes,
+ ix86_opt_no,
+ ix86_opt_str,
+ ix86_opt_isa
+ };
+
+ static const struct
+ {
+ const char *string;
+ size_t len;
+ enum ix86_opt_type type;
+ int opt;
+ int mask;
+ } attrs[] = {
+ /* isa options */
+ IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
+ IX86_ATTR_ISA ("abm", OPT_mabm),
+ IX86_ATTR_ISA ("bmi", OPT_mbmi),
+ IX86_ATTR_ISA ("tbm", OPT_mtbm),
+ IX86_ATTR_ISA ("aes", OPT_maes),
+ IX86_ATTR_ISA ("avx", OPT_mavx),
+ IX86_ATTR_ISA ("mmx", OPT_mmmx),
+ IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
+ IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
+ IX86_ATTR_ISA ("sse", OPT_msse),
+ IX86_ATTR_ISA ("sse2", OPT_msse2),
+ IX86_ATTR_ISA ("sse3", OPT_msse3),
+ IX86_ATTR_ISA ("sse4", OPT_msse4),
+ IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
+ IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
+ IX86_ATTR_ISA ("sse4a", OPT_msse4a),
+ IX86_ATTR_ISA ("ssse3", OPT_mssse3),
+ IX86_ATTR_ISA ("fma4", OPT_mfma4),
+ IX86_ATTR_ISA ("xop", OPT_mxop),
+ IX86_ATTR_ISA ("lwp", OPT_mlwp),
+ IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
+ IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
+ IX86_ATTR_ISA ("f16c", OPT_mf16c),
+
+ /* string options */
+ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
+ IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
+ IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
+
+ /* flag options */
+ IX86_ATTR_YES ("cld",
+ OPT_mcld,
+ MASK_CLD),
+
+ IX86_ATTR_NO ("fancy-math-387",
+ OPT_mfancy_math_387,
+ MASK_NO_FANCY_MATH_387),
+
+ IX86_ATTR_YES ("ieee-fp",
+ OPT_mieee_fp,
+ MASK_IEEE_FP),
+
+ IX86_ATTR_YES ("inline-all-stringops",
+ OPT_minline_all_stringops,
+ MASK_INLINE_ALL_STRINGOPS),
+
+ IX86_ATTR_YES ("inline-stringops-dynamically",
+ OPT_minline_stringops_dynamically,
+ MASK_INLINE_STRINGOPS_DYNAMICALLY),
+
+ IX86_ATTR_NO ("align-stringops",
+ OPT_mno_align_stringops,
+ MASK_NO_ALIGN_STRINGOPS),
+
+ IX86_ATTR_YES ("recip",
+ OPT_mrecip,
+ MASK_RECIP),
+
+ };
+
+ /* If this is a list, recurse to get the options. */
+ if (TREE_CODE (args) == TREE_LIST)
+ {
+ bool ret = true;
+
+ for (; args; args = TREE_CHAIN (args))
+ if (TREE_VALUE (args)
+ && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
+ ret = false;
+
+ return ret;
+ }
+
+ else if (TREE_CODE (args) != STRING_CST)
+ gcc_unreachable ();
+
+ /* Handle multiple arguments separated by commas. */
+ next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
+
+ while (next_optstr && *next_optstr != '\0')
+ {
+ char *p = next_optstr;
+ char *orig_p = p;
+ char *comma = strchr (next_optstr, ',');
+ const char *opt_string;
+ size_t len, opt_len;
+ int opt;
+ bool opt_set_p;
+ char ch;
+ unsigned i;
+ enum ix86_opt_type type = ix86_opt_unknown;
+ int mask = 0;
+
+ if (comma)
+ {
+ *comma = '\0';
+ len = comma - next_optstr;
+ next_optstr = comma + 1;
+ }
+ else
+ {
+ len = strlen (p);
+ next_optstr = NULL;
+ }
+
+ /* Recognize no-xxx. */
+ if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
+ {
+ opt_set_p = false;
+ p += 3;
+ len -= 3;
+ }
+ else
+ opt_set_p = true;
+
+ /* Find the option. */
+ ch = *p;
+ opt = N_OPTS;
+ for (i = 0; i < ARRAY_SIZE (attrs); i++)
+ {
+ type = attrs[i].type;
+ opt_len = attrs[i].len;
+ if (ch == attrs[i].string[0]
+ && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
+ && memcmp (p, attrs[i].string, opt_len) == 0)
+ {
+ opt = attrs[i].opt;
+ mask = attrs[i].mask;
+ opt_string = attrs[i].string;
+ break;
+ }
+ }
+
+ /* Process the option. */
+ if (opt == N_OPTS)
+ {
+ error ("attribute(target(\"%s\")) is unknown", orig_p);
+ ret = false;
+ }
+
+ else if (type == ix86_opt_isa)
+ ix86_handle_option (opt, p, opt_set_p);
+
+ else if (type == ix86_opt_yes || type == ix86_opt_no)
+ {
+ if (type == ix86_opt_no)
+ opt_set_p = !opt_set_p;
+
+ if (opt_set_p)
+ target_flags |= mask;
+ else
+ target_flags &= ~mask;
+ }
+
+ else if (type == ix86_opt_str)
+ {
+ if (p_strings[opt])
+ {
+ error ("option(\"%s\") was already specified", opt_string);
+ ret = false;
+ }
+ else
+ p_strings[opt] = xstrdup (p + opt_len);
+ }
+
+ else
+ gcc_unreachable ();
+ }
+
+ return ret;
+}
+
+/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
+
+tree
+ix86_valid_target_attribute_tree (tree args)
+{
+ const char *orig_arch_string = ix86_arch_string;
+ const char *orig_tune_string = ix86_tune_string;
+ const char *orig_fpmath_string = ix86_fpmath_string;
+ int orig_tune_defaulted = ix86_tune_defaulted;
+ int orig_arch_specified = ix86_arch_specified;
+ char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
+ tree t = NULL_TREE;
+ int i;
+ struct cl_target_option *def
+ = TREE_TARGET_OPTION (target_option_default_node);
+
+ /* Process each of the options on the chain. */
+ if (! ix86_valid_target_attribute_inner_p (args, option_strings))
+ return NULL_TREE;
+
+  /* If the changed options are different from the default, rerun
+     ix86_option_override_internal, and then save the options away.
+     The string options are attribute options, and will be undone
+     when we copy the save structure. */
+ if (ix86_isa_flags != def->x_ix86_isa_flags
+ || target_flags != def->x_target_flags
+ || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
+ || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
+ || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
+ {
+ /* If we are using the default tune= or arch=, undo the string assigned,
+ and use the default. */
+ if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
+ ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
+ else if (!orig_arch_specified)
+ ix86_arch_string = NULL;
+
+ if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
+ ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
+ else if (orig_tune_defaulted)
+ ix86_tune_string = NULL;
+
+ /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
+ if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
+ ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
+ else if (!TARGET_64BIT && TARGET_SSE)
+ ix86_fpmath_string = "sse,387";
+
+      /* Do any overrides, such as arch=xxx or tune=xxx. */
+ ix86_option_override_internal (false);
+
+      /* Add any builtin functions for the new ISA, if any. */
+ ix86_add_new_builtins (ix86_isa_flags);
+
+ /* Save the current options unless we are validating options for
+ #pragma. */
+ t = build_target_option_node ();
+
+ ix86_arch_string = orig_arch_string;
+ ix86_tune_string = orig_tune_string;
+ ix86_fpmath_string = orig_fpmath_string;
+
+      /* Free up memory allocated to hold the strings.  */
+ for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
+ if (option_strings[i])
+ free (option_strings[i]);
+ }
+
+ return t;
+}
+
+/* Hook to validate attribute((target("string"))). */
+
+static bool
+ix86_valid_target_attribute_p (tree fndecl,
+ tree ARG_UNUSED (name),
+ tree args,
+ int ARG_UNUSED (flags))
+{
+ struct cl_target_option cur_target;
+ bool ret = true;
+ tree old_optimize = build_optimization_node ();
+ tree new_target, new_optimize;
+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ /* If the function changed the optimization levels as well as setting target
+ options, start with the optimizations specified. */
+ if (func_optimize && func_optimize != old_optimize)
+ cl_optimization_restore (&global_options,
+ TREE_OPTIMIZATION (func_optimize));
+
+ /* The target attributes may also change some optimization flags, so update
+ the optimization options if necessary. */
+ cl_target_option_save (&cur_target, &global_options);
+ new_target = ix86_valid_target_attribute_tree (args);
+ new_optimize = build_optimization_node ();
+
+ if (!new_target)
+ ret = false;
+
+ else if (fndecl)
+ {
+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+
+ if (old_optimize != new_optimize)
+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+ }
+
+ cl_target_option_restore (&global_options, &cur_target);
+
+ if (old_optimize != new_optimize)
+ cl_optimization_restore (&global_options,
+ TREE_OPTIMIZATION (old_optimize));
+
+ return ret;
+}
+
+
+/* Hook to determine if one function can safely inline another. */
+
+static bool
+ix86_can_inline_p (tree caller, tree callee)
+{
+ bool ret = false;
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ /* If callee has no option attributes, then it is ok to inline. */
+ if (!callee_tree)
+ ret = true;
+
+  /* If the caller has no option attributes but the callee does, then it is
+     not ok to inline. */
+ else if (!caller_tree)
+ ret = false;
+
+ else
+ {
+ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
+ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
+
+      /* Callee's ISA options should be a subset of the caller's, i.e. an
+	 SSE4 function can inline an SSE2 function but an SSE2 function
+	 can't inline an SSE4 function. */
+ if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
+ != callee_opts->x_ix86_isa_flags)
+ ret = false;
+
+ /* See if we have the same non-isa options. */
+ else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
+ ret = false;
+
+ /* See if arch, tune, etc. are the same. */
+ else if (caller_opts->arch != callee_opts->arch)
+ ret = false;
+
+ else if (caller_opts->tune != callee_opts->tune)
+ ret = false;
+
+ else if (caller_opts->fpmath != callee_opts->fpmath)
+ ret = false;
+
+ else if (caller_opts->branch_cost != callee_opts->branch_cost)
+ ret = false;
+
+ else
+ ret = true;
+ }
+
+ return ret;
+}
+
+
+/* Remember the last target of ix86_set_current_function. */
+static GTY(()) tree ix86_previous_fndecl;
+
+/* Establish appropriate back-end context for processing the function
+ FNDECL. The argument might be NULL to indicate processing at top
+ level, outside of any function scope. */
+static void
+ix86_set_current_function (tree fndecl)
+{
+ /* Only change the context if the function changes. This hook is called
+ several times in the course of compiling a function, and we don't want to
+ slow things down too much or call target_reinit when it isn't safe. */
+ if (fndecl && fndecl != ix86_previous_fndecl)
+ {
+ tree old_tree = (ix86_previous_fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
+ : NULL_TREE);
+
+ tree new_tree = (fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
+ : NULL_TREE);
+
+ ix86_previous_fndecl = fndecl;
+ if (old_tree == new_tree)
+ ;
+
+ else if (new_tree)
+ {
+ cl_target_option_restore (&global_options,
+ TREE_TARGET_OPTION (new_tree));
+ target_reinit ();
+ }
+
+ else if (old_tree)
+ {
+ struct cl_target_option *def
+ = TREE_TARGET_OPTION (target_option_current_node);
+
+ cl_target_option_restore (&global_options, def);
+ target_reinit ();
+ }
+ }
+}
+
+
+/* Return true if this goes in large data/bss. */
+
+static bool
+ix86_in_large_data_p (tree exp)
+{
+ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
+ return false;
+
+ /* Functions are never large data. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".ldata") == 0
+ || strcmp (section, ".lbss") == 0)
+ return true;
+ return false;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in data because it might be too big when completed. */
+ if (!size || size > ix86_section_threshold)
+ return true;
+ }
+
+ return false;
+}
+
+/* Switch to the appropriate section for output of DECL.
+ DECL is either a `VAR_DECL' node or a constant of some sort.
+ RELOC indicates whether forming the initial value of DECL requires
+ link-time relocations. */
+
+static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
+ ATTRIBUTE_UNUSED;
+
+static section *
+x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *sname = NULL;
+ unsigned int flags = SECTION_WRITE;
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ sname = ".ldata";
+ break;
+ case SECCAT_DATA_REL:
+ sname = ".ldata.rel";
+ break;
+ case SECCAT_DATA_REL_LOCAL:
+ sname = ".ldata.rel.local";
+ break;
+ case SECCAT_DATA_REL_RO:
+ sname = ".ldata.rel.ro";
+ break;
+ case SECCAT_DATA_REL_RO_LOCAL:
+ sname = ".ldata.rel.ro.local";
+ break;
+ case SECCAT_BSS:
+ sname = ".lbss";
+ flags |= SECTION_BSS;
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ sname = ".lrodata";
+ flags = 0;
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+	  /* We don't split these for the medium model. Place them into
+	     default sections and hope for the best. */
+ break;
+ }
+ if (sname)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section (sname, flags, NULL);
+ return get_named_section (decl, sname, reloc);
+ }
+ }
+ return default_elf_select_section (decl, reloc, align);
+}
+
+/* Build up a unique section name, expressed as a
+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
+ RELOC indicates whether the initial value of EXP requires
+ link-time relocations. */
+
+static void ATTRIBUTE_UNUSED
+x86_64_elf_unique_section (tree decl, int reloc)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *prefix = NULL;
+ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
+ bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
+
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ case SECCAT_DATA_REL:
+ case SECCAT_DATA_REL_LOCAL:
+ case SECCAT_DATA_REL_RO:
+ case SECCAT_DATA_REL_RO_LOCAL:
+ prefix = one_only ? ".ld" : ".ldata";
+ break;
+ case SECCAT_BSS:
+ prefix = one_only ? ".lb" : ".lbss";
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ prefix = one_only ? ".lr" : ".lrodata";
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+	  /* We don't split these for the medium model. Place them into
+	     default sections and hope for the best. */
+ break;
+ }
+ if (prefix)
+ {
+ const char *name, *linkonce;
+ char *string;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+
+ /* If we're using one_only, then there needs to be a .gnu.linkonce
+ prefix to the section name. */
+ linkonce = one_only ? ".gnu.linkonce" : "";
+
+ string = ACONCAT ((linkonce, prefix, ".", name, NULL));
+
+ DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
+ return;
+ }
+ }
+ default_unique_section (decl, reloc);
+}
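+
+/* For example (illustrative), a one-only large read-only object "foo"
+   gets the section name ".gnu.linkonce.lr.foo"; without one_only it is
+   placed in ".lrodata.foo".  */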
+
+#ifdef COMMON_ASM_OP
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object.
+
+   For medium model x86-64 we need to use the .largecomm directive
+   for large objects. */
+void
+x86_elf_aligned_common (FILE *file,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ fputs (".largecomm\t", file);
+ else
+ fputs (COMMON_ASM_OP, file);
+ assemble_name (file, name);
+ fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
+ size, align / BITS_PER_UNIT);
+}
+#endif
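+
+/* For example (illustrative), under -mcmodel=medium with the default
+   section threshold, a 128 KiB object "buf" aligned to 32 bytes emits
+   ".largecomm\tbuf,131072,32".  */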
+
+/* Utility function for targets to use in implementing
+ ASM_OUTPUT_ALIGNED_BSS. */
+
+void
+x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ switch_to_section (get_named_section (decl, ".lbss", 0));
+ else
+ switch_to_section (bss_section);
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+ last_assemble_variable_decl = decl;
+ ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+  /* The standard thing is to just output a label for the object. */
+ ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+
+static const struct default_options ix86_option_optimization_table[] =
+ {
+    /* Turn off -fschedule-insns by default. It tends to make the
+       register shortage even worse. */
+#ifdef INSN_SCHEDULING
+ { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
+#endif
+
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS,
+#endif
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Implement TARGET_OPTION_INIT_STRUCT. */
+
+static void
+ix86_option_init_struct (struct gcc_options *opts)
+{
+ if (TARGET_MACHO)
+ /* The Darwin libraries never set errno, so we might as well
+ avoid calling them when that's the only reason we would. */
+ opts->x_flag_errno_math = 0;
+
+ opts->x_flag_pcc_struct_return = 2;
+ opts->x_flag_asynchronous_unwind_tables = 2;
+ opts->x_flag_vect_cost_model = 1;
+}
+
+/* Decide whether we must probe the stack before any space allocation
+ on this target. It's essentially TARGET_STACK_PROBE except when
+ -fstack-check causes the stack to be already probed differently. */
+
+bool
+ix86_target_stack_probe (void)
+{
+ /* Do not probe the stack twice if static stack checking is enabled. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ return false;
+
+ return TARGET_STACK_PROBE;
+}
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+
+static bool
+ix86_function_ok_for_sibcall (tree decl, tree exp)
+{
+ tree type, decl_or_type;
+ rtx a, b;
+
+ /* If we are generating position-independent code, we cannot sibcall
+ optimize any indirect call, or a direct call to a global function,
+ as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
+ if (!TARGET_MACHO
+ && !TARGET_64BIT
+ && flag_pic
+ && (!decl || !targetm.binds_local_p (decl)))
+ return false;
+
+ /* If we need to align the outgoing stack, then sibcalling would
+ unalign the stack, which may break the called function. */
+ if (ix86_minimum_incoming_stack_boundary (true)
+ < PREFERRED_STACK_BOUNDARY)
+ return false;
+
+ if (decl)
+ {
+ decl_or_type = decl;
+ type = TREE_TYPE (decl);
+ }
+ else
+ {
+ /* We're looking at the CALL_EXPR, we need the type of the function. */
+ type = CALL_EXPR_FN (exp); /* pointer expression */
+ type = TREE_TYPE (type); /* pointer type */
+ type = TREE_TYPE (type); /* function type */
+ decl_or_type = type;
+ }
+
+  /* Check that the return value locations are the same. For instance,
+     if we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does or, conversely, from a function that does return
+ a float to a function that doesn't; the necessary stack adjustment
+ would not be executed. This is also the place we notice
+ differences in the return value ABI. Note that it is ok for one
+ of the functions to have void return type as long as the return
+ value of the other is passed in a register. */
+ a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
+ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (STACK_REG_P (a) || STACK_REG_P (b))
+ {
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Disable sibcall if we need to generate vzeroupper after
+ callee returns. */
+ if (TARGET_VZEROUPPER
+ && cfun->machine->callee_return_avx256_p
+ && !cfun->machine->caller_return_avx256_p)
+ return false;
+ }
+ else if (!rtx_equal_p (a, b))
+ return false;
+
+ if (TARGET_64BIT)
+ {
+ /* The SYSV ABI has more call-clobbered registers;
+ disallow sibcalls from MS to SYSV. */
+ if (cfun->machine->call_abi == MS_ABI
+ && ix86_function_type_abi (type) == SYSV_ABI)
+ return false;
+ }
+ else
+ {
+ /* If this call is indirect, we'll need to be able to use a
+ call-clobbered register for the address of the target function.
+ Make sure that all such registers are not used for passing
+ parameters. Note that DLLIMPORT functions are indirect. */
+ if (!decl
+ || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
+ {
+ if (ix86_function_regparm (type, NULL) >= 3)
+ {
+ /* ??? Need to count the actual number of registers to be used,
+ not the possible number of registers. Fix later. */
+ return false;
+ }
+ }
+ }
+
+ /* Otherwise okay. That also includes certain types of indirect calls. */
+ return true;
+}
+
+/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
+ and "sseregparm" calling convention attributes;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+ix86_handle_cconv_attribute (tree *node, tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+  /* Can combine regparm with all attributes but fastcall and thiscall. */
+ if (is_attribute_p ("regparm", name))
+ {
+ tree cst;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+	  error ("regparm and thiscall attributes are not compatible");
+ }
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute requires an integer constant argument",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, REGPARM_MAX) > 0)
+ {
+ warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
+ name, REGPARM_MAX);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
+ if (TARGET_64BIT)
+ {
+ /* Do not warn when emulating the MS ABI. */
+ if ((TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE)
+ || ix86_function_type_abi (*node) != MS_ABI)
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+  /* Can combine fastcall with sseregparm. */
+ if (is_attribute_p ("fastcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
+ }
+
+  /* Can combine stdcall with regparm and sseregparm. */
+ else if (is_attribute_p ("stdcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and fastcall attributes are not compatible");
+ }
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
+ }
+
+ /* Can combine cdecl with regparm and sseregparm. */
+ else if (is_attribute_p ("cdecl", name))
+ {
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
+ }
+ else if (is_attribute_p ("thiscall", name))
+ {
+ if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
+	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
+ name);
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
+ }
+
+ /* Can combine sseregparm with all attributes. */
+
+ return NULL_TREE;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+
+static int
+ix86_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ /* Check for mismatch of non-default calling convention. */
+ const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE
+ && TREE_CODE (type1) != METHOD_TYPE)
+ return 1;
+
+ /* Check for mismatched fastcall/regparm types. */
+ if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
+ || (ix86_function_regparm (type1, NULL)
+ != ix86_function_regparm (type2, NULL)))
+ return 0;
+
+ /* Check for mismatched sseregparm types. */
+ if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ /* Check for mismatched thiscall types. */
+ if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ /* Check for mismatched return types (cdecl vs stdcall). */
+ if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ return 1;
+}
+
+/* Return the regparm value for a function with the indicated TYPE and DECL.
+   DECL may be NULL when calling a function indirectly
+   or considering a libcall.  */
+
+static int
+ix86_function_regparm (const_tree type, const_tree decl)
+{
+ tree attr;
+ int regparm;
+
+ if (TARGET_64BIT)
+ return (ix86_function_type_abi (type) == SYSV_ABI
+ ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
+
+ regparm = ix86_regparm;
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+ return regparm;
+ }
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ return 2;
+
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ return 1;
+
+ /* Use register calling convention for local functions when possible. */
+ if (decl
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && optimize
+ && !(profile_flag && !flag_fentry))
+ {
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
+ if (i && i->local && i->can_change_signature)
+ {
+ int local_regparm, globals = 0, regno;
+
+ /* Make sure no regparm register is taken by a
+ fixed register variable. */
+ for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
+ if (fixed_regs[local_regparm])
+ break;
+
+ /* We don't want to use regparm(3) for nested functions as
+ these use a static chain pointer in the third argument. */
+ if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
+ local_regparm = 2;
+
+ /* In 32-bit mode save a register for the split stack. */
+ if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
+ local_regparm = 2;
+
+	  /* Each fixed register usage increases register pressure,
+	     so fewer registers should be used for argument passing.
+	     This functionality can be overridden by an explicit
+	     regparm value. */
+ for (regno = 0; regno <= DI_REG; regno++)
+ if (fixed_regs[regno])
+ globals++;
+
+ local_regparm
+ = globals < local_regparm ? local_regparm - globals : 0;
+
+ if (local_regparm > regparm)
+ regparm = local_regparm;
+ }
+ }
+
+ return regparm;
+}
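+
+/* For example (illustrative), on 32-bit targets
+     int __attribute__((regparm (3))) f (int a, int b, int c);
+   passes A, B and C in %eax, %edx and %ecx rather than on the stack.  */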
+
+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
+ DFmode (2) arguments in SSE registers for a function with the
+   indicated TYPE and DECL. DECL may be NULL when calling a function
+   indirectly or considering a libcall. Otherwise return 0.  */
+
+static int
+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* Use SSE registers to pass SFmode and DFmode arguments if requested
+ by the sseregparm attribute. */
+ if (TARGET_SSEREGPARM
+ || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
+ {
+ if (!TARGET_SSE)
+ {
+ if (warn)
+ {
+ if (decl)
+ error ("calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ }
+ return 0;
+ }
+
+ return 2;
+ }
+
+ /* For local functions, pass up to SSE_REGPARM_MAX SFmode
+ (and DFmode for SSE2) arguments in SSE registers. */
+ if (decl && TARGET_SSE_MATH && optimize
+ && !(profile_flag && !flag_fentry))
+ {
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local && i->can_change_signature)
+ return TARGET_SSE2 ? 2 : 1;
+ }
+
+ return 0;
+}
+
+/* Return true if EAX is live at the start of the function. Used by
+ ix86_expand_prologue to determine if we need special help before
+ calling allocate_stack_worker. */
+
+static bool
+ix86_eax_live_at_start_p (void)
+{
+ /* Cheat. Don't bother working forward from ix86_function_regparm
+ to the function type to whether an actual argument is located in
+ eax. Instead just look at cfg info, which is still close enough
+ to correct at this point. This gives false positives for broken
+ functions that might use uninitialized data that happens to be
+ allocated in eax, but who cares? */
+ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
+}
+
+static bool
+ix86_keep_aggregate_return_pointer (tree fntype)
+{
+ tree attr;
+
+ attr = lookup_attribute ("callee_pop_aggregate_return",
+ TYPE_ATTRIBUTES (fntype));
+ if (attr)
+ return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
+
+ return KEEP_AGGREGATE_RETURN_POINTER != 0;
+}
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+static int
+ix86_return_pops_args (tree fundecl, tree funtype, int size)
+{
+ int rtd;
+
+ /* None of the 64-bit ABIs pop arguments. */
+ if (TARGET_64BIT)
+ return 0;
+
+ rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
+
+ /* Cdecl functions override -mrtd, and never pop the stack. */
+ if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
+ {
+ /* Stdcall and fastcall functions will pop the stack if not
+ variable args. */
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
+ rtd = 1;
+
+ if (rtd && ! stdarg_p (funtype))
+ return size;
+ }
+
+ /* Lose any fake structure return argument if it is passed on the stack. */
+ if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
+ && !ix86_keep_aggregate_return_pointer (funtype))
+ {
+ int nregs = ix86_function_regparm (funtype, fundecl);
+ if (nregs == 0)
+ return GET_MODE_SIZE (Pmode);
+ }
+
+ return 0;
+}
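+
+/* For example (illustrative), a non-variadic stdcall function with three
+   int arguments makes this return 12, so the callee pops its arguments
+   with "ret $12"; for a cdecl function this returns 0 and the caller
+   pops.  */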
+
+/* Argument support functions. */
+
+/* Return true when a register may be used to pass function parameters. */
+bool
+ix86_function_arg_regno_p (int regno)
+{
+ int i;
+ const int *parm_regs;
+
+ if (!TARGET_64BIT)
+ {
+ if (TARGET_MACHO)
+ return (regno < REGPARM_MAX
+ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
+ else
+ return (regno < REGPARM_MAX
+ || (TARGET_MMX && MMX_REGNO_P (regno)
+ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
+ || (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
+ }
+
+ if (TARGET_MACHO)
+ {
+ if (SSE_REGNO_P (regno) && TARGET_SSE)
+ return true;
+ }
+ else
+ {
+ if (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
+ return true;
+ }
+
+ /* TODO: The function should depend on current function ABI but
+ builtins.c would need updating then. Therefore we use the
+ default ABI. */
+
+ /* RAX is used as hidden argument to va_arg functions. */
+ if (ix86_abi == SYSV_ABI && regno == AX_REG)
+ return true;
+
+ if (ix86_abi == MS_ABI)
+ parm_regs = x86_64_ms_abi_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+ for (i = 0; i < (ix86_abi == MS_ABI
+ ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
+ if (regno == parm_regs[i])
+ return true;
+ return false;
+}
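+
+/* For reference (illustrative), the 64-bit integer parameter registers
+   checked above are %rdi, %rsi, %rdx, %rcx, %r8 and %r9 for the SYSV ABI
+   and %rcx, %rdx, %r8 and %r9 for the MS ABI.  */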
+
+/* Return true if we do not know how to pass TYPE solely in registers. */
+
+static bool
+ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (must_pass_in_stack_var_size_or_pad (mode, type))
+ return true;
+
+ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
+ The layout_type routine is crafty and tries to trick us into passing
+ currently unsupported vector types on the stack by using TImode. */
+ return (!TARGET_64BIT && mode == TImode
+ && type && TREE_CODE (type) != VECTOR_TYPE);
+}
+
+/* Return the size, in bytes, of the area reserved for arguments passed
+   in registers for the function represented by FNDECL, depending on the
+   ABI used.  */
+int
+ix86_reg_parm_stack_space (const_tree fndecl)
+{
+ enum calling_abi call_abi = SYSV_ABI;
+ if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
+ call_abi = ix86_function_abi (fndecl);
+ else
+ call_abi = ix86_function_type_abi (fndecl);
+ if (call_abi == MS_ABI)
+ return 32;
+ return 0;
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
+   calling ABI used.  */
+enum calling_abi
+ix86_function_type_abi (const_tree fntype)
+{
+ if (TARGET_64BIT && fntype != NULL)
+ {
+ enum calling_abi abi = ix86_abi;
+ if (abi == SYSV_ABI)
+ {
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = MS_ABI;
+ }
+ else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = SYSV_ABI;
+ return abi;
+ }
+ return ix86_abi;
+}
+
+static bool
+ix86_function_ms_hook_prologue (const_tree fn)
+{
+ if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
+ {
+ if (decl_function_context (fn) != NULL_TREE)
+ error_at (DECL_SOURCE_LOCATION (fn),
+ "ms_hook_prologue is not compatible with nested function");
+ else
+ return true;
+ }
+ return false;
+}
+
+static enum calling_abi
+ix86_function_abi (const_tree fndecl)
+{
+ if (! fndecl)
+ return ix86_abi;
+ return ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
+   calling ABI used.  */
+enum calling_abi
+ix86_cfun_abi (void)
+{
+ if (! cfun || ! TARGET_64BIT)
+ return ix86_abi;
+ return cfun->machine->call_abi;
+}
+
+/* Write the extra assembler code needed to declare a function properly. */
+
+void
+ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
+ tree decl)
+{
+ bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
+
+ if (is_ms_hook)
+ {
+ int i, filler_count = (TARGET_64BIT ? 32 : 16);
+ unsigned int filler_cc = 0xcccccccc;
+
+ for (i = 0; i < filler_count; i += 4)
+ fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
+ }
+
+#ifdef SUBTARGET_ASM_UNWIND_INIT
+ SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
+#endif
+
+ ASM_OUTPUT_LABEL (asm_out_file, fname);
+
+ /* Output magic byte marker, if hot-patch attribute is set. */
+ if (is_ms_hook)
+ {
+ if (TARGET_64BIT)
+ {
+ /* leaq [%rsp + 0], %rsp */
+ asm_fprintf (asm_out_file, ASM_BYTE
+ "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
+ }
+ else
+ {
+ /* movl.s %edi, %edi
+ push %ebp
+ movl.s %esp, %ebp */
+ asm_fprintf (asm_out_file, ASM_BYTE
+ "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
+ }
+ }
+}
+
+/* regclass.c */
+extern void init_regs (void);
+
+/* Implementation of the call ABI switching target hook. The call-used
+   register sets specific to FNDECL are selected. See also
+   ix86_conditional_register_usage for more details. */
+void
+ix86_call_abi_override (const_tree fndecl)
+{
+ if (fndecl == NULL_TREE)
+ cfun->machine->call_abi = ix86_abi;
+ else
+ cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* The MS and SYSV ABIs have different sets of call-used registers. Avoid
+   the expensive re-initialization done by init_regs each time we switch
+   function context, since it is needed only during RTL expansion. */
+static void
+ix86_maybe_switch_abi (void)
+{
+  if (TARGET_64BIT
+      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
+ reinit_regs ();
+}
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
+ tree fntype, /* tree ptr for function decl */
+ rtx libname, /* SYMBOL_REF of library name or 0 */
+ tree fndecl,
+ int caller)
+{
+ struct cgraph_local_info *i;
+ tree fnret_type;
+
+ memset (cum, 0, sizeof (*cum));
+
+ /* Initialize for the current callee. */
+ if (caller)
+ {
+ cfun->machine->callee_pass_avx256_p = false;
+ cfun->machine->callee_return_avx256_p = false;
+ }
+
+ if (fndecl)
+ {
+ i = cgraph_local_info (fndecl);
+ cum->call_abi = ix86_function_abi (fndecl);
+ fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
+ }
+ else
+ {
+ i = NULL;
+ cum->call_abi = ix86_function_type_abi (fntype);
+ if (fntype)
+ fnret_type = TREE_TYPE (fntype);
+ else
+ fnret_type = NULL;
+ }
+
+ if (TARGET_VZEROUPPER && fnret_type)
+ {
+ rtx fnret_value = ix86_function_value (fnret_type, fntype,
+ false);
+ if (function_pass_avx256_p (fnret_value))
+ {
+	  /* The return value of this function uses 256-bit AVX modes. */
+ if (caller)
+ {
+ cfun->machine->callee_return_avx256_p = true;
+ cum->callee_return_avx256_p = true;
+ }
+ else
+ cfun->machine->caller_return_avx256_p = true;
+ }
+ }
+
+ cum->caller = caller;
+
+ /* Set up the number of registers to use for passing arguments. */
+
+ if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
+ sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
+ "or subtarget optimization implying it");
+ cum->nregs = ix86_regparm;
+ if (TARGET_64BIT)
+ {
+ cum->nregs = (cum->call_abi == SYSV_ABI
+ ? X86_64_REGPARM_MAX
+ : X86_64_MS_REGPARM_MAX);
+ }
+ if (TARGET_SSE)
+ {
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_64BIT)
+ {
+ cum->sse_nregs = (cum->call_abi == SYSV_ABI
+ ? X86_64_SSE_REGPARM_MAX
+ : X86_64_MS_SSE_REGPARM_MAX);
+ }
+ }
+ if (TARGET_MMX)
+ cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_avx = true;
+ cum->warn_sse = true;
+ cum->warn_mmx = true;
+
+  /* Because the type might differ between caller and callee, we need to
+     use the actual type of the function for local calls.
+     FIXME: cgraph_analyze can be told to actually record whether a function
+     uses va_start, so for local functions maybe_vaarg can be made more
+     aggressive, helping K&R code.
+     FIXME: once the type system is fixed, we won't need this code anymore. */
+ if (i && i->local && i->can_change_signature)
+ fntype = TREE_TYPE (fndecl);
+ cum->maybe_vaarg = (fntype
+ ? (!prototype_p (fntype) || stdarg_p (fntype))
+ : !libname);
+
+ if (!TARGET_64BIT)
+ {
+ /* If there are variable arguments, then we won't pass anything
+ in registers in 32-bit mode. */
+ if (stdarg_p (fntype))
+ {
+ cum->nregs = 0;
+ cum->sse_nregs = 0;
+ cum->mmx_nregs = 0;
+ cum->warn_avx = 0;
+ cum->warn_sse = 0;
+ cum->warn_mmx = 0;
+ return;
+ }
+
+ /* Use ecx and edx registers if function has fastcall attribute,
+ else look for regparm information. */
+ if (fntype)
+ {
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 1;
+ cum->fastcall = 1; /* Same first register as in fastcall. */
+ }
+ else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 2;
+ cum->fastcall = 1;
+ }
+ else
+ cum->nregs = ix86_function_regparm (fntype, fndecl);
+ }
+
+ /* Set up the number of SSE registers used for passing SFmode
+ and DFmode arguments. Warn for mismatching ABI. */
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
+ }
+}
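+
+/* For example (illustrative), for
+     void __attribute__((fastcall)) f (int a, int b, int c);
+   the code above sets cum->nregs to 2 with cum->fastcall set, so A and B
+   are passed in %ecx and %edx and C is passed on the stack.  */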
+
+/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
+ But in the case of vector types, it is some vector mode.
+
+ When we have only some of our vector isa extensions enabled, then there
+ are some modes for which vector_mode_supported_p is false. For these
+ modes, the generic vector support in gcc will choose some non-vector mode
+ in order to implement the type. By computing the natural mode, we'll
+ select the proper ABI location for the operand and not depend on whatever
+ the middle-end decides to do with these vector types.
+
+   The middle-end can't deal with vector types > 16 bytes. In this
+ case, we return the original mode and warn ABI change if CUM isn't
+ NULL. */
+
+static enum machine_mode
+type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if ((size == 8 || size == 16 || size == 32)
+ /* ??? Generic code allows us to create width 1 vectors. Ignore. */
+ && TYPE_VECTOR_SUBPARTS (type) > 1)
+ {
+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
+
+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ mode = MIN_MODE_VECTOR_FLOAT;
+ else
+ mode = MIN_MODE_VECTOR_INT;
+
+ /* Get the mode which has this inner mode and number of units. */
+ for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
+ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
+ && GET_MODE_INNER (mode) == innermode)
+ {
+ if (size == 32 && !TARGET_AVX)
+ {
+ static bool warnedavx;
+
+ if (cum
+ && !warnedavx
+ && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX "
+ "enabled changes the ABI");
+ }
+ return TYPE_MODE (type);
+ }
+ else
+ return mode;
+ }
+
+ gcc_unreachable ();
+ }
+ }
+
+ return mode;
+}
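+
+/* For example (illustrative), a GNU vector type of eight floats (32
+   bytes) gets the natural mode V8SFmode when AVX is enabled; without AVX
+   the type's original mode is kept and a one-time ABI warning is
+   issued.  */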
+
+/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
+ this may not agree with the mode that the type system has chosen for the
+ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
+ go ahead and use it. Otherwise we have to build a PARALLEL instead. */
+
+static rtx
+gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
+ unsigned int regno)
+{
+ rtx tmp;
+
+ if (orig_mode != BLKmode)
+ tmp = gen_rtx_REG (orig_mode, regno);
+ else
+ {
+ tmp = gen_rtx_REG (mode, regno);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
+ }
+
+ return tmp;
+}
+
+/* x86-64 register passing implementation. See the x86-64 ABI for details.
+   The goal of this code is to classify each 8 bytes of an incoming argument
+   by register class and assign registers accordingly. */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
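+
+/* Worked example (illustrative): for a union such as
+
+ union u { int i; float f; };
+
+ the single eightbyte merges X86_64_INTEGERSI_CLASS with
+ X86_64_SSESF_CLASS, which rule #4 above resolves to
+ X86_64_INTEGERSI_CLASS, so the union is passed in an integer
+ register. */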
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+ BIT_OFFSET is used internally for handling records; it specifies the
+ offset in bits modulo 256 to avoid overflow cases.
+
+ See the x86-64 PS ABI for details.
+*/
+
+static int
+classify_argument (enum machine_mode mode, const_tree type,
+ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
+{
+ HOST_WIDE_INT bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* Variable sized entities are always passed/returned in memory. */
+ if (bytes < 0)
+ return 0;
+
+ if (mode != VOIDmode
+ && targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int i;
+ tree field;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* On x86-64 we pass structures larger than 32 bytes on the stack. */
+ if (bytes > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+ /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
+ signal the memory class, so handle this as a special case. */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Classify each field of record and merge classes. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ /* Now merge the fields of the structure. */
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ /* Bitfields are always classified as integer. Handle them
+ early, since later code would consider them to be
+ misaligned integers. */
+ if (DECL_BIT_FIELD (field))
+ {
+ for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ i < ((int_bit_position (field) + (bit_offset % 64))
+ + tree_low_cst (DECL_SIZE (field), 0)
+ + 63) / 8 / 8; i++)
+ classes[i] =
+ merge_classes (X86_64_INTEGER_CLASS,
+ classes[i]);
+ }
+ else
+ {
+ int pos;
+
+ type = TREE_TYPE (field);
+
+ /* Flexible array member is ignored. */
+ if (TYPE_MODE (type) == BLKmode
+ && TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_SIZE (type) == NULL_TREE
+ && TYPE_DOMAIN (type) != NULL_TREE
+ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
+ == NULL_TREE))
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the ABI of passing struct with"
+ " a flexible array member has"
+ " changed in GCC 4.4");
+ }
+ continue;
+ }
+ num = classify_argument (TYPE_MODE (type), type,
+ subclasses,
+ (int_bit_position (field)
+ + bit_offset) % 256);
+ if (!num)
+ return 0;
+ pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ for (i = 0; i < num && (i + pos) < words; i++)
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ break;
+
+ case ARRAY_TYPE:
+ /* Arrays are handled as small records. */
+ {
+ int num;
+ num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
+ TREE_TYPE (type), subclasses, bit_offset);
+ if (!num)
+ return 0;
+
+ /* The partial classes are now full classes. */
+ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
+ subclasses[0] = X86_64_SSE_CLASS;
+ if (subclasses[0] == X86_64_INTEGERSI_CLASS
+ && !((bit_offset % 64) == 0 && bytes == 4))
+ subclasses[0] = X86_64_INTEGER_CLASS;
+
+ for (i = 0; i < words; i++)
+ classes[i] = subclasses[i % num];
+
+ break;
+ }
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ /* Unions are similar to RECORD_TYPE but offset is always 0. */
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ bit_offset);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ classes[i] = merge_classes (subclasses[i], classes[i]);
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (words > 2)
+ {
+ /* When the size exceeds 16 bytes, everything should be passed in
+ memory unless the first class is X86_64_SSE_CLASS and all the
+ remaining classes are X86_64_SSEUP_CLASS. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* X86_64_SSEUP_CLASS should always be preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ gcc_assert (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ static bool warned;
+
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ gcc_assert (i != 0);
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the ABI of passing union with long double"
+ " has changed in GCC 4.4");
+ }
+ return 0;
+ }
+ }
+ return words;
+ }
+
+ /* Compute the alignment needed. We align all types to their natural
+ boundaries, with the exception of XFmode, which is aligned to 64 bits. */
+ if (mode != VOIDmode && mode != BLKmode)
+ {
+ int mode_alignment = GET_MODE_BITSIZE (mode);
+
+ if (mode == XFmode)
+ mode_alignment = 128;
+ else if (mode == XCmode)
+ mode_alignment = 256;
+ if (COMPLEX_MODE_P (mode))
+ mode_alignment /= 2;
+ /* Misaligned fields are always returned in memory. */
+ if (bit_offset % mode_alignment)
+ return 0;
+ }
+
+ /* For V1xx modes, just use the base mode. */
+ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
+ && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
+ mode = GET_MODE_INNER (mode);
+
+ /* Classification of atomic types. */
+ switch (mode)
+ {
+ case SDmode:
+ case DDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case TDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ case CSImode:
+ case CHImode:
+ case CQImode:
+ {
+ int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
+
+ if (size <= 32)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 64)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 64+32)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 64+64)
+ {
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ }
+ else
+ gcc_unreachable ();
+ }
+ case CDImode:
+ case TImode:
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ case COImode:
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+ case CTImode:
+ return 0;
+ case SFmode:
+ if (!(bit_offset % 64))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DFmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case XFmode:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case TFmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case SCmode:
+ classes[0] = X86_64_SSE_CLASS;
+ if (!(bit_offset % 64))
+ return 1;
+ else
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the ABI of passing structure with complex float"
+ " member has changed in GCC 4.4");
+ }
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ case DCmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+ case XCmode:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+ case TCmode:
+ /* This mode is larger than 16 bytes. */
+ return 0;
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ classes[2] = X86_64_SSEUP_CLASS;
+ classes[3] = X86_64_SSEUP_CLASS;
+ return 4;
+ case V4SFmode:
+ case V4SImode:
+ case V16QImode:
+ case V8HImode:
+ case V2DFmode:
+ case V2DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case V1TImode:
+ case V1DImode:
+ case V2SFmode:
+ case V2SImode:
+ case V4HImode:
+ case V8QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case BLKmode:
+ case VOIDmode:
+ return 0;
+ default:
+ gcc_assert (VECTOR_MODE_P (mode));
+
+ if (bytes > 16)
+ return 0;
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
+
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGER_CLASS;
+ return 1 + (bytes > 8);
+ }
+}
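+
+/* Worked example (illustrative): for
+
+ struct s { double d; int i; };
+
+ the first eightbyte classifies as X86_64_SSEDF_CLASS and the second
+ as X86_64_INTEGERSI_CLASS, so classify_argument returns 2 with
+ classes = { SSEDF, INTEGERSI }; the struct travels in one SSE and
+ one integer register. */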
+
+/* Examine the argument and set the number of registers required in each
+ class. Return 0 iff the parameter should be passed in memory. */
+static int
+examine_argument (enum machine_mode mode, const_tree type, int in_return,
+ int *int_nregs, int *sse_nregs)
+{
+ enum x86_64_reg_class regclass[MAX_CLASSES];
+ int n = classify_argument (mode, type, regclass, 0);
+
+ *int_nregs = 0;
+ *sse_nregs = 0;
+ if (!n)
+ return 0;
+ for (n--; n >= 0; n--)
+ switch (regclass[n])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ (*int_nregs)++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ (*sse_nregs)++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ if (!in_return)
+ return 0;
+ break;
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return ? 2 : 0;
+ case X86_64_MEMORY_CLASS:
+ gcc_unreachable ();
+ }
+ return 1;
+}
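+
+/* Continuing the example above: for struct s { double d; int i; }
+ examine_argument yields *int_nregs == 1 and *sse_nregs == 1, while a
+ 20-byte struct of integers makes classify_argument return 0 and hence
+ examine_argument return 0 (the value is passed in memory). */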
+
+/* Construct container for the argument used by GCC interface. See
+ FUNCTION_ARG for the detailed description. */
+
+static rtx
+construct_container (enum machine_mode mode, enum machine_mode orig_mode,
+ const_tree type, int in_return, int nintregs, int nsseregs,
+ const int *intreg, int sse_regno)
+{
+ /* The following variables hold the static issued_error state. */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
+ enum machine_mode tmpmode;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ enum x86_64_reg_class regclass[MAX_CLASSES];
+ int n;
+ int i;
+ int nexps = 0;
+ int needed_sseregs, needed_intregs;
+ rtx exp[MAX_CLASSES];
+ rtx ret;
+
+ n = classify_argument (mode, type, regclass, 0);
+ if (!n)
+ return NULL;
+ if (!examine_argument (mode, type, in_return, &needed_intregs,
+ &needed_sseregs))
+ return NULL;
+ if (needed_intregs > nintregs || needed_sseregs > nsseregs)
+ return NULL;
+
+ /* We allowed the user to turn off SSE for kernel mode. Don't crash if
+ some less clueful developer tries to use floating-point anyway. */
+ if (needed_sseregs && !TARGET_SSE)
+ {
+ if (in_return)
+ {
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
+ }
+ return NULL;
+ }
+
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (regclass[i] == X86_64_X87_CLASS
+ || regclass[i] == X86_64_X87UP_CLASS
+ || regclass[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
+ /* First construct the simple cases. Avoid SCmode, since we want to use
+ a single register to pass this type. */
+ if (n == 1 && mode != SCmode)
+ switch (regclass[0])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ if (mode != BLKmode)
+ return gen_reg_or_parallel (mode, orig_mode,
+ SSE_REGNO (sse_regno));
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return gen_rtx_REG (mode, FIRST_STACK_REG);
+ case X86_64_NO_CLASS:
+ /* Zero sized array, struct or class. */
+ return NULL;
+ default:
+ gcc_unreachable ();
+ }
+ if (n == 2 && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+ if (n == 4
+ && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+
+ if (n == 2
+ && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
+ return gen_rtx_REG (XFmode, FIRST_STACK_REG);
+ if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
+ && regclass[1] == X86_64_INTEGER_CLASS
+ && (mode == CDImode || mode == TImode || mode == TFmode)
+ && intreg[0] + 1 == intreg[1])
+ return gen_rtx_REG (mode, intreg[0]);
+
+ /* Otherwise figure out the entries of the PARALLEL. */
+ for (i = 0; i < n; i++)
+ {
+ int pos;
+
+ switch (regclass[i])
+ {
+ case X86_64_NO_CLASS:
+ break;
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Merge TImodes on aligned occasions here too. */
+ if (i * 8 + 8 > bytes)
+ tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
+ else if (regclass[i] == X86_64_INTEGERSI_CLASS)
+ tmpmode = SImode;
+ else
+ tmpmode = DImode;
+ /* We've requested 24 bytes, which we don't have a mode for. Use DImode. */
+ if (tmpmode == BLKmode)
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode, *intreg),
+ GEN_INT (i*8));
+ intreg++;
+ break;
+ case X86_64_SSESF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSEDF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSE_CLASS:
+ pos = i;
+ switch (n)
+ {
+ case 1:
+ tmpmode = DImode;
+ break;
+ case 2:
+ if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
+ {
+ tmpmode = TImode;
+ i++;
+ }
+ else
+ tmpmode = DImode;
+ break;
+ case 4:
+ gcc_assert (i == 0
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS);
+ tmpmode = OImode;
+ i += 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (pos*8));
+ sse_regno++;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Empty aligned struct, union or class. */
+ if (nexps == 0)
+ return NULL;
+
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
+ for (i = 0; i < nexps; i++)
+ XVECEXP (ret, 0, i) = exp [i];
+ return ret;
+}
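+
+/* Illustrative result: for the struct s { double d; int i; } example
+ above, construct_container builds roughly
+
+ (parallel [(expr_list (reg:DF xmm0) (const_int 0))
+ (expr_list (reg:SI di) (const_int 8))])
+
+ placing the double in the first SSE register and the int in the
+ first integer argument register at byte offset 8. */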
+
+/* Update the data in CUM to advance over an argument of mode MODE
+ and data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, HOST_WIDE_INT bytes,
+ HOST_WIDE_INT words)
+{
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ cum->words += words;
+ cum->nregs -= words;
+ cum->regno += words;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->regno = 0;
+ }
+ break;
+
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->sse_words += words;
+ cum->sse_nregs -= 1;
+ cum->sse_regno += 1;
+ if (cum->sse_nregs <= 0)
+ {
+ cum->sse_nregs = 0;
+ cum->sse_regno = 0;
+ }
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ case V1TImode:
+ case V1DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->mmx_words += words;
+ cum->mmx_nregs -= 1;
+ cum->mmx_regno += 1;
+ if (cum->mmx_nregs <= 0)
+ {
+ cum->mmx_nregs = 0;
+ cum->mmx_regno = 0;
+ }
+ }
+ break;
+ }
+}
+
+static void
+function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, HOST_WIDE_INT words, bool named)
+{
+ int int_nregs, sse_nregs;
+
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named && VALID_AVX256_REG_MODE (mode))
+ return;
+
+ if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
+ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ {
+ cum->nregs -= int_nregs;
+ cum->sse_nregs -= sse_nregs;
+ cum->regno += int_nregs;
+ cum->sse_regno += sse_nregs;
+ }
+ else
+ {
+ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
+ cum->words = (cum->words + align - 1) & ~(align - 1);
+ cum->words += words;
+ }
+}
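+
+/* The stack fallback above rounds CUM->words up to the argument's
+ alignment, measured in words. E.g. with cum->words == 3 and a
+ 16-byte-aligned argument (align == 2), (3 + 2 - 1) & ~(2 - 1) == 4,
+ so one padding word is skipped before the argument is counted. */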
+
+static void
+function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
+ HOST_WIDE_INT words)
+{
+ /* Anything other than these sizes should have been passed indirectly. */
+ gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
+
+ cum->words += words;
+ if (cum->nregs > 0)
+ {
+ cum->nregs -= 1;
+ cum->regno += 1;
+ }
+}
+
+/* Update the data in CUM to advance over an argument of mode MODE and
+ data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ HOST_WIDE_INT bytes, words;
+
+ if (mode == BLKmode)
+ bytes = int_size_in_bytes (type);
+ else
+ bytes = GET_MODE_SIZE (mode);
+ words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (type)
+ mode = type_natural_mode (type, NULL);
+
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
+ function_arg_advance_ms_64 (cum, bytes, words);
+ else if (TARGET_64BIT)
+ function_arg_advance_64 (cum, mode, type, words, named);
+ else
+ function_arg_advance_32 (cum, mode, type, bytes, words);
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, const_tree type,
+ HOST_WIDE_INT bytes, HOST_WIDE_INT words)
+{
+ static bool warnedsse, warnedmmx;
+
+ /* Avoid the AL settings for the Unix64 ABI. */
+ if (mode == VOIDmode)
+ return constm1_rtx;
+
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ if (words <= cum->nregs)
+ {
+ int regno = cum->regno;
+
+ /* Fastcall allocates the first two DWORD (SImode) or
+ smaller arguments to ECX and EDX if the argument isn't an
+ aggregate type. */
+ if (cum->fastcall)
+ {
+ if (mode == BLKmode
+ || mode == DImode
+ || (type && AGGREGATE_TYPE_P (type)))
+ break;
+
+ /* ECX not EAX is the first allocated register. */
+ if (regno == AX_REG)
+ regno = CX_REG;
+ }
+ return gen_rtx_REG (mode, regno);
+ }
+ break;
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+ case TImode:
+ /* In 32bit, we pass TImode in xmm registers. */
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_SSE && !warnedsse && cum->warn_sse)
+ {
+ warnedsse = true;
+ warning (0, "SSE vector argument without SSE enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ case V1TImode:
+ case V1DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector argument without MMX enabled "
+ "changes the ABI");
+ }
+ if (cum->mmx_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->mmx_regno + FIRST_MMX_REG);
+ }
+ break;
+ }
+
+ return NULL_RTX;
+}
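+
+/* Illustrative fastcall behaviour of the code above (fastcall
+ provides two integer registers, per the comment in the switch):
+
+ __attribute__ ((fastcall)) int f (int a, int b, int c);
+
+ A is passed in %ecx, B in %edx, and C goes on the stack once the
+ registers are exhausted. */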
+
+static rtx
+function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, const_tree type, bool named)
+{
+ /* Handle a hidden AL argument containing the number of SSE registers
+ used for varargs x86-64 functions. */
+ if (mode == VOIDmode)
+ return GEN_INT (cum->maybe_vaarg
+ ? (cum->sse_nregs < 0
+ ? X86_64_SSE_REGPARM_MAX
+ : cum->sse_regno)
+ : -1);
+
+ switch (mode)
+ {
+ default:
+ break;
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named)
+ return NULL;
+ break;
+ }
+
+ return construct_container (mode, orig_mode, type, 0, cum->nregs,
+ cum->sse_nregs,
+ &x86_64_int_parameter_registers [cum->regno],
+ cum->sse_regno);
+}
+
+static rtx
+function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, bool named,
+ HOST_WIDE_INT bytes)
+{
+ unsigned int regno;
+
+ /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
+ We use the value -2 to specify that the current function call is MSABI. */
+ if (mode == VOIDmode)
+ return GEN_INT (-2);
+
+ /* If we've run out of registers, it goes on the stack. */
+ if (cum->nregs == 0)
+ return NULL_RTX;
+
+ regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
+
+ /* Only floating point modes are passed in anything but integer regs. */
+ if (TARGET_SSE && (mode == SFmode || mode == DFmode))
+ {
+ if (named)
+ regno = cum->regno + FIRST_SSE_REG;
+ else
+ {
+ rtx t1, t2;
+
+ /* Unnamed floating parameters are passed in both the
+ SSE and integer registers. */
+ t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
+ t2 = gen_rtx_REG (mode, regno);
+ t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
+ t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
+ }
+ }
+ /* Handle aggregate types passed in registers. */
+ if (orig_mode == BLKmode)
+ {
+ if (bytes > 0 && bytes <= 8)
+ mode = (bytes > 4 ? DImode : SImode);
+ if (mode == BLKmode)
+ mode = DImode;
+ }
+
+ return gen_reg_or_parallel (mode, orig_mode, regno);
+}
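+
+/* Sketch of the unnamed-float case above: for the second argument slot
+ (cum->regno == 1) the PARALLEL pairs the SSE and integer homes,
+ roughly
+
+ (parallel:DF [(expr_list (reg:DF xmm1) (const_int 0))
+ (expr_list (reg:DF dx) (const_int 0))])
+
+ so a varargs callee can find the value in either register. */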
+
+/* Return where to put the arguments to a function.
+ Return zero to push the argument on the stack, or a hard register
+ in which to store the argument.
+
+ MODE is the argument's machine mode. TYPE is the data type of the
+ argument. It is null for libcalls where that information may not be
+ available. CUM gives information about the preceding args and about
+ the function being called. NAMED is nonzero if this argument is a
+ named parameter (otherwise it is an extra parameter matching an
+ ellipsis). */
+
+static rtx
+ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
+ const_tree type, bool named)
+{
+ enum machine_mode mode = omode;
+ HOST_WIDE_INT bytes, words;
+ rtx arg;
+
+ if (mode == BLKmode)
+ bytes = int_size_in_bytes (type);
+ else
+ bytes = GET_MODE_SIZE (mode);
+ words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* To simplify the code below, represent vector types with a vector mode
+ even if MMX/SSE are not active. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ mode = type_natural_mode (type, cum);
+
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
+ arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
+ else if (TARGET_64BIT)
+ arg = function_arg_64 (cum, mode, omode, type, named);
+ else
+ arg = function_arg_32 (cum, mode, omode, type, bytes, words);
+
+ if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
+ {
+ /* This argument uses 256bit AVX modes. */
+ if (cum->caller)
+ cum->callee_pass_avx256_p = true;
+ else
+ cfun->machine->caller_pass_avx256_p = true;
+ }
+
+ if (cum->caller && mode == VOIDmode)
+ {
+ /* This function is called with MODE == VOIDmode immediately
+ before the call instruction is emitted. We copy callee 256bit
+ AVX info from the current CUM here. */
+ cfun->machine->callee_return_avx256_p = cum->callee_return_avx256_p;
+ cfun->machine->callee_pass_avx256_p = cum->callee_pass_avx256_p;
+ }
+
+ return arg;
+}
+
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+static bool
+ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* See Windows x64 Software Convention. */
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
+ {
+ int msize = (int) GET_MODE_SIZE (mode);
+ if (type)
+ {
+ /* Arrays are passed by reference. */
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ return true;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
+ are passed by reference. */
+ msize = int_size_in_bytes (type);
+ }
+ }
+
+ /* __m128 is passed by reference. */
+ switch (msize) {
+ case 1: case 2: case 4: case 8:
+ break;
+ default:
+ return true;
+ }
+ }
+ else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
+ return true;
+
+ return false;
+}
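+
+/* Examples of the MS ABI rule above: an 8-byte struct is passed by
+ value in a register, while a 12-byte struct (or any array type) is
+ copied to memory and a pointer to the copy is passed instead. */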
+
+/* Return true when TYPE should be 128bit aligned for 32bit argument
+ passing ABI. XXX: This function is obsolete and is only used for
+ checking psABI compatibility with previous versions of GCC. */
+
+static bool
+ix86_compat_aligned_value_p (const_tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ if (((TARGET_SSE && SSE_REG_MODE_P (mode))
+ || mode == TDmode
+ || mode == TFmode
+ || mode == TCmode)
+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+ return true;
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree field;
+
+ /* Walk all the structure fields. */
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && ix86_compat_aligned_value_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just for use if some languages pass arrays by value. */
+ if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return false;
+}
+
+/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
+ XXX: This function is obsolete and is only used for checking psABI
+ compatibility with previous versions of GCC. */
+
+static unsigned int
+ix86_compat_function_arg_boundary (enum machine_mode mode,
+ const_tree type, unsigned int align)
+{
+ /* In 32bit, only _Decimal128 and __float128 are aligned to their
+ natural boundaries. */
+ if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
+ {
+ /* i386 ABI defines all arguments to be 4 byte aligned. We have to
+ make an exception for SSE modes since these require 128bit
+ alignment.
+
+ The handling here differs from field_alignment. ICC aligns MMX
+ arguments to 4 byte boundaries, while structure fields are aligned
+ to 8 byte boundaries. */
+ if (!type)
+ {
+ if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!ix86_compat_aligned_value_p (type))
+ align = PARM_BOUNDARY;
+ }
+ }
+ if (align > BIGGEST_ALIGNMENT)
+ align = BIGGEST_ALIGNMENT;
+ return align;
+}
+
+/* Return true when TYPE should be 128bit aligned for 32bit argument
+ passing ABI. */
+
+static bool
+ix86_contains_aligned_value_p (const_tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (mode == XFmode || mode == XCmode)
+ return false;
+
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree field;
+
+ /* Walk all the structure fields. */
+ for (field = TYPE_FIELDS (type);
+ field;
+ field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && ix86_contains_aligned_value_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just for use if some languages pass arrays by value. */
+ if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ return TYPE_ALIGN (type) >= 128;
+
+ return false;
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode and type. */
+
+static unsigned int
+ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ unsigned int align;
+ if (type)
+ {
+ /* Since the main variant type is used for the call, convert the
+ type to its main variant. */
+ type = TYPE_MAIN_VARIANT (type);
+ align = TYPE_ALIGN (type);
+ }
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+ else
+ {
+ static bool warned;
+ unsigned int saved_align = align;
+
+ if (!TARGET_64BIT)
+ {
+ /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
+ if (!type)
+ {
+ if (mode == XFmode || mode == XCmode)
+ align = PARM_BOUNDARY;
+ }
+ else if (!ix86_contains_aligned_value_p (type))
+ align = PARM_BOUNDARY;
+
+ if (align < 128)
+ align = PARM_BOUNDARY;
+ }
+
+ if (warn_psabi
+ && !warned
+ && align != ix86_compat_function_arg_boundary (mode, type,
+ saved_align))
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI for passing parameters with %d-byte"
+ " alignment has changed in GCC 4.6",
+ align / BITS_PER_UNIT);
+ }
+ }
+
+ return align;
+}
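+
+/* E.g. on 32-bit targets a double argument ends up aligned to
+ PARM_BOUNDARY (32 bits), whereas an __m128 argument, whose type
+ alignment is 128 bits, keeps its 128-bit boundary because
+ ix86_contains_aligned_value_p accepts it. */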
+
+/* Return true if N is a possible register number of function value. */
+
+static bool
+ix86_function_value_regno_p (const unsigned int regno)
+{
+ switch (regno)
+ {
+ case 0:
+ return true;
+
+ case FIRST_FLOAT_REG:
+ /* TODO: The function should depend on current function ABI but
+ builtins.c would need updating then. Therefore we use the
+ default ABI. */
+ if (TARGET_64BIT && ix86_abi == MS_ABI)
+ return false;
+ return TARGET_FLOAT_RETURNS_IN_80387;
+
+ case FIRST_SSE_REG:
+ return TARGET_SSE;
+
+ case FIRST_MMX_REG:
+ if (TARGET_MACHO || TARGET_64BIT)
+ return false;
+ return TARGET_MMX;
+ }
+
+ return false;
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+
+static rtx
+function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
+ const_tree fntype, const_tree fn)
+{
+ unsigned int regno;
+
+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+ we normally prevent this case when MMX is not available. However,
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ regno = TARGET_MMX ? FIRST_MMX_REG : 0;
+
+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
+ we prevent this case when sse is not available. However some ABIs
+ may require the result to be returned like integer TImode. */
+ else if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ regno = TARGET_SSE ? FIRST_SSE_REG : 0;
+
+ /* 32-byte vector modes in %ymm0. */
+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
+ regno = TARGET_AVX ? FIRST_SSE_REG : 0;
+
+ /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
+ else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
+ regno = FIRST_FLOAT_REG;
+ else
+ /* Most things go in %eax. */
+ regno = AX_REG;
+
+ /* Override FP return register with %xmm0 for local functions when
+ SSE math is enabled or for functions with sseregparm attribute. */
+ if ((fn || fntype) && (mode == SFmode || mode == DFmode))
+ {
+ int sse_level = ix86_function_sseregparm (fntype, fn, false);
+ if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
+ regno = FIRST_SSE_REG;
+ }
+
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
+ return gen_rtx_REG (orig_mode, regno);
+}
+
+static rtx
+function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
+ const_tree valtype)
+{
+ rtx ret;
+
+ /* Handle libcalls, which don't provide a type node. */
+ if (valtype == NULL)
+ {
+ switch (mode)
+ {
+ case SFmode:
+ case SCmode:
+ case DFmode:
+ case DCmode:
+ case TFmode:
+ case SDmode:
+ case DDmode:
+ case TDmode:
+ return gen_rtx_REG (mode, FIRST_SSE_REG);
+ case XFmode:
+ case XCmode:
+ return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ case TCmode:
+ return NULL;
+ default:
+ return gen_rtx_REG (mode, AX_REG);
+ }
+ }
+
+ ret = construct_container (mode, orig_mode, valtype, 1,
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
+ x86_64_int_return_registers, 0);
+
+ /* For zero-sized structures, construct_container returns NULL, but we
+ need to keep the rest of the compiler happy by returning a meaningful value. */
+ if (!ret)
+ ret = gen_rtx_REG (orig_mode, AX_REG);
+
+ return ret;
+}
+
+static rtx
+function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
+{
+ unsigned int regno = AX_REG;
+
+ if (TARGET_SSE)
+ {
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ && !COMPLEX_MODE_P (mode))
+ regno = FIRST_SSE_REG;
+ break;
+ case 8:
+ case 4:
+ if (mode == SFmode || mode == DFmode)
+ regno = FIRST_SSE_REG;
+ break;
+ default:
+ break;
+ }
+ }
+ return gen_rtx_REG (orig_mode, regno);
+}
+
+static rtx
+ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
+ enum machine_mode orig_mode, enum machine_mode mode)
+{
+ const_tree fn, fntype;
+
+ fn = NULL_TREE;
+ if (fntype_or_decl && DECL_P (fntype_or_decl))
+ fn = fntype_or_decl;
+ fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+
+ if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
+ return function_value_ms_64 (orig_mode, mode);
+ else if (TARGET_64BIT)
+ return function_value_64 (orig_mode, mode, valtype);
+ else
+ return function_value_32 (orig_mode, mode, fntype, fn);
+}
+
+static rtx
+ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode, orig_mode;
+
+ orig_mode = TYPE_MODE (valtype);
+ mode = type_natural_mode (valtype, NULL);
+ return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
+}
+
+rtx
+ix86_libcall_value (enum machine_mode mode)
+{
+ return ix86_function_value_1 (NULL, NULL, mode, mode);
+}
+
+/* Return true iff type is returned in memory. */
+
+static bool ATTRIBUTE_UNUSED
+return_in_memory_32 (const_tree type, enum machine_mode mode)
+{
+ HOST_WIDE_INT size;
+
+ if (mode == BLKmode)
+ return true;
+
+ size = int_size_in_bytes (type);
+
+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
+ return false;
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ /* User-created vectors small enough to fit in EAX. */
+ if (size < 8)
+ return false;
+
+ /* MMX/3dNow values are returned in MM0,
+ except when it doesn't exist or the ABI prescribes otherwise. */
+ if (size == 8)
+ return !TARGET_MMX || TARGET_VECT8_RETURNS;
+
+ /* SSE values are returned in XMM0, except when it doesn't exist. */
+ if (size == 16)
+ return !TARGET_SSE;
+
+ /* AVX values are returned in YMM0, except when it doesn't exist. */
+ if (size == 32)
+ return !TARGET_AVX;
+ }
+
+ if (mode == XFmode)
+ return false;
+
+ if (size > 12)
+ return true;
+
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
+ return false;
+}
+
+static bool ATTRIBUTE_UNUSED
+return_in_memory_64 (const_tree type, enum machine_mode mode)
+{
+ int needed_intregs, needed_sseregs;
+ return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+}
+
+static bool ATTRIBUTE_UNUSED
+return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
+{
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+
+ /* __m128 is returned in xmm0. */
+ if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
+ return false;
+
+ /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
+ return size != 1 && size != 2 && size != 4 && size != 8;
+}
+
+static bool
+ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+#ifdef SUBTARGET_RETURN_IN_MEMORY
+ return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
+#else
+ const enum machine_mode mode = type_natural_mode (type, NULL);
+
+ if (TARGET_64BIT)
+ {
+ if (ix86_function_type_abi (fntype) == MS_ABI)
+ return return_in_memory_ms_64 (type, mode);
+ else
+ return return_in_memory_64 (type, mode);
+ }
+ else
+ return return_in_memory_32 (type, mode);
+#endif
+}
+
+/* When returning SSE vector types, we have a choice of either
+ (1) being ABI incompatible with a -march switch, or
+ (2) generating an error.
+ Given no good solution, I think the safest thing is one warning.
+ The user won't be able to use -Werror, but....
+
+ Choose the STRUCT_VALUE_RTX hook because that's (at present) only
+ called in response to actually generating a caller or callee that
+ uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
+ via aggregate_value_p for general type probing from tree-ssa. */
+
+static rtx
+ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
+{
+ static bool warnedsse, warnedmmx;
+
+ if (!TARGET_64BIT && type)
+ {
+ /* Look at the return type of the function, not the function type. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
+
+ if (!TARGET_SSE && !warnedsse)
+ {
+ if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ {
+ warnedsse = true;
+ warning (0, "SSE vector return without SSE enabled "
+ "changes the ABI");
+ }
+ }
+
+ if (!TARGET_MMX && !warnedmmx)
+ {
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector return without MMX enabled "
+ "changes the ABI");
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+/* Create the va_list data type. */
+
+/* Returns the calling-convention-specific va_list data type.
+ The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
+
+static tree
+ix86_build_builtin_va_list_abi (enum calling_abi abi)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
+
+ /* For i386 we use plain pointer to argument area. */
+ if (!TARGET_64BIT || abi == MS_ABI)
+ return build_pointer_type (char_type_node);
+
+ record = lang_hooks.types.make_type (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("gp_offset"),
+ unsigned_type_node);
+ f_fpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("fp_offset"),
+ unsigned_type_node);
+ f_ovf = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ DECL_CHAIN (f_gpr) = f_fpr;
+ DECL_CHAIN (f_fpr) = f_ovf;
+ DECL_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
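+
+/* The record built above corresponds to the familiar SysV x86-64
+ va_list layout, i.e. in C terms:
+
+ typedef struct __va_list_tag {
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+ } va_list[1];
+*/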
+
+/* Set up the builtin va_list data type and, for 64-bit, the additional
+ calling-convention-specific va_list data types. */
+
+static tree
+ix86_build_builtin_va_list (void)
+{
+ tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
+
+ /* Initialize the ABI-specific va_list builtin types. */
+ if (TARGET_64BIT)
+ {
+ tree t;
+ if (ix86_abi == MS_ABI)
+ {
+ t = ix86_build_builtin_va_list_abi (SYSV_ABI);
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ sysv_va_list_type_node = t;
+ }
+ else
+ {
+ t = ret;
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ sysv_va_list_type_node = t;
+ }
+ if (ix86_abi != MS_ABI)
+ {
+ t = ix86_build_builtin_va_list_abi (MS_ABI);
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ ms_va_list_type_node = t;
+ }
+ else
+ {
+ t = ret;
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ ms_va_list_type_node = t;
+ }
+ }
+
+ return ret;
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
+{
+ rtx save_area, mem;
+ alias_set_type set;
+ int i, max;
+
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+ else
+ ix86_varargs_gpr_size = 0;
+
+ /* FPR size of varargs save area. We don't need it if we don't pass
+ anything in SSE registers. */
+ if (TARGET_SSE && cfun->va_list_fpr_size)
+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+ else
+ ix86_varargs_fpr_size = 0;
+
+ if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+ return;
+
+ save_area = frame_pointer_rtx;
+ set = get_varargs_alias_set ();
+
+ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ if (max > X86_64_REGPARM_MAX)
+ max = X86_64_REGPARM_MAX;
+
+ for (i = cum->regno; i < max; i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (save_area, i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (Pmode,
+ x86_64_int_parameter_registers[i]));
+ }
+
+ if (ix86_varargs_fpr_size)
+ {
+ enum machine_mode smode;
+ rtx label, test;
+
+ /* Now emit code to save SSE registers. The AX parameter contains number
+ of SSE parameter registers used to call this function, though all we
+ actually check here is the zero/non-zero status. */
+
+ label = gen_label_rtx ();
+ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
+ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
+ label));
+
+ /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
+ we used movdqa (i.e. TImode) instead? Perhaps even better would
+ be if we could determine the real mode of the data, via a hook
+ into pass_stdarg. Ignore all that for now. */
+ smode = V4SFmode;
+ if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
+ crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
+
+ max = cum->sse_regno + cfun->va_list_fpr_size / 16;
+ if (max > X86_64_SSE_REGPARM_MAX)
+ max = X86_64_SSE_REGPARM_MAX;
+
+ for (i = cum->sse_regno; i < max; ++i)
+ {
+ mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
+ mem = gen_rtx_MEM (smode, mem);
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
+
+ emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
+ }
+
+ emit_label (label);
+ }
+}
+
+static void
+setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
+{
+ alias_set_type set = get_varargs_alias_set ();
+ int i;
+
+ for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
+ {
+ rtx reg, mem;
+
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (virtual_incoming_args_rtx,
+ i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+
+ reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
+ emit_move_insn (mem, reg);
+ }
+}
+
+static void
+ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS next_cum;
+ tree fntype;
+
+ /* This argument doesn't appear to be used anymore. Which is good,
+ because the old code here didn't suppress rtl generation. */
+ gcc_assert (!no_rtl);
+
+ if (!TARGET_64BIT)
+ return;
+
+ fntype = TREE_TYPE (current_function_decl);
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p (fntype))
+ ix86_function_arg_advance (&next_cum, mode, type, true);
+
+ if (cum->call_abi == MS_ABI)
+ setup_incoming_varargs_ms_64 (&next_cum);
+ else
+ setup_incoming_varargs_64 (&next_cum);
+}
+
+/* Check whether TYPE is a va_list of the plain char * kind. */
+
+static bool
+is_va_list_char_pointer (tree type)
+{
+ tree canonic;
+
+ /* For 32-bit it is always true. */
+ if (!TARGET_64BIT)
+ return true;
+ canonic = ix86_canonical_va_list_type (type);
+ return (canonic == ms_va_list_type_node
+ || (ix86_abi == MS_ABI && canonic == va_list_type_node));
+}
+
+/* Implement va_start. */
+
+static void
+ix86_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ tree type;
+ rtx ovf_rtx;
+
+ if (flag_split_stack
+ && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+ {
+ unsigned int scratch_regno;
+
+ /* When we are splitting the stack, we can't refer to the stack
+ arguments using internal_arg_pointer, because they may be on
+ the old stack. The split stack prologue will arrange to
+ leave a pointer to the old stack arguments in a scratch
+ register, which we here copy to a pseudo-register. The split
+ stack prologue can't set the pseudo-register directly because
+ it (the prologue) runs before any registers have been saved. */
+
+ scratch_regno = split_stack_prologue_scratch_regno ();
+ if (scratch_regno != INVALID_REGNUM)
+ {
+ rtx reg, seq;
+
+ reg = gen_reg_rtx (Pmode);
+ cfun->machine->split_stack_varargs_pointer = reg;
+
+ start_sequence ();
+ emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
+ seq = get_insns ();
+ end_sequence ();
+
+ push_topmost_sequence ();
+ emit_insn_after (seq, entry_of_function ());
+ pop_topmost_sequence ();
+ }
+ }
+
+ /* Only 64bit target needs something special. */
+ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
+ {
+ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+ std_expand_builtin_va_start (valist, nextarg);
+ else
+ {
+ rtx va_r, next;
+
+ va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ next = expand_binop (ptr_mode, add_optab,
+ cfun->machine->split_stack_varargs_pointer,
+ crtl->args.arg_offset_rtx,
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ convert_move (va_r, next, 0);
+ }
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_simple_mem_ref (valist);
+ TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
+ /* The following should be folded into the MEM_REF offset. */
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
+ f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
+ f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
+ f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
+ f_sav, NULL_TREE);
+
+ /* Count number of gp and fp argument registers used. */
+ words = crtl->args.info.words;
+ n_gpr = crtl->args.info.regno;
+ n_fpr = crtl->args.info.sse_regno;
+
+ if (cfun->va_list_gpr_size)
+ {
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type,
+ gpr, build_int_cst (type, n_gpr * 8));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (TARGET_SSE && cfun->va_list_fpr_size)
+ {
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ type = TREE_TYPE (ovf);
+ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+ ovf_rtx = crtl->args.internal_arg_pointer;
+ else
+ ovf_rtx = cfun->machine->split_stack_varargs_pointer;
+ t = make_tree (type, ovf_rtx);
+ if (words != 0)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
+ {
+ /* Find the register save area.
+ The function prologue saves it right above the stack frame. */
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ if (!ix86_varargs_gpr_size)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (-8 * X86_64_REGPARM_MAX));
+ t = build2 (MODIFY_EXPR, type, sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
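+
+/* Worked example (illustrative): for
+
+ void f (int a, double b, ...);
+
+ the named arguments consume one GPR and one SSE register, so
+ va_start stores gp_offset = 1 * 8 = 8 and fp_offset =
+ 8 * X86_64_REGPARM_MAX + 1 * 16 = 64, points overflow_arg_area at
+ the incoming stack arguments, and reg_save_area at the register
+ save block. */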
+
+/* Implement va_arg. */
+
+static tree
+ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ int size, rsize;
+ tree lab_false, lab_over = NULL_TREE;
+ tree addr, t2;
+ rtx container;
+ int indirect_p = 0;
+ tree ptrtype;
+ enum machine_mode nat_mode;
+ unsigned int arg_boundary;
+
+ /* Only 64bit target needs something special. */
+ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
+ build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
+ valist = build_va_arg_indirect_ref (valist);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ nat_mode = type_natural_mode (type, NULL);
+ switch (nat_mode)
+ {
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (ix86_cfun_abi () == SYSV_ABI)
+ {
+ container = NULL;
+ break;
+ }
+
+ default:
+ container = construct_container (nat_mode, TYPE_MODE (type),
+ type, 0, X86_64_REGPARM_MAX,
+ X86_64_SSE_REGPARM_MAX, intreg,
+ 0);
+ break;
+ }
+
+ /* Pull the value out of the saved registers. */
+
+ addr = create_tmp_var (ptr_type_node, "addr");
+
+ if (container)
+ {
+ int needed_intregs, needed_sseregs;
+ bool need_temp;
+ tree int_addr, sse_addr;
+
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+
+ need_temp = (!REG_P (container)
+ && ((needed_intregs && TYPE_ALIGN (type) > 64)
+ || TYPE_ALIGN (type) > 128));
+
+ /* If we are passing a structure, verify that it forms a consecutive
+ block in the register save area. If not, we need to do moves. */
+ if (!need_temp && !REG_P (container))
+ {
+ /* Verify that all registers are strictly consecutive. */
+ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 16)
+ need_temp = 1;
+ }
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 8)
+ need_temp = 1;
+ }
+ }
+ }
+ if (!need_temp)
+ {
+ int_addr = addr;
+ sse_addr = addr;
+ }
+ else
+ {
+ int_addr = create_tmp_var (ptr_type_node, "int_addr");
+ sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
+ }
+
+ /* First ensure that we fit completely in registers. */
+ if (needed_intregs)
+ {
+ t = build_int_cst (TREE_TYPE (gpr),
+ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
+ t = build2 (GE_EXPR, boolean_type_node, gpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build_int_cst (TREE_TYPE (fpr),
+ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + X86_64_REGPARM_MAX * 8);
+ t = build2 (GE_EXPR, boolean_type_node, fpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+
+ /* Compute index to start of area used for integer regs. */
+ if (needed_intregs)
+ {
+ /* int_addr = gpr + sav; */
+ t = fold_convert (sizetype, gpr);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ gimplify_assign (int_addr, t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ /* sse_addr = fpr + sav; */
+ t = fold_convert (sizetype, fpr);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ gimplify_assign (sse_addr, t, pre_p);
+ }
+ if (need_temp)
+ {
+ int i, prev_size = 0;
+ tree temp = create_tmp_var (type, "va_arg_tmp");
+
+ /* addr = &temp; */
+ t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
+ gimplify_assign (addr, t, pre_p);
+
+ for (i = 0; i < XVECLEN (container, 0); i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ rtx reg = XEXP (slot, 0);
+ enum machine_mode mode = GET_MODE (reg);
+ tree piece_type;
+ tree addr_type;
+ tree daddr_type;
+ tree src_addr, src;
+ int src_offset;
+ tree dest_addr, dest;
+ int cur_size = GET_MODE_SIZE (mode);
+
+ gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
+ prev_size = INTVAL (XEXP (slot, 1));
+ if (prev_size + cur_size > size)
+ {
+ cur_size = size - prev_size;
+ mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
+ if (mode == BLKmode)
+ mode = QImode;
+ }
+ piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ if (mode == GET_MODE (reg))
+ addr_type = build_pointer_type (piece_type);
+ else
+ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
+ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
+ true);
+
+ if (SSE_REGNO_P (REGNO (reg)))
+ {
+ src_addr = sse_addr;
+ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
+ }
+ else
+ {
+ src_addr = int_addr;
+ src_offset = REGNO (reg) * 8;
+ }
+ src_addr = fold_convert (addr_type, src_addr);
+ src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
+ size_int (src_offset));
+
+ dest_addr = fold_convert (daddr_type, addr);
+ dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
+ size_int (prev_size));
+ if (cur_size == GET_MODE_SIZE (mode))
+ {
+ src = build_va_arg_indirect_ref (src_addr);
+ dest = build_va_arg_indirect_ref (dest_addr);
+
+ gimplify_assign (dest, src, pre_p);
+ }
+ else
+ {
+ tree copy
+ = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, src_addr,
+ size_int (cur_size));
+ gimplify_and_add (copy, pre_p);
+ }
+ prev_size += cur_size;
+ }
+ }
+
+ if (needed_intregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
+ gimplify_assign (gpr, t, pre_p);
+ }
+
+ if (needed_sseregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
+ gimplify_assign (fpr, t, pre_p);
+ }
+
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ }
+
+ /* ... otherwise out of the overflow area. */
+
+ /* When we align a parameter on the stack for the caller, if its
+ alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
+ aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here
+ with the caller.  */
+ arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
+ if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
+ arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
+
+ /* Care for on-stack alignment if needed. */
+ if (arg_boundary <= 64 || size == 0)
+ t = ovf;
+ else
+ {
+ HOST_WIDE_INT align = arg_boundary / 8;
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
+ size_int (align - 1));
+ t = fold_convert (sizetype, t);
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ size_int (-align));
+ t = fold_convert (TREE_TYPE (ovf), t);
+ }
+
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+ gimplify_assign (addr, t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
+ size_int (rsize * UNITS_PER_WORD));
+ gimplify_assign (unshare_expr (ovf), t, pre_p);
+
+ if (container)
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+
+ ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
+ addr = fold_convert (ptrtype, addr);
+
+ if (indirect_p)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
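+
+/* Illustrative sketch only (assuming the SysV x86-64 values
+   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8): for a call
+   such as
+
+     double d = va_arg (ap, double);
+
+   the code above conceptually emits
+
+     if (ap->fp_offset >= 6*8 + 8*16) goto lab_false;
+     sse_addr = ap->reg_save_area + ap->fp_offset;
+     ap->fp_offset += 16;
+     goto lab_over;
+   lab_false:
+     sse_addr = ap->overflow_arg_area;
+     ap->overflow_arg_area += 8;
+   lab_over:
+     d = *(double *) sse_addr;
+
+   where 6*8 + 8*16 == 176 is the size of the register save area.  */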
+
+/* Return true if OPNUM's MEM should be matched
+ in movabs* patterns. */
+
+bool
+ix86_check_movabs (rtx insn, int opnum)
+{
+ rtx set, mem;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = XEXP (set, opnum);
+ while (GET_CODE (mem) == SUBREG)
+ mem = SUBREG_REG (mem);
+ gcc_assert (MEM_P (mem));
+ return volatile_ok || !MEM_VOLATILE_P (mem);
+}
+
+/* Initialize the table of extra 80387 mathematical constants. */
+
+static void
+init_ext_80387_constants (void)
+{
+ static const char * cst[5] =
+ {
+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */
+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */
+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */
+ "3.1415926535897932385128089594061862044", /* 4: fldpi */
+ };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ real_from_string (&ext_80387_constants_table[i], cst[i]);
+ /* Ensure each constant is rounded to XFmode precision. */
+ real_convert (&ext_80387_constants_table[i],
+ XFmode, &ext_80387_constants_table[i]);
+ }
+
+ ext_80387_constants_init = 1;
+}
+
+/* Return a non-zero index (decoded by standard_80387_constant_opcode)
+ if the constant X can be loaded with a special 80387 instruction,
+ 0 if it cannot, and -1 if X is not an 80387 constant at all. */
+
+int
+standard_80387_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ REAL_VALUE_TYPE r;
+
+ if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
+ return -1;
+
+ if (x == CONST0_RTX (mode))
+ return 1;
+ if (x == CONST1_RTX (mode))
+ return 2;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ /* For XFmode constants, try to find a special 80387 instruction when
+ optimizing for size or on those CPUs that benefit from them. */
+ if (mode == XFmode
+ && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
+ {
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
+ /* A load of the constant -0.0 or -1.0 will be split into an
+ fldz;fchs or fld1;fchs sequence. */
+ if (real_isnegzero (&r))
+ return 8;
+ if (real_identical (&r, &dconstm1))
+ return 9;
+
+ return 0;
+}
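+
+/* For example, an XFmode CONST_DOUBLE holding pi matches
+   ext_80387_constants_table[4] above and yields 4 + 3 == 7, which
+   standard_80387_constant_opcode below maps to "fldpi".  CONST1_RTX
+   yields 2 ("fld1"), and -1.0 yields 9, later split as fld1;fchs.  */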
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (rtx x)
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ case 8:
+ case 9:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (int idx)
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
+ XFmode);
+}
+
+/* Return 1 if X is all 0s, 2 if X is all 1s in a supported SSE
+ vector mode, and 0 otherwise. */
+
+int
+standard_sse_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (vector_all_ones_operand (x, mode))
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_SSE2)
+ return 2;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ else
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ else
+ return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vxorps\t%x0, %x0, %x0";
+ else
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_OI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vxorps\t%x0, %x0, %x0";
+ else
+ return "vpxor\t%x0, %x0, %x0";
+ default:
+ break;
+ }
+ case 2:
+ return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
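+
+/* Illustrative output (AT&T syntax, operand 0 in %xmm0, no AVX):
+   a V4SFmode zero is loaded as "xorps %xmm0, %xmm0" and a V4SImode
+   all-ones vector as "pcmpeqd %xmm0, %xmm0"; comparing a register
+   with itself for equality sets every lane to all ones.  */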
+
+/* Return true if OP contains a symbol reference.  */
+
+bool
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return true;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return true;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return true;
+ }
+
+ return false;
+}
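+
+/* Example: for (plus:SI (symbol_ref:SI ("x")) (const_int 4)) the RTX
+   format string of PLUS is "ee", so the walk above recurses into both
+   operands and returns true on reaching the SYMBOL_REF.  */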
+
+/* Return true if it is appropriate to emit `ret' instructions in the
+ body of a function. Do this only if the epilogue is simple, needing a
+ couple of insns. Prior to reloading, we can't tell how many registers
+ must be saved, so return false then. Return false if there is no frame
+ marker to de-allocate. */
+
+bool
+ix86_can_use_return_insn_p (void)
+{
+ struct ix86_frame frame;
+
+ if (! reload_completed || frame_pointer_needed)
+ return 0;
+
+ /* Don't allow more than 32k pop, since that's all we can do
+ with one instruction. */
+ if (crtl->args.pops_args && crtl->args.size >= 32768)
+ return 0;
+
+ ix86_compute_frame_layout (&frame);
+ return (frame.stack_pointer_offset == UNITS_PER_WORD
+ && (frame.nregs + frame.nsseregs) == 0);
+}
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may
+ be accessed via the stack pointer) in functions that seem suitable. */
+
+static bool
+ix86_frame_pointer_required (void)
+{
+ /* If we accessed previous frames, then the generated code expects
+ to be able to access the saved ebp value in our frame. */
+ if (cfun->machine->accesses_prev_frame)
+ return true;
+
+ /* Several x86 OSes need a frame pointer for other reasons,
+ usually pertaining to setjmp. */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return true;
+
+ /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
+ turns off the frame pointer by default. Turn it back on now if
+ the current function is not a leaf. */
+ if (TARGET_OMIT_LEAF_FRAME_POINTER
+ && (!current_function_is_leaf
+ || ix86_current_function_calls_tls_descriptor))
+ return true;
+
+ if (crtl->profile && !flag_fentry)
+ return true;
+
+ return false;
+}
+
+/* Record that the current function accesses previous call frames. */
+
+void
+ix86_setup_frame_addresses (void)
+{
+ cfun->machine->accesses_prev_frame = 1;
+}
+
+#ifndef USE_HIDDEN_LINKONCE
+# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# define USE_HIDDEN_LINKONCE 1
+# else
+# define USE_HIDDEN_LINKONCE 0
+# endif
+#endif
+
+static int pic_labels_used;
+
+/* Fills in the label name that should be used for a pc thunk for
+ the given register. */
+
+static void
+get_pc_thunk_name (char name[32], unsigned int regno)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ if (USE_HIDDEN_LINKONCE)
+ sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
+ else
+ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
+}
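+
+/* For instance, with USE_HIDDEN_LINKONCE the thunk that materializes
+   the PC in %ebx is named "__i686.get_pc_thunk.bx", the traditional
+   symbol used by 32-bit PIC code.  */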
+
+
+/* Emit the pc thunks used by -fpic code: each thunk loads its
+ target register with the return address of the caller and then
+ returns. */
+
+static void
+ix86_code_end (void)
+{
+ rtx xops[2];
+ int regno;
+
+ for (regno = AX_REG; regno <= SP_REG; regno++)
+ {
+ char name[32];
+ tree decl;
+
+ if (!(pic_labels_used & (1 << regno)))
+ continue;
+
+ get_pc_thunk_name (name, regno);
+
+ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+ get_identifier (name),
+ build_function_type (void_type_node, void_list_node));
+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+ NULL_TREE, void_type_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs ("\t.weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n\t.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ putc ('\n', asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ DECL_WEAK (decl) = 1;
+ }
+ else
+#endif
+ if (USE_HIDDEN_LINKONCE)
+ {
+ DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
+
+ targetm.asm_out.unique_section (decl, 0);
+ switch_to_section (get_named_section (decl, NULL, 0));
+
+ targetm.asm_out.globalize_label (asm_out_file, name);
+ fputs ("\t.hidden\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ putc ('\n', asm_out_file);
+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+ }
+ else
+ {
+ switch_to_section (text_section);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+
+ DECL_INITIAL (decl) = make_node (BLOCK);
+ current_function_decl = decl;
+ init_function_start (decl);
+ first_function_block_is_cold = false;
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), asm_out_file, 1);
+
+ /* Pad stack IP move with 4 instructions (two NOPs count
+ as one instruction). */
+ if (TARGET_PAD_SHORT_FUNCTION)
+ {
+ int i = 8;
+
+ while (i--)
+ fputs ("\tnop\n", asm_out_file);
+ }
+
+ xops[0] = gen_rtx_REG (Pmode, regno);
+ xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tret\n", asm_out_file);
+ final_end_function ();
+ init_insn_lengths ();
+ free_after_compilation (cfun);
+ set_cfun (NULL);
+ current_function_decl = NULL;
+ }
+
+ if (flag_split_stack)
+ file_end_indicate_split_stack ();
+}
+
+/* Emit code for the SET_GOT patterns. */
+
+const char *
+output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
+{
+ rtx xops[3];
+
+ xops[0] = dest;
+
+ if (TARGET_VXWORKS_RTP && flag_pic)
+ {
+ /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
+ xops[2] = gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+
+ /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
+ Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
+ an unadorned address. */
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
+ output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
+ return "";
+ }
+
+ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+
+ if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ {
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+
+ if (!flag_pic)
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
+ else
+ {
+ output_asm_insn ("call\t%a2", xops);
+#ifdef DWARF2_UNWIND_INFO
+ /* The call to the next label acts as a push. */
+ if (dwarf2out_do_frame ())
+ {
+ rtx insn;
+ start_sequence ();
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-4))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ end_sequence ();
+ }
+#endif
+ }
+
+#if TARGET_MACHO
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
+#endif
+
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+
+ if (flag_pic)
+ {
+ output_asm_insn ("pop%z0\t%0", xops);
+#ifdef DWARF2_UNWIND_INFO
+ /* The pop is a pop and clobbers dest, but doesn't restore it
+ for unwind info purposes. */
+ if (dwarf2out_do_frame ())
+ {
+ rtx insn;
+ start_sequence ();
+ insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ dwarf2out_frame_debug (insn, true);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (4))));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf2out_frame_debug (insn, true);
+ end_sequence ();
+ }
+#endif
+ }
+ }
+ else
+ {
+ char name[32];
+ get_pc_thunk_name (name, REGNO (dest));
+ pic_labels_used |= 1 << REGNO (dest);
+
+#ifdef DWARF2_UNWIND_INFO
+ /* Ensure all queued register saves are flushed before the
+ call. */
+ if (dwarf2out_do_frame ())
+ dwarf2out_flush_queued_reg_saves ();
+#endif
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ xops[2] = gen_rtx_MEM (QImode, xops[2]);
+ output_asm_insn ("call\t%X2", xops);
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+#if TARGET_MACHO
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
+ else
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+#endif
+ }
+
+ if (TARGET_MACHO)
+ return "";
+
+ if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+
+ return "";
+}
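+
+/* Illustrative sketch of the sequence emitted above for a 32-bit PIC
+   prologue with DEST == %ebx when the pc thunk is used
+   (TARGET_DEEP_BRANCH_PREDICTION):
+
+     call __i686.get_pc_thunk.bx
+     addl $_GLOBAL_OFFSET_TABLE_, %ebx
+
+   Without deep branch prediction, a call to a local label followed by
+   a pop of %ebx is emitted instead.  */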
+
+/* Generate a "push" pattern for input ARG. */
+
+static rtx
+gen_push (rtx arg)
+{
+ struct machine_function *m = cfun->machine;
+
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ m->fs.sp_offset += UNITS_PER_WORD;
+
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ arg);
+}
+
+/* Generate a "pop" pattern for input ARG. */
+
+static rtx
+gen_pop (rtx arg)
+{
+ return gen_rtx_SET (VOIDmode,
+ arg,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx)));
+}
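+
+/* The two helpers above build the canonical x86 stack access patterns
+   (shown for 32-bit mode; Pmode is DImode for 64-bit):
+
+     push: (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
+     pop:  (set arg (mem:SI (post_inc:SI (reg:SI sp))))
+
+   Note that gen_push also updates the tracked CFA and SP offsets.  */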
+
+/* Return the number of an unused call-clobbered register that is
+ available for the entire function, or INVALID_REGNUM if there is
+ none. */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf
+ && !crtl->profile
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ int i, drap;
+ /* Can't use the same register for both PIC and DRAP. */
+ if (crtl->drap_reg)
+ drap = REGNO (crtl->drap_reg);
+ else
+ drap = -1;
+ for (i = 2; i >= 0; --i)
+ if (i != drap && !df_regs_ever_live_p (i))
+ return i;
+ }
+
+ return INVALID_REGNUM;
+}
+
+/* Return 1 if we need to save REGNO. */
+static int
+ix86_save_reg (unsigned int regno, int maybe_eh_return)
+{
+ if (pic_offset_table_rtx
+ && regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
+ || crtl->profile
+ || crtl->calls_eh_return
+ || crtl->uses_const_pool))
+ {
+ if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
+ return 0;
+ return 1;
+ }
+
+ if (crtl->calls_eh_return && maybe_eh_return)
+ {
+ unsigned i;
+ for (i = 0; ; i++)
+ {
+ unsigned test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ return 1;
+ }
+ }
+
+ if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ return 1;
+
+ return (df_regs_ever_live_p (regno)
+ && !call_used_regs[regno]
+ && !fixed_regs[regno]
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
+}
+
+/* Return the number of saved general-purpose registers. */
+
+static int
+ix86_nsaved_regs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs++;
+ return nregs;
+}
+
+/* Return the number of saved SSE registers. */
+
+static int
+ix86_nsaved_sseregs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ if (ix86_cfun_abi () != MS_ABI)
+ return 0;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs++;
+ return nregs;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. If stack alignment is needed, we can only replace argument
+ pointer with hard frame pointer, or replace frame pointer with stack
+ pointer. Otherwise, frame pointer elimination is automatically
+ handled and all other eliminations are valid. */
+
+static bool
+ix86_can_eliminate (const int from, const int to)
+{
+ if (stack_realign_fp)
+ return ((from == ARG_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ || (from == FRAME_POINTER_REGNUM
+ && to == STACK_POINTER_REGNUM));
+ else
+ return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+ix86_initial_elimination_offset (int from, int to)
+{
+ struct ix86_frame frame;
+ ix86_compute_frame_layout (&frame);
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset;
+ else if (from == FRAME_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
+ else
+ {
+ gcc_assert (to == STACK_POINTER_REGNUM);
+
+ if (from == ARG_POINTER_REGNUM)
+ return frame.stack_pointer_offset;
+
+ gcc_assert (from == FRAME_POINTER_REGNUM);
+ return frame.stack_pointer_offset - frame.frame_pointer_offset;
+ }
+}
+
+/* In a dynamically-aligned function, we can't know the offset from
+ stack pointer to frame pointer, so we must ensure that setjmp
+ eliminates fp against the hard fp (%ebp) rather than trying to
+ index from %esp up to the top of the frame across a gap that is
+ of unknown (at compile-time) size. */
+static rtx
+ix86_builtin_setjmp_frame_value (void)
+{
+ return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
+}
+
+/* On the x86 -fsplit-stack and -fstack-protector both use the same
+ field in the TCB, so they cannot be used together. */
+
+static bool
+ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
+ struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+ bool ret = true;
+
+#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
+ if (report)
+ error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
+ ret = false;
+#else
+ if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
+ {
+ if (report)
+ error ("%<-fsplit-stack%> requires "
+ "assembler support for CFI directives");
+ ret = false;
+ }
+#endif
+
+ return ret;
+}
+
+/* When using -fsplit-stack, the allocation routines set a field in
+ the TCB to the bottom of the stack plus this much space, measured
+ in bytes. */
+
+#define SPLIT_STACK_AVAILABLE 256
+
+/* Fill the ix86_frame structure describing the frame of the currently
+ compiled function. */
+
+static void
+ix86_compute_frame_layout (struct ix86_frame *frame)
+{
+ unsigned int stack_alignment_needed;
+ HOST_WIDE_INT offset;
+ unsigned int preferred_alignment;
+ HOST_WIDE_INT size = get_frame_size ();
+ HOST_WIDE_INT to_allocate;
+
+ frame->nregs = ix86_nsaved_regs ();
+ frame->nsseregs = ix86_nsaved_sseregs ();
+
+ stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+
+ /* The MS ABI seems to require the stack to be always 16-byte aligned,
+ except within function prologues and in leaf functions. */
+ if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ && (!current_function_is_leaf || cfun->calls_alloca != 0
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ preferred_alignment = 16;
+ stack_alignment_needed = 16;
+ crtl->preferred_stack_boundary = 128;
+ crtl->stack_alignment_needed = 128;
+ }
+
+ gcc_assert (!size || stack_alignment_needed);
+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (preferred_alignment <= stack_alignment_needed);
+
+ /* For SEH we have to limit the amount of code movement into the prologue.
+ At present we do this via a BLOCKAGE, at which point there's very little
+ scheduling that can be done, which means that there's very little point
+ in doing anything except PUSHs. */
+ if (TARGET_SEH)
+ cfun->machine->use_fast_prologue_epilogue = false;
+
+ /* During reload iteration the number of registers saved can change.
+ Recompute the value as needed. Do not recompute when the number of
+ registers didn't change, as reload makes multiple calls to this
+ function and does not expect the decision to change within a single
+ iteration. */
+ else if (!optimize_function_for_size_p (cfun)
+ && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+ {
+ int count = frame->nregs;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
+
+ cfun->machine->use_fast_prologue_epilogue_nregs = count;
+
+ /* The fast prologue uses move instead of push to save registers. This
+ is significantly longer, but also executes faster as modern hardware
+ can execute the moves in parallel, but can't do that for push/pop.
+
+ Be careful about choosing which prologue to emit: when the function
+ takes many instructions to execute, we may use the slow version, as
+ well as when the function is known to be outside a hot spot (known
+ only with profile feedback). Weight the size of the function by the
+ number of registers to save, as it is cheap to use one or two push
+ instructions but very slow to use many of them. */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (node->frequency < NODE_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && node->frequency < NODE_FREQUENCY_HOT))
+ cfun->machine->use_fast_prologue_epilogue = false;
+ else
+ cfun->machine->use_fast_prologue_epilogue
+ = !expensive_function_p (count);
+ }
+ if (TARGET_PROLOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue)
+ frame->save_regs_using_mov = true;
+ else
+ frame->save_regs_using_mov = false;
+
+ /* If static stack checking is enabled and done with probes, the registers
+ need to be saved before allocating the frame. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ frame->save_regs_using_mov = false;
+
+ /* Skip return address. */
+ offset = UNITS_PER_WORD;
+
+ /* Skip pushed static chain. */
+ if (ix86_static_chain_on_stack)
+ offset += UNITS_PER_WORD;
+
+ /* Skip saved base pointer. */
+ if (frame_pointer_needed)
+ offset += UNITS_PER_WORD;
+ frame->hfp_save_offset = offset;
+
+ /* The traditional frame pointer location is at the top of the frame. */
+ frame->hard_frame_pointer_offset = offset;
+
+ /* Register save area.  */
+ offset += frame->nregs * UNITS_PER_WORD;
+ frame->reg_save_offset = offset;
+
+ /* Align and set SSE register save area. */
+ if (frame->nsseregs)
+ {
+ /* The only ABI that has saved SSE registers (Win64) also has a
+ 16-byte aligned default stack, and thus we don't need to be
+ within the re-aligned local stack frame to save them. */
+ gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
+ offset = (offset + 16 - 1) & -16;
+ offset += frame->nsseregs * 16;
+ }
+ frame->sse_reg_save_offset = offset;
+
+ /* The re-aligned stack starts here. Values before this point are not
+ directly comparable with values below this point. In order to make
+ sure that no value happens to be the same before and after, force
+ the alignment computation below to add a non-zero value. */
+ if (stack_realign_fp)
+ offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
+
+ /* Va-arg area.  */
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+ offset += frame->va_arg_size;
+
+ /* Align start of frame for local function. */
+ if (stack_realign_fp
+ || offset != frame->sse_reg_save_offset
+ || size != 0
+ || !current_function_is_leaf
+ || cfun->calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
+
+ /* Frame pointer points here. */
+ frame->frame_pointer_offset = offset;
+
+ offset += size;
+
+ /* Add the outgoing arguments area. It can be skipped if we
+ eliminated all the function calls as dead code.
+ Skipping is however impossible when the function calls alloca, as
+ the alloca expander assumes that the last crtl->outgoing_args_size
+ bytes of the stack frame are unused. */
+ if (ACCUMULATE_OUTGOING_ARGS
+ && (!current_function_is_leaf || cfun->calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ offset += crtl->outgoing_args_size;
+ frame->outgoing_arguments_size = crtl->outgoing_args_size;
+ }
+ else
+ frame->outgoing_arguments_size = 0;
+
+ /* Align stack boundary. Only needed if we're calling another function
+ or using alloca. */
+ if (!current_function_is_leaf || cfun->calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ offset = (offset + preferred_alignment - 1) & -preferred_alignment;
+
+ /* We've reached the end of the stack frame. */
+ frame->stack_pointer_offset = offset;
+
+ /* The size the prologue needs to allocate. */
+ to_allocate = offset - frame->sse_reg_save_offset;
+
+ if ((!to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ frame->save_regs_using_mov = false;
+
+ if (ix86_using_red_zone ()
+ && current_function_sp_is_unchanging
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ frame->red_zone_size = to_allocate;
+ if (frame->save_regs_using_mov)
+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->stack_pointer_offset -= frame->red_zone_size;
+
+ /* The SEH frame pointer location is near the bottom of the frame.
+ This is enforced by the fact that the difference between the
+ stack pointer and the frame pointer is limited to 240 bytes in
+ the unwind data structure. */
+ if (TARGET_SEH)
+ {
+ HOST_WIDE_INT diff;
+
+ /* If we can leave the frame pointer where it is, do so. */
+ diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
+ if (diff > 240 || (diff & 15) != 0)
+ {
+ /* Ideally we'd determine what portion of the local stack frame
+ (within the constraint of the lowest 240) is most heavily used.
+ But without that complication, simply bias the frame pointer
+ by 128 bytes so as to maximize the amount of the local stack
+ frame that is addressable with 8-bit offsets. */
+ frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
+ }
+ }
+}
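+
+/* Worked example (illustrative only; 32-bit, frame pointer needed,
+   two saved registers, 20 bytes of locals, 8 bytes of outgoing
+   arguments, 16-byte stack_alignment_needed, and assuming
+   ACCUMULATE_OUTGOING_ARGS):
+
+     return address             offset 4
+     saved %ebp                 offset 8    hard_frame_pointer_offset
+     two register saves         offset 16   reg_save_offset
+     align locals to 16         offset 16   frame_pointer_offset
+     20 bytes of locals         offset 36
+     8 bytes of outgoing args   offset 44
+     align to 16                offset 48   stack_pointer_offset
+
+   so the prologue allocates to_allocate = 48 - 16 = 32 bytes.  */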
+
+/* This is semi-inlined memory_address_length, but simplified
+ since we know that we're always dealing with reg+offset, and
+ to avoid having to create and discard all that rtl. */
+
+static inline int
+choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
+{
+ int len = 4;
+
+ if (offset == 0)
+ {
+ /* EBP and R13 cannot be encoded without an offset. */
+ len = (regno == BP_REG || regno == R13_REG);
+ }
+ else if (IN_RANGE (offset, -128, 127))
+ len = 1;
+
+ /* ESP and R12 must be encoded with a SIB byte. */
+ if (regno == SP_REG || regno == R12_REG)
+ len++;
+
+ return len;
+}
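+
+/* Encoding examples for the helper above: 0(%eax) needs no
+   displacement (len 0), but 0(%ebp) must be encoded with a disp8
+   (len 1); 8(%eax) is a disp8 (len 1); 1024(%eax) is a disp32
+   (len 4); and an %esp base, e.g. 8(%esp), pays one extra byte for
+   the SIB (len 2).  R13 and R12 mirror %ebp and %esp in 64-bit
+   mode.  */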
+
+/* Return an RTX that points to CFA_OFFSET within the stack frame.
+ The valid base registers are taken from CFUN->MACHINE->FS. */
+
+static rtx
+choose_baseaddr (HOST_WIDE_INT cfa_offset)
+{
+ const struct machine_function *m = cfun->machine;
+ rtx base_reg = NULL;
+ HOST_WIDE_INT base_offset = 0;
+
+ if (m->use_fast_prologue_epilogue)
+ {
+ /* Choose the base register most likely to allow the most scheduling
+ opportunities. Generally FP is valid throughout the function,
+ while DRAP must be reloaded within the epilogue. But choose
+ either over the SP, which has a larger encoding. */
+
+ if (m->fs.fp_valid)
+ {
+ base_reg = hard_frame_pointer_rtx;
+ base_offset = m->fs.fp_offset - cfa_offset;
+ }
+ else if (m->fs.drap_valid)
+ {
+ base_reg = crtl->drap_reg;
+ base_offset = 0 - cfa_offset;
+ }
+ else if (m->fs.sp_valid)
+ {
+ base_reg = stack_pointer_rtx;
+ base_offset = m->fs.sp_offset - cfa_offset;
+ }
+ }
+ else
+ {
+ HOST_WIDE_INT toffset;
+ int len = 16, tlen;
+
+ /* Choose the base register with the smallest address encoding.
+ With a tie, choose FP > DRAP > SP. */
+ if (m->fs.sp_valid)
+ {
+ base_reg = stack_pointer_rtx;
+ base_offset = m->fs.sp_offset - cfa_offset;
+ len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
+ }
+ if (m->fs.drap_valid)
+ {
+ toffset = 0 - cfa_offset;
+ tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
+ if (tlen <= len)
+ {
+ base_reg = crtl->drap_reg;
+ base_offset = toffset;
+ len = tlen;
+ }
+ }
+ if (m->fs.fp_valid)
+ {
+ toffset = m->fs.fp_offset - cfa_offset;
+ tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
+ if (tlen <= len)
+ {
+ base_reg = hard_frame_pointer_rtx;
+ base_offset = toffset;
+ len = tlen;
+ }
+ }
+ }
+ gcc_assert (base_reg != NULL);
+
+ return plus_constant (base_reg, base_offset);
+}
+
+/* Emit code to save registers in the prologue. */
+
+static void
+ix86_emit_save_regs (void)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit a single register save at CFA - CFA_OFFSET. */
+
+static void
+ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
+ HOST_WIDE_INT cfa_offset)
+{
+ struct machine_function *m = cfun->machine;
+ rtx reg = gen_rtx_REG (mode, regno);
+ rtx mem, addr, base, insn;
+
+ addr = choose_baseaddr (cfa_offset);
+ mem = gen_frame_mem (mode, addr);
+
+ /* For SSE saves, we need to indicate the 128-bit alignment. */
+ set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
+
+ insn = emit_move_insn (mem, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ base = addr;
+ if (GET_CODE (base) == PLUS)
+ base = XEXP (base, 0);
+ gcc_checking_assert (REG_P (base));
+
+ /* When saving registers into a re-aligned local stack frame, avoid
+ any tricky guessing by dwarf2out. */
+ if (m->fs.realigned)
+ {
+ gcc_checking_assert (stack_realign_drap);
+
+ if (regno == REGNO (crtl->drap_reg))
+ {
+ /* A bit of a hack. We force the DRAP register to be saved in
+ the re-aligned stack frame, which provides us with a copy
+ of the CFA that will last past the prologue. Install it. */
+ gcc_checking_assert (cfun->machine->fs.fp_valid);
+ addr = plus_constant (hard_frame_pointer_rtx,
+ cfun->machine->fs.fp_offset - cfa_offset);
+ mem = gen_rtx_MEM (mode, addr);
+ add_reg_note (insn, REG_CFA_DEF_CFA, mem);
+ }
+ else
+ {
+ /* The frame pointer is a stable reference within the
+ aligned frame. Use it. */
+ gcc_checking_assert (cfun->machine->fs.fp_valid);
+ addr = plus_constant (hard_frame_pointer_rtx,
+ cfun->machine->fs.fp_offset - cfa_offset);
+ mem = gen_rtx_MEM (mode, addr);
+ add_reg_note (insn, REG_CFA_EXPRESSION,
+ gen_rtx_SET (VOIDmode, mem, reg));
+ }
+ }
+
+ /* The memory may not be relative to the current CFA register,
+ which means that we may need to generate a new pattern for
+ use by the unwind info. */
+ else if (base != m->fs.cfa_reg)
+ {
+ addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
+ mem = gen_rtx_MEM (mode, addr);
+ add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
+ }
+}
+
+/* Emit code to save registers using MOV insns.
+ First register is stored at CFA - CFA_OFFSET. */
+static void
+ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
+{
+ unsigned int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Emit code to save SSE registers using MOV insns.
+ First register is stored at CFA - CFA_OFFSET. */
+static void
+ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
+{
+ unsigned int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
+ cfa_offset -= 16;
+ }
+}
+
+static GTY(()) rtx queued_cfa_restores;
+
+/* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the
+ next stack manipulation insn. The value is on the stack at
+ CFA - CFA_OFFSET. Don't add the note if the previously saved value
+ will be left untouched within the stack red zone until return, as
+ unwinders can find the same value in the register and on the stack. */
+
+static void
+ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
+{
+ if (cfa_offset <= cfun->machine->fs.red_zone_offset)
+ return;
+
+ if (insn)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ queued_cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
+}
+
+/* Add queued REG_CFA_RESTORE notes if any to INSN. */
+
+static void
+ix86_add_queued_cfa_restore_notes (rtx insn)
+{
+ rtx last;
+ if (!queued_cfa_restores)
+ return;
+ for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
+ ;
+ XEXP (last, 1) = REG_NOTES (insn);
+ REG_NOTES (insn) = queued_cfa_restores;
+ queued_cfa_restores = NULL_RTX;
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Expand prologue or epilogue stack adjustment.
+ The pattern exists to put a dependency on all ebp-based memory
+ accesses. STYLE should be negative if instructions should be marked
+ as frame related, zero if the %r11 register is live and cannot be
+ freely used, and positive otherwise. */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
+ int style, bool set_cfa)
+{
+ struct machine_function *m = cfun->machine;
+ rtx insn;
+ bool add_frame_related_expr = false;
+
+ if (! TARGET_64BIT)
+ insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
+ else if (x86_64_immediate_operand (offset, DImode))
+ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
+ else
+ {
+ rtx tmp;
+ /* r11 is used by indirect sibcall return as well, set before the
+ epilogue and used after the epilogue. */
+ if (style)
+ tmp = gen_rtx_REG (DImode, R11_REG);
+ else
+ {
+ gcc_assert (src != hard_frame_pointer_rtx
+ && dest != hard_frame_pointer_rtx);
+ tmp = hard_frame_pointer_rtx;
+ }
+ insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
+ if (style < 0)
+ add_frame_related_expr = true;
+
+ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
+ }
+
+ insn = emit_insn (insn);
+ if (style >= 0)
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ if (set_cfa)
+ {
+ rtx r;
+
+ gcc_assert (m->fs.cfa_reg == src);
+ m->fs.cfa_offset += INTVAL (offset);
+ m->fs.cfa_reg = dest;
+
+ r = gen_rtx_PLUS (Pmode, src, offset);
+ r = gen_rtx_SET (VOIDmode, dest, r);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (style < 0)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (add_frame_related_expr)
+ {
+ rtx r = gen_rtx_PLUS (Pmode, src, offset);
+ r = gen_rtx_SET (VOIDmode, dest, r);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
+ }
+ }
+
+ if (dest == stack_pointer_rtx)
+ {
+ HOST_WIDE_INT ooffset = m->fs.sp_offset;
+ bool valid = m->fs.sp_valid;
+
+ if (src == hard_frame_pointer_rtx)
+ {
+ valid = m->fs.fp_valid;
+ ooffset = m->fs.fp_offset;
+ }
+ else if (src == crtl->drap_reg)
+ {
+ valid = m->fs.drap_valid;
+ ooffset = 0;
+ }
+ else
+ {
+ /* Else there are two possibilities: SP itself, which we set
+ up as the default above, or EH_RETURN_STACKADJ_RTX, which is
+ taken care of by hand along the eh_return path. */
+ gcc_checking_assert (src == stack_pointer_rtx
+ || offset == const0_rtx);
+ }
+
+ m->fs.sp_offset = ooffset - INTVAL (offset);
+ m->fs.sp_valid = valid;
+ }
+}
+
+/* Find an available register to be used as the dynamic realign
+ argument pointer register. Such a register will be written in the
+ prologue and used at the beginning of the body, so it must not be
+ 1. a parameter-passing register, or
+ 2. the GOT pointer.
+ We reuse the static-chain register if it is available. Otherwise,
+ we use DI for i386 and R13 for x86-64. We chose R13 since it has a
+ shorter encoding.
+
+ Return: the regno of the chosen register. */
+
+static unsigned int
+find_drap_reg (void)
+{
+ tree decl = cfun->decl;
+
+ if (TARGET_64BIT)
+ {
+ /* Use R13 for a nested function or a function that needs a static
+ chain. Since a function with a tail call may use any caller-saved
+ register in the epilogue, the DRAP must not use a caller-saved
+ register in that case. */
+ if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
+ return R13_REG;
+
+ return R10_REG;
+ }
+ else
+ {
+ /* Use DI for a nested function or a function that needs a static
+ chain. Since a function with a tail call may use any caller-saved
+ register in the epilogue, the DRAP must not use a caller-saved
+ register in that case. */
+ if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
+ return DI_REG;
+
+ /* Reuse static chain register if it isn't used for parameter
+ passing. */
+ if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
+ && !lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))
+ && !lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+ return CX_REG;
+ else
+ return DI_REG;
+ }
+}
+
+/* Return minimum incoming stack alignment. */
+
+static unsigned int
+ix86_minimum_incoming_stack_boundary (bool sibcall)
+{
+ unsigned int incoming_stack_boundary;
+
+ /* Prefer the one specified at command line. */
+ if (ix86_user_incoming_stack_boundary)
+ incoming_stack_boundary = ix86_user_incoming_stack_boundary;
+ /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
+ boundary when -mstackrealign is in effect, this is not a sibcall
+ check, and the estimated stack alignment is 128 bits. */
+ else if (!sibcall
+ && !TARGET_64BIT
+ && ix86_force_align_arg_pointer
+ && crtl->stack_alignment_estimated == 128)
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ else
+ incoming_stack_boundary = ix86_default_incoming_stack_boundary;
+
+ /* Incoming stack alignment can be changed on individual functions
+ via force_align_arg_pointer attribute. We use the smallest
+ incoming stack boundary. */
+ if (incoming_stack_boundary > MIN_STACK_BOUNDARY
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
+ incoming_stack_boundary = MIN_STACK_BOUNDARY;
+
+ /* The incoming stack frame has to be aligned at least at
+ parm_stack_boundary. */
+ if (incoming_stack_boundary < crtl->parm_stack_boundary)
+ incoming_stack_boundary = crtl->parm_stack_boundary;
+
+ /* The stack at the entry of main is aligned by the runtime. We use
+ the smallest incoming stack boundary. */
+ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+
+ return incoming_stack_boundary;
+}
+
+/* Update incoming stack boundary and estimated stack alignment. */
+
+static void
+ix86_update_stack_boundary (void)
+{
+ ix86_incoming_stack_boundary
+ = ix86_minimum_incoming_stack_boundary (false);
+
+ /* x86_64 varargs need 16-byte stack alignment for the register save
+ area. */
+ if (TARGET_64BIT
+ && cfun->stdarg
+ && crtl->stack_alignment_estimated < 128)
+ crtl->stack_alignment_estimated = 128;
+}
+
+/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
+ needed or an rtx for DRAP otherwise. */
+
+static rtx
+ix86_get_drap_rtx (void)
+{
+ if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
+ crtl->need_drap = true;
+
+ if (stack_realign_drap)
+ {
+ /* Assign DRAP to vDRAP and return vDRAP. */
+ unsigned int regno = find_drap_reg ();
+ rtx drap_vreg;
+ rtx arg_ptr;
+ rtx seq, insn;
+
+ arg_ptr = gen_rtx_REG (Pmode, regno);
+ crtl->drap_reg = arg_ptr;
+
+ start_sequence ();
+ drap_vreg = copy_to_reg (arg_ptr);
+ seq = get_insns ();
+ end_sequence ();
+
+ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
+ if (!optimize)
+ {
+ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ return drap_vreg;
+ }
+ else
+ return NULL;
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+ return virtual_incoming_args_rtx;
+}
+
+struct scratch_reg {
+ rtx reg;
+ bool saved;
+};
+
+/* Return a short-lived scratch register for use on function entry.
+ In 32-bit mode, it is valid only after the registers are saved
+ in the prologue. This register must be released by means of
+ release_scratch_register_on_entry once it is dead. */
+
+static void
+get_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ int regno;
+
+ sr->saved = false;
+
+ if (TARGET_64BIT)
+ {
+ /* We always use R11 in 64-bit mode. */
+ regno = R11_REG;
+ }
+ else
+ {
+ tree decl = current_function_decl, fntype = TREE_TYPE (decl);
+ bool fastcall_p
+ = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+ bool static_chain_p = DECL_STATIC_CHAIN (decl);
+ int regparm = ix86_function_regparm (fntype, decl);
+ int drap_regno
+ = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
+
+ /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
+ for the static chain register. */
+ if ((regparm < 1 || (fastcall_p && !static_chain_p))
+ && drap_regno != AX_REG)
+ regno = AX_REG;
+ else if (regparm < 2 && drap_regno != DX_REG)
+ regno = DX_REG;
+ /* ecx is the static chain register. */
+ else if (regparm < 3 && !fastcall_p && !static_chain_p
+ && drap_regno != CX_REG)
+ regno = CX_REG;
+ else if (ix86_save_reg (BX_REG, true))
+ regno = BX_REG;
+ /* esi is the static chain register. */
+ else if (!(regparm == 3 && static_chain_p)
+ && ix86_save_reg (SI_REG, true))
+ regno = SI_REG;
+ else if (ix86_save_reg (DI_REG, true))
+ regno = DI_REG;
+ else
+ {
+ regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
+ sr->saved = true;
+ }
+ }
+
+ sr->reg = gen_rtx_REG (Pmode, regno);
+ if (sr->saved)
+ {
+ rtx insn = emit_insn (gen_push (sr->reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Release a scratch register obtained from the preceding function. */
+
+static void
+release_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ if (sr->saved)
+ {
+ rtx x, insn = emit_insn (gen_pop (sr->reg));
+
+ /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
+ RTX_FRAME_RELATED_P (insn) = 1;
+ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
+ }
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
+
+static void
+ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
+{
+ /* We skip the probe for the first interval + a small dope of 4 words and
+ probe that many bytes past the specified size to maintain a protection
+ area at the bottom of the stack. */
+ const int dope = 4 * UNITS_PER_WORD;
+ rtx size_rtx = GEN_INT (size), last;
+
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 11 insns in the
+ generic case while the compile-time loop is made up of 3+2*(n-1) insns
+ for n intervals. */
+ if (size <= 5 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i, adjust;
+ bool first_probe = true;
+
+ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it exceeds SIZE. If only one probe is
+ needed, this will not generate any code. Then adjust and probe
+ to PROBE_INTERVAL + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ {
+ if (first_probe)
+ {
+ adjust = 2 * PROBE_INTERVAL + dope;
+ first_probe = false;
+ }
+ else
+ adjust = PROBE_INTERVAL;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ if (first_probe)
+ adjust = size + PROBE_INTERVAL + dope;
+ else
+ adjust = size + PROBE_INTERVAL - i;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+
+ /* Adjust back to account for the additional first interval. */
+ last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* SP = SP_0 + PROBE_INTERVAL. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ - (PROBE_INTERVAL + dope))));
+
+ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
+ emit_move_insn (sr.reg, GEN_INT (-rounded_size));
+ emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
+ gen_rtx_PLUS (Pmode, sr.reg,
+ stack_pointer_rtx)));
+
+
+ /* Step 3: the loop
+
+ while (SP != LAST_ADDR)
+ {
+ SP = SP + PROBE_INTERVAL
+ probe at SP
+ }
+
+ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
+
+
+ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
+ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ rounded_size - size)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ /* Adjust back to account for the additional first interval. */
+ last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
+
+ /* Even if the stack pointer isn't the CFA register, we need to correctly
+ describe the adjustments made to it, in particular differentiate the
+ frame-related ones from the frame-unrelated ones. */
+ if (size > 0)
+ {
+ rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
+ XVECEXP (expr, 0, 0)
+ = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -size));
+ XVECEXP (expr, 0, 1)
+ = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope + size));
+ add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
+ RTX_FRAME_RELATED_P (last) = 1;
+
+ cfun->machine->fs.sp_offset += size;
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
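+
+/* Worked example (illustrative; assuming PROBE_INTERVAL == 4096 and
+   32-bit mode, so dope == 16): for size == 10240 the unrolled branch
+   above emits
+
+     sub $8208, %esp ; probe   (2*4096 + 16: first interval + dope)
+     sub $4096, %esp ; probe
+     sub $2048, %esp ; probe   (10240 + 4096 - 12288)
+     add $4112, %esp           (give back interval + dope)
+
+   for a net adjustment of exactly 10240 bytes with every page
+   touched.  */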
+
+/* Adjust the stack pointer up to REG while probing it. */
+
+const char *
+output_adjust_stack_and_probe (rtx reg)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if SP == LAST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* SP = SP + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at SP. */
+ xops[1] = const0_rtx;
+ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
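+
+/* Illustrative output of the routine above (32-bit AT&T syntax,
+   LAST_ADDR in %eax, assuming PROBE_INTERVAL == 4096), with labels
+   named along the lines of:
+
+   .LPSRL0:
+     cmpl %eax, %esp
+     je .LPSRE0
+     subl $4096, %esp
+     orl $0, (%esp)
+     jmp .LPSRL0
+   .LPSRE0:  */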
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer. */
+
+static void
+ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 7 insns in the
+ generic case while the compile-time loop is made up of n insns for
+ n intervals. */
+ if (size <= 7 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i;
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+ it exceeds SIZE. If only one probe is needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
+
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size, last;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_OFFSET = FIRST. */
+ emit_move_insn (sr.reg, GEN_INT (-first));
+
+ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
+ last = first + rounded_size;
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ sr.reg),
+ rounded_size - size));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG to END, inclusive. These are
+ offsets from the current stack pointer. */
+
+const char *
+output_probe_stack_range (rtx reg, rtx end)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[3];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
+ xops[0] = reg;
+ xops[1] = end;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at TEST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ xops[2] = const0_rtx;
+ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
+/* Finalize the stack_realign_needed flag, which guides generation of
+ the prologue/epilogue in the correct form. */
+static void
+ix86_finalize_stack_realign_flags (void)
+{
+ /* Check if stack realignment is really needed after reload, and
+ store the result in cfun. */
+ unsigned int incoming_stack_boundary
+ = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+ ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+ unsigned int stack_realign = (incoming_stack_boundary
+ < (current_function_is_leaf
+ ? crtl->max_used_stack_slot_alignment
+ : crtl->stack_alignment_needed));
+
+ if (crtl->stack_realign_finalized)
+ {
+ /* After stack_realign_needed is finalized, we can no longer
+ change it. */
+ gcc_assert (crtl->stack_realign_needed == stack_realign);
+ }
+ else
+ {
+ crtl->stack_realign_needed = stack_realign;
+ crtl->stack_realign_finalized = true;
+ }
+}
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+ix86_expand_prologue (void)
+{
+ struct machine_function *m = cfun->machine;
+ rtx insn, t;
+ bool pic_reg_used;
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+ bool int_registers_saved;
+
+ ix86_finalize_stack_realign_flags ();
+
+ /* DRAP should not coexist with stack_realign_fp.  */
+ gcc_assert (!(crtl->drap_reg && stack_realign_fp));
+
+ memset (&m->fs, 0, sizeof (m->fs));
+
+ /* Initialize CFA state for before the prologue. */
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
+
+ /* Track SP offset to the CFA. We continue tracking this after we've
+ swapped the CFA register away from SP. In the case of re-alignment
+ this is fudged; we're interested in offsets within the local frame. */
+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
+ m->fs.sp_valid = true;
+
+ ix86_compute_frame_layout (&frame);
+
+ if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
+ {
+ /* We should have already generated an error for any use of
+ ms_hook on a nested function. */
+ gcc_checking_assert (!ix86_static_chain_on_stack);
+
+ /* Check if profiling is active and we shall use the
+ profiling-before-prologue variant. If so, sorry. */
+ if (crtl->profile && flag_fentry != 0)
+ sorry ("ms_hook_prologue attribute isn%'t compatible "
+ "with -mfentry for 32-bit");
+
+ /* In ix86_asm_output_function_label we emitted:
+ 8b ff movl.s %edi,%edi
+ 55 push %ebp
+ 8b ec movl.s %esp,%ebp
+
+ This matches the hookable function prologue in Win32 API
+ functions in Microsoft Windows XP Service Pack 2 and newer.
+ Wine uses this to enable Windows apps to hook the Win32 API
+ functions provided by Wine.
+
+ What that means is that we've already set up the frame pointer. */
+
+ if (frame_pointer_needed
+ && !(crtl->drap_reg && crtl->stack_realign_needed))
+ {
+ rtx push, mov;
+
+ /* We've decided to use the frame pointer already set up.
+ Describe this to the unwinder by pretending that both
+ push and mov insns happen right here.
+
+ Putting the unwind info here at the end of the ms_hook
+ is done so that we can make absolutely certain we get
+ the required byte sequence at the start of the function,
+ rather than relying on an assembler that can produce
+ the exact encoding required.
+
+ However, it does mean (in the unpatched case) that we have
+ a 1 insn window where the asynchronous unwind info is
+ incorrect. If we placed the unwind info at its correct
+ location instead, we would have incorrect unwind info in
+ the patched case. Which is probably all moot since I don't
+ expect Wine generates dwarf2 unwind info for the
+ system libraries that use this feature. */
+
+ insn = emit_insn (gen_blockage ());
+
+ push = gen_push (hard_frame_pointer_rtx);
+ mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
+ stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (push) = 1;
+ RTX_FRAME_RELATED_P (mov) = 1;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
+
+ /* Note that gen_push incremented m->fs.cfa_offset, even
+ though we didn't emit the push insn here. */
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = m->fs.cfa_offset;
+ m->fs.fp_valid = true;
+ }
+ else
+ {
+ /* The frame pointer is not needed so pop %ebp again.
+ This leaves us with a pristine state. */
+ emit_insn (gen_pop (hard_frame_pointer_rtx));
+ }
+ }
+
+ /* The first insn of a function that accepts its static chain on the
+ stack is to push the register that would be filled in by a direct
+ call. This insn will be skipped by the trampoline. */
+ else if (ix86_static_chain_on_stack)
+ {
+ insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
+ emit_insn (gen_blockage ());
+
+ /* We don't want to interpret this push insn as a register save,
+ only as a stack adjustment. The real copy of the register as
+ a save will be done later, if needed. */
+ t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Emit prologue code to adjust stack alignment and set up DRAP, in case
+ DRAP is needed and stack realignment is really needed after reload. */
+ if (stack_realign_drap)
+ {
+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+
+ /* Only need to push parameter pointer reg if it is caller saved. */
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ {
+ /* Push arg pointer reg */
+ insn = emit_insn (gen_push (crtl->drap_reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Grab the argument pointer. */
+ t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ m->fs.cfa_reg = crtl->drap_reg;
+ m->fs.cfa_offset = 0;
+
+ /* Align the stack. */
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Replicate the return address on the stack so that return
+ address can be reached via (argp - 1) slot. This is needed
+ to implement macro RETURN_ADDR_RTX and intrinsic function
+ expand_builtin_return_addr etc. */
+ t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
+ t = gen_frame_mem (Pmode, t);
+ insn = emit_insn (gen_push (t));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* For the purposes of frame and register save area addressing,
+ we've started over with a new frame. */
+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
+ m->fs.realigned = true;
+ }
+
+ if (frame_pointer_needed && !m->fs.fp_valid)
+ {
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it. */
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = m->fs.sp_offset;
+ m->fs.fp_valid = true;
+ }
+ }
+
+ int_registers_saved = (frame.nregs == 0);
+
+ if (!int_registers_saved)
+ {
+ /* If saving registers via PUSH, do so now. */
+ if (!frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
+ /* When using red zone we may start register saving before allocating
+ the stack frame saving one cycle of the prologue. However, avoid
+ doing this if we have to probe the stack; at least on x86_64 the
+ stack probe can turn into a call that clobbers a red zone location. */
+ else if (ix86_using_red_zone ()
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ {
+ ix86_emit_save_regs_using_mov (frame.reg_save_offset);
+ int_registers_saved = true;
+ }
+ }
+
+ if (stack_realign_fp)
+ {
+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ /* The computation of the size of the re-aligned stack frame means
+ that we must allocate the size of the register save area before
+ performing the actual alignment. Otherwise we cannot guarantee
+ that there's enough storage above the realignment point. */
+ if (m->fs.sp_offset != frame.sse_reg_save_offset)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.sse_reg_save_offset),
+ -1, false);
+
+ /* Align the stack. */
+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+
+ /* For the purposes of register save area addressing, the stack
+ pointer is no longer valid. As for the value of sp_offset,
+ see ix86_compute_frame_layout, which we need to match in order
+ to pass verification of stack_pointer_offset at the end. */
+ m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
+ m->fs.sp_valid = false;
+ }
+
+ allocate = frame.stack_pointer_offset - m->fs.sp_offset;
+
+ if (flag_stack_usage)
+ {
+ /* We start to count from ARG_POINTER. */
+ HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
+
+ /* If it was realigned, take into account the fake frame. */
+ if (stack_realign_drap)
+ {
+ if (ix86_static_chain_on_stack)
+ stack_size += UNITS_PER_WORD;
+
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ stack_size += UNITS_PER_WORD;
+
+ /* This over-estimates by 1 minimal-stack-alignment-unit but
+ mitigates that by counting in the new return address slot. */
+ current_function_dynamic_stack_size
+ += crtl->stack_alignment_needed / BITS_PER_UNIT;
+ }
+
+ current_function_static_stack_size = stack_size;
+ }
+
+ /* The stack has already been decremented by the instruction calling us,
+ so probe if the size is non-negative to preserve the protection area. */
+ if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ {
+ /* We expect the registers to be saved when probes are used. */
+ gcc_assert (int_registers_saved);
+
+ if (STACK_CHECK_MOVING_SP)
+ {
+ ix86_adjust_stack_and_probe (allocate);
+ allocate = 0;
+ }
+ else
+ {
+ HOST_WIDE_INT size = allocate;
+
+ if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
+ size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+
+ if (TARGET_STACK_PROBE)
+ ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ else
+ ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ }
+ }
+
+ if (allocate == 0)
+ ;
+ else if (!ix86_target_stack_probe ()
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ }
+ else
+ {
+ rtx eax = gen_rtx_REG (Pmode, AX_REG);
+ rtx r10 = NULL;
+ rtx (*adjust_stack_insn)(rtx, rtx, rtx);
+
+ bool eax_live = false;
+ bool r10_live = false;
+
+ if (TARGET_64BIT)
+ r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
+ if (!TARGET_64BIT_MS_ABI)
+ eax_live = ix86_eax_live_at_start_p ();
+
+ if (eax_live)
+ {
+ emit_insn (gen_push (eax));
+ allocate -= UNITS_PER_WORD;
+ }
+ if (r10_live)
+ {
+ r10 = gen_rtx_REG (Pmode, R10_REG);
+ emit_insn (gen_push (r10));
+ allocate -= UNITS_PER_WORD;
+ }
+
+ emit_move_insn (eax, GEN_INT (allocate));
+ emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
+
+ /* Use the fact that AX still contains ALLOCATE. */
+ adjust_stack_insn = (TARGET_64BIT
+ ? gen_pro_epilogue_adjust_stack_di_sub
+ : gen_pro_epilogue_adjust_stack_si_sub);
+
+ insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
+ stack_pointer_rtx, eax));
+
+ /* Note that SEH directives need to continue tracking the stack
+ pointer even after the frame pointer has been set up. */
+ if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
+ {
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_offset += allocate;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -allocate)));
+ }
+ m->fs.sp_offset += allocate;
+
+ if (r10_live && eax_live)
+ {
+ t = choose_baseaddr (m->fs.sp_offset - allocate);
+ emit_move_insn (r10, gen_frame_mem (Pmode, t));
+ t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+ emit_move_insn (eax, gen_frame_mem (Pmode, t));
+ }
+ else if (eax_live || r10_live)
+ {
+ t = choose_baseaddr (m->fs.sp_offset - allocate);
+ emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
+ }
+ }
+ gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
+
+ /* If we haven't already set up the frame pointer, do so now. */
+ if (frame_pointer_needed && !m->fs.fp_valid)
+ {
+ insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.stack_pointer_offset
+ - frame.hard_frame_pointer_offset));
+ insn = emit_insn (insn);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
+
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = frame.hard_frame_pointer_offset;
+ m->fs.fp_valid = true;
+ }
+
+ if (!int_registers_saved)
+ ix86_emit_save_regs_using_mov (frame.reg_save_offset);
+ if (frame.nsseregs)
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+
+ pic_reg_used = false;
+ if (pic_offset_table_rtx
+ && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
+ || crtl->profile))
+ {
+ unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
+
+ if (alt_pic_reg_used != INVALID_REGNUM)
+ SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
+
+ pic_reg_used = true;
+ }
+
+ if (pic_reg_used)
+ {
+ if (TARGET_64BIT)
+ {
+ if (ix86_cmodel == CM_LARGE_PIC)
+ {
+ rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
+ rtx label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
+ insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
+ insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
+ insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
+ pic_offset_table_rtx, tmp_reg));
+ }
+ else
+ insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ }
+ else
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ }
+
+ /* In the pic_reg_used case, make sure that the got load isn't deleted
+ when mcount needs it. Blockage to avoid call movement across mcount
+ call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
+ note. */
+ if (crtl->profile && !flag_fentry && pic_reg_used)
+ emit_insn (gen_prologue_use (pic_offset_table_rtx));
+
+ if (crtl->drap_reg && !crtl->stack_realign_needed)
+ {
+ /* vDRAP is set up, but after reload it turns out that stack
+ realignment isn't necessary; here we emit prologue code to set up
+ DRAP without the stack realignment adjustment. */
+ t = choose_baseaddr (0);
+ emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
+ }
+
+ /* Prevent instructions from being scheduled into register save push
+ sequence when access to the redzone area is done through frame pointer.
+ The offset between the frame pointer and the stack pointer is calculated
+ relative to the value of the stack pointer at the end of the function
+ prologue, and moving instructions that access redzone area via frame
+ pointer inside push sequence violates this assumption. */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
+ /* Emit cld instruction if stringops are used in the function. */
+ if (TARGET_CLD && ix86_current_function_needs_cld)
+ emit_insn (gen_cld ());
+
+ /* SEH requires that the prologue end within 256 bytes of the start of
+ the function. Prevent instruction schedules that would extend that. */
+ if (TARGET_SEH)
+ emit_insn (gen_blockage ());
+}
+
+/* Emit code to restore REG using a POP insn. */
+
+static void
+ix86_emit_restore_reg_using_pop (rtx reg)
+{
+ struct machine_function *m = cfun->machine;
+ rtx insn = emit_insn (gen_pop (reg));
+
+ ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
+ m->fs.sp_offset -= UNITS_PER_WORD;
+
+ if (m->fs.cfa_reg == crtl->drap_reg
+ && REGNO (reg) == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* This means that the DRAP register is valid for addressing too. */
+ m->fs.drap_valid = true;
+ return;
+ }
+
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ {
+ rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ m->fs.cfa_offset -= UNITS_PER_WORD;
+ }
+
+ /* When the frame pointer is the CFA, and we pop it, we are
+ swapping back to the stack pointer as the CFA. This happens
+ for stack frames that don't allocate other data, so we assume
+ the stack pointer is now pointing at the return address, i.e.
+ the function entry state, which makes the offset be 1 word. */
+ if (reg == hard_frame_pointer_rtx)
+ {
+ m->fs.fp_valid = false;
+ if (m->fs.cfa_reg == hard_frame_pointer_rtx)
+ {
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset -= UNITS_PER_WORD;
+
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (m->fs.cfa_offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+}
+
+/* Emit code to restore saved registers using POP insns. */
+
+static void
+ix86_emit_restore_regs_using_pop (void)
+{
+ unsigned int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
+}
+
+/* Emit code and notes for the LEAVE instruction. */
+
+static void
+ix86_emit_leave (void)
+{
+ struct machine_function *m = cfun->machine;
+ rtx insn = emit_insn (ix86_gen_leave ());
+
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ gcc_assert (m->fs.fp_valid);
+ m->fs.sp_valid = true;
+ m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
+ m->fs.fp_valid = false;
+
+ if (m->fs.cfa_reg == hard_frame_pointer_rtx)
+ {
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = m->fs.sp_offset;
+
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx, m->fs.sp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
+ m->fs.fp_offset);
+ }
+}
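+
+/* Note for readers: leave is architecturally equivalent to
+ "mov %ebp, %esp; pop %ebp" (with %rbp/%rsp in 64-bit mode), which is
+ why sp_valid becomes true at fp_offset - UNITS_PER_WORD and fp_valid
+ is cleared above. */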
+
+/* Emit code to restore saved registers using MOV insns.
+ First register is restored from CFA - CFA_OFFSET. */
+static void
+ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
+ int maybe_eh_return)
+{
+ struct machine_function *m = cfun->machine;
+ unsigned int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ rtx reg = gen_rtx_REG (Pmode, regno);
+ rtx insn, mem;
+
+ mem = choose_baseaddr (cfa_offset);
+ mem = gen_frame_mem (Pmode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* This means that the DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
+
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Emit code to restore saved registers using MOV insns.
+ First register is restored from CFA - CFA_OFFSET. */
+static void
+ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
+ int maybe_eh_return)
+{
+ unsigned int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ rtx reg = gen_rtx_REG (V4SFmode, regno);
+ rtx mem;
+
+ mem = choose_baseaddr (cfa_offset);
+ mem = gen_rtx_MEM (V4SFmode, mem);
+ set_mem_align (mem, 128);
+ emit_move_insn (reg, mem);
+
+ ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
+
+ cfa_offset -= 16;
+ }
+}
+
+/* Restore function stack, frame, and registers. */
+
+void
+ix86_expand_epilogue (int style)
+{
+ struct machine_function *m = cfun->machine;
+ struct machine_frame_state frame_state_save = m->fs;
+ struct ix86_frame frame;
+ bool restore_regs_via_mov;
+ bool using_drap;
+
+ ix86_finalize_stack_realign_flags ();
+ ix86_compute_frame_layout (&frame);
+
+ m->fs.sp_valid = (!frame_pointer_needed
+ || (current_function_sp_is_unchanging
+ && !stack_realign_fp));
+ gcc_assert (!m->fs.sp_valid
+ || m->fs.sp_offset == frame.stack_pointer_offset);
+
+ /* The FP must be valid if the frame pointer is present. */
+ gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ gcc_assert (!m->fs.fp_valid
+ || m->fs.fp_offset == frame.hard_frame_pointer_offset);
+
+ /* We must have *some* valid pointer to the stack frame. */
+ gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
+
+ /* The DRAP is never valid at this point. */
+ gcc_assert (!m->fs.drap_valid);
+
+ /* See the comment about red zone and frame
+ pointer usage in ix86_expand_prologue. */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
+ using_drap = crtl->drap_reg && crtl->stack_realign_needed;
+ gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
+
+ /* Determine the CFA offset of the end of the red-zone. */
+ m->fs.red_zone_offset = 0;
+ if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
+ {
+ /* The red-zone begins below the return address. */
+ m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
+
+ /* When the register save area is in the aligned portion of
+ the stack, determine the maximum runtime displacement that
+ matches up with the aligned frame. */
+ if (stack_realign_drap)
+ m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
+ + UNITS_PER_WORD);
+ }
+
+ /* Special care must be taken for the normal return case of a function
+ using eh_return: the eax and edx registers are marked as saved, but
+ not restored along this path. Adjust the save location to match. */
+ if (crtl->calls_eh_return && style != 2)
+ frame.reg_save_offset -= 2 * UNITS_PER_WORD;
+
+ /* EH_RETURN requires the use of moves to function properly. */
+ if (crtl->calls_eh_return)
+ restore_regs_via_mov = true;
+ /* SEH requires the use of pops to identify the epilogue. */
+ else if (TARGET_SEH)
+ restore_regs_via_mov = false;
+ /* If we're only restoring one register and sp is not valid, then
+ use a move instruction to restore the register, since it's
+ less work than reloading sp and popping the register. */
+ else if (!m->fs.sp_valid && frame.nregs <= 1)
+ restore_regs_via_mov = true;
+ else if (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != frame.reg_save_offset))
+ restore_regs_via_mov = true;
+ else if (frame_pointer_needed
+ && !frame.nregs
+ && m->fs.sp_offset != frame.reg_save_offset)
+ restore_regs_via_mov = true;
+ else if (frame_pointer_needed
+ && TARGET_USE_LEAVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && frame.nregs == 1)
+ restore_regs_via_mov = true;
+ else
+ restore_regs_via_mov = false;
+
+ if (restore_regs_via_mov || frame.nsseregs)
+ {
+ /* Ensure that the entire register save area is addressable via
+ the stack pointer, if we will restore via sp. */
+ if (TARGET_64BIT
+ && m->fs.sp_offset > 0x7fffffff
+ && !(m->fs.fp_valid || m->fs.drap_valid)
+ && (frame.nsseregs + frame.nregs) != 0)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.sse_reg_save_offset),
+ style,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ }
+ }
+
+ /* If there are any SSE registers to restore, then we have to do it
+ via moves, since there's obviously no pop for SSE regs. */
+ if (frame.nsseregs)
+ ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
+ style == 2);
+
+ if (restore_regs_via_mov)
+ {
+ rtx t;
+
+ if (frame.nregs)
+ ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
+
+ /* eh_return epilogues need %ecx added to the stack pointer. */
+ if (style == 2)
+ {
+ rtx insn, sa = EH_RETURN_STACKADJ_RTX;
+
+ /* Stack align doesn't work with eh_return. */
+ gcc_assert (!stack_realign_drap);
+ /* Neither do regparm nested functions. */
+ gcc_assert (!ix86_static_chain_on_stack);
+
+ if (frame_pointer_needed)
+ {
+ t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
+ emit_insn (gen_rtx_SET (VOIDmode, sa, t));
+
+ t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
+ insn = emit_move_insn (hard_frame_pointer_rtx, t);
+
+ /* Note that we use SA as a temporary CFA, as the return
+ address is at the proper place relative to it. We
+ pretend this happens at the FP restore insn because
+ prior to this insn the FP would be stored at the wrong
+ offset relative to SA, and after this insn we have no
+ other reasonable register to use for the CFA. We don't
+ bother resetting the CFA to the SP for the duration of
+ the return insn. */
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (sa, UNITS_PER_WORD));
+ ix86_add_queued_cfa_restore_notes (insn);
+ add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ m->fs.cfa_reg = sa;
+ m->fs.cfa_offset = UNITS_PER_WORD;
+ m->fs.fp_valid = false;
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
+ const0_rtx, style, false);
+ }
+ else
+ {
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
+ if (m->fs.cfa_offset != UNITS_PER_WORD)
+ {
+ m->fs.cfa_offset = UNITS_PER_WORD;
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx,
+ UNITS_PER_WORD));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+ m->fs.sp_offset = UNITS_PER_WORD;
+ m->fs.sp_valid = true;
+ }
+ }
+ else
+ {
+ /* SEH requires that the function end with (1) a stack adjustment
+ if necessary, (2) a sequence of pops, and (3) a return or
+ jump instruction. Prevent insns from the function body from
+ being scheduled into this sequence. */
+ if (TARGET_SEH)
+ {
+ /* Prevent a catch region from being adjacent to the standard
+ epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda
+ nor several other flags that would be interesting to test
+ are yet set up. */
+ if (flag_non_call_exceptions)
+ emit_insn (gen_nops (const1_rtx));
+ else
+ emit_insn (gen_blockage ());
+ }
+
+ /* First step is to deallocate the stack frame so that we can
+ pop the registers. */
+ if (!m->fs.sp_valid)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
+ GEN_INT (m->fs.fp_offset
+ - frame.reg_save_offset),
+ style, false);
+ }
+ else if (m->fs.sp_offset != frame.reg_save_offset)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset
+ - frame.reg_save_offset),
+ style,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ }
+
+ ix86_emit_restore_regs_using_pop ();
+ }
+
+ /* If we used a frame pointer and haven't already got rid of it,
+ then do so now. */
+ if (m->fs.fp_valid)
+ {
+ /* If the stack pointer is valid and pointing at the frame
+ pointer store address, then we only need a pop. */
+ if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+ /* Leave results in shorter dependency chains on CPUs that are
+ able to grok it fast. */
+ else if (TARGET_USE_LEAVE
+ || optimize_function_for_size_p (cfun)
+ || !cfun->machine->use_fast_prologue_epilogue)
+ ix86_emit_leave ();
+ else
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style, !using_drap);
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
+ }
+ }
+
+ if (using_drap)
+ {
+ int param_ptr_offset = UNITS_PER_WORD;
+ rtx insn;
+
+ gcc_assert (stack_realign_drap);
+
+ if (ix86_static_chain_on_stack)
+ param_ptr_offset += UNITS_PER_WORD;
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ param_ptr_offset += UNITS_PER_WORD;
+
+ insn = emit_insn (gen_rtx_SET
+ (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ crtl->drap_reg,
+ GEN_INT (-param_ptr_offset))));
+ m->fs.cfa_reg = stack_pointer_rtx;
+ m->fs.cfa_offset = param_ptr_offset;
+ m->fs.sp_offset = param_ptr_offset;
+ m->fs.realigned = false;
+
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (param_ptr_offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ ix86_emit_restore_reg_using_pop (crtl->drap_reg);
+ }
+
+ /* At this point the stack pointer must be valid, and we must have
+ restored all of the registers. We may not have deallocated the
+ entire stack frame. We've delayed this until now because it may
+ be possible to merge the local stack deallocation with the
+ deallocation forced by ix86_static_chain_on_stack. */
+ gcc_assert (m->fs.sp_valid);
+ gcc_assert (!m->fs.fp_valid);
+ gcc_assert (!m->fs.realigned);
+ if (m->fs.sp_offset != UNITS_PER_WORD)
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
+ style, true);
+ }
+
+ /* Sibcall epilogues don't want a return instruction. */
+ if (style == 0)
+ {
+ m->fs = frame_state_save;
+ return;
+ }
+
+ /* Emit vzeroupper if needed. */
+ if (TARGET_VZEROUPPER
+ && !TREE_THIS_VOLATILE (cfun->decl)
+ && !cfun->machine->caller_return_avx256_p)
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+
+ if (crtl->args.pops_args && crtl->args.size)
+ {
+ rtx popc = GEN_INT (crtl->args.pops_args);
+
+ /* i386 can only pop 64K bytes. If asked to pop more, pop the return
+ address, do an explicit add, and jump indirectly to the caller. */
+
+ if (crtl->args.pops_args >= 65536)
+ {
+ rtx ecx = gen_rtx_REG (SImode, CX_REG);
+ rtx insn;
+
+ /* There is no "pascal" calling convention in any 64bit ABI. */
+ gcc_assert (!TARGET_64BIT);
+
+ insn = emit_insn (gen_pop (ecx));
+ m->fs.cfa_offset -= UNITS_PER_WORD;
+ m->fs.sp_offset -= UNITS_PER_WORD;
+
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ add_reg_note (insn, REG_CFA_REGISTER,
+ gen_rtx_SET (VOIDmode, ecx, pc_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ popc, -1, true);
+ emit_jump_insn (gen_return_indirect_internal (ecx));
+ }
+ else
+ emit_jump_insn (gen_return_pop_internal (popc));
+ }
+ else
+ emit_jump_insn (gen_return_internal ());
+
+ /* Restore the state back to the state from the prologue,
+ so that it's correct for the next epilogue. */
+ m->fs = frame_state_save;
+}
+
+/* Reset from the function's potential modifications. */
+
+static void
+ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (pic_offset_table_rtx)
+ SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
+}
+
+/* Return a scratch register to use in the split stack prologue. The
+ split stack prologue is used for -fsplit-stack. It is the first
+ instructions in the function, even before the regular prologue.
+ The scratch register can be any caller-saved register which is not
+ used for parameters or for the static chain. */
+
+static unsigned int
+split_stack_prologue_scratch_regno (void)
+{
+ if (TARGET_64BIT)
+ return R11_REG;
+ else
+ {
+ bool is_fastcall;
+ int regparm;
+
+ is_fastcall = (lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
+ != NULL);
+ regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
+
+ if (is_fastcall)
+ {
+ if (DECL_STATIC_CHAIN (cfun->decl))
+ {
+ sorry ("-fsplit-stack does not support fastcall with "
+ "nested function");
+ return INVALID_REGNUM;
+ }
+ return AX_REG;
+ }
+ else if (regparm < 3)
+ {
+ if (!DECL_STATIC_CHAIN (cfun->decl))
+ return CX_REG;
+ else
+ {
+ if (regparm >= 2)
+ {
+ sorry ("-fsplit-stack does not support 2 register "
+ " parameters for a nested function");
+ return INVALID_REGNUM;
+ }
+ return DX_REG;
+ }
+ }
+ else
+ {
+ /* FIXME: We could make this work by pushing a register
+ around the addition and comparison. */
+ sorry ("-fsplit-stack does not support 3 register parameters");
+ return INVALID_REGNUM;
+ }
+ }
+}
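+
+/* Summarizing the cases above (a sketch, ignoring other attributes):
+ 64-bit always yields %r11; 32-bit fastcall yields %eax (no static
+ chain allowed); regparm < 3 yields %ecx, or %edx when a static chain
+ occupies %ecx and regparm < 2; regparm == 3 leaves no suitable
+ caller-saved register free, hence the sorry. */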
+
+/* A SYMBOL_REF for the function which allocates new stackspace for
+ -fsplit-stack. */
+
+static GTY(()) rtx split_stack_fn;
+
+/* A SYMBOL_REF for the function that gets more stack when using the
+ large model. */
+
+static GTY(()) rtx split_stack_fn_large;
+
+/* Handle -fsplit-stack. These are the first instructions in the
+ function, even before the regular prologue. */
+
+void
+ix86_expand_split_stack_prologue (void)
+{
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+ unsigned HOST_WIDE_INT args_size;
+ rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
+ rtx scratch_reg = NULL_RTX;
+ rtx varargs_label = NULL_RTX;
+ rtx fn;
+
+ gcc_assert (flag_split_stack && reload_completed);
+
+ ix86_finalize_stack_realign_flags ();
+ ix86_compute_frame_layout (&frame);
+ allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
+
+ /* This is the label we will branch to if we have enough stack
+ space. We expect the basic block reordering pass to reverse this
+ branch if optimizing, so that we branch in the unlikely case. */
+ label = gen_label_rtx ();
+
+ /* We need to compare the stack pointer minus the frame size with
+ the stack boundary in the TCB. The stack boundary always gives
+ us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
+ can compare directly. Otherwise we need to do an addition. */
+
+ limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_STACK_CHECK);
+ limit = gen_rtx_CONST (Pmode, limit);
+ limit = gen_rtx_MEM (Pmode, limit);
+ if (allocate < SPLIT_STACK_AVAILABLE)
+ current = stack_pointer_rtx;
+ else
+ {
+ unsigned int scratch_regno;
+ rtx offset;
+
+ /* We need a scratch register to hold the stack pointer minus
+ the required frame size. Since this is the very start of the
+ function, the scratch register can be any caller-saved
+ register which is not used for parameters. */
+ offset = GEN_INT (- allocate);
+ scratch_regno = split_stack_prologue_scratch_regno ();
+ if (scratch_regno == INVALID_REGNUM)
+ return;
+ scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
+ if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
+ {
+ /* We don't use ix86_gen_add3 in this case because it will
+ want to split to lea, but when not optimizing the insn
+ will not be split after this point. */
+ emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ offset)));
+ }
+ else
+ {
+ emit_move_insn (scratch_reg, offset);
+ emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
+ stack_pointer_rtx));
+ }
+ current = scratch_reg;
+ }
+
+ ix86_expand_branch (GEU, current, limit, label);
+ jump_insn = get_last_insn ();
+ JUMP_LABEL (jump_insn) = label;
+
+ /* Mark the jump as very likely to be taken. */
+ add_reg_note (jump_insn, REG_BR_PROB,
+ GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
+
+ if (split_stack_fn == NULL_RTX)
+ split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+ fn = split_stack_fn;
+
+ /* Get more stack space. We pass in the desired stack space and the
+ size of the arguments to copy to the new stack. In 32-bit mode
+ we push the parameters; __morestack will return on a new stack
+ anyhow. In 64-bit mode we pass the parameters in r10 and
+ r11. */
+ allocate_rtx = GEN_INT (allocate);
+ args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
+ call_fusage = NULL_RTX;
+ if (TARGET_64BIT)
+ {
+ rtx reg10, reg11;
+
+ reg10 = gen_rtx_REG (Pmode, R10_REG);
+ reg11 = gen_rtx_REG (Pmode, R11_REG);
+
+ /* If this function uses a static chain, it will be in %r10.
+ Preserve it across the call to __morestack. */
+ if (DECL_STATIC_CHAIN (cfun->decl))
+ {
+ rtx rax;
+
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ emit_move_insn (rax, reg10);
+ use_reg (&call_fusage, rax);
+ }
+
+ if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
+ {
+ HOST_WIDE_INT argval;
+
+ /* When using the large model we need to load the address
+ into a register, and we've run out of registers. So we
+ switch to a different calling convention, and we call a
+ different function: __morestack_large. We pass the
+ argument size in the upper 32 bits of r10 and pass the
+ frame size in the lower 32 bits. */
+ gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
+ gcc_assert ((args_size & 0xffffffff) == args_size);
+
+ if (split_stack_fn_large == NULL_RTX)
+ split_stack_fn_large =
+ gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
+
+ if (ix86_cmodel == CM_LARGE_PIC)
+ {
+ rtx label, x;
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ emit_insn (gen_set_rip_rex64 (reg10, label));
+ emit_insn (gen_set_got_offset_rex64 (reg11, label));
+ emit_insn (gen_adddi3 (reg10, reg10, reg11));
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
+ UNSPEC_GOT);
+ x = gen_rtx_CONST (Pmode, x);
+ emit_move_insn (reg11, x);
+ x = gen_rtx_PLUS (Pmode, reg10, reg11);
+ x = gen_const_mem (Pmode, x);
+ emit_move_insn (reg11, x);
+ }
+ else
+ emit_move_insn (reg11, split_stack_fn_large);
+
+ fn = reg11;
+
+ argval = ((args_size << 16) << 16) + allocate;
+ emit_move_insn (reg10, GEN_INT (argval));
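+
+ /* Illustrative example: args_size == 0x20 and allocate == 0x1000
+ give argval == 0x0000002000001000, i.e. the argument size in the
+ upper 32 bits and the frame size in the lower 32 bits. */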
+ }
+ else
+ {
+ emit_move_insn (reg10, allocate_rtx);
+ emit_move_insn (reg11, GEN_INT (args_size));
+ use_reg (&call_fusage, reg11);
+ }
+
+ use_reg (&call_fusage, reg10);
+ }
+ else
+ {
+ emit_insn (gen_push (GEN_INT (args_size)));
+ emit_insn (gen_push (allocate_rtx));
+ }
+ call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
+ GEN_INT (UNITS_PER_WORD), constm1_rtx,
+ NULL_RTX, 0);
+ add_function_usage_to (call_insn, call_fusage);
+
+ /* In order to make call/return prediction work right, we now need
+ to execute a return instruction. See
+ libgcc/config/i386/morestack.S for the details on how this works.
+
+ For flow purposes gcc must not see this as a return
+ instruction--we need control flow to continue at the subsequent
+ label. Therefore, we use an unspec. */
+ gcc_assert (crtl->args.pops_args < 65536);
+ emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
+
+ /* If we are in 64-bit mode and this function uses a static chain,
+ we saved %r10 in %rax before calling __morestack. */
+ if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
+ emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
+ gen_rtx_REG (Pmode, AX_REG));
+
+ /* If this function calls va_start, we need to store a pointer to
+ the arguments on the old stack, because they may not have been
+ all copied to the new stack. At this point the old stack can be
+ found at the frame pointer value used by __morestack, because
+ __morestack has set that up before calling back to us. Here we
+ store that pointer in a scratch register, and in
+ ix86_expand_prologue we store the scratch register in a stack
+ slot. */
+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ unsigned int scratch_regno;
+ rtx frame_reg;
+ int words;
+
+ scratch_regno = split_stack_prologue_scratch_regno ();
+ scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
+ frame_reg = gen_rtx_REG (Pmode, BP_REG);
+
+ /* 64-bit:
+ fp -> old fp value
+ return address within this function
+ return address of caller of this function
+ stack arguments
+ So we add three words to get to the stack arguments.
+
+ 32-bit:
+ fp -> old fp value
+ return address within this function
+ first argument to __morestack
+ second argument to __morestack
+ return address of caller of this function
+ stack arguments
+ So we add five words to get to the stack arguments.
+ */
+ words = TARGET_64BIT ? 3 : 5;
+ emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
+ gen_rtx_PLUS (Pmode, frame_reg,
+ GEN_INT (words * UNITS_PER_WORD))));
+
+ varargs_label = gen_label_rtx ();
+ emit_jump_insn (gen_jump (varargs_label));
+ JUMP_LABEL (get_last_insn ()) = varargs_label;
+
+ emit_barrier ();
+ }
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ /* If this function calls va_start, we now have to set the scratch
+ register for the case where we do not call __morestack. In this
+ case we need to set it based on the stack pointer. */
+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (UNITS_PER_WORD))));
+
+ emit_label (varargs_label);
+ LABEL_NUSES (varargs_label) = 1;
+ }
+}
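+
+/* In outline, the code generated above behaves like this sketch
+ (pseudocode; "limit" is the stack boundary read from the TCB):
+
+ if (sp - frame_size >= limit) // the GEU branch above
+ goto label; // enough stack: fall through
+ pass allocate and args_size; call __morestack; split_stack_return;
+ if va_start is used, set scratch from fp and jump to varargs_label;
+ label:
+ if va_start is used, set scratch from sp;
+ varargs_label: */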
+
+/* We may have to tell the dataflow pass that the split stack prologue
+ is initializing a scratch register. */
+
+static void
+ix86_live_on_entry (bitmap regs)
+{
+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+ {
+ gcc_assert (flag_split_stack);
+ bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
+ }
+}
+
+/* Extract the parts of an RTL expression that is a valid memory address
+ for an instruction. Return 0 if the structure of the address is
+ grossly off. Return -1 if the address contains ASHIFT, so it is not
+ strictly valid, but is still used for computing the length of the lea
+ instruction. */
+
+int
+ix86_decompose_address (rtx addr, struct ix86_address *out)
+{
+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+ rtx base_reg, index_reg;
+ HOST_WIDE_INT scale = 1;
+ rtx scale_rtx = NULL_RTX;
+ rtx tmp;
+ int retval = 1;
+ enum ix86_address_seg seg = SEG_DEFAULT;
+
+ if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ base = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addends[4], op;
+ int n = 0, i;
+
+ op = addr;
+ do
+ {
+ if (n >= 4)
+ return 0;
+ addends[n++] = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ while (GET_CODE (op) == PLUS);
+ if (n >= 4)
+ return 0;
+ addends[n] = op;
+
+ for (i = n; i >= 0; --i)
+ {
+ op = addends[i];
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ scale_rtx = XEXP (op, 1);
+ break;
+
+ case ASHIFT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ tmp = XEXP (op, 1);
+ if (!CONST_INT_P (tmp))
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ break;
+
+ case UNSPEC:
+ if (XINT (op, 1) == UNSPEC_TP
+ && TARGET_TLS_DIRECT_SEG_REFS
+ && seg == SEG_DEFAULT)
+ seg = TARGET_64BIT ? SEG_FS : SEG_GS;
+ else
+ return 0;
+ break;
+
+ case REG:
+ case SUBREG:
+ if (!base)
+ base = op;
+ else if (!index)
+ index = op;
+ else
+ return 0;
+ break;
+
+ case CONST:
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (disp)
+ return 0;
+ disp = op;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (GET_CODE (addr) == MULT)
+ {
+ index = XEXP (addr, 0); /* index*scale */
+ scale_rtx = XEXP (addr, 1);
+ }
+ else if (GET_CODE (addr) == ASHIFT)
+ {
+ /* We're called for lea too, which implements ashift on occasion. */
+ index = XEXP (addr, 0);
+ tmp = XEXP (addr, 1);
+ if (!CONST_INT_P (tmp))
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ retval = -1;
+ }
+ else
+ disp = addr; /* displacement */
+
+ /* Extract the integral value of scale. */
+ if (scale_rtx)
+ {
+ if (!CONST_INT_P (scale_rtx))
+ return 0;
+ scale = INTVAL (scale_rtx);
+ }
+
+ base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
+ index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
+
+ /* Avoid useless 0 displacement. */
+ if (disp == const0_rtx && (base || index))
+ disp = NULL_RTX;
+
+ /* Allow arg pointer and stack pointer as index if there is no scaling. */
+ if (base_reg && index_reg && scale == 1
+ && (index_reg == arg_pointer_rtx
+ || index_reg == frame_pointer_rtx
+ || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
+ {
+ rtx tmp;
+ tmp = base, base = index, index = tmp;
+ tmp = base_reg, base_reg = index_reg, index_reg = tmp;
+ }
+
+ /* Special case: %ebp cannot be encoded as a base without a displacement.
+ Similarly %r13. */
+ if (!disp
+ && base_reg
+ && (base_reg == hard_frame_pointer_rtx
+ || base_reg == frame_pointer_rtx
+ || base_reg == arg_pointer_rtx
+ || (REG_P (base_reg)
+ && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
+ || REGNO (base_reg) == R13_REG))))
+ disp = const0_rtx;
+
+ /* Special case: on K6, [%esi] makes the instruction vector decoded.
+ Avoid this by transforming to [%esi+0].
+ Reload calls address legitimization without cfun defined, so we need
+ to test cfun for being non-NULL. */
+ if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
+ && base_reg && !index_reg && !disp
+ && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
+ disp = const0_rtx;
+
+ /* Special case: encode reg+reg instead of reg*2. */
+ if (!base && index && scale == 2)
+ base = index, base_reg = index_reg, scale = 1;
+
+ /* Special case: scaling cannot be encoded without base or displacement. */
+ if (!base && !disp && index && scale != 1)
+ disp = const0_rtx;
+
+ out->base = base;
+ out->index = index;
+ out->disp = disp;
+ out->scale = scale;
+ out->seg = seg;
+
+ return retval;
+}
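+
+/* An illustrative example (not from the sources): given the address
+
+ (plus:SI (plus:SI (mult:SI (reg:SI ax) (const_int 4))
+ (reg:SI bx))
+ (const_int 12))
+
+ the function fills OUT with base = bx, index = ax, scale = 4,
+ disp = (const_int 12), seg = SEG_DEFAULT, and returns 1. */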
+
+/* Return the cost of the memory address X.
+ For i386, it is better to use a complex address than to let gcc copy
+ the address into a reg and make a new pseudo. But not if the address
+ requires two regs -- that would mean more pseudos with longer
+ lifetimes. */
+static int
+ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ struct ix86_address parts;
+ int cost = 1;
+ int ok = ix86_decompose_address (x, &parts);
+
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ /* Attempt to minimize number of registers in the address. */
+ if ((parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
+ || (parts.index
+ && (!REG_P (parts.index)
+ || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
+ cost++;
+
+ if (parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
+ && parts.index
+ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
+ && parts.base != parts.index)
+ cost++;
+
+ /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
+ since its predecode logic can't detect the length of instructions
+ and decoding degenerates to vector decoded. Increase the cost of such
+ addresses here. The penalty is at least 2 cycles. It may be worthwhile
+ to split such addresses or even refuse such addresses at all.
+
+ The following addressing modes are affected:
+ [base+scale*index]
+ [scale*index+disp]
+ [base+index]
+
+ The first and last cases may be avoidable by explicitly coding a zero
+ displacement in the memory address, but I don't have an AMD-K6 machine
+ handy to check this theory. */
+
+ if (TARGET_K6
+ && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
+ || (parts.disp && !parts.base && parts.index && parts.scale != 1)
+ || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
+ cost += 10;
+
+ return cost;
+}
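+
+/* A worked example of the costing above (a sketch): an address whose
+ base and index are two distinct pseudo registers costs 1 + 1 + 1 = 3,
+ while one built only from hard registers, e.g. (%ebx,%eax,2), costs 1;
+ on K6 the latter, having no displacement and scale != 1, additionally
+ pays the +10 penalty. */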
+
+/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
+ this is used to form addresses to local data when -fPIC is in
+ use. */
+
+static bool
+darwin_local_data_pic (rtx disp)
+{
+ return (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+
+bool
+legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (x))
+ return true;
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ case UNSPEC_NTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ default:
+ return false;
+ }
+
+ /* We must have drilled down to a symbol. */
+ if (GET_CODE (x) == LABEL_REF)
+ return true;
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS symbols are never valid. */
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return false;
+
+ /* DLLIMPORT symbols are never valid. */
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (x))
+ return false;
+
+#if TARGET_MACHO
+ /* mdynamic-no-pic */
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ return machopic_symbol_defined_p (x);
+#endif
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == TImode
+ && x != CONST0_RTX (TImode)
+ && !TARGET_64BIT)
+ return false;
+ break;
+
+ case CONST_VECTOR:
+ if (!standard_sse_constant_p (x))
+ return false;
+
+ default:
+ break;
+ }
+
+ /* Otherwise we handle everything else in the move patterns. */
+ return true;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible for the address of thread-local symbols, which
+ is checked above. */
+
+static bool
+ix86_cannot_force_const_mem (rtx x)
+{
+ /* We can always put integral constants and vectors in memory. */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ return false;
+
+ default:
+ break;
+ }
+ return !legitimate_constant_p (x);
+}
+
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_pic_operand_p (rtx x)
+{
+ rtx inner;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ inner = XEXP (x, 0);
+ if (GET_CODE (inner) == PLUS
+ && CONST_INT_P (XEXP (inner, 1)))
+ inner = XEXP (inner, 0);
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (inner) == UNSPEC)
+ switch (XINT (inner, 1))
+ {
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ x = XVECEXP (inner, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_MACHOPIC_OFFSET:
+ return legitimate_pic_address_disp_p (x);
+ default:
+ return false;
+ }
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return legitimate_pic_address_disp_p (x);
+
+ default:
+ return true;
+ }
+}
+
+/* Determine if a given CONST RTX is a valid memory displacement
+ in PIC mode. */
+
+bool
+legitimate_pic_address_disp_p (rtx disp)
+{
+ bool saw_plus;
+
+ /* In 64bit mode we can allow direct addresses of symbols and labels
+ when they are not dynamic symbols. */
+ if (TARGET_64BIT)
+ {
+ rtx op0 = disp, op1;
+
+ switch (GET_CODE (disp))
+ {
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ if (GET_CODE (XEXP (disp, 0)) != PLUS)
+ break;
+ op0 = XEXP (XEXP (disp, 0), 0);
+ op1 = XEXP (XEXP (disp, 0), 1);
+ if (!CONST_INT_P (op1)
+ || INTVAL (op1) >= 16*1024*1024
+ || INTVAL (op1) < -16*1024*1024)
+ break;
+ if (GET_CODE (op0) == LABEL_REF)
+ return true;
+ if (GET_CODE (op0) != SYMBOL_REF)
+ break;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS references should always be enclosed in UNSPEC. */
+ if (SYMBOL_REF_TLS_MODEL (op0))
+ return false;
+ if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
+ && ix86_cmodel != CM_LARGE_PIC)
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (GET_CODE (disp) != CONST)
+ return false;
+ disp = XEXP (disp, 0);
+
+ if (TARGET_64BIT)
+ {
+ /* It is unsafe to allow PLUS expressions here. This limits the allowed
+ distance of GOT references. We should not need these anyway. */
+ if (GET_CODE (disp) != UNSPEC
+ || (XINT (disp, 1) != UNSPEC_GOTPCREL
+ && XINT (disp, 1) != UNSPEC_GOTOFF
+ && XINT (disp, 1) != UNSPEC_PCREL
+ && XINT (disp, 1) != UNSPEC_PLTOFF))
+ return false;
+
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ return false;
+ return true;
+ }
+
+ saw_plus = false;
+ if (GET_CODE (disp) == PLUS)
+ {
+ if (!CONST_INT_P (XEXP (disp, 1)))
+ return false;
+ disp = XEXP (disp, 0);
+ saw_plus = true;
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (disp))
+ return true;
+
+ if (GET_CODE (disp) != UNSPEC)
+ return false;
+
+ switch (XINT (disp, 1))
+ {
+ case UNSPEC_GOT:
+ if (saw_plus)
+ return false;
+ /* We need to check for both symbols and labels because VxWorks loads
+ text labels with @GOT rather than @GOTOFF. See gotoff_operand for
+ details. */
+ return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
+ case UNSPEC_GOTOFF:
+ /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
+ While the ABI also specifies a 32bit relocation, we don't produce
+ it in the small PIC model at all. */
+ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
+ && !TARGET_64BIT)
+ return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
+ return false;
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ if (saw_plus)
+ return false;
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
+ case UNSPEC_NTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ }
+
+ return false;
+}
+
+/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Return true if the
+ address was successfully reloaded, in which case the calling macro
+ should goto WIN; return false otherwise. */
+
+bool
+ix86_legitimize_reload_address (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ /* Reload can generate:
+
+ (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
+ (reg:DI 97))
+ (reg:DI 2 cx))
+
+ This RTX is rejected from ix86_legitimate_address_p due to
+ non-strictness of base register 97. Following this rejection,
+ reload pushes all three components into separate registers,
+ creating invalid memory address RTX.
+
+ Following code reloads only the invalid part of the
+ memory address RTX. */
+
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 1))
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && REG_P (XEXP (XEXP (x, 0), 1)))
+ {
+ rtx base, index;
+ bool something_reloaded = false;
+
+ base = XEXP (XEXP (x, 0), 1);
+ if (!REG_OK_FOR_BASE_STRICT_P (base))
+ {
+ push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ index = XEXP (x, 1);
+ if (!REG_OK_FOR_INDEX_STRICT_P (index))
+ {
+ push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
+ INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ gcc_assert (something_reloaded);
+ return true;
+ }
+
+ return false;
+}
+
+/* Recognizes RTL expressions that are valid memory addresses for an
+ instruction. The MODE argument is the machine mode for the MEM
+ expression that wants to use this address.
+
+ It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
+ convert common non-canonical forms to canonical form so that they will
+ be recognized. */
+
+static bool
+ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx addr, bool strict)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+
+ if (ix86_decompose_address (addr, &parts) <= 0)
+ /* Decomposition failed. */
+ return false;
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ /* Validate base register.
+
+ Don't allow SUBREGs that span more than a word here. It can lead to spill
+ failures when the base is one word out of a two-word structure, which is
+ represented internally as a DImode int. */
+
+ if (base)
+ {
+ rtx reg;
+
+ if (REG_P (base))
+ reg = base;
+ else if (GET_CODE (base) == SUBREG
+ && REG_P (SUBREG_REG (base))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (base);
+ else
+ /* Base is not a register. */
+ return false;
+
+ if (GET_MODE (base) != Pmode)
+ /* Base is not in Pmode. */
+ return false;
+
+ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
+ /* Base is not valid. */
+ return false;
+ }
+
+ /* Validate index register.
+
+ Don't allow SUBREGs that span more than a word here -- same as above. */
+
+ if (index)
+ {
+ rtx reg;
+
+ if (REG_P (index))
+ reg = index;
+ else if (GET_CODE (index) == SUBREG
+ && REG_P (SUBREG_REG (index))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (index);
+ else
+ /* Index is not a register. */
+ return false;
+
+ if (GET_MODE (index) != Pmode)
+ /* Index is not in Pmode. */
+ return false;
+
+ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
+ /* Index is not valid. */
+ return false;
+ }
+
+ /* Validate scale factor. */
+ if (scale != 1)
+ {
+ if (!index)
+ /* Scale without index. */
+ return false;
+
+ if (scale != 2 && scale != 4 && scale != 8)
+ /* Scale is not a valid multiplier. */
+ return false;
+ }
+
+ /* Validate displacement. */
+ if (disp)
+ {
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
+ switch (XINT (XEXP (disp, 0), 1))
+ {
+ /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
+ when used. While the ABI also specifies 32bit relocations, we don't
+ produce them at all and use IP-relative addressing instead. */
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ gcc_assert (flag_pic);
+ if (!TARGET_64BIT)
+ goto is_legitimate_pic;
+
+ /* 64bit address unspec. */
+ return false;
+
+ case UNSPEC_GOTPCREL:
+ case UNSPEC_PCREL:
+ gcc_assert (flag_pic);
+ goto is_legitimate_pic;
+
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ case UNSPEC_NTPOFF:
+ case UNSPEC_DTPOFF:
+ break;
+
+ case UNSPEC_STACK_CHECK:
+ gcc_assert (flag_split_stack);
+ break;
+
+ default:
+ /* Invalid address unspec. */
+ return false;
+ }
+
+ else if (SYMBOLIC_CONST (disp)
+ && (flag_pic
+ || (TARGET_MACHO
+#if TARGET_MACHO
+ && MACHOPIC_INDIRECT
+ && !machopic_operand_p (disp)
+#endif
+ )))
+ {
+
+ is_legitimate_pic:
+ if (TARGET_64BIT && (index || base))
+ {
+ /* foo@dtpoff(%rX) is ok. */
+ if (GET_CODE (disp) != CONST
+ || GET_CODE (XEXP (disp, 0)) != PLUS
+ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
+ || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
+ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ /* Non-constant pic memory reference. */
+ return false;
+ }
+ else if ((!TARGET_MACHO || flag_pic)
+ && ! legitimate_pic_address_disp_p (disp))
+ /* Displacement is an invalid pic construct. */
+ return false;
+#if TARGET_MACHO
+ else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
+	    /* Displacement must be referenced via a non-lazy pointer.  */
+ return false;
+#endif
+
+ /* This code used to verify that a symbolic pic displacement
+ includes the pic_offset_table_rtx register.
+
+	 While this is a good idea, unfortunately these constructs may
+	 be created by the "adds using lea" optimization for incorrect
+	 code like:
+
+ int a;
+ int foo(int i)
+ {
+ return *(&a+i);
+ }
+
+	 This code is nonsensical, but results in addressing the
+	 GOT table with pic_offset_table_rtx as the base.  We can't
+	 just refuse it easily, since it gets matched by the
+	 "addsi3" pattern, which later gets split to lea when the
+	 output register differs from the input.  While this
+	 could be handled by a separate addsi pattern for this case
+	 that never results in lea, disabling this test seems to be
+	 the easier and correct fix for the crash.  */
+ }
+ else if (GET_CODE (disp) != LABEL_REF
+ && !CONST_INT_P (disp)
+ && (GET_CODE (disp) != CONST
+ || !legitimate_constant_p (disp))
+ && (GET_CODE (disp) != SYMBOL_REF
+ || !legitimate_constant_p (disp)))
+ /* Displacement is not constant. */
+ return false;
+ else if (TARGET_64BIT
+ && !x86_64_immediate_operand (disp, VOIDmode))
+ /* Displacement is out of range. */
+ return false;
+ }
+
+ /* Everything looks valid. */
+ return true;
+}
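
The scale rules above are the piece most often misread in x86 addressing, so here is a small standalone C sketch (illustrative only, not GCC code) of exactly that check: a scale other than 1 requires an index register, and only 1, 2, 4 and 8 fit the SIB encoding.

    #include <stdbool.h>
    #include <stdio.h>

    struct addr_sketch { bool has_base, has_index; int scale; };

    /* Mirror of the scale checks in ix86_legitimate_address_p.  */
    static bool
    scale_ok (const struct addr_sketch *a)
    {
      if (a->scale != 1)
        {
          if (!a->has_index)
            return false;               /* scale without index */
          if (a->scale != 2 && a->scale != 4 && a->scale != 8)
            return false;               /* not an encodable multiplier */
        }
      return true;
    }

    int
    main (void)
    {
      struct addr_sketch ok = { true, true, 4 };    /* base + index*4 */
      struct addr_sketch bad = { true, false, 4 };  /* scale but no index */
      printf ("%d %d\n", scale_ok (&ok), scale_ok (&bad));  /* prints: 1 0 */
      return 0;
    }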
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
+}
+
+/* Return a unique alias set for the GOT. */
+
+static alias_set_type
+ix86_GOT_alias_set (void)
+{
+ static alias_set_type set = -1;
+ if (set == -1)
+ set = new_alias_set ();
+ return set;
+}
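
The function above is a lazily-created singleton: the alias set is allocated on first use and cached. A minimal sketch of the same idiom, where make_id is a hypothetical stand-in for new_alias_set:

    #include <stdio.h>

    static int
    make_id (void)                  /* hypothetical stand-in for new_alias_set */
    {
      static int next_id = 100;
      return next_id++;
    }

    static int
    got_alias_set_sketch (void)
    {
      static int set = -1;          /* -1 marks "not created yet" */
      if (set == -1)
        set = make_id ();
      return set;
    }

    int
    main (void)
    {
      /* Both calls print the same id: the set is created exactly once.  */
      printf ("%d %d\n", got_alias_set_sketch (), got_alias_set_sketch ());
      return 0;
    }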
+
+/* Return a legitimate reference for ORIG (an address) using the
+ register REG. If REG is 0, a new pseudo is generated.
+
+ There are two types of references that must be handled:
+
+ 1. Global data references must load the address from the GOT, via
+ the PIC reg. An insn is emitted to do this load, and the reg is
+ returned.
+
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
+
+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
+ reg also appears in the address. */
+
+static rtx
+legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx addr = orig;
+ rtx new_rtx = orig;
+ rtx base;
+
+#if TARGET_MACHO
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ /* Use the generic Mach-O PIC machinery. */
+ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
+ }
+#endif
+
+ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
+ new_rtx = addr;
+ else if (TARGET_64BIT
+ && ix86_cmodel != CM_SMALL_PIC
+ && gotoff_operand (addr, Pmode))
+ {
+ rtx tmpreg;
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
+ }
+ else
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ if (!reg)
+ tmpreg = gen_reg_rtx (Pmode);
+ else
+ tmpreg = reg;
+ emit_move_insn (tmpreg, new_rtx);
+
+ if (reg != 0)
+ {
+ new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
+ tmpreg, 1, OPTAB_DIRECT);
+ new_rtx = reg;
+ }
+      else
+	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
+ }
+ else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
+ {
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
+ }
+ else
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
+ /* We can't use @GOTOFF for text labels on VxWorks;
+ see gotoff_operand. */
+ || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
+ {
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
+ {
+ if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
+ return legitimize_dllimport_symbol (addr, true);
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
+ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
+ {
+ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
+ }
+ }
+
+      /* For x64 PE-COFF there is no GOT table, so we use the address
+         directly.  */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+	  /* Use gen_movsi directly; otherwise the address is loaded
+	     into a register for CSE.  We don't want to CSE these addresses;
+	     instead we CSE addresses from the GOT table, so skip this. */
+ emit_insn (gen_movsi (reg, new_rtx));
+ new_rtx = reg;
+ }
+ else
+ {
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOT). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ if (TARGET_64BIT)
+ new_rtx = force_reg (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ if (CONST_INT_P (addr)
+ && !x86_64_immediate_operand (addr, VOIDmode))
+ {
+ if (reg)
+ {
+ emit_move_insn (reg, addr);
+ new_rtx = reg;
+ }
+ else
+ new_rtx = force_reg (Pmode, addr);
+ }
+ else if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+
+	  /* We must match stuff we generated before.  Assume the only
+ unspecs that can get here are ours. Not that we could do
+ anything with them anyway.... */
+ if (GET_CODE (addr) == UNSPEC
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == UNSPEC))
+ return orig;
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if (gotoff_operand (op0, Pmode)
+ && CONST_INT_P (op1))
+ {
+ if (!TARGET_64BIT)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ if (INTVAL (op1) < -16*1024*1024
+ || INTVAL (op1) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (op1, Pmode))
+ op1 = force_reg (Pmode, op1);
+ new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
+ }
+ }
+ }
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new_rtx = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
+
+ if (CONST_INT_P (new_rtx))
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
+ else
+ {
+ if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ new_rtx = XEXP (new_rtx, 1);
+ }
+ new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
+ }
+ }
+ }
+ }
+ return new_rtx;
+}
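
As the comment before legitimize_pic_address says, there are two reference styles: local data is addressed as the PIC register plus a @GOTOFF offset, while global data is loaded indirectly through its GOT slot. A rough sketch of that decision; the strings only describe the resulting instruction shapes and are not output of this function.

    #include <stdbool.h>
    #include <stdio.h>

    static const char *
    pic_reference_style (bool symbol_is_local)
    {
      return symbol_is_local
             ? "lea foo@GOTOFF(%ebx), %eax   # address = PIC reg + offset"
             : "mov foo@GOT(%ebx), %eax      # address loaded from the GOT slot";
    }

    int
    main (void)
    {
      puts (pic_reference_style (true));    /* static/local data */
      puts (pic_reference_style (false));   /* global data */
      return 0;
    }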
+
+/* Load the thread pointer. If TO_REG is true, force it into a register. */
+
+static rtx
+get_thread_pointer (int to_reg)
+{
+ rtx tp, reg, insn;
+
+ tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+ if (!to_reg)
+ return tp;
+
+ reg = gen_reg_rtx (Pmode);
+ insn = gen_rtx_SET (VOIDmode, reg, tp);
+ insn = emit_insn (insn);
+
+ return reg;
+}
+
+/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
+ false if we expect this to be used for a memory address and true if
+ we expect to load the address into a register. */
+
+static rtx
+legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+{
+ rtx dest, base, off, pic, tp;
+ int type;
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ dest = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
+ insns = get_insns ();
+ end_sequence ();
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_global_dynamic_64 (dest, x));
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
+ insns = get_insns ();
+ end_sequence ();
+
+ note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
+ note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, base, rax, note);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_local_dynamic_base_64 (base));
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base));
+
+ if (TARGET_GNU2_TLS)
+ {
+ rtx x = ix86_tls_module_base ();
+
+ set_unique_reg_note (get_last_insn (), REG_EQUAL,
+ gen_rtx_MINUS (Pmode, x, tp));
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
+ }
+
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (TARGET_64BIT)
+ {
+ if (TARGET_SUN_TLS)
+ {
+	      /* The Sun linker took the AMD64 TLS spec literally
+		 and can only handle %rax as the destination of the
+		 initial executable code sequence.  */
+
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_initial_exec_64_sun (dest, x));
+ return dest;
+ }
+
+ pic = NULL;
+ type = UNSPEC_GOTNTPOFF;
+ }
+ else if (flag_pic)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ pic = pic_offset_table_rtx;
+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+ }
+ else if (!TARGET_ANY_GNU_TLS)
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ type = UNSPEC_GOTTPOFF;
+ }
+ else
+ {
+ pic = NULL;
+ type = UNSPEC_INDNTPOFF;
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
+ off = gen_rtx_CONST (Pmode, off);
+ if (pic)
+ off = gen_rtx_PLUS (Pmode, pic, off);
+ off = gen_const_mem (Pmode, off);
+ set_mem_alias_set (off, ix86_GOT_alias_set ());
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ off = force_reg (Pmode, off);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
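
The switch above handles the four standard TLS access models. A compact sketch that merely summarizes each access sequence as text (descriptions, not emitted code):

    #include <stdio.h>

    enum tls_model_sketch
    { GLOBAL_DYNAMIC, LOCAL_DYNAMIC, INITIAL_EXEC, LOCAL_EXEC };

    static const char *
    tls_sequence (enum tls_model_sketch m)
    {
      switch (m)
        {
        case GLOBAL_DYNAMIC:
          return "call __tls_get_addr with sym@TLSGD";
        case LOCAL_DYNAMIC:
          return "call __tls_get_addr for the module base, add sym@DTPOFF";
        case INITIAL_EXEC:
          return "load the sym@GOTTPOFF slot, add the thread pointer";
        case LOCAL_EXEC:
          return "thread pointer + sym@TPOFF constant";
        }
      return "";
    }

    int
    main (void)
    {
      printf ("%s\n", tls_sequence (INITIAL_EXEC));
      return 0;
    }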
+
+/* Create or return the unique __imp_DECL dllimport symbol corresponding
+ to symbol DECL. */
+
+static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
+ htab_t dllimport_map;
+
+static tree
+get_dllimport_decl (tree decl)
+{
+ struct tree_map *h, in;
+ void **loc;
+ const char *name;
+ const char *prefix;
+ size_t namelen, prefixlen;
+ char *imp_name;
+ tree to;
+ rtx rtl;
+
+ if (!dllimport_map)
+ dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
+
+ in.hash = htab_hash_pointer (decl);
+ in.base.from = decl;
+ loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
+ h = (struct tree_map *) *loc;
+ if (h)
+ return h->to;
+
+ *loc = h = ggc_alloc_tree_map ();
+ h->hash = in.hash;
+ h->base.from = decl;
+ h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
+ VAR_DECL, NULL, ptr_type_node);
+ DECL_ARTIFICIAL (to) = 1;
+ DECL_IGNORED_P (to) = 1;
+ DECL_EXTERNAL (to) = 1;
+ TREE_READONLY (to) = 1;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+ prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
+ ? "*__imp_" : "*__imp__";
+ namelen = strlen (name);
+ prefixlen = strlen (prefix);
+ imp_name = (char *) alloca (namelen + prefixlen + 1);
+ memcpy (imp_name, prefix, prefixlen);
+ memcpy (imp_name + prefixlen, name, namelen + 1);
+
+ name = ggc_alloc_string (imp_name, namelen + prefixlen);
+ rtl = gen_rtx_SYMBOL_REF (Pmode, name);
+ SET_SYMBOL_REF_DECL (rtl, to);
+ SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
+
+ rtl = gen_const_mem (Pmode, rtl);
+ set_mem_alias_set (rtl, ix86_GOT_alias_set ());
+
+ SET_DECL_RTL (to, rtl);
+ SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
+
+ return to;
+}
+
+/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
+   true if we require the result to be a register.  */
+
+static rtx
+legitimize_dllimport_symbol (rtx symbol, bool want_reg)
+{
+ tree imp_decl;
+ rtx x;
+
+ gcc_assert (SYMBOL_REF_DECL (symbol));
+ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
+
+ x = DECL_RTL (imp_decl);
+ if (want_reg)
+ x = force_reg (Pmode, x);
+ return x;
+}
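
get_dllimport_decl builds the import symbol by prefixing the stripped name. A standalone sketch of just that string construction; the extra_underscore flag is an illustrative compression of the fastcall/user-label-prefix test in the real code.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *
    imp_name (const char *name, int extra_underscore)
    {
      const char *prefix = extra_underscore ? "*__imp__" : "*__imp_";
      size_t n = strlen (prefix) + strlen (name) + 1;
      char *buf = (char *) malloc (n);
      snprintf (buf, n, "%s%s", prefix, name);
      return buf;
    }

    int
    main (void)
    {
      char *s = imp_name ("CreateFileA", 0);
      puts (s);                     /* prints *__imp_CreateFileA */
      free (s);
      return 0;
    }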
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
+
+static rtx
+ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ int changed = 0;
+ unsigned log;
+
+ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
+ if (log)
+ return legitimize_tls_address (x, (enum tls_model) log, false);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
+ {
+ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
+ (enum tls_model) log, false);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
+ {
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
+ return legitimize_dllimport_symbol (x, true);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
+ {
+ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+ }
+
+ if (flag_pic && SYMBOLIC_CONST (x))
+ return legitimize_pic_address (x, 0);
+
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
+ return machopic_indirect_data_reference (x, 0);
+#endif
+
+  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
+ if (GET_CODE (x) == ASHIFT
+ && CONST_INT_P (XEXP (x, 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (x, 1));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (x) == PLUS)
+ {
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
+
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 0), 1));
+ XEXP (x, 0) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 1), 1));
+ XEXP (x, 1) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
+ GEN_INT (1 << log));
+ }
+
+ /* Put multiply first if it isn't already. */
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ rtx tmp = XEXP (x, 0);
+ XEXP (x, 0) = XEXP (x, 1);
+ XEXP (x, 1) = tmp;
+ changed = 1;
+ }
+
+ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
+ created by virtual register instantiation, register elimination, and
+ similar optimizations. */
+ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ XEXP (XEXP (x, 1), 0)),
+ XEXP (XEXP (x, 1), 1));
+ }
+
+ /* Canonicalize
+ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). */
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ rtx constant;
+ rtx other = NULL_RTX;
+
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ constant = XEXP (x, 1);
+ other = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ }
+ else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
+ {
+ constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ other = XEXP (x, 1);
+ }
+ else
+ constant = 0;
+
+ if (constant)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 1), 0)),
+ plus_constant (other, INTVAL (constant)));
+ }
+ }
+
+ if (changed && ix86_legitimate_address_p (mode, x, false))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ }
+
+ if (changed
+ && REG_P (XEXP (x, 1))
+ && REG_P (XEXP (x, 0)))
+ return x;
+
+ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
+ {
+ changed = 1;
+ x = legitimize_pic_address (x, 0);
+ }
+
+ if (changed && ix86_legitimate_address_p (mode, x, false))
+ return x;
+
+ if (REG_P (XEXP (x, 0)))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 1) = temp;
+ return x;
+ }
+
+ else if (REG_P (XEXP (x, 1)))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 0) = temp;
+ return x;
+ }
+ }
+
+ return x;
+}
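
The shift-to-multiply canonicalization above exists because the scale field of an x86 address is a multiplier, not a shift. A tiny arithmetic sketch of the rewrite (illustrative only):

    #include <stdio.h>

    static long
    address_term (long x, unsigned shift_count)
    {
      if (shift_count < 4)
        return x * (1L << shift_count);   /* becomes scale 1, 2, 4 or 8 */
      return x << shift_count;            /* too big for an address scale */
    }

    int
    main (void)
    {
      printf ("%ld\n", address_term (10, 3));   /* 80, i.e. 10 with scale 8 */
      return 0;
    }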
+
+/* Print an integer constant expression in assembler syntax. Addition
+ and subtraction are the only arithmetic that may appear in these
+ expressions. FILE is the stdio stream to write to, X is the rtx, and
+ CODE is the operand print code from the output string. */
+
+static void
+output_pic_addr_const (FILE *file, rtx x, int code)
+{
+ char buf[256];
+
+ switch (GET_CODE (x))
+ {
+ case PC:
+ gcc_assert (flag_pic);
+ putc ('.', file);
+ break;
+
+ case SYMBOL_REF:
+ if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
+ output_addr_const (file, x);
+ else
+ {
+ const char *name = XSTR (x, 0);
+
+ /* Mark the decl as referenced so that cgraph will
+ output the function. */
+ if (SYMBOL_REF_DECL (x))
+ mark_decl_referenced (SYMBOL_REF_DECL (x));
+
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT
+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
+ name = machopic_indirection_name (x, /*stub_p=*/true);
+#endif
+ assemble_name (file, name);
+ }
+ if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
+ fputs ("@PLT", file);
+ break;
+
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ /* FALLTHRU */
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ assemble_name (asm_out_file, buf);
+ break;
+
+ case CONST_INT:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST:
+ /* This used to output parentheses around the expression,
+ but that does not work on the 386 (either ATT or BSD assembler). */
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %d if the number is <32 bits and positive. */
+ if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, "0x%lx%08lx",
+ (unsigned long) CONST_DOUBLE_HIGH (x),
+ (unsigned long) CONST_DOUBLE_LOW (x));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ TARGET_PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* Some assemblers need integer constants to appear first. */
+ if (CONST_INT_P (XEXP (x, 0)))
+ {
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ }
+ else
+ {
+ gcc_assert (CONST_INT_P (XEXP (x, 1)));
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ }
+ break;
+
+ case MINUS:
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('-', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
+ break;
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_STACK_CHECK)
+ {
+ bool f = i386_asm_output_addr_const_extra (file, x);
+ gcc_assert (f);
+ break;
+ }
+
+ gcc_assert (XVECLEN (x, 0) == 1);
+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_PLTOFF:
+ fputs ("@PLTOFF", file);
+ break;
+ case UNSPEC_PCREL:
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "(%rip)" : "[rip]", file);
+ break;
+ case UNSPEC_GOTPCREL:
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ /* FIXME: This might be @TPOFF in Sun ld too. */
+ fputs ("@gottpoff", file);
+ break;
+ case UNSPEC_TPOFF:
+ fputs ("@tpoff", file);
+ break;
+ case UNSPEC_NTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@tpoff", file);
+ else
+ fputs ("@ntpoff", file);
+ break;
+ case UNSPEC_DTPOFF:
+ fputs ("@dtpoff", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ if (TARGET_64BIT)
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@gottpoff(%rip)": "@gottpoff[rip]", file);
+ else
+ fputs ("@gotntpoff", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ fputs ("@indntpoff", file);
+ break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
+ default:
+ output_operand_lossage ("invalid UNSPEC as operand");
+ break;
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
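
The UNSPEC arm above is essentially a relocation-to-suffix table. A condensed standalone sketch of that mapping for a few relocations; the enum names are hypothetical stand-ins for the UNSPEC_* codes.

    #include <stdio.h>

    enum reloc_sketch { R_GOT, R_GOTOFF, R_GOTPCREL, R_PLT, R_DTPOFF };

    static const char *
    reloc_suffix (enum reloc_sketch r)
    {
      switch (r)
        {
        case R_GOT:      return "@GOT";
        case R_GOTOFF:   return "@GOTOFF";
        case R_GOTPCREL: return "@GOTPCREL(%rip)";
        case R_PLT:      return "@PLT";
        case R_DTPOFF:   return "@dtpoff";
        }
      return "";
    }

    int
    main (void)
    {
      printf ("foo%s\n", reloc_suffix (R_GOTOFF));   /* foo@GOTOFF */
      return 0;
    }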
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void ATTRIBUTE_UNUSED
+i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ fputs (ASM_LONG, file);
+ output_addr_const (file, x);
+ fputs ("@dtpoff", file);
+ switch (size)
+ {
+ case 4:
+ break;
+ case 8:
+ fputs (", 0", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if X is a representation of the PIC register. This copes
+ with calls from ix86_find_base_term, where the register might have
+ been replaced by a cselib value. */
+
+static bool
+ix86_pic_register_p (rtx x)
+{
+ if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
+ return (pic_offset_table_rtx
+ && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+ else
+ return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
+}
+
+/* Helper function for ix86_delegitimize_address.
+ Attempt to delegitimize TLS local-exec accesses. */
+
+static rtx
+ix86_delegitimize_tls_address (rtx orig_x)
+{
+ rtx x = orig_x, unspec;
+ struct ix86_address addr;
+
+ if (!TARGET_TLS_DIRECT_SEG_REFS)
+ return orig_x;
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+ if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
+ return orig_x;
+ if (ix86_decompose_address (x, &addr) == 0
+ || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
+ || addr.disp == NULL_RTX
+ || GET_CODE (addr.disp) != CONST)
+ return orig_x;
+ unspec = XEXP (addr.disp, 0);
+ if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
+ unspec = XEXP (unspec, 0);
+ if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
+ return orig_x;
+ x = XVECEXP (unspec, 0, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ if (unspec != XEXP (addr.disp, 0))
+ x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
+ if (addr.index)
+ {
+ rtx idx = addr.index;
+ if (addr.scale != 1)
+ idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
+ x = gen_rtx_PLUS (Pmode, idx, x);
+ }
+ if (addr.base)
+ x = gen_rtx_PLUS (Pmode, addr.base, x);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize PIC+GOTOFF and turn it back
+ into a direct symbol reference.
+
+ On Darwin, this is necessary to avoid a crash, because Darwin
+ has a different PIC label for each routine but the DWARF debugging
+ information is not associated with any particular routine, so it's
+ necessary to remove references to the PIC label from RTL stored by
+ the DWARF output code. */
+
+static rtx
+ix86_delegitimize_address (rtx x)
+{
+ rtx orig_x = delegitimize_mem_from_attrs (x);
+ /* addend is NULL or some rtx if x is something+GOTOFF where
+ something doesn't include the PIC register. */
+ rtx addend = NULL_RTX;
+ /* reg_addend is NULL or a multiple of some register. */
+ rtx reg_addend = NULL_RTX;
+ /* const_addend is NULL or a const_int. */
+ rtx const_addend = NULL_RTX;
+ /* This is the result, or NULL. */
+ rtx result = NULL_RTX;
+
+ x = orig_x;
+
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST
+ || GET_CODE (XEXP (x, 0)) != UNSPEC
+ || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
+ && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
+ || !MEM_P (orig_x))
+ return ix86_delegitimize_tls_address (orig_x);
+ x = XVECEXP (XEXP (x, 0), 0, 0);
+ if (GET_MODE (orig_x) != Pmode)
+ {
+ x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
+ if (x == NULL_RTX)
+ return orig_x;
+ }
+ return x;
+ }
+
+ if (GET_CODE (x) != PLUS
+ || GET_CODE (XEXP (x, 1)) != CONST)
+ return ix86_delegitimize_tls_address (orig_x);
+
+ if (ix86_pic_register_p (XEXP (x, 0)))
+ /* %ebx + GOT/GOTOFF */
+ ;
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ /* %ebx + %reg * scale + GOT/GOTOFF */
+ reg_addend = XEXP (x, 0);
+ if (ix86_pic_register_p (XEXP (reg_addend, 0)))
+ reg_addend = XEXP (reg_addend, 1);
+ else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
+ reg_addend = XEXP (reg_addend, 0);
+ else
+ {
+ reg_addend = NULL_RTX;
+ addend = XEXP (x, 0);
+ }
+ }
+ else
+ addend = XEXP (x, 0);
+
+ x = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (x) == PLUS
+ && CONST_INT_P (XEXP (x, 1)))
+ {
+ const_addend = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+
+ if (GET_CODE (x) == UNSPEC
+ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
+ || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
+ result = XVECEXP (x, 0, 0);
+
+ if (TARGET_MACHO && darwin_local_data_pic (x)
+ && !MEM_P (orig_x))
+ result = XVECEXP (x, 0, 0);
+
+ if (! result)
+ return ix86_delegitimize_tls_address (orig_x);
+
+ if (const_addend)
+ result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
+ if (reg_addend)
+ result = gen_rtx_PLUS (Pmode, reg_addend, result);
+ if (addend)
+ {
+ /* If the rest of original X doesn't involve the PIC register, add
+ addend and subtract pic_offset_table_rtx. This can happen e.g.
+ for code like:
+ leal (%ebx, %ecx, 4), %ecx
+ ...
+ movl foo@GOTOFF(%ecx), %edx
+ in which case we return (%ecx - %ebx) + foo. */
+ if (pic_offset_table_rtx)
+ result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
+ pic_offset_table_rtx),
+ result);
+ else
+ return orig_x;
+ }
+ if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
+ {
+ result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
+ if (result == NULL_RTX)
+ return orig_x;
+ }
+ return result;
+}
+
+/* If X is a machine specific address (i.e. a symbol or label being
+ referenced as a displacement from the GOT implemented using an
+ UNSPEC), then return the base term. Otherwise return X. */
+
+rtx
+ix86_find_base_term (rtx x)
+{
+ rtx term;
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST)
+ return x;
+ term = XEXP (x, 0);
+ if (GET_CODE (term) == PLUS
+ && (CONST_INT_P (XEXP (term, 1))
+ || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
+ term = XEXP (term, 0);
+ if (GET_CODE (term) != UNSPEC
+ || (XINT (term, 1) != UNSPEC_GOTPCREL
+ && XINT (term, 1) != UNSPEC_PCREL))
+ return x;
+
+ return XVECEXP (term, 0, 0);
+ }
+
+ return ix86_delegitimize_address (x);
+}
+
+static void
+put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
+ int fp, FILE *file)
+{
+ const char *suffix;
+
+ if (mode == CCFPmode || mode == CCFPUmode)
+ {
+ code = ix86_fp_compare_code_to_integer (code);
+ mode = CCmode;
+ }
+ if (reverse)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ:
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "a";
+ break;
+
+ case CCCmode:
+ suffix = "c";
+ break;
+
+ case CCOmode:
+ suffix = "o";
+ break;
+
+ case CCSmode:
+ suffix = "s";
+ break;
+
+ default:
+ suffix = "e";
+ }
+ break;
+ case NE:
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "na";
+ break;
+
+ case CCCmode:
+ suffix = "nc";
+ break;
+
+ case CCOmode:
+ suffix = "no";
+ break;
+
+ case CCSmode:
+ suffix = "ns";
+ break;
+
+ default:
+ suffix = "ne";
+ }
+ break;
+ case GT:
+ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
+ suffix = "g";
+ break;
+ case GTU:
+ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
+ Those same assemblers have the same but opposite lossage on cmov. */
+ if (mode == CCmode)
+ suffix = fp ? "nbe" : "a";
+ else if (mode == CCCmode)
+ suffix = "b";
+ else
+ gcc_unreachable ();
+ break;
+ case LT:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "s";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "l";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case LTU:
+ gcc_assert (mode == CCmode || mode == CCCmode);
+ suffix = "b";
+ break;
+ case GE:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "ns";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "ge";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case GEU:
+ /* ??? As above. */
+ gcc_assert (mode == CCmode || mode == CCCmode);
+ suffix = fp ? "nb" : "ae";
+ break;
+ case LE:
+ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
+ suffix = "le";
+ break;
+ case LEU:
+ /* ??? As above. */
+ if (mode == CCmode)
+ suffix = "be";
+ else if (mode == CCCmode)
+ suffix = fp ? "nb" : "ae";
+ else
+ gcc_unreachable ();
+ break;
+ case UNORDERED:
+ suffix = fp ? "u" : "p";
+ break;
+ case ORDERED:
+ suffix = fp ? "nu" : "np";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ fputs (suffix, file);
+}
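
put_condition_code selects the suffix from both the comparison code and the flags mode. A reduced sketch of the core signed/unsigned split (enum and function names are illustrative):

    #include <stdio.h>

    enum cmp_sketch { C_EQ, C_NE, C_LT, C_LTU, C_GE, C_GEU };

    static const char *
    cc_suffix (enum cmp_sketch c)
    {
      switch (c)
        {
        case C_EQ:  return "e";
        case C_NE:  return "ne";
        case C_LT:  return "l";     /* signed less-than */
        case C_LTU: return "b";     /* unsigned: "below" */
        case C_GE:  return "ge";
        case C_GEU: return "ae";    /* unsigned: "above or equal" */
        }
      return "";
    }

    int
    main (void)
    {
      printf ("j%s / j%s\n", cc_suffix (C_LT), cc_suffix (C_LTU));  /* jl / jb */
      return 0;
    }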
+
+/* Print the name of register X to FILE based on its machine mode and number.
+ If CODE is 'w', pretend the mode is HImode.
+ If CODE is 'b', pretend the mode is QImode.
+ If CODE is 'k', pretend the mode is SImode.
+ If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'x', pretend the mode is V4SFmode.
+ If CODE is 't', pretend the mode is V8SFmode.
+ If CODE is 'h', pretend the reg is the 'high' byte register.
+ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+ If CODE is 'd', duplicate the operand for AVX instruction.
+ */
+
+void
+print_reg (rtx x, int code, FILE *file)
+{
+ const char *reg;
+ bool duplicated = code == 'd' && TARGET_AVX;
+
+ gcc_assert (x == pc_rtx
+ || (REGNO (x) != ARG_POINTER_REGNUM
+ && REGNO (x) != FRAME_POINTER_REGNUM
+ && REGNO (x) != FLAGS_REG
+ && REGNO (x) != FPSR_REG
+ && REGNO (x) != FPCR_REG));
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+
+ if (x == pc_rtx)
+ {
+ gcc_assert (TARGET_64BIT);
+ fputs ("rip", file);
+ return;
+ }
+
+ if (code == 'w' || MMX_REG_P (x))
+ code = 2;
+ else if (code == 'b')
+ code = 1;
+ else if (code == 'k')
+ code = 4;
+ else if (code == 'q')
+ code = 8;
+ else if (code == 'y')
+ code = 3;
+ else if (code == 'h')
+ code = 0;
+ else if (code == 'x')
+ code = 16;
+ else if (code == 't')
+ code = 32;
+ else
+ code = GET_MODE_SIZE (GET_MODE (x));
+
+  /* Irritatingly, AMD extended registers use a different naming convention
+     from the normal registers.  */
+ if (REX_INT_REG_P (x))
+ {
+ gcc_assert (TARGET_64BIT);
+ switch (code)
+ {
+ case 0:
+ error ("extended registers have no high halves");
+ break;
+ case 1:
+ fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 2:
+ fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 4:
+ fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 8:
+ fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ default:
+ error ("unsupported operand size for extended register");
+ break;
+ }
+ return;
+ }
+
+ reg = NULL;
+ switch (code)
+ {
+ case 3:
+ if (STACK_TOP_P (x))
+ {
+ reg = "st(0)";
+ break;
+ }
+ /* FALLTHRU */
+ case 8:
+ case 4:
+ case 12:
+ if (! ANY_FP_REG_P (x))
+ putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
+ /* FALLTHRU */
+ case 16:
+ case 2:
+ normal:
+ reg = hi_reg_name[REGNO (x)];
+ break;
+ case 1:
+ if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ goto normal;
+ reg = qi_reg_name[REGNO (x)];
+ break;
+ case 0:
+ if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ goto normal;
+ reg = qi_high_reg_name[REGNO (x)];
+ break;
+ case 32:
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('y', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ fputs (reg, file);
+ if (duplicated)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, ", %%%s", reg);
+ else
+ fprintf (file, ", %s", reg);
+ }
+}
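
The REX branch above is the subtle part: r8-r15 take size suffixes (b/w/d, or none for 64-bit) instead of the e/r prefixes used by legacy registers. A standalone sketch of that naming rule:

    #include <stdio.h>

    static void
    print_rex_reg (int regno, int size)   /* regno in 8..15 */
    {
      switch (size)
        {
        case 1: printf ("r%db\n", regno); break;
        case 2: printf ("r%dw\n", regno); break;
        case 4: printf ("r%dd\n", regno); break;
        case 8: printf ("r%d\n", regno); break;
        }
    }

    int
    main (void)
    {
      print_rex_reg (9, 4);   /* r9d */
      print_rex_reg (9, 8);   /* r9 */
      return 0;
    }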
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some tls_local_dynamic_base
+ pattern. */
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (NONDEBUG_INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ return NULL;
+}
+
+/* Meaning of CODE:
+ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+ C -- print opcode suffix for set/cmov insn.
+ c -- like C, but print reversed condition
+ F,f -- likewise, but for floating-point.
+ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+ otherwise nothing
+ R -- print the prefix for register names.
+ z -- print the opcode suffix for the size of the current operand.
+ Z -- likewise, with special suffixes for x87 instructions.
+ * -- print a star (in certain assembler syntax)
+ A -- print an absolute memory reference.
+ w -- print the operand as if it's a "word" (HImode) even if it isn't.
+   s -- print a shift double count, followed by the assembler's argument
+ delimiter.
+ b -- print the QImode name of the register for the indicated operand.
+ %b0 would print %al if operands[0] is reg 0.
+ w -- likewise, print the HImode name of the register.
+ k -- likewise, print the SImode name of the register.
+ q -- likewise, print the DImode name of the register.
+ x -- likewise, print the V4SFmode name of the register.
+ t -- likewise, print the V8SFmode name of the register.
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+ y -- print "st(0)" instead of "st" as a register.
+ d -- print duplicated register operand for AVX instruction.
+ D -- print condition for SSE cmp instruction.
+ P -- if PIC, print an @PLT suffix.
+ X -- don't print any sort of PIC '@' suffix for a symbol.
+ & -- print some in-use local-dynamic symbol name.
+ H -- print a memory address offset by 8; used for sse high-parts
+ Y -- print condition for XOP pcom* instruction.
+ + -- print a branch hint as 'cs' or 'ds' prefix
+   ; -- print a semicolon (after prefixes due to a bug in older gas).
+ @ -- print a segment register of thread base pointer load
+ */
+
+void
+ix86_print_operand (FILE *file, rtx x, int code)
+{
+ if (code)
+ {
+ switch (code)
+ {
+ case '*':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('*', file);
+ return;
+
+ case '&':
+ {
+ const char *name = get_some_local_dynamic_name ();
+ if (name == NULL)
+ output_operand_lossage ("'%%&' used without any "
+ "local dynamic TLS references");
+ else
+ assemble_name (file, name);
+ return;
+ }
+
+ case 'A':
+ switch (ASSEMBLER_DIALECT)
+ {
+ case ASM_ATT:
+ putc ('*', file);
+ break;
+
+ case ASM_INTEL:
+	      /* Intel syntax.  For absolute addresses, registers should not
+		 be surrounded by brackets.  */
+ if (!REG_P (x))
+ {
+ putc ('[', file);
+ ix86_print_operand (file, x, 0);
+ putc (']', file);
+ return;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ ix86_print_operand (file, x, 0);
+ return;
+
+	case 'L':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'W':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('w', file);
+ return;
+
+ case 'B':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('b', file);
+ return;
+
+ case 'Q':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'S':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('s', file);
+ return;
+
+ case 'T':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('t', file);
+ return;
+
+ case 'z':
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+	      /* Opcodes don't get size suffixes when using Intel syntax.  */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1:
+ putc ('b', file);
+ return;
+
+ case 2:
+ putc ('w', file);
+ return;
+
+ case 4:
+ putc ('l', file);
+ return;
+
+ case 8:
+ putc ('q', file);
+ return;
+
+ default:
+ output_operand_lossage
+ ("invalid operand size for operand code '%c'", code);
+ return;
+ }
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ warning
+ (0, "non-integer operand used with operand code '%c'", code);
+ /* FALLTHRU */
+
+ case 'Z':
+	  /* 387 opcodes don't get size suffixes when using Intel syntax.  */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 2:
+#ifdef HAVE_AS_IX86_FILDS
+ putc ('s', file);
+#endif
+ return;
+
+ case 4:
+ putc ('l', file);
+ return;
+
+ case 8:
+#ifdef HAVE_AS_IX86_FILDQ
+ putc ('q', file);
+#else
+ fputs ("ll", file);
+#endif
+ return;
+
+ default:
+ break;
+ }
+ }
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* 387 opcodes don't get size suffixes
+ if the operands are registers. */
+ if (STACK_REG_P (x))
+ return;
+
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 4:
+ putc ('s', file);
+ return;
+
+ case 8:
+ putc ('l', file);
+ return;
+
+ case 12:
+ case 16:
+ putc ('t', file);
+ return;
+
+ default:
+ break;
+ }
+ }
+ else
+ {
+ output_operand_lossage
+ ("invalid operand type used with operand code '%c'", code);
+ return;
+ }
+
+ output_operand_lossage
+ ("invalid operand size for operand code '%c'", code);
+ return;
+
+ case 'd':
+ case 'b':
+ case 'w':
+ case 'k':
+ case 'q':
+ case 'h':
+ case 't':
+ case 'y':
+ case 'x':
+ case 'X':
+ case 'P':
+ break;
+
+ case 's':
+ if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
+ {
+ ix86_print_operand (file, x, 0);
+ fputs (", ", file);
+ }
+ return;
+
+ case 'D':
+	  /* Little bit of braindamage here.  The SSE compare instructions
+	     use completely different names for the comparisons than the
+	     fp conditional moves do.  */
+ if (TARGET_AVX)
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case UNEQ:
+ fputs ("eq_us", file);
+ break;
+ case LT:
+ fputs ("lt", file);
+ break;
+ case UNLT:
+ fputs ("nge", file);
+ break;
+ case LE:
+ fputs ("le", file);
+ break;
+ case UNLE:
+ fputs ("ngt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ fputs ("neq", file);
+ break;
+ case LTGT:
+ fputs ("neq_oq", file);
+ break;
+ case GE:
+ fputs ("ge", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case GT:
+ fputs ("gt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, "
+ "invalid operand code 'D'");
+ return;
+ }
+ }
+ else
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, "
+ "invalid operand code 'D'");
+ return;
+ }
+ }
+ return;
+ case 'O':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ switch (GET_MODE (x))
+ {
+ case HImode: putc ('w', file); break;
+ case SImode:
+ case SFmode: putc ('l', file); break;
+ case DImode:
+ case DFmode: putc ('q', file); break;
+ default: gcc_unreachable ();
+ }
+ putc ('.', file);
+ }
+#endif
+ return;
+ case 'C':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'C'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
+ return;
+ case 'F':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'F'");
+ return;
+ }
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
+ return;
+
+ /* Like above, but reverse condition */
+ case 'c':
+ /* Check to see if argument to %c is really a constant
+ and not a condition code which needs to be reversed. */
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'c'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
+ return;
+ case 'f':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'f'");
+ return;
+ }
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
+ return;
+
+ case 'H':
+ if (!offsettable_memref_p (x))
+ {
+ output_operand_lossage ("operand is not an offsettable memory "
+ "reference, invalid operand "
+ "code 'H'");
+ return;
+ }
+ /* It doesn't actually matter what mode we use here, as we're
+ only going to use this for printing. */
+ x = adjust_address_nv (x, DImode, 8);
+ break;
+
+ case '+':
+ {
+ rtx x;
+
+ if (!optimize
+ || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
+ return;
+
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ {
+ int taken = pred_val > REG_BR_PROB_BASE / 2;
+ int cputaken = final_forward_branch_p (current_output_insn) == 0;
+
+		  /* Emit hints only where the default branch prediction
+		     heuristics would fail.  */
+ if (taken != cputaken)
+ {
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken)
+ fputs ("ds ; ", file);
+ else
+ fputs ("cs ; ", file);
+ }
+ }
+ }
+ return;
+ }
+
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ fputs ("neq", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case GE:
+ case GEU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
+ break;
+ case GT:
+ case GTU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ case UNEQ:
+ fputs ("ueq", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case UNLE:
+ fputs ("ule", file);
+ break;
+ case UNLT:
+ fputs ("ult", file);
+ break;
+ case LTGT:
+ fputs ("une", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, "
+ "invalid operand code 'Y'");
+ return;
+ }
+ return;
+
+ case ';':
+#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
+ putc (';', file);
+#endif
+ return;
+
+ case '@':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+
+ /* The kernel uses a different segment register for performance
+ reasons; a system call would not have to trash the userspace
+ segment register, which would be expensive. */
+ if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
+ fputs ("fs", file);
+ else
+ fputs ("gs", file);
+ return;
+
+ default:
+ output_operand_lossage ("invalid operand code '%c'", code);
+ }
+ }
+
+ if (REG_P (x))
+ print_reg (x, code, file);
+
+ else if (MEM_P (x))
+ {
+ /* No `byte ptr' prefix for call instructions or BLKmode operands. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
+ && GET_MODE (x) != BLKmode)
+ {
+ const char * size;
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1: size = "BYTE"; break;
+ case 2: size = "WORD"; break;
+ case 4: size = "DWORD"; break;
+ case 8: size = "QWORD"; break;
+ case 12: size = "TBYTE"; break;
+ case 16:
+ if (GET_MODE (x) == XFmode)
+ size = "TBYTE";
+ else
+ size = "XMMWORD";
+ break;
+ case 32: size = "YMMWORD"; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Check for explicit size override (codes 'b', 'w' and 'k') */
+ if (code == 'b')
+ size = "BYTE";
+ else if (code == 'w')
+ size = "WORD";
+ else if (code == 'k')
+ size = "DWORD";
+
+ fputs (size, file);
+ fputs (" PTR ", file);
+ }
+
+ x = XEXP (x, 0);
+ /* Avoid (%rip) for call operands. */
+ if (CONSTANT_ADDRESS_P (x) && code == 'P'
+ && !CONST_INT_P (x))
+ output_addr_const (file, x);
+ else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
+ output_operand_lossage ("invalid constraints for operand");
+ else
+ output_address (x);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ /* Sign extend 32bit SFmode immediate to 8 bytes. */
+ if (code == 'q')
+ fprintf (file, "0x%08llx", (unsigned long long) (int) l);
+ else
+ fprintf (file, "0x%08x", (unsigned int) l);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
+ }
+
+ /* These float cases don't actually occur as immediate operands. */
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fputs (dstr, file);
+ }
+
+ else
+ {
+ /* We have patterns that allow zero sets of memory, for instance.
+ In 64-bit mode, we should probably support all 8-byte vectors,
+ since we can in fact encode that into an immediate. */
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ gcc_assert (x == CONST0_RTX (GET_MODE (x)));
+ x = const0_rtx;
+ }
+
+ if (code != 'P')
+ {
+ if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ }
+ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == LABEL_REF)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ else
+ fputs ("OFFSET FLAT:", file);
+ }
+ }
+ if (CONST_INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ else if (flag_pic || MACHOPIC_INDIRECT)
+ output_pic_addr_const (file, x, code);
+ else
+ output_addr_const (file, x);
+ }
+}
+
+static bool
+ix86_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '@' || code == '*' || code == '+'
+ || code == '&' || code == ';');
+}
+
+/* Print a memory operand whose address is ADDR. */
+
+static void
+ix86_print_operand_address (FILE *file, rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int scale;
+ int ok = ix86_decompose_address (addr, &parts);
+
+ gcc_assert (ok);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ switch (parts.seg)
+ {
+ case SEG_DEFAULT:
+ break;
+ case SEG_FS:
+ case SEG_GS:
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+ fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+  /* Use one-byte-shorter RIP-relative addressing for 64bit mode.  */
+ if (TARGET_64BIT && !base && !index)
+ {
+ rtx symbol = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
+ symbol = XEXP (XEXP (disp, 0), 0);
+
+ if (GET_CODE (symbol) == LABEL_REF
+ || (GET_CODE (symbol) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (symbol) == 0))
+ base = pc_rtx;
+ }
+ if (!base && !index)
+ {
+      /* A displacement-only address requires special attention.  */
+
+ if (CONST_INT_P (disp))
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
+ fputs ("ds:", file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
+ }
+ else if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else
+ output_addr_const (file, disp);
+ }
+ else
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ if (disp)
+ {
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('(', file);
+ if (base)
+ print_reg (base, 0, file);
+ if (index)
+ {
+ putc (',', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, ",%d", scale);
+ }
+ putc (')', file);
+ }
+ else
+ {
+ rtx offset = NULL_RTX;
+
+ if (disp)
+ {
+ /* Pull out the offset of a symbol; print any symbol itself. */
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
+ {
+ offset = XEXP (XEXP (disp, 0), 1);
+ disp = gen_rtx_CONST (VOIDmode,
+ XEXP (XEXP (disp, 0), 0));
+ }
+
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else if (CONST_INT_P (disp))
+ offset = disp;
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('[', file);
+ if (base)
+ {
+ print_reg (base, 0, file);
+ if (offset)
+ {
+ if (INTVAL (offset) >= 0)
+ putc ('+', file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ }
+ }
+ else if (offset)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ else
+ putc ('0', file);
+
+ if (index)
+ {
+ putc ('+', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, "*%d", scale);
+ }
+ putc (']', file);
+ }
+ }
+}
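
The two branches above print the same base+index*scale+disp address in the two assembler dialects: AT&T as disp(%base,%index,scale), Intel as [base+index*scale+disp]. A self-contained sketch, simplified to always print all components:

    #include <stdio.h>

    static void
    print_address (int att, const char *base, const char *index,
                   int scale, long disp)
    {
      if (att)
        printf ("%ld(%%%s,%%%s,%d)\n", disp, base, index, scale);
      else
        printf ("[%s+%s*%d%+ld]\n", base, index, scale, disp);
    }

    int
    main (void)
    {
      print_address (1, "ebx", "ecx", 4, 16);   /* 16(%ebx,%ecx,4) */
      print_address (0, "ebx", "ecx", 4, 16);   /* [ebx+ecx*4+16] */
      return 0;
    }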
+
+/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+i386_asm_output_addr_const_extra (FILE *file, rtx x)
+{
+ rtx op;
+
+ if (GET_CODE (x) != UNSPEC)
+ return false;
+
+ op = XVECEXP (x, 0, 0);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, op);
+ /* FIXME: This might be @TPOFF in Sun ld. */
+ fputs ("@gottpoff", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, op);
+ fputs ("@tpoff", file);
+ break;
+ case UNSPEC_NTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@tpoff", file);
+ else
+ fputs ("@ntpoff", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, op);
+ fputs ("@dtpoff", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@gottpoff(%rip)" : "@gottpoff[rip]", file);
+ else
+ fputs ("@gotntpoff", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ output_addr_const (file, op);
+ fputs ("@indntpoff", file);
+ break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ output_addr_const (file, op);
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
+
+ case UNSPEC_STACK_CHECK:
+ {
+ int offset;
+
+ gcc_assert (flag_split_stack);
+
+#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
+ offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
+#else
+ gcc_unreachable ();
+#endif
+
+ fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
+ }
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/* Split one or more double-mode RTL references into pairs of half-mode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_double_mode (enum machine_mode mode, rtx operands[],
+ int num, rtx lo_half[], rtx hi_half[])
+{
+ enum machine_mode half_mode;
+ unsigned int byte;
+
+ switch (mode)
+ {
+ case TImode:
+ half_mode = DImode;
+ break;
+ case DImode:
+ half_mode = SImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ byte = GET_MODE_SIZE (half_mode);
+
+ while (num--)
+ {
+ rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses,
+	 but we still have to handle them.  */
+ if (MEM_P (op))
+ {
+ lo_half[num] = adjust_address (op, half_mode, 0);
+ hi_half[num] = adjust_address (op, half_mode, byte);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (half_mode, op,
+ GET_MODE (op) == VOIDmode
+ ? mode : GET_MODE (op), 0);
+ hi_half[num] = simplify_gen_subreg (half_mode, op,
+ GET_MODE (op) == VOIDmode
+ ? mode : GET_MODE (op), byte);
+ }
+ }
+}
+
+/* Output code to perform a 387 binary operation in INSN, one of PLUS,
+ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
+ is the expression of the binary operation. The output may either be
+ emitted here, or returned to the caller, like all output_* functions.
+
+ There is no guarantee that the operands are the same mode, as they
+ might be within FLOAT or FLOAT_EXTEND expressions. */
+
+#ifndef SYSV386_COMPAT
+/* Set to 1 for compatibility with brain-damaged assemblers. No-one
+ wants to fix the assemblers because that causes incompatibility
+ with gcc. No-one wants to fix gcc because that causes
+ incompatibility with assemblers... You can use the option of
+ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
+#define SYSV386_COMPAT 1
+#endif
+
+const char *
+output_387_binary_op (rtx insn, rtx *operands)
+{
+ static char buf[40];
+ const char *p;
+ const char *ssep;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
+
+#ifdef ENABLE_CHECKING
+  /* Even if we do not want to check the inputs, this documents the
+     input constraints, which helps in understanding the following
+     code.  */
+ if (STACK_REG_P (operands[0])
+ && ((REG_P (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
+ || (REG_P (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
+ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
+ ; /* ok */
+ else
+ gcc_assert (is_sse);
+#endif
+
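+  /* In the output templates below, "{a|i}" expands to "a" for the
+     AT&T dialect and "i" for the Intel dialect, and "%Z<n>" prints
+     the x87/integer mode suffix for operand <n>.  */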
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fiadd";
+ else
+ p = "fadd";
+ ssep = "vadd";
+ break;
+
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fisub";
+ else
+ p = "fsub";
+ ssep = "vsub";
+ break;
+
+ case MULT:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fimul";
+ else
+ p = "fmul";
+ ssep = "vmul";
+ break;
+
+ case DIV:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fidiv";
+ else
+ p = "fdiv";
+ ssep = "vdiv";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (is_sse)
+ {
+ if (TARGET_AVX)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
+ else
+ strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
+ }
+ else
+ {
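+	  /* The SSE mnemonics are the AVX ones minus the leading 'v',
+	     so skip the first character of SSEP.  */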
+ strcpy (buf, ssep + 1);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ }
+ return buf;
+ }
+ strcpy (buf, p);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case MULT:
+ case PLUS:
+ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+      /* We know operands[0] == operands[1].  */
+
+ if (MEM_P (operands[2]))
+ {
+ p = "%Z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+ if (STACK_TOP_P (operands[0]))
+ /* How is it that we are storing to a dead operand[2]?
+ Well, presumably operands[1] is dead too. We can't
+ store the result to st(0) as st(0) gets popped on this
+ instruction. Instead store to operands[2] (which I
+ think has to be st(1)). st(1) will be popped later.
+ gcc <= 2.8.1 didn't have this check and generated
+ assembly code that the Unixware assembler rejected. */
+ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+ break;
+
+ case MINUS:
+ case DIV:
+ if (MEM_P (operands[1]))
+ {
+ p = "r%Z1\t%1";
+ break;
+ }
+
+ if (MEM_P (operands[2]))
+ {
+ p = "%Z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+#if SYSV386_COMPAT
+ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
+ derived assemblers, confusingly reverse the direction of
+ the operation for fsub{r} and fdiv{r} when the
+ destination register is not st(0). The Intel assembler
+ doesn't have this brain damage. Read !SYSV386_COMPAT to
+ figure out what the hardware really does. */
+ if (STACK_TOP_P (operands[0]))
+ p = "{p\t%0, %2|rp\t%2, %0}";
+ else
+ p = "{rp\t%2, %0|p\t%0, %2}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ /* As above for fmul/fadd, we can't store to st(0). */
+ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+#endif
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+#if SYSV386_COMPAT
+ if (STACK_TOP_P (operands[0]))
+ p = "{rp\t%0, %1|p\t%1, %0}";
+ else
+ p = "{p\t%1, %0|rp\t%0, %1}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
+ else
+ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
+#endif
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ {
+ if (STACK_TOP_P (operands[1]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
+ break;
+ }
+ else if (STACK_TOP_P (operands[1]))
+ {
+#if SYSV386_COMPAT
+ p = "{\t%1, %0|r\t%0, %1}";
+#else
+ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
+#endif
+ }
+ else
+ {
+#if SYSV386_COMPAT
+ p = "{r\t%2, %0|\t%0, %2}";
+#else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+#endif
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ strcat (buf, p);
+ return buf;
+}
+
+/* Return the mode needed for ENTITY in the optimize_mode_switching pass.  */
+
+int
+ix86_mode_needed (int entity, rtx insn)
+{
+ enum attr_i387_cw mode;
+
+  /* The mode UNINITIALIZED is used to store the control word after a
+     function call or ASM pattern.  The mode ANY specifies that the
+     function has no requirements on the control word and makes no
+     changes in the bits we are interested in.  */
+
+ if (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
+ return I387_CW_UNINITIALIZED;
+
+ if (recog_memoized (insn) < 0)
+ return I387_CW_ANY;
+
+ mode = get_attr_i387_cw (insn);
+
+ switch (entity)
+ {
+ case I387_TRUNC:
+ if (mode == I387_CW_TRUNC)
+ return mode;
+ break;
+
+ case I387_FLOOR:
+ if (mode == I387_CW_FLOOR)
+ return mode;
+ break;
+
+ case I387_CEIL:
+ if (mode == I387_CW_CEIL)
+ return mode;
+ break;
+
+ case I387_MASK_PM:
+ if (mode == I387_CW_MASK_PM)
+ return mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return I387_CW_ANY;
+}
+
+/* Output code to initialize the control word copies used by trunc?f?i
+   and rounding patterns.  MODE is the rounding mode to set up; the
+   incoming control word is saved in SLOT_CW_STORED and the modified
+   copy is stored in the stack slot corresponding to MODE.  */
+
+void
+emit_i387_cw_initialization (int mode)
+{
+ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ rtx new_mode;
+
+ enum ix86_stack_slot slot;
+
+ rtx reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
+ emit_move_insn (reg, copy_rtx (stored_mode));
+
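+  /* Bits 10-11 of the x87 control word are the rounding control field:
+     00 = to nearest, 01 = down (toward -inf), 10 = up (toward +inf),
+     11 = toward zero (truncate); bit 5 masks the precision exception.
+     Hence the 0x0c00, 0x0400, 0x0800 and 0x0020 constants below.  */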
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
+ || optimize_function_for_size_p (cfun))
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
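+      /* Here the rounding bits are written straight into the high byte
+	 (bits 8-15) of the control word, so 0xc, 0x4 and 0x8 correspond
+	 to the 0x0c00, 0x0400 and 0x0800 masks above.  */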
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ gcc_assert (slot < MAX_386_STACK_LOCALS);
+
+ new_mode = assign_386_stack_local (HImode, slot);
+ emit_move_insn (new_mode, reg);
+}
+
+/* Output code for INSN to convert a float to a signed int. OPERANDS
+ are the insn operands. The output may be [HSD]Imode and the input
+ operand may be [SDX]Fmode. */
+
+const char *
+output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+{
+ int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+ int dimode_p = GET_MODE (operands[0]) == DImode;
+ int round_mode = get_attr_i387_cw (insn);
+
+ /* Jump through a hoop or two for DImode, since the hardware has no
+ non-popping instruction. We used to do this a different way, but
+ that was somewhat fragile and broke with post-reload splitters. */
+ if ((dimode_p || fisttp) && !stack_top_dies)
+ output_asm_insn ("fld\t%y1", operands);
+
+ gcc_assert (STACK_TOP_P (operands[1]));
+ gcc_assert (MEM_P (operands[0]));
+ gcc_assert (GET_MODE (operands[1]) != TFmode);
+
+ if (fisttp)
+ output_asm_insn ("fisttp%Z0\t%0", operands);
+ else
+ {
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%3", operands);
+ if (stack_top_dies || dimode_p)
+ output_asm_insn ("fistp%Z0\t%0", operands);
+ else
+ output_asm_insn ("fist%Z0\t%0", operands);
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%2", operands);
+ }
+
+ return "";
+}
+
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#ifdef HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
+ {
+ static char retval[32];
+ int regno = REGNO (operands[opno]);
+
+ gcc_assert (FP_REGNO_P (regno));
+
+ regno -= FIRST_STACK_REG;
+
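+      /* Emit the raw opcode: 0xDF 0xC0+i encodes "ffreep %st(i)", and
+	 the little-endian 16-bit word 0xc<i>df stores exactly those two
+	 bytes.  */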
+ snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
+ return retval;
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
+
+
+/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
+ should be used. UNORDERED_P is true when fucom should be used. */
+
+const char *
+output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+{
+ int stack_top_dies;
+ rtx cmp_op0, cmp_op1;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
+
+ if (eflags_p)
+ {
+ cmp_op0 = operands[0];
+ cmp_op1 = operands[1];
+ }
+ else
+ {
+ cmp_op0 = operands[1];
+ cmp_op1 = operands[2];
+ }
+
+ if (is_sse)
+ {
+ static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
+ static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
+ static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
+ static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
+
+ if (GET_MODE (operands[0]) == SFmode)
+ if (unordered_p)
+ return &ucomiss[TARGET_AVX ? 0 : 1];
+ else
+ return &comiss[TARGET_AVX ? 0 : 1];
+ else
+ if (unordered_p)
+ return &ucomisd[TARGET_AVX ? 0 : 1];
+ else
+ return &comisd[TARGET_AVX ? 0 : 1];
+ }
+
+ gcc_assert (STACK_TOP_P (cmp_op0));
+
+ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+
+ if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
+ {
+ if (stack_top_dies)
+ {
+ output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
+ return output_387_ffreep (operands, 1);
+ }
+ else
+ return "ftst\n\tfnstsw\t%0";
+ }
+
+ if (STACK_REG_P (cmp_op1)
+ && stack_top_dies
+ && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
+ && REGNO (cmp_op1) != FIRST_STACK_REG)
+ {
+      /* If the top of the 387 stack dies, and the other operand is also
+	 a stack register that dies, then this must be an `fcompp' float
+	 compare.  */
+
+ if (eflags_p)
+ {
+ /* There is no double popping fcomi variant. Fortunately,
+ eflags is immune from the fstp's cc clobbering. */
+ if (unordered_p)
+ output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
+ else
+ output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
+ return output_387_ffreep (operands, 0);
+ }
+ else
+ {
+ if (unordered_p)
+ return "fucompp\n\tfnstsw\t%0";
+ else
+ return "fcompp\n\tfnstsw\t%0";
+ }
+ }
+ else
+ {
+ /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
+
+ static const char * const alt[16] =
+ {
+ "fcom%Z2\t%y2\n\tfnstsw\t%0",
+ "fcomp%Z2\t%y2\n\tfnstsw\t%0",
+ "fucom%Z2\t%y2\n\tfnstsw\t%0",
+ "fucomp%Z2\t%y2\n\tfnstsw\t%0",
+
+ "ficom%Z2\t%y2\n\tfnstsw\t%0",
+ "ficomp%Z2\t%y2\n\tfnstsw\t%0",
+ NULL,
+ NULL,
+
+ "fcomi\t{%y1, %0|%0, %y1}",
+ "fcomip\t{%y1, %0|%0, %y1}",
+ "fucomi\t{%y1, %0|%0, %y1}",
+ "fucomip\t{%y1, %0|%0, %y1}",
+
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ int mask;
+ const char *ret;
+
+ mask = eflags_p << 3;
+ mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
+ mask |= unordered_p << 1;
+ mask |= stack_top_dies;
+
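+      /* For example, a register fcomi compare that pops the stack top
+	 (eflags_p = 1, unordered_p = 0, stack_top_dies = 1) gives
+	 mask 9 and selects "fcomip".  */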
+ gcc_assert (mask < 16);
+ ret = alt[mask];
+ gcc_assert (ret);
+
+ return ret;
+ }
+}
+
+void
+ix86_output_addr_vec_elt (FILE *file, int value)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+
+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
+}
+
+void
+ix86_output_addr_diff_elt (FILE *file, int value, int rel)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
+ if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ fprintf (file, "%s%s%d-%s%d\n",
+ directive, LPREFIX, value, LPREFIX, rel);
+ else if (HAVE_AS_GOTOFF_IN_DATA)
+ fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
+ machopic_output_function_base_name (file);
+ putc ('\n', file);
+ }
+#endif
+ else
+ asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
+ GOT_SYMBOL_NAME, LPREFIX, value);
+}
+
+/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
+ for the target. */
+
+void
+ix86_expand_clear (rtx dest)
+{
+ rtx tmp;
+
+ /* We play register width games, which are only valid after reload. */
+ gcc_assert (reload_completed);
+
+ /* Avoid HImode and its attendant prefix byte. */
+ if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+ tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
+
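+  /* "xor reg,reg" is smaller and faster than "mov $0,reg", but it
+     clobbers the flags, which is why a CLOBBER of FLAGS_REG is
+     attached to the xor form below.  */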
+ /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
+ if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
+ {
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+ }
+
+ emit_insn (tmp);
+}
+
+/* X is an unchanging MEM. If it is a constant pool reference, return
+ the constant pool rtx, else NULL. */
+
+rtx
+maybe_get_pool_constant (rtx x)
+{
+ x = ix86_delegitimize_address (XEXP (x, 0));
+
+ if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ return get_pool_constant (x);
+
+ return NULL_RTX;
+}
+
+void
+ix86_expand_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1;
+ enum tls_model model;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (GET_CODE (op1) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (op1);
+ if (model)
+ {
+ op1 = legitimize_tls_address (op1, model, true);
+ op1 = force_operand (op1, op0);
+ if (op1 == op0)
+ return;
+ }
+ else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (op1))
+ op1 = legitimize_dllimport_symbol (op1, false);
+ }
+ else if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
+ {
+ rtx addend = XEXP (XEXP (op1, 0), 1);
+ rtx symbol = XEXP (XEXP (op1, 0), 0);
+ rtx tmp = NULL;
+
+ model = SYMBOL_REF_TLS_MODEL (symbol);
+ if (model)
+ tmp = legitimize_tls_address (symbol, model, true);
+ else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (symbol))
+ tmp = legitimize_dllimport_symbol (symbol, true);
+
+ if (tmp)
+ {
+ tmp = force_operand (tmp, NULL);
+ op1 = expand_simple_binop (Pmode, PLUS, tmp, addend,
+ op0, 1, OPTAB_DIRECT);
+ if (op1 == op0)
+ return;
+ }
+ }
+
+ if ((flag_pic || MACHOPIC_INDIRECT)
+ && mode == Pmode && symbolic_operand (op1, Pmode))
+ {
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ /* dynamic-no-pic */
+ if (MACHOPIC_INDIRECT)
+ {
+ rtx temp = ((reload_in_progress
+ || ((op0 && REG_P (op0))
+ && mode == Pmode))
+ ? op0 : gen_reg_rtx (Pmode));
+ op1 = machopic_indirect_data_reference (op1, temp);
+ if (MACHOPIC_PURE)
+ op1 = machopic_legitimize_pic_address (op1, mode,
+ temp == op1 ? 0 : temp);
+ }
+ if (op0 != op1 && GET_CODE (op0) != MEM)
+ {
+ rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
+ emit_insn (insn);
+ return;
+ }
+ if (GET_CODE (op0) == MEM)
+ op1 = force_reg (Pmode, op1);
+ else
+ {
+ rtx temp = op0;
+ if (GET_CODE (temp) != REG)
+ temp = gen_reg_rtx (Pmode);
+ temp = legitimize_pic_address (op1, temp);
+ if (temp == op0)
+ return;
+ op1 = temp;
+ }
+ /* dynamic-no-pic */
+#endif
+ }
+ else
+ {
+ if (MEM_P (op0))
+ op1 = force_reg (Pmode, op1);
+ else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
+ {
+ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
+ op1 = legitimize_pic_address (op1, reg);
+ if (op0 == op1)
+ return;
+ }
+ }
+ }
+ else
+ {
+ if (MEM_P (op0)
+ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
+ || !push_operand (op0, mode))
+ && MEM_P (op1))
+ op1 = force_reg (mode, op1);
+
+ if (push_operand (op0, mode)
+ && ! general_no_elim_operand (op1, mode))
+ op1 = copy_to_mode_reg (mode, op1);
+
+      /* Force large constants in 64bit compilation into registers
+	 to get them CSEed.  */
+ if (can_create_pseudo_p ()
+ && (mode == DImode) && TARGET_64BIT
+ && immediate_operand (op1, mode)
+ && !x86_64_zext_immediate_operand (op1, VOIDmode)
+ && !register_operand (op0, mode)
+ && optimize)
+ op1 = copy_to_mode_reg (mode, op1);
+
+ if (can_create_pseudo_p ()
+ && FLOAT_MODE_P (mode)
+ && GET_CODE (op1) == CONST_DOUBLE)
+ {
+ /* If we are loading a floating point constant to a register,
+ force the value to memory now, since we'll get better code
+ out the back end. */
+
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
+ {
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
+ }
+ }
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+void
+ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0 = operands[0], op1 = operands[1];
+ unsigned int align = GET_MODE_ALIGNMENT (mode);
+
+  /* Force constants other than zero into memory.  We do not know how
+     the instructions used to build constants modify the upper 64 bits
+     of the register; once we have that information we may be able
+     to handle some of them more efficiently.  */
+ if (can_create_pseudo_p ()
+ && register_operand (op0, mode)
+ && (CONSTANT_P (op1)
+ || (GET_CODE (op1) == SUBREG
+ && CONSTANT_P (SUBREG_REG (op1))))
+ && !standard_sse_constant_p (op1))
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+  /* We need to check memory alignment for SSE modes since attributes
+     can make operands unaligned.  */
+ if (can_create_pseudo_p ()
+ && SSE_REG_MODE_P (mode)
+ && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
+ || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
+ {
+ rtx tmp[2];
+
+ /* ix86_expand_vector_move_misalign() does not like constants ... */
+ if (CONSTANT_P (op1)
+ || (GET_CODE (op1) == SUBREG
+ && CONSTANT_P (SUBREG_REG (op1))))
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+ /* ... nor both arguments in memory. */
+ if (!register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ tmp[0] = op0; tmp[1] = op1;
+ ix86_expand_vector_move_misalign (mode, tmp);
+ return;
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if (can_create_pseudo_p ()
+ && !register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ {
+ emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Split 32-byte AVX unaligned load and store if needed. */
+
+static void
+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
+{
+ rtx m;
+ rtx (*extract) (rtx, rtx, rtx);
+ rtx (*move_unaligned) (rtx, rtx);
+ enum machine_mode mode;
+
+ switch (GET_MODE (op0))
+ {
+ default:
+ gcc_unreachable ();
+ case V32QImode:
+ extract = gen_avx_vextractf128v32qi;
+ move_unaligned = gen_avx_movdqu256;
+ mode = V16QImode;
+ break;
+ case V8SFmode:
+ extract = gen_avx_vextractf128v8sf;
+ move_unaligned = gen_avx_movups256;
+ mode = V4SFmode;
+ break;
+ case V4DFmode:
+ extract = gen_avx_vextractf128v4df;
+ move_unaligned = gen_avx_movupd256;
+ mode = V2DFmode;
+ break;
+ }
+
+ if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (move_unaligned (op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+ straight to ix86_expand_vector_move. */
+/* Code generation for scalar reg-reg moves of single and double precision data:
+ if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
+ movaps reg, reg
+ else
+ movss reg, reg
+ if (x86_sse_partial_reg_dependency == true)
+ movapd reg, reg
+ else
+ movsd reg, reg
+
+ Code generation for scalar loads of double precision data:
+ if (x86_sse_split_regs == true)
+ movlpd mem, reg (gas syntax)
+ else
+ movsd mem, reg
+
+ Code generation for unaligned packed loads of single precision data
+ (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
+ if (x86_sse_unaligned_move_optimal)
+ movups mem, reg
+
+ if (x86_sse_partial_reg_dependency == true)
+ {
+ xorps reg, reg
+ movlps mem, reg
+ movhps mem+8, reg
+ }
+ else
+ {
+ movlps mem, reg
+ movhps mem+8, reg
+ }
+
+ Code generation for unaligned packed loads of double precision data
+ (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
+ if (x86_sse_unaligned_move_optimal)
+ movupd mem, reg
+
+ if (x86_sse_split_regs == true)
+ {
+ movlpd mem, reg
+ movhpd mem+8, reg
+ }
+ else
+ {
+ movsd mem, reg
+ movhpd mem+8, reg
+ }
+ */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1, m;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (TARGET_AVX)
+ {
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ /* If we're optimizing for size, movups is the smallest. */
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_avx_movups (op0, op1));
+ return;
+ }
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_avx_movdqu (op0, op1));
+ break;
+ case 32:
+ op0 = gen_lowpart (V32QImode, op0);
+ op1 = gen_lowpart (V32QImode, op1);
+ ix86_avx256_split_vector_move_misalign (op0, op1);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case MODE_VECTOR_FLOAT:
+ op0 = gen_lowpart (mode, op0);
+ op1 = gen_lowpart (mode, op1);
+
+ switch (mode)
+ {
+ case V4SFmode:
+ emit_insn (gen_avx_movups (op0, op1));
+ break;
+ case V8SFmode:
+ ix86_avx256_split_vector_move_misalign (op0, op1);
+ break;
+ case V2DFmode:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_avx_movups (op0, op1));
+ return;
+ }
+ emit_insn (gen_avx_movupd (op0, op1));
+ break;
+ case V4DFmode:
+ ix86_avx256_split_vector_move_misalign (op0, op1);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
+ if (MEM_P (op1))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_insn_for_size_p ()
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? If we have typed data, then it would appear that using
+ movdqu is the only way to get unaligned data loaded with
+ integer type. */
+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ rtx zero;
+
+ if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+ {
+ op0 = gen_lowpart (V2DFmode, op0);
+ op1 = gen_lowpart (V2DFmode, op1);
+ emit_insn (gen_sse2_movupd (op0, op1));
+ return;
+ }
+
+ /* When SSE registers are split into halves, we can avoid
+ writing to the top half twice. */
+ if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_clobber (op0);
+ zero = op0;
+ }
+ else
+ {
+ /* ??? Not sure about the best option for the Intel chips.
+ The following would seem to satisfy; the register is
+ entirely cleared, breaking the dependency chain. We
+ then store to the upper half, with a dependency depth
+ of one. A rumor has it that Intel recommends two movsd
+ followed by an unpacklpd, but this is unconfirmed. And
+ given that the dependency depth of the unpacklpd would
+ still be one, I'm not sure why this would be better. */
+ zero = CONST0_RTX (V2DFmode);
+ }
+
+ m = adjust_address (op1, DFmode, 0);
+ emit_insn (gen_sse2_loadlpd (op0, zero, m));
+ m = adjust_address (op1, DFmode, 8);
+ emit_insn (gen_sse2_loadhpd (op0, op0, m));
+ }
+ else
+ {
+ if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+ emit_move_insn (op0, CONST0_RTX (mode));
+ else
+ emit_clobber (op0);
+
+ if (mode != V4SFmode)
+ op0 = gen_lowpart (V4SFmode, op0);
+ m = adjust_address (op1, V2SFmode, 0);
+ emit_insn (gen_sse_loadlps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 8);
+ emit_insn (gen_sse_loadhps (op0, op0, m));
+ }
+ }
+ else if (MEM_P (op0))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_insn_for_size_p ()
+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? Similar to above, only less clear because of quote
+ typeless stores unquote. */
+ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V2DFmode, op0);
+ op1 = gen_lowpart (V2DFmode, op1);
+ emit_insn (gen_sse2_movupd (op0, op1));
+ }
+ else
+ {
+ m = adjust_address (op0, DFmode, 0);
+ emit_insn (gen_sse2_storelpd (m, op1));
+ m = adjust_address (op0, DFmode, 8);
+ emit_insn (gen_sse2_storehpd (m, op1));
+ }
+ }
+ else
+ {
+ if (mode != V4SFmode)
+ op1 = gen_lowpart (V4SFmode, op1);
+
+ if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ emit_insn (gen_sse_movups (op0, op1));
+ }
+ else
+ {
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
+ }
+ }
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Expand a push in MODE. This is some mode for which we do not support
+ proper push instructions, at least from the registers that we expect
+ the value to live in. */
+
+void
+ix86_expand_push (enum machine_mode mode, rtx x)
+{
+ rtx tmp;
+
+ tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
+ GEN_INT (-GET_MODE_SIZE (mode)),
+ stack_pointer_rtx, 1, OPTAB_DIRECT);
+ if (tmp != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, tmp);
+
+ tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
+
+  /* When we push an operand onto the stack, it has to be aligned at
+     least at the function argument boundary.  However, since we don't
+     have the argument type, we can't determine the actual argument
+     boundary.  */
+ emit_move_insn (tmp, x);
+}
+
+/* Helper function of ix86_fixup_binary_operands to canonicalize
+ operand order. Returns true if the operands should be swapped. */
+
+static bool
+ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+ /* If the operation is not commutative, we can't do anything. */
+ if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ return false;
+
+ /* Highest priority is that src1 should match dst. */
+ if (rtx_equal_p (dst, src1))
+ return false;
+ if (rtx_equal_p (dst, src2))
+ return true;
+
+ /* Next highest priority is that immediate constants come second. */
+ if (immediate_operand (src2, mode))
+ return false;
+ if (immediate_operand (src1, mode))
+ return true;
+
+ /* Lowest priority is that memory references should come second. */
+ if (MEM_P (src2))
+ return false;
+ if (MEM_P (src1))
+ return true;
+
+ return false;
+}
+
+
+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
+ destination to use for the operation. If different from the true
+ destination in operands[0], a copy operation will be required. */
+
+rtx
+ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+ /* Canonicalize operand order. */
+ if (ix86_swap_binary_operands_p (code, mode, operands))
+ {
+ rtx temp;
+
+ /* It is invalid to swap operands of different modes. */
+ gcc_assert (GET_MODE (src1) == GET_MODE (src2));
+
+ temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+ /* Both source operands cannot be in memory. */
+ if (MEM_P (src1) && MEM_P (src2))
+ {
+ /* Optimization: Only read from memory once. */
+ if (rtx_equal_p (src1, src2))
+ {
+ src2 = force_reg (mode, src2);
+ src1 = src2;
+ }
+ else
+ src2 = force_reg (mode, src2);
+ }
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ if (MEM_P (dst) && !rtx_equal_p (dst, src1))
+ dst = gen_reg_rtx (mode);
+
+ /* Source 1 cannot be a constant. */
+ if (CONSTANT_P (src1))
+ src1 = force_reg (mode, src1);
+
+ /* Source 1 cannot be a non-matching memory. */
+ if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ src1 = force_reg (mode, src1);
+
+ operands[1] = src1;
+ operands[2] = src2;
+ return dst;
+}
+
+/* Similarly, but assume that the destination has already been
+ set up properly. */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+ enum machine_mode mode, rtx operands[])
+{
+ rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator.  Make the expansion closer to
+   the actual machine than just general_operand, which will allow
+   3 separate memory references (one output, two input) in a single
+   insn.  */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx src1, src2, dst, op, clob;
+
+ dst = ix86_fixup_binary_operands (code, mode, operands);
+ src1 = operands[1];
+ src2 = operands[2];
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
+ if (reload_in_progress)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. We can only do this with PLUS. */
+ gcc_assert (code == PLUS);
+ emit_insn (op);
+ }
+ else if (reload_completed
+ && code == PLUS
+ && !rtx_equal_p (dst, src1))
+ {
+ /* This is going to be an LEA; avoid splitting it later. */
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the binary operator meets the
+ appropriate constraints. */
+
+bool
+ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
+ rtx operands[3])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+ /* Both source operands cannot be in memory. */
+ if (MEM_P (src1) && MEM_P (src2))
+ return false;
+
+ /* Canonicalize operand order for commutative operators. */
+ if (ix86_swap_binary_operands_p (code, mode, operands))
+ {
+ rtx temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+ /* If the destination is memory, we must have a matching source operand. */
+ if (MEM_P (dst) && !rtx_equal_p (dst, src1))
+ return false;
+
+ /* Source 1 cannot be a constant. */
+ if (CONSTANT_P (src1))
+ return false;
+
+ /* Source 1 cannot be a non-matching memory. */
+ if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ {
+ /* Support "andhi/andsi/anddi" as a zero-extending move. */
+ return (code == AND
+ && (mode == HImode
+ || mode == SImode
+ || (TARGET_64BIT && mode == DImode))
+ && CONST_INT_P (src2)
+ && (INTVAL (src2) == 0xff
+ || INTVAL (src2) == 0xffff));
+ }
+
+ return true;
+}
+
+/* Attempt to expand a unary operator.  Make the expansion closer to
+   the actual machine than just general_operand, which will allow
+   2 separate memory references (one output, one input) in a single
+   insn.  */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src, dst, op, clob;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (MEM_P (dst))
+ {
+ if (rtx_equal_p (dst, src))
+ matching_memory = 1;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* When source operand is memory, destination must match. */
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
+ if (reload_in_progress || code == NOT)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. */
+ gcc_assert (code == NOT);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
+   divisor are within the range [0-255].  */
+
+void
+ix86_split_idivmod (enum machine_mode mode, rtx operands[],
+ bool signed_p)
+{
+ rtx end_label, qimode_label;
+ rtx insn, div, mod;
+ rtx scratch, tmp0, tmp1, tmp2;
+ rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
+ rtx (*gen_zero_extend) (rtx, rtx);
+ rtx (*gen_test_ccno_1) (rtx, rtx);
+
+ switch (mode)
+ {
+ case SImode:
+ gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+ gen_test_ccno_1 = gen_testsi_ccno_1;
+ gen_zero_extend = gen_zero_extendqisi2;
+ break;
+ case DImode:
+ gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
+ gen_test_ccno_1 = gen_testdi_ccno_1;
+ gen_zero_extend = gen_zero_extendqidi2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ end_label = gen_label_rtx ();
+ qimode_label = gen_label_rtx ();
+
+ scratch = gen_reg_rtx (mode);
+
+  /* Use 8bit unsigned divmod if dividend and divisor are within
+     the range [0-255].  */
+ emit_move_insn (scratch, operands[2]);
+ scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
+ scratch, 1, OPTAB_DIRECT);
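+  /* ORing the dividend and divisor and testing the result against
+     ~0xff sets ZF exactly when neither value has a bit above bit 7
+     set, i.e. both are within [0-255].  */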
+ emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
+ tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
+ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
+ gen_rtx_LABEL_REF (VOIDmode, qimode_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ JUMP_LABEL (insn) = qimode_label;
+
+  /* Generate the original signed/unsigned divmod.  */
+ div = gen_divmod4_1 (operands[0], operands[1],
+ operands[2], operands[3]);
+ emit_insn (div);
+
+ /* Branch to the end. */
+ emit_jump_insn (gen_jump (end_label));
+ emit_barrier ();
+
+ /* Generate 8bit unsigned divide. */
+ emit_label (qimode_label);
+  /* Don't use operands[0] for the result of the 8bit divide since not
+     all registers support QImode ZERO_EXTRACT.  */
+ tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
+ tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
+ tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
+
+ if (signed_p)
+ {
+ div = gen_rtx_DIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
+ }
+ else
+ {
+ div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
+ mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
+ }
+
+ /* Extract remainder from AH. */
+ tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+ if (REG_P (operands[1]))
+ insn = emit_move_insn (operands[1], tmp1);
+ else
+ {
+      /* Need a new scratch register since the old one holds the result
+	 of the 8bit divide.  */
+ scratch = gen_reg_rtx (mode);
+ emit_move_insn (scratch, tmp1);
+ insn = emit_move_insn (operands[1], scratch);
+ }
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ /* Zero extend quotient from AL. */
+ tmp1 = gen_lowpart (QImode, tmp0);
+ insn = emit_insn (gen_zero_extend (operands[0], tmp1));
+ set_unique_reg_note (insn, REG_EQUAL, div);
+
+ emit_label (end_label);
+}
+
+#define LEA_SEARCH_THRESHOLD 12
+
+/* Search backward for a non-agu definition of register number REGNO1
+   or register number REGNO2 in INSN's basic block until we
+   1. pass LEA_SEARCH_THRESHOLD instructions, or
+   2. reach the BB boundary, or
+   3. reach an agu definition.
+   Returns the distance between the non-agu definition point and INSN.
+   If no definition point is found, returns -1.  */
+
+static int
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
+ rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+
+ if (insn != BB_HEAD (bb))
+ {
+ rtx prev = PREV_INSN (insn);
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (NONDEBUG_INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ if (recog_memoized (prev) < 0
+ || get_attr_type (prev) != TYPE_LEA)
+ goto done;
+ }
+ }
+ if (prev == BB_HEAD (bb))
+ break;
+ prev = PREV_INSN (prev);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
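+      /* A basic block with an edge from itself is a single-block loop;
+	 in that case continue the backward search by wrapping around
+	 from the end of the block.  */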
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (e->src == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx prev = BB_END (bb);
+ while (prev
+ && prev != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (NONDEBUG_INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ if (recog_memoized (prev) < 0
+ || get_attr_type (prev) != TYPE_LEA)
+ goto done;
+ }
+ }
+ prev = PREV_INSN (prev);
+ }
+ }
+ }
+
+ distance = -1;
+
+done:
+ /* get_attr_type may modify recog data. We want to make sure
+ that recog data is valid for instruction INSN, on which
+ distance_non_agu_define is called. INSN is unchanged here. */
+ extract_insn_cached (insn);
+ return distance;
+}
+
+/* Return the distance between INSN and the next insn that uses
+   register number REGNO0 in a memory address.  Return -1 if no such
+   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ df_ref *use_rec;
+
+ if (insn != BB_END (bb))
+ {
+ rtx next = NEXT_INSN (insn);
+ while (next && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (NONDEBUG_INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+ }
+ if (next == BB_END (bb))
+ break;
+ next = NEXT_INSN (next);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx next = BB_HEAD (bb);
+ while (next
+ && next != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (NONDEBUG_INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+
+ }
+ next = NEXT_INSN (next);
+ }
+ }
+ }
+
+ return -1;
+}
+
+/* Define this macro to tune LEA priority vs ADD; it takes effect when
+   there is a dilemma of choosing LEA or ADD.
+   Negative value: ADD is more preferred than LEA.
+   Zero: Neutral.
+   Positive value: LEA is more preferred than ADD.  */
+#define IX86_LEA_PRIORITY 2
+
+/* Return true if it is ok to optimize an ADD operation to an LEA
+   operation to avoid flag register consumption.  For most processors,
+   ADD is faster than LEA.  For processors like ATOM, if the
+   destination register of LEA holds an actual address which will be
+   used soon, LEA is better, and otherwise ADD is better.  */
+
+bool
+ix86_lea_for_add_ok (rtx insn, rtx operands[])
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = true_regnum (operands[1]);
+ unsigned int regno2 = true_regnum (operands[2]);
+
+ /* If a = b + c, (a!=b && a!=c), must use lea form. */
+ if (regno0 != regno1 && regno0 != regno2)
+ return true;
+
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+ else
+ {
+ int dist_define, dist_use;
+
+ /* Return false if REGNO0 isn't used in memory address. */
+ dist_use = distance_agu_use (regno0, insn);
+ if (dist_use <= 0)
+ return false;
+
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
+ if (dist_define <= 0)
+ return true;
+
+      /* If this insn has both a backward non-agu dependence and a forward
+	 agu dependence, the one with the shorter distance takes effect.  */
+ if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
+ return false;
+
+ return true;
+ }
+}
+
+/* Return true if destination reg of SET_BODY is shift count of
+ USE_BODY. */
+
+static bool
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
+{
+ rtx set_dest;
+ rtx shift_rtx;
+ int i;
+
+ /* Retrieve destination of SET_BODY. */
+ switch (GET_CODE (set_body))
+ {
+ case SET:
+ set_dest = SET_DEST (set_body);
+ if (!set_dest || !REG_P (set_dest))
+ return false;
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
+ use_body))
+ return true;
+      /* FALLTHRU */
+    default:
+      return false;
+ }
+
+ /* Retrieve shift count of USE_BODY. */
+ switch (GET_CODE (use_body))
+ {
+ case SET:
+ shift_rtx = XEXP (use_body, 1);
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (set_body,
+ XVECEXP (use_body, 0, i)))
+ return true;
+      /* FALLTHRU */
+    default:
+      return false;
+ }
+
+ if (shift_rtx
+ && (GET_CODE (shift_rtx) == ASHIFT
+ || GET_CODE (shift_rtx) == LSHIFTRT
+ || GET_CODE (shift_rtx) == ASHIFTRT
+ || GET_CODE (shift_rtx) == ROTATE
+ || GET_CODE (shift_rtx) == ROTATERT))
+ {
+ rtx shift_count = XEXP (shift_rtx, 1);
+
+ /* Return true if shift count is dest of SET_BODY. */
+ if (REG_P (shift_count)
+ && true_regnum (set_dest) == true_regnum (shift_count))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if destination reg of SET_INSN is shift count of
+ USE_INSN. */
+
+bool
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
+{
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn),
+ PATTERN (use_insn));
+}
+
+/* Return TRUE or FALSE depending on whether the unary operator meets the
+ appropriate constraints. */
+
+bool
+ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[2] ATTRIBUTE_UNUSED)
+{
+  /* If one of the operands is memory, source and destination must
+     match.  */
+ if ((MEM_P (operands[0])
+ || MEM_P (operands[1]))
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return false;
+ return true;
+}
+
+/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
+ are ok, keeping in mind the possible movddup alternative. */
+
+bool
+ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
+{
+ if (MEM_P (operands[0]))
+ return rtx_equal_p (operands[0], operands[1 + high]);
+ if (MEM_P (operands[1]) && MEM_P (operands[2]))
+ return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
+ return true;
+}
+
+/* Post-reload splitter for converting an SF or DFmode value in an
+ SSE register into an unsigned SImode. */
+
+void
+ix86_split_convert_uns_si_sse (rtx operands[])
+{
+ enum machine_mode vecmode;
+ rtx value, large, zero_or_two31, input, two31, x;
+
+ large = operands[1];
+ zero_or_two31 = operands[2];
+ input = operands[3];
+ two31 = operands[4];
+ vecmode = GET_MODE (large);
+ value = gen_rtx_REG (vecmode, REGNO (operands[0]));
+
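+  /* Compute value < 2^31 ? (int) value : (int) (value - 2^31) ^ 0x80000000.
+     The LE compare mask selects the 2^31 bias, which is subtracted
+     before the signed conversion; the mask shifted left by 31 then
+     restores the top bit via the final XOR.  */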
+ /* Load up the value into the low element. We must ensure that the other
+ elements are valid floats -- zero is the easiest such value. */
+ if (MEM_P (input))
+ {
+ if (vecmode == V4SFmode)
+ emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
+ else
+ emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
+ }
+ else
+ {
+ input = gen_rtx_REG (vecmode, REGNO (input));
+ emit_move_insn (value, CONST0_RTX (vecmode));
+ if (vecmode == V4SFmode)
+ emit_insn (gen_sse_movss (value, value, input));
+ else
+ emit_insn (gen_sse2_movsd (value, value, input));
+ }
+
+ emit_move_insn (large, two31);
+ emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
+
+ x = gen_rtx_fmt_ee (LE, vecmode, large, value);
+ emit_insn (gen_rtx_SET (VOIDmode, large, x));
+
+ x = gen_rtx_AND (vecmode, zero_or_two31, large);
+ emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
+
+ x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
+ emit_insn (gen_rtx_SET (VOIDmode, value, x));
+
+ large = gen_rtx_REG (V4SImode, REGNO (large));
+ emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
+
+ x = gen_rtx_REG (V4SImode, REGNO (value));
+ if (vecmode == V4SFmode)
+ emit_insn (gen_sse2_cvttps2dq (x, value));
+ else
+ emit_insn (gen_sse2_cvttpd2dq (x, value));
+ value = x;
+
+ emit_insn (gen_xorv4si3 (value, value, large));
+}
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+ Expects the 64-bit DImode to be supplied in a pair of integral
+ registers. Requires SSE2; will use SSE3 if available. For x86_32,
+ -mfpmath=sse, !optimize_size only. */
+
+void
+ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+ rtx int_xmm, fp_xmm;
+ rtx biases, exponents;
+ rtx x;
+
+ int_xmm = gen_reg_rtx (V4SImode);
+ if (TARGET_INTER_UNIT_MOVES)
+ emit_insn (gen_movdi_to_sse (int_xmm, input));
+ else if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_clobber (int_xmm);
+ emit_move_insn (gen_lowpart (DImode, int_xmm), input);
+ }
+ else
+ {
+ x = gen_reg_rtx (V2DImode);
+ ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
+ emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
+ }
+
+ x = gen_rtx_CONST_VECTOR (V4SImode,
+ gen_rtvec (4, GEN_INT (0x43300000UL),
+ GEN_INT (0x45300000UL),
+ const0_rtx, const0_rtx));
+ exponents = validize_mem (force_const_mem (V4SImode, x));
+
+ /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
+ emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
+
+ /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
+ yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
+ Similarly (0x45300000UL ## fp_value_hi_xmm) yields
+ (0x1.0p84 + double(fp_value_hi_xmm)).
+ Note these exponents differ by 32. */
+
+ fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
+
+ /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
+ in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
+ real_ldexp (&bias_lo_rvt, &dconst1, 52);
+ real_ldexp (&bias_hi_rvt, &dconst1, 84);
+ biases = const_double_from_real_value (bias_lo_rvt, DFmode);
+ x = const_double_from_real_value (bias_hi_rvt, DFmode);
+ biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
+ biases = validize_mem (force_const_mem (V2DFmode, biases));
+ emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
+
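+  /* After the bias subtraction the two DF lanes hold exactly
+     (double) lo and (double) hi * 2^32, so the only rounding occurs
+     in the addition below.  */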
+ /* Add the upper and lower DFmode values together. */
+ if (TARGET_SSE3)
+ emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
+ else
+ {
+ x = copy_to_mode_reg (V2DFmode, fp_xmm);
+ emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
+ emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
+ }
+
+ ix86_expand_vector_extract (false, target, fp_xmm, 0);
+}
+
+/* Not used, but eases macroization of patterns. */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+ rtx input ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
+/* Convert an unsigned SImode value into a DFmode. Only currently used
+ for SSE, but applicable anywhere. */
+
+void
+ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE TWO31r;
+ rtx x, fp;
+
+ x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
+ NULL, 1, OPTAB_DIRECT);
+
+ fp = gen_reg_rtx (DFmode);
+ emit_insn (gen_floatsidf2 (fp, x));
+
+ real_ldexp (&TWO31r, &dconst1, 31);
+ x = const_double_from_real_value (TWO31r, DFmode);
+
+ x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
+ if (x != target)
+ emit_move_insn (target, x);
+}
+
+/* Convert a signed DImode value into a DFmode. Only used for SSE in
+ 32-bit mode; otherwise we have a direct convert instruction. */
+
+void
+ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx fp_lo, fp_hi, x;
+
+ fp_lo = gen_reg_rtx (DFmode);
+ fp_hi = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+ fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
+
+ ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
+
+ x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (x != target)
+ emit_move_insn (target, x);
+}
+
+/* Convert an unsigned SImode value into a SFmode, using only SSE.
+ For x86_32, -mfpmath=sse, !optimize_size only. */
+void
+ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE ONE16r;
+ rtx fp_hi, fp_lo, int_hi, int_lo, x;
+
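+  /* Split the 32-bit input into 16-bit halves; each half converts
+     exactly to SFmode, and the result is recombined as
+     fp_hi * 2^16 + fp_lo, rounding only in the final addition.  */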
+ real_ldexp (&ONE16r, &dconst1, 16);
+ x = const_double_from_real_value (ONE16r, SFmode);
+ int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
+ NULL, 0, OPTAB_DIRECT);
+ int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
+ NULL, 0, OPTAB_DIRECT);
+ fp_hi = gen_reg_rtx (SFmode);
+ fp_lo = gen_reg_rtx (SFmode);
+ emit_insn (gen_floatsisf2 (fp_hi, int_hi));
+ emit_insn (gen_floatsisf2 (fp_lo, int_lo));
+ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
+ 0, OPTAB_DIRECT);
+ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (target, fp_hi))
+ emit_move_insn (target, fp_hi);
+}
+
+/* A subroutine of ix86_build_signbit_mask. If VECT is true,
+ then replicate the value for all elements of the vector
+ register. */
+
+rtx
+ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
+{
+ rtvec v;
+ switch (mode)
+ {
+ case V4SImode:
+ gcc_assert (vect);
+ v = gen_rtvec (4, value, value, value, value);
+ return gen_rtx_CONST_VECTOR (V4SImode, v);
+
+ case V2DImode:
+ gcc_assert (vect);
+ v = gen_rtvec (2, value, value);
+ return gen_rtx_CONST_VECTOR (V2DImode, v);
+
+ case V8SFmode:
+ if (vect)
+ v = gen_rtvec (8, value, value, value, value,
+ value, value, value, value);
+ else
+ v = gen_rtvec (8, value, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ return gen_rtx_CONST_VECTOR (V8SFmode, v);
+
+ case V4SFmode:
+ if (vect)
+ v = gen_rtvec (4, value, value, value, value);
+ else
+ v = gen_rtvec (4, value, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ return gen_rtx_CONST_VECTOR (V4SFmode, v);
+
+ case V4DFmode:
+ if (vect)
+ v = gen_rtvec (4, value, value, value, value);
+ else
+ v = gen_rtvec (4, value, CONST0_RTX (DFmode),
+ CONST0_RTX (DFmode), CONST0_RTX (DFmode));
+ return gen_rtx_CONST_VECTOR (V4DFmode, v);
+
+ case V2DFmode:
+ if (vect)
+ v = gen_rtvec (2, value, value);
+ else
+ v = gen_rtvec (2, value, CONST0_RTX (DFmode));
+ return gen_rtx_CONST_VECTOR (V2DFmode, v);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
+ and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
+ for an SSE register. If VECT is true, then replicate the mask for
+ all elements of the vector register. If INVERT is true, then create
+ a mask excluding the sign bit. */
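+/* For example, for V2DFmode the mask sets bit 63 of each element (the
+   bit pattern of -0.0); with INVERT every bit except the sign bit is
+   set instead, which is the mask an ABS needs.  */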
+
+rtx
+ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
+{
+ enum machine_mode vec_mode, imode;
+ HOST_WIDE_INT hi, lo;
+ int shift = 63;
+ rtx v;
+ rtx mask;
+
+ /* Find the sign bit, sign extended to 2*HWI. */
+ switch (mode)
+ {
+ case V4SImode:
+ case V8SFmode:
+ case V4SFmode:
+ vec_mode = mode;
+ mode = GET_MODE_INNER (mode);
+ imode = SImode;
+ lo = 0x80000000, hi = lo < 0;
+ break;
+
+ case V2DImode:
+ case V4DFmode:
+ case V2DFmode:
+ vec_mode = mode;
+ mode = GET_MODE_INNER (mode);
+ imode = DImode;
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << shift, hi = -1;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+ break;
+
+ case TImode:
+ case TFmode:
+ vec_mode = VOIDmode;
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ {
+ imode = TImode;
+ lo = 0, hi = (HOST_WIDE_INT)1 << shift;
+ }
+ else
+ {
+ rtvec vec;
+
+ imode = DImode;
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+
+ if (invert)
+ {
+ lo = ~lo, hi = ~hi;
+ v = constm1_rtx;
+ }
+ else
+ v = const0_rtx;
+
+ mask = immed_double_const (lo, hi, imode);
+
+ vec = gen_rtvec (2, v, mask);
+ v = gen_rtx_CONST_VECTOR (V2DImode, vec);
+ v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
+
+ return v;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (invert)
+ lo = ~lo, hi = ~hi;
+
+ /* Force this value into the low part of a fp vector constant. */
+ mask = immed_double_const (lo, hi, imode);
+ mask = gen_lowpart (mode, mask);
+
+ if (vec_mode == VOIDmode)
+ return force_reg (mode, mask);
+
+ v = ix86_build_const_vector (vec_mode, vect, mask);
+ return force_reg (vec_mode, v);
+}
+
+/* Generate code for floating point ABS or NEG. */
+
+void
+ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx mask, set, dst, src;
+ bool use_sse = false;
+ bool vector_mode = VECTOR_MODE_P (mode);
+ enum machine_mode vmode = mode;
+
+ if (vector_mode)
+ use_sse = true;
+ else if (mode == TFmode)
+ use_sse = true;
+ else if (TARGET_SSE_MATH)
+ {
+ use_sse = SSE_FLOAT_MODE_P (mode);
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ }
+
+ /* NEG and ABS performed with SSE use bitwise mask operations.
+ Create the appropriate mask now. */
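+  /* Conceptually, NEG becomes x ^ sign_mask and ABS becomes
+     x & ~sign_mask, which is why ABS requests the inverted mask here.  */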
+ if (use_sse)
+ mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
+ else
+ mask = NULL_RTX;
+
+ dst = operands[0];
+ src = operands[1];
+
+ set = gen_rtx_fmt_e (code, mode, src);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+
+ if (mask)
+ {
+ rtx use, clob;
+ rtvec par;
+
+ use = gen_rtx_USE (VOIDmode, mask);
+ if (vector_mode)
+ par = gen_rtvec (2, set, use);
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (3, set, use, clob);
+ }
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+ else
+ emit_insn (set);
+}
+
+/* Expand a copysign operation. Special case operand 0 being a constant. */
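+/* The underlying identity is
+     copysign (a, b) = (a & ~signmask) | (b & signmask),
+   so the whole operation reduces to bitwise masking.  */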
+
+void
+ix86_expand_copysign (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, nmask;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+
+ mode = GET_MODE (dest);
+
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
+
+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
+ op0 = simplify_unary_operation (ABS, mode, op0, mode);
+
+ if (mode == SFmode || mode == DFmode)
+ {
+ if (op0 == CONST0_RTX (mode))
+ op0 = CONST0_RTX (vmode);
+ else
+ {
+ rtx v = ix86_build_const_vector (vmode, false, op0);
+
+ op0 = force_reg (vmode, v);
+ }
+ }
+ else if (op0 != CONST0_RTX (mode))
+ op0 = force_reg (mode, op0);
+
+ mask = ix86_build_signbit_mask (vmode, 0, 0);
+
+ if (mode == SFmode)
+ copysign_insn = gen_copysignsf3_const;
+ else if (mode == DFmode)
+ copysign_insn = gen_copysigndf3_const;
+ else
+ copysign_insn = gen_copysigntf3_const;
+
+ emit_insn (copysign_insn (dest, op0, op1, mask));
+ }
+ else
+ {
+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
+
+ nmask = ix86_build_signbit_mask (vmode, 0, 1);
+ mask = ix86_build_signbit_mask (vmode, 0, 0);
+
+ if (mode == SFmode)
+ copysign_insn = gen_copysignsf3_var;
+ else if (mode == DFmode)
+ copysign_insn = gen_copysigndf3_var;
+ else
+ copysign_insn = gen_copysigntf3_var;
+
+ emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
+ be a constant, and so has already been expanded into a vector constant. */
+
+void
+ix86_split_copysign_const (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, mask, x;
+
+ dest = operands[0];
+ op0 = operands[1];
+ mask = operands[3];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ dest = simplify_gen_subreg (vmode, dest, mode, 0);
+ x = gen_rtx_AND (vmode, dest, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ if (op0 != CONST0_RTX (vmode))
+ {
+ x = gen_rtx_IOR (vmode, dest, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
+ so we have to do two masks. */
+
+void
+ix86_split_copysign_var (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, scratch, op0, op1, mask, nmask, x;
+
+ dest = operands[0];
+ scratch = operands[1];
+ op0 = operands[2];
+ op1 = operands[3];
+ nmask = operands[4];
+ mask = operands[5];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ if (rtx_equal_p (op0, op1))
+ {
+ /* Shouldn't happen often (it's useless, obviously), but when it does
+ we'd generate incorrect code if we continue below. */
+ emit_move_insn (dest, op0);
+ return;
+ }
+
+ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
+ {
+ gcc_assert (REGNO (op1) == REGNO (scratch));
+
+ x = gen_rtx_AND (vmode, scratch, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ dest = mask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_NOT (vmode, dest);
+ x = gen_rtx_AND (vmode, x, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
+ {
+ x = gen_rtx_AND (vmode, scratch, mask);
+ }
+ else /* alternative 2,4 */
+ {
+ gcc_assert (REGNO (mask) == REGNO (scratch));
+ op1 = simplify_gen_subreg (vmode, op1, mode, 0);
+ x = gen_rtx_AND (vmode, scratch, op1);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
+ {
+ dest = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, nmask);
+ }
+ else /* alternative 3,4 */
+ {
+ gcc_assert (REGNO (nmask) == REGNO (dest));
+ dest = nmask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, op0);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+
+ x = gen_rtx_IOR (vmode, dest, scratch);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+}
+
+/* Return TRUE or FALSE depending on whether the first SET in INSN
+   has source and destination with matching CC modes and whether the
+   CC mode is at least as constrained as REQ_MODE. */
+
+bool
+ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
+{
+ rtx set;
+ enum machine_mode set_mode;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
+
+ set_mode = GET_MODE (SET_DEST (set));
+ switch (set_mode)
+ {
+ case CCNOmode:
+ if (req_mode != CCNOmode
+ && (req_mode != CCmode
+ || XEXP (SET_SRC (set), 1) != const0_rtx))
+ return false;
+ break;
+ case CCmode:
+ if (req_mode == CCGCmode)
+ return false;
+ /* FALLTHRU */
+ case CCGCmode:
+ if (req_mode == CCGOCmode || req_mode == CCNOmode)
+ return false;
+ /* FALLTHRU */
+ case CCGOCmode:
+ if (req_mode == CCZmode)
+ return false;
+ /* FALLTHRU */
+ case CCZmode:
+ break;
+
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ if (set_mode != req_mode)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return GET_MODE (SET_SRC (set)) == set_mode;
+}
+
+/* Generate insn patterns to do an integer compare of OPERANDS. */
+
+static rtx
+ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode cmpmode;
+ rtx tmp, flags;
+
+ cmpmode = SELECT_CC_MODE (code, op0, op1);
+ flags = gen_rtx_REG (cmpmode, FLAGS_REG);
+
+ /* This is very simple, but making the interface the same as in the
+ FP case makes the rest of the code easier. */
+ tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+}
+
+/* Figure out whether to use ordered or unordered fp comparisons.
+ Return the appropriate mode to use. */
+
+enum machine_mode
+ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+  /* ??? In order to make all comparisons reversible, we do all comparisons
+     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
+     between all forms of trapping and nontrapping comparisons, we can make
+     inequality comparisons trapping again, since that results in better code
+     when using FCOM based compares. */
+ return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
+}
+
+enum machine_mode
+ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+ return ix86_fp_compare_mode (code);
+ }
+
+ switch (code)
+ {
+ /* Only zero flag is needed. */
+ case EQ: /* ZF=0 */
+ case NE: /* ZF!=0 */
+ return CCZmode;
+ /* Codes needing carry flag. */
+ case GEU: /* CF=0 */
+ case LTU: /* CF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == PLUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
+ case GTU: /* CF=0 & ZF=0 */
+ case LEU: /* CF=1 | ZF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == MINUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
+    /* Codes possibly doable with only the sign flag when
+       comparing against zero. */
+ case GE: /* SF=OF or SF=0 */
+ case LT: /* SF<>OF or SF=1 */
+ if (op1 == const0_rtx)
+ return CCGOCmode;
+ else
+ /* For other cases Carry flag is not required. */
+ return CCGCmode;
+    /* Codes doable with only the sign flag when comparing
+       against zero, but for which we miss the jump instruction,
+       so we need to use relational tests against overflow,
+       which thus needs to be zero. */
+ case GT: /* ZF=0 & SF=OF */
+ case LE: /* ZF=1 | SF<>OF */
+ if (op1 == const0_rtx)
+ return CCNOmode;
+ else
+ return CCGCmode;
+    /* The strcmp pattern does (use flags), and combine may ask us for the
+       proper mode. */
+ case USE:
+ return CCmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = FLAGS_REG;
+ *p2 = FPSR_REG;
+ return true;
+}
+
+/* If two condition code modes are compatible, return a condition code
+ mode which is compatible with both. Otherwise, return
+ VOIDmode. */
+
+static enum machine_mode
+ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ if (m1 == m2)
+ return m1;
+
+ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
+ return VOIDmode;
+
+ if ((m1 == CCGCmode && m2 == CCGOCmode)
+ || (m1 == CCGOCmode && m2 == CCGCmode))
+ return CCGCmode;
+
+ switch (m1)
+ {
+ default:
+ gcc_unreachable ();
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ case CCZmode:
+ switch (m2)
+ {
+ default:
+ return VOIDmode;
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ case CCZmode:
+ return CCmode;
+ }
+
+ case CCFPmode:
+ case CCFPUmode:
+ /* These are only compatible with themselves, which we already
+ checked above. */
+ return VOIDmode;
+ }
+}
+
+
+/* Return a comparison we can do that is equivalent to
+   swap_condition (code), except possibly for orderedness.
+   Never change orderedness if TARGET_IEEE_FP, returning
+   UNKNOWN in that case if necessary. */
+
+static enum rtx_code
+ix86_fp_swap_condition (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT: /* GTU - CF=0 & ZF=0 */
+ return TARGET_IEEE_FP ? UNKNOWN : UNLT;
+ case GE: /* GEU - CF=0 */
+ return TARGET_IEEE_FP ? UNKNOWN : UNLE;
+ case UNLT: /* LTU - CF=1 */
+ return TARGET_IEEE_FP ? UNKNOWN : GT;
+ case UNLE: /* LEU - CF=1 | ZF=1 */
+ return TARGET_IEEE_FP ? UNKNOWN : GE;
+ default:
+ return swap_condition (code);
+ }
+}
+
+/* Return the cost of comparison CODE using the best strategy for
+   performance.  All of the following functions use the number of
+   instructions as a cost metric.  In the future this should be tweaked
+   to compute bytes for optimize_size and to take into account the
+   performance of various instructions on various CPUs. */
+
+static int
+ix86_fp_comparison_cost (enum rtx_code code)
+{
+ int arith_cost;
+
+ /* The cost of code using bit-twiddling on %ah. */
+ switch (code)
+ {
+ case UNLE:
+ case UNLT:
+ case LTGT:
+ case GT:
+ case GE:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ arith_cost = 4;
+ break;
+ case LT:
+ case NE:
+ case EQ:
+ case UNGE:
+ arith_cost = TARGET_IEEE_FP ? 5 : 4;
+ break;
+ case LE:
+ case UNGT:
+ arith_cost = TARGET_IEEE_FP ? 6 : 4;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (ix86_fp_comparison_strategy (code))
+ {
+ case IX86_FPCMP_COMI:
+ return arith_cost > 4 ? 3 : 2;
+ case IX86_FPCMP_SAHF:
+ return arith_cost > 4 ? 4 : 3;
+ default:
+ return arith_cost;
+ }
+}
+
+/* Return the strategy to use for a floating-point comparison.  We assume
+   that fcomi is always preferable where available, since that is also true
+   when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
+   fnstsw+test). */
+
+enum ix86_fpcmp_strategy
+ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ /* Do fcomi/sahf based test when profitable. */
+
+ if (TARGET_CMOVE)
+ return IX86_FPCMP_COMI;
+
+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
+ return IX86_FPCMP_SAHF;
+
+ return IX86_FPCMP_ARITH;
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to a fp comparison. The operands are updated in place; the new
+ comparison code is returned. */
+
+static enum rtx_code
+ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
+{
+ enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
+ rtx op0 = *pop0, op1 = *pop1;
+ enum machine_mode op_mode = GET_MODE (op0);
+ int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
+
+ /* All of the unordered compare instructions only work on registers.
+ The same is true of the fcomi compare instructions. The XFmode
+ compare instructions require registers except when comparing
+ against zero or when converting operand 1 from fixed point to
+ floating point. */
+
+ if (!is_sse
+ && (fpcmp_mode == CCFPUmode
+ || (op_mode == XFmode
+ && ! (standard_80387_constant_p (op0) == 1
+ || standard_80387_constant_p (op1) == 1)
+ && GET_CODE (op1) != FLOAT)
+ || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
+ {
+ op0 = force_reg (op_mode, op0);
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ {
+ /* %%% We only allow op1 in memory; op0 must be st(0). So swap
+ things around if they appear profitable, otherwise force op0
+ into a register. */
+
+ if (standard_80387_constant_p (op0) == 0
+ || (MEM_P (op0)
+ && ! (standard_80387_constant_p (op1) == 0
+ || MEM_P (op1))))
+ {
+ enum rtx_code new_code = ix86_fp_swap_condition (code);
+ if (new_code != UNKNOWN)
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = new_code;
+ }
+ }
+
+ if (!REG_P (op0))
+ op0 = force_reg (op_mode, op0);
+
+ if (CONSTANT_P (op1))
+ {
+ int tmp = standard_80387_constant_p (op1);
+ if (tmp == 0)
+ op1 = validize_mem (force_const_mem (op_mode, op1));
+ else if (tmp == 1)
+ {
+ if (TARGET_CMOVE)
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ op1 = force_reg (op_mode, op1);
+ }
+ }
+
+ /* Try to rearrange the comparison to make it cheaper. */
+ if (ix86_fp_comparison_cost (code)
+ > ix86_fp_comparison_cost (swap_condition (code))
+ && (REG_P (op1) || can_create_pseudo_p ()))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ if (!REG_P (op0))
+ op0 = force_reg (op_mode, op0);
+ }
+
+ *pop0 = op0;
+ *pop1 = op1;
+ return code;
+}
+
+/* Convert the comparison codes we use to represent an FP comparison to
+   the integer code that will result in a proper branch.  Return UNKNOWN
+   if no such code is available. */
+
+enum rtx_code
+ix86_fp_compare_code_to_integer (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT:
+ return GTU;
+ case GE:
+ return GEU;
+    case ORDERED:
+    case UNORDERED:
+      return code;
+    case UNEQ:
+      return EQ;
+    case UNLT:
+      return LTU;
+    case UNLE:
+      return LEU;
+    case LTGT:
+      return NE;
+ default:
+ return UNKNOWN;
+ }
+}
+
+/* Generate insn patterns to do a floating point compare of OPERANDS. */
+
+static rtx
+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
+{
+ enum machine_mode fpcmp_mode, intcmp_mode;
+ rtx tmp, tmp2;
+
+ fpcmp_mode = ix86_fp_compare_mode (code);
+ code = ix86_prepare_fp_compare_args (code, &op0, &op1);
+
+ /* Do fcomi/sahf based test when profitable. */
+ switch (ix86_fp_comparison_strategy (code))
+ {
+ case IX86_FPCMP_COMI:
+ intcmp_mode = fpcmp_mode;
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
+ emit_insn (tmp);
+ break;
+
+ case IX86_FPCMP_SAHF:
+ intcmp_mode = fpcmp_mode;
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
+
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
+ break;
+
+ case IX86_FPCMP_ARITH:
+ /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+
+      /* In the unordered case, we have to check C2 for NaNs, which
+         doesn't happen to work out to anything nice combination-wise.
+         So do some bit twiddling on the value we've got in AH to come
+         up with an appropriate set of condition codes. */
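+      /* After fnstsw, AH holds the x87 status-word flags C0 (0x01),
+         C1 (0x02), C2 (0x04) and C3 (0x40); for a compare, C3 means
+         equal, C0 means below and C2 means unordered.  Hence the
+         recurring masks: 0x45 = C0|C2|C3, 0x44 = C2|C3, 0x05 = C0|C2.  */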
+
+ intcmp_mode = CCNOmode;
+ switch (code)
+ {
+ case GT:
+ case UNGT:
+ if (code == GT || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
+ intcmp_mode = CCmode;
+ code = GEU;
+ }
+ break;
+ case LT:
+ case UNLT:
+ if (code == LT && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
+ code = NE;
+ }
+ break;
+ case GE:
+ case UNGE:
+ if (code == GE || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
+ code = NE;
+ }
+ break;
+ case LE:
+ case UNLE:
+ if (code == LE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = LTU;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = NE;
+ }
+ break;
+ case EQ:
+ case UNEQ:
+ if (code == EQ && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = NE;
+ }
+ break;
+ case NE:
+ case LTGT:
+ if (code == NE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x40)));
+ code = NE;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = EQ;
+ }
+ break;
+
+ case UNORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = NE;
+ break;
+ case ORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable();
+ }
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+}
+
+static rtx
+ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx ret;
+
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+ ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+
+ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
+ }
+ else
+ ret = ix86_expand_int_compare (code, op0, op1);
+
+ return ret;
+}
+
+void
+ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx tmp;
+
+ switch (mode)
+ {
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ case QImode:
+ case HImode:
+ case SImode:
+ simple:
+ tmp = ix86_expand_compare (code, op0, op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ return;
+
+ case DImode:
+ if (TARGET_64BIT)
+ goto simple;
+      /* FALLTHRU */
+    case TImode:
+ /* Expand DImode branch into multiple compare+branch. */
+ {
+ rtx lo[2], hi[2], label2;
+ enum rtx_code code1, code2, code3;
+ enum machine_mode submode;
+
+ if (CONSTANT_P (op0) && !CONSTANT_P (op1))
+ {
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ split_double_mode (mode, &op0, 1, lo+0, hi+0);
+ split_double_mode (mode, &op1, 1, lo+1, hi+1);
+
+ submode = mode == DImode ? SImode : DImode;
+
+ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
+ avoid two branches. This costs one extra insn, so disable when
+ optimizing for size. */
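+	/* Illustration (DImode a == b on 32-bit): XOR the high parts,
+	   XOR the low parts and OR the results; the single branch tests
+	   (hi0^hi1)|(lo0^lo1) against zero, which is zero iff a == b.  */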
+
+ if ((code == EQ || code == NE)
+ && (!optimize_insn_for_size_p ()
+ || hi[1] == const0_rtx || lo[1] == const0_rtx))
+ {
+ rtx xor0, xor1;
+
+ xor1 = hi[0];
+ if (hi[1] != const0_rtx)
+ xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ xor0 = lo[0];
+ if (lo[1] != const0_rtx)
+ xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ tmp = expand_binop (submode, ior_optab, xor1, xor0,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ ix86_expand_branch (code, tmp, const0_rtx, label);
+ return;
+ }
+
+	/* Otherwise, if we are doing a less-than or greater-or-equal
+	   comparison, op1 is a constant, and the low word is zero, then
+	   we can just examine the high word.  Similarly for a low word
+	   of -1 and less-or-equal or greater-than comparisons. */
+
+ if (CONST_INT_P (hi[1]))
+ switch (code)
+ {
+ case LT: case LTU: case GE: case GEU:
+ if (lo[1] == const0_rtx)
+ {
+ ix86_expand_branch (code, hi[0], hi[1], label);
+ return;
+ }
+ break;
+ case LE: case LEU: case GT: case GTU:
+ if (lo[1] == constm1_rtx)
+ {
+ ix86_expand_branch (code, hi[0], hi[1], label);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Otherwise, we need two or three jumps. */
+
+ label2 = gen_label_rtx ();
+
+ code1 = code;
+ code2 = swap_condition (code);
+ code3 = unsigned_condition (code);
+
+ switch (code)
+ {
+ case LT: case GT: case LTU: case GTU:
+ break;
+
+ case LE: code1 = LT; code2 = GT; break;
+ case GE: code1 = GT; code2 = LT; break;
+ case LEU: code1 = LTU; code2 = GTU; break;
+ case GEU: code1 = GTU; code2 = LTU; break;
+
+ case EQ: code1 = UNKNOWN; code2 = NE; break;
+ case NE: code2 = UNKNOWN; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /*
+ * a < b =>
+ * if (hi(a) < hi(b)) goto true;
+ * if (hi(a) > hi(b)) goto false;
+ * if (lo(a) < lo(b)) goto true;
+ * false:
+ */
+
+ if (code1 != UNKNOWN)
+ ix86_expand_branch (code1, hi[0], hi[1], label);
+ if (code2 != UNKNOWN)
+ ix86_expand_branch (code2, hi[0], hi[1], label2);
+
+ ix86_expand_branch (code3, lo[0], lo[1], label);
+
+ if (code2 != UNKNOWN)
+ emit_label (label2);
+ return;
+ }
+
+ default:
+ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
+ goto simple;
+ }
+}
+
+/* Split branch based on floating point condition. */
+void
+ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
+ rtx target1, rtx target2, rtx tmp, rtx pushed)
+{
+ rtx condition;
+ rtx i;
+
+ if (target2 != pc_rtx)
+ {
+ rtx tmp = target2;
+ code = reverse_condition_maybe_unordered (code);
+ target2 = target1;
+ target1 = tmp;
+ }
+
+ condition = ix86_expand_fp_compare (code, op1, op2,
+ tmp);
+
+ /* Remove pushed operand from stack. */
+ if (pushed)
+ ix86_free_from_memory (GET_MODE (pushed));
+
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ condition, target1, target2)));
+ if (split_branch_probability >= 0)
+ add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
+}
+
+void
+ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx ret;
+
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ ret = ix86_expand_compare (code, op0, op1);
+ PUT_MODE (ret, QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
+}
+
+/* Expand a comparison setting or clearing the carry flag.  Return true
+   when successful, storing the resulting comparison in *POP. */
+static bool
+ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
+{
+ enum machine_mode mode =
+ GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+
+  /* Do not handle double-mode compares, which go through the special path. */
+ if (mode == (TARGET_64BIT ? TImode : DImode))
+ return false;
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ rtx compare_op, compare_seq;
+
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+
+      /* Shortcut: the following common codes never translate
+	 into carry flag compares. */
+ if (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED)
+ return false;
+
+      /* These comparisons require the zero flag; swap operands so they
+	 no longer do. */
+ if ((code == GT || code == UNLE || code == LE || code == UNGT)
+ && !TARGET_IEEE_FP)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+
+      /* Try to expand the comparison and verify that we end up with a
+	 carry flag based comparison.  This fails only when we decide to
+	 expand the comparison using arithmetic, which is not a common
+	 scenario. */
+ start_sequence ();
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
+ else
+ code = GET_CODE (compare_op);
+
+ if (code != LTU && code != GEU)
+ return false;
+
+ emit_insn (compare_seq);
+ *pop = compare_op;
+ return true;
+ }
+
+ if (!INTEGRAL_MODE_P (mode))
+ return false;
+
+ switch (code)
+ {
+ case LTU:
+ case GEU:
+ break;
+
+ /* Convert a==0 into (unsigned)a<1. */
+ case EQ:
+ case NE:
+ if (op1 != const0_rtx)
+ return false;
+ op1 = const1_rtx;
+ code = (code == EQ ? LTU : GEU);
+ break;
+
+    /* Convert a>b into b<a or a>=b+1. */
+ case GTU:
+ case LEU:
+ if (CONST_INT_P (op1))
+ {
+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+ /* Bail out on overflow. We still can swap operands but that
+ would force loading of the constant into register. */
+ if (op1 == const0_rtx
+ || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ return false;
+ code = (code == GTU ? GEU : LTU);
+ }
+ else
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ code = (code == GTU ? LTU : GEU);
+ }
+ break;
+
+ /* Convert a>=0 into (unsigned)a<0x80000000. */
+ case LT:
+ case GE:
+ if (mode == DImode || op1 != const0_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LT ? GEU : LTU);
+ break;
+ case LE:
+ case GT:
+ if (mode == DImode || op1 != constm1_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LE ? GEU : LTU);
+ break;
+
+ default:
+ return false;
+ }
+  /* Swapping operands may cause a constant to appear as the first operand. */
+ if (!nonimmediate_operand (op0, VOIDmode))
+ {
+ if (!can_create_pseudo_p ())
+ return false;
+ op0 = force_reg (mode, op0);
+ }
+ *pop = ix86_expand_compare (code, op0, op1);
+ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
+ return true;
+}
+
+bool
+ix86_expand_int_movcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]), compare_code;
+ rtx compare_seq, compare_op;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool sign_bit_compare_p = false;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ start_sequence ();
+ compare_op = ix86_expand_compare (code, op0, op1);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ compare_code = GET_CODE (compare_op);
+
+ if ((op1 == const0_rtx && (code == GE || code == LT))
+ || (op1 == constm1_rtx && (code == GT || code == LE)))
+ sign_bit_compare_p = true;
+
+ /* Don't attempt mode expansion here -- if we had to expand 5 or 6
+ HImode insns, we'd be swallowed in word prefix ops. */
+
+ if ((mode != HImode || TARGET_FAST_PREFIX)
+ && (mode != (TARGET_64BIT ? TImode : DImode))
+ && CONST_INT_P (operands[2])
+ && CONST_INT_P (operands[3]))
+ {
+ rtx out = operands[0];
+ HOST_WIDE_INT ct = INTVAL (operands[2]);
+ HOST_WIDE_INT cf = INTVAL (operands[3]);
+ HOST_WIDE_INT diff;
+
+ diff = ct - cf;
+      /* Sign bit compares are better done using shifts than by using
+	 sbb. */
+ if (sign_bit_compare_p
+ || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
+ {
+ /* Detect overlap between destination and compare sources. */
+ rtx tmp = out;
+
+ if (!sign_bit_compare_p)
+ {
+ rtx flags;
+ bool fpcmp = false;
+
+ compare_code = GET_CODE (compare_op);
+
+ flags = XEXP (compare_op, 0);
+
+ if (GET_MODE (flags) == CCFPmode
+ || GET_MODE (flags) == CCFPUmode)
+ {
+ fpcmp = true;
+ compare_code
+ = ix86_fp_compare_code_to_integer (compare_code);
+ }
+
+ /* To simplify rest of code, restrict to the GEU case. */
+ if (compare_code == LTU)
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ else
+ {
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op,
+ reverse_condition (GET_CODE (compare_op)));
+ }
+ diff = ct - cf;
+
+ if (reg_overlap_mentioned_p (out, op0)
+ || reg_overlap_mentioned_p (out, op1))
+ tmp = gen_reg_rtx (mode);
+
+ if (mode == DImode)
+ emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
+ else
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
+ flags, compare_op));
+ }
+ else
+ {
+ if (code == GT || code == GE)
+ code = reverse_condition (code);
+ else
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ diff = ct - cf;
+ }
+ tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
+ }
+
+ if (diff == 1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [addl dest, ct]
+ *
+ * Size 5 - 8.
+ */
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (cf == -1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * orl $ct, dest
+ *
+ * Size 8.
+ */
+ tmp = expand_simple_binop (mode, IOR,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (diff == -1 && ct)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * notl dest
+ * [addl dest, cf]
+ *
+ * Size 8 - 11.
+ */
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ if (cf)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (cf),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [notl dest]
+ * andl cf - ct, dest
+ * [addl dest, ct]
+ *
+ * Size 8 - 11.
+ */
+
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ }
+
+ tmp = expand_simple_binop (mode, AND,
+ copy_rtx (tmp),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+
+ if (!rtx_equal_p (tmp, out))
+ emit_move_insn (copy_rtx (out), copy_rtx (tmp));
+
+ return true;
+ }
+
+ if (diff < 0)
+ {
+ enum machine_mode cmp_mode = GET_MODE (op0);
+
+ HOST_WIDE_INT tmp;
+ tmp = ct, ct = cf, cf = tmp;
+ diff = -diff;
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
+	      /* We may be reversing an unordered compare to a normal compare,
+		 which is not valid in general (we may convert a non-trapping
+		 condition to a trapping one), but on i386 we currently emit
+		 all comparisons unordered. */
+ compare_code = reverse_condition_maybe_unordered (compare_code);
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ }
+
+ compare_code = UNKNOWN;
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
+ && CONST_INT_P (op1))
+ {
+ if (op1 == const0_rtx
+ && (code == LT || code == GE))
+ compare_code = code;
+ else if (op1 == constm1_rtx)
+ {
+ if (code == LE)
+ compare_code = LT;
+ else if (code == GT)
+ compare_code = GE;
+ }
+ }
+
+ /* Optimize dest = (op0 < 0) ? -1 : cf. */
+ if (compare_code != UNKNOWN
+ && GET_MODE (op0) == GET_MODE (out)
+ && (cf == -1 || ct == -1))
+ {
+ /* If lea code below could be used, only optimize
+ if it results in a 2 insn sequence. */
+
+ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ || (compare_code == LT && ct == -1)
+ || (compare_code == GE && cf == -1))
+ {
+ /*
+ * notl op1 (if necessary)
+ * sarl $31, op1
+ * orl cf, op1
+ */
+ if (ct != -1)
+ {
+ cf = ct;
+ ct = -1;
+ code = reverse_condition (code);
+ }
+
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
+
+ out = expand_simple_binop (mode, IOR,
+ out, GEN_INT (cf),
+ out, 1, OPTAB_DIRECT);
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+
+ return true;
+ }
+ }
+
+
+ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
+ && (mode != DImode
+ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
+ {
+ /*
+ * xorl dest,dest
+ * cmpl op1,op2
+ * setcc dest
+ * lea cf(dest*(ct-cf)),dest
+ *
+ * Size 14.
+ *
+ * This also catches the degenerate setcc-only case.
+ */
+
+ rtx tmp;
+ int nops;
+
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
+
+ nops = 0;
+      /* On x86_64 the lea instruction operates on Pmode, so we need
+	 to do the arithmetic in the proper mode to match. */
+ if (diff == 1)
+ tmp = copy_rtx (out);
+ else
+ {
+ rtx out1;
+ out1 = copy_rtx (out);
+ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
+ nops++;
+ if (diff & 1)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, out1);
+ nops++;
+ }
+ }
+ if (cf != 0)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
+ nops++;
+ }
+ if (!rtx_equal_p (tmp, out))
+ {
+ if (nops == 1)
+ out = force_operand (tmp, copy_rtx (out));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
+ }
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return true;
+ }
+
+ /*
+ * General case: Jumpful:
+ * xorl dest,dest cmpl op1, op2
+ * cmpl op1, op2 movl ct, dest
+ * setcc dest jcc 1f
+ * decl dest movl cf, dest
+ * andl (cf-ct),dest 1:
+ * addl ct,dest
+ *
+ * Size 20. Size 14.
+ *
+ * This is reasonably steep, but branch mispredict costs are
+ * high on modern cpus, so consider failing only if optimizing
+ * for space.
+ */
+
+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
+ {
+ if (cf == 0)
+ {
+ enum machine_mode cmp_mode = GET_MODE (op0);
+
+ cf = ct;
+ ct = 0;
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
+		  /* We may be reversing an unordered compare to a normal
+		     compare, which is not valid in general (we may convert
+		     a non-trapping condition to a trapping one), but on i386
+		     we currently emit all comparisons unordered. */
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ code = reverse_condition (code);
+ if (compare_code != UNKNOWN)
+ compare_code = reverse_condition (compare_code);
+ }
+ }
+
+ if (compare_code != UNKNOWN)
+ {
+ /* notl op1 (if needed)
+ sarl $31, op1
+ andl (cf-ct), op1
+ addl ct, op1
+
+ For x < 0 (resp. x <= -1) there will be no notl,
+ so if possible swap the constants to get rid of the
+ complement.
+ True/false will be -1/0 while code below (store flag
+ followed by decrement) is 0/-1, so the constants need
+ to be exchanged once more. */
+
+ if (compare_code == GE || !cf)
+ {
+ code = reverse_condition (code);
+ compare_code = LT;
+ }
+ else
+ {
+ HOST_WIDE_INT tmp = cf;
+ cf = ct;
+ ct = tmp;
+ }
+
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
+ }
+ else
+ {
+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
+
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out),
+ constm1_rtx,
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ }
+
+ out = expand_simple_binop (mode, AND, copy_rtx (out),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (ct)
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return true;
+ }
+ }
+
+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ {
+ /* Try a few things more with specific constants and a variable. */
+
+ optab op;
+ rtx var, orig_out, out, tmp;
+
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
+ return false;
+
+ /* If one of the two operands is an interesting constant, load a
+ constant with the above and mask it in with a logical operation. */
+
+ if (CONST_INT_P (operands[2]))
+ {
+ var = operands[3];
+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
+ operands[3] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
+ operands[3] = const0_rtx, op = ior_optab;
+ else
+ return false;
+ }
+ else if (CONST_INT_P (operands[3]))
+ {
+ var = operands[2];
+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
+ operands[2] = constm1_rtx, op = and_optab;
+	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
+ operands[2] = const0_rtx, op = ior_optab;
+ else
+ return false;
+ }
+ else
+ return false;
+
+ orig_out = operands[0];
+ tmp = gen_reg_rtx (mode);
+ operands[0] = tmp;
+
+ /* Recurse to get the constant loaded. */
+ if (ix86_expand_int_movcc (operands) == 0)
+ return false;
+
+ /* Mask in the interesting variable. */
+ out = expand_binop (mode, op, var, tmp, orig_out, 0,
+ OPTAB_WIDEN);
+ if (!rtx_equal_p (out, orig_out))
+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
+
+ return true;
+ }
+
+ /*
+ * For comparison with above,
+ *
+ * movl cf,dest
+ * movl ct,tmp
+ * cmpl op1,op2
+ * cmovcc tmp,dest
+ *
+ * Size 15.
+ */
+
+ if (! nonimmediate_operand (operands[2], mode))
+ operands[2] = force_reg (mode, operands[2]);
+ if (! nonimmediate_operand (operands[3], mode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ if (! register_operand (operands[2], VOIDmode)
+ && (mode == QImode
+ || ! register_operand (operands[3], VOIDmode)))
+ operands[2] = force_reg (mode, operands[2]);
+
+ if (mode == QImode
+ && ! register_operand (operands[3], VOIDmode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ emit_insn (compare_seq);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode,
+ compare_op, operands[2],
+ operands[3])));
+ return true;
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ /* AVX supports all the needed comparisons, no need to swap arguments
+ nor help reload. */
+ if (TARGET_AVX)
+ return code;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
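+/* (For reference: SSE minps/minpd compute SRC1 < SRC2 ? SRC1 : SRC2 and
+   return the second operand when the comparison is unordered or the
+   operands compare equal, so operand order matters for IEEE semantics.)  */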
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
+
+/* Expand an sse vector comparison. Return the register with the result. */
+
+static rtx
+ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ dest = gen_reg_rtx (mode);
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return dest;
+}
+
+/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
+ operations. This is used for both scalar and vector conditional moves. */
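+/* The general pattern is DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE);
+   the special cases below drop whichever term has a zero operand.  */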
+
+static void
+ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t2, t3, x;
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, cmp, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (TARGET_XOP)
+ {
+ op_true = force_reg (mode, op_true);
+
+ if (!nonimmediate_operand (op_false, mode))
+ op_false = force_reg (mode, op_false);
+
+      emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false)));
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, cmp);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Expand a floating-point conditional move. Return true if successful. */
+
+bool
+ix86_expand_fp_movcc (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx tmp, compare_op;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ {
+ enum machine_mode cmode;
+
+ /* Since we've no cmove for sse registers, don't force bad register
+ allocation just to gain access to it. Deny movcc when the
+ comparison mode doesn't match the move mode. */
+ cmode = GET_MODE (op0);
+ if (cmode == VOIDmode)
+ cmode = GET_MODE (op1);
+ if (cmode != mode)
+ return false;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
+ if (code == UNKNOWN)
+ return false;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
+ operands[2], operands[3]))
+ return true;
+
+ tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
+ operands[2], operands[3]);
+ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
+ return true;
+ }
+
+ /* The floating point conditional move instructions don't directly
+ support conditions resulting from a signed integer comparison. */
+
+ compare_op = ix86_expand_compare (code, op0, op1);
+ if (!fcmov_comparison_operator (compare_op, VOIDmode))
+ {
+ tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (tmp, code, op0, op1);
+
+ compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, compare_op,
+ operands[2], operands[3])));
+
+ return true;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+ rather than a movcc operation. */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[3]);
+ rtx cmp;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &operands[4], &operands[5]);
+ if (code == UNKNOWN)
+ {
+ rtx temp;
+ switch (GET_CODE (operands[3]))
+ {
+ case LTGT:
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = AND;
+ break;
+ case UNEQ:
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = IOR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+ OPTAB_DIRECT);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+ }
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+ operands[5], operands[1], operands[2]))
+ return true;
+
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+ operands[1], operands[2]);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+}
+
+/* Expand a signed/unsigned integral vector conditional move. */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate = false;
+ rtx x, cop0, cop1;
+
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* XOP supports all of the comparisons on all vector int types. */
+ if (!TARGET_XOP)
+ {
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
+
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+      /* Unsigned parallel compare is not supported by the hardware.
+	 Play some tricks to turn this into a signed comparison
+	 or an equality test against zero. */
+ if (code == GTU)
+ {
+ cop0 = force_reg (mode, cop0);
+
+ switch (mode)
+ {
+ case V4SImode:
+ case V2DImode:
+ {
+ rtx t1, t2, mask;
+ rtx (*gen_sub3) (rtx, rtx, rtx);
+
+ /* Subtract (-(INT MAX) - 1) from both operands to make
+ them signed. */
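+	    /* With the per-element sign-bit bias B, x >u y iff
+	       (x - B) >s (y - B): subtracting B flips the sign bit of
+	       each element while preserving the ordering.  */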
+ mask = ix86_build_signbit_mask (mode, true, false);
+ gen_sub3 = (mode == V4SImode
+ ? gen_subv4si3 : gen_subv2di3);
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t1, cop0, mask));
+
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_sub3 (t2, cop1, mask));
+
+ cop0 = t1;
+ cop1 = t2;
+ code = GT;
+ }
+ break;
+
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
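+	  /* x >u y iff the saturating difference x -us y is nonzero;
+	     compare it for equality with zero and let NEGATE flip the
+	     arms of the conditional move.  */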
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
+ code = EQ;
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+
+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+ operands[2-negate]);
+ return true;
+}
+
+/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
+ true if we should do zero extension, else sign extension. HIGH_P is
+ true if we want the N/2 high elements, else the low elements. */
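+/* Zero extension interleaves each element with zero; sign extension
+   interleaves it with the all-ones mask produced by the (0 > x)
+   comparison below, which replicates the sign bit.  */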
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx);
+ rtx se, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = gen_lowpart (imode, operands[0]);
+
+ if (unsigned_p)
+ se = force_reg (imode, CONST0_RTX (imode));
+ else
+ se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
+
+ emit_insn (unpack (dest, operands[1], se));
+}
+
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with SSE4.1 instructions. */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx);
+ rtx src, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_sign_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_sign_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = operands[0];
+ if (high_p)
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ src = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ }
+ else
+ src = operands[1];
+
+ emit_insn (unpack (dest, src));
+}
+
+/* Expand conditional increment or decrement using adc/sbb instructions.
+   The default case using setcc followed by the conditional move can be
+   done by generic code. */
+bool
+ix86_expand_int_addcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx flags;
+ rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
+ rtx compare_op;
+ rtx val = const0_rtx;
+ bool fpcmp = false;
+ enum machine_mode mode;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ if (operands[3] != const1_rtx
+ && operands[3] != constm1_rtx)
+ return false;
+ if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
+ return false;
+ code = GET_CODE (compare_op);
+
+ flags = XEXP (compare_op, 0);
+
+ if (GET_MODE (flags) == CCFPmode
+ || GET_MODE (flags) == CCFPUmode)
+ {
+ fpcmp = true;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+
+ if (code != LTU)
+ {
+ val = constm1_rtx;
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+
+ mode = GET_MODE (operands[0]);
+
+ /* Construct either adc or sbb insn. */
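+  /* E.g. with operands[3] == const1_rtx and a carry-setting compare,
+     the adc form computes operands[0] = operands[2] + 0 + CF, i.e. a
+     conditional increment; the sbb form subtracts the carry instead.  */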
+ if ((code == LTU) == (operands[3] == constm1_rtx))
+ {
+ switch (mode)
+ {
+ case QImode:
+ insn = gen_subqi3_carry;
+ break;
+ case HImode:
+ insn = gen_subhi3_carry;
+ break;
+ case SImode:
+ insn = gen_subsi3_carry;
+ break;
+ case DImode:
+ insn = gen_subdi3_carry;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (mode)
+ {
+ case QImode:
+ insn = gen_addqi3_carry;
+ break;
+ case HImode:
+ insn = gen_addhi3_carry;
+ break;
+ case SImode:
+ insn = gen_addsi3_carry;
+ break;
+ case DImode:
+ insn = gen_adddi3_carry;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
+
+ return true;
+}
+
+
+/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
+   but works for floating point parameters and non-offsettable memories.
+   For pushes, it returns just stack offsets; the values will be saved
+   in the right order.  At most four parts are generated. */
+
+static int
+ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
+{
+ int size;
+
+ if (!TARGET_64BIT)
+    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
+ else
+ size = (GET_MODE_SIZE (mode) + 4) / 8;
+
+ gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
+ gcc_assert (size >= 2 && size <= 4);
+
+  /* Optimize constant pool references to immediates.  This is used by fp
+     moves, which force all constants to memory to allow combining. */
+ if (MEM_P (operand) && MEM_READONLY_P (operand))
+ {
+ rtx tmp = maybe_get_pool_constant (operand);
+ if (tmp)
+ operand = tmp;
+ }
+
+ if (MEM_P (operand) && !offsettable_memref_p (operand))
+ {
+      /* The only non-offsettable memories we handle are pushes. */
+ int ok = push_operand (operand, VOIDmode);
+
+ gcc_assert (ok);
+
+ operand = copy_rtx (operand);
+ PUT_MODE (operand, Pmode);
+ parts[0] = parts[1] = parts[2] = parts[3] = operand;
+ return size;
+ }
+
+ if (GET_CODE (operand) == CONST_VECTOR)
+ {
+ enum machine_mode imode = int_mode_for_mode (mode);
+ /* Caution: if we looked through a constant pool memory above,
+ the operand may actually have a different mode now. That's
+ ok, since we want to pun this all the way back to an integer. */
+ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
+ gcc_assert (operand != NULL);
+ mode = imode;
+ }
+
+ if (!TARGET_64BIT)
+ {
+ if (mode == DImode)
+ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
+ else
+ {
+ int i;
+
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ for (i = 0; i < size; i++)
+ parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, SImode, 0);
+ parts[0] = operand;
+ for (i = 1; i < size; i++)
+ parts[i] = adjust_address (operand, SImode, 4 * i);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ switch (mode)
+ {
+ case TFmode:
+ real_to_target (l, &r, mode);
+ parts[3] = gen_int_mode (l[3], SImode);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case XFmode:
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ parts[1] = gen_int_mode (l[1], SImode);
+ parts[0] = gen_int_mode (l[0], SImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (mode == TImode)
+ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
+ if (mode == XFmode || mode == TFmode)
+ {
+ enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, DImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, upper_mode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ real_to_target (l, &r, mode);
+
+ /* Avoid a single shift by 32; it triggers a warning on 32-bit hosts. */
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[0]
+ = gen_int_mode
+ ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
+ DImode);
+ else
+ parts[0] = immed_double_const (l[0], l[1], DImode);
+
+ if (upper_mode == SImode)
+ parts[1] = gen_int_mode (l[2], SImode);
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[1]
+ = gen_int_mode
+ ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
+ DImode);
+ else
+ parts[1] = immed_double_const (l[2], l[3], DImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ return size;
+}
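+
+/* Illustrative example: on a 32-bit target, splitting the DFmode constant
+   1.0 (IEEE-754 bit pattern 0x3FF0000000000000) yields two SImode
+   immediates, parts[0] = 0x00000000 (low word) and parts[1] = 0x3FF00000
+   (high word); an XFmode value would add parts[2], and TFmode parts[3].  */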
+
+/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
+ Operands 2-5 receive the destination parts in the correct order and
+ operands 6-9 the corresponding source parts; only the first NPARTS
+ entries of each group are used. */
+
+void
+ix86_split_long_move (rtx operands[])
+{
+ rtx part[2][4];
+ int nparts, i, j;
+ int push = 0;
+ int collisions = 0;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool collisionparts[4];
+
+ /* The DFmode expanders may ask us to move a double.
+ For a 64-bit target this is a single move. By hiding the fact
+ here we simplify the i386.md splitters. */
+ if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
+ {
+ /* Optimize constant pool reference to immediates. This is used by
+ fp moves, that force all constants to memory to allow combining. */
+
+ if (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
+ operands[1] = get_pool_constant (XEXP (operands[1], 0));
+ if (push_operand (operands[0], VOIDmode))
+ {
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], Pmode);
+ }
+ else
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+
+ /* The only non-offsettable memory we handle is a push. */
+ if (push_operand (operands[0], VOIDmode))
+ push = 1;
+ else
+ gcc_assert (!MEM_P (operands[0])
+ || offsettable_memref_p (operands[0]));
+
+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
+
+ /* When emitting a push, take care of source operands on the stack. */
+ if (push && MEM_P (operands[1])
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ {
+ rtx src_base = XEXP (part[1][nparts - 1], 0);
+
+ /* Compensate for the stack decrement by 4. */
+ if (!TARGET_64BIT && nparts == 3
+ && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
+ src_base = plus_constant (src_base, 4);
+
+ /* src_base refers to the stack pointer and is
+ automatically decreased by emitted push. */
+ for (i = 0; i < nparts; i++)
+ part[1][i] = change_address (part[1][i],
+ GET_MODE (part[1][i]), src_base);
+ }
+
+ /* We need to do the copy in the right order in case an address register
+ of the source overlaps the destination. */
+ if (REG_P (part[0][0]) && MEM_P (part[1][0]))
+ {
+ rtx tmp;
+
+ for (i = 0; i < nparts; i++)
+ {
+ collisionparts[i]
+ = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
+ if (collisionparts[i])
+ collisions++;
+ }
+
+ /* Collision in the middle part can be handled by reordering. */
+ if (collisions == 1 && nparts == 3 && collisionparts [1])
+ {
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+ else if (collisions == 1
+ && nparts == 4
+ && (collisionparts [1] || collisionparts [2]))
+ {
+ if (collisionparts [1])
+ {
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+ else
+ {
+ tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
+ tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
+ }
+ }
+
+ /* If there are more collisions, we can't handle it by reordering.
+ Do an lea to the last part and use only one colliding move. */
+ else if (collisions > 1)
+ {
+ rtx base;
+
+ collisions = 1;
+
+ base = part[0][nparts - 1];
+
+ /* Handle the case when the last part isn't valid for lea.
+ Happens in 64-bit mode storing the 12-byte XFmode. */
+ if (GET_MODE (base) != Pmode)
+ base = gen_rtx_REG (Pmode, REGNO (base));
+
+ emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ part[1][0] = replace_equiv_address (part[1][0], base);
+ for (i = 1; i < nparts; i++)
+ {
+ tmp = plus_constant (base, UNITS_PER_WORD * i);
+ part[1][i] = replace_equiv_address (part[1][i], tmp);
+ }
+ }
+ }
+
+ if (push)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (nparts == 3)
+ {
+ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (-4)));
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ else if (nparts == 4)
+ {
+ emit_move_insn (part[0][3], part[1][3]);
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ }
+ else
+ {
+ /* In 64-bit mode we don't have a 32-bit push available. If the
+ operand is a register, that is fine; we just use the larger
+ counterpart. We also retype the memory references; these come from
+ an attempt to avoid the REX prefix when moving the second half of a
+ TFmode value. */
+ if (GET_MODE (part[1][1]) == SImode)
+ {
+ switch (GET_CODE (part[1][1]))
+ {
+ case MEM:
+ part[1][1] = adjust_address (part[1][1], DImode, 0);
+ break;
+
+ case REG:
+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (GET_MODE (part[1][0]) == SImode)
+ part[1][0] = part[1][1];
+ }
+ }
+ emit_move_insn (part[0][1], part[1][1]);
+ emit_move_insn (part[0][0], part[1][0]);
+ return;
+ }
+
+ /* Choose the correct order so as not to overwrite the source before it is copied. */
+ if ((REG_P (part[0][0])
+ && REG_P (part[1][1])
+ && (REGNO (part[0][0]) == REGNO (part[1][1])
+ || (nparts == 3
+ && REGNO (part[0][0]) == REGNO (part[1][2]))
+ || (nparts == 4
+ && REGNO (part[0][0]) == REGNO (part[1][3]))))
+ || (collisions > 0
+ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
+ {
+ for (i = 0, j = nparts - 1; i < nparts; i++, j--)
+ {
+ operands[2 + i] = part[0][j];
+ operands[6 + i] = part[1][j];
+ }
+ }
+ else
+ {
+ for (i = 0; i < nparts; i++)
+ {
+ operands[2 + i] = part[0][i];
+ operands[6 + i] = part[1][i];
+ }
+ }
+
+ /* If optimizing for size, attempt to locally unCSE nonzero constants. */
+ if (optimize_insn_for_size_p ())
+ {
+ for (j = 0; j < nparts - 1; j++)
+ if (CONST_INT_P (operands[6 + j])
+ && operands[6 + j] != const0_rtx
+ && REG_P (operands[2 + j]))
+ for (i = j; i < nparts - 1; i++)
+ if (CONST_INT_P (operands[7 + i])
+ && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
+ operands[7 + i] = operands[2 + j];
+ }
+
+ for (i = 0; i < nparts; i++)
+ emit_move_insn (operands[2 + i], operands[6 + i]);
+
+ return;
+}
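+
+/* Illustrative example: splitting the 32-bit DImode load (%eax) -> %eax:%edx.
+   Copying the low word first would clobber the address register, so the
+   collision handling above reverses the order, emitting roughly
+
+       movl 4(%eax), %edx
+       movl (%eax), %eax
+
+   so that the colliding (address-overwriting) move happens last.  */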
+
+/* Helper function of ix86_split_ashl used to generate an SImode/DImode
+ left shift by a constant, either using a single shift or
+ a sequence of add instructions. */
+
+static void
+ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
+{
+ rtx (*insn)(rtx, rtx, rtx);
+
+ if (count == 1
+ || (count * ix86_cost->add <= ix86_cost->shift_const
+ && !optimize_insn_for_size_p ()))
+ {
+ insn = mode == DImode ? gen_addsi3 : gen_adddi3;
+ while (count-- > 0)
+ emit_insn (insn (operand, operand, operand));
+ }
+ else
+ {
+ insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
+ emit_insn (insn (operand, operand, GEN_INT (count)));
+ }
+}
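+
+/* Illustrative example: when two adds are no more expensive than one
+   constant shift (count * ix86_cost->add <= ix86_cost->shift_const) and we
+   optimize for speed, a left shift by 2 of a half-word in %eax may be
+   emitted as
+
+       addl %eax, %eax
+       addl %eax, %eax
+
+   instead of  sall $2, %eax.  */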
+
+void
+ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx (*gen_ashl3)(rtx, rtx, rtx);
+ rtx (*gen_shld)(rtx, rtx, rtx);
+ int half_width = GET_MODE_BITSIZE (mode) >> 1;
+
+ rtx low[2], high[2];
+ int count;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ split_double_mode (mode, operands, 2, low, high);
+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
+
+ if (count >= half_width)
+ {
+ emit_move_insn (high[0], low[1]);
+ emit_move_insn (low[0], const0_rtx);
+
+ if (count > half_width)
+ ix86_expand_ashl_const (high[0], count - half_width, mode);
+ }
+ else
+ {
+ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
+ ix86_expand_ashl_const (low[0], count, mode);
+ }
+ return;
+ }
+
+ split_double_mode (mode, operands, 1, low, high);
+
+ gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
+
+ if (operands[1] == const1_rtx)
+ {
+ /* Assuming we've chosen QImode-capable registers, 1 << N
+ can be done with two 32/64-bit shifts, no branches, no cmoves. */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
+
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5/6, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
+ {
+ enum machine_mode half_mode;
+ rtx (*gen_lshr3)(rtx, rtx, rtx);
+ rtx (*gen_and3)(rtx, rtx, rtx);
+ rtx (*gen_xor3)(rtx, rtx, rtx);
+ HOST_WIDE_INT bits;
+ rtx x;
+
+ if (mode == DImode)
+ {
+ half_mode = SImode;
+ gen_lshr3 = gen_lshrsi3;
+ gen_and3 = gen_andsi3;
+ gen_xor3 = gen_xorsi3;
+ bits = 5;
+ }
+ else
+ {
+ half_mode = DImode;
+ gen_lshr3 = gen_lshrdi3;
+ gen_and3 = gen_anddi3;
+ gen_xor3 = gen_xordi3;
+ bits = 6;
+ }
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
+ x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
+ else
+ x = gen_lowpart (half_mode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+
+ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
+ emit_insn (gen_and3 (high[0], high[0], const1_rtx));
+ emit_move_insn (low[0], high[0]);
+ emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
+ }
+
+ emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
+ emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+ /* For -1 << N, we can avoid the shld instruction, because we
+ know that we're shifting 0...31/63 ones into a -1. */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_insn_for_size_p ())
+ emit_move_insn (high[0], low[0]);
+ else
+ emit_move_insn (high[0], constm1_rtx);
+ }
+ else
+ {
+ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_double_mode (mode, operands, 1, low, high);
+ emit_insn (gen_shld (high[0], low[0], operands[2]));
+ }
+
+ emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
+
+ ix86_expand_clear (scratch);
+ emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
+ }
+ else
+ {
+ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
+
+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
+ }
+}
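+
+/* Illustrative sketch: for a DImode shift left on a 32-bit target with a
+   variable count and a cmove-capable CPU, the expansion above amounts to
+
+       shldl %cl, %eax, %edx     ; high = high:low << (count & 31)
+       sall  %cl, %eax           ; low <<= (count & 31)
+       xorl  %ebx, %ebx          ; scratch = 0
+       testb $32, %cl            ; count >= 32?
+       cmovne %eax, %edx         ; then high = low
+       cmovne %ebx, %eax         ; and  low = 0
+
+   with the last three instructions encoded by the x86_shift*_adj_1
+   pattern; register choices here are illustrative.  */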
+
+void
+ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx (*gen_ashr3)(rtx, rtx, rtx)
+ = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
+ rtx (*gen_shrd)(rtx, rtx, rtx);
+ int half_width = GET_MODE_BITSIZE (mode) >> 1;
+
+ rtx low[2], high[2];
+ int count;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ split_double_mode (mode, operands, 2, low, high);
+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
+
+ if (count == GET_MODE_BITSIZE (mode) - 1)
+ {
+ emit_move_insn (high[0], high[1]);
+ emit_insn (gen_ashr3 (high[0], high[0],
+ GEN_INT (half_width - 1)));
+ emit_move_insn (low[0], high[0]);
+
+ }
+ else if (count >= half_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ emit_move_insn (high[0], low[0]);
+ emit_insn (gen_ashr3 (high[0], high[0],
+ GEN_INT (half_width - 1)));
+
+ if (count > half_width)
+ emit_insn (gen_ashr3 (low[0], low[0],
+ GEN_INT (count - half_width)));
+ }
+ else
+ {
+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
+ emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_double_mode (mode, operands, 1, low, high);
+
+ emit_insn (gen_shrd (low[0], high[0], operands[2]));
+ emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
+
+ emit_move_insn (scratch, high[0]);
+ emit_insn (gen_ashr3 (scratch, scratch,
+ GEN_INT (half_width - 1)));
+ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ {
+ rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
+
+ emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
+ }
+ }
+}
+
+void
+ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx (*gen_lshr3)(rtx, rtx, rtx)
+ = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
+ rtx (*gen_shrd)(rtx, rtx, rtx);
+ int half_width = GET_MODE_BITSIZE (mode) >> 1;
+
+ rtx low[2], high[2];
+ int count;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ split_double_mode (mode, operands, 2, low, high);
+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
+
+ if (count >= half_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ ix86_expand_clear (high[0]);
+
+ if (count > half_width)
+ emit_insn (gen_lshr3 (low[0], low[0],
+ GEN_INT (count - half_width)));
+ }
+ else
+ {
+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
+ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ split_double_mode (mode, operands, 1, low, high);
+
+ emit_insn (gen_shrd (low[0], high[0], operands[2]));
+ emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
+
+ ix86_expand_clear (scratch);
+ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ {
+ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
+ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
+
+ emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
+ }
+ }
+}
+
+/* Predict just emitted jump instruction to be taken with probability PROB. */
+static void
+predict_jump (int prob)
+{
+ rtx insn = get_last_insn ();
+ gcc_assert (JUMP_P (insn));
+ add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
+}
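+
+/* Illustrative note: probabilities use the REG_BR_PROB_BASE scale (10000),
+   so predict_jump (REG_BR_PROB_BASE * 90 / 100) marks the preceding jump
+   as taken with probability 0.9.  */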
+
+/* Helper function for the string operations below. Test whether VARIABLE
+ is aligned to VALUE bytes (VALUE is a power of two; only that bit is
+ tested); if it is, jump to the returned label. */
+static rtx
+ix86_expand_aligntest (rtx variable, int value, bool epilogue)
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, label);
+ if (epilogue)
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ return label;
+}
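+
+/* Illustrative sketch: ix86_expand_aligntest (destptr, 4, false) emits
+   roughly
+
+       movl %edi, %eax
+       andl $4, %eax
+       jz   .Lskip            ; bit 2 clear: no 4-byte fixup needed
+
+   and returns .Lskip, which the caller emits after its fixup code.  */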
+
+/* Decrement COUNTREG by VALUE. */
+static void
+ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
+{
+ rtx (*gen_add)(rtx, rtx, rtx)
+ = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
+
+ emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero-extend EXP, which may be SImode, to a Pmode register. */
+rtx
+ix86_zero_extend_to_Pmode (rtx exp)
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Divide COUNTREG by SCALE. */
+static rtx
+scale_counter (rtx countreg, int scale)
+{
+ rtx sc;
+
+ if (scale == 1)
+ return countreg;
+ if (CONST_INT_P (countreg))
+ return GEN_INT (INTVAL (countreg) / scale);
+ gcc_assert (REG_P (countreg));
+
+ sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
+ GEN_INT (exact_log2 (scale)),
+ NULL, 1, OPTAB_DIRECT);
+ return sc;
+}
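+
+/* Illustrative example: scale_counter (count, 4) yields count >> 2, either
+   as the folded constant INTVAL (count) / 4 for a known byte count or as a
+   logical right shift by 2 of the count register.  */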
+
+/* Return mode for the memcpy/memset loop counter. Prefer SImode over
+ DImode for constant loop counts. */
+
+static enum machine_mode
+counter_mode (rtx count_exp)
+{
+ if (GET_MODE (count_exp) != VOIDmode)
+ return GET_MODE (count_exp);
+ if (!CONST_INT_P (count_exp))
+ return Pmode;
+ if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
+ return DImode;
+ return SImode;
+}
+
+/* When SRCPTR is non-NULL, output a simple loop that moves memory from
+ SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
+ size is COUNT, specified in bytes. When SRCPTR is NULL, output the
+ equivalent loop that sets memory to VALUE (assumed to be in MODE).
+
+ The size is rounded down to a whole number of chunks moved at once.
+ SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
+
+static void
+expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx value,
+ rtx count, enum machine_mode mode, int unroll,
+ int expected_size)
+{
+ rtx out_label, top_label, iter, tmp;
+ enum machine_mode iter_mode = counter_mode (count);
+ rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
+ rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
+ rtx size;
+ rtx x_addr;
+ rtx y_addr;
+ int i;
+
+ top_label = gen_label_rtx ();
+ out_label = gen_label_rtx ();
+ iter = gen_reg_rtx (iter_mode);
+
+ size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
+ NULL, 1, OPTAB_DIRECT);
+ /* Those two should combine. */
+ if (piece_size == const1_rtx)
+ {
+ emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
+ true, out_label);
+ predict_jump (REG_BR_PROB_BASE * 10 / 100);
+ }
+ emit_move_insn (iter, const0_rtx);
+
+ emit_label (top_label);
+
+ tmp = convert_modes (Pmode, iter_mode, iter, true);
+ x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
+ destmem = change_address (destmem, mode, x_addr);
+
+ if (srcmem)
+ {
+ y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
+ srcmem = change_address (srcmem, mode, y_addr);
+
+ /* When unrolling for chips that reorder memory reads and writes,
+ we can save registers by using a single temporary. Also, using
+ four temporaries is overkill in 32-bit mode. */
+ if (!TARGET_64BIT && 0)
+ {
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ {
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ srcmem =
+ adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (destmem, srcmem);
+ }
+ }
+ else
+ {
+ rtx tmpreg[4];
+ gcc_assert (unroll <= 4);
+ for (i = 0; i < unroll; i++)
+ {
+ tmpreg[i] = gen_reg_rtx (mode);
+ if (i)
+ {
+ srcmem =
+ adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (tmpreg[i], srcmem);
+ }
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ {
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (destmem, tmpreg[i]);
+ }
+ }
+ }
+ else
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ emit_move_insn (destmem, value);
+ }
+
+ tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != iter)
+ emit_move_insn (iter, tmp);
+
+ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+ true, top_label);
+ if (expected_size != -1)
+ {
+ expected_size /= GET_MODE_SIZE (mode) * unroll;
+ if (expected_size == 0)
+ predict_jump (0);
+ else if (expected_size > REG_BR_PROB_BASE)
+ predict_jump (REG_BR_PROB_BASE - 1);
+ else
+ predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+ }
+ else
+ predict_jump (REG_BR_PROB_BASE * 80 / 100);
+ iter = ix86_zero_extend_to_Pmode (iter);
+ tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != destptr)
+ emit_move_insn (destptr, tmp);
+ if (srcptr)
+ {
+ tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != srcptr)
+ emit_move_insn (srcptr, tmp);
+ }
+ emit_label (out_label);
+}
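+
+/* Illustrative sketch of the emitted control flow (memcpy flavour,
+   MODE = SImode, UNROLL = 1):
+
+       size = count & ~3;
+       iter = 0;
+   top:
+       *(int *) (dest + iter) = *(int *) (src + iter);
+       iter += 4;
+       if (iter < size) goto top;
+       dest += iter;  src += iter;
+   out:
+
+   leaving any tail of fewer than four bytes to the epilogue.  */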
+
+/* Output "rep; mov" instruction.
+ Arguments have same meaning as for previous function */
+static void
+expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr,
+ rtx count,
+ enum machine_mode mode)
+{
+ rtx destexp;
+ rtx srcexp;
+ rtx countreg;
+
+ /* When the size is known to be a multiple of 4, the 4-byte variant is shorter. */
+ if (mode == QImode && CONST_INT_P (count)
+ && !(INTVAL (count) & 3))
+ mode = SImode;
+
+ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
+ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
+ if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
+ srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
+ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
+ if (mode != QImode)
+ {
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
+ srcexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
+ }
+ else
+ {
+ destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
+ }
+ if (CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ srcmem = shallow_copy_rtx (srcmem);
+ set_mem_size (destmem, count);
+ set_mem_size (srcmem, count);
+ }
+ else
+ {
+ if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ if (MEM_SIZE (srcmem))
+ set_mem_size (srcmem, NULL_RTX);
+ }
+ emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
+ destexp, srcexp));
+}
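+
+/* Illustrative sketch: for MODE == SImode the sequence above amounts to
+
+       movl  count, %ecx
+       shrl  $2, %ecx
+       rep movsl
+
+   with DESTEXP/SRCEXP describing the final pointer values
+   (ptr + (count & ~3)) so the rep pattern's side effects are visible
+   to the RTL optimizers.  */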
+
+/* Output "rep; stos" instruction.
+ Arguments have same meaning as for previous function */
+static void
+expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
+ rtx count, enum machine_mode mode,
+ rtx orig_value)
+{
+ rtx destexp;
+ rtx countreg;
+
+ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
+ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
+ value = force_reg (mode, gen_lowpart (mode, value));
+ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
+ if (mode != QImode)
+ {
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
+ }
+ else
+ destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ if (orig_value == const0_rtx && CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ set_mem_size (destmem, count);
+ }
+ else if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
+}
+
+static void
+emit_strmov (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
+{
+ rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
+ rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+}
+
+/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
+static void
+expand_movmem_epilogue (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx count, int max_size)
+{
+ rtx src, dest;
+ if (CONST_INT_P (count))
+ {
+ HOST_WIDE_INT countval = INTVAL (count);
+ int offset = 0;
+
+ if ((countval & 0x10) && max_size > 16)
+ {
+ if (TARGET_64BIT)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
+ }
+ else
+ gcc_unreachable ();
+ offset += 16;
+ }
+ if ((countval & 0x08) && max_size > 8)
+ {
+ if (TARGET_64BIT)
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
+ else
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
+ }
+ offset += 8;
+ }
+ if ((countval & 0x04) && max_size > 4)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
+ offset += 4;
+ }
+ if ((countval & 0x02) && max_size > 2)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
+ offset += 2;
+ }
+ if ((countval & 0x01) && max_size > 1)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
+ offset += 1;
+ }
+ return;
+ }
+ if (max_size > 8)
+ {
+ count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
+ count, 1, OPTAB_DIRECT);
+ expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
+ count, QImode, 1, 4);
+ return;
+ }
+
+ /* With single-instruction stringops we can cheaply advance the dest and
+ src pointers. Otherwise we save code size by maintaining an offset
+ (zero is readily available from the preceding rep operation) and using
+ x86 addressing modes. */
+ if (TARGET_SINGLE_STRINGOP)
+ {
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ src = change_address (srcmem, SImode, srcptr);
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ src = change_address (srcmem, HImode, srcptr);
+ dest = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ src = change_address (srcmem, QImode, srcptr);
+ dest = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ else
+ {
+ rtx offset = force_reg (Pmode, const0_rtx);
+ rtx tmp;
+
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ src = change_address (srcmem, SImode, srcptr);
+ dest = change_address (destmem, SImode, destptr);
+ emit_move_insn (dest, src);
+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != offset)
+ emit_move_insn (offset, tmp);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
+ src = change_address (srcmem, HImode, tmp);
+ tmp = gen_rtx_PLUS (Pmode, destptr, offset);
+ dest = change_address (destmem, HImode, tmp);
+ emit_move_insn (dest, src);
+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != offset)
+ emit_move_insn (offset, tmp);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
+ src = change_address (srcmem, QImode, tmp);
+ tmp = gen_rtx_PLUS (Pmode, destptr, offset);
+ dest = change_address (destmem, QImode, tmp);
+ emit_move_insn (dest, src);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+}
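+
+/* Illustrative example: with a known residue of 7 bytes on a 32-bit
+   target, the constant branch above copies 4 bytes at offset 0, 2 bytes
+   at offset 4 and 1 byte at offset 6: one move per set bit of the
+   residue, with no branching.  */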
+
+/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
+static void
+expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
+ rtx count, int max_size)
+{
+ count =
+ expand_simple_binop (counter_mode (count), AND, count,
+ GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
+ expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
+ gen_lowpart (QImode, value), count, QImode,
+ 1, max_size / 2);
+}
+
+/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
+static void
+expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
+{
+ rtx dest;
+
+ if (CONST_INT_P (count))
+ {
+ HOST_WIDE_INT countval = INTVAL (count);
+ int offset = 0;
+
+ if ((countval & 0x10) && max_size > 16)
+ {
+ if (TARGET_64BIT)
+ {
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ gcc_unreachable ();
+ offset += 16;
+ }
+ if ((countval & 0x08) && max_size > 8)
+ {
+ if (TARGET_64BIT)
+ {
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ offset += 8;
+ }
+ if ((countval & 0x04) && max_size > 4)
+ {
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
+ offset += 4;
+ }
+ if ((countval & 0x02) && max_size > 2)
+ {
+ dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
+ offset += 2;
+ }
+ if ((countval & 0x01) && max_size > 1)
+ {
+ dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
+ offset += 1;
+ }
+ return;
+ }
+ if (max_size > 32)
+ {
+ expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
+ return;
+ }
+ if (max_size > 16)
+ {
+ rtx label = ix86_expand_aligntest (count, 16, true);
+ if (TARGET_64BIT)
+ {
+ dest = change_address (destmem, DImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 8)
+ {
+ rtx label = ix86_expand_aligntest (count, 8, true);
+ if (TARGET_64BIT)
+ {
+ dest = change_address (destmem, DImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ dest = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ dest = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+}
+
+/* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
+ up to DESIRED_ALIGNMENT. */
+static void
+expand_movmem_prologue (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx count,
+ int align, int desired_alignment)
+{
+ if (align <= 1 && desired_alignment > 1)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 1, false);
+ srcmem = change_address (srcmem, QImode, srcptr);
+ destmem = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2 && desired_alignment > 2)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 2, false);
+ srcmem = change_address (srcmem, HImode, srcptr);
+ destmem = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 4, false);
+ srcmem = change_address (srcmem, SImode, srcptr);
+ destmem = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ gcc_assert (desired_alignment <= 8);
+}
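+
+/* Illustrative sketch: raising a destination of unknown alignment to a
+   4-byte boundary emits roughly
+
+       testl $1, %edi ; jz 1f ; copy 1 byte ; count -= 1 ; 1:
+       testl $2, %edi ; jz 2f ; copy 2 bytes ; count -= 2 ; 2:
+
+   after which the destination pointer is 4-byte aligned.  */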
+
+/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN. ALIGN_BYTES
+ is how many bytes need to be copied. */
+static rtx
+expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
+ int desired_align, int align_bytes)
+{
+ rtx src = *srcp;
+ rtx src_size, dst_size;
+ int off = 0;
+ int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ src_align_bytes = desired_align - src_align_bytes;
+ src_size = MEM_SIZE (src);
+ dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
+ off = 1;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ src = adjust_automodify_address_nv (src, HImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0
+ && (src_align_bytes & 1) == (align_bytes & 1)
+ && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
+ set_mem_align (src, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ src = adjust_automodify_address_nv (src, SImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ off = 4;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 7) == (align_bytes & 7))
+ src_align = 8;
+ else if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (src_align > (unsigned int) desired_align)
+ src_align = desired_align;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (src_size)
+ set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
+ *srcp = src;
+ return dst;
+}
+
+/* Store enough at DEST to align DEST, known to be aligned to ALIGN, up to
+ DESIRED_ALIGNMENT. */
+static void
+expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
+ int align, int desired_alignment)
+{
+ if (align <= 1 && desired_alignment > 1)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 1, false);
+ destmem = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
+ ix86_adjust_counter (count, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2 && desired_alignment > 2)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 2, false);
+ destmem = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
+ ix86_adjust_counter (count, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 4, false);
+ destmem = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
+ ix86_adjust_counter (count, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ gcc_assert (desired_alignment <= 8);
+}
+
+/* Store enough at DST to align DST, known to be aligned to ALIGN, up to
+ DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
+static rtx
+expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
+ int desired_align, int align_bytes)
+{
+ int off = 0;
+ rtx dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ off = 1;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (QImode, value)));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (HImode, value)));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ off = 4;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (SImode, value)));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ return dst;
+}
+
+/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
+static enum stringop_alg
+decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
+ int *dynamic_check)
+{
+ const struct stringop_algs * algs;
+ bool optimize_for_speed;
+ /* Algorithms using the rep prefix want at least edi and ecx;
+ additionally, memset wants eax and memcpy wants esi. Don't
+ consider such algorithms if the user has appropriated those
+ registers for their own purposes. */
+ bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
+ || (memset
+ ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
+
+#define ALG_USABLE_P(alg) (rep_prefix_usable \
+ || (alg != rep_prefix_1_byte \
+ && alg != rep_prefix_4_byte \
+ && alg != rep_prefix_8_byte))
+ const struct processor_costs *cost;
+
+ /* Even if the string operation call is cold, we still might spend a lot
+ of time processing large blocks. */
+ if (optimize_function_for_size_p (cfun)
+ || (optimize_insn_for_size_p ()
+ && expected_size != -1 && expected_size < 256))
+ optimize_for_speed = false;
+ else
+ optimize_for_speed = true;
+
+ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
+
+ *dynamic_check = -1;
+ if (memset)
+ algs = &cost->memset[TARGET_64BIT != 0];
+ else
+ algs = &cost->memcpy[TARGET_64BIT != 0];
+ if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
+ return stringop_alg;
+ /* rep; movq or rep; movl is the smallest variant. */
+ else if (!optimize_for_speed)
+ {
+ if (!count || (count & 3))
+ return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
+ else
+ return rep_prefix_usable ? rep_prefix_4_byte : loop;
+ }
+ /* Very tiny blocks are best handled via the loop; REP is expensive to
+ set up. */
+ else if (expected_size != -1 && expected_size < 4)
+ return loop_1_byte;
+ else if (expected_size != -1)
+ {
+ unsigned int i;
+ enum stringop_alg alg = libcall;
+ for (i = 0; i < MAX_STRINGOP_ALGS; i++)
+ {
+ /* We get here if the algorithms that were not libcall-based
+ were rep-prefix based and we are unable to use rep prefixes
+ based on global register usage. Break out of the loop and
+ use the heuristic below. */
+ if (algs->size[i].max == 0)
+ break;
+ if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+
+ if (candidate != libcall && ALG_USABLE_P (candidate))
+ alg = candidate;
+ /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
+ last non-libcall inline algorithm. */
+ if (TARGET_INLINE_ALL_STRINGOPS)
+ {
+ /* When the current size is best copied by a libcall, but we
+ are still forced to inline, run the heuristic below that
+ will pick code for medium-sized blocks. */
+ if (alg != libcall)
+ return alg;
+ break;
+ }
+ else if (ALG_USABLE_P (candidate))
+ return candidate;
+ }
+ }
+ gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
+ }
+ /* When asked to inline the call anyway, try to pick a meaningful choice.
+ We look for the maximal size of block that is faster to copy by hand
+ and assume blocks of at most that size, guessing that the average size
+ will be roughly half of the maximum.
+
+ If this turns out to be bad, we might simply specify the preferred
+ choice in ix86_costs. */
+ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
+ {
+ int max = -1;
+ enum stringop_alg alg;
+ int i;
+ bool any_alg_usable_p = true;
+
+ for (i = 0; i < MAX_STRINGOP_ALGS; i++)
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+ any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
+
+ if (candidate != libcall && candidate
+ && ALG_USABLE_P (candidate))
+ max = algs->size[i].max;
+ }
+ /* If there aren't any usable algorithms, then recursing on
+ smaller sizes isn't going to find anything. Just return the
+ simple byte-at-a-time copy loop. */
+ if (!any_alg_usable_p)
+ {
+ /* Pick something reasonable. */
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = 128;
+ return loop_1_byte;
+ }
+ if (max == -1)
+ max = 4096;
+ alg = decide_alg (count, max / 2, memset, dynamic_check);
+ gcc_assert (*dynamic_check == -1);
+ gcc_assert (alg != libcall);
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = max;
+ return alg;
+ }
+ return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
+}
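+
+/* Illustrative example: under -Os with a byte count known to be a multiple
+   of four, the !optimize_for_speed branch above picks rep_prefix_4_byte
+   (rep movsl / rep stosl), the smallest encoding; an unknown or odd count
+   falls back to rep_prefix_1_byte, or to a byte loop when the rep
+   registers are unavailable.  */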
+
+/* Decide on alignment. We know that the operand is already aligned to ALIGN
+ (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
+static int
+decide_alignment (int align,
+ enum stringop_alg alg,
+ int expected_size)
+{
+ int desired_align = 0;
+ switch (alg)
+ {
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ case unrolled_loop:
+ desired_align = GET_MODE_SIZE (Pmode);
+ break;
+ case rep_prefix_8_byte:
+ desired_align = 8;
+ break;
+ case rep_prefix_4_byte:
+ /* PentiumPro has special logic that triggers for 8-byte-aligned
+ blocks, copying a whole cache line at once. */
+ if (TARGET_PENTIUMPRO)
+ desired_align = 8;
+ else
+ desired_align = 4;
+ break;
+ case rep_prefix_1_byte:
+ /* PentiumPro has special logic that triggers for 8-byte-aligned
+ blocks, copying a whole cache line at once. */
+ if (TARGET_PENTIUMPRO)
+ desired_align = 8;
+ else
+ desired_align = 1;
+ break;
+ case loop_1_byte:
+ desired_align = 1;
+ break;
+ case libcall:
+ return 0;
+ }
+
+ if (optimize_size)
+ desired_align = 1;
+ if (desired_align < align)
+ desired_align = align;
+ if (expected_size != -1 && expected_size < 4)
+ desired_align = align;
+ return desired_align;
+}
+
+/* Return the smallest power of 2 greater than VAL. */
+static int
+smallest_pow2_greater_than (int val)
+{
+ int ret = 1;
+ while (ret <= val)
+ ret <<= 1;
+ return ret;
+}
+
+/* Expand string move (memcpy) operation. Use i386 string operations when
+ profitable. expand_setmem contains similar code. The code depends upon
+ architecture, block size and alignment, but always has the same
+ overall structure:
+
+ 1) Prologue guard: a conditional that jumps to the epilogue for small
+ blocks that can be handled by the epilogue alone. This is faster but
+ also needed for correctness, since the prologue assumes the block is
+ larger than the desired alignment.
+
+ With -minline-stringops-dynamically, an optional dynamic check for
+ size, with a libcall for large blocks, is emitted here too.
+
+ 2) Prologue: copy the first few bytes in order to get the destination
+ aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
+ DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
+ We emit either a jump tree for power-of-two sized blocks, or a byte loop.
+
+ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
+ with the specified algorithm.
+
+ 4) Epilogue: code copying the tail of the block that is too small to be
+ handled by the main body (or up to the size guarded by the prologue
+ guard). */
+
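+/* Illustrative shape of the emitted code for a non-constant count
+   (labels and registers are schematic):
+
+       cmpl  $EPILOGUE_SIZE, %ecx
+       jb    .Lepilogue                    ; 1) prologue guard
+       ...align %edi to DESIRED_ALIGN...   ; 2) alignment prologue
+       ...main copy loop / rep stringop... ; 3) main body
+   .Lepilogue:
+       ...copy count & (EPILOGUE_SIZE - 1) bytes...  ; 4) epilogue
+   */
+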
+bool
+ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
+ rtx expected_align_exp, rtx expected_size_exp)
+{
+ rtx destreg;
+ rtx srcreg;
+ rtx label = NULL;
+ rtx tmp;
+ rtx jump_around_label = NULL;
+ HOST_WIDE_INT align = 1;
+ unsigned HOST_WIDE_INT count = 0;
+ HOST_WIDE_INT expected_size = -1;
+ int size_needed = 0, epilogue_size_needed;
+ int desired_align = 0, align_bytes = 0;
+ enum stringop_alg alg;
+ int dynamic_check;
+ bool need_zero_guard = false;
+
+ if (CONST_INT_P (align_exp))
+ align = INTVAL (align_exp);
+ /* i386 can do misaligned accesses at a reasonably increased cost. */
+ if (CONST_INT_P (expected_align_exp)
+ && INTVAL (expected_align_exp) > align)
+ align = INTVAL (expected_align_exp);
+ /* ALIGN is the minimum of destination and source alignment, but we care here
+ just about destination alignment. */
+ else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
+ align = MEM_ALIGN (dst) / BITS_PER_UNIT;
+
+ if (CONST_INT_P (count_exp))
+ count = expected_size = INTVAL (count_exp);
+ if (CONST_INT_P (expected_size_exp) && count == 0)
+ expected_size = INTVAL (expected_size_exp);
+
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return false;
+
+ /* Step 0: Decide on preferred algorithm, desired alignment and
+ size of chunks to be copied by main loop. */
+
+ alg = decide_alg (count, expected_size, false, &dynamic_check);
+ desired_align = decide_alignment (align, alg, expected_size);
+
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = desired_align;
+
+ if (alg == libcall)
+ return false;
+ gcc_assert (alg != no_stringop);
+ if (!count)
+ count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode);
+ break;
+ case unrolled_loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
+ break;
+ case rep_prefix_8_byte:
+ size_needed = 8;
+ break;
+ case rep_prefix_4_byte:
+ size_needed = 4;
+ break;
+ case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
+ case loop_1_byte:
+ need_zero_guard = true;
+ size_needed = 1;
+ break;
+ }
+
+ epilogue_size_needed = size_needed;
+
+ /* Step 1: Prologue guard. */
+
+ /* The alignment code needs the count to be in a register. */
+ if (CONST_INT_P (count_exp) && desired_align > align)
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ count_exp = force_reg (counter_mode (count_exp), count_exp);
+ }
+ gcc_assert (desired_align >= 1 && align >= 1);
+
+ /* Ensure that the alignment prologue won't copy past the end of the block. */
+ if (size_needed > 1 || (desired_align > 1 && desired_align > align))
+ {
+ epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
+ /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
+ bytes. Make sure it is a power of 2. */
+ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
+
+ if (count)
+ {
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+ /* If the main algorithm works on QImode, no epilogue is needed.
+ For small sizes just don't align anything. */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
+ }
+ else
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size < epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
+ }
+
+ /* Emit code to decide at runtime whether a library call or inline code
+ should be used. */
+ if (dynamic_check != -1)
+ {
+ if (CONST_INT_P (count_exp))
+ {
+ if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
+ {
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ count_exp = const0_rtx;
+ goto epilogue;
+ }
+ }
+ else
+ {
+ rtx hot_label = gen_label_rtx ();
+ jump_around_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
+ LEU, 0, GET_MODE (count_exp), 1, hot_label);
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ emit_jump (jump_around_label);
+ emit_label (hot_label);
+ }
+ }
+
+ /* Step 2: Alignment prologue. */
+
+ if (desired_align > align)
+ {
+ if (align_bytes == 0)
+ {
+ /* Except for the first move in the epilogue, we no longer know
+ the constant offset in the aliasing info. It doesn't seem worth
+ the pain to maintain it for the first move, so throw away
+ the info early. */
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
+ {
+ /* It is possible that we copied enough so that the main loop will
+ not execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
+ }
+ if (label && size_needed == 1)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL;
+ epilogue_size_needed = 1;
+ }
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
+
+ /* Step 3: Main loop. */
+
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop_1_byte:
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, QImode, 1, expected_size);
+ break;
+ case loop:
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, Pmode, 1, expected_size);
+ break;
+ case unrolled_loop:
+ /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
+ enough registers for 4 temporaries anyway. */
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, Pmode, TARGET_64BIT ? 4 : 2,
+ expected_size);
+ break;
+ case rep_prefix_8_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ DImode);
+ break;
+ case rep_prefix_4_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ SImode);
+ break;
+ case rep_prefix_1_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ QImode);
+ break;
+ }
+ /* Properly adjust the offsets of the src and dest memory for aliasing. */
+ if (CONST_INT_P (count_exp))
+ {
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg,
+ (count / size_needed) * size_needed);
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
+ (count / size_needed) * size_needed);
+ }
+ else
+ {
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ }
+
+ /* Step 4: Epilogue to copy the remaining bytes. */
+ epilogue:
+ if (label)
+ {
+ /* When the main loop is done, COUNT_EXP might hold the original count,
+ while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
+ The epilogue code will actually copy
+ COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes. Compensate if needed. */
+
+ if (size_needed < epilogue_size_needed)
+ {
+ tmp =
+ expand_simple_binop (counter_mode (count_exp), AND, count_exp,
+ GEN_INT (size_needed - 1), count_exp, 1,
+ OPTAB_DIRECT);
+ if (tmp != count_exp)
+ emit_move_insn (count_exp, tmp);
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (count_exp != const0_rtx && epilogue_size_needed > 1)
+ expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
+ epilogue_size_needed);
+ if (jump_around_label)
+ emit_label (jump_around_label);
+ return true;
+}
+
+/* Helper function for memset. For the QImode value 0xXY produce
+ 0xXYXYXYXY of the width specified by MODE. This is essentially
+ a multiplication by 0x01010101, but we can do slightly better than
+ synth_mult by unwinding the sequence by hand on CPUs with
+ slow multiply. */
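+/* For example, for the constant byte value 0xAB the CONST_INT path
+ below computes 0xAB -> 0xABAB -> 0xABABABAB (and on to
+ 0xABABABABABABABAB for DImode) without emitting any insns. */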
+static rtx
+promote_duplicated_reg (enum machine_mode mode, rtx val)
+{
+ enum machine_mode valmode = GET_MODE (val);
+ rtx tmp;
+ int nops = mode == DImode ? 3 : 2;
+
+ gcc_assert (mode == SImode || mode == DImode);
+ if (val == const0_rtx)
+ return copy_to_mode_reg (mode, const0_rtx);
+ if (CONST_INT_P (val))
+ {
+ HOST_WIDE_INT v = INTVAL (val) & 255;
+
+ v |= v << 8;
+ v |= v << 16;
+ if (mode == DImode)
+ v |= (v << 16) << 16;
+ return copy_to_mode_reg (mode, gen_int_mode (v, mode));
+ }
+
+ if (valmode == VOIDmode)
+ valmode = QImode;
+ if (valmode != QImode)
+ val = gen_lowpart (QImode, val);
+ if (mode == QImode)
+ return val;
+ if (!TARGET_PARTIAL_REG_STALL)
+ nops--;
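+ /* Use a multiplication by 0x01010101 (0x0101010101010101 for DImode)
+ when its cost estimate is below that of the shift-and-or sequence
+ emitted in the else branch below. */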
+ if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
+ + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
+ <= (ix86_cost->shift_const + ix86_cost->add) * nops
+ + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
+ {
+ rtx reg = convert_modes (mode, QImode, val, true);
+ tmp = promote_duplicated_reg (mode, const1_rtx);
+ return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
+ OPTAB_DIRECT);
+ }
+ else
+ {
+ rtx reg = convert_modes (mode, QImode, val, true);
+
+ if (!TARGET_PARTIAL_REG_STALL)
+ {
+ if (mode == SImode)
+ emit_insn (gen_movsi_insv_1 (reg, reg));
+ else
+ emit_insn (gen_movdi_insv_1 (reg, reg));
+ }
+ else
+ {
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ }
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ if (mode == SImode)
+ return reg;
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ return reg;
+ }
+}
+
+/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
+ be needed by main loop copying SIZE_NEEDED chunks and prologue getting
+ alignment from ALIGN to DESIRED_ALIGN. */
+static rtx
+promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
+{
+ rtx promoted_val;
+
+ if (TARGET_64BIT
+ && (size_needed > 4 || (desired_align > align && desired_align > 4)))
+ promoted_val = promote_duplicated_reg (DImode, val);
+ else if (size_needed > 2 || (desired_align > align && desired_align > 2))
+ promoted_val = promote_duplicated_reg (SImode, val);
+ else if (size_needed > 1 || (desired_align > align && desired_align > 1))
+ promoted_val = promote_duplicated_reg (HImode, val);
+ else
+ promoted_val = val;
+
+ return promoted_val;
+}
+
+/* Expand a string set operation (memset). Use i386 string operations
+ when profitable. See the ix86_expand_movmem comment for an explanation
+ of the individual steps performed. */
+bool
+ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
+ rtx expected_align_exp, rtx expected_size_exp)
+{
+ rtx destreg;
+ rtx label = NULL;
+ rtx tmp;
+ rtx jump_around_label = NULL;
+ HOST_WIDE_INT align = 1;
+ unsigned HOST_WIDE_INT count = 0;
+ HOST_WIDE_INT expected_size = -1;
+ int size_needed = 0, epilogue_size_needed;
+ int desired_align = 0, align_bytes = 0;
+ enum stringop_alg alg;
+ rtx promoted_val = NULL;
+ bool force_loopy_epilogue = false;
+ int dynamic_check;
+ bool need_zero_guard = false;
+
+ if (CONST_INT_P (align_exp))
+ align = INTVAL (align_exp);
+ /* i386 can do misaligned accesses at a reasonably small extra cost. */
+ if (CONST_INT_P (expected_align_exp)
+ && INTVAL (expected_align_exp) > align)
+ align = INTVAL (expected_align_exp);
+ if (CONST_INT_P (count_exp))
+ count = expected_size = INTVAL (count_exp);
+ if (CONST_INT_P (expected_size_exp) && count == 0)
+ expected_size = INTVAL (expected_size_exp);
+
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return false;
+
+ /* Step 0: Decide on preferred algorithm, desired alignment and
+ size of chunks to be copied by main loop. */
+
+ alg = decide_alg (count, expected_size, true, &dynamic_check);
+ desired_align = decide_alignment (align, alg, expected_size);
+
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = desired_align;
+
+ if (alg == libcall)
+ return false;
+ gcc_assert (alg != no_stringop);
+ if (!count)
+ count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode);
+ break;
+ case unrolled_loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode) * 4;
+ break;
+ case rep_prefix_8_byte:
+ size_needed = 8;
+ break;
+ case rep_prefix_4_byte:
+ size_needed = 4;
+ break;
+ case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
+ case loop_1_byte:
+ need_zero_guard = true;
+ size_needed = 1;
+ break;
+ }
+ epilogue_size_needed = size_needed;
+
+ /* Step 1: Prologue guard. */
+
+ /* Alignment code needs count to be in a register. */
+ if (CONST_INT_P (count_exp) && desired_align > align)
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ {
+ enum machine_mode mode = SImode;
+ if (TARGET_64BIT && (count & ~0xffffffff))
+ mode = DImode;
+ count_exp = force_reg (mode, count_exp);
+ }
+ }
+ /* Do the cheap promotion to allow better CSE across the
+ main loop and epilogue (i.e., one load of the big constant in
+ front of all the code). */
+ if (CONST_INT_P (val_exp))
+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
+ desired_align, align);
+ /* Ensure that the alignment prologue won't store past the end of the block. */
+ if (size_needed > 1 || (desired_align > 1 && desired_align > align))
+ {
+ epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
+ /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
+ Make sure it is a power of 2. */
+ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
+
+ /* To improve performance of small blocks, we jump around the VAL
+ promoting code. This means that if the promoted VAL is not constant,
+ we might not use it in the epilogue and have to use the byte
+ loop variant. */
+ if (epilogue_size_needed > 2 && !promoted_val)
+ force_loopy_epilogue = true;
+ if (count)
+ {
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+ /* If the main algorithm works on QImode, no epilogue is needed.
+ For small sizes just don't align anything. */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
+ }
+ else
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size <= epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
+ }
+ if (dynamic_check != -1)
+ {
+ rtx hot_label = gen_label_rtx ();
+ jump_around_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
+ LEU, 0, counter_mode (count_exp), 1, hot_label);
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ set_storage_via_libcall (dst, count_exp, val_exp, false);
+ emit_jump (jump_around_label);
+ emit_label (hot_label);
+ }
+
+ /* Step 2: Alignment prologue. */
+
+ /* Do the expensive promotion once we have branched off the small blocks. */
+ if (!promoted_val)
+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
+ desired_align, align);
+ gcc_assert (desired_align >= 1 && align >= 1);
+
+ if (desired_align > align)
+ {
+ if (align_bytes == 0)
+ {
+ /* Except for the first move in the epilogue, we no longer know
+ the constant offset in the aliasing info. It does not seem worth
+ the pain to maintain it for the first move, so throw away
+ the info early. */
+ dst = change_address (dst, BLKmode, destreg);
+ expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
+ {
+ /* It is possible that we stored enough so that the main loop will not
+ execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
+ }
+ if (label && size_needed == 1)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL;
+ promoted_val = val_exp;
+ epilogue_size_needed = 1;
+ }
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
+
+ /* Step 3: Main loop. */
+
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop_1_byte:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, QImode, 1, expected_size);
+ break;
+ case loop:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, Pmode, 1, expected_size);
+ break;
+ case unrolled_loop:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, Pmode, 4, expected_size);
+ break;
+ case rep_prefix_8_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ DImode, val_exp);
+ break;
+ case rep_prefix_4_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ SImode, val_exp);
+ break;
+ case rep_prefix_1_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ QImode, val_exp);
+ break;
+ }
+ /* Properly adjust the offset of the dest memory for aliasing. */
+ if (CONST_INT_P (count_exp))
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
+ (count / size_needed) * size_needed);
+ else
+ dst = change_address (dst, BLKmode, destreg);
+
+ /* Step 4: Epilogue to store the remaining bytes. */
+
+ if (label)
+ {
+ /* When the main loop is done, COUNT_EXP might hold the original count,
+ while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
+ Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
+ bytes. Compensate if needed. */
+
+ if (size_needed < epilogue_size_needed)
+ {
+ tmp =
+ expand_simple_binop (counter_mode (count_exp), AND, count_exp,
+ GEN_INT (size_needed - 1), count_exp, 1,
+ OPTAB_DIRECT);
+ if (tmp != count_exp)
+ emit_move_insn (count_exp, tmp);
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ epilogue:
+ if (count_exp != const0_rtx && epilogue_size_needed > 1)
+ {
+ if (force_loopy_epilogue)
+ expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
+ epilogue_size_needed);
+ else
+ expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
+ epilogue_size_needed);
+ }
+ if (jump_around_label)
+ emit_label (jump_around_label);
+ return true;
+}
+
+/* Expand the appropriate insns for doing strlen if not just doing
+ repnz; scasb
+
+ out = result, initialized with the start address
+ align_rtx = alignment of the address.
+ scratch = scratch register, initialized with the start address when
+ not aligned, otherwise undefined
+
+ This is just the body. It needs the initializations mentioned above and
+ some address computing at the end. These things are done in i386.md. */
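+/* In outline: the code below first advances OUT one byte at a time
+ until it is 4-byte aligned, then scans four bytes per iteration using
+ the (x - 0x01010101) & ~x & 0x80808080 zero-byte test (a worked
+ example accompanies the loop below), and finally adjusts OUT to point
+ at the terminating zero byte. */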
+
+static void
+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
+{
+ int align;
+ rtx tmp;
+ rtx align_2_label = NULL_RTX;
+ rtx align_3_label = NULL_RTX;
+ rtx align_4_label = gen_label_rtx ();
+ rtx end_0_label = gen_label_rtx ();
+ rtx mem;
+ rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx cmp;
+
+ align = 0;
+ if (CONST_INT_P (align_rtx))
+ align = INTVAL (align_rtx);
+
+ /* Loop to check 1..3 bytes for null to get an aligned pointer. */
+
+ /* Is there a known alignment and is it less than 4? */
+ if (align < 4)
+ {
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
+ /* Is there a known alignment and is it not 2? */
+ if (align != 2)
+ {
+ align_3_label = gen_label_rtx (); /* Label for addr & 3 == 3. */
+ align_2_label = gen_label_rtx (); /* Label for addr & 3 == 2. */
+
+ /* Leave just the two lower bits. */
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
+ Pmode, 1, align_2_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
+ Pmode, 1, align_3_label);
+ }
+ else
+ {
+ /* Since the alignment is 2, we have to check 2 or 0 bytes;
+ check whether it is aligned to a 4-byte boundary. */
+
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ }
+
+ mem = change_address (src, QImode, out);
+
+ /* Now compare the bytes. */
+
+ /* Compare the leading unaligned bytes on a byte-by-byte basis. */
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
+ QImode, 1, end_0_label);
+
+ /* Increment the address. */
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
+
+ /* Not needed with an alignment of 2 */
+ if (align != 2)
+ {
+ emit_label (align_2_label);
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
+
+ emit_label (align_3_label);
+ }
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ emit_insn (ix86_gen_add3 (out, out, const1_rtx));
+ }
+
+ /* Generate loop to check 4 bytes at a time. It is not a good idea
+ to align this loop; it only makes the program bigger and does not
+ speed it up. */
+ emit_label (align_4_label);
+
+ mem = change_address (src, SImode, out);
+ emit_move_insn (scratch, mem);
+ emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
+
+ /* This formula yields a nonzero result iff one of the bytes is zero.
+ This saves three branches inside the loop and many cycles. */
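+ /* For example, with SCRATCH = 0x62005A41 (second-highest byte zero):
+ SCRATCH - 0x01010101 = 0x60FF5940
+ ~SCRATCH = 0x9DFFA5BE
+ AND of the above = 0x00FF0100
+ ... & 0x80808080 = 0x00800000 -- nonzero, flagging the zero byte. */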
+
+ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
+ emit_insn (gen_one_cmplsi2 (scratch, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg,
+ gen_int_mode (0x80808080, SImode)));
+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
+ align_4_label);
+
+ if (TARGET_CMOVE)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, tmpreg);
+ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
+
+ /* If zero is not in the first two bytes, move two bytes forward. */
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_IF_THEN_ELSE (SImode, tmp,
+ reg,
+ tmpreg)));
+ /* Emit lea manually to avoid clobbering of flags. */
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, const2_rtx)));
+
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, out,
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
+ }
+ else
+ {
+ rtx end_2_label = gen_label_rtx ();
+ /* Is zero in the first two bytes? */
+
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, end_2_label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = end_2_label;
+
+ /* Not in the first two. Move two bytes forward. */
+ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
+ emit_insn (ix86_gen_add3 (out, out, const2_rtx));
+
+ emit_label (end_2_label);
+
+ }
+
+ /* Avoid branch in fixing the byte. */
+ tmpreg = gen_lowpart (QImode, tmpreg);
+ emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ tmp = gen_rtx_REG (CCmode, FLAGS_REG);
+ cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
+ emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
+
+ emit_label (end_0_label);
+}
+
+/* Expand strlen. */
+
+bool
+ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+ /* The generic case of the strlen expander is long. Avoid expanding it
+ unless TARGET_INLINE_ALL_STRINGOPS. */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_insn_for_size_p ()
+ && (!CONST_INT_P (align) || INTVAL (align) < 4))
+ return false;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_insn_for_size_p ())
+ {
+ /* Well it seems that some optimizer does not combine a call like
+ foo(strlen(bar), strlen(bar));
+ when the move and the subtraction are done here. It does calculate
+ the length just once when these instructions are done inside of
+ output_strlen_unroll(). But I think since &bar[strlen(bar)] is
+ often used and I use one fewer register for the lifetime of
+ output_strlen_unroll() this is better. */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, src, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ emit_insn (ix86_gen_sub3 (out, out, addr));
+ }
+ else
+ {
+ rtx unspec;
+
+ /* Can't use this if the user has appropriated eax, ecx, or edi. */
+ if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
+ return false;
+
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
+
+ src = replace_equiv_address_nv (src, scratch3);
+
+ /* If .md starts supporting :P, this can be done in .md. */
+ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
+ scratch4), UNSPEC_SCAS);
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
+ emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
+ emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
+ }
+ return true;
+}
+
+/* For a given symbol (function), construct code to compute the address
+ of its PLT entry in the large x86-64 PIC model. */
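+/* The emitted sequence corresponds roughly to
+ movabs $symbol@PLTOFF, tmp
+ add pic_register, tmp
+ (an illustrative rendering of the RTL below, not literal output). */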
+rtx
+construct_plt_address (rtx symbol)
+{
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ gcc_assert (ix86_cmodel == CM_LARGE_PIC);
+
+ emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
+ emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
+ return tmp;
+}
+
+rtx
+ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ rtx callarg2,
+ rtx pop, int sibcall)
+{
+ rtx use = NULL, call;
+
+ if (pop == const0_rtx)
+ pop = NULL;
+ gcc_assert (!TARGET_64BIT || !pop);
+
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ fnaddr = machopic_indirect_call_target (fnaddr);
+#endif
+ }
+ else
+ {
+ /* Static functions and indirect calls don't need the pic register. */
+ if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ use_reg (&use, pic_offset_table_rtx);
+ }
+
+ if (TARGET_64BIT && INTVAL (callarg2) >= 0)
+ {
+ rtx al = gen_rtx_REG (QImode, AX_REG);
+ emit_move_insn (al, callarg2);
+ use_reg (&use, al);
+ }
+
+ if (ix86_cmodel == CM_LARGE_PIC
+ && MEM_P (fnaddr)
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
+ fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
+ else if (sibcall
+ ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
+ : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+
+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+ if (pop)
+ {
+ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
+ pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ }
+ if (TARGET_64BIT
+ && ix86_cfun_abi () == MS_ABI
+ && (!callarg2 || INTVAL (callarg2) != -2))
+ {
+ /* We need to represent that SI and DI registers are clobbered
+ by SYSV calls. */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ unsigned int i;
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
+ rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
+
+ vec[0] = call;
+ vec[1] = unspec;
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG
+ (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
+
+ call = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
+ + 2, vec));
+ }
+
+ /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
+ if (TARGET_VZEROUPPER)
+ {
+ rtx unspec;
+ int avx256;
+
+ if (cfun->machine->callee_pass_avx256_p)
+ {
+ if (cfun->machine->callee_return_avx256_p)
+ avx256 = callee_return_pass_avx256;
+ else
+ avx256 = callee_pass_avx256;
+ }
+ else if (cfun->machine->callee_return_avx256_p)
+ avx256 = callee_return_avx256;
+ else
+ avx256 = call_no_avx256;
+
+ if (reload_completed)
+ emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
+ else
+ {
+ unspec = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, GEN_INT (avx256)),
+ UNSPEC_CALL_NEEDS_VZEROUPPER);
+ call = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, call, unspec));
+ }
+ }
+
+ call = emit_call_insn (call);
+ if (use)
+ CALL_INSN_FUNCTION_USAGE (call) = use;
+
+ return call;
+}
+
+void
+ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
+{
+ rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ emit_insn (gen_avx_vzeroupper (vzeroupper));
+ emit_call_insn (call);
+}
+
+/* Output the assembly for a call instruction. */
+
+const char *
+ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
+{
+ bool direct_p = constant_call_address_operand (call_op, Pmode);
+ bool seh_nop_p = false;
+
+ gcc_assert (addr_op == 0 || addr_op == 1);
+
+ if (SIBLING_CALL_P (insn))
+ {
+ if (direct_p)
+ return addr_op ? "jmp\t%P1" : "jmp\t%P0";
+ /* SEH epilogue detection requires the indirect branch case
+ to include REX.W. */
+ else if (TARGET_SEH)
+ return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
+ else
+ return addr_op ? "jmp\t%A1" : "jmp\t%A0";
+ }
+
+ /* SEH unwinding can require an extra nop to be emitted in several
+ circumstances. Determine if we have one of those. */
+ if (TARGET_SEH)
+ {
+ rtx i;
+
+ for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
+ {
+ /* If we get to another real insn, we don't need the nop. */
+ if (INSN_P (i))
+ break;
+
+ /* If we get to the epilogue note, prevent a catch region from
+ being adjacent to the standard epilogue sequence. If non-
+ call-exceptions, we'll have done this during epilogue emission. */
+ if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
+ && !flag_non_call_exceptions
+ && !can_throw_internal (insn))
+ {
+ seh_nop_p = true;
+ break;
+ }
+ }
+
+ /* If we didn't find a real insn following the call, prevent the
+ unwinder from looking into the next function. */
+ if (i == NULL)
+ seh_nop_p = true;
+ }
+
+ if (direct_p)
+ {
+ if (seh_nop_p)
+ return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
+ else
+ return addr_op ? "call\t%P1" : "call\t%P0";
+ }
+ else
+ {
+ if (seh_nop_p)
+ return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
+ else
+ return addr_op ? "call\t%A1" : "call\t%A0";
+ }
+}
+
+/* Clear stack slot assignments remembered from previous functions.
+ This is called from INIT_EXPANDERS once before RTL is emitted for each
+ function. */
+
+static struct machine_function *
+ix86_init_machine_status (void)
+{
+ struct machine_function *f;
+
+ f = ggc_alloc_cleared_machine_function ();
+ f->use_fast_prologue_epilogue_nregs = -1;
+ f->tls_descriptor_call_expanded_p = 0;
+ f->call_abi = ix86_abi;
+
+ return f;
+}
+
+/* Return a MEM corresponding to a stack slot with mode MODE.
+ Allocate a new slot if necessary.
+
+ The RTL for a function can have several slots available: N is
+ which slot to use. */
+
+rtx
+assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
+{
+ struct stack_local_entry *s;
+
+ gcc_assert (n < MAX_386_STACK_LOCALS);
+
+ /* Virtual slot is valid only before vregs are instantiated. */
+ gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->mode == mode && s->n == n)
+ return validize_mem (copy_rtx (s->rtl));
+
+ s = ggc_alloc_stack_local_entry ();
+ s->n = n;
+ s->mode = mode;
+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+
+ s->next = ix86_stack_locals;
+ ix86_stack_locals = s;
+ return validize_mem (s->rtl);
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx ix86_tls_symbol;
+rtx
+ix86_tls_get_addr (void)
+{
+
+ if (!ix86_tls_symbol)
+ {
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ (TARGET_ANY_GNU_TLS
+ && !TARGET_64BIT)
+ ? "___tls_get_addr"
+ : "__tls_get_addr");
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+rtx
+ix86_tls_module_base (void)
+{
+
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
+
+/* Calculate the length of the memory address in the instruction
+ encoding. Does not include the one-byte modrm, opcode, or prefix. */
+
+int
+memory_address_length (rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int len;
+ int ok;
+
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY
+ || GET_CODE (addr) == POST_MODIFY)
+ return 0;
+
+ ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ len = 0;
+
+ /* Rule of thumb:
+ - esp as the base always wants an index,
+ - ebp as the base always wants a displacement,
+ - r12 as the base always wants an index,
+ - r13 as the base always wants a displacement. */
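+ /* A few illustrative 32-bit encodings (not generated here):
+ mov (%ebx),%eax -> 8b 03 one-byte modrm
+ mov (%esp),%eax -> 8b 04 24 SIB byte required
+ mov (%ebp),%eax -> 8b 45 00 disp8 required */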
+
+ /* Register Indirect. */
+ if (base && !index && !disp)
+ {
+ /* esp (for its index) and ebp (for its displacement) need
+ the two-byte modrm form. Similarly for r12 and r13 in 64-bit
+ code. */
+ if (REG_P (addr)
+ && (addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || REGNO (addr) == SP_REG
+ || REGNO (addr) == BP_REG
+ || REGNO (addr) == R12_REG
+ || REGNO (addr) == R13_REG))
+ len = 1;
+ }
+
+ /* Direct Addressing. In 64-bit mode mod 00 r/m 5
+ is not disp32, but disp32(%rip), so for plain disp32 a
+ SIB byte is needed, unless print_operand_address
+ optimizes it into disp32(%rip) or (%rip) is implied
+ by an UNSPEC. */
+ else if (disp && !base && !index)
+ {
+ len = 4;
+ if (TARGET_64BIT)
+ {
+ rtx symbol = disp;
+
+ if (GET_CODE (disp) == CONST)
+ symbol = XEXP (disp, 0);
+ if (GET_CODE (symbol) == PLUS
+ && CONST_INT_P (XEXP (symbol, 1)))
+ symbol = XEXP (symbol, 0);
+
+ if (GET_CODE (symbol) != LABEL_REF
+ && (GET_CODE (symbol) != SYMBOL_REF
+ || SYMBOL_REF_TLS_MODEL (symbol) != 0)
+ && (GET_CODE (symbol) != UNSPEC
+ || (XINT (symbol, 1) != UNSPEC_GOTPCREL
+ && XINT (symbol, 1) != UNSPEC_PCREL
+ && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
+ len += 1;
+ }
+ }
+
+ else
+ {
+ /* Find the length of the displacement constant. */
+ if (disp)
+ {
+ if (base && satisfies_constraint_K (disp))
+ len = 1;
+ else
+ len = 4;
+ }
+ /* ebp always wants a displacement. Similarly r13. */
+ else if (base && REG_P (base)
+ && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
+ len = 1;
+
+ /* An index requires the two-byte modrm form.... */
+ if (index
+ /* ...like esp (or r12), which always wants an index. */
+ || base == arg_pointer_rtx
+ || base == frame_pointer_rtx
+ || (base && REG_P (base)
+ && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
+ len += 1;
+ }
+
+ switch (parts.seg)
+ {
+ case SEG_FS:
+ case SEG_GS:
+ len += 1;
+ break;
+ default:
+ break;
+ }
+
+ return len;
+}
+
+/* Compute default value for "length_immediate" attribute. When SHORTFORM
+ is set, expect that insn have 8bit immediate alternative. */
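+/* For instance, addl $1, %eax can use the 83 /0 ib encoding, whose
+ immediate is a single sign-extended byte (an illustration only; the
+ attribute merely reports the immediate length). */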
+int
+ix86_attr_length_immediate_default (rtx insn, int shortform)
+{
+ int len = 0;
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (CONSTANT_P (recog_data.operand[i]))
+ {
+ enum attr_mode mode = get_attr_mode (insn);
+
+ gcc_assert (!len);
+ if (shortform && CONST_INT_P (recog_data.operand[i]))
+ {
+ HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
+ switch (mode)
+ {
+ case MODE_QI:
+ len = 1;
+ continue;
+ case MODE_HI:
+ ival = trunc_int_for_mode (ival, HImode);
+ break;
+ case MODE_SI:
+ ival = trunc_int_for_mode (ival, SImode);
+ break;
+ default:
+ break;
+ }
+ if (IN_RANGE (ival, -128, 127))
+ {
+ len = 1;
+ continue;
+ }
+ }
+ switch (mode)
+ {
+ case MODE_QI:
+ len = 1;
+ break;
+ case MODE_HI:
+ len = 2;
+ break;
+ case MODE_SI:
+ len = 4;
+ break;
+ /* Immediates for DImode instructions are encoded as
+ 32-bit sign-extended values. */
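+ /* (Only the B8+r movabs form accepts a full 64-bit immediate.) */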
+ case MODE_DI:
+ len = 4;
+ break;
+ default:
+ fatal_insn ("unknown insn mode", insn);
+ }
+ }
+ return len;
+}
+/* Compute default value for "length_address" attribute. */
+int
+ix86_attr_length_address_default (rtx insn)
+{
+ int i;
+
+ if (get_attr_type (insn) == TYPE_LEA)
+ {
+ rtx set = PATTERN (insn), addr;
+
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+
+ gcc_assert (GET_CODE (set) == SET);
+
+ addr = SET_SRC (set);
+ if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
+ {
+ if (GET_CODE (addr) == ZERO_EXTEND)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == SUBREG)
+ addr = SUBREG_REG (addr);
+ }
+
+ return memory_address_length (addr);
+ }
+
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ constrain_operands_cached (reload_completed);
+ if (which_alternative != -1)
+ {
+ const char *constraints = recog_data.constraints[i];
+ int alt = which_alternative;
+
+ while (*constraints == '=' || *constraints == '+')
+ constraints++;
+ while (alt-- > 0)
+ while (*constraints++ != ',')
+ ;
+ /* Skip ignored operands. */
+ if (*constraints == 'X')
+ continue;
+ }
+ return memory_address_length (XEXP (recog_data.operand[i], 0));
+ }
+ return 0;
+}
+
+/* Compute default value for "length_vex" attribute. It includes
+ 2 or 3 byte VEX prefix and 1 opcode byte. */
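+/* For reference, the 2-byte VEX prefix is encoded as c5 xx and the
+ 3-byte form as c4 xx xx, hence the 2 + 1 and 3 + 1 values below. */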
+
+int
+ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
+ int has_vex_w)
+{
+ int i;
+
+ /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
+ requires the 3-byte VEX prefix. */
+ if (!has_0f_opcode || has_vex_w)
+ return 3 + 1;
+
+ /* We can always use the 2-byte VEX prefix in 32-bit mode. */
+ if (!TARGET_64BIT)
+ return 2 + 1;
+
+ extract_insn_cached (insn);
+
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (REG_P (recog_data.operand[i]))
+ {
+ /* The REX.W bit requires the 3-byte VEX prefix. */
+ if (GET_MODE (recog_data.operand[i]) == DImode
+ && GENERAL_REG_P (recog_data.operand[i]))
+ return 3 + 1;
+ }
+ else
+ {
+ /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
+ if (MEM_P (recog_data.operand[i])
+ && x86_extended_reg_mentioned_p (recog_data.operand[i]))
+ return 3 + 1;
+ }
+
+ return 2 + 1;
+}
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+static int
+ix86_issue_rate (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_ATOM:
+ case PROCESSOR_K6:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_CORE2_32:
+ case PROCESSOR_CORE2_64:
+ case PROCESSOR_COREI7_32:
+ case PROCESSOR_COREI7_64:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_NOCONA:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ case PROCESSOR_BDVER1:
+ case PROCESSOR_BTVER1:
+ return 3;
+
+ default:
+ return 1;
+ }
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
+ set by DEP_INSN and nothing else set by DEP_INSN. */
+
+static int
+ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx set, set2;
+
+ /* Simplify the test for uninteresting insns. */
+ if (insn_type != TYPE_SETCC
+ && insn_type != TYPE_ICMOV
+ && insn_type != TYPE_FCMOV
+ && insn_type != TYPE_IBR)
+ return 0;
+
+ if ((set = single_set (dep_insn)) != 0)
+ {
+ set = SET_DEST (set);
+ set2 = NULL_RTX;
+ }
+ else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
+ && XVECLEN (PATTERN (dep_insn), 0) == 2
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
+ {
+ set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
+ set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
+ }
+ else
+ return 0;
+
+ if (!REG_P (set) || REGNO (set) != FLAGS_REG)
+ return 0;
+
+ /* This test is true if the dependent insn reads the flags but
+ not any other potentially set register. */
+ if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
+ return 0;
+
+ if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
+ return 0;
+
+ return 1;
+}
+
+/* Return true iff USE_INSN has a memory address with operands set by
+ SET_INSN. */
+
+bool
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
+{
+ int i;
+ extract_insn_cached (use_insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ rtx addr = XEXP (recog_data.operand[i], 0);
+ return modified_in_p (addr, set_insn) != 0;
+ }
+ return false;
+}
+
+static int
+ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
+ rtx set, set2;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost on all CPUs. */
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_insn_type = get_attr_type (dep_insn);
+
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ /* Address Generation Interlock adds a cycle of latency. */
+ if (insn_type == TYPE_LEA)
+ {
+ rtx addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ if (modified_in_p (addr, dep_insn))
+ cost += 1;
+ }
+ else if (ix86_agi_dependent (dep_insn, insn))
+ cost += 1;
+
+ /* ??? Compares pair with jump/setcc. */
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
+ cost = 0;
+
+ /* Floating point stores require the value to be ready one cycle earlier. */
+ if (insn_type == TYPE_FMOV
+ && get_attr_memory (insn) == MEMORY_STORE
+ && !ix86_agi_dependent (dep_insn, insn))
+ cost += 1;
+ break;
+
+ case PROCESSOR_PENTIUMPRO:
+ memory = get_attr_memory (insn);
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* There is one extra cycle of latency between an FP op and a store. */
+ if (insn_type == TYPE_FMOV
+ && (set = single_set (dep_insn)) != NULL_RTX
+ && (set2 = single_set (insn)) != NULL_RTX
+ && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+ && MEM_P (SET_DEST (set2)))
+ cost += 1;
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start one cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 1)
+ cost--;
+ }
+ break;
+
+ case PROCESSOR_K6:
+ memory = get_attr_memory (insn);
+
+ /* The esp dependency is resolved before the instruction is really
+ finished. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 1;
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start one cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 2)
+ cost -= 2;
+ else
+ cost = 1;
+ }
+ break;
+
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_BDVER1:
+ case PROCESSOR_BTVER1:
+ case PROCESSOR_ATOM:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost = 3;
+
+ /* Because of the difference between the lengths of the integer and
+ floating unit pipeline preparation stages, the memory operands
+ for floating point are cheaper.
+
+ ??? For Athlon the difference is most probably 2. */
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 3;
+ else
+ loadcost = TARGET_ATHLON ? 2 : 0;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+/* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in extra work for the scheduler. */
+
+static int
+ia32_multipass_dfa_lookahead (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_K6:
+ return 1;
+
+ case PROCESSOR_CORE2_32:
+ case PROCESSOR_CORE2_64:
+ case PROCESSOR_COREI7_32:
+ case PROCESSOR_COREI7_64:
+ /* Generally, we want haifa-sched:max_issue() to look ahead as far
+ as the number of instructions that can be executed in a cycle, i.e.,
+ issue_rate. I wonder why tuning for many CPUs does not do this. */
+ return ix86_issue_rate ();
+
+ default:
+ return 0;
+ }
+}
+
+
+
+/* Model the decoder of Core 2/i7.
+ The hooks below, for multipass scheduling (see haifa-sched.c:max_issue),
+ track the instruction fetch block boundaries and make sure that long
+ (9+ bytes) instructions are assigned to D0. */
+
+/* Maximum length of an insn that can be handled by
+ a secondary decoder unit. '8' for Core 2/i7. */
+static int core2i7_secondary_decoder_max_insn_size;
+
+/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
+ '16' for Core 2/i7. */
+static int core2i7_ifetch_block_size;
+
+/* Maximum number of instructions decoder can handle per cycle.
+ '6' for Core 2/i7. */
+static int core2i7_ifetch_block_max_insns;
+
+typedef struct ix86_first_cycle_multipass_data_ *
+ ix86_first_cycle_multipass_data_t;
+typedef const struct ix86_first_cycle_multipass_data_ *
+ const_ix86_first_cycle_multipass_data_t;
+
+/* A variable to store target state across calls to max_issue within
+ one cycle. */
+static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
+ *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
+
+/* Initialize DATA. */
+static void
+core2i7_first_cycle_multipass_init (void *_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+
+ data->ifetch_block_len = 0;
+ data->ifetch_block_n_insns = 0;
+ data->ready_try_change = NULL;
+ data->ready_try_change_size = 0;
+}
+
+/* Advancing the cycle; reset ifetch block counts. */
+static void
+core2i7_dfa_post_advance_cycle (void)
+{
+ ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
+
+ gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
+
+ data->ifetch_block_len = 0;
+ data->ifetch_block_n_insns = 0;
+}
+
+static int min_insn_size (rtx);
+
+/* Filter out insns from ready_try that the core will not be able to issue
+ on the current cycle due to the decoder. */
+static void
+core2i7_first_cycle_multipass_filter_ready_try
+(const_ix86_first_cycle_multipass_data_t data,
+ char *ready_try, int n_ready, bool first_cycle_insn_p)
+{
+ while (n_ready--)
+ {
+ rtx insn;
+ int insn_size;
+
+ if (ready_try[n_ready])
+ continue;
+
+ insn = get_ready_element (n_ready);
+ insn_size = min_insn_size (insn);
+
+ if (/* If this is too long an insn for a secondary decoder ... */
+ (!first_cycle_insn_p
+ && insn_size > core2i7_secondary_decoder_max_insn_size)
+ /* ... or it would not fit into the ifetch block ... */
+ || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
+ /* ... or the decoder is full already ... */
+ || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
+ /* ... mask the insn out. */
+ {
+ ready_try[n_ready] = 1;
+
+ if (data->ready_try_change)
+ SET_BIT (data->ready_try_change, n_ready);
+ }
+ }
+}
+
+/* Prepare for a new round of multipass lookahead scheduling. */
+static void
+core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
+ bool first_cycle_insn_p)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+ const_ix86_first_cycle_multipass_data_t prev_data
+ = ix86_first_cycle_multipass_data;
+
+ /* Restore the state from the end of the previous round. */
+ data->ifetch_block_len = prev_data->ifetch_block_len;
+ data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
+
+ /* Filter instructions that cannot be issued on current cycle due to
+ decoder restrictions. */
+ core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
+ first_cycle_insn_p);
+}
+
+/* INSN is being issued in the current solution. Account for its impact on
+ the decoder model. */
+static void
+core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
+ rtx insn, const void *_prev_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+ const_ix86_first_cycle_multipass_data_t prev_data
+ = (const_ix86_first_cycle_multipass_data_t) _prev_data;
+
+ int insn_size = min_insn_size (insn);
+
+ data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
+ data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
+ gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
+ && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
+
+ /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
+ if (!data->ready_try_change)
+ {
+ data->ready_try_change = sbitmap_alloc (n_ready);
+ data->ready_try_change_size = n_ready;
+ }
+ else if (data->ready_try_change_size < n_ready)
+ {
+ data->ready_try_change = sbitmap_resize (data->ready_try_change,
+ n_ready, 0);
+ data->ready_try_change_size = n_ready;
+ }
+ sbitmap_zero (data->ready_try_change);
+
+ /* Filter out insns from ready_try that the core will not be able to issue
+ on the current cycle due to the decoder. */
+ core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
+ false);
+}
+
+/* Revert the effect on ready_try. */
+static void
+core2i7_first_cycle_multipass_backtrack (const void *_data,
+ char *ready_try,
+ int n_ready ATTRIBUTE_UNUSED)
+{
+ const_ix86_first_cycle_multipass_data_t data
+ = (const_ix86_first_cycle_multipass_data_t) _data;
+ unsigned int i = 0;
+ sbitmap_iterator sbi;
+
+ gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
+ EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
+ {
+ ready_try[i] = 0;
+ }
+}
+
+/* Save the result of multipass lookahead scheduling for the next round. */
+static void
+core2i7_first_cycle_multipass_end (const void *_data)
+{
+ const_ix86_first_cycle_multipass_data_t data
+ = (const_ix86_first_cycle_multipass_data_t) _data;
+ ix86_first_cycle_multipass_data_t next_data
+ = ix86_first_cycle_multipass_data;
+
+ if (data != NULL)
+ {
+ next_data->ifetch_block_len = data->ifetch_block_len;
+ next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
+ }
+}
+
+/* Deallocate target data. */
+static void
+core2i7_first_cycle_multipass_fini (void *_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+
+ if (data->ready_try_change)
+ {
+ sbitmap_free (data->ready_try_change);
+ data->ready_try_change = NULL;
+ data->ready_try_change_size = 0;
+ }
+}
+
+/* Prepare for scheduling pass. */
+static void
+ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int max_uid ATTRIBUTE_UNUSED)
+{
+ /* Install scheduling hooks for current CPU. Some of these hooks are used
+ in time-critical parts of the scheduler, so we only set them up when
+ they are actually used. */
+ switch (ix86_tune)
+ {
+ case PROCESSOR_CORE2_32:
+ case PROCESSOR_CORE2_64:
+ case PROCESSOR_COREI7_32:
+ case PROCESSOR_COREI7_64:
+ targetm.sched.dfa_post_advance_cycle
+ = core2i7_dfa_post_advance_cycle;
+ targetm.sched.first_cycle_multipass_init
+ = core2i7_first_cycle_multipass_init;
+ targetm.sched.first_cycle_multipass_begin
+ = core2i7_first_cycle_multipass_begin;
+ targetm.sched.first_cycle_multipass_issue
+ = core2i7_first_cycle_multipass_issue;
+ targetm.sched.first_cycle_multipass_backtrack
+ = core2i7_first_cycle_multipass_backtrack;
+ targetm.sched.first_cycle_multipass_end
+ = core2i7_first_cycle_multipass_end;
+ targetm.sched.first_cycle_multipass_fini
+ = core2i7_first_cycle_multipass_fini;
+
+ /* Set decoder parameters. */
+ core2i7_secondary_decoder_max_insn_size = 8;
+ core2i7_ifetch_block_size = 16;
+ core2i7_ifetch_block_max_insns = 6;
+ break;
+
+ default:
+ targetm.sched.dfa_post_advance_cycle = NULL;
+ targetm.sched.first_cycle_multipass_init = NULL;
+ targetm.sched.first_cycle_multipass_begin = NULL;
+ targetm.sched.first_cycle_multipass_issue = NULL;
+ targetm.sched.first_cycle_multipass_backtrack = NULL;
+ targetm.sched.first_cycle_multipass_end = NULL;
+ targetm.sched.first_cycle_multipass_fini = NULL;
+ break;
+ }
+}
+
+
+/* Compute the alignment given to a constant that is being placed in memory.
+ EXP is the constant and ALIGN is the alignment that the object would
+ ordinarily have.
+ The value of this function is used instead of that alignment to align
+ the object. */
+
+int
+ix86_constant_alignment (tree exp, int align)
+{
+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
+ || TREE_CODE (exp) == INTEGER_CST)
+ {
+ if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
+ return 64;
+ else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
+ return 128;
+ }
+ else if (!optimize_size && TREE_CODE (exp) == STRING_CST
+ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
+ return BITS_PER_WORD;
+
+ return align;
+}
+
+/* Compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this function is used
+ instead of that alignment to align the object. */
+
+int
+ix86_data_alignment (tree type, int align)
+{
+ int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
+ && align < max_align)
+ align = max_align;
+
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if ((TYPE_MODE (type) == XCmode
+ || TYPE_MODE (type) == TCmode) && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+
+ return align;
+}
+
+/* Compute the alignment for a local variable or a stack slot. EXP is
+ the data type or decl itself, MODE is the widest mode available and
+ ALIGN is the alignment that the object would ordinarily have. The
+ value of this macro is used instead of that alignment to align the
+ object. */
+
+unsigned int
+ix86_local_alignment (tree exp, enum machine_mode mode,
+ unsigned int align)
+{
+ tree type, decl;
+
+ if (exp && DECL_P (exp))
+ {
+ type = TREE_TYPE (exp);
+ decl = exp;
+ }
+ else
+ {
+ type = exp;
+ decl = NULL;
+ }
+
+ /* Don't do dynamic stack realignment for long long objects with
+ -mpreferred-stack-boundary=2. */
+ if (!TARGET_64BIT
+ && align == 64
+ && ix86_preferred_stack_boundary < 64
+ && (mode == DImode || (type && TYPE_MODE (type) == DImode))
+ && (!type || !TYPE_USER_ALIGN (type))
+ && (!decl || !DECL_USER_ALIGN (decl)))
+ align = 32;
+
+ /* If TYPE is NULL, we are allocating a stack slot for caller-save
+ register in MODE. We will return the largest alignment of XF
+ and DF. */
+ if (!type)
+ {
+ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
+ align = GET_MODE_ALIGNMENT (DFmode);
+ return align;
+ }
+
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. The exact wording is:
+
+ An array uses the same alignment as its elements, except that a local or
+ global array variable of length at least 16 bytes or
+ a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+ This was added to allow use of aligned SSE instructions on arrays. The
+ rule is meant for static storage (where the compiler cannot do the
+ analysis by itself). We follow it for automatic variables only when
+ convenient. We fully control everything in the function being compiled,
+ and functions from other units cannot rely on the alignment.
+
+ Exclude the va_list type. It is the common case of a local array where
+ we cannot benefit from the alignment. */
+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && TARGET_SSE)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && (va_list_type_node == NULL_TREE
+ || (TYPE_MAIN_VARIANT (type)
+ != TYPE_MAIN_VARIANT (va_list_type_node)))
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if ((TYPE_MODE (type) == XCmode
+ || TYPE_MODE (type) == TCmode) && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+ return align;
+}
+
+/* Compute the minimum required alignment for dynamic stack realignment
+ purposes for a local variable, parameter or a stack slot. EXP is
+ the data type or decl itself, MODE is its mode and ALIGN is the
+ alignment that the object would ordinarily have. */
+
+unsigned int
+ix86_minimum_alignment (tree exp, enum machine_mode mode,
+ unsigned int align)
+{
+ tree type, decl;
+
+ if (exp && DECL_P (exp))
+ {
+ type = TREE_TYPE (exp);
+ decl = exp;
+ }
+ else
+ {
+ type = exp;
+ decl = NULL;
+ }
+
+ if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
+ return align;
+
+ /* Don't do dynamic stack realignment for long long objects with
+ -mpreferred-stack-boundary=2. */
+ if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
+ && (!type || !TYPE_USER_ALIGN (type))
+ && (!decl || !DECL_USER_ALIGN (decl)))
+ return 32;
+
+ return align;
+}
+
+/* Find a location for the static chain incoming to a nested function.
+ This is a register, unless all free registers are used by arguments. */
+
+static rtx
+ix86_static_chain (const_tree fndecl, bool incoming_p)
+{
+ unsigned regno;
+
+ if (!DECL_STATIC_CHAIN (fndecl))
+ return NULL;
+
+ if (TARGET_64BIT)
+ {
+ /* We always use R10 in 64-bit mode. */
+ regno = R10_REG;
+ }
+ else
+ {
+ tree fntype;
+ /* By default in 32-bit mode we use ECX to pass the static chain. */
+ regno = CX_REG;
+
+ fntype = TREE_TYPE (fndecl);
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ /* Fastcall functions use ecx/edx for arguments, which leaves
+ us with EAX for the static chain. */
+ regno = AX_REG;
+ }
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ /* Thiscall functions use ecx for arguments, which leaves
+ us with EAX for the static chain. */
+ regno = AX_REG;
+ }
+ else if (ix86_function_regparm (fntype, fndecl) == 3)
+ {
+ /* For regparm 3, we have no free call-clobbered registers in
+ which to store the static chain. In order to implement this,
+ we have the trampoline push the static chain to the stack.
+ However, we can't push a value below the return address when
+ we call the nested function directly, so we have to use an
+ alternate entry point. For this we use ESI, and have the
+ alternate entry point push ESI, so that things appear the
+ same once we're executing the nested function. */
+ if (incoming_p)
+ {
+ if (fndecl == current_function_decl)
+ ix86_static_chain_on_stack = true;
+ return gen_frame_mem (SImode,
+ plus_constant (arg_pointer_rtx, -8));
+ }
+ regno = SI_REG;
+ }
+ }
+
+ return gen_rtx_REG (Pmode, regno);
+}
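+
+/* For example (illustrative only), given the 32-bit GNU C nested
+ function
+
+ void outer (void)
+ {
+ int x = 0;
+ void inner (void) { x++; }
+ inner ();
+ }
+
+ the static chain for "inner" arrives in ECX by default, in EAX for
+ fastcall or thiscall functions, and on the stack for regparm(3)
+ functions as described above. */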
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNDECL is the decl of the target address; M_TRAMP is a MEM for
+ the trampoline, and CHAIN_VALUE is an RTX for the static chain
+ to be passed to the target function. */
+
+static void
+ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx mem, fnaddr;
+
+ fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ if (!TARGET_64BIT)
+ {
+ rtx disp, chain;
+ int opcode;
+
+ /* Depending on the static chain location, either load a register
+ with a constant, or push the constant to the stack. All of the
+ instructions are the same size. */
+ chain = ix86_static_chain (fndecl, true);
+ if (REG_P (chain))
+ {
+ if (REGNO (chain) == CX_REG)
+ opcode = 0xb9;
+ else if (REGNO (chain) == AX_REG)
+ opcode = 0xb8;
+ else
+ gcc_unreachable ();
+ }
+ else
+ opcode = 0x68;
+
+ mem = adjust_address (m_tramp, QImode, 0);
+ emit_move_insn (mem, gen_int_mode (opcode, QImode));
+
+ mem = adjust_address (m_tramp, SImode, 1);
+ emit_move_insn (mem, chain_value);
+
+ /* Compute the offset from the end of the jmp to the target function.
+ When the trampoline stores the static chain on the stack, we need
+ to skip the target's first insn, which pushes the (call-saved)
+ static chain register; this push is 1 byte. */
+ disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (XEXP (m_tramp, 0),
+ MEM_P (chain) ? 9 : 10),
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ mem = adjust_address (m_tramp, QImode, 5);
+ emit_move_insn (mem, gen_int_mode (0xe9, QImode));
+
+ mem = adjust_address (m_tramp, SImode, 6);
+ emit_move_insn (mem, disp);
+ }
+ else
+ {
+ int offset = 0;
+
+ /* Load the function address into r11. Try to load the address
+ using the shorter movl instead of movabs. We may want to support
+ movq for kernel mode, but the kernel does not use trampolines at
+ the moment. */
+ if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+ {
+ fnaddr = copy_to_mode_reg (DImode, fnaddr);
+
+ mem = adjust_address (m_tramp, HImode, offset);
+ emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
+
+ mem = adjust_address (m_tramp, SImode, offset + 2);
+ emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
+ offset += 6;
+ }
+ else
+ {
+ mem = adjust_address (m_tramp, HImode, offset);
+ emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
+
+ mem = adjust_address (m_tramp, DImode, offset + 2);
+ emit_move_insn (mem, fnaddr);
+ offset += 10;
+ }
+
+ /* Load static chain using movabs to r10. */
+ mem = adjust_address (m_tramp, HImode, offset);
+ emit_move_insn (mem, gen_int_mode (0xba49, HImode));
+
+ mem = adjust_address (m_tramp, DImode, offset + 2);
+ emit_move_insn (mem, chain_value);
+ offset += 10;
+
+ /* Jump to r11; the last (unused) byte is a nop, only there to
+ pad the write out to a single 32-bit store. */
+ mem = adjust_address (m_tramp, SImode, offset);
+ emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
+ offset += 4;
+
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+ }
+
+#ifdef ENABLE_EXECUTE_STACK
+#ifdef CHECK_EXECUTE_STACK_ENABLED
+ if (CHECK_EXECUTE_STACK_ENABLED)
+#endif
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+#endif
+}
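+
+/* The stores above produce the following trampoline layouts (an
+ illustrative summary, not additional code):
+
+ 32-bit: b9/b8/68 imm32 mov $chain, %ecx/%eax or push $chain
+ e9 rel32 jmp <target>
+
+ 64-bit: 41 bb imm32 / 49 bb imm64 movl/movabs $fnaddr, %r11
+ 49 ba imm64 movabs $chain, %r10
+ 49 ff e3 90 jmp *%r11; nop */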
+
+/* The following file contains several enumerations and data structures
+ built from the definitions in i386-builtin-types.def. */
+
+#include "i386-builtin-types.inc"
+
+/* Table for the ix86 builtin non-function types. */
+static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
+
+/* Retrieve an element from the above table, building some of
+ the types lazily. */
+
+static tree
+ix86_get_builtin_type (enum ix86_builtin_type tcode)
+{
+ unsigned int index;
+ tree type, itype;
+
+ gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_type_tab));
+
+ type = ix86_builtin_type_tab[(int) tcode];
+ if (type != NULL)
+ return type;
+
+ gcc_assert (tcode > IX86_BT_LAST_PRIM);
+ if (tcode <= IX86_BT_LAST_VECT)
+ {
+ enum machine_mode mode;
+
+ index = tcode - IX86_BT_LAST_PRIM - 1;
+ itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
+ mode = ix86_builtin_type_vect_mode[index];
+
+ type = build_vector_type_for_mode (itype, mode);
+ }
+ else
+ {
+ int quals;
+
+ index = tcode - IX86_BT_LAST_VECT - 1;
+ if (tcode <= IX86_BT_LAST_PTR)
+ quals = TYPE_UNQUALIFIED;
+ else
+ quals = TYPE_QUAL_CONST;
+
+ itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
+ if (quals != TYPE_UNQUALIFIED)
+ itype = build_qualified_type (itype, quals);
+
+ type = build_pointer_type (itype);
+ }
+
+ ix86_builtin_type_tab[(int) tcode] = type;
+ return type;
+}
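+
+/* For example (assuming the usual contents of i386-builtin-types.def),
+ a vector code such as IX86_BT_V4SF is materialized on first use
+ roughly as
+
+ build_vector_type_for_mode (float_type_node, V4SFmode);
+
+ while the pointer codes wrap their base type in build_pointer_type,
+ const-qualified for codes past IX86_BT_LAST_PTR. */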
+
+/* Table for the ix86 builtin function types. */
+static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
+
+/* Retrieve an element from the above table, building some of
+ the types lazily. */
+
+static tree
+ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
+{
+ tree type;
+
+ gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
+
+ type = ix86_builtin_func_type_tab[(int) tcode];
+ if (type != NULL)
+ return type;
+
+ if (tcode <= IX86_BT_LAST_FUNC)
+ {
+ unsigned start = ix86_builtin_func_start[(int) tcode];
+ unsigned after = ix86_builtin_func_start[(int) tcode + 1];
+ tree rtype, atype, args = void_list_node;
+ unsigned i;
+
+ rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
+ for (i = after - 1; i > start; --i)
+ {
+ atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
+ args = tree_cons (NULL, atype, args);
+ }
+
+ type = build_function_type (rtype, args);
+ }
+ else
+ {
+ unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
+ enum ix86_builtin_func_type icode;
+
+ icode = ix86_builtin_func_alias_base[index];
+ type = ix86_get_builtin_func_type (icode);
+ }
+
+ ix86_builtin_func_type_tab[(int) tcode] = type;
+ return type;
+}
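+
+/* For example, a function code whose slice of ix86_builtin_func_args
+ is { FLOAT, V4SF } (return type first) is built, walking the
+ arguments backwards from void_list_node, roughly as
+
+ build_function_type (float_type_node,
+ tree_cons (NULL, v4sf_type, void_list_node));
+
+ where "v4sf_type" stands for ix86_get_builtin_type (IX86_BT_V4SF);
+ an illustrative expansion, not code from this file. */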
+
+
+/* Codes for all the SSE/MMX builtins. */
+enum ix86_builtins
+{
+ IX86_BUILTIN_ADDPS,
+ IX86_BUILTIN_ADDSS,
+ IX86_BUILTIN_DIVPS,
+ IX86_BUILTIN_DIVSS,
+ IX86_BUILTIN_MULPS,
+ IX86_BUILTIN_MULSS,
+ IX86_BUILTIN_SUBPS,
+ IX86_BUILTIN_SUBSS,
+
+ IX86_BUILTIN_CMPEQPS,
+ IX86_BUILTIN_CMPLTPS,
+ IX86_BUILTIN_CMPLEPS,
+ IX86_BUILTIN_CMPGTPS,
+ IX86_BUILTIN_CMPGEPS,
+ IX86_BUILTIN_CMPNEQPS,
+ IX86_BUILTIN_CMPNLTPS,
+ IX86_BUILTIN_CMPNLEPS,
+ IX86_BUILTIN_CMPNGTPS,
+ IX86_BUILTIN_CMPNGEPS,
+ IX86_BUILTIN_CMPORDPS,
+ IX86_BUILTIN_CMPUNORDPS,
+ IX86_BUILTIN_CMPEQSS,
+ IX86_BUILTIN_CMPLTSS,
+ IX86_BUILTIN_CMPLESS,
+ IX86_BUILTIN_CMPNEQSS,
+ IX86_BUILTIN_CMPNLTSS,
+ IX86_BUILTIN_CMPNLESS,
+ IX86_BUILTIN_CMPNGTSS,
+ IX86_BUILTIN_CMPNGESS,
+ IX86_BUILTIN_CMPORDSS,
+ IX86_BUILTIN_CMPUNORDSS,
+
+ IX86_BUILTIN_COMIEQSS,
+ IX86_BUILTIN_COMILTSS,
+ IX86_BUILTIN_COMILESS,
+ IX86_BUILTIN_COMIGTSS,
+ IX86_BUILTIN_COMIGESS,
+ IX86_BUILTIN_COMINEQSS,
+ IX86_BUILTIN_UCOMIEQSS,
+ IX86_BUILTIN_UCOMILTSS,
+ IX86_BUILTIN_UCOMILESS,
+ IX86_BUILTIN_UCOMIGTSS,
+ IX86_BUILTIN_UCOMIGESS,
+ IX86_BUILTIN_UCOMINEQSS,
+
+ IX86_BUILTIN_CVTPI2PS,
+ IX86_BUILTIN_CVTPS2PI,
+ IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
+ IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
+ IX86_BUILTIN_CVTTPS2PI,
+ IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
+
+ IX86_BUILTIN_MAXPS,
+ IX86_BUILTIN_MAXSS,
+ IX86_BUILTIN_MINPS,
+ IX86_BUILTIN_MINSS,
+
+ IX86_BUILTIN_LOADUPS,
+ IX86_BUILTIN_STOREUPS,
+ IX86_BUILTIN_MOVSS,
+
+ IX86_BUILTIN_MOVHLPS,
+ IX86_BUILTIN_MOVLHPS,
+ IX86_BUILTIN_LOADHPS,
+ IX86_BUILTIN_LOADLPS,
+ IX86_BUILTIN_STOREHPS,
+ IX86_BUILTIN_STORELPS,
+
+ IX86_BUILTIN_MASKMOVQ,
+ IX86_BUILTIN_MOVMSKPS,
+ IX86_BUILTIN_PMOVMSKB,
+
+ IX86_BUILTIN_MOVNTPS,
+ IX86_BUILTIN_MOVNTQ,
+
+ IX86_BUILTIN_LOADDQU,
+ IX86_BUILTIN_STOREDQU,
+
+ IX86_BUILTIN_PACKSSWB,
+ IX86_BUILTIN_PACKSSDW,
+ IX86_BUILTIN_PACKUSWB,
+
+ IX86_BUILTIN_PADDB,
+ IX86_BUILTIN_PADDW,
+ IX86_BUILTIN_PADDD,
+ IX86_BUILTIN_PADDQ,
+ IX86_BUILTIN_PADDSB,
+ IX86_BUILTIN_PADDSW,
+ IX86_BUILTIN_PADDUSB,
+ IX86_BUILTIN_PADDUSW,
+ IX86_BUILTIN_PSUBB,
+ IX86_BUILTIN_PSUBW,
+ IX86_BUILTIN_PSUBD,
+ IX86_BUILTIN_PSUBQ,
+ IX86_BUILTIN_PSUBSB,
+ IX86_BUILTIN_PSUBSW,
+ IX86_BUILTIN_PSUBUSB,
+ IX86_BUILTIN_PSUBUSW,
+
+ IX86_BUILTIN_PAND,
+ IX86_BUILTIN_PANDN,
+ IX86_BUILTIN_POR,
+ IX86_BUILTIN_PXOR,
+
+ IX86_BUILTIN_PAVGB,
+ IX86_BUILTIN_PAVGW,
+
+ IX86_BUILTIN_PCMPEQB,
+ IX86_BUILTIN_PCMPEQW,
+ IX86_BUILTIN_PCMPEQD,
+ IX86_BUILTIN_PCMPGTB,
+ IX86_BUILTIN_PCMPGTW,
+ IX86_BUILTIN_PCMPGTD,
+
+ IX86_BUILTIN_PMADDWD,
+
+ IX86_BUILTIN_PMAXSW,
+ IX86_BUILTIN_PMAXUB,
+ IX86_BUILTIN_PMINSW,
+ IX86_BUILTIN_PMINUB,
+
+ IX86_BUILTIN_PMULHUW,
+ IX86_BUILTIN_PMULHW,
+ IX86_BUILTIN_PMULLW,
+
+ IX86_BUILTIN_PSADBW,
+ IX86_BUILTIN_PSHUFW,
+
+ IX86_BUILTIN_PSLLW,
+ IX86_BUILTIN_PSLLD,
+ IX86_BUILTIN_PSLLQ,
+ IX86_BUILTIN_PSRAW,
+ IX86_BUILTIN_PSRAD,
+ IX86_BUILTIN_PSRLW,
+ IX86_BUILTIN_PSRLD,
+ IX86_BUILTIN_PSRLQ,
+ IX86_BUILTIN_PSLLWI,
+ IX86_BUILTIN_PSLLDI,
+ IX86_BUILTIN_PSLLQI,
+ IX86_BUILTIN_PSRAWI,
+ IX86_BUILTIN_PSRADI,
+ IX86_BUILTIN_PSRLWI,
+ IX86_BUILTIN_PSRLDI,
+ IX86_BUILTIN_PSRLQI,
+
+ IX86_BUILTIN_PUNPCKHBW,
+ IX86_BUILTIN_PUNPCKHWD,
+ IX86_BUILTIN_PUNPCKHDQ,
+ IX86_BUILTIN_PUNPCKLBW,
+ IX86_BUILTIN_PUNPCKLWD,
+ IX86_BUILTIN_PUNPCKLDQ,
+
+ IX86_BUILTIN_SHUFPS,
+
+ IX86_BUILTIN_RCPPS,
+ IX86_BUILTIN_RCPSS,
+ IX86_BUILTIN_RSQRTPS,
+ IX86_BUILTIN_RSQRTPS_NR,
+ IX86_BUILTIN_RSQRTSS,
+ IX86_BUILTIN_RSQRTF,
+ IX86_BUILTIN_SQRTPS,
+ IX86_BUILTIN_SQRTPS_NR,
+ IX86_BUILTIN_SQRTSS,
+
+ IX86_BUILTIN_UNPCKHPS,
+ IX86_BUILTIN_UNPCKLPS,
+
+ IX86_BUILTIN_ANDPS,
+ IX86_BUILTIN_ANDNPS,
+ IX86_BUILTIN_ORPS,
+ IX86_BUILTIN_XORPS,
+
+ IX86_BUILTIN_EMMS,
+ IX86_BUILTIN_LDMXCSR,
+ IX86_BUILTIN_STMXCSR,
+ IX86_BUILTIN_SFENCE,
+
+ /* 3DNow! Original */
+ IX86_BUILTIN_FEMMS,
+ IX86_BUILTIN_PAVGUSB,
+ IX86_BUILTIN_PF2ID,
+ IX86_BUILTIN_PFACC,
+ IX86_BUILTIN_PFADD,
+ IX86_BUILTIN_PFCMPEQ,
+ IX86_BUILTIN_PFCMPGE,
+ IX86_BUILTIN_PFCMPGT,
+ IX86_BUILTIN_PFMAX,
+ IX86_BUILTIN_PFMIN,
+ IX86_BUILTIN_PFMUL,
+ IX86_BUILTIN_PFRCP,
+ IX86_BUILTIN_PFRCPIT1,
+ IX86_BUILTIN_PFRCPIT2,
+ IX86_BUILTIN_PFRSQIT1,
+ IX86_BUILTIN_PFRSQRT,
+ IX86_BUILTIN_PFSUB,
+ IX86_BUILTIN_PFSUBR,
+ IX86_BUILTIN_PI2FD,
+ IX86_BUILTIN_PMULHRW,
+
+ /* 3DNow! Athlon Extensions */
+ IX86_BUILTIN_PF2IW,
+ IX86_BUILTIN_PFNACC,
+ IX86_BUILTIN_PFPNACC,
+ IX86_BUILTIN_PI2FW,
+ IX86_BUILTIN_PSWAPDSI,
+ IX86_BUILTIN_PSWAPDSF,
+
+ /* SSE2 */
+ IX86_BUILTIN_ADDPD,
+ IX86_BUILTIN_ADDSD,
+ IX86_BUILTIN_DIVPD,
+ IX86_BUILTIN_DIVSD,
+ IX86_BUILTIN_MULPD,
+ IX86_BUILTIN_MULSD,
+ IX86_BUILTIN_SUBPD,
+ IX86_BUILTIN_SUBSD,
+
+ IX86_BUILTIN_CMPEQPD,
+ IX86_BUILTIN_CMPLTPD,
+ IX86_BUILTIN_CMPLEPD,
+ IX86_BUILTIN_CMPGTPD,
+ IX86_BUILTIN_CMPGEPD,
+ IX86_BUILTIN_CMPNEQPD,
+ IX86_BUILTIN_CMPNLTPD,
+ IX86_BUILTIN_CMPNLEPD,
+ IX86_BUILTIN_CMPNGTPD,
+ IX86_BUILTIN_CMPNGEPD,
+ IX86_BUILTIN_CMPORDPD,
+ IX86_BUILTIN_CMPUNORDPD,
+ IX86_BUILTIN_CMPEQSD,
+ IX86_BUILTIN_CMPLTSD,
+ IX86_BUILTIN_CMPLESD,
+ IX86_BUILTIN_CMPNEQSD,
+ IX86_BUILTIN_CMPNLTSD,
+ IX86_BUILTIN_CMPNLESD,
+ IX86_BUILTIN_CMPORDSD,
+ IX86_BUILTIN_CMPUNORDSD,
+
+ IX86_BUILTIN_COMIEQSD,
+ IX86_BUILTIN_COMILTSD,
+ IX86_BUILTIN_COMILESD,
+ IX86_BUILTIN_COMIGTSD,
+ IX86_BUILTIN_COMIGESD,
+ IX86_BUILTIN_COMINEQSD,
+ IX86_BUILTIN_UCOMIEQSD,
+ IX86_BUILTIN_UCOMILTSD,
+ IX86_BUILTIN_UCOMILESD,
+ IX86_BUILTIN_UCOMIGTSD,
+ IX86_BUILTIN_UCOMIGESD,
+ IX86_BUILTIN_UCOMINEQSD,
+
+ IX86_BUILTIN_MAXPD,
+ IX86_BUILTIN_MAXSD,
+ IX86_BUILTIN_MINPD,
+ IX86_BUILTIN_MINSD,
+
+ IX86_BUILTIN_ANDPD,
+ IX86_BUILTIN_ANDNPD,
+ IX86_BUILTIN_ORPD,
+ IX86_BUILTIN_XORPD,
+
+ IX86_BUILTIN_SQRTPD,
+ IX86_BUILTIN_SQRTSD,
+
+ IX86_BUILTIN_UNPCKHPD,
+ IX86_BUILTIN_UNPCKLPD,
+
+ IX86_BUILTIN_SHUFPD,
+
+ IX86_BUILTIN_LOADUPD,
+ IX86_BUILTIN_STOREUPD,
+ IX86_BUILTIN_MOVSD,
+
+ IX86_BUILTIN_LOADHPD,
+ IX86_BUILTIN_LOADLPD,
+
+ IX86_BUILTIN_CVTDQ2PD,
+ IX86_BUILTIN_CVTDQ2PS,
+
+ IX86_BUILTIN_CVTPD2DQ,
+ IX86_BUILTIN_CVTPD2PI,
+ IX86_BUILTIN_CVTPD2PS,
+ IX86_BUILTIN_CVTTPD2DQ,
+ IX86_BUILTIN_CVTTPD2PI,
+
+ IX86_BUILTIN_CVTPI2PD,
+ IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
+
+ IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
+ IX86_BUILTIN_CVTSD2SS,
+ IX86_BUILTIN_CVTSS2SD,
+ IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
+
+ IX86_BUILTIN_CVTPS2DQ,
+ IX86_BUILTIN_CVTPS2PD,
+ IX86_BUILTIN_CVTTPS2DQ,
+
+ IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTPD,
+ IX86_BUILTIN_MOVNTDQ,
+
+ IX86_BUILTIN_MOVQ128,
+
+ /* SSE2 MMX */
+ IX86_BUILTIN_MASKMOVDQU,
+ IX86_BUILTIN_MOVMSKPD,
+ IX86_BUILTIN_PMOVMSKB128,
+
+ IX86_BUILTIN_PACKSSWB128,
+ IX86_BUILTIN_PACKSSDW128,
+ IX86_BUILTIN_PACKUSWB128,
+
+ IX86_BUILTIN_PADDB128,
+ IX86_BUILTIN_PADDW128,
+ IX86_BUILTIN_PADDD128,
+ IX86_BUILTIN_PADDQ128,
+ IX86_BUILTIN_PADDSB128,
+ IX86_BUILTIN_PADDSW128,
+ IX86_BUILTIN_PADDUSB128,
+ IX86_BUILTIN_PADDUSW128,
+ IX86_BUILTIN_PSUBB128,
+ IX86_BUILTIN_PSUBW128,
+ IX86_BUILTIN_PSUBD128,
+ IX86_BUILTIN_PSUBQ128,
+ IX86_BUILTIN_PSUBSB128,
+ IX86_BUILTIN_PSUBSW128,
+ IX86_BUILTIN_PSUBUSB128,
+ IX86_BUILTIN_PSUBUSW128,
+
+ IX86_BUILTIN_PAND128,
+ IX86_BUILTIN_PANDN128,
+ IX86_BUILTIN_POR128,
+ IX86_BUILTIN_PXOR128,
+
+ IX86_BUILTIN_PAVGB128,
+ IX86_BUILTIN_PAVGW128,
+
+ IX86_BUILTIN_PCMPEQB128,
+ IX86_BUILTIN_PCMPEQW128,
+ IX86_BUILTIN_PCMPEQD128,
+ IX86_BUILTIN_PCMPGTB128,
+ IX86_BUILTIN_PCMPGTW128,
+ IX86_BUILTIN_PCMPGTD128,
+
+ IX86_BUILTIN_PMADDWD128,
+
+ IX86_BUILTIN_PMAXSW128,
+ IX86_BUILTIN_PMAXUB128,
+ IX86_BUILTIN_PMINSW128,
+ IX86_BUILTIN_PMINUB128,
+
+ IX86_BUILTIN_PMULUDQ,
+ IX86_BUILTIN_PMULUDQ128,
+ IX86_BUILTIN_PMULHUW128,
+ IX86_BUILTIN_PMULHW128,
+ IX86_BUILTIN_PMULLW128,
+
+ IX86_BUILTIN_PSADBW128,
+ IX86_BUILTIN_PSHUFHW,
+ IX86_BUILTIN_PSHUFLW,
+ IX86_BUILTIN_PSHUFD,
+
+ IX86_BUILTIN_PSLLDQI128,
+ IX86_BUILTIN_PSLLWI128,
+ IX86_BUILTIN_PSLLDI128,
+ IX86_BUILTIN_PSLLQI128,
+ IX86_BUILTIN_PSRAWI128,
+ IX86_BUILTIN_PSRADI128,
+ IX86_BUILTIN_PSRLDQI128,
+ IX86_BUILTIN_PSRLWI128,
+ IX86_BUILTIN_PSRLDI128,
+ IX86_BUILTIN_PSRLQI128,
+
+ IX86_BUILTIN_PSLLDQ128,
+ IX86_BUILTIN_PSLLW128,
+ IX86_BUILTIN_PSLLD128,
+ IX86_BUILTIN_PSLLQ128,
+ IX86_BUILTIN_PSRAW128,
+ IX86_BUILTIN_PSRAD128,
+ IX86_BUILTIN_PSRLW128,
+ IX86_BUILTIN_PSRLD128,
+ IX86_BUILTIN_PSRLQ128,
+
+ IX86_BUILTIN_PUNPCKHBW128,
+ IX86_BUILTIN_PUNPCKHWD128,
+ IX86_BUILTIN_PUNPCKHDQ128,
+ IX86_BUILTIN_PUNPCKHQDQ128,
+ IX86_BUILTIN_PUNPCKLBW128,
+ IX86_BUILTIN_PUNPCKLWD128,
+ IX86_BUILTIN_PUNPCKLDQ128,
+ IX86_BUILTIN_PUNPCKLQDQ128,
+
+ IX86_BUILTIN_CLFLUSH,
+ IX86_BUILTIN_MFENCE,
+ IX86_BUILTIN_LFENCE,
+
+ IX86_BUILTIN_BSRSI,
+ IX86_BUILTIN_BSRDI,
+ IX86_BUILTIN_RDPMC,
+ IX86_BUILTIN_RDTSC,
+ IX86_BUILTIN_RDTSCP,
+ IX86_BUILTIN_ROLQI,
+ IX86_BUILTIN_ROLHI,
+ IX86_BUILTIN_RORQI,
+ IX86_BUILTIN_RORHI,
+
+ /* SSE3. */
+ IX86_BUILTIN_ADDSUBPS,
+ IX86_BUILTIN_HADDPS,
+ IX86_BUILTIN_HSUBPS,
+ IX86_BUILTIN_MOVSHDUP,
+ IX86_BUILTIN_MOVSLDUP,
+ IX86_BUILTIN_ADDSUBPD,
+ IX86_BUILTIN_HADDPD,
+ IX86_BUILTIN_HSUBPD,
+ IX86_BUILTIN_LDDQU,
+
+ IX86_BUILTIN_MONITOR,
+ IX86_BUILTIN_MWAIT,
+
+ /* SSSE3. */
+ IX86_BUILTIN_PHADDW,
+ IX86_BUILTIN_PHADDD,
+ IX86_BUILTIN_PHADDSW,
+ IX86_BUILTIN_PHSUBW,
+ IX86_BUILTIN_PHSUBD,
+ IX86_BUILTIN_PHSUBSW,
+ IX86_BUILTIN_PMADDUBSW,
+ IX86_BUILTIN_PMULHRSW,
+ IX86_BUILTIN_PSHUFB,
+ IX86_BUILTIN_PSIGNB,
+ IX86_BUILTIN_PSIGNW,
+ IX86_BUILTIN_PSIGND,
+ IX86_BUILTIN_PALIGNR,
+ IX86_BUILTIN_PABSB,
+ IX86_BUILTIN_PABSW,
+ IX86_BUILTIN_PABSD,
+
+ IX86_BUILTIN_PHADDW128,
+ IX86_BUILTIN_PHADDD128,
+ IX86_BUILTIN_PHADDSW128,
+ IX86_BUILTIN_PHSUBW128,
+ IX86_BUILTIN_PHSUBD128,
+ IX86_BUILTIN_PHSUBSW128,
+ IX86_BUILTIN_PMADDUBSW128,
+ IX86_BUILTIN_PMULHRSW128,
+ IX86_BUILTIN_PSHUFB128,
+ IX86_BUILTIN_PSIGNB128,
+ IX86_BUILTIN_PSIGNW128,
+ IX86_BUILTIN_PSIGND128,
+ IX86_BUILTIN_PALIGNR128,
+ IX86_BUILTIN_PABSB128,
+ IX86_BUILTIN_PABSW128,
+ IX86_BUILTIN_PABSD128,
+
+ /* AMDFAM10 - SSE4A New Instructions. */
+ IX86_BUILTIN_MOVNTSD,
+ IX86_BUILTIN_MOVNTSS,
+ IX86_BUILTIN_EXTRQI,
+ IX86_BUILTIN_EXTRQ,
+ IX86_BUILTIN_INSERTQI,
+ IX86_BUILTIN_INSERTQ,
+
+ /* SSE4.1. */
+ IX86_BUILTIN_BLENDPD,
+ IX86_BUILTIN_BLENDPS,
+ IX86_BUILTIN_BLENDVPD,
+ IX86_BUILTIN_BLENDVPS,
+ IX86_BUILTIN_PBLENDVB128,
+ IX86_BUILTIN_PBLENDW128,
+
+ IX86_BUILTIN_DPPD,
+ IX86_BUILTIN_DPPS,
+
+ IX86_BUILTIN_INSERTPS128,
+
+ IX86_BUILTIN_MOVNTDQA,
+ IX86_BUILTIN_MPSADBW128,
+ IX86_BUILTIN_PACKUSDW128,
+ IX86_BUILTIN_PCMPEQQ,
+ IX86_BUILTIN_PHMINPOSUW128,
+
+ IX86_BUILTIN_PMAXSB128,
+ IX86_BUILTIN_PMAXSD128,
+ IX86_BUILTIN_PMAXUD128,
+ IX86_BUILTIN_PMAXUW128,
+
+ IX86_BUILTIN_PMINSB128,
+ IX86_BUILTIN_PMINSD128,
+ IX86_BUILTIN_PMINUD128,
+ IX86_BUILTIN_PMINUW128,
+
+ IX86_BUILTIN_PMOVSXBW128,
+ IX86_BUILTIN_PMOVSXBD128,
+ IX86_BUILTIN_PMOVSXBQ128,
+ IX86_BUILTIN_PMOVSXWD128,
+ IX86_BUILTIN_PMOVSXWQ128,
+ IX86_BUILTIN_PMOVSXDQ128,
+
+ IX86_BUILTIN_PMOVZXBW128,
+ IX86_BUILTIN_PMOVZXBD128,
+ IX86_BUILTIN_PMOVZXBQ128,
+ IX86_BUILTIN_PMOVZXWD128,
+ IX86_BUILTIN_PMOVZXWQ128,
+ IX86_BUILTIN_PMOVZXDQ128,
+
+ IX86_BUILTIN_PMULDQ128,
+ IX86_BUILTIN_PMULLD128,
+
+ IX86_BUILTIN_ROUNDPD,
+ IX86_BUILTIN_ROUNDPS,
+ IX86_BUILTIN_ROUNDSD,
+ IX86_BUILTIN_ROUNDSS,
+
+ IX86_BUILTIN_PTESTZ,
+ IX86_BUILTIN_PTESTC,
+ IX86_BUILTIN_PTESTNZC,
+
+ IX86_BUILTIN_VEC_INIT_V2SI,
+ IX86_BUILTIN_VEC_INIT_V4HI,
+ IX86_BUILTIN_VEC_INIT_V8QI,
+ IX86_BUILTIN_VEC_EXT_V2DF,
+ IX86_BUILTIN_VEC_EXT_V2DI,
+ IX86_BUILTIN_VEC_EXT_V4SF,
+ IX86_BUILTIN_VEC_EXT_V4SI,
+ IX86_BUILTIN_VEC_EXT_V8HI,
+ IX86_BUILTIN_VEC_EXT_V2SI,
+ IX86_BUILTIN_VEC_EXT_V4HI,
+ IX86_BUILTIN_VEC_EXT_V16QI,
+ IX86_BUILTIN_VEC_SET_V2DI,
+ IX86_BUILTIN_VEC_SET_V4SF,
+ IX86_BUILTIN_VEC_SET_V4SI,
+ IX86_BUILTIN_VEC_SET_V8HI,
+ IX86_BUILTIN_VEC_SET_V4HI,
+ IX86_BUILTIN_VEC_SET_V16QI,
+
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
+ /* SSE4.2. */
+ IX86_BUILTIN_CRC32QI,
+ IX86_BUILTIN_CRC32HI,
+ IX86_BUILTIN_CRC32SI,
+ IX86_BUILTIN_CRC32DI,
+
+ IX86_BUILTIN_PCMPESTRI128,
+ IX86_BUILTIN_PCMPESTRM128,
+ IX86_BUILTIN_PCMPESTRA128,
+ IX86_BUILTIN_PCMPESTRC128,
+ IX86_BUILTIN_PCMPESTRO128,
+ IX86_BUILTIN_PCMPESTRS128,
+ IX86_BUILTIN_PCMPESTRZ128,
+ IX86_BUILTIN_PCMPISTRI128,
+ IX86_BUILTIN_PCMPISTRM128,
+ IX86_BUILTIN_PCMPISTRA128,
+ IX86_BUILTIN_PCMPISTRC128,
+ IX86_BUILTIN_PCMPISTRO128,
+ IX86_BUILTIN_PCMPISTRS128,
+ IX86_BUILTIN_PCMPISTRZ128,
+
+ IX86_BUILTIN_PCMPGTQ,
+
+ /* AES instructions */
+ IX86_BUILTIN_AESENC128,
+ IX86_BUILTIN_AESENCLAST128,
+ IX86_BUILTIN_AESDEC128,
+ IX86_BUILTIN_AESDECLAST128,
+ IX86_BUILTIN_AESIMC128,
+ IX86_BUILTIN_AESKEYGENASSIST128,
+
+ /* PCLMUL instruction */
+ IX86_BUILTIN_PCLMULQDQ128,
+
+ /* AVX */
+ IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256,
+ IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256,
+ IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256,
+ IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256,
+ IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256,
+ IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256,
+ IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256,
+ IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_HADDPD256,
+ IX86_BUILTIN_HADDPS256,
+ IX86_BUILTIN_HSUBPD256,
+ IX86_BUILTIN_HSUBPS256,
+ IX86_BUILTIN_MAXPD256,
+ IX86_BUILTIN_MAXPS256,
+ IX86_BUILTIN_MINPD256,
+ IX86_BUILTIN_MINPS256,
+ IX86_BUILTIN_MULPD256,
+ IX86_BUILTIN_MULPS256,
+ IX86_BUILTIN_ORPD256,
+ IX86_BUILTIN_ORPS256,
+ IX86_BUILTIN_SHUFPD256,
+ IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_SUBPD256,
+ IX86_BUILTIN_SUBPS256,
+ IX86_BUILTIN_XORPD256,
+ IX86_BUILTIN_XORPS256,
+ IX86_BUILTIN_CMPSD,
+ IX86_BUILTIN_CMPSS,
+ IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS,
+ IX86_BUILTIN_CMPPD256,
+ IX86_BUILTIN_CMPPS256,
+ IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256,
+ IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256,
+ IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256,
+ IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256,
+ IX86_BUILTIN_EXTRACTF128PD256,
+ IX86_BUILTIN_EXTRACTF128PS256,
+ IX86_BUILTIN_EXTRACTF128SI256,
+ IX86_BUILTIN_VZEROALL,
+ IX86_BUILTIN_VZEROUPPER,
+ IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS,
+ IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256,
+ IX86_BUILTIN_VPERMILPD,
+ IX86_BUILTIN_VPERMILPS,
+ IX86_BUILTIN_VPERMILPD256,
+ IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERMIL2PD,
+ IX86_BUILTIN_VPERMIL2PS,
+ IX86_BUILTIN_VPERMIL2PD256,
+ IX86_BUILTIN_VPERMIL2PS256,
+ IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256,
+ IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VBROADCASTSS,
+ IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256,
+ IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256,
+ IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256,
+ IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MOVNTDQ256,
+ IX86_BUILTIN_MOVNTPD256,
+ IX86_BUILTIN_MOVNTPS256,
+ IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256,
+ IX86_BUILTIN_MASKLOADPD,
+ IX86_BUILTIN_MASKLOADPS,
+ IX86_BUILTIN_MASKSTOREPD,
+ IX86_BUILTIN_MASKSTOREPS,
+ IX86_BUILTIN_MASKLOADPD256,
+ IX86_BUILTIN_MASKLOADPS256,
+ IX86_BUILTIN_MASKSTOREPD256,
+ IX86_BUILTIN_MASKSTOREPS256,
+ IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256,
+ IX86_BUILTIN_MOVDDUP256,
+
+ IX86_BUILTIN_SQRTPD256,
+ IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256,
+ IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256,
+
+ IX86_BUILTIN_RCPPS256,
+
+ IX86_BUILTIN_ROUNDPD256,
+ IX86_BUILTIN_ROUNDPS256,
+
+ IX86_BUILTIN_UNPCKHPD256,
+ IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256,
+ IX86_BUILTIN_UNPCKLPS256,
+
+ IX86_BUILTIN_SI256_SI,
+ IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD,
+ IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256,
+ IX86_BUILTIN_PD_PD256,
+
+ IX86_BUILTIN_VTESTZPD,
+ IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD,
+ IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS,
+ IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256,
+ IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256,
+ IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256,
+ IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256,
+ IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256,
+
+ IX86_BUILTIN_MOVMSKPD256,
+ IX86_BUILTIN_MOVMSKPS256,
+
+ /* TFmode support builtins. */
+ IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_HUGE_VALQ,
+ IX86_BUILTIN_FABSQ,
+ IX86_BUILTIN_COPYSIGNQ,
+
+ /* Vectorizer support builtins. */
+ IX86_BUILTIN_CPYSGNPS,
+ IX86_BUILTIN_CPYSGNPD,
+ IX86_BUILTIN_CPYSGNPS256,
+ IX86_BUILTIN_CPYSGNPD256,
+
+ IX86_BUILTIN_CVTUDQ2PS,
+
+ IX86_BUILTIN_VEC_PERM_V2DF,
+ IX86_BUILTIN_VEC_PERM_V4SF,
+ IX86_BUILTIN_VEC_PERM_V2DI,
+ IX86_BUILTIN_VEC_PERM_V4SI,
+ IX86_BUILTIN_VEC_PERM_V8HI,
+ IX86_BUILTIN_VEC_PERM_V16QI,
+ IX86_BUILTIN_VEC_PERM_V2DI_U,
+ IX86_BUILTIN_VEC_PERM_V4SI_U,
+ IX86_BUILTIN_VEC_PERM_V8HI_U,
+ IX86_BUILTIN_VEC_PERM_V16QI_U,
+ IX86_BUILTIN_VEC_PERM_V4DF,
+ IX86_BUILTIN_VEC_PERM_V8SF,
+
+ /* FMA4 and XOP instructions. */
+ IX86_BUILTIN_VFMADDSS,
+ IX86_BUILTIN_VFMADDSD,
+ IX86_BUILTIN_VFMADDPS,
+ IX86_BUILTIN_VFMADDPD,
+ IX86_BUILTIN_VFMADDPS256,
+ IX86_BUILTIN_VFMADDPD256,
+ IX86_BUILTIN_VFMADDSUBPS,
+ IX86_BUILTIN_VFMADDSUBPD,
+ IX86_BUILTIN_VFMADDSUBPS256,
+ IX86_BUILTIN_VFMADDSUBPD256,
+
+ IX86_BUILTIN_VPCMOV,
+ IX86_BUILTIN_VPCMOV_V2DI,
+ IX86_BUILTIN_VPCMOV_V4SI,
+ IX86_BUILTIN_VPCMOV_V8HI,
+ IX86_BUILTIN_VPCMOV_V16QI,
+ IX86_BUILTIN_VPCMOV_V4SF,
+ IX86_BUILTIN_VPCMOV_V2DF,
+ IX86_BUILTIN_VPCMOV256,
+ IX86_BUILTIN_VPCMOV_V4DI256,
+ IX86_BUILTIN_VPCMOV_V8SI256,
+ IX86_BUILTIN_VPCMOV_V16HI256,
+ IX86_BUILTIN_VPCMOV_V32QI256,
+ IX86_BUILTIN_VPCMOV_V8SF256,
+ IX86_BUILTIN_VPCMOV_V4DF256,
+
+ IX86_BUILTIN_VPPERM,
+
+ IX86_BUILTIN_VPMACSSWW,
+ IX86_BUILTIN_VPMACSWW,
+ IX86_BUILTIN_VPMACSSWD,
+ IX86_BUILTIN_VPMACSWD,
+ IX86_BUILTIN_VPMACSSDD,
+ IX86_BUILTIN_VPMACSDD,
+ IX86_BUILTIN_VPMACSSDQL,
+ IX86_BUILTIN_VPMACSSDQH,
+ IX86_BUILTIN_VPMACSDQL,
+ IX86_BUILTIN_VPMACSDQH,
+ IX86_BUILTIN_VPMADCSSWD,
+ IX86_BUILTIN_VPMADCSWD,
+
+ IX86_BUILTIN_VPHADDBW,
+ IX86_BUILTIN_VPHADDBD,
+ IX86_BUILTIN_VPHADDBQ,
+ IX86_BUILTIN_VPHADDWD,
+ IX86_BUILTIN_VPHADDWQ,
+ IX86_BUILTIN_VPHADDDQ,
+ IX86_BUILTIN_VPHADDUBW,
+ IX86_BUILTIN_VPHADDUBD,
+ IX86_BUILTIN_VPHADDUBQ,
+ IX86_BUILTIN_VPHADDUWD,
+ IX86_BUILTIN_VPHADDUWQ,
+ IX86_BUILTIN_VPHADDUDQ,
+ IX86_BUILTIN_VPHSUBBW,
+ IX86_BUILTIN_VPHSUBWD,
+ IX86_BUILTIN_VPHSUBDQ,
+
+ IX86_BUILTIN_VPROTB,
+ IX86_BUILTIN_VPROTW,
+ IX86_BUILTIN_VPROTD,
+ IX86_BUILTIN_VPROTQ,
+ IX86_BUILTIN_VPROTB_IMM,
+ IX86_BUILTIN_VPROTW_IMM,
+ IX86_BUILTIN_VPROTD_IMM,
+ IX86_BUILTIN_VPROTQ_IMM,
+
+ IX86_BUILTIN_VPSHLB,
+ IX86_BUILTIN_VPSHLW,
+ IX86_BUILTIN_VPSHLD,
+ IX86_BUILTIN_VPSHLQ,
+ IX86_BUILTIN_VPSHAB,
+ IX86_BUILTIN_VPSHAW,
+ IX86_BUILTIN_VPSHAD,
+ IX86_BUILTIN_VPSHAQ,
+
+ IX86_BUILTIN_VFRCZSS,
+ IX86_BUILTIN_VFRCZSD,
+ IX86_BUILTIN_VFRCZPS,
+ IX86_BUILTIN_VFRCZPD,
+ IX86_BUILTIN_VFRCZPS256,
+ IX86_BUILTIN_VFRCZPD256,
+
+ IX86_BUILTIN_VPCOMEQUB,
+ IX86_BUILTIN_VPCOMNEUB,
+ IX86_BUILTIN_VPCOMLTUB,
+ IX86_BUILTIN_VPCOMLEUB,
+ IX86_BUILTIN_VPCOMGTUB,
+ IX86_BUILTIN_VPCOMGEUB,
+ IX86_BUILTIN_VPCOMFALSEUB,
+ IX86_BUILTIN_VPCOMTRUEUB,
+
+ IX86_BUILTIN_VPCOMEQUW,
+ IX86_BUILTIN_VPCOMNEUW,
+ IX86_BUILTIN_VPCOMLTUW,
+ IX86_BUILTIN_VPCOMLEUW,
+ IX86_BUILTIN_VPCOMGTUW,
+ IX86_BUILTIN_VPCOMGEUW,
+ IX86_BUILTIN_VPCOMFALSEUW,
+ IX86_BUILTIN_VPCOMTRUEUW,
+
+ IX86_BUILTIN_VPCOMEQUD,
+ IX86_BUILTIN_VPCOMNEUD,
+ IX86_BUILTIN_VPCOMLTUD,
+ IX86_BUILTIN_VPCOMLEUD,
+ IX86_BUILTIN_VPCOMGTUD,
+ IX86_BUILTIN_VPCOMGEUD,
+ IX86_BUILTIN_VPCOMFALSEUD,
+ IX86_BUILTIN_VPCOMTRUEUD,
+
+ IX86_BUILTIN_VPCOMEQUQ,
+ IX86_BUILTIN_VPCOMNEUQ,
+ IX86_BUILTIN_VPCOMLTUQ,
+ IX86_BUILTIN_VPCOMLEUQ,
+ IX86_BUILTIN_VPCOMGTUQ,
+ IX86_BUILTIN_VPCOMGEUQ,
+ IX86_BUILTIN_VPCOMFALSEUQ,
+ IX86_BUILTIN_VPCOMTRUEUQ,
+
+ IX86_BUILTIN_VPCOMEQB,
+ IX86_BUILTIN_VPCOMNEB,
+ IX86_BUILTIN_VPCOMLTB,
+ IX86_BUILTIN_VPCOMLEB,
+ IX86_BUILTIN_VPCOMGTB,
+ IX86_BUILTIN_VPCOMGEB,
+ IX86_BUILTIN_VPCOMFALSEB,
+ IX86_BUILTIN_VPCOMTRUEB,
+
+ IX86_BUILTIN_VPCOMEQW,
+ IX86_BUILTIN_VPCOMNEW,
+ IX86_BUILTIN_VPCOMLTW,
+ IX86_BUILTIN_VPCOMLEW,
+ IX86_BUILTIN_VPCOMGTW,
+ IX86_BUILTIN_VPCOMGEW,
+ IX86_BUILTIN_VPCOMFALSEW,
+ IX86_BUILTIN_VPCOMTRUEW,
+
+ IX86_BUILTIN_VPCOMEQD,
+ IX86_BUILTIN_VPCOMNED,
+ IX86_BUILTIN_VPCOMLTD,
+ IX86_BUILTIN_VPCOMLED,
+ IX86_BUILTIN_VPCOMGTD,
+ IX86_BUILTIN_VPCOMGED,
+ IX86_BUILTIN_VPCOMFALSED,
+ IX86_BUILTIN_VPCOMTRUED,
+
+ IX86_BUILTIN_VPCOMEQQ,
+ IX86_BUILTIN_VPCOMNEQ,
+ IX86_BUILTIN_VPCOMLTQ,
+ IX86_BUILTIN_VPCOMLEQ,
+ IX86_BUILTIN_VPCOMGTQ,
+ IX86_BUILTIN_VPCOMGEQ,
+ IX86_BUILTIN_VPCOMFALSEQ,
+ IX86_BUILTIN_VPCOMTRUEQ,
+
+ /* LWP instructions. */
+ IX86_BUILTIN_LLWPCB,
+ IX86_BUILTIN_SLWPCB,
+ IX86_BUILTIN_LWPVAL32,
+ IX86_BUILTIN_LWPVAL64,
+ IX86_BUILTIN_LWPINS32,
+ IX86_BUILTIN_LWPINS64,
+
+ IX86_BUILTIN_CLZS,
+
+ /* BMI instructions. */
+ IX86_BUILTIN_BEXTR32,
+ IX86_BUILTIN_BEXTR64,
+ IX86_BUILTIN_CTZS,
+
+ /* TBM instructions. */
+ IX86_BUILTIN_BEXTRI32,
+ IX86_BUILTIN_BEXTRI64,
+
+ /* FSGSBASE instructions. */
+ IX86_BUILTIN_RDFSBASE32,
+ IX86_BUILTIN_RDFSBASE64,
+ IX86_BUILTIN_RDGSBASE32,
+ IX86_BUILTIN_RDGSBASE64,
+ IX86_BUILTIN_WRFSBASE32,
+ IX86_BUILTIN_WRFSBASE64,
+ IX86_BUILTIN_WRGSBASE32,
+ IX86_BUILTIN_WRGSBASE64,
+
+ /* RDRND instructions. */
+ IX86_BUILTIN_RDRAND16_STEP,
+ IX86_BUILTIN_RDRAND32_STEP,
+ IX86_BUILTIN_RDRAND64_STEP,
+
+ /* F16C instructions. */
+ IX86_BUILTIN_CVTPH2PS,
+ IX86_BUILTIN_CVTPH2PS256,
+ IX86_BUILTIN_CVTPS2PH,
+ IX86_BUILTIN_CVTPS2PH256,
+
+ /* CFString built-in for darwin */
+ IX86_BUILTIN_CFSTRING,
+
+ IX86_BUILTIN_MAX
+};
+
+/* Table for the ix86 builtin decls. */
+static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
+
+/* Table of all the builtin functions that are possible with different ISAs
+ but are waiting to be built until a function is declared to use that
+ ISA. */
+struct builtin_isa {
+ const char *name; /* function name */
+ enum ix86_builtin_func_type tcode; /* type to use in the declaration */
+ int isa; /* isa_flags this builtin is defined for */
+ bool const_p; /* true if the declaration is constant */
+ bool set_and_not_built_p; /* true if the decl creation is deferred */
+};
+
+static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+
+
+/* Add an ix86 target builtin function with CODE, NAME and TCODE. Save the
+ MASK of which isa_flags to use in the ix86_builtins_isa array. Stores the
+ function decl in the ix86_builtins array. Returns the function decl or
+ NULL_TREE if the builtin was not added.
+
+ If the front end has a special hook for builtin functions, delay adding
+ builtin functions that aren't in the current ISA until the ISA is changed
+ with function specific optimization. Doing so can save about 300K for the
+ default compiler. When the builtin is expanded, check at that time whether
+ it is valid.
+
+ If the front end doesn't have a special hook, record all builtins, even
+ those not in the current ISA, in case the user uses function specific
+ options for a different ISA; that way we don't get scope errors if a
+ builtin is added in the middle of a function scope. */
+
+static inline tree
+def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
+ enum ix86_builtins code)
+{
+ tree decl = NULL_TREE;
+
+ if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
+ {
+ ix86_builtins_isa[(int) code].isa = mask;
+
+ mask &= ~OPTION_MASK_ISA_64BIT;
+ if (mask == 0
+ || (mask & ix86_isa_flags) != 0
+ || (lang_hooks.builtin_function
+ == lang_hooks.builtin_function_ext_scope))
+ {
+ tree type = ix86_get_builtin_func_type (tcode);
+ decl = add_builtin_function (name, type, code, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ix86_builtins[(int) code] = decl;
+ ix86_builtins_isa[(int) code].set_and_not_built_p = false;
+ }
+ else
+ {
+ ix86_builtins[(int) code] = NULL_TREE;
+ ix86_builtins_isa[(int) code].tcode = tcode;
+ ix86_builtins_isa[(int) code].name = name;
+ ix86_builtins_isa[(int) code].const_p = false;
+ ix86_builtins_isa[(int) code].set_and_not_built_p = true;
+ }
+ }
+
+ return decl;
+}
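+
+/* For example, the SSE packed-add builtin could be registered with
+
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
+ V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
+
+ an illustrative call; the real registrations are driven by the
+ bdesc_* tables below. */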
+
+/* Like def_builtin, but also marks the function decl "const". */
+
+static inline tree
+def_builtin_const (int mask, const char *name,
+ enum ix86_builtin_func_type tcode, enum ix86_builtins code)
+{
+ tree decl = def_builtin (mask, name, tcode, code);
+ if (decl)
+ TREE_READONLY (decl) = 1;
+ else
+ ix86_builtins_isa[(int) code].const_p = true;
+
+ return decl;
+}
+
+/* Add any new builtin functions for a given ISA that may not have been
+ declared. This saves a bit of space compared to adding every
+ declaration to the tree up front, whether or not it ends up used. */
+
+static void
+ix86_add_new_builtins (int isa)
+{
+ int i;
+
+ for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
+ {
+ if ((ix86_builtins_isa[i].isa & isa) != 0
+ && ix86_builtins_isa[i].set_and_not_built_p)
+ {
+ tree decl, type;
+
+ /* Don't define the builtin again. */
+ ix86_builtins_isa[i].set_and_not_built_p = false;
+
+ type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
+ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
+ type, i, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ ix86_builtins[i] = decl;
+ if (ix86_builtins_isa[i].const_p)
+ TREE_READONLY (decl) = 1;
+ }
+ }
+}
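+
+/* For example (an assumption about the caller, which is outside this
+ excerpt), switching a function to __attribute__((target ("avx")))
+ would lead the option handling code to call
+
+ ix86_add_new_builtins (OPTION_MASK_ISA_AVX);
+
+ materializing any AVX builtins that def_builtin deferred. */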
+
+/* Bits for builtin_description.flag. */
+
+/* Set when we don't support the comparison natively, and should
+ swap the comparison operands in order to support it. */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
+struct builtin_description
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum ix86_builtins code;
+ const enum rtx_code comparison;
+ const int flag;
+};
+
+static const struct builtin_description bdesc_comi[] =
+{
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
+};
+
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+/* Special builtins with variable number of arguments. */
+static const struct builtin_description bdesc_special_args[] =
+{
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
+
+ /* MMX */
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* 3DNow! */
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* SSE */
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+
+ /* SSE or 3DNow!A */
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
+
+ /* SSE2 */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+
+ /* SSE3 */
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
+
+ /* SSE4A */
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
+
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
+
+ /* FSGSBASE */
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
+ { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
+};
+
+/* Builtins with variable number of arguments. */
+static const struct builtin_description bdesc_args[] =
+{
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
+ { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
+
+ /* MMX */
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+
+ /* 3DNow! */
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ /* 3DNow!A */
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+
+ /* SSE */
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+
+ /* SSE MMX or 3DNow!A */
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
+
+ /* SSE2 */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
+ /* SSE2 MMX */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
+
+ /* SSE3 */
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ /* SSSE3 */
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
+
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ /* SSSE3 */
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
+
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* SSE4A */
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ /* AES */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ /* PCLMUL */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+
+ { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+
+ /* BMI */
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+ { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+
+ /* TBM */
+ { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* F16C */
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+};
+
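+/* Each row above binds one builtin name to one insn pattern and one
+   prototype; e.g. the sqrtpd256 row lets a call such as
+
+     __m256d y = __builtin_ia32_sqrtpd256 (x);
+
+   (illustrative) expand through CODE_FOR_sqrtv4df2 with the
+   V4DF_FTYPE_V4DF signature. */
+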
+/* FMA4 and XOP. */
+#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
+#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
+#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
+#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
+#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
+#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
+#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
+#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
+#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
+#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
+#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
+#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
+#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
+#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
+#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
+#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
+#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
+#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
+#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
+#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
+#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
+#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
+#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
+#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
+#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
+#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
+#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
+#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
+#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
+#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
+#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
+#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
+#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
+#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
+#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
+#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
+#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
+#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
+#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
+#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
+#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
+#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
+#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
+#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
+#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
+#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
+#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
+#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
+#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
+#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
+#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
+#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
+
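+/* These macros abbreviate the generated prototype names: the component
+   before FTYPE is the result vector, the components after it are the
+   operands, so e.g. MULTI_ARG_3_SF stands for V4SF_FTYPE_V4SF_V4SF_V4SF,
+   a builtin taking three V4SF operands and returning a V4SF. */
+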
+static const struct builtin_description bdesc_multi_arg[] =
+{
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
+ "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
+ "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
+ "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
+ UNKNOWN, (int)MULTI_ARG_3_SF2 },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
+ "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
+ UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
+ "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
+ "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
+ "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
+ UNKNOWN, (int)MULTI_ARG_3_SF2 },
+ { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
+ "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
+ UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
+
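+  /* Note that the "vpcomneqb" spelling above is an alias: it reuses
+     IX86_BUILTIN_VPCOMNEB, so both names expand to the same NE
+     comparison.  The same pairing recurs in each vpcom group below. */
+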
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
+
+};
+
+/* Set up all the MMX/SSE builtins, including builtins for instructions
+   that are not in the current target ISA, so that the user can compile
+   particular modules with target-specific options that differ from the
+   command-line options. */
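+/* For instance (illustrative), a file compiled with plain -msse2 can
+   still use an AVX builtin from a function given its own target options:
+
+     __attribute__((target ("avx")))
+     __m256d f (__m256d x) { return __builtin_ia32_sqrtpd256 (x); }
+
+   which relies on the builtin having been registered here up front. */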
+static void
+ix86_init_mmx_sse_builtins (void)
+{
+ const struct builtin_description * d;
+ enum ix86_builtin_func_type ftype;
+ size_t i;
+
+ /* Add all special builtins with variable number of operands. */
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ def_builtin (d->mask, d->name, ftype, d->code);
+ }
+
+ /* Add all builtins with variable number of operands. */
+ for (i = 0, d = bdesc_args;
+ i < ARRAY_SIZE (bdesc_args);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpestr[im] insns. */
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
+ else
+ ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpistr[im] insns. */
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ ftype = V16QI_FTYPE_V16QI_V16QI_INT;
+ else
+ ftype = INT_FTYPE_V16QI_V16QI_INT;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* comi/ucomi insns. */
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ {
+ if (d->mask == OPTION_MASK_ISA_SSE2)
+ ftype = INT_FTYPE_V2DF_V2DF;
+ else
+ ftype = INT_FTYPE_V4SF_V4SF;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* SSE */
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
+ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
+ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
+
+ /* SSE or 3DNow!A */
+ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
+ IX86_BUILTIN_MASKMOVQ);
+
+ /* SSE2 */
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
+ VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
+
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
+ x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
+ VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
+
+ /* SSE3. */
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
+ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
+ VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
+
+ /* AES */
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
+ V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
+ V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
+
+ /* PCLMUL */
+ def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
+ V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
+
+ /* RDRND */
+ def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
+ INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
+ def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
+ INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
+ def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
+ "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
+ IX86_BUILTIN_RDRAND64_STEP);
+
+ /* MMX access to the vec_init patterns. */
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
+ V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
+ V4HI_FTYPE_HI_HI_HI_HI,
+ IX86_BUILTIN_VEC_INIT_V4HI);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
+ V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
+ IX86_BUILTIN_VEC_INIT_V8QI);
+
+ /* Access to the vec_extract patterns. */
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
+ DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
+ DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
+ FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
+ SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
+ HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ "__builtin_ia32_vec_ext_v4hi",
+ HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
+ SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
+ QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
+
+ /* Access to the vec_set patterns. */
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
+ "__builtin_ia32_vec_set_v2di",
+ V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
+ V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
+ V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
+ V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ "__builtin_ia32_vec_set_v4hi",
+ V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
+
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
+ V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
+
+  /* Add FMA4 and XOP multi-argument builtins. */
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+}
+
+/* Internal subroutine of ix86_init_builtins. */
+
+static void
+ix86_init_builtins_va_builtins_abi (void)
+{
+ tree ms_va_ref, sysv_va_ref;
+ tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
+ tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
+ tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
+ tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
+
+ if (!TARGET_64BIT)
+ return;
+ fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
+ fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
+ ms_va_ref = build_reference_type (ms_va_list_type_node);
+ sysv_va_ref =
+ build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
+
+ fnvoid_va_end_ms =
+ build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
+ fnvoid_va_start_ms =
+ build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
+ fnvoid_va_end_sysv =
+ build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
+ fnvoid_va_start_sysv =
+ build_varargs_function_type_list (void_type_node, sysv_va_ref,
+ NULL_TREE);
+ fnvoid_va_copy_ms =
+ build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
+ NULL_TREE);
+ fnvoid_va_copy_sysv =
+ build_function_type_list (void_type_node, sysv_va_ref,
+ sysv_va_ref, NULL_TREE);
+
+ add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+ add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+ add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+}
+
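+/* A sketch of what these enable (illustrative): under the 64-bit SysV
+   ABI, an ms_abi varargs function can walk its arguments with the
+   MS-flavored va_list registered above:
+
+     __attribute__((ms_abi)) int
+     first_arg (int n, ...)
+     {
+       __builtin_ms_va_list ap;
+       int v;
+       __builtin_ms_va_start (ap, n);
+       v = __builtin_va_arg (ap, int);
+       __builtin_ms_va_end (ap);
+       return v;
+     }
+*/
+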
+static void
+ix86_init_builtin_types (void)
+{
+ tree float128_type_node, float80_type_node;
+
+ /* The __float80 type. */
+ float80_type_node = long_double_type_node;
+ if (TYPE_MODE (float80_type_node) != XFmode)
+ {
+      /* long double does not have XFmode here; build a distinct 80-bit type. */
+ float80_type_node = make_node (REAL_TYPE);
+
+ TYPE_PRECISION (float80_type_node) = 80;
+ layout_type (float80_type_node);
+ }
+ lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
+
+ /* The __float128 type. */
+ float128_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (float128_type_node) = 128;
+ layout_type (float128_type_node);
+ lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
+
+ /* This macro is built by i386-builtin-types.awk. */
+ DEFINE_BUILTIN_PRIMITIVE_TYPES;
+}
+
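+/* Once registered, both types are directly usable from C, e.g.
+   (illustrative, using GCC's w/W and q/Q constant suffixes):
+
+     __float80  w = 1.5w;
+     __float128 q = 1.5q;
+*/
+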
+static void
+ix86_init_builtins (void)
+{
+ tree t;
+
+ ix86_init_builtin_types ();
+
+ /* TFmode support builtins. */
+ def_builtin_const (0, "__builtin_infq",
+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
+ def_builtin_const (0, "__builtin_huge_valq",
+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
+
+  /* We will expand them to a normal call if SSE2 isn't available, since
+     they are used by libgcc. */
+ t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
+ t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
+ BUILT_IN_MD, "__fabstf2", NULL_TREE);
+ TREE_READONLY (t) = 1;
+ ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
+
+ t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
+ t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
+ BUILT_IN_MD, "__copysigntf3", NULL_TREE);
+ TREE_READONLY (t) = 1;
+ ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
+
+ ix86_init_mmx_sse_builtins ();
+
+ if (TARGET_64BIT)
+ ix86_init_builtins_va_builtins_abi ();
+
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
+}
+
+/* Return the ix86 builtin for CODE. */
+
+static tree
+ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= IX86_BUILTIN_MAX)
+ return error_mark_node;
+
+ return ix86_builtins[code];
+}
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x == const0_rtx)
+ x = CONST0_RTX (mode);
+ return x;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of binop insns. */
+
+static rtx
+ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || !insn_data[icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
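+  /* If the pattern wants a TImode operand but the argument is SImode,
+     load the value into the low element of a V4SI register and use the
+     TImode lowpart of that register. */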
+ if (GET_MODE (op1) == SImode && mode1 == TImode)
+ {
+ rtx x = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadd (x, op1));
+ op1 = gen_lowpart (TImode, x);
+ }
+
+ if (!insn_data[icode].operand[1].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!insn_data[icode].operand[2].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
+
+static rtx
+ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
+ enum ix86_builtin_func_type m_type,
+ enum rtx_code sub_code)
+{
+ rtx pat;
+ int i;
+ int nargs;
+ bool comparison_p = false;
+ bool tf_p = false;
+ bool last_arg_constant = false;
+ int num_memory = 0;
+ struct {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+
+ switch (m_type)
+ {
+ case MULTI_ARG_4_DF2_DI_I:
+ case MULTI_ARG_4_DF2_DI_I1:
+ case MULTI_ARG_4_SF2_SI_I:
+ case MULTI_ARG_4_SF2_SI_I1:
+ nargs = 4;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_3_SF:
+ case MULTI_ARG_3_DF:
+ case MULTI_ARG_3_SF2:
+ case MULTI_ARG_3_DF2:
+ case MULTI_ARG_3_DI:
+ case MULTI_ARG_3_SI:
+ case MULTI_ARG_3_SI_DI:
+ case MULTI_ARG_3_HI:
+ case MULTI_ARG_3_HI_SI:
+ case MULTI_ARG_3_QI:
+ case MULTI_ARG_3_DI2:
+ case MULTI_ARG_3_SI2:
+ case MULTI_ARG_3_HI2:
+ case MULTI_ARG_3_QI2:
+ nargs = 3;
+ break;
+
+ case MULTI_ARG_2_SF:
+ case MULTI_ARG_2_DF:
+ case MULTI_ARG_2_DI:
+ case MULTI_ARG_2_SI:
+ case MULTI_ARG_2_HI:
+ case MULTI_ARG_2_QI:
+ nargs = 2;
+ break;
+
+ case MULTI_ARG_2_DI_IMM:
+ case MULTI_ARG_2_SI_IMM:
+ case MULTI_ARG_2_HI_IMM:
+ case MULTI_ARG_2_QI_IMM:
+ nargs = 2;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_1_SF:
+ case MULTI_ARG_1_DF:
+ case MULTI_ARG_1_SF2:
+ case MULTI_ARG_1_DF2:
+ case MULTI_ARG_1_DI:
+ case MULTI_ARG_1_SI:
+ case MULTI_ARG_1_HI:
+ case MULTI_ARG_1_QI:
+ case MULTI_ARG_1_SI_DI:
+ case MULTI_ARG_1_HI_DI:
+ case MULTI_ARG_1_HI_SI:
+ case MULTI_ARG_1_QI_DI:
+ case MULTI_ARG_1_QI_SI:
+ case MULTI_ARG_1_QI_HI:
+ nargs = 1;
+ break;
+
+ case MULTI_ARG_2_DI_CMP:
+ case MULTI_ARG_2_SI_CMP:
+ case MULTI_ARG_2_HI_CMP:
+ case MULTI_ARG_2_QI_CMP:
+ nargs = 2;
+ comparison_p = true;
+ break;
+
+ case MULTI_ARG_2_SF_TF:
+ case MULTI_ARG_2_DF_TF:
+ case MULTI_ARG_2_DI_TF:
+ case MULTI_ARG_2_SI_TF:
+ case MULTI_ARG_2_HI_TF:
+ case MULTI_ARG_2_QI_TF:
+ nargs = 2;
+ tf_p = true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || !insn_data[icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ gcc_assert (nargs <= 4);
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ int adjust = (comparison_p) ? 1 : 0;
+ enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
+
+ if (last_arg_constant && i == nargs - 1)
+ {
+ if (!insn_data[icode].operand[i + 1].predicate (op, mode))
+ {
+ enum insn_code new_icode = icode;
+ switch (icode)
+ {
+ case CODE_FOR_xop_vpermil2v2df3:
+ case CODE_FOR_xop_vpermil2v4sf3:
+ case CODE_FOR_xop_vpermil2v4df3:
+ case CODE_FOR_xop_vpermil2v8sf3:
+ error ("the last argument must be a 2-bit immediate");
+ return gen_reg_rtx (tmode);
+ case CODE_FOR_xop_rotlv2di3:
+ new_icode = CODE_FOR_rotlv2di3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv4si3:
+ new_icode = CODE_FOR_rotlv4si3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv8hi3:
+ new_icode = CODE_FOR_rotlv8hi3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv16qi3:
+ new_icode = CODE_FOR_rotlv16qi3;
+ xop_rotl:
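+	      /* Rotates are periodic in the element width, so a constant
+		 count can be reduced by masking (e.g. with 63 for V2DI);
+		 the masked value must still satisfy the predicate. */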
+ if (CONST_INT_P (op))
+ {
+ int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+ op = GEN_INT (INTVAL (op) & mask);
+ gcc_checking_assert
+ (insn_data[icode].operand[i + 1].predicate (op, mode));
+ }
+ else
+ {
+ gcc_checking_assert
+ (nargs == 2
+ && insn_data[new_icode].operand[0].mode == tmode
+ && insn_data[new_icode].operand[1].mode == tmode
+ && insn_data[new_icode].operand[2].mode == mode
+ && insn_data[new_icode].operand[0].predicate
+ == insn_data[icode].operand[0].predicate
+ && insn_data[new_icode].operand[1].predicate
+ == insn_data[icode].operand[1].predicate);
+ icode = new_icode;
+ goto non_constant;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+ else
+ {
+ non_constant:
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ /* If we aren't optimizing, only allow one memory operand to be
+ generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
+
+ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
+
+ if (optimize
+ || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
+ || num_memory > 1)
+ op = force_reg (mode, op);
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+
+ case 2:
+ if (tf_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ GEN_INT ((int)sub_code));
+ else if (! comparison_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ else
+ {
+ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
+ args[0].op,
+ args[1].op);
+
+ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
+ }
+ break;
+
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
+
+ case 4:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
+ insns with vec_merge. */
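+/* E.g. a scalar rsqrt-style pattern computes the operation on element 0
+   and takes the remaining elements from a second input; here that second
+   input is just the first operand again (op1 = op0 below). */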
+
+static rtx
+ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op1, op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || !insn_data[icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[icode].operand[1].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ op1 = op0;
+ if (!insn_data[icode].operand[2].predicate (op1, mode0))
+ op1 = copy_to_mode_reg (mode0, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
+
+static rtx
+ix86_expand_sse_compare (const struct builtin_description *d,
+ tree exp, rtx target, bool swap)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+  /* Swap operands if we have a comparison that isn't available in
+     hardware (e.g. a "greater than" builtin is emitted as the available
+     "less than" insn with the operands interchanged). */
+ if (swap)
+ {
+ rtx tmp = gen_reg_rtx (mode1);
+ emit_move_insn (tmp, op1);
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || !insn_data[d->icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[d->icode].operand[1].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_data[d->icode].operand[2].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
+
+static rtx
+ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
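+  /* The comparison insn only sets the flags; materialize the boolean
+     result by zeroing an SImode register and setting its low QImode part
+     from the flags via a setcc-style assignment. */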
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
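+/* Three result flavors are handled below: PCMPESTRI128 returns the index,
+   PCMPESTRM128 returns the mask, and the remaining variants (d->flag
+   nonzero) return a single condition-code bit read from FLAGS_REG. */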
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !insn_data[d->icode].operand[4].predicate (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
+ {
+ error ("the fifth argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !insn_data[d->icode].operand[3].predicate (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
+ {
+ error ("the third argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || !insn_data[d->icode].operand[0].predicate (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || !insn_data[d->icode].operand[1].predicate (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
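+
+/* A sketch of the flag-returning variants handled by the d->flag branch
+ above (assuming the <smmintrin.h> wrappers; not part of this file):
+
+ #include <smmintrin.h>
+ int any_match (__m128i a, __m128i b)
+ {
+ return _mm_cmpistrc (a, b, _SIDD_CMP_EQUAL_ANY);
+ }
+
+ Both string-result operands become scratch registers, and the builtin's
+ value is recovered from FLAGS_REG by the STRICT_LOW_PART sequence. */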
+
+/* Subroutine of ix86_expand_builtin to take care of insns with
+ variable number of operands. */
+
+static rtx
+ix86_expand_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, real_target;
+ unsigned int i, nargs;
+ unsigned int nargs_constant = 0;
+ int num_memory = 0;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+ bool last_arg_count = false;
+ enum insn_code icode = d->icode;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum machine_mode rmode = VOIDmode;
+ bool swap = false;
+ enum rtx_code comparison = d->comparison;
+
+ switch ((enum ix86_builtin_func_type) d->flag)
+ {
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ return ix86_expand_sse_ptest (d, exp, target);
+ case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
+ case INT_FTYPE_INT:
+ case UINT64_FTYPE_INT:
+ case UINT16_FTYPE_UINT16:
+ case INT64_FTYPE_INT64:
+ case INT64_FTYPE_V4SF:
+ case INT64_FTYPE_V2DF:
+ case INT_FTYPE_V16QI:
+ case INT_FTYPE_V8QI:
+ case INT_FTYPE_V8SF:
+ case INT_FTYPE_V4DF:
+ case INT_FTYPE_V4SF:
+ case INT_FTYPE_V2DF:
+ case V16QI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8SF:
+ case V8SI_FTYPE_V4SI:
+ case V8HI_FTYPE_V8HI:
+ case V8HI_FTYPE_V16QI:
+ case V8QI_FTYPE_V8QI:
+ case V8SF_FTYPE_V8SF:
+ case V8SF_FTYPE_V8SI:
+ case V8SF_FTYPE_V4SF:
+ case V8SF_FTYPE_V8HI:
+ case V4SI_FTYPE_V4SI:
+ case V4SI_FTYPE_V16QI:
+ case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8SI:
+ case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V4DF:
+ case V4SI_FTYPE_V2DF:
+ case V4HI_FTYPE_V4HI:
+ case V4DF_FTYPE_V4DF:
+ case V4DF_FTYPE_V4SI:
+ case V4DF_FTYPE_V4SF:
+ case V4DF_FTYPE_V2DF:
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V8SF:
+ case V4SF_FTYPE_V4DF:
+ case V4SF_FTYPE_V8HI:
+ case V4SF_FTYPE_V2DF:
+ case V2DI_FTYPE_V2DI:
+ case V2DI_FTYPE_V16QI:
+ case V2DI_FTYPE_V8HI:
+ case V2DI_FTYPE_V4SI:
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4DF:
+ case V2DF_FTYPE_V4SF:
+ case V2DF_FTYPE_V2SI:
+ case V2SI_FTYPE_V2SI:
+ case V2SI_FTYPE_V4SF:
+ case V2SI_FTYPE_V2SF:
+ case V2SI_FTYPE_V2DF:
+ case V2SF_FTYPE_V2SF:
+ case V2SF_FTYPE_V2SI:
+ nargs = 1;
+ break;
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
+ case FLOAT128_FTYPE_FLOAT128_FLOAT128:
+ case V16QI_FTYPE_V16QI_V16QI:
+ case V16QI_FTYPE_V8HI_V8HI:
+ case V8QI_FTYPE_V8QI_V8QI:
+ case V8QI_FTYPE_V4HI_V4HI:
+ case V8HI_FTYPE_V8HI_V8HI:
+ case V8HI_FTYPE_V16QI_V16QI:
+ case V8HI_FTYPE_V4SI_V4SI:
+ case V8SF_FTYPE_V8SF_V8SF:
+ case V8SF_FTYPE_V8SF_V8SI:
+ case V4SI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V8HI_V8HI:
+ case V4SI_FTYPE_V4SF_V4SF:
+ case V4SI_FTYPE_V2DF_V2DF:
+ case V4HI_FTYPE_V4HI_V4HI:
+ case V4HI_FTYPE_V8QI_V8QI:
+ case V4HI_FTYPE_V2SI_V2SI:
+ case V4DF_FTYPE_V4DF_V4DF:
+ case V4DF_FTYPE_V4DF_V4DI:
+ case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SI:
+ case V4SF_FTYPE_V4SF_V2SI:
+ case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_DI:
+ case V4SF_FTYPE_V4SF_SI:
+ case V2DI_FTYPE_V2DI_V2DI:
+ case V2DI_FTYPE_V16QI_V16QI:
+ case V2DI_FTYPE_V4SI_V4SI:
+ case V2DI_FTYPE_V2DI_V16QI:
+ case V2DI_FTYPE_V2DF_V2DF:
+ case V2SI_FTYPE_V2SI_V2SI:
+ case V2SI_FTYPE_V4HI_V4HI:
+ case V2SI_FTYPE_V2SF_V2SF:
+ case V2DF_FTYPE_V2DF_V2DF:
+ case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DI:
+ case V2DF_FTYPE_V2DF_DI:
+ case V2DF_FTYPE_V2DF_SI:
+ case V2SF_FTYPE_V2SF_V2SF:
+ case V1DI_FTYPE_V1DI_V1DI:
+ case V1DI_FTYPE_V8QI_V8QI:
+ case V1DI_FTYPE_V2SI_V2SI:
+ if (comparison == UNKNOWN)
+ return ix86_expand_binop_builtin (icode, exp, target);
+ nargs = 2;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_SWAP:
+ case V2DF_FTYPE_V2DF_V2DF_SWAP:
+ gcc_assert (comparison != UNKNOWN);
+ nargs = 2;
+ swap = true;
+ break;
+ case V8HI_FTYPE_V8HI_V8HI_COUNT:
+ case V8HI_FTYPE_V8HI_SI_COUNT:
+ case V4SI_FTYPE_V4SI_V4SI_COUNT:
+ case V4SI_FTYPE_V4SI_SI_COUNT:
+ case V4HI_FTYPE_V4HI_V4HI_COUNT:
+ case V4HI_FTYPE_V4HI_SI_COUNT:
+ case V2DI_FTYPE_V2DI_V2DI_COUNT:
+ case V2DI_FTYPE_V2DI_SI_COUNT:
+ case V2SI_FTYPE_V2SI_V2SI_COUNT:
+ case V2SI_FTYPE_V2SI_SI_COUNT:
+ case V1DI_FTYPE_V1DI_V1DI_COUNT:
+ case V1DI_FTYPE_V1DI_SI_COUNT:
+ nargs = 2;
+ last_arg_count = true;
+ break;
+ case UINT64_FTYPE_UINT64_UINT64:
+ case UINT_FTYPE_UINT_UINT:
+ case UINT_FTYPE_UINT_USHORT:
+ case UINT_FTYPE_UINT_UCHAR:
+ case UINT16_FTYPE_UINT16_INT:
+ case UINT8_FTYPE_UINT8_INT:
+ nargs = 2;
+ break;
+ case V2DI_FTYPE_V2DI_INT_CONVERT:
+ nargs = 2;
+ rmode = V1TImode;
+ nargs_constant = 1;
+ break;
+ case V8HI_FTYPE_V8HI_INT:
+ case V8HI_FTYPE_V8SF_INT:
+ case V8HI_FTYPE_V4SF_INT:
+ case V8SF_FTYPE_V8SF_INT:
+ case V4SI_FTYPE_V4SI_INT:
+ case V4SI_FTYPE_V8SI_INT:
+ case V4HI_FTYPE_V4HI_INT:
+ case V4DF_FTYPE_V4DF_INT:
+ case V4SF_FTYPE_V4SF_INT:
+ case V4SF_FTYPE_V8SF_INT:
+ case V2DI_FTYPE_V2DI_INT:
+ case V2DF_FTYPE_V2DF_INT:
+ case V2DF_FTYPE_V4DF_INT:
+ nargs = 2;
+ nargs_constant = 1;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ nargs = 3;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_INT:
+ case V8HI_FTYPE_V8HI_V8HI_INT:
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ case V4SI_FTYPE_V4SI_V4SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ case V4SF_FTYPE_V4SF_V4SF_INT:
+ case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT:
+ nargs = 3;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
+ nargs = 3;
+ rmode = V2DImode;
+ nargs_constant = 1;
+ break;
+ case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
+ nargs = 3;
+ rmode = DImode;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_UINT_UINT:
+ nargs = 3;
+ nargs_constant = 2;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ nargs = 4;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
+ nargs = 4;
+ nargs_constant = 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (comparison != UNKNOWN)
+ {
+ gcc_assert (nargs == 2);
+ return ix86_expand_sse_compare (d, exp, target, swap);
+ }
+
+ if (rmode == VOIDmode || rmode == tmode)
+ {
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || !insn_p->operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+ real_target = target;
+ }
+ else
+ {
+ target = gen_reg_rtx (rmode);
+ real_target = simplify_gen_subreg (tmode, target, rmode, 0);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match = insn_p->operand[i + 1].predicate (op, mode);
+
+ if (last_arg_count && (i + 1) == nargs)
+ {
+ /* SIMD shift insns take either an 8-bit immediate or a register as
+ the count, but the builtin functions take an int. If the count
+ operand doesn't match, put it in a register. */
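+ /* For example, __builtin_ia32_psllwi128 (v, n) takes an int count;
+ when N is not an 8-bit immediate, the SImode subreg below (and, if
+ the predicate still rejects it, a register copy) turns it into the
+ register form of the shift (a sketch of the common path). */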
+ if (!match)
+ {
+ op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
+ if (!insn_p->operand[i + 1].predicate (op, mode))
+ op = copy_to_reg (op);
+ }
+ }
+ else if ((nargs - i) <= nargs_constant)
+ {
+ if (!match)
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
+ case CODE_FOR_avx_blendpd256:
+ case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
+ error ("the last argument must be a 4-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_sse4_1_blendpd:
+ case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_xop_vpermil2v2df3:
+ case CODE_FOR_xop_vpermil2v4sf3:
+ case CODE_FOR_xop_vpermil2v4df3:
+ case CODE_FOR_xop_vpermil2v8sf3:
+ error ("the last argument must be a 2-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_vextractf128v4df:
+ case CODE_FOR_avx_vextractf128v8sf:
+ case CODE_FOR_avx_vextractf128v8si:
+ case CODE_FOR_avx_vinsertf128v4df:
+ case CODE_FOR_avx_vinsertf128v8sf:
+ case CODE_FOR_avx_vinsertf128v8si:
+ error ("the last argument must be a 1-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_cmpsdv2df3:
+ case CODE_FOR_avx_cmpssv4sf3:
+ case CODE_FOR_avx_cmppdv2df3:
+ case CODE_FOR_avx_cmppsv4sf3:
+ case CODE_FOR_avx_cmppdv4df3:
+ case CODE_FOR_avx_cmppsv8sf3:
+ error ("the last argument must be a 5-bit immediate");
+ return const0_rtx;
+
+ default:
+ switch (nargs_constant)
+ {
+ case 2:
+ if ((nargs - i) == nargs_constant)
+ {
+ error ("the next to last argument must be an 8-bit immediate");
+ break;
+ }
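+ /* FALLTHRU */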
+ case 1:
+ error ("the last argument must be an 8-bit immediate");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ /* If we aren't optimizing, only allow one memory operand to
+ be generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
+
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ {
+ if (optimize || !match || num_memory > 1)
+ op = copy_to_mode_reg (mode, op);
+ }
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (real_target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of special insns
+ with variable number of operands. */
+
+static rtx
+ix86_expand_special_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ tree arg;
+ rtx pat, op;
+ unsigned int i, nargs, arg_adjust, memory;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[3];
+ enum insn_code icode = d->icode;
+ bool last_arg_constant = false;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum { load, store } klass;
+
+ switch ((enum ix86_builtin_func_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ if (icode == CODE_FOR_avx_vzeroupper)
+ target = GEN_INT (vzeroupper_intrinsic);
+ emit_insn (GEN_FCN (icode) (target));
+ return 0;
+ case VOID_FTYPE_UINT64:
+ case VOID_FTYPE_UNSIGNED:
+ nargs = 0;
+ klass = store;
+ memory = 0;
+ break;
+ case UINT64_FTYPE_VOID:
+ case UNSIGNED_FTYPE_VOID:
+ nargs = 0;
+ klass = load;
+ memory = 0;
+ break;
+ case UINT64_FTYPE_PUNSIGNED:
+ case V2DI_FTYPE_PV2DI:
+ case V32QI_FTYPE_PCCHAR:
+ case V16QI_FTYPE_PCCHAR:
+ case V8SF_FTYPE_PCV4SF:
+ case V8SF_FTYPE_PCFLOAT:
+ case V4SF_FTYPE_PCFLOAT:
+ case V4DF_FTYPE_PCV2DF:
+ case V4DF_FTYPE_PCDOUBLE:
+ case V2DF_FTYPE_PCDOUBLE:
+ case VOID_FTYPE_PVOID:
+ nargs = 1;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV4DI_V4DI:
+ case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V32QI:
+ case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V8SF:
+ case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V4DF:
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PULONGLONG_ULONGLONG:
+ case VOID_FTYPE_PINT_INT:
+ nargs = 1;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ nargs = 2;
+ klass = load;
+ memory = 1;
+ break;
+ case V8SF_FTYPE_PCV8SF_V8SI:
+ case V4DF_FTYPE_PCV4DF_V4DI:
+ case V4SF_FTYPE_PCV4SF_V4SI:
+ case V2DF_FTYPE_PCV2DF_V2DI:
+ nargs = 2;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV8SF_V8SI_V8SF:
+ case VOID_FTYPE_PV4DF_V4DI_V4DF:
+ case VOID_FTYPE_PV4SF_V4SI_V4SF:
+ case VOID_FTYPE_PV2DF_V2DI_V2DF:
+ nargs = 2;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case VOID_FTYPE_UINT_UINT_UINT:
+ case VOID_FTYPE_UINT64_UINT_UINT:
+ case UCHAR_FTYPE_UINT_UINT_UINT:
+ case UCHAR_FTYPE_UINT64_UINT_UINT:
+ nargs = 3;
+ klass = load;
+ memory = ARRAY_SIZE (args);
+ last_arg_constant = true;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (klass == store)
+ {
+ arg = CALL_EXPR_ARG (exp, 0);
+ op = expand_normal (arg);
+ gcc_assert (target == 0);
+ if (memory)
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ else
+ target = force_reg (tmode, op);
+ arg_adjust = 1;
+ }
+ else
+ {
+ arg_adjust = 0;
+ if (optimize
+ || target == 0
+ || !register_operand (target, tmode)
+ || GET_MODE (target) != tmode)
+ target = gen_reg_rtx (tmode);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match;
+
+ arg = CALL_EXPR_ARG (exp, i + arg_adjust);
+ op = expand_normal (arg);
+ match = insn_p->operand[i + 1].predicate (op, mode);
+
+ if (last_arg_constant && (i + 1) == nargs)
+ {
+ if (!match)
+ {
+ if (icode == CODE_FOR_lwp_lwpvalsi3
+ || icode == CODE_FOR_lwp_lwpinssi3
+ || icode == CODE_FOR_lwp_lwpvaldi3
+ || icode == CODE_FOR_lwp_lwpinsdi3)
+ error ("the last argument must be a 32-bit immediate");
+ else
+ error ("the last argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (i == memory)
+ {
+ /* This must be the memory operand. */
+ op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ }
+ else
+ {
+ /* This must be a register. */
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ op = copy_to_mode_reg (mode, op);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return klass == store ? 0 : target;
+}
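+
+/* Two representative expansions through the classifier above (a sketch):
+ __builtin_ia32_loadupd (V2DF_FTYPE_PCDOUBLE) is a load whose single
+ pointer argument becomes the memory operand (memory == 0, i == memory),
+ while __builtin_ia32_storeupd (VOID_FTYPE_PDOUBLE_V2DF) is a store, so
+ the pointer becomes the target MEM and arg_adjust skips past it. */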
+
+/* Return the integer constant in ARG. Constrain it to be in the range
+ of the subparts of VEC_TYPE; issue an error if not. */
+
+static int
+get_element_number (tree vec_type, tree arg)
+{
+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
+
+ if (!host_integerp (arg, 1)
+ || (elt = tree_low_cst (arg, 1), elt > max))
+ {
+ error ("selector must be an integer constant in the range 0..%wi", max);
+ return 0;
+ }
+
+ return elt;
+}
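+
+/* For example, with VEC_TYPE V4SF the valid selectors are 0..3; anything
+ else (or a non-constant) triggers the error above, and element 0 is
+ used as a benign fallback. */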
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_init. We DO have language-level syntax for this, in
+ the form of (type){ init-list }. Except that since we can't place emms
+ instructions from inside the compiler, we can't allow the use of MMX
+ registers unless the user explicitly asks for it. So we do *not* define
+ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
+ we have builtins invoked by mmintrin.h that give us license to emit
+ these sorts of instructions. */
+
+static rtx
+ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
+{
+ enum machine_mode tmode = TYPE_MODE (type);
+ enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ int i, n_elt = GET_MODE_NUNITS (tmode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ gcc_assert (VECTOR_MODE_P (tmode));
+ gcc_assert (call_expr_nargs (exp) == n_elt);
+
+ for (i = 0; i < n_elt; ++i)
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
+ return target;
+}
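+
+/* A sketch of how this is reached: the mmintrin.h wrappers expand to the
+ vec_init builtins, roughly
+
+ _mm_set_pi32 (i1, i0) => (__m64) __builtin_ia32_vec_init_v2si (i0, i1)
+
+ so the call arrives here with n_elt == 2 and one PARALLEL element per
+ argument. */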
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
+ had a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_ext_builtin (tree exp, rtx target)
+{
+ enum machine_mode tmode, mode0;
+ tree arg0, arg1;
+ int elt;
+ rtx op0;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ elt = get_element_number (TREE_TYPE (arg0), arg1);
+
+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode0 = TYPE_MODE (TREE_TYPE (arg0));
+ gcc_assert (VECTOR_MODE_P (mode0));
+
+ op0 = force_reg (mode0, op0);
+
+ if (optimize || !target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_extract (true, target, op0, elt);
+
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
+ a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_set_builtin (tree exp)
+{
+ enum machine_mode tmode, mode1;
+ tree arg0, arg1, arg2;
+ int elt;
+ rtx op0, op1, target;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
+ elt = get_element_number (TREE_TYPE (arg0), arg2);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+
+ /* OP0 is the source of these builtin functions and shouldn't be
+ modified. Create a copy, use it and return it as target. */
+ target = gen_reg_rtx (tmode);
+ emit_move_insn (target, op0);
+ ix86_expand_vector_set (true, target, op1, elt);
+
+ return target;
+}
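+
+/* A sketch of a caller (gcc's <emmintrin.h> wrapper, roughly):
+
+ _mm_insert_epi16 (v, d, n)
+ => (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, d, n)
+
+ ARG0 is copied first, so the source vector itself is left unmodified,
+ per the comment above. */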
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ const struct builtin_description *d;
+ size_t i;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0, arg1, arg2;
+ rtx op0, op1, op2, pat;
+ enum machine_mode mode0, mode1, mode2;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ /* Determine whether the builtin function is available under the current ISA.
+ Originally the builtin was not created if it wasn't applicable to the
+ current ISA based on the command line switches. With function specific
+ options, we need to check in the context of the function making the call
+ whether it is supported. */
+ if (ix86_builtins_isa[fcode].isa
+ && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
+ {
+ char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
+ NULL, NULL, false);
+
+ if (!opts)
+ error ("%qE needs unknown isa option", fndecl);
+ else
+ {
+ gcc_assert (opts != NULL);
+ error ("%qE needs isa option %s", fndecl, opts);
+ free (opts);
+ }
+ return const0_rtx;
+ }
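+
+ /* E.g. calling a 256-bit AVX builtin in a function compiled without
+ AVX enabled is diagnosed here as, roughly,
+ "'__builtin_ia32_sqrtpd256' needs isa option -mavx". */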
+
+ switch (fcode)
+ {
+ case IX86_BUILTIN_MASKMOVQ:
+ case IX86_BUILTIN_MASKMOVDQU:
+ icode = (fcode == IX86_BUILTIN_MASKMOVQ
+ ? CODE_FOR_mmx_maskmovq
+ : CODE_FOR_sse2_maskmovdqu);
+ /* Note the arg order is different from the operand order. */
+ arg1 = CALL_EXPR_ARG (exp, 0);
+ arg2 = CALL_EXPR_ARG (exp, 1);
+ arg0 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (Pmode, op0);
+ op0 = gen_rtx_MEM (mode1, op0);
+
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (!insn_data[icode].operand[2].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ pat = GEN_FCN (icode) (op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return 0;
+
+ case IX86_BUILTIN_LDMXCSR:
+ op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_move_insn (target, op0);
+ emit_insn (gen_sse_ldmxcsr (target));
+ return 0;
+
+ case IX86_BUILTIN_STMXCSR:
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_insn (gen_sse_stmxcsr (target));
+ return copy_to_mode_reg (SImode, target);
+
+ case IX86_BUILTIN_CLFLUSH:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_sse2_clflush;
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
+ emit_insn (gen_sse2_clflush (op0));
+ return 0;
+
+ case IX86_BUILTIN_MONITOR:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ if (!REG_P (op2))
+ op2 = copy_to_mode_reg (SImode, op2);
+ emit_insn (ix86_gen_monitor (op0, op1, op2));
+ return 0;
+
+ case IX86_BUILTIN_MWAIT:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ emit_insn (gen_sse3_mwait (op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_VEC_INIT_V2SI:
+ case IX86_BUILTIN_VEC_INIT_V4HI:
+ case IX86_BUILTIN_VEC_INIT_V8QI:
+ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
+
+ case IX86_BUILTIN_VEC_EXT_V2DF:
+ case IX86_BUILTIN_VEC_EXT_V2DI:
+ case IX86_BUILTIN_VEC_EXT_V4SF:
+ case IX86_BUILTIN_VEC_EXT_V4SI:
+ case IX86_BUILTIN_VEC_EXT_V8HI:
+ case IX86_BUILTIN_VEC_EXT_V2SI:
+ case IX86_BUILTIN_VEC_EXT_V4HI:
+ case IX86_BUILTIN_VEC_EXT_V16QI:
+ return ix86_expand_vec_ext_builtin (exp, target);
+
+ case IX86_BUILTIN_VEC_SET_V2DI:
+ case IX86_BUILTIN_VEC_SET_V4SF:
+ case IX86_BUILTIN_VEC_SET_V4SI:
+ case IX86_BUILTIN_VEC_SET_V8HI:
+ case IX86_BUILTIN_VEC_SET_V4HI:
+ case IX86_BUILTIN_VEC_SET_V16QI:
+ return ix86_expand_vec_set_builtin (exp);
+
+ case IX86_BUILTIN_VEC_PERM_V2DF:
+ case IX86_BUILTIN_VEC_PERM_V4SF:
+ case IX86_BUILTIN_VEC_PERM_V2DI:
+ case IX86_BUILTIN_VEC_PERM_V4SI:
+ case IX86_BUILTIN_VEC_PERM_V8HI:
+ case IX86_BUILTIN_VEC_PERM_V16QI:
+ case IX86_BUILTIN_VEC_PERM_V2DI_U:
+ case IX86_BUILTIN_VEC_PERM_V4SI_U:
+ case IX86_BUILTIN_VEC_PERM_V8HI_U:
+ case IX86_BUILTIN_VEC_PERM_V16QI_U:
+ case IX86_BUILTIN_VEC_PERM_V4DF:
+ case IX86_BUILTIN_VEC_PERM_V8SF:
+ return ix86_expand_vec_perm_builtin (exp);
+
+ case IX86_BUILTIN_INFQ:
+ case IX86_BUILTIN_HUGE_VALQ:
+ {
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
+
+ tmp = validize_mem (force_const_mem (mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ case IX86_BUILTIN_LLWPCB:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_lwp_llwpcb;
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ emit_insn (gen_lwp_llwpcb (op0));
+ return 0;
+
+ case IX86_BUILTIN_SLWPCB:
+ icode = CODE_FOR_lwp_slwpcb;
+ if (!target
+ || !insn_data[icode].operand[0].predicate (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+ emit_insn (gen_lwp_slwpcb (target));
+ return target;
+
+ case IX86_BUILTIN_BEXTRI32:
+ case IX86_BUILTIN_BEXTRI64:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ icode = (fcode == IX86_BUILTIN_BEXTRI32
+ ? CODE_FOR_tbm_bextri_si
+ : CODE_FOR_tbm_bextri_di);
+ if (!CONST_INT_P (op1))
+ {
+ error ("last argument must be an immediate");
+ return const0_rtx;
+ }
+ else
+ {
+ unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
+ unsigned char lsb_index = INTVAL (op1) & 0xFF;
+ op1 = GEN_INT (length);
+ op2 = GEN_INT (lsb_index);
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (pat)
+ emit_insn (pat);
+ return target;
+ }
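+
+ /* Worked example for the decomposition above: an immediate of
+ 0x0508 splits into length 0x05 (bits 15:8) and lsb_index 0x08
+ (bits 7:0), i.e. extract five bits starting at bit 8. */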
+
+ case IX86_BUILTIN_RDRAND16_STEP:
+ icode = CODE_FOR_rdrandhi_1;
+ mode0 = HImode;
+ goto rdrand_step;
+
+ case IX86_BUILTIN_RDRAND32_STEP:
+ icode = CODE_FOR_rdrandsi_1;
+ mode0 = SImode;
+ goto rdrand_step;
+
+ case IX86_BUILTIN_RDRAND64_STEP:
+ icode = CODE_FOR_rdranddi_1;
+ mode0 = DImode;
+
+rdrand_step:
+ op0 = gen_reg_rtx (mode0);
+ emit_insn (GEN_FCN (icode) (op0));
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op1 = expand_normal (arg0);
+ if (!address_operand (op1, VOIDmode))
+ op1 = copy_addr_to_reg (op1);
+ emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+
+ op1 = gen_reg_rtx (SImode);
+ emit_move_insn (op1, CONST1_RTX (SImode));
+
+ /* Emit SImode conditional move. */
+ if (mode0 == HImode)
+ {
+ op2 = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendhisi2 (op2, op0));
+ }
+ else if (mode0 == SImode)
+ op2 = op0;
+ else
+ op2 = gen_rtx_SUBREG (SImode, op0, 0);
+
+ if (target == 0)
+ target = gen_reg_rtx (SImode);
+
+ pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
+ return target;
+
+ default:
+ break;
+ }
+
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_special_args_builtin (d, exp, target);
+
+ for (i = 0, d = bdesc_args;
+ i < ARRAY_SIZE (bdesc_args);
+ i++, d++)
+ if (d->code == fcode)
+ switch (fcode)
+ {
+ case IX86_BUILTIN_FABSQ:
+ case IX86_BUILTIN_COPYSIGNQ:
+ if (!TARGET_SSE2)
+ /* Emit a normal call if SSE2 isn't available. */
+ return expand_call (exp, target, ignore);
+ default:
+ return ix86_expand_args_builtin (d, exp, target);
+ }
+
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_comi (d, exp, target);
+
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpestr (d, exp, target);
+
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpistr (d, exp, target);
+
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_multi_arg_builtin (d->icode, exp, target,
+ (enum ix86_builtin_func_type)
+ d->flag, d->comparison);
+
+ gcc_unreachable ();
+}
+
+/* Returns a function decl for a vectorized version of the builtin function
+ with builtin function code FN, result vector type TYPE_OUT and input
+ vector type TYPE_IN, or NULL_TREE if it is not available. */
+
+static tree
+ix86_builtin_vectorized_function (tree fndecl, tree type_out,
+ tree type_in)
+{
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+ return NULL_TREE;
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+ switch (fn)
+ {
+ case BUILT_IN_SQRT:
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+ }
+ break;
+
+ case BUILT_IN_SQRTF:
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+ }
+ break;
+
+ case BUILT_IN_LRINT:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ break;
+
+ case BUILT_IN_LRINTF:
+ if (out_mode == SImode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
+ }
+ break;
+
+ case BUILT_IN_COPYSIGN:
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+ }
+ break;
+
+ case BUILT_IN_COPYSIGNF:
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+ }
+ break;
+
+ case BUILT_IN_FMA:
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
+ }
+ break;
+
+ case BUILT_IN_FMAF:
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_VFMADDPS];
+ if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Dispatch to a handler for a vectorization library. */
+ if (ix86_veclib_handler)
+ return ix86_veclib_handler ((enum built_in_function) fn, type_out,
+ type_in);
+
+ return NULL_TREE;
+}
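+
+/* E.g. a double loop such as
+
+ for (i = 0; i < n; i++) out[i] = __builtin_sqrt (in[i]);
+
+ vectorized with V2DF input and output types is mapped here to
+ IX86_BUILTIN_SQRTPD (a sketch; the vectorizer supplies TYPE_OUT and
+ TYPE_IN). */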
+
+/* Handler for an SVML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20];
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* SVML is suitable for unsafe math only. */
+ if (!flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG10:
+ case BUILT_IN_POW:
+ case BUILT_IN_TANH:
+ case BUILT_IN_TAN:
+ case BUILT_IN_ATAN:
+ case BUILT_IN_ATAN2:
+ case BUILT_IN_ATANH:
+ case BUILT_IN_CBRT:
+ case BUILT_IN_SINH:
+ case BUILT_IN_SIN:
+ case BUILT_IN_ASINH:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_COSH:
+ case BUILT_IN_COS:
+ case BUILT_IN_ACOSH:
+ case BUILT_IN_ACOS:
+ if (el_mode != DFmode || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_EXPF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG10F:
+ case BUILT_IN_POWF:
+ case BUILT_IN_TANHF:
+ case BUILT_IN_TANF:
+ case BUILT_IN_ATANF:
+ case BUILT_IN_ATAN2F:
+ case BUILT_IN_ATANHF:
+ case BUILT_IN_CBRTF:
+ case BUILT_IN_SINHF:
+ case BUILT_IN_SINF:
+ case BUILT_IN_ASINHF:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_COSHF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_ACOSHF:
+ case BUILT_IN_ACOSF:
+ if (el_mode != SFmode || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+
+ if (fn == BUILT_IN_LOGF)
+ strcpy (name, "vmlsLn4");
+ else if (fn == BUILT_IN_LOG)
+ strcpy (name, "vmldLn2");
+ else if (n == 4)
+ {
+ sprintf (name, "vmls%s", bname+10);
+ name[strlen (name)-1] = '4';
+ }
+ else
+ sprintf (name, "vmld%s2", bname+10);
+
+ /* Convert to uppercase. */
+ name[4] &= ~0x20;
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (BUILTINS_LOCATION,
+ FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
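+
+/* Worked example of the mangling above: for BUILT_IN_SINF with n == 4,
+ bname is "__builtin_sinf" and bname+10 is "sinf"; the sprintf yields
+ "vmlssinf", the final character is overwritten to give "vmlssin4", and
+ clearing bit 0x20 of name[4] uppercases it to "vmlsSin4". */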
+
+/* Handler for an ACML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20] = "__vr.._";
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* ACML is 64-bit only and suitable for unsafe math only, as it does
+ not correctly support parts of IEEE arithmetic with the required
+ precision, such as denormals. */
+ if (!TARGET_64BIT
+ || !flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_SIN:
+ case BUILT_IN_COS:
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG2:
+ case BUILT_IN_LOG10:
+ name[4] = 'd';
+ name[5] = '2';
+ if (el_mode != DFmode
+ || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_SINF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_EXPF:
+ case BUILT_IN_POWF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG2F:
+ case BUILT_IN_LOG10F:
+ name[4] = 's';
+ name[5] = '4';
+ if (el_mode != SFmode
+ || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+ sprintf (name + 7, "%s", bname+10);
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (BUILTINS_LOCATION,
+ FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
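+
+/* Worked example: for BUILT_IN_SIN the "__vr.._" template becomes
+ "__vrd2_" (name[4] = 'd', name[5] = '2'), and appending bname+10 of
+ "__builtin_sin" yields the ACML entry point "__vrd2_sin". */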
+
+
+/* Returns a decl of a function that implements conversion of an integer vector
+ into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
+ are the types involved when converting according to CODE.
+ Return NULL_TREE if it is not available. */
+
+static tree
+ix86_vectorize_builtin_conversion (unsigned int code,
+ tree dest_type, tree src_type)
+{
+ if (! TARGET_SSE2)
+ return NULL_TREE;
+
+ switch (code)
+ {
+ case FLOAT_EXPR:
+ switch (TYPE_MODE (src_type))
+ {
+ case V4SImode:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+ case V8SImode:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+ default:
+ return NULL_TREE;
+ }
+
+ case FIX_TRUNC_EXPR:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V4SImode:
+ switch (TYPE_MODE (src_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
+ case V8SImode:
+ switch (TYPE_MODE (src_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ default:
+ return NULL_TREE;
+ }
+
+ return NULL_TREE;
+}
+
+/* Returns a decl for a target-specific builtin that implements the
+ reciprocal of the function FN, or NULL_TREE if not available. */
+
+static tree
+ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
+ bool sqrt ATTRIBUTE_UNUSED)
+{
+ if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations))
+ return NULL_TREE;
+
+ if (md_fn)
+ /* Machine dependent builtins. */
+ switch (fn)
+ {
+ /* Vectorized version of sqrt to rsqrt conversion. */
+ case IX86_BUILTIN_SQRTPS_NR:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
+
+ case IX86_BUILTIN_SQRTPS_NR256:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
+
+ default:
+ return NULL_TREE;
+ }
+ else
+ /* Normal builtins. */
+ switch (fn)
+ {
+ /* Sqrt to rsqrt conversion. */
+ case BUILT_IN_SQRTF:
+ return ix86_builtins[IX86_BUILTIN_RSQRTF];
+
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Helper for avx_vpermilps256_operand et al. This is also used by
+ the expansion functions to turn the parallel back into a mask.
+ The return value is 0 for no match, and imm8 + 1 for a match. */
+
+int
+avx_vpermilp_parallel (rtx par, enum machine_mode mode)
+{
+ unsigned i, nelt = GET_MODE_NUNITS (mode);
+ unsigned mask = 0;
+ unsigned char ipar[8];
+
+ if (XVECLEN (par, 0) != (int) nelt)
+ return 0;
+
+ /* Validate that all of the elements are constants, and not totally
+ out of range. Copy the data into an integral array to make the
+ subsequent checks easier. */
+ for (i = 0; i < nelt; ++i)
+ {
+ rtx er = XVECEXP (par, 0, i);
+ unsigned HOST_WIDE_INT ei;
+
+ if (!CONST_INT_P (er))
+ return 0;
+ ei = INTVAL (er);
+ if (ei >= nelt)
+ return 0;
+ ipar[i] = ei;
+ }
+
+ switch (mode)
+ {
+ case V4DFmode:
+ /* In the 256-bit DFmode case, we can only move elements within
+ a 128-bit lane. */
+ for (i = 0; i < 2; ++i)
+ {
+ if (ipar[i] >= 2)
+ return 0;
+ mask |= ipar[i] << i;
+ }
+ for (i = 2; i < 4; ++i)
+ {
+ if (ipar[i] < 2)
+ return 0;
+ mask |= (ipar[i] - 2) << i;
+ }
+ break;
+
+ case V8SFmode:
+ /* In the 256-bit SFmode case, we have full freedom of movement
+ within the low 128-bit lane, but the high 128-bit lane must
+ mirror the exact same pattern. */
+ for (i = 0; i < 4; ++i)
+ if (ipar[i] + 4 != ipar[i + 4])
+ return 0;
+ nelt = 4;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V4SFmode:
+ /* In the 128-bit case, we've full freedom in the placement of
+ the elements from the source operand. */
+ for (i = 0; i < nelt; ++i)
+ mask |= ipar[i] << (i * (nelt / 2));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Make sure success has a non-zero value by adding one. */
+ return mask + 1;
+}
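+
+/* Worked example: for V8SFmode a PARALLEL of [1 0 3 2 5 4 7 6] passes
+ the lane-mirroring check (ipar[i] + 4 == ipar[i + 4]); the low four
+ elements then encode mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and the
+ function returns 0xb2 (imm8 0xb1 plus one). */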
+
+/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
+ the expansion functions to turn the parallel back into a mask.
+ The return value is 0 for no match, and imm8 + 1 for a match. */
+
+int
+avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
+{
+ unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
+ unsigned mask = 0;
+ unsigned char ipar[8];
+
+ if (XVECLEN (par, 0) != (int) nelt)
+ return 0;
+
+ /* Validate that all of the elements are constants, and not totally
+ out of range. Copy the data into an integral array to make the
+ subsequent checks easier. */
+ for (i = 0; i < nelt; ++i)
+ {
+ rtx er = XVECEXP (par, 0, i);
+ unsigned HOST_WIDE_INT ei;
+
+ if (!CONST_INT_P (er))
+ return 0;
+ ei = INTVAL (er);
+ if (ei >= 2 * nelt)
+ return 0;
+ ipar[i] = ei;
+ }
+
+ /* Validate that each half of the permute selects consecutive elements. */
+ for (i = 0; i < nelt2 - 1; ++i)
+ if (ipar[i] + 1 != ipar[i + 1])
+ return 0;
+ for (i = nelt2; i < nelt - 1; ++i)
+ if (ipar[i] + 1 != ipar[i + 1])
+ return 0;
+
+ /* Reconstruct the mask. */
+ for (i = 0; i < 2; ++i)
+ {
+ unsigned e = ipar[i * nelt2];
+ if (e % nelt2)
+ return 0;
+ e /= nelt2;
+ mask |= e << (i * 4);
+ }
+
+ /* Make sure success has a non-zero value by adding one. */
+ return mask + 1;
+}
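+
+/* Worked example: for V8SFmode a PARALLEL of [4 5 6 7 8 9 10 11] has
+ consecutive halves; ipar[0] = 4 gives 4/4 = 1 in the low nibble and
+ ipar[4] = 8 gives 8/4 = 2 in the high nibble, i.e. imm8 0x21 (low
+ lane from operand 0's high half, high lane from operand 1's low
+ half), returned as 0x22. */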
+
+
+/* Store OPERAND to memory after reload has completed. This means
+ that we can't easily use assign_stack_local. */
+rtx
+ix86_force_to_memory (enum machine_mode mode, rtx operand)
+{
+ rtx result;
+
+ gcc_assert (reload_completed);
+ if (ix86_using_red_zone ())
+ {
+ result = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-RED_ZONE_SIZE)));
+ emit_move_insn (result, operand);
+ }
+ else if (TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ operand = gen_lowpart (DImode, operand);
+ /* FALLTHRU */
+ case DImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ gen_rtx_PRE_DEC (DImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ else
+ {
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx operands[2];
+ split_double_mode (mode, &operand, 1, operands, operands + 1);
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[1]));
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[0]));
+ }
+ break;
+ case HImode:
+ /* Store HImodes as SImodes. */
+ operand = gen_lowpart (SImode, operand);
+ /* FALLTHRU */
+ case SImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (operand),
+ gen_rtx_PRE_DEC (SImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ return result;
+}
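+
+/* Note the 32-bit DImode path above pushes the high word first and the
+ low word second, so the returned MEM at the stack pointer sees the
+ value in the usual little-endian layout. */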
+
+/* Free the operand from memory. */
+void
+ix86_free_from_memory (enum machine_mode mode)
+{
+ if (!ix86_using_red_zone ())
+ {
+ int size;
+
+ if (mode == DImode || TARGET_64BIT)
+ size = 8;
+ else
+ size = 4;
+ /* Use LEA to deallocate stack space. In peephole2 it will be converted
+ to a pop or add instruction if registers are available. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (size))));
+ }
+}
+
+/* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
+ SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
+ same. */
+static const reg_class_t *
+i386_ira_cover_classes (void)
+{
+ static const reg_class_t sse_fpmath_classes[] = {
+ GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
+ };
+ static const reg_class_t no_sse_fpmath_classes[] = {
+ GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
+ };
+
+ return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS.
+
+ Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+
+static reg_class_t
+ix86_preferred_reload_class (rtx x, reg_class_t regclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
+ following checks fail for NO_REGS, so eliminate that early. */
+ if (regclass == NO_REGS)
+ return NO_REGS;
+
+ /* All classes can load zeros. */
+ if (x == CONST0_RTX (mode))
+ return regclass;
+
+ /* Force constants into memory if we are loading a (nonzero) constant into
+ an MMX or SSE register. This is because there are no MMX/SSE instructions
+ to load from a constant. */
+ if (CONSTANT_P (x)
+ && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ return NO_REGS;
+
+ /* Prefer SSE regs only, if we can use them for math. */
+ if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
+ return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+
+ /* Floating-point constants need more complex checks. */
+ if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
+ {
+ /* General regs can load everything. */
+ if (reg_class_subset_p (regclass, GENERAL_REGS))
+ return regclass;
+
+ /* Floats can load 0 and 1 plus some others. Note that we eliminated
+ zero above. We only want to wind up preferring 80387 registers if
+ we plan on doing computation with them. */
+ if (TARGET_80387
+ && standard_80387_constant_p (x) > 0)
+ {
+ /* Limit class to non-sse. */
+ if (regclass == FLOAT_SSE_REGS)
+ return FLOAT_REGS;
+ if (regclass == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ if (regclass == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
+ return regclass;
+ }
+
+ return NO_REGS;
+ }
+
+ /* Generally when we see PLUS here, it's the function invariant
+ (plus soft-fp const_int), which can only be computed into general
+ regs. */
+ if (GET_CODE (x) == PLUS)
+ return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
+
+ /* QImode constants are easy to load, but non-constant QImode data
+ must go into Q_REGS. */
+ if (GET_MODE (x) == QImode && !CONSTANT_P (x))
+ {
+ if (reg_class_subset_p (regclass, Q_REGS))
+ return regclass;
+ if (reg_class_subset_p (Q_REGS, regclass))
+ return Q_REGS;
+ return NO_REGS;
+ }
+
+ return regclass;
+}
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+static reg_class_t
+ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* Restrict the output reload class to the register bank that we are doing
+ math on. If we would like not to return a subset of CLASS, reject this
+ alternative: if reload cannot do this, it will still use its choice. */
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
+
+ if (X87_FLOAT_MODE_P (mode))
+ {
+ if (regclass == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ else if (regclass == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ else
+ return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
+ }
+
+ return regclass;
+}
+
+static reg_class_t
+ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
+ enum machine_mode mode,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ /* QImode spills from non-QI registers require an
+ intermediate register on 32-bit targets. */
+ if (!TARGET_64BIT
+ && !in_p && mode == QImode
+ && (rclass == GENERAL_REGS
+ || rclass == LEGACY_REGS
+ || rclass == INDEX_REGS))
+ {
+ int regno;
+
+ if (REG_P (x))
+ regno = REGNO (x);
+ else
+ regno = -1;
+
+ if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ /* Return Q_REGS if the operand is in memory. */
+ if (regno == -1)
+ return Q_REGS;
+ }
+
+ /* This condition handles the corner case where an expression involving
+ pointers gets vectorized. We're trying to use the address of a
+ stack slot as a vector initializer.
+
+ (set (reg:V2DI 74 [ vect_cst_.2 ])
+ (vec_duplicate:V2DI (reg/f:DI 20 frame)))
+
+ Eventually frame gets turned into sp+offset like this:
+
+ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
+ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
+ (const_int 392 [0x188]))))
+
+ That later gets turned into:
+
+ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
+ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
+ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
+
+ We'll have the following reload recorded:
+
+ Reload 0: reload_in (DI) =
+ (plus:DI (reg/f:DI 7 sp)
+ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
+ reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
+ SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
+ reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
+ reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
+ reload_reg_rtx: (reg:V2DI 22 xmm1)
+
+ Which isn't going to work since SSE instructions can't handle scalar
+ additions. Returning GENERAL_REGS forces the addition into integer
+ register and reload can handle subsequent reloads without problems. */
+
+ if (in_p && GET_CODE (x) == PLUS
+ && SSE_CLASS_P (rclass)
+ && SCALAR_INT_MODE_P (mode))
+ return GENERAL_REGS;
+
+ return NO_REGS;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
+
+static bool
+ix86_class_likely_spilled_p (reg_class_t rclass)
+{
+ switch (rclass)
+ {
+ case AREG:
+ case DREG:
+ case CREG:
+ case BREG:
+ case AD_REGS:
+ case SIREG:
+ case DIREG:
+ case SSE_FIRST_REG:
+ case FP_TOP_REG:
+ case FP_SECOND_REG:
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers.
+
+ To optimize register_move_cost performance, allow inline variant.
+
+ The macro can't work reliably when one of the CLASSES is a class containing
+ registers from multiple units (SSE, MMX, integer). We avoid this by never
+ combining those units in a single alternative in the machine description.
+ Ensure that this constraint holds to avoid unexpected surprises.
+
+ When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
+ enforce these sanity checks. */
+
+static inline bool
+inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
+ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
+ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
+ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
+ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+ {
+ gcc_assert (!strict);
+ return true;
+ }
+
+ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
+ return true;
+
+ /* ??? This is a lie. We do have moves between mmx/general, and for
+ mmx/sse2. But by saying we need secondary memory we discourage the
+ register allocator from using the mmx registers unless needed. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
+ return true;
+
+ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ {
+ /* SSE1 doesn't have any direct moves from other classes. */
+ if (!TARGET_SSE2)
+ return true;
+
+ /* If the target says that inter-unit moves are more expensive
+ than moving through memory, then don't generate them. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ return true;
+
+ /* Between SSE and general, we have moves no larger than word size. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return true;
+ }
+
+ return false;
+}
+
+bool
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ return inline_secondary_memory_needed (class1, class2, mode, strict);
+}
+
+/* Return true if the registers in CLASS cannot represent the change from
+ modes FROM to TO. */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class regclass)
+{
+ if (from == to)
+ return false;
+
+ /* x87 registers can't do subreg at all, as all values are reformatted
+ to extended precision. */
+ if (MAYBE_FLOAT_CLASS_P (regclass))
+ return true;
+
+ if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
+ {
+ /* Vector registers do not support QI or HImode loads. If we don't
+ disallow a change to these modes, reload will assume it's ok to
+ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
+ the vec_dupv4hi pattern. */
+ if (GET_MODE_SIZE (from) < 4)
+ return true;
+
+ /* Vector registers do not support subreg with nonzero offsets, which
+ are otherwise valid for integer registers. Since we can't see
+ whether we have a nonzero offset from here, prohibit all
+ nonparadoxical subregs changing size. */
+ if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return the cost of moving data of mode MODE between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ This function is used extensively by register_move_cost, which is used
+ to build tables at startup, so it is kept inline.
+ When IN is 2, return the maximum of the in and out move costs.
+
+ If moving between registers and memory is more expensive than
+ between two registers, the cost returned here should express that
+ relative cost.
+
+ Also model the increased cost of moving QImode registers in
+ non-Q_REGS classes. */
+static inline int
+inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
+ int in)
+{
+ int cost;
+ if (FLOAT_CLASS_P (regclass))
+ {
+ int index;
+ switch (mode)
+ {
+ case SFmode:
+ index = 0;
+ break;
+ case DFmode:
+ index = 1;
+ break;
+ case XFmode:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
+ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+ }
+ if (SSE_CLASS_P (regclass))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ case 16:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
+ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+ }
+ if (MMX_CLASS_P (regclass))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
+ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+ }
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ if (Q_CLASS_P (regclass) || TARGET_64BIT)
+ {
+ if (!in)
+ return ix86_cost->int_store[0];
+ if (TARGET_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun))
+ cost = ix86_cost->movzbl_load;
+ else
+ cost = ix86_cost->int_load[0];
+ if (in == 2)
+ return MAX (cost, ix86_cost->int_store[0]);
+ return cost;
+ }
+ else
+ {
+ if (in == 2)
+ return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
+ if (in)
+ return ix86_cost->movzbl_load;
+ else
+ return ix86_cost->int_store[0] + 4;
+ }
+ break;
+ case 2:
+ if (in == 2)
+ return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
+ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+ default:
+ /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
+ if (mode == TFmode)
+ mode = XFmode;
+ if (in == 2)
+ cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
+ else if (in)
+ cost = ix86_cost->int_load[2];
+ else
+ cost = ix86_cost->int_store[2];
+ return (cost * (((int) GET_MODE_SIZE (mode)
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+ }
+}
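+
+/* A worked example of the cost model above, assuming a generic cost
+   table: for an SFmode value in FLOAT_REGS, IN == 1 yields
+   ix86_cost->fp_load[0], IN == 0 yields ix86_cost->fp_store[0], and
+   IN == 2 yields the MAX of the two.  For a QImode value in a non-Q
+   class on 32-bit, a load is modeled as movzbl_load and a store as
+   int_store[0] + 4, reflecting the extra shuffling needed because only
+   the Q registers have byte accessors.  */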
+
+static int
+ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
+ bool in)
+{
+ return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
+}
+
+
+/* Return the cost of moving data from a register in class CLASS1 to
+   one in class CLASS2.
+
+   It is not required that the cost always equal 2 when CLASS1 is the same
+   as CLASS2; on some machines it is expensive to move between registers
+   if they are not general registers.  */
+
+static int
+ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
+ reg_class_t class2_i)
+{
+ enum reg_class class1 = (enum reg_class) class1_i;
+ enum reg_class class2 = (enum reg_class) class2_i;
+
+  /* If secondary memory is required, compute the cost of the store followed
+     by the load.  To avoid bad register allocation choices, this needs
+     to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
+
+ if (inline_secondary_memory_needed (class1, class2, mode, 0))
+ {
+ int cost = 1;
+
+ cost += inline_memory_move_cost (mode, class1, 2);
+ cost += inline_memory_move_cost (mode, class2, 2);
+
+      /* When copying from a general-purpose register we may emit multiple
+	 stores followed by a single load, causing a memory-size-mismatch
+	 stall.  Count this as an arbitrarily high cost of 20.  */
+ if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ cost += 20;
+
+ /* In the case of FP/MMX moves, the registers actually overlap, and we
+ have to switch modes in order to treat them differently. */
+ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
+ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
+ cost += 20;
+
+ return cost;
+ }
+
+ /* Moves between SSE/MMX and integer unit are expensive. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
+ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+
+    /* ??? By keeping the returned value relatively high, we limit the number
+       of moves between integer and MMX/SSE registers for all targets.
+       Additionally, a high value prevents a problem with
+       ix86_modes_tieable_p, where integer modes in MMX/SSE registers are
+       not tieable because of missing QImode and HImode moves to, from or
+       between MMX/SSE registers.  */
+ return MAX (8, ix86_cost->mmxsse_to_integer);
+
+ if (MAYBE_FLOAT_CLASS_P (class1))
+ return ix86_cost->fp_move;
+ if (MAYBE_SSE_CLASS_P (class1))
+ return ix86_cost->sse_move;
+ if (MAYBE_MMX_CLASS_P (class1))
+ return ix86_cost->mmx_move;
+ return 2;
+}
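+
+/* For illustration, consider a DImode copy between GENERAL_REGS and
+   SSE_REGS on a 32-bit target, where secondary memory is needed:
+
+	cost = 1
+	     + inline_memory_move_cost (DImode, GENERAL_REGS, 2)
+	     + inline_memory_move_cost (DImode, SSE_REGS, 2)
+	     + 20	(two 32-bit stores are matched by one 64-bit load,
+			 so CLASS_MAX_NREGS differs and the size-mismatch
+			 penalty applies)
+
+   The exact numbers depend on the active cost table; the point is that
+   the result dwarfs the plain register-to-register costs below.  */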
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+bool
+ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+  /* The flags register, and only the flags register, can hold CCmode
+     values.  */
+ if (CC_REGNO_P (regno))
+ return GET_MODE_CLASS (mode) == MODE_CC;
+ if (GET_MODE_CLASS (mode) == MODE_CC
+ || GET_MODE_CLASS (mode) == MODE_RANDOM
+ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ return 0;
+ if (FP_REGNO_P (regno))
+ return VALID_FP_MODE_P (mode);
+ if (SSE_REGNO_P (regno))
+ {
+ /* We implement the move patterns for all vector modes into and
+ out of SSE registers, even when no operation instructions
+ are available. OImode move is available only when AVX is
+ enabled. */
+ return ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+ if (MMX_REGNO_P (regno))
+ {
+ /* We implement the move patterns for 3DNOW modes even in MMX mode,
+ so if the register is available at all, then we can move data of
+ the given mode into or out of it. */
+ return (VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+
+ if (mode == QImode)
+ {
+      /* Take care with QImode values: they can live in non-QI regs,
+	 but then they cause partial register stalls.  */
+ if (regno <= BX_REG || TARGET_64BIT)
+ return 1;
+ if (!TARGET_PARTIAL_REG_STALL)
+ return 1;
+ return reload_in_progress || reload_completed;
+ }
+ /* We handle both integer and floats in the general purpose registers. */
+ else if (VALID_INT_MODE_P (mode))
+ return 1;
+ else if (VALID_FP_MODE_P (mode))
+ return 1;
+ else if (VALID_DFP_MODE_P (mode))
+ return 1;
+ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
+ on to use that value in smaller contexts, this can easily force a
+ pseudo to be allocated to GENERAL_REGS. Since this is no worse than
+ supporting DImode, allow it. */
+ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
+ return 1;
+
+ return 0;
+}
+
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+ tieable integer mode. */
+
+static bool
+ix86_tieable_integer_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ return true;
+
+ case QImode:
+ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
+
+ case DImode:
+ return TARGET_64BIT;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if MODE1 is accessible in a register that can hold MODE2
+ without copying. That is, all register classes that can hold MODE2
+ can also hold MODE1. */
+
+bool
+ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (mode1 == mode2)
+ return true;
+
+ if (ix86_tieable_integer_mode_p (mode1)
+ && ix86_tieable_integer_mode_p (mode2))
+ return true;
+
+ /* MODE2 being XFmode implies fp stack or general regs, which means we
+ can tie any smaller floating point modes to it. Note that we do not
+ tie this with TFmode. */
+ if (mode2 == XFmode)
+ return mode1 == SFmode || mode1 == DFmode;
+
+ /* MODE2 being DFmode implies fp stack, general or sse regs, which means
+ that we can tie it with SFmode. */
+ if (mode2 == DFmode)
+ return mode1 == SFmode;
+
+ /* If MODE2 is only appropriate for an SSE register, then tie with
+ any other mode acceptable to SSE registers. */
+ if (GET_MODE_SIZE (mode2) == 16
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+ return (GET_MODE_SIZE (mode1) == 16
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
+
+ /* If MODE2 is appropriate for an MMX register, then tie
+ with any other mode acceptable to MMX registers. */
+ if (GET_MODE_SIZE (mode2) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
+ return (GET_MODE_SIZE (mode1) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
+
+ return false;
+}
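+
+/* Examples of the tieability rules above: XFmode ties with SFmode and
+   DFmode (all can live on the x87 stack or in general regs), DFmode ties
+   only with SFmode, and two 16-byte modes such as V4SFmode and V2DImode
+   tie with each other whenever both are valid in SSE registers.  */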
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
+{
+ enum rtx_code outer_code = (enum rtx_code) outer_code_i;
+ enum machine_mode mode = GET_MODE (x);
+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
+ *total = 3;
+ else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
+ *total = 2;
+ else if (flag_pic && SYMBOLIC_CONST (x)
+ && (!TARGET_64BIT
+		   || (GET_CODE (x) != LABEL_REF
+ && (GET_CODE (x) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (x)))))
+ *total = 1;
+ else
+ *total = 0;
+ return true;
+
+ case CONST_DOUBLE:
+ if (mode == VOIDmode)
+ *total = 0;
+ else
+ switch (standard_80387_constant_p (x))
+ {
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default: /* Other constants */
+ *total = 2;
+ break;
+ case 0:
+ case -1:
+ /* Start with (MEM (SYMBOL_REF)), since that's where
+ it'll probably end up. Add a penalty for size. */
+ *total = (COSTS_N_INSNS (1)
+ + (flag_pic != 0 && !TARGET_64BIT)
+ + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+ break;
+ }
+ return true;
+
+ case ZERO_EXTEND:
+      /* Zero extension is often completely free on x86_64, so make
+	 it as cheap as possible.  */
+ if (TARGET_64BIT && mode == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode)
+ *total = 1;
+ else if (TARGET_ZERO_EXTEND_WITH_AND)
+ *total = cost->add;
+ else
+ *total = cost->movzx;
+ return false;
+
+ case SIGN_EXTEND:
+ *total = cost->movsx;
+ return false;
+
+ case ASHIFT:
+ if (CONST_INT_P (XEXP (x, 1))
+ && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+ {
+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ if (value == 1)
+ {
+ *total = cost->add;
+ return false;
+ }
+ if ((value == 2 || value == 3)
+ && cost->lea <= cost->shift_const)
+ {
+ *total = cost->lea;
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case ROTATE:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ if (INTVAL (XEXP (x, 1)) > 32)
+ *total = cost->shift_const + COSTS_N_INSNS (2);
+ else
+ *total = cost->shift_const * 2;
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == AND)
+ *total = cost->shift_var * 2;
+ else
+ *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
+ }
+ }
+ else
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ *total = cost->shift_const;
+ else if (GET_CODE (XEXP (x, 1)) == SUBREG
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
+ {
+ /* Return the cost after shift-and truncation. */
+ *total = cost->shift_var;
+ return true;
+ }
+ else
+ *total = cost->shift_var;
+ }
+ return false;
+
+ case FMA:
+ {
+ rtx sub;
+
+ gcc_assert (FLOAT_MODE_P (mode));
+ gcc_assert (TARGET_FMA || TARGET_FMA4);
+
+ /* ??? SSE scalar/vector cost should be used here. */
+ /* ??? Bald assumption that fma has the same cost as fmul. */
+ *total = cost->fmul;
+ *total += rtx_cost (XEXP (x, 1), FMA, speed);
+
+ /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
+ sub = XEXP (x, 0);
+ if (GET_CODE (sub) == NEG)
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, speed);
+
+ sub = XEXP (x, 2);
+ if (GET_CODE (sub) == NEG)
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, speed);
+ return true;
+ }
+
+ case MULT:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE scalar cost should be used here. */
+ *total = cost->fmul;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fmul;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fmul;
+ return false;
+ }
+ else
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ int nbits;
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ for (nbits = 0; value != 0; value &= value - 1)
+ nbits++;
+ }
+ else
+ /* This is arbitrary. */
+ nbits = 7;
+
+ /* Compute costs correctly for widening multiplication. */
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
+ == GET_MODE_SIZE (mode))
+ {
+ int is_mulwiden = 0;
+ enum machine_mode inner_mode = GET_MODE (op0);
+
+ if (GET_CODE (op0) == GET_CODE (op1))
+ is_mulwiden = 1, op1 = XEXP (op1, 0);
+ else if (CONST_INT_P (op1))
+ {
+ if (GET_CODE (op0) == SIGN_EXTEND)
+ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
+ == INTVAL (op1);
+ else
+ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
+ }
+
+ if (is_mulwiden)
+ op0 = XEXP (op0, 0), mode = GET_MODE (op0);
+ }
+
+ *total = (cost->mult_init[MODE_INDEX (mode)]
+ + nbits * cost->mult_bit
+ + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
+
+ return true;
+ }
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fdiv;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fdiv;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fdiv;
+ else
+ *total = cost->divide[MODE_INDEX (mode)];
+ return false;
+
+ case PLUS:
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case MINUS:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE cost should be used here. */
+ *total = cost->fadd;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fadd;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fadd;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (!TARGET_64BIT && mode == DImode)
+ {
+ *total = (cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code, speed)
+ << (GET_MODE (XEXP (x, 0)) != DImode))
+ + (rtx_cost (XEXP (x, 1), outer_code, speed)
+ << (GET_MODE (XEXP (x, 1)) != DImode)));
+ return true;
+ }
+ /* FALLTHRU */
+
+ case NEG:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE cost should be used here. */
+ *total = cost->fchs;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fchs;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fchs;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case NOT:
+ if (!TARGET_64BIT && mode == DImode)
+ *total = cost->add * 2;
+ else
+ *total = cost->add;
+ return false;
+
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && XEXP (XEXP (x, 0), 1) == const1_rtx
+ && CONST_INT_P (XEXP (XEXP (x, 0), 2))
+ && XEXP (x, 1) == const0_rtx)
+ {
+ /* This kind of construct is implemented using test[bwl].
+ Treat it as if we had an AND. */
+ *total = (cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (const1_rtx, outer_code, speed));
+ return true;
+ }
+ return false;
+
+ case FLOAT_EXTEND:
+ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
+ *total = 0;
+ return false;
+
+ case ABS:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fabs;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fabs;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fabs;
+ return false;
+
+ case SQRT:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fsqrt;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fsqrt;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fsqrt;
+ return false;
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_TP)
+ *total = 0;
+ return false;
+
+ case VEC_SELECT:
+ case VEC_CONCAT:
+ case VEC_MERGE:
+ case VEC_DUPLICATE:
+      /* ??? Assume all of these vector manipulation patterns are
+	 recognizable, in which case they all pretty much have the
+	 same cost.  */
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ default:
+ return false;
+ }
+}
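+
+/* As a sketch of the PLUS handling above: an address-like expression such
+   as (plus (mult (reg) (const_int 4)) (reg)) is charged a single lea plus
+   the cost of its operands, matching
+
+	leal	(%eax,%ebx,4), %ecx
+
+   rather than a separate shift and add.  The scale must be 2, 4 or 8;
+   any other multiplier leaves the PLUS costed as a plain add, with the
+   inner MULT costed separately by the normal scan.  */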
+
+#if TARGET_MACHO
+
+static int current_machopic_label_num;
+
+/* Given a symbol name and its associated stub, write out the
+ definition of the stub. */
+
+void
+machopic_output_stub (FILE *file, const char *symb, const char *stub)
+{
+ unsigned int length;
+ char *binder_name, *symbol_name, lazy_ptr_name[32];
+ int label = ++current_machopic_label_num;
+
+ /* For 64-bit we shouldn't get here. */
+ gcc_assert (!TARGET_64BIT);
+
+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
+ symb = targetm.strip_name_encoding (symb);
+
+ length = strlen (stub);
+ binder_name = XALLOCAVEC (char, length + 32);
+ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
+
+ length = strlen (symb);
+ symbol_name = XALLOCAVEC (char, length + 32);
+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
+
+ sprintf (lazy_ptr_name, "L%d$lz", label);
+
+ if (MACHOPIC_ATT_STUB)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
+ else if (MACHOPIC_PURE)
+ {
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
+ else
+ switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
+ }
+ else
+ switch_to_section (darwin_sections[machopic_symbol_stub_section]);
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ if (MACHOPIC_ATT_STUB)
+ {
+ fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
+ }
+ else if (MACHOPIC_PURE)
+ {
+ /* PIC stub. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ {
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
+ }
+ else
+ {
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
+ fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
+ fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
+ }
+ fprintf (file, "\tjmp\t*%%ecx\n");
+ }
+ else
+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
+
+ /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
+ it needs no stub-binding-helper. */
+ if (MACHOPIC_ATT_STUB)
+ return;
+
+ fprintf (file, "%s:\n", binder_name);
+
+ if (MACHOPIC_PURE)
+ {
+ fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
+ fprintf (file, "\tpushl\t%%ecx\n");
+ }
+ else
+ fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
+
+ fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
+
+ /* N.B. Keep the correspondence of these
+ 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
+ old-pic/new-pic/non-pic stubs; altering this will break
+ compatibility with existing dylibs. */
+ if (MACHOPIC_PURE)
+ {
+ /* PIC stubs. */
+ if (TARGET_DEEP_BRANCH_PREDICTION)
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
+ else
+ /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ }
+ else
+ /* 16-byte -mdynamic-no-pic stub. */
+ switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
+
+ fprintf (file, "%s:\n", lazy_ptr_name);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+ fprintf (file, ASM_LONG "%s\n", binder_name);
+}
+#endif /* TARGET_MACHO */
+
+/* Order the registers for register allocator. */
+
+void
+x86_order_regs_for_local_alloc (void)
+{
+ int pos = 0;
+ int i;
+
+ /* First allocate the local general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* Global general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && !call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers come first in case we are doing FP math
+ using them. */
+ if (!TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* SSE registers. */
+ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers. */
+ if (TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+  /* Initialize the rest of the array, as some registers are not
+     allocated at all.  */
+ while (pos < FIRST_PSEUDO_REGISTER)
+ reg_alloc_order [pos++] = 0;
+}
+
+/* Handle a "callee_pop_aggregate_return" attribute; arguments as
+ in struct attribute_spec handler. */
+static tree
+ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ if (TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ if (is_attribute_p ("callee_pop_aggregate_return", name))
+ {
+ tree cst;
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute requires an integer constant argument",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, 0) != 0
+ && compare_tree_int (cst, 1) != 0)
+ {
+ warning (OPT_Wattributes,
+ "argument to %qE attribute is neither zero, nor one",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "ms_abi" or "sysv" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_abi_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ if (!TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Can combine regparm with all attributes but fastcall. */
+ if (is_attribute_p ("ms_abi", name))
+ {
+ if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("ms_abi and sysv_abi attributes are not compatible");
+ }
+
+ return NULL_TREE;
+ }
+ else if (is_attribute_p ("sysv_abi", name))
+ {
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("ms_abi and sysv_abi attributes are not compatible");
+ }
+
+ return NULL_TREE;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_struct_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning (OPT_Wattributes, "%qE incompatible attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
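+/* Handle a function attribute that may only apply to FUNCTION_DECLs;
+   arguments as in struct attribute_spec.handler.  Reject anything that
+   is not a FUNCTION_DECL.  */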
+static tree
+ix86_handle_fndecl_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
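+/* Return true if RECORD_TYPE should use the MS bitfield layout, either
+   because -mms-bitfields is in effect (and the type does not opt out via
+   the gcc_struct attribute) or because the type itself carries the
+   ms_struct attribute.  */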
+static bool
+ix86_ms_bitfield_layout_p (const_tree record_type)
+{
+ return ((TARGET_MS_BITFIELD_LAYOUT
+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
+}
+
+/* Return an RTX indicating where the `this' parameter is
+   located on entry to FUNCTION.  */
+
+static rtx
+x86_this_parameter (tree function)
+{
+ tree type = TREE_TYPE (function);
+ bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ int nregs;
+
+ if (TARGET_64BIT)
+ {
+ const int *parm_regs;
+
+ if (ix86_function_type_abi (type) == MS_ABI)
+ parm_regs = x86_64_ms_abi_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+ return gen_rtx_REG (DImode, parm_regs[aggr]);
+ }
+
+ nregs = ix86_function_regparm (type, function);
+
+ if (nregs > 0 && !stdarg_p (type))
+ {
+ int regno;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ regno = aggr ? DX_REG : CX_REG;
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ {
+ regno = CX_REG;
+ if (aggr)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
+ else
+ {
+ regno = AX_REG;
+ if (aggr)
+ {
+ regno = DX_REG;
+ if (nregs == 1)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
+ }
+ return gen_rtx_REG (SImode, regno);
+ }
+
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
+}
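+
+/* For illustration: with the 32-bit fastcall convention, `this' arrives
+   in %ecx for ordinary methods and in %edx when a hidden aggregate-return
+   pointer occupies %ecx; with thiscall it is always %ecx, and an aggregate
+   return pushes it to 4(%esp).  On 64-bit targets it is simply the first
+   or second integer parameter register of the active ABI.  */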
+
+/* Determine whether x86_output_mi_thunk can succeed. */
+
+static bool
+x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset, const_tree function)
+{
+ /* 64-bit can handle anything. */
+ if (TARGET_64BIT)
+ return true;
+
+ /* For 32-bit, everything's fine if we have one free register. */
+ if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
+ return true;
+
+ /* Need a free register for vcall_offset. */
+ if (vcall_offset)
+ return false;
+
+ /* Need a free register for GOT references. */
+ if (flag_pic && !targetm.binds_local_p (function))
+ return false;
+
+ /* Otherwise ok. */
+ return true;
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+x86_output_mi_thunk (FILE *file,
+ tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ rtx xops[3];
+ rtx this_param = x86_this_parameter (function);
+ rtx this_reg, tmp;
+
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), file, 1);
+
+ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
+ pull it in now and let DELTA benefit. */
+ if (REG_P (this_param))
+ this_reg = this_param;
+ else if (vcall_offset)
+ {
+ /* Put the this parameter into %eax. */
+ xops[0] = this_param;
+ xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ this_reg = NULL_RTX;
+
+ /* Adjust the this parameter by a fixed constant. */
+ if (delta)
+ {
+ xops[0] = GEN_INT (delta);
+ xops[1] = this_reg ? this_reg : this_param;
+ if (TARGET_64BIT)
+ {
+ if (!x86_64_general_operand (xops[0], DImode))
+ {
+ tmp = gen_rtx_REG (DImode, R10_REG);
+ xops[1] = tmp;
+ output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
+ xops[0] = tmp;
+ xops[1] = this_param;
+ }
+ if (x86_maybe_negate_const_int (&xops[0], DImode))
+ output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ }
+ else if (x86_maybe_negate_const_int (&xops[0], SImode))
+ output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* Adjust the this parameter by a value stored in the vtable. */
+ if (vcall_offset)
+ {
+ if (TARGET_64BIT)
+ tmp = gen_rtx_REG (DImode, R10_REG);
+ else
+ {
+ int tmp_regno = CX_REG;
+ if (lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function)))
+ || lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ tmp_regno = AX_REG;
+ tmp = gen_rtx_REG (SImode, tmp_regno);
+ }
+
+ xops[0] = gen_rtx_MEM (Pmode, this_reg);
+ xops[1] = tmp;
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+
+ /* Adjust the this parameter. */
+ xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
+ if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ {
+ rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
+ xops[0] = GEN_INT (vcall_offset);
+ xops[1] = tmp2;
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ }
+ xops[1] = this_reg;
+ output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* If necessary, drop THIS back to its stack slot. */
+ if (this_reg && this_reg != this_param)
+ {
+ xops[0] = this_reg;
+ xops[1] = this_param;
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ }
+
+ xops[0] = XEXP (DECL_RTL (function), 0);
+ if (TARGET_64BIT)
+ {
+ if (!flag_pic || targetm.binds_local_p (function)
+ || DEFAULT_ABI == MS_ABI)
+ output_asm_insn ("jmp\t%P0", xops);
+      /* All thunks should be in the same object as their target,
+	 and thus binds_local_p should be true.  */
+      else if (cfun->machine->call_abi == MS_ABI)
+ gcc_unreachable ();
+ else
+ {
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%A0", xops);
+ }
+ }
+ else
+ {
+ if (!flag_pic || targetm.binds_local_p (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx sym_ref = XEXP (DECL_RTL (function), 0);
+ if (TARGET_MACHO_BRANCH_ISLANDS)
+ sym_ref = (gen_rtx_SYMBOL_REF
+ (Pmode,
+ machopic_indirection_name (sym_ref, /*stub_p=*/true)));
+ tmp = gen_rtx_MEM (QImode, sym_ref);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%0", xops);
+ }
+ else
+#endif /* TARGET_MACHO */
+ {
+ tmp = gen_rtx_REG (SImode, CX_REG);
+ output_set_got (tmp, NULL_RTX);
+
+ xops[1] = tmp;
+ output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
+ output_asm_insn ("jmp\t{*}%1", xops);
+ }
+ }
+ final_end_function ();
+}
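+
+/* A sketch of the output for a simple 32-bit thunk with DELTA == -4,
+   no vcall offset, and `this' living on the stack (illustrative only;
+   `target' stands for the real function's symbol):
+
+	subl	$4, 4(%esp)	# x86_maybe_negate_const_int turns
+				# add $-4 into the prettier sub $4
+	jmp	target
+
+   The thunk adjusts `this' in place and tail-calls the real function
+   without setting up a frame.  */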
+
+static void
+x86_file_start (void)
+{
+ default_file_start ();
+#if TARGET_MACHO
+ darwin_file_start ();
+#endif
+ if (X86_FILE_START_VERSION_DIRECTIVE)
+ fputs ("\t.version\t\"01.01\"\n", asm_out_file);
+ if (X86_FILE_START_FLTUSED)
+ fputs ("\t.global\t__fltused\n", asm_out_file);
+ if (ix86_asm_dialect == ASM_INTEL)
+ fputs ("\t.intel_syntax noprefix\n", asm_out_file);
+}
+
+int
+x86_field_alignment (tree field, int computed)
+{
+ enum machine_mode mode;
+ tree type = TREE_TYPE (field);
+
+ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
+ return computed;
+ mode = TYPE_MODE (strip_array_types (type));
+ if (mode == DFmode || mode == DCmode
+ || GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
+ return MIN (32, computed);
+ return computed;
+}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+void
+x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
+ : MCOUNT_NAME);
+
+ if (TARGET_64BIT)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
+#endif
+
+ if (DEFAULT_ABI == SYSV_ABI && flag_pic)
+ fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
+ else
+ fprintf (file, "\tcall\t%s\n", mcount_name);
+ }
+ else if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
+ LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
+ LPREFIX, labelno);
+#endif
+ fprintf (file, "\tcall\t%s\n", mcount_name);
+ }
+}
+
+/* We don't have exact information about insn sizes, but we may assume
+   quite safely that we are informed about all 1-byte insns and memory
+   address sizes.  This is enough to eliminate unnecessary padding in
+   99% of cases.  */
+
+static int
+min_insn_size (rtx insn)
+{
+ int l = 0, len;
+
+ if (!INSN_P (insn) || !active_insn_p (insn))
+ return 0;
+
+  /* Discard alignment insns we've emitted ourselves, and jump table data.  */
+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+ return 0;
+ if (JUMP_TABLE_DATA_P (insn))
+ return 0;
+
+  /* Important case - calls are always 5 bytes.
+     It is common to have many calls in a row.  */
+ if (CALL_P (insn)
+ && symbolic_reference_mentioned_p (PATTERN (insn))
+ && !SIBLING_CALL_P (insn))
+ return 5;
+ len = get_attr_length (insn);
+ if (len <= 1)
+ return 1;
+
+ /* For normal instructions we rely on get_attr_length being exact,
+ with a few exceptions. */
+ if (!JUMP_P (insn))
+ {
+ enum attr_type type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MULTI:
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ return 0;
+ break;
+ case TYPE_OTHER:
+ case TYPE_FCMP:
+ break;
+ default:
+ /* Otherwise trust get_attr_length. */
+ return len;
+ }
+
+ l = get_attr_length_address (insn);
+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+ l = 4;
+ }
+ if (l)
+ return 1+l;
+ else
+ return 2;
+}
+
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD
+
+/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
+   a 16-byte window.  */
+
+static void
+ix86_avoid_jump_mispredicts (void)
+{
+ rtx insn, start = get_insns ();
+ int nbytes = 0, njumps = 0;
+ int isjump = 0;
+
+  /* Look for all minimal intervals of instructions containing 4 jumps.
+     The intervals are bounded by START and INSN.  NBYTES is the total
+     size of the instructions in the interval, including INSN and not
+     including START.  When NBYTES is smaller than 16 bytes, it is possible
+     that the end of START and INSN end up in the same 16-byte page.
+
+     The smallest offset in the page at which INSN can start is the case
+     where START ends at offset 0.  The offset of INSN is then
+     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
+     maxskip 15 - NBYTES + sizeof (INSN).  */
+ for (insn = start; insn; insn = NEXT_INSN (insn))
+ {
+ int min_size;
+
+ if (LABEL_P (insn))
+ {
+ int align = label_to_alignment (insn);
+ int max_skip = label_to_max_skip (insn);
+
+ if (max_skip > 15)
+ max_skip = 15;
+ /* If align > 3, only up to 16 - max_skip - 1 bytes can be
+ already in the current 16 byte page, because otherwise
+ ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
+ bytes to reach 16 byte boundary. */
+ if (align <= 0
+ || (align <= 3 && max_skip != (1 << align) - 1))
+ max_skip = 0;
+ if (dump_file)
+ fprintf (dump_file, "Label %i with max_skip %i\n",
+ INSN_UID (insn), max_skip);
+ if (max_skip)
+ {
+ while (nbytes + max_skip >= 16)
+ {
+ start = NEXT_INSN (start);
+ if ((JUMP_P (start)
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || CALL_P (start))
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ }
+ continue;
+ }
+
+ min_size = min_insn_size (insn);
+ nbytes += min_size;
+ if (dump_file)
+ fprintf (dump_file, "Insn %i estimated to %i bytes\n",
+ INSN_UID (insn), min_size);
+ if ((JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ || CALL_P (insn))
+ njumps++;
+ else
+ continue;
+
+ while (njumps > 3)
+ {
+ start = NEXT_INSN (start);
+ if ((JUMP_P (start)
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || CALL_P (start))
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ gcc_assert (njumps >= 0);
+ if (dump_file)
+ fprintf (dump_file, "Interval %i to %i has %i bytes\n",
+ INSN_UID (start), INSN_UID (insn), nbytes);
+
+ if (njumps == 3 && isjump && nbytes < 16)
+ {
+ int padsize = 15 - nbytes + min_insn_size (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Padding insn %i by %i bytes!\n",
+ INSN_UID (insn), padsize);
+ emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
+ }
+ }
+}
+#endif
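+
+/* A worked example of the padding computation above: suppose START..INSN
+   already contains three jumps, INSN is itself a 2-byte jump, and
+   NBYTES == 10 (INSN's bytes included).  The four jumps could then share
+   one 16-byte window, so a pad of 15 - 10 + 2 = 7 bytes is emitted before
+   INSN, guaranteeing that INSN starts in the next window no matter where
+   START was placed.  */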
+
+/* The AMD Athlon works faster
+   when RET is not the destination of a conditional jump or directly
+   preceded by another jump instruction.  We avoid the penalty by inserting
+   a NOP just before the RET instructions in such cases.  */
+static void
+ix86_pad_returns (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ basic_block bb = e->src;
+ rtx ret = BB_END (bb);
+ rtx prev;
+ bool replace = false;
+
+ if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
+ || optimize_bb_for_size_p (bb))
+ continue;
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || LABEL_P (prev))
+ break;
+ if (prev && LABEL_P (prev))
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
+ && !(e->flags & EDGE_FALLTHRU))
+ replace = true;
+ }
+ if (!replace)
+ {
+ prev = prev_active_insn (ret);
+ if (prev
+ && ((JUMP_P (prev) && any_condjump_p (prev))
+ || CALL_P (prev)))
+ replace = true;
+	  /* Empty functions get a branch mispredict even when
+	     the jump destination is not visible to us.  */
+ if (!prev && !optimize_function_for_size_p (cfun))
+ replace = true;
+ }
+ if (replace)
+ {
+ emit_jump_insn_before (gen_return_internal_long (), ret);
+ delete_insn (ret);
+ }
+ }
+}
+
+/* Count the minimum number of instructions in BB. Return 4 if the
+ number of instructions >= 4. */
+
+static int
+ix86_count_insn_bb (basic_block bb)
+{
+ rtx insn;
+ int insn_count = 0;
+
+ /* Count number of instructions in this block. Return 4 if the number
+ of instructions >= 4. */
+ FOR_BB_INSNS (bb, insn)
+ {
+      /* This only happens in exit blocks.  */
+ if (JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) == RETURN)
+ break;
+
+ if (NONDEBUG_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ {
+ insn_count++;
+ if (insn_count >= 4)
+ return insn_count;
+ }
+ }
+
+ return insn_count;
+}
+
+
+/* Count the minimum number of instructions in the code path through BB.
+   Return 4 if the number of instructions >= 4.  */
+
+static int
+ix86_count_insn (basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+ int min_prev_count;
+
+ /* Only bother counting instructions along paths with no
+ more than 2 basic blocks between entry and exit. Given
+ that BB has an edge to exit, determine if a predecessor
+ of BB has an edge from entry. If so, compute the number
+ of instructions in the predecessor block. If there
+ happen to be multiple such blocks, compute the minimum. */
+ min_prev_count = 4;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ edge prev_e;
+ edge_iterator prev_ei;
+
+ if (e->src == ENTRY_BLOCK_PTR)
+ {
+ min_prev_count = 0;
+ break;
+ }
+ FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
+ {
+ if (prev_e->src == ENTRY_BLOCK_PTR)
+ {
+ int count = ix86_count_insn_bb (e->src);
+ if (count < min_prev_count)
+ min_prev_count = count;
+ break;
+ }
+ }
+ }
+
+ if (min_prev_count < 4)
+ min_prev_count += ix86_count_insn_bb (bb);
+
+ return min_prev_count;
+}
+
+/* Pad short function to 4 instructions.  */
+
+static void
+ix86_pad_short_function (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ rtx ret = BB_END (e->src);
+ if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+ {
+ int insn_count = ix86_count_insn (e->src);
+
+ /* Pad short function. */
+ if (insn_count < 4)
+ {
+ rtx insn = ret;
+
+ /* Find epilogue. */
+ while (insn
+ && (!NOTE_P (insn)
+ || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
+ insn = PREV_INSN (insn);
+
+ if (!insn)
+ insn = ret;
+
+ /* Two NOPs count as one instruction. */
+ insn_count = 2 * (4 - insn_count);
+ emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
+ }
+ }
+ }
+}
+
+/* Implement machine specific optimizations.  We implement padding of returns
+   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
+static void
+ix86_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (optimize && optimize_function_for_speed_p (cfun))
+ {
+ if (TARGET_PAD_SHORT_FUNCTION)
+ ix86_pad_short_function ();
+ else if (TARGET_PAD_RETURNS)
+ ix86_pad_returns ();
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD
+ if (TARGET_FOUR_JUMP_LIMIT)
+ ix86_avoid_jump_mispredicts ();
+#endif
+ }
+
+ /* Run the vzeroupper optimization if needed. */
+ if (TARGET_VZEROUPPER)
+ move_or_delete_vzeroupper ();
+}
+
+/* Return nonzero when a QImode register that must be represented via a REX
+   prefix is used.  */
+bool
+x86_extended_QIreg_mentioned_p (rtx insn)
+{
+ int i;
+ extract_insn_cached (insn);
+ for (i = 0; i < recog_data.n_operands; i++)
+ if (REG_P (recog_data.operand[i])
+ && REGNO (recog_data.operand[i]) > BX_REG)
+ return true;
+ return false;
+}
+
+/* Return nonzero when P points to a register encoded via a REX prefix.
+   Called via for_each_rtx.  */
+static int
+extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
+{
+ unsigned int regno;
+ if (!REG_P (*p))
+ return 0;
+ regno = REGNO (*p);
+ return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
+}
+
+/* Return true when INSN mentions register that must be encoded using REX
+ prefix. */
+bool
+x86_extended_reg_mentioned_p (rtx insn)
+{
+ return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
+ extended_reg_mentioned_1, NULL);
+}
+
+/* If profitable, negate (without causing overflow) the integer constant
+   of mode MODE at location LOC.  Return true if the constant was negated.  */
+bool
+x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
+{
+ HOST_WIDE_INT val;
+
+ if (!CONST_INT_P (*loc))
+ return false;
+
+ switch (mode)
+ {
+ case DImode:
+ /* DImode x86_64 constants must fit in 32 bits. */
+ gcc_assert (x86_64_immediate_operand (*loc, mode));
+
+ mode = SImode;
+ break;
+
+ case SImode:
+ case HImode:
+ case QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Avoid overflows. */
+ if (mode_signbit_p (mode, *loc))
+ return false;
+
+ val = INTVAL (*loc);
+
+  /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
+     Exceptions: -128 encodes smaller than 128, so swap the sign and the
+     operation.  */
+ if ((val < 0 && val != -128)
+ || val == 128)
+ {
+ *loc = GEN_INT (-val);
+ return true;
+ }
+
+ return false;
+}
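+
+/* Worked examples of the negation rule above: `addl $-4, %eax' becomes
+   `subl $4, %eax' (same length, prettier), and `addl $128, %eax' becomes
+   `subl $-128, %eax' because -128 fits in a sign-extended imm8 while +128
+   needs a full imm32.  Conversely, -128 itself is left alone, since
+   negating it to +128 would lengthen the encoding.  */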
+
+/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+x86_emit_floatuns (rtx operands[2])
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode, inmode;
+
+ inmode = GET_MODE (operands[1]);
+ gcc_assert (inmode == SImode || inmode == DImode);
+
+ out = operands[0];
+ in = force_reg (inmode, operands[1]);
+ mode = GET_MODE (out);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
+
+ expand_float (out, in, 0);
+
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
+ 1, OPTAB_DIRECT);
+ i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
+ 1, OPTAB_DIRECT);
+ i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+
+ expand_float (f0, i0, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
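+
+/* The negative-input path above uses the classic halve-and-double trick:
+   for an input with the sign bit set, e.g. 0x8000000000000003, we compute
+
+	i0 = (in >> 1) | (in & 1)	-- here 0x4000000000000001
+
+   convert that to FP, and double the result.  OR-ing the low bit back in
+   keeps it as a sticky rounding bit, so the final value is still correctly
+   rounded despite the halving.  */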
+
+/* AVX does not support 32-byte integer vector operations,
+ thus the longest vector we are faced with is V16QImode. */
+#define MAX_VECT_LEN 16
+
+struct expand_vec_perm_d
+{
+ rtx target, op0, op1;
+ unsigned char perm[MAX_VECT_LEN];
+ enum machine_mode vmode;
+ unsigned char nelt;
+ bool testing_p;
+};
+
+static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
+static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+
+/* Get a vector mode of the same size as the original but with elements
+ twice as wide. This is only guaranteed to apply to integral vectors. */
+
+static inline enum machine_mode
+get_mode_wider_vector (enum machine_mode o)
+{
+ /* ??? Rely on the ordering that genmodes.c gives to vectors. */
+ enum machine_mode n = GET_MODE_WIDER_MODE (o);
+ gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
+ gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
+ return n;
+}
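+
+/* E.g. V16QImode widens to V8HImode and V8HImode to V4SImode: the same
+   16-byte vector, with elements twice as wide and half as many.  */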
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ with all elements equal to VAR. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx val)
+{
+ bool ok;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V4DFmode:
+ case V4DImode:
+ case V8SFmode:
+ case V8SImode:
+ case V2DFmode:
+ case V2DImode:
+ case V4SFmode:
+ case V4SImode:
+ {
+ rtx insn, dup;
+
+ /* First attempt to recognize VAL as-is. */
+ dup = gen_rtx_VEC_DUPLICATE (mode, val);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
+ if (recog_memoized (insn) < 0)
+ {
+ rtx seq;
+ /* If that fails, force VAL into a register. */
+
+ start_sequence ();
+ XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
+ seq = get_insns ();
+ end_sequence ();
+ if (seq)
+ emit_insn_before (seq, insn);
+
+ ok = recog_memoized (insn) >= 0;
+ gcc_assert (ok);
+ }
+ }
+ return true;
+
+ case V4HImode:
+ if (!mmx_ok)
+ return false;
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ {
+ rtx x;
+
+ val = gen_lowpart (SImode, val);
+ x = gen_rtx_TRUNCATE (HImode, val);
+ x = gen_rtx_VEC_DUPLICATE (mode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+ }
+ goto widen;
+
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ goto widen;
+
+ case V8HImode:
+ if (TARGET_SSE2)
+ {
+ struct expand_vec_perm_d dperm;
+ rtx tmp1, tmp2;
+
+ permute:
+ memset (&dperm, 0, sizeof (dperm));
+ dperm.target = target;
+ dperm.vmode = mode;
+ dperm.nelt = GET_MODE_NUNITS (mode);
+ dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
+
+ /* Extend to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+
+ /* Insert the SImode value as low element of a V4SImode vector. */
+ tmp2 = gen_lowpart (V4SImode, dperm.op0);
+ emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
+
+ ok = (expand_vec_perm_1 (&dperm)
+ || expand_vec_perm_broadcast_1 (&dperm));
+ gcc_assert (ok);
+ return ok;
+ }
+ goto widen;
+
+ case V16QImode:
+ if (TARGET_SSE2)
+ goto permute;
+ goto widen;
+
+ widen:
+ /* Replicate the value once into the next wider mode and recurse. */
+ {
+ enum machine_mode smode, wsmode, wvmode;
+ rtx x;
+
+ smode = GET_MODE_INNER (mode);
+ wvmode = get_mode_wider_vector (mode);
+ wsmode = GET_MODE_INNER (wvmode);
+
+ val = convert_modes (wsmode, smode, val, true);
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ x = gen_lowpart (wvmode, target);
+ ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
+ gcc_assert (ok);
+ return ok;
+ }
+
+ case V16HImode:
+ case V32QImode:
+ {
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ rtx x = gen_reg_rtx (hvmode);
+
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
+
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
+ return true;
+
+ default:
+ return false;
+ }
+}
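+
+/* To sketch the `widen' strategy above for a V8QImode broadcast of byte
+   B: B is zero-extended to HImode, replicated as (B << 8) | B, and the
+   problem recurses as a V4HImode broadcast, which the V4HImode case can
+   handle directly once SSE or 3DNow!A is available.  */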
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ whose ONE_VAR element is VAR, and other elements are zero. Return true
+ if successful. */
+
+static bool
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
+{
+ enum machine_mode vsimode;
+ rtx new_target;
+ rtx x, tmp;
+ bool use_vector_set = false;
+
+ switch (mode)
+ {
+ case V2DImode:
+ /* For SSE4.1, we normally use vector set. But if the second
+ element is zero and inter-unit moves are OK, we use movq
+ instead. */
+ use_vector_set = (TARGET_64BIT
+ && TARGET_SSE4_1
+ && !(TARGET_INTER_UNIT_MOVES
+ && one_var == 0));
+ break;
+ case V16QImode:
+ case V4SImode:
+ case V4SFmode:
+ use_vector_set = TARGET_SSE4_1;
+ break;
+ case V8HImode:
+ use_vector_set = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
+ break;
+ case V32QImode:
+ case V16HImode:
+ case V8SImode:
+ case V8SFmode:
+ case V4DFmode:
+ use_vector_set = TARGET_AVX;
+ break;
+ case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ use_vector_set = TARGET_AVX && TARGET_64BIT;
+ break;
+ default:
+ break;
+ }
+
+ if (use_vector_set)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
+ var = force_reg (GET_MODE_INNER (mode), var);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+ }
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ if (one_var != 0)
+ return false;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4SFmode:
+ case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_DUPLICATE (mode, var);
+ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ const1_rtx,
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
+ emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
+ const1_rtx,
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+
+ case V8HImode:
+ case V16QImode:
+ vsimode = V4SImode;
+ goto widen;
+ case V4HImode:
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ vsimode = V2SImode;
+ goto widen;
+ widen:
+ if (one_var != 0)
+ return false;
+
+ /* Zero extend the variable element to SImode and recurse. */
+ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
+
+ x = gen_reg_rtx (vsimode);
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
+ gcc_unreachable ();
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ consisting of the values in VALS. It is known that all elements
+ except ONE_VAR are constants. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals, int one_var)
+{
+ rtx var = XVECEXP (vals, 0, one_var);
+ enum machine_mode wmode;
+ rtx const_vec, x;
+
+ const_vec = copy_rtx (vals);
+ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ case V2SFmode:
+ case V2SImode:
+      /* For the two-element vectors, it's just as easy to use
+	 the general case.  */
+ return false;
+
+ case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ if (!TARGET_64BIT)
+	return false;
+      /* FALLTHRU */
+ case V4DFmode:
+ case V8SFmode:
+ case V8SImode:
+ case V16HImode:
+ case V32QImode:
+ case V4SFmode:
+ case V4SImode:
+ case V8HImode:
+ case V4HImode:
+ break;
+
+ case V16QImode:
+ if (TARGET_SSE4_1)
+ break;
+ wmode = V8HImode;
+ goto widen;
+ case V8QImode:
+ wmode = V4HImode;
+ goto widen;
+ widen:
+ /* There's no way to set one QImode entry easily. Combine
+ the variable value with its adjacent constant value, and
+ promote to an HImode set. */
+ x = XVECEXP (vals, 0, one_var ^ 1);
+ if (one_var & 1)
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ x = GEN_INT (INTVAL (x) & 0xff);
+ }
+ else
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ x = gen_int_mode (INTVAL (x) << 8, HImode);
+ }
+ if (x != const0_rtx)
+ var = expand_simple_binop (HImode, IOR, var, x, var,
+ 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wmode);
+ emit_move_insn (x, gen_lowpart (wmode, const_vec));
+ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+
+ emit_move_insn (target, const_vec);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+}
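+
+/* For example, with a V8QImode vector whose only variable element is at
+   index 3: the widening path pairs it with the constant at index 2,
+   builds the combined HImode value (var << 8) | (const & 0xff), and
+   performs a single HImode vector-set at index 1 of the corresponding
+   V4HImode vector.  */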
+
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ concatenate to handle the most general case: all values variable,
+ and none identical. */
+
+static void
+ix86_expand_vector_init_concat (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode cmode, hmode = VOIDmode;
+ rtx first[8], second[4];
+ rtvec v;
+ int i, j;
+
+ switch (n)
+ {
+ case 2:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V4SFmode;
+ break;
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ case V2DImode:
+ cmode = DImode;
+ break;
+ case V2SImode:
+ cmode = SImode;
+ break;
+ case V2DFmode:
+ cmode = DFmode;
+ break;
+ case V2SFmode:
+ cmode = SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!register_operand (ops[1], cmode))
+ ops[1] = force_reg (cmode, ops[1]);
+ if (!register_operand (ops[0], cmode))
+ ops[0] = force_reg (cmode, ops[0]);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, ops[0],
+ ops[1])));
+ break;
+
+ case 4:
+ switch (mode)
+ {
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
+ case 8:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
+half:
+ /* FIXME: We process inputs backward to help RA. PR 36222. */
+ i = n - 1;
+ j = (n >> 1) - 1;
+ for (; i > 0; i -= 2, j--)
+ {
+ first[j] = gen_reg_rtx (cmode);
+ v = gen_rtvec (2, ops[i - 1], ops[i]);
+ ix86_expand_vector_init (false, first[j],
+ gen_rtx_PARALLEL (cmode, v));
+ }
+
+ n >>= 1;
+ if (n > 2)
+ {
+ gcc_assert (hmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, second, n);
+ }
+ else
+ ix86_expand_vector_init_concat (mode, target, first, n);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ interleave to handle the most general case: all values variable,
+ and none identical. */
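+/* E.g. for V8HImode, each loop iteration below packs a pair of HImode
+ inputs into the low 32 bits of a fresh vector; interleaving the
+ results at V4SImode and then V2DImode assembles the final vector. */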
+
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode first_imode, second_imode, third_imode, inner_mode;
+ int i, j;
+ rtx op0, op1;
+ rtx (*gen_load_even) (rtx, rtx, rtx);
+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case V8HImode:
+ gen_load_even = gen_vec_setv8hi;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ inner_mode = HImode;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
+ case V16QImode:
+ gen_load_even = gen_vec_setv16qi;
+ gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+ gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ inner_mode = QImode;
+ first_imode = V8HImode;
+ second_imode = V4SImode;
+ third_imode = V2DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ /* Extend the odd element to SImode using a paradoxical SUBREG. */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
+
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+ /* Cast the V4SImode vector back to a vector in the original mode. */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
+
+ /* Load even elements into the second position. */
+ emit_insn (gen_load_even (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
+ const1_rtx));
+
+ /* Cast vector to FIRST_IMODE vector. */
+ ops[i] = gen_reg_rtx (first_imode);
+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+ }
+
+ /* Interleave low FIRST_IMODE vectors. */
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (first_imode);
+ emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
+
+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
+ ops[j] = gen_reg_rtx (second_imode);
+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+ }
+
+ /* Interleave low SECOND_IMODE vectors. */
+ switch (second_imode)
+ {
+ case V4SImode:
+ for (i = j = 0; i < n / 2; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn (gen_interleave_second_low (op0, ops[i],
+ ops[i + 1]));
+
+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+ vector. */
+ ops[j] = gen_reg_rtx (third_imode);
+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+ }
+ second_imode = V2DImode;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ /* FALLTHRU */
+
+ case V2DImode:
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn (gen_interleave_second_low (op0, ops[0],
+ ops[1]));
+
+ /* Cast the SECOND_IMODE vector back to a vector in the original
+ mode. */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_lowpart (mode, op0)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ rtx ops[32], op0, op1;
+ enum machine_mode half_mode = VOIDmode;
+ int n, i;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+ /* FALLTHRU */
+
+ case V8SFmode:
+ case V8SImode:
+ case V4DFmode:
+ case V4DImode:
+ case V4SFmode:
+ case V4SImode:
+ case V2DFmode:
+ case V2DImode:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_concat (mode, target, ops, n);
+ return;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ goto half;
+
+half:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (half_mode);
+ op1 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (half_mode, op0, ops,
+ n >> 2);
+ ix86_expand_vector_init_interleave (half_mode, op1,
+ &ops [n >> 1], n >> 2);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ return;
+
+ case V16QImode:
+ if (!TARGET_SSE4_1)
+ break;
+ /* FALLTHRU */
+
+ case V8HImode:
+ if (!TARGET_SSE2)
+ break;
+
+ /* Don't use ix86_expand_vector_init_interleave if we can't
+ move from GPR to SSE register directly. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ break;
+
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+ return;
+
+ case V4HImode:
+ case V8QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ {
+ int i, j, n_elts, n_words, n_elt_per_word;
+ enum machine_mode inner_mode;
+ rtx words[4], shift;
+
+ inner_mode = GET_MODE_INNER (mode);
+ n_elts = GET_MODE_NUNITS (mode);
+ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ n_elt_per_word = n_elts / n_words;
+ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
+
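+    /* Build each word from its elements, highest index first; e.g. for
+       V4HImode on a 32-bit target the vector {a, b, c, d} yields
+       word 0 = (b << 16) | a and word 1 = (d << 16) | c. */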
+ for (i = 0; i < n_words; ++i)
+ {
+ rtx word = NULL_RTX;
+
+ for (j = 0; j < n_elt_per_word; ++j)
+ {
+ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
+ elt = convert_modes (word_mode, inner_mode, elt, true);
+
+ if (j == 0)
+ word = elt;
+ else
+ {
+ word = expand_simple_binop (word_mode, ASHIFT, word, shift,
+ word, 1, OPTAB_LIB_WIDEN);
+ word = expand_simple_binop (word_mode, IOR, word, elt,
+ word, 1, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ words[i] = word;
+ }
+
+ if (n_words == 1)
+ emit_move_insn (target, gen_lowpart (mode, words[0]));
+ else if (n_words == 2)
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_clobber (tmp);
+ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
+ emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
+ emit_move_insn (target, tmp);
+ }
+ else if (n_words == 4)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ gcc_assert (word_mode == SImode);
+ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
+ ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
+ }
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
+ instructions unless MMX_OK is true. */
+
+void
+ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true, all_const_zero = true;
+ int i;
+ rtx x;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
+ n_var++, one_var = i;
+ else if (x != CONST0_RTX (inner_mode))
+ all_const_zero = false;
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ /* Constants are best loaded from the constant pool. */
+ if (n_var == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ /* If all values are identical, broadcast the value. */
+ if (all_same
+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
+ XVECEXP (vals, 0, 0)))
+ return;
+
+ /* Values where only one field is non-constant are best loaded from
+ the pool and overwritten via move later. */
+ if (n_var == 1)
+ {
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
+ return;
+
+ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
+ return;
+ }
+
+ ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
+}
+
+void
+ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ enum machine_mode half_mode;
+ bool use_vec_merge = false;
+ rtx tmp;
+ static rtx (*gen_extract[6][2]) (rtx, rtx)
+ = {
+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+ };
+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+ = {
+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+ };
+ int i, j, n;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (mmx_ok)
+ {
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (true, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+ }
+ break;
+
+ case V2DImode:
+ use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
+ if (use_vec_merge)
+ break;
+
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (false, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+
+ case V2DFmode:
+ {
+ rtx op0, op1;
+
+ /* For the two element vectors, we implement a VEC_CONCAT with
+ the extraction of the other element. */
+
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
+
+ if (elt == 0)
+ op0 = val, op1 = tmp;
+ else
+ op0 = tmp, op1 = val;
+
+ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ return;
+
+ case V4SFmode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ switch (elt)
+ {
+ case 0:
+ use_vec_merge = true;
+ break;
+
+ case 1:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* target = A A B B */
+ emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
+ /* target = X A B B */
+ ix86_expand_vector_set (false, target, val, 0);
+ /* target = A X C D */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ const1_rtx, const0_rtx,
+ GEN_INT (2+4), GEN_INT (3+4)));
+ return;
+
+ case 2:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ const0_rtx, const1_rtx,
+ GEN_INT (0+4), GEN_INT (3+4)));
+ return;
+
+ case 3:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ const0_rtx, const1_rtx,
+ GEN_INT (2+4), GEN_INT (0+4)));
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case V4SImode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ /* Element 0 handled by vec_merge below. */
+ if (elt == 0)
+ {
+ use_vec_merge = true;
+ break;
+ }
+
+ if (TARGET_SSE2)
+ {
+ /* With SSE2, use integer shuffles to swap element 0 and ELT,
+ store into element 0, then shuffle them back. */
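+ /* E.g. for ELT == 2 the order becomes {2, 1, 0, 3}; this pshufd is
+ its own inverse, so applying it once before and once after the
+ element 0 store restores the remaining elements. */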
+
+ rtx order[4];
+
+ order[0] = GEN_INT (elt);
+ order[1] = const1_rtx;
+ order[2] = const2_rtx;
+ order[3] = GEN_INT (3);
+ order[elt] = const0_rtx;
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+
+ ix86_expand_vector_set (false, target, val, 0);
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
+ gen_lowpart (SFmode, val), elt);
+ }
+ return;
+
+ case V8HImode:
+ use_vec_merge = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ use_vec_merge = TARGET_SSE4_1;
+ break;
+
+ case V8QImode:
+ break;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ j = 0;
+ n = 16;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ j = 1;
+ n = 8;
+ goto half;
+
+ case V8SImode:
+ half_mode = V4SImode;
+ j = 2;
+ n = 4;
+ goto half;
+
+ case V4DImode:
+ half_mode = V2DImode;
+ j = 3;
+ n = 2;
+ goto half;
+
+ case V8SFmode:
+ half_mode = V4SFmode;
+ j = 4;
+ n = 4;
+ goto half;
+
+ case V4DFmode:
+ half_mode = V2DFmode;
+ j = 5;
+ n = 2;
+ goto half;
+
+half:
+ /* Compute offset. */
+ i = elt / n;
+ elt %= n;
+
+ gcc_assert (i <= 1);
+
+ /* Extract the half. */
+ tmp = gen_reg_rtx (half_mode);
+ emit_insn (gen_extract[j][i] (tmp, target));
+
+ /* Put val in tmp at elt. */
+ ix86_expand_vector_set (false, tmp, val, elt);
+
+ /* Put it back. */
+ emit_insn (gen_insert[j][i] (target, target, tmp));
+ return;
+
+ default:
+ break;
+ }
+
+ if (use_vec_merge)
+ {
+ tmp = gen_rtx_VEC_DUPLICATE (mode, val);
+ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, target);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (tmp, val);
+
+ emit_move_insn (target, mem);
+ }
+}
+
+void
+ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_extr = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ use_vec_extr = true;
+ break;
+
+ case V4SFmode:
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt+4), GEN_INT (elt+4)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ break;
+
+ case V4SImode:
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+
+ if (TARGET_SSE2)
+ {
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_pshufd_1 (tmp, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt), GEN_INT (elt)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
+ gen_lowpart (V4SFmode, vec), elt);
+ return;
+ }
+ break;
+
+ case V8HImode:
+ use_vec_extr = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ use_vec_extr = TARGET_SSE4_1;
+ break;
+
+ case V8QImode:
+ /* ??? Could extract the appropriate HImode element and shift. */
+ default:
+ break;
+ }
+
+ if (use_vec_extr)
+ {
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
+
+ /* Let the rtl optimizers know about the zero extension performed. */
+ if (inner_mode == QImode || inner_mode == HImode)
+ {
+ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
+ target = gen_lowpart (SImode, target);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, vec);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (target, tmp);
+ }
+}
+
+/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
+ pattern to reduce; DEST is the destination; IN is the input vector. */
+
+void
+ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+ rtx tmp1, tmp2, tmp3;
+
+ tmp1 = gen_reg_rtx (V4SFmode);
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse_movhlps (tmp1, in, in));
+ emit_insn (fn (tmp2, tmp1, in));
+
+ emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
+ const1_rtx, const1_rtx,
+ GEN_INT (1+4), GEN_INT (1+4)));
+ emit_insn (fn (dest, tmp2, tmp3));
+}
+
+/* Target hook for scalar_mode_supported_p. */
+static bool
+ix86_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return default_decimal_float_supported_p ();
+ else if (mode == TFmode)
+ return true;
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+static bool
+ix86_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+ return true;
+ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+ return true;
+ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ return true;
+ if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ return true;
+ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ return true;
+ return false;
+}
+
+/* Target hook for c_mode_for_suffix. */
+static enum machine_mode
+ix86_c_mode_for_suffix (char suffix)
+{
+ if (suffix == 'q')
+ return TFmode;
+ if (suffix == 'w')
+ return XFmode;
+
+ return VOIDmode;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS.
+
+ We do this in the new i386 backend to maintain source compatibility
+ with the old cc0-based compiler. */
+
+static tree
+ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
+ tree inputs ATTRIBUTE_UNUSED,
+ tree clobbers)
+{
+ clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
+ clobbers);
+ return clobbers;
+}
+
+/* Implements the target hook targetm.asm.encode_section_info. This
+ hook is not used by NetWare. */
+
+static void ATTRIBUTE_UNUSED
+ix86_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ && ix86_in_large_data_p (decl))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
+}
+
+/* Worker function for REVERSE_CONDITION. */
+
+enum rtx_code
+ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
+{
+ return (mode != CCFPmode && mode != CCFPUmode
+ ? reverse_condition (code)
+ : reverse_condition_maybe_unordered (code));
+}
+
+/* Output code to perform an x87 FP register move, from OPERANDS[1]
+ to OPERANDS[0]. */
+
+const char *
+output_387_reg_move (rtx insn, rtx *operands)
+{
+ if (REG_P (operands[0]))
+ {
+ if (REG_P (operands[1])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
+ return "fstp\t%y0";
+ }
+ if (STACK_TOP_P (operands[0]))
+ return "fld%Z1\t%y1";
+ return "fst\t%y0";
+ }
+ else if (MEM_P (operands[0]))
+ {
+ gcc_assert (REG_P (operands[1]));
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%Z0\t%y0";
+ else
+ {
+ /* There is no non-popping store to memory for XFmode.
+ So if we need one, follow the store with a load. */
+ if (GET_MODE (operands[0]) == XFmode)
+ return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
+ else
+ return "fst%Z0\t%y0";
+ }
+ }
+ else
+ gcc_unreachable();
+}
+
+/* Output code to perform a conditional jump to LABEL, if C2 flag in
+ FP status register is set. */
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+ rtx reg = gen_reg_rtx (HImode);
+ rtx temp;
+
+ emit_insn (gen_x86_fnstsw_1 (reg));
+
+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
+ {
+ emit_insn (gen_x86_sahf_1 (reg));
+
+ temp = gen_rtx_REG (CCmode, FLAGS_REG);
+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
+
+ temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
+ }
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+
+ emit_jump_insn (temp);
+ predict_jump (REG_BR_PROB_BASE * 10 / 100);
+}
+
+/* Output code to perform a log1p XFmode calculation. */
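+/* fyl2xp1 is only defined for |op1| < 1 - sqrt(2)/2 (approximately
+ 0.29289321881); outside that range the code below falls back to
+ computing ln (1 + op1) via fyl2x on the explicit sum. */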
+
+void
+ix86_emit_i387_log1p (rtx op0, rtx op1)
+{
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+
+ rtx tmp = gen_reg_rtx (XFmode);
+ rtx tmp2 = gen_reg_rtx (XFmode);
+ rtx test;
+
+ emit_insn (gen_absxf2 (tmp, op1));
+ test = gen_rtx_GE (VOIDmode, tmp,
+ CONST_DOUBLE_FROM_REAL_VALUE (
+ REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
+ XFmode));
+ emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
+
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_move_insn (tmp, CONST1_RTX (XFmode));
+ emit_insn (gen_addxf3 (tmp, op1, tmp));
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
+
+ emit_label (label2);
+}
+
+/* Output code to perform a Newton-Raphson approximation of a single precision
+ floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
+
+void
+ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
+{
+ rtx x0, x1, e0, e1, two;
+
+ x0 = gen_reg_rtx (mode);
+ e0 = gen_reg_rtx (mode);
+ e1 = gen_reg_rtx (mode);
+ x1 = gen_reg_rtx (mode);
+
+ two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
+
+ if (VECTOR_MODE_P (mode))
+ two = ix86_build_const_vector (mode, true, two);
+
+ two = force_reg (mode, two);
+
+ /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
+
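+  /* This is one Newton-Raphson step for 1/b: if x0 = (1/b)*(1 + e),
+     then x0 * (2.0 - b*x0) = (1/b)*(1 - e*e), roughly doubling the
+     ~12 valid bits of the rcpps estimate. */
+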
+ /* x0 = rcp(b) estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP)));
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (VOIDmode, e0,
+ gen_rtx_MULT (mode, x0, a)));
+ /* e1 = x0 * b */
+ emit_insn (gen_rtx_SET (VOIDmode, e1,
+ gen_rtx_MULT (mode, x0, b)));
+ /* x1 = 2. - e1 */
+ emit_insn (gen_rtx_SET (VOIDmode, x1,
+ gen_rtx_MINUS (mode, two, e1)));
+ /* res = e0 * x1 */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_MULT (mode, e0, x1)));
+}
+
+/* Output code to perform a Newton-Raphson approximation of a
+ single precision floating point [reciprocal] square root. */
+
+void
+ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, bool recip)
+{
+ rtx x0, e0, e1, e2, e3, mthree, mhalf;
+ REAL_VALUE_TYPE r;
+
+ x0 = gen_reg_rtx (mode);
+ e0 = gen_reg_rtx (mode);
+ e1 = gen_reg_rtx (mode);
+ e2 = gen_reg_rtx (mode);
+ e3 = gen_reg_rtx (mode);
+
+ real_from_integer (&r, VOIDmode, -3, -1, 0);
+ mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+
+ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
+ mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ mthree = ix86_build_const_vector (mode, true, mthree);
+ mhalf = ix86_build_const_vector (mode, true, mhalf);
+ }
+
+ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
+ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
+
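+  /* Both forms are one Newton-Raphson step x1 = 0.5*x0*(3.0 - a*x0*x0)
+     for 1/sqrt(a), written with negated constants so that the final
+     multiply uses -0.5 and (a*x0*x0 - 3.0). */
+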
+ /* x0 = rsqrt(a) estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
+ UNSPEC_RSQRT)));
+
+ /* If a == 0.0, rsqrt(a) is infinity; mask x0 to zero so that
+ sqrt(0.0) yields 0.0 rather than 0.0 * inf = NaN. */
+ if (!recip)
+ {
+ rtx zero, mask;
+
+ zero = gen_reg_rtx (mode);
+ mask = gen_reg_rtx (mode);
+
+ zero = force_reg (mode, CONST0_RTX(mode));
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
+
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (VOIDmode, e0,
+ gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * x0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e1,
+ gen_rtx_MULT (mode, e0, x0)));
+
+ /* e2 = e1 - 3. */
+ mthree = force_reg (mode, mthree);
+ emit_insn (gen_rtx_SET (VOIDmode, e2,
+ gen_rtx_PLUS (mode, e1, mthree)));
+
+ mhalf = force_reg (mode, mhalf);
+ if (recip)
+ /* e3 = -.5 * x0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e3,
+ gen_rtx_MULT (mode, x0, mhalf)));
+ else
+ /* e3 = -.5 * e0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e3,
+ gen_rtx_MULT (mode, e0, mhalf)));
+ /* ret = e2 * e3 */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_MULT (mode, e2, e3)));
+}
+
+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
+
+static void ATTRIBUTE_UNUSED
+i386_solaris_elf_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ /* With Binutils 2.15, the "@unwind" marker must be specified on
+ every occurrence of the ".eh_frame" section, not just the first
+ one. */
+ if (TARGET_64BIT
+ && strcmp (name, ".eh_frame") == 0)
+ {
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
+ flags & SECTION_WRITE ? "aw" : "a");
+ return;
+ }
+ default_elf_asm_named_section (name, flags, decl);
+}
+
+/* Return the mangling of TYPE if it is an extended fundamental type. */
+
+static const char *
+ix86_mangle_type (const_tree type)
+{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ switch (TYPE_MODE (type))
+ {
+ case TFmode:
+ /* __float128 is "g". */
+ return "g";
+ case XFmode:
+ /* "long double" or __float80 is "e". */
+ return "e";
+ default:
+ return NULL;
+ }
+}
+
+/* For 32-bit code we can save PIC register setup by using
+ __stack_chk_fail_local hidden function instead of calling
+ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
+ register, so it is better to call __stack_chk_fail directly. */
+
+static tree
+ix86_stack_protect_fail (void)
+{
+ return TARGET_64BIT
+ ? default_external_stack_protect_fail ()
+ : default_hidden_stack_protect_fail ();
+}
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
+int
+asm_preferred_eh_data_format (int code, int global)
+{
+ if (flag_pic)
+ {
+ int type = DW_EH_PE_sdata8;
+ if (!TARGET_64BIT
+ || ix86_cmodel == CM_SMALL_PIC
+ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
+ type = DW_EH_PE_sdata4;
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+ }
+ if (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM && code))
+ return DW_EH_PE_udata4;
+ return DW_EH_PE_absptr;
+}
+
+/* Expand copysign from SIGN to the positive value ABS_VALUE,
+ storing the result in RESULT. If MASK is non-null, it is the mask
+ used to clear the sign bit. */
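+/* I.e. RESULT = ABS_VALUE | (SIGN & SIGNBIT), where SIGNBIT is either
+ a freshly built sign-bit mask or the complement of MASK. */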
+static void
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
+{
+ enum machine_mode mode = GET_MODE (sign);
+ rtx sgn = gen_reg_rtx (mode);
+ if (mask == NULL_RTX)
+ {
+ enum machine_mode vmode;
+
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+
+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
+ if (!VECTOR_MODE_P (mode))
+ {
+ /* We need to generate a scalar mode mask in this case. */
+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+ }
+ }
+ else
+ mask = gen_rtx_NOT (mode, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, sgn,
+ gen_rtx_AND (mode, mask, sign)));
+ emit_insn (gen_rtx_SET (VOIDmode, result,
+ gen_rtx_IOR (mode, abs_value, sgn)));
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result. The
+ mask for masking out the sign-bit is stored in *SMASK, if that is
+ non-null. */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+ enum machine_mode vmode, mode = GET_MODE (op0);
+ rtx xa, mask;
+
+ xa = gen_reg_rtx (mode);
+ if (mode == SFmode)
+ vmode = V4SFmode;
+ else if (mode == DFmode)
+ vmode = V2DFmode;
+ else
+ vmode = mode;
+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
+ if (!VECTOR_MODE_P (mode))
+ {
+ /* We need to generate a scalar mode mask in this case. */
+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, xa,
+ gen_rtx_AND (mode, op0, mask)));
+
+ if (smask)
+ *smask = mask;
+
+ return xa;
+}
+
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+ swapping the operands if SWAP_OPERANDS is true. The expanded
+ code is a forward jump to a newly created label in case the
+ comparison is true. The generated label rtx is returned. */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+ bool swap_operands)
+{
+ rtx label, tmp;
+
+ if (swap_operands)
+ {
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ label = gen_label_rtx ();
+ tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+ tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ return label;
+}
+
+/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
+ using comparison code CODE. Operands are swapped for the comparison if
+ SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
+static rtx
+ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
+ bool swap_operands)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx mask = gen_reg_rtx (mode);
+
+ if (swap_operands)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ if (mode == DFmode)
+ emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+ else
+ emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+
+ return mask;
+}
+
+/* Generate and return a rtx of mode MODE for 2**n where n is the number
+ of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+ REAL_VALUE_TYPE TWO52r;
+ rtx TWO52;
+
+ real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
+ TWO52 = const_double_from_real_value (TWO52r, mode);
+ TWO52 = force_reg (mode, TWO52);
+
+ return TWO52;
+}
+
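+/* The expanders below exploit the fact that, for nonnegative x below
+ 2**52 in DFmode (2**23 in SFmode), computing x + TWO52 - TWO52
+ rounds x to an integer in the current rounding mode, since the sum
+ has no mantissa bits left for a fraction. */
+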
+/* Expand SSE sequence for computing lround from OP1 storing
+ into OP0. */
+void
+ix86_expand_lround (rtx op0, rtx op1)
+{
+ /* C code for the stuff we're doing below:
+ tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
+ return (long)tmp;
+ */
+ enum machine_mode mode = GET_MODE (op1);
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+ rtx adj;
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
+
+ /* adj = copysign (0.5, op1) */
+ adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
+ ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
+
+ /* adj = op1 + adj */
+ adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* op0 = (imode)adj */
+ expand_fix (op0, adj, 0);
+}
+
+/* Expand SSE2 sequence for computing lfloor or lceil (depending on
+ DO_FLOOR) from OP1 storing into OP0. */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+ /* C code for the stuff we're doing below (for do_floor):
+ xi = (long)op1;
+ xi -= (double)xi > op1 ? 1 : 0;
+ return xi;
+ */
+ enum machine_mode fmode = GET_MODE (op1);
+ enum machine_mode imode = GET_MODE (op0);
+ rtx ireg, freg, label, tmp;
+
+ /* reg = (long)op1 */
+ ireg = gen_reg_rtx (imode);
+ expand_fix (ireg, op1, 0);
+
+ /* freg = (double)reg */
+ freg = gen_reg_rtx (fmode);
+ expand_float (freg, ireg, 0);
+
+ /* ireg = (freg > op1) ? ireg - 1 : ireg */
+ label = ix86_expand_sse_compare_and_jump (UNLE,
+ freg, op1, !do_floor);
+ tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+ ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (ireg, tmp);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (op0, ireg);
+}
+
+/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
+ result in OPERAND0. */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we're doing below:
+ xa = fabs (operand1);
+ if (!isless (xa, 2**52))
+ return operand1;
+ xa = xa + 2**52 - 2**52;
+ return copysign (xa, operand1);
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx res, xa, label, TWO52, mask;
+
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ TWO52 = ix86_gen_TWO52 (mode);
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+ ix86_sse_copysign_to_positive (res, xa, res, mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ xa = xa + TWO52 - TWO52;
+ x2 = copysign (xa, x);
+ Compensate. Floor:
+ if (x2 > x)
+ x2 -= 1;
+ Compensate. Ceil:
+ if (x2 < x)
+ x2 -= -1;
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, TWO52, tmp, label, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa = xa + TWO52 - TWO52; */
+ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+ /* xa = copysign (xa, operand1) */
+ ix86_sse_copysign_to_positive (xa, xa, res, mask);
+
+ /* generate 1.0 or -1.0 */
+ one = force_reg (mode,
+ const_double_from_real_value (do_floor
+ ? dconst1 : dconstm1, mode));
+
+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ /* We always need to subtract here to preserve signed zero. */
+ tmp = expand_simple_binop (mode, MINUS,
+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ x2 = (double)(long)x;
+ Compensate. Floor:
+ if (x2 > x)
+ x2 -= 1;
+ Compensate. Ceil:
+ if (x2 < x)
+ x2 += 1;
+ if (HONOR_SIGNED_ZEROS (mode))
+ return copysign (x2, x);
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xi, TWO52, tmp, label, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa = (double)(long)x */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, res, 0);
+ expand_float (xa, xi, 0);
+
+ /* generate 1.0 */
+ one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing round from OPERAND1 storing
+ into OPERAND0. The sequence works without relying on DImode truncation
+ via cvttsd2siq, which is only available on 64-bit targets. */
+void
+ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), dxa, xa2, x2;
+ if (!isless (xa, TWO52))
+ return x;
+ Using the absolute value and copying back sign makes
+ -0.0 -> -0.0 correct.
+ xa2 = xa + TWO52 - TWO52;
+ Compensate.
+ dxa = xa2 - xa;
+ if (dxa <= -0.5)
+ xa2 += 1;
+ else if (dxa > 0.5)
+ xa2 -= 1;
+ x2 = copysign (xa2, x);
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa2 = xa + TWO52 - TWO52; */
+ xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
+
+ /* dxa = xa2 - xa; */
+ dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* generate 0.5, 1.0 and -0.5 */
+ half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
+ one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
+ mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
+ 0, OPTAB_DIRECT);
+
+ /* Compensate. */
+ tmp = gen_reg_rtx (mode);
+ /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* res = copysign (xa2, operand1) */
+ ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing trunc from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_trunc (rtx operand0, rtx operand1)
+{
+ /* C code for SSE variant we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ x2 = (double)(long)x;
+ if (HONOR_SIGNED_ZEROS (mode))
+ return copysign (x2, x);
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xi, TWO52, label, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* x = (double)(long)x */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, res, 0);
+ expand_float (res, xi, 0);
+
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing trunc from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
+{
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, mask, TWO52, label, one, res, smask, tmp;
+
+ /* C code for SSE variant we expand below.
+ double xa = fabs (x), xa2, x2;
+ if (!isless (xa, TWO52))
+ return x;
+ xa2 = xa + TWO52 - TWO52;
+ Compensate:
+ if (xa2 > xa)
+ xa2 -= 1.0;
+ x2 = copysign (xa2, x);
+ return x2;
+ */
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &smask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* res = xa + TWO52 - TWO52; */
+ tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ /* generate 1.0 */
+ one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+ /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
+ mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_AND (mode, mask, one)));
+ tmp = expand_simple_binop (mode, MINUS,
+ res, mask, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ /* res = copysign (res, operand1) */
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing round from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_round (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we're doing below:
+ double xa = fabs (x);
+ if (!isless (xa, TWO52))
+ return x;
+ xa = (double)(long)(xa + nextafter (0.5, 0.0));
+ return copysign (xa, x);
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx res, TWO52, xa, label, xi, half, mask;
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ TWO52 = ix86_gen_TWO52 (mode);
+ xa = ix86_expand_sse_fabs (res, &mask);
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
+
+ /* xa = xa + 0.5 */
+ half = force_reg (mode, const_double_from_real_value (pred_half, mode));
+ xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* xa = (double)(int64_t)xa */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, xa, 0);
+ expand_float (xa, xi, 0);
+
+ /* res = copysign (xa, operand1) */
+ ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+
+/* Table of valid machine attributes. */
+static const struct attribute_spec ix86_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Stdcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Fastcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Thiscall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Cdecl attribute says the callee is a normal C declaration */
+ { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Regparm attribute specifies how many integer arguments are to be
+ passed in registers. */
+ { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
+ /* Sseregparm attribute says we are using x86_64 calling conventions
+ for FP arguments. */
+ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* force_align_arg_pointer says this function realigns the stack at entry. */
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
+ false, true, true, ix86_handle_cconv_attribute },
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
+#endif
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+ SUBTARGET_ATTRIBUTE_TABLE,
+#endif
+ /* ms_abi and sysv_abi calling convention function attributes. */
+ { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
+ { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
+ { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
+ { "callee_pop_aggregate_return", 1, 1, false, true, true,
+ ix86_handle_callee_pop_aggregate_return },
+ /* End element. */
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return ix86_cost->scalar_stmt_cost;
+
+ case scalar_load:
+ return ix86_cost->scalar_load_cost;
+
+ case scalar_store:
+ return ix86_cost->scalar_store_cost;
+
+ case vector_stmt:
+ return ix86_cost->vec_stmt_cost;
+
+ case vector_load:
+ return ix86_cost->vec_align_load_cost;
+
+ case vector_store:
+ return ix86_cost->vec_store_cost;
+
+ case vec_to_scalar:
+ return ix86_cost->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return ix86_cost->scalar_to_vec_cost;
+
+ case unaligned_load:
+ case unaligned_store:
+ return ix86_cost->vec_unalign_load_cost;
+
+ case cond_branch_taken:
+ return ix86_cost->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return ix86_cost->cond_not_taken_branch_cost;
+
+ case vec_perm:
+ case vec_promote_demote:
+ return ix86_cost->vec_stmt_cost;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Implement targetm.vectorize.builtin_vec_perm. */
+
+static tree
+ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
+{
+ tree itype = TREE_TYPE (vec_type);
+ bool u = TYPE_UNSIGNED (itype);
+ enum machine_mode vmode = TYPE_MODE (vec_type);
+ enum ix86_builtins fcode;
+ bool ok = TARGET_SSE2;
+
+ switch (vmode)
+ {
+ case V4DFmode:
+ ok = TARGET_AVX;
+ fcode = IX86_BUILTIN_VEC_PERM_V4DF;
+ goto get_di;
+ case V2DFmode:
+ fcode = IX86_BUILTIN_VEC_PERM_V2DF;
+ get_di:
+ itype = ix86_get_builtin_type (IX86_BT_DI);
+ break;
+
+ case V8SFmode:
+ ok = TARGET_AVX;
+ fcode = IX86_BUILTIN_VEC_PERM_V8SF;
+ goto get_si;
+ case V4SFmode:
+ ok = TARGET_SSE;
+ fcode = IX86_BUILTIN_VEC_PERM_V4SF;
+ get_si:
+ itype = ix86_get_builtin_type (IX86_BT_SI);
+ break;
+
+ case V2DImode:
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
+ break;
+ case V4SImode:
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
+ break;
+ case V8HImode:
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
+ break;
+ case V16QImode:
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
+ break;
+ default:
+ ok = false;
+ break;
+ }
+
+ if (!ok)
+ return NULL_TREE;
+
+ *mask_type = itype;
+ return ix86_builtins[(int) fcode];
+}
+
+/* Return a vector mode with twice as many elements as VMODE. */
+/* ??? Consider moving this to a table generated by genmodes.c. */
+
+static enum machine_mode
+doublesize_vector_mode (enum machine_mode vmode)
+{
+ switch (vmode)
+ {
+ case V2SFmode: return V4SFmode;
+ case V1DImode: return V2DImode;
+ case V2SImode: return V4SImode;
+ case V4HImode: return V8HImode;
+ case V8QImode: return V16QImode;
+
+ case V2DFmode: return V4DFmode;
+ case V4SFmode: return V8SFmode;
+ case V2DImode: return V4DImode;
+ case V4SImode: return V8SImode;
+ case V8HImode: return V16HImode;
+ case V16QImode: return V32QImode;
+
+ case V4DFmode: return V8DFmode;
+ case V8SFmode: return V16SFmode;
+ case V4DImode: return V8DImode;
+ case V8SImode: return V16SImode;
+ case V16HImode: return V32HImode;
+ case V32QImode: return V64QImode;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Construct (set target (vec_select op0 (parallel perm))) and
+ return true if that's a valid instruction in the active ISA. */
+
+static bool
+expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
+{
+ rtx rperm[MAX_VECT_LEN], x;
+ unsigned i;
+
+ for (i = 0; i < nelt; ++i)
+ rperm[i] = GEN_INT (perm[i]);
+
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
+ x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
+ x = gen_rtx_SET (VOIDmode, target, x);
+
+ x = emit_insn (x);
+ if (recog_memoized (x) < 0)
+ {
+ remove_insn (x);
+ return false;
+ }
+ return true;
+}
+
+/* Similar, but generate a vec_concat from op0 and op1 as well. */
+
+static bool
+expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
+ const unsigned char *perm, unsigned nelt)
+{
+ enum machine_mode v2mode;
+ rtx x;
+
+ v2mode = doublesize_vector_mode (GET_MODE (op0));
+ x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
+ return expand_vselect (target, x, perm, nelt);
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
+ in terms of blendp[sd] / pblendw / pblendvb. */
+
+static bool
+expand_vec_perm_blend (struct expand_vec_perm_d *d)
+{
+ enum machine_mode vmode = d->vmode;
+ unsigned i, mask, nelt = d->nelt;
+ rtx target, op0, op1, x;
+
+ if (!TARGET_SSE4_1 || d->op0 == d->op1)
+ return false;
+ if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
+ return false;
+
+ /* This is a blend, not a permute. Elements must stay in their
+ respective lanes. */
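+  /* E.g. for V4SFmode a perm of {0, 5, 2, 7} takes elements 1 and 3
+     from op1 and maps to blendps with immediate mask 0b1010. */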
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned e = d->perm[i];
+ if (!(e == i || e == i + nelt))
+ return false;
+ }
+
+ if (d->testing_p)
+ return true;
+
+ /* ??? Without SSE4.1, we could implement this with and/andn/or. This
+ decision should be extracted elsewhere, so that we only try that
+ sequence once all budget==3 options have been tried. */
+
+ /* For bytes, see if bytes move in pairs so we can use pblendw with
+ an immediate argument, rather than pblendvb with a vector argument. */
+ if (vmode == V16QImode)
+ {
+ bool pblendw_ok = true;
+ for (i = 0; i < 16 && pblendw_ok; i += 2)
+ pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
+
+ if (!pblendw_ok)
+ {
+ rtx rperm[16], vperm;
+
+ for (i = 0; i < nelt; ++i)
+ rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
+
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
+ vperm = force_reg (V16QImode, vperm);
+
+ emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
+ return true;
+ }
+ }
+
+ target = d->target;
+ op0 = d->op0;
+ op1 = d->op1;
+ mask = 0;
+
+ switch (vmode)
+ {
+ case V4DFmode:
+ case V8SFmode:
+ case V2DFmode:
+ case V4SFmode:
+ case V8HImode:
+ for (i = 0; i < nelt; ++i)
+ mask |= (d->perm[i] >= nelt) << i;
+ break;
+
+ case V2DImode:
+ for (i = 0; i < 2; ++i)
+ mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
+ goto do_subreg;
+
+ case V4SImode:
+ for (i = 0; i < 4; ++i)
+ mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
+ goto do_subreg;
+
+ case V16QImode:
+ for (i = 0; i < 8; ++i)
+ mask |= (d->perm[i * 2] >= 16) << i;
+
+ do_subreg:
+ vmode = V8HImode;
+ target = gen_lowpart (vmode, target);
+ op0 = gen_lowpart (vmode, op0);
+ op1 = gen_lowpart (vmode, op1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* This matches five different patterns with the different modes. */
+ x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
+ x = gen_rtx_SET (VOIDmode, target, x);
+ emit_insn (x);
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
+ in terms of the variable form of vpermilps.
+
+ Note that we will already have tried and failed the immediate form
+ of vpermilps, which requires that the high and low lane shuffles be
+ identical; the variable form has no such restriction. */
+
+static bool
+expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
+{
+ rtx rperm[8], vperm;
+ unsigned i;
+
+ if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
+ return false;
+
+ /* We can only permute within the 128-bit lane. */
+ for (i = 0; i < 8; ++i)
+ {
+ unsigned e = d->perm[i];
+ if (i < 4 ? e >= 4 : e < 4)
+ return false;
+ }
+
+ if (d->testing_p)
+ return true;
+
+ for (i = 0; i < 8; ++i)
+ {
+ unsigned e = d->perm[i];
+
+ /* Within each 128-bit lane, the elements of op0 are numbered
+ from 0 and the elements of op1 are numbered from 4. */
+ if (e >= 8 + 4)
+ e -= 8;
+ else if (e >= 4)
+ e -= 4;
+
+ rperm[i] = GEN_INT (e);
+ }
+
+ vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
+ vperm = force_reg (V8SImode, vperm);
+ emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
+ in terms of pshufb or vpperm. */
+
+static bool
+expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
+{
+ unsigned i, nelt, eltsz;
+ rtx rperm[16], vperm, target, op0, op1;
+
+ if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
+ return false;
+ if (GET_MODE_SIZE (d->vmode) != 16)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ nelt = d->nelt;
+ eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
+
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned j, e = d->perm[i];
+ for (j = 0; j < eltsz; ++j)
+ rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
+ }
+
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
+ vperm = force_reg (V16QImode, vperm);
+
+ target = gen_lowpart (V16QImode, d->target);
+ op0 = gen_lowpart (V16QImode, d->op0);
+ if (d->op0 == d->op1)
+ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
+ else
+ {
+ op1 = gen_lowpart (V16QImode, d->op1);
+ emit_insn (gen_xop_pperm (target, op0, op1, vperm));
+ }
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
+ in a single instruction. */
+
+static bool
+expand_vec_perm_1 (struct expand_vec_perm_d *d)
+{
+ unsigned i, nelt = d->nelt;
+ unsigned char perm2[MAX_VECT_LEN];
+
+ /* Check plain VEC_SELECT first, because AVX has instructions that could
+ match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
+ input where SEL+CONCAT may not. */
+ if (d->op0 == d->op1)
+ {
+ int mask = nelt - 1;
+
+ for (i = 0; i < nelt; i++)
+ perm2[i] = d->perm[i] & mask;
+
+ if (expand_vselect (d->target, d->op0, perm2, nelt))
+ return true;
+
+ /* There are plenty of patterns in sse.md that are written for
+ SEL+CONCAT and are not replicated for a single op. Perhaps
+ that should be changed, to avoid the nastiness here. */
+
+ /* Recognize interleave style patterns, which means incrementing
+ every other permutation operand. */
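+      /* Illustrative example (hypothetical values): for V4SF with a single
+         operand, the desired permutation { 0, 0, 1, 1 } becomes PERM2
+         { 0, 4, 1, 5 }, matching the unpcklps-style SEL+CONCAT pattern.  */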
+ for (i = 0; i < nelt; i += 2)
+ {
+ perm2[i] = d->perm[i] & mask;
+ perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
+ }
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
+ return true;
+
+ /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
+ if (nelt >= 4)
+ {
+ for (i = 0; i < nelt; i += 4)
+ {
+ perm2[i + 0] = d->perm[i + 0] & mask;
+ perm2[i + 1] = d->perm[i + 1] & mask;
+ perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
+ perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
+ }
+
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
+ return true;
+ }
+ }
+
+ /* Finally, try the fully general two operand permute. */
+ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
+ return true;
+
+ /* Recognize interleave style patterns with reversed operands. */
+ if (d->op0 != d->op1)
+ {
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned e = d->perm[i];
+ if (e >= nelt)
+ e -= nelt;
+ else
+ e += nelt;
+ perm2[i] = e;
+ }
+
+ if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
+ return true;
+ }
+
+ /* Try the SSE4.1 blend variable merge instructions. */
+ if (expand_vec_perm_blend (d))
+ return true;
+
+ /* Try one of the AVX vpermil variable permutations. */
+ if (expand_vec_perm_vpermil (d))
+ return true;
+
+ /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
+ if (expand_vec_perm_pshufb (d))
+ return true;
+
+ return false;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
+ in terms of a pair of pshuflw + pshufhw instructions. */
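+/* Worked example (added for clarity, values hypothetical): the V8HI
+   permutation { 2, 0, 3, 1, 4, 6, 5, 7 } splits into a pshuflw with
+   control { 2, 0, 3, 1, 4, 5, 6, 7 } followed by a pshufhw with control
+   { 0, 1, 2, 3, 4, 6, 5, 7 }.  */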
+
+static bool
+expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
+{
+ unsigned char perm2[MAX_VECT_LEN];
+ unsigned i;
+ bool ok;
+
+ if (d->vmode != V8HImode || d->op0 != d->op1)
+ return false;
+
+ /* The two permutations only operate in 64-bit lanes. */
+ for (i = 0; i < 4; ++i)
+ if (d->perm[i] >= 4)
+ return false;
+ for (i = 4; i < 8; ++i)
+ if (d->perm[i] < 4)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ /* Emit the pshuflw. */
+ memcpy (perm2, d->perm, 4);
+ for (i = 4; i < 8; ++i)
+ perm2[i] = i;
+ ok = expand_vselect (d->target, d->op0, perm2, 8);
+ gcc_assert (ok);
+
+ /* Emit the pshufhw. */
+ memcpy (perm2 + 4, d->perm + 4, 4);
+ for (i = 0; i < 4; ++i)
+ perm2[i] = i;
+ ok = expand_vselect (d->target, d->target, perm2, 8);
+ gcc_assert (ok);
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
+ the permutation using the SSSE3 palignr instruction. This succeeds
+ when all of the elements in PERM fit within one vector and we merely
+ need to shift them down so that a single vector permutation has a
+ chance to succeed. */
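+/* Worked example (added for clarity, values hypothetical): for V16QImode
+   with perm = { 1, 2, ..., 16 }, MIN is 1 and MAX is 16, so the span fits
+   within one vector; a palignr by one byte then leaves the elements
+   already in order and the follow-up permutation degenerates to the
+   identity.  */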
+
+static bool
+expand_vec_perm_palignr (struct expand_vec_perm_d *d)
+{
+ unsigned i, nelt = d->nelt;
+ unsigned min, max;
+ bool in_order, ok;
+ rtx shift;
+
+ /* Even with AVX, palignr only operates on 128-bit vectors. */
+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+ return false;
+
+ min = nelt, max = 0;
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned e = d->perm[i];
+ if (e < min)
+ min = e;
+ if (e > max)
+ max = e;
+ }
+ if (min == 0 || max - min >= nelt)
+ return false;
+
+ /* Given that we have SSSE3, we know we'll be able to implement the
+ single operand permutation after the palignr with pshufb. */
+ if (d->testing_p)
+ return true;
+
+ shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
+ emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
+ gen_lowpart (TImode, d->op1),
+ gen_lowpart (TImode, d->op0), shift));
+
+ d->op0 = d->op1 = d->target;
+
+ in_order = true;
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned e = d->perm[i] - min;
+ if (e != i)
+ in_order = false;
+ d->perm[i] = e;
+ }
+
+ /* Test for the degenerate case where the alignment by itself
+ produces the desired permutation. */
+ if (in_order)
+ return true;
+
+ ok = expand_vec_perm_1 (d);
+ gcc_assert (ok);
+
+ return ok;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
+ a two vector permutation into a single vector permutation by using
+ an interleave operation to merge the vectors. */
+
+static bool
+expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
+{
+ struct expand_vec_perm_d dremap, dfinal;
+ unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
+ unsigned contents, h1, h2, h3, h4;
+ unsigned char remap[2 * MAX_VECT_LEN];
+ rtx seq;
+ bool ok;
+
+ if (d->op0 == d->op1)
+ return false;
+
+ /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
+ lanes. We can use similar techniques with the vperm2f128 instruction,
+ but it requires slightly different logic. */
+ if (GET_MODE_SIZE (d->vmode) != 16)
+ return false;
+
+  /* Examine where the elements come from.  */
+ contents = 0;
+ for (i = 0; i < nelt; ++i)
+ contents |= 1u << d->perm[i];
+
+ /* Split the two input vectors into 4 halves. */
+ h1 = (1u << nelt2) - 1;
+ h2 = h1 << nelt2;
+ h3 = h2 << nelt2;
+ h4 = h3 << nelt2;
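+
+  /* E.g. (illustrative): for nelt == 4, h1 == 0x3, h2 == 0xc, h3 == 0x30
+     and h4 == 0xc0.  A permutation { 0, 1, 4, 5 } has contents 0x33, which
+     is covered by h1 | h3, so the low halves of the two operands can be
+     merged with an interleave-low.  */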
+
+ memset (remap, 0xff, sizeof (remap));
+ dremap = *d;
+
+  /* If the elements come from the low halves, use an interleave-low, and
+     similarly for interleave-high.  If the elements are from mismatched
+     halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
+ if ((contents & (h1 | h3)) == contents)
+ {
+ for (i = 0; i < nelt2; ++i)
+ {
+ remap[i] = i * 2;
+ remap[i + nelt] = i * 2 + 1;
+ dremap.perm[i * 2] = i;
+ dremap.perm[i * 2 + 1] = i + nelt;
+ }
+ }
+ else if ((contents & (h2 | h4)) == contents)
+ {
+ for (i = 0; i < nelt2; ++i)
+ {
+ remap[i + nelt2] = i * 2;
+ remap[i + nelt + nelt2] = i * 2 + 1;
+ dremap.perm[i * 2] = i + nelt2;
+ dremap.perm[i * 2 + 1] = i + nelt + nelt2;
+ }
+ }
+ else if ((contents & (h1 | h4)) == contents)
+ {
+ for (i = 0; i < nelt2; ++i)
+ {
+ remap[i] = i;
+ remap[i + nelt + nelt2] = i + nelt2;
+ dremap.perm[i] = i;
+ dremap.perm[i + nelt2] = i + nelt + nelt2;
+ }
+ if (nelt != 4)
+ {
+ dremap.vmode = V2DImode;
+ dremap.nelt = 2;
+ dremap.perm[0] = 0;
+ dremap.perm[1] = 3;
+ }
+ }
+ else if ((contents & (h2 | h3)) == contents)
+ {
+ for (i = 0; i < nelt2; ++i)
+ {
+ remap[i + nelt2] = i;
+ remap[i + nelt] = i + nelt2;
+ dremap.perm[i] = i + nelt2;
+ dremap.perm[i + nelt2] = i + nelt;
+ }
+ if (nelt != 4)
+ {
+ dremap.vmode = V2DImode;
+ dremap.nelt = 2;
+ dremap.perm[0] = 1;
+ dremap.perm[1] = 2;
+ }
+ }
+ else
+ return false;
+
+ /* Use the remapping array set up above to move the elements from their
+ swizzled locations into their final destinations. */
+ dfinal = *d;
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned e = remap[d->perm[i]];
+ gcc_assert (e < nelt);
+ dfinal.perm[i] = e;
+ }
+ dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+ dfinal.op1 = dfinal.op0;
+ dremap.target = dfinal.op0;
+
+ /* Test if the final remap can be done with a single insn. For V4SFmode or
+ V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
+ start_sequence ();
+ ok = expand_vec_perm_1 (&dfinal);
+ seq = get_insns ();
+ end_sequence ();
+
+ if (!ok)
+ return false;
+
+ if (dremap.vmode != dfinal.vmode)
+ {
+ dremap.target = gen_lowpart (dremap.vmode, dremap.target);
+ dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
+ dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
+ }
+
+ ok = expand_vec_perm_1 (&dremap);
+ gcc_assert (ok);
+
+ emit_insn (seq);
+ return true;
+}
+
+/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
+ permutation with two pshufb insns and an ior. We should have already
+ failed all two instruction sequences. */
+
+static bool
+expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
+{
+ rtx rperm[2][16], vperm, l, h, op, m128;
+ unsigned int i, nelt, eltsz;
+
+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+ return false;
+ gcc_assert (d->op0 != d->op1);
+
+ nelt = d->nelt;
+ eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
+
+ /* Generate two permutation masks. If the required element is within
+ the given vector it is shuffled into the proper lane. If the required
+ element is in the other vector, force a zero into the lane by setting
+ bit 7 in the permutation mask. */
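+  /* Worked example (added for clarity): extracting the even bytes
+     { 0, 2, ..., 30 } of a V16QI operand pair yields an op0 mask with byte
+     indices { 0, 2, ..., 14 } in the low eight lanes and -128 elsewhere,
+     and an op1 mask with those indices in the high eight lanes; the ior
+     below merges the two partial results.  */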
+ m128 = GEN_INT (-128);
+ for (i = 0; i < nelt; ++i)
+ {
+ unsigned j, e = d->perm[i];
+ unsigned which = (e >= nelt);
+ if (e >= nelt)
+ e -= nelt;
+
+ for (j = 0; j < eltsz; ++j)
+ {
+ rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
+ rperm[1-which][i*eltsz + j] = m128;
+ }
+ }
+
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
+ vperm = force_reg (V16QImode, vperm);
+
+ l = gen_reg_rtx (V16QImode);
+ op = gen_lowpart (V16QImode, d->op0);
+ emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
+
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
+ vperm = force_reg (V16QImode, vperm);
+
+ h = gen_reg_rtx (V16QImode);
+ op = gen_lowpart (V16QImode, d->op1);
+ emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
+
+ op = gen_lowpart (V16QImode, d->target);
+ emit_insn (gen_iorv16qi3 (op, l, h));
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
+ and extract-odd permutations. */
+
+static bool
+expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
+{
+ rtx t1, t2, t3;
+
+ switch (d->vmode)
+ {
+ case V4DFmode:
+ t1 = gen_reg_rtx (V4DFmode);
+ t2 = gen_reg_rtx (V4DFmode);
+
+ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
+ emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
+ emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
+
+ /* Now an unpck[lh]pd will produce the result required. */
+ if (odd)
+ t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
+ else
+ t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
+ emit_insn (t3);
+ break;
+
+ case V8SFmode:
+ {
+ int mask = odd ? 0xdd : 0x88;
+
+ t1 = gen_reg_rtx (V8SFmode);
+ t2 = gen_reg_rtx (V8SFmode);
+ t3 = gen_reg_rtx (V8SFmode);
+
+ /* Shuffle within the 128-bit lanes to produce:
+ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
+ emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
+ GEN_INT (mask)));
+
+ /* Shuffle the lanes around to produce:
+ { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
+ emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
+ GEN_INT (0x3)));
+
+ /* Shuffle within the 128-bit lanes to produce:
+ { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
+ emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
+
+ /* Shuffle within the 128-bit lanes to produce:
+ { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
+ emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
+
+ /* Shuffle the lanes around to produce:
+ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
+ emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
+ GEN_INT (0x20)));
+ }
+ break;
+
+ case V2DFmode:
+ case V4SFmode:
+ case V2DImode:
+ case V4SImode:
+ /* These are always directly implementable by expand_vec_perm_1. */
+ gcc_unreachable ();
+
+ case V8HImode:
+ if (TARGET_SSSE3)
+ return expand_vec_perm_pshufb2 (d);
+ else
+ {
+ /* We need 2*log2(N)-1 operations to achieve odd/even
+ with interleave. */
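+	  /* For V8HI that is 2*3 - 1 == 5 interleaves, matching the five
+	     emit_insn calls below.  */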
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
+ emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
+ if (odd)
+ t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
+ else
+ t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
+ emit_insn (t3);
+ }
+ break;
+
+ case V16QImode:
+ if (TARGET_SSSE3)
+ return expand_vec_perm_pshufb2 (d);
+ else
+ {
+ t1 = gen_reg_rtx (V16QImode);
+ t2 = gen_reg_rtx (V16QImode);
+ t3 = gen_reg_rtx (V16QImode);
+ emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
+ emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
+ emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
+ if (odd)
+ t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
+ else
+ t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
+ emit_insn (t3);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return true;
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
+ extract-even and extract-odd permutations. */
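+/* E.g. (illustrative): for V4SI the extract-odd permutation is
+   { 1, 3, 5, 7 }, i.e. d->perm[0] == 1 and d->perm[i] == 2 * i + 1.  */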
+
+static bool
+expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
+{
+ unsigned i, odd, nelt = d->nelt;
+
+ odd = d->perm[0];
+ if (odd != 0 && odd != 1)
+ return false;
+
+ for (i = 1; i < nelt; ++i)
+ if (d->perm[i] != 2 * i + odd)
+ return false;
+
+ return expand_vec_perm_even_odd_1 (d, odd);
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
+ permutations. We assume that expand_vec_perm_1 has already failed. */
+
+static bool
+expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
+{
+ unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
+ enum machine_mode vmode = d->vmode;
+ unsigned char perm2[4];
+ rtx op0 = d->op0;
+ bool ok;
+
+ switch (vmode)
+ {
+ case V4DFmode:
+ case V8SFmode:
+ /* These are special-cased in sse.md so that we can optionally
+ use the vbroadcast instruction. They expand to two insns
+ if the input happens to be in a register. */
+ gcc_unreachable ();
+
+ case V2DFmode:
+ case V2DImode:
+ case V4SFmode:
+ case V4SImode:
+ /* These are always implementable using standard shuffle patterns. */
+ gcc_unreachable ();
+
+ case V8HImode:
+ case V16QImode:
+ /* These can be implemented via interleave. We save one insn by
+ stopping once we have promoted to V4SImode and then use pshufd. */
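+      /* Worked example (added for clarity): broadcasting element 5 of a
+         V8HI vector interleaves high once (ELT becomes 1), widens to
+         V4SImode, and finishes with a pshufd-style vselect of
+         { 1, 1, 1, 1 }.  */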
+ do
+ {
+ optab otab = vec_interleave_low_optab;
+
+ if (elt >= nelt2)
+ {
+ otab = vec_interleave_high_optab;
+ elt -= nelt2;
+ }
+ nelt2 /= 2;
+
+ op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
+ vmode = get_mode_wider_vector (vmode);
+ op0 = gen_lowpart (vmode, op0);
+ }
+ while (vmode != V4SImode);
+
+ memset (perm2, elt, 4);
+ ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
+ gcc_assert (ok);
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
+ broadcast permutations. */
+
+static bool
+expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
+{
+ unsigned i, elt, nelt = d->nelt;
+
+ if (d->op0 != d->op1)
+ return false;
+
+ elt = d->perm[0];
+ for (i = 1; i < nelt; ++i)
+ if (d->perm[i] != elt)
+ return false;
+
+ return expand_vec_perm_broadcast_1 (d);
+}
+
+/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
+ With all of the interface bits taken care of, perform the expansion
+ in D and return true on success. */
+
+static bool
+ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
+{
+ /* Try a single instruction expansion. */
+ if (expand_vec_perm_1 (d))
+ return true;
+
+ /* Try sequences of two instructions. */
+
+ if (expand_vec_perm_pshuflw_pshufhw (d))
+ return true;
+
+ if (expand_vec_perm_palignr (d))
+ return true;
+
+ if (expand_vec_perm_interleave2 (d))
+ return true;
+
+ if (expand_vec_perm_broadcast (d))
+ return true;
+
+ /* Try sequences of three instructions. */
+
+ if (expand_vec_perm_pshufb2 (d))
+ return true;
+
+ /* ??? Look for narrow permutations whose element orderings would
+ allow the promotion to a wider mode. */
+
+ /* ??? Look for sequences of interleave or a wider permute that place
+ the data into the correct lanes for a half-vector shuffle like
+ pshuf[lh]w or vpermilps. */
+
+ /* ??? Look for sequences of interleave that produce the desired results.
+ The combinatorics of punpck[lh] get pretty ugly... */
+
+ if (expand_vec_perm_even_odd (d))
+ return true;
+
+ return false;
+}
+
+/* Extract the values from the vector CST into the permutation array in D.
+ Return 0 on error, 1 if all values from the permutation come from the
+ first vector, 2 if all values from the second vector, and 3 otherwise. */
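+/* E.g. (illustrative): for NELT == 4, the constant { 0, 5, 1, 4 } yields 3,
+   while { 4, 5, 6, 7 } yields 2 and is folded down to { 0, 1, 2, 3 }.  */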
+
+static int
+extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
+{
+ tree list = TREE_VECTOR_CST_ELTS (cst);
+ unsigned i, nelt = d->nelt;
+ int ret = 0;
+
+ for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
+ {
+ unsigned HOST_WIDE_INT e;
+
+ if (!host_integerp (TREE_VALUE (list), 1))
+ return 0;
+ e = tree_low_cst (TREE_VALUE (list), 1);
+ if (e >= 2 * nelt)
+ return 0;
+
+ ret |= (e < nelt ? 1 : 2);
+ d->perm[i] = e;
+ }
+ gcc_assert (list == NULL);
+
+  /* If all elements are from the second vector, fold them to the first.  */
+ if (ret == 2)
+ for (i = 0; i < nelt; ++i)
+ d->perm[i] -= nelt;
+
+ return ret;
+}
+
+static rtx
+ix86_expand_vec_perm_builtin (tree exp)
+{
+ struct expand_vec_perm_d d;
+ tree arg0, arg1, arg2;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ d.vmode = TYPE_MODE (TREE_TYPE (arg0));
+ d.nelt = GET_MODE_NUNITS (d.vmode);
+ d.testing_p = false;
+ gcc_assert (VECTOR_MODE_P (d.vmode));
+
+ if (TREE_CODE (arg2) != VECTOR_CST)
+ {
+ error_at (EXPR_LOCATION (exp),
+ "vector permutation requires vector constant");
+ goto exit_error;
+ }
+
+ switch (extract_vec_perm_cst (&d, arg2))
+ {
+ default:
+      gcc_unreachable ();
+
+ case 0:
+ error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
+ goto exit_error;
+
+ case 3:
+ if (!operand_equal_p (arg0, arg1, 0))
+ {
+ d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
+ d.op0 = force_reg (d.vmode, d.op0);
+ d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
+ d.op1 = force_reg (d.vmode, d.op1);
+ break;
+ }
+
+ /* The elements of PERM do not suggest that only the first operand
+ is used, but both operands are identical. Allow easier matching
+ of the permutation by folding the permutation into the single
+ input vector. */
+ {
+ unsigned i, nelt = d.nelt;
+ for (i = 0; i < nelt; ++i)
+ if (d.perm[i] >= nelt)
+ d.perm[i] -= nelt;
+ }
+ /* FALLTHRU */
+
+ case 1:
+ d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
+ d.op0 = force_reg (d.vmode, d.op0);
+ d.op1 = d.op0;
+ break;
+
+ case 2:
+ d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
+ d.op0 = force_reg (d.vmode, d.op0);
+ d.op1 = d.op0;
+ break;
+ }
+
+ d.target = gen_reg_rtx (d.vmode);
+ if (ix86_expand_vec_perm_builtin_1 (&d))
+ return d.target;
+
+  /* For compiler-generated permutations, we should never get here, because
+     the compiler should also be checking the ok hook.  But since this is a
+     builtin the user has access to, don't abort.  */
+ switch (d.nelt)
+ {
+ case 2:
+ sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
+ break;
+ case 4:
+ sorry ("vector permutation (%d %d %d %d)",
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
+ break;
+ case 8:
+ sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3],
+ d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
+ break;
+ case 16:
+ sorry ("vector permutation "
+ "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3],
+ d.perm[4], d.perm[5], d.perm[6], d.perm[7],
+ d.perm[8], d.perm[9], d.perm[10], d.perm[11],
+ d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ exit_error:
+ return CONST0_RTX (d.vmode);
+}
+
+/* Implement targetm.vectorize.builtin_vec_perm_ok. */
+
+static bool
+ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
+{
+ struct expand_vec_perm_d d;
+ int vec_mask;
+ bool ret, one_vec;
+
+ d.vmode = TYPE_MODE (vec_type);
+ d.nelt = GET_MODE_NUNITS (d.vmode);
+ d.testing_p = true;
+
+ /* Given sufficient ISA support we can just return true here
+ for selected vector modes. */
+ if (GET_MODE_SIZE (d.vmode) == 16)
+ {
+ /* All implementable with a single vpperm insn. */
+ if (TARGET_XOP)
+ return true;
+ /* All implementable with 2 pshufb + 1 ior. */
+ if (TARGET_SSSE3)
+ return true;
+ /* All implementable with shufpd or unpck[lh]pd. */
+ if (d.nelt == 2)
+ return true;
+ }
+
+ vec_mask = extract_vec_perm_cst (&d, mask);
+
+  /* This hook cannot be called in response to something that the
+     user does (unlike the builtin expander), so we should never see
+     an error generated from the extract.  */
+ gcc_assert (vec_mask > 0 && vec_mask <= 3);
+ one_vec = (vec_mask != 3);
+
+ /* Implementable with shufps or pshufd. */
+ if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ return true;
+
+ /* Otherwise we have to go through the motions and see if we can
+ figure out how to generate the requested permutation. */
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ if (!one_vec)
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+ start_sequence ();
+ ret = ix86_expand_vec_perm_builtin_1 (&d);
+ end_sequence ();
+
+ return ret;
+}
+
+void
+ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
+{
+ struct expand_vec_perm_d d;
+ unsigned i, nelt;
+
+ d.target = targ;
+ d.op0 = op0;
+ d.op1 = op1;
+ d.vmode = GET_MODE (targ);
+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+ d.testing_p = false;
+
+ for (i = 0; i < nelt; ++i)
+ d.perm[i] = i * 2 + odd;
+
+ /* We'll either be able to implement the permutation directly... */
+ if (expand_vec_perm_1 (&d))
+ return;
+
+ /* ... or we use the special-case patterns. */
+ expand_vec_perm_even_odd_1 (&d, odd);
+}
+
+/* This function returns the calling-ABI-specific va_list type node.
+   It returns the FNDECL-specific va_list type.  */
+
+static tree
+ix86_fn_abi_va_list (tree fndecl)
+{
+ if (!TARGET_64BIT)
+ return va_list_type_node;
+ gcc_assert (fndecl != NULL_TREE);
+
+ if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
+ return ms_va_list_type_node;
+ else
+ return sysv_va_list_type_node;
+}
+
+/* Returns the canonical va_list type specified by TYPE.  If no
+   valid TYPE is provided, it returns NULL_TREE.  */
+
+static tree
+ix86_canonical_va_list_type (tree type)
+{
+ tree wtype, htype;
+
+ /* Resolve references and pointers to va_list type. */
+ if (TREE_CODE (type) == MEM_REF)
+ type = TREE_TYPE (type);
+  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
+ type = TREE_TYPE (type);
+ else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+
+ if (TARGET_64BIT && va_list_type_node != NULL_TREE)
+ {
+ wtype = va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return va_list_type_node;
+ wtype = sysv_va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return sysv_va_list_type_node;
+ wtype = ms_va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return ms_va_list_type_node;
+ return NULL_TREE;
+ }
+ return std_canonical_va_list_type (type);
+}
+
+/* Iterate through the target-specific builtin types for va_list.
+ IDX denotes the iterator, *PTREE is set to the result type of
+ the va_list builtin, and *PNAME to its internal type.
+ Returns zero if there is no element for this index, otherwise
+ IDX should be increased upon the next call.
+ Note, do not iterate a base builtin's name like __builtin_va_list.
+ Used from c_common_nodes_and_builtins. */
+
+static int
+ix86_enum_va_list (int idx, const char **pname, tree *ptree)
+{
+ if (TARGET_64BIT)
+ {
+ switch (idx)
+ {
+ default:
+ break;
+
+ case 0:
+ *ptree = ms_va_list_type_node;
+ *pname = "__builtin_ms_va_list";
+ return 1;
+
+ case 1:
+ *ptree = sysv_va_list_type_node;
+ *pname = "__builtin_sysv_va_list";
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+#undef TARGET_SCHED_DISPATCH
+#define TARGET_SCHED_DISPATCH has_dispatch
+#undef TARGET_SCHED_DISPATCH_DO
+#define TARGET_SCHED_DISPATCH_DO do_dispatch
+
+/* The size of the dispatch window is the total number of bytes of
+ object code allowed in a window. */
+#define DISPATCH_WINDOW_SIZE 16
+
+/* Number of dispatch windows considered for scheduling. */
+#define MAX_DISPATCH_WINDOWS 3
+
+/* Maximum number of instructions in a window. */
+#define MAX_INSN 4
+
+/* Maximum number of immediate operands in a window. */
+#define MAX_IMM 4
+
+/* Maximum number of immediate bits allowed in a window. */
+#define MAX_IMM_SIZE 128
+
+/* Maximum number of 32-bit immediates allowed in a window.  */
+#define MAX_IMM_32 4
+
+/* Maximum number of 64-bit immediates allowed in a window.  */
+#define MAX_IMM_64 2
+
+/* Maximum total of loads or prefetches allowed in a window. */
+#define MAX_LOAD 2
+
+/* Maximum total of stores allowed in a window. */
+#define MAX_STORE 1
+
+#undef BIG
+#define BIG 100
+
+
+/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
+enum dispatch_group {
+ disp_no_group = 0,
+ disp_load,
+ disp_store,
+ disp_load_store,
+ disp_prefetch,
+ disp_imm,
+ disp_imm_32,
+ disp_imm_64,
+ disp_branch,
+ disp_cmp,
+ disp_jcc,
+ disp_last
+};
+
+/* Number of allowable groups in a dispatch window.  It is an array
+   indexed by the dispatch_group enum.  100 is used as a big number
+   because the count of such operations has no effect on the dispatch
+   window, but we still need entries for them in the table.  */
+static unsigned int num_allowable_groups[disp_last] = {
+ 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};
+
+char group_name[disp_last + 1][16] = {
+ "disp_no_group", "disp_load", "disp_store", "disp_load_store",
+ "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
+ "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
+};
+
+/* Instruction path. */
+enum insn_path {
+ no_path = 0,
+ path_single, /* Single micro op. */
+ path_double, /* Double micro op. */
+  path_multi, /* Instructions with more than 2 micro ops.  */
+ last_path
+};
+
+/* sched_insn_info describes one instruction scheduled into a dispatch
+   window: the insn itself, its dispatch group and micro-op path, and
+   its code and immediate byte counts.
+
+   Windows are allocated for each basic block and are linked
+   together.  */
+typedef struct sched_insn_info_s {
+ rtx insn;
+ enum dispatch_group group;
+ enum insn_path path;
+ int byte_len;
+ int imm_bytes;
+} sched_insn_info;
+
+/* Linked list of dispatch windows.  This is a doubly linked list of
+   the dispatch windows of a basic block.  It contains information about
+ the number of uops in the window and the total number of
+ instructions and of bytes in the object code for this dispatch
+ window. */
+typedef struct dispatch_windows_s {
+  int num_insn;		/* Number of insns in the window.  */
+ int num_uops; /* Number of uops in the window. */
+ int window_size; /* Number of bytes in the window. */
+  int window_num;	/* Window number, either 0 or 1.  */
+  int num_imm;		/* Number of immediates in the window.  */
+  int num_imm_32;	/* Number of 32-bit immediates in the window.  */
+  int num_imm_64;	/* Number of 64-bit immediates in the window.  */
+  int imm_size;		/* Total size of immediates in the window.  */
+ int num_loads; /* Total memory loads in the window. */
+ int num_stores; /* Total memory stores in the window. */
+ int violation; /* Violation exists in window. */
+ sched_insn_info *window; /* Pointer to the window. */
+ struct dispatch_windows_s *next;
+ struct dispatch_windows_s *prev;
+} dispatch_windows;
+
+/* Immediate values used in an insn.  */
+typedef struct imm_info_s
+{
+  int imm;
+  int imm32;
+  int imm64;
+} imm_info;
+
+static dispatch_windows *dispatch_window_list;
+static dispatch_windows *dispatch_window_list1;
+
+/* Get dispatch group of insn. */
+
+static enum dispatch_group
+get_mem_group (rtx insn)
+{
+ enum attr_memory memory;
+
+ if (INSN_CODE (insn) < 0)
+ return disp_no_group;
+ memory = get_attr_memory (insn);
+ if (memory == MEMORY_STORE)
+ return disp_store;
+
+ if (memory == MEMORY_LOAD)
+ return disp_load;
+
+ if (memory == MEMORY_BOTH)
+ return disp_load_store;
+
+ return disp_no_group;
+}
+
+/* Return true if insn is a compare instruction. */
+
+static bool
+is_cmp (rtx insn)
+{
+ enum attr_type type;
+
+ type = get_attr_type (insn);
+ return (type == TYPE_TEST
+ || type == TYPE_ICMP
+ || type == TYPE_FCMP
+ || GET_CODE (PATTERN (insn)) == COMPARE);
+}
+
+/* Return true if a dispatch violation was encountered.  */
+
+static bool
+dispatch_violation (void)
+{
+ if (dispatch_window_list->next)
+ return dispatch_window_list->next->violation;
+ return dispatch_window_list->violation;
+}
+
+/* Return true if insn is a branch instruction. */
+
+static bool
+is_branch (rtx insn)
+{
+ return (CALL_P (insn) || JUMP_P (insn));
+}
+
+/* Return true if insn is a prefetch instruction. */
+
+static bool
+is_prefetch (rtx insn)
+{
+ return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
+}
+
+/* This function initializes a dispatch window and the list container holding a
+ pointer to the window. */
+
+static void
+init_window (int window_num)
+{
+ int i;
+ dispatch_windows *new_list;
+
+ if (window_num == 0)
+ new_list = dispatch_window_list;
+ else
+ new_list = dispatch_window_list1;
+
+ new_list->num_insn = 0;
+ new_list->num_uops = 0;
+ new_list->window_size = 0;
+ new_list->next = NULL;
+ new_list->prev = NULL;
+ new_list->window_num = window_num;
+ new_list->num_imm = 0;
+ new_list->num_imm_32 = 0;
+ new_list->num_imm_64 = 0;
+ new_list->imm_size = 0;
+ new_list->num_loads = 0;
+ new_list->num_stores = 0;
+ new_list->violation = false;
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ new_list->window[i].insn = NULL;
+ new_list->window[i].group = disp_no_group;
+ new_list->window[i].path = no_path;
+ new_list->window[i].byte_len = 0;
+ new_list->window[i].imm_bytes = 0;
+ }
+ return;
+}
+
+/* This function allocates and initializes a dispatch window and the
+ list container holding a pointer to the window. */
+
+static dispatch_windows *
+allocate_window (void)
+{
+ dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
+ new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
+
+ return new_list;
+}
+
+/* This routine initializes the dispatch scheduling information. It
+ initiates building dispatch scheduler tables and constructs the
+ first dispatch window. */
+
+static void
+init_dispatch_sched (void)
+{
+ /* Allocate a dispatch list and a window. */
+ dispatch_window_list = allocate_window ();
+ dispatch_window_list1 = allocate_window ();
+ init_window (0);
+ init_window (1);
+}
+
+/* This function returns true if a branch is detected. End of a basic block
+ does not have to be a branch, but here we assume only branches end a
+ window. */
+
+static bool
+is_end_basic_block (enum dispatch_group group)
+{
+ return group == disp_branch;
+}
+
+/* This function is called when the end of window processing is reached.  */
+
+static void
+process_end_window (void)
+{
+ gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
+ if (dispatch_window_list->next)
+ {
+ gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
+ gcc_assert (dispatch_window_list->window_size
+ + dispatch_window_list1->window_size <= 48);
+ init_window (1);
+ }
+ init_window (0);
+}
+
+/* Allocates a new dispatch window and adds it to WINDOW_LIST.
+ WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
+ for 48 bytes of instructions. Note that these windows are not dispatch
+   windows whose sizes are DISPATCH_WINDOW_SIZE.  */
+
+static dispatch_windows *
+allocate_next_window (int window_num)
+{
+ if (window_num == 0)
+ {
+ if (dispatch_window_list->next)
+ init_window (1);
+ init_window (0);
+ return dispatch_window_list;
+ }
+
+ dispatch_window_list->next = dispatch_window_list1;
+ dispatch_window_list1->prev = dispatch_window_list;
+
+ return dispatch_window_list1;
+}
+
+/* Count an immediate operand found at *IN_RTX, updating the counts in
+   IMM_VALUES.  Callback for for_each_rtx.  */
+
+static int
+find_constant_1 (rtx *in_rtx, imm_info *imm_values)
+{
+ if (*in_rtx == 0)
+ return 0;
+
+  switch (GET_CODE (*in_rtx))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ case CONST_INT:
+ (imm_values->imm)++;
+ if (x86_64_immediate_operand (*in_rtx, SImode))
+ (imm_values->imm32)++;
+ else
+ (imm_values->imm64)++;
+ break;
+
+ case CONST_DOUBLE:
+ (imm_values->imm)++;
+ (imm_values->imm64)++;
+ break;
+
+ case CODE_LABEL:
+ if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
+ {
+ (imm_values->imm)++;
+ (imm_values->imm32)++;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Compute number of immediate operands of an instruction. */
+
+static void
+find_constant (rtx in_rtx, imm_info *imm_values)
+{
+ for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
+ (rtx_function) find_constant_1, (void *) imm_values);
+}
+
+/* Return the total size of the immediate operands of an instruction along
+   with the number of corresponding immediate operands.  It initializes its
+   parameters to zero before calling FIND_CONSTANT.
+   INSN is the input instruction.  IMM is the total number of immediates.
+   IMM32 is the number of 32-bit immediates.  IMM64 is the number of
+   64-bit immediates.  */
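+/* E.g. (illustrative): an insn carrying one 32-bit and one 64-bit immediate
+   returns 4 + 8 == 12 and sets *IMM to 2, *IMM32 to 1 and *IMM64 to 1.  */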
+
+static int
+get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
+{
+ imm_info imm_values = {0, 0, 0};
+
+ find_constant (insn, &imm_values);
+ *imm = imm_values.imm;
+ *imm32 = imm_values.imm32;
+ *imm64 = imm_values.imm64;
+ return imm_values.imm32 * 4 + imm_values.imm64 * 8;
+}
+
+/* This function indicates whether an instruction has any immediate
+   operands.  */
+
+static bool
+has_immediate (rtx insn)
+{
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (insn)
+ return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ return false;
+}
+
+/* Return single or double path for instructions. */
+
+static enum insn_path
+get_insn_path (rtx insn)
+{
+ enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
+
+ if ((int)path == 0)
+ return path_single;
+
+ if ((int)path == 1)
+ return path_double;
+
+ return path_multi;
+}
+
+/* Return insn dispatch group. */
+
+static enum dispatch_group
+get_insn_group (rtx insn)
+{
+ enum dispatch_group group = get_mem_group (insn);
+ if (group)
+ return group;
+
+ if (is_branch (insn))
+ return disp_branch;
+
+ if (is_cmp (insn))
+ return disp_cmp;
+
+ if (has_immediate (insn))
+ return disp_imm;
+
+ if (is_prefetch (insn))
+ return disp_prefetch;
+
+ return disp_no_group;
+}
+
+/* Count the number of GROUP-restricted instructions in dispatch window
+   WINDOW_LIST: return BIG if adding INSN would violate the group's limits
+   for the window, 1 if it fits, and 0 for an unrestricted insn.  */
+
+static int
+count_num_restricted (rtx insn, dispatch_windows *window_list)
+{
+ enum dispatch_group group = get_insn_group (insn);
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (group == disp_no_group)
+ return 0;
+
+ if (group == disp_imm)
+ {
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ if (window_list->imm_size + imm_size > MAX_IMM_SIZE
+ || num_imm_operand + window_list->num_imm > MAX_IMM
+ || (num_imm32_operand > 0
+ && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
+ || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
+ || (num_imm64_operand > 0
+ && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
+ || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
+ || (window_list->imm_size + imm_size == MAX_IMM_SIZE
+ && num_imm64_operand > 0
+ && ((window_list->num_imm_64 > 0
+ && window_list->num_insn >= 2)
+ || window_list->num_insn >= 3)))
+ return BIG;
+
+ return 1;
+ }
+
+ if ((group == disp_load_store
+ && (window_list->num_loads >= MAX_LOAD
+ || window_list->num_stores >= MAX_STORE))
+ || ((group == disp_load
+ || group == disp_prefetch)
+ && window_list->num_loads >= MAX_LOAD)
+ || (group == disp_store
+ && window_list->num_stores >= MAX_STORE))
+ return BIG;
+
+ return 1;
+}
+
+/* This function returns true if insn satisfies dispatch rules on the
+ last window scheduled. */
+
+static bool
+fits_dispatch_window (rtx insn)
+{
+ dispatch_windows *window_list = dispatch_window_list;
+ dispatch_windows *window_list_next = dispatch_window_list->next;
+ unsigned int num_restrict;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int sum;
+
+  /* Make disp_cmp and disp_jcc get scheduled last.  These
+     instructions should be given the lowest priority in the
+     scheduling process in the Haifa scheduler to make sure they will
+     be scheduled in the same dispatch window as the reference to
+     them.  */
+ if (group == disp_jcc || group == disp_cmp)
+ return false;
+
+ /* Check nonrestricted. */
+ if (group == disp_no_group || group == disp_branch)
+ return true;
+
+ /* Get last dispatch window. */
+ if (window_list_next)
+ window_list = window_list_next;
+
+ if (window_list->window_num == 1)
+ {
+ sum = window_list->prev->window_size + window_list->window_size;
+
+ if (sum == 32
+ || (min_insn_size (insn) + sum) >= 48)
+ /* Window 1 is full. Go for next window. */
+ return true;
+ }
+
+ num_restrict = count_num_restricted (insn, window_list);
+
+ if (num_restrict > num_allowable_groups[group])
+ return false;
+
+ /* See if it fits in the first window. */
+ if (window_list->window_num == 0)
+ {
+      /* The first window should contain only single- and double-path
+	 uops.  */
+ if (path == path_double
+ && (window_list->num_uops + 2) > MAX_INSN)
+ return false;
+ else if (path != path_single)
+ return false;
+ }
+ return true;
+}
+
+/* Add an instruction INSN with NUM_UOPS micro-operations to the
+ dispatch window WINDOW_LIST. */
+
+static void
+add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
+{
+ int byte_len = min_insn_size (insn);
+ int num_insn = window_list->num_insn;
+ int imm_size;
+ sched_insn_info *window = window_list->window;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (!window_list->violation && group != disp_cmp
+ && !fits_dispatch_window (insn))
+ window_list->violation = true;
+
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+
+ /* Initialize window with new instruction. */
+ window[num_insn].insn = insn;
+ window[num_insn].byte_len = byte_len;
+ window[num_insn].group = group;
+ window[num_insn].path = path;
+ window[num_insn].imm_bytes = imm_size;
+
+ window_list->window_size += byte_len;
+ window_list->num_insn = num_insn + 1;
+ window_list->num_uops = window_list->num_uops + num_uops;
+ window_list->imm_size += imm_size;
+ window_list->num_imm += num_imm_operand;
+ window_list->num_imm_32 += num_imm32_operand;
+ window_list->num_imm_64 += num_imm64_operand;
+
+ if (group == disp_store)
+ window_list->num_stores += 1;
+ else if (group == disp_load
+ || group == disp_prefetch)
+ window_list->num_loads += 1;
+ else if (group == disp_load_store)
+ {
+ window_list->num_stores += 1;
+ window_list->num_loads += 1;
+ }
+}
+
+/* Adds a scheduled instruction, INSN, to the current dispatch window.
+ If the total bytes of instructions or the number of instructions in
+   the window exceeds the allowable limit, it allocates a new window.  */
+
+static void
+add_to_dispatch_window (rtx insn)
+{
+ int byte_len;
+ dispatch_windows *window_list;
+ dispatch_windows *next_list;
+ dispatch_windows *window0_list;
+ enum insn_path path;
+ enum dispatch_group insn_group;
+ bool insn_fits;
+ int num_insn;
+ int num_uops;
+ int window_num;
+ int insn_num_uops;
+ int sum;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = min_insn_size (insn);
+ window_list = dispatch_window_list;
+ next_list = window_list->next;
+ path = get_insn_path (insn);
+ insn_group = get_insn_group (insn);
+
+ /* Get the last dispatch window. */
+ if (next_list)
+ window_list = dispatch_window_list->next;
+
+ if (path == path_single)
+ insn_num_uops = 1;
+ else if (path == path_double)
+ insn_num_uops = 2;
+ else
+ insn_num_uops = (int) path;
+
+  /* If the current window is full, get a new window.
+     Window number zero is full if MAX_INSN uops are scheduled in it.
+     Window number one is full if window zero's bytes plus window
+     one's bytes equal 32, or if adding the bytes of the new
+     instruction to the total makes it greater than 48, or if it
+     already has MAX_INSN instructions in it.  */
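+  /* E.g. (illustrative): with MAX_INSN == 4, a window already holding four
+     uops forces the next insn into the other window; once window zero plus
+     window one reach 32 bytes, or would pass 48 bytes with the new insn,
+     both windows are processed and restarted.  */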
+ num_insn = window_list->num_insn;
+ num_uops = window_list->num_uops;
+ window_num = window_list->window_num;
+ insn_fits = fits_dispatch_window (insn);
+
+ if (num_insn >= MAX_INSN
+ || num_uops + insn_num_uops > MAX_INSN
+ || !(insn_fits))
+ {
+ window_num = ~window_num & 1;
+ window_list = allocate_next_window (window_num);
+ }
+
+ if (window_num == 0)
+ {
+ add_insn_window (insn, window_list, insn_num_uops);
+ if (window_list->num_insn >= MAX_INSN
+ && insn_group == disp_branch)
+ {
+ process_end_window ();
+ return;
+ }
+ }
+ else if (window_num == 1)
+ {
+ window0_list = window_list->prev;
+ sum = window0_list->window_size + window_list->window_size;
+ if (sum == 32
+ || (byte_len + sum) >= 48)
+ {
+ process_end_window ();
+ window_list = dispatch_window_list;
+ }
+
+ add_insn_window (insn, window_list, insn_num_uops);
+ }
+ else
+ gcc_unreachable ();
+
+ if (is_end_basic_block (insn_group))
+ {
+      /* End of basic block is reached; do end-of-basic-block processing.  */
+ process_end_window ();
+ return;
+ }
+}
+
+/* Print the dispatch window, WINDOW_NUM, to FILE. */
+
+DEBUG_FUNCTION static void
+debug_dispatch_window_file (FILE *file, int window_num)
+{
+ dispatch_windows *list;
+ int i;
+
+ if (window_num == 0)
+ list = dispatch_window_list;
+ else
+ list = dispatch_window_list1;
+
+ fprintf (file, "Window #%d:\n", list->window_num);
+ fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
+ list->num_insn, list->num_uops, list->window_size);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+ list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
+
+ fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
+ list->num_stores);
+ fprintf (file, " insn info:\n");
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ if (!list->window[i].insn)
+ break;
+ fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
+ i, group_name[list->window[i].group],
+ i, (void *)list->window[i].insn,
+ i, list->window[i].path,
+ i, list->window[i].byte_len,
+ i, list->window[i].imm_bytes);
+ }
+}
+
+/* Print dispatch window WINDOW_NUM to stdout.  */
+
+DEBUG_FUNCTION void
+debug_dispatch_window (int window_num)
+{
+ debug_dispatch_window_file (stdout, window_num);
+}
+
+/* Print INSN dispatch information to FILE. */
+
+DEBUG_FUNCTION static void
+debug_insn_dispatch_info_file (FILE *file, rtx insn)
+{
+ int byte_len;
+ enum insn_path path;
+ enum dispatch_group group;
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = min_insn_size (insn);
+ path = get_insn_path (insn);
+ group = get_insn_group (insn);
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+
+ fprintf (file, " insn info:\n");
+ fprintf (file, " group = %s, path = %d, byte_len = %d\n",
+ group_name[group], path, byte_len);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+ num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
+}
+
+/* Print to stdout the status of the ready list with respect to
+   dispatch windows.  */
+
+DEBUG_FUNCTION void
+debug_ready_dispatch (void)
+{
+ int i;
+ int no_ready = number_in_ready ();
+
+ fprintf (stdout, "Number of ready: %d\n", no_ready);
+
+ for (i = 0; i < no_ready; i++)
+ debug_insn_dispatch_info_file (stdout, get_ready_element (i));
+}
+
+/* This routine is the driver of the dispatch scheduler. */
+
+static void
+do_dispatch (rtx insn, int mode)
+{
+ if (mode == DISPATCH_INIT)
+ init_dispatch_sched ();
+ else if (mode == ADD_TO_DISPATCH_WINDOW)
+ add_to_dispatch_window (insn);
+}
+
+/* Return TRUE if Dispatch Scheduling is supported. */
+
+static bool
+has_dispatch (rtx insn, int action)
+{
+ if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
+ switch (action)
+ {
+ default:
+ return false;
+
+      case IS_DISPATCH_ON:
+	return true;
+
+ case IS_CMP:
+ return is_cmp (insn);
+
+ case DISPATCH_VIOLATION:
+ return dispatch_violation ();
+
+ case FITS_DISPATCH_WINDOW:
+ return fits_dispatch_window (insn);
+ }
+
+ return false;
+}
+
+/* ??? No autovectorization into MMX or 3DNOW until we can reliably
+ place emms and femms instructions. */
+
+static enum machine_mode
+ix86_preferred_simd_mode (enum machine_mode mode)
+{
+ /* Disable double precision vectorizer if needed. */
+ if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
+ return word_mode;
+
+ if (!TARGET_AVX && !TARGET_SSE)
+ return word_mode;
+
+ switch (mode)
+ {
+ case SFmode:
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SFmode : V4SFmode;
+ case DFmode:
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DFmode : V2DFmode;
+ case DImode:
+ return V2DImode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+
+ default:;
+ }
+
+ return word_mode;
+}
+
+/* If AVX is enabled, try vectorizing with both 256-bit and 128-bit
+   vectors.  */
+
+static unsigned int
+ix86_autovectorize_vector_sizes (void)
+{
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
+}
+
+/* Initialize the GCC target structure. */
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+# undef TARGET_MERGE_DECL_ATTRIBUTES
+# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS ix86_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL ix86_builtin_decl
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ ix86_builtin_vectorized_function
+
+#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
+#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
+
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+
+#undef TARGET_ENCODE_SECTION_INFO
+#ifndef SUBTARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
+#endif
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN ""
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN ""
+
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP ASM_BYTE
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
+#ifdef ASM_QUAD
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
+#endif
+
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND ix86_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ ia32_multipass_dfa_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
+
+#if TARGET_MACHO
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+#endif
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
+#endif
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START x86_file_start
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT \
+ | TARGET_SUBTARGET_DEFAULT \
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION ix86_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE ix86_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST ix86_address_cost
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
+
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+
+#undef TARGET_ENUM_VA_LIST_P
+#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
+
+#undef TARGET_FN_ABI_VA_LIST
+#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
+
+#undef TARGET_CANONICAL_VA_LIST_TYPE
+#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
+
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG ix86_function_arg
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
+#undef TARGET_INTERNAL_ARG_POINTER
+#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
+#undef TARGET_UPDATE_STACK_BOUNDARY
+#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
+#undef TARGET_GET_DRAP_RTX
+#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN ix86_static_chain
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
+#undef TARGET_RETURN_POPS_ARGS
+#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+
+#undef TARGET_C_MODE_FOR_SUFFIX
+#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
+#endif
+
+#ifdef SUBTARGET_INSERT_ATTRIBUTES
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
+#endif
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ix86_mangle_type
+
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+
+#undef TARGET_SUPPORTS_SPLIT_STACK
+#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ix86_function_value
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ ix86_builtin_vectorization_cost
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
+ ix86_vectorize_builtin_vec_perm
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
+ ix86_vectorize_builtin_vec_perm_ok
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
+ ix86_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+ ix86_autovectorize_vector_sizes
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
+
+#undef TARGET_OPTION_VALID_ATTRIBUTE_P
+#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
+
+#undef TARGET_OPTION_SAVE
+#define TARGET_OPTION_SAVE ix86_function_specific_save
+
+#undef TARGET_OPTION_RESTORE
+#define TARGET_OPTION_RESTORE ix86_function_specific_restore
+
+#undef TARGET_OPTION_PRINT
+#define TARGET_OPTION_PRINT ix86_function_specific_print
+
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P ix86_can_inline_p
+
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
+
+#undef TARGET_IRA_COVER_CLASSES
+#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE ix86_can_eliminate
+
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
+
+#undef TARGET_ASM_CODE_END
+#define TARGET_ASM_CODE_END ix86_code_end
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
+
+#if TARGET_MACHO
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
+#endif
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
new file mode 100644
index 000000000..bb23674d7
--- /dev/null
+++ b/gcc/config/i386/i386.h
@@ -0,0 +1,2400 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The purpose of this file is to define the characteristics of the i386,
+ independent of assembler syntax or operating system.
+
+ Three other files build on this one to describe a specific assembler syntax:
+ bsd386.h, att386.h, and sun386.h.
+
+ The actual tm.h file for a particular system should include
+ this file, and then the file for the appropriate assembler syntax.
+
+ Many macros that specify assembler syntax are omitted entirely from
+ this file because they really belong in the files for particular
+ assemblers. These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR,
+ ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many
+ that start with ASM_ or end in ASM_OP. */
+
+/* Redefines for option macros. */
+
+#define TARGET_64BIT OPTION_ISA_64BIT
+#define TARGET_MMX OPTION_ISA_MMX
+#define TARGET_3DNOW OPTION_ISA_3DNOW
+#define TARGET_3DNOW_A OPTION_ISA_3DNOW_A
+#define TARGET_SSE OPTION_ISA_SSE
+#define TARGET_SSE2 OPTION_ISA_SSE2
+#define TARGET_SSE3 OPTION_ISA_SSE3
+#define TARGET_SSSE3 OPTION_ISA_SSSE3
+#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
+#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
+#define TARGET_AVX OPTION_ISA_AVX
+#define TARGET_FMA OPTION_ISA_FMA
+#define TARGET_SSE4A OPTION_ISA_SSE4A
+#define TARGET_FMA4 OPTION_ISA_FMA4
+#define TARGET_XOP OPTION_ISA_XOP
+#define TARGET_LWP OPTION_ISA_LWP
+#define TARGET_ROUND OPTION_ISA_ROUND
+#define TARGET_ABM OPTION_ISA_ABM
+#define TARGET_BMI OPTION_ISA_BMI
+#define TARGET_TBM OPTION_ISA_TBM
+#define TARGET_POPCNT OPTION_ISA_POPCNT
+#define TARGET_SAHF OPTION_ISA_SAHF
+#define TARGET_MOVBE OPTION_ISA_MOVBE
+#define TARGET_CRC32 OPTION_ISA_CRC32
+#define TARGET_AES OPTION_ISA_AES
+#define TARGET_PCLMUL OPTION_ISA_PCLMUL
+#define TARGET_CMPXCHG16B OPTION_ISA_CX16
+#define TARGET_FSGSBASE OPTION_ISA_FSGSBASE
+#define TARGET_RDRND OPTION_ISA_RDRND
+#define TARGET_F16C OPTION_ISA_F16C
+
+
+/* SSE4.1 defines the round instructions.  */
+#define OPTION_MASK_ISA_ROUND OPTION_MASK_ISA_SSE4_1
+#define OPTION_ISA_ROUND ((ix86_isa_flags & OPTION_MASK_ISA_ROUND) != 0)
+
+#include "config/vxworks-dummy.h"
+
+/* Algorithm to expand string functions with.  */
+enum stringop_alg
+{
+ no_stringop,
+ libcall,
+ rep_prefix_1_byte,
+ rep_prefix_4_byte,
+ rep_prefix_8_byte,
+ loop_1_byte,
+ loop,
+ unrolled_loop
+};
+
+#define MAX_STRINGOP_ALGS 4
+
+/* Specify what algorithm to use for stringops of known size.
+   When the size is unknown, the UNKNOWN_SIZE alg is used.  When the
+   size is known at compile time or estimated via feedback, the SIZE
+   array is walked in order until MAX is greater than the estimate
+   (where -1 means infinity).  The corresponding ALG is then used.
+   For example, the initializer:
+    {{256, loop}, {-1, rep_prefix_4_byte}}
+   will use a loop for blocks smaller than or equal to 256 bytes and
+   the rep prefix otherwise. */
+struct stringop_algs
+{
+ const enum stringop_alg unknown_size;
+ const struct stringop_strategy {
+ const int max;
+ const enum stringop_alg alg;
+ } size [MAX_STRINGOP_ALGS];
+};
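+
+/* Editor's note: an illustrative, hypothetical cost-table entry showing
+   how the initializer format documented above is used.  Blocks of at
+   most 256 bytes use an inline loop; anything larger falls back to a
+   libcall.  This is an example only, not one of GCC's tuned tables.  */
+#if 0 /* example only */
+static const struct stringop_algs example_memcpy_algs = {
+  libcall,			/* algorithm when the size is unknown */
+  {{256, loop},			/* size <= 256 bytes: inline loop */
+   {-1, libcall},		/* any larger size: call the library */
+   {-1, libcall},		/* remaining entries pad the array */
+   {-1, libcall}}		/* out to MAX_STRINGOP_ALGS entries */
+};
+#endif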
+
+/* Define the specific costs for a given CPU.  */
+
+struct processor_costs {
+ const int add; /* cost of an add instruction */
+ const int lea; /* cost of a lea instruction */
+ const int shift_var; /* variable shift costs */
+ const int shift_const; /* constant shift costs */
+ const int mult_init[5]; /* cost of starting a multiply
+ in QImode, HImode, SImode, DImode, TImode*/
+ const int mult_bit; /* cost of multiply per each bit set */
+ const int divide[5]; /* cost of a divide/mod
+ in QImode, HImode, SImode, DImode, TImode*/
+ int movsx; /* The cost of movsx operation. */
+ int movzx; /* The cost of movzx operation. */
+ const int large_insn; /* insns larger than this cost more */
+ const int move_ratio; /* The threshold of number of scalar
+ memory-to-memory move insns. */
+ const int movzbl_load; /* cost of loading using movzbl */
+ const int int_load[3]; /* cost of loading integer registers
+ in QImode, HImode and SImode relative
+ to reg-reg move (2). */
+ const int int_store[3]; /* cost of storing integer register
+ in QImode, HImode and SImode */
+ const int fp_move; /* cost of reg,reg fld/fst */
+ const int fp_load[3]; /* cost of loading FP register
+ in SFmode, DFmode and XFmode */
+ const int fp_store[3]; /* cost of storing FP register
+ in SFmode, DFmode and XFmode */
+ const int mmx_move; /* cost of moving MMX register. */
+ const int mmx_load[2]; /* cost of loading MMX register
+ in SImode and DImode */
+ const int mmx_store[2]; /* cost of storing MMX register
+ in SImode and DImode */
+ const int sse_move; /* cost of moving SSE register. */
+ const int sse_load[3]; /* cost of loading SSE register
+ in SImode, DImode and TImode*/
+ const int sse_store[3]; /* cost of storing SSE register
+ in SImode, DImode and TImode*/
+ const int mmxsse_to_integer; /* cost of moving mmxsse register to
+ integer and vice versa. */
+ const int l1_cache_size; /* size of l1 cache, in kilobytes. */
+ const int l2_cache_size; /* size of l2 cache, in kilobytes. */
+ const int prefetch_block; /* bytes moved to cache for prefetch. */
+ const int simultaneous_prefetches; /* number of parallel prefetch
+ operations. */
+ const int branch_cost; /* Default value for BRANCH_COST. */
+ const int fadd; /* cost of FADD and FSUB instructions. */
+ const int fmul; /* cost of FMUL instruction. */
+ const int fdiv; /* cost of FDIV instruction. */
+ const int fabs; /* cost of FABS instruction. */
+ const int fchs; /* cost of FCHS instruction. */
+ const int fsqrt; /* cost of FSQRT instruction. */
+  /* Specify what algorithm to use for stringops of unknown size. */
+ struct stringop_algs memcpy[2], memset[2];
+ const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
+ load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation, excluding
+ load, store, vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
+ cost model. */
+ const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+ vectorizer cost model. */
+};
+
+extern const struct processor_costs *ix86_cost;
+extern const struct processor_costs ix86_size_cost;
+
+#define ix86_cur_cost() \
+ (optimize_insn_for_size_p () ? &ix86_size_cost: ix86_cost)
+
+/* Macros used in the machine description to test the flags. */
+
+/* configure can arrange to make this 2, to force a 486. */
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic
+#endif
+
+#ifndef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT \
+ (TARGET_64BIT && TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+#endif
+
+#define TARGET_FLOAT_RETURNS_IN_80387 TARGET_FLOAT_RETURNS
+
+/* 64bit Sledgehammer mode. For libgcc2 we make sure this is a
+ compile-time constant. */
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#else
+#ifndef TARGET_BI_ARCH
+#undef TARGET_64BIT
+#if TARGET_64BIT_DEFAULT
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+#endif
+
+#define HAS_LONG_COND_BRANCH 1
+#define HAS_LONG_UNCOND_BRANCH 1
+
+#define TARGET_386 (ix86_tune == PROCESSOR_I386)
+#define TARGET_486 (ix86_tune == PROCESSOR_I486)
+#define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM)
+#define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO)
+#define TARGET_GEODE (ix86_tune == PROCESSOR_GEODE)
+#define TARGET_K6 (ix86_tune == PROCESSOR_K6)
+#define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON)
+#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
+#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
+#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
+#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
+#define TARGET_CORE2_32 (ix86_tune == PROCESSOR_CORE2_32)
+#define TARGET_CORE2_64 (ix86_tune == PROCESSOR_CORE2_64)
+#define TARGET_CORE2 (TARGET_CORE2_32 || TARGET_CORE2_64)
+#define TARGET_COREI7_32 (ix86_tune == PROCESSOR_COREI7_32)
+#define TARGET_COREI7_64 (ix86_tune == PROCESSOR_COREI7_64)
+#define TARGET_COREI7 (TARGET_COREI7_32 || TARGET_COREI7_64)
+#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
+#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
+#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
+#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
+#define TARGET_BDVER1 (ix86_tune == PROCESSOR_BDVER1)
+#define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1)
+#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
+
+/* Feature tests against the various tunings. */
+enum ix86_tune_indices {
+ X86_TUNE_USE_LEAVE,
+ X86_TUNE_PUSH_MEMORY,
+ X86_TUNE_ZERO_EXTEND_WITH_AND,
+ X86_TUNE_UNROLL_STRLEN,
+ X86_TUNE_DEEP_BRANCH_PREDICTION,
+ X86_TUNE_BRANCH_PREDICTION_HINTS,
+ X86_TUNE_DOUBLE_WITH_ADD,
+ X86_TUNE_USE_SAHF,
+ X86_TUNE_MOVX,
+ X86_TUNE_PARTIAL_REG_STALL,
+ X86_TUNE_PARTIAL_FLAG_REG_STALL,
+ X86_TUNE_USE_HIMODE_FIOP,
+ X86_TUNE_USE_SIMODE_FIOP,
+ X86_TUNE_USE_MOV0,
+ X86_TUNE_USE_CLTD,
+ X86_TUNE_USE_XCHGB,
+ X86_TUNE_SPLIT_LONG_MOVES,
+ X86_TUNE_READ_MODIFY_WRITE,
+ X86_TUNE_READ_MODIFY,
+ X86_TUNE_PROMOTE_QIMODE,
+ X86_TUNE_FAST_PREFIX,
+ X86_TUNE_SINGLE_STRINGOP,
+ X86_TUNE_QIMODE_MATH,
+ X86_TUNE_HIMODE_MATH,
+ X86_TUNE_PROMOTE_QI_REGS,
+ X86_TUNE_PROMOTE_HI_REGS,
+ X86_TUNE_SINGLE_POP,
+ X86_TUNE_DOUBLE_POP,
+ X86_TUNE_SINGLE_PUSH,
+ X86_TUNE_DOUBLE_PUSH,
+ X86_TUNE_INTEGER_DFMODE_MOVES,
+ X86_TUNE_PARTIAL_REG_DEPENDENCY,
+ X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
+ X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL,
+ X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL,
+ X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL,
+ X86_TUNE_SSE_SPLIT_REGS,
+ X86_TUNE_SSE_TYPELESS_STORES,
+ X86_TUNE_SSE_LOAD0_BY_PXOR,
+ X86_TUNE_MEMORY_MISMATCH_STALL,
+ X86_TUNE_PROLOGUE_USING_MOVE,
+ X86_TUNE_EPILOGUE_USING_MOVE,
+ X86_TUNE_SHIFT1,
+ X86_TUNE_USE_FFREEP,
+ X86_TUNE_INTER_UNIT_MOVES,
+ X86_TUNE_INTER_UNIT_CONVERSIONS,
+ X86_TUNE_FOUR_JUMP_LIMIT,
+ X86_TUNE_SCHEDULE,
+ X86_TUNE_USE_BT,
+ X86_TUNE_USE_INCDEC,
+ X86_TUNE_PAD_RETURNS,
+ X86_TUNE_PAD_SHORT_FUNCTION,
+ X86_TUNE_EXT_80387_CONSTANTS,
+ X86_TUNE_SHORTEN_X87_SSE,
+ X86_TUNE_AVOID_VECTOR_DECODE,
+ X86_TUNE_PROMOTE_HIMODE_IMUL,
+ X86_TUNE_SLOW_IMUL_IMM32_MEM,
+ X86_TUNE_SLOW_IMUL_IMM8,
+ X86_TUNE_MOVE_M1_VIA_OR,
+ X86_TUNE_NOT_UNPAIRABLE,
+ X86_TUNE_NOT_VECTORMODE,
+ X86_TUNE_USE_VECTOR_FP_CONVERTS,
+ X86_TUNE_USE_VECTOR_CONVERTS,
+ X86_TUNE_FUSE_CMP_AND_BRANCH,
+ X86_TUNE_OPT_AGU,
+ X86_TUNE_VECTORIZE_DOUBLE,
+ X86_TUNE_AVX128_OPTIMAL,
+
+ X86_TUNE_LAST
+};
+
+extern unsigned char ix86_tune_features[X86_TUNE_LAST];
+
+#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
+#define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY]
+#define TARGET_ZERO_EXTEND_WITH_AND \
+ ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
+#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
+#define TARGET_DEEP_BRANCH_PREDICTION \
+ ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
+#define TARGET_BRANCH_PREDICTION_HINTS \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
+#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
+#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
+#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
+#define TARGET_PARTIAL_FLAG_REG_STALL \
+ ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
+#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
+#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
+#define TARGET_USE_CLTD ix86_tune_features[X86_TUNE_USE_CLTD]
+#define TARGET_USE_XCHGB ix86_tune_features[X86_TUNE_USE_XCHGB]
+#define TARGET_SPLIT_LONG_MOVES ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
+#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
+#define TARGET_READ_MODIFY ix86_tune_features[X86_TUNE_READ_MODIFY]
+#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
+#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
+#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
+#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
+#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
+#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
+#define TARGET_PROMOTE_HI_REGS ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
+#define TARGET_SINGLE_POP ix86_tune_features[X86_TUNE_SINGLE_POP]
+#define TARGET_DOUBLE_POP ix86_tune_features[X86_TUNE_DOUBLE_POP]
+#define TARGET_SINGLE_PUSH ix86_tune_features[X86_TUNE_SINGLE_PUSH]
+#define TARGET_DOUBLE_PUSH ix86_tune_features[X86_TUNE_DOUBLE_PUSH]
+#define TARGET_INTEGER_DFMODE_MOVES \
+ ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
+#define TARGET_PARTIAL_REG_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
+ ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
+#define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
+ ix86_tune_features[X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL]
+#define TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL \
+ ix86_tune_features[X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL]
+#define TARGET_SSE_SPLIT_REGS ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
+#define TARGET_SSE_TYPELESS_STORES \
+ ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
+#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
+#define TARGET_MEMORY_MISMATCH_STALL \
+ ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
+#define TARGET_PROLOGUE_USING_MOVE \
+ ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
+#define TARGET_EPILOGUE_USING_MOVE \
+ ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
+#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
+#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
+#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
+#define TARGET_INTER_UNIT_CONVERSIONS\
+ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
+#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
+#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
+#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
+#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
+#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_PAD_SHORT_FUNCTION \
+ ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
+#define TARGET_EXT_80387_CONSTANTS \
+ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
+#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
+#define TARGET_AVOID_VECTOR_DECODE \
+ ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
+#define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
+ ix86_tune_features[X86_TUNE_PROMOTE_HIMODE_IMUL]
+#define TARGET_SLOW_IMUL_IMM32_MEM \
+ ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM32_MEM]
+#define TARGET_SLOW_IMUL_IMM8 ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM8]
+#define TARGET_MOVE_M1_VIA_OR ix86_tune_features[X86_TUNE_MOVE_M1_VIA_OR]
+#define TARGET_NOT_UNPAIRABLE ix86_tune_features[X86_TUNE_NOT_UNPAIRABLE]
+#define TARGET_NOT_VECTORMODE ix86_tune_features[X86_TUNE_NOT_VECTORMODE]
+#define TARGET_USE_VECTOR_FP_CONVERTS \
+ ix86_tune_features[X86_TUNE_USE_VECTOR_FP_CONVERTS]
+#define TARGET_USE_VECTOR_CONVERTS \
+ ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
+#define TARGET_FUSE_CMP_AND_BRANCH \
+ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
+#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
+#define TARGET_VECTORIZE_DOUBLE \
+ ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE]
+#define TARGET_AVX128_OPTIMAL \
+ ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
+
+/* Feature tests against the various architecture variations. */
+enum ix86_arch_indices {
+ X86_ARCH_CMOV,
+ X86_ARCH_CMPXCHG,
+ X86_ARCH_CMPXCHG8B,
+ X86_ARCH_XADD,
+ X86_ARCH_BSWAP,
+
+ X86_ARCH_LAST
+};
+
+extern unsigned char ix86_arch_features[X86_ARCH_LAST];
+
+#define TARGET_CMOV ix86_arch_features[X86_ARCH_CMOV]
+#define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG]
+#define TARGET_CMPXCHG8B ix86_arch_features[X86_ARCH_CMPXCHG8B]
+#define TARGET_XADD ix86_arch_features[X86_ARCH_XADD]
+#define TARGET_BSWAP ix86_arch_features[X86_ARCH_BSWAP]
+
+/* For sane SSE instruction set generation we need the fcomi
+   instruction.  It is safe to enable all CMOV instructions.  Also,
+   the RDRAND intrinsic expands to a sequence that includes a
+   conditional move. */
+#define TARGET_CMOVE (TARGET_CMOV || TARGET_SSE || TARGET_RDRND)
+
+#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
+
+extern int x86_prefetch_sse;
+
+#define TARGET_PREFETCH_SSE x86_prefetch_sse
+
+#define ASSEMBLER_DIALECT (ix86_asm_dialect)
+
+#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
+#define TARGET_MIX_SSE_I387 \
+ ((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))
+
+#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
+#define TARGET_SUN_TLS 0
+
+#ifndef TARGET_64BIT_DEFAULT
+#define TARGET_64BIT_DEFAULT 0
+#endif
+#ifndef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
+#endif
+
+/* Fence to use after loop using storent. */
+
+extern tree x86_mfence;
+#define FENCE_FOLLOWING_MOVNT x86_mfence
+
+/* Once GDB has been enhanced to deal with functions without frame
+ pointers, we can change this to allow for elimination of
+ the frame pointer in leaf functions. */
+#define TARGET_DEFAULT 0
+
+/* Extra bits to force. */
+#define TARGET_SUBTARGET_DEFAULT 0
+#define TARGET_SUBTARGET_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 32-bit mode. */
+#define TARGET_SUBTARGET32_DEFAULT 0
+#define TARGET_SUBTARGET32_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 64-bit mode. */
+#define TARGET_SUBTARGET64_DEFAULT 0
+#define TARGET_SUBTARGET64_ISA_DEFAULT 0
+
+/* Replace Mach-O ifdefs with in-line tests, where possible.
+ (a) Macros defined in config/i386/darwin.h */
+#define TARGET_MACHO 0
+#define TARGET_MACHO_BRANCH_ISLANDS 0
+#define MACHOPIC_ATT_STUB 0
+/* (b) Macros defined in config/darwin.h */
+#define MACHO_DYNAMIC_NO_PIC_P 0
+#define MACHOPIC_INDIRECT 0
+#define MACHOPIC_PURE 0
+
+/* For the Windows 64-bit ABI. */
+#define TARGET_64BIT_MS_ABI (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
+
+/* This is re-defined by cygming.h. */
+#define TARGET_SEH 0
+
+/* Available calling ABIs. */
+enum calling_abi
+{
+ SYSV_ABI = 0,
+ MS_ABI = 1
+};
+
+/* The ABI used by the target. */
+extern enum calling_abi ix86_abi;
+
+/* The default ABI used by the target. */
+#define DEFAULT_ABI SYSV_ABI
+
+/* Subtargets may reset this to 1 in order to enable 96-bit long double
+ with the rounding mode forced to 53 bits. */
+#define TARGET_96_ROUND_53_LONG_DOUBLE 0
+
+/* -march=native handling only makes sense with a compiler running on
+   an x86 or x86_64 chip.  If changing this condition, also change
+   the condition in driver-i386.c. */
+#if defined(__i386__) || defined(__x86_64__)
+/* In driver-i386.c. */
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+#define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+#define HAVE_LOCAL_CPU_DETECT
+#endif
+
+#if TARGET_64BIT_DEFAULT
+#define OPT_ARCH64 "!m32"
+#define OPT_ARCH32 "m32"
+#else
+#define OPT_ARCH64 "m64"
+#define OPT_ARCH32 "!m64"
+#endif
+
+/* Support for configure-time defaults of some command line options.
+ The order here is important so that -march doesn't squash the
+ tune or cpu values. */
+#define OPTION_DEFAULT_SPECS \
+ {"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"cpu_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"arch", "%{!march=*:-march=%(VALUE)}"}, \
+ {"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
+
+/* Specs for the compiler proper */
+
+#ifndef CC1_CPU_SPEC
+#define CC1_CPU_SPEC_1 ""
+
+#ifndef HAVE_LOCAL_CPU_DETECT
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1
+#else
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1 \
+"%{march=native:%>march=native %:local_cpu_detect(arch) \
+ %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} \
+%{mtune=native:%>mtune=native %:local_cpu_detect(tune)}"
+#endif
+#endif
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() ix86_target_macros ()
+
+/* Target Pragmas. */
+#define REGISTER_TARGET_PRAGMAS() ix86_register_pragmas ()
+
+enum target_cpu_default
+{
+ TARGET_CPU_DEFAULT_generic = 0,
+
+ TARGET_CPU_DEFAULT_i386,
+ TARGET_CPU_DEFAULT_i486,
+ TARGET_CPU_DEFAULT_pentium,
+ TARGET_CPU_DEFAULT_pentium_mmx,
+ TARGET_CPU_DEFAULT_pentiumpro,
+ TARGET_CPU_DEFAULT_pentium2,
+ TARGET_CPU_DEFAULT_pentium3,
+ TARGET_CPU_DEFAULT_pentium4,
+ TARGET_CPU_DEFAULT_pentium_m,
+ TARGET_CPU_DEFAULT_prescott,
+ TARGET_CPU_DEFAULT_nocona,
+ TARGET_CPU_DEFAULT_core2,
+ TARGET_CPU_DEFAULT_corei7,
+ TARGET_CPU_DEFAULT_atom,
+
+ TARGET_CPU_DEFAULT_geode,
+ TARGET_CPU_DEFAULT_k6,
+ TARGET_CPU_DEFAULT_k6_2,
+ TARGET_CPU_DEFAULT_k6_3,
+ TARGET_CPU_DEFAULT_athlon,
+ TARGET_CPU_DEFAULT_athlon_sse,
+ TARGET_CPU_DEFAULT_k8,
+ TARGET_CPU_DEFAULT_amdfam10,
+ TARGET_CPU_DEFAULT_bdver1,
+ TARGET_CPU_DEFAULT_btver1,
+
+ TARGET_CPU_DEFAULT_max
+};
+
+#ifndef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) "
+#endif
+
+/* This macro defines names of additional specifications to put in the
+ specs that can be used in various specifications like CC1_SPEC. Its
+ definition is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name, and a string constant that is used by the GCC
+   driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "cc1_cpu", CC1_CPU_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+
+/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
+ FPU, assume that the fpcw is set to extended precision; when using
+ only SSE, rounding is correct; when using both SSE and the FPU,
+ the rounding precision is indeterminate, since either may be chosen
+ apparently at random. */
+#define TARGET_FLT_EVAL_METHOD \
+ (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
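+
+/* Editor's note: an illustrative sketch (not part of GCC) of what the
+   values above mean for user code.  With FLT_EVAL_METHOD == 2 (x87
+   math) the division below may be computed in 80-bit precision, so the
+   stored float X can compare unequal to the re-evaluated expression;
+   with FLT_EVAL_METHOD == 0 (SSE math) every operation is rounded to
+   float and the function returns 1 (NaN operands aside).  */
+#if 0 /* example only */
+static int
+excess_precision_demo (float a, float b)
+{
+  float x = a / b;	/* the assignment rounds to float */
+  return x == a / b;	/* may be 0 under x87 excess precision */
+}
+#endif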
+
+/* Whether to allow x87 floating-point arithmetic on MODE (one of
+ SFmode, DFmode and XFmode) in the current excess precision
+ configuration. */
+#define X87_ENABLE_ARITH(MODE) \
+ (flag_excess_precision == EXCESS_PRECISION_FAST || (MODE) == XFmode)
+
+/* Likewise, whether to allow direct conversions from integer mode
+ IMODE (HImode, SImode or DImode) to MODE. */
+#define X87_ENABLE_FLOAT(MODE, IMODE) \
+ (flag_excess_precision == EXCESS_PRECISION_FAST \
+ || (MODE) == XFmode \
+ || ((MODE) == DFmode && (IMODE) == SImode) \
+ || (IMODE) == HImode)
+
+/* target machine storage layout */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 80
+
+#define WIDEST_HARDWARE_FP_SIZE LONG_DOUBLE_TYPE_SIZE
+
+#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
+#define MAX_BITS_PER_WORD 64
+#else
+#define MAX_BITS_PER_WORD 32
+#endif
+
+/* Define this if the most significant bit of a byte (as used in
+   instructions that operate on numbered bit-fields) is the lowest
+   numbered. */
+/* That is not true on the 80386. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is not true on the 80386. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+/* Not true for the 80386. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+
+#ifndef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY \
+ (TARGET_64BIT && ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
+
+/* Stack boundary of the main function guaranteed by OS. */
+#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
+
+/* Minimum stack boundary. */
+#define MIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
+
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+ aligned; the compiler cannot rely on having this alignment. */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
+/* It should be MIN_STACK_BOUNDARY.  But we set it to 128 bits for
+   both 32-bit and 64-bit targets, to support code that needs 128-bit
+   stack alignment for SSE instructions but cannot realign the stack. */
+#define PREFERRED_STACK_BOUNDARY_DEFAULT 128
+
+/* 1 if -mstackrealign should be turned on by default.  It will
+   generate an alternate prologue and epilogue that realigns the
+   runtime stack if necessary.  This supports mixing code that keeps a
+   4-byte aligned stack, as specified by the i386 psABI, with code that
+   needs a 16-byte aligned stack, as required by SSE instructions. */
+#define STACK_REALIGN_DEFAULT 0
+
+/* Boundary (in *bits*) on which the incoming stack is aligned. */
+#define INCOMING_STACK_BOUNDARY ix86_incoming_stack_boundary
+
+/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
+ mandatory for the 64-bit ABI, and may or may not be true for other
+ operating systems. */
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
+
+/* Minimum allocation boundary for the code of a function. */
+#define FUNCTION_BOUNDARY 8
+
+/* C++ stores the virtual bit in the lowest bit of function pointers. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_pfn
+
+/* Minimum size in bits of the largest boundary to which any
+ and all fundamental data types supported by the hardware
+ might need to be aligned. No data type wants to be aligned
+ rounder than this.
+
+   Pentium+ prefers DFmode values to be aligned to a 64-bit boundary,
+   and the Pentium Pro prefers XFmode values at 128-bit boundaries. */
+
+#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128)
+
+/* Maximum stack alignment. */
+#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
+
+/* Alignment value for attribute ((aligned)). It is a constant since
+ it is the part of the ABI. We shouldn't change it with -mavx. */
+#define ATTRIBUTE_ALIGNED_VALUE 128
+
+/* Decide whether a variable of mode MODE should be 128 bit aligned. */
+#define ALIGN_MODE_128(MODE) \
+ ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
+
+/* The published ABIs say that doubles should be aligned on word
+ boundaries, so lower the alignment for structure fields unless
+ -malign-double is set. */
+
+/* ??? This macro is used directly by libobjc.  Since libobjc
+   supports no vector modes, cut out the complexity and fall back
+   on BIGGEST_FIELD_ALIGNMENT. */
+#ifdef IN_TARGET_LIBS
+#ifdef __x86_64__
+#define BIGGEST_FIELD_ALIGNMENT 128
+#else
+#define BIGGEST_FIELD_ALIGNMENT 32
+#endif
+#else
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ x86_field_alignment (FIELD, COMPUTED)
+#endif
+
+/* If defined, a C expression to compute the alignment given to a
+ constant that is being placed in memory. EXP is the constant
+ and ALIGN is the alignment that the object would ordinarily have.
+ The value of this macro is used instead of that alignment to align
+ the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that `strcpy' calls that copy
+ constants can be done inline. */
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) ix86_constant_alignment ((EXP), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a static
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that `strcpy' calls
+ that copy constants to character arrays can be done inline. */
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ix86_local_alignment ((TYPE), VOIDmode, (ALIGN))
+
+/* If defined, a C expression to compute the alignment for stack slot.
+ TYPE is the data type, MODE is the widest mode available, and ALIGN
+ is the alignment that the slot would ordinarily have. The value of
+ this macro is used instead of that alignment to align the slot.
+
+   If this macro is not defined, then ALIGN is used when TYPE is NULL;
+   otherwise, LOCAL_ALIGNMENT will be used.
+
+ One use of this macro is to set alignment of stack slot to the
+ maximum alignment of all possible modes which the slot may have. */
+
+#define STACK_SLOT_ALIGNMENT(TYPE, MODE, ALIGN) \
+ ix86_local_alignment ((TYPE), (MODE), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable DECL.
+
+ If this macro is not defined, then
+ LOCAL_ALIGNMENT (TREE_TYPE (DECL), DECL_ALIGN (DECL)) will be used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_DECL_ALIGNMENT(DECL) \
+ ix86_local_alignment ((DECL), VOIDmode, DECL_ALIGN (DECL))
+
+/* If defined, a C expression to compute the minimum required alignment
+ for dynamic stack realignment purposes for EXP (a TYPE or DECL),
+ MODE, assuming normal alignment ALIGN.
+
+ If this macro is not defined, then (ALIGN) will be used. */
+
+#define MINIMUM_ALIGNMENT(EXP, MODE, ALIGN) \
+ ix86_minimum_alignment (EXP, MODE, ALIGN)
+
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 0
+
+/* If bit field type is int, don't let it cross an int,
+ and give entire struct the alignment of an int. */
+/* Required on the 386 since it doesn't have bit-field insns. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Standard register usage. */
+
+/* This processor has special stack-like registers. See reg-stack.c
+ for details. */
+
+#define STACK_REGS
+
+#define IS_STACK_MODE(MODE) \
+ (((MODE) == SFmode && !(TARGET_SSE && TARGET_SSE_MATH)) \
+ || ((MODE) == DFmode && !(TARGET_SSE2 && TARGET_SSE_MATH)) \
+ || (MODE) == XFmode)
+
+/* Cover class containing the stack registers. */
+#define STACK_REG_COVER_CLASS FLOAT_REGS
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ In the 80386 we give the 8 general purpose registers the numbers 0-7.
+ We number the floating point registers 8-15.
+ Note that registers 0-7 can be accessed as a short or int,
+ while only 0-3 may be used with byte `mov' instructions.
+
+ Reg 16 does not correspond to any hardware register, but instead
+ appears in the RTL as an argument pointer prior to reload, and is
+ eliminated during reloading in favor of either the stack or frame
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 53
+
+/* Number of hardware registers that go into the DWARF-2 unwind info.
+ If not defined, equals FIRST_PSEUDO_REGISTER. */
+
+#define DWARF_FRAME_REGISTERS 17
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the 80386, the stack pointer is such, as is the arg pointer.
+
+   The value is zero if the register is not fixed on either 32-bit or
+   64-bit targets, one if the register is fixed on both 32-bit and
+   64-bit targets, two if it is only fixed on 32-bit targets, and
+   three if it is only fixed on 64-bit targets.
+ Proper values are computed in TARGET_CONDITIONAL_REGISTER_USAGE.
+ */
+#define FIXED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
+/*arg,flags,fpsr,fpcr,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2 }
+
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like.
+
+   The value is zero if the register is not call used on either 32-bit
+   or 64-bit targets, one if the register is call used on both 32-bit
+   and 64-bit targets, two if it is only call used on 32-bit targets,
+   and three if it is only call used on 64-bit targets.
+ Proper values are computed in TARGET_CONDITIONAL_REGISTER_USAGE.
+*/
+#define CALL_USED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*arg,flags,fpsr,fpcr,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 1, 1, 1, 1, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
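+
+/* Editor's note: an illustrative helper (not in GCC; the real work is
+   done in TARGET_CONDITIONAL_REGISTER_USAGE) decoding the 0/1/2/3
+   encoding used by the FIXED_REGISTERS and CALL_USED_REGISTERS tables
+   above.  */
+#if 0 /* example only */
+static int
+example_encoded_flag_p (int value, int target_64bit)
+{
+  /* 0: never; 1: always; 2: only on 32-bit; 3: only on 64-bit.  */
+  return value == 1
+	 || (value == 2 && !target_64bit)
+	 || (value == 3 && target_64bit);
+}
+#endif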
+
+/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values.
+
+   ADJUST_REG_ALLOC_ORDER actually overwrites the order,
+   so this is just an empty initializer for the array. */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52 }
+
+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
+   to be rearranged based on a particular function.  When using SSE
+   math, we want to allocate SSE registers before x87 registers, and
+   vice versa. */
+
+#define ADJUST_REG_ALLOC_ORDER x86_order_regs_for_local_alloc ()
+
+
+#define OVERRIDE_ABI_FORMAT(FNDECL) ix86_call_abi_override (FNDECL)
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+   There are actually no two-word move instructions for consecutive
+   registers, and only registers 0-3 may have byte mov instructions
+   applied to them. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+ ? (TARGET_64BIT ? 4 : 6) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
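+
+/* Editor's note: worked examples of the macro above (not in GCC).  An
+   x87, SSE or MMX register always holds a whole value, so NREGS is 1
+   (2 for a complex mode).  In integer registers, XFmode needs
+   ceil(12/4) = 3 regs on 32-bit targets but ceil(16/8) = 2 on 64-bit
+   targets, and DImode needs ceil(8/4) = 2 regs on 32-bit targets but a
+   single reg on 64-bit targets.  */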
+
+#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE) \
+ ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT) \
+ ? (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? 0 \
+ : ((MODE) == XFmode || (MODE) == XCmode)) \
+ : 0)
+
+#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
+
+#define VALID_AVX256_REG_MODE(MODE) \
+ ((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
+ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
+#define VALID_SSE2_REG_MODE(MODE) \
+ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
+ || (MODE) == V2DImode || (MODE) == DFmode)
+
+#define VALID_SSE_REG_MODE(MODE) \
+ ((MODE) == V1TImode || (MODE) == TImode \
+ || (MODE) == V4SFmode || (MODE) == V4SImode \
+ || (MODE) == SFmode || (MODE) == TFmode)
+
+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+ ((MODE) == V2SFmode || (MODE) == SFmode)
+
+#define VALID_MMX_REG_MODE(MODE) \
+ ((MODE == V1DImode) || (MODE) == DImode \
+ || (MODE) == V2SImode || (MODE) == SImode \
+ || (MODE) == V4HImode || (MODE) == V8QImode)
+
+#define VALID_DFP_MODE_P(MODE) \
+ ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
+
+#define VALID_FP_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
+ || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
+
+#define VALID_INT_MODE_P(MODE) \
+ ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \
+ || (MODE) == DImode \
+ || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \
+ || (MODE) == CDImode \
+ || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \
+ || (MODE) == TFmode || (MODE) == TCmode)))
+
+/* Return true for modes passed in SSE registers. */
+#define SSE_REG_MODE_P(MODE) \
+ ((MODE) == V1TImode || (MODE) == TImode || (MODE) == V16QImode \
+ || (MODE) == TFmode || (MODE) == V8HImode || (MODE) == V2DFmode \
+ || (MODE) == V2DImode || (MODE) == V4SFmode || (MODE) == V4SImode \
+ || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
+ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ix86_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) ix86_modes_tieable_p (MODE1, MODE2)
+
+/* It is possible to write patterns to move flags; but until someone
+   does it, avoid CCmode copies. */
+#define AVOID_CCMODE_COPIES
+
+/* Specify the modes required to caller save a given hard regno.
+ We do this on i386 to prevent flags from being saved at all.
+
+ Kill any attempts to combine saving of modes. */
+
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (CC_REGNO_P (REGNO) ? VOIDmode \
+ : (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \
+ : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false) \
+ : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \
+ : (MODE) == QImode && (REGNO) > BX_REG && !TARGET_64BIT ? SImode \
+ : (MODE))
+
+/* The only ABI that saves SSE registers across calls is Win64 (thus no
+ need to check the current ABI here), and with AVX enabled Win64 only
+ guarantees that the low 16 bytes are saved. */
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
+ (SSE_REGNO_P (REGNO) && GET_MODE_SIZE (MODE) > 16)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* On the 386 the pc register is %eip, and is not usable as a general
+   register; the ordinary mov instructions won't work on it. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 7
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 6
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 20
+
+/* First floating point reg */
+#define FIRST_FLOAT_REG 8
+
+/* First & last stack-like regs */
+#define FIRST_STACK_REG FIRST_FLOAT_REG
+#define LAST_STACK_REG (FIRST_FLOAT_REG + 7)
+
+#define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1)
+#define LAST_SSE_REG (FIRST_SSE_REG + 7)
+
+#define FIRST_MMX_REG (LAST_SSE_REG + 1)
+#define LAST_MMX_REG (FIRST_MMX_REG + 7)
+
+#define FIRST_REX_INT_REG (LAST_MMX_REG + 1)
+#define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7)
+
+#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1)
+#define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7)
+
+/* Override this in other tm.h files to cope with various OS lossage
+ requiring a frame pointer. */
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+/* Make sure we can access arbitrary call frames. */
+#define SETUP_FRAME_ADDRESSES() ix86_setup_frame_addresses ()
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register to hold the addressing base for position-independent
+   code access to data items.  We don't use the PIC pointer in 64-bit
+   mode.  Define the regnum to a dummy value to prevent gcc from
+   pessimizing code dealing with EBX.
+
+ To avoid clobbering a call-saved register unnecessarily, we renumber
+ the pic register when possible. The change is visible after the
+ prologue has been emitted. */
+
+#define REAL_PIC_OFFSET_TABLE_REGNUM BX_REG
+
+#define PIC_OFFSET_TABLE_REGNUM \
+ ((TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC) \
+ || !flag_pic ? INVALID_REGNUM \
+ : reload_completed ? REGNO (pic_offset_table_rtx) \
+ : REAL_PIC_OFFSET_TABLE_REGNUM)
+
+#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
+
+/* This is overridden by <cygwin.h>. */
+#define MS_AGGREGATE_RETURN 0
+
+/* This is overridden by <netware.h>. */
+#define KEEP_AGGREGATE_RETURN_POINTER 0
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ It might seem that class BREG is unnecessary, since no useful 386
+ opcode needs reg %ebx. But some systems pass args to the OS in ebx,
+ and the "b" register constraint is useful in asms for syscalls.
+
+ The flags, fpsr and fpcr registers are in no class. */
+
+enum reg_class
+{
+ NO_REGS,
+ AREG, DREG, CREG, BREG, SIREG, DIREG,
+ AD_REGS, /* %eax/%edx for DImode */
+ CLOBBERED_REGS, /* call-clobbered integers */
+ Q_REGS, /* %eax %ebx %ecx %edx */
+ NON_Q_REGS, /* %esi %edi %ebp %esp */
+ INDEX_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp */
+ LEGACY_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp */
+ GENERAL_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp
+ %r8 %r9 %r10 %r11 %r12 %r13 %r14 %r15 */
+ FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */
+ FLOAT_REGS,
+ SSE_FIRST_REG,
+ SSE_REGS,
+ MMX_REGS,
+ FP_TOP_SSE_REGS,
+ FP_SECOND_SSE_REGS,
+ FLOAT_SSE_REGS,
+ FLOAT_INT_REGS,
+ INT_SSE_REGS,
+ FLOAT_INT_SSE_REGS,
+ ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define INTEGER_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), GENERAL_REGS)
+#define FLOAT_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), FLOAT_REGS)
+#define SSE_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), SSE_REGS)
+#define MMX_CLASS_P(CLASS) \
+ ((CLASS) == MMX_REGS)
+#define MAYBE_INTEGER_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), GENERAL_REGS)
+#define MAYBE_FLOAT_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), FLOAT_REGS)
+#define MAYBE_SSE_CLASS_P(CLASS) \
+ reg_classes_intersect_p (SSE_REGS, (CLASS))
+#define MAYBE_MMX_CLASS_P(CLASS) \
+ reg_classes_intersect_p (MMX_REGS, (CLASS))
+
+#define Q_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), Q_REGS)
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "AREG", "DREG", "CREG", "BREG", \
+ "SIREG", "DIREG", \
+ "AD_REGS", \
+ "CLOBBERED_REGS", \
+ "Q_REGS", "NON_Q_REGS", \
+ "INDEX_REGS", \
+ "LEGACY_REGS", \
+ "GENERAL_REGS", \
+ "FP_TOP_REG", "FP_SECOND_REG", \
+ "FLOAT_REGS", \
+ "SSE_FIRST_REG", \
+ "SSE_REGS", \
+ "MMX_REGS", \
+ "FP_TOP_SSE_REGS", \
+ "FP_SECOND_SSE_REGS", \
+ "FLOAT_SSE_REGS", \
+ "FLOAT_INT_REGS", \
+ "INT_SSE_REGS", \
+ "FLOAT_INT_SSE_REGS", \
+ "ALL_REGS" }
+
+/* Define which registers fit in which classes. This is an initializer
+ for a vector of HARD_REG_SET of length N_REG_CLASSES.
+
+   Note that the default setting of CLOBBERED_REGS is for 32-bit; it
+   is adjusted by TARGET_CONDITIONAL_REGISTER_USAGE when the 64-bit
+   ABI is in effect. */
+
+#define REG_CLASS_CONTENTS \
+{ { 0x00, 0x0 }, \
+ { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \
+ { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \
+ { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \
+ { 0x03, 0x0 }, /* AD_REGS */ \
+ { 0x07, 0x0 }, /* CLOBBERED_REGS */ \
+ { 0x0f, 0x0 }, /* Q_REGS */ \
+ { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \
+ { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \
+ { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \
+ { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
+ { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
+ { 0xff00, 0x0 }, /* FLOAT_REGS */ \
+ { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \
+{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \
+{ 0xe0000000, 0x1f }, /* MMX_REGS */ \
+{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \
+{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \
+{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \
+ { 0x1ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \
+{ 0x1fe100ff,0x1fffe0 }, /* INT_SSE_REGS */ \
+{ 0x1fe1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \
+{ 0xffffffff,0x1fffff } \
+}
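+
+/* Editor's note (not in GCC): each initializer above is a two-word
+   bitmask over the 53 hard registers.  For instance Q_REGS is
+   { 0x0f, 0x0 }, bits 0-3 of the first word, i.e. %eax, %edx, %ecx and
+   %ebx; FLOAT_REGS is { 0xff00, 0x0 }, bits 8-15, the eight x87 stack
+   registers st(0)-st(7).  */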
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+#define QI_REG_P(X) (REG_P (X) && REGNO (X) <= BX_REG)
+
+#define GENERAL_REGNO_P(N) \
+ ((N) <= STACK_POINTER_REGNUM || REX_INT_REGNO_P (N))
+
+#define GENERAL_REG_P(X) \
+ (REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
+
+#define ANY_QI_REG_P(X) (TARGET_64BIT ? GENERAL_REG_P(X) : QI_REG_P (X))
+
+#define REX_INT_REGNO_P(N) \
+ IN_RANGE ((N), FIRST_REX_INT_REG, LAST_REX_INT_REG)
+#define REX_INT_REG_P(X) (REG_P (X) && REX_INT_REGNO_P (REGNO (X)))
+
+#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
+#define FP_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
+#define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X)))
+#define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N))
+
+#define X87_FLOAT_MODE_P(MODE) \
+ (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
+
+#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
+#define SSE_REGNO_P(N) \
+ (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \
+ || REX_SSE_REGNO_P (N))
+
+#define REX_SSE_REGNO_P(N) \
+ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG)
+
+#define SSE_REGNO(N) \
+ ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
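+
+/* Editor's note (not in GCC): with the register layout above,
+   FIRST_SSE_REG is 21 and FIRST_REX_SSE_REG is 45, so SSE_REGNO maps
+   the architectural xmm numbers 0..15 onto the two discontiguous
+   hard-register ranges: SSE_REGNO (0) == 21 (xmm0) through
+   SSE_REGNO (7) == 28 (xmm7), and SSE_REGNO (8) == 45 (xmm8) through
+   SSE_REGNO (15) == 52 (xmm15).  */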
+
+#define SSE_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
+#define AVX_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
+
+#define AVX128_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
+
+#define AVX256_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define AVX_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define FMA4_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
+#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
+
+#define STACK_REG_P(XOP) (REG_P (XOP) && STACK_REGNO_P (REGNO (XOP)))
+#define STACK_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
+
+#define STACK_TOP_P(XOP) (REG_P (XOP) && REGNO (XOP) == FIRST_STACK_REG)
+
+#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define CC_REGNO_P(X) ((X) == FLAGS_REG || (X) == FPSR_REG)
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS INDEX_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Place additional restrictions on the register class to use when it
+ is necessary to be able to hold a value of mode MODE in a reload
+ register for which class CLASS would ordinarily be used. */
+
+#define LIMIT_RELOAD_CLASS(MODE, CLASS) \
+ ((MODE) == QImode && !TARGET_64BIT \
+ && ((CLASS) == ALL_REGS || (CLASS) == GENERAL_REGS \
+ || (CLASS) == LEGACY_REGS || (CLASS) == INDEX_REGS) \
+ ? Q_REGS : (CLASS))
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1)
+
+/* get_secondary_mem widens integral modes to BITS_PER_WORD.
+   There is no need to emit a full 64-bit move on 64-bit targets
+   for integral modes that can be moved with a 32-bit move. */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (GET_MODE_BITSIZE (MODE) < 32 && INTEGRAL_MODE_P (MODE) \
+ ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ : MODE)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+/* On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (MAYBE_INTEGER_CLASS_P (CLASS) \
+ ? ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+ ? (TARGET_64BIT ? 4 : 6) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) \
+ : (COMPLEX_MODE_P (MODE) ? 2 : 1))
+
+/* Return a class of registers that cannot change FROM mode to TO mode. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ ix86_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes, this says how many the stack
+   pointer really advances by.  On the 386, we have a pushw instruction
+   that decrements by exactly 2 no matter what the position was; there is
+   no pushb.
+
+   But since the CIE data alignment factor on this arch is -4 for 32-bit
+   targets and -8 for 64-bit targets, we need to make sure all stack
+   pointer adjustments are a multiple of 4 for 32-bit targets and 8 for
+   64-bit targets. */
+
+#define PUSH_ROUNDING(BYTES) \
+ (((BYTES) + UNITS_PER_WORD - 1) & -UNITS_PER_WORD)
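+
+/* Editor's note: a worked example (not in GCC) of the rounding above.
+   On a 32-bit target UNITS_PER_WORD is 4, so PUSH_ROUNDING (1) ==
+   (1 + 3) & -4 == 4 and PUSH_ROUNDING (6) == (6 + 3) & -4 == 8; on a
+   64-bit target the same expressions round up to multiples of 8.  */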
+
+/* If defined, the maximum amount of space required for outgoing arguments
+ will be computed and placed into the variable `crtl->outgoing_args_size'.
+ No space will be pushed onto the stack for each call; instead, the
+ function prologue should increase the stack frame size by this amount.
+
+   The MS ABI seems to require 16-byte alignment everywhere except for
+   the function prologue and epilogue.  This is not possible without
+   ACCUMULATE_OUTGOING_ARGS. */
+
+#define ACCUMULATE_OUTGOING_ARGS \
+ (TARGET_ACCUMULATE_OUTGOING_ARGS || ix86_cfun_abi () == MS_ABI)
+
+/* If defined, a C expression whose value is nonzero when we want to use PUSH
+ instructions to pass outgoing arguments. */
+
+#define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS)
+
+/* We want the stack and args to grow in opposite directions, even if
+   PUSH_ARGS is 0. */
+#define PUSH_ARGS_REVERSED 1
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define this macro if functions should assume that stack space has been
+ allocated for arguments even when their values are passed in registers.
+
+ The value of this macro is the size, in bytes, of the area reserved for
+ arguments passed in registers for the function represented by FNDECL.
+
+ This space can be allocated by the caller, or be a part of the
+ machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' says
+ which. */
+#define REG_PARM_STACK_SPACE(FNDECL) ix86_reg_parm_stack_space (FNDECL)
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) \
+ (ix86_function_type_abi (FNTYPE) == MS_ABI)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) ix86_libcall_value (MODE)
+
+/* Define the size of the result block used for communication between
+ untyped_call and untyped_return. The block contains a DImode value
+ followed by the block used by fnsave and frstor. */
+
+#define APPLY_RESULT_SIZE (8+108)
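+
+/* That is, 8 bytes for the DImode value plus the 108 bytes that fnsave
+ stores for the 32-bit FPU state. */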
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
+
+#ifndef USED_FOR_TARGET
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+typedef struct ix86_args {
+ int words; /* # words passed so far */
+ int nregs; /* # registers available for passing */
+ int regno; /* next available register number */
+ int fastcall; /* fastcall or thiscall calling convention
+ is used */
+ int sse_words; /* # sse words passed so far */
+ int sse_nregs; /* # sse registers available for passing */
+ int warn_avx; /* True when we want to warn about AVX ABI. */
+ int warn_sse; /* True when we want to warn about SSE ABI. */
+ int warn_mmx; /* True when we want to warn about MMX ABI. */
+ int sse_regno; /* next available sse register number */
+ int mmx_words; /* # mmx words passed so far */
+ int mmx_nregs; /* # mmx registers available for passing */
+ int mmx_regno; /* next available mmx register number */
+ int maybe_vaarg; /* true for calls to possibly variadic functions. */
+ int caller; /* true if it is caller. */
+ int float_in_sse; /* Set to 1 or 2 for 32bit targets if
+ SFmode/DFmode arguments should be passed
+ in SSE registers. Otherwise 0. */
+ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
+ MS_ABI for ms abi. */
+ /* Nonzero if it passes 256bit AVX modes. */
+ BOOL_BITFIELD callee_pass_avx256_p : 1;
+ /* Nonzero if it returns 256bit AVX modes. */
+ BOOL_BITFIELD callee_return_avx256_p : 1;
+} CUMULATIVE_ARGS;
+#endif
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL), \
+ (N_NAMED_ARGS) != -1)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) x86_function_profiler (FILE, LABELNO)
+
+#define MCOUNT_NAME "_mcount"
+
+#define MCOUNT_NAME_BEFORE_PROLOGUE "__fentry__"
+
+#define PROFILE_COUNT_REGISTER "edx"
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+/* Note that on the 386 it might be more efficient not to define this,
+ since we have to restore the stack pointer ourselves from the frame
+ pointer in order to use pop. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts. */
+
+/* On the 386, the trampoline contains two instructions:
+ mov #STATIC,ecx
+ jmp FUNCTION
+ The trampoline is generated entirely at runtime. The operand of JMP
+ is the address of FUNCTION relative to the instruction following the
+ JMP (which is 5 bytes long). */
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 24 : 10)
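+
+/* For example, on a 32-bit target the 10 bytes break down as
+ b9 xx xx xx xx (mov $STATIC, %ecx; 1 opcode byte + 4 immediate bytes)
+ followed by e9 xx xx xx xx (jmp rel32; again 1 + 4 bytes). */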
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ There are two registers that can always be eliminated on the i386.
+ The frame pointer and the arg pointer can be replaced by either the
+ hard frame pointer or the stack pointer, depending upon the
+ circumstances. The hard frame pointer is not used before reload and
+ so it is not eligible for elimination. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = ix86_initial_elimination_offset ((FROM), (TO)))
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ ((REGNO) < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P (REGNO) \
+ || (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GENERAL_REGNO_P (REGNO) \
+ || (REGNO) == ARG_POINTER_REGNUM \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || GENERAL_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+
+/* Non strict versions, pseudos are ok. */
+#define REG_OK_FOR_INDEX_NONSTRICT_P(X) \
+ (REGNO (X) < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P (REGNO (X)) \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+#define REG_OK_FOR_BASE_NONSTRICT_P(X) \
+ (GENERAL_REGNO_P (REGNO (X)) \
+ || REGNO (X) == ARG_POINTER_REGNUM \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Strict versions, hard registers only */
+#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#ifndef REG_OK_STRICT
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P (X)
+
+#else
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P (X)
+#endif
+
+/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in TARGET_LEGITIMATE_ADDRESS_P,
+ except for CONSTANT_ADDRESS_P which is usually machine-independent.
+
+ See legitimize_pic_address in i386.c for details as to what
+ constitutes a legitimate address when -fpic is used. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define CONSTANT_ADDRESS_P(X) constant_address_p (X)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X)
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, INDL, WIN) \
+do { \
+ if (ix86_legitimize_reload_address ((X), (MODE), (OPNUM), \
+ (int)(TYPE), (INDL))) \
+ goto WIN; \
+} while (0)
+
+/* If defined, a C expression to determine the base term of address X.
+ This macro is used in only one place: `find_base_term' in alias.c.
+
+ It is always safe for this macro to not be defined. It exists so
+ that alias analysis can understand machine-dependent addresses.
+
+ The typical use of this macro is to handle addresses containing
+ a label_ref or symbol_ref within an UNSPEC. */
+
+#define FIND_BASE_TERM(X) ix86_find_base_term (X)
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+
+#define SYMBOLIC_CONST(X) \
+ (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+/* Max number of args passed in registers. If this is more than 3, we
+ will have problems with ebx (register #4), since it is a call-saved
+ register that is also used as the PIC register in ELF. So for now,
+ don't allow more than 3 arguments to be passed in registers. */
+
+/* ABI-specific values for REGPARM_MAX and SSE_REGPARM_MAX. */
+#define X86_64_REGPARM_MAX 6
+#define X86_64_MS_REGPARM_MAX 4
+
+#define X86_32_REGPARM_MAX 3
+
+#define REGPARM_MAX \
+ (TARGET_64BIT \
+ ? (TARGET_64BIT_MS_ABI \
+ ? X86_64_MS_REGPARM_MAX \
+ : X86_64_REGPARM_MAX) \
+ : X86_32_REGPARM_MAX)
+
+#define X86_64_SSE_REGPARM_MAX 8
+#define X86_64_MS_SSE_REGPARM_MAX 4
+
+#define X86_32_SSE_REGPARM_MAX (TARGET_SSE ? (TARGET_MACHO ? 4 : 3) : 0)
+
+#define SSE_REGPARM_MAX \
+ (TARGET_64BIT \
+ ? (TARGET_64BIT_MS_ABI \
+ ? X86_64_MS_SSE_REGPARM_MAX \
+ : X86_64_SSE_REGPARM_MAX) \
+ : X86_32_SSE_REGPARM_MAX)
+
+#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
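+
+/* For example, a 32-bit target with SSE and MMX thus passes up to 3
+ integer, 3 SSE (4 on Mach-O) and 3 MMX register arguments, while a
+ 64-bit SysV target passes up to 6 integer and 8 SSE register
+ arguments and none in MMX registers. */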
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE \
+ (!TARGET_64BIT || (flag_pic && ix86_cmodel != CM_LARGE_PIC) ? SImode : DImode)
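+
+/* For example, a 64-bit small- or medium-model PIC target uses SImode
+ case vector entries, while 64-bit non-PIC and large-model PIC targets
+ need full DImode entries. */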
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 16
+
+/* MOVE_MAX_PIECES is the number of bytes at a time which we can
+ move efficiently, as opposed to MOVE_MAX which is the maximum
+ number of bytes we can move with a single instruction. */
+#define MOVE_MAX_PIECES UNITS_PER_WORD
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead.
+ Increasing the value will always make code faster, but eventually
+ incurs a high cost in increased code size.
+
+ If you don't define this, a reasonable default is used. */
+
+#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3)
+
+/* If a clear memory operation would take CLEAR_RATIO or more simple
+ move-instruction sequences, we will do a clrmem or libcall instead. */
+
+#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
+
+/* Define if shifts truncate the shift count which implies one can
+ omit a sign-extension or zero-extension of a shift count.
+
+ On the i386, shifts do truncate the count. But bit-test instructions
+ take the modulo of the bit offset operand, so this is left
+ undefined. */
+
+/* #define SHIFT_COUNT_TRUNCATED */
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* A macro to update MODE and UNSIGNEDP when an object whose type is
+ TYPE and which has the specified mode and signedness is to be
+ stored in a register. This macro is only called when TYPE is a
+ scalar type.
+
+ On the i386 it is sometimes useful to promote HImode and QImode
+ quantities to SImode. The choice depends on the target type. */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+do { \
+ if (((MODE) == HImode && TARGET_PROMOTE_HI_REGS) \
+ || ((MODE) == QImode && TARGET_PROMOTE_QI_REGS)) \
+ (MODE) = SImode; \
+} while (0)
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode (TARGET_64BIT ? DImode : SImode)
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+
+#define BRANCH_COST(speed_p, predictable_p) \
+ (!(speed_p) ? 2 : (predictable_p) ? 0 : ix86_branch_cost)
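+
+/* For example, when optimizing for size every branch costs 2; when
+ optimizing for speed, a predictable branch costs 0 and an
+ unpredictable one costs ix86_branch_cost. */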
+
+/* Define this macro as a C expression which is nonzero if accessing
+ less than a word of memory (i.e. a `char' or a `short') is no
+ faster than accessing a word of memory, i.e., if such accesses
+ require more than one instruction or if there is no difference in
+ cost between byte and (aligned) word loads.
+
+ When this macro is not defined, the compiler will access a field by
+ finding the smallest containing object; when it is defined, a
+ fullword load will be used if alignment permits. Unless byte
+ accesses are faster than word accesses, using word accesses is
+ preferable since it may eliminate a subsequent memory access if
+ subsequent accesses occur to other fields in the same word of the
+ structure, but to different bytes. */
+
+#define SLOW_BYTE_ACCESS 0
+
+/* Nonzero if access to memory by shorts is slow and undesirable. */
+#define SLOW_SHORT_ACCESS 0
+
+/* Define this macro to be the value 1 if unaligned accesses have a
+ cost many times greater than aligned accesses, for example if they
+ are emulated in a trap handler.
+
+ When this macro is nonzero, the compiler will act as if
+ `STRICT_ALIGNMENT' were nonzero when generating code for block
+ moves. This can cause significantly more instructions to be
+ produced. Therefore, do not set this macro nonzero if unaligned
+ accesses only add a cycle or two to the time for a memory access.
+
+ If the value of this macro is always zero, it need not be defined. */
+
+/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register.
+
+ Desirable on the 386 because a CALL with a constant address is
+ faster than one with a register address. */
+
+#define NO_FUNCTION_CSE
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison.
+
+ For floating-point equality comparisons, CCFPEQmode should be used.
+ VOIDmode should be used in all other cases.
+
+ For integer comparisons against zero, reduce to CCNOmode or CCZmode if
+ possible, to allow for more combinations. */
+
+#define SELECT_CC_MODE(OP, X, Y) ix86_cc_mode ((OP), (X), (Y))
+
+/* Return nonzero if MODE implies a floating point inequality can be
+ reversed. */
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* A C expression whose value is reversed condition code of the CODE for
+ comparison done in CC_MODE mode. */
+#define REVERSE_CONDITION(CODE, MODE) ix86_reverse_condition ((CODE), (MODE))
+
+
+/* Control the assembler format that we output, to the extent
+ this does not vary between assemblers. */
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+/* In order to refer to the first 8 regs as 32-bit regs, prefix an "e".
+ For non-floating-point regs, the following are the HImode names.
+
+ For float regs, the stack top is sometimes referred to as "%st(0)"
+ instead of just "%st". TARGET_PRINT_OPERAND handles this with the
+ "y" code. */
+
+#define HI_REGISTER_NAMES \
+{"ax","dx","cx","bx","si","di","bp","sp", \
+ "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", \
+ "argp", "flags", "fpsr", "fpcr", "frame", \
+ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
+
+#define REGISTER_NAMES HI_REGISTER_NAMES
+
+/* Table of additional register names to use in user input. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 }, \
+ { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 }, \
+ { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 }, \
+ { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 }, \
+ { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 }, \
+ { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 } }
+
+/* Note that we are omitting these here, since there is currently no
+ known way to get GCC to use them: they want the same register
+ numbers as al and ax, yet must be treated as distinct names. */
+
+#define QI_REGISTER_NAMES \
+{"al", "dl", "cl", "bl", "sil", "dil", "bpl", "spl",}
+
+/* These parallel the array above, and can be used to access bits 8:15
+ of regs 0 through 3. */
+
+#define QI_HIGH_REGISTER_NAMES \
+{"ah", "dh", "ch", "bh", }
+
+/* How to renumber registers for dbx and gdb. */
+
+#define DBX_REGISTER_NUMBER(N) \
+ (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
+
+extern int const dbx_register_map[FIRST_PSEUDO_REGISTER];
+extern int const dbx64_register_map[FIRST_PSEUDO_REGISTER];
+extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
+
+/* Before the prologue, RA is at 0(%esp). */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM))
+
+/* After the prologue, RA is at -4(AP) in the current frame. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, -UNITS_PER_WORD)) \
+ : gen_rtx_MEM (Pmode, plus_constant (FRAME, UNITS_PER_WORD)))
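+
+/* For example, __builtin_return_address (0) reads the word just below
+ the argument pointer, while for COUNT > 0 the return address is
+ found one word above the chained frame pointer FRAME. */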
+
+/* PC is dbx register 8; let's use that column for RA. */
+#define DWARF_FRAME_RETURN_COLUMN (TARGET_64BIT ? 16 : 8)
+
+/* Before the prologue, the top of the frame is at 4(%esp). */
+#define INCOMING_FRAME_SP_OFFSET UNITS_PER_WORD
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) <= DX_REG ? (N) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, CX_REG)
+
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such
+ relocations, I guess we'll have to see. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpush{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
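+
+/* For example, ASM_OUTPUT_REG_PUSH (file, AX_REG) emits "push{q} %rax"
+ on 64-bit targets and "push{l} %eax" on 32-bit ones; for an extended
+ register such as r8 the leading character of its name is skipped, so
+ the "%r" in the format still yields "push{q} %r8". */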
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpop{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ ix86_output_addr_vec_elt ((FILE), (VALUE))
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
+
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is true. */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR) \
+{ \
+ if ((PTR)[0] == '%' && (PTR)[1] == 'v') \
+ (PTR) += TARGET_AVX ? 1 : 2; \
+}
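+
+/* For example, a template of "%vmovaps" is emitted as "vmovaps" when
+ TARGET_AVX (only the '%' is skipped) and as plain "movaps" otherwise
+ (both the '%' and the 'v' are skipped). */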
+
+/* A C statement or statements which output an assembler instruction
+ opcode to the stdio stream STREAM. The macro-operand PTR is a
+ variable of type `char *' which points to the opcode name in
+ its "internal" form--the form that is written in the machine
+ description. */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to pad the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#undef ASM_OUTPUT_MAX_SKIP_PAD
+#define ASM_OUTPUT_MAX_SKIP_PAD(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) \
+ { \
+ if ((MAX_SKIP) == 0) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* Write the extra assembler code needed to declare a function
+ properly. */
+
+#undef ASM_OUTPUT_FUNCTION_LABEL
+#define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \
+ ix86_asm_output_function_label (FILE, NAME, DECL)
+
+/* Under some conditions we need jump tables in the text section,
+ because the assembler cannot handle label differences between
+ sections. This is the case for x86_64 on Mach-O for example. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION \
+ (flag_pic && ((TARGET_MACHO && TARGET_64BIT) \
+ || (!TARGET_64BIT && !HAVE_AS_GOTOFF_IN_DATA)))
+
+/* Switch to init or fini section via SECTION_OP, emit a call to FUNC,
+ and switch back. For x86 we do this only to save a few bytes that
+ would otherwise be unused in the text section. */
+#define CRT_MKSTR2(VAL) #VAL
+#define CRT_MKSTR(x) CRT_MKSTR2(x)
+
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "call " CRT_MKSTR(__USER_LABEL_PREFIX__) #FUNC "\n" \
+ TEXT_SECTION_ASM_OP);
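+
+/* For illustration, assuming an empty __USER_LABEL_PREFIX__ and a
+ hypothetical static function ctor_hook, the invocation
+ CRT_CALL_STATIC_FUNCTION (".section .init", ctor_hook) expands to
+ roughly
+ asm (".section .init\n\t" "call ctor_hook\n" TEXT_SECTION_ASM_OP);
+ placing the call in .init with no dedicated stub function. */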
+
+/* Which processor to tune code generation for. */
+
+enum processor_type
+{
+ PROCESSOR_I386 = 0, /* 80386 */
+ PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
+ PROCESSOR_PENTIUM,
+ PROCESSOR_PENTIUMPRO,
+ PROCESSOR_GEODE,
+ PROCESSOR_K6,
+ PROCESSOR_ATHLON,
+ PROCESSOR_PENTIUM4,
+ PROCESSOR_K8,
+ PROCESSOR_NOCONA,
+ PROCESSOR_CORE2_32,
+ PROCESSOR_CORE2_64,
+ PROCESSOR_COREI7_32,
+ PROCESSOR_COREI7_64,
+ PROCESSOR_GENERIC32,
+ PROCESSOR_GENERIC64,
+ PROCESSOR_AMDFAM10,
+ PROCESSOR_BDVER1,
+ PROCESSOR_BTVER1,
+ PROCESSOR_ATOM,
+ PROCESSOR_max
+};
+
+extern enum processor_type ix86_tune;
+extern enum processor_type ix86_arch;
+
+enum fpmath_unit
+{
+ FPMATH_387 = 1,
+ FPMATH_SSE = 2
+};
+
+extern enum fpmath_unit ix86_fpmath;
+
+enum tls_dialect
+{
+ TLS_DIALECT_GNU,
+ TLS_DIALECT_GNU2,
+ TLS_DIALECT_SUN
+};
+
+extern enum tls_dialect ix86_tls_dialect;
+
+enum cmodel {
+ CM_32, /* The traditional 32-bit ABI. */
+ CM_SMALL, /* Assumes all code and data fits in the low 31 bits. */
+ CM_KERNEL, /* Assumes all code and data fits in the high 31 bits. */
+ CM_MEDIUM, /* Assumes code fits in the low 31 bits; data unlimited. */
+ CM_LARGE, /* No assumptions. */
+ CM_SMALL_PIC, /* Assumes code+data+got/plt fits in a 31 bit region. */
+ CM_MEDIUM_PIC,/* Assumes code+got/plt fits in a 31 bit region. */
+ CM_LARGE_PIC /* No assumptions. */
+};
+
+extern enum cmodel ix86_cmodel;
+
+/* Size of the RED_ZONE area. */
+#define RED_ZONE_SIZE 128
+/* Reserved area of the red zone for temporaries. */
+#define RED_ZONE_RESERVE 8
+
+enum asm_dialect {
+ ASM_ATT,
+ ASM_INTEL
+};
+
+extern enum asm_dialect ix86_asm_dialect;
+extern unsigned int ix86_preferred_stack_boundary;
+extern unsigned int ix86_incoming_stack_boundary;
+extern int ix86_branch_cost, ix86_section_threshold;
+
+/* Smallest class containing REGNO. */
+extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER];
+
+enum ix86_fpcmp_strategy {
+ IX86_FPCMP_SAHF,
+ IX86_FPCMP_COMI,
+ IX86_FPCMP_ARITH
+};
+
+/* To properly truncate FP values into integers, we need to set the
+ i387 control word. We can't emit proper mode switching code before
+ reload, as spills generated by reload may truncate values
+ incorrectly, but we can still avoid redundant computation of the new
+ control word by the mode switching pass. The fldcw instructions are
+ still emitted redundantly, but this is probably not going to be a
+ noticeable problem, as most CPUs do have a fast path for the
+ sequence.
+
+ The machinery is to emit simple truncation instructions and split
+ them before reload into instructions having USEs of two memory
+ locations that are filled in by this code with the old and new
+ control words.
+
+ A post-reload pass may later be used to eliminate the redundant
+ fldcw if needed. */
+
+enum ix86_entity
+{
+ I387_TRUNC = 0,
+ I387_FLOOR,
+ I387_CEIL,
+ I387_MASK_PM,
+ MAX_386_ENTITIES
+};
+
+enum ix86_stack_slot
+{
+ SLOT_VIRTUAL = 0,
+ SLOT_TEMP,
+ SLOT_CW_STORED,
+ SLOT_CW_TRUNC,
+ SLOT_CW_FLOOR,
+ SLOT_CW_CEIL,
+ SLOT_CW_MASK_PM,
+ MAX_386_STACK_LOCALS
+};
+
+/* Define this macro if the port needs extra instructions inserted
+ for mode switching in an optimizing compilation. */
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
+ ix86_optimize_mode_switching[(ENTITY)]
+
+/* If you define `OPTIMIZE_MODE_SWITCHING', you have to define this as
+ initializer for an array of integers. Each initializer element N
+ refers to an entity that needs mode switching, and specifies the
+ number of different modes that might need to be set for this
+ entity. The position of the initializer in the initializer -
+ starting counting at zero - determines the integer that is used to
+ refer to the mode-switched entity in question. */
+
+#define NUM_MODES_FOR_MODE_SWITCHING \
+ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+
+/* ENTITY is an integer specifying a mode-switched entity. If
+ `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+ return an integer value not larger than the corresponding element
+ in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
+ must be switched into prior to the execution of INSN. */
+
+#define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+
+/* This macro specifies the order in which modes for ENTITY are
+ processed. 0 is the highest priority. */
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
+
+/* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
+ is the set of hard registers live at the point where the insn(s)
+ are to be inserted. */
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+ ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
+ ? emit_i387_cw_initialization (MODE), 0 \
+ : 0)
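+
+/* For example, switching the I387_TRUNC entity to its I387_CW_TRUNC
+ mode emits an fldcw of a control word whose rounding bits force
+ round-toward-zero, while I387_CW_ANY emits nothing. */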
+
+
+/* Avoid renaming of stack registers, as doing so in combination with
+ scheduling just increases the number of registers live at a time and
+ in turn the number of fxch instructions needed.
+
+ ??? Maybe Pentium chips benefit from renaming; someone could try.... */
+
+#define HARD_REGNO_RENAME_OK(SRC, TARGET) \
+ (! IN_RANGE ((SRC), FIRST_STACK_REG, LAST_STACK_REG))
+
+
+#define FASTCALL_PREFIX '@'
+
+/* Machine specific frame tracking during prologue/epilogue generation. */
+
+#ifndef USED_FOR_TARGET
+struct GTY(()) machine_frame_state
+{
+ /* This pair tracks the currently active CFA as reg+offset. When reg
+ is drap_reg, we don't bother trying to record here the real CFA when
+ it might really be a DW_CFA_def_cfa_expression. */
+ rtx cfa_reg;
+ HOST_WIDE_INT cfa_offset;
+
+ /* The current offset (canonically from the CFA) of ESP and EBP.
+ When stack frame re-alignment is active, these may not be relative
+ to the CFA. However, in all cases they are relative to the offsets
+ of the saved registers stored in ix86_frame. */
+ HOST_WIDE_INT sp_offset;
+ HOST_WIDE_INT fp_offset;
+
+ /* The size of the red-zone that may be assumed for the purposes of
+ eliding register restore notes in the epilogue. This may be zero
+ if no red-zone is in effect, or may be reduced from the real
+ red-zone value by a maximum runtime stack re-alignment value. */
+ int red_zone_offset;
+
+ /* Indicate whether each of ESP, EBP or DRAP currently holds a valid
+ value within the frame. If false then the offset above should be
+ ignored. Note that DRAP, if valid, *always* points to the CFA and
+ thus has an offset of zero. */
+ BOOL_BITFIELD sp_valid : 1;
+ BOOL_BITFIELD fp_valid : 1;
+ BOOL_BITFIELD drap_valid : 1;
+
+ /* Indicate whether the local stack frame has been re-aligned. When
+ set, the SP/FP offsets above are relative to the aligned frame
+ and not the CFA. */
+ BOOL_BITFIELD realigned : 1;
+};
+
+/* Private to winnt.c. */
+struct seh_frame_state;
+
+struct GTY(()) machine_function {
+ struct stack_local_entry *stack_locals;
+ const char *some_ld_name;
+ int varargs_gpr_size;
+ int varargs_fpr_size;
+ int optimize_mode_switching[MAX_386_ENTITIES];
+
+ /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE
+ has been computed for. */
+ int use_fast_prologue_epilogue_nregs;
+
+ /* For -fsplit-stack support: A stack local which holds a pointer to
+ the stack arguments for a function with a variable number of
+ arguments. This is set at the start of the function and is used
+ to initialize the overflow_arg_area field of the va_list
+ structure. */
+ rtx split_stack_varargs_pointer;
+
+ /* This value is used for amd64 targets and specifies the current ABI
+ to be used: MS_ABI for the ms abi, SYSV_ABI for the sysv abi. */
+ ENUM_BITFIELD(calling_abi) call_abi : 8;
+
+ /* Nonzero if the function accesses a previous frame. */
+ BOOL_BITFIELD accesses_prev_frame : 1;
+
+ /* Nonzero if the function requires a CLD in the prologue. */
+ BOOL_BITFIELD needs_cld : 1;
+
+ /* Set by ix86_compute_frame_layout and used by prologue/epilogue
+ expander to determine the style used. */
+ BOOL_BITFIELD use_fast_prologue_epilogue : 1;
+
+ /* If true, the current function needs the default PIC register, not
+ an alternate register (on x86) and must not use the red zone (on
+ x86_64), even if it's a leaf function. We don't want the
+ function to be regarded as non-leaf because TLS calls need not
+ affect register allocation. This flag is set when a TLS call
+ instruction is expanded within a function, and never reset, even
+ if all such instructions are optimized away. Use the
+ ix86_current_function_calls_tls_descriptor macro for a better
+ approximation. */
+ BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
+
+ /* If true, the current function's STATIC_CHAIN is placed on the
+ stack below the return address. */
+ BOOL_BITFIELD static_chain_on_stack : 1;
+
+ /* Nonzero if caller passes 256bit AVX modes. */
+ BOOL_BITFIELD caller_pass_avx256_p : 1;
+
+ /* Nonzero if caller returns 256bit AVX modes. */
+ BOOL_BITFIELD caller_return_avx256_p : 1;
+
+ /* Nonzero if the current callee passes 256bit AVX modes. */
+ BOOL_BITFIELD callee_pass_avx256_p : 1;
+
+ /* Nonzero if the current callee returns 256bit AVX modes. */
+ BOOL_BITFIELD callee_return_avx256_p : 1;
+
+ /* Nonzero if a rescan for vzeroupper insns in the current function is needed. */
+ BOOL_BITFIELD rescan_vzeroupper_p : 1;
+
+ /* During prologue/epilogue generation, the current frame state.
+ Otherwise, the frame state at the end of the prologue. */
+ struct machine_frame_state fs;
+
+ /* During SEH output, this is non-null. */
+ struct seh_frame_state * GTY((skip(""))) seh;
+};
+#endif
+
+#define ix86_stack_locals (cfun->machine->stack_locals)
+#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
+#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size)
+#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
+#define ix86_tls_descriptor_calls_expanded_in_cfun \
+ (cfun->machine->tls_descriptor_call_expanded_p)
+/* Since tls_descriptor_call_expanded is not cleared even if all TLS
+ calls are optimized away, we try to detect the cases in which they
+ were. Since such instructions contain a (use (reg REG_SP)), we can
+ verify whether any such instruction is live by testing that REG_SP
+ is live. */
+#define ix86_current_function_calls_tls_descriptor \
+ (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
+#define ix86_static_chain_on_stack (cfun->machine->static_chain_on_stack)
+
+/* Control behavior of x86_file_start. */
+#define X86_FILE_START_VERSION_DIRECTIVE false
+#define X86_FILE_START_FLTUSED false
+
+/* Flag to mark data that is in the large address area. */
+#define SYMBOL_FLAG_FAR_ADDR (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_FAR_ADDR_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_FAR_ADDR) != 0)
+
+/* Flags to mark dllimport/dllexport. Used by PE ports, but handy to
+ have defined always, to avoid ifdefing. */
+#define SYMBOL_FLAG_DLLIMPORT (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_REF_DLLIMPORT_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLIMPORT) != 0)
+
+#define SYMBOL_FLAG_DLLEXPORT (SYMBOL_FLAG_MACH_DEP << 2)
+#define SYMBOL_REF_DLLEXPORT_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
+
+extern void debug_ready_dispatch (void);
+extern void debug_dispatch_window (int);
+
+/* The value at zero is only defined for the BMI instructions
+ LZCNT and TZCNT, not the BSR/BSF insns in the original isa. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI)
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI)
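+
+/* For example, the SImode tzcnt/lzcnt instructions return 32 for a
+ zero input, so (VALUE) is set to 32 and the macro is true exactly
+ when TARGET_BMI holds; the plain bsf/bsr result is undefined at
+ zero. */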
+
+
+/*
+Local variables:
+version-control: t
+End:
+*/
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
new file mode 100644
index 000000000..3a27ca46c
--- /dev/null
+++ b/gcc/config/i386/i386.md
@@ -0,0 +1,18347 @@
+;; GCC machine description for IA-32 and x86-64.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Mostly by William Schelter.
+;; x86_64 support added by Jan Hubicka
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+;;
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+;;
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+;;
+;; The special asm out single letter directives following a '%' are:
+;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+;; C -- print opcode suffix for set/cmov insn.
+;; c -- like C, but print reversed condition
+;; F,f -- likewise, but for floating-point.
+;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+;; otherwise nothing
+;; R -- print the prefix for register names.
+;; z -- print the opcode suffix for the size of the current operand.
+;; Z -- likewise, with special suffixes for x87 instructions.
+;; * -- print a star (in certain assembler syntax)
+;; A -- print an absolute memory reference.
+;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
+;; s -- print a shift double count, followed by the assembler's argument
+;; delimiter.
+;; b -- print the QImode name of the register for the indicated operand.
+;; %b0 would print %al if operands[0] is reg 0.
+;; w -- likewise, print the HImode name of the register.
+;; k -- likewise, print the SImode name of the register.
+;; q -- likewise, print the DImode name of the register.
+;; x -- likewise, print the V4SFmode name of the register.
+;; t -- likewise, print the V8SFmode name of the register.
+;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+;; y -- print "st(0)" instead of "st" as a register.
+;; d -- print duplicated register operand for AVX instruction.
+;; D -- print condition for SSE cmp instruction.
+;; P -- if PIC, print an @PLT suffix.
+;; X -- don't print any sort of PIC '@' suffix for a symbol.
+;; & -- print some in-use local-dynamic symbol name.
+;; H -- print a memory address offset by 8; used for sse high-parts
+;; Y -- print condition for XOP pcom* instruction.
+;; + -- print a branch hint as 'cs' or 'ds' prefix
+;; ; -- print a semicolon (after prefixes due to bug in older gas).
+;; @ -- print a segment register of thread base pointer load
+
+;; UNSPEC usage:
+
+(define_c_enum "unspec" [
+ ;; Relocation specifiers
+ UNSPEC_GOT
+ UNSPEC_GOTOFF
+ UNSPEC_GOTPCREL
+ UNSPEC_GOTTPOFF
+ UNSPEC_TPOFF
+ UNSPEC_NTPOFF
+ UNSPEC_DTPOFF
+ UNSPEC_GOTNTPOFF
+ UNSPEC_INDNTPOFF
+ UNSPEC_PLTOFF
+ UNSPEC_MACHOPIC_OFFSET
+ UNSPEC_PCREL
+
+ ;; Prologue support
+ UNSPEC_STACK_ALLOC
+ UNSPEC_SET_GOT
+ UNSPEC_REG_SAVE
+ UNSPEC_DEF_CFA
+ UNSPEC_SET_RIP
+ UNSPEC_SET_GOT_OFFSET
+ UNSPEC_MEMORY_BLOCKAGE
+ UNSPEC_STACK_CHECK
+
+ ;; TLS support
+ UNSPEC_TP
+ UNSPEC_TLS_GD
+ UNSPEC_TLS_LD_BASE
+ UNSPEC_TLSDESC
+ UNSPEC_TLS_IE_SUN
+
+ ;; Other random patterns
+ UNSPEC_SCAS
+ UNSPEC_FNSTSW
+ UNSPEC_SAHF
+ UNSPEC_PARITY
+ UNSPEC_FSTCW
+ UNSPEC_ADD_CARRY
+ UNSPEC_FLDCW
+ UNSPEC_REP
+ UNSPEC_LD_MPIC ; load_macho_picbase
+ UNSPEC_TRUNC_NOOP
+ UNSPEC_DIV_ALREADY_SPLIT
+ UNSPEC_CALL_NEEDS_VZEROUPPER
+
+ ;; For SSE/MMX support:
+ UNSPEC_FIX_NOTRUNC
+ UNSPEC_MASKMOV
+ UNSPEC_MOVMSK
+ UNSPEC_MOVNT
+ UNSPEC_MOVU
+ UNSPEC_RCP
+ UNSPEC_RSQRT
+ UNSPEC_SFENCE
+ UNSPEC_PFRCP
+ UNSPEC_PFRCPIT1
+ UNSPEC_PFRCPIT2
+ UNSPEC_PFRSQRT
+ UNSPEC_PFRSQIT1
+ UNSPEC_MFENCE
+ UNSPEC_LFENCE
+ UNSPEC_PSADBW
+ UNSPEC_LDDQU
+ UNSPEC_MS_TO_SYSV_CALL
+
+ ;; Generic math support
+ UNSPEC_COPYSIGN
+ UNSPEC_IEEE_MIN ; not commutative
+ UNSPEC_IEEE_MAX ; not commutative
+
+ ;; x87 Floating point
+ UNSPEC_SIN
+ UNSPEC_COS
+ UNSPEC_FPATAN
+ UNSPEC_FYL2X
+ UNSPEC_FYL2XP1
+ UNSPEC_FRNDINT
+ UNSPEC_FIST
+ UNSPEC_F2XM1
+ UNSPEC_TAN
+ UNSPEC_FXAM
+
+ ;; x87 Rounding
+ UNSPEC_FRNDINT_FLOOR
+ UNSPEC_FRNDINT_CEIL
+ UNSPEC_FRNDINT_TRUNC
+ UNSPEC_FRNDINT_MASK_PM
+ UNSPEC_FIST_FLOOR
+ UNSPEC_FIST_CEIL
+
+ ;; x87 Double output FP
+ UNSPEC_SINCOS_COS
+ UNSPEC_SINCOS_SIN
+ UNSPEC_XTRACT_FRACT
+ UNSPEC_XTRACT_EXP
+ UNSPEC_FSCALE_FRACT
+ UNSPEC_FSCALE_EXP
+ UNSPEC_FPREM_F
+ UNSPEC_FPREM_U
+ UNSPEC_FPREM1_F
+ UNSPEC_FPREM1_U
+
+ UNSPEC_C2_FLAG
+ UNSPEC_FXAM_MEM
+
+ ;; SSP patterns
+ UNSPEC_SP_SET
+ UNSPEC_SP_TEST
+ UNSPEC_SP_TLS_SET
+ UNSPEC_SP_TLS_TEST
+
+ ;; SSSE3
+ UNSPEC_PSHUFB
+ UNSPEC_PSIGN
+ UNSPEC_PALIGNR
+
+ ;; For SSE4A support
+ UNSPEC_EXTRQI
+ UNSPEC_EXTRQ
+ UNSPEC_INSERTQI
+ UNSPEC_INSERTQ
+
+ ;; For SSE4.1 support
+ UNSPEC_BLENDV
+ UNSPEC_INSERTPS
+ UNSPEC_DP
+ UNSPEC_MOVNTDQA
+ UNSPEC_MPSADBW
+ UNSPEC_PHMINPOSUW
+ UNSPEC_PTEST
+ UNSPEC_ROUND
+
+ ;; For SSE4.2 support
+ UNSPEC_CRC32
+ UNSPEC_PCMPESTR
+ UNSPEC_PCMPISTR
+
+ ;; For FMA4 support
+ UNSPEC_FMADDSUB
+ UNSPEC_XOP_UNSIGNED_CMP
+ UNSPEC_XOP_TRUEFALSE
+ UNSPEC_XOP_PERMUTE
+ UNSPEC_FRCZ
+
+ ;; For AES support
+ UNSPEC_AESENC
+ UNSPEC_AESENCLAST
+ UNSPEC_AESDEC
+ UNSPEC_AESDECLAST
+ UNSPEC_AESIMC
+ UNSPEC_AESKEYGENASSIST
+
+ ;; For PCLMUL support
+ UNSPEC_PCLMUL
+
+ ;; For AVX support
+ UNSPEC_PCMP
+ UNSPEC_VPERMIL
+ UNSPEC_VPERMIL2
+ UNSPEC_VPERMIL2F128
+ UNSPEC_MASKLOAD
+ UNSPEC_MASKSTORE
+ UNSPEC_CAST
+ UNSPEC_VTESTP
+ UNSPEC_VCVTPH2PS
+ UNSPEC_VCVTPS2PH
+
+ ;; For BMI support
+ UNSPEC_BEXTR
+])
+
+(define_c_enum "unspecv" [
+ UNSPECV_BLOCKAGE
+ UNSPECV_STACK_PROBE
+ UNSPECV_PROBE_STACK_RANGE
+ UNSPECV_EMMS
+ UNSPECV_LDMXCSR
+ UNSPECV_STMXCSR
+ UNSPECV_FEMMS
+ UNSPECV_CLFLUSH
+ UNSPECV_ALIGN
+ UNSPECV_MONITOR
+ UNSPECV_MWAIT
+ UNSPECV_CMPXCHG
+ UNSPECV_XCHG
+ UNSPECV_LOCK
+ UNSPECV_PROLOGUE_USE
+ UNSPECV_CLD
+ UNSPECV_NOPS
+ UNSPECV_VZEROALL
+ UNSPECV_VZEROUPPER
+ UNSPECV_RDTSC
+ UNSPECV_RDTSCP
+ UNSPECV_RDPMC
+ UNSPECV_LLWP_INTRINSIC
+ UNSPECV_SLWP_INTRINSIC
+ UNSPECV_LWPVAL_INTRINSIC
+ UNSPECV_LWPINS_INTRINSIC
+ UNSPECV_RDFSBASE
+ UNSPECV_RDGSBASE
+ UNSPECV_WRFSBASE
+ UNSPECV_WRGSBASE
+ UNSPECV_SPLIT_STACK_RETURN
+
+ ;; For RDRAND support
+ UNSPECV_RDRAND
+])
+
+;; Constants to represent pcomtrue/pcomfalse variants
+(define_constants
+ [(PCOM_FALSE 0)
+ (PCOM_TRUE 1)
+ (COM_FALSE_S 2)
+ (COM_FALSE_P 3)
+ (COM_TRUE_S 4)
+ (COM_TRUE_P 5)
+ ])
+
+;; Constants used in the XOP pperm instruction
+(define_constants
+ [(PPERM_SRC 0x00) /* copy source */
+ (PPERM_INVERT 0x20) /* invert source */
+ (PPERM_REVERSE 0x40) /* bit reverse source */
+ (PPERM_REV_INV 0x60) /* bit reverse & invert src */
+ (PPERM_ZERO 0x80) /* all 0's */
+ (PPERM_ONES 0xa0) /* all 1's */
+ (PPERM_SIGN 0xc0) /* propagate sign bit */
+ (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
+ (PPERM_SRC1 0x00) /* use first source byte */
+ (PPERM_SRC2 0x10) /* use second source byte */
+ ])
+
+;; Registers by name.
+(define_constants
+ [(AX_REG 0)
+ (DX_REG 1)
+ (CX_REG 2)
+ (BX_REG 3)
+ (SI_REG 4)
+ (DI_REG 5)
+ (BP_REG 6)
+ (SP_REG 7)
+ (ST0_REG 8)
+ (ST1_REG 9)
+ (ST2_REG 10)
+ (ST3_REG 11)
+ (ST4_REG 12)
+ (ST5_REG 13)
+ (ST6_REG 14)
+ (ST7_REG 15)
+ (FLAGS_REG 17)
+ (FPSR_REG 18)
+ (FPCR_REG 19)
+ (XMM0_REG 21)
+ (XMM1_REG 22)
+ (XMM2_REG 23)
+ (XMM3_REG 24)
+ (XMM4_REG 25)
+ (XMM5_REG 26)
+ (XMM6_REG 27)
+ (XMM7_REG 28)
+ (MM0_REG 29)
+ (MM1_REG 30)
+ (MM2_REG 31)
+ (MM3_REG 32)
+ (MM4_REG 33)
+ (MM5_REG 34)
+ (MM6_REG 35)
+ (MM7_REG 36)
+ (R8_REG 37)
+ (R9_REG 38)
+ (R10_REG 39)
+ (R11_REG 40)
+ (R12_REG 41)
+ (R13_REG 42)
+ (XMM8_REG 45)
+ (XMM9_REG 46)
+ (XMM10_REG 47)
+ (XMM11_REG 48)
+ (XMM12_REG 49)
+ (XMM13_REG 50)
+ (XMM14_REG 51)
+ (XMM15_REG 52)
+ ])
+
+;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
+;; from i386.c.
+
+;; In C guard expressions, put expressions which may be compile-time
+;; constants first. This allows for better optimization. For
+;; example, write "TARGET_64BIT && reload_completed", not
+;; "reload_completed && TARGET_64BIT".
+
+
+;; Processor type.
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7,
+ atom,generic64,amdfam10,bdver1,btver1"
+ (const (symbol_ref "ix86_schedule")))
+
+;; A basic instruction type. Refinements based on arguments are to be
+;; provided in other attributes.
+(define_attr "type"
+ "other,multi,
+ alu,alu1,negnot,imov,imovx,lea,
+ incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+ icmp,test,ibr,setcc,icmov,
+ push,pop,call,callv,leave,
+ str,bitmanip,
+ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
+ sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
+ ssemuladd,sse4arg,lwp,
+ mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
+ (const_string "other"))
+
+;; Main data type used by the insn
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
+ (const_string "unknown"))
+
+;; The CPU unit an operation uses.
+(define_attr "unit" "integer,i387,sse,mmx,unknown"
+ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
+ (const_string "i387")
+ (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
+ (const_string "sse")
+ (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+ (const_string "mmx")
+ (eq_attr "type" "other")
+ (const_string "unknown")]
+ (const_string "integer")))
+
+;; The (bounding maximum) length of an instruction immediate.
+(define_attr "length_immediate" ""
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
+ bitmanip")
+ (const_int 0)
+ (eq_attr "unit" "i387,sse,mmx")
+ (const_int 0)
+ (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
+ imul,icmp,push,pop")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,1)")
+ (eq_attr "type" "imov,test")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,0)")
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ ;; We don't know the size before shorten_branches. Expect
+ ;; the instruction to fit for better scheduling.
+ (eq_attr "type" "ibr")
+ (const_int 1)
+ ]
+ (symbol_ref "/* Update immediate_length and other attributes! */
+ gcc_unreachable (),1")))
+
+;; The (bounding maximum) length of an instruction address.
+(define_attr "length_address" ""
+ (cond [(eq_attr "type" "str,other,multi,fxch")
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ ]
+ (symbol_ref "ix86_attr_length_address_default (insn)")))
+
+;; Set when the operand-size (data16) prefix is used.
+(define_attr "prefix_data16" ""
+ (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
+ (const_int 0)
+ (eq_attr "mode" "HI")
+ (const_int 1)
+ (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; Set when string REP prefix is used.
+(define_attr "prefix_rep" ""
+ (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
+ (const_int 0)
+ (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; Set when 0f opcode prefix is used.
+(define_attr "prefix_0f" ""
+ (if_then_else
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+ (eq_attr "unit" "sse,mmx"))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when REX opcode prefix is used.
+(define_attr "prefix_rex" ""
+ (cond [(eq (symbol_ref "TARGET_64BIT") (const_int 0))
+ (const_int 0)
+ (and (eq_attr "mode" "DI")
+ (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
+ (eq_attr "unit" "!mmx")))
+ (const_int 1)
+ (and (eq_attr "mode" "QI")
+ (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)")
+ (const_int 0)))
+ (const_int 1)
+ (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+ (const_int 0))
+ (const_int 1)
+ (and (eq_attr "type" "imovx")
+ (match_operand:QI 1 "ext_QIreg_operand" ""))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; There are also additional prefixes in 3DNOW, SSSE3.
+;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte,
+;; sseiadd1,ssecvt1 to 0f7a with no DREX byte.
+;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
+(define_attr "prefix_extra" ""
+ (cond [(eq_attr "type" "ssemuladd,sse4arg")
+ (const_int 2)
+ (eq_attr "type" "sseiadd1,ssecvt1")
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; Prefix used: original, VEX or maybe VEX.
+(define_attr "prefix" "orig,vex,maybe_vex"
+ (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+ (const_string "vex")
+ (const_string "orig")))
+
+;; VEX W bit is used.
+(define_attr "prefix_vex_w" "" (const_int 0))
+
+;; The length of the VEX prefix.
+;; Only instructions with a 0f prefix can use the 2-byte VEX prefix;
+;; those with 0f38/0f3a prefixes can't. In i386.md, 0f3[8a] is
+;; still prefix_0f 1, with prefix_extra 1.
+(define_attr "length_vex" ""
+ (if_then_else (and (eq_attr "prefix_0f" "1")
+ (eq_attr "prefix_extra" "0"))
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)"))
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)"))))
+
+;; Set when modrm byte is used.
+(define_attr "modrm" ""
+ (cond [(eq_attr "type" "str,leave")
+ (const_int 0)
+ (eq_attr "unit" "i387")
+ (const_int 0)
+ (and (eq_attr "type" "incdec")
+ (and (eq (symbol_ref "TARGET_64BIT") (const_int 0))
+ (ior (match_operand:SI 1 "register_operand" "")
+ (match_operand:HI 1 "register_operand" ""))))
+ (const_int 0)
+ (and (eq_attr "type" "push")
+ (not (match_operand 1 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "pop")
+ (not (match_operand 0 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "imov")
+ (and (not (eq_attr "mode" "DI"))
+ (ior (and (match_operand 0 "register_operand" "")
+ (match_operand 1 "immediate_operand" ""))
+ (ior (and (match_operand 0 "ax_reg_operand" "")
+ (match_operand 1 "memory_displacement_only_operand" ""))
+ (and (match_operand 0 "memory_displacement_only_operand" "")
+ (match_operand 1 "ax_reg_operand" ""))))))
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "alu,alu1,icmp,test")
+ (match_operand 0 "ax_reg_operand" ""))
+ (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
+ ]
+ (const_int 1)))
+
+;; The (bounding maximum) length of an instruction in bytes.
+;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
+;; Later we may want to split them and compute proper length as for
+;; other insns.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "other,multi,fistp,frndint")
+ (const_int 16)
+ (eq_attr "type" "fcmp")
+ (const_int 4)
+ (eq_attr "unit" "i387")
+ (plus (const_int 2)
+ (plus (attr "prefix_data16")
+ (attr "length_address")))
+ (ior (eq_attr "prefix" "vex")
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "TARGET_AVX") (const_int 0))))
+ (plus (attr "length_vex")
+ (plus (attr "length_immediate")
+ (plus (attr "modrm")
+ (attr "length_address"))))]
+ (plus (plus (attr "modrm")
+ (plus (attr "prefix_0f")
+ (plus (attr "prefix_rex")
+ (plus (attr "prefix_extra")
+ (const_int 1)))))
+ (plus (attr "prefix_rep")
+ (plus (attr "prefix_data16")
+ (plus (attr "length_immediate")
+ (attr "length_address")))))))
+
+;; The `memory' attribute is `none' if no memory is referenced, `load' or
+;; `store' if there is a simple memory reference therein, or `unknown'
+;; if the instruction is complex.
+
+(define_attr "memory" "none,load,store,both,unknown"
+ (cond [(eq_attr "type" "other,multi,str,lwp")
+ (const_string "unknown")
+ (eq_attr "type" "lea,fcmov,fpspc")
+ (const_string "none")
+ (eq_attr "type" "fistp,leave")
+ (const_string "both")
+ (eq_attr "type" "frndint")
+ (const_string "load")
+ (eq_attr "type" "push")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "both")
+ (const_string "store"))
+ (eq_attr "type" "pop")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "both")
+ (const_string "load"))
+ (eq_attr "type" "setcc")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (const_string "none"))
+ (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
+ (if_then_else (ior (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "ibr")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (and (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (and (eq_attr "type"
+ "!alu1,negnot,ishift1,
+ imov,imovx,icmp,test,bitmanip,
+ fmov,fcmp,fsgn,
+ sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
+ sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
+ (match_operand 2 "memory_operand" ""))
+ (const_string "load")
+ (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
+ (match_operand 3 "memory_operand" ""))
+ (const_string "load")
+ ]
+ (const_string "none")))
+
+;; Indicates if an instruction has both an immediate and a displacement.
+
+(define_attr "imm_disp" "false,true,unknown"
+ (cond [(eq_attr "type" "other,multi")
+ (const_string "unknown")
+ (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 1 "immediate_operand" "")))
+ (const_string "true")
+ (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 2 "immediate_operand" "")))
+ (const_string "true")
+ ]
+ (const_string "false")))
+
+;; Indicates if an FP operation has an integer source.
+
+(define_attr "fp_int_src" "false,true"
+ (const_string "false"))
+
+;; Defines rounding mode of an FP operation.
+
+(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+ (const_string "any"))
+
+;; Define attribute to classify add/sub insns that consumes carry flag (CF)
+(define_attr "use_carry" "0,1" (const_string "0"))
+
+;; Define attribute to indicate unaligned ssemov insns
+(define_attr "movu" "0,1" (const_string "0"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "length" "128")
+ (set_attr "type" "multi")])
+
+(define_code_iterator plusminus [plus minus])
+
+(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+
+;; Base name for define_insn
+(define_code_attr plusminus_insn
+ [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+ (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
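+
+;; For example, a define_insn named "<plusminus_insn><mode>3" iterated
+;; over plusminus and SWI48 expands into the addsi3, subsi3, adddi3 and
+;; subdi3 patterns.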
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminus_mnemonic
+ [(plus "add") (ss_plus "adds") (us_plus "addus")
+ (minus "sub") (ss_minus "subs") (us_minus "subus")])
+(define_code_attr plusminus_carry_mnemonic
+ [(plus "adc") (minus "sbb")])
+
+;; Mark commutative operators as such in constraints.
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
+ (minus "") (ss_minus "") (us_minus "")])
+
+;; Mapping of signed max and min
+(define_code_iterator smaxmin [smax smin])
+
+;; Mapping of unsigned max and min
+(define_code_iterator umaxmin [umax umin])
+
+;; Base name for integer and FP insn mnemonic
+(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
+ (umax "maxu") (umin "minu")])
+(define_code_attr maxmin_float [(smax "max") (smin "min")])
+
+;; Mapping of logic operators
+(define_code_iterator any_logic [and ior xor])
+(define_code_iterator any_or [ior xor])
+
+;; Base name for insn mnemonic.
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of shift-right operators
+(define_code_iterator any_shiftrt [lshiftrt ashiftrt])
+
+;; Base name for define_insn
+(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")])
+
+;; Mapping of rotate operators
+(define_code_iterator any_rotate [rotate rotatert])
+
+;; Base name for define_insn
+(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
+
+;; Mapping of abs neg operators
+(define_code_iterator absneg [abs neg])
+
+;; Base name for x87 insn mnemonic.
+(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
+
+;; Used in signed and unsigned widening multiplications.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; Various insn prefixes for signed and unsigned operations.
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+ (div "") (udiv "u")])
+(define_code_attr s [(sign_extend "s") (zero_extend "u")])
+
+;; Used in signed and unsigned divisions.
+(define_code_iterator any_div [div udiv])
+
+;; Instruction prefix for signed and unsigned operations.
+(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
+ (div "i") (udiv "")])
+
+;; Integer modes that are a single word on 64-bit targets.
+(define_mode_iterator SWI1248x [QI HI SI DI])
+
+;; Integer modes that are a single word on 64-bit targets, without QImode
+;; and HImode.
+(define_mode_iterator SWI48x [SI DI])
+
+;; Single word integer modes.
+(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without SImode and DImode.
+(define_mode_iterator SWI12 [QI HI])
+
+;; Single word integer modes without DImode.
+(define_mode_iterator SWI124 [QI HI SI])
+
+;; Single word integer modes without QImode and DImode.
+(define_mode_iterator SWI24 [HI SI])
+
+;; Single word integer modes without QImode.
+(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without QImode and HImode.
+(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])
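+;; A minimal sketch of how the conditional entries work (hypothetical
+;; pattern name): a template such as
+;;   (define_insn "*foo<mode>" [... (match_operand:SWI 0 ...) ...] ...)
+;; is instantiated for QImode, HImode and SImode unconditionally, and for
+;; DImode only when the attached TARGET_64BIT condition holds.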
+
+;; All math-dependent single and double word integer modes.
+(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
+ (HI "TARGET_HIMODE_MATH")
+ SI DI (TI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes.
+(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
+ (HI "TARGET_HIMODE_MATH")
+ SI (DI "TARGET_64BIT")])
+
+;; Math-dependent single word integer modes without DImode.
+(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
+ (HI "TARGET_HIMODE_MATH")
+ SI])
+
+;; Math-dependent single word integer modes without QImode.
+(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
+ SI (DI "TARGET_64BIT")])
+
+;; Double word integer modes.
+(define_mode_iterator DWI [(DI "!TARGET_64BIT")
+ (TI "TARGET_64BIT")])
+
+;; Double word integer modes as mode attribute.
+(define_mode_attr DWI [(SI "DI") (DI "TI")])
+(define_mode_attr dwi [(SI "di") (DI "ti")])
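+;; For instance (illustrative): in a template iterating over DWIH, <DWI>
+;; names the corresponding double-word mode, so a widening multiply can be
+;; written as (mult:<DWI> (any_extend:<DWI> ...) (any_extend:<DWI> ...)).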
+
+;; Half mode for double word integer modes.
+(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
+ (DI "TARGET_64BIT")])
+
+;; Instruction suffix for integer modes.
+(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Pointer size prefix for integer modes (Intel asm dialect)
+(define_mode_attr iptrsize [(QI "BYTE")
+ (HI "WORD")
+ (SI "DWORD")
+ (DI "QWORD")])
+
+;; Register class for integer modes.
+(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
+
+;; Immediate operand constraint for integer modes.
+(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
+
+;; General operand constraint for word modes.
+(define_mode_attr g [(QI "qmn") (HI "rmn") (SI "g") (DI "rme")])
+
+;; Immediate operand constraint for double integer modes.
+(define_mode_attr di [(SI "iF") (DI "e")])
+
+;; Immediate operand constraint for shifts.
+(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+
+;; General operand predicate for integer modes.
+(define_mode_attr general_operand
+ [(QI "general_operand")
+ (HI "general_operand")
+ (SI "general_operand")
+ (DI "x86_64_general_operand")
+ (TI "x86_64_general_operand")])
+
+;; General sign/zero extend operand predicate for integer modes.
+(define_mode_attr general_szext_operand
+ [(QI "general_operand")
+ (HI "general_operand")
+ (SI "general_operand")
+ (DI "x86_64_szext_general_operand")])
+
+;; Immediate operand predicate for integer modes.
+(define_mode_attr immediate_operand
+ [(QI "immediate_operand")
+ (HI "immediate_operand")
+ (SI "immediate_operand")
+ (DI "x86_64_immediate_operand")])
+
+;; Nonmemory operand predicate for integer modes.
+(define_mode_attr nonmemory_operand
+ [(QI "nonmemory_operand")
+ (HI "nonmemory_operand")
+ (SI "nonmemory_operand")
+ (DI "x86_64_nonmemory_operand")])
+
+;; Operand predicate for shifts.
+(define_mode_attr shift_operand
+ [(QI "nonimmediate_operand")
+ (HI "nonimmediate_operand")
+ (SI "nonimmediate_operand")
+ (DI "shiftdi_operand")
+ (TI "register_operand")])
+
+;; Operand predicate for shift argument.
+(define_mode_attr shift_immediate_operand
+ [(QI "const_1_to_31_operand")
+ (HI "const_1_to_31_operand")
+ (SI "const_1_to_31_operand")
+ (DI "const_1_to_63_operand")])
+
+;; Input operand predicate for arithmetic left shifts.
+(define_mode_attr ashl_input_operand
+ [(QI "nonimmediate_operand")
+ (HI "nonimmediate_operand")
+ (SI "nonimmediate_operand")
+ (DI "ashldi_input_operand")
+ (TI "reg_or_pm1_operand")])
+
+;; SSE and x87 SFmode and DFmode floating point modes
+(define_mode_iterator MODEF [SF DF])
+
+;; All x87 floating point modes
+(define_mode_iterator X87MODEF [SF DF XF])
+
+;; All integer modes handled by x87 fisttp operator.
+(define_mode_iterator X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_iterator X87MODEI12 [HI SI])
+
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_iterator SSEMODEI24 [SI DI])
+
+;; SSE asm suffix for floating point modes
+(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
+
+;; SSE vector mode corresponding to a scalar mode
+(define_mode_attr ssevecmode
+ [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64bit operators.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
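+;; For example, the *push<mode>2_prologue and *pop<mode>1_epilogue patterns
+;; below use :P, so only their SImode instance exists when Pmode == SImode
+;; and only the DImode instance when Pmode == DImode.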
+
+;; Scheduling descriptions
+
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
+(include "bdver1.md")
+(include "geode.md")
+(include "atom.md")
+(include "core2.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare and branch/compare and store instructions.
+
+(define_expand "cbranch<mode>4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand" "")
+ (match_operand:SDWIM 2 "<general_operand>" "")))
+ (set (pc) (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(reg:CC FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ if (MEM_P (operands[1]) && MEM_P (operands[2]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
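+;; Illustrative result (assuming SImode register operands): for a branch
+;; on "a < b" the expander above emits a flags-setting compare followed by
+;; a conditional jump, roughly
+;;   cmpl %ebx, %eax
+;;   jl   .Llabel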
+
+(define_expand "cstore<mode>4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SWIM 2 "nonimmediate_operand" "")
+ (match_operand:SWIM 3 "<general_operand>" "")))
+ (set (match_operand:QI 0 "register_operand" "")
+ (match_operator 1 "ordered_comparison_operator"
+ [(reg:CC FLAGS_REG) (const_int 0)]))]
+ ""
+{
+ if (MEM_P (operands[2]) && MEM_P (operands[3]))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+})
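+;; Illustrative result (assuming SImode operands): for "r = (a < b)" the
+;; expander above emits a compare and a QImode setcc, roughly
+;;   cmpl %ebx, %eax
+;;   setl %al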
+
+(define_expand "cmp<mode>_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SWI48 0 "nonimmediate_operand" "")
+ (match_operand:SWI48 1 "<general_operand>" "")))])
+
+(define_insn "*cmp<mode>_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
+ (match_operand:SWI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{<imodesuffix>}\t%0, %0
+ cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmp<mode>_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+ (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m")))]
+ "ix86_match_ccmode (insn, CCmode)"
+ "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmp<mode>_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+ (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmpqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "general_operand" "Qm")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "register_operand" "Q")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t%h0, %h0"
+ [(set_attr "type" "test")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpqi_ext_3"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "immediate_operand" "")))])
+
+(define_insn "*cmpqi_ext_3_insn"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "general_operand" "Qmn")))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_3_insn_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "nonmemory_operand" "Qn")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_4"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+;; These implement floating point compares.
+;; %%% See if we can get away with VOIDmode operands on the actual insns,
+;; which would allow mix and match FP modes on the compares. Which is what
+;; the old patterns did, but with many more of them.
+
+(define_expand "cbranchxf4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XF 1 "nonmemory_operand" "")
+ (match_operand:XF 2 "nonmemory_operand" "")))
+ (set (pc) (if_then_else
+ (match_operator 0 "ix86_fp_comparison_operator"
+ [(reg:CC FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_80387"
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "cstorexf4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XF 2 "nonmemory_operand" "")
+ (match_operand:XF 3 "nonmemory_operand" "")))
+ (set (match_operand:QI 0 "register_operand" "")
+ (match_operator 1 "ix86_fp_comparison_operator"
+ [(reg:CC FLAGS_REG)
+ (const_int 0)]))]
+ "TARGET_80387"
+{
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "cbranch<mode>4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand" "")
+ (match_operand:MODEF 2 "cmp_fp_expander_operand" "")))
+ (set (pc) (if_then_else
+ (match_operator 0 "ix86_fp_comparison_operator"
+ [(reg:CC FLAGS_REG)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "cstore<mode>4"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand" "")
+ (match_operand:MODEF 3 "cmp_fp_expander_operand" "")))
+ (set (match_operand:QI 0 "register_operand" "")
+ (match_operator 1 "ix86_fp_comparison_operator"
+ [(reg:CC FLAGS_REG)
+ (const_int 0)]))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "cbranchcc4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "comparison_operator"
+ [(match_operand 1 "flags_reg_operand" "")
+ (match_operand 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[1], operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "cstorecc4"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (match_operator 1 "comparison_operator"
+ [(match_operand 2 "flags_reg_operand" "")
+ (match_operand 3 "const0_operand" "")]))]
+ ""
+{
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+})
+
+
+;; FP compares, step 1:
+;; Set the FP condition codes.
+;;
+;; CCFPmode compare with exceptions
+;; CCFPUmode compare with no exceptions
+
+;; We may not use "#" to split and emit these, since the REG_DEAD notes
+;; used to manage the reg stack popping would not be preserved.
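+;; A minimal sketch of the full three-step sequence on a SAHF-capable
+;; target (illustrative; operands assumed already on the x87 stack):
+;;   fcomp  %st(1)    # step 1: set the FP condition codes
+;;   fnstsw %ax       # step 2: move the FP status word to ax
+;;   sahf             # step 3: copy ah into the flags register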
+
+(define_insn "*cmpfp_0"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" ""))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_0_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_xf"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:XF 1 "register_operand" "f")
+ (match_operand:XF 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_insn_and_split "*cmpfp_xf_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand:XF 1 "register_operand" "f")
+ (match_operand:XF 2 "register_operand" "f")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "TARGET_80387
+ && TARGET_SAHF && !TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cmpfp_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:MODEF 1 "register_operand" "f")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand:MODEF 1 "register_operand" "f")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "TARGET_80387
+ && TARGET_SAHF && !TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmpfp_u"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFPU
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 1);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_u_cc"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFPU (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operator 3 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operator 3 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "m")])))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP
+ (match_dup 1)
+ (match_op_dup 3 [(match_dup 2)]))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2
+;; Move the fpsw to ax.
+
+(define_insn "x86_fnstsw_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "fnstsw\t%0"
+ [(set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+ (set_attr "mode" "SI")
+ (set_attr "unit" "i387")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+
+(define_insn "x86_sahf_1"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
+ UNSPEC_SAHF))]
+ "TARGET_SAHF"
+{
+#ifndef HAVE_AS_IX86_SAHF
+ if (TARGET_64BIT)
+ return ASM_BYTE "0x9e";
+ else
+#endif
+ return "sahf";
+}
+ [(set_attr "length" "1")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "SI")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
+;; comi*, ucomi*, fcomi*, ficomi*, fucomi* (i387 instructions set condition codes)
+(define_insn "*cmpfp_i_mixed"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set (attr "prefix_rep")
+ (if_then_else (eq_attr "type" "ssecomi")
+ (const_string "0")
+ (const_string "*")))
+ (set (attr "prefix_data16")
+ (cond [(eq_attr "type" "fcmp")
+ (const_string "*")
+ (eq_attr "mode" "DF")
+ (const_string "1")
+ ]
+ (const_string "0")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*cmpfp_i_sse"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "prefix_rep" "0")
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "DF")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*cmpfp_i_i387"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && TARGET_CMOVE
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*cmpfp_iu_mixed"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set (attr "prefix_rep")
+ (if_then_else (eq_attr "type" "ssecomi")
+ (const_string "0")
+ (const_string "*")))
+ (set (attr "prefix_data16")
+ (cond [(eq_attr "type" "fcmp")
+ (const_string "*")
+ (eq_attr "mode" "DF")
+ (const_string "1")
+ ]
+ (const_string "0")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*cmpfp_iu_sse"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "prefix_rep" "0")
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "DF")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*cmpfp_iu_387"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && TARGET_CMOVE
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")])
+
+;; Push/pop instructions.
+
+(define_insn "*push<mode>2"
+ [(set (match_operand:DWI 0 "push_operand" "=<")
+ (match_operand:DWI 1 "general_no_elim_operand" "riF*m"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "push_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_64BIT && reload_completed
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushdi2_rex64"
+ [(set (match_operand:DI 0 "push_operand" "=<,!<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+ "TARGET_64BIT"
+ "@
+ push{q}\t%1
+ #"
+ [(set_attr "type" "push,multi")
+ (set_attr "mode" "DI")])
+
+;; Convert impossible pushes of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  If that fails,
+;; push the sign-extended lower part first and then overwrite the upper
+;; part with a 32-bit move.
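+;; Illustrative sketch (hypothetical constant): pushing the 64-bit
+;; immediate 0x1122334455667788, which no single push accepts, becomes
+;;   movabsq $0x1122334455667788, %rax   # via a scratch register
+;;   pushq   %rax
+;; or, when no scratch register can be obtained,
+;;   pushq   $0x55667788                 # sign-extended lower part
+;;   movl    $0x11223344, 4(%rsp)        # overwrite the upper 32 bits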
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; We need to define this as both a peephole and a splitter for the case
+;; where the peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
+
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+})
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+{
+ split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
+
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+})
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushsi2"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+ "!TARGET_64BIT"
+ "push{l}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; emit_push_insn, when it calls move_by_pieces, requires an insn to
+;; "push a byte/word".  But we actually use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
+
+;; For TARGET_64BIT we always round up to 8 bytes.
+(define_insn "*push<mode>2_rex64"
+ [(set (match_operand:SWI124 0 "push_operand" "=X")
+ (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+(define_insn "*push<mode>2"
+ [(set (match_operand:SWI12 0 "push_operand" "=X")
+ (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
+ "!TARGET_64BIT"
+ "push{l}\t%k1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+(define_insn "*push<mode>2_prologue"
+ [(set (match_operand:P 0 "push_operand" "=<")
+ (match_operand:P 1 "general_no_elim_operand" "r<i>*m"))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "push{<imodesuffix>}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1"
+ [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+ (match_operand:P 1 "pop_operand" ">"))]
+ ""
+ "pop{<imodesuffix>}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pop<mode>1_epilogue"
+ [(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
+ (match_operand:P 1 "pop_operand" ">"))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "pop{<imodesuffix>}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "<MODE>")])
+
+;; Move instructions.
+
+(define_expand "movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "")
+ (match_operand:OI 1 "general_operand" ""))]
+ "TARGET_AVX"
+ "ix86_expand_move (OImode, operands); DONE;")
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
+ "TARGET_64BIT || TARGET_SSE"
+{
+ if (TARGET_64BIT)
+ ix86_expand_move (TImode, operands);
+ else if (push_operand (operands[0], TImode))
+ ix86_expand_push (TImode, operands[1]);
+ else
+ ix86_expand_vector_move (TImode, operands);
+ DONE;
+})
+
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern.  Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, and does not seem to be harmful
+;; to have around all the time.
+(define_expand "movcdi"
+ [(set (match_operand:CDI 0 "nonimmediate_operand" "")
+ (match_operand:CDI 1 "general_operand" ""))]
+ ""
+{
+ if (push_operand (operands[0], CDImode))
+ emit_move_complex_push (CDImode, operands[0], operands[1]);
+ else
+ emit_move_complex_parts (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SWI1248x 0 "nonimmediate_operand" "")
+ (match_operand:SWI1248x 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (<MODE>mode, operands); DONE;")
+
+(define_insn "*mov<mode>_xor"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (match_operand:SWI48 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ "xor{l}\t%k0, %k0"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>_or"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (match_operand:SWI48 1 "const_int_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && operands[1] == constm1_rtx"
+ "or{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length_immediate" "1")])
+
+(define_insn "*movoi_internal_avx"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vxorps\t%0, %0, %0";
+ case 1:
+ case 2:
+ if (misaligned_operand (operands[0], OImode)
+ || misaligned_operand (operands[1], OImode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "*movti_internal_rex64"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 3:
+ case 4:
+ /* TDmode values are passed as TImode on the stack. Moving them
+ to the stack may result in unaligned memory accesses. */
+ if (misaligned_operand (operands[0], TImode)
+ || misaligned_operand (operands[1], TImode))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovups\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+ (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "reload_completed
+ && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movti_internal_sse"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 1:
+ case 2:
+ /* TDmode values are passed as TImode on the stack. Moving them
+ to the stack may result in unaligned memory accesses. */
+ if (misaligned_operand (operands[0], TImode)
+ || misaligned_operand (operands[1], TImode))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovups\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "V4SF")
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0)))
+ (const_string "V4SF")]
+ (const_string "TI")))])
+
+(define_insn "*movdi_internal_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r,r ,r,m ,!o,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
+ (match_operand:DI 1 "general_operand"
+ "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSECVT:
+ if (SSE_REG_P (operands[0]))
+ return "movq2dq\t{%1, %0|%0, %1}";
+ else
+ return "movdq2q\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ /* Handle broken assemblers that require movd instead of movq. */
+ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+ return "%vmovd\t{%1, %0|%0, %1}";
+ return "%vmovq\t{%1, %0|%0, %1}";
+
+ case TYPE_MMXMOV:
+ /* Handle broken assemblers that require movd instead of movq. */
+ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+ return "movd\t{%1, %0|%0, %1}";
+ return "movq\t{%1, %0|%0, %1}";
+
+ case TYPE_SSELOG1:
+ return "%vpxor\t%0, %d0";
+
+ case TYPE_MMX:
+ return "pxor\t%0, %0";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_LEA:
+ return "lea{q}\t{%a1, %0|%0, %a1}";
+
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else if (which_alternative == 2)
+ return "movabs{q}\t{%1, %0|%0, %1}";
+ else
+ return "mov{q}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "5")
+ (const_string "mmx")
+ (eq_attr "alternative" "6,7,8,9,10")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "11")
+ (const_string "sselog1")
+ (eq_attr "alternative" "12,13,14,15,16")
+ (const_string "ssemov")
+ (eq_attr "alternative" "17,18")
+ (const_string "ssecvt")
+ (eq_attr "alternative" "4")
+ (const_string "multi")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set (attr "modrm")
+ (if_then_else
+ (and (eq_attr "alternative" "2") (eq_attr "type" "imov"))
+ (const_string "0")
+ (const_string "*")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "alternative" "2") (eq_attr "type" "imov"))
+ (const_string "8")
+ (const_string "*")))
+ (set_attr "prefix_rex" "*,*,*,*,*,*,*,1,*,1,*,*,*,*,*,*,*,*,*")
+ (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,1,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
+
+;; Convert impossible stores of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  If that fails,
+;; move by 32-bit parts.
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; We need to define this as both a peephole and a splitter for the case
+;; where the peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_double_mode (DImode, &operands[0], 2, &operands[2], &operands[4]);")
+
+(define_insn "*movdi_internal"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x")
+ (match_operand:DI 1 "general_operand"
+ "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))]
+ "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ #
+ #
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovdqa\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
+ && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movsi_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+ (match_operand:SI 1 "general_operand"
+ "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSELOG1:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "%vpxor\t%0, %d0";
+ return "%vxorps\t%0, %d0";
+
+ case TYPE_SSEMOV:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_TI:
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_SI:
+ return "%vmovd\t{%1, %0|%0, %1}";
+ case MODE_SF:
+ return "%vmovss\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case TYPE_MMX:
+ return "pxor\t%0, %0";
+
+ case TYPE_MMXMOV:
+ if (get_attr_mode (insn) == MODE_DI)
+ return "movq\t{%1, %0|%0, %1}";
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_LEA:
+ return "lea{l}\t{%a1, %0|%0, %a1}";
+
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ return "mov{l}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "mmx")
+ (eq_attr "alternative" "3,4,5")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "6")
+ (const_string "sselog1")
+ (eq_attr "alternative" "7,8,9,10,11")
+ (const_string "ssemov")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "prefix_data16")
+ (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
+ (const_string "1")
+ (const_string "*")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "DI")
+ (eq_attr "alternative" "6,7")
+ (if_then_else
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (and (eq_attr "alternative" "8,9,10,11")
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (const_string "SF")
+ ]
+ (const_string "SI")))])
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ /* movzwl is faster than movw on p2 due to partial word stalls,
+ though not as fast as an aligned movl. */
+ return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{w}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "imov")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "0,2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "SI")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "SI")
+ ]
+ (const_string "HI")))])
+
+;; Choosing between a full sized (SImode) move and a QImode move is quite
+;; tricky.  For Q_REG -> Q_REG moves we use the full size only on machines
+;; with partial register dependencies (such as the AMD Athlon), where a
+;; QImode move would introduce an extra dependency, and on machines with
+;; partial register stalls that don't use QImode patterns (where a QImode
+;; move would stall the next instruction).
+;;
+;; For loads of a Q_REG into a NONQ_REG we use full sized moves, except on
+;; machines with partial register stalls, where a QImode load could cause
+;; the stall; there we use movzx instead.
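+;; Illustrative contrast: on a partial register stall machine, loading a
+;; byte with
+;;   movzbl (%ecx), %eax
+;; leaves no stale upper bits, whereas after
+;;   movb (%ecx), %al
+;; a subsequent full-width use of %eax would stall.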
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+ (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
+ return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "5")
+ (not (match_operand:QI 1 "aligned_operand" "")))
+ (const_string "imovx")
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "3")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (eq_attr "alternative" "3,5")
+ (const_string "imovx")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,5")
+ (const_string "SI")
+ (eq_attr "alternative" "6")
+ (const_string "QI")
+ (eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))))))
+ (const_string "SI")
+ ;; Avoid partial register stalls when not using QImode arithmetic
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0)))))
+ (const_string "SI")
+ ]
+ (const_string "QI")))])
+
+;; Stores and loads of ax at an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabs<mode>_1"
+ [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:SWI1248x 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{<imodesuffix>}\t{%1, %P0|[%P0], %1}
+ mov{<imodesuffix>}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*movabs<mode>_2"
+ [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
+ (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{<imodesuffix>}\t{%P1, %0|%0, [%P1]}
+ mov{<imodesuffix>}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*swap<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "+r")
+ (match_operand:SWI48 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ ""
+ "xchg{<imodesuffix>}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "<MODE>")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*swap<mode>_1"
+ [(set (match_operand:SWI12 0 "register_operand" "+r")
+ (match_operand:SWI12 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "xchg{l}\t%k1, %k0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "double")])
+
+;; amdfam10_decode is not set since TARGET_PARTIAL_REG_STALL
+;; is disabled for AMDFAM10.
+(define_insn "*swap<mode>_2"
+ [(set (match_operand:SWI12 0 "register_operand" "+<r>")
+ (match_operand:SWI12 1 "register_operand" "+<r>"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_PARTIAL_REG_STALL"
+ "xchg{<imodesuffix>}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "<MODE>")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrict<mode>"
+ [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand" ""))
+ (match_operand:SWI12 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+ FAIL;
+ if (GET_CODE (operands[0]) == SUBREG
+ && GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT)
+ FAIL;
+ /* Don't generate memory->memory moves; go through a register instead. */
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*movstrict<mode>_1"
+ [(set (strict_low_part
+ (match_operand:SWI12 0 "nonimmediate_operand" "+<r>m,<r>"))
+ (match_operand:SWI12 1 "general_operand" "<r>n,m"))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*movstrict<mode>_xor"
+ [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
+ (match_operand:SWI12 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ "xor{<imodesuffix>}\t%0, %0"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>_extv_1"
+ [(set (match_operand:SWI24 0 "register_operand" "=R")
+ (sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extv_1_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extv_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*mov<mode>_extzv_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=R")
+ (zero_extract:SWI48 (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extzv_2_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (subreg:QI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0)))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extzv_2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+ (subreg:QI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_expand "mov<mode>_insv_1"
+ [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SWI48 1 "nonmemory_operand" ""))])
+
+(define_insn "*mov<mode>_insv_1_rex64"
+ [(set (zero_extract:SWI48x (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SWI48x 1 "nonmemory_operand" "Qn"))]
+ "TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movsi_insv_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SI 1 "general_operand" "Qmn"))]
+ "!TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movqi_insv_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
+ (const_int 8)))]
+ ""
+ "mov{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+;; Floating point push instructions.
+
+(define_insn "*pushtf"
+ [(set (match_operand:TF 0 "push_operand" "=<,<,<")
+ (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
+ "TARGET_SSE2"
+{
+ /* This insn should already be split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "sse,*,*")
+ (set_attr "mode" "TF,SI,SI")])
+
+(define_split
+ [(set (match_operand:TF 0 "push_operand" "")
+ (match_operand:TF 1 "sse_reg_operand" ""))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
+ (set (mem:TF (reg:P SP_REG)) (match_dup 1))])
+
+(define_split
+ [(set (match_operand:TF 0 "push_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_SSE2 && reload_completed
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushxf"
+ [(set (match_operand:XF 0 "push_operand" "=<,<")
+ (match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
+ "optimize_function_for_speed_p (cfun)"
+{
+ /* This insn should already be split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*")
+ (set_attr "mode" "XF,SI")])
+
+;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushxf using integer instructions is 3 + 3*memory operand size.
+;; Pushing using integer instructions is longer except for constants
+;; and direct memory references (assuming that any given constant is pushed
+;; only once, but this ought to be handled elsewhere).
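+
+;; For instance, with a one-byte memory operand both approaches come to
+;; six bytes (3 + 2 + 1 vs. 3 + 3*1); each additional addressing byte
+;; then costs the integer path three bytes but the fstp path only one.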
+
+(define_insn "*pushxf_nointeger"
+ [(set (match_operand:XF 0 "push_operand" "=X,X,X")
+ (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+ "optimize_function_for_size_p (cfun)"
+{
+ /* This insn should already be split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "XF,SI,SI")])
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (match_operand:XF 1 "fp_register_operand" ""))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
+ "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ "reload_completed
+ && !FP_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushdf"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
+ "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+{
+ /* This insn should already be split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "DF,SI,DF")])
+
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2 + 2*memory operand size.
+;; On average, pushdf using integers can still be shorter. Allow this
+;; pattern for optimize_size too.
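+
+;; For instance, with a one-byte memory operand the integer path takes
+;; 2 + 2*1 = 4 bytes against 3 + 2 + 1 = 6 for fstp; the two break even
+;; only at a three-byte memory operand (8 bytes each).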
+
+(define_insn "*pushdf_nointeger"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
+ "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)"
+{
+ /* This insn should already be split before reg-stack. */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*,*")
+ (set_attr "mode" "DF,SI,SI,DF")])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+ (set (mem:DF (reg:P SP_REG)) (match_dup 1))])
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed
+ && !ANY_FP_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*pushsf_rex64"
+ [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+ (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
+ "TARGET_64BIT"
+{
+ /* Anything else should already be split before reg-stack. */
+ gcc_assert (which_alternative == 1);
+ return "push{q}\t%q1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,DI,SF")])
+
+(define_insn "*pushsf"
+ [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+ (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))]
+ "!TARGET_64BIT"
+{
+ /* Anything else should already be split before reg-stack. */
+ gcc_assert (which_alternative == 1);
+ return "push{l}\t%1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,SI,SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:SF (reg:P SP_REG)) (match_dup 1))]
+ "operands[2] = GEN_INT (-GET_MODE_SIZE (<MODE>mode));")
+
+;; Floating point move instructions.
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE2"
+{
+ ix86_expand_move (TFmode, operands);
+ DONE;
+})
+
+(define_expand "mov<mode>"
+ [(set (match_operand:X87MODEF 0 "nonimmediate_operand" "")
+ (match_operand:X87MODEF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (<MODE>mode, operands); DONE;")
+
+(define_insn "*movtf_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
+ (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+ "TARGET_SSE2
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 3:
+ case 4:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,2")
+ (if_then_else
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_split
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ "reload_completed
+ && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movxf_internal"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
+ "optimize_function_for_speed_p (cfun)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+;; Do not use integer registers when optimizing for size
+(define_insn "*movxf_internal_nointeger"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
+ "optimize_function_for_size_p (cfun)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || standard_80387_constant_p (operands[1]) > 0
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_split
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ "reload_completed
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ! (FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movdf_internal_rex64"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,m,!r,!o,Y2*x,Y2*x,Y2*x,m ,Yi,r ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,rm,r,F ,F ,C ,Y2*x,m ,Y2*x,r ,Yi"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]) > 0)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "mov{q}\t{%1, %0|%0, %1}";
+
+ case 5:
+ return "movabs{q}\t{%1, %0|%0, %1}";
+
+ case 6:
+ return "#";
+
+ case 7:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vxorpd\t%0, %d0";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ default:
+ gcc_unreachable ();
+ }
+ case 8:
+ case 9:
+ case 10:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "%vmovq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "%vmovlpd\t{%1, %d0|%d0, %1}";
+ case MODE_V2SF:
+ return "%vmovlps\t{%1, %d0|%d0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case 11:
+ case 12:
+ /* Handle broken assemblers that require movd instead of movq. */
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,imov,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set (attr "modrm")
+ (if_then_else
+ (and (eq_attr "alternative" "5") (eq_attr "type" "imov"))
+ (const_string "0")
+ (const_string "*")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "alternative" "5") (eq_attr "type" "imov"))
+ (const_string "8")
+ (const_string "*")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5,6")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "V1DF")
+ (const_string "1")
+ (const_string "*")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4,5,6,11,12")
+ (const_string "DI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "7,8")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "7")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures that resolve dependencies on whole
+ SSE registers, use a movapd to break the dependency
+ chain; otherwise use a short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "8")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures that resolve dependencies on register
+ parts, we may avoid the extra work needed to zero out the
+ upper part of the register. */
+ (eq_attr "alternative" "9")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_insn "*movdf_internal"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))]
+ "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && optimize_function_for_speed_p (cfun)
+ && TARGET_INTEGER_DFMODE_MOVES
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vxorpd\t%0, %d0";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "%vmovq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlpd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlps\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "V1DF")
+ (const_string "1")
+ (const_string "*")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures that resolve dependencies on whole
+ SSE registers, use a movapd to break the dependency
+ chain; otherwise use a short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures that resolve dependencies on register
+ parts, we may avoid the extra work needed to zero out the
+ upper part of the register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+;; Moving is usually shorter when only FP registers are used. This separate
+;; movdf pattern avoids the use of integer registers for FP operations
+;; when optimizing for size.
+
+(define_insn "*movdf_internal_nointeger"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))]
+ "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_INTEGER_DFMODE_MOVES)
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && optimize_function_for_size_p (cfun)
+ && !memory_operand (operands[0], DFmode)
+ && standard_80387_constant_p (operands[1]) > 0)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || ((optimize_function_for_size_p (cfun)
+ || !TARGET_MEMORY_MISMATCH_STALL)
+ && memory_operand (operands[0], DFmode)))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vxorpd\t%0, %d0";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "%vmovq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlpd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlps\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "V1DF")
+ (const_string "1")
+ (const_string "*")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures that resolve dependencies on whole
+ SSE registers, use a movapd to break the dependency
+ chain; otherwise use a short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures that resolve dependencies on register
+ parts, we may avoid the extra work needed to zero out the
+ upper part of the register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_split
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ! (ANY_FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (ANY_FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand"
+ "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
+ (match_operand:SF 1 "general_operand"
+ "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]) > 0)
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], SFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "mov{l}\t{%1, %0|%0, %1}";
+ case 5:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "%vpxor\t%0, %d0";
+ else
+ return "%vxorps\t%0, %d0";
+ case 6:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovss\t{%1, %d0|%d0, %1}";
+ case 7:
+ if (TARGET_AVX)
+ return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+ : "vmovss\t{%1, %0|%0, %1}";
+ else
+ return "movss\t{%1, %0|%0, %1}";
+ case 8:
+ return "%vmovss\t{%1, %0|%0, %1}";
+
+ case 9: case 10: case 14: case 15:
+ return "movd\t{%1, %0|%0, %1}";
+
+ case 11:
+ return "movq\t{%1, %0|%0, %1}";
+
+ case 12: case 13:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures that resolve dependencies on whole
+ SSE registers, use a movaps to break the dependency
+ chain; otherwise use a short move to avoid extra work.
+
+ Do the same for architectures that resolve dependencies
+ on the register parts. While in DF mode it is better to
+ always handle just the register parts, SF mode is
+ different due to the lack of instructions to load just
+ part of the register. It is better to keep the whole
+ register in single-precision format to avoid problems
+ when using packed logical operations. */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (GET_MODE (operands[0]) == TFmode
+ || GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == DFmode
+ || GET_MODE (operands[0]) == SFmode)
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ rtx c = operands[2];
+ rtx r = operands[0];
+
+ if (GET_CODE (r) == SUBREG)
+ r = SUBREG_REG (r);
+
+ if (SSE_REG_P (r))
+ {
+ if (!standard_sse_constant_p (c))
+ FAIL;
+ }
+ else if (FP_REG_P (r))
+ {
+ if (standard_80387_constant_p (c) < 1)
+ FAIL;
+ }
+ else if (MMX_REG_P (r))
+ FAIL;
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (float_extend (match_operand 1 "memory_operand" "")))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (GET_MODE (operands[0]) == TFmode
+ || GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == DFmode
+ || GET_MODE (operands[0]) == SFmode)
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ rtx c = operands[2];
+ rtx r = operands[0];
+
+ if (GET_CODE (r) == SUBREG)
+ r = SUBREG_REG (r);
+
+ if (SSE_REG_P (r))
+ {
+ if (!standard_sse_constant_p (c))
+ FAIL;
+ }
+ else if (FP_REG_P (r))
+ {
+ if (standard_80387_constant_p (c) < 1)
+ FAIL;
+ }
+ else if (MMX_REG_P (r))
+ FAIL;
+})
+
+;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (match_operand:X87MODEF 1 "immediate_operand" ""))]
+ "reload_completed && FP_REGNO_P (REGNO (operands[0]))
+ && (standard_80387_constant_p (operands[1]) == 8
+ || standard_80387_constant_p (operands[1]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (neg:X87MODEF (match_dup 0)))]
+{
+ REAL_VALUE_TYPE r;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ if (real_isnegzero (&r))
+ operands[1] = CONST0_RTX (<MODE>mode);
+ else
+ operands[1] = CONST1_RTX (<MODE>mode);
+})
+
+(define_insn "swapxf"
+ [(set (match_operand:XF 0 "register_operand" "+f")
+ (match_operand:XF 1 "register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "XF")])
+
+(define_insn "*swap<mode>"
+ [(set (match_operand:MODEF 0 "fp_register_operand" "+f")
+ (match_operand:MODEF 1 "fp_register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_80387 || reload_completed"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "<MODE>")])
+
+;; Zero extension instructions
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_64BIT)
+ {
+ emit_insn (gen_zero_extendsidi2_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendsidi2_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))]
+ "TARGET_64BIT"
+ "@
+ mov{l}\t{%1, %k0|%k0, %1}
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
+ (set_attr "prefix_0f" "0,*,*,*,*,*")
+ (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (zero_extend:DI (match_dup 0)))]
+ "TARGET_64BIT"
+ [(set (match_dup 4) (const_int 0))]
+ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+;; %%% Kill me once multi-word ops are sane.
+(define_insn "zero_extendsidi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "@
+ #
+ #
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 4) (const_int 0))]
+ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (const_int 0))]
+ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+(define_insn "zero_extend<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+ "TARGET_64BIT"
+ "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn_and_split "zero_extendhisi2_and"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendhisi2_movzwl"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "!TARGET_ZERO_EXTEND_WITH_AND
+ || optimize_function_for_size_p (cfun)"
+ "movz{wl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendqi<mode>2"
+ [(parallel
+ [(set (match_operand:SWI24 0 "register_operand" "")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*zero_extendqi<mode>2_and"
+ [(set (match_operand:SWI24 0 "register_operand" "=r,?&q")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "<MODE>")])
+
+;; When the source and destination do not overlap, clear the destination
+;; first and then do the movb (see the sketch after the split below).
+(define_split
+ [(set (match_operand:SWI24 0 "register_operand" "")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+ && ANY_QI_REG_P (operands[0])
+ && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (strict_low_part (match_dup 2)) (match_dup 1))]
+{
+ operands[2] = gen_lowpart (QImode, operands[0]);
+ ix86_expand_clear (operands[0]);
+})
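+
+;; For illustration, zero extending %al into a non-overlapping %ecx
+;; through the split above becomes roughly
+;;	xorl	%ecx, %ecx
+;;	movb	%al, %cl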
+
+(define_insn "*zero_extendqi<mode>2_movzbl_and"
+ [(set (match_operand:SWI24 0 "register_operand" "=r,r")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+ "#"
+ [(set_attr "type" "imovx,alu1")
+ (set_attr "mode" "<MODE>")])
+
+;; For the movzbl case strip only the clobber
+(define_split
+ [(set (match_operand:SWI24 0 "register_operand" "")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+ && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+ [(set (match_dup 0)
+ (zero_extend:SWI24 (match_dup 1)))])
+
+;; Zero extend to SImode to avoid partial register stalls.
+(define_insn "*zero_extendqi<mode>2_movzbl"
+ [(set (match_operand:SWI24 0 "register_operand" "=r")
+ (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
+ "movz{bl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; The rest is handled by a single and (sketched below).
+(define_split
+ [(set (match_operand:SWI24 0 "register_operand" "")
+ (zero_extend:SWI24 (match_operand:QI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(parallel [(set (match_dup 0) (and:SWI24 (match_dup 0) (const_int 255)))
+ (clobber (reg:CC FLAGS_REG))])])
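+
+;; For illustration, zero extending %al in place within %eax through
+;; the split above is just
+;;	andl	$255, %eax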
+
+;; Sign extension instructions
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))]
+ ""
+{
+ if (!TARGET_64BIT)
+ {
+ emit_insn (gen_extendsidi2_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*extendsidi2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+ "TARGET_64BIT"
+ "@
+ {cltq|cdqe}
+ movs{lq|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")
+ (set_attr "prefix_0f" "0")
+ (set_attr "modrm" "0,1")])
+
+(define_insn "extendsidi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+ "!TARGET_64BIT"
+ "#")
+
+;; Extend to memory case when source register does die.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(reload_completed
+ && dead_or_set_p (insn, operands[1])
+ && !reg_mentioned_p (operands[1], operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 4) (match_dup 1))]
+ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend to memory case when source register does not die.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+
+ emit_move_insn (operands[3], operands[1]);
+
+ /* Generate a cltd if possible and doing so is profitable. */
+ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ && true_regnum (operands[1]) == AX_REG
+ && true_regnum (operands[2]) == DX_REG)
+ {
+ emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
+ }
+ else
+ {
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31)));
+ }
+ emit_move_insn (operands[4], operands[2]);
+ DONE;
+})
+
+;; Extend to register case. Optimize case where source and destination
+;; registers match and cases where we can use cltd.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+
+ if (true_regnum (operands[3]) != true_regnum (operands[1]))
+ emit_move_insn (operands[3], operands[1]);
+
+ /* Generate a cltd if possible and doing so is profitable. */
+ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ && true_regnum (operands[3]) == AX_REG
+ && true_regnum (operands[4]) == DX_REG)
+ {
+ emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
+ DONE;
+ }
+
+ if (true_regnum (operands[4]) != true_regnum (operands[1]))
+ emit_move_insn (operands[4], operands[1]);
+
+ emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
+ DONE;
+})
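+
+;; For illustration: extending %eax into %edx:%eax reduces to a single
+;;	cltd
+;; while extending %ecx into a hypothetical %edi:%esi pair becomes
+;;	movl	%ecx, %esi
+;;	movl	%ecx, %edi
+;;	sarl	$31, %edi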
+
+(define_insn "extend<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+ "TARGET_64BIT"
+ "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=*a,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "*extendhisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (zero_extend:DI
+ (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1, %k0|%k0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ ""
+ "movs{bl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
+ "TARGET_64BIT"
+ "movs{bl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=*a,r")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cbtw|cbw}";
+ default:
+ return "movs{bw|x}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "HI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cbtw is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+;; Conversions between float and double.
+
+;; These are all no-ops in the model used for the 80387.
+;; So just emit moves.
+
+;; %%% Kill these when call knows how to work out a DFmode push earlier.
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+ (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:MODEF 1 "fp_register_operand" "")))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+ && standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+ }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+ cvtss2sd:
+ unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+ cvtps2pd xmm2,xmm1
+ We do the conversion after reload to avoid producing 128-bit spills,
+ which might lead to an ICE on a 32-bit target. The sequence is
+ unlikely to combine anyway. */
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_VECTOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 2)
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 3)
+ (parallel [(const_int 0) (const_int 1)]))))]
+{
+ operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+ /* Use movss for loading from memory, unpcklps reg, reg for registers.
+ Try to avoid the move when the unpacking can be done in the source. */
+ if (REG_P (operands[1]))
+ {
+ /* If it is unsafe to overwrite upper half of source, we need
+ to move to destination and unpack there. */
+ if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+ || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+ && true_regnum (operands[0]) != true_regnum (operands[1]))
+ {
+ rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+ emit_move_insn (tmp, operands[1]);
+ }
+ else
+ operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+ emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+ operands[3]));
+ }
+ else
+ emit_insn (gen_vec_setv4sf_0 (operands[3],
+ CONST0_RTX (V4SFmode), operands[1]));
+})
+
+(define_insn "*extendsfdf2_mixed"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "SF,XF,DF")])
+
+(define_insn "*extendsfdf2_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "%vcvtss2sd\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "*extendsfdf2_i387"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF,XF")])
+
+(define_expand "extend<mode>xf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))]
+ "TARGET_80387"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if (standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+ }
+})
+
+(define_insn "*extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:XF
+ (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>,XF")])
+
+;; %%% This seems like bad, bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case. Otherwise this is just like a simple move
+;; insn. So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
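+
+;; For illustration (frame offset hypothetical): truncating %st(0) to
+;; SFmode with the result wanted in an SSE register ends up roughly as
+;;	fstps	-8(%ebp)
+;;	movss	-8(%ebp), %xmm0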
+
+;; Conversion from DFmode to SFmode.
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+ ;
+ else if (flag_unsafe_math_optimizations)
+ ;
+ else
+ {
+ enum ix86_stack_slot slot = (virtuals_instantiated
+ ? SLOT_TEMP
+ : SLOT_VIRTUAL);
+ rtx temp = assign_386_stack_local (SFmode, slot);
+ emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+ DONE;
+ }
+})
+
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+ cvtsd2ss:
+ unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+ cvtpd2ps xmm2,xmm1
+ We do the conversion after reload to avoid producing 128-bit spills,
+ which might lead to an ICE on a 32-bit target. The sequence is
+ unlikely to combine anyway. */
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_VECTOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 2)
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_dup 4))
+ (match_dup 3)))]
+{
+ operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ operands[3] = CONST0_RTX (V2SFmode);
+ operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+ /* Use movsd for loading from memory, unpcklpd for registers.
+ Try to avoid the move when the unpacking can be done in the
+ source, or when SSE3 movddup is available. */
+ if (REG_P (operands[1]))
+ {
+ if (!TARGET_SSE3
+ && true_regnum (operands[0]) != true_regnum (operands[1])
+ && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+ || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+ {
+ rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
+ else if (!TARGET_SSE3)
+ operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+ }
+ else
+ emit_insn (gen_sse2_loadlpd (operands[4],
+ CONST0_RTX (V2DFmode), operands[1]));
+})
+
+(define_expand "truncdfsf2_with_temp"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (float_truncate:SF (match_operand:DF 1 "" "")))
+ (clobber (match_operand:SF 2 "" ""))])])
+
+(define_insn "*truncdfsf_fast_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+ case 1:
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,ssecvt")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
+(define_insn "*truncdfsf_fast_sse"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_fast_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,Y2 ,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))]
+ "TARGET_MIX_SSE_I387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+ case 1:
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+ default:
+ return "#";
+ }
+}
+ [(set_attr "type" "fmov,ssecvt,multi,multi,multi")
+ (set_attr "unit" "*,*,i387,i387,i387")
+ (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ default:
+ return "#";
+ }
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float_truncate:SF
+ (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_80387
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !TARGET_MIX_SSE_I387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "fp_register_operand" "")))
+ (clobber (match_operand 2 "" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));")
+
+;; Conversion from XFmode to {SF,DF}mode
+
+(define_expand "truncxf<mode>2"
+ [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_dup 2))])]
+ "TARGET_80387"
+{
+ if (flag_unsafe_math_optimizations)
+ {
+ rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
+ if (reg != operands[0])
+ emit_move_insn (operands[0], reg);
+ DONE;
+ }
+ else
+ {
+ enum ix86_stack_slot slot = (virtuals_instantiated
+ ? SLOT_TEMP
+ : SLOT_VIRTUAL);
+ operands[2] = assign_386_stack_local (<MODE>mode, slot);
+ }
+})
+
+(define_insn "*truncxfsf2_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncxfdf2_mixed"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "DF")])
+
+(define_insn "truncxf<mode>2_i387_noop"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*truncxf<mode>2_i387"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+ "TARGET_80387 && reload_completed"
+ [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
+ (set (match_dup 0) (match_dup 2))])
+
+(define_split
+ [(set (match_operand:MODEF 0 "memory_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+ "TARGET_80387"
+ [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))])
+
+;; Signed conversion to DImode.
+
+(define_expand "fix_truncxfdi2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>di2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+ if (TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+ emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
+
+;; Signed conversion to SImode.
+
+(define_expand "fix_truncxfsi2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>si2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (TARGET_FISTTP
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+ emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
+
+;; Signed conversion to HImode.
+
+(define_expand "fix_trunc<mode>hi2"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unsigned_fix:SI
+ (match_operand:MODEF 1 "nonimmediate_operand" "")))
+ (use (match_dup 2))
+ (clobber (match_scratch:<ssevecmode> 3 ""))
+ (clobber (match_scratch:<ssevecmode> 4 ""))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+ enum machine_mode mode = <MODE>mode;
+ enum machine_mode vecmode = <ssevecmode>mode;
+ REAL_VALUE_TYPE TWO31r;
+ rtx two31;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ real_ldexp (&TWO31r, &dconst1, 31);
+ two31 = const_double_from_real_value (TWO31r, mode);
+ two31 = ix86_build_const_vector (vecmode, true, two31);
+ operands[2] = force_reg (vecmode, two31);
+})
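+;;
+;; ix86_split_convert_uns_si_sse implements the usual branchless idiom;
+;; roughly (a sketch, not the exact generated sequence):
+;;   if (x < 2147483648.0)
+;;     result = (int) x;
+;;   else
+;;     result = (int) (x - 2147483648.0) ^ 0x80000000;
+;; with the mask formed by a vector compare against the 2^31 constant
+;; built above.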
+
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+ [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+ (unsigned_fix:SI
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
+ (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
+ (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+ (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_convert_uns_si_sse (operands);
+ DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion, which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
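+;; A sketch of the idea, assuming the value fits in HImode: the bit
+;; pattern of (unsigned short) f equals the low 16 bits of (int) f,
+;; so a plain cvtts[sd]2si followed by taking the HImode subreg of
+;; the SImode result is sufficient.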
+
+(define_expand "fixuns_trunc<mode>hi2"
+ [(set (match_dup 2)
+ (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "")
+ (subreg:HI (match_dup 2) 0))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+;; When SSE is available, it is always faster to use it!
+(define_insn "fix_trunc<mode>di_sse"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+ "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix_rex" "1")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")])
+
+(define_insn "fix_trunc<mode>si_sse"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")])
+
+;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
+(define_peephole2
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (match_operand:MODEF 1 "memory_operand" ""))
+ (set (match_operand:SSEMODEI24 2 "register_operand" "")
+ (fix:SSEMODEI24 (match_dup 0)))]
+ "TARGET_SHORTEN_X87_SSE
+ && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ())
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))])
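+;;
+;; i.e. turn (a sketch of the intent)
+;;	movsd	mem, %xmm0
+;;	cvttsd2si %xmm0, %eax
+;; into the single memory-operand form
+;;	cvttsd2si mem, %eax
+;; when the intermediate register is dead afterwards.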
+
+;; Avoid vector decoded forms of the instruction.
+(define_peephole2
+ [(match_scratch:DF 2 "Y2")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
+ "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))])
+
+(define_peephole2
+ [(match_scratch:SF 2 "x")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
+ "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))])
+
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
+ operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp"
+ [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f")))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "* return output_fix_trunc (insn, operands, 1);"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "#"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "register_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))])
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "memory_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])])
+
+;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for a
+;; description of the machinery.  Note the clobber of FLAGS_REG: the
+;; i387 control word calculation (inserted by LCM in the mode switching
+;; pass) may use insns that clobber FLAGS_REG.  See the
+;; emit_i387_cw_initialization () function in i386.c.
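+;;
+;; The emitted sequence is roughly of the following form (a sketch; the
+;; exact control word code depends on target tuning):
+;;	fnstcw	%2		; save the current control word
+;;	movzwl	%2, %eax
+;;	orw	$0x0c00, %ax	; select round-toward-zero
+;;	movw	%ax, %3
+;;	fldcw	%3
+;;	fistp	%0		; store with truncation
+;;	fldcw	%2		; restore the original control word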
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_truncdi_i387"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (fix:DI (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fix_truncdi_i387_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (fix:DI (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])])
+
+(define_insn "fix_trunc<mode>_i387"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])])
+
+(define_insn "x86_fnstcw_1"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+ "TARGET_80387"
+ "fnstcw\t%0"
+ [(set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_fldcw_1"
+ [(set (reg:HI FPCR_REG)
+ (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
+ "TARGET_80387"
+ "fldcw\t%0"
+ [(set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+;; Conversion between fixed point and floating point.
+
+;; Even though we only accept memory inputs, the backend _really_
+;; wants to be able to do this between registers.
+
+(define_expand "floathi<mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)")
+
+;; Pre-reload splitter to add memory clobber to the pattern.
+(define_insn_and_split "*floathi<mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "register_operand" "")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (float:X87MODEF (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);")
+
+(define_insn "*floathi<mode>2_i387_with_temp"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_operand:HI 2 "memory_operand" "=m,m"))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+ "#"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "<MODE>")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floathi<mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+ "fild%Z1\t%1"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "register_operand" "")))
+ (clobber (match_operand:HI 2 "memory_operand" ""))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "memory_operand" "")))
+ (clobber (match_operand:HI 2 "memory_operand" ""))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && reload_completed"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+
+(define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))]
+ "TARGET_80387
+ || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)"
+{
+ if (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+ && !X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode))
+ {
+ rtx reg = gen_reg_rtx (XFmode);
+ rtx insn;
+
+ emit_insn (gen_float<SSEMODEI24:mode>xf2 (reg, operands[1]));
+
+ if (<X87MODEF:MODE>mode == SFmode)
+ insn = gen_truncxfsf2 (operands[0], reg);
+ else if (<X87MODEF:MODE>mode == DFmode)
+ insn = gen_truncxfdf2 (operands[0], reg);
+ else
+ gcc_unreachable ();
+
+ emit_insn (insn);
+ DONE;
+ }
+})
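+;;
+;; When X87_ENABLE_FLOAT rules out a direct fild for this mode pair
+;; (reduced x87 precision control), the code above goes through XFmode
+;; instead, e.g. floatdisf2 becomes floatdixf2 followed by truncxfsf2.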
+
+;; Pre-reload splitter to add memory clobber to the pattern.
+(define_insn_and_split "*float<SSEMODEI24:mode><X87MODEF:mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))]
+ "((TARGET_80387
+ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
+ && (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387))
+ || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode == SImode
+ && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS
+ && optimize_function_for_speed_p (cfun)
+ && flag_trapping_math)
+ || !(TARGET_INTER_UNIT_CONVERSIONS
+ || optimize_function_for_size_p (cfun)))))
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
+ (clobber (match_dup 2))])]
+{
+ operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+ /* Avoid store forwarding (partial memory) stall penalty
+ by passing DImode value through XMM registers. */
+ if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && optimize_function_for_speed_p (cfun))
+ {
+ emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
+ (float:MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x")))
+ (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<ssevecmode>")
+ (set_attr "unit" "*,i387,*,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double,double")
+ (set_attr "bdver1_decode" "*,*,double,direct,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "@
+ fild%Z1\t%1
+ #"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "<MODE>,<ssevecmode>")
+ (set_attr "unit" "i387,*")
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "bdver1_decode" "*,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387"
+ "#"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
+ (set_attr "bdver1_decode" "*,*,double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && TARGET_INTER_UNIT_CONVERSIONS
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:MODEF (match_dup 2)))])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "@
+ fild%Z1\t%1
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "fmov,sseicvt,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "<SSEMODEI24:MODE>mode == DImode") (const_int 0)))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "unit" "i387,*,*")
+ (set_attr "athlon_decode" "*,double,direct")
+ (set_attr "amdfam10_decode" "*,vector,double")
+ (set_attr "bdver1_decode" "*,double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "@
+ fild%Z1\t%1
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "<SSEMODEI24:MODE>mode == DImode") (const_int 0)))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "bdver1_decode" "*,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,x")
+ (float:MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "r,m,!x")))
+ (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
+ (set_attr "athlon_decode" "double,direct,double")
+ (set_attr "amdfam10_decode" "vector,double,double")
+ (set_attr "bdver1_decode" "double,direct,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:SI 2 "memory_operand" ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+
+ if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ }
+ /* We can ignore a possibly trapping value in the
+ high part of the SSE register for non-trapping math. */
+ else if (SSE_REG_P (op1) && !flag_trapping_math)
+ operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+ else
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[2]));
+ }
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "")))
+ (clobber (match_operand:SI 2 "memory_operand" ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(const_int 0)]
+{
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+
+ if (GENERAL_REG_P (op1))
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ if (TARGET_INTER_UNIT_MOVES)
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ else
+ {
+ operands[5] = ix86_force_to_memory (GET_MODE (operands[1]),
+ operands[1]);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[5]));
+ ix86_free_from_memory (GET_MODE (operands[1]));
+ }
+ }
+ /* We can ignore a possibly trapping value in the
+ high part of the SSE register for non-trapping math. */
+ else if (SSE_REG_P (op1) && !flag_trapping_math)
+ operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+ else
+ gcc_unreachable ();
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(const_int 0)]
+{
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "<SSEMODEI24:MODE>mode == DImode") (const_int 0)))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "<SSEMODEI24:MODE>mode == DImode") (const_int 0)))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:MODEF (match_dup 2)))])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (SUBREG_REG (operands[0]))))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))])
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))]
+ "TARGET_80387
+ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)"
+ "@
+ fild%Z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+ "TARGET_80387
+ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)"
+ "fild%Z1\t%1"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "TARGET_80387
+ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "TARGET_80387
+ && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing the DImode value through XMM registers.
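+;;
+;; A sketch of the intended sequence for a register-pair input:
+;;	movd	%eax, %xmm0
+;;	movd	%edx, %xmm1
+;;	punpckldq %xmm1, %xmm0	; assemble the 64-bit value
+;;	movq	%xmm0, %2	; one 8-byte store, no partial forwarding
+;;	fildll	%2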
+
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF
+ (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_scratch:V4SI 3 "=X,x"))
+ (clobber (match_scratch:V4SI 4 "=X,x"))
+ (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+ "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+ (clobber (match_scratch:V4SI 3 ""))
+ (clobber (match_scratch:V4SI 4 ""))
+ (clobber (match_operand:DI 2 "memory_operand" ""))]
+ "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+ Assemble the 64-bit DImode value in an xmm register. */
+ emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+ operands[4]));
+
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+ (clobber (match_scratch:V4SI 3 ""))
+ (clobber (match_scratch:V4SI 4 ""))
+ (clobber (match_operand:DI 2 "memory_operand" ""))]
+ "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization. Also note that fild
+;; loads from memory only.
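+;;
+;; A sketch of the register-input case: the SImode value sits in an XMM
+;; register with its upper 32 bits zero (movd zero-extends), so
+;;	movq	%xmm0, %2	; store the zero-extended 64-bit value
+;;	fildll	%2		; exact: the value is non-negative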
+
+(define_insn "*floatunssi<mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+ (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+ (clobber (match_scratch:SI 3 "=X,x"))]
+ "!TARGET_64BIT
+ && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+ "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "memory_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+{
+ emit_move_insn (operands[3], operands[1]);
+ operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
+(define_expand "floatunssi<mode>2"
+ [(parallel
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_scratch:SI 3 ""))])]
+ "!TARGET_64BIT
+ && ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+ && TARGET_SSE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ {
+ ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+ DONE;
+ }
+ else
+ {
+ enum ix86_stack_slot slot = (virtuals_instantiated
+ ? SLOT_TEMP
+ : SLOT_VIRTUAL);
+ operands[2] = assign_386_stack_local (DImode, slot);
+ }
+})
+
+(define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+ "TARGET_64BIT && TARGET_SSE_MATH"
+ "x86_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+ "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+ && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+ if (TARGET_64BIT)
+ x86_emit_floatuns (operands);
+ else
+ ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+ DONE;
+})
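+;;
+;; x86_emit_floatuns uses the standard signed-conversion fixup; roughly
+;; (a sketch in C):
+;;   if ((long long) x >= 0)
+;;     result = (double) (long long) x;
+;;   else  /* halve with the low bit ORed in, convert, then double */
+;;     result = 2.0 * (double) (long long) ((x >> 1) | (x & 1));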
+
+;; Add instructions
+
+(define_expand "add<mode>3"
+ [(set (match_operand:SDWIM 0 "nonimmediate_operand" "")
+ (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")
+ (match_operand:SDWIM 2 "<general_operand>" "")))]
+ ""
+ "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
+
+(define_insn_and_split "*add<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ (plus:<DWI>
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
+ (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+ "#"
+ "reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0)
+ (plus:DWIH (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (plus:DWIH
+ (match_dup 4)
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
+
+(define_insn "*add<mode>3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ (plus:SWI48 (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "addqi3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "add{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lea_1"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (match_operand:P 1 "no_seg_address_operand" "p"))]
+ ""
+ "lea{<imodesuffix>}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*lea_2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %k0|%k0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*add<mode>_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,0,l<i>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{<imodesuffix>}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
+ }
+
+ default:
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
+ if (which_alternative == 2)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "3")
+ (const_string "lea")
+ (match_operand:SWI48 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+;; It may seem that a nonimmediate operand is the proper one for operand 1.
+;; The addsi_1 pattern allows a nonimmediate operand at that place, and we
+;; take care in ix86_binary_operator_ok not to allow two memory operands,
+;; so proper swapping will be done in reload.  This allows patterns
+;; constructed from addsi_1 to match.
+
+(define_insn "*addsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+ (match_operand:SI 2 "general_operand" "g,0,li"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
+ if (which_alternative == 1)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
+ if (x86_maybe_negate_const_int (&operands[2], SImode))
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], HImode))
+ return "sub{w}\t{%2, %0|%0, %2}";
+
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,r")
+ (match_operand:HI 2 "general_operand" "rmn,rn,0,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* For most processors, ADD is faster than LEA. This alternative
+ was added to use ADD as much as possible. */
+ if (which_alternative == 2)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (x86_maybe_negate_const_int (&operands[2], HImode))
+ return "sub{w}\t{%2, %0|%0, %2}";
+
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "3")
+ (const_string "lea")
+ (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "HI,HI,HI,SI")])
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*addqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 2);
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], QImode))
+ {
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "QI,QI,SI")])
+
+;; %%% Potential partial reg stall on alternatives 3 and 4. What to do?
+(define_insn "*addqi_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,q,r,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,r")
+ (match_operand:QI 2 "general_operand" "qmn,qn,0,rn,0,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 3 || which_alternative == 4);
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ /* For most processors, ADD is faster than LEA. These alternatives
+ were added to use ADD as much as possible. */
+ if (which_alternative == 2 || which_alternative == 4)
+ {
+ rtx tmp;
+ tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+ }
+
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (x86_maybe_negate_const_int (&operands[2], QImode))
+ {
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "5")
+ (const_string "lea")
+ (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
+
+(define_insn "*addqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qnm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[1] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[1] == constm1_rtx);
+ return "dec{b}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[1], QImode))
+ return "sub{b}\t{%1, %0|%0, %1}";
+
+ return "add{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 1 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu1")))
+ (set (attr "memory")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (const_string "none")))
+ (set_attr "mode" "QI")])
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (plus (match_operand 1 "register_operand" "")
+ (match_operand 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && ix86_lea_for_add_ok (insn, operands)"
+ [(const_int 0)]
+{
+ rtx pat;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
+ may confuse gen_lowpart. */
+ if (mode != Pmode)
+ {
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ }
+
+ pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+ operands[0] = gen_lowpart (SImode, operands[0]);
+
+ if (TARGET_64BIT && mode != Pmode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
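+;;
+;; i.e. rewrite "addl %ebx, %eax" (which writes FLAGS_REG) as
+;; "leal (%eax,%ebx), %eax" when the flags result is unused
+;; (a representative example).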
+
+;; Convert add to the lea pattern to avoid flags dependency.
+;; ??? This pattern handles immediate operands that do not satisfy the
+;; immediate operand predicate (LEGITIMATE_CONSTANT_P) of the previous pattern.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 1) (match_dup 2)))])
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && ix86_lea_for_add_ok (insn, operands)"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[2] = gen_lowpart (DImode, operands[2]);
+})
+
+(define_insn "*add<mode>_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+ (plus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{<imodesuffix>}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SWI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*addsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], SImode))
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*add<mode>_3"
+ [(set (reg FLAGS_REG)
+ (compare
+ (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{<imodesuffix>}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SWI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*addsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (neg:SI (match_operand:SI 2 "general_operand" "g"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], SImode))
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept arbitrary immediates, because in case
+; of overflow the result would be wrong.  Also, the carry flag is reversed
+; relative to cmp, so the conversion is valid only for comparisons that do
+; not depend on it.
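+;
+; A hedged illustration (assumed encodings): a flags-only compare against
+; 128 needs a full 32-bit immediate, since 8bit immediate fields are
+; sign-extended:
+;   cmp $128, %rbx    # 7-byte encoding
+;   add $-128, %rbx   # 4-byte encoding; same ZF/SF/OF, carry reversed
+; This is also why const128_operand gets length_immediate "1" below.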
+
+(define_insn "*adddi_4"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+ (clobber (match_scratch:DI 0 "=rm"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], DImode))
+ return "add{q}\t{%2, %0|%0, %2}";
+
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept arbitrary immediates, because in case
+; of overflow the result would be wrong.  Also, the carry flag is reversed
+; relative to cmp, so the conversion is valid only for comparisons that do
+; not depend on it.
+
+(define_insn "*add<mode>_4"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:SWI124 1 "nonimmediate_operand" "0")
+ (match_operand:SWI124 2 "const_int_operand" "n")))
+ (clobber (match_scratch:SWI124 0 "=<r>m"))]
+ "ix86_match_ccmode (insn, CCGCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{<imodesuffix>}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{<imodesuffix>}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:<MODE> 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{<imodesuffix>}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{<imodesuffix>}\t%0";
+ }
+
+ default:
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SWI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*addqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "addqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "Qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "add{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+;; The lea patterns for modes narrower than Pmode need to be matched by
+;; several insns that are later converted to a real lea by splitters.
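+;;
+;; A hedged sketch (assumed example): a HImode computation such as
+;;   (plus:HI (plus:HI reg1 reg2) (const_int 4))
+;; is matched as "#" and split after reload into a Pmode address
+;; calculation, e.g.  lea 0x4(%ecx,%edx), %eax  on ia32, whose low
+;; word carries the HImode result.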
+
+(define_insn_and_split "*lea_general_1"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "register_operand" "r"))
+ (match_operand 3 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[2])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "nonmemory_operand" "ri")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "nonmemory_operand" "ri"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "register_operand" "r"))
+ (match_operand 4 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[3])"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+ pat = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
+ operands[2]),
+ operands[3]),
+ operands[4]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 4 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+;; Subtract instructions
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:SDWIM 0 "nonimmediate_operand" "")
+ (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")
+ (match_operand:SDWIM 2 "<general_operand>" "")))]
+ ""
+ "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;")
+
+(define_insn_and_split "*sub<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+ (minus:<DWI>
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0")
+ (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "#"
+ "reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (minus:DWIH (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:DWIH
+ (match_dup 4)
+ (plus:DWIH
+ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
+
+(define_insn "*sub<mode>_1"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "sub{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*sub<mode>_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*sub<mode>_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %1|%1, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; Add with carry and subtract with borrow
+
+(define_expand "<plusminus_insn><mode>3_carry"
+ [(parallel
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "")
+ (plusminus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "")
+ (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:SWI 2 "<general_operand>" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)")
+
+(define_insn "*<plusminus_insn><mode>3_carry"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plusminus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (plus:SWI
+ (match_operator 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*addsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (plus:SI (match_operator 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "general_operand" "g")))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "adc{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (plus:SI (match_operator 3 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "general_operand" "g")))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+;; Overflow setting add and subtract instructions
+
+(define_insn "*add<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (match_dup 1)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sub<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:SWI
+ (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+ (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 0)))]
+ ""
+ "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<plusminus_insn><mode>3_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 1)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plusminus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SI
+ (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (match_dup 1)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "<plusminus_insn>xf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (plusminus:XF
+ (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (plusminus:MODEF
+ (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
+
+;; Multiply instructions
+
+(define_expand "mul<mode>3"
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (mult:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "<general_operand>" "")))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_expand "mulqi3"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH")
+
+;; On AMDFAM10
+;; IMUL reg32/64, reg32/64, imm8 Direct
+;; IMUL reg32/64, mem32/64, imm8 VectorPath
+;; IMUL reg32/64, reg32/64, imm32 Direct
+;; IMUL reg32/64, mem32/64, imm32 VectorPath
+;; IMUL reg32/64, reg32/64 Direct
+;; IMUL reg32/64, mem32/64 Direct
+;;
+;; On BDVER1, all above IMULs use DirectPath
+
+(define_insn "*mul<mode>3_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r,r")
+ (mult:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SWI48 2 "<general_operand>" "K,<i>,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*mulsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SI 2 "general_operand" "K,i,mr"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "SI")])
+
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 VectorPath
+;; IMUL reg16, mem16, imm8 VectorPath
+;; IMUL reg16, reg16, imm16 VectorPath
+;; IMUL reg16, mem16, imm16 VectorPath
+;; IMUL reg16, reg16 Direct
+;; IMUL reg16, mem16 Direct
+;;
+;; On BDVER1, all HI MULs use DoublePath
+
+(define_insn "*mulhi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:HI 2 "general_operand" "K,n,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1,2")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(eq_attr "alternative" "0,1")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "bdver1_decode" "double")
+ (set_attr "mode" "HI")])
+
+;; On AMDFAM10 and BDVER1
+;; MUL reg8 Direct
+;; MUL mem8 Direct
+
+(define_insn "*mulqi3_1"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "QI")])
+
+(define_expand "<u>mul<mode><dwi>3"
+ [(parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" ""))
+ (any_extend:<DWI>
+ (match_operand:DWIH 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_expand "<u>mulqihi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI
+ (any_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (any_extend:HI
+ (match_operand:QI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH")
+
+(define_insn "*<u>mul<mode><dwi>3_1"
+ [(set (match_operand:<DWI> 0 "register_operand" "=A")
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
+ (any_extend:<DWI>
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{<imodesuffix>}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<u>mulqihi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI
+ (any_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (any_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "QI")])
+
+(define_expand "<s>mul<mode>3_highpart"
+ [(parallel [(set (match_operand:SWI48 0 "register_operand" "")
+ (truncate:SWI48
+ (lshiftrt:<DWI>
+ (mult:<DWI>
+ (any_extend:<DWI>
+ (match_operand:SWI48 1 "nonimmediate_operand" ""))
+ (any_extend:<DWI>
+ (match_operand:SWI48 2 "register_operand" "")))
+ (match_dup 4))))
+ (clobber (match_scratch:SWI48 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
+
+(define_insn "*<s>muldi3_highpart_1"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI
+ (any_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (any_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "DI")])
+
+(define_insn "*<s>mulsi3_highpart_1"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (any_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (any_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<s>mulsi3_highpart_zext"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (any_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32)))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<sgnprefix>mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "SI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "mulxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387")
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (mult:MODEF (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")
+
+;; Divide instructions
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "divxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (div:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387")
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (div:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "(TARGET_80387 && X87_ENABLE_ARITH (DFmode))
+ || (TARGET_SSE2 && TARGET_SSE_MATH)")
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (div:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "(TARGET_80387 && X87_ENABLE_ARITH (SFmode))
+ || TARGET_SSE_MATH"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], SFmode);
+ DONE;
+ }
+})
+
+;; Divmod instructions.
+
+(define_expand "divmod<mode>4"
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (div:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+ (set (match_operand:SWIM248 3 "register_operand" "")
+ (mod:SWIM248 (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;;	if (dividend and divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
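+;;
+;; A hedged sketch of the fast path (assumed example): when both values
+;; fit in a byte, a single
+;;   div %cl          # AL <- quotient, AH <- remainder
+;; replaces the much slower full-width divide; the split below emits a
+;; runtime test of the high bits to choose between the two forms.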
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (div:SWI48 (match_operand:SWI48 2 "register_operand" "")
+ (match_operand:SWI48 3 "nonimmediate_operand" "")))
+ (set (match_operand:SWI48 1 "register_operand" "")
+ (mod:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+
+(define_insn_and_split "divmod<mode>4_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=a")
+ (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+ (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWI48 1 "register_operand" "=&d")
+ (mod:SWI48 (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SWI48 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (mod:SWI48 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+ if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*divmod<mode>4"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 1)
+ (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SWIM248 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+ if (<MODE>mode != HImode
+ && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
+ operands[4] = operands[2];
+ else
+ {
+ /* Avoid use of cltd in favor of a mov+shift. */
+ emit_move_insn (operands[1], operands[2]);
+ operands[4] = operands[1];
+ }
+}
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*divmod<mode>4_noext"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=d")
+ (mod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_operand:SWIM248 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "idiv{<imodesuffix>}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "divmodqi4"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (div:QI
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonimmediate_operand" "")))
+ (set (match_operand:QI 3 "register_operand" "")
+ (mod:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+{
+ rtx div, mod, insn;
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (HImode);
+ tmp1 = gen_reg_rtx (HImode);
+
+ /* Extend operands[1] to HImode. Generate 8bit divide. Result is
+ in AX. */
+ emit_insn (gen_extendqihi2 (tmp1, operands[1]));
+ emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));
+
+ /* Extract remainder from AH. */
+ tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8));
+ insn = emit_move_insn (operands[3], tmp1);
+
+ mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ /* Extract quotient from AL. */
+ insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+ div = gen_rtx_DIV (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, div);
+
+ DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+;; Change div/mod to HImode and extend the second argument to HImode
+;; so that the mode of div/mod matches the mode of its arguments.
+;; Otherwise combine may fail.
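+;;
+;; A hedged worked example: after  idiv %bl  with AX = 100 and BL = 7,
+;; AL = 14 (the quotient) and AH = 2 (the remainder), i.e.
+;; AX = (rem << 8) | quot, which is the ior/ashift form of the pattern
+;; below.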
+(define_insn "divmodhiqi3"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (ior:HI
+ (ashift:HI
+ (zero_extend:HI
+ (truncate:QI
+ (mod:HI (match_operand:HI 1 "register_operand" "0")
+ (sign_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+ (const_int 8))
+ (zero_extend:HI
+ (truncate:QI
+ (div:HI (match_dup 1) (sign_extend:HI (match_dup 2)))))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "idiv{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+(define_expand "udivmod<mode>4"
+ [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
+ (udiv:SWIM248
+ (match_operand:SWIM248 1 "register_operand" "")
+ (match_operand:SWIM248 2 "nonimmediate_operand" "")))
+ (set (match_operand:SWIM248 3 "register_operand" "")
+ (umod:SWIM248 (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Split with 8bit unsigned divide:
+;;	if (dividend and divisor are in [0-255])
+;; use 8bit unsigned integer divide
+;; else
+;; use original integer divide
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "")
+ (match_operand:SWI48 3 "nonimmediate_operand" "")))
+ (set (match_operand:SWI48 1 "register_operand" "")
+ (umod:SWI48 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_8BIT_IDIV
+ && TARGET_QIMODE_MATH
+ && can_create_pseudo_p ()
+ && !optimize_insn_for_size_p ()"
+ [(const_int 0)]
+ "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+
+(define_insn_and_split "udivmod<mode>4_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=a")
+ (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+ (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWI48 1 "register_operand" "=&d")
+ (umod:SWI48 (match_dup 2) (match_dup 3)))
+ (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SWI48 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (umod:SWI48 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*udivmod<mode>4"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=&d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SWIM248 (match_dup 2) (match_dup 3)))
+ (set (match_dup 1)
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*udivmod<mode>4_noext"
+ [(set (match_operand:SWIM248 0 "register_operand" "=a")
+ (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SWIM248 1 "register_operand" "=d")
+ (umod:SWIM248 (match_dup 2) (match_dup 3)))
+ (use (match_operand:SWIM248 4 "register_operand" "1"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "div{<imodesuffix>}\t%3"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "udivmodqi4"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (udiv:QI
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "nonimmediate_operand" "")))
+ (set (match_operand:QI 3 "register_operand" "")
+ (umod:QI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+{
+ rtx div, mod, insn;
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (HImode);
+ tmp1 = gen_reg_rtx (HImode);
+
+ /* Extend operands[1] to HImode. Generate 8bit divide. Result is
+ in AX. */
+ emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));
+
+ /* Extract remainder from AH. */
+ tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8));
+ tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0);
+ insn = emit_move_insn (operands[3], tmp1);
+
+ mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, mod);
+
+ /* Extract quotient from AL. */
+ insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));
+
+ div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
+ set_unique_reg_note (insn, REG_EQUAL, div);
+
+ DONE;
+})
+
+(define_insn "udivmodhiqi3"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (ior:HI
+ (ashift:HI
+ (zero_extend:HI
+ (truncate:QI
+ (mod:HI (match_operand:HI 1 "register_operand" "0")
+ (zero_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))))
+ (const_int 8))
+ (zero_extend:HI
+ (truncate:QI
+ (div:HI (match_dup 1) (zero_extend:HI (match_dup 2)))))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "div{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+;; We cannot use div/idiv for double-width division, because it raises a
+;; "division by zero" fault on overflow, and that is not what we expect
+;; from truncate.  Because a true (non-truncating) double-width division
+;; is never generated, we can't create this insn anyway.
+;
+;(define_insn ""
+; [(set (match_operand:SI 0 "register_operand" "=a")
+; (truncate:SI
+; (udiv:DI (match_operand:DI 1 "register_operand" "A")
+; (zero_extend:DI
+; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
+; (set (match_operand:SI 3 "register_operand" "=d")
+; (truncate:SI
+; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
+; (clobber (reg:CC FLAGS_REG))]
+; ""
+; "div{l}\t{%2, %0|%0, %2}"
+; [(set_attr "type" "idiv")])
+
+;;- Logical AND instructions
+
+;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
+;; Note that this excludes ah.
+
+(define_expand "testsi_ccno_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "nonmemory_operand" ""))
+ (const_int 0)))])
+
+(define_expand "testqi_ccz_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "nonmemory_operand" ""))
+ (const_int 0)))])
+
+(define_expand "testdi_ccno_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:DI (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "x86_64_szext_general_operand" ""))
+ (const_int 0)))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
+
+(define_insn "*testdi_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:DI
+ (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm")
+ (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re"))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,0,1,1")
+ (set_attr "mode" "SI,SI,DI,DI,DI")])
+
+(define_insn "*testqi_1_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
+ (match_operand:QI 1 "general_operand" "n,n,qn,n"))
+ (const_int 0)))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn,
+ CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 3)
+ {
+ if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "test{l}\t{%1, %k0|%k0, %1}";
+ }
+ return "test{b}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1,1")
+ (set_attr "mode" "QI,QI,QI,SI")
+ (set_attr "pent_pair" "uv,np,uv,np")])
+
+(define_insn "*test<mode>_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SWI124
+ (match_operand:SWI124 0 "nonimmediate_operand" "%!*a,<r>,<r>m")
+ (match_operand:SWI124 1 "general_operand" "<i>,<i>,<r><i>"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "test{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "<MODE>")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ext_ccno_0"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" ""))
+ (const_int 0)))])
+
+(define_insn "*testqi_ext_0"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")
+ (set_attr "length_immediate" "1")
+ (set_attr "modrm" "1")
+ (set_attr "pent_pair" "np")])
+
+(define_insn "*testqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "Q")))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "general_operand" "Qm")))
+ (const_int 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:DI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:DI 1 "const_int_operand" "")
+ (match_operand:DI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+       /* Ensure that the resulting mask is a zero- or sign-extended operand.  */
+ && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64
+ && INTVAL (operands[1]) > 32))
+ && (GET_MODE (operands[0]) == SImode
+ || GET_MODE (operands[0]) == DImode
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+;; Combine likes to form bit extractions for some tests. Humor it.
+(define_insn "*testqi_ext_3"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:SI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+ && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ && (GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(zero_extract
+ (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+{
+ rtx val = operands[2];
+ HOST_WIDE_INT len = INTVAL (operands[3]);
+ HOST_WIDE_INT pos = INTVAL (operands[4]);
+ HOST_WIDE_INT mask;
+ enum machine_mode mode, submode;
+
+ mode = GET_MODE (val);
+ if (MEM_P (val))
+ {
+ /* ??? Combine likes to put non-volatile mem extractions in QImode
+ no matter the size of the test. So find a mode that works. */
+ if (! MEM_VOLATILE_P (val))
+ {
+ mode = smallest_mode_for_size (pos + len, MODE_INT);
+ val = adjust_address (val, mode, 0);
+ }
+ }
+ else if (GET_CODE (val) == SUBREG
+ && (submode = GET_MODE (SUBREG_REG (val)),
+ GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+ && pos + len <= GET_MODE_BITSIZE (submode)
+ && GET_MODE_CLASS (submode) == MODE_INT)
+ {
+ /* Narrow a paradoxical subreg to prevent partial register stalls. */
+ mode = submode;
+ val = SUBREG_REG (val);
+ }
+ else if (mode == HImode && pos + len <= 8)
+ {
+ /* Small HImode tests can be converted to QImode. */
+ mode = QImode;
+ val = gen_lowpart (QImode, val);
+ }
+
+ if (len == HOST_BITS_PER_WIDE_INT)
+ mask = -1;
+ else
+ mask = ((HOST_WIDE_INT)1 << len) - 1;
+ mask <<= pos;
+
+ operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
+})
+
+;; Convert HImode/SImode test instructions with an immediate operand to
+;; QImode ones.  The i386 does not allow encoding a test with an 8bit
+;; sign-extended immediate, so this is a relatively important trick.
+;; Do the conversion only post-reload to avoid limiting the register
+;; class to QI regs.
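+;;
+;; A hedged illustration (assumed encodings):
+;;   test $0x20, %ebx   # testl, 6 bytes (4-byte immediate)
+;;   test $0x20, %bl    # testb, 3 bytes (1-byte immediate)
+;; Both set ZF identically whenever the mask fits in the low byte; the
+;; 255/127 mask limits in the splits below keep the SF-dependent cases
+;; correct as well.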
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && QI_REG_P (operands[2])
+ && GET_MODE (operands[2]) != QImode
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~(255 << 8)))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~(127 << 8))))"
+ [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);")
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && GET_MODE (operands[2]) != QImode
+ && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~255))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~127)))"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (QImode, operands[2]);
+ operands[3] = gen_lowpart (QImode, operands[3]);")
+
+;; %%% This used to optimize known byte-wide AND operations to memory,
+;; and sometimes to QImode registers. If this is considered useful,
+;; it should be done with splitters.
+
+(define_expand "and<mode>3"
+ [(set (match_operand:SWIM 0 "nonimmediate_operand" "")
+ (and:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "")
+ (match_operand:SWIM 2 "<general_szext_operand>" "")))]
+ ""
+ "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
+
+(define_insn "*anddi_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (and:DI
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (CONST_INT_P (operands[2]));
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+ else
+ return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "and{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "and{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,*,0")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "type" "imovx")
+ (and (ne (symbol_ref "INTVAL (operands[2]) == 0xff") (const_int 0))
+ (match_operand 1 "ext_QIreg_nomode_operand" "")))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "SI,DI,DI,SI")])
+
+(define_insn "*andsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:SI 2 "general_operand" "ri,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (CONST_INT_P (operands[2]));
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bl|x}\t{%1, %0|%0, %1}";
+ else
+ return "movz{wl|x}\t{%1, %0|%0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "and{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "type" "imovx")
+ (and (ne (symbol_ref "INTVAL (operands[2]) == 0xff") (const_int 0))
+ (match_operand 1 "ext_QIreg_nomode_operand" "")))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "length_immediate" "*,*,0")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*andsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*andhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:HI 2 "general_operand" "rn,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (CONST_INT_P (operands[2]));
+ gcc_assert (INTVAL (operands[2]) == 0xff);
+ return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ return "and{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,0")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (and (eq_attr "type" "imovx")
+ (match_operand 1 "ext_QIreg_nomode_operand" ""))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "HI,HI,SI")])
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*andqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, QImode, operands)"
+ "@
+ and{b}\t{%2, %0|%0, %2}
+ and{b}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_dup 0)
+ (const_int -65536)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
+ || optimize_function_for_size_p (cfun)"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (HImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -256)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && reload_completed"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (QImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -65281)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && reload_completed"
+ [(parallel [(set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);")
+
+(define_insn "*anddi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:DI
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
+ (and:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, DImode, operands)"
+ "@
+ and{l}\t{%k2, %k0|%k0, %k2}
+ and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI,DI,DI")])
+
+(define_insn "*andqi_2_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn,n"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (AND, QImode, operands)
+ && ix86_match_ccmode (insn,
+ CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 2)
+ {
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+ return "and{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "and{b}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*and<mode>_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:SWI124
+ (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI124 2 "general_operand" "<g>,<r><i>"))
+ (const_int 0)))
+ (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
+ (and:SWI124 (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
+ "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*andsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (and:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*andqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "nonimmediate_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 0) (match_dup 1)))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+;; ??? A bug in recog prevents it from recognizing a const_int as an
+;; operand to zero_extend in andqi_ext_1. It was checking explicitly
+;; for a QImode operand, which of course failed.
+(define_insn "andqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+;; Generated by the peephole that translates test into and.  This shows
+;; up often in fp comparisons.
+(define_insn "*andqi_ext_0_cc"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with an immediate operand to shorter
+;; QImode equivalents when possible.
+;; Don't do the splitting with memory operands, since it introduces a risk
+;; of memory mismatch stalls.  We may want to do the splitting when
+;; optimizing for size, but that can (should?) be handled by generic code
+;; instead.
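+;;
+;; A hedged illustration (the encodings shown are for the common EAX
+;; case): a mask that only clears bits inside the high byte, e.g.
+;;   andl $0xfffff0ff, %eax   ; 5 bytes, needs a full 32-bit immediate
+;; can be emitted via this split as
+;;   andb $0xf0, %ah          ; 3 bytes, QImode form on the high byte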
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (and:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since AND can be encoded with a sign-extended 8-bit immediate, this is
+;; only profitable when the 7th bit is not set.
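+;;
+;; A hedged example: with bit 7 clear the wide form cannot use the
+;; sign-extended imm8 encoding, so
+;;   andl $0xffffff7f, %eax   ; 5-byte form with a 32-bit immediate
+;; shrinks to
+;;   andb $0x7f, %al
+;; whereas $0xffffff80 already fits the 3-byte sign-extended-immediate
+;; encoding (83 /4 ib) and gains nothing from the split.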
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(~INTVAL (operands[2]) & ~255)
+ && !(INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Logical inclusive and exclusive OR instructions
+
+;; %%% This used to optimize known byte-wide operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SWIM 0 "nonimmediate_operand" "")
+ (any_or:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "")
+ (match_operand:SWIM 2 "<general_operand>" "")))]
+ ""
+ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*<code><mode>_1"
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=r,rm")
+ (any_or:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*<code>qi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+ (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+ "@
+ <logic>{b}\t{%2, %0|%0, %2}
+ <logic>{b}\t{%2, %0|%0, %2}
+ <logic>{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+(define_insn "*<code>si_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<logic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<code>si_1_zext_imm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (any_or:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<logic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<code>qi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+ (any_or:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qmn,qn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "<logic>{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code><mode>_2"
+ [(set (reg FLAGS_REG)
+ (compare (any_or:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+ (any_or:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand
+;; ??? The special case for an immediate operand is missing - it is tricky.
+(define_insn "*<code>si_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<logic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<code>si_2_zext_imm"
+ [(set (reg FLAGS_REG)
+ (compare (any_or:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<logic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<code>qi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (any_or:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (any_or:QI (match_dup 0) (match_dup 1)))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "<logic>{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code><mode>_3"
+ [(set (reg FLAGS_REG)
+ (compare (any_or:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<g>"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code>qi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (any_or:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "<logic>{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (any_or:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "<logic>{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (any_or:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "<logic>{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*<code>qi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (any_or:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (any_or (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (any_or:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with a sign-extended 8-bit immediate, this is
+;; only profitable when the 7th bit is set.
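+;;
+;; A hedged example:
+;;   orl $0x80, %eax   ; imm8 would sign-extend to 0xffffff80, so this
+;;                     ; needs a full 32-bit immediate
+;; shrinks to
+;;   orb $0x80, %al
+;; while $0x40 already fits the sign-extended imm8 encoding (83 /1 ib).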
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (any_or (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (any_or:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+(define_expand "xorqi_cc_ext_1"
+ [(parallel [
+ (set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" ""))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))])])
+
+(define_insn "*xorqi_cc_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "1")
+ (set_attr "mode" "QI")])
+
+;; Negation instructions
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:SDWIM 0 "nonimmediate_operand" "")
+ (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;")
+
+(define_insn_and_split "*neg<dwi>2_doubleword"
+ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro")
+ (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, <DWI>mode, operands)"
+ "#"
+ "reload_completed"
+ [(parallel
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0)))
+ (set (match_dup 0) (neg:DWIH (match_dup 1)))])
+ (parallel
+ [(set (match_dup 2)
+ (plus:DWIH (match_dup 3)
+ (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+ (const_int 0))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 2)
+ (neg:DWIH (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);")
+
+(define_insn "*neg<mode>2_1"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
+ "neg{<imodesuffix>}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "<MODE>")])
+
+;; Combine is quite creative about this pattern.
+(define_insn "*negsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI
+ (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
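+;;
+;; A hedged illustration: after "negl %eax", ZF is set exactly when the
+;; original value was zero, but SF and OF describe 0 - x rather than x
+;; itself, so only the zero flag (CCZmode) can be trusted by a consumer.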
+
+(define_insn "*neg<mode>2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (neg:SWI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
+ "neg{<imodesuffix>}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*negsi2_cmpz_zext"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (lshiftrt:DI
+ (neg:DI (ashift:DI
+ (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+ (const_int 32)))
+ (const_int 32)))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; Changing the sign of FP values can also be done using the integer unit.
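+;;
+;; A hedged sketch of the idea for an SFmode value held in a GPR:
+;;   xorl $0x80000000, %eax   ; neg: flip the sign bit
+;;   andl $0x7fffffff, %eax   ; abs: clear the sign bit
+;; which is roughly what the splitters below produce for the !r
+;; alternatives.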
+
+(define_expand "<code><mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*absneg<mode>2_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+ (match_operator:MODEF 3 "absneg_operator"
+ [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+ (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
+ "#")
+
+(define_insn "*absneg<mode>2_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+ (match_operator:MODEF 3 "absneg_operator"
+ [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+ (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "#")
+
+(define_insn "*absneg<mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+ (match_operator:X87MODEF 3 "absneg_operator"
+ [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "#")
+
+(define_expand "<code>tf2"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (absneg:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+(define_insn "*absnegtf2_sse"
+ [(set (match_operand:TF 0 "register_operand" "=x,x")
+ (match_operator:TF 3 "absneg_operator"
+ [(match_operand:TF 1 "register_operand" "0,x")]))
+ (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2"
+ "#")
+
+;; Splitters for fp abs and neg.
+
+(define_split
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operator 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "absneg_operator"
+ [(match_operand 1 "register_operand" "")]))
+ (use (match_operand 2 "nonimmediate_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode vmode = GET_MODE (operands[2]);
+ rtx tmp;
+
+ operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
+ operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+ if (GET_CODE (operands[3]) == ABS)
+ tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+ else
+ tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+ operands[3] = tmp;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand:V4SF 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ if (TARGET_64BIT)
+ {
+ tmp = gen_lowpart (DImode, operands[0]);
+ tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
+ operands[0] = tmp;
+
+ if (GET_CODE (operands[1]) == ABS)
+ tmp = const0_rtx;
+ else
+ tmp = gen_rtx_NOT (DImode, tmp);
+ }
+ else
+ {
+ operands[0] = gen_highpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_rtx_REG (SImode,
+ true_regnum (operands[0])
+ + (TARGET_64BIT ? 1 : 2));
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = GEN_INT (0x7fff);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = GEN_INT (0x8000);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+;; Conditionalize these after reload.  If they match before reload, we
+;; lose the clobber and the ability to use integer instructions.
+
+(define_insn "*<code><mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
+ "TARGET_80387
+ && (reload_completed
+ || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+ "f<absneg_mnemonic>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code>extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (absneg:DF (float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+ "f<absneg_mnemonic>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*<code>extendsfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (absneg:XF (float_extend:XF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "f<absneg_mnemonic>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*<code>extenddfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (absneg:XF (float_extend:XF
+ (match_operand:DF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "f<absneg_mnemonic>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+;; Copysign instructions
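+;;
+;; A hedged note on the underlying identity: copysign (x, y) combines
+;; the magnitude of x with the sign of y, i.e.
+;;   copysign (x, y) = (x & ~SIGNMASK) | (y & SIGNMASK)
+;; where SIGNMASK has only the sign bit set; the expanders below realize
+;; this with SSE bitwise operations on a constant sign mask (via
+;; ix86_expand_copysign).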
+
+(define_mode_iterator CSGNMODE [SF DF TF])
+(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
+(define_expand "copysign<mode>3"
+ [(match_operand:CSGNMODE 0 "register_operand" "")
+ (match_operand:CSGNMODE 1 "nonmemory_operand" "")
+ (match_operand:CSGNMODE 2 "register_operand" "")]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ "ix86_expand_copysign (operands); DONE;")
+
+(define_insn_and_split "copysign<mode>3_const"
+ [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+ (unspec:CSGNMODE
+ [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
+ (match_operand:CSGNMODE 2 "register_operand" "0")
+ (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+ UNSPEC_COPYSIGN))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_copysign_const (operands); DONE;")
+
+(define_insn "copysign<mode>3_var"
+ [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+ (unspec:CSGNMODE
+ [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x")
+ (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x")
+ (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
+ (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ "#")
+
+(define_split
+ [(set (match_operand:CSGNMODE 0 "register_operand" "")
+ (unspec:CSGNMODE
+ [(match_operand:CSGNMODE 2 "register_operand" "")
+ (match_operand:CSGNMODE 3 "register_operand" "")
+ (match_operand:<CSGNVMODE> 4 "" "")
+ (match_operand:<CSGNVMODE> 5 "" "")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:<CSGNVMODE> 1 ""))]
+ "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode)))
+ && reload_completed"
+ [(const_int 0)]
+ "ix86_split_copysign_var (operands); DONE;")
+
+;; One's complement instructions
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:SWIM 0 "nonimmediate_operand" "")
+ (not:SWIM (match_operand:SWIM 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
+
+(define_insn "*one_cmpl<mode>2_1"
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
+ (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+ "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+ "not{<imodesuffix>}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "<MODE>")])
+
+;; %%% Potential partial reg stall on alternative 1. What to do?
+(define_insn "*one_cmplqi2_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ "ix86_unary_operator_ok (NOT, QImode, operands)"
+ "@
+ not{b}\t%0
+ not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI,SI")])
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_insn "*one_cmpl<mode>2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (not:SWI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "<MODE>")])
+
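+;; NOT does not modify the flags on x86, so when the complement's result
+;; is compared against zero, the "#" insn above is split into an
+;; equivalent xor with -1, which does set the flags.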
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SWI (match_operand:SWI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:SWI 1 "nonimmediate_operand" "")
+ (not:SWI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:SWI (match_dup 3) (const_int -1)))])])
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (not:SI (match_dup 1))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SI (match_operand:SI 3 "register_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:DI 1 "register_operand" "")
+ (zero_extend:DI (not:SI (match_dup 3))))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
+
+;; Shift instructions
+
+;; DImode shifts are implemented using the i386 "shift double" opcode,
+;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
+;; is variable, then the count is in %cl and the "imm" operand is dropped
+;; from the assembler input.
+;;
+;; This instruction shifts the target reg/mem as usual, but instead of
+;; shifting in zeros, bits are shifted in from the reg operand.  If the
+;; insn is a left shift double, bits are taken from the high-order bits
+;; of reg; if the insn is a shift right double, bits are taken from the
+;; low-order bits of reg.  So if %eax is "1234" and %edx is "5678",
+;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
+;;
+;; Since sh[lr]d does not change the `reg' operand, that is done
+;; separately, making all shifts emit pairs of shift double and normal
+;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
+;; support a 63-bit shift, each shift where the count is in a reg expands
+;; to a pair of shifts, a branch, a shift by 32 and a label.
+;;
+;; If the shift count is a constant, we need never emit more than one
+;; shift pair, instead using moves and sign extension for counts greater
+;; than 31.
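+;;
+;; A hedged sketch (not taken verbatim from the expanders) of the
+;; variable-count sequence for a 64-bit left shift on a 32-bit target,
+;; with the value in %edx:%eax and the count in %cl:
+;;   shldl %cl, %eax, %edx   ; shift double into the high word
+;;   sall  %cl, %eax         ; shift the low word
+;;   testb $32, %cl          ; counts >= 32 need the fixup
+;;   je    1f
+;;   movl  %eax, %edx        ; shift by 32: move the low word up...
+;;   xorl  %eax, %eax        ; ...and clear the low word
+;; 1: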
+
+(define_expand "ashl<mode>3"
+ [(set (match_operand:SDWIM 0 "<shift_operand>" "")
+ (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
+
+(define_insn "*ashl<mode>3_doubleword"
+ [(set (match_operand:DWI 0 "register_operand" "=&r,r")
+ (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "n,0")
+ (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_split
+ [(set (match_operand:DWI 0 "register_operand" "")
+ (ashift:DWI (match_operand:DWI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+
+(define_peephole2
+ [(match_scratch:DWIH 3 "r")
+ (parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+ (ashift:<DWI>
+ (match_operand:<DWI> 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+
+(define_insn "x86_64_shld"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashift:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Jc"))
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shld"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashift:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Ic"))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_expand "x86_shift<mode>_adj_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+ (match_dup 4))
+ (const_int 0)))
+ (set (match_operand:SWI48 0 "register_operand" "")
+ (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SWI48 1 "register_operand" "")
+ (match_dup 0)))
+ (set (match_dup 1)
+ (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SWI48 3 "register_operand" "")
+ (match_dup 1)))]
+ "TARGET_CMOVE"
+ "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
+
+(define_expand "x86_shift<mode>_adj_2"
+ [(use (match_operand:SWI48 0 "register_operand" ""))
+ (use (match_operand:SWI48 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2],
+ GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ ix86_expand_clear (operands[1]);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+;; Avoid useless masking of the count operand.
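+;; A hedged C-level illustration: for
+;;   unsigned f (unsigned x, unsigned n) { return x << (n & 31); }
+;; the explicit (and ... 31) is matched here and a single sall %cl
+;; shift is emitted, since the hardware masks the count anyway.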
+(define_insn "*ashl<mode>3_mask"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (ashift:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "0")
+ (subreg:QI
+ (and:SI
+ (match_operand:SI 2 "register_operand" "c")
+ (match_operand:SI 3 "const_int_operand" "n")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+ return "sal{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashl<mode>3_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "add{<imodesuffix>}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{<imodesuffix>}\t%0";
+ else
+ return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashlsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%k0, %k0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "HI,SI")])
+
+(define_insn "*ashlqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t%k0, %k0";
+ else
+ return "add{b}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%k0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI,SI")])
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*ashlqi3_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t%k0, %k0";
+ else
+ return "add{b}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%k0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI,SI,SI")])
+
+(define_insn "*ashlqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (ashift:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[1] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[1] == const1_rtx);
+ return "add{b}\t%0, %0";
+
+ default:
+ if (operands[1] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 1 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift1")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift1")
+ (and (match_operand 1 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+;; Convert the shift into the lea pattern to avoid a flags dependency.
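+;; A hedged example: when the destination differs from the source,
+;;   movl %eax, %edx
+;;   sall $3, %edx           ; writes the flags
+;; can become
+;;   leal 0(,%eax,8), %edx   ; writes no flags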
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "index_register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (mode != Pmode)
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+ pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+ operands[0] = gen_lowpart (SImode, operands[0]);
+
+ if (TARGET_64BIT && mode != Pmode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
+
+;; Convert the shift into the lea pattern to avoid a flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (ashift:SI (match_operand:SI 1 "index_register_operand" "")
+ (match_operand:QI 2 "const_int_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (mult:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashl<mode>3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (ashift:SWI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{<imodesuffix>}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{<imodesuffix>}\t%0";
+ else
+ return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ashlsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%k0, %k0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashl<mode>3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{<imodesuffix>}\t%0, %0";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{<imodesuffix>}\t%0";
+ else
+ return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set (attr "length_immediate")
+ (if_then_else
+ (ior (eq_attr "type" "alu")
+ (and (eq_attr "type" "ishift")
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+;; See comment above `ashl<mode>3' about how this works.
+
+(define_expand "<shiftrt_insn><mode>3"
+ [(set (match_operand:SDWIM 0 "<shift_operand>" "")
+ (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn "*<shiftrt_insn><mode>3_mask"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (any_shiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "0")
+ (subreg:QI
+ (and:SI
+ (match_operand:SI 2 "register_operand" "c")
+ (match_operand:SI 3 "const_int_operand" "n")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+ return "<shiftrt>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<shiftrt_insn><mode>3_doubleword"
+ [(set (match_operand:DWI 0 "register_operand" "=r")
+ (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+ [(const_int 0)]
+ "ix86_split_<shiftrt_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+
+(define_peephole2
+ [(match_scratch:DWIH 3 "r")
+ (parallel [(set (match_operand:<DWI> 0 "register_operand" "")
+ (any_shiftrt:<DWI>
+ (match_operand:<DWI> 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_<shiftrt_insn> (operands, operands[3], <DWI>mode); DONE;")
+
+(define_insn "x86_64_shrd"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashiftrt:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Jc"))
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "x86_shrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashiftrt:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Ic"))
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")
+ (set_attr "bdver1_decode" "vector")])
+
+(define_insn "ashrdi3_cvt"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && INTVAL (operands[2]) == 63
+ && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "@
+ {cqto|cqo}
+ sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "DI")])
+
+(define_insn "ashrsi3_cvt"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) == 31
+ && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cvt_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*d,r")
+ (zero_extend:DI
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+ (match_operand:QI 2 "const_int_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && INTVAL (operands[2]) == 31
+ && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_expand "x86_shift<mode>_adj_3"
+ [(use (match_operand:SWI48 0 "register_operand" ""))
+ (use (match_operand:SWI48 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2],
+ GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
+ GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
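+
+;; In C terms, the expander above emits roughly the following (a sketch;
+;; lo and hi are operands 0 and 1, count is operand 2, w the word width):
+;;
+;;   if (count & w)      /* the shift crossed the word boundary  */
+;;     {
+;;       lo = hi;        /* low half receives the old high half  */
+;;       hi >>= w - 1;   /* high half becomes pure sign fill     */
+;;     }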
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{<imodesuffix>}\t%0";
+ else
+ return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "ishift")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{l}\t%k0";
+ else
+ return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+}
+ [(set_attr "type" "ishift")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>qi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (any_shiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_REG_STALL
+ || (operands[1] == const1_rtx
+ && TARGET_SHIFT1))"
+{
+ if (operands[1] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{b}\t%0";
+ else
+ return "<shiftrt>{b}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "ishift1")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 1 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*<shiftrt_insn><mode>3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (any_shiftrt:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (const_int 0)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && TARGET_SHIFT1))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{<imodesuffix>}\t%0";
+ else
+ return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "ishift")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<shiftrt_insn>si3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && TARGET_SHIFT1))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{l}\t%k0";
+ else
+ return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+}
+ [(set_attr "type" "ishift")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn><mode>3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (any_shiftrt:SWI
+ (match_operand:SWI 1 "register_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (const_int 0)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && TARGET_SHIFT1))
+ && ix86_match_ccmode (insn, CCGOCmode)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<shiftrt>{<imodesuffix>}\t%0";
+ else
+ return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "ishift")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+;; Rotate instructions
+
+(define_expand "<rotate_insn>ti3"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (any_rotate:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_64BIT"
+{
+ if (const_1_to_63_operand (operands[2], VOIDmode))
+ emit_insn (gen_ix86_<rotate_insn>ti3_doubleword
+ (operands[0], operands[1], operands[2]));
+ else
+ FAIL;
+
+ DONE;
+})
+
+(define_expand "<rotate_insn>di3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (any_rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (TARGET_64BIT)
+ ix86_expand_binary_operator (<CODE>, DImode, operands);
+ else if (const_1_to_31_operand (operands[2], VOIDmode))
+ emit_insn (gen_ix86_<rotate_insn>di3_doubleword
+ (operands[0], operands[1], operands[2]));
+ else
+ FAIL;
+
+ DONE;
+})
+
+(define_expand "<rotate_insn><mode>3"
+ [(set (match_operand:SWIM124 0 "nonimmediate_operand" "")
+ (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+(define_insn "*<rotate_insn><mode>3_mask"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (any_rotate:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "0")
+ (subreg:QI
+ (and:SI
+ (match_operand:SI 2 "register_operand" "c")
+ (match_operand:SI 3 "const_int_operand" "n")) 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+ return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "<MODE>")])
+
+;; Implement rotation using two double-precision
+;; shift instructions and a scratch register.
+
+(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:DWIH 3 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
+ (lshiftrt:DWIH (match_dup 5)
+ (minus:QI (match_dup 6) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
+ (lshiftrt:DWIH (match_dup 3)
+ (minus:QI (match_dup 6) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
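+
+;; In C terms the split above computes, for halves lo/hi of operand 0,
+;; word width w and count c (a sketch; tmp is the scratch register):
+;;
+;;   tmp = lo;
+;;   lo  = (lo << c) | (hi >> (w - c));
+;;   hi  = (hi << c) | (tmp >> (w - c));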
+
+(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:DWIH 3 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+ (ashift:DWIH (match_dup 5)
+ (minus:QI (match_dup 6) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+ (ashift:DWIH (match_dup 3)
+ (minus:QI (match_dup 6) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+(define_insn "*<rotate_insn><mode>3_1"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<rotate>{<imodesuffix>}\t%0";
+ else
+ return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "rotate")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<rotate>{l}\t%k0";
+ else
+ return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+}
+ [(set_attr "type" "rotate")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 2 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>qi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (any_rotate:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_REG_STALL
+ || (operands[1] == const1_rtx
+ && TARGET_SHIFT1))"
+{
+ if (operands[1] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "<rotate>{b}\t%0";
+ else
+ return "<rotate>{b}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "rotate1")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (match_operand 1 "const1_operand" "")
+ (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (any_rotate:HI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])])
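+
+;; A 16-bit rotate by 8 in either direction just swaps the two bytes,
+;; which is what the bswap:HI (xchg{b} %ah, %al) form expresses; roughly
+;;
+;;   x = (uint16_t) ((x << 8) | (x >> 8));   /* x unsigned 16-bit */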
+
+;; Bit set / bit test instructions
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
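+
+;; For example, with both the extracted width and position equal to 8,
+;; the expander above matches a sign extraction of the high byte of the
+;; low word, roughly
+;;
+;;   r = (signed char) (x >> 8);   /* a movsbl-from-%ah style access */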
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
+
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "ext_register_operand" "")
+ (match_operand 1 "const8_operand" "")
+ (match_operand 2 "const8_operand" ""))
+ (match_operand 3 "register_operand" ""))]
+ ""
+{
+ rtx (*gen_mov_insv_1) (rtx, rtx);
+
+ /* Handle insertions to %ah et al. */
+ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+ FAIL;
+
+ /* From mips.md: insert_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[0], VOIDmode))
+ FAIL;
+
+ gen_mov_insv_1 = (TARGET_64BIT
+ ? gen_movdi_insv_1 : gen_movsi_insv_1);
+
+ emit_insn (gen_mov_insv_1 (operands[0], operands[3]));
+ DONE;
+})
+
+;; %%% bts, btr, btc, bt.
+;; In general these instructions are *slow* when applied to memory,
+;; since they enforce atomic operation.  When applied to registers,
+;; their speed depends on the cpu implementation.  They're never
+;; faster than the corresponding and/ior/xor operations, so in 32-bit
+;; mode there's no point in using them.  But in 64-bit mode we can't
+;; hold the relevant immediates within the instruction itself, so
+;; operating on bits in the high 32 bits of a register becomes easier.
+;;
+;; These are slow on Nocona, but fast on Athlon64. We do require the use
+;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+;; negdf respectively, so they can never be disabled entirely.
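+
+;; For reference, bt only sets the carry flag; a sketch of its effect
+;; on a register operand of width w:
+;;
+;;   CF = (x >> (pos % w)) & 1;   /* no operand is modified */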
+
+(define_insn "*btsq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "bts{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*btrq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*btcq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btc{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+;; Allow Nocona to avoid these instructions if a register is available.
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+ DONE;
+})
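+
+;; The replacement above is a plain bit-set; in C terms (a sketch):
+;;
+;;   x |= (uint64_t) 1 << i;
+;;
+;; For i >= 31 the mask no longer fits the sign-extended 32-bit
+;; immediate field of "or", so it is first loaded into the scratch.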
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (~lo, ~hi, DImode);
+ if (i >= 32)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI
+ (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_insn "*bt<mode>"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48
+ (match_operand:SWI48 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SWI48 1 "nonmemory_operand" "rN"))
+ (const_int 0)))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "bt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "<MODE>")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates cc0. Generate an equality comparison if `seq' or `sne'.
+
+(define_insn_and_split "*setcc_di_1"
+ [(set (match_operand:DI 0 "register_operand" "=q")
+ (match_operator:DI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+(define_insn_and_split "*setcc_si_1_and"
+ [(set (match_operand:SI 0 "register_operand" "=q")
+ (match_operator:SI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+(define_insn_and_split "*setcc_si_1_movzbl"
+ [(set (match_operand:SI 0 "register_operand" "=q")
+ (match_operator:SI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ "!TARGET_PARTIAL_REG_STALL
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+{
+ PUT_MODE (operands[1], QImode);
+ operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+(define_insn "*setcc_qi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+(define_insn "*setcc_qi_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so we help combine avoid creating
+;;
+;; seta %al
+;; testb %al, %al
+;; sete %al
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+ "PUT_MODE (operands[1], QImode);")
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+ "PUT_MODE (operands[1], QImode);")
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
+
+;; The SSE store flag instructions save 0 or 0xffffffff to the result.
+;; Subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is a NaN, but not in normalized form, so we can't represent
+;; it directly.
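+
+;; In C terms, the generated mask can emulate a conditional move roughly
+;; like this (a sketch; the logic is done with andps/andnps/orps):
+;;
+;;   mask   = (a < b) ? ~0u : 0u;         /* cmpltss result   */
+;;   result = (mask & x) | (~mask & y);   /* select x else y  */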
+
+(define_insn "*avx_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "avx_comparison_float_operator"
+ [(match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_AVX"
+ "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "sse_comparison_operator"
+ [(match_operand:MODEF 2 "register_operand" "0")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+(define_insn "*jcc_1"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "%+j%C1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
+
+(define_insn "*jcc_2"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "%+j%c1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so we help combine avoid creating
+;;
+;; seta %al
+;; testb %al, %al
+;; je Lfoo
+
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+ "PUT_MODE (operands[0], VOIDmode);")
+
+(define_split
+ [(set (pc)
+ (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ rtx new_op0 = copy_rtx (operands[0]);
+ operands[0] = new_op0;
+ PUT_MODE (new_op0, VOIDmode);
+ PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+ GET_MODE (XEXP (new_op0, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op0, VOIDmode))
+ FAIL;
+})
+
+;; zero_extend in SImode is correct also for DImode, since this is what the
+;; combine pass generates from a shift insn with a QImode operand.  Actually,
+;; the mode of operand 2 (the bit offset operand) doesn't matter, since the
+;; bt insn takes the appropriate modulo of the bit offset value.
+
+(define_insn_and_split "*jcc_bt<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:SWI48
+ (match_operand:SWI48 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:SI
+ (match_operand:QI 2 "register_operand" "r")))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; Avoid useless masking of the bit offset operand.  "and" in SImode is
+;; correct also for DImode, since this is what combine produces.
+(define_insn_and_split "*jcc_bt<mode>_mask"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:SWI48
+ (match_operand:SWI48 1 "register_operand" "r")
+ (const_int 1)
+ (and:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")))])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SWI48
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 4))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+(define_insn_and_split "*jcc_btsi_1"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(and:SI
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))
+ (const_int 1))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; Avoid useless masking of the bit offset operand.
+(define_insn_and_split "*jcc_btsi_mask_1"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(and:SI
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (subreg:QI
+ (and:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")) 0))
+ (const_int 1))
+ (const_int 0)])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+ && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 4))
+ (pc)))]
+ "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+;; Define combination compare-and-branch fp compare instructions to help
+;; combine.
+
+(define_insn "*fp_jcc_1_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && !TARGET_CMOVE"
+ "#")
+
+(define_insn "*fp_jcc_1r_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && !TARGET_CMOVE"
+ "#")
+
+(define_insn "*fp_jcc_2_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !TARGET_CMOVE"
+ "#")
+
+(define_insn "*fp_jcc_2r_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !TARGET_CMOVE"
+ "#")
+
+(define_insn "*fp_jcc_3_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && !TARGET_CMOVE"
+ "#")
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "nonimmediate_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], NULL_RTX, NULL_RTX);
+ DONE;
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "general_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], operands[5], NULL_RTX);
+ DONE;
+})
+
+;; The order of operands in *fp_jcc_4_387 is forced by combine's
+;; simplify_comparison () function.  The float operator is treated as RTX_OBJ
+;; with precedence over other operators and is always placed first.  Swap
+;; the condition and operands to match the ficom instruction.
+
+(define_insn "*fp_jcc_4_<mode>_387"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ix86_swapped_fp_comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
+ (match_operand 3 "register_operand" "f,f")])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a,a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[1]) == GET_MODE (operands[3])
+ && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
+ && !TARGET_CMOVE"
+ "#")
+
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ix86_swapped_fp_comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], NULL_RTX);
+ DONE;
+})
+
+;; %%% Kill this when reload knows how to do it.
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ix86_swapped_fp_comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "register_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], operands[2]);
+ DONE;
+})
+
+;; Unconditional and other jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmp\t%l0"
+ [(set_attr "type" "ibr")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 5)))
+ (set_attr "modrm" "0")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_insn "*indirect_jump"
+ [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
+ ""
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
+ relative. Convert the relative address to an absolute address. */
+ if (flag_pic)
+ {
+ rtx op0, op1;
+ enum rtx_code code;
+
+ /* We can't use @GOTOFF for text labels on VxWorks;
+ see gotoff_operand. */
+ if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ }
+ else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = pic_offset_table_rtx;
+ }
+ else
+ {
+ code = MINUS;
+ op0 = pic_offset_table_rtx;
+ op1 = operands[0];
+ }
+
+ operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+})
+
+(define_insn "*tablejump_1"
+ [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
+
+;; Similar, but match zero_extendhisi2_and, which adds a clobber.
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (parallel [(set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
+
+;; Call instructions.
+
+;; The predicates normally associated with named expanders are not properly
+;; checked for calls. This is a bug in the generic code, but it isn't that
+;; easy to fix. Ignore it for now and be prepared to fix things up.
+
+;; P6 processors will jump to the address after the decrement when %esp
+;; is used as a call operand, so they will execute the return address
+;; as code.  See Pentium Pro errata 70, Pentium 2 errata A33 and
+;; Pentium 3 errata E17.
+
+;; Call subroutine returning no value.
+
+(define_expand "call_pop"
+ [(parallel [(call (match_operand:QI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (NULL, operands[0], operands[1],
+ operands[2], operands[3], 0);
+ DONE;
+})
+
+(define_insn_and_split "*call_pop_0_vzeroupper"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "")))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_pop_0"
+ [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_pop_1_vzeroupper"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_pop_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*sibcall_pop_1_vzeroupper"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i,i")))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_pop_1"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i,i")))]
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "@
+ jmp\t%P0
+ jmp\t%A0"
+ [(set_attr "type" "call")])
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1);
+ DONE;
+})
+
+(define_insn_and_split "*call_0_vzeroupper"
+ [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_0"
+ [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ ""
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm"))
+ (match_operand 1 "" ""))]
+ "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*sibcall_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand 1 "" ""))]
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm"))
+ (match_operand 1 "" ""))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm"))
+ (match_operand 1 "" ""))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_ms_sysv"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm"))
+ (match_operand 1 "" ""))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*call_1_rex64_large_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_large"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+ (match_operand 1 "" ""))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U"))
+ (match_operand 1 "" ""))]
+ "TARGET_64BIT && SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[0], 0); }
+ [(set_attr "type" "call")])
+
+;; Call subroutine, returning value in operand 0
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 4 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], operands[4], 0);
+ DONE;
+})
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 3 is not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 3 is not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], NULL, 1);
+ DONE;
+})
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ /* In order to give reg-stack an easier job in validating two
+ coprocessor registers as containing a possible return value,
+ simply pretend the untyped call returns a complex long double
+ value.
+
+ We can't use SSE_REGPARM_MAX here since the callee is unprototyped
+ and should have the default ABI. */
+
+ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
+ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
+ operands[0], const0_rtx,
+ GEN_INT ((TARGET_64BIT
+ ? (ix86_abi == SYSV_ABI
+ ? X86_64_SSE_REGPARM_MAX
+ : X86_64_MS_SSE_REGPARM_MAX)
+ : X86_32_SSE_REGPARM_MAX)
+ - 1),
+ NULL, 0);
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; Prologue and epilogue instructions
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Do not schedule instructions accessing memory across this point.
+
+(define_expand "memory_blockage"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_blockage"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; As USE insns aren't meaningful after reload, this is used instead
+;; to prevent deleting instructions that set registers for PIC code.
+(define_insn "prologue_use"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Insn emitted into the body of a function to return from a function.
+;; This is only done if the function's epilogue is known to be simple.
+;; See comments for ix86_can_use_return_insn_p in i386.c.
+
+(define_expand "return"
+ [(return)]
+ "ix86_can_use_return_insn_p ()"
+{
+ if (crtl->args.pops_args)
+ {
+ rtx popc = GEN_INT (crtl->args.pops_args);
+ emit_jump_insn (gen_return_pop_internal (popc));
+ DONE;
+ }
+})
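+
+;; A hedged illustration (not from this file): for a callee-pops ABI
+;; such as stdcall, a function taking two int arguments has
+;; crtl->args.pops_args == 8, so the expander above emits
+;; return_pop_internal and the epilogue ends in "ret $8" rather than
+;; a plain "ret".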
+
+(define_insn "return_internal"
+ [(return)]
+ "reload_completed"
+ "ret"
+ [(set_attr "length" "1")
+ (set_attr "atom_unit" "jeu")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon
+;; and K8 incur on a single-byte RET instruction.
+
+(define_insn "return_internal_long"
+ [(return)
+ (unspec [(const_int 0)] UNSPEC_REP)]
+ "reload_completed"
+ "rep\;ret"
+ [(set_attr "length" "2")
+ (set_attr "atom_unit" "jeu")
+ (set_attr "length_immediate" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_pop_internal"
+ [(return)
+ (use (match_operand:SI 0 "const_int_operand" ""))]
+ "reload_completed"
+ "ret\t%0"
+ [(set_attr "length" "3")
+ (set_attr "atom_unit" "jeu")
+ (set_attr "length_immediate" "2")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_indirect_internal"
+ [(return)
+ (use (match_operand:SI 0 "register_operand" "r"))]
+ "reload_completed"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Generate nops. Operand 0 is the number of nops, up to 8.
+(define_insn "nops"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
+ UNSPECV_NOPS)]
+ "reload_completed"
+{
+ int num = INTVAL (operands[0]);
+
+ gcc_assert (num >= 1 && num <= 8);
+
+ while (num--)
+ fputs ("\tnop\n", asm_out_file);
+
+ return "";
+}
+ [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Pad to 16-byte boundary, max skip in op0. Used to avoid
+;; branch prediction penalty for the third jump in a 16-byte
+;; block on K8.
+
+(define_insn "pad"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+ ""
+{
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD
+ ASM_OUTPUT_MAX_SKIP_PAD (asm_out_file, 4, (int)INTVAL (operands[0]));
+#else
+ /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do
+ that. The align insn exists to avoid three jump instructions in a row,
+ which helps branch prediction, but that benefit hardly outweighs the
+ cost of the eight nops, on average, that the full-alignment pseudo
+ operation would insert. */
+#endif
+ return "";
+}
+ [(set_attr "length" "16")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_prologue (); DONE;")
+
+(define_insn "set_got"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "* return output_set_got (operands[0], NULL_RTX);"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_labelled"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(label_ref (match_operand 1 "" ""))]
+ UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "* return output_set_got (operands[0], operands[1]);"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
+ "TARGET_64BIT"
+ "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "length_address" "4")
+ (set_attr "mode" "DI")])
+
+(define_insn "set_rip_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))]
+ "TARGET_64BIT"
+ "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "length_address" "4")
+ (set_attr "mode" "DI")])
+
+(define_insn "set_got_offset_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(label_ref (match_operand 1 "" ""))]
+ UNSPEC_SET_GOT_OFFSET))]
+ "TARGET_64BIT"
+ "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
+ [(set_attr "type" "imov")
+ (set_attr "length_immediate" "0")
+ (set_attr "length_address" "8")
+ (set_attr "mode" "DI")])
+
+(define_expand "epilogue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_epilogue (1); DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_epilogue (0); DONE;")
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
+
+ /* Tricky bit: we write the address of the handler to which we will
+ be returning into someone else's stack frame, one word below the
+ stack address we wish to restore. */
+ tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
+ tmp = plus_constant (tmp, -UNITS_PER_WORD);
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (tmp, ra);
+
+ emit_jump_insn (gen_eh_return_internal ());
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn_and_split "eh_return_internal"
+ [(eh_return)]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+ "ix86_expand_epilogue (2); DONE;")
+
+(define_insn "leave"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
+ (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+(define_insn "leave_rex64"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
+ (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+;; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+ [(const_int 0)]
+ ""
+{
+ ix86_expand_split_stack_prologue ();
+ DONE;
+})
+
+;; In order to support the call/return predictor, we use a return
+;; instruction which the middle-end doesn't see.
+(define_insn "split_stack_return"
+ [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
+ UNSPECV_SPLIT_STACK_RETURN)]
+ ""
+{
+ if (operands[0] == const0_rtx)
+ return "ret";
+ else
+ return "ret\t%0";
+}
+ [(set_attr "atom_unit" "jeu")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "const0_operand" "")
+ (const_int 1)
+ (const_int 3)))
+ (set (attr "length_immediate")
+ (if_then_else (match_operand:SI 0 "const0_operand" "")
+ (const_int 0)
+ (const_int 2)))])
+
+;; If there are operand 0 bytes available on the stack, jump to
+;; operand 1.
+
+(define_expand "split_stack_space_check"
+ [(set (pc) (if_then_else
+ (ltu (minus (reg SP_REG)
+ (match_operand 0 "register_operand" ""))
+ (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+{
+ rtx reg, size, limit;
+
+ reg = gen_reg_rtx (Pmode);
+ size = force_reg (Pmode, operands[0]);
+ emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size));
+ limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_STACK_CHECK);
+ limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit));
+ ix86_expand_branch (GEU, reg, limit, operands[1]);
+
+ DONE;
+})
+
+;; Bit manipulation instructions.
+
+(define_expand "ffs<mode>2"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (match_operand:SWI48 1 "nonimmediate_operand" "")
+ (const_int 0)))
+ (set (match_operand:SWI48 0 "register_operand" "")
+ (ctz:SWI48 (match_dup 1)))])
+ (set (match_dup 0) (if_then_else:SWI48
+ (eq (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_dup 2)
+ (match_dup 0)))
+ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (<MODE>mode == SImode && !TARGET_CMOVE)
+ {
+ emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
+ DONE;
+ }
+ operands[2] = gen_reg_rtx (<MODE>mode);
+})
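+
+;; A hedged sketch of the sequence this expander produces for
+;;   int f (int x) { return __builtin_ffs (x); }
+;; on a CMOV target (register choice illustrative only):
+;;   movl   $-1, %edx      ; operands[2] = -1
+;;   bsfl   %edi, %eax     ; sets ZF when the input is zero
+;;   cmovel %edx, %eax     ; keep -1 for a zero input
+;;   addl   $1, %eax       ; ffs = ctz + 1, or 0 when x == 0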
+
+(define_insn_and_split "ffssi2_no_cmove"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:SI 2 "=&q"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (ctz:SI (match_dup 1)))])
+ (set (strict_low_part (match_dup 3))
+ (eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[3] = gen_lowpart (QImode, operands[2]);
+ ix86_expand_clear (operands[2]);
+})
+
+(define_insn "*ffs<mode>_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 0)))
+ (set (match_operand:SWI48 0 "register_operand" "=r")
+ (ctz:SWI48 (match_dup 1)))]
+ ""
+ "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ctz<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_BMI)
+ return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else
+ return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set (attr "prefix_rep") (symbol_ref "TARGET_BMI"))
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "clz<mode>2"
+ [(parallel
+ [(set (match_operand:SWI248 0 "register_operand" "")
+ (minus:SWI248
+ (match_dup 2)
+ (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clz<mode>2_abm (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+})
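+
+;; bsr leaves bitsize-1 - clz(x) in the destination, and since
+;; bitsize-1 is all-ones, (bitsize-1 - v) equals (bitsize-1) ^ v, so
+;; the xor in the second parallel recovers clz.  A hedged sketch for
+;;   int f (int x) { return __builtin_clz (x); }
+;;   bsrl  %edi, %eax      ; %eax = 31 - clz (x)
+;;   xorl  $31, %eax       ; %eax = clz (x)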
+
+(define_insn "clz<mode>2_abm"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM || TARGET_BMI"
+ "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+;; BMI instructions.
+(define_insn "*bmi_andn_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (not:SWI48
+ (match_operand:SWI48 1 "register_operand" "r"))
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI"
+ "andn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi_bextr_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (match_operand:SWI48 2 "register_operand" "r")]
+ UNSPEC_BEXTR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI"
+ "bextr\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsi_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (neg:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI"
+ "blsi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsmsk_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (xor:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int -1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI"
+ "blsmsk\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*bmi_blsr_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int -1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI"
+ "blsr\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+;; TBM instructions.
+(define_insn "tbm_bextri_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (match_operand:SWI48 2 "const_0_to_255_operand" "n")
+ (match_operand:SWI48 3 "const_0_to_255_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
+ return "bextr\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blcfill_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blcfill\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blci_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ior:SWI48
+ (not:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1)))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blci\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blcic_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1))
+ (not:SWI48
+ (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blcic\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blcmsk_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (xor:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blcmsk\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blcs_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ior:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blcs\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blsfill_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ior:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int -1))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blsfill\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_blsic_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ior:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int -1))
+ (not:SWI48
+ (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "blsic\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_t1mskc_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (ior:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int 1))
+ (not:SWI48
+ (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "t1mskc\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*tbm_tzmsk_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (and:SWI48
+ (plus:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (const_int -1))
+ (not:SWI48
+ (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_TBM"
+ "tzmsk\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "bsr_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "bsr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "bsr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*bsrhi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "HI")])
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcount<mode>2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248 (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcountsi2_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (popcount:SI (match_dup 1))))
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{l}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_expand "bswap<mode>2"
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "")))]
+ ""
+{
+ if (<MODE>mode == SImode && !(TARGET_BSWAP || TARGET_MOVBE))
+ {
+ rtx x = operands[0];
+
+ emit_move_insn (x, operands[1]);
+ emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
+ emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ DONE;
+ }
+})
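+
+;; A hedged trace of the no-bswap fallback above for x = 0x12345678:
+;;   bswaphi_lowpart:   0x12347856   (swap bytes of the low word)
+;;   rotlsi3 by 16:     0x78561234
+;;   bswaphi_lowpart:   0x78563412   (full byte reversal)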
+
+(define_insn "*bswap<mode>2_movbe"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
+ (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
+ "TARGET_MOVBE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ bswap\t%0
+ movbe\t{%1, %0|%0, %1}
+ movbe\t{%1, %0|%0, %1}"
+ [(set_attr "type" "bitmanip,imov,imov")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "prefix_0f" "*,1,1")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*bswap<mode>2_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
+ "TARGET_BSWAP"
+ "bswap\t%0"
+ [(set_attr "type" "bitmanip")
+ (set_attr "modrm" "0")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*bswaphi_lowpart_1"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)"
+ "@
+ xchg{b}\t{%h0, %b0|%b0, %h0}
+ rol{w}\t{$8, %0|%0, 8}"
+ [(set_attr "length" "2,4")
+ (set_attr "mode" "QI,HI")])
+
+(define_insn "bswaphi_lowpart"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "rol{w}\t{$8, %0|%0, 8}"
+ [(set_attr "length" "4")
+ (set_attr "mode" "HI")])
+
+(define_expand "paritydi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (parity:DI (match_operand:DI 1 "register_operand" "")))]
+ "! TARGET_POPCNT"
+{
+ rtx scratch = gen_reg_rtx (QImode);
+ rtx cond;
+
+ emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
+ NULL_RTX, operands[1]));
+
+ cond = gen_rtx_fmt_ee (ORDERED, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+ if (TARGET_64BIT)
+ emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendqisi2 (tmp, scratch));
+ emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
+ }
+ DONE;
+})
+
+(define_expand "paritysi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (parity:SI (match_operand:SI 1 "register_operand" "")))]
+ "! TARGET_POPCNT"
+{
+ rtx scratch = gen_reg_rtx (QImode);
+ rtx cond;
+
+ emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
+
+ cond = gen_rtx_fmt_ee (ORDERED, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+ emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+ DONE;
+})
+
+(define_insn_and_split "paritydi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:DI 3 "register_operand" "0")]
+ UNSPEC_PARITY))
+ (clobber (match_scratch:DI 0 "=r"))
+ (clobber (match_scratch:SI 1 "=&r"))
+ (clobber (match_scratch:HI 2 "=Q"))]
+ "! TARGET_POPCNT"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 1)
+ (xor:SI (match_dup 1) (match_dup 4)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+{
+ operands[4] = gen_lowpart (SImode, operands[3]);
+
+ if (TARGET_64BIT)
+ {
+ emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
+ emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
+ }
+ else
+ operands[1] = gen_highpart (SImode, operands[3]);
+})
+
+(define_insn_and_split "paritysi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:SI 2 "register_operand" "0")]
+ UNSPEC_PARITY))
+ (clobber (match_scratch:SI 0 "=r"))
+ (clobber (match_scratch:HI 1 "=&Q"))]
+ "! TARGET_POPCNT"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 1)
+ (xor:HI (match_dup 1) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
+ (clobber (match_dup 1))])]
+{
+ operands[3] = gen_lowpart (HImode, operands[2]);
+
+ emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
+ emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
+})
+
+(define_insn "*parityhi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:HI 1 "register_operand" "0")]
+ UNSPEC_PARITY))
+ (clobber (match_scratch:HI 0 "=Q"))]
+ "! TARGET_POPCNT"
+ "xor{b}\t{%h0, %b0|%b0, %h0}"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")])
+
+;; Thread-local storage patterns for ELF.
+;;
+;; Note that these code sequences must appear exactly as shown
+;; in order to allow linker relaxation.
+
+(define_insn "*tls_global_dynamic_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%a2@tlsgd(,%1,1), %0|%0, %a2@tlsgd[%1*1]}\;call\t%P3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_expand "tls_global_dynamic_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI
+ [(match_dup 2)
+ (match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_dup 3)]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[2] = pic_offset_table_rtx;
+ else
+ {
+ operands[2] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[2]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ operands[3] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_global_dynamic_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+ (match_operand:DI 3 "" "")))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)]
+ "TARGET_64BIT"
+ { return ASM_BYTE "0x66\n\tlea{q}\t{%a1@tlsgd(%%rip), %%rdi|rdi, %a1@tlsgd[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; }
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+(define_expand "tls_global_dynamic_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 2)) (const_int 0)))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "11")])
+
+(define_expand "tls_local_dynamic_base_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 2)]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[1] = pic_offset_table_rtx;
+ else
+ {
+ operands[1] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[1]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], ix86_tls_module_base (), operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+ (match_operand:DI 2 "" "")))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+ "TARGET_64BIT"
+ "lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}\;call\t%P1"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 1)) (const_int 0)))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], ix86_tls_module_base ()));
+ DONE;
+ }
+ operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic of a single variable is a loss. Show combine how
+;; to convert that back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE)
+ (const:SI (unspec:SI
+ [(match_operand:SI 3 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0)
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+ UNSPEC_TLS_GD))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Segment register for the thread base ptr load
+(define_mode_attr tp_seg [(SI "gs") (DI "fs")])
+
+;; Load and add the thread base pointer from %gs:0.
+(define_insn "*load_tp_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(const_int 0)] UNSPEC_TP))]
+ ""
+ "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (plus:P (unspec:P [(const_int 0)] UNSPEC_TP)
+ (match_operand:P 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+;; The Sun linker took the AMD64 TLS spec literally and can only handle
+;; %rax as the destination of the initial-exec code sequence.
+(define_insn "tls_initial_exec_64_sun"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI
+ [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_IE_SUN))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_SUN_TLS"
+ "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}\n\tadd{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"
+ [(set_attr "type" "multi")])
+
+;; GNU2 TLS patterns can be split.
+
+(define_expand "tls_dynamic_gnu2_32"
+ [(set (match_dup 3)
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (const:SI
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 3)
+ (match_dup 2) (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (const:SI
+ (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "0")
+ ;; we have to make sure %ebx still points to the GOT
+ (match_operand:SI 3 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+ [(set (match_operand:SI 0 "register_operand" "=&a")
+ (plus:SI
+ (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")
+ (match_operand:SI 4 "" "")
+ (match_operand:SI 2 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:SI (unspec:SI
+ [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 5))]
+{
+ operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+ [(set (match_dup 2)
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))
+ (parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")
+ (set_attr "length" "7")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+ (match_operand:DI 2 "register_operand" "0")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+ [(set (match_operand:DI 0 "register_operand" "=&a")
+ (plus:DI
+ (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+ (match_operand:DI 3 "" "")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:DI (unspec:DI
+ [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 4))]
+{
+ operands[4] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3. There are three patterns for each of DFmode and
+;; SFmode. The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion. The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target
+;; mode is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address instructions,
+;; so use special patterns for add and mul.
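+
+;; A hedged example of the "one operand a conversion" forms: for
+;;   double f (double a, int b) { return a + b; }
+;; the i387 can fold the int-to-double conversion into the add as
+;;   fiaddl b
+;; which is roughly what *fop_<MODEF:mode>_2_i387 / _3_i387 below match.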
+
+(define_insn "*fop_<mode>_comm_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*rcpsf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP))]
+ "TARGET_SSE_MATH"
+ "%vrcpss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_<mode>_1_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "mode" "<MODE>")])
+
+;; This pattern is not fully shadowed by the pattern above.
+(define_insn "*fop_<mode>_1_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+;; ??? Add SSE splitters for these!
+(define_insn "*fop_<MODEF:mode>_2_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(float:MODEF
+ (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:MODEF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <X87MODEI12:MODE>mode)
+ && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+ && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<X87MODEI12:MODE>")])
+
+(define_insn "*fop_<MODEF:mode>_3_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "0,0")
+ (float:MODEF
+ (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <X87MODEI12:MODE>mode)
+ && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+ && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_4_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:DF 2 "register_operand" "0,f")]))]
+ "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_5_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "0,f")
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_6_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0,f"))
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_comm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "%0")
+ (match_operand:XF 2 "register_operand" "f")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_1_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (match_operand:XF 2 "register_operand" "f,0")]))]
+ "TARGET_80387
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float:XF
+ (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:XF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,0")
+ (float:XF
+ (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF
+ (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:XF 2 "register_operand" "0,f")]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_5_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (float_extend:XF
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_6_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0,f"))
+ (float_extend:XF
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(float (match_operand:X87MODEI12 1 "register_operand" ""))
+ (match_operand 2 "register_operand" "")]))]
+ "reload_completed
+ && X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && X87_ENABLE_FLOAT (GET_MODE (operands[0]), GET_MODE (operands[1]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[4],
+ operands[2])));
+ ix86_free_from_memory (GET_MODE (operands[1]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(match_operand 1 "register_operand" "")
+ (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
+ "reload_completed
+ && X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && X87_ENABLE_FLOAT (GET_MODE (operands[0]), GET_MODE (operands[2]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[1],
+ operands[4])));
+ ix86_free_from_memory (GET_MODE (operands[2]));
+ DONE;
+})
+
+;; FPU special functions.
+
+;; This pattern implements a no-op XFmode truncation for
+;; all fancy i386 XFmode math functions.
+
+(define_insn "truncxf<mode>2_i387_noop_unspec"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_TRUNC_NOOP))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sqrtxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")])
+
+(define_insn "sqrt_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF
+ (float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "bdver1_decode" "direct")])
+
+(define_insn "*rsqrtsf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+ "%vrsqrtss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_expand "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")]
+ UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
+ DONE;
+})
+
+(define_insn "*sqrt<mode>2_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (sqrt:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")
+ (set_attr "bdver1_decode" "*")])
+
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (sqrt:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "")))]
+ "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ if (<MODE>mode == SFmode
+ && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
+ DONE;
+ }
+
+ if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+ DONE;
+ }
+})
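+
+;; The TARGET_RECIP path above reuses the same rsqrt-based software
+;; expansion (ix86_emit_swsqrtsf with a zero last argument, i.e.
+;; computing sqrt rather than 1/sqrt).  Otherwise SFmode/DFmode sqrt
+;; either falls through to the SSE pattern or is done on the 387 stack
+;; in XFmode and truncated back with the no-op truncation.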
+
+(define_insn "fpremxf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(match_dup 2) (match_dup 3)]
+ UNSPEC_C2_FLAG))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fprem"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "fmodxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "general_operand" ""))
+ (use (match_operand:XF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_move_insn (op2, operands[2]);
+ emit_move_insn (op1, operands[1]);
+
+ emit_label (label);
+ emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
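+
+;; The expander above effectively emits the loop
+;;
+;;   do
+;;     op1 = fprem (op1, op2);
+;;   while (FPU status flag C2 is set);  /* reduction not yet complete */
+;;
+;; since fprem only performs a partial remainder and reports via C2
+;; whether another iteration is required; ix86_emit_fp_unordered_jump
+;; branches back while that is the case.  The remainder expanders below
+;; use the identical loop with fprem1 (round-to-nearest remainder).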
+
+(define_expand "fmod<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx (*gen_truncxf) (rtx, rtx);
+
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+ emit_label (label);
+ emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ gen_truncxf = gen_truncxf<mode>2;
+ else
+ gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+ emit_insn (gen_truncxf (operands[0], op1));
+ DONE;
+})
+
+(define_insn "fprem1xf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM1_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM1_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(match_dup 2) (match_dup 3)]
+ UNSPEC_C2_FLAG))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fprem1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "remainderxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "general_operand" ""))
+ (use (match_operand:XF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_move_insn (op2, operands[2]);
+ emit_move_insn (op1, operands[1]);
+
+ emit_label (label);
+ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "remainder<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx (*gen_truncxf) (rtx, rtx);
+
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ gen_truncxf = gen_truncxf<mode>2;
+ else
+ gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+ emit_insn (gen_truncxf (operands[0], op1));
+ DONE;
+})
+
+(define_insn "*sinxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*sin_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))]
+ UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cosxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cos_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))]
+ UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+;; When the sincos pattern is defined, the sin and cos builtin functions
+;; are expanded to the sincos pattern, with one of its outputs left
+;; unused.  The CSE pass will figure out whether two sincos patterns can
+;; be combined; otherwise the sincos pattern is split back to a sin or
+;; cos pattern, depending on which output is unused.
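+;;
+;; For illustration, e.g. under -ffast-math a fragment such as
+;;
+;;   double s = sin (x);
+;;   double c = cos (x);
+;;
+;; can end up as a single fsincos, while a lone sin (x) is split back
+;; to the plain fsin pattern.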
+
+(define_insn "sincosxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))])
+
+(define_insn "sincos_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 1)
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 0)
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))])
+
+(define_expand "sincos<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))
+ (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1));
+ DONE;
+})
+
+(define_insn "fptanxf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (match_operand:XF 3 "const_double_operand" "F"))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && standard_80387_constant_p (operands[3]) == 2"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fptan_extend<mode>xf4_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (match_operand:MODEF 3 "const_double_operand" "F"))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations
+ && standard_80387_constant_p (operands[3]) == 2"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "tanxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx one = gen_reg_rtx (XFmode);
+ rtx op2 = CONST1_RTX (XFmode); /* fld1 */
+
+ emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "tan<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx one = gen_reg_rtx (<MODE>mode);
+ rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
+
+ emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
+ operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "*fpatanxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fpatan_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "u"))]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "atan2xf3"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")
+ (match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations")
+
+(define_expand "atan2<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))
+ (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "atanxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 2)
+ (match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "atan<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (op2, CONST1_RTX (<MODE>mode)); /* fld1 */
+
+ emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "asinxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 5) (match_dup 1)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
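+
+;; I.e. asin(x) is computed as atan (x / sqrt (1 - x*x)), entirely in
+;; XFmode, with the division folded into fpatan.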
+
+(define_expand "asin<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_asinxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "acosxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1) (match_dup 5)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
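+
+;; I.e. acos(x) is computed as atan (sqrt (1 - x*x) / x).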
+
+(define_expand "acos<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_acosxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fyl2xxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2x"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fyl2x_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fyl2x"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+})
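+
+;; I.e. log(x) = log2(x) * ln(2), with ln(2) supplied to fyl2x via
+;; fldln2; log10 and log2 below differ only in the constant loaded
+;; (fldlg2 and fld1 respectively).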
+
+(define_expand "log<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log10xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+})
+
+(define_expand "log10<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log2xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "log2<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fyl2xp1xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2XP1))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2xp1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fyl2xp1_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2XP1))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fyl2xp1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "log1pxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ ix86_emit_i387_log1p (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "log1p<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+
+ operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
+
+ ix86_emit_i387_log1p (op0, operands[1]);
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fxtractxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fxtract"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fxtract_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fxtract"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logbxf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "operands[2] = gen_reg_rtx (XFmode);")
+
+(define_expand "logb<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "ilogbxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
+})
+
+(define_expand "ilogb<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
+})
+
+(define_insn "*f2xm1xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_F2XM1))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "f2xm1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fscale"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "expNcorexf3"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 3; i < 10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */
+})
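+
+;; I.e. N**x = 2**(x * log2 (N)): operand 2 carries log2 (N), the
+;; product is split into integer part i (frndint) and fraction f, 2**f
+;; is formed as f2xm1 (f) + 1, and the result is scaled by 2**i with
+;; fscale.  The expanders below select the constant: fldl2e for exp,
+;; fldl2t for exp10, fld1 for exp2.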
+
+(define_expand "expxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_expxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "exp10xf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp10<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_exp10xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "exp2xf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp2<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_exp2xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "expm1xf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 9) (float_extend:XF (match_dup 13)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 12) (minus:XF (match_dup 10)
+ (float_extend:XF (match_dup 13))))
+ (set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_dup 12) (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 13; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ operands[13]
+ = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */
+
+ emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+})
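+
+;; With x * log2 (e) = i + f this evaluates
+;;   expm1 (x) = (2**f - 1) * 2**i + (1 * 2**i - 1),
+;; i.e. 2**(i+f) - 1, keeping the f2xm1 result separate from the scaled
+;; 1.0 so that precision is not lost for small x.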
+
+(define_expand "expm1<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_expm1xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "ldexpxf3"
+ [(set (match_dup 3)
+ (float:XF (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_operand:XF 0 " register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 4)
+ (unspec:XF [(match_dup 1) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexp<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "scalbxf3"
+ [(parallel [(set (match_operand:XF 0 " register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 3)
+ (unspec:XF [(match_dup 1) (match_dup 2)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "scalb<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1, op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_scalbxf3 (op0, op1, op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "significandxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_dup 2)
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "operands[2] = gen_reg_rtx (XFmode);")
+
+(define_expand "significand<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+
+(define_insn "sse4_1_round<mode>2"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_ROUND"
+ "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "rintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "frndint"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "rint<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)
+ {
+ if (!TARGET_ROUND && optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x04)));
+ else
+ ix86_expand_rint (operands[0], operands[1]);
+ }
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_rintxf2 (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
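+
+;; For the SSE4.1 path, the round{ss,sd} immediate selects the rounding
+;; mode: 0x04 here means "use the current MXCSR rounding mode" (matching
+;; rint semantics), while the floor and ceil expanders below pass 0x01
+;; (round down) and 0x02 (round up).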
+
+(define_expand "round<mode>2"
+ [(match_operand:MODEF 0 "register_operand" "")
+ (match_operand:MODEF 1 "nonimmediate_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_round (operands[0], operands[1]);
+ else
+ ix86_expand_rounddf_32 (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn_and_split "*fistdi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fistdi2 (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+ emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])])
+
+(define_insn_and_split "*fist<mode>2_1"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
+ (set (match_dup 0) (match_dup 2))])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))])
+
+(define_expand "lrintxf<mode>2"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387")
+
+(define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
+ [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+ (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
+ UNSPEC_FIX_NOTRUNC))]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)")
+
+(define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
+ [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
+ && !flag_trapping_math && !flag_rounding_math"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ ix86_expand_lround (operands[0], operands[1]);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_floor"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+ emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_floor_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_expand "floorxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "floor<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (!TARGET_ROUND && optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x01)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_floorceil (operands[0], operands[1], true);
+ else
+ ix86_expand_floorceildf_32 (operands[0], operands[1], true);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_floor_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_floor"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_floor_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])])
+
+(define_insn "fist<mode>2_floor"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_floor_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])])
+
+(define_expand "lfloorxf<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations")
+
+(define_expand "lfloor<MODEF:mode><SWI48:mode>2"
+ [(match_operand:SWI48 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math"
+{
+ if (TARGET_64BIT && optimize_insn_for_size_p ())
+ FAIL;
+ ix86_expand_lfloorceil (operands[0], operands[1], true);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_ceil"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+ emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "ceil<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x02)));
+ else if (optimize_insn_for_size_p ())
+ FAIL;
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_floorceil (operands[0], operands[1], false);
+ else
+ ix86_expand_floorceildf_32 (operands[0], operands[1], false);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])])
+
+(define_insn "fist<mode>2_ceil"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])])
+
+(define_expand "lceilxf<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations")
+
+(define_expand "lceil<MODEF:mode><SWI48:mode>2"
+ [(match_operand:SWI48 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math"
+{
+ ix86_expand_lfloorceil (operands[0], operands[1], false);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
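+;; Two stack slots hold the live control word and a copy with the
+;; rounding field forced to round-toward-zero; the insn below brackets
+;; frndint between fldcw loads of those two words.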
+(define_insn_and_split "frndintxf2_trunc"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_TRUNC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+
+ emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_trunc_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_expand "btruncxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "btrunc<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x03)));
+ else if (optimize_insn_for_size_p ())
+ FAIL;
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_trunc (operands[0], operands[1]);
+ else
+ ix86_expand_truncdf_32 (operands[0], operands[1]);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_trunc (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_mask_pm"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+ emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_mask_pm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
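+;; nearbyint must not raise the inexact exception, so the rounding is
+;; done with the precision (PM) exception masked in the control word;
+;; the fclex above then clears any status flags frndint may have set.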
+(define_expand "nearbyintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "nearbyint<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fxam<mode>2_i387"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(match_operand:X87MODEF 1 "register_operand" "f")]
+ UNSPEC_FXAM))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fxam\n\tfnstsw\t%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "fxam<mode>2_i387_with_temp"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (unspec:HI
+ [(match_operand:MODEF 1 "memory_operand" "")]
+ UNSPEC_FXAM_MEM))]
+ "TARGET_USE_FANCY_MATH_387
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 2)(match_dup 1))
+ (set (match_dup 0)
+ (unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
+{
+ operands[2] = gen_reg_rtx (<MODE>mode);
+
+ MEM_VOLATILE_P (operands[1]) = 1;
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
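+;; fxam classifies %st(0) into the C3/C2/C0 condition bits; fnstsw then
+;; places those bits into %ah as C0 = 0x01, C2 = 0x04 and C3 = 0x40.
+;; Masking with 0x45 and comparing against 0x05 (C2 and C0 set, C3
+;; clear) therefore tests for the "infinity" class.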
+(define_expand "isinfxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && TARGET_C99_FUNCTIONS"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
+(define_expand "isinf<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "nonimmediate_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && TARGET_C99_FUNCTIONS
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ /* Remove excess precision by forcing value through memory. */
+ if (memory_operand (operands[1], VOIDmode))
+ emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
+ else
+ {
+ enum ix86_stack_slot slot = (virtuals_instantiated
+ ? SLOT_TEMP
+ : SLOT_VIRTUAL);
+ rtx temp = assign_386_stack_local (<MODE>mode, slot);
+
+ emit_move_insn (temp, operands[1]);
+ emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
+ }
+
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
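+;; fxam also copies the operand's sign into the C1 bit, which fnstsw
+;; places at bit 9 (0x200) of %ax, so a mask of 0x200 extracts the sign.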
+(define_expand "signbitxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx scratch = gen_reg_rtx (HImode);
+
+ emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+ emit_insn (gen_andsi3 (operands[0],
+ gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+ DONE;
+})
+
+(define_insn "movmsk_df"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:DF 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
+ "%vmovmskpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+;; Use movmskpd in SSE mode to avoid a store-forwarding stall on 32-bit
+;; targets and a movq+shrq sequence on 64-bit targets.
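+;; The SSE path thus expands to just, e.g.:
+;;   movmskpd %xmm0, %eax
+;;   andl $1, %eax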
+(define_expand "signbitdf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:DF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+{
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
+ {
+ emit_insn (gen_movmsk_df (operands[0], operands[1]));
+ emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
+ }
+ else
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+
+ emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
+ emit_insn (gen_andsi3 (operands[0],
+ gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+ }
+ DONE;
+})
+
+(define_expand "signbitsf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
+{
+ rtx scratch = gen_reg_rtx (HImode);
+
+ emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
+ emit_insn (gen_andsi3 (operands[0],
+ gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+ DONE;
+})
+
+;; Block operation instructions
+
+(define_insn "cld"
+ [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+ ""
+ "cld"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+(define_expand "movmem<mode>"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:SWI48 2 "nonmemory_operand" ""))
+ (use (match_operand:SWI48 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
+ ""
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations, so handle this case
+;; here to simplify the previous expander.
+
+(define_expand "strmov"
+ [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+ (set (match_operand 1 "memory_operand" "") (match_dup 4))
+ (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+ /* If .md ever supports :P for Pmode, these can be directly
+ in the pattern above. */
+ operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+ operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+ /* Can't use this if the user has appropriated esi or edi. */
+ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ {
+ emit_insn (gen_strmov_singleop (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[5], operands[6]));
+ DONE;
+ }
+
+ operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+(define_expand "strmov_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 5 "" ""))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strmovdi_rex_1"
+ [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+ (mem:DI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 8)))]
+ "TARGET_64BIT
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "movsq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
+(define_insn "*strmovsi_1"
+ [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
+ (mem:SI (match_operand:P 3 "register_operand" "1")))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:P 1 "register_operand" "=S")
+ (plus:P (match_dup 3)
+ (const_int 4)))]
+ "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strmovhi_1"
+ [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
+ (mem:HI (match_operand:P 3 "register_operand" "1")))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:P 1 "register_operand" "=S")
+ (plus:P (match_dup 3)
+ (const_int 2)))]
+ "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strmovqi_1"
+ [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
+ (mem:QI (match_operand:P 3 "register_operand" "1")))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:P 1 "register_operand" "=S")
+ (plus:P (match_dup 3)
+ (const_int 1)))]
+ "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_mov"
+ [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 5 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 6 "" ""))
+ (set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (use (match_dup 4))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*rep_movdi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "TARGET_64BIT
+ && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "rep{%;} movsq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_movsi"
+ [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:P 3 "register_operand" "0")))
+ (set (match_operand:P 1 "register_operand" "=S")
+ (plus:P (ashift:P (match_dup 5) (const_int 2))
+ (match_operand:P 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "rep{%;} movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi"
+ [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_operand:P 3 "register_operand" "0")
+ (match_operand:P 5 "register_operand" "2")))
+ (set (match_operand:P 1 "register_operand" "=S")
+ (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "rep{%;} movsb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_expand "setmem<mode>"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:SWI48 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
+ ""
+{
+ if (ix86_expand_setmem (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations, so handle this case
+;; here to simplify the previous expander.
+
+(define_expand "strset"
+ [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (parallel [(set (match_operand 0 "register_operand" "")
+ (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+ operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+ /* If .md ever supports :P for Pmode, this can be directly
+ in the pattern above. */
+ operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+ GEN_INT (GET_MODE_SIZE (GET_MODE
+ (operands[2]))));
+ /* Can't use this if the user has appropriated eax or edi. */
+ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+ && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
+ {
+ emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+(define_expand "strset_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 3 "" ""))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strsetdi_rex_1"
+ [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:DI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 8)))]
+ "TARGET_64BIT
+ && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+ "stosq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*strsetsi_1"
+ [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 1)
+ (const_int 4)))]
+ "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+ "stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strsethi_1"
+ [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 1)
+ (const_int 2)))]
+ "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strsetqi_1"
+ [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_dup 1)
+ (const_int 1)))]
+ "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_stos"
+ [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "memory_operand" "") (const_int 0))
+ (use (match_operand 3 "register_operand" ""))
+ (use (match_dup 1))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*rep_stosdi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:DI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "TARGET_64BIT
+ && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+ "rep{%;} stosq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_stossi"
+ [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:P 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+ "rep{%;} stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_stosqi"
+ [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:P 0 "register_operand" "=D")
+ (plus:P (match_operand:P 3 "register_operand" "0")
+ (match_operand:P 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+ "rep{%;} stosb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "general_operand" "")
+ (match_operand:BLK 2 "general_operand" "")))
+ (use (match_operand 3 "general_operand" ""))
+ (use (match_operand 4 "immediate_operand" ""))]
+ ""
+{
+ rtx addr1, addr2, out, outlow, count, countreg, align;
+
+ if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+ FAIL;
+
+ /* Can't use this if the user has appropriated ecx, esi or edi. */
+ if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ FAIL;
+
+ out = operands[0];
+ if (!REG_P (out))
+ out = gen_reg_rtx (SImode);
+
+ addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
+ if (addr1 != XEXP (operands[1], 0))
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ if (addr2 != XEXP (operands[2], 0))
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
+ count = operands[3];
+ countreg = ix86_zero_extend_to_Pmode (count);
+
+ /* %%% Iff we are testing strict equality, we can use known alignment
+ to good advantage. This may be possible with combine, particularly
+ once cc0 is dead. */
+ align = operands[4];
+
+ if (CONST_INT_P (count))
+ {
+ if (INTVAL (count) == 0)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ }
+ else
+ {
+ rtx (*gen_cmp) (rtx, rtx);
+
+ gen_cmp = (TARGET_64BIT
+ ? gen_cmpdi_1 : gen_cmpsi_1);
+
+ emit_insn (gen_cmp (countreg, countreg));
+ emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ }
+
+ outlow = gen_lowpart (QImode, out);
+ emit_insn (gen_cmpintqi (outlow));
+ emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
+
+ if (operands[0] != out)
+ emit_move_insn (operands[0], out);
+
+ DONE;
+})
+
+;; Produce a tri-state integer (-1, 0, 1) from condition codes.
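+;; e.g. after "repz cmpsb" this is roughly
+;;   seta %al       ; 1 if above (GTU)
+;;   setb %dl       ; 1 if below (LTU)
+;;   subb %dl, %al  ; yields 1, 0 or -1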
+
+(define_expand "cmpintqi"
+ [(set (match_dup 1)
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_dup 2)
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_operand:QI 0 "register_operand" "")
+ (minus:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ operands[1] = gen_reg_rtx (QImode);
+ operands[2] = gen_reg_rtx (QImode);
+})
+
+;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
+;; zero. Emit extra code to make sure that a zero-length compare is EQ.
+
+(define_expand "cmpstrnqi_nz_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_nz_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+ (mem:BLK (match_operand:P 5 "register_operand" "1"))))
+ (use (match_operand:P 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:P 0 "register_operand" "=S"))
+ (clobber (match_operand:P 1 "register_operand" "=D"))
+ (clobber (match_operand:P 2 "register_operand" "=c"))]
+ "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "repz{%;} cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "prefix_rep" "1")])
+
+;; The same, but the count is not known to be nonzero.
+
+(define_expand "cmpstrnqi_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 2 "register_operand" "")
+ (const_int 0))
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" ""))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_1"
+ [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+ (mem:BLK (match_operand:P 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand:P 0 "register_operand" "=S"))
+ (clobber (match_operand:P 1 "register_operand" "=D"))
+ (clobber (match_operand:P 2 "register_operand" "=c"))]
+ "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+ "repz{%;} cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "prefix_rep" "1")])
+
+(define_expand "strlen<mode>"
+ [(set (match_operand:P 0 "register_operand" "")
+ (unspec:P [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")]
+ UNSPEC_SCAS))]
+ ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlenqi_1"
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (match_operand 2 "" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strlenqi_1"
+ [(set (match_operand:P 0 "register_operand" "=&c")
+ (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:P 3 "immediate_operand" "i")
+ (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
+ (clobber (match_operand:P 1 "register_operand" "=D"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+ "repnz{%;} scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (ne (symbol_ref "<P:MODE>mode == DImode") (const_int 0))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "prefix_rep" "1")])
+
+;; Peephole optimizations to clean up after cmpstrn*. This should be
+;; handled in combine, but it is not currently up to the task.
+;; When used for their truth value, the cmpstrn* expanders generate
+;; code like this:
+;;
+;; repz cmpsb
+;; seta %al
+;; setb %dl
+;; cmpb %al, %dl
+;; jcc label
+;;
+;; The intermediate three instructions are unnecessary.
+
+;; This one handles cmpstrn*_nz_1...
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" ""))))
+ (use (match_operand 6 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5))))
+ (use (match_dup 6))
+ (use (match_dup 3))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])])
+
+;; ...and this one handles cmpstrn*_1.
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 6 "register_operand" "")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" "")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_dup 6)
+ (const_int 0))
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5)))
+ (const_int 0)))
+ (use (match_dup 3))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])])
+
+;; Conditional move instructions.
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:SWIM 0 "register_operand" "")
+ (if_then_else:SWIM (match_operand 1 "ordered_comparison_operator" "")
+ (match_operand:SWIM 2 "general_operand" "")
+ (match_operand:SWIM 3 "general_operand" "")))]
+ ""
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
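+;;
+;; The "sbb reg,reg" idiom below computes reg = reg - reg - CF, i.e.
+;; CF ? -1 : 0, giving a branch-free all-ones or all-zeros mask taken
+;; straight from the carry flag.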
+
+(define_expand "x86_mov<mode>cc_0_m1"
+ [(parallel
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (if_then_else:SWI48
+ (match_operator:SWI48 2 "ix86_carry_flag_operator"
+ [(match_operand 1 "flags_reg_operand" "")
+ (const_int 0)])
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_insn "*x86_mov<mode>cc_0_m1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{<imodesuffix>}\t%0, %0"
+ ; Since we don't have the proper number of operands for an alu insn,
+ ; fill in all the blanks.
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_mov<mode>cc_0_m1_se"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{<imodesuffix>}\t%0, %0"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_mov<mode>cc_0_m1_neg"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (neg:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+ [(reg FLAGS_REG) (const_int 0)])))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{<imodesuffix>}\t%0, %0"
+ [(set_attr "type" "alu")
+ (set_attr "use_carry" "1")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*mov<mode>cc_noc"
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
+ (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*movqicc_noc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:QI 2 "register_operand" "r,0")
+ (match_operand:QI 3 "register_operand" "0,r")))]
+ "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 2)
+ (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (if_then_else:X87MODEF
+ (match_operand 1 "ix86_fp_comparison_operator" "")
+ (match_operand:X87MODEF 2 "register_operand" "")
+ (match_operand:X87MODEF 3 "register_operand" "")))]
+ "(TARGET_80387 && TARGET_CMOVE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "*movxfcc_1"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:XF 2 "register_operand" "f,0")
+ (match_operand:XF 3 "register_operand" "0,f")))]
+ "TARGET_80387 && TARGET_CMOVE"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov")
+ (set_attr "mode" "XF")])
+
+(define_insn "*movdfcc_1_rex64"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "DF,DF,DI,DI")])
+
+(define_insn "*movdfcc_1"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ #
+ #"
+ [(set_attr "type" "fcmov,fcmov,multi,multi")
+ (set_attr "mode" "DF,DF,DI,DI")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (match_operand:DF 3 "nonimmediate_operand" "")))]
+ "!TARGET_64BIT && reload_completed"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 5)
+ (match_dup 6)))
+ (set (match_dup 3)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 7)
+ (match_dup 8)))]
+{
+ split_double_mode (DImode, &operands[2], 2, &operands[5], &operands[7]);
+ split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
+})
+
+(define_insn "*movsfcc_1_387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "SF,SF,SI,SI")])
+
+;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
+;; XOP conditional move
+(define_insn "*xop_pcmov_<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (if_then_else:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "register_operand" "x")))]
+ "TARGET_XOP"
+ "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+ [(set_attr "type" "sse4arg")])
+
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior with respect to -0.0 and NaN (via the commutative
+;; operand mark). Since both the tree-level MAX_EXPR and the rtl-level
+;; SMAX operator are undefined in those cases, we're certain this is
+;; correct.
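+;; (The hardware min/max are in fact not commutative: when an operand
+;; is a NaN, or when -0.0 meets +0.0, the second source operand is
+;; returned, which is exactly why those cases must stay undefined here.)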
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smaxmin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "v<maxmin_float>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smaxmin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "<maxmin_float>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
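+;; e.g. "minsd %xmm1, %xmm0" computes
+;;   %xmm0 = %xmm0 < %xmm1 ? %xmm0 : %xmm1
+;; returning the second operand whenever the comparison is false,
+;; including the NaN and -0.0 cases.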
+
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+;; Make two stack loads independent:
+;; fld aa fld aa
+;; fld %st(0) -> fld bb
+;; fmul bb fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operand 1 "fp_register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "binary_fp_operator"
+ [(match_dup 0)
+ (match_operand 3 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))]
+
+ ;; The % modifier is not operational anymore in peephole2's, so we have to
+ ;; swap the operands manually in the case of addition and multiplication.
+ "if (COMMUTATIVE_ARITH_P (operands[2]))
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]),
+ GET_MODE (operands[2]),
+ operands[0], operands[1]);
+ else
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]),
+ GET_MODE (operands[2]),
+ operands[1], operands[0]);")
+
+;; Conditional addition patterns
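+;; e.g. a conditional increment such as "if (a < b) x++;" can typically
+;; be emitted branch-free as a compare followed by an adc or sbb of an
+;; immediate.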
+(define_expand "add<mode>cc"
+ [(match_operand:SWI 0 "register_operand" "")
+ (match_operand 1 "ordered_comparison_operator" "")
+ (match_operand:SWI 2 "register_operand" "")
+ (match_operand:SWI 3 "const_int_operand" "")]
+ ""
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
+
+;; Misc patterns (?)
+
+;; This pattern exists to put a dependency on all ebp-based memory accesses.
+;; Otherwise there will be nothing to keep
+;;
+;; [(set (reg ebp) (reg esp))]
+;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
+;; (clobber (eflags)]
+;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
+;;
+;; in proper program order.
+
+(define_insn "pro_epilogue_adjust_stack_<mode>_add"
+ [(set (match_operand:P 0 "register_operand" "=r,r")
+ (plus:P (match_operand:P 1 "register_operand" "0,r")
+ (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ ""
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOV:
+ return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+ case TYPE_ALU:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+ return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+ default:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{<imodesuffix>}\t{%a2, %0|%0, %a2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "0")
+ (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
+ (const_string "alu")
+ (match_operand:<MODE> 2 "const0_operand" "")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set (attr "length_immediate")
+ (cond [(eq_attr "type" "imov")
+ (const_string "0")
+ (and (eq_attr "type" "alu")
+ (match_operand 2 "const128_operand" ""))
+ (const_string "1")
+ ]
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "pro_epilogue_adjust_stack_<mode>_sub"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (minus:P (match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "register_operand" "r")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "allocate_stack_worker_probe_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=a")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+ UNSPECV_STACK_PROBE))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_target_stack_probe ()"
+ "call\t___chkstk_ms"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+(define_expand "allocate_stack"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "general_operand" "")]
+ "ix86_target_stack_probe ()"
+{
+ rtx x;
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT 0
+#endif
+
+ if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
+ {
+ x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1],
+ stack_pointer_rtx, 0, OPTAB_DIRECT);
+ if (x != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, x);
+ }
+ else
+ {
+ x = copy_to_mode_reg (Pmode, operands[1]);
+ if (TARGET_64BIT)
+ emit_insn (gen_allocate_stack_worker_probe_di (x, x));
+ else
+ emit_insn (gen_allocate_stack_worker_probe_si (x, x));
+ x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
+ stack_pointer_rtx, 0, OPTAB_DIRECT);
+ if (x != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, x);
+ }
+
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+})
+
+;; Use IOR for stack probes, as it is shorter.
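+;; An "or $0" both reads and writes the probed word without changing
+;; it, and the sign-extended imm8 form keeps the instruction short.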
+(define_expand "probe_stack"
+ [(match_operand 0 "memory_operand" "")]
+ ""
+{
+ rtx (*gen_ior3) (rtx, rtx, rtx);
+
+ gen_ior3 = (GET_MODE (operands[0]) == DImode
+ ? gen_iordi3 : gen_iorsi3);
+
+ emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx));
+ DONE;
+})
+
+(define_insn "adjust_stack_and_probe<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+ UNSPECV_PROBE_STACK_RANGE))
+ (set (reg:P SP_REG)
+ (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "* return output_adjust_stack_and_probe (operands[0]);"
+ [(set_attr "type" "multi")])
+
+(define_insn "probe_stack_range<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "const_int_operand" "n")]
+ UNSPECV_PROBE_STACK_RANGE))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "* return output_probe_stack_range (operands[0], operands[2]);"
+ [(set_attr "type" "multi")])
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "!TARGET_64BIT && flag_pic"
+{
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx xops[3];
+ rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+ rtx label_rtx = gen_label_rtx ();
+ emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+ xops[0] = xops[1] = picreg;
+ xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
+ ix86_expand_binary_operator (MINUS, SImode, xops);
+ }
+ else
+#endif
+ emit_insn (gen_set_got (pic_offset_table_rtx));
+ DONE;
+})
+
+;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
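+;; e.g. in 32-bit code "addw $1, %ax" carries a 0x66 operand-size
+;; prefix, while the equivalent SImode "addl $1, %eax" does not.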
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "promotable_binary_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "aligned_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && ((GET_MODE (operands[0]) == HImode
+ && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
+ /* ??? next two lines just !satisfies_constraint_K (...) */
+ || !CONST_INT_P (operands[2])
+ || satisfies_constraint_K (operands[2])))
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ if (GET_CODE (operands[3]) != ASHIFT)
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ PUT_MODE (operands[3], SImode);")
+
+; Promote the QImode tests, as the i386 has an encoding of the AND
+; instruction with a 32-bit sign-extended immediate, so the instruction
+; size is unchanged, except in the %eax case, where it grows by one
+; byte; hence the ! optimize_size.
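+; e.g. "andl $0x70, %ebx" can use the sign-extended imm8 encoding
+; (opcode 0x83), so it is no longer than the QImode "andb $0x70, %bl".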
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(and (match_operand 3 "aligned_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))
+ (set (match_operand 1 "register_operand" "")
+ (and (match_dup 3) (match_dup 4)))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && optimize_insn_for_speed_p ()
+ && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (and:SI (match_dup 3) (match_dup 4)))])]
+{
+ operands[4]
+ = gen_int_mode (INTVAL (operands[4])
+ & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+; Don't promote the QImode tests, as the i386 has no encoding of the
+; TEST instruction with a 32-bit sign-extended immediate, so the
+; instruction size would at least double, which is not what we want
+; even with ! optimize_size.
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand:HI 2 "aligned_operand" "")
+ (match_operand:HI 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && ! TARGET_FAST_PREFIX
+ && optimize_insn_for_speed_p ()
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+{
+ operands[3]
+ = gen_int_mode (INTVAL (operands[3])
+ & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (neg (match_operand 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(parallel [(set (match_dup 0)
+ (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (not (match_operand 1 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (if_then_else (match_operator 1 "ordered_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand 2 "register_operand" "")
+ (match_operand 3 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);")
+
+;; RTL peephole optimizations, run before sched2. These primarily look
+;; to transform a complex memory operation into two memory-to-register
+;; operations.
+
+;; Don't push memory operands
+(define_peephole2
+ [(set (match_operand:SWI 0 "push_operand" "")
+ (match_operand:SWI 1 "memory_operand" ""))
+ (match_scratch:SWI 2 "<r>")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; We need to handle SFmode only, because DFmode and XFmode are split to
+;; SImode pushes.
+(define_peephole2
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (match_scratch:SF 2 "r")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; Don't move an immediate directly to memory when the instruction
+;; gets too big.
+(define_peephole2
+ [(match_scratch:SWI124 1 "<r>")
+ (set (match_operand:SWI124 0 "memory_operand" "")
+ (const_int 0))]
+ "optimize_insn_for_speed_p ()
+ && !TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 2) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+ [(match_scratch:SWI124 2 "<r>")
+ (set (match_operand:SWI124 0 "memory_operand" "")
+ (match_operand:SWI124 1 "immediate_operand" ""))]
+ "optimize_insn_for_speed_p ()
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; Don't compare memory with zero; load the value and use a test instead.
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand:SI 2 "memory_operand" "")
+ (const_int 0)]))
+ (match_scratch:SI 3 "r")]
+ "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
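+;; A sketch, with %ecx assumed as the scratch register:
+;; cmpl $0, 4(%esi)
+;; becomes
+;; movl 4(%esi), %ecx
+;; testl %ecx, %ecx
+;; which sets the same SF/ZF and needs no carry, hence the CCNOmode check.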
+
+;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
+;; Don't split NOTs with a displacement operand, because the resulting XOR
+;; will not be pairable anyway.
+;;
+;; On AMD K6, NOT is vector decoded with a memory operand that cannot be
+;; represented using a modRM byte.  The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
+
+(define_peephole2
+ [(set (match_operand:SWI124 0 "nonimmediate_operand" "")
+ (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand" "")))]
+ "optimize_insn_for_speed_p ()
+ && ((TARGET_NOT_UNPAIRABLE
+ && (!MEM_P (operands[0])
+ || !memory_displacement_operand (operands[0], <MODE>mode)))
+ || (TARGET_NOT_VECTORMODE
+ && long_memory_operand (operands[0], <MODE>mode)))
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0)
+ (xor:SWI124 (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])])
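+;; A sketch, assuming the flags are dead (XOR clobbers them, NOT does not):
+;; notl %eax
+;; becomes the pairable
+;; xorl $-1, %eax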
+
+;; Non-pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies.  The "and" form is also shorter (one-byte
+;; opcode instead of two, and it has a short form for byte operands),
+;; so do it for other CPUs as well.  Given that the value was dead,
+;; this should not create any new dependencies.  Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (true_regnum (operands[2]) != AX_REG
+ || satisfies_constraint_K (operands[3]))
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:SI (match_dup 2) (match_dup 3)))])])
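+;; A sketch, assuming %dl dies after the test:
+;; testb $8, %dl
+;; becomes the shorter
+;; andb $8, %dl
+;; which sets SF/ZF/PF and clears CF/OF exactly as test does.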
+
+;; We don't need to handle the HImode case, because it will have been
+;; promoted to SImode when ! TARGET_PARTIAL_REG_STALL.
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:QI (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != AX_REG
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:QI (match_dup 2) (match_dup 3)))])])
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != AX_REG
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))
+ (set (zero_extract:SI (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3)))])])
+
+;; Don't do logical operations with memory inputs.
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "memory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "memory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address
+;; refers to the destination of the load!
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0)
+ (match_operator:SI 3 "commutative_operator"
+ [(match_dup 0)
+ (match_operand:SI 2 "memory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "REGNO (operands[0]) != REGNO (operands[1])
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 0) (match_dup 4))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);")
+
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 3 "commutative_operator"
+ [(match_dup 0)
+ (match_operand 2 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])
+ && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1]))
+ || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
+
+;; Don't do logical operations with memory outputs.
+;;
+;; These two don't make sense for PPro/PII -- we're expanding a 4-uop
+;; instruction into two 1-uop insns plus a 2-uop insn.  That last has
+;; the same decoder scheduling characteristics as the original.
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "nonmemory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))])
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "nonmemory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+ /* Do not split stack checking probes. */
+ && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))])
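+;; A sketch, with %edx assumed as the scratch register:
+;; andl %eax, (%ecx)
+;; becomes
+;; movl (%ecx), %edx
+;; andl %eax, %edx
+;; movl %edx, (%ecx)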
+
+;; Attempt to always use XOR for zeroing registers.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "const0_operand" ""))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+ && GENERAL_REG_P (operands[0])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (word_mode, operands[0]);")
+
+(define_peephole2
+ [(set (strict_low_part (match_operand 0 "register_operand" ""))
+ (const_int 0))]
+ "(GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode)
+ && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
+(define_peephole2
+ [(set (match_operand:SWI248 0 "register_operand" "")
+ (const_int -1))]
+ "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int -1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (GET_MODE_SIZE (<MODE>mode) < GET_MODE_SIZE (SImode))
+ operands[0] = gen_lowpart (SImode, operands[0]);
+})
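+;; A sketch:
+;; movl $-1, %eax ; 5 bytes, 32-bit immediate
+;; becomes
+;; orl $-1, %eax ; 3 bytes, sign-extended 8-bit immediate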
+
+;; Attempt to convert simple lea to add/shift.
+;; These can be created by move expanders.
+
+(define_peephole2
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (plus:SWI48 (match_dup 0)
+ (match_operand:SWI48 1 "<nonmemory_operand>" "")))]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+ "TARGET_64BIT
+ && peep2_regno_dead_p (0, FLAGS_REG)
+ && REGNO (operands[0]) == REGNO (operands[1])"
+ [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_peephole2
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (mult:SWI48 (match_dup 0)
+ (match_operand:SWI48 1 "const_int_operand" "")))]
+ "exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")) 0))]
+ "TARGET_64BIT
+ && exact_log2 (INTVAL (operands[2])) >= 0
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since a push is only 1 byte while add imm, %esp
+;; is 3 bytes.  On many CPUs it is also faster, since special hardware that
+;; avoids esp dependencies is present.
+
+;; While some of these conversions could be done using splitters, we use
+;; peepholes instead so that the combine_stack_adjustments pass sees
+;; unobfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it, to avoid dependencies
+;; - use an already-live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies: at the point of an esp adjustment it
+;; is very likely that call-clobbered registers are dead.  We may want to use
+;; the base pointer as an alternative when no register is available later.
+
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+ [(clobber (match_dup 1))
+ (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+ (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+ [(clobber (match_dup 1))
+ (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+ (parallel [(set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+ (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == -GET_MODE_SIZE (Pmode)"
+ [(clobber (match_dup 1))
+ (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == -2*GET_MODE_SIZE (Pmode)"
+ [(clobber (match_dup 1))
+ (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+ (set (mem:P (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
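+;; A sketch of the conversions above (%ecx assumed as the clobbered scratch;
+;; the value pushed is irrelevant, only the esp adjustment matters):
+;; subl $4, %esp
+;; becomes
+;; pushl %ecx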
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
+ [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+ (clobber (mem:BLK (scratch)))])])
+
+;; The two-pops case is tricky, since pop creates a dependency on its
+;; destination register.  We use two different registers if available.
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (match_scratch:P 2 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
+ && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+ [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+ (clobber (mem:BLK (scratch)))])
+ (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p ()
+ && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+ [(parallel [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+ (clobber (mem:BLK (scratch)))])
+ (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[0]) == GET_MODE_SIZE (Pmode)"
+ [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+;; The two-pops case is tricky, since pop creates a dependency on its
+;; destination register.  We use two different registers if available.
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (match_scratch:P 2 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+ [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+ (set (match_dup 2) (mem:P (post_inc:P (reg:P SP_REG))))])
+
+(define_peephole2
+ [(match_scratch:P 1 "r")
+ (parallel [(set (reg:P SP_REG)
+ (plus:P (reg:P SP_REG)
+ (match_operand:P 0 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p ()
+ && INTVAL (operands[0]) == 2*GET_MODE_SIZE (Pmode)"
+ [(set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))
+ (set (match_dup 1) (mem:P (post_inc:P (reg:P SP_REG))))])
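+;; A sketch (%ecx assumed as the scratch; the popped value is discarded):
+;; addl $4, %esp
+;; becomes
+;; popl %ecx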
+
+;; Convert compares with 1 to shorter inc/dec operations when CF is not
+;; required and the register dies.  Similarly, a compare with 128 can become
+;; an add of -128, whose immediate fits in a sign-extended byte.
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" "")]))]
+ "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
+ && incdec_operand (operands[3], GET_MODE (operands[3])))
+ || (!TARGET_FUSE_CMP_AND_BRANCH
+ && INTVAL (operands[3]) == 128))
+ && ix86_match_ccmode (insn, CCGCmode)
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
+ (clobber (match_dup 2))])])
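+;; A sketch, assuming %eax dies and the carry flag is not needed:
+;; cmpl $1, %eax
+;; becomes
+;; decl %eax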
+
+;; Convert imul by three, five and nine into lea
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (mult:SWI48 (match_operand:SWI48 1 "register_operand" "")
+ (match_operand:SWI48 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9"
+ [(set (match_dup 0)
+ (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SWI48 0 "register_operand" "")
+ (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+ (match_operand:SWI48 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p ()
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
+ (match_dup 0)))]
+ "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
+
+;; imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is direct decoded.
+(define_peephole2
+ [(match_scratch:SWI48 3 "r")
+ (parallel [(set (match_operand:SWI48 0 "register_operand" "")
+ (mult:SWI48 (match_operand:SWI48 1 "memory_operand" "")
+ (match_operand:SWI48 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT
+ && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])])
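+;; A sketch of the splits above, for an immediate too wide for 8 bits:
+;; imull $100000, 4(%esi), %eax
+;; becomes
+;; movl 4(%esi), %eax
+;; imull $100000, %eax, %eax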
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg.
+;; It would be better to force the assembler to encode the instruction using
+;; a long immediate, but there is apparently no way to do so.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI248 0 "register_operand" "")
+ (mult:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "")
+ (match_operand:SWI248 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:SWI248 3 "r")]
+ "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+ && satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
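+;; A sketch, with %edx assumed as the scratch holding the immediate:
+;; imull $11, %eax, %eax
+;; becomes
+;; movl $11, %edx
+;; imull %edx, %eax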
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands might end up in the form:
+;; sall $2, %eax
+;; movl 4(%esp), %edx
+;; addl %edx, %eax
+;; instead of the pre-split form:
+;; sall $2, %eax
+;; addl 4(%esp), %eax
+;; Turn it into:
+;; movl 4(%esp), %edx
+;; leal (%edx,%eax,4), %eax
+
+(define_peephole2
+ [(match_scratch:P 5 "r")
+ (parallel [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand 3 "register_operand" "")
+ (plus (match_dup 0)
+ (match_operand 4 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "IN_RANGE (INTVAL (operands[2]), 1, 3)
+ /* Validate MODE for lea. */
+ && ((!TARGET_PARTIAL_REG_STALL
+ && (GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode))
+ || GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+ && (rtx_equal_p (operands[0], operands[3])
+ || peep2_reg_dead_p (2, operands[0]))
+ /* We reorder load and the shift. */
+ && !reg_overlap_mentioned_p (operands[0], operands[4])"
+ [(set (match_dup 5) (match_dup 4))
+ (set (match_dup 0) (match_dup 1))]
+{
+ enum machine_mode op1mode = GET_MODE (operands[1]);
+ enum machine_mode mode = op1mode == DImode ? DImode : SImode;
+ int scale = 1 << INTVAL (operands[2]);
+ rtx index = gen_lowpart (Pmode, operands[1]);
+ rtx base = gen_lowpart (Pmode, operands[5]);
+ rtx dest = gen_lowpart (mode, operands[3]);
+
+ operands[1] = gen_rtx_PLUS (Pmode, base,
+ gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+ operands[5] = base;
+ if (mode != Pmode)
+ operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+ if (op1mode != Pmode)
+ operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0);
+ operands[0] = dest;
+})
+
+;; Call-value patterns last so that the wildcard operand does not
+;; disrupt insn-recog's switch tables.
+
+(define_insn_and_split "*call_value_pop_0_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "")))])
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_pop_1_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))])
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i,i")))])
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_pop_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i,i")))]
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))]
+ "TARGET_64BIT"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64_ms_sysv"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT && SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
+ (match_operand:DI 2 "" "")))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_ms_sysv"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_large"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+ (match_operand:DI 2 "" "")))]
+ "TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U"))
+ (match_operand:DI 2 "" "")))]
+ "TARGET_64BIT && SIBLING_CALL_P (insn)"
+ { return ix86_output_call_insn (insn, operands[1], 1); }
+ [(set_attr "type" "callv")])
+
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like. Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; In keeping with tradition, "6" is in honor of #UD.
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 6))]
+ ""
+ { return ASM_SHORT "0x0b0f"; }
+ [(set_attr "length" "2")])
+
+(define_expand "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (rw == 0 || rw == 1);
+ gcc_assert (locality >= 0 && locality <= 3);
+ gcc_assert (GET_MODE (operands[0]) == Pmode
+ || GET_MODE (operands[0]) == VOIDmode);
+
+ /* Use the 3dNOW prefetch if we are asking for a write prefetch not
+ supported by the SSE counterpart, or if the SSE prefetch is not
+ available (K6 machines).  Otherwise use the SSE prefetch, as it allows
+ specifying the locality.  */
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ operands[2] = GEN_INT (3);
+ else
+ operands[1] = const0_rtx;
+})
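+;; For example, a call such as __builtin_prefetch (p, /*rw=*/1, /*locality=*/3)
+;; is expected to map to prefetchw on 3dNOW targets, while on SSE-only targets
+;; the write hint is dropped and prefetcht0 is emitted instead.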
+
+(define_insn "*prefetch_sse_<mode>"
+ [(prefetch (match_operand:P 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (locality >= 0 && locality <= 3);
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0])"))
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow_<mode>"
+ [(prefetch (match_operand:P 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0])"))
+ (set_attr "memory" "none")])
+
+(define_expand "stack_protect_set"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+ rtx (*insn)(rtx, rtx);
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+ operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+ insn = (TARGET_64BIT
+ ? gen_stack_tls_protect_set_di
+ : gen_stack_tls_protect_set_si);
+#else
+ insn = (TARGET_64BIT
+ ? gen_stack_protect_set_di
+ : gen_stack_protect_set_si);
+#endif
+
+ emit_insn (insn (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "stack_protect_set_<mode>"
+ [(set (match_operand:P 0 "memory_operand" "=m")
+ (unspec:P [(match_operand:P 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:P 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_<mode>"
+ [(set (match_operand:P 0 "memory_operand" "=m")
+ (unspec:P [(match_operand:P 1 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_SET))
+ (set (match_scratch:P 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+ [(set_attr "type" "multi")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+ rtx (*insn)(rtx, rtx, rtx);
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+ operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+ insn = (TARGET_64BIT
+ ? gen_stack_tls_protect_test_di
+ : gen_stack_tls_protect_test_si);
+#else
+ insn = (TARGET_64BIT
+ ? gen_stack_protect_test_di
+ : gen_stack_protect_test_si);
+#endif
+
+ emit_insn (insn (flags, operands[0], operands[1]));
+
+ emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+ flags, const0_rtx, operands[2]));
+ DONE;
+})
+
+(define_insn "stack_protect_test_<mode>"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:P 1 "memory_operand" "m")
+ (match_operand:P 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (clobber (match_scratch:P 3 "=&r"))]
+ ""
+ "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_<mode>"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:P 1 "memory_operand" "m")
+ (match_operand:P 2 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_TEST))
+ (clobber (match_scratch:P 3 "=r"))]
+ ""
+ "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "sse4_2_crc32<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
+ UNSPEC_CRC32))]
+ "TARGET_SSE4_2 || TARGET_CRC32"
+ "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_data16")
+ (if_then_else (match_operand:HI 2 "" "")
+ (const_string "1")
+ (const_string "*")))
+ (set (attr "prefix_rex")
+ (if_then_else (match_operand:QI 2 "ext_QIreg_operand" "")
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "SI")])
+
+(define_insn "sse4_2_crc32di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")]
+ UNSPEC_CRC32))]
+ "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)"
+ "crc32{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_expand "rdpmc"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")]
+ ""
+{
+ rtx reg = gen_reg_rtx (DImode);
+ rtx si;
+
+ /* Force operand 1 into ECX. */
+ rtx ecx = gen_rtx_REG (SImode, CX_REG);
+ emit_insn (gen_rtx_SET (VOIDmode, ecx, operands[1]));
+ si = gen_rtx_UNSPEC_VOLATILE (DImode, gen_rtvec (1, ecx),
+ UNSPECV_RDPMC);
+
+ if (TARGET_64BIT)
+ {
+ rtvec vec = rtvec_alloc (2);
+ rtx load = gen_rtx_PARALLEL (VOIDmode, vec);
+ rtx upper = gen_reg_rtx (DImode);
+ rtx di = gen_rtx_UNSPEC_VOLATILE (DImode,
+ gen_rtvec (1, const0_rtx),
+ UNSPECV_RDPMC);
+ RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, si);
+ RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, di);
+ emit_insn (load);
+ upper = expand_simple_binop (DImode, ASHIFT, upper, GEN_INT (32),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (DImode, IOR, reg, upper, reg, 1,
+ OPTAB_DIRECT);
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, reg, si));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], reg));
+ DONE;
+})
+
+(define_insn "*rdpmc"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
+ UNSPECV_RDPMC))]
+ "!TARGET_64BIT"
+ "rdpmc"
+ [(set_attr "type" "other")
+ (set_attr "length" "2")])
+
+(define_insn "*rdpmc_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
+ UNSPECV_RDPMC))
+ (set (match_operand:DI 1 "register_operand" "=d")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPMC))]
+ "TARGET_64BIT"
+ "rdpmc"
+ [(set_attr "type" "other")
+ (set_attr "length" "2")])
+
+(define_expand "rdtsc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ rtvec vec = rtvec_alloc (2);
+ rtx load = gen_rtx_PARALLEL (VOIDmode, vec);
+ rtx upper = gen_reg_rtx (DImode);
+ rtx lower = gen_reg_rtx (DImode);
+ rtx src = gen_rtx_UNSPEC_VOLATILE (DImode,
+ gen_rtvec (1, const0_rtx),
+ UNSPECV_RDTSC);
+ RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, lower, src);
+ RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, src);
+ emit_insn (load);
+ upper = expand_simple_binop (DImode, ASHIFT, upper, GEN_INT (32),
+ NULL, 1, OPTAB_DIRECT);
+ lower = expand_simple_binop (DImode, IOR, lower, upper, lower, 1,
+ OPTAB_DIRECT);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], lower));
+ DONE;
+ }
+})
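+;; On 64-bit targets rdtsc still delivers the timestamp split across
+;; %eax/%edx, so the expander above merges the halves, roughly:
+;; rdtsc
+;; salq $32, %rdx
+;; orq %rdx, %rax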
+
+(define_insn "*rdtsc"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
+ "!TARGET_64BIT"
+ "rdtsc"
+ [(set_attr "type" "other")
+ (set_attr "length" "2")])
+
+(define_insn "*rdtsc_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
+ (set (match_operand:DI 1 "register_operand" "=d")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
+ "TARGET_64BIT"
+ "rdtsc"
+ [(set_attr "type" "other")
+ (set_attr "length" "2")])
+
+(define_expand "rdtscp"
+ [(match_operand:DI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" "")]
+ ""
+{
+ rtx di = gen_rtx_UNSPEC_VOLATILE (DImode,
+ gen_rtvec (1, const0_rtx),
+ UNSPECV_RDTSCP);
+ rtx si = gen_rtx_UNSPEC_VOLATILE (SImode,
+ gen_rtvec (1, const0_rtx),
+ UNSPECV_RDTSCP);
+ rtx reg = gen_reg_rtx (DImode);
+ rtx tmp = gen_reg_rtx (SImode);
+
+ if (TARGET_64BIT)
+ {
+ rtvec vec = rtvec_alloc (3);
+ rtx load = gen_rtx_PARALLEL (VOIDmode, vec);
+ rtx upper = gen_reg_rtx (DImode);
+ RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, di);
+ RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, upper, di);
+ RTVEC_ELT (vec, 2) = gen_rtx_SET (VOIDmode, tmp, si);
+ emit_insn (load);
+ upper = expand_simple_binop (DImode, ASHIFT, upper, GEN_INT (32),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (DImode, IOR, reg, upper, reg, 1,
+ OPTAB_DIRECT);
+ }
+ else
+ {
+ rtvec vec = rtvec_alloc (2);
+ rtx load = gen_rtx_PARALLEL (VOIDmode, vec);
+ RTVEC_ELT (vec, 0) = gen_rtx_SET (VOIDmode, reg, di);
+ RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, tmp, si);
+ emit_insn (load);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], reg));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[1], tmp));
+ DONE;
+})
+
+(define_insn "*rdtscp"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+ (set (match_operand:SI 1 "register_operand" "=c")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
+ "!TARGET_64BIT"
+ "rdtscp"
+ [(set_attr "type" "other")
+ (set_attr "length" "3")])
+
+(define_insn "*rdtscp_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+ (set (match_operand:DI 1 "register_operand" "=d")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+ (set (match_operand:SI 2 "register_operand" "=c")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
+ "TARGET_64BIT"
+ "rdtscp"
+ [(set_attr "type" "other")
+ (set_attr "length" "3")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; LWP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "lwp_llwpcb"
+ [(unspec_volatile [(match_operand 0 "register_operand" "r")]
+ UNSPECV_LLWP_INTRINSIC)]
+ "TARGET_LWP")
+
+(define_insn "*lwp_llwpcb<mode>1"
+ [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+ UNSPECV_LLWP_INTRINSIC)]
+ "TARGET_LWP"
+ "llwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "5")])
+
+(define_expand "lwp_slwpcb"
+ [(set (match_operand 0 "register_operand" "=r")
+ (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+ "TARGET_LWP"
+{
+ if (TARGET_64BIT)
+ emit_insn (gen_lwp_slwpcbdi (operands[0]));
+ else
+ emit_insn (gen_lwp_slwpcbsi (operands[0]));
+ DONE;
+})
+
+(define_insn "lwp_slwpcb<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+ "TARGET_LWP"
+ "slwpcb\t%0"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "5")])
+
+(define_expand "lwp_lwpval<mode>3"
+ [(unspec_volatile [(match_operand:SWI48 1 "register_operand" "r")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")
+ (match_operand:SI 3 "const_int_operand" "i")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ ;; Avoid unused variable warning.
+ "(void) operands[0];")
+
+(define_insn "*lwp_lwpval<mode>3_1"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPECV_LWPVAL_INTRINSIC)]
+ "TARGET_LWP"
+ "lwpval\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
+(define_expand "lwp_lwpins<mode>3"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand" "r")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")
+ (match_operand:SI 3 "const_int_operand" "i")]
+ UNSPECV_LWPINS_INTRINSIC))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
+ "TARGET_LWP")
+
+(define_insn "*lwp_lwpins<mode>3_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
+ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPECV_LWPINS_INTRINSIC))]
+ "TARGET_LWP"
+ "lwpins\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "lwp")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length")
+ (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
+(define_insn "rdfsbase<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDFSBASE))]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "rdfsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "rdgsbase<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec_volatile:SWI48 [(const_int 0)] UNSPECV_RDGSBASE))]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "rdgsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "wrfsbase<mode>"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+ UNSPECV_WRFSBASE)]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "wrfsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "wrgsbase<mode>"
+ [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+ UNSPECV_WRGSBASE)]
+ "TARGET_64BIT && TARGET_FSGSBASE"
+ "wrgsbase %0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "2")])
+
+(define_insn "rdrand<mode>_1"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
+ (set (reg:CCC FLAGS_REG)
+ (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
+ "TARGET_RDRND"
+ "rdrand\t%0"
+ [(set_attr "type" "other")
+ (set_attr "prefix_extra" "1")])
+
+(include "mmx.md")
+(include "sse.md")
+(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
new file mode 100644
index 000000000..fe5949f3b
--- /dev/null
+++ b/gcc/config/i386/i386.opt
@@ -0,0 +1,425 @@
+; Options for the IA-32 and AMD64 ports of the compiler.
+
+; Copyright (C) 2005, 2006, 2007, 2008, 2009,
+; 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; Bit flags that specify the ISA we are compiling for.
+Variable
+int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT
+
+;; Definitions to add to the cl_target_option structure
+;; -march= processor
+TargetSave
+unsigned char arch
+
+;; -mtune= processor
+TargetSave
+unsigned char tune
+
+;; -mfpmath=
+TargetSave
+unsigned char fpmath
+
+;; CPU schedule model
+TargetSave
+unsigned char schedule
+
+;; branch cost
+TargetSave
+unsigned char branch_cost
+
+;; which flags were passed by the user
+TargetSave
+int ix86_isa_flags_explicit
+
+;; which flags were passed by the user
+TargetSave
+int ix86_target_flags_explicit
+
+;; whether -mtune was not specified
+TargetSave
+unsigned char tune_defaulted
+
+;; whether -march was specified
+TargetSave
+unsigned char arch_specified
+
+;; x86 options
+m128bit-long-double
+Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save
+sizeof(long double) is 16
+
+m80387
+Target Report Mask(80387) Save
+Use hardware fp
+
+m96bit-long-double
+Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) Save
+sizeof(long double) is 12
+
+maccumulate-outgoing-args
+Target Report Mask(ACCUMULATE_OUTGOING_ARGS) Save
+Reserve space for outgoing arguments in the function prologue
+
+malign-double
+Target Report Mask(ALIGN_DOUBLE) Save
+Align some doubles on dword boundary
+
+malign-functions=
+Target RejectNegative Joined Var(ix86_align_funcs_string)
+Function starts are aligned to this power of 2
+
+malign-jumps=
+Target RejectNegative Joined Var(ix86_align_jumps_string)
+Jump targets are aligned to this power of 2
+
+malign-loops=
+Target RejectNegative Joined Var(ix86_align_loops_string)
+Loop code aligned to this power of 2
+
+malign-stringops
+Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) Save
+Align destination of the string operations
+
+march=
+Target RejectNegative Joined Var(ix86_arch_string)
+Generate code for given CPU
+
+masm=
+Target RejectNegative Joined Var(ix86_asm_string)
+Use given assembler dialect
+
+mbranch-cost=
+Target RejectNegative Joined Var(ix86_branch_cost_string)
+Branches are this expensive (1-5, arbitrary units)
+
+mlarge-data-threshold=
+Target RejectNegative Joined Var(ix86_section_threshold_string)
+Data greater than given threshold will go into .ldata section in x86-64 medium model
+
+mcmodel=
+Target RejectNegative Joined Var(ix86_cmodel_string)
+Use given x86-64 code model
+
+mcpu=
+Target RejectNegative Joined Undocumented Alias(mtune=) Warn(%<-mcpu=%> is deprecated; use %<-mtune=%> or %<-march=%> instead)
+
+mfancy-math-387
+Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
+Generate sin, cos, sqrt for FPU
+
+mforce-drap
+Target Report Var(ix86_force_drap)
+Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack
+
+mfp-ret-in-387
+Target Report Mask(FLOAT_RETURNS) Save
+Return values of functions in FPU registers
+
+mfpmath=
+Target RejectNegative Joined Var(ix86_fpmath_string)
+Generate floating point mathematics using given instruction set
+
+mhard-float
+Target RejectNegative Mask(80387) MaskExists Save
+Use hardware fp
+
+mieee-fp
+Target Report Mask(IEEE_FP) Save
+Use IEEE math for fp comparisons
+
+minline-all-stringops
+Target Report Mask(INLINE_ALL_STRINGOPS) Save
+Inline all known string operations
+
+minline-stringops-dynamically
+Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) Save
+Inline memset/memcpy string operations, but perform inline version only for small blocks
+
+mintel-syntax
+Target Undocumented Alias(masm=, intel, att) Warn(%<-mintel-syntax%> and %<-mno-intel-syntax%> are deprecated; use %<-masm=intel%> and %<-masm=att%> instead)
+;; Deprecated
+
+mms-bitfields
+Target Report Mask(MS_BITFIELD_LAYOUT) Save
+Use native (MS) bitfield layout
+
+mno-align-stringops
+Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented Save
+
+mno-fancy-math-387
+Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented Save
+
+mno-push-args
+Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented Save
+
+mno-red-zone
+Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented Save
+
+momit-leaf-frame-pointer
+Target Report Mask(OMIT_LEAF_FRAME_POINTER) Save
+Omit the frame pointer in leaf functions
+
+mpc
+Target RejectNegative Report Joined Var(ix87_precision_string)
+Set 80387 floating-point precision (-mpc32, -mpc64, -mpc80)
+
+mpreferred-stack-boundary=
+Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string)
+Attempt to keep stack aligned to this power of 2
+
+mincoming-stack-boundary=
+Target RejectNegative Joined Var(ix86_incoming_stack_boundary_string)
+Assume incoming stack aligned to this power of 2
+
+mpush-args
+Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) Save
+Use push instructions to save outgoing arguments
+
+mred-zone
+Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) Save
+Use red-zone in the x86-64 code
+
+mregparm=
+Target RejectNegative Joined Var(ix86_regparm_string)
+Number of registers used to pass integer arguments
+
+mrtd
+Target Report Mask(RTD) Save
+Alternate calling convention
+
+msoft-float
+Target InverseMask(80387) Save
+Do not use hardware fp
+
+msseregparm
+Target RejectNegative Mask(SSEREGPARM) Save
+Use SSE register passing conventions for SF and DF mode
+
+mstackrealign
+Target Report Var(ix86_force_align_arg_pointer) Init(-1)
+Realign stack in prologue
+
+mstack-arg-probe
+Target Report Mask(STACK_PROBE) Save
+Enable stack probing
+
+mstringop-strategy=
+Target RejectNegative Joined Var(ix86_stringop_string)
+Choose the strategy to use when generating string operations
+
+mtls-dialect=
+Target RejectNegative Joined Var(ix86_tls_dialect_string)
+Use given thread-local storage dialect
+
+mtls-direct-seg-refs
+Target Report Mask(TLS_DIRECT_SEG_REFS)
+Use direct references against %gs when accessing tls data
+
+mtune=
+Target RejectNegative Joined Var(ix86_tune_string)
+Schedule code for given CPU
+
+mabi=
+Target RejectNegative Joined Var(ix86_abi_string)
+Generate code that conforms to the given ABI
+
+mveclibabi=
+Target RejectNegative Joined Var(ix86_veclibabi_string)
+Vector library ABI to use
+
+mvect8-ret-in-mem
+Target Report Mask(VECT8_RETURNS) Save
+Return 8-byte vectors in memory
+
+mrecip
+Target Report Mask(RECIP) Save
+Generate reciprocals instead of divss and sqrtss.
+
+mcld
+Target Report Mask(CLD) Save
+Generate cld instruction in the function prologue.
+
+mvzeroupper
+Target Report Mask(VZEROUPPER) Save
+Generate vzeroupper instruction before a transfer of control flow out of
+the function.
+
+mdispatch-scheduler
+Target RejectNegative Var(flag_dispatch_scheduler)
+Do dispatch scheduling if processor is bdver1 and Haifa scheduling
+is selected.
+
+mprefer-avx128
+Target Report Mask(PREFER_AVX128) Save
+Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer.
+
+;; ISA support
+
+m32
+Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) Save
+Generate 32bit i386 code
+
+m64
+Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) Save
+Generate 64bit x86-64 code
+
+mmmx
+Target Report Mask(ISA_MMX) Var(ix86_isa_flags) Save
+Support MMX built-in functions
+
+m3dnow
+Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) Save
+Support 3DNow! built-in functions
+
+m3dnowa
+Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) Save
+Support Athlon 3DNow! built-in functions
+
+msse
+Target Report Mask(ISA_SSE) Var(ix86_isa_flags) Save
+Support MMX and SSE built-in functions and code generation
+
+msse2
+Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) Save
+Support MMX, SSE and SSE2 built-in functions and code generation
+
+msse3
+Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
+
+mssse3
+Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
+
+msse4.1
+Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
+
+msse4.2
+Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+msse4
+Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+mno-sse4
+Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) Save
+Do not support SSE4.1 and SSE4.2 built-in functions and code generation
+
+msse5
+Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed)
+;; Deprecated
+
+mavx
+Target Report Mask(ISA_AVX) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation
+
+mfma
+Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
+
+msse4a
+Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
+
+mfma4
+Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) Save
+Support FMA4 built-in functions and code generation
+
+mxop
+Target Report Mask(ISA_XOP) Var(ix86_isa_flags) Save
+Support XOP built-in functions and code generation
+
+mlwp
+Target Report Mask(ISA_LWP) Var(ix86_isa_flags) Save
+Support LWP built-in functions and code generation
+
+mabm
+Target Report Mask(ISA_ABM) Var(ix86_isa_flags) Save
+Support code generation of Advanced Bit Manipulation (ABM) instructions.
+
+mpopcnt
+Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) Save
+Support code generation of popcnt instruction.
+
+mbmi
+Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
+Support BMI built-in functions and code generation
+
+mtbm
+Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save
+Support TBM built-in functions and code generation
+
+mcx16
+Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save
+Support code generation of cmpxchg16b instruction.
+
+msahf
+Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) Save
+Support code generation of sahf instruction in 64bit x86-64 code.
+
+mmovbe
+Target Report Mask(ISA_MOVBE) Var(ix86_isa_flags) Save
+Support code generation of movbe instruction.
+
+mcrc32
+Target Report Mask(ISA_CRC32) Var(ix86_isa_flags) Save
+Support code generation of crc32 instruction.
+
+maes
+Target Report Mask(ISA_AES) Var(ix86_isa_flags) Save
+Support AES built-in functions and code generation
+
+mpclmul
+Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) Save
+Support PCLMUL built-in functions and code generation
+
+msse2avx
+Target Report Var(ix86_sse2avx)
+Encode SSE instructions with VEX prefix
+
+mfsgsbase
+Target Report Mask(ISA_FSGSBASE) Var(ix86_isa_flags) Save
+Support FSGSBASE built-in functions and code generation
+
+mrdrnd
+Target Report Mask(ISA_RDRND) Var(ix86_isa_flags) Save
+Support RDRND built-in functions and code generation
+
+mf16c
+Target Report Mask(ISA_F16C) Var(ix86_isa_flags) Save
+Support F16C built-in functions and code generation
+
+mfentry
+Target Report Var(flag_fentry) Init(-1)
+Emit profiling counter call at function entry before prologue.
+
+m8bit-idiv
+Target Report Mask(USE_8BIT_IDIV) Save
+Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
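[note: the run-time check mentioned above amounts to testing whether both operands fit in 8 bits; a rough C sketch of the shape of the expansion, helper name ours, not GCC's:]

/* Rough shape of what -m8bit-idiv emits: use the cheap 8-bit divide
   when both operands are small, else fall back to the full divide.  */
static unsigned int
div_maybe_8bit (unsigned int a, unsigned int b)
{
  if (((a | b) & ~0xffu) == 0)   /* both operands fit in 8 bits */
    return (unsigned char) a / (unsigned char) b;
  return a / b;
}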
+
+mavx256-split-unaligned-load
+Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
+Split 32-byte AVX unaligned load
+
+mavx256-split-unaligned-store
+Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
+Split 32-byte AVX unaligned store
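[note: each -m<isa> option above also arranges for a matching preprocessor macro (for example __SSE4_2__, __AVX__, __POPCNT__), which is exactly what the intrinsics headers added below test; a small sketch of a translation unit probing them:]

#include <stdio.h>

int
main (void)
{
#ifdef __SSE4_2__
  puts ("built with -msse4.2 (or an option that implies it)");
#endif
#ifdef __AVX__
  puts ("built with -mavx");
#endif
  return 0;
}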
diff --git a/gcc/config/i386/i386elf.h b/gcc/config/i386/i386elf.h
new file mode 100644
index 000000000..79d7b9585
--- /dev/null
+++ b/gcc/config/i386/i386elf.h
@@ -0,0 +1,125 @@
+/* Target definitions for GCC for Intel 80386 using ELF
+ Copyright (C) 1988, 1991, 1995, 2000, 2001, 2002, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+ Derived from sysv4.h written by Ron Guilmette (rfg@netcom.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use stabs instead of DWARF debug format. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+#define TARGET_VERSION fprintf (stderr, " (i386 bare ELF target)");
+
+/* The ELF ABI for the i386 says that records and unions are returned
+ in memory. */
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ (TYPE_MODE (TYPE) == BLKmode \
+ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8))
+
+#undef CPP_SPEC
+#define CPP_SPEC ""
+
+#define ENDFILE_SPEC "crtend.o%s"
+
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\
+ crtbegin.o%s"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ unsigned bytes_in_chunk = 0; \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ if (bytes_in_chunk >= 64) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ if (bytes_in_chunk == 0) \
+ fputs (ASM_BYTE, (FILE)); \
+ else \
+ fputc (',', (FILE)); \
+ fprintf ((FILE), "0x%02x", *_ascii_bytes); \
+ bytes_in_chunk += 5; \
+ } \
+ } \
+ if (bytes_in_chunk > 0) \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
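[note: the macro above is hard to read in its stringified form; a standalone C sketch of the same chunking strategy, with STRING_LIMIT assumed to be a typical svr4 value and escaping of special characters omitted for brevity:]

#include <stdio.h>

#define STRING_LIMIT 256   /* assumed; matches typical svr4 configs */

/* Walk LENGTH bytes of STR: emit NUL-terminated runs no longer than
   STRING_LIMIT as one .string directive, everything else as
   comma-separated .byte values wrapped at about 64 output columns.  */
static void
output_ascii (FILE *file, const unsigned char *str, size_t length)
{
  const unsigned char *bytes = str;
  const unsigned char *limit = str + length;
  unsigned bytes_in_chunk = 0;

  for (; bytes < limit; bytes++)
    {
      const unsigned char *p;

      if (bytes_in_chunk >= 64)
        {
          fputc ('\n', file);
          bytes_in_chunk = 0;
        }
      /* Find the end of the current NUL-terminated run.  */
      for (p = bytes; p < limit && *p != '\0'; p++)
        continue;
      if (p < limit && (size_t) (p - bytes) <= STRING_LIMIT)
        {
          if (bytes_in_chunk > 0)
            {
              fputc ('\n', file);
              bytes_in_chunk = 0;
            }
          fprintf (file, "\t.string\t\"%.*s\"\n",
                   (int) (p - bytes), (const char *) bytes);
          bytes = p;   /* the loop increment then skips the NUL */
        }
      else
        {
          if (bytes_in_chunk == 0)
            fputs ("\t.byte\t", file);
          else
            fputc (',', file);
          fprintf (file, "0x%02x", *bytes);
          bytes_in_chunk += 5;
        }
    }
  if (bytes_in_chunk > 0)
    fputc ('\n', file);
}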
+
+#define LOCAL_LABEL_PREFIX "."
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
new file mode 100644
index 000000000..76c20a69a
--- /dev/null
+++ b/gcc/config/i386/ia32intrin.h
@@ -0,0 +1,234 @@
+/* Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+/* 32bit bsf */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsfd (int __X)
+{
+ return __builtin_ctz (__X);
+}
+
+/* 32bit bsr */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsrd (int __X)
+{
+ return __builtin_ia32_bsrsi (__X);
+}
+
+/* 32bit bswap */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bswapd (int __X)
+{
+ return __builtin_bswap32 (__X);
+}
+
+#ifdef __SSE4_2__
+/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32b (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32w (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32d (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+#endif /* SSE4.2 */
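[note: the accumulator pattern is the intended use of these wrappers; a small sketch, helper name ours (compile with -msse4.2 and include <x86intrin.h>, which pulls in this header):]

#include <x86intrin.h>

/* Accumulate the CRC32-C of N bytes of BUF into CRC, a byte at a time.  */
static unsigned int
crc32c_update (unsigned int crc, const unsigned char *buf, unsigned long n)
{
  unsigned long i;
  for (i = 0; i < n; i++)
    crc = __crc32b (crc, buf[i]);
  return crc;
}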
+
+/* 32bit popcnt */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__popcntd (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+/* rdpmc */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdpmc (int __S)
+{
+ return __builtin_ia32_rdpmc (__S);
+}
+
+/* rdtsc */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtsc (void)
+{
+ return __builtin_ia32_rdtsc ();
+}
+
+/* rdtscp */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtscp (unsigned int *__A)
+{
+ return __builtin_ia32_rdtscp (__A);
+}
+
+/* 8bit rol */
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolb (unsigned char __X, int __C)
+{
+ return __builtin_ia32_rolqi (__X, __C);
+}
+
+/* 16bit rol */
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolw (unsigned short __X, int __C)
+{
+ return __builtin_ia32_rolhi (__X, __C);
+}
+
+/* 32bit rol */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rold (unsigned int __X, int __C)
+{
+ return (__X << __C) | (__X >> (32 - __C));
+}
+
+/* 8bit ror */
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorb (unsigned char __X, int __C)
+{
+ return __builtin_ia32_rorqi (__X, __C);
+}
+
+/* 16bit ror */
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorw (unsigned short __X, int __C)
+{
+ return __builtin_ia32_rorhi (__X, __C);
+}
+
+/* 32bit ror */
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rord (unsigned int __X, int __C)
+{
+ return (__X >> __C) | (__X << (32 - __C));
+}
+
+#ifdef __x86_64__
+/* 64bit bsf */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsfq (long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+
+/* 64bit bsr */
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bsrq (long long __X)
+{
+ return __builtin_ia32_bsrdi (__X);
+}
+
+/* 64bit bswap */
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bswapq (long long __X)
+{
+ return __builtin_bswap64 (__X);
+}
+
+#ifdef __SSE4_2__
+/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__crc32q (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
+
+/* 64bit popcnt */
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__popcntq (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+
+/* 64bit rol */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rolq (unsigned long long __X, int __C)
+{
+ return (__X << __C) | (__X >> (64 - __C));
+}
+
+/* 64bit ror */
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rorq (unsigned long long __X, int __C)
+{
+ return (__X >> __C) | (__X << (64 - __C));
+}
+
+#define _bswap64(a) __bswapq(a)
+#define _popcnt64(a) __popcntq(a)
+#define _lrotl(a,b) __rolq((a), (b))
+#define _lrotr(a,b) __rorq((a), (b))
+#else
+#define _lrotl(a,b) __rold((a), (b))
+#define _lrotr(a,b) __rord((a), (b))
+#endif
+
+#define _bit_scan_forward(a) __bsfd(a)
+#define _bit_scan_reverse(a) __bsrd(a)
+#define _bswap(a) __bswapd(a)
+#define _popcnt32(a) __popcntd(a)
+#define _rdpmc(a) __rdpmc(a)
+#define _rdtsc() __rdtsc()
+#define _rdtscp(a) __rdtscp(a)
+#define _rotwl(a,b) __rolw((a), (b))
+#define _rotwr(a,b) __rorw((a), (b))
+#define _rotl(a,b) __rold((a), (b))
+#define _rotr(a,b) __rord((a), (b))
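[note: user code reaches the wrappers through the macro names just defined; a minimal usage sketch:]

#include <stdio.h>
#include <x86intrin.h>

int
main (void)
{
  unsigned int x = 0x12345678u;

  printf ("%08x\n", _rotl (x, 8));          /* 34567812 */
  printf ("%08x\n", _bswap (x));            /* 78563412 */
  printf ("%d\n", _bit_scan_forward (x));   /* 3: lowest set bit */
  return 0;
}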
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
new file mode 100644
index 000000000..11a1a4e0c
--- /dev/null
+++ b/gcc/config/i386/immintrin.h
@@ -0,0 +1,203 @@
+/* Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#define _IMMINTRIN_H_INCLUDED
+
+#ifdef __MMX__
+#include <mmintrin.h>
+#endif
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#endif
+
+#ifdef __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#if defined (__AES__) || defined (__PCLMUL__)
+#include <wmmintrin.h>
+#endif
+
+#ifdef __AVX__
+#include <avxintrin.h>
+#endif
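[note: only the headers whose ISA macro is enabled get pulled in, so a single #include <immintrin.h> suffices in user code; a sketch assuming compilation with -mavx:]

#include <immintrin.h>

#ifdef __AVX__
/* Add eight floats with one 256-bit operation.  */
static void
add8 (const float *a, const float *b, float *out)
{
  __m256 va = _mm256_loadu_ps (a);
  __m256 vb = _mm256_loadu_ps (b);
  _mm256_storeu_ps (out, _mm256_add_ps (va, vb));
}
#endif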
+
+#ifdef __RDRND__
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand16_step (unsigned short *__P)
+{
+ return __builtin_ia32_rdrand16_step (__P);
+}
+
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand32_step (unsigned int *__P)
+{
+ return __builtin_ia32_rdrand32_step (__P);
+}
+#endif /* __RDRND__ */
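[note: _rdrand32_step signals transient failure through its return value, so callers are expected to retry; a sketch with an assumed retry bound (compile with -mrdrnd):]

#include <immintrin.h>

/* Return 1 and store a hardware random value in *OUT on success,
   0 if the DRNG stayed unready for all attempts.  */
static int
get_random_u32 (unsigned int *out)
{
  int tries;
  for (tries = 0; tries < 10; tries++)
    if (_rdrand32_step (out))
      return 1;
  return 0;
}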
+
+#ifdef __x86_64__
+#ifdef __FSGSBASE__
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u32 (void)
+{
+ return __builtin_ia32_rdfsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readfsbase_u64 (void)
+{
+ return __builtin_ia32_rdfsbase64 ();
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u32 (void)
+{
+ return __builtin_ia32_rdgsbase32 ();
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_readgsbase_u64 (void)
+{
+ return __builtin_ia32_rdgsbase64 ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrfsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writefsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrfsbase64 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u32 (unsigned int __B)
+{
+ __builtin_ia32_wrgsbase32 (__B);
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_writegsbase_u64 (unsigned long long __B)
+{
+ __builtin_ia32_wrgsbase64 (__B);
+}
+#endif /* __FSGSBASE__ */
+
+#ifdef __RDRND__
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdrand64_step (unsigned long long *__P)
+{
+ return __builtin_ia32_rdrand64_step (__P);
+}
+#endif /* __RDRND__ */
+#endif /* __x86_64__ */
+
+#ifdef __F16C__
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtsh_ss (unsigned short __S)
+{
+ __v8hi __H = __extension__ (__v8hi){ __S, 0, 0, 0, 0, 0, 0, 0 };
+ __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
+ return __builtin_ia32_vec_ext_v4sf (__A, 0);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtph_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtph_ps (__m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_cvtss_sh (float __F, const int __I)
+{
+ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
+ __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
+ return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_ph (__m128 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_ph (__m256 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
+}
+#else
+#define _cvtss_sh(__F, __I) \
+ (__extension__ \
+ ({ \
+ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \
+ __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \
+ (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \
+ }))
+
+#define _mm_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) A, (int) (I)))
+
+#define _mm256_cvtps_ph(A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I)))
+#endif
+
+#endif /* __F16C__ */
+
+#endif /* _IMMINTRIN_H_INCLUDED */
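[note: a round trip through the F16C conversions above, with rounding control 0 (round to nearest) passed as the immediate; compile with -mf16c:]

#include <immintrin.h>

/* Convert F to half precision and back; useful for observing the
   precision loss of the 16-bit format.  */
static float
through_half (float f)
{
  unsigned short h = _cvtss_sh (f, 0);   /* 0 = round to nearest */
  return _cvtsh_ss (h);
}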
diff --git a/gcc/config/i386/k6.md b/gcc/config/i386/k6.md
new file mode 100644
index 000000000..030bc26a6
--- /dev/null
+++ b/gcc/config/i386/k6.md
@@ -0,0 +1,267 @@
+;; AMD K6/K6-2 Scheduling
+;; Copyright (C) 2002, 2004, 2007
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The K6 architecture is quite similar to PPro.  An important difference
+;; is that there are only two decoders, and they seem to be much slower
+;; than any of the execution units.  So we have to pay much more attention
+;; to proper scheduling for the decoders.
+;; FIXME: We don't do that right now. A good start would be to sort the
+;; instructions based on length.
+;;
+;; This description is based on data from the following documents:
+;;
+;; "AMD-K6 Processor Data Sheet (Preliminary information)"
+;; Advanced Micro Devices, Inc., 1998.
+;;
+;; "AMD-K6 Processor Code Optimization Application Note"
+;; Advanced Micro Devices, Inc., 2000.
+;;
+;; CPU execution units of the K6:
+;;
+;; store describes the Store unit. This unit is not modelled
+;;		completely and it is only used to model the lea operation.
+;; Otherwise it lies outside of any critical path.
+;; load describes the Load unit
+;; alux describes the Integer X unit
+;; mm describes the Multimedia unit, which shares a pipe
+;;		with the Integer X unit.  This unit would be used for MMX,
+;;		which is not modelled for the K6 in this description.
+;; aluy describes the Integer Y unit
+;; fpu describes the FPU unit
+;; branch describes the Branch unit
+;;
+;; The fp unit is not pipelined, and it can only do one operation per two
+;; cycles, including fxch.
+;;
+;; Generally this is a very poor description, but at least no worse than
+;; the old description, and a lot easier to extend to something more
+;; reasonable if anyone still cares enough about this architecture in 2004.
+;;
+;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
+
+(define_automaton "k6_decoder,k6_load_unit,k6_store_unit,k6_integer_units,k6_fpu_unit,k6_branch_unit")
+
+;; The K6 instruction decoding begins before the on-chip instruction cache is
+;; filled. Depending on the length of the instruction, two simple instructions
+;; can be decoded in two parallel short decoders, or one complex instruction can
+;; be decoded in either the long or the vector decoder. For all practical
+;; purposes, the long and vector decoder can be modelled as one decoder.
+(define_cpu_unit "k6_decode_short0" "k6_decoder")
+(define_cpu_unit "k6_decode_short1" "k6_decoder")
+(define_cpu_unit "k6_decode_long" "k6_decoder")
+(exclusion_set "k6_decode_long" "k6_decode_short0,k6_decode_short1")
+(define_reservation "k6_decode_short" "k6_decode_short0|k6_decode_short1")
+(define_reservation "k6_decode_vector" "k6_decode_long")
+
+(define_cpu_unit "k6_store" "k6_store_unit")
+(define_cpu_unit "k6_load" "k6_load_unit")
+(define_cpu_unit "k6_alux,k6_aluy" "k6_integer_units")
+(define_cpu_unit "k6_fpu" "k6_fpu_unit")
+(define_cpu_unit "k6_branch" "k6_branch_unit")
+
+;; Shift instructions and certain arithmetic are issued only on Integer X.
+(define_insn_reservation "k6_alux_only" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux")
+
+(define_insn_reservation "k6_alux_only_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux")
+
+(define_insn_reservation "k6_alux_only_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux,k6_store")
+
+;; Integer divide and multiply can only be issued on Integer X, too.
+(define_insn_reservation "k6_alu_imul" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "imul"))
+ "k6_decode_vector,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_load" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load")))
+ "k6_decode_vector,k6_load,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_store" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_vector,k6_load,k6_alux*3,k6_store")
+
+;; ??? Guessed latencies based on the old pipeline description.
+(define_insn_reservation "k6_alu_idiv" 17
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_alux*17")
+
+(define_insn_reservation "k6_alu_idiv_mem" 19
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "!none")))
+ "k6_decode_vector,k6_load,k6_alux*17")
+
+;; Basic word and doubleword ALU ops can be issued on both Integer units.
+(define_insn_reservation "k6_alu" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy,k6_store")
+
+;; A "load immediate" operation does not require execution at all,
+;; it is available immediately after decoding. Special-case this.
+(define_insn_reservation "k6_alu_imov" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "nonimmediate_operand"))))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_imov_imm" 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "immediate_operand"))))
+ "k6_decode_short")
+
+(define_insn_reservation "k6_alu_imov_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_alu_imov_store" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_alu_imov_both" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy")
+
+;; The branch unit.
+(define_insn_reservation "k6_branch_call" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "call,callv"))
+ "k6_decode_vector,k6_branch")
+
+(define_insn_reservation "k6_branch_branch" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "ibr"))
+ "k6_decode_short,k6_branch")
+
+;; The load and store units have two pipeline stages.  The load latency is
+;; two cycles.
+(define_insn_reservation "k6_load_pop" 3
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "pop")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_load_leave" 5
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "leave"))
+ "k6_decode_long,k6_load,(k6_alux|k6_aluy)*2")
+
+;; ??? From the old pipeline description. Egad!
+;; ??? Apparently we take care of this reservation in adjust_cost.
+(define_insn_reservation "k6_load_str" 10
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_vector,k6_load*10")
+
+;; The store unit handles lea and push. It is otherwise unmodelled.
+(define_insn_reservation "k6_store_lea" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "lea"))
+ "k6_decode_short,k6_store,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_store_push" 2
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "push")
+ (eq_attr "memory" "store,both")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_store_str" 10
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "str"))
+ "k6_store*10")
+
+;; Most FPU instructions have latency 2 and throughput 2.
+(define_insn_reservation "k6_fpu" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_load" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_store" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+;; ??? Guessed latencies from the old pipeline description.
+(define_insn_reservation "k6_fpu_expensive" 56
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "fdiv,fpspc"))
+ "k6_decode_short,k6_fpu*56")
+
diff --git a/gcc/config/i386/kfreebsd-gnu.h b/gcc/config/i386/kfreebsd-gnu.h
new file mode 100644
index 000000000..b5fb2ba29
--- /dev/null
+++ b/gcc/config/i386/kfreebsd-gnu.h
@@ -0,0 +1,25 @@
+/* Definitions for Intel 386 running kFreeBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINK_EMULATION
+#define LINK_EMULATION "elf_i386_fbsd"
+#undef REG_NAME
+#define REG_NAME(reg) sc_ ## reg
diff --git a/gcc/config/i386/knetbsd-gnu.h b/gcc/config/i386/knetbsd-gnu.h
new file mode 100644
index 000000000..54f5a6920
--- /dev/null
+++ b/gcc/config/i386/knetbsd-gnu.h
@@ -0,0 +1,23 @@
+/* Definitions for Intel 386 running kNetBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef REG_NAME
+#define REG_NAME(reg) sc_ ## reg
diff --git a/gcc/config/i386/kopensolaris-gnu.h b/gcc/config/i386/kopensolaris-gnu.h
new file mode 100644
index 000000000..3e315b83f
--- /dev/null
+++ b/gcc/config/i386/kopensolaris-gnu.h
@@ -0,0 +1,22 @@
+/* Definitions for Intel 386 running kOpenSolaris-based GNU systems with ELF format
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef MD_UNWIND_SUPPORT
diff --git a/gcc/config/i386/libgcc-glibc.ver b/gcc/config/i386/libgcc-glibc.ver
new file mode 100644
index 000000000..e79d3267f
--- /dev/null
+++ b/gcc/config/i386/libgcc-glibc.ver
@@ -0,0 +1,186 @@
+# Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By choosing the same version tags for these specific routines, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+%ifndef __x86_64__
+%exclude {
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.0
+GLIBC_2.0 {
+ # Sampling of DImode arithmetic used by (at least) i386 and m68k.
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+
+ # Exception handling support functions used by most everyone.
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+%endif
+
+# 128-bit long double support was introduced with GCC 4.3.0 for 64-bit
+# and with GCC 4.4.0 for 32-bit.  These lines make the symbols get
+# a @@GCC_4.3.0 or @@GCC_4.4.0 version attached.
+
+%exclude {
+ __addtf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __extendxftf2
+ __fixtfdi
+ __fixtfsi
+ __fixtfti
+ __fixunstfdi
+ __fixunstfsi
+ __fixunstfti
+ __floatditf
+ __floatsitf
+ __floattitf
+ __floatunditf
+ __floatunsitf
+ __floatuntitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+
+%ifdef __x86_64__
+# Those symbols had improper versions when they were added to gcc 4.3.0.
+# We corrected the default version to GCC_4.3.0. But we keep the old
+# version for backward binary compatibility.
+GCC_3.0 {
+ __gttf2
+ __lttf2
+ __netf2
+}
+
+GCC_4.0.0 {
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+GCC_4.3.0 {
+ __addtf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __extendxftf2
+ __fixtfdi
+ __fixtfsi
+ __fixtfti
+ __fixunstfdi
+ __fixunstfsi
+ __fixunstfti
+ __floatditf
+ __floatsitf
+ __floattitf
+ __floatunditf
+ __floatunsitf
+ __floatuntitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+%else
+GCC_4.4.0 {
+ __addtf3
+ __copysigntf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __fabstf2
+ __fixtfdi
+ __fixtfsi
+ __fixunstfdi
+ __fixunstfsi
+ __floatditf
+ __floatsitf
+ __floatunditf
+ __floatunsitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+GCC_4.5.0 {
+ __extendxftf2
+}
+%endif
diff --git a/gcc/config/i386/linux-unwind.h b/gcc/config/i386/linux-unwind.h
new file mode 100644
index 000000000..9e4be8010
--- /dev/null
+++ b/gcc/config/i386/linux-unwind.h
@@ -0,0 +1,197 @@
+/* DWARF2 EH unwinding support for AMD x86-64 and x86.
+ Copyright (C) 2004, 2005, 2006, 2009, 2010, 2012 Free Software Foundation,
+ Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#ifdef __x86_64__
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_64_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+
+ /* movq __NR_rt_sigreturn, %rax ; syscall */
+ if (*(unsigned char *)(pc+0) == 0x48
+ && *(unsigned long *)(pc+1) == 0x050f0000000fc0c7)
+ {
+ struct ucontext *uc_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &uc_->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->rsp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ /* Register 7 is rsp */
+ fs->regs.cfa_reg = 7;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&sc->rax - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&sc->rdx - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&sc->rcx - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&sc->rbx - new_cfa;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&sc->rsi - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&sc->rdi - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&sc->rbp - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&sc->r8 - new_cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&sc->r9 - new_cfa;
+ fs->regs.reg[10].how = REG_SAVED_OFFSET;
+ fs->regs.reg[10].loc.offset = (long)&sc->r10 - new_cfa;
+ fs->regs.reg[11].how = REG_SAVED_OFFSET;
+ fs->regs.reg[11].loc.offset = (long)&sc->r11 - new_cfa;
+ fs->regs.reg[12].how = REG_SAVED_OFFSET;
+ fs->regs.reg[12].loc.offset = (long)&sc->r12 - new_cfa;
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long)&sc->r13 - new_cfa;
+ fs->regs.reg[14].how = REG_SAVED_OFFSET;
+ fs->regs.reg[14].loc.offset = (long)&sc->r14 - new_cfa;
+ fs->regs.reg[15].how = REG_SAVED_OFFSET;
+ fs->regs.reg[15].loc.offset = (long)&sc->r15 - new_cfa;
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long)&sc->rip - new_cfa;
+ fs->retaddr_column = 16;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#else /* ifdef __x86_64__ */
+
+/* There's no sys/ucontext.h for glibc 2.0, so no
+ signal-turned-exceptions for them. There's also no configure-run for
+ the target, so we can't check on (e.g.) HAVE_SYS_UCONTEXT_H. Using the
+ target libc version macro should be enough. */
+#if defined __GLIBC__ && !(__GLIBC__ == 2 && __GLIBC_MINOR__ == 0)
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+
+ /* popl %eax ; movl $__NR_sigreturn,%eax ; int $0x80 */
+ if (*(unsigned short *)(pc+0) == 0xb858
+ && *(unsigned int *)(pc+2) == 119
+ && *(unsigned short *)(pc+6) == 0x80cd)
+ sc = context->cfa + 4;
+ /* movl $__NR_rt_sigreturn,%eax ; int $0x80 */
+ else if (*(unsigned char *)(pc+0) == 0xb8
+ && *(unsigned int *)(pc+1) == 173
+ && *(unsigned short *)(pc+5) == 0x80cd)
+ {
+ struct rt_sigframe {
+ int sig;
+ siginfo_t *pinfo;
+ void *puc;
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->REG_NAME(esp);
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 4;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&sc->REG_NAME(eax) - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&sc->REG_NAME(ebx) - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&sc->REG_NAME(ecx) - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&sc->REG_NAME(edx) - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&sc->REG_NAME(esi) - new_cfa;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&sc->REG_NAME(edi) - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&sc->REG_NAME(ebp) - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&sc->REG_NAME(eip) - new_cfa;
+ fs->retaddr_column = 8;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT x86_frob_update_context
+
+/* Fix up for kernels that have a vDSO, but lack the 'S' (signal frame)
+   augmentation flag in its unwind info.  */
+
+static void
+x86_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs ATTRIBUTE_UNUSED)
+{
+ unsigned char *pc = context->ra;
+
+ /* movl $__NR_rt_sigreturn,%eax ; {int $0x80 | syscall} */
+ if (*(unsigned char *)(pc+0) == 0xb8
+ && *(unsigned int *)(pc+1) == 173
+ && (*(unsigned short *)(pc+5) == 0x80cd
+ || *(unsigned short *)(pc+5) == 0x050f))
+ _Unwind_SetSignalFrame (context, 1);
+}
+
+#endif /* not glibc 2.0 */
+#endif /* ifdef __x86_64__ */
+#endif /* ifdef inhibit_libc */
diff --git a/gcc/config/i386/linux.h b/gcc/config/i386/linux.h
new file mode 100644
index 000000000..0084c8313
--- /dev/null
+++ b/gcc/config/i386/linux.h
@@ -0,0 +1,215 @@
+/* Definitions for Intel 386 running Linux-based GNU systems with ELF format.
+ Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Output at beginning of assembler file. */
+/* The .file command should always begin the output. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#define TARGET_VERSION fprintf (stderr, " (i386 Linux/ELF)");
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* We arrange for the whole %gs segment to map the tls area. */
+#undef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT MASK_TLS_DIRECT_SEG_REFS
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* Output assembler code to FILE to call the profiler.
+ To the best of my knowledge, no Linux libc has required the label
+ argument to mcount. */
+
+#define NO_PROFILE_COUNTERS 1
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "mcount"
+
+/* The GLIBC version of mcount for the x86 assumes that there is a
+ frame, so we cannot allow profiling without a frame pointer. */
+
+#undef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) %{profile:-p}"
+
+/* Provide a LINK_SPEC appropriate for Linux. Here we provide support
+   for the special GCC options -static and -shared, which allow us to
+   link in one of three modes (static, shared, or the default dynamic
+   executable) by applying the appropriate combinations of options at
+   link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
+
+/* These macros may be overridden in k*bsd-gnu.h and i386/k*bsd-gnu.h. */
+#define LINK_EMULATION "elf_i386"
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_emulation", LINK_EMULATION },\
+ { "dynamic_linker", LINUX_DYNAMIC_LINKER }
+
+#undef LINK_SPEC
+#define LINK_SPEC "-m %(link_emulation) %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(dynamic_linker)} \
+ %{static:-static}}"
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else { \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ /* Make sure that we have at least 8 byte alignment if > 8 byte \
+ alignment is preferred. */ \
+ if ((LOG) > 3 \
+ && (1 << (LOG)) > ((MAX_SKIP) + 1) \
+ && (MAX_SKIP) >= 7) \
+ fputs ("\t.p2align 3\n", (FILE)); \
+ } \
+ } \
+ } while (0)
+#endif
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), FILE); \
+ goto DONE; \
+ } \
+ } while (0)
+
+/* Used by crtstuff.c to initialize the base of data-relative relocations.
+ These are GOT relative on x86, so return the pic register. */
+#ifdef __PIC__
+#define CRT_GET_RFIB_DATA(BASE) \
+ { \
+ register void *ebx_ __asm__("ebx"); \
+ BASE = ebx_; \
+ }
+#else
+#define CRT_GET_RFIB_DATA(BASE) \
+ __asm__ ("call\t.LPR%=\n" \
+ ".LPR%=:\n\t" \
+ "pop{l}\t%0\n\t" \
+ /* Due to a GAS bug, this cannot use EAX. That encodes \
+ smaller than the traditional EBX, which results in the \
+ offset being off by one. */ \
+ "add{l}\t{$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0" \
+ "|%0,_GLOBAL_OFFSET_TABLE_+(.-.LPR%=)}" \
+ : "=d"(BASE))
+#endif
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+/* This macro may be overridden in i386/k*bsd-gnu.h. */
+#define REG_NAME(reg) reg
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* i386 glibc provides __stack_chk_guard in %gs:0x14. */
+#define TARGET_THREAD_SSP_OFFSET 0x14
+
+/* We steal the last transactional memory word. */
+#define TARGET_CAN_SPLIT_STACK
+#define TARGET_THREAD_SPLIT_STACK_OFFSET 0x30
+#endif
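[note: an illustration (not GCC source) of what the guard load amounts to on i386 glibc, given the 0x14 offset above; inline asm, i386 only:]

/* Read the stack-protector canary the way -fstack-protector prologues
   do on i386 glibc: a %gs-relative load from offset 0x14.  */
static unsigned int
read_stack_guard (void)
{
  unsigned int guard;
  __asm__ ("movl %%gs:0x14, %0" : "=r" (guard));
  return guard;
}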
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
new file mode 100644
index 000000000..103ab0c99
--- /dev/null
+++ b/gcc/config/i386/linux64.h
@@ -0,0 +1,132 @@
+/* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format.
+ Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#if TARGET_64BIT_DEFAULT
+#define TARGET_VERSION fprintf (stderr, " (x86-64 Linux/ELF)");
+#else
+#define TARGET_VERSION fprintf (stderr, " (i386 Linux/ELF)");
+#endif
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) %{profile:-p}"
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. In the 64bit compilation we will turn this flag off in
+   ix86_option_override_internal, as we never use the pcc_struct_return
+   scheme on this target.  */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* We arrange for the whole %fs segment to map the tls area. */
+#undef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT MASK_TLS_DIRECT_SEG_REFS
+
+/* Provide a LINK_SPEC. Here we provide support for the special GCC
+   options -static and -shared, which allow us to link in one of three
+   modes (static, shared, or the default dynamic executable) by applying
+   the appropriate combinations of options at link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
+
+#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux-x86-64.so.2"
+
+#if TARGET_64BIT_DEFAULT
+#define SPEC_32 "m32"
+#define SPEC_64 "!m32"
+#else
+#define SPEC_32 "!m64"
+#define SPEC_64 "m64"
+#endif
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{" SPEC_32 ":--32} %{" SPEC_64 ":--64} \
+ %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{" SPEC_64 ":-m elf_x86_64} %{" SPEC_32 ":-m elf_i386} \
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{" SPEC_32 ":-dynamic-linker " LINUX_DYNAMIC_LINKER32 "} \
+ %{" SPEC_64 ":-dynamic-linker " LINUX_DYNAMIC_LINKER64 "}} \
+ %{static:-static}}"
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+#if TARGET_64BIT_DEFAULT
+#define MULTILIB_DEFAULTS { "m64" }
+#else
+#define MULTILIB_DEFAULTS { "m32" }
+#endif
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+/* This macro may be overridden in i386/k*bsd-gnu.h. */
+#define REG_NAME(reg) reg
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* i386 glibc provides __stack_chk_guard in %gs:0x14,
+ x86_64 glibc provides it in %fs:0x28. */
+#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? 0x28 : 0x14)
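+
+/* Concretely, a 64-bit stack-protector prologue loads the canary with
+   an instruction along the lines of "movq %fs:40, %rax" (0x28 == 40),
+   while 32-bit code uses "movl %gs:20, %eax".  */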
+
+/* We steal the last transactional memory word. */
+#define TARGET_CAN_SPLIT_STACK
+#define TARGET_THREAD_SPLIT_STACK_OFFSET (TARGET_64BIT ? 0x70 : 0x30)
+#endif
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
new file mode 100644
index 000000000..954b039e5
--- /dev/null
+++ b/gcc/config/i386/lwpintrin.h
@@ -0,0 +1,100 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _LWPINTRIN_H_INCLUDED
+#define _LWPINTRIN_H_INCLUDED
+
+#ifndef __LWP__
+# error "LWP instruction set not enabled"
+#else
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__llwpcb (void *pcbAddress)
+{
+ __builtin_ia32_llwpcb (pcbAddress);
+}
+
+extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__slwpcb (void)
+{
+ return __builtin_ia32_slwpcb ();
+}
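+
+/* A minimal usage sketch (illustrative only; __newpcb stands for a
+   caller-provided LWP control block): save the current LWPCB, install
+   a new one, and restore the original later:
+
+     void *__oldpcb = __slwpcb ();
+     __llwpcb (__newpcb);
+     ...
+     __llwpcb (__oldpcb);  */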
+
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval32 (data2, data1, flags);
+}
+
+#ifdef __x86_64__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ __builtin_ia32_lwpval64 (data2, data1, flags);
+}
+#endif
+#else
+#define __lwpval32(D2, D1, F) \
+ (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpval64(D2, D1, F) \
+ (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+
+#ifdef __OPTIMIZE__
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins32 (data2, data1, flags);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+{
+ return __builtin_ia32_lwpins64 (data2, data1, flags);
+}
+#endif
+#else
+#define __lwpins32(D2, D1, F) \
+ (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#ifdef __x86_64__
+#define __lwpins64(D2, D1, F) \
+ (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
+ (unsigned int) (F)))
+#endif
+#endif
+
+#endif /* __LWP__ */
+
+#endif /* _LWPINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/lynx.h b/gcc/config/i386/lynx.h
new file mode 100644
index 000000000..df73e9b85
--- /dev/null
+++ b/gcc/config/i386/lynx.h
@@ -0,0 +1,90 @@
+/* Definitions for LynxOS on i386.
+ Copyright (C) 1993, 1995, 1996, 2002, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fputs (" (i386/LynxOS)", stderr);
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ builtin_define ("__x86__"); \
+ } \
+ while (0)
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* BSS_SECTION_ASM_OP gets defined in i386/unix.h.  */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* LynxOS's GDB counts the floating point registers from 16. */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : (n) == 0 ? 0 \
+ : (n) == 1 ? 2 \
+ : (n) == 2 ? 1 \
+ : (n) == 3 ? 3 \
+ : (n) == 4 ? 6 \
+ : (n) == 5 ? 7 \
+ : (n) == 6 ? 5 \
+ : (n) == 7 ? 4 \
+ : ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG) ? (int) (n) + 8 \
+ : (-1))
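+
+/* For instance, FIRST_STACK_REG is 8 in i386.h, so %st(0) is emitted
+   as DBX register 16, matching the GDB numbering described above.  */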
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ } \
+ } while (0)
+#endif
+
+/* Undefine SUBTARGET_EXTRA_SPECS; it is empty anyway.  We define it in
+   config/lynx.h.  */
+
+#undef SUBTARGET_EXTRA_SPECS
+
+/* Undefine the definition from att.h to enable our default. */
+
+#undef ASM_OUTPUT_ALIGN
+
+/* Undefine the definition from elfos.h to enable our default. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+
+/* The file i386.c defines TARGET_HAVE_TLS unconditionally if
+   HAVE_AS_TLS is defined.  HAVE_AS_TLS is defined when configure
+   detects gas support for TLS.  We undefine it here.  */
+
+#undef HAVE_AS_TLS
diff --git a/gcc/config/i386/mingw-stdint.h b/gcc/config/i386/mingw-stdint.h
new file mode 100644
index 000000000..1403737e8
--- /dev/null
+++ b/gcc/config/i386/mingw-stdint.h
@@ -0,0 +1,50 @@
+/* Definitions for <stdint.h> types on systems using mingw.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long long int"
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long long int"
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "short int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long long int"
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "short unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+#define INTPTR_TYPE (TARGET_64BIT ? "long long int" : "int")
+#define UINTPTR_TYPE (TARGET_64BIT ? "long long unsigned int" : "unsigned int")
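+
+/* 64-bit Windows is an LLP64 target: "long" stays 32 bits wide even
+   when TARGET_64BIT, so the pointer-sized types above must fall back
+   to "long long" rather than "long".  */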
diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h
new file mode 100644
index 000000000..f84434a3f
--- /dev/null
+++ b/gcc/config/i386/mingw-w64.h
@@ -0,0 +1,79 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows 32/64 via mingw-w64 runtime, using GNU tools and
+ the Windows API Library.
+ Copyright (C) 2009,
+ 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Enable -municode feature. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{mthreads:-D_MT} \
+ %{municode:-DUNICODE}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \
+ %{!shared:%{!mdll:%{!municode:crt2%O%s}}} \
+ %{!shared:%{!mdll:%{municode:crt2u%O%s}}} \
+ %{pg:gcrt2%O%s} \
+ crtbegin.o%s"
+
+/* Enable multilib. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{m32:--32} %{m64:--64}"
+
+#undef SPEC_32
+#undef SPEC_64
+#if TARGET_64BIT_DEFAULT
+#define SPEC_32 "m32"
+#define SPEC_64 "!m32"
+#else
+#define SPEC_32 "!m64"
+#define SPEC_64 "m64"
+#endif
+
+#undef SUB_LINK_ENTRY32
+#undef SUB_LINK_ENTRY64
+#define SUB_LINK_ENTRY32 "-e _DllMainCRTStartup@12"
+#if defined(USE_MINGW64_LEADING_UNDERSCORES)
+#define SUB_LINK_ENTRY64 "-e _DllMainCRTStartup"
+#else
+#define SUB_LINK_ENTRY64 "-e DllMainCRTStartup"
+#endif
+
+#undef SUB_LINK_SPEC
+#undef SUB_LINK_ENTRY
+#define SUB_LINK_SPEC "%{" SPEC_64 ":-m i386pep} %{" SPEC_32 ":-m i386pe}"
+#define SUB_LINK_ENTRY "%{" SPEC_64 ":" SUB_LINK_ENTRY64 "} %{" SPEC_32 ":" SUB_LINK_ENTRY32 "}"
+
+#undef MULTILIB_DEFAULTS
+#if TARGET_64BIT_DEFAULT
+#define MULTILIB_DEFAULTS { "m64" }
+#else
+#define MULTILIB_DEFAULTS { "m32" }
+#endif
+
+#undef LINK_SPEC
+#define LINK_SPEC SUB_LINK_SPEC " %{mwindows:--subsystem windows} \
+ %{mconsole:--subsystem console} \
+ %{shared: %{mdll: %eshared and mdll are not compatible}} \
+ %{shared: --shared} %{mdll:--dll} \
+ %{static:-Bstatic} %{!static:-Bdynamic} \
+ %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \
+ %(shared_libgcc_undefs)"
diff --git a/gcc/config/i386/mingw-w64.opt b/gcc/config/i386/mingw-w64.opt
new file mode 100644
index 000000000..965f4c0ce
--- /dev/null
+++ b/gcc/config/i386/mingw-w64.opt
@@ -0,0 +1,23 @@
+; MinGW-w64-specific options.
+
+; Copyright (C) 2009 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+municode
+Target
+Use Unicode startup code and define the UNICODE macro
diff --git a/gcc/config/i386/mingw.opt b/gcc/config/i386/mingw.opt
new file mode 100644
index 000000000..bd9a4b630
--- /dev/null
+++ b/gcc/config/i386/mingw.opt
@@ -0,0 +1,27 @@
+; MinGW-specific options.
+
+; Copyright (C) 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Wpedantic-ms-format
+C ObjC C++ ObjC++ Var(warn_pedantic_ms_format) Init(1) Warning
+Warn about non-ISO msvcrt scanf/printf width extensions
+
+fset-stack-executable
+Common Report Var(flag_setstackexecutable) Init(1) Optimization
+Set executable permission on the stack for trampolines used by nested functions.
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
new file mode 100644
index 000000000..27da92b4e
--- /dev/null
+++ b/gcc/config/i386/mingw32.h
@@ -0,0 +1,247 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using GNU tools and the Windows32 API Library.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#if TARGET_64BIT_DEFAULT
+#define TARGET_VERSION fprintf (stderr,"(x86_64 MinGW");
+#else
+#define TARGET_VERSION fprintf (stderr," (x86 MinGW)");
+#endif
+
+/* See i386/crtdll.h for an alternative definition.  _INTEGRAL_MAX_BITS
+   is for compatibility with the native compiler.  */
+#define EXTRA_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__MSVCRT__"); \
+ builtin_define ("__MINGW32__"); \
+ builtin_define ("_WIN32"); \
+ builtin_define_std ("WIN32"); \
+ builtin_define_std ("WINNT"); \
+ builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \
+ TYPE_PRECISION (intmax_type_node));\
+ if (TARGET_64BIT && ix86_abi == MS_ABI) \
+ { \
+ builtin_define ("__MINGW64__"); \
+ builtin_define_std ("WIN64"); \
+ builtin_define ("_WIN64"); \
+ } \
+ } \
+ while (0)
+
+#undef SUB_LINK_ENTRY32
+#undef SUB_LINK_ENTRY64
+#define SUB_LINK_ENTRY32 "-e _DllMainCRTStartup@12"
+#if defined(USE_MINGW64_LEADING_UNDERSCORES)
+#define SUB_LINK_ENTRY64 "-e _DllMainCRTStartup"
+#else
+#define SUB_LINK_ENTRY64 "-e DllMainCRTStartup"
+#endif
+
+#undef SUB_LINK_ENTRY
+#if TARGET_64BIT_DEFAULT
+#define SUB_LINK_ENTRY SUB_LINK_ENTRY64
+#else
+#define SUB_LINK_ENTRY SUB_LINK_ENTRY32
+#endif
+
+/* Override the standard choice of /usr/include as the default prefix
+ to try when searching for header files. */
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/mingw/include"
+#undef STANDARD_INCLUDE_COMPONENT
+#define STANDARD_INCLUDE_COMPONENT "MINGW"
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{mthreads:-D_MT}"
+
+/* For Windows applications, include more libraries, but always include
+ kernel32. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-lgmon} %{mwindows:-lgdi32 -lcomdlg32} \
+ -ladvapi32 -lshell32 -luser32 -lkernel32"
+
+/* Weak symbols do not get resolved when using a Windows DLL import
+   library.  Make the unwind registration references strong undefs.  */
+#if DWARF2_UNWIND_INFO
+/* DW2 unwind is only available in 32-bit mode.  */
+#if TARGET_64BIT_DEFAULT
+#error DW2 unwind is not available for 64-bit.
+#endif
+#define SHARED_LIBGCC_UNDEFS_SPEC \
+ "%{shared-libgcc: -u ___register_frame_info -u ___deregister_frame_info}"
+#else
+#define SHARED_LIBGCC_UNDEFS_SPEC ""
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "shared_libgcc_undefs", SHARED_LIBGCC_UNDEFS_SPEC }
+
+#define LINK_SPEC "%{mwindows:--subsystem windows} \
+ %{mconsole:--subsystem console} \
+ %{shared: %{mdll: %eshared and mdll are not compatible}} \
+ %{shared: --shared} %{mdll:--dll} \
+ %{static:-Bstatic} %{!static:-Bdynamic} \
+ %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \
+ %(shared_libgcc_undefs)"
+
+/* Link the mingw32 runtime libraries in with libgcc.  */
+#ifdef ENABLE_SHARED_LIBGCC
+#define SHARED_LIBGCC_SPEC "%{shared-libgcc:-lgcc_s} %{!shared-libgcc:-lgcc_eh}"
+#else
+#define SHARED_LIBGCC_SPEC /*empty*/
+#endif
+#undef REAL_LIBGCC_SPEC
+#define REAL_LIBGCC_SPEC \
+ "%{mthreads:-lmingwthrd} -lmingw32 \
+ "SHARED_LIBGCC_SPEC" \
+ -lgcc \
+ -lmoldname -lmingwex -lmsvcrt"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \
+ %{!shared:%{!mdll:crt2%O%s}} %{pg:gcrt2%O%s} \
+ crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ crtend.o%s"
+
+/* Override startfile prefix defaults. */
+#ifndef STANDARD_STARTFILE_PREFIX_1
+#define STANDARD_STARTFILE_PREFIX_1 "/mingw/lib/"
+#endif
+#ifndef STANDARD_STARTFILE_PREFIX_2
+#define STANDARD_STARTFILE_PREFIX_2 ""
+#endif
+
+/* Output STRING, a string representing a filename, to FILE.
+   We canonicalize it to be in Unix format (backslashes are replaced
+   with forward slashes).  */
+#undef OUTPUT_QUOTED_STRING
+#define OUTPUT_QUOTED_STRING(FILE, STRING) \
+do { \
+  const char *_string = (STRING); \
+  char c; \
+ \
+  putc ('\"', (FILE)); \
+ \
+  while ((c = *_string++) != 0) \
+    { \
+      if (c == '\\') \
+	c = '/'; \
+ \
+      if (ISPRINT (c)) \
+	{ \
+	  if (c == '\"') \
+	    putc ('\\', (FILE)); \
+	  putc (c, (FILE)); \
+	} \
+      else \
+	fprintf ((FILE), "\\%03o", (unsigned char) c); \
+    } \
+ \
+  putc ('\"', (FILE)); \
+} while (0)
+
+/* Define as short unsigned for compatibility with MS runtime. */
+#undef WINT_TYPE
+#define WINT_TYPE "short unsigned int"
+
+/* mingw32 uses the -mthreads option to enable thread support. */
+#undef GOMP_SELF_SPECS
+#define GOMP_SELF_SPECS "%{fopenmp: -mthreads}"
+
+/* The mingw32 atexit function is safe to use in shared libraries.  Use
+   it to register C++ static destructors.  */
+#define TARGET_CXX_USE_ATEXIT_FOR_CXA_ATEXIT hook_bool_void_true
+
+/* Contains a pointer to type target_ovr_attr defining the target specific
+ overrides of format attributes. See c-format.h for structure
+ definition. */
+#undef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
+#define TARGET_OVERRIDES_FORMAT_ATTRIBUTES mingw_format_attribute_overrides
+
+/* Specify the count of elements in TARGET_OVERRIDES_ATTRIBUTE. */
+#undef TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT
+#define TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT 3
+
+/* Custom initialization for warning -Wpedantic-ms-format for c-format. */
+#undef TARGET_OVERRIDES_FORMAT_INIT
+#define TARGET_OVERRIDES_FORMAT_INIT msformat_init
+
+/* MS specific format attributes for ms_printf, ms_scanf, ms_strftime. */
+#undef TARGET_FORMAT_TYPES
+#define TARGET_FORMAT_TYPES mingw_format_attributes
+
+#undef TARGET_N_FORMAT_TYPES
+#define TARGET_N_FORMAT_TYPES 3
+
+/* Let defaults.h definition of TARGET_USE_JCR_SECTION apply. */
+#undef TARGET_USE_JCR_SECTION
+
+#undef MINGW_ENABLE_EXECUTE_STACK
+#define MINGW_ENABLE_EXECUTE_STACK \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ MEMORY_BASIC_INFORMATION b; \
+ if (!VirtualQuery (addr, &b, sizeof(b))) \
+ abort (); \
+ VirtualProtect (b.BaseAddress, b.RegionSize, PAGE_EXECUTE_READWRITE, \
+ &b.Protect); \
+}
+
+#undef ENABLE_EXECUTE_STACK
+#define ENABLE_EXECUTE_STACK MINGW_ENABLE_EXECUTE_STACK
+#undef CHECK_EXECUTE_STACK_ENABLED
+#define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable
+
+#ifdef IN_LIBGCC2
+#include <windows.h>
+#endif
+
+/* DW2 unwind info cannot be used for 64-bit Windows, nor for multilib
+   builds.  */
+#if !TARGET_64BIT_DEFAULT && !defined (TARGET_BI_ARCH)
+#define MD_UNWIND_SUPPORT "config/i386/w32-unwind.h"
+#endif
+
+/* This matches SHLIB_SONAME and SHLIB_SOVERSION in t-cygming and
+   t-cygwin.  */
+#if DWARF2_UNWIND_INFO
+#define LIBGCC_EH_EXTN "_dw2"
+#else
+#define LIBGCC_EH_EXTN "_sjlj"
+#endif
+#define LIBGCC_SONAME "libgcc_s" LIBGCC_EH_EXTN "-1.dll"
+
+/* We should find a way to not have to update this manually. */
+#define LIBGCJ_SONAME "libgcj" /*LIBGCC_EH_EXTN*/ "-12.dll"
+
+/* For 32-bit Windows we need a valid frame pointer for functions
+   using setjmp.  */
+#undef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED \
+ (!TARGET_64BIT && cfun->calls_setjmp)
+
diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h
new file mode 100644
index 000000000..0d0735c9a
--- /dev/null
+++ b/gcc/config/i386/mm3dnow.h
@@ -0,0 +1,215 @@
+/* Copyright (C) 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
+ MSVC 7.1. */
+
+#ifndef _MM3DNOW_H_INCLUDED
+#define _MM3DNOW_H_INCLUDED
+
+#ifdef __3dNOW__
+
+#include <mmintrin.h>
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_femms (void)
+{
+ __builtin_ia32_femms();
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgusb (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2id (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfadd (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpeq (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpge (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpgt (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmax (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmin (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmul (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcp (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit2 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqrt (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsub (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsubr (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhrw (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetch (void *__P)
+{
+ __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchw (void *__P)
+{
+ __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_float (float __A)
+{
+ return __extension__ (__m64)(__v2sf){ __A, 0.0f };
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_float (__m64 __A)
+{
+ union { __v2sf v; float a[2]; } __tmp;
+ __tmp.v = (__v2sf)__A;
+ return __tmp.a[0];
+}
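+
+/* For example, _m_to_float (_m_from_float (1.5f)) yields 1.5f; the
+   upper lane written by _m_from_float is 0.0f.  */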
+
+#ifdef __3dNOW_A__
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2iw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfpnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pswapd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
+}
+
+#endif /* __3dNOW_A__ */
+#endif /* __3dNOW__ */
+
+#endif /* _MM3DNOW_H_INCLUDED */
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
new file mode 100644
index 000000000..497e22edd
--- /dev/null
+++ b/gcc/config/i386/mmintrin.h
@@ -0,0 +1,921 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+#ifndef __MMX__
+# error "MMX instruction set not enabled"
+#else
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+/* Empty the multimedia state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+ __builtin_ia32_emms ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_empty (void)
+{
+ _mm_empty ();
+}
+
+/* Convert I to a __m64 object.  The integer is zero-extended to 64 bits.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si64 (int __i)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int (int __i)
+{
+ return _mm_cvtsi32_si64 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+
+/* Intel intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_m64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
+/* Convert the lower 32 bits of the __m64 object into an integer. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si32 (__m64 __i)
+{
+ return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int (__m64 __i)
+{
+ return _mm_cvtsi64_si32 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert the __m64 object to a 64bit integer. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtm64_si64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with signed saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packsswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi16 (__m1, __m2);
+}
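+
+/* Worked example (lanes listed least significant first): packing
+   {130, -200, 5, 127} with {0, 0, 0, 0} saturates the out-of-range
+   lanes, giving the signed 8-bit values {127, -128, 5, 127, 0, 0, 0, 0}.  */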
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with signed saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packssdw (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi32 (__m1, __m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with unsigned saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packuswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pu16 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the high half of M1 with the four
+ 8-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+ 16-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+ value from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhdq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi32 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+ 8-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+ 16-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+ value from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckldq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi32 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddb (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddw (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi16 (__m1, __m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddd (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi32 (__m1, __m2);
+}
+
+/* Add the 64-bit value in M1 to the 64-bit value in M2.  */
+#ifdef __SSE2__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
+}
+#endif
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi16 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubb (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubw (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi16 (__m1, __m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubd (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi32 (__m1, __m2);
+}
+
+/* Subtract the 64-bit value in M2 from the 64-bit value in M1.  */
+#ifdef __SSE2__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
+}
+#endif
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+ saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ signed saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaddwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_madd_pi16 (__m1, __m2);
+}
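+
+/* Worked example (lanes listed least significant first): with
+   M1 = {1, 2, 3, 4} and M2 = {5, 6, 7, 8}, the intermediate products
+   are {5, 12, 21, 32} and the pairwise sums give {17, 53}.  */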
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+ M2 and produce the high 16 bits of the 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mulhi_pi16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+ the low 16 bits of the results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmullw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mullo_pi16 (__m1, __m2);
+}
+
+/* Shift four 16-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllw (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllwi (__m64 __m, int __count)
+{
+ return _mm_slli_pi16 (__m, __count);
+}
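+
+/* For example, _mm_slli_pi16 (_mm_set1_pi16 (1), 3) yields four lanes
+   of 8; counts of 16 or more clear every lane to zero.  */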
+
+/* Shift two 32-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslld (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslldi (__m64 __m, int __count)
+{
+ return _mm_slli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllq (__m64 __m, __m64 __count)
+{
+ return _mm_sll_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllqi (__m64 __m, int __count)
+{
+ return _mm_slli_si64 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psraw (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrawi (__m64 __m, int __count)
+{
+ return _mm_srai_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrad (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psradi (__m64 __m, int __count)
+{
+ return _mm_srai_pi32 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlw (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlwi (__m64 __m, int __count)
+{
+ return _mm_srli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrld (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrldi (__m64 __m, int __count)
+{
+ return _mm_srli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlq (__m64 __m, __m64 __count)
+{
+ return _mm_srl_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlqi (__m64 __m, int __count)
+{
+ return _mm_srli_si64 (__m, __count);
+}
+
+/* Bit-wise AND the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pand (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pand (__m64 __m1, __m64 __m2)
+{
+ return _mm_and_si64 (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+ 64-bit value in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pandn (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pandn (__m64 __m1, __m64 __m2)
+{
+ return _mm_andnot_si64 (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_por (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_por (__m64 __m1, __m64 __m2)
+{
+ return _mm_or_si64 (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pxor (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pxor (__m64 __m1, __m64 __m2)
+{
+ return _mm_xor_si64 (__m1, __m2);
+}
+
+/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
+ test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi8 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi8 (__m1, __m2);
+}
+
+/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi16 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi16 (__m1, __m2);
+}
+
+/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi32 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi32 (__m1, __m2);
+}
+
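+/* Usage sketch (illustrative only): the all-ones/all-zero element
+   masks produced by the comparisons above are intended for bit-wise
+   selection, e.g. keeping only the bytes of __A that exceed the
+   corresponding bytes of __B:
+
+     __m64 __mask = _mm_cmpgt_pi8 (__A, __B);
+     __m64 __kept = _mm_and_si64 (__A, __mask);  */
+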
+/* Creates a 64-bit zero. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si64 (void)
+{
+ return (__m64)0LL;
+}
+
+/* Creates a vector of two 32-bit values; I0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi32 (int __i1, int __i0)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+ return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+ char __b3, char __b2, char __b1, char __b0)
+{
+ return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
+ __b4, __b5, __b6, __b7);
+}
+
+/* Similar, but with the arguments in reverse order. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi32 (int __i0, int __i1)
+{
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+ return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+ char __b4, char __b5, char __b6, char __b7)
+{
+ return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
+
+/* Creates a vector of two 32-bit values, both elements containing I. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi32 (int __i)
+{
+ return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi16 (short __w)
+{
+ return _mm_set_pi16 (__w, __w, __w, __w);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi8 (char __b)
+{
+ return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
+}
+
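+/* Note on argument order (illustrative): the two calls below build
+   the same vector, since _mm_set_* takes the most significant element
+   first while _mm_setr_* takes the least significant element first:
+
+     __m64 __a = _mm_set_pi16  (3, 2, 1, 0);
+     __m64 __b = _mm_setr_pi16 (0, 1, 2, 3);  */
+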
+#endif /* __MMX__ */
+#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
new file mode 100644
index 000000000..ca3762286
--- /dev/null
+++ b/gcc/config/i386/mmx.md
@@ -0,0 +1,1716 @@
+;; GCC machine description for MMX and 3dNOW! instructions
+;; Copyright (C) 2005, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The MMX and 3dNOW! patterns are in the same file because they use
+;; the same register file, and 3dNOW! adds a number of extensions to
+;; the base integer MMX ISA.
+
+;; Note! Except for the basic move instructions, *all* of these
+;; patterns are outside the normal optabs namespace. This is because
+;; use of these registers requires the insertion of emms or femms
+;; instructions to return to normal FPU mode. The compiler doesn't
+;; know how to do that itself, so it is up to the user; consequently,
+;; we should never use any of these patterns except at the direction
+;; of the user via a builtin.
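+;;
+;; Illustrative sketch (hypothetical user code, not part of this file):
+;;
+;;   __m64 sum = _mm_add_pi16 (a, b);  /* MMX unit now in use */
+;;   _mm_empty ();                     /* emits emms before x87 code */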
+
+;; 8-byte integral modes handled by MMX (and, by extension, SSE)
+(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+
+;; All 8-byte vector modes handled by MMX
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
+
+;; Mix-n-match
+(define_mode_iterator MMXMODE12 [V8QI V4HI])
+(define_mode_iterator MMXMODE24 [V4HI V2SI])
+(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; All of these patterns are enabled for MMX as well as 3dNOW.
+;; This is essential for maintaining stable calling conventions.
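+;;
+;; Illustrative: even plain __m64 argument passing, e.g. the
+;; hypothetical
+;;
+;;   __m64 f (__m64 x) { return x; }
+;;
+;; needs these moves, so they are gated on TARGET_MMX alone rather
+;; than on any 3dNOW flag.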
+
+(define_expand "mov<mode>"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+;; movd instead of movq is required to handle broken assemblers.
+(define_insn "*mov<mode>_internal_rex64"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=rm,r,!?y,!y,!?y,m ,!y ,*Y2,x,x ,m,r ,Yi")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "Cr ,m,C ,!y,m ,!?y,*Y2,!y ,C,xm,x,Yi,r"))]
+ "TARGET_64BIT && TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,1,*,*,*")
+ (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,*,*,1,1,1")
+ (set (attr "prefix_rex")
+ (if_then_else (eq_attr "alternative" "9,10")
+ (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+ (const_string "*")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "8,9,10,11,12")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*mov<mode>_internal_avx"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=!?y,!y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,r ,m")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "C ,!y,m ,!?y,*Y2,!y ,C ,*Y2m,*Y2,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpxor\t%0, %0, %0
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "6,7,8")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=!?y,!y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,r ,m")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "C ,!y,m ,!?y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
+ "TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,1,1,*,1,*,*,*,*,*,*,*")
+ (set_attr "prefix_data16" "*,*,*,*,*,*,*,*,1,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (V2SFmode, operands);
+ DONE;
+})
+
+(define_insn "*movv2sf_internal_rex64_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=rm,r,!?y,!y,!?y,m ,!y,Y2,x,x,x,m,r,x")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "Cr ,m,C ,!y,m ,!?y,Y2,!y,C,x,m,x,x,r"))]
+ "TARGET_64BIT && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,*,*,*,*,*")
+ (set_attr "length_vex" "*,*,*,*,*,*,*,*,*,*,*,*,4,4")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "8,9,10,11,12,13")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+;; movd instead of movq is required to handle broken assemblers.
+(define_insn "*movv2sf_internal_rex64"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=rm,r,!?y,!y,!?y,m ,!y ,*Y2,x,x,x,m,r ,Yi")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "Cr ,m,C ,!y,m ,!?y,*Y2,!y ,C,x,m,x,Yi,r"))]
+ "TARGET_64BIT && TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,*,*,1,1,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=!?y,!y,!?y,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "C ,!y,m ,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "6,7,8,9")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=!?y,!y,!?y,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "C ,!y,m ,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+ "TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "prefix_rep" "*,*,*,*,1,1,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+;; %%% This multiword handling has got to go.
+(define_split
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
+ && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_expand "push<mode>1"
+ [(match_operand:MMXMODE 0 "register_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "sse_movntdi"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "movntq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (plus:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
+(define_insn "*mmx_addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+ "pfadd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_expand "mmx_subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (minus:V2SF (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW")
+
+(define_expand "mmx_subrv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (minus:V2SF (match_operand:V2SF 2 "register_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW")
+
+(define_insn "*mmx_subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
+ "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pfsub\t{%2, %0|%0, %2}
+ pfsubr\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_expand "mmx_mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
+(define_insn "*mmx_mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+ "pfmul\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "mmx_<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V2SFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
+(define_insn "*mmx_<code>v2sf3_finite"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+ "pf<maxmin_float>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "*mmx_<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf<maxmin_float>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCP))]
+ "TARGET_3DNOW"
+ "pfrcp\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT1))]
+ "TARGET_3DNOW"
+ "pfrcpit1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit2v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT2))]
+ "TARGET_3DNOW"
+ "pfrcpit2\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqrtv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQRT))]
+ "TARGET_3DNOW"
+ "pfrsqrt\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQIT1))]
+ "TARGET_3DNOW"
+ "pfrsqit1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW"
+ "pfacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_hsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A"
+ "pfnacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_addsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_merge:V2SF
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
+ (minus:V2SF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_3DNOW_A"
+ "pfpnacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "")
+ (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
+
+(define_insn "*mmx_eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
+ "pfcmpeq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_gtv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpgt\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_gev2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpge\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pf2id"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf2id\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pf2iw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (ss_truncate:V2HI
+ (fix:V2SI
+ (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pf2iw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pi2fw"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF
+ (sign_extend:V2SI
+ (truncate:V2HI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pi2fw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_floatv2si2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pi2fd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pswapdv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "*vec_dupv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_duplicate:V2SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SF 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative;
+;; see the comment above the inline_secondary_memory_needed function in i386.c.
+(define_insn_and_split "*vec_extractv2sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
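+  /* Element 0 is the low part of the V2SF value, so the extraction
+     degenerates to a plain SFmode move from the lowpart.  */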
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative;
+;; see the comment above the inline_secondary_memory_needed function in i386.c.
+(define_insn "*vec_extractv2sf_1"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ unpckhps\t%0, %0
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov")
+ (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (vec_select:SF
+ (match_operand:V2SF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_extractv2sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "")
+ (plusminus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ (plusminus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "")
+ (sat_plusminus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (sat_plusminus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pmaddwd"
+ [(set (match_operand:V2SI 0 "register_operand" "")
+ (plus:V2SI
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmaddwd"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (plus:V2SI
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
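+;; pmulhrw forms the rounded high half of a signed 16-bit multiply:
+;; each result word is (a * b + 0x8000) >> 16, matching the rtl below.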
+(define_expand "mmx_pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_vector:V4SI [(const_int 32768) (const_int 32768)
+ (const_int 32768) (const_int 32768)]))
+ (const_int 16))))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 32768) (const_int 32768)
+ (const_int 32768) (const_int 32768)]))
+ (const_int 16))))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_expand "sse2_umulv1siv1di3"
+ [(set (match_operand:V1DI 0 "register_operand" "")
+ (mult:V1DI
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
+
+(define_insn "*sse2_umulv1siv1di3"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (mult:V1DI
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)])))
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
+
+(define_insn "*mmx_<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
+ "p<maxmin_int>w\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "")
+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
+
+(define_insn "*mmx_<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
+ "p<maxmin_int>b\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psra<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_lshr<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (lshiftrt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psrl<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashl<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (ashift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psll<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_eq<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
+ (eq:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*mmx_eq<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (eq:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_gt<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (gt:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_andnot<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<logic>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_packsswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI
+ (match_operand:V2SI 1 "register_operand" "0"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packuswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (us_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_MMX"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_MMX"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_MMX"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_MMX"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_MMX"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_MMX"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand:SI 3 "const_0_to_3_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[2] = gen_lowpart (HImode, operands[2]);
+ operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
+
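+;; The expander above turns the element index (0..3) into the one-hot
+;; vec_merge mask 1 << index; the insn below recovers the index with
+;; exact_log2 when printing the pinsrw immediate.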
+(define_insn "*mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ if (MEM_P (operands[2]))
+ return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pshufw"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
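+;; Worked example: a mask of 0x1b (0b00011011) selects elements
+;; 3,2,1,0 and thus reverses the four words, since (0x1b >> 0) & 3 == 3,
+;; (0x1b >> 2) & 3 == 2, (0x1b >> 4) & 3 == 1 and (0x1b >> 6) & 3 == 0.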
+(define_insn "mmx_pshufw_1"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pswapdv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_duplicate:V4HI
+ (truncate:HI
+ (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pshufw\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_duplicate:V2SI
+ (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SI 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative;
+;; see the comment above the inline_secondary_memory_needed function in i386.c.
+(define_insn_and_split "*vec_extractv2si_0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r")
+ (vec_select:SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative;
+;; see the comment above the inline_secondary_memory_needed function in i386.c.
+(define_insn "*vec_extractv2si_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Y2,Y2,x,y,x,r")
+ (vec_select:SI
+ (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ punpckhdq\t%0, %0
+ pshufd\t{$85, %1, %0|%0, %1, 85}
+ unpckhps\t%0, %0
+ #
+ #
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov")
+ (set_attr "length_immediate" "*,*,1,*,*,*,*")
+ (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (vec_select:SI
+ (match_operand:V2SI 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SImode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_extractv2si"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V2SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv4hi"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv8qi"
+ [(match_operand:QI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "")
+ (truncate:V8QI
+ (lshiftrt:V8HI
+ (plus:V8HI
+ (plus:V8HI
+ (zero_extend:V8HI
+ (match_operand:V8QI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8HI
+ (match_operand:V8QI 2 "nonimmediate_operand" "")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE || TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
+
+(define_insn "*mmx_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (truncate:V8QI
+ (lshiftrt:V8HI
+ (plus:V8HI
+ (plus:V8HI
+ (zero_extend:V8HI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8HI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW)
+ && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
+{
+ /* These two instructions have the same operation, but their encoding
+ is different. Prefer the one that is de facto standard. */
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ return "pavgb\t{%2, %0|%0, %2}";
+ else
+ return "pavgusb\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "mmxshft")
+ (set (attr "prefix_extra")
+ (if_then_else
+ (eq (symbol_ref "(TARGET_SSE || TARGET_3DNOW_A)") (const_int 0))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
+
+(define_insn "*mmx_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_psadbw"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_maskmovq"
+ [(set (match_operand:V8QI 0 "memory_operand" "")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "")
+ (match_operand:V8QI 2 "register_operand" "")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE || TARGET_3DNOW_A")
+
+(define_insn "*mmx_maskmovq"
+ [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_maskmovq_rex"
+ [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_emms"
+ [(match_par_dup 0 [(const_int 0)])]
+ "TARGET_MMX"
+{
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (17));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_EMMS);
+
+ for (regno = 0; regno < 8; regno++)
+ {
+ XVECEXP (operands[0], 0, regno + 1)
+ = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (XFmode, FIRST_STACK_REG + regno));
+
+ XVECEXP (operands[0], 0, regno + 9)
+ = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (DImode, FIRST_MMX_REG + regno));
+ }
+})
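+
+;; For reference: the parallel built above has 17 elements -- the
+;; volatile EMMS unspec plus clobbers of the eight x87 stack registers
+;; (XFmode) and the eight MMX registers (DImode), reflecting that EMMS
+;; re-tags the register file shared between x87 and MMX as empty x87
+;; state.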
+
+(define_insn "*mmx_emms"
+ [(match_parallel 0 "emms_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)])]
+ "TARGET_MMX"
+ "emms"
+ [(set_attr "type" "mmx")
+ (set_attr "modrm" "0")
+ (set_attr "memory" "none")])
+
+(define_expand "mmx_femms"
+ [(match_par_dup 0 [(const_int 0)])]
+ "TARGET_3DNOW"
+{
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (17));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_FEMMS);
+
+ for (regno = 0; regno < 8; regno++)
+ {
+ XVECEXP (operands[0], 0, regno + 1)
+ = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (XFmode, FIRST_STACK_REG + regno));
+
+ XVECEXP (operands[0], 0, regno + 9)
+ = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (DImode, FIRST_MMX_REG + regno));
+ }
+})
+
+(define_insn "*mmx_femms"
+ [(match_parallel 0 "emms_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)])]
+ "TARGET_3DNOW"
+ "femms"
+ [(set_attr "type" "mmx")
+ (set_attr "modrm" "0")
+ (set_attr "memory" "none")])
diff --git a/gcc/config/i386/msformat-c.c b/gcc/config/i386/msformat-c.c
new file mode 100644
index 000000000..513952e86
--- /dev/null
+++ b/gcc/config/i386/msformat-c.c
@@ -0,0 +1,197 @@
+/* Check calls to formatted I/O functions (-Wformat).
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "flags.h"
+#include "c-family/c-common.h"
+#include "intl.h"
+#include "diagnostic.h"
+#include "langhooks.h"
+#include "c-family/c-format.h"
+#include "alloc-pool.h"
+
+/* Mingw specific format attributes ms_printf, ms_scanf, and ms_strftime. */
+
+static format_length_info ms_printf_length_specs[] =
+{
+ { "h", FMT_LEN_h, STD_C89, NULL, FMT_LEN_none, STD_C89, 0 },
+ { "l", FMT_LEN_l, STD_C89, NULL, FMT_LEN_none, STD_C89, 0 },
+ { "I32", FMT_LEN_l, STD_EXT, NULL, FMT_LEN_none, STD_C89, 1 },
+ { "I64", FMT_LEN_ll, STD_EXT, NULL, FMT_LEN_none, STD_C89, 1 },
+ { "I", FMT_LEN_L, STD_EXT, NULL, FMT_LEN_none, STD_C89, 1 },
+ { NULL, FMT_LEN_none, STD_C89, NULL, FMT_LEN_none, STD_C89, 0 }
+};
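+
+/* For example, a directive such as "%I64d" uses the MS-specific I64
+ length modifier above and is checked as FMT_LEN_ll, i.e. the matching
+ argument must be a 64-bit integer, just as for "%lld" in C99 printf.  */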
+
+static const format_flag_spec ms_printf_flag_specs[] =
+{
+ { ' ', 0, 0, N_("' ' flag"), N_("the ' ' printf flag"), STD_C89 },
+ { '+', 0, 0, N_("'+' flag"), N_("the '+' printf flag"), STD_C89 },
+ { '#', 0, 0, N_("'#' flag"), N_("the '#' printf flag"), STD_C89 },
+ { '0', 0, 0, N_("'0' flag"), N_("the '0' printf flag"), STD_C89 },
+ { '-', 0, 0, N_("'-' flag"), N_("the '-' printf flag"), STD_C89 },
+ { '\'', 0, 0, N_("''' flag"), N_("the ''' printf flag"), STD_EXT },
+ { 'w', 0, 0, N_("field width"), N_("field width in printf format"), STD_C89 },
+ { 'p', 0, 0, N_("precision"), N_("precision in printf format"), STD_C89 },
+ { 'L', 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
+ { 0, 0, 0, NULL, NULL, STD_C89 }
+};
+
+static const format_flag_pair ms_printf_flag_pairs[] =
+{
+ { ' ', '+', 1, 0 },
+ { '0', '-', 1, 0 }, { '0', 'p', 1, 'i' },
+ { 0, 0, 0, 0 }
+};
+
+static const format_flag_spec ms_scanf_flag_specs[] =
+{
+ { '*', 0, 0, N_("assignment suppression"), N_("the assignment suppression scanf feature"), STD_C89 },
+ { 'a', 0, 0, N_("'a' flag"), N_("the 'a' scanf flag"), STD_EXT },
+ { 'w', 0, 0, N_("field width"), N_("field width in scanf format"), STD_C89 },
+ { 'L', 0, 0, N_("length modifier"), N_("length modifier in scanf format"), STD_C89 },
+ { '\'', 0, 0, N_("''' flag"), N_("the ''' scanf flag"), STD_EXT },
+ { 0, 0, 0, NULL, NULL, STD_C89 }
+};
+
+static const format_flag_pair ms_scanf_flag_pairs[] =
+{
+ { '*', 'L', 0, 0 },
+ { 0, 0, 0, 0 }
+};
+
+static const format_flag_spec ms_strftime_flag_specs[] =
+{
+ { '#', 0, 0, N_("'#' flag"), N_("the '#' strftime flag"), STD_EXT },
+ { 0, 0, 0, NULL, NULL, STD_C89 }
+};
+
+static const format_flag_pair ms_strftime_flag_pairs[] =
+{
+ { 0, 0, 0, 0 }
+};
+
+static const format_char_info ms_print_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "di", 0, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, T99_SST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +'", "i", NULL },
+ { "oxX", 0, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0#", "i", NULL },
+ { "u", 0, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0'", "i", NULL },
+ { "fgG", 0, STD_C89, { T89_D, BADLEN, BADLEN, T99_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +#'", "", NULL },
+ { "eE", 0, STD_C89, { T89_D, BADLEN, BADLEN, T99_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +#", "", NULL },
+ { "c", 0, STD_C89, { T89_I, BADLEN, T89_S, T94_WI, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "", NULL },
+ { "s", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp", "cR", NULL },
+ { "p", 1, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "c", NULL },
+ { "n", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, BADLEN, BADLEN, BADLEN, T99_IM, BADLEN, BADLEN, BADLEN }, "", "W", NULL },
+ /* X/Open conversion specifiers. */
+ { "C", 0, STD_EXT, { TEX_WI, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "", NULL },
+ { "S", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp", "R", NULL },
+ { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
+};
+
+static const format_char_info ms_scan_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "di", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, T99_SST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "u", 1, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "oxX", 1, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "efgEG", 1, STD_C89, { T89_F, BADLEN, BADLEN, T89_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "c", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "cW", NULL },
+ { "s", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "cW", NULL },
+ { "[", 1, STD_C89, { T89_C, BADLEN, BADLEN, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "cW[", NULL },
+ { "p", 2, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "n", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "W", NULL },
+ /* X/Open conversion specifiers. */
+ { "C", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "S", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "W", NULL },
+ { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
+};
+
+static const format_char_info ms_time_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "ABZab", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "cx", 0, STD_C89, NOLENGTHS, "#", "3", NULL },
+ { "HIMSUWdmw", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "j", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "p", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "X", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "y", 0, STD_C89, NOLENGTHS, "#", "4", NULL },
+ { "Y", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "%", 0, STD_C89, NOLENGTHS, "", "", NULL },
+ /* C99 conversion specifiers. */
+ { "z", 0, STD_C99, NOLENGTHS, "#", "", NULL },
+ { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
+};
+
+EXPORTED_CONST format_kind_info mingw_format_attributes[3] =
+{
+ { "ms_printf", ms_printf_length_specs, ms_print_char_table, " +#0-'", NULL,
+ ms_printf_flag_specs, ms_printf_flag_pairs,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK,
+ 'w', 0, 'p', 0, 'L', 0,
+ &integer_type_node, &integer_type_node
+ },
+ { "ms_scanf", ms_printf_length_specs, ms_scan_char_table, "*'", NULL,
+ ms_scanf_flag_specs, ms_scanf_flag_pairs,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_SCANF_A_KLUDGE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_ZERO_WIDTH_BAD|FMT_FLAG_DOLLAR_GAP_POINTER_OK,
+ 'w', 0, 0, '*', 'L', 0,
+ NULL, NULL
+ },
+ { "ms_strftime", NULL, ms_time_char_table, "", "#",
+ ms_strftime_flag_specs, ms_strftime_flag_pairs,
+ FMT_FLAG_FANCY_PERCENT_OK, 0, 0, 0, 0, 0, 0,
+ NULL, NULL
+ }
+};
+
+/* Default overrides for printf, scanf and strftime. */
+EXPORTED_CONST target_ovr_attr mingw_format_attribute_overrides[4] =
+{
+ { "ms_printf", "printf" },
+ { "ms_scanf", "scanf" },
+ { "ms_strftime", "strftime" }
+};
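+
+/* The effect of these overrides, roughly: a function declared with the
+ plain printf format attribute is checked against the ms_printf rules
+ above, so e.g. "%I64d" is accepted while "%lld" is diagnosed, the "l"
+ length entry defining no doubled "ll" form.  */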
+
+/* Setup for option Wpedantic-ms-format. */
+
+#ifdef TARGET_OVERRIDES_FORMAT_INIT
+
+/* Make sure TARGET_OVERRIDES_FORMAT_INIT is prototyped. */
+extern void TARGET_OVERRIDES_FORMAT_INIT (void);
+
+/* Helper. */
+#define C89_OR_EXT (warn_pedantic_ms_format ? STD_EXT : STD_C89)
+
+void
+TARGET_OVERRIDES_FORMAT_INIT (void)
+{
+ ms_printf_length_specs[2].std = C89_OR_EXT; /* I32 */
+ ms_printf_length_specs[3].std = C89_OR_EXT; /* I64 */
+ ms_printf_length_specs[4].std = C89_OR_EXT; /* I */
+}
+
+#undef C89_OR_EXT
+
+#endif
diff --git a/gcc/config/i386/netbsd-elf.h b/gcc/config/i386/netbsd-elf.h
new file mode 100644
index 000000000..264d290a3
--- /dev/null
+++ b/gcc/config/i386/netbsd-elf.h
@@ -0,0 +1,124 @@
+/* Definitions of target machine for GCC,
+ for i386/ELF NetBSD systems.
+ Copyright (C) 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by matthew green <mrg@eterna.com.au>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+
+/* Extra specs needed for NetBSD/i386 ELF. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/i386 ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC NETBSD_LINK_SPEC_ELF
+
+#define NETBSD_ENTRY_POINT "__start"
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+/* Make gcc agree with <machine/ansi.h> */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+
+/* Output assembler code to FILE to call the profiler. */
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (flag_pic) \
+ fprintf (FILE, "\tcall __mcount@PLT\n"); \
+ else \
+ fprintf (FILE, "\tcall __mcount\n"); \
+}
+
+
+#undef HAS_INIT_SECTION
+
+/* This is how we tell the assembler that two symbols have the same value. */
+
+#define ASM_OUTPUT_DEF(FILE,NAME1,NAME2) \
+ do { assemble_name(FILE, NAME1); \
+ fputs(" = ", FILE); \
+ assemble_name(FILE, NAME2); \
+ fputc('\n', FILE); } while (0)
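+
+/* E.g. ASM_OUTPUT_DEF (file, "__f", "f") emits the line "__f = f",
+ which gas reads as defining the symbol __f with the same value as f.  */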
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
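+
+/* For instance, ASM_OUTPUT_MAX_SKIP_ALIGN (file, 4, 7) emits
+ ".p2align 4,,7": align to a 16-byte (1<<4) boundary, but only if at
+ most 7 padding bytes are needed.  */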
+
+/* We always use gas here, so we don't worry about ECOFF assembler
+ problems. */
+#undef TARGET_GAS
+#define TARGET_GAS 1
+
+/* Default to pcc-struct-return, because this is the ELF abi and
+ we don't care about compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 ELF)");
diff --git a/gcc/config/i386/netbsd.h b/gcc/config/i386/netbsd.h
new file mode 100644
index 000000000..318951a77
--- /dev/null
+++ b/gcc/config/i386/netbsd.h
@@ -0,0 +1,96 @@
+/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+ 2004 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_AOUT(); \
+ } \
+ while (0)
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 a.out)");
+
+/* This goes away when the math-emulator is fixed */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC },
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* i386 netbsd still uses old binutils that don't insert nops by default
+ when the .align directive demands to insert extra space in the text
+ segment. */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d,0x90\n", (LOG))
+
+/* Profiling routines, partially copied from i386/osfrose.h. */
+
+/* Redefine this to use %eax instead of %edx. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (flag_pic) \
+ { \
+ fprintf (FILE, "\tcall mcount@PLT\n"); \
+ } \
+ else \
+ { \
+ fprintf (FILE, "\tcall mcount\n"); \
+ } \
+}
+
+/* Until they use ELF or something that handles dwarf2 unwinds
+ and initialization stuff better. */
+#define DWARF2_UNWIND_INFO 0
+
+/* Redefine this so that it becomes "_GLOBAL_OFFSET_TABLE_" when the label
+ prefix is added. */
+#undef GOT_SYMBOL_NAME
+#define GOT_SYMBOL_NAME "GLOBAL_OFFSET_TABLE_"
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
diff --git a/gcc/config/i386/netbsd64.h b/gcc/config/i386/netbsd64.h
new file mode 100644
index 000000000..5add1032c
--- /dev/null
+++ b/gcc/config/i386/netbsd64.h
@@ -0,0 +1,72 @@
+/* Definitions of target machine for GCC,
+ for x86-64/ELF NetBSD systems.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+
+/* Extra specs needed for NetBSD/x86-64 ELF. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/x86-64 ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{m32:-m elf_i386} \
+ %{m64:-m elf_x86_64} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "_start"
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+/* Output assembler code to FILE to call the profiler. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (TARGET_64BIT && flag_pic) \
+ fprintf (FILE, "\tcall *__mcount@PLT\n"); \
+ else if (flag_pic) \
+ fprintf (FILE, "\tcall *__mcount@PLT\n"); \
+ else \
+ fprintf (FILE, "\tcall __mcount\n"); \
+}
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/x86_64 ELF)");
diff --git a/gcc/config/i386/netware-crt0.c b/gcc/config/i386/netware-crt0.c
new file mode 100644
index 000000000..03141ab99
--- /dev/null
+++ b/gcc/config/i386/netware-crt0.c
@@ -0,0 +1,79 @@
+/* Startup routines for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "unwind-dw2-fde.h"
+
+int __init_environment (void *);
+int __deinit_environment (void *);
+
+
+#define SECTION_DECL(name, decl) decl __attribute__((__section__(name)))
+
+SECTION_DECL(".ctors", void(*const __CTOR_LIST__)(void))
+ = (void(*)(void))(intptr_t)-1;
+SECTION_DECL(".ctors$_", void(*const __CTOR_END__)(void)) = NULL;
+
+SECTION_DECL(".dtors", void(*const __DTOR_LIST__)(void))
+ = (void(*)(void))(intptr_t)-1;
+SECTION_DECL(".dtors$_", void(*const __DTOR_END__)(void)) = NULL;
+
+/* No need to use the __[de]register_frame_info_bases functions since
+ for us the bases are NULL always anyway. */
+void __register_frame_info (const void *, struct object *)
+ __attribute__((__weak__));
+void *__deregister_frame_info (const void *) __attribute__((__weak__));
+
+SECTION_DECL(".eh_frame", /*const*/ uint32_t __EH_FRAME_BEGIN__[]) = { };
+SECTION_DECL(".eh_frame$_", /*const*/ uint32_t __EH_FRAME_END__[]) = {0};
+
+int
+__init_environment (void *unused __attribute__((__unused__)))
+{
+ void (* const * pctor)(void);
+ static struct object object;
+
+ if (__register_frame_info)
+ __register_frame_info (__EH_FRAME_BEGIN__, &object);
+
+ for (pctor = &__CTOR_END__ - 1; pctor > &__CTOR_LIST__; --pctor)
+ if (*pctor != NULL)
+ (*pctor)();
+
+ return 0;
+}
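+
+/* Note on the loop direction above: constructor pointers accumulate
+ between __CTOR_LIST__ and __CTOR_END__, and are invoked from the end
+ of the list backwards, matching the usual right-to-left execution
+ convention for .ctors sections.  */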
+
+int
+__deinit_environment (void *unused __attribute__((__unused__)))
+{
+ /* This should be static to prevent calling the same destructor
+ twice (just in case we get here multiple times). */
+ static void (* const * pdtor)(void) = &__DTOR_LIST__ + 1;
+
+ while (pdtor < &__DTOR_END__)
+ if (*pdtor++ != NULL)
+ pdtor[-1] ();
+
+ if (__deregister_frame_info)
+ __deregister_frame_info(__EH_FRAME_BEGIN__);
+
+ return 0;
+}
diff --git a/gcc/config/i386/netware-libgcc.c b/gcc/config/i386/netware-libgcc.c
new file mode 100644
index 000000000..0925d872a
--- /dev/null
+++ b/gcc/config/i386/netware-libgcc.c
@@ -0,0 +1,58 @@
+/* Startup code for libgcc_s.nlm, necessary because we can't allow
+ libgcc_s to use libc's malloc & Co., which associate allocations
+ with the NLM owning the current (application) thread.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <netware.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <windows.h>
+
+static rtag_t allocRTag;
+
+BOOL
+DllMain (HINSTANCE libraryId __attribute__ ((__unused__)),
+ DWORD reason, void *hModule)
+{
+ switch (reason)
+ {
+ case DLL_NLM_STARTUP:
+ allocRTag = AllocateResourceTag (hModule,
+ "libgcc memory", AllocSignature);
+ return allocRTag != NULL;
+ case DLL_NLM_SHUTDOWN:
+ /* This does not recover resources associated with the tag...
+ ReturnResourceTag (allocRTag, 0); */
+ break;
+ }
+ return 1;
+}
+
+void *
+malloc (size_t size)
+{
+ return AllocSleepOK (size, allocRTag, NULL);
+}
+
+void
+free (void *ptr)
+{
+ Free (ptr);
+}
diff --git a/gcc/config/i386/netware-libgcc.def b/gcc/config/i386/netware-libgcc.def
new file mode 100644
index 000000000..a545631b1
--- /dev/null
+++ b/gcc/config/i386/netware-libgcc.def
@@ -0,0 +1,2 @@
+description "gcc runtime and intrinsics support"
+copyright "Copyright (C) 1989-2005 Free Software Foundation, Inc."
diff --git a/gcc/config/i386/netware-libgcc.exp b/gcc/config/i386/netware-libgcc.exp
new file mode 100644
index 000000000..309cf7549
--- /dev/null
+++ b/gcc/config/i386/netware-libgcc.exp
@@ -0,0 +1,83 @@
+# libgcc_s.nlm exports
+ (libgcc2),
+ __absvdi2,
+ __absvsi2,
+ __addvdi3,
+ __addvsi3,
+# __ashldi3,
+# __ashrdi3,
+ __bswapdi2,
+ __bswapsi2,
+ __clzdi2,
+ __clzsi2,
+ __ctzdi2,
+ __ctzsi2,
+ __deregister_frame,
+ __deregister_frame_info,
+ __deregister_frame_info_bases,
+ __divdc3,
+# __divdi3,
+ __divsc3,
+# __divtc3,
+ __divxc3,
+ __emutls_get_address,
+ __emutls_register_common,
+ __ffsdi2,
+ __ffssi2,
+ __fixunsdfdi,
+ __fixunssfdi,
+# __fixunstfdi,
+ __fixunsxfdi,
+ __floatundisf,
+ __floatundidf,
+# __floatunditf,
+ __floatundixf,
+ __gcc_bcmp,
+ __gcc_personality_v0,
+# __lshrdi3,
+# __moddi3,
+ __muldc3,
+# __muldi3,
+ __mulsc3,
+# __multc3,
+ __mulvdi3,
+ __mulvsi3,
+ __mulxc3,
+ __negvdi2,
+ __negvsi2,
+ __paritydi2,
+ __paritysi2,
+ __popcountdi2,
+ __popcountsi2,
+ __powidf2,
+ __powisf2,
+# __powitf2,
+ __powixf2,
+ __register_frame,
+ __register_frame_info,
+ __register_frame_info_bases,
+ __register_frame_info_table,
+ __register_frame_info_table_bases,
+ __register_frame_table,
+ __subvdi3,
+ __subvsi3,
+# __umoddi3,
+# __udivdi3,
+ _Unwind_Backtrace,
+ _Unwind_DeleteException,
+ _Unwind_FindEnclosingFunction,
+ _Unwind_Find_FDE,
+ _Unwind_ForcedUnwind,
+ _Unwind_GetCFA,
+ _Unwind_GetDataRelBase,
+ _Unwind_GetGR,
+ _Unwind_GetIP,
+ _Unwind_GetIPInfo,
+ _Unwind_GetLanguageSpecificData,
+ _Unwind_GetRegionStart,
+ _Unwind_GetTextRelBase,
+ _Unwind_RaiseException,
+ _Unwind_Resume,
+ _Unwind_Resume_or_Rethrow,
+ _Unwind_SetGR,
+ _Unwind_SetIP
diff --git a/gcc/config/i386/netware.c b/gcc/config/i386/netware.c
new file mode 100644
index 000000000..2232dbf6e
--- /dev/null
+++ b/gcc/config/i386/netware.c
@@ -0,0 +1,229 @@
+/* Subroutines for insn-output.c for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "langhooks.h"
+#include "ggc.h"
+
+/* Return a string which is the function name, identified by ID, modified
+ with PREFIX and a suffix consisting of an atsign (@) followed by the
+ number of bytes of arguments.  If ID is NULL, use the DECL_NAME as base.
+ Return NULL if no change is required. */
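+
+/* A worked sketch (assuming 32-bit parameter passing, PARM_BOUNDARY of
+ 32 bits): a prototyped  void f (int, double)  marked stdcall is
+ decorated as "_f@12" -- 4 bytes for the int plus 8 for the double,
+ each rounded up to the 4-byte boundary.  */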
+
+static tree
+gen_stdcall_or_fastcall_decoration (tree decl, tree id, char prefix)
+{
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
+ tree type = TREE_TYPE (decl);
+
+ if (prototype_p (type))
+ {
+ tree arg;
+ function_args_iterator args_iter;
+
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
+ }
+ }
+
+ new_str = XALLOCAVEC (char, 1 + strlen (old_str) + 1 + 10 + 1);
+ sprintf (new_str, "%c%s@" HOST_WIDE_INT_PRINT_UNSIGNED,
+ prefix, old_str, total);
+
+ return get_identifier (new_str);
+}
+
+/* Return a string which is the function name, identified by ID, modified
+ with an _n@ prefix (where n represents the number of arguments passed in
+ registers).  If ID is NULL, use the DECL_NAME as base.
+ Return NULL if no change is required. */
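+
+/* A worked sketch (assuming UNITS_PER_WORD == 4): int g (int, int)
+ declared with regparm (3) passes min (3, 8 / 4) = 2 words in
+ registers and is therefore emitted as "_2@g".  */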
+
+static tree
+gen_regparm_prefix (tree decl, tree id, unsigned int nregs)
+{
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
+ tree type = TREE_TYPE (decl);
+
+ if (prototype_p (type))
+ {
+ tree arg;
+ function_args_iterator args_iter;
+
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
+ }
+ }
+
+ if (nregs > total / UNITS_PER_WORD)
+ nregs = total / UNITS_PER_WORD;
+ gcc_assert (nregs <= 9);
+ new_str = XALLOCAVEC (char, 3 + strlen (old_str) + 1);
+ sprintf (new_str, "_%u@%s", nregs, old_str);
+
+ return get_identifier (new_str);
+}
+
+/* Maybe decorate and get a new identifier for the DECL of a stdcall or
+ fastcall function. The original identifier is supplied in ID. */
+
+static tree
+i386_nlm_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ tree new_id;
+
+ if (lookup_attribute ("stdcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, '_');
+ else if (lookup_attribute ("fastcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, FASTCALL_PREFIX);
+ else if ((new_id = lookup_attribute ("regparm", type_attributes)))
+ new_id = gen_regparm_prefix (decl, id,
+ TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (new_id))));
+ else
+ new_id = NULL_TREE;
+
+ return new_id;
+}
+
+/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
+ in the language-independent default hook
+ langhooks.c:lhd_set_decl_assembler_name ()
+ and in cp/mangle.c:mangle_decl (). */
+tree
+i386_nlm_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = TREE_CODE (decl) == FUNCTION_DECL
+ ? i386_nlm_maybe_mangle_decl_assembler_name (decl, id)
+ : NULL_TREE;
+
+ return (new_id ? new_id : id);
+}
+
+void
+i386_nlm_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ /* Do not change the identifier if a verbatim asmspec
+ or if stdcall suffix already added. */
+ && *IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) != '*'
+ && !strchr (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), '@')
+ /* FIXME: Imported stdcall names are not modified by the Ada frontend.
+ Check and decorate the RTL name now. */
+ && strcmp (lang_hooks.name, "GNU Ada") == 0)
+ {
+ rtx symbol = XEXP (rtl, 0);
+ tree new_id;
+ tree old_id = DECL_ASSEMBLER_NAME (decl);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+
+ if ((new_id = i386_nlm_maybe_mangle_decl_assembler_name (decl, old_id)))
+ XSTR (symbol, 0) = IDENTIFIER_POINTER (new_id);
+ }
+}
+
+/* Strip the stdcall/fastcall/regparm pre-/suffix. */
+
+const char *
+i386_nlm_strip_name_encoding (const char *str)
+{
+ const char *name = default_strip_name_encoding (str);
+
+ if (*str != '*' && (*name == '_' || *name == '@'))
+ {
+ const char *p = strchr (name + 1, '@');
+
+ if (p)
+ {
+ ++name;
+ if (ISDIGIT (p[1]))
+ name = ggc_alloc_string (name, p - name);
+ else
+ {
+ gcc_assert (ISDIGIT (*name));
+ name++;
+ gcc_assert (name == p);
+ }
+ }
+ }
+ return name;
+}
diff --git a/gcc/config/i386/netware.h b/gcc/config/i386/netware.h
new file mode 100644
index 000000000..7f63f4518
--- /dev/null
+++ b/gcc/config/i386/netware.h
@@ -0,0 +1,177 @@
+/* Core target definitions for GCC for Intel 80x86 running NetWare,
+ and using dwarf for the debugging format.
+ Copyright (C) 1993, 1994, 2004, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+ Written by David V. Henkel-Wallace (gumby@cygnus.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fprintf (stderr, " (x86 NetWare)");
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+/* Kinda useless, but what the hell */
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+#undef RELATIVE_PREFIX_NOT_LINKDIR
+#undef LIBGCC_SPEC
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("IAPX386"); \
+ builtin_define ("_M_IX86=300"); \
+ builtin_define ("__netware__"); \
+ builtin_assert ("system=netware"); \
+ builtin_define ("__ELF__"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__fastcall=__attribute__((__fastcall__))"); \
+ if (!flag_iso) \
+ { \
+ builtin_define ("_cdecl=__attribute__((__cdecl__))"); \
+ builtin_define ("_stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("_fastcall=__attribute__((__fastcall__))"); \
+ } \
+ } \
+ while (0)
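+
+/* A minimal usage sketch: with the builtins above,
+
+ int __stdcall f (int);
+
+ preprocesses to
+
+ int __attribute__((__stdcall__)) f (int);
+
+ and the _stdcall spelling behaves the same outside strict ISO
+ (-ansi / -std=c*) modes, where flag_iso suppresses it.  */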
+
+#undef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_pentium4
+
+/* By default, target has a 80387, uses IEEE compatible arithmetic,
+ returns float values in the 387, and uses MSVC bit field layout. */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | \
+ MASK_FLOAT_RETURNS | MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
+
+/* Don't allow flag_pic to propagate since invalid relocations will
+ result otherwise. */
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (flag_pic) \
+ { \
+ error ("-fPIC and -fpic are not supported for this target"); \
+ flag_pic = 0; \
+ } \
+} while (0)
+
+#undef MATH_LIBRARY
+#define MATH_LIBRARY ""
+
+/* Align doubles and long-longs in structures on qword boundaries. */
+#undef BIGGEST_FIELD_ALIGNMENT
+#define BIGGEST_FIELD_ALIGNMENT 64
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Implicit arguments pointing to aggregate return values are to be
+ removed by the caller. */
+#undef KEEP_AGGREGATE_RETURN_POINTER
+#define KEEP_AGGREGATE_RETURN_POINTER 1
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) (svr4_dbx_register_map[n])
+
+/* Default structure packing is 1-byte. */
+#define TARGET_DEFAULT_PACK_STRUCT 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), FILE); \
+ goto DONE; \
+ } \
+ } while (0)
+
+/* There is no TLS support in NLMs on NetWare. */
+#undef HAVE_AS_TLS
+
+#define HAS_INIT_SECTION
+#undef INIT_SECTION_ASM_OP
+
+#define CTOR_LISTS_DEFINED_EXTERNALLY
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP ".section\t.rodata"
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in).
+
+ On i386 running NetWare, modify the assembler name with an underscore (_)
+ or atsign (@) prefix and a suffix consisting of an atsign (@) followed by
+ a string of digits that represents the number of bytes of arguments passed
+ to the function, if it has the attribute STDCALL. Alternatively, if it has
+ the REGPARM attribute, prefix it with an underscore (_), a digit
+ representing the number of registers used, and an atsign (@). */
+void i386_nlm_encode_section_info (tree, rtx, int);
+extern tree i386_nlm_mangle_decl_assembler_name (tree, tree);
+const char *i386_nlm_strip_name_encoding (const char *);
+#define SUBTARGET_ENCODE_SECTION_INFO i386_nlm_encode_section_info
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_nlm_mangle_decl_assembler_name
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING i386_nlm_strip_name_encoding
+
+#define TARGET_POSIX_IO
diff --git a/gcc/config/i386/netware.opt b/gcc/config/i386/netware.opt
new file mode 100644
index 000000000..e1d903a2f
--- /dev/null
+++ b/gcc/config/i386/netware.opt
@@ -0,0 +1,33 @@
+; Netware options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+posix
+Driver
+
+pthread
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/i386/nmmintrin.h b/gcc/config/i386/nmmintrin.h
new file mode 100644
index 000000000..2a2d264c6
--- /dev/null
+++ b/gcc/config/i386/nmmintrin.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>.
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _NMMINTRIN_H_INCLUDED
+#define _NMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_2__
+# error "SSE4.2 instruction set not enabled"
+#else
+/* We just include the SSE4.1 header file. */
+#include <smmintrin.h>
+#endif /* __SSE4_2__ */
+
+#endif /* _NMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/nto.h b/gcc/config/i386/nto.h
new file mode 100644
index 000000000..0a54ce02c
--- /dev/null
+++ b/gcc/config/i386/nto.h
@@ -0,0 +1,108 @@
+/* Definitions for Intel 386 running QNX/Neutrino.
+ Copyright (C) 2002, 2003, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (QNX/Neutrino/i386 ELF)");
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__X86__"); \
+ builtin_define ("__QNXNTO__"); \
+ builtin_define ("__QNX__"); \
+ builtin_define ("__ELF__"); \
+ builtin_define ("__LITTLEENDIAN__"); \
+ builtin_assert ("system=qnx"); \
+ builtin_assert ("system=qnxnto"); \
+ builtin_assert ("system=nto"); \
+ builtin_assert ("system=unix"); \
+ } \
+ while (0)
+
+#undef THREAD_MODEL_SPEC
+#define THREAD_MODEL_SPEC "posix"
+
+#ifdef CROSS_DIRECTORY_STRUCTURE
+#define SYSROOT_SUFFIX_SPEC "x86"
+#endif
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin/"
+
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
+#endif
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+"%{!shared: \
+ %{!symbolic: \
+ %{pg:mcrt1.o%s} \
+ %{!pg:%{p:mcrt1.o%s} \
+ %{!p:crt1.o%s}}}} \
+crti.o%s \
+%{fexceptions: crtbegin.o%s} \
+%{!fexceptions: %R/lib/crtbegin.o}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "crtend.o%s crtn.o%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{h*} %{v:-V} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -dy -z text} \
+ %{symbolic:-Bsymbolic -G -dy -z text} \
+ %{G:-G} \
+ %{YP,*} \
+ %{!YP,*:%{p:-Y P,%R/lib} \
+ %{!p:-Y P,%R/lib}} \
+ %{Qy:} %{!Qn:-Qy} \
+ -m i386nto \
+ %{!shared: --dynamic-linker /usr/lib/ldqnx.so.2}"
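+
+/* Reading the spec syntax above: %{static:-dn -Bstatic} passes
+ "-dn -Bstatic" only when -static was given, %{!shared:...} applies
+ only when -shared is absent, and %R expands to the sysroot prefix;
+ so a plain non-shared, non-profiled link ends up with roughly
+ "-m i386nto -Y P,/lib -Qy --dynamic-linker /usr/lib/ldqnx.so.2".  */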
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:-lc}}"
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define NO_IMPLICIT_EXTERN_C 1
+
+#define TARGET_POSIX_IO
+
+#undef DBX_REGISTER_NUMBER
diff --git a/gcc/config/i386/nto.opt b/gcc/config/i386/nto.opt
new file mode 100644
index 000000000..ddfaa90c9
--- /dev/null
+++ b/gcc/config/i386/nto.opt
@@ -0,0 +1,33 @@
+; QNX options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+G
+Driver
+
+YP,
+Driver Joined
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/i386/nwld.c b/gcc/config/i386/nwld.c
new file mode 100644
index 000000000..05d1a92d1
--- /dev/null
+++ b/gcc/config/i386/nwld.c
@@ -0,0 +1,73 @@
+/* Subroutines for insn-output.c for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+
+void
+nwld_named_section_asm_out_constructor (rtx symbol, int priority)
+{
+#if !SUPPORTS_INIT_PRIORITY
+ const char section[] = ".ctors"TARGET_SUB_SECTION_SEPARATOR;
+#else
+ char section[20];
+
+ sprintf (section,
+ ".ctors"TARGET_SUB_SECTION_SEPARATOR"%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+#endif
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
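+
+/* Mapping sketch (assuming SUPPORTS_INIT_PRIORITY and a
+ MAX_INIT_PRIORITY of 65535): priority 65535 lands in ".ctors$00000"
+ and priority 1 in ".ctors$65534"; the linker's increasing name sort
+ plus the right-to-left execution of .ctors entries then runs
+ lower-numbered priorities first.  */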
+
+void
+nwld_named_section_asm_out_destructor (rtx symbol, int priority)
+{
+#if !SUPPORTS_INIT_PRIORITY
+ const char section[] = ".dtors"TARGET_SUB_SECTION_SEPARATOR;
+#else
+ char section[20];
+
+ sprintf (section, ".dtors"TARGET_SUB_SECTION_SEPARATOR"%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; destructors are run from left to right, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+#endif
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
diff --git a/gcc/config/i386/nwld.h b/gcc/config/i386/nwld.h
new file mode 100644
index 000000000..6d8e54ff9
--- /dev/null
+++ b/gcc/config/i386/nwld.h
@@ -0,0 +1,69 @@
+/* nwld.h -- defines to be used when targeting GCC for some generic NetWare
+ system while using the Novell linker.
+ Copyright (C) 2004, 2007, 2010, 2011 Free Software Foundation, Inc.
+
+ Written by Jan Beulich (jbeulich@novell.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc --def-file libc.def%s"
+
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC "-lgcc %{!static-libgcc:--def-file libgcc.def%s}"
+
+#undef LINKER_NAME
+#define LINKER_NAME "nwld"
+
+#undef LINK_SPEC
+#define LINK_SPEC "--format:NLM --extensions:GNU" \
+ " %{static:%{!nostdlib:%{!nodefaultlibs:%estatic linking is not supported\n}}}"
+
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC "%L %G"
+
+/* In order to permit the linker to derive the output filename from the first
+ input file, put the common startup code as the last object. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crt0%O%s ../imports/%{!posix:libc}%{posix:posix}pre.gcc%O%s" \
+ " --def-file %{!posix:libc}%{posix:posix}pre.def%s"
+
+#define DRIVER_SELF_SPECS "%{!static-libgcc:-shared-libgcc}"
+
+#define TARGET_SUB_SECTION_SEPARATOR "$"
+
+void nwld_named_section_asm_out_constructor (rtx, int);
+void nwld_named_section_asm_out_destructor (rtx, int);
+
+#define TARGET_ASM_CONSTRUCTOR nwld_named_section_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR nwld_named_section_asm_out_destructor
+
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ /* XXX This can be enabled once gas output meets nwld's needs. */ \
+ /* if (!flag_unwind_tables && !flag_exceptions) */ \
+ flag_dwarf2_cfi_asm = 0; \
+} while (0)
+
+#undef EH_FRAME_SECTION_NAME
+#define EH_FRAME_SECTION_NAME ".eh_frame"TARGET_SUB_SECTION_SEPARATOR
+
+/* nwld does not currently support stabs debug info */
+#undef DBX_DEBUGGING_INFO
diff --git a/gcc/config/i386/openbsd.h b/gcc/config/i386/openbsd.h
new file mode 100644
index 000000000..d64f15907
--- /dev/null
+++ b/gcc/config/i386/openbsd.h
@@ -0,0 +1,101 @@
+/* Configuration for an OpenBSD i386 target.
+ Copyright (C) 1999, 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (OpenBSD/i386)");
+
+/* This goes away when the math-emulator is fixed */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=OpenBSD"); \
+ } \
+ while (0)
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Assembler format: overall framework. */
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Stack & calling: aggregate returns. */
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Assembler format: alignment output. */
+
+/* Kludgy test: when gas is upgraded, it will have p2align, and no problems
+ with nops. */
+#ifndef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* i386 OpenBSD still uses an older gas that doesn't insert nops by default
+ when the .align directive demands to insert extra space in the text
+ segment. */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d,0x90\n", (LOG))
+#endif
+
+/* Stack & calling: profiling. */
+
+/* OpenBSD's profiler recovers all information from the stack pointer.
+ The icky part is not here, but in machine/profile.h. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fputs (flag_pic ? "\tcall mcount@PLT\n": "\tcall mcount\n", FILE);
+
+/* Assembler format: exception region output. */
+
+/* All configurations that don't use elf must be explicit about not using
+ dwarf unwind information. */
+#define DWARF2_UNWIND_INFO 0
+
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START ";#"
+
+/* OpenBSD gas currently does not support quad, so do not use it. */
+#undef ASM_QUAD
diff --git a/gcc/config/i386/openbsdelf.h b/gcc/config/i386/openbsdelf.h
new file mode 100644
index 000000000..53949e8ab
--- /dev/null
+++ b/gcc/config/i386/openbsdelf.h
@@ -0,0 +1,134 @@
+/* Configuration for an OpenBSD i386 target.
+
+ Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This keeps us from using libraries compiled with the native cc, so
+ undef it. */
+#undef NO_DOLLAR_IN_LABEL
+
+/* Override the default comment-starter of "/". */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+/* This goes away when the math-emulator is fixed */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+/* Run-time target specifications */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ OPENBSD_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+/* As an ELF system, we need the crtbegin/crtend startup files.  */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+ %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} %{!p:crt0%O%s}} \
+ crtbegin%O%s} %{shared:crtbeginS%O%s}"
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}"
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+/* Assembler format: overall framework. */
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+
+/* The following macros were originally stolen from i386v4.h.
+ These have to be defined to get PIC code correct. */
+
+/* Assembler format: dispatch tables. */
+
+/* Assembler format: sections. */
+
+/* Stack & calling: aggregate returns. */
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Assembler format: alignment output. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) {\
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* Stack & calling: profiling. */
+
+/* OpenBSD's profiler recovers all information from the stack pointer.
+ The icky part is not here, but in machine/profile.h. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fputs (flag_pic ? "\tcall __mcount@PLT\n": "\tcall __mcount\n", FILE);
+
+/* Assembler format: exception region output. */
+
+/* Our configuration still doesn't handle DWARF 2 correctly.  */
+#define DWARF2_UNWIND_INFO 0
+
+/* Assembler format: alignment output. */
+
+/* Note that we pick up ASM_OUTPUT_MAX_SKIP_ALIGN from i386/gas.h */
+
+/* Note that we pick up ASM_OUTPUT_MI_THUNK from unix.h. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{!shared:%{!nostdlib:%{!r:%{!e*:-e __start}}}} \
+ %{shared:-shared} %{R*} \
+ %{static:-Bstatic} \
+ %{!static:-Bdynamic} \
+ %{assert*} \
+ -dynamic-linker /usr/libexec/ld.so"
+
+#define OBSD_HAS_CORRECT_SPECS
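The LINK_SPEC above follows the usual GCC spec-string syntax: each %{...} clause expands only when the matching driver option is present. Read that way (a sketch of the expansion, not captured driver output), a default dynamic link passes roughly

    -e __start -Bdynamic -dynamic-linker /usr/libexec/ld.so

to the linker, while -static selects -Bstatic instead and a shared link replaces the entry-point clause with -shared.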
diff --git a/gcc/config/i386/pentium.md b/gcc/config/i386/pentium.md
new file mode 100644
index 000000000..c6c5bd55f
--- /dev/null
+++ b/gcc/config/i386/pentium.md
@@ -0,0 +1,306 @@
+;; Pentium Scheduling
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The Pentium is an in-order core with two integer pipelines.
+
+;; True for insns that behave like prefixed insns on the Pentium.
+(define_attr "pent_prefix" "false,true"
+ (if_then_else (ior (eq_attr "prefix_0f" "1")
+ (ior (eq_attr "prefix_data16" "1")
+ (eq_attr "prefix_rep" "1")))
+ (const_string "true")
+ (const_string "false")))
+
+;; Categorize how an instruction slots.
+
+;; The non-MMX Pentium slots an instruction with prefixes on the U pipe only,
+;; while the MMX Pentium can slot it on either U or V.  We model the non-MMX
+;; Pentium rules, because that results in noticeably better code on non-MMX
+;; Pentium and doesn't hurt much on MMX.  (Prefixed instructions are not
+;; very common, so the scheduler usually has a non-prefixed insn to pair.)
+
+(define_attr "pent_pair" "uv,pu,pv,np"
+ (cond [(eq_attr "imm_disp" "true")
+ (const_string "np")
+ (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
+ (and (eq_attr "type" "pop,push")
+ (eq_attr "memory" "!both")))
+ (if_then_else (eq_attr "pent_prefix" "true")
+ (const_string "pu")
+ (const_string "uv"))
+ (eq_attr "type" "ibr")
+ (const_string "pv")
+ (and (eq_attr "type" "ishift")
+ (match_operand 2 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate")
+ (match_operand 2 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "ishift1")
+ (match_operand 1 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate1")
+ (match_operand 1 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_string "pv")
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_string "pv")
+ ]
+ (const_string "np")))
+
+(define_automaton "pentium,pentium_fpu")
+
+;; The Pentium has U and V pipes.  Instructions for both pipes
+;; are always issued together, much like on a VLIW machine.
+;;
+;; predecode
+;; / \
+;; decodeu decodev
+;; / | |
+;; fpu executeu executev
+;; | | |
+;; fpu retire retire
+;; |
+;; fpu
+;; We add dummy "port" pipes, allocated only in the first cycle of an
+;; instruction, to specify this behavior.
+
+(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
+(define_cpu_unit "pentium-u,pentium-v" "pentium")
+(absence_set "pentium-portu" "pentium-u,pentium-v")
+(presence_set "pentium-portv" "pentium-portu")
+
+;; Floating point instructions can overlap with newly issued integer
+;; instructions.  We model only the first cycle of the FP pipeline, as
+;; it is fully pipelined.
+(define_cpu_unit "pentium-fp" "pentium_fpu")
+
+;; There is a non-pipelined multiplier unit used for complex operations.
+(define_cpu_unit "pentium-fmul" "pentium_fpu")
+
+;; The Pentium preserves memory ordering, so when a load-execute-store
+;; instruction is executed together with another instruction that loads
+;; data, execution of the other instruction is delayed until the very
+;; last cycle of the first one, when the data are bypassed.
+;; We model this by allocating a "memory" unit while a store is pending
+;; and making it conflict with the load units.
+
+(define_cpu_unit "pentium-memory" "pentium")
+(define_cpu_unit "pentium-load0" "pentium")
+(define_cpu_unit "pentium-load1" "pentium")
+(absence_set "pentium-load0,pentium-load1" "pentium-memory")
+
+(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
+(define_reservation "pentium-np" "(pentium-u + pentium-v)")
+(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
+(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
+(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
+(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
+(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
+(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
+(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
+(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
+ + pentium-memory)")
+(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
+ + pentium-memory)")
+(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
+ + pentium-memory)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+
+;; A few common long-latency instructions.
+(define_insn_reservation "pent_mul" 11
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imul"))
+ "pentium-np*11")
+
+(define_insn_reservation "pent_str" 12
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "str"))
+ "pentium-np*12")
+
+;; Integer division and some other long-latency instructions block all
+;; units, including the FP pipe.  There is no value in modeling the
+;; latency of these instructions, and not modeling it decreases the
+;; size of the DFA.
+(define_insn_reservation "pent_block" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "idiv"))
+ "pentium-np+pentium-fp")
+
+;; Moves usually have a one-cycle penalty, but there are exceptions.
+(define_insn_reservation "pent_fmov" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none,load")))
+ "(pentium-fp+pentium-np)")
+
+(define_insn_reservation "pent_fpmovxf" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF"))))
+ "(pentium-fp+pentium-np)*3")
+
+(define_insn_reservation "pent_fpstore" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (ior (match_operand 1 "immediate_operand" "")
+ (eq_attr "memory" "store"))))
+ "(pentium-fp+pentium-np)*2")
+
+(define_insn_reservation "pent_imov" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imov"))
+ "pentium-firstuv")
+
+;; Push and pop instructions have 1-cycle latency, and a special
+;; hardware bypass allows them to be paired with other push, pop
+;; and call instructions.
+(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
+(define_insn_reservation "pent_push" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "push")
+ (eq_attr "memory" "store")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_pop" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "pop,leave"))
+ "pentium-firstuv")
+
+;; Call and branch instructions can execute in either pipe, but
+;; they are only pairable when in the V pipe.
+(define_insn_reservation "pent_call" 10
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "call,callv"))
+ "pentium-firstv,pentium-v*9")
+
+(define_insn_reservation "pent_branch" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "ibr"))
+ "pentium-firstv")
+
+;; Floating point instructions dispatch in the U pipe, but continue
+;; in the FP pipeline, allowing other instructions to execute.
+(define_insn_reservation "pent_fp" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fop,fistp"))
+ "(pentium-firstu+pentium-fp),nothing,nothing")
+
+;; The first two cycles of fmul are not pipelined.
+(define_insn_reservation "pent_fmul" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fmul"))
+ "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
+
+;; Long-latency FP instructions overlap with integer instructions, but
+;; overlap only their last 2 cycles with FP ones.
+(define_insn_reservation "pent_fdiv" 39
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fdiv"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*36,pentium-fmul*2")
+
+(define_insn_reservation "pent_fpspc" 70
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fpspc"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*67,pentium-fmul*2")
+
+;; Integer instructions.  Load/execute/store takes 3 cycles,
+;; load/execute 2 cycles, and execute-only one cycle.
+(define_insn_reservation "pent_uv_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "both")))
+ "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
+
+(define_insn_reservation "pent_u_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "both")))
+ "pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
+
+(define_insn_reservation "pent_v_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "both")))
+ "pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
+
+(define_insn_reservation "pent_np_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "both")))
+ "pentium-np,pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "load")))
+ "pentium-firstuvload,pentium-uv")
+
+(define_insn_reservation "pent_u_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "load")))
+ "pentium-firstuload,pentium-u")
+
+(define_insn_reservation "pent_v_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "load")))
+ "pentium-firstvload,pentium-v")
+
+(define_insn_reservation "pent_np_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "load")))
+ "pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "none")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_u" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "none")))
+ "pentium-firstu")
+
+(define_insn_reservation "pent_v" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "none")))
+ "pentium-firstv")
+
+(define_insn_reservation "pent_np" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "none")))
+ "pentium-np")
+
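The pent_pair attribute at the top of this file drives the pairing model: "uv" insns issue in either pipe, "pu" only in the U pipe, "pv" only in the V pipe, and "np" never pairs. A plain-C sketch of those semantics (illustrative only, not GCC code):

    enum pent_pair { PAIR_UV, PAIR_PU, PAIR_PV, PAIR_NP };

    /* Can U_INSN issue in the U pipe with V_INSN paired in the V pipe?  */
    static int
    pent_can_pair (enum pent_pair u_insn, enum pent_pair v_insn)
    {
      if (u_insn != PAIR_UV && u_insn != PAIR_PU)
        return 0;                     /* first insn must fit the U pipe  */
      return v_insn == PAIR_UV || v_insn == PAIR_PV;  /* second, the V pipe */
    }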
diff --git a/gcc/config/i386/pmm_malloc.h b/gcc/config/i386/pmm_malloc.h
new file mode 100644
index 000000000..0a9f2e227
--- /dev/null
+++ b/gcc/config/i386/pmm_malloc.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+ may not be visible. */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+ void *ptr;
+ if (alignment == 1)
+ return malloc (size);
+ if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
+ alignment = sizeof (void *);
+ if (posix_memalign (&ptr, alignment, size) == 0)
+ return ptr;
+ else
+ return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr)
+{
+ free (ptr);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
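Because _mm_malloc above is a thin wrapper over posix_memalign, memory it returns must be released with _mm_free (which simply calls free), not with any other deallocator. A minimal usage sketch (the buffer size and alignment are arbitrary):

    #include <mm_malloc.h>

    int
    main (void)
    {
      /* 16-byte alignment, as SSE vector loads/stores require.  */
      float *buf = (float *) _mm_malloc (64 * sizeof (float), 16);
      if (!buf)
        return 1;
      buf[0] = 1.0f;
      _mm_free (buf);
      return 0;
    }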
diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h
new file mode 100644
index 000000000..c5c9ae27c
--- /dev/null
+++ b/gcc/config/i386/pmmintrin.h
@@ -0,0 +1,128 @@
+/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _PMMINTRIN_H_INCLUDED
+#define _PMMINTRIN_H_INCLUDED
+
+#ifndef __SSE3__
+# error "SSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE2 and SSE header files.  */
+#include <emmintrin.h>
+
+/* Additional bits in the MXCSR. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#define _MM_DENORMALS_ZERO_OFF 0x0000
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
+ _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
+#define _MM_GET_DENORMALS_ZERO_MODE() \
+ (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehdup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_moveldup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loaddup_pd (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movedup_pd (__m128d __X)
+{
+ return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lddqu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_monitor (__P, __E, __H);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_mwait (__E, __H);
+}
+
+#endif /* __SSE3__ */
+
+#endif /* _PMMINTRIN_H_INCLUDED */
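A short usage sketch of the intrinsics and MXCSR helpers declared above; it assumes compilation with -msse3 so that __SSE3__ is defined (the values are arbitrary):

    #include <pmmintrin.h>

    int
    main (void)
    {
      /* Flush denormal inputs to zero via the DAZ bits defined above.  */
      _MM_SET_DENORMALS_ZERO_MODE (_MM_DENORMALS_ZERO_ON);

      __m128 x = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
      __m128 y = _mm_set_ps (8.0f, 7.0f, 6.0f, 5.0f);
      /* addsubps: subtract in the even elements, add in the odd ones.  */
      __m128 r = _mm_addsub_ps (x, y);
      (void) r;
      return 0;
    }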
diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
new file mode 100644
index 000000000..8d4d6571d
--- /dev/null
+++ b/gcc/config/i386/popcntintrin.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __POPCNT__
+# error "POPCNT instruction set not enabled"
+#endif /* __POPCNT__ */
+
+#ifndef _POPCNTINTRIN_H_INCLUDED
+#define _POPCNTINTRIN_H_INCLUDED
+
+/* Calculate the number of bits set to 1.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+#endif
+
+#endif /* _POPCNTINTRIN_H_INCLUDED */
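As their bodies show, these intrinsics are just GCC's popcount builtins, so with -mpopcnt in effect each call compiles to a single popcnt instruction. A small sketch:

    #include <popcntintrin.h>

    int
    main (void)
    {
      /* 0xF0F0F0F0 has 16 bits set.  */
      return _mm_popcnt_u32 (0xF0F0F0F0u) == 16 ? 0 : 1;
    }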
diff --git a/gcc/config/i386/ppro.md b/gcc/config/i386/ppro.md
new file mode 100644
index 000000000..bc1cb59d1
--- /dev/null
+++ b/gcc/config/i386/ppro.md
@@ -0,0 +1,758 @@
+;; Scheduling for the Intel P6 family of processors
+;; Copyright (C) 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The P6 family includes the Pentium Pro, Pentium II, Pentium III, Celeron
+;; and Xeon lines of CPUs. The DFA scheduler description in this file is
+;; based on information that can be found in the following three documents:
+;;
+;; "P6 Family of Processors Hardware Developer's Manual",
+;; Intel, September 1999.
+;;
+;; "Intel Architecture Optimization Manual",
+;; Intel, 1999 (Order Number: 245127-001).
+;;
+;; "How to optimize for the Pentium family of microprocessors",
+;; by Agner Fog, PhD.
+;;
+;; The P6 pipeline has three major components:
+;; 1) the FETCH/DECODE unit, an in-order issue front-end
+;; 2) the DISPATCH/EXECUTE unit, which is the out-of-order core
+;; 3) the RETIRE unit, an in-order retirement unit
+;;
+;; So, the P6 CPUs have out-of-order cores, but the instruction decoder and
+;; retirement unit are naturally in-order.
+;;
+;; BUS INTERFACE UNIT
+;; / \
+;; L1 ICACHE L1 DCACHE
+;; / | \ | \
+;; DECODER0 DECODER1 DECODER2 DISP/EXEC RETIRE
+;; \ | / | |
+;; INSTRUCTION POOL __________|_______/
+;; (inc. reorder buffer)
+;;
+;; Since the P6 CPUs execute instructions out-of-order, the most important
+;; consideration in performance tuning is making sure enough micro-ops are
+;; ready for execution in the out-of-order core, while not stalling the
+;; decoder.
+;;
+;; TODO:
+;; - Find a less crude way to model complex instructions, in
+;; particular how many cycles they take to be decoded.
+;; - Include decoder latencies in the total reservation latencies.
+;; This isn't necessary right now because we assume for every
+;; instruction that it never blocks a decoder.
+;; - Figure out where the p0 and p1 reservations come from. These
+;; appear not to be in the manual.
+;; - Lots more because I'm sure this is still far from optimal :-)
+
+;; The ppro_idiv and ppro_fdiv automata are used to model issue
+;; latencies of idiv and fdiv type insns.
+(define_automaton "ppro_decoder,ppro_core,ppro_idiv,ppro_fdiv,ppro_load,ppro_store")
+
+;; Simple instructions of the register-register form have only one uop.
+;; Load instructions are also only one uop. Store instructions decode to
+;; two uops, and simple read-modify instructions also take two uops.
+;; Simple instructions of the register-memory form have two to three uops.
+;; Simple read-modify-write instructions have four uops. The rules for
+;; the decoder are simple:
+;; - an instruction with 1 uop can be decoded by any of the three
+;; decoders in one cycle.
+;; - an instruction with 2 to 4 uops can be decoded only by decoder 0,
+;; but still in only one cycle.
+;; - a complex (microcode) instruction can also only be decoded by
+;; decoder 0, and this takes an unspecified number of cycles.
+;;
+;; The goal is to schedule such that we have a few-one-one uops sequence
+;; in each cycle (a multi-uop instruction on decoder 0 plus two single-uop
+;; instructions on decoders 1 and 2), to decode as many instructions per
+;; cycle as possible.
+(define_cpu_unit "decoder0" "ppro_decoder")
+(define_cpu_unit "decoder1" "ppro_decoder")
+(define_cpu_unit "decoder2" "ppro_decoder")
+
+;; We first wish to find an instruction for decoder0, so exclude
+;; decoder1 and decoder2 from being reserved until decoder 0 is
+;; reserved.
+(presence_set "decoder1" "decoder0")
+(presence_set "decoder2" "decoder0")
+
+;; Most instructions can be decoded on any of the three decoders.
+(define_reservation "decodern" "(decoder0|decoder1|decoder2)")
+
+;; The out-of-order core has five pipelines.  During each cycle, the core
+;; may dispatch zero or one uop on the port of any of the five pipelines,
+;; so the maximum number of dispatched uops per cycle is 5.  In practice,
+;; 3 uops per cycle is more realistic.
+;;
+;; Two of the five pipelines contain several execution units:
+;;
+;; Port 0 Port 1 Port 2 Port 3 Port 4
+;; ALU ALU LOAD SAC SDA
+;; FPU JUE
+;; AGU MMX
+;; MMX P3FPU
+;; P3FPU
+;;
+;; (SAC=Store Address Calculation, SDA=Store Data Unit, P3FPU = SSE unit,
+;; JUE = Jump Execution Unit, AGU = Address Generation Unit)
+;;
+(define_cpu_unit "p0,p1" "ppro_core")
+(define_cpu_unit "p2" "ppro_load")
+(define_cpu_unit "p3,p4" "ppro_store")
+(define_cpu_unit "idiv" "ppro_idiv")
+(define_cpu_unit "fdiv" "ppro_fdiv")
+
+;; Only the irregular instructions have to be modeled here. A load
+;; increases the latency by 2 or 3, or by nothing if the manual gives
+;; a latency already. Store latencies are not accounted for.
+;;
+;; The simple instructions follow a very regular pattern of 1 uop per
+;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
+;; on port 4 and port 3. These instructions are modelled at the bottom
+;; of this file.
+;;
+;; For microcoded instructions we don't know how many uops are produced.
+;; These instructions are the "complex" ones in the Intel manuals. All
+;; we _do_ know is that they typically produce four or more uops, so
+;; they can only be decoded on decoder0. Modelling their latencies
+;; doesn't make sense because we don't know how these instructions are
+;; executed in the core. So we just model that they can only be decoded
+;; on decoder 0, and say that it takes a little while before the result
+;; is available.
+(define_insn_reservation "ppro_complex_insn" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "other,multi,call,callv,str"))
+ "decoder0")
+
+;; imov with memory operands does not use the integer units.
+(define_insn_reservation "ppro_imov" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imov")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imov_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imov")))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_imov_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "imov")))
+ "decoder0,p4+p3")
+
+;; imovx always decodes to one uop, and also doesn't use the integer
+;; units if it has memory operands.
+(define_insn_reservation "ppro_imovx" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imovx")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imovx_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imovx")))
+ "decodern,p2")
+
+;; lea executes on port 0 with latency one and throughput 1.
+(define_insn_reservation "ppro_lea" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "lea")))
+ "decodern,p0")
+
+;; Shift and rotate execute on port 0 with latency and throughput 1.
+;; The load and store units need to be reserved when memory operands
+;; are involved.
+(define_insn_reservation "ppro_shift_rotate" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_shift_rotate_mem" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decoder0,p2+p0,p4+p3")
+
+
+;; The P6 has a sophisticated branch prediction mechanism to minimize
+;; latencies due to branching. In particular, it has a fast way to
+;; execute branches that are taken multiple times (such as in loops).
+;; Branches not taken suffer no penalty, and correctly predicted
+;; branches cost only one fetch cycle. Mispredicted branches are very
+;; costly: typically 15 cycles and possibly as many as 26 cycles.
+;;
+;; Unfortunately all this makes it quite difficult to properly model
+;; the latencies for the compiler. Here I've made the choice to be
+;; optimistic and assume branches are often predicted correctly, so
+;; they have latency 1, and the decoders are not blocked.
+;;
+;; In addition, the model assumes a branch always decodes to only 1 uop,
+;; which is not exactly true because there are a few instructions that
+;; decode to 2 uops or microcode. But this probably gives the best
+;; results because we can assume these instructions can decode on all
+;; decoders.
+(define_insn_reservation "ppro_branch" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ibr")))
+ "decodern,p1")
+
+;; ??? Indirect branches probably have worse latency than this.
+(define_insn_reservation "ppro_indirect_branch" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ibr")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_leave" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "leave"))
+ "decoder0,p2+(p0|p1),(p0|p1)")
+
+;; imul has throughput one, but latency 4, and can only execute on port 0.
+(define_insn_reservation "ppro_imul" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_imul_mem" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "imul")))
+ "decoder0,p2+p0")
+
+;; div and idiv are very similar, so we model them the same.
+;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
+;; These issue latencies are modelled via the ppro_idiv automaton.
+(define_insn_reservation "ppro_idiv_QI" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*2,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_QI_load" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_HI" 23
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*17")
+
+(define_insn_reservation "ppro_idiv_HI_load" 23
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*18")
+
+(define_insn_reservation "ppro_idiv_SI" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*33")
+
+(define_insn_reservation "ppro_idiv_SI_load" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*34")
+
+;; Floating point operations always execute on port 0.
+;; ??? Where do these latencies come from?  fadd has latency 3 and
+;; has throughput "1/cycle (align with FADD)". What do they
+;; mean and how can we model that?
+(define_insn_reservation "ppro_fop" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "fop")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fop_load" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0")
+
+(define_insn_reservation "ppro_fop_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fop")))
+ "decoder0,p0,p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fop_both" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fsgn" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fsgn"))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fistp" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fistp"))
+ "decoder0,p0*2,p4+p3")
+
+(define_insn_reservation "ppro_fcmov" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fcmov"))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fcmp" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fcmp")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fcmp_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fcmp")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_fmov" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmov")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_load" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_fmov_XF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p2+p0)*2")
+
+(define_insn_reservation "ppro_fmov_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_XF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p0+p4),(p0+p3)")
+
+;; fmul executes on port 0 with latency 5. It has issue latency 2,
+;; but we don't model this.
+(define_insn_reservation "ppro_fmul" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmul")))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fmul_load" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fmul")))
+ "decoder0,p2+p0,p0")
+
+;; fdiv latencies depend on the mode of the operands. XFmode gives
+;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
+;; Division by a power of 2 takes only 9 cycles, but we cannot model
+;; that. Throughput is equal to latency - 1, which we model using the
+;; ppro_fdiv automaton.
+(define_insn_reservation "ppro_fdiv_SF" 18
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_SF_load" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_DF" 32
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_DF_load" 33
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_XF" 38
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*36")
+
+(define_insn_reservation "ppro_fdiv_XF_load" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*36")
+
+;; MMX instructions can execute on either port 0 or port 1 with a
+;; throughput of 1/cycle.
+;; on port 0: - ALU (latency 1)
+;; - Multiplier Unit (latency 3)
+;; on port 1: - ALU (latency 1)
+;; - Shift Unit (latency 1)
+;;
+;; MMX instructions are either of the type reg-reg, or read-modify, and
+;; except for mmxshft and mmxmul they can execute on port 0 or port 1,
+;; so they behave as "simple" instructions that need no special modelling.
+;; We only have to model mmxshft and mmxmul.
+(define_insn_reservation "ppro_mmx_shft" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxshft")))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_mmx_shft_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxshft")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_mmx_mul" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_mmx_mul_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mmxcvt" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "mmxcvt")))
+ "decodern,p1")
+
+;; FIXME: These are Pentium III only, but we cannot tell here if
+;; we're generating code for PentiumPro/Pentium II or Pentium III
+;; (define_insn_reservation "ppro_sse_mmxshft" 2
+;; (and (eq_attr "cpu" "pentiumpro")
+;; (and (eq_attr "mode" "DI")
+;; (eq_attr "type" "mmxshft")))
+;; "decodern,p0")
+
+;; SSE is very complicated, and takes a bit more effort.
+;; ??? I assumed that all SSE instructions decode on decoder0,
+;; but is this correct?
+
+;; The sfence instruction.
+(define_insn_reservation "ppro_sse_sfence" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "unknown")
+ (eq_attr "type" "sse")))
+ "decoder0,p4+p3")
+
+;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
+(define_insn_reservation "ppro_sse_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sse")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_add_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_add_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_comi_SF" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_comi_SF_load" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mul_SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_mul_SF_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p2+p0")
+
+;; FIXME: ssediv doesn't close p0 for 17 cycles, surely???
+(define_insn_reservation "ppro_sse_div_SF" 18
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*17")
+
+(define_insn_reservation "ppro_sse_div_SF_load" 18
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0),p0*16")
+
+(define_insn_reservation "ppro_sse_icvt_SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_icvt_SI" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p4+p3")
+
+(define_insn_reservation "ppro_sse_V4SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sse")))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecvt"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none,unknown")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1,p4+p3")
+
+(define_insn_reservation "ppro_sse_mul_V4SF" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,(p2+p0)*2")
+
+;; FIXME: p0 really closed this long???
+(define_insn_reservation "ppro_sse_div_V4SF" 48
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*34")
+
+(define_insn_reservation "ppro_sse_div_V4SF_load" 48
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0)*2,p0*32")
+
+(define_insn_reservation "ppro_sse_log_V4SF" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_log_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_V4SF" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p4+p3)*2")
+
+;; All other instructions are modelled as simple instructions.
+;; We have already modelled all i387 floating point instructions, so all
+;; other instructions execute on either port 0 or port 1.  This includes
+;; the ALU units and the MMX units.
+;;
+;; reg-reg instructions produce 1 uop, so they can be decoded on any of
+;; the three decoders.
+(define_insn_reservation "ppro_insn" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
+ "decodern,(p0|p1)")
+
+;; read-modify and register-memory instructions have 2 or 3 uops,
+;; so they have to be decoded on decoder0.
+(define_insn_reservation "ppro_insn_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_insn_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,(p0|p1),p4+p3")
+
+;; read-modify-store instructions produce 4 uops, so they have to be
+;; decoded on decoder0 as well.
+(define_insn_reservation "ppro_insn_both" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseishft1,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1),p4+p3")
+
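The decoder rules described near the top of this file (one multi-uop instruction on decoder 0 plus at most one single-uop instruction on each of decoders 1 and 2 per cycle) can be restated as a small predicate. This is an illustrative sketch of the model only, not GCC code; microcoded instructions, which occupy decoder 0 for several cycles, are not handled:

    /* How many of the next three insns decode in one cycle, given
       their uop counts.  */
    static int
    ppro_decode_group (int uops0, int uops1, int uops2)
    {
      int n = 0;
      if (uops0 >= 1 && uops0 <= 4)
        n = 1;                  /* decoder 0 takes up to 4 uops  */
      if (n == 1 && uops1 == 1)
        n = 2;                  /* decoder 1 takes a single uop  */
      if (n == 2 && uops2 == 1)
        n = 3;                  /* decoder 2 takes a single uop  */
      return n;
    }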
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
new file mode 100644
index 000000000..7cce9d4ad
--- /dev/null
+++ b/gcc/config/i386/predicates.md
@@ -0,0 +1,1226 @@
+;; Predicate definitions for IA-32 and x86-64.
+;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if OP is either an i387 or SSE fp register.
+(define_predicate "any_fp_register_operand"
+ (and (match_code "reg")
+ (match_test "ANY_FP_REGNO_P (REGNO (op))")))
+
+;; Return true if OP is an i387 fp register.
+(define_predicate "fp_register_operand"
+ (and (match_code "reg")
+ (match_test "FP_REGNO_P (REGNO (op))")))
+
+;; Return true if OP is a non-fp register_operand.
+(define_predicate "register_and_not_any_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
+
+;; Return true if OP is a register operand other than an i387 fp register.
+(define_predicate "register_and_not_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "FP_REGNO_P (REGNO (op))"))))
+
+;; True if the operand is an MMX register.
+(define_predicate "mmx_reg_operand"
+ (and (match_code "reg")
+ (match_test "MMX_REGNO_P (REGNO (op))")))
+
+;; True if the operand is an SSE register.
+(define_predicate "sse_reg_operand"
+ (and (match_code "reg")
+ (match_test "SSE_REGNO_P (REGNO (op))")))
+
+;; True if the operand is a Q_REGS class register.
+(define_predicate "q_regs_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return ANY_QI_REG_P (op);
+})
+
+;; Match an SI or HImode register for a zero_extract.
+(define_special_predicate "ext_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if ((!TARGET_64BIT || GET_MODE (op) != DImode)
+ && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+ return false;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* Be careful to accept only registers having upper parts. */
+ return (REG_P (op)
+ && (REGNO (op) > LAST_VIRTUAL_REGISTER || REGNO (op) <= BX_REG));
+})
+
+;; Return true if op is the AX register.
+(define_predicate "ax_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == AX_REG")))
+
+;; Return true if op is the flags register.
+(define_predicate "flags_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == FLAGS_REG")))
+
+;; Return true if op is a QImode register operand other than
+;; %[abcd][hl].
+(define_predicate "ext_QIreg_operand"
+ (and (match_code "reg")
+ (match_test "TARGET_64BIT
+ && GET_MODE (op) == QImode
+ && REGNO (op) > BX_REG")))
+
+;; Similarly, but don't check mode of the operand.
+(define_predicate "ext_QIreg_nomode_operand"
+ (and (match_code "reg")
+ (match_test "TARGET_64BIT
+ && REGNO (op) > BX_REG")))
+
+;; Return true if op is not the xmm0 register.
+(define_predicate "reg_not_xmm0_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return !REG_P (op) || REGNO (op) != FIRST_SSE_REG;
+})
+
+;; As above, but allow nonimmediate operands.
+(define_predicate "nonimm_not_xmm0_operand"
+ (ior (match_operand 0 "memory_operand")
+ (match_operand 0 "reg_not_xmm0_operand")))
+
+;; Return true if VALUE can be stored in a sign extended immediate field.
+(define_predicate "x86_64_immediate_operand"
+ (match_code "const_int,symbol_ref,label_ref,const")
+{
+ if (!TARGET_64BIT)
+ return immediate_operand (op, mode);
+
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+ /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
+ to be at least 32, so all acceptable constants are
+ represented as CONST_INT. */
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return true;
+ else
+ {
+ HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (op), DImode);
+ return trunc_int_for_mode (val, SImode) == val;
+ }
+ break;
+
+ case SYMBOL_REF:
+ /* For certain code models, the symbolic references are known to fit.
+ In the CM_SMALL_PIC model we know they fit if they are local to the
+ shared library. Don't count TLS SYMBOL_REFs here, since they should
+ fit only when inside an UNSPEC handled below. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
+ || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
+ || ix86_cmodel == CM_KERNEL);
+
+ case CONST:
+ /* We may also accept offsetted memory references in certain
+ special cases. */
+ if (GET_CODE (XEXP (op, 0)) == UNSPEC)
+ switch (XINT (XEXP (op, 0), 1))
+ {
+ case UNSPEC_GOTPCREL:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_NTPOFF:
+ return true;
+ default:
+ break;
+ }
+
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+ HOST_WIDE_INT offset;
+
+ if (ix86_cmodel == CM_LARGE)
+ return false;
+ if (!CONST_INT_P (op2))
+ return false;
+ offset = trunc_int_for_mode (INTVAL (op2), DImode);
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return false;
+ /* For CM_SMALL, assume that the last object is 16MB below the
+ end of the 31-bit boundary. We may also accept pretty
+ large negative constants, knowing that all objects are
+ in the positive half of the address space. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return true;
+ /* For CM_KERNEL we know that all objects reside in the
+ negative half of the 32-bit address space. We may not
+ accept negative offsets, since they may take an address
+ just out of range, but we may accept pretty large positive ones. */
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return true;
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to SYMBOL_REF ones, just the
+ constraints for code models differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return true;
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return true;
+ break;
+
+ case UNSPEC:
+ switch (XINT (op1, 1))
+ {
+ case UNSPEC_DTPOFF:
+ case UNSPEC_NTPOFF:
+ if (offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return false;
+})
+
+;; Return true if VALUE can be stored in the zero extended immediate field.
+(define_predicate "x86_64_zext_immediate_operand"
+ (match_code "const_double,const_int,symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_DOUBLE:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return (GET_MODE (op) == VOIDmode && !CONST_DOUBLE_HIGH (op));
+ else
+ return false;
+
+ case CONST_INT:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return INTVAL (op) >= 0;
+ else
+ return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff);
+
+ case SYMBOL_REF:
+ /* For certain code models, the symbolic references are known to fit. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
+
+ case CONST:
+ /* We may also accept offsetted memory references in certain
+ special cases. */
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+
+ if (ix86_cmodel == CM_LARGE)
+ return false;
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return false;
+ /* For the small code model we may accept pretty large positive
+ offsets, since one bit is available for free. Negative
+ offsets are limited by the size of the NULL pointer area
+ specified by the ABI. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && CONST_INT_P (op2)
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return true;
+ /* ??? For the kernel, we may accept adjustment of
+ -0x10000000, since we know that it will just convert
+ negative address space to positive, but perhaps this
+ is not worthwhile. */
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to the SYMBOL_REF ones;
+ only the code-model constraints differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && CONST_INT_P (op2)
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return true;
+ break;
+
+ default:
+ return false;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return false;
+})
+
+;; Return true if OP is a general operand representable on x86_64.
+(define_predicate "x86_64_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "general_operand")))
+
+;; Return true if OP is a general operand representable on x86_64
+;; as either a sign-extended or zero-extended constant.
+(define_predicate "x86_64_szext_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand"))
+ (match_operand 0 "general_operand")))
+
+;; Return true if OP is a nonmemory operand representable on x86_64.
+(define_predicate "x86_64_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return true if OP is a nonmemory operand representable on x86_64
+;; as either a sign-extended or zero-extended constant.
+(define_predicate "x86_64_szext_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand"))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return true when the operand is a PIC expression that can be
+;; computed by an lea operation.
+(define_predicate "pic_32bit_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ if (!flag_pic)
+ return false;
+ /* Rule out relocations that translate into 64bit constants. */
+ if (TARGET_64BIT && GET_CODE (op) == CONST)
+ {
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS && CONST_INT_P (XEXP (op, 1)))
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOT))
+ return false;
+ }
+ return symbolic_operand (op, mode);
+})
+
+
+;; Return true if OP is a nonmemory operand acceptable to the movabs patterns.
+(define_predicate "x86_64_movabs_operand"
+ (if_then_else (match_test "!TARGET_64BIT || !flag_pic")
+ (match_operand 0 "nonmemory_operand")
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "const_double_operand")
+ (match_test "GET_MODE_SIZE (mode) <= 8")))))
+
+;; Return true if OP is either a symbol reference or a sum of a symbol
+;; reference and a constant.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF
+ || (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOT
+ || XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOTPCREL)))
+ return true;
+ if (GET_CODE (op) != PLUS
+ || !CONST_INT_P (XEXP (op, 1)))
+ return false;
+
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return true;
+ /* Only @GOTOFF gets offsets. */
+ if (GET_CODE (op) != UNSPEC
+ || XINT (op, 1) != UNSPEC_GOTOFF)
+ return false;
+
+ op = XVECEXP (op, 0, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return true;
+ return false;
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if OP is a symbolic operand that resolves locally.
+(define_predicate "local_symbolic_operand"
+ (match_code "const,label_ref,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == LABEL_REF)
+ return true;
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return false;
+
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+
+ if (SYMBOL_REF_LOCAL_P (op))
+ return true;
+
+ /* There is, however, a not insubstantial body of code in the rest of
+ the compiler that assumes it can just stick the results of
+ ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
+ /* ??? This is a hack.  We should update the body of the compiler
+ to always create a DECL and invoke targetm.encode_section_info. */
+ if (strncmp (XSTR (op, 0), internal_label_prefix,
+ internal_label_prefix_len) == 0)
+ return true;
+
+ return false;
+})
+
+;; Test for a legitimate @GOTOFF operand.
+;;
+;; VxWorks does not impose a fixed gap between segments; the run-time
+;; gap can be different from the object-file gap. We therefore can't
+;; use @GOTOFF unless we are absolutely sure that the symbol is in the
+;; same segment as the GOT. Unfortunately, the flexibility of linker
+;; scripts means that we can't be sure of that in general, so assume
+;; that @GOTOFF is never valid on VxWorks.
+(define_predicate "gotoff_operand"
+ (and (match_test "!TARGET_VXWORKS_RTP")
+ (match_operand 0 "local_symbolic_operand")))
+
+;; Test for various thread-local symbols.
+(define_predicate "tls_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op)")))
+
+(define_predicate "tls_modbase_operand"
+ (and (match_code "symbol_ref")
+ (match_test "op == ix86_tls_module_base ()")))
+
+(define_predicate "tp_or_register_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_TP"))))
+
+;; Test for a pc-relative call operand
+(define_predicate "constant_call_address_operand"
+ (match_code "symbol_ref")
+{
+ if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
+ return false;
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op))
+ return false;
+ return true;
+})
+
+;; P6 processors will jump to the address after the decrement when %esp
+;; is used as a call operand, so they will execute the return address
+;; as code.  See Pentium Pro erratum 70, Pentium II erratum A33 and
+;; Pentium III erratum E17.
+
+(define_predicate "call_register_no_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!TARGET_64BIT && op == stack_pointer_rtx)
+ return false;
+
+ return register_no_elim_operand (op, mode);
+})
+
+;; True for any non-virtual or eliminable register. Used in places where
+;; instantiation of such a register may cause the pattern to not be recognized.
+(define_predicate "register_no_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return !(op == arg_pointer_rtx
+ || op == frame_pointer_rtx
+ || IN_RANGE (REGNO (op),
+ FIRST_PSEUDO_REGISTER, LAST_VIRTUAL_REGISTER));
+})
+
+;; Similarly, but include the stack pointer. This is used to prevent esp
+;; from being used as an index reg.
+(define_predicate "index_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (reload_in_progress || reload_completed)
+ return REG_OK_FOR_INDEX_STRICT_P (op);
+ else
+ return REG_OK_FOR_INDEX_NONSTRICT_P (op);
+})
+
+;; Return false if this is any eliminable register. Otherwise general_operand.
+(define_predicate "general_no_elim_operand"
+ (if_then_else (match_code "reg,subreg")
+ (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "general_operand")))
+
+;; Return false if this is any eliminable register. Otherwise
+;; register_operand or a constant.
+(define_predicate "nonmemory_no_elim_operand"
+ (ior (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "immediate_operand")))
+
+;; Test for a valid operand for a call instruction.
+(define_predicate "call_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (match_operand 0 "call_register_no_elim_operand")
+ (match_operand 0 "memory_operand")))
+
+;; Similarly, but for tail calls, in which we cannot allow memory references.
+(define_predicate "sibcall_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (match_operand 0 "register_no_elim_operand")))
+
+;; Match exactly zero.
+(define_predicate "const0_operand"
+ (match_code "const_int,const_double,const_vector")
+{
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+ return op == CONST0_RTX (mode);
+})
+
+;; Match exactly one.
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+;; Match exactly eight.
+(define_predicate "const8_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 8")))
+
+;; Match exactly 128.
+(define_predicate "const128_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 128")))
+
+;; Match 2, 4, or 8. Used for leal multiplicands.
+(define_predicate "const248_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i = INTVAL (op);
+ return i == 2 || i == 4 || i == 8;
+})
+
+;; Match 0 or 1.
+(define_predicate "const_0_to_1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx || op == const1_rtx")))
+
+;; Match 0 to 3.
+(define_predicate "const_0_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+
+;; Match 0 to 7.
+(define_predicate "const_0_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+;; Match 0 to 15.
+(define_predicate "const_0_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
+;; Match 0 to 31.
+(define_predicate "const_0_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
+
+;; Match 0 to 63.
+(define_predicate "const_0_to_63_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
+;; Match 0 to 255.
+(define_predicate "const_0_to_255_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 255)")))
+
+;; Match (0 to 255) * 8
+(define_predicate "const_0_to_255_mul_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return val <= 255*8 && val % 8 == 0;
+})
+
+;; Return true if OP is CONST_INT >= 1 and <= 31 (a valid operand
+;; for shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 31)")))
+
+;; Return true if OP is CONST_INT >= 1 and <= 63 (a valid operand
+;; for 64bit shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_63_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 63)")))
+
+;; Match 2 or 3.
+(define_predicate "const_2_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+
+;; Match 4 to 5.
+(define_predicate "const_4_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 5)")))
+
+;; Match 4 to 7.
+(define_predicate "const_4_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 7)")))
+
+;; Match 6 to 7.
+(define_predicate "const_6_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 6, 7)")))
+
+;; Match 8 to 11.
+(define_predicate "const_8_to_11_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 11)")))
+
+;; Match 12 to 15.
+(define_predicate "const_12_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 12, 15)")))
+
+;; Match exactly one bit in 2-bit mask.
+(define_predicate "const_pow2_1_to_2_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
+
+;; Match exactly one bit in 4-bit mask.
+(define_predicate "const_pow2_1_to_8_operand"
+ (match_code "const_int")
+{
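+ /* exact_log2 returns -1 if INTVAL (op) is not an exact power of
+ two; assigned to an unsigned int, that wraps to a huge value, so
+ the range check below also rejects non-powers of two.  The two
+ predicates that follow rely on the same trick. */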
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 3;
+})
+
+;; Match exactly one bit in 8-bit mask.
+(define_predicate "const_pow2_1_to_128_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 7;
+})
+
+;; Match exactly one bit in 16-bit mask.
+(define_predicate "const_pow2_1_to_32768_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 15;
+})
+
+;; True if this is a constant appropriate for an increment or decrement.
+(define_predicate "incdec_operand"
+ (match_code "const_int")
+{
+ /* On the Pentium 4, the inc and dec operations cause an extra
+ dependency on the flags register, since the carry flag is left
+ unmodified. */
+ if (!TARGET_USE_INCDEC && !optimize_insn_for_size_p ())
+ return false;
+ return op == const1_rtx || op == constm1_rtx;
+})
+
+;; True for registers, or 1 or -1. Used to optimize double-word shifts.
+(define_predicate "reg_or_pm1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx || op == constm1_rtx"))))
+
+;; True if OP is acceptable as operand of DImode shift expander.
+(define_predicate "shiftdi_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "ashldi_input_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "reg_or_pm1_operand")))
+
+;; Return true if OP is a vector load from the constant pool with just
+;; the first element nonzero.
+(define_predicate "zero_extended_scalar_load_operand"
+ (match_code "mem")
+{
+ unsigned n_elts;
+ op = maybe_get_pool_constant (op);
+
+ if (!(op && GET_CODE (op) == CONST_VECTOR))
+ return false;
+
+ n_elts = CONST_VECTOR_NUNITS (op);
+
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a vector constant that is all ones.
+(define_predicate "vector_all_ones_operand"
+ (match_code "const_vector")
+{
+ int nunits = GET_MODE_NUNITS (mode);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (op) == nunits)
+ {
+ int i;
+ for (i = 0; i < nunits; ++i)
+ {
+ rtx x = CONST_VECTOR_ELT (op, i);
+ if (x != constm1_rtx)
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+})
+
+;; Return true when OP is an operand acceptable for a standard SSE move.
+(define_predicate "vector_move_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return true when OP is a nonimmediate operand or a standard SSE constant.
+(define_predicate "nonimmediate_or_sse_const_operand"
+ (match_operand 0 "general_operand")
+{
+ if (nonimmediate_operand (op, mode))
+ return true;
+ if (standard_sse_constant_p (op) > 0)
+ return true;
+ return false;
+})
+
+;; Return true if OP is a register or zero.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return true if OP is a valid address that does not contain
+;; a segment override.
+(define_special_predicate "no_seg_address_operand"
+ (match_operand 0 "address_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+ return parts.seg == SEG_DEFAULT;
+})
+
+;; Return true if the rtx is known to be at least 32 bits aligned.
+(define_predicate "aligned_operand"
+ (match_operand 0 "general_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ /* Registers and immediate operands are always "aligned". */
+ if (!MEM_P (op))
+ return true;
+
+ /* All patterns using aligned_operand on memory operands end up
+ promoting the memory operand to 64 bits and thus cause a memory
+ mismatch stall. */
+ if (TARGET_MEMORY_MISMATCH_STALL && !optimize_insn_for_size_p ())
+ return false;
+
+ /* Don't even try to do any aligned optimizations with volatiles. */
+ if (MEM_VOLATILE_P (op))
+ return false;
+
+ if (MEM_ALIGN (op) >= 32)
+ return true;
+
+ op = XEXP (op, 0);
+
+ /* Pushes and pops are only valid on the stack pointer. */
+ if (GET_CODE (op) == PRE_DEC
+ || GET_CODE (op) == POST_INC)
+ return true;
+
+ /* Decode the address. */
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+
+ /* Look for some component that isn't known to be aligned. */
+ if (parts.index)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.index)) * parts.scale < 32)
+ return false;
+ }
+ if (parts.base)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
+ return false;
+ }
+ if (parts.disp)
+ {
+ if (!CONST_INT_P (parts.disp)
+ || (INTVAL (parts.disp) & 3))
+ return false;
+ }
+
+ /* Didn't find one -- this must be an aligned address. */
+ return true;
+})
+
+;; Return true if OP is a memory operand with a displacement.
+(define_predicate "memory_displacement_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+ return parts.disp != NULL_RTX;
+})
+
+;; Return true if OP is a memory operand with only a displacement.
+(define_predicate "memory_displacement_only_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ if (TARGET_64BIT)
+ return false;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+
+ if (parts.base || parts.index)
+ return false;
+
+ return parts.disp != NULL_RTX;
+})
+
+;; Return true if OP is a memory operand that needs at most one
+;; register, not counting the stack pointer or frame pointer.
+(define_predicate "cmpxchg8b_pic_memory_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+ if (parts.base == NULL_RTX
+ || parts.base == arg_pointer_rtx
+ || parts.base == frame_pointer_rtx
+ || parts.base == hard_frame_pointer_rtx
+ || parts.base == stack_pointer_rtx)
+ return true;
+
+ if (parts.index == NULL_RTX
+ || parts.index == arg_pointer_rtx
+ || parts.index == frame_pointer_rtx
+ || parts.index == hard_frame_pointer_rtx
+ || parts.index == stack_pointer_rtx)
+ return true;
+
+ return false;
+})
+
+
+;; Return true if OP is a memory operand that cannot be represented
+;; by the modRM array.
+(define_predicate "long_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "memory_address_length (op)")))
+
+;; Return true if OP is a comparison operator that can be issued by fcmov.
+(define_predicate "fcmov_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ if (!ix86_trivial_fp_comparison_operator (op, mode))
+ return false;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ /* The i387 supports only a limited set of condition codes. */
+ switch (code)
+ {
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode
+ || inmode == CCCmode)
+ return true;
+ return false;
+ case ORDERED: case UNORDERED:
+ case EQ: case NE:
+ return true;
+ default:
+ return false;
+ }
+})
+
+;; Return true if OP is a comparison that can be used in the CMPSS/CMPPS
+;; insns.  The first set are supported directly; the second set can't be
+;; done with full IEEE support, i.e. in the presence of NaNs.
+
+(define_predicate "sse_comparison_operator"
+ (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+
+;; Return true if OP is a comparison operator that can be issued by
+;; the AVX predicate-generation instructions.
+(define_predicate "avx_comparison_float_operator"
+ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
+
+(define_predicate "ix86_comparison_int_operator"
+ (match_code "ne,eq,ge,gt,le,lt"))
+
+(define_predicate "ix86_comparison_uns_operator"
+ (match_code "ne,eq,geu,gtu,leu,ltu"))
+
+(define_predicate "bt_comparison_operator"
+ (match_code "ne,eq"))
+
+;; Return true if OP is a valid comparison operator in valid mode.
+(define_predicate "ix86_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ return ix86_trivial_fp_comparison_operator (op, mode);
+
+ switch (code)
+ {
+ case EQ: case NE:
+ return true;
+ case LT: case GE:
+ if (inmode == CCmode || inmode == CCGCmode
+ || inmode == CCGOCmode || inmode == CCNOmode)
+ return true;
+ return false;
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCCmode)
+ return true;
+ return false;
+ case ORDERED: case UNORDERED:
+ if (inmode == CCmode)
+ return true;
+ return false;
+ case GT: case LE:
+ if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
+ return true;
+ return false;
+ default:
+ return false;
+ }
+})
+
+;; Return true if OP is a valid comparison operator
+;; testing carry flag to be set.
+(define_predicate "ix86_carry_flag_operator"
+ (match_code "ltu,lt,unlt,gtu,gt,ungt,le,unle,ge,unge,ltgt,uneq")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ if (!ix86_trivial_fp_comparison_operator (op, mode))
+ return false;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ else if (inmode == CCCmode)
+ return code == LTU || code == GTU;
+ else if (inmode != CCmode)
+ return false;
+
+ return code == LTU;
+})
+
+;; Return true if this comparison only requires testing one flag bit.
+(define_predicate "ix86_trivial_fp_comparison_operator"
+ (match_code "gt,ge,unlt,unle,uneq,ltgt,ordered,unordered"))
+
+;; Return true if we know how to do this comparison. Others require
+;; testing more than one flag bit, and we let the generic middle-end
+;; code do that.
+(define_predicate "ix86_fp_comparison_operator"
+ (if_then_else (match_test "ix86_fp_comparison_strategy (GET_CODE (op))
+ == IX86_FPCMP_ARITH")
+ (match_operand 0 "comparison_operator")
+ (match_operand 0 "ix86_trivial_fp_comparison_operator")))
+
+;; Same as above, but for swapped comparison used in fp_jcc_4_387.
+(define_predicate "ix86_swapped_fp_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum rtx_code code = GET_CODE (op);
+ bool ret;
+
+ PUT_CODE (op, swap_condition (code));
+ ret = ix86_fp_comparison_operator (op, mode);
+ PUT_CODE (op, code);
+ return ret;
+})
+
+;; Nearly general operand, but accept any const_double, since we wish
+;; to be able to drop them into memory rather than have them get pulled
+;; into registers.
+(define_predicate "cmp_fp_expander_operand"
+ (ior (match_code "const_double")
+ (match_operand 0 "general_operand")))
+
+;; Return true if this is a valid binary floating-point operation.
+(define_predicate "binary_fp_operator"
+ (match_code "plus,minus,mult,div"))
+
+;; Return true if this is a multiply operation.
+(define_predicate "mult_operator"
+ (match_code "mult"))
+
+;; Return true if this is a division operation.
+(define_predicate "div_operator"
+ (match_code "div"))
+
+;; Return true if this is a float extend operation.
+(define_predicate "float_operator"
+ (match_code "float"))
+
+;; Return true for ARITHMETIC_P.
+(define_predicate "arith_or_logical_operator"
+ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div,
+ mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert"))
+
+;; Return true for COMMUTATIVE_P.
+(define_predicate "commutative_operator"
+ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax"))
+
+;; Return true if OP is a binary operator that can be promoted to wider mode.
+(define_predicate "promotable_binary_operator"
+ (ior (match_code "plus,and,ior,xor,ashift")
+ (and (match_code "mult")
+ (match_test "TARGET_TUNE_PROMOTE_HIMODE_IMUL"))))
+
+(define_predicate "compare_operator"
+ (match_code "compare"))
+
+(define_predicate "absneg_operator"
+ (match_code "abs,neg"))
+
+;; Return true if OP is a misaligned memory operand.
+(define_predicate "misaligned_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
+
+;; Return true if OP is an emms operation, known to be a PARALLEL.
+(define_predicate "emms_operation"
+ (match_code "parallel")
+{
+ unsigned i;
+
+ if (XVECLEN (op, 0) != 17)
+ return false;
+
+ for (i = 0; i < 8; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i+1);
+
+ if (GET_CODE (elt) != CLOBBER
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != XFmode
+ || REGNO (SET_DEST (elt)) != FIRST_STACK_REG + i)
+ return false;
+
+ elt = XVECEXP (op, 0, i+9);
+
+ if (GET_CODE (elt) != CLOBBER
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != DImode
+ || REGNO (SET_DEST (elt)) != FIRST_MMX_REG + i)
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a vzeroall operation, known to be a PARALLEL.
+(define_predicate "vzeroall_operation"
+ (match_code "parallel")
+{
+ unsigned i, nregs = TARGET_64BIT ? 16 : 8;
+
+ if ((unsigned) XVECLEN (op, 0) != 1 + nregs)
+ return false;
+
+ for (i = 0; i < nregs; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i+1);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != V8SImode
+ || REGNO (SET_DEST (elt)) != SSE_REGNO (i)
+ || SET_SRC (elt) != CONST0_RTX (V8SImode))
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a parallel for a vpermilp[ds] permute.
+;; ??? It would be much easier if the PARALLEL for a VEC_SELECT
+;; had a mode, but it doesn't. So we have 4 copies and install
+;; the mode by hand.
+
+(define_predicate "avx_vpermilp_v8sf_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V8SFmode)")))
+
+(define_predicate "avx_vpermilp_v4df_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V4DFmode)")))
+
+(define_predicate "avx_vpermilp_v4sf_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V4SFmode)")))
+
+(define_predicate "avx_vpermilp_v2df_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vpermilp_parallel (op, V2DFmode)")))
+
+;; Return true if OP is a parallel for a vperm2f128 permute.
+
+(define_predicate "avx_vperm2f128_v8sf_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vperm2f128_parallel (op, V8SFmode)")))
+
+(define_predicate "avx_vperm2f128_v8si_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vperm2f128_parallel (op, V8SImode)")))
+
+(define_predicate "avx_vperm2f128_v4df_operand"
+ (and (match_code "parallel")
+ (match_test "avx_vperm2f128_parallel (op, V4DFmode)")))
+
+;; Return true if OP is a parallel for a vbroadcast permute.
+
+(define_predicate "avx_vbroadcast_operand"
+ (and (match_code "parallel")
+ (match_code "const_int" "a"))
+{
+ rtx elt = XVECEXP (op, 0, 0);
+ int i, nelt = XVECLEN (op, 0);
+
+ /* Don't bother checking there are the right number of operands,
+ merely that they're all identical. */
+ for (i = 1; i < nelt; ++i)
+ if (XVECEXP (op, 0, i) != elt)
+ return false;
+ return true;
+})
diff --git a/gcc/config/i386/rtemself.h b/gcc/config/i386/rtemself.h
new file mode 100644
index 000000000..ac492ec35
--- /dev/null
+++ b/gcc/config/i386/rtemself.h
@@ -0,0 +1,32 @@
+/* Definitions for rtems targeting an ix86 using ELF.
+ Copyright (C) 1996, 1997, 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ if (!TARGET_80387) \
+ builtin_define ("_SOFT_FLOAT"); \
+ } \
+ while (0)
diff --git a/gcc/config/i386/sfp-machine.h b/gcc/config/i386/sfp-machine.h
new file mode 100644
index 000000000..f2df86965
--- /dev/null
+++ b/gcc/config/i386/sfp-machine.h
@@ -0,0 +1,5 @@
+#ifdef __x86_64__
+#include "config/i386/64/sfp-machine.h"
+#else
+#include "config/i386/32/sfp-machine.h"
+#endif
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
new file mode 100644
index 000000000..e12c56a17
--- /dev/null
+++ b/gcc/config/i386/smmintrin.h
@@ -0,0 +1,831 @@
+/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>.
+
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _SMMINTRIN_H_INCLUDED
+#define _SMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_1__
+# error "SSE4.1 instruction set not enabled"
+#else
+
+/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
+ files. */
+#include <tmmintrin.h>
+
+/* Rounding mode macros. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+
+#define _MM_FROUND_RAISE_EXC 0x00
+#define _MM_FROUND_NO_EXC 0x08
+
+#define _MM_FROUND_NINT \
+ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_FLOOR \
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_CEIL \
+ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_TRUNC \
+ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_RINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_NEARBYINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
+
+/* Test Instruction */
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & ~__M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) != 0 && (__V & ~__M) != 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Macros for packed integer 128-bit comparison intrinsics. */
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+#define _mm_test_all_ones(V) \
+ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
+
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
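+
+/* Illustrative usage sketch (not part of this header): since
+   (V & V) == 0 exactly when V == 0, the macro above gives a cheap
+   "is this vector all zero?" test.  P is a hypothetical pointer:
+
+     __m128i v = _mm_loadu_si128 (p);
+     if (_mm_test_all_zeros (v, v))
+       ;  skip the expensive per-element path
+*/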
+
+/* Packed/scalar double precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_sd(__m128d __D, __m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
+ (__v2df)__V,
+ __M);
+}
+#else
+#define _mm_round_pd(V, M) \
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
+
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+ (__v2df)(__m128d)(V), (int)(M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
+ (__v4sf)__V,
+ __M);
+}
+#else
+#define _mm_round_ps(V, M) \
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
+
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(V), (int)(M)))
+#endif
+
+/* Macros for ceil/floor intrinsics. */
+#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
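+
+/* Illustrative usage sketch (not part of this header):
+
+     __m128 x = _mm_set_ps (3.7f, -1.5f, 2.5f, 0.1f);
+     __m128 f = _mm_floor_ps (x);   elements { 3.0, -2.0, 2.0, 0.0 }
+     __m128 n = _mm_round_ps (x, _MM_FROUND_NINT);
+                                    elements { 4.0, -2.0, 2.0, 0.0 }
+                                    (ties round to even, so 2.5 -> 2.0)
+*/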
+
+/* SSE4.1 */
+
+/* Integer blend instructions - select data from 2 sources using
+ constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
+ (__v8hi)__Y,
+ __M);
+}
+#else
+#define _mm_blend_epi16(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(M)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
+{
+ return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ (__v16qi)__M);
+}
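+
+/* Illustrative usage sketch (not part of this header): bytes of the
+   mask whose most significant bit is set select from __Y, the rest
+   from __X, so a per-byte signed maximum can be written as
+
+     __m128i gt  = _mm_cmpgt_epi8 (a, b);        0xff where a > b
+     __m128i max = _mm_blendv_epi8 (b, a, gt);   a where a > b, else b
+*/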
+
+/* Single precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+#else
+#define _mm_blend_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+#endif
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
+{
+ return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
+ (__v4sf)__Y,
+ (__v4sf)__M);
+}
+
+/* Double precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_blend_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
+{
+ return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
+ (__v2df)__Y,
+ (__v2df)__M);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
+#endif
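+
+/* Illustrative usage sketch (not part of this header): the high nibble
+   of the mask selects which elements are multiplied and summed, the low
+   nibble selects which result lanes receive the sum (the rest are
+   zeroed).  A full 4-element dot product delivered in lane 0:
+
+     __m128 d   = _mm_dp_ps (a, b, 0xf1);
+     float  dot = _mm_cvtss_f32 (d);
+*/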
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones
+ corresponding parts of result. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Min/max packed integer instructions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication with truncation of upper
+ halves of results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication of 2 pairs of operands
+ with two 64-bit results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
+}
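+
+/* Illustrative usage sketch (not part of this header):
+
+     __m128i lo  = _mm_mullo_epi32 (a, b);   four 32-bit low products
+     __m128i w02 = _mm_mul_epi32 (a, b);     two signed 64-bit products
+                                             of elements 0 and 2
+*/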
+
+/* Insert a single precision float into a packed single precision array
+ element selected by index N.  Bits [7:6] of N select the source
+ element of S, bits [5:4] select the destination element of D, and
+ bits [3:0] give the zeroing mask applied to the result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
+{
+ return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
+ (__v4sf)__S,
+ __N);
+}
+#else
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(S), (int)(N)))
+#endif
+
+/* Helper macro to create the N value for _mm_insert_ps. */
+#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
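+
+/* Illustrative usage sketch (not part of this header): copy element 2
+   of S into element 0 of D and zero element 3 of the result:
+
+     __m128 r = _mm_insert_ps (d, s, _MM_MK_INSERTPS_NDX (2, 0, 0x08));
+*/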
+
+/* Extract binary representation of single precision float from packed
+ single precision array element of X selected by index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_ps (__m128 __X, const int __N)
+{
+ union { int i; float f; } __tmp;
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
+ return __tmp.i;
+}
+#else
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
+ __tmp.i; \
+ }))
+#endif
+
+/* Extract binary representation of single precision float into
+ D from packed single precision array element of S selected
+ by index N. */
+#define _MM_EXTRACT_FLOAT(D, S, N) \
+ { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
+
+/* Extract specified single precision float element into the lower
+ part of __m128. */
+#define _MM_PICK_OUT_PS(X, N) \
+ _mm_insert_ps (_mm_setzero_ps (), (X), \
+ _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
+
+/* Insert integer, S, into packed integer array element of D
+ selected by index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi8 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
+ __S, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi32 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
+ __S, __N);
+}
+
+#ifdef __x86_64__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
+ __S, __N);
+}
+#endif
+#else
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \
+ (long long)(S), (int)(N)))
+#endif
+#endif
+
+/* Extract integer from packed integer array element of X selected by
+ index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+ return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
+}
+#endif
+#else
+#define _mm_extract_epi8(X, N) \
+ ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
+#define _mm_extract_epi32(X, N) \
+ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) \
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
+#endif
+#endif
+
+/* Return horizontal packed word minimum and its index in bits [15:0]
+ and bits [18:16] respectively. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
+}
+
+/* Packed integer sign-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
+}
+
+/* Packed integer zero-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
+}
+
+/* Pack 8 double words from 2 operands into 8 words of result with
+ unsigned saturation. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
+ (__v16qi)__Y, __M);
+}
+#else
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#endif
+
+/* Load double quadword using non-temporal aligned hint. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_load_si128 (__m128i *__X)
+{
+ return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
+}
+
+#ifdef __SSE4_2__
+
+/* These macros specify the source data format. */
+#define _SIDD_UBYTE_OPS 0x00
+#define _SIDD_UWORD_OPS 0x01
+#define _SIDD_SBYTE_OPS 0x02
+#define _SIDD_SWORD_OPS 0x03
+
+/* These macros specify the comparison operation. */
+#define _SIDD_CMP_EQUAL_ANY 0x00
+#define _SIDD_CMP_RANGES 0x04
+#define _SIDD_CMP_EQUAL_EACH 0x08
+#define _SIDD_CMP_EQUAL_ORDERED 0x0c
+
+/* These macros specify the polarity. */
+#define _SIDD_POSITIVE_POLARITY 0x00
+#define _SIDD_NEGATIVE_POLARITY 0x10
+#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
+#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
+
+/* These macros specify the output selection in _mm_cmpXstri (). */
+#define _SIDD_LEAST_SIGNIFICANT 0x00
+#define _SIDD_MOST_SIGNIFICANT 0x40
+
+/* These macros specify the output selection in _mm_cmpXstrm (). */
+#define _SIDD_BIT_MASK 0x00
+#define _SIDD_UNIT_MASK 0x40
+
+/* Intrinsics for text/string processing. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistri(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \
+ (int)(LX), (__v16qi)(__m128i)(Y), \
+ (int)(LY), (int)(M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
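+
+/* Illustrative usage sketch (not part of this header; TEXT is a
+   hypothetical pointer to at least 16 readable bytes): index of the
+   first vowel in a 16-byte chunk, or 16 if there is none.  The
+   implicit-length (I) forms stop at the first zero byte of either
+   operand:
+
+     static const char vowels[16] = "aeiou";
+     __m128i set   = _mm_loadu_si128 ((const __m128i *) vowels);
+     __m128i chunk = _mm_loadu_si128 ((const __m128i *) text);
+     int idx = _mm_cmpistri (set, chunk,
+                             _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY
+                             | _SIDD_LEAST_SIGNIFICANT);
+*/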
+
+/* Intrinsics for text/string processing and reading values of
+ EFlags. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistra(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrc(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistro(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrs(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrz(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones
+ corresponding parts of result. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
+/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u8 (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u16 (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u32 (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
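+
+/* Illustrative usage sketch (not part of this header; BUF and LEN are
+   hypothetical): accumulating a CRC32C over a byte buffer with the
+   conventional all-ones seed and final complement:
+
+     unsigned int crc = 0xffffffffU;
+     while (len--)
+       crc = _mm_crc32_u8 (crc, *buf++);
+     crc = ~crc;
+*/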
+
+#endif /* __SSE4_2__ */
+
+#endif /* __SSE4_1__ */
+
+#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sol2-10.h b/gcc/config/i386/sol2-10.h
new file mode 100644
index 000000000..c3decd2ef
--- /dev/null
+++ b/gcc/config/i386/sol2-10.h
@@ -0,0 +1,138 @@
+/* Solaris 10 configuration.
+ Copyright (C) 2004, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "/"
+
+/* binutils' GNU as understands --32 and --64, but the native Solaris
+ assembler requires -xarch=generic or -xarch=generic64 instead. */
+#undef ASM_SPEC
+#ifdef USE_GAS
+#define ASM_SPEC "%{m32:--32} %{m64:--64} -s %(asm_cpu)"
+#else
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{Ym,*} " \
+ "%{m32:-xarch=generic} %{m64:-xarch=generic64} " \
+ "-s %(asm_cpu)"
+#endif
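+
+/* For illustration (not part of the original header): with the native
+   assembler, "gcc -m64" passes roughly "-xarch=generic64 -s <cpu flags>"
+   down to as, while a USE_GAS build passes "--64 -s <cpu flags>" instead.  */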
+
+/* The native Solaris assembler can't calculate the difference between
+ symbols in different sections, which causes problems for -fPIC jump
+ tables in .rodata. */
+#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA
+#undef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* The native Solaris assembler cannot handle the SYMBOL-. syntax and
+   requires SYMBOL@rel/@rel64 instead. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs (SIZE == 8 ? "@rel64" : "@rel", FILE); \
+ } while (0)
+#endif
+
+/* As in sol2.h, override the default from i386/x86-64.h to work around
+ Sun as TLS bug. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_SUN_TLS \
+ && in_section \
+ && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
+ switch_to_section (bss_section); \
+ x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN); \
+ } \
+ while (0)
+
+#undef NO_PROFILE_COUNTERS
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "_mcount"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int")
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE (TARGET_64BIT ? "int" : "long int")
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
+
+#define USE_IX86_FRAME_POINTER 1
+#define USE_X86_64_FRAME_POINTER 1
+
+/* Override i386/sol2.h version: return 8-byte vectors in MMX registers if
+ possible, matching Sun Studio 12 Update 1+ compilers and other x86
+ targets. */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS)
+
+#define SUBTARGET_OPTIMIZATION_OPTIONS \
+ { OPT_LEVELS_1_PLUS, OPT_momit_leaf_frame_pointer, NULL, 1 }
+
+#define MULTILIB_DEFAULTS { "m32" }
+
+#undef LINK_ARCH64_SPEC_BASE
+#define LINK_ARCH64_SPEC_BASE \
+ "%{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,/usr/ucblib/64:/usr/lib/libp/64:/lib/64:/usr/lib/64} \
+ %{!p:%{!pg:-Y P,/usr/ucblib/64:/lib/64:/usr/lib/64}}} \
+ -R /usr/ucblib/64} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,/usr/lib/libp/64:/lib/64:/usr/lib/64} \
+ %{!p:%{!pg:-Y P,/lib/64:/usr/lib/64}}}}"
+
+#undef LINK_ARCH64_SPEC
+#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
+
+#ifdef TARGET_GNU_LD
+/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly
+ follow the Solaris 2 ABI. Prefer them if present. */
+#ifdef HAVE_LD_SOL2_EMULATION
+#define I386_EMULATION "elf_i386_sol2"
+#define X86_64_EMULATION "elf_x86_64_sol2"
+#else
+#define I386_EMULATION "elf_i386"
+#define X86_64_EMULATION "elf_x86_64"
+#endif
+
+#define TARGET_LD_EMULATION "%{m64:-m " X86_64_EMULATION "}" \
+ "%{!m64:-m " I386_EMULATION "} "
+#else
+#define TARGET_LD_EMULATION ""
+#endif
+
+#undef LINK_ARCH_SPEC
+#define LINK_ARCH_SPEC TARGET_LD_EMULATION \
+ "%{m64:" LINK_ARCH64_SPEC "}%{!m64:" LINK_ARCH32_SPEC "}"
+
+/* We do not need to search a special directory for startup files. */
+#undef MD_STARTFILE_PREFIX
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION i386_solaris_elf_named_section
diff --git a/gcc/config/i386/sol2-c1.asm b/gcc/config/i386/sol2-c1.asm
new file mode 100644
index 000000000..4a89530cc
--- /dev/null
+++ b/gcc/config/i386/sol2-c1.asm
@@ -0,0 +1,151 @@
+! crt1.s for Solaris 2, x86
+
+! Copyright (C) 1993, 1998, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file takes control of the process from the kernel, as specified
+! in section 3 of the System V Application Binary Interface, Intel386
+! Processor Supplement. It has been constructed from information obtained
+! from the ABI, information obtained from single stepping existing
+! Solaris executables through their startup code with gdb, and from
+! information obtained by single stepping executables on other i386 SVR4
+! implementations. This file is the first thing linked into any executable.
+
+ .ident "GNU C crt1.s"
+ .weak _cleanup
+ .weak _DYNAMIC
+ .text
+
+! Start creating the initial frame by pushing a NULL value for the return
+! address of the initial frame, and mark the end of the stack frame chain
+! (the innermost stack frame) with a NULL value, per page 3-32 of the ABI.
+! Initialize the first stack frame pointer in %ebp (the contents of which
+! are unspecified at process initialization).
+
+ .globl _start
+_start:
+ pushl $0x0
+ pushl $0x0
+ movl %esp,%ebp
+
+! As specified per page 3-32 of the ABI, %edx contains a function
+! pointer that should be registered with atexit(), for proper
+! shared object termination. Just push it onto the stack for now
+! to preserve it. We want to register _cleanup() first.
+
+ pushl %edx
+
+! Check to see if there is a _cleanup() function linked in, and if
+! so, register it with atexit() as the last thing to be run by
+! atexit().
+
+ movl $_cleanup,%eax
+ testl %eax,%eax
+ je .L1
+ pushl $_cleanup
+ call atexit
+ addl $0x4,%esp
+.L1:
+
+! Now check to see if we have a _DYNAMIC table, and if so then
+! we need to register the function pointer previously in %edx, but
+! now conveniently saved on the stack as the argument to pass to
+! atexit().
+
+ movl $_DYNAMIC,%eax
+ testl %eax,%eax
+ je .L2
+ call atexit
+.L2:
+
+! Register _fini() with atexit(). We will take care of calling _init()
+! directly.
+
+ pushl $_fini
+ call atexit
+
+! Compute the address of the environment vector on the stack and load
+! it into the global variable _environ. Currently argc is at 8 off
+! the frame pointer. Fetch the argument count into %eax, scale by the
+! size of each arg (4 bytes) and compute the address of the environment
+! vector which is 16 bytes (the two zero words we pushed, plus argc,
+! plus the null word terminating the arg vector) further up the stack,
+! off the frame pointer (whew!).
+
+ movl 8(%ebp),%eax
+ leal 16(%ebp,%eax,4),%edx
+ movl %edx,_environ
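+
+! Illustration (not in the original source): at this point the initial
+! stack, relative to %ebp, looks like
+!
+!   0(%ebp)            zero word marking the end of the frame chain
+!   4(%ebp)            zero word (fake return address)
+!   8(%ebp)            argc
+!   12(%ebp)           argv[0] .. argv[argc-1], then a NULL word
+!   16+4*argc(%ebp)    envp[0]  (the address just stored in _environ)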
+
+! Push the environment vector pointer, the argument vector pointer,
+! and the argument count on to the stack to set up the arguments
+! for _init(), __fpstart(), and main(). Note that the environment
+! vector pointer and the arg count were previously loaded into
+! %edx and %eax respectively. The only new value we need to compute
+! is the argument vector pointer, which is at a fixed address off
+! the initial frame pointer.
+
+!
+! Make sure the stack is properly aligned.
+!
+ andl $0xfffffff0,%esp
+ subl $4,%esp
+
+ pushl %edx
+ leal 12(%ebp),%edx
+ pushl %edx
+ pushl %eax
+
+! Call _init(argc, argv, environ), __fpstart(argc, argv, environ), and
+! main(argc, argv, environ).
+
+ call _init
+ call __fpstart
+ call main
+
+! Pop the argc, argv, and environ arguments off the stack, push the
+! value returned from main(), and call exit().
+
+ addl $12,%esp
+ pushl %eax
+ call exit
+
+! An inline equivalent of _exit, as specified in Figure 3-26 of the ABI.
+
+ pushl $0x0
+ movl $0x1,%eax
+ lcall $7,$0
+
+! If all else fails, just try a halt!
+
+ hlt
+ .type _start,@function
+ .size _start,.-_start
+
+! A dummy profiling support routine for non-profiling executables,
+! in case we link in some objects that have been compiled for profiling.
+
+ .weak _mcount
+_mcount:
+ ret
+ .type _mcount,@function
+ .size _mcount,.-_mcount
diff --git a/gcc/config/i386/sol2-ci.asm b/gcc/config/i386/sol2-ci.asm
new file mode 100644
index 000000000..f2ff2025d
--- /dev/null
+++ b/gcc/config/i386/sol2-ci.asm
@@ -0,0 +1,40 @@
+! crti.s for Solaris 2, x86.
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file just supplies labeled starting points for the .init and .fini
+! sections. It is linked in before the values-Xx.o files and also before
+! crtbegin.o.
+
+ .ident "GNU C crti.s"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+_init:
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+_fini:
diff --git a/gcc/config/i386/sol2-cn.asm b/gcc/config/i386/sol2-cn.asm
new file mode 100644
index 000000000..217f04091
--- /dev/null
+++ b/gcc/config/i386/sol2-cn.asm
@@ -0,0 +1,35 @@
+! crtn.s for Solaris 2, x86.
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file just supplies returns for the .init and .fini sections. It is
+! linked in after all other files.
+
+ .ident "GNU C crtn.o"
+
+ .section .init
+ ret $0x0
+
+ .section .fini
+ ret $0x0
diff --git a/gcc/config/i386/sol2-gas.h b/gcc/config/i386/sol2-gas.h
new file mode 100644
index 000000000..8d15b9d11
--- /dev/null
+++ b/gcc/config/i386/sol2-gas.h
@@ -0,0 +1,31 @@
+/* Definitions of target machine for GCC, for x86 running Solaris 2
+ using the GNU assembler.
+
+Copyright (C) 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Undefine this so that BNSYM/ENSYM pairs are emitted by STABS+. */
+#undef NO_DBX_BNSYM_ENSYM
+
+/* Restore default; gas doesn't understand Sun as .tcomm. */
+#undef TLS_COMMON_ASM_OP
diff --git a/gcc/config/i386/sol2-gc1.asm b/gcc/config/i386/sol2-gc1.asm
new file mode 100644
index 000000000..8cb989a9c
--- /dev/null
+++ b/gcc/config/i386/sol2-gc1.asm
@@ -0,0 +1,155 @@
+! gcrt1.s for Solaris 2, x86
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file takes control of the process from the kernel, as specified
+! in section 3 of the System V Application Binary Interface, Intel386
+! Processor Supplement. It has been constructed from information obtained
+! from the ABI, information obtained from single stepping existing
+! Solaris executables through their startup code with gdb, and from
+! information obtained by single stepping executables on other i386 SVR4
+! implementations. This file is the first thing linked into any executable.
+
+! This is a crt1.s modified by J.W.Hawtin <oolon@ankh.org>, 15/8/96,
+! to allow program profiling by calling monstartup on entry and _mcleanup
+! on exit.
+
+ .ident "GNU C gcrt1.s"
+ .weak _DYNAMIC
+ .text
+
+! Start creating the initial frame by pushing a NULL value for the return
+! address of the initial frame, and mark the end of the stack frame chain
+! (the innermost stack frame) with a NULL value, per page 3-32 of the ABI.
+! Initialize the first stack frame pointer in %ebp (the contents of which
+! are unspecified at process initialization).
+
+ .globl _start
+_start:
+ pushl $0x0
+ pushl $0x0
+ movl %esp,%ebp
+
+! As specified per page 3-32 of the ABI, %edx contains a function
+! pointer that should be registered with atexit(), for proper
+! shared object termination. Just push it onto the stack for now
+! to preserve it. We want to register _mcleanup() first.
+
+ pushl %edx
+
+! Check to see if there is a _mcleanup() function linked in, and if
+! so, register it with atexit() as the last thing to be run by
+! atexit().
+
+ movl $_mcleanup,%eax
+ testl %eax,%eax
+ je .L1
+ pushl $_mcleanup
+ call atexit
+ addl $0x4,%esp
+.L1:
+
+! Now check to see if we have a _DYNAMIC table, and if so then
+! we need to register the function pointer previously in %edx, but
+! now conveniently saved on the stack as the argument to pass to
+! atexit().
+
+ movl $_DYNAMIC,%eax
+ testl %eax,%eax
+ je .L2
+ call atexit
+.L2:
+
+! Register _fini() with atexit(). We will take care of calling _init()
+! directly.
+
+ pushl $_fini
+ call atexit
+
+! Start profiling
+
+ pushl %ebp
+ movl %esp,%ebp
+ pushl $_etext
+ pushl $_start
+ call monstartup
+ addl $8,%esp
+ popl %ebp
+
+! Compute the address of the environment vector on the stack and load
+! it into the global variable _environ. Currently argc is at 8 off
+! the frame pointer. Fetch the argument count into %eax, scale by the
+! size of each arg (4 bytes) and compute the address of the environment
+! vector which is 16 bytes (the two zero words we pushed, plus argc,
+! plus the null word terminating the arg vector) further up the stack,
+! off the frame pointer (whew!).
+
+ movl 8(%ebp),%eax
+ leal 16(%ebp,%eax,4),%edx
+ movl %edx,_environ
+
+! Push the environment vector pointer, the argument vector pointer,
+! and the argument count on to the stack to set up the arguments
+! for _init(), __fpstart(), and main(). Note that the environment
+! vector pointer and the arg count were previously loaded into
+! %edx and %eax respectively. The only new value we need to compute
+! is the argument vector pointer, which is at a fixed address off
+! the initial frame pointer.
+
+!
+! Make sure the stack is properly aligned.
+!
+ andl $0xfffffff0,%esp
+ subl $4,%esp
+
+ pushl %edx
+ leal 12(%ebp),%edx
+ pushl %edx
+ pushl %eax
+
+! Call _init(argc, argv, environ), __fpstart(argc, argv, environ), and
+! main(argc, argv, environ).
+
+ call _init
+ call __fpstart
+ call main
+
+! Pop the argc, argv, and environ arguments off the stack, push the
+! value returned from main(), and call exit().
+
+ addl $12,%esp
+ pushl %eax
+ call exit
+
+! An inline equivalent of _exit, as specified in Figure 3-26 of the ABI.
+
+ pushl $0x0
+ movl $0x1,%eax
+ lcall $7,$0
+
+! If all else fails, just try a halt!
+
+ hlt
+ .type _start,@function
+ .size _start,.-_start
diff --git a/gcc/config/i386/sol2-unwind.h b/gcc/config/i386/sol2-unwind.h
new file mode 100644
index 000000000..d93b60c78
--- /dev/null
+++ b/gcc/config/i386/sol2-unwind.h
@@ -0,0 +1,289 @@
+/* DWARF2 EH unwinding support for AMD x86-64 and x86.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <ucontext.h>
+#include <sys/frame.h>
+
+#ifdef __x86_64__
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_64_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ mcontext_t *mctx;
+ long new_cfa;
+
+ if (/* Solaris 10+
+ ------------
+ <__sighndlr+0>: push %rbp
+ <__sighndlr+1>: mov %rsp,%rbp
+ <__sighndlr+4>: callq *%rcx
+ <__sighndlr+6>: leaveq <--- PC
+ <__sighndlr+7>: retq */
+ *(unsigned long *)(pc - 6) == 0xc3c9d1ffe5894855)
+
+ /* We need to move up three frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler
+ sigacthandler
+ <kernel>
+
+ context->cfa points into the frame after the saved frame pointer and
+ saved pc (struct frame).
+
+ The ucontext_t structure is in the kernel frame after the signal
+ number and a siginfo_t *. Since the frame sizes vary even within
+ Solaris 10 updates, we need to walk the stack to get there. */
+ {
+ struct frame *fp = (struct frame *) context->cfa - 1;
+ struct handler_args {
+ int signo;
+ siginfo_t *sip;
+ ucontext_t ucontext;
+ } *handler_args;
+ ucontext_t *ucp;
+
+ /* Next frame: __sighndlr frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+ /* call_user_handler frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+ /* sigacthandler frame pointer. */
+ fp = (struct frame *) fp->fr_savfp;
+
+ /* The argument area precedes the struct frame. */
+ handler_args = (struct handler_args *) (fp + 1);
+ ucp = &handler_args->ucontext;
+ mctx = &ucp->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = mctx->gregs[REG_RSP];
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 7;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
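+  /* For reference (added comment, not in the original): the x86-64 psABI
+     DWARF column numbers used below are 0 rax, 1 rdx, 2 rcx, 3 rbx,
+     4 rsi, 5 rdi, 6 rbp, 7 rsp, 8-15 r8-r15 and 16 rip.  */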
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&mctx->gregs[REG_RAX] - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&mctx->gregs[REG_RDX] - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&mctx->gregs[REG_RCX] - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&mctx->gregs[REG_RBX] - new_cfa;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&mctx->gregs[REG_RSI] - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&mctx->gregs[REG_RDI] - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&mctx->gregs[REG_RBP] - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&mctx->gregs[REG_R8] - new_cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&mctx->gregs[REG_R9] - new_cfa;
+ fs->regs.reg[10].how = REG_SAVED_OFFSET;
+ fs->regs.reg[10].loc.offset = (long)&mctx->gregs[REG_R10] - new_cfa;
+ fs->regs.reg[11].how = REG_SAVED_OFFSET;
+ fs->regs.reg[11].loc.offset = (long)&mctx->gregs[REG_R11] - new_cfa;
+ fs->regs.reg[12].how = REG_SAVED_OFFSET;
+ fs->regs.reg[12].loc.offset = (long)&mctx->gregs[REG_R12] - new_cfa;
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long)&mctx->gregs[REG_R13] - new_cfa;
+ fs->regs.reg[14].how = REG_SAVED_OFFSET;
+ fs->regs.reg[14].loc.offset = (long)&mctx->gregs[REG_R14] - new_cfa;
+ fs->regs.reg[15].how = REG_SAVED_OFFSET;
+ fs->regs.reg[15].loc.offset = (long)&mctx->gregs[REG_R15] - new_cfa;
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long)&mctx->gregs[REG_RIP] - new_cfa;
+ fs->retaddr_column = 16;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#else
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ mcontext_t *mctx;
+ long new_cfa;
+
+ if (/* Solaris 8 - single-threaded
+ ----------------------------
+ <sigacthandler+17>: mov 0x10(%ebp),%esi
+ <sigacthandler+20>: push %esi
+ <sigacthandler+21>: pushl 0xc(%ebp)
+ <sigacthandler+24>: mov 0x8(%ebp),%ecx
+ <sigacthandler+27>: push %ecx
+ <sigacthandler+28>: mov offset(%ebx),%eax
+ <sigacthandler+34>: call *(%eax,%ecx,4)
+ <sigacthandler+37>: add $0xc,%esp <--- PC
+ <sigacthandler+40>: push %esi ... */
+ (*(unsigned long *)(pc - 20) == 0x5610758b
+ && *(unsigned long *)(pc - 16) == 0x8b0c75ff
+ && *(unsigned long *)(pc - 12) == 0x8b51084d
+ && *(unsigned char *)(pc - 8) == 0x83
+ && *(unsigned long *)(pc - 4) == 0x8814ff00
+ && *(unsigned long *)(pc - 0) == 0x560cc483)
+
+ || /* Solaris 8 - multi-threaded
+ ---------------------------
+ <__sighndlr+0>: push %ebp
+ <__sighndlr+1>: mov %esp,%ebp
+ <__sighndlr+3>: pushl 0x10(%ebp)
+ <__sighndlr+6>: pushl 0xc(%ebp)
+ <__sighndlr+9>: pushl 0x8(%ebp)
+ <__sighndlr+12>: call *0x14(%ebp)
+ <__sighndlr+15>: leave <--- PC */
+ (*(unsigned long *)(pc - 15) == 0xffec8b55
+ && *(unsigned long *)(pc - 11) == 0x75ff1075
+ && *(unsigned long *)(pc - 7) == 0x0875ff0c
+ && *(unsigned long *)(pc - 3) == 0xc91455ff)
+
+ || /* Solaris 9 - single-threaded
+ ----------------------------
+ <sigacthandler+16>: mov 0x244(%ebx),%ecx
+ <sigacthandler+22>: mov 0x8(%ebp),%eax
+ <sigacthandler+25>: mov (%ecx,%eax,4),%ecx
+ <sigacthandler+28>: pushl 0x10(%ebp)
+ <sigacthandler+31>: pushl 0xc(%ebp)
+ <sigacthandler+34>: push %eax
+ <sigacthandler+35>: call *%ecx
+ <sigacthandler+37>: add $0xc,%esp <--- PC
+ <sigacthandler+40>: pushl 0x10(%ebp) */
+ (*(unsigned long *)(pc - 21) == 0x2448b8b
+ && *(unsigned long *)(pc - 17) == 0x458b0000
+ && *(unsigned long *)(pc - 13) == 0x810c8b08
+ && *(unsigned long *)(pc - 9) == 0xff1075ff
+ && *(unsigned long *)(pc - 5) == 0xff500c75
+ && *(unsigned long *)(pc - 1) == 0xcc483d1)
+
+ || /* Solaris 9 - multi-threaded, Solaris 10
+ ---------------------------------------
+ <__sighndlr+0>: push %ebp
+ <__sighndlr+1>: mov %esp,%ebp
+ <__sighndlr+3>: pushl 0x10(%ebp)
+ <__sighndlr+6>: pushl 0xc(%ebp)
+ <__sighndlr+9>: pushl 0x8(%ebp)
+ <__sighndlr+12>: call *0x14(%ebp)
+ <__sighndlr+15>: add $0xc,%esp <--- PC
+ <__sighndlr+18>: leave
+ <__sighndlr+19>: ret */
+ (*(unsigned long *)(pc - 15) == 0xffec8b55
+ && *(unsigned long *)(pc - 11) == 0x75ff1075
+ && *(unsigned long *)(pc - 7) == 0x0875ff0c
+ && *(unsigned long *)(pc - 3) == 0x831455ff
+ && *(unsigned long *)(pc + 1) == 0xc3c90cc4)
+
+ || /* Solaris 11 before snv_125
+ --------------------------
+ <__sighndlr+0> push %ebp
+ <__sighndlr+1> mov %esp,%ebp
+ <__sighndlr+4> pushl 0x10(%ebp)
+ <__sighndlr+6> pushl 0xc(%ebp)
+ <__sighndlr+9> pushl 0x8(%ebp)
+ <__sighndlr+12> call *0x14(%ebp)
+ <__sighndlr+15> add $0xc,%esp
+ <__sighndlr+18> leave <--- PC
+ <__sighndlr+19> ret */
+ (*(unsigned long *)(pc - 18) == 0xffec8b55
+ && *(unsigned long *)(pc - 14) == 0x7fff107f
+ && *(unsigned long *)(pc - 10) == 0x0875ff0c
+ && *(unsigned long *)(pc - 6) == 0x83145fff
+ && *(unsigned long *)(pc - 1) == 0xc3c90cc4)
+
+ || /* Solaris 11 since snv_125
+ -------------------------
+ <__sighndlr+0> push %ebp
+ <__sighndlr+1> mov %esp,%ebp
+ <__sighndlr+3> and $0xfffffff0,%esp
+ <__sighndlr+6> sub $0x4,%esp
+ <__sighndlr+9> pushl 0x10(%ebp)
+ <__sighndlr+12> pushl 0xc(%ebp)
+ <__sighndlr+15> pushl 0x8(%ebp)
+ <__sighndlr+18> call *0x14(%ebp)
+ <__sighndlr+21> leave <--- PC
+ <__sighndlr+22> ret */
+ (*(unsigned long *)(pc - 21) == 0x83ec8b55
+ && *(unsigned long *)(pc - 17) == 0xec83f0e4
+ && *(unsigned long *)(pc - 13) == 0x1075ff04
+ && *(unsigned long *)(pc - 9) == 0xff0c75ff
+ && *(unsigned long *)(pc - 5) == 0x55ff0875
+ && (*(unsigned long *)(pc - 1) & 0x00ffffff) == 0x00c3c914))
+ {
+ struct handler_args {
+ int signo;
+ siginfo_t *sip;
+ ucontext_t *ucontext;
+ } *handler_args = context->cfa;
+ mctx = &handler_args->ucontext->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = mctx->gregs[UESP];
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 4;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
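+  /* For reference (added comment, not in the original): the SVR4/DWARF
+     column numbers used below are 0 eax, 1 ecx, 2 edx, 3 ebx, 4 esp,
+     5 ebp, 6 esi, 7 edi and 8 eip.  */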
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&mctx->gregs[EAX] - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&mctx->gregs[EBX] - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&mctx->gregs[ECX] - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&mctx->gregs[EDX] - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&mctx->gregs[ESI] - new_cfa;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&mctx->gregs[EDI] - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&mctx->gregs[EBP] - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&mctx->gregs[EIP] - new_cfa;
+ fs->retaddr_column = 8;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#endif
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
new file mode 100644
index 000000000..baddbb0b9
--- /dev/null
+++ b/gcc/config/i386/sol2.h
@@ -0,0 +1,182 @@
+/* Target definitions for GCC for Intel 80386 running Solaris 2
+ Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Fred Fish (fnf@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The Solaris 2.0 x86 linker botches alignment of code sections.
+ It tries to align to a 16 byte boundary by padding with 0x00000090
+ ints, rather than 0x90 bytes (nop). This generates trash in the
+ ".init" section since the contribution from crtbegin.o is only 7
+ bytes. The linker pads it to 16 bytes with a single 0x90 byte, and
+ two 0x00000090 ints, which generates a segmentation violation when
+ executed. This macro forces the assembler to do the padding, since
+ it knows what it is doing. */
+#define FORCE_CODE_SECTION_ALIGN asm(ALIGN_ASM_OP "16");
+
+/* Old versions of the Solaris assembler cannot handle the difference of
+ labels in different sections, so force DW_EH_PE_datarel. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (flag_pic ? ((GLOBAL ? DW_EH_PE_indirect : 0) \
+ | (TARGET_64BIT ? DW_EH_PE_pcrel | DW_EH_PE_sdata4 \
+ : DW_EH_PE_datarel)) \
+ : DW_EH_PE_absptr)
+
+/* The Solaris linker will not merge a read-only .eh_frame section
+ with a read-write .eh_frame section. None of the encodings used
+ with non-PIC code require runtime relocations. In 64-bit mode,
+ since there is no backwards compatibility issue, we use a read-only
+ section for .eh_frame. In 32-bit mode, we use a writable .eh_frame
+ section in order to be compatible with G++ for Solaris x86. */
+#undef EH_TABLES_CAN_BE_READ_ONLY
+#define EH_TABLES_CAN_BE_READ_ONLY (TARGET_64BIT)
+
+/* Solaris 2/Intel as chokes on #line directives. */
+#undef CPP_SPEC
+#define CPP_SPEC "%{,assembler-with-cpp:-P} %(cpp_subtarget)"
+
+/* FIXME: Removed -K PIC from generic Solaris 2 ASM_SPEC: the native assembler
+ gives many warnings: R_386_32 relocation is used for symbol ".text". */
+#undef ASM_SPEC
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{Ym,*} -s %(asm_cpu)"
+
+#define ASM_CPU_SPEC ""
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "startfile_arch", STARTFILE_ARCH_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC }
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* The 32-bit Solaris assembler does not support .quad. Do not use it. */
+#ifndef HAVE_AS_IX86_QUAD
+#undef ASM_QUAD
+#endif
+
+/* The Solaris assembler wants a .local for non-exported aliases. */
+#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
+ do { \
+ const char *declname = \
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \
+ ASM_OUTPUT_DEF ((FILE), declname, \
+ IDENTIFIER_POINTER (TARGET)); \
+ if (! TREE_PUBLIC (DECL)) \
+ { \
+ fprintf ((FILE), "%s", LOCAL_ASM_OP); \
+ assemble_name ((FILE), declname); \
+ fprintf ((FILE), "\n"); \
+ } \
+ } while (0)
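+
+/* For illustration (not part of the original header): given a local alias
+   "bar" for "foo" (names hypothetical), the macro above emits the alias
+   definition via ASM_OUTPUT_DEF followed by a ".local bar" line, so the
+   native assembler does not export the alias.  */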
+
+/* Follow Sun requirements for TLS code sequences and use Sun assembler TLS
+ syntax. */
+#undef TARGET_SUN_TLS
+#define TARGET_SUN_TLS 1
+
+/* The Sun assembler uses .tcomm for TLS common sections. */
+#define TLS_COMMON_ASM_OP ".tcomm"
+
+/* Similar to the Sun assembler on SPARC, the native assembler requires
+ TLS objects to be declared as @tls_obj (not @tls_object). Unlike SPARC,
+ gas doesn't understand this variant. */
+#ifndef USE_GAS
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ \
+ if (targetm.have_tls && DECL_THREAD_LOCAL_P (DECL)) \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "tls_obj"); \
+ else \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
+#endif
+
+/* The Solaris assembler cannot grok .stabd directives. */
+#undef NO_DBX_BNSYM_ENSYM
+#define NO_DBX_BNSYM_ENSYM 1
+
+/* Solaris-specific #pragmas are implemented on top of attributes. Hook in
+ the bits from config/sol2.c. */
+#define SUBTARGET_INSERT_ATTRIBUTES solaris_insert_attributes
+#define SUBTARGET_ATTRIBUTE_TABLE SOLARIS_ATTRIBUTE_TABLE
+
+/* Register the Solaris-specific #pragma directives. */
+#define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas ()
+
+/* Augment i386/unix.h version to return 8-byte vectors in memory, matching
+   Sun Studio compilers up to version 12, the only ones supported on
+   Solaris 8 and 9.  */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_VECT8_RETURNS)
+
+/* Output a simple call for .init/.fini. */
+#define ASM_OUTPUT_CALL(FILE, FN) \
+ do \
+ { \
+ fprintf (FILE, "\tcall\t"); \
+ ix86_print_operand (FILE, XEXP (DECL_RTL (FN), 0), 'P'); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* We do not need NT_VERSION notes. */
+#undef X86_FILE_START_VERSION_DIRECTIVE
+#define X86_FILE_START_VERSION_DIRECTIVE false
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+/* Only recent versions of Solaris 11 ld properly support hidden .gnu.linkonce
+ sections, so don't use them. */
+#ifndef TARGET_GNU_LD
+#define USE_HIDDEN_LINKONCE 0
+#endif
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#define MD_UNWIND_SUPPORT "config/i386/sol2-unwind.h"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
new file mode 100644
index 000000000..2463985f8
--- /dev/null
+++ b/gcc/config/i386/sse.md
@@ -0,0 +1,12125 @@
+;; GCC machine description for SSE instructions
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Instruction suffix for sign and zero extensions.
+(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
+
+;; 16 byte integral modes handled by SSE
+(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
+
+;; All 16-byte vector modes handled by SSE
+(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
+
+;; 32 byte integral vector modes handled by AVX
+(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
+
+;; All 32-byte vector modes handled by AVX
+(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; All QI vector modes handled by AVX
+(define_mode_iterator AVXMODEQI [V32QI V16QI])
+
+;; All DI vector modes handled by AVX
+(define_mode_iterator AVXMODEDI [V4DI V2DI])
+
+;; All vector modes handled by AVX
+(define_mode_iterator AVXMODE
+ [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
+(define_mode_iterator AVXMODE16
+ [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; Mix-n-match
+(define_mode_iterator SSEMODE12 [V16QI V8HI])
+(define_mode_iterator SSEMODE24 [V8HI V4SI])
+(define_mode_iterator SSEMODE14 [V16QI V4SI])
+(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
+(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
+(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
+(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+
+(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
+(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
+(define_mode_iterator AVX256MODE4P [V4DI V4DF])
+(define_mode_iterator AVX256MODE8P [V8SI V8SF])
+(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
+(define_mode_iterator AVXMODEF4P [V4SF V4DF])
+(define_mode_iterator AVXMODEFDP [V2DF V4DF])
+(define_mode_iterator AVXMODEFSP [V4SF V8SF])
+(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
+(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
+
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+
+;; Int-float size matches
+(define_mode_iterator SSEMODE4S [V4SF V4SI])
+(define_mode_iterator SSEMODE2D [V2DF V2DI])
+
+;; Modes handled by integer vcond pattern
+(define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
+ (V2DI "TARGET_SSE4_2")])
+
+;; Modes handled by vec_extract_even/odd pattern.
+(define_mode_iterator SSEMODE_EO
+ [(V4SF "TARGET_SSE")
+ (V2DF "TARGET_SSE2")
+ (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
+ (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
+ (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
+
+;; Modes handled by storent patterns.
+(define_mode_iterator STORENT_MODE
+ [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
+ (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
+ (V4SF "TARGET_SSE")
+ (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
+
+;; Modes handled by vector float patterns.
+(define_mode_iterator VEC_FLOAT_MODE
+ [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
+ (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
+
+;; Modes handled by vector extract patterns.
+(define_mode_iterator VEC_EXTRACT_MODE
+ [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
+ (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
+ (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
+ (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
+
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+
+;; Mapping of the insn mnemonic suffix
+(define_mode_attr ssemodesuffix
+ [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
+ (V8SI "ps") (V4DI "pd")])
+(define_mode_attr ssescalarmodesuffix
+ [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
+ (V4DF "sd") (V4SI "d") (V4DI "sd")])
+
+;; Mapping of the max integer size for xop rotate immediate constraint
+(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
+
+;; Mapping of vector modes back to the scalar modes
+(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
+ (V16QI "QI") (V8HI "HI")
+ (V4SI "SI") (V2DI "DI")])
+
+;; Mapping of vector modes to a vector mode of double size
+(define_mode_attr ssedoublesizemode
+ [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
+ (V8HI "V16HI") (V16QI "V32QI")
+ (V4DF "V8DF") (V8SF "V16SF")
+ (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
+
+;; Number of scalar elements in each vector type
+(define_mode_attr ssescalarnum
+ [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
+ (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
+
+;; Mapping for AVX
+(define_mode_attr avxvecmode
+ [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
+ (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
+ (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
+(define_mode_attr avxvecpsmode
+ [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
+ (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
+(define_mode_attr avxhalfvecmode
+ [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
+ (V8SF "V4SF") (V4DF "V2DF")
+ (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
+(define_mode_attr avxscalarmode
+ [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
+ (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
+(define_mode_attr avxcvtvecmode
+ [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
+(define_mode_attr avxpermvecmode
+ [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
+(define_mode_attr avxmodesuffixp
+ [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
+ (V4DF "pd")])
+(define_mode_attr avxmodesuffix
+ [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
+ (V8SI "256") (V8SF "256") (V4DF "256")])
+
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits
+ [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+
+;; Mapping of immediate bits for pinsr instructions
+(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
+
+;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mov<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
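+
+;; Note (not in the original): a define_expand or define_insn written with
+;; a mode iterator such as AVX256MODE above is replicated once per mode in
+;; the iterator, with <MODE> and <mode> substituted, so this single
+;; "mov<mode>" expander covers movv32qi, movv16hi, movv8si, movv4di,
+;; movv8sf and movv4df.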
+
+(define_insn "*avx_mov<mode>_internal"
+ [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_AVX
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovups\t{%1, %0|%0, %1}";
+ else
+ return "vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V4DF:
+ case MODE_V2DF:
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovupd\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "vmovapd\t{%1, %0|%0, %1}";
+ default:
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+;; All of these patterns are enabled for SSE1 as well as SSE2.
+;; This is essential for maintaining stable calling conventions.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_SSE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+ default:
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
+ (const_string "V4SF")
+ (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
+ (const_string "V2DF")
+ ]
+ (const_string "TI")))])
+
+;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
+;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
+;; from memory, we'd prefer to load the memory directly into the %xmm
+;; register. To facilitate this happy circumstance, this pattern won't
+;; split until after register allocation. If the 64-bit value didn't
+;; come from memory, this is the best we can do. This is much better
+;; than storing %edx:%eax into a stack temporary and loading an %xmm
+;; from there.
+
+(define_insn_and_split "movdi_to_sse"
+ [(parallel
+ [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
+ (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
+ (clobber (match_scratch:V4SI 2 "=&x,X"))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (register_operand (operands[1], DImode))
+ {
+ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+ Assemble the 64-bit DImode value in an xmm register. */
+ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
+ operands[2]));
+ }
+ else if (memory_operand (operands[1], DImode))
+ emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
+ operands[1], const0_rtx));
+ else
+ gcc_unreachable ();
+})
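+
+;; Illustration (not in the original): for a register source %edx:%eax the
+;; split above emits two scalar-to-xmm loads and an interleave, roughly
+;;     movd  %eax, %xmm0
+;;     movd  %edx, %xmm2
+;;     punpckldq %xmm2, %xmm0
+;; while a memory source becomes a single quadword load into the low half
+;; of the destination.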
+
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (DFmode);
+})
+
+(define_expand "push<mode>1"
+ [(match_operand:AVX256MODE 0 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "push<mode>1"
+ [(match_operand:SSEMODE16 0 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse2_movq128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (const_int 0)))]
+ "TARGET_SSE2"
+ "%vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<sse>_movu<ssemodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_movdqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_movdqu"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_movnt<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
+ (unspec:AVXMODEDI
+ [(match_operand:AVXMODEDI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_AVX"
+ "vmovntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_movntv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movntsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movnti\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "0")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "avx_lddqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
+ UNSPEC_LDDQU))]
+ "TARGET_AVX"
+ "vlddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse3_lddqu"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+ UNSPEC_LDDQU))]
+ "TARGET_SSE3"
+ "lddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+; Expand patterns for non-temporal stores. At the moment, only those
+; that directly map to insns are defined; it would be possible to
+; define patterns for other modes that would expand to several insns.
+
+(define_expand "storent<mode>"
+ [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
+ (unspec:STORENT_MODE
+ [(match_operand:STORENT_MODE 1 "register_operand" "")]
+ UNSPEC_MOVNT))])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "<code><mode>2"
+ [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
+ (absneg:VEC_FLOAT_MODE
+ (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
+ ""
+ "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn_and_split "*avx_absneg<mode>2"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+ (match_operator:AVXMODEF2P 3 "absneg_operator"
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")]))
+ (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx t;
+
+ if (MEM_P (operands[1]))
+ t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
+ <MODE>mode, operands[2], operands[1]);
+ else
+ t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
+ <MODE>mode, operands[1], operands[2]);
+ t = gen_rtx_SET (VOIDmode, operands[0], t);
+ emit_insn (t);
+ DONE;
+})
+
+(define_insn_and_split "*sse_absneg<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (match_operator:SSEMODEF2P 3 "absneg_operator"
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")]))
+ (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx t;
+
+ t = operands[rtx_equal_p (operands[0], operands[1]) ? 2 : 1];
+ t = gen_rtx_fmt_ee (GET_CODE (operands[3]) == NEG ? XOR : AND,
+ <MODE>mode, operands[0], t);
+ t = gen_rtx_SET (VOIDmode, operands[0], t);
+ emit_insn (t);
+ DONE;
+})
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plusminus:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (plusminus:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vm<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vm<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (mult:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*avx_mul<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (mult:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "divv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (div:V8SF (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
+
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V8SFmode);
+ DONE;
+ }
+})
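+
+;; When the TARGET_RECIP conditions above hold, ix86_emit_swdivsf
+;; replaces the division by a Newton-Raphson refined reciprocal
+;; estimate: a/b ~= a * x * (2.0 - b * x), with x = rcpps (b).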
+
+(define_expand "divv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (div:V4DF (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+ "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
+
+(define_insn "avx_div<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (div:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "divv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V4SFmode);
+ DONE;
+ }
+})
+
+(define_expand "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2")
+
+(define_insn "*avx_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "div<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx_rcpv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_AVX"
+ "vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse_rcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_SSE"
+ "%vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrcpss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rcpss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "rcp")
+ (set_attr "mode" "SF")])
+
+(define_expand "sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "avx_sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "sse_sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "%vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sqrtv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "%vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*avx_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "0")
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "sqrt")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_AVX && TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
+ DONE;
+})
+
+(define_insn "avx_rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_AVX"
+ "vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "rsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
+ DONE;
+})
+
+(define_insn "sse_rsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_SSE"
+ "%vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rsqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
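+;;
+;; For instance, minps computes op1 < op2 ? op1 : op2 elementwise and
+;; returns its second operand whenever the comparison is false, NaNs
+;; included: min(NaN, 1.0) is 1.0, but min(1.0, NaN) is NaN.  The rtl
+;; SMIN operator, by contrast, is simply undefined for NaN operands.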
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (smaxmin:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
+
+(define_insn "*avx_<code><mode>3_finite"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code><mode>3_finite"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vm<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vm<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; These operations are not commutative, and thus they may be used in
+;; the presence of -0.0 and NaN.
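+;;
+;; Following those formulas with op1 = -0.0 and op2 = +0.0 (which
+;; compare equal): min yields +0.0 and max yields -0.0, i.e. the two
+;; operations deliberately pick opposite operands on equal inputs.
+;; Likewise a NaN op1 makes the comparison false, so min yields op2
+;; while max yields op1.  Hence the operands must not be swapped.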
+
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "min<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "max<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
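+;; addsub subtracts in the even-numbered elements and adds in the odd
+;; ones; the vec_merge masks below (170 = 0xaa for V8SF, 10 = 0xa for
+;; V4SF/V4DF, 2 for V2DF) select the PLUS result for the odd elements
+;; and the MINUS result for the even ones.
+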
+(define_insn "avx_addsubv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_merge:V8SF
+ (plus:V8SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (minus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 170)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_addsubv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_merge:V4DF
+ (plus:V4DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (minus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_SSE3"
+ "addsubps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 2)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 2)))]
+ "TARGET_SSE3"
+ "addsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "mode" "V2DF")])
+
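+;; Horizontal add/subtract.  Note that the 256-bit forms operate within
+;; 128-bit lanes: vhaddpd computes { op1[0]+op1[1], op2[0]+op2[1],
+;; op1[2]+op1[3], op2[2]+op2[3] }, exactly as the vec_select patterns
+;; below spell out.
+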
+(define_insn "avx_h<plusminus_insn>v4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "avx_h<plusminus_insn>v8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_h<plusminus_insn>v4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_h<plusminus_insn>v4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSE3"
+ "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_h<plusminus_insn>v2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_h<plusminus_insn>v2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "reduc_splus_v8sf"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ rtx tmp = gen_reg_rtx (V8SFmode);
+ rtx tmp2 = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
+ emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
+ emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
+ DONE;
+})
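+
+;; E.g. with operand 1 = {a,b,c,d,e,f,g,h}, the two in-lane hadds leave
+;; {s0,s0,s0,s0,s1,s1,s1,s1} with s0 = a+b+c+d and s1 = e+f+g+h, the
+;; vperm2f128 swaps the 128-bit halves, and the final add leaves the
+;; full sum s0+s1 in every element.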
+
+(define_expand "reduc_splus_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE3)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
+ }
+ else
+ ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_splus_v4df"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")]
+ "TARGET_AVX"
+{
+ rtx tmp = gen_reg_rtx (V4DFmode);
+ rtx tmp2 = gen_reg_rtx (V4DFmode);
+ emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
+ emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
+ DONE;
+})
+
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
+
+(define_expand "reduc_smax_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smin_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_cmp<ssemodesuffix><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX"
+ "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; We don't promote 128-bit vector compare intrinsics.  But the
+;; vectorizer may generate 256-bit vector compare instructions.
+(define_insn "*avx_maskcmp<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "<sse>_maskcmp<mode>3"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
+ (match_operator:SSEMODEF4 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF4 1 "register_operand" "0")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
+ "!TARGET_XOP
+ && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
+ "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<ssescalarmode>")])
+
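+;; comiss/comisd raise the invalid exception on quiet as well as
+;; signalling NaNs, while ucomiss/ucomisd raise it only on signalling
+;; NaNs; hence the separate CCFP and CCFPU flag modes below.
+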
+(define_insn "<sse>_comi"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix_rep" "0")
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "DF")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_ucomi"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix_rep" "0")
+ (set (attr "prefix_data16")
+ (if_then_else (eq_attr "mode" "DF")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
+ (if_then_else:AVXMODEF2P
+ (match_operator 3 ""
+ [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
+ (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
+ (match_operand:AVXMODEF2P 1 "general_operand" "")
+ (match_operand:AVXMODEF2P 2 "general_operand" "")))]
+ "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
+{
+ bool ok = ix86_expand_fp_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
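+
+;; vcond<mode> computes operands[0] = (operands[4] <op3> operands[5])
+;; ? operands[1] : operands[2]; ix86_expand_fp_vcond builds a
+;; cmpps-style mask from the comparison and then selects between the
+;; two data operands (via and/andnot/ior or, where available, a blend).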
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_andnot<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (and:AVXMODEF2P
+ (not:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x"))
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "<sse>_andnot<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (and:SSEMODEF2P
+ (not:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (any_logic:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (any_logic:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (any_logic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (any_logic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "<logic>ps\t{%2, %0|%0, %2}";
+ else
+ return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "copysign<mode>3"
+ [(set (match_dup 4)
+ (and:VEC_FLOAT_MODE
+ (not:VEC_FLOAT_MODE (match_dup 3))
+ (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
+ (set (match_dup 5)
+ (and:VEC_FLOAT_MODE (match_dup 3)
+ (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
+ (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
+ (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
+ ""
+{
+ operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
+
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+})
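+
+;; This is the usual sign-transfer bit trick: with the mask holding only
+;; the sign bits, copysign (op1, op2) = (op1 & ~mask) | (op2 & mask).
+;; A scalar C sketch of the same computation on float bits:
+;;
+;;   #include <stdint.h>
+;;   #include <string.h>
+;;
+;;   float
+;;   copysignf_bits (float x, float y)
+;;   {
+;;     uint32_t ux, uy;
+;;     memcpy (&ux, &x, sizeof ux);
+;;     memcpy (&uy, &y, sizeof uy);
+;;     ux = (ux & 0x7fffffffu) | (uy & 0x80000000u);
+;;     memcpy (&x, &ux, sizeof ux);
+;;     return x;
+;;   }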
+
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128 bits.
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "x"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*andnot<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (any_logic:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (any_logic:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "<logic>ps\t{%2, %0|%0, %2}";
+ else
+ return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<ssevecmode>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; FMA4 floating point multiply/accumulate instructions. This
+;; includes the scalar version of the instructions as well as the
+;; vector.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
+;; combine to generate a multiply/add with two memory references.  We then
+;; split this insn into a load of one memory operand into the destination
+;; register followed by the multiply/add.  If we don't manage to split the
+;; insn, reload will generate the appropriate moves.  The reason this is
+;; needed is that combine has already folded one of the memory references
+;; into both the multiply and add insns, and it can't generate a new
+;; pseudo.  I.e.:
+;; (set (reg1) (mem (addr1)))
+;; (set (reg2) (mult (reg1) (mem (addr2))))
+;; (set (reg3) (plus (reg2) (mem (addr3))))
+;;
+;; ??? This is historic, pre-dating the gimple fma transformation.
+;; We could now properly represent that only one memory operand is
+;; allowed and not be penalized during optimization.
+
+;; Intrinsic FMA operations.
+
+;; The standard names fma, fms, fnma and fnms are only available with
+;; SSE math enabled.
+(define_expand "fma<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+(define_expand "fms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+(define_expand "fnma<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+(define_expand "fnms<mode>4"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
+ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
+ "")
+
+;; The builtin used by fma4intrin.h is not constrained by SSE math being
+;; enabled.
+(define_expand "fma4i_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand")
+ (match_operand:FMAMODE 2 "nonimmediate_operand")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+ "TARGET_FMA || TARGET_FMA4"
+ "")
+
+(define_insn "*fma4i_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
+ "TARGET_FMA4"
+ "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
+ "TARGET_FMA4"
+ "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
+ "TARGET_FMA4"
+ "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
+ "TARGET_FMA4"
+ "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Scalar versions of the above. Unlike ADDSS et al, these write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand")
+ (vec_merge:SSEMODEF2P
+ (fma:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
+(define_insn "*fma4i_vmfmadd_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (fma:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_operand:SSEMODEF2P 4 "const0_operand" "")
+ (const_int 1)))]
+ "TARGET_FMA4"
+ "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_vmfmsub_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (fma:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
+ (match_operand:SSEMODEF2P 4 "const0_operand" "")
+ (const_int 1)))]
+ "TARGET_FMA4"
+ "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_vmfnmadd_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (fma:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_operand:SSEMODEF2P 4 "const0_operand" "")
+ (const_int 1)))]
+ "TARGET_FMA4"
+ "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4i_vmfnmsub_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (fma:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
+ (match_operand:SSEMODEF2P 4 "const0_operand" "")
+ (const_int 1)))]
+ "TARGET_FMA4"
+ "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; FMA4 Parallel floating point multiply addsub and subadd operations.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
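+;;
+;; Elementwise, UNSPEC_FMADDSUB computes
+;;   even i: op1[i] * op2[i] - op3[i]
+;;   odd  i: op1[i] * op2[i] + op3[i]
+;; matching the addsub convention of subtracting in the even elements.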
+
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4"
+ "")
+
+(define_insn "*fma4_fmaddsub_<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA4"
+ "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4_fmsubadd_<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+ (neg:AVXMODEF2P
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA4"
+ "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; FMA3 floating point multiply/accumulate instructions.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
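+;; FMA3 instructions overwrite one of their sources, so each pattern
+;; below offers three alternatives: operand 0 tied to operand 1 with
+;; either operand 2 or operand 3 in memory (the 132/312 forms), or tied
+;; to operand 3 (the 231 form).
+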
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd312<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub312<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd312<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub312<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:AVXMODEF2P
+ (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 3)))]
+ "TARGET_SSE"
+ "cvtpi2ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_cvtps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvtps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvttps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_rep" "0")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "cvtsi2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "length_vex" "4")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rex" "1")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvtss2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvtss2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvtss2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE"
+ "%vcvttss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvttss2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "avx_cvtdq2ps<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
+ (float:AVXMODEDCVTDQ2PS
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvtdq2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
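+;; No SSE2 instruction converts unsigned integers directly.  Convert
+;; as signed first: inputs with the sign bit set come out negative, so
+;; add 2^32 back in wherever the signed result is below zero.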
+(define_expand "sse2_cvtudq2ps"
+ [(set (match_dup 5)
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
+ (set (match_dup 6)
+ (lt:V4SF (match_dup 5) (match_dup 3)))
+ (set (match_dup 7)
+ (and:V4SF (match_dup 6) (match_dup 4)))
+ (set (match_operand:V4SF 0 "register_operand" "")
+ (plus:V4SF (match_dup 5) (match_dup 7)))]
+ "TARGET_SSE2"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx x;
+ int i;
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, SFmode);
+
+ operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+ operands[4] = force_reg (V4SFmode,
+ ix86_build_const_vector (V4SFmode, 1, x));
+
+ for (i = 5; i < 8; i++)
+ operands[i] = gen_reg_rtx (V4SFmode);
+})
+
+(define_insn "avx_cvtps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (unspec:AVXMODEDCVTPS2DQ
+ [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvtps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_cvttps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (fix:AVXMODEDCVTPS2DQ
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvttps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_data16" "0")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_cvtpi2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
+ "TARGET_SSE2"
+ "cvtpi2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_cvtpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "DI")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "sse2_cvttpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*avx_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsi2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")])
+
+(define_insn "*avx_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "length_vex" "4")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rex" "1")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "double,direct")])
+
+(define_insn "sse2_cvtsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvtsd2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvttsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "%vcvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")])
+
+(define_insn "sse2_cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "bdver1_decode" "double,double")])
+
+(define_insn "avx_cvtdq2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_cvtdq2pd256_2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%x1, %0|%0, %x1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sse2_cvtdq2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "%vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "avx_cvtpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "avx_cvttpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "*avx_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse2_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsd2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "bdver1_decode" "direct,direct")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtss2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "athlon_decode" "direct,direct")
+ (set_attr "bdver1_decode" "direct,direct")
+ (set_attr "mode" "DF")])
+
+(define_insn "avx_cvtpd2ps256"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SF 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "bdver1_decode" "double")])
+
+(define_insn "avx_cvtps2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_cvtps2pd256_2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%x1, %0|%0, %x1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sse2_cvtps2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "%vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")
+ (set_attr "prefix_data16" "0")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "athlon_decode" "double")
+ (set_attr "bdver1_decode" "double")])
+
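+;; There is no direct conversion from the high V2SF half, so first
+;; move elements 2 and 3 of operand 1 into the low half of a scratch
+;; register (a movhlps-style shuffle) and extend that low half.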
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_dup 2)
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "operands[2] = gen_reg_rtx (V4SFmode);")
+
+(define_expand "vec_unpacks_hi_v8sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (match_dup 2)))]
+ "TARGET_AVX"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_unpacks_lo_v8sf"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
+(define_expand "vec_unpacks_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
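+;; Likewise for V4SI: bring the high V2SI half down into the low half
+;; of a scratch register and convert it from there.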
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "operands[2] = gen_reg_rtx (V4SImode);")
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_unpacks_float_hi_v8si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (match_dup 2)))]
+ "TARGET_AVX"
+ "operands[2] = gen_reg_rtx (V4SImode);")
+
+(define_expand "vec_unpacks_float_lo_v8si"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
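+;; The unsigned variants use the same fixup as sse2_cvtudq2ps above:
+;; convert as signed, then add 2^32 wherever the result went negative.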
+(define_expand "vec_unpacku_float_hi_v4si"
+ [(set (match_dup 5)
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_dup 6)
+ (float:V2DF
+ (vec_select:V2SI
+ (match_dup 5)
+ (parallel [(const_int 0) (const_int 1)]))))
+ (set (match_dup 7)
+ (lt:V2DF (match_dup 6) (match_dup 3)))
+ (set (match_dup 8)
+ (and:V2DF (match_dup 7) (match_dup 4)))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (plus:V2DF (match_dup 6) (match_dup 8)))]
+ "TARGET_SSE2"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx x;
+ int i;
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
+ operands[4] = force_reg (V2DFmode,
+ ix86_build_const_vector (V2DFmode, 1, x));
+
+ operands[5] = gen_reg_rtx (V4SImode);
+
+ for (i = 6; i < 9; i++)
+ operands[i] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "vec_unpacku_float_lo_v4si"
+ [(set (match_dup 5)
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))
+ (set (match_dup 6)
+ (lt:V2DF (match_dup 5) (match_dup 3)))
+ (set (match_dup 7)
+ (and:V2DF (match_dup 6) (match_dup 4)))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (plus:V2DF (match_dup 5) (match_dup 7)))]
+ "TARGET_SSE2"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx x;
+ int i;
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
+ operands[4] = force_reg (V2DFmode,
+ ix86_build_const_vector (V2DFmode, 1, x));
+
+ for (i = 5; i < 8; i++)
+ operands[i] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "vec_pack_trunc_v4df"
+ [(set (match_dup 3)
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "")))
+ (set (match_dup 4)
+ (float_truncate:V4SF
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_concat:V8SF
+ (match_dup 3)
+ (match_dup 4)))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+})
+
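+;; Narrow each V2DF operand into the low half of a V4SF register and
+;; join the two low halves with movlhps.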
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SFmode);
+ r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ DONE;
+})
+
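+;; cvttpd2dq leaves its two results in the low half of a V4SI register
+;; with the high half zeroed, so the packed result is formed by
+;; interleaving the low 64-bit halves of the two conversions.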
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
+ emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
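+;; As above, but using the rounding conversion instead of truncation.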
+(define_expand "vec_pack_sfix_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
+ emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse_movhlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
+
+ emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+(define_insn "*avx_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%H2, %1, %0|%0, %1, %H2}
+ vmovhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhlps\t{%2, %0|%0, %2}
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_expand "sse_movlhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
+
+ emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+(define_insn "*avx_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
+(define_insn "avx_unpckhps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
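+;; vunpckhps cannot cross the 128-bit lanes, so the full V8SF
+;; interleave is built from both in-lane unpacks, followed by a
+;; shuffle that selects the high half of each intermediate result.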
+(define_expand "vec_interleave_highv8sf"
+ [(set (match_dup 3)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))
+ (set (match_dup 4)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 3)
+ (match_dup 4))
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V8SFmode);
+ operands[4] = gen_reg_rtx (V8SFmode);
+})
+
+(define_insn "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE"
+ "unpckhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
+(define_insn "avx_unpcklps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
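+;; Likewise for the low interleave: perform both in-lane unpacks and
+;; select the low half of each intermediate result.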
+(define_expand "vec_interleave_lowv8sf"
+ [(set (match_dup 3)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))
+ (set (match_dup 4)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))
+ (set (match_operand:V8SF 0 "register_operand" "")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 3)
+ (match_dup 4))
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)])))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V8SFmode);
+ operands[4] = gen_reg_rtx (V8SFmode);
+})
+
+(define_insn "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE"
+ "unpcklps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the new instructions.
+(define_insn "avx_movshdup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1) (const_int 1)
+ (const_int 3) (const_int 3)
+ (const_int 5) (const_int 5)
+ (const_int 7) (const_int 7)])))]
+ "TARGET_AVX"
+ "vmovshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse3_movshdup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1)
+ (const_int 1)
+ (const_int 7)
+ (const_int 7)])))]
+ "TARGET_SSE3"
+ "%vmovshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "avx_movsldup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 0)
+ (const_int 2) (const_int 2)
+ (const_int 4) (const_int 4)
+ (const_int 6) (const_int 6)])))]
+ "TARGET_AVX"
+ "vmovsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse3_movsldup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 6)
+ (const_int 6)])))]
+ "TARGET_SSE3"
+ "%vmovsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "avx_shufps256"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)));
+ DONE;
+})
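+
+;; For example, mask 0xE4 (fields 0, 1, 2, 3) expands to the selector
+;; [0 1 10 11 4 5 14 15]: each 128-bit lane of the result takes its
+;; two low elements from operand 1 and its two high elements from
+;; operand 2.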
+
+;; Each 2-bit field in the mask selects one element in each 128-bit
+;; lane, so a single field controls two result elements.
+(define_insn "avx_shufps256_1"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_8_to_11_operand" "")
+ (match_operand 6 "const_8_to_11_operand" "")
+ (match_operand 7 "const_4_to_7_operand" "")
+ (match_operand 8 "const_4_to_7_operand" "")
+ (match_operand 9 "const_12_to_15_operand" "")
+ (match_operand 10 "const_12_to_15_operand" "")])))]
+ "TARGET_AVX
+ && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+ && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 8) << 4;
+ mask |= (INTVAL (operands[6]) - 8) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "sse_shufps"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
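+
+;; The same decomposition without the lane duplication: mask 0xE4
+;; becomes the selector [0 1 6 7], which with both operands equal is
+;; the identity shuffle.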
+
+(define_insn "*avx_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "x")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "0")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ %vmovhps\t{%1, %0|%0, %1}
+ %vmovhlps\t{%1, %d0|%d0, %1}
+ %vmovlps\t{%H1, %d0|%d0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
+
+ emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+(define_insn "*avx_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_AVX"
+ "@
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*avx_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX"
+ "@
+ vmovlps\t{%1, %0|%0, %1}
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V2DF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
+
+ emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+(define_insn "*avx_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "@
+   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
+ vmovlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "length_immediate" "1,*,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "length_immediate" "1,*,*")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "*avx_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_expand "vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!TARGET_AVX)
+ operands[1] = force_reg (SFmode, operands[1]);
+})
+
+(define_insn "*vec_dupv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
+ "TARGET_AVX"
+ "@
+ vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "length_immediate" "1,0")
+ (set_attr "prefix_extra" "0,1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_concatv2sf_avx"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " x,x,m, 0 , m")
+ (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklps\t{%2, %1, %0|%0, %1, %2}
+ vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
+ vmovss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "length_immediate" "*,1,*,*,*")
+ (set_attr "prefix_extra" "*,1,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
+;; Although insertps accepts a register source, we prefer unpcklps for
+;; register operands since its encoding is shorter.
+(define_insn "*vec_concatv2sf_sse4_1"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
+ (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
+ "TARGET_SSE4_1"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ insertps\t{$0x10, %2, %0|%0, %2, 0x10}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_data16" "*,1,*,*,*")
+ (set_attr "prefix_extra" "*,1,*,*,*")
+ (set_attr "length_immediate" "*,1,*,*,*")
+ (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*vec_concatv2sf_sse"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*vec_concatv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_insn "*vec_concatv4sf_sse"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:SSEMODE 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single
+;; alternative; see the comment above the inline_secondary_memory_needed
+;; function in i386.c.
+(define_insn "*vec_set<mode>_0_avx"
+ [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m, m,m")
+ (vec_merge:SSEMODE4S
+ (vec_duplicate:SSEMODE4S
+ (match_operand:<ssescalarmode> 2
+ "general_operand" " x,m,*r,x,*rm,x,*r,fF"))
+ (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0, 0,0")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
+ vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}
+ vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
+ #
+ #
+ #"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*,*,*")
+ (set_attr "prefix_extra" "*,*,*,*,1,*,*,*")
+ (set_attr "length_immediate" "*,*,*,*,1,*,*,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*,*,*")])
+
+;; Avoid combining registers from different units in a single
+;; alternative; see the comment above the inline_secondary_memory_needed
+;; function in i386.c.
+(define_insn "*vec_set<mode>_0_sse4_1"
+ [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x, m,m")
+ (vec_merge:SSEMODE4S
+ (vec_duplicate:SSEMODE4S
+ (match_operand:<ssescalarmode> 2
+ "general_operand" " x,m,*r,x,*rm,*r,fF"))
+ (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0, 0,0")
+ (const_int 1)))]
+ "TARGET_SSE4_1"
+ "@
+ insertps\t{$0xe, %2, %0|%0, %2, 0xe}
+ mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ pinsrd\t{$0, %2, %0|%0, %2, 0}
+ #
+ #"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*,*")
+ (set_attr "prefix_extra" "*,*,*,*,1,*,*")
+ (set_attr "length_immediate" "*,*,*,*,1,*,*")
+ (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*,*")])
+
+;; Avoid combining registers from different units in a single
+;; alternative; see the comment above the inline_secondary_memory_needed
+;; function in i386.c.
+(define_insn "*vec_set<mode>_0_sse2"
+ [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m, m,m")
+ (vec_merge:SSEMODE4S
+ (vec_duplicate:SSEMODE4S
+ (match_operand:<ssescalarmode> 2
+ "general_operand" " m,*r,x,x,*r,fF"))
+ (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0, 0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
+
+;; Avoid combining registers from different units in a single
+;; alternative; see the comment above the inline_secondary_memory_needed
+;; function in i386.c.
+(define_insn "vec_set<mode>_0"
+ [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m, m,m")
+ (vec_merge:SSEMODE4S
+ (vec_duplicate:SSEMODE4S
+ (match_operand:<ssescalarmode> 2
+ "general_operand" " m,x,x,*r,fF"))
+ (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0, 0,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF,SF,*,*,*")])
+
+;; The vec_setv4sf operation is a subset of what these patterns match.
+(define_insn "*vec_setv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_AVX"
+{
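+  /* Turn the single-bit merge mask into the insertps destination
+     element field (immediate bits 5:4).  */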
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
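+  /* As above: encode the destination element index in bits 5:4.  */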
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_AVX"
+{
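+  /* insertps with a memory source always loads a single SFmode
+     element and ignores the source element selector, so fold the
+     selector (immediate bits 7:6) into the address and clear it
+     from the immediate.  */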
+ if (MEM_P (operands[2]))
+ {
+ unsigned count_s = INTVAL (operands[3]) >> 6;
+ if (count_s)
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
+ operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
+ }
+ return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+{
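+  /* Same memory-source fixup as the AVX variant above.  */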
+ if (MEM_P (operands[2]))
+ {
+ unsigned count_s = INTVAL (operands[3]) >> 6;
+ if (count_s)
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
+ operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
+ }
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V4SF")])
+
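+;; Replacing element 0 of a vector kept in memory is just a scalar
+;; store to its first element; after reload, emit that store directly.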
+(define_split
+ [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
+ (vec_merge:SSEMODE4S
+ (vec_duplicate:SSEMODE4S
+ (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
+ operands[1]);
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:SSEMODE 0 "register_operand" "")
+ (match_operand:<ssescalarmode> 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
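+;; Extracting element 0 needs no shuffle: the scalar is the low 32
+;; bits of the vector, so reinterpret the operand in SFmode and move.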
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "avx_vextractf128<mode>"
+ [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+ break;
+ case 1:
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn_and_split "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
+ else
+ op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn_and_split "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
+ else
+ op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn_and_split "vec_extract_lo_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (V8HImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (V8HImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn_and_split "vec_extract_lo_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (V16QImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "vec_extract_hi_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vextractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+ [(set (match_operand:SF 0 "register_operand" "=x*rf")
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
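+  /* Element I of a V4SF in memory sits at byte offset 4*I.  */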
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+ DONE;
+})
+
+(define_expand "vec_extract<mode>"
+ [(match_operand:<avxscalarmode> 0 "register_operand" "")
+ (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
+(define_insn "avx_unpckhpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
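+;; Because the 256-bit unpacks stay within lanes, the cross-lane high
+;; interleave takes three steps: both in-lane unpacks, then a lane
+;; permute keeping the high lane of each intermediate.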
+(define_expand "vec_interleave_highv4df"
+ [(set (match_dup 3)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))
+ (set (match_dup 4)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 3)
+ (match_dup 4))
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4DFmode);
+ operands[4] = gen_reg_rtx (V4DFmode);
+})
+
+(define_expand "vec_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+{
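+  /* If no single-insn alternative below can handle this operand
+     combination, force operand 2 into a register.  */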
+ if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
+ operands[2] = force_reg (V2DFmode, operands[2]);
+})
+
+(define_insn "*avx_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
+ "@
+ vunpckhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovddup\t{%H1, %0|%0, %H1}
+ vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*sse3_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movddup\t{%H1, %0|%0, %H1}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,*,1,1")
+ (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*sse2_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,1,1")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
+(define_expand "avx_movddup256"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "nonimmediate_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX")
+
+(define_expand "avx_unpcklpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX")
+
+(define_insn "*avx_unpcklpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x,x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
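+;; As with the high interleave above: two in-lane unpacks, then a lane
+;; permute keeping the low lane of each intermediate.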
+(define_expand "vec_interleave_lowv4df"
+ [(set (match_dup 3)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))
+ (set (match_dup 4)
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 1)
+ (match_dup 2))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_dup 3)
+ (match_dup 4))
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 4) (const_int 5)])))]
+ "TARGET_AVX"
+{
+ operands[3] = gen_reg_rtx (V4DFmode);
+ operands[4] = gen_reg_rtx (V4DFmode);
+})
+
+(define_expand "vec_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+{
+ if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
+ operands[1] = force_reg (V2DFmode, operands[1]);
+})
+
+(define_insn "*avx_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovddup\t{%1, %0|%0, %1}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*sse3_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movddup\t{%1, %0|%0, %1}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,*,1,1")
+ (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*sse2_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,1,1")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && reload_completed"
+ [(const_int 0)]
+{
+ rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+ emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (match_dup 1))
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")])))]
+ "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
+ [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
+{
+ operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
+})
+
+(define_expand "avx_shufpd256"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
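+  /* Each of the four mask bits selects the even or odd element of one
+     source lane; rewrite them as the explicit vec_select indices that
+     avx_shufpd256_1 expects (0-1, 4-5, 2-3, 6-7).  */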
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)));
+ DONE;
+})
+
+(define_insn "avx_shufpd256_1"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_4_to_5_operand" "")
+ (match_operand 5 "const_2_to_3_operand" "")
+ (match_operand 6 "const_6_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
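+  /* Rebuild the immediate: each selector is biased by its slot's base
+     index (0, 4, 2, 6), so strip the bias and pack one bit per slot.  */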
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 4) << 1;
+ mask |= (INTVAL (operands[5]) - 2) << 2;
+ mask |= (INTVAL (operands[6]) - 6) << 3;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_expand "sse2_shufpd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
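+  /* The same mask decoding as avx_shufpd256, for one 128-bit vector.  */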
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)));
+ DONE;
+})
+
+(define_expand "vec_extract_even<mode>"
+ [(match_operand:SSEMODE_EO 0 "register_operand" "")
+ (match_operand:SSEMODE_EO 1 "register_operand" "")
+ (match_operand:SSEMODE_EO 2 "register_operand" "")]
+ ""
+{
+ ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
+ DONE;
+})
+
+(define_expand "vec_extract_odd<mode>"
+ [(match_operand:SSEMODE_EO 0 "register_operand" "")
+ (match_operand:SSEMODE_EO 1 "register_operand" "")
+ (match_operand:SSEMODE_EO 2 "register_operand" "")]
+ ""
+{
+ ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
+ DONE;
+})
+
+;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "*avx_interleave_highv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX"
+ "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_highv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "punpckhqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_lowv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX"
+ "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_lowv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "punpcklqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "x")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_AVX"
+{
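+  /* Pack the two element selectors ({0,1} and {2,3}) back into the
+     two-bit shufpd immediate.  */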
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "0")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
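+  /* As above: repack the selectors into the shufpd immediate.  */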
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "V2DF")])
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhpd\t{%1, %0|%0, %1}
+ vunpckhpd\t{%1, %1, %0|%0, %1, %1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+ (set_attr "prefix_data16" "1,*,*,*,*")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = adjust_address (operands[1], DFmode, 8);")
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ %vmovlpd\t{%1, %0|%0, %1}
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix_data16" "1,*,*,*,*")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V1DF,DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (DFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (DFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "sse2_loadhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
+
+ emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
+ (set_attr "prefix_data16" "1,*,*,*,*")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[0] = adjust_address (operands[0], DFmode, 8);")
+
+(define_expand "sse2_loadlpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
+
+ emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovsd\t{%2, %0|%0, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %1, %0|%0, %1, 2}
+ movhpd\t{%H1, %0|%0, %H1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
+ (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "register_operand" "")
+ (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[0] = adjust_address (operands[0], DFmode, 0);")
+
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*avx_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %0|%0, %2}
+ vmovhps\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %1, %0|%0, %1, 2}
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,1,1,*,*,*")
+ (set_attr "length_immediate" "*,*,*,1,*,*")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+(define_expand "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
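+  /* Only SSE3's movddup can duplicate straight from memory; the
+     SSE2 unpcklpd fallback needs its operand in a register.  */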
+ if (!TARGET_SSE3)
+ operands[1] = force_reg (DFmode, operands[1]);
+})
+
+(define_insn "*vec_dupv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE3"
+ "%vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_SSE2"
+ "unpcklpd\t%0, %0"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*vec_concatv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1)))]
+ "TARGET_SSE3"
+ "%vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_concatv2df_avx"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
+ (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,DF")])
+
+(define_insn "*vec_concatv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
+ "TARGET_SSE"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movsd\t{%1, %0|%0, %1}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "prefix_data16" "*,1,*,*,*")
+ (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
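+;; There is no vector integer negate instruction; open-code it as a
+;; subtraction from zero.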
+(define_expand "neg<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI
+ (match_dup 2)
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "mulv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t[6];
+ int i;
+
+ for (i = 0; i < 6; ++i)
+ t[i] = gen_reg_rtx (V16QImode);
+
+ /* Unpack data such that we've got a source byte in each low byte of
+ each word. We don't care what goes into the high byte of each word.
+     Rather than trying to get zero in there, it is most convenient to
+     let it be a copy of the low byte.  */
+ emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
+ emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
+ emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
+ emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
+
+ /* Multiply words. The end-of-line annotations here give a picture of what
+ the output of that instruction looks like. Dot means don't care; the
+ letters are the bytes of the result with A being the most significant. */
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
+ gen_lowpart (V8HImode, t[0]),
+ gen_lowpart (V8HImode, t[1])));
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
+ gen_lowpart (V8HImode, t[2]),
+ gen_lowpart (V8HImode, t[3])));
+
+ /* Extract the even bytes and merge them back together. */
+ ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
+ DONE;
+})
+
+(define_expand "mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<s>mulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (any_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (any_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_<s>mulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (any_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (any_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*<s>mulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (any_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (any_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulh<u>w\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*avx_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*avx_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
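+  /* Canonicalizing the operands only helps when a real pmulld
+     pattern will match; the SSE2 split below takes registers only.  */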
+ if (TARGET_SSE4_1 || TARGET_AVX)
+ ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
+})
+
+(define_insn "*avx_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmulld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*sse2_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
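+  /* pmulld only exists from SSE4.1 on; synthesize the V4SI multiply
+     from two pmuludq operations on the even-indexed elements of the
+     original and element-shifted inputs.  */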
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V4SImode);
+ t5 = gen_reg_rtx (V4SImode);
+ t6 = gen_reg_rtx (V4SImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply elements 2 and 0. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
+ op1, op2));
+
+ /* Shift both input vectors down one element, so that elements 3
+ and 1 are now in the slots for elements 2 and 0. For K8, at
+ least, this is faster than using a shuffle. */
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
+ gen_lowpart (V1TImode, op1),
+ thirtytwo));
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
+ gen_lowpart (V1TImode, op2),
+ thirtytwo));
+ /* Multiply elements 3 and 1. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
+ t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care
+ what goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+
+ /* Merge the parts back together. */
+ emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
+ DONE;
+})
+
+(define_insn_and_split "mulv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+
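+  /* SSE2 has no 64-bit element multiply; build one from 32x32->64-bit
+     pieces: XOP's multiply-accumulate when available, otherwise three
+     pmuludq partial products combined by shifts and adds.  */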
+ if (TARGET_XOP)
+ {
+ /* op1: A,B,C,D, op2: E,F,G,H */
+ op1 = gen_lowpart (V4SImode, op1);
+ op2 = gen_lowpart (V4SImode, op2);
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V2DImode);
+ t4 = gen_reg_rtx (V2DImode);
+
+ /* t1: B,A,D,C */
+ emit_insn (gen_sse2_pshufd_1 (t1, op1,
+ GEN_INT (1),
+ GEN_INT (0),
+ GEN_INT (3),
+ GEN_INT (2)));
+
+ /* t2: (B*E),(A*F),(D*G),(C*H) */
+ emit_insn (gen_mulv4si3 (t2, t1, op2));
+
+      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
+ emit_insn (gen_xop_phadddq (t3, t2));
+
+      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
+ emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
+
+ /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
+ emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
+ }
+ else
+ {
+ t1 = gen_reg_rtx (V2DImode);
+ t2 = gen_reg_rtx (V2DImode);
+ t3 = gen_reg_rtx (V2DImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+ t6 = gen_reg_rtx (V2DImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, op2)));
+
+      /* Shift input vectors right 32 bits so we can multiply the high
+	 parts.  */
+ emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
+ emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
+
+ /* Multiply high parts by low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, t3)));
+ emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
+ gen_lowpart (V4SImode, t2)));
+
+      /* Shift the cross products back up into the high halves.  */
+ emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
+ emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
+
+ /* Add the three parts together. */
+ emit_insn (gen_addv2di3 (t6, t1, t4));
+ emit_insn (gen_addv2di3 (op0, t6, t5));
+ }
+ DONE;
+})
+
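+;; The widening 16-bit multiplies pair pmullw with pmulh(u)w and then
+;; interleave the low and high halves of each 32-bit product.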
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "sdot_prodv8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")
+ (match_operand:V4SI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_addv4si3 (operands[0], operands[3], t));
+ DONE;
+})
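+
+;; Note: pmaddwd is exactly the signed v8hi dot-product step; each
+;; 32-bit result lane is a[2i]*b[2i] + a[2i+1]*b[2i+1], so the expander
+;; only needs one extra paddd to fold in the accumulator:
+;;   dest[i] = acc[i] + a[2i] * b[2i] + a[2i+1] * b[2i+1];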
+
+(define_expand "udot_prodv4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")
+ (match_operand:V2DI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4;
+
+ t1 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
+ emit_insn (gen_addv2di3 (t1, t1, operands[3]));
+
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (32)));
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
+ gen_lowpart (V1TImode, operands[2]),
+ GEN_INT (32)));
+
+ t4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
+
+ emit_insn (gen_addv2di3 (operands[0], t1, t4));
+ DONE;
+})
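+
+;; Note: lacking an all-lanes 32x32->64 multiply, the unsigned v4si dot
+;; product is computed in two halves: the first pmuludq covers lanes 0
+;; and 2 (plus the accumulator), then a 32-bit whole-register right
+;; shift moves lanes 1 and 3 into even position for a second pmuludq.
+;; Net effect, per 64-bit result lane j:
+;;   dest[j] = acc[j] + (uint64_t) a[2j]   * b[2j]
+;;                    + (uint64_t) a[2j+1] * b[2j+1];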
+
+(define_insn "*avx_ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psra<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_lshrv1ti3"
+ [(set (match_operand:V1TI 0 "register_operand" "=x")
+ (lshiftrt:V1TI
+ (match_operand:V1TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_lshrv1ti3"
+ [(set (match_operand:V1TI 0 "register_operand" "=x")
+ (lshiftrt:V1TI
+ (match_operand:V1TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "psrldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psrl<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_ashlv1ti3"
+ [(set (match_operand:V1TI 0 "register_operand" "=x")
+ (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_ashlv1ti3"
+ [(set (match_operand:V1TI 0 "register_operand" "=x")
+ (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "pslldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psll<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (ashift:V1TI
+ (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_lowpart (V1TImode, operands[0]);
+ operands[1] = gen_lowpart (V1TImode, operands[1]);
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (lshiftrt:V1TI
+ (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_lowpart (V1TImode, operands[0]);
+ operands[1] = gen_lowpart (V1TImode, operands[1]);
+})
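+
+;; Note: vec_shl_<mode> and vec_shr_<mode> shift the 128-bit register
+;; as a whole rather than per element, hence the V1TI rewriting above.
+;; The count operand is in bits but must be a multiple of 8, since
+;; pslldq/psrldq move whole bytes; sse2_ashlv1ti3 and sse2_lshrv1ti3
+;; divide it by 8 when forming the immediate, so a shift by 32 bits is
+;; emitted as pslldq/psrldq $4.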
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (umaxmin:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set (attr "prefix_extra")
+ (if_then_else (match_operand:V16QI 0 "" "")
+ (const_string "0")
+ (const_string "1")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (umaxmin:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
+
+(define_insn "*<code>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umaxmin:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
+ "p<maxmin_int>b\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (smaxmin:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set (attr "prefix_extra")
+ (if_then_else (match_operand:V8HI 0 "" "")
+ (const_string "0")
+ (const_string "1")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>v8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (smaxmin:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
+
+(define_insn "*<code>v8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smaxmin:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
+ "p<maxmin_int>w\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
+ else
+ {
+ rtx op0 = operands[0], op2 = operands[2], op3 = op0;
+ if (rtx_equal_p (op3, op2))
+ op3 = gen_reg_rtx (V8HImode);
+ emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
+ emit_insn (gen_addv8hi3 (op0, op3, op2));
+ DONE;
+ }
+})
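+
+;; Note: the pre-SSE4.1 path above relies on the identity
+;;   umax (a, b) == us_sub (a, b) + b
+;; where us_sub saturates at zero: if a <= b the subtraction gives 0
+;; and the add yields b, otherwise it gives a - b and the add yields a.
+;; The rtx_equal_p check merely keeps the intermediate result from
+;; clobbering op2 before the final add.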
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
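+
+;; Note: the xops[] array in these fallbacks follows the vcond operand
+;; layout expected by ix86_expand_int_vcond: xops[0] is the destination,
+;; xops[1]/xops[2] the then/else values, xops[3] the comparison, and
+;; xops[4]/xops[5] its arguments.  smax thus selects op1 when op1 > op2;
+;; the smin/umin expanders below swap xops[1] and xops[2] so the same
+;; comparison selects op2 instead.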
+
+(define_insn "*sse4_1_<code><mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smaxmin:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "smaxv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE4_2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "umaxv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_insn "*sse4_1_<code><mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umaxmin:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umaxv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE4_2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_expand "sminv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE4_2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "")
+ (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
+ (match_operand:SSEMODE24 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_expand "uminv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE4_2"
+{
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse2_eq<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "")
+ (eq:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2 && !TARGET_XOP "
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*avx_eq<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (eq:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set (attr "prefix_extra")
+ (if_then_else (match_operand:V2DI 0 "" "")
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_eq<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (eq:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && !TARGET_XOP
+ && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "")
+ (match_operand:V2DI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE4_1"
+ "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
+
+(define_insn "*sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
+ "pcmpeqq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_gt<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (gt:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set (attr "prefix_extra")
+ (if_then_else (match_operand:V2DI 0 "" "")
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_gt<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (gt:SSEMODE124
+ (match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && !TARGET_XOP"
+ "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_gtv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (gt:V2DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_2"
+ "pcmpgtq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
+ (if_then_else:SSEMODE124C8
+ (match_operator 3 ""
+ [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODE124C8 1 "general_operand" "")
+ (match_operand:SSEMODE124C8 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
+ (if_then_else:SSEMODE124C8
+ (match_operator 3 ""
+ [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODE124C8 1 "general_operand" "")
+ (match_operand:SSEMODE124C8 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel bitwise logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_dup 2)))]
+ "TARGET_SSE2"
+{
+ int i, n = GET_MODE_NUNITS (<MODE>mode);
+ rtvec v = rtvec_alloc (n);
+
+ for (i = 0; i < n; ++i)
+ RTVEC_ELT (v, i) = constm1_rtx;
+
+ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
+})
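+
+;; Note: there is no vector NOT instruction, so one_cmpl<mode>2 builds
+;; a constant vector with every element set to -1 and emits pxor;
+;; element-wise, ~x == x ^ -1.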
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (and:AVX256MODEI
+ (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vandnps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
+(define_insn "*sse_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "(TARGET_SSE && !TARGET_SSE2)"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpandn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*andnottf3"
+ [(set (match_operand:TF 0 "register_operand" "=x")
+ (and:TF
+ (not:TF (match_operand:TF 1 "register_operand" "0"))
+ (match_operand:TF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (any_logic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (any_logic:AVX256MODEI
+ (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
+(define_insn "*sse_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (any_logic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "(TARGET_SSE && !TARGET_SSE2)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<logic>ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (any_logic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (any_logic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<logic>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>tf3"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (any_logic:TF
+ (match_operand:TF 1 "nonimmediate_operand" "")
+ (match_operand:TF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
+
+(define_insn "*<code>tf3"
+ [(set (match_operand:TF 0 "register_operand" "=x")
+ (any_logic:TF
+ (match_operand:TF 1 "nonimmediate_operand" "%0")
+ (match_operand:TF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
+ "p<logic>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "vec_pack_trunc_v8hi"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1 = gen_lowpart (V16QImode, operands[1]);
+ rtx op2 = gen_lowpart (V16QImode, operands[2]);
+ ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+ DONE;
+})
+
+(define_expand "vec_pack_trunc_v4si"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1 = gen_lowpart (V8HImode, operands[1]);
+ rtx op2 = gen_lowpart (V8HImode, operands[2]);
+ ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+ DONE;
+})
+
+(define_expand "vec_pack_trunc_v2di"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1 = gen_lowpart (V4SImode, operands[1]);
+ rtx op2 = gen_lowpart (V4SImode, operands[2]);
+ ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
+ DONE;
+})
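+
+;; Note: vec_pack_trunc_* is a modular (non-saturating) narrowing, so
+;; the saturating pack{ss,us} instructions below cannot implement it
+;; directly.  Instead each input is reinterpreted in the narrower mode
+;; via gen_lowpart and ix86_expand_vec_extract_even_odd keeps only the
+;; even elements, i.e. the low half of each wide element on this
+;; little-endian target.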
+
+(define_insn "*avx_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX"
+ "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_AVX"
+ "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_AVX"
+ "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "vec_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pinsr<ssevecsize>"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (vec_merge:SSEMODE124
+ (vec_duplicate:SSEMODE124
+ (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
+ (match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ if (MEM_P (operands[2]))
+ return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set (attr "prefix_extra")
+ (if_then_else (match_operand:V8HI 0 "" "")
+ (const_string "0")
+ (const_string "1")))
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pinsrb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ if (MEM_P (operands[2]))
+ return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_duplicate:V8HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ if (MEM_P (operands[2]))
+ return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; This pattern must come before sse2_loadld, as it is the preferred
+;; match when both patterns apply.
+(define_insn "*sse4_1_pinsrd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_AVX && TARGET_64BIT"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_rex" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (zero_extend:SWI48
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
+ "TARGET_SSE4_1"
+ "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb_memory"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pextrw_<mode>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (zero_extend:SWI48
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
+ "TARGET_SSE2"
+ "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrw_memory"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrd_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+ "TARGET_64BIT && TARGET_SSE4_1"
+ "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; This pattern must come before *vec_extractv2di_1_sse, as it is the
+;; preferred match when both patterns apply.
+(define_insn "*sse4_1_pextrq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+ "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_rex" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshufd"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "sse2_pshufd_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshuflw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "sse2_pshuflw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshufhw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "sse2_pshufhw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (match_operand 2 "const_4_to_7_operand" "")
+ (match_operand 3 "const_4_to_7_operand" "")
+ (match_operand 4 "const_4_to_7_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= (INTVAL (operands[2]) - 4) << 0;
+ mask |= (INTVAL (operands[3]) - 4) << 2;
+ mask |= (INTVAL (operands[4]) - 4) << 4;
+ mask |= (INTVAL (operands[5]) - 4) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_loadd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (match_dup 2)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovd\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,TI,V4SF")])
+
+(define_insn "sse2_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movd\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI,TI,V4SF,SF")])
+
+(define_insn_and_split "sse2_stored"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x,Yi")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#"
+ "&& reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
+
+(define_insn_and_split "*vec_ext_v4si_mem"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (vec_select:SI
+ (match_operand:V4SI 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
+ DONE;
+})
+
+(define_expand "sse_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE")
+
+(define_insn "*sse2_storeq_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
+ (parallel [(const_int 0)])))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ #
+ #
+ mov{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,imov")
+ (set_attr "mode" "*,*,DI")])
+
+(define_insn "*sse2_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE
+ && reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
+
+(define_insn "*vec_extractv2di_1_rex64_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}
+ mov{q}\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
+ (set_attr "length_immediate" "*,1,*,*")
+ (set_attr "memory" "*,none,*,*")
+ (set_attr "prefix" "vex,vex,vex,orig")
+ (set_attr "mode" "V2SF,TI,TI,DI")])
+
+(define_insn "*vec_extractv2di_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$8, %0|%0, 8}
+ movq\t{%H1, %0|%0, %H1}
+ mov{q}\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
+ (set_attr "length_immediate" "*,1,*,*")
+ (set_attr "memory" "*,none,*,*")
+ (set_attr "mode" "V2SF,TI,TI,DI")])
+
+(define_insn "*vec_extractv2di_1_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft1,ssemov")
+ (set_attr "length_immediate" "*,1,*")
+ (set_attr "memory" "*,none,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+(define_insn "*vec_extractv2di_1_sse2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_64BIT
+ && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$8, %0|%0, 8}
+ movq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft1,ssemov")
+ (set_attr "length_immediate" "*,1,*")
+ (set_attr "memory" "*,none,*")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
+(define_insn "*vec_extractv2di_1_sse"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_dupv4si_avx"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "register_operand" "x,m")))]
+ "TARGET_AVX"
+ "@
+ vpshufd\t{$0, %1, %0|%0, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "length_immediate" "1,0")
+ (set_attr "prefix_extra" "0,1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "register_operand" " Y2,0")))]
+ "TARGET_SSE"
+ "@
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_dupv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
+ vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,DF")])
+
+(define_insn "*vec_dupv2di_sse3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
+ "TARGET_SSE3"
+ "@
+ punpcklqdq\t%0, %0
+ movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "TI,DF")])
+
+(define_insn "*vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "register_operand" " 0 ,0")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t%0, %0
+ movlhps\t%0, %0"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_concatv2si_avx"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
+ (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
+ "TARGET_AVX"
+ "@
+ vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vpunpckldq\t{%2, %1, %0|%0, %1, %2}
+ vmovd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_extra" "1,*,*,*,*")
+ (set_attr "length_immediate" "1,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,DI,DI")])
+
+(define_insn "*vec_concatv2si_sse4_1"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
+ (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
+ "TARGET_SSE4_1"
+ "@
+ pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_extra" "1,*,*,*,*")
+ (set_attr "length_immediate" "1,*,*,*,*")
+ (set_attr "mode" "TI,TI,TI,DI,DI")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*vec_concatv2si_sse2"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
+ (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
+ "TARGET_SSE2"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "TI,TI,DI,DI")])
+
+(define_insn "*vec_concatv2si_sse"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
+ (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,V4SF,DI,DI")])
+
+(define_insn "*vec_concatv4si_1_avx"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " x,x")
+ (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,V2SF")])
+
+(define_insn "*vec_concatv4si_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
+ (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
+ "!TARGET_64BIT && TARGET_AVX"
+ "@
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "1")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,V2SF")])
+
+(define_insn "vec_concatv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
+ (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
+ "!TARGET_64BIT && TARGET_SSE"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di_rex64_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
+ "TARGET_64BIT && TARGET_AVX"
+ "@
+ vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
+ (set_attr "prefix_extra" "1,*,*,*,*,*")
+ (set_attr "length_immediate" "1,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
+
+;; movd instead of movq is required to work around assemblers that
+;; mishandle movq between a general and an SSE register.
+(define_insn "*vec_concatv2di_rex64_sse4_1"
+ [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
+ (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
+ "TARGET_64BIT && TARGET_SSE4_1"
+ "@
+ pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
+ movq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "prefix_rex" "1,*,1,*,*,*,*")
+ (set_attr "prefix_extra" "1,*,*,*,*,*,*")
+ (set_attr "length_immediate" "1,*,*,*,*,*,*")
+ (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
+
+;; movd instead of movq is required to work around assemblers that
+;; mishandle movq between a general and an SSE register.
+(define_insn "*vec_concatv2di_rex64_sse"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
+ (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
+ "TARGET_64BIT && TARGET_SSE"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "prefix_rex" "*,1,*,*,*,*")
+ (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
+
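+;; The vec_unpack expanders below share one calling convention: the
+;; second argument of ix86_expand_sse4_unpack/ix86_expand_sse_unpack
+;; selects unsigned (true) versus signed (false) extension, and the
+;; third selects the high (true) versus low (false) half of the input.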
+(define_expand "vec_unpacku_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
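+;; pavgb/pavgw compute the rounded unsigned average
+;;   dst[i] = (op1[i] + op2[i] + 1) >> 1
+;; which the patterns below spell out as widen, add, add one, logical
+;; shift right and truncate.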
+(define_expand "sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" ""))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
+
+(define_insn "*avx_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "vpavgb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "pavgb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
+
+(define_insn "*avx_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+;; The correct representation for this is absolutely enormous, and
+;; surely not generally useful.
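+;; psadbw sums the absolute differences of the byte pairs in each
+;; 64-bit half and zero-extends the two 16-bit sums into the
+;; corresponding quadwords, hence the unspec.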
+(define_insn "*avx_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_AVX"
+ "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE2"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
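+;; The movmsk patterns copy the sign bit of every element into the
+;; low-order bits of a general register and clear the remaining bits.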
+(define_insn "avx_movmsk<ssemodesuffix>256"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_movmsk<ssemodesuffix>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse2_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "%vpmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
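+;; maskmovdqu stores only the bytes of operand 1 whose corresponding
+;; mask byte in operand 2 has its high bit set, to the memory addressed
+;; by %di/%rdi; the (match_dup 0) input models the bytes left untouched.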
+(define_expand "sse2_maskmovdqu"
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2")
+
+(define_insn "*sse2_maskmovdqu"
+ [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && !TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ ;; The implicit %rdi operand confuses default length_vex computation.
+ (set_attr "length_vex" "3")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_maskmovdqu_rex64"
+ [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ ;; The implicit %rdi operand confuses default length_vex computation.
+ (set (attr "length_vex")
+ (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse_ldmxcsr"
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
+ UNSPECV_LDMXCSR)]
+ "TARGET_SSE"
+ "%vldmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "load")])
+
+(define_insn "sse_stmxcsr"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
+ "TARGET_SSE"
+ "%vstmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "mxcsr")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "store")])
+
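+;; The fence expanders below wrap the fence in a volatile BLKmode MEM
+;; on a scratch address so that it is treated as reading and writing
+;; all of memory.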
+(define_expand "sse_sfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse_sfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "sfence"
+ [(set_attr "type" "sse")
+ (set_attr "length_address" "0")
+ (set_attr "atom_sse_attr" "fence")
+ (set_attr "memory" "unknown")])
+
+(define_insn "sse2_clflush"
+ [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+ UNSPECV_CLFLUSH)]
+ "TARGET_SSE2"
+ "clflush\t%a0"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "fence")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_mfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_mfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_64BIT || TARGET_SSE2"
+ "mfence"
+ [(set_attr "type" "sse")
+ (set_attr "length_address" "0")
+ (set_attr "atom_sse_attr" "fence")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_lfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+ "lfence"
+ [(set_attr "type" "sse")
+ (set_attr "length_address" "0")
+ (set_attr "atom_sse_attr" "lfence")
+ (set_attr "memory" "unknown")])
+
+(define_insn "sse3_mwait"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")]
+ UNSPECV_MWAIT)]
+ "TARGET_SSE3"
+;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
+;; Since 32bit register operands are implicitly zero extended to 64bit,
+;; we only need to set up 32bit registers.
+ "mwait"
+ [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && !TARGET_64BIT"
+ "monitor\t%0, %1, %2"
+ [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
+;; RCX and RDX are used. Since 32bit register operands are implicitly
+;; zero extended to 64bit, we only need to set up 32bit registers.
+ "monitor"
+ [(set_attr "length" "3")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSSE3 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
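+;; The horizontal add/subtract patterns spell out the full semantics;
+;; e.g. phaddw computes
+;;   dst = { a0+a1, a2+a3, a4+a5, a6+a7, b0+b1, b2+b3, b4+b5, b6+b7 }
+;; hence the trees of vec_concats over vec_selects below.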
+(define_insn "*avx_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphaddd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phadddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphsubd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubdv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "complex")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
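+;; pmaddubsw multiplies unsigned bytes of operand 1 by the
+;; corresponding signed bytes of operand 2 and adds adjacent word
+;; products with signed saturation:
+;;   dst[i] = ss_plus (op1[2i] * op2[2i], op1[2i+1] * op2[2i+1])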
+(define_insn "*avx_pmaddubsw128"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_AVX"
+ "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmaddubsw128"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmaddubsw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_plus:V4HI
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "atom_unit" "simul")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
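+;; pmulhrsw is a rounded high multiply of signed 16-bit elements:
+;;   dst[i] = (((op1[i] * op2[i]) >> 14) + 1) >> 1
+;; i.e. the full product scaled back with round-to-nearest.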
+(define_expand "ssse3_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*ssse3_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "ssse3_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 14))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*ssse3_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 14))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
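+;; pshufb selects each destination byte via the low four bits (three
+;; for the V8QI form) of the corresponding control byte in operand 2,
+;; or zeroes it when the control byte's high bit is set.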
+(define_insn "*avx_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_AVX"
+ "vpshufb\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_AVX"
+ "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<ssevecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (unspec:MMXMODEI
+ [(match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<mmxvecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
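+;; The palignr patterns take operand 3 as a shift amount in bits (a
+;; multiple of 8, enforced by const_0_to_255_mul_8_operand) and divide
+;; it by 8 to form the byte count the instruction expects.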
+(define_insn "*avx_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "x")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_palignrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSSE3"
+ "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_SSSE3"
+ "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "1")
+ (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4a_movnt<mode>"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4a_vmmovnt<mode>"
+ [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+ (unspec:<ssescalarmode>
+ [(vec_select:<ssescalarmode>
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<ssescalarmode>")])
+
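+;; extrq and insertq operate on a bit field in the low quadword; in
+;; the immediate forms the first immediate (operand 2 for extrq,
+;; operand 3 for insertq) gives the field length in bits and the next
+;; immediate the starting bit index.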
+(define_insn "sse4a_extrqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_0_to_255_operand" "")
+ (match_operand 3 "const_0_to_255_operand" "")]
+ UNSPEC_EXTRQI))]
+ "TARGET_SSE4A"
+ "extrq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "2")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
+ "TARGET_SSE4A"
+ "extrq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_0_to_255_operand" "")
+ (match_operand 4 "const_0_to_255_operand" "")]
+ UNSPEC_INSERTQI))]
+ "TARGET_SSE4A"
+ "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "length_immediate" "2")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
+ "TARGET_SSE4A"
+ "insertq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.1 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (vec_merge:AVXMODEF2P
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_AVX"
+ "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse4_1_blend<ssemodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
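+;; The non-VEX blendv instructions use xmm0 as an implicit third
+;; operand, hence the "Yz" constraint and the *_not_xmm0 predicates
+;; on the remaining operands.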
+(define_insn "sse4_1_blendv<ssemodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
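+;; dpps/dppd compute a dot product under control of the immediate:
+;; the high nibble of operand 3 selects which input elements
+;; participate, the low nibble selects which result elements receive
+;; the sum.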
+(define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_AVX"
+ "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse4_1_dp<ssemodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_movntdqa"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
+ UNSPEC_MOVNTDQA))]
+ "TARGET_SSE4_1"
+ "%vmovntdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_AVX"
+ "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_SSE4_1"
+ "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE4_1"
+ "packusdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pblendvb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendvb"
+ [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
+ (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "Yz")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "pblendvb\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_AVX"
+ "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_SSE4_1"
+ "pblendw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_phminposuw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PHMINPOSUW))]
+ "TARGET_SSE4_1"
+ "%vphminposuw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
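+;; The pmovsx/pmovzx patterns widen the low elements of the source: the
+;; vec_select with an ascending parallel picks out the low half (or
+;; quarter/eighth) before the extension. Roughly, for pmov<extsuffix>bw:
+;; dst.word[i] = extend (src.byte[i]), i = 0..7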
+(define_insn "sse4_1_<code>v8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (any_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (any_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (any_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_<code>v2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; vtestps/vtestpd are very similar to comiss and ucomiss when
+;; setting FLAGS_REG. But they are not really compare instructions.
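+;; A sketch of the semantics (only the sign bit of each element
+;; participates):
+;; ZF = ((op1 & op0) has no sign bits set)
+;; CF = ((op1 & ~op0) has no sign bits set)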
+(define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VTESTP))]
+ "TARGET_AVX"
+ "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
+;; But it is not really a compare instruction.
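+;; A sketch of the semantics (all bits participate):
+;; ZF = ((op1 & op0) == 0), CF = ((op1 & ~op0) == 0)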
+(define_insn "avx_ptest256"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
+ (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_AVX"
+ "vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "sse4_1_ptest"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1"
+ "%vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_round<ssemodesuffix>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_AVX"
+ "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_round<ssemodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_ROUND"
+ "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_round<ssescalarmodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_round<ssescalarmodesuffix>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_ROUND"
+ "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.2 string/text processing instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
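+;; The combined pattern below exposes all three results (%ecx, %xmm0 and
+;; the flags) so any of them can be matched; the splitter then inspects
+;; the REG_UNUSED notes and emits only the single-output pcmpestri /
+;; pcmpestrm variants whose results are actually live.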
+(define_insn_and_split "sse4_2_pcmpestr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
+ (match_operand:SI 3 "register_operand" "a,a")
+ (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
+ (match_operand:SI 5 "register_operand" "d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
+ operands[2], operands[3],
+ operands[4], operands[5],
+ operands[6]));
+ if (!(flags || ecx || xmm0))
+ emit_note (NOTE_INSN_DELETED);
+
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
+ (match_operand:SI 3 "register_operand" "a,a,a,a")
+ (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 5 "register_operand" "d,d,d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPESTR))
+ (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
+ (clobber (match_scratch:SI 1 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
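+;; The implicit-length (pcmpistr) patterns below use the same
+;; REG_UNUSED-driven splitting scheme as pcmpestr above.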
+(define_insn_and_split "sse4_2_pcmpistr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
+ (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
+ operands[3], operands[4]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
+ operands[3], operands[4]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
+ operands[2], operands[3],
+ operands[4]));
+ if (!(flags || ecx || xmm0))
+ emit_note (NOTE_INSN_DELETED);
+
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPISTR))
+ (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
+ (clobber (match_scratch:SI 1 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; XOP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; XOP parallel integer multiply/add instructions.
+;; Note that the XOP multiply/add instructions
+;; a[i] = b[i] * c[i] + d[i];
+;; do not allow the value being added (d[i]) to be a memory operand.
+(define_insn "xop_pmacsww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (mult:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ss_plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacssdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
+;; fake it with a multiply/add. In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case
+;; where the target is the same as either operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_low"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacsdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
+;; fake it with a multiply/add. In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case where
+;; the target is the same as either operands[1] or operands[2].
+(define_insn_and_split "xop_mulv2div2di3_high"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))))]
+ "TARGET_XOP"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "TI")])
+
+;; XOP parallel integer multiply/add instructions for the intrinsics.
+(define_insn "xop_pmacsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ss_plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmacswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ss_plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pmadcswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
+ "TARGET_XOP"
+ "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; XOP parallel XMM conditional moves
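+;; vpcmov is a per-bit select; roughly:
+;; dst = (op1 & op3) | (op2 & ~op3)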
+(define_insn "xop_pcmov_<mode>"
+ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE
+ (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
+ (match_operand:SSEMODE 1 "register_operand" "x,x")
+ (match_operand:SSEMODE 2 "nonimmediate_operand" "xm,x")))]
+ "TARGET_XOP"
+ "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
+(define_insn "xop_pcmov_<mode>256"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
+ (if_then_else:AVX256MODE
+ (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
+ (match_operand:AVX256MODE 1 "register_operand" "x,x")
+ (match_operand:AVX256MODE 2 "nonimmediate_operand" "xm,x")))]
+ "TARGET_XOP"
+ "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
+;; XOP horizontal add/subtract instructions
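+;; E.g., vphaddbw adds adjacent signed byte pairs into words; roughly:
+;; dst.word[i] = sext (src.byte[2*i]) + sext (src.byte[2*i+1])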
+(define_insn "xop_phaddbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddbd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddbq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphaddwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphaddwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadddq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphadddq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphaddubw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_XOP"
+ "vphaddubd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddubq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_XOP"
+ "vphaddubq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphadduwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phadduwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_XOP"
+ "vphadduwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phaddudq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphaddudq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (minus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_XOP"
+ "vphsubbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (minus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_XOP"
+ "vphsubwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "xop_phsubdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (minus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_XOP"
+ "vphsubdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+;; XOP permute instructions
+(define_insn "xop_pperm"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
+ UNSPEC_XOP_PERMUTE))]
+ "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; XOP pack instructions that combine two vectors into a smaller vector
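+;; Operand 3 (marked with "use") is expected to hold the vpperm control
+;; bytes that select the truncated halves of operands 1 and 2.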
+(define_insn "xop_pperm_pack_v2di_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (vec_concat:V4SI
+ (truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "x,x"))
+ (truncate:V2SI
+ (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+ "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v4si_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x,x"))
+ (truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+ "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_pperm_pack_v8hi_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+ (vec_concat:V16QI
+ (truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x,x"))
+ (truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
+ "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; XOP packed rotate instructions
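+;; The rotl/rotr expanders below broadcast a non-constant scalar count
+;; into a vector and use the variable-count xop_vrotl form; a rotate
+;; right is simply a rotate left by the negated count.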
+(define_expand "rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+ }
+})
+
+(define_expand "rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_XOP"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_neg<mode>2 (neg, reg));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
+})
+
+(define_insn "xop_rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_XOP"
+{
+ operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vrotr<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (reg, operands[2]));
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "vrotl<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
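+;; Variable vprot: each element rotates left when its count is
+;; non-negative and right by the negated count otherwise, which is
+;; exactly what the if_then_else below expresses.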
+(define_insn "xop_vrotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
+ (const_int 0))
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
+ (match_dup 2))
+ (rotatert:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+;; XOP packed shift instructions.
+;; FIXME: add V2DI back in
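+;; vpsha/vpshl likewise take per-element signed counts: a negative count
+;; shifts right (arithmetically for vpsha, logically for vpshl), so the
+;; vlshr/vashr expanders simply negate the count vector.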
+(define_expand "vlshr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashl<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_XOP"
+{
+ emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "xop_ashl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
+ (match_dup 2))
+ (ashiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_lshl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
+ (match_dup 2))
+ (lshiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "mode" "TI")])
+
+;; SSE2 doesn't have some shift variants, so define versions for XOP.
+(define_expand "ashlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "lshlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
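+;; For a constant scalar count we can negate at expand time and
+;; broadcast the negated value; otherwise broadcast first and negate the
+;; whole vector.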
+(define_expand "ashrv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ rtx ele = ((CONST_INT_P (operands[2]))
+ ? GEN_INT (- INTVAL (operands[2]))
+ : operands[2]);
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = ele;
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+
+ DONE;
+})
+
+(define_expand "ashrv2di3"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")]
+ "TARGET_XOP"
+{
+ rtvec vs = rtvec_alloc (2);
+ rtx par = gen_rtx_PARALLEL (V2DImode, vs);
+ rtx reg = gen_reg_rtx (V2DImode);
+ rtx ele;
+
+ if (CONST_INT_P (operands[2]))
+ ele = GEN_INT (- INTVAL (operands[2]));
+ else if (GET_MODE (operands[2]) != DImode)
+ {
+ rtx move = gen_reg_rtx (DImode);
+ ele = gen_reg_rtx (DImode);
+ convert_move (move, operands[2], false);
+ emit_insn (gen_negdi2 (ele, move));
+ }
+ else
+ {
+ ele = gen_reg_rtx (DImode);
+ emit_insn (gen_negdi2 (ele, operands[2]));
+ }
+
+ RTVEC_ELT (vs, 0) = ele;
+ RTVEC_ELT (vs, 1) = ele;
+ emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+;; XOP FRCZ support
+(define_insn "xop_frcz<mode>2"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x")
+ (unspec:FMAMODE
+ [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_XOP"
+ "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+;; scalar insns
+(define_expand "xop_vmfrcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
+ UNSPEC_FRCZ)
+ (match_dup 3)
+ (const_int 1)))]
+ "TARGET_XOP"
+{
+ operands[3] = CONST0_RTX (<MODE>mode);
+})
+
+(define_insn "*xop_vmfrcz_<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ)
+ (match_operand:SSEMODEF2P 2 "const0_operand")
+ (const_int 1)))]
+ "TARGET_XOP"
+ "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xop_maskcmp<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_maskcmp_uns<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_rep" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Version of pcom*u* that is called from the intrinsics. It keeps
+;; pcomequ* and pcomneu* from being converted to the signed equivalents,
+;; in case somebody needs the exact instruction generated for the
+;; intrinsic.
+(define_insn "xop_maskcmp_uns2<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
+ UNSPEC_XOP_UNSIGNED_CMP))]
+ "TARGET_XOP"
+ "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Pcomtrue and pcomfalse support. These instructions are essentially
+;; useless (the result is all-ones or all-zeros), but are included here
+;; for completeness.
+(define_insn "xop_pcom_tf<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_XOP_TRUEFALSE))]
+ "TARGET_XOP"
+{
+ return ((INTVAL (operands[3]) != 0)
+ ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "0")
+ (set_attr "prefix_extra" "2")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "xop_vpermil2<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
+ (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
+ (match_operand:SI 4 "const_0_to_3_operand" "n")]
+ UNSPEC_VPERMIL2))]
+ "TARGET_XOP"
+ "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AES and PCLMUL instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "*avx_aesenc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenc\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesenc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENC))]
+ "TARGET_AES"
+ "aesenc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesenclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENCLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesenclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENCLAST))]
+ "TARGET_AES"
+ "aesenclast\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesdec"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDEC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdec\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesdec"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDEC))]
+ "TARGET_AES"
+ "aesdec\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesdeclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDECLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesdeclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDECLAST))]
+ "TARGET_AES"
+ "aesdeclast\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesimc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_AESIMC))]
+ "TARGET_AES"
+ "%vaesimc\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aeskeygenassist"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_AESKEYGENASSIST))]
+ "TARGET_AES"
+ "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*vpclmulqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_PCLMUL))]
+ "TARGET_PCLMUL && TARGET_AVX"
+ "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "pclmulqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_PCLMUL))]
+ "TARGET_PCLMUL"
+ "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "avx_vzeroall"
+ [(match_par_dup 0 [(const_int 0)])]
+ "TARGET_AVX"
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_VZEROALL);
+
+ for (regno = 0; regno < nregs; regno++)
+ XVECEXP (operands[0], 0, regno + 1)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
+ CONST0_RTX (V8SImode));
+})
+
+(define_insn "*avx_vzeroall"
+ [(match_parallel 0 "vzeroall_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
+ "TARGET_AVX"
+ "vzeroall"
+ [(set_attr "type" "sse")
+ (set_attr "modrm" "0")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+;; Clear the upper 128 bits of the AVX registers; this is equivalent to
+;; a NOP if the upper 128 bits are unused.
+(define_insn "avx_vzeroupper"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
+ UNSPECV_VZEROUPPER)]
+ "TARGET_AVX"
+ "vzeroupper"
+ [(set_attr "type" "sse")
+ (set_attr "modrm" "0")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn_and_split "vec_dup<mode>"
+ [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
+ (vec_duplicate:AVX256MODE24P
+ (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
+ "TARGET_AVX"
+ "@
+ vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
+ #"
+ "&& reload_completed && REG_P (operands[1])"
+ [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
+ (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_vbroadcastf128_<mode>"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
+ (vec_concat:AVX256MODE
+ (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
+ (match_dup 1)))]
+ "TARGET_AVX"
+ "@
+ vbroadcastf128\t{%1, %0|%0, %1}
+ vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
+ vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
+ [(set_attr "type" "ssemov,sselog1,sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "0,1,1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V8SF,V8SF")])
+
+;; Recognize a broadcast expressed as a vec_select, as produced by
+;; builtin_vec_perm.  If the input happens to be in memory, use vbroadcast;
+;; otherwise use vpermilp (and, for 256-bit modes, vperm2f128).
+(define_insn "*avx_vperm_broadcast_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+ (vec_select:V4SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
+ (match_parallel 2 "avx_vbroadcast_operand"
+ [(match_operand 3 "const_int_operand" "C,n,n")])))]
+ "TARGET_AVX"
+{
+ int elt = INTVAL (operands[3]);
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
+ return "vbroadcastss\t{%1, %0|%0, %1}";
+ case 2:
+ operands[2] = GEN_INT (elt * 0x55);
+ return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "ssemov,ssemov,sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "0,0,1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF,SF,V4SF")])
+
+(define_insn_and_split "*avx_vperm_broadcast_<mode>"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
+ (vec_select:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
+ (match_parallel 2 "avx_vbroadcast_operand"
+ [(match_operand 3 "const_int_operand" "C,n,n")])))]
+ "TARGET_AVX"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
+{
+ rtx op0 = operands[0], op1 = operands[1];
+ int elt = INTVAL (operands[3]);
+
+ if (REG_P (op1))
+ {
+ int mask;
+
+ /* Shuffle the element we care about into all elements of the 128-bit lane.
+ The other lane gets shuffled too, but we don't care. */
+ if (<MODE>mode == V4DFmode)
+ mask = (elt & 1 ? 15 : 0);
+ else
+ mask = (elt & 3) * 0x55;
+ emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
+
+ /* Shuffle the lane we care about into both lanes of the dest. */
+ mask = (elt / (<ssescalarnum> / 2)) * 0x11;
+ emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
+ DONE;
+ }
+
+ operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
+ elt * GET_MODE_SIZE (<avxscalarmode>mode));
+})
+
+(define_expand "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
+ (vec_select:AVXMODEFDP
+ (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")))]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[2]);
+ rtx perm[<ssescalarnum>];
+
+ perm[0] = GEN_INT (mask & 1);
+ perm[1] = GEN_INT ((mask >> 1) & 1);
+ if (<MODE>mode == V4DFmode)
+ {
+ perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
+ perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
+ }
+
+ operands[2]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
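+
+;; As an illustration: for V4DF with operands[2] == 0x5 (binary 0101) the
+;; code above builds the selector { 1 0 3 2 }, i.e. the two doubles are
+;; swapped within each 128-bit lane.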
+
+(define_expand "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
+ (vec_select:AVXMODEFSP
+ (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")))]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[2]);
+ rtx perm[<ssescalarnum>];
+
+ perm[0] = GEN_INT (mask & 3);
+ perm[1] = GEN_INT ((mask >> 2) & 3);
+ perm[2] = GEN_INT ((mask >> 4) & 3);
+ perm[3] = GEN_INT ((mask >> 6) & 3);
+ if (<MODE>mode == V8SFmode)
+ {
+ perm[4] = GEN_INT ((mask & 3) + 4);
+ perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
+ perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
+ perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
+ }
+
+ operands[2]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
+})
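+
+;; As an illustration: for V8SF with operands[2] == 0x1b the code above
+;; builds the selector { 3 2 1 0 | 7 6 5 4 }, i.e. the four floats are
+;; reversed within each 128-bit lane.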
+
+(define_insn "*avx_vpermilp<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (vec_select:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_parallel 2 "avx_vpermilp_<mode>_operand"
+ [(match_operand 3 "const_int_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
+ operands[2] = GEN_INT (mask);
+ return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_vpermilvar<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
+ UNSPEC_VPERMIL))]
+ "TARGET_AVX"
+ "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "avx_vperm2f128<mode>3"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
+ (unspec:AVX256MODE2P
+ [(match_operand:AVX256MODE2P 1 "register_operand" "")
+ (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_255_operand" "")]
+ UNSPEC_VPERMIL2F128))]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ if ((mask & 0x88) == 0)
+ {
+ rtx perm[<ssescalarnum>], t1, t2;
+ int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
+
+ base = (mask & 3) * nelt2;
+ for (i = 0; i < nelt2; ++i)
+ perm[i] = GEN_INT (base + i);
+
+ base = ((mask >> 4) & 3) * nelt2;
+ for (i = 0; i < nelt2; ++i)
+ perm[i + nelt2] = GEN_INT (base + i);
+
+ t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
+ operands[1], operands[2]);
+ t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
+ t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
+ t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
+ emit_insn (t2);
+ DONE;
+ }
+})
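+
+;; As an illustration: for V8SF with operands[3] == 0x31 the test
+;; (mask & 0x88) == 0 succeeds, and the selector built above is
+;;   { 4 5 6 7 | 12 13 14 15 }
+;; i.e. the high lane of operand 1 followed by the high lane of operand 2.
+;; If either zeroing bit (0x08 or 0x80) is set, the expander instead falls
+;; through to the UNSPEC_VPERMIL2F128 form matched below.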
+
+;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
+;; means that in order to represent this properly in rtl we'd have to
+;; nest *another* vec_concat with a zero operand and do the select from
+;; a 4x wide vector. That doesn't seem very nice.
+(define_insn "*avx_vperm2f128<mode>_full"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+ (unspec:AVX256MODE2P
+ [(match_operand:AVX256MODE2P 1 "register_operand" "x")
+ (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMIL2F128))]
+ "TARGET_AVX"
+ "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_vperm2f128<mode>_nozero"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+ (vec_select:AVX256MODE2P
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:AVX256MODE2P 1 "register_operand" "x")
+ (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
+ (match_parallel 3 "avx_vperm2f128_<mode>_operand"
+ [(match_operand 4 "const_int_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+ operands[3] = GEN_INT (mask);
+ return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "avx_vinsertf128<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ case 1:
+ emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
+ (match_operand:<avxpermvecmode> 2 "register_operand" "x")]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX"
+ "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKSTORE))]
+ "TARGET_AVX"
+ "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
+ [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVX256MODE2P
+ [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_CAST))]
+ "TARGET_AVX"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ if (REG_P (op0))
+ op0 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op0));
+ else
+ op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
+ emit_move_insn (op0, op1);
+ DONE;
+})
+
+(define_expand "vec_init<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*vec_concat<mode>_avx"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
+ (vec_concat:AVX256MODE
+ (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
+ (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%1, %x0|%x0, %1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%1, %x0|%x0, %1}";
+ default:
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix_extra" "1,*")
+ (set_attr "length_immediate" "1,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "vcvtph2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
+ UNSPEC_VCVTPH2PS)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vcvtph2ps_load"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
+ UNSPEC_VCVTPH2PS))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vcvtph2ps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VCVTPH2PS))]
+ "TARGET_F16C"
+ "vcvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "vcvtps2ph"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_255_operand" "")]
+ UNSPEC_VCVTPS2PH)
+ (match_dup 3)))]
+ "TARGET_F16C"
+ "operands[3] = CONST0_RTX (V4HImode);")
+
+(define_insn "*vcvtps2ph"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH)
+ (match_operand:V4HI 3 "const0_operand" "")))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vcvtps2ph_store"
+ [(set (match_operand:V4HI 0 "memory_operand" "=m")
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "vcvtps2ph256"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
+ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_F16C"
+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
diff --git a/gcc/config/i386/ssemath.h b/gcc/config/i386/ssemath.h
new file mode 100644
index 000000000..357d6a378
--- /dev/null
+++ b/gcc/config/i386/ssemath.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT (TARGET_SSE2 ? FPMATH_SSE : FPMATH_387)
+
+#undef TARGET_SUBTARGET32_ISA_DEFAULT
+#define TARGET_SUBTARGET32_ISA_DEFAULT \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2)
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
new file mode 100644
index 000000000..3fdfee2e4
--- /dev/null
+++ b/gcc/config/i386/sync.md
@@ -0,0 +1,242 @@
+;; GCC machine description for i386 synchronization instructions.
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator CASMODE
+ [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_iterator DCASMODE
+ [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+
+ if (!(TARGET_64BIT || TARGET_SSE2))
+ {
+ emit_insn (gen_memory_barrier_nosse (operands[0]));
+ DONE;
+ }
+})
+
+(define_insn "memory_barrier_nosse"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(TARGET_64BIT || TARGET_SSE2)"
+ "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
+ [(set_attr "memory" "unknown")])
+
+;; ??? It would be possible to use cmpxchg8b on pentium for DImode
+;; changes. It's complicated because the insn uses ecx:ebx as the
+;; new value; note that the registers are reversed from the order
+;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode
+;; data in 64-bit mode.
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:CASMODE 0 "register_operand" "")
+ (match_operand:CASMODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:CASMODE
+ [(match_dup 1)
+ (match_operand:CASMODE 2 "register_operand" "")
+ (match_operand:CASMODE 3 "register_operand" "")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:CASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
+ (match_dup 2)))])]
+ "TARGET_CMPXCHG"
+{
+ if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+ {
+ enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+ rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+ rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+ GET_MODE_SIZE (hmode));
+ low = force_reg (hmode, low);
+ high = force_reg (hmode, high);
+ if (<MODE>mode == DImode)
+ {
+ if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
+ operands[1] = replace_equiv_address (operands[1],
+ force_reg (Pmode,
+ XEXP (operands[1],
+ 0)));
+ emit_insn (gen_sync_double_compare_and_swapdi
+ (operands[0], operands[1], operands[2], low, high));
+ }
+ else if (<MODE>mode == TImode)
+ emit_insn (gen_sync_double_compare_and_swapti
+ (operands[0], operands[1], operands[2], low, high));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
+})
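+
+;; As an illustration, the __sync builtins map to this expander; e.g.
+;; (C, variable names illustrative)
+;;   long long v;
+;;   long long old = __sync_val_compare_and_swap (&v, 0LL, 1LL);
+;; which on !TARGET_64BIT splits the new value into SImode halves and
+;; emits the double-word cmpxchg8b pattern below.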
+
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:SWI 0 "register_operand" "=a")
+ (match_operand:SWI 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:SWI
+ [(match_dup 1)
+ (match_operand:SWI 2 "register_operand" "a")
+ (match_operand:SWI 3 "register_operand" "<r>")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:SWI
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ "TARGET_CMPXCHG"
+ "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap<mode>"
+ [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+ (match_operand:DCASMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DCASMODE
+ [(match_dup 1)
+ (match_operand:DCASMODE 2 "register_operand" "A")
+ (match_operand:<DCASHMODE> 3 "register_operand" "b")
+ (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DCASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ ""
+ "lock{%;} cmpxchg<doublemodesuffix>b\t%1")
+
+;; Theoretically we'd like to use constraint "r" (any reg) for operand
+;; 3, but that includes ecx.  If operands 3 and 4 are the same (as when
+;; the input is -1LL) GCC might choose to allocate operand 3 to ecx,
+;; like operand 4.  That breaks: the xchg would move the PIC register
+;; contents into %ecx, and the cmpxchg8b would then use the wrong value.
+;; Operands 3 and 4 really need to be different registers, which in this
+;; case means operand 3 must not be ecx.  Instead of playing tricks with
+;; fake early clobbers or the like, we just enumerate all the possible
+;; registers here, which (as this is !TARGET_64BIT) are just esi and edi.
+(define_insn "*sync_double_compare_and_swapdi_pic"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DI
+ [(match_dup 1)
+ (match_operand:DI 2 "register_operand" "A")
+ (match_operand:SI 3 "register_operand" "SD")
+ (match_operand:SI 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DI
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
+ "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+
+;; For operand 2, the nonmemory_operand predicate is used instead of
+;; register_operand so that the combiner can better optimize atomic
+;; additions of constants.
+(define_insn "sync_old_add<mode>"
+ [(set (match_operand:SWI 0 "register_operand" "=<r>")
+ (unspec_volatile:SWI
+ [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (plus:SWI (match_dup 1)
+ (match_operand:SWI 2 "nonmemory_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_XADD"
+ "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
+
+;; Recall that xchg implicitly asserts LOCK#; an explicit lock prefix wastes space.
+(define_insn "sync_lock_test_and_set<mode>"
+ [(set (match_operand:SWI 0 "register_operand" "=<r>")
+ (unspec_volatile:SWI
+ [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (match_operand:SWI 2 "register_operand" "0"))]
+ ""
+ "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_add<mode>"
+ [(set (match_operand:SWI 0 "memory_operand" "+m")
+ (unspec_volatile:SWI
+ [(plus:SWI (match_dup 0)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_USE_INCDEC)
+ {
+ if (operands[1] == const1_rtx)
+ return "lock{%;} inc{<imodesuffix>}\t%0";
+ if (operands[1] == constm1_rtx)
+ return "lock{%;} dec{<imodesuffix>}\t%0";
+ }
+
+ if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+ return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+ return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
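+
+;; As an illustration: with TARGET_USE_INCDEC an atomic add of 1 emits
+;; "lock inc"; an atomic add of 128 is emitted as "lock sub" of -128,
+;; assuming x86_maybe_negate_const_int negates constants whose negated
+;; form has a shorter immediate encoding.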
+
+(define_insn "sync_sub<mode>"
+ [(set (match_operand:SWI 0 "memory_operand" "+m")
+ (unspec_volatile:SWI
+ [(minus:SWI (match_dup 0)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_USE_INCDEC)
+ {
+ if (operands[1] == const1_rtx)
+ return "lock{%;} dec{<imodesuffix>}\t%0";
+ if (operands[1] == constm1_rtx)
+ return "lock{%;} inc{<imodesuffix>}\t%0";
+ }
+
+ return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_insn "sync_<code><mode>"
+ [(set (match_operand:SWI 0 "memory_operand" "+m")
+ (unspec_volatile:SWI
+ [(any_logic:SWI (match_dup 0)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock{%;} <logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
diff --git a/gcc/config/i386/sysv4.h b/gcc/config/i386/sysv4.h
new file mode 100644
index 000000000..64026e72b
--- /dev/null
+++ b/gcc/config/i386/sysv4.h
@@ -0,0 +1,73 @@
+/* Target definitions for GCC for Intel 80386 running System V.4
+ Copyright (C) 1991, 2001, 2002, 2007, 2008, 2011
+ Free Software Foundation, Inc.
+
+ Written by Ron Guilmette (rfg@netcom.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Output at beginning of assembler file. */
+/* The .file command should always begin the output. */
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef X86_FILE_START_VERSION_DIRECTIVE
+#define X86_FILE_START_VERSION_DIRECTIVE true
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, (FILE)); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), (FILE)); \
+ goto DONE; \
+ } \
+ } while (0)
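+
+/* As an illustration, for a 4-byte DW_EH_PE_datarel encoding of a symbol
+   "foo" (a hypothetical name) the macro above emits
+	.long	foo@GOTOFF
+   and, with DW_EH_PE_indirect also set,
+	.long	foo@GOT  */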
+
+/* Used by crtstuff.c to initialize the base of data-relative relocations.
+ These are GOT relative on x86, so return the pic register. */
+#ifdef __PIC__
+#define CRT_GET_RFIB_DATA(BASE) \
+ { \
+ register void *ebx_ __asm__("ebx"); \
+ BASE = ebx_; \
+ }
+#else
+#define CRT_GET_RFIB_DATA(BASE) \
+ __asm__ ("call\t.LPR%=\n" \
+ ".LPR%=:\n\t" \
+ "pop{l}\t%0\n\t" \
+ /* Due to a GAS bug, this cannot use EAX. That encodes \
+ smaller than the traditional EBX, which results in the \
+ offset being off by one. */ \
+ "add{l}\t{$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0" \
+ "|%0,_GLOBAL_OFFSET_TABLE_+(.-.LPR%=)}" \
+ : "=d"(BASE))
+#endif
diff --git a/gcc/config/i386/t-crtfm b/gcc/config/i386/t-crtfm
new file mode 100644
index 000000000..4fa27e91c
--- /dev/null
+++ b/gcc/config/i386/t-crtfm
@@ -0,0 +1,8 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c \
+ $(srcdir)/config/i386/cpuid.h $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) \
+ -msse -minline-all-stringops -c \
+ $(srcdir)/config/i386/crtfastmath.c \
+ -o $(T)crtfastmath$(objext)
diff --git a/gcc/config/i386/t-crtpc b/gcc/config/i386/t-crtpc
new file mode 100644
index 000000000..c165772f4
--- /dev/null
+++ b/gcc/config/i386/t-crtpc
@@ -0,0 +1,34 @@
+# Copyright (C) 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+EXTRA_PARTS += crtprec32.o crtprec64.o crtprec80.o
+
+$(T)crtprec32.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=32 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec32$(objext)
+
+$(T)crtprec64.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=64 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec64$(objext)
+
+$(T)crtprec80.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=80 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec80$(objext)
diff --git a/gcc/config/i386/t-crtpic b/gcc/config/i386/t-crtpic
new file mode 100644
index 000000000..ff81a9bef
--- /dev/null
+++ b/gcc/config/i386/t-crtpic
@@ -0,0 +1,10 @@
+# The pushl in CTOR initialization interferes with frame pointer elimination.
+
+# We need to use -fPIC when we are using gcc to compile the routines in
+# crtstuff.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fPIC when compiling the
+# routines in crtstuff.c.
+
+CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer
+TARGET_LIBGCC2_CFLAGS = -fPIC
diff --git a/gcc/config/i386/t-crtstuff b/gcc/config/i386/t-crtstuff
new file mode 100644
index 000000000..c14dd9411
--- /dev/null
+++ b/gcc/config/i386/t-crtstuff
@@ -0,0 +1,7 @@
+# The pushl in CTOR initialization interferes with frame pointer elimination.
+# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
+# because otherwise __FRAME_END__ might not be the last thing in the
+# .eh_frame section.  Asynchronous unwind tables are off by default for
+# i386 and on by default for x86-64; we turn them off explicitly for
+# both i386 and x86-64.
+CRTSTUFF_T_CFLAGS += -fno-omit-frame-pointer -fno-asynchronous-unwind-tables
diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming
new file mode 100644
index 000000000..ccae237d3
--- /dev/null
+++ b/gcc/config/i386/t-cygming
@@ -0,0 +1,109 @@
+# Copyright (C) 2003, 2005, 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = i386/cygwin.asm
+LIB1ASMFUNCS = _chkstk _chkstk_ms
+
+# cygwin and mingw always have a limits.h, but, depending upon how we are
+# doing the build, it may not be installed yet.
+LIMITS_H_TEST = true
+
+# If we are building next to winsup, this will let us find the real
+# limits.h when building libgcc2. Otherwise, winsup must be installed
+# first.
+LIBGCC2_INCLUDES = -I$(srcdir)/../winsup/w32api/include
+
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) $(HASHTAB_H) $(GGC_H) $(LTO_STREAMER_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt.c
+
+winnt-cxx.o: $(srcdir)/config/i386/winnt-cxx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(TREE_H) flags.h \
+ $(TM_P_H) $(HASHTAB_H) $(GGC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt-cxx.c
+
+
+winnt-stubs.o: $(srcdir)/config/i386/winnt-stubs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) $(HASHTAB_H) $(GGC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt-stubs.c
+
+msformat-c.o: $(srcdir)/config/i386/msformat-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) $(HASHTAB_H) $(GGC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/msformat-c.c
+
+STMP_FIXINC=stmp-fixinc
+
+# Build a shared libgcc library for PECOFF with a DEF file
+# with the GNU linker.
+#
+# mkmap-flat.awk is used with the pe_dll option to produce a DEF instead
+# of an ELF map file.
+#
+# Warning: if SHLIB_SOVERSION or SHLIB_SONAME is updated, LIBGCC_SONAME
+# in mingw32.h and SHLIB_MKMAP_OPTS below must be updated as well.
+
+SHLIB_EXT = .dll
+SHLIB_IMPLIB = @shlib_base_name@.a
+SHLIB_SOVERSION = 1
+SHLIB_SONAME = @shlib_base_name@_$(EH_MODEL)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+SHLIB_MAP = @shlib_map_file@
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@/shlib
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+# SHLIB_DLLDIR is defined by including one of either t-dlldir or t-dlldir-x
+# (native/cross build respectively) in the tmake_file list in gcc/config.gcc.
+ifndef SHLIB_DLLDIR
+$(error SHLIB_DLLDIR must be defined)
+endif
+
+SHLIB_LINK = $(LN_S) -f $(SHLIB_MAP) $(SHLIB_MAP).def && \
+ if [ ! -d $(SHLIB_DIR) ]; then \
+ mkdir $(SHLIB_DIR); \
+ else true; fi && \
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ $(SHLIB_MAP).def \
+ -Wl,--out-implib,$(SHLIB_DIR)/$(SHLIB_IMPLIB).tmp \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) $(SHLIB_LC) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ mv $(SHLIB_DIR)/$(SHLIB_IMPLIB).tmp $(SHLIB_DIR)/$(SHLIB_IMPLIB)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(SHLIB_DLLDIR) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(SHLIB_DLLDIR)/$(SHLIB_SONAME); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_IMPLIB) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_IMPLIB)
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
+# We'd like to use SHLIB_SONAME here too, but shlib_base_name
+# does not get substituted before mkmap-flat.awk is run.
+SHLIB_MKMAP_OPTS = -v pe_dll=libgcc_s_$(EH_MODEL)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/i386/t-cygwin b/gcc/config/i386/t-cygwin
new file mode 100644
index 000000000..f5eda91c0
--- /dev/null
+++ b/gcc/config/i386/t-cygwin
@@ -0,0 +1,39 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# If we are building next to winsup, this will let us find the real
+# limits.h when building libgcc2. Otherwise, winsup must be installed
+# first.
+LIBGCC2_INCLUDES += -I$(srcdir)/../winsup/include \
+ -I$(srcdir)/../winsup/cygwin/include
+
+# Cygwin-specific parts of LIB_SPEC
+SHLIB_LC = -lcygwin -ladvapi32 -lshell32 -luser32 -lkernel32
+
+# We have already included one of the t-{dw2,sjlj}-eh fragments for EH_MODEL
+SHLIB_EH_EXTENSION = $(subst -dw2,,-$(EH_MODEL))
+
+# Cygwin uses different conventions than MinGW; override the generic SHLIB_ definitions here.
+SHLIB_IMPLIB = @shlib_base_name@$(SHLIB_EXT).a
+SHLIB_SONAME = cyggcc_s$(SHLIB_EH_EXTENSION)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+# This must match the definitions of SHLIB_SONAME/SHLIB_SOVERSION and LIBGCC_SONAME.
+# We'd like to use SHLIB_SONAME here too, and we can, since
+# we don't rely on shlib_base_name substitution for it.
+SHLIB_MKMAP_OPTS = -v pe_dll=$(SHLIB_SONAME)
+
diff --git a/gcc/config/i386/t-darwin b/gcc/config/i386/t-darwin
new file mode 100644
index 000000000..22323e4ab
--- /dev/null
+++ b/gcc/config/i386/t-darwin
@@ -0,0 +1,5 @@
+MULTILIB_OPTIONS = m64
+MULTILIB_DIRNAMES = x86_64
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
+LIB2FUNCS_EXCLUDE = _fixtfdi _fixunstfdi _floatditf _floatunditf
diff --git a/gcc/config/i386/t-darwin64 b/gcc/config/i386/t-darwin64
new file mode 100644
index 000000000..81b4565ac
--- /dev/null
+++ b/gcc/config/i386/t-darwin64
@@ -0,0 +1,8 @@
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
+
+MULTILIB_OPTIONS = m32
+MULTILIB_DIRNAMES = i386
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/i386/t-djgpp b/gcc/config/i386/t-djgpp
new file mode 100644
index 000000000..7b54b7ba7
--- /dev/null
+++ b/gcc/config/i386/t-djgpp
@@ -0,0 +1,2 @@
+# Location of DJGPP's header directory.
+NATIVE_SYSTEM_HEADER_DIR=$(DJDIR)/include
diff --git a/gcc/config/i386/t-dlldir b/gcc/config/i386/t-dlldir
new file mode 100644
index 000000000..a3e03317a
--- /dev/null
+++ b/gcc/config/i386/t-dlldir
@@ -0,0 +1,6 @@
+
+# In a native build, target DLLs go in bindir, where they can be executed.
+# Note double quoting to prevent variables from being evaluated until install
+# time; we don't want to expand them during libgcc.mvars generation.
+
+SHLIB_DLLDIR = $$(bindir)
diff --git a/gcc/config/i386/t-dlldir-x b/gcc/config/i386/t-dlldir-x
new file mode 100644
index 000000000..07dd845f0
--- /dev/null
+++ b/gcc/config/i386/t-dlldir-x
@@ -0,0 +1,9 @@
+
+# In a cross build, bindir contains host not target binaries, so target DLLs
+# instead go in toolexeclibdir, alongside other target binaries and static libs.
+# Note double quoting to prevent variables from being evaluated until install
+# time; we don't want to expand them during libgcc.mvars generation, and in
+# any case, $toolexeclibdir is not defined in the gcc/ subdirectory, only in
+# target lib directories.
+
+SHLIB_DLLDIR = $$(toolexeclibdir)
diff --git a/gcc/config/i386/t-dw2-eh b/gcc/config/i386/t-dw2-eh
new file mode 100644
index 000000000..ffcc39aea
--- /dev/null
+++ b/gcc/config/i386/t-dw2-eh
@@ -0,0 +1,3 @@
+
+# We are using Dwarf-2 EH.
+EH_MODEL = dw2
diff --git a/gcc/config/i386/t-fprules-softfp b/gcc/config/i386/t-fprules-softfp
new file mode 100644
index 000000000..0b0068f90
--- /dev/null
+++ b/gcc/config/i386/t-fprules-softfp
@@ -0,0 +1,6 @@
+softfp_float_modes := tf
+softfp_int_modes := si di ti
+softfp_extensions := sftf dftf xftf
+softfp_truncations := tfsf tfdf tfxf
+softfp_machine_header := i386/sfp-machine.h
+softfp_exclude_libgcc2 := n
diff --git a/gcc/config/i386/t-gmm_malloc b/gcc/config/i386/t-gmm_malloc
new file mode 100644
index 000000000..c37f8a759
--- /dev/null
+++ b/gcc/config/i386/t-gmm_malloc
@@ -0,0 +1,6 @@
+# Install gmm_malloc.h as mm_malloc.h.
+
+EXTRA_HEADERS += mm_malloc.h
+mm_malloc.h: $(srcdir)/config/i386/gmm_malloc.h
+ rm -f $@
+ cat $^ > $@
diff --git a/gcc/config/i386/t-gnu b/gcc/config/i386/t-gnu
new file mode 100644
index 000000000..5f946c716
--- /dev/null
+++ b/gcc/config/i386/t-gnu
@@ -0,0 +1 @@
+MULTIARCH_DIRNAME = $(call if_multiarch,i386-gnu)
diff --git a/gcc/config/i386/t-gthr-win32 b/gcc/config/i386/t-gthr-win32
new file mode 100644
index 000000000..f67fa1e25
--- /dev/null
+++ b/gcc/config/i386/t-gthr-win32
@@ -0,0 +1,2 @@
+# We hide calls to w32api needed for w32 thread support here:
+LIB2FUNCS_EXTRA = $(srcdir)/config/i386/gthr-win32.c
diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386
new file mode 100644
index 000000000..1c658a149
--- /dev/null
+++ b/gcc/config/i386/t-i386
@@ -0,0 +1,41 @@
+# Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h \
+ $(REAL_H) insn-config.h conditions.h output.h insn-codes.h \
+ $(INSN_ATTR_H) $(FLAGS_H) $(C_COMMON_H) except.h $(FUNCTION_H) \
+ $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \
+ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \
+ $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \
+ i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H)
+
+i386-c.o: $(srcdir)/config/i386/i386-c.c \
+ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \
+ $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/i386-c.c
+
+
+i386-builtin-types.inc: s-i386-bt ; @true
+s-i386-bt: $(srcdir)/config/i386/i386-builtin-types.awk \
+ $(srcdir)/config/i386/i386-builtin-types.def
+ $(AWK) -f $^ > tmp-bt.inc
+ $(SHELL) $(srcdir)/../move-if-change tmp-bt.inc i386-builtin-types.inc
+ $(STAMP) $@
diff --git a/gcc/config/i386/t-i386elf b/gcc/config/i386/t-i386elf
new file mode 100644
index 000000000..9560d9055
--- /dev/null
+++ b/gcc/config/i386/t-i386elf
@@ -0,0 +1,4 @@
+# For svr4 we build crtbegin.o and crtend.o, which add begin and end
+# labels to the .ctors and .dtors sections when we link using gcc.
+
+EXTRA_PARTS=crtbegin.o crtend.o
diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix
new file mode 100644
index 000000000..e7b016f1e
--- /dev/null
+++ b/gcc/config/i386/t-interix
@@ -0,0 +1,8 @@
+LIB1ASMSRC = i386/cygwin.asm
+LIB1ASMFUNCS = _chkstk _chkstk_ms
+
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) $(HASHTAB_H) $(GGC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt.c
diff --git a/gcc/config/i386/t-kfreebsd b/gcc/config/i386/t-kfreebsd
new file mode 100644
index 000000000..b4310df8a
--- /dev/null
+++ b/gcc/config/i386/t-kfreebsd
@@ -0,0 +1,5 @@
+MULTIARCH_DIRNAME = $(call if_multiarch,i386-kfreebsd-gnu)
+
+# MULTILIB_OSDIRNAMES are set in t-linux64.
+KFREEBSD_OS = $(filter kfreebsd%, $(word 3, $(subst -, ,$(target))))
+MULTILIB_OSDIRNAMES := $(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES))
diff --git a/gcc/config/i386/t-linux b/gcc/config/i386/t-linux
new file mode 100644
index 000000000..76e3f64f5
--- /dev/null
+++ b/gcc/config/i386/t-linux
@@ -0,0 +1,9 @@
+# On 64-bit targets we do not need any glibc exports for the 64-bit
+# libgcc_s, but we do need to support TImode for x86.  Override the
+# settings from t-slibgcc-elf-ver and t-linux.
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \
+ $(srcdir)/config/i386/libgcc-glibc.ver
+
+ifneq (,$(findstring -linux,$(target)))
+MULTIARCH_DIRNAME = $(call if_multiarch,i386-linux-gnu)
+endif
diff --git a/gcc/config/i386/t-linux64 b/gcc/config/i386/t-linux64
new file mode 100644
index 000000000..057744b4b
--- /dev/null
+++ b/gcc/config/i386/t-linux64
@@ -0,0 +1,36 @@
+# Copyright (C) 2002, 2005, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# On Debian, Ubuntu and other derivative distributions, the 32-bit
+# libraries are found in /lib32 and /usr/lib32, and /lib64 and /usr/lib64
+# are symlinks to /lib and /usr/lib; other distributions install libraries
+# into /lib64 and /usr/lib64.  The LSB does not enforce the use of /lib64
+# and /usr/lib64, and says nothing about the 32-bit libraries on those
+# systems.  Set MULTILIB_OSDIRNAMES according to what is found on the target.
+
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:x86_64-linux-gnu) \
+ $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:i386-linux-gnu)
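+
+# For example: on a Debian-style target where
+# $(SYSTEM_HEADER_DIR)/../../usr/lib32 exists, the 64-bit multilib maps
+# to ../lib64 and the 32-bit multilib to ../lib32; on other targets the
+# 32-bit multilib falls back to ../lib.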
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
+ crtbeginT.o crtprec32.o crtprec64.o crtprec80.o \
+ crtfastmath.o
diff --git a/gcc/config/i386/t-mingw-w32 b/gcc/config/i386/t-mingw-w32
new file mode 100644
index 000000000..a14218016
--- /dev/null
+++ b/gcc/config/i386/t-mingw-w32
@@ -0,0 +1,12 @@
+# Match SYSTEM_INCLUDE_DIR
+NATIVE_SYSTEM_HEADER_DIR = /mingw/include
+
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib64 ../lib
+
+# MinGW-specific parts of LIB_SPEC
+SHLIB_LC = -lmingwthrd -lmingw32 -lmingwex -lmoldname -lmsvcrt -ladvapi32 -lshell32 -luser32 -lkernel32
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/i386/t-mingw-w64 b/gcc/config/i386/t-mingw-w64
new file mode 100644
index 000000000..dbe2d00a2
--- /dev/null
+++ b/gcc/config/i386/t-mingw-w64
@@ -0,0 +1,12 @@
+# Match SYSTEM_INCLUDE_DIR
+NATIVE_SYSTEM_HEADER_DIR = /mingw/include
+
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib ../lib32
+
+# MinGW-specific parts of LIB_SPEC
+SHLIB_LC = -lmingwthrd -lmingw32 -lmingwex -lmoldname -lmsvcrt -ladvapi32 -lshell32 -luser32 -lkernel32
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/i386/t-mingw32 b/gcc/config/i386/t-mingw32
new file mode 100644
index 000000000..a8235242a
--- /dev/null
+++ b/gcc/config/i386/t-mingw32
@@ -0,0 +1,5 @@
+# Match SYSTEM_INCLUDE_DIR
+NATIVE_SYSTEM_HEADER_DIR = /mingw/include
+
+# MinGW-specific parts of LIB_SPEC
+SHLIB_LC = -lmingwthrd -lmingw32 -lmingwex -lmoldname -lmsvcrt -ladvapi32 -lshell32 -luser32 -lkernel32
diff --git a/gcc/config/i386/t-netware b/gcc/config/i386/t-netware
new file mode 100644
index 000000000..405c98f6a
--- /dev/null
+++ b/gcc/config/i386/t-netware
@@ -0,0 +1,10 @@
+TARGET_LIBGCC2_CFLAGS = -mpreferred-stack-boundary=2 -fomit-frame-pointer
+
+netware.o: $(srcdir)/config/i386/netware.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/netware.c
+
+# We don't need some of GCC's own include files.
+USER_H = $(srcdir)/ginclude/stdarg.h \
+ $(srcdir)/ginclude/varargs.h \
+ $(EXTRA_HEADERS) $(LANG_EXTRA_HEADERS)
diff --git a/gcc/config/i386/t-nto b/gcc/config/i386/t-nto
new file mode 100644
index 000000000..b80ff8029
--- /dev/null
+++ b/gcc/config/i386/t-nto
@@ -0,0 +1,4 @@
+CRTSTUFF_T_CFLAGS = -fno-omit-frame-pointer -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC -fexceptions
+
+EXTRA_PARTS = crtbegin.o
diff --git a/gcc/config/i386/t-nwld b/gcc/config/i386/t-nwld
new file mode 100644
index 000000000..e77279116
--- /dev/null
+++ b/gcc/config/i386/t-nwld
@@ -0,0 +1,50 @@
+# Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+CRTSTUFF_T_CFLAGS = -mpreferred-stack-boundary=2
+CRT0STUFF_T_CFLAGS = -mpreferred-stack-boundary=2 $(INCLUDES)
+# This is a slight misuse (it's not an assembler file).
+CRT0_S = $(srcdir)/config/i386/netware-crt0.c
+MCRT0_S = $(srcdir)/config/i386/netware-crt0.c
+
+$(T)libgcc.def: $(srcdir)/config/i386/t-nwld
+ echo "module libgcc_s" >$@
+
+$(T)libc.def: $(srcdir)/config/i386/t-nwld
+ echo "module libc" >$@
+
+$(T)libcpre.def: $(srcdir)/config/i386/t-nwld
+ echo "start _LibCPrelude" >$@
+ echo "exit _LibCPostlude" >>$@
+ echo "check _LibCCheckUnload" >>$@
+
+$(T)posixpre.def: $(srcdir)/config/i386/t-nwld
+ echo "start POSIX_Start" >$@
+ echo "exit POSIX_Stop" >>$@
+ echo "check POSIX_CheckUnload" >>$@
+
+nwld.o: $(srcdir)/config/i386/nwld.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/nwld.c
+
+
+s-crt0: $(srcdir)/unwind-dw2-fde.h
+
+# To keep DRIVER_DEFINES correct.
+SHLIB_LINK = dummy
diff --git a/gcc/config/i386/t-openbsd b/gcc/config/i386/t-openbsd
new file mode 100644
index 000000000..183046340
--- /dev/null
+++ b/gcc/config/i386/t-openbsd
@@ -0,0 +1,6 @@
+# gdb gets confused if PIC code is linked with non-PIC code.
+# We cope by building PIC and non-PIC variants of libgcc.
+MULTILIB_OPTIONS = fpic
+MULTILIB_MATCHES=fpic=fPIC
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/i386/t-pmm_malloc b/gcc/config/i386/t-pmm_malloc
new file mode 100644
index 000000000..109009fbf
--- /dev/null
+++ b/gcc/config/i386/t-pmm_malloc
@@ -0,0 +1,6 @@
+# Install pmm_malloc.h as mm_malloc.h.
+
+EXTRA_HEADERS += mm_malloc.h
+mm_malloc.h: $(srcdir)/config/i386/pmm_malloc.h
+ rm -f $@
+ cat $^ > $@
diff --git a/gcc/config/i386/t-rtems-i386 b/gcc/config/i386/t-rtems-i386
new file mode 100644
index 000000000..47dfc7e11
--- /dev/null
+++ b/gcc/config/i386/t-rtems-i386
@@ -0,0 +1,69 @@
+# Copyright (C) 1999, 2001, 2002, 2005, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+#
+#
+# This file was based on t-sol2, the x86 Solaris implementation. Actually,
+# the source code used to create crti.o and crtn.o is exactly the same
+# as the Solaris one. Later, we might want to have an RTEMS-specific
+# version of these files.
+#
+
+$(T)crti.o: $(srcdir)/config/i386/sol2-ci.asm $(GCC_PASSES)
+ sed -e '/^!/d' <$(srcdir)/config/i386/sol2-ci.asm >crti.s
+ $(GCC_FOR_TARGET) -c -o $(T)crti.o crti.s
+$(T)crtn.o: $(srcdir)/config/i386/sol2-cn.asm $(GCC_PASSES)
+ sed -e '/^!/d' <$(srcdir)/config/i386/sol2-cn.asm >crtn.s
+ $(GCC_FOR_TARGET) -c -o $(T)crtn.o crtn.s
+
+# We want fine-grained libraries, so use the new code to build the
+# floating-point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+LIB2FUNCS_EXTRA = xp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+xp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define EXTENDED_FLOAT_STUBS' > xp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> xp-bit.c
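+# For reference, the generated wrappers are just the echoed preamble
+# followed by the shared source; dp-bit.c, for example, begins with
+#   #ifdef __LITTLE_ENDIAN__
+#   #define FLOAT_BIT_ORDER_MISMATCH
+#   #endif
+# and then contains all of $(srcdir)/config/fp-bit.c verbatim.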
+
+MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro \
+msoft-float
+MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float
+MULTILIB_MATCHES = msoft-float=mno-m80387
+MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon
+MULTILIB_EXCEPTIONS = \
+mtune=pentium/*msoft-float* \
+mtune=pentiumpro/*msoft-float*
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/i386/t-sjlj-eh b/gcc/config/i386/t-sjlj-eh
new file mode 100644
index 000000000..c9085f432
--- /dev/null
+++ b/gcc/config/i386/t-sjlj-eh
@@ -0,0 +1,3 @@
+
+# We are using SjLj EH.
+EH_MODEL = sjlj
diff --git a/gcc/config/i386/t-sol2-10 b/gcc/config/i386/t-sol2-10
new file mode 100644
index 000000000..95eabf63d
--- /dev/null
+++ b/gcc/config/i386/t-sol2-10
@@ -0,0 +1,29 @@
+# Copyright (C) 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = m32/m64
+MULTILIB_DIRNAMES = 32 amd64
+MULTILIB_OSDIRNAMES = . amd64
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# GCC contains i386 assembler sources for some of the startfiles
+# which aren't appropriate for amd64. Just use the installed
+# versions of: crt1.o crti.o crtn.o gcrt1.o
+EXTRA_MULTILIB_PARTS=gmon.o crtbegin.o crtend.o
diff --git a/gcc/config/i386/t-svr3dbx b/gcc/config/i386/t-svr3dbx
new file mode 100644
index 000000000..517113791
--- /dev/null
+++ b/gcc/config/i386/t-svr3dbx
@@ -0,0 +1,7 @@
+# gas 1.38.1 supporting dbx-in-coff requires a link script.
+
+svr3.ifile: $(srcdir)/config/i386/svr3.ifile
+ rm -f svr3.ifile; cp $(srcdir)/config/i386/svr3.ifile .
+
+svr3z.ifile: $(srcdir)/config/i386/svr3z.ifile
+ rm -f svr3z.ifile; cp $(srcdir)/config/i386/svr3z.ifile .
diff --git a/gcc/config/i386/t-vxworks b/gcc/config/i386/t-vxworks
new file mode 100644
index 000000000..c440b1f90
--- /dev/null
+++ b/gcc/config/i386/t-vxworks
@@ -0,0 +1,8 @@
+# Multilibs for VxWorks.
+
+# Build multilibs for normal, -mrtp, and -mrtp -fPIC.
+MULTILIB_OPTIONS = mrtp fPIC
+MULTILIB_DIRNAMES =
+MULTILIB_MATCHES = fPIC=fpic
+MULTILIB_EXCEPTIONS = fPIC
+
diff --git a/gcc/config/i386/t-vxworksae b/gcc/config/i386/t-vxworksae
new file mode 100644
index 000000000..0cea2bbf3
--- /dev/null
+++ b/gcc/config/i386/t-vxworksae
@@ -0,0 +1,5 @@
+# Multilibs for VxWorks AE.
+
+MULTILIB_OPTIONS = mvthreads
+MULTILIB_MATCHES =
+MULTILIB_EXCEPTIONS =
diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
new file mode 100644
index 000000000..8d2431d41
--- /dev/null
+++ b/gcc/config/i386/tbmintrin.h
@@ -0,0 +1,191 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __TBM__
+# error "TBM instruction set not enabled"
+#endif /* __TBM__ */
+
+#ifndef _TBMINTRIN_H_INCLUDED
+#define _TBMINTRIN_H_INCLUDED
+
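+/* Editorial note: the functions below are the documented bit-twiddling
+ equivalents of the AMD TBM instructions of the same names; each is
+ built around the lowest set or clear bit of the operand (adding 1 to
+ __X flips the trailing one-bits and the lowest clear bit). See the
+ AMD TBM documentation for the exact instruction semantics. */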
+#ifdef __OPTIMIZE__
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextri_u32 (unsigned int __X, const unsigned int __I)
+{
+ return __builtin_ia32_bextri_u32 (__X, __I);
+}
+#else
+#define __bextri_u32(X, I) \
+ ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \
+ (unsigned int)(I)))
+#endif /*__OPTIMIZE__ */
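+/* Illustrative use, assuming the usual BEXTR control layout (start in
+ bits 7:0, length in bits 15:8): __bextri_u32 (x, 0x0804) extracts the
+ 8-bit field of x starting at bit 4, i.e. (x >> 4) & 0xff. */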
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcfill_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) & ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blci_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) | (~((__X) + 1));
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcic_u32 (unsigned int __X)
+{
+ unsigned int tmp = (~(__X)) & ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcmsk_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) ^ ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcs_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) | ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsfill_u32 (unsigned int __X)
+{
+ unsigned int tmp = (__X) | ((__X) - 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsic_u32 (unsigned int __X)
+{
+ unsigned int tmp = (~(__X)) | ((__X) - 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__t1mskc_u32 (unsigned int __X)
+{
+ unsigned int tmp = (~(__X)) | ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzmsk_u32 (unsigned int __X)
+{
+ unsigned int tmp = (~(__X)) & ((__X) - 1);
+ return tmp;
+}
+
+
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__bextri_u64 (unsigned long long __X, const unsigned int __I)
+{
+ return __builtin_ia32_bextri_u64 (__X, __I);
+}
+#else
+#define __bextri_u64(X, I) \
+ ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \
+ (unsigned long long)(I)))
+#endif /*__OPTIMIZE__ */
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcfill_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) & ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blci_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) | (~((__X) + 1));
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcic_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (~(__X)) & ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcmsk_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) ^ ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blcs_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) | ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsfill_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (__X) | ((__X) - 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__blsic_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (~(__X)) | ((__X) - 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__t1mskc_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (~(__X)) | ((__X) + 1);
+ return tmp;
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__tzmsk_u64 (unsigned long long __X)
+{
+ unsigned long long tmp = (~(__X)) & ((__X) - 1);
+ return tmp;
+}
+
+
+#endif /* __x86_64__ */
+#endif /* _TBMINTRIN_H_INCLUDED */
+
diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h
new file mode 100644
index 000000000..9835669ca
--- /dev/null
+++ b/gcc/config/i386/tmmintrin.h
@@ -0,0 +1,244 @@
+/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.1. */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+#ifndef __SSSE3__
+# error "SSSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
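+/* Explanatory note: the horizontal adds above sum adjacent element
+ pairs; for _mm_hadd_epi16 the low half of the result comes from __X
+ (x1+x0, x3+x2, ...) and the high half from __Y, and the "hadds"
+ variants saturate instead of wrapping. */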
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
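+/* Explanatory note: pmaddubsw multiplies the unsigned bytes of __X by
+ the corresponding signed bytes of __Y, then adds adjacent products
+ into signed 16-bit words with saturation. */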
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
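+/* Explanatory note: pshufb selects each result byte from __X using the
+ low-order index bits of the corresponding byte of __Y (four bits in
+ the 128-bit form), producing zero when that byte of __Y has its top
+ bit set. */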
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
+ (__v2di)__Y, __N * 8);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
+{
+ return (__m64) __builtin_ia32_palignr ((__v1di)__X,
+ (__v1di)__Y, __N * 8);
+}
+#else
+#define _mm_alignr_epi8(X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(N) * 8))
+#define _mm_alignr_pi8(X, Y, N) \
+ ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X), \
+ (__v1di)(__m64)(Y), \
+ (int)(N) * 8))
+#endif
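+/* Explanatory note: __N counts bytes, while the palignr builtins take a
+ bit count, hence the "* 8" above; e.g. _mm_alignr_epi8 (x, y, 4)
+ returns the 16 bytes starting 4 bytes into the concatenation x:y. */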
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi8 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi16 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi32 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+#endif /* __SSSE3__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/unix.h b/gcc/config/i386/unix.h
new file mode 100644
index 000000000..abd665844
--- /dev/null
+++ b/gcc/config/i386/unix.h
@@ -0,0 +1,81 @@
+/* Definitions for Unix assembler syntax for the Intel 80386.
+ Copyright (C) 1988, 1994, 1999, 2000, 2001, 2002, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file defines the aspects of assembler syntax
+ that are the same for all the i386 Unix systems
+ (though they may differ in non-Unix systems). */
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't. This macro controls what to do: by default, don't
+ print %cl. */
+#define SHIFT_DOUBLE_OMITS_COUNT 1
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* String containing the assembler's comment-starter.
+ Note that the trailing space is necessary in case the character
+ that immediately follows the comment is '*'. If this happens
+ and the space is not there, the assembler will interpret this
+ as the start of a C-like slash-star comment and complain when
+ there is no terminator. */
+
+#define ASM_COMMENT_START "/ "
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "/APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "/NO_APP\n"
+
+/* Output before read-only data. */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* Output before writable (initialized) data. */
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* Output before writable (uninitialized) data. */
+
+#define BSS_SECTION_ASM_OP "\t.bss"
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+ and returns float values in the 387. */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS)
+
+/* By default, 64-bit mode uses 128-bit long double. */
+#undef TARGET_SUBTARGET64_DEFAULT
+#define TARGET_SUBTARGET64_DEFAULT \
+ MASK_128BIT_LONG_DOUBLE
diff --git a/gcc/config/i386/vx-common.h b/gcc/config/i386/vx-common.h
new file mode 100644
index 000000000..c5ec4ed79
--- /dev/null
+++ b/gcc/config/i386/vx-common.h
@@ -0,0 +1,33 @@
+/* IA32 VxWorks and VxWorks AE target definitions.
+ Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* VxWorks uses the same ABI as Solaris 2, so use the i386/sol2.h version. */
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_VECT8_RETURNS)
+
+/* Provide our target-specific DBX_REGISTER_NUMBER. VxWorks relies on
+ the SVR4 numbering. */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
diff --git a/gcc/config/i386/vxworks.h b/gcc/config/i386/vxworks.h
new file mode 100644
index 000000000..09861e493
--- /dev/null
+++ b/gcc/config/i386/vxworks.h
@@ -0,0 +1,76 @@
+/* IA32 VxWorks target definitions for GNU compiler.
+ Copyright (C) 2003, 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+ Updated by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (80586, VxWorks syntax)");
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ if (TARGET_386) \
+ builtin_define ("CPU=I80386"); \
+ else if (TARGET_486) \
+ builtin_define ("CPU=I80486"); \
+ else if (TARGET_PENTIUM) \
+ { \
+ builtin_define ("CPU=PENTIUM"); \
+ builtin_define ("CPU_VARIANT=PENTIUM"); \
+ } \
+ else if (TARGET_PENTIUMPRO) \
+ { \
+ builtin_define ("CPU=PENTIUM2"); \
+ builtin_define ("CPU_VARIANT=PENTIUMPRO"); \
+ } \
+ else if (TARGET_PENTIUM4) \
+ { \
+ builtin_define ("CPU=PENTIUM4"); \
+ builtin_define ("CPU_VARIANT=PENTIUM4"); \
+ } \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC
+
+#undef SUBTARGET_SWITCHES
+#define SUBTARGET_SWITCHES EXTRA_SUBTARGET_SWITCHES
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS
+
+/* No _mcount profiling on VxWorks. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO)
+
+/* We cannot use PC-relative accesses for VxWorks PIC because there is no
+ fixed gap between segments. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
diff --git a/gcc/config/i386/vxworksae.h b/gcc/config/i386/vxworksae.h
new file mode 100644
index 000000000..b4c9fe4eb
--- /dev/null
+++ b/gcc/config/i386/vxworksae.h
@@ -0,0 +1,35 @@
+/* IA32 VxWorks AE target definitions for GNU compiler.
+ Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* On VxWorks AE, we only want SIMNT. */
+#undef VXWORKS_CPU_DEFINE
+#define VXWORKS_CPU_DEFINE() \
+ do \
+ builtin_define ("CPU=SIMNT"); \
+ while (0)
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
diff --git a/gcc/config/i386/w32-unwind.h b/gcc/config/i386/w32-unwind.h
new file mode 100644
index 000000000..449e9a9c5
--- /dev/null
+++ b/gcc/config/i386/w32-unwind.h
@@ -0,0 +1,204 @@
+/* Definitions for Dwarf2 EH unwind support for Windows32 targets
+ Copyright (C) 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Pascal Obry <obry@adacore.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This file implements the md_fallback_frame_state_for routine for
+ Windows, triggered when the GCC table-based unwinding process hits a
+ frame for which no unwind info has been registered. This typically
+ occurs when raising an exception from a signal handler, because the
+ handler is actually called from the OS kernel.
+
+ The basic idea is to detect that we are indeed trying to unwind past a
+ signal handler and to fill out the GCC internal unwinding structures for
+ the OS kernel frame as if it had been directly called from the
+ interrupted context.
+
+ This is all assuming that the code to set the handler asked the kernel
+ to pass a pointer to such context information.
+
+ There are three main parts.
+
+ 1) The first thing to do is to check if we are in a signal context. If
+ not, we can just return as there is nothing to do. We are probably in
+ some foreign code for which no unwind frame can be found. If this is
+ a call from the Windows signal handler, then:
+
+ 2) We must get the signal context information.
+
+ * With the standard exception filter:
+
+ On Windows this is pointed to by an EXCEPTION_POINTERS structure.
+ We know that the signal handler will call an UnhandledExceptionFilter
+ with this parameter. The spec for this routine is:
+
+ LONG WINAPI UnhandledExceptionFilter(struct _EXCEPTION_POINTERS*);
+
+ So the pointer to struct _EXCEPTION_POINTERS must be somewhere on the
+ stack.
+
+ This was found experimentally to always be at offset 0 of the context
+ frame in all cases handled by this implementation.
+
+ * With the SEH exception handler:
+
+ In this case the signal context is directly on the stack as the SEH
+ exception handler has the following prototype:
+
+ DWORD
+ SEH_error_handler (PEXCEPTION_RECORD ExceptionRecord,
+ PVOID EstablisherFrame,
+ PCONTEXT ContextRecord,
+ PVOID DispatcherContext)
+
+ This was found experimentally to always be at offset 56 of the
+ context frame in all cases handled by this implementation.
+
+ 3) When we have the signal context we just have to save some registers
+ and set the return address based on the program counter (Eip).
+
+ Note that this implementation follows closely the same principles as the
+ GNU/Linux and OSF ones. */
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+/* Patterns found experimentally to be in a Windows signal handler.  */
+
+/* In a standard exception filter */
+
+#define SIG_PAT1 \
+ (pc_[-2] == 0xff && pc_[-1] == 0xd0 /* call %eax */ \
+ && pc_[0] == 0x83 && pc_[1] == 0xf8) /* cmp 0xdepl,%eax */
+
+#define SIG_PAT2 \
+ (pc_[-5] == 0xe8 && pc_[-4] == 0x68 /* call (depl16) */ \
+ && pc_[0] == 0xc3) /* ret */
+
+/* In a Win32 SEH handler */
+
+#define SIG_SEH1 \
+ (pc_[-5] == 0xe8 /* call addr */ \
+ && pc_[0] == 0x83 && pc_[1] == 0xc4 /* add 0xval,%esp */ \
+ && pc_[3] == 0xb8) /* mov 0xval,%eax */
+
+#define SIG_SEH2 \
+ (pc_[-5] == 0x8b && pc_[-4] == 0x4d /* mov depl(%ebp),%ecx */ \
+ && pc_[0] == 0x64 && pc_[1] == 0x8b) /* mov %fs:(0),<reg> */ \
+
+/* In the GCC alloca (stack probing) */
+
+#define SIG_ALLOCA \
+ (pc_[-1] == 0x83 /* orl $0x0,(%ecx) */ \
+ && pc_[0] == 0x9 && pc_[1] == 0 \
+ && pc_[2] == 0x2d && pc_[3] == 0 /* subl $0x1000,%eax */ \
+ && pc_[4] == 0x10 && pc_[5] == 0)
+
+
+#define MD_FALLBACK_FRAME_STATE_FOR i386_w32_fallback_frame_state
+
+static _Unwind_Reason_Code
+i386_w32_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+
+{
+ void * ctx_ra_ = (void *)(context->ra); /* return address */
+ void * ctx_cfa_ = (void *)(context->cfa); /* context frame address */
+ unsigned char * pc_ = (unsigned char *) ctx_ra_;
+
+ /* In the test below we look for two specific patterns found
+ experimentally to be in the Windows signal handler. */
+ if (SIG_PAT1 || SIG_PAT2 || SIG_SEH1 || SIG_SEH2)
+ {
+ PEXCEPTION_POINTERS weinfo_;
+ PCONTEXT proc_ctx_;
+ long new_cfa_;
+
+ if (SIG_SEH1)
+ proc_ctx_ = (PCONTEXT) (*(int*)(ctx_cfa_ + 56));
+ else if (SIG_SEH2)
+ proc_ctx_ = (PCONTEXT) (*(int*)(ctx_cfa_ + 8));
+ else
+ {
+ weinfo_ = (PEXCEPTION_POINTERS) (*(int*)ctx_cfa_);
+ proc_ctx_ = weinfo_->ContextRecord;
+ }
+
+ /* The new context frame address is the stack pointer. */
+ new_cfa_ = proc_ctx_->Esp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column();
+ fs->regs.cfa_offset = new_cfa_ - (long) ctx_cfa_;
+
+ /* Restore registers. */
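+ /* The indices follow the SVR4 DWARF register numbering for i386:
+ 0=eax, 1=ecx, 2=edx, 3=ebx, 5=ebp, 6=esi, 7=edi, 8=eip. */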
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&proc_ctx_->Eax - new_cfa_;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&proc_ctx_->Ebx - new_cfa_;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&proc_ctx_->Ecx - new_cfa_;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&proc_ctx_->Edx - new_cfa_;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&proc_ctx_->Esi - new_cfa_;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&proc_ctx_->Edi - new_cfa_;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&proc_ctx_->Ebp - new_cfa_;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&proc_ctx_->Eip - new_cfa_;
+ fs->retaddr_column = 8;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+ }
+
+ /* Unwinding through _alloca, propagating from a trap triggered by
+ one of its probes prior to the real SP adjustment. The only
+ operation of interest performed is "pushl %ecx", followed by
+ ecx clobbering. */
+ else if (SIG_ALLOCA)
+ {
+ /* Only one push between entry to _alloca and the probe trap. */
+ long new_cfa_ = (long) ctx_cfa_ + 4;
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column();
+ fs->regs.cfa_offset = new_cfa_ - (long) ctx_cfa_;
+
+ /* The saved value of %ecx is at CFA - 4 */
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = -4;
+
+ /* and what is stored at the CFA is the return address. */
+ fs->retaddr_column = 8;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = 0;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+ }
+ else
+ return _URC_END_OF_STACK;
+}
diff --git a/gcc/config/i386/winnt-cxx.c b/gcc/config/i386/winnt-cxx.c
new file mode 100644
index 000000000..0c47e3a8b
--- /dev/null
+++ b/gcc/config/i386/winnt-cxx.c
@@ -0,0 +1,175 @@
+/* Target support for C++ classes on Windows.
+ Contributed by Danny Smith (dannysmith@users.sourceforge.net)
+ Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "cp/cp-tree.h" /* This is why we're a separate module. */
+#include "flags.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "hashtab.h"
+
+bool
+i386_pe_type_dllimport_p (tree decl)
+{
+ gcc_assert (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL);
+
+ if (TARGET_NOP_FUN_DLLIMPORT && TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ /* We ignore the dllimport attribute for inline member functions.
+ This differs from MSVC behavior, which treats it like the GNU C
+ 'extern inline' extension. Also ignore it for template
+ instantiations with linkonce semantics and for artificial methods. */
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && (DECL_DECLARED_INLINE_P (decl)
+ || DECL_TEMPLATE_INSTANTIATION (decl)
+ || DECL_ARTIFICIAL (decl)))
+ return false;
+
+ /* Overrides of the class dllimport decls by out-of-class definitions are
+ handled by tree.c:merge_dllimport_decl_attributes. */
+ return true;
+}
+
+bool
+i386_pe_type_dllexport_p (tree decl)
+{
+ gcc_assert (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL);
+
+ /* Avoid exporting compiler-generated default dtors and copy ctors.
+ The only artificial methods that need to be exported are virtual
+ and non-virtual thunks. */
+ if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE
+ && DECL_ARTIFICIAL (decl) && !DECL_THUNK_P (decl))
+ return false;
+ return true;
+}
+
+static inline void maybe_add_dllimport (tree decl)
+{
+ if (i386_pe_type_dllimport_p (decl))
+ DECL_DLLIMPORT_P (decl) = 1;
+}
+
+static inline void maybe_add_dllexport (tree decl)
+{
+ if (i386_pe_type_dllexport_p (decl))
+ {
+ tree decl_attrs = DECL_ATTRIBUTES (decl);
+ if (lookup_attribute ("dllexport", decl_attrs) != NULL_TREE)
+ /* Already done. */
+ return;
+ DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("dllexport"),
+ NULL_TREE, decl_attrs);
+ }
+}
+
+void
+i386_pe_adjust_class_at_definition (tree t)
+{
+ tree member;
+
+ gcc_assert (CLASS_TYPE_P (t));
+
+
+ if (lookup_attribute ("dllexport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
+ {
+ tree tmv = TYPE_MAIN_VARIANT (t);
+
+ /* Make sure that we set the dllexport attribute on the typeinfo's
+ base declaration, as otherwise it would fail to be exported,
+ since it isn't a class member. */
+ if (tmv != NULL_TREE
+ && CLASSTYPE_TYPEINFO_VAR (tmv) != NULL_TREE)
+ {
+ tree na, ti_decl = CLASSTYPE_TYPEINFO_VAR (tmv);
+ na = tree_cons (get_identifier ("dllexport"), NULL_TREE,
+ NULL_TREE);
+ decl_attributes (&ti_decl, na, 0);
+ }
+
+ /* Check static VAR_DECL's. */
+ for (member = TYPE_FIELDS (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllexport (member);
+
+ /* Check FUNCTION_DECL's. */
+ for (member = TYPE_METHODS (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == FUNCTION_DECL)
+ {
+ tree thunk;
+ maybe_add_dllexport (member);
+
+ /* Also add the attribute to its thunks. */
+ for (thunk = DECL_THUNKS (member); thunk;
+ thunk = TREE_CHAIN (thunk))
+ maybe_add_dllexport (thunk);
+ }
+ /* Check vtables */
+ for (member = CLASSTYPE_VTABLES (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllexport (member);
+ }
+
+ else if (lookup_attribute ("dllimport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
+ {
+ /* We don't actually add the attribute to the decl, just set the flag
+ that signals that the address of this symbol is not a compile-time
+ constant. Any subsequent out-of-class declaration of members will
+ cause the DECL_DLLIMPORT_P flag to be unset.
+ (See tree.c: merge_dllimport_decl_attributes.)
+ That is just right, since out-of-class declarations can only be
+ definitions. */
+
+ /* Check static VAR_DECL's. */
+ for (member = TYPE_FIELDS (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
+
+ /* Check FUNCTION_DECL's. */
+ for (member = TYPE_METHODS (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == FUNCTION_DECL)
+ {
+ tree thunk;
+ maybe_add_dllimport (member);
+
+ /* Also add the attribute to its thunks. */
+ for (thunk = DECL_THUNKS (member); thunk;
+ thunk = DECL_CHAIN (thunk))
+ maybe_add_dllimport (thunk);
+ }
+
+ /* Check vtables */
+ for (member = CLASSTYPE_VTABLES (t); member; member = DECL_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
+
+ /* We leave typeinfo tables alone. We can't mark TI objects as
+ dllimport, since the address of a secondary VTT may be needed
+ for static initialization of a primary VTT. VTT's of
+ dllimport'd classes should always be link-once COMDAT. */
+ }
+}
diff --git a/gcc/config/i386/winnt-stubs.c b/gcc/config/i386/winnt-stubs.c
new file mode 100644
index 000000000..eb4f124bb
--- /dev/null
+++ b/gcc/config/i386/winnt-stubs.c
@@ -0,0 +1,52 @@
+/* Dummy subroutines for language-specific support on Windows.
+ Contributed by Danny Smith (dannysmith@users.sourceforge.net)
+ Copyright (C) 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "hashtab.h"
+
+bool
+i386_pe_type_dllimport_p (tree decl ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+
+bool
+i386_pe_type_dllexport_p (tree decl ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+
+void
+i386_pe_adjust_class_at_definition (tree t ATTRIBUTE_UNUSED)
+{ }
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
new file mode 100644
index 000000000..169832fd3
--- /dev/null
+++ b/gcc/config/i386/winnt.c
@@ -0,0 +1,1134 @@
+/* Subroutines for insn-output.c for Windows NT.
+ Contributed by Douglas Rupp (drupp@cs.washington.edu)
+ Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "hashtab.h"
+#include "langhooks.h"
+#include "ggc.h"
+#include "target.h"
+#include "except.h"
+#include "lto-streamer.h"
+
+/* i386/PE specific attribute support.
+
+ i386/PE has two new attributes:
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times.
+*/
+
+/* Handle a "shared" attribute;
+ arguments as in struct attribute_spec.handler. */
+tree
+ix86_handle_shared_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != VAR_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to variables",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "selectany" attribute;
+ arguments as in struct attribute_spec.handler. */
+tree
+ix86_handle_selectany_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ /* The attribute applies only to objects that are initialized and have
+ external linkage. However, we may not know about initialization
+ until the language frontend has processed the decl. We'll check for
+ initialization later in encode_section_info. */
+ if (TREE_CODE (*node) != VAR_DECL || !TREE_PUBLIC (*node))
+ {
+ error ("%qE attribute applies only to initialized variables"
+ " with external linkage", name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+
+/* Return the type that we should use to determine if DECL is
+ imported or exported. */
+
+static tree
+associated_type (tree decl)
+{
+ return (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl))
+ ? DECL_CONTEXT (decl) : NULL_TREE);
+}
+
+/* Return true if DECL should be a dllexport'd object. */
+
+static bool
+i386_pe_determine_dllexport_p (tree decl)
+{
+ if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ /* Don't export local clones of dllexports. */
+ if (!TREE_PUBLIC (decl))
+ return false;
+
+ if (lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)))
+ return true;
+
+ return false;
+}
+
+/* Return true if DECL should be a dllimport'd object. */
+
+static bool
+i386_pe_determine_dllimport_p (tree decl)
+{
+ tree assoc;
+
+ if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ if (DECL_DLLIMPORT_P (decl))
+ return true;
+
+ /* The DECL_DLLIMPORT_P flag was set for decls in the class definition
+ by targetm.cxx.adjust_class_at_definition. Check again to emit an
+ error message if the class attribute has been overridden by an
+ out-of-class definition of static data. */
+ assoc = associated_type (decl);
+ if (assoc && lookup_attribute ("dllimport", TYPE_ATTRIBUTES (assoc))
+ && TREE_CODE (decl) == VAR_DECL
+ && TREE_STATIC (decl) && TREE_PUBLIC (decl)
+ && !DECL_EXTERNAL (decl)
+ /* vtables are linkonce constants, so defining a vtable is not
+ an error as long as we don't try to import it too. */
+ && !DECL_VIRTUAL_P (decl))
+ error ("definition of static data member %q+D of "
+ "dllimport%'d class", decl);
+
+ return false;
+}
+
+/* Handle the -mno-fun-dllimport target switch. */
+
+bool
+i386_pe_valid_dllimport_attribute_p (const_tree decl)
+{
+ if (TARGET_NOP_FUN_DLLIMPORT && TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+ return true;
+}
+
+/* Return a string which is the function name, identified by ID, modified
+ with a suffix consisting of an atsign (@) followed by the number of
+ bytes of arguments. If ID is NULL, use the DECL_NAME as the base. If
+ FASTCALL is true, also add the FASTCALL_PREFIX.
+ Return NULL if no change is required. */
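+/* For example, under these rules a stdcall function declared as
+ "int foo (int, int)" would be emitted as "foo@8", and the fastcall
+ variant as "@foo@8" (illustrative example, not from upstream). */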
+
+static tree
+gen_stdcall_or_fastcall_suffix (tree decl, tree id, bool fastcall)
+{
+ HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str, *p;
+ tree type = TREE_TYPE (decl);
+ tree arg;
+ function_args_iterator args_iter;
+
+ gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
+
+ if (prototype_p (type))
+ {
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ HOST_WIDE_INT parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ parm_size = ((parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes);
+ total += parm_size;
+ }
+ }
+ /* Assume max of 8 base 10 digits in the suffix. */
+ p = new_str = XALLOCAVEC (char, 1 + strlen (old_str) + 1 + 8 + 1);
+ if (fastcall)
+ *p++ = FASTCALL_PREFIX;
+ sprintf (p, "%s@" HOST_WIDE_INT_PRINT_DEC, old_str, total);
+
+ return get_identifier (new_str);
+}
+
+/* Maybe decorate and get a new identifier for the DECL of a stdcall or
+ fastcall function. The original identifier is supplied in ID. */
+
+static tree
+i386_pe_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = NULL_TREE;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("stdcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_suffix (decl, id, false);
+ else if (lookup_attribute ("fastcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_suffix (decl, id, true);
+ }
+
+ return new_id;
+}
+
+/* Emit an assembler directive to set the visibility of the symbol for
+ DECL to the visibility type VIS, which must not be VISIBILITY_DEFAULT.
+ Since there is no hidden-visibility support in gas for PE, we just
+ warn for user-specified visibility attributes. */
+
+void
+i386_pe_assemble_visibility (tree decl,
+ int vis ATTRIBUTE_UNUSED)
+{
+ if (!decl
+ || !lookup_attribute ("visibility", DECL_ATTRIBUTES (decl)))
+ return;
+ warning (OPT_Wattributes, "visibility attribute not supported "
+ "in this configuration; ignored");
+}
+
+/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
+ in the language-independent default hook
+ langhooks.c:lhd_set_decl_assembler_name ()
+ and in cp/mangle.c:mangle_decl (). */
+tree
+i386_pe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = i386_pe_maybe_mangle_decl_assembler_name (decl, id);
+
+ return (new_id ? new_id : id);
+}
+
+/* This hook behaves the same as varasm.c/assemble_name(), but
+ generates the name into memory rather than outputting it to
+ a file stream. */
+
+tree
+i386_pe_mangle_assembler_name (const char *name ATTRIBUTE_UNUSED)
+{
+ const char *skipped = name + (*name == '*' ? 1 : 0);
+ const char *stripped = targetm.strip_name_encoding (skipped);
+ if (*name != '*' && *user_label_prefix && *stripped != FASTCALL_PREFIX)
+ stripped = ACONCAT ((user_label_prefix, stripped, NULL));
+ return get_identifier (stripped);
+}
+
+void
+i386_pe_encode_section_info (tree decl, rtx rtl, int first)
+{
+ rtx symbol;
+ int flags;
+
+ /* Do this last, due to our frobbing of DECL_DLLIMPORT_P above. */
+ default_encode_section_info (decl, rtl, first);
+
+ /* Careful not to prod global register variables. */
+ if (!MEM_P (rtl))
+ return;
+
+ symbol = XEXP (rtl, 0);
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+
+ switch (TREE_CODE (decl))
+ {
+ case FUNCTION_DECL:
+ /* FIXME: Imported stdcall names are not modified by the Ada frontend.
+ Check and decorate the RTL name now. */
+ if (strcmp (lang_hooks.name, "GNU Ada") == 0)
+ {
+ tree new_id;
+ tree old_id = DECL_ASSEMBLER_NAME (decl);
+ const char* asm_str = IDENTIFIER_POINTER (old_id);
+ /* Do not change the identifier if a verbatim asmspec was given
+ or if the stdcall suffix has already been added. */
+ if (!(*asm_str == '*' || strchr (asm_str, '@'))
+ && (new_id = i386_pe_maybe_mangle_decl_assembler_name (decl,
+ old_id)))
+ XSTR (symbol, 0) = IDENTIFIER_POINTER (new_id);
+ }
+ break;
+
+ case VAR_DECL:
+ if (lookup_attribute ("selectany", DECL_ATTRIBUTES (decl)))
+ {
+ if (DECL_INITIAL (decl)
+ /* If an object is initialized with a ctor, the static
+ initialization and destruction code for it is present in
+ each unit defining the object. The code that calls the
+ ctor is protected by a link-once guard variable, so that
+ the object still has link-once semantics. */
+ || TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (decl)))
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+ else
+ error ("%q+D:'selectany' attribute applies only to "
+ "initialized objects", decl);
+ }
+ break;
+
+ default:
+ return;
+ }
+
+ /* Mark the decl so we can tell from the rtl whether the object is
+ dllexport'd or dllimport'd. tree.c: merge_dllimport_decl_attributes
+ handles dllexport/dllimport override semantics. */
+ flags = (SYMBOL_REF_FLAGS (symbol) &
+ ~(SYMBOL_FLAG_DLLIMPORT | SYMBOL_FLAG_DLLEXPORT));
+ if (i386_pe_determine_dllexport_p (decl))
+ flags |= SYMBOL_FLAG_DLLEXPORT;
+ else if (i386_pe_determine_dllimport_p (decl))
+ flags |= SYMBOL_FLAG_DLLIMPORT;
+
+ SYMBOL_REF_FLAGS (symbol) = flags;
+}
+
+bool
+i386_pe_binds_local_p (const_tree exp)
+{
+ /* PE does not do dynamic binding. Indeed, the only kind of
+ non-local reference comes from a dllimport'd symbol. */
+ if ((TREE_CODE (exp) == VAR_DECL || TREE_CODE (exp) == FUNCTION_DECL)
+ && DECL_DLLIMPORT_P (exp))
+ return false;
+
+ /* Or a weak one, now that they are supported. */
+ if ((TREE_CODE (exp) == VAR_DECL || TREE_CODE (exp) == FUNCTION_DECL)
+ && DECL_WEAK (exp))
+ return false;
+
+ return true;
+}
+
+/* Also strip the fastcall prefix and stdcall suffix. */
+
+const char *
+i386_pe_strip_name_encoding_full (const char *str)
+{
+ const char *p;
+ const char *name = default_strip_name_encoding (str);
+
+ /* Strip leading '@' on fastcall symbols. */
+ if (*name == '@')
+ name++;
+
+ /* Strip trailing "@n". */
+ p = strchr (name, '@');
+ if (p)
+ return ggc_alloc_string (name, p - name);
+
+ return name;
+}
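+
+/* Editorial sketch (not part of the upstream patch): the fastcall
+ name "@fastcall_fn@8" comes back as "fastcall_fn", while a stdcall
+ name such as "stdcall_fn@8" merely loses its "@8" argument-size
+ suffix. */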
+
+void
+i386_pe_unique_section (tree decl, int reloc)
+{
+ int len;
+ const char *name, *prefix;
+ char *string;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = i386_pe_strip_name_encoding_full (name);
+
+ /* The object is put in, for example, section .text$foo.
+ The linker will then ultimately place them in .text
+ (everything from the $ on is stripped). Don't put
+ read-only data in .rdata section to avoid a PE linker
+ bug when .rdata$* grouped sections are used in code
+ without a .rdata section. */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ prefix = ".text$";
+ else if (decl_readonly_section (decl, reloc))
+ prefix = ".rdata$";
+ else
+ prefix = ".data$";
+ len = strlen (name) + strlen (prefix);
+ string = XALLOCAVEC (char, len + 1);
+ sprintf (string, "%s%s", prefix, name);
+
+ DECL_SECTION_NAME (decl) = build_string (len, string);
+}
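+
+/* Editorial sketch: a one-only function bar lands in section
+ ".text$bar" and a writable variable baz in ".data$baz"; the PE
+ linker strips everything from the '$' onward, folding them back
+ into ".text" and ".data" respectively. */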
+
+/* Select a set of attributes for section NAME based on the properties
+ of DECL and whether or not RELOC indicates that DECL's initializer
+ might contain runtime relocations.
+
+ We make the section read-only and executable for a function decl,
+ read-only for a const data decl, and writable for a non-const data decl.
+
+ If the section has already been defined, do not allow it to have
+ different attributes, as (1) this is ambiguous since we're not seeing
+ all the declarations up front and (2) some assemblers (e.g. SVR4)
+ do not recognize section redefinitions. */
+/* ??? This differs from the "standard" PE implementation in that we
+ handle the SHARED variable attribute. Should this be done for all
+ PE targets? */
+
+#define SECTION_PE_SHARED SECTION_MACH_DEP
+
+unsigned int
+i386_pe_section_type_flags (tree decl, const char *name, int reloc)
+{
+ static htab_t htab;
+ unsigned int flags;
+ unsigned int **slot;
+
+ /* The names we put in the hashtable will always be the unique
+ versions given to us by the stringtable, so we can just use
+ their addresses as the keys. */
+ if (!htab)
+ htab = htab_create (31, htab_hash_pointer, htab_eq_pointer, NULL);
+
+ if (decl && TREE_CODE (decl) == FUNCTION_DECL)
+ flags = SECTION_CODE;
+ else if (decl && decl_readonly_section (decl, reloc))
+ flags = 0;
+ else
+ {
+ flags = SECTION_WRITE;
+
+ if (decl && TREE_CODE (decl) == VAR_DECL
+ && lookup_attribute ("shared", DECL_ATTRIBUTES (decl)))
+ flags |= SECTION_PE_SHARED;
+ }
+
+ if (decl && DECL_ONE_ONLY (decl))
+ flags |= SECTION_LINKONCE;
+
+ /* See if we already have an entry for this section. */
+ slot = (unsigned int **) htab_find_slot (htab, name, INSERT);
+ if (!*slot)
+ {
+ *slot = (unsigned int *) xmalloc (sizeof (unsigned int));
+ **slot = flags;
+ }
+ else
+ {
+ if (decl && **slot != flags)
+ error ("%q+D causes a section type conflict", decl);
+ }
+
+ return flags;
+}
+
+void
+i386_pe_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ char flagchars[8], *f = flagchars;
+
+ if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0)
+ /* readonly data */
+ {
+ *f++ = 'd'; /* This is necessary for older versions of gas. */
+ *f++ = 'r';
+ }
+ else
+ {
+ if (flags & SECTION_CODE)
+ *f++ = 'x';
+ if (flags & SECTION_WRITE)
+ *f++ = 'w';
+ if (flags & SECTION_PE_SHARED)
+ *f++ = 's';
+ }
+
+ /* LTO sections need 1-byte alignment to avoid confusing the
+ zlib decompression algorithm with trailing zero pad bytes. */
+ if (strncmp (name, LTO_SECTION_NAME_PREFIX,
+ strlen (LTO_SECTION_NAME_PREFIX)) == 0)
+ *f++ = '0';
+
+ *f = '\0';
+
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\"\n", name, flagchars);
+
+ if (flags & SECTION_LINKONCE)
+ {
+ /* Functions may have been compiled at various levels of
+ optimization so we can't use `same_size' here.
+ Instead, have the linker pick one, without warning.
+ If 'selectany' attribute has been specified, MS compiler
+ sets 'discard' characteristic, rather than telling linker
+ to warn of size or content mismatch, so do the same. */
+ bool discard = (flags & SECTION_CODE)
+ || lookup_attribute ("selectany",
+ DECL_ATTRIBUTES (decl));
+ fprintf (asm_out_file, "\t.linkonce %s\n",
+ (discard ? "discard" : "same_size"));
+ }
+}
+
+/* Beware, DECL may be NULL if compile_file() is emitting the LTO marker. */
+
+void
+i386_pe_asm_output_aligned_decl_common (FILE *stream, tree decl,
+ const char *name, HOST_WIDE_INT size,
+ HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT rounded;
+
+ /* Compute as in assemble_noswitch_variable, since we don't have
+ support for aligned common on older binutils. We must also
+ avoid emitting a common symbol of size zero, as this is the
+ overloaded representation that indicates an undefined external
+ symbol in the PE object file format. */
+ rounded = size ? size : 1;
+ rounded += (BIGGEST_ALIGNMENT / BITS_PER_UNIT) - 1;
+ rounded = (rounded / (BIGGEST_ALIGNMENT / BITS_PER_UNIT)
+ * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
+
+ i386_pe_maybe_record_exported_symbol (decl, name, 1);
+
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ if (use_pe_aligned_common)
+ fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC ", %d\n",
+ size ? size : (HOST_WIDE_INT) 1,
+ exact_log2 (align) - exact_log2 (CHAR_BIT));
+ else
+ fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC "\t" ASM_COMMENT_START
+ " " HOST_WIDE_INT_PRINT_DEC "\n", rounded, size);
+}
+
+/* The Microsoft linker requires that every function be marked as
+ DT_FCN. When using gas on cygwin, we must emit appropriate .type
+ directives. */
+
+#include "gsyms.h"
+
+/* Mark a function appropriately. This should only be called for
+ functions for which we are not emitting COFF debugging information.
+ FILE is the assembler output file, NAME is the name of the
+ function, and PUB is nonzero if the function is globally
+ visible. */
+
+void
+i386_pe_declare_function_type (FILE *file, const char *name, int pub)
+{
+ fprintf (file, "\t.def\t");
+ assemble_name (file, name);
+ fprintf (file, ";\t.scl\t%d;\t.type\t%d;\t.endef\n",
+ pub ? (int) C_EXT : (int) C_STAT,
+ (int) DT_FCN << N_BTSHFT);
+}
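+
+/* Editorial sketch: assuming the usual COFF values from gsyms.h
+ (C_EXT == 2, DT_FCN << N_BTSHFT == 32), a public function foo is
+ declared as:
+ .def foo; .scl 2; .type 32; .endef */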
+
+/* Keep a list of external functions. */
+
+struct GTY(()) extern_list
+{
+ struct extern_list *next;
+ tree decl;
+ const char *name;
+};
+
+static GTY(()) struct extern_list *extern_head;
+
+/* Assemble an external function reference. We need to keep a list of
+ these, so that we can output the function types at the end of the
+ assembly. We can't output the types now, because we might see a
+ definition of the function later on and emit debugging information
+ for it then. */
+
+void
+i386_pe_record_external_function (tree decl, const char *name)
+{
+ struct extern_list *p;
+
+ p = ggc_alloc_extern_list ();
+ p->next = extern_head;
+ p->decl = decl;
+ p->name = name;
+ extern_head = p;
+}
+
+/* Keep a list of exported symbols. */
+
+struct GTY(()) export_list
+{
+ struct export_list *next;
+ const char *name;
+ int is_data; /* Used to type-tag exported symbols. */
+};
+
+static GTY(()) struct export_list *export_head;
+
+/* Assemble an export symbol entry. We need to keep a list of
+ these, so that we can output the export list at the end of the
+ assembly. We used to output these export symbols in each function,
+ but that causes problems with GNU ld when the sections are
+ linkonce. Beware, DECL may be NULL if compile_file() is emitting
+ the LTO marker. */
+
+void
+i386_pe_maybe_record_exported_symbol (tree decl, const char *name, int is_data)
+{
+ rtx symbol;
+ struct export_list *p;
+
+ if (!decl)
+ return;
+
+ symbol = XEXP (DECL_RTL (decl), 0);
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ if (!SYMBOL_REF_DLLEXPORT_P (symbol))
+ return;
+
+ gcc_assert (TREE_PUBLIC (decl));
+
+ p = ggc_alloc_export_list ();
+ p->next = export_head;
+ p->name = name;
+ p->is_data = is_data;
+ export_head = p;
+}
+
+#ifdef CXX_WRAP_SPEC_LIST
+
+/* Hash table equality helper function. */
+
+static int
+wrapper_strcmp (const void *x, const void *y)
+{
+ return !strcmp ((const char *) x, (const char *) y);
+}
+
+/* Search for a function named TARGET in the list of library wrappers
+ we are using, returning a pointer to it if found or NULL if not.
+ This function might be called on quite a few symbols, and we only
+ have the list of names of wrapped functions available to us as a
+ spec string, so first time round we lazily initialise a hash table
+ to make things quicker. */
+
+static const char *
+i386_find_on_wrapper_list (const char *target)
+{
+ static char first_time = 1;
+ static htab_t wrappers;
+
+ if (first_time)
+ {
+ /* Beware that this is not a complicated parser, it assumes
+ that any sequence of non-whitespace beginning with an
+ underscore is one of the wrapped symbols. For now that's
+ adequate to distinguish symbols from spec substitutions
+ and command-line options. */
+ static char wrapper_list_buffer[] = CXX_WRAP_SPEC_LIST;
+ char *bufptr;
+ /* Break up the char array into separate strings and
+ enter them into the hash table. */
+ wrappers = htab_create_alloc (8, htab_hash_string, wrapper_strcmp,
+ 0, xcalloc, free);
+ for (bufptr = wrapper_list_buffer; *bufptr; ++bufptr)
+ {
+ char *found = NULL;
+ if (ISSPACE (*bufptr))
+ continue;
+ if (*bufptr == '_')
+ found = bufptr;
+ while (*bufptr && !ISSPACE (*bufptr))
+ ++bufptr;
+ if (*bufptr)
+ *bufptr = 0;
+ if (found)
+ *htab_find_slot (wrappers, found, INSERT) = found;
+ }
+ first_time = 0;
+ }
+
+ return (const char *) htab_find (wrappers, target);
+}
+
+#endif /* CXX_WRAP_SPEC_LIST */
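+
+/* Editorial sketch: CXX_WRAP_SPEC_LIST is expected to be a
+ whitespace-separated spec string of underscore-prefixed symbol
+ names, e.g. (hypothetical) " _Znwj _Znaj _ZdlPv _ZdaPv"; each
+ token found by the scanner above becomes a hash-table key. */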
+
+/* This is called at the end of assembly. For each external function
+ which has not been defined, we output a declaration now. We also
+ output the .drectve section. */
+
+void
+i386_pe_file_end (void)
+{
+ struct extern_list *p;
+
+ for (p = extern_head; p != NULL; p = p->next)
+ {
+ tree decl;
+
+ decl = p->decl;
+
+ /* Positively ensure only one declaration for any given symbol. */
+ if (! TREE_ASM_WRITTEN (decl)
+ && TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+ {
+#ifdef CXX_WRAP_SPEC_LIST
+ /* To ensure the DLL that provides the corresponding real
+ functions is still loaded at runtime, we must reference
+ the real function so that an (unused) import is created. */
+ const char *realsym = i386_find_on_wrapper_list (p->name);
+ if (realsym)
+ i386_pe_declare_function_type (asm_out_file,
+ concat ("__real_", realsym, NULL), TREE_PUBLIC (decl));
+#endif /* CXX_WRAP_SPEC_LIST */
+ TREE_ASM_WRITTEN (decl) = 1;
+ i386_pe_declare_function_type (asm_out_file, p->name,
+ TREE_PUBLIC (decl));
+ }
+ }
+
+ if (export_head)
+ {
+ struct export_list *q;
+ drectve_section ();
+ for (q = export_head; q != NULL; q = q->next)
+ {
+ fprintf (asm_out_file, "\t.ascii \" -export:\\\"%s\\\"%s\"\n",
+ default_strip_name_encoding (q->name),
+ (q->is_data ? ",data" : ""));
+ }
+ }
+}
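+
+/* Editorial sketch: for a dllexport'd function foo and a dllexport'd
+ variable bar, the loop above emits into the .drectve section:
+ .ascii " -export:\"foo\""
+ .ascii " -export:\"bar\",data" */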
+
+
+/* x64 Structured Exception Handling unwind info. */
+
+struct seh_frame_state
+{
+ /* SEH records saves relative to the "current" stack pointer, whether
+ or not there's a frame pointer in place. This tracks the current
+ stack pointer offset from the CFA. */
+ HOST_WIDE_INT sp_offset;
+
+ /* The CFA is located at CFA_REG + CFA_OFFSET. */
+ HOST_WIDE_INT cfa_offset;
+ rtx cfa_reg;
+};
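+
+/* Editorial walkthrough (a sketch, not upstream text): while the CFA
+ register is still the stack pointer, a prologue such as
+ push %rbp -> .seh_pushreg %rbp (sp_offset += 8)
+ sub $40, %rsp -> .seh_stackalloc 40 (sp_offset += 40)
+ advances sp_offset and cfa_offset in lockstep; once a frame pointer
+ is established, .seh_setframe records its distance from the current
+ stack pointer. */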
+
+/* Set up data structures beginning output for SEH. */
+
+void
+i386_pe_seh_init (FILE *f)
+{
+ struct seh_frame_state *seh;
+
+ if (!TARGET_SEH)
+ return;
+ if (cfun->is_thunk)
+ return;
+
+ /* We cannot support DRAP with SEH. We turned off support for it by
+ re-defining MAX_STACK_ALIGNMENT when SEH is enabled. */
+ gcc_assert (!stack_realign_drap);
+
+ seh = XCNEW (struct seh_frame_state);
+ cfun->machine->seh = seh;
+
+ seh->sp_offset = INCOMING_FRAME_SP_OFFSET;
+ seh->cfa_offset = INCOMING_FRAME_SP_OFFSET;
+ seh->cfa_reg = stack_pointer_rtx;
+
+ fputs ("\t.seh_proc\t", f);
+ assemble_name (f, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (cfun->decl)));
+ fputc ('\n', f);
+}
+
+void
+i386_pe_seh_end_prologue (FILE *f)
+{
+ struct seh_frame_state *seh;
+
+ if (!TARGET_SEH)
+ return;
+ if (cfun->is_thunk)
+ return;
+ seh = cfun->machine->seh;
+
+ /* Emit an assembler directive to set up the frame pointer. Always do
+ this last. The documentation talks about doing this "before" any
+ other code that uses offsets, but (experimentally) that's after we
+ emit the codes in reverse order (handled by the assembler). */
+ if (seh->cfa_reg != stack_pointer_rtx)
+ {
+ HOST_WIDE_INT offset = seh->sp_offset - seh->cfa_offset;
+
+ gcc_assert ((offset & 15) == 0);
+ gcc_assert (IN_RANGE (offset, 0, 240));
+
+ fputs ("\t.seh_setframe\t", f);
+ print_reg (seh->cfa_reg, 0, f);
+ fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
+ }
+
+ XDELETE (seh);
+ cfun->machine->seh = NULL;
+
+ fputs ("\t.seh_endprologue\n", f);
+}
+
+static void
+i386_pe_seh_fini (FILE *f)
+{
+ if (!TARGET_SEH)
+ return;
+ if (cfun->is_thunk)
+ return;
+ fputs ("\t.seh_endproc\n", f);
+}
+
+/* Emit an assembler directive to save REG via a PUSH. */
+
+static void
+seh_emit_push (FILE *f, struct seh_frame_state *seh, rtx reg)
+{
+ unsigned int regno = REGNO (reg);
+
+ gcc_checking_assert (GENERAL_REGNO_P (regno));
+
+ seh->sp_offset += UNITS_PER_WORD;
+ if (seh->cfa_reg == stack_pointer_rtx)
+ seh->cfa_offset += UNITS_PER_WORD;
+
+ fputs ("\t.seh_pushreg\t", f);
+ print_reg (reg, 0, f);
+ fputc ('\n', f);
+}
+
+/* Emit an assembler directive to save REG at CFA - CFA_OFFSET. */
+
+static void
+seh_emit_save (FILE *f, struct seh_frame_state *seh,
+ rtx reg, HOST_WIDE_INT cfa_offset)
+{
+ unsigned int regno = REGNO (reg);
+ HOST_WIDE_INT offset;
+
+ /* Negative save offsets are of course not supported, since that
+ would be a store below the stack pointer and thus clobberable. */
+ gcc_assert (seh->sp_offset >= cfa_offset);
+ offset = seh->sp_offset - cfa_offset;
+
+ fputs ((SSE_REGNO_P (regno) ? "\t.seh_savexmm\t"
+ : GENERAL_REGNO_P (regno) ? "\t.seh_savereg\t"
+ : (gcc_unreachable (), "")), f);
+ print_reg (reg, 0, f);
+ fprintf (f, ", " HOST_WIDE_INT_PRINT_DEC "\n", offset);
+}
+
+/* Emit an assembler directive to adjust RSP by OFFSET. */
+
+static void
+seh_emit_stackalloc (FILE *f, struct seh_frame_state *seh,
+ HOST_WIDE_INT offset)
+{
+ /* We're only concerned with prologue stack allocations, which all
+ are subtractions from the stack pointer. */
+ gcc_assert (offset < 0);
+ offset = -offset;
+
+ if (seh->cfa_reg == stack_pointer_rtx)
+ seh->cfa_offset += offset;
+ seh->sp_offset += offset;
+
+ fprintf (f, "\t.seh_stackalloc\t" HOST_WIDE_INT_PRINT_DEC "\n", offset);
+}
+
+/* Process REG_CFA_ADJUST_CFA for SEH. */
+
+static void
+seh_cfa_adjust_cfa (FILE *f, struct seh_frame_state *seh, rtx pat)
+{
+ rtx dest, src;
+ HOST_WIDE_INT reg_offset = 0;
+ unsigned int dest_regno;
+
+ dest = SET_DEST (pat);
+ src = SET_SRC (pat);
+
+ if (GET_CODE (src) == PLUS)
+ {
+ reg_offset = INTVAL (XEXP (src, 1));
+ src = XEXP (src, 0);
+ }
+ else if (GET_CODE (src) == MINUS)
+ {
+ reg_offset = -INTVAL (XEXP (src, 1));
+ src = XEXP (src, 0);
+ }
+ gcc_assert (src == stack_pointer_rtx);
+ gcc_assert (seh->cfa_reg == stack_pointer_rtx);
+ dest_regno = REGNO (dest);
+
+ if (dest_regno == STACK_POINTER_REGNUM)
+ seh_emit_stackalloc (f, seh, reg_offset);
+ else if (dest_regno == HARD_FRAME_POINTER_REGNUM)
+ {
+ seh->cfa_reg = dest;
+ seh->cfa_offset -= reg_offset;
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Process REG_CFA_OFFSET for SEH. */
+
+static void
+seh_cfa_offset (FILE *f, struct seh_frame_state *seh, rtx pat)
+{
+ rtx dest, src;
+ HOST_WIDE_INT reg_offset;
+
+ dest = SET_DEST (pat);
+ src = SET_SRC (pat);
+
+ gcc_assert (MEM_P (dest));
+ dest = XEXP (dest, 0);
+ if (REG_P (dest))
+ reg_offset = 0;
+ else
+ {
+ gcc_assert (GET_CODE (dest) == PLUS);
+ reg_offset = INTVAL (XEXP (dest, 1));
+ dest = XEXP (dest, 0);
+ }
+ gcc_assert (dest == seh->cfa_reg);
+
+ seh_emit_save (f, seh, src, seh->cfa_offset - reg_offset);
+}
+
+/* Process a FRAME_RELATED_EXPR for SEH. */
+
+static void
+seh_frame_related_expr (FILE *f, struct seh_frame_state *seh, rtx pat)
+{
+ rtx dest, src;
+ HOST_WIDE_INT addend;
+
+ /* See the full loop in dwarf2out_frame_debug_expr. */
+ if (GET_CODE (pat) == PARALLEL || GET_CODE (pat) == SEQUENCE)
+ {
+ int i, n = XVECLEN (pat, 0), pass, npass;
+
+ npass = (GET_CODE (pat) == PARALLEL ? 2 : 1);
+ for (pass = 0; pass < npass; ++pass)
+ for (i = 0; i < n; ++i)
+ {
+ rtx ele = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (ele) != SET)
+ continue;
+ dest = SET_DEST (ele);
+
+ /* Process each member of the PARALLEL independently. The first
+ member is always processed; others only if they are marked. */
+ if (i == 0 || RTX_FRAME_RELATED_P (ele))
+ {
+ /* Evaluate all register saves in the first pass and all
+ register updates in the second pass. */
+ if ((MEM_P (dest) ^ pass) || npass == 1)
+ seh_frame_related_expr (f, seh, ele);
+ }
+ }
+ return;
+ }
+
+ dest = SET_DEST (pat);
+ src = SET_SRC (pat);
+
+ switch (GET_CODE (dest))
+ {
+ case REG:
+ switch (GET_CODE (src))
+ {
+ case REG:
+ /* REG = REG: This should be establishing a frame pointer. */
+ gcc_assert (src == stack_pointer_rtx);
+ gcc_assert (dest == hard_frame_pointer_rtx);
+ seh_cfa_adjust_cfa (f, seh, pat);
+ break;
+
+ case PLUS:
+ addend = INTVAL (XEXP (src, 1));
+ src = XEXP (src, 0);
+ if (dest == hard_frame_pointer_rtx)
+ seh_cfa_adjust_cfa (f, seh, pat);
+ else if (dest == stack_pointer_rtx)
+ {
+ gcc_assert (src == stack_pointer_rtx);
+ seh_emit_stackalloc (f, seh, addend);
+ }
+ else
+ gcc_unreachable ();
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case MEM:
+ /* A save of some kind. */
+ dest = XEXP (dest, 0);
+ if (GET_CODE (dest) == PRE_DEC)
+ {
+ gcc_checking_assert (GET_MODE (src) == Pmode);
+ gcc_checking_assert (REG_P (src));
+ seh_emit_push (f, seh, src);
+ }
+ else
+ seh_cfa_offset (f, seh, pat);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* This function looks at a single insn and emits any SEH directives
+ required for unwind of this insn. */
+
+void
+i386_pe_seh_unwind_emit (FILE *asm_out_file, rtx insn)
+{
+ rtx note, pat;
+ bool handled_one = false;
+ struct seh_frame_state *seh;
+
+ if (!TARGET_SEH)
+ return;
+
+ /* We free the SEH data once done with the prologue. Ignore those
+ RTX_FRAME_RELATED_P insns that are associated with the epilogue. */
+ seh = cfun->machine->seh;
+ if (seh == NULL)
+ return;
+
+ if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
+ return;
+
+ for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
+ {
+ pat = XEXP (note, 0);
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_FRAME_RELATED_EXPR:
+ goto found;
+
+ case REG_CFA_DEF_CFA:
+ case REG_CFA_EXPRESSION:
+ /* Only emitted with DRAP, which we disable. */
+ gcc_unreachable ();
+ break;
+
+ case REG_CFA_REGISTER:
+ /* Only emitted in epilogues, which we skip. */
+ gcc_unreachable ();
+
+ case REG_CFA_ADJUST_CFA:
+ if (pat == NULL)
+ {
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ }
+ seh_cfa_adjust_cfa (asm_out_file, seh, pat);
+ handled_one = true;
+ break;
+
+ case REG_CFA_OFFSET:
+ if (pat == NULL)
+ pat = single_set (insn);
+ seh_cfa_offset (asm_out_file, seh, pat);
+ handled_one = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (handled_one)
+ return;
+ pat = PATTERN (insn);
+ found:
+ seh_frame_related_expr (asm_out_file, seh, pat);
+}
+
+void
+i386_pe_start_function (FILE *f, const char *name, tree decl)
+{
+ i386_pe_maybe_record_exported_symbol (decl, name, 0);
+ if (write_symbols != SDB_DEBUG)
+ i386_pe_declare_function_type (f, name, TREE_PUBLIC (decl));
+ /* In case section was altered by debugging output. */
+ if (decl != NULL_TREE)
+ switch_to_section (function_section (decl));
+ ASM_OUTPUT_FUNCTION_LABEL (f, name, decl);
+}
+
+void
+i386_pe_end_function (FILE *f, const char *name ATTRIBUTE_UNUSED,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ i386_pe_seh_fini (f);
+}
+
+
+#include "gt-winnt.h"
diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h
new file mode 100644
index 000000000..2c4bdc99a
--- /dev/null
+++ b/gcc/config/i386/wmmintrin.h
@@ -0,0 +1,120 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.1. */
+
+#ifndef _WMMINTRIN_H_INCLUDED
+#define _WMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE2 header file. */
+#include <emmintrin.h>
+
+#if !defined (__AES__) && !defined (__PCLMUL__)
+# error "AES/PCLMUL instructions not enabled"
+#else
+
+/* AES */
+
+#ifdef __AES__
+/* Performs 1 round of AES decryption of the first m128i using
+ the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdec_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES decryption of the first m128i
+ using the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
+ (__v2di)__Y);
+}
+
+/* Performs 1 round of AES encryption of the first m128i using
+ the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenc_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES encryption of the first m128i
+ using the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the InverseMixColumn operation on the source m128i
+ and stores the result into m128i destination. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesimc_si128 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
+}
+
+/* Generates a m128i round key for the input m128i AES cipher key and
+ byte round constant. The second parameter must be a compile time
+ constant. */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
+{
+ return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
+}
+#else
+#define _mm_aeskeygenassist_si128(X, C) \
+ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
+ (int)(C)))
+#endif
+#endif /* __AES__ */
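+
+/* Editorial usage sketch, not part of the upstream header: a full
+ AES-128 block encryption built from the intrinsics above, assuming
+ -maes and eleven pre-expanded round keys (key expansion itself would
+ use _mm_aeskeygenassist_si128). */
+#if 0
+static __inline __m128i
+aes128_encrypt_block (__m128i __block, const __m128i __rk[11])
+{
+ int __i;
+ __block = _mm_xor_si128 (__block, __rk[0]); /* initial AddRoundKey */
+ for (__i = 1; __i < 10; ++__i)
+ __block = _mm_aesenc_si128 (__block, __rk[__i]); /* rounds 1..9 */
+ return _mm_aesenclast_si128 (__block, __rk[10]); /* final round */
+}
+#endif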
+
+/* PCLMUL */
+
+#ifdef __PCLMUL__
+/* Performs carry-less integer multiplication of 64-bit halves of
+ 128-bit input operands. The third parameter indicates which 64-bit
+ halves of the input parameters v1 and v2 should be used. It must be
+ a compile time constant. */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
+{
+ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
+ (__v2di)__Y, __I);
+}
+#else
+#define _mm_clmulepi64_si128(X, Y, I) \
+ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(I)))
+#endif
+#endif /* __PCLMUL__ */
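+
+/* Editorial sketch: bit 0 of the immediate selects the 64-bit half of
+ the first operand and bit 4 the half of the second, so the four
+ partial products of a 128x128-bit carry-less multiply are
+ (hypothetical names):
+ lo_lo = _mm_clmulepi64_si128 (a, b, 0x00);
+ hi_lo = _mm_clmulepi64_si128 (a, b, 0x01);
+ lo_hi = _mm_clmulepi64_si128 (a, b, 0x10);
+ hi_hi = _mm_clmulepi64_si128 (a, b, 0x11); */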
+
+#endif /* __AES__/__PCLMUL__ */
+
+#endif /* _WMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/x-cygwin b/gcc/config/i386/x-cygwin
new file mode 100644
index 000000000..752af76ef
--- /dev/null
+++ b/gcc/config/i386/x-cygwin
@@ -0,0 +1,4 @@
+host-cygwin.o : $(srcdir)/config/i386/host-cygwin.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) toplev.h diagnostic.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/host-cygwin.c
diff --git a/gcc/config/i386/x-darwin b/gcc/config/i386/x-darwin
new file mode 100644
index 000000000..f0196bac4
--- /dev/null
+++ b/gcc/config/i386/x-darwin
@@ -0,0 +1,4 @@
+host-i386-darwin.o : $(srcdir)/config/i386/host-i386-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \
+ config/host-darwin.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/i386/x-i386 b/gcc/config/i386/x-i386
new file mode 100644
index 000000000..2bf8fed5d
--- /dev/null
+++ b/gcc/config/i386/x-i386
@@ -0,0 +1,4 @@
+driver-i386.o : $(srcdir)/config/i386/driver-i386.c \
+ $(srcdir)/config/i386/cpuid.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/i386/x-mingw32 b/gcc/config/i386/x-mingw32
new file mode 100644
index 000000000..2a1ca47c7
--- /dev/null
+++ b/gcc/config/i386/x-mingw32
@@ -0,0 +1,31 @@
+# Copyright (C) 2003, 2004, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+#
+#
+# Make local_includedir relative to EXEC_PREFIX
+#
+local_includedir=$(libsubdir)/$(unlibsubdir)/..`echo $(exec_prefix) | sed -e 's|^$(prefix)||' -e 's|/[^/]*|/..|g'`/include
+
+# On MinGW, we use "%I64d" to print 64-bit integers, and the format-checking
+# code does not handle that, so we have to disable checking here.
+WERROR_FLAGS += -Wno-format
+
+host-mingw32.o : $(srcdir)/config/i386/host-mingw32.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h toplev.h $(DIAGNOSTIC_H) $(HOOKS_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/host-mingw32.c
diff --git a/gcc/config/i386/x86-64.h b/gcc/config/i386/x86-64.h
new file mode 100644
index 000000000..b85dab9cd
--- /dev/null
+++ b/gcc/config/i386/x86-64.h
@@ -0,0 +1,106 @@
+/* OS independent definitions for AMD x86-64.
+ Copyright (C) 2001, 2005, 2007, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* Output assembler code to FILE to call the profiler. */
+#define NO_PROFILE_COUNTERS 1
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "mcount"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{m32:--32} %{m64:--64}"
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ x86_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN);
+
+/* This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else { \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ /* Make sure that we have at least 8 byte alignment if > 8 byte \
+ alignment is preferred. */ \
+ if ((LOG) > 3 \
+ && (1 << (LOG)) > ((MAX_SKIP) + 1) \
+ && (MAX_SKIP) >= 7) \
+ fputs ("\t.p2align 3\n", (FILE)); \
+ } \
+ } \
+ } while (0)
+#undef ASM_OUTPUT_MAX_SKIP_PAD
+#define ASM_OUTPUT_MAX_SKIP_PAD(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) \
+ { \
+ if ((MAX_SKIP) == 0) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
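+
+/* Editorial sketch: with LOG == 4 and MAX_SKIP == 10 the macro above
+ emits ".p2align 4,,10", i.e. align to a 16-byte boundary only when
+ that costs at most 10 padding bytes; the extra ".p2align 3" still
+ guarantees 8-byte alignment when the larger request may be
+ skipped. */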
+
+
+/* i386 System V Release 4 uses DWARF debugging info.
+ x86-64 ABI specifies DWARF2. */
+
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_UNWIND_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION x86_64_elf_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION x86_64_elf_unique_section
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
new file mode 100644
index 000000000..36b43df87
--- /dev/null
+++ b/gcc/config/i386/x86intrin.h
@@ -0,0 +1,96 @@
+/* Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+#define _X86INTRIN_H_INCLUDED
+
+#include <ia32intrin.h>
+
+#ifdef __MMX__
+#include <mmintrin.h>
+#endif
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#endif
+
+#ifdef __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#ifdef __SSE4A__
+#include <ammintrin.h>
+#endif
+
+#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#if defined (__AES__) || defined (__PCLMUL__)
+#include <wmmintrin.h>
+#endif
+
+/* For including AVX instructions */
+#include <immintrin.h>
+
+#ifdef __3dNOW__
+#include <mm3dnow.h>
+#endif
+
+#ifdef __FMA4__
+#include <fma4intrin.h>
+#endif
+
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+
+#ifdef __LWP__
+#include <lwpintrin.h>
+#endif
+
+#ifdef __ABM__
+#include <abmintrin.h>
+#endif
+
+#ifdef __BMI__
+#include <bmiintrin.h>
+#endif
+
+#ifdef __TBM__
+#include <tbmintrin.h>
+#endif
+
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
+#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xm-cygwin.h b/gcc/config/i386/xm-cygwin.h
new file mode 100644
index 000000000..bd2238729
--- /dev/null
+++ b/gcc/config/i386/xm-cygwin.h
@@ -0,0 +1,22 @@
+/* Configuration for GCC for hosting on Windows NT,
+ using a unix-style C library.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
diff --git a/gcc/config/i386/xm-djgpp.h b/gcc/config/i386/xm-djgpp.h
new file mode 100644
index 000000000..c3758ea9e
--- /dev/null
+++ b/gcc/config/i386/xm-djgpp.h
@@ -0,0 +1,84 @@
+/* Configuration for GCC for Intel 80386 running DJGPP.
+ Copyright (C) 1988, 1996, 1998, 1999, 2000, 2001, 2004, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use semicolons to separate elements of a path. */
+#define PATH_SEPARATOR ';'
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
+
+/* System dependent initialization for collect2
+ to tell system() to act like Unix. */
+#define COLLECT2_HOST_INITIALIZATION \
+ do { __system_flags |= (__system_allow_multiple_cmds \
+ | __system_emulate_chdir); } while (0)
+
+/* Define a version appropriate for DOS. */
+#undef XREF_FILE_NAME
+#define XREF_FILE_NAME(xref_file, file) \
+ do { \
+ const char xref_ext[] = ".gxref"; \
+ strcpy (xref_file, file); \
+ s = basename (xref_file); \
+ t = strchr (s, '.'); \
+ if (t) \
+ strcpy (t, xref_ext); \
+ else \
+ strcat (xref_file, xref_ext); \
+ } while (0)
+
+#undef GCC_DRIVER_HOST_INITIALIZATION
+#define GCC_DRIVER_HOST_INITIALIZATION \
+ do { \
+ /* If the environment variable DJDIR is not defined, then DJGPP is not \
+ installed correctly and GCC will quickly become confused with the \
+ default prefix settings. Report the problem now so the user doesn't \
+ receive deceptive "file not found" error messages later. */ \
+ char *djdir = getenv ("DJDIR"); \
+ if (djdir == NULL) \
+ { \
+ /* DJDIR is automatically defined by the DJGPP environment config \
+ file pointed to by the environment variable DJGPP. Examine DJGPP \
+ to try and figure out what's wrong. */ \
+ char *djgpp = getenv ("DJGPP"); \
+ if (djgpp == NULL) \
+ fatal ("environment variable DJGPP not defined"); \
+ else if (access (djgpp, R_OK) == 0) \
+ fatal ("environment variable DJGPP points to missing file '%s'", \
+ djgpp); \
+ else \
+ fatal ("environment variable DJGPP points to corrupt file '%s'", \
+ djgpp); \
+ } \
+ } while (0)
+
+/* Canonicalize paths containing '/dev/env/'; used in prefix.c.
+ _fixpath is a djgpp-specific function to canonicalize a path.
+ "/dev/env/DJDIR" evaluates to "c:/djgpp" if DJDIR is "c:/djgpp" for
+ example. It removes any trailing '/', so add it back. */
+/* We cannot free PATH below, as it can point to a string constant. */
+#define UPDATE_PATH_HOST_CANONICALIZE(PATH) \
+ if (memcmp ((PATH), "/dev/env/", sizeof("/dev/env/") - 1) == 0) \
+ { \
+ static char fixed_path[FILENAME_MAX + 1]; \
+ \
+ _fixpath ((PATH), fixed_path); \
+ strcat (fixed_path, "/"); \
+ (PATH) = xstrdup (fixed_path); \
+ }
diff --git a/gcc/config/i386/xm-mingw32.h b/gcc/config/i386/xm-mingw32.h
new file mode 100644
index 000000000..e0dd3f372
--- /dev/null
+++ b/gcc/config/i386/xm-mingw32.h
@@ -0,0 +1,35 @@
+/* Configuration for GCC for hosting on Windows32,
+ using GNU tools and the Windows32 API Library.
+ Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
+
+#undef PATH_SEPARATOR
+#define PATH_SEPARATOR ';'
+
+/* This is the name of the null device on Windows. */
+#define HOST_BIT_BUCKET "nul"
+
+/* The st_ino field of struct stat is always 0. */
+#define HOST_LACKS_INODE_NUMBERS
+
+/* MSVCRT does not support the "ll" format specifier for printing
+ "long long" values. Instead, we use "I64". */
+#define HOST_LONG_LONG_FORMAT "I64"
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
new file mode 100644
index 000000000..5aefa9db0
--- /dev/null
+++ b/gcc/config/i386/xmmintrin.h
@@ -0,0 +1,1251 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+#ifndef __SSE__
+# error "SSE instruction set not enabled"
+#else
+
+/* We need type definitions from the MMX header file. */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free (). */
+#include <mm_malloc.h>
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create a selector for use with the SHUFPS instruction. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
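+
+/* Editorial sketch: _MM_SHUFFLE(3,2,1,0) == 0xE4 is the identity
+ selector, while _MM_SHUFFLE(0,1,2,3) == 0x1B reverses the four
+ elements, e.g. when passed to _mm_shuffle_ps later in this
+ header. */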
+
+/* Constants for use with _mm_prefetch. */
+enum _mm_hint
+{
+ _MM_HINT_T0 = 3,
+ _MM_HINT_T1 = 2,
+ _MM_HINT_T2 = 1,
+ _MM_HINT_NTA = 0
+};
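+
+/* Editorial sketch: these hints map onto the prefetcht0/t1/t2 and
+ prefetchnta instructions via _mm_prefetch, defined further down in
+ this header. */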
+
+/* Bits in the MXCSR. */
+#define _MM_EXCEPT_MASK 0x003f
+#define _MM_EXCEPT_INVALID 0x0001
+#define _MM_EXCEPT_DENORM 0x0002
+#define _MM_EXCEPT_DIV_ZERO 0x0004
+#define _MM_EXCEPT_OVERFLOW 0x0008
+#define _MM_EXCEPT_UNDERFLOW 0x0010
+#define _MM_EXCEPT_INEXACT 0x0020
+
+#define _MM_MASK_MASK 0x1f80
+#define _MM_MASK_INVALID 0x0080
+#define _MM_MASK_DENORM 0x0100
+#define _MM_MASK_DIV_ZERO 0x0200
+#define _MM_MASK_OVERFLOW 0x0400
+#define _MM_MASK_UNDERFLOW 0x0800
+#define _MM_MASK_INEXACT 0x1000
+
+#define _MM_ROUND_MASK 0x6000
+#define _MM_ROUND_NEAREST 0x0000
+#define _MM_ROUND_DOWN 0x2000
+#define _MM_ROUND_UP 0x4000
+#define _MM_ROUND_TOWARD_ZERO 0x6000
+
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#define _MM_FLUSH_ZERO_ON 0x8000
+#define _MM_FLUSH_ZERO_OFF 0x0000
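+
+/* Editorial sketch (assumes the _mm_getcsr/_mm_setcsr accessors that
+ appear later in this header): flush-to-zero mode can be enabled with
+ _mm_setcsr ((_mm_getcsr () & ~_MM_FLUSH_ZERO_MASK) | _MM_FLUSH_ZERO_ON); */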
+
+/* Create a vector of zeros. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+ return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+ floating-point) values of A and B; the upper three SPFP values are
+ passed through from A. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform the respective operation on the four SPFP values in A and B. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform logical bit-wise operations on 128-bit values. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andnps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_orps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_xorps (__A, __B);
+}
+
+/* Perform a comparison on the lower SPFP values of A and B. If the
+ comparison is true, place a mask of all ones in the result, otherwise a
+ mask of zeros. The upper three SPFP values are passed through from A. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform a comparison on the four SPFP values of A and B. For each
+ element, if the comparison is true, place a mask of all ones in the
+ result, otherwise a mask of zeros. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Compare the lower SPFP values of A and B and return 1 if true
+ and 0 if false. */
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+ rounding mode. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+ return _mm_cvtss_si32 (__A);
+}
+
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 32-bit integer according to the
+ current rounding mode. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Convert the two lower SPFP values to 32-bit integers according to the
+ current rounding mode. Return the integers in packed form. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+ return _mm_cvtps_pi32 (__A);
+}
+
+/* Truncate the lower SPFP value to a 32-bit integer. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+ return _mm_cvttss_si32 (__A);
+}
+
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 32-bit integer. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Truncate the two lower SPFP values to 32-bit integers. Return the
+ integers in packed form. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+ return _mm_cvttps_pi32 (__A);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+ return _mm_cvtsi32_ss (__A, __B);
+}
+
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+
+/* Intel intrinsic. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
+/* Convert the two 32-bit values in B to SPFP form and insert them
+ as the two lower elements in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128 __A, __m64 __B)
+{
+ return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+ return _mm_cvtpi32_ps (__A, __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
+ __v4hi __sign;
+ __v2si __hisi, __losi;
+ __v4sf __zero, __ra, __rb;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
+
+ /* Convert the four words to doublewords. */
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __zero = (__v4sf) _mm_setzero_ps ();
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
+
+ return (__m128) __builtin_ia32_movlhps (__ra, __rb);
+}
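+
+/* Worked example (illustrative): for the input word -1 (0xffff) the
+   pcmpgtw against zero yields the mask 0xffff, so the unpack produces
+   the doubleword 0xffffffff, again -1; for the word 2 the mask is
+   0x0000 and the unpack produces 0x00000002.  The unpack thus acts as
+   a manual sign extension ahead of the signed doubleword convert.  */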
+
+/* Convert the four unsigned 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
+ __v2si __hisi, __losi;
+ __v4sf __zero, __ra, __rb;
+
+ /* Convert the four words to doublewords. */
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __zero = (__v4sf) _mm_setzero_ps ();
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __losi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __hisi);
+
+ return (__m128) __builtin_ia32_movlhps (__ra, __rb);
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
+ __v8qi __sign;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
+
+ /* Convert the four low bytes to words. */
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
+
+ return _mm_cvtpi16_ps(__A);
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu8_ps(__m64 __A)
+{
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
+ return _mm_cvtpu16_ps(__A);
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
+{
+ __v4sf __zero = (__v4sf) _mm_setzero_ps ();
+ __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
+ __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B);
+ return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16(__m128 __A)
+{
+ __v4sf __hisf = (__v4sf)__A;
+ __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
+ __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
+ __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
+ return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8(__m128 __A)
+{
+ __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
+ return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
+}
+
+/* Selects four specific SPFP values from A and B based on MASK. */
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
+{
+ return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
+}
+#else
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(MASK)))
+#endif
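+
+/* A usage sketch (illustrative, not upstream code): the low two result
+   elements are selected from A and the high two from B, two mask bits
+   per element; _MM_SHUFFLE lists selectors from the highest element
+   down.  With hypothetical operands __a and __b:
+
+     __m128 __r = _mm_shuffle_ps (__a, __b, _MM_SHUFFLE (3, 2, 1, 0));
+     // __r = { __a[0], __a[1], __b[2], __b[3] }
+*/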
+
+/* Selects and interleaves the upper two SPFP values from A and B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Selects and interleaves the lower two SPFP values from A and B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the upper two SPFP values with 64-bits of data loaded from P;
+ the lower two values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
+}
+
+/* Stores the upper two SPFP values of A into P. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A);
+}
+
+/* Moves the upper two values of B into the lower two values of A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Moves the lower two values of B into the upper two values of A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the lower two SPFP values with 64-bits of data loaded from P;
+ the upper two values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
+}
+
+/* Stores the lower two SPFP values of A into P. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A);
+}
+
+/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128 __A)
+{
+ return __builtin_ia32_movmskps ((__v4sf)__A);
+}
+
+/* Return the contents of the control register. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getcsr (void)
+{
+ return __builtin_ia32_stmxcsr ();
+}
+
+/* Read exception bits from the control register. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_STATE (void)
+{
+ return _mm_getcsr() & _MM_EXCEPT_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_MASK (void)
+{
+ return _mm_getcsr() & _MM_MASK_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_ROUNDING_MODE (void)
+{
+ return _mm_getcsr() & _MM_ROUND_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_FLUSH_ZERO_MODE (void)
+{
+ return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
+}
+
+/* Set the control register to I. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setcsr (unsigned int __I)
+{
+ __builtin_ia32_ldmxcsr (__I);
+}
+
+/* Set exception bits in the control register. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_STATE(unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_MASK (unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_ROUNDING_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
+}
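+
+/* A usage sketch (illustrative, not upstream code): these helpers
+   read-modify-write a single MXCSR field, using the mask constants
+   defined earlier in this header.  For example, to truncate in
+   conversions and flush denormals to zero:
+
+     _MM_SET_ROUNDING_MODE (_MM_ROUND_TOWARD_ZERO);
+     _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
+
+   Note that _mm_setcsr replaces the whole register, so the masked
+   helpers are the safer way to change one field.  */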
+
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
+}
+
+/* Create a vector with all four elements equal to F. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+ return _mm_set1_ps (__F);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+ return _mm_set_ss (*__P);
+}
+
+/* Create a vector with all four elements equal to *P. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+ return _mm_set1_ps (*__P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+ return _mm_load1_ps (__P);
+}
+
+/* Load four SPFP values from P. The address must be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
+ return (__m128) *(__v4sf *)__P;
+}
+
+/* Load four SPFP values from P. The address need not be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
+ return (__m128) __builtin_ia32_loadups (__P);
+}
+
+/* Load four SPFP values in reverse order. The address must be aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
+ __v4sf __tmp = *(__v4sf *)__P;
+ return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
+}
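+
+/* A usage sketch (illustrative, not upstream code): _mm_load_ps faults
+   at run time on a misaligned address, while _mm_loadu_ps accepts any
+   address:
+
+     float __buf[8] __attribute__ ((aligned (16)));
+     __m128 __x = _mm_load_ps (__buf);       // ok: 16-byte aligned
+     __m128 __y = _mm_loadu_ps (__buf + 1);  // ok: unaligned load
+*/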
+
+/* Create the vector [Z Y X W]. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+/* Create the vector [W X Y Z]. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
+}
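+
+/* Ordering note (illustrative): _mm_set_ps lists arguments from the
+   highest element down and _mm_setr_ps from the lowest element up, so
+
+     _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f)
+     _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f)
+
+   build the same vector, whose element 0 is 0.0f.  */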
+
+/* Stores the lower SPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
+ *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{
+ return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+/* Store four SPFP values. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
+ *(__v4sf *)__P = (__v4sf)__A;
+}
+
+/* Store four SPFP values. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_storeups (__P, (__v4sf)__A);
+}
+
+/* Store the lower SPFP value across four words. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
+ _mm_storeu_ps (__P, __tmp);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+ _mm_store1_ps (__P, __A);
+}
+
+/* Store four SPFP values in reverse order. The address must be aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
+ _mm_store_ps (__P, __tmp);
+}
+
+/* Sets the low SPFP value of A from the low value of B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Extracts one of the four words of A. The selector N must be immediate. */
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{
+ return _mm_extract_pi16 (__A, __N);
+}
+#else
+#define _mm_extract_pi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
+#endif
+
+/* Inserts word D into one of four words of A. The selector N must be
+ immediate. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+ return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+ return _mm_insert_pi16 (__A, __D, __N);
+}
+#else
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
+ (int)(D), (int)(N)))
+
+#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
+#endif
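+
+/* A usage sketch (illustrative, not upstream code): the selector must
+   be a compile-time constant in 0..3; a variable selector is rejected
+   when the builtin is expanded:
+
+     __m64 __v = _mm_insert_pi16 (_mm_setzero_si64 (), 42, 2);
+     int __w = _mm_extract_pi16 (__v, 2);   // 42
+*/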
+
+/* Compute the element-wise maximum of signed 16-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+ return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+ return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{
+ return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{
+ return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+ return __builtin_ia32_pmovmskb ((__v8qi)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{
+ return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+ in B and produce the high 16 bits of the 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+ return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A. The selector
+ must be an immediate. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+ return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{
+ return _mm_shuffle_pi16 (__A, __N);
+}
+#else
+#define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
+#endif
+
+/* Conditionally store byte elements of A into P. The high bit of each
+ byte in the selector N determines whether the corresponding byte from
+ A is stored. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+ __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+ _mm_maskmove_si64 (__A, __N, __P);
+}
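+
+/* A usage sketch (illustrative, not upstream code): only bytes whose
+   selector byte has its high bit set are written, so a compare result
+   can gate the store.  Storing only the negative bytes of a
+   hypothetical __a to __dest:
+
+     __m64 __neg = _mm_cmpgt_pi8 (_mm_setzero_si64 (), __a); // 0xff if < 0
+     _mm_maskmove_si64 (__a, __neg, __dest);
+*/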
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{
+ return _mm_sad_pu8 (__A, __B);
+}
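+
+/* A usage sketch (illustrative, not upstream code): psadbw is a common
+   building block for block matching; the scalar sum sits in the low
+   word.  With hypothetical 8-byte blocks __blk_a and __blk_b:
+
+     __m64 __sad = _mm_sad_pu8 (__blk_a, __blk_b);
+     int __cost = _mm_cvtsi64_si32 (__sad);  // sum of the 8 |a[i] - b[i]|
+*/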
+
+/* Loads one cache line from address P to a location "closer" to the
+ processor. The selector I specifies the type of prefetch operation. */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+ __builtin_prefetch (__P, 0, __I);
+}
+#else
+#define _mm_prefetch(P, I) \
+ __builtin_prefetch ((P), 0, (I))
+#endif
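+
+/* A usage sketch (illustrative, not upstream code): a typical pattern
+   prefetches ahead of the current position; the distance below is
+   hypothetical and workload-dependent:
+
+     _mm_prefetch ((const char *) (__p + 16), _MM_HINT_T0);
+*/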
+
+/* Stores the data in A to the address P without polluting the caches. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+ __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
+}
+
+/* Likewise. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_movntps (__P, (__v4sf)__A);
+}
+
+/* Guarantees that every preceding store is globally visible before
+ any subsequent store. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+ __builtin_ia32_sfence ();
+}
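+
+/* A usage sketch (illustrative, not upstream code): non-temporal stores
+   are weakly ordered, so a producer fences before publishing.  With
+   hypothetical __out, __val, __n:
+
+     for (__i = 0; __i < __n; __i += 4)
+       _mm_stream_ps (__out + __i, __val);   // bypasses the cache
+     _mm_sfence ();                          // make the stores visible
+*/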
+
+/* The execution of the next instruction is delayed by an implementation
+ specific amount of time. The instruction does not modify the
+ architectural state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+ __asm__ __volatile__ ("rep; nop" : : );
+}
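+
+/* A usage sketch (illustrative, not upstream code): PAUSE is intended
+   for spin-wait loops, where it saves power and yields resources to a
+   hyper-threaded sibling.  With a hypothetical volatile flag __done:
+
+     while (!__done)
+       _mm_pause ();
+*/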
+
+/* Transpose the 4x4 matrix composed of row[0-3]. */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
+ __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
+ __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
+ __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
+ __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
+ (row0) = __builtin_ia32_movlhps (__t0, __t1); \
+ (row1) = __builtin_ia32_movhlps (__t1, __t0); \
+ (row2) = __builtin_ia32_movlhps (__t2, __t3); \
+ (row3) = __builtin_ia32_movhlps (__t3, __t2); \
+} while (0)
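+
+/* A usage sketch (illustrative, not upstream code); __mat is a
+   hypothetical row-major float[4][4]:
+
+     __m128 __row0 = _mm_loadu_ps (__mat[0]);
+     __m128 __row1 = _mm_loadu_ps (__mat[1]);
+     __m128 __row2 = _mm_loadu_ps (__mat[2]);
+     __m128 __row3 = _mm_loadu_ps (__mat[3]);
+     _MM_TRANSPOSE4_PS (__row0, __row1, __row2, __row3);
+     // __row0 now holds the first column of __mat, and so on.
+*/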
+
+/* For backward source compatibility. */
+#ifdef __SSE2__
+# include <emmintrin.h>
+#endif
+
+#endif /* __SSE__ */
+#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h
new file mode 100644
index 000000000..3ebcb4b9f
--- /dev/null
+++ b/gcc/config/i386/xopintrin.h
@@ -0,0 +1,835 @@
+/* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _XOPMMINTRIN_H_INCLUDED
+#define _XOPMMINTRIN_H_INCLUDED
+
+#ifndef __XOP__
+# error "XOP instruction set not enabled"
+#else
+
+#include <fma4intrin.h>
+
+/* Integer multiply/add instructions. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
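+
+/* Semantics note (illustrative): per 16-bit lane these compute
+   A * B + C in one instruction; the _mm_maccs_* forms saturate the
+   final addition while the _mm_macc_* forms wrap.  With hypothetical
+   operands:
+
+     __m128i __r = _mm_macc_epi16 (__a, __b, __c);  // a*b + c, wrapping
+*/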
+
+/* Packed Integer Horizontal Add and Subtract */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
+}
+
+/* Vector conditional move and permute */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
+/* Packed Integer Rotates and Shifts
+ Rotates - Non-Immediate form */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
+}
+
+/* Rotates - Immediate form */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi8(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi16(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi32(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi64(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
+}
+#else
+#define _mm_roti_epi8(A, N) \
+ ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+ ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+ ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+ ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N)))
+#endif
+
+/* Shifts */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
+}
+
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
+}
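+
+/* Semantics note (illustrative): unlike the SSE2 shifts, these take a
+   per-element count from B, interpreted as signed; a positive count
+   shifts left and a negative count shifts right.  _mm_shl_* shift
+   logically, _mm_sha_* arithmetically.  For example:
+
+     __m128i __r = _mm_sha_epi32 (__x, _mm_set1_epi32 (-3));  // __x >> 3
+*/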
+
+/* Compare and Predicate Generation
+ pcom (integer, unsigned bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
+}
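+
+/* A usage sketch (illustrative, not upstream code): each comparison
+   yields an all-ones or all-zero element mask, which combines with
+   _mm_cmov_si128 above.  A hypothetical unsigned byte minimum:
+
+     __m128i __lt  = _mm_comlt_epu8 (__a, __b);
+     __m128i __min = _mm_cmov_si128 (__a, __b, __lt); // a where a < b, else b
+*/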
+
+/* pcom (integer, unsigned words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, unsigned double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, unsigned quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
+}
+
+/* pcom (integer, signed bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, signed words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, signed double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, signed quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
+}
+
+/* FRCZ */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_frcz_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
+}
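+
+/* Semantics note (illustrative): VFRCZ extracts the fractional part of
+   each element, i.e. the value minus its integral part; for example
+   _mm_frcz_ps maps 2.75f to 0.75f.  */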
+
+/* PERMIL2 */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
+{
+ return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
+ (__v2df)__Y,
+ (__v2di)__C,
+ __I);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
+{
+ return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ (__v4di)__C,
+ __I);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
+{
+ return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
+ (__v4sf)__Y,
+ (__v4si)__C,
+ __I);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
+{
+ return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ (__v8si)__C,
+ __I);
+}
+#else
+#define _mm_permute2_pd(X, Y, C, I) \
+ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(C), \
+ (int)(I)))
+
+#define _mm256_permute2_pd(X, Y, C, I) \
+ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(C), \
+ (int)(I)))
+
+#define _mm_permute2_ps(X, Y, C, I) \
+ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(C), \
+ (int)(I)))
+
+#define _mm256_permute2_ps(X, Y, C, I) \
+ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(C), \
+ (int)(I)))
+#endif /* __OPTIMIZE__ */
+
+#endif /* __XOP__ */
+
+#endif /* _XOPMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/ia64/constraints.md b/gcc/config/ia64/constraints.md
new file mode 100644
index 000000000..da91245d6
--- /dev/null
+++ b/gcc/config/ia64/constraints.md
@@ -0,0 +1,154 @@
+;; Constraint definitions for IA-64
+;; Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints
+
+(define_register_constraint "a" "ADDL_REGS"
+ "addl register")
+
+(define_register_constraint "b" "BR_REGS"
+ "branch register")
+
+(define_register_constraint "c" "PR_REGS"
+ "predicate register")
+
+(define_register_constraint "d" "AR_M_REGS"
+ "memory pipeline application register")
+
+(define_register_constraint "e" "AR_I_REGS"
+ "integer pipeline application register")
+
+(define_register_constraint "f" "FR_REGS"
+ "floating-point register")
+
+(define_register_constraint "x" "FP_REGS"
+ "floating-point register, excluding f31 and f127, used for fldp")
+
+;; Integer constraints
+
+(define_constraint "I"
+ "14 bit signed immediate for arithmetic instructions"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival + 0x2000 < 0x4000")))
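+
+;; (Note: the biased unsigned compare above is the usual range test:
+;; ival lies in [-0x2000, 0x1fff] exactly when ival + 0x2000, viewed as
+;; unsigned, lies in [0, 0x3fff], i.e. is strictly less than 0x4000.)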
+
+(define_constraint "J"
+ "22 bit signed immediate for arith instructions with r0/r1/r2/r3 source"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival + 0x200000 < 0x400000")))
+
+(define_constraint "j"
+ "(2**32-2**13)..(2**32-1) for addp4 instructions"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival >= 0xffffe000
+ && (unsigned HOST_WIDE_INT)ival <= 0xffffffff")))
+
+(define_constraint "K"
+ "8 bit signed immediate for logical instructions"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival + 0x80 < 0x100")))
+
+(define_constraint "L"
+ "8 bit adjusted signed immediate for compare pseudo-ops"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival + 0x7F < 0x100")))
+
+(define_constraint "M"
+ "6 bit unsigned immediate for shift counts"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival < 0x40")))
+
+(define_constraint "N"
+ "9 bit signed immediate for load/store post-increments"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)ival + 0x100 < 0x200")))
+
+(define_constraint "O"
+ "constant zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "P"
+ "0 or -1 for dep instruction"
+ (and (match_code "const_int")
+ (match_test "ival == 0 || ival == -1")))
+
+;; Floating-point constraints
+
+(define_constraint "G"
+ "0.0 and 1.0 for fr0 and fr1"
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode) || op == CONST1_RTX (mode)")))
+
+(define_constraint "Z"
+ "1.0 or (0.0 and !flag_signed_zeros)"
+ (and (match_code "const_double")
+ (ior (match_test "op == CONST1_RTX (mode)")
+ (and (match_test "op == CONST0_RTX (mode)")
+ (match_test "!flag_signed_zeros")))))
+
+(define_constraint "H"
+ "0.0"
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Extra constraints
+
+;; Note that while this accepts mem, it only accepts non-volatile mem,
+;; and so cannot be "fixed" by adjusting the address. Thus it cannot
+;; and does not use define_memory_constraint.
+(define_constraint "Q"
+ "Non-volatile memory for FP_REG loads/stores"
+ (and (match_operand 0 "memory_operand")
+ (match_test "!MEM_VOLATILE_P (op)")))
+
+(define_constraint "R"
+ "1..4 for shladd arguments"
+ (and (match_code "const_int")
+ (match_test "ival >= 1 && ival <= 4")))
+
+(define_constraint "T"
+ "Symbol ref to small-address-area"
+ (match_operand 0 "small_addr_symbolic_operand"))
+
+(define_constraint "U"
+ "vector zero constant"
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+(define_constraint "W"
+ "An integer vector, such that conversion to an integer yields a
+ value appropriate for an integer 'J' constraint."
+ (and (match_code "const_vector")
+ (match_test "GET_MODE_CLASS (mode) == MODE_VECTOR_INT")
+ (match_test
+ "satisfies_constraint_J (simplify_subreg (DImode, op, mode, 0))")))
+
+(define_constraint "Y"
+ "A V2SF vector containing elements that satisfy 'G'"
+ (and (match_code "const_vector")
+ (match_test "mode == V2SFmode")
+ (match_test "satisfies_constraint_G (XVECEXP (op, 0, 0))")
+ (match_test "satisfies_constraint_G (XVECEXP (op, 0, 1))")))
+
+;; Memory constraints
+
+(define_memory_constraint "S"
+ "Non-post-inc memory for asms and other unsavory creatures"
+ (and (match_code "mem")
+ (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC")))
diff --git a/gcc/config/ia64/crtbegin.asm b/gcc/config/ia64/crtbegin.asm
new file mode 100644
index 000000000..638489990
--- /dev/null
+++ b/gcc/config/ia64/crtbegin.asm
@@ -0,0 +1,254 @@
+/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "auto-host.h"
+
+.section .ctors,"aw","progbits"
+ .align 8
+__CTOR_LIST__:
+ data8 -1
+
+.section .dtors,"aw","progbits"
+ .align 8
+__DTOR_LIST__:
+ data8 -1
+
+.section .jcr,"aw","progbits"
+ .align 8
+__JCR_LIST__:
+
+.section .sdata
+ .type dtor_ptr,@object
+ .size dtor_ptr,8
+dtor_ptr:
+ data8 @gprel(__DTOR_LIST__ + 8)
+
+ /* A handle for __cxa_finalize to manage c++ local destructors. */
+ .global __dso_handle
+ .type __dso_handle,@object
+ .size __dso_handle,8
+#ifdef SHARED
+ .section .data
+__dso_handle:
+ data8 __dso_handle
+#else
+ .section .bss
+ .align 8
+__dso_handle:
+ .skip 8
+#endif
+ .hidden __dso_handle
+
+
+#ifdef HAVE_INITFINI_ARRAY
+
+.section .fini_array, "a"
+ data8 @fptr(__do_global_dtors_aux)
+
+.section .init_array, "a"
+ data8 @fptr(__do_jv_register_classes)
+ data8 @fptr(__do_global_ctors_aux)
+
+#else /* !HAVE_INITFINI_ARRAY */
+/*
+ * Fragment of the ELF _fini routine that invokes our dtor cleanup.
+ *
+ * We make the call by indirection, because in large programs the
+ * .fini and .init sections are not in range of the destination, and
+ * we cannot allow the linker to insert a stub at the end of this
+ * fragment of the _fini function. Further, Itanium does not implement
+ * the long branch instructions, and we do not wish every program to
+ * trap to the kernel for emulation.
+ *
+ * Note that we require __do_global_dtors_aux to preserve the GP,
+ * so that the next fragment in .fini gets the right value.
+ */
+.section .fini,"ax","progbits"
+ { .mlx
+ movl r2 = @pcrel(__do_global_dtors_aux - 16)
+ }
+ { .mii
+ mov r3 = ip
+ ;;
+ add r2 = r2, r3
+ ;;
+ }
+ { .mib
+ nop 0
+ mov b6 = r2
+ br.call.sptk.many b0 = b6
+ }
+
+/* Likewise for _init. */
+
+.section .init,"ax","progbits"
+ { .mlx
+ movl r2 = @pcrel(__do_jv_register_classes - 16)
+ }
+ { .mii
+ mov r3 = ip
+ ;;
+ add r2 = r2, r3
+ ;;
+ }
+ { .mib
+ nop 0
+ mov b6 = r2
+ br.call.sptk.many b0 = b6
+ }
+#endif /* !HAVE_INITFINI_ARRAY */
+
+.section .text
+ .align 32
+ .proc __do_global_dtors_aux
+__do_global_dtors_aux:
+ .prologue
+#ifndef SHARED
+ .save ar.pfs, r35
+ alloc loc3 = ar.pfs, 0, 4, 1, 0
+ addl loc0 = @gprel(dtor_ptr), gp
+ .save rp, loc1
+ mov loc1 = rp
+ .body
+
+ mov loc2 = gp
+ nop 0
+ br.sptk.many .entry
+#else
+ /*
+ if (__cxa_finalize)
+ __cxa_finalize(__dso_handle)
+ */
+ .save ar.pfs, r35
+ alloc loc3 = ar.pfs, 0, 4, 1, 0
+ addl loc0 = @gprel(dtor_ptr), gp
+ addl r16 = @ltoff(@fptr(__cxa_finalize)), gp
+ ;;
+
+ ld8 r16 = [r16]
+ ;;
+ addl out0 = @ltoff(__dso_handle), gp
+ cmp.ne p7, p0 = r0, r16
+ ;;
+
+ ld8 out0 = [out0]
+(p7) ld8 r18 = [r16], 8
+ .save rp, loc1
+ mov loc1 = rp
+ .body
+ ;;
+
+ mov loc2 = gp
+(p7) ld8 gp = [r16]
+(p7) mov b6 = r18
+
+ nop 0
+ nop 0
+(p7) br.call.sptk.many rp = b6
+ ;;
+
+ nop 0
+ nop 0
+ br.sptk.many .entry
+#endif
+ /*
+ do {
+ dtor_ptr++;
+ (*(dtor_ptr-1)) ();
+ } while (dtor_ptr);
+ */
+.loop:
+ st8 [loc0] = r15 // update dtor_ptr (in memory)
+ ld8 r17 = [r16], 8 // r17 <- dtor's entry-point
+ nop 0
+ ;;
+
+ ld8 gp = [r16] // gp <- dtor's gp
+ mov b6 = r17
+ br.call.sptk.many rp = b6
+
+.entry: ld8 r15 = [loc0] // r15 <- dtor_ptr (gp-relative)
+ ;;
+ add r16 = r15, loc2 // r16 <- dtor_ptr (absolute)
+ adds r15 = 8, r15
+ ;;
+
+ ld8 r16 = [r16] // r16 <- pointer to dtor's fdesc
+ mov rp = loc1
+ mov ar.pfs = loc3
+ ;;
+
+ cmp.ne p6, p0 = r0, r16
+(p6) br.cond.sptk.few .loop
+ br.ret.sptk.many rp
+ .endp __do_global_dtors_aux
+
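+/* Editorial sketch (not upstream): what the dtor walk above does,
+   expressed in C.  It assumes the usual ia64 function-descriptor
+   layout of { entry, gp }; note that the real code stores the
+   advanced dtor_ptr back to memory before each call, so a dtor that
+   exits never re-runs earlier entries.
+
+     typedef struct { void (*entry) (void); void *gp; } fdesc;
+
+     static void run_dtors (fdesc **dtor_ptr)
+     {
+       fdesc *f;
+       while ((f = *dtor_ptr++) != 0)
+         (*f->entry) ();   // the br.call also installs f->gp
+     }
+*/
+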
+ .align 32
+ .proc __do_jv_register_classes
+__do_jv_register_classes:
+ .prologue
+ .save ar.pfs, r33
+ alloc loc1 = ar.pfs, 0, 3, 1, 0
+ movl out0 = @gprel(__JCR_LIST__)
+ ;;
+
+ addl r14 = @ltoff(@fptr(_Jv_RegisterClasses)), gp
+ add out0 = out0, gp
+ .save rp, loc0
+ mov loc0 = rp
+ .body
+ ;;
+
+ ld8 r14 = [r14]
+ ld8 r15 = [out0]
+ cmp.ne p6, p0 = r0, r0
+ ;;
+
+ cmp.eq.or p6, p0 = r0, r14
+ cmp.eq.or p6, p0 = r0, r15
+(p6) br.ret.sptk.many rp
+
+ ld8 r15 = [r14], 8
+ ;;
+ nop 0
+ mov b6 = r15
+
+ mov loc2 = gp
+ ld8 gp = [r14]
+ br.call.sptk.many rp = b6
+ ;;
+
+ mov gp = loc2
+ mov rp = loc0
+ mov ar.pfs = loc1
+
+ nop 0
+ nop 0
+ br.ret.sptk.many rp
+ .endp __do_jv_register_classes
+
+#ifdef SHARED
+.weak __cxa_finalize
+#endif
+.weak _Jv_RegisterClasses
diff --git a/gcc/config/ia64/crtend.asm b/gcc/config/ia64/crtend.asm
new file mode 100644
index 000000000..a904af9cf
--- /dev/null
+++ b/gcc/config/ia64/crtend.asm
@@ -0,0 +1,121 @@
+/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "auto-host.h"
+
+.section .ctors,"aw","progbits"
+ .align 8
+__CTOR_END__:
+ data8 0
+
+.section .dtors,"aw","progbits"
+ .align 8
+__DTOR_END__:
+ data8 0
+
+.section .jcr,"aw","progbits"
+ .align 8
+__JCR_END__:
+ data8 0
+
+#ifdef HAVE_INITFINI_ARRAY
+ .global __do_global_ctors_aux
+ .hidden __do_global_ctors_aux
+#else /* !HAVE_INITFINI_ARRAY */
+/*
+ * Fragment of the ELF _init routine that invokes our ctor processing.
+ *
+ * We make the call by indirection, because in large programs the
+ * .fini and .init sections are not in range of the destination, and
+ * we cannot allow the linker to insert a stub at the end of this
+ * fragment of the _init function. Further, Itanium does not implement
+ * the long branch instructions, and we do not wish every program to
+ * trap to the kernel for emulation.
+ *
+ * Note that we require __do_global_ctors_aux to preserve the GP,
+ * so that the next fragment in .init gets the right value.
+ */
+.section .init,"ax","progbits"
+ { .mlx
+ movl r2 = @pcrel(__do_global_ctors_aux - 16)
+ }
+ { .mii
+ mov r3 = ip
+ ;;
+ add r2 = r2, r3
+ ;;
+ }
+ { .mib
+ mov b6 = r2
+ br.call.sptk.many b0 = b6
+ ;;
+ }
+#endif /* !HAVE_INITFINI_ARRAY */
+
+.text
+ .align 32
+ .proc __do_global_ctors_aux
+__do_global_ctors_aux:
+ .prologue
+ /*
+ for (loc0 = __CTOR_END__-1; *loc0 != -1; --loc0)
+ (*loc0) ();
+ */
+ .save ar.pfs, r34
+ alloc loc2 = ar.pfs, 0, 5, 0, 0
+ movl loc0 = @gprel(__CTOR_END__ - 8)
+ ;;
+
+ add loc0 = loc0, gp
+ ;;
+ ld8 loc3 = [loc0], -8
+ .save rp, loc1
+ mov loc1 = rp
+ .body
+ ;;
+
+ cmp.eq p6, p0 = -1, loc3
+ mov loc4 = gp
+(p6) br.cond.spnt.few .exit
+
+.loop: ld8 r15 = [loc3], 8
+ ;;
+ ld8 gp = [loc3]
+ mov b6 = r15
+
+ ld8 loc3 = [loc0], -8
+ nop 0
+ br.call.sptk.many rp = b6
+ ;;
+
+ cmp.ne p6, p0 = -1, loc3
+ nop 0
+(p6) br.cond.sptk.few .loop
+
+.exit: mov gp = loc3
+ mov rp = loc1
+ mov ar.pfs = loc2
+
+ br.ret.sptk.many rp
+ .endp __do_global_ctors_aux
diff --git a/gcc/config/ia64/crtfastmath.c b/gcc/config/ia64/crtfastmath.c
new file mode 100644
index 000000000..0decd2a3f
--- /dev/null
+++ b/gcc/config/ia64/crtfastmath.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 2001, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by David Mosberger <davidm@hpl.hp.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* We could call fesetenv() here but that would create a confusing
+ dependency on libm (since that is where fesetenv() gets defined).
+ To avoid this, just do everything locally. */
+#define FE_NONIEEE_ENV 0x0009a04d0270037f
+
+static void __attribute__((constructor))
+__ia64_set_fast_math (void)
+{
+ __asm__ __volatile__ ("mov.m ar.fpsr=%0" : : "r"(FE_NONIEEE_ENV));
+}
diff --git a/gcc/config/ia64/crti.asm b/gcc/config/ia64/crti.asm
new file mode 100644
index 000000000..a9d515097
--- /dev/null
+++ b/gcc/config/ia64/crti.asm
@@ -0,0 +1,53 @@
+# Copyright (C) 2000, 2001, 2008, 2009 Free Software Foundation, Inc.
+# Written By Timothy Wall
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+ .section ".init"
+ .align 16
+ .global _init
+_init:
+ .prologue 14, 33
+ .save ar.pfs, r34
+ alloc r34 = ar.pfs, 0, 4, 0, 0
+ .vframe r35
+ mov r35 = r12
+ .save rp, r33
+ mov r33 = b0
+ .body
+
+ .section ".fini"
+ .align 16
+ .global _fini
+_fini:
+ .prologue 14, 33
+ .save ar.pfs, r34
+ alloc r34 = ar.pfs, 0, 4, 0, 0
+ .vframe r35
+ mov r35 = r12
+ .save rp, r33
+ mov r33 = b0
+ .body
+
+# end of crti.asm
diff --git a/gcc/config/ia64/crtn.asm b/gcc/config/ia64/crtn.asm
new file mode 100644
index 000000000..e1a18795f
--- /dev/null
+++ b/gcc/config/ia64/crtn.asm
@@ -0,0 +1,43 @@
+# Copyright (C) 2000, 2001, 2008, 2009 Free Software Foundation, Inc.
+# Written By Timothy Wall
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+ .section ".init"
+ ;;
+ mov ar.pfs = r34
+ mov b0 = r33
+ .restore sp
+ mov r12 = r35
+ br.ret.sptk.many b0
+
+ .section ".fini"
+ ;;
+ mov ar.pfs = r34
+ mov b0 = r33
+ .restore sp
+ mov r12 = r35
+ br.ret.sptk.many b0
+
+# end of crtn.asm
diff --git a/gcc/config/ia64/div.md b/gcc/config/ia64/div.md
new file mode 100644
index 000000000..d1142a200
--- /dev/null
+++ b/gcc/config/ia64/div.md
@@ -0,0 +1,1221 @@
+;; Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; For the internal conditional math routines:
+
+;; operand 0 is always the result
+;; operand 1 is always the predicate
+;; operands 2, 3, and sometimes 4 are the input values.
+;; operand 4 or 5 is the floating point status register to use.
+;; operand 5 or 6 is the rounding to do. (0 = single, 1 = double, 2 = none)
+;;
+;; addrf3_cond - F0 = F2 + F3
+;; subrf3_cond - F0 = F2 - F3
+;; mulrf3_cond - F0 = F2 * F3
+;; nmulrf3_cond - F0 = - (F2 * F3)
+;; m1addrf4_cond - F0 = (F2 * F3) + F4
+;; m1subrf4_cond - F0 = (F2 * F3) - F4
+;; m2addrf4_cond - F0 = F2 + (F3 * F4)
+;; m2subrf4_cond - F0 = F2 - (F3 * F4)
+
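+;; Editorial sketch (not upstream text): a rough C model of these
+;; patterns.  Real code operates on 82-bit RFmode values, and the
+;; status/rounding operands become .sN and .s/.d completers rather
+;; than anything expressible in C, but the dataflow is:
+;;
+;;   #include <math.h>
+;;   /* m2subrf4_cond: F0 = F2 - (F3 * F4) in one fused rounding,
+;;      but only when the predicate is set; otherwise F0 keeps its
+;;      previous value (the "0,H" alternative pair).  */
+;;   static double m2subrf4_cond (int pred, double f2, double f3,
+;;                                double f4, double f0_old)
+;;   {
+;;     return pred ? fma (-f3, f4, f2) : f0_old;
+;;   }
+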
+;; Basic plus/minus/mult operations
+
+(define_insn "addrf3_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (plus:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 5 "const_int_operand" ""))
+ (use (match_operand:SI 6 "const_int_operand" ""))]
+ ""
+ "(%1) fadd%R6.s%5 %0 = %F2, %F3"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+(define_insn "subrf3_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (minus:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 5 "const_int_operand" ""))
+ (use (match_operand:SI 6 "const_int_operand" ""))]
+ ""
+ "(%1) fsub%R6.s%5 %0 = %F2, %F3"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+(define_insn "mulrf3_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (mult:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 5 "const_int_operand" ""))
+ (use (match_operand:SI 6 "const_int_operand" ""))]
+ ""
+ "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+;; neg-mult operation
+
+(define_insn "nmulrf3_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (neg:RF (mult:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
+ (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 5 "const_int_operand" ""))
+ (use (match_operand:SI 6 "const_int_operand" ""))]
+ ""
+ "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+;; add-mult/sub-mult operations (mult as op1)
+
+(define_insn "m1addrf4_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (plus:RF
+ (mult:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 6 "const_int_operand" ""))
+ (use (match_operand:SI 7 "const_int_operand" ""))]
+ ""
+ "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+(define_insn "m1subrf4_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (minus:RF
+ (mult:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
+ (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 6 "const_int_operand" ""))
+ (use (match_operand:SI 7 "const_int_operand" ""))]
+ ""
+ "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+;; add-mult/sub-mult operations (mult as op2)
+
+(define_insn "m2addrf4_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (plus:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (mult:RF
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
+ (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 6 "const_int_operand" ""))
+ (use (match_operand:SI 7 "const_int_operand" ""))]
+ ""
+ "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+(define_insn "m2subrf4_cond"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
+ (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
+ (const_int 0))
+ (minus:RF
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
+ (mult:RF
+ (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
+ (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
+ (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
+ (use (match_operand:SI 6 "const_int_operand" ""))
+ (use (match_operand:SI 7 "const_int_operand" ""))]
+ ""
+ "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
+ [(set_attr "itanium_class" "fmac")
+ (set_attr "predicable" "no")])
+
+;; Conversions to/from RF and SF/DF/XF
+;; These conversions should not generate any code but make it possible
+;; for all the instructions used to implement floating point division
+;; to be written for RFmode only and to not have to handle multiple
+;; modes or to have to handle a register in more than one mode.
+
+(define_mode_iterator SDX_F [SF DF XF])
+
+(define_insn "extend<mode>rf2"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "fmisc")
+ (set_attr "predicable" "yes")])
+
+(define_split
+ [(set (match_operand:RF 0 "fr_register_operand" "")
+ (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ if (operands[1] == CONST0_RTX (<MODE>mode))
+ operands[2] = gen_rtx_REG (RFmode, FR_REG (0));
+ else if (operands[1] == CONST1_RTX (<MODE>mode))
+ operands[2] = gen_rtx_REG (RFmode, FR_REG (1));
+ else
+ operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1]));
+})
+
+
+(define_insn "truncrf<mode>2"
+ [(set (match_operand:SDX_F 0 "fr_register_operand" "=f")
+ (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "fmisc")
+ (set_attr "predicable" "yes")])
+
+(define_split
+ [(set (match_operand:SDX_F 0 "fr_register_operand" "")
+ (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ if (operands[1] == CONST0_RTX (RFmode))
+ operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (0));
+ else if (operands[1] == CONST1_RTX (RFmode))
+ operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (1));
+ else
+ operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
+})
+
+;; Float to integer truncations using an alternative status register.
+
+(define_insn "fix_truncrfdi2_alts"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
+ (use (match_operand:SI 2 "const_int_operand" ""))]
+ ""
+ "fcvt.fx.trunc.s%2 %0 = %1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fixuns_truncrfdi2_alts"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
+ (use (match_operand:SI 2 "const_int_operand" ""))]
+ ""
+ "fcvt.fxu.trunc.s%2 %0 = %1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "setf_exp_rf"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (unspec:RF [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_SETF_EXP))]
+ ""
+ "setf.exp %0 = %1"
+ [(set_attr "itanium_class" "frfr")])
+
+;; Reciprocal approximation
+
+(define_insn "recip_approx_rf"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_FR_RECIP_APPROX_RES))
+ (set (match_operand:CCI 3 "register_operand" "=c")
+ (unspec:CCI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX))
+ (use (match_operand:SI 4 "const_int_operand" ""))]
+ ""
+ "frcpa.s%4 %0, %3 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")
+ (set_attr "predicable" "no")])
+
+;; Single precision floating point division
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx insn;
+ if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
+ insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]);
+ else
+ insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]);
+ emit_insn (insn);
+ DONE;
+})
+
+;; Single precision floating point division (maximum throughput algorithm).
+
+(define_expand "divsf3_internal_thr"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_sgl = CONST0_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Empty conversions to put inputs into RFmode. */
+ emit_insn (gen_extendsfrf2 (a, operands[1]));
+ emit_insn (gen_extendsfrf2 (b, operands[2]));
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* y1 = y + (y * e) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+ /* y2 = y + (y1 * e) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1, trunc_off));
+ /* q = single(a * y2) */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl));
+ /* r = a - (q * b) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1, trunc_off));
+ /* Q = single (q + (r * y2)) */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0, trunc_sgl));
+ /* Conversion back into SFmode. */
+ emit_insn (gen_truncrfsf2 (operands[0], q_res));
+ DONE;
+})
+
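+;; Editorial sketch (not upstream text): the sequence above is two
+;; Newton-Raphson refinements of the frcpa seed plus one quotient
+;; correction.  Modeled in C with double standing in for RFmode:
+;;
+;;   #include <math.h>
+;;   static float divsf3_thr_model (float a, float b)
+;;   {
+;;     double y  = 1.0 / b;                    /* stand-in for frcpa */
+;;     double e  = fma (-(double) b, y, 1.0);  /* e  = 1 - b*y */
+;;     double y1 = fma (y, e, y);              /* y1 = y + y*e */
+;;     double y2 = fma (y1, e, y);             /* y2 = y + y1*e */
+;;     float  q  = (float) ((double) a * y2);  /* q  = single (a * y2) */
+;;     double r  = fma (-(double) q, b, a);    /* r  = a - q*b */
+;;     return (float) fma (r, y2, q);          /* Q  = single (q + r*y2) */
+;;   }
+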
+;; Single precision floating point division (minimum latency algorithm).
+
+(define_expand "divsf3_internal_lat"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_sgl = CONST0_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Empty conversions to put inputs into RFmode. */
+ emit_insn (gen_extendsfrf2 (a, operands[1]));
+ emit_insn (gen_extendsfrf2 (b, operands[2]));
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* e1 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off));
+ /* q1 = single(q + (q * e1)) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl));
+ /* y1 = y + (y * e1) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off));
+ /* r = a - (q1 * b) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off));
+ /* Q = single (q1 + (r * y1)) */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl));
+ /* Conversion back into SFmode. */
+ emit_insn (gen_truncrfsf2 (operands[0], q_res));
+ DONE;
+})
+
+;; Double precision floating point division
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "")
+ (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx insn;
+ if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
+ insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]);
+ else
+ insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]);
+ emit_insn (insn);
+ DONE;
+})
+
+;; Double precision floating point division (maximum throughput algorithm).
+
+(define_expand "divdf3_internal_thr"
+ [(set (match_operand:DF 0 "fr_register_operand" "")
+ (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx y3 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_dbl = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+ /* Empty conversions to put inputs into RFmode */
+ emit_insn (gen_extenddfrf2 (a, operands[1]));
+ emit_insn (gen_extenddfrf2 (b, operands[2]));
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* y1 = y + (y * e) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* y2 = y1 + (y1 * e1) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
+ /* e2 = e1 * e1 */
+ emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off));
+ /* y3 = y2 + (y2 * e2) */
+ emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1, trunc_off));
+ /* q = double (a * y3) */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl));
+ /* r = a - (b * q) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
+ /* Q = double (q + (r * y3)) */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0, trunc_dbl));
+ /* Conversion back into DFmode */
+ emit_insn (gen_truncrfdf2 (operands[0], q_res));
+ DONE;
+})
+
+;; Double precision floating point division (minimum latency algorithm).
+
+(define_expand "divdf3_internal_lat"
+ [(set (match_operand:DF 0 "fr_register_operand" "")
+ (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx q2 = gen_reg_rtx (RFmode);
+ rtx e3 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_dbl = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Empty conversions to put inputs into RFmode */
+ emit_insn (gen_extenddfrf2 (a, operands[1]));
+ emit_insn (gen_extenddfrf2 (b, operands[2]));
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e2 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* e3 = e + (e1 * e1) */
+ emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+ /* q1 = q + (q * e2) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off));
+ /* y1 = y + (y * e2) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+ /* q2 = double(q + (q1 * e3)) */
+ emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl));
+ /* y2 = y + (y1 * e3) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+ /* r1 = a - (b * q2) */
+ emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off));
+ /* Q = double (q2 + (r1 * y2)) */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl));
+ /* Conversion back into DFmode */
+ emit_insn (gen_truncrfdf2 (operands[0], q_res));
+ DONE;
+})
+
+;; Extended precision floating point division.
+
+(define_expand "divxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "")
+ (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))]
+ "TARGET_INLINE_FLOAT_DIV"
+{
+ rtx q_res = gen_reg_rtx (RFmode);
+ rtx a = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx y3 = gen_reg_rtx (RFmode);
+ rtx e3 = gen_reg_rtx (RFmode);
+ rtx e4 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx r1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Empty conversions to put inputs into RFmode */
+ emit_insn (gen_extendxfrf2 (a, operands[1]));
+ emit_insn (gen_extendxfrf2 (b, operands[2]));
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* e2 = e + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* y1 = y + (y * e2) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
+ /* e3 = e + (e1 * e1) */
+ emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
+ /* y2 = y + (y1 * e3) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
+ /* r = a - (b * q) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
+ /* e4 = 1 - (b * y2) */
+ emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off));
+ /* q1 = q + (r * y2) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off));
+ /* y3 = y2 + (y2 * e4) */
+ emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off));
+ /* r1 = a - (b * q1) */
+ emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off));
+ /* Q = q1 + (r1 * y3) */
+ emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off));
+ /* Conversion back into XFmode */
+ emit_insn (gen_truncrfxf2 (operands[0], q_res));
+ DONE;
+})
+
+
+;; Integer division operations
+
+(define_expand "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op1_rf, op2_rf, op0_rf, op0_di;
+
+ op0_rf = gen_reg_rtx (RFmode);
+ op0_di = gen_reg_rtx (DImode);
+
+ if (! register_operand (operands[1], SImode))
+ operands[1] = force_reg (SImode, operands[1]);
+ op1_rf = gen_reg_rtx (RFmode);
+ expand_float (op1_rf, operands[1], 0);
+
+ if (! register_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+ op2_rf = gen_reg_rtx (RFmode);
+ expand_float (op2_rf, operands[2], 0);
+
+ emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
+ CONST1_RTX (SImode)));
+
+ emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
+
+ emit_insn (gen_fix_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
+ emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+ DONE;
+})
+
+(define_expand "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mod:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op2_neg, op1_di, div;
+
+ div = gen_reg_rtx (SImode);
+ emit_insn (gen_divsi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+
+ /* This is a trick to get us to reuse the value that we're sure to
+ have already copied to the FP regs. */
+ op1_di = gen_reg_rtx (DImode);
+ convert_move (op1_di, operands[1], 0);
+
+ emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+ gen_lowpart (SImode, op1_di)));
+ DONE;
+})
+
+(define_expand "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op1_rf, op2_rf, op0_rf, op0_di;
+
+ op0_rf = gen_reg_rtx (RFmode);
+ op0_di = gen_reg_rtx (DImode);
+
+ if (! register_operand (operands[1], SImode))
+ operands[1] = force_reg (SImode, operands[1]);
+ op1_rf = gen_reg_rtx (RFmode);
+ expand_float (op1_rf, operands[1], 1);
+
+ if (! register_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+ op2_rf = gen_reg_rtx (RFmode);
+ expand_float (op2_rf, operands[2], 1);
+
+ emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
+ CONST1_RTX (SImode)));
+
+ emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
+
+ emit_insn (gen_fixuns_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
+ emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+ DONE;
+})
+
+(define_expand "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (umod:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op2_neg, op1_di, div;
+
+ div = gen_reg_rtx (SImode);
+ emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+
+ /* This is a trick to get us to reuse the value that we're sure to
+ have already copied to the FP regs. */
+ op1_di = gen_reg_rtx (DImode);
+ convert_move (op1_di, operands[1], 1);
+
+ emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+ gen_lowpart (SImode, op1_di)));
+ DONE;
+})
+
+(define_expand "divsi3_internal"
+ [(set (match_operand:RF 0 "fr_register_operand" "")
+ (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
+ (match_operand:RF 2 "fr_register_operand" ""))))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx a = operands[1];
+ rtx b = operands[2];
+ rtx y = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+ rtx twon34_exp = gen_reg_rtx (DImode);
+ rtx twon34 = gen_reg_rtx (RFmode);
+
+ /* Load constant 2**(-34) */
+ emit_move_insn (twon34_exp, GEN_INT (65501));
+ emit_insn (gen_setf_exp_rf (twon34, twon34_exp));
+
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* q1 = q + (q * e) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
+ /* e1 = (2**-34) + (e * e) */
+ emit_insn (gen_m2addrf4_cond (e1, cond, twon34, e, e, zero, status1, trunc_off));
+ /* q2 = q1 + (e1 * q1) */
+ emit_insn (gen_m2addrf4_cond (operands[0], cond, q1, e1, q1, y, status1, trunc_off));
+ DONE;
+})
+
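+;; Editorial sketch (not upstream text): a C model of divsi3_internal.
+;; The 65501 above is the register-file exponent bias 65535 minus 34,
+;; so setf.exp materializes 2**(-34); that small bias keeps the
+;; approximate quotient on the side where the final truncation gives
+;; the exact integer result for all 32-bit inputs:
+;;
+;;   #include <math.h>
+;;   #include <stdint.h>
+;;   static int32_t divsi3_model (int32_t a, int32_t b)
+;;   {
+;;     double y  = 1.0 / (double) b;           /* frcpa seed */
+;;     double e  = fma (-(double) b, y, 1.0);  /* e  = 1 - b*y */
+;;     double q  = (double) a * y;             /* q  = a*y */
+;;     double q1 = fma (q, e, q);              /* q1 = q + q*e */
+;;     double e1 = fma (e, e, 0x1p-34);        /* e1 = 2**-34 + e*e */
+;;     double q2 = fma (e1, q1, q1);           /* q2 = q1 + e1*q1 */
+;;     return (int32_t) q2;                    /* fcvt.fx.trunc */
+;;   }
+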
+(define_expand "divdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op1_rf, op2_rf, op0_rf;
+
+ op0_rf = gen_reg_rtx (RFmode);
+
+ if (! register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ op1_rf = gen_reg_rtx (RFmode);
+ expand_float (op1_rf, operands[1], 0);
+
+ if (! register_operand (operands[2], DImode))
+ operands[2] = force_reg (DImode, operands[2]);
+ op2_rf = gen_reg_rtx (RFmode);
+ expand_float (op2_rf, operands[2], 0);
+
+ emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
+ CONST1_RTX (DImode)));
+
+ if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
+ emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
+ else
+ emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
+
+ emit_insn (gen_fix_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
+ DONE;
+})
+
+(define_expand "moddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mod:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op2_neg, div;
+
+ div = gen_reg_rtx (DImode);
+ emit_insn (gen_divdi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+
+ emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+ DONE;
+})
+
+(define_expand "udivdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (udiv:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op1_rf, op2_rf, op0_rf;
+
+ op0_rf = gen_reg_rtx (RFmode);
+
+ if (! register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ op1_rf = gen_reg_rtx (RFmode);
+ expand_float (op1_rf, operands[1], 1);
+
+ if (! register_operand (operands[2], DImode))
+ operands[2] = force_reg (DImode, operands[2]);
+ op2_rf = gen_reg_rtx (RFmode);
+ expand_float (op2_rf, operands[2], 1);
+
+ emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
+ CONST1_RTX (DImode)));
+
+ if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
+ emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
+ else
+ emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
+
+ emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
+ DONE;
+})
+
+(define_expand "umoddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (umod:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx op2_neg, div;
+
+ div = gen_reg_rtx (DImode);
+ emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+
+ emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+ DONE;
+})
+
+(define_expand "divdi3_internal_lat"
+ [(set (match_operand:RF 0 "fr_register_operand" "")
+ (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
+ (match_operand:RF 2 "fr_register_operand" ""))))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx a = operands[1];
+ rtx b = operands[2];
+ rtx y = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q = gen_reg_rtx (RFmode);
+ rtx q1 = gen_reg_rtx (RFmode);
+ rtx q2 = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* q = a * y */
+ emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
+ /* q1 = q + (q * e) */
+ emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* q2 = q1 + (e1 * q1) */
+ emit_insn (gen_m2addrf4_cond (q2, cond, q1, e1, q1, zero, status1, trunc_off));
+ /* y1 = y + (y * e) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+ /* r = a - (b * q2) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
+ /* y2 = y1 + (y1 * e1) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
+ /* q3 = q2 + (r * y2) */
+ emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
+ DONE;
+})
+
+(define_expand "divdi3_internal_thr"
+ [(set (match_operand:RF 0 "fr_register_operand" "")
+ (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
+ (match_operand:RF 2 "fr_register_operand" ""))))]
+ "TARGET_INLINE_INT_DIV"
+{
+ rtx a = operands[1];
+ rtx b = operands[2];
+ rtx y = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx y2 = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx q2 = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* y = 1 / b */
+ emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
+ /* e = 1 - (b * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
+ /* y1 = y + (y * e) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
+ /* e1 = e * e */
+ emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
+ /* y2 = y1 + (y1 * e1) */
+ emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
+ /* q2 = y2 * a */
+ emit_insn (gen_mulrf3_cond (q2, cond, y2, a, zero, status1, trunc_off));
+ /* r = a - (b * q2) */
+ emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
+ /* q3 = q2 + (r * y2) */
+ emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
+ DONE;
+})
+
+;; SQRT operations
+
+
+(define_insn "sqrt_approx_rf"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_FR_SQRT_RECIP_APPROX_RES))
+ (set (match_operand:CCI 2 "register_operand" "=c")
+ (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
+ (use (match_operand:SI 3 "const_int_operand" ""))]
+ ""
+ "frsqrta.s%3 %0, %2 = %F1"
+ [(set_attr "itanium_class" "fmisc")
+ (set_attr "predicable" "no")])
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=&f")
+ (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx insn;
+ if (TARGET_INLINE_SQRT == INL_MIN_LAT)
+ insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]);
+ else
+ insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]);
+ emit_insn (insn);
+ DONE;
+})
+
+(define_expand "sqrtsf2_internal_thr"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx g = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx s = gen_reg_rtx (RFmode);
+ rtx f = gen_reg_rtx (RFmode);
+ rtx y1 = gen_reg_rtx (RFmode);
+ rtx g1 = gen_reg_rtx (RFmode);
+ rtx h = gen_reg_rtx (RFmode);
+ rtx d = gen_reg_rtx (RFmode);
+ rtx g2 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx c1 = ia64_dconst_0_5();
+ rtx c2 = ia64_dconst_0_375();
+ rtx reg_df_c1 = gen_reg_rtx (DFmode);
+ rtx reg_df_c2 = gen_reg_rtx (DFmode);
+ rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+ rtx reg_rf_c2 = gen_reg_rtx (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_sgl = CONST0_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Put needed constants into registers. */
+ emit_insn (gen_movdf (reg_df_c1, c1));
+ emit_insn (gen_movdf (reg_df_c2, c2));
+ emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+ emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
+ /* Empty conversion to put input into RFmode. */
+ emit_insn (gen_extendsfrf2 (b, operands[1]));
+ /* y = sqrt (1 / b) */
+ emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+ /* g = b * y */
+ emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+ /* e = 1 - (g * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
+ /* s = 0.5 + (0.375 * e) */
+ emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
+ /* f = y * e */
+ emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off));
+ /* y1 = y + (f * s) */
+ emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off));
+ /* g1 = single (b * y1) */
+ emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl));
+ /* h = 0.5 * y1 */
+ emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off));
+ /* d = b - g1 * g1 */
+ emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
+ /* g2 = single(g1 + (d * h)) */
+ emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl));
+ /* Conversion back into SFmode. */
+ emit_insn (gen_truncrfsf2 (operands[0], g2));
+ DONE;
+})
+
+(define_expand "sqrtsf2_internal_lat"
+ [(set (match_operand:SF 0 "fr_register_operand" "")
+ (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx g = gen_reg_rtx (RFmode);
+ rtx g1 = gen_reg_rtx (RFmode);
+ rtx g2 = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx s = gen_reg_rtx (RFmode);
+ rtx f = gen_reg_rtx (RFmode);
+ rtx f1 = gen_reg_rtx (RFmode);
+ rtx h = gen_reg_rtx (RFmode);
+ rtx h1 = gen_reg_rtx (RFmode);
+ rtx d = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx one = CONST1_RTX (RFmode);
+ rtx c1 = ia64_dconst_0_5();
+ rtx c2 = ia64_dconst_0_375();
+ rtx reg_df_c1 = gen_reg_rtx (DFmode);
+ rtx reg_df_c2 = gen_reg_rtx (DFmode);
+ rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+ rtx reg_rf_c2 = gen_reg_rtx (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_sgl = CONST0_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Put needed constants into registers. */
+ emit_insn (gen_movdf (reg_df_c1, c1));
+ emit_insn (gen_movdf (reg_df_c2, c2));
+ emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+ emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
+ /* Empty conversion to put input into RFmode. */
+ emit_insn (gen_extendsfrf2 (b, operands[1]));
+ /* y = sqrt (1 / b) */
+ emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+ /* g = b * y */
+ emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+ /* e = 1 - (g * y) */
+ emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
+ /* h = 0.5 * y */
+ emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+ /* s = 0.5 + (0.375 * e) */
+ emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
+ /* f = e * g */
+ emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off));
+ /* g1 = single (g + (f * s)) */
+ emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl));
+ /* f1 = e * h */
+ emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off));
+ /* d = b - g1 * g1 */
+ emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
+ /* h1 = h + (f1 * s) */
+ emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off));
+ /* g2 = single(g1 + (d * h1)) */
+ emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl));
+ /* Conversion back into SFmode. */
+ emit_insn (gen_truncrfsf2 (operands[0], g2));
+ DONE;
+})
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=&f")
+ (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx insn;
+#if 0
+ if (TARGET_INLINE_SQRT == INL_MIN_LAT)
+ insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]);
+ else
+#endif
+ insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]);
+ emit_insn (insn);
+ DONE;
+})
+
+(define_expand "sqrtdf2_internal_thr"
+ [(set (match_operand:DF 0 "fr_register_operand" "")
+ (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx g = gen_reg_rtx (RFmode);
+ rtx g1 = gen_reg_rtx (RFmode);
+ rtx g2 = gen_reg_rtx (RFmode);
+ rtx g3 = gen_reg_rtx (RFmode);
+ rtx g4 = gen_reg_rtx (RFmode);
+ rtx r = gen_reg_rtx (RFmode);
+ rtx r1 = gen_reg_rtx (RFmode);
+ rtx h = gen_reg_rtx (RFmode);
+ rtx h1 = gen_reg_rtx (RFmode);
+ rtx h2 = gen_reg_rtx (RFmode);
+ rtx d = gen_reg_rtx (RFmode);
+ rtx d1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx c1 = ia64_dconst_0_5();
+ rtx reg_df_c1 = gen_reg_rtx (DFmode);
+ rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_dbl = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Put needed constants into registers. */
+ emit_insn (gen_movdf (reg_df_c1, c1));
+ emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+ /* Empty conversion to put input into RFmode. */
+ emit_insn (gen_extenddfrf2 (b, operands[1]));
+ /* y = sqrt (1 / b) */
+ emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+ /* g = b * y */
+ emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+ /* h = 0.5 * y */
+ emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+ /* r = 0.5 - (g * h) */
+ emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
+ /* g1 = g + (g * r) */
+ emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off));
+ /* h1 = h + (h * r) */
+ emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off));
+ /* r1 = 0.5 - (g1 * h1) */
+ emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
+ /* g2 = g1 + (g1 * r1) */
+ emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off));
+ /* h2 = h1 + (h1 * r1) */
+ emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off));
+ /* d = b - (g2 * g2) */
+ emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
+ /* g3 = g2 + (d * h2) */
+ emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
+ /* d1 = b - (g3 * g3) */
+ emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
+ /* g4 = g3 + (d1 * h2) */
+ emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl));
+ /* Conversion back into DFmode. */
+ emit_insn (gen_truncrfdf2 (operands[0], g4));
+ DONE;
+})
+
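The same seed is refined further for DFmode. As an editorial sketch under the same caveats as the SFmode model above: each r pass roughly squares the relative error of the paired estimates g (the root) and h (half its reciprocal), and the two trailing d terms are Newton corrections whose residual b - g*g the fused multiply-subtract computes without intermediate rounding.

    #include <math.h>

    /* Sketch of sqrtdf2_internal_thr above (not GCC code).  */
    static double
    sqrt_df_model (double b)
    {
      double y  = 1.0 / sqrt (b);      /* frsqrta seed              */
      double g  = b * y;
      double h  = 0.5 * y;
      double r  = fma (-g, h, 0.5);    /* r = 0.5 - (g * h)         */
      double g1 = fma (g, r, g);       /* g1 = g + (g * r)          */
      double h1 = fma (h, r, h);
      double r1 = fma (-g1, h1, 0.5);  /* error squares each pass   */
      double g2 = fma (g1, r1, g1);
      double h2 = fma (h1, r1, h1);
      double d  = fma (-g2, g2, b);    /* Newton residual           */
      double g3 = fma (d, h2, g2);
      double d1 = fma (-g3, g3, b);
      return fma (d1, h2, g3);         /* double (g3 + (d1 * h2))   */
    }

The XFmode variant that follows adds one more h refinement (h3) so the final correction still carries enough precision for the 64-bit XFmode significand.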
+(define_expand "sqrtxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "")
+ (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
+ "TARGET_INLINE_SQRT"
+{
+ rtx y = gen_reg_rtx (RFmode);
+ rtx b = gen_reg_rtx (RFmode);
+ rtx g = gen_reg_rtx (RFmode);
+ rtx g1 = gen_reg_rtx (RFmode);
+ rtx g2 = gen_reg_rtx (RFmode);
+ rtx g3 = gen_reg_rtx (RFmode);
+ rtx g4 = gen_reg_rtx (RFmode);
+ rtx e = gen_reg_rtx (RFmode);
+ rtx e1 = gen_reg_rtx (RFmode);
+ rtx e2 = gen_reg_rtx (RFmode);
+ rtx h = gen_reg_rtx (RFmode);
+ rtx h1 = gen_reg_rtx (RFmode);
+ rtx h2 = gen_reg_rtx (RFmode);
+ rtx h3 = gen_reg_rtx (RFmode);
+ rtx d = gen_reg_rtx (RFmode);
+ rtx d1 = gen_reg_rtx (RFmode);
+ rtx cond = gen_reg_rtx (CCImode);
+ rtx zero = CONST0_RTX (RFmode);
+ rtx c1 = ia64_dconst_0_5();
+ rtx reg_df_c1 = gen_reg_rtx (DFmode);
+ rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+ rtx status0 = CONST0_RTX (SImode);
+ rtx status1 = CONST1_RTX (SImode);
+ rtx trunc_off = CONST2_RTX (SImode);
+
+ /* Put needed constants into registers. */
+ emit_insn (gen_movdf (reg_df_c1, c1));
+ emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+ /* Value-preserving conversion to put the input into RFmode. */
+ emit_insn (gen_extendxfrf2 (b, operands[1]));
+ /* y = sqrt (1 / b) */
+ emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+ /* g = b * y */
+ emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+ /* h = 0.5 * y */
+ emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+ /* e = 0.5 - (g * h) */
+ emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
+ /* g1 = g + (g * e) */
+ emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off));
+ /* h1 = h + (h * e) */
+ emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off));
+ /* e1 = 0.5 - (g1 * h1) */
+ emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
+ /* g2 = g1 + (g1 * e1) */
+ emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off));
+ /* h2 = h1 + (h1 * e1) */
+ emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off));
+ /* d = b - (g2 * g2) */
+ emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
+ /* e2 = 0.5 - (g2 * h2) */
+ emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off));
+ /* g3 = g2 + (d * h2) */
+ emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
+ /* h3 = h2 + (e2 * h2) */
+ emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off));
+ /* d1 = b - (g3 * g3) */
+ emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
+ /* g4 = g3 + (d1 * h3) */
+ emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off));
+ /* Conversion back into XFmode. */
+ emit_insn (gen_truncrfxf2 (operands[0], g4));
+ DONE;
+})
diff --git a/gcc/config/ia64/elf.h b/gcc/config/ia64/elf.h
new file mode 100644
index 000000000..21415eee8
--- /dev/null
+++ b/gcc/config/ia64/elf.h
@@ -0,0 +1,73 @@
+/* Definitions for embedded ia64-elf target.
+
+Copyright (C) 2000, 2001, 2002, 2003, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This macro is a C statement to print on `stderr' a string describing the
+ particular machine description choice. */
+
+#define TARGET_VERSION fprintf (stderr, " (IA-64) ELF");
+
+/* A C string constant that tells the GCC driver program options to pass to
+ the assembler. It can also specify how to translate options you give to
+ GCC into options for GCC to pass to the assembler. */
+
+#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_AS) != 0
+/* GNU AS. */
+#undef ASM_EXTRA_SPEC
+#define ASM_EXTRA_SPEC \
+ "%{mno-gnu-as:-N so} %{!mno-gnu-as:-x}"
+#else
+/* Intel ias. */
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{!mgnu-as:-N so} %{mgnu-as:-x} %{mconstant-gp:-M const_gp}\
+ %{mauto-pic:-M no_plabel}"
+#endif
+
+/* A C string constant that tells the GCC driver program options to pass to
+ the linker. It can also specify how to translate options you give to GCC
+ into options for GCC to pass to the linker. */
+
+/* The Intel linker does not support dynamic linking, so we need -dn.
+ The Intel linker gives annoying messages unless -N so is used. */
+#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GNU_LD) != 0
+/* GNU LD. */
+#define LINK_SPEC "%{mno-gnu-ld:-dn -N so}"
+#else
+/* Intel ild. */
+#define LINK_SPEC "%{!mgnu-ld:-dn -N so}"
+#endif
+
+/* elfos.h does not link with crti.o/crtn.o. We override elfos.h so
+ that we can use the standard ELF Unix method. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\
+ crti.o%s crtbegin.o%s"
+
+/* End of elf.h */
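For readers new to driver specs: %{X:Y} substitutes Y when option -X was given, %{!X:Y} when it was not, and a trailing %s makes the driver search its standard startfile directories for the named file. Read that way, a plain (non-shared, non-profiled) link gets bracketed roughly as

    crt0.o crti.o crtbegin.o  <user objects and libraries>  crtend.o crtn.o

with -pg substituting gcrt0.o, and -p substituting mcrt0.o, for crt0.o. This gloss is editorial; the authoritative description of spec syntax is in gcc.c.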
diff --git a/gcc/config/ia64/fde-glibc.c b/gcc/config/ia64/fde-glibc.c
new file mode 100644
index 000000000..12760b96e
--- /dev/null
+++ b/gcc/config/ia64/fde-glibc.c
@@ -0,0 +1,162 @@
+/* Copyright (C) 2000, 2001, 2003, 2009 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@cygnus.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Locate the FDE entry for a given address, using glibc ld.so routines
+ to avoid register/deregister calls at DSO load/unload. */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include "config.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <link.h>
+#include "unwind-ia64.h"
+
+#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2) \
+ || (__GLIBC__ == 2 && __GLIBC_MINOR__ == 2 && !defined(DT_CONFIG))
+# error You need GLIBC 2.2.4 or later on IA-64 Linux
+#endif
+
+struct unw_ia64_callback_data
+{
+ Elf64_Addr pc;
+ unsigned long *segment_base;
+ unsigned long *gp;
+ struct unw_table_entry *ret;
+};
+
+static int
+_Unwind_IteratePhdrCallback (struct dl_phdr_info *info, size_t size, void *ptr)
+{
+ struct unw_ia64_callback_data *data = (struct unw_ia64_callback_data *) ptr;
+ const Elf64_Phdr *phdr, *p_unwind, *p_dynamic;
+ long n, match;
+ Elf64_Addr load_base, seg_base;
+ struct unw_table_entry *f_base, *f;
+ size_t lo, hi;
+
+ /* Make sure struct dl_phdr_info is at least as big as we need. */
+ if (size < offsetof (struct dl_phdr_info, dlpi_phnum)
+ + sizeof (info->dlpi_phnum))
+ return -1;
+
+ match = 0;
+ phdr = info->dlpi_phdr;
+ load_base = info->dlpi_addr;
+ p_unwind = NULL;
+ p_dynamic = NULL;
+ seg_base = ~(Elf64_Addr) 0;
+
+ /* See if PC falls into one of the loaded segments. Find the unwind
+ segment at the same time. */
+ for (n = info->dlpi_phnum; --n >= 0; phdr++)
+ {
+ if (phdr->p_type == PT_LOAD)
+ {
+ Elf64_Addr vaddr = phdr->p_vaddr + load_base;
+ if (data->pc >= vaddr && data->pc < vaddr + phdr->p_memsz)
+ match = 1;
+ if (vaddr < seg_base)
+ seg_base = vaddr;
+ }
+ else if (phdr->p_type == PT_IA_64_UNWIND)
+ p_unwind = phdr;
+ else if (phdr->p_type == PT_DYNAMIC)
+ p_dynamic = phdr;
+ }
+ if (!match || !p_unwind)
+ return 0;
+
+ /* Search for the FDE within the unwind segment. */
+
+ f_base = (struct unw_table_entry *) (p_unwind->p_vaddr + load_base);
+ lo = 0;
+ hi = p_unwind->p_memsz / sizeof (struct unw_table_entry);
+
+ while (lo < hi)
+ {
+ size_t mid = (lo + hi) / 2;
+
+ f = f_base + mid;
+ if (data->pc < f->start_offset + seg_base)
+ hi = mid;
+ else if (data->pc >= f->end_offset + seg_base)
+ lo = mid + 1;
+ else
+ goto found;
+ }
+ /* No need to search for further libraries when we know pc is contained
+ in this library. */
+ return 1;
+
+ found:
+ *data->segment_base = seg_base;
+ *data->gp = 0;
+ data->ret = f;
+
+ if (p_dynamic)
+ {
+ /* For dynamically linked executables and shared libraries,
+ DT_PLTGOT is the gp value for that object. */
+ Elf64_Dyn *dyn = (Elf64_Dyn *)(p_dynamic->p_vaddr + load_base);
+ for (; dyn->d_tag != DT_NULL ; dyn++)
+ if (dyn->d_tag == DT_PLTGOT)
+ {
+ /* On IA-64, _DYNAMIC is writable and GLIBC has relocated it. */
+ *data->gp = dyn->d_un.d_ptr;
+ break;
+ }
+ }
+ else
+ {
+ /* Otherwise this is a static executable with no _DYNAMIC.
+ The gp is constant program-wide. */
+ register unsigned long gp __asm__("gp");
+ *data->gp = gp;
+ }
+
+ return 1;
+}
+
+/* Return a pointer to the unwind table entry for the function
+ containing PC. */
+
+struct unw_table_entry *
+_Unwind_FindTableEntry (void *pc, unsigned long *segment_base,
+ unsigned long *gp,
+ struct unw_table_entry *ent ATTRIBUTE_UNUSED)
+{
+ struct unw_ia64_callback_data data;
+
+ data.pc = (Elf64_Addr) pc;
+ data.segment_base = segment_base;
+ data.gp = gp;
+ data.ret = NULL;
+
+ if (dl_iterate_phdr (_Unwind_IteratePhdrCallback, &data) < 0)
+ return NULL;
+
+ return data.ret;
+}
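A minimal sketch of a caller, for orientation only (the real consumer is the IA-64 unwinder). Note that this glibc variant ignores its last argument and returns a pointer into the in-memory table, while the VMS variant in the next file fills in the caller-supplied entry instead, so a portable caller passes scratch storage:

    #include "unwind-ia64.h"

    /* Sketch: does PC fall inside a function covered by unwind info
       in any currently loaded object?  */
    static int
    pc_has_unwind_info (void *pc)
    {
      unsigned long segment_base, gp;
      struct unw_table_entry scratch;
      return _Unwind_FindTableEntry (pc, &segment_base, &gp, &scratch) != 0;
    }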
diff --git a/gcc/config/ia64/fde-vms.c b/gcc/config/ia64/fde-vms.c
new file mode 100644
index 000000000..b310f0d11
--- /dev/null
+++ b/gcc/config/ia64/fde-vms.c
@@ -0,0 +1,157 @@
+/* Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+ Contributed by Douglas B Rupp <rupp@gnat.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Locate the FDE entry for a given address, using VMS Starlet routines
+ to avoid register/deregister calls at DSO load/unload. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "unwind-ia64.h"
+
+#define __int64 long
+#include <vms/ossddef.h>
+#ifndef SS$_NORMAL
+#define SS$_NORMAL 1
+#endif
+
+typedef struct
+{
+ unsigned long start_offset;
+ unsigned long end_offset;
+ unsigned long info_offset;
+ unsigned long gp_value;
+} vms_unw_table_entry;
+
+typedef unsigned long long uqword;
+
+/* ENTRY is the unwind table entry found for PC, part of the call chain
+ we're unwinding through. Return whether we should force the generic
+ unwinder to resort to "fallback" processing. */
+
+static int
+force_fallback_processing_for (void * pc, vms_unw_table_entry * entry)
+{
+ static int eh_debug = -1;
+
+ uqword * unw_info_block = (uqword *)entry->info_offset;
+ uqword header = *unw_info_block;
+
+ /* We need to force fallback processing in two cases:
+
+ 1/ The exception dispatch frame, since only our fallback
+ processing knows how to properly unwind through it, and
+
+ 2/ A bottom of stack frame, since only our fallback processing
+ will ensure we don't try to unwind further past it, which
+ would get us into unknown territory and likely cause a severe
+ crash along the way.
+
+ The two cases are indicated by non-default values for specific
+ bits in the OS Specific Data (OSSD) General Information block
+ associated with such frames. */
+
+ ossddef * ossd;
+
+ if (eh_debug == -1)
+ {
+ char * EH_DEBUG = getenv ("EH_DEBUG");
+ eh_debug = EH_DEBUG ? atoi (EH_DEBUG) : 0;
+ }
+
+ if (eh_debug)
+ {
+ printf ("pc @ 0x%p, block @ 0x%p, header = 0x%016llx\n",
+ pc, unw_info_block, header);
+ printf ("mode = %d, length = %ld, handler = %d\n",
+ (int)UNW_IVMS_MODE (header), UNW_LENGTH (header),
+ UNW_FLAG_EHANDLER (header) || UNW_FLAG_UHANDLER (header));
+ }
+
+ /* An OSSD block is there for IVMS_MODE == 3 only. */
+ if (UNW_IVMS_MODE (header) != 3)
+ return 0;
+
+ /* The OSSD block is found past the header, unwind descriptor area
+ and condition handler pointer, if any. */
+ ossd = (ossddef *)
+ /* Beware: uqword pointer arithmetic below. */
+ (unw_info_block
+ + 1
+ + UNW_LENGTH (header)
+ + (UNW_FLAG_EHANDLER (header) || UNW_FLAG_UHANDLER (header)));
+
+ /* "A General Information segment may be omitted if all of its fields
+ would have their default values. If a General Information segment
+ is present, it must be the first in the OSSD area." So ... */
+
+ if (eh_debug)
+ printf ("ossd @ 0x%p\n", ossd);
+
+ if (eh_debug && ossd->ossd$v_type == OSSD$K_GENERAL_INFO)
+ printf ("exc_frame = %d - bot_frame = %d - base_frame = %d\n",
+ ossd->ossd$v_exception_frame,
+ ossd->ossd$v_bottom_of_stack,
+ ossd->ossd$v_base_frame);
+
+ return
+ ossd->ossd$v_type == OSSD$K_GENERAL_INFO
+ && (ossd->ossd$v_exception_frame
+ || ossd->ossd$v_bottom_of_stack || ossd->ossd$v_base_frame);
+}
+
+/* Return a pointer to the unwind table entry for the function
+ containing PC, or 0 if we cannot find an entry or if the one we
+ find calls for fallback processing. */
+
+struct unw_table_entry *
+_Unwind_FindTableEntry (void *pc, unsigned long *segment_base,
+ unsigned long *gp, struct unw_table_entry *ent)
+{
+ vms_unw_table_entry vueblock;
+
+ if (SYS$GET_UNWIND_ENTRY_INFO (pc, &vueblock, 0) != SS$_NORMAL)
+ return 0;
+
+ /* If there is no unwind information, use fallback. */
+ if (vueblock.info_offset == 0)
+ return 0;
+
+ /* If we need to force fallback processing, just pretend there is
+ no entry. */
+ if (force_fallback_processing_for (pc, &vueblock))
+ return 0;
+
+ *segment_base = 0; /* ??? Fixme. ??? */
+ *gp = vueblock.gp_value;
+ ent->start_offset = vueblock.start_offset;
+ ent->end_offset = vueblock.end_offset;
+ ent->info_offset = vueblock.info_offset;
+
+ return ent;
+}
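The uqword pointer arithmetic in force_fallback_processing_for assumes the standard Itanium unwind-info layout, roughly (one 64-bit word per row; an editorial sketch, not normative):

    word 0           header: UNW_IVMS_MODE, UNW_LENGTH and UNW_FLAG_* fields
    words 1 .. L     unwind descriptor area, L = UNW_LENGTH (header)
    word L + 1       condition handler pointer, present only when the
                     ehandler or uhandler flag is set
    what follows     OSSD area; a General Information segment, when
                     present, must come first

which is why the cast to ossddef * adds 1, then UNW_LENGTH, then the handler-flag test.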
diff --git a/gcc/config/ia64/freebsd.h b/gcc/config/ia64/freebsd.h
new file mode 100644
index 000000000..24e413142
--- /dev/null
+++ b/gcc/config/ia64/freebsd.h
@@ -0,0 +1,55 @@
+/* Definitions for Intel IA-64 running FreeBSD using the ELF format
+ Copyright (C) 2001, 2004, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+#define LINK_SPEC " \
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{symbolic:-Bsymbolic} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}}"
+
+
+/************************[ Target stuff ]***********************************/
+
+/* Define the actual types of some ANSI-mandated types.
+ Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c,
+ c-common.c, and config/<arch>/<arch>.h. */
+
+/* Earlier headers may get this wrong for FreeBSD.
+ We use the GCC defaults instead. */
+#undef WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/IA-64 ELF)");
+
+#define TARGET_ELF 1
+
+#define JMP_BUF_SIZE 76
diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h
new file mode 100644
index 000000000..d4d8da576
--- /dev/null
+++ b/gcc/config/ia64/hpux.h
@@ -0,0 +1,235 @@
+/* Definitions of target machine for GNU compiler. IA-64 version.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Ellcey <sje@cup.hp.com> and
+ Reva Cuthbertson <reva@cup.hp.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This macro is a C statement to print on `stderr' a string describing the
+ particular machine description choice. */
+
+#define TARGET_VERSION fprintf (stderr, " (IA-64) HP-UX");
+
+/* Enable HPUX ABI quirks. */
+#undef TARGET_HPUX
+#define TARGET_HPUX 1
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() \
+do { \
+ builtin_assert("system=hpux"); \
+ builtin_assert("system=posix"); \
+ builtin_assert("system=unix"); \
+ builtin_define_std("hpux"); \
+ builtin_define_std("unix"); \
+ builtin_define("__IA64__"); \
+ builtin_define("_LONGLONG"); \
+ builtin_define("_INCLUDE_LONGLONG"); \
+ builtin_define("__STDC_EXT__"); \
+ builtin_define("_UINT128_T"); \
+ if (c_dialect_cxx () || !flag_iso) \
+ { \
+ builtin_define("_HPUX_SOURCE"); \
+ builtin_define("__STDCPP__"); \
+ builtin_define("_INCLUDE__STDC_A1_SOURCE"); \
+ } \
+ if (TARGET_ILP32) \
+ builtin_define("_ILP32"); \
+} while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC \
+ "%{mt|pthread:-D_REENTRANT -D_THREAD_SAFE -D_POSIX_C_SOURCE=199506L}"
+/* aCC also defines -DRWSTD_MULTI_THREAD and -DRW_MULTI_THREAD. These
+ affect only aCC's C++ library (Rogue Wave-derived), which we do not
+ use, and they violate the user's name space. */
+
+#undef ASM_EXTRA_SPEC
+#define ASM_EXTRA_SPEC "%{milp32:-milp32} %{mlp64:-mlp64}"
+
+#ifndef USE_GAS
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+#endif
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin/"
+
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
+#endif
+
+#undef ENDFILE_SPEC
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:%{static:crt0%O%s} \
+ %{mlp64:/usr/lib/hpux64/unix98%O%s} \
+ %{!mlp64:/usr/lib/hpux32/unix98%O%s}}"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "-z +Accept TypeMismatch \
+ %{shared:-b} \
+ %{!shared: \
+ -u main \
+ %{static:-noshared}}"
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{!shared: \
+ %{mt|pthread:%{fopenmp:-lrt} -lpthread} \
+ %{p:%{!mlp64:-L/usr/lib/hpux32/libp} \
+ %{mlp64:-L/usr/lib/hpux64/libp} -lprof} \
+ %{pg:%{!mlp64:-L/usr/lib/hpux32/libp} \
+ %{mlp64:-L/usr/lib/hpux64/libp} -lgprof} \
+ %{!symbolic:-lc}}"
+
+#define MULTILIB_DEFAULTS { "milp32" }
+
+/* A C expression whose value is zero if pointers that need to be extended
+ from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended, greater
+ than zero if they are zero-extended, and less than zero if the ptr_extend
+ instruction should be used. */
+
+#define POINTERS_EXTEND_UNSIGNED -1
+
+#define JMP_BUF_SIZE (8 * 76)
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_DWARF2_ASM | MASK_BIG_ENDIAN | MASK_ILP32)
+
+/* ??? Might not be needed anymore. */
+#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) ((MODE) == TFmode)
+
+/* ASM_OUTPUT_EXTERNAL_LIBCALL defaults to just a globalize_label call,
+ but that doesn't put out the @function type information, and omitting
+ it causes shared library problems. */
+
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+do { \
+ (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, XSTR (FUN, 0), "function"); \
+} while (0)
+
+#undef FUNCTION_ARG_PADDING
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ ia64_hpux_function_arg_padding ((MODE), (TYPE))
+
+#undef PAD_VARARGS_DOWN
+#define PAD_VARARGS_DOWN (!AGGREGATE_TYPE_P (type))
+
+#define REGISTER_TARGET_PRAGMAS() \
+ c_register_pragma (0, "builtin", ia64_hpux_handle_builtin_pragma)
+
+/* Tell ia64.c that we are using the HP linker and we should delay output of
+ function extern declarations so that we don't output them for functions
+ which are never used (and may not be defined). */
+
+#undef TARGET_HPUX_LD
+#define TARGET_HPUX_LD 1
+
+/* The HPUX dynamic linker objects to weak symbols with no
+ definitions, so do not use them in gthr-posix.h. */
+#define GTHREAD_USE_WEAK 0
+
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\t\"aw\",\"init_array\""
+
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\t\"aw\",\"fini_array\""
+
+/* The init_array/fini_array technique does not permit the use of
+ initialization priorities. */
+#define SUPPORTS_INIT_PRIORITY 0
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata,\t\"a\",\t\"progbits\""
+
+#undef DATA_SECTION_ASM_OP
+#define DATA_SECTION_ASM_OP "\t.section\t.data,\t\"aw\",\t\"progbits\""
+
+#undef SDATA_SECTION_ASM_OP
+#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\t\"asw\",\t\"progbits\""
+
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss,\t\"aw\",\t\"nobits\""
+
+#undef SBSS_SECTION_ASM_OP
+#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\t\"asw\",\t\"nobits\""
+
+#undef TEXT_SECTION_ASM_OP
+#define TEXT_SECTION_ASM_OP "\t.section\t.text,\t\"ax\",\t\"progbits\""
+
+/* It is illegal to have relocations in shared segments on HPUX.
+ Pretend flag_pic is always set. */
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK ia64_hpux_reloc_rw_mask
+
+/* ia64 HPUX has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS ia64_hpux_init_libfuncs
+
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode)
+
+/* Put all *xf routines in libgcc, regardless of long double size. */
+#undef LIBGCC2_HAS_XF_MODE
+#define LIBGCC2_HAS_XF_MODE 1
+#define XF_SIZE 64
+
+/* Put all *tf routines in libgcc, regardless of long double size. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define TF_SIZE 113
+
+/* HP-UX headers are C++-compatible. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* HP-UX uses PROFILE_HOOK instead of FUNCTION_PROFILER, but we still
+ need FUNCTION_PROFILER defined because its use is not guarded by an
+ #ifdef. When using PROFILE_HOOK, the profile call comes after the
+ prologue. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) do { } while (0)
+
+#undef PROFILE_HOOK
+#define PROFILE_HOOK(LABEL) ia64_profile_hook (LABEL)
+
+#undef PROFILE_BEFORE_PROLOGUE
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 0
+
+/* The HP-UX linker has a bug that makes calls from functions in
+ .text.unlikely to functions in .text segfault. Until it is
+ fixed, prevent code from being put into .text.unlikely or
+ .text.hot. */
+
+#define TARGET_ASM_FUNCTION_SECTION ia64_hpux_function_section
+
+#define TARGET_POSIX_IO
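What these definitions mean to user code can be sketched with a small probe; the macro names are those defined by TARGET_OS_CPP_BUILTINS and TARGET_ILP32 above, and MULTILIB_DEFAULTS makes -milp32 the implicit default:

    /* probe.c -- editorial sketch; an ia64 HP-UX compiler is assumed.  */
    #if defined (__hpux) && defined (__IA64__)
    # ifdef _ILP32
    const char abi[] = "ILP32";   /* the default multilib              */
    # else
    const char abi[] = "LP64";    /* selected by compiling with -mlp64 */
    # endif
    #else
    const char abi[] = "not ia64 HP-UX";
    #endif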
diff --git a/gcc/config/ia64/ia64-c.c b/gcc/config/ia64/ia64-c.c
new file mode 100644
index 000000000..e56e6d4e4
--- /dev/null
+++ b/gcc/config/ia64/ia64-c.c
@@ -0,0 +1,191 @@
+/* Definitions of C specific functions for GNU compiler.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Ellcey <sje@cup.hp.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "cpplib.h"
+#include "c-family/c-common.h"
+#include "c-family/c-pragma.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+
+static void ia64_hpux_add_pragma_builtin (tree func);
+
+void
+ia64_hpux_handle_builtin_pragma (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ /* #pragma builtin name, name, name */
+
+ enum cpp_ttype type;
+ tree x;
+
+ type = pragma_lex (&x);
+ while (type == CPP_NAME)
+ {
+ ia64_hpux_add_pragma_builtin (x);
+ type = pragma_lex (&x);
+ if (type == CPP_COMMA)
+ type = pragma_lex (&x);
+ }
+ if (type != CPP_EOF)
+ warning (OPT_Wpragmas, "malformed #pragma builtin");
+}
+
+/* List of standard math functions which do not set matherr by default.
+ Each has an alternate version which does set errno; we want to call
+ that version *if* we have seen an extern for the routine and strict
+ C89 compatibility has been requested. */
+
+typedef struct c89_mathlib_names
+{
+ const char *realname; /* User visible function name. */
+ const char *c89name; /* libm special name needed to set errno. */
+} c89_mathlib_names;
+
+static const c89_mathlib_names c89_mathlib_name_list [] =
+{
+ {"acos", "_Acos_e#"},
+ {"acosd", "_Acosd_e#"},
+ {"acosdf", "_Acosdf_e#"},
+ {"acosdl", "_Acosdl_e#"},
+ {"acosdw", "_Acosdw_e#"},
+ {"acosf", "_Acosf_e#"},
+ {"acosh", "_Acosh_e#"},
+ {"acoshf", "_Acoshf_e#"},
+ {"acoshl", "_Acoshl_e#"},
+ {"acoshw", "_Acoshw_e#"},
+ {"acosl", "_Acosl_e#"},
+ {"acosw", "_Acosw_e#"},
+ {"asin", "_Asin_e#"},
+ {"asind", "_Asind_e#"},
+ {"asindf", "_Asindf_e#"},
+ {"asindl", "_Asindl_e#"},
+ {"asindw", "_Asindw_e#"},
+ {"asinf", "_Asinf_e#"},
+ {"asinl", "_Asinl_e#"},
+ {"asinw", "_Asinw_e#"},
+ {"atanh", "_Atanh_e#"},
+ {"atanhf", "_Atanhf_e#"},
+ {"atanhl", "_Atanhl_e#"},
+ {"atanhw", "_Atanhw_e#"},
+ {"cosh", "_Cosh_e#"},
+ {"coshf", "_Coshf_e#"},
+ {"coshl", "_Coshl_e#"},
+ {"coshw", "_Coshw_e#"},
+ {"exp2", "_Exp2_e#"},
+ {"exp2f", "_Exp2f_e#"},
+ {"exp2l", "_Exp2l_e#"},
+ {"exp2w", "_Exp2w_e#"},
+ {"exp", "_Exp_e#"},
+ {"expf", "_Expf_e#"},
+ {"expl", "_Expl_e#"},
+ {"expm1", "_Expm1_e#"},
+ {"expm1f", "_Expm1f_e#"},
+ {"expm1l", "_Expm1l_e#"},
+ {"expm1w", "_Expm1w_e#"},
+ {"expw", "_Expw_e#"},
+ {"fmod", "_Fmod_e#"},
+ {"fmodf", "_Fmodf_e#"},
+ {"fmodl", "_Fmodl_e#"},
+ {"fmodw", "_Fmodw_e#"},
+ {"gamma", "_Gamma_e#"},
+ {"gammaf", "_Gammaf_e#"},
+ {"gammal", "_Gammal_e#"},
+ {"gammaw", "_Gammaw_e#"},
+ {"ldexp", "_Ldexp_e#"},
+ {"ldexpf", "_Ldexpf_e#"},
+ {"ldexpl", "_Ldexpl_e#"},
+ {"ldexpw", "_Ldexpw_e#"},
+ {"lgamma", "_Lgamma_e#"},
+ {"lgammaf", "_Lgammaf_e#"},
+ {"lgammal", "_Lgammal_e#"},
+ {"lgammaw", "_Lgammaw_e#"},
+ {"log10", "_Log10_e#"},
+ {"log10f", "_Log10f_e#"},
+ {"log10l", "_Log10l_e#"},
+ {"log10w", "_Log10w_e#"},
+ {"log1p", "_Log1p_e#"},
+ {"log1pf", "_Log1pf_e#"},
+ {"log1pl", "_Log1pl_e#"},
+ {"log1pw", "_Log1pw_e#"},
+ {"log2", "_Log2_e#"},
+ {"log2f", "_Log2f_e#"},
+ {"log2l", "_Log2l_e#"},
+ {"log2w", "_Log2w_e#"},
+ {"log", "_Log_e#"},
+ {"logb", "_Logb_e#"},
+ {"logbf", "_Logbf_e#"},
+ {"logbl", "_Logbl_e#"},
+ {"logbw", "_Logbw_e#"},
+ {"logf", "_Logf_e#"},
+ {"logl", "_Logl_e#"},
+ {"logw", "_Logw_e#"},
+ {"nextafter", "_Nextafter_e#"},
+ {"nextafterf", "_Nextafterf_e#"},
+ {"nextafterl", "_Nextafterl_e#"},
+ {"nextafterw", "_Nextafterw_e#"},
+ {"pow", "_Pow_e#"},
+ {"powf", "_Powf_e#"},
+ {"powl", "_Powl_e#"},
+ {"poww", "_Poww_e#"},
+ {"remainder", "_Remainder_e#"},
+ {"remainderf", "_Remainderf_e#"},
+ {"remainderl", "_Remainderl_e#"},
+ {"remainderw", "_Remainderw_e#"},
+ {"scalb", "_Scalb_e#"},
+ {"scalbf", "_Scalbf_e#"},
+ {"scalbl", "_Scalbl_e#"},
+ {"scalbw", "_Scalbw_e#"},
+ {"sinh", "_Sinh_e#"},
+ {"sinhf", "_Sinhf_e#"},
+ {"sinhl", "_Sinhl_e#"},
+ {"sinhw", "_Sinhw_e#"},
+ {"sqrt", "_Sqrt_e#"},
+ {"sqrtf", "_Sqrtf_e#"},
+ {"sqrtl", "_Sqrtl_e#"},
+ {"sqrtw", "_Sqrtw_e#"},
+ {"tgamma", "_Tgamma_e#"},
+ {"tgammaf", "_Tgammaf_e#"},
+ {"tgammal", "_Tgammal_e#"},
+ {"tgammaw", "_Tgammaw_e#"}
+};
+
+static void
+ia64_hpux_add_pragma_builtin (tree func)
+{
+ size_t i;
+
+ if (!flag_isoc94 && flag_iso)
+ {
+ for (i = 0; i < ARRAY_SIZE (c89_mathlib_name_list); i++)
+ {
+ if (!strcmp(c89_mathlib_name_list[i].realname,
+ IDENTIFIER_POINTER (func)))
+ {
+ add_to_renaming_pragma_list(func,
+ get_identifier(c89_mathlib_name_list[i].c89name));
+ }
+ }
+ }
+}
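Putting the pieces together: the renaming fires only in strict C89 mode (flag_iso set and flag_isoc94 clear), and only for names that appear both in the table above and in a #pragma builtin line. A hypothetical translation unit compiled with -std=c89 (on HP-UX the pragma would normally come from the system <math.h>):

    extern double sqrt (double);
    extern double log (double);
    #pragma builtin sqrt, log   /* bind to _Sqrt_e# / _Log_e#, the
                                   errno-setting libm entry points  */

    double
    log_of_geometric_mean (double a, double b)
    {
      return 0.5 * log (a * b);   /* log renamed */
    }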
diff --git a/gcc/config/ia64/ia64-modes.def b/gcc/config/ia64/ia64-modes.def
new file mode 100644
index 000000000..a3d04d981
--- /dev/null
+++ b/gcc/config/ia64/ia64-modes.def
@@ -0,0 +1,86 @@
+/* Definitions of target machine for GNU compiler. IA-64 version.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by James E. Wilson <wilson@cygnus.com> and
+ David Mosberger <davidm@hpl.hp.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* IA64 requires both XF and TF modes.
+ XFmode is __float80, the IEEE extended format; TFmode is
+ __float128, the IEEE quad format. Both modes occupy 16 bytes,
+ but XFmode only has 80 significant bits. RFmode is __fpreg, the
+ IA64 internal register format; it has 82 significant bits but is
+ otherwise handled like XFmode. */
+
+FRACTIONAL_FLOAT_MODE (XF, 80, 16, ieee_extended_intel_128_format);
+FRACTIONAL_FLOAT_MODE (RF, 82, 16, ieee_extended_intel_128_format);
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* The above produces:
+
+ mode ILP32 size/align LP64 size/align
+ XF 16/16 16/16
+ TF 16/16 16/16
+
+ psABI expectations:
+
+ mode ILP32 size/align LP64 size/align
+ XF 12/4 -
+ TF - -
+
+ HPUX expectations:
+
+ mode ILP32 size/align LP64 size/align
+ XF - -
+ TF 16/8 -
+
+ We fix this up here. */
+
+ADJUST_FLOAT_FORMAT (XF, (TARGET_ILP32 && !TARGET_HPUX)
+ ? &ieee_extended_intel_96_format
+ : &ieee_extended_intel_128_format);
+ADJUST_BYTESIZE (XF, (TARGET_ILP32 && !TARGET_HPUX) ? 12 : 16);
+ADJUST_ALIGNMENT (XF, (TARGET_ILP32 && !TARGET_HPUX) ? 4 : 16);
+
+ADJUST_FLOAT_FORMAT (RF, (TARGET_ILP32 && !TARGET_HPUX)
+ ? &ieee_extended_intel_96_format
+ : &ieee_extended_intel_128_format);
+ADJUST_BYTESIZE (RF, (TARGET_ILP32 && !TARGET_HPUX) ? 12 : 16);
+ADJUST_ALIGNMENT (RF, (TARGET_ILP32 && !TARGET_HPUX) ? 4 : 16);
+
+ADJUST_ALIGNMENT (TF, (TARGET_ILP32 && TARGET_HPUX) ? 8 : 16);
+
+/* 256-bit integer mode is needed for STACK_SAVEAREA_MODE. */
+INT_MODE (OI, 32);
+
+/* Add any extra modes needed to represent the condition code.
+
+ CCImode is used to mark a single predicate register instead
+ of a register pair. This is currently only used in reg_raw_mode
+ so that flow doesn't do something stupid. */
+
+CC_MODE (CCI);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODE (INT, QI, 16);
+VECTOR_MODE (INT, HI, 8);
+VECTOR_MODE (INT, SI, 4);
+VECTOR_MODE (FLOAT, SF, 2);
+VECTOR_MODE (FLOAT, SF, 4);
+
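The net effect of the adjustments can be restated as compile-time checks; a sketch assuming an LP64 ia64 GNU/Linux compiler (GCC 4.6 accepts C1X _Static_assert), with the figures simply repeating the comment above:

    /* layout_check.c -- editorial sketch, ia64-linux LP64 assumed.  */
    _Static_assert (sizeof (long double) == 16,
                    "XFmode: 80 significant bits padded to 16 bytes");
    _Static_assert (__alignof__ (long double) == 16,
                    "XFmode alignment in LP64");
    _Static_assert (sizeof (__float128) == 16, "TFmode is IEEE quad");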
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
new file mode 100644
index 000000000..107a7ccb9
--- /dev/null
+++ b/gcc/config/ia64/ia64-protos.h
@@ -0,0 +1,101 @@
+/* Definitions of target machine for GNU compiler for IA-64.
+ Copyright (C) 1999, 2000, 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Functions defined in ia64.c */
+
+extern int bundling_p;
+#ifdef RTX_CODE
+extern int ia64_st_address_bypass_p (rtx, rtx);
+extern int ia64_ld_address_bypass_p (rtx, rtx);
+extern int ia64_produce_address_p (rtx);
+
+extern bool ia64_legitimate_constant_p (rtx);
+
+extern rtx ia64_expand_move (rtx, rtx);
+extern int ia64_move_ok (rtx, rtx);
+extern int ia64_load_pair_ok (rtx, rtx);
+extern int addp4_optimize_ok (rtx, rtx);
+extern void ia64_emit_cond_move (rtx, rtx, rtx);
+extern int ia64_depz_field_mask (rtx, rtx);
+extern void ia64_split_tmode_move (rtx[]);
+extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
+extern void ia64_expand_compare (rtx *, rtx *, rtx *);
+extern void ia64_expand_vecint_cmov (rtx[]);
+extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
+extern void ia64_expand_unpack (rtx [], bool, bool);
+extern void ia64_expand_widen_sum (rtx[], bool);
+extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
+extern void ia64_expand_call (rtx, rtx, rtx, int);
+extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
+extern void ia64_reload_gp (void);
+extern void ia64_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx);
+
+extern HOST_WIDE_INT ia64_initial_elimination_offset (int, int);
+extern void ia64_expand_prologue (void);
+extern void ia64_expand_epilogue (int);
+
+extern int ia64_direct_return (void);
+extern bool ia64_expand_load_address (rtx, rtx);
+extern int ia64_hard_regno_rename_ok (int, int);
+
+extern void ia64_print_operand_address (FILE *, rtx);
+extern void ia64_print_operand (FILE *, rtx, int);
+extern enum reg_class ia64_secondary_reload_class (enum reg_class,
+ enum machine_mode, rtx);
+extern const char *get_bundle_name (int);
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+#ifdef RTX_CODE
+extern rtx ia64_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+extern rtx ia64_va_arg (tree, tree);
+#endif /* RTX_CODE */
+
+extern void ia64_asm_output_external (FILE *, tree, const char *);
+extern void ia64_vms_output_aligned_decl_common (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void ia64_vms_elf_asm_named_section (const char *, unsigned int, tree);
+extern void ia64_start_function (FILE *, const char *, tree);
+#endif /* TREE_CODE */
+
+extern int ia64_epilogue_uses (int);
+extern int ia64_eh_uses (int);
+extern void emit_safe_across_calls (void);
+extern void ia64_init_builtins (void);
+extern int ia64_dbx_register_number (int);
+
+extern rtx ia64_return_addr_rtx (HOST_WIDE_INT, rtx);
+extern void ia64_split_return_addr_rtx (rtx);
+
+#ifdef ARGS_SIZE_RTX
+/* expr.h defines ARGS_SIZE_RTX and `enum direction'. */
+extern enum direction ia64_hpux_function_arg_padding (enum machine_mode, const_tree);
+#endif /* ARGS_SIZE_RTX */
+
+extern void ia64_hpux_handle_builtin_pragma (struct cpp_reader *);
+extern void ia64_output_function_profiler (FILE *, int);
+extern void ia64_profile_hook (int);
+
+extern void ia64_init_expanders (void);
+
+extern rtx ia64_dconst_0_5 (void);
+extern rtx ia64_dconst_0_375 (void);
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
new file mode 100644
index 000000000..ea2c549a4
--- /dev/null
+++ b/gcc/config/ia64/ia64.c
@@ -0,0 +1,11055 @@
+/* Definitions of target machine for GNU compiler.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by James E. Wilson <wilson@cygnus.com> and
+ David Mosberger <davidm@hpl.hp.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "ggc.h"
+#include "basic-block.h"
+#include "libfuncs.h"
+#include "diagnostic-core.h"
+#include "sched-int.h"
+#include "timevar.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm_p.h"
+#include "hashtab.h"
+#include "langhooks.h"
+#include "cfglayout.h"
+#include "gimple.h"
+#include "intl.h"
+#include "df.h"
+#include "debug.h"
+#include "params.h"
+#include "dbgcnt.h"
+#include "tm-constrs.h"
+#include "sel-sched.h"
+#include "reload.h"
+#include "dwarf2out.h"
+
+/* This is used for communication between ASM_OUTPUT_LABEL and
+ ASM_OUTPUT_LABELREF. */
+int ia64_asm_output_label = 0;
+
+/* Register names for ia64_expand_prologue. */
+static const char * const ia64_reg_numbers[96] =
+{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
+ "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
+ "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
+ "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
+ "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
+ "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
+ "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
+ "r104","r105","r106","r107","r108","r109","r110","r111",
+ "r112","r113","r114","r115","r116","r117","r118","r119",
+ "r120","r121","r122","r123","r124","r125","r126","r127"};
+
+/* ??? These strings could be shared with REGISTER_NAMES. */
+static const char * const ia64_input_reg_names[8] =
+{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" };
+
+/* ??? These strings could be shared with REGISTER_NAMES. */
+static const char * const ia64_local_reg_names[80] =
+{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
+ "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
+ "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
+ "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
+ "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
+ "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
+ "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
+ "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
+ "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
+ "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
+
+/* ??? These strings could be shared with REGISTER_NAMES. */
+static const char * const ia64_output_reg_names[8] =
+{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
+
+/* Which CPU we are scheduling for. */
+enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
+
+/* Variables which are this size or smaller are put in the sdata/sbss
+ sections. */
+
+unsigned int ia64_section_threshold;
+
+/* The following variable is used by the DFA insn scheduler. The value is
+ TRUE if we do insn bundling instead of insn scheduling. */
+int bundling_p = 0;
+
+enum ia64_frame_regs
+{
+ reg_fp,
+ reg_save_b0,
+ reg_save_pr,
+ reg_save_ar_pfs,
+ reg_save_ar_unat,
+ reg_save_ar_lc,
+ reg_save_gp,
+ number_of_ia64_frame_regs
+};
+
+/* Structure to be filled in by ia64_compute_frame_size with register
+ save masks and offsets for the current function. */
+
+struct ia64_frame_info
+{
+ HOST_WIDE_INT total_size; /* size of the stack frame, not including
+ the caller's scratch area. */
+ HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
+ HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
+ HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
+ HARD_REG_SET mask; /* mask of saved registers. */
+ unsigned int gr_used_mask; /* mask of registers in use as gr spill
+ registers or long-term scratches. */
+ int n_spilled; /* number of spilled registers. */
+ int r[number_of_ia64_frame_regs]; /* Frame related registers. */
+ int n_input_regs; /* number of input registers used. */
+ int n_local_regs; /* number of local registers used. */
+ int n_output_regs; /* number of output registers used. */
+ int n_rotate_regs; /* number of rotating registers used. */
+
+ char need_regstk; /* true if a .regstk directive needed. */
+ char initialized; /* true if the data is finalized. */
+};
+
+/* Current frame information calculated by ia64_compute_frame_size. */
+static struct ia64_frame_info current_frame_info;
+/* The actual registers that are emitted. */
+static int emitted_frame_related_regs[number_of_ia64_frame_regs];
+
+static int ia64_first_cycle_multipass_dfa_lookahead (void);
+static void ia64_dependencies_evaluation_hook (rtx, rtx);
+static void ia64_init_dfa_pre_cycle_insn (void);
+static rtx ia64_dfa_pre_cycle_insn (void);
+static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
+static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
+static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
+static void ia64_h_i_d_extended (void);
+static void * ia64_alloc_sched_context (void);
+static void ia64_init_sched_context (void *, bool);
+static void ia64_set_sched_context (void *);
+static void ia64_clear_sched_context (void *);
+static void ia64_free_sched_context (void *);
+static int ia64_mode_to_int (enum machine_mode);
+static void ia64_set_sched_flags (spec_info_t);
+static ds_t ia64_get_insn_spec_ds (rtx);
+static ds_t ia64_get_insn_checked_ds (rtx);
+static bool ia64_skip_rtx_p (const_rtx);
+static int ia64_speculate_insn (rtx, ds_t, rtx *);
+static bool ia64_needs_block_p (int);
+static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
+static int ia64_spec_check_p (rtx);
+static int ia64_spec_check_src_p (rtx);
+static rtx gen_tls_get_addr (void);
+static rtx gen_thread_pointer (void);
+static int find_gr_spill (enum ia64_frame_regs, int);
+static int next_scratch_gr_reg (void);
+static void mark_reg_gr_used_mask (rtx, void *);
+static void ia64_compute_frame_size (HOST_WIDE_INT);
+static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
+static void finish_spill_pointers (void);
+static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
+static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
+static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
+static rtx gen_movdi_x (rtx, rtx, rtx);
+static rtx gen_fr_spill_x (rtx, rtx, rtx);
+static rtx gen_fr_restore_x (rtx, rtx, rtx);
+
+static void ia64_option_override (void);
+static void ia64_option_default_params (void);
+static bool ia64_can_eliminate (const int, const int);
+static enum machine_mode hfa_element_mode (const_tree, bool);
+static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx ia64_function_arg_1 (const CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool, bool);
+static rtx ia64_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx ia64_function_incoming_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static void ia64_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int ia64_function_arg_boundary (enum machine_mode,
+ const_tree);
+static bool ia64_function_ok_for_sibcall (tree, tree);
+static bool ia64_return_in_memory (const_tree, const_tree);
+static rtx ia64_function_value (const_tree, const_tree, bool);
+static rtx ia64_libcall_value (enum machine_mode, const_rtx);
+static bool ia64_function_value_regno_p (const unsigned int);
+static int ia64_register_move_cost (enum machine_mode, reg_class_t,
+ reg_class_t);
+static int ia64_memory_move_cost (enum machine_mode mode, reg_class_t,
+ bool);
+static bool ia64_rtx_costs (rtx, int, int, int *, bool);
+static int ia64_unspec_may_trap_p (const_rtx, unsigned);
+static void fix_range (const char *);
+static bool ia64_handle_option (size_t, const char *, int);
+static struct machine_function * ia64_init_machine_status (void);
+static void emit_insn_group_barriers (FILE *);
+static void emit_all_insn_group_barriers (FILE *);
+static void final_emit_insn_group_barriers (FILE *);
+static void emit_predicate_relation_info (void);
+static void ia64_reorg (void);
+static bool ia64_in_small_data_p (const_tree);
+static void process_epilogue (FILE *, rtx, bool, bool);
+
+static bool ia64_assemble_integer (rtx, unsigned int, int);
+static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void ia64_output_function_end_prologue (FILE *);
+
+static int ia64_issue_rate (void);
+static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
+static void ia64_sched_init (FILE *, int, int);
+static void ia64_sched_init_global (FILE *, int, int);
+static void ia64_sched_finish_global (FILE *, int);
+static void ia64_sched_finish (FILE *, int);
+static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
+static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
+static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
+static int ia64_variable_issue (FILE *, int, rtx, int);
+
+static void ia64_asm_unwind_emit (FILE *, rtx);
+static void ia64_asm_emit_except_personality (rtx);
+static void ia64_asm_init_sections (void);
+
+static enum unwind_info_type ia64_debug_unwind_info (void);
+static enum unwind_info_type ia64_except_unwind_info (struct gcc_options *);
+
+static struct bundle_state *get_free_bundle_state (void);
+static void free_bundle_state (struct bundle_state *);
+static void initiate_bundle_states (void);
+static void finish_bundle_states (void);
+static unsigned bundle_state_hash (const void *);
+static int bundle_state_eq_p (const void *, const void *);
+static int insert_bundle_state (struct bundle_state *);
+static void initiate_bundle_state_table (void);
+static void finish_bundle_state_table (void);
+static int try_issue_nops (struct bundle_state *, int);
+static int try_issue_insn (struct bundle_state *, rtx);
+static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
+static int get_max_pos (state_t);
+static int get_template (state_t, int);
+
+static rtx get_next_important_insn (rtx, rtx);
+static bool important_for_bundling_p (rtx);
+static void bundling (FILE *, int, rtx, rtx);
+
+static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static void ia64_file_start (void);
+static void ia64_globalize_decl_name (FILE *, tree);
+
+static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
+static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
+static section *ia64_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
+ ATTRIBUTE_UNUSED;
+static unsigned int ia64_section_type_flags (tree, const char *, int);
+static void ia64_init_libfuncs (void)
+ ATTRIBUTE_UNUSED;
+static void ia64_hpux_init_libfuncs (void)
+ ATTRIBUTE_UNUSED;
+static void ia64_sysv4_init_libfuncs (void)
+ ATTRIBUTE_UNUSED;
+static void ia64_vms_init_libfuncs (void)
+ ATTRIBUTE_UNUSED;
+static void ia64_soft_fp_init_libfuncs (void)
+ ATTRIBUTE_UNUSED;
+static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
+ ATTRIBUTE_UNUSED;
+static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
+ ATTRIBUTE_UNUSED;
+
+static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
+static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
+static void ia64_encode_section_info (tree, rtx, int);
+static rtx ia64_struct_value_rtx (tree, int);
+static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
+static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
+static bool ia64_vector_mode_supported_p (enum machine_mode mode);
+static bool ia64_cannot_force_const_mem (rtx);
+static const char *ia64_mangle_type (const_tree);
+static const char *ia64_invalid_conversion (const_tree, const_tree);
+static const char *ia64_invalid_unary_op (int, const_tree);
+static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
+static enum machine_mode ia64_c_mode_for_suffix (char);
+static enum machine_mode ia64_promote_function_mode (const_tree,
+ enum machine_mode,
+ int *,
+ const_tree,
+ int);
+static void ia64_trampoline_init (rtx, tree, rtx);
+static void ia64_override_options_after_change (void);
+
+static void ia64_dwarf_handle_frame_unspec (const char *, rtx, int);
+static tree ia64_builtin_decl (unsigned, bool);
+
+static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
+static enum machine_mode ia64_get_reg_raw_mode (int regno);
+static section * ia64_hpux_function_section (tree, enum node_frequency,
+ bool, bool);
+
+/* Table of valid machine attributes. */
+static const struct attribute_spec ia64_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "syscall_linkage", 0, 0, false, true, true, NULL },
+ { "model", 1, 1, true, false, false, ia64_handle_model_attribute },
+#if TARGET_ABI_OPEN_VMS
+ { "common_object", 1, 1, true, false, false, ia64_vms_common_object_attribute},
+#endif
+ { "version_id", 1, 1, true, false, false,
+ ia64_handle_version_id_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
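For orientation, user code applies these attributes roughly as follows (an editorial sketch; the argument forms match what the handlers above and the GCC attribute documentation accept):

    /* "model" sets addressability; small requests gp-relative (22-bit)
       addressing.  */
    static int counter __attribute__ ((model (small)));

    /* "version_id" (HP-UX only) attaches a version string to the
       assembler-level symbol name.  */
    extern int get_record (int) __attribute__ ((version_id ("20040821")));

    /* "syscall_linkage" marks all input registers live at function
       exits, altering the calling convention.  */
    extern long do_syscall (long) __attribute__ ((syscall_linkage));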
+
+/* Implement overriding of the optimization options. */
+static const struct default_options ia64_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS,
+#endif
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS ia64_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
+
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL ia64_builtin_decl
+
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP "\tdata1\t"
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER ia64_assemble_integer
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE ia64_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE ia64_option_optimization_table
+#undef TARGET_OPTION_DEFAULT_PARAMS
+#define TARGET_OPTION_DEFAULT_PARAMS ia64_option_default_params
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
+
+#undef TARGET_SCHED_ADJUST_COST_2
+#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT ia64_sched_init
+#undef TARGET_SCHED_FINISH
+#define TARGET_SCHED_FINISH ia64_sched_finish
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
+#undef TARGET_SCHED_FINISH_GLOBAL
+#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER ia64_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
+
+#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
+#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
+#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
+#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
+#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
+ ia64_first_cycle_multipass_dfa_lookahead_guard
+
+#undef TARGET_SCHED_DFA_NEW_CYCLE
+#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
+
+#undef TARGET_SCHED_H_I_D_EXTENDED
+#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
+
+#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
+#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
+
+#undef TARGET_SCHED_INIT_SCHED_CONTEXT
+#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
+
+#undef TARGET_SCHED_SET_SCHED_CONTEXT
+#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
+
+#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
+#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
+
+#undef TARGET_SCHED_FREE_SCHED_CONTEXT
+#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
+
+#undef TARGET_SCHED_SET_SCHED_FLAGS
+#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
+
+#undef TARGET_SCHED_GET_INSN_SPEC_DS
+#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
+
+#undef TARGET_SCHED_GET_INSN_CHECKED_DS
+#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
+
+#undef TARGET_SCHED_SPECULATE_INSN
+#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
+
+#undef TARGET_SCHED_NEEDS_BLOCK_P
+#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
+
+#undef TARGET_SCHED_GEN_SPEC_CHECK
+#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
+ ia64_first_cycle_multipass_dfa_lookahead_guard_spec
+
+#undef TARGET_SCHED_SKIP_RTX_P
+#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG ia64_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START ia64_file_start
+
+#undef TARGET_ASM_GLOBALIZE_DECL_NAME
+#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ia64_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_UNSPEC_MAY_TRAP_P
+#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
+
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
+#endif
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
+
+/* ??? Investigate. */
+#if 0
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
+#endif
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ia64_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE ia64_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_GET_RAW_RESULT_MODE
+#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
+#undef TARGET_GET_RAW_ARG_MODE
+#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
+
+#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
+#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ia64_dwarf_handle_frame_unspec
+#undef TARGET_ASM_UNWIND_EMIT
+#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
+#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
+#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
+
+#undef TARGET_DEBUG_UNWIND_INFO
+#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO ia64_except_unwind_info
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
+
+/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
+ in an order different from the specified program order. */
+#undef TARGET_RELAXED_ORDERING
+#define TARGET_RELAXED_ORDERING true
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION ia64_handle_option
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ia64_mangle_type
+
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
+
+#undef TARGET_C_MODE_FOR_SUFFIX
+#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE ia64_can_eliminate
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
+
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
+
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
+
+#undef TARGET_DELAY_SCHED2
+#define TARGET_DELAY_SCHED2 true
+
+/* Variable tracking should be run after all optimizations which
+ change order of insns. It also needs a valid CFG. */
+#undef TARGET_DELAY_VARTRACK
+#define TARGET_DELAY_VARTRACK true
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+typedef enum
+ {
+ ADDR_AREA_NORMAL, /* normal address area */
+ ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */
+ }
+ia64_addr_area;
+
+static GTY(()) tree small_ident1;
+static GTY(()) tree small_ident2;
+
+static void
+init_idents (void)
+{
+ if (small_ident1 == 0)
+ {
+ small_ident1 = get_identifier ("small");
+ small_ident2 = get_identifier ("__small__");
+ }
+}
+
+/* Retrieve the address area that has been chosen for the given decl. */
+
+static ia64_addr_area
+ia64_get_addr_area (tree decl)
+{
+ tree model_attr;
+
+ model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
+ if (model_attr)
+ {
+ tree id;
+
+ init_idents ();
+ id = TREE_VALUE (TREE_VALUE (model_attr));
+ if (id == small_ident1 || id == small_ident2)
+ return ADDR_AREA_SMALL;
+ }
+ return ADDR_AREA_NORMAL;
+}
+
+static tree
+ia64_handle_model_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ ia64_addr_area addr_area = ADDR_AREA_NORMAL;
+ ia64_addr_area area;
+ tree arg, decl = *node;
+
+ init_idents ();
+ arg = TREE_VALUE (args);
+ if (arg == small_ident1 || arg == small_ident2)
+ {
+ addr_area = ADDR_AREA_SMALL;
+ }
+ else
+ {
+ warning (OPT_Wattributes, "invalid argument of %qE attribute",
+ name);
+ *no_add_attrs = true;
+ }
+
+ switch (TREE_CODE (decl))
+ {
+ case VAR_DECL:
+ if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
+ == FUNCTION_DECL)
+ && !TREE_STATIC (decl))
+ {
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "an address area attribute cannot be specified for "
+ "local variables");
+ *no_add_attrs = true;
+ }
+ area = ia64_get_addr_area (decl);
+ if (area != ADDR_AREA_NORMAL && addr_area != area)
+ {
+ error ("address area of %q+D conflicts with previous "
+ "declaration", decl);
+ *no_add_attrs = true;
+ }
+ break;
+
+ case FUNCTION_DECL:
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "address area attribute cannot be specified for "
+ "functions");
+ *no_add_attrs = true;
+ break;
+
+ default:
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ break;
+ }
+
+ return NULL_TREE;
+}
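+
+/* For illustration, a hypothetical declaration such as
+
+ static int counter __attribute__ ((model ("small")));
+
+ is what the handler above validates; "small" (or "__small__") places
+ the object in the 2MB address area reachable directly via "addl". */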
+
+/* The section must have global and overlaid attributes. */
+#define SECTION_VMS_OVERLAY SECTION_MACH_DEP
+
+/* Part of the low-level implementation of the DEC Ada pragma Common_Object,
+ which enables the shared use of variables stored in overlaid linker
+ areas corresponding to the use of Fortran COMMON. */
+
+static tree
+ia64_vms_common_object_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree decl = *node;
+ tree id, val;
+ gcc_assert (DECL_P (decl));
+
+ DECL_COMMON (decl) = 1;
+ id = TREE_VALUE (args);
+ if (TREE_CODE (id) == IDENTIFIER_NODE)
+ val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
+ else if (TREE_CODE (id) == STRING_CST)
+ val = id;
+ else
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute requires a string constant argument", name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ DECL_SECTION_NAME (decl) = val;
+ return NULL_TREE;
+}
+
+/* Part of the low-level implementation of the DEC Ada pragma Common_Object. */
+
+void
+ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ tree attr = DECL_ATTRIBUTES (decl);
+
+ /* As the common_object attribute sets DECL_SECTION_NAME, check it
+ before looking up the attribute. */
+ if (DECL_SECTION_NAME (decl) && attr)
+ attr = lookup_attribute ("common_object", attr);
+ else
+ attr = NULL_TREE;
+
+ if (!attr)
+ {
+ /* Code from elfos.h. */
+ fprintf (file, "%s", COMMON_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+ size, align / BITS_PER_UNIT);
+ }
+ else
+ {
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+ ASM_OUTPUT_LABEL (file, name);
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+ }
+}
+
+/* Definition of TARGET_ASM_NAMED_SECTION for VMS. */
+
+void
+ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ if (!(flags & SECTION_VMS_OVERLAY))
+ {
+ default_elf_asm_named_section (name, flags, decl);
+ return;
+ }
+ gcc_assert (flags == (SECTION_VMS_OVERLAY | SECTION_WRITE));
+
+ if (flags & SECTION_DECLARED)
+ {
+ fprintf (asm_out_file, "\t.section\t%s\n", name);
+ return;
+ }
+
+ fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
+}
+
+static void
+ia64_encode_addr_area (tree decl, rtx symbol)
+{
+ int flags;
+
+ flags = SYMBOL_REF_FLAGS (symbol);
+ switch (ia64_get_addr_area (decl))
+ {
+ case ADDR_AREA_NORMAL: break;
+ case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
+ default: gcc_unreachable ();
+ }
+ SYMBOL_REF_FLAGS (symbol) = flags;
+}
+
+static void
+ia64_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ /* Careful not to prod global register variables. */
+ if (TREE_CODE (decl) == VAR_DECL
+ && GET_CODE (DECL_RTL (decl)) == MEM
+ && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
+ ia64_encode_addr_area (decl, XEXP (rtl, 0));
+}
+
+/* Return 1 if the operands of a move are ok. */
+
+int
+ia64_move_ok (rtx dst, rtx src)
+{
+ /* If we're under init_recog_no_volatile, we'll not be able to use
+ memory_operand. So check the code directly and don't worry about
+ the validity of the underlying address, which should have been
+ checked elsewhere anyway. */
+ if (GET_CODE (dst) != MEM)
+ return 1;
+ if (GET_CODE (src) == MEM)
+ return 0;
+ if (register_operand (src, VOIDmode))
+ return 1;
+
+ /* Otherwise, this must be a constant: 0 for integral modes, or
+ 0.0 or 1.0 (constraint G) for floating-point modes. */
+ if (INTEGRAL_MODE_P (GET_MODE (dst)))
+ return src == const0_rtx;
+ else
+ return satisfies_constraint_G (src);
+}
+
+/* Return 1 if the operands are ok for a floating point load pair. */
+
+int
+ia64_load_pair_ok (rtx dst, rtx src)
+{
+ if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
+ return 0;
+ if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
+ return 0;
+ switch (GET_CODE (XEXP (src, 0)))
+ {
+ case REG:
+ case POST_INC:
+ break;
+ case POST_DEC:
+ return 0;
+ case POST_MODIFY:
+ {
+ rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
+
+ if (GET_CODE (adjust) != CONST_INT
+ || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
+ return 0;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return 1;
+}
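+
+/* Note (illustrative): the ia64 pair loads (ldfps/ldfpd/ldfp8) can only
+ post-increment the base by the size of the whole access, which is why
+ POST_DEC is rejected above and a POST_MODIFY is accepted only when its
+ adjustment equals GET_MODE_SIZE of the source. */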
+
+int
+addp4_optimize_ok (rtx op1, rtx op2)
+{
+ return (basereg_operand (op1, GET_MODE (op1))
+ != basereg_operand (op2, GET_MODE (op2)));
+}
+
+/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
+ Return the length of the field, or <= 0 on failure. */
+
+int
+ia64_depz_field_mask (rtx rop, rtx rshift)
+{
+ unsigned HOST_WIDE_INT op = INTVAL (rop);
+ unsigned HOST_WIDE_INT shift = INTVAL (rshift);
+
+ /* Get rid of the zero bits we're shifting in. */
+ op >>= shift;
+
+ /* We must now have a solid block of 1's at bit 0. */
+ return exact_log2 (op + 1);
+}
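+
+/* Worked example (for illustration): with ROP = 0x1f8 and RSHIFT = 3,
+ OP >> 3 == 0x3f and exact_log2 (0x40) == 6, so the field is 6 bits
+ wide. A non-solid mask such as ROP = 0x28 leaves OP == 0x5, and
+ exact_log2 (0x6) == -1 signals failure. */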
+
+/* Return the TLS model to use for ADDR. */
+
+static enum tls_model
+tls_symbolic_operand_type (rtx addr)
+{
+ enum tls_model tls_kind = TLS_MODEL_NONE;
+
+ if (GET_CODE (addr) == CONST)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
+ tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF)
+ tls_kind = SYMBOL_REF_TLS_MODEL (addr);
+
+ return tls_kind;
+}
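+
+/* E.g. both (symbol_ref "x") and (const (plus (symbol_ref "x")
+ (const_int 8))) yield the TLS model of "x"; any other form yields
+ TLS_MODEL_NONE. */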
+
+/* Return true if X is a constant that is valid for some immediate
+ field in an instruction. */
+
+bool
+ia64_legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case LABEL_REF:
+ return true;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
+ || GET_MODE (x) == DFmode)
+ return true;
+ return satisfies_constraint_G (x);
+
+ case CONST:
+ case SYMBOL_REF:
+ /* ??? Short term workaround for PR 28490. We must make the code here
+ match the code in ia64_expand_move and move_operand, even though they
+ are both technically wrong. */
+ if (tls_symbolic_operand_type (x) == 0)
+ {
+ HOST_WIDE_INT addend = 0;
+ rtx op = x;
+
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
+ {
+ addend = INTVAL (XEXP (XEXP (op, 0), 1));
+ op = XEXP (XEXP (op, 0), 0);
+ }
+
+ if (any_offset_symbol_operand (op, GET_MODE (op))
+ || function_operand (op, GET_MODE (op)))
+ return true;
+ if (aligned_offset_symbol_operand (op, GET_MODE (op)))
+ return (addend & 0x3fff) == 0;
+ return false;
+ }
+ return false;
+
+ case CONST_VECTOR:
+ {
+ enum machine_mode mode = GET_MODE (x);
+
+ if (mode == V2SFmode)
+ return satisfies_constraint_Y (x);
+
+ return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) <= 8);
+ }
+
+ default:
+ return false;
+ }
+}
+
+/* Don't allow TLS addresses to get spilled to memory. */
+
+static bool
+ia64_cannot_force_const_mem (rtx x)
+{
+ if (GET_MODE (x) == RFmode)
+ return true;
+ return tls_symbolic_operand_type (x) != 0;
+}
+
+/* Expand a symbolic constant load. */
+
+bool
+ia64_expand_load_address (rtx dest, rtx src)
+{
+ gcc_assert (GET_CODE (dest) == REG);
+
+ /* ILP32 mode still loads 64-bits of data from the GOT. This avoids
+ having to pointer-extend the value afterward. Other forms of address
+ computation below are also more natural to compute as 64-bit quantities.
+ If we've been given an SImode destination register, change it. */
+ if (GET_MODE (dest) != Pmode)
+ dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
+ byte_lowpart_offset (Pmode, GET_MODE (dest)));
+
+ if (TARGET_NO_PIC)
+ return false;
+ if (small_addr_symbolic_operand (src, VOIDmode))
+ return false;
+
+ if (TARGET_AUTO_PIC)
+ emit_insn (gen_load_gprel64 (dest, src));
+ else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
+ emit_insn (gen_load_fptr (dest, src));
+ else if (sdata_symbolic_operand (src, VOIDmode))
+ emit_insn (gen_load_gprel (dest, src));
+ else
+ {
+ HOST_WIDE_INT addend = 0;
+ rtx tmp;
+
+ /* We did split constant offsets in ia64_expand_move, and we did try
+ to keep them split in move_operand, but we also allowed reload to
+ rematerialize arbitrary constants rather than spill the value to
+ the stack and reload it. So we have to be prepared here to split
+ them apart again. */
+ if (GET_CODE (src) == CONST)
+ {
+ HOST_WIDE_INT hi, lo;
+
+ hi = INTVAL (XEXP (XEXP (src, 0), 1));
+ lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
+ hi = hi - lo;
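+ /* LO is the offset sign-extended to 14 bits and HI the remainder;
+ e.g. (illustrative) an offset of 0x6000 splits into HI = 0x8000
+ and LO = -0x2000, so LO always fits a 14-bit immediate add. */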
+
+ if (lo != 0)
+ {
+ addend = lo;
+ src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
+ }
+ }
+
+ tmp = gen_rtx_HIGH (Pmode, src);
+ tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+
+ tmp = gen_rtx_LO_SUM (Pmode, dest, src);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+
+ if (addend)
+ {
+ tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ }
+ }
+
+ return true;
+}
+
+static GTY(()) rtx gen_tls_tga;
+static rtx
+gen_tls_get_addr (void)
+{
+ if (!gen_tls_tga)
+ gen_tls_tga = init_one_libfunc ("__tls_get_addr");
+ return gen_tls_tga;
+}
+
+static GTY(()) rtx thread_pointer_rtx;
+static rtx
+gen_thread_pointer (void)
+{
+ if (!thread_pointer_rtx)
+ thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
+ return thread_pointer_rtx;
+}
+
+static rtx
+ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
+ rtx orig_op1, HOST_WIDE_INT addend)
+{
+ rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
+ rtx orig_op0 = op0;
+ HOST_WIDE_INT addend_lo, addend_hi;
+
+ switch (tls_kind)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ start_sequence ();
+
+ tga_op1 = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_dtpmod (tga_op1, op1));
+
+ tga_op2 = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_dtprel (tga_op2, op1));
+
+ tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
+ LCT_CONST, Pmode, 2, tga_op1,
+ Pmode, tga_op2, Pmode);
+
+ insns = get_insns ();
+ end_sequence ();
+
+ if (GET_MODE (op0) != Pmode)
+ op0 = tga_ret;
+ emit_libcall_block (insns, op0, tga_ret, op1);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ /* ??? This isn't the completely proper way to do local-dynamic.
+ If the call to __tls_get_addr is used only by a single symbol,
+ then we should (somehow) move the dtprel to the second arg
+ to avoid the extra add. */
+ start_sequence ();
+
+ tga_op1 = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_dtpmod (tga_op1, op1));
+
+ tga_op2 = const0_rtx;
+
+ tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
+ LCT_CONST, Pmode, 2, tga_op1,
+ Pmode, tga_op2, Pmode);
+
+ insns = get_insns ();
+ end_sequence ();
+
+ tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_LD_BASE);
+ tmp = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
+
+ if (!register_operand (op0, Pmode))
+ op0 = gen_reg_rtx (Pmode);
+ if (TARGET_TLS64)
+ {
+ emit_insn (gen_load_dtprel (op0, op1));
+ emit_insn (gen_adddi3 (op0, tmp, op0));
+ }
+ else
+ emit_insn (gen_add_dtprel (op0, op1, tmp));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
+ addend_hi = addend - addend_lo;
+
+ op1 = plus_constant (op1, addend_hi);
+ addend = addend_lo;
+
+ tmp = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_tprel (tmp, op1));
+
+ if (!register_operand (op0, Pmode))
+ op0 = gen_reg_rtx (Pmode);
+ emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ if (!register_operand (op0, Pmode))
+ op0 = gen_reg_rtx (Pmode);
+
+ op1 = orig_op1;
+ addend = 0;
+ if (TARGET_TLS64)
+ {
+ emit_insn (gen_load_tprel (op0, op1));
+ emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
+ }
+ else
+ emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (addend)
+ op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
+ orig_op0, 1, OPTAB_DIRECT);
+ if (orig_op0 == op0)
+ return NULL_RTX;
+ if (GET_MODE (orig_op0) == Pmode)
+ return op0;
+ return gen_lowpart (GET_MODE (orig_op0), op0);
+}
+
+rtx
+ia64_expand_move (rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+
+ if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
+ op1 = force_reg (mode, op1);
+
+ if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
+ {
+ HOST_WIDE_INT addend = 0;
+ enum tls_model tls_kind;
+ rtx sym = op1;
+
+ if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
+ {
+ addend = INTVAL (XEXP (XEXP (op1, 0), 1));
+ sym = XEXP (XEXP (op1, 0), 0);
+ }
+
+ tls_kind = tls_symbolic_operand_type (sym);
+ if (tls_kind)
+ return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
+
+ if (any_offset_symbol_operand (sym, mode))
+ addend = 0;
+ else if (aligned_offset_symbol_operand (sym, mode))
+ {
+ HOST_WIDE_INT addend_lo, addend_hi;
+
+ addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
+ addend_hi = addend - addend_lo;
+
+ if (addend_lo != 0)
+ {
+ op1 = plus_constant (sym, addend_hi);
+ addend = addend_lo;
+ }
+ else
+ addend = 0;
+ }
+ else
+ op1 = sym;
+
+ if (reload_completed)
+ {
+ /* We really should have taken care of this offset earlier. */
+ gcc_assert (addend == 0);
+ if (ia64_expand_load_address (op0, op1))
+ return NULL_RTX;
+ }
+
+ if (addend)
+ {
+ rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
+
+ op1 = expand_simple_binop (mode, PLUS, subtarget,
+ GEN_INT (addend), op0, 1, OPTAB_DIRECT);
+ if (op0 == op1)
+ return NULL_RTX;
+ }
+ }
+
+ return op1;
+}
+
+/* Split a move from OP1 to OP0 conditional on COND. */
+
+void
+ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
+{
+ rtx insn, first = get_last_insn ();
+
+ emit_move_insn (op0, op1);
+
+ for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
+ if (INSN_P (insn))
+ PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
+ PATTERN (insn));
+}
+
+/* Split a post-reload TImode or TFmode reference into two DImode
+ components. This is made extra difficult by the fact that we do
+ not get any scratch registers to work with, because reload cannot
+ be prevented from giving us a scratch that overlaps the register
+ pair involved. So instead, when addressing memory, we tweak the
+ pointer register up and back down with POST_INCs. Or up and not
+ back down when we can get away with it.
+
+ REVERSED is true when the loads must be done in reversed order
+ (high word first) for correctness. DEAD is true when the pointer
+ dies with the second insn we generate and therefore the second
+ address must not carry a postmodify.
+
+ May return an insn which is to be emitted after the moves. */
+
+static rtx
+ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
+{
+ rtx fixup = 0;
+
+ switch (GET_CODE (in))
+ {
+ case REG:
+ out[reversed] = gen_rtx_REG (DImode, REGNO (in));
+ out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ /* Cannot occur reversed. */
+ gcc_assert (!reversed);
+
+ if (GET_MODE (in) != TFmode)
+ split_double (in, &out[0], &out[1]);
+ else
+ /* split_double does not understand how to split a TFmode
+ quantity into a pair of DImode constants. */
+ {
+ REAL_VALUE_TYPE r;
+ unsigned HOST_WIDE_INT p[2];
+ long l[4]; /* TFmode is 128 bits */
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, in);
+ real_to_target (l, &r, TFmode);
+
+ if (FLOAT_WORDS_BIG_ENDIAN)
+ {
+ p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
+ p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
+ }
+ else
+ {
+ p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
+ p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
+ }
+ out[0] = GEN_INT (p[0]);
+ out[1] = GEN_INT (p[1]);
+ }
+ break;
+
+ case MEM:
+ {
+ rtx base = XEXP (in, 0);
+ rtx offset;
+
+ switch (GET_CODE (base))
+ {
+ case REG:
+ if (!reversed)
+ {
+ out[0] = adjust_automodify_address
+ (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+ out[1] = adjust_automodify_address
+ (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
+ }
+ else
+ {
+ /* Reversal requires a pre-increment, which can only
+ be done as a separate insn. */
+ emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
+ out[0] = adjust_automodify_address
+ (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
+ out[1] = adjust_address (in, DImode, 0);
+ }
+ break;
+
+ case POST_INC:
+ gcc_assert (!reversed && !dead);
+
+ /* Just do the increment in two steps. */
+ out[0] = adjust_automodify_address (in, DImode, 0, 0);
+ out[1] = adjust_automodify_address (in, DImode, 0, 8);
+ break;
+
+ case POST_DEC:
+ gcc_assert (!reversed && !dead);
+
+ /* Add 8, subtract 24. */
+ base = XEXP (base, 0);
+ out[0] = adjust_automodify_address
+ (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+ out[1] = adjust_automodify_address
+ (in, DImode,
+ gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
+ 8);
+ break;
+
+ case POST_MODIFY:
+ gcc_assert (!reversed && !dead);
+
+ /* Extract and adjust the modification. This case is
+ trickier than the others, because we might have an
+ index register, or we might have a combined offset that
+ doesn't fit a signed 9-bit displacement field. We can
+ assume the incoming expression is already legitimate. */
+ offset = XEXP (base, 1);
+ base = XEXP (base, 0);
+
+ out[0] = adjust_automodify_address
+ (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
+
+ if (GET_CODE (XEXP (offset, 1)) == REG)
+ {
+ /* Can't adjust the postmodify to match. Emit the
+ original, then a separate addition insn. */
+ out[1] = adjust_automodify_address (in, DImode, 0, 8);
+ fixup = gen_adddi3 (base, base, GEN_INT (-8));
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
+ if (INTVAL (XEXP (offset, 1)) < -256 + 8)
+ {
+ /* Again the postmodify cannot be made to match,
+ but in this case it's more efficient to get rid
+ of the postmodify entirely and fix up with an
+ add insn. */
+ out[1] = adjust_automodify_address (in, DImode, base, 8);
+ fixup = gen_adddi3
+ (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
+ }
+ else
+ {
+ /* Combined offset still fits in the displacement field.
+ (We cannot overflow it at the high end.) */
+ out[1] = adjust_automodify_address
+ (in, DImode, gen_rtx_POST_MODIFY
+ (Pmode, base, gen_rtx_PLUS
+ (Pmode, base,
+ GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
+ 8);
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return fixup;
+}
+
+/* Split a TImode or TFmode move instruction after reload.
+ This is used by *movtf_internal and *movti_internal. */
+void
+ia64_split_tmode_move (rtx operands[])
+{
+ rtx in[2], out[2], insn;
+ rtx fixup[2];
+ bool dead = false;
+ bool reversed = false;
+
+ /* It is possible for reload to decide to overwrite a pointer with
+ the value it points to. In that case we have to do the loads in
+ the appropriate order so that the pointer is not destroyed too
+ early. Also we must not generate a postmodify for that second
+ load, or rws_access_regno will die. */
+ if (GET_CODE (operands[1]) == MEM
+ && reg_overlap_mentioned_p (operands[0], operands[1]))
+ {
+ rtx base = XEXP (operands[1], 0);
+ while (GET_CODE (base) != REG)
+ base = XEXP (base, 0);
+
+ if (REGNO (base) == REGNO (operands[0]))
+ reversed = true;
+ dead = true;
+ }
+ /* Another reason to do the moves in reversed order is if the first
+ element of the target register pair is also the second element of
+ the source register pair. */
+ if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
+ && REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ reversed = true;
+
+ fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
+ fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
+
+#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \
+ if (GET_CODE (EXP) == MEM \
+ && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \
+ || GET_CODE (XEXP (EXP, 0)) == POST_INC \
+ || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \
+ add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
+ MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
+ MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
+ MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
+ MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
+
+ if (fixup[0])
+ emit_insn (fixup[0]);
+ if (fixup[1])
+ emit_insn (fixup[1]);
+
+#undef MAYBE_ADD_REG_INC_NOTE
+}
+
+/* ??? Fixing GR->FR XFmode moves during reload is hard. You need to go
+ through memory plus an extra GR scratch register. Except that you can
+ either get the first from SECONDARY_MEMORY_NEEDED or the second from
+ SECONDARY_RELOAD_CLASS, but not both.
+
+ We got into problems in the first place by allowing a construct like
+ (subreg:XF (reg:TI)), which we got from a union containing a long double.
+ This solution attempts to prevent this situation from occurring. When
+ we see something like the above, we spill the inner register to memory. */
+
+static rtx
+spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
+{
+ if (GET_CODE (in) == SUBREG
+ && GET_MODE (SUBREG_REG (in)) == TImode
+ && GET_CODE (SUBREG_REG (in)) == REG)
+ {
+ rtx memt = assign_stack_temp (TImode, 16, 0);
+ emit_move_insn (memt, SUBREG_REG (in));
+ return adjust_address (memt, mode, 0);
+ }
+ else if (force && GET_CODE (in) == REG)
+ {
+ rtx memx = assign_stack_temp (mode, 16, 0);
+ emit_move_insn (memx, in);
+ return memx;
+ }
+ else
+ return in;
+}
+
+/* Expand the movxf or movrf pattern (MODE says which) with the given
+ OPERANDS, returning true if the pattern should then invoke
+ DONE. */
+
+bool
+ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
+{
+ rtx op0 = operands[0];
+
+ if (GET_CODE (op0) == SUBREG)
+ op0 = SUBREG_REG (op0);
+
+ /* We must support XFmode loads into general registers for stdarg/vararg,
+ unprototyped calls, and a rare case where a long double is passed as
+ an argument after a float HFA fills the FP registers. We split them into
+ DImode loads for convenience. We also need to support XFmode stores
+ for the last case. This case does not happen for stdarg/vararg routines,
+ because we do a block store to memory of unnamed arguments. */
+
+ if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
+ {
+ rtx out[2];
+
+ /* We're hoping to transform everything that deals with XFmode
+ quantities and GR registers early in the compiler. */
+ gcc_assert (can_create_pseudo_p ());
+
+ /* A struct-to-register move can just use TImode instead. */
+ if ((GET_CODE (operands[1]) == SUBREG
+ && GET_MODE (SUBREG_REG (operands[1])) == TImode)
+ || (GET_CODE (operands[1]) == REG
+ && GR_REGNO_P (REGNO (operands[1]))))
+ {
+ rtx op1 = operands[1];
+
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+ else
+ op1 = gen_rtx_REG (TImode, REGNO (op1));
+
+ emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
+ return true;
+ }
+
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ /* Don't word-swap when reading in the constant. */
+ emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
+ operand_subword (operands[1], WORDS_BIG_ENDIAN,
+ 0, mode));
+ emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
+ operand_subword (operands[1], !WORDS_BIG_ENDIAN,
+ 0, mode));
+ return true;
+ }
+
+ /* If the quantity is in a register not known to be GR, spill it. */
+ if (register_operand (operands[1], mode))
+ operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ /* Don't word-swap when reading in the value. */
+ out[0] = gen_rtx_REG (DImode, REGNO (op0));
+ out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
+
+ emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
+ emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
+ return true;
+ }
+
+ if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
+ {
+ /* We're hoping to transform everything that deals with XFmode
+ quantities and GR registers early in the compiler. */
+ gcc_assert (can_create_pseudo_p ());
+
+ /* Op0 can't be a GR_REG here, as that case is handled above.
+ If op0 is a register, then we spill op1, so that we now have a
+ MEM operand. This requires creating an XFmode subreg of a TImode reg
+ to force the spill. */
+ if (register_operand (operands[0], mode))
+ {
+ rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
+ op1 = gen_rtx_SUBREG (mode, op1, 0);
+ operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
+ }
+
+ else
+ {
+ rtx in[2];
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ /* Don't word-swap when writing out the value. */
+ in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
+ in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
+
+ emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
+ emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
+ return true;
+ }
+ }
+
+ if (!reload_in_progress && !reload_completed)
+ {
+ operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
+
+ if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
+ {
+ rtx memt, memx, in = operands[1];
+ if (CONSTANT_P (in))
+ in = validize_mem (force_const_mem (mode, in));
+ if (GET_CODE (in) == MEM)
+ memt = adjust_address (in, TImode, 0);
+ else
+ {
+ memt = assign_stack_temp (TImode, 16, 0);
+ memx = adjust_address (memt, mode, 0);
+ emit_move_insn (memx, in);
+ }
+ emit_move_insn (op0, memt);
+ return true;
+ }
+
+ if (!ia64_move_ok (operands[0], operands[1]))
+ operands[1] = force_reg (mode, operands[1]);
+ }
+
+ return false;
+}
+
+/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
+ with the expression that holds the compare result (in VOIDmode). */
+
+static GTY(()) rtx cmptf_libfunc;
+
+void
+ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
+{
+ enum rtx_code code = GET_CODE (*expr);
+ rtx cmp;
+
+ /* If we have a BImode input, then we already have a compare result, and
+ do not need to emit another comparison. */
+ if (GET_MODE (*op0) == BImode)
+ {
+ gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
+ cmp = *op0;
+ }
+ /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
+ magic number as its third argument, indicating what to do.
+ The return value is an integer to be compared against zero. */
+ else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
+ {
+ enum qfcmp_magic {
+ QCMP_INV = 1, /* Raise FP_INVALID on SNaN as a side effect. */
+ QCMP_UNORD = 2,
+ QCMP_EQ = 4,
+ QCMP_LT = 8,
+ QCMP_GT = 16
+ };
+ int magic;
+ enum rtx_code ncode;
+ rtx ret, insns;
+
+ gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
+ switch (code)
+ {
+ /* 1 = equal, 0 = not equal. Equality operators do
+ not raise FP_INVALID when given an SNaN operand. */
+ case EQ: magic = QCMP_EQ; ncode = NE; break;
+ case NE: magic = QCMP_EQ; ncode = EQ; break;
+ /* isunordered() from C99. */
+ case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
+ case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
+ /* Relational operators raise FP_INVALID when given
+ an SNaN operand. */
+ case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
+ case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
+ case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
+ case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
+ /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
+ Expanders for buneq etc. would have to be added to ia64.md
+ for this to be useful. */
+ default: gcc_unreachable ();
+ }
+
+ start_sequence ();
+
+ ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
+ *op0, TFmode, *op1, TFmode,
+ GEN_INT (magic), DImode);
+ cmp = gen_reg_rtx (BImode);
+ emit_insn (gen_rtx_SET (VOIDmode, cmp,
+ gen_rtx_fmt_ee (ncode, BImode,
+ ret, const0_rtx)));
+
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_libcall_block (insns, cmp, cmp,
+ gen_rtx_fmt_ee (code, BImode, *op0, *op1));
+ code = NE;
+ }
+ else
+ {
+ cmp = gen_reg_rtx (BImode);
+ emit_insn (gen_rtx_SET (VOIDmode, cmp,
+ gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
+ code = NE;
+ }
+
+ *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
+ *op0 = cmp;
+ *op1 = const0_rtx;
+}
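+
+/* For instance, expanding (eq:TF a b) on HP-UX emits
+ ret = _U_Qfcmp (a, b, QCMP_EQ) and then sets a BImode register to
+ (ne ret 0); the comparison handed back to the caller is thus always
+ a BImode test against zero. */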
+
+/* Generate an integral vector comparison. Return true if the condition has
+ been reversed, and so the sense of the comparison should be inverted. */
+
+static bool
+ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
+ rtx dest, rtx op0, rtx op1)
+{
+ bool negate = false;
+ rtx x;
+
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = op0, op0 = op1, op1 = x;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison against 0. */
+ if (code == GTU)
+ {
+ switch (mode)
+ {
+ case V2SImode:
+ {
+ rtx t1, t2, mask;
+
+ /* Subtract 0x80000000 (the bit pattern of INT_MIN) from both
+ operands to bias the unsigned values into signed range. */
+ mask = GEN_INT (0x80000000);
+ mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
+ mask = force_reg (mode, mask);
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_subv2si3 (t1, op0, mask));
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_subv2si3 (t2, op1, mask));
+ op0 = t1;
+ op1 = t2;
+ code = GT;
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, op0, op1)));
+
+ code = EQ;
+ op0 = x;
+ op1 = CONST0_RTX (mode);
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ x = gen_rtx_fmt_ee (code, mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return negate;
+}
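+
+/* The V2SImode bias trick above works because x - 0x80000000 maps
+ unsigned order onto signed order: e.g. 0xffffffff and 1 become
+ 0x7fffffff and -0x7fffffff, and the signed GT then agrees with the
+ original unsigned comparison. */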
+
+/* Emit an integral vector conditional move. */
+
+void
+ia64_expand_vecint_cmov (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate;
+ rtx cmp, x, ot, of;
+
+ cmp = gen_reg_rtx (mode);
+ negate = ia64_expand_vecint_compare (code, mode, cmp,
+ operands[4], operands[5]);
+
+ ot = operands[1+negate];
+ of = operands[2-negate];
+
+ if (ot == CONST0_RTX (mode))
+ {
+ if (of == CONST0_RTX (mode))
+ {
+ emit_move_insn (operands[0], ot);
+ return;
+ }
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, of);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+ else if (of == CONST0_RTX (mode))
+ {
+ x = gen_rtx_AND (mode, cmp, ot);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+ else
+ {
+ rtx t, f;
+
+ t = gen_reg_rtx (mode);
+ x = gen_rtx_AND (mode, cmp, operands[1+negate]);
+ emit_insn (gen_rtx_SET (VOIDmode, t, x));
+
+ f = gen_reg_rtx (mode);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, operands[2-negate]);
+ emit_insn (gen_rtx_SET (VOIDmode, f, x));
+
+ x = gen_rtx_IOR (mode, t, f);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+}
+
+/* Emit an integral vector min or max operation. Return true if all done. */
+
+bool
+ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx xops[6];
+
+ /* These four combinations are supported directly. */
+ if (mode == V8QImode && (code == UMIN || code == UMAX))
+ return false;
+ if (mode == V4HImode && (code == SMIN || code == SMAX))
+ return false;
+
+ /* This combination can be implemented with only saturating subtraction. */
+ if (mode == V4HImode && code == UMAX)
+ {
+ rtx x, tmp = gen_reg_rtx (mode);
+
+ x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
+
+ emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
+ return true;
+ }
+
+ /* Everything else implemented via vector comparisons. */
+ xops[0] = operands[0];
+ xops[4] = xops[1] = operands[1];
+ xops[5] = xops[2] = operands[2];
+
+ switch (code)
+ {
+ case UMIN:
+ code = LTU;
+ break;
+ case UMAX:
+ code = GTU;
+ break;
+ case SMIN:
+ code = LT;
+ break;
+ case SMAX:
+ code = GT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
+
+ ia64_expand_vecint_cmov (xops);
+ return true;
+}
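+
+/* Sanity check of the V4HImode UMAX identity used above: with a = 5,
+ b = 9, sat_sub (a, b) == 0 and 0 + 9 == 9; with a = 9, b = 5,
+ sat_sub == 4 and 4 + 5 == 9. Either way the result is max (a, b). */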
+
+/* The vectors LO and HI each contain N halves of a double-wide vector.
+ Reassemble either the first N/2 or the second N/2 elements. */
+
+void
+ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
+{
+ enum machine_mode mode = GET_MODE (lo);
+ rtx (*gen) (rtx, rtx, rtx);
+ rtx x;
+
+ switch (mode)
+ {
+ case V8QImode:
+ gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
+ break;
+ case V4HImode:
+ gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ x = gen_lowpart (mode, out);
+ if (TARGET_BIG_ENDIAN)
+ x = gen (x, hi, lo);
+ else
+ x = gen (x, lo, hi);
+ emit_insn (x);
+}
+
+/* Return a vector of the sign-extension of VEC. */
+
+static rtx
+ia64_unpack_sign (rtx vec, bool unsignedp)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ rtx zero = CONST0_RTX (mode);
+
+ if (unsignedp)
+ return zero;
+ else
+ {
+ rtx sign = gen_reg_rtx (mode);
+ bool neg;
+
+ neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
+ gcc_assert (!neg);
+
+ return sign;
+ }
+}
+
+/* Emit an integral vector unpack operation. */
+
+void
+ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+{
+ rtx sign = ia64_unpack_sign (operands[1], unsignedp);
+ ia64_unpack_assemble (operands[0], operands[1], sign, highp);
+}
+
+/* Emit an integral vector widening sum operation. */
+
+void
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+{
+ enum machine_mode wmode;
+ rtx l, h, t, sign;
+
+ sign = ia64_unpack_sign (operands[1], unsignedp);
+
+ wmode = GET_MODE (operands[0]);
+ l = gen_reg_rtx (wmode);
+ h = gen_reg_rtx (wmode);
+
+ ia64_unpack_assemble (l, operands[1], sign, false);
+ ia64_unpack_assemble (h, operands[1], sign, true);
+
+ t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
+ t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
+ if (t != operands[0])
+ emit_move_insn (operands[0], t);
+}
+
+/* Emit a signed or unsigned V8QI dot product operation. */
+
+void
+ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
+{
+ rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
+ rtx p1, p2, p3, p4, s1, s2, s3;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ sn1 = ia64_unpack_sign (op1, unsignedp);
+ sn2 = ia64_unpack_sign (op2, unsignedp);
+
+ l1 = gen_reg_rtx (V4HImode);
+ l2 = gen_reg_rtx (V4HImode);
+ h1 = gen_reg_rtx (V4HImode);
+ h2 = gen_reg_rtx (V4HImode);
+ ia64_unpack_assemble (l1, op1, sn1, false);
+ ia64_unpack_assemble (l2, op2, sn2, false);
+ ia64_unpack_assemble (h1, op1, sn1, true);
+ ia64_unpack_assemble (h2, op2, sn2, true);
+
+ p1 = gen_reg_rtx (V2SImode);
+ p2 = gen_reg_rtx (V2SImode);
+ p3 = gen_reg_rtx (V2SImode);
+ p4 = gen_reg_rtx (V2SImode);
+ emit_insn (gen_pmpy2_even (p1, l1, l2));
+ emit_insn (gen_pmpy2_even (p2, h1, h2));
+ emit_insn (gen_pmpy2_odd (p3, l1, l2));
+ emit_insn (gen_pmpy2_odd (p4, h1, h2));
+
+ s1 = gen_reg_rtx (V2SImode);
+ s2 = gen_reg_rtx (V2SImode);
+ s3 = gen_reg_rtx (V2SImode);
+ emit_insn (gen_addv2si3 (s1, p1, p2));
+ emit_insn (gen_addv2si3 (s2, p3, p4));
+ emit_insn (gen_addv2si3 (s3, s1, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], s2, s3));
+}
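+
+/* In outline: each V8QI operand is widened into V4HI low/high halves,
+ the halves are multiplied into four V2SI partial products (even and
+ odd element pairs), and the partial products plus the accumulator in
+ operands[3] are summed into operands[0]. */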
+
+/* Emit the appropriate sequence for a call. */
+
+void
+ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
+ int sibcall_p)
+{
+ rtx insn, b0;
+
+ addr = XEXP (addr, 0);
+ addr = convert_memory_address (DImode, addr);
+ b0 = gen_rtx_REG (DImode, R_BR (0));
+
+ /* ??? Should do this for functions known to bind local too. */
+ if (TARGET_NO_PIC || TARGET_AUTO_PIC)
+ {
+ if (sibcall_p)
+ insn = gen_sibcall_nogp (addr);
+ else if (! retval)
+ insn = gen_call_nogp (addr, b0);
+ else
+ insn = gen_call_value_nogp (retval, addr, b0);
+ insn = emit_call_insn (insn);
+ }
+ else
+ {
+ if (sibcall_p)
+ insn = gen_sibcall_gp (addr);
+ else if (! retval)
+ insn = gen_call_gp (addr, b0);
+ else
+ insn = gen_call_value_gp (retval, addr, b0);
+ insn = emit_call_insn (insn);
+
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+ }
+
+ if (sibcall_p)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
+
+ if (TARGET_ABI_OPEN_VMS)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
+ gen_rtx_REG (DImode, GR_REG (25)));
+}
+
+static void
+reg_emitted (enum ia64_frame_regs r)
+{
+ if (emitted_frame_related_regs[r] == 0)
+ emitted_frame_related_regs[r] = current_frame_info.r[r];
+ else
+ gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
+}
+
+static int
+get_reg (enum ia64_frame_regs r)
+{
+ reg_emitted (r);
+ return current_frame_info.r[r];
+}
+
+static bool
+is_emitted (int regno)
+{
+ unsigned int r;
+
+ for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
+ if (emitted_frame_related_regs[r] == regno)
+ return true;
+ return false;
+}
+
+void
+ia64_reload_gp (void)
+{
+ rtx tmp;
+
+ if (current_frame_info.r[reg_save_gp])
+ {
+ tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
+ }
+ else
+ {
+ HOST_WIDE_INT offset;
+ rtx offset_r;
+
+ offset = (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size);
+ if (frame_pointer_needed)
+ {
+ tmp = hard_frame_pointer_rtx;
+ offset = -offset;
+ }
+ else
+ {
+ tmp = stack_pointer_rtx;
+ offset = current_frame_info.total_size - offset;
+ }
+
+ offset_r = GEN_INT (offset);
+ if (satisfies_constraint_I (offset_r))
+ emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
+ else
+ {
+ emit_move_insn (pic_offset_table_rtx, offset_r);
+ emit_insn (gen_adddi3 (pic_offset_table_rtx,
+ pic_offset_table_rtx, tmp));
+ }
+
+ tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
+ }
+
+ emit_move_insn (pic_offset_table_rtx, tmp);
+}
+
+void
+ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
+ rtx scratch_b, int noreturn_p, int sibcall_p)
+{
+ rtx insn;
+ bool is_desc = false;
+
+ /* If we find we're calling through a register, then we're actually
+ calling through a descriptor, so load up the values. */
+ if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
+ {
+ rtx tmp;
+ bool addr_dead_p;
+
+ /* ??? We are currently constrained to *not* use peep2, because
+ we can legitimately change the global lifetime of the GP
+ (in the form of killing where previously live). This is
+ because a call through a descriptor doesn't use the previous
+ value of the GP, while a direct call does, and we do not
+ commit to either form until the split here.
+
+ That said, this means that we lack precise life info for
+ whether ADDR is dead after this call. This is not terribly
+ important, since we can fix things up essentially for free
+ with the POST_DEC below, but it's nice to not use it when we
+ can immediately tell it's not necessary. */
+ addr_dead_p = ((noreturn_p || sibcall_p
+ || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+ REGNO (addr)))
+ && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
+
+ /* Load the code address into scratch_b. */
+ tmp = gen_rtx_POST_INC (Pmode, addr);
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (scratch_r, tmp);
+ emit_move_insn (scratch_b, scratch_r);
+
+ /* Load the GP address. If ADDR is not dead here, then we must
+ revert the change made above via the POST_INCREMENT. */
+ if (!addr_dead_p)
+ tmp = gen_rtx_POST_DEC (Pmode, addr);
+ else
+ tmp = addr;
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (pic_offset_table_rtx, tmp);
+
+ is_desc = true;
+ addr = scratch_b;
+ }
+
+ if (sibcall_p)
+ insn = gen_sibcall_nogp (addr);
+ else if (retval)
+ insn = gen_call_value_nogp (retval, addr, retaddr);
+ else
+ insn = gen_call_nogp (addr, retaddr);
+ emit_call_insn (insn);
+
+ if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
+ ia64_reload_gp ();
+}
+
+/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically.
+
+ This differs from the generic code in that we know about the zero-extending
+ properties of cmpxchg, and the zero-extending requirements of ar.ccv. We
+ also know that ld.acq+cmpxchg.rel equals a full barrier.
+
+ The loop we want to generate looks like
+
+ cmp_reg = mem;
+ label:
+ old_reg = cmp_reg;
+ new_reg = cmp_reg op val;
+ cmp_reg = compare-and-swap(mem, old_reg, new_reg)
+ if (cmp_reg != old_reg)
+ goto label;
+
+ Note that we only do the plain load from memory once. Subsequent
+ iterations use the value loaded by the compare-and-swap pattern. */
+
+void
+ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ rtx old_dst, rtx new_dst)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
+ enum insn_code icode;
+
+ /* Special case for using fetchadd. */
+ if ((mode == SImode || mode == DImode)
+ && (code == PLUS || code == MINUS)
+ && fetchadd_operand (val, mode))
+ {
+ if (code == MINUS)
+ val = GEN_INT (-INTVAL (val));
+
+ if (!old_dst)
+ old_dst = gen_reg_rtx (mode);
+
+ emit_insn (gen_memory_barrier ());
+
+ if (mode == SImode)
+ icode = CODE_FOR_fetchadd_acq_si;
+ else
+ icode = CODE_FOR_fetchadd_acq_di;
+ emit_insn (GEN_FCN (icode) (old_dst, mem, val));
+
+ if (new_dst)
+ {
+ new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
+ true, OPTAB_WIDEN);
+ if (new_reg != new_dst)
+ emit_move_insn (new_dst, new_reg);
+ }
+ return;
+ }
+
+ /* Because of the volatile mem read, we get an ld.acq, which is the
+ front half of the full barrier. The end half is the cmpxchg.rel. */
+ gcc_assert (MEM_VOLATILE_P (mem));
+
+ old_reg = gen_reg_rtx (DImode);
+ cmp_reg = gen_reg_rtx (DImode);
+ label = gen_label_rtx ();
+
+ if (mode != DImode)
+ {
+ val = simplify_gen_subreg (DImode, val, mode, 0);
+ emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
+ }
+ else
+ emit_move_insn (cmp_reg, mem);
+
+ emit_label (label);
+
+ ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+ emit_move_insn (old_reg, cmp_reg);
+ emit_move_insn (ar_ccv, cmp_reg);
+
+ if (old_dst)
+ emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
+
+ new_reg = cmp_reg;
+ if (code == NOT)
+ {
+ new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
+ true, OPTAB_DIRECT);
+ new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
+ }
+ else
+ new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
+ true, OPTAB_DIRECT);
+
+ if (mode != DImode)
+ new_reg = gen_lowpart (mode, new_reg);
+ if (new_dst)
+ emit_move_insn (new_dst, new_reg);
+
+ switch (mode)
+ {
+ case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
+ case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
+ case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
+ case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
+
+ emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
+}
+
+/* Begin the assembly file. */
+
+static void
+ia64_file_start (void)
+{
+ default_file_start ();
+ emit_safe_across_calls ();
+}
+
+void
+emit_safe_across_calls (void)
+{
+ unsigned int rs, re;
+ int out_state;
+
+ rs = 1;
+ out_state = 0;
+ while (1)
+ {
+ while (rs < 64 && call_used_regs[PR_REG (rs)])
+ rs++;
+ if (rs >= 64)
+ break;
+ for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
+ continue;
+ if (out_state == 0)
+ {
+ fputs ("\t.pred.safe_across_calls ", asm_out_file);
+ out_state = 1;
+ }
+ else
+ fputc (',', asm_out_file);
+ if (re == rs + 1)
+ fprintf (asm_out_file, "p%u", rs);
+ else
+ fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
+ rs = re + 1;
+ }
+ if (out_state)
+ fputc ('\n', asm_out_file);
+}
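+
+/* With the usual ia64 partition of call-used predicate registers this
+ typically emits, e.g.:
+
+ .pred.safe_across_calls p1-p5,p16-p63
+
+ telling the assembler which predicates are preserved across calls. */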
+
+/* Globalize a declaration. */
+
+static void
+ia64_globalize_decl_name (FILE * stream, tree decl)
+{
+ const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+ tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
+ if (version_attr)
+ {
+ tree v = TREE_VALUE (TREE_VALUE (version_attr));
+ const char *p = TREE_STRING_POINTER (v);
+ fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
+ }
+ targetm.asm_out.globalize_label (stream, name);
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
+}
+
+/* Helper function for ia64_compute_frame_size: find an appropriate general
+ register to spill some special register to. Registers GR0 to GR31 already
+ allocated by this routine are recorded in current_frame_info.gr_used_mask.
+ TRY_LOCALS is true if we should attempt to locate a local regnum. */
+
+static int
+find_gr_spill (enum ia64_frame_regs r, int try_locals)
+{
+ int regno;
+
+ if (emitted_frame_related_regs[r] != 0)
+ {
+ regno = emitted_frame_related_regs[r];
+ if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
+ && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
+ current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
+ else if (current_function_is_leaf
+ && regno >= GR_REG (1) && regno <= GR_REG (31))
+ current_frame_info.gr_used_mask |= 1 << regno;
+
+ return regno;
+ }
+
+ /* If this is a leaf function, first try an otherwise unused
+ call-clobbered register. */
+ if (current_function_is_leaf)
+ {
+ for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
+ if (! df_regs_ever_live_p (regno)
+ && call_used_regs[regno]
+ && ! fixed_regs[regno]
+ && ! global_regs[regno]
+ && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
+ && ! is_emitted (regno))
+ {
+ current_frame_info.gr_used_mask |= 1 << regno;
+ return regno;
+ }
+ }
+
+ if (try_locals)
+ {
+ regno = current_frame_info.n_local_regs;
+ /* If there is a frame pointer, then we can't use loc79, because
+ that is HARD_FRAME_POINTER_REGNUM. In particular, see the
+ reg_name switching code in ia64_expand_prologue. */
+ while (regno < (80 - frame_pointer_needed))
+ if (! is_emitted (LOC_REG (regno++)))
+ {
+ current_frame_info.n_local_regs = regno;
+ return LOC_REG (regno - 1);
+ }
+ }
+
+ /* Failed to find a general register to spill to. Must use stack. */
+ return 0;
+}
+
+/* In order to make for nice schedules, we try to allocate every temporary
+ to a different register. We must of course stay away from call-saved,
+ fixed, and global registers. We must also stay away from registers
+ allocated in current_frame_info.gr_used_mask, since those include regs
+ used all through the prologue.
+
+ Any register allocated here must be used immediately. The idea is to
+ aid scheduling, not to solve data flow problems. */
+
+static int last_scratch_gr_reg;
+
+static int
+next_scratch_gr_reg (void)
+{
+ int i, regno;
+
+ for (i = 0; i < 32; ++i)
+ {
+ regno = (last_scratch_gr_reg + i + 1) & 31;
+ if (call_used_regs[regno]
+ && ! fixed_regs[regno]
+ && ! global_regs[regno]
+ && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
+ {
+ last_scratch_gr_reg = regno;
+ return regno;
+ }
+ }
+
+ /* There must be _something_ available. */
+ gcc_unreachable ();
+}
+
+/* Helper function for ia64_compute_frame_size, called through
+ diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */
+
+static void
+mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
+{
+ unsigned int regno = REGNO (reg);
+ if (regno < 32)
+ {
+ unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
+ for (i = 0; i < n; ++i)
+ current_frame_info.gr_used_mask |= 1 << (regno + i);
+ }
+}
+
+
+/* Compute the size and layout of the current function's frame, recording
+ the result in current_frame_info. SIZE is the number of bytes of space
+ needed for local variables. */
+
+static void
+ia64_compute_frame_size (HOST_WIDE_INT size)
+{
+ HOST_WIDE_INT total_size;
+ HOST_WIDE_INT spill_size = 0;
+ HOST_WIDE_INT extra_spill_size = 0;
+ HOST_WIDE_INT pretend_args_size;
+ HARD_REG_SET mask;
+ int n_spilled = 0;
+ int spilled_gr_p = 0;
+ int spilled_fr_p = 0;
+ unsigned int regno;
+ int min_regno;
+ int max_regno;
+ int i;
+
+ if (current_frame_info.initialized)
+ return;
+
+ memset (&current_frame_info, 0, sizeof current_frame_info);
+ CLEAR_HARD_REG_SET (mask);
+
+ /* Don't allocate scratches to the return register. */
+ diddle_return_value (mark_reg_gr_used_mask, NULL);
+
+ /* Don't allocate scratches to the EH scratch registers. */
+ if (cfun->machine->ia64_eh_epilogue_sp)
+ mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
+ if (cfun->machine->ia64_eh_epilogue_bsp)
+ mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
+
+ /* Find the size of the register stack frame. We have only 80 local
+ registers, because we reserve 8 for the inputs and 8 for the
+ outputs. */
+
+ /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
+ since we'll be adjusting that down later. */
+ regno = LOC_REG (78) + ! frame_pointer_needed;
+ for (; regno >= LOC_REG (0); regno--)
+ if (df_regs_ever_live_p (regno) && !is_emitted (regno))
+ break;
+ current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
+
+ /* For functions marked with the syscall_linkage attribute, we must mark
+ all eight input registers as in use, so that locals aren't visible to
+ the caller. */
+
+ if (cfun->machine->n_varargs > 0
+ || lookup_attribute ("syscall_linkage",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
+ current_frame_info.n_input_regs = 8;
+ else
+ {
+ for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
+ if (df_regs_ever_live_p (regno))
+ break;
+ current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
+ }
+
+ for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
+ if (df_regs_ever_live_p (regno))
+ break;
+ i = regno - OUT_REG (0) + 1;
+
+#ifndef PROFILE_HOOK
+ /* When -p profiling, we need one output register for the mcount argument.
+ Likewise for -a profiling for the bb_init_func argument. For -ax
+ profiling, we need two output registers for the two bb_init_trace_func
+ arguments. */
+ if (crtl->profile)
+ i = MAX (i, 1);
+#endif
+ current_frame_info.n_output_regs = i;
+
+ /* ??? No rotating register support yet. */
+ current_frame_info.n_rotate_regs = 0;
+
+ /* Discover which registers need spilling, and how much room that
+ will take. Begin with floating point and general registers,
+ which will always wind up on the stack. */
+
+ for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ {
+ SET_HARD_REG_BIT (mask, regno);
+ spill_size += 16;
+ n_spilled += 1;
+ spilled_fr_p = 1;
+ }
+
+ for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ {
+ SET_HARD_REG_BIT (mask, regno);
+ spill_size += 8;
+ n_spilled += 1;
+ spilled_gr_p = 1;
+ }
+
+ for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ {
+ SET_HARD_REG_BIT (mask, regno);
+ spill_size += 8;
+ n_spilled += 1;
+ }
+
+ /* Now come all special registers that might get saved in other
+ general registers. */
+
+ if (frame_pointer_needed)
+ {
+ current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
+ /* If we did not get a register, then we take LOC79. This is guaranteed
+ to be free, even if regs_ever_live is already set, because this is
+ HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
+ as we don't count loc79 above. */
+ if (current_frame_info.r[reg_fp] == 0)
+ {
+ current_frame_info.r[reg_fp] = LOC_REG (79);
+ current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
+ }
+ }
+
+ if (! current_function_is_leaf)
+ {
+ /* Emit a save of BR0 if we call other functions. Do this even
+ if this function doesn't return, as EH depends on this to be
+ able to unwind the stack. */
+ SET_HARD_REG_BIT (mask, BR_REG (0));
+
+ current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
+ if (current_frame_info.r[reg_save_b0] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+
+ /* Similarly for ar.pfs. */
+ SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
+ current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
+ if (current_frame_info.r[reg_save_ar_pfs] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+
+ /* Similarly for gp. Note that if we're calling setjmp, the stacked
+ registers are clobbered, so we fall back to the stack. */
+ current_frame_info.r[reg_save_gp]
+ = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
+ if (current_frame_info.r[reg_save_gp] == 0)
+ {
+ SET_HARD_REG_BIT (mask, GR_REG (1));
+ spill_size += 8;
+ n_spilled += 1;
+ }
+ }
+ else
+ {
+ if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
+ {
+ SET_HARD_REG_BIT (mask, BR_REG (0));
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+
+ if (df_regs_ever_live_p (AR_PFS_REGNUM))
+ {
+ SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
+ current_frame_info.r[reg_save_ar_pfs]
+ = find_gr_spill (reg_save_ar_pfs, 1);
+ if (current_frame_info.r[reg_save_ar_pfs] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+ }
+ }
+
+ /* Unwind descriptor hackery: things are most efficient if we allocate
+ consecutive GR save registers for RP, PFS, FP in that order. However,
+ it is absolutely critical that FP get the only hard register that's
+ guaranteed to be free, so we allocated it first. If all three happened
+ to be allocated hard regs, and they are consecutive, rearrange them
+ into the preferred order now.
+
+ If we have already emitted code for any of those registers,
+ then it's already too late to change. */
+ min_regno = MIN (current_frame_info.r[reg_fp],
+ MIN (current_frame_info.r[reg_save_b0],
+ current_frame_info.r[reg_save_ar_pfs]));
+ max_regno = MAX (current_frame_info.r[reg_fp],
+ MAX (current_frame_info.r[reg_save_b0],
+ current_frame_info.r[reg_save_ar_pfs]));
+ if (min_regno > 0
+ && min_regno + 2 == max_regno
+ && (current_frame_info.r[reg_fp] == min_regno + 1
+ || current_frame_info.r[reg_save_b0] == min_regno + 1
+ || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
+ && (emitted_frame_related_regs[reg_save_b0] == 0
+ || emitted_frame_related_regs[reg_save_b0] == min_regno)
+ && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
+ || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
+ && (emitted_frame_related_regs[reg_fp] == 0
+ || emitted_frame_related_regs[reg_fp] == min_regno + 2))
+ {
+ current_frame_info.r[reg_save_b0] = min_regno;
+ current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
+ current_frame_info.r[reg_fp] = min_regno + 2;
+ }
+
+ /* See if we need to store the predicate register block. */
+ for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ break;
+ if (regno <= PR_REG (63))
+ {
+ SET_HARD_REG_BIT (mask, PR_REG (0));
+ current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
+ if (current_frame_info.r[reg_save_pr] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+
+ /* ??? Mark them all as used so that register renaming and such
+ are free to use them. */
+ for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
+ df_set_regs_ever_live (regno, true);
+ }
+
+ /* If we're forced to use st8.spill, we're forced to save and restore
+ ar.unat as well. The check for existing liveness allows inline asm
+ to touch ar.unat. */
+ if (spilled_gr_p || cfun->machine->n_varargs
+ || df_regs_ever_live_p (AR_UNAT_REGNUM))
+ {
+ df_set_regs_ever_live (AR_UNAT_REGNUM, true);
+ SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
+ current_frame_info.r[reg_save_ar_unat]
+ = find_gr_spill (reg_save_ar_unat, spill_size == 0);
+ if (current_frame_info.r[reg_save_ar_unat] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+ }
+
+ if (df_regs_ever_live_p (AR_LC_REGNUM))
+ {
+ SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
+ current_frame_info.r[reg_save_ar_lc]
+ = find_gr_spill (reg_save_ar_lc, spill_size == 0);
+ if (current_frame_info.r[reg_save_ar_lc] == 0)
+ {
+ extra_spill_size += 8;
+ n_spilled += 1;
+ }
+ }
+
+ /* If we have an odd number of words of pretend arguments written to
+ the stack, then the FR save area will be unaligned. We round the
+ size of this area up to keep things 16 byte aligned. */
+ if (spilled_fr_p)
+ pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
+ else
+ pretend_args_size = crtl->args.pretend_args_size;
+
+ total_size = (spill_size + extra_spill_size + size + pretend_args_size
+ + crtl->outgoing_args_size);
+ total_size = IA64_STACK_ALIGN (total_size);
+
+ /* We always use the 16-byte scratch area provided by the caller, but
+ if we are a leaf function, there's no one to which we need to provide
+ a scratch area. However, if the function allocates dynamic stack space,
+ the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
+ so we need to cope. */
+ if (current_function_is_leaf && !cfun->calls_alloca)
+ total_size = MAX (0, total_size - 16);
+
+ current_frame_info.total_size = total_size;
+ current_frame_info.spill_cfa_off = pretend_args_size - 16;
+ current_frame_info.spill_size = spill_size;
+ current_frame_info.extra_spill_size = extra_spill_size;
+ COPY_HARD_REG_SET (current_frame_info.mask, mask);
+ current_frame_info.n_spilled = n_spilled;
+ current_frame_info.initialized = reload_completed;
+}
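+
+/* A worked example of the sizing above: a leaf function that does not
+ call alloca, with spill_size == 16, extra_spill_size == 8, SIZE == 40,
+ and no pretend or outgoing args, gets 16 + 8 + 40 == 64, which is
+ already 16-byte aligned, and then 64 - 16 == 48 once the unneeded
+ scratch area is dropped. */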
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == BR_REG (0) ? current_function_is_leaf : true);
+}
+
+/* Compute the initial difference between the specified pair of registers. */
+
+HOST_WIDE_INT
+ia64_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset;
+
+ ia64_compute_frame_size (get_frame_size ());
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ switch (to)
+ {
+ case HARD_FRAME_POINTER_REGNUM:
+ offset = -current_frame_info.total_size;
+ if (!current_function_is_leaf || cfun->calls_alloca)
+ offset += 16 + crtl->outgoing_args_size;
+ break;
+
+ case STACK_POINTER_REGNUM:
+ offset = 0;
+ if (!current_function_is_leaf || cfun->calls_alloca)
+ offset += 16 + crtl->outgoing_args_size;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case ARG_POINTER_REGNUM:
+ /* Arguments start above the 16 byte save area, unless stdarg,
+ in which case we store through the 16 byte save area. */
+ switch (to)
+ {
+ case HARD_FRAME_POINTER_REGNUM:
+ offset = 16 - crtl->args.pretend_args_size;
+ break;
+
+ case STACK_POINTER_REGNUM:
+ offset = (current_frame_info.total_size
+ + 16 - crtl->args.pretend_args_size);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return offset;
+}
+
+/* If there are more than a trivial number of register spills, we use
+ two interleaved iterators so that we can get two memory references
+ per insn group.
+
+ In order to simplify things in the prologue and epilogue expanders,
+ we use helper functions to fix up the memory references after the
+ fact with the appropriate offsets to a POST_MODIFY memory mode.
+ The following data structure tracks the state of the two iterators
+ while insns are being emitted. */
+
+struct spill_fill_data
+{
+ rtx init_after; /* point at which to emit initializations */
+ rtx init_reg[2]; /* initial base register */
+ rtx iter_reg[2]; /* the iterator registers */
+ rtx *prev_addr[2]; /* address of last memory use */
+ rtx prev_insn[2]; /* the insn corresponding to prev_addr */
+ HOST_WIDE_INT prev_off[2]; /* last offset */
+ int n_iter; /* number of iterators in use */
+ int next_iter; /* next iterator to use */
+ unsigned int save_gr_used_mask;
+};
+
+static struct spill_fill_data spill_fill_data;
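+
+/* To make the interleaving concrete: with more than two spills,
+ n_iter is 2 and spill_restore_mem below alternates next_iter
+ 0, 1, 0, 1, ..., so back-to-back spills go through different base
+ registers and can land in the same insn group. */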
+
+static void
+setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
+{
+ int i;
+
+ spill_fill_data.init_after = get_last_insn ();
+ spill_fill_data.init_reg[0] = init_reg;
+ spill_fill_data.init_reg[1] = init_reg;
+ spill_fill_data.prev_addr[0] = NULL;
+ spill_fill_data.prev_addr[1] = NULL;
+ spill_fill_data.prev_insn[0] = NULL;
+ spill_fill_data.prev_insn[1] = NULL;
+ spill_fill_data.prev_off[0] = cfa_off;
+ spill_fill_data.prev_off[1] = cfa_off;
+ spill_fill_data.next_iter = 0;
+ spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
+
+ spill_fill_data.n_iter = 1 + (n_spills > 2);
+ for (i = 0; i < spill_fill_data.n_iter; ++i)
+ {
+ int regno = next_scratch_gr_reg ();
+ spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
+ current_frame_info.gr_used_mask |= 1 << regno;
+ }
+}
+
+static void
+finish_spill_pointers (void)
+{
+ current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
+}
+
+static rtx
+spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
+{
+ int iter = spill_fill_data.next_iter;
+ HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
+ rtx disp_rtx = GEN_INT (disp);
+ rtx mem;
+
+ if (spill_fill_data.prev_addr[iter])
+ {
+ if (satisfies_constraint_N (disp_rtx))
+ {
+ *spill_fill_data.prev_addr[iter]
+ = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
+ gen_rtx_PLUS (DImode,
+ spill_fill_data.iter_reg[iter],
+ disp_rtx));
+ add_reg_note (spill_fill_data.prev_insn[iter],
+ REG_INC, spill_fill_data.iter_reg[iter]);
+ }
+ else
+ {
+ /* ??? Could use register post_modify for loads. */
+ if (!satisfies_constraint_I (disp_rtx))
+ {
+ rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
+ emit_move_insn (tmp, disp_rtx);
+ disp_rtx = tmp;
+ }
+ emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
+ spill_fill_data.iter_reg[iter], disp_rtx));
+ }
+ }
+ /* Micro-optimization: if we've created a frame pointer, it's at
+ CFA 0, which may allow the real iterator to be initialized lower,
+ slightly increasing parallelism. Also, if there are few saves
+ it may eliminate the iterator entirely. */
+ else if (disp == 0
+ && spill_fill_data.init_reg[iter] == stack_pointer_rtx
+ && frame_pointer_needed)
+ {
+ mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+ return mem;
+ }
+ else
+ {
+ rtx seq, insn;
+
+ if (disp == 0)
+ seq = gen_movdi (spill_fill_data.iter_reg[iter],
+ spill_fill_data.init_reg[iter]);
+ else
+ {
+ start_sequence ();
+
+ if (!satisfies_constraint_I (disp_rtx))
+ {
+ rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
+ emit_move_insn (tmp, disp_rtx);
+ disp_rtx = tmp;
+ }
+
+ emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
+ spill_fill_data.init_reg[iter],
+ disp_rtx));
+
+ seq = get_insns ();
+ end_sequence ();
+ }
+
+ /* Be careful in case this is the first insn in the function. */
+ if (spill_fill_data.init_after)
+ insn = emit_insn_after (seq, spill_fill_data.init_after);
+ else
+ {
+ rtx first = get_insns ();
+ if (first)
+ insn = emit_insn_before (seq, first);
+ else
+ insn = emit_insn (seq);
+ }
+ spill_fill_data.init_after = insn;
+ }
+
+ mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
+
+ /* ??? Not all of the spills are for varargs, but some of them are.
+ The rest of the spills belong in an alias set of their own. But
+ it doesn't actually hurt to include them here. */
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+
+ spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
+ spill_fill_data.prev_off[iter] = cfa_off;
+
+ if (++iter >= spill_fill_data.n_iter)
+ iter = 0;
+ spill_fill_data.next_iter = iter;
+
+ return mem;
+}
+
+static void
+do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
+ rtx frame_reg)
+{
+ int iter = spill_fill_data.next_iter;
+ rtx mem, insn;
+
+ mem = spill_restore_mem (reg, cfa_off);
+ insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
+ spill_fill_data.prev_insn[iter] = insn;
+
+ if (frame_reg)
+ {
+ rtx base;
+ HOST_WIDE_INT off;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Don't even pretend that the unwind code can intuit its way
+ through a pair of interleaved post_modify iterators. Just
+ provide the correct answer. */
+
+ if (frame_pointer_needed)
+ {
+ base = hard_frame_pointer_rtx;
+ off = - cfa_off;
+ }
+ else
+ {
+ base = stack_pointer_rtx;
+ off = current_frame_info.total_size - cfa_off;
+ }
+
+ add_reg_note (insn, REG_CFA_OFFSET,
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (reg),
+ plus_constant (base, off)),
+ frame_reg));
+ }
+}
+
+static void
+do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
+{
+ int iter = spill_fill_data.next_iter;
+ rtx insn;
+
+ insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
+ GEN_INT (cfa_off)));
+ spill_fill_data.prev_insn[iter] = insn;
+}
+
+/* Wrapper functions that discard the CONST_INT spill offset. These
+ exist so that we can give gr_spill/gr_fill the offset they need and
+ use a consistent function interface. */
+
+static rtx
+gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
+{
+ return gen_movdi (dest, src);
+}
+
+static rtx
+gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
+{
+ return gen_fr_spill (dest, src);
+}
+
+static rtx
+gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
+{
+ return gen_fr_restore (dest, src);
+}
+
+/* Called after register allocation to add any instructions needed for the
+ prologue. Using a prologue insn is preferred over putting all of the
+ instructions in output_function_prologue(), since it allows the scheduler
+ to intermix instructions with the saves of the caller-saved registers. In
+ some cases, it might be necessary to emit a barrier instruction as the last
+ insn to prevent such scheduling.
+
+ Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
+ so that the debug info generation code can handle them properly.
+
+ The register save area is laid out like so:
+ cfa+16
+ [ varargs spill area ]
+ [ fr register spill area ]
+ [ br register spill area ]
+ [ ar register spill area ]
+ [ pr register spill area ]
+ [ gr register spill area ] */
+
+/* ??? We generate inefficient code when the frame size is larger than
+ can fit in an adds instruction. */
+
+void
+ia64_expand_prologue (void)
+{
+ rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
+ int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
+ rtx reg, alt_reg;
+
+ ia64_compute_frame_size (get_frame_size ());
+ last_scratch_gr_reg = 15;
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = current_frame_info.total_size;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "ia64 frame related registers "
+ "recorded in current_frame_info.r[]:\n");
+#define PRINTREG(a) if (current_frame_info.r[a]) \
+ fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
+ PRINTREG(reg_fp);
+ PRINTREG(reg_save_b0);
+ PRINTREG(reg_save_pr);
+ PRINTREG(reg_save_ar_pfs);
+ PRINTREG(reg_save_ar_unat);
+ PRINTREG(reg_save_ar_lc);
+ PRINTREG(reg_save_gp);
+#undef PRINTREG
+ }
+
+ /* If there is no epilogue, then we don't need some prologue insns.
+ We need to avoid emitting the dead prologue insns, because flow
+ will complain about them. */
+ if (optimize)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ if ((e->flags & EDGE_FAKE) == 0
+ && (e->flags & EDGE_FALLTHRU) != 0)
+ break;
+ epilogue_p = (e != NULL);
+ }
+ else
+ epilogue_p = 1;
+
+ /* Set the local, input, and output register names. We need to do this
+ for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
+ half. If we use in/loc/out register names, then we get assembler errors
+ in crtn.S because there is no alloc insn or regstk directive in there. */
+ if (! TARGET_REG_NAMES)
+ {
+ int inputs = current_frame_info.n_input_regs;
+ int locals = current_frame_info.n_local_regs;
+ int outputs = current_frame_info.n_output_regs;
+
+ for (i = 0; i < inputs; i++)
+ reg_names[IN_REG (i)] = ia64_reg_numbers[i];
+ for (i = 0; i < locals; i++)
+ reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
+ for (i = 0; i < outputs; i++)
+ reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
+ }
+
+ /* Set the frame pointer register name. The regnum is logically loc79,
+ but of course we'll not have allocated that many locals. Rather than
+ worrying about renumbering the existing rtxs, we adjust the name. */
+ /* ??? This code means that we can never use one local register when
+ there is a frame pointer. loc79 gets wasted in this case, as it is
+ renamed to a register that will never be used. See also the try_locals
+ code in find_gr_spill. */
+ if (current_frame_info.r[reg_fp])
+ {
+ const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
+ reg_names[HARD_FRAME_POINTER_REGNUM]
+ = reg_names[current_frame_info.r[reg_fp]];
+ reg_names[current_frame_info.r[reg_fp]] = tmp;
+ }
+
+ /* We don't need an alloc instruction if we've used no outputs or locals. */
+ if (current_frame_info.n_local_regs == 0
+ && current_frame_info.n_output_regs == 0
+ && current_frame_info.n_input_regs <= crtl->args.info.int_regs
+ && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
+ {
+ /* If there is no alloc, but there are input registers used, then we
+ need a .regstk directive. */
+ current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
+ ar_pfs_save_reg = NULL_RTX;
+ }
+ else
+ {
+ current_frame_info.need_regstk = 0;
+
+ if (current_frame_info.r[reg_save_ar_pfs])
+ {
+ regno = current_frame_info.r[reg_save_ar_pfs];
+ reg_emitted (reg_save_ar_pfs);
+ }
+ else
+ regno = next_scratch_gr_reg ();
+ ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
+
+ insn = emit_insn (gen_alloc (ar_pfs_save_reg,
+ GEN_INT (current_frame_info.n_input_regs),
+ GEN_INT (current_frame_info.n_local_regs),
+ GEN_INT (current_frame_info.n_output_regs),
+ GEN_INT (current_frame_info.n_rotate_regs)));
+ RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
+ }
+
+ /* Set up frame pointer, stack pointer, and spill iterators. */
+
+ n_varargs = cfun->machine->n_varargs;
+ setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
+ stack_pointer_rtx, 0);
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Force the unwind info to recognize this as defining a new CFA,
+ rather than some temp register setup. */
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
+ }
+
+ if (current_frame_info.total_size != 0)
+ {
+ rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
+ rtx offset;
+
+ if (satisfies_constraint_I (frame_size_rtx))
+ offset = frame_size_rtx;
+ else
+ {
+ regno = next_scratch_gr_reg ();
+ offset = gen_rtx_REG (DImode, regno);
+ emit_move_insn (offset, frame_size_rtx);
+ }
+
+ insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
+ stack_pointer_rtx, offset));
+
+ if (! frame_pointer_needed)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (DImode,
+ stack_pointer_rtx,
+ frame_size_rtx)));
+ }
+
+ /* ??? At this point we must generate a magic insn that appears to
+ modify the stack pointer, the frame pointer, and all spill
+ iterators. This would allow the most scheduling freedom. For
+ now, just hard stop. */
+ emit_insn (gen_blockage ());
+ }
+
+ /* Must copy out ar.unat before doing any integer spills. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
+ {
+ if (current_frame_info.r[reg_save_ar_unat])
+ {
+ ar_unat_save_reg
+ = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
+ reg_emitted (reg_save_ar_unat);
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
+ current_frame_info.gr_used_mask |= 1 << alt_regno;
+ }
+
+ reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
+ insn = emit_move_insn (ar_unat_save_reg, reg);
+ if (current_frame_info.r[reg_save_ar_unat])
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
+ }
+
+ /* Even if we're not going to generate an epilogue, we still
+ need to save the register so that EH works. */
+ if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
+ emit_insn (gen_prologue_use (ar_unat_save_reg));
+ }
+ else
+ ar_unat_save_reg = NULL_RTX;
+
+ /* Spill all varargs registers. Do this before spilling any GR registers,
+ since we want the UNAT bits for the GR registers to override the UNAT
+ bits from varargs, which we don't care about. */
+
+ cfa_off = -16;
+ for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
+ {
+ reg = gen_rtx_REG (DImode, regno);
+ do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
+ }
+
+ /* Locate the bottom of the register save area. */
+ cfa_off = (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size
+ + current_frame_info.extra_spill_size);
+
+ /* Save the predicate register block either in a register or in memory. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
+ {
+ reg = gen_rtx_REG (DImode, PR_REG (0));
+ if (current_frame_info.r[reg_save_pr] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
+ reg_emitted (reg_save_pr);
+ insn = emit_move_insn (alt_reg, reg);
+
+ /* ??? Denote pr spill/fill by a DImode move that modifies all
+ 64 hard registers. */
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
+
+ /* Even if we're not going to generate an epilogue, we still
+ need to save the register so that EH works. */
+ if (! epilogue_p)
+ emit_insn (gen_prologue_use (alt_reg));
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ insn = emit_move_insn (alt_reg, reg);
+ do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+ }
+
+ /* Handle AR regs in numerical order. All of them get special handling. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
+ && current_frame_info.r[reg_save_ar_unat] == 0)
+ {
+ reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
+ do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+
+ /* The alloc insn already copied ar.pfs into a general register. The
+ only thing we have to do now is copy that register to a stack slot
+ if we'd not allocated a local register for the job. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
+ && current_frame_info.r[reg_save_ar_pfs] == 0)
+ {
+ reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
+ do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
+ {
+ reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
+ if (current_frame_info.r[reg_save_ar_lc] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
+ reg_emitted (reg_save_ar_lc);
+ insn = emit_move_insn (alt_reg, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
+
+ /* Even if we're not going to generate an epilogue, we still
+ need to save the register so that EH works. */
+ if (! epilogue_p)
+ emit_insn (gen_prologue_use (alt_reg));
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ emit_move_insn (alt_reg, reg);
+ do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+ }
+
+ /* Save the return pointer. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
+ {
+ reg = gen_rtx_REG (DImode, BR_REG (0));
+ if (current_frame_info.r[reg_save_b0] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
+ reg_emitted (reg_save_b0);
+ insn = emit_move_insn (alt_reg, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
+
+ /* Even if we're not going to generate an epilogue, we still
+ need to save the register so that EH works. */
+ if (! epilogue_p)
+ emit_insn (gen_prologue_use (alt_reg));
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ emit_move_insn (alt_reg, reg);
+ do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+ }
+
+ if (current_frame_info.r[reg_save_gp])
+ {
+ reg_emitted (reg_save_gp);
+ insn = emit_move_insn (gen_rtx_REG (DImode,
+ current_frame_info.r[reg_save_gp]),
+ pic_offset_table_rtx);
+ }
+
+ /* We should now be at the base of the gr/br/fr spill area. */
+ gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size));
+
+ /* Spill all general registers. */
+ for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ reg = gen_rtx_REG (DImode, regno);
+ do_spill (gen_gr_spill, reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+
+ /* Spill the rest of the BR registers. */
+ for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ reg = gen_rtx_REG (DImode, regno);
+ emit_move_insn (alt_reg, reg);
+ do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
+ cfa_off -= 8;
+ }
+
+ /* Align the frame and spill all FR registers. */
+ for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ gcc_assert (!(cfa_off & 15));
+ reg = gen_rtx_REG (XFmode, regno);
+ do_spill (gen_fr_spill_x, reg, cfa_off, reg);
+ cfa_off -= 16;
+ }
+
+ gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
+
+ finish_spill_pointers ();
+}
+
+/* Output the textual info surrounding the prologue. */
+
+void
+ia64_start_function (FILE *file, const char *fnname,
+ tree decl ATTRIBUTE_UNUSED)
+{
+#if VMS_DEBUGGING_INFO
+ if (vms_debug_main
+ && strncmp (vms_debug_main, fnname, strlen (vms_debug_main)) == 0)
+ {
+ targetm.asm_out.globalize_label (asm_out_file, VMS_DEBUG_MAIN_POINTER);
+ ASM_OUTPUT_DEF (asm_out_file, VMS_DEBUG_MAIN_POINTER, fnname);
+ dwarf2out_vms_debug_main_pointer ();
+ vms_debug_main = 0;
+ }
+#endif
+
+ fputs ("\t.proc ", file);
+ assemble_name (file, fnname);
+ fputc ('\n', file);
+ ASM_OUTPUT_LABEL (file, fnname);
+}
+
+/* Called after register allocation to add any instructions needed for the
+ epilogue. Using an epilogue insn is preferred over putting all of the
+ instructions in output_function_epilogue(), since it allows the scheduler
+ to intermix instructions with the restores of the caller-saved registers. In
+ some cases, it might be necessary to emit a barrier instruction as the last
+ insn to prevent such scheduling. */
+
+void
+ia64_expand_epilogue (int sibcall_p)
+{
+ rtx insn, reg, alt_reg, ar_unat_save_reg;
+ int regno, alt_regno, cfa_off;
+
+ ia64_compute_frame_size (get_frame_size ());
+
+ /* If there is a frame pointer, then we use it instead of the stack
+ pointer, so that the stack pointer does not need to be valid when
+ the epilogue starts. See EXIT_IGNORE_STACK. */
+ if (frame_pointer_needed)
+ setup_spill_pointers (current_frame_info.n_spilled,
+ hard_frame_pointer_rtx, 0);
+ else
+ setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
+ current_frame_info.total_size);
+
+ if (current_frame_info.total_size != 0)
+ {
+ /* ??? At this point we must generate a magic insn that appears to
+ modify the spill iterators and the frame pointer. This would
+ allow the most scheduling freedom. For now, just hard stop. */
+ emit_insn (gen_blockage ());
+ }
+
+ /* Locate the bottom of the register save area. */
+ cfa_off = (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size
+ + current_frame_info.extra_spill_size);
+
+ /* Restore the predicate registers. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
+ {
+ if (current_frame_info.r[reg_save_pr] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
+ reg_emitted (reg_save_pr);
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ do_restore (gen_movdi_x, alt_reg, cfa_off);
+ cfa_off -= 8;
+ }
+ reg = gen_rtx_REG (DImode, PR_REG (0));
+ emit_move_insn (reg, alt_reg);
+ }
+
+ /* Restore the application registers. */
+
+ /* Load the saved unat from the stack, but do not restore it until
+ after the GRs have been restored. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
+ {
+ if (current_frame_info.r[reg_save_ar_unat] != 0)
+ {
+ ar_unat_save_reg
+ = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
+ reg_emitted (reg_save_ar_unat);
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
+ current_frame_info.gr_used_mask |= 1 << alt_regno;
+ do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
+ cfa_off -= 8;
+ }
+ }
+ else
+ ar_unat_save_reg = NULL_RTX;
+
+ if (current_frame_info.r[reg_save_ar_pfs] != 0)
+ {
+ reg_emitted (reg_save_ar_pfs);
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
+ reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
+ emit_move_insn (reg, alt_reg);
+ }
+ else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ do_restore (gen_movdi_x, alt_reg, cfa_off);
+ cfa_off -= 8;
+ reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
+ emit_move_insn (reg, alt_reg);
+ }
+
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
+ {
+ if (current_frame_info.r[reg_save_ar_lc] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
+ reg_emitted (reg_save_ar_lc);
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ do_restore (gen_movdi_x, alt_reg, cfa_off);
+ cfa_off -= 8;
+ }
+ reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
+ emit_move_insn (reg, alt_reg);
+ }
+
+ /* Restore the return pointer. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
+ {
+ if (current_frame_info.r[reg_save_b0] != 0)
+ {
+ alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
+ reg_emitted (reg_save_b0);
+ }
+ else
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ do_restore (gen_movdi_x, alt_reg, cfa_off);
+ cfa_off -= 8;
+ }
+ reg = gen_rtx_REG (DImode, BR_REG (0));
+ emit_move_insn (reg, alt_reg);
+ }
+
+ /* We should now be at the base of the gr/br/fr spill area. */
+ gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size));
+
+ /* The GP may be stored on the stack in the prologue, but it's
+ never restored in the epilogue. Skip the stack slot. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
+ cfa_off -= 8;
+
+ /* Restore all general registers. */
+ for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ reg = gen_rtx_REG (DImode, regno);
+ do_restore (gen_gr_restore, reg, cfa_off);
+ cfa_off -= 8;
+ }
+
+ /* Restore the branch registers. */
+ for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ alt_regno = next_scratch_gr_reg ();
+ alt_reg = gen_rtx_REG (DImode, alt_regno);
+ do_restore (gen_movdi_x, alt_reg, cfa_off);
+ cfa_off -= 8;
+ reg = gen_rtx_REG (DImode, regno);
+ emit_move_insn (reg, alt_reg);
+ }
+
+ /* Restore floating point registers. */
+ for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ {
+ gcc_assert (!(cfa_off & 15));
+ reg = gen_rtx_REG (XFmode, regno);
+ do_restore (gen_fr_restore_x, reg, cfa_off);
+ cfa_off -= 16;
+ }
+
+ /* Restore ar.unat for real. */
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
+ {
+ reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
+ emit_move_insn (reg, ar_unat_save_reg);
+ }
+
+ gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
+
+ finish_spill_pointers ();
+
+ if (current_frame_info.total_size
+ || cfun->machine->ia64_eh_epilogue_sp
+ || frame_pointer_needed)
+ {
+ /* ??? At this point we must generate a magic insn that appears to
+ modify the spill iterators, the stack pointer, and the frame
+ pointer. This would allow the most scheduling freedom. For now,
+ just hard stop. */
+ emit_insn (gen_blockage ());
+ }
+
+ if (cfun->machine->ia64_eh_epilogue_sp)
+ emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
+ else if (frame_pointer_needed)
+ {
+ insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
+ }
+ else if (current_frame_info.total_size)
+ {
+ rtx offset, frame_size_rtx;
+
+ frame_size_rtx = GEN_INT (current_frame_info.total_size);
+ if (satisfies_constraint_I (frame_size_rtx))
+ offset = frame_size_rtx;
+ else
+ {
+ regno = next_scratch_gr_reg ();
+ offset = gen_rtx_REG (DImode, regno);
+ emit_move_insn (offset, frame_size_rtx);
+ }
+
+ insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
+ offset));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (DImode,
+ stack_pointer_rtx,
+ frame_size_rtx)));
+ }
+
+ if (cfun->machine->ia64_eh_epilogue_bsp)
+ emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
+
+ if (! sibcall_p)
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
+ else
+ {
+ int fp = GR_REG (2);
+ /* We need a throwaway register here; r0 and r1 are reserved,
+ so r2 is the first available call-clobbered register. If
+ there was a frame_pointer register, we may have swapped the
+ names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
+ sure we're using the string "r2" when emitting the register
+ name for the assembler. */
+ if (current_frame_info.r[reg_fp]
+ && current_frame_info.r[reg_fp] == GR_REG (2))
+ fp = HARD_FRAME_POINTER_REGNUM;
+
+ /* We must emit an alloc to force the input registers to become output
+ registers. Otherwise, if the callee tries to pass its parameters
+ through to another call without an intervening alloc, then these
+ values get lost. */
+ /* ??? We don't need to preserve all input registers. We only need to
+ preserve those input registers used as arguments to the sibling call.
+ It is unclear how to compute that number here. */
+ if (current_frame_info.n_input_regs != 0)
+ {
+ rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
+ insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
+ const0_rtx, const0_rtx,
+ n_inputs, const0_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+}
+
+/* Return 1 if br.ret can do all the work required to return from a
+ function. */
+
+int
+ia64_direct_return (void)
+{
+ if (reload_completed && ! frame_pointer_needed)
+ {
+ ia64_compute_frame_size (get_frame_size ());
+
+ return (current_frame_info.total_size == 0
+ && current_frame_info.n_spilled == 0
+ && current_frame_info.r[reg_save_b0] == 0
+ && current_frame_info.r[reg_save_pr] == 0
+ && current_frame_info.r[reg_save_ar_pfs] == 0
+ && current_frame_info.r[reg_save_ar_unat] == 0
+ && current_frame_info.r[reg_save_ar_lc] == 0);
+ }
+ return 0;
+}
+
+/* Return the magic cookie that we use to hold the return address
+ during early compilation. */
+
+rtx
+ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return NULL;
+ return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
+}
+
+/* Split this value after reload, now that we know where the return
+ address is saved. */
+
+void
+ia64_split_return_addr_rtx (rtx dest)
+{
+ rtx src;
+
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
+ {
+ if (current_frame_info.r[reg_save_b0] != 0)
+ {
+ src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
+ reg_emitted (reg_save_b0);
+ }
+ else
+ {
+ HOST_WIDE_INT off;
+ unsigned int regno;
+ rtx off_r;
+
+ /* Compute offset from CFA for BR0. */
+ /* ??? Must be kept in sync with ia64_expand_prologue. */
+ off = (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size);
+ for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
+ off -= 8;
+
+ /* Convert CFA offset to a register based offset. */
+ if (frame_pointer_needed)
+ src = hard_frame_pointer_rtx;
+ else
+ {
+ src = stack_pointer_rtx;
+ off += current_frame_info.total_size;
+ }
+
+ /* Load address into scratch register. */
+ off_r = GEN_INT (off);
+ if (satisfies_constraint_I (off_r))
+ emit_insn (gen_adddi3 (dest, src, off_r));
+ else
+ {
+ emit_move_insn (dest, off_r);
+ emit_insn (gen_adddi3 (dest, src, dest));
+ }
+
+ src = gen_rtx_MEM (Pmode, dest);
+ }
+ }
+ else
+ src = gen_rtx_REG (DImode, BR_REG (0));
+
+ emit_move_insn (dest, src);
+}
+
+int
+ia64_hard_regno_rename_ok (int from, int to)
+{
+ /* Don't clobber any of the registers we reserved for the prologue. */
+ unsigned int r;
+
+ for (r = reg_fp; r <= reg_save_ar_lc; r++)
+ if (to == current_frame_info.r[r]
+ || from == current_frame_info.r[r]
+ || to == emitted_frame_related_regs[r]
+ || from == emitted_frame_related_regs[r])
+ return 0;
+
+ /* Don't use output registers outside the register frame. */
+ if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
+ return 0;
+
+ /* Retain even/oddness on predicate register pairs. */
+ if (PR_REGNO_P (from) && PR_REGNO_P (to))
+ return (from & 1) == (to & 1);
+
+ return 1;
+}
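+
+/* E.g., the predicate test above permits a rename between two PR
+ registers only when their register numbers share the same parity,
+ so even-numbered predicates can only trade places with other
+ even-numbered ones. */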
+
+/* Target hook for assembling integer objects. Handle word-sized
+ aligned objects and detect the cases when @fptr is needed. */
+
+static bool
+ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (size == POINTER_SIZE / BITS_PER_UNIT
+ && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (x))
+ {
+ static const char * const directive[2][2] = {
+ /* 64-bit pointer */ /* 32-bit pointer */
+ { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */
+ { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */
+ };
+ fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
+ output_addr_const (asm_out_file, x);
+ fputs (")\n", asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
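+
+/* For instance, an aligned 8-byte pointer to some function foo is
+ emitted as "data8 @fptr(foo)", so the assembler and linker
+ materialize the official function descriptor rather than a bare
+ code address. */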
+
+/* Emit the function prologue. */
+
+static void
+ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ int mask, grsave, grsave_prev;
+
+ if (current_frame_info.need_regstk)
+ fprintf (file, "\t.regstk %d, %d, %d, %d\n",
+ current_frame_info.n_input_regs,
+ current_frame_info.n_local_regs,
+ current_frame_info.n_output_regs,
+ current_frame_info.n_rotate_regs);
+
+ if (ia64_except_unwind_info (&global_options) != UI_TARGET)
+ return;
+
+ /* Emit the .prologue directive. */
+
+ mask = 0;
+ grsave = grsave_prev = 0;
+ if (current_frame_info.r[reg_save_b0] != 0)
+ {
+ mask |= 8;
+ grsave = grsave_prev = current_frame_info.r[reg_save_b0];
+ }
+ if (current_frame_info.r[reg_save_ar_pfs] != 0
+ && (grsave_prev == 0
+ || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
+ {
+ mask |= 4;
+ if (grsave_prev == 0)
+ grsave = current_frame_info.r[reg_save_ar_pfs];
+ grsave_prev = current_frame_info.r[reg_save_ar_pfs];
+ }
+ if (current_frame_info.r[reg_fp] != 0
+ && (grsave_prev == 0
+ || current_frame_info.r[reg_fp] == grsave_prev + 1))
+ {
+ mask |= 2;
+ if (grsave_prev == 0)
+ grsave = HARD_FRAME_POINTER_REGNUM;
+ grsave_prev = current_frame_info.r[reg_fp];
+ }
+ if (current_frame_info.r[reg_save_pr] != 0
+ && (grsave_prev == 0
+ || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
+ {
+ mask |= 1;
+ if (grsave_prev == 0)
+ grsave = current_frame_info.r[reg_save_pr];
+ }
+
+ if (mask && TARGET_GNU_AS)
+ fprintf (file, "\t.prologue %d, %d\n", mask,
+ ia64_dbx_register_number (grsave));
+ else
+ fputs ("\t.prologue\n", file);
+
+ /* Emit a .spill directive, if necessary, to relocate the base of
+ the register spill area. */
+ if (current_frame_info.spill_cfa_off != -16)
+ fprintf (file, "\t.spill %ld\n",
+ (long) (current_frame_info.spill_cfa_off
+ + current_frame_info.spill_size));
+}
+
+/* Emit the .body directive at the scheduled end of the prologue. */
+
+static void
+ia64_output_function_end_prologue (FILE *file)
+{
+ if (ia64_except_unwind_info (&global_options) != UI_TARGET)
+ return;
+
+ fputs ("\t.body\n", file);
+}
+
+/* Emit the function epilogue. */
+
+static void
+ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ int i;
+
+ if (current_frame_info.r[reg_fp])
+ {
+ const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
+ reg_names[HARD_FRAME_POINTER_REGNUM]
+ = reg_names[current_frame_info.r[reg_fp]];
+ reg_names[current_frame_info.r[reg_fp]] = tmp;
+ reg_emitted (reg_fp);
+ }
+ if (! TARGET_REG_NAMES)
+ {
+ for (i = 0; i < current_frame_info.n_input_regs; i++)
+ reg_names[IN_REG (i)] = ia64_input_reg_names[i];
+ for (i = 0; i < current_frame_info.n_local_regs; i++)
+ reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
+ for (i = 0; i < current_frame_info.n_output_regs; i++)
+ reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
+ }
+
+ current_frame_info.initialized = 0;
+}
+
+int
+ia64_dbx_register_number (int regno)
+{
+ /* In ia64_expand_prologue we quite literally renamed the frame pointer
+ from its home at loc79 to something inside the register frame. We
+ must perform the same renumbering here for the debug info. */
+ if (current_frame_info.r[reg_fp])
+ {
+ if (regno == HARD_FRAME_POINTER_REGNUM)
+ regno = current_frame_info.r[reg_fp];
+ else if (regno == current_frame_info.r[reg_fp])
+ regno = HARD_FRAME_POINTER_REGNUM;
+ }
+
+ if (IN_REGNO_P (regno))
+ return 32 + regno - IN_REG (0);
+ else if (LOC_REGNO_P (regno))
+ return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
+ else if (OUT_REGNO_P (regno))
+ return (32 + current_frame_info.n_input_regs
+ + current_frame_info.n_local_regs + regno - OUT_REG (0));
+ else
+ return regno;
+}
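+
+/* For example, with 2 input and 3 local registers the debug numbers
+ come out dense: in0 and in1 map to 32 and 33, loc0..loc2 to 34..36,
+ and out0 to 37, mirroring the order of the physical register stack. */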
+
+/* Implement TARGET_TRAMPOLINE_INIT.
+
+ The trampoline should set the static chain pointer to value placed
+ into the trampoline and should branch to the specified routine.
+ To make the normal indirect-subroutine calling convention work,
+ the trampoline must look like a function descriptor; the first
+ word being the target address and the second being the target's
+ global pointer.
+
+ We abuse the concept of a global pointer by arranging for it
+ to point to the data we need to load. The complete trampoline
+ has the following form:
+
+ +-------------------+ \
+ TRAMP: | __ia64_trampoline | |
+ +-------------------+ > fake function descriptor
+ | TRAMP+16 | |
+ +-------------------+ /
+ | target descriptor |
+ +-------------------+
+ | static link |
+ +-------------------+
+*/
+
+static void
+ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx addr, addr_reg, tramp, eight = GEN_INT (8);
+
+ /* The Intel assembler requires that the global __ia64_trampoline symbol
+ be declared explicitly. */
+ if (!TARGET_GNU_AS)
+ {
+ static bool declared_ia64_trampoline = false;
+
+ if (!declared_ia64_trampoline)
+ {
+ declared_ia64_trampoline = true;
+ (*targetm.asm_out.globalize_label) (asm_out_file,
+ "__ia64_trampoline");
+ }
+ }
+
+ /* Make sure addresses are Pmode even if we are in ILP32 mode. */
+ addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
+ fnaddr = convert_memory_address (Pmode, fnaddr);
+ static_chain = convert_memory_address (Pmode, static_chain);
+
+ /* Load up our iterator. */
+ addr_reg = copy_to_reg (addr);
+ m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
+
+ /* The first two words are the fake descriptor:
+ __ia64_trampoline, ADDR+16. */
+ tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
+ in the Macro-32 compiler) and changed the semantics of the LTOFF22
+ relocation against function symbols to make it identical to the
+ LTOFF_FPTR22 relocation. Emit the latter directly to stay within
+ strict ELF and dereference to get the bare code address. */
+ rtx reg = gen_reg_rtx (Pmode);
+ SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
+ emit_move_insn (reg, tramp);
+ emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
+ tramp = reg;
+ }
+ emit_move_insn (m_tramp, tramp);
+ emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
+ m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
+
+ emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
+ emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
+ m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
+
+ /* The third word is the target descriptor. */
+ emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
+ emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
+ m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
+
+ /* The fourth word is the static chain. */
+ emit_move_insn (m_tramp, static_chain);
+}
+
+/* Do any needed setup for a variadic function. CUM has not been updated
+ for the last named argument, which has type TYPE and mode MODE.
+
+ We generate the actual spill instructions during prologue generation. */
+
+static void
+ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int * pretend_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ CUMULATIVE_ARGS next_cum = *cum;
+
+ /* Skip the current argument. */
+ ia64_function_arg_advance (&next_cum, mode, type, 1);
+
+ if (next_cum.words < MAX_ARGUMENT_SLOTS)
+ {
+ int n = MAX_ARGUMENT_SLOTS - next_cum.words;
+ *pretend_size = n * UNITS_PER_WORD;
+ cfun->machine->n_varargs = n;
+ }
+}
+
+/* Check whether TYPE is a homogeneous floating point aggregate. If
+ it is, return the mode of the floating point type that appears
+ in all leaves. If it is not, return VOIDmode.
+
+ An aggregate is a homogeneous floating point aggregate if all
+ fields/elements in it have the same floating point type (e.g.,
+ SFmode). 128-bit quad-precision floats are excluded.
+
+ Variable sized aggregates should never arrive here, since we should
+ have already decided to pass them by reference. Top-level zero-sized
+ aggregates are excluded because our parallels crash the middle-end. */
+
+static enum machine_mode
+hfa_element_mode (const_tree type, bool nested)
+{
+ enum machine_mode element_mode = VOIDmode;
+ enum machine_mode mode;
+ enum tree_code code = TREE_CODE (type);
+ int know_element_mode = 0;
+ tree t;
+
+ if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
+ return VOIDmode;
+
+ switch (code)
+ {
+ case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
+ case BOOLEAN_TYPE: case POINTER_TYPE:
+ case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
+ case LANG_TYPE: case FUNCTION_TYPE:
+ return VOIDmode;
+
+ /* Fortran complex types are supposed to be HFAs, so we need to handle
+ gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
+ types though. */
+ case COMPLEX_TYPE:
+ if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
+ && TYPE_MODE (type) != TCmode)
+ return GET_MODE_INNER (TYPE_MODE (type));
+ else
+ return VOIDmode;
+
+ case REAL_TYPE:
+ /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
+ mode if this is contained within an aggregate. */
+ if (nested && TYPE_MODE (type) != TFmode)
+ return TYPE_MODE (type);
+ else
+ return VOIDmode;
+
+ case ARRAY_TYPE:
+ return hfa_element_mode (TREE_TYPE (type), 1);
+
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
+ {
+ if (TREE_CODE (t) != FIELD_DECL)
+ continue;
+
+ mode = hfa_element_mode (TREE_TYPE (t), 1);
+ if (know_element_mode)
+ {
+ if (mode != element_mode)
+ return VOIDmode;
+ }
+ else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
+ return VOIDmode;
+ else
+ {
+ know_element_mode = 1;
+ element_mode = mode;
+ }
+ }
+ return element_mode;
+
+ default:
+ /* If we reach here, we probably have some front-end specific type
+ that the backend doesn't know about. This can happen via the
+ aggregate_value_p call in init_function_start. All we can do is
+ ignore unknown tree types. */
+ return VOIDmode;
+ }
+
+ return VOIDmode;
+}
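+
+/* Some examples of the classification above: a struct of three floats
+ yields SFmode (an HFA); a nested double field yields DFmode; a struct
+ mixing float and double yields VOIDmode because the element modes
+ differ; _Complex double yields DFmode via GET_MODE_INNER; and the
+ TFmode/TCmode quad-precision cases are rejected by design. */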
+
+/* Return the number of words required to hold a quantity of TYPE and MODE
+ when passed as an argument. */
+static int
+ia64_function_arg_words (const_tree type, enum machine_mode mode)
+{
+ int words;
+
+ if (mode == BLKmode)
+ words = int_size_in_bytes (type);
+ else
+ words = GET_MODE_SIZE (mode);
+
+ return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */
+}
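+
+/* E.g., with UNITS_PER_WORD == 8, a 12-byte BLKmode aggregate occupies
+ (12 + 7) / 8 == 2 argument slots, while an 8-byte DImode value takes
+ exactly one. */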
+
+/* Return the number of registers that should be skipped so the current
+ argument (described by TYPE and WORDS) will be properly aligned.
+
+ Integer and float arguments larger than 8 bytes start at the next
+ even boundary. Aggregates larger than 8 bytes start at the next
+ even boundary if the aggregate has 16 byte alignment. Note that
+ in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
+ but are still to be aligned in registers.
+
+ ??? The ABI does not specify how to handle aggregates with
+ alignment from 9 to 15 bytes, or greater than 16. We handle them
+ all as if they had 16 byte alignment. Such aggregates can occur
+ only if gcc extensions are used. */
+static int
+ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
+ const_tree type, int words)
+{
+ /* No registers are skipped on VMS. */
+ if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
+ return 0;
+
+ if (type
+ && TREE_CODE (type) != INTEGER_TYPE
+ && TREE_CODE (type) != REAL_TYPE)
+ return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
+ else
+ return words > 1;
+}
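+
+/* Concretely: when cum->words is odd (and the ABI is not OpenVMS), a
+ 16-byte-aligned aggregate or any integer/float wider than one word is
+ pushed to the next even slot (offset 1); anything else packs into the
+ odd slot (offset 0). */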
+
+/* Return rtx for register where argument is passed, or zero if it is passed
+ on the stack. */
+/* ??? 128-bit quad-precision floats are always passed in general
+ registers. */
+
+static rtx
+ia64_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named, bool incoming)
+{
+ int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
+ int words = ia64_function_arg_words (type, mode);
+ int offset = ia64_function_arg_offset (cum, type, words);
+ enum machine_mode hfa_mode = VOIDmode;
+
+ /* For OpenVMS, emit the instruction setting up the argument register here,
+ when we know it will be emitted together with the other argument setup
+ insns. This is not conceptually the best place to do this, but it is
+ the easiest, as we have convenient access to the cumulative args info. */
+
+ if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
+ && named == 1)
+ {
+ unsigned HOST_WIDE_INT regval = cum->words;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
+
+ emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
+ GEN_INT (regval));
+ }
+
+ /* If all argument slots are used, then it must go on the stack. */
+ if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
+ return 0;
+
+ /* Check for and handle homogeneous FP aggregates. */
+ if (type)
+ hfa_mode = hfa_element_mode (type, 0);
+
+ /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
+ and unprototyped hfas are passed specially. */
+ if (hfa_mode != VOIDmode && (! cum->prototype || named))
+ {
+ rtx loc[16];
+ int i = 0;
+ int fp_regs = cum->fp_regs;
+ int int_regs = cum->words + offset;
+ int hfa_size = GET_MODE_SIZE (hfa_mode);
+ int byte_size;
+ int args_byte_size;
+
+ /* If prototyped, pass it in FR regs then GR regs.
+ If not prototyped, pass it in both FR and GR regs.
+
+ If this is an SFmode aggregate, then it is possible to run out of
+ FR regs while GR regs are still left. In that case, we pass the
+ remaining part in the GR regs. */
+
+ /* Fill the FP regs. We do this always. We stop if we reach the end
+ of the argument, the last FP register, or the last argument slot. */
+
+ byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
+ args_byte_size = int_regs * UNITS_PER_WORD;
+ offset = 0;
+ for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
+ && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
+ + fp_regs)),
+ GEN_INT (offset));
+ offset += hfa_size;
+ args_byte_size += hfa_size;
+ fp_regs++;
+ }
+
+ /* If no prototype, then the whole thing must go in GR regs. */
+ if (! cum->prototype)
+ offset = 0;
+ /* If this is an SFmode aggregate, then we might have some left over
+ that needs to go in GR regs. */
+ else if (byte_size != offset)
+ int_regs += offset / UNITS_PER_WORD;
+
+ /* Fill in the GR regs. We must use DImode here, not the hfa mode. */
+
+ for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
+ {
+ enum machine_mode gr_mode = DImode;
+ unsigned int gr_size;
+
+ /* If we have an odd 4 byte hunk because we ran out of FR regs,
+ then this goes in a GR reg left adjusted/little endian, right
+ adjusted/big endian. */
+ /* ??? Currently this is handled wrong, because 4-byte hunks are
+ always right adjusted/little endian. */
+ if (offset & 0x4)
+ gr_mode = SImode;
+ /* If we have an even 4 byte hunk because the aggregate is a
+ multiple of 4 bytes in size, then this goes in a GR reg right
+ adjusted/little endian. */
+ else if (byte_size - offset == 4)
+ gr_mode = SImode;
+
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (gr_mode, (basereg
+ + int_regs)),
+ GEN_INT (offset));
+
+ gr_size = GET_MODE_SIZE (gr_mode);
+ offset += gr_size;
+ if (gr_size == UNITS_PER_WORD
+ || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
+ int_regs++;
+ else if (gr_size > UNITS_PER_WORD)
+ int_regs += gr_size / UNITS_PER_WORD;
+ }
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
+ }
+
+ /* On OpenVMS, a variable argument is either in Rn or Fn. */
+ else if (TARGET_ABI_OPEN_VMS && named == 0)
+ {
+ if (FLOAT_MODE_P (mode))
+ return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
+ else
+ return gen_rtx_REG (mode, basereg + cum->words);
+ }
+
+ /* Integral and aggregates go in general registers. If we have run out of
+ FR registers, then FP values must also go in general registers. This can
+ happen when we have a SFmode HFA. */
+ else if (mode == TFmode || mode == TCmode
+ || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
+ {
+ int byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
+ if (BYTES_BIG_ENDIAN
+ && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
+ && byte_size < UNITS_PER_WORD
+ && byte_size > 0)
+ {
+ rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode,
+ (basereg + cum->words
+ + offset)),
+ const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
+ }
+ else
+ return gen_rtx_REG (mode, basereg + cum->words + offset);
+
+ }
+
+ /* If there is a prototype, then FP values go in a FR register when
+ named, and in a GR register when unnamed. */
+ else if (cum->prototype)
+ {
+ if (named)
+ return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
+ /* In big-endian mode, an anonymous SFmode value must be represented
+ as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
+ the value into the high half of the general register. */
+ else if (BYTES_BIG_ENDIAN && mode == SFmode)
+ return gen_rtx_PARALLEL (mode,
+ gen_rtvec (1,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, basereg + cum->words + offset),
+ const0_rtx)));
+ else
+ return gen_rtx_REG (mode, basereg + cum->words + offset);
+ }
+ /* If there is no prototype, then FP values go in both FR and GR
+ registers. */
+ else
+ {
+ /* See comment above. */
+ enum machine_mode inner_mode =
+ (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
+
+ rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, (FR_ARG_FIRST
+ + cum->fp_regs)),
+ const0_rtx);
+ rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (inner_mode,
+ (basereg + cum->words
+ + offset)),
+ const0_rtx);
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
+ }
+}
+
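+/* Illustrative HFA case (assuming FR_ARG_FIRST is f8 and no argument slots
+ are in use yet): a named, prototyped struct { float x, y, z; } comes back
+ from the HFA branch above as
+ (parallel [(expr_list (reg:SF f8) 0)
+ (expr_list (reg:SF f9) 4)
+ (expr_list (reg:SF f10) 8)]). */
+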
+/* Implement TARGET_FUNCTION_ARG target hook. */
+
+static rtx
+ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return ia64_function_arg_1 (cum, mode, type, named, false);
+}
+
+/* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
+
+static rtx
+ia64_function_incoming_arg (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return ia64_function_arg_1 (cum, mode, type, named, true);
+}
+
+/* Return the number of bytes, at the beginning of the argument, that must be
+ put in registers. 0 if the argument is entirely in registers or entirely
+ in memory. */
+
+static int
+ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int words = ia64_function_arg_words (type, mode);
+ int offset = ia64_function_arg_offset (cum, type, words);
+
+ /* If all argument slots are used, then it must go on the stack. */
+ if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
+ return 0;
+
+ /* It doesn't matter whether the argument goes in FR or GR regs. If
+ it fits within the 8 argument slots, then it goes entirely in
+ registers. If it extends past the last argument slot, then the rest
+ goes on the stack. */
+
+ if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
+ return 0;
+
+ return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
+}
+
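+/* Worked example (illustrative, assuming MAX_ARGUMENT_SLOTS == 8 and
+ UNITS_PER_WORD == 8): a 4-word argument arriving with cum->words == 6 and
+ offset == 0 overflows the slots (6 + 4 > 8), so (8 - 6) * 8 == 16 bytes
+ go in registers and the remaining 16 go on the stack. */
+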
+/* Return ivms_arg_type based on machine_mode. */
+
+static enum ivms_arg_type
+ia64_arg_type (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case SFmode:
+ return FS;
+ case DFmode:
+ return FT;
+ default:
+ return I64;
+ }
+}
+
+/* Update CUM to point after this argument. This is patterned after
+ ia64_function_arg. */
+
+static void
+ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int words = ia64_function_arg_words (type, mode);
+ int offset = ia64_function_arg_offset (cum, type, words);
+ enum machine_mode hfa_mode = VOIDmode;
+
+ /* If all arg slots are already full, then there is nothing to do. */
+ if (cum->words >= MAX_ARGUMENT_SLOTS)
+ {
+ cum->words += words + offset;
+ return;
+ }
+
+ cum->atypes[cum->words] = ia64_arg_type (mode);
+ cum->words += words + offset;
+
+ /* Check for and handle homogeneous FP aggregates. */
+ if (type)
+ hfa_mode = hfa_element_mode (type, 0);
+
+ /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas
+ and unprototyped hfas are passed specially. */
+ if (hfa_mode != VOIDmode && (! cum->prototype || named))
+ {
+ int fp_regs = cum->fp_regs;
+ /* This is the original value of cum->words + offset. */
+ int int_regs = cum->words - words;
+ int hfa_size = GET_MODE_SIZE (hfa_mode);
+ int byte_size;
+ int args_byte_size;
+
+ /* If prototyped, pass it in FR regs then GR regs.
+ If not prototyped, pass it in both FR and GR regs.
+
+ If this is an SFmode aggregate, then it is possible to run out of
+ FR regs while GR regs are still left. In that case, we pass the
+ remaining part in the GR regs. */
+
+ /* Fill the FP regs. We do this always. We stop if we reach the end
+ of the argument, the last FP register, or the last argument slot. */
+
+ byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
+ args_byte_size = int_regs * UNITS_PER_WORD;
+ offset = 0;
+ for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
+ && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
+ {
+ offset += hfa_size;
+ args_byte_size += hfa_size;
+ fp_regs++;
+ }
+
+ cum->fp_regs = fp_regs;
+ }
+
+ /* On OpenVMS variable argument is either in Rn or Fn. */
+ else if (TARGET_ABI_OPEN_VMS && named == 0)
+ {
+ cum->int_regs = cum->words;
+ cum->fp_regs = cum->words;
+ }
+
+ /* Integral and aggregates go in general registers. So do TFmode FP values.
+ If we have run out of FR registers, then other FP values must also go in
+ general registers. This can happen when we have a SFmode HFA. */
+ else if (mode == TFmode || mode == TCmode
+ || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
+ cum->int_regs = cum->words;
+
+ /* If there is a prototype, then FP values go in a FR register when
+ named, and in a GR register when unnamed. */
+ else if (cum->prototype)
+ {
+ if (! named)
+ cum->int_regs = cum->words;
+ else
+ /* ??? Complex types should not reach here. */
+ cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
+ }
+ /* If there is no prototype, then FP values go in both FR and GR
+ registers. */
+ else
+ {
+ /* ??? Complex types should not reach here. */
+ cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
+ cum->int_regs = cum->words;
+ }
+}
+
+/* Arguments with alignment larger than 8 bytes start at the next even
+ boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
+ even though their normal alignment is 8 bytes. See ia64_function_arg. */
+
+static unsigned int
+ia64_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
+ return PARM_BOUNDARY * 2;
+
+ if (type)
+ {
+ if (TYPE_ALIGN (type) > PARM_BOUNDARY)
+ return PARM_BOUNDARY * 2;
+ else
+ return PARM_BOUNDARY;
+ }
+
+ if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
+ return PARM_BOUNDARY * 2;
+ else
+ return PARM_BOUNDARY;
+}
+
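+/* Example (illustrative, assuming PARM_BOUNDARY == 64): an aggregate with
+ TYPE_ALIGN of 128 bits gets a 128-bit boundary and hence an even starting
+ slot, while a plain double keeps the default 64-bit boundary. */
+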
+/* True if it is OK to do sibling call optimization for the specified
+ call expression EXP. DECL will be the called function, or NULL if
+ this is an indirect call. */
+static bool
+ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ /* We can't perform a sibcall if the current function has the syscall_linkage
+ attribute. */
+ if (lookup_attribute ("syscall_linkage",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
+ return false;
+
+ /* We must always return with our current GP. This means we can
+ only sibcall to functions defined in the current module unless
+ TARGET_CONST_GP is set to true. */
+ return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
+}
+
+
+/* Implement va_arg. */
+
+static tree
+ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ /* Variable sized types are passed by reference. */
+ if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
+ {
+ tree ptrtype = build_pointer_type (type);
+ tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
+ return build_va_arg_indirect_ref (addr);
+ }
+
+ /* Aggregate arguments with alignment larger than 8 bytes start at
+ the next even boundary. Integer and floating point arguments
+ do so if they are larger than 8 bytes, whether or not they are
+ also aligned larger than 8 bytes. */
+ if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
+ ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
+ {
+ tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
+ size_int (2 * UNITS_PER_WORD - 1));
+ t = fold_convert (sizetype, t);
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ size_int (-2 * UNITS_PER_WORD));
+ t = fold_convert (TREE_TYPE (valist), t);
+ gimplify_assign (unshare_expr (valist), t, pre_p);
+ }
+
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+}
+
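+/* The POINTER_PLUS/BIT_AND pair above is the usual align-up idiom; e.g.
+ with UNITS_PER_WORD == 8, a valist of 0x1028 becomes
+ (0x1028 + 15) & -16 == 0x1030 (illustrative arithmetic only). */
+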
+/* Return true if the function return value is returned in memory, false if
+ it is in a register. */
+
+static bool
+ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+ enum machine_mode hfa_mode;
+ HOST_WIDE_INT byte_size;
+
+ mode = TYPE_MODE (valtype);
+ byte_size = GET_MODE_SIZE (mode);
+ if (mode == BLKmode)
+ {
+ byte_size = int_size_in_bytes (valtype);
+ if (byte_size < 0)
+ return true;
+ }
+
+ /* Hfa's with up to 8 elements are returned in the FP argument registers. */
+
+ hfa_mode = hfa_element_mode (valtype, 0);
+ if (hfa_mode != VOIDmode)
+ {
+ int hfa_size = GET_MODE_SIZE (hfa_mode);
+
+ if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
+ return true;
+ else
+ return false;
+ }
+ else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
+ return true;
+ else
+ return false;
+}
+
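+/* Examples (illustrative, assuming MAX_INT_RETURN_SLOTS == 4): an HFA of
+ ten doubles (80 bytes, hfa_size 8) needs 10 > 8 slots and is returned in
+ memory, while a 32-byte non-HFA struct fits the four integer return slots
+ and is returned in registers. */
+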
+/* Return rtx for register that holds the function return value. */
+
+static rtx
+ia64_function_value (const_tree valtype,
+ const_tree fn_decl_or_type,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+ enum machine_mode hfa_mode;
+ int unsignedp;
+ const_tree func = fn_decl_or_type;
+
+ if (fn_decl_or_type
+ && !DECL_P (fn_decl_or_type))
+ func = NULL;
+
+ mode = TYPE_MODE (valtype);
+ hfa_mode = hfa_element_mode (valtype, 0);
+
+ if (hfa_mode != VOIDmode)
+ {
+ rtx loc[8];
+ int i;
+ int hfa_size;
+ int byte_size;
+ int offset;
+
+ hfa_size = GET_MODE_SIZE (hfa_mode);
+ byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
+ offset = 0;
+ for (i = 0; offset < byte_size; i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
+ GEN_INT (offset));
+ offset += hfa_size;
+ }
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
+ }
+ else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
+ return gen_rtx_REG (mode, FR_ARG_FIRST);
+ else
+ {
+ bool need_parallel = false;
+
+ /* In big-endian mode, we need to manage the layout of aggregates
+ in the registers so that we get the bits properly aligned in
+ the highpart of the registers. */
+ if (BYTES_BIG_ENDIAN
+ && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
+ need_parallel = true;
+
+ /* Something like struct S { long double x; char a[0] } is not an
+ HFA structure, and therefore doesn't go in fp registers. But
+ the middle-end will give it XFmode anyway, and XFmode values
+ don't normally fit in integer registers. So we need to smuggle
+ the value inside a parallel. */
+ else if (mode == XFmode || mode == XCmode || mode == RFmode)
+ need_parallel = true;
+
+ if (need_parallel)
+ {
+ rtx loc[8];
+ int offset;
+ int bytesize;
+ int i;
+
+ offset = 0;
+ bytesize = int_size_in_bytes (valtype);
+ /* An empty PARALLEL is invalid here, but the return value
+ doesn't matter for empty structs. */
+ if (bytesize == 0)
+ return gen_rtx_REG (mode, GR_RET_FIRST);
+ for (i = 0; offset < bytesize; i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode,
+ GR_RET_FIRST + i),
+ GEN_INT (offset));
+ offset += UNITS_PER_WORD;
+ }
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
+ }
+
+ mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
+ func ? TREE_TYPE (func) : NULL_TREE,
+ true);
+
+ return gen_rtx_REG (mode, GR_RET_FIRST);
+ }
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+ia64_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode,
+ (((GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ && (mode) != TFmode)
+ ? FR_RET_FIRST : GR_RET_FIRST));
+}
+
+/* Worker function for FUNCTION_VALUE_REGNO_P. */
+
+static bool
+ia64_function_value_regno_p (const unsigned int regno)
+{
+ return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
+ || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void
+ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ gcc_assert (size == 4 || size == 8);
+ if (size == 4)
+ fputs ("\tdata4.ua\t@dtprel(", file);
+ else
+ fputs ("\tdata8.ua\t@dtprel(", file);
+ output_addr_const (file, x);
+ fputs (")", file);
+}
+
+/* Print a memory address as an operand to reference that memory location. */
+
+/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps
+ also call this from ia64_print_operand for memory addresses. */
+
+void
+ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
+ rtx address ATTRIBUTE_UNUSED)
+{
+}
+
+/* Print an operand to an assembler instruction.
+ C Swap and print a comparison operator.
+ D Print an FP comparison operator.
+ E Print 32 - constant, for SImode shifts as extract.
+ e Print 64 - constant, for DImode rotates.
+ F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
+ a floating point register emitted normally.
+ G A floating point constant.
+ I Invert a predicate register by adding 1.
+ J Select the proper predicate register for a condition.
+ j Select the inverse predicate register for a condition.
+ O Append .acq for volatile load.
+ P Postincrement of a MEM.
+ Q Append .rel for volatile store.
+ R Print .s .d or nothing for a single, double or no truncation.
+ S Shift amount for shladd instruction.
+ T Print an 8-bit sign extended number (K) as a 32-bit unsigned number
+ for Intel assembler.
+ U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
+ for Intel assembler.
+ X A pair of floating point registers.
+ r Print register name, or constant 0 as r0. HP compatibility for
+ Linux kernel.
+ v Print vector constant value as an 8-byte integer value. */
+
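+/* For instance, an insn template like "cmp.%C0 %J1 = %r2, %r3" would print
+ a swapped comparison code, the predicate register selected for the
+ condition, and a register-or-r0 operand (an illustrative template, not
+ one of the actual ia64.md patterns). */
+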
+void
+ia64_print_operand (FILE * file, rtx x, int code)
+{
+ const char *str;
+
+ switch (code)
+ {
+ case 0:
+ /* Handled below. */
+ break;
+
+ case 'C':
+ {
+ enum rtx_code c = swap_condition (GET_CODE (x));
+ fputs (GET_RTX_NAME (c), file);
+ return;
+ }
+
+ case 'D':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ str = "neq";
+ break;
+ case UNORDERED:
+ str = "unord";
+ break;
+ case ORDERED:
+ str = "ord";
+ break;
+ case UNLT:
+ str = "nge";
+ break;
+ case UNLE:
+ str = "ngt";
+ break;
+ case UNGT:
+ str = "nle";
+ break;
+ case UNGE:
+ str = "nlt";
+ break;
+ default:
+ str = GET_RTX_NAME (GET_CODE (x));
+ break;
+ }
+ fputs (str, file);
+ return;
+
+ case 'E':
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
+ return;
+
+ case 'e':
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
+ return;
+
+ case 'F':
+ if (x == CONST0_RTX (GET_MODE (x)))
+ str = reg_names [FR_REG (0)];
+ else if (x == CONST1_RTX (GET_MODE (x)))
+ str = reg_names [FR_REG (1)];
+ else
+ {
+ gcc_assert (GET_CODE (x) == REG);
+ str = reg_names [REGNO (x)];
+ }
+ fputs (str, file);
+ return;
+
+ case 'G':
+ {
+ long val[4];
+ REAL_VALUE_TYPE rv;
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ real_to_target (val, &rv, GET_MODE (x));
+ if (GET_MODE (x) == SFmode)
+ fprintf (file, "0x%08lx", val[0] & 0xffffffff);
+ else if (GET_MODE (x) == DFmode)
+ fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
+ & 0xffffffff,
+ (WORDS_BIG_ENDIAN ? val[1] : val[0])
+ & 0xffffffff);
+ else
+ output_operand_lossage ("invalid %%G mode");
+ }
+ return;
+
+ case 'I':
+ fputs (reg_names [REGNO (x) + 1], file);
+ return;
+
+ case 'J':
+ case 'j':
+ {
+ unsigned int regno = REGNO (XEXP (x, 0));
+ if (GET_CODE (x) == EQ)
+ regno += 1;
+ if (code == 'j')
+ regno ^= 1;
+ fputs (reg_names [regno], file);
+ }
+ return;
+
+ case 'O':
+ if (MEM_VOLATILE_P (x))
+ fputs(".acq", file);
+ return;
+
+ case 'P':
+ {
+ HOST_WIDE_INT value;
+
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ default:
+ return;
+
+ case POST_MODIFY:
+ x = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ if (GET_CODE (x) == CONST_INT)
+ value = INTVAL (x);
+ else
+ {
+ gcc_assert (GET_CODE (x) == REG);
+ fprintf (file, ", %s", reg_names[REGNO (x)]);
+ return;
+ }
+ break;
+
+ case POST_INC:
+ value = GET_MODE_SIZE (GET_MODE (x));
+ break;
+
+ case POST_DEC:
+ value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
+ break;
+ }
+
+ fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
+ return;
+ }
+
+ case 'Q':
+ if (MEM_VOLATILE_P (x))
+ fputs(".rel", file);
+ return;
+
+ case 'R':
+ if (x == CONST0_RTX (GET_MODE (x)))
+ fputs(".s", file);
+ else if (x == CONST1_RTX (GET_MODE (x)))
+ fputs(".d", file);
+ else if (x == CONST2_RTX (GET_MODE (x)))
+ ;
+ else
+ output_operand_lossage ("invalid %%R value");
+ return;
+
+ case 'S':
+ fprintf (file, "%d", exact_log2 (INTVAL (x)));
+ return;
+
+ case 'T':
+ if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
+ {
+ fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
+ return;
+ }
+ break;
+
+ case 'U':
+ if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
+ {
+ const char *prefix = "0x";
+ if (INTVAL (x) & 0x80000000)
+ {
+ fprintf (file, "0xffffffff");
+ prefix = "";
+ }
+ fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
+ return;
+ }
+ break;
+
+ case 'X':
+ {
+ unsigned int regno = REGNO (x);
+ fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
+ }
+ return;
+
+ case 'r':
+ /* If this operand is the constant zero, write it as register zero.
+ Any register, zero, or CONST_INT value is OK here. */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x)], file);
+ else if (x == CONST0_RTX (GET_MODE (x)))
+ fputs ("r0", file);
+ else if (GET_CODE (x) == CONST_INT)
+ output_addr_const (file, x);
+ else
+ output_operand_lossage ("invalid %%r value");
+ return;
+
+ case 'v':
+ gcc_assert (GET_CODE (x) == CONST_VECTOR);
+ x = simplify_subreg (DImode, x, GET_MODE (x), 0);
+ break;
+
+ case '+':
+ {
+ const char *which;
+
+ /* For conditional branches, returns or calls, substitute
+ sptk, dptk, dpnt, or spnt for %s. */
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ /* Guess top and bottom 10% statically predicted. */
+ if (pred_val < REG_BR_PROB_BASE / 50
+ && br_prob_note_reliable_p (x))
+ which = ".spnt";
+ else if (pred_val < REG_BR_PROB_BASE / 2)
+ which = ".dpnt";
+ else if (pred_val < REG_BR_PROB_BASE / 100 * 98
+ || !br_prob_note_reliable_p (x))
+ which = ".dptk";
+ else
+ which = ".sptk";
+ }
+ else if (GET_CODE (current_output_insn) == CALL_INSN)
+ which = ".sptk";
+ else
+ which = ".dptk";
+
+ fputs (which, file);
+ return;
+ }
+
+ case ',':
+ x = current_insn_predicate;
+ if (x)
+ {
+ unsigned int regno = REGNO (XEXP (x, 0));
+ if (GET_CODE (x) == EQ)
+ regno += 1;
+ fprintf (file, "(%s) ", reg_names [regno]);
+ }
+ return;
+
+ default:
+ output_operand_lossage ("ia64_print_operand: unknown code");
+ return;
+ }
+
+ switch (GET_CODE (x))
+ {
+ /* This happens for the spill/restore instructions. */
+ case POST_INC:
+ case POST_DEC:
+ case POST_MODIFY:
+ x = XEXP (x, 0);
+ /* ... fall through ... */
+
+ case REG:
+ fputs (reg_names [REGNO (x)], file);
+ break;
+
+ case MEM:
+ {
+ rtx addr = XEXP (x, 0);
+ if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
+ addr = XEXP (addr, 0);
+ fprintf (file, "[%s]", reg_names [REGNO (addr)]);
+ break;
+ }
+
+ default:
+ output_addr_const (file, x);
+ break;
+ }
+
+ return;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+/* ??? This is incomplete. */
+
+static bool
+ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ switch (outer_code)
+ {
+ case SET:
+ *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
+ return true;
+ case PLUS:
+ if (satisfies_constraint_I (x))
+ *total = 0;
+ else if (satisfies_constraint_J (x))
+ *total = 1;
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ default:
+ if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
+ *total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case FMA:
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case MULT:
+ /* For multiplies wider than HImode, we have to go to the FPU,
+ which normally involves copies. Plus there's the latency
+ of the multiply itself, and the latency of the instructions to
+ transfer integer regs to FP regs. */
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ *total = COSTS_N_INSNS (4);
+ else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
+ *total = COSTS_N_INSNS (10);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case PLUS:
+ case MINUS:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+ /* FALLTHRU */
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ /* We make divide expensive, so that divide-by-constant will be
+ optimized to a multiply. */
+ *total = COSTS_N_INSNS (60);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
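+/* Illustrative readings of the table above: (plus (reg) (const_int 100))
+ costs 0 for the constant, since 100 satisfies the 14-bit "I" constraint,
+ whereas any DIV/MOD is costed at 60 insns so that division by a constant
+ is strength-reduced to a multiply. */
+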
+/* Calculate the cost of moving data from a register in class FROM to
+ one in class TO, using MODE. */
+
+static int
+ia64_register_move_cost (enum machine_mode mode, reg_class_t from_i,
+ reg_class_t to_i)
+{
+ enum reg_class from = (enum reg_class) from_i;
+ enum reg_class to = (enum reg_class) to_i;
+
+ /* ADDL_REGS is the same as GR_REGS for movement purposes. */
+ if (to == ADDL_REGS)
+ to = GR_REGS;
+ if (from == ADDL_REGS)
+ from = GR_REGS;
+
+ /* All costs are symmetric, so reduce the number of cases by putting the
+ lower-numbered class as the destination. */
+ if (from < to)
+ {
+ enum reg_class tmp = to;
+ to = from, from = tmp;
+ }
+
+ /* Moving between FR and GR in XFmode must be more expensive than 2,
+ so that we get secondary memory reloads. Within FR_REGS,
+ we have to make this at least as expensive as memory_move_cost
+ to avoid spectacularly poor register class preferencing. */
+ if (mode == XFmode || mode == RFmode)
+ {
+ if (to != GR_REGS || from != GR_REGS)
+ return memory_move_cost (mode, to, false);
+ else
+ return 3;
+ }
+
+ switch (to)
+ {
+ case PR_REGS:
+ /* Moving between PR registers takes two insns. */
+ if (from == PR_REGS)
+ return 3;
+ /* Moving between PR and anything but GR is impossible. */
+ if (from != GR_REGS)
+ return memory_move_cost (mode, to, false);
+ break;
+
+ case BR_REGS:
+ /* Moving between BR and anything but GR is impossible. */
+ if (from != GR_REGS && from != GR_AND_BR_REGS)
+ return memory_move_cost (mode, to, false);
+ break;
+
+ case AR_I_REGS:
+ case AR_M_REGS:
+ /* Moving between AR and anything but GR is impossible. */
+ if (from != GR_REGS)
+ return memory_move_cost (mode, to, false);
+ break;
+
+ case GR_REGS:
+ case FR_REGS:
+ case FP_REGS:
+ case GR_AND_FR_REGS:
+ case GR_AND_BR_REGS:
+ case ALL_REGS:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return 2;
+}
+
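+/* Illustrative costs implied above: GR<->GR in DImode is 2, PR<->PR is 3
+ (two insns), and FR<->GR in XFmode falls back to memory_move_cost so that
+ reload goes through a secondary memory slot. */
+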
+/* Calculate the cost of moving data of MODE from a register to or from
+ memory. */
+
+static int
+ia64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass,
+ bool in ATTRIBUTE_UNUSED)
+{
+ if (rclass == GENERAL_REGS
+ || rclass == FR_REGS
+ || rclass == FP_REGS
+ || rclass == GR_AND_FR_REGS)
+ return 4;
+ else
+ return 10;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions
+ on RCLASS to use when copying X into that class. */
+
+static reg_class_t
+ia64_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ switch (rclass)
+ {
+ case FR_REGS:
+ case FP_REGS:
+ /* Don't allow volatile mem reloads into floating point registers.
+ This is defined to force reload to choose the r/m case instead
+ of the f/f case when reloading (set (reg fX) (mem/v)). */
+ if (MEM_P (x) && MEM_VOLATILE_P (x))
+ return NO_REGS;
+
+ /* Force all unrecognized constants into the constant pool. */
+ if (CONSTANT_P (x))
+ return NO_REGS;
+ break;
+
+ case AR_M_REGS:
+ case AR_I_REGS:
+ if (!OBJECT_P (x))
+ return NO_REGS;
+ break;
+
+ default:
+ break;
+ }
+
+ return rclass;
+}
+
+/* This function returns the register class required for a secondary
+ register when copying between one of the registers in RCLASS, and X,
+ using MODE. A return value of NO_REGS means that no secondary register
+ is required. */
+
+enum reg_class
+ia64_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+ int regno = -1;
+
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ switch (rclass)
+ {
+ case BR_REGS:
+ case AR_M_REGS:
+ case AR_I_REGS:
+ /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
+ interaction. We end up with two pseudos with overlapping lifetimes,
+ both of which are equiv to the same constant, and both of which need
+ to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end
+ changes depending on the path length, which means the qty_first_reg
+ check in make_regs_eqv can give different answers at different times.
+ At some point I'll probably need a reload_indi pattern to handle
+ this.
+
+ We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
+ wound up with a FP register from GR_AND_FR_REGS. Extend that to all
+ non-general registers for good measure. */
+ if (regno >= 0 && ! GENERAL_REGNO_P (regno))
+ return GR_REGS;
+
+ /* This is needed if a pseudo used as a call_operand gets spilled to a
+ stack slot. */
+ if (GET_CODE (x) == MEM)
+ return GR_REGS;
+ break;
+
+ case FR_REGS:
+ case FP_REGS:
+ /* Need to go through general registers to get to other class regs. */
+ if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
+ return GR_REGS;
+
+ /* This can happen when a paradoxical subreg is an operand to the
+ muldi3 pattern. */
+ /* ??? This shouldn't be necessary after instruction scheduling is
+ enabled, because paradoxical subregs are not accepted by
+ register_operand when INSN_SCHEDULING is defined. Or alternatively,
+ stop the paradoxical subreg stupidity in the *_operand functions
+ in recog.c. */
+ if (GET_CODE (x) == MEM
+ && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
+ || GET_MODE (x) == QImode))
+ return GR_REGS;
+
+ /* This can happen because of the ior/and/etc patterns that accept FP
+ registers as operands. If the third operand is a constant, then it
+ needs to be reloaded into a FP register. */
+ if (GET_CODE (x) == CONST_INT)
+ return GR_REGS;
+
+ /* This can happen because of register elimination in a muldi3 insn.
+ E.g. `26107 * (unsigned long)&u'. */
+ if (GET_CODE (x) == PLUS)
+ return GR_REGS;
+ break;
+
+ case PR_REGS:
+ /* ??? This happens if we cse/gcse a BImode value across a call,
+ and the function has a nonlocal goto. This is because global
+ does not allocate call crossing pseudos to hard registers when
+ crtl->has_nonlocal_goto is true. This is relatively
+ common for C++ programs that use exceptions. To reproduce,
+ return NO_REGS and compile libstdc++. */
+ if (GET_CODE (x) == MEM)
+ return GR_REGS;
+
+ /* This can happen when we take a BImode subreg of a DImode value,
+ and that DImode value winds up in some non-GR register. */
+ if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
+ return GR_REGS;
+ break;
+
+ default:
+ break;
+ }
+
+ return NO_REGS;
+}
+
+
+/* Implement targetm.unspec_may_trap_p hook. */
+static int
+ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
+{
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_LDA:
+ case UNSPEC_LDS:
+ case UNSPEC_LDSA:
+ case UNSPEC_LDCCLR:
+ case UNSPEC_CHKACLR:
+ case UNSPEC_CHKS:
+ /* These unspecs are just wrappers. */
+ return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
+ }
+ }
+
+ return default_unspec_may_trap_p (x, flags);
+}
+
+
+/* Parse the -mfixed-range= option string. */
+
+static void
+fix_range (const char *const_str)
+{
+ int i, first, last;
+ char *str, *dash, *comma;
+
+ /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
+ REG2 are either register names or register numbers. The effect
+ of this option is to mark the registers in the range from REG1 to
+ REG2 as ``fixed'' so they won't be used by the compiler. This is
+ used, e.g., to ensure that kernel mode code doesn't use f32-f127. */
+
+ i = strlen (const_str);
+ str = (char *) alloca (i + 1);
+ memcpy (str, const_str, i + 1);
+
+ while (1)
+ {
+ dash = strchr (str, '-');
+ if (!dash)
+ {
+ warning (0, "value of -mfixed-range must have form REG1-REG2");
+ return;
+ }
+ *dash = '\0';
+
+ comma = strchr (dash + 1, ',');
+ if (comma)
+ *comma = '\0';
+
+ first = decode_reg_name (str);
+ if (first < 0)
+ {
+ warning (0, "unknown register name: %s", str);
+ return;
+ }
+
+ last = decode_reg_name (dash + 1);
+ if (last < 0)
+ {
+ warning (0, "unknown register name: %s", dash + 1);
+ return;
+ }
+
+ *dash = '-';
+
+ if (first > last)
+ {
+ warning (0, "%s-%s is an empty range", str, dash + 1);
+ return;
+ }
+
+ for (i = first; i <= last; ++i)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ if (!comma)
+ break;
+
+ *comma = ',';
+ str = comma + 1;
+ }
+}
+
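+/* Usage example: -mfixed-range=f32-f127 marks the rotating FP registers
+ fixed, and -mfixed-range=f2-f5,f32-f127 shows the comma-separated form
+ (names as accepted by decode_reg_name). */
+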
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+ia64_handle_option (size_t code, const char *arg, int value)
+{
+ switch (code)
+ {
+ case OPT_mfixed_range_:
+ fix_range (arg);
+ return true;
+
+ case OPT_mtls_size_:
+ if (value != 14 && value != 22 && value != 64)
+ error ("bad value %<%s%> for -mtls-size= switch", arg);
+ return true;
+
+ case OPT_mtune_:
+ {
+ static struct pta
+ {
+ const char *name; /* processor name or nickname. */
+ enum processor_type processor;
+ }
+ const processor_alias_table[] =
+ {
+ {"itanium2", PROCESSOR_ITANIUM2},
+ {"mckinley", PROCESSOR_ITANIUM2},
+ };
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+ int i;
+
+ for (i = 0; i < pta_size; i++)
+ if (!strcmp (arg, processor_alias_table[i].name))
+ {
+ ia64_tune = processor_alias_table[i].processor;
+ break;
+ }
+ if (i == pta_size)
+ error ("bad value %<%s%> for -mtune= switch", arg);
+ return true;
+ }
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+
+static void
+ia64_option_override (void)
+{
+ if (TARGET_AUTO_PIC)
+ target_flags |= MASK_CONST_GP;
+
+ /* Numerous experiments show that IRA-based loop pressure
+ calculation works better for RTL loop invariant motion on targets
+ with enough (>= 32) registers. It is an expensive optimization,
+ so it is enabled only for peak performance. */
+ if (optimize >= 3)
+ flag_ira_loop_pressure = 1;
+
+ ia64_section_threshold = (global_options_set.x_g_switch_value
+ ? g_switch_value
+ : IA64_DEFAULT_GVALUE);
+
+ init_machine_status = ia64_init_machine_status;
+
+ if (align_functions <= 0)
+ align_functions = 64;
+ if (align_loops <= 0)
+ align_loops = 32;
+ if (TARGET_ABI_OPEN_VMS)
+ flag_no_common = 1;
+
+ ia64_override_options_after_change ();
+}
+
+/* Implement targetm.override_options_after_change. */
+
+static void
+ia64_override_options_after_change (void)
+{
+ if (optimize >= 3
+ && !global_options_set.x_flag_selective_scheduling
+ && !global_options_set.x_flag_selective_scheduling2)
+ {
+ flag_selective_scheduling2 = 1;
+ flag_sel_sched_pipelining = 1;
+ }
+ if (mflag_sched_control_spec == 2)
+ {
+ /* Control speculation is on by default for the selective scheduler,
+ but not for the Haifa scheduler. */
+ mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
+ }
+ if (flag_sel_sched_pipelining && flag_auto_inc_dec)
+ {
+ /* FIXME: remove this when we'd implement breaking autoinsns as
+ a transformation. */
+ flag_auto_inc_dec = 0;
+ }
+}
+
+/* Initialize the record of emitted frame related registers. */
+
+void
+ia64_init_expanders (void)
+{
+ memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
+}
+
+static struct machine_function *
+ia64_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+static enum attr_itanium_class ia64_safe_itanium_class (rtx);
+static enum attr_type ia64_safe_type (rtx);
+
+static enum attr_itanium_class
+ia64_safe_itanium_class (rtx insn)
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_itanium_class (insn);
+ else if (DEBUG_INSN_P (insn))
+ return ITANIUM_CLASS_IGNORE;
+ else
+ return ITANIUM_CLASS_UNKNOWN;
+}
+
+static enum attr_type
+ia64_safe_type (rtx insn)
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_type (insn);
+ else
+ return TYPE_UNKNOWN;
+}
+
+/* The following collection of routines emit instruction group stop bits as
+ necessary to avoid dependencies. */
+
+/* Need to track some additional registers as far as serialization is
+ concerned so we can properly handle br.call and br.ret. We could
+ make these registers visible to gcc, but since these registers are
+ never explicitly used in gcc generated code, it seems wasteful to
+ do so (plus it would make the call and return patterns needlessly
+ complex). */
+#define REG_RP (BR_REG (0))
+#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
+/* This is used for volatile asms which may require a stop bit immediately
+ before and after them. */
+#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
+#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3)
+#define NUM_REGS (AR_UNAT_BIT_0 + 64)
+
+/* For each register, we keep track of how it has been written in the
+ current instruction group.
+
+ If a register is written unconditionally (no qualifying predicate),
+ WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
+
+ If a register is written if its qualifying predicate P is true, we
+ set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register
+ may be written again by the complement of P (P^1) and when this happens,
+ WRITE_COUNT gets set to 2.
+
+ The result of this is that whenever an insn attempts to write a register
+ whose WRITE_COUNT is two, we need to issue an insn group barrier first.
+
+ If a predicate register is written by a floating-point insn, we set
+ WRITTEN_BY_FP to true.
+
+ If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
+ to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */
+
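+/* For example: an unconditional "mov r4 = r5" sets r4's WRITE_COUNT
+ straight to 2, so any further write to r4 in the same instruction group
+ forces a stop bit first (an illustrative reading of the scheme above). */
+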
+#if GCC_VERSION >= 4000
+#define RWS_FIELD_TYPE __extension__ unsigned short
+#else
+#define RWS_FIELD_TYPE unsigned int
+#endif
+struct reg_write_state
+{
+ RWS_FIELD_TYPE write_count : 2;
+ RWS_FIELD_TYPE first_pred : 10;
+ RWS_FIELD_TYPE written_by_fp : 1;
+ RWS_FIELD_TYPE written_by_and : 1;
+ RWS_FIELD_TYPE written_by_or : 1;
+};
+
+/* Cumulative info for the current instruction group. */
+struct reg_write_state rws_sum[NUM_REGS];
+#ifdef ENABLE_CHECKING
+/* Bitmap whether a register has been written in the current insn. */
+HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
+ / HOST_BITS_PER_WIDEST_FAST_INT];
+
+static inline void
+rws_insn_set (int regno)
+{
+ gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
+ SET_HARD_REG_BIT (rws_insn, regno);
+}
+
+static inline int
+rws_insn_test (int regno)
+{
+ return TEST_HARD_REG_BIT (rws_insn, regno);
+}
+#else
+/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */
+unsigned char rws_insn[2];
+
+static inline void
+rws_insn_set (int regno)
+{
+ if (regno == REG_AR_CFM)
+ rws_insn[0] = 1;
+ else if (regno == REG_VOLATILE)
+ rws_insn[1] = 1;
+}
+
+static inline int
+rws_insn_test (int regno)
+{
+ if (regno == REG_AR_CFM)
+ return rws_insn[0];
+ if (regno == REG_VOLATILE)
+ return rws_insn[1];
+ return 0;
+}
+#endif
+
+/* Indicates whether this is the first instruction after a stop bit,
+ in which case we don't need another stop bit. Without this,
+ ia64_variable_issue will die when scheduling an alloc. */
+static int first_instruction;
+
+/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
+ RTL for one instruction. */
+struct reg_flags
+{
+ unsigned int is_write : 1; /* Is register being written? */
+ unsigned int is_fp : 1; /* Is register used as part of an fp op? */
+ unsigned int is_branch : 1; /* Is register used as part of a branch? */
+ unsigned int is_and : 1; /* Is register used as part of and.orcm? */
+ unsigned int is_or : 1; /* Is register used as part of or.andcm? */
+ unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */
+};
+
+static void rws_update (int, struct reg_flags, int);
+static int rws_access_regno (int, struct reg_flags, int);
+static int rws_access_reg (rtx, struct reg_flags, int);
+static void update_set_flags (rtx, struct reg_flags *);
+static int set_src_needs_barrier (rtx, struct reg_flags, int);
+static int rtx_needs_barrier (rtx, struct reg_flags, int);
+static void init_insn_group_barriers (void);
+static int group_barrier_needed (rtx);
+static int safe_group_barrier_needed (rtx);
+static int in_safe_group_barrier;
+
+/* Update *RWS for REGNO, which is being written by the current instruction,
+ with predicate PRED, and associated register flags in FLAGS. */
+
+static void
+rws_update (int regno, struct reg_flags flags, int pred)
+{
+ if (pred)
+ rws_sum[regno].write_count++;
+ else
+ rws_sum[regno].write_count = 2;
+ rws_sum[regno].written_by_fp |= flags.is_fp;
+ /* ??? Not tracking and/or across differing predicates. */
+ rws_sum[regno].written_by_and = flags.is_and;
+ rws_sum[regno].written_by_or = flags.is_or;
+ rws_sum[regno].first_pred = pred;
+}
+
+/* Handle an access to register REGNO of type FLAGS using predicate register
+ PRED. Update rws_sum array. Return 1 if this access creates
+ a dependency with an earlier instruction in the same group. */
+
+static int
+rws_access_regno (int regno, struct reg_flags flags, int pred)
+{
+ int need_barrier = 0;
+
+ gcc_assert (regno < NUM_REGS);
+
+ if (! PR_REGNO_P (regno))
+ flags.is_and = flags.is_or = 0;
+
+ if (flags.is_write)
+ {
+ int write_count;
+
+ rws_insn_set (regno);
+ write_count = rws_sum[regno].write_count;
+
+ switch (write_count)
+ {
+ case 0:
+ /* The register has not been written yet. */
+ if (!in_safe_group_barrier)
+ rws_update (regno, flags, pred);
+ break;
+
+ case 1:
+ /* The register has been written via a predicate. Treat
+ it like an unconditional write and do not try to check
+ for a complementary predicate register in an earlier write. */
+ if (flags.is_and && rws_sum[regno].written_by_and)
+ ;
+ else if (flags.is_or && rws_sum[regno].written_by_or)
+ ;
+ else
+ need_barrier = 1;
+ if (!in_safe_group_barrier)
+ rws_update (regno, flags, pred);
+ break;
+
+ case 2:
+ /* The register has been unconditionally written already. We
+ need a barrier. */
+ if (flags.is_and && rws_sum[regno].written_by_and)
+ ;
+ else if (flags.is_or && rws_sum[regno].written_by_or)
+ ;
+ else
+ need_barrier = 1;
+ if (!in_safe_group_barrier)
+ {
+ rws_sum[regno].written_by_and = flags.is_and;
+ rws_sum[regno].written_by_or = flags.is_or;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (flags.is_branch)
+ {
+ /* Branches have several RAW exceptions that allow us to avoid
+ barriers. */
+
+ if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
+ /* RAW dependencies on branch regs are permissible as long
+ as the writer is a non-branch instruction. Since we
+ never generate code that uses a branch register written
+ by a branch instruction, handling this case is
+ easy. */
+ return 0;
+
+ if (REGNO_REG_CLASS (regno) == PR_REGS
+ && ! rws_sum[regno].written_by_fp)
+ /* The predicates of a branch are available within the
+ same insn group as long as the predicate was written by
+ something other than a floating-point instruction. */
+ return 0;
+ }
+
+ if (flags.is_and && rws_sum[regno].written_by_and)
+ return 0;
+ if (flags.is_or && rws_sum[regno].written_by_or)
+ return 0;
+
+ switch (rws_sum[regno].write_count)
+ {
+ case 0:
+ /* The register has not been written yet. */
+ break;
+
+ case 1:
+ /* The register has been written via a predicate, assume we
+ need a barrier (don't check for complementary regs). */
+ need_barrier = 1;
+ break;
+
+ case 2:
+ /* The register has been unconditionally written already. We
+ need a barrier. */
+ need_barrier = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return need_barrier;
+}
+
+static int
+rws_access_reg (rtx reg, struct reg_flags flags, int pred)
+{
+ int regno = REGNO (reg);
+ int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
+
+ if (n == 1)
+ return rws_access_regno (regno, flags, pred);
+ else
+ {
+ int need_barrier = 0;
+ while (--n >= 0)
+ need_barrier |= rws_access_regno (regno + n, flags, pred);
+ return need_barrier;
+ }
+}
+
+/* Examine X, which is a SET rtx, and update the flags, the predicate, and
+ the condition, stored in *PFLAGS, *PPRED and *PCOND. */
+
+static void
+update_set_flags (rtx x, struct reg_flags *pflags)
+{
+ rtx src = SET_SRC (x);
+
+ switch (GET_CODE (src))
+ {
+ case CALL:
+ return;
+
+ case IF_THEN_ELSE:
+ /* There are four cases here:
+ (1) The destination is (pc), in which case this is a branch,
+ nothing here applies.
+ (2) The destination is ar.lc, in which case this is a
+ doloop_end_internal.
+ (3) The destination is an fp register, in which case this is
+ an fselect instruction.
+ (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
+ this is a check load.
+ In all cases, nothing we do in this function applies. */
+ return;
+
+ default:
+ if (COMPARISON_P (src)
+ && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
+ /* Set pflags->is_fp to 1 so that we know we're dealing
+ with a floating point comparison when processing the
+ destination of the SET. */
+ pflags->is_fp = 1;
+
+ /* Discover if this is a parallel comparison. We only handle
+ and.orcm and or.andcm at present, since we must retain a
+ strict inverse on the predicate pair. */
+ else if (GET_CODE (src) == AND)
+ pflags->is_and = 1;
+ else if (GET_CODE (src) == IOR)
+ pflags->is_or = 1;
+
+ break;
+ }
+}
+
+/* Subroutine of rtx_needs_barrier; this function determines whether the
+ source of a given SET rtx found in X needs a barrier. FLAGS and PRED
+ are as in rtx_needs_barrier. COND is an rtx that holds the condition
+ for this insn. */
+
+static int
+set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
+{
+ int need_barrier = 0;
+ rtx dst;
+ rtx src = SET_SRC (x);
+
+ if (GET_CODE (src) == CALL)
+ /* We don't need to worry about the result registers that
+ get written by subroutine call. */
+ return rtx_needs_barrier (src, flags, pred);
+ else if (SET_DEST (x) == pc_rtx)
+ {
+ /* X is a conditional branch. */
+ /* ??? This seems redundant, as the caller sets this bit for
+ all JUMP_INSNs. */
+ if (!ia64_spec_check_src_p (src))
+ flags.is_branch = 1;
+ return rtx_needs_barrier (src, flags, pred);
+ }
+
+ if (ia64_spec_check_src_p (src))
+ /* Avoid checking one register twice (in condition
+ and in 'then' section) for ldc pattern. */
+ {
+ gcc_assert (REG_P (XEXP (src, 2)));
+ need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
+
+ /* We process MEM below. */
+ src = XEXP (src, 1);
+ }
+
+ need_barrier |= rtx_needs_barrier (src, flags, pred);
+
+ dst = SET_DEST (x);
+ if (GET_CODE (dst) == ZERO_EXTRACT)
+ {
+ need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
+ need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
+ }
+ return need_barrier;
+}
+
+/* Handle an access to rtx X of type FLAGS using predicate register
+ PRED. Return 1 if this access creates a dependency with an earlier
+ instruction in the same group. */
+
+static int
+rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
+{
+ int i, j;
+ int is_complemented = 0;
+ int need_barrier = 0;
+ const char *format_ptr;
+ struct reg_flags new_flags;
+ rtx cond;
+
+ if (! x)
+ return 0;
+
+ new_flags = flags;
+
+ switch (GET_CODE (x))
+ {
+ case SET:
+ update_set_flags (x, &new_flags);
+ need_barrier = set_src_needs_barrier (x, new_flags, pred);
+ if (GET_CODE (SET_SRC (x)) != CALL)
+ {
+ new_flags.is_write = 1;
+ need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
+ }
+ break;
+
+ case CALL:
+ new_flags.is_write = 0;
+ need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
+
+ /* Avoid multiple register writes, in case this is a pattern with
+ multiple CALL rtx. This avoids a failure in rws_access_reg. */
+ if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
+ {
+ new_flags.is_write = 1;
+ need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
+ need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
+ need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
+ }
+ break;
+
+ case COND_EXEC:
+ /* X is a predicated instruction. */
+
+ cond = COND_EXEC_TEST (x);
+ gcc_assert (!pred);
+ need_barrier = rtx_needs_barrier (cond, flags, 0);
+
+ if (GET_CODE (cond) == EQ)
+ is_complemented = 1;
+ cond = XEXP (cond, 0);
+ gcc_assert (GET_CODE (cond) == REG
+ && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
+ pred = REGNO (cond);
+ if (is_complemented)
+ ++pred;
+
+ need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
+ return need_barrier;
+
+ case CLOBBER:
+ case USE:
+ /* Clobber & use are for earlier compiler-phases only. */
+ break;
+
+ case ASM_OPERANDS:
+ case ASM_INPUT:
+ /* We always emit stop bits for traditional asms. We emit stop bits
+ for volatile extended asms if TARGET_VOL_ASM_STOP is true. */
+ if (GET_CODE (x) != ASM_OPERANDS
+ || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
+ {
+ /* Avoid writing the register multiple times if we have multiple
+ asm outputs. This avoids a failure in rws_access_reg. */
+ if (! rws_insn_test (REG_VOLATILE))
+ {
+ new_flags.is_write = 1;
+ rws_access_regno (REG_VOLATILE, new_flags, pred);
+ }
+ return 1;
+ }
+
+ /* For all ASM_OPERANDS, we must traverse the vector of input operands.
+ We cannot just fall through here since then we would be confused
+ by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
+ a traditional asm, unlike its normal usage. */
+
+ for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
+ if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
+ need_barrier = 1;
+ break;
+
+ case PARALLEL:
+ for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
+ {
+ rtx pat = XVECEXP (x, 0, i);
+ switch (GET_CODE (pat))
+ {
+ case SET:
+ update_set_flags (pat, &new_flags);
+ need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
+ break;
+
+ case USE:
+ case CALL:
+ case ASM_OPERANDS:
+ need_barrier |= rtx_needs_barrier (pat, flags, pred);
+ break;
+
+ case CLOBBER:
+ if (REG_P (XEXP (pat, 0))
+ && extract_asm_operands (x) != NULL_RTX
+ && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
+ {
+ new_flags.is_write = 1;
+ need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
+ new_flags, pred);
+ new_flags = flags;
+ }
+ break;
+
+ case RETURN:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
+ {
+ rtx pat = XVECEXP (x, 0, i);
+ if (GET_CODE (pat) == SET)
+ {
+ if (GET_CODE (SET_SRC (pat)) != CALL)
+ {
+ new_flags.is_write = 1;
+ need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
+ pred);
+ }
+ }
+ else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
+ need_barrier |= rtx_needs_barrier (pat, flags, pred);
+ }
+ break;
+
+ case SUBREG:
+ need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
+ break;
+ case REG:
+ if (REGNO (x) == AR_UNAT_REGNUM)
+ {
+ for (i = 0; i < 64; ++i)
+ need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
+ }
+ else
+ need_barrier = rws_access_reg (x, flags, pred);
+ break;
+
+ case MEM:
+ /* Find the regs used in memory address computation. */
+ new_flags.is_write = 0;
+ need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
+ break;
+
+ case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
+ case SYMBOL_REF: case LABEL_REF: case CONST:
+ break;
+
+ /* Operators with side-effects. */
+ case POST_INC: case POST_DEC:
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+
+ new_flags.is_write = 0;
+ need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
+ new_flags.is_write = 1;
+ need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
+ break;
+
+ case POST_MODIFY:
+ gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
+
+ new_flags.is_write = 0;
+ need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred);
+ need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
+ new_flags.is_write = 1;
+ need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
+ break;
+
+ /* Handle common unary and binary ops for efficiency. */
+ case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
+ case MOD: case UDIV: case UMOD: case AND: case IOR:
+ case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
+ case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
+ case NE: case EQ: case GE: case GT: case LE:
+ case LT: case GEU: case GTU: case LEU: case LTU:
+ need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
+ need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
+ break;
+
+ case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
+ case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
+ case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
+ case SQRT: case FFS: case POPCOUNT:
+ need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
+ break;
+
+ case VEC_SELECT:
+ /* VEC_SELECT's second argument is a PARALLEL with integers that
+ describe the elements selected. On ia64, those integers are
+ always constants. Avoid walking the PARALLEL so that we don't
+ get confused with "normal" parallels and then die. */
+ need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
+ break;
+
+ case UNSPEC:
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_LTOFF_DTPMOD:
+ case UNSPEC_LTOFF_DTPREL:
+ case UNSPEC_DTPREL:
+ case UNSPEC_LTOFF_TPREL:
+ case UNSPEC_TPREL:
+ case UNSPEC_PRED_REL_MUTEX:
+ case UNSPEC_PIC_CALL:
+ case UNSPEC_MF:
+ case UNSPEC_FETCHADD_ACQ:
+ case UNSPEC_BSP_VALUE:
+ case UNSPEC_FLUSHRS:
+ case UNSPEC_BUNDLE_SELECTOR:
+ break;
+
+ case UNSPEC_GR_SPILL:
+ case UNSPEC_GR_RESTORE:
+ {
+ HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
+ HOST_WIDE_INT bit = (offset >> 3) & 63;
+
+ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+ new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
+ need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
+ new_flags, pred);
+ break;
+ }
+
+ case UNSPEC_FR_SPILL:
+ case UNSPEC_FR_RESTORE:
+ case UNSPEC_GETF_EXP:
+ case UNSPEC_SETF_EXP:
+ case UNSPEC_ADDP4:
+ case UNSPEC_FR_SQRT_RECIP_APPROX:
+ case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
+ case UNSPEC_LDA:
+ case UNSPEC_LDS:
+ case UNSPEC_LDS_A:
+ case UNSPEC_LDSA:
+ case UNSPEC_CHKACLR:
+ case UNSPEC_CHKS:
+ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+ break;
+
+ case UNSPEC_FR_RECIP_APPROX:
+ case UNSPEC_SHRP:
+ case UNSPEC_COPYSIGN:
+ case UNSPEC_FR_RECIP_APPROX_RES:
+ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+ need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
+ break;
+
+ case UNSPEC_CMPXCHG_ACQ:
+ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
+ need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case UNSPEC_VOLATILE:
+ switch (XINT (x, 1))
+ {
+ case UNSPECV_ALLOC:
+ /* Alloc must always be the first instruction of a group.
+ We force this by always returning true. */
+ /* ??? We might get better scheduling if we explicitly check for
+ input/local/output register dependencies, and modify the
+ scheduler so that alloc is always reordered to the start of
+ the current group. We could then eliminate all of the
+ first_instruction code. */
+ rws_access_regno (AR_PFS_REGNUM, flags, pred);
+
+ new_flags.is_write = 1;
+ rws_access_regno (REG_AR_CFM, new_flags, pred);
+ return 1;
+
+ case UNSPECV_SET_BSP:
+ need_barrier = 1;
+ break;
+
+ case UNSPECV_BLOCKAGE:
+ case UNSPECV_INSN_GROUP_BARRIER:
+ case UNSPECV_BREAK:
+ case UNSPECV_PSAC_ALL:
+ case UNSPECV_PSAC_NORMAL:
+ return 0;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case RETURN:
+ new_flags.is_write = 0;
+ need_barrier = rws_access_regno (REG_RP, flags, pred);
+ need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
+
+ new_flags.is_write = 1;
+ need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
+ need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
+ break;
+
+ default:
+ format_ptr = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ switch (format_ptr[i])
+ {
+ case '0': /* unused field */
+ case 'i': /* integer */
+ case 'n': /* note */
+ case 'w': /* wide integer */
+ case 's': /* pointer to string */
+ case 'S': /* optional pointer to string */
+ break;
+
+ case 'e':
+ if (rtx_needs_barrier (XEXP (x, i), flags, pred))
+ need_barrier = 1;
+ break;
+
+ case 'E':
+ for (j = XVECLEN (x, i) - 1; j >= 0; --j)
+ if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
+ need_barrier = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+ return need_barrier;
+}
+
+/* Clear out the state for group_barrier_needed at the start of a
+ sequence of insns. */
+
+static void
+init_insn_group_barriers (void)
+{
+ memset (rws_sum, 0, sizeof (rws_sum));
+ first_instruction = 1;
+}
+
+/* Given the current state, determine whether a group barrier (a stop bit) is
+ necessary before INSN. Return nonzero if so. This modifies the state to
+ include the effects of INSN as a side-effect. */
+
+static int
+group_barrier_needed (rtx insn)
+{
+ rtx pat;
+ int need_barrier = 0;
+ struct reg_flags flags;
+
+ memset (&flags, 0, sizeof (flags));
+ switch (GET_CODE (insn))
+ {
+ case NOTE:
+ case DEBUG_INSN:
+ break;
+
+ case BARRIER:
+ /* A barrier doesn't imply an instruction group boundary. */
+ break;
+
+ case CODE_LABEL:
+ memset (rws_insn, 0, sizeof (rws_insn));
+ return 1;
+
+ case CALL_INSN:
+ flags.is_branch = 1;
+ flags.is_sibcall = SIBLING_CALL_P (insn);
+ memset (rws_insn, 0, sizeof (rws_insn));
+
+ /* Don't bundle a call following another call. */
+ if ((pat = prev_active_insn (insn))
+ && GET_CODE (pat) == CALL_INSN)
+ {
+ need_barrier = 1;
+ break;
+ }
+
+ need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
+ break;
+
+ case JUMP_INSN:
+ if (!ia64_spec_check_p (insn))
+ flags.is_branch = 1;
+
+ /* Don't bundle a jump following a call. */
+ if ((pat = prev_active_insn (insn))
+ && GET_CODE (pat) == CALL_INSN)
+ {
+ need_barrier = 1;
+ break;
+ }
+ /* FALLTHRU */
+
+ case INSN:
+ if (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ /* Don't care about USE and CLOBBER "insns"---those are used to
+ indicate to the optimizer that it shouldn't get rid of
+ certain operations. */
+ break;
+
+ pat = PATTERN (insn);
+
+ /* Ug. Hack hacks hacked elsewhere. */
+ switch (recog_memoized (insn))
+ {
+ /* We play dependency tricks with the epilogue in order
+ to get proper schedules. Undo this for dv analysis. */
+ case CODE_FOR_epilogue_deallocate_stack:
+ case CODE_FOR_prologue_allocate_stack:
+ pat = XVECEXP (pat, 0, 0);
+ break;
+
+ /* The pattern we use for br.cloop confuses the code above.
+ The second element of the vector is representative. */
+ case CODE_FOR_doloop_end_internal:
+ pat = XVECEXP (pat, 0, 1);
+ break;
+
+ /* Doesn't generate code. */
+ case CODE_FOR_pred_rel_mutex:
+ case CODE_FOR_prologue_use:
+ return 0;
+
+ default:
+ break;
+ }
+
+ memset (rws_insn, 0, sizeof (rws_insn));
+ need_barrier = rtx_needs_barrier (pat, flags, 0);
+
+ /* Check to see if the previous instruction was a volatile
+ asm. */
+ if (! need_barrier)
+ need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (first_instruction && INSN_P (insn)
+ && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ {
+ need_barrier = 0;
+ first_instruction = 0;
+ }
+
+ return need_barrier;
+}
+
+/* Like group_barrier_needed, but do not clobber the current state. */
+
+static int
+safe_group_barrier_needed (rtx insn)
+{
+ int saved_first_instruction;
+ int t;
+
+ saved_first_instruction = first_instruction;
+ in_safe_group_barrier = 1;
+
+ t = group_barrier_needed (insn);
+
+ first_instruction = saved_first_instruction;
+ in_safe_group_barrier = 0;
+
+ return t;
+}
+
+/* Scan the current function and insert stop bits as necessary to
+ eliminate dependencies. This function assumes that a final
+ instruction scheduling pass has been run which has already
+ inserted most of the necessary stop bits. This function only
+ inserts new ones at basic block boundaries, since these are
+ invisible to the scheduler. */
+
+static void
+emit_insn_group_barriers (FILE *dump)
+{
+ rtx insn;
+ rtx last_label = 0;
+ int insns_since_last_label = 0;
+
+ init_insn_group_barriers ();
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == CODE_LABEL)
+ {
+ if (insns_since_last_label)
+ last_label = insn;
+ insns_since_last_label = 0;
+ }
+ else if (GET_CODE (insn) == NOTE
+ && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
+ {
+ if (insns_since_last_label)
+ last_label = insn;
+ insns_since_last_label = 0;
+ }
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
+ {
+ init_insn_group_barriers ();
+ last_label = 0;
+ }
+ else if (NONDEBUG_INSN_P (insn))
+ {
+ insns_since_last_label = 1;
+
+ if (group_barrier_needed (insn))
+ {
+ if (last_label)
+ {
+ if (dump)
+ fprintf (dump, "Emitting stop before label %d\n",
+ INSN_UID (last_label));
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
+ insn = last_label;
+
+ init_insn_group_barriers ();
+ last_label = 0;
+ }
+ }
+ }
+ }
+}
+
+ /* Like emit_insn_group_barriers, but used when no final scheduling pass
+ has been run. This function has to emit all necessary group barriers. */
+
+static void
+emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
+{
+ rtx insn;
+
+ init_insn_group_barriers ();
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == BARRIER)
+ {
+ rtx last = prev_active_insn (insn);
+
+ if (! last)
+ continue;
+ if (GET_CODE (last) == JUMP_INSN
+ && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
+ last = prev_active_insn (last);
+ if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
+
+ init_insn_group_barriers ();
+ }
+ else if (NONDEBUG_INSN_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
+ init_insn_group_barriers ();
+ else if (group_barrier_needed (insn))
+ {
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
+ init_insn_group_barriers ();
+ group_barrier_needed (insn);
+ }
+ }
+ }
+}
+
+
+
+/* Instruction scheduling support. */
+
+#define NR_BUNDLES 10
+
+/* A list of names of all available bundles. */
+
+static const char *bundle_name [NR_BUNDLES] =
+{
+ ".mii",
+ ".mmi",
+ ".mfi",
+ ".mmf",
+#if NR_BUNDLES == 10
+ ".bbb",
+ ".mbb",
+#endif
+ ".mib",
+ ".mmb",
+ ".mfb",
+ ".mlx"
+};
+
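+ /* For illustration only (an editorial sketch, not used by the compiler):
+ a ".mfi" bundle as it could appear in emitted assembly, with one M,
+ one F and one I slot, and a stop bit `;;' ending the insn group:
+
+ { .mfi
+ ld8 r32 = [r12] // M slot
+ fma.s1 f6 = f8, f9, f7 // F slot
+ add r14 = r15, r16 ;; // I slot, stop bit
+ }
+
+ The ".mlx" template instead pairs an M slot with a single two-slot
+ X insn such as movl. The operands above are made up. */
+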
+/* Nonzero if we should insert stop bits into the schedule. */
+
+int ia64_final_schedule = 0;
+
+/* Codes of the corresponding queried units: */
+
+static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
+static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
+
+static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
+static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
+
+static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
+
+/* The following variable value is an insn group barrier. */
+
+static rtx dfa_stop_insn;
+
+/* The following variable value is the last issued insn. */
+
+static rtx last_scheduled_insn;
+
+ /* The following variable value is a pointer to a DFA state used as a
+ temporary variable. */
+
+static state_t temp_dfa_state = NULL;
+
+ /* The following variable value is the DFA state after issuing the last
+ insn. */
+
+static state_t prev_cycle_state = NULL;
+
+ /* An element of the following array is TRUE if the corresponding insn
+ requires a stop bit to be added before it. */
+
+static char *stops_p = NULL;
+
+ /* The following variable is used to set up the array mentioned above. */
+
+static int stop_before_p = 0;
+
+ /* The following variable value is the length of the arrays `clocks' and
+ `add_cycles'. */
+
+static int clocks_length;
+
+ /* The following variable value is the number of data speculations in progress. */
+static int pending_data_specs = 0;
+
+ /* Number of memory references on the current and three future processor cycles. */
+static char mem_ops_in_group[4];
+
+ /* Number of the current processor cycle (from the scheduler's point of view). */
+static int current_cycle;
+
+static rtx ia64_single_set (rtx);
+static void ia64_emit_insn_before (rtx, rtx);
+
+/* Map a bundle number to its pseudo-op. */
+
+const char *
+get_bundle_name (int b)
+{
+ return bundle_name[b];
+}
+
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+static int
+ia64_issue_rate (void)
+{
+ return 6;
+}
+
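+ /* A rate of 6 corresponds to the two 3-slot bundles an Itanium core
+ can issue per clock; the constant itself is all the scheduler uses. */
+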
+ /* Helper function: like single_set, but looks inside COND_EXEC. */
+
+static rtx
+ia64_single_set (rtx insn)
+{
+ rtx x = PATTERN (insn), ret;
+ if (GET_CODE (x) == COND_EXEC)
+ x = COND_EXEC_CODE (x);
+ if (GET_CODE (x) == SET)
+ return x;
+
+ /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
+ Although they are not classical single sets, the second set is there just
+ to protect the first one from moving past FP-relative stack accesses. */
+ switch (recog_memoized (insn))
+ {
+ case CODE_FOR_prologue_allocate_stack:
+ case CODE_FOR_epilogue_deallocate_stack:
+ ret = XVECEXP (x, 0, 0);
+ break;
+
+ default:
+ ret = single_set_2 (insn, x);
+ break;
+ }
+
+ return ret;
+}
+
+ /* Adjust the cost of a scheduling dependency.
+ Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
+ COST is the current cost, DW is the dependency weakness. */
+static int
+ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
+{
+ enum reg_note dep_type = (enum reg_note) dep_type1;
+ enum attr_itanium_class dep_class;
+ enum attr_itanium_class insn_class;
+
+ insn_class = ia64_safe_itanium_class (insn);
+ dep_class = ia64_safe_itanium_class (dep_insn);
+
+ /* Treat true memory dependencies separately. Ignore apparent true
+ dependence between store and call (call has a MEM inside a SYMBOL_REF). */
+ if (dep_type == REG_DEP_TRUE
+ && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
+ && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
+ return 0;
+
+ if (dw == MIN_DEP_WEAK)
+ /* Store and load are likely to alias, use higher cost to avoid stall. */
+ return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
+ else if (dw > MIN_DEP_WEAK)
+ {
+ /* Store and load are less likely to alias. */
+ if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
+ /* Assume there will be no cache conflict for floating-point data.
+ For integer data, L1 conflict penalty is huge (17 cycles), so we
+ never assume it will not cause a conflict. */
+ return 0;
+ else
+ return cost;
+ }
+
+ if (dep_type != REG_DEP_OUTPUT)
+ return cost;
+
+ if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
+ || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
+ return 0;
+
+ return cost;
+}
+
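+ /* Worked examples of the rules above (illustrative; the instruction
+ operands are hypothetical):
+ - an st8 feeding a br.call through a true dependence costs 0
+ (the MEM-inside-a-SYMBOL_REF case);
+ - an stf followed by a weakly dependent (dw > MIN_DEP_WEAK) memory
+ read costs 0 when mflag_sched_fp_mem_deps_zero_cost is set;
+ - an output dependence with a store on either side costs 0. */
+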
+/* Like emit_insn_before, but skip cycle_display notes.
+ ??? When cycle display notes are implemented, update this. */
+
+static void
+ia64_emit_insn_before (rtx insn, rtx before)
+{
+ emit_insn_before (insn, before);
+}
+
+ /* The following function marks insns that produce addresses for load
+ and store insns. Such insns will be placed into M slots because this
+ decreases the latency time for Itanium 1 (see function
+ `ia64_produce_address_p' and the DFA descriptions). */
+
+static void
+ia64_dependencies_evaluation_hook (rtx head, rtx tail)
+{
+ rtx insn, next, next_tail;
+
+ /* Before reload, which_alternative is not set, which means that
+ ia64_safe_itanium_class will produce wrong results for (at least)
+ move instructions. */
+ if (!reload_completed)
+ return;
+
+ next_tail = NEXT_INSN (tail);
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ insn->call = 0;
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
+ {
+ sd_iterator_def sd_it;
+ dep_t dep;
+ bool has_mem_op_consumer_p = false;
+
+ FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+ {
+ enum attr_itanium_class c;
+
+ if (DEP_TYPE (dep) != REG_DEP_TRUE)
+ continue;
+
+ next = DEP_CON (dep);
+ c = ia64_safe_itanium_class (next);
+ if ((c == ITANIUM_CLASS_ST
+ || c == ITANIUM_CLASS_STF)
+ && ia64_st_address_bypass_p (insn, next))
+ {
+ has_mem_op_consumer_p = true;
+ break;
+ }
+ else if ((c == ITANIUM_CLASS_LD
+ || c == ITANIUM_CLASS_FLD
+ || c == ITANIUM_CLASS_FLDP)
+ && ia64_ld_address_bypass_p (insn, next))
+ {
+ has_mem_op_consumer_p = true;
+ break;
+ }
+ }
+
+ insn->call = has_mem_op_consumer_p;
+ }
+}
+
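+ /* An illustration of the marking above (hypothetical operands): for an
+ IALU insn "add r14 = r15, r16" whose result is the address of a
+ consumer "ld8 r2 = [r14]" reached through a true dependence, the
+ insn->call bit is set so that the add is steered into an M slot by
+ the DFA descriptions. */
+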
+/* We're beginning a new block. Initialize data structures as necessary. */
+
+static void
+ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+#ifdef ENABLE_CHECKING
+ rtx insn;
+
+ if (!sel_sched_p () && reload_completed)
+ for (insn = NEXT_INSN (current_sched_info->prev_head);
+ insn != current_sched_info->next_tail;
+ insn = NEXT_INSN (insn))
+ gcc_assert (!SCHED_GROUP_P (insn));
+#endif
+ last_scheduled_insn = NULL_RTX;
+ init_insn_group_barriers ();
+
+ current_cycle = 0;
+ memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
+}
+
+/* We're beginning a scheduling pass. Check assertion. */
+
+static void
+ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ gcc_assert (pending_data_specs == 0);
+}
+
+ /* The scheduling pass is now finished. Free/reset the static variable. */
+static void
+ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED)
+{
+ gcc_assert (pending_data_specs == 0);
+}
+
+/* Return TRUE if INSN is a load (either normal or speculative, but not a
+ speculation check), FALSE otherwise. */
+static bool
+is_load_p (rtx insn)
+{
+ enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
+
+ return
+ ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
+ && get_attr_check_load (insn) == CHECK_LOAD_NO);
+}
+
+ /* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global
+ array (taking into account the 3-cycle cache reference postponement for
+ stores: Intel Itanium 2 Reference Manual for Software Development and
+ Optimization, section 6.7.3.1). */
+static void
+record_memory_reference (rtx insn)
+{
+ enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
+
+ switch (insn_class) {
+ case ITANIUM_CLASS_FLD:
+ case ITANIUM_CLASS_LD:
+ mem_ops_in_group[current_cycle % 4]++;
+ break;
+ case ITANIUM_CLASS_STF:
+ case ITANIUM_CLASS_ST:
+ mem_ops_in_group[(current_cycle + 3) % 4]++;
+ break;
+ default:;
+ }
+}
+
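+ /* A worked example of the ring buffer above: with current_cycle == 5,
+ a load counts in slot 5 % 4 == 1, i.e. against the current cycle,
+ while a store counts in slot (5 + 3) % 4 == 0, the slot that cycle 8
+ maps to, modelling the 3-cycle postponement of store cache references
+ cited above. */
+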
+ /* We are about to begin issuing insns for this clock cycle.
+ Override the default sort algorithm to better slot instructions. */
+
+static int
+ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
+ int *pn_ready, int clock_var,
+ int reorder_type)
+{
+ int n_asms;
+ int n_ready = *pn_ready;
+ rtx *e_ready = ready + n_ready;
+ rtx *insnp;
+
+ if (sched_verbose)
+ fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
+
+ if (reorder_type == 0)
+ {
+ /* First, move all USEs, CLOBBERs and other crud out of the way. */
+ n_asms = 0;
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (insnp < e_ready)
+ {
+ rtx insn = *insnp;
+ enum attr_type t = ia64_safe_type (insn);
+ if (t == TYPE_UNKNOWN)
+ {
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ {
+ rtx lowest = ready[n_asms];
+ ready[n_asms] = insn;
+ *insnp = lowest;
+ n_asms++;
+ }
+ else
+ {
+ rtx highest = ready[n_ready - 1];
+ ready[n_ready - 1] = insn;
+ *insnp = highest;
+ return 1;
+ }
+ }
+ }
+
+ if (n_asms < n_ready)
+ {
+ /* Some normal insns to process. Skip the asms. */
+ ready += n_asms;
+ n_ready -= n_asms;
+ }
+ else if (n_ready > 0)
+ return 1;
+ }
+
+ if (ia64_final_schedule)
+ {
+ int deleted = 0;
+ int nr_need_stop = 0;
+
+ for (insnp = ready; insnp < e_ready; insnp++)
+ if (safe_group_barrier_needed (*insnp))
+ nr_need_stop++;
+
+ if (reorder_type == 1 && n_ready == nr_need_stop)
+ return 0;
+ if (reorder_type == 0)
+ return 1;
+ insnp = e_ready;
+ /* Move down everything that needs a stop bit, preserving
+ relative order. */
+ while (insnp-- > ready + deleted)
+ while (insnp >= ready + deleted)
+ {
+ rtx insn = *insnp;
+ if (! safe_group_barrier_needed (insn))
+ break;
+ memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
+ *ready = insn;
+ deleted++;
+ }
+ n_ready -= deleted;
+ ready += deleted;
+ }
+
+ current_cycle = clock_var;
+ if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
+ {
+ int moved = 0;
+
+ insnp = e_ready;
+ /* Move down loads/stores, preserving relative order. */
+ while (insnp-- > ready + moved)
+ while (insnp >= ready + moved)
+ {
+ rtx insn = *insnp;
+ if (! is_load_p (insn))
+ break;
+ memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
+ *ready = insn;
+ moved++;
+ }
+ n_ready -= moved;
+ ready += moved;
+ }
+
+ return 1;
+}
+
+ /* We are about to begin issuing insns for this clock cycle. Override
+ the default sort algorithm to better slot instructions. */
+
+static int
+ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+ int clock_var)
+{
+ return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
+ pn_ready, clock_var, 0);
+}
+
+/* Like ia64_sched_reorder, but called after issuing each insn.
+ Override the default sort algorithm to better slot instructions. */
+
+static int
+ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
+ int *pn_ready, int clock_var)
+{
+ return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
+ clock_var, 1);
+}
+
+/* We are about to issue INSN. Return the number of insns left on the
+ ready queue that can be issued this cycle. */
+
+static int
+ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn ATTRIBUTE_UNUSED,
+ int can_issue_more ATTRIBUTE_UNUSED)
+{
+ if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
+ /* Modulo scheduling does not extend h_i_d when emitting
+ new instructions. Don't use h_i_d if we don't have to. */
+ {
+ if (DONE_SPEC (insn) & BEGIN_DATA)
+ pending_data_specs++;
+ if (CHECK_SPEC (insn) & BEGIN_DATA)
+ pending_data_specs--;
+ }
+
+ if (DEBUG_INSN_P (insn))
+ return 1;
+
+ last_scheduled_insn = insn;
+ memcpy (prev_cycle_state, curr_state, dfa_state_size);
+ if (reload_completed)
+ {
+ int needed = group_barrier_needed (insn);
+
+ gcc_assert (!needed);
+ if (GET_CODE (insn) == CALL_INSN)
+ init_insn_group_barriers ();
+ stops_p [INSN_UID (insn)] = stop_before_p;
+ stop_before_p = 0;
+
+ record_memory_reference (insn);
+ }
+ return 1;
+}
+
+ /* We are choosing an insn from the ready queue. Return nonzero if INSN
+ can be chosen. */
+
+static int
+ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
+{
+ gcc_assert (insn && INSN_P (insn));
+ return ((!reload_completed
+ || !safe_group_barrier_needed (insn))
+ && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
+ && (!mflag_sched_mem_insns_hard_limit
+ || !is_load_p (insn)
+ || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
+}
+
+ /* We are choosing an insn from the ready queue. Return nonzero if INSN
+ can be chosen. */
+
+static bool
+ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
+{
+ gcc_assert (insn && INSN_P (insn));
+ /* The size of the ALAT is 32. Since we perform conservative data
+ speculation, we keep the ALAT half empty. */
+ return (pending_data_specs < 16
+ || !(TODO_SPEC (insn) & BEGIN_DATA));
+}
+
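+ /* Illustration: once 16 data speculations are in flight, half of the
+ 32-entry ALAT, the guard above rejects any further insn that still
+ needs BEGIN_DATA speculation, so this scheduler never commits more
+ than half the ALAT. */
+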
+ /* The following variable value is a pseudo-insn used by the DFA insn
+ scheduler to change the DFA state when the simulated clock is
+ increased. */
+
+static rtx dfa_pre_cycle_insn;
+
+/* Returns 1 when a meaningful insn was scheduled between the last group
+ barrier and LAST. */
+static int
+scheduled_good_insn (rtx last)
+{
+ if (last && recog_memoized (last) >= 0)
+ return 1;
+
+ for ( ;
+ last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
+ && !stops_p[INSN_UID (last)];
+ last = PREV_INSN (last))
+ /* We could hit a NOTE_INSN_DELETED here which is actually outside
+ the ebb we're scheduling. */
+ if (INSN_P (last) && recog_memoized (last) >= 0)
+ return 1;
+
+ return 0;
+}
+
+ /* We are about to begin issuing INSN. Return nonzero if we cannot
+ issue it on the given cycle CLOCK; return zero if we should not sort
+ the ready queue on the next clock start. */
+
+static int
+ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
+ int clock, int *sort_p)
+{
+ gcc_assert (insn && INSN_P (insn));
+
+ if (DEBUG_INSN_P (insn))
+ return 0;
+
+ /* When a group barrier is needed for insn, last_scheduled_insn
+ should be set. */
+ gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
+ || last_scheduled_insn);
+
+ if ((reload_completed
+ && (safe_group_barrier_needed (insn)
+ || (mflag_sched_stop_bits_after_every_cycle
+ && last_clock != clock
+ && last_scheduled_insn
+ && scheduled_good_insn (last_scheduled_insn))))
+ || (last_scheduled_insn
+ && (GET_CODE (last_scheduled_insn) == CALL_INSN
+ || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
+ {
+ init_insn_group_barriers ();
+
+ if (verbose && dump)
+ fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
+ last_clock == clock ? " + cycle advance" : "");
+
+ stop_before_p = 1;
+ current_cycle = clock;
+ mem_ops_in_group[current_cycle % 4] = 0;
+
+ if (last_clock == clock)
+ {
+ state_transition (curr_state, dfa_stop_insn);
+ if (TARGET_EARLY_STOP_BITS)
+ *sort_p = (last_scheduled_insn == NULL_RTX
+ || GET_CODE (last_scheduled_insn) != CALL_INSN);
+ else
+ *sort_p = 0;
+ return 1;
+ }
+
+ if (last_scheduled_insn)
+ {
+ if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
+ state_reset (curr_state);
+ else
+ {
+ memcpy (curr_state, prev_cycle_state, dfa_state_size);
+ state_transition (curr_state, dfa_stop_insn);
+ state_transition (curr_state, dfa_pre_cycle_insn);
+ state_transition (curr_state, NULL);
+ }
+ }
+ }
+ return 0;
+}
+
+/* Implement targetm.sched.h_i_d_extended hook.
+ Extend internal data structures. */
+static void
+ia64_h_i_d_extended (void)
+{
+ if (stops_p != NULL)
+ {
+ int new_clocks_length = get_max_uid () * 3 / 2;
+ stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
+ clocks_length = new_clocks_length;
+ }
+}
+
+
+/* This structure describes the data used by the backend to guide scheduling.
+ When the current scheduling point is switched, this data should be saved
+ and restored later if the scheduler returns to this point. */
+struct _ia64_sched_context
+{
+ state_t prev_cycle_state;
+ rtx last_scheduled_insn;
+ struct reg_write_state rws_sum[NUM_REGS];
+ struct reg_write_state rws_insn[NUM_REGS];
+ int first_instruction;
+ int pending_data_specs;
+ int current_cycle;
+ char mem_ops_in_group[4];
+};
+typedef struct _ia64_sched_context *ia64_sched_context_t;
+
+/* Allocates a scheduling context. */
+static void *
+ia64_alloc_sched_context (void)
+{
+ return xmalloc (sizeof (struct _ia64_sched_context));
+}
+
+ /* Initializes the _SC context with clean data if CLEAN_P, and from
+ the global context otherwise. */
+static void
+ia64_init_sched_context (void *_sc, bool clean_p)
+{
+ ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
+
+ sc->prev_cycle_state = xmalloc (dfa_state_size);
+ if (clean_p)
+ {
+ state_reset (sc->prev_cycle_state);
+ sc->last_scheduled_insn = NULL_RTX;
+ memset (sc->rws_sum, 0, sizeof (rws_sum));
+ memset (sc->rws_insn, 0, sizeof (rws_insn));
+ sc->first_instruction = 1;
+ sc->pending_data_specs = 0;
+ sc->current_cycle = 0;
+ memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
+ }
+ else
+ {
+ memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
+ sc->last_scheduled_insn = last_scheduled_insn;
+ memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
+ memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
+ sc->first_instruction = first_instruction;
+ sc->pending_data_specs = pending_data_specs;
+ sc->current_cycle = current_cycle;
+ memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
+ }
+}
+
+/* Sets the global scheduling context to the one pointed to by _SC. */
+static void
+ia64_set_sched_context (void *_sc)
+{
+ ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
+
+ gcc_assert (sc != NULL);
+
+ memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
+ last_scheduled_insn = sc->last_scheduled_insn;
+ memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
+ memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
+ first_instruction = sc->first_instruction;
+ pending_data_specs = sc->pending_data_specs;
+ current_cycle = sc->current_cycle;
+ memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
+}
+
+/* Clears the data in the _SC scheduling context. */
+static void
+ia64_clear_sched_context (void *_sc)
+{
+ ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
+
+ free (sc->prev_cycle_state);
+ sc->prev_cycle_state = NULL;
+}
+
+/* Frees the _SC scheduling context. */
+static void
+ia64_free_sched_context (void *_sc)
+{
+ gcc_assert (_sc != NULL);
+
+ free (_sc);
+}
+
+typedef rtx (* gen_func_t) (rtx, rtx);
+
+ /* Return a function that will generate a load of the mode with index
+ MODE_NO and with speculation types TS. */
+static gen_func_t
+get_spec_load_gen_function (ds_t ts, int mode_no)
+{
+ static gen_func_t gen_ld_[] = {
+ gen_movbi,
+ gen_movqi_internal,
+ gen_movhi_internal,
+ gen_movsi_internal,
+ gen_movdi_internal,
+ gen_movsf_internal,
+ gen_movdf_internal,
+ gen_movxf_internal,
+ gen_movti_internal,
+ gen_zero_extendqidi2,
+ gen_zero_extendhidi2,
+ gen_zero_extendsidi2,
+ };
+
+ static gen_func_t gen_ld_a[] = {
+ gen_movbi_advanced,
+ gen_movqi_advanced,
+ gen_movhi_advanced,
+ gen_movsi_advanced,
+ gen_movdi_advanced,
+ gen_movsf_advanced,
+ gen_movdf_advanced,
+ gen_movxf_advanced,
+ gen_movti_advanced,
+ gen_zero_extendqidi2_advanced,
+ gen_zero_extendhidi2_advanced,
+ gen_zero_extendsidi2_advanced,
+ };
+ static gen_func_t gen_ld_s[] = {
+ gen_movbi_speculative,
+ gen_movqi_speculative,
+ gen_movhi_speculative,
+ gen_movsi_speculative,
+ gen_movdi_speculative,
+ gen_movsf_speculative,
+ gen_movdf_speculative,
+ gen_movxf_speculative,
+ gen_movti_speculative,
+ gen_zero_extendqidi2_speculative,
+ gen_zero_extendhidi2_speculative,
+ gen_zero_extendsidi2_speculative,
+ };
+ static gen_func_t gen_ld_sa[] = {
+ gen_movbi_speculative_advanced,
+ gen_movqi_speculative_advanced,
+ gen_movhi_speculative_advanced,
+ gen_movsi_speculative_advanced,
+ gen_movdi_speculative_advanced,
+ gen_movsf_speculative_advanced,
+ gen_movdf_speculative_advanced,
+ gen_movxf_speculative_advanced,
+ gen_movti_speculative_advanced,
+ gen_zero_extendqidi2_speculative_advanced,
+ gen_zero_extendhidi2_speculative_advanced,
+ gen_zero_extendsidi2_speculative_advanced,
+ };
+ static gen_func_t gen_ld_s_a[] = {
+ gen_movbi_speculative_a,
+ gen_movqi_speculative_a,
+ gen_movhi_speculative_a,
+ gen_movsi_speculative_a,
+ gen_movdi_speculative_a,
+ gen_movsf_speculative_a,
+ gen_movdf_speculative_a,
+ gen_movxf_speculative_a,
+ gen_movti_speculative_a,
+ gen_zero_extendqidi2_speculative_a,
+ gen_zero_extendhidi2_speculative_a,
+ gen_zero_extendsidi2_speculative_a,
+ };
+
+ gen_func_t *gen_ld;
+
+ if (ts & BEGIN_DATA)
+ {
+ if (ts & BEGIN_CONTROL)
+ gen_ld = gen_ld_sa;
+ else
+ gen_ld = gen_ld_a;
+ }
+ else if (ts & BEGIN_CONTROL)
+ {
+ if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
+ || ia64_needs_block_p (ts))
+ gen_ld = gen_ld_s;
+ else
+ gen_ld = gen_ld_s_a;
+ }
+ else if (ts == 0)
+ gen_ld = gen_ld_;
+ else
+ gcc_unreachable ();
+
+ return gen_ld[mode_no];
+}
+
+ /* Constants that help map 'enum machine_mode' to int. */
+enum SPEC_MODES
+ {
+ SPEC_MODE_INVALID = -1,
+ SPEC_MODE_FIRST = 0,
+ SPEC_MODE_FOR_EXTEND_FIRST = 1,
+ SPEC_MODE_FOR_EXTEND_LAST = 3,
+ SPEC_MODE_LAST = 8
+ };
+
+enum
+ {
+ /* Offset to reach ZERO_EXTEND patterns. */
+ SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
+ };
+
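+ /* Worked example: SPEC_GEN_EXTEND_OFFSET is 8 - 1 + 1 == 8, so a
+ QImode load (index 1) wrapped in a ZERO_EXTEND maps to index
+ 1 + 8 == 9, which selects the gen_zero_extendqidi2* entries in the
+ gen_ld_* tables above. */
+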
+ /* Return the index of MODE. */
+static int
+ia64_mode_to_int (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case BImode: return 0; /* SPEC_MODE_FIRST */
+ case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
+ case HImode: return 2;
+ case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
+ case DImode: return 4;
+ case SFmode: return 5;
+ case DFmode: return 6;
+ case XFmode: return 7;
+ case TImode:
+ /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not
+ mentioned in itanium[12].md. Predicate fp_register_operand also
+ needs to be defined. Bottom line: better disable for now. */
+ return SPEC_MODE_INVALID;
+ default: return SPEC_MODE_INVALID;
+ }
+}
+
+/* Provide information about speculation capabilities. */
+static void
+ia64_set_sched_flags (spec_info_t spec_info)
+{
+ unsigned int *flags = &(current_sched_info->flags);
+
+ if (*flags & SCHED_RGN
+ || *flags & SCHED_EBB
+ || *flags & SEL_SCHED)
+ {
+ int mask = 0;
+
+ if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
+ || (mflag_sched_ar_data_spec && reload_completed))
+ {
+ mask |= BEGIN_DATA;
+
+ if (!sel_sched_p ()
+ && ((mflag_sched_br_in_data_spec && !reload_completed)
+ || (mflag_sched_ar_in_data_spec && reload_completed)))
+ mask |= BE_IN_DATA;
+ }
+
+ if (mflag_sched_control_spec
+ && (!sel_sched_p ()
+ || reload_completed))
+ {
+ mask |= BEGIN_CONTROL;
+
+ if (!sel_sched_p () && mflag_sched_in_control_spec)
+ mask |= BE_IN_CONTROL;
+ }
+
+ spec_info->mask = mask;
+
+ if (mask)
+ {
+ *flags |= USE_DEPS_LIST | DO_SPECULATION;
+
+ if (mask & BE_IN_SPEC)
+ *flags |= NEW_BBS;
+
+ spec_info->flags = 0;
+
+ if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
+ spec_info->flags |= PREFER_NON_DATA_SPEC;
+
+ if (mask & CONTROL_SPEC)
+ {
+ if (mflag_sched_prefer_non_control_spec_insns)
+ spec_info->flags |= PREFER_NON_CONTROL_SPEC;
+
+ if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
+ spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
+ }
+
+ if (sched_verbose >= 1)
+ spec_info->dump = sched_dump;
+ else
+ spec_info->dump = 0;
+
+ if (mflag_sched_count_spec_in_critical_path)
+ spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
+ }
+ }
+ else
+ spec_info->mask = 0;
+}
+
+ /* If INSN is an appropriate load, return the index of its mode
+ (see ia64_mode_to_int). Return -1 otherwise. */
+static int
+get_mode_no_for_insn (rtx insn)
+{
+ rtx reg, mem, mode_rtx;
+ int mode_no;
+ bool extend_p;
+
+ extract_insn_cached (insn);
+
+ /* We use WHICH_ALTERNATIVE only after reload. This will
+ guarantee that reload won't touch a speculative insn. */
+
+ if (recog_data.n_operands != 2)
+ return -1;
+
+ reg = recog_data.operand[0];
+ mem = recog_data.operand[1];
+
+ /* We should use MEM's mode since REG's mode in the presence of
+ ZERO_EXTEND will always be DImode. */
+ if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
+ /* Process non-speculative ld. */
+ {
+ if (!reload_completed)
+ {
+ /* Do not speculate into regs like ar.lc. */
+ if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
+ return -1;
+
+ if (!MEM_P (mem))
+ return -1;
+
+ {
+ rtx mem_reg = XEXP (mem, 0);
+
+ if (!REG_P (mem_reg))
+ return -1;
+ }
+
+ mode_rtx = mem;
+ }
+ else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
+ {
+ gcc_assert (REG_P (reg) && MEM_P (mem));
+ mode_rtx = mem;
+ }
+ else
+ return -1;
+ }
+ else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
+ || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
+ || get_attr_check_load (insn) == CHECK_LOAD_YES)
+ /* Process speculative ld or ld.c. */
+ {
+ gcc_assert (REG_P (reg) && MEM_P (mem));
+ mode_rtx = mem;
+ }
+ else
+ {
+ enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
+
+ if (attr_class == ITANIUM_CLASS_CHK_A
+ || attr_class == ITANIUM_CLASS_CHK_S_I
+ || attr_class == ITANIUM_CLASS_CHK_S_F)
+ /* Process chk. */
+ mode_rtx = reg;
+ else
+ return -1;
+ }
+
+ mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
+
+ if (mode_no == SPEC_MODE_INVALID)
+ return -1;
+
+ extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
+
+ if (extend_p)
+ {
+ if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
+ && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
+ return -1;
+
+ mode_no += SPEC_GEN_EXTEND_OFFSET;
+ }
+
+ return mode_no;
+}
+
+/* If X is an unspec part of a speculative load, return its code.
+ Return -1 otherwise. */
+static int
+get_spec_unspec_code (const_rtx x)
+{
+ if (GET_CODE (x) != UNSPEC)
+ return -1;
+
+ {
+ int code;
+
+ code = XINT (x, 1);
+
+ switch (code)
+ {
+ case UNSPEC_LDA:
+ case UNSPEC_LDS:
+ case UNSPEC_LDS_A:
+ case UNSPEC_LDSA:
+ return code;
+
+ default:
+ return -1;
+ }
+ }
+}
+
+/* Implement skip_rtx_p hook. */
+static bool
+ia64_skip_rtx_p (const_rtx x)
+{
+ return get_spec_unspec_code (x) != -1;
+}
+
+/* If INSN is a speculative load, return its UNSPEC code.
+ Return -1 otherwise. */
+static int
+get_insn_spec_code (const_rtx insn)
+{
+ rtx pat, reg, mem;
+
+ pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == COND_EXEC)
+ pat = COND_EXEC_CODE (pat);
+
+ if (GET_CODE (pat) != SET)
+ return -1;
+
+ reg = SET_DEST (pat);
+ if (!REG_P (reg))
+ return -1;
+
+ mem = SET_SRC (pat);
+ if (GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+
+ return get_spec_unspec_code (mem);
+}
+
+/* If INSN is a speculative load, return a ds with the speculation types.
+ Otherwise [if INSN is a normal instruction] return 0. */
+static ds_t
+ia64_get_insn_spec_ds (rtx insn)
+{
+ int code = get_insn_spec_code (insn);
+
+ switch (code)
+ {
+ case UNSPEC_LDA:
+ return BEGIN_DATA;
+
+ case UNSPEC_LDS:
+ case UNSPEC_LDS_A:
+ return BEGIN_CONTROL;
+
+ case UNSPEC_LDSA:
+ return BEGIN_DATA | BEGIN_CONTROL;
+
+ default:
+ return 0;
+ }
+}
+
+ /* If INSN is a speculative load, return a ds with the speculation types
+ that will be checked.
+ Otherwise [if INSN is a normal instruction] return 0. */
+static ds_t
+ia64_get_insn_checked_ds (rtx insn)
+{
+ int code = get_insn_spec_code (insn);
+
+ switch (code)
+ {
+ case UNSPEC_LDA:
+ return BEGIN_DATA | BEGIN_CONTROL;
+
+ case UNSPEC_LDS:
+ return BEGIN_CONTROL;
+
+ case UNSPEC_LDS_A:
+ case UNSPEC_LDSA:
+ return BEGIN_DATA | BEGIN_CONTROL;
+
+ default:
+ return 0;
+ }
+}
+
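+ /* Illustration of the mapping above: the ds returned for an advanced
+ load ld.a (UNSPEC_LDA) covers both failure kinds, for a control
+ speculative ld.s (UNSPEC_LDS) only control speculation, and for a
+ combined ld.sa (UNSPEC_LDSA) again both. */
+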
+ /* Return a speculative load pattern for INSN with speculation type TS
+ and mode index MODE_NO. A ZERO_EXTEND variant is selected through
+ MODE_NO (see SPEC_GEN_EXTEND_OFFSET above). */
+static rtx
+ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
+{
+ rtx pat, new_pat;
+ gen_func_t gen_load;
+
+ gen_load = get_spec_load_gen_function (ts, mode_no);
+
+ new_pat = gen_load (copy_rtx (recog_data.operand[0]),
+ copy_rtx (recog_data.operand[1]));
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == COND_EXEC)
+ new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
+ new_pat);
+
+ return new_pat;
+}
+
+static bool
+insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
+ ds_t ds ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+ /* Implement targetm.sched.speculate_insn hook.
+ Check if the INSN can be TS speculative.
+ If 'no' - return -1.
+ If 'yes' - generate the speculative pattern in NEW_PAT and return 1.
+ If the current pattern of the INSN already provides TS speculation,
+ return 0. */
+static int
+ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
+{
+ int mode_no;
+ int res;
+
+ gcc_assert (!(ts & ~SPECULATIVE));
+
+ if (ia64_spec_check_p (insn))
+ return -1;
+
+ if ((ts & BE_IN_SPEC)
+ && !insn_can_be_in_speculative_p (insn, ts))
+ return -1;
+
+ mode_no = get_mode_no_for_insn (insn);
+
+ if (mode_no != SPEC_MODE_INVALID)
+ {
+ if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
+ res = 0;
+ else
+ {
+ res = 1;
+ *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
+ }
+ }
+ else
+ res = -1;
+
+ return res;
+}
+
+ /* Return a function that will generate a check for speculation TS with
+ mode index MODE_NO.
+ If a simple check is needed, pass true for SIMPLE_CHECK_P.
+ If a clearing check is needed, pass true for CLEARING_CHECK_P. */
+static gen_func_t
+get_spec_check_gen_function (ds_t ts, int mode_no,
+ bool simple_check_p, bool clearing_check_p)
+{
+ static gen_func_t gen_ld_c_clr[] = {
+ gen_movbi_clr,
+ gen_movqi_clr,
+ gen_movhi_clr,
+ gen_movsi_clr,
+ gen_movdi_clr,
+ gen_movsf_clr,
+ gen_movdf_clr,
+ gen_movxf_clr,
+ gen_movti_clr,
+ gen_zero_extendqidi2_clr,
+ gen_zero_extendhidi2_clr,
+ gen_zero_extendsidi2_clr,
+ };
+ static gen_func_t gen_ld_c_nc[] = {
+ gen_movbi_nc,
+ gen_movqi_nc,
+ gen_movhi_nc,
+ gen_movsi_nc,
+ gen_movdi_nc,
+ gen_movsf_nc,
+ gen_movdf_nc,
+ gen_movxf_nc,
+ gen_movti_nc,
+ gen_zero_extendqidi2_nc,
+ gen_zero_extendhidi2_nc,
+ gen_zero_extendsidi2_nc,
+ };
+ static gen_func_t gen_chk_a_clr[] = {
+ gen_advanced_load_check_clr_bi,
+ gen_advanced_load_check_clr_qi,
+ gen_advanced_load_check_clr_hi,
+ gen_advanced_load_check_clr_si,
+ gen_advanced_load_check_clr_di,
+ gen_advanced_load_check_clr_sf,
+ gen_advanced_load_check_clr_df,
+ gen_advanced_load_check_clr_xf,
+ gen_advanced_load_check_clr_ti,
+ gen_advanced_load_check_clr_di,
+ gen_advanced_load_check_clr_di,
+ gen_advanced_load_check_clr_di,
+ };
+ static gen_func_t gen_chk_a_nc[] = {
+ gen_advanced_load_check_nc_bi,
+ gen_advanced_load_check_nc_qi,
+ gen_advanced_load_check_nc_hi,
+ gen_advanced_load_check_nc_si,
+ gen_advanced_load_check_nc_di,
+ gen_advanced_load_check_nc_sf,
+ gen_advanced_load_check_nc_df,
+ gen_advanced_load_check_nc_xf,
+ gen_advanced_load_check_nc_ti,
+ gen_advanced_load_check_nc_di,
+ gen_advanced_load_check_nc_di,
+ gen_advanced_load_check_nc_di,
+ };
+ static gen_func_t gen_chk_s[] = {
+ gen_speculation_check_bi,
+ gen_speculation_check_qi,
+ gen_speculation_check_hi,
+ gen_speculation_check_si,
+ gen_speculation_check_di,
+ gen_speculation_check_sf,
+ gen_speculation_check_df,
+ gen_speculation_check_xf,
+ gen_speculation_check_ti,
+ gen_speculation_check_di,
+ gen_speculation_check_di,
+ gen_speculation_check_di,
+ };
+
+ gen_func_t *gen_check;
+
+ if (ts & BEGIN_DATA)
+ {
+ /* We don't need recovery because even if this is an ld.sa, the ALAT
+ entry will be allocated only if the NAT bit is set to zero.
+ So it is enough to use ld.c here. */
+
+ if (simple_check_p)
+ {
+ gcc_assert (mflag_sched_spec_ldc);
+
+ if (clearing_check_p)
+ gen_check = gen_ld_c_clr;
+ else
+ gen_check = gen_ld_c_nc;
+ }
+ else
+ {
+ if (clearing_check_p)
+ gen_check = gen_chk_a_clr;
+ else
+ gen_check = gen_chk_a_nc;
+ }
+ }
+ else if (ts & BEGIN_CONTROL)
+ {
+ if (simple_check_p)
+ /* We might want to use ld.sa -> ld.c instead of
+ ld.s -> chk.s. */
+ {
+ gcc_assert (!ia64_needs_block_p (ts));
+
+ if (clearing_check_p)
+ gen_check = gen_ld_c_clr;
+ else
+ gen_check = gen_ld_c_nc;
+ }
+ else
+ {
+ gen_check = gen_chk_s;
+ }
+ }
+ else
+ gcc_unreachable ();
+
+ gcc_assert (mode_no >= 0);
+ return gen_check[mode_no];
+}
+
+ /* Return nonzero if a speculation of type TS needs a branchy recovery check. */
+static bool
+ia64_needs_block_p (ds_t ts)
+{
+ if (ts & BEGIN_DATA)
+ return !mflag_sched_spec_ldc;
+
+ gcc_assert ((ts & BEGIN_CONTROL) != 0);
+
+ return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
+}
+
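+ /* Illustration: with mflag_sched_spec_ldc set, a data speculative
+ ld.a/ld.sa can be checked by a simple ld.c, so no branchy recovery
+ block is needed; without it, a chk.a with a recovery branch is
+ required (see get_spec_check_gen_function above). */
+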
+ /* Generate a recovery check for INSN. If LABEL is nonzero, generate a
+ branchy recovery check; otherwise, generate a simple check. */
+static rtx
+ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
+{
+ rtx op1, pat, check_pat;
+ gen_func_t gen_check;
+ int mode_no;
+
+ mode_no = get_mode_no_for_insn (insn);
+ gcc_assert (mode_no >= 0);
+
+ if (label)
+ op1 = label;
+ else
+ {
+ gcc_assert (!ia64_needs_block_p (ds));
+ op1 = copy_rtx (recog_data.operand[1]);
+ }
+
+ gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
+ true);
+
+ check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == COND_EXEC)
+ check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
+ check_pat);
+
+ return check_pat;
+}
+
+ /* Return nonzero if X is a branchy recovery check. */
+static int
+ia64_spec_check_p (rtx x)
+{
+ x = PATTERN (x);
+ if (GET_CODE (x) == COND_EXEC)
+ x = COND_EXEC_CODE (x);
+ if (GET_CODE (x) == SET)
+ return ia64_spec_check_src_p (SET_SRC (x));
+ return 0;
+}
+
+ /* Return nonzero if SRC belongs to a recovery check. */
+static int
+ia64_spec_check_src_p (rtx src)
+{
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ {
+ rtx t;
+
+ t = XEXP (src, 0);
+ if (GET_CODE (t) == NE)
+ {
+ t = XEXP (t, 0);
+
+ if (GET_CODE (t) == UNSPEC)
+ {
+ int code;
+
+ code = XINT (t, 1);
+
+ if (code == UNSPEC_LDCCLR
+ || code == UNSPEC_LDCNC
+ || code == UNSPEC_CHKACLR
+ || code == UNSPEC_CHKANC
+ || code == UNSPEC_CHKS)
+ {
+ gcc_assert (code != 0);
+ return code;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+
+ /* The following page contains abstract data `bundle states' which are
+ used for bundling insns (inserting nops and generating templates). */
+
+ /* The following describes the state of insn bundling. */
+
+struct bundle_state
+{
+ /* Unique bundle state number to identify them in the debugging
+ output */
+ int unique_num;
+ rtx insn; /* corresponding insn, NULL for the 1st and the last state */
+ /* number of nops before and after the insn */
+ short before_nops_num, after_nops_num;
+ int insn_num; /* insn number (0 for the initial state, 1 for the 1st
+ insn) */
+ int cost; /* cost of the state in cycles */
+ int accumulated_insns_num; /* number of all previous insns including
+ nops; an L insn is counted as 2 insns */
+ int branch_deviation; /* deviation of previous branches from 3rd slots */
+ int middle_bundle_stops; /* number of stop bits in the middle of bundles */
+ struct bundle_state *next; /* next state with the same insn_num */
+ struct bundle_state *originator; /* originator (previous insn state) */
+ /* All bundle states are in the following chain. */
+ struct bundle_state *allocated_states_chain;
+ /* The DFA State after issuing the insn and the nops. */
+ state_t dfa_state;
+};
+
+ /* The following maps an insn number to the corresponding bundle state. */
+
+static struct bundle_state **index_to_bundle_states;
+
+ /* The unique number of the next bundle state. */
+
+static int bundle_states_num;
+
+/* All allocated bundle states are in the following chain. */
+
+static struct bundle_state *allocated_bundle_states_chain;
+
+/* All allocated but not used bundle states are in the following
+ chain. */
+
+static struct bundle_state *free_bundle_state_chain;
+
+
+/* The following function returns a free bundle state. */
+
+static struct bundle_state *
+get_free_bundle_state (void)
+{
+ struct bundle_state *result;
+
+ if (free_bundle_state_chain != NULL)
+ {
+ result = free_bundle_state_chain;
+ free_bundle_state_chain = result->next;
+ }
+ else
+ {
+ result = XNEW (struct bundle_state);
+ result->dfa_state = xmalloc (dfa_state_size);
+ result->allocated_states_chain = allocated_bundle_states_chain;
+ allocated_bundle_states_chain = result;
+ }
+ result->unique_num = bundle_states_num++;
+ return result;
+ }
+
+ /* The following function frees the given bundle state. */
+
+static void
+free_bundle_state (struct bundle_state *state)
+{
+ state->next = free_bundle_state_chain;
+ free_bundle_state_chain = state;
+}
+
+/* Start work with abstract data `bundle states'. */
+
+static void
+initiate_bundle_states (void)
+{
+ bundle_states_num = 0;
+ free_bundle_state_chain = NULL;
+ allocated_bundle_states_chain = NULL;
+}
+
+/* Finish work with abstract data `bundle states'. */
+
+static void
+finish_bundle_states (void)
+{
+ struct bundle_state *curr_state, *next_state;
+
+ for (curr_state = allocated_bundle_states_chain;
+ curr_state != NULL;
+ curr_state = next_state)
+ {
+ next_state = curr_state->allocated_states_chain;
+ free (curr_state->dfa_state);
+ free (curr_state);
+ }
+}
+
+ /* Hash table of the bundle states. The key is the dfa_state and
+ insn_num of a bundle state. */
+
+static htab_t bundle_state_table;
+
+ /* The function returns the hash of BUNDLE_STATE. */
+
+static unsigned
+bundle_state_hash (const void *bundle_state)
+{
+ const struct bundle_state *const state
+ = (const struct bundle_state *) bundle_state;
+ unsigned result, i;
+
+ for (result = i = 0; i < dfa_state_size; i++)
+ result += (((unsigned char *) state->dfa_state) [i]
+ << ((i % CHAR_BIT) * 3 + CHAR_BIT));
+ return result + state->insn_num;
+}
+
+/* The function returns nonzero if the bundle state keys are equal. */
+
+static int
+bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
+{
+ const struct bundle_state *const state1
+ = (const struct bundle_state *) bundle_state_1;
+ const struct bundle_state *const state2
+ = (const struct bundle_state *) bundle_state_2;
+
+ return (state1->insn_num == state2->insn_num
+ && memcmp (state1->dfa_state, state2->dfa_state,
+ dfa_state_size) == 0);
+}
+
+ /* The function inserts BUNDLE_STATE into the hash table and returns
+ nonzero if it has been inserted. The table keeps only the best bundle
+ state for a given key: states are compared by cost, then by
+ accumulated_insns_num, branch_deviation and middle_bundle_stops. */
+
+static int
+insert_bundle_state (struct bundle_state *bundle_state)
+{
+ void **entry_ptr;
+
+ entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
+ if (*entry_ptr == NULL)
+ {
+ bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
+ index_to_bundle_states [bundle_state->insn_num] = bundle_state;
+ *entry_ptr = (void *) bundle_state;
+ return TRUE;
+ }
+ else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
+ || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
+ && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
+ > bundle_state->accumulated_insns_num
+ || (((struct bundle_state *)
+ *entry_ptr)->accumulated_insns_num
+ == bundle_state->accumulated_insns_num
+ && (((struct bundle_state *)
+ *entry_ptr)->branch_deviation
+ > bundle_state->branch_deviation
+ || (((struct bundle_state *)
+ *entry_ptr)->branch_deviation
+ == bundle_state->branch_deviation
+ && ((struct bundle_state *)
+ *entry_ptr)->middle_bundle_stops
+ > bundle_state->middle_bundle_stops))))))
+ {
+ struct bundle_state temp;
+
+ temp = *(struct bundle_state *) *entry_ptr;
+ *(struct bundle_state *) *entry_ptr = *bundle_state;
+ ((struct bundle_state *) *entry_ptr)->next = temp.next;
+ *bundle_state = temp;
+ }
+ return FALSE;
+}
+
+/* Start work with the hash table. */
+
+static void
+initiate_bundle_state_table (void)
+{
+ bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
+ (htab_del) 0);
+}
+
+/* Finish work with the hash table. */
+
+static void
+finish_bundle_state_table (void)
+{
+ htab_delete (bundle_state_table);
+}
+
+
+
+ /* The following variable is an insn `nop' used to check bundle states
+ with different numbers of inserted nops. */
+
+static rtx ia64_nop;
+
+ /* The following function tries to issue NOPS_NUM nops for the current
+ state without advancing the processor cycle. If it fails, the
+ function returns FALSE and frees the current state. */
+
+static int
+try_issue_nops (struct bundle_state *curr_state, int nops_num)
+{
+ int i;
+
+ for (i = 0; i < nops_num; i++)
+ if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
+ {
+ free_bundle_state (curr_state);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+ /* The following function tries to issue INSN for the current
+ state without advancing the processor cycle. If it fails, the
+ function returns FALSE and frees the current state. */
+
+static int
+try_issue_insn (struct bundle_state *curr_state, rtx insn)
+{
+ if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
+ {
+ free_bundle_state (curr_state);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+ /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
+ starting with ORIGINATOR without advancing the processor cycle. If
+ TRY_BUNDLE_END_P is TRUE, the function also (or only, if
+ ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
+ bundle. If it succeeds, the function creates a new bundle state and
+ inserts it into the hash table and into `index_to_bundle_states'. */
+
+static void
+issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
+ rtx insn, int try_bundle_end_p, int only_bundle_end_p)
+{
+ struct bundle_state *curr_state;
+
+ curr_state = get_free_bundle_state ();
+ memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
+ curr_state->insn = insn;
+ curr_state->insn_num = originator->insn_num + 1;
+ curr_state->cost = originator->cost;
+ curr_state->originator = originator;
+ curr_state->before_nops_num = before_nops_num;
+ curr_state->after_nops_num = 0;
+ curr_state->accumulated_insns_num
+ = originator->accumulated_insns_num + before_nops_num;
+ curr_state->branch_deviation = originator->branch_deviation;
+ curr_state->middle_bundle_stops = originator->middle_bundle_stops;
+ gcc_assert (insn);
+ if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
+ {
+ gcc_assert (GET_MODE (insn) != TImode);
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
+ if (curr_state->accumulated_insns_num % 3 != 0)
+ curr_state->middle_bundle_stops++;
+ if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
+ && curr_state->accumulated_insns_num % 3 != 0)
+ {
+ free_bundle_state (curr_state);
+ return;
+ }
+ }
+ else if (GET_MODE (insn) != TImode)
+ {
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ curr_state->accumulated_insns_num++;
+ gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0);
+
+ if (ia64_safe_type (insn) == TYPE_L)
+ curr_state->accumulated_insns_num++;
+ }
+ else
+ {
+ /* If this is an insn that must be first in a group, then don't allow
+ nops to be emitted before it. Currently, alloc is the only such
+ supported instruction. */
+ /* ??? The bundling automatons should handle this for us, but they do
+ not yet have support for the first_insn attribute. */
+ if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
+ {
+ free_bundle_state (curr_state);
+ return;
+ }
+
+ state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
+ state_transition (curr_state->dfa_state, NULL);
+ curr_state->cost++;
+ if (!try_issue_nops (curr_state, before_nops_num))
+ return;
+ if (!try_issue_insn (curr_state, insn))
+ return;
+ curr_state->accumulated_insns_num++;
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ {
+ /* Finish bundle containing asm insn. */
+ curr_state->after_nops_num
+ = 3 - curr_state->accumulated_insns_num % 3;
+ curr_state->accumulated_insns_num
+ += 3 - curr_state->accumulated_insns_num % 3;
+ }
+ else if (ia64_safe_type (insn) == TYPE_L)
+ curr_state->accumulated_insns_num++;
+ }
+ if (ia64_safe_type (insn) == TYPE_B)
+ curr_state->branch_deviation
+ += 2 - (curr_state->accumulated_insns_num - 1) % 3;
+ if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
+ {
+ if (!only_bundle_end_p && insert_bundle_state (curr_state))
+ {
+ state_t dfa_state;
+ struct bundle_state *curr_state1;
+ struct bundle_state *allocated_states_chain;
+
+ curr_state1 = get_free_bundle_state ();
+ dfa_state = curr_state1->dfa_state;
+ allocated_states_chain = curr_state1->allocated_states_chain;
+ *curr_state1 = *curr_state;
+ curr_state1->dfa_state = dfa_state;
+ curr_state1->allocated_states_chain = allocated_states_chain;
+ memcpy (curr_state1->dfa_state, curr_state->dfa_state,
+ dfa_state_size);
+ curr_state = curr_state1;
+ }
+ if (!try_issue_nops (curr_state,
+ 3 - curr_state->accumulated_insns_num % 3))
+ return;
+ curr_state->after_nops_num
+ = 3 - curr_state->accumulated_insns_num % 3;
+ curr_state->accumulated_insns_num
+ += 3 - curr_state->accumulated_insns_num % 3;
+ }
+ if (!insert_bundle_state (curr_state))
+ free_bundle_state (curr_state);
+ return;
+}
+
+ /* The following function returns the position in the two-bundle issue
+ window for the given STATE. */
+
+static int
+get_max_pos (state_t state)
+{
+ if (cpu_unit_reservation_p (state, pos_6))
+ return 6;
+ else if (cpu_unit_reservation_p (state, pos_5))
+ return 5;
+ else if (cpu_unit_reservation_p (state, pos_4))
+ return 4;
+ else if (cpu_unit_reservation_p (state, pos_3))
+ return 3;
+ else if (cpu_unit_reservation_p (state, pos_2))
+ return 2;
+ else if (cpu_unit_reservation_p (state, pos_1))
+ return 1;
+ else
+ return 0;
+}
+
+ /* The function returns the code of a possible template for the given
+ position and state. The function should be called only with position
+ equal to 3 or 6. We avoid generating F NOPs by putting templates
+ containing F insns at the end of the template search, because of an
+ undocumented anomaly in McKinley-derived cores which can cause stalls
+ if an F-unit insn (including a NOP) is issued within a six-cycle
+ window after reading certain application registers (such as ar.bsp).
+ Furthermore, power considerations also argue against the use of
+ F-unit instructions unless they're really needed. */
+
+static int
+get_template (state_t state, int pos)
+{
+ switch (pos)
+ {
+ case 3:
+ if (cpu_unit_reservation_p (state, _0mmi_))
+ return 1;
+ else if (cpu_unit_reservation_p (state, _0mii_))
+ return 0;
+ else if (cpu_unit_reservation_p (state, _0mmb_))
+ return 7;
+ else if (cpu_unit_reservation_p (state, _0mib_))
+ return 6;
+ else if (cpu_unit_reservation_p (state, _0mbb_))
+ return 5;
+ else if (cpu_unit_reservation_p (state, _0bbb_))
+ return 4;
+ else if (cpu_unit_reservation_p (state, _0mmf_))
+ return 3;
+ else if (cpu_unit_reservation_p (state, _0mfi_))
+ return 2;
+ else if (cpu_unit_reservation_p (state, _0mfb_))
+ return 8;
+ else if (cpu_unit_reservation_p (state, _0mlx_))
+ return 9;
+ else
+ gcc_unreachable ();
+ case 6:
+ if (cpu_unit_reservation_p (state, _1mmi_))
+ return 1;
+ else if (cpu_unit_reservation_p (state, _1mii_))
+ return 0;
+ else if (cpu_unit_reservation_p (state, _1mmb_))
+ return 7;
+ else if (cpu_unit_reservation_p (state, _1mib_))
+ return 6;
+ else if (cpu_unit_reservation_p (state, _1mbb_))
+ return 5;
+ else if (cpu_unit_reservation_p (state, _1bbb_))
+ return 4;
+ else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
+ return 3;
+ else if (cpu_unit_reservation_p (state, _1mfi_))
+ return 2;
+ else if (cpu_unit_reservation_p (state, _1mfb_))
+ return 8;
+ else if (cpu_unit_reservation_p (state, _1mlx_))
+ return 9;
+ else
+ gcc_unreachable ();
+ default:
+ gcc_unreachable ();
+ }
+}
+
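+ /* The codes returned above index the bundle_name table defined earlier
+ in this file, e.g. 0 is ".mii", 1 is ".mmi", 3 is ".mmf" and 9 is
+ ".mlx". */
+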
+/* True when INSN is important for bundling. */
+static bool
+important_for_bundling_p (rtx insn)
+{
+ return (INSN_P (insn)
+ && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER);
+}
+
+ /* The following function returns the first insn important for insn
+ bundling at or after INSN and before TAIL. */
+
+static rtx
+get_next_important_insn (rtx insn, rtx tail)
+{
+ for (; insn && insn != tail; insn = NEXT_INSN (insn))
+ if (important_for_bundling_p (insn))
+ return insn;
+ return NULL_RTX;
+}
+
+/* Add a bundle selector TEMPLATE0 before INSN. */
+
+static void
+ia64_add_bundle_selector_before (int template0, rtx insn)
+{
+ rtx b = gen_bundle_selector (GEN_INT (template0));
+
+ ia64_emit_insn_before (b, insn);
+#if NR_BUNDLES == 10
+ if ((template0 == 4 || template0 == 5)
+ && ia64_except_unwind_info (&global_options) == UI_TARGET)
+ {
+ int i;
+ rtx note = NULL_RTX;
+
+ /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
+ first or second slot. If it is and has a REG_EH_REGION note, copy
+ the note to the following nops, as br.call sets rp to the address of
+ the following bundle and therefore an EH region end must be on a
+ bundle boundary. */
+ insn = PREV_INSN (insn);
+ for (i = 0; i < 3; i++)
+ {
+ do
+ insn = next_active_insn (insn);
+ while (GET_CODE (insn) == INSN
+ && get_attr_empty (insn) == EMPTY_YES);
+ if (GET_CODE (insn) == CALL_INSN)
+ note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
+ else if (note)
+ {
+ int code;
+
+ gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
+ || code == CODE_FOR_nop_b);
+ if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
+ note = NULL_RTX;
+ else
+ add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
+ }
+ }
+ }
+#endif
+}
+
+ /* The following function does insn bundling. Bundling means
+ inserting templates and nop insns to fit insn groups into permitted
+ templates. Instruction scheduling uses an NDFA (non-deterministic
+ finite automaton) encoding information about the templates and the
+ inserted nops. The nondeterminism of the automaton permits following
+ all possible insn sequences very quickly.
+
+ Unfortunately it is not possible to get information about inserted
+ nop insns and used templates from the automaton states. The
+ automaton only says that we can issue an insn, possibly inserting
+ some nops before it and using some template. Therefore insn
+ bundling in this function is implemented by using a DFA
+ (deterministic finite automaton). We follow all possible insn
+ sequences by inserting 0-2 nops (that is what the NDFA describes for
+ insn scheduling) before/after each insn being bundled. We know the
+ start of a simulated processor cycle from insn scheduling (an insn
+ starting a new cycle has TImode).
+
+ A simple implementation of insn bundling would create an enormous
+ number of possible insn sequences satisfying the information about new
+ cycle ticks taken from insn scheduling. To make the algorithm
+ practical we use dynamic programming. Each decision (about
+ inserting nops, and implicitly about previous decisions) is described
+ by the structure bundle_state (see above). If we generate the same
+ bundle state (the key is the automaton state after issuing the insns
+ and nops for it), we reuse the already generated one. As a
+ consequence we reject some decisions which cannot improve the
+ solution and reduce the memory needed by the algorithm.
+
+ When we reach the end of an EBB (extended basic block), we choose
+ the best sequence and then, moving back through the EBB, insert
+ templates for the best alternative. The templates are obtained by
+ querying the automaton state for each insn in the chosen bundle
+ states.
+
+ So the algorithm makes two (forward and backward) passes through the
+ EBB. */
+
+static void
+bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
+{
+ struct bundle_state *curr_state, *next_state, *best_state;
+ rtx insn, next_insn;
+ int insn_num;
+ int i, bundle_end_p, only_bundle_end_p, asm_p;
+ int pos = 0, max_pos, template0, template1;
+ rtx b;
+ rtx nop;
+ enum attr_type type;
+
+ insn_num = 0;
+ /* Count insns in the EBB. */
+ for (insn = NEXT_INSN (prev_head_insn);
+ insn && insn != tail;
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ insn_num++;
+ if (insn_num == 0)
+ return;
+ bundling_p = 1;
+ dfa_clean_insn_cache ();
+ initiate_bundle_state_table ();
+ index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
+ /* First (forward) pass -- generation of bundle states. */
+ curr_state = get_free_bundle_state ();
+ curr_state->insn = NULL;
+ curr_state->before_nops_num = 0;
+ curr_state->after_nops_num = 0;
+ curr_state->insn_num = 0;
+ curr_state->cost = 0;
+ curr_state->accumulated_insns_num = 0;
+ curr_state->branch_deviation = 0;
+ curr_state->middle_bundle_stops = 0;
+ curr_state->next = NULL;
+ curr_state->originator = NULL;
+ state_reset (curr_state->dfa_state);
+ index_to_bundle_states [0] = curr_state;
+ insn_num = 0;
+ /* Shift the cycle mark if it is put on an insn which could be ignored. */
+ for (insn = NEXT_INSN (prev_head_insn);
+ insn != tail;
+ insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ && GET_MODE (insn) == TImode)
+ {
+ PUT_MODE (insn, VOIDmode);
+ for (next_insn = NEXT_INSN (insn);
+ next_insn != tail;
+ next_insn = NEXT_INSN (next_insn))
+ if (INSN_P (next_insn)
+ && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (next_insn)) != USE
+ && GET_CODE (PATTERN (next_insn)) != CLOBBER
+ && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
+ {
+ PUT_MODE (next_insn, TImode);
+ break;
+ }
+ }
+ /* Forward pass: generation of bundle states. */
+ for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
+ insn != NULL_RTX;
+ insn = next_insn)
+ {
+ gcc_assert (INSN_P (insn)
+ && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER);
+ type = ia64_safe_type (insn);
+ next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
+ insn_num++;
+ index_to_bundle_states [insn_num] = NULL;
+ for (curr_state = index_to_bundle_states [insn_num - 1];
+ curr_state != NULL;
+ curr_state = next_state)
+ {
+ pos = curr_state->accumulated_insns_num % 3;
+ next_state = curr_state->next;
+ /* An asm insn is always placed in a separate bundle, so we
+ must fill up the current bundle in order to start the
+ subsequent asm insn in a new bundle. */
+ only_bundle_end_p
+ = (next_insn != NULL_RTX
+ && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
+ && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
+ /* We may fill up the current bundle if it is the cycle end
+ without a group barrier. */
+ bundle_end_p
+ = (only_bundle_end_p || next_insn == NULL_RTX
+ || (GET_MODE (next_insn) == TImode
+ && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
+ if (type == TYPE_F || type == TYPE_B || type == TYPE_L
+ || type == TYPE_S)
+ issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
+ only_bundle_end_p);
+ issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
+ only_bundle_end_p);
+ issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
+ only_bundle_end_p);
+ }
+ gcc_assert (index_to_bundle_states [insn_num]);
+ for (curr_state = index_to_bundle_states [insn_num];
+ curr_state != NULL;
+ curr_state = curr_state->next)
+ if (verbose >= 2 && dump)
+ {
+ /* This structure is taken from the generated code of the
+ pipeline hazard recognizer (see file insn-attrtab.c).
+ Please don't forget to change the structure if a new
+ automaton is added to the .md file. */
+ struct DFA_chip
+ {
+ unsigned short one_automaton_state;
+ unsigned short oneb_automaton_state;
+ unsigned short two_automaton_state;
+ unsigned short twob_automaton_state;
+ };
+
+ fprintf
+ (dump,
+ "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
+ curr_state->unique_num,
+ (curr_state->originator == NULL
+ ? -1 : curr_state->originator->unique_num),
+ curr_state->cost,
+ curr_state->before_nops_num, curr_state->after_nops_num,
+ curr_state->accumulated_insns_num, curr_state->branch_deviation,
+ curr_state->middle_bundle_stops,
+ ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
+ INSN_UID (insn));
+ }
+ }
+
+ /* We should find a solution because the second insn scheduling
+ pass has found one. */
+ gcc_assert (index_to_bundle_states [insn_num]);
+ /* Find a state corresponding to the best insn sequence. */
+ best_state = NULL;
+ for (curr_state = index_to_bundle_states [insn_num];
+ curr_state != NULL;
+ curr_state = curr_state->next)
+ /* We only consider states whose last bundle is completely
+ filled. Among those, we prefer insn sequences with minimal
+ cost, then with the fewest inserted nops, and finally with
+ branch insns placed in the 3rd slots. */
+ if (curr_state->accumulated_insns_num % 3 == 0
+ && (best_state == NULL || best_state->cost > curr_state->cost
+ || (best_state->cost == curr_state->cost
+ && (curr_state->accumulated_insns_num
+ < best_state->accumulated_insns_num
+ || (curr_state->accumulated_insns_num
+ == best_state->accumulated_insns_num
+ && (curr_state->branch_deviation
+ < best_state->branch_deviation
+ || (curr_state->branch_deviation
+ == best_state->branch_deviation
+ && curr_state->middle_bundle_stops
+ < best_state->middle_bundle_stops)))))))
+ best_state = curr_state;
+ /* Second (backward) pass: adding nops and templates. */
+ gcc_assert (best_state);
+ insn_num = best_state->before_nops_num;
+ template0 = template1 = -1;
+ for (curr_state = best_state;
+ curr_state->originator != NULL;
+ curr_state = curr_state->originator)
+ {
+ insn = curr_state->insn;
+ asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0);
+ insn_num++;
+ if (verbose >= 2 && dump)
+ {
+ struct DFA_chip
+ {
+ unsigned short one_automaton_state;
+ unsigned short oneb_automaton_state;
+ unsigned short two_automaton_state;
+ unsigned short twob_automaton_state;
+ };
+
+ fprintf
+ (dump,
+ "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
+ curr_state->unique_num,
+ (curr_state->originator == NULL
+ ? -1 : curr_state->originator->unique_num),
+ curr_state->cost,
+ curr_state->before_nops_num, curr_state->after_nops_num,
+ curr_state->accumulated_insns_num, curr_state->branch_deviation,
+ curr_state->middle_bundle_stops,
+ ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
+ INSN_UID (insn));
+ }
+ /* Find the position in the current bundle window. The window can
+ contain at most two bundles. A two-bundle window means that
+ the processor will perform two bundle rotations. */
+ max_pos = get_max_pos (curr_state->dfa_state);
+ if (max_pos == 6
+ /* A negative template number means that the
+ processor did one bundle rotation. */
+ || (max_pos == 3 && template0 < 0))
+ {
+ /* We are at the end of the window -- find template(s) for
+ its bundle(s). */
+ pos = max_pos;
+ if (max_pos == 3)
+ template0 = get_template (curr_state->dfa_state, 3);
+ else
+ {
+ template1 = get_template (curr_state->dfa_state, 3);
+ template0 = get_template (curr_state->dfa_state, 6);
+ }
+ }
+ if (max_pos > 3 && template1 < 0)
+ /* It may happen when we have the stop inside a bundle. */
+ {
+ gcc_assert (pos <= 3);
+ template1 = get_template (curr_state->dfa_state, 3);
+ pos += 3;
+ }
+ if (!asm_p)
+ /* Emit nops after the current insn. */
+ for (i = 0; i < curr_state->after_nops_num; i++)
+ {
+ nop = gen_nop ();
+ emit_insn_after (nop, insn);
+ pos--;
+ gcc_assert (pos >= 0);
+ if (pos % 3 == 0)
+ {
+ /* We are at the start of a bundle: emit the template
+ (it should be defined). */
+ gcc_assert (template0 >= 0);
+ ia64_add_bundle_selector_before (template0, nop);
+ /* If we have a two-bundle window, we make one bundle
+ rotation. Otherwise template0 will be undefined
+ (a negative value). */
+ template0 = template1;
+ template1 = -1;
+ }
+ }
+ /* Move the position backward in the window. A group barrier
+ has no slot; an asm insn takes a whole bundle. */
+ if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
+ && GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ pos--;
+ /* Long insn takes 2 slots. */
+ if (ia64_safe_type (insn) == TYPE_L)
+ pos--;
+ gcc_assert (pos >= 0);
+ if (pos % 3 == 0
+ && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
+ && GET_CODE (PATTERN (insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (insn)) < 0)
+ {
+ /* The current insn is at the bundle start: emit the
+ template. */
+ gcc_assert (template0 >= 0);
+ ia64_add_bundle_selector_before (template0, insn);
+ b = PREV_INSN (insn);
+ insn = b;
+ /* See comment above in analogous place for emitting nops
+ after the insn. */
+ template0 = template1;
+ template1 = -1;
+ }
+ /* Emit nops before the current insn. */
+ for (i = 0; i < curr_state->before_nops_num; i++)
+ {
+ nop = gen_nop ();
+ ia64_emit_insn_before (nop, insn);
+ nop = PREV_INSN (insn);
+ insn = nop;
+ pos--;
+ gcc_assert (pos >= 0);
+ if (pos % 3 == 0)
+ {
+ /* See comment above in analogous place for emitting nops
+ after the insn. */
+ gcc_assert (template0 >= 0);
+ ia64_add_bundle_selector_before (template0, insn);
+ b = PREV_INSN (insn);
+ insn = b;
+ template0 = template1;
+ template1 = -1;
+ }
+ }
+ }
+
+#ifdef ENABLE_CHECKING
+ {
+ /* Verify that middle_bundle_stops was computed correctly. */
+ int num = best_state->middle_bundle_stops;
+ bool start_bundle = true, end_bundle = false;
+
+ for (insn = NEXT_INSN (prev_head_insn);
+ insn && insn != tail;
+ insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ continue;
+ if (recog_memoized (insn) == CODE_FOR_bundle_selector)
+ start_bundle = true;
+ else
+ {
+ rtx next_insn;
+
+ for (next_insn = NEXT_INSN (insn);
+ next_insn && next_insn != tail;
+ next_insn = NEXT_INSN (next_insn))
+ if (INSN_P (next_insn)
+ && (ia64_safe_itanium_class (next_insn)
+ != ITANIUM_CLASS_IGNORE
+ || recog_memoized (next_insn)
+ == CODE_FOR_bundle_selector)
+ && GET_CODE (PATTERN (next_insn)) != USE
+ && GET_CODE (PATTERN (next_insn)) != CLOBBER)
+ break;
+
+ end_bundle = next_insn == NULL_RTX
+ || next_insn == tail
+ || (INSN_P (next_insn)
+ && recog_memoized (next_insn)
+ == CODE_FOR_bundle_selector);
+ if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
+ && !start_bundle && !end_bundle
+ && next_insn
+ && GET_CODE (PATTERN (next_insn)) != ASM_INPUT
+ && asm_noperands (PATTERN (next_insn)) < 0)
+ num--;
+
+ start_bundle = false;
+ }
+ }
+
+ gcc_assert (num == 0);
+ }
+#endif
+
+ free (index_to_bundle_states);
+ finish_bundle_state_table ();
+ bundling_p = 0;
+ dfa_clean_insn_cache ();
+}
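+
+/* Illustrative sketch (not part of the port): the dynamic programming
+ above merges search states that share a key -- the automaton state
+ reached at a given insn index -- keeping only the better candidate.
+ The helper below is hypothetical and compiled out; it only shows the
+ shape of the merge step that the port's insert_bundle_state performs. */
+#if 0
+static struct bundle_state *
+merge_bundle_state (struct bundle_state *existing,
+ struct bundle_state *candidate)
+{
+ /* The caller's hash lookup has already established that both states
+ have equal keys, so only the cost metrics decide which survives. */
+ if (existing == NULL || candidate->cost < existing->cost)
+ return candidate;
+ return existing;
+}
+#endif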
+
+/* The following function is called at the end of scheduling a BB or
+ an EBB. After reload, it inserts stop bits and does insn bundling. */
+
+static void
+ia64_sched_finish (FILE *dump, int sched_verbose)
+{
+ if (sched_verbose)
+ fprintf (dump, "// Finishing schedule.\n");
+ if (!reload_completed)
+ return;
+
+ final_emit_insn_group_barriers (dump);
+ bundling (dump, sched_verbose, current_sched_info->prev_head,
+ current_sched_info->next_tail);
+ if (sched_verbose && dump)
+ fprintf (dump, "// finishing %d-%d\n",
+ INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
+ INSN_UID (PREV_INSN (current_sched_info->next_tail)));
+}
+
+/* The following function inserts stop bits in a scheduled BB or EBB. */
+
+static void
+final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
+{
+ rtx insn;
+ int need_barrier_p = 0;
+ int seen_good_insn = 0;
+
+ init_insn_group_barriers ();
+
+ for (insn = NEXT_INSN (current_sched_info->prev_head);
+ insn != current_sched_info->next_tail;
+ insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == BARRIER)
+ {
+ rtx last = prev_active_insn (insn);
+
+ if (! last)
+ continue;
+ if (GET_CODE (last) == JUMP_INSN
+ && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
+ last = prev_active_insn (last);
+ if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
+
+ init_insn_group_barriers ();
+ seen_good_insn = 0;
+ need_barrier_p = 0;
+ }
+ else if (NONDEBUG_INSN_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
+ {
+ init_insn_group_barriers ();
+ seen_good_insn = 0;
+ need_barrier_p = 0;
+ }
+ else if (need_barrier_p || group_barrier_needed (insn)
+ || (mflag_sched_stop_bits_after_every_cycle
+ && GET_MODE (insn) == TImode
+ && seen_good_insn))
+ {
+ if (TARGET_EARLY_STOP_BITS)
+ {
+ rtx last;
+
+ for (last = insn;
+ last != current_sched_info->prev_head;
+ last = PREV_INSN (last))
+ if (INSN_P (last) && GET_MODE (last) == TImode
+ && stops_p [INSN_UID (last)])
+ break;
+ if (last == current_sched_info->prev_head)
+ last = insn;
+ last = prev_active_insn (last);
+ if (last
+ && recog_memoized (last) != CODE_FOR_insn_group_barrier)
+ emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
+ last);
+ init_insn_group_barriers ();
+ for (last = NEXT_INSN (last);
+ last != insn;
+ last = NEXT_INSN (last))
+ if (INSN_P (last))
+ {
+ group_barrier_needed (last);
+ if (recog_memoized (last) >= 0
+ && important_for_bundling_p (last))
+ seen_good_insn = 1;
+ }
+ }
+ else
+ {
+ emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+ insn);
+ init_insn_group_barriers ();
+ seen_good_insn = 0;
+ }
+ group_barrier_needed (insn);
+ if (recog_memoized (insn) >= 0
+ && important_for_bundling_p (insn))
+ seen_good_insn = 1;
+ }
+ else if (recog_memoized (insn) >= 0
+ && important_for_bundling_p (insn))
+ seen_good_insn = 1;
+ need_barrier_p = (GET_CODE (insn) == CALL_INSN
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0);
+ }
+ }
+}
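+
+/* For reference: in the emitted assembly, an insn group barrier shows
+ up as the ';;' stop bit, e.g. (schematic, hypothetical insns):
+
+ add r14 = r32, r33
+ ;;
+ ld8 r15 = [r14]
+
+ Only the placement of ';;' between the groups matters here. */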
+
+
+
+/* The following function returns the lookahead depth used by the
+ multipass DFA insn scheduler; a positive value enables it. */
+
+static int
+ia64_first_cycle_multipass_dfa_lookahead (void)
+{
+ return (reload_completed ? 6 : 4);
+}
+
+/* The following function initializes the variable `dfa_pre_cycle_insn'. */
+
+static void
+ia64_init_dfa_pre_cycle_insn (void)
+{
+ if (temp_dfa_state == NULL)
+ {
+ dfa_state_size = state_size ();
+ temp_dfa_state = xmalloc (dfa_state_size);
+ prev_cycle_state = xmalloc (dfa_state_size);
+ }
+ dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
+ PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
+ recog_memoized (dfa_pre_cycle_insn);
+ dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
+ PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
+ recog_memoized (dfa_stop_insn);
+}
+
+/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
+ used by the DFA insn scheduler. */
+
+static rtx
+ia64_dfa_pre_cycle_insn (void)
+{
+ return dfa_pre_cycle_insn;
+}
+
+/* The following function returns TRUE if PRODUCER (of type ilog or
+ ld) produces the address for CONSUMER (of type st or stf). */
+
+int
+ia64_st_address_bypass_p (rtx producer, rtx consumer)
+{
+ rtx dest, reg, mem;
+
+ gcc_assert (producer && consumer);
+ dest = ia64_single_set (producer);
+ gcc_assert (dest);
+ reg = SET_DEST (dest);
+ gcc_assert (reg);
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ gcc_assert (GET_CODE (reg) == REG);
+
+ dest = ia64_single_set (consumer);
+ gcc_assert (dest);
+ mem = SET_DEST (dest);
+ gcc_assert (mem && GET_CODE (mem) == MEM);
+ return reg_mentioned_p (reg, mem);
+}
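+
+/* Schematic example of the pattern accepted above (hypothetical
+ registers):
+ producer: (set (reg r14) (plus (reg r32) (reg r33)))
+ consumer: (set (mem (reg r14)) (reg r35))
+ R14 feeds the store address, so the bypass applies. */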
+
+/* The following function returns TRUE if PRODUCER (of type ilog or
+ ld) produces the address for CONSUMER (of type ld or fld). */
+
+int
+ia64_ld_address_bypass_p (rtx producer, rtx consumer)
+{
+ rtx dest, src, reg, mem;
+
+ gcc_assert (producer && consumer);
+ dest = ia64_single_set (producer);
+ gcc_assert (dest);
+ reg = SET_DEST (dest);
+ gcc_assert (reg);
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ gcc_assert (GET_CODE (reg) == REG);
+
+ src = ia64_single_set (consumer);
+ gcc_assert (src);
+ mem = SET_SRC (src);
+ gcc_assert (mem);
+
+ if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
+ mem = XVECEXP (mem, 0, 0);
+ else if (GET_CODE (mem) == IF_THEN_ELSE)
+ /* ??? Is this bypass necessary for ld.c? */
+ {
+ gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
+ mem = XEXP (mem, 1);
+ }
+
+ while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+
+ if (GET_CODE (mem) == UNSPEC)
+ {
+ int c = XINT (mem, 1);
+
+ gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
+ || c == UNSPEC_LDSA);
+ mem = XVECEXP (mem, 0, 0);
+ }
+
+ /* Note that LO_SUM is used for GOT loads. */
+ gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
+
+ return reg_mentioned_p (reg, mem);
+}
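+
+/* The UNSPEC/IF_THEN_ELSE unwrapping above handles the speculative
+ and advanced load forms (ld.s, ld.a, ld.sa and the check ld.c),
+ whose patterns bury the memory operand one level deeper. */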
+
+/* The following function returns TRUE if INSN produces an address for a
+ load/store insn. We place such insns into an M slot because that
+ decreases their latency. The rtx `call' flag is set on such insns
+ by ia64_dependencies_evaluation_hook, which this predicate reads. */
+
+int
+ia64_produce_address_p (rtx insn)
+{
+ return insn->call;
+}
+
+
+/* Emit pseudo-ops for the assembler to describe predicate relations.
+ At present this assumes that we only consider predicate pairs to
+ be mutex, and that the assembler can deduce proper values from
+ straight-line code. */
+
+static void
+emit_predicate_relation_info (void)
+{
+ basic_block bb;
+
+ FOR_EACH_BB_REVERSE (bb)
+ {
+ int r;
+ rtx head = BB_HEAD (bb);
+
+ /* We only need such notes at code labels. */
+ if (GET_CODE (head) != CODE_LABEL)
+ continue;
+ if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
+ head = NEXT_INSN (head);
+
+ /* Skip p0, which may be thought to be live due to (reg:DI p0)
+ grabbing the entire block of predicate registers. */
+ for (r = PR_REG (2); r < PR_REG (64); r += 2)
+ if (REGNO_REG_SET_P (df_get_live_in (bb), r))
+ {
+ rtx p = gen_rtx_REG (BImode, r);
+ rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
+ if (head == BB_END (bb))
+ BB_END (bb) = n;
+ head = n;
+ }
+ }
+
+ /* Look for conditional calls that do not return, and protect predicate
+ relations around them. Otherwise the assembler will assume the call
+ returns, and complain about uses of call-clobbered predicates after
+ the call. */
+ FOR_EACH_BB_REVERSE (bb)
+ {
+ rtx insn = BB_HEAD (bb);
+
+ while (1)
+ {
+ if (GET_CODE (insn) == CALL_INSN
+ && GET_CODE (PATTERN (insn)) == COND_EXEC
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX))
+ {
+ rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
+ rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
+ if (BB_HEAD (bb) == insn)
+ BB_HEAD (bb) = b;
+ if (BB_END (bb) == insn)
+ BB_END (bb) = a;
+ }
+
+ if (insn == BB_END (bb))
+ break;
+ insn = NEXT_INSN (insn);
+ }
+ }
+}
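+
+/* Schematically, the directives emitted above look like
+ .pred.rel.mutex p6, p7
+ at a label where p6/p7 are live, and
+ .pred.safe_across_calls ...
+ around conditional noreturn calls; the exact operand syntax comes
+ from the corresponding patterns in ia64.md. */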
+
+/* Perform machine dependent operations on the rtl chain INSNS. */
+
+static void
+ia64_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ /* If optimizing, we'll have split before scheduling. */
+ if (optimize == 0)
+ split_all_insns ();
+
+ if (optimize && flag_schedule_insns_after_reload
+ && dbg_cnt (ia64_sched2))
+ {
+ timevar_push (TV_SCHED2);
+ ia64_final_schedule = 1;
+
+ initiate_bundle_states ();
+ ia64_nop = make_insn_raw (gen_nop ());
+ PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
+ recog_memoized (ia64_nop);
+ clocks_length = get_max_uid () + 1;
+ stops_p = XCNEWVEC (char, clocks_length);
+
+ if (ia64_tune == PROCESSOR_ITANIUM2)
+ {
+ pos_1 = get_cpu_unit_code ("2_1");
+ pos_2 = get_cpu_unit_code ("2_2");
+ pos_3 = get_cpu_unit_code ("2_3");
+ pos_4 = get_cpu_unit_code ("2_4");
+ pos_5 = get_cpu_unit_code ("2_5");
+ pos_6 = get_cpu_unit_code ("2_6");
+ _0mii_ = get_cpu_unit_code ("2b_0mii.");
+ _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
+ _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
+ _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
+ _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
+ _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
+ _0mib_ = get_cpu_unit_code ("2b_0mib.");
+ _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
+ _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
+ _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
+ _1mii_ = get_cpu_unit_code ("2b_1mii.");
+ _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
+ _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
+ _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
+ _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
+ _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
+ _1mib_ = get_cpu_unit_code ("2b_1mib.");
+ _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
+ _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
+ _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
+ }
+ else
+ {
+ pos_1 = get_cpu_unit_code ("1_1");
+ pos_2 = get_cpu_unit_code ("1_2");
+ pos_3 = get_cpu_unit_code ("1_3");
+ pos_4 = get_cpu_unit_code ("1_4");
+ pos_5 = get_cpu_unit_code ("1_5");
+ pos_6 = get_cpu_unit_code ("1_6");
+ _0mii_ = get_cpu_unit_code ("1b_0mii.");
+ _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
+ _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
+ _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
+ _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
+ _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
+ _0mib_ = get_cpu_unit_code ("1b_0mib.");
+ _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
+ _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
+ _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
+ _1mii_ = get_cpu_unit_code ("1b_1mii.");
+ _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
+ _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
+ _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
+ _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
+ _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
+ _1mib_ = get_cpu_unit_code ("1b_1mib.");
+ _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
+ _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
+ _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
+ }
+
+ if (flag_selective_scheduling2
+ && !maybe_skip_selective_scheduling ())
+ run_selective_scheduling ();
+ else
+ schedule_ebbs ();
+
+ /* Redo the alignment computation, as it might have gone wrong. */
+ compute_alignments ();
+
+ /* We cannot reuse this one because it has been corrupted by the
+ evil glat. */
+ finish_bundle_states ();
+ free (stops_p);
+ stops_p = NULL;
+ emit_insn_group_barriers (dump_file);
+
+ ia64_final_schedule = 0;
+ timevar_pop (TV_SCHED2);
+ }
+ else
+ emit_all_insn_group_barriers (dump_file);
+
+ df_analyze ();
+
+ /* A call must not be the last instruction in a function, so that the
+ return address is still within the function, so that unwinding works
+ properly. Note that IA-64 differs from dwarf2 on this point. */
+ if (ia64_except_unwind_info (&global_options) == UI_TARGET)
+ {
+ rtx insn;
+ int saw_stop = 0;
+
+ insn = get_last_insn ();
+ if (! INSN_P (insn))
+ insn = prev_active_insn (insn);
+ if (insn)
+ {
+ /* Skip over insns that expand to nothing. */
+ while (GET_CODE (insn) == INSN
+ && get_attr_empty (insn) == EMPTY_YES)
+ {
+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
+ saw_stop = 1;
+ insn = prev_active_insn (insn);
+ }
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ if (! saw_stop)
+ emit_insn (gen_insn_group_barrier (GEN_INT (3)));
+ emit_insn (gen_break_f ());
+ emit_insn (gen_insn_group_barrier (GEN_INT (3)));
+ }
+ }
+ }
+
+ emit_predicate_relation_info ();
+
+ if (flag_var_tracking)
+ {
+ timevar_push (TV_VAR_TRACKING);
+ variable_tracking_main ();
+ timevar_pop (TV_VAR_TRACKING);
+ }
+ df_finish_pass (false);
+}
+
+/* Return true if REGNO is used by the epilogue. */
+
+int
+ia64_epilogue_uses (int regno)
+{
+ switch (regno)
+ {
+ case R_GR (1):
+ /* With a call to a function in another module, we will write a new
+ value to "gp". After returning from such a call, we need to make
+ sure the function restores the original gp-value, even if the
+ function itself does not use the gp anymore. */
+ return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
+
+ case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
+ case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
+ /* For functions defined with the syscall_linkage attribute, all
+ input registers are marked as live at all function exits. This
+ prevents the register allocator from using the input registers,
+ which in turn makes it possible to restart a system call after
+ an interrupt without having to save/restore the input registers.
+ This also prevents kernel data from leaking to application code. */
+ return lookup_attribute ("syscall_linkage",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
+
+ case R_BR (0):
+ /* Conditional return patterns can't represent the use of `b0' as
+ the return address, so we force the value live this way. */
+ return 1;
+
+ case AR_PFS_REGNUM:
+ /* Likewise for ar.pfs, which is used by br.ret. */
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return true if REGNO is used by the frame unwinder. */
+
+int
+ia64_eh_uses (int regno)
+{
+ unsigned int r;
+
+ if (! reload_completed)
+ return 0;
+
+ if (regno == 0)
+ return 0;
+
+ for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
+ if (regno == current_frame_info.r[r]
+ || regno == emitted_frame_related_regs[r])
+ return 1;
+
+ return 0;
+}
+
+/* Return true if this goes in small data/bss. */
+
+/* ??? We could also support our own long data here, generating
+ movl/add/ld8 instead of addl/ld8/ld8. This makes the code bigger,
+ but should make the code faster because there is one fewer load.
+ This also includes incomplete types which can't go in sdata/sbss. */
+
+static bool
+ia64_in_small_data_p (const_tree exp)
+{
+ if (TARGET_NO_SDATA)
+ return false;
+
+ /* We want to merge strings, so we never consider them small data. */
+ if (TREE_CODE (exp) == STRING_CST)
+ return false;
+
+ /* Functions are never small data. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+
+ if (strcmp (section, ".sdata") == 0
+ || strncmp (section, ".sdata.", 7) == 0
+ || strncmp (section, ".gnu.linkonce.s.", 16) == 0
+ || strcmp (section, ".sbss") == 0
+ || strncmp (section, ".sbss.", 6) == 0
+ || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
+ return true;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in sdata because it might be too big when completed. */
+ if (size > 0 && size <= ia64_section_threshold)
+ return true;
+ }
+
+ return false;
+}
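+
+/* For example, with a small-data threshold of 8 bytes, a 4-byte
+ "static int x;" is placed in .sbss/.sdata, while an incomplete
+ "extern struct s ext;" (size 0 here) is not, as explained above. */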
+
+/* Output assembly directives for prologue regions. */
+
+/* True if the current basic block is the last one in the function. */
+
+static bool last_block;
+
+/* True if we need a copy_state command at the start of the next block. */
+
+static bool need_copy_state;
+
+#ifndef MAX_ARTIFICIAL_LABEL_BYTES
+# define MAX_ARTIFICIAL_LABEL_BYTES 30
+#endif
+
+/* Emit a debugging label after a call-frame-related insn. We'd
+ rather output the label right away, but we'd have to output it
+ after, not before, the instruction, and the instruction has not
+ been output yet. So we emit the label after the insn, delete it to
+ avoid introducing basic blocks, and mark it as preserved, such that
+ it is still output, given that it is referenced in debug info. */
+
+static const char *
+ia64_emit_deleted_label_after_insn (rtx insn)
+{
+ char label[MAX_ARTIFICIAL_LABEL_BYTES];
+ rtx lb = gen_label_rtx ();
+ rtx label_insn = emit_label_after (lb, insn);
+
+ LABEL_PRESERVE_P (lb) = 1;
+
+ delete_insn (label_insn);
+
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
+
+ return xstrdup (label);
+}
+
+/* Define the CFA after INSN with the steady-state definition. */
+
+static void
+ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
+{
+ rtx fp = frame_pointer_needed
+ ? hard_frame_pointer_rtx
+ : stack_pointer_rtx;
+ const char *label = ia64_emit_deleted_label_after_insn (insn);
+
+ if (!frame)
+ return;
+
+ dwarf2out_def_cfa
+ (label, REGNO (fp),
+ ia64_initial_elimination_offset
+ (REGNO (arg_pointer_rtx), REGNO (fp))
+ + ARG_POINTER_CFA_OFFSET (current_function_decl));
+}
+
+/* All we need to do here is avoid a crash in the generic dwarf2
+ processing. The real CFA definition is set up above. */
+
+static void
+ia64_dwarf_handle_frame_unspec (const char * ARG_UNUSED (label),
+ rtx ARG_UNUSED (pattern),
+ int index)
+{
+ gcc_assert (index == UNSPECV_ALLOC);
+}
+
+/* The generic dwarf2 frame debug info generator does not define a
+ separate region for the very end of the epilogue, so refrain from
+ doing so in the IA64-specific code as well. */
+
+#define IA64_CHANGE_CFA_IN_EPILOGUE 0
+
+/* The function emits unwind directives for the start of an epilogue. */
+
+static void
+process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
+{
+ /* If this isn't the last block of the function, then we need to label the
+ current state, and copy it back in at the start of the next block. */
+
+ if (!last_block)
+ {
+ if (unwind)
+ fprintf (asm_out_file, "\t.label_state %d\n",
+ ++cfun->machine->state_num);
+ need_copy_state = true;
+ }
+
+ if (unwind)
+ fprintf (asm_out_file, "\t.restore sp\n");
+ if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
+ dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
+ STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
+}
+
+/* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */
+
+static void
+process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
+ bool unwind, bool frame)
+{
+ rtx dest = SET_DEST (pat);
+ rtx src = SET_SRC (pat);
+
+ if (dest == stack_pointer_rtx)
+ {
+ if (GET_CODE (src) == PLUS)
+ {
+ rtx op0 = XEXP (src, 0);
+ rtx op1 = XEXP (src, 1);
+
+ gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
+
+ if (INTVAL (op1) < 0)
+ {
+ gcc_assert (!frame_pointer_needed);
+ if (unwind)
+ fprintf (asm_out_file,
+ "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
+ -INTVAL (op1));
+ ia64_dwarf2out_def_steady_cfa (insn, frame);
+ }
+ else
+ process_epilogue (asm_out_file, insn, unwind, frame);
+ }
+ else
+ {
+ gcc_assert (src == hard_frame_pointer_rtx);
+ process_epilogue (asm_out_file, insn, unwind, frame);
+ }
+ }
+ else if (dest == hard_frame_pointer_rtx)
+ {
+ gcc_assert (src == stack_pointer_rtx);
+ gcc_assert (frame_pointer_needed);
+
+ if (unwind)
+ fprintf (asm_out_file, "\t.vframe r%d\n",
+ ia64_dbx_register_number (REGNO (dest)));
+ ia64_dwarf2out_def_steady_cfa (insn, frame);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* This function processes a SET pattern for REG_CFA_REGISTER. */
+
+static void
+process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
+{
+ rtx dest = SET_DEST (pat);
+ rtx src = SET_SRC (pat);
+
+ int dest_regno = REGNO (dest);
+ int src_regno = REGNO (src);
+
+ switch (src_regno)
+ {
+ case BR_REG (0):
+ /* Saving return address pointer. */
+ gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
+ if (unwind)
+ fprintf (asm_out_file, "\t.save rp, r%d\n",
+ ia64_dbx_register_number (dest_regno));
+ break;
+
+ case PR_REG (0):
+ gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
+ if (unwind)
+ fprintf (asm_out_file, "\t.save pr, r%d\n",
+ ia64_dbx_register_number (dest_regno));
+ break;
+
+ case AR_UNAT_REGNUM:
+ gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
+ if (unwind)
+ fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
+ ia64_dbx_register_number (dest_regno));
+ break;
+
+ case AR_LC_REGNUM:
+ gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
+ if (unwind)
+ fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
+ ia64_dbx_register_number (dest_regno));
+ break;
+
+ default:
+ /* Everything else should indicate being stored to memory. */
+ gcc_unreachable ();
+ }
+}
+
+/* This function processes a SET pattern for REG_CFA_OFFSET. */
+
+static void
+process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
+{
+ rtx dest = SET_DEST (pat);
+ rtx src = SET_SRC (pat);
+ int src_regno = REGNO (src);
+ const char *saveop;
+ HOST_WIDE_INT off;
+ rtx base;
+
+ gcc_assert (MEM_P (dest));
+ if (GET_CODE (XEXP (dest, 0)) == REG)
+ {
+ base = XEXP (dest, 0);
+ off = 0;
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
+ base = XEXP (XEXP (dest, 0), 0);
+ off = INTVAL (XEXP (XEXP (dest, 0), 1));
+ }
+
+ if (base == hard_frame_pointer_rtx)
+ {
+ saveop = ".savepsp";
+ off = -off;
+ }
+ else
+ {
+ gcc_assert (base == stack_pointer_rtx);
+ saveop = ".savesp";
+ }
+
+ src_regno = REGNO (src);
+ switch (src_regno)
+ {
+ case BR_REG (0):
+ gcc_assert (!current_frame_info.r[reg_save_b0]);
+ if (unwind)
+ fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
+ saveop, off);
+ break;
+
+ case PR_REG (0):
+ gcc_assert (!current_frame_info.r[reg_save_pr]);
+ if (unwind)
+ fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
+ saveop, off);
+ break;
+
+ case AR_LC_REGNUM:
+ gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
+ if (unwind)
+ fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
+ saveop, off);
+ break;
+
+ case AR_PFS_REGNUM:
+ gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
+ if (unwind)
+ fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
+ saveop, off);
+ break;
+
+ case AR_UNAT_REGNUM:
+ gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
+ if (unwind)
+ fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
+ saveop, off);
+ break;
+
+ case GR_REG (4):
+ case GR_REG (5):
+ case GR_REG (6):
+ case GR_REG (7):
+ if (unwind)
+ fprintf (asm_out_file, "\t.save.g 0x%x\n",
+ 1 << (src_regno - GR_REG (4)));
+ break;
+
+ case BR_REG (1):
+ case BR_REG (2):
+ case BR_REG (3):
+ case BR_REG (4):
+ case BR_REG (5):
+ if (unwind)
+ fprintf (asm_out_file, "\t.save.b 0x%x\n",
+ 1 << (src_regno - BR_REG (1)));
+ break;
+
+ case FR_REG (2):
+ case FR_REG (3):
+ case FR_REG (4):
+ case FR_REG (5):
+ if (unwind)
+ fprintf (asm_out_file, "\t.save.f 0x%x\n",
+ 1 << (src_regno - FR_REG (2)));
+ break;
+
+ case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
+ case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
+ case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
+ case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
+ if (unwind)
+ fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
+ 1 << (src_regno - FR_REG (12)));
+ break;
+
+ default:
+ /* ??? For some reason we mark other general registers, even those
+ we can't represent in the unwind info. Ignore them. */
+ break;
+ }
+}
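+
+/* For example, a save of f18 above produces the frmask bit
+ 1 << (FR_REG (18) - FR_REG (12)) == 0x40, emitted as
+ ".save.gf 0x0, 0x40": in the unwind encoding, mask bits 4-19
+ cover f16-f31 (f2-f5 are handled by .save.f above). */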
+
+/* This function looks at a single insn and emits any directives
+ required to unwind this insn. */
+
+static void
+ia64_asm_unwind_emit (FILE *asm_out_file, rtx insn)
+{
+ bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
+ bool frame = dwarf2out_do_frame ();
+ rtx note, pat;
+ bool handled_one;
+
+ if (!unwind && !frame)
+ return;
+
+ if (NOTE_INSN_BASIC_BLOCK_P (insn))
+ {
+ last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
+
+ /* Restore unwind state from immediately before the epilogue. */
+ if (need_copy_state)
+ {
+ if (unwind)
+ {
+ fprintf (asm_out_file, "\t.body\n");
+ fprintf (asm_out_file, "\t.copy_state %d\n",
+ cfun->machine->state_num);
+ }
+ if (IA64_CHANGE_CFA_IN_EPILOGUE)
+ ia64_dwarf2out_def_steady_cfa (insn, frame);
+ need_copy_state = false;
+ }
+ }
+
+ if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
+ return;
+
+ /* Look for the ALLOC insn. */
+ if (INSN_CODE (insn) == CODE_FOR_alloc)
+ {
+ rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
+ int dest_regno = REGNO (dest);
+
+ /* If this is the final destination for ar.pfs, then this must
+ be the alloc in the prologue. */
+ if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
+ {
+ if (unwind)
+ fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
+ ia64_dbx_register_number (dest_regno));
+ }
+ else
+ {
+ /* This must be an alloc before a sibcall. We must drop the
+ old frame info. The easiest way to drop the old frame
+ info is to ensure we had a ".restore sp" directive
+ followed by a new prologue. If the procedure doesn't
+ have a memory-stack frame, we'll issue a dummy ".restore
+ sp" now. */
+ if (current_frame_info.total_size == 0 && !frame_pointer_needed)
+ /* If we haven't done process_epilogue () yet, do it now. */
+ process_epilogue (asm_out_file, insn, unwind, frame);
+ if (unwind)
+ fprintf (asm_out_file, "\t.prologue\n");
+ }
+ return;
+ }
+
+ handled_one = false;
+ for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_CFA_ADJUST_CFA:
+ pat = XEXP (note, 0);
+ if (pat == NULL)
+ pat = PATTERN (insn);
+ process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
+ handled_one = true;
+ break;
+
+ case REG_CFA_OFFSET:
+ pat = XEXP (note, 0);
+ if (pat == NULL)
+ pat = PATTERN (insn);
+ process_cfa_offset (asm_out_file, pat, unwind);
+ handled_one = true;
+ break;
+
+ case REG_CFA_REGISTER:
+ pat = XEXP (note, 0);
+ if (pat == NULL)
+ pat = PATTERN (insn);
+ process_cfa_register (asm_out_file, pat, unwind);
+ handled_one = true;
+ break;
+
+ case REG_FRAME_RELATED_EXPR:
+ case REG_CFA_DEF_CFA:
+ case REG_CFA_EXPRESSION:
+ case REG_CFA_RESTORE:
+ case REG_CFA_SET_VDRAP:
+ /* Not used in the ia64 port. */
+ gcc_unreachable ();
+
+ default:
+ /* Not a frame-related note. */
+ break;
+ }
+
+ /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
+ explicit action to take. No guessing required. */
+ gcc_assert (handled_one);
+}
+
+/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
+
+static void
+ia64_asm_emit_except_personality (rtx personality)
+{
+ fputs ("\t.personality\t", asm_out_file);
+ output_addr_const (asm_out_file, personality);
+ fputc ('\n', asm_out_file);
+}
+
+/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
+
+static void
+ia64_asm_init_sections (void)
+{
+ exception_section = get_unnamed_section (0, output_section_asm_op,
+ "\t.handlerdata");
+}
+
+/* Implement TARGET_DEBUG_UNWIND_INFO. */
+
+static enum unwind_info_type
+ia64_debug_unwind_info (void)
+{
+ return UI_TARGET;
+}
+
+/* Implement TARGET_EXCEPT_UNWIND_INFO. */
+
+static enum unwind_info_type
+ia64_except_unwind_info (struct gcc_options *opts)
+{
+ /* Honor the --enable-sjlj-exceptions configure switch. */
+#ifdef CONFIG_UNWIND_EXCEPTIONS
+ if (CONFIG_UNWIND_EXCEPTIONS)
+ return UI_SJLJ;
+#endif
+
+ /* For simplicity elsewhere in this file, indicate that all unwind
+ info is disabled if we're not emitting unwind tables. */
+ if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
+ return UI_NONE;
+
+ return UI_TARGET;
+}
+
+enum ia64_builtins
+{
+ IA64_BUILTIN_BSP,
+ IA64_BUILTIN_COPYSIGNQ,
+ IA64_BUILTIN_FABSQ,
+ IA64_BUILTIN_FLUSHRS,
+ IA64_BUILTIN_INFQ,
+ IA64_BUILTIN_HUGE_VALQ,
+ IA64_BUILTIN_max
+};
+
+static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
+
+void
+ia64_init_builtins (void)
+{
+ tree fpreg_type;
+ tree float80_type;
+ tree decl;
+
+ /* The __fpreg type. */
+ fpreg_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (fpreg_type) = 82;
+ layout_type (fpreg_type);
+ (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
+
+ /* The __float80 type. */
+ float80_type = make_node (REAL_TYPE);
+ TYPE_PRECISION (float80_type) = 80;
+ layout_type (float80_type);
+ (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
+
+ /* The __float128 type. */
+ if (!TARGET_HPUX)
+ {
+ tree ftype;
+ tree float128_type = make_node (REAL_TYPE);
+
+ TYPE_PRECISION (float128_type) = 128;
+ layout_type (float128_type);
+ (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
+
+ /* TFmode support builtins. */
+ ftype = build_function_type (float128_type, void_list_node);
+ decl = add_builtin_function ("__builtin_infq", ftype,
+ IA64_BUILTIN_INFQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ia64_builtins[IA64_BUILTIN_INFQ] = decl;
+
+ decl = add_builtin_function ("__builtin_huge_valq", ftype,
+ IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
+
+ ftype = build_function_type_list (float128_type,
+ float128_type,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_fabsq", ftype,
+ IA64_BUILTIN_FABSQ, BUILT_IN_MD,
+ "__fabstf2", NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
+
+ ftype = build_function_type_list (float128_type,
+ float128_type,
+ float128_type,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_copysignq", ftype,
+ IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
+ "__copysigntf3", NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
+ }
+ else
+ /* Under HPUX, this is a synonym for "long double". */
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float128");
+
+ /* Fwrite on VMS is non-standard. */
+ if (TARGET_ABI_OPEN_VMS)
+ {
+ implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
+ implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
+ }
+
+#define def_builtin(name, type, code) \
+ add_builtin_function ((name), (type), (code), BUILT_IN_MD, \
+ NULL, NULL_TREE)
+
+ decl = def_builtin ("__builtin_ia64_bsp",
+ build_function_type (ptr_type_node, void_list_node),
+ IA64_BUILTIN_BSP);
+ ia64_builtins[IA64_BUILTIN_BSP] = decl;
+
+ decl = def_builtin ("__builtin_ia64_flushrs",
+ build_function_type (void_type_node, void_list_node),
+ IA64_BUILTIN_FLUSHRS);
+ ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
+
+#undef def_builtin
+
+ if (TARGET_HPUX)
+ {
+ if (built_in_decls [BUILT_IN_FINITE])
+ set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
+ "_Isfinite");
+ if (built_in_decls [BUILT_IN_FINITEF])
+ set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
+ "_Isfinitef");
+ if (built_in_decls [BUILT_IN_FINITEL])
+ set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
+ "_Isfinitef128");
+ }
+}
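+
+/* Usage sketch (user code on a non-HP-UX target, where the __float128
+ builtins above are registered):
+
+ __float128 x = __builtin_infq ();
+ __float128 y = __builtin_fabsq (x);
+ void *bsp = __builtin_ia64_bsp ();
+*/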
+
+rtx
+ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case IA64_BUILTIN_BSP:
+ if (! target || ! register_operand (target, DImode))
+ target = gen_reg_rtx (DImode);
+ emit_insn (gen_bsp_value (target));
+#ifdef POINTERS_EXTEND_UNSIGNED
+ target = convert_memory_address (ptr_mode, target);
+#endif
+ return target;
+
+ case IA64_BUILTIN_FLUSHRS:
+ emit_insn (gen_flushrs ());
+ return const0_rtx;
+
+ case IA64_BUILTIN_INFQ:
+ case IA64_BUILTIN_HUGE_VALQ:
+ {
+ enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
+
+ tmp = validize_mem (force_const_mem (target_mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (target_mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ case IA64_BUILTIN_FABSQ:
+ case IA64_BUILTIN_COPYSIGNQ:
+ return expand_call (exp, target, ignore);
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return NULL_RTX;
+}
+
+/* Return the ia64 builtin for CODE. */
+
+static tree
+ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= IA64_BUILTIN_max)
+ return error_mark_node;
+
+ return ia64_builtins[code];
+}
+
+/* On HP-UX IA64, aggregate parameters smaller than a word are passed
+ in the most significant bits of the stack slot. */
+
+enum direction
+ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
+{
+ /* Exception to normal case for structures/unions/etc. */
+
+ if (type && AGGREGATE_TYPE_P (type)
+ && int_size_in_bytes (type) < UNITS_PER_WORD)
+ return upward;
+
+ /* Fall back to the default. */
+ return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
+}
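+
+/* E.g. a 2-byte "struct { short s; }" argument (smaller than
+ UNITS_PER_WORD) is padded upward, i.e. placed in the most
+ significant bytes of its stack slot. */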
+
+/* Emit text to declare externally defined variables and functions, because
+ the Intel assembler does not support undefined externals. */
+
+void
+ia64_asm_output_external (FILE *file, tree decl, const char *name)
+{
+ /* We output the name if and only if TREE_SYMBOL_REFERENCED is
+ set in order to avoid putting out names that are never really
+ used. */
+ if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+ {
+ /* maybe_assemble_visibility will return 1 if the assembler
+ visibility directive is output. */
+ int need_visibility = ((*targetm.binds_local_p) (decl)
+ && maybe_assemble_visibility (decl));
+
+#ifdef DO_CRTL_NAMES
+ DO_CRTL_NAMES;
+#endif
+
+ /* GNU as does not need anything here, but the HP linker does
+ need something for external functions. */
+ if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
+ && TREE_CODE (decl) == FUNCTION_DECL)
+ (*targetm.asm_out.globalize_decl_name) (file, decl);
+ else if (need_visibility && !TARGET_GNU_AS)
+ (*targetm.asm_out.globalize_label) (file, name);
+ }
+}
+
+/* Set the SImode div/mod functions; init_integral_libfuncs only
+ initializes modes of word_mode and larger. Rename the TFmode
+ libfuncs using the HP-UX conventions. __divtf3 is used for XFmode;
+ we need to keep it for backward compatibility. */
+
+static void
+ia64_init_libfuncs (void)
+{
+ set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
+ set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
+ set_optab_libfunc (smod_optab, SImode, "__modsi3");
+ set_optab_libfunc (umod_optab, SImode, "__umodsi3");
+
+ set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
+ set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
+ set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
+ set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
+ set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
+ set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
+ set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
+
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
+ set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
+ set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
+ set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
+
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
+ set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
+ set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
+ /* HP-UX 11.23 libc does not have a function for unsigned
+ SImode-to-TFmode conversion. */
+ set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
+}
+
+/* Rename all the TFmode libfuncs using the HPUX conventions. */
+
+static void
+ia64_hpux_init_libfuncs (void)
+{
+ ia64_init_libfuncs ();
+
+ /* The HP SI millicode division and mod functions expect DI arguments.
+ By turning them off completely we avoid using both libgcc and the
+ non-standard millicode routines and use the HP DI millicode routines
+ instead. */
+
+ set_optab_libfunc (sdiv_optab, SImode, 0);
+ set_optab_libfunc (udiv_optab, SImode, 0);
+ set_optab_libfunc (smod_optab, SImode, 0);
+ set_optab_libfunc (umod_optab, SImode, 0);
+
+ set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
+ set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
+ set_optab_libfunc (smod_optab, DImode, "__milli_remI");
+ set_optab_libfunc (umod_optab, DImode, "__milli_remU");
+
+ /* HP-UX libc has TF min/max/abs routines in it. */
+ set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
+ set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
+ set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
+
+ /* ia64_expand_compare uses this. */
+ cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
+
+ /* These should never be used. */
+ set_optab_libfunc (eq_optab, TFmode, 0);
+ set_optab_libfunc (ne_optab, TFmode, 0);
+ set_optab_libfunc (gt_optab, TFmode, 0);
+ set_optab_libfunc (ge_optab, TFmode, 0);
+ set_optab_libfunc (lt_optab, TFmode, 0);
+ set_optab_libfunc (le_optab, TFmode, 0);
+}
+
+/* Rename the division and modulus functions in VMS. */
+
+static void
+ia64_vms_init_libfuncs (void)
+{
+ set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
+ set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
+ set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
+ set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
+ set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
+ set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
+ set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
+ set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
+ abort_libfunc = init_one_libfunc ("decc$abort");
+ memcmp_libfunc = init_one_libfunc ("decc$memcmp");
+#ifdef MEM_LIBFUNCS_INIT
+ MEM_LIBFUNCS_INIT;
+#endif
+}
+
+/* Rename the TFmode libfuncs available from soft-fp in glibc using
+ the HPUX conventions. */
+
+static void
+ia64_sysv4_init_libfuncs (void)
+{
+ ia64_init_libfuncs ();
+
+ /* These functions are not part of the HPUX TFmode interface. We
+ use them instead of _U_Qfcmp, which doesn't work the way we
+ expect. */
+ set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
+ set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
+ set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
+ set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
+ set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
+ set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
+
+ /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
+ glibc doesn't have them. */
+}
+
+/* Use soft-fp. */
+
+static void
+ia64_soft_fp_init_libfuncs (void)
+{
+}
+
+static bool
+ia64_vms_valid_pointer_mode (enum machine_mode mode)
+{
+ return (mode == SImode || mode == DImode);
+}
+
+/* On HP-UX, it is illegal to have relocations in shared segments. */
+
+static int
+ia64_hpux_reloc_rw_mask (void)
+{
+ return 3;
+}
+
+/* For others, relax this so that relocations to local data go in
+ read-only segments, but we still cannot allow global relocations
+ in read-only segments. */
+
+static int
+ia64_reloc_rw_mask (void)
+{
+ return flag_pic ? 3 : 2;
+}
+
+/* Return the section to use for X. The only special thing we do here
+ is to honor small data. */
+
+static section *
+ia64_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (GET_MODE_SIZE (mode) > 0
+ && GET_MODE_SIZE (mode) <= ia64_section_threshold
+ && !TARGET_NO_SDATA)
+ return sdata_section;
+ else
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+static unsigned int
+ia64_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = 0;
+
+ if (strcmp (name, ".sdata") == 0
+ || strncmp (name, ".sdata.", 7) == 0
+ || strncmp (name, ".gnu.linkonce.s.", 16) == 0
+ || strncmp (name, ".sdata2.", 8) == 0
+ || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
+ || strcmp (name, ".sbss") == 0
+ || strncmp (name, ".sbss.", 6) == 0
+ || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
+ flags = SECTION_SMALL;
+
+#if TARGET_ABI_OPEN_VMS
+ if (decl && DECL_ATTRIBUTES (decl)
+ && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
+ flags |= SECTION_VMS_OVERLAY;
+#endif
+
+ flags |= default_section_type_flags (decl, name, reloc);
+ return flags;
+}
+
+/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
+ structure type and that the address of that type should be passed
+ in out0, rather than in r8. */
+
+static bool
+ia64_struct_retval_addr_is_first_parm_p (tree fntype)
+{
+ tree ret_type = TREE_TYPE (fntype);
+
+ /* The Itanium C++ ABI requires that out0, rather than r8, be used
+ as the structure return address parameter, if the return value
+ type has a non-trivial copy constructor or destructor. It is not
+ clear if this same convention should be used for other
+ programming languages. Until G++ 3.4, we incorrectly used r8 for
+ these return values. */
+ return (abi_version_at_least (2)
+ && ret_type
+ && TYPE_MODE (ret_type) == BLKmode
+ && TREE_ADDRESSABLE (ret_type)
+ && strcmp (lang_hooks.name, "GNU C++") == 0);
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, insn, funexp;
+ unsigned int this_parmno;
+ unsigned int this_regno;
+ rtx delta_rtx;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+
+ /* Set things up as ia64_expand_prologue might. */
+ last_scratch_gr_reg = 15;
+
+ memset (&current_frame_info, 0, sizeof (current_frame_info));
+ current_frame_info.spill_cfa_off = -16;
+ current_frame_info.n_input_regs = 1;
+ current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
+
+ /* Mark the end of the (empty) prologue. */
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Figure out whether "this" will be the first parameter (the
+ typical case) or the second parameter (as happens when the
+ virtual function returns certain class objects). */
+ this_parmno
+ = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
+ ? 1 : 0);
+ this_regno = IN_REG (this_parmno);
+ if (!TARGET_REG_NAMES)
+ reg_names[this_regno] = ia64_reg_numbers[this_parmno];
+
+ this_rtx = gen_rtx_REG (Pmode, this_regno);
+
+ /* Apply the constant offset, if required. */
+ delta_rtx = GEN_INT (delta);
+ if (TARGET_ILP32)
+ {
+ rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
+ REG_POINTER (tmp) = 1;
+ if (delta && satisfies_constraint_I (delta_rtx))
+ {
+ emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
+ delta = 0;
+ }
+ else
+ emit_insn (gen_ptr_extend (this_rtx, tmp));
+ }
+ if (delta)
+ {
+ if (!satisfies_constraint_I (delta_rtx))
+ {
+ rtx tmp = gen_rtx_REG (Pmode, 2);
+ emit_move_insn (tmp, delta_rtx);
+ delta_rtx = tmp;
+ }
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
+ }
+
+ /* Apply the offset from the vtable, if required. */
+ if (vcall_offset)
+ {
+ rtx vcall_offset_rtx = GEN_INT (vcall_offset);
+ rtx tmp = gen_rtx_REG (Pmode, 2);
+
+ if (TARGET_ILP32)
+ {
+ rtx t = gen_rtx_REG (ptr_mode, 2);
+ REG_POINTER (t) = 1;
+ emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
+ if (satisfies_constraint_I (vcall_offset_rtx))
+ {
+ emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
+ vcall_offset = 0;
+ }
+ else
+ emit_insn (gen_ptr_extend (tmp, t));
+ }
+ else
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
+
+ if (vcall_offset)
+ {
+ if (!satisfies_constraint_J (vcall_offset_rtx))
+ {
+ rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
+ emit_move_insn (tmp2, vcall_offset_rtx);
+ vcall_offset_rtx = tmp2;
+ }
+ emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
+ }
+
+ if (TARGET_ILP32)
+ emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
+ else
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
+
+ emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (! TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
+ ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
+ insn = get_last_insn ();
+ SIBLING_CALL_P (insn) = 1;
+
+ /* Code generation for calls relies on splitting. */
+ reload_completed = 1;
+ epilogue_completed = 1;
+ try_split (PATTERN (insn), insn, 0);
+
+ emit_barrier ();
+
+ /* Run just enough of rest_of_compilation to get the insns emitted.
+ There's not really enough bulk here to make other passes such as
+ instruction scheduling worth while. Note that use_thunk calls
+ assemble_start_function and assemble_end_function. */
+
+ insn_locators_alloc ();
+ emit_all_insn_group_barriers (NULL);
+ insn = get_insns ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
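+
+/* The emitted thunk is semantically equivalent to (pseudo-C, matching
+ the description above the function):
+
+ this += delta;
+ if (vcall_offset)
+ this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
+ goto function; (tail call)
+*/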
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+ia64_struct_value_rtx (tree fntype,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ if (TARGET_ABI_OPEN_VMS
+ || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
+ return NULL_RTX;
+ return gen_rtx_REG (Pmode, GR_REG (8));
+}
+
+static bool
+ia64_scalar_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode:
+ return true;
+
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ case RFmode:
+ return true;
+
+ case TFmode:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool
+ia64_vector_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ return true;
+
+ case V2SFmode:
+ return true;
+
+ default:
+ return false;
+ }
+}
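+
+/* As an illustration (using GCC's generic vector extension), the modes
+ accepted above correspond to the 8-byte vector types, e.g.
+
+ typedef char v8qi __attribute__ ((vector_size (8))); // V8QImode
+ typedef short v4hi __attribute__ ((vector_size (8))); // V4HImode
+ typedef int v2si __attribute__ ((vector_size (8))); // V2SImode
+ typedef float v2sf __attribute__ ((vector_size (8))); // V2SFmode
+
+ Wider vectors, e.g. 16-byte ones, are rejected here and go through
+ the generic vector lowering instead. */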
+
+/* Implement the FUNCTION_PROFILER macro. */
+
+void
+ia64_output_function_profiler (FILE *file, int labelno)
+{
+ bool indirect_call;
+
+ /* If the function needs a static chain and the static chain
+ register is r15, we use an indirect call so as to bypass
+ the PLT stub in case the executable is dynamically linked,
+ because the stub clobbers r15 as per 5.3.6 of the psABI.
+ We don't need to do that in non-canonical PIC mode. */
+
+ if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
+ {
+ gcc_assert (STATIC_CHAIN_REGNUM == 15);
+ indirect_call = true;
+ }
+ else
+ indirect_call = false;
+
+ if (TARGET_GNU_AS)
+ fputs ("\t.prologue 4, r40\n", file);
+ else
+ fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
+ fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
+
+ if (NO_PROFILE_COUNTERS)
+ fputs ("\tmov out3 = r0\n", file);
+ else
+ {
+ char buf[20];
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+
+ if (TARGET_AUTO_PIC)
+ fputs ("\tmovl out3 = @gprel(", file);
+ else
+ fputs ("\taddl out3 = @ltoff(", file);
+ assemble_name (file, buf);
+ if (TARGET_AUTO_PIC)
+ fputs (")\n", file);
+ else
+ fputs ("), r1\n", file);
+ }
+
+ if (indirect_call)
+ fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
+ fputs ("\t;;\n", file);
+
+ fputs ("\t.save rp, r42\n", file);
+ fputs ("\tmov out2 = b0\n", file);
+ if (indirect_call)
+ fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
+ fputs ("\t.body\n", file);
+ fputs ("\tmov out1 = r1\n", file);
+ if (indirect_call)
+ {
+ fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
+ fputs ("\tmov b6 = r16\n", file);
+ fputs ("\tld8 r1 = [r14]\n", file);
+ fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
+ }
+ else
+ fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
+}
+
+static GTY(()) rtx mcount_func_rtx;
+static rtx
+gen_mcount_func_rtx (void)
+{
+ if (!mcount_func_rtx)
+ mcount_func_rtx = init_one_libfunc ("_mcount");
+ return mcount_func_rtx;
+}
+
+void
+ia64_profile_hook (int labelno)
+{
+ rtx label, ip;
+
+ if (NO_PROFILE_COUNTERS)
+ label = const0_rtx;
+ else
+ {
+ char buf[30];
+ const char *label_name;
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+ label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
+ label = gen_rtx_SYMBOL_REF (Pmode, label_name);
+ SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
+ }
+ ip = gen_reg_rtx (Pmode);
+ emit_insn (gen_ip_value (ip));
+ emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
+ VOIDmode, 3,
+ gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
+ ip, Pmode,
+ label, Pmode);
+}
+
+/* Return the mangling of TYPE if it is an extended fundamental type. */
+
+static const char *
+ia64_mangle_type (const_tree type)
+{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ /* On HP-UX, "long double" is mangled as "e" so __float128 is
+ mangled as "e". */
+ if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
+ return "g";
+ /* On HP-UX, "e" is not available as a mangling of __float80 so use
+ an extended mangling. Elsewhere, "e" is available since long
+ double is 80 bits. */
+ if (TYPE_MODE (type) == XFmode)
+ return TARGET_HPUX ? "u9__float80" : "e";
+ if (TYPE_MODE (type) == RFmode)
+ return "u7__fpreg";
+ return NULL;
+}
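+
+/* For illustration, with the Itanium C++ ABI the returned strings yield
+ manglings such as (the function names are arbitrary examples):
+
+ void f (__float80); // _Z1fe on Linux, _Z1fu9__float80 on HP-UX
+ void g (__fpreg); // _Z1gu7__fpreg everywhere
+
+ __float128 (TFmode) is mangled as "g" except on HP-UX, where it is
+ "long double" and gets the standard "e" via the default handling. */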
+
+/* Return the diagnostic message string if conversion from FROMTYPE to
+ TOTYPE is not allowed, NULL otherwise. */
+static const char *
+ia64_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+ /* Reject nontrivial conversion to or from __fpreg. */
+ if (TYPE_MODE (fromtype) == RFmode
+ && TYPE_MODE (totype) != RFmode
+ && TYPE_MODE (totype) != VOIDmode)
+ return N_("invalid conversion from %<__fpreg%>");
+ if (TYPE_MODE (totype) == RFmode
+ && TYPE_MODE (fromtype) != RFmode)
+ return N_("invalid conversion to %<__fpreg%>");
+ return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+ not permitted on TYPE, NULL otherwise. */
+static const char *
+ia64_invalid_unary_op (int op, const_tree type)
+{
+ /* Reject operations on __fpreg other than unary + or &. */
+ if (TYPE_MODE (type) == RFmode
+ && op != CONVERT_EXPR
+ && op != ADDR_EXPR)
+ return N_("invalid operation on %<__fpreg%>");
+ return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+ not permitted on TYPE1 and TYPE2, NULL otherwise. */
+static const char *
+ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
+{
+ /* Reject operations on __fpreg. */
+ if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
+ return N_("invalid operation on %<__fpreg%>");
+ return NULL;
+}
+
+/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
+static void
+ia64_option_default_params (void)
+{
+ /* Let the scheduler form additional regions. */
+ set_default_param_value (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS, 2);
+
+ /* Set the default values for cache-related parameters. */
+ set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6);
+ set_default_param_value (PARAM_L1_CACHE_LINE_SIZE, 32);
+
+ set_default_param_value (PARAM_SCHED_MEM_TRUE_DEP_COST, 4);
+}
+
+/* HP-UX version_id attribute.
+ For object foo, if the version_id is set to 1234, put out an alias
+ of '.alias foo "foo{1234}"'. We can't use "foo{1234}" in anything
+ other than an alias statement because it is an illegal symbol name. */
+
+static tree
+ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name ATTRIBUTE_UNUSED,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree arg = TREE_VALUE (args);
+
+ if (TREE_CODE (arg) != STRING_CST)
+ {
+ error("version attribute is not a string");
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ return NULL_TREE;
+}
+
+/* Target hook for c_mode_for_suffix. */
+
+static enum machine_mode
+ia64_c_mode_for_suffix (char suffix)
+{
+ if (suffix == 'q')
+ return TFmode;
+ if (suffix == 'w')
+ return XFmode;
+
+ return VOIDmode;
+}
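+
+/* For illustration, this hook is what gives constants such as
+
+ __float128 q = 1.0q; // TFmode
+ __float80 w = 1.0w; // XFmode
+
+ their extended types in C on this target; any other suffix returns
+ VOIDmode and is handled or rejected by the front end. */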
+
+static enum machine_mode
+ia64_promote_function_mode (const_tree type,
+ enum machine_mode mode,
+ int *punsignedp,
+ const_tree funtype,
+ int for_return)
+{
+ /* Special processing required for OpenVMS ... */
+
+ if (!TARGET_ABI_OPEN_VMS)
+ return default_promote_function_mode(type, mode, punsignedp, funtype,
+ for_return);
+
+ /* The HP OpenVMS Calling Standard dated June 2004, which describes
+ HP OpenVMS I64 Version 8.2EFT,
+ chapter 4 "OpenVMS I64 Conventions"
+ section 4.7 "Procedure Linkage"
+ subsection 4.7.5.2, "Normal Register Parameters"
+
+ "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
+ values passed in registers are zero-filled; signed integral values as
+ well as unsigned 32-bit integral values are sign-extended to 64 bits.
+ For all other types passed in the general registers, unused bits are
+ undefined." */
+
+ if (!AGGREGATE_TYPE_P (type)
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ {
+ if (mode == SImode)
+ *punsignedp = 0;
+ return DImode;
+ }
+ else
+ return promote_mode (type, mode, punsignedp);
+}
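+
+/* Worked example of the VMS rule above: an 'unsigned int' argument
+ arrives as SImode with *PUNSIGNEDP == 1; we clear *PUNSIGNEDP and
+ return DImode, so the value is sign-extended to 64 bits as the
+ standard requires for unsigned 32-bit integers. An 'unsigned short'
+ (HImode) keeps *PUNSIGNEDP == 1 and is therefore zero-filled. */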
+
+static GTY(()) rtx ia64_dconst_0_5_rtx;
+
+rtx
+ia64_dconst_0_5 (void)
+{
+ if (! ia64_dconst_0_5_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+ real_from_string (&rv, "0.5");
+ ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
+ }
+ return ia64_dconst_0_5_rtx;
+}
+
+static GTY(()) rtx ia64_dconst_0_375_rtx;
+
+rtx
+ia64_dconst_0_375 (void)
+{
+ if (! ia64_dconst_0_375_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+ real_from_string (&rv, "0.375");
+ ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
+ }
+ return ia64_dconst_0_375_rtx;
+}
+
+static enum machine_mode
+ia64_get_reg_raw_mode (int regno)
+{
+ if (FR_REGNO_P (regno))
+ return XFmode;
+ return default_get_reg_raw_mode(regno);
+}
+
+/* Always default to the .text section until the HP-UX linker is fixed. */
+
+ATTRIBUTE_UNUSED static section *
+ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
+ enum node_frequency freq ATTRIBUTE_UNUSED,
+ bool startup ATTRIBUTE_UNUSED,
+ bool exit ATTRIBUTE_UNUSED)
+{
+ return NULL;
+}
+
+#include "gt-ia64.h"
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
new file mode 100644
index 000000000..8e6d298aa
--- /dev/null
+++ b/gcc/config/ia64/ia64.h
@@ -0,0 +1,1823 @@
+/* Definitions of target machine for GNU compiler. IA-64 version.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+ Contributed by James E. Wilson <wilson@cygnus.com> and
+ David Mosberger <davidm@hpl.hp.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* ??? Look at ABI group documents for list of preprocessor macros and
+ other features required for ABI compliance. */
+
+/* ??? Functions containing a non-local goto target save many registers. Why?
+ See for instance execute/920428-2.c. */
+
+
+/* Run-time target specifications */
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+do { \
+ builtin_assert("cpu=ia64"); \
+ builtin_assert("machine=ia64"); \
+ builtin_define("__ia64"); \
+ builtin_define("__ia64__"); \
+ builtin_define("__itanium__"); \
+ if (TARGET_BIG_ENDIAN) \
+ builtin_define("__BIG_ENDIAN__"); \
+} while (0)
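+
+/* For illustration, user code can key off the macros defined above,
+ e.g.:
+
+ #if defined (__ia64__) && defined (__BIG_ENDIAN__)
+ # define WORDS_ARE_BIG_ENDIAN 1
+ #endif
+
+ where WORDS_ARE_BIG_ENDIAN is just a hypothetical user macro. */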
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "asm_extra", ASM_EXTRA_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#define CC1_SPEC "%(cc1_cpu) "
+
+#define ASM_EXTRA_SPEC ""
+
+/* Variables which are this size or smaller are put in the sdata/sbss
+ sections. */
+extern unsigned int ia64_section_threshold;
+
+/* If the assembler supports thread-local storage, assume that the
+ system does as well. If a particular target system has an
+ assembler that supports TLS -- but the rest of the system does not
+ support TLS -- that system should explicitly define TARGET_HAVE_TLS
+ to false in its own configuration file. */
+#if !defined(TARGET_HAVE_TLS) && defined(HAVE_AS_TLS)
+#define TARGET_HAVE_TLS true
+#endif
+
+#define TARGET_TLS14 (ia64_tls_size == 14)
+#define TARGET_TLS22 (ia64_tls_size == 22)
+#define TARGET_TLS64 (ia64_tls_size == 64)
+
+#define TARGET_HPUX 0
+#define TARGET_HPUX_LD 0
+
+#define TARGET_ABI_OPEN_VMS 0
+
+#ifndef TARGET_ILP32
+#define TARGET_ILP32 0
+#endif
+
+#ifndef HAVE_AS_LTOFFX_LDXMOV_RELOCS
+#define HAVE_AS_LTOFFX_LDXMOV_RELOCS 0
+#endif
+
+/* Values for TARGET_INLINE_FLOAT_DIV, TARGET_INLINE_INT_DIV, and
+ TARGET_INLINE_SQRT. */
+
+enum ia64_inline_type
+{
+ INL_NO = 0,
+ INL_MIN_LAT = 1,
+ INL_MAX_THR = 2
+};
+
+/* Default target_flags if no switches are specified. */
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_DWARF2_ASM)
+#endif
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT 0
+#endif
+
+/* Which processor to schedule for. The cpu attribute defines a list
+ that mirrors this list, so changes to ia64.md must be made at the
+ same time. */
+
+enum processor_type
+{
+ PROCESSOR_ITANIUM, /* Original Itanium. */
+ PROCESSOR_ITANIUM2,
+ PROCESSOR_max
+};
+
+extern enum processor_type ia64_tune;
+
+/* Driver configuration */
+
+/* A C string constant that tells the GCC driver program options to pass to
+ `cc1'. It can also specify how to translate options you give to GCC into
+ options for GCC to pass to `cc1'. */
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{G*}"
+
+/* A C string constant that tells the GCC driver program options to pass to
+ `cc1plus'. It can also specify how to translate options you give to GCC
+ into options for GCC to pass to `cc1plus'. */
+
+/* #define CC1PLUS_SPEC "" */
+
+/* Storage Layout */
+
+/* Define this macro to have the value 1 if the most significant bit in a byte
+ has the lowest number; otherwise define it to have the value zero. */
+
+#define BITS_BIG_ENDIAN 0
+
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+/* Define this macro to have the value 1 if, in a multiword object, the most
+ significant word has the lowest number. */
+
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+#define UNITS_PER_WORD 8
+
+#define POINTER_SIZE (TARGET_ILP32 ? 32 : 64)
+
+/* A C expression whose value is zero if pointers that need to be extended
+ from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and one if
+ they are zero-extended and negative one if there is a ptr_extend operation.
+
+ You need not define this macro if the `POINTER_SIZE' is equal to the width
+ of `Pmode'. */
+/* Need this for 32-bit pointers; see hpux.h for setting it. */
+/* #define POINTERS_EXTEND_UNSIGNED */
+
+/* A macro to update MODE and UNSIGNEDP when an object whose type is TYPE and
+ which has the specified mode and signedness is to be stored in a register.
+ This macro is only called when TYPE is a scalar type. */
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+do \
+ { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode; \
+ } \
+while (0)
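+
+/* For example, a 'short' (HImode) scalar held in a register is widened
+ to SImode by the above; SImode and wider integer modes, and all
+ floating-point modes, are left unchanged. */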
+
+#define PARM_BOUNDARY 64
+
+/* Define this macro if you wish to preserve a certain alignment for the stack
+ pointer. The definition is a C expression for the desired alignment
+ (measured in bits). */
+
+#define STACK_BOUNDARY 128
+
+/* Align frames on double-word (16-byte) boundaries. */
+#ifndef IA64_STACK_ALIGN
+#define IA64_STACK_ALIGN(LOC) (((LOC) + 15) & ~15)
+#endif
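+
+/* E.g. IA64_STACK_ALIGN (40) == 48: frame sizes are rounded up to the
+ next multiple of 16 bytes. */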
+
+#define FUNCTION_BOUNDARY 128
+
+/* Optional x86 80-bit float, quad-precision 128-bit float, and quad-word
+ 128-bit integers all require 128-bit alignment. */
+#define BIGGEST_ALIGNMENT 128
+
+/* If defined, a C expression to compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that the object
+ would ordinarily have. The value of this macro is used instead of that
+ alignment to align the object. */
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* If defined, a C expression to compute the alignment given to a constant that
+ is being placed in memory. EXP is the constant and ALIGN is the
+ alignment that the object would ordinarily have. The value of this macro is
+ used instead of that alignment to align the object. */
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define STRICT_ALIGNMENT 1
+
+/* Define this if you wish to imitate the way many other C compilers handle
+ alignment of bitfields and the structures that contain them.
+ The behavior is that the type written for a bit-field (`int', `short', or
+ other integer type) imposes an alignment for the entire structure, as if the
+ structure really did contain an ordinary field of that type. In addition,
+ the bit-field is placed within the structure so that it would fit within such
+ a field, not crossing a boundary for it. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* An integer expression for the size in bits of the largest integer machine
+ mode that should actually be used. */
+
+/* Allow pairs of registers to be used, which is the intent of the default. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode)
+
+/* By default, the C++ compiler will use function addresses in the
+ vtable entries. Setting this nonzero tells the compiler to use
+ function descriptors instead. The value of this macro says how
+ many words wide the descriptor is (normally 2). It is assumed
+ that the address of a function descriptor may be treated as a
+ pointer to a function.
+
+ For reasons known only to HP, the vtable entries (as opposed to
+ normal function descriptors) are 16 bytes wide in 32-bit mode as
+ well, even though the 3rd and 4th words are unused. */
+#define TARGET_VTABLE_USES_DESCRIPTORS (TARGET_ILP32 ? 4 : 2)
+
+/* Due to silliness in the HPUX linker, vtable entries must be
+ 8-byte aligned even in 32-bit mode. Rather than create multiple
+ ABIs, force this restriction on everyone else too. */
+#define TARGET_VTABLE_ENTRY_ALIGN 64
+
+/* Due to the above, we need extra padding for the data entries below 0
+ to retain the alignment of the descriptors. */
+#define TARGET_VTABLE_DATA_ENTRY_DISTANCE (TARGET_ILP32 ? 2 : 1)
+
+/* Layout of Source Language Data Types */
+
+#define INT_TYPE_SIZE 32
+
+#define SHORT_TYPE_SIZE 16
+
+#define LONG_TYPE_SIZE (TARGET_ILP32 ? 32 : 64)
+
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+
+#define DOUBLE_TYPE_SIZE 64
+
+/* long double is XFmode normally, and TFmode for HPUX. It should be
+ TFmode for VMS as well but we only support up to DFmode now. */
+#define LONG_DOUBLE_TYPE_SIZE \
+ (TARGET_HPUX ? 128 \
+ : TARGET_ABI_OPEN_VMS ? 64 \
+ : 80)
+
+/* We always want the XFmode operations from libgcc2.c, except on VMS
+ where this yields references to unimplemented "insns". */
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE (TARGET_ABI_OPEN_VMS ? 64 : 80)
+
+
+/* On HP-UX, we use the l suffix for TFmode in libgcc2.c. */
+#define LIBGCC2_TF_CEXT l
+
+#define DEFAULT_SIGNED_CHAR 1
+
+/* A C expression for a string describing the name of the data type to use for
+ size values. The typedef name `size_t' is defined using the contents of the
+ string. */
+/* ??? Needs to be defined for P64 code. */
+/* #define SIZE_TYPE */
+
+/* A C expression for a string describing the name of the data type to use for
+ the result of subtracting two pointers. The typedef name `ptrdiff_t' is
+ defined using the contents of the string. See `SIZE_TYPE' above for more
+ information. */
+/* ??? Needs to be defined for P64 code. */
+/* #define PTRDIFF_TYPE */
+
+/* A C expression for a string describing the name of the data type to use for
+ wide characters. The typedef name `wchar_t' is defined using the contents
+ of the string. See `SIZE_TYPE' above for more information. */
+/* #define WCHAR_TYPE */
+
+/* A C expression for the size in bits of the data type for wide characters.
+ This is used in `cpp', which cannot make use of `WCHAR_TYPE'. */
+/* #define WCHAR_TYPE_SIZE */
+
+
+/* Register Basics */
+
+/* Number of hardware registers known to the compiler.
+ We have 128 general registers, 128 floating point registers,
+ 64 predicate registers, 8 branch registers, one frame pointer,
+ and several "application" registers. */
+
+#define FIRST_PSEUDO_REGISTER 334
+
+/* Ranges for the various kinds of registers. */
+#define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3)
+#define GR_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 127)
+#define FR_REGNO_P(REGNO) ((REGNO) >= 128 && (REGNO) <= 255)
+#define FP_REGNO_P(REGNO) ((REGNO) >= 128 && (REGNO) <= 254 && (REGNO) != 159)
+#define PR_REGNO_P(REGNO) ((REGNO) >= 256 && (REGNO) <= 319)
+#define BR_REGNO_P(REGNO) ((REGNO) >= 320 && (REGNO) <= 327)
+#define GENERAL_REGNO_P(REGNO) \
+ (GR_REGNO_P (REGNO) || (REGNO) == FRAME_POINTER_REGNUM)
+
+#define GR_REG(REGNO) ((REGNO) + 0)
+#define FR_REG(REGNO) ((REGNO) + 128)
+#define PR_REG(REGNO) ((REGNO) + 256)
+#define BR_REG(REGNO) ((REGNO) + 320)
+#define OUT_REG(REGNO) ((REGNO) + 120)
+#define IN_REG(REGNO) ((REGNO) + 112)
+#define LOC_REG(REGNO) ((REGNO) + 32)
+
+#define AR_CCV_REGNUM 329
+#define AR_UNAT_REGNUM 330
+#define AR_PFS_REGNUM 331
+#define AR_LC_REGNUM 332
+#define AR_EC_REGNUM 333
+
+#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7))
+#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79))
+#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7))
+
+#define AR_M_REGNO_P(REGNO) ((REGNO) == AR_CCV_REGNUM \
+ || (REGNO) == AR_UNAT_REGNUM)
+#define AR_I_REGNO_P(REGNO) ((REGNO) >= AR_PFS_REGNUM \
+ && (REGNO) < FIRST_PSEUDO_REGISTER)
+#define AR_REGNO_P(REGNO) ((REGNO) >= AR_CCV_REGNUM \
+ && (REGNO) < FIRST_PSEUDO_REGISTER)
+
+
+/* ??? Don't really need two sets of macros. I like this one better because
+ it is less typing. */
+#define R_GR(REGNO) GR_REG (REGNO)
+#define R_FR(REGNO) FR_REG (REGNO)
+#define R_PR(REGNO) PR_REG (REGNO)
+#define R_BR(REGNO) BR_REG (REGNO)
+
+/* An initializer that says which registers are used for fixed purposes all
+ throughout the compiled code and are therefore not available for general
+ allocation.
+
+ r0: constant 0
+ r1: global pointer (gp)
+ r12: stack pointer (sp)
+ r13: thread pointer (tp)
+ f0: constant 0.0
+ f1: constant 1.0
+ p0: constant true
+ fp: eliminable frame pointer */
+
+/* The last 16 stacked regs are reserved for the 8 input and 8 output
+ registers. */
+
+#define FIXED_REGISTERS \
+{ /* General registers. */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Floating-point registers. */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Predicate registers. */ \
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Branch registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ /*FP CCV UNAT PFS LC EC */ \
+ 1, 1, 1, 1, 1, 1 \
+ }
+
+/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered
+ (in general) by function calls as well as for fixed registers. This
+ macro therefore identifies the registers that are not available for
+ general allocation of values that must live across function calls. */
+
+#define CALL_USED_REGISTERS \
+{ /* General registers. */ \
+ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Floating-point registers. */ \
+ 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Predicate registers. */ \
+ 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Branch registers. */ \
+ 1, 0, 0, 0, 0, 0, 1, 1, \
+ /*FP CCV UNAT PFS LC EC */ \
+ 1, 1, 1, 1, 1, 1 \
+}
+
+/* Like `CALL_USED_REGISTERS' but used to work around a historical
+ problem which makes CALL_USED_REGISTERS *always* include
+ all the FIXED_REGISTERS. Until that problem has been
+ resolved, this macro can be used instead.
+ In particular, block_propagate() requires this list
+ be accurate, or we can remove registers which should be live.
+ This macro is used in regs_invalidated_by_call. */
+
+#define CALL_REALLY_USED_REGISTERS \
+{ /* General registers. */ \
+ 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Floating-point registers. */ \
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Predicate registers. */ \
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Branch registers. */ \
+ 1, 0, 0, 0, 0, 0, 1, 1, \
+ /*FP CCV UNAT PFS LC EC */ \
+ 0, 1, 0, 1, 0, 0 \
+}
+
+
+/* Define this macro if the target machine has register windows. This C
+ expression returns the register number as seen by the called function
+ corresponding to the register number OUT as seen by the calling function.
+ Return OUT if register number OUT is not an outbound register. */
+
+#define INCOMING_REGNO(OUT) \
+ ((unsigned) ((OUT) - OUT_REG (0)) < 8 ? IN_REG ((OUT) - OUT_REG (0)) : (OUT))
+
+/* Define this macro if the target machine has register windows. This C
+ expression returns the register number as seen by the calling function
+ corresponding to the register number IN as seen by the called function.
+ Return IN if register number IN is not an inbound register. */
+
+#define OUTGOING_REGNO(IN) \
+ ((unsigned) ((IN) - IN_REG (0)) < 8 ? OUT_REG ((IN) - IN_REG (0)) : (IN))
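+
+/* Worked example: the caller's first outgoing argument register is
+ OUT_REG (0) == 120, which the callee sees as IN_REG (0) == 112; hence
+ INCOMING_REGNO (120) == 112 and OUTGOING_REGNO (112) == 120.
+ Registers outside the out0-out7 / in0-in7 windows map to
+ themselves. */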
+
+/* Define this macro if the target machine has register windows. This
+ C expression returns true if the register is call-saved but is in the
+ register window. */
+
+#define LOCAL_REGNO(REGNO) \
+ (IN_REGNO_P (REGNO) || LOC_REGNO_P (REGNO))
+
+/* We define CCImode in ia64-modes.def so we need a selector. */
+
+#define SELECT_CC_MODE(OP,X,Y) CCmode
+
+/* Order of allocation of registers */
+
+/* If defined, an initializer for a vector of integers, containing the numbers
+ of hard registers in the order in which GCC should prefer to use them
+ (from most preferred to least).
+
+ If this macro is not defined, registers are used lowest numbered first (all
+ else being equal).
+
+ One use of this macro is on machines where the highest numbered registers
+ must always be saved and the save-multiple-registers instruction supports
+ only sequences of consecutive registers. On such machines, define
+ `REG_ALLOC_ORDER' to be an initializer that lists the highest numbered
+ allocatable register first. */
+
+/* ??? Should the GR return value registers come before or after the rest
+ of the caller-save GRs? */
+
+#define REG_ALLOC_ORDER \
+{ \
+ /* Caller-saved general registers. */ \
+ R_GR (14), R_GR (15), R_GR (16), R_GR (17), \
+ R_GR (18), R_GR (19), R_GR (20), R_GR (21), R_GR (22), R_GR (23), \
+ R_GR (24), R_GR (25), R_GR (26), R_GR (27), R_GR (28), R_GR (29), \
+ R_GR (30), R_GR (31), \
+ /* Output registers. */ \
+ R_GR (120), R_GR (121), R_GR (122), R_GR (123), R_GR (124), R_GR (125), \
+ R_GR (126), R_GR (127), \
+ /* Caller-saved general registers, also used for return values. */ \
+ R_GR (8), R_GR (9), R_GR (10), R_GR (11), \
+ /* addl caller-saved general registers. */ \
+ R_GR (2), R_GR (3), \
+ /* Caller-saved FP registers. */ \
+ R_FR (6), R_FR (7), \
+ /* Caller-saved FP registers, used for parameters and return values. */ \
+ R_FR (8), R_FR (9), R_FR (10), R_FR (11), \
+ R_FR (12), R_FR (13), R_FR (14), R_FR (15), \
+ /* Rotating caller-saved FP registers. */ \
+ R_FR (32), R_FR (33), R_FR (34), R_FR (35), \
+ R_FR (36), R_FR (37), R_FR (38), R_FR (39), R_FR (40), R_FR (41), \
+ R_FR (42), R_FR (43), R_FR (44), R_FR (45), R_FR (46), R_FR (47), \
+ R_FR (48), R_FR (49), R_FR (50), R_FR (51), R_FR (52), R_FR (53), \
+ R_FR (54), R_FR (55), R_FR (56), R_FR (57), R_FR (58), R_FR (59), \
+ R_FR (60), R_FR (61), R_FR (62), R_FR (63), R_FR (64), R_FR (65), \
+ R_FR (66), R_FR (67), R_FR (68), R_FR (69), R_FR (70), R_FR (71), \
+ R_FR (72), R_FR (73), R_FR (74), R_FR (75), R_FR (76), R_FR (77), \
+ R_FR (78), R_FR (79), R_FR (80), R_FR (81), R_FR (82), R_FR (83), \
+ R_FR (84), R_FR (85), R_FR (86), R_FR (87), R_FR (88), R_FR (89), \
+ R_FR (90), R_FR (91), R_FR (92), R_FR (93), R_FR (94), R_FR (95), \
+ R_FR (96), R_FR (97), R_FR (98), R_FR (99), R_FR (100), R_FR (101), \
+ R_FR (102), R_FR (103), R_FR (104), R_FR (105), R_FR (106), R_FR (107), \
+ R_FR (108), R_FR (109), R_FR (110), R_FR (111), R_FR (112), R_FR (113), \
+ R_FR (114), R_FR (115), R_FR (116), R_FR (117), R_FR (118), R_FR (119), \
+ R_FR (120), R_FR (121), R_FR (122), R_FR (123), R_FR (124), R_FR (125), \
+ R_FR (126), R_FR (127), \
+ /* Caller-saved predicate registers. */ \
+ R_PR (6), R_PR (7), R_PR (8), R_PR (9), R_PR (10), R_PR (11), \
+ R_PR (12), R_PR (13), R_PR (14), R_PR (15), \
+ /* Rotating caller-saved predicate registers. */ \
+ R_PR (16), R_PR (17), \
+ R_PR (18), R_PR (19), R_PR (20), R_PR (21), R_PR (22), R_PR (23), \
+ R_PR (24), R_PR (25), R_PR (26), R_PR (27), R_PR (28), R_PR (29), \
+ R_PR (30), R_PR (31), R_PR (32), R_PR (33), R_PR (34), R_PR (35), \
+ R_PR (36), R_PR (37), R_PR (38), R_PR (39), R_PR (40), R_PR (41), \
+ R_PR (42), R_PR (43), R_PR (44), R_PR (45), R_PR (46), R_PR (47), \
+ R_PR (48), R_PR (49), R_PR (50), R_PR (51), R_PR (52), R_PR (53), \
+ R_PR (54), R_PR (55), R_PR (56), R_PR (57), R_PR (58), R_PR (59), \
+ R_PR (60), R_PR (61), R_PR (62), R_PR (63), \
+ /* Caller-saved branch registers. */ \
+ R_BR (6), R_BR (7), \
+ \
+ /* Stacked callee-saved general registers. */ \
+ R_GR (32), R_GR (33), R_GR (34), R_GR (35), \
+ R_GR (36), R_GR (37), R_GR (38), R_GR (39), R_GR (40), R_GR (41), \
+ R_GR (42), R_GR (43), R_GR (44), R_GR (45), R_GR (46), R_GR (47), \
+ R_GR (48), R_GR (49), R_GR (50), R_GR (51), R_GR (52), R_GR (53), \
+ R_GR (54), R_GR (55), R_GR (56), R_GR (57), R_GR (58), R_GR (59), \
+ R_GR (60), R_GR (61), R_GR (62), R_GR (63), R_GR (64), R_GR (65), \
+ R_GR (66), R_GR (67), R_GR (68), R_GR (69), R_GR (70), R_GR (71), \
+ R_GR (72), R_GR (73), R_GR (74), R_GR (75), R_GR (76), R_GR (77), \
+ R_GR (78), R_GR (79), R_GR (80), R_GR (81), R_GR (82), R_GR (83), \
+ R_GR (84), R_GR (85), R_GR (86), R_GR (87), R_GR (88), R_GR (89), \
+ R_GR (90), R_GR (91), R_GR (92), R_GR (93), R_GR (94), R_GR (95), \
+ R_GR (96), R_GR (97), R_GR (98), R_GR (99), R_GR (100), R_GR (101), \
+ R_GR (102), R_GR (103), R_GR (104), R_GR (105), R_GR (106), R_GR (107), \
+ R_GR (108), \
+ /* Input registers. */ \
+ R_GR (112), R_GR (113), R_GR (114), R_GR (115), R_GR (116), R_GR (117), \
+ R_GR (118), R_GR (119), \
+ /* Callee-saved general registers. */ \
+ R_GR (4), R_GR (5), R_GR (6), R_GR (7), \
+ /* Callee-saved FP registers. */ \
+ R_FR (2), R_FR (3), R_FR (4), R_FR (5), R_FR (16), R_FR (17), \
+ R_FR (18), R_FR (19), R_FR (20), R_FR (21), R_FR (22), R_FR (23), \
+ R_FR (24), R_FR (25), R_FR (26), R_FR (27), R_FR (28), R_FR (29), \
+ R_FR (30), R_FR (31), \
+ /* Callee-saved predicate registers. */ \
+ R_PR (1), R_PR (2), R_PR (3), R_PR (4), R_PR (5), \
+ /* Callee-saved branch registers. */ \
+ R_BR (1), R_BR (2), R_BR (3), R_BR (4), R_BR (5), \
+ \
+ /* ??? Stacked registers reserved for fp, rp, and ar.pfs. */ \
+ R_GR (109), R_GR (110), R_GR (111), \
+ \
+ /* Special general registers. */ \
+ R_GR (0), R_GR (1), R_GR (12), R_GR (13), \
+ /* Special FP registers. */ \
+ R_FR (0), R_FR (1), \
+ /* Special predicate registers. */ \
+ R_PR (0), \
+ /* Special branch registers. */ \
+ R_BR (0), \
+ /* Other fixed registers. */ \
+ FRAME_POINTER_REGNUM, \
+ AR_CCV_REGNUM, AR_UNAT_REGNUM, AR_PFS_REGNUM, AR_LC_REGNUM, \
+ AR_EC_REGNUM \
+}
+
+/* How Values Fit in Registers */
+
+/* A C expression for the number of consecutive hard registers, starting at
+ register number REGNO, required to hold a value of mode MODE. */
+
+/* ??? We say that BImode PR values require two registers. This allows us to
+ easily store the normal and inverted values. We use CCImode to indicate
+ a single predicate register. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((REGNO) == PR_REG (0) && (MODE) == DImode ? 64 \
+ : PR_REGNO_P (REGNO) && (MODE) == BImode ? 2 \
+ : (PR_REGNO_P (REGNO) || GR_REGNO_P (REGNO)) && (MODE) == CCImode ? 1\
+ : FR_REGNO_P (REGNO) && (MODE) == XFmode ? 1 \
+ : FR_REGNO_P (REGNO) && (MODE) == RFmode ? 1 \
+ : FR_REGNO_P (REGNO) && (MODE) == XCmode ? 2 \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
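+
+/* For example: BImode in a predicate register takes 2 hard registers
+ (the normal and inverted values) while CCImode takes 1; XFmode and
+ RFmode each fit in a single FP register; and TImode in the general
+ registers takes 16 / UNITS_PER_WORD == 2 registers via the default
+ clause. */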
+
+/* A C expression that is nonzero if it is permissible to store a value of mode
+ MODE in hard register number REGNO (or in several registers starting with
+ that one). */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ (FR_REGNO_P (REGNO) ? \
+ GET_MODE_CLASS (MODE) != MODE_CC && \
+ (MODE) != BImode && \
+ (MODE) != TFmode \
+ : PR_REGNO_P (REGNO) ? \
+ (MODE) == BImode || GET_MODE_CLASS (MODE) == MODE_CC \
+ : GR_REGNO_P (REGNO) ? \
+ (MODE) != XFmode && (MODE) != XCmode && (MODE) != RFmode \
+ : AR_REGNO_P (REGNO) ? (MODE) == DImode \
+ : BR_REGNO_P (REGNO) ? (MODE) == DImode \
+ : 0)
+
+/* A C expression that is nonzero if it is desirable to choose register
+ allocation so as to avoid move instructions between a value of mode MODE1
+ and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are
+ ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be
+ zero. */
+/* Don't tie integer and FP modes, as that causes us to get integer registers
+ allocated for FP instructions. XFmode is only supported in FP registers,
+ so we can't tie it with any other modes. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \
+ && ((((MODE1) == XFmode) || ((MODE1) == XCmode) || ((MODE1) == RFmode)) \
+ == (((MODE2) == XFmode) || ((MODE2) == XCmode) || ((MODE2) == RFmode))) \
+ && (((MODE1) == BImode) == ((MODE2) == BImode)))
+
+/* Specify the modes required to caller-save a given hard regno.
+ We need to ensure floating-point regs are not saved as DImode. */
+
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ ((FR_REGNO_P (REGNO) && (NREGS) == 1) ? RFmode \
+ : choose_hard_reg_mode ((REGNO), (NREGS), false))
+
+/* Handling Leaf Functions */
+
+/* A C initializer for a vector, indexed by hard register number, which
+ contains 1 for a register that is allowable in a candidate for leaf function
+ treatment. */
+/* ??? This might be useful. */
+/* #define LEAF_REGISTERS */
+
+/* A C expression whose value is the register number to which REGNO should be
+ renumbered, when a function is treated as a leaf function. */
+/* ??? This might be useful. */
+/* #define LEAF_REG_REMAP(REGNO) */
+
+
+/* Register Classes */
+
+/* An enumeral type that must be defined with all the register class names as
+ enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last
+ register class, followed by one more enumeral value, `LIM_REG_CLASSES',
+ which is not a register class but rather tells how many classes there
+ are. */
+/* ??? When compiling without optimization, it is possible for the only use of
+ a pseudo to be a parameter load from the stack with a REG_EQUIV note.
+ Regclass handles this case specially and does not assign any costs to the
+ pseudo. The pseudo then ends up using the last class before ALL_REGS.
+ Thus we must not let either PR_REGS or BR_REGS be the last class. The
+ testcase for this is gcc.c-torture/execute/va-arg-7.c. */
+enum reg_class
+{
+ NO_REGS,
+ PR_REGS,
+ BR_REGS,
+ AR_M_REGS,
+ AR_I_REGS,
+ ADDL_REGS,
+ GR_REGS,
+ FP_REGS,
+ FR_REGS,
+ GR_AND_BR_REGS,
+ GR_AND_FR_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define GENERAL_REGS GR_REGS
+
+/* The number of distinct register classes. */
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+/* An initializer containing the names of the register classes as C string
+ constants. These names are used in writing some of the debugging dumps. */
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "PR_REGS", "BR_REGS", "AR_M_REGS", "AR_I_REGS", \
+ "ADDL_REGS", "GR_REGS", "FP_REGS", "FR_REGS", \
+ "GR_AND_BR_REGS", "GR_AND_FR_REGS", "ALL_REGS" }
+
+/* An initializer containing the contents of the register classes, as integers
+ which are bit masks. The Nth integer specifies the contents of class N.
+ The way the integer MASK is interpreted is that register R is in the class
+ if `MASK & (1 << R)' is 1. */
+#define REG_CLASS_CONTENTS \
+{ \
+ /* NO_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x0000 }, \
+ /* PR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x0000 }, \
+ /* BR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00FF }, \
+ /* AR_M_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x0600 }, \
+ /* AR_I_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x3800 }, \
+ /* ADDL_REGS. */ \
+ { 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x0000 }, \
+ /* GR_REGS. */ \
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x0100 }, \
+ /* FP_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, \
+ 0x00000000, 0x00000000, 0x0000 }, \
+ /* FR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0x00000000, 0x00000000, 0x0000 }, \
+ /* GR_AND_BR_REGS. */ \
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x01FF }, \
+ /* GR_AND_FR_REGS. */ \
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0x00000000, 0x00000000, 0x0100 }, \
+ /* ALL_REGS. */ \
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF }, \
+}
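+
+/* For example, the first word of ADDL_REGS is 0x0000000F, i.e. bits 0-3
+ are set, so the class contains exactly r0-r3: the only registers that
+ can serve as the register operand of an addl with a 22-bit
+ immediate. */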
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers of a cover class should be
+ cheaper than a load or store of the registers. The macro value is an
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ PR_REGS, BR_REGS, AR_M_REGS, AR_I_REGS, GR_REGS, FR_REGS, LIM_REG_CLASSES \
+}
+
+/* A C expression whose value is a register class containing hard register
+ REGNO. In general there is more than one such class; choose a class which
+ is "minimal", meaning that no smaller class also contains the register. */
+/* The NO_REGS case is primarily for the benefit of rws_access_reg, which
+ may call here with private (invalid) register numbers, such as
+ REG_VOLATILE. */
+#define REGNO_REG_CLASS(REGNO) \
+(ADDL_REGNO_P (REGNO) ? ADDL_REGS \
+ : GENERAL_REGNO_P (REGNO) ? GR_REGS \
+ : FR_REGNO_P (REGNO) ? (REGNO) != R_FR (31) \
+ && (REGNO) != R_FR(127) ? FP_REGS : FR_REGS \
+ : PR_REGNO_P (REGNO) ? PR_REGS \
+ : BR_REGNO_P (REGNO) ? BR_REGS \
+ : AR_M_REGNO_P (REGNO) ? AR_M_REGS \
+ : AR_I_REGNO_P (REGNO) ? AR_I_REGS \
+ : NO_REGS)
+
+/* A macro whose definition is the name of the class to which a valid base
+ register must belong. A base register is one used in an address which is
+ the register value plus a displacement. */
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* A macro whose definition is the name of the class to which a valid index
+ register must belong. An index register is one used in an address where its
+ value is either multiplied by a scale factor or added to another register
+ (as well as added to a displacement). This is needed for POST_MODIFY. */
+#define INDEX_REG_CLASS GENERAL_REGS
+
+/* A C expression which is nonzero if register number REGNO is suitable for use
+ as a base register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard reg. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GENERAL_REGNO_P (REGNO) || GENERAL_REGNO_P (reg_renumber[REGNO]))
+
+/* A C expression which is nonzero if register number NUM is suitable for use
+ as an index register in operand addresses. It may be either a suitable hard
+ register or a pseudo register that has been allocated such a hard reg.
+ This is needed for POST_MODIFY. */
+#define REGNO_OK_FOR_INDEX_P(NUM) REGNO_OK_FOR_BASE_P (NUM)
+
+/* You should define this macro to indicate to the reload phase that it may
+ need to allocate at least one register for a reload in addition to the
+ register to contain the data. Specifically, if copying X to a register
+ CLASS in MODE requires an intermediate register, you should define this
+ to return the largest register class all of whose registers can be used
+ as intermediate registers or scratch registers. */
+
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ ia64_secondary_reload_class (CLASS, MODE, X)
+
+/* Certain machines have the property that some registers cannot be copied to
+ some other registers without using memory. Define this macro on those
+ machines to be a C expression that is nonzero if objects of mode M in
+ registers of CLASS1 can only be copied to registers of class CLASS2 by
+ storing a register of CLASS1 into memory and loading that memory location
+ into a register of CLASS2. */
+
+#if 0
+/* ??? May need this, but since we've disallowed XFmode in GR_REGS,
+ I'm not quite sure how it could be invoked. The normal problems
+ with unions should be solved with the addressof fiddling done by
+ movxf and friends. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ (((MODE) == XFmode || (MODE) == XCmode) \
+ && (((CLASS1) == GR_REGS && (CLASS2) == FR_REGS) \
+ || ((CLASS1) == FR_REGS && (CLASS2) == GR_REGS)))
+#endif
+
+/* A C expression for the maximum number of consecutive registers of
+ class CLASS needed to hold a value of mode MODE.
+ This is closely related to the macro `HARD_REGNO_NREGS'. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((MODE) == BImode && (CLASS) == PR_REGS ? 2 \
+ : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == XFmode) ? 1 \
+ : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == RFmode) ? 1 \
+ : (((CLASS) == FR_REGS || (CLASS) == FP_REGS) && (MODE) == XCmode) ? 2 \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* In BR regs, we can't change the mode at all.
+ In FP regs, we can't change FP values to integer values and vice versa,
+ but we can change e.g. DImode to SImode, and V2SFmode into DImode. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (reg_classes_intersect_p (CLASS, BR_REGS) \
+ ? (FROM) != (TO) \
+ : (SCALAR_FLOAT_MODE_P (FROM) != SCALAR_FLOAT_MODE_P (TO) \
+ ? reg_classes_intersect_p (CLASS, FR_REGS) \
+ : 0))
+
+/* Basic Stack Layout */
+
+/* Define this macro if pushing a word onto the stack moves the stack pointer
+ to a smaller address. */
+#define STACK_GROWS_DOWNWARD 1
+
+/* Define this macro to nonzero if the addresses of local variable slots
+ are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 0
+
+/* Offset from the frame pointer to the first local variable slot to
+ be allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset from the stack pointer register to the first location at which
+ outgoing arguments are placed. If not specified, the default value of zero
+ is used. This is the proper value for most machines. */
+/* IA64 has a 16 byte scratch area that is at the bottom of the stack. */
+#define STACK_POINTER_OFFSET 16
+
+/* Offset from the argument pointer register to the first argument's address.
+ On some machines it may depend on the data type of the function. */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame, after the
+ prologue. */
+
+/* ??? Frames other than zero would likely require interpreting the frame
+ unwind info, so we don't try to support them. We would also need to define
+ DYNAMIC_CHAIN_ADDRESS and SETUP_FRAME_ADDRESS (for the reg stack flush). */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ia64_return_addr_rtx (COUNT, FRAME)
+
+/* A C expression whose value is RTL representing the location of the incoming
+ return address at the beginning of any function, before the prologue. This
+ RTL is either a `REG', indicating that the return value is saved in `REG',
+ or a `MEM' representing a location in the stack. This enables DWARF2
+ unwind info for C++ EH. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, BR_REG (0))
+
+/* A C expression whose value is an integer giving the offset, in bytes, from
+ the value of the stack pointer register to the top of the stack frame at the
+ beginning of any function, before the prologue. The top of the frame is
+ defined to be the value of the stack pointer in the previous frame, just
+ before the call instruction. */
+/* The CFA is past the red zone, not at the entry-point stack
+ pointer. */
+#define INCOMING_FRAME_SP_OFFSET STACK_POINTER_OFFSET
+
+/* We shorten debug info by using CFA-16 as DW_AT_frame_base. */
+#define CFA_FRAME_BASE_OFFSET(FUNDECL) (-INCOMING_FRAME_SP_OFFSET)
+
+
+/* Registers That Address the Stack Frame. */
+
+/* The register number of the stack pointer register, which must also be a
+ fixed register according to `FIXED_REGISTERS'. On most machines, the
+ hardware determines which register this is. */
+
+#define STACK_POINTER_REGNUM 12
+
+/* The register number of the frame pointer register, which is used to access
+ automatic variables in the stack frame. On some machines, the hardware
+ determines which register this is. On other machines, you can choose any
+ register you wish for this purpose. */
+
+#define FRAME_POINTER_REGNUM 328
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM LOC_REG (79)
+
+/* The register number of the arg pointer register, which is used to access the
+ function's argument list. */
+/* r0 won't otherwise be used, so put the always-eliminated argument pointer
+ in it. */
+#define ARG_POINTER_REGNUM R_GR(0)
+
+/* Due to the way varargs and argument spilling happens, the argument
+ pointer is not 16-byte aligned like the stack pointer. */
+#define INIT_EXPANDERS \
+ do { \
+ ia64_init_expanders (); \
+ if (crtl->emit.regno_pointer_align) \
+ REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) = 64; \
+ } while (0)
+
+/* Register numbers used for passing a function's static chain pointer. */
+/* ??? The ABI sez the static chain should be passed as a normal parameter. */
+#define STATIC_CHAIN_REGNUM 15
+
+/* Eliminating the Frame Pointer and the Arg Pointer */
+
+/* If defined, this macro specifies a table of register pairs used to eliminate
+ unneeded registers that point into the stack frame. */
+
+#define ELIMINABLE_REGS \
+{ \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It
+ specifies the initial difference between the specified pair of
+ registers. This macro must be defined if `ELIMINABLE_REGS' is
+ defined. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = ia64_initial_elimination_offset ((FROM), (TO)))
+
+/* Passing Function Arguments on the Stack */
+
+/* If defined, the maximum amount of space required for outgoing arguments will
+ be computed and placed into the variable
+ `crtl->outgoing_args_size'. */
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+/* Function Arguments in Registers */
+
+#define MAX_ARGUMENT_SLOTS 8
+#define MAX_INT_RETURN_SLOTS 4
+#define GR_ARG_FIRST IN_REG (0)
+#define GR_RET_FIRST GR_REG (8)
+#define GR_RET_LAST GR_REG (11)
+#define FR_ARG_FIRST FR_REG (8)
+#define FR_RET_FIRST FR_REG (8)
+#define FR_RET_LAST FR_REG (15)
+#define AR_ARG_FIRST OUT_REG (0)
+
+/* A C type for declaring a variable that is used as the first argument of
+ `FUNCTION_ARG' and other related values. For some target machines, the type
+ `int' suffices and can hold the number of bytes of argument so far. */
+
+enum ivms_arg_type {I64, FF, FD, FG, FS, FT};
+/* VMS floating point formats VAX F, VAX D, VAX G, IEEE S, IEEE T. */
+
+typedef struct ia64_args
+{
+ int words; /* # words of arguments so far */
+ int int_regs; /* # GR registers used so far */
+ int fp_regs; /* # FR registers used so far */
+ int prototype; /* whether function prototyped */
+ enum ivms_arg_type atypes[8]; /* which VMS float type or if not float */
+} CUMULATIVE_ARGS;
+
+/* A C statement (sans semicolon) for initializing the variable CUM for the
+ state at the beginning of the argument list. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+do { \
+ (CUM).words = 0; \
+ (CUM).int_regs = 0; \
+ (CUM).fp_regs = 0; \
+ (CUM).prototype = ((FNTYPE) && prototype_p (FNTYPE)) || (LIBNAME); \
+ (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \
+ (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64; \
+ (CUM).atypes[6] = (CUM).atypes[7] = I64; \
+} while (0)
+
+/* Like `INIT_CUMULATIVE_ARGS' but overrides it for the purposes of finding the
+ arguments for the function being compiled. If this macro is undefined,
+ `INIT_CUMULATIVE_ARGS' is used instead. */
+
+/* We set prototype to true so that we never try to return a PARALLEL from
+ function_arg. */
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \
+do { \
+ (CUM).words = 0; \
+ (CUM).int_regs = 0; \
+ (CUM).fp_regs = 0; \
+ (CUM).prototype = 1; \
+ (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \
+ (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64; \
+ (CUM).atypes[6] = (CUM).atypes[7] = I64; \
+} while (0)
+
+/* A C expression that is nonzero if REGNO is the number of a hard register in
+ which function arguments are sometimes passed. This does *not* include
+ implicit arguments such as the static chain and the structure-value address.
+ On many machines, no registers can be used for this purpose since all
+ function arguments are pushed on the stack. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+(((REGNO) >= AR_ARG_FIRST && (REGNO) < (AR_ARG_FIRST + MAX_ARGUMENT_SLOTS)) \
+ || ((REGNO) >= FR_ARG_FIRST && (REGNO) < (FR_ARG_FIRST + MAX_ARGUMENT_SLOTS)))
+
+
+/* How Large Values are Returned */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+
+/* Caller-Saves Register Allocation */
+
+/* A C expression to determine whether it is worthwhile to consider placing a
+ pseudo-register in a call-clobbered hard register and saving and restoring
+ it around each function call. The expression should be 1 when this is worth
+ doing, and 0 otherwise.
+
+ If you don't define this macro, a default is used which is good on most
+ machines: `4 * CALLS < REFS'. */
+/* ??? Investigate. */
+/* #define CALLER_SAVE_PROFITABLE(REFS, CALLS) */
+
+
+/* Function Entry and Exit */
+
+/* Define this macro as a C expression that is nonzero if the return
+ instruction or the function epilogue ignores the value of the stack pointer;
+ in other words, if it is safe to delete an instruction to adjust the stack
+ pointer before a return from the function. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Define this macro as a C expression that is nonzero for registers
+ used by the epilogue or the `return' pattern. */
+
+#define EPILOGUE_USES(REGNO) ia64_epilogue_uses (REGNO)
+
+/* Nonzero for registers used by the exception handling mechanism. */
+
+#define EH_USES(REGNO) ia64_eh_uses (REGNO)
+
+/* Output part N of a function descriptor for DECL. For ia64, both
+ words are emitted with a single relocation, so ignore N > 0. */
+#define ASM_OUTPUT_FDESC(FILE, DECL, PART) \
+do { \
+ if ((PART) == 0) \
+ { \
+ if (TARGET_ILP32) \
+ fputs ("\tdata8.ua @iplt(", FILE); \
+ else \
+ fputs ("\tdata16.ua @iplt(", FILE); \
+ mark_decl_referenced (DECL); \
+ assemble_name (FILE, XSTR (XEXP (DECL_RTL (DECL), 0), 0)); \
+ fputs (")\n", FILE); \
+ if (TARGET_ILP32) \
+ fputs ("\tdata8.ua 0\n", FILE); \
+ } \
+} while (0)
+
+/* Generating Code for Profiling. */
+
+/* A C statement or compound statement to output to FILE some assembler code to
+ call the profiling subroutine `mcount'. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ ia64_output_function_profiler(FILE, LABELNO)
+
+/* Neither HP-UX nor Linux uses profile counters. */
+#define NO_PROFILE_COUNTERS 1
+
+/* Trampolines for Nested Functions. */
+
+/* We need 32 bytes, so we can save the sp, ar.rnat, ar.bsp, and ar.pfs of
+ the function containing a non-local goto target. */
+
+#define STACK_SAVEAREA_MODE(LEVEL) \
+ ((LEVEL) == SAVE_NONLOCAL ? OImode : Pmode)
+
+/* A C expression for the size in bytes of the trampoline, as an integer. */
+
+#define TRAMPOLINE_SIZE 32
+
+/* Alignment required for trampolines, in bits. */
+
+#define TRAMPOLINE_ALIGNMENT 64
+
+/* Addressing Modes */
+
+/* Define this macro if the machine supports post-increment addressing. */
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_REG 1
+
+/* A C expression that is 1 if the RTX X is a constant which is a valid
+ address. */
+
+#define CONSTANT_ADDRESS_P(X) 0
+
+/* The max number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* A C compound statement with a conditional `goto LABEL;' executed if X (an
+ RTX) is a legitimate memory address on the target machine for a memory
+ operand of mode MODE. */
+
+#define LEGITIMATE_ADDRESS_REG(X) \
+ ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
+ || (GET_CODE (X) == SUBREG && GET_CODE (XEXP (X, 0)) == REG \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0))))
+
+#define LEGITIMATE_ADDRESS_DISP(R, X) \
+ (GET_CODE (X) == PLUS \
+ && rtx_equal_p (R, XEXP (X, 0)) \
+ && (LEGITIMATE_ADDRESS_REG (XEXP (X, 1)) \
+ || (GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && INTVAL (XEXP (X, 1)) >= -256 \
+ && INTVAL (XEXP (X, 1)) < 256)))
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+do { \
+ if (LEGITIMATE_ADDRESS_REG (X)) \
+ goto LABEL; \
+ else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == POST_DEC) \
+ && LEGITIMATE_ADDRESS_REG (XEXP (X, 0)) \
+ && XEXP (X, 0) != arg_pointer_rtx) \
+ goto LABEL; \
+ else if (GET_CODE (X) == POST_MODIFY \
+ && LEGITIMATE_ADDRESS_REG (XEXP (X, 0)) \
+ && XEXP (X, 0) != arg_pointer_rtx \
+ && LEGITIMATE_ADDRESS_DISP (XEXP (X, 0), XEXP (X, 1))) \
+ goto LABEL; \
+} while (0)
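+
+/* For illustration, the accepted address forms correspond to the IA-64
+ post-increment load/store shapes, e.g.
+
+ ld8 r2 = [r3] // (reg)
+ ld8 r2 = [r3], 8 // (post_inc (reg)), likewise post_dec
+ ld8 r2 = [r3], r4 // (post_modify reg, reg+reg)
+ ld8 r2 = [r3], -16 // (post_modify reg, reg+disp), disp in [-256, 255]
+
+ There is no base+displacement or absolute addressing on IA-64; such
+ addresses must first be formed with an explicit add. */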
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as a base register. */
+
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#else
+#define REG_OK_FOR_BASE_P(X) \
+ (GENERAL_REGNO_P (REGNO (X)) || (REGNO (X) >= FIRST_PSEUDO_REGISTER))
+#endif
+
+/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for
+ use as an index register. This is needed for POST_MODIFY. */
+
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+
+/* A C expression that is nonzero if X is a legitimate constant for an
+ immediate operand on the target machine. */
+
+#define LEGITIMATE_CONSTANT_P(X) ia64_legitimate_constant_p (X)
+
+/* Condition Code Status */
+
+/* On some machines not all possible comparisons are defined, but you can
+ convert an invalid comparison into a valid one. */
+/* ??? Investigate. See the alpha definition. */
+/* #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) */
+
+
+/* Describing Relative Costs of Operations */
+
+/* A C expression for the cost of a branch instruction. A value of 1 is the
+ default; other values are interpreted relative to that. Used by the
+ if-conversion code as max instruction count. */
+/* ??? This requires investigation. The primary effect might be how
+ many additional insn groups we run into, vs how good the dynamic
+ branch predictor is. */
+
+#define BRANCH_COST(speed_p, predictable_p) 6
+
+/* Define this macro as a C expression which is nonzero if accessing less than
+ a word of memory (i.e. a `char' or a `short') is no faster than accessing a
+ word of memory. */
+
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant function
+ address than to call an address kept in a register.
+
+ Indirect function calls are more expensive than direct function calls, so
+ don't cse function addresses. */
+
+#define NO_FUNCTION_CSE
+
+
+/* Dividing the output into sections. */
+
+/* A C expression whose value is a string containing the assembler operation
+ that should precede instructions and read-only data. */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* A C expression whose value is a string containing the assembler operation to
+ identify the following data as writable initialized data. */
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* If defined, a C expression whose value is a string containing the assembler
+ operation to identify the following data as uninitialized global data. */
+
+#define BSS_SECTION_ASM_OP "\t.bss"
+
+#define IA64_DEFAULT_GVALUE 8
+
+/* Position Independent Code. */
+
+/* The register number of the register used to address a table of static data
+ addresses in memory. */
+
+/* ??? Should modify ia64.md to use pic_offset_table_rtx instead of
+ gen_rtx_REG (DImode, 1). */
+
+/* ??? Should we set flag_pic? Probably need to define
+ LEGITIMIZE_PIC_OPERAND_P to make that work. */
+
+#define PIC_OFFSET_TABLE_REGNUM GR_REG (1)
+
+/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' is
+ clobbered by calls. */
+
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED 1
+
+
+/* The Overall Framework of an Assembler File. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at the
+ end of the line. */
+
+#define ASM_COMMENT_START "//"
+
+/* A C string constant for text to be output before each `asm' statement or
+ group of consecutive ones. */
+
+#define ASM_APP_ON (TARGET_GNU_AS ? "#APP\n" : "//APP\n")
+
+/* A C string constant for text to be output after each `asm' statement or
+ group of consecutive ones. */
+
+#define ASM_APP_OFF (TARGET_GNU_AS ? "#NO_APP\n" : "//NO_APP\n")
+
+/* Output and Generation of Labels. */
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM the
+ assembler definition of a label named NAME. */
+
+/* See the ASM_OUTPUT_LABELREF definition in sysv4.h for an explanation of
+ why ia64_asm_output_label exists. */
+
+extern int ia64_asm_output_label;
+#define ASM_OUTPUT_LABEL(STREAM, NAME) \
+do { \
+ ia64_asm_output_label = 1; \
+ assemble_name (STREAM, NAME); \
+ fputs (":\n", STREAM); \
+ ia64_asm_output_label = 0; \
+} while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+/* A C statement (sans semicolon) to output to the stdio stream STREAM any text
+ necessary for declaring the name of an external symbol named NAME which is
+ referenced in this compilation but not defined. */
+
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ ia64_asm_output_external (FILE, DECL, NAME)
+
+/* A C statement to store into the string STRING a label whose name is made
+ from the string PREFIX and the number NUM. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+do { \
+ sprintf (LABEL, "*.%s%d", PREFIX, NUM); \
+} while (0)
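+
+/* For example (editorial note): ASM_GENERATE_INTERNAL_LABEL (buf, "L", 42)
+ stores "*.L42" in buf; the leading `*' tells the output machinery to use
+ the rest of the name verbatim, and the `.' matches LOCAL_LABEL_PREFIX
+ below. */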
+
+/* ??? Not sure whether using a `?' in the name is safe with the Intel
+ assembler. */
+
+#define ASM_PN_FORMAT (TARGET_GNU_AS ? "%s.%lu" : "%s?%lu")
+
+/* A C statement to output to the stdio stream STREAM assembler code which
+ defines (equates) the symbol NAME to have the value VALUE. */
+
+#define ASM_OUTPUT_DEF(STREAM, NAME, VALUE) \
+do { \
+ assemble_name (STREAM, NAME); \
+ fputs (" = ", STREAM); \
+ if (ISDIGIT (*VALUE)) \
+ ia64_asm_output_label = 1; \
+ assemble_name (STREAM, VALUE); \
+ fputc ('\n', STREAM); \
+ ia64_asm_output_label = 0; \
+} while (0)
+
+
+/* Macros Controlling Initialization Routines. */
+
+/* This is handled by sysv4.h. */
+
+
+/* Output of Assembler Instructions. */
+
+/* A C initializer containing the assembler's names for the machine registers,
+ each one as a C string constant. */
+
+#define REGISTER_NAMES \
+{ \
+ /* General registers. */ \
+ "ap", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", \
+ "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", \
+ "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", \
+ "r30", "r31", \
+ /* Local registers. */ \
+ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", \
+ "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", \
+ "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", \
+ "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", \
+ "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", \
+ "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", \
+ "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", \
+ "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", \
+ "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", \
+ "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79", \
+ /* Input registers. */ \
+ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7", \
+ /* Output registers. */ \
+ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7", \
+ /* Floating-point registers. */ \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", \
+ "f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", \
+ "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", \
+ "f30", "f31", "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39", \
+ "f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49", \
+ "f50", "f51", "f52", "f53", "f54", "f55", "f56", "f57", "f58", "f59", \
+ "f60", "f61", "f62", "f63", "f64", "f65", "f66", "f67", "f68", "f69", \
+ "f70", "f71", "f72", "f73", "f74", "f75", "f76", "f77", "f78", "f79", \
+ "f80", "f81", "f82", "f83", "f84", "f85", "f86", "f87", "f88", "f89", \
+ "f90", "f91", "f92", "f93", "f94", "f95", "f96", "f97", "f98", "f99", \
+ "f100","f101","f102","f103","f104","f105","f106","f107","f108","f109",\
+ "f110","f111","f112","f113","f114","f115","f116","f117","f118","f119",\
+ "f120","f121","f122","f123","f124","f125","f126","f127", \
+ /* Predicate registers. */ \
+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", \
+ "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", \
+ "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p29", \
+ "p30", "p31", "p32", "p33", "p34", "p35", "p36", "p37", "p38", "p39", \
+ "p40", "p41", "p42", "p43", "p44", "p45", "p46", "p47", "p48", "p49", \
+ "p50", "p51", "p52", "p53", "p54", "p55", "p56", "p57", "p58", "p59", \
+ "p60", "p61", "p62", "p63", \
+ /* Branch registers. */ \
+ "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \
+ /* Frame pointer. Application registers. */ \
+ "sfp", "ar.ccv", "ar.unat", "ar.pfs", "ar.lc", "ar.ec", \
+}
+
+/* If defined, a C initializer for an array of structures containing a name and
+ a register number. This macro defines additional names for hard registers,
+ thus allowing the `asm' option in declarations to refer to registers using
+ alternate names. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "gp", R_GR (1) }, \
+ { "sp", R_GR (12) }, \
+ { "in0", IN_REG (0) }, \
+ { "in1", IN_REG (1) }, \
+ { "in2", IN_REG (2) }, \
+ { "in3", IN_REG (3) }, \
+ { "in4", IN_REG (4) }, \
+ { "in5", IN_REG (5) }, \
+ { "in6", IN_REG (6) }, \
+ { "in7", IN_REG (7) }, \
+ { "out0", OUT_REG (0) }, \
+ { "out1", OUT_REG (1) }, \
+ { "out2", OUT_REG (2) }, \
+ { "out3", OUT_REG (3) }, \
+ { "out4", OUT_REG (4) }, \
+ { "out5", OUT_REG (5) }, \
+ { "out6", OUT_REG (6) }, \
+ { "out7", OUT_REG (7) }, \
+ { "loc0", LOC_REG (0) }, \
+ { "loc1", LOC_REG (1) }, \
+ { "loc2", LOC_REG (2) }, \
+ { "loc3", LOC_REG (3) }, \
+ { "loc4", LOC_REG (4) }, \
+ { "loc5", LOC_REG (5) }, \
+ { "loc6", LOC_REG (6) }, \
+ { "loc7", LOC_REG (7) }, \
+ { "loc8", LOC_REG (8) }, \
+ { "loc9", LOC_REG (9) }, \
+ { "loc10", LOC_REG (10) }, \
+ { "loc11", LOC_REG (11) }, \
+ { "loc12", LOC_REG (12) }, \
+ { "loc13", LOC_REG (13) }, \
+ { "loc14", LOC_REG (14) }, \
+ { "loc15", LOC_REG (15) }, \
+ { "loc16", LOC_REG (16) }, \
+ { "loc17", LOC_REG (17) }, \
+ { "loc18", LOC_REG (18) }, \
+ { "loc19", LOC_REG (19) }, \
+ { "loc20", LOC_REG (20) }, \
+ { "loc21", LOC_REG (21) }, \
+ { "loc22", LOC_REG (22) }, \
+ { "loc23", LOC_REG (23) }, \
+ { "loc24", LOC_REG (24) }, \
+ { "loc25", LOC_REG (25) }, \
+ { "loc26", LOC_REG (26) }, \
+ { "loc27", LOC_REG (27) }, \
+ { "loc28", LOC_REG (28) }, \
+ { "loc29", LOC_REG (29) }, \
+ { "loc30", LOC_REG (30) }, \
+ { "loc31", LOC_REG (31) }, \
+ { "loc32", LOC_REG (32) }, \
+ { "loc33", LOC_REG (33) }, \
+ { "loc34", LOC_REG (34) }, \
+ { "loc35", LOC_REG (35) }, \
+ { "loc36", LOC_REG (36) }, \
+ { "loc37", LOC_REG (37) }, \
+ { "loc38", LOC_REG (38) }, \
+ { "loc39", LOC_REG (39) }, \
+ { "loc40", LOC_REG (40) }, \
+ { "loc41", LOC_REG (41) }, \
+ { "loc42", LOC_REG (42) }, \
+ { "loc43", LOC_REG (43) }, \
+ { "loc44", LOC_REG (44) }, \
+ { "loc45", LOC_REG (45) }, \
+ { "loc46", LOC_REG (46) }, \
+ { "loc47", LOC_REG (47) }, \
+ { "loc48", LOC_REG (48) }, \
+ { "loc49", LOC_REG (49) }, \
+ { "loc50", LOC_REG (50) }, \
+ { "loc51", LOC_REG (51) }, \
+ { "loc52", LOC_REG (52) }, \
+ { "loc53", LOC_REG (53) }, \
+ { "loc54", LOC_REG (54) }, \
+ { "loc55", LOC_REG (55) }, \
+ { "loc56", LOC_REG (56) }, \
+ { "loc57", LOC_REG (57) }, \
+ { "loc58", LOC_REG (58) }, \
+ { "loc59", LOC_REG (59) }, \
+ { "loc60", LOC_REG (60) }, \
+ { "loc61", LOC_REG (61) }, \
+ { "loc62", LOC_REG (62) }, \
+ { "loc63", LOC_REG (63) }, \
+ { "loc64", LOC_REG (64) }, \
+ { "loc65", LOC_REG (65) }, \
+ { "loc66", LOC_REG (66) }, \
+ { "loc67", LOC_REG (67) }, \
+ { "loc68", LOC_REG (68) }, \
+ { "loc69", LOC_REG (69) }, \
+ { "loc70", LOC_REG (70) }, \
+ { "loc71", LOC_REG (71) }, \
+ { "loc72", LOC_REG (72) }, \
+ { "loc73", LOC_REG (73) }, \
+ { "loc74", LOC_REG (74) }, \
+ { "loc75", LOC_REG (75) }, \
+ { "loc76", LOC_REG (76) }, \
+ { "loc77", LOC_REG (77) }, \
+ { "loc78", LOC_REG (78) }, \
+ { "loc79", LOC_REG (79) }, \
+}
+
+/* A C compound statement to output to stdio stream STREAM the assembler syntax
+ for an instruction operand X. X is an RTL expression. */
+
+#define PRINT_OPERAND(STREAM, X, CODE) \
+ ia64_print_operand (STREAM, X, CODE)
+
+/* A C expression which evaluates to true if CODE is a valid punctuation
+ character for use in the `PRINT_OPERAND' macro. */
+
+/* ??? Keep this around for now, as we might need it later. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '+' || (CODE) == ',')
+
+/* A C compound statement to output to stdio stream STREAM the assembler syntax
+ for an instruction operand that is a memory reference whose address is X. X
+ is an RTL expression. */
+
+#define PRINT_OPERAND_ADDRESS(STREAM, X) \
+ ia64_print_operand_address (STREAM, X)
+
+/* If defined, C string expressions to be used for the `%R', `%L', `%U', and
+ `%I' options of `asm_fprintf' (see `final.c'). */
+
+#define REGISTER_PREFIX ""
+#define LOCAL_LABEL_PREFIX "."
+#define USER_LABEL_PREFIX ""
+#define IMMEDIATE_PREFIX ""
+
+
+/* Output of dispatch tables. */
+
+/* This macro should be provided on machines where the addresses in a dispatch
+ table are relative to the table's own address. */
+
+/* ??? Depends on the pointer size. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do { \
+ if (TARGET_ILP32) \
+ fprintf (STREAM, "\tdata4 @pcrel(.L%d)\n", VALUE); \
+ else \
+ fprintf (STREAM, "\tdata8 @pcrel(.L%d)\n", VALUE); \
+ } while (0)
+
+/* Jump tables only need 8-byte alignment. */
+
+#define ADDR_VEC_ALIGN(ADDR_VEC) 3
+
+
+/* Assembler Commands for Exception Regions. */
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (((CODE) == 1 ? DW_EH_PE_textrel : DW_EH_PE_datarel) \
+ | ((GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (TARGET_ILP32 ? DW_EH_PE_udata4 : DW_EH_PE_udata8))
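+
+/* Worked example (editorial note): a code label (CODE == 1) that is not
+ subject to dynamic relocation (GLOBAL == 0) on an LP64 target is encoded
+ DW_EH_PE_textrel | DW_EH_PE_udata8; with TARGET_ILP32 set the same label
+ becomes DW_EH_PE_textrel | DW_EH_PE_udata4. */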
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ const char *reltag = NULL; \
+ if (((ENCODING) & 0xF0) == DW_EH_PE_textrel) \
+ reltag = "@segrel("; \
+ else if (((ENCODING) & 0xF0) == DW_EH_PE_datarel) \
+ reltag = "@gprel("; \
+ if (reltag) \
+ { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ fputs (reltag, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputc (')', FILE); \
+ goto DONE; \
+ } \
+ } while (0)
+
+
+/* Assembler Commands for Alignment. */
+
+/* ??? Investigate. */
+
+/* The alignment (log base 2) to put in front of LABEL, which follows
+ a BARRIER. */
+
+/* #define LABEL_ALIGN_AFTER_BARRIER(LABEL) */
+
+/* The desired alignment for the location counter at the beginning
+ of a loop. */
+
+/* #define LOOP_ALIGN(LABEL) */
+
+/* Define this macro if `ASM_OUTPUT_SKIP' should not be used in the text
+ section because it fails to put zeros in the bytes that are skipped. */
+
+#define ASM_NO_SKIP_IN_TEXT 1
+
+/* A C statement to output to the stdio stream STREAM an assembler command to
+ advance the location counter to a multiple of 2 to the POWER bytes. */
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ fprintf (STREAM, "\t.align %d\n", 1<<(POWER))
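+
+/* E.g. (editorial note): POWER == 4 emits "\t.align 16"; the ia64
+ assemblers take a byte count rather than a power of two, hence the
+ 1 << (POWER) conversion above. */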
+
+
+/* Macros Affecting all Debug Formats. */
+
+/* This is handled in sysv4.h. */
+
+
+/* Specific Options for DBX Output. */
+
+/* This is handled by dbxelf.h. */
+
+
+/* Open ended Hooks for DBX Output. */
+
+/* Likewise. */
+
+
+/* File names in DBX format. */
+
+/* Likewise. */
+
+
+/* Macros for SDB and Dwarf Output. */
+
+/* Define this macro if GCC should produce dwarf version 2 format debugging
+ output in response to the `-g' option. */
+
+#define DWARF2_DEBUGGING_INFO 1
+
+#define DWARF2_ASM_LINE_DEBUG_INFO (TARGET_DWARF2_ASM)
+
+/* Use tags for debug info labels, so that they don't break instruction
+ bundles. This also avoids getting spurious DV warnings from the
+ assembler. This is similar to (*targetm.asm_out.internal_label), except that we
+ add brackets around the label. */
+
+#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \
+ fprintf (FILE, TARGET_GNU_AS ? "[.%s%d:]\n" : ".%s%d:\n", PREFIX, NUM)
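+
+/* E.g. (editorial note): PREFIX "LFB", NUM 2 emits "[.LFB2:]" under the
+ GNU assembler and ".LFB2:" otherwise; the bracketed tag form labels an
+ instruction inside a bundle without forcing a bundle break, per the
+ comment above. */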
+
+/* Use section-relative relocations for debugging offsets. Unlike other
+ targets that fake this by putting the section VMA at 0, IA-64 has
+ proper relocations for them. */
+#define ASM_OUTPUT_DWARF_OFFSET(FILE, SIZE, LABEL, SECTION) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ fputs ("@secrel(", FILE); \
+ assemble_name (FILE, LABEL); \
+ fputc (')', FILE); \
+ } while (0)
+
+/* Emit a PC-relative relocation. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ fputs ("@pcrel(", FILE); \
+ assemble_name (FILE, LABEL); \
+ fputc (')', FILE); \
+ } while (0)
+
+/* Register Renaming Parameters. */
+
+/* A C expression that is nonzero if hard register number REGNO2 can be
+ considered for use as a rename register for REGNO1. */
+
+#define HARD_REGNO_RENAME_OK(REGNO1,REGNO2) \
+ ia64_hard_regno_rename_ok((REGNO1), (REGNO2))
+
+
+/* Miscellaneous Parameters. */
+
+/* Flag to mark data that is in the small address area (addressable
+ via "addl", that is, within a 2MByte offset of 0. */
+#define SYMBOL_FLAG_SMALL_ADDR (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_SMALL_ADDR_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_SMALL_ADDR) != 0)
+
+/* An alias for a machine mode name. This is the machine mode that elements of
+ a jump-table should have. */
+
+#define CASE_VECTOR_MODE ptr_mode
+
+/* Define this macro as a C expression that evaluates to nonzero if the
+ tablejump instruction expects the table to contain offsets from the
+ address of the table. */
+
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define this macro if operations between registers with integral mode smaller
+ than a word are always performed on the entire register. */
+
+#define WORD_REGISTER_OPERATIONS
+
+/* Define this macro to be a C expression indicating when insns that read
+ memory in MODE, an integral mode narrower than a word, set the bits outside
+ of MODE to be either the sign-extension or the zero-extension of the data
+ read. */
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
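+
+/* E.g. (editorial note): ld1/ld2/ld4 zero the upper bits of the target
+ general register, so a narrow load already provides the DImode
+ zero-extension for free; see the zero_extend*di2 patterns in ia64.md. */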
+
+/* The maximum number of bytes that a single instruction can move quickly from
+ memory to memory. */
+#define MOVE_MAX 8
+
+/* A C expression which is nonzero if on this machine it is safe to "convert"
+ an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller
+ than INPREC) by merely operating on it as if it had only OUTPREC bits. */
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* A C expression describing the value returned by a comparison operator with
+ an integral mode and stored by a store-flag instruction (`sCOND') when the
+ condition is true. */
+
+/* ??? Investigate using STORE_FLAG_VALUE of -1 instead of 1. */
+
+/* An alias for the machine mode for pointers. */
+
+/* ??? This would change if we had ILP32 support. */
+
+#define Pmode DImode
+
+/* An alias for the machine mode used for memory references to functions being
+ called, in `call' RTL expressions. */
+
+#define FUNCTION_MODE Pmode
+
+/* A C expression for the maximum number of instructions to execute via
+ conditional execution instructions instead of a branch. A value of
+ BRANCH_COST+1 is the default if the machine does not use
+ cc0, and 1 if it does use cc0. */
+/* ??? Investigate. */
+#define MAX_CONDITIONAL_EXECUTE 12
+
+extern int ia64_final_schedule;
+
+#define TARGET_UNWIND_TABLES_DEFAULT true
+
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 15 : INVALID_REGNUM)
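+
+/* (Editorial note: with the REGISTER_NAMES table above, N = 0..3 map to
+ r15..r18; any other N yields INVALID_REGNUM.) */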
+
+/* This structure contains machine-specific per-function data. */
+struct GTY(()) machine_function
+{
+ /* The new stack pointer when unwinding from EH. */
+ rtx ia64_eh_epilogue_sp;
+
+ /* The new bsp value when unwinding from EH. */
+ rtx ia64_eh_epilogue_bsp;
+
+ /* The GP value save register. */
+ rtx ia64_gp_save;
+
+ /* The number of varargs registers to save. */
+ int n_varargs;
+
+ /* The number of the next unwind state to copy. */
+ int state_num;
+};
+
+#define DONT_USE_BUILTIN_SETJMP
+
+/* Output any profiling code before the prologue. */
+
+#undef PROFILE_BEFORE_PROLOGUE
+#define PROFILE_BEFORE_PROLOGUE 1
+
+/* Initialize library function table. */
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS ia64_init_libfuncs
+
+
+/* Switch on code for querying unit reservations. */
+#define CPU_UNITS_QUERY 1
+
+/* End of ia64.h */
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
new file mode 100644
index 000000000..c258ca5b7
--- /dev/null
+++ b/gcc/config/ia64/ia64.md
@@ -0,0 +1,5188 @@
+;; IA-64 Machine description template
+;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+;; 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by James E. Wilson <wilson@cygnus.com> and
+;; David Mosberger <davidm@hpl.hp.com>.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; ??? register_operand accepts (subreg:DI (mem:SI X)) which forces later
+;; reload. This will be fixed once scheduling support is turned on.
+
+;; ??? Optimize for post-increment addressing modes.
+
+;; ??? fselect is not supported, because there is no integer register
+;; equivalent.
+
+;; ??? fp abs/min/max instructions may also work for integer values.
+
+;; ??? Would a predicate_reg_operand predicate be useful? The HP one is buggy,
+;; it assumes the operand is a register and takes REGNO of it without checking.
+
+;; ??? Would a branch_reg_operand predicate be useful? The HP one is buggy,
+;; it assumes the operand is a register and takes REGNO of it without checking.
+
+;; ??? Go through list of documented named patterns and look for more to
+;; implement.
+
+;; ??? Go through instruction manual and look for more instructions that
+;; can be emitted.
+
+;; ??? Add function unit scheduling info for Itanium (TM) processor.
+
+;; ??? Need a better way to describe alternate fp status registers.
+
+(define_constants
+ [; Relocations
+ (UNSPEC_LTOFF_DTPMOD 0)
+ (UNSPEC_LTOFF_DTPREL 1)
+ (UNSPEC_DTPREL 2)
+ (UNSPEC_LTOFF_TPREL 3)
+ (UNSPEC_TPREL 4)
+ (UNSPEC_DTPMOD 5)
+
+ (UNSPEC_LD_BASE 9)
+ (UNSPEC_GR_SPILL 10)
+ (UNSPEC_GR_RESTORE 11)
+ (UNSPEC_FR_SPILL 12)
+ (UNSPEC_FR_RESTORE 13)
+ (UNSPEC_FR_RECIP_APPROX 14)
+ (UNSPEC_PRED_REL_MUTEX 15)
+ (UNSPEC_GETF_EXP 16)
+ (UNSPEC_PIC_CALL 17)
+ (UNSPEC_MF 18)
+ (UNSPEC_CMPXCHG_ACQ 19)
+ (UNSPEC_FETCHADD_ACQ 20)
+ (UNSPEC_BSP_VALUE 21)
+ (UNSPEC_FLUSHRS 22)
+ (UNSPEC_BUNDLE_SELECTOR 23)
+ (UNSPEC_ADDP4 24)
+ (UNSPEC_PROLOGUE_USE 25)
+ (UNSPEC_RET_ADDR 26)
+ (UNSPEC_SETF_EXP 27)
+ (UNSPEC_FR_SQRT_RECIP_APPROX 28)
+ (UNSPEC_SHRP 29)
+ (UNSPEC_COPYSIGN 30)
+ (UNSPEC_VECT_EXTR 31)
+ (UNSPEC_LDA 40)
+ (UNSPEC_LDS 41)
+ (UNSPEC_LDS_A 42)
+ (UNSPEC_LDSA 43)
+ (UNSPEC_LDCCLR 44)
+ (UNSPEC_LDCNC 45)
+ (UNSPEC_CHKACLR 46)
+ (UNSPEC_CHKANC 47)
+ (UNSPEC_CHKS 48)
+ (UNSPEC_FR_RECIP_APPROX_RES 49)
+ (UNSPEC_FR_SQRT_RECIP_APPROX_RES 50)
+ ])
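+
+;; Editorial illustration (not in the upstream source): these constants
+;; name otherwise-opaque unspec RTL, e.g. the return-address placeholder
+;; further down is represented as
+;; (unspec:DI [(const_int 0)] UNSPEC_RET_ADDR)
+;; and is rewritten by ia64_split_return_addr_rtx after reload.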
+
+(define_constants
+ [(UNSPECV_ALLOC 0)
+ (UNSPECV_BLOCKAGE 1)
+ (UNSPECV_INSN_GROUP_BARRIER 2)
+ (UNSPECV_BREAK 3)
+ (UNSPECV_SET_BSP 4)
+ (UNSPECV_PSAC_ALL 5) ; pred.safe_across_calls
+ (UNSPECV_PSAC_NORMAL 6)
+ (UNSPECV_SETJMP_RECEIVER 7)
+ (UNSPECV_GOTO_RECEIVER 8)
+ ])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Attributes
+;; ::
+;; ::::::::::::::::::::
+
+;; Processor type. This attribute must exactly match the processor_type
+;; enumeration in ia64.h.
+(define_attr "cpu" "itanium,itanium2"
+ (const (symbol_ref "((enum attr_cpu) ia64_tune)")))
+
+;; Instruction type. This primarily determines how instructions can be
+;; packed in bundles, and secondarily affects scheduling to function units.
+
+;; A alu, can go in I or M syllable of a bundle
+;; I integer
+;; M memory
+;; F floating-point
+;; B branch
+;; L long immediate, takes two syllables
+;; S stop bit
+
+;; ??? Should not have any pattern with type unknown. Perhaps add code to
+;; check this in md_reorg? Currently use unknown for patterns which emit
+;; multiple instructions, patterns which emit 0 instructions, and patterns
+;; which emit an instruction that can go in any slot (e.g. nop).
+
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,
+ fldp,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,
+ ld,chk_s_i,chk_s_f,chk_a,long_i,mmalua,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,
+ st,syst_m0, syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,
+ nop_b,nop_f,nop_i,nop_m,nop_x,lfetch,pre_cycle"
+ (const_string "unknown"))
+
+;; chk_s_i has an I and an M form; use type A for convenience.
+(define_attr "type" "unknown,A,I,M,F,B,L,X,S"
+ (cond [(eq_attr "itanium_class" "ld,st,fld,fldp,stf,sem,nop_m") (const_string "M")
+ (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
+ (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
+ (eq_attr "itanium_class" "lfetch") (const_string "M")
+ (eq_attr "itanium_class" "chk_s_f,chk_a") (const_string "M")
+ (eq_attr "itanium_class" "chk_s_i,ialu,icmp,ilog,mmalua")
+ (const_string "A")
+ (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
+ (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
+ (eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I")
+ (eq_attr "itanium_class" "frpr,topr,ishf,xtd,tbit") (const_string "I")
+ (eq_attr "itanium_class" "mmmul,mmshf,mmshfi,nop_i") (const_string "I")
+ (eq_attr "itanium_class" "br,scall,nop_b") (const_string "B")
+ (eq_attr "itanium_class" "stop_bit") (const_string "S")
+ (eq_attr "itanium_class" "nop_x") (const_string "X")
+ (eq_attr "itanium_class" "long_i") (const_string "L")]
+ (const_string "unknown")))
+
+(define_attr "itanium_requires_unit0" "no,yes"
+ (cond [(eq_attr "itanium_class" "syst_m0,sem,frfr,rse_m") (const_string "yes")
+ (eq_attr "itanium_class" "toar_m,frar_m") (const_string "yes")
+ (eq_attr "itanium_class" "frbr,tobr,mmmul") (const_string "yes")
+ (eq_attr "itanium_class" "tbit,ishf,topr,frpr") (const_string "yes")
+ (eq_attr "itanium_class" "toar_i,frar_i") (const_string "yes")
+ (eq_attr "itanium_class" "fmisc,fcmp") (const_string "yes")]
+ (const_string "no")))
+
+;; Predication. True iff this instruction can be predicated.
+
+(define_attr "predicable" "no,yes" (const_string "yes"))
+
+;; Empty. True iff this insn does not generate any code.
+
+(define_attr "empty" "no,yes" (const_string "no"))
+
+;; True iff this insn must be the first insn of an instruction group.
+;; This is true for the alloc instruction, and will also be true of others
+;; when we have full intrinsics support.
+
+(define_attr "first_insn" "no,yes" (const_string "no"))
+
+(define_attr "data_speculative" "no,yes" (const_string "no"))
+
+(define_attr "control_speculative" "no,yes" (const_string "no"))
+
+(define_attr "check_load" "no,yes" (const_string "no"))
+
+(define_attr "speculable1" "no,yes" (const_string "no"))
+
+(define_attr "speculable2" "no,yes" (const_string "no"))
+
+;; DFA descriptions of ia64 processors used for insn scheduling and
+;; bundling.
+
+(automata_option "ndfa")
+
+;; Uncomment the following line to output automata for debugging.
+;; (automata_option "v")
+
+(automata_option "w")
+
+(include "itanium2.md")
+
+;; Mode iterators
+
+; Used for truncations from XFmode.
+(define_mode_iterator MODE_SDF [SF DF])
+
+(define_mode_attr suffix [
+ (SF ".s")
+ (DF ".d")
+ (XF "")
+ ])
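+
+;; For instance (editorial note), a pattern written against MODE_SDF with
+;; "<suffix>" in its template expands twice: an SF variant using ".s" and
+;; a DF variant using ".d" (compare the explicit fnorm.s/fnorm.d
+;; truncation patterns below); XF maps to the empty string since the
+;; unsuffixed form operates on the full register format.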
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Moves
+;; ::
+;; ::::::::::::::::::::
+
+;; Set of a single predicate register. This is only used to implement
+;; pr-to-pr move and complement.
+
+(define_insn "movcci"
+ [(set (match_operand:CCI 0 "destination_operand" "=c,c,?c,?*r, c,*r,*m,*r")
+ (match_operand:CCI 1 "move_operand" " O,n, c, c,*r,*m,*r,*r"))]
+ ""
+ "@
+ cmp.ne %0, p0 = r0, r0
+ cmp.eq %0, p0 = r0, r0
+ (%1) cmp.eq.unc %0, p0 = r0, r0
+ #
+ tbit.nz %0, p0 = %1, 0
+ ld1%O1 %0 = %1%P1
+ st1%Q0 %0 = %1%P0
+ mov %0 = %1"
+ [(set_attr "itanium_class" "icmp,icmp,icmp,unknown,tbit,ld,st,ialu")
+ (set_attr "predicable" "no")])
+
+(define_split
+ [(set (match_operand:CCI 0 "register_operand" "")
+ (match_operand:CCI 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 2) (const_int 0))
+ (cond_exec (ne (match_dup 3) (const_int 0))
+ (set (match_dup 2) (const_int 1)))]
+ "operands[2] = gen_rtx_REG (BImode, REGNO (operands[0]));
+ operands[3] = gen_rtx_REG (BImode, REGNO (operands[1]));")
+
+(define_insn "movbi"
+ [(set (match_operand:BI 0 "destination_operand" "=c,c,?c,?*r, c,*r,*r,*m,*r")
+ (match_operand:BI 1 "move_operand" " O,n, c, c,*r, n,*m,*r,*r"))]
+ ""
+ "@
+ cmp.ne %0, %I0 = r0, r0
+ cmp.eq %0, %I0 = r0, r0
+ #
+ #
+ tbit.nz %0, %I0 = %1, 0
+ adds %0 = %1, r0
+ ld1%O1 %0 = %1%P1
+ st1%Q0 %0 = %1%P0
+ mov %0 = %1"
+ [(set_attr "itanium_class" "icmp,icmp,unknown,unknown,tbit,ialu,ld,st,ialu")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, no, no, no, no, yes,no,no")])
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (match_operand:BI 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))"
+ [(cond_exec (ne (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 1)))
+ (cond_exec (eq (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 0)))]
+ "")
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (match_operand:BI 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))
+ (set (match_dup 0) (unspec:BI [(match_dup 0)] UNSPEC_PRED_REL_MUTEX))]
+ "operands[2] = gen_rtx_REG (CCImode, REGNO (operands[0]));
+ operands[3] = gen_rtx_REG (CCImode, REGNO (operands[0]) + 1);
+ operands[4] = gen_rtx_REG (CCImode, REGNO (operands[1]));
+ operands[5] = gen_rtx_REG (CCImode, REGNO (operands[1]) + 1);")
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movqi_internal"
+ [(set (match_operand:QI 0 "destination_operand" "=r,r,r, m, r,*f,*f")
+ (match_operand:QI 1 "move_operand" "rO,J,m,rO,*f,rO,*f"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %r1
+ addl %0 = %1, r0
+ ld1%O1 %0 = %1%P1
+ st1%Q0 %0 = %r1%P0
+ getf.sig %0 = %1
+ setf.sig %0 = %r1
+ mov %0 = %1"
+ [(set_attr "itanium_class" "ialu,ialu,ld,st,frfr,tofr,fmisc")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, yes,no,no, no, no")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movhi_internal"
+ [(set (match_operand:HI 0 "destination_operand" "=r,r,r, m, r,*f,*f")
+ (match_operand:HI 1 "move_operand" "rO,J,m,rO,*f,rO,*f"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %r1
+ addl %0 = %1, r0
+ ld2%O1 %0 = %1%P1
+ st2%Q0 %0 = %r1%P0
+ getf.sig %0 = %1
+ setf.sig %0 = %r1
+ mov %0 = %1"
+ [(set_attr "itanium_class" "ialu,ialu,ld,st,frfr,tofr,fmisc")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, yes,no,no, no, no")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movsi_internal"
+ [(set (match_operand:SI 0 "destination_operand" "=r,r,r,r,r, m, r,*f,*f, r,*d")
+ (match_operand:SI 1 "move_operand" "rO,J,j,i,m,rO,*f,rO,*f,*d,rK"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %r1
+ addl %0 = %1, r0
+ addp4 %0 = %1 - 0x100000000, r0
+ movl %0 = %1
+ ld4%O1 %0 = %1%P1
+ st4%Q0 %0 = %r1%P0
+ getf.sig %0 = %1
+ setf.sig %0 = %r1
+ mov %0 = %1
+ mov %0 = %1
+ mov %0 = %r1"
+ ;; frar_m, toar_m ??? why not frar_i and toar_i
+ [(set_attr "itanium_class" "ialu,ialu,ialu,long_i,ld,st,frfr,tofr,fmisc,frar_m,toar_m")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, no, no, yes,no,no, no, no, no, no")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movdi_internal"
+ [(set (match_operand:DI 0 "destination_operand"
+ "=r,r,r,r,r, m, r,*f,*f,*f, Q, r,*b, r,*e, r,*d, r,*c")
+ (match_operand:DI 1 "move_operand"
+ "rO,JT,j,i,m,rO,*f,rO,*f, Q,*f,*b,rO,*e,rK,*d,rK,*c,rO"))]
+ "ia64_move_ok (operands[0], operands[1])"
+{
+ static const char * const alt[] = {
+ "%,mov %0 = %r1",
+ "%,addl %0 = %1, r0",
+ "%,addp4 %0 = %1 - 0x100000000, r0",
+ "%,movl %0 = %1",
+ "%,ld8%O1 %0 = %1%P1",
+ "%,st8%Q0 %0 = %r1%P0",
+ "%,getf.sig %0 = %1",
+ "%,setf.sig %0 = %r1",
+ "%,mov %0 = %1",
+ "%,ldf8 %0 = %1%P1",
+ "%,stf8 %0 = %1%P0",
+ "%,mov %0 = %1",
+ "%,mov %0 = %r1",
+ "%,mov %0 = %1",
+ "%,mov %0 = %1",
+ "%,mov %0 = %1",
+ "%,mov %0 = %1",
+ "mov %0 = pr",
+ "mov pr = %1, -1"
+ };
+
+ gcc_assert (which_alternative != 2 || TARGET_NO_PIC
+ || !symbolic_operand (operands[1], VOIDmode));
+
+ return alt[which_alternative];
+}
+ [(set_attr "itanium_class" "ialu,ialu,ialu,long_i,ld,st,frfr,tofr,fmisc,fld,stf,frbr,tobr,frar_i,toar_i,frar_m,toar_m,frpr,topr")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, no, no, yes,no,no, no, no, yes,no, no, no, no, no, no, no, no, no")])
+
+(define_mode_iterator MODE [BI QI HI SI DI SF DF XF TI])
+(define_mode_iterator MODE_FOR_CMP [BI SI DI SF DF XF (TF "TARGET_HPUX")])
+(define_mode_iterator MODE_FOR_EXTEND [QI HI SI])
+
+(define_mode_attr output_a [
+ (BI "ld1.a %0 = %1%P1")
+ (QI "ld1.a %0 = %1%P1")
+ (HI "ld2.a %0 = %1%P1")
+ (SI "ld4.a %0 = %1%P1")
+ (DI
+ "@
+ ld8.a %0 = %1%P1
+ ldf8.a %0 = %1%P1")
+ (SF
+ "@
+ ldfs.a %0 = %1%P1
+ ld4.a %0 = %1%P1")
+ (DF
+ "@
+ ldfd.a %0 = %1%P1
+ ld8.a %0 = %1%P1")
+ (XF "ldfe.a %0 = %1%P1")
+ (TI "ldfp8.a %X0 = %1%P1")])
+
+(define_mode_attr output_s [
+ (BI "ld1.s %0 = %1%P1")
+ (QI "ld1.s %0 = %1%P1")
+ (HI "ld2.s %0 = %1%P1")
+ (SI "ld4.s %0 = %1%P1")
+ (DI
+ "@
+ ld8.s %0 = %1%P1
+ ldf8.s %0 = %1%P1")
+ (SF
+ "@
+ ldfs.s %0 = %1%P1
+ ld4.s %0 = %1%P1")
+ (DF
+ "@
+ ldfd.s %0 = %1%P1
+ ld8.s %0 = %1%P1")
+ (XF "ldfe.s %0 = %1%P1")
+ (TI "ldfp8.s %X0 = %1%P1")])
+
+(define_mode_attr output_sa [
+ (BI "ld1.sa %0 = %1%P1")
+ (QI "ld1.sa %0 = %1%P1")
+ (HI "ld2.sa %0 = %1%P1")
+ (SI "ld4.sa %0 = %1%P1")
+ (DI
+ "@
+ ld8.sa %0 = %1%P1
+ ldf8.sa %0 = %1%P1")
+ (SF
+ "@
+ ldfs.sa %0 = %1%P1
+ ld4.sa %0 = %1%P1")
+ (DF
+ "@
+ ldfd.sa %0 = %1%P1
+ ld8.sa %0 = %1%P1")
+ (XF "ldfe.sa %0 = %1%P1")
+ (TI "ldfp8.sa %X0 = %1%P1")])
+
+(define_mode_attr output_c_clr [
+ (BI "ld1.c.clr%O1 %0 = %1%P1")
+ (QI "ld1.c.clr%O1 %0 = %1%P1")
+ (HI "ld2.c.clr%O1 %0 = %1%P1")
+ (SI "ld4.c.clr%O1 %0 = %1%P1")
+ (DI
+ "@
+ ld8.c.clr%O1 %0 = %1%P1
+ ldf8.c.clr %0 = %1%P1")
+ (SF
+ "@
+ ldfs.c.clr %0 = %1%P1
+ ld4.c.clr%O1 %0 = %1%P1")
+ (DF
+ "@
+ ldfd.c.clr %0 = %1%P1
+ ld8.c.clr%O1 %0 = %1%P1")
+ (XF "ldfe.c.clr %0 = %1%P1")
+ (TI "ldfp8.c.clr %X0 = %1%P1")])
+
+(define_mode_attr output_c_nc [
+ (BI "ld1.c.nc%O1 %0 = %1%P1")
+ (QI "ld1.c.nc%O1 %0 = %1%P1")
+ (HI "ld2.c.nc%O1 %0 = %1%P1")
+ (SI "ld4.c.nc%O1 %0 = %1%P1")
+ (DI
+ "@
+ ld8.c.nc%O1 %0 = %1%P1
+ ldf8.c.nc %0 = %1%P1")
+ (SF
+ "@
+ ldfs.c.nc %0 = %1%P1
+ ld4.c.nc%O1 %0 = %1%P1")
+ (DF
+ "@
+ ldfd.c.nc %0 = %1%P1
+ ld8.c.nc%O1 %0 = %1%P1")
+ (XF "ldfe.c.nc %0 = %1%P1")
+ (TI "ldfp8.c.nc %X0 = %1%P1")])
+
+(define_mode_attr ld_reg_constr [(BI "=*r") (QI "=r") (HI "=r") (SI "=r") (DI "=r,*f") (SF "=f,*r") (DF "=f,*r") (XF "=f") (TI "=*x")])
+(define_mode_attr ldc_reg_constr [(BI "+*r") (QI "+r") (HI "+r") (SI "+r") (DI "+r,*f") (SF "+f,*r") (DF "+f,*r") (XF "+f") (TI "+*x")])
+(define_mode_attr chk_reg_constr [(BI "*r") (QI "r") (HI "r") (SI "r") (DI "r,*f") (SF "f,*r") (DF "f,*r") (XF "f") (TI "*x")])
+
+(define_mode_attr mem_constr [(BI "*m") (QI "m") (HI "m") (SI "m") (DI "m,Q") (SF "Q,m") (DF "Q,m") (XF "m") (TI "Q")])
+
+;; Define register predicate prefix.
+;; We can generate speculative loads only for general and fp registers - this
+;; is constrained in ia64.c: ia64_speculate_insn ().
+(define_mode_attr reg_pred_prefix [(BI "gr") (QI "gr") (HI "gr") (SI "gr") (DI "grfr") (SF "grfr") (DF "grfr") (XF "fr") (TI "fr")])
+
+(define_mode_attr ld_class [(BI "ld") (QI "ld") (HI "ld") (SI "ld") (DI "ld,fld") (SF "fld,ld") (DF "fld,ld") (XF "fld") (TI "fldp")])
+(define_mode_attr chka_class [(BI "chk_a") (QI "chk_a") (HI "chk_a") (SI "chk_a") (DI "chk_a,chk_a") (SF "chk_a,chk_a") (DF "chk_a,chk_a") (XF "chk_a") (TI "chk_a")])
+(define_mode_attr chks_class [(BI "chk_s_i") (QI "chk_s_i") (HI "chk_s_i") (SI "chk_s_i") (DI "chk_s_i,chk_s_f") (SF "chk_s_f,chk_s_i") (DF "chk_s_f,chk_s_i") (XF "chk_s_f") (TI "chk_s_i")])
+
+(define_mode_attr attr_yes [(BI "yes") (QI "yes") (HI "yes") (SI "yes") (DI "yes,yes") (SF "yes,yes") (DF "yes,yes") (XF "yes") (TI "yes")])
+
+(define_insn "mov<mode>_advanced"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ld_reg_constr>")
+ (unspec:MODE [(match_operand:MODE 1 "memory_operand" "<mem_constr>")] UNSPEC_LDA))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_a>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_advanced"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extend:DI (unspec:MODE_FOR_EXTEND [(match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>")] UNSPEC_LDA)))]
+ ""
+ "<output_a>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")])
+
+(define_insn "mov<mode>_speculative"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ld_reg_constr>")
+ (unspec:MODE [(match_operand:MODE 1 "memory_operand" "<mem_constr>")] UNSPEC_LDS))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_s>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_speculative"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extend:DI (unspec:MODE_FOR_EXTEND [(match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>")] UNSPEC_LDS)))]
+ ""
+ "<output_s>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "mov<mode>_speculative_advanced"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ld_reg_constr>")
+ (unspec:MODE [(match_operand:MODE 1 "memory_operand" "<mem_constr>")] UNSPEC_LDSA))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_sa>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "mov<mode>_speculative_a"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ld_reg_constr>")
+ (unspec:MODE [(match_operand:MODE 1 "memory_operand" "<mem_constr>")] UNSPEC_LDS_A))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_sa>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_speculative_advanced"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extend:DI (unspec:MODE_FOR_EXTEND [(match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>")] UNSPEC_LDSA)))]
+ ""
+ "<output_sa>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_speculative_a"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extend:DI (unspec:MODE_FOR_EXTEND [(match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>")] UNSPEC_LDS_A)))]
+ ""
+ "<output_sa>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "data_speculative" "<attr_yes>")
+ (set_attr "control_speculative" "<attr_yes>")])
+
+(define_insn "mov<mode>_clr"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ldc_reg_constr>")
+ (if_then_else:MODE (ne (unspec [(match_dup 0)] UNSPEC_LDCCLR) (const_int 0))
+ (match_operand:MODE 1 "memory_operand" "<mem_constr>")
+ (match_dup 0)))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_c_clr>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "check_load" "<attr_yes>")])
+
+(define_insn "mov<mode>_nc"
+ [(set (match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<ldc_reg_constr>")
+ (if_then_else:MODE (ne (unspec [(match_dup 0)] UNSPEC_LDCNC) (const_int 0))
+ (match_operand:MODE 1 "memory_operand" "<mem_constr>")
+ (match_dup 0)))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "<output_c_nc>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "check_load" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_clr"
+ [(set (match_operand:DI 0 "gr_register_operand" "+r")
+ (if_then_else:DI (ne (unspec [(match_dup 0)] UNSPEC_LDCCLR) (const_int 0))
+ (zero_extend:DI (match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>"))
+ (match_dup 0)))]
+ ""
+ "<output_c_clr>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "check_load" "<attr_yes>")])
+
+(define_insn "zero_extend<mode>di2_nc"
+ [(set (match_operand:DI 0 "gr_register_operand" "+r")
+ (if_then_else:DI (ne (unspec [(match_dup 0)] UNSPEC_LDCNC) (const_int 0))
+ (zero_extend:DI (match_operand:MODE_FOR_EXTEND 1 "memory_operand" "<mem_constr>"))
+ (match_dup 0)))]
+ ""
+ "<output_c_nc>"
+ [(set_attr "itanium_class" "<ld_class>")
+ (set_attr "check_load" "<attr_yes>")])
+
+(define_insn "advanced_load_check_clr_<mode>"
+ [(set (pc)
+ (if_then_else (ne (unspec [(match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<chk_reg_constr>")] UNSPEC_CHKACLR) (const_int 0))
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "chk.a.clr %0, %l1"
+ [(set_attr "itanium_class" "<chka_class>")])
+
+(define_insn "advanced_load_check_nc_<mode>"
+ [(set (pc)
+ (if_then_else (ne (unspec [(match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<chk_reg_constr>")] UNSPEC_CHKANC) (const_int 0))
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "chk.a.clr %0, %l1"
+ [(set_attr "itanium_class" "<chka_class>")])
+
+(define_insn "speculation_check_<mode>"
+ [(set (pc)
+ (if_then_else (ne (unspec [(match_operand:MODE 0 "<reg_pred_prefix>_register_operand" "<chk_reg_constr>")] UNSPEC_CHKS) (const_int 0))
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "chk.s %0, %l1"
+ [(set_attr "itanium_class" "<chks_class>")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "symbolic_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ if (ia64_expand_load_address (operands[0], operands[1]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "load_fptr"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_dup 2) (match_operand 1 "function_operand" "")))
+ (set (match_dup 0) (match_dup 3))]
+ "reload_completed"
+{
+ operands[2] = pic_offset_table_rtx;
+ operands[3] = gen_const_mem (DImode, operands[0]);
+})
+
+(define_insn "*load_fptr_internal1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (reg:DI 1) (match_operand 1 "function_operand" "s")))]
+ "reload_completed"
+ "addl %0 = @ltoff(@fptr(%1)), gp"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "load_gprel"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (reg:DI 1) (match_operand 1 "sdata_symbolic_operand" "s")))]
+ "reload_completed"
+ "addl %0 = @gprel(%1), gp"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*gprel64_offset"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "symbolic_operand" "") (reg:DI 1)))]
+ "reload_completed"
+ "movl %0 = @gprel(%1)"
+ [(set_attr "itanium_class" "long_i")])
+
+(define_expand "load_gprel64"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "symbolic_operand" "") (match_dup 2)))
+ (set (match_dup 0)
+ (plus:DI (match_dup 2) (match_dup 0)))]
+ "reload_completed"
+{
+ operands[2] = pic_offset_table_rtx;
+})
+
+;; This is used as a placeholder for the return address during early
+;; compilation. We won't know where we've placed this until reload,
+;; at which point it can wind up in b0, a general register,
+;; or memory. The only safe destination under these conditions is a
+;; general register.
+
+(define_insn_and_split "*movdi_ret_addr"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_RET_ADDR))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_return_addr_rtx (operands[0]);
+ DONE;
+}
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*load_symptr_high"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (high:DI (match_operand 1 "got_symbolic_operand" "s"))
+ (match_operand:DI 2 "register_operand" "a")))]
+ "reload_completed"
+{
+ if (HAVE_AS_LTOFFX_LDXMOV_RELOCS)
+ return "%,addl %0 = @ltoffx(%1), %2";
+ else
+ return "%,addl %0 = @ltoff(%1), %2";
+}
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*load_symptr_low"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "got_symbolic_operand" "s")))]
+ "reload_completed"
+{
+ if (HAVE_AS_LTOFFX_LDXMOV_RELOCS)
+ return "%,ld8.mov %0 = [%1], %2";
+ else
+ return "%,ld8 %0 = [%1]";
+}
+ [(set_attr "itanium_class" "ld")])
+
+(define_insn_and_split "load_dtpmod"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPMOD))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (unspec:DI [(match_dup 1)] UNSPEC_LTOFF_DTPMOD)
+ (match_dup 2)))
+ (set (match_dup 0) (match_dup 3))]
+{
+ operands[2] = pic_offset_table_rtx;
+ operands[3] = gen_const_mem (DImode, operands[0]);
+})
+
+(define_insn "*load_ltoff_dtpmod"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_LTOFF_DTPMOD)
+ (match_operand:DI 2 "register_operand" "a")))]
+ "reload_completed"
+ "addl %0 = @ltoff(@dtpmod(%1)), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "load_dtprel"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPREL))]
+ ""
+ "")
+
+(define_insn "*load_dtprel64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "ld_tls_symbolic_operand" "")]
+ UNSPEC_DTPREL))]
+ "TARGET_TLS64"
+ "movl %0 = @dtprel(%1)"
+ [(set_attr "itanium_class" "long_i")])
+
+(define_insn "*load_dtprel22"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "ld_tls_symbolic_operand" "")]
+ UNSPEC_DTPREL))]
+ ""
+ "addl %0 = @dtprel(%1), r0"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn_and_split "*load_dtprel_gd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPREL))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (unspec:DI [(match_dup 1)] UNSPEC_LTOFF_DTPREL)
+ (match_dup 2)))
+ (set (match_dup 0) (match_dup 3))]
+{
+ operands[2] = pic_offset_table_rtx;
+ operands[3] = gen_const_mem (DImode, operands[0]);
+})
+
+(define_insn "*load_ltoff_dtprel"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_LTOFF_DTPREL)
+ (match_operand:DI 2 "register_operand" "a")))]
+ ""
+ "addl %0 = @ltoff(@dtprel(%1)), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "add_dtprel"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (unspec:DI [(match_operand 1 "ld_tls_symbolic_operand" "")]
+ UNSPEC_DTPREL)
+ (match_operand:DI 2 "register_operand" "")))]
+ "!TARGET_TLS64"
+ "")
+
+(define_insn "*add_dtprel14"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "ld_tls_symbolic_operand" "")]
+ UNSPEC_DTPREL)
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_TLS14"
+ "adds %0 = @dtprel(%1), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*add_dtprel22"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "ld_tls_symbolic_operand" "")]
+ UNSPEC_DTPREL)
+ (match_operand:DI 2 "register_operand" "a")))]
+ "TARGET_TLS22"
+ "addl %0 = @dtprel(%1), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "load_tprel"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand 1 "tls_symbolic_operand" "")]
+ UNSPEC_TPREL))]
+ ""
+ "")
+
+(define_insn "*load_tprel64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "le_tls_symbolic_operand" "")]
+ UNSPEC_TPREL))]
+ "TARGET_TLS64"
+ "movl %0 = @tprel(%1)"
+ [(set_attr "itanium_class" "long_i")])
+
+(define_insn "*load_tprel22"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "le_tls_symbolic_operand" "")]
+ UNSPEC_TPREL))]
+ ""
+ "addl %0 = @tprel(%1), r0"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn_and_split "*load_tprel_ie"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand 1 "ie_tls_symbolic_operand" "")]
+ UNSPEC_TPREL))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (unspec:DI [(match_dup 1)] UNSPEC_LTOFF_TPREL)
+ (match_dup 2)))
+ (set (match_dup 0) (match_dup 3))]
+{
+ operands[2] = pic_offset_table_rtx;
+ operands[3] = gen_const_mem (DImode, operands[0]);
+})
+
+(define_insn "*load_ltoff_tprel"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "ie_tls_symbolic_operand" "")]
+ UNSPEC_LTOFF_TPREL)
+ (match_operand:DI 2 "register_operand" "a")))]
+ ""
+ "addl %0 = @ltoff(@tprel(%1)), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "add_tprel"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (unspec:DI [(match_operand 1 "le_tls_symbolic_operand" "")]
+ UNSPEC_TPREL)
+ (match_operand:DI 2 "register_operand" "")))]
+ "!TARGET_TLS64"
+ "")
+
+(define_insn "*add_tprel14"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "le_tls_symbolic_operand" "")]
+ UNSPEC_TPREL)
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_TLS14"
+ "adds %0 = @tprel(%1), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*add_tprel22"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(match_operand 1 "le_tls_symbolic_operand" "")]
+ UNSPEC_TPREL)
+ (match_operand:DI 2 "register_operand" "a")))]
+ "TARGET_TLS22"
+ "addl %0 = @tprel(%1), %2"
+ [(set_attr "itanium_class" "ialu")])
+
+;; With no offsettable memory references, we've got to have a scratch
+;; around to play with the second word. However, in order to avoid a
+;; reload nightmare we lie, claim we don't need one, and fix it up
+;; in ia64_split_tmode_move.
+(define_expand "movti"
+ [(set (match_operand:TI 0 "general_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn_and_split "movti_internal"
+ [(set (match_operand:TI 0 "destination_operand" "=r, *fm,*x,*f, Q")
+ (match_operand:TI 1 "general_operand" "r*fim,r, Q, *fOQ,*f"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ #
+ #
+ ldfp8 %X0 = %1%P1
+ #
+ #"
+ "reload_completed && !ia64_load_pair_ok(operands[0], operands[1])"
+ [(const_int 0)]
+{
+ ia64_split_tmode_move (operands);
+ DONE;
+}
+ [(set_attr "itanium_class" "unknown,unknown,fldp,unknown,unknown")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, no, yes, no, no")])
+
+;; Floating Point Moves
+;;
+;; Note - Patterns for SF mode moves are compulsory, but
+;; patterns for DF are optional, as GCC can synthesize them.
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movsf_internal"
+ [(set (match_operand:SF 0 "destination_operand" "=f,f, Q,*r, f,*r,*r, m,*r")
+ (match_operand:SF 1 "general_operand" "fG,Q,fG,fG,*r,*r, m,*r, F"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %F1
+ ldfs %0 = %1%P1
+ stfs %0 = %F1%P0
+ getf.s %0 = %F1
+ setf.s %0 = %1
+ mov %0 = %1
+ ld4%O1 %0 = %1%P1
+ st4%Q0 %0 = %1%P0
+ movl %0 = %G1"
+ [(set_attr "itanium_class" "fmisc,fld,stf,frfr,tofr,ialu,ld,st,long_i")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes,no, no, no, no, yes,no,no")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "movdf_internal"
+ [(set (match_operand:DF 0 "destination_operand" "=f,f, Q,*r, f,*r,*r, m,*r")
+ (match_operand:DF 1 "general_operand" "fG,Q,fG,fG,*r,*r, m,*r, F"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %F1
+ ldfd %0 = %1%P1
+ stfd %0 = %F1%P0
+ getf.d %0 = %F1
+ setf.d %0 = %1
+ mov %0 = %1
+ ld8%O1 %0 = %1%P1
+ st8%Q0 %0 = %1%P0
+ movl %0 = %G1"
+ [(set_attr "itanium_class" "fmisc,fld,stf,frfr,tofr,ialu,ld,st,long_i")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes,no, no, no, no, yes,no,no")])
+
+;; With no offsettable memory references, we've got to have a scratch
+;; around to play with the second word if the variable winds up in GRs.
+(define_expand "movxf"
+ [(set (match_operand:XF 0 "general_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ ""
+{
+ if (ia64_expand_movxf_movrf (XFmode, operands))
+ DONE;
+})
+
+;; ??? There's no easy way to honor volatile acquire/release semantics.
+
+(define_insn "movxf_internal"
+ [(set (match_operand:XF 0 "destination_operand" "=f,f, m")
+ (match_operand:XF 1 "general_operand" "fG,m,fG"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %F1
+ ldfe %0 = %1%P1
+ stfe %0 = %F1%P0"
+ [(set_attr "itanium_class" "fmisc,fld,stf")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes,no")])
+
+;; Same as for movxf, but for RFmode.
+(define_expand "movrf"
+ [(set (match_operand:RF 0 "general_operand" "")
+ (match_operand:RF 1 "general_operand" ""))]
+ ""
+{
+ if (ia64_expand_movxf_movrf (RFmode, operands))
+ DONE;
+})
+
+(define_insn "*movrf_internal"
+ [(set (match_operand:RF 0 "destination_operand" "=f,f, m")
+ (match_operand:RF 1 "general_operand" "fG,m,fG"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %F1
+ ldf.fill %0 = %1%P1
+ stf.spill %0 = %F1%P0"
+ [(set_attr "itanium_class" "fmisc,fld,stf")])
+
+;; Better code generation via insns that deal with TFmode register pairs
+;; directly. Same concerns apply as for TImode.
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "general_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn_and_split "*movtf_internal"
+ [(set (match_operand:TF 0 "destination_operand" "=r,r,m")
+ (match_operand:TF 1 "general_operand" "ri,m,r"))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_tmode_move (operands);
+ DONE;
+}
+ [(set_attr "itanium_class" "unknown")
+ (set_attr "predicable" "no")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conversions
+;; ::
+;; ::::::::::::::::::::
+
+;; Signed conversions from a smaller integer to a larger integer
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "gr_register_operand" "r")))]
+ ""
+ "sxt1 %0 = %1"
+ [(set_attr "itanium_class" "xtd")])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "gr_register_operand" "r")))]
+ ""
+ "sxt2 %0 = %1"
+ [(set_attr "itanium_class" "xtd")])
+
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,?f")
+ (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))]
+ ""
+ "@
+ sxt4 %0 = %1
+ fsxt.r %0 = %1, %1"
+ [(set_attr "itanium_class" "xtd,fmisc")])
+
+;; Unsigned conversions from a smaller integer to a larger integer
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
+ (zero_extend:DI (match_operand:QI 1 "gr_nonimmediate_operand" "r,m")))]
+ ""
+ "@
+ zxt1 %0 = %1
+ ld1%O1 %0 = %1%P1"
+ [(set_attr "itanium_class" "xtd,ld")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
+ (zero_extend:DI (match_operand:HI 1 "gr_nonimmediate_operand" "r,m")))]
+ ""
+ "@
+ zxt2 %0 = %1
+ ld2%O1 %0 = %1%P1"
+ [(set_attr "itanium_class" "xtd,ld")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes")])
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f")
+ (zero_extend:DI
+ (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))]
+ ""
+ "@
+ addp4 %0 = %1, r0
+ ld4%O1 %0 = %1%P1
+ fmix.r %0 = f0, %1"
+ [(set_attr "itanium_class" "ialu,ld,fmisc")
+ (set_attr "speculable1" "yes")
+ (set_attr "speculable2" "no, yes,no")])
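+
+;; Note that the first alternative of zero_extendsidi2 exploits a side
+;; effect of addp4: with r0 as the second source, the 4-byte pointer add
+;;   addp4 r8 = r32, r0
+;; forces the upper 32 bits of the result to zero, which is exactly a
+;; zero extension.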
+
+;; Convert between floating point types of different sizes.
+
+;; At first glance, it would appear that emitting fnorm for an extending
+;; conversion is unnecessary. However, the stf and getf instructions work
+;; correctly only if the input is properly rounded for its type. In
+;; particular, we get the wrong result for getf.d/stfd if the input is a
+;; denorm single. Since we don't know what the next instruction will be, we
+;; have to emit an fnorm.
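+;;
+;; For instance (illustrative only), if a register holds a denormal
+;; single such as 2^-149 and we extend it,
+;;   double d = (double) x;  /* extendsfdf2 */
+;; a following stfd or getf.d would observe an unrounded value unless
+;; the fnorm.d below first normalizes it to double format.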
+
+;; ??? Optimization opportunity here. Get rid of the insn altogether
+;; when we can. Should probably use a scheme like the one proposed
+;; for ia32 in dealing with operands that match unary operators. This
+;; would let combine merge the thing into adjacent insns. See also how the
+;; mips port handles SIGN_EXTEND as operands to integer arithmetic insns via
+;; se_register_operand.
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm.d %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "extendsfxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (float_extend:XF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "extenddfxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (float_extend:XF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm.s %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "truncxfsf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm.s %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "truncxfdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_truncate:DF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fnorm.d %0 = %F1"
+ [(set_attr "itanium_class" "fmac")])
+
+;; Convert between signed integer types and floating point.
+
+(define_insn "floatdirf2"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (float:RF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xf %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "floatdixf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (float:XF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xf %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fx.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fx.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fix_truncxfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fx.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fix_truncrfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fx.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+;; Convert between unsigned integer types and floating point.
+
+(define_insn "floatunsdisf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (unsigned_float:SF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xuf.s %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "floatunsdidf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (unsigned_float:DF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xuf.d %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "floatunsdixf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (unsigned_float:XF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xuf %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "floatunsdirf2"
+ [(set (match_operand:RF 0 "fr_register_operand" "=f")
+ (unsigned_float:RF (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.xuf %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fixuns_truncsfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fxu.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fxu.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fixuns_truncxfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fxu.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+(define_insn "fixuns_truncrfdi2"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fcvt.fxu.trunc %0 = %F1"
+ [(set_attr "itanium_class" "fcvtfx")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Bit field extraction
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "extv"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (sign_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "extr_len_operand" "n")
+ (match_operand:DI 3 "shift_count_operand" "M")))]
+ ""
+ "extr %0 = %1, %3, %2"
+ [(set_attr "itanium_class" "ishf")])
+
+(define_insn "extzv"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "extr_len_operand" "n")
+ (match_operand:DI 3 "shift_count_operand" "M")))]
+ ""
+ "extr.u %0 = %1, %3, %2"
+ [(set_attr "itanium_class" "ishf")])
+
+;; Insert a bit field.
+;; Can have 3 operands, source1 (inserter), source2 (insertee), dest.
+;; Source1 can be 0 or -1.
+;; Source2 can be 0.
+
+;; ??? The actual dep instruction is more powerful than what these insv
+;; patterns support. Unfortunately, combine is unable to create patterns
+;; where source2 != dest.
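+;;
+;; As an illustration, a little-endian bit-field store such as
+;;   struct { unsigned long a : 8, b : 16; } s;
+;;   s.b = y;
+;; becomes (set (zero_extract:DI s (const_int 16) (const_int 8)) y),
+;; and with y in a register and a width of at most 16 it matches the
+;; single-dep pattern below:
+;;   dep rS = rY, rS, 8, 16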
+
+(define_expand "insv"
+ [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "")
+ (match_operand:DI 1 "const_int_operand" "")
+ (match_operand:DI 2 "const_int_operand" ""))
+ (match_operand:DI 3 "nonmemory_operand" ""))]
+ ""
+{
+ int width = INTVAL (operands[1]);
+ int shift = INTVAL (operands[2]);
+
+  /* If operands[3] is a constant, and isn't 0 or -1, then load it into a
+ pseudo. */
+ if (! register_operand (operands[3], DImode)
+ && operands[3] != const0_rtx && operands[3] != constm1_rtx)
+ operands[3] = force_reg (DImode, operands[3]);
+
+  /* If this can be done with a single dep instruction, we have nothing
+     more to do.  */
+ if (! ((register_operand (operands[3], DImode) && width <= 16)
+ || operands[3] == const0_rtx || operands[3] == constm1_rtx))
+ {
+ /* Check for cases that can be implemented with a mix instruction. */
+ if (width == 32 && shift == 0)
+ {
+ /* Directly generating the mix4left instruction confuses
+ optimize_bit_field in function.c. Since this is performing
+ a useful optimization, we defer generation of the complicated
+ mix4left RTL to the first splitting phase. */
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_shift_mix4left (operands[0], operands[3], tmp));
+ DONE;
+ }
+ else if (width == 32 && shift == 32)
+ {
+ emit_insn (gen_mix4right (operands[0], operands[3]));
+ DONE;
+ }
+
+ /* We could handle remaining cases by emitting multiple dep
+ instructions.
+
+ If we need more than two dep instructions then we lose. A 6
+ insn sequence mov mask1,mov mask2,shl;;and,and;;or is better than
+ mov;;dep,shr;;dep,shr;;dep. The former can be executed in 3 cycles,
+ the latter is 6 cycles on an Itanium (TM) processor, because there is
+ only one function unit that can execute dep and shr immed.
+
+     If we only need two dep instructions, then we still lose.
+ mov;;dep,shr;;dep is still 4 cycles. Even if we optimize away
+ the unnecessary mov, this is still undesirable because it will be
+ hard to optimize, and it creates unnecessary pressure on the I0
+ function unit. */
+
+ FAIL;
+
+#if 0
+ /* This code may be useful for other IA-64 processors, so we leave it in
+ for now. */
+ while (width > 16)
+ {
+ rtx tmp;
+
+ emit_insn (gen_insv (operands[0], GEN_INT (16), GEN_INT (shift),
+ operands[3]));
+ shift += 16;
+ width -= 16;
+ tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp, operands[3], GEN_INT (16)));
+ operands[3] = tmp;
+ }
+ operands[1] = GEN_INT (width);
+ operands[2] = GEN_INT (shift);
+#endif
+ }
+})
+
+(define_insn "*insv_internal"
+ [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
+ (match_operand:DI 1 "const_int_operand" "n")
+ (match_operand:DI 2 "const_int_operand" "n"))
+ (match_operand:DI 3 "nonmemory_operand" "rP"))]
+ "(gr_register_operand (operands[3], DImode) && INTVAL (operands[1]) <= 16)
+ || operands[3] == const0_rtx || operands[3] == constm1_rtx"
+ "dep %0 = %3, %0, %2, %1"
+ [(set_attr "itanium_class" "ishf")])
+
+;; Combine doesn't like to create bit-field insertions into zero.
+(define_insn "*shladdp4_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "shladd_log2_operand" "n"))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "ia64_depz_field_mask (operands[3], operands[2]) + INTVAL (operands[2]) == 32"
+ "shladdp4 %0 = %1, %2, r0"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*depz_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "M"))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "satisfies_constraint_M (operands[2])
+ && ia64_depz_field_mask (operands[3], operands[2]) > 0"
+{
+ operands[3] = GEN_INT (ia64_depz_field_mask (operands[3], operands[2]));
+ return "%,dep.z %0 = %1, %2, %3";
+}
+ [(set_attr "itanium_class" "ishf")])
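+
+;; For example, combine rewrites
+;;   unsigned long f (unsigned long x) { return (x & 0xff) << 4; }
+;; as (and:DI (ashift:DI x (const_int 4)) (const_int 0xff0)), which
+;; *depz_internal emits as
+;;   dep.z r8 = r32, 4, 8
+;; i.e. an 8-bit field deposited at bit 4 of an all-zero destination.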
+
+(define_insn "shift_mix4left"
+ [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
+ (const_int 32) (const_int 0))
+ (match_operand:DI 1 "gr_register_operand" "r"))
+ (clobber (match_operand:DI 2 "gr_register_operand" "=r"))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "unknown")])
+
+(define_split
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "")
+ (const_int 32) (const_int 0))
+ (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 2 "register_operand" ""))]
+ ""
+ [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32)))
+ (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0))
+ (lshiftrt:DI (match_dup 3) (const_int 32)))]
+ "operands[3] = operands[2];")
+
+(define_insn "*mix4left"
+ [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
+ (const_int 32) (const_int 0))
+ (lshiftrt:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 32)))]
+ ""
+ "mix4.l %0 = %0, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix4right"
+ [(set (zero_extract:DI (match_operand:DI 0 "gr_register_operand" "+r")
+ (const_int 32) (const_int 32))
+ (match_operand:DI 1 "gr_reg_or_0_operand" "rO"))]
+ ""
+ "mix4.r %0 = %r1, %0"
+ [(set_attr "itanium_class" "mmshf")])
+
+;; This is used by the rotrsi3 pattern.
+
+(define_insn "*mix4right_3op"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (ior:DI (zero_extend:DI (match_operand:SI 1 "gr_register_operand" "r"))
+ (ashift:DI (zero_extend:DI
+ (match_operand:SI 2 "gr_register_operand" "r"))
+ (const_int 32))))]
+ ""
+ "mix4.r %0 = %2, %1"
+ [(set_attr "itanium_class" "mmshf")])
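+
+;; For instance, a 32-bit rotate right can first build the 64-bit
+;; pattern x:x by concatenating the value with itself,
+;;   mix4.r rT = rX, rX
+;; and then shift right by the rotate count, keeping the low 32 bits.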
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 1-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn_and_split "andbi3"
+ [(set (match_operand:BI 0 "register_operand" "=c,c,r")
+ (and:BI (match_operand:BI 1 "register_operand" "%0,0,r")
+ (match_operand:BI 2 "register_operand" "c,r,r")))]
+ ""
+ "@
+ #
+ tbit.nz.and.orcm %0, %I0 = %2, 0
+ and %0 = %2, %1"
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[2]) == REG && PR_REGNO_P (REGNO (operands[2]))"
+ [(cond_exec (eq (match_dup 2) (const_int 0))
+ (set (match_dup 0) (and:BI (ne:BI (const_int 0) (const_int 0))
+ (match_dup 0))))]
+ ""
+ [(set_attr "itanium_class" "unknown,tbit,ilog")])
+
+(define_insn_and_split "*andcmbi3"
+ [(set (match_operand:BI 0 "register_operand" "=c,c,r")
+ (and:BI (not:BI (match_operand:BI 1 "register_operand" "c,r,r"))
+ (match_operand:BI 2 "register_operand" "0,0,r")))]
+ ""
+ "@
+ #
+ tbit.z.and.orcm %0, %I0 = %1, 0
+ andcm %0 = %2, %1"
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))"
+ [(cond_exec (ne (match_dup 1) (const_int 0))
+ (set (match_dup 0) (and:BI (ne:BI (const_int 0) (const_int 0))
+ (match_dup 0))))]
+ ""
+ [(set_attr "itanium_class" "unknown,tbit,ilog")])
+
+(define_insn_and_split "iorbi3"
+ [(set (match_operand:BI 0 "register_operand" "=c,c,r")
+ (ior:BI (match_operand:BI 1 "register_operand" "%0,0,r")
+ (match_operand:BI 2 "register_operand" "c,r,r")))]
+ ""
+ "@
+ #
+ tbit.nz.or.andcm %0, %I0 = %2, 0
+ or %0 = %2, %1"
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[2]) == REG && PR_REGNO_P (REGNO (operands[2]))"
+ [(cond_exec (ne (match_dup 2) (const_int 0))
+ (set (match_dup 0) (ior:BI (eq:BI (const_int 0) (const_int 0))
+ (match_dup 0))))]
+ ""
+ [(set_attr "itanium_class" "unknown,tbit,ilog")])
+
+(define_insn_and_split "*iorcmbi3"
+ [(set (match_operand:BI 0 "register_operand" "=c,c")
+ (ior:BI (not:BI (match_operand:BI 1 "register_operand" "c,r"))
+ (match_operand:BI 2 "register_operand" "0,0")))]
+ ""
+ "@
+ #
+ tbit.z.or.andcm %0, %I0 = %1, 0"
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))"
+ [(cond_exec (eq (match_dup 1) (const_int 0))
+ (set (match_dup 0) (ior:BI (eq:BI (const_int 0) (const_int 0))
+ (match_dup 0))))]
+ ""
+ [(set_attr "itanium_class" "unknown,tbit")])
+
+(define_insn "one_cmplbi2"
+ [(set (match_operand:BI 0 "register_operand" "=c,r,c,&c")
+ (not:BI (match_operand:BI 1 "register_operand" "r,r,0,c")))
+ (clobber (match_scratch:BI 2 "=X,X,c,X"))]
+ ""
+ "@
+ tbit.z %0, %I0 = %1, 0
+ xor %0 = 1, %1
+ #
+ #"
+ [(set_attr "itanium_class" "tbit,ilog,unknown,unknown")])
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (not:BI (match_operand:BI 1 "register_operand" "")))
+ (clobber (match_scratch:BI 2 ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && rtx_equal_p (operands[0], operands[1])"
+ [(set (match_dup 4) (match_dup 3))
+ (set (match_dup 0) (const_int 1))
+ (cond_exec (ne (match_dup 2) (const_int 0))
+ (set (match_dup 0) (const_int 0)))
+ (set (match_dup 0) (unspec:BI [(match_dup 0)] UNSPEC_PRED_REL_MUTEX))]
+ "operands[3] = gen_rtx_REG (CCImode, REGNO (operands[1]));
+ operands[4] = gen_rtx_REG (CCImode, REGNO (operands[2]));")
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (not:BI (match_operand:BI 1 "register_operand" "")))
+ (clobber (match_scratch:BI 2 ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && PR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && PR_REGNO_P (REGNO (operands[1]))
+ && ! rtx_equal_p (operands[0], operands[1])"
+ [(cond_exec (ne (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 0)))
+ (cond_exec (eq (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 1)))
+ (set (match_dup 0) (unspec:BI [(match_dup 0)] UNSPEC_PRED_REL_MUTEX))]
+ "")
+
+(define_insn "*cmpsi_and_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:SI 2 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C4.and.orcm %0, %I0 = %3, %r2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_and_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (const_int 0)])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C3.and.orcm %0, %I0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_andnot_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (not:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:SI 2 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C4.or.andcm %I0, %0 = %3, %r2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_andnot_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (not:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (const_int 0)]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C3.or.andcm %I0, %0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_and_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C4.and.orcm %0, %I0 = %3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_and_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (const_int 0)])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C3.and.orcm %0, %I0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_andnot_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (not:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C4.or.andcm %I0, %0 = %3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_andnot_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (not:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (const_int 0)]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C3.or.andcm %I0, %0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*tbit_and_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (ne:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1))
+ (const_int 0))
+ (match_operand:BI 2 "register_operand" "0")))]
+ ""
+ "tbit.nz.and.orcm %0, %I0 = %1, 0"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_and_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (eq:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1))
+ (const_int 0))
+ (match_operand:BI 2 "register_operand" "0")))]
+ ""
+ "tbit.z.and.orcm %0, %I0 = %1, 0"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_and_2"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (ne:BI (zero_extract:DI
+ (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0))
+ (match_operand:BI 3 "register_operand" "0")))]
+ ""
+ "tbit.nz.and.orcm %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_and_3"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (and:BI (eq:BI (zero_extract:DI
+ (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0))
+ (match_operand:BI 3 "register_operand" "0")))]
+ ""
+ "tbit.z.and.orcm %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*cmpsi_or_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:SI 2 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C4.or.andcm %0, %I0 = %3, %r2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_or_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (const_int 0)])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C3.or.andcm %0, %I0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_orcm_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (not:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:SI 2 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C4.and.orcm %I0, %0 = %3, %r2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsi_orcm_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (not:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (const_int 0)]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp4.%C3.and.orcm %I0, %0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_or_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C4.or.andcm %0, %I0 = %3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_or_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (const_int 0)])
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C3.or.andcm %0, %I0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_orcm_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (not:BI (match_operator:BI 4 "predicate_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C4.and.orcm %I0, %0 = %3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_orcm_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (not:BI (match_operator:BI 3 "signed_inequality_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (const_int 0)]))
+ (match_operand:BI 1 "register_operand" "0")))]
+ ""
+ "cmp.%C3.and.orcm %I0, %0 = r0, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*tbit_or_0"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (ne:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1))
+ (const_int 0))
+ (match_operand:BI 2 "register_operand" "0")))]
+ ""
+ "tbit.nz.or.andcm %0, %I0 = %1, 0"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_or_1"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (eq:BI (and:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1))
+ (const_int 0))
+ (match_operand:BI 2 "register_operand" "0")))]
+ ""
+ "tbit.z.or.andcm %0, %I0 = %1, 0"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_or_2"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (ne:BI (zero_extract:DI
+ (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0))
+ (match_operand:BI 3 "register_operand" "0")))]
+ ""
+ "tbit.nz.or.andcm %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*tbit_or_3"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ior:BI (eq:BI (zero_extract:DI
+ (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0))
+ (match_operand:BI 3 "register_operand" "0")))]
+ ""
+ "tbit.z.or.andcm %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+;; Transform test of and/or of setcc into parallel comparisons.
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (ne:BI (and:DI (ne:DI (match_operand:BI 2 "register_operand" "")
+ (const_int 0))
+ (match_operand:DI 3 "register_operand" ""))
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0)
+ (and:BI (ne:BI (and:DI (match_dup 3) (const_int 1)) (const_int 0))
+ (match_dup 2)))]
+ "")
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (eq:BI (and:DI (ne:DI (match_operand:BI 2 "register_operand" "")
+ (const_int 0))
+ (match_operand:DI 3 "register_operand" ""))
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0)
+ (and:BI (ne:BI (and:DI (match_dup 3) (const_int 1)) (const_int 0))
+ (match_dup 2)))
+ (parallel [(set (match_dup 0) (not:BI (match_dup 0)))
+ (clobber (scratch))])]
+ "")
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (ne:BI (ior:DI (ne:DI (match_operand:BI 2 "register_operand" "")
+ (const_int 0))
+ (match_operand:DI 3 "register_operand" ""))
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0)
+ (ior:BI (ne:BI (match_dup 3) (const_int 0))
+ (match_dup 2)))]
+ "")
+
+(define_split
+ [(set (match_operand:BI 0 "register_operand" "")
+ (eq:BI (ior:DI (ne:DI (match_operand:BI 2 "register_operand" "")
+ (const_int 0))
+ (match_operand:DI 3 "register_operand" ""))
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0)
+ (ior:BI (ne:BI (match_dup 3) (const_int 0))
+ (match_dup 2)))
+ (parallel [(set (match_dup 0) (not:BI (match_dup 0)))
+ (clobber (scratch))])]
+ "")
+
+;; ??? Incredibly hackish. Either need four proper patterns with all
+;; the alternatives, or rely on sched1 to split the insn and hope that
+;; nothing bad happens to the comparisons in the meantime.
+;;
+;; Alternately, adjust combine to allow 2->2 and 3->3 splits, assuming
+;; that we're doing height reduction.
+;
+;(define_insn_and_split ""
+; [(set (match_operand:BI 0 "register_operand" "=c")
+; (and:BI (and:BI (match_operator:BI 1 "comparison_operator"
+; [(match_operand 2 "" "")
+; (match_operand 3 "" "")])
+; (match_operator:BI 4 "comparison_operator"
+; [(match_operand 5 "" "")
+; (match_operand 6 "" "")]))
+; (match_dup 0)))]
+; "flag_schedule_insns"
+; "#"
+; ""
+; [(set (match_dup 0) (and:BI (match_dup 1) (match_dup 0)))
+; (set (match_dup 0) (and:BI (match_dup 4) (match_dup 0)))]
+; "")
+;
+;(define_insn_and_split ""
+; [(set (match_operand:BI 0 "register_operand" "=c")
+; (ior:BI (ior:BI (match_operator:BI 1 "comparison_operator"
+; [(match_operand 2 "" "")
+; (match_operand 3 "" "")])
+; (match_operator:BI 4 "comparison_operator"
+; [(match_operand 5 "" "")
+; (match_operand 6 "" "")]))
+; (match_dup 0)))]
+; "flag_schedule_insns"
+; "#"
+; ""
+; [(set (match_dup 0) (ior:BI (match_dup 1) (match_dup 0)))
+; (set (match_dup 0) (ior:BI (match_dup 4) (match_dup 0)))]
+; "")
+;
+;(define_split
+; [(set (match_operand:BI 0 "register_operand" "")
+; (and:BI (and:BI (match_operator:BI 1 "comparison_operator"
+; [(match_operand 2 "" "")
+; (match_operand 3 "" "")])
+; (match_operand:BI 7 "register_operand" ""))
+; (and:BI (match_operator:BI 4 "comparison_operator"
+; [(match_operand 5 "" "")
+; (match_operand 6 "" "")])
+; (match_operand:BI 8 "register_operand" ""))))]
+; ""
+; [(set (match_dup 0) (and:BI (match_dup 7) (match_dup 8)))
+; (set (match_dup 0) (and:BI (and:BI (match_dup 1) (match_dup 4))
+; (match_dup 0)))]
+; "")
+;
+;(define_split
+; [(set (match_operand:BI 0 "register_operand" "")
+; (ior:BI (ior:BI (match_operator:BI 1 "comparison_operator"
+; [(match_operand 2 "" "")
+; (match_operand 3 "" "")])
+; (match_operand:BI 7 "register_operand" ""))
+; (ior:BI (match_operator:BI 4 "comparison_operator"
+; [(match_operand 5 "" "")
+; (match_operand 6 "" "")])
+; (match_operand:BI 8 "register_operand" ""))))]
+; ""
+; [(set (match_dup 0) (ior:BI (match_dup 7) (match_dup 8)))
+; (set (match_dup 0) (ior:BI (ior:BI (match_dup 1) (match_dup 4))
+; (match_dup 0)))]
+; "")
+
+;; Try harder to avoid predicate copies by duplicating compares.
+;; Note that we'll have already split the predicate copy, which
+;; is kind of a pain, but oh well.
+
+(define_peephole2
+ [(set (match_operand:BI 0 "register_operand" "")
+ (match_operand:BI 1 "comparison_operator" ""))
+ (set (match_operand:CCI 2 "register_operand" "")
+ (match_operand:CCI 3 "register_operand" ""))
+ (set (match_operand:CCI 4 "register_operand" "")
+ (match_operand:CCI 5 "register_operand" ""))
+ (set (match_operand:BI 6 "register_operand" "")
+ (unspec:BI [(match_dup 6)] UNSPEC_PRED_REL_MUTEX))]
+ "REGNO (operands[3]) == REGNO (operands[0])
+ && REGNO (operands[4]) == REGNO (operands[0]) + 1
+ && REGNO (operands[4]) == REGNO (operands[2]) + 1
+ && REGNO (operands[6]) == REGNO (operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 6) (match_dup 7))]
+ "operands[7] = copy_rtx (operands[1]);")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 16-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "gr_register_operand" "=r")
+ (mult:HI (match_operand:HI 1 "gr_register_operand" "r")
+ (match_operand:HI 2 "gr_register_operand" "r")))]
+ ""
+ "pmpy2.r %0 = %1, %2"
+ [(set_attr "itanium_class" "mmmul")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "gr_register_operand" "%r,r,a")
+ (match_operand:SI 2 "gr_reg_or_22bit_operand" "r,I,J")))]
+ ""
+ "@
+ add %0 = %1, %2
+ adds %0 = %2, %1
+ addl %0 = %2, %1"
+ [(set_attr "itanium_class" "ialu")])
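+
+;; The three alternatives map onto the three IA-64 add forms: "add"
+;; for register + register, "adds" for a 14-bit immediate (constraint
+;; I), and "addl" for a 22-bit immediate (constraint J), which is only
+;; encodable when the register source is one of r0-r3 (constraint a).
+;; For example:
+;;   add  r8 = r32, r33
+;;   adds r8 = 200, r32
+;;   addl r8 = 100000, r1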
+
+(define_insn "*addsi3_plus1"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (match_operand:SI 2 "gr_register_operand" "r"))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %2, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*addsi3_plus1_alt"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (const_int 2))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %1, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*addsi3_shladd"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (match_operand:SI 2 "shladd_operand" "n"))
+ (match_operand:SI 3 "gr_register_operand" "r")))]
+ ""
+ "shladd %0 = %1, %S2, %3"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "gr_reg_or_8bit_operand" "rK")
+ (match_operand:SI 2 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = %1, %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*subsi3_minus1"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (plus:SI (not:SI (match_operand:SI 1 "gr_register_operand" "r"))
+ (match_operand:SI 2 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = %2, %1, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+;; ??? Could add maddsi3 patterns modeled after the madddi3 patterns.
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "fr_register_operand" "=f")
+ (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
+ (match_operand:SI 2 "grfr_register_operand" "f")))]
+ ""
+ "xmpy.l %0 = %1, %2"
+ [(set_attr "itanium_class" "xmpy")])
+
+(define_insn "maddsi4"
+ [(set (match_operand:SI 0 "fr_register_operand" "=f")
+ (plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
+ (match_operand:SI 2 "grfr_register_operand" "f"))
+ (match_operand:SI 3 "grfr_register_operand" "f")))]
+ ""
+ "xma.l %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "xmpy")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = r0, %1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "abssi2"
+ [(set (match_dup 2)
+ (ge:BI (match_operand:SI 1 "gr_register_operand" "") (const_int 0)))
+ (set (match_operand:SI 0 "gr_register_operand" "")
+ (if_then_else:SI (eq (match_dup 2) (const_int 0))
+ (neg:SI (match_dup 1))
+ (match_dup 1)))]
+ ""
+ { operands[2] = gen_reg_rtx (BImode); })
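+
+;; Roughly, for x in r32 and the result in r8, the abssi2 expansion
+;; amounts to a compare feeding two predicated operations:
+;;   cmp4.ge p6, p7 = r32, r0
+;;   ;;
+;;   (p6) mov r8 = r32
+;;   (p7) sub r8 = r0, r32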
+
+(define_expand "sminsi3"
+ [(set (match_dup 3)
+ (ge:BI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_register_operand" "")))
+ (set (match_operand:SI 0 "gr_register_operand" "")
+ (if_then_else:SI (ne (match_dup 3) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "smaxsi3"
+ [(set (match_dup 3)
+ (ge:BI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_register_operand" "")))
+ (set (match_operand:SI 0 "gr_register_operand" "")
+ (if_then_else:SI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "uminsi3"
+ [(set (match_dup 3)
+ (geu:BI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_register_operand" "")))
+ (set (match_operand:SI 0 "gr_register_operand" "")
+ (if_then_else:SI (ne (match_dup 3) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "umaxsi3"
+ [(set (match_dup 3)
+ (geu:BI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_register_operand" "")))
+ (set (match_operand:SI 0 "gr_register_operand" "")
+ (if_then_else:SI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r,r")
+ (plus:DI (match_operand:DI 1 "gr_register_operand" "%r,r,a")
+ (match_operand:DI 2 "gr_reg_or_22bit_operand" "r,I,J")))]
+ ""
+ "@
+ add %0 = %1, %2
+ adds %0 = %2, %1
+ addl %0 = %2, %1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*adddi3_plus1"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (plus:DI (plus:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_register_operand" "r"))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %2, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+;; This has some of the same problems as shladd. We let the shladd
+;; eliminator hack handle it, which results in the 1 being forced into
+;; a register, but avoids more ugliness here.
+(define_insn "*adddi3_plus1_alt"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 2))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %1, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "gr_reg_or_8bit_operand" "rK")
+ (match_operand:DI 2 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = %1, %2"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*subdi3_minus1"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (plus:DI (not:DI (match_operand:DI 1 "gr_register_operand" "r"))
+ (match_operand:DI 2 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = %2, %1, 1"
+ [(set_attr "itanium_class" "ialu")])
+
+;; ??? Use grfr instead of fr because of virtual register elimination
+;; and silly test cases multiplying by the frame pointer.
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
+ (match_operand:DI 2 "grfr_register_operand" "f")))]
+ ""
+ "xmpy.l %0 = %1, %2"
+ [(set_attr "itanium_class" "xmpy")])
+
+;; ??? If operand 3 is an eliminable reg, then register elimination causes the
+;; same problem that we have with shladd below. Unfortunately, this case is
+;; much harder to fix because the multiply puts the result in an FP register,
+;; but the add needs inputs from a general register. We add a spurious clobber
+;; here so that it will be present just in case register elimination gives us
+;; the funny result.
+
+;; ??? Maybe validate_changes should try adding match_scratch clobbers?
+
+;; ??? Maybe we should change how adds are canonicalized.
+
+(define_insn "madddi4"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
+ (match_operand:DI 2 "grfr_register_operand" "f"))
+ (match_operand:DI 3 "grfr_register_operand" "f")))
+ (clobber (match_scratch:DI 4 "=X"))]
+ ""
+ "xma.l %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "xmpy")])
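+
+;; For example,
+;;   long f (long a, long b, long c) { return a * b + c; }
+;; moves the three values to FP registers with setf.sig, issues (with
+;; illustrative register numbers)
+;;   xma.l f6 = f6, f7, f8
+;; and recovers the result with getf.sig, since ia64 has no integer
+;; multiply outside the FP unit.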
+
+;; This can be created by register elimination if operand 3 of shladd is an
+;; eliminable register or has reg_equiv_constant set.
+
+;; We have to use nonmemory_operand for operand 4, to ensure that the
+;; validate_changes call inside eliminate_regs will always succeed. If it
+;; doesn't succeed, then this remains a madddi4 pattern, and will be reloaded
+;; incorrectly.
+
+(define_insn "*madddi4_elim"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
+ (match_operand:DI 2 "register_operand" "f"))
+ (match_operand:DI 3 "register_operand" "f"))
+ (match_operand:DI 4 "nonmemory_operand" "rI")))
+ (clobber (match_scratch:DI 5 "=f"))]
+ "reload_in_progress"
+ "#"
+ [(set_attr "itanium_class" "unknown")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" ""))
+ (match_operand:DI 3 "register_operand" ""))
+ (match_operand:DI 4 "gr_reg_or_14bit_operand" "")))
+ (clobber (match_scratch:DI 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (clobber (match_dup 0))])
+ (set (match_dup 0) (match_dup 5))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
+ "")
+
+(define_insn "smuldi3_highpart"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG"))
+ (sign_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" "fG")))
+ (const_int 64))))]
+ ""
+ "xmpy.h %0 = %F1, %F2"
+ [(set_attr "itanium_class" "xmpy")])
+
+(define_insn "umuldi3_highpart"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG"))
+ (zero_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" "fG")))
+ (const_int 64))))]
+ ""
+ "xmpy.hu %0 = %F1, %F2"
+ [(set_attr "itanium_class" "xmpy")])
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "gr_register_operand" "r")))]
+ ""
+ "sub %0 = r0, %1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_expand "absdi2"
+ [(set (match_dup 2)
+ (ge:BI (match_operand:DI 1 "gr_register_operand" "") (const_int 0)))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (eq (match_dup 2) (const_int 0))
+ (neg:DI (match_dup 1))
+ (match_dup 1)))]
+ ""
+ { operands[2] = gen_reg_rtx (BImode); })
+
+(define_expand "smindi3"
+ [(set (match_dup 3)
+ (ge:BI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_register_operand" "")))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "smaxdi3"
+ [(set (match_dup 3)
+ (ge:BI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_register_operand" "")))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "umindi3"
+ [(set (match_dup 3)
+ (geu:BI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_register_operand" "")))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "umaxdi3"
+ [(set (match_dup 3)
+ (geu:BI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_register_operand" "")))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (ne (match_dup 3) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ { operands[3] = gen_reg_rtx (BImode); })
+
+(define_expand "ffsdi2"
+ [(set (match_dup 6)
+ (eq:BI (match_operand:DI 1 "gr_register_operand" "") (const_int 0)))
+ (set (match_dup 2) (plus:DI (match_dup 1) (const_int -1)))
+ (set (match_dup 5) (const_int 0))
+ (set (match_dup 3) (xor:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4) (popcount:DI (match_dup 3)))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (if_then_else:DI (ne (match_dup 6) (const_int 0))
+ (match_dup 5) (match_dup 4)))]
+ ""
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+ operands[6] = gen_reg_rtx (BImode);
+})
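+
+;; The identity used above: for x != 0, x ^ (x - 1) sets exactly the
+;; bits from bit 0 up to and including the lowest set bit, so its
+;; popcount equals ffs (x). E.g. for x = 40 (101000 in binary),
+;; x - 1 = 100111, x ^ (x - 1) = 001111, and popcount = 4 = ffs (40).
+;; The predicate selects 0 when x == 0.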
+
+(define_expand "ctzdi2"
+ [(set (match_dup 2) (plus:DI (match_operand:DI 1 "gr_register_operand" "")
+ (const_int -1)))
+ (set (match_dup 3) (not:DI (match_dup 1)))
+ (set (match_dup 4) (and:DI (match_dup 2) (match_dup 3)))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (popcount:DI (match_dup 4)))]
+ ""
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
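+
+;; Here (x - 1) & ~x isolates the bits strictly below the lowest set
+;; bit, so its popcount is the trailing zero count. E.g. for x = 40
+;; (101000 in binary), x - 1 = 100111, ~x ends in ...010111, the AND
+;; is 000111, and popcount = 3 = ctz (40). For x == 0 this yields 64.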
+
+;; Note the computation here is op0 = 63 - (exp - 0xffff).
+(define_expand "clzdi2"
+ [(set (match_dup 2)
+ (unsigned_float:XF (match_operand:DI 1 "fr_reg_or_fp01_operand" "")))
+ (set (match_dup 3)
+ (unspec:DI [(match_dup 2)] UNSPEC_GETF_EXP))
+ (set (match_dup 4) (const_int 65598))
+ (set (match_operand:DI 0 "gr_register_operand" "")
+ (minus:DI (match_dup 4) (match_dup 3)))]
+ ""
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+})
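+
+;; E.g. for x = 1 the conversion produces 1.0 * 2^0, getf.exp reads
+;; the biased exponent 0xffff, and 65598 - 0xffff = 63 = clz (1); for
+;; x = 2^63 the exponent is 0xffff + 63 and the result is 0.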
+
+(define_insn "popcountdi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "gr_register_operand" "r")))]
+ ""
+ "popcnt %0 = %1"
+ [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "bswapdi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (bswap:DI (match_operand:DI 1 "gr_register_operand" "r")))]
+ ""
+ "mux1 %0 = %1, @rev"
+ [(set_attr "itanium_class" "mmshf")])
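+
+;; mux1 with the @rev permutation reverses the eight bytes of the
+;; source, e.g. 0x0102030405060708 becomes 0x0807060504030201.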
+
+(define_insn "*getf_exp_xf"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (unspec:DI [(match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_GETF_EXP))]
+ ""
+ "getf.exp %0 = %F1"
+ [(set_attr "itanium_class" "frfr")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 128-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "addti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (plus:TI (match_operand:TI 1 "gr_register_operand" "%r")
+ (match_operand:TI 2 "gr_reg_or_14bit_operand" "rI")))
+ (clobber (match_scratch:BI 3 "=&c"))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "unknown")])
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (plus:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:TI 2 "register_operand" "")))
+ (clobber (match_scratch:BI 3 ""))]
+ "reload_completed"
+ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (ltu:BI (match_dup 0) (match_dup 1)))
+ (cond_exec (eq (match_dup 3) (const_int 0))
+ (set (match_dup 4) (plus:DI (match_dup 5) (match_dup 6))))
+ (cond_exec (ne (match_dup 3) (const_int 0))
+ (set (match_dup 4)
+ (plus:DI (plus:DI (match_dup 5) (match_dup 6))
+ (const_int 1))))]
+{
+ operands[4] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[5] = gen_highpart (DImode, operands[1]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[6] = gen_highpart (DImode, operands[2]);
+ operands[2] = gen_lowpart (DImode, operands[2]);
+})
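+
+;; This is the standard double-word add: the low halves are added
+;; first, the carry is recovered as an unsigned-wraparound test, and
+;; the predicated alternatives add the high halves with carry 0 or 1.
+;; In C terms:
+;;   lo = a_lo + b_lo;
+;;   hi = a_hi + b_hi + (lo < a_lo);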
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (plus:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:TI 2 "immediate_operand" "")))
+ (clobber (match_scratch:BI 3 ""))]
+ "reload_completed"
+ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (ltu:BI (match_dup 0) (match_dup 1)))
+ (cond_exec (eq (match_dup 3) (const_int 0))
+ (set (match_dup 4)
+ (plus:DI (match_dup 5) (match_dup 6))))
+ (cond_exec (ne (match_dup 3) (const_int 0))
+ (set (match_dup 4)
+ (plus:DI (match_dup 5) (match_dup 7))))]
+{
+ operands[4] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[5] = gen_highpart (DImode, operands[1]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[6] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
+ operands[7] = INTVAL (operands[2]) < 0 ? const0_rtx : const1_rtx;
+})
+
+(define_insn "subti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (minus:TI (match_operand:TI 1 "gr_reg_or_8bit_operand" "rK")
+ (match_operand:TI 2 "gr_register_operand" "r")))
+ (clobber (match_scratch:BI 3 "=&c"))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "unknown")])
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (minus:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:TI 2 "register_operand" "")))
+ (clobber (match_scratch:BI 3 "=&c"))]
+ "reload_completed"
+ [(set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (ltu:BI (match_dup 1) (match_dup 0)))
+ (cond_exec (eq (match_dup 3) (const_int 0))
+ (set (match_dup 4) (minus:DI (match_dup 5) (match_dup 6))))
+ (cond_exec (ne (match_dup 3) (const_int 0))
+ (set (match_dup 4)
+ (plus:DI (not:DI (match_dup 6)) (match_dup 5))))]
+{
+ operands[4] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[5] = gen_highpart (DImode, operands[1]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[6] = gen_highpart (DImode, operands[2]);
+ operands[2] = gen_lowpart (DImode, operands[2]);
+})
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (minus:TI (match_operand:TI 1 "immediate_operand" "")
+ (match_operand:TI 2 "register_operand" "")))
+ (clobber (match_scratch:BI 3 "=&c"))]
+ "reload_completed && satisfies_constraint_K (operands[1])"
+ [(set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (gtu:BI (match_dup 0) (match_dup 1)))
+ (cond_exec (ne (match_dup 3) (const_int 0))
+ (set (match_dup 4) (minus:DI (match_dup 6) (match_dup 5))))
+ (cond_exec (eq (match_dup 3) (const_int 0))
+ (set (match_dup 4) (minus:DI (match_dup 7) (match_dup 5))))]
+{
+ operands[4] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[5] = gen_highpart (DImode, operands[2]);
+ operands[2] = gen_lowpart (DImode, operands[2]);
+ operands[6] = INTVAL (operands[1]) < 0 ? GEN_INT (-2) : constm1_rtx;
+ operands[7] = INTVAL (operands[1]) < 0 ? constm1_rtx : const0_rtx;
+})
+
+(define_expand "mulditi3"
+ [(set (match_operand:TI 0 "fr_register_operand" "")
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" ""))
+ (sign_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" ""))))]
+ ""
+ "")
+
+(define_insn_and_split "*mulditi3_internal"
+ [(set (match_operand:TI 0 "fr_register_operand" "=&f")
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG"))
+ (sign_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (mult:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (const_int 64))))]
+{
+ operands[3] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+}
+ [(set_attr "itanium_class" "unknown")])
+
+(define_expand "umulditi3"
+ [(set (match_operand:TI 0 "fr_register_operand" "")
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" ""))
+ (zero_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" ""))))]
+ ""
+ "")
+
+(define_insn_and_split "*umulditi3_internal"
+ [(set (match_operand:TI 0 "fr_register_operand" "=&f")
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "fr_reg_or_fp01_operand" "fG"))
+ (zero_extend:TI
+ (match_operand:DI 2 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (mult:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3) (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI (match_dup 1))
+ (zero_extend:TI (match_dup 2)))
+ (const_int 64))))]
+{
+ operands[3] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+}
+ [(set_attr "itanium_class" "unknown")])
+
+(define_insn_and_split "negti2"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (neg:TI (match_operand:TI 1 "gr_register_operand" "r")))
+ (clobber (match_scratch:BI 2 "=&c"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (eq:BI (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (minus:DI (const_int 0) (match_dup 1)))
+ (cond_exec (eq (match_dup 2) (const_int 0))
+ (set (match_dup 3) (minus:DI (const_int -1) (match_dup 4))))
+ (cond_exec (ne (match_dup 2) (const_int 0))
+ (set (match_dup 3) (minus:DI (const_int 0) (match_dup 4))))]
+{
+ operands[3] = gen_highpart (DImode, operands[0]);
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[4] = gen_highpart (DImode, operands[1]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+}
+ [(set_attr "itanium_class" "unknown")])
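+
+;; This follows from -x = ~x + 1 applied double-word: the low half is
+;; negated directly, and the carry into the high half is 1 exactly
+;; when the low half is zero. In C terms:
+;;   lo = -x_lo;
+;;   hi = (x_lo == 0) ? -x_hi : ~x_hi;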
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit floating point arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "%fG")
+ (match_operand:SF 2 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fadd.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fsub.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fneg %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nabssf2"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (neg:SF (abs:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "fnegabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "copysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "fmerge.s %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
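+
+;; fmerge.s takes the sign from its first source and the exponent and
+;; significand from its second, so "fmerge.s %0 = %F2, %F1" is exactly
+;; copysign (op1, op2); e.g. copysignf (3.0f, -0.0f) yields -3.0f.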
+
+(define_insn "*ncopysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (unspec:SF [(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN)))]
+ ""
+ "fmerge.ns %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "sminsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (smin:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmin %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "smaxsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (smax:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmax %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nmulsf3"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (neg:SF (mult:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "fnmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fma.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmssf4"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")
+ (neg:SF
+ (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fms.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (fma:SF (neg:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG"))
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fnma.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit floating point arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (plus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "%fG")
+ (match_operand:DF 2 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fadd.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*adddf3_trunc"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (plus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "%fG")
+ (match_operand:DF 2 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fadd.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (minus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fsub.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*subdf3_trunc"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (minus:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fsub.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (mult:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmpy.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*muldf3_trunc"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (mult:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "fmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
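+
+;; Illustrative note: the *_trunc patterns above let a DFmode operation
+;; whose result is immediately narrowed be emitted as one instruction,
+;; because the .s completer rounds directly to single precision.  E.g.
+;;   float f (double a, double b) { return (float) (a + b); }
+;; matches *adddf3_trunc and becomes a single "fadd.s".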
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (abs:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "negdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (neg:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fneg %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nabsdf2"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (neg:DF (abs:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "fnegabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "copysigndf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "fmerge.s %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*ncopysigndf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (unspec:DF [(match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN)))]
+ ""
+ "fmerge.ns %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "smindf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (smin:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmin %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "smaxdf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (smax:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fmax %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nmuldf3"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (neg:DF (mult:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG"))))]
+ ""
+ "fnmpy.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*nmuldf3_trunc"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (neg:DF (mult:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")))))]
+ ""
+ "fnmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmadf4"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fma.d %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fmadf_trunc_sf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fma.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmsdf4"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (fma:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (neg:DF
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fms.d %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fmsdf_trunc_sf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (fma:DF
+ (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (neg:DF
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))]
+ ""
+ "fms.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fnmadf4"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (fma:DF (neg:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG"))
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fnma.d %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fnmadf_trunc_sf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (fma:DF
+ (neg:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG"))
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fnma.s %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 80-bit floating point arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "addxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (plus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "%fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fadd %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*addxf3_truncsf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (plus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "%fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fadd.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*addxf3_truncdf"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_truncate:DF
+ (plus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "%fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fadd.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "subxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (minus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fsub %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*subxf3_truncsf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (minus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fsub.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*subxf3_truncdf"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_truncate:DF
+ (minus:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fsub.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "mulxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (mult:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG")))]
+ ""
+ "fmpy %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*mulxf3_truncsf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (mult:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG"))))]
+ ""
+ "fmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*mulxf3_truncdf"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_truncate:DF
+ (mult:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG"))))]
+ ""
+ "fmpy.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "absxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (abs:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")))]
+ ""
+ "fabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "negxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (neg:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")))]
+ ""
+ "fneg %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nabsxf2"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (neg:XF (abs:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG"))))]
+ ""
+ "fnegabs %0 = %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "copysignxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "fmerge.s %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*ncopysignxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (neg:XF (unspec:XF [(match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")]
+ UNSPEC_COPYSIGN)))]
+ ""
+ "fmerge.ns %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "sminxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (smin:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG")))]
+ ""
+ "fmin %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "smaxxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (smax:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG")))]
+ ""
+ "fmax %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*nmulxf3"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (neg:XF (mult:XF (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG"))))]
+ ""
+ "fnmpy %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*nmulxf3_truncsf"
+ [(set (match_operand:SF 0 "fr_register_operand" "=f")
+ (float_truncate:SF
+ (neg:XF (mult:XF
+ (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG")))))]
+ ""
+ "fnmpy.s %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*nmulxf3_truncdf"
+ [(set (match_operand:DF 0 "fr_register_operand" "=f")
+ (float_truncate:DF
+ (neg:XF (mult:XF
+ (match_operand:XF 1 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "fG")))))]
+ ""
+ "fnmpy.d %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmaxf4"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (fma:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fma %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fmaxf_trunc_<mode>"
+ [(set (match_operand:MODE_SDF 0 "fr_register_operand" "=f")
+ (float_truncate:MODE_SDF
+ (fma:XF
+ (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fma<suffix> %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmsxf4"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (fma:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (neg:XF
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fms %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fmsxf_trunc_<mode>"
+ [(set (match_operand:MODE_SDF 0 "fr_register_operand" "=f")
+ (float_truncate:MODE_SDF
+ (fma:XF
+ (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (neg:XF
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ")))))]
+ ""
+ "fms<suffix> %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fnmaxf4"
+ [(set (match_operand:XF 0 "fr_register_operand" "=f")
+ (fma:XF (neg:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG"))
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ")))]
+ ""
+ "fnma %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fnmaxf_trunc_<mode>"
+ [(set (match_operand:MODE_SDF 0 "fr_register_operand" "=f")
+ (float_truncate:MODE_SDF
+ (fma:XF
+ (neg:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG"))
+ (match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:XF 3 "fr_reg_or_signed_fp01_operand" "fZ"))))]
+ ""
+ "fnma<suffix> %0 = %F1, %F2, %F3"
+ [(set_attr "itanium_class" "fmac")])
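+
+;; Illustrative note: MODE_SDF is a mode iterator (defined earlier in this
+;; file) covering SF and DF, and <suffix> expands to the matching .s or .d
+;; completer; so *fmaxf_trunc_<mode> above stands for two patterns,
+;; *fmaxf_trunc_sf emitting "fma.s" and *fmaxf_trunc_df emitting "fma.d".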
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "")
+ (ashift:SI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ /* Why oh why didn't Intel arrange for SHIFT_COUNT_TRUNCATED? Now
+ we've got to get rid of stray bits outside the SImode register. */
+ rtx subshift = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
+ operands[2] = subshift;
+ }
+})
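+
+;; A rough sketch of what the expander above produces for a variable count
+;; (mnemonics are illustrative): for
+;;   int f (int x, int n) { return x << n; }
+;; the count is first zero-extended to DImode ("zxt4"), since the stray
+;; upper bits would otherwise reach the 64-bit shifter.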
+
+(define_insn "*ashlsi3_internal"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "gr_register_operand" "r,r,r")
+ (match_operand:DI 2 "gr_reg_or_5bit_operand" "R,n,r")))]
+ ""
+ "@
+ shladd %0 = %1, %2, r0
+ dep.z %0 = %1, %2, %E2
+ shl %0 = %1, %2"
+ [(set_attr "itanium_class" "ialu,ishf,mmshf")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
+ ""
+{
+ rtx subtarget = gen_reg_rtx (DImode);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ emit_insn (gen_extv (subtarget, gen_lowpart (DImode, operands[1]),
+ GEN_INT (32 - INTVAL (operands[2])), operands[2]));
+ else
+ {
+ rtx subshift = gen_reg_rtx (DImode);
+ emit_insn (gen_extendsidi2 (subtarget, operands[1]));
+ emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
+ emit_insn (gen_ashrdi3 (subtarget, subtarget, subshift));
+ }
+ emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
+ DONE;
+})
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
+ ""
+{
+ rtx subtarget = gen_reg_rtx (DImode);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ emit_insn (gen_extzv (subtarget, gen_lowpart (DImode, operands[1]),
+ GEN_INT (32 - INTVAL (operands[2])), operands[2]));
+ else
+ {
+ rtx subshift = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (subtarget, operands[1]));
+ emit_insn (gen_zero_extendsidi2 (subshift, operands[2]));
+ emit_insn (gen_lshrdi3 (subtarget, subtarget, subshift));
+ }
+ emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
+ DONE;
+})
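+
+;; Illustrative note: there is no 32-bit right shift in hardware, so the
+;; ashrsi3/lshrsi3 expanders above either extract the field directly for a
+;; constant count (extv/extzv, i.e. "extr"/"extr.u") or widen the value to
+;; 64 bits and use the DImode shift, e.g. roughly "zxt4; shr.u" for
+;;   unsigned f (unsigned x, int n) { return x >> n; }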
+
+;; Use mix4.r/shr to implement rotrsi3. We only get 32 bits of valid result
+;; here, instead of 64 like the patterns above. Keep the pattern together
+;; until after combine; otherwise it won't get matched often.
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "")
+ (rotatert:SI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
+ ""
+{
+ if (GET_MODE (operands[2]) != VOIDmode)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_zero_extendsidi2 (tmp, operands[2]));
+ operands[2] = tmp;
+ }
+})
+
+(define_insn_and_split "*rotrsi3_internal"
+ [(set (match_operand:SI 0 "gr_register_operand" "=&r")
+ (rotatert:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_reg_or_5bit_operand" "rM")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3)
+ (ior:DI (zero_extend:DI (match_dup 1))
+ (ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32))))
+ (set (match_dup 3)
+ (lshiftrt:DI (match_dup 3) (match_dup 2)))]
+ "operands[3] = gen_rtx_REG (DImode, REGNO (operands[0]));")
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "gr_register_operand" "")
+ (rotate:SI (match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_5bit_operand" "")))]
+ ""
+{
+ if (! shift_32bit_count_operand (operands[2], SImode))
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (tmp, GEN_INT (32), operands[2]));
+ emit_insn (gen_rotrsi3 (operands[0], operands[1], tmp));
+ DONE;
+ }
+})
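+
+;; Illustrative note: for a variable count the expander above rewrites the
+;; rotate left through the identity rotl (x, n) == rotr (x, 32 - n); only
+;; constant counts reach the insn below, whose unsplit "mux2" form handles
+;; a rotate by 16 by swapping the two 16-bit halves of the low word.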
+
+(define_insn_and_split "*rotlsi3_internal"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (match_operand:SI 2 "shift_32bit_count_operand" "n")))]
+ ""
+ "mux2 %0 = %1, 0xe1"
+ "reload_completed && INTVAL (operands[2]) != 16"
+ [(set (match_dup 3)
+ (ior:DI (zero_extend:DI (match_dup 1))
+ (ashift:DI (zero_extend:DI (match_dup 1)) (const_int 32))))
+ (set (match_dup 3)
+ (lshiftrt:DI (match_dup 3) (match_dup 2)))]
+{
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "ashldi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r,r")
+ (ashift:DI (match_operand:DI 1 "gr_register_operand" "r,r,r")
+ (match_operand:DI 2 "gr_reg_or_6bit_operand" "R,r,rM")))]
+ ""
+ "@
+ shladd %0 = %1, %2, r0
+ shl %0 = %1, %2
+ shl %0 = %1, %2"
+ [(set_attr "itanium_class" "ialu,mmshf,mmshfi")])
+
+;; ??? Maybe combine this with the multiply and add instruction?
+
+(define_insn "*shladd"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "shladd_operand" "n"))
+ (match_operand:DI 3 "gr_register_operand" "r")))]
+ ""
+ "shladd %0 = %1, %S2, %3"
+ [(set_attr "itanium_class" "ialu")])
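+
+;; Illustrative note: shladd computes (%1 << log2 %2) + %3 in a single ALU
+;; operation (%S2 prints the multiplier as its shift count), so e.g.
+;;   long f (long a, long b) { return a * 8 + b; }
+;; can become "shladd r8 = a, 3, b" (register names are examples only).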
+
+;; This can be created by register elimination if operand 3 of shladd is an
+;; eliminable register or has reg_equiv_constant set.
+
+;; We have to use nonmemory_operand for operand 4, to ensure that the
+;; validate_changes call inside eliminate_regs will always succeed. If it
+;; doesn't succeed, then this remains a shladd pattern, and will be reloaded
+;; incorrectly.
+
+(define_insn_and_split "*shladd_elim"
+ [(set (match_operand:DI 0 "gr_register_operand" "=&r")
+ (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "shladd_operand" "n"))
+ (match_operand:DI 3 "nonmemory_operand" "r"))
+ (match_operand:DI 4 "nonmemory_operand" "rI")))]
+ "reload_in_progress"
+ "* gcc_unreachable ();"
+ "reload_completed"
+ [(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
+ ""
+ [(set_attr "itanium_class" "unknown")])
+
+(define_insn "ashrdi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "gr_register_operand" "r,r")
+ (match_operand:DI 2 "gr_reg_or_6bit_operand" "r,rM")))]
+ ""
+ "@
+ shr %0 = %1, %2
+ shr %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf,mmshfi")])
+
+(define_insn "lshrdi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
+ (lshiftrt:DI (match_operand:DI 1 "gr_register_operand" "r,r")
+ (match_operand:DI 2 "gr_reg_or_6bit_operand" "r,rM")))]
+ ""
+ "@
+ shr.u %0 = %1, %2
+ shr.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf,mmshfi")])
+
+;; Using a predicate that accepts only constants doesn't work, because optabs
+;; will load the operand into a register and call the pattern if the predicate
+;; did not accept it on the first try. So we use nonmemory_operand and then
+;; verify that we have an appropriate constant in the expander.
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (rotatert:DI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (! shift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn "*rotrdi3_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (rotatert:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "shift_count_operand" "M")))]
+ ""
+ "shrp %0 = %1, %1, %2"
+ [(set_attr "itanium_class" "ishf")])
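+
+;; Illustrative note: shrp extracts 64 bits starting at bit <count> of the
+;; 128-bit concatenation %1:%1, so pairing a value with itself makes shrp a
+;; rotate right; rotldi3 below reuses the same trick with the complemented
+;; count, since rotl (x, n) == rotr (x, 64 - n).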
+
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (rotate:DI (match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (! shift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn "*rotldi3_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (rotate:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "shift_count_operand" "M")))]
+ ""
+ "shrp %0 = %1, %1, %e2"
+ [(set_attr "itanium_class" "ishf")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 128-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "ashlti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "")
+ (ashift:TI (match_operand:TI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (!dshift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn_and_split "*ashlti3_internal"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (ashift:TI (match_operand:TI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "dshift_count_operand" "n")))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT shift = INTVAL (operands[2]);
+ rtx rl = gen_lowpart (DImode, operands[0]);
+ rtx rh = gen_highpart (DImode, operands[0]);
+ rtx lo = gen_lowpart (DImode, operands[1]);
+ rtx shiftlo = GEN_INT (shift & 63);
+
+ if (shift & 64)
+ {
+ emit_move_insn (rl, const0_rtx);
+ if (shift & 63)
+ emit_insn (gen_ashldi3 (rh, lo, shiftlo));
+ else
+ emit_move_insn (rh, lo);
+ }
+ else
+ {
+ rtx hi = gen_highpart (DImode, operands[1]);
+
+ emit_insn (gen_shrp (rh, hi, lo, GEN_INT (-shift & 63)));
+ emit_insn (gen_ashldi3 (rl, lo, shiftlo));
+ }
+ DONE;
+})
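+
+;; A worked example of the split above: for a left shift by 8, the high
+;; word becomes shrp (hi, lo, 56), i.e. (hi << 8) | (lo >> 56), and the low
+;; word becomes lo << 8; for a shift by 72 (bit 6 set), the low word is
+;; zeroed and the high word is simply lo << 8.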
+
+(define_expand "ashrti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (!dshift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn_and_split "*ashrti3_internal"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (ashiftrt:TI (match_operand:TI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "dshift_count_operand" "n")))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT shift = INTVAL (operands[2]);
+ rtx rl = gen_lowpart (DImode, operands[0]);
+ rtx rh = gen_highpart (DImode, operands[0]);
+ rtx hi = gen_highpart (DImode, operands[1]);
+ rtx shiftlo = GEN_INT (shift & 63);
+
+ if (shift & 64)
+ {
+ if (shift & 63)
+ emit_insn (gen_ashrdi3 (rl, hi, shiftlo));
+ else
+ emit_move_insn (rl, hi);
+ emit_insn (gen_ashrdi3 (rh, hi, GEN_INT (63)));
+ }
+ else
+ {
+ rtx lo = gen_lowpart (DImode, operands[1]);
+
+ emit_insn (gen_shrp (rl, hi, lo, shiftlo));
+ emit_insn (gen_ashrdi3 (rh, hi, shiftlo));
+ }
+ DONE;
+})
+
+(define_expand "lshrti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (!dshift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn_and_split "*lshrti3_internal"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (lshiftrt:TI (match_operand:TI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "dshift_count_operand" "n")))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT shift = INTVAL (operands[2]);
+ rtx rl = gen_lowpart (DImode, operands[0]);
+ rtx rh = gen_highpart (DImode, operands[0]);
+ rtx hi = gen_highpart (DImode, operands[1]);
+ rtx shiftlo = GEN_INT (shift & 63);
+
+ if (shift & 64)
+ {
+ if (shift & 63)
+ emit_insn (gen_lshrdi3 (rl, hi, shiftlo));
+ else
+ emit_move_insn (rl, hi);
+ emit_move_insn (rh, const0_rtx);
+ }
+ else
+ {
+ rtx lo = gen_lowpart (DImode, operands[1]);
+
+ emit_insn (gen_shrp (rl, hi, lo, shiftlo));
+ emit_insn (gen_lshrdi3 (rh, hi, shiftlo));
+ }
+ DONE;
+})
+
+(define_expand "rotlti3"
+ [(set (match_operand:TI 0 "gr_register_operand" "")
+ (rotate:TI (match_operand:TI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (! dshift_count_operand (operands[2], DImode))
+ FAIL;
+})
+
+(define_insn_and_split "*rotlti3_internal"
+ [(set (match_operand:TI 0 "gr_register_operand" "=&r")
+ (rotate:TI (match_operand:TI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "dshift_count_operand" "n")))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT count = INTVAL (operands[2]);
+ rtx rl = gen_lowpart (DImode, operands[0]);
+ rtx rh = gen_highpart (DImode, operands[0]);
+ rtx lo = gen_lowpart (DImode, operands[1]);
+ rtx hi = gen_highpart (DImode, operands[1]);
+ rtx countlo = GEN_INT (-count & 63);
+
+ if (count & 64)
+ {
+ if (count & 63)
+ {
+ emit_insn (gen_shrp (rl, hi, lo, countlo));
+ emit_insn (gen_shrp (rh, lo, hi, countlo));
+ }
+ else
+ {
+ emit_move_insn (rl, hi);
+ emit_move_insn (rh, lo);
+ }
+ }
+ else
+ {
+ emit_insn (gen_shrp (rl, lo, hi, countlo));
+ emit_insn (gen_shrp (rh, hi, lo, countlo));
+ }
+ DONE;
+}
+ [(set_attr "itanium_class" "unknown")])
+
+(define_insn "shrp"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "shift_count_operand" "M")]
+ UNSPEC_SHRP))]
+ ""
+ "shrp %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "ishf")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+;; We don't seem to need any other 32-bit logical operations, because gcc
+;; generates zero-extend;zero-extend;DImode-op, which combine optimizes to
+;; DImode-op;zero-extend, and then we can optimize away the zero-extend.
+;; This doesn't work for unary logical operations, because we don't call
+;; apply_distributive_law for them.
+
+;; ??? Likewise, this doesn't work for andnot, which isn't handled by
+;; apply_distributive_law. We get inefficient code for
+;; int sub4 (int i, int j) { return i & ~j; }
+;; We could convert (and (not (sign_extend A)) (sign_extend B)) to
+;; (zero_extend (and (not A) B)) in combine.
+;; Or maybe fix this by adding andsi3/iorsi3/xorsi3 patterns like the
+;; one_cmplsi2 pattern.
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (not:SI (match_operand:SI 1 "gr_register_operand" "r")))]
+ ""
+ "andcm %0 = -1, %1"
+ [(set_attr "itanium_class" "ilog")])
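+
+;; (Illustrative note: andcm computes op1 & ~op2, so "andcm %0 = -1, %1"
+;; yields ~%1; there is no dedicated bitwise-not instruction.)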
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 64-bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
+ (and:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
+ (match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
+ ""
+ "@
+ and %0 = %2, %1
+ fand %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "*andnot"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
+ (and:DI (not:DI (match_operand:DI 1 "grfr_register_operand" "r,*f"))
+ (match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
+ ""
+ "@
+ andcm %0 = %2, %1
+ fandcm %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
+ (ior:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
+ (match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
+ ""
+ "@
+ or %0 = %2, %1
+ for %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
+ (xor:DI (match_operand:DI 1 "grfr_register_operand" "%r,*f")
+ (match_operand:DI 2 "grfr_reg_or_8bit_operand" "rK,*f")))]
+ ""
+ "@
+ xor %0 = %2, %1
+ fxor %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
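+
+;; Illustrative note: the second alternative of each pattern above performs
+;; the logical operation in the FP register file (fand/for/fxor), so values
+;; already living in FRs can be combined without a GR<->FR transfer.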
+
+(define_insn "one_cmpldi2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (not:DI (match_operand:DI 1 "gr_register_operand" "r")))]
+ ""
+ "andcm %0 = -1, %1"
+ [(set_attr "itanium_class" "ilog")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Comparisons
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "cbranchbi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:BI 1 "register_operand" "")
+ (match_operand:BI 2 "const_int_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_8bit_and_adjusted_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_reg_or_8bit_and_adjusted_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:SF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchdf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:DF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchxf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:XF 1 "xfreg_or_fp01_operand" "")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "cbranchtf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:TF 1 "gr_register_operand" "")
+ (match_operand:TF 2 "gr_register_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_HPUX"
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+
+(define_insn "*cmpsi_normal"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "normal_comparison_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (match_operand:SI 3 "gr_reg_or_8bit_operand" "rK")]))]
+ ""
+ "cmp4.%C1 %0, %I0 = %3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+;; We use %r3 because it is possible for us to match a 0, and two of the
+;; unsigned comparisons don't accept immediate operands of zero.
+
+(define_insn "*cmpsi_adjusted"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "adjusted_comparison_operator"
+ [(match_operand:SI 2 "gr_register_operand" "r")
+ (match_operand:SI 3 "gr_reg_or_8bit_adjusted_operand" "rL")]))]
+ ""
+ "cmp4.%C1 %0, %I0 = %r3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpdi_normal"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "normal_comparison_operator"
+ [(match_operand:DI 2 "gr_reg_or_0_operand" "rO")
+ (match_operand:DI 3 "gr_reg_or_8bit_operand" "rK")]))]
+ ""
+ "cmp.%C1 %0, %I0 = %3, %r2"
+ [(set_attr "itanium_class" "icmp")])
+
+;; We use %r3 because it is possible for us to match a 0, and two of the
+;; unsigned comparisons don't accept immediate operands of zero.
+
+(define_insn "*cmpdi_adjusted"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "adjusted_comparison_operator"
+ [(match_operand:DI 2 "gr_register_operand" "r")
+ (match_operand:DI 3 "gr_reg_or_8bit_adjusted_operand" "rL")]))]
+ ""
+ "cmp.%C1 %0, %I0 = %r3, %2"
+ [(set_attr "itanium_class" "icmp")])
+
+(define_insn "*cmpsf_internal"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "comparison_operator"
+ [(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 3 "fr_reg_or_fp01_operand" "fG")]))]
+ ""
+ "fcmp.%D1 %0, %I0 = %F2, %F3"
+ [(set_attr "itanium_class" "fcmp")])
+
+(define_insn "*cmpdf_internal"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "comparison_operator"
+ [(match_operand:DF 2 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:DF 3 "fr_reg_or_fp01_operand" "fG")]))]
+ ""
+ "fcmp.%D1 %0, %I0 = %F2, %F3"
+ [(set_attr "itanium_class" "fcmp")])
+
+(define_insn "*cmpxf_internal"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (match_operator:BI 1 "comparison_operator"
+ [(match_operand:XF 2 "xfreg_or_fp01_operand" "fG")
+ (match_operand:XF 3 "xfreg_or_fp01_operand" "fG")]))]
+ ""
+ "fcmp.%D1 %0, %I0 = %F2, %F3"
+ [(set_attr "itanium_class" "fcmp")])
+
+;; ??? Can this pattern be generated?
+
+(define_insn "*bit_zero"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (eq:BI (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0)))]
+ ""
+ "tbit.z %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+(define_insn "*bit_one"
+ [(set (match_operand:BI 0 "register_operand" "=c")
+ (ne:BI (zero_extract:DI (match_operand:DI 1 "gr_register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "shift_count_operand" "M"))
+ (const_int 0)))]
+ ""
+ "tbit.nz %0, %I0 = %1, %2"
+ [(set_attr "itanium_class" "tbit")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Branches
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "*br_true"
+ [(set (pc)
+ (if_then_else (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "(%J0) br.cond%+ %l2"
+ [(set_attr "itanium_class" "br")
+ (set_attr "predicable" "no")])
+
+(define_insn "*br_false"
+ [(set (pc)
+ (if_then_else (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "(%j0) br.cond%+ %l2"
+ [(set_attr "itanium_class" "br")
+ (set_attr "predicable" "no")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Counted loop operations
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+{
+ /* Only use cloop on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ emit_jump_insn (gen_doloop_end_internal (gen_rtx_REG (DImode, AR_LC_REGNUM),
+ operands[4]));
+ DONE;
+})
+
+(define_insn "doloop_end_internal"
+ [(set (pc) (if_then_else (ne (match_operand:DI 0 "ar_lc_reg_operand" "")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (if_then_else:DI (ne (match_dup 0) (const_int 0))
+ (plus:DI (match_dup 0) (const_int -1))
+ (match_dup 0)))]
+ ""
+ "br.cloop.sptk.few %l1"
+ [(set_attr "itanium_class" "br")
+ (set_attr "predicable" "no")])
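+
+;; Illustrative note: for a simple counted loop such as
+;;   void f (int *a, int n) { for (int i = 0; i < n; i++) a[i] = 0; }
+;; the trip count can be kept in the ar.lc application register, turning
+;; the back edge into a single "br.cloop", which decrements ar.lc and
+;; branches while it is nonzero.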
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Set flag operations
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "cstorebi4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:BI 2 "register_operand" "")
+ (match_operand:BI 3 "const_int_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstoresi4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:SI 2 "gr_register_operand" "")
+ (match_operand:SI 3 "gr_reg_or_8bit_and_adjusted_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstoredi4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:DI 2 "gr_register_operand" "")
+ (match_operand:DI 3 "gr_reg_or_8bit_and_adjusted_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstoresf4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:SF 2 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 3 "fr_reg_or_fp01_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstoredf4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:DF 2 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 3 "fr_reg_or_fp01_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstorexf4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:XF 2 "xfreg_or_fp01_operand" "")
+ (match_operand:XF 3 "xfreg_or_fp01_operand" "")]))]
+ ""
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+(define_expand "cstoretf4"
+ [(set (match_operand:DI 0 "gr_register_operand" "")
+ (match_operator:DI 1 "ia64_cbranch_operator"
+ [(match_operand:TF 2 "gr_register_operand" "")
+ (match_operand:TF 3 "gr_register_operand" "")]))]
+ "TARGET_HPUX"
+ "ia64_expand_compare (&operands[1], &operands[2], &operands[3]);")
+
+;; Don't allow memory as destination here, because cmov/cmov/st is more
+;; efficient than mov/mov/cst/cst.
+
+(define_insn_and_split "*sne_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (ne:DI (match_operand:BI 1 "register_operand" "c")
+ (const_int 0)))]
+ ""
+ "#"
+ "reload_completed"
+ [(cond_exec (ne (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 1)))
+ (cond_exec (eq (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 0)))]
+ ""
+ [(set_attr "itanium_class" "unknown")])
+
+(define_insn_and_split "*seq_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (eq:DI (match_operand:BI 1 "register_operand" "c")
+ (const_int 0)))]
+ ""
+ "#"
+ "reload_completed"
+ [(cond_exec (ne (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 0)))
+ (cond_exec (eq (match_dup 1) (const_int 0))
+ (set (match_dup 0) (const_int 1)))]
+ ""
+ [(set_attr "itanium_class" "unknown")])
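+
+;; Illustrative sketch of the two splits above: a BImode predicate is
+;; materialized into a GR with a complementary pair of predicated moves, so
+;;   int f (long a, long b) { return a == b; }
+;; becomes roughly "cmp.eq p6, p7 = a, b" then "(p6) mov r8 = 1" and
+;; "(p7) mov r8 = 0" (register names are examples only).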
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conditional move instructions.
+;; ::
+;; ::::::::::::::::::::
+
+;; ??? Add movXXcc patterns?
+
+;;
+;; DImode if_then_else patterns.
+;;
+
+(define_insn "*cmovdi_internal"
+ [(set (match_operand:DI 0 "not_postinc_destination_operand"
+ "= r, r, r, r, r, r, r, r, r, r, m, Q, *f,*b,*d*e")
+ (if_then_else:DI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand"
+ "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")
+ (const_int 0)])
+ (match_operand:DI 2 "not_postinc_move_operand"
+ "rim, *f, *b,*d*e,rim,rim, rim,*f,*b,*d*e,rO,*f,rOQ,rO, rK")
+ (match_operand:DI 3 "not_postinc_move_operand"
+ "rim,rim,rim, rim, *f, *b,*d*e,*f,*b,*d*e,rO,*f,rOQ,rO, rK")))]
+ "ia64_move_ok (operands[0], operands[2])
+ && ia64_move_ok (operands[0], operands[3])"
+ { gcc_unreachable (); }
+ [(set_attr "predicable" "no")])
+
+(define_split
+ [(set (match_operand 0 "not_postinc_destination_operand" "")
+ (if_then_else
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "")
+ (const_int 0)])
+ (match_operand 2 "not_postinc_move_operand" "")
+ (match_operand 3 "not_postinc_move_operand" "")))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ bool emitted_something = false;
+ rtx dest = operands[0];
+ rtx srct = operands[2];
+ rtx srcf = operands[3];
+ rtx cond = operands[4];
+
+ if (! rtx_equal_p (dest, srct))
+ {
+ ia64_emit_cond_move (dest, srct, cond);
+ emitted_something = true;
+ }
+ if (! rtx_equal_p (dest, srcf))
+ {
+ cond = gen_rtx_fmt_ee (GET_CODE (cond) == NE ? EQ : NE,
+ VOIDmode, operands[1], const0_rtx);
+ ia64_emit_cond_move (dest, srcf, cond);
+ emitted_something = true;
+ }
+ if (! emitted_something)
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+})
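+
+;; Illustrative sketch of the split above: for
+;;   long f (int p, long a, long b) { return p ? a : b; }
+;; the if_then_else becomes two predicated moves, roughly "(p6) mov r8 = a"
+;; and "(p7) mov r8 = b", and either move is dropped when its source is
+;; already identical to the destination.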
+
+;; Absolute value pattern.
+
+(define_insn "*absdi2_internal"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r,r")
+ (if_then_else:DI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" "rI,rI"))
+ (match_operand:DI 3 "gr_reg_or_22bit_operand" "0,rI")))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "ialu,unknown")
+ (set_attr "predicable" "no")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" ""))
+ (match_operand:DI 3 "gr_reg_or_22bit_operand" "")))]
+ "reload_completed && rtx_equal_p (operands[0], operands[3])"
+ [(cond_exec
+ (match_dup 4)
+ (set (match_dup 0)
+ (neg:DI (match_dup 2))))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:DI (match_operand:DI 2 "gr_reg_or_22bit_operand" ""))
+ (match_operand:DI 3 "gr_reg_or_22bit_operand" "")))]
+ "reload_completed"
+ [(cond_exec
+ (match_dup 4)
+ (set (match_dup 0) (neg:DI (match_dup 2))))
+ (cond_exec
+ (match_dup 5)
+ (set (match_dup 0) (match_dup 3)))]
+{
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? EQ : NE,
+ VOIDmode, operands[1], const0_rtx);
+})
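+
+;; Illustrative note: together with a compare, the splits above give a
+;; branch-free absolute value, e.g. for
+;;   long f (long x) { return x < 0 ? -x : x; }
+;; roughly "cmp.lt p6, p7 = x, r0" then "(p6) sub r8 = r0, x", the negation
+;; being a subtract from r0 (register names are examples only).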
+
+;;
+;; SImode if_then_else patterns.
+;;
+
+(define_insn "*cmovsi_internal"
+ [(set (match_operand:SI 0 "not_postinc_destination_operand"
+ "=r,m,*f,r,m,*f,r,m,*f")
+ (if_then_else:SI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c,c,c,c,c,c,c,c")
+ (const_int 0)])
+ (match_operand:SI 2 "not_postinc_move_operand"
+ "0,0,0,rim*f,rO,rO,rim*f,rO,rO")
+ (match_operand:SI 3 "not_postinc_move_operand"
+ "rim*f,rO,rO,0,0,0,rim*f,rO,rO")))]
+ "ia64_move_ok (operands[0], operands[2])
+ && ia64_move_ok (operands[0], operands[3])"
+ { gcc_unreachable (); }
+ [(set_attr "predicable" "no")])
+
+(define_insn "*abssi2_internal"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r,r")
+ (if_then_else:SI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:SI (match_operand:SI 3 "gr_reg_or_22bit_operand" "rI,rI"))
+ (match_operand:SI 2 "gr_reg_or_22bit_operand" "0,rI")))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "ialu,unknown")
+ (set_attr "predicable" "no")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:SI (match_operand:SI 2 "gr_reg_or_22bit_operand" ""))
+ (match_operand:SI 3 "gr_reg_or_22bit_operand" "")))]
+ "reload_completed && rtx_equal_p (operands[0], operands[3])"
+ [(cond_exec
+ (match_dup 4)
+ (set (match_dup 0)
+ (neg:SI (match_dup 2))))]
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI
+ (match_operator 4 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c,c")
+ (const_int 0)])
+ (neg:SI (match_operand:SI 2 "gr_reg_or_22bit_operand" ""))
+ (match_operand:SI 3 "gr_reg_or_22bit_operand" "")))]
+ "reload_completed"
+ [(cond_exec
+ (match_dup 4)
+ (set (match_dup 0) (neg:SI (match_dup 2))))
+ (cond_exec
+ (match_dup 5)
+ (set (match_dup 0) (match_dup 3)))]
+{
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? EQ : NE,
+ VOIDmode, operands[1], const0_rtx);
+})
+
+(define_insn_and_split "*cond_opsi2_internal"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (match_operator:SI 5 "condop_operator"
+ [(if_then_else:SI
+ (match_operator 6 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (match_operand:SI 2 "gr_register_operand" "r")
+ (match_operand:SI 3 "gr_register_operand" "r"))
+ (match_operand:SI 4 "gr_register_operand" "r")]))]
+ ""
+ "#"
+ "reload_completed"
+ [(cond_exec
+ (match_dup 6)
+ (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 2) (match_dup 4)])))
+ (cond_exec
+ (match_dup 7)
+ (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 3) (match_dup 4)])))]
+{
+ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[6]) == NE ? EQ : NE,
+ VOIDmode, operands[1], const0_rtx);
+}
+ [(set_attr "itanium_class" "ialu")
+ (set_attr "predicable" "no")])
+
+
+(define_insn_and_split "*cond_opsi2_internal_b"
+ [(set (match_operand:SI 0 "gr_register_operand" "=r")
+ (match_operator:SI 5 "condop_operator"
+ [(match_operand:SI 4 "gr_register_operand" "r")
+ (if_then_else:SI
+ (match_operator 6 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (match_operand:SI 2 "gr_register_operand" "r")
+ (match_operand:SI 3 "gr_register_operand" "r"))]))]
+ ""
+ "#"
+ "reload_completed"
+ [(cond_exec
+ (match_dup 6)
+ (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 4) (match_dup 2)])))
+ (cond_exec
+ (match_dup 7)
+ (set (match_dup 0) (match_op_dup:SI 5 [(match_dup 4) (match_dup 3)])))]
+{
+ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[6]) == NE ? EQ : NE,
+ VOIDmode, operands[1], const0_rtx);
+}
+ [(set_attr "itanium_class" "ialu")
+ (set_attr "predicable" "no")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Call and branch instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Subroutine call instruction returning no value. Operand 0 is the function
+;; to call; operand 1 is the number of bytes of arguments pushed (in mode
+;; `SImode', except it is normally a `const_int'); operand 2 is the number of
+;; registers used as operands.
+
+;; On most machines, operand 2 is not actually stored into the RTL pattern. It
+;; is supplied for the sake of some RISC machines which need to put this
+;; information into the assembler code; they can put it in the RTL instead of
+;; operand 1.
+
+(define_expand "call"
+ [(use (match_operand:DI 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))]
+ ""
+{
+ ia64_expand_call (NULL_RTX, operands[0], operands[2], false);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(use (match_operand:DI 0 "" ""))
+ (use (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))]
+ ""
+{
+ ia64_expand_call (NULL_RTX, operands[0], operands[2], true);
+ DONE;
+})
+
+;; Subroutine call instruction returning a value. Operand 0 is the hard
+;; register in which the value is returned. There are three more operands,
+;; the same as the three operands of the `call' instruction (but with numbers
+;; increased by one).
+;;
+;; Subroutines that return `BLKmode' objects use the `call' insn.
+
+(define_expand "call_value"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand:DI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+{
+ ia64_expand_call (operands[0], operands[1], operands[3], false);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(use (match_operand 0 "" ""))
+ (use (match_operand:DI 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+{
+ ia64_expand_call (operands[0], operands[1], operands[3], true);
+ DONE;
+})
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+(define_insn "call_nogp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,s"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=b,b"))]
+ ""
+ "br.call%+.many %1 = %0"
+ [(set_attr "itanium_class" "br,scall")])
+
+(define_insn "call_value_nogp"
+ [(set (match_operand 0 "" "=X,X")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "?b,s"))
+ (const_int 0)))
+ (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
+ ""
+ "br.call%+.many %2 = %1"
+ [(set_attr "itanium_class" "br,scall")])
+
+(define_insn "sibcall_nogp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,s"))
+ (const_int 0))]
+ ""
+ "br%+.many %0"
+ [(set_attr "itanium_class" "br,scall")])
+
+(define_insn "call_gp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?r,s"))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" "=b,b"))
+ (clobber (match_scratch:DI 2 "=&r,X"))
+ (clobber (match_scratch:DI 3 "=b,X"))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "br,scall")])
+
+;; Irritatingly, we don't have access to INSN within the split body.
+;; See commentary in ia64_split_call as to why these aren't peep2.
+(define_split
+ [(call (mem (match_operand 0 "call_operand" ""))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))]
+ "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+ operands[3], true, false);
+ DONE;
+})
+
+(define_split
+ [(call (mem (match_operand 0 "call_operand" ""))
+ (const_int 1))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+ operands[3], false, false);
+ DONE;
+})
+
+(define_insn "call_value_gp"
+ [(set (match_operand 0 "" "=X,X")
+ (call (mem:DI (match_operand:DI 1 "call_operand" "?r,s"))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" "=b,b"))
+ (clobber (match_scratch:DI 3 "=&r,X"))
+ (clobber (match_scratch:DI 4 "=b,X"))]
+ ""
+ "#"
+ [(set_attr "itanium_class" "br,scall")])
+
+(define_split
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))]
+ "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(const_int 0)]
+{
+ ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+ operands[4], true, false);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "call_operand" ""))
+ (const_int 1)))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+ operands[4], false, false);
+ DONE;
+})
+
+(define_insn_and_split "sibcall_gp"
+ [(call (mem:DI (match_operand:DI 0 "call_operand" "?r,s"))
+ (const_int 1))
+ (clobber (match_scratch:DI 1 "=&r,X"))
+ (clobber (match_scratch:DI 2 "=b,X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_split_call (NULL_RTX, operands[0], NULL_RTX, operands[1],
+ operands[2], true, true);
+ DONE;
+}
+ [(set_attr "itanium_class" "br")])
+
+(define_insn "return_internal"
+ [(return)
+ (use (match_operand:DI 0 "register_operand" "b"))]
+ ""
+ "br.ret.sptk.many %0"
+ [(set_attr "itanium_class" "br")])
+
+(define_insn "return"
+ [(return)]
+ "ia64_direct_return ()"
+ "br.ret.sptk.many rp"
+ [(set_attr "itanium_class" "br")])
+
+(define_insn "*return_true"
+ [(set (pc)
+ (if_then_else (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (return)
+ (pc)))]
+ "ia64_direct_return ()"
+ "(%J0) br.ret%+.many rp"
+ [(set_attr "itanium_class" "br")
+ (set_attr "predicable" "no")])
+
+(define_insn "*return_false"
+ [(set (pc)
+ (if_then_else (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (pc)
+ (return)))]
+ "ia64_direct_return ()"
+ "(%j0) br.ret%+.many rp"
+ [(set_attr "itanium_class" "br")
+ (set_attr "predicable" "no")])
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "br %l0"
+ [(set_attr "itanium_class" "br")])
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:DI 0 "register_operand" "b"))]
+ ""
+ "br %0"
+ [(set_attr "itanium_class" "br")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:DI 0 "memory_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ rtx op0 = operands[0];
+ rtx addr;
+
+ /* ??? Bother -- do_tablejump is "helpful" and pulls the table
+ element into a register without bothering to see whether that
+ is necessary given the operand predicate. Check for MEM just
+ in case someone fixes this. */
+ if (GET_CODE (op0) == MEM)
+ addr = XEXP (op0, 0);
+ else
+ {
+ /* Otherwise, cheat and guess that the previous insn in the
+ stream was the memory load. Grab the address from that.
+ Note we have to momentarily pop out of the sequence started
+ by the insn-emit wrapper in order to grab the last insn. */
+ rtx last, set;
+
+ end_sequence ();
+ last = get_last_insn ();
+ start_sequence ();
+ set = single_set (last);
+
+ gcc_assert (rtx_equal_p (SET_DEST (set), op0)
+ && GET_CODE (SET_SRC (set)) == MEM);
+ addr = XEXP (SET_SRC (set), 0);
+ gcc_assert (!rtx_equal_p (addr, op0));
+ }
+
+ /* Jump table elements are stored pc-relative. That is, a displacement
+ from the entry to the label. Thus to convert to an absolute address
+ we add the address of the memory from which the value is loaded. */
+ operands[0] = expand_simple_binop (DImode, PLUS, op0, addr,
+ NULL_RTX, 1, OPTAB_DIRECT);
+})
+
+(define_insn "*tablejump_internal"
+ [(set (pc) (match_operand:DI 0 "register_operand" "b"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "br %0"
+ [(set_attr "itanium_class" "br")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Prologue and Epilogue instructions
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+{
+ ia64_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ ia64_expand_epilogue (0);
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+{
+ ia64_expand_epilogue (1);
+ DONE;
+})
+
+;; This prevents the scheduler from moving the SP decrement past FP-relative
+;; stack accesses. This is the same as adddi3 plus the extra set.
+
+(define_insn "prologue_allocate_stack"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%r,r,a")
+ (match_operand:DI 2 "gr_reg_or_22bit_operand" "r,I,J")))
+ (set (match_operand:DI 3 "register_operand" "+r,r,r")
+ (match_dup 3))]
+ ""
+ "@
+ add %0 = %1, %2
+ adds %0 = %2, %1
+ addl %0 = %2, %1"
+ [(set_attr "itanium_class" "ialu")])
+
+;; This prevents the scheduler from moving the SP restore past FP-relative
+;; stack accesses. This is similar to movdi plus the extra set.
+
+(define_insn "epilogue_deallocate_stack"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "register_operand" "+r"))
+ (set (match_dup 1) (match_dup 1))]
+ ""
+ "mov %0 = %1"
+ [(set_attr "itanium_class" "ialu")])
+
+;; As USE insns aren't meaningful after reload, this is used instead
+;; to prevent deleting instructions that set registers needed for EH handling.
+(define_insn "prologue_use"
+ [(unspec:DI [(match_operand:DI 0 "register_operand" "")]
+ UNSPEC_PROLOGUE_USE)]
+ ""
+ ""
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")
+ (set_attr "empty" "yes")])
+
+;; Allocate a new register frame.
+
+(define_insn "alloc"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_ALLOC))
+ (use (match_operand:DI 1 "const_int_operand" "i"))
+ (use (match_operand:DI 2 "const_int_operand" "i"))
+ (use (match_operand:DI 3 "const_int_operand" "i"))
+ (use (match_operand:DI 4 "const_int_operand" "i"))]
+ ""
+ "alloc %0 = ar.pfs, %1, %2, %3, %4"
+ [(set_attr "itanium_class" "syst_m0")
+ (set_attr "predicable" "no")
+ (set_attr "first_insn" "yes")])
+
+;; Modifies ar.unat
+(define_expand "gr_spill"
+ [(parallel [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "")]
+ UNSPEC_GR_SPILL))
+ (clobber (match_dup 3))])]
+ ""
+ "operands[3] = gen_rtx_REG (DImode, AR_UNAT_REGNUM);")
+
+(define_insn "gr_spill_internal"
+ [(set (match_operand:DI 0 "destination_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "")]
+ UNSPEC_GR_SPILL))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ ""
+{
+ /* Note that we use a C output pattern here to avoid the predicate
+ being automatically added before the .mem.offset directive. */
+ return ".mem.offset %2, 0\;%,st8.spill %0 = %1%P0";
+}
+ [(set_attr "itanium_class" "st")])
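+
+;; As an illustrative sketch: with operand 2 == 8 the template above
+;; prints, schematically,
+;;   .mem.offset 8, 0
+;;   st8.spill [dest] = src
+;; with no predicate automatically prefixed to the .mem.offset line.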
+
+;; Reads ar.unat
+(define_expand "gr_restore"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "const_int_operand" "")]
+ UNSPEC_GR_RESTORE))
+ (use (match_dup 3))])]
+ ""
+ "operands[3] = gen_rtx_REG (DImode, AR_UNAT_REGNUM);")
+
+(define_insn "gr_restore_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "const_int_operand" "")]
+ UNSPEC_GR_RESTORE))
+ (use (match_operand:DI 3 "register_operand" ""))]
+ ""
+ { return ".mem.offset %2, 0\;%,ld8.fill %0 = %1%P1"; }
+ [(set_attr "itanium_class" "ld")])
+
+(define_insn "fr_spill"
+ [(set (match_operand:XF 0 "destination_operand" "=m")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FR_SPILL))]
+ ""
+ "stf.spill %0 = %1%P0"
+ [(set_attr "itanium_class" "stf")])
+
+(define_insn "fr_restore"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "memory_operand" "m")]
+ UNSPEC_FR_RESTORE))]
+ ""
+ "ldf.fill %0 = %1%P1"
+ [(set_attr "itanium_class" "fld")])
+
+;; ??? The explicit stop is not ideal. It would be better if
+;; rtx_needs_barrier took care of this, but this is something that can be
+;; fixed later. This avoids an RSE DV.
+
+(define_insn "bsp_value"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_BSP_VALUE))]
+ ""
+ "*
+{
+ return \";;\;%,mov %0 = ar.bsp\";
+}"
+ [(set_attr "itanium_class" "frar_i")])
+
+(define_insn "set_bsp"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
+ UNSPECV_SET_BSP)]
+ ""
+ "flushrs
+ mov r19=ar.rsc
+ ;;
+ and r19=0x1c,r19
+ ;;
+ mov ar.rsc=r19
+ ;;
+ mov ar.bspstore=%0
+ ;;
+ or r19=0x3,r19
+ ;;
+ loadrs
+ invala
+ ;;
+ mov ar.rsc=r19"
+ [(set_attr "itanium_class" "unknown")
+ (set_attr "predicable" "no")])
+
+;; ??? The explicit stops are not ideal. It would be better if
+;; rtx_needs_barrier took care of this, but this is something that can be
+;; fixed later. This avoids an RSE DV.
+
+(define_insn "flushrs"
+ [(unspec [(const_int 0)] UNSPEC_FLUSHRS)]
+ ""
+ ";;\;flushrs\;;;"
+ [(set_attr "itanium_class" "rse_m")
+ (set_attr "predicable" "no")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Miscellaneous instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; ??? Emitting a NOP instruction isn't very useful. This should probably
+;; be emitting ";;" to force a break in the instruction packing.
+
+;; No operation, needed in case the user uses -g but not -O.
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop 0"
+ [(set_attr "itanium_class" "nop")])
+
+(define_insn "nop_m"
+ [(const_int 1)]
+ ""
+ "nop.m 0"
+ [(set_attr "itanium_class" "nop_m")])
+
+(define_insn "nop_i"
+ [(const_int 2)]
+ ""
+ "nop.i 0"
+ [(set_attr "itanium_class" "nop_i")])
+
+(define_insn "nop_f"
+ [(const_int 3)]
+ ""
+ "nop.f 0"
+ [(set_attr "itanium_class" "nop_f")])
+
+(define_insn "nop_b"
+ [(const_int 4)]
+ ""
+ "nop.b 0"
+ [(set_attr "itanium_class" "nop_b")])
+
+(define_insn "nop_x"
+ [(const_int 5)]
+ ""
+ ""
+ [(set_attr "itanium_class" "nop_x")
+ (set_attr "empty" "yes")])
+
+;; The following insn is never generated.  It is used only by the
+;; insn scheduler to change state before advancing the cycle.
+(define_insn "pre_cycle"
+ [(const_int 6)]
+ ""
+ ""
+ [(set_attr "itanium_class" "pre_cycle")])
+
+(define_insn "bundle_selector"
+ [(unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BUNDLE_SELECTOR)]
+ ""
+ { return get_bundle_name (INTVAL (operands[0])); }
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+;; Pseudo instruction that prevents the scheduler from moving code above this
+;; point.
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+(define_insn "insn_group_barrier"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
+ UNSPECV_INSN_GROUP_BARRIER)]
+ ""
+ ";;"
+ [(set_attr "itanium_class" "stop_bit")
+ (set_attr "predicable" "no")
+ (set_attr "empty" "yes")])
+
+(define_expand "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "")
+
+;; ??? We don't have a match-any slot type.  Setting the type to unknown
+;; produces worse code than setting the slot type to A.
+
+(define_insn "*trap"
+ [(trap_if (const_int 1) (match_operand 0 "const_int_operand" ""))]
+ ""
+ "break %0"
+ [(set_attr "itanium_class" "chk_s_i")])
+
+(define_expand "ctrapbi4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:BI 1 "register_operand" "")
+ (match_operand:BI 2 "const_int_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctrapsi4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:SI 1 "gr_register_operand" "")
+ (match_operand:SI 2 "gr_reg_or_8bit_and_adjusted_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctrapdi4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:DI 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_reg_or_8bit_and_adjusted_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctrapsf4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:SF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctrapdf4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:DF 1 "fr_reg_or_fp01_operand" "")
+ (match_operand:DF 2 "fr_reg_or_fp01_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctrapxf4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:XF 1 "xfreg_or_fp01_operand" "")
+ (match_operand:XF 2 "xfreg_or_fp01_operand" "")])
+ (match_operand 3 "" ""))]
+ ""
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+(define_expand "ctraptf4"
+ [(trap_if (match_operator 0 "ia64_cbranch_operator"
+ [(match_operand:TF 1 "gr_register_operand" "")
+ (match_operand:TF 2 "gr_register_operand" "")])
+ (match_operand 3 "" ""))]
+ "TARGET_HPUX"
+ "ia64_expand_compare (&operands[0], &operands[1], &operands[2]);")
+
+
+(define_insn "*conditional_trap"
+ [(trap_if (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])
+ (match_operand 2 "const_int_operand" ""))]
+ ""
+ "(%J0) break %2"
+ [(set_attr "itanium_class" "chk_s_i")
+ (set_attr "predicable" "no")])
+
+(define_insn "break_f"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BREAK)]
+ ""
+ "break.f 0"
+ [(set_attr "itanium_class" "nop_f")])
+
+(define_insn "prefetch"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:DI 1 "const_int_operand" "n")
+ (match_operand:DI 2 "const_int_operand" "n"))]
+ ""
+{
+ static const char * const alt[2][4] = {
+ {
+ "%,lfetch.nta [%0]",
+ "%,lfetch.nt1 [%0]",
+ "%,lfetch.nt2 [%0]",
+ "%,lfetch [%0]"
+ },
+ {
+ "%,lfetch.excl.nta [%0]",
+ "%,lfetch.excl.nt1 [%0]",
+ "%,lfetch.excl.nt2 [%0]",
+ "%,lfetch.excl [%0]"
+ }
+ };
+ int i = (INTVAL (operands[1]));
+ int j = (INTVAL (operands[2]));
+
+ gcc_assert (i == 0 || i == 1);
+ gcc_assert (j >= 0 && j <= 3);
+ return alt[i][j];
+}
+ [(set_attr "itanium_class" "lfetch")])
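+
+;; As an illustrative mapping: operand 1 selects a normal (0) or
+;; exclusive (1) prefetch and operand 2 the locality hint (0-3), so
+;; e.g. __builtin_prefetch (p, 1, 0) -- write intent, no temporal
+;; locality -- selects alt[1][0] and emits "lfetch.excl.nta [p]".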
+
+;; Non-local goto support.
+
+(define_expand "save_stack_nonlocal"
+ [(use (match_operand:OI 0 "memory_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))]
+ ""
+{
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode,
+					 "__ia64_save_stack_nonlocal"),
+ LCT_NORMAL, VOIDmode, 2, XEXP (operands[0], 0), Pmode,
+ operands[1], Pmode);
+ DONE;
+})
+
+(define_expand "nonlocal_goto"
+ [(use (match_operand 0 "general_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))
+ (use (match_operand 3 "general_operand" ""))]
+ ""
+{
+  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__ia64_nonlocal_goto"),
+ LCT_NORETURN, VOIDmode, 3,
+ operands[1], Pmode,
+ copy_to_reg (XEXP (operands[2], 0)), Pmode,
+ operands[3], Pmode);
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn_and_split "nonlocal_goto_receiver"
+ [(unspec_volatile [(const_int 0)] UNSPECV_GOTO_RECEIVER)]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_reload_gp ();
+ DONE;
+})
+
+(define_insn_and_split "builtin_setjmp_receiver"
+ [(unspec_volatile [(match_operand:DI 0 "" "")] UNSPECV_SETJMP_RECEIVER)]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ ia64_reload_gp ();
+ DONE;
+})
+
+(define_expand "eh_epilogue"
+ [(use (match_operand:DI 0 "register_operand" "r"))
+ (use (match_operand:DI 1 "register_operand" "r"))
+ (use (match_operand:DI 2 "register_operand" "r"))]
+ ""
+{
+ rtx bsp = gen_rtx_REG (Pmode, 10);
+ rtx sp = gen_rtx_REG (Pmode, 9);
+
+ if (GET_CODE (operands[0]) != REG || REGNO (operands[0]) != 10)
+ {
+ emit_move_insn (bsp, operands[0]);
+ operands[0] = bsp;
+ }
+ if (GET_CODE (operands[2]) != REG || REGNO (operands[2]) != 9)
+ {
+ emit_move_insn (sp, operands[2]);
+ operands[2] = sp;
+ }
+ emit_use (sp);
+ emit_use (bsp);
+
+ cfun->machine->ia64_eh_epilogue_sp = sp;
+ cfun->machine->ia64_eh_epilogue_bsp = bsp;
+})
+
+;; Builtin apply support.
+
+(define_expand "restore_stack_nonlocal"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:OI 1 "memory_operand" ""))]
+ ""
+{
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode,
+ "__ia64_restore_stack_nonlocal"),
+ LCT_NORMAL, VOIDmode, 1,
+ copy_to_reg (XEXP (operands[1], 0)), Pmode);
+ DONE;
+})
+
+
+;; Predication.
+
+(define_cond_exec
+ [(match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "register_operand" "c")
+ (const_int 0)])]
+ ""
+ "(%J0)")
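+
+;; As an illustrative sketch (hypothetical predicate register p6): with
+;; this define_cond_exec, a predicable insn such as "add r8 = r9, r10"
+;; guarded by p6 is emitted as "(p6) add r8 = r9, r10".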
+
+(define_insn "pred_rel_mutex"
+ [(set (match_operand:BI 0 "register_operand" "+c")
+ (unspec:BI [(match_dup 0)] UNSPEC_PRED_REL_MUTEX))]
+ ""
+ ".pred.rel.mutex %0, %I0"
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+(define_insn "safe_across_calls_all"
+ [(unspec_volatile [(const_int 0)] UNSPECV_PSAC_ALL)]
+ ""
+ ".pred.safe_across_calls p1-p63"
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+(define_insn "safe_across_calls_normal"
+ [(unspec_volatile [(const_int 0)] UNSPECV_PSAC_NORMAL)]
+ ""
+{
+ emit_safe_across_calls ();
+ return "";
+}
+ [(set_attr "itanium_class" "ignore")
+ (set_attr "predicable" "no")])
+
+;; UNSPEC instruction definition to "swizzle" a 32-bit pointer into a
+;; 64-bit pointer.  This is used in the HP-UX 32-bit mode.
+
+(define_insn "ptr_extend"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (unspec:DI [(match_operand:SI 1 "gr_register_operand" "r")]
+ UNSPEC_ADDP4))]
+ ""
+ "addp4 %0 = 0,%1"
+ [(set_attr "itanium_class" "ialu")])
+
+;;
+;; Optimizations for ptr_extend
+
+(define_insn "ptr_extend_plus_imm"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (unspec:DI
+ [(plus:SI (match_operand:SI 1 "basereg_operand" "r")
+ (match_operand:SI 2 "gr_reg_or_14bit_operand" "rI"))]
+ UNSPEC_ADDP4))]
+ "addp4_optimize_ok (operands[1], operands[2])"
+ "addp4 %0 = %2, %1"
+ [(set_attr "itanium_class" "ialu")])
+
+(define_insn "*ptr_extend_plus_2"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (unspec:DI
+ [(plus:SI (match_operand:SI 1 "gr_register_operand" "r")
+ (match_operand:SI 2 "basereg_operand" "r"))]
+ UNSPEC_ADDP4))]
+ "addp4_optimize_ok (operands[1], operands[2])"
+ "addp4 %0 = %1, %2"
+ [(set_attr "itanium_class" "ialu")])
+
+;;
+;; Get instruction pointer
+
+(define_insn "ip_value"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (pc))]
+ ""
+ "mov %0 = ip"
+ [(set_attr "itanium_class" "frbr")])
+
+;; Vector operations
+(include "vect.md")
+;; Atomic operations
+(include "sync.md")
+;; New division operations
+(include "div.md")
diff --git a/gcc/config/ia64/ia64.opt b/gcc/config/ia64/ia64.opt
new file mode 100644
index 000000000..49d099a4e
--- /dev/null
+++ b/gcc/config/ia64/ia64.opt
@@ -0,0 +1,181 @@
+; Copyright (C) 2005, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mbig-endian
+Target Report RejectNegative Mask(BIG_ENDIAN)
+Generate big endian code
+
+mlittle-endian
+Target Report RejectNegative InverseMask(BIG_ENDIAN)
+Generate little endian code
+
+mgnu-as
+Target Report Mask(GNU_AS)
+Generate code for GNU as
+
+mgnu-ld
+Target Report Mask(GNU_LD)
+Generate code for GNU ld
+
+mvolatile-asm-stop
+Target Report Mask(VOL_ASM_STOP)
+Emit stop bits before and after volatile extended asms
+
+mregister-names
+Target Mask(REG_NAMES)
+Use in/loc/out register names
+
+mno-sdata
+Target Report RejectNegative Mask(NO_SDATA)
+
+msdata
+Target Report RejectNegative InverseMask(NO_SDATA)
+Enable use of sdata/scommon/sbss
+
+mno-pic
+Target Report RejectNegative Mask(NO_PIC)
+Generate code without GP reg
+
+mconstant-gp
+Target Report RejectNegative Mask(CONST_GP)
+gp is constant (but save/restore gp on indirect calls)
+
+mauto-pic
+Target Report RejectNegative Mask(AUTO_PIC)
+Generate self-relocatable code
+
+minline-float-divide-min-latency
+Target Report RejectNegative Var(TARGET_INLINE_FLOAT_DIV, 1)
+Generate inline floating point division, optimize for latency
+
+minline-float-divide-max-throughput
+Target Report RejectNegative Var(TARGET_INLINE_FLOAT_DIV, 2) Init(2)
+Generate inline floating point division, optimize for throughput
+
+mno-inline-float-divide
+Target Report RejectNegative Var(TARGET_INLINE_FLOAT_DIV, 0)
+Do not inline floating point division
+
+minline-int-divide-min-latency
+Target Report RejectNegative Var(TARGET_INLINE_INT_DIV, 1)
+Generate inline integer division, optimize for latency
+
+minline-int-divide-max-throughput
+Target Report RejectNegative Var(TARGET_INLINE_INT_DIV, 2)
+Generate inline integer division, optimize for throughput
+
+mno-inline-int-divide
+Target Report RejectNegative Var(TARGET_INLINE_INT_DIV, 0)
+Do not inline integer division
+
+minline-sqrt-min-latency
+Target Report RejectNegative Var(TARGET_INLINE_SQRT, 1)
+Generate inline square root, optimize for latency
+
+minline-sqrt-max-throughput
+Target Report RejectNegative Var(TARGET_INLINE_SQRT, 2)
+Generate inline square root, optimize for throughput
+
+mno-inline-sqrt
+Target Report RejectNegative Var(TARGET_INLINE_SQRT, 0)
+Do not inline square root
+
+mdwarf2-asm
+Target Report Mask(DWARF2_ASM)
+Enable Dwarf 2 line debug info via GNU as
+
+mearly-stop-bits
+Target Report Mask(EARLY_STOP_BITS)
+Enable earlier placement of stop bits for better scheduling
+
+mfixed-range=
+Target RejectNegative Joined
+Specify range of registers to make fixed
+
+mtls-size=
+Target RejectNegative Joined UInteger Var(ia64_tls_size) Init(22)
+Specify bit size of immediate TLS offsets
+
+mtune=
+Target RejectNegative Joined
+Schedule code for given CPU
+
+msched-br-data-spec
+Target Report Var(mflag_sched_br_data_spec) Init(0)
+Use data speculation before reload
+
+msched-ar-data-spec
+Target Report Var(mflag_sched_ar_data_spec) Init(1)
+Use data speculation after reload
+
+msched-control-spec
+Target Report Var(mflag_sched_control_spec) Init(2)
+Use control speculation
+
+msched-br-in-data-spec
+Target Report Var(mflag_sched_br_in_data_spec) Init(1)
+Use in block data speculation before reload
+
+msched-ar-in-data-spec
+Target Report Var(mflag_sched_ar_in_data_spec) Init(1)
+Use in block data speculation after reload
+
+msched-in-control-spec
+Target Report Var(mflag_sched_in_control_spec) Init(1)
+Use in block control speculation
+
+msched-spec-ldc
+Target Report Var(mflag_sched_spec_ldc) Init(1)
+Use simple data speculation check
+
+msched-spec-control-ldc
+Target Report Var(mflag_sched_spec_control_ldc) Init(0)
+Use simple data speculation check for control speculation
+
+msched-prefer-non-data-spec-insns
+Target Report Var(mflag_sched_prefer_non_data_spec_insns) Init(0)
+If set, data speculative instructions will be chosen for scheduling only if there are no other choices at the moment
+
+msched-prefer-non-control-spec-insns
+Target Report Var(mflag_sched_prefer_non_control_spec_insns) Init(0)
+If set, control speculative instructions will be chosen for scheduling only if there are no other choices at the moment
+
+msched-count-spec-in-critical-path
+Target Report Var(mflag_sched_count_spec_in_critical_path) Init(0)
+Count speculative dependencies while calculating priority of instructions
+
+msched-stop-bits-after-every-cycle
+Target Report Var(mflag_sched_stop_bits_after_every_cycle) Init(1)
+Place a stop bit after every cycle when scheduling
+
+msched-fp-mem-deps-zero-cost
+Target Report Var(mflag_sched_fp_mem_deps_zero_cost) Init(0)
+Assume that floating-point stores and loads are not likely to cause conflict when placed into one instruction group
+
+msched-max-memory-insns=
+Target RejectNegative Joined UInteger Var(ia64_max_memory_insns) Init(1)
+Soft limit on the number of memory insns per instruction group; subsequent memory insns attempting to schedule in the same group get lower priority.  Frequently useful to prevent cache bank conflicts.  The default value is 1
+
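+; For example (illustrative), -msched-max-memory-insns=2 allows two
+; memory insns per instruction group before later memory insns are
+; deprioritized.
+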
+msched-max-memory-insns-hard-limit
+Target Report Var(mflag_sched_mem_insns_hard_limit) Init(0)
+Disallow more than 'msched-max-memory-insns' memory insns per instruction group.  Otherwise, the limit is 'soft' (non-memory operations are preferred when the limit is reached)
+
+msel-sched-dont-check-control-spec
+Target Report Var(mflag_sel_sched_dont_check_control_spec) Init(0)
+Don't generate checks for control speculation in selective scheduling
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/ia64/ia64intrin.h b/gcc/config/ia64/ia64intrin.h
new file mode 100644
index 000000000..fba7296aa
--- /dev/null
+++ b/gcc/config/ia64/ia64intrin.h
@@ -0,0 +1,2 @@
+/* Overloaded builtins have been ported to C++: nothing is needed
+ in the header anymore. This file intentionally left void. */
diff --git a/gcc/config/ia64/ilp32.opt b/gcc/config/ia64/ilp32.opt
new file mode 100644
index 000000000..bcb64737e
--- /dev/null
+++ b/gcc/config/ia64/ilp32.opt
@@ -0,0 +1,7 @@
+milp32
+Target Report RejectNegative Mask(ILP32)
+Generate ILP32 code
+
+mlp64
+Target Report RejectNegative InverseMask(ILP32)
+Generate LP64 code
diff --git a/gcc/config/ia64/itanium2.md b/gcc/config/ia64/itanium2.md
new file mode 100644
index 000000000..35593b82e
--- /dev/null
+++ b/gcc/config/ia64/itanium2.md
@@ -0,0 +1,1867 @@
+;; Itanium2 DFA descriptions for insn scheduling and bundling.
+;; Copyright (C) 2002, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by Vladimir Makarov <vmakarov@redhat.com>.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+
+/* This is a description of pipeline hazards based on a DFA.  The
+   following constructions can be used for this:
+
+   o (define_cpu_unit string [string]) describes CPU functional units
+     (the names are separated by commas).
+
+ 1st operand: Names of cpu function units.
+ 2nd operand: Name of automaton (see comments for
+ DEFINE_AUTOMATON).
+
+ All define_reservations and define_cpu_units should have unique
+ names which cannot be "nothing".
+
+   o (exclusion_set string string) means that each CPU functional unit
+     in the first string cannot be reserved simultaneously with any
+     unit whose name is in the second string, and vice versa.  CPU
+     units in the strings are separated by commas.  For example, it is
+     useful for describing a CPU with a fully pipelined floating point
+     functional unit which can simultaneously execute only single
+     precision or only double precision floating point insns.
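+
+     For example (an illustrative sketch with hypothetical unit names),
+     such a constraint between units "fp_single" and "fp_double" would
+     be written as
+
+       (exclusion_set "fp_single" "fp_double")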
+
+   o (presence_set string string) means that each CPU functional unit in
+     the first string cannot be reserved unless at least one of the
+     unit patterns whose names are in the second string is
+     reserved.  This is an asymmetric relation.  CPU units or unit
+     patterns in the strings are separated by commas.  A pattern is one
+     unit name or several unit names separated by white space.
+
+     For example, it is useful for describing that slot1 is reserved
+     only after a slot0 reservation for a VLIW processor.  We could
+     describe it by the following construction
+
+ (presence_set "slot1" "slot0")
+
+ Or slot1 is reserved only after slot0 and unit b0 reservation.
+ In this case we could write
+
+ (presence_set "slot1" "slot0 b0")
+
+ All CPU functional units in a set should belong to the same
+ automaton.
+
+ o (final_presence_set string string) is analogous to
+ `presence_set'. The difference between them is when checking is
+     done.  When an instruction is issued in a given automaton state
+ reflecting all current and planned unit reservations, the
+ automaton state is changed. The first state is a source state,
+ the second one is a result state. Checking for `presence_set' is
+ done on the source state reservation, checking for
+ `final_presence_set' is done on the result reservation. This
+ construction is useful to describe a reservation which is
+ actually two subsequent reservations. For example, if we use
+
+ (presence_set "slot1" "slot0")
+
+     the following insn will never be issued (because slot1 requires
+ slot0 which is absent in the source state).
+
+ (define_reservation "insn_and_nop" "slot0 + slot1")
+
+     but it can be issued if we use the analogous `final_presence_set'.
+
+   o (absence_set string string) means that each CPU functional unit in
+     the first string can be reserved only if none of the unit patterns
+     whose names are in the second string is reserved.  This is an
+     asymmetric relation (actually exclusion_set is analogous to this
+     one but it is symmetric).  CPU units or unit patterns in the
+     strings are separated by commas.  A pattern is one unit name or
+     several unit names separated by white space.
+
+     For example, it is useful for describing that slot2 cannot be
+     reserved after a slot0 or slot1 reservation for a VLIW processor.
+ We could describe it by the following construction
+
+ (absence_set "slot2" "slot0, slot1")
+
+     Or slot2 cannot be reserved if slot0 and unit b0 are reserved or
+     slot1 and unit b1 are reserved.  In this case we could write
+
+ (absence_set "slot2" "slot0 b0, slot1 b1")
+
+     All CPU functional units in a set should belong to the same
+     automaton.
+
+ o (final_absence_set string string) is analogous to `absence_set' but
+ checking is done on the result (state) reservation. See comments
+ for final_presence_set.
+
+   o (define_bypass number out_insn_names in_insn_names) names a bypass
+     with the given latency (the first number) from insns given by the
+     first string (see define_insn_reservation) into insns given by the
+     second string.  Insn names in the strings are separated by
+     commas.
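+
+     For example (an illustrative sketch with hypothetical insn
+     reservation names "load" and "use"), a bypass of latency 1 from
+     loads into their users could be written as
+
+       (define_bypass 1 "load" "use")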
+
+   o (define_automaton string) describes names of automata
+     generated and used for pipeline hazard recognition.  The names
+     are separated by commas.  Actually it is possible to generate a
+     single automaton, but unfortunately it can be very large.  If we
+     use more than one automaton, the total size of the automata is
+     usually less than that of the single one.  The automaton name is
+     used in define_cpu_unit.  All automata should have unique names.
+
+   o (automata_option string) describes an option for the generation of
+     automata.  Currently there are the following options:
+
+     o "no-minimization" which makes no minimization of automata.
+       This is only worth doing when we are debugging the description
+       and need to look more accurately at reservations of states.
+
+     o "ndfa" which makes automata with nondeterministic reservation
+       of units by insns.
+
+   o (define_reservation string string) names a reservation (the first
+     string) of CPU functional units (the 2nd string).  Sometimes unit
+     reservations for different insns contain common parts.  In such
+     cases, you describe the common part and use its name (the 1st
+     parameter) in a regular expression in define_insn_reservation.  All
+     define_reservations and define_cpu_units should have unique
+     names which cannot be "nothing".
+
+   o (define_insn_reservation name default_latency condition regexpr)
+     describes a reservation of CPU functional units (the 4th operand)
+     for an instruction which is selected by the condition (the 3rd
+     parameter).  The first parameter is used for output of debugging
+     information.  The reservations are described by a regular
+     expression according to the following syntax:
+
+ regexp = regexp "," oneof
+ | oneof
+
+ oneof = oneof "|" allof
+ | allof
+
+ allof = allof "+" repeat
+ | repeat
+
+ repeat = element "*" number
+ | element
+
+ element = cpu_function_name
+ | reservation_name
+ | result_name
+ | "nothing"
+ | "(" regexp ")"
+
+     1. "," is used for describing the start of the next cycle in
+     a reservation.
+
+ 2. "|" is used for describing the reservation described by the
+ first regular expression *or* the reservation described by
+ the second regular expression *or* etc.
+
+ 3. "+" is used for describing the reservation described by the
+ first regular expression *and* the reservation described by
+ the second regular expression *and* etc.
+
+     4. "*" is used for convenience and simply means a sequence in
+        which the regular expression is repeated NUMBER times with
+        cycle advancing (see ",").
+
+     5. A CPU functional unit name means its reservation.
+
+ 6. reservation name -- see define_reservation.
+
+     7. The string "nothing" means no unit reservation.
+
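+     For example (an illustrative sketch with a hypothetical "type"
+     attribute and units "port0" and "port1"), an insn class "simple"
+     with latency 2 that reserves port0 on the first cycle and port1 on
+     the next could be written as
+
+       (define_insn_reservation "simple" 2 (eq_attr "type" "simple")
+                                "port0, port1")
+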
+*/
+
+(define_automaton "two")
+
+;; All possible combinations of bundles/syllables
+(define_cpu_unit "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\
+ 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx" "two")
+(define_cpu_unit "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\
+ 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx." "two")
+(define_cpu_unit "2_0mii., 2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\
+ 2_0mib., 2_0mmb., 2_0mfb." "two")
+
+(define_cpu_unit "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\
+ 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx" "two")
+(define_cpu_unit "2_1mi.i, 2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\
+ 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx." "two")
+(define_cpu_unit "2_1mii., 2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\
+ 2_1mib., 2_1mmb., 2_1mfb." "two")
+
+;; Slot 1
+(exclusion_set "2_0m.ii" "2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\
+ 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.mi" "2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib,\
+ 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.fi" "2_0m.mf, 2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb,\
+ 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.mf" "2_0b.bb, 2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb,\
+ 2_0m.lx")
+(exclusion_set "2_0b.bb" "2_0m.bb, 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.bb" "2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.ib" "2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.mb" "2_0m.fb, 2_0m.lx")
+(exclusion_set "2_0m.fb" "2_0m.lx")
+
+;; Slot 2
+(exclusion_set "2_0mi.i" "2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\
+ 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mm.i" "2_0mf.i, 2_0mm.f, 2_0bb.b, 2_0mb.b,\
+ 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mf.i" "2_0mm.f, 2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b,\
+ 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mm.f" "2_0bb.b, 2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b,\
+ 2_0mlx.")
+(exclusion_set "2_0bb.b" "2_0mb.b, 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mb.b" "2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mi.b" "2_0mm.b, 2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mm.b" "2_0mf.b, 2_0mlx.")
+(exclusion_set "2_0mf.b" "2_0mlx.")
+
+;; Slot 3
+(exclusion_set "2_0mii." "2_0mmi., 2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\
+ 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mmi." "2_0mfi., 2_0mmf., 2_0bbb., 2_0mbb.,\
+ 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mfi." "2_0mmf., 2_0bbb., 2_0mbb., 2_0mib., 2_0mmb.,\
+ 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mmf." "2_0bbb., 2_0mbb., 2_0mib., 2_0mmb., 2_0mfb.,\
+ 2_0mlx.")
+(exclusion_set "2_0bbb." "2_0mbb., 2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mbb." "2_0mib., 2_0mmb., 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mib." "2_0mmb., 2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mmb." "2_0mfb., 2_0mlx.")
+(exclusion_set "2_0mfb." "2_0mlx.")
+
+;; Slot 4
+(exclusion_set "2_1m.ii" "2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\
+ 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.mi" "2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib,\
+ 2_1m.mb, 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.fi" "2_1m.mf, 2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb,\
+ 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.mf" "2_1b.bb, 2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb,\
+ 2_1m.lx")
+(exclusion_set "2_1b.bb" "2_1m.bb, 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.bb" "2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.ib" "2_1m.mb, 2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.mb" "2_1m.fb, 2_1m.lx")
+(exclusion_set "2_1m.fb" "2_1m.lx")
+
+;; Slot 5
+(exclusion_set "2_1mi.i" "2_1mm.i, 2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\
+ 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mm.i" "2_1mf.i, 2_1mm.f, 2_1bb.b, 2_1mb.b,\
+ 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mf.i" "2_1mm.f, 2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b,\
+ 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mm.f" "2_1bb.b, 2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b,\
+ 2_1mlx.")
+(exclusion_set "2_1bb.b" "2_1mb.b, 2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mb.b" "2_1mi.b, 2_1mm.b, 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mi.b" "2_1mm.b, 2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mm.b" "2_1mf.b, 2_1mlx.")
+(exclusion_set "2_1mf.b" "2_1mlx.")
+
+;; Slot 6
+(exclusion_set "2_1mii." "2_1mmi., 2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\
+ 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mmi." "2_1mfi., 2_1mmf., 2_1bbb., 2_1mbb.,\
+ 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mfi." "2_1mmf., 2_1bbb., 2_1mbb., 2_1mib., 2_1mmb.,\
+ 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mmf." "2_1bbb., 2_1mbb., 2_1mib., 2_1mmb., 2_1mfb.,\
+ 2_1mlx.")
+(exclusion_set "2_1bbb." "2_1mbb., 2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mbb." "2_1mib., 2_1mmb., 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mib." "2_1mmb., 2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mmb." "2_1mfb., 2_1mlx.")
+(exclusion_set "2_1mfb." "2_1mlx.")
+
+(final_presence_set "2_0mi.i" "2_0m.ii")
+(final_presence_set "2_0mii." "2_0mi.i")
+(final_presence_set "2_1mi.i" "2_1m.ii")
+(final_presence_set "2_1mii." "2_1mi.i")
+
+(final_presence_set "2_0mm.i" "2_0m.mi")
+(final_presence_set "2_0mmi." "2_0mm.i")
+(final_presence_set "2_1mm.i" "2_1m.mi")
+(final_presence_set "2_1mmi." "2_1mm.i")
+
+(final_presence_set "2_0mf.i" "2_0m.fi")
+(final_presence_set "2_0mfi." "2_0mf.i")
+(final_presence_set "2_1mf.i" "2_1m.fi")
+(final_presence_set "2_1mfi." "2_1mf.i")
+
+(final_presence_set "2_0mm.f" "2_0m.mf")
+(final_presence_set "2_0mmf." "2_0mm.f")
+(final_presence_set "2_1mm.f" "2_1m.mf")
+(final_presence_set "2_1mmf." "2_1mm.f")
+
+(final_presence_set "2_0bb.b" "2_0b.bb")
+(final_presence_set "2_0bbb." "2_0bb.b")
+(final_presence_set "2_1bb.b" "2_1b.bb")
+(final_presence_set "2_1bbb." "2_1bb.b")
+
+(final_presence_set "2_0mb.b" "2_0m.bb")
+(final_presence_set "2_0mbb." "2_0mb.b")
+(final_presence_set "2_1mb.b" "2_1m.bb")
+(final_presence_set "2_1mbb." "2_1mb.b")
+
+(final_presence_set "2_0mi.b" "2_0m.ib")
+(final_presence_set "2_0mib." "2_0mi.b")
+(final_presence_set "2_1mi.b" "2_1m.ib")
+(final_presence_set "2_1mib." "2_1mi.b")
+
+(final_presence_set "2_0mm.b" "2_0m.mb")
+(final_presence_set "2_0mmb." "2_0mm.b")
+(final_presence_set "2_1mm.b" "2_1m.mb")
+(final_presence_set "2_1mmb." "2_1mm.b")
+
+(final_presence_set "2_0mf.b" "2_0m.fb")
+(final_presence_set "2_0mfb." "2_0mf.b")
+(final_presence_set "2_1mf.b" "2_1m.fb")
+(final_presence_set "2_1mfb." "2_1mf.b")
+
+(final_presence_set "2_0mlx." "2_0m.lx")
+(final_presence_set "2_1mlx." "2_1m.lx")
+
+;; The following reflects the dual issue bundle types table.
+;; We could place all possible combinations here because impossible
+;; combinations would be removed by the subsequent constraints.
+(final_presence_set
+ "2_1m.lx"
+ "2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.")
+(final_presence_set "2_1b.bb" "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mlx.")
+(final_presence_set
+ "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1m.bb,2_1m.ib,2_1m.mb,2_1m.fb"
+ "2_0mii.,2_0mmi.,2_0mfi.,2_0mmf.,2_0mib.,2_0mmb.,2_0mfb.,2_0mlx.")
+
+;; Ports/units (nb means nop.b insn issued into given port):
+(define_cpu_unit
+ "2_um0, 2_um1, 2_um2, 2_um3, 2_ui0, 2_ui1, 2_uf0, 2_uf1,\
+ 2_ub0, 2_ub1, 2_ub2, 2_unb0, 2_unb1, 2_unb2" "two")
+
+(exclusion_set "2_ub0" "2_unb0")
+(exclusion_set "2_ub1" "2_unb1")
+(exclusion_set "2_ub2" "2_unb2")
+
+;; The following rules are used to decrease the number of alternatives.
+;; They are consequences of the Itanium2 microarchitecture.  They also
+;; describe the following rules mentioned in the Itanium2
+;; microarchitecture documentation:
+;; o "BBB/MBB: Always splits issue after either of these bundles".
+;; o "MIB BBB: Split issue after the first bundle in this pair".
+(exclusion_set
+ "2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb."
+ "2_1m.ii,2_1m.mi,2_1m.fi,2_1m.mf,2_1b.bb,2_1m.bb,\
+ 2_1m.ib,2_1m.mb,2_1m.fb,2_1m.lx")
+(exclusion_set "2_0m.ib,2_0mi.b,2_0mib." "2_1b.bb")
+
+;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the
+;;; B-slot contains a nop.b or a brp instruction".
+;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or
+;;; nop.b, otherwise it disperses to B2".
+(final_absence_set
+ "2_1m.ii, 2_1m.mi, 2_1m.fi, 2_1m.mf, 2_1b.bb, 2_1m.bb,\
+ 2_1m.ib, 2_1m.mb, 2_1m.fb, 2_1m.lx"
+ "2_0mib. 2_ub2, 2_0mfb. 2_ub2, 2_0mmb. 2_ub2")
+
+;; This is necessary to start a new processor cycle when we meet a stop bit.
+(define_cpu_unit "2_stop" "two")
+(final_absence_set
+ "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\
+ 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\
+ 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\
+ 2_0m.lx,2_0mlx., \
+ 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\
+ 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\
+ 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\
+ 2_1m.lx,2_1mlx."
+ "2_stop")
+
+;; The issue logic can reorder M slot insns between different subtypes
+;; but cannot reorder insns within the same subtype.  The following
+;; constraint is enough to describe this.
+(final_presence_set "2_um1" "2_um0")
+(final_presence_set "2_um3" "2_um2")
+
+;; The insn in the 1st I slot of the two-bundle issue group will issue
+;; to I0. The second I slot insn will issue to I1.
+(final_presence_set "2_ui1" "2_ui0")
+
+;; For exceptions of I insns:
+(define_cpu_unit "2_only_ui0" "two")
+(final_absence_set "2_only_ui0" "2_ui1")
+
+;; Insns
+
+(define_reservation "2_M0"
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\
+ |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\
+ |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\
+ +(2_um0|2_um1|2_um2|2_um3)")
+
+(define_reservation "2_M1"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\
+ +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\
+ +(2_um0|2_um1|2_um2|2_um3)")
+
+(define_reservation "2_M" "2_M0|2_M1")
+
+(define_reservation "2_M0_only_um0"
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\
+ |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\
+ |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\
+ +2_um0")
+
+(define_reservation "2_M1_only_um0"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\
+ +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\
+ +2_um0")
+
+(define_reservation "2_M_only_um0" "2_M0_only_um0|2_M1_only_um0")
+
+(define_reservation "2_M0_only_um2"
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\
+ |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\
+ |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\
+ +2_um2")
+
+(define_reservation "2_M1_only_um2"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\
+ +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\
+ +2_um2")
+
+(define_reservation "2_M_only_um2" "2_M0_only_um2|2_M1_only_um2")
+
+(define_reservation "2_M0_only_um23"
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\
+ |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\
+ |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\
+ +(2_um2|2_um3)")
+
+(define_reservation "2_M1_only_um23"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\
+ +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\
+ +(2_um2|2_um3)")
+
+(define_reservation "2_M_only_um23" "2_M0_only_um23|2_M1_only_um23")
+
+(define_reservation "2_M0_only_um01"
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb|2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx\
+ |2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx\
+ |2_0mm.i|2_0mm.f|2_0mm.b|2_1mm.i|2_1mm.f|2_1mm.b)\
+ +(2_um0|2_um1)")
+
+(define_reservation "2_M1_only_um01"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mfb.+2_unb0|2_0mmb.+2_unb0)\
+ +(2_1m.ii|2_1m.mi|2_1m.fi|2_1m.mf|2_1m.bb|2_1m.ib|2_1m.mb|2_1m.fb|2_1m.lx)\
+ +(2_um0|2_um1)")
+
+(define_reservation "2_M_only_um01" "2_M0_only_um01|2_M1_only_um01")
+
+;; An I insn is dispersed to the lowest numbered I unit
+;; not already in use.  Remember the possibility of splitting.
+(define_reservation "2_I0"
+ "2_0mi.i+2_ui0|2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0\
+ |2_0mfi.+2_ui0|2_0mi.b+2_ui0|(2_1mi.i|2_1mi.b)+(2_ui0|2_ui1)\
+ |(2_1mii.|2_1mmi.|2_1mfi.)+(2_ui0|2_ui1)")
+
+(define_reservation "2_I1"
+ "2_0m.ii+(2_um0|2_um1|2_um2|2_um3)+2_0mi.i+2_ui0\
+ |2_0mm.i+(2_um0|2_um1|2_um2|2_um3)+2_0mmi.+2_ui0\
+ |2_0mf.i+2_uf0+2_0mfi.+2_ui0\
+ |2_0m.ib+(2_um0|2_um1|2_um2|2_um3)+2_0mi.b+2_ui0\
+ |(2_1m.ii+2_1mi.i|2_1m.ib+2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)+(2_ui0|2_ui1)\
+ |2_1mm.i+(2_um0|2_um1|2_um2|2_um3)+2_1mmi.+(2_ui0|2_ui1)\
+ |2_1mf.i+2_uf1+2_1mfi.+(2_ui0|2_ui1)")
+
+(define_reservation "2_I" "2_I0|2_I1")
+
+;; "An F slot in the 1st bundle disperses to F0".
+;; "An F slot in the 2st bundle disperses to F1".
+(define_reservation "2_F0"
+ "2_0mf.i+2_uf0|2_0mmf.+2_uf0|2_0mf.b+2_uf0\
+ |2_1mf.i+2_uf1|2_1mmf.+2_uf1|2_1mf.b+2_uf1")
+
+(define_reservation "2_F1"
+ "(2_0m.fi+2_0mf.i|2_0mm.f+2_0mmf.|2_0m.fb+2_0mf.b)\
+ +(2_um0|2_um1|2_um2|2_um3)+2_uf0\
+ |(2_1m.fi+2_1mf.i|2_1mm.f+2_1mmf.|2_1m.fb+2_1mf.b)\
+ +(2_um0|2_um1|2_um2|2_um3)+2_uf1")
+
+(define_reservation "2_F2"
+ "(2_0m.mf+2_0mm.f+2_0mmf.+2_uf0|2_1m.mf+2_1mm.f+2_1mmf.+2_uf1)\
+ +(2_um0|2_um1|2_um2|2_um3)+(2_um0|2_um1|2_um2|2_um3)\
+ |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0\
+ |2_0mmf.+(2_um0|2_um1|2_um2|2_um3)\
+ |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0)\
+ +(2_1m.fi+2_1mf.i|2_1m.fb+2_1mf.b)+(2_um0|2_um1|2_um2|2_um3)+2_uf1")
+
+(define_reservation "2_F" "2_F0|2_F1|2_F2")
+
+;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B
+;;; unit. That is, a B slot in 1st position is dispersed to B0. In the
+;;; 2nd position it is dispersed to B2".
+(define_reservation "2_NB"
+ "2_0b.bb+2_unb0|2_0bb.b+2_unb1|2_0bbb.+2_unb2\
+ |2_0mb.b+2_unb1|2_0mbb.+2_unb2|2_0mib.+2_unb0\
+ |2_0mmb.+2_unb0|2_0mfb.+2_unb0\
+   |2_1b.bb+2_unb0|2_1bb.b+2_unb1\
+ |2_1bbb.+2_unb2|2_1mb.b+2_unb1|2_1mbb.+2_unb2\
+ |2_1mib.+2_unb0|2_1mmb.+2_unb0|2_1mfb.+2_unb0")
+
+(define_reservation "2_B0"
+ "2_0b.bb+2_ub0|2_0bb.b+2_ub1|2_0bbb.+2_ub2\
+ |2_0mb.b+2_ub1|2_0mbb.+2_ub2|2_0mib.+2_ub2\
+ |2_0mfb.+2_ub2|2_1b.bb+2_ub0|2_1bb.b+2_ub1\
+ |2_1bbb.+2_ub2|2_1mb.b+2_ub1\
+ |2_1mib.+2_ub2|2_1mmb.+2_ub2|2_1mfb.+2_ub2")
+
+(define_reservation "2_B1"
+ "2_0m.bb+(2_um0|2_um1|2_um2|2_um3)+2_0mb.b+2_ub1\
+ |2_0mi.b+2_ui0+2_0mib.+2_ub2\
+ |2_0mm.b+(2_um0|2_um1|2_um2|2_um3)+2_0mmb.+2_ub2\
+ |2_0mf.b+2_uf0+2_0mfb.+2_ub2\
+ |(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0)\
+ +2_1b.bb+2_ub0\
+ |2_1m.bb+(2_um0|2_um1|2_um2|2_um3)+2_1mb.b+2_ub1\
+ |2_1mi.b+(2_ui0|2_ui1)+2_1mib.+2_ub2\
+ |2_1mm.b+(2_um0|2_um1|2_um2|2_um3)+2_1mmb.+2_ub2\
+ |2_1mf.b+2_uf1+2_1mfb.+2_ub2")
+
+(define_reservation "2_B" "2_B0|2_B1")
+
+;; An MLX bundle uses ports equivalent to those of MFI bundles.
+
+;; For the MLX template, the I slot insn is always assigned to port I0
+;; if it is in the first bundle, or to port I1 if it is in
+;; the second bundle.
+(define_reservation "2_L0" "2_0mlx.+2_ui0+2_uf0|2_1mlx.+2_ui1+2_uf1")
+
+(define_reservation "2_L1"
+ "2_0m.lx+(2_um0|2_um1|2_um2|2_um3)+2_0mlx.+2_ui0+2_uf0\
+ |2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1")
+
+(define_reservation "2_L2"
+ "(2_0mii.+(2_ui0|2_ui1)|2_0mmi.+2_ui0|2_0mfi.+2_ui0|2_0mmf.+2_uf0\
+ |2_0mib.+2_unb0|2_0mmb.+2_unb0|2_0mfb.+2_unb0)
+ +2_1m.lx+(2_um0|2_um1|2_um2|2_um3)+2_1mlx.+2_ui1+2_uf1")
+
+(define_reservation "2_L" "2_L0|2_L1|2_L2")
+
+;; Should we describe that an A insn in an I slot can be issued to M
+;; ports?  I think it is not necessary because of multipass
+;; scheduling. For example, the multipass scheduling could use
+;; MMI-MMI instead of MII-MII where the two last I slots contain A
+;; insns (even if the case is complicated by use-def conflicts).
+;;
+;; In any case we could describe it as
+;; (define_cpu_unit "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres" "two")
+;; (final_presence_set "2_ui1_0pres,2_ui1_1pres,2_ui1_2pres,2_ui1_3pres"
+;; "2_ui1")
+;; (define_reservation "b_A"
+;; "b_M|b_I\
+;; |(2_1mi.i|2_1mii.|2_1mmi.|2_1mfi.|2_1mi.b)+(2_um0|2_um1|2_um2|2_um3)\
+;; +(2_ui1_0pres|2_ui1_1pres|2_ui1_2pres|2_ui1_3pres)")
+
+(define_reservation "2_A" "2_M|2_I")
+
+;; We assume that there is no insn issued on the same cycle as the
+;; unknown insn.
+(define_cpu_unit "2_empty" "two")
+(exclusion_set "2_empty"
+ "2_0m.ii,2_0m.mi,2_0m.fi,2_0m.mf,2_0b.bb,2_0m.bb,2_0m.ib,2_0m.mb,2_0m.fb,\
+ 2_0m.lx")
+
+(define_cpu_unit
+ "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs"
+ "two")
+(define_cpu_unit
+ "2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs"
+ "two")
+
+(define_cpu_unit "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont, 2_mb_cont,\
+ 2_b_cont, 2_bb_cont" "two")
+
+;; For stop in the middle of the bundles.
+(define_cpu_unit "2_m_stop, 2_m0_stop, 2_m1_stop, 2_0mmi_cont" "two")
+(define_cpu_unit "2_mi_stop, 2_mi0_stop, 2_mi1_stop, 2_0mii_cont" "two")
+
+(final_presence_set "2_0m_bs"
+ "2_0m.ii, 2_0m.mi, 2_0m.mf, 2_0m.fi, 2_0m.bb,\
+ 2_0m.ib, 2_0m.fb, 2_0m.mb, 2_0m.lx")
+(final_presence_set "2_1m_bs"
+ "2_1m.ii, 2_1m.mi, 2_1m.mf, 2_1m.fi, 2_1m.bb,\
+ 2_1m.ib, 2_1m.fb, 2_1m.mb, 2_1m.lx")
+(final_presence_set "2_0mi_bs" "2_0mi.i, 2_0mi.i")
+(final_presence_set "2_1mi_bs" "2_1mi.i, 2_1mi.i")
+(final_presence_set "2_0mm_bs" "2_0mm.i, 2_0mm.f, 2_0mm.b")
+(final_presence_set "2_1mm_bs" "2_1mm.i, 2_1mm.f, 2_1mm.b")
+(final_presence_set "2_0mf_bs" "2_0mf.i, 2_0mf.b")
+(final_presence_set "2_1mf_bs" "2_1mf.i, 2_1mf.b")
+(final_presence_set "2_0b_bs" "2_0b.bb")
+(final_presence_set "2_1b_bs" "2_1b.bb")
+(final_presence_set "2_0bb_bs" "2_0bb.b")
+(final_presence_set "2_1bb_bs" "2_1bb.b")
+(final_presence_set "2_0mb_bs" "2_0mb.b")
+(final_presence_set "2_1mb_bs" "2_1mb.b")
+
+(exclusion_set "2_0m_bs"
+ "2_0mi.i, 2_0mm.i, 2_0mm.f, 2_0mf.i, 2_0mb.b,\
+ 2_0mi.b, 2_0mf.b, 2_0mm.b, 2_0mlx., 2_m0_stop")
+(exclusion_set "2_1m_bs"
+ "2_1mi.i, 2_1mm.i, 2_1mm.f, 2_1mf.i, 2_1mb.b,\
+ 2_1mi.b, 2_1mf.b, 2_1mm.b, 2_1mlx., 2_m1_stop")
+(exclusion_set "2_0mi_bs" "2_0mii., 2_0mib., 2_mi0_stop")
+(exclusion_set "2_1mi_bs" "2_1mii., 2_1mib., 2_mi1_stop")
+(exclusion_set "2_0mm_bs" "2_0mmi., 2_0mmf., 2_0mmb.")
+(exclusion_set "2_1mm_bs" "2_1mmi., 2_1mmf., 2_1mmb.")
+(exclusion_set "2_0mf_bs" "2_0mfi., 2_0mfb.")
+(exclusion_set "2_1mf_bs" "2_1mfi., 2_1mfb.")
+(exclusion_set "2_0b_bs" "2_0bb.b")
+(exclusion_set "2_1b_bs" "2_1bb.b")
+(exclusion_set "2_0bb_bs" "2_0bbb.")
+(exclusion_set "2_1bb_bs" "2_1bbb.")
+(exclusion_set "2_0mb_bs" "2_0mbb.")
+(exclusion_set "2_1mb_bs" "2_1mbb.")
+
+(exclusion_set
+ "2_0m_bs, 2_0mi_bs, 2_0mm_bs, 2_0mf_bs, 2_0b_bs, 2_0bb_bs, 2_0mb_bs,
+ 2_1m_bs, 2_1mi_bs, 2_1mm_bs, 2_1mf_bs, 2_1b_bs, 2_1bb_bs, 2_1mb_bs"
+ "2_stop")
+
+(final_presence_set
+ "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\
+ 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx."
+ "2_m_cont")
+(final_presence_set "2_0mii., 2_0mib." "2_mi_cont")
+(final_presence_set "2_0mmi., 2_0mmf., 2_0mmb." "2_mm_cont")
+(final_presence_set "2_0mfi., 2_0mfb." "2_mf_cont")
+(final_presence_set "2_0bb.b" "2_b_cont")
+(final_presence_set "2_0bbb." "2_bb_cont")
+(final_presence_set "2_0mbb." "2_mb_cont")
+
+(exclusion_set
+ "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\
+ 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx"
+ "2_m_cont, 2_mi_cont, 2_mm_cont, 2_mf_cont,\
+ 2_mb_cont, 2_b_cont, 2_bb_cont")
+
+(exclusion_set "2_empty"
+ "2_m_cont,2_mi_cont,2_mm_cont,2_mf_cont,\
+ 2_mb_cont,2_b_cont,2_bb_cont")
+
+;; For m;mi bundle
+(final_presence_set "2_m0_stop" "2_0m.mi")
+(final_presence_set "2_0mm.i" "2_0mmi_cont")
+(exclusion_set "2_0mmi_cont"
+ "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\
+ 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_m0_stop" "2_0mm.i")
+(final_presence_set "2_m1_stop" "2_1m.mi")
+(exclusion_set "2_m1_stop" "2_1mm.i")
+(final_presence_set "2_m_stop" "2_m0_stop, 2_m1_stop")
+
+;; For mi;i bundle
+(final_presence_set "2_mi0_stop" "2_0mi.i")
+(final_presence_set "2_0mii." "2_0mii_cont")
+(exclusion_set "2_0mii_cont"
+ "2_0m.ii, 2_0m.mi, 2_0m.fi, 2_0m.mf, 2_0b.bb, 2_0m.bb,\
+ 2_0m.ib, 2_0m.mb, 2_0m.fb, 2_0m.lx")
+(exclusion_set "2_mi0_stop" "2_0mii.")
+(final_presence_set "2_mi1_stop" "2_1mi.i")
+(exclusion_set "2_mi1_stop" "2_1mii.")
+(final_presence_set "2_mi_stop" "2_mi0_stop, 2_mi1_stop")
+
+(final_absence_set
+ "2_0m.ii,2_0mi.i,2_0mii.,2_0m.mi,2_0mm.i,2_0mmi.,2_0m.fi,2_0mf.i,2_0mfi.,\
+ 2_0m.mf,2_0mm.f,2_0mmf.,2_0b.bb,2_0bb.b,2_0bbb.,2_0m.bb,2_0mb.b,2_0mbb.,\
+ 2_0m.ib,2_0mi.b,2_0mib.,2_0m.mb,2_0mm.b,2_0mmb.,2_0m.fb,2_0mf.b,2_0mfb.,\
+ 2_0m.lx,2_0mlx., \
+ 2_1m.ii,2_1mi.i,2_1mii.,2_1m.mi,2_1mm.i,2_1mmi.,2_1m.fi,2_1mf.i,2_1mfi.,\
+ 2_1m.mf,2_1mm.f,2_1mmf.,2_1b.bb,2_1bb.b,2_1bbb.,2_1m.bb,2_1mb.b,2_1mbb.,\
+ 2_1m.ib,2_1mi.b,2_1mib.,2_1m.mb,2_1mm.b,2_1mmb.,2_1m.fb,2_1mf.b,2_1mfb.,\
+ 2_1m.lx,2_1mlx."
+ "2_m0_stop,2_m1_stop,2_mi0_stop,2_mi1_stop")
+
+(define_insn_reservation "2_stop_bit" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "stop_bit"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_stop|2_m0_stop|2_m1_stop|2_mi0_stop|2_mi1_stop")
+
+(define_insn_reservation "2_br" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "br"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_B")
+(define_insn_reservation "2_scall" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "scall"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_B")
+(define_insn_reservation "2_fcmp" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fcmp"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F")
+(define_insn_reservation "2_fcvtfx" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fcvtfx"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F")
+(define_insn_reservation "2_fld" 6
+ (and (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "data_speculative" "no"))
+ (eq_attr "check_load" "no"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M")
+(define_insn_reservation "2_flda" 6
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "data_speculative" "yes"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+(define_insn_reservation "2_fldc" 0
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "check_load" "yes"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+
+(define_insn_reservation "2_fldp" 6
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fldp"))
+ (eq_attr "check_load" "no"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+(define_insn_reservation "2_fldpc" 0
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fldp"))
+ (eq_attr "check_load" "yes"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+
+(define_insn_reservation "2_fmac" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fmac"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F")
+(define_insn_reservation "2_fmisc" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fmisc"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F")
+
+;; There is only one insn `mov = ar.bsp' for frar_i:
+;; Latency time ???
+(define_insn_reservation "2_frar_i" 13
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frar_i"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+;; There are only two insns, `mov = ar.unat' and `mov = ar.ccv', for frar_m:
+;; Latency time ???
+(define_insn_reservation "2_frar_m" 6
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frar_m"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um2")
+(define_insn_reservation "2_frbr" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frbr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+(define_insn_reservation "2_frfr" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frfr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um2")
+(define_insn_reservation "2_frpr" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frpr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+
+(define_insn_reservation "2_ialu" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ialu"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_A")
+(define_insn_reservation "2_icmp" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "icmp"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_A")
+(define_insn_reservation "2_ilog" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ilog"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_A")
+(define_insn_reservation "2_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmalua"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_A")
+;; Latency time ???
+(define_insn_reservation "2_ishf" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ishf"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+
+(define_insn_reservation "2_ld" 1
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ld"))
+ (eq_attr "check_load" "no"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+(define_insn_reservation "2_ldc" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "check_load" "yes"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+
+(define_insn_reservation "2_long_i" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "long_i"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_L")
+
+(define_insn_reservation "2_mmmul" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmmul"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+;; Latency time ???
+(define_insn_reservation "2_mmshf" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmshf"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_I")
+;; Latency time ???
+(define_insn_reservation "2_mmshfi" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmshfi"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_I")
+
+;; Now we have only one insn (flushrs) of this class.  We assume that flushrs
+;; is the 1st syllable of the bundle after the stop bit.
+(define_insn_reservation "2_rse_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "rse_m"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "(2_0m.ii|2_0m.mi|2_0m.fi|2_0m.mf|2_0m.bb\
+ |2_0m.ib|2_0m.mb|2_0m.fb|2_0m.lx)+2_um0")
+(define_insn_reservation "2_sem" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "sem"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um23")
+
+(define_insn_reservation "2_stf" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "stf"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um23")
+(define_insn_reservation "2_st" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "st"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um23")
+(define_insn_reservation "2_syst_m0" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "syst_m0"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um2")
+(define_insn_reservation "2_syst_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "syst_m"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um0")
+;; Reservation???
+(define_insn_reservation "2_tbit" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tbit"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+
+;; There is only one insn `mov ar.pfs =' for toar_i:
+(define_insn_reservation "2_toar_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "toar_i"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+;; There are only two insns `mov ar.ccv =' and `mov ar.unat =' for toar_m:
+;; Latency time ???
+(define_insn_reservation "2_toar_m" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "toar_m"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um2")
+;; Latency time ???
+(define_insn_reservation "2_tobr" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tobr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+(define_insn_reservation "2_tofr" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tofr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um23")
+;; Latency time ???
+(define_insn_reservation "2_topr" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "topr"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I+2_only_ui0")
+
+(define_insn_reservation "2_xmpy" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "xmpy"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F")
+;; Latency time ???
+(define_insn_reservation "2_xtd" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "xtd"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_I")
+
+(define_insn_reservation "2_chk_s_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_s_i"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_I|2_M_only_um23")
+(define_insn_reservation "2_chk_s_f" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_s_f"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um23")
+(define_insn_reservation "2_chk_a" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_a"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+
+(define_insn_reservation "2_lfetch" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "lfetch"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M_only_um01")
+
+(define_insn_reservation "2_nop_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_m"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_M0")
+(define_insn_reservation "2_nop_b" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_b"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_NB")
+(define_insn_reservation "2_nop_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_i"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_I0")
+(define_insn_reservation "2_nop_f" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_f"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_F0")
+(define_insn_reservation "2_nop_x" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_x"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_L0")
+
+(define_insn_reservation "2_unknown" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "unknown"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_empty")
+
+(define_insn_reservation "2_nop" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "2_M0|2_NB|2_I0|2_F0")
+
+(define_insn_reservation "2_ignore" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ignore"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "nothing")
+
+(define_cpu_unit "2_m_cont_only, 2_b_cont_only" "two")
+(define_cpu_unit "2_mi_cont_only, 2_mm_cont_only, 2_mf_cont_only" "two")
+(define_cpu_unit "2_mb_cont_only, 2_bb_cont_only" "two")
+
+(final_presence_set "2_m_cont_only" "2_m_cont")
+(exclusion_set "2_m_cont_only"
+ "2_0mi.i, 2_0mm.i, 2_0mf.i, 2_0mm.f, 2_0mb.b,\
+ 2_0mi.b, 2_0mm.b, 2_0mf.b, 2_0mlx.")
+
+(final_presence_set "2_b_cont_only" "2_b_cont")
+(exclusion_set "2_b_cont_only" "2_0bb.b")
+
+(final_presence_set "2_mi_cont_only" "2_mi_cont")
+(exclusion_set "2_mi_cont_only" "2_0mii., 2_0mib.")
+
+(final_presence_set "2_mm_cont_only" "2_mm_cont")
+(exclusion_set "2_mm_cont_only" "2_0mmi., 2_0mmf., 2_0mmb.")
+
+(final_presence_set "2_mf_cont_only" "2_mf_cont")
+(exclusion_set "2_mf_cont_only" "2_0mfi., 2_0mfb.")
+
+(final_presence_set "2_mb_cont_only" "2_mb_cont")
+(exclusion_set "2_mb_cont_only" "2_0mbb.")
+
+(final_presence_set "2_bb_cont_only" "2_bb_cont")
+(exclusion_set "2_bb_cont_only" "2_0bbb.")
+
+(define_insn_reservation "2_pre_cycle" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "pre_cycle"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "nothing")
+
+;;(define_insn_reservation "2_pre_cycle" 0
+;; (and (and (eq_attr "cpu" "itanium2")
+;; (eq_attr "itanium_class" "pre_cycle"))
+;; (eq (symbol_ref "bundling_p") (const_int 0)))
+;; "(2_0m_bs, 2_m_cont) \
+;; | (2_0mi_bs, (2_mi_cont|nothing)) \
+;; | (2_0mm_bs, 2_mm_cont) \
+;; | (2_0mf_bs, (2_mf_cont|nothing)) \
+;; | (2_0b_bs, (2_b_cont|nothing)) \
+;; | (2_0bb_bs, (2_bb_cont|nothing)) \
+;; | (2_0mb_bs, (2_mb_cont|nothing)) \
+;; | (2_1m_bs, 2_m_cont) \
+;; | (2_1mi_bs, (2_mi_cont|nothing)) \
+;; | (2_1mm_bs, 2_mm_cont) \
+;; | (2_1mf_bs, (2_mf_cont|nothing)) \
+;; | (2_1b_bs, (2_b_cont|nothing)) \
+;; | (2_1bb_bs, (2_bb_cont|nothing)) \
+;; | (2_1mb_bs, (2_mb_cont|nothing)) \
+;; | (2_m_cont_only, (2_m_cont|nothing)) \
+;; | (2_b_cont_only, (2_b_cont|nothing)) \
+;; | (2_mi_cont_only, (2_mi_cont|nothing)) \
+;; | (2_mm_cont_only, (2_mm_cont|nothing)) \
+;; | (2_mf_cont_only, (2_mf_cont|nothing)) \
+;; | (2_mb_cont_only, (2_mb_cont|nothing)) \
+;; | (2_bb_cont_only, (2_bb_cont|nothing)) \
+;; | (2_m_stop, (2_0mmi_cont|nothing)) \
+;; | (2_mi_stop, (2_0mii_cont|nothing))")
+
+;; Bypasses:
+
+(define_bypass 1 "2_fcmp" "2_br,2_scall")
+(define_bypass 0 "2_icmp" "2_br,2_scall")
+(define_bypass 0 "2_tbit" "2_br,2_scall")
+(define_bypass 2 "2_ld" "2_ld" "ia64_ld_address_bypass_p")
+(define_bypass 2 "2_ld" "2_st" "ia64_st_address_bypass_p")
+(define_bypass 2 "2_ld,2_ldc" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_ilog" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_ialu" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_mmalua,2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld,2_ldc")
+(define_bypass 6 "2_tofr" "2_frfr,2_stf")
+
+;; We don't include fcmp here because scall may be predicated.
+(define_bypass 0 "2_fcvtfx,2_fld,2_flda,2_fldc,2_fmac,2_fmisc,2_frar_i,2_frar_m,\
+ 2_frbr,2_frfr,2_frpr,2_ialu,2_ilog,2_ishf,2_ld,2_ldc,2_long_i,\
+ 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tobr,2_tofr,\
+ 2_xmpy,2_xtd"
+ "2_br,2_scall")
+
+(define_bypass 0 "2_unknown,2_ignore,2_stop_bit,2_br,2_fcmp,2_fcvtfx,2_fld,2_flda,2_fldc,\
+ 2_fmac,2_fmisc,2_frar_i,2_frar_m,2_frbr,2_frfr,2_frpr,\
+ 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_ldc,2_chk_s_i,2_chk_s_f,2_chk_a,2_long_i,\
+ 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\
+ 2_nop_i,2_nop_m,2_nop_x,2_rse_m,2_scall,2_sem,2_stf,2_st,\
+ 2_syst_m0,2_syst_m,2_tbit,2_toar_i,2_toar_m,2_tobr,2_tofr,\
+ 2_topr,2_xmpy,2_xtd,2_lfetch" "2_ignore")
+
+
+
+;; Bundling
+
+(define_automaton "twob")
+
+;; Pseudo units for quicker searching for the position in the two-packet window.
+(define_query_cpu_unit "2_1,2_2,2_3,2_4,2_5,2_6" "twob")
+
+;; All possible combinations of bundles/syllables
+(define_cpu_unit
+ "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx" "twob")
+(define_cpu_unit
+ "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\
+ 2b_0mi.b, 2b_0mm.b, 2b_0mf.b" "twob")
+(define_query_cpu_unit
+ "2b_0mii., 2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\
+ 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx." "twob")
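+
+;; Naming convention: the dot marks how far a bundle has been filled.
+;; E.g. 2b_0m.ii is an MII bundle in the 1st window slot with only the
+;; M syllable issued, 2b_0mi.i has two syllables issued, and 2b_0mii.
+;; is a completely filled bundle.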
+
+(define_cpu_unit
+ "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\
+ 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx" "twob")
+(define_cpu_unit
+ "2b_1mi.i, 2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\
+ 2b_1mi.b, 2b_1mm.b, 2b_1mf.b" "twob")
+(define_query_cpu_unit
+ "2b_1mii., 2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\
+ 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx." "twob")
+
+;; Slot 1
+(exclusion_set "2b_0m.ii"
+ "2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.mi"
+ "2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib,\
+ 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.fi"
+ "2b_0m.mf, 2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.mf"
+ "2b_0b.bb, 2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0b.bb" "2b_0m.bb, 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.bb" "2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.ib" "2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.mb" "2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_0m.fb" "2b_0m.lx")
+
+;; Slot 2
+(exclusion_set "2b_0mi.i"
+ "2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\
+ 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mm.i"
+ "2b_0mf.i, 2b_0mm.f, 2b_0bb.b, 2b_0mb.b,\
+ 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mf.i"
+ "2b_0mm.f, 2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mm.f"
+ "2b_0bb.b, 2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0bb.b" "2b_0mb.b, 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mb.b" "2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mi.b" "2b_0mm.b, 2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mm.b" "2b_0mf.b, 2b_0mlx.")
+(exclusion_set "2b_0mf.b" "2b_0mlx.")
+
+;; Slot 3
+(exclusion_set "2b_0mii."
+ "2b_0mmi., 2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\
+ 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mmi."
+ "2b_0mfi., 2b_0mmf., 2b_0bbb., 2b_0mbb.,\
+ 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mfi."
+ "2b_0mmf., 2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mmf."
+ "2b_0bbb., 2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0bbb." "2b_0mbb., 2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mbb." "2b_0mib., 2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mib." "2b_0mmb., 2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mmb." "2b_0mfb., 2b_0mlx.")
+(exclusion_set "2b_0mfb." "2b_0mlx.")
+
+;; Slot 4
+(exclusion_set "2b_1m.ii"
+ "2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\
+ 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.mi"
+ "2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib,\
+ 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.fi"
+ "2b_1m.mf, 2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.mf"
+ "2b_1b.bb, 2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1b.bb" "2b_1m.bb, 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.bb" "2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.ib" "2b_1m.mb, 2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.mb" "2b_1m.fb, 2b_1m.lx")
+(exclusion_set "2b_1m.fb" "2b_1m.lx")
+
+;; Slot 5
+(exclusion_set "2b_1mi.i"
+ "2b_1mm.i, 2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\
+ 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mm.i"
+ "2b_1mf.i, 2b_1mm.f, 2b_1bb.b, 2b_1mb.b,\
+ 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mf.i"
+ "2b_1mm.f, 2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mm.f"
+ "2b_1bb.b, 2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1bb.b" "2b_1mb.b, 2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mb.b" "2b_1mi.b, 2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mi.b" "2b_1mm.b, 2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mm.b" "2b_1mf.b, 2b_1mlx.")
+(exclusion_set "2b_1mf.b" "2b_1mlx.")
+
+;; Slot 6
+(exclusion_set "2b_1mii."
+ "2b_1mmi., 2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\
+ 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mmi."
+ "2b_1mfi., 2b_1mmf., 2b_1bbb., 2b_1mbb.,\
+ 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mfi."
+ "2b_1mmf., 2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mmf."
+ "2b_1bbb., 2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1bbb." "2b_1mbb., 2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mbb." "2b_1mib., 2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mib." "2b_1mmb., 2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mmb." "2b_1mfb., 2b_1mlx.")
+(exclusion_set "2b_1mfb." "2b_1mlx.")
+
+(final_presence_set "2b_0mi.i" "2b_0m.ii")
+(final_presence_set "2b_0mii." "2b_0mi.i")
+(final_presence_set "2b_1mi.i" "2b_1m.ii")
+(final_presence_set "2b_1mii." "2b_1mi.i")
+
+(final_presence_set "2b_0mm.i" "2b_0m.mi")
+(final_presence_set "2b_0mmi." "2b_0mm.i")
+(final_presence_set "2b_1mm.i" "2b_1m.mi")
+(final_presence_set "2b_1mmi." "2b_1mm.i")
+
+(final_presence_set "2b_0mf.i" "2b_0m.fi")
+(final_presence_set "2b_0mfi." "2b_0mf.i")
+(final_presence_set "2b_1mf.i" "2b_1m.fi")
+(final_presence_set "2b_1mfi." "2b_1mf.i")
+
+(final_presence_set "2b_0mm.f" "2b_0m.mf")
+(final_presence_set "2b_0mmf." "2b_0mm.f")
+(final_presence_set "2b_1mm.f" "2b_1m.mf")
+(final_presence_set "2b_1mmf." "2b_1mm.f")
+
+(final_presence_set "2b_0bb.b" "2b_0b.bb")
+(final_presence_set "2b_0bbb." "2b_0bb.b")
+(final_presence_set "2b_1bb.b" "2b_1b.bb")
+(final_presence_set "2b_1bbb." "2b_1bb.b")
+
+(final_presence_set "2b_0mb.b" "2b_0m.bb")
+(final_presence_set "2b_0mbb." "2b_0mb.b")
+(final_presence_set "2b_1mb.b" "2b_1m.bb")
+(final_presence_set "2b_1mbb." "2b_1mb.b")
+
+(final_presence_set "2b_0mi.b" "2b_0m.ib")
+(final_presence_set "2b_0mib." "2b_0mi.b")
+(final_presence_set "2b_1mi.b" "2b_1m.ib")
+(final_presence_set "2b_1mib." "2b_1mi.b")
+
+(final_presence_set "2b_0mm.b" "2b_0m.mb")
+(final_presence_set "2b_0mmb." "2b_0mm.b")
+(final_presence_set "2b_1mm.b" "2b_1m.mb")
+(final_presence_set "2b_1mmb." "2b_1mm.b")
+
+(final_presence_set "2b_0mf.b" "2b_0m.fb")
+(final_presence_set "2b_0mfb." "2b_0mf.b")
+(final_presence_set "2b_1mf.b" "2b_1m.fb")
+(final_presence_set "2b_1mfb." "2b_1mf.b")
+
+(final_presence_set "2b_0mlx." "2b_0m.lx")
+(final_presence_set "2b_1mlx." "2b_1m.lx")
+
+;; See the corresponding comment in non-bundling section above.
+(final_presence_set
+ "2b_1m.lx"
+ "2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.")
+(final_presence_set "2b_1b.bb" "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mlx.")
+(final_presence_set
+ "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1m.bb,2b_1m.ib,2b_1m.mb,2b_1m.fb"
+ "2b_0mii.,2b_0mmi.,2b_0mfi.,2b_0mmf.,2b_0mib.,2b_0mmb.,2b_0mfb.,2b_0mlx.")
+
+;; Ports/units (nb means a nop.b insn issued into the given port):
+(define_cpu_unit
+ "2b_um0, 2b_um1, 2b_um2, 2b_um3, 2b_ui0, 2b_ui1, 2b_uf0, 2b_uf1,\
+ 2b_ub0, 2b_ub1, 2b_ub2, 2b_unb0, 2b_unb1, 2b_unb2" "twob")
+
+(exclusion_set "2b_ub0" "2b_unb0")
+(exclusion_set "2b_ub1" "2b_unb1")
+(exclusion_set "2b_ub2" "2b_unb2")
+
+;; The following rules are used to decrease the number of alternatives.
+;; They are consequences of the Itanium2 microarchitecture.  They also
+;; describe the following rules mentioned in the Itanium2
+;; microarchitecture documentation:
+;; o "BBB/MBB: Always splits issue after either of these bundles".
+;; o "MIB BBB: Split issue after the first bundle in this pair".
+(exclusion_set
+ "2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb."
+ "2b_1m.ii,2b_1m.mi,2b_1m.fi,2b_1m.mf,2b_1b.bb,2b_1m.bb,\
+ 2b_1m.ib,2b_1m.mb,2b_1m.fb,2b_1m.lx")
+(exclusion_set "2b_0m.ib,2b_0mi.b,2b_0mib." "2b_1b.bb")
+
+;;; "MIB/MFB/MMB: Splits issue after any of these bundles unless the
+;;; B-slot contains a nop.b or a brp instruction".
+;;; "The B in an MIB/MFB/MMB bundle disperses to B0 if it is a brp or
+;;; nop.b, otherwise it disperses to B2".
+(final_absence_set
+ "2b_1m.ii, 2b_1m.mi, 2b_1m.fi, 2b_1m.mf, 2b_1b.bb, 2b_1m.bb,\
+ 2b_1m.ib, 2b_1m.mb, 2b_1m.fb, 2b_1m.lx"
+ "2b_0mib. 2b_ub2, 2b_0mfb. 2b_ub2, 2b_0mmb. 2b_ub2")
+
+;; This is necessary to start a new processor cycle when we meet a stop bit.
+(define_cpu_unit "2b_stop" "twob")
+(final_absence_set
+ "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\
+ 2b_0m.fi,2b_0mf.i,2b_0mfi.,\
+ 2b_0m.mf,2b_0mm.f,2b_0mmf.,2b_0b.bb,2b_0bb.b,2b_0bbb.,\
+ 2b_0m.bb,2b_0mb.b,2b_0mbb.,\
+ 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\
+ 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \
+ 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\
+ 2b_1m.fi,2b_1mf.i,2b_1mfi.,\
+ 2b_1m.mf,2b_1mm.f,2b_1mmf.,2b_1b.bb,2b_1bb.b,2b_1bbb.,\
+ 2b_1m.bb,2b_1mb.b,2b_1mbb.,\
+ 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\
+ 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx."
+ "2b_stop")
+
+;; The issue logic can reorder M slot insns between different subtypes
+;; but cannot reorder insns within the same subtype.  The following
+;; constraint is enough to describe this.
+(final_presence_set "2b_um1" "2b_um0")
+(final_presence_set "2b_um3" "2b_um2")
+
+;; The insn in the 1st I slot of the two-bundle issue group will issue
+;; to I0.  The second I slot insn will issue to I1.
+(final_presence_set "2b_ui1" "2b_ui0")
+
+;; For exceptions of I insns:
+(define_cpu_unit "2b_only_ui0" "twob")
+(final_absence_set "2b_only_ui0" "2b_ui1")
+
+;; Insns
+
+(define_reservation "2b_M"
+ "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\
+ |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\
+ |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\
+ |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\
+ |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\
+ +(2b_um0|2b_um1|2b_um2|2b_um3)")
+
+(define_reservation "2b_M_only_um0"
+ "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\
+ |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\
+ |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\
+ |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\
+ |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\
+ +2b_um0")
+
+(define_reservation "2b_M_only_um2"
+ "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\
+ |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\
+ |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\
+ |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\
+ |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\
+ +2b_um2")
+
+(define_reservation "2b_M_only_um01"
+ "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\
+ |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\
+ |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\
+ |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\
+ |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\
+ +(2b_um0|2b_um1)")
+
+(define_reservation "2b_M_only_um23"
+ "((2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1\
+ |(2b_1m.ii|2b_1m.mi|2b_1m.fi|2b_1m.mf|2b_1m.bb\
+ |2b_1m.ib|2b_1m.mb|2b_1m.fb|2b_1m.lx)+2_4\
+ |(2b_0mm.i|2b_0mm.f|2b_0mm.b)+2_2\
+ |(2b_1mm.i|2b_1mm.f|2b_1mm.b)+2_5)\
+ +(2b_um2|2b_um3)")
+
+;; An I instruction is dispersed to the lowest-numbered I unit
+;; not already in use.  Keep possible issue splitting in mind.
+(define_reservation "2b_I"
+ "2b_0mi.i+2_2+2b_ui0|2b_0mii.+2_3+(2b_ui0|2b_ui1)|2b_0mmi.+2_3+2b_ui0\
+ |2b_0mfi.+2_3+2b_ui0|2b_0mi.b+2_2+2b_ui0\
+ |(2b_1mi.i+2_5|2b_1mi.b+2_5)+(2b_ui0|2b_ui1)\
+ |(2b_1mii.|2b_1mmi.|2b_1mfi.)+2_6+(2b_ui0|2b_ui1)")
+
+;; "An F slot in the 1st bundle disperses to F0".
+;; "An F slot in the 2st bundle disperses to F1".
+(define_reservation "2b_F"
+ "2b_0mf.i+2_2+2b_uf0|2b_0mmf.+2_3+2b_uf0|2b_0mf.b+2_2+2b_uf0\
+ |2b_1mf.i+2_5+2b_uf1|2b_1mmf.+2_6+2b_uf1|2b_1mf.b+2_5+2b_uf1")
+
+;;; "Each B slot in MBB or BBB bundle disperses to the corresponding B
+;;; unit. That is, a B slot in 1st position is dispersed to B0. In the
+;;; 2nd position it is dispersed to B2".
+(define_reservation "2b_NB"
+ "2b_0b.bb+2_1+2b_unb0|2b_0bb.b+2_2+2b_unb1|2b_0bbb.+2_3+2b_unb2\
+ |2b_0mb.b+2_2+2b_unb1|2b_0mbb.+2_3+2b_unb2\
+ |2b_0mib.+2_3+2b_unb0|2b_0mmb.+2_3+2b_unb0|2b_0mfb.+2_3+2b_unb0\
+ |2b_1b.bb+2_4+2b_unb0|2b_1bb.b+2_5+2b_unb1\
+ |2b_1bbb.+2_6+2b_unb2|2b_1mb.b+2_5+2b_unb1|2b_1mbb.+2_6+2b_unb2\
+ |2b_1mib.+2_6+2b_unb0|2b_1mmb.+2_6+2b_unb0|2b_1mfb.+2_6+2b_unb0")
+
+(define_reservation "2b_B"
+ "2b_0b.bb+2_1+2b_ub0|2b_0bb.b+2_2+2b_ub1|2b_0bbb.+2_3+2b_ub2\
+ |2b_0mb.b+2_2+2b_ub1|2b_0mbb.+2_3+2b_ub2|2b_0mib.+2_3+2b_ub2\
+ |2b_0mfb.+2_3+2b_ub2|2b_1b.bb+2_4+2b_ub0|2b_1bb.b+2_5+2b_ub1\
+ |2b_1bbb.+2_6+2b_ub2|2b_1mb.b+2_5+2b_ub1\
+ |2b_1mib.+2_6+2b_ub2|2b_1mmb.+2_6+2b_ub2|2b_1mfb.+2_6+2b_ub2")
+
+;; For the MLI template, the I slot insn is assigned to port I0 if it
+;; is in the first bundle, or to port I1 if it is in the second bundle.
+(define_reservation "2b_L"
+ "2b_0mlx.+2_3+2b_ui0+2b_uf0|2b_1mlx.+2_6+2b_ui1+2b_uf1")
+
+;; Should we describe that an A insn in an I slot can be issued into M
+;; ports?  I think it is not necessary because of multipass
+;; scheduling.  For example, multipass scheduling could use
+;; MMI-MMI instead of MII-MII where the last two I slots contain A
+;; insns (even if the case is complicated by use-def conflicts).
+;;
+;; In any case we could describe it as
+;; (define_cpu_unit "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres"
+;; "twob")
+;; (final_presence_set "2b_ui1_0pres,2b_ui1_1pres,2b_ui1_2pres,2b_ui1_3pres"
+;; "2b_ui1")
+;; (define_reservation "b_A"
+;; "b_M|b_I\
+;; |(2b_1mi.i+2_5|2b_1mii.+2_6|2b_1mmi.+2_6|2b_1mfi.+2_6|2b_1mi.b+2_5)\
+;; +(2b_um0|2b_um1|2b_um2|2b_um3)\
+;; +(2b_ui1_0pres|2b_ui1_1pres|2b_ui1_2pres|2b_ui1_3pres)")
+
+(define_reservation "2b_A" "2b_M|2b_I")
+
+;; We assume that there is no insn issued on the same cycle as the
+;; unknown insn.
+(define_cpu_unit "2b_empty" "twob")
+(exclusion_set "2b_empty"
+ "2b_0m.ii,2b_0m.mi,2b_0m.fi,2b_0m.mf,2b_0b.bb,2b_0m.bb,\
+ 2b_0m.ib,2b_0m.mb,2b_0m.fb,2b_0m.lx,2b_0mm.i")
+
+(define_cpu_unit
+ "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs"
+ "twob")
+(define_cpu_unit
+ "2b_1m_bs, 2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs"
+ "twob")
+
+(define_cpu_unit "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont, 2b_mb_cont,\
+ 2b_b_cont, 2b_bb_cont" "twob")
+
+;; For stops in the middle of a bundle.
+(define_cpu_unit "2b_m_stop, 2b_m0_stop, 2b_m1_stop, 2b_0mmi_cont" "twob")
+(define_cpu_unit "2b_mi_stop, 2b_mi0_stop, 2b_mi1_stop, 2b_0mii_cont" "twob")
+
+(final_presence_set "2b_0m_bs"
+ "2b_0m.ii, 2b_0m.mi, 2b_0m.mf, 2b_0m.fi, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.fb, 2b_0m.mb, 2b_0m.lx")
+(final_presence_set "2b_1m_bs"
+ "2b_1m.ii, 2b_1m.mi, 2b_1m.mf, 2b_1m.fi, 2b_1m.bb,\
+ 2b_1m.ib, 2b_1m.fb, 2b_1m.mb, 2b_1m.lx")
+(final_presence_set "2b_0mi_bs" "2b_0mi.i, 2b_0mi.i")
+(final_presence_set "2b_1mi_bs" "2b_1mi.i, 2b_1mi.i")
+(final_presence_set "2b_0mm_bs" "2b_0mm.i, 2b_0mm.f, 2b_0mm.b")
+(final_presence_set "2b_1mm_bs" "2b_1mm.i, 2b_1mm.f, 2b_1mm.b")
+(final_presence_set "2b_0mf_bs" "2b_0mf.i, 2b_0mf.b")
+(final_presence_set "2b_1mf_bs" "2b_1mf.i, 2b_1mf.b")
+(final_presence_set "2b_0b_bs" "2b_0b.bb")
+(final_presence_set "2b_1b_bs" "2b_1b.bb")
+(final_presence_set "2b_0bb_bs" "2b_0bb.b")
+(final_presence_set "2b_1bb_bs" "2b_1bb.b")
+(final_presence_set "2b_0mb_bs" "2b_0mb.b")
+(final_presence_set "2b_1mb_bs" "2b_1mb.b")
+
+(exclusion_set "2b_0m_bs"
+ "2b_0mi.i, 2b_0mm.i, 2b_0mm.f, 2b_0mf.i, 2b_0mb.b,\
+ 2b_0mi.b, 2b_0mf.b, 2b_0mm.b, 2b_0mlx., 2b_m0_stop")
+(exclusion_set "2b_1m_bs"
+ "2b_1mi.i, 2b_1mm.i, 2b_1mm.f, 2b_1mf.i, 2b_1mb.b,\
+ 2b_1mi.b, 2b_1mf.b, 2b_1mm.b, 2b_1mlx., 2b_m1_stop")
+(exclusion_set "2b_0mi_bs" "2b_0mii., 2b_0mib., 2b_mi0_stop")
+(exclusion_set "2b_1mi_bs" "2b_1mii., 2b_1mib., 2b_mi1_stop")
+(exclusion_set "2b_0mm_bs" "2b_0mmi., 2b_0mmf., 2b_0mmb.")
+(exclusion_set "2b_1mm_bs" "2b_1mmi., 2b_1mmf., 2b_1mmb.")
+(exclusion_set "2b_0mf_bs" "2b_0mfi., 2b_0mfb.")
+(exclusion_set "2b_1mf_bs" "2b_1mfi., 2b_1mfb.")
+(exclusion_set "2b_0b_bs" "2b_0bb.b")
+(exclusion_set "2b_1b_bs" "2b_1bb.b")
+(exclusion_set "2b_0bb_bs" "2b_0bbb.")
+(exclusion_set "2b_1bb_bs" "2b_1bbb.")
+(exclusion_set "2b_0mb_bs" "2b_0mbb.")
+(exclusion_set "2b_1mb_bs" "2b_1mbb.")
+
+(exclusion_set
+ "2b_0m_bs, 2b_0mi_bs, 2b_0mm_bs, 2b_0mf_bs, 2b_0b_bs, 2b_0bb_bs, 2b_0mb_bs,
+ 2b_1m_bs, 2b_1mi_bs, 2b_1mm_bs, 2b_1mf_bs, 2b_1b_bs, 2b_1bb_bs, 2b_1mb_bs"
+ "2b_stop")
+
+(final_presence_set
+ "2b_0mi.i, 2b_0mm.i, 2b_0mf.i, 2b_0mm.f, 2b_0mb.b,\
+ 2b_0mi.b, 2b_0mm.b, 2b_0mf.b, 2b_0mlx."
+ "2b_m_cont")
+(final_presence_set "2b_0mii., 2b_0mib." "2b_mi_cont")
+(final_presence_set "2b_0mmi., 2b_0mmf., 2b_0mmb." "2b_mm_cont")
+(final_presence_set "2b_0mfi., 2b_0mfb." "2b_mf_cont")
+(final_presence_set "2b_0bb.b" "2b_b_cont")
+(final_presence_set "2b_0bbb." "2b_bb_cont")
+(final_presence_set "2b_0mbb." "2b_mb_cont")
+
+(exclusion_set
+ "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx"
+ "2b_m_cont, 2b_mi_cont, 2b_mm_cont, 2b_mf_cont,\
+ 2b_mb_cont, 2b_b_cont, 2b_bb_cont")
+
+(exclusion_set "2b_empty"
+ "2b_m_cont,2b_mi_cont,2b_mm_cont,2b_mf_cont,\
+ 2b_mb_cont,2b_b_cont,2b_bb_cont")
+
+;; For m;mi bundle
+(final_presence_set "2b_m0_stop" "2b_0m.mi")
+(final_presence_set "2b_0mm.i" "2b_0mmi_cont")
+(exclusion_set "2b_0mmi_cont"
+ "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_m0_stop" "2b_0mm.i")
+(final_presence_set "2b_m1_stop" "2b_1m.mi")
+(exclusion_set "2b_m1_stop" "2b_1mm.i")
+(final_presence_set "2b_m_stop" "2b_m0_stop, 2b_m1_stop")
+
+;; For mi;i bundle
+(final_presence_set "2b_mi0_stop" "2b_0mi.i")
+(final_presence_set "2b_0mii." "2b_0mii_cont")
+(exclusion_set "2b_0mii_cont"
+ "2b_0m.ii, 2b_0m.mi, 2b_0m.fi, 2b_0m.mf, 2b_0b.bb, 2b_0m.bb,\
+ 2b_0m.ib, 2b_0m.mb, 2b_0m.fb, 2b_0m.lx")
+(exclusion_set "2b_mi0_stop" "2b_0mii.")
+(final_presence_set "2b_mi1_stop" "2b_1mi.i")
+(exclusion_set "2b_mi1_stop" "2b_1mii.")
+(final_presence_set "2b_mi_stop" "2b_mi0_stop, 2b_mi1_stop")
+
+(final_absence_set
+ "2b_0m.ii,2b_0mi.i,2b_0mii.,2b_0m.mi,2b_0mm.i,2b_0mmi.,\
+ 2b_0m.fi,2b_0mf.i,2b_0mfi.,2b_0m.mf,2b_0mm.f,2b_0mmf.,\
+ 2b_0b.bb,2b_0bb.b,2b_0bbb.,2b_0m.bb,2b_0mb.b,2b_0mbb.,\
+ 2b_0m.ib,2b_0mi.b,2b_0mib.,2b_0m.mb,2b_0mm.b,2b_0mmb.,\
+ 2b_0m.fb,2b_0mf.b,2b_0mfb.,2b_0m.lx,2b_0mlx., \
+ 2b_1m.ii,2b_1mi.i,2b_1mii.,2b_1m.mi,2b_1mm.i,2b_1mmi.,\
+ 2b_1m.fi,2b_1mf.i,2b_1mfi.,2b_1m.mf,2b_1mm.f,2b_1mmf.,\
+ 2b_1b.bb,2b_1bb.b,2b_1bbb.,2b_1m.bb,2b_1mb.b,2b_1mbb.,\
+ 2b_1m.ib,2b_1mi.b,2b_1mib.,2b_1m.mb,2b_1mm.b,2b_1mmb.,\
+ 2b_1m.fb,2b_1mf.b,2b_1mfb.,2b_1m.lx,2b_1mlx."
+ "2b_m0_stop,2b_m1_stop,2b_mi0_stop,2b_mi1_stop")
+
+(define_insn_reservation "2b_stop_bit" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "stop_bit"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_stop|2b_m0_stop|2b_m1_stop|2b_mi0_stop|2b_mi1_stop")
+(define_insn_reservation "2b_br" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "br"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B")
+(define_insn_reservation "2b_scall" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "scall"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_B")
+(define_insn_reservation "2b_fcmp" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fcmp"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+(define_insn_reservation "2b_fcvtfx" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fcvtfx"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+(define_insn_reservation "2b_fld" 6
+ (and (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "data_speculative" "no"))
+ (eq_attr "check_load" "no"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M")
+(define_insn_reservation "2b_flda" 6
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "data_speculative" "yes"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+(define_insn_reservation "2b_fldc" 0
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fld"))
+ (eq_attr "check_load" "yes"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+
+(define_insn_reservation "2b_fldp" 6
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fldp"))
+ (eq_attr "check_load" "no"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+(define_insn_reservation "2b_fldpc" 0
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fldp"))
+ (eq_attr "check_load" "yes"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+
+(define_insn_reservation "2b_fmac" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fmac"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+(define_insn_reservation "2b_fmisc" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "fmisc"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+
+;; Latency time ???
+(define_insn_reservation "2b_frar_i" 13
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frar_i"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+;; Latency time ???
+(define_insn_reservation "2b_frar_m" 6
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frar_m"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um2")
+(define_insn_reservation "2b_frbr" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frbr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+(define_insn_reservation "2b_frfr" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frfr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um2")
+(define_insn_reservation "2b_frpr" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "frpr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+
+(define_insn_reservation "2b_ialu" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ialu"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_A")
+(define_insn_reservation "2b_icmp" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "icmp"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A")
+(define_insn_reservation "2b_ilog" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ilog"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A")
+(define_insn_reservation "2b_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmalua"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A")
+;; Latency time ???
+(define_insn_reservation "2b_ishf" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ishf"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+
+(define_insn_reservation "2b_ld" 1
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ld"))
+ (eq_attr "check_load" "no"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+(define_insn_reservation "2b_ldc" 0
+ (and (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ld"))
+ (eq_attr "check_load" "yes"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+
+(define_insn_reservation "2b_long_i" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "long_i"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L")
+
+;; Latency time ???
+(define_insn_reservation "2b_mmmul" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmmul"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+;; Latency time ???
+(define_insn_reservation "2b_mmshf" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmshf"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I")
+;; Latency time ???
+(define_insn_reservation "2b_mmshfi" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmshfi"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I")
+
+(define_insn_reservation "2b_rse_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "rse_m"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "(2b_0m.ii|2b_0m.mi|2b_0m.fi|2b_0m.mf|2b_0m.bb\
+ |2b_0m.ib|2b_0m.mb|2b_0m.fb|2b_0m.lx)+2_1+2b_um0")
+(define_insn_reservation "2b_sem" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "sem"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um23")
+
+(define_insn_reservation "2b_stf" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "stf"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um23")
+(define_insn_reservation "2b_st" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "st"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um23")
+(define_insn_reservation "2b_syst_m0" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "syst_m0"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um2")
+(define_insn_reservation "2b_syst_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "syst_m"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um0")
+;; Reservation???
+(define_insn_reservation "2b_tbit" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tbit"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+(define_insn_reservation "2b_toar_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "toar_i"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+;; Latency time ???
+(define_insn_reservation "2b_toar_m" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "toar_m"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um2")
+;; Latency time ???
+(define_insn_reservation "2b_tobr" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tobr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+(define_insn_reservation "2b_tofr" 5
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "tofr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um23")
+;; Latency time ???
+(define_insn_reservation "2b_topr" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "topr"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I+2b_only_ui0")
+
+(define_insn_reservation "2b_xmpy" 4
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "xmpy"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+;; Latency time ???
+(define_insn_reservation "2b_xtd" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "xtd"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I")
+
+(define_insn_reservation "2b_chk_s_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_s_i"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_I|2b_M_only_um23")
+(define_insn_reservation "2b_chk_s_f" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_s_f"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um23")
+(define_insn_reservation "2b_chk_a" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "chk_a"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+
+(define_insn_reservation "2b_lfetch" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "lfetch"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M_only_um01")
+(define_insn_reservation "2b_nop_m" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_m"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_M")
+(define_insn_reservation "2b_nop_b" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_b"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_NB")
+(define_insn_reservation "2b_nop_i" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_i"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_I")
+(define_insn_reservation "2b_nop_f" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_f"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_F")
+(define_insn_reservation "2b_nop_x" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop_x"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_L")
+(define_insn_reservation "2b_unknown" 1
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "unknown"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_empty")
+(define_insn_reservation "2b_nop" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "nop"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "2b_M|2b_NB|2b_I|2b_F")
+(define_insn_reservation "2b_ignore" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "ignore"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "nothing")
+
+(define_insn_reservation "2b_pre_cycle" 0
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "pre_cycle"))
+ (ne (symbol_ref "bundling_p") (const_int 0)))
+ "(2b_0m_bs, 2b_m_cont) \
+ | (2b_0mi_bs, 2b_mi_cont) \
+ | (2b_0mm_bs, 2b_mm_cont) \
+ | (2b_0mf_bs, 2b_mf_cont) \
+ | (2b_0b_bs, 2b_b_cont) \
+ | (2b_0bb_bs, 2b_bb_cont) \
+ | (2b_0mb_bs, 2b_mb_cont) \
+ | (2b_1m_bs, 2b_m_cont) \
+ | (2b_1mi_bs, 2b_mi_cont) \
+ | (2b_1mm_bs, 2b_mm_cont) \
+ | (2b_1mf_bs, 2b_mf_cont) \
+ | (2b_1b_bs, 2b_b_cont) \
+ | (2b_1bb_bs, 2b_bb_cont) \
+ | (2b_1mb_bs, 2b_mb_cont) \
+ | (2b_m_stop, 2b_0mmi_cont) \
+ | (2b_mi_stop, 2b_0mii_cont)")
+
diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm
new file mode 100644
index 000000000..b7eaa6eca
--- /dev/null
+++ b/gcc/config/ia64/lib1funcs.asm
@@ -0,0 +1,795 @@
+/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by James E. Wilson <wilson@cygnus.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef L__divxf3
+// Compute an 80-bit IEEE double-extended quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+//
+// __divtf3 is an alternate symbol name for backward compatibility.
+
+ .text
+ .align 16
+ .global __divxf3
+ .proc __divxf3
+__divxf3:
+#ifdef SHARED
+ .global __divtf3
+__divtf3:
+#endif
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fnma.s1 f11 = farg1, f10, f1
+(p6) fma.s1 f12 = farg0, f10, f0
+ ;;
+(p6) fma.s1 f13 = f11, f11, f0
+(p6) fma.s1 f14 = f11, f11, f11
+ ;;
+(p6) fma.s1 f11 = f13, f13, f11
+(p6) fma.s1 f13 = f14, f10, f10
+ ;;
+(p6) fma.s1 f10 = f13, f11, f10
+(p6) fnma.s1 f11 = farg1, f12, farg0
+ ;;
+(p6) fma.s1 f11 = f11, f10, f12
+(p6) fnma.s1 f12 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f10 = f12, f10, f10
+(p6) fnma.s1 f12 = farg1, f11, farg0
+ ;;
+(p6) fma.s0 fret0 = f12, f10, f11
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ .endp __divxf3
+#endif
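+
+// Purely as orientation (illustrative C, not part of this file): the
+// fma-based Newton-Raphson scheme above follows this shape, where
+// approx_recip stands in for frcpa's roughly-8-bit initial guess:
+//
+//   double nr_div (double a, double b)
+//   {
+//     double y = approx_recip (b);   /* frcpa seed                 */
+//     double e = 1.0 - b * y;        /* fnma: current error        */
+//     y += y * e;                    /* each fma step roughly      */
+//     e = 1.0 - b * y;               /* doubles the valid bits     */
+//     y += y * e;
+//     double q = a * y;              /* initial quotient           */
+//     double r = a - b * q;          /* exact residual via fma     */
+//     return q + r * y;              /* final rounding correction  */
+//   }
+//
+// The extended-precision routine above simply runs enough refinement
+// steps for 64 bits of significand.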
+
+#ifdef L__divdf3
+// Compute a 64-bit IEEE double quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divdf3
+ .proc __divdf3
+__divdf3:
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fmpy.s1 f11 = farg0, f10
+(p6) fnma.s1 f12 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f11 = f12, f11, f11
+(p6) fmpy.s1 f13 = f12, f12
+ ;;
+(p6) fma.s1 f10 = f12, f10, f10
+(p6) fma.s1 f11 = f13, f11, f11
+ ;;
+(p6) fmpy.s1 f12 = f13, f13
+(p6) fma.s1 f10 = f13, f10, f10
+ ;;
+(p6) fma.d.s1 f11 = f12, f11, f11
+(p6) fma.s1 f10 = f12, f10, f10
+ ;;
+(p6) fnma.d.s1 f8 = farg1, f11, farg0
+ ;;
+(p6) fma.d fret0 = f8, f10, f11
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divdf3
+#endif
+
+#ifdef L__divsf3
+// Compute a 32-bit IEEE float quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// farg0 holds the dividend. farg1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divsf3
+ .proc __divsf3
+__divsf3:
+ cmp.eq p7, p0 = r0, r0
+ frcpa.s0 f10, p6 = farg0, farg1
+ ;;
+(p6) cmp.ne p7, p0 = r0, r0
+ .pred.rel.mutex p6, p7
+(p6) fmpy.s1 f8 = farg0, f10
+(p6) fnma.s1 f9 = farg1, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fmpy.s1 f9 = f9, f9
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fmpy.s1 f9 = f9, f9
+ ;;
+(p6) fma.d.s1 f10 = f9, f8, f8
+ ;;
+(p6) fnorm.s.s0 fret0 = f10
+(p7) mov fret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divsf3
+#endif
+
+#ifdef L__divdi3
+// Compute a 64-bit integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divdi3
+ .proc __divdi3
+__divdi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, so that they won't be treated as unsigned.
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fnma.s1 f11 = f9, f10, f1
+(p6) fmpy.s1 f12 = f8, f10
+ ;;
+(p6) fmpy.s1 f13 = f11, f11
+(p6) fma.s1 f12 = f11, f12, f12
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an integer.
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divdi3
+#endif
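+
+// For orientation (illustrative C, not part of this file), the integer
+// routines share one shape: move to FP, refine the frcpa seed until the
+// quotient is accurate to better than one integer ulp, then truncate.
+// The sequence above corresponds roughly to:
+//
+//   long long nr_divdi (long long a, long long b)
+//   {
+//     long double fa = a, fb = b;
+//     long double y  = approx_recip (fb);      /* frcpa           */
+//     long double e  = 1.0L - fb * y;
+//     long double q  = fa * y;
+//     long double e2 = e * e;
+//     q += e * q;   y += e * y;                /* refinements 1-2 */
+//     q += e2 * q;  y += e2 * y;               /* refinements 3-4 */
+//     long double r = fa - fb * q;             /* exact residual  */
+//     return (long long) (q + r * y);          /* fcvt.fx.trunc   */
+//   }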
+
+#ifdef L__moddi3
+// Compute a 64-bit integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a). in1 holds the divisor (b).
+
+ .text
+ .align 16
+ .global __moddi3
+ .proc __moddi3
+__moddi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f14 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, so that they won't be treated as unsigned.
+ fcvt.xf f8 = f14
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f11 = f9, f10, f1
+ ;;
+(p6) fma.s1 f12 = f11, f12, f12
+(p6) fmpy.s1 f13 = f11, f11
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+ sub in1 = r0, in1
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ // r = q * (-b) + a
+ xma.l f10 = f10, f9, f14
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __moddi3
+#endif
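+
+// Note the final xma.l above computes r = q * (-b) + a, i.e. the usual
+// remainder identity r = a - q*b; -b is formed early (sub in1 = r0, in1)
+// to keep the negation off the critical path.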
+
+#ifdef L__udivdi3
+// Compute a 64-bit unsigned integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __udivdi3
+ .proc __udivdi3
+__udivdi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software-assist faults.
+ fcvt.xuf.s1 f8 = f8
+ fcvt.xuf.s1 f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fnma.s1 f11 = f9, f10, f1
+(p6) fmpy.s1 f12 = f8, f10
+ ;;
+(p6) fmpy.s1 f13 = f11, f11
+(p6) fma.s1 f12 = f11, f12, f12
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an unsigned integer.
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __udivdi3
+#endif
+
+#ifdef L__umoddi3
+// Compute a 64-bit unsigned integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend (a). in1 holds the divisor (b).
+
+ .text
+ .align 16
+ .global __umoddi3
+ .proc __umoddi3
+__umoddi3:
+ .regstk 2,0,0,0
+ // Transfer inputs to FP registers.
+ setf.sig f14 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ // Convert the inputs to FP, to avoid FP software assist faults.
+ fcvt.xuf.s1 f8 = f14
+ fcvt.xuf.s1 f9 = f9
+(p7) break 1
+ ;;
+ // Compute the reciprocal approximation.
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+ // 3 Newton-Raphson iterations.
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f11 = f9, f10, f1
+ ;;
+(p6) fma.s1 f12 = f11, f12, f12
+(p6) fmpy.s1 f13 = f11, f11
+ ;;
+(p6) fma.s1 f10 = f11, f10, f10
+(p6) fma.s1 f11 = f13, f12, f12
+ ;;
+ sub in1 = r0, in1
+(p6) fma.s1 f10 = f13, f10, f10
+(p6) fnma.s1 f12 = f9, f11, f8
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f10 = f12, f10, f11
+ ;;
+ // Round quotient to an unsigned integer.
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ // r = q * (-b) + a
+ xma.l f10 = f10, f9, f14
+ ;;
+ // Transfer result to GP registers.
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __umoddi3
+#endif
+
+#ifdef L__divsi3
+// Compute a 32-bit integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __divsi3
+ .proc __divsi3
+__divsi3:
+ .regstk 2,0,0,0
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ sxt4 in0 = in0
+ sxt4 in1 = in1
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+(p7) break 1
+ ;;
+ mov r2 = 0x0ffdd
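+ // 0x0ffdd is the biased exponent field of 2^-34 (0xffff - 0x22);
+ // setf.exp below turns it into a tiny fudge constant in f11, folded
+ // into the error term so a single refinement step still yields a
+ // correctly truncated 32-bit quotient.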
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+(p6) fmpy.s1 f8 = f8, f10
+(p6) fnma.s1 f9 = f9, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fma.s1 f9 = f9, f9, f11
+ ;;
+(p6) fma.s1 f10 = f9, f8, f8
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __divsi3
+#endif
+
+#ifdef L__modsi3
+// Compute a 32-bit integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __modsi3
+ .proc __modsi3
+__modsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ sxt4 in0 = in0
+ sxt4 in1 = in1
+ ;;
+ setf.sig f13 = r32
+ setf.sig f9 = r33
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ sub in1 = r0, in1
+ fcvt.xf f8 = f13
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+(p7) break 1
+ ;;
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f10 = f9, f10, f1
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f12 = f10, f12, f12
+(p6) fma.s1 f10 = f10, f10, f11
+ ;;
+(p6) fma.s1 f10 = f10, f12, f12
+ ;;
+ fcvt.fx.trunc.s1 f10 = f10
+ ;;
+ xma.l f10 = f10, f9, f13
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __modsi3
+#endif
+
+#ifdef L__udivsi3
+// Compute a 32-bit unsigned integer quotient.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __udivsi3
+ .proc __udivsi3
+__udivsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ zxt4 in0 = in0
+ zxt4 in1 = in1
+ ;;
+ setf.sig f8 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ fcvt.xf f8 = f8
+ fcvt.xf f9 = f9
+(p7) break 1
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+ ;;
+(p6) fmpy.s1 f8 = f8, f10
+(p6) fnma.s1 f9 = f9, f10, f1
+ ;;
+(p6) fma.s1 f8 = f9, f8, f8
+(p6) fma.s1 f9 = f9, f9, f11
+ ;;
+(p6) fma.s1 f10 = f9, f8, f8
+ ;;
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __udivsi3
+#endif
+
+#ifdef L__umodsi3
+// Compute a 32-bit unsigned integer modulus.
+//
+// From the Intel IA-64 Optimization Guide, choose the minimum latency
+// alternative.
+//
+// in0 holds the dividend. in1 holds the divisor.
+
+ .text
+ .align 16
+ .global __umodsi3
+ .proc __umodsi3
+__umodsi3:
+ .regstk 2,0,0,0
+ mov r2 = 0x0ffdd
+ zxt4 in0 = in0
+ zxt4 in1 = in1
+ ;;
+ setf.sig f13 = in0
+ setf.sig f9 = in1
+ // Check divide by zero.
+ cmp.ne.unc p0,p7=0,in1
+ ;;
+ sub in1 = r0, in1
+ fcvt.xf f8 = f13
+ fcvt.xf f9 = f9
+ ;;
+ setf.exp f11 = r2
+ frcpa.s1 f10, p6 = f8, f9
+(p7) break 1
+ ;;
+(p6) fmpy.s1 f12 = f8, f10
+(p6) fnma.s1 f10 = f9, f10, f1
+ ;;
+ setf.sig f9 = in1
+(p6) fma.s1 f12 = f10, f12, f12
+(p6) fma.s1 f10 = f10, f10, f11
+ ;;
+(p6) fma.s1 f10 = f10, f12, f12
+ ;;
+ fcvt.fxu.trunc.s1 f10 = f10
+ ;;
+ xma.l f10 = f10, f9, f13
+ ;;
+ getf.sig ret0 = f10
+ br.ret.sptk rp
+ ;;
+ .endp __umodsi3
+#endif
+
+#ifdef L__save_stack_nonlocal
+// Notes on save/restore stack nonlocal: We read ar.bsp but write
+// ar.bspstore. This is because ar.bsp can be read at all times
+// (independent of the RSE mode) but since it's read-only we need to
+// restore the value via ar.bspstore. This is OK because
+// ar.bsp==ar.bspstore after executing "flushrs".
+
+// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
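+
+// Save area layout as written below (byte offsets from save_area):
+//   [ 0] stack pointer (in1)
+//   [ 8] ar.bsp
+//   [16] ar.rnat
+//   [24] ar.pfs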
+
+ .text
+ .align 16
+ .global __ia64_save_stack_nonlocal
+ .proc __ia64_save_stack_nonlocal
+__ia64_save_stack_nonlocal:
+ { .mmf
+ alloc r18 = ar.pfs, 2, 0, 0, 0
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ st8 [in0] = in1, 24
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmi
+ st8 [in0] = r18, -16
+ mov ar.rsc = r19
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmi
+ mov r16 = ar.bsp
+ mov r17 = ar.rnat
+ adds r2 = 8, in0
+ ;;
+ }
+ { .mmi
+ st8 [in0] = r16
+ st8 [r2] = r17
+ }
+ { .mib
+ mov ar.rsc = r19
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_save_stack_nonlocal
+#endif
+
+#ifdef L__nonlocal_goto
+// void __ia64_nonlocal_goto(void *target_label, void *save_area,
+// void *static_chain);
+
+ .text
+ .align 16
+ .global __ia64_nonlocal_goto
+ .proc __ia64_nonlocal_goto
+__ia64_nonlocal_goto:
+ { .mmi
+ alloc r20 = ar.pfs, 3, 0, 0, 0
+ ld8 r12 = [in1], 8
+ mov.ret.sptk rp = in0, .L0
+ ;;
+ }
+ { .mmf
+ ld8 r16 = [in1], 8
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ ld8 r17 = [in1], 8
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmi
+ ld8 r18 = [in1]
+ mov ar.rsc = r19
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmi
+ mov ar.bspstore = r16
+ ;;
+ mov ar.rnat = r17
+ ;;
+ }
+ { .mmi
+ loadrs
+ invala
+ mov r15 = in2
+ ;;
+ }
+.L0: { .mib
+ mov ar.rsc = r19
+ mov ar.pfs = r18
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_nonlocal_goto
+#endif
+
+#ifdef L__restore_stack_nonlocal
+// This is mostly the same as nonlocal_goto above.
+// ??? This has not been tested yet.
+
+// void __ia64_restore_stack_nonlocal(void *save_area)
+
+ .text
+ .align 16
+ .global __ia64_restore_stack_nonlocal
+ .proc __ia64_restore_stack_nonlocal
+__ia64_restore_stack_nonlocal:
+ { .mmf
+ alloc r20 = ar.pfs, 4, 0, 0, 0
+ ld8 r12 = [in0], 8
+ ;;
+ }
+ { .mmb
+ ld8 r16=[in0], 8
+ mov r19 = ar.rsc
+ ;;
+ }
+ { .mmi
+ flushrs
+ ld8 r17 = [in0], 8
+ and r19 = 0x1c, r19
+ ;;
+ }
+ { .mmf
+ ld8 r18 = [in0]
+ mov ar.rsc = r19
+ ;;
+ }
+ { .mmi
+ mov ar.bspstore = r16
+ ;;
+ mov ar.rnat = r17
+ or r19 = 0x3, r19
+ ;;
+ }
+ { .mmf
+ loadrs
+ invala
+ ;;
+ }
+.L0: { .mib
+ mov ar.rsc = r19
+ mov ar.pfs = r18
+ br.ret.sptk.few rp
+ ;;
+ }
+ .endp __ia64_restore_stack_nonlocal
+#endif
+
+#ifdef L__trampoline
+// Implement the nested function trampoline.  This is out of line
+// so that we don't have to bother with flushing the icache, and it
+// also keeps the on-stack trampoline small.
+//
+// The trampoline has the following form:
+//
+// +-------------------+ >
+// TRAMP: | __ia64_trampoline | |
+// +-------------------+ > fake function descriptor
+// | TRAMP+16 | |
+// +-------------------+ >
+// | target descriptor |
+// +-------------------+
+// | static link |
+// +-------------------+
+
+ .text
+ .align 16
+ .global __ia64_trampoline
+ .proc __ia64_trampoline
+__ia64_trampoline:
+ { .mmi
+ ld8 r2 = [r1], 8
+ ;;
+ ld8 r15 = [r1]
+ }
+ { .mmi
+ ld8 r3 = [r2], 8
+ ;;
+ ld8 r1 = [r2]
+ mov b6 = r3
+ }
+ { .bbb
+ br.sptk.many b6
+ ;;
+ }
+ .endp __ia64_trampoline
+#endif
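+
+// Purely as orientation (illustrative C, not how GCC actually emits
+// it): the compiler fills in the trampoline words sketched above
+// roughly as
+//
+//   void build_tramp (unsigned long tramp[4],
+//                     void *target_fdesc, void *static_chain)
+//   {
+//     tramp[0] = /* code address of __ia64_trampoline */;
+//     tramp[1] = (unsigned long) &tramp[2];  /* fake gp = TRAMP+16 */
+//     tramp[2] = (unsigned long) target_fdesc;
+//     tramp[3] = (unsigned long) static_chain;
+//   }
+//
+// A call through the fake descriptor sets gp = TRAMP+16, which is how
+// the code above (via r1) finds the target descriptor and the static
+// chain; since the trampoline contains no instructions, no icache
+// flush is needed.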
+
+#ifdef SHARED
+// Thunks for backward compatibility.
+#ifdef L_fixtfdi
+ .text
+ .align 16
+ .global __fixtfti
+ .proc __fixtfti
+__fixtfti:
+ { .bbb
+ br.sptk.many __fixxfti
+ ;;
+ }
+ .endp __fixtfti
+#endif
+#ifdef L_fixunstfdi
+ .align 16
+ .global __fixunstfti
+ .proc __fixunstfti
+__fixunstfti:
+ { .bbb
+ br.sptk.many __fixunsxfti
+ ;;
+ }
+ .endp __fixunstfti
+#endif
+#ifdef L_floatditf
+ .align 16
+ .global __floattitf
+ .proc __floattitf
+__floattitf:
+ { .bbb
+ br.sptk.many __floattixf
+ ;;
+ }
+ .endp __floattitf
+#endif
+#endif
diff --git a/gcc/config/ia64/libgcc-glibc.ver b/gcc/config/ia64/libgcc-glibc.ver
new file mode 100644
index 000000000..34a69618d
--- /dev/null
+++ b/gcc/config/ia64/libgcc-glibc.ver
@@ -0,0 +1,97 @@
+# Copyright (C) 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# 128-bit long double support was introduced with GCC 4.4.0.  These lines
+# ensure the symbols get @@GCC_4.4.0 attached.
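+#
+# (%exclude below is processed by libgcc's mkmap-symver script, not by
+# the linker: it removes the listed symbols from the default GCC
+# version nodes so that they are exported only at the versions defined
+# below.)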
+
+%exclude {
+ __addtf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __extendxftf2
+ __fixtfdi
+ __fixtfsi
+ __fixtfti
+ __fixunstfdi
+ __fixunstfsi
+ __fixunstfti
+ __floatditf
+ __floatsitf
+ __floattitf
+ __floatunditf
+ __floatunsitf
+ __floatuntitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+
+# These TF functions were aliases of the XF functions before GCC 3.4.
+GCC_3.0 {
+ __divtf3
+ __fixtfti
+ __fixunstfti
+ __floattitf
+}
+
+GCC_4.4.0 {
+ __addtf3
+ __copysigntf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __fabstf2
+ __fixtfdi
+ __fixtfsi
+ __fixunstfdi
+ __fixunstfsi
+ __floatditf
+ __floatsitf
+ __floatunditf
+ __floatunsitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
diff --git a/gcc/config/ia64/libgcc-ia64.ver b/gcc/config/ia64/libgcc-ia64.ver
new file mode 100644
index 000000000..11c1fe629
--- /dev/null
+++ b/gcc/config/ia64/libgcc-ia64.ver
@@ -0,0 +1,30 @@
+# Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+GCC_3.0 {
+ # IA-64 symbols
+ __ia64_nonlocal_goto
+ __ia64_personality_v1
+ __ia64_restore_stack_nonlocal
+ __ia64_save_stack_nonlocal
+ __ia64_trampoline
+ __ia64_backtrace
+}
+GCC_3.3.2 {
+ _Unwind_GetBSP
+}
diff --git a/gcc/config/ia64/linux-unwind.h b/gcc/config/ia64/linux-unwind.h
new file mode 100644
index 000000000..da3125978
--- /dev/null
+++ b/gcc/config/ia64/linux-unwind.h
@@ -0,0 +1,199 @@
+/* DWARF2 EH unwinding support for IA64 Linux.
+ Copyright (C) 2004, 2005, 2009, 2012 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Identify a signal frame, and set the frame state data
+   appropriately.  See unwind-ia64.c for the structs. */
+
+/* This works only for glibc-2.3 and later, because sigcontext is different
+ in glibc-2.2.4. */
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define IA64_GATE_AREA_START 0xa000000000000100LL
+#define IA64_GATE_AREA_END 0xa000000000030000LL
+
+#define MD_FALLBACK_FRAME_STATE_FOR ia64_fallback_frame_state
+
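+/* Added commentary: the generic unwinder calls this hook when no unwind
+   table entry covers context->rp.  On IA-64 Linux the signal trampoline
+   lives in the kernel gate page, so a return pointer inside
+   [IA64_GATE_AREA_START, IA64_GATE_AREA_END) is taken to be a signal
+   frame whose sigcontext is reachable through the memory stack pointer.  */
+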
+static _Unwind_Reason_Code
+ia64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ if (context->rp >= IA64_GATE_AREA_START
+ && context->rp < IA64_GATE_AREA_END)
+ {
+ struct sigframe {
+ char scratch[16];
+ unsigned long sig_number;
+ siginfo_t *info;
+ struct sigcontext *sc;
+ } *frame_ = (struct sigframe *)context->psp;
+ struct sigcontext *sc = frame_->sc;
+
+ /* Restore scratch registers in case the unwinder needs to
+ refer to a value stored in one of them. */
+ {
+ int i;
+
+ for (i = 2; i < 4; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ for (i = 8; i < 12; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ for (i = 14; i < 32; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ }
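+
+      /* Added commentary: context->ireg[] appears to be indexed from gr2
+         (entry i - 2 holds gr i); gr4-gr7 are preserved registers and
+         gr12/gr13 (sp and the thread pointer) are handled separately,
+         hence the gaps in the loops above.  */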
+
+ context->fpsr_loc = &(sc->sc_ar_fpsr);
+ context->signal_pfs_loc = &(sc->sc_ar_pfs);
+ context->lc_loc = &(sc->sc_ar_lc);
+ context->unat_loc = &(sc->sc_ar_unat);
+ context->br_loc[0] = &(sc->sc_br[0]);
+ context->br_loc[6] = &(sc->sc_br[6]);
+ context->br_loc[7] = &(sc->sc_br[7]);
+ context->pr = sc->sc_pr;
+ context->psp = sc->sc_gr[12];
+ context->gp = sc->sc_gr[1];
+      /* The signal frame has no associated register stack frame other
+         than what we adjust for below. */
+      fs->no_reg_stack_frame = 1;
+
+ if (sc->sc_rbs_base)
+ {
+ /* Need to switch from alternate register backing store. */
+ long ndirty, loadrs = sc->sc_loadrs >> 16;
+ unsigned long alt_bspstore = context->bsp - loadrs;
+ unsigned long bspstore;
+ unsigned long *ar_bsp = (unsigned long *)(sc->sc_ar_bsp);
+
+ ndirty = ia64_rse_num_regs ((unsigned long *) alt_bspstore,
+ (unsigned long *) context->bsp);
+ bspstore = (unsigned long)
+ ia64_rse_skip_regs (ar_bsp, -ndirty);
+ ia64_copy_rbs (context, bspstore, alt_bspstore, loadrs,
+ sc->sc_ar_rnat);
+ }
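+
+      /* Added commentary: sc_rbs_base is nonzero when the handler ran on
+         an alternate register backing store.  The loadrs field sits in
+         bits 16..29 of the RSC-format sc_loadrs word, hence the shift,
+         and the dirty registers are copied back to where they would have
+         been flushed on the interrupted backing store.  */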
+
+      /* Don't touch the branch registers other than b0, b6 and b7.
+ The kernel doesn't pass the preserved branch registers
+ in the sigcontext but leaves them intact, so there's no
+ need to do anything with them here. */
+ {
+ unsigned long sof = sc->sc_cfm & 0x7f;
+ context->bsp = (unsigned long)
+ ia64_rse_skip_regs ((unsigned long *)(sc->sc_ar_bsp), -sof);
+ }
+
+ /* Account for use of br.ret to resume execution of user code. */
+ fs->curr.reg[UNW_REG_RP].where = UNW_WHERE_SPREL;
+ fs->curr.reg[UNW_REG_RP].val
+ = (unsigned long)&(sc->sc_ip) - context->psp;
+ fs->curr.reg[UNW_REG_RP].when = -1;
+
+ fs->curr.reg[UNW_REG_PFS].where = UNW_WHERE_SPREL;
+ fs->curr.reg[UNW_REG_PFS].val
+ = (unsigned long)&(sc->sc_cfm) - context->psp;
+      fs->curr.reg[UNW_REG_PFS].when = -1;
+
+ return _URC_NO_REASON;
+ }
+ return _URC_END_OF_STACK;
+}
+
+#define MD_HANDLE_UNWABI ia64_handle_unwabi
+
+#define ABI_MARKER_OLD_LINUX_SIGTRAMP ((0 << 8) | 's')
+#define ABI_MARKER_OLD_LINUX_INTERRUPT ((0 << 8) | 'i')
+#define ABI_MARKER_LINUX_SIGTRAMP ((3 << 8) | 's')
+#define ABI_MARKER_LINUX_INTERRUPT ((3 << 8) | 'i')
+
+static void
+ia64_handle_unwabi (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ if (fs->unwabi == ABI_MARKER_LINUX_SIGTRAMP
+ || fs->unwabi == ABI_MARKER_OLD_LINUX_SIGTRAMP)
+ {
+ struct sigframe {
+ char scratch[16];
+ unsigned long sig_number;
+ siginfo_t *info;
+ struct sigcontext *sc;
+ } *frame = (struct sigframe *)context->psp;
+ struct sigcontext *sc = frame->sc;
+
+ /* Restore scratch registers in case the unwinder needs to
+ refer to a value stored in one of them. */
+ {
+ int i;
+
+ for (i = 2; i < 4; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ for (i = 8; i < 12; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ for (i = 14; i < 32; i++)
+ context->ireg[i - 2].loc = &sc->sc_gr[i];
+ }
+
+ context->signal_pfs_loc = &(sc->sc_ar_pfs);
+ context->lc_loc = &(sc->sc_ar_lc);
+ context->unat_loc = &(sc->sc_ar_unat);
+ context->br_loc[0] = &(sc->sc_br[0]);
+ context->br_loc[6] = &(sc->sc_br[6]);
+ context->br_loc[7] = &(sc->sc_br[7]);
+ context->pr = sc->sc_pr;
+ context->gp = sc->sc_gr[1];
+      /* The signal frame has no associated register stack frame other
+         than what we adjust for below. */
+      fs->no_reg_stack_frame = 1;
+
+ if (sc->sc_rbs_base)
+ {
+ /* Need to switch from alternate register backing store. */
+ long ndirty, loadrs = sc->sc_loadrs >> 16;
+ unsigned long alt_bspstore = context->bsp - loadrs;
+ unsigned long bspstore;
+ unsigned long *ar_bsp = (unsigned long *)(sc->sc_ar_bsp);
+
+ ndirty = ia64_rse_num_regs ((unsigned long *) alt_bspstore,
+ (unsigned long *) context->bsp);
+ bspstore = (unsigned long) ia64_rse_skip_regs (ar_bsp, -ndirty);
+ ia64_copy_rbs (context, bspstore, alt_bspstore, loadrs,
+ sc->sc_ar_rnat);
+ }
+
+      /* Don't touch the branch registers other than b0, b6 and b7.
+ The kernel doesn't pass the preserved branch registers
+ in the sigcontext but leaves them intact, so there's no
+ need to do anything with them here. */
+ {
+ unsigned long sof = sc->sc_cfm & 0x7f;
+ context->bsp = (unsigned long)
+ ia64_rse_skip_regs ((unsigned long *)(sc->sc_ar_bsp), -sof);
+ }
+
+ /* The use of br.ret to resume execution of user code is already
+ accounted for in the unwind ABI. */
+ }
+}
+#endif /* glibc-2.3 or better */
diff --git a/gcc/config/ia64/linux.h b/gcc/config/ia64/linux.h
new file mode 100644
index 000000000..c86c1c4ea
--- /dev/null
+++ b/gcc/config/ia64/linux.h
@@ -0,0 +1,95 @@
+/* Definitions for ia64-linux target.
+
+Copyright (C) 2000, 2001, 2002, 2003, 2004, 2006,
+2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This macro is a C statement to print on `stderr' a string describing the
+ particular machine description choice. */
+
+#define TARGET_VERSION fprintf (stderr, " (IA-64) Linux");
+
+/* This is for -profile to use -lc_p instead of -lc. */
+#undef CC1_SPEC
+#define CC1_SPEC "%{profile:-p} %{G*}"
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() \
+do { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ builtin_define("_LONGLONG"); \
+} while (0)
+
+/* We need to override linux.h's STARTFILE_SPEC, since it includes crtbeginT.o. */
+#undef STARTFILE_SPEC
+#ifdef HAVE_LD_PIE
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\
+ crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}}\
+ crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
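+
+/* Added commentary: in spec language, %{a|b:X;:Y} substitutes X when any
+   of the listed options was given and Y otherwise, while %s resolves the
+   file against the startfile search path.  A default link therefore pulls
+   in crt1.o crti.o crtbegin.o, and -pie (with HAVE_LD_PIE) selects
+   Scrt1.o crti.o crtbeginS.o instead.  */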
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+/* Define this for shared library support because it isn't in the main
+ linux.h file. */
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-ia64.so.2"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#define JMP_BUF_SIZE 76
+
+/* Override linux.h LINK_EH_SPEC definition.
+   Signal that, because we have fde-glibc, we don't need all C shared
+   libraries linked against -lgcc_s. */
+#undef LINK_EH_SPEC
+#define LINK_EH_SPEC ""
+
+#define MD_UNWIND_SUPPORT "config/ia64/linux-unwind.h"
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#undef LIBGCC2_TF_CEXT
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS ia64_soft_fp_init_libfuncs
diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md
new file mode 100644
index 000000000..6622b2001
--- /dev/null
+++ b/gcc/config/ia64/predicates.md
@@ -0,0 +1,630 @@
+;; Predicate definitions for IA-64.
+;; Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; True if OP is a valid operand for the MEM of a CALL insn.
+(define_predicate "call_operand"
+ (ior (match_code "symbol_ref")
+ (match_operand 0 "register_operand")))
+
+;; True if OP refers to any kind of symbol.
+;; For roughly the same reasons that pmode_register_operand exists, this
+;; predicate ignores its mode argument.
+(define_special_predicate "symbolic_operand"
+ (match_code "symbol_ref,const,label_ref"))
+
+;; True if OP is a SYMBOL_REF which refers to a function.
+(define_predicate "function_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_FUNCTION_P (op)")))
+
+;; True if OP refers to a symbol in the sdata section.
+(define_predicate "sdata_symbolic_operand"
+ (match_code "symbol_ref,const")
+{
+ HOST_WIDE_INT offset = 0, size = 0;
+
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+ offset = INTVAL (XEXP (op, 1));
+ op = XEXP (op, 0);
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ size = GET_MODE_SIZE (get_pool_mode (op));
+ if (size > ia64_section_threshold)
+ return false;
+ }
+ else
+ {
+ tree t;
+
+ if (!SYMBOL_REF_LOCAL_P (op) || !SYMBOL_REF_SMALL_P (op))
+ return false;
+
+ /* Note that in addition to DECLs, we can get various forms
+ of constants here. */
+ t = SYMBOL_REF_DECL (op);
+ if (DECL_P (t))
+ t = DECL_SIZE_UNIT (t);
+ else
+ t = TYPE_SIZE_UNIT (TREE_TYPE (t));
+ if (t && host_integerp (t, 0))
+ {
+ size = tree_low_cst (t, 0);
+ if (size < 0)
+ size = 0;
+ }
+ }
+
+ /* Deny the stupid user trick of addressing outside the object. Such
+ things quickly result in GPREL22 relocation overflows. Of course,
+ they're also highly undefined. From a pure pedant's point of view
+ they deserve a slap on the wrist (such as provided by a relocation
+ overflow), but that just leads to bugzilla noise. */
+ return (offset >= 0 && offset <= size);
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; True if OP refers to a symbol in the small address area.
+(define_predicate "small_addr_symbolic_operand"
+ (match_code "symbol_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+ op = XEXP (op, 0);
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ return SYMBOL_REF_SMALL_ADDR_P (op);
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; True if OP refers to a symbol with which we may use any offset.
+(define_predicate "any_offset_symbol_operand"
+ (match_code "symbol_ref")
+{
+ if (TARGET_NO_PIC || TARGET_AUTO_PIC)
+ return true;
+ if (SYMBOL_REF_SMALL_ADDR_P (op))
+ return true;
+ if (SYMBOL_REF_FUNCTION_P (op))
+ return false;
+ if (sdata_symbolic_operand (op, mode))
+ return true;
+ return false;
+})
+
+;; True if OP refers to a symbol with which we may use 14-bit aligned offsets.
+;; False if OP refers to a symbol with which we may not use any offset at any
+;; time.
+(define_predicate "aligned_offset_symbol_operand"
+ (and (match_code "symbol_ref")
+ (match_test "! SYMBOL_REF_FUNCTION_P (op)")))
+
+;; True if OP refers to a symbol, and is appropriate for a GOT load.
+(define_predicate "got_symbolic_operand"
+ (match_operand 0 "symbolic_operand" "")
+{
+ HOST_WIDE_INT addend = 0;
+
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ /* Accept only (plus (symbol_ref) (const_int)). */
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+
+ addend = INTVAL (XEXP (op, 1));
+ op = XEXP (op, 0);
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* These symbols shouldn't be used with got loads. */
+ if (SYMBOL_REF_SMALL_ADDR_P (op))
+ return false;
+ if (SYMBOL_REF_TLS_MODEL (op) != 0)
+ return false;
+
+ if (any_offset_symbol_operand (op, mode))
+ return true;
+
+ /* The low 14 bits of the constant have been forced to zero
+ so that we do not use up so many GOT entries. Prevent cse
+ from undoing this. */
+ if (aligned_offset_symbol_operand (op, mode))
+ return (addend & 0x3fff) == 0;
+
+ return addend == 0;
+
+ default:
+ gcc_unreachable ();
+ }
+})
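+
+;; Added commentary: the aligned-offset case presumably lets several
+;; references share one GOT entry for "sym + (offset & ~0x3fff)", with the
+;; low 14 bits (an addl-sized immediate) added separately; accepting only
+;; addends with those bits clear keeps CSE from folding the add back into
+;; a fresh GOT load.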
+
+;; Return true if OP is a valid thread local storage symbolic operand.
+(define_predicate "tls_symbolic_operand"
+ (match_code "symbol_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ return SYMBOL_REF_TLS_MODEL (op) != 0;
+
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+
+ /* We only allow certain offsets for certain tls models. */
+ switch (SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ return false;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ return (INTVAL (XEXP (op, 1)) & 0x3fff) == 0;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ return true;
+
+ default:
+ return false;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if OP is a local-dynamic thread local storage symbolic operand.
+(define_predicate "ld_tls_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_DYNAMIC")))
+
+;; Return true if OP is an initial-exec thread local storage symbolic operand.
+(define_predicate "ie_tls_symbolic_operand"
+ (match_code "symbol_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT
+ || (INTVAL (XEXP (op, 1)) & 0x3fff) != 0)
+ return false;
+ op = XEXP (op, 0);
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC;
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if OP is a local-exec thread local storage symbolic operand.
+(define_predicate "le_tls_symbolic_operand"
+ (match_code "symbol_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+ op = XEXP (op, 0);
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC;
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Like nonimmediate_operand, but don't allow MEMs that try to use a
+;; POST_MODIFY with a REG as displacement.
+(define_predicate "destination_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (match_test "GET_CODE (op) != MEM
+ || GET_CODE (XEXP (op, 0)) != POST_MODIFY
+ || GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) != REG")))
+
+;; Like destination_operand, but don't allow any post-increments.
+(define_predicate "not_postinc_destination_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (match_test "GET_CODE (op) != MEM
+ || GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC")))
+
+;; Like memory_operand, but don't allow post-increments.
+(define_predicate "not_postinc_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC")))
+
+;; True if OP is a general operand, with some restrictions on symbols.
+(define_predicate "move_operand"
+ (match_operand 0 "general_operand")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST:
+ {
+ HOST_WIDE_INT addend;
+
+ /* Accept only (plus (symbol_ref) (const_int)). */
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (op, 1)) != CONST_INT)
+ return false;
+
+ addend = INTVAL (XEXP (op, 1));
+ op = XEXP (op, 0);
+
+ /* After reload, we want to allow any offset whatsoever. This
+ allows reload the opportunity to avoid spilling addresses to
+ the stack, and instead simply substitute in the value from a
+ REG_EQUIV. We'll split this up again when splitting the insn. */
+ if (reload_in_progress || reload_completed)
+ return true;
+
+ /* Some symbol types we allow to use with any offset. */
+ if (any_offset_symbol_operand (op, mode))
+ return true;
+
+ /* Some symbol types we allow offsets with the low 14 bits of the
+ constant forced to zero so that we do not use up so many GOT
+ entries. We want to prevent cse from undoing this. */
+ if (aligned_offset_symbol_operand (op, mode))
+ return (addend & 0x3fff) == 0;
+
+ /* The remaining symbol types may never be used with an offset. */
+ return false;
+ }
+
+ default:
+ return true;
+ }
+})
+
+;; Like move_operand but don't allow post-increments.
+(define_predicate "not_postinc_move_operand"
+ (and (match_operand 0 "move_operand")
+ (match_test "GET_CODE (op) != MEM
+ || GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC")))
+
+;; True if OP is a register operand that is (or could be) a GR reg.
+(define_predicate "gr_register_operand"
+ (match_operand 0 "register_operand")
+{
+ unsigned int regno;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER || GENERAL_REGNO_P (regno));
+})
+
+;; True if OP is a register operand that is (or could be) an FR reg.
+(define_predicate "fr_register_operand"
+ (match_operand 0 "register_operand")
+{
+ unsigned int regno;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER || FR_REGNO_P (regno));
+})
+
+;; True if OP is a register operand that is (or could be) a GR/FR reg.
+(define_predicate "grfr_register_operand"
+ (match_operand 0 "register_operand")
+{
+ unsigned int regno;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER
+ || GENERAL_REGNO_P (regno)
+ || FR_REGNO_P (regno));
+})
+
+;; True if OP is a nonimmediate operand that is (or could be) a GR reg.
+(define_predicate "gr_nonimmediate_operand"
+ (match_operand 0 "nonimmediate_operand")
+{
+ unsigned int regno;
+
+ if (GET_CODE (op) == MEM)
+ return true;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER || GENERAL_REGNO_P (regno));
+})
+
+;; True if OP is a nonimmediate operand that is (or could be) a FR reg.
+(define_predicate "fr_nonimmediate_operand"
+ (match_operand 0 "nonimmediate_operand")
+{
+ unsigned int regno;
+
+ if (GET_CODE (op) == MEM)
+ return true;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER || FR_REGNO_P (regno));
+})
+
+;; True if OP is a nonimmediate operand that is (or could be) a GR/FR reg.
+(define_predicate "grfr_nonimmediate_operand"
+ (match_operand 0 "nonimmediate_operand")
+{
+ unsigned int regno;
+
+ if (GET_CODE (op) == MEM)
+ return true;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ regno = REGNO (op);
+ return (regno >= FIRST_PSEUDO_REGISTER
+ || GENERAL_REGNO_P (regno)
+ || FR_REGNO_P (regno));
+})
+
+;; True if OP is a GR register operand, or zero.
+(define_predicate "gr_reg_or_0_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
+
+;; True if OP is a GR register operand, or a 5-bit immediate operand.
+(define_predicate "gr_reg_or_5bit_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 32"))))
+
+;; True if OP is a GR register operand, or a 6-bit immediate operand.
+(define_predicate "gr_reg_or_6bit_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_M (op)"))))
+
+;; True if OP is a GR register operand, or an 8-bit immediate operand.
+(define_predicate "gr_reg_or_8bit_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_K (op)"))))
+
+;; True if OP is a GR/FR register operand, or an 8-bit immediate operand.
+(define_predicate "grfr_reg_or_8bit_operand"
+ (ior (match_operand 0 "grfr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_K (op)"))))
+
+;; True if OP is a register operand, or an 8-bit adjusted immediate operand.
+(define_predicate "gr_reg_or_8bit_adjusted_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_L (op)"))))
+
+;; True if OP is a register operand, or is valid for both an 8-bit
+;; immediate and an 8-bit adjusted immediate operand. This is necessary
+;; because when we emit a compare, we don't know what the condition will be,
+;; so we need the union of the immediates accepted by GT and LT.
+(define_predicate "gr_reg_or_8bit_and_adjusted_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_K (op)
+ && satisfies_constraint_L (op)"))))
+
+;; True if OP is a register operand, or a 14-bit immediate operand.
+(define_predicate "gr_reg_or_14bit_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_I (op)"))))
+
+;; True if OP is a register operand, or a 22-bit immediate operand.
+(define_predicate "gr_reg_or_22bit_operand"
+ (ior (match_operand 0 "gr_register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_J (op)"))))
+
+;; True if OP is a 7-bit immediate operand.
+(define_predicate "dshift_count_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 128")))
+
+;; True if OP is a 6-bit immediate operand.
+(define_predicate "shift_count_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_M (op)")))
+
+;; True if OP-1 is a 6-bit immediate operand, as used in the extr instruction.
+(define_predicate "extr_len_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_M (GEN_INT (INTVAL (op) - 1))")))
+
+;; True if OP is a 5-bit immediate operand.
+(define_predicate "shift_32bit_count_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 32")))
+
+;; True if OP is one of the immediate values 2, 4, 8, or 16.
+(define_predicate "shladd_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 ||
+ INTVAL (op) == 8 || INTVAL (op) == 16")))
+
+;; True if OP is one of the immediate values 1, 2, 3, or 4.
+(define_predicate "shladd_log2_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 1 && INTVAL (op) <= 4")))
+
+;; True if OP is one of the immediate values -16, -8, -4, -1, 1, 4, 8, 16.
+(define_predicate "fetchadd_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == -16 || INTVAL (op) == -8 ||
+ INTVAL (op) == -4 || INTVAL (op) == -1 ||
+ INTVAL (op) == 1 || INTVAL (op) == 4 ||
+ INTVAL (op) == 8 || INTVAL (op) == 16")))
+
+;; True if OP is one of the immediate values 0, 7, 15, or 16.
+(define_predicate "pmpyshr_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 0 || INTVAL (op) == 7
+ || INTVAL (op) == 15 || INTVAL (op) == 16")))
+
+;; True if OP is 0..3.
+(define_predicate "const_int_2bit_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3")))
+
+;; True if OP is a floating-point constant zero, one, or a register.
+(define_predicate "fr_reg_or_fp01_operand"
+ (ior (match_operand 0 "fr_register_operand")
+ (and (match_code "const_double")
+ (match_test "satisfies_constraint_G (op)"))))
+
+;; Like fr_reg_or_fp01_operand, but don't allow any SUBREGs.
+(define_predicate "xfreg_or_fp01_operand"
+ (and (match_operand 0 "fr_reg_or_fp01_operand")
+ (not (match_code "subreg"))))
+
+;; Like fr_reg_or_fp01_operand, but don't allow 0 if flag_signed_zeros is set.
+;; Using f0 as the second arg to fadd or fsub, or as the third arg to fma or
+;; fms can cause a zero result to have the wrong sign.
+(define_predicate "fr_reg_or_signed_fp01_operand"
+ (ior (match_operand 0 "fr_register_operand")
+ (and (match_code "const_double")
+ (match_test "satisfies_constraint_Z (op)"))))
+
+;; Like fr_reg_or_signed_fp01_operand, but don't allow any SUBREGs.
+(define_predicate "xfreg_or_signed_fp01_operand"
+ (and (match_operand 0 "fr_reg_or_signed_fp01_operand")
+ (not (match_code "subreg"))))
+
+;; True if OP is a constant zero, or a register.
+(define_predicate "fr_reg_or_0_operand"
+ (ior (match_operand 0 "fr_register_operand")
+ (and (match_code "const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
+
+;; Return 1 if OP is a valid comparison operator for "cbranch" instructions.
+(define_predicate "ia64_cbranch_operator"
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (match_code "ordered,unordered")))
+
+;; True if this is a comparison operator, which accepts a normal 8-bit
+;; signed immediate operand.
+(define_predicate "normal_comparison_operator"
+ (match_code "eq,ne,gt,le,gtu,leu"))
+
+;; True if this is a comparison operator, which accepts an adjusted 8-bit
+;; signed immediate operand.
+(define_predicate "adjusted_comparison_operator"
+ (match_code "lt,ge,ltu,geu"))
+
+;; True if this is a signed inequality operator.
+(define_predicate "signed_inequality_operator"
+ (match_code "ge,gt,le,lt"))
+
+;; True if this operator is valid for predication.
+(define_predicate "predicate_operator"
+ (match_code "eq,ne"))
+
+;; True if this operator can be used in a conditional operation.
+(define_predicate "condop_operator"
+ (match_code "plus,minus,ior,xor,and"))
+
+;; These three are hardware registers that can only be addressed in
+;; DImode. It's not strictly necessary to test mode == DImode here,
+;; but it makes decent insurance against someone writing a
+;; match_operand wrong.
+
+;; True if this is the ar.lc register.
+(define_predicate "ar_lc_reg_operand"
+ (and (match_code "reg")
+ (match_test "mode == DImode && REGNO (op) == AR_LC_REGNUM")))
+
+;; True if this is the ar.ccv register.
+(define_predicate "ar_ccv_reg_operand"
+ (and (match_code "reg")
+ (match_test "mode == DImode && REGNO (op) == AR_CCV_REGNUM")))
+
+;; True if this is the ar.pfs register.
+(define_predicate "ar_pfs_reg_operand"
+ (and (match_code "reg")
+ (match_test "mode == DImode && REGNO (op) == AR_PFS_REGNUM")))
+
+;; True if OP is valid as a base register in a reg + offset address.
+;; ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
+;; checks from pa.c basereg_operand as well? Seems to be OK without them
+;; in test runs.
+(define_predicate "basereg_operand"
+ (match_operand 0 "register_operand")
+{
+ return REG_P (op) && REG_POINTER (op);
+})
+
+;; True if this is the right-most vector element; for mux1 @brcst.
+(define_predicate "mux1_brcst_element"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
diff --git a/gcc/config/ia64/quadlib.c b/gcc/config/ia64/quadlib.c
new file mode 100644
index 000000000..f9ee30b58
--- /dev/null
+++ b/gcc/config/ia64/quadlib.c
@@ -0,0 +1,78 @@
+/* Subroutines for long double support.
+ Copyright (C) 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+extern int _U_Qfcmp (long double a, long double b, int);
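+
+/* Added commentary: the third argument selects the predicate evaluated by
+   the HP-UX _U_Qfcmp routine.  Reading the wrappers below, 4 tests equal,
+   9 less, 13 less-or-equal, 17 greater, 21 greater-or-equal, and 22 the
+   "greater" side used by _U_Qfcomp to pick 1 over -1; treat these
+   meanings as inferred, not as a documented API.  */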
+
+int _U_Qfeq (long double, long double);
+int _U_Qfne (long double, long double);
+int _U_Qfgt (long double, long double);
+int _U_Qfge (long double, long double);
+int _U_Qflt (long double, long double);
+int _U_Qfle (long double, long double);
+int _U_Qfcomp (long double, long double);
+
+int
+_U_Qfeq (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 4) != 0);
+}
+
+int
+_U_Qfne (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 4) == 0);
+}
+
+int
+_U_Qfgt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 17) != 0);
+}
+
+int
+_U_Qfge (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 21) != 0);
+}
+
+int
+_U_Qflt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 9) != 0);
+}
+
+int
+_U_Qfle (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, 13) != 0);
+}
+
+int
+_U_Qfcomp (long double a, long double b)
+{
+ if (_U_Qfcmp (a, b, 4) == 0)
+ return 0;
+
+ return (_U_Qfcmp (a, b, 22) != 0 ? 1 : -1);
+}
diff --git a/gcc/config/ia64/sfp-machine.h b/gcc/config/ia64/sfp-machine.h
new file mode 100644
index 000000000..bdcce772c
--- /dev/null
+++ b/gcc/config/ia64/sfp-machine.h
@@ -0,0 +1,116 @@
+#define _FP_W_TYPE_SIZE 64
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+typedef int TItype __attribute__ ((mode (TI)));
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+
+#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype))
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S _FP_QNANBIT_S
+#define _FP_NANFRAC_D _FP_QNANBIT_D
+#define _FP_NANFRAC_E _FP_QNANBIT_E, 0
+#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0
+#define _FP_NANSIGN_S 1
+#define _FP_NANSIGN_D 1
+#define _FP_NANSIGN_E 1
+#define _FP_NANSIGN_Q 1
+
+#define _FP_KEEPNANFRACP 1
+
+/* Here is something Intel misdesigned: the specs don't define the
+   case where we have two NaNs with the same mantissa but different
+   signs.  Different operations pick up different NaNs. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if (_FP_FRAC_GT_##wc(X, Y) \
+ || (_FP_FRAC_EQ_##wc(X,Y) && (OP == '+' || OP == '*'))) \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ else \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define FP_EX_INVALID 0x01
+#define FP_EX_DENORM 0x02
+#define FP_EX_DIVZERO 0x04
+#define FP_EX_OVERFLOW 0x08
+#define FP_EX_UNDERFLOW 0x10
+#define FP_EX_INEXACT 0x20
+
+#define FP_HANDLE_EXCEPTIONS \
+ do { \
+ double tmp, dummy; \
+ if (_fex & FP_EX_INVALID) \
+ { \
+ tmp = 0.0; \
+ __asm__ __volatile__ ("frcpa.s0 %0,p1=f0,f0" \
+ : "=f" (tmp) : : "p1" ); \
+ } \
+ if (_fex & FP_EX_DIVZERO) \
+ { \
+ __asm__ __volatile__ ("frcpa.s0 %0,p1=f1,f0" \
+ : "=f" (tmp) : : "p1" ); \
+ } \
+ if (_fex & FP_EX_OVERFLOW) \
+ { \
+ dummy = __DBL_MAX__; \
+ __asm__ __volatile__ ("fadd.d.s0 %0=%1,%1" \
+ : "=f" (dummy) : "0" (dummy)); \
+ } \
+ if (_fex & FP_EX_UNDERFLOW) \
+ { \
+ dummy = __DBL_MIN__; \
+ __asm__ __volatile__ ("fnma.d.s0 %0=%1,%1,f0" \
+ : "=f" (tmp) : "f" (dummy)); \
+ } \
+ if (_fex & FP_EX_INEXACT) \
+ { \
+ dummy = __DBL_MAX__; \
+ __asm__ __volatile__ ("fsub.d.s0 %0=%1,f1" \
+ : "=f" (dummy) : "0" (dummy)); \
+ } \
+ } while (0)
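+
+/* Added commentary: rather than writing ar.fpsr directly, the macro above
+   raises each requested IEEE flag as the side effect of a real operation:
+   frcpa on (f0,f0) is 0/0 (invalid), frcpa on (f1,f0) is 1/0 (zero
+   divide), DBL_MAX + DBL_MAX overflows, -(DBL_MIN * DBL_MIN) underflows,
+   and DBL_MAX - 1 is inexact.  */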
+
+#define FP_RND_NEAREST 0
+#define FP_RND_ZERO 0xc00L
+#define FP_RND_PINF 0x800L
+#define FP_RND_MINF 0x400L
+
+#define _FP_DECL_EX \
+ unsigned long int _fpsr __attribute__ ((unused)) = FP_RND_NEAREST
+
+#define FP_INIT_ROUNDMODE \
+ do { \
+ __asm__ __volatile__ ("mov.m %0=ar.fpsr" \
+ : "=r" (_fpsr)); \
+ } while (0)
+
+#define FP_ROUNDMODE (_fpsr & 0xc00L)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#define __BYTE_ORDER __LITTLE_ENDIAN
+
+/* Define ALIASNAME as a strong alias for NAME. */
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
diff --git a/gcc/config/ia64/sync.md b/gcc/config/ia64/sync.md
new file mode 100644
index 000000000..06ca8f09b
--- /dev/null
+++ b/gcc/config/ia64/sync.md
@@ -0,0 +1,187 @@
+;; GCC machine description for IA-64 synchronization instructions.
+;; Copyright (C) 2005, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator IMODE [QI HI SI DI])
+(define_mode_iterator I124MODE [QI HI SI])
+(define_mode_iterator I48MODE [SI DI])
+(define_mode_attr modesuffix [(QI "1") (HI "2") (SI "4") (DI "8")])
+
+(define_code_iterator FETCHOP [plus minus ior xor and])
+(define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")])
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MF))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MF))]
+ ""
+ "mf"
+ [(set_attr "itanium_class" "syst_m")])
+
+(define_insn "fetchadd_acq_<mode>"
+ [(set (match_operand:I48MODE 0 "gr_register_operand" "=r")
+ (match_operand:I48MODE 1 "not_postinc_memory_operand" "+S"))
+ (set (match_dup 1)
+ (unspec:I48MODE [(match_dup 1)
+ (match_operand:I48MODE 2 "fetchadd_operand" "n")]
+ UNSPEC_FETCHADD_ACQ))]
+ ""
+ "fetchadd<modesuffix>.acq %0 = %1, %2"
+ [(set_attr "itanium_class" "sem")])
+
+(define_expand "sync_<fetchop_name><mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "")
+ (FETCHOP:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "general_operand" "")))]
+ ""
+{
+ ia64_expand_atomic_op (<CODE>, operands[0], operands[1], NULL, NULL);
+ DONE;
+})
+
+(define_expand "sync_nand<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "")
+ (not:IMODE
+ (and:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "general_operand" ""))))]
+ ""
+{
+ ia64_expand_atomic_op (NOT, operands[0], operands[1], NULL, NULL);
+ DONE;
+})
+
+(define_expand "sync_old_<fetchop_name><mode>"
+ [(set (match_operand:IMODE 0 "gr_register_operand" "")
+ (FETCHOP:IMODE
+ (match_operand:IMODE 1 "memory_operand" "")
+ (match_operand:IMODE 2 "general_operand" "")))]
+ ""
+{
+ ia64_expand_atomic_op (<CODE>, operands[1], operands[2], operands[0], NULL);
+ DONE;
+})
+
+(define_expand "sync_old_nand<mode>"
+ [(set (match_operand:IMODE 0 "gr_register_operand" "")
+ (not:IMODE
+ (and:IMODE (match_operand:IMODE 1 "memory_operand" "")
+ (match_operand:IMODE 2 "general_operand" ""))))]
+ ""
+{
+ ia64_expand_atomic_op (NOT, operands[1], operands[2], operands[0], NULL);
+ DONE;
+})
+
+(define_expand "sync_new_<fetchop_name><mode>"
+ [(set (match_operand:IMODE 0 "gr_register_operand" "")
+ (FETCHOP:IMODE
+ (match_operand:IMODE 1 "memory_operand" "")
+ (match_operand:IMODE 2 "general_operand" "")))]
+ ""
+{
+ ia64_expand_atomic_op (<CODE>, operands[1], operands[2], NULL, operands[0]);
+ DONE;
+})
+
+(define_expand "sync_new_nand<mode>"
+ [(set (match_operand:IMODE 0 "gr_register_operand" "")
+ (not:IMODE
+ (and:IMODE (match_operand:IMODE 1 "memory_operand" "")
+ (match_operand:IMODE 2 "general_operand" ""))))]
+ ""
+{
+ ia64_expand_atomic_op (NOT, operands[1], operands[2], NULL, operands[0]);
+ DONE;
+})
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(match_operand:IMODE 0 "gr_register_operand" "")
+ (match_operand:IMODE 1 "memory_operand" "")
+ (match_operand:IMODE 2 "gr_register_operand" "")
+ (match_operand:IMODE 3 "gr_register_operand" "")]
+ ""
+{
+ rtx ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+ rtx dst;
+
+ convert_move (ccv, operands[2], 1);
+
+ dst = operands[0];
+ if (GET_MODE (dst) != DImode)
+ dst = gen_reg_rtx (DImode);
+
+ emit_insn (gen_cmpxchg_rel_<mode> (dst, operands[1], ccv, operands[3]));
+ emit_insn (gen_memory_barrier ());
+
+ if (dst != operands[0])
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, dst));
+ DONE;
+})
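+
+;; Added commentary: the expected value is zero-extended into ar.ccv
+;; (convert_move with unsignedp = 1), the cmpxchg itself carries release
+;; semantics, and the trailing mf upgrades the whole sequence to the full
+;; barrier the __sync builtins require.  For example, a C call such as
+;;   __sync_val_compare_and_swap (&word, expected, desired)
+;; expands through this pattern.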
+
+(define_insn "cmpxchg_rel_<mode>"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (zero_extend:DI
+ (match_operand:I124MODE 1 "not_postinc_memory_operand" "+S")))
+ (set (match_dup 1)
+ (unspec:I124MODE
+ [(match_dup 1)
+ (match_operand:DI 2 "ar_ccv_reg_operand" "")
+ (match_operand:I124MODE 3 "gr_reg_or_0_operand" "rO")]
+ UNSPEC_CMPXCHG_ACQ))]
+ ""
+ "cmpxchg<modesuffix>.rel %0 = %1, %r3, %2"
+ [(set_attr "itanium_class" "sem")])
+
+(define_insn "cmpxchg_rel_di"
+ [(set (match_operand:DI 0 "gr_register_operand" "=r")
+ (match_operand:DI 1 "not_postinc_memory_operand" "+S"))
+ (set (match_dup 1)
+ (unspec:DI [(match_dup 1)
+ (match_operand:DI 2 "ar_ccv_reg_operand" "")
+ (match_operand:DI 3 "gr_reg_or_0_operand" "rO")]
+ UNSPEC_CMPXCHG_ACQ))]
+ ""
+ "cmpxchg8.rel %0 = %1, %r3, %2"
+ [(set_attr "itanium_class" "sem")])
+
+(define_insn "sync_lock_test_and_set<mode>"
+ [(set (match_operand:IMODE 0 "gr_register_operand" "=r")
+ (match_operand:IMODE 1 "not_postinc_memory_operand" "+S"))
+ (set (match_dup 1)
+ (match_operand:IMODE 2 "gr_reg_or_0_operand" "rO"))]
+ ""
+ "xchg<modesuffix> %0 = %1, %r2"
+ [(set_attr "itanium_class" "sem")])
+
+(define_expand "sync_lock_release<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "")
+ (match_operand:IMODE 1 "gr_reg_or_0_operand" ""))]
+ ""
+{
+ gcc_assert (MEM_VOLATILE_P (operands[0]));
+})
diff --git a/gcc/config/ia64/sysv4.h b/gcc/config/ia64/sysv4.h
new file mode 100644
index 000000000..25fd22489
--- /dev/null
+++ b/gcc/config/ia64/sysv4.h
@@ -0,0 +1,145 @@
+/* Override definitions in elfos.h to be correct for IA64.
+
+Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005,
+2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS ia64_sysv4_init_libfuncs
+
+/* We want DWARF2 as specified by the IA64 ABI. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Stabs does not work properly for 64-bit targets. */
+#undef DBX_DEBUGGING_INFO
+
+/* Various pseudo-ops for which the Intel assembler uses non-standard
+ definitions. */
+
+#undef STRING_ASM_OP
+#define STRING_ASM_OP "\tstringz\t"
+
+#undef SKIP_ASM_OP
+#define SKIP_ASM_OP "\t.skip\t"
+
+#undef COMMON_ASM_OP
+#define COMMON_ASM_OP "\t.common\t"
+
+#undef ASCII_DATA_ASM_OP
+#define ASCII_DATA_ASM_OP "\tstring\t"
+
+/* ia64-specific options for gas
+ ??? ia64 gas doesn't accept standard svr4 assembler options? */
+#undef ASM_SPEC
+#define ASM_SPEC "-x %{mconstant-gp} %{mauto-pic} %(asm_extra)"
+
+/* ??? Unfortunately, .lcomm doesn't work, because it puts things in either
+ .bss or .sbss, and we can't control the decision of which is used. When
+ I use .lcomm, I get a cryptic "Section group has no member" error from
+ the Intel simulator. So we must explicitly put variables in .bss
+ instead. This matters only if we care about the Intel assembler. */
+
+/* This is asm_output_aligned_bss from varasm.c without the
+ (*targetm.asm_out.globalize_label) call at the beginning. */
+
+/* This is for final.c, because it is used by ASM_DECLARE_OBJECT_NAME. */
+extern int size_directive_output;
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+do { \
+ if ((DECL) && sdata_symbolic_operand (XEXP (DECL_RTL (DECL), 0), Pmode)) \
+ switch_to_section (sbss_section); \
+ else \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_ALIGN (FILE, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \
+ ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \
+ ASM_OUTPUT_SKIP (FILE, SIZE ? SIZE : 1); \
+} while (0)
+
+/* The # tells the Intel assembler that this is not a register name.
+ However, we can't emit the # in a label definition, so we set a variable
+ in ASM_OUTPUT_LABEL to control whether we want the postfix here or not.
+ We append the # to the label name, but since NAME can be an expression
+ we have to scan it for a non-label character and insert the # there. */
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+do { \
+ const char *name_ = NAME; \
+ if (*name_ == '*') \
+ name_++; \
+ else \
+ fputs (user_label_prefix, STREAM); \
+ fputs (name_, STREAM); \
+ if (!ia64_asm_output_label) \
+ fputc ('#', STREAM); \
+} while (0)
+
+/* Intel assembler requires both flags and type if declaring a non-predefined
+ section. */
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t.init,\"ax\",\"progbits\""
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t.fini,\"ax\",\"progbits\""
+
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ia64_dbx_register_number(REGNO)
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* We redefine this to use the ia64 .proc pseudo-op. */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ ia64_start_function(FILE,NAME,DECL)
+
+/* We redefine this to use the ia64 .endp pseudo-op. */
+
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \
+do { \
+ fputs ("\t.endp ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+} while (0)
+
+/* Override default elf definition. */
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK ia64_reloc_rw_mask
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION ia64_select_rtx_section
+
+#define SDATA_SECTION_ASM_OP "\t.sdata"
+#define SBSS_SECTION_ASM_OP "\t.sbss"
diff --git a/gcc/config/ia64/t-fprules-softfp b/gcc/config/ia64/t-fprules-softfp
new file mode 100644
index 000000000..4c876bfa9
--- /dev/null
+++ b/gcc/config/ia64/t-fprules-softfp
@@ -0,0 +1,6 @@
+softfp_float_modes := tf
+softfp_int_modes := si di ti
+softfp_extensions := sftf dftf xftf
+softfp_truncations := tfsf tfdf tfxf
+softfp_machine_header := ia64/sfp-machine.h
+softfp_exclude_libgcc2 := n
diff --git a/gcc/config/ia64/t-glibc b/gcc/config/ia64/t-glibc
new file mode 100644
index 000000000..ddb5fe3c3
--- /dev/null
+++ b/gcc/config/ia64/t-glibc
@@ -0,0 +1,7 @@
+# Use the system libunwind library on IA-64 GLIBC-based systems.
+LIB2ADDEH = $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c \
+ $(srcdir)/unwind-compat.c
+
+SHLIB_MAPFILES += $(srcdir)/config/ia64/libgcc-glibc.ver
+
+MULTIARCH_DIRNAME = $(call if_multiarch,ia64-linux-gnu)
diff --git a/gcc/config/ia64/t-glibc-libunwind b/gcc/config/ia64/t-glibc-libunwind
new file mode 100644
index 000000000..df78f1d09
--- /dev/null
+++ b/gcc/config/ia64/t-glibc-libunwind
@@ -0,0 +1,4 @@
+# Build libunwind for IA-64 GLIBC-based systems.
+LIBUNWIND = $(srcdir)/config/ia64/fde-glibc.c \
+ $(srcdir)/config/ia64/unwind-ia64.c
+LIBUNWINDDEP = unwind.inc
diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux
new file mode 100644
index 000000000..a97ab5c44
--- /dev/null
+++ b/gcc/config/ia64/t-hpux
@@ -0,0 +1,75 @@
+# Copyright (C) 2001, 2002, 2003, 2004, 2005,
+# 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We need multilib support for HP-UX's ILP32 & LP64 modes.
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+MULTILIB_OPTIONS = milp32/mlp64
+MULTILIB_DIRNAMES = hpux32 hpux64
+MULTILIB_MATCHES =
+
+# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from
+# LIB1ASMSRC. These functions map the 128-bit conversion function names
+# to 80-bit conversions and exist for Linux backwards compatibility.
+
+LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS))
+
+# Support routines for HP-UX 128-bit floats.
+
+LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c
+
+quadlib.c: $(srcdir)/config/ia64/quadlib.c
+ cat $(srcdir)/config/ia64/quadlib.c > quadlib.c
+
+# We get an undefined main when building a cross compiler because our
+# link spec has "-u main".  We want that for linking, but it makes
+# LIBGCC1_TEST fail because that test uses -nostdlib -nostartup.
+
+LIBGCC1_TEST =
+
+# We do not want to include the EH stuff that Linux uses; we want to use
+# the HP-UX libunwind library instead.
+
+T_CFLAGS += -DUSE_LIBUNWIND_EXCEPTIONS
+
+LIB2ADDEH = $(srcdir)/unwind-c.c
+
+SHLIB_EXT = .so
+# Must include -lunwind in the link, so that libgcc_s.so has the necessary
+# DT_NEEDED entry for libunwind.
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,+h,@shlib_base_name@.so.0 \
+ -o @multilib_dir@/@shlib_base_name@.so @multilib_flags@ \
+ @shlib_objs@ -lunwind -lc && \
+ rm -f @multilib_dir@/@shlib_base_name@.so.0 && \
+ $(LN_S) @shlib_base_name@.so @multilib_dir@/@shlib_base_name@.so.0
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@; \
+ $(INSTALL_DATA) @multilib_dir@/@shlib_base_name@.so \
+ $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/@shlib_base_name@.so.0; \
+ rm -f $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/@shlib_base_name@.so; \
+ $(LN_S) @shlib_base_name@.so.0 \
+ $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/@shlib_base_name@.so; \
+ chmod +x $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/@shlib_base_name@.so
+
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64
new file mode 100644
index 000000000..3e82d71e3
--- /dev/null
+++ b/gcc/config/ia64/t-ia64
@@ -0,0 +1,57 @@
+# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
+# 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = ia64/lib1funcs.asm
+
+# We use different names for the DImode div/mod files so that they won't
+# conflict with libgcc2.c files. We used to use __ia64 as a prefix; now
+# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines
+# a TImode divide function, so there is no actual overlap here between
+# libgcc2.c and lib1funcs.asm.
+LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \
+ __divdi3 __moddi3 __udivdi3 __umoddi3 \
+ __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \
+ __nonlocal_goto __restore_stack_nonlocal __trampoline \
+ _fixtfdi _fixunstfdi _floatditf
+
+# ??? Hack to get the -P option used when compiling lib1funcs.asm, because
+# the Intel assembler does not accept "# line number" markers as comments.
+# ??? This breaks C++ pragma interface/implementation, which is used in the
+# C++ part of libgcc2, hence it had to be disabled. Must find some other way
+# to support the Intel assembler.
+#LIBGCC2_DEBUG_CFLAGS = -g1 -P
+
+SHLIB_MAPFILES += $(srcdir)/config/ia64/libgcc-ia64.ver
+
+# Effectively disable the crtbegin/end rules that use crtstuff.c
+T = disable
+
+LIB2ADDEH = $(srcdir)/config/ia64/unwind-ia64.c $(srcdir)/unwind-sjlj.c \
+ $(srcdir)/unwind-c.c
+
+ia64-c.o: $(srcdir)/config/ia64/ia64-c.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(TREE_H) $(CPPLIB_H) $(C_COMMON_H) $(C_PRAGMA_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/ia64/ia64-c.c
+
+# genattrtab generates very long string literals.
+insn-attrtab.o-warn = -Wno-error
+
+ia64.o: debug.h $(PARAMS_H) sel-sched.h reload.h
diff --git a/gcc/config/ia64/t-vms b/gcc/config/ia64/t-vms
new file mode 100644
index 000000000..d77a2cf37
--- /dev/null
+++ b/gcc/config/ia64/t-vms
@@ -0,0 +1,49 @@
+# Copyright (C) 2009
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Enable the crtbegin/end rules disabled in t-ia64
+T =
+
+# VMS_EXTRA_PARTS is defined in x-vms and represents object files that
+# are only needed for VMS targets, but can only be compiled on a VMS host
+# (because they need DEC C).
+EXTRA_PARTS = $(VMS_EXTRA_PARTS) crtbegin.o crtbeginS.o crtend.o crtendS.o crtinitS.o
+
+CRTSTUFF_T_CFLAGS = -O0
+CRTSTUFF_T_CFLAGS_S = -O0
+
+$(T)crtinitS.o: $(srcdir)/config/ia64/vms-crtinit.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -I. -c -o $(T)crtinitS.o -x assembler-with-cpp \
+ $(srcdir)/config/ia64/vms-crtinit.asm
+
+LIB2ADDEH += $(srcdir)/config/ia64/fde-vms.c $(srcdir)/gthr-gnat.c
+
+# Shared library macros
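+# Added commentary: the sed pipeline below rewrites the dotted BASEVER into
+# the "major,minorpatch" form gsmatch expects, e.g. 4.6.4 becomes 4,64.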
+shlib_version:=$(shell echo $(BASEVER_c) | sed -e 's/\./,/' -e 's/\.//g')
+SHLIB_EXT = .exe
+SHLIB_OBJS = @shlib_objs@
+SHLIB_NAME = @shlib_base_name@.exe
+SHLIB_MULTILIB =
+SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(libsubdir)/$(SHLIB_NAME)
+SHLIB_LINK = \
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -nodefaultlibs \
+ -shared --for-linker=/noinform -o $(SHLIB_NAME) $(SHLIB_OBJS) \
+ --for-linker=$(srcdir)/config/ia64/VMS_SYMVEC_@shlib_base_name@.opt \
+ --for-linker=gsmatch=equal,$(shlib_version)
+
diff --git a/gcc/config/ia64/unwind-ia64.c b/gcc/config/ia64/unwind-ia64.c
new file mode 100644
index 000000000..f935a0ca3
--- /dev/null
+++ b/gcc/config/ia64/unwind-ia64.c
@@ -0,0 +1,2460 @@
+/* Subroutines needed for unwinding IA-64 standard format stack frame
+ info for exception handling.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006,
+ 2009 Free Software Foundation, Inc.
+ Contributed by Andrew MacLeod <amacleod@cygnus.com>
+ Andrew Haley <aph@cygnus.com>
+ David Mosberger-Tang <davidm@hpl.hp.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "unwind.h"
+#include "unwind-ia64.h"
+#include "unwind-compat.h"
+#include "ia64intrin.h"
+
+/* This isn't thread safe, but nice for occasional tests. */
+#undef ENABLE_MALLOC_CHECKING
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+
+
+/* By default, assume personality routine interface compatibility with
+ our expectations. */
+#ifndef MD_UNW_COMPATIBLE_PERSONALITY_P
+#define MD_UNW_COMPATIBLE_PERSONALITY_P(HEADER) 1
+#endif
+
+enum unw_application_register
+{
+ UNW_AR_BSP,
+ UNW_AR_BSPSTORE,
+ UNW_AR_PFS,
+ UNW_AR_RNAT,
+ UNW_AR_UNAT,
+ UNW_AR_LC,
+ UNW_AR_EC,
+ UNW_AR_FPSR,
+ UNW_AR_RSC,
+ UNW_AR_CCV
+};
+
+enum unw_register_index
+{
+ /* Primary UNAT. */
+ UNW_REG_PRI_UNAT_GR,
+ UNW_REG_PRI_UNAT_MEM,
+
+ /* Memory Stack. */
+ UNW_REG_PSP, /* previous memory stack pointer */
+
+ /* Register Stack. */
+ UNW_REG_BSP, /* register stack pointer */
+ UNW_REG_BSPSTORE,
+ UNW_REG_PFS, /* previous function state */
+ UNW_REG_RNAT,
+ /* Return Pointer. */
+ UNW_REG_RP,
+
+ /* Special preserved registers. */
+ UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
+
+ /* Non-stacked general registers. */
+ UNW_REG_R2,
+ UNW_REG_R4 = UNW_REG_R2 + 2,
+ UNW_REG_R7 = UNW_REG_R2 + 5,
+ UNW_REG_R31 = UNW_REG_R2 + 29,
+
+ /* Non-stacked floating point registers. */
+ UNW_REG_F2,
+ UNW_REG_F5 = UNW_REG_F2 + 3,
+ UNW_REG_F16 = UNW_REG_F2 + 14,
+ UNW_REG_F31 = UNW_REG_F2 + 29,
+
+ /* Branch registers. */
+ UNW_REG_B0, UNW_REG_B1,
+ UNW_REG_B5 = UNW_REG_B1 + 4,
+
+ UNW_NUM_REGS
+};
+
+enum unw_where
+{
+ UNW_WHERE_NONE, /* register isn't saved at all */
+ UNW_WHERE_GR, /* register is saved in a general register */
+ UNW_WHERE_FR, /* register is saved in a floating-point register */
+ UNW_WHERE_BR, /* register is saved in a branch register */
+ UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */
+ UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */
+
+ /* At the end of each prologue these locations get resolved to
+ UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively. */
+ UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */
+ UNW_WHERE_GR_SAVE /* register is saved in next general register */
+};
+
+#define UNW_WHEN_NEVER 0x7fffffff
+
+struct unw_reg_info
+{
+ unsigned long val; /* save location: register number or offset */
+ enum unw_where where; /* where the register gets saved */
+ int when; /* when the register gets saved */
+};
+
+struct unw_reg_state {
+ struct unw_reg_state *next; /* next (outer) element on state stack */
+ struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */
+};
+
+struct unw_labeled_state {
+ struct unw_labeled_state *next; /* next labeled state (or NULL) */
+ unsigned long label; /* label for this state */
+ struct unw_reg_state saved_state;
+};
+
+typedef struct unw_state_record
+{
+ unsigned int first_region : 1; /* is this the first region? */
+ unsigned int done : 1; /* are we done scanning descriptors? */
+ unsigned int any_spills : 1; /* got any register spills? */
+ unsigned int in_body : 1; /* are we inside a body? */
+  unsigned int no_reg_stack_frame : 1;	/* Don't adjust bsp for input & local regs */
+ unsigned char *imask; /* imask of spill_mask record or NULL */
+ unsigned long pr_val; /* predicate values */
+ unsigned long pr_mask; /* predicate mask */
+ long spill_offset; /* psp-relative offset for spill base */
+ int region_start;
+ int region_len;
+ int epilogue_start;
+ int epilogue_count;
+ int when_target;
+
+ unsigned char gr_save_loc; /* next general register to use for saving */
+ unsigned char return_link_reg; /* branch register for return link */
+ unsigned short unwabi;
+
+ struct unw_labeled_state *labeled_states; /* list of all labeled states */
+ struct unw_reg_state curr; /* current state */
+
+ _Unwind_Personality_Fn personality;
+
+} _Unwind_FrameState;
+
+enum unw_nat_type
+{
+ UNW_NAT_NONE, /* NaT not represented */
+ UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */
+ UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */
+ UNW_NAT_REGSTK /* NaT is in rnat */
+};
+
+struct unw_stack
+{
+ unsigned long limit;
+ unsigned long top;
+};
+
+struct _Unwind_Context
+{
+ /* Initial frame info. */
+ unsigned long rnat; /* rse nat collection */
+  unsigned long regstk_top;	/* lowest address of the rbs-stored
+				   registers covered by context->rnat */
+
+ /* Current frame info. */
+ unsigned long bsp; /* backing store pointer value
+ corresponding to psp. */
+ unsigned long sp; /* stack pointer value */
+ unsigned long psp; /* previous sp value */
+ unsigned long rp; /* return pointer */
+ unsigned long pr; /* predicate collection */
+
+ unsigned long region_start; /* start of unwind region */
+ unsigned long gp; /* global pointer value */
+ void *lsda; /* language specific data area */
+
+ /* Preserved state. */
+ unsigned long *bsp_loc; /* previous bsp save location
+ Appears to be write-only? */
+ unsigned long *bspstore_loc;
+ unsigned long *pfs_loc; /* Save location for pfs in current
+ (corr. to sp) frame. Target
+ contains cfm for caller. */
+ unsigned long *signal_pfs_loc;/* Save location for pfs in current
+ signal frame. Target contains
+ pfs for caller. */
+ unsigned long *pri_unat_loc;
+ unsigned long *unat_loc;
+ unsigned long *lc_loc;
+ unsigned long *fpsr_loc;
+
+ unsigned long eh_data[4];
+
+ struct unw_ireg
+ {
+ unsigned long *loc;
+ struct unw_ireg_nat
+ {
+ enum unw_nat_type type : 3;
+ signed long off : 61; /* NaT word is at loc+nat.off */
+ } nat;
+ } ireg[32 - 2]; /* Indexed by <register number> - 2 */
+
+ unsigned long *br_loc[8];
+ void *fr_loc[32 - 2];
+
+ /* ??? We initially point pri_unat_loc here. The entire NAT bit
+ logic needs work. */
+ unsigned long initial_unat;
+};
+
+typedef unsigned long unw_word;
+
+/* Implicit register save order. See section 11.4.2.3 Rules for Using
+ Unwind Descriptors, rule 3. */
+
+static unsigned char const save_order[] =
+{
+ UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
+ UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
+};
+
+
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
+/* MASK is a bitmap describing the allocation state of emergency buffers,
+   with a set bit indicating a free buffer.  Return the allocated bit
+   index (>= 0) on success; a negative value on failure.  */
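+/* E.g. with *MASK = 0x6, ret = old & -old selects bit 1 (value 2), the
+   compare-and-swap clears that bit, and __builtin_ffs (2) - 1 yields
+   index 1.  The loop retries if another thread updated MASK first.  */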
+
+static inline int
+atomic_alloc (unsigned int *mask)
+{
+ unsigned int old = *mask, ret, new;
+
+ while (1)
+ {
+ if (old == 0)
+ return -1;
+ ret = old & -old;
+ new = old & ~ret;
+ new = __sync_val_compare_and_swap (mask, old, new);
+ if (old == new)
+ break;
+ old = new;
+ }
+
+ return __builtin_ffs (ret) - 1;
+}
+
+/* Similarly, free an emergency buffer. */
+
+static inline void
+atomic_free (unsigned int *mask, int bit)
+{
+ __sync_xor_and_fetch (mask, 1 << bit);
+}
+
+
+#define SIZE(X) (sizeof(X) / sizeof(*(X)))
+#define MASK_FOR(X) ((2U << (SIZE (X) - 1)) - 1)
+#define PTR_IN(X, P) ((P) >= (X) && (P) < (X) + SIZE (X))
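+
+/* Note that MASK_FOR computes (2U << (N-1)) - 1 rather than
+   (1U << N) - 1 so that a 32-entry array such as emergency_reg_state
+   yields all ones without shifting by the full width of unsigned int.  */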
+
+static struct unw_reg_state emergency_reg_state[32];
+static unsigned int emergency_reg_state_free = MASK_FOR (emergency_reg_state);
+
+static struct unw_labeled_state emergency_labeled_state[8];
+static unsigned int emergency_labeled_state_free = MASK_FOR (emergency_labeled_state);
+
+#ifdef ENABLE_MALLOC_CHECKING
+static int reg_state_alloced;
+static int labeled_state_alloced;
+#endif
+
+/* Allocation and deallocation of structures. */
+
+static struct unw_reg_state *
+alloc_reg_state (void)
+{
+ struct unw_reg_state *rs;
+
+#ifdef ENABLE_MALLOC_CHECKING
+ reg_state_alloced++;
+#endif
+
+ rs = malloc (sizeof (struct unw_reg_state));
+ if (!rs)
+ {
+ int n = atomic_alloc (&emergency_reg_state_free);
+ if (n >= 0)
+ rs = &emergency_reg_state[n];
+ }
+
+ return rs;
+}
+
+static void
+free_reg_state (struct unw_reg_state *rs)
+{
+#ifdef ENABLE_MALLOC_CHECKING
+ reg_state_alloced--;
+#endif
+
+ if (PTR_IN (emergency_reg_state, rs))
+ atomic_free (&emergency_reg_state_free, rs - emergency_reg_state);
+ else
+ free (rs);
+}
+
+static struct unw_labeled_state *
+alloc_label_state (void)
+{
+ struct unw_labeled_state *ls;
+
+#ifdef ENABLE_MALLOC_CHECKING
+ labeled_state_alloced++;
+#endif
+
+ ls = malloc(sizeof(struct unw_labeled_state));
+ if (!ls)
+ {
+ int n = atomic_alloc (&emergency_labeled_state_free);
+ if (n >= 0)
+ ls = &emergency_labeled_state[n];
+ }
+
+ return ls;
+}
+
+static void
+free_label_state (struct unw_labeled_state *ls)
+{
+#ifdef ENABLE_MALLOC_CHECKING
+ labeled_state_alloced--;
+#endif
+
+ if (PTR_IN (emergency_labeled_state, ls))
+    atomic_free (&emergency_labeled_state_free, ls - emergency_labeled_state);
+ else
+ free (ls);
+}
+
+/* Routines to manipulate the state stack. */
+
+static void
+push (struct unw_state_record *sr)
+{
+ struct unw_reg_state *rs = alloc_reg_state ();
+ memcpy (rs, &sr->curr, sizeof (*rs));
+ sr->curr.next = rs;
+}
+
+static void
+pop (struct unw_state_record *sr)
+{
+ struct unw_reg_state *rs = sr->curr.next;
+
+ if (!rs)
+ abort ();
+ memcpy (&sr->curr, rs, sizeof(*rs));
+ free_reg_state (rs);
+}
+
+/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */
+
+static struct unw_reg_state *
+dup_state_stack (struct unw_reg_state *rs)
+{
+ struct unw_reg_state *copy, *prev = NULL, *first = NULL;
+
+ while (rs)
+ {
+ copy = alloc_reg_state ();
+ memcpy (copy, rs, sizeof(*copy));
+ if (first)
+ prev->next = copy;
+ else
+ first = copy;
+ rs = rs->next;
+ prev = copy;
+ }
+
+ return first;
+}
+
+/* Free all stacked register states (but not RS itself). */
+static void
+free_state_stack (struct unw_reg_state *rs)
+{
+ struct unw_reg_state *p, *next;
+
+ for (p = rs->next; p != NULL; p = next)
+ {
+ next = p->next;
+ free_reg_state (p);
+ }
+ rs->next = NULL;
+}
+
+/* Free all labeled states. */
+
+static void
+free_label_states (struct unw_labeled_state *ls)
+{
+ struct unw_labeled_state *next;
+
+ for (; ls ; ls = next)
+ {
+ next = ls->next;
+
+ free_state_stack (&ls->saved_state);
+ free_label_state (ls);
+ }
+}
+
+/* Unwind decoder routines */
+
+static enum unw_register_index __attribute__((const))
+decode_abreg (unsigned char abreg, int memory)
+{
+ switch (abreg)
+ {
+#if TARGET_ABI_OPEN_VMS
+ /* OpenVMS Calling Standard specifies R3 - R31. */
+ case 0x03 ... 0x1f: return UNW_REG_R2 + (abreg - 0x02);
+#else
+ /* Standard Intel ABI specifies GR 4 - 7. */
+ case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
+#endif
+ case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
+ case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
+ case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
+ case 0x60: return UNW_REG_PR;
+ case 0x61: return UNW_REG_PSP;
+ case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR;
+ case 0x63: return UNW_REG_RP;
+ case 0x64: return UNW_REG_BSP;
+ case 0x65: return UNW_REG_BSPSTORE;
+ case 0x66: return UNW_REG_RNAT;
+ case 0x67: return UNW_REG_UNAT;
+ case 0x68: return UNW_REG_FPSR;
+ case 0x69: return UNW_REG_PFS;
+ case 0x6a: return UNW_REG_LC;
+ default:
+ abort ();
+ }
+}
+
+static void
+set_reg (struct unw_reg_info *reg, enum unw_where where,
+ int when, unsigned long val)
+{
+ reg->val = val;
+ reg->where = where;
+ if (reg->when == UNW_WHEN_NEVER)
+ reg->when = when;
+}
+
+static void
+alloc_spill_area (unsigned long *offp, unsigned long regsize,
+ struct unw_reg_info *lo, struct unw_reg_info *hi)
+{
+ struct unw_reg_info *reg;
+
+ for (reg = hi; reg >= lo; --reg)
+ {
+ if (reg->where == UNW_WHERE_SPILL_HOME)
+ {
+ reg->where = UNW_WHERE_PSPREL;
+ *offp -= regsize;
+ reg->val = *offp;
+ }
+ }
+}
+
+static inline void
+spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim,
+ unw_word t)
+{
+ struct unw_reg_info *reg;
+
+ for (reg = *regp; reg <= lim; ++reg)
+ {
+ if (reg->where == UNW_WHERE_SPILL_HOME)
+ {
+ reg->when = t;
+ *regp = reg + 1;
+ return;
+ }
+ }
+ /* Excess spill. */
+ abort ();
+}
+
+static void
+finish_prologue (struct unw_state_record *sr)
+{
+ struct unw_reg_info *reg;
+ unsigned long off;
+ int i;
+
+ /* First, resolve implicit register save locations
+ (see Section "11.4.2.3 Rules for Using Unwind Descriptors", rule 3). */
+
+ for (i = 0; i < (int) sizeof (save_order); ++i)
+ {
+ reg = sr->curr.reg + save_order[i];
+ if (reg->where == UNW_WHERE_GR_SAVE)
+ {
+ reg->where = UNW_WHERE_GR;
+ reg->val = sr->gr_save_loc++;
+ }
+ }
+
+ /* Next, compute when the fp, general, and branch registers get saved.
+ This must come before alloc_spill_area() because we need to know
+ which registers are spilled to their home locations. */
+ if (sr->imask)
+ {
+ static unsigned char const limit[3] = {
+ UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
+ };
+
+ unsigned char kind, mask = 0, *cp = sr->imask;
+ int t;
+ struct unw_reg_info *(regs[3]);
+
+ regs[0] = sr->curr.reg + UNW_REG_F2;
+ regs[1] = sr->curr.reg + UNW_REG_R4;
+ regs[2] = sr->curr.reg + UNW_REG_B1;
+
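+      /* Each instruction slot of the region has a 2-bit entry in imask,
+         packed four per byte from the most significant bits down:
+         0 = no save, 1 = FR, 2 = GR, 3 = BR.  E.g. a byte of 0x64
+         (01 10 01 00) records an FR, a GR and an FR save in three of
+         the four slots it covers.  */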
+ for (t = 0; t < sr->region_len; ++t)
+ {
+ if ((t & 3) == 0)
+ mask = *cp++;
+ kind = (mask >> 2*(3-(t & 3))) & 3;
+ if (kind > 0)
+ spill_next_when (&regs[kind - 1], sr->curr.reg + limit[kind - 1],
+ sr->region_start + t);
+ }
+ }
+
+ /* Next, lay out the memory stack spill area. */
+ if (sr->any_spills)
+ {
+ off = sr->spill_offset;
+ alloc_spill_area (&off, 16, sr->curr.reg + UNW_REG_F2,
+ sr->curr.reg + UNW_REG_F31);
+ alloc_spill_area (&off, 8, sr->curr.reg + UNW_REG_B1,
+ sr->curr.reg + UNW_REG_B5);
+ alloc_spill_area (&off, 8, sr->curr.reg + UNW_REG_R4,
+ sr->curr.reg + UNW_REG_R7);
+ }
+}
+
+/*
+ * Region header descriptors.
+ */
+
+static void
+desc_prologue (int body, unw_word rlen, unsigned char mask,
+ unsigned char grsave, struct unw_state_record *sr)
+{
+ int i;
+
+ if (!(sr->in_body || sr->first_region))
+ finish_prologue (sr);
+ sr->first_region = 0;
+
+ /* Check if we're done. */
+ if (sr->when_target < sr->region_start + sr->region_len)
+ {
+ sr->done = 1;
+ return;
+ }
+
+ for (i = 0; i < sr->epilogue_count; ++i)
+ pop (sr);
+
+ sr->epilogue_count = 0;
+ sr->epilogue_start = UNW_WHEN_NEVER;
+
+ if (!body)
+ push (sr);
+
+ sr->region_start += sr->region_len;
+ sr->region_len = rlen;
+ sr->in_body = body;
+
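+  /* For a prologue region, bits 3..0 of MASK say whether rp, ar.pfs,
+     psp and pr (in save_order order) were saved in consecutive general
+     registers starting at GRSAVE; e.g. MASK 0xa puts rp in GRSAVE and
+     psp in GRSAVE + 1.  */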
+ if (!body)
+ {
+ for (i = 0; i < 4; ++i)
+ {
+ if (mask & 0x8)
+ set_reg (sr->curr.reg + save_order[i], UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, grsave++);
+ mask <<= 1;
+ }
+ sr->gr_save_loc = grsave;
+ sr->any_spills = 0;
+ sr->imask = 0;
+ sr->spill_offset = 0x10; /* default to psp+16 */
+ }
+}
+
+/*
+ * Prologue descriptors.
+ */
+
+static inline void
+desc_abi (unsigned char abi,
+ unsigned char context,
+ struct unw_state_record *sr)
+{
+ sr->unwabi = (abi << 8) | context;
+}
+
+static inline void
+desc_br_gr (unsigned char brmask, unsigned char gr,
+ struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 5; ++i)
+ {
+ if (brmask & 1)
+ set_reg (sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, gr++);
+ brmask >>= 1;
+ }
+}
+
+static inline void
+desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 5; ++i)
+ {
+ if (brmask & 1)
+ {
+ set_reg (sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ brmask >>= 1;
+ }
+}
+
+static inline void
+desc_frgr_mem (unsigned char grmask, unw_word frmask,
+ struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i)
+ {
+ if ((grmask & 1) != 0)
+ {
+ set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ grmask >>= 1;
+ }
+ for (i = 0; i < 20; ++i)
+ {
+ if ((frmask & 1) != 0)
+ {
+ enum unw_register_index base = i < 4 ? UNW_REG_F2 : UNW_REG_F16 - 4;
+ set_reg (sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ frmask >>= 1;
+ }
+}
+
+static inline void
+desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i)
+ {
+ if ((frmask & 1) != 0)
+ {
+ set_reg (sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ frmask >>= 1;
+ }
+}
+
+static inline void
+desc_gr_gr (unsigned char grmask, unsigned char gr,
+ struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i)
+ {
+ if ((grmask & 1) != 0)
+ set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, gr++);
+ grmask >>= 1;
+ }
+}
+
+static inline void
+desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
+{
+ int i;
+
+ for (i = 0; i < 4; ++i)
+ {
+ if ((grmask & 1) != 0)
+ {
+ set_reg (sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
+ sr->region_start + sr->region_len - 1, 0);
+ sr->any_spills = 1;
+ }
+ grmask >>= 1;
+ }
+}
+
+static inline void
+desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
+{
+ set_reg (sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
+ sr->region_start + MIN ((int)t, sr->region_len - 1), 16*size);
+}
+
+static inline void
+desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
+{
+ sr->curr.reg[UNW_REG_PSP].when
+ = sr->region_start + MIN ((int)t, sr->region_len - 1);
+}
+
+static inline void
+desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
+{
+ set_reg (sr->curr.reg + reg, UNW_WHERE_GR,
+ sr->region_start + sr->region_len - 1, dst);
+}
+
+static inline void
+desc_reg_psprel (unsigned char reg, unw_word pspoff,
+ struct unw_state_record *sr)
+{
+ set_reg (sr->curr.reg + reg, UNW_WHERE_PSPREL,
+ sr->region_start + sr->region_len - 1,
+ 0x10 - 4*pspoff);
+}
+
+static inline void
+desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
+{
+ set_reg (sr->curr.reg + reg, UNW_WHERE_SPREL,
+ sr->region_start + sr->region_len - 1,
+ 4*spoff);
+}
+
+static inline void
+desc_rp_br (unsigned char dst, struct unw_state_record *sr)
+{
+ sr->return_link_reg = dst;
+}
+
+static inline void
+desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
+{
+ struct unw_reg_info *reg = sr->curr.reg + regnum;
+
+ if (reg->where == UNW_WHERE_NONE)
+ reg->where = UNW_WHERE_GR_SAVE;
+ reg->when = sr->region_start + MIN ((int)t, sr->region_len - 1);
+}
+
+static inline void
+desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
+{
+ sr->spill_offset = 0x10 - 4*pspoff;
+}
+
+static inline unsigned char *
+desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
+{
+ sr->imask = imaskp;
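+  /* Two bits per instruction, rounded up to whole bytes: e.g. a
+     five-instruction region occupies (2*5 + 7)/8 = 2 imask bytes.  */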
+ return imaskp + (2*sr->region_len + 7)/8;
+}
+
+/*
+ * Body descriptors.
+ */
+static inline void
+desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
+{
+ sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
+ sr->epilogue_count = ecount + 1;
+}
+
+static inline void
+desc_copy_state (unw_word label, struct unw_state_record *sr)
+{
+ struct unw_labeled_state *ls;
+
+ for (ls = sr->labeled_states; ls; ls = ls->next)
+ {
+ if (ls->label == label)
+ {
+ free_state_stack (&sr->curr);
+ memcpy (&sr->curr, &ls->saved_state, sizeof (sr->curr));
+ sr->curr.next = dup_state_stack (ls->saved_state.next);
+ return;
+ }
+ }
+ abort ();
+}
+
+static inline void
+desc_label_state (unw_word label, struct unw_state_record *sr)
+{
+ struct unw_labeled_state *ls = alloc_label_state ();
+
+ ls->label = label;
+ memcpy (&ls->saved_state, &sr->curr, sizeof (ls->saved_state));
+ ls->saved_state.next = dup_state_stack (sr->curr.next);
+
+ /* Insert into list of labeled states. */
+ ls->next = sr->labeled_states;
+ sr->labeled_states = ls;
+}
+
+/*
+ * General descriptors.
+ */
+
+static inline int
+desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
+{
+ if (sr->when_target <= sr->region_start + MIN ((int)t, sr->region_len - 1))
+ return 0;
+ if (qp > 0)
+ {
+ if ((sr->pr_val & (1UL << qp)) == 0)
+ return 0;
+ sr->pr_mask |= (1UL << qp);
+ }
+ return 1;
+}
+
+static inline void
+desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg,
+ struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (! desc_is_active (qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg (abreg, 0);
+ r->where = UNW_WHERE_NONE;
+ r->when = sr->region_start + MIN ((int)t, sr->region_len - 1);
+ r->val = 0;
+}
+
+static inline void
+desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg,
+ unsigned char x, unsigned char ytreg,
+ struct unw_state_record *sr)
+{
+ enum unw_where where = UNW_WHERE_GR;
+ struct unw_reg_info *r;
+
+ if (! desc_is_active (qp, t, sr))
+ return;
+
+ if (x)
+ where = UNW_WHERE_BR;
+ else if (ytreg & 0x80)
+ where = UNW_WHERE_FR;
+
+ r = sr->curr.reg + decode_abreg (abreg, 0);
+ r->where = where;
+ r->when = sr->region_start + MIN ((int)t, sr->region_len - 1);
+ r->val = ytreg & 0x7f;
+}
+
+static inline void
+desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg,
+ unw_word pspoff, struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (! desc_is_active (qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg (abreg, 1);
+ r->where = UNW_WHERE_PSPREL;
+ r->when = sr->region_start + MIN((int)t, sr->region_len - 1);
+ r->val = 0x10 - 4*pspoff;
+}
+
+static inline void
+desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg,
+ unw_word spoff, struct unw_state_record *sr)
+{
+ struct unw_reg_info *r;
+
+ if (! desc_is_active (qp, t, sr))
+ return;
+
+ r = sr->curr.reg + decode_abreg (abreg, 1);
+ r->where = UNW_WHERE_SPREL;
+ r->when = sr->region_start + MIN ((int)t, sr->region_len - 1);
+ r->val = 4*spoff;
+}
+
+
+#define UNW_DEC_BAD_CODE(code) abort ();
+
+/* Region headers. */
+#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg)
+#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg)
+
+/* Prologue descriptors. */
+#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg)
+#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg)
+#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg)
+#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg)
+#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg)
+#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg)
+#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg)
+#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg)
+#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg)
+#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg)
+#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg)
+#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg)
+#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
+#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
+#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
+#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
+#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg)
+#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg)
+#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg))
+
+/* Body descriptors. */
+#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg)
+#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg)
+#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg)
+
+/* General unwind descriptors. */
+#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg)
+#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg)
+#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg)
+#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg)
+#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg)
+#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg)
+
+
+/*
+ * Generic IA-64 unwind info decoder.
+ *
+ * This file is used both by the Linux kernel and objdump. Please keep
+ * the copies of this file in sync.
+ *
+ * You need to customize the decoder by defining the following
+ * macros/constants before including this file:
+ *
+ * Types:
+ * unw_word Unsigned integer type with at least 64 bits
+ *
+ * Register names:
+ * UNW_REG_BSP
+ * UNW_REG_BSPSTORE
+ * UNW_REG_FPSR
+ * UNW_REG_LC
+ * UNW_REG_PFS
+ * UNW_REG_PR
+ * UNW_REG_RNAT
+ * UNW_REG_PSP
+ * UNW_REG_RP
+ * UNW_REG_UNAT
+ *
+ * Decoder action macros:
+ * UNW_DEC_BAD_CODE(code)
+ * UNW_DEC_ABI(fmt,abi,context,arg)
+ * UNW_DEC_BR_GR(fmt,brmask,gr,arg)
+ * UNW_DEC_BR_MEM(fmt,brmask,arg)
+ * UNW_DEC_COPY_STATE(fmt,label,arg)
+ * UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
+ * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
+ * UNW_DEC_FR_MEM(fmt,frmask,arg)
+ * UNW_DEC_GR_GR(fmt,grmask,gr,arg)
+ * UNW_DEC_GR_MEM(fmt,grmask,arg)
+ * UNW_DEC_LABEL_STATE(fmt,label,arg)
+ * UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
+ * UNW_DEC_MEM_STACK_V(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_GR(fmt,r,arg)
+ * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
+ * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
+ *	UNW_DEC_PRIUNAT_PSPREL(fmt,pspoff,arg)
+ *	UNW_DEC_PRIUNAT_SPREL(fmt,spoff,arg)
+ * UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
+ * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
+ * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
+ * UNW_DEC_REG_REG(fmt,src,dst,arg)
+ * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
+ * UNW_DEC_REG_WHEN(fmt,reg,t,arg)
+ * UNW_DEC_RESTORE(fmt,t,abreg,arg)
+ * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
+ * UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
+ * UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
+ * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
+ * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
+ * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
+ *	UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,spoff,arg)
+ */
+
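+/* Decode a ULEB128 value: seven data bits per byte, least significant
+   group first, the high bit flagging continuation.  E.g. the bytes
+   0xe5 0x8e 0x26 decode to 0x65 + (0x0e << 7) + (0x26 << 14) = 624485.  */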
+static unw_word
+unw_decode_uleb128 (unsigned char **dpp)
+{
+ unsigned shift = 0;
+ unw_word byte, result = 0;
+ unsigned char *bp = *dpp;
+
+ while (1)
+ {
+ byte = *bp++;
+ result |= (byte & 0x7f) << shift;
+ if ((byte & 0x80) == 0)
+ break;
+ shift += 7;
+ }
+ *dpp = bp;
+ return result;
+}
+
+static unsigned char *
+unw_decode_x1 (unsigned char *dp,
+ unsigned char code __attribute__((unused)),
+ void *arg)
+{
+ unsigned char byte1, abreg;
+ unw_word t, off;
+
+ byte1 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ off = unw_decode_uleb128 (&dp);
+ abreg = (byte1 & 0x7f);
+ if (byte1 & 0x80)
+ UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
+ else
+ UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x2 (unsigned char *dp,
+ unsigned char code __attribute__((unused)),
+ void *arg)
+{
+ unsigned char byte1, byte2, abreg, x, ytreg;
+ unw_word t;
+
+ byte1 = *dp++; byte2 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ abreg = (byte1 & 0x7f);
+ ytreg = byte2;
+ x = (byte1 >> 7) & 1;
+ if ((byte1 & 0x80) == 0 && ytreg == 0)
+ UNW_DEC_RESTORE(X2, t, abreg, arg);
+ else
+ UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x3 (unsigned char *dp,
+ unsigned char code __attribute__((unused)),
+ void *arg)
+{
+ unsigned char byte1, byte2, abreg, qp;
+ unw_word t, off;
+
+ byte1 = *dp++; byte2 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ off = unw_decode_uleb128 (&dp);
+
+ qp = (byte1 & 0x3f);
+ abreg = (byte2 & 0x7f);
+
+ if (byte1 & 0x80)
+ UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
+ else
+ UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_x4 (unsigned char *dp,
+ unsigned char code __attribute__((unused)),
+ void *arg)
+{
+ unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
+ unw_word t;
+
+ byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+ t = unw_decode_uleb128 (&dp);
+
+ qp = (byte1 & 0x3f);
+ abreg = (byte2 & 0x7f);
+ x = (byte2 >> 7) & 1;
+ ytreg = byte3;
+
+ if ((byte2 & 0x80) == 0 && byte3 == 0)
+ UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
+ else
+ UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ int body = (code & 0x20) != 0;
+ unw_word rlen;
+
+ rlen = (code & 0x1f);
+ UNW_DEC_PROLOGUE(R1, body, rlen, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char byte1, mask, grsave;
+ unw_word rlen;
+
+ byte1 = *dp++;
+
+ mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+ grsave = (byte1 & 0x7f);
+ rlen = unw_decode_uleb128 (&dp);
+ UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word rlen;
+
+ rlen = unw_decode_uleb128 (&dp);
+ UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char brmask = (code & 0x1f);
+
+ UNW_DEC_BR_MEM(P1, brmask, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
+{
+ if ((code & 0x10) == 0)
+ {
+ unsigned char byte1 = *dp++;
+
+ UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
+ (byte1 & 0x7f), arg);
+ }
+ else if ((code & 0x08) == 0)
+ {
+ unsigned char byte1 = *dp++, r, dst;
+
+ r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
+ dst = (byte1 & 0x7f);
+ switch (r)
+ {
+ case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
+ case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
+ case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
+ case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
+ case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
+ case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
+ case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
+ case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
+ case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
+ case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
+ case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
+ case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ else if ((code & 0x7) == 0)
+ UNW_DEC_SPILL_MASK(P4, dp, arg);
+ else if ((code & 0x7) == 1)
+ {
+ unw_word grmask, frmask, byte1, byte2, byte3;
+
+ byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
+ grmask = ((byte1 >> 4) & 0xf);
+ frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
+ UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
+ }
+ else
+ UNW_DEC_BAD_CODE(code);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
+{
+ int gregs = (code & 0x10) != 0;
+ unsigned char mask = (code & 0x0f);
+
+ if (gregs)
+ UNW_DEC_GR_MEM(P6, mask, arg);
+ else
+ UNW_DEC_FR_MEM(P6, mask, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unsigned char r, byte1, byte2;
+ unw_word t, size;
+
+ if ((code & 0x10) == 0)
+ {
+ r = (code & 0xf);
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 0:
+ size = unw_decode_uleb128 (&dp);
+ UNW_DEC_MEM_STACK_F(P7, t, size, arg);
+ break;
+
+ case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
+ case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
+ case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
+ case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
+ case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
+ case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
+ case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
+ case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
+ case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
+ case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
+ case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
+ case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ else
+ {
+ switch (code & 0xf)
+ {
+ case 0x0: /* p8 */
+ {
+ r = *dp++;
+ t = unw_decode_uleb128 (&dp);
+ switch (r)
+ {
+ case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
+ case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
+ case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
+ case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
+ case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
+ case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
+ case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
+ case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
+ case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
+ case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
+ case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
+ case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
+ case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
+ case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
+ case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
+ default: UNW_DEC_BAD_CODE(r); break;
+ }
+ }
+ break;
+
+ case 0x1:
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
+ break;
+
+ case 0xf: /* p10 */
+ byte1 = *dp++; byte2 = *dp++;
+ UNW_DEC_ABI(P10, byte1, byte2, arg);
+ break;
+
+ case 0x9:
+ return unw_decode_x1 (dp, code, arg);
+
+ case 0xa:
+ return unw_decode_x2 (dp, code, arg);
+
+ case 0xb:
+ return unw_decode_x3 (dp, code, arg);
+
+ case 0xc:
+ return unw_decode_x4 (dp, code, arg);
+
+ default:
+ UNW_DEC_BAD_CODE(code);
+ break;
+ }
+ }
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word label = (code & 0x1f);
+
+ if ((code & 0x20) != 0)
+ UNW_DEC_COPY_STATE(B1, label, arg);
+ else
+ UNW_DEC_LABEL_STATE(B1, label, arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word t;
+
+ t = unw_decode_uleb128 (&dp);
+ UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
+ return dp;
+}
+
+static unsigned char *
+unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
+{
+ unw_word t, ecount, label;
+
+ if ((code & 0x10) == 0)
+ {
+ t = unw_decode_uleb128 (&dp);
+ ecount = unw_decode_uleb128 (&dp);
+ UNW_DEC_EPILOGUE(B3, t, ecount, arg);
+ }
+ else if ((code & 0x07) == 0)
+ {
+ label = unw_decode_uleb128 (&dp);
+ if ((code & 0x08) != 0)
+ UNW_DEC_COPY_STATE(B4, label, arg);
+ else
+ UNW_DEC_LABEL_STATE(B4, label, arg);
+ }
+ else
+ switch (code & 0x7)
+ {
+ case 1: return unw_decode_x1 (dp, code, arg);
+ case 2: return unw_decode_x2 (dp, code, arg);
+ case 3: return unw_decode_x3 (dp, code, arg);
+ case 4: return unw_decode_x4 (dp, code, arg);
+ default: UNW_DEC_BAD_CODE(code); break;
+ }
+ return dp;
+}
+
+typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
+
+static const unw_decoder unw_decode_table[2][8] =
+{
+ /* prologue table: */
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_p1, /* 4 */
+ unw_decode_p2_p5,
+ unw_decode_p6,
+ unw_decode_p7_p10
+ },
+ {
+ unw_decode_r1, /* 0 */
+ unw_decode_r1,
+ unw_decode_r2,
+ unw_decode_r3,
+ unw_decode_b1, /* 4 */
+ unw_decode_b1,
+ unw_decode_b2,
+ unw_decode_b3_x4
+ }
+};
+
+/*
+ * Decode one descriptor and return address of next descriptor.
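+ *
+ * The top three bits of the code byte index unw_decode_table, with the
+ * row selected by whether we are inside a body region; the low five
+ * bits are interpreted by the chosen decoder.  E.g. a code byte of
+ * 0x40 (010xxxxx) dispatches to unw_decode_r2 in either row.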
+ */
+static inline unsigned char *
+unw_decode (unsigned char *dp, int inside_body, void *arg)
+{
+ unw_decoder decoder;
+ unsigned char code;
+
+ code = *dp++;
+ decoder = unw_decode_table[inside_body][code >> 5];
+ dp = (*decoder) (dp, code, arg);
+ return dp;
+}
+
+
+/* RSE helper functions. */
+
+static inline unsigned long
+ia64_rse_slot_num (unsigned long *addr)
+{
+ return (((unsigned long) addr) >> 3) & 0x3f;
+}
+
+/* Return TRUE if ADDR is the address of an RNAT slot. */
+static inline unsigned long
+ia64_rse_is_rnat_slot (unsigned long *addr)
+{
+ return ia64_rse_slot_num (addr) == 0x3f;
+}
+
+/* Returns the address of the RNAT slot that covers the slot at
+ address SLOT_ADDR. */
+static inline unsigned long *
+ia64_rse_rnat_addr (unsigned long *slot_addr)
+{
+ return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3));
+}
+
+/* Calculate the number of registers in the dirty partition between
+   BSPSTORE and BSP.  This isn't simply the raw slot count because
+   every 64th slot is used to store ar.rnat.  */
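+/* E.g. with BSPSTORE at slot 0 and BSP 64 slots above it, the result is
+   64 - 64/64 = 63 registers, the 64th slot holding the ar.rnat
+   collection.  */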
+static inline unsigned long
+ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp)
+{
+ unsigned long slots = (bsp - bspstore);
+
+ return slots - (ia64_rse_slot_num (bspstore) + slots)/0x40;
+}
+
+/* The inverse of the above: given bspstore and the number of
+ registers, calculate ar.bsp. */
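+/* E.g. skipping 3 registers from an address in slot 62 returns
+   addr + 4: delta = 62 + 3 = 65, and 65/0x3f = 1 accounts for the
+   intervening RNAT slot at slot 63.  */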
+static inline unsigned long *
+ia64_rse_skip_regs (unsigned long *addr, long num_regs)
+{
+ long delta = ia64_rse_slot_num (addr) + num_regs;
+
+ if (num_regs < 0)
+ delta -= 0x3e;
+ return addr + num_regs + delta/0x3f;
+}
+
+
+/* Copy register backing store from SRC to DST, LEN words
+ (which include both saved registers and nat collections).
+ DST_RNAT is a partial nat collection for DST. SRC and DST
+ don't have to be equal modulo 64 slots, so it cannot be
+ done with a simple memcpy as the nat collections will be
+ at different relative offsets and need to be combined together. */
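+/* The two gotos below enter the copy loop mid-body so that the initial
+   partial chunk, up to whichever of DST or SRC reaches an RNAT slot
+   first, is handled by the matching half of the loop before the
+   steady-state alternation begins.  */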
+static void
+ia64_copy_rbs (struct _Unwind_Context *info, unsigned long dst,
+ unsigned long src, long len, unsigned long dst_rnat)
+{
+ long count;
+ unsigned long src_rnat;
+ unsigned long shift1, shift2;
+
+ len <<= 3;
+ dst_rnat &= (1UL << ((dst >> 3) & 0x3f)) - 1;
+ src_rnat = src >= info->regstk_top
+ ? info->rnat : *(unsigned long *) (src | 0x1f8);
+ src_rnat &= ~((1UL << ((src >> 3) & 0x3f)) - 1);
+ /* Just to make sure. */
+ src_rnat &= ~(1UL << 63);
+ shift1 = ((dst - src) >> 3) & 0x3f;
+ if ((dst & 0x1f8) < (src & 0x1f8))
+ shift1--;
+ shift2 = 0x3f - shift1;
+ if ((dst & 0x1f8) >= (src & 0x1f8))
+ {
+ count = ~dst & 0x1f8;
+ goto first;
+ }
+ count = ~src & 0x1f8;
+ goto second;
+ while (len > 0)
+ {
+ src_rnat = src >= info->regstk_top
+ ? info->rnat : *(unsigned long *) (src | 0x1f8);
+ /* Just to make sure. */
+ src_rnat &= ~(1UL << 63);
+ count = shift2 << 3;
+first:
+ if (count > len)
+ count = len;
+ memcpy ((char *) dst, (char *) src, count);
+ dst += count;
+ src += count;
+ len -= count;
+ dst_rnat |= (src_rnat << shift1) & ~(1UL << 63);
+ if (len <= 0)
+ break;
+ *(long *) dst = dst_rnat;
+ dst += 8;
+ dst_rnat = 0;
+ count = shift1 << 3;
+second:
+ if (count > len)
+ count = len;
+ memcpy ((char *) dst, (char *) src, count);
+ dst += count;
+ src += count + 8;
+ len -= count + 8;
+ dst_rnat |= (src_rnat >> shift2);
+ }
+ if ((dst & 0x1f8) == 0x1f8)
+ {
+ *(long *) dst = dst_rnat;
+ dst += 8;
+ dst_rnat = 0;
+ }
+ /* Set info->regstk_top to lowest rbs address which will use
+ info->rnat collection. */
+ info->regstk_top = dst & ~0x1ffUL;
+ info->rnat = dst_rnat;
+}
+
+/* Unwind accessors. */
+
+static void
+unw_access_gr (struct _Unwind_Context *info, int regnum,
+ unsigned long *val, char *nat, int write)
+{
+ unsigned long *addr, *nat_addr = 0, nat_mask = 0, dummy_nat;
+ struct unw_ireg *ireg;
+
+ if ((unsigned) regnum - 1 >= 127)
+ abort ();
+
+ if (regnum < 1)
+ {
+ nat_addr = addr = &dummy_nat;
+ dummy_nat = 0;
+ }
+ else if (regnum < 32)
+ {
+ /* Access a non-stacked register. */
+ ireg = &info->ireg[regnum - 2];
+ addr = ireg->loc;
+ if (addr)
+ {
+ nat_addr = addr + ireg->nat.off;
+ switch (ireg->nat.type)
+ {
+ case UNW_NAT_VAL:
+ /* Simulate getf.sig/setf.sig. */
+ if (write)
+ {
+ if (*nat)
+ {
+ /* Write NaTVal and be done with it. */
+ addr[0] = 0;
+ addr[1] = 0x1fffe;
+ return;
+ }
+ addr[1] = 0x1003e;
+ }
+ else if (addr[0] == 0 && addr[1] == 0x1ffe)
+ {
+ /* Return NaT and be done with it. */
+ *val = 0;
+ *nat = 1;
+ return;
+ }
+ /* FALLTHRU */
+
+ case UNW_NAT_NONE:
+ dummy_nat = 0;
+ nat_addr = &dummy_nat;
+ break;
+
+ case UNW_NAT_MEMSTK:
+ nat_mask = 1UL << ((long) addr & 0x1f8)/8;
+ break;
+
+ case UNW_NAT_REGSTK:
+ if ((unsigned long) addr >= info->regstk_top)
+ nat_addr = &info->rnat;
+ else
+ nat_addr = ia64_rse_rnat_addr (addr);
+ nat_mask = 1UL << ia64_rse_slot_num (addr);
+ break;
+ }
+ }
+ }
+ else
+ {
+ /* Access a stacked register. */
+ addr = ia64_rse_skip_regs ((unsigned long *) info->bsp, regnum - 32);
+ if ((unsigned long) addr >= info->regstk_top)
+ nat_addr = &info->rnat;
+ else
+ nat_addr = ia64_rse_rnat_addr (addr);
+ nat_mask = 1UL << ia64_rse_slot_num (addr);
+ }
+
+ if (write)
+ {
+ *addr = *val;
+ if (*nat)
+ *nat_addr |= nat_mask;
+ else
+ *nat_addr &= ~nat_mask;
+ }
+ else
+ {
+ *val = *addr;
+ *nat = (*nat_addr & nat_mask) != 0;
+ }
+}
+
+/* Get the value of register INDEX as saved in CONTEXT.  */
+
+_Unwind_Word
+_Unwind_GetGR (struct _Unwind_Context *context, int index)
+{
+ _Unwind_Word ret;
+ char nat;
+
+ if (index == 1)
+ return context->gp;
+ else if (index >= 15 && index <= 18)
+ return context->eh_data[index - 15];
+ else
+ unw_access_gr (context, index, &ret, &nat, 0);
+
+ return ret;
+}
+
+/* Overwrite the saved value for register INDEX in CONTEXT with VAL.  */
+
+void
+_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
+{
+ char nat = 0;
+
+ if (index == 1)
+ context->gp = val;
+ else if (index >= 15 && index <= 18)
+ context->eh_data[index - 15] = val;
+ else
+ unw_access_gr (context, index, &val, &nat, 1);
+}
+
+/* Retrieve the return address for CONTEXT. */
+
+inline _Unwind_Ptr
+_Unwind_GetIP (struct _Unwind_Context *context)
+{
+ return context->rp;
+}
+
+inline _Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn)
+{
+ *ip_before_insn = 0;
+ return context->rp;
+}
+
+/* Overwrite the return address for CONTEXT with VAL. */
+
+inline void
+_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
+{
+ context->rp = val;
+}
+
+void *
+_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
+{
+ return context->lsda;
+}
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (struct _Unwind_Context *context)
+{
+ return context->region_start;
+}
+
+void *
+_Unwind_FindEnclosingFunction (void *pc)
+{
+ struct unw_table_entry *entp, ent;
+ unsigned long segment_base, gp;
+
+ entp = _Unwind_FindTableEntry (pc, &segment_base, &gp, &ent);
+ if (entp == NULL)
+ return NULL;
+ else
+ return (void *)(segment_base + entp->start_offset);
+}
+
+/* Get the value of the CFA as saved in CONTEXT. In GCC/Dwarf2 parlance,
+   the CFA is the value of the stack pointer on entry; in IA-64 unwind
+ parlance, this is the PSP. */
+
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->psp;
+}
+
+/* Get the value of the Backing Store Pointer as saved in CONTEXT. */
+
+_Unwind_Word
+_Unwind_GetBSP (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bsp;
+}
+
+#ifdef MD_UNWIND_SUPPORT
+#include MD_UNWIND_SUPPORT
+#endif
+
+static _Unwind_Reason_Code
+uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ struct unw_table_entry *entp, ent;
+ unsigned long *unw, header, length;
+ unsigned char *insn, *insn_end;
+ unsigned long segment_base;
+ struct unw_reg_info *r;
+
+ memset (fs, 0, sizeof (*fs));
+ for (r = fs->curr.reg; r < fs->curr.reg + UNW_NUM_REGS; ++r)
+ r->when = UNW_WHEN_NEVER;
+ context->lsda = 0;
+
+ entp = _Unwind_FindTableEntry ((void *) context->rp,
+ &segment_base, &context->gp, &ent);
+ if (entp == NULL)
+ {
+ /* Couldn't find unwind info for this function. Try an
+ os-specific fallback mechanism. This will necessarily
+ not provide a personality routine or LSDA. */
+#ifdef MD_FALLBACK_FRAME_STATE_FOR
+ if (MD_FALLBACK_FRAME_STATE_FOR (context, fs) == _URC_NO_REASON)
+ return _URC_NO_REASON;
+#endif
+
+ /* [SCRA 11.4.1] A leaf function with no memory stack, no exception
+ handlers, and which keeps the return value in B0 does not need
+ an unwind table entry.
+
+ This can only happen in the frame after unwinding through a signal
+ handler. Avoid infinite looping by requiring that B0 != RP.
+ RP == 0 terminates the chain. */
+ if (context->br_loc[0]
+ && *context->br_loc[0] != context->rp
+ && context->rp != 0)
+ goto skip_unwind_info;
+
+ return _URC_END_OF_STACK;
+ }
+
+ context->region_start = entp->start_offset + segment_base;
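+  /* Each 16-byte IA-64 bundle holds three instruction slots, and the
+     low four bits of RP encode the slot number, so this converts RP
+     into an instruction-slot count from the start of the region.  */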
+ fs->when_target = ((context->rp & -16) - context->region_start) / 16 * 3
+ + (context->rp & 15);
+
+ unw = (unsigned long *) (entp->info_offset + segment_base);
+ header = *unw;
+ length = UNW_LENGTH (header);
+
+  /* Some operating systems use the personality routine slot in a way not
+ compatible with what we expect. For instance, OpenVMS uses this slot to
+ designate "condition handlers" with very different arguments than what we
+ would be providing. Such cases are typically identified from OS specific
+ bits in the unwind information block header, and checked by the target
+ MD_UNW_COMPATIBLE_PERSONALITY_P macro.
+
+ We just pretend there is no personality from our standpoint in such
+ situations, and expect GCC not to set the identifying bits itself so that
+ compatible personalities for GCC compiled code are called.
+
+     Of course, this raises the question of what combinations of native/GCC
+     calls can be expected to behave properly exception handling-wise.  We
+     do not aim to provide a magic answer here, merely to prevent crashes,
+     assuming users know what they are doing.
+
+ ??? Perhaps check UNW_VER / UNW_FLAG_OSMASK as well. */
+
+ if (MD_UNW_COMPATIBLE_PERSONALITY_P (header)
+ && (UNW_FLAG_EHANDLER (header) | UNW_FLAG_UHANDLER (header)))
+ {
+ fs->personality =
+ *(_Unwind_Personality_Fn *) (unw[length + 1] + context->gp);
+ context->lsda = unw + length + 2;
+ }
+
+ insn = (unsigned char *) (unw + 1);
+ insn_end = (unsigned char *) (unw + 1 + length);
+ while (!fs->done && insn < insn_end)
+ insn = unw_decode (insn, fs->in_body, fs);
+
+ free_label_states (fs->labeled_states);
+ free_state_stack (&fs->curr);
+
+#ifdef ENABLE_MALLOC_CHECKING
+ if (reg_state_alloced || labeled_state_alloced)
+ abort ();
+#endif
+
+ /* If we're in the epilogue, sp has been restored and all values
+ on the memory stack below psp also have been restored. */
+ if (fs->when_target > fs->epilogue_start)
+ {
+ struct unw_reg_info *r;
+
+ fs->curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
+ fs->curr.reg[UNW_REG_PSP].val = 0;
+ for (r = fs->curr.reg; r < fs->curr.reg + UNW_NUM_REGS; ++r)
+ if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
+ || r->where == UNW_WHERE_SPREL)
+ r->where = UNW_WHERE_NONE;
+ }
+
+skip_unwind_info:
+ /* If RP didn't get saved, generate entry for the return link register. */
+ if (fs->curr.reg[UNW_REG_RP].when >= fs->when_target)
+ {
+ fs->curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
+ fs->curr.reg[UNW_REG_RP].when = -1;
+ fs->curr.reg[UNW_REG_RP].val = fs->return_link_reg;
+ }
+
+ /* There is a subtlety for the frame after unwinding through a signal
+ handler: should we restore the cfm as usual or the pfs? We can't
+ restore both because we use br.ret to resume execution of user code.
+ For other frames the procedure is by definition non-leaf so the pfs
+ is saved and restored and thus effectively dead in the body; only
+ the cfm need therefore be restored.
+
+ Here we have 2 cases:
+ - either the pfs is saved and restored and thus effectively dead
+ like in regular frames; then we do nothing special and restore
+ the cfm.
+ - or the pfs is not saved and thus live; but in that case the
+ procedure is necessarily leaf so the cfm is effectively dead
+ and we restore the pfs. */
+ if (context->signal_pfs_loc)
+ {
+ if (fs->curr.reg[UNW_REG_PFS].when >= fs->when_target)
+ context->pfs_loc = context->signal_pfs_loc;
+ context->signal_pfs_loc = NULL;
+ }
+
+ return _URC_NO_REASON;
+}
+
+static void
+uw_update_reg_address (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs,
+ enum unw_register_index regno)
+{
+ struct unw_reg_info *r = fs->curr.reg + regno;
+ void *addr;
+ unsigned long rval;
+
+ if (r->where == UNW_WHERE_NONE || r->when >= fs->when_target)
+ return;
+
+ rval = r->val;
+ switch (r->where)
+ {
+ case UNW_WHERE_GR:
+ if (rval >= 32)
+ addr = ia64_rse_skip_regs ((unsigned long *) context->bsp, rval - 32);
+ else if (rval >= 2)
+ addr = context->ireg[rval - 2].loc;
+ else if (rval == 0)
+ {
+ static const unsigned long dummy;
+ addr = (void *) &dummy;
+ }
+ else
+ abort ();
+ break;
+
+ case UNW_WHERE_FR:
+ if (rval >= 2 && rval < 32)
+ addr = context->fr_loc[rval - 2];
+ else
+ abort ();
+ break;
+
+ case UNW_WHERE_BR:
+      /* Note that while RVAL can only be 1-5 from normal descriptors,
+	 we may need to look at B0, B6 and B7 due to having manually
+	 unwound a signal frame.  */
+ if (rval < 8)
+ addr = context->br_loc[rval];
+ else
+ abort ();
+ break;
+
+ case UNW_WHERE_SPREL:
+ addr = (void *)(context->sp + rval);
+ break;
+
+ case UNW_WHERE_PSPREL:
+ addr = (void *)(context->psp + rval);
+ break;
+
+ default:
+ abort ();
+ }
+
+ switch (regno)
+ {
+ case UNW_REG_R2 ... UNW_REG_R31:
+ context->ireg[regno - UNW_REG_R2].loc = addr;
+ switch (r->where)
+ {
+ case UNW_WHERE_GR:
+ if (rval >= 32)
+ {
+ context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_MEMSTK;
+ context->ireg[regno - UNW_REG_R2].nat.off
+ = context->pri_unat_loc - (unsigned long *) addr;
+ }
+ else if (rval >= 2)
+ {
+ context->ireg[regno - UNW_REG_R2].nat
+ = context->ireg[rval - 2].nat;
+ }
+ else if (rval == 0)
+ {
+ context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_NONE;
+ context->ireg[regno - UNW_REG_R2].nat.off = 0;
+ }
+ else
+ abort ();
+ break;
+
+ case UNW_WHERE_FR:
+ context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_VAL;
+ context->ireg[regno - UNW_REG_R2].nat.off = 0;
+ break;
+
+ case UNW_WHERE_BR:
+ context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_NONE;
+ context->ireg[regno - UNW_REG_R2].nat.off = 0;
+ break;
+
+ case UNW_WHERE_PSPREL:
+ case UNW_WHERE_SPREL:
+ context->ireg[regno - UNW_REG_R2].nat.type = UNW_NAT_MEMSTK;
+ context->ireg[regno - UNW_REG_R2].nat.off
+ = context->pri_unat_loc - (unsigned long *) addr;
+ break;
+
+ default:
+ abort ();
+ }
+ break;
+
+ case UNW_REG_F2 ... UNW_REG_F31:
+ context->fr_loc[regno - UNW_REG_F2] = addr;
+ break;
+
+ case UNW_REG_B1 ... UNW_REG_B5:
+ context->br_loc[regno - UNW_REG_B0] = addr;
+ break;
+
+ case UNW_REG_BSP:
+ context->bsp_loc = addr;
+ break;
+ case UNW_REG_BSPSTORE:
+ context->bspstore_loc = addr;
+ break;
+ case UNW_REG_PFS:
+ context->pfs_loc = addr;
+ break;
+ case UNW_REG_RP:
+ context->rp = *(unsigned long *)addr;
+ break;
+ case UNW_REG_UNAT:
+ context->unat_loc = addr;
+ break;
+ case UNW_REG_PR:
+ context->pr = *(unsigned long *) addr;
+ break;
+ case UNW_REG_LC:
+ context->lc_loc = addr;
+ break;
+ case UNW_REG_FPSR:
+ context->fpsr_loc = addr;
+ break;
+
+ case UNW_REG_PSP:
+ context->psp = *(unsigned long *)addr;
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+static void
+uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ long i;
+
+#ifdef MD_HANDLE_UNWABI
+ MD_HANDLE_UNWABI (context, fs);
+#endif
+
+ context->sp = context->psp;
+
+ /* First, set PSP. Subsequent instructions may depend on this value. */
+ if (fs->when_target > fs->curr.reg[UNW_REG_PSP].when)
+ {
+ if (fs->curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
+ context->psp = context->psp + fs->curr.reg[UNW_REG_PSP].val;
+ else
+ uw_update_reg_address (context, fs, UNW_REG_PSP);
+ }
+
+ /* Determine the location of the primary UNaT. */
+ {
+ int i;
+ if (fs->when_target < fs->curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else if (fs->when_target < fs->curr.reg[UNW_REG_PRI_UNAT_MEM].when)
+ i = UNW_REG_PRI_UNAT_GR;
+ else if (fs->curr.reg[UNW_REG_PRI_UNAT_MEM].when
+ > fs->curr.reg[UNW_REG_PRI_UNAT_GR].when)
+ i = UNW_REG_PRI_UNAT_MEM;
+ else
+ i = UNW_REG_PRI_UNAT_GR;
+ uw_update_reg_address (context, fs, i);
+ }
+
+ /* Compute the addresses of all registers saved in this frame. */
+ for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
+ uw_update_reg_address (context, fs, i);
+
+ /* Unwind BSP for the local registers allocated this frame. */
+ /* ??? What to do with stored BSP or BSPSTORE registers. */
+ /* We assert that we are either at a call site, or we have
+ just unwound through a signal frame. In either case
+ pfs_loc is valid. */
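+  /* In ar.pfs, bits 0-6 hold sof (size of frame) and bits 7-13 hold
+     sol (size of locals); backing bsp up by sol undoes this frame's
+     input/local register allocation.  */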
+  if (!(fs->no_reg_stack_frame))
+ {
+ unsigned long pfs = *context->pfs_loc;
+ unsigned long sol = (pfs >> 7) & 0x7f;
+ context->bsp = (unsigned long)
+ ia64_rse_skip_regs ((unsigned long *) context->bsp, -sol);
+ }
+}
+
+static void
+uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context (context, fs);
+}
+
+/* Fill in CONTEXT for top-of-stack. The only valid registers at this
+ level will be the return address and the CFA. Note that CFA = SP+16. */
+
+#define uw_init_context(CONTEXT) \
+ do { \
+    /* ??? There is a whole lot of code in uw_install_context that \
+ tries to avoid spilling the entire machine state here. We \
+ should try to make that work again. */ \
+ __builtin_unwind_init(); \
+ uw_init_context_1 (CONTEXT, __builtin_ia64_bsp ()); \
+ } while (0)
+
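+/* Note that the __builtin_unwind_init call above forces all call-saved
+   registers to be saved to memory, so the state captured here is
+   complete enough for the unwinder to walk.  */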
+static void __attribute__((noinline))
+uw_init_context_1 (struct _Unwind_Context *context, void *bsp)
+{
+ void *rp = __builtin_extract_return_addr (__builtin_return_address (0));
+ /* Set psp to the caller's stack pointer. */
+ void *psp = __builtin_dwarf_cfa () - 16;
+ _Unwind_FrameState fs;
+ unsigned long rnat, tmp1, tmp2;
+
+ /* Flush the register stack to memory so that we can access it.
+ Get rse nat collection for the last incomplete rbs chunk of
+ registers at the same time. For this RSE needs to be turned
+ into the mandatory only mode. */
+ asm ("mov.m %1 = ar.rsc;;\n\t"
+ "and %2 = 0x1c, %1;;\n\t"
+ "mov.m ar.rsc = %2;;\n\t"
+ "flushrs;;\n\t"
+ "mov.m %0 = ar.rnat;;\n\t"
+ "mov.m ar.rsc = %1\n\t"
+ : "=r" (rnat), "=r" (tmp1), "=r" (tmp2));
+
+ memset (context, 0, sizeof (struct _Unwind_Context));
+ context->bsp = (unsigned long) bsp;
+ /* Set context->regstk_top to lowest rbs address which will use
+ context->rnat collection. */
+ context->regstk_top = context->bsp & ~0x1ffULL;
+ context->rnat = rnat;
+ context->psp = (unsigned long) psp;
+ context->rp = (unsigned long) rp;
+ asm ("mov %0 = sp" : "=r" (context->sp));
+ asm ("mov %0 = pr" : "=r" (context->pr));
+ context->pri_unat_loc = &context->initial_unat; /* ??? */
+
+ if (uw_frame_state_for (context, &fs) != _URC_NO_REASON)
+ abort ();
+
+ uw_update_context (context, &fs);
+}
+
+/* Install (i.e. longjmp to) the contents of TARGET. */
+
+static void __attribute__((noreturn))
+uw_install_context (struct _Unwind_Context *current __attribute__((unused)),
+ struct _Unwind_Context *target)
+{
+ unsigned long ireg_buf[4], ireg_nat = 0, ireg_pr = 0;
+ long i;
+
+ /* Copy integer register data from the target context to a
+ temporary buffer. Do this so that we can frob AR.UNAT
+ to get the NaT bits for these registers set properly. */
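+  /* ld8.fill takes the NaT bit for the loaded register from ar.unat at
+     bit position (address >> 3) & 0x3f, which is why each saved NaT is
+     placed at the slot matching the address of its ireg_buf entry.  */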
+ for (i = 4; i <= 7; ++i)
+ {
+ char nat;
+ void *t = target->ireg[i - 2].loc;
+ if (t)
+ {
+ unw_access_gr (target, i, &ireg_buf[i - 4], &nat, 0);
+ ireg_nat |= (long)nat << (((size_t)&ireg_buf[i - 4] >> 3) & 0x3f);
+ /* Set p6 - p9. */
+ ireg_pr |= 4L << i;
+ }
+ }
+
+ /* The value in uc_bsp that we've computed is that for the
+ target function. The value that we install below will be
+ adjusted by the BR.RET instruction based on the contents
+ of AR.PFS. So we must unadjust that here. */
+ target->bsp = (unsigned long)
+ ia64_rse_skip_regs ((unsigned long *)target->bsp,
+ (*target->pfs_loc >> 7) & 0x7f);
+
+ if (target->bsp < target->regstk_top)
+ target->rnat = *ia64_rse_rnat_addr ((unsigned long *) target->bsp);
+
+ /* Provide assembly with the offsets into the _Unwind_Context. */
+ asm volatile ("uc_rnat = %0"
+ : : "i"(offsetof (struct _Unwind_Context, rnat)));
+ asm volatile ("uc_bsp = %0"
+ : : "i"(offsetof (struct _Unwind_Context, bsp)));
+ asm volatile ("uc_psp = %0"
+ : : "i"(offsetof (struct _Unwind_Context, psp)));
+ asm volatile ("uc_rp = %0"
+ : : "i"(offsetof (struct _Unwind_Context, rp)));
+ asm volatile ("uc_pr = %0"
+ : : "i"(offsetof (struct _Unwind_Context, pr)));
+ asm volatile ("uc_gp = %0"
+ : : "i"(offsetof (struct _Unwind_Context, gp)));
+ asm volatile ("uc_pfs_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, pfs_loc)));
+ asm volatile ("uc_unat_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, unat_loc)));
+ asm volatile ("uc_lc_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, lc_loc)));
+ asm volatile ("uc_fpsr_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, fpsr_loc)));
+ asm volatile ("uc_eh_data = %0"
+ : : "i"(offsetof (struct _Unwind_Context, eh_data)));
+ asm volatile ("uc_br_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, br_loc)));
+ asm volatile ("uc_fr_loc = %0"
+ : : "i"(offsetof (struct _Unwind_Context, fr_loc)));
+
+ asm volatile (
+ /* Load up call-saved non-window integer registers from ireg_buf. */
+ "add r20 = 8, %1 \n\t"
+ "mov ar.unat = %2 \n\t"
+ "mov pr = %3, 0x3c0 \n\t"
+ ";; \n\t"
+ "(p6) ld8.fill r4 = [%1] \n\t"
+ "(p7) ld8.fill r5 = [r20] \n\t"
+ "add r21 = uc_br_loc + 16, %0 \n\t"
+ "adds %1 = 16, %1 \n\t"
+ "adds r20 = 16, r20 \n\t"
+ ";; \n\t"
+ "(p8) ld8.fill r6 = [%1] \n\t"
+ "(p9) ld8.fill r7 = [r20] \n\t"
+ "add r20 = uc_br_loc + 8, %0 \n\t"
+ ";; \n\t"
+ /* Load up call-saved branch registers. */
+ "ld8 r22 = [r20], 16 \n\t"
+ "ld8 r23 = [r21], 16 \n\t"
+ ";; \n\t"
+ "ld8 r24 = [r20], 16 \n\t"
+ "ld8 r25 = [r21], uc_fr_loc - (uc_br_loc + 32)\n\t"
+ ";; \n\t"
+ "ld8 r26 = [r20], uc_fr_loc + 8 - (uc_br_loc + 40)\n\t"
+ "ld8 r27 = [r21], 24 \n\t"
+ "cmp.ne p6, p0 = r0, r22 \n\t"
+ ";; \n\t"
+ "ld8 r28 = [r20], 8 \n\t"
+ "(p6) ld8 r22 = [r22] \n\t"
+ "cmp.ne p7, p0 = r0, r23 \n\t"
+ ";; \n\t"
+ "(p7) ld8 r23 = [r23] \n\t"
+ "cmp.ne p8, p0 = r0, r24 \n\t"
+ ";; \n\t"
+ "(p8) ld8 r24 = [r24] \n\t"
+ "(p6) mov b1 = r22 \n\t"
+ "cmp.ne p9, p0 = r0, r25 \n\t"
+ ";; \n\t"
+ "(p9) ld8 r25 = [r25] \n\t"
+ "(p7) mov b2 = r23 \n\t"
+ "cmp.ne p6, p0 = r0, r26 \n\t"
+ ";; \n\t"
+ "(p6) ld8 r26 = [r26] \n\t"
+ "(p8) mov b3 = r24 \n\t"
+ "cmp.ne p7, p0 = r0, r27 \n\t"
+ ";; \n\t"
+ /* Load up call-saved fp registers. */
+ "(p7) ldf.fill f2 = [r27] \n\t"
+ "(p9) mov b4 = r25 \n\t"
+ "cmp.ne p8, p0 = r0, r28 \n\t"
+ ";; \n\t"
+ "(p8) ldf.fill f3 = [r28] \n\t"
+ "(p6) mov b5 = r26 \n\t"
+ ";; \n\t"
+ "ld8 r29 = [r20], 16*8 - 4*8 \n\t"
+ "ld8 r30 = [r21], 17*8 - 5*8 \n\t"
+ ";; \n\t"
+ "ld8 r22 = [r20], 16 \n\t"
+ "ld8 r23 = [r21], 16 \n\t"
+ ";; \n\t"
+ "ld8 r24 = [r20], 16 \n\t"
+ "ld8 r25 = [r21] \n\t"
+ "cmp.ne p6, p0 = r0, r29 \n\t"
+ ";; \n\t"
+ "ld8 r26 = [r20], 8 \n\t"
+ "(p6) ldf.fill f4 = [r29] \n\t"
+ "cmp.ne p7, p0 = r0, r30 \n\t"
+ ";; \n\t"
+ "ld8 r27 = [r20], 8 \n\t"
+ "(p7) ldf.fill f5 = [r30] \n\t"
+ "cmp.ne p6, p0 = r0, r22 \n\t"
+ ";; \n\t"
+ "ld8 r28 = [r20], 8 \n\t"
+ "(p6) ldf.fill f16 = [r22] \n\t"
+ "cmp.ne p7, p0 = r0, r23 \n\t"
+ ";; \n\t"
+ "ld8 r29 = [r20], 8 \n\t"
+ "(p7) ldf.fill f17 = [r23] \n\t"
+ "cmp.ne p6, p0 = r0, r24 \n\t"
+ ";; \n\t"
+ "ld8 r22 = [r20], 8 \n\t"
+ "(p6) ldf.fill f18 = [r24] \n\t"
+ "cmp.ne p7, p0 = r0, r25 \n\t"
+ ";; \n\t"
+ "ld8 r23 = [r20], 8 \n\t"
+ "(p7) ldf.fill f19 = [r25] \n\t"
+ "cmp.ne p6, p0 = r0, r26 \n\t"
+ ";; \n\t"
+ "ld8 r24 = [r20], 8 \n\t"
+ "(p6) ldf.fill f20 = [r26] \n\t"
+ "cmp.ne p7, p0 = r0, r27 \n\t"
+ ";; \n\t"
+ "ld8 r25 = [r20], 8 \n\t"
+ "(p7) ldf.fill f21 = [r27] \n\t"
+ "cmp.ne p6, p0 = r0, r28 \n\t"
+ ";; \n\t"
+ "ld8 r26 = [r20], 8 \n\t"
+ "(p6) ldf.fill f22 = [r28] \n\t"
+ "cmp.ne p7, p0 = r0, r29 \n\t"
+ ";; \n\t"
+ "ld8 r27 = [r20], 8 \n\t"
+ ";; \n\t"
+ "ld8 r28 = [r20], 8 \n\t"
+ "(p7) ldf.fill f23 = [r29] \n\t"
+ "cmp.ne p6, p0 = r0, r22 \n\t"
+ ";; \n\t"
+ "ld8 r29 = [r20], 8 \n\t"
+ "(p6) ldf.fill f24 = [r22] \n\t"
+ "cmp.ne p7, p0 = r0, r23 \n\t"
+ ";; \n\t"
+ "(p7) ldf.fill f25 = [r23] \n\t"
+ "cmp.ne p6, p0 = r0, r24 \n\t"
+ "cmp.ne p7, p0 = r0, r25 \n\t"
+ ";; \n\t"
+ "(p6) ldf.fill f26 = [r24] \n\t"
+ "(p7) ldf.fill f27 = [r25] \n\t"
+ "cmp.ne p6, p0 = r0, r26 \n\t"
+ ";; \n\t"
+ "(p6) ldf.fill f28 = [r26] \n\t"
+ "cmp.ne p7, p0 = r0, r27 \n\t"
+ "cmp.ne p6, p0 = r0, r28 \n\t"
+ ";; \n\t"
+ "(p7) ldf.fill f29 = [r27] \n\t"
+ "(p6) ldf.fill f30 = [r28] \n\t"
+ "cmp.ne p7, p0 = r0, r29 \n\t"
+ ";; \n\t"
+ "(p7) ldf.fill f31 = [r29] \n\t"
+ "add r20 = uc_rnat, %0 \n\t"
+ "add r21 = uc_bsp, %0 \n\t"
+ ";; \n\t"
+ /* Load the balance of the thread state from the context. */
+ "ld8 r22 = [r20], uc_psp - uc_rnat \n\t"
+ "ld8 r23 = [r21], uc_gp - uc_bsp \n\t"
+ ";; \n\t"
+ "ld8 r24 = [r20], uc_pfs_loc - uc_psp \n\t"
+ "ld8 r1 = [r21], uc_rp - uc_gp \n\t"
+ ";; \n\t"
+ "ld8 r25 = [r20], uc_unat_loc - uc_pfs_loc\n\t"
+ "ld8 r26 = [r21], uc_pr - uc_rp \n\t"
+ ";; \n\t"
+ "ld8 r27 = [r20], uc_lc_loc - uc_unat_loc\n\t"
+ "ld8 r28 = [r21], uc_fpsr_loc - uc_pr \n\t"
+ ";; \n\t"
+ "ld8 r29 = [r20], uc_eh_data - uc_lc_loc\n\t"
+ "ld8 r30 = [r21], uc_eh_data + 8 - uc_fpsr_loc\n\t"
+ ";; \n\t"
+ /* Load data for the exception handler. */
+ "ld8 r15 = [r20], 16 \n\t"
+ "ld8 r16 = [r21], 16 \n\t"
+ ";; \n\t"
+ "ld8 r17 = [r20] \n\t"
+ "ld8 r18 = [r21] \n\t"
+ ";; \n\t"
+ /* Install the balance of the thread state loaded above. */
+ "cmp.ne p6, p0 = r0, r25 \n\t"
+ "cmp.ne p7, p0 = r0, r27 \n\t"
+ ";; \n\t"
+ "(p6) ld8 r25 = [r25] \n\t"
+ "(p7) ld8 r27 = [r27] \n\t"
+ ";; \n\t"
+ "(p7) mov.m ar.unat = r27 \n\t"
+ "(p6) mov.i ar.pfs = r25 \n\t"
+ "cmp.ne p9, p0 = r0, r29 \n\t"
+ ";; \n\t"
+ "(p9) ld8 r29 = [r29] \n\t"
+ "cmp.ne p6, p0 = r0, r30 \n\t"
+ ";; \n\t"
+ "(p6) ld8 r30 = [r30] \n\t"
+ /* Don't clobber p6-p9, which are in use at present. */
+ "mov pr = r28, ~0x3c0 \n\t"
+ "(p9) mov.i ar.lc = r29 \n\t"
+ ";; \n\t"
+ "mov.m r25 = ar.rsc \n\t"
+ "(p6) mov.m ar.fpsr = r30 \n\t"
+ ";; \n\t"
+ "and r29 = 0x1c, r25 \n\t"
+ "mov b0 = r26 \n\t"
+ ";; \n\t"
+ "mov.m ar.rsc = r29 \n\t"
+ ";; \n\t"
+ /* This must be done before setting AR.BSPSTORE, otherwise
+ AR.BSP will be initialized with a random displacement
+ below the value we want, based on the current number of
+ dirty stacked registers. */
+ "loadrs \n\t"
+ "invala \n\t"
+ ";; \n\t"
+ "mov.m ar.bspstore = r23 \n\t"
+ ";; \n\t"
+ "mov.m ar.rnat = r22 \n\t"
+ ";; \n\t"
+ "mov.m ar.rsc = r25 \n\t"
+ "mov sp = r24 \n\t"
+ "br.ret.sptk.few b0"
+ : : "r"(target), "r"(ireg_buf), "r"(ireg_nat), "r"(ireg_pr)
+ : "r15", "r16", "r17", "r18", "r20", "r21", "r22",
+ "r23", "r24", "r25", "r26", "r27", "r28", "r29",
+ "r30", "r31");
+ /* NOTREACHED */
+ while (1);
+}
+
+static inline _Unwind_Ptr
+uw_identify_context (struct _Unwind_Context *context)
+{
+ return _Unwind_GetIP (context);
+}
+
+#include "unwind.inc"
+
+#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS)
+alias (_Unwind_Backtrace);
+alias (_Unwind_DeleteException);
+alias (_Unwind_FindEnclosingFunction);
+alias (_Unwind_ForcedUnwind);
+alias (_Unwind_GetBSP);
+alias (_Unwind_GetCFA);
+alias (_Unwind_GetGR);
+alias (_Unwind_GetIP);
+alias (_Unwind_GetLanguageSpecificData);
+alias (_Unwind_GetRegionStart);
+alias (_Unwind_RaiseException);
+alias (_Unwind_Resume);
+alias (_Unwind_Resume_or_Rethrow);
+alias (_Unwind_SetGR);
+alias (_Unwind_SetIP);
+#endif
+
+#endif
diff --git a/gcc/config/ia64/unwind-ia64.h b/gcc/config/ia64/unwind-ia64.h
new file mode 100644
index 000000000..b98f048fd
--- /dev/null
+++ b/gcc/config/ia64/unwind-ia64.h
@@ -0,0 +1,43 @@
+/* Copyright (C) 1999, 2000, 2001, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Andrew MacLeod <amacleod@cygnus.com>
+ Andrew Haley <aph@cygnus.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+struct unw_table_entry
+{
+ unsigned long start_offset;
+ unsigned long end_offset;
+ unsigned long info_offset;
+};
+
+/* Accessors for the fields of an unwind info block header. They are
+ placed in this common file so as to be visible from all the units
+ involved in a target implementation. */
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+#define UNW_VER(x) ((x) >> 48)
+#define UNW_FLAG_MASK 0x0000ffff00000000
+#define UNW_FLAG_OSMASK 0x0000f00000000000
+#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L)
+#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L)
+#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL)
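+
+/* For example, given the first 8-byte word H of a version 1 unwind
+ info block that carries an exception handler, UNW_VER (H) is 1,
+ UNW_FLAG_EHANDLER (H) is nonzero, and UNW_LENGTH (H) gives the
+ length of the descriptor area in 8-byte words. */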
+#endif
+
+extern struct unw_table_entry *
+_Unwind_FindTableEntry (void *pc, unsigned long *segment_base,
+ unsigned long *gp, struct unw_table_entry *ent)
+ __attribute__ ((__visibility__ ("hidden")));
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
new file mode 100644
index 000000000..1684c8092
--- /dev/null
+++ b/gcc/config/ia64/vect.md
@@ -0,0 +1,1730 @@
+;; IA-64 machine description for vector operations.
+;; Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Integer vector operations
+
+(define_mode_iterator VECINT [V8QI V4HI V2SI])
+(define_mode_iterator VECINT12 [V8QI V4HI])
+(define_mode_iterator VECINT24 [V4HI V2SI])
+(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
+(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
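+
+;; The <vecsize> attribute supplies the element-size suffix used by
+;; the parallel ALU instructions, so e.g. add<mode>3 emits padd1 for
+;; V8QI and padd4 for V2SI; <vecwider> maps each mode to the mode with
+;; elements twice as wide, as used by the unpack and widening-multiply
+;; expanders below.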
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VECINT 0 "general_operand" "")
+ (match_operand:VECINT 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:VECINT 0 "destination_operand"
+ "=r,r,r,r,m ,*f ,*f,Q ,r ,*f")
+ (match_operand:VECINT 1 "move_operand"
+ "rU,W,i,m,rU,U*f,Q ,*f,*f,r "))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %r1
+ addl %0 = %v1, r0
+ movl %0 = %v1
+ ld8%O1 %0 = %1%P1
+ st8%Q0 %0 = %r1%P0
+ mov %0 = %F1
+ ldf8 %0 = %1%P1
+ stf8 %0 = %1%P0
+ getf.sig %0 = %1
+ setf.sig %0 = %1"
+ [(set_attr "itanium_class" "ialu,ialu,long_i,ld,st,fmisc,fld,stf,frfr,tofr")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (not:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))]
+ ""
+ "andcm %0 = -1, %1"
+ [(set_attr "itanium_class" "ilog")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (and:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ and %0 = %2, %1
+ fand %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "*andnot<mode>"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (and:VECINT
+ (not:VECINT (match_operand:VECINT 1 "grfr_register_operand" "r,*f"))
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ andcm %0 = %2, %1
+ fandcm %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (ior:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ or %0 = %2, %1
+ for %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (xor:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ xor %0 = %2, %1
+ fxor %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (neg:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize> %0 = r0, %1"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (plus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")
+ (match_operand:VECINT 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*ssadd<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (ss_plus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize>.sss %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*usadd<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (us_plus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize>.uuu %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (minus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")
+ (match_operand:VECINT 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*sssub<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (ss_minus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize>.sss %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*ussub<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (us_minus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize>.uuu %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_expand "mulv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "")
+ (mult:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+{
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2]));
+ emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2]));
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l));
+ else
+ emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h));
+ DONE;
+})
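+
+;; There is no parallel one-byte multiply, so mulv8qi3 above widens
+;; both operands to V4HI, multiplies, and packs the low byte of each
+;; 16-bit product back together; the low eight bits of a product are
+;; the same whether the widening is signed or unsigned.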
+
+(define_expand "vec_widen_umult_lo_v8qi"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_reg_rtx (V4HImode);
+ rtx op2 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_unpacku_lo_v8qi (op1, operands[1]));
+ emit_insn (gen_vec_unpacku_lo_v8qi (op2, operands[2]));
+ emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v8qi"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_reg_rtx (V4HImode);
+ rtx op2 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_unpacku_hi_v8qi (op1, operands[1]));
+ emit_insn (gen_vec_unpacku_hi_v8qi (op2, operands[2]));
+ emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v8qi"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_reg_rtx (V4HImode);
+ rtx op2 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_unpacks_lo_v8qi (op1, operands[1]));
+ emit_insn (gen_vec_unpacks_lo_v8qi (op2, operands[2]));
+ emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v8qi"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_reg_rtx (V4HImode);
+ rtx op2 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_unpacks_hi_v8qi (op1, operands[1]));
+ emit_insn (gen_vec_unpacks_hi_v8qi (op2, operands[2]));
+ emit_insn (gen_mulv4hi3 (operands[0], op1, op2));
+ DONE;
+})
+
+(define_insn "mulv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r")
+ (match_operand:V4HI 2 "gr_register_operand" "r")))]
+ ""
+ "pmpyshr2 %0 = %1, %2, 0"
+ [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "pmpyshr2"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" "r"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "r")))
+ (match_operand:SI 3 "pmpyshr_operand" "n"))))]
+ ""
+ "pmpyshr2 %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "pmpyshr2_u"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" "r"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "r")))
+ (match_operand:SI 3 "pmpyshr_operand" "n"))))]
+ ""
+ "pmpyshr2.u %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "pmpy2_even"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (mult:V2SI
+ (vec_select:V2SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" "r"))
+ (parallel [(const_int 0) (const_int 2)]))
+ (vec_select:V2SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "r"))
+ (parallel [(const_int 0) (const_int 2)]))))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pmpy2.l %0 = %1, %2";
+ else
+ return "%,pmpy2.r %0 = %1, %2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "pmpy2_odd"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (mult:V2SI
+ (vec_select:V2SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" "r"))
+ (parallel [(const_int 1) (const_int 3)]))
+ (vec_select:V2SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "r"))
+ (parallel [(const_int 1) (const_int 3)]))))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pmpy2.r %0 = %1, %2";
+ else
+ return "%,pmpy2.l %0 = %1, %2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_widen_smult_lo_v4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, false);
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, true);
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, false);
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, true);
+ DONE;
+})
+
+(define_expand "mulv2si3"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "")
+ (mult:V2SI (match_operand:V2SI 1 "gr_register_operand" "r")
+ (match_operand:V2SI 2 "gr_register_operand" "r")))]
+ ""
+{
+ rtx t0, t1, t2, t3, t4, t5, t6, t7, x;
+ rtx op1h = gen_lowpart (V4HImode, operands[1]);
+ rtx op2h = gen_lowpart (V4HImode, operands[2]);
+
+ t0 = gen_reg_rtx (V4HImode);
+ t1 = gen_reg_rtx (V4HImode);
+ t2 = gen_reg_rtx (V4HImode);
+ t3 = gen_reg_rtx (V4HImode);
+ t4 = gen_reg_rtx (V2SImode);
+ t5 = gen_reg_rtx (V2SImode);
+ t6 = gen_reg_rtx (V2SImode);
+ t7 = gen_reg_rtx (V2SImode);
+
+ /* Consider the HImode components of op1 = DCBA, op2 = ZYXW. The
+ .l and .h suffixes used below denote the low and high 16 bits
+ of the full 32-bit product. */
+
+ /* T0 = CDBA. */
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
+ GEN_INT (3), const2_rtx));
+ x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
+ emit_insn (gen_rtx_SET (VOIDmode, t0, x));
+
+ /* T1 = DZ.l, CY.l, BX.l, AW.l. */
+ emit_insn (gen_mulv4hi3 (t1, op1h, op2h));
+
+ /* T2 = DZ.h, CY.h, BX.h, AW.h. */
+ emit_insn (gen_pmpyshr2_u (t2, op1h, op2h, GEN_INT (16)));
+
+ /* T3 = CZ.l, DY.l, AX.l, BW.l. */
+ emit_insn (gen_mulv4hi3 (t3, t0, op2h));
+
+ /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */
+ x = gen_lowpart (V4HImode, t4);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_odd (x, t2, t1);
+ else
+ x = gen_mix2_even (x, t1, t2);
+ emit_insn (x);
+
+ /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */
+ x = gen_lowpart (V4HImode, t5);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode));
+ else
+ x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3);
+ emit_insn (x);
+
+ /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */
+ x = gen_lowpart (V4HImode, t6);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode));
+ else
+ x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3);
+ emit_insn (x);
+
+ emit_insn (gen_addv2si3 (t7, t4, t5));
+ emit_insn (gen_addv2si3 (operands[0], t6, t7));
+ DONE;
+})
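+
+;; Per SI element, the sum computed above is the full low product plus
+;; the two 16-bit cross products shifted left 16; e.g. for the low
+;; element, AW + (AX.l << 16) + (BW.l << 16), which equals
+;; (B:A) * (X:W) modulo 2^32.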
+
+(define_expand "umax<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (umax:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:VECINT 2 "gr_register_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (UMAX, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (smax:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (SMAX, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (umin:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:VECINT 2 "gr_register_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (UMIN, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (smin:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (SMIN, <MODE>mode, operands))
+ DONE;
+})
+
+(define_insn "*umaxv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (umax:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+ "pmax1.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*smaxv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (smax:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pmax2 %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*uminv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (umin:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+ "pmin1.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*sminv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (smin:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pmin2 %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (ashift:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshl<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (ashiftrt:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshr<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (lshiftrt:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:DI 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshr<vecsize>.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (ashift:DI (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_reg_or_6bit_operand" "")))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (lshiftrt:DI (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:DI 2 "gr_reg_or_6bit_operand" "")))]
+ ""
+{
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+})
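+
+;; A shift of the whole 64-bit vector is just a DImode shift of the
+;; underlying general register, hence the gen_lowpart calls above.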
+
+(define_expand "widen_usumv8qi3"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_widen_sum (operands, true);
+ DONE;
+})
+
+(define_expand "widen_usumv4hi3"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V2SI 2 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_widen_sum (operands, true);
+ DONE;
+})
+
+(define_expand "widen_ssumv8qi3"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_widen_sum (operands, false);
+ DONE;
+})
+
+(define_expand "widen_ssumv4hi3"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V2SI 2 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_widen_sum (operands, false);
+ DONE;
+})
+
+(define_expand "udot_prodv8qi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")
+ (match_operand:V2SI 3 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_dot_prod_v8qi (operands, true);
+ DONE;
+})
+
+(define_expand "sdot_prodv8qi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")
+ (match_operand:V2SI 3 "gr_register_operand" "")]
+ ""
+{
+ ia64_expand_dot_prod_v8qi (operands, false);
+ DONE;
+})
+
+(define_expand "sdot_prodv4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")
+ (match_operand:V2SI 3 "gr_register_operand" "")]
+ ""
+{
+ rtx e, o, t;
+
+ e = gen_reg_rtx (V2SImode);
+ o = gen_reg_rtx (V2SImode);
+ t = gen_reg_rtx (V2SImode);
+
+ emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
+ emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
+ emit_insn (gen_addv2si3 (t, e, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], t, o));
+ DONE;
+})
+
+(define_expand "udot_prodv4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")
+ (match_operand:V2SI 3 "gr_register_operand" "")]
+ ""
+{
+ rtx l, h, t;
+
+ l = gen_reg_rtx (V2SImode);
+ h = gen_reg_rtx (V2SImode);
+ t = gen_reg_rtx (V2SImode);
+
+ emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
+ emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
+ emit_insn (gen_addv2si3 (t, l, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], t, h));
+ DONE;
+})
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (if_then_else:VECINT
+ (match_operator 3 ""
+ [(match_operand:VECINT 4 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 5 "gr_reg_or_0_operand" "")])
+ (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ ia64_expand_vecint_cmov (operands);
+ DONE;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (if_then_else:VECINT
+ (match_operator 3 ""
+ [(match_operand:VECINT 4 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 5 "gr_reg_or_0_operand" "")])
+ (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ ia64_expand_vecint_cmov (operands);
+ DONE;
+})
+
+(define_insn "*cmpeq_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (eq:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pcmp<vecsize>.eq %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*cmpgt_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (gt:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pcmp<vecsize>.gt %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "vec_pack_ssat_v4hi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack2.sss %0 = %r2, %r1";
+ else
+ return "%,pack2.sss %0 = %r1, %r2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_pack_usat_v4hi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_concat:V8QI
+ (us_truncate:V4QI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack2.uss %0 = %r2, %r1";
+ else
+ return "%,pack2.uss %0 = %r1, %r2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_pack_ssat_v2si"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack4.sss %0 = %r2, %r1";
+ else
+ return "%,pack4.sss %0 = %r1, %r2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_interleave_lowv8qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack1.l %0 = %r1, %r2";
+ else
+ return "%,unpack1.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_interleave_highv8qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack1.h %0 = %r1, %r2";
+ else
+ return "%,unpack1.h %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix1_even"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix1.l %0 = %r1, %r2";
+ else
+ return "%,mix1.r %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix1_odd"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix1.r %0 = %r1, %r2";
+ else
+ return "%,mix1.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_rev"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ ""
+ "mux1 %0 = %1, @rev"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_mix"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)
+ (const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @mix"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_shuf"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)
+ (const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @shuf"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mux1_alt"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @alt"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_brcst_v8qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(match_operand 2 "mux1_brcst_element" "")
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)])))]
+ ""
+ "mux1 %0 = %1, @brcst"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_brcst_qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_duplicate:V8QI
+ (match_operand:QI 1 "gr_register_operand" "r")))]
+ ""
+ "mux1 %0 = %1, @brcst"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_extract_evenv8qi"
+ [(match_operand:V8QI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx temp = gen_reg_rtx (V8QImode);
+ emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
+ emit_insn (gen_mux1_alt (operands[0], temp));
+ DONE;
+})
+
+(define_expand "vec_extract_oddv8qi"
+ [(match_operand:V8QI 0 "gr_register_operand" "")
+ (match_operand:V8QI 1 "gr_register_operand" "")
+ (match_operand:V8QI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx temp = gen_reg_rtx (V8QImode);
+ emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
+ emit_insn (gen_mux1_alt (operands[0], temp));
+ DONE;
+})
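+
+;; The two expanders above first pair up the even (or odd) bytes of
+;; the operands with mix1, then use mux1 @alt to gather operand 1's
+;; elements into one half of the result and operand 2's into the
+;; other.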
+
+(define_insn "vec_interleave_lowv4hi"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack2.l %0 = %r1, %r2";
+ else
+ return "%,unpack2.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_interleave_highv4hi"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack2.h %0 = %r1, %r2";
+ else
+ return "%,unpack2.h %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix2_even"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix2.l %0 = %r1, %r2";
+ else
+ return "%,mix2.r %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix2_odd"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix2.r %0 = %r1, %r2";
+ else
+ return "%,mix2.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux2"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "gr_register_operand" "r")
+ (parallel [(match_operand 2 "const_int_2bit_operand" "")
+ (match_operand 3 "const_int_2bit_operand" "")
+ (match_operand 4 "const_int_2bit_operand" "")
+ (match_operand 5 "const_int_2bit_operand" "")])))]
+ ""
+{
+ int mask = 0;
+ if (TARGET_BIG_ENDIAN)
+ {
+ mask |= (3 - INTVAL (operands[2])) << 6;
+ mask |= (3 - INTVAL (operands[3])) << 4;
+ mask |= (3 - INTVAL (operands[4])) << 2;
+ mask |= 3 - INTVAL (operands[5]);
+ }
+ else
+ {
+ mask |= INTVAL (operands[2]);
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ }
+ operands[2] = GEN_INT (mask);
+ return "%,mux2 %0 = %1, %2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_extract_evenodd_helper"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 1) (const_int 3)])))]
+ "")
+
+(define_expand "vec_extract_evenv4hi"
+ [(match_operand:V4HI 0 "gr_register_operand")
+ (match_operand:V4HI 1 "gr_reg_or_0_operand")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand")]
+ ""
+{
+ rtx temp = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
+ emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
+ DONE;
+})
+
+(define_expand "vec_extract_oddv4hi"
+ [(match_operand:V4HI 0 "gr_register_operand")
+ (match_operand:V4HI 1 "gr_reg_or_0_operand")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand")]
+ ""
+{
+ rtx temp = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
+ emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
+ DONE;
+})
+
+(define_insn "*mux2_brcst_hi"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_duplicate:V4HI
+ (match_operand:HI 1 "gr_register_operand" "r")))]
+ ""
+ "mux2 %0 = %1, 0"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_interleave_lowv2si"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.l %0 = %r1, %r2";
+ else
+ return "%,unpack4.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "vec_interleave_highv2si"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.h %0 = %r1, %r2";
+ else
+ return "%,unpack4.h %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_extract_evenv2si"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V2SI 1 "gr_register_operand" "")
+ (match_operand:V2SI 2 "gr_register_operand" "")]
+ ""
+{
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extract_oddv2si"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V2SI 1 "gr_register_operand" "")
+ (match_operand:V2SI 2 "gr_register_operand" "")]
+ ""
+{
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2si"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ rtx op1 = XVECEXP (operands[1], 0, 0);
+ rtx op2 = XVECEXP (operands[1], 0, 1);
+ rtx x;
+
+ if (GET_CODE (op1) == CONST_INT && GET_CODE (op2) == CONST_INT)
+ {
+ x = gen_rtx_CONST_VECTOR (V2SImode, XVEC (operands[1], 0));
+ emit_move_insn (operands[0], x);
+ DONE;
+ }
+
+ if (!gr_reg_or_0_operand (op1, SImode))
+ op1 = force_reg (SImode, op1);
+ if (!gr_reg_or_0_operand (op2, SImode))
+ op2 = force_reg (SImode, op2);
+
+ x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*vecinit_v2si"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.l %0 = %r1, %r2";
+ else
+ return "%,unpack4.l %0 = %r2, %r1";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+;; Missing operations
+;; padd.uus
+;; pavg
+;; pavgsub
+;; psad
+;; pshladd
+;; pshradd
+;; psub.uus
+
+;; Floating point vector operations
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "general_operand" "")
+ (match_operand:V2SF 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "destination_operand"
+ "=f,f,f,Q,*r ,*r,*r,*r,m ,f ,*r")
+ (match_operand:V2SF 1 "move_operand"
+ "fU,Y,Q,f,U*r,W ,i ,m ,*r,*r,f "))]
+ "ia64_move_ok (operands[0], operands[1])"
+{
+ static const char * const alt[] = {
+ "%,mov %0 = %F1",
+ "%,fpack %0 = %F2, %F1",
+ "%,ldf8 %0 = %1%P1",
+ "%,stf8 %0 = %1%P0",
+ "%,mov %0 = %r1",
+ "%,addl %0 = %v1, r0",
+ "%,movl %0 = %v1",
+ "%,ld8%O1 %0 = %1%P1",
+ "%,st8%Q0 %0 = %r1%P0",
+ "%,setf.sig %0 = %1",
+ "%,getf.sig %0 = %1"
+ };
+
+ if (which_alternative == 1)
+ {
+ operands[2] = XVECEXP (operands[1], 0, TARGET_BIG_ENDIAN ? 0 : 1);
+ operands[1] = XVECEXP (operands[1], 0, TARGET_BIG_ENDIAN ? 1 : 0);
+ }
+
+ return alt[which_alternative];
+}
+ [(set_attr "itanium_class" "fmisc,fmisc,fld,stf,ialu,ialu,long_i,ld,st,tofr,frfr")])
+
+(define_insn "absv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))]
+ ""
+ "fpabs %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "negv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))]
+ ""
+ "fpneg %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*negabsv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF
+ (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f"))))]
+ ""
+ "fpnegabs %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "addv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (fma:V2SF (match_operand:V2SF 1 "fr_register_operand" "")
+ (match_dup 3)
+ (match_operand:V2SF 2 "fr_register_operand" "")))]
+ ""
+{
+ rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode));
+ operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v));
+})
+
+(define_expand "subv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (fma:V2SF
+ (match_operand:V2SF 1 "fr_register_operand" "")
+ (match_dup 3)
+ (neg:V2SF (match_operand:V2SF 2 "fr_register_operand" ""))))]
+ ""
+{
+ rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode));
+ operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v));
+})
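+
+;; There is no plain parallel FP add or subtract, so addv2sf3 and
+;; subv2sf3 are expressed as fused multiply-add: op1 * (1.0, 1.0)
+;; plus (or minus) op2, via fpma and fpms.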
+
+(define_insn "mulv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmpy %0 = %1, %2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmav2sf4"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (fma:V2SF
+ (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")
+ (match_operand:V2SF 3 "fr_register_operand" "f")))]
+ ""
+ "fpma %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fmsv2sf4"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (fma:V2SF
+ (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")
+ (neg:V2SF (match_operand:V2SF 3 "fr_register_operand" "f"))))]
+ ""
+ "fpms %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fpnmpy"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f"))))]
+ ""
+ "fpnmpy %0 = %1, %2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "fnmav2sf4"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (fma:V2SF
+ (neg:V2SF (match_operand:V2SF 1 "fr_register_operand" "f"))
+ (match_operand:V2SF 2 "fr_register_operand" "f")
+ (match_operand:V2SF 3 "fr_register_operand" "f")))]
+ ""
+ "fpnma %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "smaxv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (smax:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmax %0 = %1, %2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "sminv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (smin:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmin %0 = %1, %2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "reduc_splus_v2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand:V2SF 1 "fr_register_operand" "")]
+ ""
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+ else
+ emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+ emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp));
+ DONE;
+})
+
+(define_expand "reduc_smax_v2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand:V2SF 1 "fr_register_operand" "")]
+ ""
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+ else
+ emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+ emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp));
+ DONE;
+})
+
+(define_expand "reduc_smin_v2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand:V2SF 1 "fr_register_operand" "")]
+ ""
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+ else
+ emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+ emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp));
+ DONE;
+})
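+
+;; The reductions above use fswap to line one element up with the
+;; other and then apply the vertical operation: one element of the
+;; result combines both inputs, while the other, which combines with
+;; zero, is ignored.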
+
+(define_expand "vcondv2sf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (if_then_else:V2SF
+ (match_operator 3 ""
+ [(match_operand:V2SF 4 "fr_reg_or_0_operand" "")
+ (match_operand:V2SF 5 "fr_reg_or_0_operand" "")])
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "")))]
+ ""
+{
+ rtx x, cmp;
+
+ cmp = gen_reg_rtx (V2SFmode);
+ PUT_MODE (operands[3], V2SFmode);
+ emit_insn (gen_rtx_SET (VOIDmode, cmp, operands[3]));
+
+ x = gen_rtx_IF_THEN_ELSE (V2SFmode, cmp, operands[1], operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*fpcmp"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (match_operator:V2SF 3 "comparison_operator"
+ [(match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")]))]
+ ""
+ "fpcmp.%D3 %0 = %F1, %F2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*fselect"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (if_then_else:V2SF
+ (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 3 "fr_reg_or_0_operand" "fU")))]
+ ""
+ "fselect %0 = %F2, %F3, %1"
+ [(set_attr "itanium_class" "fmisc")])
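+
+;; vcondv2sf is implemented as an fpcmp, which yields an all-ones or
+;; all-zeros mask in each element, followed by fselect, which picks
+;; each result bit from %F2 or %F3 according to that mask.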
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ rtx op1 = XVECEXP (operands[1], 0, 0);
+ rtx op2 = XVECEXP (operands[1], 0, 1);
+ rtx x;
+
+ if (GET_CODE (op1) == CONST_DOUBLE && GET_CODE (op2) == CONST_DOUBLE)
+ {
+ x = gen_rtx_CONST_VECTOR (V2SFmode, XVEC (operands[1], 0));
+ emit_move_insn (operands[0], x);
+ DONE;
+ }
+
+ if (!fr_reg_or_fp01_operand (op1, SFmode))
+ op1 = force_reg (SFmode, op1);
+ if (!fr_reg_or_fp01_operand (op2, SFmode))
+ op2 = force_reg (SFmode, op2);
+
+ emit_insn (gen_fpack (operands[0], op1, op2));
+ DONE;
+})
+
+(define_insn "fpack"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fpack %0 = %F1, %F2";
+ else
+ return "%,fpack %0 = %F2, %F1";
+}
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "fswap"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+ (parallel [(const_int 1) (const_int 2)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fswap %0 = %F2, %F1";
+ else
+ return "%,fswap %0 = %F1, %F2";
+}
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "vec_interleave_highv2sf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.l %0 = %F1, %F2";
+ else
+ return "%,fmix.l %0 = %F2, %F1";
+}
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "vec_interleave_lowv2sf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.r %0 = %F1, %F2";
+ else
+ return "%,fmix.r %0 = %F2, %F1";
+}
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "fmix_lr"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+ (parallel [(const_int 0) (const_int 3)])))]
+ ""
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.lr %0 = %F1, %F2";
+ else
+ return "%,fmix.lr %0 = %F2, %F1";
+}
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "vec_extract_evenv2sf"
+ [(match_operand:V2SF 0 "gr_register_operand" "")
+ (match_operand:V2SF 1 "gr_register_operand" "")
+ (match_operand:V2SF 2 "gr_register_operand" "")]
+ ""
+{
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extract_oddv2sf"
+ [(match_operand:V2SF 0 "gr_register_operand" "")
+ (match_operand:V2SF 1 "gr_register_operand" "")
+ (match_operand:V2SF 2 "gr_register_operand" "")]
+ ""
+{
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_expand "vec_setv2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand:SF 1 "fr_register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ ""
+{
+ rtx op0 = operands[0];
+ rtx tmp = gen_reg_rtx (V2SFmode);
+
+ emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
+
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_fmix_lr (op0, tmp, op0));
+ break;
+ case 1:
+ emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
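+
+;; vec_setv2sf packs the scalar into one element of a temporary (the
+;; other element is zero) and then merges it into the destination
+;; with fmix.lr or the low-interleave, depending on which element is
+;; being replaced.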
+
+(define_insn_and_split "*vec_extractv2sf_0_le"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,f,m")
+ (unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "rfm,rm,r")
+ (const_int 0)]
+ UNSPEC_VECT_EXTR))]
+ "!TARGET_BIG_ENDIAN"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
+ operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+ else if (MEM_P (operands[1]))
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+ else
+ operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
+(define_insn_and_split "*vec_extractv2sf_0_be"
+ [(set (match_operand:SF 0 "register_operand" "=rf,r")
+ (unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r")
+ (const_int 0)]
+ UNSPEC_VECT_EXTR))]
+ "TARGET_BIG_ENDIAN"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ if (MEM_P (operands[1]))
+ operands[1] = adjust_address (operands[1], SFmode, 0);
+ else
+ {
+ emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*vec_extractv2sf_1_le"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+ (const_int 1)]
+ UNSPEC_VECT_EXTR))]
+ "!TARGET_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
+ emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv2sf_1_be"
+ [(set (match_operand:SF 0 "register_operand" "=rf")
+ (unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+ (const_int 1)]
+ UNSPEC_VECT_EXTR))]
+ "TARGET_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
+(define_expand "vec_extractv2sf"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:V2SF 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")]
+ UNSPEC_VECT_EXTR))]
+ ""
+ "")
+
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<vecwider> 0 "register_operand" "")
+ (match_operand:VECINT12 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<vecwider> 0 "register_operand" "")
+ (match_operand:VECINT12 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<vecwider> 0 "register_operand" "")
+ (match_operand:VECINT12 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<vecwider> 0 "register_operand" "")
+ (match_operand:VECINT12 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_pack_trunc_v4hi"
+ [(match_operand:V8QI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_lowpart (V8QImode, operands[1]);
+ rtx op2 = gen_lowpart (V8QImode, operands[2]);
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
+ else
+ emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
+ DONE;
+})
+
+(define_expand "vec_pack_trunc_v2si"
+ [(match_operand:V4HI 0 "gr_register_operand" "")
+ (match_operand:V2SI 1 "gr_register_operand" "")
+ (match_operand:V2SI 2 "gr_register_operand" "")]
+ ""
+{
+ rtx op1 = gen_lowpart (V4HImode, operands[1]);
+ rtx op2 = gen_lowpart (V4HImode, operands[2]);
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
+ else
+ emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
+ DONE;
+})
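+
+;; A worked example (editorial, not from the upstream sources): packing
+;; V4HI {1,2,3,4} and {5,6,7,8} with truncation keeps the low byte of
+;; each 16-bit element.  Viewed as V8QI in memory order, those low bytes
+;; sit at the even byte positions on little-endian and at the odd byte
+;; positions on big-endian, which is why the expanders above reinterpret
+;; the inputs with gen_lowpart and then extract the even or odd bytes.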
+
+;; Missing operations
+;; fprcpa
+;; fpsqrta
diff --git a/gcc/config/ia64/vms-crtinit.asm b/gcc/config/ia64/vms-crtinit.asm
new file mode 100644
index 000000000..322b29273
--- /dev/null
+++ b/gcc/config/ia64/vms-crtinit.asm
@@ -0,0 +1,24 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .global LIB$INITIALIZE#
diff --git a/gcc/config/ia64/vms-unwind.h b/gcc/config/ia64/vms-unwind.h
new file mode 100644
index 000000000..41c76ae76
--- /dev/null
+++ b/gcc/config/ia64/vms-unwind.h
@@ -0,0 +1,307 @@
+/* DWARF2 EH unwinding support for IA64 VMS.
+ Copyright (C) 2005-2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <vms/libicb.h>
+#include <vms/chfdef.h>
+#include <vms/chfctxdef.h>
+
+#define __int64 long long
+#include <vms/intstkdef.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#define DYN$C_SSENTRY 66
+/* ??? We would rather get this from the proper header file. */
+
+#define MD_FALLBACK_FRAME_STATE_FOR ia64_vms_fallback_frame_state
+
+extern INVO_CONTEXT_BLK * LIB$I64_CREATE_INVO_CONTEXT (void);
+
+extern int LIB$I64_IS_EXC_DISPATCH_FRAME (void *);
+extern int LIB$I64_IS_AST_DISPATCH_FRAME (void *);
+
+extern int LIB$I64_INIT_INVO_CONTEXT (INVO_CONTEXT_BLK *, int, int);
+extern int LIB$I64_GET_CURR_INVO_CONTEXT (INVO_CONTEXT_BLK *);
+extern int LIB$I64_GET_PREV_INVO_CONTEXT (INVO_CONTEXT_BLK *);
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+typedef unsigned long uw_reg;
+typedef uw_reg * uw_loc;
+
+typedef char fp_reg[16];
+
+#define DENOTES_VMS_DISPATCHER_FRAME(icb) \
+(LIB$I64_IS_EXC_DISPATCH_FRAME (&(icb)->libicb$ih_pc))
+
+#define DENOTES_BOTTOM_OF_STACK(icb) ((icb)->libicb$v_bottom_of_stack)
+
+#define FAIL_IF(COND) \
+ do { if (COND) { context->rp = 0; return _URC_END_OF_STACK; } } while (0)
+/* Clearing context->rp is required to prevent the ia64 gcc unwinder from
+ continuing to walk the call chain. */
+
+static int
+ia64_vms_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ int i, status;
+
+ INVO_CONTEXT_BLK local_icb;
+ INVO_CONTEXT_BLK *icb = &local_icb;
+
+ CHFCTX * chfctx;
+ CHF$MECH_ARRAY * chfmech;
+ CHF64$SIGNAL_ARRAY *chfsig64;
+ INTSTK * intstk;
+
+ static int eh_debug = -1;
+ int try_bs_copy = 0;
+ /* Nonzero to attempt copy of alternate backing store contents for
+ dirty partition in interrupted context. ??? Alpha code, only activated
+ on specific request via specific bit in EH_DEBUG. */
+
+ if (eh_debug == -1)
+ {
+ char * EH_DEBUG = getenv ("EH_DEBUG");
+ const uint try_bs_copy_mask = (1 << 16);
+
+ eh_debug = EH_DEBUG ? atoi (EH_DEBUG) : 0;
+
+ /* Fetch and clear the try_bs_copy bit. */
+ try_bs_copy = (uint)eh_debug & try_bs_copy_mask;
+ eh_debug &= ~try_bs_copy_mask;
+ }
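+
+  /* For example (editorial note): setting EH_DEBUG=65537 in the
+     environment, i.e. 0x10001 == (1 << 16) | 1, yields eh_debug == 1
+     with the backing store copy experiment enabled, since bit 16 is
+     fetched into try_bs_copy and then cleared.  */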
+
+ /* We're called to attempt unwinding through a frame for which no unwind
+ info is available, typical of an operating system exception dispatcher
+ frame. The code below knows how to handle this case, and only this one,
+ returning a failure code if it finds it is not in this situation.
+
+ Note that we're called from deep down in the exception propagation call
+ chain, possibly below an exception dispatcher but for a frame above it
+ like some OS entry point. */
+
+ if (eh_debug)
+ printf ("FALLBACK - ctxt->rp=0x%lx, sp=0x%lx, psp=0x%lx, bsp=0x%lx\n",
+ context->rp, context->sp, context->psp, context->bsp);
+
+ /* Step 0 :
+ -------------------------------------------------------------------------
+ VMS-unwind up until we reach a VMS dispatcher frame corresponding to the
+ context we are trying to unwind through. Fail if get past this context or
+ if we reach the bottom of stack along the way.
+ -------------------------------------------------------------------------
+ */
+
+ status = LIB$I64_INIT_INVO_CONTEXT (icb, LIBICB$K_INVO_CONTEXT_VERSION, 0);
+ FAIL_IF (status == 0);
+
+ status = LIB$I64_GET_CURR_INVO_CONTEXT (icb);
+
+ /* Beware: we might be unwinding through nested condition handlers, so the
+ dispatcher frame we seek might not be the first one on the way up; loop
+ until we find it. */
+ do {
+
+ /* Seek the next dispatcher frame up from the "current" point. Stop if we
+ either get past the target context or hit the bottom of the stack along
+ the way. */
+ status = LIB$I64_GET_PREV_INVO_CONTEXT (icb);
+ FAIL_IF (status == 0);
+ FAIL_IF ((uw_reg)icb->libicb$ih_sp > (uw_reg)context->psp
+ || DENOTES_BOTTOM_OF_STACK (icb));
+
+ if (eh_debug)
+ printf ("frame%s sp @ 0x%llx, pc @ 0x%llx bsp=0x%llx\n",
+ DENOTES_VMS_DISPATCHER_FRAME (icb) ? " (dispatcher)" : "",
+ icb->libicb$ih_sp, icb->libicb$ih_pc, icb->libicb$ih_bsp);
+
+ /* Continue until the target frame is found. */
+ } while ((uw_reg)icb->libicb$ih_bsp != (uw_reg)context->bsp);
+
+ /* If this is not a dispatcher frame, this is certainly a frame for a leaf
+ subprogram. Use default unwind information. */
+ if (! DENOTES_VMS_DISPATCHER_FRAME (icb))
+ return _URC_END_OF_STACK;
+
+ /* At this point, we know we are really trying to unwind past an exception
+ dispatcher frame, and have it described in ICB. Proceed. */
+
+ /* Step 1 :
+ ------------------------------------------------------------------------
+ We have the VMS dispatcher frame ICB handy and know we are trying to
+ unwind past it. Fetch pointers to useful data structures from there, then
+ unwind one step further up to the interrupted user context from which
+ some required values will be easily accessible.
+ ------------------------------------------------------------------------
+ */
+
+ chfctx = icb->libicb$ph_chfctx_addr;
+ FAIL_IF (chfctx == 0);
+
+ chfmech = (CHF$MECH_ARRAY *)chfctx->chfctx$q_mcharglst;
+ FAIL_IF (chfmech == 0);
+
+ chfsig64 = (CHF64$SIGNAL_ARRAY *)chfmech->chf$ph_mch_sig64_addr;
+ FAIL_IF (chfsig64 == 0);
+
+ intstk = (INTSTK *)chfmech->chf$q_mch_esf_addr;
+ FAIL_IF (intstk == 0 || intstk->intstk$b_subtype == DYN$C_SSENTRY);
+
+ status = LIB$I64_GET_PREV_INVO_CONTEXT (icb);
+ FAIL_IF (status == 0);
+
+ if (eh_debug)
+ printf ("User frame, "
+ "chfmech @ 0x%lx, chfsig64 @ 0x%lx, intstk @ 0x%lx\n",
+ (ulong)chfmech, (ulong)chfsig64, (ulong)intstk);
+
+ /* Step 2 :
+ ------------------------------------------------------------------------
+ Point the GCC context locations/values required for further unwinding at
+ their corresponding locations/values in the data structures at hand.
+ ------------------------------------------------------------------------
+ */
+
+ /* Static General Register locations, including scratch registers in case
+ the unwinder needs to refer to a value stored in one of them. */
+ {
+ uw_reg * ctxregs = (uw_reg *)&intstk->intstk$q_regbase;
+
+ for (i = 2; i <= 3; i++)
+ context->ireg[i - 2].loc = (uw_loc)&ctxregs[i];
+ for (i = 8; i <= 11; i++)
+ context->ireg[i - 2].loc = (uw_loc)&ctxregs[i];
+ for (i = 14; i <= 31; i++)
+ context->ireg[i - 2].loc = (uw_loc)&ctxregs[i];
+ }
+
+ /* Static Floating Point Register locations, as available from the
+ mechargs array, which happens to include all the registers that need
+ to be preserved, plus others. */
+ {
+ fp_reg * ctxregs;
+
+ ctxregs = (fp_reg *)&chfmech->chf$fh_mch_savf2;
+ for (i = 2; i <= 5 ; i++)
+ context->fr_loc[i - 2] = (uw_loc)&ctxregs[i - 2];
+
+ ctxregs = (fp_reg *)&chfmech->chf$fh_mch_savf12;
+ for (i = 12; i <= 31 ; i++)
+ context->fr_loc[i - 2] = (uw_loc)&ctxregs[i - 12];
+ }
+
+ /* Relevant application register locations. */
+
+ context->fpsr_loc = (uw_loc)&intstk->intstk$q_fpsr;
+ context->lc_loc = (uw_loc)&intstk->intstk$q_lc;
+ context->unat_loc = (uw_loc)&intstk->intstk$q_unat;
+
+ /* Branch register locations. */
+
+ {
+ uw_reg * ctxregs = (uw_reg *)&intstk->intstk$q_b0;
+
+ for (i = 0; i < 8; i++)
+ context->br_loc[i] = (uw_loc)&ctxregs[i];
+ }
+
+ /* Necessary register values. */
+
+ /* ??? Still unclear if we need to account for possible flushes to an
+ alternate backing store (maybe the unwinding performed above did the
+ trick already) and how this would be handled. The blind, tentative attempt
+ below is for experimentation purposes in malfunctioning cases. */
+ {
+ ulong q_bsp = (ulong) intstk->intstk$q_bsp;
+ ulong q_bspstore = (ulong) intstk->intstk$q_bspstore;
+ ulong q_bspbase = (ulong) intstk->intstk$q_bspbase;
+ ulong ih_bspbase = (ulong) icb->libicb$ih_bspbase;
+
+ if (eh_debug)
+ printf ("q_bspstore = 0x%lx, q_bsp = 0x%lx, q_bspbase = 0x%lx\n"
+ "ih_bspbase = 0x%lx\n",
+ q_bspstore, q_bsp, q_bspbase, ih_bspbase);
+
+ /* We witness many situations where q_bspbase is set while ih_bspbase is
+ null; every attempt made with q_bspbase failed badly, while doing
+ nothing resulted in proper behavior. */
+ if (q_bspstore < q_bsp && ih_bspbase && try_bs_copy)
+ {
+ ulong dirty_size = q_bsp - q_bspstore;
+ ulong q_rnat = (ulong) intstk->intstk$q_rnat;
+
+ if (eh_debug)
+ printf ("Attempting an alternate backing store copy ...\n");
+
+ ia64_copy_rbs
+ (context, q_bspstore, ih_bspbase, dirty_size, q_rnat);
+ /* Not clear if these are the proper arguments here. This is what
+ looked the closest to what is performed in the Linux case. */
+ }
+
+ }
+
+ context->bsp = (uw_reg)intstk->intstk$q_bsp;
+ fs->no_reg_stack_frame = 1;
+
+ context->pr = (uw_reg)intstk->intstk$q_preds;
+ context->gp = (uw_reg)intstk->intstk$q_gp;
+
+ /* We're directly setting up the "context" for a VMS exception handler.
+ The "previous SP" for it is the SP upon the handler's entry, that is
+ the SP at the condition/interruption/exception point. */
+ context->psp = (uw_reg)icb->libicb$ih_sp;
+
+ /* Previous Frame State location. What eventually ends up in pfs_loc is
+ installed with ar.pfs = pfs_loc; br.ret; so set this up to target
+ intstk->q_ifs, so that the interrupted context is restored, and not that
+ of its caller, if we happen to have a handler in the interrupted context
+ itself. */
+ fs->curr.reg[UNW_REG_PFS].where = UNW_WHERE_PSPREL;
+ fs->curr.reg[UNW_REG_PFS].val
+ = (uw_reg)&intstk->intstk$q_ifs - (uw_reg)context->psp;
+ fs->curr.reg[UNW_REG_PFS].when = -1;
+
+ /* If we need to unwind further up, past the interrupted context, we need to
+ hand out the interrupted context's pfs, still. */
+ context->signal_pfs_loc = (uw_loc) &intstk->intstk$q_pfs;
+
+ /* Finally, the rules for RP. */
+ {
+ uw_reg * post_sigarray
+ = (uw_reg *)chfsig64 + 1 + chfsig64->chf64$l_sig_args;
+
+ uw_reg * ih_pc_loc = post_sigarray - 2;
+
+ fs->curr.reg[UNW_REG_RP].where = UNW_WHERE_PSPREL;
+ fs->curr.reg[UNW_REG_RP].val
+ = (uw_reg)ih_pc_loc - (uw_reg)context->psp;
+ fs->curr.reg[UNW_REG_RP].when = -1;
+ }
+
+ return _URC_NO_REASON;
+}
+
diff --git a/gcc/config/ia64/vms.h b/gcc/config/ia64/vms.h
new file mode 100644
index 000000000..1e1a04fd5
--- /dev/null
+++ b/gcc/config/ia64/vms.h
@@ -0,0 +1,211 @@
+/* Definitions of target machine for GNU compiler. IA64-VMS version.
+ Copyright (C) 2003-2010 Free Software Foundation, Inc.
+ Contributed by Douglas B Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OBJECT_SUFFIX ".obj"
+#define TARGET_EXECUTABLE_SUFFIX ".exe"
+
+#define OBJECT_FORMAT_ELF
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("vms"); \
+ builtin_define_std ("VMS"); \
+ builtin_define ("__IA64"); \
+ builtin_assert ("system=vms"); \
+ builtin_define ("__IEEE_FLOAT"); \
+ } while (0)
+
+/* By default, allow $ to be part of an identifier. */
+#define DOLLARS_IN_IDENTIFIERS 2
+
+#undef TARGET_ABI_OPEN_VMS
+#define TARGET_ABI_OPEN_VMS 1
+
+#undef TARGET_NAME
+#define TARGET_NAME "OpenVMS/IA64"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME);
+
+/* Need .debug_line info generated from gcc and gas. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_DWARF2_ASM | MASK_GNU_AS)
+
+#define VMS_DEBUG_MAIN_POINTER "TRANSFER$BREAK$GO"
+
+/* "long" is 32 bits, but 64 bits for Ada. */
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 32
+#define ADA_LONG_TYPE_SIZE 64
+
+/* Pointer is 32 bits but the hardware has 64-bit addresses, sign extended. */
+#undef POINTER_SIZE
+#define POINTER_SIZE 32
+#define POINTERS_EXTEND_UNSIGNED 0
+
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT 524288 /* 8 x 2^16 by DEC Ada Test CD40VRA */
+
+/* Widest floating-point type efficiently supported by hardware and OS. */
+#undef WIDEST_HARDWARE_FP_SIZE
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+/* The structure return address arrives as an "argument" on VMS. */
+#undef PCC_STATIC_STRUCT_RETURN
+
+/* Turn on VMS specific Dwarf2 features. */
+#define VMS_DEBUGGING_INFO 1
+
+#define ASM_OUTPUT_DWARF_VMS_DELTA(FILE,SIZE,LABEL1,LABEL2) \
+do { \
+ fprintf (FILE, "\tdata4.ua\t@slotcount("); \
+ assemble_name (FILE, LABEL1); \
+ fprintf (FILE, "-"); \
+ assemble_name (FILE, LABEL2); \
+ fprintf (FILE, ")"); \
+} while (0)
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+"%{!shared:%{mvms-return-codes:vcrt0.o%s} %{!mvms-return-codes:pcrt0.o%s} \
+ crtbegin.o%s} \
+ %{!static:%{shared:crtinitS.o%s crtbeginS.o%s}}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+"%{!shared:crtend.o%s} %{!static:%{shared:crtendS.o%s}}"
+
+#define LINK_GCC_C_SEQUENCE_SPEC "%G"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{g*} %{map} %{save-temps} %{shared} %{v}"
+
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+"%{mno-gnu-as:-N so -N vms_upcase -W DVLoc_off} %{mconstant-gp:-M const_gp} \
+ %{mauto-pic:-M no_plabel} %{source-listing:-ahdl=%b.lis}"
+
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+do { \
+ (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, XSTR (FUN, 0), "function"); \
+} while (0)
+
+/* Set the function to change the names of the division and modulus
+ functions. */
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS ia64_vms_init_libfuncs
+
+#define NAME__MAIN "__gccmain"
+#define SYMBOL__MAIN __gccmain
+
+#define CTOR_LIST_BEGIN asm (".global\tLIB$INITIALIZE#\n"); \
+STATIC func_ptr __CTOR_LIST__[1] \
+ __attribute__ ((__unused__, section(".ctors"), aligned(sizeof(func_ptr)))) \
+ = { (func_ptr) (-1) };
+
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP ".section\tLIB$INITIALIZE#,\"a\",@progbits"
+
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\tdata4 @fptr(" #FUNC"#)\n"); \
+ FORCE_CODE_SECTION_ALIGN \
+ asm (TEXT_SECTION_ASM_OP);
+
+#undef FINI_SECTION_ASM_OP
+
+/* Maybe same as HPUX? Needs to be checked. */
+#define JMP_BUF_SIZE (8 * 76)
+
+typedef struct crtl_name_spec
+{
+ const char *const name;
+ const char *deccname;
+ int referenced;
+} crtl_name_spec;
+
+#include "config/vms/vms-crtl.h"
+
+/* Alias CRTL names to 32/64-bit DECCRTL functions.
+ FIXME: This should do a binary search. */
+#define DO_CRTL_NAMES \
+ do \
+ { \
+ int i; \
+ static crtl_name_spec vms_crtl_names[] = CRTL_NAMES; \
+ static int malloc64_init = 0; \
+ \
+ if ((malloc64_init == 0) && TARGET_MALLOC64) \
+ { \
+ for (i=0; vms_crtl_names [i].name; i++) \
+ { \
+ if (strcmp ("calloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_calloc64"; \
+ else \
+ if (strcmp ("malloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_malloc64"; \
+ else \
+ if (strcmp ("realloc", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_realloc64"; \
+ else \
+ if (strcmp ("strdup", vms_crtl_names [i].name) == 0) \
+ vms_crtl_names [i].deccname = "decc$_strdup64"; \
+ } \
+ malloc64_init = 1; \
+ } \
+ for (i=0; vms_crtl_names [i].name; i++) \
+ if (!vms_crtl_names [i].referenced && \
+ (strcmp (name, vms_crtl_names [i].name) == 0)) \
+ { \
+ fprintf (file, "\t.alias %s, \"%s\"\n", \
+ name, vms_crtl_names [i].deccname); \
+ vms_crtl_names [i].referenced = 1; \
+ } \
+ } while (0)
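+
+/* A minimal sketch (editorial, not part of the upstream sources) of the
+   binary search suggested by the FIXME above.  It assumes the CRTL_NAMES
+   table is sorted by NAME, which the linear scan in DO_CRTL_NAMES does
+   not require; crtl_name_cmp and crtl_name_lookup are hypothetical
+   helpers, kept under #if 0 since they are illustrative only.  */
+#if 0
+#include <stdlib.h>
+#include <string.h>
+
+static int
+crtl_name_cmp (const void *key, const void *elem)
+{
+  /* KEY is the external name; ELEM points into the sorted table.  */
+  return strcmp ((const char *) key,
+                 ((const crtl_name_spec *) elem)->name);
+}
+
+static crtl_name_spec *
+crtl_name_lookup (const char *name, crtl_name_spec *table, size_t n)
+{
+  /* O(log n) lookup instead of the O(n) scan in DO_CRTL_NAMES.  */
+  return (crtl_name_spec *)
+    bsearch (name, table, n, sizeof (crtl_name_spec), crtl_name_cmp);
+}
+#endif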
+
+#undef SUBTARGET_OPTIMIZATION_OPTIONS
+#define SUBTARGET_OPTIMIZATION_OPTIONS \
+ { OPT_LEVELS_ALL, OPT_fmerge_constants, NULL, 0 }
+
+/* Define this to be nonzero if static stack checking is supported. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+#define MD_UNWIND_SUPPORT "config/ia64/vms-unwind.h"
+
+#define UNW_IVMS_MODE(HEADER) (((HEADER) >> 44) & 0x3L)
+#define MD_UNW_COMPATIBLE_PERSONALITY_P(HEADER) (!UNW_IVMS_MODE (HEADER))
+
+/* Minimum amount of stack required to recover from an anticipated stack
+ overflow detection. The default value conveys an estimate of the amount
+ of stack required to propagate an exception. */
+#define STACK_CHECK_PROTECT (24 * 1024)
+
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
+ ia64_vms_output_aligned_decl_common (FILE, DECL, NAME, SIZE, ALIGN)
+
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE ia64_vms_valid_pointer_mode
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION ia64_vms_elf_asm_named_section
diff --git a/gcc/config/ia64/vms.opt b/gcc/config/ia64/vms.opt
new file mode 100644
index 000000000..b4cec1f44
--- /dev/null
+++ b/gcc/config/ia64/vms.opt
@@ -0,0 +1,30 @@
+; IA64 VMS options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+source-listing
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/ia64/vms64.h b/gcc/config/ia64/vms64.h
new file mode 100644
index 000000000..ac1d7a507
--- /dev/null
+++ b/gcc/config/ia64/vms64.h
@@ -0,0 +1,41 @@
+/* Definitions of target machine for GNU compiler. 64-bit IA64-VMS version.
+ Copyright (C) 2004-2009 Free Software Foundation, Inc.
+ Contributed by Douglas B Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("vms"); \
+ builtin_define_std ("VMS"); \
+ builtin_define ("__IA64"); \
+ builtin_assert ("system=vms"); \
+ builtin_define ("__IEEE_FLOAT"); \
+ builtin_define ("__LONG_POINTERS=1"); \
+ } while (0)
+
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 64
+
+#undef POINTER_SIZE
+#define POINTER_SIZE 64
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_DWARF2_ASM | MASK_GNU_AS | MASK_MALLOC64)
+
+#include "config/vms/vms-crtl-64.h"
diff --git a/gcc/config/ia64/vms_symvec_libgcc_s.opt b/gcc/config/ia64/vms_symvec_libgcc_s.opt
new file mode 100644
index 000000000..88b46dfda
--- /dev/null
+++ b/gcc/config/ia64/vms_symvec_libgcc_s.opt
@@ -0,0 +1,89 @@
+! Symbol vector listing all the universal symbols to be exported when
+! building the libgcc_s.exe shareable image on IVMS for GCC 3.4.5.
+! It would be better to auto-generate this file.
+
+case_sensitive=yes
+SYMBOL_VECTOR=(__divdf3=PROCEDURE)
+SYMBOL_VECTOR=(__divdi3=PROCEDURE)
+SYMBOL_VECTOR=(__divsf3=PROCEDURE)
+SYMBOL_VECTOR=(__divsi3=PROCEDURE)
+SYMBOL_VECTOR=(__divxf3=PROCEDURE)
+SYMBOL_VECTOR=(__moddi3=PROCEDURE)
+SYMBOL_VECTOR=(__modsi3=PROCEDURE)
+SYMBOL_VECTOR=(__ia64_nonlocal_goto=PROCEDURE)
+SYMBOL_VECTOR=(__ia64_restore_stack_nonlocal=PROCEDURE)
+SYMBOL_VECTOR=(__ia64_save_stack_nonlocal=PROCEDURE)
+SYMBOL_VECTOR=(__ia64_trampoline=PROCEDURE)
+SYMBOL_VECTOR=(__udivdi3=PROCEDURE)
+SYMBOL_VECTOR=(__udivsi3=PROCEDURE)
+SYMBOL_VECTOR=(__umoddi3=PROCEDURE)
+SYMBOL_VECTOR=(__umodsi3=PROCEDURE)
+SYMBOL_VECTOR=(__absvti2=PROCEDURE)
+SYMBOL_VECTOR=(__absvdi2=PROCEDURE)
+SYMBOL_VECTOR=(__absvsi2=PROCEDURE)
+SYMBOL_VECTOR=(__addvti3=PROCEDURE)
+SYMBOL_VECTOR=(__addvdi3=PROCEDURE)
+SYMBOL_VECTOR=(__addvsi3=PROCEDURE)
+SYMBOL_VECTOR=(__ashlti3=PROCEDURE)
+SYMBOL_VECTOR=(__ashrti3=PROCEDURE)
+SYMBOL_VECTOR=(__clear_cache=PROCEDURE)
+SYMBOL_VECTOR=(__clzti2=PROCEDURE)
+SYMBOL_VECTOR=(__clzdi2=PROCEDURE)
+SYMBOL_VECTOR=(__cmpti2=PROCEDURE)
+SYMBOL_VECTOR=(__ctzti2=PROCEDURE)
+SYMBOL_VECTOR=(__ctzdi2=PROCEDURE)
+SYMBOL_VECTOR=(__divti3=PROCEDURE)
+SYMBOL_VECTOR=(__enable_execute_stack=PROCEDURE)
+SYMBOL_VECTOR=(__ffsti2=PROCEDURE)
+SYMBOL_VECTOR=(__ffsdi2=PROCEDURE)
+SYMBOL_VECTOR=(__fixdfti=PROCEDURE)
+SYMBOL_VECTOR=(__fixsfti=PROCEDURE)
+SYMBOL_VECTOR=(__fixunsdfti=PROCEDURE)
+SYMBOL_VECTOR=(__fixunsdfdi=PROCEDURE)
+SYMBOL_VECTOR=(__fixunssfti=PROCEDURE)
+SYMBOL_VECTOR=(__fixunssfdi=PROCEDURE)
+SYMBOL_VECTOR=(__floattidf=PROCEDURE)
+SYMBOL_VECTOR=(__floattisf=PROCEDURE)
+SYMBOL_VECTOR=(__lshrti3=PROCEDURE)
+SYMBOL_VECTOR=(__modti3=PROCEDURE)
+SYMBOL_VECTOR=(__multi3=PROCEDURE)
+SYMBOL_VECTOR=(__mulvti3=PROCEDURE)
+SYMBOL_VECTOR=(__mulvdi3=PROCEDURE)
+SYMBOL_VECTOR=(__mulvsi3=PROCEDURE)
+SYMBOL_VECTOR=(__negti2=PROCEDURE)
+SYMBOL_VECTOR=(__negvti2=PROCEDURE)
+SYMBOL_VECTOR=(__negvdi2=PROCEDURE)
+SYMBOL_VECTOR=(__negvsi2=PROCEDURE)
+SYMBOL_VECTOR=(__parityti2=PROCEDURE)
+SYMBOL_VECTOR=(__paritydi2=PROCEDURE)
+SYMBOL_VECTOR=(__popcountti2=PROCEDURE)
+SYMBOL_VECTOR=(__popcountdi2=PROCEDURE)
+SYMBOL_VECTOR=(__subvti3=PROCEDURE)
+SYMBOL_VECTOR=(__subvdi3=PROCEDURE)
+SYMBOL_VECTOR=(__subvsi3=PROCEDURE)
+SYMBOL_VECTOR=(__ucmpti2=PROCEDURE)
+SYMBOL_VECTOR=(__udiv_w_sdiv=PROCEDURE)
+SYMBOL_VECTOR=(__udivti3=PROCEDURE)
+SYMBOL_VECTOR=(__udivmodti4=PROCEDURE)
+SYMBOL_VECTOR=(__umodti3=PROCEDURE)
+SYMBOL_VECTOR=(__gthread_active_p=PROCEDURE)
+SYMBOL_VECTOR=(__gthread_mutex_lock=PROCEDURE)
+SYMBOL_VECTOR=(__gthread_mutex_unlock=PROCEDURE)
+SYMBOL_VECTOR=(__gcc_personality_v0=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetGR=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_SetGR=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetIP=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetIPInfo=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_SetIP=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetLanguageSpecificData=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetRegionStart=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_FindEnclosingFunction=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetCFA=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_GetBSP=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_RaiseException=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_ForcedUnwind=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_Resume=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_Resume_or_Rethrow=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_DeleteException=PROCEDURE)
+SYMBOL_VECTOR=(_Unwind_Backtrace=PROCEDURE)
+case_sensitive=NO
diff --git a/gcc/config/interix.h b/gcc/config/interix.h
new file mode 100644
index 000000000..e9d1cc21f
--- /dev/null
+++ b/gcc/config/interix.h
@@ -0,0 +1,110 @@
+/* Operating system specific defines to be used when targeting GCC for
+ Interix
+ Copyright (C) 1994, 1995, 1999, 2002, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+ Donn Terry, Softway Systems, Inc. (donn@softway.com)
+ Modified from code
+ Contributed by Douglas B. Rupp (drupp@cs.washington.edu).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* POSIX/uni-threaded only for now. Look at the winnt version
+ for Windows/multi-threaded support. */
+
+/* We need multiple -lc -lcpsx because they refer to each other;
+ that should go away someday. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+ %{!shared:%{!dynamic:-lc -lcpsx -lc -lcpsx %$INTERIX_ROOT/usr/lib/psxdll.a \
+ %$INTERIX_ROOT/usr/lib/psxdll2.a \
+ }} \
+ %{!G:%{!dynamic:-lc -lcpsx -lc -lcpsx %$INTERIX_ROOT/usr/lib/psxdll.a \
+ %$INTERIX_ROOT/usr/lib/psxdll2.a \
+ }} \
+ %{dynamic:-lc %$INTERIX_ROOT/usr/lib/psxdll.a \
+ %$INTERIX_ROOT/usr/lib/psxdll2.a \
+ } \
+ %{v}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{!shared:-stack 0x400000,0x10000} \
+ -subsystem posix \
+ %{g} \
+ %{dynamic:-Bdynamic} \
+ %{static:-Bstatic} \
+ %{shared:--shared -Bdynamic} \
+ %{G:--shared -Bdynamic} \
+ %{symbolic:--shared -Bsymbolic -Bdynamic} \
+ %{rpath*:--rpath %*} \
+ "
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}} %{shared:crti%O%s}"
+
+
+#define STDC_0_IN_SYSTEM_HEADERS 1
+
+/* Names to predefine in the preprocessor for this target machine. */
+
+#define DBX_DEBUGGING_INFO 1
+#define SDB_DEBUGGING_INFO 1
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+
+/* TARGET_DEFAULT from configure */
+
+#undef WCHAR_TYPE
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* Our strategy for finding global constructors is slightly different from
+ the usual one. */
+#define DO_GLOBAL_CTORS_BODY \
+do { \
+ int i; \
+ unsigned long nptrs; \
+ func_ptr *p; \
+ asm( \
+ " .section .ctor_head, \"rw\"\n" \
+ "1:\n" \
+ " .text \n" \
+ ASM_LOAD_ADDR(1b,%0) \
+ : "=r" (p) : : "cc"); \
+ for (nptrs = 0; p[nptrs] != 0; nptrs++); \
+ for (i = nptrs-1; i >= 0; i--) \
+ p[i] (); \
+} while (0)
+
+#define DO_GLOBAL_DTORS_BODY \
+do { \
+ func_ptr *p; \
+ asm( \
+ " .section .dtor_head, \"rw\"\n" \
+ "1:\n" \
+ " .text \n" \
+ ASM_LOAD_ADDR(1b,%0) \
+ : "=r" (p) : : "cc"); \
+ while (*p) \
+ { \
+ p++; \
+ (*(p-1)) (); \
+ } \
+} while (0)
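+
+/* Note (editorial): DO_GLOBAL_CTORS_BODY above invokes its table entries
+   from last to first, while DO_GLOBAL_DTORS_BODY invokes its entries
+   from first to last.  */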
diff --git a/gcc/config/interix.opt b/gcc/config/interix.opt
new file mode 100644
index 000000000..f50329965
--- /dev/null
+++ b/gcc/config/interix.opt
@@ -0,0 +1,36 @@
+; Interix options.
+
+; Copyright (C) 2010, 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+dynamic
+Driver
+
+G
+Driver
+
+posix
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/interix3.h b/gcc/config/interix3.h
new file mode 100644
index 000000000..370fd8af4
--- /dev/null
+++ b/gcc/config/interix3.h
@@ -0,0 +1,34 @@
+/* Operating system specific defines to be used when targeting GCC for
+ Interix version 3.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Interix 3.x has a single-rooted file system and properly named
+ libraries, so LIB_SPEC can be simplified. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+ %{!shared:%{!dynamic:-lc -lpsxdll \
+ }} \
+ %{!G:%{!dynamic:-lc -lpsxdll \
+ }} \
+ %{dynamic:-lc -lpsxdll \
+ } \
+ %{v}"
+
diff --git a/gcc/config/iq2000/abi b/gcc/config/iq2000/abi
new file mode 100644
index 000000000..e4e62c097
--- /dev/null
+++ b/gcc/config/iq2000/abi
@@ -0,0 +1,239 @@
+ IQ2000 ABI
+ ==========
+
+Sizes and alignments
+--------------------
+
+ Type Size (bytes) Alignment (bytes)
+
+ char 1 1
+ short 2 2
+ int 4 4
+ unsigned 4 4
+ long 4 4
+ long long 8 8
+ float 4 4
+ double 8 8
+ pointers 4 4
+
+* alignment within aggregates (structs and unions) is as above, with
+ padding added if needed
+* aggregates have alignment equal to that of their most aligned
+ member
+* aggregates have sizes which are a multiple of their alignment
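+
+For example (illustrative, not part of the original ABI text), under the
+rules above:
+
+    struct example { char c; int i; };
+
+has c at offset 0, three bytes of padding, and i at offset 4, giving
+sizeof == 8 and alignment == 4 (that of the int member).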
+
+
+Floating point
+--------------
+
+All emulated using IEEE floating point conventions.
+
+Registers
+---------
+
+%0 always zero
+%1 call clobbered
+%2 return value
+%3 return value
+%4 argument register 1
+%5 argument register 2
+%6 argument register 3
+%7 argument register 4
+%8 argument register 5
+%9 argument register 6
+%10 argument register 7
+%11 argument register 8
+%12 call clobbered
+%13 call clobbered
+%14 call clobbered
+%15 call clobbered
+%16 call saved
+%17 call saved
+%18 call saved
+%19 call saved
+%20 call saved
+%21 call saved
+%22 call saved
+%23 call saved
+%24 call clobbered
+%25 call clobbered
+%26 reserved
+%27 frame ptr
+%28 global ptr
+%29 stack ptr
+%30 reserved
+%31 return address
+
+Stack alignment 8 bytes
+
+Structures passed <= 32 bits as values, else as pointers
+
+The IQ2000 Stack
+----------------
+
+Space is allocated as needed in the stack frame for the following at compile
+time:
+
+* Outgoing parameters beyond the eighth
+
+* All automatic arrays, automatic data aggregates, automatic
+ scalars which must be addressable, and automatic scalars for
+ which there is no room in registers
+
+* Compiler-generated temporary values (typically when there are
+ too many for the compiler to keep them all in registers)
+
+Space can be allocated dynamically (at runtime) in the stack frame for the
+following:
+
+* Memory allocated using the alloca() function of the C library
+
+Addressable automatic variables on the stack are addressed with positive
+offsets relative to %27; dynamically allocated space is addressed with positive
+offsets from the pointer returned by alloca().
+
+Stack Frame
+-----------
+
+ +-----------------------+
+ | Caller memory args |
+ +-----------------------+ <-sp
+ | Return address |
+ +-----------------------+
+ | Previous FP |
+ +-----------------------+
+ | Saved Registers |
+ +-----------------------+
+ | ... |
+ +-----------------------+
+ | Local Variables |
+ +-----------------------+ <-fp
+ | Alloca |
+ +-----------------------+
+ | ... |
+ +-----------------------+
+ | Parameter Word 2 |
+ +-----------------------+
+ | Parameter Word 1 |
+ +-----------------------+ <-sp
+
+
+Parameter Assignment to Registers
+---------------------------------
+
+Consider the parameters in a function call as ordered from left (first
+parameter) to right. GR contains the number of the next available
+general-purpose register. STARG is the address of the next available stack
+parameter word.
+
+INITIALIZE:
+ Set GR=r4 and STARG to point to parameter word 1.
+
+SCAN:
+ If there are no more parameters, terminate.
+ Otherwise, select one of the following depending on the type
+ of the next parameter:
+
+ SIMPLE ARG:
+
+ A SIMPLE ARG is one of the following:
+
+ * One of the simple integer types which will fit into a
+ general-purpose register,
+ * A pointer to an object of any type,
+ * A struct or union small enough to fit in a register (<= 32 bits),
+ * A larger struct or union, which shall be treated as a
+ pointer to the object or to a copy of the object.
+ (See below for when copies are made.)
+
+ If GR > r11, go to STACK. Otherwise, load the parameter value into
+ general-purpose register GR and advance GR to the next general-purpose
+ register. Values shorter than the register size are sign-extended or
+ zero-extended depending on whether they are signed or unsigned. Then
+ go to SCAN.
+
+ DOUBLE or LONG LONG:
+
+ If GR > r10, go to STACK. Otherwise, if GR is odd, advance GR to the
+ next register. Load the 64-bit long long or double value into register
+ pair GR and GR+1. Advance GR to GR+2 and go to SCAN.
+
+ STACK:
+
+ Parameters not otherwise handled above are passed in the parameter
+ words of the caller's stack frame. SIMPLE ARGs, as defined above, are
+ considered to have size and alignment equal to the size of a
+ general-purpose register, with simple argument types shorter than this
+ sign- or zero-extended to this width. Round STARG up to a multiple of
+ the alignment requirement of the parameter and copy the argument
+ byte-for-byte into STARG, STARG+1, ... STARG+size-1. Set STARG to
+ STARG+size and go to SCAN.
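+
+The following C sketch (illustrative only, not part of the original ABI
+text; assign_arg and arg_kind are hypothetical names) mirrors the
+INITIALIZE/SCAN/STACK rules above:
+
+    #include <stddef.h>
+
+    enum arg_kind { SIMPLE, DOUBLE_OR_LLONG };
+
+    /* Return the first register (r4..r11) assigned to the argument, or
+       -1 if it is passed on the stack; *GR and *STARG advance as the
+       rules above describe.  GR starts at 4, STARG at parameter word 1.  */
+    static int
+    assign_arg (enum arg_kind kind, int *gr, size_t *starg)
+    {
+      if (kind == SIMPLE)
+        {
+          if (*gr <= 11)
+            return (*gr)++;      /* sign- or zero-extended to 32 bits */
+          *starg += 4;           /* one 32-bit stack parameter word */
+          return -1;
+        }
+
+      /* 64-bit double or long long: needs an even/odd register pair.  */
+      if (*gr <= 10)
+        {
+          int reg = (*gr + 1) & ~1;   /* advance an odd GR to the next
+                                         (even) register */
+          *gr = reg + 2;
+          return reg;                 /* value occupies reg and reg+1 */
+        }
+
+      *starg = (*starg + 7) & ~(size_t) 7;   /* 8-byte alignment */
+      *starg += 8;
+      return -1;
+    }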
+
+
+Structure passing
+-----------------
+
+As noted above, code which passes structures and unions by value is implemented
+specially. (In this section, "struct" will refer to structs and unions
+inclusively.) Structs small enough to fit in a register are passed by value in
+a single register or in a stack frame slot the size of a register. Structs
+containing a single double or long long component are passed by value in two
+registers or in a stack frame slot the size of two registers. Other structs
+are handled by passing the address of the structure. In this case, a copy of
+the structure will be made if necessary in order to preserve the pass-by-value
+semantics.
+
+Copies of large structs are made under the following rules:
+
+ ANSI mode K&R Mode
+ --------- --------
+Normal param Callee copies if needed Caller copies
+Varargs (...) param Caller copies Caller copies
+
+In the case of normal (non-varargs) large-struct parameters in ANSI mode, the
+callee is responsible for producing the same effect as if a copy of the
+structure were passed, preserving the pass-by-value semantics. This may be
+accomplished by having the callee make a copy, but in some cases the callee may
+be able to determine that a copy is not necessary in order to produce the same
+results. In such cases, the callee may choose to avoid making a copy of the
+parameter.
+
+
+Varargs handling
+----------------
+
+No special changes are needed for handling varargs parameters other than the
+caller knowing that a copy is needed on struct parameters larger than a
+register (see above).
+
+The varargs macros set up a register save area for the general-purpose
+registers to be saved. Because the save area lies between the caller and
+callee stack frames, the saved register parameters are contiguous with
+parameters passed on the stack. A pointer advances from the register save area
+into the caller's stack frame.
+
+
+Function return values
+----------------------
+
+ Type Register
+ ---- --------
+ int r2
+ short r2
+ long r2
+ long long r2-r3
+ float r2
+ double r2-r3
+ struct/union see below
+
+Structs/unions which will fit into two general-purpose registers are returned
+in r2, or in r2-r3 if necessary. Larger structs/unions are handled by the
+caller passing as a "hidden" first argument a pointer to space allocated to
+receive the return value.
+
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/gcc/config/iq2000/constraints.md b/gcc/config/iq2000/constraints.md
new file mode 100644
index 000000000..8850592d7
--- /dev/null
+++ b/gcc/config/iq2000/constraints.md
@@ -0,0 +1,79 @@
+;; Constraints for Vitesse IQ2000 processors
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+(define_register_constraint "b" "ALL_REGS"
+ "@internal")
+
+(define_register_constraint "d" "GR_REGS"
+ "@internal")
+
+(define_register_constraint "y" "GR_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "A 16-bit signed integer."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32768, 32767)")))
+
+(define_constraint "J"
+ "Zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "K"
+ "A 16-bit unsigned integer"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 65535)")))
+
+(define_constraint "L"
+ "A 32-bit constant whose bottom 16 bits are zero."
+ (and (match_code "const_int")
+ (ior (match_test "(ival | 0x7fff0000) == 0x7fff0000")
+ (match_test "(ival | 0x7fff0000) + 0x10000 == 0"))))
+
+(define_constraint "M"
+ "Any constant not matched by 'I', 'K', or 'L'."
+ (and (match_code "const_int")
+ (match_test "!insn_const_int_ok_for_constraint (ival, CONSTRAINT_I)")
+ (match_test "!insn_const_int_ok_for_constraint (ival, CONSTRAINT_K)")
+ (match_test "!insn_const_int_ok_for_constraint (ival, CONSTRAINT_L)")))
+
+(define_constraint "N"
+ "Any constant whose lower or upper 16 bits are 0xffff."
+ (and (match_code "const_int")
+ (ior (match_test "(ival & 0xffff) == 0xffff")
+ (match_test "(ival & 0xffff0000) == 0xffff0000"))))
+
+(define_constraint "O"
+ "A 5-bit signed integer."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -31, 31)")))
+
+;; Floating-point constraints.
+(define_constraint "G"
+ "Floating-point zero."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Extra constraints.
+(define_constraint "R"
+ "A memory reference which takes one word for the instruction."
+ (match_test "simple_memory_operand (op, mode)"))
diff --git a/gcc/config/iq2000/iq2000-protos.h b/gcc/config/iq2000/iq2000-protos.h
new file mode 100644
index 000000000..067e80747
--- /dev/null
+++ b/gcc/config/iq2000/iq2000-protos.h
@@ -0,0 +1,48 @@
+/* Definitions of target machine for GNU compiler for iq2000.
+ Copyright (C) 2003, 2004, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_IQ2000_PROTOS_H
+#define GCC_IQ2000_PROTOS_H
+
+extern int iq2000_check_split (rtx, enum machine_mode);
+extern int iq2000_reg_mode_ok_for_base_p (rtx, enum machine_mode, int);
+extern const char * iq2000_fill_delay_slot (const char *, enum delay_type, rtx *, rtx);
+extern const char * iq2000_move_1word (rtx *, rtx, int);
+extern HOST_WIDE_INT iq2000_debugger_offset (rtx, HOST_WIDE_INT);
+extern void final_prescan_insn (rtx, rtx *, int);
+extern HOST_WIDE_INT compute_frame_size (HOST_WIDE_INT);
+extern int iq2000_initial_elimination_offset (int, int);
+extern void iq2000_expand_prologue (void);
+extern void iq2000_expand_epilogue (void);
+extern void iq2000_expand_eh_return (rtx);
+extern int iq2000_can_use_return_insn (void);
+extern int iq2000_adjust_insn_length (rtx, int);
+extern char * iq2000_output_conditional_branch (rtx, rtx *, int, int, int, int);
+
+#ifdef RTX_CODE
+extern rtx gen_int_relational (enum rtx_code, rtx, rtx, rtx, int *);
+extern void gen_conditional_branch (rtx *, enum machine_mode);
+#endif
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
+extern bool iq2000_function_value_regno_p (const unsigned int);
+#endif
+
+#endif /* ! GCC_IQ2000_PROTOS_H */
diff --git a/gcc/config/iq2000/iq2000.c b/gcc/config/iq2000/iq2000.c
new file mode 100644
index 000000000..b838fecb9
--- /dev/null
+++ b/gcc/config/iq2000/iq2000.c
@@ -0,0 +1,3523 @@
+/* Subroutines used for code generation on Vitesse IQ2000 processors
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "reload.h"
+#include "ggc.h"
+#include "tm_p.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "df.h"
+
+/* Enumeration for all of the relational tests, so that we can build
+ arrays indexed by the test type, and not worry about the order
+ of EQ, NE, etc. */
+
+enum internal_test
+ {
+ ITEST_EQ,
+ ITEST_NE,
+ ITEST_GT,
+ ITEST_GE,
+ ITEST_LT,
+ ITEST_LE,
+ ITEST_GTU,
+ ITEST_GEU,
+ ITEST_LTU,
+ ITEST_LEU,
+ ITEST_MAX
+ };
+
+struct constant;
+
+
+/* Structure to be filled in by compute_frame_size with register
+ save masks, and offsets for the current function. */
+
+struct iq2000_frame_info
+{
+ long total_size; /* # bytes that the entire frame takes up. */
+ long var_size; /* # bytes that variables take up. */
+ long args_size; /* # bytes that outgoing arguments take up. */
+ long extra_size; /* # bytes of extra gunk. */
+ int gp_reg_size; /* # bytes needed to store gp regs. */
+ int fp_reg_size; /* # bytes needed to store fp regs. */
+ long mask; /* Mask of saved gp registers. */
+ long gp_save_offset; /* Offset from vfp to store gp registers. */
+ long fp_save_offset; /* Offset from vfp to store fp registers. */
+ long gp_sp_offset; /* Offset from new sp to store gp registers. */
+ long fp_sp_offset; /* Offset from new sp to store fp registers. */
+ int initialized; /* != 0 if frame size already calculated. */
+ int num_gp; /* Number of gp registers saved. */
+} iq2000_frame_info;
+
+struct GTY(()) machine_function
+{
+ /* Current frame information, calculated by compute_frame_size. */
+ long total_size; /* # bytes that the entire frame takes up. */
+ long var_size; /* # bytes that variables take up. */
+ long args_size; /* # bytes that outgoing arguments take up. */
+ long extra_size; /* # bytes of extra gunk. */
+ int gp_reg_size; /* # bytes needed to store gp regs. */
+ int fp_reg_size; /* # bytes needed to store fp regs. */
+ long mask; /* Mask of saved gp registers. */
+ long gp_save_offset; /* Offset from vfp to store gp registers. */
+ long fp_save_offset; /* Offset from vfp to store fp registers. */
+ long gp_sp_offset; /* Offset from new sp to store gp registers. */
+ long fp_sp_offset; /* Offset from new sp to store fp registers. */
+ int initialized; /* != 0 if frame size already calculated. */
+ int num_gp; /* Number of gp registers saved. */
+};
+
+/* Global variables for machine-dependent things. */
+
+/* List of all IQ2000 punctuation characters used by iq2000_print_operand. */
+static char iq2000_print_operand_punct[256];
+
+/* The target cpu for optimization and scheduling. */
+enum processor_type iq2000_tune;
+
+/* Which instruction set architecture to use. */
+int iq2000_isa;
+
+/* Local variables. */
+
+/* The next branch instruction is a branch likely, not branch normal. */
+static int iq2000_branch_likely;
+
+/* Count of delay slots and how many are filled. */
+static int dslots_load_total;
+static int dslots_load_filled;
+static int dslots_jump_total;
+
+/* # of nops needed by previous insn. */
+static int dslots_number_nops;
+
+/* Number of 1/2/3 word references to data items (i.e., not jal's). */
+static int num_refs[3];
+
+/* Registers to check for load delay. */
+static rtx iq2000_load_reg;
+static rtx iq2000_load_reg2;
+static rtx iq2000_load_reg3;
+static rtx iq2000_load_reg4;
+
+/* Mode used for saving/restoring general purpose registers. */
+static enum machine_mode gpr_mode;
+
+
+/* Initialize the GCC target structure. */
+static struct machine_function* iq2000_init_machine_status (void);
+static bool iq2000_handle_option (size_t, const char *, int);
+static void iq2000_option_override (void);
+static section *iq2000_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+static void iq2000_init_builtins (void);
+static rtx iq2000_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static bool iq2000_return_in_memory (const_tree, const_tree);
+static void iq2000_setup_incoming_varargs (CUMULATIVE_ARGS *,
+ enum machine_mode, tree, int *,
+ int);
+static bool iq2000_rtx_costs (rtx, int, int, int *, bool);
+static int iq2000_address_cost (rtx, bool);
+static section *iq2000_select_section (tree, int, unsigned HOST_WIDE_INT);
+static rtx iq2000_legitimize_address (rtx, rtx, enum machine_mode);
+static bool iq2000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int iq2000_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx iq2000_function_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static void iq2000_function_arg_advance (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static unsigned int iq2000_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void iq2000_va_start (tree, rtx);
+static bool iq2000_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool iq2000_can_eliminate (const int, const int);
+static void iq2000_asm_trampoline_template (FILE *);
+static void iq2000_trampoline_init (rtx, tree, rtx);
+static rtx iq2000_function_value (const_tree, const_tree, bool);
+static rtx iq2000_libcall_value (enum machine_mode, const_rtx);
+static void iq2000_print_operand (FILE *, rtx, int);
+static void iq2000_print_operand_address (FILE *, rtx);
+static bool iq2000_print_operand_punct_valid_p (unsigned char code);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options iq2000_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS iq2000_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN iq2000_expand_builtin
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION iq2000_select_rtx_section
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION iq2000_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE iq2000_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE iq2000_option_optimization_table
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS iq2000_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST iq2000_address_cost
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION iq2000_select_section
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS iq2000_legitimize_address
+
+/* The assembler supports switchable .bss sections, but
+ iq2000_select_section doesn't yet make use of them. */
+#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS
+#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS false
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND iq2000_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS iq2000_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P iq2000_print_operand_punct_valid_p
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE iq2000_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE iq2000_libcall_value
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY iq2000_return_in_memory
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE iq2000_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_callee_copies_named
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES iq2000_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG iq2000_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE iq2000_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY iq2000_function_arg_boundary
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS iq2000_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START iq2000_va_start
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P iq2000_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE iq2000_can_eliminate
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE iq2000_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT iq2000_trampoline_init
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Return nonzero if we split the address into high and low parts. */
+
+int
+iq2000_check_split (rtx address, enum machine_mode mode)
+{
+ /* This is the same check used in simple_memory_operand.
+ We use it here because LO_SUM is not offsettable. */
+ if (GET_MODE_SIZE (mode) > (unsigned) UNITS_PER_WORD)
+ return 0;
+
+ if ((GET_CODE (address) == SYMBOL_REF)
+ || (GET_CODE (address) == CONST
+ && GET_CODE (XEXP (XEXP (address, 0), 0)) == SYMBOL_REF)
+ || GET_CODE (address) == LABEL_REF)
+ return 1;
+
+ return 0;
+}
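+
+/* For example, a direct reference to a global such as
+   (mem:SI (symbol_ref:SI ("foo"))) is split into a high part loaded
+   with lui plus a LO_SUM low part, whereas a multi-word (e.g. DImode)
+   reference is not, because a LO_SUM address cannot be offset to reach
+   the second word.  */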
+
+/* Return nonzero if REG is valid for MODE. */
+
+int
+iq2000_reg_mode_ok_for_base_p (rtx reg,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int strict)
+{
+ return (strict
+ ? REGNO_MODE_OK_FOR_BASE_P (REGNO (reg), mode)
+ : GP_REG_OR_PSEUDO_NONSTRICT_P (REGNO (reg), mode));
+}
+
+/* Return a nonzero value if XINSN is a legitimate address for a
+ memory operand of the indicated MODE. STRICT is nonzero if this
+ function is called during reload. */
+
+bool
+iq2000_legitimate_address_p (enum machine_mode mode, rtx xinsn, bool strict)
+{
+ if (TARGET_DEBUG_A_MODE)
+ {
+ GO_PRINTF2 ("\n========== legitimate_address_p, %sstrict\n",
+ strict ? "" : "not ");
+ GO_DEBUG_RTX (xinsn);
+ }
+
+ /* Check for constant before stripping off SUBREG, so that we don't
+ accept (subreg (const_int)) which will fail to reload. */
+ if (CONSTANT_ADDRESS_P (xinsn)
+ && ! (iq2000_check_split (xinsn, mode))
+ && ! (GET_CODE (xinsn) == CONST_INT && ! SMALL_INT (xinsn)))
+ return 1;
+
+ while (GET_CODE (xinsn) == SUBREG)
+ xinsn = SUBREG_REG (xinsn);
+
+ if (GET_CODE (xinsn) == REG
+ && iq2000_reg_mode_ok_for_base_p (xinsn, mode, strict))
+ return 1;
+
+ if (GET_CODE (xinsn) == LO_SUM)
+ {
+ rtx xlow0 = XEXP (xinsn, 0);
+ rtx xlow1 = XEXP (xinsn, 1);
+
+ while (GET_CODE (xlow0) == SUBREG)
+ xlow0 = SUBREG_REG (xlow0);
+ if (GET_CODE (xlow0) == REG
+ && iq2000_reg_mode_ok_for_base_p (xlow0, mode, strict)
+ && iq2000_check_split (xlow1, mode))
+ return 1;
+ }
+
+ if (GET_CODE (xinsn) == PLUS)
+ {
+ rtx xplus0 = XEXP (xinsn, 0);
+ rtx xplus1 = XEXP (xinsn, 1);
+ enum rtx_code code0;
+ enum rtx_code code1;
+
+ while (GET_CODE (xplus0) == SUBREG)
+ xplus0 = SUBREG_REG (xplus0);
+ code0 = GET_CODE (xplus0);
+
+ while (GET_CODE (xplus1) == SUBREG)
+ xplus1 = SUBREG_REG (xplus1);
+ code1 = GET_CODE (xplus1);
+
+ if (code0 == REG
+ && iq2000_reg_mode_ok_for_base_p (xplus0, mode, strict))
+ {
+ if (code1 == CONST_INT && SMALL_INT (xplus1)
+ && SMALL_INT_UNSIGNED (xplus1) /* No negative offsets */)
+ return 1;
+ }
+ }
+
+ if (TARGET_DEBUG_A_MODE)
+    GO_PRINTF ("Not a legitimate address\n");
+
+ /* The address was not legitimate. */
+ return 0;
+}
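+
+/* To summarize the cases above, the legitimate forms are: a (possibly
+   SUBREG-wrapped) base register; a LO_SUM of a base register and a
+   splittable symbolic address; a base register plus a small
+   non-negative constant (SMALL_INT_UNSIGNED); and a constant address
+   that either splits or satisfies SMALL_INT.  */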
+
+/* Returns an operand string for the given instruction's delay slot,
+ after updating filled delay slot statistics.
+
+ We assume that operands[0] is the target register that is set.
+
+ In order to check the next insn, most of this functionality is moved
+ to FINAL_PRESCAN_INSN, and we just set the global variables that
+ it needs. */
+
+const char *
+iq2000_fill_delay_slot (const char *ret, enum delay_type type, rtx operands[],
+ rtx cur_insn)
+{
+ rtx set_reg;
+ enum machine_mode mode;
+ rtx next_insn = cur_insn ? NEXT_INSN (cur_insn) : NULL_RTX;
+ int num_nops;
+
+ if (type == DELAY_LOAD || type == DELAY_FCMP)
+ num_nops = 1;
+
+ else
+ num_nops = 0;
+
+  /* Make sure that we don't put nops after labels.  */
+ while (next_insn != 0
+ && (GET_CODE (next_insn) == NOTE
+ || GET_CODE (next_insn) == CODE_LABEL))
+ next_insn = NEXT_INSN (next_insn);
+
+ dslots_load_total += num_nops;
+ if (TARGET_DEBUG_C_MODE
+ || type == DELAY_NONE
+ || operands == 0
+ || cur_insn == 0
+ || next_insn == 0
+ || GET_CODE (next_insn) == CODE_LABEL
+ || (set_reg = operands[0]) == 0)
+ {
+ dslots_number_nops = 0;
+ iq2000_load_reg = 0;
+ iq2000_load_reg2 = 0;
+ iq2000_load_reg3 = 0;
+ iq2000_load_reg4 = 0;
+
+ return ret;
+ }
+
+ set_reg = operands[0];
+ if (set_reg == 0)
+ return ret;
+
+ while (GET_CODE (set_reg) == SUBREG)
+ set_reg = SUBREG_REG (set_reg);
+
+ mode = GET_MODE (set_reg);
+ dslots_number_nops = num_nops;
+ iq2000_load_reg = set_reg;
+ if (GET_MODE_SIZE (mode)
+ > (unsigned) (UNITS_PER_WORD))
+ iq2000_load_reg2 = gen_rtx_REG (SImode, REGNO (set_reg) + 1);
+ else
+ iq2000_load_reg2 = 0;
+
+ return ret;
+}
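+
+/* For example, after a load such as `lw %4,0(%5)' the destination
+   register is recorded in iq2000_load_reg (and iq2000_load_reg2 for a
+   multi-word value); final_prescan_insn below then emits the pending
+   nop only if the next real instruction actually mentions one of the
+   recorded registers.  */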
+
+/* Determine whether a memory reference takes one (based off of the GP
+ pointer), two (normal), or three (label + reg) instructions, and bump the
+ appropriate counter for -mstats. */
+
+static void
+iq2000_count_memory_refs (rtx op, int num)
+{
+ int additional = 0;
+ int n_words = 0;
+ rtx addr, plus0, plus1;
+ enum rtx_code code0, code1;
+ int looping;
+
+ if (TARGET_DEBUG_B_MODE)
+ {
+ fprintf (stderr, "\n========== iq2000_count_memory_refs:\n");
+ debug_rtx (op);
+ }
+
+ /* Skip MEM if passed, otherwise handle movsi of address. */
+ addr = (GET_CODE (op) != MEM) ? op : XEXP (op, 0);
+
+ /* Loop, going through the address RTL. */
+ do
+ {
+ looping = FALSE;
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ case CONST_INT:
+ case LO_SUM:
+ break;
+
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+ code0 = GET_CODE (plus0);
+ code1 = GET_CODE (plus1);
+
+ if (code0 == REG)
+ {
+ additional++;
+ addr = plus1;
+ looping = 1;
+ continue;
+ }
+
+ if (code0 == CONST_INT)
+ {
+ addr = plus1;
+ looping = 1;
+ continue;
+ }
+
+ if (code1 == REG)
+ {
+ additional++;
+ addr = plus0;
+ looping = 1;
+ continue;
+ }
+
+ if (code1 == CONST_INT)
+ {
+ addr = plus0;
+ looping = 1;
+ continue;
+ }
+
+ if (code0 == SYMBOL_REF || code0 == LABEL_REF || code0 == CONST)
+ {
+ addr = plus0;
+ looping = 1;
+ continue;
+ }
+
+ if (code1 == SYMBOL_REF || code1 == LABEL_REF || code1 == CONST)
+ {
+ addr = plus1;
+ looping = 1;
+ continue;
+ }
+
+ break;
+
+ case LABEL_REF:
+ n_words = 2; /* Always 2 words. */
+ break;
+
+ case CONST:
+ addr = XEXP (addr, 0);
+ looping = 1;
+ continue;
+
+ case SYMBOL_REF:
+ n_words = SYMBOL_REF_FLAG (addr) ? 1 : 2;
+ break;
+
+ default:
+ break;
+ }
+ }
+ while (looping);
+
+ if (n_words == 0)
+ return;
+
+ n_words += additional;
+ if (n_words > 3)
+ n_words = 3;
+
+ num_refs[n_words-1] += num;
+}
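+
+/* For example, a gp-relative symbol (SYMBOL_REF_FLAG set) counts as a
+   single instruction, an ordinary symbolic reference as two, and a
+   label plus a register as three, which is also the cap applied
+   above.  */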
+
+/* Abort after printing out a specific insn. */
+
+static void
+abort_with_insn (rtx insn, const char * reason)
+{
+  error ("%s", reason);
+ debug_rtx (insn);
+ fancy_abort (__FILE__, __LINE__, __FUNCTION__);
+}
+
+/* Return the appropriate instructions to move one operand to another. */
+
+const char *
+iq2000_move_1word (rtx operands[], rtx insn, int unsignedp)
+{
+ const char *ret = 0;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ enum rtx_code code0 = GET_CODE (op0);
+ enum rtx_code code1 = GET_CODE (op1);
+ enum machine_mode mode = GET_MODE (op0);
+ int subreg_offset0 = 0;
+ int subreg_offset1 = 0;
+ enum delay_type delay = DELAY_NONE;
+
+ while (code0 == SUBREG)
+ {
+ subreg_offset0 += subreg_regno_offset (REGNO (SUBREG_REG (op0)),
+ GET_MODE (SUBREG_REG (op0)),
+ SUBREG_BYTE (op0),
+ GET_MODE (op0));
+ op0 = SUBREG_REG (op0);
+ code0 = GET_CODE (op0);
+ }
+
+ while (code1 == SUBREG)
+ {
+ subreg_offset1 += subreg_regno_offset (REGNO (SUBREG_REG (op1)),
+ GET_MODE (SUBREG_REG (op1)),
+ SUBREG_BYTE (op1),
+ GET_MODE (op1));
+ op1 = SUBREG_REG (op1);
+ code1 = GET_CODE (op1);
+ }
+
+ /* For our purposes, a condition code mode is the same as SImode. */
+ if (mode == CCmode)
+ mode = SImode;
+
+ if (code0 == REG)
+ {
+ int regno0 = REGNO (op0) + subreg_offset0;
+
+ if (code1 == REG)
+ {
+ int regno1 = REGNO (op1) + subreg_offset1;
+
+	  /* Do not do anything for assigning a register to itself.  */
+ if (regno0 == regno1)
+ ret = "";
+
+ else if (GP_REG_P (regno0))
+ {
+ if (GP_REG_P (regno1))
+ ret = "or\t%0,%%0,%1";
+ }
+
+ }
+
+ else if (code1 == MEM)
+ {
+ delay = DELAY_LOAD;
+
+ if (TARGET_STATS)
+ iq2000_count_memory_refs (op1, 1);
+
+ if (GP_REG_P (regno0))
+ {
+ /* For loads, use the mode of the memory item, instead of the
+ target, so zero/sign extend can use this code as well. */
+ switch (GET_MODE (op1))
+ {
+ default:
+ break;
+ case SFmode:
+ ret = "lw\t%0,%1";
+ break;
+ case SImode:
+ case CCmode:
+ ret = "lw\t%0,%1";
+ break;
+ case HImode:
+ ret = (unsignedp) ? "lhu\t%0,%1" : "lh\t%0,%1";
+ break;
+ case QImode:
+ ret = (unsignedp) ? "lbu\t%0,%1" : "lb\t%0,%1";
+ break;
+ }
+ }
+ }
+
+ else if (code1 == CONST_INT
+ || (code1 == CONST_DOUBLE
+ && GET_MODE (op1) == VOIDmode))
+ {
+ if (code1 == CONST_DOUBLE)
+ {
+ /* This can happen when storing constants into long long
+ bitfields. Just store the least significant word of
+ the value. */
+ operands[1] = op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
+ }
+
+ if (INTVAL (op1) == 0)
+ {
+ if (GP_REG_P (regno0))
+ ret = "or\t%0,%%0,%z1";
+ }
+ else if (GP_REG_P (regno0))
+ {
+ if (SMALL_INT_UNSIGNED (op1))
+ ret = "ori\t%0,%%0,%x1\t\t\t# %1";
+ else if (SMALL_INT (op1))
+ ret = "addiu\t%0,%%0,%1\t\t\t# %1";
+ else
+ ret = "lui\t%0,%X1\t\t\t# %1\n\tori\t%0,%0,%x1";
+ }
+ }
+
+ else if (code1 == CONST_DOUBLE && mode == SFmode)
+ {
+ if (op1 == CONST0_RTX (SFmode))
+ {
+ if (GP_REG_P (regno0))
+ ret = "or\t%0,%%0,%.";
+ }
+
+ else
+ {
+ delay = DELAY_LOAD;
+ ret = "li.s\t%0,%1";
+ }
+ }
+
+ else if (code1 == LABEL_REF)
+ {
+ if (TARGET_STATS)
+ iq2000_count_memory_refs (op1, 1);
+
+ ret = "la\t%0,%a1";
+ }
+
+ else if (code1 == SYMBOL_REF || code1 == CONST)
+ {
+ if (TARGET_STATS)
+ iq2000_count_memory_refs (op1, 1);
+
+ ret = "la\t%0,%a1";
+ }
+
+ else if (code1 == PLUS)
+ {
+ rtx add_op0 = XEXP (op1, 0);
+ rtx add_op1 = XEXP (op1, 1);
+
+ if (GET_CODE (XEXP (op1, 1)) == REG
+ && GET_CODE (XEXP (op1, 0)) == CONST_INT)
+ add_op0 = XEXP (op1, 1), add_op1 = XEXP (op1, 0);
+
+ operands[2] = add_op0;
+ operands[3] = add_op1;
+ ret = "add%:\t%0,%2,%3";
+ }
+
+ else if (code1 == HIGH)
+ {
+ operands[1] = XEXP (op1, 0);
+ ret = "lui\t%0,%%hi(%1)";
+ }
+ }
+
+ else if (code0 == MEM)
+ {
+ if (TARGET_STATS)
+ iq2000_count_memory_refs (op0, 1);
+
+ if (code1 == REG)
+ {
+ int regno1 = REGNO (op1) + subreg_offset1;
+
+ if (GP_REG_P (regno1))
+ {
+ switch (mode)
+ {
+ case SFmode: ret = "sw\t%1,%0"; break;
+ case SImode: ret = "sw\t%1,%0"; break;
+ case HImode: ret = "sh\t%1,%0"; break;
+ case QImode: ret = "sb\t%1,%0"; break;
+ default: break;
+ }
+ }
+ }
+
+ else if (code1 == CONST_INT && INTVAL (op1) == 0)
+ {
+ switch (mode)
+ {
+ case SFmode: ret = "sw\t%z1,%0"; break;
+ case SImode: ret = "sw\t%z1,%0"; break;
+ case HImode: ret = "sh\t%z1,%0"; break;
+ case QImode: ret = "sb\t%z1,%0"; break;
+ default: break;
+ }
+ }
+
+ else if (code1 == CONST_DOUBLE && op1 == CONST0_RTX (mode))
+ {
+ switch (mode)
+ {
+ case SFmode: ret = "sw\t%.,%0"; break;
+ case SImode: ret = "sw\t%.,%0"; break;
+ case HImode: ret = "sh\t%.,%0"; break;
+ case QImode: ret = "sb\t%.,%0"; break;
+ default: break;
+ }
+ }
+ }
+
+ if (ret == 0)
+ {
+ abort_with_insn (insn, "Bad move");
+ return 0;
+ }
+
+ if (delay != DELAY_NONE)
+ return iq2000_fill_delay_slot (ret, delay, operands, insn);
+
+ return ret;
+}
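+
+/* For example (assuming the usual 16-bit ranges for SMALL_INT and
+   SMALL_INT_UNSIGNED), loading 0x12345 into a GP register fits neither
+   immediate form above and is emitted as a lui/ori pair, while 0x1234
+   needs only a single ori and -4 a single addiu.  */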
+
+/* Provide the costs of an addressing mode that contains ADDR. */
+
+static int
+iq2000_address_cost (rtx addr, bool speed)
+{
+ switch (GET_CODE (addr))
+ {
+ case LO_SUM:
+ return 1;
+
+ case LABEL_REF:
+ return 2;
+
+ case CONST:
+ {
+ rtx offset = const0_rtx;
+
+ addr = eliminate_constant_term (XEXP (addr, 0), & offset);
+ if (GET_CODE (addr) == LABEL_REF)
+ return 2;
+
+ if (GET_CODE (addr) != SYMBOL_REF)
+ return 4;
+
+ if (! SMALL_INT (offset))
+ return 2;
+ }
+
+ /* Fall through. */
+
+ case SYMBOL_REF:
+ return SYMBOL_REF_FLAG (addr) ? 1 : 2;
+
+ case PLUS:
+ {
+ rtx plus0 = XEXP (addr, 0);
+ rtx plus1 = XEXP (addr, 1);
+
+ if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG)
+ plus0 = XEXP (addr, 1), plus1 = XEXP (addr, 0);
+
+ if (GET_CODE (plus0) != REG)
+ break;
+
+ switch (GET_CODE (plus1))
+ {
+ case CONST_INT:
+ return SMALL_INT (plus1) ? 1 : 2;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case HIGH:
+ case LO_SUM:
+ return iq2000_address_cost (plus1, speed) + 1;
+
+ default:
+ break;
+ }
+ }
+
+ default:
+ break;
+ }
+
+ return 4;
+}
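+
+/* In other words, LO_SUM addresses, gp-relative symbols, and
+   base-plus-small-offset addresses are cheapest (cost 1); ordinary
+   symbolic and label addresses cost 2; anything needing a full
+   constant expansion falls back to the default cost of 4.  */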
+
+/* Make normal rtx_code into something we can index from an array. */
+
+static enum internal_test
+map_test_to_internal_test (enum rtx_code test_code)
+{
+ enum internal_test test = ITEST_MAX;
+
+ switch (test_code)
+ {
+ case EQ: test = ITEST_EQ; break;
+ case NE: test = ITEST_NE; break;
+ case GT: test = ITEST_GT; break;
+ case GE: test = ITEST_GE; break;
+ case LT: test = ITEST_LT; break;
+ case LE: test = ITEST_LE; break;
+ case GTU: test = ITEST_GTU; break;
+ case GEU: test = ITEST_GEU; break;
+ case LTU: test = ITEST_LTU; break;
+ case LEU: test = ITEST_LEU; break;
+ default: break;
+ }
+
+ return test;
+}
+
+/* Generate the code to do a TEST_CODE comparison on two integer values CMP0
+   and CMP1.  If P_INVERT is non-null, *P_INVERT is set nonzero when the
+   caller must reverse the sense of its branch, and no inversion is done
+   here; otherwise any needed inversion is applied to the result.
+ The return value RESULT is:
+ (reg:SI xx) The pseudo register the comparison is in
+ 0 No register, generate a simple branch. */
+
+rtx
+gen_int_relational (enum rtx_code test_code, rtx result, rtx cmp0, rtx cmp1,
+ int *p_invert)
+{
+ struct cmp_info
+ {
+ enum rtx_code test_code; /* Code to use in instruction (LT vs. LTU). */
+ int const_low; /* Low bound of constant we can accept. */
+ int const_high; /* High bound of constant we can accept. */
+ int const_add; /* Constant to add (convert LE -> LT). */
+ int reverse_regs; /* Reverse registers in test. */
+ int invert_const; /* != 0 if invert value if cmp1 is constant. */
+ int invert_reg; /* != 0 if invert value if cmp1 is register. */
+ int unsignedp; /* != 0 for unsigned comparisons. */
+ };
+
+ static struct cmp_info info[ (int)ITEST_MAX ] =
+ {
+ { XOR, 0, 65535, 0, 0, 0, 0, 0 }, /* EQ */
+ { XOR, 0, 65535, 0, 0, 1, 1, 0 }, /* NE */
+ { LT, -32769, 32766, 1, 1, 1, 0, 0 }, /* GT */
+ { LT, -32768, 32767, 0, 0, 1, 1, 0 }, /* GE */
+ { LT, -32768, 32767, 0, 0, 0, 0, 0 }, /* LT */
+ { LT, -32769, 32766, 1, 1, 0, 1, 0 }, /* LE */
+ { LTU, -32769, 32766, 1, 1, 1, 0, 1 }, /* GTU */
+ { LTU, -32768, 32767, 0, 0, 1, 1, 1 }, /* GEU */
+ { LTU, -32768, 32767, 0, 0, 0, 0, 1 }, /* LTU */
+ { LTU, -32769, 32766, 1, 1, 0, 1, 1 }, /* LEU */
+ };
+
+ enum internal_test test;
+ enum machine_mode mode;
+ struct cmp_info *p_info;
+ int branch_p;
+ int eqne_p;
+ int invert;
+ rtx reg;
+ rtx reg2;
+
+ test = map_test_to_internal_test (test_code);
+ gcc_assert (test != ITEST_MAX);
+
+ p_info = &info[(int) test];
+ eqne_p = (p_info->test_code == XOR);
+
+ mode = GET_MODE (cmp0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (cmp1);
+
+ /* Eliminate simple branches. */
+ branch_p = (result == 0);
+ if (branch_p)
+ {
+ if (GET_CODE (cmp0) == REG || GET_CODE (cmp0) == SUBREG)
+ {
+ /* Comparisons against zero are simple branches. */
+ if (GET_CODE (cmp1) == CONST_INT && INTVAL (cmp1) == 0)
+ return 0;
+
+ /* Test for beq/bne. */
+ if (eqne_p)
+ return 0;
+ }
+
+ /* Allocate a pseudo to calculate the value in. */
+ result = gen_reg_rtx (mode);
+ }
+
+ /* Make sure we can handle any constants given to us. */
+ if (GET_CODE (cmp0) == CONST_INT)
+ cmp0 = force_reg (mode, cmp0);
+
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (cmp1);
+
+ if (value < p_info->const_low
+ || value > p_info->const_high)
+ cmp1 = force_reg (mode, cmp1);
+ }
+
+ /* See if we need to invert the result. */
+ invert = (GET_CODE (cmp1) == CONST_INT
+ ? p_info->invert_const : p_info->invert_reg);
+
+ if (p_invert != (int *)0)
+ {
+ *p_invert = invert;
+ invert = 0;
+ }
+
+  /* Comparison to a constant may involve adding 1 to the constant to
+     turn an LE test into an LT test.  Comparison between two registers
+     may involve switching operands.  */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ if (p_info->const_add != 0)
+ {
+ HOST_WIDE_INT new_const = INTVAL (cmp1) + p_info->const_add;
+
+ /* If modification of cmp1 caused overflow,
+ we would get the wrong answer if we follow the usual path;
+ thus, x > 0xffffffffU would turn into x > 0U. */
+ if ((p_info->unsignedp
+ ? (unsigned HOST_WIDE_INT) new_const >
+ (unsigned HOST_WIDE_INT) INTVAL (cmp1)
+ : new_const > INTVAL (cmp1))
+ != (p_info->const_add > 0))
+ {
+ /* This test is always true, but if INVERT is true then
+ the result of the test needs to be inverted so 0 should
+ be returned instead. */
+ emit_move_insn (result, invert ? const0_rtx : const_true_rtx);
+ return result;
+ }
+ else
+ cmp1 = GEN_INT (new_const);
+ }
+ }
+
+ else if (p_info->reverse_regs)
+ {
+ rtx temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ }
+
+ if (test == ITEST_NE && GET_CODE (cmp1) == CONST_INT && INTVAL (cmp1) == 0)
+ reg = cmp0;
+ else
+ {
+ reg = (invert || eqne_p) ? gen_reg_rtx (mode) : result;
+ convert_move (reg, gen_rtx_fmt_ee (p_info->test_code, mode, cmp0, cmp1), 0);
+ }
+
+ if (test == ITEST_NE)
+ {
+ convert_move (result, gen_rtx_GTU (mode, reg, const0_rtx), 0);
+ if (p_invert != NULL)
+ *p_invert = 0;
+ invert = 0;
+ }
+
+ else if (test == ITEST_EQ)
+ {
+ reg2 = invert ? gen_reg_rtx (mode) : result;
+ convert_move (reg2, gen_rtx_LTU (mode, reg, const1_rtx), 0);
+ reg = reg2;
+ }
+
+ if (invert)
+ {
+ rtx one;
+
+ one = const1_rtx;
+ convert_move (result, gen_rtx_XOR (mode, reg, one), 0);
+ }
+
+ return result;
+}
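+
+/* For example, a GE test is synthesized from the table above as an LT
+   test with an inverted result: either the computed value is XORed
+   with 1, or, when P_INVERT is supplied, the caller is asked to
+   reverse its branch instead.  */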
+
+/* Emit the common code for doing conditional branches.
+   OPERANDS[0] is the comparison code, OPERANDS[1] and OPERANDS[2] are
+   its operands, and OPERANDS[3] is the label to jump to.  */
+
+void
+gen_conditional_branch (rtx operands[], enum machine_mode mode)
+{
+ enum rtx_code test_code = GET_CODE (operands[0]);
+ rtx cmp0 = operands[1];
+ rtx cmp1 = operands[2];
+ rtx reg;
+ int invert;
+ rtx label1, label2;
+
+ invert = 0;
+ reg = gen_int_relational (test_code, NULL_RTX, cmp0, cmp1, &invert);
+
+ if (reg)
+ {
+ cmp0 = reg;
+ cmp1 = const0_rtx;
+ test_code = NE;
+ }
+ else if (GET_CODE (cmp1) == CONST_INT && INTVAL (cmp1) != 0)
+ /* We don't want to build a comparison against a nonzero
+ constant. */
+ cmp1 = force_reg (mode, cmp1);
+
+ /* Generate the branch. */
+ label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ label2 = pc_rtx;
+
+ if (invert)
+ {
+ label2 = label1;
+ label1 = pc_rtx;
+ }
+
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_fmt_ee (test_code,
+ mode,
+ cmp0, cmp1),
+ label1, label2)));
+}
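+
+/* For example, a GT comparison of two registers cannot be branched on
+   directly: gen_int_relational computes it into a pseudo (as an LT
+   test with reversed operands), and the branch emitted here becomes a
+   simple NE test of that pseudo against zero.  */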
+
+/* Initialize CUM for a function FNTYPE. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED)
+{
+ static CUMULATIVE_ARGS zero_cum;
+ tree param;
+ tree next_param;
+
+ if (TARGET_DEBUG_D_MODE)
+ {
+ fprintf (stderr,
+ "\ninit_cumulative_args, fntype = 0x%.8lx", (long) fntype);
+
+ if (!fntype)
+ fputc ('\n', stderr);
+
+ else
+ {
+ tree ret_type = TREE_TYPE (fntype);
+
+ fprintf (stderr, ", fntype code = %s, ret code = %s\n",
+ tree_code_name[(int)TREE_CODE (fntype)],
+ tree_code_name[(int)TREE_CODE (ret_type)]);
+ }
+ }
+
+ *cum = zero_cum;
+
+ /* Determine if this function has variable arguments. This is
+     indicated by the last argument being 'void_type_node' if there
+ are no variable arguments. The standard IQ2000 calling sequence
+ passes all arguments in the general purpose registers in this case. */
+
+ for (param = fntype ? TYPE_ARG_TYPES (fntype) : 0;
+ param != 0; param = next_param)
+ {
+ next_param = TREE_CHAIN (param);
+ if (next_param == 0 && TREE_VALUE (param) != void_type_node)
+ cum->gp_reg_found = 1;
+ }
+}
+
+/* Advance the argument of type TYPE and mode MODE to the next argument
+ position in CUM. */
+
+static void
+iq2000_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (TARGET_DEBUG_D_MODE)
+ {
+ fprintf (stderr,
+ "function_adv({gp reg found = %d, arg # = %2d, words = %2d}, %4s, ",
+ cum->gp_reg_found, cum->arg_number, cum->arg_words,
+ GET_MODE_NAME (mode));
+ fprintf (stderr, "%p", CONST_CAST2 (void *, const_tree, type));
+ fprintf (stderr, ", %d )\n\n", named);
+ }
+
+ cum->arg_number++;
+ switch (mode)
+ {
+ case VOIDmode:
+ break;
+
+ default:
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
+
+ cum->gp_reg_found = 1;
+ cum->arg_words += ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+ break;
+
+ case BLKmode:
+ cum->gp_reg_found = 1;
+ cum->arg_words += ((int_size_in_bytes (type) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+ break;
+
+ case SFmode:
+ cum->arg_words ++;
+ if (! cum->gp_reg_found && cum->arg_number <= 2)
+ cum->fp_code += 1 << ((cum->arg_number - 1) * 2);
+ break;
+
+ case DFmode:
+ cum->arg_words += 2;
+ if (! cum->gp_reg_found && cum->arg_number <= 2)
+ cum->fp_code += 2 << ((cum->arg_number - 1) * 2);
+ break;
+
+ case DImode:
+ cum->gp_reg_found = 1;
+ cum->arg_words += 2;
+ break;
+
+ case TImode:
+ cum->gp_reg_found = 1;
+ cum->arg_words += 4;
+ break;
+
+ case QImode:
+ case HImode:
+ case SImode:
+ cum->gp_reg_found = 1;
+ cum->arg_words ++;
+ break;
+ }
+}
+
+/* Return an RTL expression containing the register for the given mode MODE
+ and type TYPE in CUM, or 0 if the argument is to be passed on the stack. */
+
+static rtx
+iq2000_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ rtx ret;
+ int regbase = -1;
+ int bias = 0;
+ unsigned int *arg_words = &cum->arg_words;
+ int struct_p = (type != 0
+ && (TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE));
+
+ if (TARGET_DEBUG_D_MODE)
+ {
+ fprintf (stderr,
+ "function_arg( {gp reg found = %d, arg # = %2d, words = %2d}, %4s, ",
+ cum->gp_reg_found, cum->arg_number, cum->arg_words,
+ GET_MODE_NAME (mode));
+ fprintf (stderr, "%p", (const void *) type);
+ fprintf (stderr, ", %d ) = ", named);
+ }
+
+ cum->last_arg_fp = 0;
+ switch (mode)
+ {
+ case SFmode:
+ regbase = GP_ARG_FIRST;
+ break;
+
+ case DFmode:
+ cum->arg_words += cum->arg_words & 1;
+
+ regbase = GP_ARG_FIRST;
+ break;
+
+ default:
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
+
+ /* Drops through. */
+ case BLKmode:
+ if (type != NULL_TREE && TYPE_ALIGN (type) > (unsigned) BITS_PER_WORD)
+ cum->arg_words += (cum->arg_words & 1);
+ regbase = GP_ARG_FIRST;
+ break;
+
+ case VOIDmode:
+ case QImode:
+ case HImode:
+ case SImode:
+ regbase = GP_ARG_FIRST;
+ break;
+
+ case DImode:
+ cum->arg_words += (cum->arg_words & 1);
+ regbase = GP_ARG_FIRST;
+ break;
+
+ case TImode:
+ cum->arg_words += (cum->arg_words & 3);
+ regbase = GP_ARG_FIRST;
+ break;
+ }
+
+ if (*arg_words >= (unsigned) MAX_ARGS_IN_REGISTERS)
+ {
+ if (TARGET_DEBUG_D_MODE)
+ fprintf (stderr, "<stack>%s\n", struct_p ? ", [struct]" : "");
+
+ ret = 0;
+ }
+ else
+ {
+ gcc_assert (regbase != -1);
+
+ if (! type || TREE_CODE (type) != RECORD_TYPE
+ || ! named || ! TYPE_SIZE_UNIT (type)
+ || ! host_integerp (TYPE_SIZE_UNIT (type), 1))
+ ret = gen_rtx_REG (mode, regbase + *arg_words + bias);
+ else
+ {
+ tree field;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL
+ && TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD
+ && host_integerp (bit_position (field), 0)
+ && int_bit_position (field) % BITS_PER_WORD == 0)
+ break;
+
+ /* If the whole struct fits a DFmode register,
+ we don't need the PARALLEL. */
+ if (! field || mode == DFmode)
+ ret = gen_rtx_REG (mode, regbase + *arg_words + bias);
+ else
+ {
+ unsigned int chunks;
+ HOST_WIDE_INT bitpos;
+ unsigned int regno;
+ unsigned int i;
+
+ /* ??? If this is a packed structure, then the last hunk won't
+ be 64 bits. */
+ chunks
+ = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
+ if (chunks + *arg_words + bias > (unsigned) MAX_ARGS_IN_REGISTERS)
+ chunks = MAX_ARGS_IN_REGISTERS - *arg_words - bias;
+
+ /* Assign_parms checks the mode of ENTRY_PARM, so we must
+ use the actual mode here. */
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (chunks));
+
+ bitpos = 0;
+ regno = regbase + *arg_words + bias;
+ field = TYPE_FIELDS (type);
+ for (i = 0; i < chunks; i++)
+ {
+ rtx reg;
+
+ for (; field; field = DECL_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL
+ && int_bit_position (field) >= bitpos)
+ break;
+
+ if (field
+ && int_bit_position (field) == bitpos
+ && TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD)
+ reg = gen_rtx_REG (DFmode, regno++);
+ else
+ reg = gen_rtx_REG (word_mode, regno);
+
+ XVECEXP (ret, 0, i)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg,
+ GEN_INT (bitpos / BITS_PER_UNIT));
+
+ bitpos += 64;
+ regno++;
+ }
+ }
+ }
+
+ if (TARGET_DEBUG_D_MODE)
+ fprintf (stderr, "%s%s\n", reg_names[regbase + *arg_words + bias],
+ struct_p ? ", [struct]" : "");
+ }
+
+ /* We will be called with a mode of VOIDmode after the last argument
+ has been seen. Whatever we return will be passed to the call
+ insn. If we need any shifts for small structures, return them in
+ a PARALLEL. */
+ if (mode == VOIDmode)
+ {
+ if (cum->num_adjusts > 0)
+ ret = gen_rtx_PARALLEL ((enum machine_mode) cum->fp_code,
+ gen_rtvec_v (cum->num_adjusts, cum->adjust));
+ }
+
+ return ret;
+}
+
+static unsigned int
+iq2000_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ return (type != NULL_TREE
+ ? (TYPE_ALIGN (type) <= PARM_BOUNDARY
+ ? PARM_BOUNDARY
+ : TYPE_ALIGN (type))
+ : (GET_MODE_ALIGNMENT (mode) <= PARM_BOUNDARY
+ ? PARM_BOUNDARY
+ : GET_MODE_ALIGNMENT (mode)));
+}
+
+static int
+iq2000_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (mode == DImode && cum->arg_words == MAX_ARGS_IN_REGISTERS - 1)
+ {
+ if (TARGET_DEBUG_D_MODE)
+ fprintf (stderr, "iq2000_arg_partial_bytes=%d\n", UNITS_PER_WORD);
+ return UNITS_PER_WORD;
+ }
+
+ return 0;
+}
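+
+/* Only one case yields a partial argument: a DImode value whose first
+   word lands in the last argument register has its second word passed
+   on the stack, so UNITS_PER_WORD bytes are reported as being passed
+   in registers.  */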
+
+/* Implement va_start. */
+
+static void
+iq2000_va_start (tree valist, rtx nextarg)
+{
+  /* The number of words of non-float named formals.  */
+  int int_arg_words;
+  int gpr_save_area_size;
+
+  /* Note UNITS_PER_WORD is 4 bytes.  */
+  int_arg_words = crtl->args.info.arg_words;
+
+ if (int_arg_words < 8 )
+ /* Adjust for the prologue's economy measure. */
+ gpr_save_area_size = (8 - int_arg_words) * UNITS_PER_WORD;
+ else
+ gpr_save_area_size = 0;
+
+ /* Everything is in the GPR save area, or in the overflow
+ area which is contiguous with it. */
+ nextarg = plus_constant (nextarg, - gpr_save_area_size);
+ std_expand_builtin_va_start (valist, nextarg);
+}
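+
+/* For example, with one named integer argument int_arg_words is 1, so
+   gpr_save_area_size is (8 - 1) * UNITS_PER_WORD = 28 bytes, and
+   nextarg is backed up by that amount before the generic va_start
+   expansion runs.  */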
+
+/* Allocate a chunk of memory for per-function machine-dependent data. */
+
+static struct machine_function *
+iq2000_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+iq2000_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mcpu_:
+ if (strcmp (arg, "iq10") == 0)
+ iq2000_tune = PROCESSOR_IQ10;
+ else if (strcmp (arg, "iq2000") == 0)
+ iq2000_tune = PROCESSOR_IQ2000;
+ else
+ return false;
+ return true;
+
+ case OPT_march_:
+ /* This option has no effect at the moment. */
+ return (strcmp (arg, "default") == 0
+ || strcmp (arg, "DEFAULT") == 0
+ || strcmp (arg, "iq2000") == 0);
+
+ default:
+ return true;
+ }
+}
+
+/* Detect any conflicts in the switches. */
+
+static void
+iq2000_option_override (void)
+{
+ target_flags &= ~MASK_GPOPT;
+
+ iq2000_isa = IQ2000_ISA_DEFAULT;
+
+ /* Identify the processor type. */
+
+ iq2000_print_operand_punct['?'] = 1;
+ iq2000_print_operand_punct['#'] = 1;
+ iq2000_print_operand_punct['&'] = 1;
+ iq2000_print_operand_punct['!'] = 1;
+ iq2000_print_operand_punct['*'] = 1;
+ iq2000_print_operand_punct['@'] = 1;
+ iq2000_print_operand_punct['.'] = 1;
+ iq2000_print_operand_punct['('] = 1;
+ iq2000_print_operand_punct[')'] = 1;
+ iq2000_print_operand_punct['['] = 1;
+ iq2000_print_operand_punct[']'] = 1;
+ iq2000_print_operand_punct['<'] = 1;
+ iq2000_print_operand_punct['>'] = 1;
+ iq2000_print_operand_punct['{'] = 1;
+ iq2000_print_operand_punct['}'] = 1;
+ iq2000_print_operand_punct['^'] = 1;
+ iq2000_print_operand_punct['$'] = 1;
+ iq2000_print_operand_punct['+'] = 1;
+ iq2000_print_operand_punct['~'] = 1;
+
+ /* Save GPR registers in word_mode sized hunks. word_mode hasn't been
+ initialized yet, so we can't use that here. */
+ gpr_mode = SImode;
+
+ /* Function to allocate machine-dependent function status. */
+ init_machine_status = iq2000_init_machine_status;
+}
+
+/* The arg pointer (which is eliminated) points to the virtual frame pointer,
+ while the frame pointer (which may be eliminated) points to the stack
+ pointer after the initial adjustments. */
+
+HOST_WIDE_INT
+iq2000_debugger_offset (rtx addr, HOST_WIDE_INT offset)
+{
+ rtx offset2 = const0_rtx;
+ rtx reg = eliminate_constant_term (addr, & offset2);
+
+ if (offset == 0)
+ offset = INTVAL (offset2);
+
+ if (reg == stack_pointer_rtx || reg == frame_pointer_rtx
+ || reg == hard_frame_pointer_rtx)
+ {
+ HOST_WIDE_INT frame_size = (!cfun->machine->initialized)
+ ? compute_frame_size (get_frame_size ())
+ : cfun->machine->total_size;
+
+ offset = offset - frame_size;
+ }
+
+ return offset;
+}
+
+/* If defined, a C statement to be executed just prior to the output of
+ assembler code for INSN, to modify the extracted operands so they will be
+ output differently.
+
+ Here the argument OPVEC is the vector containing the operands extracted
+ from INSN, and NOPERANDS is the number of elements of the vector which
+ contain meaningful data for this insn. The contents of this vector are
+ what will be used to convert the insn template into assembler code, so you
+ can change the assembler output by changing the contents of the vector.
+
+ We use it to check if the current insn needs a nop in front of it because
+ of load delays, and also to update the delay slot statistics. */
+
+void
+final_prescan_insn (rtx insn, rtx opvec[] ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ if (dslots_number_nops > 0)
+ {
+ rtx pattern = PATTERN (insn);
+ int length = get_attr_length (insn);
+
+ /* Do we need to emit a NOP? */
+ if (length == 0
+ || (iq2000_load_reg != 0 && reg_mentioned_p (iq2000_load_reg, pattern))
+ || (iq2000_load_reg2 != 0 && reg_mentioned_p (iq2000_load_reg2, pattern))
+ || (iq2000_load_reg3 != 0 && reg_mentioned_p (iq2000_load_reg3, pattern))
+ || (iq2000_load_reg4 != 0
+ && reg_mentioned_p (iq2000_load_reg4, pattern)))
+ fputs ("\tnop\n", asm_out_file);
+
+ else
+ dslots_load_filled ++;
+
+ while (--dslots_number_nops > 0)
+ fputs ("\tnop\n", asm_out_file);
+
+ iq2000_load_reg = 0;
+ iq2000_load_reg2 = 0;
+ iq2000_load_reg3 = 0;
+ iq2000_load_reg4 = 0;
+ }
+
+ if ( (GET_CODE (insn) == JUMP_INSN
+ || GET_CODE (insn) == CALL_INSN
+ || (GET_CODE (PATTERN (insn)) == RETURN))
+ && NEXT_INSN (PREV_INSN (insn)) == insn)
+ {
+ rtx nop_insn = emit_insn_after (gen_nop (), insn);
+
+ INSN_ADDRESSES_NEW (nop_insn, -1);
+ }
+
+ if (TARGET_STATS
+ && (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CALL_INSN))
+ dslots_jump_total ++;
+}
+
+/* Return the number of bytes needed to compute the frame pointer from
+   the current stack pointer, where SIZE is the number of bytes of local
+   variables allocated.
+
+ IQ2000 stack frames look like:
+
+ Before call After call
+ +-----------------------+ +-----------------------+
+ high | | | |
+ mem. | | | |
+ | caller's temps. | | caller's temps. |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | arguments on stack. | | arguments on stack. |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | 4 words to save | | 4 words to save |
+ | arguments passed | | arguments passed |
+ | in registers, even | | in registers, even |
+ SP->| if not passed. | VFP->| if not passed. |
+ +-----------------------+ +-----------------------+
+ | |
+ | fp register save |
+ | |
+ +-----------------------+
+ | |
+ | gp register save |
+ | |
+ +-----------------------+
+ | |
+ | local variables |
+ | |
+ +-----------------------+
+ | |
+ | alloca allocations |
+ | |
+ +-----------------------+
+ | |
+ | GP save for V.4 abi |
+ | |
+ +-----------------------+
+ | |
+ | arguments on stack |
+ | |
+ +-----------------------+
+ | 4 words to save |
+ | arguments passed |
+ | in registers, even |
+ low SP->| if not passed. |
+ memory +-----------------------+ */
+
+HOST_WIDE_INT
+compute_frame_size (HOST_WIDE_INT size)
+{
+ int regno;
+ HOST_WIDE_INT total_size; /* # bytes that the entire frame takes up. */
+ HOST_WIDE_INT var_size; /* # bytes that variables take up. */
+ HOST_WIDE_INT args_size; /* # bytes that outgoing arguments take up. */
+ HOST_WIDE_INT extra_size; /* # extra bytes. */
+ HOST_WIDE_INT gp_reg_rounded; /* # bytes needed to store gp after rounding. */
+ HOST_WIDE_INT gp_reg_size; /* # bytes needed to store gp regs. */
+ HOST_WIDE_INT fp_reg_size; /* # bytes needed to store fp regs. */
+ long mask; /* mask of saved gp registers. */
+
+ gp_reg_size = 0;
+ fp_reg_size = 0;
+ mask = 0;
+  extra_size = IQ2000_STACK_ALIGN (0);
+ var_size = IQ2000_STACK_ALIGN (size);
+ args_size = IQ2000_STACK_ALIGN (crtl->outgoing_args_size);
+
+ /* If a function dynamically allocates the stack and
+ has 0 for STACK_DYNAMIC_OFFSET then allocate some stack space. */
+ if (args_size == 0 && cfun->calls_alloca)
+ args_size = 4 * UNITS_PER_WORD;
+
+ total_size = var_size + args_size + extra_size;
+
+ /* Calculate space needed for gp registers. */
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ {
+ if (MUST_SAVE_REGISTER (regno))
+ {
+ gp_reg_size += GET_MODE_SIZE (gpr_mode);
+ mask |= 1L << (regno - GP_REG_FIRST);
+ }
+ }
+
+ /* We need to restore these for the handler. */
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == (int) INVALID_REGNUM)
+ break;
+ gp_reg_size += GET_MODE_SIZE (gpr_mode);
+ mask |= 1L << (regno - GP_REG_FIRST);
+ }
+ }
+
+ gp_reg_rounded = IQ2000_STACK_ALIGN (gp_reg_size);
+ total_size += gp_reg_rounded + IQ2000_STACK_ALIGN (fp_reg_size);
+
+ /* The gp reg is caller saved, so there is no need for leaf routines
+ (total_size == extra_size) to save the gp reg. */
+ if (total_size == extra_size
+ && ! profile_flag)
+ total_size = extra_size = 0;
+
+ total_size += IQ2000_STACK_ALIGN (crtl->args.pretend_args_size);
+
+ /* Save other computed information. */
+ cfun->machine->total_size = total_size;
+ cfun->machine->var_size = var_size;
+ cfun->machine->args_size = args_size;
+ cfun->machine->extra_size = extra_size;
+ cfun->machine->gp_reg_size = gp_reg_size;
+ cfun->machine->fp_reg_size = fp_reg_size;
+ cfun->machine->mask = mask;
+ cfun->machine->initialized = reload_completed;
+ cfun->machine->num_gp = gp_reg_size / UNITS_PER_WORD;
+
+ if (mask)
+ {
+ unsigned long offset;
+
+ offset = (args_size + extra_size + var_size
+ + gp_reg_size - GET_MODE_SIZE (gpr_mode));
+
+ cfun->machine->gp_sp_offset = offset;
+ cfun->machine->gp_save_offset = offset - total_size;
+ }
+ else
+ {
+ cfun->machine->gp_sp_offset = 0;
+ cfun->machine->gp_save_offset = 0;
+ }
+
+ cfun->machine->fp_sp_offset = 0;
+ cfun->machine->fp_save_offset = 0;
+
+ /* Ok, we're done. */
+ return total_size;
+}
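+
+/* As a worked example: a function with 16 bytes of locals, no outgoing
+   arguments, and two call-saved registers gives var_size = 16 and
+   gp_reg_size = 8, so (ignoring any IQ2000_STACK_ALIGN rounding)
+   total_size = 24 and the first register is saved at
+   gp_sp_offset = 16 + 8 - 4 = 20, the top of the save area.  */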
+
+
+/* We can always eliminate to the frame pointer. We can eliminate to the
+ stack pointer unless a frame pointer is needed. */
+
+bool
+iq2000_can_eliminate (const int from, const int to)
+{
+ return (from == RETURN_ADDRESS_POINTER_REGNUM
+ && (! leaf_function_p ()
+ || (to == GP_REG_FIRST + 31 && leaf_function_p ())))
+ || (from != RETURN_ADDRESS_POINTER_REGNUM
+ && (to == HARD_FRAME_POINTER_REGNUM
+ || (to == STACK_POINTER_REGNUM
+ && ! frame_pointer_needed)));
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame
+ pointer, argument pointer, or return address pointer. TO is either
+ the stack pointer or hard frame pointer. */
+
+int
+iq2000_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
+{
+ int offset;
+
+ compute_frame_size (get_frame_size ());
+  if (from == FRAME_POINTER_REGNUM)
+    offset = 0;
+  else if (from == ARG_POINTER_REGNUM)
+    offset = cfun->machine->total_size;
+  else if (from == RETURN_ADDRESS_POINTER_REGNUM)
+    {
+      if (leaf_function_p ())
+	offset = 0;
+      else
+	offset = (cfun->machine->gp_sp_offset
+		  + ((UNITS_PER_WORD - (POINTER_SIZE / BITS_PER_UNIT))
+		     * (BYTES_BIG_ENDIAN != 0)));
+    }
+  else
+    gcc_unreachable ();
+
+ return offset;
+}
+
+/* Common code to emit the insns (or to write the instructions to a file)
+ to save/restore registers.
+ Other parts of the code assume that IQ2000_TEMP1_REGNUM (aka large_reg)
+ is not modified within save_restore_insns. */
+
+#define BITSET_P(VALUE,BIT) (((VALUE) & (1L << (BIT))) != 0)
+
+/* Emit instructions to load the value (SP + OFFSET) into IQ2000_TEMP2_REGNUM
+ and return an rtl expression for the register. Write the assembly
+ instructions directly to FILE if it is not null, otherwise emit them as
+ rtl.
+
+ This function is a subroutine of save_restore_insns. It is used when
+ OFFSET is too large to add in a single instruction. */
+
+static rtx
+iq2000_add_large_offset_to_sp (HOST_WIDE_INT offset)
+{
+ rtx reg = gen_rtx_REG (Pmode, IQ2000_TEMP2_REGNUM);
+ rtx offset_rtx = GEN_INT (offset);
+
+ emit_move_insn (reg, offset_rtx);
+ emit_insn (gen_addsi3 (reg, reg, stack_pointer_rtx));
+ return reg;
+}
+
+/* Make INSN frame related and note that it performs the frame-related
+ operation DWARF_PATTERN. */
+
+static void
+iq2000_annotate_frame_insn (rtx insn, rtx dwarf_pattern)
+{
+ RTX_FRAME_RELATED_P (insn) = 1;
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ dwarf_pattern,
+ REG_NOTES (insn));
+}
+
+/* Emit a move instruction that stores REG in MEM. Make the instruction
+ frame related and note that it stores REG at (SP + OFFSET). */
+
+static void
+iq2000_emit_frame_related_store (rtx mem, rtx reg, HOST_WIDE_INT offset)
+{
+ rtx dwarf_address = plus_constant (stack_pointer_rtx, offset);
+ rtx dwarf_mem = gen_rtx_MEM (GET_MODE (reg), dwarf_address);
+
+ iq2000_annotate_frame_insn (emit_move_insn (mem, reg),
+ gen_rtx_SET (GET_MODE (reg), dwarf_mem, reg));
+}
+
+/* Emit instructions to save/restore registers, as determined by STORE_P. */
+
+static void
+save_restore_insns (int store_p)
+{
+ long mask = cfun->machine->mask;
+ int regno;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT base_offset;
+ HOST_WIDE_INT gp_offset;
+ HOST_WIDE_INT end_offset;
+
+ gcc_assert (!frame_pointer_needed
+ || BITSET_P (mask, HARD_FRAME_POINTER_REGNUM - GP_REG_FIRST));
+
+ if (mask == 0)
+ {
+ base_reg_rtx = 0, base_offset = 0;
+ return;
+ }
+
+  /* Save registers starting from high to low.  The debuggers prefer that
+     at least the return register be stored at func+4; this also lets us
+     avoid a nop in the epilogue if at least one register is reloaded in
+     addition to the return address.  */
+
+ /* Save GP registers if needed. */
+ /* Pick which pointer to use as a base register. For small frames, just
+ use the stack pointer. Otherwise, use a temporary register. Save 2
+ cycles if the save area is near the end of a large frame, by reusing
+ the constant created in the prologue/epilogue to adjust the stack
+ frame. */
+
+ gp_offset = cfun->machine->gp_sp_offset;
+ end_offset
+ = gp_offset - (cfun->machine->gp_reg_size
+ - GET_MODE_SIZE (gpr_mode));
+
+ if (gp_offset < 0 || end_offset < 0)
+ internal_error
+ ("gp_offset (%ld) or end_offset (%ld) is less than zero",
+ (long) gp_offset, (long) end_offset);
+
+ else if (gp_offset < 32768)
+ base_reg_rtx = stack_pointer_rtx, base_offset = 0;
+ else
+ {
+ int regno;
+ int reg_save_count = 0;
+
+ for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+ if (BITSET_P (mask, regno - GP_REG_FIRST)) reg_save_count += 1;
+ base_offset = gp_offset - ((reg_save_count - 1) * 4);
+ base_reg_rtx = iq2000_add_large_offset_to_sp (base_offset);
+ }
+
+ for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+ {
+ if (BITSET_P (mask, regno - GP_REG_FIRST))
+ {
+ rtx reg_rtx;
+ rtx mem_rtx
+ = gen_rtx_MEM (gpr_mode,
+ gen_rtx_PLUS (Pmode, base_reg_rtx,
+ GEN_INT (gp_offset - base_offset)));
+
+ reg_rtx = gen_rtx_REG (gpr_mode, regno);
+
+ if (store_p)
+ iq2000_emit_frame_related_store (mem_rtx, reg_rtx, gp_offset);
+ else
+ {
+ emit_move_insn (reg_rtx, mem_rtx);
+ }
+ gp_offset -= GET_MODE_SIZE (gpr_mode);
+ }
+ }
+}
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+iq2000_expand_prologue (void)
+{
+ int regno;
+ HOST_WIDE_INT tsize;
+ int last_arg_is_vararg_marker = 0;
+ tree fndecl = current_function_decl;
+ tree fntype = TREE_TYPE (fndecl);
+ tree fnargs = DECL_ARGUMENTS (fndecl);
+ rtx next_arg_reg;
+ int i;
+ tree next_arg;
+ tree cur_arg;
+ CUMULATIVE_ARGS args_so_far;
+ int store_args_on_stack = (iq2000_can_use_return_insn ());
+
+  /* If the struct value address is treated as the first argument, add
+     an invisible pointer argument for it ahead of the declared ones.  */
+ if (aggregate_value_p (DECL_RESULT (fndecl), fndecl)
+ && !cfun->returns_pcc_struct
+ && targetm.calls.struct_value_rtx (TREE_TYPE (fndecl), 1) == 0)
+ {
+ tree type = build_pointer_type (fntype);
+ tree function_result_decl = build_decl (BUILTINS_LOCATION,
+ PARM_DECL, NULL_TREE, type);
+
+ DECL_ARG_TYPE (function_result_decl) = type;
+ DECL_CHAIN (function_result_decl) = fnargs;
+ fnargs = function_result_decl;
+ }
+
+ /* For arguments passed in registers, find the register number
+ of the first argument in the variable part of the argument list,
+ otherwise GP_ARG_LAST+1. Note also if the last argument is
+ the varargs special argument, and treat it as part of the
+ variable arguments.
+
+ This is only needed if store_args_on_stack is true. */
+ INIT_CUMULATIVE_ARGS (args_so_far, fntype, NULL_RTX, 0, 0);
+ regno = GP_ARG_FIRST;
+
+ for (cur_arg = fnargs; cur_arg != 0; cur_arg = next_arg)
+ {
+ tree passed_type = DECL_ARG_TYPE (cur_arg);
+ enum machine_mode passed_mode = TYPE_MODE (passed_type);
+ rtx entry_parm;
+
+ if (TREE_ADDRESSABLE (passed_type))
+ {
+ passed_type = build_pointer_type (passed_type);
+ passed_mode = Pmode;
+ }
+
+ entry_parm = iq2000_function_arg (&args_so_far, passed_mode,
+ passed_type, true);
+
+ iq2000_function_arg_advance (&args_so_far, passed_mode,
+ passed_type, true);
+ next_arg = DECL_CHAIN (cur_arg);
+
+ if (entry_parm && store_args_on_stack)
+ {
+ if (next_arg == 0
+ && DECL_NAME (cur_arg)
+ && ((0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (cur_arg)),
+ "__builtin_va_alist"))
+ || (0 == strcmp (IDENTIFIER_POINTER (DECL_NAME (cur_arg)),
+ "va_alist"))))
+ {
+ last_arg_is_vararg_marker = 1;
+ break;
+ }
+ else
+ {
+ int words;
+
+ gcc_assert (GET_CODE (entry_parm) == REG);
+
+ /* Passed in a register, so will get homed automatically. */
+ if (GET_MODE (entry_parm) == BLKmode)
+ words = (int_size_in_bytes (passed_type) + 3) / 4;
+ else
+ words = (GET_MODE_SIZE (GET_MODE (entry_parm)) + 3) / 4;
+
+ regno = REGNO (entry_parm) + words - 1;
+ }
+ }
+ else
+ {
+ regno = GP_ARG_LAST+1;
+ break;
+ }
+ }
+
+ /* In order to pass small structures by value in registers we need to
+ shift the value into the high part of the register.
+     iq2000_function_arg has encoded a PARALLEL rtx, holding a vector of
+ adjustments to be made as the next_arg_reg variable, so we split up
+ the insns, and emit them separately. */
+ next_arg_reg = iq2000_function_arg (&args_so_far, VOIDmode,
+ void_type_node, true);
+ if (next_arg_reg != 0 && GET_CODE (next_arg_reg) == PARALLEL)
+ {
+ rtvec adjust = XVEC (next_arg_reg, 0);
+ int num = GET_NUM_ELEM (adjust);
+
+ for (i = 0; i < num; i++)
+ {
+ rtx pattern;
+
+ pattern = RTVEC_ELT (adjust, i);
+ if (GET_CODE (pattern) != SET
+ || GET_CODE (SET_SRC (pattern)) != ASHIFT)
+ abort_with_insn (pattern, "Insn is not a shift");
+ PUT_CODE (SET_SRC (pattern), ASHIFTRT);
+
+ emit_insn (pattern);
+ }
+ }
+
+ tsize = compute_frame_size (get_frame_size ());
+
+ /* If this function is a varargs function, store any registers that
+ would normally hold arguments ($4 - $7) on the stack. */
+ if (store_args_on_stack
+ && (stdarg_p (fntype)
+ || last_arg_is_vararg_marker))
+ {
+ int offset = (regno - GP_ARG_FIRST) * UNITS_PER_WORD;
+ rtx ptr = stack_pointer_rtx;
+
+ for (; regno <= GP_ARG_LAST; regno++)
+ {
+ if (offset != 0)
+ ptr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+ emit_move_insn (gen_rtx_MEM (gpr_mode, ptr),
+ gen_rtx_REG (gpr_mode, regno));
+
+ offset += GET_MODE_SIZE (gpr_mode);
+ }
+ }
+
+ if (tsize > 0)
+ {
+ rtx tsize_rtx = GEN_INT (tsize);
+ rtx adjustment_rtx, insn, dwarf_pattern;
+
+ if (tsize > 32767)
+ {
+ adjustment_rtx = gen_rtx_REG (Pmode, IQ2000_TEMP1_REGNUM);
+ emit_move_insn (adjustment_rtx, tsize_rtx);
+ }
+ else
+ adjustment_rtx = tsize_rtx;
+
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ adjustment_rtx));
+
+ dwarf_pattern = gen_rtx_SET (Pmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -tsize));
+
+ iq2000_annotate_frame_insn (insn, dwarf_pattern);
+
+ save_restore_insns (1);
+
+ if (frame_pointer_needed)
+ {
+ rtx insn = 0;
+
+ insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+
+ if (insn)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ emit_insn (gen_blockage ());
+}
+
+/* Expand the epilogue into a bunch of separate insns. */
+
+void
+iq2000_expand_epilogue (void)
+{
+ HOST_WIDE_INT tsize = cfun->machine->total_size;
+ rtx tsize_rtx = GEN_INT (tsize);
+ rtx tmp_rtx = (rtx)0;
+
+ if (iq2000_can_use_return_insn ())
+ {
+ emit_jump_insn (gen_return ());
+ return;
+ }
+
+ if (tsize > 32767)
+ {
+ tmp_rtx = gen_rtx_REG (Pmode, IQ2000_TEMP1_REGNUM);
+ emit_move_insn (tmp_rtx, tsize_rtx);
+ tsize_rtx = tmp_rtx;
+ }
+
+ if (tsize > 0)
+ {
+ if (frame_pointer_needed)
+ {
+ emit_insn (gen_blockage ());
+
+ emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+ }
+
+ save_restore_insns (0);
+
+ if (crtl->calls_eh_return)
+ {
+ rtx eh_ofs = EH_RETURN_STACKADJ_RTX;
+ emit_insn (gen_addsi3 (eh_ofs, eh_ofs, tsize_rtx));
+ tsize_rtx = eh_ofs;
+ }
+
+ emit_insn (gen_blockage ());
+
+ if (tsize != 0 || crtl->calls_eh_return)
+ {
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ tsize_rtx));
+ }
+ }
+
+ if (crtl->calls_eh_return)
+ {
+ /* Perform the additional bump for __throw. */
+ emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
+ stack_pointer_rtx);
+ emit_use (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM));
+ emit_jump_insn (gen_eh_return_internal ());
+ }
+ else
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode,
+ GP_REG_FIRST + 31)));
+}
+
+void
+iq2000_expand_eh_return (rtx address)
+{
+ HOST_WIDE_INT gp_offset = cfun->machine->gp_sp_offset;
+ rtx scratch;
+
+ scratch = plus_constant (stack_pointer_rtx, gp_offset);
+ emit_move_insn (gen_rtx_MEM (GET_MODE (address), scratch), address);
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+
+int
+iq2000_can_use_return_insn (void)
+{
+ if (! reload_completed)
+ return 0;
+
+ if (df_regs_ever_live_p (31) || profile_flag)
+ return 0;
+
+ if (cfun->machine->initialized)
+ return cfun->machine->total_size == 0;
+
+ return compute_frame_size (get_frame_size ()) == 0;
+}
+
+/* Choose the section to use for the constant rtx expression X that has
+ mode MODE. */
+
+static section *
+iq2000_select_rtx_section (enum machine_mode mode, rtx x ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align)
+{
+ /* For embedded applications, always put constants in read-only data,
+ in order to reduce RAM usage. */
+ return mergeable_constant_section (mode, align, 0);
+}
+
+/* Choose the section to use for DECL. RELOC is true if its value contains
+ any relocatable expression.
+
+ Some of the logic used here needs to be replicated in
+ ENCODE_SECTION_INFO in iq2000.h so that references to these symbols
+ are done correctly. */
+
+static section *
+iq2000_select_section (tree decl, int reloc ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (TARGET_EMBEDDED_DATA)
+ {
+ /* For embedded applications, always put an object in read-only data
+ if possible, in order to reduce RAM usage. */
+ if ((TREE_CODE (decl) == VAR_DECL
+ && TREE_READONLY (decl) && !TREE_SIDE_EFFECTS (decl)
+ && DECL_INITIAL (decl)
+ && (DECL_INITIAL (decl) == error_mark_node
+ || TREE_CONSTANT (DECL_INITIAL (decl))))
+ /* Deal with calls from output_constant_def_contents. */
+ || TREE_CODE (decl) != VAR_DECL)
+ return readonly_data_section;
+ else
+ return data_section;
+ }
+ else
+ {
+ /* For hosted applications, always put an object in small data if
+ possible, as this gives the best performance. */
+ if ((TREE_CODE (decl) == VAR_DECL
+ && TREE_READONLY (decl) && !TREE_SIDE_EFFECTS (decl)
+ && DECL_INITIAL (decl)
+ && (DECL_INITIAL (decl) == error_mark_node
+ || TREE_CONSTANT (DECL_INITIAL (decl))))
+ /* Deal with calls from output_constant_def_contents. */
+ || TREE_CODE (decl) != VAR_DECL)
+ return readonly_data_section;
+ else
+ return data_section;
+ }
+}
+
+/* Return the register to use for a function return value of type VALTYPE
+   for function FUNC.  */
+
+static rtx
+iq2000_function_value (const_tree valtype,
+ const_tree fn_decl_or_type,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ int reg = GP_RETURN;
+ enum machine_mode mode = TYPE_MODE (valtype);
+ int unsignedp = TYPE_UNSIGNED (valtype);
+ const_tree func = fn_decl_or_type;
+
+ if (fn_decl_or_type
+ && !DECL_P (fn_decl_or_type))
+ fn_decl_or_type = NULL;
+
+ /* Since we promote return types, we must promote the mode here too. */
+ mode = promote_function_mode (valtype, mode, &unsignedp, func, 1);
+
+ return gen_rtx_REG (mode, reg);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+iq2000_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (((GET_MODE_CLASS (mode) != MODE_INT
+ || GET_MODE_SIZE (mode) >= 4)
+ ? mode : SImode),
+ GP_RETURN);
+}
+
+/* Worker function for FUNCTION_VALUE_REGNO_P.
+
+   On the IQ2000, R2 and R3 are the only registers thus used.  */
+
+bool
+iq2000_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == GP_RETURN);
+}
+
+
+/* Return true when an argument must be passed by reference. */
+
+static bool
+iq2000_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int size;
+
+ /* We must pass by reference if we would be both passing in registers
+ and the stack. This is because any subsequent partial arg would be
+ handled incorrectly in this case. */
+ if (cum && targetm.calls.must_pass_in_stack (mode, type))
+ {
+ /* Don't pass the actual CUM to FUNCTION_ARG, because we would
+ get double copies of any offsets generated for small structs
+ passed in registers. */
+ CUMULATIVE_ARGS temp;
+
+ temp = *cum;
+ if (iq2000_function_arg (&temp, mode, type, named) != 0)
+ return 1;
+ }
+
+ if (type == NULL_TREE || mode == DImode || mode == DFmode)
+ return 0;
+
+ size = int_size_in_bytes (type);
+ return size == -1 || size > UNITS_PER_WORD;
+}
+
+/* Return the length of INSN. LENGTH is the initial length computed by
+ attributes in the machine-description file. */
+
+int
+iq2000_adjust_insn_length (rtx insn, int length)
+{
+  /* An unconditional jump has an unfilled delay slot if it is not part
+ of a sequence. A conditional jump normally has a delay slot. */
+  if (simplejump_p (insn)
+      || GET_CODE (insn) == JUMP_INSN
+      || GET_CODE (insn) == CALL_INSN)
+    length += 4;
+
+ return length;
+}
+
+/* Output assembly instructions to perform a conditional branch.
+
+ INSN is the branch instruction. OPERANDS[0] is the condition.
+ OPERANDS[1] is the target of the branch. OPERANDS[2] is the target
+ of the first operand to the condition. If TWO_OPERANDS_P is
+ nonzero the comparison takes two operands; OPERANDS[3] will be the
+ second operand.
+
+ If INVERTED_P is nonzero we are to branch if the condition does
+ not hold. If FLOAT_P is nonzero this is a floating-point comparison.
+
+ LENGTH is the length (in bytes) of the sequence we are to generate.
+ That tells us whether to generate a simple conditional branch, or a
+ reversed conditional branch around a `jr' instruction. */
+
+char *
+iq2000_output_conditional_branch (rtx insn, rtx * operands, int two_operands_p,
+ int float_p, int inverted_p, int length)
+{
+ static char buffer[200];
+ /* The kind of comparison we are doing. */
+ enum rtx_code code = GET_CODE (operands[0]);
+ /* Nonzero if the opcode for the comparison needs a `z' indicating
+ that it is a comparison against zero. */
+ int need_z_p;
+ /* A string to use in the assembly output to represent the first
+ operand. */
+ const char *op1 = "%z2";
+ /* A string to use in the assembly output to represent the second
+ operand. Use the hard-wired zero register if there's no second
+ operand. */
+ const char *op2 = (two_operands_p ? ",%z3" : ",%.");
+ /* The operand-printing string for the comparison. */
+ const char *comp = (float_p ? "%F0" : "%C0");
+ /* The operand-printing string for the inverted comparison. */
+ const char *inverted_comp = (float_p ? "%W0" : "%N0");
+
+ /* Likely variants of each branch instruction annul the instruction
+ in the delay slot if the branch is not taken. */
+ iq2000_branch_likely = (final_sequence && INSN_ANNULLED_BRANCH_P (insn));
+
+ if (!two_operands_p)
+ {
+      /* To compute whether A > B, for example, we normally
+ subtract B from A and then look at the sign bit. But, if we
+ are doing an unsigned comparison, and B is zero, we don't
+ have to do the subtraction. Instead, we can just check to
+ see if A is nonzero. Thus, we change the CODE here to
+ reflect the simpler comparison operation. */
+ switch (code)
+ {
+ case GTU:
+ code = NE;
+ break;
+
+ case LEU:
+ code = EQ;
+ break;
+
+ case GEU:
+ /* A condition which will always be true. */
+ code = EQ;
+ op1 = "%.";
+ break;
+
+ case LTU:
+ /* A condition which will always be false. */
+ code = NE;
+ op1 = "%.";
+ break;
+
+ default:
+ /* Not a special case. */
+ break;
+ }
+ }
+
+ /* Relative comparisons are always done against zero. But
+ equality comparisons are done between two operands, and therefore
+ do not require a `z' in the assembly language output. */
+ need_z_p = (!float_p && code != EQ && code != NE);
+ /* For comparisons against zero, the zero is not provided
+ explicitly. */
+ if (need_z_p)
+ op2 = "";
+
+ /* Begin by terminating the buffer. That way we can always use
+ strcat to add to it. */
+ buffer[0] = '\0';
+
+ switch (length)
+ {
+ case 4:
+ case 8:
+ /* Just a simple conditional branch. */
+ if (float_p)
+ sprintf (buffer, "b%s%%?\t%%Z2%%1",
+ inverted_p ? inverted_comp : comp);
+ else
+ sprintf (buffer, "b%s%s%%?\t%s%s,%%1",
+ inverted_p ? inverted_comp : comp,
+ need_z_p ? "z" : "",
+ op1,
+ op2);
+ return buffer;
+
+ case 12:
+ case 16:
+ {
+      /* Generate a reversed conditional branch around a `j'
+ instruction:
+
+ .set noreorder
+ .set nomacro
+ bc l
+ nop
+ j target
+ .set macro
+ .set reorder
+ l:
+
+ Because we have to jump four bytes *past* the following
+ instruction if this branch was annulled, we can't just use
+ a label, as in the picture above; there's no way to put the
+ label after the next instruction, as the assembler does not
+ accept `.L+4' as the target of a branch. (We can't just
+ wait until the next instruction is output; it might be a
+ macro and take up more than four bytes. Once again, we see
+ why we want to eliminate macros.)
+
+	 If the branch is annulled, we jump four more bytes than we
+ would otherwise; that way we skip the annulled instruction
+ in the delay slot. */
+
+ const char *target
+ = ((iq2000_branch_likely || length == 16) ? ".+16" : ".+12");
+ char *c;
+
+ c = strchr (buffer, '\0');
+ /* Generate the reversed comparison. This takes four
+ bytes. */
+ if (float_p)
+ sprintf (c, "b%s\t%%Z2%s",
+ inverted_p ? comp : inverted_comp,
+ target);
+ else
+ sprintf (c, "b%s%s\t%s%s,%s",
+ inverted_p ? comp : inverted_comp,
+ need_z_p ? "z" : "",
+ op1,
+ op2,
+ target);
+ strcat (c, "\n\tnop\n\tj\t%1");
+ if (length == 16)
+ /* The delay slot was unfilled. Since we're inside
+ .noreorder, the assembler will not fill in the NOP for
+ us, so we must do it ourselves. */
+ strcat (buffer, "\n\tnop");
+ return buffer;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
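+
+/* For example, a short `x == y' branch comes out as `beq%? x,y,target'
+   (no `z' suffix, since equality takes two operands), while the
+   single-operand unsigned `x > 0' case is simplified above to
+   `bne x,%.,target', a comparison against the hard-wired zero
+   register.  */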
+
+#define def_builtin(NAME, TYPE, CODE) \
+ add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
+ NULL, NULL_TREE)
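+
+/* Every IQ2000 builtin below is registered through this macro; e.g.
+   `__builtin_chkhdr' gets the `void (int, int)' prototype
+   void_ftype_int_int and is later dispatched via its
+   IQ2000_BUILTIN_CHKHDR code in iq2000_expand_builtin.  */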
+
+static void
+iq2000_init_builtins (void)
+{
+ tree endlink = void_list_node;
+ tree void_ftype, void_ftype_int, void_ftype_int_int;
+ tree void_ftype_int_int_int;
+ tree int_ftype_int, int_ftype_int_int, int_ftype_int_int_int;
+ tree int_ftype_int_int_int_int;
+
+ /* func () */
+ void_ftype
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, void_type_node, endlink));
+
+ /* func (int) */
+ void_ftype_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+
+ /* void func (int, int) */
+ void_ftype_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+
+ /* int func (int) */
+ int_ftype_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+
+ /* int func (int, int) */
+ int_ftype_int_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+
+ /* void func (int, int, int) */
+  void_ftype_int_int_int
+ = build_function_type
+ (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+
+ /* int func (int, int, int, int) */
+ int_ftype_int_int_int_int
+ = build_function_type
+ (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink)))));
+
+ /* int func (int, int, int) */
+ int_ftype_int_int_int
+ = build_function_type
+ (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+
+ def_builtin ("__builtin_ado16", int_ftype_int_int, IQ2000_BUILTIN_ADO16);
+ def_builtin ("__builtin_ram", int_ftype_int_int_int_int, IQ2000_BUILTIN_RAM);
+ def_builtin ("__builtin_chkhdr", void_ftype_int_int, IQ2000_BUILTIN_CHKHDR);
+ def_builtin ("__builtin_pkrl", void_ftype_int_int, IQ2000_BUILTIN_PKRL);
+ def_builtin ("__builtin_cfc0", int_ftype_int, IQ2000_BUILTIN_CFC0);
+ def_builtin ("__builtin_cfc1", int_ftype_int, IQ2000_BUILTIN_CFC1);
+ def_builtin ("__builtin_cfc2", int_ftype_int, IQ2000_BUILTIN_CFC2);
+ def_builtin ("__builtin_cfc3", int_ftype_int, IQ2000_BUILTIN_CFC3);
+ def_builtin ("__builtin_ctc0", void_ftype_int_int, IQ2000_BUILTIN_CTC0);
+ def_builtin ("__builtin_ctc1", void_ftype_int_int, IQ2000_BUILTIN_CTC1);
+ def_builtin ("__builtin_ctc2", void_ftype_int_int, IQ2000_BUILTIN_CTC2);
+ def_builtin ("__builtin_ctc3", void_ftype_int_int, IQ2000_BUILTIN_CTC3);
+ def_builtin ("__builtin_mfc0", int_ftype_int, IQ2000_BUILTIN_MFC0);
+ def_builtin ("__builtin_mfc1", int_ftype_int, IQ2000_BUILTIN_MFC1);
+ def_builtin ("__builtin_mfc2", int_ftype_int, IQ2000_BUILTIN_MFC2);
+ def_builtin ("__builtin_mfc3", int_ftype_int, IQ2000_BUILTIN_MFC3);
+ def_builtin ("__builtin_mtc0", void_ftype_int_int, IQ2000_BUILTIN_MTC0);
+ def_builtin ("__builtin_mtc1", void_ftype_int_int, IQ2000_BUILTIN_MTC1);
+ def_builtin ("__builtin_mtc2", void_ftype_int_int, IQ2000_BUILTIN_MTC2);
+ def_builtin ("__builtin_mtc3", void_ftype_int_int, IQ2000_BUILTIN_MTC3);
+ def_builtin ("__builtin_lur", void_ftype_int_int, IQ2000_BUILTIN_LUR);
+ def_builtin ("__builtin_rb", void_ftype_int_int, IQ2000_BUILTIN_RB);
+ def_builtin ("__builtin_rx", void_ftype_int_int, IQ2000_BUILTIN_RX);
+ def_builtin ("__builtin_srrd", void_ftype_int, IQ2000_BUILTIN_SRRD);
+ def_builtin ("__builtin_srwr", void_ftype_int_int, IQ2000_BUILTIN_SRWR);
+ def_builtin ("__builtin_wb", void_ftype_int_int, IQ2000_BUILTIN_WB);
+ def_builtin ("__builtin_wx", void_ftype_int_int, IQ2000_BUILTIN_WX);
+ def_builtin ("__builtin_luc32l", void_ftype_int_int, IQ2000_BUILTIN_LUC32L);
+ def_builtin ("__builtin_luc64", void_ftype_int_int, IQ2000_BUILTIN_LUC64);
+ def_builtin ("__builtin_luc64l", void_ftype_int_int, IQ2000_BUILTIN_LUC64L);
+ def_builtin ("__builtin_luk", void_ftype_int_int, IQ2000_BUILTIN_LUK);
+ def_builtin ("__builtin_lulck", void_ftype_int, IQ2000_BUILTIN_LULCK);
+ def_builtin ("__builtin_lum32", void_ftype_int_int, IQ2000_BUILTIN_LUM32);
+ def_builtin ("__builtin_lum32l", void_ftype_int_int, IQ2000_BUILTIN_LUM32L);
+ def_builtin ("__builtin_lum64", void_ftype_int_int, IQ2000_BUILTIN_LUM64);
+ def_builtin ("__builtin_lum64l", void_ftype_int_int, IQ2000_BUILTIN_LUM64L);
+ def_builtin ("__builtin_lurl", void_ftype_int_int, IQ2000_BUILTIN_LURL);
+ def_builtin ("__builtin_mrgb", int_ftype_int_int_int, IQ2000_BUILTIN_MRGB);
+ def_builtin ("__builtin_srrdl", void_ftype_int, IQ2000_BUILTIN_SRRDL);
+ def_builtin ("__builtin_srulck", void_ftype_int, IQ2000_BUILTIN_SRULCK);
+ def_builtin ("__builtin_srwru", void_ftype_int_int, IQ2000_BUILTIN_SRWRU);
+ def_builtin ("__builtin_trapqfl", void_ftype, IQ2000_BUILTIN_TRAPQFL);
+ def_builtin ("__builtin_trapqne", void_ftype, IQ2000_BUILTIN_TRAPQNE);
+ def_builtin ("__builtin_traprel", void_ftype_int, IQ2000_BUILTIN_TRAPREL);
+ def_builtin ("__builtin_wbu", void_ftype_int_int_int, IQ2000_BUILTIN_WBU);
+ def_builtin ("__builtin_syscall", void_ftype, IQ2000_BUILTIN_SYSCALL);
+}
+
+/* Expand the builtin with instruction code ICODE, whose ARGCOUNT
+   arguments in EXP each have the rtx code given in CODE, placing
+   any result in TARGET.  */
+
+static rtx
+expand_one_builtin (enum insn_code icode, rtx target, tree exp,
+ enum rtx_code *code, int argcount)
+{
+ rtx pat;
+ tree arg [5];
+ rtx op [5];
+ enum machine_mode mode [5];
+ int i;
+
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (i = 0; i < argcount; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ op[i] = expand_normal (arg[i]);
+ mode[i] = insn_data[icode].operand[i].mode;
+ if (code[i] == CONST_INT && GET_CODE (op[i]) != CONST_INT)
+ error ("argument %qd is not a constant", i + 1);
+ if (code[i] == REG
+ && ! (*insn_data[icode].operand[i].predicate) (op[i], mode[i]))
+ op[i] = copy_to_mode_reg (mode[i], op[i]);
+ }
+
+ if (insn_data[icode].operand[0].constraint[0] == '=')
+ {
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
+ }
+ else
+ target = 0;
+
+ switch (argcount)
+ {
+    case 0:
+      pat = GEN_FCN (icode) (target);
+      break;
+ case 1:
+ if (target)
+ pat = GEN_FCN (icode) (target, op[0]);
+ else
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ if (target)
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ if (target)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ if (target)
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+iq2000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ enum rtx_code code [5];
+
+ code[0] = REG;
+ code[1] = REG;
+ code[2] = REG;
+ code[3] = REG;
+ code[4] = REG;
+ switch (fcode)
+ {
+ default:
+ break;
+
+ case IQ2000_BUILTIN_ADO16:
+ return expand_one_builtin (CODE_FOR_ado16, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_RAM:
+ code[1] = CONST_INT;
+ code[2] = CONST_INT;
+ code[3] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_ram, target, exp, code, 4);
+
+ case IQ2000_BUILTIN_CHKHDR:
+ return expand_one_builtin (CODE_FOR_chkhdr, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_PKRL:
+ return expand_one_builtin (CODE_FOR_pkrl, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_CFC0:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_cfc0, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_CFC1:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_cfc1, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_CFC2:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_cfc2, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_CFC3:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_cfc3, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_CTC0:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_ctc0, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_CTC1:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_ctc1, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_CTC2:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_ctc2, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_CTC3:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_ctc3, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_MFC0:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mfc0, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_MFC1:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mfc1, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_MFC2:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mfc2, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_MFC3:
+ code[0] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mfc3, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_MTC0:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mtc0, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_MTC1:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mtc1, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_MTC2:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mtc2, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_MTC3:
+ code[1] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mtc3, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUR:
+ return expand_one_builtin (CODE_FOR_lur, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_RB:
+ return expand_one_builtin (CODE_FOR_rb, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_RX:
+ return expand_one_builtin (CODE_FOR_rx, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_SRRD:
+ return expand_one_builtin (CODE_FOR_srrd, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_SRWR:
+ return expand_one_builtin (CODE_FOR_srwr, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_WB:
+ return expand_one_builtin (CODE_FOR_wb, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_WX:
+ return expand_one_builtin (CODE_FOR_wx, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUC32L:
+ return expand_one_builtin (CODE_FOR_luc32l, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUC64:
+ return expand_one_builtin (CODE_FOR_luc64, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUC64L:
+ return expand_one_builtin (CODE_FOR_luc64l, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUK:
+ return expand_one_builtin (CODE_FOR_luk, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LULCK:
+ return expand_one_builtin (CODE_FOR_lulck, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_LUM32:
+ return expand_one_builtin (CODE_FOR_lum32, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUM32L:
+ return expand_one_builtin (CODE_FOR_lum32l, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUM64:
+ return expand_one_builtin (CODE_FOR_lum64, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LUM64L:
+ return expand_one_builtin (CODE_FOR_lum64l, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_LURL:
+ return expand_one_builtin (CODE_FOR_lurl, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_MRGB:
+ code[2] = CONST_INT;
+ return expand_one_builtin (CODE_FOR_mrgb, target, exp, code, 3);
+
+ case IQ2000_BUILTIN_SRRDL:
+ return expand_one_builtin (CODE_FOR_srrdl, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_SRULCK:
+ return expand_one_builtin (CODE_FOR_srulck, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_SRWRU:
+ return expand_one_builtin (CODE_FOR_srwru, target, exp, code, 2);
+
+ case IQ2000_BUILTIN_TRAPQFL:
+ return expand_one_builtin (CODE_FOR_trapqfl, target, exp, code, 0);
+
+ case IQ2000_BUILTIN_TRAPQNE:
+ return expand_one_builtin (CODE_FOR_trapqne, target, exp, code, 0);
+
+ case IQ2000_BUILTIN_TRAPREL:
+ return expand_one_builtin (CODE_FOR_traprel, target, exp, code, 1);
+
+ case IQ2000_BUILTIN_WBU:
+ return expand_one_builtin (CODE_FOR_wbu, target, exp, code, 3);
+
+ case IQ2000_BUILTIN_SYSCALL:
+ return expand_one_builtin (CODE_FOR_syscall, target, exp, code, 0);
+ }
+
+ return NULL_RTX;
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+iq2000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return ((int_size_in_bytes (type) > (2 * UNITS_PER_WORD))
+ || (int_size_in_bytes (type) == -1));
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+iq2000_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED, int * pretend_size,
+ int no_rtl)
+{
+ unsigned int iq2000_off = ! cum->last_arg_fp;
+ unsigned int iq2000_fp_off = cum->last_arg_fp;
+
+  if (cum->arg_words < MAX_ARGS_IN_REGISTERS - iq2000_off)
+ {
+ int iq2000_save_gp_regs
+ = MAX_ARGS_IN_REGISTERS - cum->arg_words - iq2000_off;
+ int iq2000_save_fp_regs
+ = (MAX_ARGS_IN_REGISTERS - cum->fp_arg_words - iq2000_fp_off);
+
+ if (iq2000_save_gp_regs < 0)
+ iq2000_save_gp_regs = 0;
+ if (iq2000_save_fp_regs < 0)
+ iq2000_save_fp_regs = 0;
+
+ *pretend_size = ((iq2000_save_gp_regs * UNITS_PER_WORD)
+ + (iq2000_save_fp_regs * UNITS_PER_FPREG));
+
+ if (! (no_rtl))
+ {
+ if (cum->arg_words < MAX_ARGS_IN_REGISTERS - iq2000_off)
+ {
+ rtx ptr, mem;
+ ptr = plus_constant (virtual_incoming_args_rtx,
+ - (iq2000_save_gp_regs
+ * UNITS_PER_WORD));
+ mem = gen_rtx_MEM (BLKmode, ptr);
+ move_block_from_reg
+ (cum->arg_words + GP_ARG_FIRST + iq2000_off,
+ mem,
+ iq2000_save_gp_regs);
+ }
+ }
+ }
+}
+
+/* Output to stdio stream FILE the assembler syntax for an instruction
+   operand that is a memory reference whose address is ADDR.  ADDR is
+   an RTL expression.  */
+
+static void
+iq2000_print_operand_address (FILE * file, rtx addr)
+{
+ if (!addr)
+ error ("PRINT_OPERAND_ADDRESS, null pointer");
+
+ else
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ if (REGNO (addr) == ARG_POINTER_REGNUM)
+ abort_with_insn (addr, "Arg pointer not eliminated.");
+
+ fprintf (file, "0(%s)", reg_names [REGNO (addr)]);
+ break;
+
+ case LO_SUM:
+ {
+ rtx arg0 = XEXP (addr, 0);
+ rtx arg1 = XEXP (addr, 1);
+
+ if (GET_CODE (arg0) != REG)
+ abort_with_insn (addr,
+ "PRINT_OPERAND_ADDRESS, LO_SUM with #1 not REG.");
+
+ fprintf (file, "%%lo(");
+ iq2000_print_operand_address (file, arg1);
+ fprintf (file, ")(%s)", reg_names [REGNO (arg0)]);
+ }
+ break;
+
+ case PLUS:
+ {
+ rtx reg = 0;
+ rtx offset = 0;
+ rtx arg0 = XEXP (addr, 0);
+ rtx arg1 = XEXP (addr, 1);
+
+ if (GET_CODE (arg0) == REG)
+ {
+ reg = arg0;
+ offset = arg1;
+ if (GET_CODE (offset) == REG)
+ abort_with_insn (addr, "PRINT_OPERAND_ADDRESS, 2 regs");
+ }
+
+ else if (GET_CODE (arg1) == REG)
+ reg = arg1, offset = arg0;
+ else if (CONSTANT_P (arg0) && CONSTANT_P (arg1))
+ {
+ output_addr_const (file, addr);
+ break;
+ }
+ else
+ abort_with_insn (addr, "PRINT_OPERAND_ADDRESS, no regs");
+
+ if (! CONSTANT_P (offset))
+ abort_with_insn (addr, "PRINT_OPERAND_ADDRESS, invalid insn #2");
+
+ if (REGNO (reg) == ARG_POINTER_REGNUM)
+ abort_with_insn (addr, "Arg pointer not eliminated.");
+
+ output_addr_const (file, offset);
+ fprintf (file, "(%s)", reg_names [REGNO (reg)]);
+ }
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_INT:
+ case CONST:
+ output_addr_const (file, addr);
+ if (GET_CODE (addr) == CONST_INT)
+ fprintf (file, "(%s)", reg_names [0]);
+ break;
+
+ default:
+ abort_with_insn (addr, "PRINT_OPERAND_ADDRESS, invalid insn #1");
+ break;
+ }
+}
+
+/* A C compound statement to output to stdio stream FILE the
+ assembler syntax for an instruction operand OP.
+
+ LETTER is a value that can be used to specify one of several ways
+ of printing the operand. It is used when identical operands
+ must be printed differently depending on the context. LETTER
+ comes from the `%' specification that was used to request
+ printing of the operand. If the specification was just `%DIGIT'
+ then LETTER is 0; if the specification was `%LTR DIGIT' then LETTER
+ is the ASCII code for LTR.
+
+ If OP is a register, this macro should print the register's name.
+ The names can be found in an array `reg_names' whose type is
+ `char *[]'. `reg_names' is initialized from `REGISTER_NAMES'.
+
+ When the machine description has a specification `%PUNCT' (a `%'
+ followed by a punctuation character), this macro is called with
+ a null pointer for X and the punctuation character for LETTER.
+
+ The IQ2000 specific codes are:
+
+ 'X' X is CONST_INT, prints upper 16 bits in hexadecimal format = "0x%04x",
+ 'x' X is CONST_INT, prints lower 16 bits in hexadecimal format = "0x%04x",
+ 'd' output integer constant in decimal,
+ 'z' if the operand is 0, use $0 instead of normal operand.
+ 'D' print second part of double-word register or memory operand.
+ 'L' print low-order register of double-word register operand.
+ 'M' print high-order register of double-word register operand.
+ 'C' print part of opcode for a branch condition.
+ 'F' print part of opcode for a floating-point branch condition.
+ 'N' print part of opcode for a branch condition, inverted.
+ 'W' print part of opcode for a floating-point branch condition, inverted.
+ 'A' Print part of opcode for a bit test condition.
+ 'P' Print label for a bit test.
+   'p'  Print the log2 of the constant for a bit test.
+ 'B' print 'z' for EQ, 'n' for NE
+ 'b' print 'n' for EQ, 'z' for NE
+ 'T' print 'f' for EQ, 't' for NE
+ 't' print 't' for EQ, 'f' for NE
+ 'Z' print register and a comma, but print nothing for $fcc0
+ '?' Print 'l' if we are to use a branch likely instead of normal branch.
+ '@' Print the name of the assembler temporary register (at or $1).
+ '.' Print the name of the register with a hard-wired zero (zero or $0).
+ '$' Print the name of the stack pointer register (sp or $29).
+ '+' Print the name of the gp register (gp or $28). */
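+
+/* For example, with OP = (const_int 0x12345678), `%X' prints "0x1234"
+   and `%x' prints "0x5678", matching the masking done in
+   iq2000_print_operand below.  */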
+
+static void
+iq2000_print_operand (FILE *file, rtx op, int letter)
+{
+ enum rtx_code code;
+
+ if (iq2000_print_operand_punct_valid_p (letter))
+ {
+ switch (letter)
+ {
+ case '?':
+ if (iq2000_branch_likely)
+ putc ('l', file);
+ break;
+
+ case '@':
+ fputs (reg_names [GP_REG_FIRST + 1], file);
+ break;
+
+ case '.':
+ fputs (reg_names [GP_REG_FIRST + 0], file);
+ break;
+
+ case '$':
+ fputs (reg_names[STACK_POINTER_REGNUM], file);
+ break;
+
+ case '+':
+ fputs (reg_names[GP_REG_FIRST + 28], file);
+ break;
+
+ default:
+ error ("PRINT_OPERAND: Unknown punctuation '%c'", letter);
+ break;
+ }
+
+ return;
+ }
+
+ if (! op)
+ {
+ error ("PRINT_OPERAND null pointer");
+ return;
+ }
+
+ code = GET_CODE (op);
+
+ if (code == SIGN_EXTEND)
+ op = XEXP (op, 0), code = GET_CODE (op);
+
+ if (letter == 'C')
+ switch (code)
+ {
+ case EQ: fputs ("eq", file); break;
+ case NE: fputs ("ne", file); break;
+ case GT: fputs ("gt", file); break;
+ case GE: fputs ("ge", file); break;
+ case LT: fputs ("lt", file); break;
+ case LE: fputs ("le", file); break;
+ case GTU: fputs ("ne", file); break;
+ case GEU: fputs ("geu", file); break;
+ case LTU: fputs ("ltu", file); break;
+ case LEU: fputs ("eq", file); break;
+ default:
+ abort_with_insn (op, "PRINT_OPERAND, invalid insn for %%C");
+ }
+
+ else if (letter == 'N')
+ switch (code)
+ {
+ case EQ: fputs ("ne", file); break;
+ case NE: fputs ("eq", file); break;
+ case GT: fputs ("le", file); break;
+ case GE: fputs ("lt", file); break;
+ case LT: fputs ("ge", file); break;
+ case LE: fputs ("gt", file); break;
+ case GTU: fputs ("leu", file); break;
+ case GEU: fputs ("ltu", file); break;
+ case LTU: fputs ("geu", file); break;
+ case LEU: fputs ("gtu", file); break;
+ default:
+ abort_with_insn (op, "PRINT_OPERAND, invalid insn for %%N");
+ }
+
+ else if (letter == 'F')
+ switch (code)
+ {
+ case EQ: fputs ("c1f", file); break;
+ case NE: fputs ("c1t", file); break;
+ default:
+ abort_with_insn (op, "PRINT_OPERAND, invalid insn for %%F");
+ }
+
+ else if (letter == 'W')
+ switch (code)
+ {
+ case EQ: fputs ("c1t", file); break;
+ case NE: fputs ("c1f", file); break;
+ default:
+ abort_with_insn (op, "PRINT_OPERAND, invalid insn for %%W");
+ }
+
+ else if (letter == 'A')
+ fputs (code == LABEL_REF ? "i" : "in", file);
+
+ else if (letter == 'P')
+ {
+ if (code == LABEL_REF)
+ output_addr_const (file, op);
+ else if (code != PC)
+ output_operand_lossage ("invalid %%P operand");
+ }
+
+ else if (letter == 'p')
+ {
+ int value;
+ if (code != CONST_INT
+ || (value = exact_log2 (INTVAL (op))) < 0)
+ output_operand_lossage ("invalid %%p value");
+ else
+ fprintf (file, "%d", value);
+ }
+
+ else if (letter == 'Z')
+ {
+ gcc_unreachable ();
+ }
+
+ else if (code == REG || code == SUBREG)
+ {
+ int regnum;
+
+ if (code == REG)
+ regnum = REGNO (op);
+ else
+ regnum = true_regnum (op);
+
+ if ((letter == 'M' && ! WORDS_BIG_ENDIAN)
+ || (letter == 'L' && WORDS_BIG_ENDIAN)
+ || letter == 'D')
+ regnum++;
+
+ fprintf (file, "%s", reg_names[regnum]);
+ }
+
+ else if (code == MEM)
+ {
+ if (letter == 'D')
+ output_address (plus_constant (XEXP (op, 0), 4));
+ else
+ output_address (XEXP (op, 0));
+ }
+
+ else if (code == CONST_DOUBLE
+ && GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT)
+ {
+ char s[60];
+
+ real_to_decimal (s, CONST_DOUBLE_REAL_VALUE (op), sizeof (s), 0, 1);
+ fputs (s, file);
+ }
+
+  else if (letter == 'x' && GET_CODE (op) == CONST_INT)
+    fprintf (file, HOST_WIDE_INT_PRINT_HEX, 0xffff & INTVAL (op));
+
+  else if (letter == 'X' && GET_CODE (op) == CONST_INT)
+    fprintf (file, HOST_WIDE_INT_PRINT_HEX, 0xffff & (INTVAL (op) >> 16));
+
+  else if (letter == 'd' && GET_CODE (op) == CONST_INT)
+    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+
+ else if (letter == 'z' && GET_CODE (op) == CONST_INT && INTVAL (op) == 0)
+ fputs (reg_names[GP_REG_FIRST], file);
+
+ else if (letter == 'd' || letter == 'x' || letter == 'X')
+ output_operand_lossage ("invalid use of %%d, %%x, or %%X");
+
+ else if (letter == 'B')
+ fputs (code == EQ ? "z" : "n", file);
+ else if (letter == 'b')
+ fputs (code == EQ ? "n" : "z", file);
+ else if (letter == 'T')
+ fputs (code == EQ ? "f" : "t", file);
+ else if (letter == 't')
+ fputs (code == EQ ? "t" : "f", file);
+
+ else if (code == CONST && GET_CODE (XEXP (op, 0)) == REG)
+ {
+ iq2000_print_operand (file, XEXP (op, 0), letter);
+ }
+
+ else
+ output_addr_const (file, op);
+}
+
+static bool
+iq2000_print_operand_punct_valid_p (unsigned char code)
+{
+ return iq2000_print_operand_punct[code];
+}
+
+/* For the IQ2000, transform:
+
+ memory(X + <large int>)
+ into:
+ Y = <large int> & ~0x7fff;
+ Z = X + Y
+ memory (Z + (<large int> & 0x7fff));
+*/
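+
+/* A worked instance of the transformation above:
+
+     memory (X + 0x12345)
+   becomes:
+     Y = 0x12345 & ~0x7fff;            Y = 0x10000
+     Z = X + Y
+     memory (Z + (0x12345 & 0x7fff))   i.e. memory (Z + 0x2345)
+
+   leaving an offset that fits in a signed 16-bit immediate.  */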
+
+rtx
+iq2000_legitimize_address (rtx xinsn, rtx old_x ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ if (TARGET_DEBUG_B_MODE)
+ {
+ GO_PRINTF ("\n========== LEGITIMIZE_ADDRESS\n");
+ GO_DEBUG_RTX (xinsn);
+ }
+
+ if (iq2000_check_split (xinsn, mode))
+ {
+ return gen_rtx_LO_SUM (Pmode,
+ copy_to_mode_reg (Pmode,
+ gen_rtx_HIGH (Pmode, xinsn)),
+ xinsn);
+ }
+
+ if (GET_CODE (xinsn) == PLUS)
+ {
+ rtx xplus0 = XEXP (xinsn, 0);
+ rtx xplus1 = XEXP (xinsn, 1);
+ enum rtx_code code0 = GET_CODE (xplus0);
+ enum rtx_code code1 = GET_CODE (xplus1);
+
+ if (code0 != REG && code1 == REG)
+ {
+ xplus0 = XEXP (xinsn, 1);
+ xplus1 = XEXP (xinsn, 0);
+ code0 = GET_CODE (xplus0);
+ code1 = GET_CODE (xplus1);
+ }
+
+ if (code0 == REG && REG_MODE_OK_FOR_BASE_P (xplus0, mode)
+ && code1 == CONST_INT && !SMALL_INT (xplus1))
+ {
+ rtx int_reg = gen_reg_rtx (Pmode);
+ rtx ptr_reg = gen_reg_rtx (Pmode);
+
+ emit_move_insn (int_reg,
+ GEN_INT (INTVAL (xplus1) & ~ 0x7fff));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ ptr_reg,
+ gen_rtx_PLUS (Pmode, xplus0, int_reg)));
+
+ return plus_constant (ptr_reg, INTVAL (xplus1) & 0x7fff);
+ }
+ }
+
+ if (TARGET_DEBUG_B_MODE)
+ GO_PRINTF ("LEGITIMIZE_ADDRESS could not fix.\n");
+
+ return xinsn;
+}
+
+
+static bool
+iq2000_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int * total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case MEM:
+ {
+ int num_words = (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ? 2 : 1;
+
+	if (simple_memory_operand (x, mode))
+	  {
+	    /* The hook must set * total before returning true.  */
+	    * total = COSTS_N_INSNS (num_words);
+	    return true;
+	  }
+
+ * total = COSTS_N_INSNS (2 * num_words);
+ break;
+ }
+
+ case FFS:
+ * total = COSTS_N_INSNS (6);
+ break;
+
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ * total = COSTS_N_INSNS (mode == DImode ? 2 : 1);
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (mode == DImode)
+ * total = COSTS_N_INSNS ((GET_CODE (XEXP (x, 1)) == CONST_INT) ? 4 : 12);
+ else
+ * total = COSTS_N_INSNS (1);
+ break;
+
+ case ABS:
+ if (mode == SFmode || mode == DFmode)
+ * total = COSTS_N_INSNS (1);
+ else
+ * total = COSTS_N_INSNS (4);
+ break;
+
+ case PLUS:
+ case MINUS:
+ if (mode == SFmode || mode == DFmode)
+ * total = COSTS_N_INSNS (6);
+ else if (mode == DImode)
+ * total = COSTS_N_INSNS (4);
+ else
+ * total = COSTS_N_INSNS (1);
+ break;
+
+ case NEG:
+      * total = COSTS_N_INSNS ((mode == DImode) ? 4 : 1);
+ break;
+
+ case MULT:
+ if (mode == SFmode)
+ * total = COSTS_N_INSNS (7);
+ else if (mode == DFmode)
+ * total = COSTS_N_INSNS (8);
+ else
+ * total = COSTS_N_INSNS (10);
+ break;
+
+ case DIV:
+ case MOD:
+ if (mode == SFmode)
+ * total = COSTS_N_INSNS (23);
+ else if (mode == DFmode)
+ * total = COSTS_N_INSNS (36);
+ else
+ * total = COSTS_N_INSNS (69);
+ break;
+
+ case UDIV:
+ case UMOD:
+ * total = COSTS_N_INSNS (69);
+ break;
+
+ case SIGN_EXTEND:
+ * total = COSTS_N_INSNS (2);
+ break;
+
+ case ZERO_EXTEND:
+ * total = COSTS_N_INSNS (1);
+ break;
+
+ case CONST_INT:
+ * total = 0;
+ break;
+
+ case LABEL_REF:
+ * total = COSTS_N_INSNS (2);
+ break;
+
+ case CONST:
+ {
+ rtx offset = const0_rtx;
+ rtx symref = eliminate_constant_term (XEXP (x, 0), & offset);
+
+ if (GET_CODE (symref) == LABEL_REF)
+ * total = COSTS_N_INSNS (2);
+ else if (GET_CODE (symref) != SYMBOL_REF)
+ * total = COSTS_N_INSNS (4);
+ /* Let's be paranoid.... */
+ else if (INTVAL (offset) < -32768 || INTVAL (offset) > 32767)
+ * total = COSTS_N_INSNS (2);
+ else
+ * total = COSTS_N_INSNS (SYMBOL_REF_FLAG (symref) ? 1 : 2);
+ break;
+ }
+
+ case SYMBOL_REF:
+ * total = COSTS_N_INSNS (SYMBOL_REF_FLAG (x) ? 1 : 2);
+ break;
+
+ case CONST_DOUBLE:
+ {
+ rtx high, low;
+
+ split_double (x, & high, & low);
+
+ * total = COSTS_N_INSNS ( (high == CONST0_RTX (GET_MODE (high))
+ || low == CONST0_RTX (GET_MODE (low)))
+ ? 2 : 4);
+ break;
+ }
+
+ default:
+ return false;
+ }
+ return true;
+}
+
+/* Worker for TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+
+static void
+iq2000_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\t.word\t0x03e00821\t\t# move $1,$31\n");
+ fprintf (f, "\t.word\t0x04110001\t\t# bgezal $0,.+8\n");
+ fprintf (f, "\t.word\t0x00000000\t\t# nop\n");
+ if (Pmode == DImode)
+ {
+ fprintf (f, "\t.word\t0xdfe30014\t\t# ld $3,20($31)\n");
+ fprintf (f, "\t.word\t0xdfe2001c\t\t# ld $2,28($31)\n");
+ }
+ else
+ {
+ fprintf (f, "\t.word\t0x8fe30014\t\t# lw $3,20($31)\n");
+ fprintf (f, "\t.word\t0x8fe20018\t\t# lw $2,24($31)\n");
+ }
+ fprintf (f, "\t.word\t0x0060c821\t\t# move $25,$3 (abicalls)\n");
+ fprintf (f, "\t.word\t0x00600008\t\t# jr $3\n");
+ fprintf (f, "\t.word\t0x0020f821\t\t# move $31,$1\n");
+ fprintf (f, "\t.word\t0x00000000\t\t# <function address>\n");
+ fprintf (f, "\t.word\t0x00000000\t\t# <static chain value>\n");
+}
+
+/* Worker for TARGET_TRAMPOLINE_INIT. */
+
+static void
+iq2000_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_CODE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, Pmode, TRAMPOLINE_CODE_SIZE);
+ emit_move_insn (mem, fnaddr);
+ mem = adjust_address (m_tramp, Pmode,
+ TRAMPOLINE_CODE_SIZE + GET_MODE_SIZE (Pmode));
+ emit_move_insn (mem, chain_value);
+}
+
+#include "gt-iq2000.h"
diff --git a/gcc/config/iq2000/iq2000.h b/gcc/config/iq2000/iq2000.h
new file mode 100644
index 000000000..7f69d2a12
--- /dev/null
+++ b/gcc/config/iq2000/iq2000.h
@@ -0,0 +1,913 @@
+/* Definitions of target machine for GNU compiler.
+ Vitesse IQ2000 processors
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Driver configuration. */
+
+/* A generic LIB_SPEC with -leval and --*group tacked on. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:--start-group -lc -leval -lgcc --end-group}}"
+
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V} \
+ %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
+
+
+/* Run-time target specifications. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__iq2000__"); \
+ builtin_assert ("cpu=iq2000"); \
+ builtin_assert ("machine=iq2000"); \
+ } \
+ while (0)
+
+/* Macros used in the machine description to test the flags. */
+
+#define TARGET_STATS 0
+
+#define TARGET_DEBUG_MODE 0
+#define TARGET_DEBUG_A_MODE 0
+#define TARGET_DEBUG_B_MODE 0
+#define TARGET_DEBUG_C_MODE 0
+#define TARGET_DEBUG_D_MODE 0
+
+#ifndef IQ2000_ISA_DEFAULT
+#define IQ2000_ISA_DEFAULT 1
+#endif
+
+#define IQ2000_VERSION "[1.0]"
+
+#ifndef MACHINE_TYPE
+#define MACHINE_TYPE "IQ2000"
+#endif
+
+#ifndef TARGET_VERSION_INTERNAL
+#define TARGET_VERSION_INTERNAL(STREAM) \
+ fprintf (STREAM, " %s %s", IQ2000_VERSION, MACHINE_TYPE)
+#endif
+
+#ifndef TARGET_VERSION
+#define TARGET_VERSION TARGET_VERSION_INTERNAL (stderr)
+#endif
+
+/* Storage Layout. */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+#define BITS_PER_WORD 32
+#define MAX_BITS_PER_WORD 64
+#define UNITS_PER_WORD 4
+#define MIN_UNITS_PER_WORD 4
+#define POINTER_SIZE 32
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ We promote any value smaller than SImode up to SImode. */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode;
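+
+/* For example, a `short' (HImode) scalar is held in a register as
+   SImode under this rule.  */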
+
+#define PARM_BOUNDARY 32
+
+#define STACK_BOUNDARY 64
+
+#define FUNCTION_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT 64
+
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
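+
+/* For example, a statically allocated `char buf[3]', whose natural
+   alignment is less than BITS_PER_WORD, is given word (32-bit)
+   alignment by the macro above.  */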
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define EMPTY_FIELD_BOUNDARY 32
+
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+#define STRICT_ALIGNMENT 1
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+
+/* Layout of Source Language Data Types. */
+
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define CHAR_TYPE_SIZE BITS_PER_UNIT
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+
+/* Register Basics. */
+
+/* On the IQ2000, we have 32 integer registers, plus the fake return
+   address pointer register (rap).  */
+#define FIRST_PSEUDO_REGISTER 33
+
+#define FIXED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1 \
+}
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1 \
+}
+
+
+/* Order of allocation of registers. */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 \
+}
+
+
+/* How Values Fit in Registers. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((REGNO_REG_CLASS (REGNO) == GR_REGS) \
+ ? ((REGNO) & 1) == 0 || GET_MODE_SIZE (MODE) <= 4 \
+ : ((REGNO) & 1) == 0 || GET_MODE_SIZE (MODE) == 4)
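+
+/* Consequently a multi-word value such as DImode, for which
+   HARD_REGNO_NREGS is 2, must start in an even-numbered register,
+   while single-word values may live in any register.  */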
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+#define AVOID_CCMODE_COPIES
+
+
+/* Register Classes. */
+
+enum reg_class
+{
+ NO_REGS, /* No registers in set. */
+ GR_REGS, /* Integer registers. */
+ ALL_REGS, /* All registers. */
+ LIM_REG_CLASSES /* Max value + 1. */
+};
+
+#define GENERAL_REGS GR_REGS
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define IRA_COVER_CLASSES \
+{ \
+ GR_REGS, LIM_REG_CLASSES \
+}
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "GR_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+  { 0x00000000, 0x00000000 },	/* No registers.  */			\
+ { 0xffffffff, 0x00000000 }, /* Integer registers. */ \
+ { 0xffffffff, 0x00000001 } /* All registers. */ \
+}
+
+#define REGNO_REG_CLASS(REGNO) \
+((REGNO) <= GP_REG_LAST + 1 ? GR_REGS : NO_REGS)
+
+#define BASE_REG_CLASS (GR_REGS)
+
+#define INDEX_REG_CLASS NO_REGS
+
+#define REGNO_OK_FOR_INDEX_P(regno) 0
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ ((CLASS) != ALL_REGS \
+ ? (CLASS) \
+ : ((GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT \
+ || GET_MODE_CLASS (GET_MODE (X)) == MODE_COMPLEX_FLOAT) \
+ ? (GR_REGS) \
+ : ((GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \
+ || GET_MODE (X) == VOIDmode) \
+ ? (GR_REGS) \
+ : (CLASS))))
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+
+/* Basic Stack Layout. */
+
+#define STACK_GROWS_DOWNWARD
+
+#define FRAME_GROWS_DOWNWARD 0
+
+#define STARTING_FRAME_OFFSET \
+ (crtl->outgoing_args_size)
+
+/* Use the default value zero. */
+/* #define STACK_POINTER_OFFSET 0 */
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* The return address for the current frame is in r31 if this is a leaf
+ function. Otherwise, it is on the stack. It is at a variable offset
+ from sp/fp/ap, so we define a fake hard register rap which is a
+ pointer to the return address on the stack. This always gets eliminated
+ during reload to be either the frame pointer or the stack pointer plus
+ an offset. */
+
+#define RETURN_ADDR_RTX(count, frame) \
+ (((count) == 0) \
+ ? (leaf_function_p () \
+ ? gen_rtx_REG (Pmode, GP_REG_FIRST + 31) \
+ : gen_rtx_MEM (Pmode, gen_rtx_REG (Pmode, \
+ RETURN_ADDRESS_POINTER_REGNUM))) \
+ : (rtx) 0)
+
+/* Before the prologue, RA lives in r31. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, GP_REG_FIRST + 31)
+
+
+/* Register That Address the Stack Frame. */
+
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + 29)
+#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 1)
+#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 27)
+#define ARG_POINTER_REGNUM GP_REG_FIRST
+#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG_NUM
+#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 2)
+
+
+/* Eliminating the Frame Pointer and the Arg Pointer. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, GP_REG_FIRST + 31}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = iq2000_initial_elimination_offset ((FROM), (TO))
+
+/* Passing Function Arguments on the Stack. */
+
+/* #define PUSH_ROUNDING(BYTES) 0 */
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define REG_PARM_STACK_SPACE(FNDECL) 0
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+
+/* Function Arguments in Registers. */
+
+#define MAX_ARGS_IN_REGISTERS 8
+
+typedef struct iq2000_args
+{
+ int gp_reg_found; /* Whether a gp register was found yet. */
+ unsigned int arg_number; /* Argument number. */
+ unsigned int arg_words; /* # total words the arguments take. */
+ unsigned int fp_arg_words; /* # words for FP args (IQ2000_EABI only). */
+ int last_arg_fp; /* Nonzero if last arg was FP (EABI only). */
+ int fp_code; /* Mode of FP arguments. */
+ unsigned int num_adjusts; /* Number of adjustments made. */
+  /* Adjustments made to args passed in regs.  */
+ struct rtx_def * adjust[MAX_ARGS_IN_REGISTERS * 2];
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+  init_cumulative_args (& CUM, FNTYPE, LIBNAME)
+
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ (! BYTES_BIG_ENDIAN \
+ ? upward \
+ : (((MODE) == BLKmode \
+ ? ((TYPE) && TREE_CODE (TYPE_SIZE (TYPE)) == INTEGER_CST \
+ && int_size_in_bytes (TYPE) < (PARM_BOUNDARY / BITS_PER_UNIT))\
+ : (GET_MODE_BITSIZE (MODE) < PARM_BOUNDARY \
+ && (GET_MODE_CLASS (MODE) == MODE_INT))) \
+ ? downward : upward))
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ (((N) >= GP_ARG_FIRST && (N) <= GP_ARG_LAST))
+
+
+/* On the IQ2000, R2 and R3 are the only registers used in this way.  */
+
+#define FUNCTION_VALUE_REGNO_P(N) iq2000_function_value_regno_p (N)
+
+
+/* How Large Values are Returned. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Function Entry and Exit. */
+
+#define EXIT_IGNORE_STACK 1
+
+
+/* Generating Code for Profiling. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ fprintf (FILE, "\t.set\tnoreorder\n"); \
+ fprintf (FILE, "\t.set\tnoat\n"); \
+ fprintf (FILE, "\tmove\t%s,%s\t\t# save current return address\n", \
+ reg_names[GP_REG_FIRST + 1], reg_names[GP_REG_FIRST + 31]); \
+ fprintf (FILE, "\tjal\t_mcount\n"); \
+ fprintf (FILE, \
+ "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n", \
+ "subu", \
+ reg_names[STACK_POINTER_REGNUM], \
+ reg_names[STACK_POINTER_REGNUM], \
+ Pmode == DImode ? 16 : 8); \
+ fprintf (FILE, "\t.set\treorder\n"); \
+ fprintf (FILE, "\t.set\tat\n"); \
+}
+
+
+/* Trampolines for Nested Functions. */
+
+#define TRAMPOLINE_CODE_SIZE (8*4)
+#define TRAMPOLINE_SIZE (TRAMPOLINE_CODE_SIZE + 2*GET_MODE_SIZE (Pmode))
+#define TRAMPOLINE_ALIGNMENT GET_MODE_ALIGNMENT (Pmode)
+
+
+/* Addressing Modes. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ ( (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \
+ || (GET_CODE (X) == CONST)))
+
+#define MAX_REGS_PER_ADDRESS 1
+
+#define REG_OK_FOR_INDEX_P(X) 0
+
+#define LEGITIMATE_CONSTANT_P(X) (1)
+
+
+/* Describing Relative Costs of Operations. */
+
+#define REGISTER_MOVE_COST(MODE, FROM, TO) 2
+
+#define MEMORY_MOVE_COST(MODE,CLASS,TO_P) \
+ (TO_P ? 2 : 16)
+
+#define BRANCH_COST(speed_p, predictable_p) 2
+
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE 1
+
+#define ADJUST_COST(INSN,LINK,DEP_INSN,COST) \
+ if (REG_NOTE_KIND (LINK) != 0) \
+ (COST) = 0; /* Anti or output dependence. */
+
+
+/* Dividing the output into sections. */
+
+#define TEXT_SECTION_ASM_OP "\t.text" /* Instructions. */
+
+#define DATA_SECTION_ASM_OP "\t.data" /* Large data. */
+
+
+/* The Overall Framework of an Assembler File. */
+
+#define ASM_COMMENT_START " #"
+
+#define ASM_APP_ON "#APP\n"
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+
+/* Output and Generation of Labels. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long) (NUM))
+
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+
+/* Output of Assembler Instructions. */
+
+#define REGISTER_NAMES \
+{ \
+ "%0", "%1", "%2", "%3", "%4", "%5", "%6", "%7", \
+ "%8", "%9", "%10", "%11", "%12", "%13", "%14", "%15", \
+ "%16", "%17", "%18", "%19", "%20", "%21", "%22", "%23", \
+ "%24", "%25", "%26", "%27", "%28", "%29", "%30", "%31", "%rap" \
+}
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "%0", 0 + GP_REG_FIRST }, \
+ { "%1", 1 + GP_REG_FIRST }, \
+ { "%2", 2 + GP_REG_FIRST }, \
+ { "%3", 3 + GP_REG_FIRST }, \
+ { "%4", 4 + GP_REG_FIRST }, \
+ { "%5", 5 + GP_REG_FIRST }, \
+ { "%6", 6 + GP_REG_FIRST }, \
+ { "%7", 7 + GP_REG_FIRST }, \
+ { "%8", 8 + GP_REG_FIRST }, \
+ { "%9", 9 + GP_REG_FIRST }, \
+ { "%10", 10 + GP_REG_FIRST }, \
+ { "%11", 11 + GP_REG_FIRST }, \
+ { "%12", 12 + GP_REG_FIRST }, \
+ { "%13", 13 + GP_REG_FIRST }, \
+ { "%14", 14 + GP_REG_FIRST }, \
+ { "%15", 15 + GP_REG_FIRST }, \
+ { "%16", 16 + GP_REG_FIRST }, \
+ { "%17", 17 + GP_REG_FIRST }, \
+ { "%18", 18 + GP_REG_FIRST }, \
+ { "%19", 19 + GP_REG_FIRST }, \
+ { "%20", 20 + GP_REG_FIRST }, \
+ { "%21", 21 + GP_REG_FIRST }, \
+ { "%22", 22 + GP_REG_FIRST }, \
+ { "%23", 23 + GP_REG_FIRST }, \
+ { "%24", 24 + GP_REG_FIRST }, \
+ { "%25", 25 + GP_REG_FIRST }, \
+ { "%26", 26 + GP_REG_FIRST }, \
+ { "%27", 27 + GP_REG_FIRST }, \
+ { "%28", 28 + GP_REG_FIRST }, \
+ { "%29", 29 + GP_REG_FIRST }, \
+ { "%30", 27 + GP_REG_FIRST }, \
+ { "%31", 31 + GP_REG_FIRST }, \
+ { "%rap", 32 + GP_REG_FIRST }, \
+}
+
+/* Check if the current insn needs a nop in front of it
+ because of load delays, and also update the delay slot statistics. */
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+#define DBR_OUTPUT_SEQEND(STREAM) \
+do \
+ { \
+ fputs ("\n", STREAM); \
+ } \
+while (0)
+
+#define LOCAL_LABEL_PREFIX "$"
+
+#define USER_LABEL_PREFIX ""
+
+
+/* Output of dispatch tables. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do \
+ { \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ Pmode == DImode ? ".dword" : ".word", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ Pmode == DImode ? ".dword" : ".word", \
+ LOCAL_LABEL_PREFIX, \
+ VALUE)
+
+
+/* Assembler Commands for Alignment. */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM,SIZE) \
+ fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", \
+ (unsigned HOST_WIDE_INT)(SIZE))
+
+#define ASM_OUTPUT_ALIGN(STREAM,LOG) \
+ if ((LOG) != 0) \
+ fprintf (STREAM, "\t.balign %d\n", 1<<(LOG))
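+
+/* For example, ASM_OUTPUT_ALIGN (stream, 3) emits "\t.balign 8".  */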
+
+
+/* Macros Affecting all Debug Formats. */
+
+#define DEBUGGER_AUTO_OFFSET(X) \
+ iq2000_debugger_offset (X, (HOST_WIDE_INT) 0)
+
+#define DEBUGGER_ARG_OFFSET(OFFSET, X) \
+ iq2000_debugger_offset (X, (HOST_WIDE_INT) OFFSET)
+
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define DWARF2_DEBUGGING_INFO 1
+
+
+/* Miscellaneous Parameters. */
+
+#define CASE_VECTOR_MODE SImode
+
+#define WORD_REGISTER_OPERATIONS
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+#define MOVE_MAX 4
+
+#define MAX_MOVE_MAX 8
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define STORE_FLAG_VALUE 1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE SImode
+
+/* Standard GCC variables that we reference. */
+
+extern char call_used_regs[];
+
+/* IQ2000 external variables defined in iq2000.c. */
+
+/* Comparison type. */
+enum cmp_type
+{
+ CMP_SI, /* Compare four byte integers. */
+ CMP_DI, /* Compare eight byte integers. */
+ CMP_SF, /* Compare single precision floats. */
+ CMP_DF, /* Compare double precision floats. */
+ CMP_MAX /* Max comparison type. */
+};
+
+/* Types of delay slot. */
+enum delay_type
+{
+ DELAY_NONE, /* No delay slot. */
+ DELAY_LOAD, /* Load from memory delay. */
+ DELAY_FCMP /* Delay after doing c.<xx>.{d,s}. */
+};
+
+/* Which processor to schedule for. */
+
+enum processor_type
+{
+ PROCESSOR_DEFAULT,
+ PROCESSOR_IQ2000,
+ PROCESSOR_IQ10
+};
+
+/* Recast the cpu class to be the cpu attribute. */
+#define iq2000_cpu_attr ((enum attr_cpu) iq2000_tune)
+
+#define BITMASK_UPPER16 ((unsigned long) 0xffff << 16) /* 0xffff0000 */
+#define BITMASK_LOWER16 ((unsigned long) 0xffff) /* 0x0000ffff */
+
+
+#define GENERATE_BRANCHLIKELY (ISA_HAS_BRANCHLIKELY)
+
+/* Macros to decide whether certain features are available or not,
+ depending on the instruction set architecture level. */
+
+#define BRANCH_LIKELY_P() GENERATE_BRANCHLIKELY
+
+/* ISA has branch likely instructions. */
+#define ISA_HAS_BRANCHLIKELY (iq2000_isa == 1)
+
+
+#undef ASM_SPEC
+
+
+/* The mapping from gcc register number to DWARF 2 CFA column number. */
+#define DWARF_FRAME_REGNUM(REG) (REG)
+
+/* The DWARF 2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN (GP_REG_FIRST + 31)
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + GP_ARG_FIRST : INVALID_REGNUM)
+
+/* The EH_RETURN_STACKADJ_RTX macro returns RTL which describes the
+ location used to store the amount to adjust the stack. This is
+   usually a register that is available from the end of the function's
+   body to the end of the epilogue.  Thus, this cannot be a register
+   used as a temporary by the epilogue.
+
+ This must be an integer register. */
+#define EH_RETURN_STACKADJ_REGNO 3
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, EH_RETURN_STACKADJ_REGNO)
+
+/* The EH_RETURN_HANDLER_RTX macro returns RTL which describes the
+   location used to store the address the processor should jump to in
+   order to catch an exception.  This is usually a register that is
+   available from the end of the function's body to the end of the
+   epilogue.  Thus, this cannot be a register used as a temporary by
+   the epilogue.
+
+ This must be an address register. */
+#define EH_RETURN_HANDLER_REGNO 26
+#define EH_RETURN_HANDLER_RTX \
+ gen_rtx_REG (Pmode, EH_RETURN_HANDLER_REGNO)
+
+/* Offsets recorded in opcodes are a multiple of this alignment factor. */
+#define DWARF_CIE_DATA_ALIGNMENT 4
+
+/* For IQ2000, width of a floating point register. */
+#define UNITS_PER_FPREG 4
+
+/* Force right-alignment for small varargs in 32-bit little-endian mode.  */
+
+#define PAD_VARARGS_DOWN !BYTES_BIG_ENDIAN
+
+/* Internal macros to classify a register number as to whether it's a
+ general purpose register, a floating point register, a
+ multiply/divide register, or a status register. */
+
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 31
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+
+#define RAP_REG_NUM 32
+#define AT_REGNUM (GP_REG_FIRST + 1)
+
+#define GP_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+
+/* IQ2000 registers used in prologue/epilogue code when the stack frame
+ is larger than 32K bytes. These registers must come from the
+   scratch register set, and must not be used for passing or returning
+   arguments or any other information used in the calling sequence.  */
+
+#define IQ2000_TEMP1_REGNUM (GP_REG_FIRST + 12)
+#define IQ2000_TEMP2_REGNUM (GP_REG_FIRST + 13)
+
+/* This macro is used later on in the file. */
+#define GR_REG_CLASS_P(CLASS) \
+ ((CLASS) == GR_REGS)
+
+#define SMALL_INT(X) ((unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000)
+#define SMALL_INT_UNSIGNED(X) ((unsigned HOST_WIDE_INT) (INTVAL (X)) < 0x10000)
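+
+/* SMALL_INT accepts CONST_INTs in the signed 16-bit range
+   [-0x8000, 0x7fff]; SMALL_INT_UNSIGNED accepts the unsigned 16-bit
+   range [0, 0xffff].  */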
+
+/* Certain machines have the property that some registers cannot be
+ copied to some other registers without using memory. Define this
+ macro on those machines to be a C expression that is nonzero if
+ objects of mode MODE in registers of CLASS1 can only be copied to
+ registers of class CLASS2 by storing a register of CLASS1 into
+ memory and loading that memory location into a register of CLASS2.
+
+ Do not define this macro if its value would always be zero. */
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+#define CLASS_UNITS(mode, size) \
+ ((GET_MODE_SIZE (mode) + (size) - 1) / (size))
+
+/* If defined, gives a class of registers that cannot be used as the
+ operand of a SUBREG that changes the mode of the object illegally. */
+
+#define CLASS_CANNOT_CHANGE_MODE 0
+
+/* Defines illegal mode changes for CLASS_CANNOT_CHANGE_MODE. */
+
+#define CLASS_CANNOT_CHANGE_MODE_P(FROM,TO) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO))
+
+/* Make sure 4 words are always allocated on the stack. */
+
+#ifndef STACK_ARGS_ADJUST
+#define STACK_ARGS_ADJUST(SIZE) \
+ { \
+ if (SIZE.constant < 4 * UNITS_PER_WORD) \
+ SIZE.constant = 4 * UNITS_PER_WORD; \
+ }
+#endif
+
+
+/* Symbolic macros for the registers used to return integer and floating
+ point values. */
+
+#define GP_RETURN (GP_REG_FIRST + 2)
+
+/* Symbolic macros for the first/last argument registers. */
+
+#define GP_ARG_FIRST (GP_REG_FIRST + 4)
+#define GP_ARG_LAST (GP_REG_FIRST + 11)
+
+
+/* Tell prologue and epilogue if register REGNO should be saved / restored. */
+
+#define MUST_SAVE_REGISTER(regno) \
+ ((df_regs_ever_live_p (regno) && !call_used_regs[regno]) \
+ || (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) \
+ || (regno == (GP_REG_FIRST + 31) && df_regs_ever_live_p (GP_REG_FIRST + 31)))
+
+/* ALIGN FRAMES on double word boundaries */
+#ifndef IQ2000_STACK_ALIGN
+#define IQ2000_STACK_ALIGN(LOC) (((LOC) + 7) & ~7)
+#endif
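+
+/* For example, IQ2000_STACK_ALIGN (13) is 16; frame sizes are rounded
+   up to a multiple of 8 bytes.  */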
+
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ These definitions are NOT overridden anywhere. */
+
+#define BASE_REG_P(regno, mode) \
+ (GP_REG_P (regno))
+
+#define GP_REG_OR_PSEUDO_STRICT_P(regno, mode) \
+ BASE_REG_P((regno < FIRST_PSEUDO_REGISTER) ? regno : reg_renumber[regno], \
+ (mode))
+
+#define GP_REG_OR_PSEUDO_NONSTRICT_P(regno, mode) \
+ (((regno) >= FIRST_PSEUDO_REGISTER) || (BASE_REG_P ((regno), (mode))))
+
+#define REGNO_MODE_OK_FOR_BASE_P(regno, mode) \
+ GP_REG_OR_PSEUDO_STRICT_P ((int) (regno), (mode))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects them all.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Some source files that are used after register allocation
+ need to be strict. */
+
+#ifndef REG_OK_STRICT
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ iq2000_reg_mode_ok_for_base_p (X, MODE, 0)
+#else
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ iq2000_reg_mode_ok_for_base_p (X, MODE, 1)
+#endif
+
+#if 1
+#define GO_PRINTF(x) fprintf (stderr, (x))
+#define GO_PRINTF2(x,y) fprintf (stderr, (x), (y))
+#define GO_DEBUG_RTX(x) debug_rtx (x)
+
+#else
+#define GO_PRINTF(x)
+#define GO_PRINTF2(x,y)
+#define GO_DEBUG_RTX(x)
+#endif
+
+/* If defined, modifies the length assigned to instruction INSN as a
+ function of the context in which it is used. LENGTH is an lvalue
+ that contains the initially computed length of the insn and should
+ be updated with the correct length of the insn. */
+#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
+ ((LENGTH) = iq2000_adjust_insn_length ((INSN), (LENGTH)))
+
+
+
+
+/* How to tell the debugger about changes of source files. */
+
+#ifndef SET_FILE_NUMBER
+#define SET_FILE_NUMBER() ++ num_source_filenames
+#endif
+
+/* This is how to output a note to the debugger telling it the line number
+ to which the following sequence of instructions corresponds. */
+
+#ifndef LABEL_AFTER_LOC
+#define LABEL_AFTER_LOC(STREAM)
+#endif
+
+
+/* Default to -G 8 */
+#ifndef IQ2000_DEFAULT_GVALUE
+#define IQ2000_DEFAULT_GVALUE 8
+#endif
+
+#define SDATA_SECTION_ASM_OP "\t.sdata" /* Small data. */
+
+
+/* The target cpu for optimization and scheduling. */
+extern enum processor_type iq2000_tune;
+
+/* Which instruction set architecture to use. */
+extern int iq2000_isa;
+
+enum iq2000_builtins
+{
+ IQ2000_BUILTIN_ADO16,
+ IQ2000_BUILTIN_CFC0,
+ IQ2000_BUILTIN_CFC1,
+ IQ2000_BUILTIN_CFC2,
+ IQ2000_BUILTIN_CFC3,
+ IQ2000_BUILTIN_CHKHDR,
+ IQ2000_BUILTIN_CTC0,
+ IQ2000_BUILTIN_CTC1,
+ IQ2000_BUILTIN_CTC2,
+ IQ2000_BUILTIN_CTC3,
+ IQ2000_BUILTIN_LU,
+ IQ2000_BUILTIN_LUC32L,
+ IQ2000_BUILTIN_LUC64,
+ IQ2000_BUILTIN_LUC64L,
+ IQ2000_BUILTIN_LUK,
+ IQ2000_BUILTIN_LULCK,
+ IQ2000_BUILTIN_LUM32,
+ IQ2000_BUILTIN_LUM32L,
+ IQ2000_BUILTIN_LUM64,
+ IQ2000_BUILTIN_LUM64L,
+ IQ2000_BUILTIN_LUR,
+ IQ2000_BUILTIN_LURL,
+ IQ2000_BUILTIN_MFC0,
+ IQ2000_BUILTIN_MFC1,
+ IQ2000_BUILTIN_MFC2,
+ IQ2000_BUILTIN_MFC3,
+ IQ2000_BUILTIN_MRGB,
+ IQ2000_BUILTIN_MTC0,
+ IQ2000_BUILTIN_MTC1,
+ IQ2000_BUILTIN_MTC2,
+ IQ2000_BUILTIN_MTC3,
+ IQ2000_BUILTIN_PKRL,
+ IQ2000_BUILTIN_RAM,
+ IQ2000_BUILTIN_RB,
+ IQ2000_BUILTIN_RX,
+ IQ2000_BUILTIN_SRRD,
+ IQ2000_BUILTIN_SRRDL,
+ IQ2000_BUILTIN_SRULC,
+ IQ2000_BUILTIN_SRULCK,
+ IQ2000_BUILTIN_SRWR,
+ IQ2000_BUILTIN_SRWRU,
+ IQ2000_BUILTIN_TRAPQF,
+ IQ2000_BUILTIN_TRAPQFL,
+ IQ2000_BUILTIN_TRAPQN,
+ IQ2000_BUILTIN_TRAPQNE,
+ IQ2000_BUILTIN_TRAPRE,
+ IQ2000_BUILTIN_TRAPREL,
+ IQ2000_BUILTIN_WB,
+ IQ2000_BUILTIN_WBR,
+ IQ2000_BUILTIN_WBU,
+ IQ2000_BUILTIN_WX,
+ IQ2000_BUILTIN_SYSCALL
+};
diff --git a/gcc/config/iq2000/iq2000.md b/gcc/config/iq2000/iq2000.md
new file mode 100644
index 000000000..7ad113d95
--- /dev/null
+++ b/gcc/config/iq2000/iq2000.md
@@ -0,0 +1,2180 @@
+;; iq2000.md Machine Description for Vitesse IQ2000 processors
+;; Copyright (C) 2003, 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [(UNSPEC_ADO16 0)
+ (UNSPEC_RAM 1)
+ (UNSPEC_CHKHDR 2)
+ (UNSPEC_PKRL 3)
+ (UNSPEC_CFC0 4)
+ (UNSPEC_CFC1 5)
+ (UNSPEC_CFC2 6)
+ (UNSPEC_CFC3 7)
+ (UNSPEC_CTC0 8)
+ (UNSPEC_CTC1 9)
+ (UNSPEC_CTC2 10)
+ (UNSPEC_CTC3 11)
+ (UNSPEC_MFC0 12)
+ (UNSPEC_MFC1 13)
+ (UNSPEC_MFC2 14)
+ (UNSPEC_MFC3 15)
+ (UNSPEC_MTC0 16)
+ (UNSPEC_MTC1 17)
+ (UNSPEC_MTC2 18)
+ (UNSPEC_MTC3 19)
+ (UNSPEC_LUR 20)
+ (UNSPEC_RB 21)
+ (UNSPEC_RX 22)
+ (UNSPEC_SRRD 23)
+ (UNSPEC_SRWR 24)
+ (UNSPEC_WB 25)
+ (UNSPEC_WX 26)
+ (UNSPEC_LUC32 49)
+ (UNSPEC_LUC32L 27)
+ (UNSPEC_LUC64 28)
+ (UNSPEC_LUC64L 29)
+ (UNSPEC_LUK 30)
+ (UNSPEC_LULCK 31)
+ (UNSPEC_LUM32 32)
+ (UNSPEC_LUM32L 33)
+ (UNSPEC_LUM64 34)
+ (UNSPEC_LUM64L 35)
+ (UNSPEC_LURL 36)
+ (UNSPEC_MRGB 37)
+ (UNSPEC_SRRDL 38)
+ (UNSPEC_SRULCK 39)
+ (UNSPEC_SRWRU 40)
+ (UNSPEC_TRAPQFL 41)
+ (UNSPEC_TRAPQNE 42)
+ (UNSPEC_TRAPREL 43)
+ (UNSPEC_WBU 44)
+ (UNSPEC_SYSCALL 45)]
+)
+;; UNSPEC values used in iq2000.md
+;; Number USE
+;; 0 movsi_ul
+;; 1 movsi_us, get_fnaddr
+;; 3 eh_set_return
+;; 20 builtin_setjmp_setup
+;;
+;; UNSPEC_VOLATILE values
+;; 0 blockage
+;; 2 loadgp
+;; 3 builtin_longjmp
+;; 4 exception_receiver
+;; 10 consttable_qi
+;; 11 consttable_hi
+;; 12 consttable_si
+;; 13 consttable_di
+;; 14 consttable_sf
+;; 15 consttable_df
+;; 16 align_2
+;; 17 align_4
+;; 18 align_8
+
+
+;; ....................
+;;
+;; Attributes
+;;
+;; ....................
+
+;; Classification of each insn.
+;; branch conditional branch
+;; jump unconditional jump
+;; call unconditional call
+;; load load instruction(s)
+;; store store instruction(s)
+;; move data movement within same register set
+;; xfer transfer to/from coprocessor
+;; arith integer arithmetic instruction
+;; darith double precision integer arithmetic instructions
+;; imul integer multiply
+;; idiv integer divide
+;; icmp integer compare
+;; fadd floating point add/subtract
+;; fmul floating point multiply
+;; fmadd floating point multiply-add
+;; fdiv floating point divide
+;; fabs floating point absolute value
+;; fneg floating point negation
+;; fcmp floating point compare
+;; fcvt floating point convert
+;; fsqrt floating point square root
+;; multi multiword sequence (or user asm statements)
+;; nop no operation
+
+(define_attr "type"
+ "unknown,branch,jump,call,load,store,move,xfer,arith,darith,imul,idiv,icmp,fadd,fmul,fmadd,fdiv,fabs,fneg,fcmp,fcvt,fsqrt,multi,nop"
+ (const_string "unknown"))
+
+;; Main data type used by the insn
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,SF,DF,FPSW" (const_string "unknown"))
+
+;; Length (in # of bytes). A conditional branch is allowed only to a
+;; location within a signed 18-bit offset of the delay slot. If that
+;; provides too small a range, we use the `j' instruction. This
+;; instruction takes a 28-bit value, but that value is not an offset.
+;; Instead, it's bitwise-ored with the high-order four bits of the
+;; address of the instruction in the delay slot, which means it cannot
+;; be used to cross a 256MB boundary. We could fall back on the jr
+;; instruction, which allows full access to the entire address space,
+;; but we do not do so at present.
+
+(define_attr "length" ""
+ (cond [(eq_attr "type" "branch")
+ (cond [(lt (abs (minus (match_dup 1) (plus (pc) (const_int 4))))
+ (const_int 131072))
+ (const_int 4)]
+ (const_int 12))]
+ (const_int 4)))
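+
+;; A worked example of the computation above, assuming the usual
+;; MIPS-style encoding: the 16-bit branch immediate is shifted left
+;; two bits, giving a signed 18-bit byte offset, so a branch reaches
+;; targets within 2^17 = 131072 bytes of the delay slot. In-range
+;; branches cost 4 bytes; otherwise 12 bytes are reserved, presumably
+;; for an inverted short branch around an absolute j plus its delay
+;; slot.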
+
+(define_attr "cpu"
+ "default,iq2000"
+ (const (symbol_ref "iq2000_cpu_attr")))
+
+;; Does the instruction have a mandatory delay slot?	has_dslot
+;; May the instruction be placed in a delay slot?	ok_in_dslot
+;; Must the instruction stay out of delay slots?	not_in_dslot
+(define_attr "dslot" "has_dslot,ok_in_dslot,not_in_dslot"
+ (if_then_else (eq_attr "type" "branch,jump,call,xfer,fcmp")
+ (const_string "has_dslot")
+ (const_string "ok_in_dslot")))
+
+;; Attribute defining whether or not we can use the branch-likely instructions
+
+(define_attr "branch_likely" "no,yes"
+ (const
+ (if_then_else (ne (symbol_ref "GENERATE_BRANCHLIKELY") (const_int 0))
+ (const_string "yes")
+ (const_string "no"))))
+
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")])
+
+
+
+;; .........................
+;;
+;; Delay slots; load/fcmp/xfer delay slots can't be described here
+;;
+;; .........................
+
+(define_delay (eq_attr "type" "jump")
+ [(and (eq_attr "dslot" "ok_in_dslot") (eq_attr "length" "4"))
+ (nil)
+ (nil)])
+
+(define_delay (eq_attr "type" "branch")
+ [(and (eq_attr "dslot" "ok_in_dslot") (eq_attr "length" "4"))
+ (nil)
+ (and (eq_attr "branch_likely" "yes") (and (eq_attr "dslot" "ok_in_dslot") (eq_attr "length" "4")))])
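+
+;; In a define_delay the third element of each triple gives the
+;; condition under which the delay insn may be annulled when the
+;; branch is not taken; gating it on branch_likely above matches
+;; MIPS-style branch-likely instructions, which execute the slot
+;; only on taken branches.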
+
+(define_delay (eq_attr "type" "call")
+ [(and (eq_attr "dslot" "ok_in_dslot") (eq_attr "length" "4"))
+ (nil)
+ (nil)])
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; .........................
+;;
+;; Pipeline model
+;;
+;; .........................
+
+(define_automaton "iq2000")
+(define_cpu_unit "core,memory" "iq2000")
+
+(define_insn_reservation "nonmemory" 1
+ (eq_attr "type" "!load,move,store,xfer")
+ "core")
+
+(define_insn_reservation "iq2000_load_move" 3
+ (and (eq_attr "type" "load,move")
+ (eq_attr "cpu" "iq2000"))
+ "memory")
+
+(define_insn_reservation "other_load_move" 1
+ (and (eq_attr "type" "load,move")
+ (eq_attr "cpu" "!iq2000"))
+ "memory")
+
+(define_insn_reservation "store" 1
+ (eq_attr "type" "store")
+ "memory")
+
+(define_insn_reservation "xfer" 2
+ (eq_attr "type" "xfer")
+ "memory")
+
+;;
+;; ....................
+;;
+;; CONDITIONAL TRAPS
+;;
+;; ....................
+;;
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "*
+{
+ return \"break\";
+}")
+
+;;
+;; ....................
+;;
+;; ADDITION
+;;
+;; ....................
+;;
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "")
+
+(define_insn "addsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d,=d")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "dJ,dJ")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ ""
+ "@
+ addu\\t%0,%z1,%2
+ addiu\\t%0,%z1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;;
+;; ....................
+;;
+;; SUBTRACTION
+;;
+;; ....................
+;;
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "")
+
+(define_insn "subsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d,=d")
+ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "dJ,dJ")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ ""
+ "@
+ subu\\t%0,%z1,%2
+ addiu\\t%0,%z1,%n2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
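+
+;; The immediate alternative reuses addiu with the negated constant:
+;; %n prints the negative of operand 2, so, e.g., "x = y - 5" is
+;; emitted as "addiu x,y,-5".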
+
+;;
+;; ....................
+;;
+;; NEGATION and ONE'S COMPLEMENT
+;;
+;; ....................
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "*
+{
+ operands[2] = const0_rtx;
+ return \"subu\\t%0,%z2,%1\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (not:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "*
+{
+ operands[2] = const0_rtx;
+ return \"nor\\t%0,%z2,%1\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
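+
+;; Both patterns above synthesize the operation from a zero operand:
+;; -x is computed as 0 - x via subu, and ~x as nor with 0, since
+;; ~(0 | x) == ~x. Setting operands[2] to const0_rtx lets the %z
+;; modifier print the hard-wired zero register.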
+
+;;
+;; ....................
+;;
+;; LOGICAL
+;;
+;; ....................
+;;
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (and:SI (match_operand:SI 1 "uns_arith_operand" "%d,d,d")
+ (match_operand:SI 2 "nonmemory_operand" "d,K,N")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (and:SI (match_operand:SI 1 "uns_arith_operand" "%d,d,d")
+ (match_operand:SI 2 "nonmemory_operand" "d,K,N")))]
+ ""
+ "*
+{
+ if (which_alternative == 0)
+ return \"and\\t%0,%1,%2\";
+ else if (which_alternative == 1)
+ return \"andi\\t%0,%1,%x2\";
+ else if (which_alternative == 2)
+ {
+ if ((INTVAL (operands[2]) & 0xffff) == 0xffff)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"andoui\\t%0,%1,%x2\";
+ }
+ else
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffff);
+ return \"andoi\\t%0,%1,%x2\";
+ }
+ }
+ else
+ gcc_unreachable ();
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
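+
+;; Illustrative values for the N alternative: a mask whose low half
+;; is all ones, such as 0x00ffffff, is emitted as "andoui %0,%1,0x00ff"
+;; using the high half; any other N-class mask, such as 0xffff00ff,
+;; is emitted as "andoi %0,%1,0x00ff" using the low half.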
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ior:SI (match_operand:SI 1 "uns_arith_operand" "%d,d")
+ (match_operand:SI 2 "uns_arith_operand" "d,K")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ior:SI (match_operand:SI 1 "uns_arith_operand" "%d,d")
+ (match_operand:SI 2 "uns_arith_operand" "d,K")))]
+ ""
+ "@
+ or\\t%0,%1,%2
+ ori\\t%0,%1,%x2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (xor:SI (match_operand:SI 1 "uns_arith_operand" "%d,d")
+ (match_operand:SI 2 "uns_arith_operand" "d,K")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (xor:SI (match_operand:SI 1 "uns_arith_operand" "%d,d")
+ (match_operand:SI 2 "uns_arith_operand" "d,K")))]
+ ""
+ "@
+ xor\\t%0,%1,%2
+ xori\\t%0,%1,%x2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*norsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "d"))
+ (not:SI (match_operand:SI 2 "register_operand" "d"))))]
+ ""
+ "nor\\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;;
+;; ....................
+;;
+;; ZERO EXTENSION
+;;
+;; ....................
+
+;; Extension insns.
+;; Those with an integer source operand are ordered widest source type first.
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "*
+{
+ if (which_alternative == 0)
+ return \"andi\\t%0,%1,0xffff\";
+ else
+ return iq2000_move_1word (operands, insn, TRUE);
+}"
+ [(set_attr "type" "arith,load,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8")])
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "*
+{
+ if (which_alternative == 0)
+ return \"andi\\t%0,%1,0x00ff\";
+ else
+ return iq2000_move_1word (operands, insn, TRUE);
+}"
+ [(set_attr "type" "arith,load,load")
+ (set_attr "mode" "HI")
+ (set_attr "length" "4,4,8")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "*
+{
+ if (which_alternative == 0)
+ return \"andi\\t%0,%1,0x00ff\";
+ else
+ return iq2000_move_1word (operands, insn, TRUE);
+}"
+ [(set_attr "type" "arith,load,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8")])
+
+;;
+;; ....................
+;;
+;; SIGN EXTENSION
+;;
+;; ....................
+
+;; Extension insns.
+;; Those with an integer source operand are ordered widest source type first.
+
+;; These patterns originally accepted general_operands; however, slightly
+;; better code is generated by accepting only register_operands and then
+;; letting combine generate the lh and lb insns.
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+ "
+{
+ if (optimize && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_not_mem (operands[1]);
+
+ if (GET_CODE (operands[1]) != MEM)
+ {
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift = GEN_INT (16);
+
+ emit_insn (gen_ashlsi3 (temp, op1, shift));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift));
+ DONE;
+ }
+}")
+
+(define_insn "extendhisi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (sign_extend:SI (match_operand:HI 1 "memory_operand" "R,m")))]
+ ""
+ "* return iq2000_move_1word (operands, insn, FALSE);"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+ "
+{
+ if (optimize && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_not_mem (operands[1]);
+
+ if (GET_CODE (operands[1]) != MEM)
+ {
+ rtx op0 = gen_lowpart (SImode, operands[0]);
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift = GEN_INT (24);
+
+ emit_insn (gen_ashlsi3 (temp, op1, shift));
+ emit_insn (gen_ashrsi3 (op0, temp, shift));
+ DONE;
+ }
+}")
+
+(define_insn "extendqihi2_internal"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (sign_extend:HI (match_operand:QI 1 "memory_operand" "R,m")))]
+ ""
+ "* return iq2000_move_1word (operands, insn, FALSE);"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ ""
+ "
+{
+ if (optimize && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_not_mem (operands[1]);
+
+ if (GET_CODE (operands[1]) != MEM)
+ {
+ rtx op1 = gen_lowpart (SImode, operands[1]);
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift = GEN_INT (24);
+
+ emit_insn (gen_ashlsi3 (temp, op1, shift));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift));
+ DONE;
+ }
+}")
+
+(define_insn "extendqisi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (sign_extend:SI (match_operand:QI 1 "memory_operand" "R,m")))]
+ ""
+ "* return iq2000_move_1word (operands, insn, FALSE);"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+;;
+;; ........................
+;;
+;; BIT FIELD EXTRACTION
+;;
+;; ........................
+
+(define_insn "extzv"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "O")
+ (match_operand:SI 3 "const_int_operand" "O")))]
+ ""
+ "*
+{
+ int value[4];
+ value[2] = INTVAL (operands[2]);
+ value[3] = INTVAL (operands[3]);
+ operands[2] = GEN_INT ((value[3]));
+ operands[3] = GEN_INT ((32 - value[2]));
+ return \"ram\\t%0,%1,%2,%3,0x0\";
+}"
+ [(set_attr "type" "arith")])
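+
+;; In other words, a zero_extract of WIDTH bits at position POS is
+;; emitted as "ram %0,%1,POS,32-WIDTH,0x0"; extracting 8 bits at bit 4,
+;; for example, gives "ram %0,%1,4,24,0x0".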
+
+;;
+;; ....................
+;;
+;; DATA MOVEMENT
+;;
+;; ....................
+
+/* Take care of constants that don't fit in a single instruction.  */
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ "(reload_in_progress || reload_completed)
+ && large_int (operands[1], SImode)"
+
+ [(set (match_dup 0)
+ (high:SI (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum:SI (match_dup 0)
+ (match_dup 1)))]
+)
+
+;; ??? iq2000_move_1word has support for HIGH, so this pattern may be
+;; unnecessary.
+
+(define_insn "high"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand:SI 1 "immediate_operand" "")))]
+ ""
+ "lui\\t%0,%%hi(%1) # high"
+ [(set_attr "type" "move")])
+
+(define_insn "low"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ ""
+ "addiu\\t%0,%1,%%lo(%2) # low"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
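+
+;; Together these materialize a symbolic address in two instructions,
+;; e.g. loading the address of sym becomes "lui %0,%hi(sym)" followed
+;; by "addiu %0,%0,%lo(sym)".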
+
+;; 32-bit Integer moves
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "large_int" ""))]
+ "reload_in_progress | reload_completed"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (ior:SI (match_dup 0)
+ (match_dup 3)))]
+ "
+{
+ operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1])
+ & BITMASK_UPPER16,
+ SImode));
+ operands[3] = GEN_INT (INTVAL (operands[1]) & BITMASK_LOWER16);
+}")
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (iq2000_check_split (operands[1], SImode))
+ {
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtx tem = ((reload_in_progress | reload_completed)
+ ? operands[0] : gen_reg_rtx (mode));
+
+ emit_insn (gen_rtx_SET (VOIDmode, tem,
+ gen_rtx_HIGH (mode, operands[1])));
+
+ operands[1] = gen_rtx_LO_SUM (mode, tem, operands[1]);
+ }
+
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], SImode)
+ && !register_operand (operands[1], SImode)
+ && (GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0))
+ {
+ rtx temp = force_reg (SImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+
+ /* Take care of constants that don't fit in a single instruction.  */
+ if ((reload_in_progress || reload_completed)
+ && CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && GET_CODE (operands[1]) != LO_SUM
+ && ! SMALL_INT_UNSIGNED (operands[1]))
+ {
+ rtx tem = ((reload_in_progress | reload_completed)
+ ? operands[0] : gen_reg_rtx (SImode));
+
+ emit_insn (gen_rtx_SET (VOIDmode, tem,
+ gen_rtx_HIGH (SImode, operands[1])));
+ operands[1] = gen_rtx_LO_SUM (SImode, tem, operands[1]);
+ }
+}")
+
+;; The difference between these two is whether or not ints are allowed
+;; in FP registers (off by default, use -mdebugh to enable).
+
+(define_insn "movsi_internal2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,d,d,R,m")
+ (match_operand:SI 1 "move_operand" "d,IKL,Mnis,R,m,dJ,dJ"))]
+ "(register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)
+ || (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) == 0))"
+ "* return iq2000_move_1word (operands, insn, FALSE);"
+ [(set_attr "type" "move,arith,arith,load,load,store,store")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8,8,8,4,8")])
+
+;; 16-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because BYTE_LOADS_ZERO_EXTEND is defined.
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], HImode)
+ && !register_operand (operands[1], HImode)
+ && ((GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0)))
+ {
+ rtx temp = force_reg (HImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+;; The difference between these two is whether or not ints are allowed
+;; in FP registers (off by default, use -mdebugh to enable).
+
+(define_insn "movhi_internal2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,R,m")
+ (match_operand:HI 1 "general_operand" "d,IK,R,m,dJ,dJ"))]
+ "(register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode)
+ || (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) == 0))"
+ "* return iq2000_move_1word (operands, insn, TRUE);"
+ [(set_attr "type" "move,arith,load,load,store,store")
+ (set_attr "mode" "HI")
+ (set_attr "length" "4,4,4,8,4,8")])
+
+;; 8-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because BYTE_LOADS_ZERO_EXTEND is defined.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], QImode)
+ && !register_operand (operands[1], QImode)
+ && (GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0))
+ {
+ rtx temp = force_reg (QImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+;; The difference between these two is whether or not ints are allowed
+;; in FP registers (off by default, use -mdebugh to enable).
+
+(define_insn "movqi_internal2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,m")
+ (match_operand:QI 1 "general_operand" "d,IK,R,m,dJ,dJ"))]
+ "(register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode)
+ || (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) == 0))"
+ "* return iq2000_move_1word (operands, insn, TRUE);"
+ [(set_attr "type" "move,arith,load,load,store,store")
+ (set_attr "mode" "QI")
+ (set_attr "length" "4,4,4,8,4,8")])
+
+;; 32-bit floating point moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (!reload_in_progress
+ && !reload_completed
+ && GET_CODE (operands[0]) == MEM
+ && (GET_CODE (operands[1]) == MEM
+ || GET_CODE (operands[1]) == CONST_DOUBLE))
+ operands[1] = copy_to_mode_reg (SFmode, operands[1]);
+
+ /* Take care of reg <- SF constant.  */
+ if (const_double_operand (operands[1], GET_MODE (operands[1])))
+ {
+ emit_insn (gen_movsf_high (operands[0], operands[1]));
+ emit_insn (gen_movsf_lo_sum (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_insn "movsf_lo_sum"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (lo_sum:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "const_double_operand" "")))]
+ ""
+ "*
+{
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[2]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[2] = GEN_INT (i);
+ return \"addiu\\t%0,%1,%%lo(%2) # low\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_insn "movsf_high"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (high:SF (match_operand:SF 1 "const_double_operand" "")))]
+ ""
+ "*
+{
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[1] = GEN_INT (i);
+ return \"lui\\t%0,%%hi(%1) # high\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
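+
+;; These two patterns build an SF constant from its IEEE-754 bit
+;; pattern; 1.0f is 0x3f800000, for instance, so the assembler's
+;; %hi/%lo operators select 0x3f80 for the lui and zero for the
+;; addiu.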
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m")
+ (match_operand:SF 1 "nonimmediate_operand" "r,m,r"))]
+ "!memory_operand (operands[0], SFmode) || !memory_operand (operands[1], SFmode)"
+ "*
+{
+ iq2000_fill_delay_slot (\"\", DELAY_LOAD, operands, insn);
+ if (which_alternative == 0)
+ return \"or\\t%0,%1,%1\";
+ else if (which_alternative == 1)
+ return \"lw\\t%0,%1\";
+ else if (which_alternative == 2)
+ return \"sw\\t%1,%0\";
+ else
+ gcc_unreachable ();
+}"
+ [(set_attr "length" "4,4,4")
+ (set_attr "type" "arith,load,store")]
+)
+
+;;
+;; ....................
+;;
+;; SHIFTS
+;;
+;; ....................
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "")
+
+(define_insn "ashlsi3_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return \"sll\\t%0,%1,%2\";
+ }
+ else
+ return \"sllv\\t%0,%1,%2\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "")
+
+(define_insn "ashrsi3_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return \"sra\\t%0,%1,%2\";
+ }
+ else
+ return \"srav\\t%0,%1,%2\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "")
+
+(define_insn "lshrsi3_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return \"srl\\t%0,%1,%2\";
+ }
+ else
+ return \"srlv\\t%0,%1,%2\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; Rotate Right
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "uns_arith_operand" "O")))]
+ ""
+ "ram %0,%1,%2,0x0,0x0"
+ [(set_attr "type" "arith")])
+
+
+;;
+;; ....................
+;;
+;; CONDITIONAL BRANCHES
+;;
+;; ....................
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator:SI 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_const_operand")])
+ (label_ref (match_operand:SI 3 ""))
+ (pc)))]
+ ""
+ "
+{
+ gen_conditional_branch (operands, SImode);
+ DONE;
+}")
+
+
+;; Conditional branches on comparisons with zero.
+
+(define_insn "branch_zero"
+ [(set (pc)
+ (if_then_else
+ (match_operator:SI 0 "cmp_op"
+ [(match_operand:SI 2 "register_operand" "d")
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return iq2000_output_conditional_branch (insn,
+ operands,
+ /*two_operands_p=*/0,
+ /*float_p=*/0,
+ /*inverted_p=*/0,
+ get_attr_length (insn));
+}"
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")])
+
+(define_insn "branch_zero_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator:SI 0 "cmp_op"
+ [(match_operand:SI 2 "register_operand" "d")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "*
+{
+ return iq2000_output_conditional_branch (insn,
+ operands,
+ /*two_operands_p=*/0,
+ /*float_p=*/0,
+ /*inverted_p=*/1,
+ get_attr_length (insn));
+}"
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")])
+
+;; Conditional branch on equality comparison.
+
+(define_insn "branch_equality"
+ [(set (pc)
+ (if_then_else
+ (match_operator:SI 0 "equality_op"
+ [(match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return iq2000_output_conditional_branch (insn,
+ operands,
+ /*two_operands_p=*/1,
+ /*float_p=*/0,
+ /*inverted_p=*/0,
+ get_attr_length (insn));
+}"
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")])
+
+(define_insn "branch_equality_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator:SI 0 "equality_op"
+ [(match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "*
+{
+ return iq2000_output_conditional_branch (insn,
+ operands,
+ /*two_operands_p=*/1,
+ /*float_p=*/0,
+ /*inverted_p=*/1,
+ get_attr_length (insn));
+}"
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")])
+
+
+;; Recognize bbi and bbin instructions. These use two unusual template
+;; patterns, %Ax and %Px. %Ax outputs an 'i' if operand `x' is a LABEL_REF,
+;; otherwise it outputs an 'in'. %Px does nothing if `x' is PC
+;; and outputs the operand if `x' is a LABEL_REF.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (sign_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A2\\t%0(31-%1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (sign_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A3\\t%0(31-%1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A2\\t%0(31-%1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A3\\t%0(31-%1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "power_of_2_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A3\\t%0(%p1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "power_of_2_operand" "I"))
+ (const_int 0))
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ ""
+ "bb%A2\\t%0(%p1),%P2%P3"
+ [(set_attr "length" "4")
+ (set_attr "type" "branch")])
+
+;;
+;; ....................
+;;
+;; SETTING A REGISTER FROM A COMPARISON
+;;
+;; ....................
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "reg_or_const_operand")]))]
+ ""
+ "
+{
+ gen_int_relational (GET_CODE (operands[1]), operands[0],
+ operands[2], operands[3], (int *)0);
+ DONE;
+}")
+
+(define_insn "seq_si_zero"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (eq:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 0)))]
+ ""
+ "sltiu\\t%0,%1,1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sne_si_zero"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ne:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 0)))]
+ ""
+ "sltu\\t%0,%.,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sgt_si"
+ [(set (match_operand:SI 0 "register_operand" "=d,=d")
+ (gt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "reg_or_0_operand" "d,J")))]
+ ""
+ "@
+ slt\\t%0,%z2,%1
+ slt\\t%0,%z2,%1"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI,SI")])
+
+(define_insn "slt_si"
+ [(set (match_operand:SI 0 "register_operand" "=d,=d")
+ (lt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ ""
+ "@
+ slt\\t%0,%1,%2
+ slti\\t%0,%1,%2"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI,SI")])
+
+(define_insn "sle_si_const"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (le:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "small_int" "I")))]
+ "INTVAL (operands[2]) < 32767"
+ "*
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ return \"slti\\t%0,%1,%2\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sgtu_si"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (gtu:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")))]
+ ""
+ "sltu\\t%0,%z2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sltu_si"
+ [(set (match_operand:SI 0 "register_operand" "=d,=d")
+ (ltu:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ ""
+ "@
+ sltu\\t%0,%1,%2
+ sltiu\\t%0,%1,%2"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI,SI")])
+
+(define_insn "sleu_si_const"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (leu:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "small_int" "I")))]
+ "INTVAL (operands[2]) < 32767"
+ "*
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ return \"sltiu\\t%0,%1,%2\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
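+
+;; Both "less than or equal to a constant" patterns rewrite x <= C as
+;; x < C + 1; x <= 9 becomes "slti %0,%1,10", for example. The
+;; INTVAL < 32767 guard keeps C + 1 within the signed 16-bit
+;; immediate range of slti/sltiu.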
+
+
+;;
+;; ....................
+;;
+;; UNCONDITIONAL BRANCHES
+;;
+;; ....................
+
+;; Unconditional branches.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[0]) == REG)
+ return \"j\\t%0\";
+ return \"j\\t%l0\";
+ /* return \"b\\t%l0\";*/
+}"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand" "d"))]
+ ""
+ "
+{
+ rtx dest;
+
+ if (operands[0]) /* eliminate unused code warnings */
+ {
+ dest = operands[0];
+ if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode)
+ operands[0] = copy_to_mode_reg (Pmode, dest);
+
+ if (!(Pmode == DImode))
+ emit_jump_insn (gen_indirect_jump_internal1 (operands[0]));
+ else
+ internal_error (\"unimplemented functionality\");
+
+ DONE;
+ }
+}")
+
+(define_insn "indirect_jump_internal1"
+ [(set (pc) (match_operand:SI 0 "register_operand" "d"))]
+ "!(Pmode == DImode)"
+ "j\\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_expand "tablejump"
+ [(set (pc)
+ (match_operand 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "
+{
+ if (operands[0]) /* eliminate unused code warnings */
+ {
+ gcc_assert (GET_MODE (operands[0]) == Pmode);
+
+ if (!(Pmode == DImode))
+ emit_jump_insn (gen_tablejump_internal1 (operands[0], operands[1]));
+ else
+ internal_error (\"unimplemented functionality\");
+
+ DONE;
+ }
+}")
+
+(define_insn "tablejump_internal1"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "!(Pmode == DImode)"
+ "j\\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_expand "tablejump_internal3"
+ [(parallel [(set (pc)
+ (plus:SI (match_operand:SI 0 "register_operand" "d")
+ (label_ref:SI (match_operand 1 "" ""))))
+ (use (label_ref:SI (match_dup 1)))])]
+ ""
+ "")
+
+;;; Make sure that this only matches the insn before ADDR_DIFF_VEC. Otherwise
+;;; it is not valid. ??? With the USE, the condition tests may not be required
+;;; any longer.
+
+;;; ??? The length depends on the ABI. It is two for o32, and one for n32.
+;;; We just use the conservative number here.
+
+(define_insn ""
+ [(set (pc)
+ (plus:SI (match_operand:SI 0 "register_operand" "d")
+ (label_ref:SI (match_operand 1 "" ""))))
+ (use (label_ref:SI (match_dup 1)))]
+ "!(Pmode == DImode) && next_active_insn (insn) != 0
+ && GET_CODE (PATTERN (next_active_insn (insn))) == ADDR_DIFF_VEC
+ && PREV_INSN (next_active_insn (insn)) == operands[1]"
+ "*
+{
+ return \"j\\t%0\";
+}"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "8")])
+
+;;
+;; ....................
+;;
+;; Function prologue/epilogue
+;;
+;; ....................
+;;
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "
+{
+ if (iq2000_isa >= 0) /* avoid unused code warnings */
+ {
+ iq2000_expand_prologue ();
+ DONE;
+ }
+}")
+
+;; Block any insns from being moved before this point, since the
+;; profiling call to mcount can use various registers that aren't
+;; saved or used to pass arguments.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ ""
+ [(set_attr "type" "unknown")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
+
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+ "
+{
+ if (iq2000_isa >= 0) /* avoid unused code warnings */
+ {
+ iq2000_expand_epilogue ();
+ DONE;
+ }
+}")
+
+;; Trivial return. Make it look like a normal return insn as that
+;; allows jump optimizations to work better.
+(define_insn "return"
+ [(return)]
+ "iq2000_can_use_return_insn ()"
+ "j\\t%%31"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+;; Normal return.
+
+(define_insn "return_internal"
+ [(use (match_operand 0 "pmode_register_operand" ""))
+ (return)]
+ ""
+ "*
+{
+ return \"j\\t%0\";
+}"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_insn "eh_return_internal"
+ [(const_int 4)
+ (return)
+ (use (reg:SI 26))
+ (use (reg:SI 31))]
+ ""
+ "j\\t%%26"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_expand "eh_return"
+ [(use (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "
+{
+ iq2000_expand_eh_return (operands[0]);
+ DONE;
+}")
+
+
+;;
+;; ....................
+;;
+;; FUNCTION CALLS
+;;
+;; ....................
+
+;; calls.c now passes a third argument; make saber happy.
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand" "m")
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI 31))
+ (use (match_operand 2 "" "")) ;; next_arg_reg
+ (use (match_operand 3 "" ""))])] ;; struct_value_size_rtx
+ ""
+ "
+{
+ rtx addr;
+
+ if (operands[0]) /* eliminate unused code warnings */
+ {
+ addr = XEXP (operands[0], 0);
+ if ((GET_CODE (addr) != REG && (!CONSTANT_ADDRESS_P (addr)))
+ || ! call_insn_operand (addr, VOIDmode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+
+ /* In order to pass small structures by value in registers
+ compatibly with the IQ2000 compiler, we need to shift the value
+ into the high part of the register. Function_arg has encoded
+ a PARALLEL rtx, holding a vector of adjustments to be made
+ as the next_arg_reg variable, so we split up the insns,
+ and emit them separately. */
+
+ if (operands[2] != (rtx)0 && GET_CODE (operands[2]) == PARALLEL)
+ {
+ rtvec adjust = XVEC (operands[2], 0);
+ int num = GET_NUM_ELEM (adjust);
+ int i;
+
+ for (i = 0; i < num; i++)
+ emit_insn (RTVEC_ELT (adjust, i));
+ }
+
+ emit_call_insn (gen_call_internal0 (operands[0], operands[1],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + 31)));
+ DONE;
+ }
+}")
+
+(define_expand "call_internal0"
+ [(parallel [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "" ""))])]
+ ""
+ "")
+
+(define_insn "call_internal1"
+ [(call (mem (match_operand 0 "call_insn_operand" "ri"))
+ (match_operand 1 "" "i"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+ "*
+{
+ register rtx target = operands[0];
+
+ if (GET_CODE (target) == CONST_INT)
+ return \"li\\t%@,%0\\n\\tjalr\\t%2,%@\";
+ else if (CONSTANT_ADDRESS_P (target))
+ return \"jal\\t%0\";
+ else
+ return \"jalr\\t%2,%0\";
+}"
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")])
+
+;; calls.c now passes a fourth argument; make saber happy.
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "register_operand" "=d")
+ (call (match_operand 1 "memory_operand" "m")
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI 31))
+ (use (match_operand 3 "" ""))])] ;; next_arg_reg
+ ""
+ "
+{
+ rtx addr;
+
+ if (operands[0]) /* eliminate unused code warning */
+ {
+ addr = XEXP (operands[1], 0);
+ if ((GET_CODE (addr) != REG && (!CONSTANT_ADDRESS_P (addr)))
+ || ! call_insn_operand (addr, VOIDmode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+
+ /* In order to pass small structures by value in registers
+ compatibly with the IQ2000 compiler, we need to shift the value
+ into the high part of the register. Function_arg has encoded
+ a PARALLEL rtx, holding a vector of adjustments to be made
+ as the next_arg_reg variable, so we split up the insns,
+ and emit them separately. */
+
+ if (operands[3] != (rtx)0 && GET_CODE (operands[3]) == PARALLEL)
+ {
+ rtvec adjust = XVEC (operands[3], 0);
+ int num = GET_NUM_ELEM (adjust);
+ int i;
+
+ for (i = 0; i < num; i++)
+ emit_insn (RTVEC_ELT (adjust, i));
+ }
+
+ if (GET_CODE (operands[0]) == PARALLEL && XVECLEN (operands[0], 0) > 1)
+ {
+ emit_call_insn (gen_call_value_multiple_internal0
+ (XEXP (XVECEXP (operands[0], 0, 0), 0),
+ operands[1], operands[2],
+ XEXP (XVECEXP (operands[0], 0, 1), 0),
+ gen_rtx_REG (SImode, GP_REG_FIRST + 31)));
+ DONE;
+ }
+
+ /* We have a call returning a DImode structure in an FP reg.
+ Strip off the now unnecessary PARALLEL. */
+ if (GET_CODE (operands[0]) == PARALLEL)
+ operands[0] = XEXP (XVECEXP (operands[0], 0, 0), 0);
+
+ emit_call_insn (gen_call_value_internal0 (operands[0], operands[1], operands[2],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + 31)));
+
+ DONE;
+ }
+}")
+
+(define_expand "call_value_internal0"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "" ""))])]
+ ""
+ "")
+
+(define_insn "call_value_internal1"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem (match_operand 1 "call_insn_operand" "r"))
+ (match_operand 2 "" "i")))
+ (clobber (match_operand:SI 3 "register_operand" "=d"))]
+ ""
+ "*
+{
+ register rtx target = operands[1];
+
+ if (GET_CODE (target) == CONST_INT)
+ return \"li\\t%@,%1\\n\\tjalr\\t%3,%@\";
+ else if (CONSTANT_ADDRESS_P (target))
+ return \"jal\\t%1\";
+ else
+ return \"jalr\\t%3,%1\";
+}"
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")])
+
+(define_expand "call_value_multiple_internal0"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (set (match_operand 3 "" "")
+ (call (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_operand:SI 4 "" ""))])]
+ ""
+ "")
+
+;; ??? May eventually need all 6 versions of the call patterns with multiple
+;; return values.
+
+(define_insn "call_value_multiple_internal1"
+ [(set (match_operand 0 "register_operand" "=d")
+ (call (mem (match_operand 1 "call_insn_operand" "r"))
+ (match_operand 2 "" "i")))
+ (set (match_operand 3 "register_operand" "=d")
+ (call (mem (match_dup 1))
+ (match_dup 2)))
+ (clobber (match_operand:SI 4 "register_operand" "=d"))]
+ ""
+ "*
+{
+ register rtx target = operands[1];
+
+ if (GET_CODE (target) == CONST_INT)
+ return \"li\\t%@,%1\\n\\tjalr\\t%4,%@\";
+ else if (CONSTANT_ADDRESS_P (target))
+ return \"jal\\t%1\";
+ else
+ return \"jalr\\t%4,%1\";
+}"
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")])
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ if (operands[0]) /* silence statement not reached warnings */
+ {
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ emit_insn (gen_blockage ());
+ DONE;
+ }
+}")
+
+;;
+;; ....................
+;;
+;; MISC.
+;;
+;; ....................
+;;
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")])
+
+
+;; For the rare case where we need to load into a register an address
+;; that cannot be recognized by the normal movsi/addsi instructions.
+;; I have no idea how many insns this can actually generate. It should
+;; be rare, so over-estimating as 10 instructions should not have any
+;; real performance impact.
+(define_insn "leasi"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (match_operand:SI 1 "address_operand" "p"))]
+ "Pmode == SImode"
+ "*
+{
+ rtx xoperands [3];
+
+ xoperands[0] = operands[0];
+ xoperands[1] = XEXP (operands[1], 0);
+ xoperands[2] = XEXP (operands[1], 1);
+ output_asm_insn (\"addiu\\t%0,%1,%2\", xoperands);
+ return \"\";
+}"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "40")])
+
+(define_insn "ado16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_ADO16))]
+ ""
+ "ado16\\t%0, %1, %2"
+)
+
+(define_insn "ram"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "I")
+ (match_operand:SI 3 "const_int_operand" "I")
+ (match_operand:SI 4 "const_int_operand" "I")]
+ UNSPEC_RAM))]
+ ""
+ "ram\\t%0, %1, %2, %3, %4"
+)
+
+(define_insn "chkhdr"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_CHKHDR)]
+ ""
+ "* return iq2000_fill_delay_slot (\"chkhdr\\t%0, %1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "pkrl"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_PKRL)]
+ ""
+ "* return iq2000_fill_delay_slot (\"pkrl\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "cfc0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CFC0))]
+ ""
+ "* return iq2000_fill_delay_slot (\"cfc0\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "cfc1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CFC1))]
+ ""
+ "* return iq2000_fill_delay_slot (\"cfc1\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "cfc2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CFC2))]
+ ""
+ "* return iq2000_fill_delay_slot (\"cfc2\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "cfc3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CFC3))]
+ ""
+ "* return iq2000_fill_delay_slot (\"cfc3\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "ctc0"
+ [(unspec_volatile:SI [(match_operand:SI 0 "reg_or_0_operand" "rJ")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CTC0)]
+ ""
+ "* return iq2000_fill_delay_slot (\"ctc0\\t%z0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "ctc1"
+ [(unspec_volatile:SI [(match_operand:SI 0 "reg_or_0_operand" "rJ")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CTC1)]
+ ""
+ "* return iq2000_fill_delay_slot (\"ctc1\\t%z0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "ctc2"
+ [(unspec_volatile:SI [(match_operand:SI 0 "reg_or_0_operand" "rJ")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CTC2)]
+ ""
+ "* return iq2000_fill_delay_slot (\"ctc2\\t%z0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "ctc3"
+ [(unspec_volatile:SI [(match_operand:SI 0 "reg_or_0_operand" "rJ")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_CTC3)]
+ ""
+ "* return iq2000_fill_delay_slot (\"ctc3\\t%z0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mfc0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MFC0))]
+ ""
+ "* return iq2000_fill_delay_slot (\"mfc0\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mfc1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MFC1))]
+ ""
+ "* return iq2000_fill_delay_slot (\"mfc1\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mfc2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MFC2))]
+ ""
+ "* return iq2000_fill_delay_slot (\"mfc2\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "mfc3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MFC3))]
+ ""
+ "* return iq2000_fill_delay_slot (\"mfc3\\t%0, %%%1\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "mtc0"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MTC0)]
+ ""
+ "* return iq2000_fill_delay_slot (\"mtc0\\t%0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mtc1"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MTC1)]
+ ""
+ "* return iq2000_fill_delay_slot (\"mtc1\\t%0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mtc2"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MTC2)]
+ ""
+ "* return iq2000_fill_delay_slot (\"mtc2\\t%0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "mtc3"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "I")]
+ UNSPEC_MTC3)]
+ ""
+ "* return iq2000_fill_delay_slot (\"mtc3\\t%0, %%%1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "lur"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUR)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lur\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "rb"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_RB)]
+ ""
+ "* return iq2000_fill_delay_slot (\"rb\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "rx"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_RX)]
+ ""
+ "* return iq2000_fill_delay_slot (\"rx\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "srrd"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_SRRD)]
+ ""
+ "* return iq2000_fill_delay_slot (\"srrd\\t%0\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "srwr"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_SRWR)]
+ ""
+ "* return iq2000_fill_delay_slot (\"srwr\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "wb"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_WB)]
+ ""
+ "* return iq2000_fill_delay_slot (\"wb\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "wx"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_WX)]
+ ""
+ "* return iq2000_fill_delay_slot (\"wx\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "luc32"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUC32)]
+ ""
+ "* return iq2000_fill_delay_slot (\"luc32\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "luc32l"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUC32L)]
+ ""
+ "* return iq2000_fill_delay_slot (\"luc32l\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "luc64"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUC64)]
+ ""
+ "* return iq2000_fill_delay_slot (\"luc64\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "luc64l"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUC64L)]
+ ""
+ "* return iq2000_fill_delay_slot (\"luc64l\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "luk"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUK)]
+ ""
+ "* return iq2000_fill_delay_slot (\"luk\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "lulck"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_LULCK)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lulck\\t%0\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "lum32"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUM32)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lum32\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "lum32l"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUM32L)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lum32l\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "lum64"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUM64)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lum64\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "lum64l"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LUM64L)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lum64l\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "lurl"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_LURL)]
+ ""
+ "* return iq2000_fill_delay_slot (\"lurl\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "mrgb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "I")]
+ UNSPEC_MRGB))]
+ ""
+ "* return iq2000_fill_delay_slot (\"mrgb\\t%0, %1, %2, %3\", DELAY_LOAD, operands, insn);"
+ [(set_attr "dslot" "ok_in_dslot")]
+)
+
+(define_insn "srrdl"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_SRRDL)]
+ ""
+ "* return iq2000_fill_delay_slot (\"srrdl\\t%0\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "srulck"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_SRULCK)]
+ ""
+ "* return iq2000_fill_delay_slot (\"srulck\\t%0\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "srwru"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_SRWRU)]
+ ""
+ "* return iq2000_fill_delay_slot (\"srwru\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "trapqfl"
+ [(unspec_volatile:SI [(const_int 1)] UNSPEC_TRAPQFL)]
+ ""
+ "* return iq2000_fill_delay_slot (\"trapqfl\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "trapqne"
+ [(unspec_volatile:SI [(const_int 2)] UNSPEC_TRAPQNE)]
+ ""
+ "* return iq2000_fill_delay_slot (\"trapqne\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "traprel"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_TRAPREL)]
+ ""
+ "* return iq2000_fill_delay_slot (\"traprel %0\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "wbu"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_WBU)]
+ ""
+ "* return iq2000_fill_delay_slot (\"wbu\\t%0, %1\", DELAY_NONE, operands, insn);"
+ [(set_attr "dslot" "not_in_dslot")]
+)
+
+(define_insn "syscall"
+ [(unspec_volatile:SI [(const_int 2)] UNSPEC_SYSCALL)]
+ ""
+ "syscall"
+ [(set_attr "dslot" "not_in_dslot")]
+)
diff --git a/gcc/config/iq2000/iq2000.opt b/gcc/config/iq2000/iq2000.opt
new file mode 100644
index 000000000..7ca61424f
--- /dev/null
+++ b/gcc/config/iq2000/iq2000.opt
@@ -0,0 +1,44 @@
+; Options for the Vitesse IQ2000 port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+march=
+Target RejectNegative Joined
+Specify CPU for code generation purposes
+
+mcpu=
+Target RejectNegative Joined
+Specify CPU for scheduling purposes
+
+membedded-data
+Target Mask(EMBEDDED_DATA)
+Use ROM instead of RAM
+
+mgpopt
+Target Mask(GPOPT)
+Use GP relative sdata/sbss sections
+
+; Not used by the compiler proper.
+mno-crt0
+Target RejectNegative
+No default crt0.o
+
+muninit-const-in-rodata
+Target Mask(UNINIT_CONST_IN_RODATA)
+Put uninitialized constants in ROM (needs -membedded-data)
diff --git a/gcc/config/iq2000/lib2extra-funcs.c b/gcc/config/iq2000/lib2extra-funcs.c
new file mode 100644
index 000000000..d53786c8c
--- /dev/null
+++ b/gcc/config/iq2000/lib2extra-funcs.c
@@ -0,0 +1,40 @@
+/* Copyright (C) 2003 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+
+USItype
+__mulsi3 (USItype a, USItype b)
+{
+ USItype c = 0;
+
+ while (a != 0)
+ {
+ if (a & 1)
+ c += b;
+ a >>= 1;
+ b <<= 1;
+ }
+
+ return c;
+}
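+
+/* Illustrative trace (editorial note, not part of the upstream file):
+   __mulsi3 (5, 3) performs classic shift-and-add multiplication:
+       a = 101b : low bit set   -> c = 0 + 3  = 3,   b becomes 6
+       a = 10b  : low bit clear -> c stays 3,        b becomes 12
+       a = 1b   : low bit set   -> c = 3 + 12 = 15
+   so it returns 5 * 3 = 15 after at most 32 iterations.  */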
diff --git a/gcc/config/iq2000/predicates.md b/gcc/config/iq2000/predicates.md
new file mode 100644
index 000000000..f27509030
--- /dev/null
+++ b/gcc/config/iq2000/predicates.md
@@ -0,0 +1,240 @@
+;; Predicate definitions for Vitesse IQ2000.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 if OP can be used as an operand where a register or 16-bit
+;; unsigned integer is needed.
+
+(define_predicate "uns_arith_operand"
+ (match_code "reg,const_int,subreg")
+{
+ if (GET_CODE (op) == CONST_INT && SMALL_INT_UNSIGNED (op))
+ return 1;
+
+ return register_operand (op, mode);
+})
+
+;; Return 1 if OP can be used as an operand where a 16-bit integer is
+;; needed.
+
+(define_predicate "arith_operand"
+ (match_code "reg,const_int,subreg")
+{
+ if (GET_CODE (op) == CONST_INT && SMALL_INT (op))
+ return 1;
+
+ return register_operand (op, mode);
+})
+
+;; Return 1 if OP is a register or a constant. gen_int_relational
+;; takes care of forcing out-of-range constants into a register.
+
+(define_predicate "reg_or_const_operand"
+ (ior (match_code "const_int")
+ (and (match_code "reg,subreg")
+ (match_operand 0 "register_operand"))))
+
+;; Return 1 if OP is an integer that fits in 16 bits.
+
+(define_predicate "small_int"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
+})
+
+;; Return 1 if OP is a 32-bit integer which is too big to be loaded
+;; with one instruction.
+
+(define_predicate "large_int"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT value;
+
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+
+ value = INTVAL (op);
+
+ /* IOR reg,$r0,value. */
+ if ((value & ~ ((HOST_WIDE_INT) 0x0000ffff)) == 0)
+ return 0;
+
+ /* SUBU reg,$r0,value. */
+ if (((unsigned HOST_WIDE_INT) (value + 32768)) <= 32767)
+ return 0;
+
+ /* LUI reg,value >> 16. */
+ if ((value & 0x0000ffff) == 0)
+ return 0;
+
+ return 1;
+})
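+
+;; For example (illustrative), 0x12345 is "large": it has bits set
+;; above the low 16, it is not a small negative constant, and its low
+;; 16 bits are nonzero, so it needs a two-instruction sequence (LUI
+;; followed by IOR) rather than a single instruction.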
+
+;; Return 1 if OP is a register or the constant 0.
+
+(define_predicate "reg_or_0_operand"
+ (match_code "reg,const_int,const_double,subreg")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+ return INTVAL (op) == 0;
+
+ case CONST_DOUBLE:
+ return op == CONST0_RTX (mode);
+
+ case REG:
+ case SUBREG:
+ return register_operand (op, mode);
+
+ default:
+ break;
+ }
+
+ return 0;
+})
+
+;; Return 1 if OP is a memory operand that fits in a single
+;; instruction (i.e., register + small offset).
+
+(define_predicate "simple_memory_operand"
+ (match_code "mem,subreg")
+{
+ rtx addr, plus0, plus1;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ /* Dword operations really put out 2 instructions, so eliminate them. */
+ if (GET_MODE_SIZE (GET_MODE (op)) > (unsigned) UNITS_PER_WORD)
+ return 0;
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ case LO_SUM:
+ return 1;
+
+ case CONST_INT:
+ return SMALL_INT (addr);
+
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+ if (GET_CODE (plus0) == REG
+ && GET_CODE (plus1) == CONST_INT && SMALL_INT (plus1)
+ && SMALL_INT_UNSIGNED (plus1) /* No negative offsets. */)
+ return 1;
+
+ else if (GET_CODE (plus1) == REG
+ && GET_CODE (plus0) == CONST_INT && SMALL_INT (plus0)
+	       && SMALL_INT_UNSIGNED (plus0) /* No negative offsets.  */)
+ return 1;
+
+ else
+ return 0;
+
+ case SYMBOL_REF:
+ return 0;
+
+ default:
+ break;
+ }
+
+ return 0;
+})
+
+;; Return nonzero if the code of this rtx pattern is EQ or NE.
+
+(define_predicate "equality_op"
+ (match_code "eq,ne")
+{
+ if (mode != GET_MODE (op))
+ return 0;
+
+ return GET_CODE (op) == EQ || GET_CODE (op) == NE;
+})
+
+;; Return nonzero if the code is a relational operation (EQ, LE,
+;; etc.).
+
+(define_predicate "cmp_op"
+ (match_code "eq,ne,gt,ge,gtu,geu,lt,le,ltu,leu")
+{
+ if (mode != GET_MODE (op))
+ return 0;
+
+ return COMPARISON_P (op);
+})
+
+;; Return nonzero if the operand is either the PC or a label_ref.
+
+(define_special_predicate "pc_or_label_operand"
+ (match_code "pc,label_ref")
+{
+ if (op == pc_rtx)
+ return 1;
+
+ if (GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ return 0;
+})
+
+;; Return nonzero if OP is a valid operand for a call instruction.
+
+(define_predicate "call_insn_operand"
+ (match_code "const_int,const,symbol_ref,reg")
+{
+ return (CONSTANT_ADDRESS_P (op)
+ || (GET_CODE (op) == REG && op != arg_pointer_rtx
+ && ! (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && REGNO (op) <= LAST_VIRTUAL_REGISTER)));
+})
+
+;; Return nonzero if OP is valid as a source operand for a move
+;; instruction.
+
+(define_predicate "move_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,mem")
+{
+ /* Accept any general operand after reload has started; doing so
+ avoids losing if reload does an in-place replacement of a register
+ with a SYMBOL_REF or CONST. */
+ return (general_operand (op, mode)
+ && (! (iq2000_check_split (op, mode))
+ || reload_in_progress || reload_completed));
+})
+
+;; Return nonzero if OP is a constant power of 2.
+
+(define_predicate "power_of_2_operand"
+ (match_code "const_int")
+{
+ int intval;
+
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ else
+ intval = INTVAL (op);
+
+ return ((intval & ((unsigned)(intval) - 1)) == 0);
+})
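+
+;; Note (illustrative): this uses the standard bit trick that for a
+;; power of two, clearing the lowest set bit with intval & (intval - 1)
+;; yields zero; e.g. 8 & 7 == 0, while 6 & 5 == 4.  Zero also passes
+;; the test as written, since 0 & (unsigned) -1 == 0.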
diff --git a/gcc/config/iq2000/t-iq2000 b/gcc/config/iq2000/t-iq2000
new file mode 100644
index 000000000..0f28f7458
--- /dev/null
+++ b/gcc/config/iq2000/t-iq2000
@@ -0,0 +1,50 @@
+# Copyright (C) 2003, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Suppress building libgcc1.a, since the IQ2000 compiler port is complete
+# and does not need anything from libgcc1.a.
+LIBGCC1 =
+CROSS_LIBGCC1 =
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c $(srcdir)/config/udivmodsi4.c $(srcdir)/config/iq2000/lib2extra-funcs.c
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+# Enable the following if multilibs are needed.
+# See gcc/genmultilib, gcc/gcc.texi and gcc/tm.texi for a
+# description of the options and their values.
+#
+# MULTILIB_OPTIONS =
+# MULTILIB_DIRNAMES =
+# MULTILIB_MATCHES =
+# MULTILIB_EXCEPTIONS =
+# MULTILIB_EXTRA_OPTS =
+#
+# LIBGCC = stmp-multilib
+# INSTALL_LIBGCC = install-multilib
+
diff --git a/gcc/config/kfreebsd-gnu.h b/gcc/config/kfreebsd-gnu.h
new file mode 100644
index 000000000..592bae3d3
--- /dev/null
+++ b/gcc/config/kfreebsd-gnu.h
@@ -0,0 +1,37 @@
+/* Definitions for kFreeBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2006, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINUX_TARGET_OS_CPP_BUILTINS
+#define LINUX_TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__FreeBSD_kernel__"); \
+ builtin_define ("__GLIBC__"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } \
+ while (0)
+
+#ifdef GLIBC_DYNAMIC_LINKER
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+#endif
diff --git a/gcc/config/knetbsd-gnu.h b/gcc/config/knetbsd-gnu.h
new file mode 100644
index 000000000..4cc8eb796
--- /dev/null
+++ b/gcc/config/knetbsd-gnu.h
@@ -0,0 +1,38 @@
+/* Definitions for kNetBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2006, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINUX_TARGET_OS_CPP_BUILTINS
+#define LINUX_TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__NetBSD_kernel__"); \
+ builtin_define ("__GLIBC__"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } \
+ while (0)
+
+
+#ifdef GLIBC_DYNAMIC_LINKER
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+#endif
diff --git a/gcc/config/kopensolaris-gnu.h b/gcc/config/kopensolaris-gnu.h
new file mode 100644
index 000000000..e044fafce
--- /dev/null
+++ b/gcc/config/kopensolaris-gnu.h
@@ -0,0 +1,37 @@
+/* Definitions for kOpenSolaris-based GNU systems with ELF format
+ Copyright (C) 2004, 2006, 2007, 2009
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINUX_TARGET_OS_CPP_BUILTINS
+#define LINUX_TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__OpenSolaris_kernel__"); \
+ builtin_define ("__GLIBC__"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } \
+ while (0)
+
+#ifdef GLIBC_DYNAMIC_LINKER
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+#endif
diff --git a/gcc/config/libgcc-glibc.ver b/gcc/config/libgcc-glibc.ver
new file mode 100644
index 000000000..7824ad5a2
--- /dev/null
+++ b/gcc/config/libgcc-glibc.ver
@@ -0,0 +1,55 @@
+# Copyright (C) 2000, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By choosing the same version tags for these specific routines now, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+%exclude {
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.0
+GLIBC_2.0 {
+ # Sampling of DImode arithmetic used by (at least) i386 and m68k.
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+
+  # Exception handling support functions used by almost everyone.
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
diff --git a/gcc/config/linux-android.h b/gcc/config/linux-android.h
new file mode 100644
index 000000000..94c52748f
--- /dev/null
+++ b/gcc/config/linux-android.h
@@ -0,0 +1,60 @@
+/* Configuration file for Linux Android targets.
+ Copyright (C) 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Doug Kwan (dougkwan@google.com)
+ Rewritten by CodeSourcery, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define ANDROID_TARGET_OS_CPP_BUILTINS() \
+ do { \
+ if (OPTION_ANDROID) \
+ builtin_define ("__ANDROID__"); \
+ } while (0)
+
+#if ANDROID_DEFAULT
+# define NOANDROID "mno-android"
+#else
+# define NOANDROID "!mandroid"
+#endif
+
+#define LINUX_OR_ANDROID_CC(LINUX_SPEC, ANDROID_SPEC) \
+ "%{" NOANDROID "|tno-android-cc:" LINUX_SPEC ";:" ANDROID_SPEC "}"
+
+#define LINUX_OR_ANDROID_LD(LINUX_SPEC, ANDROID_SPEC) \
+ "%{" NOANDROID "|tno-android-ld:" LINUX_SPEC ";:" ANDROID_SPEC "}"
+
+#define ANDROID_LINK_SPEC \
+ "%{shared: -Bsymbolic}"
+
+#define ANDROID_CC1_SPEC \
+ "%{!mglibc:%{!muclibc:%{!mbionic: -mbionic}}} " \
+ "%{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: -fPIC}}}}"
+
+#define ANDROID_CC1PLUS_SPEC \
+ "%{!fexceptions:%{!fno-exceptions: -fno-exceptions}} " \
+ "%{!frtti:%{!fno-rtti: -fno-rtti}}"
+
+#define ANDROID_LIB_SPEC \
+ "%{!static: -ldl}"
+
+#define ANDROID_STARTFILE_SPEC \
+ "%{!shared:" \
+ " %{static: crtbegin_static%O%s;: crtbegin_dynamic%O%s}}"
+
+#define ANDROID_ENDFILE_SPEC \
+ "%{!shared: crtend_android%O%s}"
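+
+/* As an illustration: with ANDROID_DEFAULT unset, NOANDROID is
+   "!mandroid", so LINUX_OR_ANDROID_CC (L, A) expands to
+       %{!mandroid|tno-android-cc:L;:A}
+   i.e. the Linux spec L is used unless -mandroid is given without
+   -tno-android-cc, in which case the Android spec A is used.  */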
diff --git a/gcc/config/linux-android.opt b/gcc/config/linux-android.opt
new file mode 100644
index 000000000..d075bef1f
--- /dev/null
+++ b/gcc/config/linux-android.opt
@@ -0,0 +1,30 @@
+; Android specific options.
+
+; Copyright (C) 2008, 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mandroid
+Target Report Mask(ANDROID) Var(flag_android) Init(ANDROID_DEFAULT ? OPTION_MASK_ANDROID : 0)
+Generate code for the Android platform.
+
+tno-android-cc
+Driver
+
+tno-android-ld
+Driver
+
diff --git a/gcc/config/linux.h b/gcc/config/linux.h
new file mode 100644
index 000000000..00b4f1c1c
--- /dev/null
+++ b/gcc/config/linux.h
@@ -0,0 +1,99 @@
+/* Definitions for systems using the Linux kernel, with or without
+ MMU, using ELF at the compiler level but possibly FLT for final
+ linked executables and shared libraries in some no-MMU cases, and
+ possibly with a choice of libc implementations.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2003, 2004, 2005, 2006,
+ 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu (hjl@lucon.org).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* C libraries supported on Linux. */
+#ifdef SINGLE_LIBC
+#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
+#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC)
+#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC)
+#else
+#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
+#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC)
+#define OPTION_BIONIC (linux_libc == LIBC_BIONIC)
+#endif
+
+#define LINUX_TARGET_OS_CPP_BUILTINS() \
+ do { \
+ if (OPTION_GLIBC) \
+ builtin_define ("__gnu_linux__"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=linux"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ } while (0)
+
+/* Determine which dynamic linker to use depending on whether GLIBC or
+ uClibc or Bionic is the default C library and whether
+ -muclibc or -mglibc or -mbionic has been passed to change the default. */
+
+#define CHOOSE_DYNAMIC_LINKER1(LIBC1, LIBC2, LIBC3, LD1, LD2, LD3) \
+ "%{" LIBC2 ":" LD2 ";:%{" LIBC3 ":" LD3 ";:" LD1 "}}"
+
+#if DEFAULT_LIBC == LIBC_GLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
+ CHOOSE_DYNAMIC_LINKER1 ("mglibc", "muclibc", "mbionic", G, U, B)
+#elif DEFAULT_LIBC == LIBC_UCLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
+ CHOOSE_DYNAMIC_LINKER1 ("muclibc", "mglibc", "mbionic", U, G, B)
+#elif DEFAULT_LIBC == LIBC_BIONIC
+#define CHOOSE_DYNAMIC_LINKER(G, U, B) \
+ CHOOSE_DYNAMIC_LINKER1 ("mbionic", "mglibc", "muclibc", B, G, U)
+#else
+#error "Unsupported DEFAULT_LIBC"
+#endif /* DEFAULT_LIBC */
+
+/* For most targets the following definitions suffice;
+ GLIBC_DYNAMIC_LINKER must be defined for each target using them, or
+ GLIBC_DYNAMIC_LINKER32 and GLIBC_DYNAMIC_LINKER64 for targets
+ supporting both 32-bit and 64-bit compilation. */
+#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
+#define BIONIC_DYNAMIC_LINKER "/system/bin/linker"
+#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker"
+#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64"
+
+#define LINUX_DYNAMIC_LINKER \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER, \
+ BIONIC_DYNAMIC_LINKER)
+#define LINUX_DYNAMIC_LINKER32 \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32, \
+ BIONIC_DYNAMIC_LINKER32)
+#define LINUX_DYNAMIC_LINKER64 \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64, \
+ BIONIC_DYNAMIC_LINKER64)
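+
+/* As an illustration: when DEFAULT_LIBC == LIBC_GLIBC, LINUX_DYNAMIC_LINKER
+   above expands to roughly
+       %{muclibc:<uClibc linker>;:%{mbionic:<Bionic linker>;:<glibc linker>}}
+   so the glibc dynamic linker is chosen unless -muclibc or -mbionic is
+   given on the command line.  */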
+
+/* Determine whether the entire C99 runtime
+   is present in the runtime library.  */
+#define TARGET_C99_FUNCTIONS (OPTION_GLIBC)
+
+/* Whether we have sincos that follows the GNU extension. */
+#define TARGET_HAS_SINCOS (OPTION_GLIBC || OPTION_BIONIC)
diff --git a/gcc/config/linux.opt b/gcc/config/linux.opt
new file mode 100644
index 000000000..ba6b9f83e
--- /dev/null
+++ b/gcc/config/linux.opt
@@ -0,0 +1,32 @@
+; Processor-independent options for GNU/Linux.
+;
+; Copyright (C) 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+; Contributed by CodeSourcery.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mbionic
+Target Report RejectNegative Var(linux_libc,LIBC_BIONIC) Init(DEFAULT_LIBC) Negative(mglibc)
+Use Bionic C library
+
+mglibc
+Target Report RejectNegative Var(linux_libc,LIBC_GLIBC) Negative(muclibc)
+Use GNU C library
+
+muclibc
+Target Report RejectNegative Var(linux_libc,LIBC_UCLIBC) Negative(mbionic)
+Use uClibc C library
diff --git a/gcc/config/lm32/constraints.md b/gcc/config/lm32/constraints.md
new file mode 100644
index 000000000..a8c7f97e2
--- /dev/null
+++ b/gcc/config/lm32/constraints.md
@@ -0,0 +1,57 @@
+;; Constraint definitions for Lattice Mico32 architecture.
+;; Contributed by Jon Beniston <jon@beniston.com>
+;;
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constraint "J"
+ "The value 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "K"
+ "A signed 16-bit immediate in the range -32768 to 32767."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32768, 32767)")))
+
+(define_constraint "L"
+ "An unsigned 16-bit immediate in the range 0 to 65535."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 65535)")))
+
+(define_constraint "M"
+ "The value 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "U"
+ "A shifted signed 16-bit constant appropriate for orhi."
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) == 0
+ && (ival >> 31 == -1 || ival >> 31 == 0)")))
+
+(define_constraint "S"
+ "A symbol in the small data section."
+ (match_operand 0 "no_pic_small_symbol"))
+
+(define_constraint "Y"
+ "A high part of a symbol."
+ (and (match_code "high")
+ (ior (ior (match_code "symbol_ref" "0")
+ (match_code "label_ref" "0"))
+ (match_code "const" "0"))))
diff --git a/gcc/config/lm32/lm32-protos.h b/gcc/config/lm32/lm32-protos.h
new file mode 100644
index 000000000..bc086d2ee
--- /dev/null
+++ b/gcc/config/lm32/lm32-protos.h
@@ -0,0 +1,39 @@
+/* Prototypes of target machine functions, Lattice Mico32 architecture.
+ Contributed by Jon Beniston <jon@beniston.com>
+
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+extern int lm32_return_in_memory (tree type);
+extern void lm32_declare_object (FILE *stream, char *name, char *init_string,
+ char *final_string, int size);
+extern void lm32_expand_prologue (void);
+extern void lm32_expand_epilogue (void);
+extern void lm32_print_operand (FILE *file, rtx op, int letter);
+extern void lm32_print_operand_address (FILE *file, rtx addr);
+extern HOST_WIDE_INT lm32_compute_initial_elimination_offset (int from,
+ int to);
+extern int lm32_can_use_return (void);
+extern rtx lm32_return_addr_rtx (int count, rtx frame);
+extern int lm32_expand_block_move (rtx *);
+extern int nonpic_symbol_mentioned_p (rtx);
+extern rtx lm32_legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern void lm32_expand_scc (rtx operands[]);
+extern void lm32_expand_conditional_branch (rtx operands[]);
+extern bool lm32_move_ok (enum machine_mode, rtx operands[2]);
+extern bool lm32_legitimate_constant_p (rtx);
diff --git a/gcc/config/lm32/lm32.c b/gcc/config/lm32/lm32.c
new file mode 100644
index 000000000..2c7131a5d
--- /dev/null
+++ b/gcc/config/lm32/lm32.c
@@ -0,0 +1,1248 @@
+/* Subroutines used for code generation on the Lattice Mico32 architecture.
+ Contributed by Jon Beniston <jon@beniston.com>
+
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+#include "recog.h"
+#include "output.h"
+#include "tree.h"
+#include "expr.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "tm-constrs.h"
+#include "df.h"
+
+struct lm32_frame_info
+{
+ HOST_WIDE_INT total_size; /* number of bytes of entire frame. */
+ HOST_WIDE_INT callee_size; /* number of bytes to save callee saves. */
+ HOST_WIDE_INT pretend_size; /* number of bytes we pretend caller did. */
+ HOST_WIDE_INT args_size; /* number of bytes for outgoing arguments. */
+ HOST_WIDE_INT locals_size; /* number of bytes for local variables. */
+ unsigned int reg_save_mask; /* mask of saved registers. */
+};
+
+/* Prototypes for static functions. */
+static rtx emit_add (rtx dest, rtx src0, rtx src1);
+static void expand_save_restore (struct lm32_frame_info *info, int op);
+static void stack_adjust (HOST_WIDE_INT amount);
+static bool lm32_in_small_data_p (const_tree);
+static void lm32_setup_incoming_varargs (CUMULATIVE_ARGS * cum,
+ enum machine_mode mode, tree type,
+ int *pretend_size, int no_rtl);
+static bool lm32_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed);
+static bool lm32_can_eliminate (const int, const int);
+static bool
+lm32_legitimate_address_p (enum machine_mode mode, rtx x, bool strict);
+static HOST_WIDE_INT lm32_compute_frame_size (int size);
+static void lm32_option_override (void);
+static rtx lm32_function_arg (CUMULATIVE_ARGS * cum,
+ enum machine_mode mode, const_tree type,
+ bool named);
+static void lm32_function_arg_advance (CUMULATIVE_ARGS * cum,
+ enum machine_mode mode,
+ const_tree type, bool named);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options lm32_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE lm32_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE lm32_option_optimization_table
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS lm32_rtx_costs
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P lm32_in_small_data_p
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS lm32_setup_incoming_varargs
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG lm32_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE lm32_function_arg_advance
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET -0x8000
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 0x7fff
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE lm32_can_eliminate
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P lm32_legitimate_address_p
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Current frame information calculated by lm32_compute_frame_size. */
+static struct lm32_frame_info current_frame_info;
+
+/* Return non-zero if the given return type should be returned in memory. */
+
+int
+lm32_return_in_memory (tree type)
+{
+ HOST_WIDE_INT size;
+
+ if (!AGGREGATE_TYPE_P (type))
+ {
+ /* All simple types are returned in registers. */
+ return 0;
+ }
+
+ size = int_size_in_bytes (type);
+ if (size >= 0 && size <= UNITS_PER_WORD)
+ {
+ /* If it can fit in one register. */
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Generate and emit a word-sized add instruction.  */
+
+static rtx
+emit_add (rtx dest, rtx src0, rtx src1)
+{
+ rtx insn;
+ insn = emit_insn (gen_addsi3 (dest, src0, src1));
+ return insn;
+}
+
+/* Generate code to compare (and possibly branch) two integer values.
+   CODE is the comparison code we are trying to emulate
+   (or implement directly).
+   RESULT is where to store the result of the comparison,
+   or null to emit a branch.
+   CMP0 and CMP1 are the two comparison operands.
+   DESTINATION is the destination of the branch, or null to only
+   compare.  */
+
+static void
+gen_int_relational (enum rtx_code code,
+ rtx result,
+ rtx cmp0,
+ rtx cmp1,
+ rtx destination)
+{
+ enum machine_mode mode;
+ int branch_p;
+ rtx temp;
+ rtx cond;
+ rtx label;
+
+ mode = GET_MODE (cmp0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (cmp1);
+
+  /* Is this a branch or a compare?  */
+ branch_p = (destination != 0);
+
+ /* Instruction set doesn't support LE or LT, so swap operands and use
+ GE, GT. */
+ switch (code)
+ {
+ case LE:
+ case LT:
+ case LEU:
+ case LTU:
+ {
+ rtx temp;
+
+ code = swap_condition (code);
+ temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ break;
+ }
+ default:
+ break;
+ }
+
+ if (branch_p)
+ {
+ rtx insn, cond, label;
+
+ /* Operands must be in registers. */
+ if (!register_operand (cmp0, mode))
+ cmp0 = force_reg (mode, cmp0);
+ if (!register_operand (cmp1, mode))
+ cmp1 = force_reg (mode, cmp1);
+
+ /* Generate conditional branch instruction. */
+ cond = gen_rtx_fmt_ee (code, mode, cmp0, cmp1);
+ label = gen_rtx_LABEL_REF (VOIDmode, destination);
+ insn = gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ cond, label, pc_rtx));
+ emit_jump_insn (insn);
+ }
+ else
+ {
+ /* We can't have const_ints in cmp0, other than 0. */
+ if ((GET_CODE (cmp0) == CONST_INT) && (INTVAL (cmp0) != 0))
+ cmp0 = force_reg (mode, cmp0);
+
+ /* If the comparison is against an int not in legal range
+ move it into a register. */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ case LE:
+ case LT:
+ case GE:
+ case GT:
+ if (!satisfies_constraint_K (cmp1))
+ cmp1 = force_reg (mode, cmp1);
+ break;
+ case LEU:
+ case LTU:
+ case GEU:
+ case GTU:
+ if (!satisfies_constraint_L (cmp1))
+ cmp1 = force_reg (mode, cmp1);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Generate compare instruction. */
+ emit_move_insn (result, gen_rtx_fmt_ee (code, mode, cmp0, cmp1));
+ }
+}
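+
+/* Example (illustrative): for "r = (a < b)", gen_int_relational
+   receives LT, swaps it to GT with the operands exchanged, and emits
+   the single compare "r = (b > a)", since only the GE/GT forms
+   exist.  */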
+
+/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2]
+   and OPERANDS[3].  Store the result in OPERANDS[0].  */
+
+void
+lm32_expand_scc (rtx operands[])
+{
+ rtx target = operands[0];
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+
+ gen_int_relational (code, target, op0, op1, NULL_RTX);
+}
+
+/* Compare OPERANDS[1] with OPERANDS[2] using comparison code
+ CODE and jump to OPERANDS[3] if the condition holds. */
+
+void
+lm32_expand_conditional_branch (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx destination = operands[3];
+
+ gen_int_relational (code, NULL_RTX, op0, op1, destination);
+}
+
+/* Generate and emit RTL to save or restore callee save registers. */
+static void
+expand_save_restore (struct lm32_frame_info *info, int op)
+{
+ unsigned int reg_save_mask = info->reg_save_mask;
+ int regno;
+ HOST_WIDE_INT offset;
+ rtx insn;
+
+ /* Callee saves are below locals and above outgoing arguments. */
+ offset = info->args_size + info->callee_size;
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if ((reg_save_mask & (1 << regno)) != 0)
+ {
+ rtx offset_rtx;
+ rtx mem;
+
+ offset_rtx = GEN_INT (offset);
+ if (satisfies_constraint_K (offset_rtx))
+ {
+ mem = gen_rtx_MEM (word_mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ offset_rtx));
+ }
+ else
+ {
+ /* r10 is caller saved so it can be used as a temp reg. */
+ rtx r10;
+
+ r10 = gen_rtx_REG (word_mode, 10);
+ insn = emit_move_insn (r10, offset_rtx);
+ if (op == 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_add (r10, r10, stack_pointer_rtx);
+ if (op == 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ mem = gen_rtx_MEM (word_mode, r10);
+ }
+
+ if (op == 0)
+ insn = emit_move_insn (mem, gen_rtx_REG (word_mode, regno));
+ else
+ insn = emit_move_insn (gen_rtx_REG (word_mode, regno), mem);
+
+	  /* Only prologue instructions that set the sp or fp, or save a
+	     register, should be marked as frame-related.  */
+ if (op == 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset -= UNITS_PER_WORD;
+ }
+ }
+}
+
+static void
+stack_adjust (HOST_WIDE_INT amount)
+{
+ rtx insn;
+
+ if (!IN_RANGE (amount, -32776, 32768))
+ {
+ /* r10 is caller saved so it can be used as a temp reg. */
+ rtx r10;
+ r10 = gen_rtx_REG (word_mode, 10);
+ insn = emit_move_insn (r10, GEN_INT (amount));
+ if (amount < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_add (stack_pointer_rtx, stack_pointer_rtx, r10);
+ if (amount < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ insn = emit_add (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (amount));
+ if (amount < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
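+
+/* For example (illustrative): stack_adjust (-40000) cannot use an
+   immediate, because -40000 is outside the signed 16-bit range, so the
+   amount is first loaded into the caller-saved register r10 and then
+   added to the stack pointer with a second instruction.  */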
+
+
+/* Create and emit instructions for a function's prologue.  */
+void
+lm32_expand_prologue (void)
+{
+ rtx insn;
+
+ lm32_compute_frame_size (get_frame_size ());
+
+ if (current_frame_info.total_size > 0)
+ {
+      /* Allocate stack space for the new frame.  */
+ stack_adjust (-current_frame_info.total_size);
+
+ /* Save callee save registers. */
+ if (current_frame_info.reg_save_mask != 0)
+ expand_save_restore (&current_frame_info, 0);
+
+      /* Set up the frame pointer if it is needed.  */
+ if (frame_pointer_needed == 1)
+ {
+ /* Move sp to fp. */
+ insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+	  /* Add offset.  We don't use total_size, as that includes
+	     pretend_size, which isn't part of this frame.  */
+ insn = emit_add (frame_pointer_rtx,
+ frame_pointer_rtx,
+ GEN_INT (current_frame_info.args_size +
+ current_frame_info.callee_size +
+ current_frame_info.locals_size));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Prevent prologue from being scheduled into function body. */
+ emit_insn (gen_blockage ());
+ }
+}
+
+/* Create and emit instructions for a function's epilogue.  */
+void
+lm32_expand_epilogue (void)
+{
+ rtx ra_rtx = gen_rtx_REG (Pmode, RA_REGNUM);
+
+ lm32_compute_frame_size (get_frame_size ());
+
+ if (current_frame_info.total_size > 0)
+ {
+ /* Prevent stack code from being reordered. */
+ emit_insn (gen_blockage ());
+
+ /* Restore callee save registers. */
+ if (current_frame_info.reg_save_mask != 0)
+ expand_save_restore (&current_frame_info, 1);
+
+ /* Deallocate stack. */
+ stack_adjust (current_frame_info.total_size);
+
+ /* Return to calling function. */
+ emit_jump_insn (gen_return_internal (ra_rtx));
+ }
+ else
+ {
+ /* Return to calling function. */
+ emit_jump_insn (gen_return_internal (ra_rtx));
+ }
+}
+
+/* Compute the total size of the stack frame, record the breakdown in
+   current_frame_info, and return the total size in bytes.  */
+static HOST_WIDE_INT
+lm32_compute_frame_size (int size)
+{
+ int regno;
+ HOST_WIDE_INT total_size, locals_size, args_size, pretend_size, callee_size;
+ unsigned int reg_save_mask;
+
+ locals_size = size;
+ args_size = crtl->outgoing_args_size;
+ pretend_size = crtl->args.pretend_args_size;
+ callee_size = 0;
+ reg_save_mask = 0;
+
+  /* Build the mask that determines which registers we save,
+     and calculate the size required to store them on the stack.  */
+ for (regno = 1; regno < SP_REGNUM; regno++)
+ {
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ {
+ reg_save_mask |= 1 << regno;
+ callee_size += UNITS_PER_WORD;
+ }
+ }
+ if (df_regs_ever_live_p (RA_REGNUM) || !current_function_is_leaf
+ || !optimize)
+ {
+ reg_save_mask |= 1 << RA_REGNUM;
+ callee_size += UNITS_PER_WORD;
+ }
+ if (!(reg_save_mask & (1 << FP_REGNUM)) && frame_pointer_needed)
+ {
+ reg_save_mask |= 1 << FP_REGNUM;
+ callee_size += UNITS_PER_WORD;
+ }
+
+ /* Compute total frame size. */
+ total_size = pretend_size + args_size + locals_size + callee_size;
+
+ /* Align frame to appropriate boundary. */
+ total_size = (total_size + 3) & ~3;
+
+ /* Save computed information. */
+ current_frame_info.total_size = total_size;
+ current_frame_info.callee_size = callee_size;
+ current_frame_info.pretend_size = pretend_size;
+ current_frame_info.locals_size = locals_size;
+ current_frame_info.args_size = args_size;
+ current_frame_info.reg_save_mask = reg_save_mask;
+
+ return total_size;
+}
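+
+/* The resulting frame layout, from the stack pointer upwards, is
+   (illustrative):
+       sp -> outgoing arguments   (args_size bytes)
+             callee saves         (callee_size bytes)
+             locals               (locals_size bytes)
+             pretend arguments    (pretend_size bytes)
+   with the total rounded up to a 4-byte boundary.  */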
+
+void
+lm32_print_operand (FILE * file, rtx op, int letter)
+{
+ enum rtx_code code;
+
+ code = GET_CODE (op);
+
+ if (code == SIGN_EXTEND)
+ op = XEXP (op, 0), code = GET_CODE (op);
+ else if (code == REG || code == SUBREG)
+ {
+ int regnum;
+
+ if (code == REG)
+ regnum = REGNO (op);
+ else
+ regnum = true_regnum (op);
+
+ fprintf (file, "%s", reg_names[regnum]);
+ }
+ else if (code == HIGH)
+ output_addr_const (file, XEXP (op, 0));
+ else if (code == MEM)
+ output_address (XEXP (op, 0));
+ else if (letter == 'z' && GET_CODE (op) == CONST_INT && INTVAL (op) == 0)
+ fprintf (file, "%s", reg_names[0]);
+ else if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ if ((CONST_DOUBLE_LOW (op) != 0) || (CONST_DOUBLE_HIGH (op) != 0))
+ output_operand_lossage ("only 0.0 can be loaded as an immediate");
+ else
+ fprintf (file, "0");
+ }
+ else if (code == EQ)
+ fprintf (file, "e ");
+ else if (code == NE)
+ fprintf (file, "ne ");
+ else if (code == GT)
+ fprintf (file, "g ");
+ else if (code == GTU)
+ fprintf (file, "gu ");
+ else if (code == LT)
+ fprintf (file, "l ");
+ else if (code == LTU)
+ fprintf (file, "lu ");
+ else if (code == GE)
+ fprintf (file, "ge ");
+ else if (code == GEU)
+ fprintf (file, "geu");
+ else if (code == LE)
+ fprintf (file, "le ");
+ else if (code == LEU)
+ fprintf (file, "leu");
+ else
+ output_addr_const (file, op);
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression.
+
+ On some machines, the syntax for a symbolic address depends on
+ the section that the address refers to. On these machines,
+ define the macro `ENCODE_SECTION_INFO' to store the information
+ into the `symbol_ref', and then check for it here. */
+
+void
+lm32_print_operand_address (FILE * file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "(%s+0)", reg_names[REGNO (addr)]);
+ break;
+
+ case MEM:
+ output_address (XEXP (addr, 0));
+ break;
+
+ case PLUS:
+ {
+ rtx arg0 = XEXP (addr, 0);
+ rtx arg1 = XEXP (addr, 1);
+
+ if (GET_CODE (arg0) == REG && CONSTANT_P (arg1))
+ {
+ if (GET_CODE (arg1) == CONST_INT)
+ fprintf (file, "(%s+%ld)", reg_names[REGNO (arg0)],
+ INTVAL (arg1));
+ else
+ {
+ fprintf (file, "(%s+", reg_names[REGNO (arg0)]);
+ output_addr_const (file, arg1);
+ fprintf (file, ")");
+ }
+ }
+ else if (CONSTANT_P (arg0) && CONSTANT_P (arg1))
+ output_addr_const (file, addr);
+ else
+ fatal_insn ("bad operand", addr);
+ }
+ break;
+
+ case SYMBOL_REF:
+ if (SYMBOL_REF_SMALL_P (addr))
+ {
+ fprintf (file, "gp(");
+ output_addr_const (file, addr);
+ fprintf (file, ")");
+ }
+ else
+ fatal_insn ("can't use non gp relative absolute address", addr);
+ break;
+
+ default:
+ fatal_insn ("invalid addressing mode", addr);
+ break;
+ }
+}
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+lm32_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (mode == VOIDmode)
+ /* Compute operand 2 of the call insn. */
+ return GEN_INT (0);
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return NULL_RTX;
+
+ if (!named || (*cum + LM32_NUM_REGS2 (mode, type) > LM32_NUM_ARG_REGS))
+ return NULL_RTX;
+
+ return gen_rtx_REG (mode, *cum + LM32_FIRST_ARG_REG);
+}
+
+static void
+lm32_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += LM32_NUM_REGS2 (mode, type);
+}
+
+HOST_WIDE_INT
+lm32_compute_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset = 0;
+
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ switch (to)
+ {
+ case FRAME_POINTER_REGNUM:
+ offset = 0;
+ break;
+ case STACK_POINTER_REGNUM:
+ offset =
+ lm32_compute_frame_size (get_frame_size ()) -
+ current_frame_info.pretend_size;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return offset;
+}
+
+static void
+lm32_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ tree type, int *pretend_size, int no_rtl)
+{
+ int first_anon_arg;
+ tree fntype;
+
+ fntype = TREE_TYPE (current_function_decl);
+
+ if (stdarg_p (fntype))
+ first_anon_arg = *cum + LM32_FIRST_ARG_REG;
+ else
+ {
+      /* This is the common case: we have been passed details set up
+	 for the last named argument, and we want to skip over any
+	 registers used in passing this named parameter, in order to
+	 determine which is the first register used to pass anonymous
+	 arguments.  */
+ int size;
+
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ first_anon_arg =
+ *cum + LM32_FIRST_ARG_REG +
+ ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+
+ if ((first_anon_arg < (LM32_FIRST_ARG_REG + LM32_NUM_ARG_REGS)) && !no_rtl)
+ {
+ int first_reg_offset = first_anon_arg;
+ int size = LM32_FIRST_ARG_REG + LM32_NUM_ARG_REGS - first_anon_arg;
+ rtx regblock;
+
+ regblock = gen_rtx_MEM (BLKmode,
+ plus_constant (arg_pointer_rtx,
+ FIRST_PARM_OFFSET (0)));
+ move_block_from_reg (first_reg_offset, regblock, size);
+
+ *pretend_size = size * UNITS_PER_WORD;
+ }
+}
+
+/* Override command line options. */
+static void
+lm32_option_override (void)
+{
+ /* We must have sign-extend enabled if barrel-shift isn't. */
+ if (!TARGET_BARREL_SHIFT_ENABLED && !TARGET_SIGN_EXTEND_ENABLED)
+ target_flags |= MASK_SIGN_EXTEND_ENABLED;
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+int
+lm32_can_use_return (void)
+{
+ if (!reload_completed)
+ return 0;
+
+ if (df_regs_ever_live_p (RA_REGNUM) || crtl->profile)
+ return 0;
+
+ if (lm32_compute_frame_size (get_frame_size ()) != 0)
+ return 0;
+
+ return 1;
+}
+
+/* Support function to determine the return address of the function
+ 'count' frames back up the stack. */
+rtx
+lm32_return_addr_rtx (int count, rtx frame)
+{
+ rtx r;
+ if (count == 0)
+ {
+ if (!df_regs_ever_live_p (RA_REGNUM))
+ r = gen_rtx_REG (Pmode, RA_REGNUM);
+ else
+ {
+ r = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, frame,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ set_mem_alias_set (r, get_frame_alias_set ());
+ }
+ }
+ else if (flag_omit_frame_pointer)
+ r = NULL_RTX;
+ else
+ {
+ r = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, frame,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ set_mem_alias_set (r, get_frame_alias_set ());
+ }
+ return r;
+}
+
+/* Return true if EXP should be placed in the small data section. */
+
+static bool
+lm32_in_small_data_p (const_tree exp)
+{
+ /* We want to merge strings, so we never consider them small data. */
+ if (TREE_CODE (exp) == STRING_CST)
+ return false;
+
+ /* Functions are never in the small data area. Duh. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".sdata") == 0 || strcmp (section, ".sbss") == 0)
+ return true;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in sdata because it might be too big when completed. */
+ if (size > 0 && size <= g_switch_value)
+ return true;
+ }
+
+ return false;
+}
+
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+ Assume that the areas do not overlap. */
+
+static void
+lm32_block_move_inline (rtx dest, rtx src, HOST_WIDE_INT length,
+ HOST_WIDE_INT alignment)
+{
+ HOST_WIDE_INT offset, delta;
+ unsigned HOST_WIDE_INT bits;
+ int i;
+ enum machine_mode mode;
+ rtx *regs;
+
+ /* Work out how many bits to move at a time. */
+ switch (alignment)
+ {
+ case 1:
+ bits = 8;
+ break;
+ case 2:
+ bits = 16;
+ break;
+ default:
+ bits = 32;
+ break;
+ }
+
+ mode = mode_for_size (bits, MODE_INT, 0);
+ delta = bits / BITS_PER_UNIT;
+
+ /* Allocate a buffer for the temporary registers. */
+ regs = XALLOCAVEC (rtx, length / delta);
+
+ /* Load as many BITS-sized chunks as possible. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ {
+ regs[i] = gen_reg_rtx (mode);
+ emit_move_insn (regs[i], adjust_address (src, mode, offset));
+ }
+
+ /* Copy the chunks to the destination. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ emit_move_insn (adjust_address (dest, mode, offset), regs[i]);
+
+ /* Mop up any left-over bytes. */
+ if (offset < length)
+ {
+ src = adjust_address (src, BLKmode, offset);
+ dest = adjust_address (dest, BLKmode, offset);
+ move_by_pieces (dest, src, length - offset,
+ MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0);
+ }
+}
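+
+/* For example (illustrative): a 10-byte copy with 4-byte alignment is
+   emitted as two SImode loads into fresh registers and two SImode
+   stores covering bytes 0..7, with move_by_pieces handling the
+   remaining 2 bytes.  */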
+
+/* Expand string/block move operations.
+
+ operands[0] is the pointer to the destination.
+ operands[1] is the pointer to the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is the alignment. */
+
+int
+lm32_expand_block_move (rtx * operands)
+{
+ if ((GET_CODE (operands[2]) == CONST_INT) && (INTVAL (operands[2]) <= 32))
+ {
+ lm32_block_move_inline (operands[0], operands[1], INTVAL (operands[2]),
+ INTVAL (operands[3]));
+ return 1;
+ }
+ return 0;
+}
+
+/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
+ isn't protected by a PIC unspec. */
+int
+nonpic_symbol_mentioned_p (rtx x)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
+ || GET_CODE (x) == PC)
+ return 1;
+
+ /* We don't want to look into the possible MEM location of a
+ CONST_DOUBLE, since we're not going to use it, in general. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ return 0;
+
+ if (GET_CODE (x) == UNSPEC)
+ return 0;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
+ return 1;
+ }
+ else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+lm32_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+ bool small_mode;
+
+ const int arithmetic_latency = 1;
+ const int shift_latency = 1;
+ const int compare_latency = 2;
+ const int multiply_latency = 3;
+ const int load_latency = 3;
+ const int libcall_size_cost = 5;
+
+ /* Determine if we can handle the given mode size in a single instruction. */
+ small_mode = (mode == QImode) || (mode == HImode) || (mode == SImode);
+
+ switch (code)
+ {
+
+ case PLUS:
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ case NEG:
+ if (!speed)
+ *total = COSTS_N_INSNS (LM32_NUM_REGS (mode));
+ else
+ *total =
+ COSTS_N_INSNS (arithmetic_latency + (LM32_NUM_REGS (mode) - 1));
+ break;
+
+ case COMPARE:
+ if (small_mode)
+ {
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (compare_latency);
+ }
+ else
+ {
+ /* FIXME. Guessing here. */
+ *total = COSTS_N_INSNS (LM32_NUM_REGS (mode) * (2 + 3) / 2);
+ }
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (TARGET_BARREL_SHIFT_ENABLED && small_mode)
+ {
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (shift_latency);
+ }
+ else if (TARGET_BARREL_SHIFT_ENABLED)
+ {
+ /* FIXME: Guessing here. */
+ *total = COSTS_N_INSNS (LM32_NUM_REGS (mode) * 4);
+ }
+ else if (small_mode && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ *total = COSTS_N_INSNS (INTVAL (XEXP (x, 1)));
+ }
+ else
+ {
+ /* Libcall. */
+ if (!speed)
+ *total = COSTS_N_INSNS (libcall_size_cost);
+ else
+ *total = COSTS_N_INSNS (100);
+ }
+ break;
+
+ case MULT:
+ if (TARGET_MULTIPLY_ENABLED && small_mode)
+ {
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (multiply_latency);
+ }
+ else
+ {
+ /* Libcall. */
+ if (!speed)
+ *total = COSTS_N_INSNS (libcall_size_cost);
+ else
+ *total = COSTS_N_INSNS (100);
+ }
+ break;
+
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ if (TARGET_DIVIDE_ENABLED && small_mode)
+ {
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ int cycles = 0;
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+
+ while (i)
+ {
+ i >>= 2;
+ cycles++;
+ }
+ if (IN_RANGE (i, 0, 65536))
+ *total = COSTS_N_INSNS (1 + 1 + cycles);
+ else
+ *total = COSTS_N_INSNS (2 + 1 + cycles);
+ return true;
+ }
+ else if (GET_CODE (XEXP (x, 1)) == REG)
+ {
+ *total = COSTS_N_INSNS (1 + GET_MODE_SIZE (mode) / 2);
+ return true;
+ }
+ else
+ {
+ *total = COSTS_N_INSNS (1 + GET_MODE_SIZE (mode) / 2);
+ return false;
+ }
+ }
+ }
+ else
+ {
+ /* Libcall. */
+ if (!speed)
+ *total = COSTS_N_INSNS (libcall_size_cost);
+ else
+ *total = COSTS_N_INSNS (100);
+ }
+ break;
+
+ case HIGH:
+ case LO_SUM:
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (arithmetic_latency);
+ break;
+
+ case ZERO_EXTEND:
+ if (MEM_P (XEXP (x, 0)))
+ *total = COSTS_N_INSNS (0);
+ else if (small_mode)
+ {
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (arithmetic_latency);
+ }
+ else
+ *total = COSTS_N_INSNS (LM32_NUM_REGS (mode) / 2);
+ break;
+
+ case CONST_INT:
+ {
+ switch (outer_code)
+ {
+ case HIGH:
+ case LO_SUM:
+ *total = COSTS_N_INSNS (0);
+ return true;
+
+ case AND:
+ case XOR:
+ case IOR:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ if (satisfies_constraint_L (x))
+ *total = COSTS_N_INSNS (0);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case SET:
+ case PLUS:
+ case MINUS:
+ case COMPARE:
+ if (satisfies_constraint_K (x))
+ *total = COSTS_N_INSNS (0);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case MULT:
+ if (TARGET_MULTIPLY_ENABLED)
+ {
+ if (satisfies_constraint_K (x))
+ *total = COSTS_N_INSNS (0);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ /* Fall through. */
+
+ default:
+ if (satisfies_constraint_K (x))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ }
+
+ case SYMBOL_REF:
+ case CONST:
+ switch (outer_code)
+ {
+ case HIGH:
+ case LO_SUM:
+ *total = COSTS_N_INSNS (0);
+ return true;
+
+ case MEM:
+ case SET:
+ if (g_switch_value)
+ {
+ *total = COSTS_N_INSNS (0);
+ return true;
+ }
+ break;
+ }
+ /* Fall through. */
+
+ case LABEL_REF:
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case SET:
+ *total = COSTS_N_INSNS (1);
+ break;
+
+ case MEM:
+ if (!speed)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (load_latency);
+ break;
+
+ }
+
+ return false;
+}
+
+/* Implement TARGET_CAN_ELIMINATE.  */
+
+bool
+lm32_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
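+ /* When a frame pointer is needed, eliminating to the stack pointer
+ is not possible; the frame pointer must be used instead.  */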
+ return (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : true;
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P. */
+
+static bool
+lm32_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x, bool strict)
+{
+ /* (rM) */
+ if (strict && REG_P (x) && STRICT_REG_OK_FOR_BASE_P (x))
+ return true;
+ if (!strict && REG_P (x) && NONSTRICT_REG_OK_FOR_BASE_P (x))
+ return true;
+
+ /* (rM)+literal */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && ((strict && STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)))
+ || (!strict && NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))))
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && satisfies_constraint_K (XEXP ((x), 1)))
+ return true;
+
+ /* gp(sym) */
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
+ return true;
+
+ return false;
+}
+
+/* Check that a move is not memory to memory.  */
+
+bool
+lm32_move_ok (enum machine_mode mode, rtx operands[2])
+{
+ if (memory_operand (operands[0], mode))
+ return register_or_zero_operand (operands[1], mode);
+ return true;
+}
+
+/* Implement LEGITIMATE_CONSTANT_P. */
+
+bool
+lm32_legitimate_constant_p (rtx x)
+{
+ /* 32-bit addresses require multiple instructions. */
+ if (!flag_pic && reloc_operand (x, GET_MODE (x)))
+ return false;
+
+ return true;
+}
diff --git a/gcc/config/lm32/lm32.h b/gcc/config/lm32/lm32.h
new file mode 100644
index 000000000..3141719b4
--- /dev/null
+++ b/gcc/config/lm32/lm32.h
@@ -0,0 +1,556 @@
+/* Definitions of target machine for GNU compiler, Lattice Mico32 architecture.
+ Contributed by Jon Beniston <jon@beniston.com>
+
+ Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/*-------------------------------*/
+/* Run-time Target Specification */
+/*-------------------------------*/
+
+/* Print subsidiary information on the compiler version in use. */
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (LatticeMico32)")
+#endif
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__lm32__"); \
+ builtin_assert ("cpu=lm32"); \
+ builtin_assert ("machine=lm32"); \
+ if (TARGET_MULTIPLY_ENABLED) \
+ builtin_define ("__multiply_enabled__"); \
+ if (TARGET_DIVIDE_ENABLED) \
+ builtin_define ("__divide_enabled__"); \
+ if (TARGET_BARREL_SHIFT_ENABLED) \
+ builtin_define ("__barrel_shift_enabled__"); \
+ if (TARGET_SIGN_EXTEND_ENABLED) \
+ builtin_define ("__sign_extend_enabled__"); \
+ if (TARGET_USER_ENABLED) \
+ builtin_define ("__user_enabled__"); \
+ } \
+ while (0)
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{mmultiply-enabled} \
+%{mdivide-enabled} \
+%{mbarrel-shift-enabled} \
+%{msign-extend-enabled} \
+%{muser-enabled} \
+"
+
+/* Let the link script define all link options;
+ default to the simulator link script.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+#undef LIB_SPEC
+#define LIB_SPEC "%{!T*:-T sim.ld}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{G*}"
+
+/*---------------------------------*/
+/* Target machine storage layout. */
+/*---------------------------------*/
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+
+#define BITS_PER_UNIT 8
+#define BITS_PER_WORD 32
+#define UNITS_PER_WORD 4
+
+#define POINTER_SIZE 32
+
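+/* Promote sub-word integer values to full words; arithmetic is only
+ performed on word-sized quantities.  */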
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+do { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ (MODE) = word_mode; \
+} while (0)
+
+#define PARM_BOUNDARY 32
+
+#define STACK_BOUNDARY 32
+
+#define BIGGEST_ALIGNMENT 64
+
+#define FUNCTION_BOUNDARY 32
+
+#define EMPTY_FIELD_BOUNDARY 32
+
+#define STRICT_ALIGNMENT 1
+
+#define TARGET_FLOAT_FORMAT IEEE_FLOAT_FORMAT
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* Make arrays and structures word-aligned to allow faster copying etc. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* We need this for the same reason as DATA_ALIGNMENT, namely to cause
+ character arrays to be word-aligned so that `strcpy' calls that copy
+ constants to character arrays can be done inline, and `strcmp' can be
+ optimised to use word loads. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ DATA_ALIGNMENT (TYPE, ALIGN)
+
+/*----------------------------------------*/
+/* Layout of source language data types. */
+/*----------------------------------------*/
+
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#define SIZE_TYPE "unsigned int"
+
+#define PTRDIFF_TYPE "int"
+
+/*---------------------------*/
+/* Standard register usage. */
+/*---------------------------*/
+
+#define FIRST_PSEUDO_REGISTER 32
+
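+/* Hard registers with dedicated roles: return value, global pointer,
+ frame pointer, stack pointer and return address.  */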
+#define RV_REGNUM 1
+#define GP_REGNUM 26
+#define FP_REGNUM 27
+#define SP_REGNUM 28
+#define RA_REGNUM 29
+
+#define G_REG_P(X) ((X)<32)
+
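+/* r0 is hard-wired to zero; gp, sp, ea and ba are also fixed.  */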
+#define FIXED_REGISTERS \
+{ 1, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 1, 0, 1, 0, 1, 1}
+
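+/* r0-r10 are clobbered by calls; r11-r25, fp and ra are preserved
+ (the call patterns clobber ra explicitly).  */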
+#define CALL_USED_REGISTERS \
+{ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 1, 0, 1, 0, 1, 1}
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) G_REG_P(REGNO)
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+( GET_MODE_CLASS (MODE1) == MODE_INT \
+ && GET_MODE_CLASS (MODE2) == MODE_INT \
+ && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
+ && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)
+
+#define AVOID_CCMODE_COPIES
+
+/*----------------------------------*/
+/* Register classes and constants. */
+/*----------------------------------*/
+
+enum reg_class
+{
+ NO_REGS,
+ GENERAL_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES { "NO_REGS", "GENERAL_REGS", "ALL_REGS" }
+
+#define REG_CLASS_CONTENTS \
+{ {0x00000000}, \
+ {0xffffffff}, \
+ {0xffffffff} \
+}
+
+#define REGNO_REG_CLASS(REGNO) \
+ (G_REG_P(REGNO) ? GENERAL_REGS : NO_REGS)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#define INDEX_REG_CLASS NO_REGS
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (G_REG_P (REGNO) || G_REG_P ((unsigned) reg_renumber[REGNO]))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) 0
+
+/*----------------------------------------*/
+/* Stack Layout and Calling Conventions. */
+/*----------------------------------------*/
+
+#define STACK_GROWS_DOWNWARD 1
+
+#define FRAME_GROWS_DOWNWARD 1
+
+#define STACK_POINTER_OFFSET (UNITS_PER_WORD)
+
+#define STARTING_FRAME_OFFSET (UNITS_PER_WORD)
+
+#define FIRST_PARM_OFFSET(FNDECL) (UNITS_PER_WORD)
+
+#define STACK_POINTER_REGNUM SP_REGNUM
+
+#define FRAME_POINTER_REGNUM FP_REGNUM
+
+#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RA_REGNUM)
+
+#define RETURN_ADDR_RTX(count, frame) \
+ lm32_return_addr_rtx (count, frame)
+
+/* FIXME - This is not yet supported. */
+#define STATIC_CHAIN_REGNUM 9
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = lm32_compute_initial_elimination_offset (FROM, TO)
+
+/*-----------------------------*/
+/* Function argument passing. */
+/*-----------------------------*/
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/*--------------------------------*/
+/* Passing Arguments in Registers */
+/*--------------------------------*/
+
+/* The first argument register. */
+#define LM32_FIRST_ARG_REG 1
+
+/* The number of (integer) argument registers available.  */
+#define LM32_NUM_ARG_REGS 8
+
+#define CUMULATIVE_ARGS int
+
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+ (CUM) = 0
+
+#define FUNCTION_ARG_REGNO_P(r) \
+ (((r) >= LM32_FIRST_ARG_REG) && ((r) <= LM32_NUM_ARG_REGS))
+
+/*--------------------*/
+/* Function results. */
+/*--------------------*/
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG ((INTEGRAL_TYPE_P (VALTYPE) \
+ && TYPE_PRECISION (VALTYPE) < BITS_PER_WORD) \
+ ? word_mode \
+ : TYPE_MODE (VALTYPE), \
+ RV_REGNUM)
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, RV_REGNUM)
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == RV_REGNUM)
+
+#define RETURN_IN_MEMORY(TYPE) lm32_return_in_memory (TYPE)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Convert a size in bytes to the number of words needed to hold it.  */
+#define LM32_NUM_INTS(X) (((X) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* The number of (integer) registers required to hold a quantity of
+ type MODE. */
+#define LM32_NUM_REGS(MODE) LM32_NUM_INTS (GET_MODE_SIZE (MODE))
+
+/* The number of (integer) registers required to hold a quantity of
+ mode MODE, using TYPE to determine the size when MODE is BLKmode.  */
+#define LM32_NUM_REGS2(MODE, TYPE) \
+ LM32_NUM_INTS ((MODE) == BLKmode ? \
+ int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE))
+
+#define STRUCT_VALUE 0
+
+/*---------------------------*/
+/* Function entry and exit. */
+/*---------------------------*/
+
+/*-------------*/
+/* Profiling. */
+/*-------------*/
+
+#define FUNCTION_PROFILER(FILE, LABELNO)
+
+/*---------------*/
+/* Trampolines. */
+/*---------------*/
+
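+/* Trampolines are not implemented; see the STATIC_CHAIN_REGNUM FIXME
+ above.  */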
+#define TRAMPOLINE_SIZE 0
+
+/*---------------------*/
+/* Addressing Modes. */
+/*---------------------*/
+
+#define CONSTANT_ADDRESS_P(X) \
+ ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \
+ || (GET_CODE (X) == CONST)))
+
+#define MAX_REGS_PER_ADDRESS 1
+
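+/* Strict checking accepts only hard registers that are valid bases;
+ non-strict checking also accepts pseudos.  */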
+#define STRICT_REG_OK_FOR_BASE_P(X) \
+ (REGNO_OK_FOR_BASE_P (REGNO (X)))
+#define NONSTRICT_REG_OK_FOR_BASE_P(X) \
+ (G_REG_P (REGNO (X)) || !HARD_REGISTER_NUM_P (REGNO (X)))
+
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P(X)
+#else
+#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P(X)
+#endif
+
+#define LEGITIMATE_CONSTANT_P(X) lm32_legitimate_constant_p (X)
+
+/*-------------------------*/
+/* Condition Code Status. */
+/*-------------------------*/
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/*---------*/
+/* Costs. */
+/*---------*/
+
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE
+
+#define BRANCH_COST(speed_p, predictable_p) 4
+
+#define MOVE_RATIO(speed) (speed ? 24 : 3)
+
+/*------------*/
+/* Sections. */
+/*------------*/
+
+#define TEXT_SECTION_ASM_OP "\t.section\t.text"
+#define DATA_SECTION_ASM_OP "\t.section\t.data"
+#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\""
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\""
+
+/*-------*/
+/* PIC. */
+/*-------*/
+
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? GP_REGNUM : INVALID_REGNUM)
+
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ (!(nonpic_symbol_mentioned_p (X)))
+
+/*-------------*/
+/* Assembler. */
+/*-------------*/
+
+#define ASM_COMMENT_START "#"
+
+#define ASM_APP_ON "#APP\n"
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { \
+ fputc ( '\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs ( " = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ( '\n', FILE); \
+ } while (0)
+
+/* Override default implementation in elfos.h to support -G. */
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+do { \
+ if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ switch_to_section (sbss_section); \
+ else \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \
+ ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \
+} while (0)
+
+/* Override default implementation in elfos.h to support -G. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+do \
+{ \
+ if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ { \
+ switch_to_section (sbss_section); \
+ (*targetm.asm_out.globalize_label) (FILE, NAME); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \
+ ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \
+ } \
+ else \
+ { \
+ switch_to_section (bss_section); \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+} \
+while (0)
+
+#define ASM_OUTPUT_LABEL(FILE, NAME) \
+ do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0)
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ do { \
+ const char *xname = (NAME); \
+ if (xname[0] == '@') \
+ xname += 1; \
+ if (xname[0] == '*') \
+ xname += 1; \
+ fputs (xname, FILE); \
+ } while (0)
+
+#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYMBOL) \
+ do { \
+ assemble_name (STREAM, XSTR (SYMBOL, 0)); \
+ } while (0)
+
+#define GLOBAL_ASM_OP "\t.global\t"
+
+#define REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "gp", "fp", "sp", "ra", "ea", "ba"}
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
+ (((CHAR) == '&') || ((CHAR) == '@') || ((CHAR) == '*'))
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ lm32_print_operand (FILE, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ lm32_print_operand_address (FILE, ADDR)
+
+#ifndef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+#endif
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", (1 << (LOG))); } while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+do { \
+ char label[64]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ fprintf (FILE, "\n\t.word\t"); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "\n"); \
+} while (0)
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+do { \
+ char label[64]; \
+ fprintf (FILE, "\t.word\t("); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "-"); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, ")\n"); \
+} while (0)
+
+/*-------------*/
+/* Debugging. */
+/*-------------*/
+
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/*--------*/
+/* Misc. */
+/*--------*/
+
+#define CASE_VECTOR_MODE Pmode
+
+#define WORD_REGISTER_OPERATIONS
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+#define MOVE_MAX UNITS_PER_WORD
+#define MAX_MOVE_MAX 4
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE SImode
+
+#ifndef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C
+#endif
+
+#define STORE_FLAG_VALUE 1
diff --git a/gcc/config/lm32/lm32.md b/gcc/config/lm32/lm32.md
new file mode 100644
index 000000000..7539cb065
--- /dev/null
+++ b/gcc/config/lm32/lm32.md
@@ -0,0 +1,996 @@
+;; Machine description of the Lattice Mico32 architecture for GNU C compiler.
+;; Contributed by Jon Beniston <jon@beniston.com>
+
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Include predicate and constraint definitions
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Register numbers
+(define_constants
+ [(RA_REGNUM 29) ; return address register.
+ ]
+)
+
+;; LM32 specific volatile operations
+(define_constants
+ [(UNSPECV_BLOCKAGE 1)] ; prevent scheduling across prologue/epilogue boundaries
+)
+
+;; LM32 specific operations
+(define_constants
+ [(UNSPEC_GOT 2)
+ (UNSPEC_GOTOFF_HI16 3)
+ (UNSPEC_GOTOFF_LO16 4)]
+)
+
+;; ---------------------------------
+;; instruction types
+;; ---------------------------------
+
+(define_attr "type"
+ "unknown,load,store,arith,compare,shift,multiply,divide,call,icall,ubranch,uibranch,cbranch"
+ (const_string "unknown"))
+
+;; ---------------------------------
+;; instruction lengths
+;; ---------------------------------
+
+; All instructions are 4 bytes, except for conditional branches whose
+; targets are out of range; those are expanded to two instructions
+; (the branch offset field is 16 bits, hence the 32768 range test below).
+(define_attr "length" ""
+ (cond [
+ (eq_attr "type" "cbranch")
+ (if_then_else
+ (lt (abs (minus (match_dup 2) (pc)))
+ (const_int 32768)
+ )
+ (const_int 4)
+ (const_int 8)
+ )
+ ]
+ (const_int 4))
+)
+
+;; ---------------------------------
+;; scheduling
+;; ---------------------------------
+
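+;; The cpu units below are assumed to model the execute ("x"),
+;; memory ("m") and writeback ("w") pipeline stages.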
+(define_automaton "lm32")
+
+(define_cpu_unit "x" "lm32")
+(define_cpu_unit "m" "lm32")
+(define_cpu_unit "w" "lm32")
+
+(define_insn_reservation "singlecycle" 1
+ (eq_attr "type" "store,arith,call,icall,ubranch,uibranch,cbranch")
+ "x")
+
+(define_insn_reservation "twocycle" 2
+ (eq_attr "type" "compare,shift,divide")
+ "x,m")
+
+(define_insn_reservation "threecycle" 3
+ (eq_attr "type" "load,multiply")
+ "x,m,w")
+
+;; ---------------------------------
+;; mov
+;; ---------------------------------
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ /* Source operand for store must be in a register. */
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+ }
+}")
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ /* Source operand for store must be in a register. */
+ operands[1] = force_reg (HImode, operands[1]);
+ }
+ }
+}")
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (can_create_pseudo_p ())
+ {
+ if (GET_CODE (operands[0]) == MEM
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == MEM))
+ {
+ /* Source operand for store must be in a register. */
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ }
+
+ if (flag_pic && symbolic_operand (operands[1], SImode))
+ {
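+ /* Labels and local, non-weak symbols can be addressed GP-relative
+ using a gotoffhi16/gotofflo16 pair; everything else is loaded
+ from the GOT.  */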
+ if (GET_CODE (operands[1]) == LABEL_REF
+ || (GET_CODE (operands[1]) == SYMBOL_REF
+ && SYMBOL_REF_LOCAL_P (operands[1])
+ && !SYMBOL_REF_WEAK (operands[1])))
+ {
+ emit_insn (gen_movsi_gotoff_hi16 (operands[0], operands[1]));
+ emit_insn (gen_addsi3 (operands[0],
+ operands[0],
+ pic_offset_table_rtx));
+ emit_insn (gen_movsi_gotoff_lo16 (operands[0],
+ operands[0],
+ operands[1]));
+ }
+ else
+ emit_insn (gen_movsi_got (operands[0], operands[1]));
+ crtl->uses_pic_offset_table = 1;
+ DONE;
+ }
+ else if (flag_pic && GET_CODE (operands[1]) == CONST)
+ {
+ rtx op = XEXP (operands[1], 0);
+ if (GET_CODE (op) == PLUS)
+ {
+ rtx arg0 = XEXP (op, 0);
+ rtx arg1 = XEXP (op, 1);
+ if (GET_CODE (arg0) == LABEL_REF
+ || (GET_CODE (arg0) == SYMBOL_REF
+ && SYMBOL_REF_LOCAL_P (arg0)
+ && !SYMBOL_REF_WEAK (arg0)))
+ {
+ emit_insn (gen_movsi_gotoff_hi16 (operands[0], arg0));
+ emit_insn (gen_addsi3 (operands[0],
+ operands[0],
+ pic_offset_table_rtx));
+ emit_insn (gen_movsi_gotoff_lo16 (operands[0],
+ operands[0],
+ arg0));
+ }
+ else
+ emit_insn (gen_movsi_got (operands[0], arg0));
+ emit_insn (gen_addsi3 (operands[0], operands[0], arg1));
+ crtl->uses_pic_offset_table = 1;
+ DONE;
+ }
+ }
+ else if (!flag_pic && reloc_operand (operands[1], GET_MODE (operands[1])))
+ {
+ emit_insn (gen_rtx_SET (SImode, operands[0], gen_rtx_HIGH (SImode, operands[1])));
+ emit_insn (gen_rtx_SET (SImode, operands[0], gen_rtx_LO_SUM (SImode, operands[0], operands[1])));
+ DONE;
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
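+ /* A constant that fits no 16-bit immediate form is built with a
+ move of the high part followed by an OR of the low part.  */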
+ if (!(satisfies_constraint_K (operands[1])
+ || satisfies_constraint_L (operands[1])
+ || satisfies_constraint_U (operands[1])))
+ {
+ emit_insn (gen_movsi_insn (operands[0],
+ GEN_INT (INTVAL (operands[1]) & ~0xffff)));
+ emit_insn (gen_iorsi3 (operands[0],
+ operands[0],
+ GEN_INT (INTVAL (operands[1]) & 0xffff)));
+ DONE;
+ }
+ }
+}")
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand" "")
+ (match_operand:BLK 1 "general_operand" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ ""
+{
+ if (!lm32_expand_block_move (operands))
+ FAIL;
+ DONE;
+})
+
+;; ---------------------------------
+;; load/stores/moves
+;; ---------------------------------
+
+(define_insn "movsi_got"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "" "")] UNSPEC_GOT))]
+ "flag_pic"
+ "lw %0, (gp+got(%1))"
+ [(set_attr "type" "load")]
+)
+
+(define_insn "movsi_gotoff_hi16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "" "")] UNSPEC_GOTOFF_HI16))]
+ "flag_pic"
+ "orhi %0, r0, gotoffhi16(%1)"
+ [(set_attr "type" "load")]
+)
+
+(define_insn "movsi_gotoff_lo16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(plus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "" ""))] UNSPEC_GOTOFF_LO16))]
+ "flag_pic"
+ "addi %0, %1, gotofflo16(%2)"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "*movsi_lo_sum"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reloc_operand" "i")))]
+ "!flag_pic"
+ "ori %0, %0, lo(%2)"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m,m,r")
+ (match_operand:QI 1 "general_operand" "m,r,r,J,n"))]
+ "lm32_move_ok (QImode, operands)"
+ "@
+ lbu %0, %1
+ or %0, %1, r0
+ sb %0, %1
+ sb %0, r0
+ addi %0, r0, %1"
+ [(set_attr "type" "load,arith,store,store,arith")]
+)
+
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,m,r,r")
+ (match_operand:HI 1 "general_operand" "m,r,r,J,K,L"))]
+ "lm32_move_ok (HImode, operands)"
+ "@
+ lhu %0, %1
+ or %0, %1, r0
+ sh %0, %1
+ sh %0, r0
+ addi %0, r0, %1
+ ori %0, r0, %1"
+ [(set_attr "type" "load,arith,store,store,arith,arith")]
+)
+
+(define_insn "movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m,m,r,r,r,r,r")
+ (match_operand:SI 1 "movsi_rhs_operand" "m,r,r,J,K,L,U,S,Y"))]
+ "lm32_move_ok (SImode, operands)"
+ "@
+ lw %0, %1
+ or %0, %1, r0
+ sw %0, %1
+ sw %0, r0
+ addi %0, r0, %1
+ ori %0, r0, %1
+ orhi %0, r0, hi(%1)
+ mva %0, gp(%1)
+ orhi %0, r0, hi(%1)"
+ [(set_attr "type" "load,arith,store,store,arith,arith,arith,arith,arith")]
+)
+
+;; ---------------------------------
+;; sign and zero extension
+;; ---------------------------------
+
+(define_insn "*extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m,r")))]
+ "TARGET_SIGN_EXTEND_ENABLED || (GET_CODE (operands[1]) != REG)"
+ "@
+ lb %0, %1
+ sextb %0, %1"
+ [(set_attr "type" "load,arith")]
+)
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m,r")))]
+ ""
+ "@
+ lbu %0, %1
+ andi %0, %1, 0xff"
+ [(set_attr "type" "load,arith")]
+)
+
+(define_insn "*extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m,r")))]
+ "TARGET_SIGN_EXTEND_ENABLED || (GET_CODE (operands[1]) != REG)"
+ "@
+ lb %0, %1
+ sextb %0, %1"
+ [(set_attr "type" "load,arith")]
+)
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m,r")))]
+ ""
+ "@
+ lbu %0, %1
+ andi %0, %1, 0xff"
+ [(set_attr "type" "load,arith")]
+)
+
+(define_insn "*extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
+ "TARGET_SIGN_EXTEND_ENABLED || (GET_CODE (operands[1]) != REG)"
+ "@
+ lh %0, %1
+ sexth %0, %1"
+ [(set_attr "type" "load,arith")]
+)
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
+ ""
+ "@
+ lhu %0, %1
+ andi %0, %1, 0xffff"
+ [(set_attr "type" "load,arith")]
+)
+
+;; ---------------------------------
+;; compare
+;; ---------------------------------
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "register_or_int_operand")]))]
+ ""
+{
+ lm32_expand_scc (operands);
+ DONE;
+})
+
+(define_insn "*seq"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (eq:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ ""
+ "@
+ cmpe %0, %z1, %2
+ cmpei %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+(define_insn "*sne"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ne:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ ""
+ "@
+ cmpne %0, %z1, %2
+ cmpnei %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+(define_insn "*sgt"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (gt:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ ""
+ "@
+ cmpg %0, %z1, %2
+ cmpgi %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+(define_insn "*sge"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ge:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ ""
+ "@
+ cmpge %0, %z1, %2
+ cmpgei %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+(define_insn "*sgtu"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (gtu:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ ""
+ "@
+ cmpgu %0, %z1, %2
+ cmpgui %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+(define_insn "*sgeu"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (geu:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ ""
+ "@
+ cmpgeu %0, %z1, %2
+ cmpgeui %0, %z1, %2"
+ [(set_attr "type" "compare")]
+)
+
+;; ---------------------------------
+;; unconditional branch
+;; ---------------------------------
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "bi %0"
+ [(set_attr "type" "ubranch")]
+)
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "b %0"
+ [(set_attr "type" "uibranch")]
+)
+
+;; ---------------------------------
+;; conditional branch
+;; ---------------------------------
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ lm32_expand_conditional_branch (operands);
+ DONE;
+}")
+
+(define_insn "*beq"
+ [(set (pc)
+ (if_then_else (eq:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "be %z0,%z1,%2"
+ : "bne %z0,%z1,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+(define_insn "*bne"
+ [(set (pc)
+ (if_then_else (ne:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "bne %z0,%z1,%2"
+ : "be %z0,%z1,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+(define_insn "*bgt"
+ [(set (pc)
+ (if_then_else (gt:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "bg %z0,%z1,%2"
+ : "bge %z1,%z0,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+(define_insn "*bge"
+ [(set (pc)
+ (if_then_else (ge:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "bge %z0,%z1,%2"
+ : "bg %z1,%z0,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+(define_insn "*bgtu"
+ [(set (pc)
+ (if_then_else (gtu:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "bgu %z0,%z1,%2"
+ : "bgeu %z1,%z0,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+(define_insn "*bgeu"
+ [(set (pc)
+ (if_then_else (geu:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "register_or_zero_operand" "rJ"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return get_attr_length (insn) == 4
+ ? "bgeu %z0,%z1,%2"
+ : "bgu %z1,%z0,8\n\tbi %2";
+}
+ [(set_attr "type" "cbranch")])
+
+;; ---------------------------------
+;; call
+;; ---------------------------------
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RA_REGNUM))
+ ])]
+ ""
+ "
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (!CONSTANT_ADDRESS_P (addr))
+ XEXP (operands[0], 0) = force_reg (Pmode, addr);
+}")
+
+(define_insn "*call"
+ [(call (mem:SI (match_operand:SI 0 "call_operand" "r,s"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RA_REGNUM))]
+ ""
+ "@
+ call %0
+ calli %0"
+ [(set_attr "type" "call,icall")]
+)
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RA_REGNUM))
+ ])]
+ ""
+ "
+{
+ rtx addr = XEXP (operands[1], 0);
+ if (!CONSTANT_ADDRESS_P (addr))
+ XEXP (operands[1], 0) = force_reg (Pmode, addr);
+}")
+
+(define_insn "*call_value"
+ [(set (match_operand 0 "register_operand" "=r,r")
+ (call (mem:SI (match_operand:SI 1 "call_operand" "r,s"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RA_REGNUM))]
+ ""
+ "@
+ call %1
+ calli %1"
+ [(set_attr "type" "call,icall")]
+)
+
+(define_insn "return_internal"
+ [(use (match_operand:SI 0 "register_operand" "r"))
+ (return)]
+ ""
+ "b %0"
+ [(set_attr "type" "uibranch")]
+)
+
+(define_insn "return"
+ [(return)]
+ "lm32_can_use_return ()"
+ "ret"
+ [(set_attr "type" "uibranch")]
+)
+
+;; ---------------------------------
+;; switch/case statements
+;; ---------------------------------
+
+(define_expand "tablejump"
+ [(set (pc) (match_operand 0 "register_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "
+{
+ rtx target = operands[0];
+ if (flag_pic)
+ {
+ /* For PIC, the table entry is relative to the start of the table. */
+ rtx label = gen_reg_rtx (SImode);
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1]));
+ emit_insn (gen_addsi3 (target, operands[0], label));
+ }
+ emit_jump_insn (gen_tablejumpsi (target, operands[1]));
+ DONE;
+}")
+
+(define_insn "tablejumpsi"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "b %0"
+ [(set_attr "type" "ubranch")]
+)
+
+;; ---------------------------------
+;; arithmetic
+;; ---------------------------------
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ ""
+ "@
+ add %0, %z1, %2
+ addi %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "register_or_zero_operand" "rJ")))]
+ ""
+ "sub %0, %z1, %z2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_K_operand" "r,K")))]
+ "TARGET_MULTIPLY_ENABLED"
+ "@
+ mul %0, %z1, %2
+ muli %0, %z1, %2"
+ [(set_attr "type" "multiply")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIVIDE_ENABLED"
+ "divu %0, %z1, %2"
+ [(set_attr "type" "divide")]
+)
+
+(define_insn "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umod:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIVIDE_ENABLED"
+ "modu %0, %z1, %2"
+ [(set_attr "type" "divide")]
+)
+
+;; ---------------------------------
+;; negation and inversion
+;; ---------------------------------
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")))]
+ ""
+ "sub %0, r0, %z1"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")))]
+ ""
+ "not %0, %z1"
+ [(set_attr "type" "arith")]
+)
+
+;; ---------------------------------
+;; logical
+;; ---------------------------------
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ ""
+ "@
+ and %0, %z1, %2
+ andi %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ ""
+ "@
+ or %0, %z1, %2
+ ori %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ ""
+ "@
+ xor %0, %z1, %2
+ xori %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "*norsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (not:SI (ior:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L"))))]
+ ""
+ "@
+ nor %0, %z1, %2
+ nori %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+(define_insn "*xnorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (not:SI (xor:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L"))))]
+ ""
+ "@
+ xnor %0, %z1, %2
+ xnori %0, %z1, %2"
+ [(set_attr "type" "arith")]
+)
+
+;; ---------------------------------
+;; shifts
+;; ---------------------------------
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_or_zero_operand" "")
+ (match_operand:SI 2 "register_or_L_operand" "")))]
+ ""
+{
+ if (!TARGET_BARREL_SHIFT_ENABLED)
+ {
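+ /* Without a barrel shifter, a short constant left shift can be
+ open-coded as a chain of adds; each add doubles the value.  */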
+ if (!optimize_size
+ && satisfies_constraint_L (operands[2])
+ && INTVAL (operands[2]) <= 8)
+ {
+ int i;
+ int shifts = INTVAL (operands[2]);
+
+ if (shifts == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn (gen_addsi3 (operands[0], operands[1], operands[1]));
+ for (i = 1; i < shifts; i++)
+ emit_insn (gen_addsi3 (operands[0], operands[0], operands[0]));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+})
+
+(define_insn "*ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ "TARGET_BARREL_SHIFT_ENABLED"
+ "@
+ sl %0, %z1, %2
+ sli %0, %z1, %2"
+ [(set_attr "type" "shift")]
+)
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "")
+ (match_operand:SI 2 "register_or_L_operand" "")))]
+ ""
+{
+ if (!TARGET_BARREL_SHIFT_ENABLED)
+ {
+ if (!optimize_size
+ && satisfies_constraint_L (operands[2])
+ && INTVAL (operands[2]) <= 8)
+ {
+ int i;
+ int shifts = INTVAL (operands[2]);
+ rtx one = GEN_INT (1);
+
+ if (shifts == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn (gen_ashrsi3_1bit (operands[0], operands[1], one));
+ for (i = 1; i < shifts; i++)
+ emit_insn (gen_ashrsi3_1bit (operands[0], operands[0], one));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+})
+
+(define_insn "*ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ "TARGET_BARREL_SHIFT_ENABLED"
+ "@
+ sr %0, %z1, %2
+ sri %0, %z1, %2"
+ [(set_attr "type" "shift")]
+)
+
+(define_insn "ashrsi3_1bit"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "constant_M_operand" "M")))]
+ "!TARGET_BARREL_SHIFT_ENABLED"
+ "sri %0, %z1, %2"
+ [(set_attr "type" "shift")]
+)
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "")
+ (match_operand:SI 2 "register_or_L_operand" "")))]
+ ""
+{
+ if (!TARGET_BARREL_SHIFT_ENABLED)
+ {
+ if (!optimize_size
+ && satisfies_constraint_L (operands[2])
+ && INTVAL (operands[2]) <= 8)
+ {
+ int i;
+ int shifts = INTVAL (operands[2]);
+ rtx one = GEN_INT (1);
+
+ if (shifts == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_insn (gen_lshrsi3_1bit (operands[0], operands[1], one));
+ for (i = 1; i < shifts; i++)
+ emit_insn (gen_lshrsi3_1bit (operands[0], operands[0], one));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+})
+
+(define_insn "*lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "rJ,rJ")
+ (match_operand:SI 2 "register_or_L_operand" "r,L")))]
+ "TARGET_BARREL_SHIFT_ENABLED"
+ "@
+ sru %0, %z1, %2
+ srui %0, %z1, %2"
+ [(set_attr "type" "shift")]
+)
+
+(define_insn "lshrsi3_1bit"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "constant_M_operand" "M")))]
+ "!TARGET_BARREL_SHIFT_ENABLED"
+ "srui %0, %z1, %2"
+ [(set_attr "type" "shift")]
+)
+
+;; ---------------------------------
+;; function entry / exit
+;; ---------------------------------
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "
+{
+ lm32_expand_prologue ();
+ DONE;
+}")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ lm32_expand_epilogue ();
+ DONE;
+}")
+
+;; ---------------------------------
+;; nop
+;; ---------------------------------
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "arith")]
+)
+
+;; ---------------------------------
+;; blockage
+;; ---------------------------------
+
+;; used to stop the scheduler from
+;; scheduling code across certain boundaries
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")]
+)
diff --git a/gcc/config/lm32/lm32.opt b/gcc/config/lm32/lm32.opt
new file mode 100644
index 000000000..9efecdd7c
--- /dev/null
+++ b/gcc/config/lm32/lm32.opt
@@ -0,0 +1,40 @@
+; Options for the Lattice Mico32 port of the compiler.
+; Contributed by Jon Beniston <jon@beniston.com>
+;
+; Copyright (C) 2009 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published
+; by the Free Software Foundation; either version 3, or (at your
+; option) any later version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mmultiply-enabled
+Target Report Mask(MULTIPLY_ENABLED)
+Enable multiply instructions
+
+mdivide-enabled
+Target Report Mask(DIVIDE_ENABLED)
+Enable divide and modulus instructions
+
+mbarrel-shift-enabled
+Target Report Mask(BARREL_SHIFT_ENABLED)
+Enable barrel shift instructions
+
+msign-extend-enabled
+Target Report Mask(SIGN_EXTEND_ENABLED)
+Enable sign extend instructions
+
+muser-enabled
+Target Report Mask(USER_ENABLED)
+Enable user-defined instructions
diff --git a/gcc/config/lm32/predicates.md b/gcc/config/lm32/predicates.md
new file mode 100644
index 000000000..7137c0114
--- /dev/null
+++ b/gcc/config/lm32/predicates.md
@@ -0,0 +1,77 @@
+;; Predicate definitions for Lattice Mico32 architecture.
+;; Contributed by Jon Beniston <jon@beniston.com>
+;;
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "const0_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+(define_predicate "constant_K_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_K (op)")))
+
+(define_predicate "constant_L_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_L (op)")))
+
+(define_predicate "constant_M_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_M (op)")))
+
+(define_predicate "register_or_zero_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+(define_predicate "register_or_K_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "constant_K_operand")))
+
+(define_predicate "register_or_L_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "constant_L_operand")))
+
+(define_predicate "register_or_int_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_code "const_int")))
+
+(define_predicate "reloc_operand"
+ (ior (ior (match_code "label_ref")
+ (match_code "symbol_ref"))
+ (match_code "const")))
+
+(define_predicate "symbolic_operand"
+ (ior (match_code "label_ref")
+ (match_code "symbol_ref")))
+
+(define_predicate "no_pic_small_symbol"
+ (match_code "symbol_ref")
+{
+ return !flag_pic && SYMBOL_REF_SMALL_P (op);
+})
+
+(define_predicate "call_operand"
+ (ior (match_code "symbol_ref")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "movsi_rhs_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (ior (match_code "const_int")
+ (ior (match_test "satisfies_constraint_S (op)")
+ (match_test "satisfies_constraint_Y (op)")))))
diff --git a/gcc/config/lm32/rtems.h b/gcc/config/lm32/rtems.h
new file mode 100644
index 000000000..44a527b8d
--- /dev/null
+++ b/gcc/config/lm32/rtems.h
@@ -0,0 +1,32 @@
+/* Definitions for rtems targeting a lm32 using ELF.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
+
+/* Use the default.  */
+#undef LINK_GCC_C_SEQUENCE_SPEC
diff --git a/gcc/config/lm32/sfp-machine.h b/gcc/config/lm32/sfp-machine.h
new file mode 100644
index 000000000..190384854
--- /dev/null
+++ b/gcc/config/lm32/sfp-machine.h
@@ -0,0 +1,51 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
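+/* The LM32 is big-endian; this matches BYTES_BIG_ENDIAN in lm32.h.  */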
+#define __BYTE_ORDER __BIG_ENDIAN
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
diff --git a/gcc/config/lm32/t-fprules-softfp b/gcc/config/lm32/t-fprules-softfp
new file mode 100644
index 000000000..f99f51cfd
--- /dev/null
+++ b/gcc/config/lm32/t-fprules-softfp
@@ -0,0 +1,5 @@
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := lm32/sfp-machine.h
diff --git a/gcc/config/lm32/t-lm32 b/gcc/config/lm32/t-lm32
new file mode 100644
index 000000000..ec9a18b73
--- /dev/null
+++ b/gcc/config/lm32/t-lm32
@@ -0,0 +1,2 @@
+# for multilib
+MULTILIB_OPTIONS = mbarrel-shift-enabled mmultiply-enabled mdivide-enabled msign-extend-enabled
diff --git a/gcc/config/lm32/uclinux-elf.h b/gcc/config/lm32/uclinux-elf.h
new file mode 100644
index 000000000..ce3689b17
--- /dev/null
+++ b/gcc/config/lm32/uclinux-elf.h
@@ -0,0 +1,84 @@
+/* Definitions for LM32 running Linux-based GNU systems using ELF
+ Copyright (C) 1993, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Philip Blundell <philb@gnu.org>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* elfos.h should have already been included. Now just override
+ any conflicting definitions and add any extras. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (LM32 GNU/Linux with ELF)", stderr);
+
+/* Do not assume anything about header files. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C
+
+/* The GNU C++ standard library requires that these macros be defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+/* Now we define the strings used to build the spec file. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:-lc} "
+
+#define LIBGCC_SPEC "-lgcc"
+
+/* Provide a STARTFILE_SPEC appropriate for GNU/Linux.  Here we add
+ the GNU/Linux magical crtbegin.o file (see crtstuff.c) which
+ provides part of the support for getting C++ file-scope static
+ objects constructed before entering `main'.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
+ %{!p:%{profile:gcrt1.o%s} \
+ %{!profile:crt1.o%s}}}} \
+ crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+/* Provide an ENDFILE_SPEC appropriate for GNU/Linux.  Here we tack on
+ the GNU/Linux magical crtend.o file (see crtstuff.c) which
+ provides part of the support for getting C++ file-scope static
+ objects constructed before entering `main', followed by a normal
+ GNU/Linux "finalizer" file, `crtn.o'.  */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} \
+ %{static:-Bstatic} \
+ %{shared:-shared} \
+ %{symbolic:-Bsymbolic} \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /lib/ld-linux.so.2"
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{G*} %{!fno-PIC:-fPIC}"
+
diff --git a/gcc/config/lynx.h b/gcc/config/lynx.h
new file mode 100644
index 000000000..656dae563
--- /dev/null
+++ b/gcc/config/lynx.h
@@ -0,0 +1,177 @@
+/* Target independent definitions for LynxOS.
+ Copyright (C) 1993, 1994, 1995, 1996, 1999, 2000, 2002, 2003, 2004,
+ 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* In this file we set up defaults that can be chosen by
+ <target>/lynx.h files. A target-specific lynx.h file can decide
+ either to define and override these definitions or to use them by
+ ensuring they are undefined at this point. If we were to #undef
+ them here we might accidentally disable some target-specific
+ defines. */
+
+#ifndef EXTRA_OS_LYNX_TARGET_SPECS
+# define EXTRA_OS_LYNX_TARGET_SPECS
+#endif
+
+#ifndef EXTRA_OS_LYNX_SPECS
+# define EXTRA_OS_LYNX_SPECS \
+ { "cpp_os_lynx", CPP_OS_LYNX_SPEC }, \
+ { "lib_os_lynx", LIB_OS_LYNX_SPEC }, \
+ { "link_os_lynx", LINK_OS_LYNX_SPEC }, \
+ { "startfile_os_lynx", STARTFILE_OS_LYNX_SPEC }, \
+ { "endfile_os_lynx", ENDFILE_OS_LYNX_SPEC }, \
+ EXTRA_OS_LYNX_TARGET_SPECS
+#endif
+
+#ifndef SUBTARGET_EXTRA_SPECS
+# define SUBTARGET_EXTRA_SPECS EXTRA_OS_LYNX_SPECS
+#endif
+
+#ifndef CPP_SPEC
+# define CPP_SPEC "%(cpp_cpu) %(cpp_os_lynx)"
+#endif
+
+#ifndef LIB_SPEC
+# define LIB_SPEC "%(lib_os_lynx)"
+#endif
+
+#ifndef LINK_SPEC
+# define LINK_SPEC "%(link_os_lynx)"
+#endif
+
+#ifndef STARTFILE_SPEC
+# define STARTFILE_SPEC "%(startfile_os_lynx)"
+#endif
+
+#ifndef ENDFILE_SPEC
+# define ENDFILE_SPEC "%(endfile_os_lynx)"
+#endif
+
+#ifndef CPP_OS_LYNX_SPEC
+# define CPP_OS_LYNX_SPEC \
+"%{mthreads: \
+ %{mlegacy-threads: \
+ %ecannot use mthreads and mlegacy-threads together}} \
+ %{mthreads: -D_MULTITHREADED} \
+ %{mlegacy-threads: -D_THREADS_POSIX4ad4} \
+ -Asystem=lynx -Asystem=unix -D__Lynx__ -D__unix__"
+#endif
+
+#ifndef LIB_OS_LYNX_SPEC
+# define LIB_OS_LYNX_SPEC \
+"%{mlegacy-threads:-lposix-pre1c} -lm -lc"
+#endif
+
+/* We link static executables for LynxOS by default unless -mshared is
+ used when linking an executable. Along the same line, we link to
+ shared libraries when linking a shared object by default unless
+ -static is used.
+
+ We have to pass in our -L options here otherwise the translated
+ startfile directories (%D) will take priority over this.
+ Furthermore since we have to pass in -L options here we have to
+ make sure that -L options provided by the user take priority over
+ everything we specify. */
+
+#ifndef LINK_OS_LYNX_SPEC
+# define LINK_OS_LYNX_SPEC \
+"%{shared} %{static} \
+ %{mshared: %{static: %ecannot use mshared and static together}} \
+ %{!mshared: %{!shared: %{!static: -static}}} \
+ %{L*} \
+ %{mthreads: \
+ %{mshared: -L/lib/thread/shlib -rpath /lib/thread/shlib} \
+ %{shared: \
+ %{!static: -L/lib/thread/shlib -rpath /lib/thread/shlib} \
+ %{!mshared: -L/lib/thread}} \
+ %{shared: %{static: -L/lib/thread}}} \
+ %{!mthreads: \
+ %{mshared: -L/lib/shlib -rpath /lib/shlib} \
+ %{shared: -L/lib/shlib -rpath /lib/shlib}} \
+ %{mlegacy-threads:-lposix-pre1c} -lm -lc"
+#endif
+
+#ifndef STARTFILE_OS_LYNX_SPEC
+# define STARTFILE_OS_LYNX_SPEC \
+"%{!shared: \
+ %{!mthreads: \
+ %{p:gcrt1.o%s} %{pg:gcrt1.o%s} \
+ %{!p:%{!pg:crt1.o%s}}} \
+ %{mthreads: \
+ %{p:thread/gcrt1.o%s} %{pg:thread/gcrt1.o%s} \
+ %{!p:%{!pg:thread/crt1.o%s }}}}\
+ %{mthreads: thread/crti.o%s} %{!mthreads: crti.o%s} \
+ %{!shared: crtbegin.o%s} \
+ %{shared: crtbeginS.o%s}"
+#endif
+
+#ifndef ENDFILE_OS_LYNX_SPEC
+# define ENDFILE_OS_LYNX_SPEC \
+"%{!shared: crtend.o%s} \
+ %{shared: crtendS.o%s} \
+ %{mthreads: thread/crtn.o%s} %{!mthreads: crtn.o%s}"
+#endif
+
+/* Define the actual types of some ANSI-mandated types. */
+
+#ifndef SIZE_TYPE
+# define SIZE_TYPE "unsigned int"
+#endif
+
+#ifndef PTRDIFF_TYPE
+# define PTRDIFF_TYPE "int"
+#endif
+
+#ifndef WCHAR_TYPE
+# define WCHAR_TYPE "long int"
+#endif
+
+#ifndef WCHAR_TYPE_SIZE
+# define WCHAR_TYPE_SIZE BITS_PER_WORD
+#endif
+
+/* Define ASM_OUTPUT_ALIGN to use the .balign directive rather than
+ the .align directive with GAS. */
+
+#ifndef ASM_OUTPUT_ALIGN
+# define ASM_OUTPUT_ALIGN(FILE, LOG) \
+ do \
+ { \
+ if ((LOG) != 0) \
+ fprintf ((FILE), "\t.balign %d\n", 1 << (LOG)); \
+ } \
+ while (0)
+#endif
+
+/* Keep the *_DEBUGGING_INFO defines from elfos.h except that stabs is
+ the default on LynxOS. */
+
+#ifndef PREFERRED_DEBUGGING_TYPE
+# define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+#endif
+
+/* We have C++ support in our system headers. */
+
+#ifndef NO_IMPLICIT_EXTERN_C
+# define NO_IMPLICIT_EXTERN_C
+#endif
+
+#ifndef TARGET_POSIX_IO
+# define TARGET_POSIX_IO
+#endif
diff --git a/gcc/config/lynx.opt b/gcc/config/lynx.opt
new file mode 100644
index 000000000..5b65bd9a0
--- /dev/null
+++ b/gcc/config/lynx.opt
@@ -0,0 +1,31 @@
+; Processor-independent options for LynxOS.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mlegacy-threads
+Target RejectNegative
+Support legacy multi-threading
+
+mshared
+Target RejectNegative
+Use shared libraries
+
+mthreads
+Target RejectNegative
+Support multi-threading
diff --git a/gcc/config/m32c/addsub.md b/gcc/config/m32c/addsub.md
new file mode 100644
index 000000000..3d24bc7ae
--- /dev/null
+++ b/gcc/config/m32c/addsub.md
@@ -0,0 +1,260 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; add, sub
+
+(define_insn "addqi3"
+ [(set (match_operand:QI 0 "mra_or_sp_operand"
+ "=SdRhl,SdRhl,??Rmm,??Rmm, *Raa,*Raa,SdRhl,??Rmm")
+ (plus:QI (match_operand:QI 1 "mra_operand"
+ "%0,0,0,0, 0,0,0,0")
+ (match_operand:QI 2 "mrai_operand"
+ "iSdRhl,?Rmm,iSdRhl,?Rmm, iSdRhl,?Rmm,*Raa,*Raa")))]
+ ""
+ "add.b\t%2,%0"
+ [(set_attr "flags" "oszc")]
+ )
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "m32c_nonimmediate_operand"
+ "=SdRhi,SdRhi,??Rmm,??Rmm, SdRhi,??Rmm, Rhi, Raw, Raw, !Rsp")
+ (plus:HI (match_operand:HI 1 "m32c_any_operand"
+ "%0,0,0,0, 0,0, Raw, Rfb, Rfb, 0")
+ (match_operand:HI 2 "m32c_any_operand"
+ "IU2sSdRhi,?Rmm,IU2sSdRhi,?Rmm, IM2,IM2, IS2IU2, I00, IS1, i")))]
+ ""
+ "@
+ add.w\t%2,%0
+ add.w\t%2,%0
+ add.w\t%2,%0
+ add.w\t%2,%0
+ sub.w\t%m2,%0
+ sub.w\t%m2,%0
+ mova\t%d2[%1],%0
+ stc\t%1,%0
+ mova\t%D2[%1],%0
+ add.w\t%2,%0"
+ [(set_attr "flags" "oszc,oszc,oszc,oszc,oszc,oszc,n,n,n,oszc")]
+ )
+
+(define_insn "addpsi3"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "=Rpi,Raa,SdRpi,SdRpi,Rsp*Rmm, Rpi,Rpi")
+ (plus:PSI (match_operand:PSI 1 "m32c_nonimmediate_operand" "0,0,0,0,0, Raa,Rad")
+ (match_operand:PSI 2 "m32c_any_operand" "Is3,IS1,iSdRpi,?Rmm,i, i,IS2")))]
+ "TARGET_A24"
+ "@
+ add.l:q\t%2,%0
+ addx\t%2,%0
+ add.l\t%2,%0
+ add.l\t%2,%0
+ add.l\t%2,%0
+ mova\t%d2[%1],%0
+ mova\t%D2[%1],%0"
+ [(set_attr "flags" "oszc,oszc,oszc,oszc,oszc,n,n")]
+ )
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (plus:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "iRsiSd,?Rmm,iRsiSd,?Rmm")))]
+ "TARGET_A24 ||TARGET_A16"
+ ""
+ )
+
+(define_insn "addsi3_1"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,??Rmm,RsiSd,RsiSd,??Rmm,??Rmm,??Rmm,RsiSd")
+ (plus:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0,0,0,0,0")
+ (match_operand 2 "mrai_operand" "IU2,IU2,i,?Rmm,i,RsiSd,?Rmm,RsiSd")))]
+ "TARGET_A16"
+ "*
+
+ switch (which_alternative)
+ {
+ case 0:
+ return \"add.w %X2,%h0\;adcf.w %H0\";
+ case 1:
+ return \"add.w %X2,%h0\;adcf.w %H0\";
+ case 2:
+ if (GET_CODE (operands[2]) == SYMBOL_REF)
+ {
+ output_asm_insn (\"add.w #%%lo(%d2),%h0\",operands);
+ return \"adc.w #%%hi(%d2),%H0\";
+ }
+ else
+ {
+ output_asm_insn (\"add.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"adc.w %X2,%H0\";
+ }
+ case 3:
+ return \"add.w %h2,%h0\;adc.w %H2,%H0\";
+ case 4:
+ output_asm_insn (\"add.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"adc.w %X2,%H0\";
+ case 5:
+ return \"add.w %h2,%h0\;adc.w %H2,%H0\";
+ case 6:
+ return \"add.w %h2,%h0\;adc.w %H2,%H0\";
+ case 7:
+ return \"add.w %h2,%h0\;adc.w %H2,%H0\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "flags" "x,x,x,x,x,x,x,x")]
+)
+
+(define_insn "addsi3_2"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (plus:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "iRsiSd,?Rmm,iRsiSd,?Rmm")))]
+ "TARGET_A24"
+ "add.l\t%2,%0"
+ [(set_attr "flags" "oszc")]
+)
+
+(define_insn "subqi3"
+ [(set (match_operand:QI 0 "mra_or_sp_operand"
+ "=SdRhl,SdRhl,??Rmm,??Rmm, Raa,Raa,SdRhl,??Rmm, *Rsp")
+ (minus:QI (match_operand:QI 1 "mra_operand"
+ "0,0,0,0, 0,0,0,0, 0")
+ (match_operand:QI 2 "mrai_operand"
+ "iSdRhl,?Rmm,iSdRhl,?Rmm, iSdRhl,?Rmm,Raa,Raa, i")))]
+ ""
+ "sub.b\t%2,%0"
+ [(set_attr "flags" "oszc")]
+ )
+
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "mra_operand"
+ "=SdRhi,SdRhi,??Rmm,??Rmm, SdRhi,??Rmm")
+ (minus:HI (match_operand:HI 1 "mras_operand"
+ "0,0,0,0, 0,0")
+ (match_operand:HI 2 "mrai_operand"
+ "IU2SdRhi,?Rmm,IU2SdRhi,?Rmm, IM2,IM2")))]
+ ""
+ "@
+ sub.w\t%2,%0
+ sub.w\t%2,%0
+ sub.w\t%2,%0
+ sub.w\t%2,%0
+ add.w\t%m2,%0
+ add.w\t%m2,%0"
+ [(set_attr "flags" "oszc,oszc,oszc,oszc,oszc,oszc")]
+ )
+
+(define_insn "subpsi3"
+ [(set (match_operand:PSI 0 "mra_operand" "=RpiSd,RpiSd,??Rmm,??Rmm")
+ (minus:PSI (match_operand:PSI 1 "mra_operand" "0,0,0,0")
+ (match_operand:PSI 2 "mrai_operand" "iRpiSd,?Rmm,iRpiSd,?Rmm")))]
+ "TARGET_A24"
+ "sub.%&\t%2,%0"
+ [(set_attr "flags" "oszc")]
+ )
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (minus:SI (match_operand:SI 1 "mra_operand" "0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "iRsiSd,?Rmm,iRsiSd,?Rmm")))]
+ "TARGET_A24 ||TARGET_A16"
+ ""
+)
+
+(define_insn "subsi3_1"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm,??Rmm,RsiSd")
+ (minus:SI (match_operand:SI 1 "mra_operand" "0,0,0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "i,?Rmm,i,RsiSd,?Rmm,RsiSd")))]
+ "TARGET_A16"
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"sub.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"sbb.w %X2,%H0\";
+ case 1:
+ return \"sub.w %h2,%h0\;sbb.w %H2,%H0\";
+ case 2:
+ output_asm_insn (\"sub.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"sbb.w %X2,%H0\";
+ case 3:
+ return \"sub.w %h2,%h0\;sbb.w %H2,%H0\";
+ case 4:
+ return \"sub.w %h2,%h0\;sbb.w %H2,%H0\";
+ case 5:
+ return \"sub.w %h2,%h0\;sbb.w %H2,%H0\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "flags" "x,x,x,x,x,x")]
+)
+
+(define_insn "subsi3_2"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (minus:SI (match_operand:SI 1 "mra_operand" "0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "iRsiSd,?Rmm,iRsiSd,?Rmm")))]
+ "TARGET_A24"
+ "sub.l\t%2,%0"
+ [(set_attr "flags" "oszc,oszc,oszc,oszc")]
+)
+
+(define_insn "negqi2"
+ [(set (match_operand:QI 0 "mra_operand" "=SdRhl,??Rmm")
+ (neg:QI (match_operand:QI 1 "mra_operand" "0,0")))]
+ ""
+ "neg.b\t%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "neghi2"
+ [(set (match_operand:HI 0 "mra_operand" "=SdRhi,??Rmm")
+ (neg:HI (match_operand:HI 1 "mra_operand" "0,0")))]
+ ""
+ "neg.w\t%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+; We can negate an SImode by operating on the subparts. GCC deals
+; with this itself for larger modes, but not SI.
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "mra_operand" "=SdR03,??Rmm")
+ (neg:SI (match_operand:SI 1 "mra_operand" "0,0")))]
+ ""
+ "not.w %h0 | not.w %H0 | add.w #1,%h0 | adcf.w %H0"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "absqi2"
+ [(set (match_operand:QI 0 "mra_operand" "=RhlSd,??Rmm")
+ (abs:QI (match_operand:QI 1 "mra_operand" "0,0")))]
+ ""
+ "abs.b\t%0"
+ [(set_attr "flags" "oszc")]
+ )
+
+(define_insn "abshi2"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,??Rmm")
+ (abs:HI (match_operand:HI 1 "mra_operand" "0,0")))]
+ ""
+ "abs.w\t%0"
+ [(set_attr "flags" "oszc")]
+ )
diff --git a/gcc/config/m32c/bitops.md b/gcc/config/m32c/bitops.md
new file mode 100644
index 000000000..3c8e8427b
--- /dev/null
+++ b/gcc/config/m32c/bitops.md
@@ -0,0 +1,422 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Bit-wise operations (and, ior, xor, shift)
+
+; On the R8C and M16C, "address" for bit instructions is usually (but
+; not always!) the *bit* address, not the *byte* address. This
+; confuses gcc, so we avoid cases where gcc would produce the wrong
+; code. We're left with absolute addresses and registers, and the odd
+; case of shifting a bit by a variable.
+
+; On the M32C, "address" for bit instructions is a regular address,
+; and the bit number is stored in a separate field. Thus, we can let
+; gcc do more interesting things. However, the M32C cannot set all
+; the bits in a 16-bit register, which the R8C/M16C can do.
+
+; The upshot is that we end up with two sets of patterns, one
+; for each chip.
+
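+; Concretely (illustrative): bit 3 of the byte at 0x0400 is bit
+; address 0x0400 * 8 + 3 = 0x2003 in the R8C/M16C linear bit space,
+; whereas the M32C form encodes byte address 0x0400 and bit number 3
+; as separate fields.
+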
+;;----------------------------------------------------------------------
+
+;; First off, all the ways we can set one bit, other than plain IOR.
+
+(define_insn "bset_qi"
+ [(set (match_operand:QI 0 "memsym_operand" "+Si")
+ (ior:QI (subreg:QI (ashift:HI (const_int 1)
+ (subreg:QI (match_operand:HI 1 "a_qi_operand" "Raa") 0)) 0)
+ (match_operand:QI 2 "memsym_operand" "0")))]
+ "TARGET_A16"
+ "bset\t%0[%1]"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "bset_hi"
+ [(set (zero_extract:HI (match_operand:QI 0 "memsym_operand" "+Si")
+ (const_int 1)
+ (zero_extend:HI (subreg:QI (match_operand:HI 1 "a_qi_operand" "Raa") 0)))
+ (const_int 1))]
+ "TARGET_A16"
+ "bset\t%0[%1]"
+ [(set_attr "flags" "n")]
+ )
+
+;;----------------------------------------------------------------------
+
+;; Now all the ways we can clear one bit, other than plain AND.
+
+; This is odd because the shift patterns use QI counts, but we can't
+; easily put QI in $aN without causing problems elsewhere.
+(define_insn "bclr_qi"
+ [(set (zero_extract:HI (match_operand:QI 0 "memsym_operand" "+Si")
+ (const_int 1)
+ (zero_extend:HI (subreg:QI (match_operand:HI 1 "a_qi_operand" "Raa") 0)))
+ (const_int 0))]
+ "TARGET_A16"
+ "bclr\t%0[%1]"
+ [(set_attr "flags" "n")]
+ )
+
+
+;;----------------------------------------------------------------------
+
+;; Now the generic patterns.
+
+(define_insn "andqi3_16"
+ [(set (match_operand:QI 0 "mra_operand" "=Sp,Rqi,RhlSd,RhlSd,??Rmm,??Rmm")
+ (and:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand 2 "mrai_operand" "Imb,Imb,iRhlSd,?Rmm,iRhlSd,?Rmm")))]
+ "TARGET_A16"
+ "@
+ bclr\t%B2,%0
+ bclr\t%B2,%h0
+ and.b\t%x2,%0
+ and.b\t%x2,%0
+ and.b\t%x2,%0
+ and.b\t%x2,%0"
+ [(set_attr "flags" "n,n,sz,sz,sz,sz")]
+ )
+
+(define_insn "andhi3_16"
+ [(set (match_operand:HI 0 "mra_operand" "=Sp,Sp,Rhi,RhiSd,??Rmm,RhiSd,??Rmm")
+ (and:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "ImB,Imw,Imw,iRhiSd,?Rmm,?Rmm,iRhiSd")))]
+ "TARGET_A16"
+ "@
+
+ bclr\t%B2,%0
+ bclr\t%B2-8,1+%0
+ bclr\t%B2,%0
+ and.w\t%X2,%0
+ and.w\t%X2,%0
+ and.w\t%X2,%0
+ and.w\t%X2,%0"
+ [(set_attr "flags" "n,n,n,sz,sz,sz,sz")]
+ )
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm,??Rmm,RsiSd")
+ (and:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "i,?Rmm,i,RsiSd,?Rmm,RsiSd")))]
+ ""
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"and.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"and.w %X2,%H0\";
+ case 1:
+ return \"and.w %h2,%h0\;and.w %H2,%H0\";
+ case 2:
+ output_asm_insn (\"and.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"and.w %X2,%H0\";
+ case 3:
+ return \"and.w %h2,%h0\;and.w %H2,%H0\";
+ case 4:
+ return \"and.w %h2,%h0\;and.w %H2,%H0\";
+ case 5:
+ return \"and.w %h2,%h0\;and.w %H2,%H0\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "flags" "x,x,x,x,x,x")]
+)
+
+
+(define_insn "iorqi3_16"
+ [(set (match_operand:QI 0 "mra_operand" "=Sp,Rqi,RqiSd,??Rmm,RqiSd,??Rmm")
+ (ior:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "mrai_operand" "Ilb,Ilb,iRhlSd,iRhlSd,?Rmm,?Rmm")))]
+ "TARGET_A16"
+ "@
+ bset\t%B2,%0
+ bset\t%B2,%h0
+ or.b\t%x2,%0
+ or.b\t%x2,%0
+ or.b\t%x2,%0
+ or.b\t%x2,%0"
+ [(set_attr "flags" "n,n,sz,sz,sz,sz")]
+ )
+
+(define_insn "iorhi3_16"
+ [(set (match_operand:HI 0 "mra_operand" "=Sp,Sp,Rhi,RhiSd,RhiSd,??Rmm,??Rmm")
+ (ior:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "Ilb,Ilw,Ilw,iRhiSd,?Rmm,iRhiSd,?Rmm")))]
+ "TARGET_A16"
+ "@
+ bset\t%B2,%0
+ bset\t%B2-8,1+%0
+ bset\t%B2,%0
+ or.w\t%X2,%0
+ or.w\t%X2,%0
+ or.w\t%X2,%0
+ or.w\t%X2,%0"
+ [(set_attr "flags" "n,n,n,sz,sz,sz,sz")]
+ )
+
+; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+(define_insn "andqi3_24"
+ [(set (match_operand:QI 0 "mra_operand" "=Sd,Rqi,RhlSd,RhlSd,??Rmm,??Rmm")
+ (and:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand 2 "mrai_operand" "Imb,Imb,iRhlSd,?Rmm,iRhlSd,?Rmm")))]
+ "TARGET_A24"
+ "@
+ bclr\t%B2,%0
+ bclr\t%B2,%0
+ and.b\t%x2,%0
+ and.b\t%x2,%0
+ and.b\t%x2,%0
+ and.b\t%x2,%0"
+ [(set_attr "flags" "n,n,sz,sz,sz,sz")]
+ )
+
+(define_insn "andhi3_24"
+ [(set (match_operand:HI 0 "mra_operand" "=Sd,Sd,?Rhl,?Rhl,RhiSd,??Rmm,RhiSd,??Rmm")
+ (and:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0,0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "ImB,Imw,ImB,Imw,iRhiSd,?Rmm,?Rmm,iRhiSd")))]
+ "TARGET_A24"
+ "@
+ bclr\t%B2,%0
+ bclr\t%B2-8,1+%0
+ bclr\t%B2,%h0
+ bclr\t%B2-8,%H0
+ and.w\t%X2,%0
+ and.w\t%X2,%0
+ and.w\t%X2,%0
+ and.w\t%X2,%0"
+ [(set_attr "flags" "n,n,n,n,sz,sz,sz,sz")]
+ )
+
+
+
+(define_insn "iorqi3_24"
+ [(set (match_operand:QI 0 "mra_operand" "=RqiSd,RqiSd,??Rmm,RqiSd,??Rmm")
+ (ior:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0")
+ (match_operand:QI 2 "mrai_operand" "Ilb,iRhlSd,iRhlSd,?Rmm,?Rmm")))]
+ "TARGET_A24"
+ "@
+ bset\t%B2,%0
+ or.b\t%x2,%0
+ or.b\t%x2,%0
+ or.b\t%x2,%0
+ or.b\t%x2,%0"
+ [(set_attr "flags" "n,sz,sz,sz,sz")]
+ )
+
+(define_insn "iorhi3_24"
+ [(set (match_operand:HI 0 "mra_operand" "=Sd,Sd,?Rhl,?Rhl,RhiSd,RhiSd,??Rmm,??Rmm")
+ (ior:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0,0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "Ilb,Ilw,Ilb,Ilw,iRhiSd,?Rmm,iRhiSd,?Rmm")))]
+ "TARGET_A24"
+ "@
+ bset\t%B2,%0
+ bset\t%B2-8,1+%0
+ bset\t%B2,%h0
+ bset\t%B2-8,%H0
+ or.w\t%X2,%0
+ or.w\t%X2,%0
+ or.w\t%X2,%0
+ or.w\t%X2,%0"
+ [(set_attr "flags" "n,n,n,n,sz,sz,sz,sz")]
+ )
+
+
+; ----------------------------------------------------------------------
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "mra_operand" "")
+ (and:QI (match_operand:QI 1 "mra_operand" "")
+ (match_operand:QI 2 "mrai_operand" "")))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_andqi3_16 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_andqi3_24 (operands[0], operands[1], operands[2]));
+ DONE;"
+ )
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "mra_operand" "")
+ (and:HI (match_operand:HI 1 "mra_operand" "")
+ (match_operand:HI 2 "mrai_operand" "")))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_andhi3_16 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_andhi3_24 (operands[0], operands[1], operands[2]));
+ DONE;"
+ )
+
+(define_expand "iorqi3"
+ [(set (match_operand:QI 0 "mra_operand" "")
+ (ior:QI (match_operand:QI 1 "mra_operand" "")
+ (match_operand:QI 2 "mrai_operand" "")))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_iorqi3_16 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_iorqi3_24 (operands[0], operands[1], operands[2]));
+ DONE;"
+ )
+
+(define_expand "iorhi3"
+ [(set (match_operand:HI 0 "mra_operand" "")
+ (ior:HI (match_operand:HI 1 "mra_operand" "")
+ (match_operand:HI 2 "mrai_operand" "")))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_iorhi3_16 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_iorhi3_24 (operands[0], operands[1], operands[2]));
+ DONE;"
+ )
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm,??Rmm,RsiSd")
+ (ior:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "i,?Rmm,i,RsiSd,?Rmm,RsiSd")))]
+ ""
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"or.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"or.w %X2,%H0\";
+ case 1:
+ return \"or.w %h2,%h0\;or.w %H2,%H0\";
+ case 2:
+ output_asm_insn (\"or.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"or.w %X2,%H0\";
+ case 3:
+ return \"or.w %h2,%h0\;or.w %H2,%H0\";
+ case 4:
+ return \"or.w %h2,%h0\;or.w %H2,%H0\";
+ case 5:
+ return \"or.w %h2,%h0\;or.w %H2,%H0\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "flags" "x,x,x,x,x,x")]
+)
+
+(define_insn "xorqi3"
+ [(set (match_operand:QI 0 "mra_operand" "=RhlSd,RhlSd,??Rmm,??Rmm")
+ (xor:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:QI 2 "mrai_operand" "iRhlSd,?Rmm,iRhlSd,?Rmm")))]
+ ""
+ "xor.b\t%x2,%0"
+ [(set_attr "flags" "sz,sz,sz,sz")]
+ )
+
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm")
+ (xor:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "iRhiSd,?Rmm,iRhiSd,?Rmm")))]
+ ""
+ "xor.w\t%X2,%0"
+ [(set_attr "flags" "sz,sz,sz,sz")]
+ )
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm,??Rmm,RsiSd")
+ (xor:SI (match_operand:SI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "mrai_operand" "i,?Rmm,i,RsiSd,?Rmm,RsiSd")))]
+ ""
+ "*
+ switch (which_alternative)
+ {
+ case 0:
+ output_asm_insn (\"xor.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"xor.w %X2,%H0\";
+ case 1:
+ return \"xor.w %h2,%h0\;xor.w %H2,%H0\";
+ case 2:
+ output_asm_insn (\"xor.w %X2,%h0\",operands);
+ operands[2]= GEN_INT (INTVAL (operands[2]) >> 16);
+ return \"xor.w %X2,%H0\";
+ case 3:
+ return \"xor.w %h2,%h0\;xor.w %H2,%H0\";
+ case 4:
+ return \"xor.w %h2,%h0\;xor.w %H2,%H0\";
+ case 5:
+ return \"xor.w %h2,%h0\;xor.w %H2,%H0\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "flags" "x,x,x,x,x,x")]
+)
+
+(define_insn "one_cmplqi2"
+ [(set (match_operand:QI 0 "mra_operand" "=RhlSd,??Rmm")
+ (not:QI (match_operand:QI 1 "mra_operand" "0,0")))]
+ ""
+ "not.b\t%0"
+ [(set_attr "flags" "sz,sz")]
+ )
+
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,??Rmm")
+ (not:HI (match_operand:HI 1 "mra_operand" "0,0")))]
+ ""
+ "not.w\t%0"
+ [(set_attr "flags" "sz,sz")]
+ )
+
+; Optimizations using bit opcodes
+
+; We need this because combine only looks at three insns at a time,
+; and the bclr_qi pattern uses four: mov, shift, not, and.  GCC
+; should never need to expand this pattern itself, because it only
+; shifts a constant by a constant, which gcc folds on its own.
+(define_insn "shift1_qi"
+ [(set (match_operand:QI 0 "mra_operand" "=Rqi")
+ (ashift:QI (const_int 1)
+ (match_operand 1 "const_int_operand" "In4")))]
+ ""
+ "mov.b\t#1,%0\n\tshl.b\t%1,%0"
+ )
+(define_insn "shift1_hi"
+ [(set (match_operand:HI 0 "mra_operand" "=Rhi")
+ (ashift:HI (const_int 1)
+ (match_operand 1 "const_int_operand" "In4")))]
+ ""
+ "mov.w\t#1,%0\n\tshl.w\t%1,%0"
+ )
+
+; Generic insert-bit expander, needed so that we can use the bit
+; opcodes for volatile bitfields.
+
+(define_expand "insv"
+ [(set (zero_extract:HI (match_operand:HI 0 "mra_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (match_operand:HI 3 "const_int_operand" ""))]
+ ""
+ "if (m32c_expand_insv (operands))
+ FAIL;
+ DONE;"
+ )
diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md
new file mode 100644
index 000000000..e384d3c52
--- /dev/null
+++ b/gcc/config/m32c/blkmov.md
@@ -0,0 +1,242 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2006, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; various block move instructions
+
+;; R8C:
+;; SMOVB - while (r3--) { *a1-- = *r1ha0--; } - memcpy
+;; SMOVF - while (r3--) { *a1++ = *r1ha0++; } - memcpy
+;; SSTR - while (r3--) { *a1++ = [r0l,r0]; } - memset
+
+;; M32CM:
+;; SCMPU - while (*a0 && *a0 != *a1) { a0++; a1++; } - strcmp
+;; SIN - while (r3--) { *a1++ = *a0; }
+;; SMOVB - while (r3--) { *a1-- = *a0--; } - memcpy
+;; SMOVF - while (r3--) { *a1++ = *a0++; } - memcpy
+;; SMOVU - while (*a1++ = *a0++) ; - strcpy
+;; SOUT - while (r3--) { *a1 = *a0++; }
+;; SSTR - while (r3--) { *a1++ = [r0l,r0]; } - memset
+
+
+
+;; 0 = destination (mem:BLK ...)
+;; 1 = source (mem:BLK ...)
+;; 2 = count
+;; 3 = alignment
+(define_expand "movmemhi"
+ [(match_operand 0 "ap_operand" "")
+ (match_operand 1 "ap_operand" "")
+ (match_operand 2 "m32c_r3_operand" "")
+ (match_operand 3 "" "")
+ ]
+ ""
+ "if (m32c_expand_movmemhi(operands)) DONE; FAIL;"
+ )
+
+;; We can't use mode iterators for these because M16C uses r1h to extend
+;; the source address when copying data from ROM to RAM.  We don't yet
+;; support that, but we still need to zero r1h, so the patterns differ.
+
+;; 0 = dest (out)
+;; 1 = src (out)
+;; 2 = count (out)
+;; 3 = dest (in)
+;; 4 = src (in)
+;; 5 = count (in)
+(define_insn "movmemhi_bhi_op"
+ [(set (mem:QI (match_operand:HI 3 "ap_operand" "0"))
+ (mem:QI (match_operand:HI 4 "ap_operand" "1")))
+ (set (match_operand:HI 2 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:HI 0 "ap_operand" "=Ra1")
+ (plus:HI (match_dup 3)
+ (zero_extend:HI (match_operand:HI 5 "m32c_r3_operand" "2"))))
+ (set (match_operand:HI 1 "ap_operand" "=Ra0")
+ (plus:HI (match_dup 4)
+ (zero_extend:HI (match_dup 5))))
+ (use (reg:HI R1_REGNO))]
+ "TARGET_A16"
+ "mov.b:q\t#0,r1h\n\tsmovf.b\t; %0[0..%2-1]=r1h%1[]"
+ )
+(define_insn "movmemhi_bpsi_op"
+ [(set (mem:QI (match_operand:PSI 3 "ap_operand" "0"))
+ (mem:QI (match_operand:PSI 4 "ap_operand" "1")))
+ (set (match_operand:HI 2 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:PSI 0 "ap_operand" "=Ra1")
+ (plus:PSI (match_dup 3)
+ (zero_extend:PSI (match_operand:HI 5 "m32c_r3_operand" "2"))))
+ (set (match_operand:PSI 1 "ap_operand" "=Ra0")
+ (plus:PSI (match_dup 4)
+ (zero_extend:PSI (match_dup 5))))]
+ "TARGET_A24"
+ "smovf.b\t; %0[0..%2-1]=%1[]"
+ )
+(define_insn "movmemhi_whi_op"
+ [(set (mem:HI (match_operand:HI 3 "ap_operand" "0"))
+ (mem:HI (match_operand:HI 4 "ap_operand" "1")))
+ (set (match_operand:HI 2 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:HI 0 "ap_operand" "=Ra1")
+ (plus:HI (match_dup 3)
+ (zero_extend:HI (match_operand:HI 5 "m32c_r3_operand" "2"))))
+ (set (match_operand:HI 1 "ap_operand" "=Ra0")
+ (plus:HI (match_dup 4)
+ (zero_extend:HI (match_dup 5))))
+ (use (reg:HI R1_REGNO))]
+ "TARGET_A16"
+ "mov.b:q\t#0,r1h\n\tsmovf.w\t; %0[0..%2-1]=r1h%1[]"
+ )
+(define_insn "movmemhi_wpsi_op"
+ [(set (mem:HI (match_operand:PSI 3 "ap_operand" "0"))
+ (mem:HI (match_operand:PSI 4 "ap_operand" "1")))
+ (set (match_operand:HI 2 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:PSI 0 "ap_operand" "=Ra1")
+ (plus:PSI (match_dup 3)
+ (zero_extend:PSI (match_operand:HI 5 "m32c_r3_operand" "2"))))
+ (set (match_operand:PSI 1 "ap_operand" "=Ra0")
+ (plus:PSI (match_dup 4)
+ (zero_extend:PSI (match_dup 5))))]
+ "TARGET_A24"
+ "smovf.w\t; %0[0..%2-1]=%1[]"
+ )
+
+
+
+;; 0 = destination (mem:BLK ...)
+;; 1 = number of bytes
+;; 2 = value to store
+;; 3 = alignment
+(define_expand "setmemhi"
+ [(match_operand 0 "ap_operand" "")
+ (match_operand 1 "m32c_r3_operand" "")
+ (match_operand 2 "m32c_r0_operand" "")
+ (match_operand 3 "" "")
+ ]
+ "TARGET_A24"
+ "if (m32c_expand_setmemhi(operands)) DONE; FAIL;"
+ )
+
+;; 0 = address (out)
+;; 1 = count (out)
+;; 2 = value (in)
+;; 3 = address (in)
+;; 4 = count (in)
+(define_insn "setmemhi_b<mode>_op"
+ [(set (mem:QI (match_operand:HPSI 3 "ap_operand" "0"))
+ (match_operand:QI 2 "m32c_r0_operand" "R0w"))
+ (set (match_operand:HI 1 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:HPSI 0 "ap_operand" "=Ra1")
+ (plus:HPSI (match_dup 3)
+ (zero_extend:HPSI (match_operand:HI 4 "m32c_r3_operand" "1"))))]
+ "TARGET_A24"
+ "sstr.b\t; %0[0..%1-1]=%2"
+ )
+
+(define_insn "setmemhi_w<mode>_op"
+ [(set (mem:HI (match_operand:HPSI 3 "ap_operand" "0"))
+ (match_operand:HI 2 "m32c_r0_operand" "R0w"))
+ (set (match_operand:HI 1 "m32c_r3_operand" "=R3w")
+ (const_int 0))
+ (set (match_operand:HPSI 0 "ap_operand" "=Ra1")
+ (plus:HPSI (match_dup 3)
+ (zero_extend:HPSI (match_operand:HI 4 "m32c_r3_operand" "1"))))]
+ "TARGET_A24"
+ "sstr.w\t; %0[0..%1-1]=%2"
+ )
+
+
+;; SCMPU sets the flags according to the result of the string
+;; comparison.  GCC instead wants a signed value reflecting that
+;; result, which it then compares against zero.  Hopefully we can
+;; optimize the comparison away later (see the peephole in cond.md).
+;; Meanwhile, the strcmp builtin is expanded to an SCMPU followed by
+;; a flags-to-int pattern in cond.md.
+
+;; 0 = result:HI
+;; 1 = destination (mem:BLK ...)
+;; 2 = source (mem:BLK ...)
+;; 3 = alignment
+
+(define_expand "cmpstrsi"
+ [(match_operand:HI 0 "" "")
+ (match_operand 1 "ap_operand" "")
+ (match_operand 2 "ap_operand" "")
+ (match_operand 3 "" "")
+ ]
+ "TARGET_A24"
+ "if (m32c_expand_cmpstr(operands)) DONE; FAIL;"
+ )
+
+;; 0 = string1
+;; 1 = string2
+
+(define_insn "cmpstrhi_op"
+ [(set (reg:CC FLG_REGNO)
+ (compare:CC (mem:BLK (match_operand:PSI 0 "ap_operand" "Ra0"))
+ (mem:BLK (match_operand:PSI 1 "ap_operand" "Ra1"))))
+ (clobber (match_operand:PSI 2 "ap_operand" "=0"))
+ (clobber (match_operand:PSI 3 "ap_operand" "=1"))]
+ "TARGET_A24"
+ "scmpu.b\t; flags := strcmp(*%0,*%1)"
+ [(set_attr "flags" "oszc")]
+ )
+
+
+
+;; Note that SMOVU leaves the address registers pointing *after*
+;; the NUL at the end of the string. This is not what gcc expects; it
+;; expects the address registers to point *at* the NUL. The expander
+;; must emit a suitable add insn.
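+;; (The UNS_SMOVU unspec below stands in for the amount SMOVU
+;; advances each pointer, which is not known statically; the fixup
+;; add, presumably of -1, is emitted by m32c_expand_movstr.)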
+
+;; 0 = target: set to &NUL in dest
+;; 1 = destination (mem:BLK ...)
+;; 2 = source (mem:BLK ...)
+
+(define_expand "movstr"
+ [(match_operand 0 "m32c_nonimmediate_operand" "")
+ (match_operand 1 "ap_operand" "")
+ (match_operand 2 "ap_operand" "")
+ ]
+ "TARGET_A24"
+ "if (m32c_expand_movstr(operands)) DONE; FAIL;"
+ )
+
+;; 0 = dest (out)
+;; 1 = src (out) (clobbered)
+;; 2 = dest (in)
+;; 3 = src (in)
+(define_insn "movstr_op"
+ [(set (mem:BLK (match_operand:PSI 2 "ap_operand" "0"))
+ (mem:BLK (match_operand:PSI 3 "ap_operand" "1")))
+ (set (match_operand:PSI 0 "ap_operand" "=Ra1")
+ (plus:PSI (match_dup 2)
+ (unspec:PSI [(const_int 0)] UNS_SMOVU)))
+ (set (match_operand:PSI 1 "ap_operand" "=Ra0")
+ (plus:PSI (match_dup 3)
+ (unspec:PSI [(const_int 0)] UNS_SMOVU)))]
+ "TARGET_A24"
+ "smovu.b\t; while (*%2++ := *%3++) != 0"
+ [(set_attr "flags" "*")]
+ )
+
diff --git a/gcc/config/m32c/cond.md b/gcc/config/m32c/cond.md
new file mode 100644
index 000000000..c751070e7
--- /dev/null
+++ b/gcc/config/m32c/cond.md
@@ -0,0 +1,293 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+; conditionals - cmp, jcc, setcc, etc.
+
+; Special note about conditional instructions: GCC always emits the
+; compare right before the insn, which is good, because m32c's mov
+; insns modify the flags. However, this means that any conditional
+; insn that may require reloading must be kept with its compare until
+; after reload finishes, else the reload insns might clobber the
+; flags. Thus, these rules:
+;
+; * the cmp* expanders just save the operands in compare_op0 and
+; compare_op1 via m32c_pend_compare.
+; * conditional insns that won't need reload can call
+; m32c_unpend_compare before their expansion.
+; * other insns must expand to include the compare operands within,
+; then split after reload to a separate compare and conditional.
+
+; Until relaxing is supported in gas, we must assume that
+; short labels won't reach, so we must use long labels.
+; Unfortunately, there aren't any conditional jumps with long labels,
+; so instead we invert the conditional and jump around a regular jump.
+
+; Note that we can, at some point in the future, add code to omit the
+; "cmp" portion of the insn if the preceding insn happened to set the
+; right flags already. For example, a mov followed by a "cmp *,0" is
+; redundant; the move already set the Z flag.
+
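+; For instance (illustrative), in "mov.w r1,r0" followed by
+; "cmp.w #0,r0" and "jeq label", the cmp is redundant: the mov
+; already set the Z flag from the value it moved.
+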
+(define_insn_and_split "cbranch<mode>4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "m32c_cmp_operator"
+ [(match_operand:QHPSI 1 "mra_operand" "RraSd")
+ (match_operand:QHPSI 2 "mrai_operand" "iRraSd")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_dup 1)
+ (match_dup 2)))
+ (set (pc) (if_then_else (match_op_dup 0 [(reg:CC FLG_REGNO) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+ ""
+ )
+
+(define_insn "bcc_op"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(reg:CC FLG_REGNO) (const_int 0)])
+ (label_ref (match_operand 1 ""))
+ (pc)))]
+ ""
+ "j%c0\t%l1"
+ [(set_attr "flags" "n")]
+)
+
+(define_insn "stzx_16"
+ [(set (match_operand:QI 0 "mrai_operand" "=R0w,R0w,R0w")
+ (if_then_else:QI (eq (reg:CC FLG_REGNO) (const_int 0))
+ (match_operand:QI 1 "const_int_operand" "i,i,0")
+ (match_operand:QI 2 "const_int_operand" "i,0,i")))]
+ "TARGET_A16 && reload_completed"
+ "@
+ stzx\t%1,%2,%0
+ stz\t%1,%0
+ stnz\t%2,%0"
+ [(set_attr "flags" "n,n,n")]
+)
+
+(define_insn "stzx_24_<mode>"
+ [(set (match_operand:QHI 0 "mrai_operand" "=RraSd,RraSd,RraSd")
+ (if_then_else:QHI (eq (reg:CC FLG_REGNO) (const_int 0))
+ (match_operand:QHI 1 "const_int_operand" "i,i,0")
+ (match_operand:QHI 2 "const_int_operand" "i,0,i")))]
+ "TARGET_A24 && reload_completed"
+ "@
+ stzx.<bwl>\t%1,%2,%0
+ stz.<bwl>\t%1,%0
+ stnz.<bwl>\t%2,%0"
+ [(set_attr "flags" "n,n,n")])
+
+(define_insn_and_split "stzx_reversed_<mode>"
+ [(set (match_operand:QHI 0 "m32c_r0_operand" "=R0w")
+ (if_then_else:QHI (ne (reg:CC FLG_REGNO) (const_int 0))
+ (match_operand:QHI 1 "const_int_operand" "")
+ (match_operand:QHI 2 "const_int_operand" "")))]
+ "(TARGET_A24 || GET_MODE (operands[0]) == QImode) && reload_completed"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (if_then_else:QHI (eq (reg:CC FLG_REGNO) (const_int 0))
+ (match_dup 2)
+ (match_dup 1)))]
+ ""
+ )
+
+
+(define_insn "cmp<mode>_op"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_operand:QHPSI 0 "mra_operand" "RraSd")
+ (match_operand:QHPSI 1 "mrai_operand" "RraSdi")))]
+ ""
+ "* return m32c_output_compare(insn, operands); "
+ [(set_attr "flags" "oszc")])
+
+;; m32c_conditional_register_usage changes the setcc_gen_code array to
+;; point to the _24 variants if needed.
+
+;; We need to keep the compare and conditional sets together through
+;; reload, because reload might need to add address reloads to the
+;; set, which would clobber the flags. By keeping them together, the
+;; reloads get put before the compare, thus preserving the flags.
+
+;; These are the post-split patterns for the conditional sets.
+
+(define_insn "scc_op"
+ [(set (match_operand:QI 0 "register_operand" "=Rqi")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(reg:CC FLG_REGNO) (const_int 0)]))]
+ "TARGET_A16 && reload_completed"
+ "* return m32c_scc_pattern(operands, GET_CODE (operands[1]));")
+
+(define_insn "scc_24_op"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd")
+ (match_operator:HI 1 "ordered_comparison_operator"
+ [(reg:CC FLG_REGNO) (const_int 0)]))]
+ "TARGET_A24 && reload_completed"
+ "sc%c1\t%0"
+ [(set_attr "flags" "n")]
+)
+
+;; These are the pre-split patterns for the conditional sets.
+
+(define_insn_and_split "cstore<mode>4"
+ [(set (match_operand:QI 0 "register_operand" "=Rqi")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(match_operand:QHPSI 2 "mra_operand" "RraSd")
+ (match_operand:QHPSI 3 "mrai_operand" "RraSdi")]))]
+ "TARGET_A16"
+ "#"
+ "reload_completed"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_dup 2)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (match_op_dup 1 [(reg:CC FLG_REGNO) (const_int 0)]))]
+ ""
+ [(set_attr "flags" "x")]
+)
+
+(define_insn_and_split "cstore<mode>4_24"
+ [(set (match_operand:HI 0 "mra_nopp_operand" "=RhiSd")
+ (match_operator:HI 1 "ordered_comparison_operator"
+ [(match_operand:QHPSI 2 "mra_operand" "RraSd")
+ (match_operand:QHPSI 3 "mrai_operand" "RraSdi")]))]
+ "TARGET_A24"
+ "#"
+ "reload_completed"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_dup 2)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (match_op_dup 1 [(reg:CC FLG_REGNO) (const_int 0)]))]
+ ""
+ [(set_attr "flags" "x")]
+)
+
+(define_insn_and_split "movqicc_<code>_<mode>"
+ [(set (match_operand:QI 0 "register_operand" "=R0w")
+ (if_then_else:QI (eqne_cond:QI (match_operand:QHPSI 1 "mra_operand" "RraSd")
+ (match_operand:QHPSI 2 "mrai_operand" "RraSdi"))
+ (match_operand:QI 3 "const_int_operand" "")
+ (match_operand:QI 4 "const_int_operand" "")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:QI (eqne_cond:QI (reg:CC FLG_REGNO) (const_int 0))
+ (match_dup 3)
+ (match_dup 4)))]
+ ""
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn_and_split "movhicc_<code>_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=R0w")
+ (if_then_else:HI (eqne_cond:HI (match_operand:QHPSI 1 "mra_operand" "RraSd")
+ (match_operand:QHPSI 2 "mrai_operand" "RraSdi"))
+ (match_operand:QI 3 "const_int_operand" "")
+ (match_operand:QI 4 "const_int_operand" "")))]
+ "TARGET_A24"
+ "#"
+ "reload_completed"
+ [(set (reg:CC FLG_REGNO)
+ (compare (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:HI (eqne_cond:HI (reg:CC FLG_REGNO) (const_int 0))
+ (match_dup 3)
+ (match_dup 4)))]
+ ""
+ [(set_attr "flags" "x")]
+ )
+
+;; And these are the expanders.
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "m32c_eqne_operator" "")
+ (match_operand:QI 2 "const_int_operand" "")
+ (match_operand:QI 3 "const_int_operand" "")))]
+ ""
+ "if (m32c_expand_movcc(operands))
+ FAIL;
+ DONE;"
+)
+
+(define_expand "movhicc"
+ [(set (match_operand:HI 0 "mra_operand" "")
+ (if_then_else:HI (match_operand 1 "m32c_eqne_operator" "")
+ (match_operand:HI 2 "const_int_operand" "")
+ (match_operand:HI 3 "const_int_operand" "")))]
+ "TARGET_A24"
+ "if (m32c_expand_movcc(operands))
+ FAIL;
+ DONE;"
+)
+
+
+;; CMP opcodes subtract two values, set the flags, and discard the
+;; value. This pattern recovers the sign of the discarded value based
+;; on the flags. Operand 0 is set to -1, 0, or 1. This is used for
+;; the cmpstr pattern. For optimal code, this should be removed if
+;; followed by a suitable CMP insn (see the peephole following). This
+;; pattern is 7 bytes and 5 cycles. If you don't need specific
+;; values, a 5/4 pattern can be made with SCGT and BMLT to set the
+;; appropriate bits.
+
+(define_insn "cond_to_int"
+ [(set (match_operand:HI 0 "mra_qi_operand" "=Rqi")
+ (if_then_else:HI (lt (reg:CC FLG_REGNO) (const_int 0))
+ (const_int -1)
+ (if_then_else:HI (eq (reg:CC FLG_REGNO) (const_int 0))
+ (const_int 0)
+ (const_int 1))))]
+ "TARGET_A24"
+ "sceq\t%0\n\tbmgt\t1,%h0\n\tdec.w\t%0"
+ [(set_attr "flags" "x")]
+ )
+
+;; A cond_to_int followed by a compare against zero is essentially a
+;; no-op. However, the result of the cond_to_int may be used by later
+;; insns, so make sure it's dead before deleting its set.
+
+(define_peephole2
+ [(set (match_operand:HI 0 "mra_qi_operand" "")
+ (if_then_else:HI (lt (reg:CC FLG_REGNO) (const_int 0))
+ (const_int -1)
+ (if_then_else:HI (eq (reg:CC FLG_REGNO) (const_int 0))
+ (const_int 0)
+ (const_int 1))))
+ (set (reg:CC FLG_REGNO)
+ (compare (match_operand:HI 1 "mra_qi_operand" "")
+ (const_int 0)))
+ ]
+ "rtx_equal_p (operands[0], operands[1])
+ && dead_or_set_p (peep2_next_insn (1), operands[0])"
+ [(const_int 1)]
+ "")
diff --git a/gcc/config/m32c/jump.md b/gcc/config/m32c/jump.md
new file mode 100644
index 000000000..5f4718dfe
--- /dev/null
+++ b/gcc/config/m32c/jump.md
@@ -0,0 +1,135 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; jump, conditionals, calls, etc
+
+(define_insn "indirect_jump_16"
+ [(set (pc)
+ (match_operand:HI 0 "register_operand" "Rhi"))]
+ "TARGET_A16"
+; "jmpi.a\t%0"
+ ; no 16-bit jmpi in r8c
+ "push.b #0 | push.w\t%0 | rts"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "indirect_jump_24"
+ [(set (pc)
+ (match_operand:PSI 0 "register_operand" "Rpi"))]
+ "TARGET_A24"
+ "jmpi.a\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_expand "indirect_jump"
+ [(match_operand 0 "register_operand" "")]
+ ""
+ "if (TARGET_A16)
+ emit_jump_insn (gen_indirect_jump_16(operands[0]));
+ else
+ emit_jump_insn (gen_indirect_jump_24(operands[0]));
+ DONE;"
+ )
+
+; We can replace this with jmp.s when gas supports relaxing.  m32c
+; opcodes are too complicated to try to compute their sizes here; it's
+; far easier (and more reliable) to let gas worry about it.
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmp.a\t%l0"
+ [(set_attr "flags" "n")]
+)
+
+; No 16-bit indirect calls on r8c/m16c.
+(define_insn "call"
+ [(call (match_operand:QI 0 "memory_operand" "Si,SaSb,?Rmm")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "immediate_operand" ""))]
+ ""
+ "*
+switch (which_alternative) {
+ case 0:
+ {
+ HOST_WIDE_INT func_vect_num =
+ current_function_special_page_vector(XEXP (operands[0], 0));
+ if (func_vect_num)
+ {
+ operands[3] = gen_rtx_CONST_INT (VOIDmode, func_vect_num);
+ return \"jsrs\t%3\";
+ }
+ else
+ return \"jsr.a\t%0\";
+ }
+ case 1: return TARGET_A16 ? \"push.w %a0 | jsr.a\tm32c_jsri16\" : \"jsri.a\t%a0\";
+ case 2: return \"jsri.a\t%a0\";
+ default: gcc_unreachable ();
+}"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "call_value"
+ [(set (match_operand 0 "m32c_return_operand" "=RdiRmmRpa,RdiRmmRpa,RdiRmmRpa")
+ (call (match_operand:QI 1 "memory_operand" "Si,SaSb,?Rmm")
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "immediate_operand" ""))]
+ ""
+ "*
+switch (which_alternative) {
+ case 0:
+ {
+ HOST_WIDE_INT func_vect_num =
+ current_function_special_page_vector(XEXP (operands[1], 0));
+ if (func_vect_num)
+ {
+ operands[4] = gen_rtx_CONST_INT (VOIDmode, func_vect_num);
+ return \"jsrs\t%4\";
+ }
+ else
+ return \"jsr.a\t%1\";
+ }
+ case 1: return TARGET_A16 ? \"push.w %a1 | jsr.a\tm32c_jsri16\" : \"jsri.a\t%a1\";
+ case 2: return \"jsri.a\t%a1\";
+ default: gcc_unreachable ();
+}"
+ [(set_attr "flags" "x,x,x")]
+ )
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+ DONE;
+}")
diff --git a/gcc/config/m32c/m32c-lib1.S b/gcc/config/m32c/m32c-lib1.S
new file mode 100644
index 000000000..9b6577871
--- /dev/null
+++ b/gcc/config/m32c/m32c-lib1.S
@@ -0,0 +1,231 @@
+/* libgcc routines for R8C/M16C/M32C
+ Copyright (C) 2005, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined(__r8c_cpu__) || defined(__m16c_cpu__)
+#define A16
+#define A(n,w) n
+#define W w
+#else
+#define A24
+#define A(n,w) w
+#define W l
+#endif
+
+
+#ifdef L__m32c_memregs
+
+/* Warning: these memory locations are used as a register bank. They
+ *must* end up consecutive in any final executable, so you may *not*
+ use the otherwise obvious ".comm" directive to allocate space for
+ them. */
+
+ .bss
+ .global mem0
+mem0: .space 1
+ .global mem1
+mem1: .space 1
+ .global mem2
+mem2: .space 1
+ .global mem3
+mem3: .space 1
+ .global mem4
+mem4: .space 1
+ .global mem5
+mem5: .space 1
+ .global mem6
+mem6: .space 1
+ .global mem7
+mem7: .space 1
+ .global mem8
+mem8: .space 1
+ .global mem9
+mem9: .space 1
+ .global mem10
+mem10: .space 1
+ .global mem11
+mem11: .space 1
+ .global mem12
+mem12: .space 1
+ .global mem13
+mem13: .space 1
+ .global mem14
+mem14: .space 1
+ .global mem15
+mem15: .space 1
+
+#endif
+
+#ifdef L__m32c_eh_return
+ .text
+ .global __m32c_eh_return
+__m32c_eh_return:
+
+ /* At this point, r0 has the stack adjustment, r1r3 has the
+ address to return to. The stack looks like this:
+
+ old_ra
+ old_fp
+ <- unwound sp
+ ...
+ fb
+ through
+ r0
+ <- sp
+
+ What we need to do is restore all the registers, update the
+ stack, and return to the right place.
+ */
+
+ stc sp,a0
+
+ add.W A(#16,#24),a0
+ /* a0 points to the current stack, just above the register
+ save areas */
+
+ mov.w a0,a1
+ exts.w r0
+ sub.W A(r0,r2r0),a1
+ sub.W A(#3,#4),a1
+ /* a1 points to the new stack. */
+
+ /* This is for the "rts" below. */
+ mov.w r1,[a1]
+#ifdef A16
+ mov.w r2,r1
+ mov.b r1l,2[a1]
+#else
+ mov.w r2,2[a1]
+#endif
+
+ /* This is for the "popc sp" below. */
+ mov.W a1,[a0]
+
+ popm r0,r1,r2,r3,a0,a1,sb,fb
+ popc sp
+ rts
+#endif
+
+/* SImode arguments for SI foo(SI,SI) functions. */
+#ifdef A16
+#define SAL 5[fb]
+#define SAH 7[fb]
+#define SBL 9[fb]
+#define SBH 11[fb]
+#else
+#define SAL 8[fb]
+#define SAH 10[fb]
+#define SBL 12[fb]
+#define SBH 14[fb]
+#endif
+
+#ifdef L__m32c_mulsi3
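+/* 32x32->32 multiply from three 16x16->32 partial products:
+     (AH:AL) * (BH:BL) == AL*BL + ((AL*BH + AH*BL) << 16)  (mod 2^32).
+   The AH*BH term only affects bits 32 and up, so it is omitted; the
+   result is accumulated in the memory registers mem0 (low word) and
+   mem2 (high word).  */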
+ .text
+ .global ___mulsi3
+___mulsi3:
+ enter #0
+ push.w r2
+ mov.w SAL,r0
+ mulu.w SBL,r0 /* writes to r2r0 */
+ mov.w r0,mem0
+ mov.w r2,mem2
+ mov.w SAL,r0
+ mulu.w SBH,r0 /* writes to r2r0 */
+ add.w r0,mem2
+ mov.w SAH,r0
+ mulu.w SBL,r0 /* writes to r2r0 */
+ add.w r0,mem2
+ pop.w r2
+ exitd
+#endif
+
+#ifdef L__m32c_cmpsi2
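+/* Signed 32-bit comparison; returns 0 for a < b, 1 for a == b, and
+   2 for a > b, matching the usual libgcc __cmpXi2 convention.  */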
+ .text
+ .global ___cmpsi2
+___cmpsi2:
+ enter #0
+ cmp.w SBH,SAH
+ jgt cmpsi_gt
+ jlt cmpsi_lt
+ cmp.w SBL,SAL
+ jgt cmpsi_gt
+ jlt cmpsi_lt
+ mov.w #1,r0
+ exitd
+cmpsi_gt:
+ mov.w #2,r0
+ exitd
+cmpsi_lt:
+ mov.w #0,r0
+ exitd
+#endif
+
+#ifdef L__m32c_ucmpsi2
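+/* Unsigned 32-bit comparison: 0 for a < b, 1 for a == b, 2 for a > b.  */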
+ .text
+ .global ___ucmpsi2
+___ucmpsi2:
+ enter #0
+ cmp.w SBH,SAH
+ jgtu cmpsi_gt
+ jltu cmpsi_lt
+ cmp.w SBL,SAL
+ jgtu cmpsi_gt
+ jltu cmpsi_lt
+ mov.w #1,r0
+ exitd
+cmpsi_gt:
+ mov.w #2,r0
+ exitd
+cmpsi_lt:
+ mov.w #0,r0
+ exitd
+#endif
+
+#ifdef L__m32c_jsri16
+ .text
+#ifdef A16
+ .global m32c_jsri16
+m32c_jsri16:
+ add.w #-1, sp
+
+ /* Read the address (16 bits) and return address (24 bits) off
+ the stack. */
+ mov.w 4[sp], r0
+ mov.w 1[sp], r3
+ mov.b 3[sp], a0 /* This zero-extends, so the high byte has
+ zero in it. */
+
+ /* Write the return address, then new address, to the stack. */
+ mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */
+ mov.w r0, 0[sp]
+ mov.w r3, 3[sp]
+ mov.b a0, 5[sp]
+
+ /* This "returns" to the target address, leaving the pending
+ return address on the stack. */
+ rts
+#endif
+
+#endif
diff --git a/gcc/config/m32c/m32c-lib2-trapv.c b/gcc/config/m32c/m32c-lib2-trapv.c
new file mode 100644
index 000000000..bb61ceaf0
--- /dev/null
+++ b/gcc/config/m32c/m32c-lib2-trapv.c
@@ -0,0 +1,43 @@
+/* 16-bit trapping arithmetic routines for R8C/M16C/M32C
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* See the comment by the definition of LIBGCC2_UNITS_PER_WORD in
+ m32c.h for why we are creating extra versions of some of the
+ functions defined in libgcc2.c.
+
+ Note - this file is separate from m32c-lib2.c so that the following
+   functions will appear in their own object file.  This is necessary
+   because they call abort(), which is defined in the C library, whereas
+   the functions in m32c-lib2.c are completely self-sufficient.  */
+
+#define LIBGCC2_UNITS_PER_WORD 2
+
+#define L_mulvsi3
+#define L_negvsi2
+#define L_addvsi3
+#define L_subvsi3
+
+#include "libgcc2.c"
diff --git a/gcc/config/m32c/m32c-lib2.c b/gcc/config/m32c/m32c-lib2.c
new file mode 100644
index 000000000..274affc4a
--- /dev/null
+++ b/gcc/config/m32c/m32c-lib2.c
@@ -0,0 +1,134 @@
+/* libgcc routines for R8C/M16C/M32C
+ Copyright (C) 2005, 2009
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+typedef int sint32_type __attribute__ ((mode (SI)));
+typedef unsigned int uint32_type __attribute__ ((mode (SI)));
+typedef int word_type __attribute__ ((mode (__word__)));
+
+uint32_type udivmodsi4 (uint32_type, uint32_type, word_type);
+sint32_type __divsi3 (sint32_type, sint32_type);
+sint32_type __modsi3 (sint32_type, sint32_type);
+
+/* Shift-and-subtract (restoring) division.  Returns NUM % DEN when
+   MODWANTED is nonzero, and NUM / DEN otherwise.  */
+uint32_type
+udivmodsi4 (uint32_type num, uint32_type den, word_type modwanted)
+{
+  uint32_type bit = 1;
+  uint32_type res = 0;
+
+  /* Scale the divisor up until it would pass the dividend or lose
+     its top bit; BIT tracks the quotient bit each scaled copy of
+     DEN represents.  */
+  while (den < num && bit && !(den & (1L << 31)))
+    {
+      den <<= 1;
+      bit <<= 1;
+    }
+  /* Walk back down, subtracting each scaled divisor that still
+     fits and recording the corresponding quotient bit.  */
+  while (bit)
+    {
+      if (num >= den)
+        {
+          num -= den;
+          res |= bit;
+        }
+      bit >>= 1;
+      den >>= 1;
+    }
+  if (modwanted)
+    return num;
+  return res;
+}
+
+sint32_type
+__divsi3 (sint32_type a, sint32_type b)
+{
+ word_type neg = 0;
+ sint32_type res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = !neg;
+ }
+
+ if (b < 0)
+ {
+ b = -b;
+ neg = !neg;
+ }
+
+ res = udivmodsi4 (a, b, 0);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+
+sint32_type
+__modsi3 (sint32_type a, sint32_type b)
+{
+ word_type neg = 0;
+ sint32_type res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = 1;
+ }
+
+ if (b < 0)
+ b = -b;
+
+ res = udivmodsi4 (a, b, 1);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+
+/* See the comment by the definition of LIBGCC2_UNITS_PER_WORD in
+ m32c.h for why we are creating extra versions of some of the
+ functions defined in libgcc2.c. */
+
+#define LIBGCC2_UNITS_PER_WORD 2
+
+#define L_clzsi2
+#define L_ctzsi2
+#define L_ffssi2
+#define L_paritysi2
+#define L_popcountsi2
+
+#include "libgcc2.c"
+
+uint32_type
+__udivsi3 (uint32_type a, uint32_type b)
+{
+ return udivmodsi4 (a, b, 0);
+}
+
+uint32_type
+__umodsi3 (uint32_type a, uint32_type b)
+{
+ return udivmodsi4 (a, b, 1);
+}
diff --git a/gcc/config/m32c/m32c-modes.def b/gcc/config/m32c/m32c-modes.def
new file mode 100644
index 000000000..80412104b
--- /dev/null
+++ b/gcc/config/m32c/m32c-modes.def
@@ -0,0 +1,29 @@
+/* Target-Specific Modes for R8C/M16C/M32C
+ Copyright (C) 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* 24-bit pointers, whole */
+/*INT_MODE (PI, 3);*/
+
+/* 24-bit pointers, in 32-bit units */
+PARTIAL_INT_MODE (SI);
+
+/* 48-bit MULEX result */
+/* INT_MODE (MI, 6); */
diff --git a/gcc/config/m32c/m32c-pragma.c b/gcc/config/m32c/m32c-pragma.c
new file mode 100644
index 000000000..df976574b
--- /dev/null
+++ b/gcc/config/m32c/m32c-pragma.c
@@ -0,0 +1,135 @@
+/* M32C Pragma support
+ Copyright (C) 2004, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "c-family/c-pragma.h"
+#include "c-family/c-common.h"
+#include "diagnostic-core.h"
+#include "cpplib.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "m32c-protos.h"
+#include "function.h"
+#define MAX_RECOG_OPERANDS 10
+#include "reload.h"
+#include "target.h"
+
+/* Implements the "GCC memregs" pragma. This pragma takes only an
+ integer, and is semantically identical to the -memregs= command
+ line option. The only catch is, the programmer should only use
+ this pragma at the beginning of the file (preferably, in some
+ project-wide header) to avoid ABI changes related to changing the
+ list of available "registers". */
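+/* For example (illustrative), a project-wide header might contain
+
+     #pragma GCC memregs 8
+
+   which is equivalent to compiling everything with -memregs=8.  */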
+static void
+m32c_pragma_memregs (cpp_reader * reader ATTRIBUTE_UNUSED)
+{
+ /* on off */
+ tree val;
+ enum cpp_ttype type;
+ HOST_WIDE_INT i;
+
+ type = pragma_lex (&val);
+ if (type == CPP_NUMBER)
+ {
+ if (host_integerp (val, 1))
+ {
+ i = tree_low_cst (val, 1);
+
+ type = pragma_lex (&val);
+ if (type != CPP_EOF)
+ warning (0, "junk at end of #pragma GCC memregs [0..16]");
+
+ if (0 <= i && i <= 16)
+ {
+ if (!ok_to_change_target_memregs)
+ {
+ warning (0,
+ "#pragma GCC memregs must precede any function decls");
+ return;
+ }
+ target_memregs = i;
+ m32c_conditional_register_usage ();
+ }
+ else
+ {
+ warning (0, "#pragma GCC memregs takes a number [0..16]");
+ }
+
+ return;
+ }
+ }
+
+ error ("#pragma GCC memregs takes a number [0..16]");
+}
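+
+/* A hypothetical use, for illustration only:
+
+ #pragma GCC memregs 4
+
+ placed in a project-wide header before any function definition
+ restricts the compiler to the first four memreg bytes (two 16-bit
+ words), exactly as -memregs=4 on the command line would. */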
+
+/* Implements the "pragma ADDRESS" pragma. This pragma takes a
+ variable name and an address, and arranges for that variable to be
+ "at" that address. The variable is also made volatile. */
+static void
+m32c_pragma_address (cpp_reader * reader ATTRIBUTE_UNUSED)
+{
+ /* on off */
+ tree var, addr;
+ enum cpp_ttype type;
+
+ type = pragma_lex (&var);
+ if (type == CPP_NAME)
+ {
+ type = pragma_lex (&addr);
+ if (type == CPP_NUMBER)
+ {
+ if (var != error_mark_node)
+ {
+ unsigned uaddr = tree_low_cst (addr, 1);
+ m32c_note_pragma_address (IDENTIFIER_POINTER (var), uaddr);
+ }
+
+ type = pragma_lex (&var);
+ if (type != CPP_EOF)
+ {
+ error ("junk at end of #pragma ADDRESS");
+ }
+ return;
+ }
+ }
+ error ("malformed #pragma ADDRESS variable address");
+}
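+
+/* A hypothetical use, for illustration only:
+
+ #pragma ADDRESS port0 0x3b0
+
+ places the (assumed) variable port0 at address 0x3b0 and makes it
+ volatile, which suits memory-mapped I/O registers. */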
+
+/* Implements REGISTER_TARGET_PRAGMAS. */
+void
+m32c_register_pragmas (void)
+{
+ c_register_pragma ("GCC", "memregs", m32c_pragma_memregs);
+ c_register_pragma (NULL, "ADDRESS", m32c_pragma_address);
+ c_register_pragma (NULL, "address", m32c_pragma_address);
+
+ /* R8C and M16C have 16-bit pointers in a 20-bit address space.
+ M32C has 24-bit pointers in a 24-bit address space, so does not
+ need far pointers, but we accept the qualifier anyway, as a
+ no-op. */
+ if (TARGET_A16)
+ c_register_addr_space ("__far", ADDR_SPACE_FAR);
+ else
+ c_register_addr_space ("__far", ADDR_SPACE_GENERIC);
+}
diff --git a/gcc/config/m32c/m32c-protos.h b/gcc/config/m32c/m32c-protos.h
new file mode 100644
index 000000000..f7c32e7e8
--- /dev/null
+++ b/gcc/config/m32c/m32c-protos.h
@@ -0,0 +1,103 @@
+/* Target Prototypes for R8C/M16C/M32C
+ Copyright (C) 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define MM enum machine_mode
+#define UINT unsigned int
+
+void m32c_conditional_register_usage (void);
+int m32c_const_ok_for_constraint_p (HOST_WIDE_INT, char, const char *);
+UINT m32c_dwarf_frame_regnum (int);
+int m32c_eh_return_data_regno (int);
+void m32c_emit_epilogue (void);
+void m32c_emit_prologue (void);
+int m32c_epilogue_uses (int);
+int m32c_extra_address_constraint (char, const char *);
+int m32c_extra_memory_constraint (char, const char *);
+int m32c_function_arg_regno_p (int);
+void m32c_init_expanders (void);
+int m32c_initial_elimination_offset (int, int);
+void m32c_output_reg_pop (FILE *, int);
+void m32c_output_reg_push (FILE *, int);
+int m32c_print_operand_punct_valid_p (int);
+unsigned int m32c_push_rounding (int);
+int m32c_reg_class_from_constraint (char, const char *);
+void m32c_register_pragmas (void);
+void m32c_note_pragma_address (const char *, unsigned);
+int m32c_regno_ok_for_base_p (int);
+int m32c_trampoline_alignment (void);
+int m32c_trampoline_size (void);
+
+#ifdef RTX_CODE
+
+int m32c_cannot_change_mode_class (MM, MM, int);
+int m32c_class_max_nregs (int, MM);
+rtx m32c_eh_return_stackadj_rtx (void);
+void m32c_emit_eh_epilogue (rtx);
+int m32c_expand_cmpstr (rtx *);
+int m32c_expand_insv (rtx *);
+int m32c_expand_movcc (rtx *);
+int m32c_expand_movmemhi (rtx *);
+int m32c_expand_movstr (rtx *);
+void m32c_expand_neg_mulpsi3 (rtx *);
+int m32c_expand_setmemhi (rtx *);
+int m32c_extra_constraint_p (rtx, char, const char *);
+int m32c_extra_constraint_p2 (rtx, char, const char *);
+int m32c_hard_regno_nregs (int, MM);
+int m32c_hard_regno_ok (int, MM);
+bool m32c_illegal_subreg_p (rtx);
+bool m32c_immd_dbl_mov (rtx *, MM);
+rtx m32c_incoming_return_addr_rtx (void);
+int m32c_legitimate_constant_p (rtx);
+int m32c_legitimize_reload_address (rtx *, MM, int, int, int);
+int m32c_limit_reload_class (MM, int);
+int m32c_modes_tieable_p (MM, MM);
+bool m32c_mov_ok (rtx *, MM);
+char * m32c_output_compare (rtx, rtx *);
+int m32c_preferred_output_reload_class (rtx, int);
+int m32c_preferred_reload_class (rtx, int);
+int m32c_prepare_move (rtx *, MM);
+int m32c_prepare_shift (rtx *, int, int);
+void m32c_print_operand (FILE *, rtx, int);
+void m32c_print_operand_address (FILE *, rtx);
+int m32c_reg_ok_for_base_p (rtx, int);
+enum reg_class m32c_regno_reg_class (int);
+rtx m32c_return_addr_rtx (int);
+const char *m32c_scc_pattern (rtx *, RTX_CODE);
+int m32c_secondary_reload_class (int, MM, rtx);
+int m32c_split_move (rtx *, MM, int);
+int m32c_split_psi_p (rtx *);
+int current_function_special_page_vector (rtx);
+
+#endif
+
+#ifdef TREE_CODE
+
+tree m32c_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+void m32c_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+bool m32c_promote_function_return (const_tree);
+int m32c_special_page_vector_p (tree);
+void m32c_output_aligned_common (FILE *, tree, const char *,
+ int, int, int);
+
+#endif
+
+#undef MM
+#undef UINT
diff --git a/gcc/config/m32c/m32c.abi b/gcc/config/m32c/m32c.abi
new file mode 100644
index 000000000..ee8324028
--- /dev/null
+++ b/gcc/config/m32c/m32c.abi
@@ -0,0 +1,132 @@
+ Target Definitions for R8C/M16C/M32C
+ Copyright (C) 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+
+These are just some random notes I used during development of this
+port. Please don't consider these to be "official" specifications,
+just additional information to help make the code easier to
+understand.
+
+
+Frame
+=====
+
+ +--------------------
+ | incoming args
+ +--------------------
+ | return address
+osp -> +--------------------
+ | saved fp
+fp -> +--------------------
+ | local data
+ +--------------------
+ | saved regs
+ +--------------------
+ | outgoing args (opt)
+sp -> +--------------------
+
+Argument Passing
+================
+
+r8c, m16c
+---------
+
+First arg may be passed in r1l or r1 if it (1) fits (QImode or
+HImode), (2) is named, and (3) is an integer or pointer type (no
+structs, floats, etc). Otherwise, it's passed on the stack.
+
+Second arg may be passed in r2, same restrictions (but not QImode),
+even if the first arg is passed on the stack.
+
+Third and further args are passed on the stack. No padding is used;
+stack "alignment" is 8 bits.
+
+m32cm, m32c
+-----------
+First arg may be passed in r0l or r0, same restrictions as above.
+
+Second and further args are passed on the stack. Padding is used
+after QImode parameters (i.e. lower-addressed byte is the value,
+higher-addressed byte is the padding); stack "alignment" is 16 bits.
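+
+Illustrative example (assumed prototype, not normative): given
+
+ void f (char a, int b, long c);
+
+r8c/m16c passes 'a' in r1l, 'b' in r2, and 'c' on the stack, while
+m32cm/m32c passes 'a' in r0l and puts both 'b' and 'c' on the stack.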
+
+
+Return Value
+============
+
+r8c, m16c
+---------
+
+QImode in r0l
+HImode in r0
+near pointer in r0
+(desired)
+SImode in r2r0
+far pointer in r2r0
+(actual)
+Anything bigger than 16 bits is returned in memory, at mem0 (mem0
+through mem15 are provided by libgcc.a)
+
+Aggregate values (regardless of size) are returned by pushing a
+pointer to a temporary area on the stack after the args are pushed.
+The function fills in this area with the value. Note that this
+pointer on the stack does not affect how register arguments, if any,
+are configured.
+
+m32cm, m32c
+-----------
+Same.
+
+
+Registers Preserved Across Calls
+================================
+
+r8c, m16c
+---------
+sb, fb, sp (i.e. nearly all registers are call clobbered)
+
+m32cm, m32c
+-----------
+r1, r2, r3, a0, a1, sb, fb, sp
+(except when used for return values)
+
+
+Interrupt Handlers
+==================
+
+The stack frame is slightly different for interrupt handlers, because
+(1) we don't have a usable parent frame, and (2) we have to use
+special instructions to return and thus must save/restore everything
+differently.
+
+ +--------------------
+ | program state
+osp -> +--------------------
+ | return address
+ +--------------------
+ | saved r0..fp (pushm)
+fp -> +--------------------
+ | local data
+ +--------------------
+ | saved regs mem0..mem15
+ +--------------------
+ | outgoing args (opt)
+sp -> +--------------------
+
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c
new file mode 100644
index 000000000..9b4a5a95c
--- /dev/null
+++ b/gcc/config/m32c/m32c.c
@@ -0,0 +1,4860 @@
+/* Target Code for R8C/M16C/M32C
+ Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "reload.h"
+#include "diagnostic-core.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "gimple.h"
+#include "df.h"
+
+/* Prototypes */
+
+/* Used by m32c_pushm_popm. */
+typedef enum
+{
+ PP_pushm,
+ PP_popm,
+ PP_justcount
+} Push_Pop_Type;
+
+static bool m32c_function_needs_enter (void);
+static tree interrupt_handler (tree *, tree, tree, int, bool *);
+static tree function_vector_handler (tree *, tree, tree, int, bool *);
+static int interrupt_p (tree node);
+static int bank_switch_p (tree node);
+static int fast_interrupt_p (tree node);
+static bool m32c_asm_integer (rtx, unsigned int, int);
+static int m32c_comp_type_attributes (const_tree, const_tree);
+static bool m32c_fixed_condition_code_regs (unsigned int *, unsigned int *);
+static struct machine_function *m32c_init_machine_status (void);
+static void m32c_insert_attributes (tree, tree *);
+static bool m32c_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool m32c_addr_space_legitimate_address_p (enum machine_mode, rtx, bool, addr_space_t);
+static rtx m32c_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool m32c_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void m32c_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int m32c_function_arg_boundary (enum machine_mode, const_tree);
+static int m32c_pushm_popm (Push_Pop_Type);
+static bool m32c_strict_argument_naming (CUMULATIVE_ARGS *);
+static rtx m32c_struct_value_rtx (tree, int);
+static rtx m32c_subreg (enum machine_mode, rtx, enum machine_mode, int);
+static int need_to_save (int);
+static rtx m32c_function_value (const_tree, const_tree, bool);
+static rtx m32c_libcall_value (enum machine_mode, const_rtx);
+
+/* Returns true if an address is specified, else false. */
+static bool m32c_get_pragma_address (const char *varname, unsigned *addr);
+
+#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
+
+#define streq(a,b) (strcmp ((a), (b)) == 0)
+
+/* Internal support routines */
+
+/* Debugging statements are tagged with DEBUG0 only so that they can
+ be easily enabled individually, by replacing the '0' with '1' as
+ needed. */
+#define DEBUG0 0
+#define DEBUG1 1
+
+#if DEBUG0
+/* This is needed by some of the commented-out debug statements
+ below. */
+static char const *class_names[LIM_REG_CLASSES] = REG_CLASS_NAMES;
+#endif
+static int class_contents[LIM_REG_CLASSES][1] = REG_CLASS_CONTENTS;
+
+/* These are all to support encode_pattern(). */
+static char pattern[30], *patternp;
+static GTY(()) rtx patternr[30];
+#define RTX_IS(x) (streq (pattern, x))
+
+/* Some macros to simplify the logic throughout this file. */
+#define IS_MEM_REGNO(regno) ((regno) >= MEM0_REGNO && (regno) <= MEM7_REGNO)
+#define IS_MEM_REG(rtx) (GET_CODE (rtx) == REG && IS_MEM_REGNO (REGNO (rtx)))
+
+#define IS_CR_REGNO(regno) ((regno) >= SB_REGNO && (regno) <= PC_REGNO)
+#define IS_CR_REG(rtx) (GET_CODE (rtx) == REG && IS_CR_REGNO (REGNO (rtx)))
+
+static int
+far_addr_space_p (rtx x)
+{
+ if (GET_CODE (x) != MEM)
+ return 0;
+#if DEBUG0
+ fprintf(stderr, "\033[35mfar_addr_space: "); debug_rtx(x);
+ fprintf(stderr, " = %d\033[0m\n", MEM_ADDR_SPACE (x) == ADDR_SPACE_FAR);
+#endif
+ return MEM_ADDR_SPACE (x) == ADDR_SPACE_FAR;
+}
+
+/* We do most RTX matching by converting the RTX into a string, and
+ using string compares. This vastly simplifies the logic in many of
+ the functions in this file.
+
+ On exit, pattern[] has the encoded string (use RTX_IS("...") to
+ compare it) and patternr[] has pointers to the nodes in the RTX
+ corresponding to each character in the encoded string. The latter
+ is mostly used by print_operand().
+
+ Unrecognized patterns have '?' in them; this shows up when the
+ assembler complains about syntax errors.
+*/
+
+static void
+encode_pattern_1 (rtx x)
+{
+ int i;
+
+ if (patternp == pattern + sizeof (pattern) - 2)
+ {
+ patternp[-1] = '?';
+ return;
+ }
+
+ patternr[patternp - pattern] = x;
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ *patternp++ = 'r';
+ break;
+ case SUBREG:
+ if (GET_MODE_SIZE (GET_MODE (x)) !=
+ GET_MODE_SIZE (GET_MODE (XEXP (x, 0))))
+ *patternp++ = 'S';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case MEM:
+ *patternp++ = 'm';
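+ /* Deliberately falls through: the MEM's address is encoded next,
+ just as for CONST. */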
+ case CONST:
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case SIGN_EXTEND:
+ *patternp++ = '^';
+ *patternp++ = 'S';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case ZERO_EXTEND:
+ *patternp++ = '^';
+ *patternp++ = 'Z';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case PLUS:
+ *patternp++ = '+';
+ encode_pattern_1 (XEXP (x, 0));
+ encode_pattern_1 (XEXP (x, 1));
+ break;
+ case PRE_DEC:
+ *patternp++ = '>';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case POST_INC:
+ *patternp++ = '<';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case LO_SUM:
+ *patternp++ = 'L';
+ encode_pattern_1 (XEXP (x, 0));
+ encode_pattern_1 (XEXP (x, 1));
+ break;
+ case HIGH:
+ *patternp++ = 'H';
+ encode_pattern_1 (XEXP (x, 0));
+ break;
+ case SYMBOL_REF:
+ *patternp++ = 's';
+ break;
+ case LABEL_REF:
+ *patternp++ = 'l';
+ break;
+ case CODE_LABEL:
+ *patternp++ = 'c';
+ break;
+ case CONST_INT:
+ case CONST_DOUBLE:
+ *patternp++ = 'i';
+ break;
+ case UNSPEC:
+ *patternp++ = 'u';
+ *patternp++ = '0' + XCINT (x, 1, UNSPEC);
+ for (i = 0; i < XVECLEN (x, 0); i++)
+ encode_pattern_1 (XVECEXP (x, 0, i));
+ break;
+ case USE:
+ *patternp++ = 'U';
+ break;
+ case PARALLEL:
+ *patternp++ = '|';
+ for (i = 0; i < XVECLEN (x, 0); i++)
+ encode_pattern_1 (XVECEXP (x, 0, i));
+ break;
+ case EXPR_LIST:
+ *patternp++ = 'E';
+ encode_pattern_1 (XEXP (x, 0));
+ if (XEXP (x, 1))
+ encode_pattern_1 (XEXP (x, 1));
+ break;
+ default:
+ *patternp++ = '?';
+#if DEBUG0
+ fprintf (stderr, "can't encode pattern %s\n",
+ GET_RTX_NAME (GET_CODE (x)));
+ debug_rtx (x);
+ gcc_unreachable ();
+#endif
+ break;
+ }
+}
+
+static void
+encode_pattern (rtx x)
+{
+ patternp = pattern;
+ encode_pattern_1 (x);
+ *patternp = 0;
+}
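+
+/* For example, an rtx like
+
+ (mem:HI (plus:HI (reg:HI a0) (const_int 4)))
+
+ encodes as "m+ri": 'm' for the MEM, '+' for the PLUS, then 'r' and
+ 'i' for its operands; this is the "m+ri" shape tested throughout
+ this file. */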
+
+/* Since register names indicate the mode they're used in, we need a
+ way to determine which name to use for a given register in a given
+ mode. Called by print_operand(). */
+
+static const char *
+reg_name_with_mode (int regno, enum machine_mode mode)
+{
+ int mlen = GET_MODE_SIZE (mode);
+ if (regno == R0_REGNO && mlen == 1)
+ return "r0l";
+ if (regno == R0_REGNO && (mlen == 3 || mlen == 4))
+ return "r2r0";
+ if (regno == R0_REGNO && mlen == 6)
+ return "r2r1r0";
+ if (regno == R0_REGNO && mlen == 8)
+ return "r3r1r2r0";
+ if (regno == R1_REGNO && mlen == 1)
+ return "r1l";
+ if (regno == R1_REGNO && (mlen == 3 || mlen == 4))
+ return "r3r1";
+ if (regno == A0_REGNO && TARGET_A16 && (mlen == 3 || mlen == 4))
+ return "a1a0";
+ return reg_names[regno];
+}
+
+/* How many bytes a register uses on stack when it's pushed. We need
+ to know this because the push opcode needs to explicitly indicate
+ the size of the register, even though the register's name already
+ implies that size. Used by m32c_output_reg_{push,pop}, which are
+ only used through calls to ASM_OUTPUT_REG_{PUSH,POP}. */
+
+static int
+reg_push_size (int regno)
+{
+ switch (regno)
+ {
+ case R0_REGNO:
+ case R1_REGNO:
+ return 2;
+ case R2_REGNO:
+ case R3_REGNO:
+ case FLG_REGNO:
+ return 2;
+ case A0_REGNO:
+ case A1_REGNO:
+ case SB_REGNO:
+ case FB_REGNO:
+ case SP_REGNO:
+ if (TARGET_A16)
+ return 2;
+ else
+ return 3;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static int *class_sizes = 0;
+
+/* Given two register classes, find the largest class contained in
+ their intersection. If the intersection is empty, return
+ RETURNED_IF_EMPTY instead. */
+static int
+reduce_class (int original_class, int limiting_class, int returned_if_empty)
+{
+ int cc = class_contents[original_class][0];
+ int i, best = NO_REGS;
+ int best_size = 0;
+
+ if (original_class == limiting_class)
+ return original_class;
+
+ if (!class_sizes)
+ {
+ int r;
+ class_sizes = (int *) xmalloc (LIM_REG_CLASSES * sizeof (int));
+ for (i = 0; i < LIM_REG_CLASSES; i++)
+ {
+ class_sizes[i] = 0;
+ for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
+ if (class_contents[i][0] & (1 << r))
+ class_sizes[i]++;
+ }
+ }
+
+ cc &= class_contents[limiting_class][0];
+ for (i = 0; i < LIM_REG_CLASSES; i++)
+ {
+ int ic = class_contents[i][0];
+
+ if ((~cc & ic) == 0)
+ if (best_size < class_sizes[i])
+ {
+ best = i;
+ best_size = class_sizes[i];
+ }
+
+ }
+ if (best == NO_REGS)
+ return returned_if_empty;
+ return best;
+}
+
+/* Used by m32c_register_move_cost to determine if a move is
+ impossibly expensive. */
+static bool
+class_can_hold_mode (reg_class_t rclass, enum machine_mode mode)
+{
+ /* Cache the results: 0=untested 1=no 2=yes */
+ static char results[LIM_REG_CLASSES][MAX_MACHINE_MODE];
+
+ if (results[(int) rclass][mode] == 0)
+ {
+ int r;
+ results[rclass][mode] = 1;
+ for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
+ if (in_hard_reg_set_p (reg_class_contents[(int) rclass], mode, r)
+ && HARD_REGNO_MODE_OK (r, mode))
+ {
+ results[rclass][mode] = 2;
+ break;
+ }
+ }
+
+#if DEBUG0
+ fprintf (stderr, "class %s can hold %s? %s\n",
+ class_names[(int) rclass], mode_name[mode],
+ (results[rclass][mode] == 2) ? "yes" : "no");
+#endif
+ return results[(int) rclass][mode] == 2;
+}
+
+/* Run-time Target Specification. */
+
+/* Memregs are memory locations that gcc treats like general
+ registers, as there are a limited number of true registers and the
+ m32c families can use memory in most places that registers can be
+ used.
+
+ However, since memory accesses are more expensive than registers,
+ we allow the user to limit the number of memregs available, in
+ order to try to persuade gcc to try harder to use real registers.
+
+ Memregs are provided by m32c-lib1.S.
+*/
+
+int target_memregs = 16;
+static bool target_memregs_set = FALSE;
+int ok_to_change_target_memregs = TRUE;
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION m32c_handle_option
+static bool
+m32c_handle_option (size_t code,
+ const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ if (code == OPT_memregs_)
+ {
+ target_memregs_set = TRUE;
+ target_memregs = atoi (arg);
+ }
+ return TRUE;
+}
+
+/* Implements TARGET_OPTION_OVERRIDE. */
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE m32c_option_override
+
+static void
+m32c_option_override (void)
+{
+ /* We limit memregs to 0..16, and provide a default. */
+ if (target_memregs_set)
+ {
+ if (target_memregs < 0 || target_memregs > 16)
+ error ("invalid target memregs value '%d'", target_memregs);
+ }
+ else
+ target_memregs = 16;
+
+ if (TARGET_A24)
+ flag_ivopts = 0;
+
+ /* This target defaults to strict volatile bitfields. */
+ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
+ flag_strict_volatile_bitfields = 1;
+
+ /* r8c/m16c have no 16-bit indirect call, so thunks are involved.
+ This is always worse than an absolute call. */
+ if (TARGET_A16)
+ flag_no_function_cse = 1;
+
+ /* This wants to put insns between compares and their jumps. */
+ /* FIXME: The right solution is to properly trace the flags register
+ values, but that is too much work for stage 4. */
+ flag_combine_stack_adjustments = 0;
+}
+
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE m32c_override_options_after_change
+
+static void
+m32c_override_options_after_change (void)
+{
+ if (TARGET_A16)
+ flag_no_function_cse = 1;
+}
+
+/* Defining data structures for per-function information */
+
+/* The usual; we set up our machine_function data. */
+static struct machine_function *
+m32c_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Implements INIT_EXPANDERS. We just set up to call the above
+ function. */
+void
+m32c_init_expanders (void)
+{
+ init_machine_status = m32c_init_machine_status;
+}
+
+/* Storage Layout */
+
+/* Register Basics */
+
+/* Basic Characteristics of Registers */
+
+/* Whether a mode fits in a register is complex enough to warrant a
+ table. */
+static struct
+{
+ char qi_regs;
+ char hi_regs;
+ char pi_regs;
+ char si_regs;
+ char di_regs;
+} nregs_table[FIRST_PSEUDO_REGISTER] =
+{
+ { 1, 1, 2, 2, 4 }, /* r0 */
+ { 0, 1, 0, 0, 0 }, /* r2 */
+ { 1, 1, 2, 2, 0 }, /* r1 */
+ { 0, 1, 0, 0, 0 }, /* r3 */
+ { 0, 1, 1, 0, 0 }, /* a0 */
+ { 0, 1, 1, 0, 0 }, /* a1 */
+ { 0, 1, 1, 0, 0 }, /* sb */
+ { 0, 1, 1, 0, 0 }, /* fb */
+ { 0, 1, 1, 0, 0 }, /* sp */
+ { 1, 1, 1, 0, 0 }, /* pc */
+ { 0, 0, 0, 0, 0 }, /* fl */
+ { 1, 1, 1, 0, 0 }, /* ap */
+ { 1, 1, 2, 2, 4 }, /* mem0 */
+ { 1, 1, 2, 2, 4 }, /* mem1 */
+ { 1, 1, 2, 2, 4 }, /* mem2 */
+ { 1, 1, 2, 2, 4 }, /* mem3 */
+ { 1, 1, 2, 2, 4 }, /* mem4 */
+ { 1, 1, 2, 2, 0 }, /* mem5 */
+ { 1, 1, 2, 2, 0 }, /* mem6 */
+ { 1, 1, 0, 0, 0 }, /* mem7 */
+};
+
+/* Implements TARGET_CONDITIONAL_REGISTER_USAGE. We adjust the number
+ of available memregs, and select which registers need to be preserved
+ across calls based on the chip family. */
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE m32c_conditional_register_usage
+void
+m32c_conditional_register_usage (void)
+{
+ int i;
+
+ if (0 <= target_memregs && target_memregs <= 16)
+ {
+ /* The command line option is bytes, but our "registers" are
+ 16-bit words. */
+ for (i = (target_memregs+1)/2; i < 8; i++)
+ {
+ fixed_regs[MEM0_REGNO + i] = 1;
+ CLEAR_HARD_REG_BIT (reg_class_contents[MEM_REGS], MEM0_REGNO + i);
+ }
+ }
+
+ /* M32CM and M32C preserve more registers across function calls. */
+ if (TARGET_A24)
+ {
+ call_used_regs[R1_REGNO] = 0;
+ call_used_regs[R2_REGNO] = 0;
+ call_used_regs[R3_REGNO] = 0;
+ call_used_regs[A0_REGNO] = 0;
+ call_used_regs[A1_REGNO] = 0;
+ }
+}
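+
+/* Worked example (illustrative): with -memregs=5, (5+1)/2 = 3, so
+ word registers MEM0..MEM2 (bytes mem0..mem5) remain available and
+ MEM3..MEM7 are marked fixed. */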
+
+/* How Values Fit in Registers */
+
+/* Implements HARD_REGNO_NREGS. This is complicated by the fact that
+ different registers are different sizes from each other, *and* may
+ be different sizes in different chip families. */
+static int
+m32c_hard_regno_nregs_1 (int regno, enum machine_mode mode)
+{
+ if (regno == FLG_REGNO && mode == CCmode)
+ return 1;
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+
+ if (regno >= MEM0_REGNO && regno <= MEM7_REGNO)
+ return (GET_MODE_SIZE (mode) + 1) / 2;
+
+ if (GET_MODE_SIZE (mode) <= 1)
+ return nregs_table[regno].qi_regs;
+ if (GET_MODE_SIZE (mode) <= 2)
+ return nregs_table[regno].hi_regs;
+ if (regno == A0_REGNO && mode == SImode && TARGET_A16)
+ return 2;
+ if ((GET_MODE_SIZE (mode) <= 3 || mode == PSImode) && TARGET_A24)
+ return nregs_table[regno].pi_regs;
+ if (GET_MODE_SIZE (mode) <= 4)
+ return nregs_table[regno].si_regs;
+ if (GET_MODE_SIZE (mode) <= 8)
+ return nregs_table[regno].di_regs;
+ return 0;
+}
+
+int
+m32c_hard_regno_nregs (int regno, enum machine_mode mode)
+{
+ int rv = m32c_hard_regno_nregs_1 (regno, mode);
+ return rv ? rv : 1;
+}
+
+/* Implements HARD_REGNO_MODE_OK. The above function does the work
+ already; just test its return value. */
+int
+m32c_hard_regno_ok (int regno, enum machine_mode mode)
+{
+ return m32c_hard_regno_nregs_1 (regno, mode) != 0;
+}
+
+/* Implements MODES_TIEABLE_P. In general, modes aren't tieable since
+ registers are all different sizes. However, since most modes are
+ bigger than our registers anyway, it's easier to implement this
+ function that way, leaving QImode as the only unique case. */
+int
+m32c_modes_tieable_p (enum machine_mode m1, enum machine_mode m2)
+{
+ if (GET_MODE_SIZE (m1) == GET_MODE_SIZE (m2))
+ return 1;
+
+#if 0
+ if (m1 == QImode || m2 == QImode)
+ return 0;
+#endif
+
+ return 1;
+}
+
+/* Register Classes */
+
+/* Implements REGNO_REG_CLASS. */
+enum reg_class
+m32c_regno_reg_class (int regno)
+{
+ switch (regno)
+ {
+ case R0_REGNO:
+ return R0_REGS;
+ case R1_REGNO:
+ return R1_REGS;
+ case R2_REGNO:
+ return R2_REGS;
+ case R3_REGNO:
+ return R3_REGS;
+ case A0_REGNO:
+ return A0_REGS;
+ case A1_REGNO:
+ return A1_REGS;
+ case SB_REGNO:
+ return SB_REGS;
+ case FB_REGNO:
+ return FB_REGS;
+ case SP_REGNO:
+ return SP_REGS;
+ case FLG_REGNO:
+ return FLG_REGS;
+ default:
+ if (IS_MEM_REGNO (regno))
+ return MEM_REGS;
+ return ALL_REGS;
+ }
+}
+
+/* Implements REG_CLASS_FROM_CONSTRAINT. Note that some constraints only match
+ for certain chip families. */
+int
+m32c_reg_class_from_constraint (char c ATTRIBUTE_UNUSED, const char *s)
+{
+ if (memcmp (s, "Rsp", 3) == 0)
+ return SP_REGS;
+ if (memcmp (s, "Rfb", 3) == 0)
+ return FB_REGS;
+ if (memcmp (s, "Rsb", 3) == 0)
+ return SB_REGS;
+ if (memcmp (s, "Rcr", 3) == 0)
+ return TARGET_A16 ? CR_REGS : NO_REGS;
+ if (memcmp (s, "Rcl", 3) == 0)
+ return TARGET_A24 ? CR_REGS : NO_REGS;
+ if (memcmp (s, "R0w", 3) == 0)
+ return R0_REGS;
+ if (memcmp (s, "R1w", 3) == 0)
+ return R1_REGS;
+ if (memcmp (s, "R2w", 3) == 0)
+ return R2_REGS;
+ if (memcmp (s, "R3w", 3) == 0)
+ return R3_REGS;
+ if (memcmp (s, "R02", 3) == 0)
+ return R02_REGS;
+ if (memcmp (s, "R13", 3) == 0)
+ return R13_REGS;
+ if (memcmp (s, "R03", 3) == 0)
+ return R03_REGS;
+ if (memcmp (s, "Rdi", 3) == 0)
+ return DI_REGS;
+ if (memcmp (s, "Rhl", 3) == 0)
+ return HL_REGS;
+ if (memcmp (s, "R23", 3) == 0)
+ return R23_REGS;
+ if (memcmp (s, "Ra0", 3) == 0)
+ return A0_REGS;
+ if (memcmp (s, "Ra1", 3) == 0)
+ return A1_REGS;
+ if (memcmp (s, "Raa", 3) == 0)
+ return A_REGS;
+ if (memcmp (s, "Raw", 3) == 0)
+ return TARGET_A16 ? A_REGS : NO_REGS;
+ if (memcmp (s, "Ral", 3) == 0)
+ return TARGET_A24 ? A_REGS : NO_REGS;
+ if (memcmp (s, "Rqi", 3) == 0)
+ return QI_REGS;
+ if (memcmp (s, "Rad", 3) == 0)
+ return AD_REGS;
+ if (memcmp (s, "Rsi", 3) == 0)
+ return SI_REGS;
+ if (memcmp (s, "Rhi", 3) == 0)
+ return HI_REGS;
+ if (memcmp (s, "Rhc", 3) == 0)
+ return HC_REGS;
+ if (memcmp (s, "Rra", 3) == 0)
+ return RA_REGS;
+ if (memcmp (s, "Rfl", 3) == 0)
+ return FLG_REGS;
+ if (memcmp (s, "Rmm", 3) == 0)
+ {
+ if (fixed_regs[MEM0_REGNO])
+ return NO_REGS;
+ return MEM_REGS;
+ }
+
+ /* PSImode registers - i.e. whatever can hold a pointer. */
+ if (memcmp (s, "Rpi", 3) == 0)
+ {
+ if (TARGET_A16)
+ return HI_REGS;
+ else
+ return RA_REGS; /* r2r0 and r3r1 can hold pointers. */
+ }
+
+ /* We handle this one as an EXTRA_CONSTRAINT. */
+ if (memcmp (s, "Rpa", 3) == 0)
+ return NO_REGS;
+
+ if (*s == 'R')
+ {
+ fprintf(stderr, "unrecognized R constraint: %.3s\n", s);
+ gcc_unreachable();
+ }
+
+ return NO_REGS;
+}
+
+/* Implements REGNO_OK_FOR_BASE_P. */
+int
+m32c_regno_ok_for_base_p (int regno)
+{
+ if (regno == A0_REGNO
+ || regno == A1_REGNO || regno >= FIRST_PSEUDO_REGISTER)
+ return 1;
+ return 0;
+}
+
+#define DEBUG_RELOAD 0
+
+/* Implements PREFERRED_RELOAD_CLASS. In general, prefer general
+ registers of the appropriate size. */
+int
+m32c_preferred_reload_class (rtx x, int rclass)
+{
+ int newclass = rclass;
+
+#if DEBUG_RELOAD
+ fprintf (stderr, "\npreferred_reload_class for %s is ",
+ class_names[rclass]);
+#endif
+ if (rclass == NO_REGS)
+ rclass = GET_MODE (x) == QImode ? HL_REGS : R03_REGS;
+
+ if (reg_classes_intersect_p (rclass, CR_REGS))
+ {
+ switch (GET_MODE (x))
+ {
+ case QImode:
+ newclass = HL_REGS;
+ break;
+ default:
+ /* newclass = HI_REGS; */
+ break;
+ }
+ }
+
+ else if (newclass == QI_REGS && GET_MODE_SIZE (GET_MODE (x)) > 2)
+ newclass = SI_REGS;
+ else if (GET_MODE_SIZE (GET_MODE (x)) > 4
+ && ~class_contents[rclass][0] & 0x000f)
+ newclass = DI_REGS;
+
+ rclass = reduce_class (rclass, newclass, rclass);
+
+ if (GET_MODE (x) == QImode)
+ rclass = reduce_class (rclass, HL_REGS, rclass);
+
+#if DEBUG_RELOAD
+ fprintf (stderr, "%s\n", class_names[rclass]);
+ debug_rtx (x);
+
+ if (GET_CODE (x) == MEM
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
+ fprintf (stderr, "Glorm!\n");
+#endif
+ return rclass;
+}
+
+/* Implements PREFERRED_OUTPUT_RELOAD_CLASS. */
+int
+m32c_preferred_output_reload_class (rtx x, int rclass)
+{
+ return m32c_preferred_reload_class (x, rclass);
+}
+
+/* Implements LIMIT_RELOAD_CLASS. We basically want to avoid using
+ address registers for data reloads, since they're needed for the
+ address reloads themselves. */
+int
+m32c_limit_reload_class (enum machine_mode mode, int rclass)
+{
+#if DEBUG_RELOAD
+ fprintf (stderr, "limit_reload_class for %s: %s ->",
+ mode_name[mode], class_names[rclass]);
+#endif
+
+ if (mode == QImode)
+ rclass = reduce_class (rclass, HL_REGS, rclass);
+ else if (mode == HImode)
+ rclass = reduce_class (rclass, HI_REGS, rclass);
+ else if (mode == SImode)
+ rclass = reduce_class (rclass, SI_REGS, rclass);
+
+ if (rclass != A_REGS)
+ rclass = reduce_class (rclass, DI_REGS, rclass);
+
+#if DEBUG_RELOAD
+ fprintf (stderr, " %s\n", class_names[rclass]);
+#endif
+ return rclass;
+}
+
+/* Implements SECONDARY_RELOAD_CLASS. QImode values have to be
+ reloaded in r0 or r1, as those are the only real QImode registers.
+ CR regs get reloaded through appropriately sized general or
+ address registers. */
+int
+m32c_secondary_reload_class (int rclass, enum machine_mode mode, rtx x)
+{
+ int cc = class_contents[rclass][0];
+#if DEBUG0
+ fprintf (stderr, "\nsecondary reload class %s %s\n",
+ class_names[rclass], mode_name[mode]);
+ debug_rtx (x);
+#endif
+ if (mode == QImode
+ && GET_CODE (x) == MEM && (cc & ~class_contents[R23_REGS][0]) == 0)
+ return QI_REGS;
+ if (reg_classes_intersect_p (rclass, CR_REGS)
+ && GET_CODE (x) == REG
+ && REGNO (x) >= SB_REGNO && REGNO (x) <= SP_REGNO)
+ return TARGET_A16 ? HI_REGS : A_REGS;
+ return NO_REGS;
+}
+
+/* Implements TARGET_CLASS_LIKELY_SPILLED_P. A_REGS is needed for address
+ reloads. */
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P m32c_class_likely_spilled_p
+
+static bool
+m32c_class_likely_spilled_p (reg_class_t regclass)
+{
+ if (regclass == A_REGS)
+ return true;
+
+ return (reg_class_size[(int) regclass] == 1);
+}
+
+/* Implements CLASS_MAX_NREGS. We calculate this according to its
+ documented meaning, to avoid potential inconsistencies with actual
+ class definitions. */
+int
+m32c_class_max_nregs (int regclass, enum machine_mode mode)
+{
+ int rn, max = 0;
+
+ for (rn = 0; rn < FIRST_PSEUDO_REGISTER; rn++)
+ if (class_contents[regclass][0] & (1 << rn))
+ {
+ int n = m32c_hard_regno_nregs (rn, mode);
+ if (max < n)
+ max = n;
+ }
+ return max;
+}
+
+/* Implements CANNOT_CHANGE_MODE_CLASS. Only r0 and r1 can change to
+ QI (r0l, r1l) because the chip doesn't support QI ops on other
+ registers (well, it does on a0/a1 but if we let gcc do that, reload
+ suffers). Otherwise, we allow changes to larger modes. */
+int
+m32c_cannot_change_mode_class (enum machine_mode from,
+ enum machine_mode to, int rclass)
+{
+ int rn;
+#if DEBUG0
+ fprintf (stderr, "cannot change from %s to %s in %s\n",
+ mode_name[from], mode_name[to], class_names[rclass]);
+#endif
+
+ /* If the larger mode isn't allowed in any of these registers, we
+ can't allow the change. */
+ for (rn = 0; rn < FIRST_PSEUDO_REGISTER; rn++)
+ if (class_contents[rclass][0] & (1 << rn))
+ if (! m32c_hard_regno_ok (rn, to))
+ return 1;
+
+ if (to == QImode)
+ return (class_contents[rclass][0] & 0x1ffa);
+
+ if (class_contents[rclass][0] & 0x0005 /* r0, r1 */
+ && GET_MODE_SIZE (from) > 1)
+ return 0;
+ if (GET_MODE_SIZE (from) > 2) /* all other regs */
+ return 0;
+
+ return 1;
+}
+
+/* Helpers for the rest of the file. */
+/* TRUE if the rtx is a REG rtx for the given register. */
+#define IS_REG(rtx,regno) (GET_CODE (rtx) == REG \
+ && REGNO (rtx) == regno)
+/* TRUE if the rtx is a pseudo - specifically, one we can use as a
+ base register in address calculations (hence the "strict"
+ argument). */
+#define IS_PSEUDO(rtx,strict) (!strict && GET_CODE (rtx) == REG \
+ && (REGNO (rtx) == AP_REGNO \
+ || REGNO (rtx) >= FIRST_PSEUDO_REGISTER))
+
+/* Implements CONST_OK_FOR_CONSTRAINT_P. Currently, all constant
+ constraints start with 'I', with the next two characters indicating
+ the type and size of the range allowed. */
+int
+m32c_const_ok_for_constraint_p (HOST_WIDE_INT value,
+ char c ATTRIBUTE_UNUSED, const char *str)
+{
+ /* s=signed u=unsigned n=nonzero m=minus l=log2able,
+ [sun] bits [SUN] bytes, p=pointer size
+ I[-0-9][0-9] matches that number */
+ if (memcmp (str, "Is3", 3) == 0)
+ {
+ return (-8 <= value && value <= 7);
+ }
+ if (memcmp (str, "IS1", 3) == 0)
+ {
+ return (-128 <= value && value <= 127);
+ }
+ if (memcmp (str, "IS2", 3) == 0)
+ {
+ return (-32768 <= value && value <= 32767);
+ }
+ if (memcmp (str, "IU2", 3) == 0)
+ {
+ return (0 <= value && value <= 65535);
+ }
+ if (memcmp (str, "IU3", 3) == 0)
+ {
+ return (0 <= value && value <= 0x00ffffff);
+ }
+ if (memcmp (str, "In4", 3) == 0)
+ {
+ return (-8 <= value && value && value <= 8);
+ }
+ if (memcmp (str, "In5", 3) == 0)
+ {
+ return (-16 <= value && value && value <= 16);
+ }
+ if (memcmp (str, "In6", 3) == 0)
+ {
+ return (-32 <= value && value && value <= 32);
+ }
+ if (memcmp (str, "IM2", 3) == 0)
+ {
+ return (-65536 <= value && value && value <= -1);
+ }
+ if (memcmp (str, "Ilb", 3) == 0)
+ {
+ int b = exact_log2 (value);
+ return (b >= 0 && b <= 7);
+ }
+ if (memcmp (str, "Imb", 3) == 0)
+ {
+ int b = exact_log2 ((value ^ 0xff) & 0xff);
+ return (b >= 0 && b <= 7);
+ }
+ if (memcmp (str, "ImB", 3) == 0)
+ {
+ int b = exact_log2 ((value ^ 0xffff) & 0xffff);
+ return (b >= 0 && b <= 7);
+ }
+ if (memcmp (str, "Ilw", 3) == 0)
+ {
+ int b = exact_log2 (value);
+ return (b >= 0 && b <= 15);
+ }
+ if (memcmp (str, "Imw", 3) == 0)
+ {
+ int b = exact_log2 ((value ^ 0xffff) & 0xffff);
+ return (b >= 0 && b <= 15);
+ }
+ if (memcmp (str, "I00", 3) == 0)
+ {
+ return (value == 0);
+ }
+ return 0;
+}
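+
+/* For instance, "Ilb" accepts exactly the single-bit byte constants
+ 1, 2, 4, ..., 128 (exact_log2 in 0..7), and "Imb" their byte-wide
+ complements - presumably what the bit-manipulation patterns want. */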
+
+#define A0_OR_PSEUDO(x) (IS_REG(x, A0_REGNO) || REGNO (x) >= FIRST_PSEUDO_REGISTER)
+
+/* Implements EXTRA_CONSTRAINT_STR (see next function too). 'S' is
+ for memory constraints, plus "Rpa" for PARALLEL rtx's we use for
+ call return values. */
+int
+m32c_extra_constraint_p2 (rtx value, char c ATTRIBUTE_UNUSED, const char *str)
+{
+ encode_pattern (value);
+
+ if (far_addr_space_p (value))
+ {
+ if (memcmp (str, "SF", 2) == 0)
+ {
+ return ( (RTX_IS ("mr")
+ && A0_OR_PSEUDO (patternr[1])
+ && GET_MODE (patternr[1]) == SImode)
+ || (RTX_IS ("m+^Sri")
+ && A0_OR_PSEUDO (patternr[4])
+ && GET_MODE (patternr[4]) == HImode)
+ || (RTX_IS ("m+^Srs")
+ && A0_OR_PSEUDO (patternr[4])
+ && GET_MODE (patternr[4]) == HImode)
+ || (RTX_IS ("m+^S+ris")
+ && A0_OR_PSEUDO (patternr[5])
+ && GET_MODE (patternr[5]) == HImode)
+ || RTX_IS ("ms")
+ );
+ }
+ return 0;
+ }
+
+ if (memcmp (str, "Sd", 2) == 0)
+ {
+ /* This is the common "src/dest" address */
+ rtx r;
+ if (GET_CODE (value) == MEM && CONSTANT_P (XEXP (value, 0)))
+ return 1;
+ if (RTX_IS ("ms") || RTX_IS ("m+si"))
+ return 1;
+ if (RTX_IS ("m++rii"))
+ {
+ if (REGNO (patternr[3]) == FB_REGNO
+ && INTVAL (patternr[4]) == 0)
+ return 1;
+ }
+ if (RTX_IS ("mr"))
+ r = patternr[1];
+ else if (RTX_IS ("m+ri") || RTX_IS ("m+rs") || RTX_IS ("m+r+si"))
+ r = patternr[2];
+ else
+ return 0;
+ if (REGNO (r) == SP_REGNO)
+ return 0;
+ return m32c_legitimate_address_p (GET_MODE (value), XEXP (value, 0), 1);
+ }
+ else if (memcmp (str, "Sa", 2) == 0)
+ {
+ rtx r;
+ if (RTX_IS ("mr"))
+ r = patternr[1];
+ else if (RTX_IS ("m+ri"))
+ r = patternr[2];
+ else
+ return 0;
+ return (IS_REG (r, A0_REGNO) || IS_REG (r, A1_REGNO));
+ }
+ else if (memcmp (str, "Si", 2) == 0)
+ {
+ return (RTX_IS ("mi") || RTX_IS ("ms") || RTX_IS ("m+si"));
+ }
+ else if (memcmp (str, "Ss", 2) == 0)
+ {
+ return ((RTX_IS ("mr")
+ && (IS_REG (patternr[1], SP_REGNO)))
+ || (RTX_IS ("m+ri") && (IS_REG (patternr[2], SP_REGNO))));
+ }
+ else if (memcmp (str, "Sf", 2) == 0)
+ {
+ return ((RTX_IS ("mr")
+ && (IS_REG (patternr[1], FB_REGNO)))
+ || (RTX_IS ("m+ri") && (IS_REG (patternr[2], FB_REGNO))));
+ }
+ else if (memcmp (str, "Sb", 2) == 0)
+ {
+ return ((RTX_IS ("mr")
+ && (IS_REG (patternr[1], SB_REGNO)))
+ || (RTX_IS ("m+ri") && (IS_REG (patternr[2], SB_REGNO))));
+ }
+ else if (memcmp (str, "Sp", 2) == 0)
+ {
+ /* Absolute addresses 0..0x1fff used for bit addressing (I/O ports) */
+ return (RTX_IS ("mi")
+ && !(INTVAL (patternr[1]) & ~0x1fff));
+ }
+ else if (memcmp (str, "S1", 2) == 0)
+ {
+ return r1h_operand (value, QImode);
+ }
+ else if (memcmp (str, "SF", 2) == 0)
+ {
+ return 0;
+ }
+
+ gcc_assert (str[0] != 'S');
+
+ if (memcmp (str, "Rpa", 2) == 0)
+ return GET_CODE (value) == PARALLEL;
+
+ return 0;
+}
+
+/* This is for when we're debugging the above. */
+int
+m32c_extra_constraint_p (rtx value, char c, const char *str)
+{
+ int rv = m32c_extra_constraint_p2 (value, c, str);
+#if DEBUG0
+ fprintf (stderr, "\nconstraint %.*s: %d\n", CONSTRAINT_LEN (c, str), str,
+ rv);
+ debug_rtx (value);
+#endif
+ return rv;
+}
+
+/* Implements EXTRA_MEMORY_CONSTRAINT. Currently, we only use strings
+ starting with 'S'. */
+int
+m32c_extra_memory_constraint (char c, const char *str ATTRIBUTE_UNUSED)
+{
+ return c == 'S';
+}
+
+/* Implements EXTRA_ADDRESS_CONSTRAINT. We reserve 'A' strings for these,
+ but don't currently define any. */
+int
+m32c_extra_address_constraint (char c, const char *str ATTRIBUTE_UNUSED)
+{
+ return c == 'A';
+}
+
+/* STACK AND CALLING */
+
+/* Frame Layout */
+
+/* Implements RETURN_ADDR_RTX. Note that R8C and M16C push 24 bits
+ (yes, THREE bytes) onto the stack for the return address, but we
+ don't support pointers bigger than 16 bits on those chips. This
+ will likely wreak havoc with exception unwinding. FIXME. */
+rtx
+m32c_return_addr_rtx (int count)
+{
+ enum machine_mode mode;
+ int offset;
+ rtx ra_mem;
+
+ if (count)
+ return NULL_RTX;
+ /* we want 2[$fb] */
+
+ if (TARGET_A24)
+ {
+ /* It's four bytes */
+ mode = PSImode;
+ offset = 4;
+ }
+ else
+ {
+ /* FIXME: it's really 3 bytes */
+ mode = HImode;
+ offset = 2;
+ }
+
+ ra_mem =
+ gen_rtx_MEM (mode, plus_constant (gen_rtx_REG (Pmode, FP_REGNO), offset));
+ return copy_to_mode_reg (mode, ra_mem);
+}
+
+/* Implements INCOMING_RETURN_ADDR_RTX. See comment above. */
+rtx
+m32c_incoming_return_addr_rtx (void)
+{
+ /* we want [sp] */
+ return gen_rtx_MEM (PSImode, gen_rtx_REG (PSImode, SP_REGNO));
+}
+
+/* Exception Handling Support */
+
+/* Implements EH_RETURN_DATA_REGNO. Choose registers able to hold
+ pointers. */
+int
+m32c_eh_return_data_regno (int n)
+{
+ switch (n)
+ {
+ case 0:
+ return A0_REGNO;
+ case 1:
+ if (TARGET_A16)
+ return R3_REGNO;
+ else
+ return R1_REGNO;
+ default:
+ return INVALID_REGNUM;
+ }
+}
+
+/* Implements EH_RETURN_STACKADJ_RTX. Saved and used later in
+ m32c_emit_eh_epilogue. */
+rtx
+m32c_eh_return_stackadj_rtx (void)
+{
+ if (!cfun->machine->eh_stack_adjust)
+ {
+ rtx sa;
+
+ sa = gen_rtx_REG (Pmode, R0_REGNO);
+ cfun->machine->eh_stack_adjust = sa;
+ }
+ return cfun->machine->eh_stack_adjust;
+}
+
+/* Registers That Address the Stack Frame */
+
+/* Implements DWARF_FRAME_REGNUM and DBX_REGISTER_NUMBER. Note that
+ the original spec called for dwarf numbers to vary with register
+ width as well, for example, r0l, r0, and r2r0 would each have
+ different dwarf numbers. GCC doesn't support this, and we don't do
+ it, and gdb seems to like it this way anyway. */
+unsigned int
+m32c_dwarf_frame_regnum (int n)
+{
+ switch (n)
+ {
+ case R0_REGNO:
+ return 5;
+ case R1_REGNO:
+ return 6;
+ case R2_REGNO:
+ return 7;
+ case R3_REGNO:
+ return 8;
+ case A0_REGNO:
+ return 9;
+ case A1_REGNO:
+ return 10;
+ case FB_REGNO:
+ return 11;
+ case SB_REGNO:
+ return 19;
+
+ case SP_REGNO:
+ return 12;
+ case PC_REGNO:
+ return 13;
+ default:
+ return DWARF_FRAME_REGISTERS + 1;
+ }
+}
+
+/* The frame looks like this:
+
+ ap -> +------------------------------
+ | Return address (3 or 4 bytes)
+ | Saved FB (2 or 4 bytes)
+ fb -> +------------------------------
+ | local vars
+ | register saves fb
+ | through r0 as needed
+ sp -> +------------------------------
+*/
+
+/* We use this to wrap all emitted insns in the prologue. */
+static rtx
+F (rtx x)
+{
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* This maps register numbers to the PUSHM/POPM bitfield, and tells us
+ how much the stack pointer moves for each register, on each cpu
+ family. */
+static struct
+{
+ int reg1;
+ int bit;
+ int a16_bytes;
+ int a24_bytes;
+} pushm_info[] =
+{
+ /* These are in reverse push (nearest-to-sp) order. */
+ { R0_REGNO, 0x80, 2, 2 },
+ { R1_REGNO, 0x40, 2, 2 },
+ { R2_REGNO, 0x20, 2, 2 },
+ { R3_REGNO, 0x10, 2, 2 },
+ { A0_REGNO, 0x08, 2, 4 },
+ { A1_REGNO, 0x04, 2, 4 },
+ { SB_REGNO, 0x02, 2, 4 },
+ { FB_REGNO, 0x01, 2, 4 }
+};
+
+#define PUSHM_N (sizeof(pushm_info)/sizeof(pushm_info[0]))
+
+/* Returns TRUE if we need to save/restore the given register. We
+ save everything for exception handlers, so that any register can be
+ unwound. For interrupt handlers, we save everything if the handler
+ calls something else (because we don't know what *that* function
+ might do), but try to be a bit smarter if the handler is a leaf
+ function. We always save $a0, though, because we use that in the
+ epilogue to copy $fb to $sp. */
+static int
+need_to_save (int regno)
+{
+ if (fixed_regs[regno])
+ return 0;
+ if (crtl->calls_eh_return)
+ return 1;
+ if (regno == FP_REGNO)
+ return 0;
+ if (cfun->machine->is_interrupt
+ && (!cfun->machine->is_leaf
+ || (regno == A0_REGNO
+ && m32c_function_needs_enter ())
+ ))
+ return 1;
+ if (df_regs_ever_live_p (regno)
+ && (!call_used_regs[regno] || cfun->machine->is_interrupt))
+ return 1;
+ return 0;
+}
+
+/* This function contains all the intelligence about saving and
+ restoring registers. It always figures out the register save set.
+ When called with PP_justcount, it merely returns the size of the
+ save set (for eliminating the frame pointer, for example). When
+ called with PP_pushm or PP_popm, it emits the appropriate
+ instructions for saving (pushm) or restoring (popm) the
+ registers. */
+static int
+m32c_pushm_popm (Push_Pop_Type ppt)
+{
+ int reg_mask = 0;
+ int byte_count = 0, bytes;
+ int i;
+ rtx dwarf_set[PUSHM_N];
+ int n_dwarfs = 0;
+ int nosave_mask = 0;
+
+ if (crtl->return_rtx
+ && GET_CODE (crtl->return_rtx) == PARALLEL
+ && !(crtl->calls_eh_return || cfun->machine->is_interrupt))
+ {
+ rtx exp = XVECEXP (crtl->return_rtx, 0, 0);
+ rtx rv = XEXP (exp, 0);
+ int rv_bytes = GET_MODE_SIZE (GET_MODE (rv));
+
+ if (rv_bytes > 2)
+ nosave_mask |= 0x20; /* PSI, SI */
+ else
+ nosave_mask |= 0xf0; /* DF */
+ if (rv_bytes > 4)
+ nosave_mask |= 0x50; /* DI */
+ }
+
+ for (i = 0; i < (int) PUSHM_N; i++)
+ {
+ /* Skip if the register doesn't need saving. */
+ if (!need_to_save (pushm_info[i].reg1))
+ continue;
+
+ if (pushm_info[i].bit & nosave_mask)
+ continue;
+
+ reg_mask |= pushm_info[i].bit;
+ bytes = TARGET_A16 ? pushm_info[i].a16_bytes : pushm_info[i].a24_bytes;
+
+ if (ppt == PP_pushm)
+ {
+ enum machine_mode mode = (bytes == 2) ? HImode : SImode;
+ rtx addr;
+
+ /* Always use stack_pointer_rtx instead of calling
+ rtx_gen_REG ourselves. Code elsewhere in GCC assumes
+ that there is a single rtx representing the stack pointer,
+ namely stack_pointer_rtx, and uses == to recognize it. */
+ addr = stack_pointer_rtx;
+
+ if (byte_count != 0)
+ addr = gen_rtx_PLUS (GET_MODE (addr), addr, GEN_INT (byte_count));
+
+ dwarf_set[n_dwarfs++] =
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (mode, addr),
+ gen_rtx_REG (mode, pushm_info[i].reg1));
+ F (dwarf_set[n_dwarfs - 1]);
+
+ }
+ byte_count += bytes;
+ }
+
+ if (cfun->machine->is_interrupt)
+ {
+ cfun->machine->intr_pushm = reg_mask & 0xfe;
+ reg_mask = 0;
+ byte_count = 0;
+ }
+
+ if (cfun->machine->is_interrupt)
+ for (i = MEM0_REGNO; i <= MEM7_REGNO; i++)
+ if (need_to_save (i))
+ {
+ byte_count += 2;
+ cfun->machine->intr_pushmem[i - MEM0_REGNO] = 1;
+ }
+
+ if (ppt == PP_pushm && byte_count)
+ {
+ rtx note = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (n_dwarfs + 1));
+ rtx pushm;
+
+ if (reg_mask)
+ {
+ XVECEXP (note, 0, 0)
+ = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (GET_MODE (stack_pointer_rtx),
+ stack_pointer_rtx,
+ GEN_INT (-byte_count)));
+ F (XVECEXP (note, 0, 0));
+
+ for (i = 0; i < n_dwarfs; i++)
+ XVECEXP (note, 0, i + 1) = dwarf_set[i];
+
+ pushm = F (emit_insn (gen_pushm (GEN_INT (reg_mask))));
+
+ add_reg_note (pushm, REG_FRAME_RELATED_EXPR, note);
+ }
+
+ if (cfun->machine->is_interrupt)
+ for (i = MEM0_REGNO; i <= MEM7_REGNO; i++)
+ if (cfun->machine->intr_pushmem[i - MEM0_REGNO])
+ {
+ if (TARGET_A16)
+ pushm = emit_insn (gen_pushhi_16 (gen_rtx_REG (HImode, i)));
+ else
+ pushm = emit_insn (gen_pushhi_24 (gen_rtx_REG (HImode, i)));
+ F (pushm);
+ }
+ }
+ if (ppt == PP_popm && byte_count)
+ {
+ if (cfun->machine->is_interrupt)
+ for (i = MEM7_REGNO; i >= MEM0_REGNO; i--)
+ if (cfun->machine->intr_pushmem[i - MEM0_REGNO])
+ {
+ if (TARGET_A16)
+ emit_insn (gen_pophi_16 (gen_rtx_REG (HImode, i)));
+ else
+ emit_insn (gen_pophi_24 (gen_rtx_REG (HImode, i)));
+ }
+ if (reg_mask)
+ emit_insn (gen_popm (GEN_INT (reg_mask)));
+ }
+
+ return byte_count;
+}
+
+/* Implements INITIAL_ELIMINATION_OFFSET. See the comment above that
+ diagrams our call frame. */
+int
+m32c_initial_elimination_offset (int from, int to)
+{
+ int ofs = 0;
+
+ if (from == AP_REGNO)
+ {
+ if (TARGET_A16)
+ ofs += 5;
+ else
+ ofs += 8;
+ }
+
+ if (to == SP_REGNO)
+ {
+ ofs += m32c_pushm_popm (PP_justcount);
+ ofs += get_frame_size ();
+ }
+
+ /* Account for push rounding. */
+ if (TARGET_A24)
+ ofs = (ofs + 1) & ~1;
+#if DEBUG0
+ fprintf (stderr, "initial_elimination_offset from=%d to=%d, ofs=%d\n", from,
+ to, ofs);
+#endif
+ return ofs;
+}
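+
+/* E.g. eliminating AP to FB on an A16 part yields 5 bytes: a 3-byte
+ return address plus the 2-byte saved FB, matching the frame diagram
+ above; on A24 parts it is 8 (4 + 4). */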
+
+/* Passing Function Arguments on the Stack */
+
+/* Implements PUSH_ROUNDING. The R8C and M16C have byte stacks, the
+ M32C has word stacks. */
+unsigned int
+m32c_push_rounding (int n)
+{
+ if (TARGET_R8C || TARGET_M16C)
+ return n;
+ return (n + 1) & ~1;
+}
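+
+/* So pushing a single QImode argument consumes one byte on R8C/M16C
+ but is rounded up to two bytes on M32C, as described in m32c.abi. */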
+
+/* Passing Arguments in Registers */
+
+/* Implements TARGET_FUNCTION_ARG. Arguments are passed partly in
+ registers, partly on stack. If our function returns a struct, a
+ pointer to a buffer for it is at the top of the stack (last thing
+ pushed). The first few real arguments may be in registers as
+ follows:
+
+ R8C/M16C: arg1 in r1 if it's QI or HI (else it's pushed on stack)
+ arg2 in r2 if it's HI (else pushed on stack)
+ rest on stack
+ M32C: arg1 in r0 if it's QI or HI (else it's pushed on stack)
+ rest on stack
+
+ Structs are not passed in registers, even if they fit. Only
+ integer and pointer types are passed in registers.
+
+ Note that when arg1 doesn't fit in r1, arg2 may still be passed in
+ r2 if it fits. */
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG m32c_function_arg
+static rtx
+m32c_function_arg (CUMULATIVE_ARGS * ca,
+ enum machine_mode mode, const_tree type, bool named)
+{
+ /* Can return a reg, parallel, or 0 for stack */
+ rtx rv = NULL_RTX;
+#if DEBUG0
+ fprintf (stderr, "func_arg %d (%s, %d)\n",
+ ca->parm_num, mode_name[mode], named);
+ debug_tree (type);
+#endif
+
+ if (mode == VOIDmode)
+ return GEN_INT (0);
+
+ if (ca->force_mem || !named)
+ {
+#if DEBUG0
+ fprintf (stderr, "func arg: force %d named %d, mem\n", ca->force_mem,
+ named);
+#endif
+ return NULL_RTX;
+ }
+
+ if (type && INTEGRAL_TYPE_P (type) && POINTER_TYPE_P (type))
+ return NULL_RTX;
+
+ if (type && AGGREGATE_TYPE_P (type))
+ return NULL_RTX;
+
+ switch (ca->parm_num)
+ {
+ case 1:
+ if (GET_MODE_SIZE (mode) == 1 || GET_MODE_SIZE (mode) == 2)
+ rv = gen_rtx_REG (mode, TARGET_A16 ? R1_REGNO : R0_REGNO);
+ break;
+
+ case 2:
+ if (TARGET_A16 && GET_MODE_SIZE (mode) == 2)
+ rv = gen_rtx_REG (mode, R2_REGNO);
+ break;
+ }
+
+#if DEBUG0
+ debug_rtx (rv);
+#endif
+ return rv;
+}
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE m32c_pass_by_reference
+static bool
+m32c_pass_by_reference (CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
+/* Implements INIT_CUMULATIVE_ARGS. */
+void
+m32c_init_cumulative_args (CUMULATIVE_ARGS * ca,
+ tree fntype,
+ rtx libname ATTRIBUTE_UNUSED,
+ tree fndecl,
+ int n_named_args ATTRIBUTE_UNUSED)
+{
+ if (fntype && aggregate_value_p (TREE_TYPE (fntype), fndecl))
+ ca->force_mem = 1;
+ else
+ ca->force_mem = 0;
+ ca->parm_num = 1;
+}
+
+/* Implements TARGET_FUNCTION_ARG_ADVANCE. force_mem is set for
+ functions returning structures, so we always reset that. Otherwise,
+ we only need to know the sequence number of the argument to know what
+ to do with it. */
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE m32c_function_arg_advance
+static void
+m32c_function_arg_advance (CUMULATIVE_ARGS * ca,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (ca->force_mem)
+ ca->force_mem = 0;
+ else
+ ca->parm_num++;
+}
+
+/* Implements TARGET_FUNCTION_ARG_BOUNDARY. */
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY m32c_function_arg_boundary
+static unsigned int
+m32c_function_arg_boundary (enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ return (TARGET_A16 ? 8 : 16);
+}
+
+/* Implements FUNCTION_ARG_REGNO_P. */
+int
+m32c_function_arg_regno_p (int r)
+{
+ if (TARGET_A24)
+ return (r == R0_REGNO);
+ return (r == R1_REGNO || r == R2_REGNO);
+}
+
+/* HImode and PSImode are the two "native" pointer modes as far as
+ GCC is concerned, but the chips also support a 32-bit mode which is
+ used for some opcodes in R8C/M16C and for reset vectors and such. */
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE m32c_valid_pointer_mode
+static bool
+m32c_valid_pointer_mode (enum machine_mode mode)
+{
+ if (mode == HImode
+ || mode == PSImode
+ || mode == SImode
+ )
+ return 1;
+ return 0;
+}
+
+/* How Scalar Function Values Are Returned */
+
+/* Implements TARGET_LIBCALL_VALUE. Most values are returned in $r0, or some
+ combination of registers starting there (r2r0 for longs, r3r1r2r0
+ for long long, r3r2r1r0 for doubles), except that that ABI
+ currently doesn't work because it ends up using all available
+ general registers and gcc often can't compile it. So, instead, we
+ return anything bigger than 16 bits in "mem0" (effectively, a
+ memory location). */
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE m32c_libcall_value
+
+static rtx
+m32c_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ /* return reg or parallel */
+#if 0
+ /* FIXME: GCC has difficulty returning large values in registers,
+ because that ties up most of the general registers and gives the
+ register allocator little to work with. Until we can resolve
+ this, large values are returned in memory. */
+ if (mode == DFmode)
+ {
+ rtx rv;
+
+ rv = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
+ XVECEXP (rv, 0, 0) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (HImode,
+ R0_REGNO),
+ GEN_INT (0));
+ XVECEXP (rv, 0, 1) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (HImode,
+ R1_REGNO),
+ GEN_INT (2));
+ XVECEXP (rv, 0, 2) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (HImode,
+ R2_REGNO),
+ GEN_INT (4));
+ XVECEXP (rv, 0, 3) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (HImode,
+ R3_REGNO),
+ GEN_INT (6));
+ return rv;
+ }
+
+ if (TARGET_A24 && GET_MODE_SIZE (mode) > 2)
+ {
+ rtx rv;
+
+ rv = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
+ XVECEXP (rv, 0, 0) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode,
+ R0_REGNO),
+ GEN_INT (0));
+ return rv;
+ }
+#endif
+
+ if (GET_MODE_SIZE (mode) > 2)
+ return gen_rtx_REG (mode, MEM0_REGNO);
+ return gen_rtx_REG (mode, R0_REGNO);
+}
+
+/* Implements TARGET_FUNCTION_VALUE. Functions and libcalls have the same
+ conventions. */
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE m32c_function_value
+
+static rtx
+m32c_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ /* return reg or parallel */
+ const enum machine_mode mode = TYPE_MODE (valtype);
+ return m32c_libcall_value (mode, NULL_RTX);
+}
+
+/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P m32c_function_value_regno_p
+
+static bool
+m32c_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == R0_REGNO || regno == MEM0_REGNO);
+}
+
+/* How Large Values Are Returned */
+
+/* We return structures by pushing the address on the stack, even if
+ we use registers for the first few "real" arguments. */
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX m32c_struct_value_rtx
+static rtx
+m32c_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return 0;
+}
+
+/* Function Entry and Exit */
+
+/* Implements EPILOGUE_USES. Interrupts restore all registers. */
+int
+m32c_epilogue_uses (int regno ATTRIBUTE_UNUSED)
+{
+ if (cfun->machine->is_interrupt)
+ return 1;
+ return 0;
+}
+
+/* Implementing the Varargs Macros */
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING m32c_strict_argument_naming
+static bool
+m32c_strict_argument_naming (CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED)
+{
+ return 1;
+}
+
+/* Trampolines for Nested Functions */
+
+/*
+ m16c:
+ 1 0000 75C43412 mov.w #0x1234,a0
+ 2 0004 FC000000 jmp.a label
+
+ m32c:
+ 1 0000 BC563412 mov.l:s #0x123456,a0
+ 2 0004 CC000000 jmp.a label
+*/
+
+/* Implements TRAMPOLINE_SIZE. */
+int
+m32c_trampoline_size (void)
+{
+ /* Allocate extra space so we can avoid the messy shifts when we
+ initialize the trampoline; we just write past the end of the
+ opcode. */
+ return TARGET_A16 ? 8 : 10;
+}
+
+/* Implements TRAMPOLINE_ALIGNMENT. */
+int
+m32c_trampoline_alignment (void)
+{
+ return 2;
+}
+
+/* Implements TARGET_TRAMPOLINE_INIT. */
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT m32c_trampoline_init
+static void
+m32c_trampoline_init (rtx m_tramp, tree fndecl, rtx chainval)
+{
+ rtx function = XEXP (DECL_RTL (fndecl), 0);
+
+#define A0(m,i) adjust_address (m_tramp, m, i)
+ if (TARGET_A16)
+ {
+ /* Note: we subtract a "word" because the moves want signed
+ constants, not unsigned constants. */
+ emit_move_insn (A0 (HImode, 0), GEN_INT (0xc475 - 0x10000));
+ emit_move_insn (A0 (HImode, 2), chainval);
+ emit_move_insn (A0 (QImode, 4), GEN_INT (0xfc - 0x100));
+ /* The function address is only 16 bits here, but jmp.a takes a
+ 24-bit operand, so store an explicit zero byte to extend it. */
+ emit_move_insn (A0 (HImode, 5), function);
+ emit_move_insn (A0 (QImode, 7), GEN_INT (0x00));
+ }
+ else
+ {
+ /* Note that the PSI moves actually write 4 bytes. Make sure we
+ write stuff out in the right order, and leave room for the
+ extra byte at the end. */
+ emit_move_insn (A0 (QImode, 0), GEN_INT (0xbc - 0x100));
+ emit_move_insn (A0 (PSImode, 1), chainval);
+ emit_move_insn (A0 (QImode, 4), GEN_INT (0xcc - 0x100));
+ emit_move_insn (A0 (PSImode, 5), function);
+ }
+#undef A0
+}
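+
+/* For illustration, the A16 stores above produce this byte layout (a
+   sketch; CH = static chain, FN = 16-bit function address, stored
+   little-endian):
+
+     offset 0: 75 C4     mov.w opcode
+     offset 2: CH CH     static chain value
+     offset 4: FC        jmp.a opcode
+     offset 5: FN FN 00  function address, zero-extended to 24 bits
+
+   In the A24 case the PSImode stores write four bytes each, so the
+   final store reaches offset 8; the extra space reported by
+   TRAMPOLINE_SIZE absorbs the overrun.  */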
+
+/* Implicit Calls to Library Routines */
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS m32c_init_libfuncs
+static void
+m32c_init_libfuncs (void)
+{
+ /* We do this because the M32C has an HImode operand, but the
+ M16C has an 8-bit operand. Since gcc looks at the match data
+ and not the expanded rtl, we have to reset the optab so that
+ the right modes are found. */
+ if (TARGET_A24)
+ {
+ set_optab_handler (cstore_optab, QImode, CODE_FOR_cstoreqi4_24);
+ set_optab_handler (cstore_optab, HImode, CODE_FOR_cstorehi4_24);
+ set_optab_handler (cstore_optab, PSImode, CODE_FOR_cstorepsi4_24);
+ }
+}
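+
+/* For illustration (a sketch, not an exact trace): with -mcpu=m32c a
+   16-bit comparison such as
+
+     int le (int a, int b) { return a <= b; }
+
+   should expand via cstorehi4_24; without the reset above, the optab
+   would still point at the pattern selected for the m16c operand
+   sizes.  */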
+
+/* Addressing Modes */
+
+/* The r8c/m32c family supports a wide range of non-orthogonal
+ addressing modes, including the ability to double-indirect on *some*
+ of them. Not all insns support all modes, either, but we rely on
+ predicates and constraints to deal with that. */
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P m32c_legitimate_address_p
+bool
+m32c_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ int mode_adjust;
+ if (CONSTANT_P (x))
+ return 1;
+
+ if (TARGET_A16 && GET_MODE (x) != HImode && GET_MODE (x) != SImode)
+ return 0;
+ if (TARGET_A24 && GET_MODE (x) != PSImode)
+ return 0;
+
+ /* Wide references to memory will be split after reload, so we must
+ ensure that all parts of such splits remain legitimate
+ addresses. */
+ mode_adjust = GET_MODE_SIZE (mode) - 1;
+
+ /* Allowing a PLUS here would let m32c_split_move produce nested
+ addresses like mem:HI (plus:SI (mem:SI (plus:SI ...))), so we
+ don't allow it. */
+ if (GET_CODE (x) == PRE_DEC
+ || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_MODIFY)
+ {
+ return (GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == SP_REGNO);
+ }
+
+#if 0
+ /* This is the double indirection detection, but it currently
+ doesn't work as cleanly as this code implies, so until we've had
+ a chance to debug it, leave it disabled. */
+ if (TARGET_A24 && GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) != PLUS)
+ {
+#if DEBUG_DOUBLE
+ fprintf (stderr, "double indirect\n");
+#endif
+ x = XEXP (x, 0);
+ }
+#endif
+
+ encode_pattern (x);
+ if (RTX_IS ("r"))
+ {
+ /* Most indexable registers can be used without displacements,
+ although some of them will be emitted with an explicit zero
+ to please the assembler. */
+ switch (REGNO (patternr[0]))
+ {
+ case A1_REGNO:
+ case SB_REGNO:
+ case FB_REGNO:
+ case SP_REGNO:
+ if (TARGET_A16 && GET_MODE (x) == SImode)
+ return 0;
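+ /* FALLTHRU */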
+ case A0_REGNO:
+ return 1;
+
+ default:
+ if (IS_PSEUDO (patternr[0], strict))
+ return 1;
+ return 0;
+ }
+ }
+
+ if (TARGET_A16 && GET_MODE (x) == SImode)
+ return 0;
+
+ if (RTX_IS ("+ri"))
+ {
+ /* This is more interesting, because different base registers
+ allow for different displacements - both range and signedness
+ - and it differs from chip series to chip series too. */
+ int rn = REGNO (patternr[1]);
+ HOST_WIDE_INT offs = INTVAL (patternr[2]);
+ switch (rn)
+ {
+ case A0_REGNO:
+ case A1_REGNO:
+ case SB_REGNO:
+ /* The syntax only allows positive offsets, but when the
+ offsets span the entire memory range, we can simulate
+ negative offsets by wrapping. */
+ if (TARGET_A16)
+ return (offs >= -65536 && offs <= 65535 - mode_adjust);
+ if (rn == SB_REGNO)
+ return (offs >= 0 && offs <= 65535 - mode_adjust);
+ /* A0 or A1 */
+ return (offs >= -16777216 && offs <= 16777215);
+
+ case FB_REGNO:
+ if (TARGET_A16)
+ return (offs >= -128 && offs <= 127 - mode_adjust);
+ return (offs >= -65536 && offs <= 65535 - mode_adjust);
+
+ case SP_REGNO:
+ return (offs >= -128 && offs <= 127 - mode_adjust);
+
+ default:
+ if (IS_PSEUDO (patternr[1], strict))
+ return 1;
+ return 0;
+ }
+ }
+ if (RTX_IS ("+rs") || RTX_IS ("+r+si"))
+ {
+ rtx reg = patternr[1];
+
+ /* We don't know where the symbol is, so only allow base
+ registers which support displacements spanning the whole
+ address range. */
+ switch (REGNO (reg))
+ {
+ case A0_REGNO:
+ case A1_REGNO:
+ /* $sb needs a secondary reload, but since it's involved in
+ memory address reloads too, we don't deal with it very
+ well. */
+ /* case SB_REGNO: */
+ return 1;
+ default:
+ if (IS_PSEUDO (reg, strict))
+ return 1;
+ return 0;
+ }
+ }
+ return 0;
+}
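+
+/* Some addresses the function above accepts, written as RTL sketches
+   (assuming an A24 target and an HImode access):
+
+     (reg:PSI a0)                               register indirect
+     (plus:PSI (reg:PSI sb) (const_int 100))    base + displacement
+     (plus:PSI (reg:PSI fb) (const_int -4))     frame-base local
+     (post_inc:PSI (reg:PSI sp))                stack pop
+
+   and one it rejects: (plus:PSI (reg:PSI sp) (const_int 4000)),
+   since $sp displacements must fit in -128..127.  */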
+
+/* Implements REG_OK_FOR_BASE_P. */
+int
+m32c_reg_ok_for_base_p (rtx x, int strict)
+{
+ if (GET_CODE (x) != REG)
+ return 0;
+ switch (REGNO (x))
+ {
+ case A0_REGNO:
+ case A1_REGNO:
+ case SB_REGNO:
+ case FB_REGNO:
+ case SP_REGNO:
+ return 1;
+ default:
+ if (IS_PSEUDO (x, strict))
+ return 1;
+ return 0;
+ }
+}
+
+/* We have three choices for choosing fb->aN offsets. If we choose -128,
+ we need one MOVA -128[fb],aN opcode and 16-bit aN displacements,
+ like this:
+ EB 4B FF mova -128[$fb],$a0
+ D8 0C FF FF mov.w:Q #0,-1[$a0]
+
+ Alternately, we subtract the frame size, and hopefully use 8-bit aN
+ displacements:
+ 7B F4 stc $fb,$a0
+ 77 54 00 01 sub #256,$a0
+ D8 08 01 mov.w:Q #0,1[$a0]
+
+ If we don't offset (i.e. offset by zero), we end up with:
+ 7B F4 stc $fb,$a0
+ D8 0C 00 FF mov.w:Q #0,-256[$a0]
+
+ We have to subtract *something* so that we have a PLUS rtx to mark
+ that we've done this reload. The -128 offset will never result in
+ an 8-bit aN offset, and the payoff for the second case is five
+ loads *if* those loads are within 256 bytes of the other end of the
+ frame, so the third case seems best. Note that we subtract the
+ zero, but detect that in the addhi3 pattern. */
+
+#define BIG_FB_ADJ 0
+
+/* Implements LEGITIMIZE_ADDRESS. The only address we really have to
+ worry about is frame base offsets, as $fb has a limited
+ displacement range. We deal with this by attempting to reload $fb
+ itself into an address register; that seems to result in the best
+ code. */
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS m32c_legitimize_address
+static rtx
+m32c_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+#if DEBUG0
+ fprintf (stderr, "m32c_legitimize_address for mode %s\n", mode_name[mode]);
+ debug_rtx (x);
+ fprintf (stderr, "\n");
+#endif
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == FB_REGNO
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) < -128
+ || INTVAL (XEXP (x, 1)) > (128 - GET_MODE_SIZE (mode))))
+ {
+ /* reload FB to A_REGS */
+ rtx temp = gen_reg_rtx (Pmode);
+ x = copy_rtx (x);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, XEXP (x, 0)));
+ XEXP (x, 0) = temp;
+ }
+
+ return x;
+}
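+
+/* For example (a sketch), a frame access such as
+
+     (plus:PSI (reg:PSI fb) (const_int 300))
+
+   exceeds the -128..127 window, so the code above copies $fb into a
+   fresh pseudo and returns
+
+     (plus:PSI (reg:PSI temp) (const_int 300))
+
+   letting reload place TEMP in an address register with a wider
+   displacement range.  */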
+
+/* Implements LEGITIMIZE_RELOAD_ADDRESS. See comment above. */
+int
+m32c_legitimize_reload_address (rtx * x,
+ enum machine_mode mode,
+ int opnum,
+ int type, int ind_levels ATTRIBUTE_UNUSED)
+{
+#if DEBUG0
+ fprintf (stderr, "\nm32c_legitimize_reload_address for mode %s\n",
+ mode_name[mode]);
+ debug_rtx (*x);
+#endif
+
+ /* At one point, this function tried to get $fb copied to an address
+ register, which in theory would maximize sharing, but gcc was
+ *also* still trying to reload the whole address, and we'd run out
+ of address registers. So we let gcc do the naive (but safe)
+ reload instead, when the above function doesn't handle it for
+ us.
+
+ The code below is a second attempt at the above. */
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == REG
+ && REGNO (XEXP (*x, 0)) == FB_REGNO
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT
+ && (INTVAL (XEXP (*x, 1)) < -128
+ || INTVAL (XEXP (*x, 1)) > (128 - GET_MODE_SIZE (mode))))
+ {
+ rtx sum;
+ int offset = INTVAL (XEXP (*x, 1));
+ int adjustment = -BIG_FB_ADJ;
+
+ sum = gen_rtx_PLUS (Pmode, XEXP (*x, 0),
+ GEN_INT (adjustment));
+ *x = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - adjustment));
+ if (type == RELOAD_OTHER)
+ type = RELOAD_FOR_OTHER_ADDRESS;
+ push_reload (sum, NULL_RTX, &XEXP (*x, 0), NULL,
+ A_REGS, Pmode, VOIDmode, 0, 0, opnum,
+ (enum reload_type) type);
+ return 1;
+ }
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (*x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (*x, 0), 0)) == FB_REGNO
+ && GET_CODE (XEXP (XEXP (*x, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT
+ )
+ {
+ if (type == RELOAD_OTHER)
+ type = RELOAD_FOR_OTHER_ADDRESS;
+ push_reload (XEXP (*x, 0), NULL_RTX, &XEXP (*x, 0), NULL,
+ A_REGS, Pmode, VOIDmode, 0, 0, opnum,
+ (enum reload_type) type);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Implements LEGITIMATE_CONSTANT_P. We split large constants anyway,
+ so we can allow anything. */
+int
+m32c_legitimate_constant_p (rtx x ATTRIBUTE_UNUSED)
+{
+ return 1;
+}
+
+
+/* Return the appropriate mode for a named address pointer. */
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE m32c_addr_space_pointer_mode
+static enum machine_mode
+m32c_addr_space_pointer_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return TARGET_A24 ? PSImode : HImode;
+ case ADDR_SPACE_FAR:
+ return SImode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the appropriate mode for an address in a named address space. */
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE m32c_addr_space_address_mode
+static enum machine_mode
+m32c_addr_space_address_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return TARGET_A24 ? PSImode : HImode;
+ case ADDR_SPACE_FAR:
+ return SImode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Like m32c_legitimate_address_p, except with named addresses. */
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+ m32c_addr_space_legitimate_address_p
+static bool
+m32c_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool strict, addr_space_t as)
+{
+ if (as == ADDR_SPACE_FAR)
+ {
+ if (TARGET_A24)
+ return 0;
+ encode_pattern (x);
+ if (RTX_IS ("r"))
+ {
+ if (GET_MODE (x) != SImode)
+ return 0;
+ switch (REGNO (patternr[0]))
+ {
+ case A0_REGNO:
+ return 1;
+
+ default:
+ if (IS_PSEUDO (patternr[0], strict))
+ return 1;
+ return 0;
+ }
+ }
+ if (RTX_IS ("+^Sri"))
+ {
+ int rn = REGNO (patternr[3]);
+ HOST_WIDE_INT offs = INTVAL (patternr[4]);
+ if (GET_MODE (patternr[3]) != HImode)
+ return 0;
+ switch (rn)
+ {
+ case A0_REGNO:
+ return (offs >= 0 && offs <= 0xfffff);
+
+ default:
+ if (IS_PSEUDO (patternr[3], strict))
+ return 1;
+ return 0;
+ }
+ }
+ if (RTX_IS ("+^Srs"))
+ {
+ int rn = REGNO (patternr[3]);
+ if (GET_MODE (patternr[3]) != HImode)
+ return 0;
+ switch (rn)
+ {
+ case A0_REGNO:
+ return 1;
+
+ default:
+ if (IS_PSEUDO (patternr[3], strict))
+ return 1;
+ return 0;
+ }
+ }
+ if (RTX_IS ("+^S+ris"))
+ {
+ int rn = REGNO (patternr[4]);
+ if (GET_MODE (patternr[4]) != HImode)
+ return 0;
+ switch (rn)
+ {
+ case A0_REGNO:
+ return 1;
+
+ default:
+ if (IS_PSEUDO (patternr[4], strict))
+ return 1;
+ return 0;
+ }
+ }
+ if (RTX_IS ("s"))
+ {
+ return 1;
+ }
+ return 0;
+ }
+
+ else if (as != ADDR_SPACE_GENERIC)
+ gcc_unreachable ();
+
+ return m32c_legitimate_address_p (mode, x, strict);
+}
+
+/* Like m32c_legitimize_address, except with named address support. */
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS m32c_addr_space_legitimize_address
+static rtx
+m32c_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
+ addr_space_t as)
+{
+ if (as != ADDR_SPACE_GENERIC)
+ {
+#if DEBUG0
+ fprintf (stderr, "\033[36mm32c_addr_space_legitimize_address for mode %s\033[0m\n", mode_name[mode]);
+ debug_rtx (x);
+ fprintf (stderr, "\n");
+#endif
+
+ if (GET_CODE (x) != REG)
+ {
+ x = force_reg (SImode, x);
+ }
+ return x;
+ }
+
+ return m32c_legitimize_address (x, oldx, mode);
+}
+
+/* Determine if one named address space is a subset of another. */
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P m32c_addr_space_subset_p
+static bool
+m32c_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+{
+ gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_FAR);
+ gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_FAR);
+
+ if (subset == superset)
+ return true;
+
+ else
+ return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_FAR);
+}
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT m32c_addr_space_convert
+/* Convert from one address space to another. */
+static rtx
+m32c_addr_space_convert (rtx op, tree from_type, tree to_type)
+{
+ addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
+ addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
+ rtx result;
+
+ gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_FAR);
+ gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_FAR);
+
+ if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_FAR)
+ {
+ /* This is unpredictable, as we're truncating off usable address
+ bits. */
+
+ result = gen_reg_rtx (HImode);
+ emit_move_insn (result, simplify_subreg (HImode, op, SImode, 0));
+ return result;
+ }
+ else if (to_as == ADDR_SPACE_FAR && from_as == ADDR_SPACE_GENERIC)
+ {
+ /* This always works. */
+ result = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendhisi2 (result, op));
+ return result;
+ }
+ else
+ gcc_unreachable ();
+}
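+
+/* A user-level sketch of the two conversions above, assuming the
+   __far keyword is registered for ADDR_SPACE_FAR elsewhere in the
+   port:
+
+     int __far *fp;
+     int *np;
+
+     fp = (int __far *) np;   zero-extends HImode to SImode, safe
+     np = (int *) fp;         truncates SImode to HImode, lossy
+  */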
+
+/* Condition Code Status */
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS m32c_fixed_condition_code_regs
+static bool
+m32c_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = FLG_REGNO;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+/* Describing Relative Costs of Operations */
+
+/* Implements TARGET_REGISTER_MOVE_COST. We make impossible moves
+ prohibitively expensive, like trying to put QIs in r2/r3 (there are
+ no opcodes to do that). We also discourage use of mem* registers
+ since they're really memory. */
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST m32c_register_move_cost
+
+static int
+m32c_register_move_cost (enum machine_mode mode, reg_class_t from,
+ reg_class_t to)
+{
+ int cost = COSTS_N_INSNS (3);
+ HARD_REG_SET cc;
+
+ /* FIXME: pick real values, but not 2 for now. */
+ COPY_HARD_REG_SET (cc, reg_class_contents[(int) from]);
+ IOR_HARD_REG_SET (cc, reg_class_contents[(int) to]);
+
+ if (mode == QImode
+ && hard_reg_set_intersect_p (cc, reg_class_contents[R23_REGS]))
+ {
+ if (hard_reg_set_subset_p (cc, reg_class_contents[R23_REGS]))
+ cost = COSTS_N_INSNS (1000);
+ else
+ cost = COSTS_N_INSNS (80);
+ }
+
+ if (!class_can_hold_mode (from, mode) || !class_can_hold_mode (to, mode))
+ cost = COSTS_N_INSNS (1000);
+
+ if (reg_classes_intersect_p (from, CR_REGS))
+ cost += COSTS_N_INSNS (5);
+
+ if (reg_classes_intersect_p (to, CR_REGS))
+ cost += COSTS_N_INSNS (5);
+
+ if (from == MEM_REGS || to == MEM_REGS)
+ cost += COSTS_N_INSNS (50);
+ else if (reg_classes_intersect_p (from, MEM_REGS)
+ || reg_classes_intersect_p (to, MEM_REGS))
+ cost += COSTS_N_INSNS (10);
+
+#if DEBUG0
+ fprintf (stderr, "register_move_cost %s from %s to %s = %d\n",
+ mode_name[mode], class_names[(int) from], class_names[(int) to],
+ cost);
+#endif
+ return cost;
+}
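+
+/* Two sample costs from the above (a sketch): a QImode move where
+   both classes are subsets of R23_REGS costs COSTS_N_INSNS (1000),
+   since r2/r3 cannot hold QImode at all; an HImode move between
+   R0_REGS and MEM_REGS costs the base three insns plus the 50-insn
+   memory penalty.  */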
+
+/* Implements TARGET_MEMORY_MOVE_COST. */
+
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST m32c_memory_move_cost
+
+static int
+m32c_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ /* FIXME: pick real values. */
+ return COSTS_N_INSNS (10);
+}
+
+/* Here we describe the cases where one RTX expands to multiple
+   opcodes, so that gcc can cost them correctly. */
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS m32c_rtx_costs
+static bool
+m32c_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case REG:
+ if (REGNO (x) >= MEM0_REGNO && REGNO (x) <= MEM7_REGNO)
+ *total += COSTS_N_INSNS (500);
+ else
+ *total += COSTS_N_INSNS (1);
+ return true;
+
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ {
+ /* mov.b r1l, r1h */
+ *total += COSTS_N_INSNS (1);
+ return true;
+ }
+ if (INTVAL (XEXP (x, 1)) > 8
+ || INTVAL (XEXP (x, 1)) < -8)
+ {
+ /* mov.b #N, r1l */
+ /* mov.b r1l, r1h */
+ *total += COSTS_N_INSNS (2);
+ return true;
+ }
+ return true;
+
+ case LE:
+ case LEU:
+ case LT:
+ case LTU:
+ case GT:
+ case GTU:
+ case GE:
+ case GEU:
+ case NE:
+ case EQ:
+ if (outer_code == SET)
+ {
+ *total += COSTS_N_INSNS (2);
+ return true;
+ }
+ break;
+
+ case ZERO_EXTRACT:
+ {
+ rtx dest = XEXP (x, 0);
+ rtx addr = XEXP (dest, 0);
+ switch (GET_CODE (addr))
+ {
+ case CONST_INT:
+ *total += COSTS_N_INSNS (1);
+ break;
+ case SYMBOL_REF:
+ *total += COSTS_N_INSNS (3);
+ break;
+ default:
+ *total += COSTS_N_INSNS (2);
+ break;
+ }
+ return true;
+ }
+ break;
+
+ default:
+ /* Reasonable default. */
+ if (TARGET_A16 && GET_MODE(x) == SImode)
+ *total += COSTS_N_INSNS (2);
+ break;
+ }
+ return false;
+}
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST m32c_address_cost
+static int
+m32c_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ int i;
+ /* fprintf(stderr, "\naddress_cost\n");
+ debug_rtx(addr);*/
+ switch (GET_CODE (addr))
+ {
+ case CONST_INT:
+ i = INTVAL (addr);
+ if (i == 0)
+ return COSTS_N_INSNS(1);
+ if (0 < i && i <= 255)
+ return COSTS_N_INSNS(2);
+ if (0 < i && i <= 65535)
+ return COSTS_N_INSNS(3);
+ return COSTS_N_INSNS(4);
+ case SYMBOL_REF:
+ return COSTS_N_INSNS(4);
+ case REG:
+ return COSTS_N_INSNS(1);
+ case PLUS:
+ if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ {
+ i = INTVAL (XEXP (addr, 1));
+ if (i == 0)
+ return COSTS_N_INSNS(1);
+ if (0 < i && i <= 255)
+ return COSTS_N_INSNS(2);
+ if (0 < i && i <= 65535)
+ return COSTS_N_INSNS(3);
+ }
+ return COSTS_N_INSNS(4);
+ default:
+ return 0;
+ }
+}
+
+/* Defining the Output Assembler Language */
+
+/* The Overall Framework of an Assembler File */
+
+#undef TARGET_HAVE_NAMED_SECTIONS
+#define TARGET_HAVE_NAMED_SECTIONS true
+
+/* Output of Data */
+
+/* We may emit 24-bit values, 24 bits being the native address size.
+   Currently unused, but provided for completeness. */
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER m32c_asm_integer
+static bool
+m32c_asm_integer (rtx x, unsigned int size, int aligned_p)
+{
+ switch (size)
+ {
+ case 3:
+ fprintf (asm_out_file, "\t.3byte\t");
+ output_addr_const (asm_out_file, x);
+ fputc ('\n', asm_out_file);
+ return true;
+ case 4:
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ fprintf (asm_out_file, "\t.long\t");
+ output_addr_const (asm_out_file, x);
+ fputc ('\n', asm_out_file);
+ return true;
+ }
+ break;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
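+
+/* Sample output (a sketch): a 3-byte reference to a symbol SYM
+   assembles as
+
+     .3byte SYM
+
+   a 4-byte SYMBOL_REF as ".long SYM", and everything else falls
+   through to default_assemble_integer.  */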
+
+/* Output of Assembler Instructions */
+
+/* We use a lookup table because the addressing modes are non-orthogonal. */
+
+static struct
+{
+ char code;
+ char const *pattern;
+ char const *format;
+}
+const conversions[] = {
+ { 0, "r", "0" },
+
+ { 0, "mr", "z[1]" },
+ { 0, "m+ri", "3[2]" },
+ { 0, "m+rs", "3[2]" },
+ { 0, "m+^Zrs", "5[4]" },
+ { 0, "m+^Zri", "5[4]" },
+ { 0, "m+^Z+ris", "7+6[5]" },
+ { 0, "m+^Srs", "5[4]" },
+ { 0, "m+^Sri", "5[4]" },
+ { 0, "m+^S+ris", "7+6[5]" },
+ { 0, "m+r+si", "4+5[2]" },
+ { 0, "ms", "1" },
+ { 0, "mi", "1" },
+ { 0, "m+si", "2+3" },
+
+ { 0, "mmr", "[z[2]]" },
+ { 0, "mm+ri", "[4[3]]" },
+ { 0, "mm+rs", "[4[3]]" },
+ { 0, "mm+r+si", "[5+6[3]]" },
+ { 0, "mms", "[[2]]" },
+ { 0, "mmi", "[[2]]" },
+ { 0, "mm+si", "[4[3]]" },
+
+ { 0, "i", "#0" },
+ { 0, "s", "#0" },
+ { 0, "+si", "#1+2" },
+ { 0, "l", "#0" },
+
+ { 'l', "l", "0" },
+ { 'd', "i", "0" },
+ { 'd', "s", "0" },
+ { 'd', "+si", "1+2" },
+ { 'D', "i", "0" },
+ { 'D', "s", "0" },
+ { 'D', "+si", "1+2" },
+ { 'x', "i", "#0" },
+ { 'X', "i", "#0" },
+ { 'm', "i", "#0" },
+ { 'b', "i", "#0" },
+ { 'B', "i", "0" },
+ { 'p', "i", "0" },
+
+ { 0, 0, 0 }
+};
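+
+/* Example (a sketch): (mem (plus (reg a0) (const_int 4))) encodes as
+   pattern "m+ri" with patternr[2] the register and patternr[3] the
+   constant, so its format "3[2]" prints "4[a0]".  */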
+
+/* This is ordered according to the bitfield that pushm/popm use. */
+static char const *pushm_regs[] = {
+ "fb", "sb", "a1", "a0", "r3", "r2", "r1", "r0"
+};
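+
+/* Example (a sketch): a mask of 0xc0 has bits 7 and 6 set, so the
+   'p' code below prints "r0,r1", scanning from bit 7 downward.  */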
+
+/* Implements PRINT_OPERAND. */
+void
+m32c_print_operand (FILE * file, rtx x, int code)
+{
+ int i, j, b;
+ const char *comma;
+ HOST_WIDE_INT ival;
+ int unsigned_const = 0;
+ int force_sign;
+
+ /* Multiplies; constants are converted to sign-extended format but
+ we need unsigned, so 'u' and 'U' tell us what size unsigned we
+ need. */
+ if (code == 'u')
+ {
+ unsigned_const = 2;
+ code = 0;
+ }
+ if (code == 'U')
+ {
+ unsigned_const = 1;
+ code = 0;
+ }
+ /* This one is only for debugging; you can put it in a pattern to
+ force this error. */
+ if (code == '!')
+ {
+ fprintf (stderr, "dj: unreviewed pattern:");
+ if (current_output_insn)
+ debug_rtx (current_output_insn);
+ gcc_unreachable ();
+ }
+ /* PSImode operations are either .w or .l depending on the target. */
+ if (code == '&')
+ {
+ if (TARGET_A16)
+ fprintf (file, "w");
+ else
+ fprintf (file, "l");
+ return;
+ }
+ /* Inverted conditionals. */
+ if (code == 'C')
+ {
+ switch (GET_CODE (x))
+ {
+ case LE:
+ fputs ("gt", file);
+ break;
+ case LEU:
+ fputs ("gtu", file);
+ break;
+ case LT:
+ fputs ("ge", file);
+ break;
+ case LTU:
+ fputs ("geu", file);
+ break;
+ case GT:
+ fputs ("le", file);
+ break;
+ case GTU:
+ fputs ("leu", file);
+ break;
+ case GE:
+ fputs ("lt", file);
+ break;
+ case GEU:
+ fputs ("ltu", file);
+ break;
+ case NE:
+ fputs ("eq", file);
+ break;
+ case EQ:
+ fputs ("ne", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ /* Regular conditionals. */
+ if (code == 'c')
+ {
+ switch (GET_CODE (x))
+ {
+ case LE:
+ fputs ("le", file);
+ break;
+ case LEU:
+ fputs ("leu", file);
+ break;
+ case LT:
+ fputs ("lt", file);
+ break;
+ case LTU:
+ fputs ("ltu", file);
+ break;
+ case GT:
+ fputs ("gt", file);
+ break;
+ case GTU:
+ fputs ("gtu", file);
+ break;
+ case GE:
+ fputs ("ge", file);
+ break;
+ case GEU:
+ fputs ("geu", file);
+ break;
+ case NE:
+ fputs ("ne", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ /* Used in negsi2 to do HImode ops on the two parts of an SImode
+ operand. */
+ if (code == 'h' && GET_MODE (x) == SImode)
+ {
+ x = m32c_subreg (HImode, x, SImode, 0);
+ code = 0;
+ }
+ if (code == 'H' && GET_MODE (x) == SImode)
+ {
+ x = m32c_subreg (HImode, x, SImode, 2);
+ code = 0;
+ }
+ if (code == 'h' && GET_MODE (x) == HImode)
+ {
+ x = m32c_subreg (QImode, x, HImode, 0);
+ code = 0;
+ }
+ if (code == 'H' && GET_MODE (x) == HImode)
+ {
+ /* We can't actually represent this as an rtx. Do it here. */
+ if (GET_CODE (x) == REG)
+ {
+ switch (REGNO (x))
+ {
+ case R0_REGNO:
+ fputs ("r0h", file);
+ return;
+ case R1_REGNO:
+ fputs ("r1h", file);
+ return;
+ default:
+ gcc_unreachable();
+ }
+ }
+ /* This should be a MEM. */
+ x = m32c_subreg (QImode, x, HImode, 1);
+ code = 0;
+ }
+ /* This is for BMcond, which always wants word register names. */
+ if (code == 'h' && GET_MODE (x) == QImode)
+ {
+ if (GET_CODE (x) == REG)
+ x = gen_rtx_REG (HImode, REGNO (x));
+ code = 0;
+ }
+ /* 'x' and 'X' need to be ignored for non-immediates. */
+ if ((code == 'x' || code == 'X') && GET_CODE (x) != CONST_INT)
+ code = 0;
+
+ encode_pattern (x);
+ force_sign = 0;
+ for (i = 0; conversions[i].pattern; i++)
+ if (conversions[i].code == code
+ && streq (conversions[i].pattern, pattern))
+ {
+ for (j = 0; conversions[i].format[j]; j++)
+ /* backslash quotes the next character in the output pattern. */
+ if (conversions[i].format[j] == '\\')
+ {
+ fputc (conversions[i].format[j + 1], file);
+ j++;
+ }
+ /* Digits in the output pattern indicate that the
+ corresponding RTX is to be output at that point. */
+ else if (ISDIGIT (conversions[i].format[j]))
+ {
+ rtx r = patternr[conversions[i].format[j] - '0'];
+ switch (GET_CODE (r))
+ {
+ case REG:
+ fprintf (file, "%s",
+ reg_name_with_mode (REGNO (r), GET_MODE (r)));
+ break;
+ case CONST_INT:
+ switch (code)
+ {
+ case 'b':
+ case 'B':
+ {
+ int v = INTVAL (r);
+ int i = (int) exact_log2 (v);
+ if (i == -1)
+ i = (int) exact_log2 ((v ^ 0xffff) & 0xffff);
+ if (i == -1)
+ i = (int) exact_log2 ((v ^ 0xff) & 0xff);
+ /* Bit position. */
+ fprintf (file, "%d", i);
+ }
+ break;
+ case 'x':
+ /* Unsigned byte. */
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ INTVAL (r) & 0xff);
+ break;
+ case 'X':
+ /* Unsigned word. */
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ INTVAL (r) & 0xffff);
+ break;
+ case 'p':
+ /* pushm and popm encode a register set into a single byte. */
+ comma = "";
+ for (b = 7; b >= 0; b--)
+ if (INTVAL (r) & (1 << b))
+ {
+ fprintf (file, "%s%s", comma, pushm_regs[b]);
+ comma = ",";
+ }
+ break;
+ case 'm':
+ /* "Minus". Output -X */
+ ival = (-INTVAL (r) & 0xffff);
+ if (ival & 0x8000)
+ ival = ival - 0x10000;
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+ break;
+ default:
+ ival = INTVAL (r);
+ if (conversions[i].format[j + 1] == '[' && ival < 0)
+ {
+ /* We can simulate negative displacements by
+ taking advantage of address space
+ wrapping when the offset can span the
+ entire address range. */
+ rtx base =
+ patternr[conversions[i].format[j + 2] - '0'];
+ if (GET_CODE (base) == REG)
+ switch (REGNO (base))
+ {
+ case A0_REGNO:
+ case A1_REGNO:
+ if (TARGET_A24)
+ ival = 0x1000000 + ival;
+ else
+ ival = 0x10000 + ival;
+ break;
+ case SB_REGNO:
+ if (TARGET_A16)
+ ival = 0x10000 + ival;
+ break;
+ }
+ }
+ else if (code == 'd' && ival < 0 && j == 0)
+ /* The "mova" opcode is used to do addition by
+ computing displacements, but again, we need
+ displacements to be unsigned *if* they're
+ the only component of the displacement
+ (i.e. no "symbol-4" type displacement). */
+ ival = (TARGET_A24 ? 0x1000000 : 0x10000) + ival;
+
+ if (conversions[i].format[j] == '0')
+ {
+ /* More conversions to unsigned. */
+ if (unsigned_const == 2)
+ ival &= 0xffff;
+ if (unsigned_const == 1)
+ ival &= 0xff;
+ }
+ if (streq (conversions[i].pattern, "mi")
+ || streq (conversions[i].pattern, "mmi"))
+ {
+ /* Integers used as addresses are unsigned. */
+ ival &= (TARGET_A24 ? 0xffffff : 0xffff);
+ }
+ if (force_sign && ival >= 0)
+ fputc ('+', file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+ break;
+ }
+ break;
+ case CONST_DOUBLE:
+ /* We don't expect const_double constants here. If one
+ appears, make it obvious in the output. */
+ fprintf (file, "[const_double 0x%lx]",
+ (unsigned long) CONST_DOUBLE_HIGH (r));
+ break;
+ case SYMBOL_REF:
+ assemble_name (file, XSTR (r, 0));
+ break;
+ case LABEL_REF:
+ output_asm_label (r);
+ break;
+ default:
+ fprintf (stderr, "don't know how to print this operand:");
+ debug_rtx (r);
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (conversions[i].format[j] == 'z')
+ {
+ /* Some addressing modes *must* have a displacement,
+ so insert a zero here if needed. */
+ int k;
+ for (k = j + 1; conversions[i].format[k]; k++)
+ if (ISDIGIT (conversions[i].format[k]))
+ {
+ rtx reg = patternr[conversions[i].format[k] - '0'];
+ if (GET_CODE (reg) == REG
+ && (REGNO (reg) == SB_REGNO
+ || REGNO (reg) == FB_REGNO
+ || REGNO (reg) == SP_REGNO))
+ fputc ('0', file);
+ }
+ continue;
+ }
+ /* Signed displacements off symbols need to have signs
+ blended cleanly. */
+ if (conversions[i].format[j] == '+'
+ && (!code || code == 'D' || code == 'd')
+ && ISDIGIT (conversions[i].format[j + 1])
+ && (GET_CODE (patternr[conversions[i].format[j + 1] - '0'])
+ == CONST_INT))
+ {
+ force_sign = 1;
+ continue;
+ }
+ fputc (conversions[i].format[j], file);
+ }
+ break;
+ }
+ if (!conversions[i].pattern)
+ {
+ fprintf (stderr, "unconvertible operand %c `%s'", code ? code : '-',
+ pattern);
+ debug_rtx (x);
+ fprintf (file, "[%c.%s]", code ? code : '-', pattern);
+ }
+
+ return;
+}
+
+/* Implements PRINT_OPERAND_PUNCT_VALID_P. See m32c_print_operand
+ above for descriptions of what these do. */
+int
+m32c_print_operand_punct_valid_p (int c)
+{
+ if (c == '&' || c == '!')
+ return 1;
+ return 0;
+}
+
+/* Implements PRINT_OPERAND_ADDRESS. Nothing unusual here. */
+void
+m32c_print_operand_address (FILE * stream, rtx address)
+{
+ if (GET_CODE (address) == MEM)
+ address = XEXP (address, 0);
+ else
+ /* cf: gcc.dg/asm-4.c. */
+ gcc_assert (GET_CODE (address) == REG);
+
+ m32c_print_operand (stream, address, 0);
+}
+
+/* Implements ASM_OUTPUT_REG_PUSH. Control registers are pushed
+ differently than general registers. */
+void
+m32c_output_reg_push (FILE * s, int regno)
+{
+ if (regno == FLG_REGNO)
+ fprintf (s, "\tpushc\tflg\n");
+ else
+ fprintf (s, "\tpush.%c\t%s\n",
+ " bwll"[reg_push_size (regno)], reg_names[regno]);
+}
+
+/* Likewise for ASM_OUTPUT_REG_POP. */
+void
+m32c_output_reg_pop (FILE * s, int regno)
+{
+ if (regno == FLG_REGNO)
+ fprintf (s, "\tpopc\tflg\n");
+ else
+ fprintf (s, "\tpop.%c\t%s\n",
+ " bwll"[reg_push_size (regno)], reg_names[regno]);
+}
+
+/* Defining target-specific uses of `__attribute__' */
+
+/* Used to simplify the logic below. Find the attributes wherever
+ they may be. */
+#define M32C_ATTRIBUTES(decl) \
+ (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
+ : DECL_ATTRIBUTES (decl) \
+ ? (DECL_ATTRIBUTES (decl)) \
+ : TYPE_ATTRIBUTES (TREE_TYPE (decl))
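+
+/* User-level sketches of the attributes recognized below:
+
+     void tick (void) __attribute__((interrupt));
+     void bank (void) __attribute__((bank_switch));
+     void fast (void) __attribute__((fast_interrupt));
+  */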
+
+/* Returns TRUE if the given tree has the "interrupt" attribute; note
+   that "fast_interrupt" is accepted too, via the tail call below. */
+static int
+interrupt_p (tree node ATTRIBUTE_UNUSED)
+{
+ tree list = M32C_ATTRIBUTES (node);
+ while (list)
+ {
+ if (is_attribute_p ("interrupt", TREE_PURPOSE (list)))
+ return 1;
+ list = TREE_CHAIN (list);
+ }
+ return fast_interrupt_p (node);
+}
+
+/* Returns TRUE if the given tree has the "bank_switch" attribute. */
+static int
+bank_switch_p (tree node ATTRIBUTE_UNUSED)
+{
+ tree list = M32C_ATTRIBUTES (node);
+ while (list)
+ {
+ if (is_attribute_p ("bank_switch", TREE_PURPOSE (list)))
+ return 1;
+ list = TREE_CHAIN (list);
+ }
+ return 0;
+}
+
+/* Returns TRUE if the given tree has the "fast_interrupt" attribute. */
+static int
+fast_interrupt_p (tree node ATTRIBUTE_UNUSED)
+{
+ tree list = M32C_ATTRIBUTES (node);
+ while (list)
+ {
+ if (is_attribute_p ("fast_interrupt", TREE_PURPOSE (list)))
+ return 1;
+ list = TREE_CHAIN (list);
+ }
+ return 0;
+}
+
+static tree
+interrupt_handler (tree * node ATTRIBUTE_UNUSED,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs ATTRIBUTE_UNUSED)
+{
+ return NULL_TREE;
+}
+
+/* Returns TRUE if given tree has the "function_vector" attribute. */
+int
+m32c_special_page_vector_p (tree func)
+{
+ tree list;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ list = M32C_ATTRIBUTES (func);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ return 1;
+ list = TREE_CHAIN (list);
+ }
+ return 0;
+}
+
+static tree
+function_vector_handler (tree * node ATTRIBUTE_UNUSED,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs ATTRIBUTE_UNUSED)
+{
+ if (TARGET_R8C)
+ {
+ /* The attribute is not supported for R8C target. */
+ warning (OPT_Wattributes,
+ "%qE attribute is not supported for R8C target",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ /* The attribute must be applied to functions only. */
+ warning (OPT_Wattributes,
+ "%qE attribute applies only to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ /* The argument must be a constant integer. */
+ warning (OPT_Wattributes,
+ "%qE attribute argument not an integer constant",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_INT_CST_LOW (TREE_VALUE (args)) < 18
+ || TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
+ {
+ /* The argument value must be between 18 and 255. */
+ warning (OPT_Wattributes,
+ "%qE attribute argument should be between 18 and 255",
+ name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
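+
+/* For example (a sketch), this requests a special page vector entry
+   for F:
+
+     void f (void) __attribute__((function_vector (18)));
+
+   Arguments outside 18..255 draw the warnings above.  */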
+
+/* If the function referenced by X carries the 'function_vector'
+   attribute, return its function vector number; otherwise return
+   zero. */
+int
+current_function_special_page_vector (rtx x)
+{
+ int num;
+
+ if ((GET_CODE(x) == SYMBOL_REF)
+ && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ {
+ tree list;
+ tree t = SYMBOL_REF_DECL (x);
+
+ if (TREE_CODE (t) != FUNCTION_DECL)
+ return 0;
+
+ list = M32C_ATTRIBUTES (t);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ {
+ num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
+ return num;
+ }
+
+ list = TREE_CHAIN (list);
+ }
+
+ return 0;
+ }
+ else
+ return 0;
+}
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE m32c_attribute_table
+static const struct attribute_spec m32c_attribute_table[] = {
+ {"interrupt", 0, 0, false, false, false, interrupt_handler},
+ {"bank_switch", 0, 0, false, false, false, interrupt_handler},
+ {"fast_interrupt", 0, 0, false, false, false, interrupt_handler},
+ {"function_vector", 1, 1, true, false, false, function_vector_handler},
+ {0, 0, 0, 0, 0, 0, 0}
+};
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES m32c_comp_type_attributes
+static int
+m32c_comp_type_attributes (const_tree type1 ATTRIBUTE_UNUSED,
+ const_tree type2 ATTRIBUTE_UNUSED)
+{
+ /* 0=incompatible 1=compatible 2=warning */
+ return 1;
+}
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES m32c_insert_attributes
+static void
+m32c_insert_attributes (tree node ATTRIBUTE_UNUSED,
+ tree * attr_ptr ATTRIBUTE_UNUSED)
+{
+ unsigned addr;
+ /* See if we need to make #pragma address variables volatile. */
+
+ if (TREE_CODE (node) == VAR_DECL)
+ {
+ const char *name = IDENTIFIER_POINTER (DECL_NAME (node));
+ if (m32c_get_pragma_address (name, &addr))
+ {
+ TREE_THIS_VOLATILE (node) = true;
+ }
+ }
+}
+
+
+struct GTY(()) pragma_entry {
+ const char *varname;
+ unsigned address;
+};
+typedef struct pragma_entry pragma_entry;
+
+/* Hash table of pragma info. */
+static GTY((param_is (pragma_entry))) htab_t pragma_htab;
+
+static int
+pragma_entry_eq (const void *p1, const void *p2)
+{
+ const pragma_entry *old = (const pragma_entry *) p1;
+ const char *new_name = (const char *) p2;
+
+ return strcmp (old->varname, new_name) == 0;
+}
+
+static hashval_t
+pragma_entry_hash (const void *p)
+{
+ const pragma_entry *old = (const pragma_entry *) p;
+ return htab_hash_string (old->varname);
+}
+
+void
+m32c_note_pragma_address (const char *varname, unsigned address)
+{
+ pragma_entry **slot;
+
+ if (!pragma_htab)
+ pragma_htab = htab_create_ggc (31, pragma_entry_hash,
+ pragma_entry_eq, NULL);
+
+ slot = (pragma_entry **)
+ htab_find_slot_with_hash (pragma_htab, varname,
+ htab_hash_string (varname), INSERT);
+
+ if (!*slot)
+ {
+ *slot = ggc_alloc_pragma_entry ();
+ (*slot)->varname = ggc_strdup (varname);
+ }
+ (*slot)->address = address;
+}
+
+static bool
+m32c_get_pragma_address (const char *varname, unsigned *address)
+{
+ pragma_entry **slot;
+
+ if (!pragma_htab)
+ return false;
+
+ slot = (pragma_entry **)
+ htab_find_slot_with_hash (pragma_htab, varname,
+ htab_hash_string (varname), NO_INSERT);
+ if (slot && *slot)
+ {
+ *address = (*slot)->address;
+ return true;
+ }
+ return false;
+}
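+
+/* The table is filled in from the address pragma (see m32c-pragma.c);
+   a user-level sketch, with the exact spelling per that file:
+
+     #pragma ADDRESS port3 0x3ad
+     char port3;
+
+   m32c_insert_attributes then marks PORT3 volatile, and
+   m32c_output_aligned_common emits "port3 = 0x03ad" instead of a
+   .comm directive.  */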
+
+void
+m32c_output_aligned_common (FILE *stream, tree decl ATTRIBUTE_UNUSED,
+ const char *name,
+ int size, int align, int global)
+{
+ unsigned address;
+
+ if (m32c_get_pragma_address (name, &address))
+ {
+ /* We never output these as global. */
+ assemble_name (stream, name);
+ fprintf (stream, " = 0x%04x\n", address);
+ return;
+ }
+ if (!global)
+ {
+ fprintf (stream, "\t.local\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+ }
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ fprintf (stream, ",%u,%u\n", size, align / BITS_PER_UNIT);
+}
+
+/* Predicates */
+
+/* This is a list of legal subregs of hard regs. */
+static const struct {
+ unsigned char outer_mode_size;
+ unsigned char inner_mode_size;
+ unsigned char byte_mask;
+ unsigned char legal_when;
+ unsigned int regno;
+} legal_subregs[] = {
+ {1, 2, 0x03, 1, R0_REGNO}, /* r0h r0l */
+ {1, 2, 0x03, 1, R1_REGNO}, /* r1h r1l */
+ {1, 2, 0x01, 1, A0_REGNO},
+ {1, 2, 0x01, 1, A1_REGNO},
+
+ {1, 4, 0x01, 1, A0_REGNO},
+ {1, 4, 0x01, 1, A1_REGNO},
+
+ {2, 4, 0x05, 1, R0_REGNO}, /* r2 r0 */
+ {2, 4, 0x05, 1, R1_REGNO}, /* r3 r1 */
+ {2, 4, 0x05, 16, A0_REGNO}, /* a1 a0 */
+ {2, 4, 0x01, 24, A0_REGNO}, /* a1 a0 */
+ {2, 4, 0x01, 24, A1_REGNO}, /* a1 a0 */
+
+ {4, 8, 0x55, 1, R0_REGNO}, /* r3 r1 r2 r0 */
+};
+
+/* Returns TRUE if OP is a subreg of a hard reg which we don't
+ support. We also bail on MEMs with illegal addresses. */
+bool
+m32c_illegal_subreg_p (rtx op)
+{
+ int offset;
+ unsigned int i;
+ int src_mode, dest_mode;
+
+ if (GET_CODE (op) == MEM
+ && ! m32c_legitimate_address_p (Pmode, XEXP (op, 0), false))
+ {
+ return true;
+ }
+
+ if (GET_CODE (op) != SUBREG)
+ return false;
+
+ dest_mode = GET_MODE (op);
+ offset = SUBREG_BYTE (op);
+ op = SUBREG_REG (op);
+ src_mode = GET_MODE (op);
+
+ if (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (src_mode))
+ return false;
+ if (GET_CODE (op) != REG)
+ return false;
+ if (REGNO (op) >= MEM0_REGNO)
+ return false;
+
+ offset = (1 << offset);
+
+ for (i = 0; i < ARRAY_SIZE (legal_subregs); i ++)
+ if (legal_subregs[i].outer_mode_size == GET_MODE_SIZE (dest_mode)
+ && legal_subregs[i].regno == REGNO (op)
+ && legal_subregs[i].inner_mode_size == GET_MODE_SIZE (src_mode)
+ && legal_subregs[i].byte_mask & offset)
+ {
+ switch (legal_subregs[i].legal_when)
+ {
+ case 1:
+ return false;
+ case 16:
+ if (TARGET_A16)
+ return false;
+ break;
+ case 24:
+ if (TARGET_A24)
+ return false;
+ break;
+ }
+ }
+ return true;
+}
+
+/* Returns TRUE if we support a move between the first two operands.
+ At the moment, we just want to discourage mem to mem moves until
+ after reload, because reload has a hard time with our limited
+ number of address registers, and we can get into a situation where
+ we need three of them when we only have two. */
+bool
+m32c_mov_ok (rtx * operands, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ if (TARGET_A24)
+ return true;
+
+#define DEBUG_MOV_OK 0
+#if DEBUG_MOV_OK
+ fprintf (stderr, "m32c_mov_ok %s\n", mode_name[mode]);
+ debug_rtx (op0);
+ debug_rtx (op1);
+#endif
+
+ if (GET_CODE (op0) == SUBREG)
+ op0 = XEXP (op0, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = XEXP (op1, 0);
+
+ if (GET_CODE (op0) == MEM
+ && GET_CODE (op1) == MEM
+ && ! reload_completed)
+ {
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - no, mem to mem\n");
+#endif
+ return false;
+ }
+
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - ok\n");
+#endif
+ return true;
+}
+
+/* Returns TRUE if two consecutive HImode mov instructions, generated
+   to store an immediate double-word value into a double-word
+   variable, can be combined into a single SImode mov instruction. */
+bool
+m32c_immd_dbl_mov (rtx * operands,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int flag = 0, okflag = 0, offset1 = 0, offset2 = 0, offsetsign = 0;
+ const char *str1;
+ const char *str2;
+
+ if (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && MEM_SCALAR_P (operands[0])
+ && !MEM_IN_STRUCT_P (operands[0])
+ && GET_CODE (XEXP (operands[2], 0)) == CONST
+ && GET_CODE (XEXP (XEXP (operands[2], 0), 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (XEXP (operands[2], 0), 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (XEXP (operands[2], 0), 0), 1)) == CONST_INT
+ && MEM_SCALAR_P (operands[2])
+ && !MEM_IN_STRUCT_P (operands[2]))
+ flag = 1;
+
+ else if (GET_CODE (XEXP (operands[0], 0)) == CONST
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (XEXP (operands[0], 0), 0), 0)) == SYMBOL_REF
+ && MEM_SCALAR_P (operands[0])
+ && !MEM_IN_STRUCT_P (operands[0])
+ && !(INTVAL (XEXP (XEXP (XEXP (operands[0], 0), 0), 1)) %4)
+ && GET_CODE (XEXP (operands[2], 0)) == CONST
+ && GET_CODE (XEXP (XEXP (operands[2], 0), 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (XEXP (operands[2], 0), 0), 0)) == SYMBOL_REF
+ && MEM_SCALAR_P (operands[2])
+ && !MEM_IN_STRUCT_P (operands[2]))
+ flag = 2;
+
+ else if (GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+ && REGNO (XEXP (XEXP (operands[0], 0), 0)) == FB_REGNO
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT
+ && MEM_SCALAR_P (operands[0])
+ && !MEM_IN_STRUCT_P (operands[0])
+ && !(INTVAL (XEXP (XEXP (operands[0], 0), 1)) %4)
+ && REGNO (XEXP (XEXP (operands[2], 0), 0)) == FB_REGNO
+ && GET_CODE (XEXP (XEXP (operands[2], 0), 1)) == CONST_INT
+ && MEM_SCALAR_P (operands[2])
+ && !MEM_IN_STRUCT_P (operands[2]))
+ flag = 3;
+
+ else
+ return false;
+
+ switch (flag)
+ {
+ case 1:
+ str1 = XSTR (XEXP (operands[0], 0), 0);
+ str2 = XSTR (XEXP (XEXP (XEXP (operands[2], 0), 0), 0), 0);
+ if (strcmp (str1, str2) == 0)
+ okflag = 1;
+ else
+ okflag = 0;
+ break;
+ case 2:
+ str1 = XSTR (XEXP (XEXP (XEXP (operands[0], 0), 0), 0), 0);
+ str2 = XSTR (XEXP (XEXP (XEXP (operands[2], 0), 0), 0), 0);
+ if (strcmp(str1,str2) == 0)
+ okflag = 1;
+ else
+ okflag = 0;
+ break;
+ case 3:
+ offset1 = INTVAL (XEXP (XEXP (operands[0], 0), 1));
+ offset2 = INTVAL (XEXP (XEXP (operands[2], 0), 1));
+ offsetsign = offset1 >> ((sizeof (offset1) * 8) -1);
+ if (((offset2-offset1) == 2) && offsetsign != 0)
+ okflag = 1;
+ else
+ okflag = 0;
+ break;
+ default:
+ okflag = 0;
+ }
+
+ if (okflag == 1)
+ {
+ HOST_WIDE_INT val;
+ operands[4] = gen_rtx_MEM (SImode, XEXP (operands[0], 0));
+
+ val = (INTVAL (operands[3]) << 16) + (INTVAL (operands[1]) & 0xFFFF);
+ operands[5] = gen_rtx_CONST_INT (VOIDmode, val);
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Expanders */
+
+/* Subregs are non-orthogonal for us, because our registers are all
+ different sizes. */
+static rtx
+m32c_subreg (enum machine_mode outer,
+ rtx x, enum machine_mode inner, int byte)
+{
+ int r, nr = -1;
+
+ /* When converting MEMs between modes of the same size, we just
+ rewrite them in place. */
+ if (GET_CODE (x) == SUBREG
+ && SUBREG_BYTE (x) == 0
+ && GET_CODE (SUBREG_REG (x)) == MEM
+ && (GET_MODE_SIZE (GET_MODE (x))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
+ {
+ rtx oldx = x;
+ x = gen_rtx_MEM (GET_MODE (x), XEXP (SUBREG_REG (x), 0));
+ MEM_COPY_ATTRIBUTES (x, SUBREG_REG (oldx));
+ }
+
+ /* Push/pop get done as smaller push/pops. */
+ if (GET_CODE (x) == MEM
+ && (GET_CODE (XEXP (x, 0)) == PRE_DEC
+ || GET_CODE (XEXP (x, 0)) == POST_INC))
+ return gen_rtx_MEM (outer, XEXP (x, 0));
+ if (GET_CODE (x) == SUBREG
+ && GET_CODE (XEXP (x, 0)) == MEM
+ && (GET_CODE (XEXP (XEXP (x, 0), 0)) == PRE_DEC
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == POST_INC))
+ return gen_rtx_MEM (outer, XEXP (XEXP (x, 0), 0));
+
+ if (GET_CODE (x) != REG)
+ {
+ rtx r = simplify_gen_subreg (outer, x, inner, byte);
+ if (GET_CODE (r) == SUBREG
+ && GET_CODE (x) == MEM
+ && MEM_VOLATILE_P (x))
+ {
+ /* Volatile MEMs don't get simplified, but we need them to
+ be. We are little endian, so the subreg byte is the
+ offset. */
+ r = adjust_address_nv (x, outer, byte);
+ }
+ return r;
+ }
+
+ r = REGNO (x);
+ if (r >= FIRST_PSEUDO_REGISTER || r == AP_REGNO)
+ return simplify_gen_subreg (outer, x, inner, byte);
+
+ if (IS_MEM_REGNO (r))
+ return simplify_gen_subreg (outer, x, inner, byte);
+
+ /* This is where the complexities of our register layout are
+ described. */
+ if (byte == 0)
+ nr = r;
+ else if (outer == HImode)
+ {
+ if (r == R0_REGNO && byte == 2)
+ nr = R2_REGNO;
+ else if (r == R0_REGNO && byte == 4)
+ nr = R1_REGNO;
+ else if (r == R0_REGNO && byte == 6)
+ nr = R3_REGNO;
+ else if (r == R1_REGNO && byte == 2)
+ nr = R3_REGNO;
+ else if (r == A0_REGNO && byte == 2)
+ nr = A1_REGNO;
+ }
+ else if (outer == SImode)
+ {
+ if (r == R0_REGNO && byte == 0)
+ nr = R0_REGNO;
+ else if (r == R0_REGNO && byte == 4)
+ nr = R1_REGNO;
+ }
+ if (nr == -1)
+ {
+ fprintf (stderr, "m32c_subreg %s %s %d\n",
+ mode_name[outer], mode_name[inner], byte);
+ debug_rtx (x);
+ gcc_unreachable ();
+ }
+ return gen_rtx_REG (outer, nr);
+}
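+
+/* Examples of the hard-register mapping above (sketched as calls):
+
+     m32c_subreg (HImode, r0:SI, SImode, 2)  ->  r2
+     m32c_subreg (HImode, r1:SI, SImode, 2)  ->  r3
+     m32c_subreg (HImode, a0:SI, SImode, 2)  ->  a1
+
+   reflecting that SImode values live in the r0/r2 and r1/r3 pairs.  */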
+
+/* Used to emit move instructions. We split some moves,
+ and avoid mem-mem moves. */
+int
+m32c_prepare_move (rtx * operands, enum machine_mode mode)
+{
+ if (far_addr_space_p (operands[0])
+ && CONSTANT_P (operands[1]))
+ {
+ operands[1] = force_reg (GET_MODE (operands[0]), operands[1]);
+ }
+ if (TARGET_A16 && mode == PSImode)
+ return m32c_split_move (operands, mode, 1);
+ if ((GET_CODE (operands[0]) == MEM)
+ && (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY))
+ {
+ rtx pmv = XEXP (operands[0], 0);
+ rtx dest_reg = XEXP (pmv, 0);
+ rtx dest_mod = XEXP (pmv, 1);
+
+ emit_insn (gen_rtx_SET (Pmode, dest_reg, dest_mod));
+ operands[0] = gen_rtx_MEM (mode, dest_reg);
+ }
+ if (can_create_pseudo_p () && MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+ return 0;
+}
+
+#define DEBUG_SPLIT 0
+
+/* Returns TRUE if the given PSImode move should be split. We split
+   all r8c/m16c moves, since those chips have no native PSImode
+   moves, and POP.L, as we can only *push* SImode. */
+int
+m32c_split_psi_p (rtx * operands)
+{
+#if DEBUG_SPLIT
+ fprintf (stderr, "\nm32c_split_psi_p\n");
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+ if (TARGET_A16)
+ {
+#if DEBUG_SPLIT
+ fprintf (stderr, "yes, A16\n");
+#endif
+ return 1;
+ }
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC)
+ {
+#if DEBUG_SPLIT
+ fprintf (stderr, "yes, pop.l\n");
+#endif
+ return 1;
+ }
+#if DEBUG_SPLIT
+ fprintf (stderr, "no, default\n");
+#endif
+ return 0;
+}
+
+/* Split the given move. SPLIT_ALL is 0 if splitting is optional
+   (define_expand), 1 if it is not optional (define_insn_and_split),
+   and 3 for define_split (the alternate API). */
+int
+m32c_split_move (rtx * operands, enum machine_mode mode, int split_all)
+{
+ rtx s[4], d[4];
+ int parts, si, di, rev = 0;
+ int rv = 0, opi = 2;
+ enum machine_mode submode = HImode;
+ rtx *ops, local_ops[10];
+
+ /* define_split modifies the existing operands, but the other two
+ emit new insns. OPS is where we store the operand pairs, which
+ we emit later. */
+ if (split_all == 3)
+ ops = operands;
+ else
+ ops = local_ops;
+
+ /* Split DImode into SImode halves; everything else splits into
+ HImode halves (the default set above). */
+ if (mode == DImode)
+ submode = SImode;
+
+ /* Before splitting mem-mem moves, force one operand into a
+ register. */
+ if (can_create_pseudo_p () && MEM_P (operands[0]) && MEM_P (operands[1]))
+ {
+#if DEBUG0
+ fprintf (stderr, "force_reg...\n");
+ debug_rtx (operands[1]);
+#endif
+ operands[1] = force_reg (mode, operands[1]);
+#if DEBUG0
+ debug_rtx (operands[1]);
+#endif
+ }
+
+ parts = 2;
+
+#if DEBUG_SPLIT
+ fprintf (stderr, "\nsplit_move %d all=%d\n", !can_create_pseudo_p (),
+ split_all);
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+
+ /* Note that split_all is not used to select the api after this
+ point, so it's safe to set it to 3 even with define_insn. */
+ /* None of the chips can move SI operands to sp-relative addresses,
+ so we always split those. */
+ if (m32c_extra_constraint_p (operands[0], 'S', "Ss"))
+ split_all = 3;
+
+ if (TARGET_A16
+ && (far_addr_space_p (operands[0])
+ || far_addr_space_p (operands[1])))
+ split_all |= 1;
+
+ /* We don't need to split these. */
+ if (TARGET_A24
+ && split_all != 3
+ && (mode == SImode || mode == PSImode)
+ && !(GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ return 0;
+
+ /* First, enumerate the subregs we'll be dealing with. */
+ for (si = 0; si < parts; si++)
+ {
+ d[si] =
+ m32c_subreg (submode, operands[0], mode,
+ si * GET_MODE_SIZE (submode));
+ s[si] =
+ m32c_subreg (submode, operands[1], mode,
+ si * GET_MODE_SIZE (submode));
+ }
+
+ /* Split pushes by emitting a sequence of smaller pushes. */
+ if (GET_CODE (d[0]) == MEM && GET_CODE (XEXP (d[0], 0)) == PRE_DEC)
+ {
+ for (si = parts - 1; si >= 0; si--)
+ {
+ ops[opi++] = gen_rtx_MEM (submode,
+ gen_rtx_PRE_DEC (Pmode,
+ gen_rtx_REG (Pmode,
+ SP_REGNO)));
+ ops[opi++] = s[si];
+ }
+
+ rv = 1;
+ }
+ /* Likewise for pops. */
+ else if (GET_CODE (s[0]) == MEM && GET_CODE (XEXP (s[0], 0)) == POST_INC)
+ {
+ for (di = 0; di < parts; di++)
+ {
+ ops[opi++] = d[di];
+ ops[opi++] = gen_rtx_MEM (submode,
+ gen_rtx_POST_INC (Pmode,
+ gen_rtx_REG (Pmode,
+ SP_REGNO)));
+ }
+ rv = 1;
+ }
+ else if (split_all)
+ {
+ /* if d[di] == s[si] for any di < si, we'll early clobber. */
+ for (di = 0; di < parts - 1; di++)
+ for (si = di + 1; si < parts; si++)
+ if (reg_mentioned_p (d[di], s[si]))
+ rev = 1;
+
+ if (rev)
+ for (si = 0; si < parts; si++)
+ {
+ ops[opi++] = d[si];
+ ops[opi++] = s[si];
+ }
+ else
+ for (si = parts - 1; si >= 0; si--)
+ {
+ ops[opi++] = d[si];
+ ops[opi++] = s[si];
+ }
+ rv = 1;
+ }
+ /* Now emit any moves we may have accumulated. */
+ if (rv && split_all != 3)
+ {
+ int i;
+ for (i = 2; i < opi; i += 2)
+ emit_move_insn (ops[i], ops[i + 1]);
+ }
+ return rv;
+}
+
+/* The m32c has a number of opcodes that act like memcpy, strcmp, and
+ the like. For the R8C they expect one of the addresses to be in
+ R1L:An so we need to arrange for that. Otherwise, it's just a
+ matter of picking out the operands we want and emitting the right
+ pattern for them. All these expanders, which correspond to
+ patterns in blkmov.md, must return nonzero if they expand the insn,
+ or zero if they should FAIL. */
+
+/* This is a memset() opcode. All operands are implied, so we need to
+ arrange for them to be in the right registers. The opcode wants
+ addresses, not [mem] syntax. $0 is the destination (MEM:BLK), $1
+ the count (HI), and $2 the value (QI). */
+int
+m32c_expand_setmemhi (rtx *operands)
+{
+ rtx desta, count, val;
+ rtx desto, counto;
+
+ desta = XEXP (operands[0], 0);
+ count = operands[1];
+ val = operands[2];
+
+ desto = gen_reg_rtx (Pmode);
+ counto = gen_reg_rtx (HImode);
+
+ if (GET_CODE (desta) != REG
+ || REGNO (desta) < FIRST_PSEUDO_REGISTER)
+ desta = copy_to_mode_reg (Pmode, desta);
+
+ /* This looks like an arbitrary restriction, but this is by far the
+ most common case. For counts 8..14 this actually results in
+ smaller code with no speed penalty because the half-sized
+ constant can be loaded with a shorter opcode. */
+ if (GET_CODE (count) == CONST_INT
+ && GET_CODE (val) == CONST_INT
+ && ! (INTVAL (count) & 1)
+ && (INTVAL (count) > 1)
+ && (INTVAL (val) <= 7 && INTVAL (val) >= -8))
+ {
+ unsigned v = INTVAL (val) & 0xff;
+ v = v | (v << 8);
+ count = copy_to_mode_reg (HImode, GEN_INT (INTVAL (count) / 2));
+ val = copy_to_mode_reg (HImode, GEN_INT (v));
+ if (TARGET_A16)
+ emit_insn (gen_setmemhi_whi_op (desto, counto, val, desta, count));
+ else
+ emit_insn (gen_setmemhi_wpsi_op (desto, counto, val, desta, count));
+ return 1;
+ }
+
+ /* This is the generalized memset() case. */
+ if (GET_CODE (val) != REG
+ || REGNO (val) < FIRST_PSEUDO_REGISTER)
+ val = copy_to_mode_reg (QImode, val);
+
+ if (GET_CODE (count) != REG
+ || REGNO (count) < FIRST_PSEUDO_REGISTER)
+ count = copy_to_mode_reg (HImode, count);
+
+ if (TARGET_A16)
+ emit_insn (gen_setmemhi_bhi_op (desto, counto, val, desta, count));
+ else
+ emit_insn (gen_setmemhi_bpsi_op (desto, counto, val, desta, count));
+
+ return 1;
+}
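+
+/* For example (a sketch), memset (dst, 5, 10) has an even count and a
+   small value, so the fast path above rewrites it as a word fill:
+   value 0x0505, count 5, via the *_whi_op / *_wpsi_op patterns.  */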
+
+/* This is a memcpy() opcode. All operands are implied, so we need to
+ arrange for them to be in the right registers. The opcode wants
+ addresses, not [mem] syntax. $0 is the destination (MEM:BLK), $1
+ is the source (MEM:BLK), and $2 the count (HI). */
+int
+m32c_expand_movmemhi (rtx *operands)
+{
+ rtx desta, srca, count;
+ rtx desto, srco, counto;
+
+ desta = XEXP (operands[0], 0);
+ srca = XEXP (operands[1], 0);
+ count = operands[2];
+
+ desto = gen_reg_rtx (Pmode);
+ srco = gen_reg_rtx (Pmode);
+ counto = gen_reg_rtx (HImode);
+
+ if (GET_CODE (desta) != REG
+ || REGNO (desta) < FIRST_PSEUDO_REGISTER)
+ desta = copy_to_mode_reg (Pmode, desta);
+
+ if (GET_CODE (srca) != REG
+ || REGNO (srca) < FIRST_PSEUDO_REGISTER)
+ srca = copy_to_mode_reg (Pmode, srca);
+
+ /* Similar to setmem, but we don't need to check the value. */
+ if (GET_CODE (count) == CONST_INT
+ && ! (INTVAL (count) & 1)
+ && (INTVAL (count) > 1))
+ {
+ count = copy_to_mode_reg (HImode, GEN_INT (INTVAL (count) / 2));
+ if (TARGET_A16)
+ emit_insn (gen_movmemhi_whi_op (desto, srco, counto, desta, srca, count));
+ else
+ emit_insn (gen_movmemhi_wpsi_op (desto, srco, counto, desta, srca, count));
+ return 1;
+ }
+
+ /* This is the generalized memcpy() case. */
+ if (GET_CODE (count) != REG
+ || REGNO (count) < FIRST_PSEUDO_REGISTER)
+ count = copy_to_mode_reg (HImode, count);
+
+ if (TARGET_A16)
+ emit_insn (gen_movmemhi_bhi_op (desto, srco, counto, desta, srca, count));
+ else
+ emit_insn (gen_movmemhi_bpsi_op (desto, srco, counto, desta, srca, count));
+
+ return 1;
+}
+
+/* This is a stpcpy() opcode. $0 is the destination (MEM:BLK) after
+ the copy, which should point to the NUL at the end of the string,
+ $1 is the destination (MEM:BLK), and $2 is the source (MEM:BLK).
+ Since our opcode leaves the destination pointing *after* the NUL,
+ we must emit an adjustment. */
+int
+m32c_expand_movstr (rtx *operands)
+{
+ rtx desta, srca;
+ rtx desto, srco;
+
+ desta = XEXP (operands[1], 0);
+ srca = XEXP (operands[2], 0);
+
+ desto = gen_reg_rtx (Pmode);
+ srco = gen_reg_rtx (Pmode);
+
+ if (GET_CODE (desta) != REG
+ || REGNO (desta) < FIRST_PSEUDO_REGISTER)
+ desta = copy_to_mode_reg (Pmode, desta);
+
+ if (GET_CODE (srca) != REG
+ || REGNO (srca) < FIRST_PSEUDO_REGISTER)
+ srca = copy_to_mode_reg (Pmode, srca);
+
+ emit_insn (gen_movstr_op (desto, srco, desta, srca));
+ /* desto ends up being a1, which allows this type of add through MOVA. */
+ emit_insn (gen_addpsi3 (operands[0], desto, GEN_INT (-1)));
+
+ return 1;
+}
+
+/* This is a strcmp() opcode. $0 is the destination (HI) which holds
+ <=>0 depending on the comparison, $1 is one string (MEM:BLK), and
+ $2 is the other (MEM:BLK). We must do the comparison, and then
+ convert the flags to a signed integer result. */
+int
+m32c_expand_cmpstr (rtx *operands)
+{
+ rtx src1a, src2a;
+
+ src1a = XEXP (operands[1], 0);
+ src2a = XEXP (operands[2], 0);
+
+ if (GET_CODE (src1a) != REG
+ || REGNO (src1a) < FIRST_PSEUDO_REGISTER)
+ src1a = copy_to_mode_reg (Pmode, src1a);
+
+ if (GET_CODE (src2a) != REG
+ || REGNO (src2a) < FIRST_PSEUDO_REGISTER)
+ src2a = copy_to_mode_reg (Pmode, src2a);
+
+ emit_insn (gen_cmpstrhi_op (src1a, src2a, src1a, src2a));
+ emit_insn (gen_cond_to_int (operands[0]));
+
+ return 1;
+}
+
+
+typedef rtx (*shift_gen_func)(rtx, rtx, rtx);
+
+static shift_gen_func
+shift_gen_func_for (int mode, int code)
+{
+#define GFF(m,c,f) if (mode == m && code == c) return f
+ GFF(QImode, ASHIFT, gen_ashlqi3_i);
+ GFF(QImode, ASHIFTRT, gen_ashrqi3_i);
+ GFF(QImode, LSHIFTRT, gen_lshrqi3_i);
+ GFF(HImode, ASHIFT, gen_ashlhi3_i);
+ GFF(HImode, ASHIFTRT, gen_ashrhi3_i);
+ GFF(HImode, LSHIFTRT, gen_lshrhi3_i);
+ GFF(PSImode, ASHIFT, gen_ashlpsi3_i);
+ GFF(PSImode, ASHIFTRT, gen_ashrpsi3_i);
+ GFF(PSImode, LSHIFTRT, gen_lshrpsi3_i);
+ GFF(SImode, ASHIFT, TARGET_A16 ? gen_ashlsi3_16 : gen_ashlsi3_24);
+ GFF(SImode, ASHIFTRT, TARGET_A16 ? gen_ashrsi3_16 : gen_ashrsi3_24);
+ GFF(SImode, LSHIFTRT, TARGET_A16 ? gen_lshrsi3_16 : gen_lshrsi3_24);
+#undef GFF
+ gcc_unreachable ();
+}
+
+/* The m32c only has one shift, but it takes a signed count. GCC
+ doesn't want this, so we fake it by negating any shift count when
+ we're pretending to shift the other way. Also, the shift count is
+ limited to -8..8. It's slightly better to use two shifts for 9..15
+ than to load the count into r1h, so we do that too. */
+int
+m32c_prepare_shift (rtx * operands, int scale, int shift_code)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ shift_gen_func func = shift_gen_func_for (mode, shift_code);
+ rtx temp;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int maxc = TARGET_A24 && (mode == PSImode || mode == SImode) ? 32 : 8;
+ int count = INTVAL (operands[2]) * scale;
+
+ while (count > maxc)
+ {
+ temp = gen_reg_rtx (mode);
+ emit_insn (func (temp, operands[1], GEN_INT (maxc)));
+ operands[1] = temp;
+ count -= maxc;
+ }
+ while (count < -maxc)
+ {
+ temp = gen_reg_rtx (mode);
+ emit_insn (func (temp, operands[1], GEN_INT (-maxc)));
+ operands[1] = temp;
+ count += maxc;
+ }
+ emit_insn (func (operands[0], operands[1], GEN_INT (count)));
+ return 1;
+ }
+
+ temp = gen_reg_rtx (QImode);
+ if (scale < 0)
+ /* The pattern has a NEG that corresponds to this. */
+ emit_move_insn (temp, gen_rtx_NEG (QImode, operands[2]));
+ else if (TARGET_A16 && mode == SImode)
+ /* We do this because the code below may modify it, and we don't
+ want to modify the original value. */
+ emit_move_insn (temp, operands[2]);
+ else
+ /* We'll only use it for the shift, no point emitting a move. */
+ temp = operands[2];
+
+ if (TARGET_A16 && GET_MODE_SIZE (mode) == 4)
+ {
+ /* The m16c has a limit of -16..16 for SI shifts, even when the
+ shift count is in a register. Since there are so many targets
+ of these shifts, it's better to expand the RTL here than to
+ call a helper function.
+
+ The resulting code looks something like this:
+
+ cmp.b r1h,-16
+ jge.b 1f
+ shl.l -16,dest
+ add.b r1h,16
+ 1f: cmp.b r1h,16
+ jle.b 1f
+ shl.l 16,dest
+ sub.b r1h,16
+ 1f: shl.l r1h,dest
+
+ We take advantage of the fact that "negative" shifts are
+ undefined to skip one of the comparisons. */
+
+ rtx count;
+ rtx label, insn, tempvar;
+
+ emit_move_insn (operands[0], operands[1]);
+
+ count = temp;
+ label = gen_label_rtx ();
+ LABEL_NUSES (label) ++;
+
+ tempvar = gen_reg_rtx (mode);
+
+ if (shift_code == ASHIFT)
+ {
+ /* This is a left shift. We only need to check positive counts. */
+ emit_jump_insn (gen_cbranchqi4 (gen_rtx_LE (VOIDmode, 0, 0),
+ count, GEN_INT (16), label));
+ emit_insn (func (tempvar, operands[0], GEN_INT (8)));
+ emit_insn (func (operands[0], tempvar, GEN_INT (8)));
+ insn = emit_insn (gen_addqi3 (count, count, GEN_INT (-16)));
+ emit_label_after (label, insn);
+ }
+ else
+ {
+ /* This is a right shift. We only need to check negative counts. */
+ emit_jump_insn (gen_cbranchqi4 (gen_rtx_GE (VOIDmode, 0, 0),
+ count, GEN_INT (-16), label));
+ emit_insn (func (tempvar, operands[0], GEN_INT (-8)));
+ emit_insn (func (operands[0], tempvar, GEN_INT (-8)));
+ insn = emit_insn (gen_addqi3 (count, count, GEN_INT (16)));
+ emit_label_after (label, insn);
+ }
+ operands[1] = operands[0];
+ emit_insn (func (operands[0], operands[0], count));
+ return 1;
+ }
+
+ operands[2] = temp;
+ return 0;
+}
+
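+/* Worked example (illustrative): an HImode ASHIFT by the constant 12
+ exceeds maxc == 8, so the first loop above emits two shifts,
+ conceptually
+
+ shl.w #8,r0
+ shl.w #4,r0
+
+ rather than loading the count into r1h; exact operands depend on
+ register allocation. */
+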
+/* The m32c has a limited range of operations that work on PSImode
+ values; we have to expand to SI, do the math, and truncate back to
+ PSI. Yes, this is expensive, but hopefully gcc will learn to avoid
+ those cases. */
+void
+m32c_expand_neg_mulpsi3 (rtx * operands)
+{
+ /* operands: a = b * i */
+ rtx temp1; /* b as SI */
+ rtx scale; /* i as SI */
+ rtx temp2; /* a*b as SI */
+
+ temp1 = gen_reg_rtx (SImode);
+ temp2 = gen_reg_rtx (SImode);
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ scale = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extendpsisi2 (scale, operands[2]));
+ }
+ else
+ scale = copy_to_mode_reg (SImode, operands[2]);
+
+ emit_insn (gen_zero_extendpsisi2 (temp1, operands[1]));
+ temp2 = expand_simple_binop (SImode, MULT, temp1, scale, temp2, 1, OPTAB_LIB);
+ emit_insn (gen_truncsipsi2 (operands[0], temp2));
+}
+
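+/* Sketch of the expansion above (illustrative):
+
+ temp1 = zero_extend (b); PSI -> SI
+ scale = zero_extend (i); or the CONST_INT copied into SI
+ temp2 = temp1 * scale; SImode multiply, possibly a libcall
+ a = truncate (temp2); SI -> PSI */
+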
+/* Pattern Output Functions */
+
+int
+m32c_expand_movcc (rtx *operands)
+{
+ rtx rel = operands[1];
+
+ if (GET_CODE (rel) != EQ && GET_CODE (rel) != NE)
+ return 1;
+ if (GET_CODE (operands[2]) != CONST_INT
+ || GET_CODE (operands[3]) != CONST_INT)
+ return 1;
+ if (GET_CODE (rel) == NE)
+ {
+ rtx tmp = operands[2];
+ operands[2] = operands[3];
+ operands[3] = tmp;
+ rel = gen_rtx_EQ (GET_MODE (rel), XEXP (rel, 0), XEXP (rel, 1));
+ }
+
+ emit_move_insn (operands[0],
+ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
+ rel,
+ operands[2],
+ operands[3]));
+ return 0;
+}
+
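+/* Illustrative trigger (an assumption about typical source):
+
+ int f (int a, int b) { return a == b ? 3 : 7; }
+
+ An NE condition is canonicalized to EQ by swapping the two constants,
+ so only the EQ form reaches the IF_THEN_ELSE emitted above. */
+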
+/* Used for the "insv" pattern. Return nonzero to fail, zero when done. */
+int
+m32c_expand_insv (rtx *operands)
+{
+ rtx op0, src0, p;
+ int mask;
+
+ if (INTVAL (operands[1]) != 1)
+ return 1;
+
+ /* Our insv opcode (bset, bclr) can only insert a one-bit constant. */
+ if (GET_CODE (operands[3]) != CONST_INT)
+ return 1;
+ if (INTVAL (operands[3]) != 0
+ && INTVAL (operands[3]) != 1
+ && INTVAL (operands[3]) != -1)
+ return 1;
+
+ mask = 1 << INTVAL (operands[2]);
+
+ op0 = operands[0];
+ if (GET_CODE (op0) == SUBREG
+ && SUBREG_BYTE (op0) == 0)
+ {
+ rtx sub = SUBREG_REG (op0);
+ if (GET_MODE (sub) == HImode || GET_MODE (sub) == QImode)
+ op0 = sub;
+ }
+
+ if (!can_create_pseudo_p ()
+ || (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0)))
+ src0 = op0;
+ else
+ {
+ src0 = gen_reg_rtx (GET_MODE (op0));
+ emit_move_insn (src0, op0);
+ }
+
+ if (GET_MODE (op0) == HImode
+ && INTVAL (operands[2]) >= 8
+ && GET_CODE (op0) == MEM)
+ {
+ /* We are little endian. */
+ rtx new_mem = gen_rtx_MEM (QImode, plus_constant (XEXP (op0, 0), 1));
+ MEM_COPY_ATTRIBUTES (new_mem, op0);
+ op0 = src0 = new_mem;
+ mask >>= 8;
+ }
+
+ /* First, we generate a mask with the correct polarity. If we are
+ storing a zero, we want an AND mask, so invert it. */
+ if (INTVAL (operands[3]) == 0)
+ {
+ /* Storing a zero, use an AND mask */
+ if (GET_MODE (op0) == HImode)
+ mask ^= 0xffff;
+ else
+ mask ^= 0xff;
+ }
+ /* Now we need to properly sign-extend the mask in case we need to
+ fall back to an AND or OR opcode. */
+ if (GET_MODE (op0) == HImode)
+ {
+ if (mask & 0x8000)
+ mask -= 0x10000;
+ }
+ else
+ {
+ if (mask & 0x80)
+ mask -= 0x100;
+ }
+
+ switch ( (INTVAL (operands[3]) ? 4 : 0)
+ + ((GET_MODE (op0) == HImode) ? 2 : 0)
+ + (TARGET_A24 ? 1 : 0))
+ {
+ case 0: p = gen_andqi3_16 (op0, src0, GEN_INT (mask)); break;
+ case 1: p = gen_andqi3_24 (op0, src0, GEN_INT (mask)); break;
+ case 2: p = gen_andhi3_16 (op0, src0, GEN_INT (mask)); break;
+ case 3: p = gen_andhi3_24 (op0, src0, GEN_INT (mask)); break;
+ case 4: p = gen_iorqi3_16 (op0, src0, GEN_INT (mask)); break;
+ case 5: p = gen_iorqi3_24 (op0, src0, GEN_INT (mask)); break;
+ case 6: p = gen_iorhi3_16 (op0, src0, GEN_INT (mask)); break;
+ case 7: p = gen_iorhi3_24 (op0, src0, GEN_INT (mask)); break;
+ default: p = NULL_RTX; break; /* Not reached, but silences a warning. */
+ }
+
+ emit_insn (p);
+ return 0;
+}
+
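+/* Illustrative trigger (assumption): storing a constant into a
+ single-bit field, e.g.
+
+ struct s { unsigned int b3 : 1; };
+ void f (struct s *p) { p->b3 = 1; }
+
+ which the switch above maps to an ior pattern (bit set), or to an
+ and pattern (bit clear) when the stored value is zero. */
+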
+const char *
+m32c_scc_pattern(rtx *operands, RTX_CODE code)
+{
+ static char buf[30];
+ if (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) == R0_REGNO)
+ {
+ if (code == EQ)
+ return "stzx\t#1,#0,r0l";
+ if (code == NE)
+ return "stzx\t#0,#1,r0l";
+ }
+ sprintf(buf, "bm%s\t0,%%h0\n\tand.b\t#1,%%0", GET_RTX_NAME (code));
+ return buf;
+}
+
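+/* For example (illustrative): an LTU store-flag into a destination
+ other than r0 yields "bmltu\t0,%h0" followed by "and.b\t#1,%0" -
+ copy the condition into bit 0, then mask off the rest. */
+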
+/* Encode symbol attributes of a SYMBOL_REF into its
+ SYMBOL_REF_FLAGS. */
+static void
+m32c_encode_section_info (tree decl, rtx rtl, int first)
+{
+ int extra_flags = 0;
+
+ default_encode_section_info (decl, rtl, first);
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && m32c_special_page_vector_p (decl))
+
+ extra_flags = SYMBOL_FLAG_FUNCVEC_FUNCTION;
+
+ if (extra_flags)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= extra_flags;
+}
+
+/* Returns TRUE if the current function is a leaf, and thus we can
+ determine which registers an interrupt function really needs to
+ save. The logic below is mostly about finding the insn sequence
+ that's the function, versus any sequence that might be open for the
+ current insn. */
+static int
+m32c_leaf_function_p (void)
+{
+ rtx saved_first, saved_last;
+ struct sequence_stack *seq;
+ int rv;
+
+ saved_first = crtl->emit.x_first_insn;
+ saved_last = crtl->emit.x_last_insn;
+ for (seq = crtl->emit.sequence_stack; seq && seq->next; seq = seq->next)
+ ;
+ if (seq)
+ {
+ crtl->emit.x_first_insn = seq->first;
+ crtl->emit.x_last_insn = seq->last;
+ }
+
+ rv = leaf_function_p ();
+
+ crtl->emit.x_first_insn = saved_first;
+ crtl->emit.x_last_insn = saved_last;
+ return rv;
+}
+
+/* Returns TRUE if the current function needs to use the ENTER/EXIT
+ opcodes. If the function doesn't need the frame base or stack
+ pointer, it can use the simpler RTS opcode. */
+static bool
+m32c_function_needs_enter (void)
+{
+ rtx insn;
+ struct sequence_stack *seq;
+ rtx sp = gen_rtx_REG (Pmode, SP_REGNO);
+ rtx fb = gen_rtx_REG (Pmode, FB_REGNO);
+
+ insn = get_insns ();
+ for (seq = crtl->emit.sequence_stack;
+ seq;
+ insn = seq->first, seq = seq->next);
+
+ while (insn)
+ {
+ if (reg_mentioned_p (sp, insn))
+ return true;
+ if (reg_mentioned_p (fb, insn))
+ return true;
+ insn = NEXT_INSN (insn);
+ }
+ return false;
+}
+
+/* Mark all the subexpressions of the PARALLEL rtx PAR as
+ frame-related. Return PAR.
+
+ dwarf2out.c:dwarf2out_frame_debug_expr ignores sub-expressions of a
+ PARALLEL rtx other than the first if they do not have the
+ FRAME_RELATED flag set on them. So this function is handy for
+ marking up 'enter' instructions. */
+static rtx
+m32c_all_frame_related (rtx par)
+{
+ int len = XVECLEN (par, 0);
+ int i;
+
+ for (i = 0; i < len; i++)
+ F (XVECEXP (par, 0, i));
+
+ return par;
+}
+
+/* Emits the prologue. See the frame layout comment earlier in this
+ file. We can reserve up to 256 bytes with the ENTER opcode, beyond
+ that we manually update sp. */
+void
+m32c_emit_prologue (void)
+{
+ int frame_size, extra_frame_size = 0, reg_save_size;
+ int complex_prologue = 0;
+
+ cfun->machine->is_leaf = m32c_leaf_function_p ();
+ if (interrupt_p (cfun->decl))
+ {
+ cfun->machine->is_interrupt = 1;
+ complex_prologue = 1;
+ }
+ else if (bank_switch_p (cfun->decl))
+ warning (OPT_Wattributes,
+ "%<bank_switch%> has no effect on non-interrupt functions");
+
+ reg_save_size = m32c_pushm_popm (PP_justcount);
+
+ if (interrupt_p (cfun->decl))
+ {
+ if (bank_switch_p (cfun->decl))
+ emit_insn (gen_fset_b ());
+ else if (cfun->machine->intr_pushm)
+ emit_insn (gen_pushm (GEN_INT (cfun->machine->intr_pushm)));
+ }
+
+ frame_size =
+ m32c_initial_elimination_offset (FB_REGNO, SP_REGNO) - reg_save_size;
+ if (frame_size == 0
+ && !m32c_function_needs_enter ())
+ cfun->machine->use_rts = 1;
+
+ if (frame_size > 254)
+ {
+ extra_frame_size = frame_size - 254;
+ frame_size = 254;
+ }
+ if (cfun->machine->use_rts == 0)
+ F (emit_insn (m32c_all_frame_related
+ (TARGET_A16
+ ? gen_prologue_enter_16 (GEN_INT (frame_size + 2))
+ : gen_prologue_enter_24 (GEN_INT (frame_size + 4)))));
+
+ if (extra_frame_size)
+ {
+ complex_prologue = 1;
+ if (TARGET_A16)
+ F (emit_insn (gen_addhi3 (gen_rtx_REG (HImode, SP_REGNO),
+ gen_rtx_REG (HImode, SP_REGNO),
+ GEN_INT (-extra_frame_size))));
+ else
+ F (emit_insn (gen_addpsi3 (gen_rtx_REG (PSImode, SP_REGNO),
+ gen_rtx_REG (PSImode, SP_REGNO),
+ GEN_INT (-extra_frame_size))));
+ }
+
+ complex_prologue += m32c_pushm_popm (PP_pushm);
+
+ /* This just emits a comment into the .s file for debugging. */
+ if (complex_prologue)
+ emit_insn (gen_prologue_end ());
+}
+
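+/* Worked example (illustrative, TARGET_A16): a 300-byte frame exceeds
+ the 254-byte ENTER limit, so the code above emits roughly
+
+ enter #256 ; 254 bytes of frame plus 2
+ add.w #-46,sp ; the remaining extra_frame_size
+ pushm ... ; saved registers, if any */
+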
+/* Likewise, for the epilogue. The only exception is that, for
+ interrupts, we must manually unwind the frame as the REIT opcode
+ doesn't do that. */
+void
+m32c_emit_epilogue (void)
+{
+ /* This just emits a comment into the .s file for debugging. */
+ if (m32c_pushm_popm (PP_justcount) > 0 || cfun->machine->is_interrupt)
+ emit_insn (gen_epilogue_start ());
+
+ m32c_pushm_popm (PP_popm);
+
+ if (cfun->machine->is_interrupt)
+ {
+ enum machine_mode spmode = TARGET_A16 ? HImode : PSImode;
+
+ /* REIT clears B flag and restores $fp for us, but we still
+ have to fix up the stack. USE_RTS just means we didn't
+ emit ENTER. */
+ if (!cfun->machine->use_rts)
+ {
+ emit_move_insn (gen_rtx_REG (spmode, A0_REGNO),
+ gen_rtx_REG (spmode, FP_REGNO));
+ emit_move_insn (gen_rtx_REG (spmode, SP_REGNO),
+ gen_rtx_REG (spmode, A0_REGNO));
+ /* We can't just add this to the POPM because it would be in
+ the wrong order, and wouldn't fix the stack if we're bank
+ switching. */
+ if (TARGET_A16)
+ emit_insn (gen_pophi_16 (gen_rtx_REG (HImode, FP_REGNO)));
+ else
+ emit_insn (gen_poppsi (gen_rtx_REG (PSImode, FP_REGNO)));
+ }
+ if (!bank_switch_p (cfun->decl) && cfun->machine->intr_pushm)
+ emit_insn (gen_popm (GEN_INT (cfun->machine->intr_pushm)));
+
+ /* The FREIT (Fast REturn from InTerrupt) instruction should be
+ generated only for M32C/M32CM targets (generate the REIT
+ instruction otherwise). */
+ if (fast_interrupt_p (cfun->decl))
+ {
+ /* Check if fast_attribute is set for M32C or M32CM. */
+ if (TARGET_A24)
+ {
+ emit_jump_insn (gen_epilogue_freit ());
+ }
+ /* If the fast_interrupt attribute is set for an R8C or M16C
+ target, ignore the attribute and generate the REIT
+ instruction instead. */
+ else
+ {
+ warning (OPT_Wattributes,
+ "%<fast_interrupt%> attribute directive ignored");
+ emit_jump_insn (gen_epilogue_reit_16 ());
+ }
+ }
+ else if (TARGET_A16)
+ emit_jump_insn (gen_epilogue_reit_16 ());
+ else
+ emit_jump_insn (gen_epilogue_reit_24 ());
+ }
+ else if (cfun->machine->use_rts)
+ emit_jump_insn (gen_epilogue_rts ());
+ else if (TARGET_A16)
+ emit_jump_insn (gen_epilogue_exitd_16 ());
+ else
+ emit_jump_insn (gen_epilogue_exitd_24 ());
+ emit_barrier ();
+}
+
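+/* Illustrative A16 interrupt epilogue, assuming no bank switching and
+ a frame set up with ENTER (mnemonics are a sketch):
+
+ mov.w fb,a0 ; manually unwind the frame
+ mov.w a0,sp
+ pop.w fb
+ popm ... ; intr_pushm registers
+ reit */
+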
+void
+m32c_emit_eh_epilogue (rtx ret_addr)
+{
+ /* R0[R2] has the stack adjustment. R1[R3] has the address to
+ return to. We have to fudge the stack, pop everything, pop SP
+ (fudged), and return (fudged). This is actually easier to do in
+ assembler, so punt to libgcc. */
+ emit_jump_insn (gen_eh_epilogue (ret_addr, cfun->machine->eh_stack_adjust));
+ /* emit_clobber (gen_rtx_REG (HImode, R0L_REGNO)); */
+ emit_barrier ();
+}
+
+/* Indicate which flags must be properly set for a given conditional. */
+static int
+flags_needed_for_conditional (rtx cond)
+{
+ switch (GET_CODE (cond))
+ {
+ case LE:
+ case GT:
+ return FLAGS_OSZ;
+ case LEU:
+ case GTU:
+ return FLAGS_ZC;
+ case LT:
+ case GE:
+ return FLAGS_OS;
+ case LTU:
+ case GEU:
+ return FLAGS_C;
+ case EQ:
+ case NE:
+ return FLAGS_Z;
+ default:
+ return FLAGS_N;
+ }
+}
+
+#define DEBUG_CMP 0
+
+/* Returns true if a compare insn is redundant because it would only
+ set flags that are already set correctly. */
+static bool
+m32c_compare_redundant (rtx cmp, rtx *operands)
+{
+ int flags_needed;
+ int pflags;
+ rtx prev, pp, next;
+ rtx op0, op1;
+#if DEBUG_CMP
+ int prev_icode, i;
+#endif
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+#if DEBUG_CMP
+ fprintf(stderr, "\n\033[32mm32c_compare_redundant\033[0m\n");
+ debug_rtx(cmp);
+ for (i=0; i<2; i++)
+ {
+ fprintf(stderr, "operands[%d] = ", i);
+ debug_rtx(operands[i]);
+ }
+#endif
+
+ next = next_nonnote_insn (cmp);
+ if (!next || !INSN_P (next))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "compare not followed by insn\n");
+ debug_rtx(next);
+#endif
+ return false;
+ }
+ if (GET_CODE (PATTERN (next)) == SET
+ && GET_CODE (XEXP ( PATTERN (next), 1)) == IF_THEN_ELSE)
+ {
+ next = XEXP (XEXP (PATTERN (next), 1), 0);
+ }
+ else if (GET_CODE (PATTERN (next)) == SET)
+ {
+ /* If this is a conditional, flags_needed will be something
+ other than FLAGS_N, which we test below. */
+ next = XEXP (PATTERN (next), 1);
+ }
+ else
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "compare not followed by conditional\n");
+ debug_rtx(next);
+#endif
+ return false;
+ }
+#if DEBUG_CMP
+ fprintf(stderr, "conditional is: ");
+ debug_rtx(next);
+#endif
+
+ flags_needed = flags_needed_for_conditional (next);
+ if (flags_needed == FLAGS_N)
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "compare not followed by conditional\n");
+ debug_rtx(next);
+#endif
+ return false;
+ }
+
+ /* Compare doesn't set overflow and carry the same way that
+ arithmetic instructions do, so we can't replace those. */
+ if (flags_needed & FLAGS_OC)
+ return false;
+
+ prev = cmp;
+ do {
+ prev = prev_nonnote_insn (prev);
+ if (!prev)
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "No previous insn.\n");
+#endif
+ return false;
+ }
+ if (!INSN_P (prev))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "Previous insn is a non-insn.\n");
+#endif
+ return false;
+ }
+ pp = PATTERN (prev);
+ if (GET_CODE (pp) != SET)
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "Previous insn is not a SET.\n");
+#endif
+ return false;
+ }
+ pflags = get_attr_flags (prev);
+
+ /* Looking up attributes of previous insns corrupted the recog
+ tables. */
+ INSN_UID (cmp) = -1;
+ recog (PATTERN (cmp), cmp, 0);
+
+ if (pflags == FLAGS_N
+ && reg_mentioned_p (op0, pp))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "intermediate non-flags insn uses op:\n");
+ debug_rtx(prev);
+#endif
+ return false;
+ }
+
+ /* Check for comparisons against memory - between volatiles and
+ aliases, we just can't risk this one. */
+ if (GET_CODE (operands[0]) == MEM
+ || GET_CODE (operands[1]) == MEM)
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "comparisons with memory:\n");
+ debug_rtx(prev);
+#endif
+ return false;
+ }
+
+ /* Check for PREV changing a register that's used to compute a
+ value in CMP, even if it doesn't otherwise change flags. */
+ if (GET_CODE (operands[0]) == REG
+ && rtx_referenced_p (SET_DEST (PATTERN (prev)), operands[0]))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "sub-value affected, op0:\n");
+ debug_rtx(prev);
+#endif
+ return false;
+ }
+ if (GET_CODE (operands[1]) == REG
+ && rtx_referenced_p (SET_DEST (PATTERN (prev)), operands[1]))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "sub-value affected, op1:\n");
+ debug_rtx(prev);
+#endif
+ return false;
+ }
+
+ } while (pflags == FLAGS_N);
+#if DEBUG_CMP
+ fprintf(stderr, "previous flag-setting insn:\n");
+ debug_rtx(prev);
+ debug_rtx(pp);
+#endif
+
+ if (GET_CODE (pp) == SET
+ && GET_CODE (XEXP (pp, 0)) == REG
+ && REGNO (XEXP (pp, 0)) == FLG_REGNO
+ && GET_CODE (XEXP (pp, 1)) == COMPARE)
+ {
+ /* Adjacent cbranches must have the same operands to be
+ redundant. */
+ rtx pop0 = XEXP (XEXP (pp, 1), 0);
+ rtx pop1 = XEXP (XEXP (pp, 1), 1);
+#if DEBUG_CMP
+ fprintf(stderr, "adjacent cbranches\n");
+ debug_rtx(pop0);
+ debug_rtx(pop1);
+#endif
+ if (rtx_equal_p (op0, pop0)
+ && rtx_equal_p (op1, pop1))
+ return true;
+#if DEBUG_CMP
+ fprintf(stderr, "prev cmp not same\n");
+#endif
+ return false;
+ }
+
+ /* Else the previous insn must be a SET, with either the source or
+ dest equal to operands[0], and operands[1] must be zero. */
+
+ if (!rtx_equal_p (op1, const0_rtx))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "operands[1] not const0_rtx\n");
+#endif
+ return false;
+ }
+ if (GET_CODE (pp) != SET)
+ {
+#if DEBUG_CMP
+ fprintf (stderr, "pp not set\n");
+#endif
+ return false;
+ }
+ if (!rtx_equal_p (op0, SET_SRC (pp))
+ && !rtx_equal_p (op0, SET_DEST (pp)))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "operands[0] not found in set\n");
+#endif
+ return false;
+ }
+
+#if DEBUG_CMP
+ fprintf(stderr, "cmp flags %x prev flags %x\n", flags_needed, pflags);
+#endif
+ if ((pflags & flags_needed) == flags_needed)
+ return true;
+
+ return false;
+}
+
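+/* Worked example (illustrative): in a sequence like
+
+ sub.w r1,r0
+ cmp.w #0,r0
+ jeq .L1
+
+ the subtract already left Z set correctly for an r0-vs-0 EQ test, so
+ the compare is redundant and this function returns true. */
+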
+/* Return the pattern for a compare. This will be commented out if
+ the compare is redundant, else a normal pattern is returned. Thus,
+ the assembler output says where the compare would have been. */
+char *
+m32c_output_compare (rtx insn, rtx *operands)
+{
+ static char templ[] = ";cmp.b\t%1,%0";
+ /* ^ 5 */
+
+ templ[5] = " bwll"[GET_MODE_SIZE(GET_MODE(operands[0]))];
+ if (m32c_compare_redundant (insn, operands))
+ {
+#if DEBUG_CMP
+ fprintf(stderr, "cbranch: cmp not needed\n");
+#endif
+ return templ;
+ }
+
+#if DEBUG_CMP
+ fprintf(stderr, "cbranch: cmp needed: `%s'\n", templ + 1);
+#endif
+ return templ + 1;
+}
+
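+/* E.g. for an HImode compare templ[5] becomes 'w', so the caller gets
+ "cmp.w\t%1,%0" - or the same string still prefixed with ';', an
+ assembler comment, when the compare was proven redundant. */
+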
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO m32c_encode_section_info
+
+/* If the frame pointer isn't used, we detect it manually. But the
+ stack pointer doesn't have as flexible addressing as the frame
+ pointer, so we always assume we have it. */
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED hook_bool_void_true
+
+/* The Global `targetm' Variable. */
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-m32c.h"
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
new file mode 100644
index 000000000..9cabeedb8
--- /dev/null
+++ b/gcc/config/m32c/m32c.h
@@ -0,0 +1,688 @@
+/* Target Definitions for R8C/M16C/M32C
+ Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_M32C_H
+#define GCC_M32C_H
+
+/* Controlling the Compilation Driver, `gcc'. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V} \
+ %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
+
+/* There are four CPU series we support, but they basically break down
+ into two families - the R8C/M16C family, with 16-bit address
+ registers and one set of opcodes, and the M32CM/M32C family, with
+ 24-bit address registers and a different set of opcodes. The
+ assembler doesn't care except for which opcode set is needed; the
+ big difference is in the memory maps, which we cover in
+ LIB_SPEC. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{mcpu=r8c:--m16c} \
+%{mcpu=m16c:--m16c} \
+%{mcpu=m32cm:--m32c} \
+%{mcpu=m32c:--m32c} "
+
+/* The default is R8C hardware. We support a simulator, which has its
+ own libgloss and link map, plus one default link map for each chip
+ family. Most of the logic here is making sure we do the right
+ thing when no CPU is specified, which defaults to R8C. */
+#undef LIB_SPEC
+#define LIB_SPEC "-( -lc %{msim:-lsim}%{!msim:-lnosys} -) \
+%{msim:%{!T*: %{mcpu=m32cm:%Tsim24.ld}%{mcpu=m32c:%Tsim24.ld} \
+ %{!mcpu=m32cm:%{!mcpu=m32c:%Tsim16.ld}}}} \
+%{!T*:%{!msim: %{mcpu=m16c:%Tm16c.ld} \
+ %{mcpu=m32cm:%Tm32cm.ld} \
+ %{mcpu=m32c:%Tm32c.ld} \
+ %{!mcpu=m16c:%{!mcpu=m32cm:%{!mcpu=m32c:%Tr8c.ld}}}}} \
+"
+
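+/* For example (illustrative; the "m32c-elf-gcc" driver name is an
+ assumption): "m32c-elf-gcc -mcpu=m32c -msim" links -lsim with
+ sim24.ld, while a bare "m32c-elf-gcc" falls back to -lnosys and
+ r8c.ld, matching the R8C default described above. */
+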
+/* Run-time Target Specification */
+
+/* Nothing unusual here. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ { \
+ builtin_assert ("cpu=m32c"); \
+ builtin_assert ("machine=m32c"); \
+ builtin_define ("__m32c__=1"); \
+ if (TARGET_R8C) \
+ builtin_define ("__r8c_cpu__=1"); \
+ if (TARGET_M16C) \
+ builtin_define ("__m16c_cpu__=1"); \
+ if (TARGET_M32CM) \
+ builtin_define ("__m32cm_cpu__=1"); \
+ if (TARGET_M32C) \
+ builtin_define ("__m32c_cpu__=1"); \
+ }
+
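+/* E.g. (illustrative) compiling with -mcpu=m16c defines both __m32c__
+ and __m16c_cpu__ for the preprocessor. */
+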
+/* The pragma handlers need to know if we've started processing
+ functions yet, as the memregs pragma should only be given at the
+ beginning of the file. This variable starts off TRUE and later
+ becomes FALSE. */
+extern int ok_to_change_target_memregs;
+extern int target_memregs;
+
+/* TARGET_CPU is a multi-way option set in m32c.opt. While we could
+ use enums or defines for this, this and m32c.opt are the only
+ places that know (or care) what values are being used. */
+#define TARGET_R8C (target_cpu == 'r')
+#define TARGET_M16C (target_cpu == '6')
+#define TARGET_M32CM (target_cpu == 'm')
+#define TARGET_M32C (target_cpu == '3')
+
+/* Address register sizes. Warning: these are used all over the place
+ to select between the two CPU families in general. */
+#define TARGET_A16 (TARGET_R8C || TARGET_M16C)
+#define TARGET_A24 (TARGET_M32CM || TARGET_M32C)
+
+#define TARGET_VERSION fprintf (stderr, " (m32c)");
+
+/* Defining data structures for per-function information */
+
+typedef struct GTY (()) machine_function
+{
+ /* How much we adjust the stack when returning from an exception
+ handler. */
+ rtx eh_stack_adjust;
+
+ /* TRUE if the current function is an interrupt handler. */
+ int is_interrupt;
+
+ /* TRUE if the current function is a leaf function. Currently, this
+ only affects saving $a0 in interrupt functions. */
+ int is_leaf;
+
+ /* Bitmask that keeps track of which registers are used in an
+ interrupt function, so we know which ones need to be saved and
+ restored. */
+ int intr_pushm;
+ /* Likewise, one element for each memreg that needs to be saved. */
+ char intr_pushmem[16];
+
+ /* TRUE if the current function can use a simple RTS to return, instead
+ of the longer ENTER/EXIT pair. */
+ int use_rts;
+}
+machine_function;
+
+#define INIT_EXPANDERS m32c_init_expanders ()
+
+/* Storage Layout */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 0
+#define WORDS_BIG_ENDIAN 0
+
+/* We can do QI, HI, and SI operations pretty much equally well, but
+ GCC expects us to have a "native" format, so we pick the one that
+ matches "int". Pointers are 16 bits for R8C/M16C (when TARGET_A16
+ is true) and 24 bits for M32CM/M32C (when TARGET_A24 is true), but
+ 24-bit pointers are stored in 32-bit words. */
+#define BITS_PER_UNIT 8
+#define UNITS_PER_WORD 2
+#define POINTER_SIZE (TARGET_A16 ? 16 : 32)
+#define POINTERS_EXTEND_UNSIGNED 1
+/* We have a problem with libgcc2. It only defines two versions of
+ each function, one for "int" and one for "long long". I.e., it assumes
+ that "sizeof (int) == sizeof (long)". For the M32C this is not true
+ and we need a third set of functions. We explicitly define
+ LIBGCC2_UNITS_PER_WORD here so that it is clear that we are expecting
+ to get the SI and DI versions from the libgcc2.c sources, and we
+ provide our own set of HI functions in m32c-lib2.c, which is why this
+ definition is surrounded by #ifndef..#endif. */
+#ifndef LIBGCC2_UNITS_PER_WORD
+#define LIBGCC2_UNITS_PER_WORD 4
+#endif
+
+/* These match the alignment enforced by the two types of stack operations. */
+#define PARM_BOUNDARY (TARGET_A16 ? 8 : 16)
+#define STACK_BOUNDARY (TARGET_A16 ? 8 : 16)
+
+/* We do this because we care more about space than about speed. For
+ the chips with 16-bit busses, we could set these to 16 if
+ desired. */
+#define FUNCTION_BOUNDARY 8
+#define BIGGEST_ALIGNMENT 8
+
+/* Since we have a maximum structure alignment of 8 there
+ is no need to enforce any alignment of bitfield types. */
+#undef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS 0
+
+#define STRICT_ALIGNMENT 0
+#define SLOW_BYTE_ACCESS 1
+
+/* Layout of Source Language Data Types */
+
+#define INT_TYPE_SIZE 16
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_A16 ? "int" : "long int")
+
+#undef UINTPTR_TYPE
+#define UINTPTR_TYPE (TARGET_A16 ? "unsigned int" : "long unsigned int")
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* REGISTER USAGE */
+
+/* Register Basics */
+
+/* Register layout:
+
+ [r0h][r0l] $r0 (16 bits, or two 8-bit halves)
+ [--------] $r2 (16 bits)
+ [r1h][r1l] $r1 (16 bits, or two 8-bit halves)
+ [--------] $r3 (16 bits)
+ [---][--------] $a0 (might be 24 bits)
+ [---][--------] $a1 (might be 24 bits)
+ [---][--------] $sb (might be 24 bits)
+ [---][--------] $fb (might be 24 bits)
+ [---][--------] $sp (might be 24 bits)
+ [-------------] $pc (20 or 24 bits)
+ [---] $flg (CPU flags)
+ [---][--------] $argp (virtual)
+ [--------] $mem0 (all 16 bits)
+ . . .
+ [--------] $mem14
+*/
+
+#define FIRST_PSEUDO_REGISTER 20
+
+/* Note that these two tables are modified based on which CPU family
+ you select; see m32c_conditional_register_usage for details. */
+
+/* r0 r2 r1 r3 - a0 a1 sb fb - sp pc flg argp - mem0..mem14 */
+#define FIXED_REGISTERS { 0, 0, 0, 0, \
+ 0, 0, 1, 0, \
+ 1, 1, 0, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
+#define CALL_USED_REGISTERS { 1, 1, 1, 1, \
+ 1, 1, 1, 0, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
+
+/* The *_REGNO theme matches m32c.md and most register number
+ arguments; the PC_REGNUM is the odd one out. */
+#ifndef PC_REGNO
+#define PC_REGNO 9
+#endif
+#define PC_REGNUM PC_REGNO
+
+/* Order of Allocation of Registers */
+
+#define REG_ALLOC_ORDER { \
+ 0, 1, 2, 3, 4, 5, /* r0..r3, a0, a1 */ \
+ 12, 13, 14, 15, 16, 17, 18, 19, /* mem0..mem7 */ \
+ 6, 7, 8, 9, 10, 11 /* sb, fb, sp, pc, flg, ap */ }
+
+/* How Values Fit in Registers */
+
+#define HARD_REGNO_NREGS(R,M) m32c_hard_regno_nregs (R, M)
+#define HARD_REGNO_MODE_OK(R,M) m32c_hard_regno_ok (R, M)
+#define MODES_TIEABLE_P(M1,M2) m32c_modes_tieable_p (M1, M2)
+#define AVOID_CCMODE_COPIES
+
+/* Register Classes */
+
+/* Most registers are special purpose in some form or another, so this
+ table is pretty big. Class names are used for constraints also;
+ for example the HL_REGS class (HL below) is "Rhl" in the md files.
+ See m32c_reg_class_from_constraint for the mapping. There's some
+ duplication so that we can better isolate the reason for using
+ constraints in the md files from the actual registers used; for
+ example we may want to exclude a1a0 from SI_REGS in the future,
+ without precluding their use as HImode registers. */
+
+/* m7654 - m3210 - argp flg pc sp - fb sb a1 a0 - r3 r1 r2 r0 */
+/* mmPAR */
+#define REG_CLASS_CONTENTS \
+{ { 0x00000000 }, /* NO */\
+ { 0x00000100 }, /* SP - sp */\
+ { 0x00000080 }, /* FB - fb */\
+ { 0x00000040 }, /* SB - sb */\
+ { 0x000001c0 }, /* CR - sb fb sp */\
+ { 0x00000001 }, /* R0 - r0 */\
+ { 0x00000004 }, /* R1 - r1 */\
+ { 0x00000002 }, /* R2 - r2 */\
+ { 0x00000008 }, /* R3 - r3 */\
+ { 0x00000003 }, /* R02 - r0r2 */\
+ { 0x0000000c }, /* R13 - r1r3 */\
+ { 0x00000005 }, /* HL - r0 r1 */\
+ { 0x00000005 }, /* QI - r0 r1 */\
+ { 0x0000000a }, /* R23 - r2 r3 */\
+ { 0x0000000f }, /* R03 - r0r2 r1r3 */\
+ { 0x0000000f }, /* DI - r0r2r1r3 + mems */\
+ { 0x00000010 }, /* A0 - a0 */\
+ { 0x00000020 }, /* A1 - a1 */\
+ { 0x00000030 }, /* A - a0 a1 */\
+ { 0x000000f0 }, /* AD - a0 a1 sb fp */\
+ { 0x000001f0 }, /* PS - a0 a1 sb fp sp */\
+ { 0x0000000f }, /* SI - r0r2 r1r3 a0a1 */\
+ { 0x0000003f }, /* HI - r0 r1 r2 r3 a0 a1 */\
+ { 0x00000033 }, /* R02A - r0r2 a0 a1 */ \
+ { 0x0000003f }, /* RA - r0..r3 a0 a1 */\
+ { 0x0000007f }, /* GENERAL */\
+ { 0x00000400 }, /* FLG */\
+ { 0x000001ff }, /* HC - r0l r1 r2 r3 a0 a1 sb fb sp */\
+ { 0x000ff000 }, /* MEM */\
+ { 0x000ff003 }, /* R02_A_MEM */\
+ { 0x000ff005 }, /* A_HL_MEM */\
+ { 0x000ff00c }, /* R1_R3_A_MEM */\
+ { 0x000ff00f }, /* R03_MEM */\
+ { 0x000ff03f }, /* A_HI_MEM */\
+ { 0x000ff0ff }, /* A_AD_CR_MEM_SI */\
+ { 0x000ff1ff }, /* ALL */\
+}
+
+enum reg_class
+{
+ NO_REGS,
+ SP_REGS,
+ FB_REGS,
+ SB_REGS,
+ CR_REGS,
+ R0_REGS,
+ R1_REGS,
+ R2_REGS,
+ R3_REGS,
+ R02_REGS,
+ R13_REGS,
+ HL_REGS,
+ QI_REGS,
+ R23_REGS,
+ R03_REGS,
+ DI_REGS,
+ A0_REGS,
+ A1_REGS,
+ A_REGS,
+ AD_REGS,
+ PS_REGS,
+ SI_REGS,
+ HI_REGS,
+ R02A_REGS,
+ RA_REGS,
+ GENERAL_REGS,
+ FLG_REGS,
+ HC_REGS,
+ MEM_REGS,
+ R02_A_MEM_REGS,
+ A_HL_MEM_REGS,
+ R1_R3_A_MEM_REGS,
+ R03_MEM_REGS,
+ A_HI_MEM_REGS,
+ A_AD_CR_MEM_SI_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES {\
+"NO_REGS", \
+"SP_REGS", \
+"FB_REGS", \
+"SB_REGS", \
+"CR_REGS", \
+"R0_REGS", \
+"R1_REGS", \
+"R2_REGS", \
+"R3_REGS", \
+"R02_REGS", \
+"R13_REGS", \
+"HL_REGS", \
+"QI_REGS", \
+"R23_REGS", \
+"R03_REGS", \
+"DI_REGS", \
+"A0_REGS", \
+"A1_REGS", \
+"A_REGS", \
+"AD_REGS", \
+"PS_REGS", \
+"SI_REGS", \
+"HI_REGS", \
+"R02A_REGS", \
+"RA_REGS", \
+"GENERAL_REGS", \
+"FLG_REGS", \
+"HC_REGS", \
+"MEM_REGS", \
+"R02_A_MEM_REGS", \
+"A_HL_MEM_REGS", \
+"R1_R3_A_MEM_REGS", \
+"R03_MEM_REGS", \
+"A_HI_MEM_REGS", \
+"A_AD_CR_MEM_SI_REGS", \
+"ALL_REGS", \
+}
+
+#define REGNO_REG_CLASS(R) m32c_regno_reg_class (R)
+
+/* We support simple displacements off address registers, nothing else. */
+#define BASE_REG_CLASS A_REGS
+#define INDEX_REG_CLASS NO_REGS
+
+/* We primarily use the new "long" constraint names, with the initial
+ letter classifying the constraint type and following letters
+ specifying which. The types are:
+
+ I - integer values
+ R - register classes
+ S - memory references (M was used)
+ A - addresses (currently unused)
+*/
+
+#define CONSTRAINT_LEN(CHAR,STR) \
+ ((CHAR) == 'I' ? 3 \
+ : (CHAR) == 'R' ? 3 \
+ : (CHAR) == 'S' ? 2 \
+ : (CHAR) == 'A' ? 2 \
+ : DEFAULT_CONSTRAINT_LEN(CHAR,STR))
+#define REG_CLASS_FROM_CONSTRAINT(CHAR,STR) \
+ (enum reg_class) m32c_reg_class_from_constraint (CHAR, STR)
+
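+/* Examples from the md files in this port: "Rhl" and "Rmm" are
+ register-class constraints (length 3), while "Sd" and "Ss" are
+ memory constraints (length 2), matching CONSTRAINT_LEN above. */
+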
+#define REGNO_OK_FOR_BASE_P(NUM) m32c_regno_ok_for_base_p (NUM)
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) m32c_preferred_reload_class (X, CLASS)
+#define PREFERRED_OUTPUT_RELOAD_CLASS(X,CLASS) m32c_preferred_output_reload_class (X, CLASS)
+#define LIMIT_RELOAD_CLASS(MODE,CLASS) \
+ (enum reg_class) m32c_limit_reload_class (MODE, CLASS)
+
+#define SECONDARY_RELOAD_CLASS(CLASS,MODE,X) \
+ (enum reg_class) m32c_secondary_reload_class (CLASS, MODE, X)
+
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+#define CLASS_MAX_NREGS(C,M) m32c_class_max_nregs (C, M)
+
+#define CANNOT_CHANGE_MODE_CLASS(F,T,C) m32c_cannot_change_mode_class(F,T,C)
+
+#define CONST_OK_FOR_CONSTRAINT_P(VALUE,C,STR) \
+ m32c_const_ok_for_constraint_p (VALUE, C, STR)
+#define CONST_DOUBLE_OK_FOR_CONSTRAINT_P(VALUE,C,STR) 0
+#define EXTRA_CONSTRAINT_STR(VALUE,C,STR) \
+ m32c_extra_constraint_p (VALUE, C, STR)
+#define EXTRA_MEMORY_CONSTRAINT(C,STR) \
+ m32c_extra_memory_constraint (C, STR)
+#define EXTRA_ADDRESS_CONSTRAINT(C,STR) \
+ m32c_extra_address_constraint (C, STR)
+
+/* STACK AND CALLING */
+
+/* Frame Layout */
+
+/* Standard push/pop stack, no surprises here. */
+
+#define STACK_GROWS_DOWNWARD 1
+#define STACK_PUSH_CODE PRE_DEC
+#define FRAME_GROWS_DOWNWARD 1
+
+#define STARTING_FRAME_OFFSET 0
+#define FIRST_PARM_OFFSET(F) 0
+
+#define RETURN_ADDR_RTX(COUNT,FA) m32c_return_addr_rtx (COUNT)
+
+#define INCOMING_RETURN_ADDR_RTX m32c_incoming_return_addr_rtx()
+#define INCOMING_FRAME_SP_OFFSET (TARGET_A24 ? 4 : 3)
+
+/* Exception Handling Support */
+
+#define EH_RETURN_DATA_REGNO(N) m32c_eh_return_data_regno (N)
+#define EH_RETURN_STACKADJ_RTX m32c_eh_return_stackadj_rtx ()
+
+/* Registers That Address the Stack Frame */
+
+#ifndef FP_REGNO
+#define FP_REGNO 7
+#endif
+#ifndef SP_REGNO
+#define SP_REGNO 8
+#endif
+#define AP_REGNO 11
+
+#define STACK_POINTER_REGNUM SP_REGNO
+#define FRAME_POINTER_REGNUM FP_REGNO
+#define ARG_POINTER_REGNUM AP_REGNO
+
+/* The static chain must be pointer-capable. */
+#define STATIC_CHAIN_REGNUM A0_REGNO
+
+#define DWARF_FRAME_REGISTERS 20
+#define DWARF_FRAME_REGNUM(N) m32c_dwarf_frame_regnum (N)
+#define DBX_REGISTER_NUMBER(N) m32c_dwarf_frame_regnum (N)
+
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+/* This is the same as the default in practice, except that by making
+ it explicit we tell binutils what size pointers to use. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (TARGET_A16 ? DW_EH_PE_udata2 : DW_EH_PE_udata4)
+
+/* Eliminating Frame Pointer and Arg Pointer */
+
+#define ELIMINABLE_REGS \
+ {{AP_REGNO, SP_REGNO}, \
+ {AP_REGNO, FB_REGNO}, \
+ {FB_REGNO, SP_REGNO}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM,TO,VAR) \
+ (VAR) = m32c_initial_elimination_offset(FROM,TO)
+
+/* Passing Function Arguments on the Stack */
+
+#define PUSH_ARGS 1
+#define PUSH_ROUNDING(N) m32c_push_rounding (N)
+#define CALL_POPS_ARGS(C) 0
+
+/* Passing Arguments in Registers */
+
+typedef struct m32c_cumulative_args
+{
+ /* For the address of the return value buffer (structures are
+ returned by passing the address of a buffer as an invisible
+ first argument; this flag identifies it). If set, the current
+ parameter will be put on the stack, regardless of type. */
+ int force_mem;
+ /* First parm is 1, parm 0 is hidden pointer for returning
+ aggregates. */
+ int parm_num;
+} m32c_cumulative_args;
+
+#define CUMULATIVE_ARGS m32c_cumulative_args
+#define INIT_CUMULATIVE_ARGS(CA,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
+ m32c_init_cumulative_args (&(CA),FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS)
+#define FUNCTION_ARG_REGNO_P(r) m32c_function_arg_regno_p (r)
+
+/* How Large Values Are Returned */
+
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* Function Entry and Exit */
+
+#define EXIT_IGNORE_STACK 0
+#define EPILOGUE_USES(REGNO) m32c_epilogue_uses(REGNO)
+#define EH_USES(REGNO) 0 /* FIXME */
+
+/* Generating Code for Profiling */
+
+#define FUNCTION_PROFILER(FILE,LABELNO)
+
+/* Implementing the Varargs Macros */
+
+/* Trampolines for Nested Functions */
+
+#define TRAMPOLINE_SIZE m32c_trampoline_size ()
+#define TRAMPOLINE_ALIGNMENT m32c_trampoline_alignment ()
+
+/* Addressing Modes */
+
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+#define MAX_REGS_PER_ADDRESS 1
+
+/* This is passed to the macros below, so that they can be implemented
+ in m32c.c. */
+#ifdef REG_OK_STRICT
+#define REG_OK_STRICT_V 1
+#else
+#define REG_OK_STRICT_V 0
+#endif
+
+#define REG_OK_FOR_BASE_P(X) m32c_reg_ok_for_base_p (X, REG_OK_STRICT_V)
+#define REG_OK_FOR_INDEX_P(X) 0
+
+/* #define FIND_BASE_TERM(X) when we do unspecs for symrefs */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+ if (m32c_legitimize_reload_address(&(X),MODE,OPNUM,TYPE,IND_LEVELS)) \
+ goto WIN;
+
+#define LEGITIMATE_CONSTANT_P(X) m32c_legitimate_constant_p (X)
+
+/* Address spaces. */
+#define ADDR_SPACE_FAR 1
+
+
+/* Condition Code Status */
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* Dividing the Output into Sections (Texts, Data, ...) */
+
+#define TEXT_SECTION_ASM_OP ".text"
+#define DATA_SECTION_ASM_OP ".data"
+#define BSS_SECTION_ASM_OP ".bss"
+
+#define CTOR_LIST_BEGIN
+#define CTOR_LIST_END
+#define DTOR_LIST_BEGIN
+#define DTOR_LIST_END
+#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\"aw\",%init_array"
+#define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\"aw\",%fini_array"
+#define INIT_ARRAY_SECTION_ASM_OP "\t.section\t.init_array,\"aw\",%init_array"
+#define FINI_ARRAY_SECTION_ASM_OP "\t.section\t.fini_array,\"aw\",%fini_array"
+
+/* The Overall Framework of an Assembler File */
+
+#define ASM_COMMENT_START ";"
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+
+/* Output and Generation of Labels */
+
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* Output of Assembler Instructions */
+
+#define REGISTER_NAMES { \
+ "r0", "r2", "r1", "r3", \
+ "a0", "a1", "sb", "fb", "sp", \
+ "pc", "flg", "argp", \
+ "mem0", "mem2", "mem4", "mem6", "mem8", "mem10", "mem12", "mem14", \
+}
+
+#define ADDITIONAL_REGISTER_NAMES { \
+ {"r0l", 0}, \
+ {"r1l", 2}, \
+ {"r0r2", 0}, \
+ {"r1r3", 2}, \
+ {"a0a1", 4}, \
+ {"r0r2r1r3", 0} }
+
+#define PRINT_OPERAND(S,X,C) m32c_print_operand (S, X, C)
+#define PRINT_OPERAND_PUNCT_VALID_P(C) m32c_print_operand_punct_valid_p (C)
+#define PRINT_OPERAND_ADDRESS(S,X) m32c_print_operand_address (S, X)
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define ASM_OUTPUT_REG_PUSH(S,R) m32c_output_reg_push (S, R)
+#define ASM_OUTPUT_REG_POP(S,R) m32c_output_reg_pop (S, R)
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ m32c_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 1)
+
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ m32c_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0)
+
+
+/* Output of Dispatch Tables */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(S,V) \
+ fprintf (S, "\t.word L%d\n", V)
+
+/* Assembler Commands for Exception Regions */
+
+#define DWARF_CIE_DATA_ALIGNMENT -1
+
+/* Assembler Commands for Alignment */
+
+#define ASM_OUTPUT_ALIGN(STREAM,POWER) \
+ fprintf (STREAM, "\t.p2align\t%d\n", POWER);
+
+/* Controlling Debugging Information Format */
+
+#define DWARF2_ADDR_SIZE 4
+
+/* Miscellaneous Parameters */
+
+#define HAS_LONG_COND_BRANCH false
+#define HAS_LONG_UNCOND_BRANCH true
+#define CASE_VECTOR_MODE SImode
+#define LOAD_EXTEND_OP(MEM) ZERO_EXTEND
+
+#define MOVE_MAX 4
+#define TRULY_NOOP_TRUNCATION(op,ip) 1
+
+#define STORE_FLAG_VALUE 1
+
+/* 16- or 24-bit pointers */
+#define Pmode (TARGET_A16 ? HImode : PSImode)
+#define FUNCTION_MODE QImode
+
+#define REGISTER_TARGET_PRAGMAS() m32c_register_pragmas()
+
+#endif
diff --git a/gcc/config/m32c/m32c.md b/gcc/config/m32c/m32c.md
new file mode 100644
index 000000000..739f24c17
--- /dev/null
+++ b/gcc/config/m32c/m32c.md
@@ -0,0 +1,80 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [(R0_REGNO 0)
+ (R2_REGNO 1)
+ (R1_REGNO 2)
+ (R3_REGNO 3)
+
+ (A0_REGNO 4)
+ (A1_REGNO 5)
+ (SB_REGNO 6)
+ (FB_REGNO 7)
+
+ (SP_REGNO 8)
+ (PC_REGNO 9)
+ (FLG_REGNO 10)
+ (MEM0_REGNO 12)
+ (MEM7_REGNO 19)
+ ])
+
+(define_constants
+ [(UNS_PROLOGUE_END 1)
+ (UNS_EPILOGUE_START 2)
+ (UNS_EH_EPILOGUE 3)
+ (UNS_PUSHM 4)
+ (UNS_POPM 5)
+ (UNS_SMOVF 6)
+ (UNS_SSTR 7)
+ (UNS_SCMPU 8)
+ (UNS_SMOVU 9)
+ (UNS_FSETB 10)
+ (UNS_FREIT 11)
+ ])
+
+;; n = no change, x = clobbered. The first 16 values are chosen such
+;; that the enum has one bit set for each flag.
+(define_attr "flags" "x,c,z,zc,s,sc,sz,szc,o,oc,oz,ozc,os,osc,osz,oszc,n" (const_string "n"))
+(define_asm_attributes [(set_attr "flags" "x")])
+
+(define_mode_iterator QHI [QI HI])
+(define_mode_iterator HPSI [(HI "TARGET_A16") (PSI "TARGET_A24")])
+(define_mode_iterator QHPSI [QI HI (PSI "TARGET_A24")])
+(define_mode_iterator QHSI [QI HI (SI "TARGET_A24")])
+(define_mode_attr bwl [(QI "b") (HI "w") (PSI "l") (SI "l")])
+
+(define_code_iterator eqne_cond [eq ne])
+
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "flags" "n")]
+)
+
+(define_insn "no_insn"
+ [(const_int 1)]
+ ""
+ ""
+ [(set_attr "flags" "n")]
+)
diff --git a/gcc/config/m32c/m32c.opt b/gcc/config/m32c/m32c.opt
new file mode 100644
index 000000000..d19153bbe
--- /dev/null
+++ b/gcc/config/m32c/m32c.opt
@@ -0,0 +1,44 @@
+; Target Options for R8C/M16C/M32C
+; Copyright (C) 2005, 2007
+; Free Software Foundation, Inc.
+; Contributed by Red Hat.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published
+; by the Free Software Foundation; either version 3, or (at your
+; option) any later version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+msim
+Target
+-msim Use simulator runtime
+
+mcpu=r8c
+Target RejectNegative Var(target_cpu,'r') Init('r')
+-mcpu=r8c Compile code for R8C variants
+
+mcpu=m16c
+Target RejectNegative Var(target_cpu,'6')
+-mcpu=m16c Compile code for M16C variants
+
+mcpu=m32cm
+Target RejectNegative Var(target_cpu,'m')
+-mcpu=m32cm Compile code for M32CM variants
+
+mcpu=m32c
+Target RejectNegative Var(target_cpu,'3')
+-mcpu=m32c Compile code for M32C variants
+
+memregs=
+Target RejectNegative Joined Var(target_memregs_string)
+-memregs= Number of memreg bytes (default: 16, range: 0..16)
diff --git a/gcc/config/m32c/minmax.md b/gcc/config/m32c/minmax.md
new file mode 100644
index 000000000..25be96151
--- /dev/null
+++ b/gcc/config/m32c/minmax.md
@@ -0,0 +1,58 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; min, max
+
+(define_insn "sminqi3"
+ [(set (match_operand:QI 0 "mra_operand" "=RhlSd,RhlSd,??Rmm,??Rmm,Raa,Raa")
+ (smin:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "mrai_operand" "iRhlSdRaa,?Rmm,iRhlSdRaa,?Rmm,iRhlSd,?Rmm")))]
+ "TARGET_A24"
+ "min.b\t%2,%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "sminhi3"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm")
+ (smin:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "iRhiSd,?Rmm,iRhiSd,?Rmm")))]
+ "TARGET_A24"
+ "min.w\t%2,%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "smaxqi3"
+ [(set (match_operand:QI 0 "mra_operand" "=RhlSd,RhlSd,??Rmm,??Rmm,Raa,Raa")
+ (smax:QI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "mrai_operand" "iRhlSdRaa,?Rmm,iRhlSdRaa,?Rmm,iRhlSd,?Rmm")))]
+ "TARGET_A24"
+ "max.b\t%2,%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "smaxhi3"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm")
+ (smax:HI (match_operand:HI 1 "mra_operand" "%0,0,0,0")
+ (match_operand:HI 2 "mrai_operand" "iRhiSd,?Rmm,iRhiSd,?Rmm")))]
+ "TARGET_A24"
+ "max.w\t%2,%0"
+ [(set_attr "flags" "n")]
+ )
diff --git a/gcc/config/m32c/mov.md b/gcc/config/m32c/mov.md
new file mode 100644
index 000000000..429807875
--- /dev/null
+++ b/gcc/config/m32c/mov.md
@@ -0,0 +1,491 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; move, push, extend, etc.
+
+;; Be careful to never create an alternative that has memory as both
+;; src and dest, as that makes gcc think that mem-mem moves in general
+;; are supported. While the chip does support this, it only has two
+;; address registers and sometimes gcc requires more than that. One
+;; example is code like this: a = *b where both a and b are spilled to
+;; the stack.
+
+(define_insn "mov<mode>_far_op1"
+ [(set (match_operand:QHI 0 "register_operand" "=Rhi")
+ (mem:QHI (plus:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "Ra0"))
+ (match_operand 2 "immediate_operand" "si"))))
+ ]
+ ""
+ "lde.<bwl>\t%D2[%1],%0"
+ [(set_attr "flags" "sz")]
+ )
+
+(define_insn "mov<mode>_far_op2"
+ [(set (mem:QHI (plus:SI (sign_extend:SI (match_operand:HI 0 "register_operand" "Ra0"))
+ (match_operand 1 "immediate_operand" "si")))
+ (match_operand:QHI 2 "register_operand"
+ "=Rhi"))
+ ]
+ ""
+ "ste.<bwl>\t%2,%D1[%0]"
+ [(set_attr "flags" "sz")]
+ )
+
+;; Match push/pop before mov.b for passing char as arg,
+;; e.g. stdlib/efgcvt.c.
+(define_insn "movqi_op"
+ [(set (match_operand:QI 0 "m32c_nonimmediate_operand"
+ "=SF,Rhi*Rmm, Rqi*Rmm, <, RqiSd*Rmm, SdSs, Rqi*Rmm, Sd")
+ (match_operand:QI 1 "m32c_any_operand"
+ "Rhi*Rmm,SF, iRqi*Rmm, iRqiSd*Rmm, >, Rqi*Rmm, SdSs, i"))]
+ "m32c_mov_ok (operands, QImode)"
+ "@
+ ste.b\t%1,%0
+ lde.b\t%1,%0
+ mov.b\t%1,%0
+ push.b\t%1
+ pop.b\t%0
+ mov.b\t%1,%0
+ mov.b\t%1,%0
+ mov.b\t%1,%0"
+ [(set_attr "flags" "sz,sz,sz,*,*,sz,sz,sz")]
+ )
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=RqiSd*Rmm")
+ (match_operand:QI 1 "general_operand" "iRqiSd*Rmm"))]
+ ""
+ "if (m32c_prepare_move (operands, QImode)) DONE;"
+ )
+
+
+(define_insn "movhi_op"
+ [(set (match_operand:HI 0 "m32c_nonimmediate_operand"
+ "=SF,Rhi*Rmm, Rhi*Rmm, Sd, SdSs, *Rcr, RhiSd*Rmm, <, RhiSd*Rmm, <, *Rcr")
+ (match_operand:HI 1 "m32c_any_operand"
+ " Rhi*Rmm,SF, iRhi*RmmSdSs, i, Rhi*Rmm, RhiSd*Rmm, *Rcr, iRhiSd*Rmm, >, *Rcr, >"))]
+ "m32c_mov_ok (operands, HImode)"
+ "@
+ ste.w\t%1,%0
+ lde.w\t%1,%0
+ mov.w\t%1,%0
+ mov.w\t%1,%0
+ mov.w\t%1,%0
+ ldc\t%1,%0
+ stc\t%1,%0
+ push.w\t%1
+ pop.w\t%0
+ pushc\t%1
+ popc\t%0"
+ [(set_attr "flags" "sz,sz,sz,sz,sz,n,n,n,n,n,n")]
+ )
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "m32c_nonimmediate_operand" "=RhiSd*Rmm")
+ (match_operand:HI 1 "m32c_any_operand" "iRhiSd*Rmm"))]
+ ""
+ "if (m32c_prepare_move (operands, HImode)) DONE;"
+ )
+
+
+(define_insn "movpsi_op"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand"
+ "=Raa, SdRmmRpi, Rcl, RpiSd*Rmm, <, <, Rcl, RpiRaa*Rmm")
+ (match_operand:PSI 1 "m32c_any_operand"
+ "sIU3, iSdRmmRpi, iRpiSd*Rmm, Rcl, Rpi*Rmm, Rcl, >, >"))]
+ "TARGET_A24 && m32c_mov_ok (operands, PSImode)"
+ "@
+ mov.l:s\t%1,%0
+ mov.l\t%1,%0
+ ldc\t%1,%0
+ stc\t%1,%0
+ push.l\t%1
+ pushc\t%1
+ popc\t%0
+ #"
+ [(set_attr "flags" "sz,sz,n,n,n,n,n,*")]
+ )
+
+
+;; The intention here is to combine the add with the move to create an
+;; indexed move. GCC doesn't always figure this out itself.
+
+(define_peephole2
+ [(set (match_operand:HPSI 0 "register_operand" "")
+ (plus:HPSI (match_operand:HPSI 1 "register_operand" "")
+ (match_operand:HPSI 2 "immediate_operand" "")))
+ (set (match_operand:QHSI 3 "nonimmediate_operand" "")
+ (mem:QHSI (match_operand:HPSI 4 "register_operand" "")))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && (rtx_equal_p (operands[0], operands[3])
+ || (dead_or_set_p (peep2_next_insn (1), operands[4])
+ && ! reg_mentioned_p (operands[0], operands[3])))"
+ [(set (match_dup 3)
+ (mem:QHSI (plus:HPSI (match_dup 1)
+ (match_dup 2))))]
+ "")
+
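+;; E.g. (illustrative) the pair
+;; add.w #4,a0
+;; mov.w [a0],r0
+;; becomes the single indexed move
+;; mov.w 4[a0],r0
+;; when the incremented a0 is otherwise dead.
+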
+(define_peephole2
+ [(set (match_operand:HPSI 0 "register_operand" "")
+ (plus:HPSI (match_operand:HPSI 1 "register_operand" "")
+ (match_operand:HPSI 2 "immediate_operand" "")))
+ (set (mem:QHSI (match_operand:HPSI 4 "register_operand" ""))
+ (match_operand:QHSI 3 "m32c_any_operand" ""))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && dead_or_set_p (peep2_next_insn (1), operands[4])
+ && ! reg_mentioned_p (operands[0], operands[3])"
+ [(set (mem:QHSI (plus:HPSI (match_dup 1)
+ (match_dup 2)))
+ (match_dup 3))]
+ "")
+
+; Peephole to combine two adjacent immediate HImode stores into a
+; single SImode mov to memory.
+(define_peephole2
+ [(set (match_operand:HI 0 "memory_operand" "")
+ (match_operand 1 "const_int_operand" ""))
+ (set (match_operand:HI 2 "memory_operand" "")
+ (match_operand 3 "const_int_operand" ""))]
+ "TARGET_A24 && m32c_immd_dbl_mov (operands, HImode)"
+ [(set (match_dup 4) (match_dup 5))]
+ ""
+)
+
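+; E.g. (illustrative) two adjacent immediate stores
+; mov.w #0x1234,-4[fb]
+; mov.w #0x5678,-2[fb]
+; can merge into the single SImode store
+; mov.l #0x56781234,-4[fb]
+; (little endian: the low half comes from the first store).
+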
+; Some PSI moves must be split.
+(define_split
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "")
+ (match_operand:PSI 1 "m32c_any_operand" ""))]
+ "reload_completed && m32c_split_psi_p (operands)"
+ [(set (match_dup 2)
+ (match_dup 3))
+ (set (match_dup 4)
+ (match_dup 5))]
+ "m32c_split_move (operands, PSImode, 3);"
+ )
+
+(define_expand "movpsi"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "")
+ (match_operand:PSI 1 "m32c_any_operand" ""))]
+ ""
+ "if (m32c_prepare_move (operands, PSImode)) DONE;"
+ )
+
+
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "m32c_nonimmediate_operand" "=RsiSd*Rmm")
+ (match_operand:SI 1 "m32c_any_operand" "iRsiSd*Rmm"))]
+ ""
+ "if (m32c_split_move (operands, SImode, 0)) DONE;"
+ )
+
+; All SI moves are split if TARGET_A16
+(define_insn_and_split "movsi_splittable"
+ [(set (match_operand:SI 0 "m32c_nonimmediate_operand" "=RsiRaa<*Rmm, RsiRaaSd*Rmm, Ss")
+ (match_operand:SI 1 "m32c_any_operand" "iRsiRaaSd*Rmm, iRsiRaa>*Rmm, RsiRaa*Rmm"))]
+ "TARGET_A16"
+ "#"
+ "TARGET_A16"
+ [(pc)]
+ "m32c_split_move (operands, SImode, 1); DONE;"
+ )
+
+; The movsi pattern doesn't always match because sometimes the modes
+; don't match.
+(define_insn "push_a01_l"
+ [(set (mem:SI (pre_dec:PSI (reg:PSI SP_REGNO)))
+ (match_operand 0 "a_operand" "Raa"))]
+ ""
+ "push.l\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "movsi_24"
+ [(set (match_operand:SI 0 "m32c_nonimmediate_operand" "=Rsi*Rmm, Sd, RsiSd*Rmm, <")
+ (match_operand:SI 1 "m32c_any_operand" "iRsiSd*Rmm, iRsi*Rmm, >, iRsiRaaSd*Rmm"))]
+ "TARGET_A24"
+ "@
+ mov.l\t%1,%0
+ mov.l\t%1,%0
+ #
+ push.l\t%1"
+ [(set_attr "flags" "sz,sz,*,n")]
+ )
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "m32c_nonimmediate_operand" "=RdiSd*Rmm")
+ (match_operand:DI 1 "m32c_any_operand" "iRdiSd*Rmm"))]
+ ""
+ "if (m32c_split_move (operands, DImode, 0)) DONE;"
+ )
+
+(define_insn_and_split "movdi_splittable"
+ [(set (match_operand:DI 0 "m32c_nonimmediate_operand" "=Rdi<*Rmm,RdiSd*Rmm")
+ (match_operand:DI 1 "m32c_any_operand" "iRdiSd*Rmm,iRdi>*Rmm"))]
+ ""
+ "#"
+ "reload_completed"
+ [(pc)]
+ "m32c_split_move (operands, DImode, 1); DONE;"
+ )
+
+
+
+
+(define_insn "pushqi"
+ [(set (mem:QI (pre_dec:PSI (reg:PSI SP_REGNO)))
+ (match_operand:QI 0 "mrai_operand" "iRqiSd*Rmm"))]
+ ""
+ "push.b\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_expand "pushhi"
+ [(set (mem:HI (pre_dec:PSI (reg:PSI SP_REGNO)))
+ (match_operand:HI 0 "" ""))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_pushhi_16 (operands[0]));
+ else
+ emit_insn (gen_pushhi_24 (operands[0]));
+ DONE;"
+ )
+
+(define_insn "pushhi_16"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNO)))
+ (match_operand:HI 0 "mrai_operand" "iRhiSd*Rmm,Rcr"))]
+ "TARGET_A16"
+ "@
+ push.w\t%0
+ pushc\t%0"
+ [(set_attr "flags" "n,n")]
+ )
+
+(define_insn "pushhi_24"
+ [(set (mem:HI (pre_dec:PSI (reg:PSI SP_REGNO)))
+ (match_operand:HI 0 "mrai_operand" "iRhiSd*Rmm"))]
+ "TARGET_A24"
+ "push.w\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+;(define_insn "pushpi"
+; [(set (mem:PSI (pre_dec:PSI (reg:PSI SP_REGNO)))
+; (match_operand:PI 0 "mrai_operand" "iRaa,Rcr"))]
+; "TARGET_A24"
+; "@
+; push.l\t%0
+; pushc\t%0"
+; )
+
+(define_insn "pushsi"
+ [(set (mem:SI (pre_dec:PSI (reg:PSI SP_REGNO)))
+ (match_operand:SI 0 "mrai_operand" "iRsiSd*Rmm"))]
+ "TARGET_A24"
+ "push.l\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_expand "pophi"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd*Rmm,Rcr")
+ (mem:HI (post_inc:HI (reg:HI SP_REGNO))))]
+ ""
+ "if (TARGET_A16)
+ emit_insn (gen_pophi_16 (operands[0]));
+ else
+ emit_insn (gen_pophi_24 (operands[0]));
+ DONE;"
+ )
+
+(define_insn "pophi_16"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd*Rmm,Rcr")
+ (mem:HI (post_inc:HI (reg:HI SP_REGNO))))]
+ "TARGET_A16"
+ "@
+ pop.w\t%0
+ popc\t%0"
+ [(set_attr "flags" "n,n")]
+ )
+
+(define_insn "pophi_24"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd*Rmm")
+ (mem:HI (post_inc:PSI (reg:PSI SP_REGNO))))]
+ "TARGET_A24"
+ "pop.w\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "poppsi"
+ [(set (match_operand:PSI 0 "cr_operand" "=Rcl")
+ (mem:PSI (post_inc:PSI (reg:PSI SP_REGNO))))]
+ "TARGET_A24"
+ "popc\t%0"
+ [(set_attr "flags" "n")]
+ )
+
+
+;; Rhl used here as an HI-mode Rxl
+(define_insn "extendqihi2"
+[(set (match_operand:HI 0 "m32c_nonimmediate_operand" "=RhlSd*Rmm")
+ (sign_extend:HI (match_operand:QI 1 "mra_operand" "0")))]
+ ""
+ "exts.b\t%1"
+ [(set_attr "flags" "sz")]
+ )
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=R03")
+ (sign_extend:SI (match_operand:HI 1 "r0123_operand" "0")))]
+ ""
+ "*
+ if (REGNO(operands[0]) == 0) return \"exts.w\t%1\";
+ else return \"mov.w r1,r3 | sha.w #-8,r3 | sha.w #-7,r3\";"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "extendhipsi2"
+ [(set (match_operand:PSI 0 "register_operand" "=R03")
+ (sign_extend:PSI (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "*
+ if (REGNO(operands[0]) == 0) return \"exts.w\t%1\";
+ else return \"mov.w r1,r3 | sha.w #-8,r3 | sha.w #-7,r3\";"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "extendpsisi2"
+ [(set (match_operand:SI 0 "mr_operand" "=R03Sd*Rmm")
+ (sign_extend:SI (match_operand:PSI 1 "mr_operand" "0")))]
+ ""
+ "; expand psi %1 to si %0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "zero_extendpsisi2"
+ [(set (match_operand:SI 0 "mr_operand" "=R03Sd*Rmm")
+ (zero_extend:SI (match_operand:PSI 1 "mr_operand" "0")))]
+ ""
+ "; expand psi %1 to si %0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "zero_extendhipsi2"
+ [(set (match_operand:PSI 0 "register_operand" "=Raa")
+ (truncate:PSI (zero_extend:SI (match_operand:HI 1 "register_operand" "R03"))))]
+ ""
+ "mov.w\t%1,%0"
+ [(set_attr "flags" "sz")]
+ )
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "m32c_nonimmediate_operand" "=RsiSd")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")))]
+ ""
+ "mov.w\t#0,%H0"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "m32c_nonimmediate_operand" "=?Rhl,RhiSd*Rmm")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ ""
+ "@
+ mov.b\t#0,%H0
+ and.w\t#255,%0"
+ [(set_attr "flags" "x,x")]
+ )
+
+(define_insn "truncsipsi2_16"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "=RsiRadSd*Rmm,Raa,Rcr,RsiSd*Rmm")
+ (truncate:PSI (match_operand:SI 1 "nonimmediate_operand" "0,RsiSd*Rmm,RsiSd*Rmm,Rcr")))]
+ "TARGET_A16"
+ "@
+ ; no-op trunc si %1 to psi %0
+ #
+ ldc\t%1,%0
+ stc\t%1,%0"
+ [(set_attr "flags" "n,*,n,n")]
+ )
+
+(define_insn "trunchiqi2"
+ [(set (match_operand:QI 0 "m32c_nonimmediate_operand" "=RqiRmmSd")
+ (truncate:QI (match_operand:HI 1 "mra_qi_operand" "0")))]
+ ""
+ "; no-op trunc hi %1 to qi %0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "truncsipsi2_24"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "=RsiSd*Rmm,Raa,!Rcl,RsiSd*Rmm")
+ (truncate:PSI (match_operand:SI 1 "m32c_nonimmediate_operand" "0,RsiSd*Rmm,RsiSd*Rmm,!Rcl")))]
+ "TARGET_A24"
+ "@
+ ; no-op trunc si %1 to psi %0
+ mov.l\t%1,%0
+ ldc\t%1,%0
+ stc\t%1,%0"
+ [(set_attr "flags" "n,sz,n,n")]
+ )
+
+(define_expand "truncsipsi2"
+ [(set (match_operand:PSI 0 "m32c_nonimmediate_operand" "=RsiRadSd*Rmm,Raa,Rcr,RsiSd*Rmm")
+ (truncate:PSI (match_operand:SI 1 "m32c_nonimmediate_operand" "0,RsiSd*Rmm,RsiSd*Rmm,Rcr")))]
+ ""
+ ""
+ )
+
+(define_expand "reload_inqi"
+ [(set (match_operand:QI 2 "" "=&Rqi")
+ (match_operand:QI 1 "" ""))
+ (set (match_operand:QI 0 "" "")
+ (match_dup 2))
+ ]
+ ""
+ "")
+
+(define_expand "reload_outqi"
+ [(set (match_operand:QI 2 "" "=&Rqi")
+ (match_operand:QI 1 "" ""))
+ (set (match_operand:QI 0 "" "")
+ (match_dup 2))
+ ]
+ ""
+ "")
+
+(define_expand "reload_inhi"
+ [(set (match_operand:HI 2 "" "=&Rhi")
+ (match_operand:HI 1 "" ""))
+ (set (match_operand:HI 0 "" "")
+ (match_dup 2))
+ ]
+ ""
+ "")
+
+(define_expand "reload_outhi"
+ [(set (match_operand:HI 2 "" "=&Rhi")
+ (match_operand:HI 1 "" ""))
+ (set (match_operand:HI 0 "" "")
+ (match_dup 2))
+ ]
+ ""
+ "")
diff --git a/gcc/config/m32c/muldiv.md b/gcc/config/m32c/muldiv.md
new file mode 100644
index 000000000..17b0b5d18
--- /dev/null
+++ b/gcc/config/m32c/muldiv.md
@@ -0,0 +1,288 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; multiply and divide
+
+; Here is the pattern for the const_int.
+(define_insn "mulqihi3_c"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,??Rmm")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "mra_operand" "%0,0"))
+ (match_operand 2 "immediate_operand" "i,i")))]
+ ""
+ "mul.b\t%2,%1"
+ [(set_attr "flags" "o")]
+)
+
+; Here is the pattern for registers and such.
+(define_insn "mulqihi3_r"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm,Raa,Raa")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0"))
+ (sign_extend:HI (match_operand:QI 2 "mra_operand" "RqiSd,?Rmm,RqiSd,?Rmm,RhlSd,?Rmm"))))]
+ ""
+ "mul.b\t%2,%1"
+ [(set_attr "flags" "o")]
+)
+
+; Don't try to sign_extend a const_int. Same for all other multiplies.
+(define_expand "mulqihi3"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm,Raa,Raa")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0"))
+ (match_operand:QI 2 "mra_operand" "RqiSd,?Rmm,RqiSd,?Rmm,RhlSd,?Rmm")))]
+ ""
+ "{ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_SIGN_EXTEND (HImode, operands[2]); }"
+)
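+
+; Illustrative note (ours, not from the original sources): const_ints
+; are modeless in RTL, so wrapping one in an extension is invalid:
+;   (mult:HI (sign_extend:HI (reg:QI r0l)) (const_int 100)) ; mulqihi3_c
+;   (sign_extend:HI (const_int 100))                        ; invalid RTL
+; Hence the expander above only wraps non-VOIDmode operands and lets
+; bare immediates match the _c pattern.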
+
+(define_insn "umulqihi3_c"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,??Rmm")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "mra_operand" "%0,0"))
+ (match_operand 2 "immediate_operand" "i,i")))]
+ ""
+ "mulu.b\t%U2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_insn "umulqihi3_r"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm,Raa,Raa")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0"))
+ (zero_extend:HI (match_operand:QI 2 "mra_operand" "RqiSd,?Rmm,RqiSd,?Rmm,RhlSd,?Rmm"))))]
+ ""
+ "mulu.b\t%U2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_expand "umulqihi3"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd,RhiSd,??Rmm,??Rmm,Raa,Raa")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "mra_operand" "%0,0,0,0,0,0"))
+ (match_operand:QI 2 "mra_operand" "RqiSd,?Rmm,RqiSd,?Rmm,RhlSd,?Rmm")))]
+ ""
+ "{ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_ZERO_EXTEND (HImode, operands[2]); }"
+)
+
+(define_insn "mulhisi3_c"
+ [(set (match_operand:SI 0 "ra_operand" "=Rsi")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "mra_operand" "%0"))
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ ""
+ "mul.w\t%2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_insn "mulhisi3_r"
+ [(set (match_operand:SI 0 "mra_operand" "=Rsi,Rsi")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "mra_operand" "%0,0"))
+ (sign_extend:SI (match_operand:HI 2 "mra_operand" "RhiSd,?Rmm"))))]
+ ""
+ "mul.w\t%2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_expand "mulhisi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "mra_operand" "%0,0,0,0"))
+ (match_operand:HI 2 "mra_operand" "RhiSd,?Rmm,RhiSd,?Rmm")))]
+ ""
+ "{ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_SIGN_EXTEND (SImode, operands[2]); }"
+)
+
+(define_insn "umulhisi3_c"
+ [(set (match_operand:SI 0 "ra_operand" "=Rsi")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "mra_operand" "%0"))
+ (match_operand 2 "m32c_const_u16_operand" "i")))]
+ ""
+ "mulu.w\t%u2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_insn "umulhisi3_r"
+ [(set (match_operand:SI 0 "mra_operand" "=Rsi,Rsi")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "mra_operand" "%0,0"))
+ (zero_extend:SI (match_operand:HI 2 "mra_operand" "RhiSd,?Rmm"))))]
+ ""
+ "mulu.w\t%u2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_expand "umulhisi3"
+ [(set (match_operand:SI 0 "mra_operand" "=RsiSd,RsiSd,??Rmm,??Rmm")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "mra_operand" "%0,0,0,0"))
+ (match_operand:HI 2 "mra_operand" "RhiSd,?Rmm,RhiSd,?Rmm")))]
+ ""
+ "{ if (GET_MODE (operands[2]) != VOIDmode)
+ operands[2] = gen_rtx_ZERO_EXTEND (SImode, operands[2]); }"
+)
+
+
+; GCC expects to be able to multiply pointer-sized integers too, but
+; fortunately it only multiplies by powers of two, although sometimes
+; they're negative.
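+; For example (an added illustration): indexing "int *p" as p[i]
+; multiplies i by 2, so the patterns below see
+;   (mult:PSI (reg:PSI i) (const_int 2))
+; and emit shl.l #1 (%b2 presumably printing the log2 of the scale);
+; a scale that fails m32c_psi_scale, such as a negative power of two,
+; is routed through m32c_expand_neg_mulpsi3 instead.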
+(define_insn "mulpsi3_op"
+ [(set (match_operand:PSI 0 "mra_operand" "=RsiSd")
+ (mult:PSI (match_operand:PSI 1 "mra_operand" "%0")
+ (match_operand 2 "m32c_psi_scale" "Ilb")))]
+ "TARGET_A24"
+ "shl.l\t%b2,%0"
+ [(set_attr "flags" "szc")]
+ )
+
+(define_expand "mulpsi3"
+ [(set (match_operand:PSI 0 "mra_operand" "=RsiSd")
+ (mult:PSI (match_operand:PSI 1 "mra_operand" "%0")
+ (match_operand 2 "m32c_psi_scale" "Ilb")))]
+ "TARGET_A24"
+ "if (GET_CODE (operands[2]) != CONST_INT
+ || ! m32c_psi_scale (operands[2], PSImode))
+ {
+ m32c_expand_neg_mulpsi3 (operands);
+ DONE;
+ }"
+ )
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "r0123_operand" "=R02,R02")
+ (mult:SI (match_operand:SI 1 "r0123_operand" "%0,0")
+ (match_operand:SI 2 "mra_operand" "RsiSd,?Rmm")))]
+ "TARGET_M32C"
+ "mul.l\t%2,%1"
+ [(set_attr "flags" "o")]
+)
+
+(define_expand "divmodqi4"
+ [(set (match_dup 4)
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "0,0")))
+ (parallel [(set (match_operand:QI 0 "register_operand" "=R0w,R0w")
+ (div:QI (match_dup 4)
+ (match_operand:QI 2 "general_operand" "iRqiSd,?Rmm")))
+ (set (match_operand:QI 3 "register_operand" "=&R0h,&R0h")
+ (mod:QI (match_dup 4) (match_dup 2)))
+ ])]
+ "0"
+ "operands[4] = gen_reg_rtx (HImode);"
+ )
+
+(define_insn "divmodqi4_n"
+ [(set (match_operand:QI 0 "register_operand" "=R0l,R0l")
+ (div:QI (match_operand:HI 1 "register_operand" "R0w,R0w")
+ (match_operand:QI 2 "general_operand" "iRqiSd,?Rmm")))
+ (set (match_operand:QI 3 "register_operand" "=R0h,R0h")
+ (mod:QI (match_dup 1) (match_dup 2)))
+ ]
+ "0"
+ "div.b\t%2"
+ [(set_attr "flags" "o")]
+ )
+
+(define_expand "udivmodqi4"
+ [(set (match_dup 4)
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "0,0")))
+ (parallel [(set (match_operand:QI 0 "register_operand" "=R0l,R0l")
+ (udiv:QI (match_dup 4)
+ (match_operand:QI 2 "general_operand" "iRqiSd,?Rmm")))
+ (set (match_operand:QI 3 "register_operand" "=&R0h,&R0h")
+ (umod:QI (match_dup 4) (match_dup 2)))
+ ])]
+ "0"
+ "operands[4] = gen_reg_rtx (HImode);"
+ )
+
+(define_insn "udivmodqi4_n"
+ [(set (match_operand:QI 0 "register_operand" "=R0l,R0l")
+ (udiv:QI (match_operand:HI 1 "register_operand" "R0w,R0w")
+ (match_operand:QI 2 "general_operand" "iRqiSd,?Rmm")))
+ (set (match_operand:QI 3 "register_operand" "=R0h,R0h")
+ (umod:QI (match_dup 1) (match_dup 2)))
+ ]
+ "0"
+ "divu.b\t%2"
+ [(set_attr "flags" "o")]
+ )
+
+(define_expand "divmodhi4"
+ [(set (match_dup 4)
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "0,0")))
+ (parallel [(set (match_operand:HI 0 "register_operand" "=R0w,R0w")
+ (div:HI (match_dup 4)
+ (match_operand:HI 2 "general_operand" "iRhiSd,?Rmm")))
+ (set (match_operand:HI 3 "register_operand" "=R2w,R2w")
+ (mod:HI (match_dup 4) (match_dup 2)))
+ ])]
+ ""
+ "operands[4] = gen_reg_rtx (SImode);"
+ )
+
+(define_insn "divmodhi4_n"
+ [(set (match_operand:HI 0 "m32c_r0_operand" "=R0w,R0w")
+ (div:HI (match_operand:SI 1 "m32c_r0_operand" "R02,R02")
+ (match_operand:HI 2 "m32c_notr2_operand" "iR1wR3wRaaSd,?Rmm")))
+ (set (match_operand:HI 3 "m32c_r2_operand" "=R2w,R2w")
+ (mod:HI (match_dup 1) (match_dup 2)))
+ ]
+ ""
+ "div.w\t%2"
+ [(set_attr "flags" "o")]
+ )
+
+(define_expand "udivmodhi4"
+ [(set (match_dup 4)
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0,0")))
+ (parallel [(set (match_operand:HI 0 "register_operand" "=R0w,R0w")
+ (udiv:HI (match_dup 4)
+ (match_operand:HI 2 "general_operand" "iRhiSd,?Rmm")))
+ (set (match_operand:HI 3 "register_operand" "=R2w,R2w")
+ (umod:HI (match_dup 4) (match_dup 2)))
+ ])]
+ ""
+ "operands[4] = gen_reg_rtx (SImode);"
+ )
+
+(define_insn "udivmodhi4_n"
+ [(set (match_operand:HI 0 "m32c_r0_operand" "=R0w,R0w")
+ (udiv:HI (match_operand:SI 1 "m32c_r0_operand" "R02,R02")
+ (match_operand:HI 2 "m32c_notr2_operand" "iR1wR3wRaaSd,?Rmm")))
+ (set (match_operand:HI 3 "m32c_r2_operand" "=R2w,R2w")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ ]
+ ""
+ "divu.w\t%2"
+ [(set_attr "flags" "o")]
+ )
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "r0123_operand" "=R02,R02")
+ (div:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (match_operand:SI 2 "mra_operand" "RsiSd,?Rmm")))]
+ "TARGET_M32C"
+ "div.l\t%2"
+ [(set_attr "flags" "o")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "r0123_operand" "=R02,R02")
+ (udiv:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (match_operand:SI 2 "mra_operand" "RsiSd,?Rmm")))]
+ "TARGET_M32C"
+ "divu.l\t%2"
+ [(set_attr "flags" "o")]
+)
+
+
diff --git a/gcc/config/m32c/predicates.md b/gcc/config/m32c/predicates.md
new file mode 100644
index 000000000..533f3469c
--- /dev/null
+++ b/gcc/config/m32c/predicates.md
@@ -0,0 +1,299 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Predicates
+
+; TRUE for any valid operand. We do this because general_operand
+; refuses to match volatile memory refs.
+
+(define_predicate "m32c_any_operand"
+ (ior (match_operand 0 "general_operand")
+ (match_code "mem,const_int,const_double"))
+ {
+ return ! m32c_illegal_subreg_p (op);
+ }
+)
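+
+; Roughly, genpreds turns the definition above into a C function along
+; these lines (our sketch; the generated code differs in detail):
+;   int m32c_any_operand (rtx op, enum machine_mode mode)
+;   {
+;     if (! (general_operand (op, mode)
+;            || GET_CODE (op) == MEM
+;            || GET_CODE (op) == CONST_INT
+;            || GET_CODE (op) == CONST_DOUBLE))
+;       return 0;
+;     return ! m32c_illegal_subreg_p (op);
+;   }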
+
+; Likewise for nonimmediate_operand.
+
+(define_predicate "m32c_nonimmediate_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_code "mem"))
+ {
+ return ! m32c_illegal_subreg_p (op);
+ }
+)
+
+; TRUE if the operand is a pseudo-register.
+(define_predicate "m32c_pseudo"
+ (ior (and (match_code "reg")
+ (match_test "REGNO(op) >= FIRST_PSEUDO_REGISTER"))
+ (and (match_code "subreg")
+ (and (match_test "GET_CODE (XEXP (op, 0)) == REG")
+ (match_test "REGNO(XEXP (op,0)) >= FIRST_PSEUDO_REGISTER")))))
+
+
+; Returning true causes many predicates to NOT match. We allow
+; subregs for type changing, but not for size changing.
+(define_predicate "m32c_wide_subreg"
+ (and (match_code "subreg")
+ (not (match_operand 0 "m32c_pseudo")))
+ {
+ unsigned int sizeo = GET_MODE_SIZE (GET_MODE (op));
+ unsigned int sizei = GET_MODE_SIZE (GET_MODE (XEXP (op, 0)));
+ sizeo = (sizeo + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ sizei = (sizei + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ return sizeo != sizei;
+ })
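+
+; Example (ours; assumes UNITS_PER_WORD == 2 on this target):
+; (subreg:SI (reg:HI r0) 0) covers 2 words where the inner reg covers
+; 1, so it is "wide" and rejected by mra_operand and friends, while
+; (subreg:QI (reg:HI r0) 0) stays within one word (a type change
+; only) and is allowed.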
+
+; TRUE for r0 through r3, or a pseudo that reload could put in r0
+; through r3 (likewise for the next couple too)
+(define_predicate "r0123_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) <= R3_REGNO"))))
+
+; TRUE for r0
+(define_predicate "m32c_r0_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == R0_REGNO"))))
+
+; TRUE for r1
+(define_predicate "m32c_r1_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == R1_REGNO"))))
+
+; TRUE for HL_CLASS (r0 or r1)
+(define_predicate "m32c_hl_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == R0_REGNO || REGNO(op) == R1_REGNO"))))
+
+
+; TRUE for r2
+(define_predicate "m32c_r2_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == R2_REGNO"))))
+
+; TRUE for r3
+(define_predicate "m32c_r3_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == R3_REGNO"))))
+
+; TRUE for any general operand except r2.
+(define_predicate "m32c_notr2_operand"
+ (and (match_operand 0 "general_operand")
+ (ior (not (match_code "reg"))
+ (match_test "REGNO(op) != R2_REGNO"))))
+
+; TRUE for the stack pointer.
+(define_predicate "m32c_sp_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == SP_REGNO"))))
+
+; TRUE for control registers.
+(define_predicate "cr_operand"
+ (match_code "reg")
+ "return (REGNO (op) >= SB_REGNO
+ && REGNO (op) <= FLG_REGNO);")
+
+; TRUE for $a0 or $a1.
+(define_predicate "a_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == A0_REGNO || REGNO (op) == A1_REGNO")))
+
+; TRUE for $a0 or $a1 or a pseudo
+(define_predicate "ap_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == A0_REGNO || REGNO (op) == A1_REGNO"))))
+
+; TRUE for r0 through r3, or a0 or a1.
+(define_predicate "ra_operand"
+ (and (and (match_operand 0 "register_operand" "")
+ (not (match_operand 1 "cr_operand" "")))
+ (not (match_operand 2 "m32c_wide_subreg" ""))))
+
+; Likewise, plus TRUE for memory references.
+(define_predicate "mra_operand"
+ (and (and (match_operand 0 "m32c_nonimmediate_operand" "")
+ (not (match_operand 1 "cr_operand" "")))
+ (not (match_operand 2 "m32c_wide_subreg" ""))))
+
+; Likewise, plus TRUE for subregs.
+(define_predicate "mras_operand"
+ (and (match_operand 0 "nonimmediate_operand" "")
+ (not (match_operand 1 "cr_operand" ""))))
+
+; As above, but no push/pop operations
+(define_predicate "mra_nopp_operand"
+ (match_operand 0 "mra_operand" "")
+{
+ if (GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == PRE_DEC
+ || (GET_CODE (XEXP (op, 0)) == POST_INC)))
+ return 0;
+ return 1;
+})
+
+; TRUE for memory, r0..r3, a0..a1, or immediates.
+(define_predicate "mrai_operand"
+ (and (and (match_operand 0 "m32c_any_operand" "")
+ (not (match_operand 1 "cr_operand" "")))
+ (not (match_operand 2 "m32c_wide_subreg" ""))))
+
+; Likewise, plus true for subregs.
+(define_predicate "mrasi_operand"
+ (and (match_operand 0 "general_operand" "")
+ (not (match_operand 1 "cr_operand" ""))))
+
+; TRUE for r0..r3 or memory.
+(define_predicate "mr_operand"
+ (and (match_operand 0 "mra_operand" "")
+ (not (match_operand 1 "a_operand" ""))))
+
+; TRUE for a0..a1 or memory.
+(define_predicate "ma_operand"
+ (ior (match_operand 0 "a_operand" "")
+ (match_operand 1 "memory_operand" "")))
+
+; TRUE for memory operands that are not indexed
+(define_predicate "memsym_operand"
+ (and (match_operand 0 "memory_operand" "")
+ (match_test "m32c_extra_constraint_p (op, 'S', \"Si\")")))
+
+; TRUE for memory operands with small integer addresses
+(define_predicate "memimmed_operand"
+ (and (match_operand 0 "memory_operand" "")
+ (match_test "m32c_extra_constraint_p (op, 'S', \"Sp\")")))
+
+; TRUE for r1h. This is complicated since r1h isn't a register GCC
+; normally knows about.
+(define_predicate "r1h_operand"
+ (match_code "zero_extract")
+ {
+ rtx reg = XEXP (op, 0);
+ rtx size = XEXP (op, 1);
+ rtx pos = XEXP (op, 2);
+ return (GET_CODE (reg) == REG
+ && REGNO (reg) == R1_REGNO
+ && GET_CODE (size) == CONST_INT
+ && INTVAL (size) == 8
+ && GET_CODE (pos) == CONST_INT
+ && INTVAL (pos) == 8);
+ })
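+
+; Concretely, this accepts RTL of the form
+;   (zero_extract:HI (reg:HI r1) (const_int 8) (const_int 8))
+; i.e. the upper byte of r1 viewed as an 8-bit field at bit 8.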
+
+; TRUE if we can shift by this amount. Constant shift counts have a
+; limited range.
+(define_predicate "shiftcount_operand"
+ (ior (match_operand 0 "mra_operand" "")
+ (and (match_operand 2 "const_int_operand" "")
+ (match_test "-8 <= INTVAL (op) && INTVAL (op) && INTVAL (op) <= 8"))))
+(define_predicate "longshiftcount_operand"
+ (ior (match_operand 0 "mra_operand" "")
+ (and (match_operand 2 "const_int_operand" "")
+ (match_test "-32 <= INTVAL (op) && INTVAL (op) && INTVAL (op) <= 32"))))
+
+; TRUE for r0..r3, a0..a1, or sp.
+(define_predicate "mra_or_sp_operand"
+ (and (ior (match_operand 0 "mra_operand")
+ (match_operand 1 "m32c_sp_operand"))
+ (not (match_operand 2 "m32c_wide_subreg" ""))))
+
+
+; TRUE for r2 or r3.
+(define_predicate "m32c_r2r3_operand"
+ (ior (and (match_code "reg")
+ (ior (match_test "REGNO(op) == R2_REGNO")
+ (match_test "REGNO(op) == R3_REGNO")))
+ (and (match_code "subreg")
+ (match_test "GET_CODE (XEXP (op, 0)) == REG && (REGNO (XEXP (op, 0)) == R2_REGNO || REGNO (XEXP (op, 0)) == R3_REGNO)"))))
+
+; Likewise, plus TRUE for a0..a1.
+(define_predicate "m32c_r2r3a_operand"
+ (ior (match_operand 0 "m32c_r2r3_operand" "")
+ (match_operand 0 "a_operand" "")))
+
+; These two are only for movqi - no subreg limit
+(define_predicate "mra_qi_operand"
+ (and (and (match_operand 0 "m32c_nonimmediate_operand" "")
+ (not (match_operand 1 "cr_operand" "")))
+ (not (match_operand 1 "m32c_r2r3a_operand" ""))))
+
+(define_predicate "mrai_qi_operand"
+ (and (and (match_operand 0 "m32c_any_operand" "")
+ (not (match_operand 1 "cr_operand" "")))
+ (not (match_operand 1 "m32c_r2r3a_operand" ""))))
+
+(define_predicate "a_qi_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (match_operand 1 "a_operand" "")))
+
+; TRUE for comparisons we support.
+(define_predicate "m32c_cmp_operator"
+ (match_code "eq,ne,gt,gtu,lt,ltu,ge,geu,le,leu"))
+
+(define_predicate "m32c_eqne_operator"
+ (match_code "eq,ne"))
+
+; TRUE for mem0
+(define_predicate "m32c_mem0_operand"
+ (ior (match_operand 0 "m32c_pseudo" "")
+ (and (match_code "reg")
+ (match_test "REGNO(op) == MEM0_REGNO"))))
+
+; TRUE for things the call patterns can return.
+(define_predicate "m32c_return_operand"
+ (ior (match_operand 0 "m32c_r0_operand")
+ (ior (match_operand 0 "m32c_mem0_operand")
+ (match_code "parallel"))))
+
+; TRUE for constants we can multiply pointers by
+(define_predicate "m32c_psi_scale"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "m32c_const_ok_for_constraint_p(INTVAL(op), 'I', \"Ilb\")")))
+
+; TRUE for one bit set (bit) or clear (mask) out of N bits.
+
+(define_predicate "m32c_1bit8_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "m32c_const_ok_for_constraint_p(INTVAL(op), 'I', \"Ilb\")")))
+
+(define_predicate "m32c_1bit16_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "m32c_const_ok_for_constraint_p(INTVAL(op), 'I', \"Ilw\")")))
+
+(define_predicate "m32c_1mask8_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "m32c_const_ok_for_constraint_p(INTVAL(op), 'I', \"ImB\")")))
+
+(define_predicate "m32c_1mask16_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "m32c_const_ok_for_constraint_p(INTVAL(op), 'I', \"Imw\")")))
+
+(define_predicate "m32c_const_u16_operand"
+ (and (match_operand 0 "const_int_operand")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 65535")))
diff --git a/gcc/config/m32c/prologue.md b/gcc/config/m32c/prologue.md
new file mode 100644
index 000000000..175b2b0ab
--- /dev/null
+++ b/gcc/config/m32c/prologue.md
@@ -0,0 +1,199 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007, 2008
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Prologue and epilogue patterns
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "m32c_emit_prologue(); DONE;"
+ )
+
+; For the next two, operands[0] is the amount of stack space we want
+; to reserve.
+
+; We assume dwarf2out will process each set in sequence.
+(define_insn "prologue_enter_16"
+ [(set (mem:HI (plus:HI (reg:HI SP_REGNO) (const_int -2)))
+ (reg:HI FB_REGNO))
+ (set (reg:HI FB_REGNO)
+ (plus:HI (reg:HI SP_REGNO) (const_int -2)))
+ (set (reg:HI SP_REGNO)
+ (minus:HI (reg:HI SP_REGNO)
+ (match_operand 0 "const_int_operand" "i")))
+ ]
+ "TARGET_A16"
+ {
+ /* This is due to binutils bug gas/4659. */
+ if (INTVAL (operands[0]) == 2)
+ return "enter\t#0";
+ return "enter\t%0-2";
+ }
+ [(set_attr "flags" "x")]
+ )
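+
+; For illustration: reserving 8 bytes gives operands[0] == 8 and emits
+; "enter #6", since enter itself accounts for the 2 bytes that save
+; FB; operands[0] == 2 hits the gas/4659 workaround and emits the
+; literal "enter #0" rather than "enter #2-2".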
+
+(define_insn "prologue_enter_24"
+ [(set (mem:SI (plus:PSI (reg:PSI SP_REGNO) (const_int -4)))
+ (reg:SI FB_REGNO))
+ (set (reg:PSI FB_REGNO)
+ (plus:PSI (reg:PSI SP_REGNO) (const_int -4)))
+ (set (reg:PSI SP_REGNO)
+ (minus:PSI (reg:PSI SP_REGNO)
+ (match_operand 0 "const_int_operand" "i")))
+ ]
+ "TARGET_A24"
+ {
+ /* This is due to binutils bug gas/4659. */
+ if (INTVAL (operands[0]) == 4)
+ return "enter\t#0";
+ return "enter\t%0-4";
+ }
+ [(set_attr "flags" "x")]
+ )
+
+; Just a comment, for debugging the assembler output.
+(define_insn "prologue_end"
+ [(unspec_volatile [(const_int 0)] UNS_PROLOGUE_END)]
+ ""
+ "; end of prologue"
+ [(set_attr "flags" "n")]
+ )
+
+
+
+(define_expand "epilogue"
+ [(const_int 1)]
+ ""
+ "m32c_emit_epilogue(); DONE;"
+ )
+
+(define_expand "eh_return"
+ [(match_operand:PSI 0 "" "")]
+ ""
+ "m32c_emit_eh_epilogue(operands[0]); DONE;"
+ )
+
+(define_insn "eh_epilogue"
+ [(set (pc)
+ (unspec_volatile [(match_operand 0 "m32c_r1_operand" "")
+ (match_operand 1 "m32c_r0_operand" "")
+ ] UNS_EH_EPILOGUE))]
+ ""
+ "jmp.a\t__m32c_eh_return"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_exitd_16"
+ [(set (reg:HI SP_REGNO)
+ (plus:HI (reg:HI FB_REGNO)
+ (const_int 2)))
+ (set (reg:HI FB_REGNO)
+ (mem:HI (reg:HI FB_REGNO)))
+ (return)
+ ]
+ "TARGET_A16"
+ "exitd"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_reit_16"
+ [(set (reg:HI SP_REGNO)
+ (plus:HI (reg:HI SP_REGNO)
+ (const_int 4)))
+ (return)
+ ]
+ "TARGET_A16"
+ "reit"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_exitd_24"
+ [(set (reg:PSI SP_REGNO)
+ (plus:PSI (reg:PSI FB_REGNO)
+ (const_int 4)))
+ (set (reg:PSI FB_REGNO)
+ (mem:PSI (reg:PSI FB_REGNO)))
+ (return)
+ ]
+ "TARGET_A24"
+ "exitd"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_reit_24"
+ [(set (reg:PSI SP_REGNO)
+ (plus:PSI (reg:PSI SP_REGNO)
+ (const_int 6)))
+ (return)
+ ]
+ "TARGET_A24"
+ "reit"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_freit"
+ [(unspec [(const_int 0)] UNS_FREIT)
+ (return)
+ ]
+ ""
+ "freit"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_rts"
+ [(return)
+ ]
+ ""
+ "rts"
+ [(set_attr "flags" "x")]
+ )
+
+(define_insn "epilogue_start"
+ [(unspec_volatile [(const_int 0)] UNS_EPILOGUE_START)]
+ ""
+ "; start of epilogue"
+ [(set_attr "flags" "n")]
+ )
+
+
+; These are used by the prologue/epilogue code.
+
+(define_insn "pushm"
+ [(unspec [(match_operand 0 "const_int_operand" "i")] UNS_PUSHM)]
+ ""
+ "pushm\t%p0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "popm"
+ [(unspec [(match_operand 0 "const_int_operand" "i")] UNS_POPM)]
+ ""
+ "popm\t%p0"
+ [(set_attr "flags" "n")]
+ )
+
+(define_insn "fset_b"
+ [(unspec [(const_int 0)] UNS_FSETB)]
+ ""
+ "fset\tB"
+ [(set_attr "flags" "n")]
+ )
+
diff --git a/gcc/config/m32c/rtems.h b/gcc/config/m32c/rtems.h
new file mode 100644
index 000000000..1ef2aada6
--- /dev/null
+++ b/gcc/config/m32c/rtems.h
@@ -0,0 +1,33 @@
+/* Definitions for rtems targeting a M32C using ELF.
+ Copyright (C) 2008, Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
+
+/* Use the default */
+#undef LINK_GCC_C_SEQUENCE_SPEC
diff --git a/gcc/config/m32c/shift.md b/gcc/config/m32c/shift.md
new file mode 100644
index 000000000..b4c6bafa3
--- /dev/null
+++ b/gcc/config/m32c/shift.md
@@ -0,0 +1,352 @@
+;; Machine Descriptions for R8C/M16C/M32C
+;; Copyright (C) 2005, 2007
+;; Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; bit shifting
+
+; Shifts are unusual for m32c. We only support shifting in one
+; "direction" but the shift count is signed. Also, immediate shift
+; counts have a limited range, and variable shift counts have to be in
+; $r1h which GCC normally doesn't even know about.
+
+; Other than compensating for the above, the patterns below are pretty
+; straightforward.
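+
+; Illustration (ours): a C-level "x << 3" on an HI value comes out as
+; "sha.w #3,x", while "x >> 3" is represented with the (neg ...)
+; wrapper below, carries a count of -3, and comes out as
+; "sha.w #-3,x"; one instruction, with the sign selecting direction.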
+
+(define_insn "ashlqi3_i"
+ [(set (match_operand:QI 0 "mra_operand" "=RqiSd*Rmm,RqiSd*Rmm")
+ (ashift:QI (match_operand:QI 1 "mra_operand" "0,0")
+ (match_operand:QI 2 "mrai_operand" "In4,RqiSd")))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ sha.b\t%2,%0
+ mov.b\t%2,r1h\n\tsha.b\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "ashrqi3_i"
+ [(set (match_operand:QI 0 "mra_operand" "=RqiSd*Rmm,RqiSd*Rmm")
+ (ashiftrt:QI (match_operand:QI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "mrai_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ sha.b\t%2,%0
+ mov.b\t%2,r1h\n\tsha.b\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "lshrqi3_i"
+ [(set (match_operand:QI 0 "mra_operand" "=RqiSd*Rmm,RqiSd*Rmm")
+ (lshiftrt:QI (match_operand:QI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "mrai_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ shl.b\t%2,%0
+ mov.b\t%2,r1h\n\tshl.b\tr1h,%0"
+ [(set_attr "flags" "szc,szc")]
+ )
+
+
+(define_expand "ashlqi3"
+ [(parallel [(set (match_operand:QI 0 "mra_operand" "")
+ (ashift:QI (match_operand:QI 1 "mra_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, 1, ASHIFT))
+ DONE;"
+ )
+
+(define_expand "ashrqi3"
+ [(parallel [(set (match_operand:QI 0 "mra_operand" "")
+ (ashiftrt:QI (match_operand:QI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "general_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, ASHIFTRT))
+ DONE;"
+ )
+
+(define_expand "lshrqi3"
+ [(parallel [(set (match_operand:QI 0 "mra_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "general_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, LSHIFTRT))
+ DONE;"
+ )
+
+; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+(define_insn "ashlhi3_i"
+ [(set (match_operand:HI 0 "mra_operand" "=SdRhi*Rmm,SdRhi*Rmm")
+ (ashift:HI (match_operand:HI 1 "mra_operand" "0,0")
+ (match_operand:QI 2 "mrai_operand" "In4,RqiSd")))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ sha.w\t%2,%0
+ mov.b\t%2,r1h\n\tsha.w\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "ashrhi3_i"
+ [(set (match_operand:HI 0 "mra_operand" "=SdRhi*Rmm,SdRhi*Rmm")
+ (ashiftrt:HI (match_operand:HI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "mrai_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ sha.w\t%2,%0
+ mov.b\t%2,r1h\n\tsha.w\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "lshrhi3_i"
+ [(set (match_operand:HI 0 "mra_operand" "=RhiSd*Rmm,RhiSd*Rmm")
+ (lshiftrt:HI (match_operand:HI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "mrai_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ ""
+ "@
+ shl.w\t%2,%0
+ mov.b\t%2,r1h\n\tshl.w\tr1h,%0"
+ [(set_attr "flags" "szc,szc")]
+ )
+
+
+(define_expand "ashlhi3"
+ [(parallel [(set (match_operand:HI 0 "mra_operand" "")
+ (ashift:HI (match_operand:HI 1 "mra_operand" "")
+ (match_operand:QI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, 1, ASHIFT))
+ DONE;"
+ )
+
+(define_expand "ashrhi3"
+ [(parallel [(set (match_operand:HI 0 "mra_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "general_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, ASHIFTRT))
+ DONE;"
+ )
+
+(define_expand "lshrhi3"
+ [(parallel [(set (match_operand:HI 0 "mra_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "general_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, LSHIFTRT))
+ DONE;"
+ )
+
+
+
+
+; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+
+(define_insn "ashlpsi3_i"
+ [(set (match_operand:PSI 0 "mra_operand" "=R02RaaSd*Rmm,R02RaaSd*Rmm")
+ (ashift:PSI (match_operand:PSI 1 "mra_operand" "0,0")
+ (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd")))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "ashrpsi3_i"
+ [(set (match_operand:PSI 0 "mra_operand" "=R02RaaSd*Rmm,R02RaaSd*Rmm")
+ (ashiftrt:PSI (match_operand:PSI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "lshrpsi3_i"
+ [(set (match_operand:PSI 0 "mra_operand" "=R02RaaSd,??Rmm")
+ (lshiftrt:PSI (match_operand:PSI 1 "mra_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ shl.l\t%2,%0
+ mov.b\t%2,r1h\n\tshl.l\tr1h,%0"
+ [(set_attr "flags" "szc,szc")]
+ )
+
+
+(define_expand "ashlpsi3"
+ [(parallel [(set (match_operand:PSI 0 "mra_operand" "")
+ (ashift:PSI (match_operand:PSI 1 "mra_operand" "")
+ (match_operand:QI 2 "shiftcount_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ "TARGET_A24"
+ "if (m32c_prepare_shift (operands, 1, ASHIFT))
+ DONE;"
+ )
+
+(define_expand "ashrpsi3"
+ [(parallel [(set (match_operand:PSI 0 "mra_operand" "")
+ (ashiftrt:PSI (match_operand:PSI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ "TARGET_A24"
+ "if (m32c_prepare_shift (operands, -1, ASHIFTRT))
+ DONE;"
+ )
+
+(define_expand "lshrpsi3"
+ [(parallel [(set (match_operand:PSI 0 "mra_operand" "")
+ (lshiftrt:PSI (match_operand:PSI 1 "mra_operand" "")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ "TARGET_A24"
+ "if (m32c_prepare_shift (operands, -1, LSHIFTRT))
+ DONE;"
+ )
+
+; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+; The m16c has a maximum shift count of -16..16, even when the count
+; is in a register.  It's optimal to use multiple shifts of -8..8
+; rather than loading larger constants into R1H multiple times.  The
+; m32c can shift -32..32 either via immediates or in registers.
+; Hence, separate patterns.
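+
+; For example (our sketch): shifting an SI by 20 on the m16c would be
+; synthesized, presumably by m32c_prepare_shift, as in-range pieces
+; such as sha.l #8, sha.l #8, sha.l #4; on the m32c a single
+; "sha.l #20" suffices, In6 covering -32..32.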
+
+
+(define_insn "ashlsi3_16"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (ashift:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd")))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A16"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "ashrsi3_16"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (ashiftrt:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A16"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ [(set_attr "flags" "oszc,oszc")]
+ )
+
+(define_insn "lshrsi3_16"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (lshiftrt:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "shiftcount_operand" "In4,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A16"
+ "@
+ shl.l\t%2,%0
+ mov.b\t%2,r1h\n\tshl.l\tr1h,%0"
+ [(set_attr "flags" "szc,szc")]
+ )
+
+
+
+(define_insn "ashlsi3_24"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (ashift:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (match_operand:QI 2 "longshiftcount_operand" "In6,RqiSd")))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ )
+
+(define_insn "ashrsi3_24"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (ashiftrt:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "longshiftcount_operand" "In6,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ sha.l\t%2,%0
+ mov.b\t%2,r1h\n\tsha.l\tr1h,%0"
+ )
+
+(define_insn "lshrsi3_24"
+ [(set (match_operand:SI 0 "r0123_operand" "=R03,R03")
+ (lshiftrt:SI (match_operand:SI 1 "r0123_operand" "0,0")
+ (neg:QI (match_operand:QI 2 "longshiftcount_operand" "In6,RqiSd"))))
+ (clobber (match_scratch:HI 3 "=X,R1w"))]
+ "TARGET_A24"
+ "@
+ shl.l\t%2,%0
+ mov.b\t%2,r1h\n\tshl.l\tr1h,%0"
+ )
+
+
+
+
+(define_expand "ashlsi3"
+ [(parallel [(set (match_operand:SI 0 "r0123_operand" "")
+ (ashift:SI (match_operand:SI 1 "r0123_operand" "")
+ (match_operand:QI 2 "mrai_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, 1, ASHIFT))
+ DONE;"
+ )
+
+(define_expand "ashrsi3"
+ [(parallel [(set (match_operand:SI 0 "r0123_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "r0123_operand" "")
+ (neg:QI (match_operand:QI 2 "mrai_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, ASHIFTRT))
+ DONE;"
+ )
+
+(define_expand "lshrsi3"
+ [(parallel [(set (match_operand:SI 0 "r0123_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "r0123_operand" "")
+ (neg:QI (match_operand:QI 2 "mrai_operand" ""))))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "if (m32c_prepare_shift (operands, -1, LSHIFTRT))
+ DONE;"
+ )
diff --git a/gcc/config/m32c/t-m32c b/gcc/config/m32c/t-m32c
new file mode 100644
index 000000000..30b8f6f0b
--- /dev/null
+++ b/gcc/config/m32c/t-m32c
@@ -0,0 +1,69 @@
+# Target Makefile Fragment for R8C/M16C/M32C
+# Copyright (C) 2005, 2006, 2007, 2008, 2009
+# Free Software Foundation, Inc.
+# Contributed by Red Hat.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = m32c/m32c-lib1.S
+
+LIB1ASMFUNCS = \
+ __m32c_memregs \
+ __m32c_eh_return \
+ __m32c_mulsi3 \
+ __m32c_cmpsi2 \
+ __m32c_ucmpsi2 \
+ __m32c_jsri16
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/m32c/m32c-lib2.c $(srcdir)/config/m32c/m32c-lib2-trapv.c
+
+# floating point emulation libraries
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+# target-specific files
+
+md_file = md
+
+MD_FILES = m32c predicates addsub bitops blkmov cond jump minmax mov muldiv prologue shift
+
+# Doing it this way lets the gen* programs report the right line numbers.
+
+md : $(MD_FILES:%=$(srcdir)/config/m32c/%.md) $(srcdir)/config/m32c/t-m32c
+ for md in $(MD_FILES); do \
+ echo "(include \"$(srcdir)/config/m32c/$$md.md\")"; \
+ done > md
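+
+# For illustration, the generated ./md file is just a list of include
+# directives, one per entry in MD_FILES (with $(srcdir) expanded):
+#   (include ".../config/m32c/m32c.md")
+#   (include ".../config/m32c/predicates.md")
+#   ...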
+
+m32c-pragma.o: $(srcdir)/config/m32c/m32c-pragma.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+# We support four CPU series, but R8C and M16C share one multilib, and
+# M32C and M32CM share another.
+
+MULTILIB_OPTIONS = mcpu=m32cm
+MULTILIB_DIRNAMES = m32cm
+MULTILIB_MATCHES = mcpu?m32cm=mcpu?m32c mcpu?r8c=mcpu?m16c
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
diff --git a/gcc/config/m32r/constraints.md b/gcc/config/m32r/constraints.md
new file mode 100644
index 000000000..350d48407
--- /dev/null
+++ b/gcc/config/m32r/constraints.md
@@ -0,0 +1,147 @@
+;; Constraint definitions for Renesas M32R cpu for GNU C compiler
+;; Copyright (C) 2007, 2011 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; The letters I, J, K, L, M, N, O, P in a register constraint string
+;; can be used to stand for particular ranges of immediate operands.
+;; The letters Q, R, S, T, U are used to segregate specific types of
+;; operands, usually memory references, for the target machine.
+;;
+;; I is used for 8-bit signed immediates.
+;; J is used for 16-bit signed immediates.
+;; K is used for 16-bit unsigned immediates.
+;; L is used for 16-bit signed immediates left shifted by 16.
+;; M is used for 24-bit unsigned immediates.
+;; N is used for 8-bit signed immediates for compares
+;; (values in the range -127 to +128).
+;; O is used for 5-bit unsigned immediates (shift count).
+;; P is used for 16-bit signed immediates for compares
+;; (values in the range -32767 to +32768).
+;;
+;; Q is for symbolic addresses loadable with ld24.
+;; R is for symbolic addresses when ld24 can't be used.
+;; S is for stores with pre {inc,dec}rement.
+;; T is for indirect of a pointer.
+;; U is for loads with post increment.
+;; W is used for an immediate value of 0.
+;;
+;; Register constraints
+
+(define_register_constraint "a" "ACCUM_REGS"
+ "@internal")
+
+(define_register_constraint "c" "CARRY_REG"
+ "@internal")
+
+;; Integer constraints
+(define_constraint "I"
+ "8-bit signed immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x80 && ival <= 0x7f")))
+
+(define_constraint "J"
+ "16-bit signed immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x8000 && ival <= 0x7fff")))
+
+(define_constraint "K"
+ "16-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 0x0000ffff")))
+
+(define_constraint "L"
+ "16-bit signed immediate left shifted by 16."
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) == 0")
+ (match_test "(ival >> 16) >= -0x8000 && (ival >> 16) <= 0x7fff")))
+
+(define_constraint "M"
+ "24-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 0x00ffffff")))
+
+(define_constraint "N"
+ "8-bit signed immediate for compare."
+ (and (match_code "const_int")
+ (match_test "ival >= -127 && ival <= 128")))
+
+(define_constraint "O"
+ "5-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival < 32")))
+
+(define_constraint "P"
+ "16-bit signed immediate for compare."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x7fff && ival <= 0x8000")))
+
+;; Floating-point constraints
+(define_constraint "G"
+ "Double constant loadable with 2 ldi insns."
+ (and (match_code "const_double")
+ (match_test "easy_di_const (op)")))
+
+(define_constraint "H"
+ "Double constant loadable with movdf."
+ (and (match_code "const_double")
+ (match_test "easy_df_const (op)")))
+
+;; Extra constraints
+(define_constraint "Q"
+ "A symbolic address loadable when ld24."
+ (ior (and (match_test "TARGET_ADDR24")
+ (match_test "GET_CODE (op) == LABEL_REF"))
+ (match_test "addr24_operand (op, VOIDmode)")))
+
+(define_constraint "R"
+ "A symbolic address loadable with ld24 can't be used."
+ (ior (and (match_test "TARGET_ADDR32")
+ (match_test "GET_CODE (op) == LABEL_REF"))
+ (match_test "addr32_operand (op, VOIDmode)")))
+
+(define_constraint "S"
+ "A store with pre {inc,dec}rement."
+ (and (match_code "mem")
+ (match_test "mode == SImode || mode == SFmode")
+ (match_code "pre_inc,pre_dec" "0")
+ (match_code "reg" "00")
+ (match_test "GPR_P (REGNO (XEXP (XEXP (op, 0), 0)))
+ || REGNO (XEXP (XEXP (op, 0), 0)) == ARG_POINTER_REGNUM
+ || ! HARD_REGISTER_P (XEXP (XEXP (op, 0), 0))")))
+
+(define_constraint "T"
+ "An indirect of a pointer."
+ (and (match_code "mem")
+ (match_test "memreg_operand (op, GET_MODE (op))")))
+
+(define_constraint "U"
+ "A load with post increment."
+ (and (match_code "mem")
+ (match_test "mode == SImode || mode == SFmode")
+ (match_code "post_inc" "0")
+ (match_code "reg" "00")
+ (match_test "GPR_P (REGNO (XEXP (XEXP (op, 0), 0)))
+ || REGNO (XEXP (XEXP (op, 0), 0)) == ARG_POINTER_REGNUM
+ || ! HARD_REGISTER_P (XEXP (XEXP (op, 0), 0))")))
+
+(define_constraint "W"
+ "zero immediate."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
diff --git a/gcc/config/m32r/initfini.c b/gcc/config/m32r/initfini.c
new file mode 100644
index 000000000..6e7d58614
--- /dev/null
+++ b/gcc/config/m32r/initfini.c
@@ -0,0 +1,168 @@
+/* .init/.fini section handling + C++ global constructor/destructor handling.
+ This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
+
+ Copyright (C) 1996, 1997, 1998, 2006, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Declare a pointer to void function type. */
+typedef void (*func_ptr) (void);
+
+#ifdef CRT_INIT
+
+/* NOTE: In order to be able to support SVR4 shared libraries, we arrange
+ to have one set of symbols { __CTOR_LIST__, __DTOR_LIST__, __CTOR_END__,
+ __DTOR_END__ } per root executable and also one set of these symbols
+ per shared library. So in any given whole process image, we may have
+ multiple definitions of each of these symbols. In order to prevent
+ these definitions from conflicting with one another, and in order to
+ ensure that the proper lists are used for the initialization/finalization
+ of each individual shared library (respectively), we give these symbols
+ only internal (i.e. `static') linkage, and we also make it a point to
+ refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
+ symbol in crtinit.o, where they are defined. */
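+
+/* For illustration (our sketch): after linking, the .ctors section of
+   one executable or shared library looks like
+
+     __CTOR_LIST__: (func_ptr) -1    <- crtinit.o, below
+                    &ctor_of_unit_1
+                    &ctor_of_unit_2
+     __CTOR_END__:  (func_ptr) 0     <- crtfini.o
+
+   __do_global_ctors walks backward from __CTOR_END__ until it meets
+   the -1 sentinel; __do_global_dtors walks forward from
+   __DTOR_LIST__ + 1 until it meets the terminating null.  */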
+
+static func_ptr __CTOR_LIST__[1]
+ __attribute__ ((used, section (".ctors")))
+ = { (func_ptr) (-1) };
+
+static func_ptr __DTOR_LIST__[1]
+ __attribute__ ((used, section (".dtors")))
+ = { (func_ptr) (-1) };
+
+/* Run all the global destructors on exit from the program. */
+
+/* Some systems place the number of pointers in the first word of the
+ table. On SVR4 however, that word is -1. In all cases, the table is
+ null-terminated. On SVR4, we start from the beginning of the list and
+ invoke each per-compilation-unit destructor routine in order
+ until we find that null.
+
+ Note that this function MUST be static. There will be one of these
+ functions in each root executable and one in each shared library, but
+ although they all have the same code, each one is unique in that it
+ refers to one particular associated `__DTOR_LIST__' which belongs to the
+ same particular root executable or shared library file. */
+
+static void __do_global_dtors (void)
+asm ("__do_global_dtors") __attribute__ ((used, section (".text")));
+
+static void
+__do_global_dtors (void)
+{
+ func_ptr *p;
+
+ for (p = __DTOR_LIST__ + 1; *p; p++)
+ (*p) ();
+}
+
+/* .init section start.
+ This must appear at the start of the .init section. */
+
+asm ("\n\
+ .section .init,\"ax\",@progbits\n\
+ .balign 4\n\
+ .global __init\n\
+__init:\n\
+ push fp\n\
+ push lr\n\
+ mv fp,sp\n\
+ seth r0, #shigh(__fini)\n\
+ add3 r0, r0, #low(__fini)\n\
+ bl atexit\n\
+ .fillinsn\n\
+");
+
+/* .fini section start.
+ This must appear at the start of the .fini section. */
+
+asm ("\n\
+ .section .fini,\"ax\",@progbits\n\
+ .balign 4\n\
+ .global __fini\n\
+__fini:\n\
+ push fp\n\
+ push lr\n\
+ mv fp,sp\n\
+ bl __do_global_dtors\n\
+ .fillinsn\n\
+");
+
+#endif /* CRT_INIT */
+
+#ifdef CRT_FINI
+
+/* Put a word containing zero at the end of each of our two lists of function
+ addresses. Note that the words defined here go into the .ctors and .dtors
+ sections of the crtend.o file, and since that file is always linked in
+ last, these words naturally end up at the very ends of the two lists
+ contained in these two sections. */
+
+static func_ptr __CTOR_END__[1]
+ __attribute__ ((used, section (".ctors")))
+ = { (func_ptr) 0 };
+
+static func_ptr __DTOR_END__[1]
+ __attribute__ ((used, section (".dtors")))
+ = { (func_ptr) 0 };
+
+/* Run all global constructors for the program.
+ Note that they are run in reverse order. */
+
+static void __do_global_ctors (void)
+asm ("__do_global_ctors") __attribute__ ((used, section (".text")));
+
+static void
+__do_global_ctors (void)
+{
+ func_ptr *p;
+
+ for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
+ (*p) ();
+}
+
+/* .init section end.
+ This must live at the end of the .init section. */
+
+asm ("\n\
+ .section .init,\"ax\",@progbits\n\
+ bl __do_global_ctors\n\
+ mv sp,fp\n\
+ pop lr\n\
+ pop fp\n\
+ jmp lr\n\
+ .fillinsn\n\
+");
+
+/* .fini section end.
+ This must live at the end of the .fini section. */
+
+asm ("\n\
+ .section .fini,\"ax\",@progbits\n\
+ mv sp,fp\n\
+ pop lr\n\
+ pop fp\n\
+ jmp lr\n\
+ .fillinsn\n\
+");
+
+#endif /* CRT_FINI */
diff --git a/gcc/config/m32r/libgcc-glibc.ver b/gcc/config/m32r/libgcc-glibc.ver
new file mode 100644
index 000000000..0e1304b2a
--- /dev/null
+++ b/gcc/config/m32r/libgcc-glibc.ver
@@ -0,0 +1,48 @@
+# Copyright (C) 2004, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By now choosing the same version tags for these specific routines, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+# Note that we cannot use the default libgcc-glibc.ver file on m32r,
+# because GLIBC_2.0 does not exist on this architecture, as the first
+# ever glibc release on the platform was GLIBC_2.3.
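+
+# Illustration (ours): with this map, libgcc_s.so exports e.g.
+# __register_frame_info@@GLIBC_2.3 rather than minting a new GCC_3.0
+# version for it, matching what glibc already exported on this target.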
+
+%exclude {
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.3
+GLIBC_2.3 {
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
diff --git a/gcc/config/m32r/linux.h b/gcc/config/m32r/linux.h
new file mode 100644
index 000000000..55f6619f7
--- /dev/null
+++ b/gcc/config/m32r/linux.h
@@ -0,0 +1,101 @@
+/* Definitions for Renesas M32R running Linux-based GNU systems using ELF.
+ Copyright (C) 2003, 2004, 2006, 2007, 2010, 2011
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define LINUX_DEFAULT_ELF
+
+/* A lie, I guess, but the general idea behind linux/ELF is that we are
+ supposed to be outputting something that will assemble under SVr4.
+ This gets us pretty close. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (M32R GNU/Linux with ELF)");
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Provide a LINK_SPEC appropriate for Linux. Here we provide support
+ for the special GCC options -static and -shared, which allow us to
+ link things in one of these three modes by applying the appropriate
+ combinations of options at link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
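+
+/* Illustration (ours): with the specs below, "gcc -shared foo.o" on a
+   big-endian toolchain passes "-m m32relf_linux -shared" to the
+   linker, while a default dynamic link adds
+   "-dynamic-linker /lib/ld-linux.so.2"; we assume
+   LINUX_DYNAMIC_LINKER is derived from GLIBC_DYNAMIC_LINKER by the
+   shared GNU/Linux configury.  */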
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#undef LINK_SPEC
+#if TARGET_LITTLE_ENDIAN
+#define LINK_SPEC "%(link_cpu) -m m32rlelf_linux %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+#else
+#define LINK_SPEC "%(link_cpu) -m m32relf_linux %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+#endif
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared: -lc} \
+ %{!shared: \
+ %{mieee-fp:-lieee} \
+ %{profile:-lc_p} %{!profile: -lc}}"
+
+#undef STARTFILE_SPEC
+#if defined HAVE_LD_PIE
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} %{!p:crt1.o%s}}}\
+ crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+#endif
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "\
+ %{posix:-D_POSIX_SOURCE} \
+ %{pthread:-D_REENTRANT -D_PTHREADS} \
+"
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
diff --git a/gcc/config/m32r/little.h b/gcc/config/m32r/little.h
new file mode 100644
index 000000000..3eca4cebd
--- /dev/null
+++ b/gcc/config/m32r/little.h
@@ -0,0 +1,21 @@
+/* Definitions for Renesas little endian M32R cpu.
+ Copyright (C) 2003, 2004, 2005, 2007, 2011
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define TARGET_LITTLE_ENDIAN 1
diff --git a/gcc/config/m32r/m32r-protos.h b/gcc/config/m32r/m32r-protos.h
new file mode 100644
index 000000000..0f0607252
--- /dev/null
+++ b/gcc/config/m32r/m32r-protos.h
@@ -0,0 +1,66 @@
+/* Prototypes for m32r.c functions used in the md file & elsewhere.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Function prototypes that cannot exist in m32r.h due to dependency
+   complications.  */
+#define Mmode enum machine_mode
+
+extern void m32r_init (void);
+extern void m32r_init_expanders (void);
+extern unsigned m32r_compute_frame_size (int);
+extern void m32r_expand_prologue (void);
+extern void m32r_expand_epilogue (void);
+extern int direct_return (void);
+extern void m32r_load_pic_register (void);
+extern enum m32r_function_type m32r_compute_function_type (tree);
+
+#ifdef RTX_CODE
+extern int easy_di_const (rtx);
+extern int easy_df_const (rtx);
+extern rtx gen_compare (enum rtx_code, rtx, rtx, int);
+extern bool gen_cond_store (enum rtx_code, rtx, rtx, rtx);
+extern rtx gen_split_move_double (rtx *);
+extern int m32r_address_code (rtx);
+extern void m32r_initialize_trampoline (rtx, rtx, rtx);
+extern int zero_and_one (rtx, rtx);
+extern char * emit_cond_move (rtx *, rtx);
+extern void m32r_output_block_move (rtx, rtx *);
+extern int m32r_expand_block_move (rtx *);
+extern int m32r_not_same_reg (rtx, rtx);
+extern int m32r_hard_regno_rename_ok (unsigned int, unsigned int);
+extern int m32r_legitimate_pic_operand_p (rtx);
+extern rtx m32r_legitimize_pic_address (rtx, rtx);
+extern rtx m32r_return_addr (int);
+extern rtx m32r_function_symbol (const char *);
+
+#ifdef HAVE_MACHINE_MODES
+extern int call_operand (rtx, Mmode);
+extern int small_data_operand (rtx, Mmode);
+extern int addr24_operand (rtx, Mmode);
+extern int addr32_operand (rtx, Mmode);
+extern int call26_operand (rtx, Mmode);
+extern int memreg_operand (rtx, Mmode);
+extern int small_insn_p (rtx, Mmode);
+
+#endif /* HAVE_MACHINE_MODES */
+
+#endif /* RTX_CODE */
+
+#undef Mmode
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
new file mode 100644
index 000000000..444f16efb
--- /dev/null
+++ b/gcc/config/m32r/m32r.c
@@ -0,0 +1,2959 @@
+/* Subroutines used for code generation on the Renesas M32R cpu.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "expr.h"
+#include "function.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "integrate.h"
+#include "df.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm-constrs.h"
+
+/* Array of valid operand punctuation characters. */
+static char m32r_punct_chars[256];
+
+/* Selected code model. */
+enum m32r_model m32r_model = M32R_MODEL_DEFAULT;
+
+/* Selected SDA support. */
+enum m32r_sdata m32r_sdata = M32R_SDATA_DEFAULT;
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_MODEL_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT
+#define SYMBOL_REF_MODEL(X) \
+ ((enum m32r_model) ((SYMBOL_REF_FLAGS (X) >> SYMBOL_FLAG_MODEL_SHIFT) & 3))
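+
+/* The model bits are set per symbol by m32r_encode_section_info below
+   (extra_flags |= model << SYMBOL_FLAG_MODEL_SHIFT).  */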
+
+/* For string literals, etc. */
+#define LIT_NAME_P(NAME) ((NAME)[0] == '*' && (NAME)[1] == '.')
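+
+/* Matches '*'-prefixed (verbatim) assembler names of internal labels,
+   e.g. "*.LC0" for a string constant in the literal pool.  */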
+
+/* Forward declarations.  */
+static bool m32r_handle_option (size_t, const char *, int);
+static void m32r_option_override (void);
+static void init_reg_tables (void);
+static void block_move_call (rtx, rtx, rtx);
+static int m32r_is_insn (rtx);
+static bool m32r_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx m32r_legitimize_address (rtx, rtx, enum machine_mode);
+static bool m32r_mode_dependent_address_p (const_rtx);
+static tree m32r_handle_model_attribute (tree *, tree, tree, int, bool *);
+static void m32r_print_operand (FILE *, rtx, int);
+static void m32r_print_operand_address (FILE *, rtx);
+static bool m32r_print_operand_punct_valid_p (unsigned char code);
+static void m32r_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void m32r_output_function_epilogue (FILE *, HOST_WIDE_INT);
+
+static void m32r_file_start (void);
+
+static int m32r_adjust_priority (rtx, int);
+static int m32r_issue_rate (void);
+
+static void m32r_encode_section_info (tree, rtx, int);
+static bool m32r_in_small_data_p (const_tree);
+static bool m32r_return_in_memory (const_tree, const_tree);
+static rtx m32r_function_value (const_tree, const_tree, bool);
+static rtx m32r_libcall_value (enum machine_mode, const_rtx);
+static bool m32r_function_value_regno_p (const unsigned int);
+static void m32r_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static void init_idents (void);
+static bool m32r_rtx_costs (rtx, int, int, int *, bool speed);
+static int m32r_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static bool m32r_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int m32r_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static rtx m32r_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void m32r_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool m32r_can_eliminate (const int, const int);
+static void m32r_conditional_register_usage (void);
+static void m32r_trampoline_init (rtx, tree, rtx);
+
+/* M32R specific attributes. */
+
+static const struct attribute_spec m32r_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt", 0, 0, true, false, false, NULL },
+ { "model", 1, 1, true, false, false, m32r_handle_model_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+static const struct default_options m32r_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_1_PLUS, OPT_fregmove, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE m32r_attribute_table
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P m32r_legitimate_address_p
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS m32r_legitimize_address
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P m32r_mode_dependent_address_p
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND m32r_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS m32r_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P m32r_print_operand_punct_valid_p
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE m32r_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE m32r_output_function_epilogue
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START m32r_file_start
+
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY m32r_adjust_priority
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE m32r_issue_rate
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_CPU_DEFAULT
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION m32r_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE m32r_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE m32r_option_optimization_table
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO m32r_encode_section_info
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P m32r_in_small_data_p
+
+
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST m32r_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS m32r_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY m32r_return_in_memory
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE m32r_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE m32r_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P m32r_function_value_regno_p
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS m32r_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE m32r_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES m32r_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG m32r_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE m32r_function_arg_advance
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE m32r_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE m32r_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT m32r_trampoline_init
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+m32r_handle_option (size_t code, const char *arg, int value)
+{
+ switch (code)
+ {
+ case OPT_m32r:
+ target_flags &= ~(MASK_M32R2 | MASK_M32RX);
+ return true;
+
+ case OPT_mmodel_:
+ if (strcmp (arg, "small") == 0)
+ m32r_model = M32R_MODEL_SMALL;
+ else if (strcmp (arg, "medium") == 0)
+ m32r_model = M32R_MODEL_MEDIUM;
+ else if (strcmp (arg, "large") == 0)
+ m32r_model = M32R_MODEL_LARGE;
+ else
+ return false;
+ return true;
+
+ case OPT_msdata_:
+ if (strcmp (arg, "none") == 0)
+ m32r_sdata = M32R_SDATA_NONE;
+ else if (strcmp (arg, "sdata") == 0)
+ m32r_sdata = M32R_SDATA_SDATA;
+ else if (strcmp (arg, "use") == 0)
+ m32r_sdata = M32R_SDATA_USE;
+ else
+ return false;
+ return true;
+
+ case OPT_mno_flush_func:
+ m32r_cache_flush_func = NULL;
+ return true;
+
+ case OPT_mflush_trap_:
+ return value <= 15;
+
+ case OPT_mno_flush_trap:
+ m32r_cache_flush_trap = -1;
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Called by m32r_option_override to initialize various things. */
+
+void
+m32r_init (void)
+{
+ init_reg_tables ();
+
+ /* Initialize array for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
+ memset (m32r_punct_chars, 0, sizeof (m32r_punct_chars));
+ m32r_punct_chars['#'] = 1;
+ m32r_punct_chars['@'] = 1; /* ??? no longer used */
+
+ /* Provide default value if not specified. */
+ if (!global_options_set.x_g_switch_value)
+ g_switch_value = SDATA_DEFAULT_SIZE;
+}
+
+static void
+m32r_option_override (void)
+{
+ /* These need to be done at start up.
+ It's convenient to do them here. */
+ m32r_init ();
+ SUBTARGET_OVERRIDE_OPTIONS;
+}
+
+/* Vectors to keep interesting information about registers where it can easily
+   be found.  We used to use the actual mode value as the bit number, but there
+   are (or may be) more than 32 modes now.  Instead we use two tables: one
+   indexed by hard register number, and one indexed by mode.  */
+
+/* The purpose of m32r_mode_class is to shrink the range of modes so that
+ they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
+ mapped into one m32r_mode_class mode. */
+
+enum m32r_mode_class
+{
+ C_MODE,
+ S_MODE, D_MODE, T_MODE, O_MODE,
+ SF_MODE, DF_MODE, TF_MODE, OF_MODE, A_MODE
+};
+
+/* Modes for condition codes. */
+#define C_MODES (1 << (int) C_MODE)
+
+/* Modes for single-word and smaller quantities. */
+#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
+
+/* Modes for double-word and smaller quantities. */
+#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
+
+/* Modes for quad-word and smaller quantities. */
+#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
+
+/* Modes for accumulators. */
+#define A_MODES (1 << (int) A_MODE)
+
+/* Value is 1 if register/mode pair is acceptable on the m32r.  */
+
+const unsigned int m32r_hard_regno_mode_ok[FIRST_PSEUDO_REGISTER] =
+{
+ T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
+ T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, S_MODES, S_MODES, S_MODES,
+ S_MODES, C_MODES, A_MODES, A_MODES
+};
+
+unsigned int m32r_mode_class [NUM_MACHINE_MODES];
+
+enum reg_class m32r_regno_reg_class[FIRST_PSEUDO_REGISTER];
+
+static void
+init_reg_tables (void)
+{
+ int i;
+
+ for (i = 0; i < NUM_MACHINE_MODES; i++)
+ {
+ switch (GET_MODE_CLASS (i))
+ {
+ case MODE_INT:
+ case MODE_PARTIAL_INT:
+ case MODE_COMPLEX_INT:
+ if (GET_MODE_SIZE (i) <= 4)
+ m32r_mode_class[i] = 1 << (int) S_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ m32r_mode_class[i] = 1 << (int) D_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ m32r_mode_class[i] = 1 << (int) T_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ m32r_mode_class[i] = 1 << (int) O_MODE;
+ else
+ m32r_mode_class[i] = 0;
+ break;
+ case MODE_FLOAT:
+ case MODE_COMPLEX_FLOAT:
+ if (GET_MODE_SIZE (i) <= 4)
+ m32r_mode_class[i] = 1 << (int) SF_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ m32r_mode_class[i] = 1 << (int) DF_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ m32r_mode_class[i] = 1 << (int) TF_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ m32r_mode_class[i] = 1 << (int) OF_MODE;
+ else
+ m32r_mode_class[i] = 0;
+ break;
+ case MODE_CC:
+ m32r_mode_class[i] = 1 << (int) C_MODE;
+ break;
+ default:
+ m32r_mode_class[i] = 0;
+ break;
+ }
+ }
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (GPR_P (i))
+ m32r_regno_reg_class[i] = GENERAL_REGS;
+ else if (i == ARG_POINTER_REGNUM)
+ m32r_regno_reg_class[i] = GENERAL_REGS;
+ else
+ m32r_regno_reg_class[i] = NO_REGS;
+ }
+}
+
+/* M32R specific attribute support.
+
+ interrupt - for interrupt functions
+
+ model - select code model used to access object
+
+ small: addresses use 24 bits, use bl to make calls
+ medium: addresses use 32 bits, use bl to make calls
+ large: addresses use 32 bits, use seth/add3/jl to make calls
+
+ Grep for MODEL in m32r.h for more info. */
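+
+/* For example:
+
+     extern int foo __attribute__ ((model ("large")));
+
+   marks FOO as addressed with the large code model (32-bit addresses)
+   whatever the -mmodel= default is.  */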
+
+static tree small_ident1;
+static tree small_ident2;
+static tree medium_ident1;
+static tree medium_ident2;
+static tree large_ident1;
+static tree large_ident2;
+
+static void
+init_idents (void)
+{
+ if (small_ident1 == 0)
+ {
+ small_ident1 = get_identifier ("small");
+ small_ident2 = get_identifier ("__small__");
+ medium_ident1 = get_identifier ("medium");
+ medium_ident2 = get_identifier ("__medium__");
+ large_ident1 = get_identifier ("large");
+ large_ident2 = get_identifier ("__large__");
+ }
+}
+
+/* Handle an "model" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+m32r_handle_model_attribute (tree *node ATTRIBUTE_UNUSED, tree name,
+ tree args, int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree arg;
+
+ init_idents ();
+ arg = TREE_VALUE (args);
+
+ if (arg != small_ident1
+ && arg != small_ident2
+ && arg != medium_ident1
+ && arg != medium_ident2
+ && arg != large_ident1
+ && arg != large_ident2)
+ {
+ warning (OPT_Wattributes, "invalid argument of %qs attribute",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Encode section information of DECL, which is either a VAR_DECL,
+ FUNCTION_DECL, STRING_CST, CONSTRUCTOR, or ???.
+
+ For the M32R we want to record:
+
+ - whether the object lives in .sdata/.sbss.
+ - what code model should be used to access the object
+*/
+
+static void
+m32r_encode_section_info (tree decl, rtx rtl, int first)
+{
+ int extra_flags = 0;
+ tree model_attr;
+ enum m32r_model model;
+
+ default_encode_section_info (decl, rtl, first);
+
+ if (!DECL_P (decl))
+ return;
+
+ model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
+ if (model_attr)
+ {
+ tree id;
+
+ init_idents ();
+
+ id = TREE_VALUE (TREE_VALUE (model_attr));
+
+ if (id == small_ident1 || id == small_ident2)
+ model = M32R_MODEL_SMALL;
+ else if (id == medium_ident1 || id == medium_ident2)
+ model = M32R_MODEL_MEDIUM;
+ else if (id == large_ident1 || id == large_ident2)
+ model = M32R_MODEL_LARGE;
+ else
+ gcc_unreachable (); /* shouldn't happen */
+ }
+ else
+ {
+ if (TARGET_MODEL_SMALL)
+ model = M32R_MODEL_SMALL;
+ else if (TARGET_MODEL_MEDIUM)
+ model = M32R_MODEL_MEDIUM;
+ else if (TARGET_MODEL_LARGE)
+ model = M32R_MODEL_LARGE;
+ else
+ gcc_unreachable (); /* shouldn't happen */
+ }
+ extra_flags |= model << SYMBOL_FLAG_MODEL_SHIFT;
+
+ if (extra_flags)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= extra_flags;
+}
+
+/* Only mark the object as being small data area addressable if
+ it hasn't been explicitly marked with a code model.
+
+ The user can explicitly put an object in the small data area with the
+ section attribute. If the object is in sdata/sbss and marked with a
+ code model do both [put the object in .sdata and mark it as being
+ addressed with a specific code model - don't mark it as being addressed
+ with an SDA reloc though]. This is ok and might be useful at times. If
+ the object doesn't fit the linker will give an error. */
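+
+/* E.g.:
+
+     int counter __attribute__ ((section (".sdata")));
+
+   is matched by the DECL_SECTION_NAME test below.  */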
+
+static bool
+m32r_in_small_data_p (const_tree decl)
+{
+ const_tree section;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+
+ if (lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
+ return false;
+
+ section = DECL_SECTION_NAME (decl);
+ if (section)
+ {
+ const char *const name = TREE_STRING_POINTER (section);
+ if (strcmp (name, ".sdata") == 0 || strcmp (name, ".sbss") == 0)
+ return true;
+ }
+ else
+ {
+ if (! TREE_READONLY (decl) && ! TARGET_SDATA_NONE)
+ {
+ int size = int_size_in_bytes (TREE_TYPE (decl));
+
+ if (size > 0 && size <= g_switch_value)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Do anything needed before RTL is emitted for each function. */
+
+void
+m32r_init_expanders (void)
+{
+ /* ??? At one point there was code here. The function is left in
+ to make it easy to experiment. */
+}
+
+int
+call_operand (rtx op, enum machine_mode mode)
+{
+ if (!MEM_P (op))
+ return 0;
+ op = XEXP (op, 0);
+ return call_address_operand (op, mode);
+}
+
+/* Return 1 if OP is a reference to an object in .sdata/.sbss. */
+
+int
+small_data_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (! TARGET_SDATA_USE)
+ return 0;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return SYMBOL_REF_SMALL_P (op);
+
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && satisfies_constraint_J (XEXP (XEXP (op, 0), 1)))
+ return SYMBOL_REF_SMALL_P (XEXP (XEXP (op, 0), 0));
+
+ return 0;
+}
+
+/* Return 1 if OP is a symbol that can use 24-bit addressing. */
+
+int
+addr24_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx sym;
+
+ if (flag_pic)
+ return 0;
+
+ if (GET_CODE (op) == LABEL_REF)
+ return TARGET_ADDR24;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ sym = op;
+ else if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && satisfies_constraint_M (XEXP (XEXP (op, 0), 1)))
+ sym = XEXP (XEXP (op, 0), 0);
+ else
+ return 0;
+
+ if (SYMBOL_REF_MODEL (sym) == M32R_MODEL_SMALL)
+ return 1;
+
+ if (TARGET_ADDR24
+ && (CONSTANT_POOL_ADDRESS_P (sym)
+ || LIT_NAME_P (XSTR (sym, 0))))
+ return 1;
+
+ return 0;
+}
+
+/* Return 1 if OP is a symbol that needs 32-bit addressing. */
+
+int
+addr32_operand (rtx op, enum machine_mode mode)
+{
+ rtx sym;
+
+ if (GET_CODE (op) == LABEL_REF)
+ return TARGET_ADDR32;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ sym = op;
+ else if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1))
+ && ! flag_pic)
+ sym = XEXP (XEXP (op, 0), 0);
+ else
+ return 0;
+
+ return (! addr24_operand (sym, mode)
+ && ! small_data_operand (sym, mode));
+}
+
+/* Return 1 if OP is a function that can be called with the `bl' insn. */
+
+int
+call26_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (flag_pic)
+ return 1;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return SYMBOL_REF_MODEL (op) != M32R_MODEL_LARGE;
+
+ return TARGET_CALL26;
+}
+
+/* Return 1 if OP is a DImode const we want to handle inline.
+   This must match the code in the movdi pattern.
+   It is used by the 'G' constraint.  */
+
+int
+easy_di_const (rtx op)
+{
+ rtx high_rtx, low_rtx;
+ HOST_WIDE_INT high, low;
+
+ split_double (op, &high_rtx, &low_rtx);
+ high = INTVAL (high_rtx);
+ low = INTVAL (low_rtx);
+ /* Pick constants loadable with 2 16-bit `ldi' insns. */
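+  /* E.g. 0x42ffffffff qualifies: high word 0x42 and low word -1 each
+     fit in 8 signed bits.  */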
+ if (high >= -128 && high <= 127
+ && low >= -128 && low <= 127)
+ return 1;
+ return 0;
+}
+
+/* Return 1 if OP is a DFmode const we want to handle inline.
+   This must match the code in the movdf pattern.
+   It is used by the 'H' constraint.  */
+
+int
+easy_df_const (rtx op)
+{
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ if (l[0] == 0 && l[1] == 0)
+ return 1;
+ if ((l[0] & 0xffff) == 0 && l[1] == 0)
+ return 1;
+ return 0;
+}
+
+/* Return 1 if OP is (mem (reg ...)).
+ This is used in insn length calcs. */
+
+int
+memreg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return MEM_P (op) && REG_P (XEXP (op, 0));
+}
+
+/* Return nonzero if TYPE must be passed by indirect reference. */
+
+static bool
+m32r_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ int size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return (size < 0 || size > 8);
+}
+
+/* Comparisons. */
+
+/* X and Y are two things to compare using CODE. Emit the compare insn and
+ return the rtx for compare [arg0 of the if_then_else].
+ If need_compare is true then the comparison insn must be generated, rather
+ than being subsumed into the following branch instruction. */
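+
+/* For example, LE has no compare insn of its own: per the table below it
+   becomes an LT compare with operands swapped, branched on with EQ,
+   since x <= y iff !(y < x).  */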
+
+rtx
+gen_compare (enum rtx_code code, rtx x, rtx y, int need_compare)
+{
+ enum rtx_code compare_code;
+ enum rtx_code branch_code;
+ rtx cc_reg = gen_rtx_REG (CCmode, CARRY_REGNUM);
+ int must_swap = 0;
+
+ switch (code)
+ {
+ case EQ: compare_code = EQ; branch_code = NE; break;
+ case NE: compare_code = EQ; branch_code = EQ; break;
+ case LT: compare_code = LT; branch_code = NE; break;
+ case LE: compare_code = LT; branch_code = EQ; must_swap = 1; break;
+ case GT: compare_code = LT; branch_code = NE; must_swap = 1; break;
+ case GE: compare_code = LT; branch_code = EQ; break;
+ case LTU: compare_code = LTU; branch_code = NE; break;
+ case LEU: compare_code = LTU; branch_code = EQ; must_swap = 1; break;
+ case GTU: compare_code = LTU; branch_code = NE; must_swap = 1; break;
+ case GEU: compare_code = LTU; branch_code = EQ; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (need_compare)
+ {
+ switch (compare_code)
+ {
+ case EQ:
+ if (satisfies_constraint_P (y) /* Reg equal to small const. */
+ && y != const0_rtx)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_addsi3 (tmp, x, GEN_INT (-INTVAL (y))));
+ x = tmp;
+ y = const0_rtx;
+ }
+ else if (CONSTANT_P (y)) /* Reg equal to const. */
+ {
+ rtx tmp = force_reg (GET_MODE (x), y);
+ y = tmp;
+ }
+
+ if (register_operand (y, SImode) /* Reg equal to reg. */
+ || y == const0_rtx) /* Reg equal to zero. */
+ {
+ emit_insn (gen_cmp_eqsi_insn (x, y));
+
+ return gen_rtx_fmt_ee (code, CCmode, cc_reg, const0_rtx);
+ }
+ break;
+
+ case LT:
+ if (register_operand (y, SImode)
+ || satisfies_constraint_P (y))
+ {
+ rtx tmp = gen_reg_rtx (SImode); /* Reg compared to reg. */
+
+ switch (code)
+ {
+ case LT:
+ emit_insn (gen_cmp_ltsi_insn (x, y));
+ code = EQ;
+ break;
+ case LE:
+ if (y == const0_rtx)
+ tmp = const1_rtx;
+ else
+ emit_insn (gen_addsi3 (tmp, y, constm1_rtx));
+ emit_insn (gen_cmp_ltsi_insn (x, tmp));
+ code = EQ;
+ break;
+ case GT:
+ if (CONST_INT_P (y))
+ tmp = gen_rtx_PLUS (SImode, y, const1_rtx);
+ else
+ emit_insn (gen_addsi3 (tmp, y, constm1_rtx));
+ emit_insn (gen_cmp_ltsi_insn (x, tmp));
+ code = NE;
+ break;
+ case GE:
+ emit_insn (gen_cmp_ltsi_insn (x, y));
+ code = NE;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_fmt_ee (code, CCmode, cc_reg, const0_rtx);
+ }
+ break;
+
+ case LTU:
+ if (register_operand (y, SImode)
+ || satisfies_constraint_P (y))
+ {
+ rtx tmp = gen_reg_rtx (SImode); /* Reg (unsigned) compared to reg. */
+
+ switch (code)
+ {
+ case LTU:
+ emit_insn (gen_cmp_ltusi_insn (x, y));
+ code = EQ;
+ break;
+ case LEU:
+ if (y == const0_rtx)
+ tmp = const1_rtx;
+ else
+ emit_insn (gen_addsi3 (tmp, y, constm1_rtx));
+ emit_insn (gen_cmp_ltusi_insn (x, tmp));
+ code = EQ;
+ break;
+ case GTU:
+ if (CONST_INT_P (y))
+ tmp = gen_rtx_PLUS (SImode, y, const1_rtx);
+ else
+ emit_insn (gen_addsi3 (tmp, y, constm1_rtx));
+ emit_insn (gen_cmp_ltusi_insn (x, tmp));
+ code = NE;
+ break;
+ case GEU:
+ emit_insn (gen_cmp_ltusi_insn (x, y));
+ code = NE;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_fmt_ee (code, CCmode, cc_reg, const0_rtx);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ /* Reg/reg equal comparison. */
+ if (compare_code == EQ
+ && register_operand (y, SImode))
+ return gen_rtx_fmt_ee (code, CCmode, x, y);
+
+ /* Reg/zero signed comparison. */
+ if ((compare_code == EQ || compare_code == LT)
+ && y == const0_rtx)
+ return gen_rtx_fmt_ee (code, CCmode, x, y);
+
+ /* Reg/smallconst equal comparison. */
+ if (compare_code == EQ
+ && satisfies_constraint_P (y))
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_addsi3 (tmp, x, GEN_INT (-INTVAL (y))));
+ return gen_rtx_fmt_ee (code, CCmode, tmp, const0_rtx);
+ }
+
+ /* Reg/const equal comparison. */
+ if (compare_code == EQ
+ && CONSTANT_P (y))
+ {
+ rtx tmp = force_reg (GET_MODE (x), y);
+
+ return gen_rtx_fmt_ee (code, CCmode, x, tmp);
+ }
+ }
+
+ if (CONSTANT_P (y))
+ {
+ if (must_swap)
+ y = force_reg (GET_MODE (x), y);
+ else
+ {
+ int ok_const = reg_or_int16_operand (y, GET_MODE (y));
+
+ if (! ok_const)
+ y = force_reg (GET_MODE (x), y);
+ }
+ }
+
+ switch (compare_code)
+ {
+ case EQ :
+ emit_insn (gen_cmp_eqsi_insn (must_swap ? y : x, must_swap ? x : y));
+ break;
+ case LT :
+ emit_insn (gen_cmp_ltsi_insn (must_swap ? y : x, must_swap ? x : y));
+ break;
+ case LTU :
+ emit_insn (gen_cmp_ltusi_insn (must_swap ? y : x, must_swap ? x : y));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_fmt_ee (branch_code, VOIDmode, cc_reg, CONST0_RTX (CCmode));
+}
+
+bool
+gen_cond_store (enum rtx_code code, rtx op0, rtx op1, rtx op2)
+{
+ enum machine_mode mode = GET_MODE (op0);
+
+ gcc_assert (mode == SImode);
+ switch (code)
+ {
+ case EQ:
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ if (TARGET_M32RX || TARGET_M32R2)
+ {
+ if (!reg_or_zero_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ emit_insn (gen_seq_insn_m32rx (op0, op1, op2));
+ return true;
+ }
+ if (CONST_INT_P (op2) && INTVAL (op2) == 0)
+ {
+ emit_insn (gen_seq_zero_insn (op0, op1));
+ return true;
+ }
+
+ if (!reg_or_eq_int16_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ emit_insn (gen_seq_insn (op0, op1, op2));
+ return true;
+
+ case NE:
+ if (!CONST_INT_P (op2)
+ || (INTVAL (op2) != 0 && satisfies_constraint_K (op2)))
+ {
+ rtx reg;
+
+ if (reload_completed || reload_in_progress)
+ return false;
+
+ reg = gen_reg_rtx (SImode);
+ emit_insn (gen_xorsi3 (reg, op1, op2));
+ op1 = reg;
+
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ emit_insn (gen_sne_zero_insn (op0, op1));
+ return true;
+ }
+ return false;
+
+ case LT:
+ case GT:
+ if (code == GT)
+ {
+ rtx tmp = op2;
+ op2 = op1;
+ op1 = tmp;
+ code = LT;
+ }
+
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ if (!reg_or_int16_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ emit_insn (gen_slt_insn (op0, op1, op2));
+ return true;
+
+ case LTU:
+ case GTU:
+ if (code == GTU)
+ {
+ rtx tmp = op2;
+ op2 = op1;
+ op1 = tmp;
+ code = LTU;
+ }
+
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ if (!reg_or_int16_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ emit_insn (gen_sltu_insn (op0, op1, op2));
+ return true;
+
+ case GE:
+ case GEU:
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ if (!reg_or_int16_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ if (code == GE)
+ emit_insn (gen_sge_insn (op0, op1, op2));
+ else
+ emit_insn (gen_sgeu_insn (op0, op1, op2));
+ return true;
+
+ case LE:
+ case LEU:
+ if (!register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ if (CONST_INT_P (op2))
+ {
+ HOST_WIDE_INT value = INTVAL (op2);
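+	  /* x <= C becomes x < C + 1 (x <u C + 1 for LEU); if C is
+	     already the largest signed 32-bit value, the result is
+	     known to be 1.  */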
+ if (value >= 2147483647)
+ {
+ emit_move_insn (op0, const1_rtx);
+ return true;
+ }
+
+ op2 = GEN_INT (value + 1);
+ if (value < -32768 || value >= 32767)
+ op2 = force_reg (mode, op2);
+
+ if (code == LEU)
+ emit_insn (gen_sltu_insn (op0, op1, op2));
+ else
+ emit_insn (gen_slt_insn (op0, op1, op2));
+ return true;
+ }
+
+ if (!register_operand (op2, mode))
+ op2 = force_reg (mode, op2);
+
+ if (code == LEU)
+ emit_insn (gen_sleu_insn (op0, op1, op2));
+ else
+ emit_insn (gen_sle_insn (op0, op1, op2));
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Split a 2 word move (DI or DF) into component parts. */
+
+rtx
+gen_split_move_double (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx val;
+
+ /* We might have (SUBREG (MEM)) here, so just get rid of the
+ subregs to make this code simpler. It is safe to call
+ alter_subreg any time after reload. */
+ if (GET_CODE (dest) == SUBREG)
+ alter_subreg (&dest);
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+
+ start_sequence ();
+ if (REG_P (dest))
+ {
+ int dregno = REGNO (dest);
+
+ /* Reg = reg. */
+ if (REG_P (src))
+ {
+ int sregno = REGNO (src);
+
+ int reverse = (dregno == sregno + 1);
+
+	  /* We normally copy the low-numbered register first.  However, if
+	     the first register of operand 0 is the same as the second register
+	     of operand 1, we must copy in the opposite order.  */
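+	  /* E.g. for (r2,r3) = (r1,r2), dregno == sregno + 1, so r3 is
+	     written from r2 before r2 is overwritten by r1.  */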
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, reverse, TRUE, mode),
+ operand_subword (src, reverse, TRUE, mode)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, !reverse, TRUE, mode),
+ operand_subword (src, !reverse, TRUE, mode)));
+ }
+
+ /* Reg = constant. */
+ else if (CONST_INT_P (src) || GET_CODE (src) == CONST_DOUBLE)
+ {
+ rtx words[2];
+ split_double (src, &words[0], &words[1]);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 0, TRUE, mode),
+ words[0]));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, 1, TRUE, mode),
+ words[1]));
+ }
+
+ /* Reg = mem. */
+ else if (MEM_P (src))
+ {
+ /* If the high-address word is used in the address, we must load it
+ last. Otherwise, load it first. */
+ int reverse
+ = (refers_to_regno_p (dregno, dregno + 1, XEXP (src, 0), 0) != 0);
+
+	  /* We used to optimize loads through a single base register as
+
+ ld r1,r3+; ld r2,r3
+
+ if r3 were not used subsequently. However, the REG_NOTES aren't
+ propagated correctly by the reload phase, and it can cause bad
+ code to be generated. We could still try:
+
+ ld r1,r3+; ld r2,r3; addi r3,-4
+
+ which saves 2 bytes and doesn't force longword alignment. */
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, reverse, TRUE, mode),
+ adjust_address (src, SImode,
+ reverse * UNITS_PER_WORD)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operand_subword (dest, !reverse, TRUE, mode),
+ adjust_address (src, SImode,
+ !reverse * UNITS_PER_WORD)));
+ }
+ else
+ gcc_unreachable ();
+ }
+
+ /* Mem = reg. */
+  /* We used to optimize stores through a single base register as
+
+ st r1,r3; st r2,+r3
+
+ if r3 were not used subsequently. However, the REG_NOTES aren't
+ propagated correctly by the reload phase, and it can cause bad
+ code to be generated. We could still try:
+
+ st r1,r3; st r2,+r3; addi r3,-4
+
+ which saves 2 bytes and doesn't force longword alignment. */
+ else if (MEM_P (dest) && REG_P (src))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ adjust_address (dest, SImode, 0),
+ operand_subword (src, 0, TRUE, mode)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ adjust_address (dest, SImode, UNITS_PER_WORD),
+ operand_subword (src, 1, TRUE, mode)));
+ }
+
+ else
+ gcc_unreachable ();
+
+ val = get_insns ();
+ end_sequence ();
+ return val;
+}
+
+
+static int
+m32r_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int words;
+ unsigned int size =
+ (((mode == BLKmode && type)
+ ? (unsigned int) int_size_in_bytes (type)
+ : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD;
+
+ if (*cum >= M32R_MAX_PARM_REGS)
+ words = 0;
+ else if (*cum + size > M32R_MAX_PARM_REGS)
+ words = (*cum + size) - M32R_MAX_PARM_REGS;
+ else
+ words = 0;
+
+ return words * UNITS_PER_WORD;
+}
+
+/* The ROUND_ADVANCE* macros are local to this file. */
+/* Round SIZE up to a word boundary. */
+#define ROUND_ADVANCE(SIZE) \
+ (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
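+/* E.g. with UNITS_PER_WORD == 4, ROUND_ADVANCE (5) == 2 words.  */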
+
+/* Round arg MODE/TYPE up to the next word boundary. */
+#define ROUND_ADVANCE_ARG(MODE, TYPE) \
+ ((MODE) == BLKmode \
+ ? ROUND_ADVANCE ((unsigned int) int_size_in_bytes (TYPE)) \
+ : ROUND_ADVANCE ((unsigned int) GET_MODE_SIZE (MODE)))
+
+/* Round CUM up to the necessary point for argument MODE/TYPE. */
+#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) (CUM)
+
+/* Return boolean indicating whether an arg of type TYPE and mode MODE will be
+   passed in a reg.  This includes arguments that have to be passed by
+   reference, as the pointer to them is passed in a reg if one is available
+   (and that is what we're given).
+   This macro is only used in this file.  */
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+ (ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) < M32R_MAX_PARM_REGS)
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+/* On the M32R the first M32R_MAX_PARM_REGS args are normally in registers
+ and the rest are pushed. */
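+/* E.g. with M32R_MAX_PARM_REGS == 4 (see m32r.h), the first four
+   argument words travel in r0-r3 and the remainder go on the stack.  */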
+
+static rtx
+m32r_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return (PASS_IN_REG_P (*cum, mode, type)
+ ? gen_rtx_REG (mode, ROUND_ADVANCE_CUM (*cum, mode, type))
+ : NULL_RTX);
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+m32r_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum = (ROUND_ADVANCE_CUM (*cum, mode, type)
+ + ROUND_ADVANCE_ARG (mode, type));
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+m32r_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return m32r_pass_by_reference (NULL, TYPE_MODE (type), type, false);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE. */
+
+static rtx
+m32r_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), 0);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+m32r_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, 0);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P.
+
+   ??? What about r1 in DI/DF values?  */
+
+static bool
+m32r_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == 0);
+}
+
+/* Do any needed setup for a variadic function. For the M32R, we must
+ create a register parameter block, and then copy any anonymous arguments
+ in registers to memory.
+
+ CUM has not been updated for the last named argument which has type TYPE
+ and mode MODE, and we rely on this fact. */
+
+static void
+m32r_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size, int no_rtl)
+{
+ int first_anon_arg;
+
+ if (no_rtl)
+ return;
+
+ /* All BLKmode values are passed by reference. */
+ gcc_assert (mode != BLKmode);
+
+ first_anon_arg = (ROUND_ADVANCE_CUM (*cum, mode, type)
+ + ROUND_ADVANCE_ARG (mode, type));
+
+ if (first_anon_arg < M32R_MAX_PARM_REGS)
+ {
+ /* Note that first_reg_offset < M32R_MAX_PARM_REGS. */
+ int first_reg_offset = first_anon_arg;
+ /* Size in words to "pretend" allocate. */
+ int size = M32R_MAX_PARM_REGS - first_reg_offset;
+ rtx regblock;
+
+ regblock = gen_frame_mem (BLKmode,
+ plus_constant (arg_pointer_rtx,
+ FIRST_PARM_OFFSET (0)));
+ set_mem_alias_set (regblock, get_varargs_alias_set ());
+ move_block_from_reg (first_reg_offset, regblock, size);
+
+ *pretend_size = (size * UNITS_PER_WORD);
+ }
+}
+
+
+/* Return true if INSN is a real instruction, i.e. not a USE, CLOBBER,
+   ADDR_VEC or debug insn.  */
+
+static int
+m32r_is_insn (rtx insn)
+{
+ return (NONDEBUG_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC);
+}
+
+/* Increase the priority of long instructions so that the
+ short instructions are scheduled ahead of the long ones. */
+
+static int
+m32r_adjust_priority (rtx insn, int priority)
+{
+ if (m32r_is_insn (insn)
+ && get_attr_insn_size (insn) != INSN_SIZE_SHORT)
+ priority <<= 3;
+
+ return priority;
+}
+
+
+/* Indicate how many instructions can be issued at the same time.
+ This is sort of a lie. The m32r can issue only 1 long insn at
+ once, but it can issue 2 short insns. The default therefore is
+ set at 2, but this can be overridden by the command line option
+ -missue-rate=1. */
+
+static int
+m32r_issue_rate (void)
+{
+ return ((TARGET_LOW_ISSUE_RATE) ? 1 : 2);
+}
+
+/* Cost functions. */
+
+/* Implement TARGET_MEMORY_MOVE_COST.
+
+   Memory is 3 times as expensive as registers.
+   ??? Is that the right way to look at it?  */
+
+static int
+m32r_memory_move_cost (enum machine_mode mode,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
+ return 6;
+ else
+ return 12;
+}
+
+static bool
+m32r_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ /* Small integers are as cheap as registers. 4 byte values can be
+ fetched as immediate constants - let's give that the cost of an
+ extra insn. */
+ case CONST_INT:
+ if (INT16_P (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case CONST_DOUBLE:
+ {
+ rtx high, low;
+
+ split_double (x, &high, &low);
+ *total = COSTS_N_INSNS (!INT16_P (INTVAL (high))
+ + !INT16_P (INTVAL (low)));
+ return true;
+ }
+
+ case MULT:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (10);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Type of function DECL.
+
+ The result is cached. To reset the cache at the end of a function,
+ call with DECL = NULL_TREE. */
+
+enum m32r_function_type
+m32r_compute_function_type (tree decl)
+{
+ /* Cached value. */
+ static enum m32r_function_type fn_type = M32R_FUNCTION_UNKNOWN;
+ /* Last function we were called for. */
+ static tree last_fn = NULL_TREE;
+
+ /* Resetting the cached value? */
+ if (decl == NULL_TREE)
+ {
+ fn_type = M32R_FUNCTION_UNKNOWN;
+ last_fn = NULL_TREE;
+ return fn_type;
+ }
+
+ if (decl == last_fn && fn_type != M32R_FUNCTION_UNKNOWN)
+ return fn_type;
+
+ /* Compute function type. */
+ fn_type = (lookup_attribute ("interrupt", DECL_ATTRIBUTES (current_function_decl)) != NULL_TREE
+ ? M32R_FUNCTION_INTERRUPT
+ : M32R_FUNCTION_NORMAL);
+
+ last_fn = decl;
+ return fn_type;
+}
+ /* Function prologue/epilogue handlers. */
+
+/* M32R stack frames look like:
+
+ Before call After call
+ +-----------------------+ +-----------------------+
+ | | | |
+ high | local variables, | | local variables, |
+ mem | reg save area, etc. | | reg save area, etc. |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | arguments on stack. | | arguments on stack. |
+ | | | |
+ SP+0->+-----------------------+ +-----------------------+
+ | reg parm save area, |
+ | only created for |
+ | variable argument |
+ | functions |
+ +-----------------------+
+ | previous frame ptr |
+ +-----------------------+
+ | |
+ | register save area |
+ | |
+ +-----------------------+
+ | return address |
+ +-----------------------+
+ | |
+ | local variables |
+ | |
+ +-----------------------+
+ | |
+ | alloca allocations |
+ | |
+ +-----------------------+
+ | |
+ low | arguments on stack |
+ memory | |
+ SP+0->+-----------------------+
+
+Notes:
+1) The "reg parm save area" does not exist for non-variable-argument fns.
+2) The "reg parm save area" can be eliminated completely if we saved regs
+ containing anonymous args separately but that complicates things too
+ much (so it's not done).
+3) The return address is saved after the register save area so as to have as
+ many insns as possible between the restoration of `lr' and the `jmp lr'. */
+
+/* Structure to be filled in by m32r_compute_frame_size with register
+ save masks, and offsets for the current function. */
+struct m32r_frame_info
+{
+ unsigned int total_size; /* # bytes that the entire frame takes up. */
+ unsigned int extra_size; /* # bytes of extra stuff. */
+ unsigned int pretend_size; /* # bytes we push and pretend caller did. */
+ unsigned int args_size; /* # bytes that outgoing arguments take up. */
+ unsigned int reg_size; /* # bytes needed to store regs. */
+ unsigned int var_size; /* # bytes that variables take up. */
+ unsigned int gmask; /* Mask of saved gp registers. */
+ unsigned int save_fp; /* Nonzero if fp must be saved. */
+ unsigned int save_lr; /* Nonzero if lr (return addr) must be saved. */
+ int initialized; /* Nonzero if frame size already calculated. */
+};
+
+/* Current frame information calculated by m32r_compute_frame_size. */
+static struct m32r_frame_info current_frame_info;
+
+/* Zero structure to initialize current_frame_info. */
+static struct m32r_frame_info zero_frame_info;
+
+#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
+#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
+
+/* Tell prologue and epilogue if register REGNO should be saved / restored.
+ The return address and frame pointer are treated separately.
+ Don't consider them here. */
+#define MUST_SAVE_REGISTER(regno, interrupt_p) \
+ ((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
+ && (df_regs_ever_live_p (regno) && (!call_really_used_regs[regno] || interrupt_p)))
+
+#define MUST_SAVE_FRAME_POINTER (df_regs_ever_live_p (FRAME_POINTER_REGNUM))
+#define MUST_SAVE_RETURN_ADDR (df_regs_ever_live_p (RETURN_ADDR_REGNUM) || crtl->profile)
+
+#define SHORT_INSN_SIZE 2 /* Size of small instructions. */
+#define LONG_INSN_SIZE 4 /* Size of long instructions. */
+
+/* Compute register save masks and frame offsets for the current function,
+   cache them in current_frame_info, and return the total frame size in
+   bytes.
+
+   SIZE is the size needed for local variables.  */
+
+unsigned int
+m32r_compute_frame_size (int size) /* # of var. bytes allocated. */
+{
+ unsigned int regno;
+ unsigned int total_size, var_size, args_size, pretend_size, extra_size;
+ unsigned int reg_size;
+ unsigned int gmask;
+ enum m32r_function_type fn_type;
+ int interrupt_p;
+ int pic_reg_used = flag_pic && (crtl->uses_pic_offset_table
+ | crtl->profile);
+
+ var_size = M32R_STACK_ALIGN (size);
+ args_size = M32R_STACK_ALIGN (crtl->outgoing_args_size);
+ pretend_size = crtl->args.pretend_args_size;
+ extra_size = FIRST_PARM_OFFSET (0);
+ total_size = extra_size + pretend_size + args_size + var_size;
+ reg_size = 0;
+ gmask = 0;
+
+ /* See if this is an interrupt handler. Call used registers must be saved
+ for them too. */
+ fn_type = m32r_compute_function_type (current_function_decl);
+ interrupt_p = M32R_INTERRUPT_P (fn_type);
+
+ /* Calculate space needed for registers. */
+ for (regno = 0; regno < M32R_MAX_INT_REGS; regno++)
+ {
+ if (MUST_SAVE_REGISTER (regno, interrupt_p)
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ {
+ reg_size += UNITS_PER_WORD;
+ gmask |= 1 << regno;
+ }
+ }
+
+ current_frame_info.save_fp = MUST_SAVE_FRAME_POINTER;
+ current_frame_info.save_lr = MUST_SAVE_RETURN_ADDR || pic_reg_used;
+
+ reg_size += ((current_frame_info.save_fp + current_frame_info.save_lr)
+ * UNITS_PER_WORD);
+ total_size += reg_size;
+
+ /* ??? Not sure this is necessary, and I don't think the epilogue
+ handler will do the right thing if this changes total_size. */
+ total_size = M32R_STACK_ALIGN (total_size);
+
+ /* frame_size = total_size - (pretend_size + reg_size); */
+
+ /* Save computed information. */
+ current_frame_info.total_size = total_size;
+ current_frame_info.extra_size = extra_size;
+ current_frame_info.pretend_size = pretend_size;
+ current_frame_info.var_size = var_size;
+ current_frame_info.args_size = args_size;
+ current_frame_info.reg_size = reg_size;
+ current_frame_info.gmask = gmask;
+ current_frame_info.initialized = reload_completed;
+
+ /* Ok, we're done. */
+ return total_size;
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+m32r_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
+
+/* The table we use to reference PIC data. */
+static rtx global_offset_table;
+
+static void
+m32r_reload_lr (rtx sp, int size)
+{
+ rtx lr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+
+ if (size == 0)
+ emit_insn (gen_movsi (lr, gen_frame_mem (Pmode, sp)));
+ else if (size < 32768)
+ emit_insn (gen_movsi (lr, gen_frame_mem (Pmode,
+ gen_rtx_PLUS (Pmode, sp,
+ GEN_INT (size)))));
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+
+ emit_insn (gen_movsi (tmp, GEN_INT (size)));
+ emit_insn (gen_addsi3 (tmp, tmp, sp));
+ emit_insn (gen_movsi (lr, gen_frame_mem (Pmode, tmp)));
+ }
+
+ emit_use (lr);
+}
+
+void
+m32r_load_pic_register (void)
+{
+ global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
+ GEN_INT (TARGET_MODEL_SMALL)));
+
+ /* Need to emit this whether or not we obey regdecls,
+ since setjmp/longjmp can cause life info to screw up. */
+ emit_use (pic_offset_table_rtx);
+}
+
+/* Expand the m32r prologue as a series of insns. */
+
+void
+m32r_expand_prologue (void)
+{
+ int regno;
+ int frame_size;
+ unsigned int gmask;
+ int pic_reg_used = flag_pic && (crtl->uses_pic_offset_table
+ | crtl->profile);
+
+ if (! current_frame_info.initialized)
+ m32r_compute_frame_size (get_frame_size ());
+
+ gmask = current_frame_info.gmask;
+
+ /* These cases shouldn't happen. Catch them now. */
+ gcc_assert (current_frame_info.total_size || !gmask);
+
+ /* Allocate space for register arguments if this is a variadic function. */
+ if (current_frame_info.pretend_size != 0)
+ {
+ /* Use a HOST_WIDE_INT temporary, since negating an unsigned int gives
+ the wrong result on a 64-bit host. */
+ HOST_WIDE_INT pretend_size = current_frame_info.pretend_size;
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-pretend_size)));
+ }
+
+ /* Save any registers we need to and set up fp. */
+ if (current_frame_info.save_fp)
+ emit_insn (gen_movsi_push (stack_pointer_rtx, frame_pointer_rtx));
+
+ gmask &= ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK);
+
+ /* Save any needed call-saved regs (and call-used if this is an
+ interrupt handler). */
+  for (regno = 0; regno < M32R_MAX_INT_REGS; ++regno)
+ {
+ if ((gmask & (1 << regno)) != 0)
+ emit_insn (gen_movsi_push (stack_pointer_rtx,
+ gen_rtx_REG (Pmode, regno)));
+ }
+
+ if (current_frame_info.save_lr)
+ emit_insn (gen_movsi_push (stack_pointer_rtx,
+ gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)));
+
+ /* Allocate the stack frame. */
+ frame_size = (current_frame_info.total_size
+ - (current_frame_info.pretend_size
+ + current_frame_info.reg_size));
+
+ if (frame_size == 0)
+ ; /* Nothing to do. */
+ else if (frame_size <= 32768)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-frame_size)));
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+
+ emit_insn (gen_movsi (tmp, GEN_INT (frame_size)));
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+ }
+
+ if (frame_pointer_needed)
+ emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
+
+ if (crtl->profile)
+    /* Push lr for mcount (from_pc, x).  */
+ emit_insn (gen_movsi_push (stack_pointer_rtx,
+ gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)));
+
+ if (pic_reg_used)
+ {
+ m32r_load_pic_register ();
+ m32r_reload_lr (stack_pointer_rtx,
+ (crtl->profile ? 0 : frame_size));
+ }
+
+ if (crtl->profile && !pic_reg_used)
+ emit_insn (gen_blockage ());
+}
+
+
+/* Set up the stack and frame pointer (if desired) for the function.
+ Note, if this is changed, you need to mirror the changes in
+ m32r_compute_frame_size which calculates the prolog size. */
+
+static void
+m32r_output_function_prologue (FILE * file, HOST_WIDE_INT size)
+{
+ enum m32r_function_type fn_type = m32r_compute_function_type (current_function_decl);
+
+ /* If this is an interrupt handler, mark it as such. */
+ if (M32R_INTERRUPT_P (fn_type))
+ fprintf (file, "\t%s interrupt handler\n", ASM_COMMENT_START);
+
+ if (! current_frame_info.initialized)
+ m32r_compute_frame_size (size);
+
+ /* This is only for the human reader. */
+ fprintf (file,
+ "\t%s PROLOGUE, vars= %d, regs= %d, args= %d, extra= %d\n",
+ ASM_COMMENT_START,
+ current_frame_info.var_size,
+ current_frame_info.reg_size / 4,
+ current_frame_info.args_size,
+ current_frame_info.extra_size);
+}
+
+/* Output RTL to pop register REGNO from the stack. */
+
+static void
+pop (int regno)
+{
+ rtx x;
+
+ x = emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno),
+ stack_pointer_rtx));
+ add_reg_note (x, REG_INC, stack_pointer_rtx);
+}
+
+/* Expand the m32r epilogue as a series of insns. */
+
+void
+m32r_expand_epilogue (void)
+{
+ int regno;
+ int noepilogue = FALSE;
+ int total_size;
+
+ gcc_assert (current_frame_info.initialized);
+ total_size = current_frame_info.total_size;
+
+ if (total_size == 0)
+ {
+ rtx insn = get_last_insn ();
+
+ /* If the last insn was a BARRIER, we don't have to write any code
+ because a jump (aka return) was put there. */
+ if (insn && NOTE_P (insn))
+ insn = prev_nonnote_insn (insn);
+ if (insn && BARRIER_P (insn))
+ noepilogue = TRUE;
+ }
+
+ if (!noepilogue)
+ {
+ unsigned int var_size = current_frame_info.var_size;
+ unsigned int args_size = current_frame_info.args_size;
+ unsigned int gmask = current_frame_info.gmask;
+ int can_trust_sp_p = !cfun->calls_alloca;
+
+ if (flag_exceptions)
+ emit_insn (gen_blockage ());
+
+ /* The first thing to do is point the sp at the bottom of the register
+ save area. */
+ if (can_trust_sp_p)
+ {
+ unsigned int reg_offset = var_size + args_size;
+
+ if (reg_offset == 0)
+ ; /* Nothing to do. */
+ else if (reg_offset < 32768)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (reg_offset)));
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+
+ emit_insn (gen_movsi (tmp, GEN_INT (reg_offset)));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ tmp));
+ }
+ }
+ else if (frame_pointer_needed)
+ {
+ unsigned int reg_offset = var_size + args_size;
+
+ if (reg_offset == 0)
+ emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
+ else if (reg_offset < 32768)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_rtx,
+ GEN_INT (reg_offset)));
+ else
+ {
+ rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM);
+
+ emit_insn (gen_movsi (tmp, GEN_INT (reg_offset)));
+ emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ tmp));
+ }
+ }
+ else
+ gcc_unreachable ();
+
+ if (current_frame_info.save_lr)
+ pop (RETURN_ADDR_REGNUM);
+
+ /* Restore any saved registers, in reverse order of course. */
+ gmask &= ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK);
+ for (regno = M32R_MAX_INT_REGS - 1; regno >= 0; --regno)
+ {
+ if ((gmask & (1L << regno)) != 0)
+ pop (regno);
+ }
+
+ if (current_frame_info.save_fp)
+ pop (FRAME_POINTER_REGNUM);
+
+ /* Remove varargs area if present. */
+ if (current_frame_info.pretend_size != 0)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (current_frame_info.pretend_size)));
+
+ emit_insn (gen_blockage ());
+ }
+}
+
+/* Do any necessary cleanup after a function to restore stack, frame,
+ and regs. */
+
+static void
+m32r_output_function_epilogue (FILE * file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* Reset state info for each function. */
+ current_frame_info = zero_frame_info;
+ m32r_compute_function_type (NULL_TREE);
+}
+
+/* Return nonzero if this function is known to have a null or
+   one-instruction epilogue.  */
+
+int
+direct_return (void)
+{
+ if (!reload_completed)
+ return FALSE;
+
+ if (M32R_INTERRUPT_P (m32r_compute_function_type (current_function_decl)))
+ return FALSE;
+
+ if (! current_frame_info.initialized)
+ m32r_compute_frame_size (get_frame_size ());
+
+ return current_frame_info.total_size == 0;
+}
+
+
+/* PIC. */
+
+int
+m32r_legitimate_pic_operand_p (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+ return 0;
+
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF)
+ && (CONST_INT_P (XEXP (XEXP (x, 0), 1))))
+ return 0;
+
+ return 1;
+}
+
+rtx
+m32r_legitimize_pic_address (rtx orig, rtx reg)
+{
+#ifdef DEBUG_PIC
+ printf("m32r_legitimize_pic_address()\n");
+#endif
+
+ if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF)
+ {
+ rtx pic_ref, address;
+ int subregs = 0;
+
+ if (reg == 0)
+ {
+ gcc_assert (!reload_in_progress && !reload_completed);
+ reg = gen_reg_rtx (Pmode);
+
+ subregs = 1;
+ }
+
+ if (subregs)
+ address = gen_reg_rtx (Pmode);
+ else
+ address = reg;
+
+ crtl->uses_pic_offset_table = 1;
+
+ if (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
+ {
+ emit_insn (gen_gotoff_load_addr (reg, orig));
+ emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx));
+ return reg;
+ }
+
+ emit_insn (gen_pic_load_addr (address, orig));
+
+ emit_insn (gen_addsi3 (address, address, pic_offset_table_rtx));
+ pic_ref = gen_const_mem (Pmode, address);
+ emit_move_insn (reg, pic_ref);
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base, offset;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 1) == pic_offset_table_rtx)
+ return orig;
+
+ if (reg == 0)
+ {
+ gcc_assert (!reload_in_progress && !reload_completed);
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS)
+ {
+ base = m32r_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
+ if (base == reg)
+ offset = m32r_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), NULL_RTX);
+ else
+ offset = m32r_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), reg);
+ }
+ else
+ return orig;
+
+ if (CONST_INT_P (offset))
+ {
+ if (INT16_P (INTVAL (offset)))
+ return plus_constant (base, INTVAL (offset));
+ else
+ {
+ gcc_assert (! reload_in_progress && ! reload_completed);
+ offset = force_reg (Pmode, offset);
+ }
+ }
+
+ return gen_rtx_PLUS (Pmode, base, offset);
+ }
+
+ return orig;
+}
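+
+/* In outline: a label or a symbol local to this module is reached
+ GOT-relative (gotoff_load_addr followed by an add of the PIC
+ register), while a global symbol goes through the GOT proper:
+ pic_load_addr of the slot address, an add of the PIC register, then
+ a load from the resulting slot. CONST expressions are split into
+ base and offset, and each part is legitimized recursively. */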
+
+static rtx
+m32r_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (flag_pic)
+ return m32r_legitimize_pic_address (x, NULL_RTX);
+ else
+ return x;
+}
+
+/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P. */
+
+static bool
+m32r_mode_dependent_address_p (const_rtx addr)
+{
+ if (GET_CODE (addr) == LO_SUM)
+ return true;
+
+ return false;
+}
+
+/* Nested function support. */
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+void
+m32r_initialize_trampoline (rtx tramp ATTRIBUTE_UNUSED,
+ rtx fnaddr ATTRIBUTE_UNUSED,
+ rtx cxt ATTRIBUTE_UNUSED)
+{
+}
+
+static void
+m32r_file_start (void)
+{
+ default_file_start ();
+
+ if (flag_verbose_asm)
+ fprintf (asm_out_file,
+ "%s M32R/D special options: -G %d\n",
+ ASM_COMMENT_START, g_switch_value);
+
+ if (TARGET_LITTLE_ENDIAN)
+ fprintf (asm_out_file, "\t.little\n");
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+static void
+m32r_print_operand (FILE * file, rtx x, int code)
+{
+ rtx addr;
+
+ switch (code)
+ {
+ /* The 's' and 'p' codes are used by output_block_move() to
+ indicate post-increment 's'tores and 'p're-increment loads. */
+ case 's':
+ if (REG_P (x))
+ fprintf (file, "@+%s", reg_names [REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand to %%s code");
+ return;
+
+ case 'p':
+ if (REG_P (x))
+ fprintf (file, "@%s+", reg_names [REGNO (x)]);
+ else
+ output_operand_lossage ("invalid operand to %%p code");
+ return;
+
+ case 'R' :
+ /* Write second word of DImode or DFmode reference,
+ register or memory. */
+ if (REG_P (x))
+ fputs (reg_names[REGNO (x)+1], file);
+ else if (MEM_P (x))
+ {
+ fprintf (file, "@(");
+ /* Handle possible auto-increment. Since it is pre-increment and
+ we have already done it, we can just use an offset of four. */
+ /* ??? This is taken from rs6000.c I think. I don't think it is
+ currently necessary, but keep it around. */
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 4));
+ else
+ output_address (plus_constant (XEXP (x, 0), 4));
+ fputc (')', file);
+ }
+ else
+ output_operand_lossage ("invalid operand to %%R code");
+ return;
+
+ case 'H' : /* High word. */
+ case 'L' : /* Low word. */
+ if (REG_P (x))
+ {
+ /* L = least significant word, H = most significant word. */
+ if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
+ fputs (reg_names[REGNO (x)], file);
+ else
+ fputs (reg_names[REGNO (x)+1], file);
+ }
+ else if (CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE)
+ {
+ rtx first, second;
+
+ split_double (x, &first, &second);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ code == 'L' ? INTVAL (first) : INTVAL (second));
+ }
+ else
+ output_operand_lossage ("invalid operand to %%H/%%L code");
+ return;
+
+ case 'A' :
+ {
+ char str[30];
+
+ if (GET_CODE (x) != CONST_DOUBLE
+ || GET_MODE_CLASS (GET_MODE (x)) != MODE_FLOAT)
+ fatal_insn ("bad insn for 'A'", x);
+
+ real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1);
+ fprintf (file, "%s", str);
+ return;
+ }
+
+ case 'B' : /* Bottom half. */
+ case 'T' : /* Top half. */
+ /* Output the argument to a `seth' insn (sets the Top half-word).
+ For constants output arguments to a seth/or3 pair to set Top and
+ Bottom halves. For symbols output arguments to a seth/add3 pair to
+ set Top and Bottom halves. The difference exists because for
+ constants seth/or3 is more readable but for symbols we need to use
+ the same scheme as `ld' and `st' insns (16-bit addend is signed). */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT :
+ case CONST_DOUBLE :
+ {
+ rtx first, second;
+
+ split_double (x, &first, &second);
+ x = WORDS_BIG_ENDIAN ? second : first;
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ (code == 'B'
+ ? INTVAL (x) & 0xffff
+ : (INTVAL (x) >> 16) & 0xffff));
+ }
+ return;
+ case CONST :
+ case SYMBOL_REF :
+ if (code == 'B'
+ && small_data_operand (x, VOIDmode))
+ {
+ fputs ("sda(", file);
+ output_addr_const (file, x);
+ fputc (')', file);
+ return;
+ }
+ /* fall through */
+ case LABEL_REF :
+ fputs (code == 'T' ? "shigh(" : "low(", file);
+ output_addr_const (file, x);
+ fputc (')', file);
+ return;
+ default :
+ output_operand_lossage ("invalid operand to %%T/%%B code");
+ return;
+ }
+ break;
+
+ case 'U' :
+ /* ??? wip */
+ /* Output a load/store with update indicator if appropriate. */
+ if (MEM_P (x))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ fputs (".a", file);
+ }
+ else
+ output_operand_lossage ("invalid operand to %%U code");
+ return;
+
+ case 'N' :
+ /* Print a constant value negated. */
+ if (CONST_INT_P (x))
+ output_addr_const (file, GEN_INT (- INTVAL (x)));
+ else
+ output_operand_lossage ("invalid operand to %%N code");
+ return;
+
+ case 'X' :
+ /* Print a const_int in hex. Used in comments. */
+ if (CONST_INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
+ return;
+
+ case '#' :
+ fputs (IMMEDIATE_PREFIX, file);
+ return;
+
+ case 0 :
+ /* Do nothing special. */
+ break;
+
+ default :
+ /* Unknown flag. */
+ output_operand_lossage ("invalid operand output code");
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG :
+ fputs (reg_names[REGNO (x)], file);
+ break;
+
+ case MEM :
+ addr = XEXP (x, 0);
+ if (GET_CODE (addr) == PRE_INC)
+ {
+ if (!REG_P (XEXP (addr, 0)))
+ fatal_insn ("pre-increment address is not a register", x);
+
+ fprintf (file, "@+%s", reg_names[REGNO (XEXP (addr, 0))]);
+ }
+ else if (GET_CODE (addr) == PRE_DEC)
+ {
+ if (!REG_P (XEXP (addr, 0)))
+ fatal_insn ("pre-decrement address is not a register", x);
+
+ fprintf (file, "@-%s", reg_names[REGNO (XEXP (addr, 0))]);
+ }
+ else if (GET_CODE (addr) == POST_INC)
+ {
+ if (!REG_P (XEXP (addr, 0)))
+ fatal_insn ("post-increment address is not a register", x);
+
+ fprintf (file, "@%s+", reg_names[REGNO (XEXP (addr, 0))]);
+ }
+ else
+ {
+ fputs ("@(", file);
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ }
+ break;
+
+ case CONST_DOUBLE :
+ /* We handle SFmode constants here as output_addr_const doesn't. */
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE d;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, x);
+ REAL_VALUE_TO_TARGET_SINGLE (d, l);
+ fprintf (file, "0x%08lx", l);
+ break;
+ }
+
+ /* Fall through. Let output_addr_const deal with it. */
+
+ default :
+ output_addr_const (file, x);
+ break;
+ }
+}
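+
+/* For example (hypothetical output template, for illustration only):
+ with operand 1 == (const_int 8), the fragment "addi %0,%N1" prints
+ "addi r4,-8", since %N negates the constant; and for a DImode value
+ in the register pair r4/r5 on a big-endian target, %H prints "r4"
+ (most significant word) while %L prints "r5". */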
+
+/* Print a memory address as an operand to reference that memory location. */
+
+static void
+m32r_print_operand_address (FILE * file, rtx addr)
+{
+ rtx base;
+ rtx index = 0;
+ int offset = 0;
+
+ switch (GET_CODE (addr))
+ {
+ case REG :
+ fputs (reg_names[REGNO (addr)], file);
+ break;
+
+ case PLUS :
+ if (CONST_INT_P (XEXP (addr, 0)))
+ offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
+ else if (CONST_INT_P (XEXP (addr, 1)))
+ offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
+ else
+ base = XEXP (addr, 0), index = XEXP (addr, 1);
+ if (REG_P (base))
+ {
+ /* Print the offset first (if present) to conform to the manual. */
+ if (index == 0)
+ {
+ if (offset != 0)
+ fprintf (file, "%d,", offset);
+ fputs (reg_names[REGNO (base)], file);
+ }
+ /* The chip doesn't support this, but left in for generality. */
+ else if (REG_P (index))
+ fprintf (file, "%s,%s",
+ reg_names[REGNO (base)], reg_names[REGNO (index)]);
+ /* Not sure this can happen, but leave in for now. */
+ else if (GET_CODE (index) == SYMBOL_REF)
+ {
+ output_addr_const (file, index);
+ fputc (',', file);
+ fputs (reg_names[REGNO (base)], file);
+ }
+ else
+ fatal_insn ("bad address", addr);
+ }
+ else if (GET_CODE (base) == LO_SUM)
+ {
+ gcc_assert (!index && REG_P (XEXP (base, 0)));
+ if (small_data_operand (XEXP (base, 1), VOIDmode))
+ fputs ("sda(", file);
+ else
+ fputs ("low(", file);
+ output_addr_const (file, plus_constant (XEXP (base, 1), offset));
+ fputs ("),", file);
+ fputs (reg_names[REGNO (XEXP (base, 0))], file);
+ }
+ else
+ fatal_insn ("bad address", addr);
+ break;
+
+ case LO_SUM :
+ if (!REG_P (XEXP (addr, 0)))
+ fatal_insn ("lo_sum not of register", addr);
+ if (small_data_operand (XEXP (addr, 1), VOIDmode))
+ fputs ("sda(", file);
+ else
+ fputs ("low(", file);
+ output_addr_const (file, XEXP (addr, 1));
+ fputs ("),", file);
+ fputs (reg_names[REGNO (XEXP (addr, 0))], file);
+ break;
+
+ case PRE_INC : /* Assume SImode. */
+ fprintf (file, "+%s", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case PRE_DEC : /* Assume SImode. */
+ fprintf (file, "-%s", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case POST_INC : /* Assume SImode. */
+ fprintf (file, "%s+", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ default :
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+static bool
+m32r_print_operand_punct_valid_p (unsigned char code)
+{
+ return m32r_punct_chars[code];
+}
+
+/* Return true if the operands are the constants 0 and 1. */
+
+int
+zero_and_one (rtx operand1, rtx operand2)
+{
+ return
+ CONST_INT_P (operand1)
+ && CONST_INT_P (operand2)
+ && ( ((INTVAL (operand1) == 0) && (INTVAL (operand2) == 1))
+ ||((INTVAL (operand1) == 1) && (INTVAL (operand2) == 0)));
+}
+
+/* Generate the correct assembler code to handle the conditional loading of a
+ value into a register. It is known that the operands satisfy the
+ conditional_move_operand() function above. The destination is operand[0].
+ The condition is operand [1]. The 'true' value is operand [2] and the
+ 'false' value is operand [3]. */
+
+char *
+emit_cond_move (rtx * operands, rtx insn ATTRIBUTE_UNUSED)
+{
+ static char buffer [100];
+ const char * dest = reg_names [REGNO (operands [0])];
+
+ buffer [0] = 0;
+
+ /* Destination must be a register. */
+ gcc_assert (REG_P (operands [0]));
+ gcc_assert (conditional_move_operand (operands [2], SImode));
+ gcc_assert (conditional_move_operand (operands [3], SImode));
+
+ /* Check to see if the test is reversed. */
+ if (GET_CODE (operands [1]) == NE)
+ {
+ rtx tmp = operands [2];
+ operands [2] = operands [3];
+ operands [3] = tmp;
+ }
+
+ sprintf (buffer, "mvfc %s, cbr", dest);
+
+ /* If the true value was '0' then we need to invert the results of the move. */
+ if (INTVAL (operands [2]) == 0)
+ sprintf (buffer + strlen (buffer), "\n\txor3 %s, %s, #1",
+ dest, dest);
+
+ return buffer;
+}
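+
+/* For example (illustrative): moving 1 on true and 0 on false into r4
+ yields just "mvfc r4, cbr", copying the condition bit; with the two
+ values reversed, the inverting "xor3 r4, r4, #1" is appended. */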
+
+/* Returns true if the registers contained in the two
+ rtl expressions are different. */
+
+int
+m32r_not_same_reg (rtx a, rtx b)
+{
+ int reg_a = -1;
+ int reg_b = -2;
+
+ while (GET_CODE (a) == SUBREG)
+ a = SUBREG_REG (a);
+
+ if (REG_P (a))
+ reg_a = REGNO (a);
+
+ while (GET_CODE (b) == SUBREG)
+ b = SUBREG_REG (b);
+
+ if (REG_P (b))
+ reg_b = REGNO (b);
+
+ return reg_a != reg_b;
+}
+
+
+rtx
+m32r_function_symbol (const char *name)
+{
+ int extra_flags = 0;
+ enum m32r_model model;
+ rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
+
+ if (TARGET_MODEL_SMALL)
+ model = M32R_MODEL_SMALL;
+ else if (TARGET_MODEL_MEDIUM)
+ model = M32R_MODEL_MEDIUM;
+ else if (TARGET_MODEL_LARGE)
+ model = M32R_MODEL_LARGE;
+ else
+ gcc_unreachable (); /* Shouldn't happen. */
+ extra_flags |= model << SYMBOL_FLAG_MODEL_SHIFT;
+
+ if (extra_flags)
+ SYMBOL_REF_FLAGS (sym) |= extra_flags;
+
+ return sym;
+}
+
+/* Use a library function to move some bytes. */
+
+static void
+block_move_call (rtx dest_reg, rtx src_reg, rtx bytes_rtx)
+{
+ /* We want to pass the size as Pmode, which will normally be SImode
+ but will be DImode if we are using 64-bit longs and pointers. */
+ if (GET_MODE (bytes_rtx) != VOIDmode
+ && GET_MODE (bytes_rtx) != Pmode)
+ bytes_rtx = convert_to_mode (Pmode, bytes_rtx, 1);
+
+ emit_library_call (m32r_function_symbol ("memcpy"), LCT_NORMAL,
+ VOIDmode, 3, dest_reg, Pmode, src_reg, Pmode,
+ convert_to_mode (TYPE_MODE (sizetype), bytes_rtx,
+ TYPE_UNSIGNED (sizetype)),
+ TYPE_MODE (sizetype));
+}
+
+/* Expand string/block move operations.
+
+ operands[0] is the pointer to the destination.
+ operands[1] is the pointer to the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is the alignment.
+
+ Returns 1 upon success, 0 otherwise. */
+
+int
+m32r_expand_block_move (rtx operands[])
+{
+ rtx orig_dst = operands[0];
+ rtx orig_src = operands[1];
+ rtx bytes_rtx = operands[2];
+ rtx align_rtx = operands[3];
+ int constp = CONST_INT_P (bytes_rtx);
+ HOST_WIDE_INT bytes = constp ? INTVAL (bytes_rtx) : 0;
+ int align = INTVAL (align_rtx);
+ int leftover;
+ rtx src_reg;
+ rtx dst_reg;
+
+ if (constp && bytes <= 0)
+ return 1;
+
+ /* Move the address into scratch registers. */
+ dst_reg = copy_addr_to_reg (XEXP (orig_dst, 0));
+ src_reg = copy_addr_to_reg (XEXP (orig_src, 0));
+
+ if (align > UNITS_PER_WORD)
+ align = UNITS_PER_WORD;
+
+ /* If we prefer size over speed, always use a function call.
+ If we do not know the size, use a function call.
+ If the blocks are not word aligned, use a function call. */
+ if (optimize_size || ! constp || align != UNITS_PER_WORD)
+ {
+ block_move_call (dst_reg, src_reg, bytes_rtx);
+ return 0;
+ }
+
+ leftover = bytes % MAX_MOVE_BYTES;
+ bytes -= leftover;
+
+ /* If necessary, generate a loop to handle the bulk of the copy. */
+ if (bytes)
+ {
+ rtx label = NULL_RTX;
+ rtx final_src = NULL_RTX;
+ rtx at_a_time = GEN_INT (MAX_MOVE_BYTES);
+ rtx rounded_total = GEN_INT (bytes);
+ rtx new_dst_reg = gen_reg_rtx (SImode);
+ rtx new_src_reg = gen_reg_rtx (SImode);
+
+ /* If we are going to have to perform this loop more than
+ once, then generate a label and compute the address the
+ source register will contain upon completion of the final
+ iteration. */
+ if (bytes > MAX_MOVE_BYTES)
+ {
+ final_src = gen_reg_rtx (Pmode);
+
+ if (INT16_P(bytes))
+ emit_insn (gen_addsi3 (final_src, src_reg, rounded_total));
+ else
+ {
+ emit_insn (gen_movsi (final_src, rounded_total));
+ emit_insn (gen_addsi3 (final_src, final_src, src_reg));
+ }
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ }
+
+ /* It is known that output_block_move() will update src_reg to point
+ to the word after the end of the source block, and dst_reg to point
+ to the last word of the destination block, provided that the block
+ is MAX_MOVE_BYTES long. */
+ emit_insn (gen_movmemsi_internal (dst_reg, src_reg, at_a_time,
+ new_dst_reg, new_src_reg));
+ emit_move_insn (dst_reg, new_dst_reg);
+ emit_move_insn (src_reg, new_src_reg);
+ emit_insn (gen_addsi3 (dst_reg, dst_reg, GEN_INT (4)));
+
+ if (bytes > MAX_MOVE_BYTES)
+ {
+ rtx test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+ }
+ }
+
+ if (leftover)
+ emit_insn (gen_movmemsi_internal (dst_reg, src_reg, GEN_INT (leftover),
+ gen_reg_rtx (SImode),
+ gen_reg_rtx (SImode)));
+ return 1;
+}
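+
+/* Worked example (assuming MAX_MOVE_BYTES is 16, i.e. four words): a
+ constant, word-aligned 100-byte copy becomes a six-iteration loop
+ moving 16 bytes per pass (96 bytes in total), followed by a single
+ 4-byte leftover movmemsi_internal. */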
+
+
+/* Emit load/stores for a small constant word aligned block_move.
+
+ operands[0] is the memory address of the destination.
+ operands[1] is the memory address of the source.
+ operands[2] is the number of bytes to move.
+ operands[3] is a temp register.
+ operands[4] is a temp register. */
+
+void
+m32r_output_block_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[])
+{
+ HOST_WIDE_INT bytes = INTVAL (operands[2]);
+ int first_time;
+ int got_extra = 0;
+
+ gcc_assert (bytes >= 1 && bytes <= MAX_MOVE_BYTES);
+
+ /* We do not have a post-increment store available, so the first set of
+ stores is done without any increment, then the remaining ones can use

+ the pre-increment addressing mode.
+
+ Note: expand_block_move() also relies upon this behavior when building
+ loops to copy large blocks. */
+ first_time = 1;
+
+ while (bytes > 0)
+ {
+ if (bytes >= 8)
+ {
+ if (first_time)
+ {
+ output_asm_insn ("ld\t%5, %p1", operands);
+ output_asm_insn ("ld\t%6, %p1", operands);
+ output_asm_insn ("st\t%5, @%0", operands);
+ output_asm_insn ("st\t%6, %s0", operands);
+ }
+ else
+ {
+ output_asm_insn ("ld\t%5, %p1", operands);
+ output_asm_insn ("ld\t%6, %p1", operands);
+ output_asm_insn ("st\t%5, %s0", operands);
+ output_asm_insn ("st\t%6, %s0", operands);
+ }
+
+ bytes -= 8;
+ }
+ else if (bytes >= 4)
+ {
+ if (bytes > 4)
+ got_extra = 1;
+
+ output_asm_insn ("ld\t%5, %p1", operands);
+
+ if (got_extra)
+ output_asm_insn ("ld\t%6, %p1", operands);
+
+ if (first_time)
+ output_asm_insn ("st\t%5, @%0", operands);
+ else
+ output_asm_insn ("st\t%5, %s0", operands);
+
+ bytes -= 4;
+ }
+ else
+ {
+ /* Get the entire next word, even though we do not want all of it.
+ This saves us from doing several smaller loads, and we assume that
+ we cannot cause a page fault when at least part of the word is in
+ valid memory [since we don't get called if things aren't properly
+ aligned]. */
+ int dst_offset = first_time ? 0 : 4;
+ /* The amount of increment we have to make to the
+ destination pointer. */
+ int dst_inc_amount = dst_offset + bytes - 4;
+ /* The same for the source pointer. */
+ int src_inc_amount = bytes;
+ int last_shift;
+ rtx my_operands[3];
+
+ /* If got_extra is true then we have already loaded
+ the next word as part of loading and storing the previous word. */
+ if (! got_extra)
+ output_asm_insn ("ld\t%6, @%1", operands);
+
+ if (bytes >= 2)
+ {
+ bytes -= 2;
+
+ output_asm_insn ("sra3\t%5, %6, #16", operands);
+ my_operands[0] = operands[5];
+ my_operands[1] = GEN_INT (dst_offset);
+ my_operands[2] = operands[0];
+ output_asm_insn ("sth\t%0, @(%1,%2)", my_operands);
+
+ /* If there is a byte left to store then increment the
+ destination address and shift the contents of the source
+ register down by 8 bits. We could not do the address
+ increment in the store half word instruction, because it does
+ not have an auto increment mode. */
+ if (bytes > 0) /* assert (bytes == 1) */
+ {
+ dst_offset += 2;
+ last_shift = 8;
+ }
+ }
+ else
+ last_shift = 24;
+
+ if (bytes > 0)
+ {
+ my_operands[0] = operands[6];
+ my_operands[1] = GEN_INT (last_shift);
+ output_asm_insn ("srai\t%0, #%1", my_operands);
+ my_operands[0] = operands[6];
+ my_operands[1] = GEN_INT (dst_offset);
+ my_operands[2] = operands[0];
+ output_asm_insn ("stb\t%0, @(%1,%2)", my_operands);
+ }
+
+ /* Update the destination pointer if needed. We have to do
+ this so that the pattern matches what we output in this
+ function. */
+ if (dst_inc_amount
+ && !find_reg_note (insn, REG_UNUSED, operands[0]))
+ {
+ my_operands[0] = operands[0];
+ my_operands[1] = GEN_INT (dst_inc_amount);
+ output_asm_insn ("addi\t%0, #%1", my_operands);
+ }
+
+ /* Update the source pointer if needed. We have to do this
+ so that the pattern matches what we output in this
+ function. */
+ if (src_inc_amount
+ && !find_reg_note (insn, REG_UNUSED, operands[1]))
+ {
+ my_operands[0] = operands[1];
+ my_operands[1] = GEN_INT (src_inc_amount);
+ output_asm_insn ("addi\t%0, #%1", my_operands);
+ }
+
+ bytes = 0;
+ }
+
+ first_time = 0;
+ }
+}
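+
+/* For instance (again assuming MAX_MOVE_BYTES is 16), a 7-byte move
+ loads the first word with a pre-increment ld, pre-loads the next
+ word (got_extra), stores the first word, then stores two bytes of
+ the pre-loaded word with sth and the final byte with stb, bumping
+ the pointers afterwards so the final register values match what the
+ insn pattern promised. */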
+
+/* Return true if using NEW_REG in place of OLD_REG is ok. */
+
+int
+m32r_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+ /* Interrupt routines can't clobber any register that isn't already used. */
+ if (lookup_attribute ("interrupt", DECL_ATTRIBUTES (current_function_decl))
+ && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ return 1;
+}
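+
+/* For example, within a handler declared as
+
+ void isr (void) __attribute__ ((interrupt));
+
+ a register may only be renamed onto one the function already uses;
+ anything else would be clobbered without having been saved. */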
+
+rtx
+m32r_return_addr (int count)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
+}
+
+static void
+m32r_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ emit_move_insn (adjust_address (m_tramp, SImode, 0),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ?
+ 0x017e8e17 : 0x178e7e01, SImode));
+ emit_move_insn (adjust_address (m_tramp, SImode, 4),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ?
+ 0x0c00ae86 : 0x86ae000c, SImode));
+ emit_move_insn (adjust_address (m_tramp, SImode, 8),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ?
+ 0xe627871e : 0x1e8727e6, SImode));
+ emit_move_insn (adjust_address (m_tramp, SImode, 12),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ?
+ 0xc61fc626 : 0x26c61fc6, SImode));
+ emit_move_insn (adjust_address (m_tramp, SImode, 16),
+ chain_value);
+ emit_move_insn (adjust_address (m_tramp, SImode, 20),
+ XEXP (DECL_RTL (fndecl), 0));
+
+ if (m32r_cache_flush_trap >= 0)
+ emit_insn (gen_flush_icache
+ (validize_mem (adjust_address (m_tramp, SImode, 0)),
+ gen_int_mode (m32r_cache_flush_trap, SImode)));
+ else if (m32r_cache_flush_func && m32r_cache_flush_func[0])
+ emit_library_call (m32r_function_symbol (m32r_cache_flush_func),
+ LCT_NORMAL, VOIDmode, 3, XEXP (m_tramp, 0), Pmode,
+ gen_int_mode (TRAMPOLINE_SIZE, SImode), SImode,
+ GEN_INT (3), SImode);
+}
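+
+/* The 24-byte trampoline written above is therefore 16 bytes of code
+ (four words at offsets 0..12, with byte order matching the target
+ endianness), the static chain value at offset 16 and the target
+ function's address at offset 20; see the template in m32r.h. The
+ instruction cache is then flushed, via trap when
+ m32r_cache_flush_trap is set, otherwise through the configured
+ cache-flush library function. */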
+
+/* True if X is a reg that can be used as a base reg. */
+
+static bool
+m32r_rtx_ok_for_base_p (const_rtx x, bool strict)
+{
+ if (! REG_P (x))
+ return false;
+
+ if (strict)
+ {
+ if (GPR_P (REGNO (x)))
+ return true;
+ }
+ else
+ {
+ if (GPR_P (REGNO (x))
+ || REGNO (x) == ARG_POINTER_REGNUM
+ || ! HARD_REGISTER_P (x))
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool
+m32r_rtx_ok_for_offset_p (const_rtx x)
+{
+ return (CONST_INT_P (x) && INT16_P (INTVAL (x)));
+}
+
+static inline bool
+m32r_legitimate_offset_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_rtx x, bool strict)
+{
+ if (GET_CODE (x) == PLUS
+ && m32r_rtx_ok_for_base_p (XEXP (x, 0), strict)
+ && m32r_rtx_ok_for_offset_p (XEXP (x, 1)))
+ return true;
+
+ return false;
+}
+
+/* For LO_SUM addresses, do not allow them if the MODE is > 1 word,
+ since more than one instruction will be required. */
+
+static inline bool
+m32r_legitimate_lo_sum_address_p (enum machine_mode mode, const_rtx x,
+ bool strict)
+{
+ if (GET_CODE (x) == LO_SUM
+ && (mode != BLKmode && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
+ && m32r_rtx_ok_for_base_p (XEXP (x, 0), strict)
+ && CONSTANT_P (XEXP (x, 1)))
+ return true;
+
+ return false;
+}
+
+/* Is this a load and increment operation? */
+
+static inline bool
+m32r_load_postinc_p (enum machine_mode mode, const_rtx x, bool strict)
+{
+ if ((mode == SImode || mode == SFmode)
+ && GET_CODE (x) == POST_INC
+ && REG_P (XEXP (x, 0))
+ && m32r_rtx_ok_for_base_p (XEXP (x, 0), strict))
+ return true;
+
+ return false;
+}
+
+/* Is this an increment/decrement and store operation? */
+
+static inline bool
+m32r_store_preinc_predec_p (enum machine_mode mode, const_rtx x, bool strict)
+{
+ if ((mode == SImode || mode == SFmode)
+ && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
+ && REG_P (XEXP (x, 0))
+ && m32r_rtx_ok_for_base_p (XEXP (x, 0), strict))
+ return true;
+
+ return false;
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P. */
+
+static bool
+m32r_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ if (m32r_rtx_ok_for_base_p (x, strict)
+ || m32r_legitimate_offset_address_p (mode, x, strict)
+ || m32r_legitimate_lo_sum_address_p (mode, x, strict)
+ || m32r_load_postinc_p (mode, x, strict)
+ || m32r_store_preinc_predec_p (mode, x, strict))
+ return true;
+
+ return false;
+}
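+
+/* Summarizing the accepted forms: a bare base register, base plus a
+ 16-bit displacement, a LO_SUM (printed as low() or sda() against a
+ base register), a post-increment load @rN+, and pre-increment /
+ pre-decrement stores @+rN and @-rN, the last three for 4-byte
+ values only. */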
+
+static void
+m32r_conditional_register_usage (void)
+{
+ if (flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+}
diff --git a/gcc/config/m32r/m32r.h b/gcc/config/m32r/m32r.h
new file mode 100644
index 000000000..ef24ec109
--- /dev/null
+++ b/gcc/config/m32r/m32r.h
@@ -0,0 +1,1165 @@
+/* Definitions of target machine for GNU compiler, Renesas M32R cpu.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Things to do:
+- longlong.h?
+*/
+
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+#undef WCHAR_TYPE
+#undef WCHAR_TYPE_SIZE
+#undef TARGET_VERSION
+#undef CPP_SPEC
+#undef ASM_SPEC
+#undef LINK_SPEC
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+
+#undef ASM_APP_ON
+#undef ASM_APP_OFF
+
+
+/* M32R/X overrides. */
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION fprintf (stderr, " (m32r/x/2)");
+
+/* Additional flags for the preprocessor. */
+#define CPP_CPU_SPEC "%{m32rx:-D__M32RX__ -D__m32rx__ -U__M32R2__ -U__m32r2__} \
+%{m32r2:-D__M32R2__ -D__m32r2__ -U__M32RX__ -U__m32rx__} \
+%{m32r:-U__M32RX__ -U__m32rx__ -U__M32R2__ -U__m32r2__} \
+ "
+
+/* Assembler switches. */
+#define ASM_CPU_SPEC \
+"%{m32r} %{m32rx} %{m32r2} %{!O0: %{O*: -O}} --no-warn-explicit-parallel-conflicts"
+
+/* Use m32rx specific crt0/crtinit/crtfini files. */
+#define STARTFILE_CPU_SPEC "%{!shared:crt0.o%s} %{m32rx:m32rx/crtinit.o%s} %{!m32rx:crtinit.o%s}"
+#define ENDFILE_CPU_SPEC "-lgloss %{m32rx:m32rx/crtfini.o%s} %{!m32rx:crtfini.o%s}"
+
+/* Define this macro as a C expression for the initializer of an array of
+ strings to tell the driver program which options are defaults for this
+ target and thus do not need to be handled specially when using
+ `MULTILIB_OPTIONS'. */
+#define SUBTARGET_MULTILIB_DEFAULTS , "m32r"
+
+/* Number of additional registers the subtarget defines. */
+#define SUBTARGET_NUM_REGISTERS 1
+
+/* 1 for registers that cannot be allocated. */
+#define SUBTARGET_FIXED_REGISTERS , 1
+
+/* 1 for registers that are not available across function calls. */
+#define SUBTARGET_CALL_USED_REGISTERS , 1
+
+/* Order to allocate model specific registers. */
+#define SUBTARGET_REG_ALLOC_ORDER , 19
+
+/* Registers which are accumulators. */
+#define SUBTARGET_REG_CLASS_ACCUM 0x80000
+
+/* All registers added. */
+#define SUBTARGET_REG_CLASS_ALL SUBTARGET_REG_CLASS_ACCUM
+
+/* Additional accumulator registers. */
+#define SUBTARGET_ACCUM_P(REGNO) ((REGNO) == 19)
+
+/* Define additional register names. */
+#define SUBTARGET_REGISTER_NAMES , "a1"
+/* end M32R/X overrides. */
+
+/* Print subsidiary information on the compiler version in use. */
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (m32r)")
+#endif
+
+/* Names to predefine in the preprocessor for this target machine. */
+/* __M32R__ is defined by the existing compiler so we use that. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__M32R__"); \
+ builtin_define ("__m32r__"); \
+ builtin_assert ("cpu=m32r"); \
+ builtin_assert ("machine=m32r"); \
+ builtin_define (TARGET_BIG_ENDIAN \
+ ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); \
+ } \
+ while (0)
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+ Each subgrouping contains a string constant that defines the
+ specification name, and a string constant that is used by the GCC
+ driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#ifndef ASM_CPU_SPEC
+#define ASM_CPU_SPEC ""
+#endif
+
+#ifndef CPP_CPU_SPEC
+#define CPP_CPU_SPEC ""
+#endif
+
+#ifndef CC1_CPU_SPEC
+#define CC1_CPU_SPEC ""
+#endif
+
+#ifndef LINK_CPU_SPEC
+#define LINK_CPU_SPEC ""
+#endif
+
+#ifndef STARTFILE_CPU_SPEC
+#define STARTFILE_CPU_SPEC "%{!shared:crt0.o%s} crtinit.o%s"
+#endif
+
+#ifndef ENDFILE_CPU_SPEC
+#define ENDFILE_CPU_SPEC "-lgloss crtfini.o%s"
+#endif
+
+#ifndef RELAX_SPEC
+#if 0 /* Not supported yet. */
+#define RELAX_SPEC "%{mrelax:-relax}"
+#else
+#define RELAX_SPEC ""
+#endif
+#endif
+
+#define EXTRA_SPECS \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "cpp_cpu", CPP_CPU_SPEC }, \
+ { "cc1_cpu", CC1_CPU_SPEC }, \
+ { "link_cpu", LINK_CPU_SPEC }, \
+ { "startfile_cpu", STARTFILE_CPU_SPEC }, \
+ { "endfile_cpu", ENDFILE_CPU_SPEC }, \
+ { "relax", RELAX_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#define CPP_SPEC "%(cpp_cpu)"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{G*} %(cc1_cpu)"
+
+/* Options to pass on to the assembler. */
+#undef ASM_SPEC
+#define ASM_SPEC "%(asm_cpu) %(relax) %{fpic|fpie:-K PIC} %{fPIC|fPIE:-K PIC}"
+
+#define LINK_SPEC "%{v} %(link_cpu) %(relax)"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%(startfile_cpu)"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%(endfile_cpu)"
+
+#undef LIB_SPEC
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+#define TARGET_M32R (! TARGET_M32RX && ! TARGET_M32R2)
+
+#ifndef TARGET_LITTLE_ENDIAN
+#define TARGET_LITTLE_ENDIAN 0
+#endif
+#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
+
+/* This defaults us to m32r. */
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT 0
+#endif
+
+/* Code Models
+
+ A code model picks one of two choices for each of two independent
+ properties (address space size, call insn to use):
+
+ small: addresses use 24 bits, use bl to make calls
+ medium: addresses use 32 bits, use bl to make calls (*1)
+ large: addresses use 32 bits, use seth/add3/jl to make calls (*2)
+
+ The fourth combination, "addresses use 24 bits, use seth/add3/jl to
+ make calls", is possible but makes little sense.
+
+ (*1) The linker may eventually be able to relax seth/add3 -> ld24.
+ (*2) The linker may eventually be able to relax seth/add3/jl -> bl.
+
+ Internally these are recorded as TARGET_ADDR{24,32} and
+ TARGET_CALL{26,32}.
+
+ The __model__ attribute can be used to select the code model to use when
+ accessing particular objects. */
+
+enum m32r_model { M32R_MODEL_SMALL, M32R_MODEL_MEDIUM, M32R_MODEL_LARGE };
+
+extern enum m32r_model m32r_model;
+#define TARGET_MODEL_SMALL (m32r_model == M32R_MODEL_SMALL)
+#define TARGET_MODEL_MEDIUM (m32r_model == M32R_MODEL_MEDIUM)
+#define TARGET_MODEL_LARGE (m32r_model == M32R_MODEL_LARGE)
+#define TARGET_ADDR24 (m32r_model == M32R_MODEL_SMALL)
+#define TARGET_ADDR32 (! TARGET_ADDR24)
+#define TARGET_CALL26 (! TARGET_CALL32)
+#define TARGET_CALL32 (m32r_model == M32R_MODEL_LARGE)
+
+/* The default is the small model. */
+#ifndef M32R_MODEL_DEFAULT
+#define M32R_MODEL_DEFAULT M32R_MODEL_SMALL
+#endif
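+
+/* For example (hypothetical declaration), a single object can override
+ the default model with
+
+ extern int big_table[] __attribute__ ((model ("large")));
+
+ which forces 32-bit addressing for references to big_table. */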
+
+/* Small Data Area
+
+ The SDA consists of sections .sdata, .sbss, and .scommon.
+ .scommon isn't a real section, symbols in it have their section index
+ set to SHN_M32R_SCOMMON, though support for it exists in the linker script.
+
+ Two switches control the SDA:
+
+ -G NNN - specifies the maximum size of variable to go in the SDA
+
+ -msdata=foo - specifies how such variables are handled
+
+ -msdata=none - small data area is disabled
+
+ -msdata=sdata - small data goes in the SDA, special code isn't
+ generated to use it, and special relocs aren't
+ generated
+
+ -msdata=use - small data goes in the SDA, special code is generated
+ to use the SDA and special relocs are generated
+
+ The SDA is not multilib'd; it isn't necessary.
+ MULTILIB_EXTRA_OPTS is set in tmake_file to -msdata=sdata so multilib'd
+ libraries have small data in .sdata/SHN_M32R_SCOMMON so programs that use
+ -msdata=use will successfully link with them (references in header files
+ will cause the compiler to emit code that refers to library objects in
+ .data). ??? There can be a problem if the user passes a -G value greater
+ than the default and a library object in a header file is that size.
+ The default is 8 so this should be rare - if it occurs the user
+ is required to rebuild the libraries or use a smaller value for -G. */
+
+/* Maximum size of variables that go in .sdata/.sbss.
+ The -msdata=foo switch also controls how small variables are handled. */
+#ifndef SDATA_DEFAULT_SIZE
+#define SDATA_DEFAULT_SIZE 8
+#endif
+
+enum m32r_sdata { M32R_SDATA_NONE, M32R_SDATA_SDATA, M32R_SDATA_USE };
+
+extern enum m32r_sdata m32r_sdata;
+#define TARGET_SDATA_NONE (m32r_sdata == M32R_SDATA_NONE)
+#define TARGET_SDATA_SDATA (m32r_sdata == M32R_SDATA_SDATA)
+#define TARGET_SDATA_USE (m32r_sdata == M32R_SDATA_USE)
+
+/* Default is to disable the SDA
+ [for upward compatibility with previous toolchains]. */
+#ifndef M32R_SDATA_DEFAULT
+#define M32R_SDATA_DEFAULT M32R_SDATA_NONE
+#endif
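+
+/* For example (hypothetical command line), compiling with
+
+ gcc -G 4 -msdata=use foo.c
+
+ puts variables of at most 4 bytes into .sdata/.sbss and generates
+ the special code and relocs needed to address them there. */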
+
+/* Define this macro as a C expression for the initializer of an array of
+ strings to tell the driver program which options are defaults for this
+ target and thus do not need to be handled specially when using
+ `MULTILIB_OPTIONS'. */
+#ifndef SUBTARGET_MULTILIB_DEFAULTS
+#define SUBTARGET_MULTILIB_DEFAULTS
+#endif
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mmodel=small" SUBTARGET_MULTILIB_DEFAULTS }
+#endif
+
+#ifndef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS
+#endif
+
+/* Target machine storage layout. */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ (MODE) = SImode; \
+ }
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 32
+
+/* ALIGN FRAMES on word boundaries */
+#define M32R_STACK_ALIGN(LOC) (((LOC) + 3) & ~ 3)
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 32
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Define LABEL_ALIGN to calculate code length of PNOP at labels. */
+#define LABEL_ALIGN(insn) 2
+
+/* Layout of source language data types. */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+#define SIZE_TYPE "long unsigned int"
+#define PTRDIFF_TYPE "long int"
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+#define M32R_NUM_REGISTERS 19
+
+#ifndef SUBTARGET_NUM_REGISTERS
+#define SUBTARGET_NUM_REGISTERS 0
+#endif
+
+#define FIRST_PSEUDO_REGISTER (M32R_NUM_REGISTERS + SUBTARGET_NUM_REGISTERS)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ 0-3 - arguments/results
+ 4-5 - call used [4 is used as a tmp during prologue/epilogue generation]
+ 6 - call used, gptmp
+ 7 - call used, static chain pointer
+ 8-11 - call saved
+ 12 - call saved [reserved for global pointer]
+ 13 - frame pointer
+ 14 - subroutine link register
+ 15 - stack pointer
+ 16 - arg pointer
+ 17 - carry flag
+ 18 - accumulator
+ 19 - accumulator 1 in the m32r/x
+ By default, the extension registers are not available. */
+
+#ifndef SUBTARGET_FIXED_REGISTERS
+#define SUBTARGET_FIXED_REGISTERS
+#endif
+
+#define FIXED_REGISTERS \
+{ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ 1, 1, 1 \
+ SUBTARGET_FIXED_REGISTERS \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#ifndef SUBTARGET_CALL_USED_REGISTERS
+#define SUBTARGET_CALL_USED_REGISTERS
+#endif
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 1, 1, \
+ 1, 1, 1 \
+ SUBTARGET_CALL_USED_REGISTERS \
+}
+
+#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS
+
+/* If defined, an initializer for a vector of integers, containing the
+ numbers of hard registers in the order in which GCC should
+ prefer to use them (from most preferred to least). */
+
+#ifndef SUBTARGET_REG_ALLOC_ORDER
+#define SUBTARGET_REG_ALLOC_ORDER
+#endif
+
+#if 1 /* Better for int code. */
+#define REG_ALLOC_ORDER \
+{ \
+ 4, 5, 6, 7, 2, 3, 8, 9, 10, \
+ 11, 12, 13, 14, 0, 1, 15, 16, 17, 18 \
+ SUBTARGET_REG_ALLOC_ORDER \
+}
+
+#else /* Better for fp code at expense of int code. */
+#define REG_ALLOC_ORDER \
+{ \
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 \
+ SUBTARGET_REG_ALLOC_ORDER \
+}
+#endif
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
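+
+/* With 4-byte words this is a plain ceiling division: QImode, HImode
+ and SImode values occupy one register each, while DImode and DFmode
+ values occupy two consecutive registers. */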
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+extern const unsigned int m32r_hard_regno_mode_ok[FIRST_PSEUDO_REGISTER];
+extern unsigned int m32r_mode_class[];
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((m32r_hard_regno_mode_ok[REGNO] & m32r_mode_class[MODE]) != 0)
+
+/* A C expression that is nonzero if it is desirable to choose
+ register allocation so as to avoid move instructions between a
+ value of mode MODE1 and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R,
+ MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1,
+ MODE2)' must be zero. */
+
+/* Tie QI/HI/SI modes together. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ( GET_MODE_CLASS (MODE1) == MODE_INT \
+ && GET_MODE_CLASS (MODE2) == MODE_INT \
+ && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
+ && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ m32r_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Register classes and constants. */
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ It is important that any condition codes have class NO_REGS.
+ See `register_operand'. */
+
+enum reg_class
+{
+ NO_REGS, CARRY_REG, ACCUM_REGS, GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES
+};
+
+#define IRA_COVER_CLASSES \
+{ \
+ ACCUM_REGS, GENERAL_REGS, LIM_REG_CLASSES \
+}
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+ { "NO_REGS", "CARRY_REG", "ACCUM_REGS", "GENERAL_REGS", "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#ifndef SUBTARGET_REG_CLASS_CARRY
+#define SUBTARGET_REG_CLASS_CARRY 0
+#endif
+
+#ifndef SUBTARGET_REG_CLASS_ACCUM
+#define SUBTARGET_REG_CLASS_ACCUM 0
+#endif
+
+#ifndef SUBTARGET_REG_CLASS_GENERAL
+#define SUBTARGET_REG_CLASS_GENERAL 0
+#endif
+
+#ifndef SUBTARGET_REG_CLASS_ALL
+#define SUBTARGET_REG_CLASS_ALL 0
+#endif
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000 }, \
+ { 0x20000 | SUBTARGET_REG_CLASS_CARRY }, \
+ { 0x40000 | SUBTARGET_REG_CLASS_ACCUM }, \
+ { 0x1ffff | SUBTARGET_REG_CLASS_GENERAL }, \
+ { 0x7ffff | SUBTARGET_REG_CLASS_ALL }, \
+}
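+
+/* Read these as bit masks over the 19 base registers: 0x1ffff covers
+ general registers 0-16, bit 17 (0x20000) is the carry flag and
+ bit 18 (0x40000) the accumulator, so ALL_REGS is 0x7ffff. */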
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+extern enum reg_class m32r_regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) (m32r_regno_reg_class[REGNO])
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) < FIRST_PSEUDO_REGISTER \
+ ? GPR_P (REGNO) || (REGNO) == ARG_POINTER_REGNUM \
+ : GPR_P (reg_renumber[REGNO]))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Return true if a value is inside a range. */
+#define IN_RANGE_P(VALUE, LOW, HIGH) \
+ (((unsigned HOST_WIDE_INT)((VALUE) - (LOW))) \
+ <= ((unsigned HOST_WIDE_INT)((HIGH) - (LOW))))
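+
+/* The unsigned subtraction folds the two-sided test
+ LOW <= VALUE && VALUE <= HIGH into one comparison: when VALUE is
+ below LOW the difference wraps to a huge unsigned number and the
+ comparison fails. */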
+
+/* Some range macros. */
+#define INT16_P(X) ((X) >= - 0x8000 && (X) <= 0x7fff)
+#define CMP_INT16_P(X) ((X) >= - 0x7fff && (X) <= 0x8000)
+#define UINT16_P(X) (((unsigned HOST_WIDE_INT) (X)) <= 0x0000ffff)
+#define UINT24_P(X) (((unsigned HOST_WIDE_INT) (X)) <= 0x00ffffff)
+
+/* Stack layout and stack pointer usage. */
+
+/* Define this macro if pushing a word onto the stack moves the stack
+ pointer to a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Offset from frame pointer to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+/* The frame pointer points at the same place as the stack pointer, except if
+ alloca has been called. */
+#define STARTING_FRAME_OFFSET \
+ M32R_STACK_ALIGN (crtl->outgoing_args_size)
+
+/* Offset from the stack pointer register to the first location at which
+ outgoing arguments are placed. */
+#define STACK_POINTER_OFFSET 0
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 15
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 13
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register in which static-chain is passed to a function.
+ This must not be a register used by the prologue. */
+#define STATIC_CHAIN_REGNUM 7
+
+/* These aren't official macros. */
+#define PROLOGUE_TMP_REGNUM 4
+#define RETURN_ADDR_REGNUM 14
+/* #define GP_REGNUM 12 */
+#define CARRY_REGNUM 17
+#define ACCUM_REGNUM 18
+#define M32R_MAX_INT_REGS 16
+
+#ifndef SUBTARGET_GPR_P
+#define SUBTARGET_GPR_P(REGNO) 0
+#endif
+
+#ifndef SUBTARGET_ACCUM_P
+#define SUBTARGET_ACCUM_P(REGNO) 0
+#endif
+
+#ifndef SUBTARGET_CARRY_P
+#define SUBTARGET_CARRY_P(REGNO) 0
+#endif
+
+#define GPR_P(REGNO) (IN_RANGE_P ((REGNO), 0, 15) || SUBTARGET_GPR_P (REGNO))
+#define ACCUM_P(REGNO) ((REGNO) == ACCUM_REGNUM || SUBTARGET_ACCUM_P (REGNO))
+#define CARRY_P(REGNO) ((REGNO) == CARRY_REGNUM || SUBTARGET_CARRY_P (REGNO))
+
+/* Eliminating the frame and arg pointers. */
+
+#if 0
+/* C statement to store the difference between the frame pointer
+ and the stack pointer values immediately after the function prologue.
+ If `ELIMINABLE_REGS' is defined, this macro will be not be used and
+ need not be defined. */
+#define INITIAL_FRAME_POINTER_OFFSET(VAR) \
+((VAR) = m32r_compute_frame_size (get_frame_size ()))
+#endif
+
+/* If defined, this macro specifies a table of register pairs used to
+ eliminate unneeded registers that point into the stack frame. If
+ it is not defined, the only elimination attempted by the compiler
+ is to replace references to the frame pointer with references to
+ the stack pointer.
+
+ Note that the elimination of the argument pointer with the stack
+ pointer is specified first since that is the preferred elimination. */
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It
+ specifies the initial difference between the specified pair of
+ registers. This macro must be defined if `ELIMINABLE_REGS' is
+ defined. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do \
+ { \
+ int size = m32r_compute_frame_size (get_frame_size ()); \
+ \
+ if ((FROM) == FRAME_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \
+ (OFFSET) = 0; \
+ else if ((FROM) == ARG_POINTER_REGNUM && (TO) == FRAME_POINTER_REGNUM) \
+ (OFFSET) = size - crtl->args.pretend_args_size; \
+ else if ((FROM) == ARG_POINTER_REGNUM && (TO) == STACK_POINTER_REGNUM) \
+ (OFFSET) = size - crtl->args.pretend_args_size; \
+ else \
+ gcc_unreachable (); \
+ } \
+ while (0)
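+
+/* Concretely: once the frame is allocated the frame pointer coincides
+ with the stack pointer (offset 0), while the argument pointer lies
+ a whole frame above the stack pointer, less any pretend-args area,
+ as computed above. */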
+
+/* Function argument passing. */
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue should
+ increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+#define CUMULATIVE_ARGS int
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+/* The number of registers used for parameter passing. Local to this file. */
+#define M32R_MAX_PARM_REGS 4
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((unsigned) (N) < M32R_MAX_PARM_REGS)
+
+
+/* Function results. */
+
+/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Function entry and exit. */
+
+/* Initialize data used by insn expanders. This is called from
+ init_emit, once for each function, before code is generated. */
+#define INIT_EXPANDERS m32r_init_expanders ()
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do \
+ { \
+ if (flag_pic) \
+ { \
+ fprintf (FILE, "\tld24 r14,#mcount\n"); \
+ fprintf (FILE, "\tadd r14,r12\n"); \
+ fprintf (FILE, "\tld r14,@r14\n"); \
+ fprintf (FILE, "\tjl r14\n"); \
+ } \
+ else \
+ { \
+ if (TARGET_ADDR24) \
+ fprintf (FILE, "\tbl mcount\n"); \
+ else \
+ { \
+ fprintf (FILE, "\tseth r14,#high(mcount)\n"); \
+ fprintf (FILE, "\tor3 r14,r14,#low(mcount)\n"); \
+ fprintf (FILE, "\tjl r14\n"); \
+ } \
+ } \
+ fprintf (FILE, "\taddi sp,#4\n"); \
+ } \
+ while (0)
+
+/* Trampolines. */
+
+/* On the M32R, the trampoline is:
+
+ mv r7, lr -> bl L1 ; 178e 7e01
+L1: add3 r6, lr, #L2-L1 ; 86ae 000c (L2 - L1 = 12)
+ mv lr, r7 -> ld r7,@r6+ ; 1e87 27e6
+ ld r6, @r6 -> jmp r6 ; 26c6 1fc6
+L2: .word STATIC
+ .word FUNCTION */
+
+#ifndef CACHE_FLUSH_FUNC
+#define CACHE_FLUSH_FUNC "_flush_cache"
+#endif
+#ifndef CACHE_FLUSH_TRAP
+#define CACHE_FLUSH_TRAP 12
+#endif
+
+/* Length in bytes of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE 24
+
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) m32r_return_addr (COUNT)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* We have post-increment load and pre-decrement/pre-increment store,
+ but only for 4-byte values. */
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_POST_INCREMENT 1
+
+/* Recognize any constant value that is a valid address. */
+#define CONSTANT_ADDRESS_P(X) \
+ ( GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == SYMBOL_REF \
+ || CONST_INT_P (X) \
+ || (GET_CODE (X) == CONST \
+ && ! (flag_pic && ! m32r_legitimate_pic_operand_p (X))))
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ We don't allow (plus symbol large-constant) as the relocations can't
+ describe it. INTVAL > 32767 handles both 16-bit and 24-bit relocations.
+ We allow all CONST_DOUBLE's as the md file patterns will force the
+ constant to memory if they can't handle them. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (! (GET_CODE (X) == CONST \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && (GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF || GET_CODE (XEXP (XEXP (X, 0), 0)) == LABEL_REF) \
+ && CONST_INT_P (XEXP (XEXP (X, 0), 1)) \
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (X, 0), 1)) > 32767))
+
+/* Condition code usage. */
+
+/* Return nonzero if SELECT_CC_MODE will never return MODE for a
+ floating point inequality comparison. */
+#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/
+
+/* Costs. */
+
+/* The cost of a branch insn. */
+/* A value of 2 here causes GCC to avoid using branches in comparisons like
+ while (a < N && a). Branches aren't that expensive on the M32R so
+ we define this as 1. Defining it as 2 had a heavy hit in fp-bit.c. */
+#define BRANCH_COST(speed_p, predictable_p) ((TARGET_BRANCH_COST) ? 2 : 1)
+
+/* Nonzero if access to memory by bytes is slow and undesirable.
+ For RISC chips, it means that access to memory by bytes is no
+ better than access by words when possible, so grab a whole word
+ and maybe make use of that. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register. */
+#define NO_FUNCTION_CSE
+
+/* Section selection. */
+
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+#define DATA_SECTION_ASM_OP "\t.section .data"
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+/* Define this macro if jump tables (for tablejump insns) should be
+ output in the text section, along with the assembler instructions.
+ Otherwise, the readonly data section is used.
+ This macro is irrelevant if there is no separate readonly data section. */
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+/* Position Independent Code. */
+
+/* The register number of the register used to address a table of static
+ data addresses in memory. In some cases this register is defined by a
+ processor's ``application binary interface'' (ABI). When this macro
+ is defined, RTL is generated for this register once, as with the stack
+ pointer and frame pointer registers. If this macro is not defined, it
+ is up to the machine-dependent files to allocate such a register (if
+ necessary). */
+#define PIC_OFFSET_TABLE_REGNUM 12
+
+/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is
+ clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM
+ is not defined. */
+/* This register is call-saved on the M32R. */
+/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/
+
+/* A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent code.
+ You can assume that X satisfies CONSTANT_P, so you need not
+ check this. You can also assume `flag_pic' is true, so you need not
+ check it either. You need not define this macro if all constants
+ (including SYMBOL_REF) can be immediate operands when generating
+ position independent code. */
+#define LEGITIMATE_PIC_OPERAND_P(X) m32r_legitimate_pic_operand_p (X)
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will
+ end at the end of the line. */
+#define ASM_COMMENT_START ";"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#define ASM_APP_OFF ""
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* We do not use DBX_LINES_FUNCTION_RELATIVE or
+ dbxout_stab_value_internal_label_diff here because
+ we need to use .debugsym for the line label. */
+
+#define DBX_OUTPUT_SOURCE_LINE(file, line, counter) \
+ do \
+ { \
+ const char * begin_label = \
+ XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); \
+ char label[64]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "LM", counter); \
+ \
+ dbxout_begin_stabn_sline (line); \
+ assemble_name (file, label); \
+ putc ('-', file); \
+ assemble_name (file, begin_label); \
+ fputs ("\n\t.debugsym ", file); \
+ assemble_name (file, label); \
+ putc ('\n', file); \
+ counter += 1; \
+ } \
+ while (0)
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by the compiler's hard-register number (see above). */
+#ifndef SUBTARGET_REGISTER_NAMES
+#define SUBTARGET_REGISTER_NAMES
+#endif
+
+#define REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "fp", "lr", "sp", \
+ "ap", "cbit", "a0" \
+ SUBTARGET_REGISTER_NAMES \
+}
+
+/* If defined, a C initializer for an array of structures containing
+ a name and a register number. This macro defines additional names
+ for hard registers, thus allowing the `asm' option in declarations
+ to refer to registers using alternate names. */
+#ifndef SUBTARGET_ADDITIONAL_REGISTER_NAMES
+#define SUBTARGET_ADDITIONAL_REGISTER_NAMES
+#endif
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ /*{ "gp", GP_REGNUM },*/ \
+ { "r13", FRAME_POINTER_REGNUM }, \
+ { "r14", RETURN_ADDR_REGNUM }, \
+ { "r15", STACK_POINTER_REGNUM }, \
+ SUBTARGET_ADDITIONAL_REGISTER_NAMES \
+}
+
+/* If defined, C string expressions to be used for the `%R', `%L',
+ `%U', and `%I' options of `asm_fprintf' (see `final.c'). These
+ are useful when a single `md' file must support multiple assembler
+ formats. In that case, the various `tm.h' files can define these
+ macros differently. */
+#define REGISTER_PREFIX ""
+#define LOCAL_LABEL_PREFIX ".L"
+#define USER_LABEL_PREFIX ""
+#define IMMEDIATE_PREFIX "#"
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ do \
+ { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ fprintf (FILE, "\t.word\t"); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* This is how to output an element of a case-vector that is relative. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL)\
+ do \
+ { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ fprintf (FILE, "\t.word\t"); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "-"); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* The desired alignment for the location counter at the beginning
+ of a loop. */
+/* On the M32R, align loops to 32-byte boundaries (the cache line size)
+ when -malign-loops is given. */
+#define LOOP_ALIGN(LABEL) (TARGET_ALIGN_LOOPS ? 5 : 0)
+
+/* Define this to be the maximum number of insns to move around when moving
+ a loop test from the top of a loop to the bottom
+ and seeing whether to duplicate it. The default is thirty.
+
+ Loop unrolling currently doesn't interact well with this optimization,
+ so the threshold is only lowered when optimizing for space and not
+ unrolling loops. */
+#define LOOP_TEST_THRESHOLD (optimize_size \
+ && !flag_unroll_loops \
+ && !flag_unroll_all_loops ? 2 : 30)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+/* .balign is used rather than .align, whose meaning varies between
+ assemblers. */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do \
+ { \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.balign %d\n", 1 << (LOG)); \
+ } \
+ while (0)
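+
+/* Editorial example: ASM_OUTPUT_ALIGN (file, 5), as requested by
+ LOOP_ALIGN above when -malign-loops is in effect, emits
+
+ .balign 32
+
+ since 1 << 5 == 32. */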
+
+/* Like `ASM_OUTPUT_COMMON' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used in
+ place of `ASM_OUTPUT_COMMON', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits. */
+
+#define SCOMMON_ASM_OP "\t.scomm\t"
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (! TARGET_SDATA_NONE \
+ && (SIZE) > 0 \
+ && (SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ fprintf ((FILE), "%s", SCOMMON_ASM_OP); \
+ else \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ",%u,%u\n", (int)(SIZE), (ALIGN) / BITS_PER_UNIT);\
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (! TARGET_SDATA_NONE \
+ && (SIZE) > 0 \
+ && (SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \
+ switch_to_section (get_named_section (NULL, ".sbss", 0)); \
+ else \
+ switch_to_section (bss_section); \
+ ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \
+ last_assemble_variable_decl = DECL; \
+ ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \
+ ASM_OUTPUT_SKIP (FILE, SIZE ? SIZE : 1); \
+ } \
+ while (0)
+
+/* Debugging information. */
+
+/* Generate DBX and DWARF debugging information. */
+#define DBX_DEBUGGING_INFO 1
+#define DWARF2_DEBUGGING_INFO 1
+
+/* Use DWARF2 debugging info by default. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Turn off splitting of long stabs. */
+#define DBX_CONTIN_LENGTH 0
+
+/* Miscellaneous. */
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (flag_pic ? SImode : Pmode)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Max number of bytes we can move from memory
+ to memory in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+/* ??? The M32R doesn't have full 32-bit pointers, but making this PSImode has
+ its own problems (you have to add extendpsisi2 and truncsipsi2).
+ Try to avoid it. */
+#define Pmode SImode
+
+/* A function address in a call instruction. */
+#define FUNCTION_MODE SImode
+
+/* M32R function types. */
+enum m32r_function_type
+{
+ M32R_FUNCTION_UNKNOWN, M32R_FUNCTION_NORMAL, M32R_FUNCTION_INTERRUPT
+};
+
+#define M32R_INTERRUPT_P(TYPE) ((TYPE) == M32R_FUNCTION_INTERRUPT)
+
+/* The maximum number of bytes to copy using pairs of load/store instructions.
+ If a block is larger than this then a loop will be generated to copy
+ MAX_MOVE_BYTES chunks at a time. The value of 32 is a semi-arbitrary choice:
+ a customer uses Dhrystone as their benchmark, and Dhrystone contains a
+ 31-byte string copy. */
+#define MAX_MOVE_BYTES 32
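+
+/* Editorial sketch (an assumption about the generated shape, not upstream
+ text): with this definition, a 100-byte block copy becomes a loop that
+ moves 32-byte chunks via load/store pairs, with the 4-byte remainder
+ copied after the loop. */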
diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md
new file mode 100644
index 000000000..071935735
--- /dev/null
+++ b/gcc/config/m32r/m32r.md
@@ -0,0 +1,2279 @@
+;; Machine description of the Renesas M32R cpu for GNU C compiler
+;; Copyright (C) 1996, 1997, 1998, 1999, 2001, 2003, 2004, 2005,
+;; 2007, 2008, 2009 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; UNSPEC_VOLATILE usage
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_FLUSH_ICACHE 1)])
+
+;; UNSPEC usage
+(define_constants
+ [(UNSPEC_LOAD_SDA_BASE 2)
+ (UNSPEC_SET_CBIT 3)
+ (UNSPEC_PIC_LOAD_ADDR 4)
+ (UNSPEC_GET_PC 5)
+ (UNSPEC_GOTOFF 6)
+ ])
+
+;; Insn type. Used to default other attribute values.
+(define_attr "type"
+ "int2,int4,load2,load4,load8,store2,store4,store8,shift2,shift4,mul2,div4,uncond_branch,branch,call,multi,misc"
+ (const_string "misc"))
+
+;; Length in bytes.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "int2,load2,store2,shift2,mul2")
+ (const_int 2)
+
+ (eq_attr "type" "int4,load4,store4,shift4,div4")
+ (const_int 4)
+
+ (eq_attr "type" "multi")
+ (const_int 8)
+
+ (eq_attr "type" "uncond_branch,branch,call")
+ (const_int 4)]
+
+ (const_int 4)))
+
+;; The length here is the length of a single asm insn. It might be 2 or 4
+;; bytes, so we must allow for the worst case of 4.
+(define_asm_attributes
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")])
+
+;; Whether an instruction is short (16-bit) or long (32-bit).
+(define_attr "insn_size" "short,long"
+ (if_then_else (eq_attr "type" "int2,load2,store2,shift2,mul2")
+ (const_string "short")
+ (const_string "long")))
+
+;; The target CPU we're compiling for.
+(define_attr "cpu" "m32r,m32r2,m32rx"
+ (cond [(ne (symbol_ref "TARGET_M32RX") (const_int 0))
+ (const_string "m32rx")
+ (ne (symbol_ref "TARGET_M32R2") (const_int 0))
+ (const_string "m32r2")]
+ (const_string "m32r")))
+
+;; Defines the pipeline on which an instruction can execute.
+;; On the M32R, a short instruction can execute on either of the two pipes.
+;; For the M32Rx, the restrictions are modelled in the second
+;; condition of this attribute definition.
+(define_attr "m32r_pipeline" "either,s,o,long"
+ (cond [(and (eq_attr "cpu" "m32r")
+ (eq_attr "insn_size" "short"))
+ (const_string "either")
+ (eq_attr "insn_size" "!short")
+ (const_string "long")]
+ (cond [(eq_attr "type" "int2")
+ (const_string "either")
+ (eq_attr "type" "load2,store2,shift2,uncond_branch,branch,call")
+ (const_string "o")
+ (eq_attr "type" "mul2")
+ (const_string "s")]
+ (const_string "long"))))
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Pipeline description
+;; ::
+;; ::::::::::::::::::::
+
+;; This model is based on Chapter 2, Appendix 3 and Appendix 4 of the
+;; "M32R-FPU Software Manual", Revision 1.01, plus additional information
+;; obtained through web searches.
+;;
+;; The pipeline is modelled as a fetch unit feeding a core that contains
+;; a memory unit and two execution units: "fetch" models the IF and D
+;; stages, "memory" models MEM1 and MEM2, and the execution units model
+;; E, E1, E2, EM, and EA. Writeback and bypasses are not modelled.
+(define_automaton "m32r")
+
+;; We pretend there are two short (16-bit) instruction fetchers. The
+;; "s" short fetcher cannot be reserved until the "o" short fetcher is
+;; reserved. Some instructions reserve both the left and right fetchers.
+;; These fetch units are a hack to get GCC to better pack the instructions
+;; for the M32Rx processor, which has two execution pipes.
+;;
+;; In reality there is only one decoder, which can decode either two 16-bit
+;; instructions, or a single 32-bit instruction.
+;;
+;; Note, "fetch" models both the IF and the D pipeline stages.
+;;
+;; The m32rx core has two execution pipes. We name them o_E and s_E.
+;; In addition, there's a memory unit.
+
+(define_cpu_unit "o_IF,s_IF,o_E,s_E,memory" "m32r")
+
+;; Prevent the s pipe from being reserved before the o pipe.
+(absence_set "s_IF" "o_IF")
+(absence_set "s_E" "o_E")
+
+;; On the M32Rx, long instructions execute on both pipes, so reserve
+;; both fetch slots and both pipes.
+(define_reservation "long_IF" "o_IF+s_IF")
+(define_reservation "long_E" "o_E+s_E")
+
+;; ::::::::::::::::::::
+
+;; Simple instructions do 4 stages: IF D E WB. WB is not modelled.
+;; Hence, ready latency is 1.
+(define_insn_reservation "short_left" 1
+ (and (eq_attr "m32r_pipeline" "o")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "!load2")))
+ "o_IF,o_E")
+
+(define_insn_reservation "short_right" 1
+ (and (eq_attr "m32r_pipeline" "s")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "!load2")))
+ "s_IF,s_E")
+
+(define_insn_reservation "short_either" 1
+ (and (eq_attr "m32r_pipeline" "either")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "!load2")))
+ "o_IF|s_IF,o_E|s_E")
+
+(define_insn_reservation "long_m32r" 1
+ (and (eq_attr "cpu" "m32r")
+ (and (eq_attr "insn_size" "long")
+ (eq_attr "type" "!load4,load8")))
+ "long_IF,long_E")
+
+(define_insn_reservation "long_m32rx" 2
+ (and (eq_attr "m32r_pipeline" "long")
+ (and (eq_attr "insn_size" "long")
+ (eq_attr "type" "!load4,load8")))
+ "long_IF,long_E")
+
+;; Load/store instructions do 6 stages: IF D E MEM1 MEM2 WB.
+;; MEM1 may require more than one cycle depending on locality. We
+;; optimistically assume all memory is nearby, i.e. MEM1 takes only
+;; one cycle. Hence, ready latency is 3.
+
+;; The M32Rx can do short load/store only on the left pipe.
+(define_insn_reservation "short_load_left" 3
+ (and (eq_attr "m32r_pipeline" "o")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "load2")))
+ "o_IF,o_E,memory*2")
+
+(define_insn_reservation "short_load" 3
+ (and (eq_attr "m32r_pipeline" "either")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "load2")))
+ "s_IF|o_IF,s_E|o_E,memory*2")
+
+(define_insn_reservation "long_load" 3
+ (and (eq_attr "cpu" "m32r")
+ (and (eq_attr "insn_size" "long")
+ (eq_attr "type" "load4,load8")))
+ "long_IF,long_E,memory*2")
+
+(define_insn_reservation "long_load_m32rx" 3
+ (and (eq_attr "m32r_pipeline" "long")
+ (eq_attr "type" "load4,load8"))
+ "long_IF,long_E,memory*2")
+
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Expand prologue as RTL
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "
+{
+ m32r_expand_prologue ();
+ DONE;
+}")
+
+;; Expand epilogue as RTL
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ m32r_expand_epilogue ();
+ emit_jump_insn (gen_return_normal ());
+ DONE;
+}")
+
+;; Move instructions.
+;;
+;; For QI and HI moves, the register must contain the full properly
+;; sign-extended value. nonzero_bits assumes this [otherwise
+;; SHORT_IMMEDIATES_SIGN_EXTEND must be used, but the comment for it
+;; says it's a kludge and the .md files should be fixed instead].
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], QImode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily.
+ Objects in the small data area are handled too. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (QImode, operands[1]);
+}")
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "move_dest_operand" "=r,r,r,r,r,T,m")
+ (match_operand:QI 1 "move_src_operand" "r,I,JQR,T,m,r,r"))]
+ "register_operand (operands[0], QImode) || register_operand (operands[1], QImode)"
+ "@
+ mv %0,%1
+ ldi %0,%#%1
+ ldi %0,%#%1
+ ldub %0,%1
+ ldub %0,%1
+ stb %1,%0
+ stb %1,%0"
+ [(set_attr "type" "int2,int2,int4,load2,load4,store2,store4")
+ (set_attr "length" "2,2,4,2,4,2,4")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], HImode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (HImode, operands[1]);
+}")
+
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "move_dest_operand" "=r,r,r,r,r,r,T,m")
+ (match_operand:HI 1 "move_src_operand" "r,I,JQR,K,T,m,r,r"))]
+ "register_operand (operands[0], HImode) || register_operand (operands[1], HImode)"
+ "@
+ mv %0,%1
+ ldi %0,%#%1
+ ldi %0,%#%1
+ ld24 %0,%#%1
+ lduh %0,%1
+ lduh %0,%1
+ sth %1,%0
+ sth %1,%0"
+ [(set_attr "type" "int2,int2,int4,int4,load2,load4,store2,store4")
+ (set_attr "length" "2,2,4,4,2,4,2,4")])
+
+(define_expand "movsi_push"
+ [(set (mem:SI (pre_dec:SI (match_operand:SI 0 "register_operand" "")))
+ (match_operand:SI 1 "register_operand" ""))]
+ ""
+ "")
+
+(define_expand "movsi_pop"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (post_inc:SI (match_operand:SI 1 "register_operand" ""))))]
+ ""
+ "")
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], SImode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (SImode, operands[1]);
+
+ /* Small Data Area reference? */
+ if (small_data_operand (operands[1], SImode))
+ {
+ emit_insn (gen_movsi_sda (operands[0], operands[1]));
+ DONE;
+ }
+
+ /* If medium or large code model, symbols have to be loaded with
+ seth/add3. */
+ if (addr32_operand (operands[1], SImode))
+ {
+ emit_insn (gen_movsi_addr32 (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+;; ??? Do we need a const_double constraint here for large unsigned values?
+(define_insn "*movsi_insn"
+ [(set (match_operand:SI 0 "move_dest_operand" "=r,r,r,r,r,r,r,r,r,T,S,m")
+ (match_operand:SI 1 "move_src_operand" "r,I,J,MQ,L,n,T,U,m,r,r,r"))]
+ "register_operand (operands[0], SImode) || register_operand (operands[1], SImode)"
+ "*
+{
+ if (REG_P (operands[0]) || GET_CODE (operands[1]) == SUBREG)
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ default:
+ break;
+
+ case REG:
+ case SUBREG:
+ return \"mv %0,%1\";
+
+ case MEM:
+ if (GET_CODE (XEXP (operands[1], 0)) == POST_INC
+ && XEXP (XEXP (operands[1], 0), 0) == stack_pointer_rtx)
+ return \"pop %0\";
+
+ return \"ld %0,%1\";
+
+ case CONST_INT:
+ if (satisfies_constraint_J (operands[1]))
+ return \"ldi %0,%#%1\\t; %X1\";
+
+ if (satisfies_constraint_M (operands[1]))
+ return \"ld24 %0,%#%1\\t; %X1\";
+
+ if (satisfies_constraint_L (operands[1]))
+ return \"seth %0,%#%T1\\t; %X1\";
+
+ return \"#\";
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (TARGET_ADDR24)
+ return \"ld24 %0,%#%1\";
+
+ return \"#\";
+ }
+ }
+
+ else if (MEM_P (operands[0])
+ && (REG_P (operands[1]) || GET_CODE (operands[1]) == SUBREG))
+ {
+ if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ && XEXP (XEXP (operands[0], 0), 0) == stack_pointer_rtx)
+ return \"push %1\";
+
+ return \"st %1,%0\";
+ }
+
+ gcc_unreachable ();
+}"
+ [(set_attr "type" "int2,int2,int4,int4,int4,multi,load2,load2,load4,store2,store2,store4")
+ (set_attr "length" "2,2,4,4,4,8,2,2,4,2,2,4")])
+
+; Try to use a four byte / two byte pair for constants not loadable with
+; ldi, ld24, seth.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "two_insn_const_operand" ""))]
+ ""
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (ior:SI (match_dup 0) (match_dup 3)))]
+ "
+{
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]);
+ unsigned HOST_WIDE_INT tmp;
+ int shift;
+
+ /* In all cases we will emit two instructions. However we try to
+ use 2 byte instructions wherever possible. We can assume the
+ constant isn't loadable with any of ldi, ld24, or seth. */
+
+ /* See if we can load a 24-bit unsigned value and invert it. */
+ if (UINT24_P (~ val))
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (~ val)));
+ emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
+ DONE;
+ }
+
+ /* See if we can load a 24-bit unsigned value and shift it into place.
+ 0x01fffffe is just beyond ld24's range. */
+ for (shift = 1, tmp = 0x01fffffe;
+ shift < 8;
+ ++shift, tmp <<= 1)
+ {
+ if ((val & ~tmp) == 0)
+ {
+ emit_insn (gen_movsi (operands[0], GEN_INT (val >> shift)));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (shift)));
+ DONE;
+ }
+ }
+
+ /* Can't use any two byte insn, so fall back to seth/or3. Use ~0xffff
+ instead of 0xffff0000, since the latter fails on a 64-bit host. */
+ operands[2] = GEN_INT ((val) & ~0xffff);
+ operands[3] = GEN_INT ((val) & 0xffff);
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "seth_add3_operand" ""))]
+ "TARGET_ADDR32"
+ [(set (match_dup 0)
+ (high:SI (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum:SI (match_dup 0)
+ (match_dup 1)))]
+ "")
+
+;; Small data area support.
+;; The address of _SDA_BASE_ is loaded into a register and all objects in
+;; the small data area are indexed off that. This is done for each reference
+;; but cse will clean things up for us. We let the compiler choose the
+;; register to use so we needn't allocate (and maybe even fix) a special
+;; register to use. Since the load and store insns have a 16-bit offset the
+;; total size of the data area can be 64K. However, if the data area lives
+;; above 16M (24 bits), _SDA_BASE_ will have to be loaded with seth/add3 which
+;; would then yield 3 instructions to reference an object [though there would
+;; be no net loss if two or more objects were referenced]. The 3 insns can be
+;; reduced back to 2 if the size of the small data area were reduced to 32K
+;; [then seth + ld/st would work for any object in the area]. Doing this
+;; would require special handling of _SDA_BASE_ (its value would be
+;; (.sdata + 32K) & 0xffff0000) and reloc computations would be different
+;; [I think]. Handling this is deferred until later; for now we require
+;; .sdata to lie in the first 16M.
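+
+;; Editorial sketch (the register and offset spelling are illustrative):
+;; a load from a small-data object X comes out as something like
+;;
+;;     ld24 r4,#_SDA_BASE_
+;;     ld   r0,@(sda-offset-of-X,r4)
+;;
+;; with cse sharing the ld24 among nearby references.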
+
+(define_expand "movsi_sda"
+ [(set (match_dup 2)
+ (unspec:SI [(const_int 0)] UNSPEC_LOAD_SDA_BASE))
+ (set (match_operand:SI 0 "register_operand" "")
+ (lo_sum:SI (match_dup 2)
+ (match_operand:SI 1 "small_data_operand" "")))]
+ ""
+ "
+{
+ if (reload_in_progress || reload_completed)
+ operands[2] = operands[0];
+ else
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "*load_sda_base_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_LOAD_SDA_BASE))]
+ "TARGET_ADDR32"
+ "seth %0,%#shigh(_SDA_BASE_)\;add3 %0,%0,%#low(_SDA_BASE_)"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "*load_sda_base"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_LOAD_SDA_BASE))]
+ ""
+ "ld24 %0,#_SDA_BASE_"
+ [(set_attr "type" "int4")
+ (set_attr "length" "4")])
+
+;; 32-bit address support.
+
+(define_expand "movsi_addr32"
+ [(set (match_dup 2)
+ ; addr32_operand isn't used because it's too restrictive,
+ ; seth_add3_operand is more general and thus safer.
+ (high:SI (match_operand:SI 1 "seth_add3_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "")
+ (lo_sum:SI (match_dup 2) (match_dup 1)))]
+ ""
+ "
+{
+ if (reload_in_progress || reload_completed)
+ operands[2] = operands[0];
+ else
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "set_hi_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand 1 "symbolic_operand" "")))]
+ ""
+ "seth %0,%#shigh(%1)"
+ [(set_attr "type" "int4")
+ (set_attr "length" "4")])
+
+(define_insn "lo_sum_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "in")))]
+ ""
+ "add3 %0,%1,%#%B2"
+ [(set_attr "type" "int4")
+ (set_attr "length" "4")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], DImode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (DImode, operands[1]);
+}")
+
+(define_insn "*movdi_insn"
+ [(set (match_operand:DI 0 "move_dest_operand" "=r,r,r,r,m")
+ (match_operand:DI 1 "move_double_src_operand" "r,nG,F,m,r"))]
+ "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)"
+ "#"
+ [(set_attr "type" "multi,multi,multi,load8,store8")
+ (set_attr "length" "4,4,16,6,6")])
+
+(define_split
+ [(set (match_operand:DI 0 "move_dest_operand" "")
+ (match_operand:DI 1 "move_double_src_operand" ""))]
+ "reload_completed"
+ [(match_dup 2)]
+ "operands[2] = gen_split_move_double (operands);")
+
+;; Floating point move insns.
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], SFmode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (SFmode, operands[1]);
+}")
+
+(define_insn "*movsf_insn"
+ [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,r,r,T,S,m")
+ (match_operand:SF 1 "move_src_operand" "r,F,U,S,m,r,r,r"))]
+ "register_operand (operands[0], SFmode) || register_operand (operands[1], SFmode)"
+ "@
+ mv %0,%1
+ #
+ ld %0,%1
+ ld %0,%1
+ ld %0,%1
+ st %1,%0
+ st %1,%0
+ st %1,%0"
+ ;; ??? Length of alternative 1 is either 2, 4 or 8.
+ [(set_attr "type" "int2,multi,load2,load2,load4,store2,store2,store4")
+ (set_attr "length" "2,8,2,2,4,2,2,4")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "const_double_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ operands[2] = operand_subword (operands[0], 0, 0, SFmode);
+ operands[3] = operand_subword (operands[1], 0, 0, SFmode);
+}")
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Fixup PIC cases. */
+ if (flag_pic)
+ {
+ if (symbolic_operand (operands[1], DFmode))
+ {
+ if (reload_in_progress || reload_completed)
+ operands[1] = m32r_legitimize_pic_address (operands[1], operands[0]);
+ else
+ operands[1] = m32r_legitimize_pic_address (operands[1], NULL_RTX);
+ }
+ }
+
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (DFmode, operands[1]);
+}")
+
+(define_insn "*movdf_insn"
+ [(set (match_operand:DF 0 "move_dest_operand" "=r,r,r,m")
+ (match_operand:DF 1 "move_double_src_operand" "r,F,m,r"))]
+ "register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
+ "#"
+ [(set_attr "type" "multi,multi,load8,store8")
+ (set_attr "length" "4,16,6,6")])
+
+(define_split
+ [(set (match_operand:DF 0 "move_dest_operand" "")
+ (match_operand:DF 1 "move_double_src_operand" ""))]
+ "reload_completed"
+ [(match_dup 2)]
+ "operands[2] = gen_split_move_double (operands);")
+
+;; Zero extension instructions.
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (zero_extend:HI (match_operand:QI 1 "extend_operand" "r,T,m")))]
+ ""
+ "@
+ and3 %0,%1,%#255
+ ldub %0,%1
+ ldub %0,%1"
+ [(set_attr "type" "int4,load2,load4")
+ (set_attr "length" "4,2,4")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI (match_operand:QI 1 "extend_operand" "r,T,m")))]
+ ""
+ "@
+ and3 %0,%1,%#255
+ ldub %0,%1
+ ldub %0,%1"
+ [(set_attr "type" "int4,load2,load4")
+ (set_attr "length" "4,2,4")])
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI (match_operand:HI 1 "extend_operand" "r,T,m")))]
+ ""
+ "@
+ and3 %0,%1,%#65535
+ lduh %0,%1
+ lduh %0,%1"
+ [(set_attr "type" "int4,load2,load4")
+ (set_attr "length" "4,2,4")])
+
+;; Signed conversions from a smaller integer to a larger integer
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (sign_extend:HI (match_operand:QI 1 "extend_operand" "0,T,m")))]
+ ""
+ "@
+ #
+ ldb %0,%1
+ ldb %0,%1"
+ [(set_attr "type" "multi,load2,load4")
+ (set_attr "length" "2,2,4")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx op0 = gen_lowpart (SImode, operands[0]);
+ rtx shift = GEN_INT (24);
+
+ operands[2] = gen_ashlsi3 (op0, op0, shift);
+ operands[3] = gen_ashrsi3 (op0, op0, shift);
+}")
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (sign_extend:SI (match_operand:QI 1 "extend_operand" "0,T,m")))]
+ ""
+ "@
+ #
+ ldb %0,%1
+ ldb %0,%1"
+ [(set_attr "type" "multi,load2,load4")
+ (set_attr "length" "4,2,4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx shift = GEN_INT (24);
+
+ operands[2] = gen_ashlsi3 (operands[0], operands[0], shift);
+ operands[3] = gen_ashrsi3 (operands[0], operands[0], shift);
+}")
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (sign_extend:SI (match_operand:HI 1 "extend_operand" "0,T,m")))]
+ ""
+ "@
+ #
+ ldh %0,%1
+ ldh %0,%1"
+ [(set_attr "type" "multi,load2,load4")
+ (set_attr "length" "4,2,4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ "reload_completed"
+ [(match_dup 2)
+ (match_dup 3)]
+ "
+{
+ rtx shift = GEN_INT (16);
+
+ operands[2] = gen_ashlsi3 (operands[0], operands[0], shift);
+ operands[3] = gen_ashrsi3 (operands[0], operands[0], shift);
+}")
+
+;; Arithmetic instructions.
+
+; ??? Adding an alternative to split add3 of small constants into two
+; insns yields better instruction packing but slower code. Adds of
+; small values are very common.
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,J")))]
+ ""
+ "@
+ add %0,%2
+ addi %0,%#%2
+ add3 %0,%1,%#%2"
+ [(set_attr "type" "int2,int2,int4")
+ (set_attr "length" "2,2,4")])
+
+;(define_split
+; [(set (match_operand:SI 0 "register_operand" "")
+; (plus:SI (match_operand:SI 1 "register_operand" "")
+; (match_operand:SI 2 "int8_operand" "")))]
+; "reload_completed
+; && REGNO (operands[0]) != REGNO (operands[1])
+; && satisfies_constraint_I (operands[2])
+; && INTVAL (operands[2]) != 0"
+; [(set (match_dup 0) (match_dup 1))
+; (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))]
+; "")
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "register_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6")])
+
+;; ??? The cmp clears the condition bit. Can we speed up somehow?
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed"
+ [(parallel [(set (reg:CC 17)
+ (const_int 0))
+ (use (match_dup 4))])
+ (parallel [(set (match_dup 4)
+ (plus:SI (match_dup 4)
+ (plus:SI (match_dup 5)
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))])
+ (parallel [(set (match_dup 6)
+ (plus:SI (match_dup 6)
+ (plus:SI (match_dup 7)
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))])]
+ "
+{
+ operands[4] = operand_subword (operands[0], (WORDS_BIG_ENDIAN != 0), 0, DImode);
+ operands[5] = operand_subword (operands[2], (WORDS_BIG_ENDIAN != 0), 0, DImode);
+ operands[6] = operand_subword (operands[0], (WORDS_BIG_ENDIAN == 0), 0, DImode);
+ operands[7] = operand_subword (operands[2], (WORDS_BIG_ENDIAN == 0), 0, DImode);
+}")
+
+(define_insn "*clear_c"
+ [(set (reg:CC 17)
+ (const_int 0))
+ (use (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "cmp %0,%0"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+(define_insn "*add_carry"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0")
+ (plus:SI (match_operand:SI 2 "register_operand" "r")
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))]
+ ""
+ "addx %0,%2"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "sub %0,%2"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "register_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6")])
+
+;; ??? The cmp clears the condition bit. Can we speed up somehow?
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed"
+ [(parallel [(set (reg:CC 17)
+ (const_int 0))
+ (use (match_dup 4))])
+ (parallel [(set (match_dup 4)
+ (minus:SI (match_dup 4)
+ (minus:SI (match_dup 5)
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))])
+ (parallel [(set (match_dup 6)
+ (minus:SI (match_dup 6)
+ (minus:SI (match_dup 7)
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))])]
+ "
+{
+ operands[4] = operand_subword (operands[0], (WORDS_BIG_ENDIAN != 0), 0, DImode);
+ operands[5] = operand_subword (operands[2], (WORDS_BIG_ENDIAN != 0), 0, DImode);
+ operands[6] = operand_subword (operands[0], (WORDS_BIG_ENDIAN == 0), 0, DImode);
+ operands[7] = operand_subword (operands[2], (WORDS_BIG_ENDIAN == 0), 0, DImode);
+}")
+
+(define_insn "*sub_carry"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "%0")
+ (minus:SI (match_operand:SI 2 "register_operand" "r")
+ (ne:SI (reg:CC 17) (const_int 0)))))
+ (set (reg:CC 17)
+ (unspec:CC [(const_int 0)] UNSPEC_SET_CBIT))]
+ ""
+ "subx %0,%2"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+; Multiply/Divide instructions.
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ ""
+ "mullo %1,%2\;mvfacmi %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "mul %0,%2"
+ [(set_attr "type" "mul2")
+ (set_attr "length" "2")])
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "div %0,%2"
+ [(set_attr "type" "div4")
+ (set_attr "length" "4")])
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "divu %0,%2"
+ [(set_attr "type" "div4")
+ (set_attr "length" "4")])
+
+(define_insn "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mod:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "rem %0,%2"
+ [(set_attr "type" "div4")
+ (set_attr "length" "4")])
+
+(define_insn "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umod:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "remu %0,%2"
+ [(set_attr "type" "div4")
+ (set_attr "length" "4")])
+
+;; Boolean instructions.
+;;
+;; We don't define the DImode versions, as expand_binop does a good enough
+;; job; and if it doesn't, it should be fixed.
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,K")))]
+ ""
+ "*
+{
+ /* If we are worried about space, see if we can break this up into two
+ short instructions, which might eliminate a NOP being inserted. */
+ if (optimize_size
+ && m32r_not_same_reg (operands[0], operands[1])
+ && satisfies_constraint_I (operands[2]))
+ return \"#\";
+
+ else if (CONST_INT_P (operands[2]))
+ return \"and3 %0,%1,%#%X2\";
+
+ return \"and %0,%2\";
+}"
+ [(set_attr "type" "int2,int4")
+ (set_attr "length" "2,4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "int8_operand" "")))]
+ "optimize_size && m32r_not_same_reg (operands[0], operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,K")))]
+ ""
+ "*
+{
+ /* If we are worried about space, see if we can break this up into two
+ short instructions, which might eliminate a NOP being inserted. */
+ if (optimize_size
+ && m32r_not_same_reg (operands[0], operands[1])
+ && satisfies_constraint_I (operands[2]))
+ return \"#\";
+
+ else if (CONST_INT_P (operands[2]))
+ return \"or3 %0,%1,%#%X2\";
+
+ return \"or %0,%2\";
+}"
+ [(set_attr "type" "int2,int4")
+ (set_attr "length" "2,4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "int8_operand" "")))]
+ "optimize_size && m32r_not_same_reg (operands[0], operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (ior:SI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,K")))]
+ ""
+ "*
+{
+ /* If we are worried about space, see if we can break this up into two
+ short instructions, which might eliminate a NOP being inserted. */
+ if (optimize_size
+ && m32r_not_same_reg (operands[0], operands[1])
+ && satisfies_constraint_I (operands[2]))
+ return \"#\";
+
+ else if (CONST_INT_P (operands[2]))
+ return \"xor3 %0,%1,%#%X2\";
+
+ return \"xor %0,%2\";
+}"
+ [(set_attr "type" "int2,int4")
+ (set_attr "length" "2,4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "int8_operand" "")))]
+ "optimize_size && m32r_not_same_reg (operands[0], operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))]
+ "")
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "neg %0,%1"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "not %0,%1"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+;; Shift instructions.
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
+ ""
+ "@
+ sll %0,%2
+ slli %0,%#%2
+ sll3 %0,%1,%#%2"
+ [(set_attr "type" "shift2,shift2,shift4")
+ (set_attr "length" "2,2,4")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
+ ""
+ "@
+ sra %0,%2
+ srai %0,%#%2
+ sra3 %0,%1,%#%2"
+ [(set_attr "type" "shift2,shift2,shift4")
+ (set_attr "length" "2,2,4")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "reg_or_uint16_operand" "r,O,K")))]
+ ""
+ "@
+ srl %0,%2
+ srli %0,%#%2
+ srl3 %0,%1,%#%2"
+ [(set_attr "type" "shift2,shift2,shift4")
+ (set_attr "length" "2,2,4")])
+
+;; Compare instructions.
+;; This controls RTL generation and register allocation.
+
+;; We generate RTL for comparisons and branches by having the cmpxx
+;; patterns store away the operands. Then the bcc patterns
+;; emit RTL for both the compare and the branch.
+;;
+;; On the m32r it is more efficient to use the bxxz instructions and
+;; thus merge the compare and branch into one instruction, so they are
+;; preferred.
+
+(define_insn "cmp_eqsi_zero_insn"
+ [(set (reg:CC 17)
+ (eq:CC (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_zero_operand" "r,P")))]
+ "TARGET_M32RX || TARGET_M32R2"
+ "@
+ cmpeq %0, %1
+ cmpz %0"
+ [(set_attr "type" "int4")
+ (set_attr "length" "4")])
+
+;; The cmp_xxx_insn patterns set the condition bit to the result of the
+;; comparison. There isn't a "compare equal" instruction so cmp_eqsi_insn
+;; is quite inefficient. However, it is rarely used.
+
+(define_insn "cmp_eqsi_insn"
+ [(set (reg:CC 17)
+ (eq:CC (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_cmp_int16_operand" "r,P")))
+ (clobber (match_scratch:SI 2 "=&r,&r"))]
+ ""
+ "*
+{
+ if (which_alternative == 0)
+ {
+ return \"mv %2,%0\;sub %2,%1\;cmpui %2,#1\";
+ }
+ else
+ {
+ if (INTVAL (operands [1]) == 0)
+ return \"cmpui %0, #1\";
+ else if (REGNO (operands [2]) == REGNO (operands [0]))
+ return \"addi %0,%#%N1\;cmpui %2,#1\";
+ else
+ return \"add3 %2,%0,%#%N1\;cmpui %2,#1\";
+ }
+}"
+ [(set_attr "type" "multi,multi")
+ (set_attr "length" "8,8")])
+
+(define_insn "cmp_ltsi_insn"
+ [(set (reg:CC 17)
+ (lt:CC (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_int16_operand" "r,J")))]
+ ""
+ "@
+ cmp %0,%1
+ cmpi %0,%#%1"
+ [(set_attr "type" "int2,int4")
+ (set_attr "length" "2,4")])
+
+(define_insn "cmp_ltusi_insn"
+ [(set (reg:CC 17)
+ (ltu:CC (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_int16_operand" "r,J")))]
+ ""
+ "@
+ cmpu %0,%1
+ cmpui %0,%#%1"
+ [(set_attr "type" "int2,int4")
+ (set_attr "length" "2,4")])
+
+;; These control RTL generation for conditional jump insns.
+
+(define_expand "cbranchsi4"
+ ; the comparison is emitted by gen_compare if needed.
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_cmp_int16_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[0] = gen_compare (GET_CODE (operands[0]), operands[1], operands[2], FALSE);
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = XEXP (operands[0], 1);
+}")
+
+;; Now match both normal and inverted jump.
+
+(define_insn "*branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "eqne_comparison_operator"
+ [(reg 17) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ static char instruction[40];
+ sprintf (instruction, \"%s%s %%l0\",
+ (GET_CODE (operands[1]) == NE) ? \"bc\" : \"bnc\",
+ (get_attr_length (insn) == 2) ? \".s\" : \"\");
+ return instruction;
+}"
+ [(set_attr "type" "branch")
+ ; cf PR gcc/28508
+ ; We use 300/600 instead of 512/1024 to account for inaccurate insn
+ ; lengths and insn alignments that are complex to track. It's not
+ ; important that we be hyper-precise here; precision may matter more
+ ; once the chip supports parallel execution, but until then this is
+ ; simple and suffices.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 300))
+ (const_int 600))
+ (const_int 2)
+ (const_int 4)))])
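+
+;; Editorial note: the length expression above tests
+;; (target - pc) + 300 <u 600, i.e. the displacement lies roughly within
+;; (-300, +300) bytes, in which case the two-byte ".s" short form is used.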
+
+(define_insn "*rev_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "eqne_comparison_operator"
+ [(reg 17) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ;"REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))"
+ ""
+ "*
+{
+ static char instruction[40];
+ sprintf (instruction, \"%s%s %%l0\",
+ (GET_CODE (operands[1]) == EQ) ? \"bc\" : \"bnc\",
+ (get_attr_length (insn) == 2) ? \".s\" : \"\");
+ return instruction;
+}"
+ [(set_attr "type" "branch")
+ ; cf PR gcc/28508
+ ; We use 300/600 instead of 512/1024 to account for inaccurate insn
+ ; lengths and insn alignments that are complex to track. It's not
+ ; important that we be hyper-precise here; precision may matter more
+ ; once the chip supports parallel execution, but until then this is
+ ; simple and suffices.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 300))
+ (const_int 600))
+ (const_int 2)
+ (const_int 4)))])
+
+; reg/reg compare and branch insns
+
+(define_insn "*reg_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "eqne_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ /* Is branch target reachable with beq/bne? */
+ if (get_attr_length (insn) == 4)
+ {
+ if (GET_CODE (operands[1]) == EQ)
+ return \"beq %2,%3,%l0\";
+ else
+ return \"bne %2,%3,%l0\";
+ }
+ else
+ {
+ if (GET_CODE (operands[1]) == EQ)
+ return \"bne %2,%3,1f\;bra %l0\;1:\";
+ else
+ return \"beq %2,%3,1f\;bra %l0\;1:\";
+ }
+}"
+ [(set_attr "type" "branch")
+ ; We use 25000/50000 instead of 32768/65536 to allow for slot filling
+ ; and inaccurate length specs, both of which are complex to track.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 25000))
+ (const_int 50000))
+ (const_int 4)
+ (const_int 8)))])
+
+(define_insn "*rev_reg_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "eqne_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ /* Is branch target reachable with beq/bne? */
+ if (get_attr_length (insn) == 4)
+ {
+ if (GET_CODE (operands[1]) == NE)
+ return \"beq %2,%3,%l0\";
+ else
+ return \"bne %2,%3,%l0\";
+ }
+ else
+ {
+ if (GET_CODE (operands[1]) == NE)
+ return \"bne %2,%3,1f\;bra %l0\;1:\";
+ else
+ return \"beq %2,%3,1f\;bra %l0\;1:\";
+ }
+}"
+ [(set_attr "type" "branch")
+ ; We use 25000/50000 instead of 32768/65536 to allow for slot filling
+ ; and inaccurate length specs, both of which are complex to track.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 25000))
+ (const_int 50000))
+ (const_int 4)
+ (const_int 8)))])
+
+; reg/zero compare and branch insns
+
+(define_insn "*zero_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "signed_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ const char *br,*invbr;
+ char asmtext[40];
+
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ : br = \"eq\"; invbr = \"ne\"; break;
+ case NE : br = \"ne\"; invbr = \"eq\"; break;
+ case LE : br = \"le\"; invbr = \"gt\"; break;
+ case GT : br = \"gt\"; invbr = \"le\"; break;
+ case LT : br = \"lt\"; invbr = \"ge\"; break;
+ case GE : br = \"ge\"; invbr = \"lt\"; break;
+
+ default: gcc_unreachable ();
+ }
+
+ /* Is branch target reachable with bxxz? */
+ if (get_attr_length (insn) == 4)
+ {
+ sprintf (asmtext, \"b%sz %%2,%%l0\", br);
+ output_asm_insn (asmtext, operands);
+ }
+ else
+ {
+ sprintf (asmtext, \"b%sz %%2,1f\;bra %%l0\;1:\", invbr);
+ output_asm_insn (asmtext, operands);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "branch")
+ ; We use 25000/50000 instead of 32768/65536 to allow for slot filling
+ ; and inaccurate length specs, both of which are complex to track.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 25000))
+ (const_int 50000))
+ (const_int 4)
+ (const_int 8)))])
+
+(define_insn "*rev_zero_branch_insn"
+ [(set (pc)
+ (if_then_else (match_operator 1 "eqne_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ const char *br,*invbr;
+ char asmtext[40];
+
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ : br = \"eq\"; invbr = \"ne\"; break;
+ case NE : br = \"ne\"; invbr = \"eq\"; break;
+ case LE : br = \"le\"; invbr = \"gt\"; break;
+ case GT : br = \"gt\"; invbr = \"le\"; break;
+ case LT : br = \"lt\"; invbr = \"ge\"; break;
+ case GE : br = \"ge\"; invbr = \"lt\"; break;
+
+ default: gcc_unreachable ();
+ }
+
+ /* Is branch target reachable with bxxz? */
+ if (get_attr_length (insn) == 4)
+ {
+ sprintf (asmtext, \"b%sz %%2,%%l0\", invbr);
+ output_asm_insn (asmtext, operands);
+ }
+ else
+ {
+ sprintf (asmtext, \"b%sz %%2,1f\;bra %%l0\;1:\", br);
+ output_asm_insn (asmtext, operands);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "branch")
+ ; We use 25000/50000 instead of 32768/65536 to allow for slot filling
+ ; and inaccurate length specs, both of which are complex to track.
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 25000))
+ (const_int 50000))
+ (const_int 4)
+ (const_int 8)))])
+
+;; S<cc> operations to set a register to 1/0 based on a comparison
+
+(define_expand "cstoresi4"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "reg_or_cmp_int16_operand" "")])]
+ ""
+ "
+{
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ if (!gen_cond_store (GET_CODE (operands[1]),
+ operands[0], operands[2], operands[3]))
+ FAIL;
+
+ DONE;
+}")
+
+(define_insn "seq_insn_m32rx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "reg_or_zero_operand" "rP")))
+ (clobber (reg:CC 17))]
+ "TARGET_M32RX || TARGET_M32R2"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_zero_operand" "")))
+ (clobber (reg:CC 17))]
+ "TARGET_M32RX || TARGET_M32R2"
+ [(set (reg:CC 17)
+ (eq:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ "")
+
+(define_insn "seq_zero_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 17))]
+ "TARGET_M32R"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 17))]
+ "TARGET_M32R"
+ [(match_dup 3)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ start_sequence ();
+ emit_insn (gen_cmp_ltusi_insn (op1, const1_rtx));
+ emit_insn (gen_movcc_insn (op0));
+ operands[3] = get_insns ();
+ end_sequence ();
+}")
+
+(define_insn "seq_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,??r,r")
+ (eq:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_eq_int16_operand" "r,r,r,PK")))
+ (clobber (reg:CC 17))
+ (clobber (match_scratch:SI 3 "=1,2,&r,r"))]
+ "TARGET_M32R"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8,8,10,10")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_eq_int16_operand" "")))
+ (clobber (reg:CC 17))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_M32R && reload_completed"
+ [(match_dup 4)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ HOST_WIDE_INT value;
+
+ if (REG_P (op2) && REG_P (op3)
+ && REGNO (op2) == REGNO (op3))
+ {
+ op1 = operands[2];
+ op2 = operands[1];
+ }
+
+ start_sequence ();
+ if (REG_P (op1) && REG_P (op3)
+ && REGNO (op1) != REGNO (op3))
+ {
+ emit_move_insn (op3, op1);
+ op1 = op3;
+ }
+
+ if (satisfies_constraint_P (op2) && (value = INTVAL (op2)) != 0)
+ emit_insn (gen_addsi3 (op3, op1, GEN_INT (-value)));
+ else
+ emit_insn (gen_xorsi3 (op3, op1, op2));
+
+ emit_insn (gen_cmp_ltusi_insn (op3, const1_rtx));
+ emit_insn (gen_movcc_insn (op0));
+ operands[4] = get_insns ();
+ end_sequence ();
+}")
+
+(define_insn "sne_zero_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 17))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC 17))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (const_int 0))
+ (set (reg:CC 17)
+ (ltu:CC (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ "")
+
+(define_insn "slt_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (lt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "reg_or_int16_operand" "r,J")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4,6")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ ""
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ "")
+
+(define_insn "sle_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (le:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (le:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "!optimize_size"
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (const_int 1)))]
+ "")
+
+;; If optimizing for space, use -(reg - 1) rather than "xor3 reg,reg,1" to
+;; invert the comparison; the shorter insns might eliminate an inserted NOP.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (le:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "optimize_size"
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (match_dup 0)
+ (neg:SI (match_dup 0)))]
+ "")
+
+(define_insn "sge_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ge:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "reg_or_int16_operand" "r,J")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8,10")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ge:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ "!optimize_size"
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (const_int 1)))]
+ "")
+
+;; If optimizing for space, invert the comparison with -(reg - 1)
+;; rather than with xor reg,reg,1, since the neg/add form may avoid
+;; a NOP being inserted.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ge:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ "optimize_size"
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (match_dup 0)
+ (neg:SI (match_dup 0)))]
+ "")
+
+(define_insn "sltu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ltu:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "reg_or_int16_operand" "r,J")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "6,8")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ltu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ ""
+ [(set (reg:CC 17)
+ (ltu:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ "")
+
+(define_insn "sleu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (leu:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (leu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "!optimize_size"
+ [(set (reg:CC 17)
+ (ltu:CC (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (const_int 1)))]
+ "")
+
+;; If optimizing for space, invert the comparison with -(reg - 1)
+;; rather than with xor reg,reg,1, since the neg/add form may avoid
+;; a NOP being inserted.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (leu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg:CC 17))]
+ "optimize_size"
+ [(set (reg:CC 17)
+ (ltu:CC (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (match_dup 0)
+ (neg:SI (match_dup 0)))]
+ "")
+
+(define_insn "sgeu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (geu:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "reg_or_int16_operand" "r,J")))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8,10")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (geu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ "!optimize_size"
+ [(set (reg:CC 17)
+ (ltu:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (xor:SI (match_dup 0)
+ (const_int 1)))]
+ "")
+
+;; If optimizing for space, invert the comparison with -(reg - 1)
+;; rather than with xor reg,reg,1, since the neg/add form may avoid
+;; a NOP being inserted.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (geu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int16_operand" "")))
+ (clobber (reg:CC 17))]
+ "optimize_size"
+ [(set (reg:CC 17)
+ (ltu:CC (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (ne:SI (reg:CC 17) (const_int 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (match_dup 0)
+ (neg:SI (match_dup 0)))]
+ "")
+
+(define_insn "movcc_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ ""
+ "mvfc %0, cbr"
+ [(set_attr "type" "misc")
+ (set_attr "length" "2")])
+
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "bra %l0"
+ [(set_attr "type" "uncond_branch")
+ (set (attr "length") (if_then_else (ltu (plus (minus (match_dup 0) (pc))
+ (const_int 400))
+ (const_int 800))
+ (const_int 2)
+ (const_int 4)))])
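+;; The length expression above tests whether (dest - pc + 400) is
+;; unsigned-less-than 800, i.e. whether the target lies within roughly
+;; [-400, +400) bytes of the branch -- apparently a conservative bound
+;; for the 2-byte short form; otherwise the 4-byte form is assumed.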
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
+ ""
+ "jmp %a0"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "length" "2")])
+
+(define_insn "return_lr"
+ [(parallel [(return) (use (reg:SI 14))])]
+ ""
+ "jmp lr"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "length" "2")])
+
+(define_insn "return_rte"
+ [(return)]
+ ""
+ "rte"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "length" "2")])
+
+(define_expand "return"
+ [(return)]
+ "direct_return ()"
+ "
+{
+ emit_jump_insn (gen_return_lr ());
+ DONE;
+}")
+
+(define_expand "return_normal"
+ [(return)]
+ "!direct_return ()"
+ "
+{
+ enum m32r_function_type fn_type;
+
+ fn_type = m32r_compute_function_type (current_function_decl);
+ if (M32R_INTERRUPT_P (fn_type))
+ {
+ emit_jump_insn (gen_return_rte ());
+ DONE;
+ }
+
+ emit_jump_insn (gen_return_lr ());
+ DONE;
+}")
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+ "
+{
+ /* In pic mode, our address differences are against the base of the
+ table. Add that base value back in; CSE ought to be able to combine
+ the two address loads. */
+ if (flag_pic)
+ {
+ rtx tmp, tmp2;
+
+ tmp = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ tmp2 = operands[0];
+ tmp = gen_rtx_PLUS (Pmode, tmp2, tmp);
+ operands[0] = memory_address (Pmode, tmp);
+ }
+}")
+
+(define_insn "*tablejump_insn"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp %a0"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "length" "2")])
+
+(define_expand "call"
+ ;; operands[1] is stack_size_rtx
+ ;; operands[2] is next_arg_register
+ [(parallel [(call (match_operand:SI 0 "call_operand" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 14))])]
+ ""
+ "
+{
+ if (flag_pic)
+ crtl->uses_pic_offset_table = 1;
+}")
+
+(define_insn "*call_via_reg"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 14))]
+ ""
+ "jl %0"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*call_via_label"
+ [(call (mem:SI (match_operand:SI 0 "call_address_operand" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 14))]
+ ""
+ "*
+{
+ int call26_p = call26_operand (operands[0], FUNCTION_MODE);
+
+ if (! call26_p)
+ {
+ /* We may not be able to reach with a `bl' insn so punt and leave it to
+ the linker.
+ We do this here, rather than doing a force_reg in the define_expand
+ so these insns won't be separated, say by scheduling, thus simplifying
+ the linker. */
+ return \"seth r14,%T0\;add3 r14,r14,%B0\;jl r14\";
+ }
+ else
+ return \"bl %0\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "call26_operand (operands[0], FUNCTION_MODE)")
+ (const_int 0))
+ (const_int 12) ; 10 + 2 for nop filler
+ ; The return address must be on a 4 byte boundary so
+ ; there's no point in using a value of 2 here. A 2 byte
+ ; insn may go in the left slot but we currently can't
+ ; use such knowledge.
+ (const_int 4)))])
+
+(define_expand "call_value"
+ ;; operand 2 is stack_size_rtx
+ ;; operand 3 is next_arg_register
+ [(parallel [(set (match_operand 0 "register_operand" "=r")
+ (call (match_operand:SI 1 "call_operand" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 14))])]
+ ""
+ "
+{
+ if (flag_pic)
+ crtl->uses_pic_offset_table = 1;
+}")
+
+(define_insn "*call_value_via_reg"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 14))]
+ ""
+ "jl %1"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")])
+
+(define_insn "*call_value_via_label"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (match_operand:SI 1 "call_address_operand" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 14))]
+ ""
+ "*
+{
+ int call26_p = call26_operand (operands[1], FUNCTION_MODE);
+
+ if (flag_pic)
+ crtl->uses_pic_offset_table = 1;
+
+ if (! call26_p)
+ {
+ /* We may not be able to reach with a `bl' insn so punt and leave it to
+ the linker.
+ We do this here, rather than doing a force_reg in the define_expand
+ so these insns won't be separated, say by scheduling, thus simplifying
+ the linker. */
+ return \"seth r14,%T1\;add3 r14,r14,%B1\;jl r14\";
+ }
+ else
+ return \"bl %1\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "call26_operand (operands[1], FUNCTION_MODE)")
+ (const_int 0))
+ (const_int 12) ; 10 + 2 for nop filler
+ ; The return address must be on a 4 byte boundary so
+ ; there's no point in using a value of 2 here. A 2 byte
+ ; insn may go in the left slot but we currently can't
+ ; use such knowledge.
+ (const_int 4)))])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "int2")
+ (set_attr "length" "2")])
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ "")
+
+;; Special pattern to flush the icache.
+
+(define_insn "flush_icache"
+ [(unspec_volatile [(match_operand 0 "memory_operand" "m")]
+ UNSPECV_FLUSH_ICACHE)
+ (match_operand 1 "" "")
+ (clobber (reg:SI 17))]
+ ""
+ "* return \"trap %#%1 ; flush-icache\";"
+ [(set_attr "type" "int4")
+ (set_attr "length" "4")])
+
+;; Speed up fabs and provide correct sign handling for -0
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (abs:DF (match_operand:DF 1 "register_operand" "0")))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (abs:DF (match_operand:DF 1 "register_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (ashift:SI (match_dup 2)
+ (const_int 1)))
+ (set (match_dup 2)
+ (lshiftrt:SI (match_dup 2)
+ (const_int 1)))]
+ "operands[2] = gen_highpart (SImode, operands[0]);")
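+;; The shift pair above clears just the IEEE sign bit of the high
+;; word: the left shift discards it and the logical right shift
+;; restores exponent and mantissa with a zero sign, so -0.0 becomes
+;; +0.0 as required.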
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (abs:SF (match_operand:SF 1 "register_operand" "0")))]
+ ""
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (abs:SF (match_operand:SF 1 "register_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (ashift:SI (match_dup 2)
+ (const_int 1)))
+ (set (match_dup 2)
+ (lshiftrt:SI (match_dup 2)
+ (const_int 1)))]
+ "operands[2] = gen_highpart (SImode, operands[0]);")
+
+;; Conditional move instructions
+;; Based on those done for the d10v
+
+(define_expand "movsicc"
+ [
+ (set (match_operand:SI 0 "register_operand" "r")
+ (if_then_else:SI (match_operand 1 "" "")
+ (match_operand:SI 2 "conditional_move_operand" "O")
+ (match_operand:SI 3 "conditional_move_operand" "O")
+ )
+ )
+ ]
+ ""
+ "
+{
+ if (! zero_and_one (operands [2], operands [3]))
+ FAIL;
+
+ /* Generate the comparison that will set the carry flag. */
+ operands[1] = gen_compare (GET_CODE (operands[1]), XEXP (operands[1], 0),
+ XEXP (operands[1], 1), TRUE);
+
+ /* See other movsicc pattern below for reason why. */
+ emit_insn (gen_blockage ());
+}")
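+;; Example (illustrative): for C code such as `r = (a == b) ? 1 : 0;'
+;; the expander emits the compare that sets the condition bit and the
+;; insn below then materializes the 0/1 result; arms other than the
+;; constants 0 and 1 (in either order) fail the zero_and_one test.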
+
+;; Generate the conditional instructions based on how the carry flag is examined.
+(define_insn "*movsicc_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI (match_operand 1 "carry_compare_operand" "")
+ (match_operand:SI 2 "conditional_move_operand" "O")
+ (match_operand:SI 3 "conditional_move_operand" "O")
+ )
+ )]
+ "zero_and_one (operands [2], operands[3])"
+ "* return emit_cond_move (operands, insn);"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")
+ ]
+)
+
+
+;; Block moves, see m32r.c for more details.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand" "")
+ (match_operand:BLK 1 "general_operand" ""))
+ (use (match_operand:SI 2 "immediate_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))])]
+ ""
+ "
+{
+ if (operands[0]) /* Avoid unused code messages. */
+ {
+ if (m32r_expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+ }
+}")
+
+;; Insn generated by block moves
+
+(define_insn "movmemsi_internal"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r")) ;; destination
+ (mem:BLK (match_operand:SI 1 "register_operand" "r"))) ;; source
+ (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move
+ (set (match_operand:SI 3 "register_operand" "=0")
+ (plus:SI (minus (match_dup 2) (const_int 4))
+ (match_dup 0)))
+ (set (match_operand:SI 4 "register_operand" "=1")
+ (plus:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:SI 5 "=&r")) ;; temp1
+ (clobber (match_scratch:SI 6 "=&r"))] ;; temp2
+ ""
+ "* m32r_output_block_move (insn, operands); return \"\"; "
+ [(set_attr "type" "store8")
+ (set_attr "length" "72")]) ;; Maximum
+
+;; PIC
+
+/* When generating pic, we need to load the symbol offset into a register.
+ So that the optimizer does not confuse this with a normal symbol load
+ we use an unspec. The offset will be loaded from a constant pool entry,
+ since that is the only type of relocation we can use. */
+
+(define_insn "pic_load_addr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "" "")] UNSPEC_PIC_LOAD_ADDR))]
+ "flag_pic"
+ "ld24 %0,%#%1"
+ [(set_attr "type" "int4")])
+
+(define_insn "gotoff_load_addr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "" "")] UNSPEC_GOTOFF))]
+ "flag_pic"
+ "seth %0, %#shigh(%1@GOTOFF)\;add3 %0, %0, low(%1@GOTOFF)"
+ [(set_attr "type" "int4")
+ (set_attr "length" "8")])
+
+;; Load program counter insns.
+
+(define_insn "get_pc"
+ [(clobber (reg:SI 14))
+ (set (match_operand 0 "register_operand" "=r,r")
+ (unspec [(match_operand 1 "" "")] UNSPEC_GET_PC))
+ (use (match_operand:SI 2 "immediate_operand" "W,i"))]
+ "flag_pic"
+ "@
+ bl.s .+4\;seth %0,%#shigh(%1)\;add3 %0,%0,%#low(%1+4)\;add %0,lr
+ bl.s .+4\;ld24 %0,%#%1\;add %0,lr"
+ [(set_attr "length" "12,8")])
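+;; Note that `bl.s .+4' simply falls through to the next insn while
+;; leaving that insn's address in lr as a side effect; the trailing
+;; `add %0,lr' turns the loaded constant into an absolute address.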
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "flag_pic"
+ "
+{
+ m32r_load_pic_register ();
+ DONE;
+}")
diff --git a/gcc/config/m32r/m32r.opt b/gcc/config/m32r/m32r.opt
new file mode 100644
index 000000000..a1aa96668
--- /dev/null
+++ b/gcc/config/m32r/m32r.opt
@@ -0,0 +1,82 @@
+; Options for the Renesas M32R port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m32rx
+Target Report RejectNegative Mask(M32RX)
+Compile for the m32rx
+
+m32r2
+Target Report RejectNegative Mask(M32R2)
+Compile for the m32r2
+
+m32r
+Target RejectNegative
+Compile for the m32r
+
+malign-loops
+Target Report Mask(ALIGN_LOOPS)
+Align all loops to a 32-byte boundary
+
+mbranch-cost=1
+Target Report RejectNegative Mask(BRANCH_COST)
+Prefer branches over conditional execution
+
+mbranch-cost=2
+Target Report RejectNegative InverseMask(BRANCH_COST)
+Give branches their default cost
+
+mdebug
+Target Mask(DEBUG)
+Display compile time statistics
+
+mflush-func=
+Target RejectNegative Joined Var(m32r_cache_flush_func) Init(CACHE_FLUSH_FUNC)
+Specify cache flush function
+
+mflush-trap=
+Target RejectNegative Joined UInteger Var(m32r_cache_flush_trap) Init(CACHE_FLUSH_TRAP)
+Specify cache flush trap number
+
+missue-rate=1
+Target Report RejectNegative Mask(LOW_ISSUE_RATE)
+Only issue one instruction per cycle
+
+missue-rate=2
+Target Report RejectNegative InverseMask(LOW_ISSUE_RATE)
+Allow two instructions to be issued per cycle
+
+mmodel=
+Target RejectNegative Joined
+Code size: small, medium or large
+
+mno-flush-func
+Target RejectNegative
+Don't call any cache flush functions
+
+mno-flush-trap
+Target RejectNegative
+Don't issue any cache flush trap
+
+; mrelax
+; Target Mask(RELAX)
+
+msdata=
+Target RejectNegative Joined
+Small data area: none, sdata, use
diff --git a/gcc/config/m32r/predicates.md b/gcc/config/m32r/predicates.md
new file mode 100644
index 000000000..4b3c5fea1
--- /dev/null
+++ b/gcc/config/m32r/predicates.md
@@ -0,0 +1,440 @@
+;; Predicate definitions for Renesas M32R.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if OP is a register or the constant 0.
+
+(define_predicate "reg_or_zero_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (REG_P (op) || GET_CODE (op) == SUBREG)
+ return register_operand (op, mode);
+
+ if (!CONST_INT_P (op))
+ return 0;
+
+ return INTVAL (op) == 0;
+})
+
+;; Return nonzero if the operand is suitable for use in a conditional
+;; move sequence.
+
+(define_predicate "conditional_move_operand"
+ (match_code "reg,subreg,const_int")
+{
+ /* Only defined for simple integers so far... */
+ if (mode != SImode && mode != HImode && mode != QImode)
+ return FALSE;
+
+ /* At the moment we can handle moving registers and loading constants. */
+ /* To be added: Addition/subtraction/bitops/multiplication of registers. */
+
+ switch (GET_CODE (op))
+ {
+ case REG:
+ return 1;
+
+ case CONST_INT:
+ return satisfies_constraint_I (op);
+
+ default:
+#if 0
+ fprintf (stderr, "Test for cond move op of type: %s\n",
+ GET_RTX_NAME (GET_CODE (op)));
+#endif
+ return 0;
+ }
+})
+
+;; Return true if the code is a test of the carry bit.
+
+(define_predicate "carry_compare_operand"
+ (match_code "eq,ne")
+{
+ rtx x;
+
+ if (GET_MODE (op) != CCmode && GET_MODE (op) != VOIDmode)
+ return FALSE;
+
+ if (GET_CODE (op) != NE && GET_CODE (op) != EQ)
+ return FALSE;
+
+ x = XEXP (op, 0);
+ if (!REG_P (x) || REGNO (x) != CARRY_REGNUM)
+ return FALSE;
+
+ x = XEXP (op, 1);
+ if (!CONST_INT_P (x) || INTVAL (x) != 0)
+ return FALSE;
+
+ return TRUE;
+})
+
+;; Return 1 if OP is an EQ or NE comparison operator.
+
+(define_predicate "eqne_comparison_operator"
+ (match_code "eq,ne")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == EQ || code == NE);
+})
+
+;; Return 1 if OP is a signed comparison operator.
+
+(define_predicate "signed_comparison_operator"
+ (match_code "eq,ne,lt,le,gt,ge")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (COMPARISON_P (op)
+ && (code == EQ || code == NE
+ || code == LT || code == LE || code == GT || code == GE));
+})
+
+;; Return true if OP is an acceptable argument for a move destination.
+
+(define_predicate "move_dest_operand"
+ (match_code "reg,subreg,mem")
+{
+ switch (GET_CODE (op))
+ {
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (MEM_P (SUBREG_REG (op)))
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ if (GET_CODE (XEXP (op, 0)) == POST_INC)
+ return 0; /* stores can't do post inc */
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+})
+
+;; Return true if OP is an acceptable argument for a single word move
+;; source.
+
+(define_predicate "move_src_operand"
+ (match_code "reg,subreg,mem,const_int,const_double,label_ref,const,symbol_ref")
+{
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF :
+ case SYMBOL_REF :
+ case CONST :
+ return addr24_operand (op, mode);
+ case CONST_INT :
+ /* ??? We allow more cse opportunities if we only allow constants
+ loadable with one insn, and split the rest into two. The instances
+ where this would help should be rare and the current way is
+ simpler. */
+ if (HOST_BITS_PER_WIDE_INT > 32)
+ {
+ HOST_WIDE_INT rest = INTVAL (op) >> 31;
+ return (rest == 0 || rest == -1);
+ }
+ else
+ return 1;
+ case CONST_DOUBLE :
+ if (mode == SFmode)
+ return 1;
+ else if (mode == SImode)
+ {
+ /* Large unsigned constants are represented as const_double's. */
+ unsigned HOST_WIDE_INT low, high;
+
+ low = CONST_DOUBLE_LOW (op);
+ high = CONST_DOUBLE_HIGH (op);
+ return high == 0 && low <= (unsigned) 0xffffffff;
+ }
+ else
+ return 0;
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (MEM_P (SUBREG_REG (op)))
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ if (GET_CODE (XEXP (op, 0)) == PRE_INC
+ || GET_CODE (XEXP (op, 0)) == PRE_DEC)
+ return 0; /* loads can't do pre-{inc,dec} */
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+})
+
+;; Return true if OP is an acceptable argument for a double word move
+;; source.
+
+(define_predicate "move_double_src_operand"
+ (match_code "reg,subreg,mem,const_int,const_double")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_INT :
+ case CONST_DOUBLE :
+ return 1;
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (MEM_P (SUBREG_REG (op)))
+ return move_double_src_operand (SUBREG_REG (op), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ /* Disallow auto inc/dec for now. */
+ if (GET_CODE (XEXP (op, 0)) == PRE_DEC
+ || GET_CODE (XEXP (op, 0)) == PRE_INC)
+ return 0;
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+})
+
+;; Return true if OP is a const_int requiring two instructions to
+;; load.
+
+(define_predicate "two_insn_const_operand"
+ (match_code "const_int")
+{
+ if (!CONST_INT_P (op))
+ return 0;
+ if (satisfies_constraint_J (op)
+ || satisfies_constraint_M (op)
+ || satisfies_constraint_L (op))
+ return 0;
+ return 1;
+})
+
+;; Returns 1 if OP is a symbol reference.
+
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST :
+ return 1;
+
+ default:
+ return 0;
+ }
+})
+
+;; Return true if OP is a signed 8-bit immediate value.
+
+(define_predicate "int8_operand"
+ (match_code "const_int")
+{
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_I (op);
+})
+
+;; Return true if OP is an unsigned 16-bit immediate value.
+
+(define_predicate "uint16_operand"
+ (match_code "const_int")
+{
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_K (op);
+})
+
+;; Return true if OP is a register or signed 16-bit value.
+
+(define_predicate "reg_or_int16_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (REG_P (op) || GET_CODE (op) == SUBREG)
+ return register_operand (op, mode);
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_J (op);
+})
+
+;; Return true if OP is a register or an unsigned 16-bit value.
+
+(define_predicate "reg_or_uint16_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (REG_P (op) || GET_CODE (op) == SUBREG)
+ return register_operand (op, mode);
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_K (op);
+})
+
+;; Return true if OP is a register or signed 16-bit value for
+;; compares.
+
+(define_predicate "reg_or_cmp_int16_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (REG_P (op) || GET_CODE (op) == SUBREG)
+ return register_operand (op, mode);
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_P (op);
+})
+
+;; Return true if OP is a register or an integer value that can be
+;; used in SEQ/SNE.  We can use either XOR of the value or ADD of the
+;; negative of the value for the constant.  Don't allow 0, because
+;; that is special-cased.
+
+(define_predicate "reg_or_eq_int16_operand"
+ (match_code "reg,subreg,const_int")
+{
+ HOST_WIDE_INT value;
+
+ if (REG_P (op) || GET_CODE (op) == SUBREG)
+ return register_operand (op, mode);
+
+ if (!CONST_INT_P (op))
+ return 0;
+
+ value = INTVAL (op);
+ return (value != 0) && (UINT16_P (value) || CMP_INT16_P (-value));
+})
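+;; Example (illustrative): for `x == 5' the seq split in m32r.md can
+;; emit `add3 tmp,x,#-5' (ADD of the negated constant) instead of
+;; `xor3 tmp,x,#5'; either way tmp is zero exactly when the operands
+;; are equal, and the following unsigned compare with 1 extracts that.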
+
+;; Return true if OP is a signed 16-bit immediate value useful in
+;; comparisons.
+
+(define_predicate "cmp_int16_operand"
+ (match_code "const_int")
+{
+ if (!CONST_INT_P (op))
+ return 0;
+ return satisfies_constraint_P (op);
+})
+
+;; Acceptable arguments to the call insn.
+
+(define_predicate "call_address_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+  /* Constants and values in registers are not OK, because the m32r
+     BL instruction only supports PC-relative branching.  */
+  return symbolic_operand (op, mode);
+})
+
+;; Return true if OP is an acceptable input argument for a zero/sign
+;; extend operation.
+
+(define_predicate "extend_operand"
+ (match_code "reg,subreg,mem")
+{
+ rtx addr;
+
+ switch (GET_CODE (op))
+ {
+ case REG :
+ case SUBREG :
+ return register_operand (op, mode);
+
+ case MEM :
+ addr = XEXP (op, 0);
+ if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ return 0; /* loads can't do pre inc/pre dec */
+
+ return address_operand (addr, mode);
+
+ default :
+ return 0;
+ }
+})
+
+;; Return nonzero if the operand is an insn that is a small
+;; insn. Allow const_int 0 as well, which is a placeholder for NOP
+;; slots.
+
+(define_predicate "small_insn_p"
+ (match_code "insn,call_insn,jump_insn")
+{
+ if (CONST_INT_P (op) && INTVAL (op) == 0)
+ return 1;
+
+ if (! INSN_P (op))
+ return 0;
+
+ return get_attr_length (op) == 2;
+})
+
+;; Return true if op is an integer constant, less than or equal to
+;; MAX_MOVE_BYTES.
+
+(define_predicate "m32r_block_immediate_operand"
+ (match_code "const_int")
+{
+ if (!CONST_INT_P (op)
+ || INTVAL (op) > MAX_MOVE_BYTES
+ || INTVAL (op) <= 0)
+ return 0;
+
+ return 1;
+})
+
+;; Return nonzero if the operand is an insn that is a large insn.
+
+(define_predicate "large_insn_p"
+ (match_code "insn,call_insn,jump_insn")
+{
+ if (! INSN_P (op))
+ return 0;
+
+ return get_attr_length (op) != 2;
+})
+
+;; Returns 1 if OP is an acceptable operand for seth/add3.
+
+(define_predicate "seth_add3_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ if (flag_pic)
+ return 0;
+
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && satisfies_constraint_J (XEXP (XEXP (op, 0), 1)))
+ return 1;
+
+ return 0;
+})
diff --git a/gcc/config/m32r/rtems.h b/gcc/config/m32r/rtems.h
new file mode 100644
index 000000000..add53f1d4
--- /dev/null
+++ b/gcc/config/m32r/rtems.h
@@ -0,0 +1,33 @@
+/* Definitions for rtems targeting a M32R using ELF.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
+
+/* Use the default */
+#undef LINK_GCC_C_SEQUENCE_SPEC
diff --git a/gcc/config/m32r/t-linux b/gcc/config/m32r/t-linux
new file mode 100644
index 000000000..dc8c8c4d2
--- /dev/null
+++ b/gcc/config/m32r/t-linux
@@ -0,0 +1,57 @@
+# Copyright (C) 2003, 2004, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# lib1funcs.asm is currently empty.
+CROSS_LIBGCC1 =
+
+# These are really part of libgcc1, but this will cause them to be
+# built correctly, so...
+
+LIB2FUNCS_EXTRA = fp-bit.c dp-bit.c
+
+# Turn off the SDA while compiling libgcc2. There are no headers for it
+# and we want maximal upward compatibility here.
+
+TARGET_LIBGCC2_CFLAGS = -G 0 -fPIC
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+# We need to use -fpic when we are using gcc to compile the routines in
+# initfini.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fpic when compiling the
+# routines in initfini.c.
+# -fpic currently isn't supported for the m32r.
+
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+# Don't install "assert.h" in gcc. We use the one in glibc.
+INSTALL_ASSERT_H =
+
+# Do not build libgcc1. Let gcc generate those functions. The GNU/Linux
+# C library can handle them.
+LIBGCC1 =
+CROSS_LIBGCC1 =
+LIBGCC1_TEST =
+
+SHLIB_MAPFILES += $(srcdir)/config/m32r/libgcc-glibc.ver
diff --git a/gcc/config/m32r/t-m32r b/gcc/config/m32r/t-m32r
new file mode 100644
index 000000000..17e1e3145
--- /dev/null
+++ b/gcc/config/m32r/t-m32r
@@ -0,0 +1,82 @@
+# Copyright (C) 1997, 1998, 1999, 2001, 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+# Turn off the SDA while compiling libgcc2. There are no headers for it
+# and we want maximal upward compatibility here.
+
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+# We need to use -fpic when we are using gcc to compile the routines in
+# initfini.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fpic when compiling the
+# routines in initfini.c.
+# -fpic currently isn't supported for the m32r.
+
+CRTSTUFF_T_CFLAGS =
+
+# .init/.fini section routines
+
+$(T)crtinit.o: $(srcdir)/config/m32r/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) \
+ $(CRTSTUFF_T_CFLAGS) $(INCLUDES) -DCRT_INIT \
+ -finhibit-size-directive -fno-inline-functions -g0 \
+ -mmodel=medium -c $(srcdir)/config/m32r/initfini.c \
+ -o $(T)crtinit.o
+
+$(T)crtfini.o: $(srcdir)/config/m32r/initfini.c $(GCC_PASSES) $(CONFIG_H)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) \
+ $(CRTSTUFF_T_CFLAGS) $(INCLUDES) -DCRT_FINI \
+ -finhibit-size-directive -fno-inline-functions -g0 \
+ -mmodel=medium -c $(srcdir)/config/m32r/initfini.c \
+ -o $(T)crtfini.o
+m32rx:
+ mkdir $@
+m32r2:
+ mkdir $@
+
+# -mmodel={small,medium} requires separate libraries.
+# We don't build libraries for the large model; instead we use the medium
+# libraries.  The only difference is that the large model can handle jump
+# targets beyond the reach of a signed 26-bit displacement.
+
+MULTILIB_OPTIONS = mmodel=small/mmodel=medium m32r/m32rx/m32r2
+MULTILIB_DIRNAMES = small medium m32r m32rx m32r2
+MULTILIB_MATCHES = mmodel?medium=mmodel?large
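+# Illustration: the options above produce one libgcc per combination,
+# e.g. ./, small/, medium/, m32rx/, small/m32rx/, medium/m32r2/, and
+# MULTILIB_MATCHES makes -mmodel=large clients link against the
+# corresponding medium multilib.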
+
+# Set MULTILIB_EXTRA_OPTS so shipped libraries have small data in .sdata and
+# SHN_M32R_SCOMMON.
+# This is important for objects referenced in system header files.
+MULTILIB_EXTRA_OPTS = msdata=sdata
+
+EXTRA_MULTILIB_PARTS = crtinit.o crtfini.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/m68hc11/larith.asm b/gcc/config/m68hc11/larith.asm
new file mode 100644
index 000000000..09f946cbf
--- /dev/null
+++ b/gcc/config/m68hc11/larith.asm
@@ -0,0 +1,1333 @@
+/* libgcc routines for M68HC11 & M68HC12.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2008, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef __HAVE_SHORT_INT__
+ .mode mshort
+#else
+ .mode mlong
+#endif
+
+ .macro declare_near name
+ .globl \name
+ .type \name,@function
+ .size \name,.Lend-\name
+\name:
+ .endm
+
+#if defined(__USE_RTC__)
+# define ARG(N) N+1
+
+ .macro ret
+#if defined(mc68hc12)
+ rtc
+#else
+ jmp __return_32
+#endif
+ .endm
+
+ .macro declare name
+ .globl \name
+ .type \name,@function
+ .size \name,.Lend-\name
+ .far \name
+\name:
+ .endm
+
+ .macro farsym name
+	.far \name
+ .endm
+
+#else
+# define ARG(N) N
+
+ .macro ret
+ rts
+ .endm
+
+ .macro farsym name
+ .endm
+
+ .macro declare name
+ .globl \name
+ .type \name,@function
+ .size \name,.Lend-\name
+\name:
+ .endm
+
+#endif
+
+ .sect .text
+
+
+#define REG(NAME) \
+NAME: .dc.w 1; \
+ .type NAME,@object ; \
+ .size NAME,2
+
+#ifdef L_regs_min
+/* Pseudo hard registers used by gcc.
+ They should be located in page0. */
+
+ .sect .softregs
+ .globl _.tmp
+ .globl _.z,_.xy
+REG(_.tmp)
+REG(_.z)
+REG(_.xy)
+
+#endif
+
+#ifdef L_regs_frame
+ .sect .softregs
+ .globl _.frame
+REG(_.frame)
+#endif
+
+#ifdef L_regs_d1_2
+ .sect .softregs
+ .globl _.d1,_.d2
+REG(_.d1)
+REG(_.d2)
+#endif
+
+#ifdef L_regs_d3_4
+ .sect .softregs
+ .globl _.d3,_.d4
+REG(_.d3)
+REG(_.d4)
+#endif
+
+#ifdef L_regs_d5_6
+ .sect .softregs
+ .globl _.d5,_.d6
+REG(_.d5)
+REG(_.d6)
+#endif
+
+#ifdef L_regs_d7_8
+ .sect .softregs
+ .globl _.d7,_.d8
+REG(_.d7)
+REG(_.d8)
+#endif
+
+#ifdef L_regs_d9_16
+/* Pseudo hard registers used by gcc.
+ They should be located in page0. */
+ .sect .softregs
+ .globl _.d9,_.d10,_.d11,_.d12,_.d13,_.d14
+ .globl _.d15,_.d16
+REG(_.d9)
+REG(_.d10)
+REG(_.d11)
+REG(_.d12)
+REG(_.d13)
+REG(_.d14)
+REG(_.d15)
+REG(_.d16)
+
+#endif
+
+#ifdef L_regs_d17_32
+/* Pseudo hard registers used by gcc.
+ They should be located in page0. */
+ .sect .softregs
+ .globl _.d17,_.d18,_.d19,_.d20,_.d21,_.d22
+ .globl _.d23,_.d24,_.d25,_.d26,_.d27,_.d28
+ .globl _.d29,_.d30,_.d31,_.d32
+REG(_.d17)
+REG(_.d18)
+REG(_.d19)
+REG(_.d20)
+REG(_.d21)
+REG(_.d22)
+REG(_.d23)
+REG(_.d24)
+REG(_.d25)
+REG(_.d26)
+REG(_.d27)
+REG(_.d28)
+REG(_.d29)
+REG(_.d30)
+REG(_.d31)
+REG(_.d32)
+#endif
+
+#ifdef L_premain
+;;
+;; Specific initialization for 68hc11 before the main.
+;; Nothing special for a generic routine; Just enable interrupts.
+;;
+ declare_near __premain
+ clra
+ tap ; Clear both I and X.
+ rts
+#endif
+
+#ifdef L__exit
+;;
+;; Exit operation. Just loop forever and wait for interrupts.
+;; (no other place to go)
+;; This operation is split into several pieces collected together by
+;; the linker script.  This allows destructors to be supported at the
+;; exit stage without impacting program size when there are no
+;; destructors.
+;;
+;; _exit:
+;; *(.fini0) /* Beginning of finish code (_exit symbol). */
+;; *(.fini1) /* Place holder for applications. */
+;; *(.fini2) /* C++ destructors. */
+;; *(.fini3) /* Place holder for applications. */
+;; *(.fini4) /* Runtime exit. */
+;;
+ .sect .fini0,"ax",@progbits
+ .globl _exit
+ .globl exit
+ .weak exit
+ farsym exit
+ farsym _exit
+exit:
+_exit:
+
+ .sect .fini4,"ax",@progbits
+fatal:
+ cli
+ wai
+ bra fatal
+#endif
+
+#ifdef L_abort
+;;
+;; Abort operation. This is defined for the GCC testsuite.
+;;
+ declare abort
+
+ ldd #255 ;
+#ifdef mc68hc12
+ trap #0x30
+#else
+ .byte 0xCD ; Generate an illegal instruction trap
+ .byte 0x03 ; The simulator catches this and stops.
+#endif
+ jmp _exit
+#endif
+
+#ifdef L_cleanup
+;;
+;; Cleanup operation used by exit().
+;;
+ declare _cleanup
+
+ ret
+#endif
+
+;-----------------------------------------
+; required gcclib code
+;-----------------------------------------
+#ifdef L_memcpy
+ declare memcpy
+ declare __memcpy
+
+ .weak memcpy
+;;;
+;;; void* memcpy(void*, const void*, size_t)
+;;;
+;;; D = dst Pmode
+;;; 2,sp = src Pmode
+;;; 4,sp = size HImode (size_t)
+;;;
+#ifdef mc68hc12
+ ldx ARG(2),sp
+ ldy ARG(4),sp
+ pshd
+ xgdy
+ lsrd
+ bcc Start
+ movb 1,x+,1,y+
+Start:
+ beq Done
+Loop:
+ movw 2,x+,2,y+
+ dbne d,Loop
+Done:
+ puld
+ ret
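+; Note: the lsrd above halves the byte count into D and shifts the
+; odd bit into the carry, so an odd-length copy moves one byte with
+; movb first and the remainder as words through the movw/dbne loop.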
+#else
+ xgdy
+ tsx
+ ldd ARG(4),x
+ ldx ARG(2),x ; SRC = X, DST = Y
+ cpd #0
+ beq End
+ pshy
+ inca ; Correction for the deca below
+L0:
+ psha ; Save high-counter part
+L1:
+ ldaa 0,x ; Copy up to 256 bytes
+ staa 0,y
+ inx
+ iny
+ decb
+ bne L1
+ pula
+ deca
+ bne L0
+ puly ; Restore Y to return the DST
+End:
+ xgdy
+ ret
+#endif
+#endif
+
+#ifdef L_memset
+ declare memset
+ declare __memset
+;;;
+;;; void* memset(void*, int value, size_t)
+;;;
+#ifndef __HAVE_SHORT_INT__
+;;; D = dst Pmode
+;;; 2,sp = src SImode
+;;; 6,sp = size HImode (size_t)
+ val = ARG(5)
+ size = ARG(6)
+#else
+;;; D = dst Pmode
+;;; 2,sp = src SImode
+;;; 6,sp = size HImode (size_t)
+ val = ARG(3)
+ size = ARG(4)
+#endif
+#ifdef mc68hc12
+ xgdx
+ ldab val,sp
+ ldy size,sp
+ pshx
+ beq End
+Loop:
+ stab 1,x+
+ dbne y,Loop
+End:
+ puld
+ ret
+#else
+ xgdx
+ tsy
+ ldab val,y
+ ldy size,y ; DST = X, CNT = Y
+ beq End
+ pshx
+L0:
+ stab 0,x ; Fill up to 256 bytes
+ inx
+ dey
+ bne L0
+ pulx ; Restore X to return the DST
+End:
+ xgdx
+ ret
+#endif
+#endif
+
+#ifdef L_adddi3
+ declare ___adddi3
+
+ tsx
+ xgdy
+ ldd ARG(8),x ; Add LSB
+ addd ARG(16),x
+ std 6,y ; Save (carry preserved)
+
+ ldd ARG(6),x
+ adcb ARG(15),x
+ adca ARG(14),x
+ std 4,y
+
+ ldd ARG(4),x
+ adcb ARG(13),x
+ adca ARG(12),x
+ std 2,y
+
+ ldd ARG(2),x
+ adcb ARG(11),x ; Add MSB
+ adca ARG(10),x
+ std 0,y
+
+ xgdy
+ ret
+#endif
+
+#ifdef L_subdi3
+ declare ___subdi3
+
+ tsx
+ xgdy
+ ldd ARG(8),x ; Subtract LSB
+ subd ARG(16),x
+ std 6,y ; Save, borrow preserved
+
+ ldd ARG(6),x
+ sbcb ARG(15),x
+ sbca ARG(14),x
+ std 4,y
+
+ ldd ARG(4),x
+ sbcb ARG(13),x
+ sbca ARG(12),x
+ std 2,y
+
+ ldd ARG(2),x ; Subtract MSB
+ sbcb ARG(11),x
+ sbca ARG(10),x
+ std 0,y
+
+ xgdy ;
+ ret
+#endif
+
+#ifdef L_notdi2
+ declare ___notdi2
+
+ tsy
+ xgdx
+ ldd ARG(8),y
+ coma
+ comb
+ std 6,x
+
+ ldd ARG(6),y
+ coma
+ comb
+ std 4,x
+
+ ldd ARG(4),y
+ coma
+ comb
+ std 2,x
+
+ ldd ARG(2),y
+ coma
+ comb
+ std 0,x
+ xgdx
+ ret
+#endif
+
+#ifdef L_negsi2
+ declare_near ___negsi2
+
+ comb
+ coma
+ xgdx
+ comb
+ coma
+ inx
+ xgdx
+ bne done
+ inx
+done:
+ rts
+#endif
+
+#ifdef L_one_cmplsi2
+ declare_near ___one_cmplsi2
+
+ comb
+ coma
+ xgdx
+ comb
+ coma
+ xgdx
+ rts
+#endif
+
+#ifdef L_ashlsi3
+ declare_near ___ashlsi3
+
+ xgdy
+ clra
+ andb #0x1f
+ xgdy
+ beq Return
+Loop:
+ lsld
+ xgdx
+ rolb
+ rola
+ xgdx
+ dey
+ bne Loop
+Return:
+ rts
+#endif
+
+#ifdef L_ashrsi3
+ declare_near ___ashrsi3
+
+ xgdy
+ clra
+ andb #0x1f
+ xgdy
+ beq Return
+Loop:
+ xgdx
+ asra
+ rorb
+ xgdx
+ rora
+ rorb
+ dey
+ bne Loop
+Return:
+ rts
+#endif
+
+#ifdef L_lshrsi3
+ declare_near ___lshrsi3
+
+ xgdy
+ clra
+ andb #0x1f
+ xgdy
+ beq Return
+Loop:
+ xgdx
+ lsrd
+ xgdx
+ rora
+ rorb
+ dey
+ bne Loop
+Return:
+ rts
+#endif
+
+#ifdef L_lshrhi3
+ declare_near ___lshrhi3
+
+ cpx #16
+ bge Return_zero
+ cpx #0
+ beq Return
+Loop:
+ lsrd
+ dex
+ bne Loop
+Return:
+ rts
+Return_zero:
+ clra
+ clrb
+ rts
+#endif
+
+#ifdef L_lshlhi3
+ declare_near ___lshlhi3
+
+ cpx #16
+ bge Return_zero
+ cpx #0
+ beq Return
+Loop:
+ lsld
+ dex
+ bne Loop
+Return:
+ rts
+Return_zero:
+ clra
+ clrb
+ rts
+#endif
+
+#ifdef L_rotrhi3
+ declare_near ___rotrhi3
+
+___rotrhi3:
+ xgdx
+ clra
+ andb #0x0f
+ xgdx
+ beq Return
+Loop:
+ tap
+ rorb
+ rora
+ dex
+ bne Loop
+Return:
+ rts
+#endif
+
+#ifdef L_rotlhi3
+ declare_near ___rotlhi3
+
+___rotlhi3:
+ xgdx
+ clra
+ andb #0x0f
+ xgdx
+ beq Return
+Loop:
+ asrb
+ rolb
+ rola
+ rolb
+ dex
+ bne Loop
+Return:
+ rts
+#endif
+
+#ifdef L_ashrhi3
+ declare_near ___ashrhi3
+
+ cpx #16
+ bge Return_minus_1_or_zero
+ cpx #0
+ beq Return
+Loop:
+ asra
+ rorb
+ dex
+ bne Loop
+Return:
+ rts
+Return_minus_1_or_zero:
+ clrb
+ tsta
+ bpl Return_zero
+ comb
+Return_zero:
+ tba
+ rts
+#endif
+
+#ifdef L_ashrqi3
+ declare_near ___ashrqi3
+
+ cmpa #8
+ bge Return_minus_1_or_zero
+ tsta
+ beq Return
+Loop:
+ asrb
+ deca
+ bne Loop
+Return:
+ rts
+Return_minus_1_or_zero:
+ clrb
+ tstb
+ bpl Return_zero
+ coma
+Return_zero:
+ tab
+ rts
+#endif
+
+#ifdef L_lshlqi3
+ declare_near ___lshlqi3
+
+ cmpa #8
+ bge Return_zero
+ tsta
+ beq Return
+Loop:
+ lslb
+ deca
+ bne Loop
+Return:
+ rts
+Return_zero:
+ clrb
+ rts
+#endif
+
+#ifdef L_divmodhi4
+#ifndef mc68hc12
+/* 68HC12 signed divisions are generated inline (idivs). */
+
+ declare_near __divmodhi4
+
+;
+;; D = numerator
+;; X = denominator
+;;
+;; Result: D = D / X
+;; X = D % X
+;;
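+;; Example (illustrative): -7 / 2.  The numerator is negated to 7,
+;; `idiv' yields quotient 3 and remainder 1, and the sign fix-ups
+;; below produce D = -3 and X = -1, matching C's truncating division.
+;;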
+ tsta
+ bpl Numerator_pos
+ comb ; D = -D <=> D = (~D) + 1
+ coma
+ xgdx
+ inx
+ tsta
+ bpl Numerator_neg_denominator_pos
+Numerator_neg_denominator_neg:
+ comb ; X = -X
+ coma
+ addd #1
+ xgdx
+ idiv
+ coma
+ comb
+ xgdx ; Remainder <= 0 and result >= 0
+ inx
+ rts
+
+Numerator_pos_denominator_pos:
+ xgdx
+ idiv
+ xgdx ; Both values are >= 0
+ rts
+
+Numerator_pos:
+ xgdx
+ tsta
+ bpl Numerator_pos_denominator_pos
+Numerator_pos_denominator_neg:
+ coma ; X = -X
+ comb
+ xgdx
+ inx
+ idiv
+ xgdx ; Remainder >= 0 but result <= 0
+ coma
+ comb
+ addd #1
+ rts
+
+Numerator_neg_denominator_pos:
+ xgdx
+ idiv
+ coma ; One value is > 0 and the other < 0
+ comb ; Change the sign of result and remainder
+ xgdx
+ inx
+ coma
+ comb
+ addd #1
+ rts
+#endif /* !mc68hc12 */
+#endif
+
+#ifdef L_mulqi3
+ declare_near ___mulqi3
+
+;
+; short __mulqi3(signed char a, signed char b);
+;
+; signed char a -> register A
+; signed char b -> register B
+;
+; returns the signed result of A * B in register D.
+;
+ tsta
+ bmi A_neg
+ tstb
+ bmi B_neg
+ mul
+ rts
+B_neg:
+ negb
+ bra A_or_B_neg
+A_neg:
+ nega
+ tstb
+ bmi AB_neg
+A_or_B_neg:
+ mul
+ coma
+ comb
+ addd #1
+ rts
+AB_neg:
+ negb
+ mul
+ rts
+#endif
+
+#ifdef L_mulhi3
+ declare_near ___mulhi3
+
+;
+;
+; unsigned short ___mulhi3(unsigned short a, unsigned short b)
+;
+; a = register D
+; b = register X
+;
+#ifdef mc68hc12
+ pshx ; Preserve X
+ exg x,y
+ emul
+ exg x,y
+ pulx
+ rts
+#else
+#ifdef NO_TMP
+ ;
+ ; 16-bit multiplication without temp memory location.
+ ; (smaller but slower)
+ ;
+ pshx ; (4)
+ ins ; (3)
+ pshb ; (3)
+ psha ; (3)
+ pshx ; (4)
+ pula ; (4)
+ pulx ; (5)
+ mul ; (10) B.high * A.low
+ xgdx ; (3)
+ mul ; (10) B.low * A.high
+ abx ; (3)
+ pula ; (4)
+ pulb ; (4)
+ mul ; (10) B.low * A.low
+ pshx ; (4)
+ tsx ; (3)
+ adda 1,x ; (4)
+ pulx ; (5)
+ rts ; (5) 20 bytes
+ ; ---
+ ; 91 cycles
+#else
+ stx *_.tmp ; (4)
+ pshb ; (3)
+ ldab *_.tmp+1 ; (3)
+ mul ; (10) A.high * B.low
+ ldaa *_.tmp ; (3)
+ stab *_.tmp ; (3)
+ pulb ; (4)
+ pshb ; (4)
+ mul ; (10) A.low * B.high
+ addb *_.tmp ; (4)
+ stab *_.tmp ; (3)
+ ldaa *_.tmp+1 ; (3)
+ pulb ; (4)
+ mul ; (10) A.low * B.low
+ adda *_.tmp ; (4)
+ rts ; (5) 24/32 bytes
+ ; 77/85 cycles
+#endif
+#endif
+#endif
+
+#ifdef L_mulhi32
+
+;
+;
+; unsigned long __mulhi32(unsigned short a, unsigned short b)
+;
+; a = register D
+; b = value on stack
+;
+; +---------------+
+; | B low | <- 7,x
+; +---------------+
+; | B high | <- 6,x
+; +---------------+
+; | PC low |
+; +---------------+
+; | PC high |
+; +---------------+
+; | Tmp low |
+; +---------------+
+; | Tmp high |
+; +---------------+
+; | A low |
+; +---------------+
+; | A high |
+; +---------------+ <- 0,x
+;
+;
+; <B-low> 5,x
+; <B-high> 4,x
+; <ret> 2,x
+; <A-low> 1,x
+; <A-high> 0,x
+;
+ declare_near __mulhi32
+
+#ifdef mc68hc12
+ ldy 2,sp
+ emul
+ exg x,y
+ rts
+#else
+ pshx ; Room for temp value
+ pshb
+ psha
+ tsx
+ ldab 6,x
+ mul
+ xgdy ; A.high * B.high
+ ldab 7,x
+ pula
+ mul ; A.high * B.low
+ std 2,x
+ ldaa 1,x
+ ldab 6,x
+ mul ; A.low * B.high
+ addd 2,x
+ stab 2,x
+ tab
+ aby
+ bcc N
+ ldab #0xff
+ aby
+ iny
+N:
+ ldab 7,x
+ pula
+ mul ; A.low * B.low
+ adda 2,x
+ pulx ; Drop temp location
+ pshy ; Put high part in X
+ pulx
+ bcc Ret
+ inx
+Ret:
+ rts
+#endif
+#endif
+
+#ifdef L_mulsi3
+
+;
+; <B-low> 8,y
+; <B-high> 6,y
+; <ret> 4,y
+; <tmp> 2,y
+; <A-low> 0,y
+;
+; D,X -> A
+; Stack -> B
+;
+; The result is:
+;
+; (((A.low * B.high) + (A.high * B.low)) << 16) + (A.low * B.low)
+;
+;
+;
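+; Example (illustrative): A = 0x00010002, B = 0x00030004 gives
+; A.low*B.high = 6, A.high*B.low = 4 and A.low*B.low = 8, so the
+; truncated 32-bit result is ((6 + 4) << 16) + 8 = 0x000A0008; the
+; A.high*B.high term falls entirely outside 32 bits and is dropped.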
+
+ declare __mulsi3
+
+#ifdef mc68hc12
+ pshd ; Save A.low
+ ldy ARG(4),sp
+ emul ; A.low * B.high
+ ldy ARG(6),sp
+ exg x,d
+ emul ; A.high * B.low
+ leax d,x
+ ldy ARG(6),sp
+ puld
+ emul ; A.low * B.low
+ exg d,y
+ leax d,x
+ exg d,y
+ ret
+#else
+B_low = ARG(8)
+B_high = ARG(6)
+A_low = 0
+A_high = 2
+ pshx
+ pshb
+ psha
+ tsy
+;
+; If B.low is 0, optimize into: (A.low * B.high) << 16
+;
+ ldd B_low,y
+ beq B_low_zero
+;
+; If A.high is 0, optimize into: (A.low * B.high) << 16 + (A.low * B.low)
+;
+ cpx #0
+ beq A_high_zero
+ bsr ___mulhi3 ; A.high * B.low
+;
+; If A.low is 0, optimize into: (A.high * B.low) << 16
+;
+ ldx A_low,y
+ beq A_low_zero ; X = 0, D = A.high * B.low
+ std 2,y
+;
+; If B.high is 0, we can avoid the (A.low * B.high) << 16 term.
+;
+ ldd B_high,y
+ beq B_high_zero
+ bsr ___mulhi3 ; A.low * B.high
+ addd 2,y
+ std 2,y
+;
+; Here, we know that A.low and B.low are not 0.
+;
+B_high_zero:
+ ldd B_low,y ; A.low is on the stack
+ bsr __mulhi32 ; A.low * B.low
+ xgdx
+ tsy ; Y was clobbered, get it back
+ addd 2,y
+A_low_zero: ; See A_low_zero_non_optimized below
+ xgdx
+Return:
+ ins
+ ins
+ ins
+ ins
+ ret
+;
+;
+; A_low_zero_non_optimized:
+;
+; At this step, X = 0 and D = (A.high * B.low)
+; Optimize into: (A.high * B.low) << 16
+;
+; xgdx
+;	clra		; Since X was 0, clearing D is superfluous.
+; clrb
+; bra Return
+; ----------------
+; B.low == 0, the result is: (A.low * B.high) << 16
+;
+; At this step:
+; D = B.low = 0
+; X = A.high ?
+; A.low is at A_low,y ?
+; B.low is at B_low,y ?
+;
+B_low_zero:
+ ldd A_low,y
+ beq Zero1
+ ldx B_high,y
+ beq Zero2
+ bsr ___mulhi3
+Zero1:
+ xgdx
+Zero2:
+ clra
+ clrb
+ bra Return
+; ----------------
+; A.high is 0, optimize into: (A.low * B.high) << 16 + (A.low * B.low)
+;
+; At this step:
+; D = B.low != 0
+; X = A.high = 0
+; A.low is at A_low,y ?
+; B.low is at B_low,y ?
+;
+A_high_zero:
+ ldd A_low,y ; A.low
+ beq Zero1
+ ldx B_high,y ; B.high
+ beq A_low_B_low
+ bsr ___mulhi3
+ std 2,y
+ bra B_high_zero ; Do the (A.low * B.low) and the add.
+
+; ----------------
+; A.high and B.high are 0; optimize into: (A.low * B.low)
+;
+; At this step:
+; D = B.high = 0
+; X = A.low != 0
+; A.low is at A_low,y != 0
+; B.high is at B_high,y = 0
+;
+A_low_B_low:
+ ldd B_low,y ; A.low is on the stack
+ bsr __mulhi32
+ bra Return
+#endif
+#endif
+
+#ifdef L_map_data
+
+ .sect .install2,"ax",@progbits
+ .globl __map_data_section
+ .globl __data_image
+#ifdef mc68hc12
+ .globl __data_section_size
+#endif
+__map_data_section:
+#ifdef mc68hc12
+ ldx #__data_image
+ ldy #__data_section_start
+ ldd #__data_section_size
+ beq Done
+Loop:
+ movb 1,x+,1,y+
+ dbne d,Loop
+#else
+ ldx #__data_image
+ ldy #__data_section_start
+ bra Start_map
+Loop:
+ ldaa 0,x
+ staa 0,y
+ inx
+ iny
+Start_map:
+ cpx #__data_image_end
+ blo Loop
+#endif
+Done:
+
+#endif
+
+#ifdef L_init_bss
+
+ .sect .install2,"ax",@progbits
+ .globl __init_bss_section
+
+__init_bss_section:
+ ldd #__bss_size
+ beq Done
+ ldx #__bss_start
+Loop:
+#ifdef mc68hc12
+ clr 1,x+
+ dbne d,Loop
+#else
+ clr 0,x
+ inx
+ subd #1
+ bne Loop
+#endif
+Done:
+
+#endif
+
+#ifdef L_ctor
+
+; End of constructor table
+ .sect .install3,"ax",@progbits
+ .globl __do_global_ctors
+
+__do_global_ctors:
+ ; Start from the end - sizeof(void*)
+ ldx #__CTOR_END__-2
+ctors_loop:
+ cpx #__CTOR_LIST__
+ blo ctors_done
+ pshx
+ ldx 0,x
+ jsr 0,x
+ pulx
+ dex
+ dex
+ bra ctors_loop
+ctors_done:
+
+#endif
+
+#ifdef L_dtor
+
+ .sect .fini3,"ax",@progbits
+ .globl __do_global_dtors
+
+;;
+;; This piece of code is inserted in the _exit() code by the linker.
+;;
+__do_global_dtors:
+ pshb ; Save exit code
+ psha
+ ldx #__DTOR_LIST__
+dtors_loop:
+ cpx #__DTOR_END__
+ bhs dtors_done
+ pshx
+ ldx 0,x
+ jsr 0,x
+ pulx
+ inx
+ inx
+ bra dtors_loop
+dtors_done:
+ pula ; Restore exit code
+ pulb
+
+#endif
+
+#ifdef L_far_tramp
+#ifdef mc68hc12
+ .sect .tramp,"ax",@progbits
+ .globl __far_trampoline
+
+;; This is a trampoline used by the linker to invoke a function
+;; using rtc to return and being called with jsr/bsr.
+;; The trampoline generated is:
+;;
+;; foo_tramp:
+;; ldy #foo
+;; call __far_trampoline,page(foo)
+;;
+;; The linker transforms:
+;;
+;; jsr foo
+;;
+;; into
+;; jsr foo_tramp
+;;
+;; The linker generated trampoline and _far_trampoline must be in
+;; non-banked memory.
+;;
+__far_trampoline:
+ movb 0,sp, 2,sp ; Copy page register below the caller's return
+ leas 2,sp ; address.
+ jmp 0,y ; We have a 'call/rtc' stack layout now
+ ; and can jump to the far handler
+ ; (whose memory bank is mapped due to the
+ ; call to the trampoline).
+#endif
+
+#ifdef mc68hc11
+ .sect .tramp,"ax",@progbits
+ .globl __far_trampoline
+
+;; Trampoline generated by gcc for 68HC11:
+;;
+;; pshb
+;; ldab #%page(func)
+;; ldy #%addr(func)
+;; jmp __far_trampoline
+;;
+__far_trampoline:
+ psha ; (2) Save function parameter (high)
+ ;; <Read current page in A>
+ psha ; (2)
+ ;; <Set currenge page from B>
+ pshx ; (4)
+ tsx ; (3)
+ ldab 4,x ; (4) Restore function parameter (low)
+ ldaa 2,x ; (4) Get saved page number
+ staa 4,x ; (4) Save it below return PC
+ pulx ; (5)
+ pula ; (3)
+ pula ; (3) Restore function parameter (high)
+ jmp 0,y ; (4)
+#endif
+#endif
+
+#ifdef L_call_far
+#ifdef mc68hc11
+ .sect .tramp,"ax",@progbits
+ .globl __call_a16
+ .globl __call_a32
+;;
+;; The call methods are used for 68HC11 to support memory bank switching.
+;; Every far call is redirected to these call methods. Its purpose is to:
+;;
+;; 1/ Save the current page on the stack (1 byte to follow 68HC12 call frame)
+;; 2/ Install the new page
+;; 3/ Jump to the real function
+;;
+;; The page switching (get/save) is board-dependent.  The default provided
+;; here does nothing (it just creates the appropriate call frame).
+;;
+;; Call sequence (10 bytes, 13 cycles):
+;;
+;; ldx #page ; (3)
+;; ldy #func ; (4)
+;; jsr __call_a16 ; (6)
+;;
+;; Call trampoline (11 bytes, 19 cycles):
+;;
+__call_a16:
+ ;; xgdx ; (3)
+ ;; <Read current page in A> ; (3) ldaa _current_page
+ psha ; (2)
+ ;; <Set current page from B> ; (4) staa _current_page
+ ;; xgdx ; (3)
+ jmp 0,y ; (4)
+
+;;
+;; Call sequence (10 bytes, 14 cycles):
+;;
+;; pshb ; (2)
+;; ldab #page ; (2)
+;; ldy #func ; (4)
+;; jsr __call_a32 ; (6)
+;;
+;; Call trampoline (87 bytes, 57 cycles):
+;;
+__call_a32:
+ pshx ; (4)
+ psha ; (2)
+ ;; <Read current page in A> ; (3) ldaa _current_page
+ psha ; (2)
+ ;; <Set current page from B> ; (4) staa _current_page
+ tsx ; (3)
+ ldab 6,x ; (4) Restore function parameter
+ ldaa 5,x ; (4) Move PC return at good place
+ staa 6,x ; (4)
+ ldaa 4,x ; (4)
+ staa 5,x ; (4)
+ pula ; (3)
+ staa 4,x ; (4)
+ pula ; (3)
+ pulx ; (5)
+ jmp 0,y ; (4)
+#endif
+#endif
+
+#ifdef L_return_far
+#ifdef mc68hc11
+ .sect .tramp,"ax",@progbits
+ .globl __return_void
+ .globl __return_16
+ .globl __return_32
+
+__return_void:
+ ;; pulb
+ ;; <Set current page from B> (Board specific)
+ ;; rts
+__return_16:
+ ;; xgdx
+ ;; pulb
+ ;; <Set current page from B> (Board specific)
+ ;; xgdx
+ ;; rts
+__return_32:
+ ;; xgdy
+ ;; pulb
+ ;; <Set current page from B> (Board specific)
+ ;; xgdy
+ ;; rts
+ ins
+ rts
+#endif
+#endif
+.Lend:
+;-----------------------------------------
+; end required gcclib code
+;-----------------------------------------
diff --git a/gcc/config/m68hc11/m68hc11-crt0.S b/gcc/config/m68hc11/m68hc11-crt0.S
new file mode 100644
index 000000000..429ab0f27
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11-crt0.S
@@ -0,0 +1,86 @@
+/* Startup code for M68HC11.
+ Copyright (C) 1999, 2000, 2002, 2008, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+;-----------------------------------------
+; startup code
+;-----------------------------------------
+
+#ifdef __HAVE_SHORT_INT__
+ .mode mshort
+#else
+ .mode mlong
+#endif
+
+#if defined(__USE_RTC__) && defined(mc68hc12)
+ .macro jsr name
+ call \name
+ .endm
+#endif
+;;
+;;
+;; The linker concatenates the .install* sections in the following order:
+;;
+;; .install0	Sets up the stack pointer
+;; .install1	Placeholder for applications
+;; .install2	Optional installation of the data section in memory
+;; .install3	Placeholder for applications
+;; .install4	Invokes the main function
+;;
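+;; For illustration only: an application can hook a placeholder
+;; section, e.g. to set up a hardware register right after the stack
+;; is initialized (the address below is a made-up example):
+;;
+;;	.sect .install1,"ax",@progbits
+;;	ldaa	#0x13
+;;	staa	0x1039		; hypothetical configuration register
+;;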
+ .sect .install0,"ax",@progbits
+ .globl _start
+
+_start:
+;;
+;; At this point, the stack is not yet initialized and interrupts are masked.
+;; Applications only have 64 cycles to initialize some registers.
+;;
+;; To have a generic/configurable startup, initialize the stack to
+;; the end of some memory region. The _stack symbol is defined by
+;; the linker.
+;;
+ lds #_stack
+
+ .sect .install2,"ax",@progbits
+;;
+;; Call a specific initialization operation. The default is empty.
+;; It can be overridden by applications. It is intended to initialize
+;; the 68hc11 registers.  The function prototype is:
+;;
+;; int __premain(void);
+;;
+ jsr __premain
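+
+;; A minimal sketch of an application-provided __premain written in C
+;; (the register address used here is hypothetical):
+;;
+;;	int __premain (void)
+;;	{
+;;	  /* Configure a hypothetical hardware register.  */
+;;	  *(volatile unsigned char *) 0x1039 = 0x13;
+;;	  return 0;
+;;	}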
+
+;;
+;;
+;;
+ .sect .install4,"ax",@progbits
+ jsr main
+fatal:
+ jsr exit
+ bra fatal
+
+;-----------------------------------------
+; end startup code
+;-----------------------------------------
+;; Force inclusion of the data section mapping and bss clearing code
+ .2byte __map_data_section
+ .2byte __init_bss_section
diff --git a/gcc/config/m68hc11/m68hc11-protos.h b/gcc/config/m68hc11/m68hc11-protos.h
new file mode 100644
index 000000000..76b665937
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11-protos.h
@@ -0,0 +1,109 @@
+/* Prototypes for exported functions defined in m68hc11.c
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Stephane Carrez (stcarrez@nerim.fr)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+extern int hard_regno_mode_ok (int, enum machine_mode);
+extern int m68hc11_hard_regno_rename_ok (int, int);
+
+extern int m68hc11_total_frame_size (void);
+extern int m68hc11_initial_frame_pointer_offset (void);
+extern int m68hc11_initial_elimination_offset (int, int);
+
+extern void expand_prologue (void);
+extern void expand_epilogue (void);
+
+#ifdef RTX_CODE
+extern int m68hc11_auto_inc_p (rtx);
+
+extern rtx m68hc11_expand_compare_and_branch (enum rtx_code, rtx, rtx, rtx);
+extern enum reg_class preferred_reload_class (rtx, enum reg_class);
+
+extern void m68hc11_notice_update_cc (rtx, rtx);
+extern void m68hc11_notice_keep_cc (rtx);
+
+extern void m68hc11_gen_movqi (rtx, rtx*);
+extern void m68hc11_gen_movhi (rtx, rtx*);
+extern void m68hc11_gen_rotate (enum rtx_code, rtx, rtx*);
+
+extern void m68hc11_output_swap (rtx, rtx*);
+
+extern int next_insn_test_reg (rtx, rtx);
+
+extern int m68hc11_reload_operands (rtx*);
+
+extern int dead_register_here (rtx, rtx);
+
+extern int push_pop_operand_p (rtx);
+extern void m68hc11_split_move (rtx, rtx, rtx);
+extern void m68hc11_split_compare_and_branch (enum rtx_code,
+ rtx, rtx, rtx);
+
+extern rtx m68hc11_gen_lowpart (enum machine_mode, rtx);
+extern rtx m68hc11_gen_highpart (enum machine_mode, rtx);
+
+#ifdef HAVE_MACHINE_MODES
+extern int m68hc11_memory_move_cost (enum machine_mode, enum reg_class, int);
+extern int m68hc11_register_move_cost (enum machine_mode,
+ enum reg_class, enum reg_class);
+
+extern void m68hc11_emit_libcall (const char*, enum rtx_code,
+ enum machine_mode, enum machine_mode,
+ int, rtx*);
+extern int m68hc11_small_indexed_indirect_p (rtx, enum machine_mode);
+extern int m68hc11_symbolic_p (rtx, enum machine_mode);
+extern int m68hc11_indirect_p (rtx, enum machine_mode);
+extern int go_if_legitimate_address2 (rtx, enum machine_mode, int);
+
+extern int reg_or_indexed_operand (rtx, enum machine_mode);
+extern int memory_indexed_operand (rtx, enum machine_mode);
+
+#ifdef RTX_CODE
+extern void m68hc11_split_logical (enum machine_mode, enum rtx_code, rtx*);
+#endif
+
+extern int m68hc11_register_indirect_p (rtx, enum machine_mode);
+extern int m68hc11_valid_addressing_p (rtx, enum machine_mode, int);
+
+extern int symbolic_memory_operand (rtx, enum machine_mode);
+
+extern int memory_reload_operand (rtx, enum machine_mode);
+extern int arith_src_operand (rtx, enum machine_mode);
+extern int soft_reg_operand (rtx, enum machine_mode);
+
+extern void m68hc11_init_cumulative_args (CUMULATIVE_ARGS*, tree, rtx);
+
+#ifdef ARGS_SIZE_RTX
+extern enum direction m68hc11_function_arg_padding (enum machine_mode,
+ const_tree);
+#endif
+
+extern void m68hc11_function_epilogue (FILE*, int);
+
+extern int m68hc11_is_far_symbol (rtx);
+extern int m68hc11_is_trap_symbol (rtx);
+extern int m68hc11_page0_symbol_p (rtx x);
+
+extern HOST_WIDE_INT m68hc11_min_offset;
+extern HOST_WIDE_INT m68hc11_max_offset;
+extern int m68hc11_addr_mode;
+
+#endif /* HAVE_MACHINE_MODES */
+#endif /* RTX_CODE */
diff --git a/gcc/config/m68hc11/m68hc11.c b/gcc/config/m68hc11/m68hc11.c
new file mode 100644
index 000000000..f45de3d85
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11.c
@@ -0,0 +1,5582 @@
+/* Subroutines for code generation on Motorola 68HC11 and 68HC12.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Stephane Carrez (stcarrez@nerim.fr)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Note:
+ A first 68HC11 port was made by Otto Lind (otto@coactive.com)
+ on gcc 2.6.3. I have used it as a starting point for this port.
+ However, this new port is a complete re-write. Its internal
+ design is completely different. The generated code is not
+ compatible with the gcc 2.6.3 port.
+
+ The gcc 2.6.3 port is available at:
+
+ ftp.unina.it/pub/electronics/motorola/68hc11/gcc/gcc-6811-fsf.tar.gz
+
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "expr.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "expr.h"
+#include "libfuncs.h"
+#include "diagnostic-core.h"
+#include "basic-block.h"
+#include "function.h"
+#include "ggc.h"
+#include "reload.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+static void m68hc11_option_override (void);
+static void emit_move_after_reload (rtx, rtx, rtx);
+static rtx simplify_logical (enum machine_mode, int, rtx, rtx *);
+static void m68hc11_emit_logical (enum machine_mode, enum rtx_code, rtx *);
+static void m68hc11_reorg (void);
+static bool m68hc11_legitimate_address_p_1 (enum machine_mode, rtx, bool);
+static bool m68hc11_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx m68hc11_expand_compare (enum rtx_code, rtx, rtx);
+static int must_parenthesize (rtx);
+static int m68hc11_address_cost (rtx, bool);
+static int m68hc11_shift_cost (enum machine_mode, rtx, int);
+static int m68hc11_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
+static bool m68hc11_rtx_costs (rtx, int, int, int *, bool);
+static tree m68hc11_handle_fntype_attribute (tree *, tree, tree, int, bool *);
+static tree m68hc11_handle_page0_attribute (tree *, tree, tree, int, bool *);
+static bool m68hc11_class_likely_spilled_p (reg_class_t);
+
+void create_regs_rtx (void);
+
+static void asm_print_register (FILE *, int);
+static void m68hc11_print_operand (FILE *, rtx, int);
+static void m68hc11_print_operand_address (FILE *, rtx);
+static void m68hc11_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void m68hc11_asm_out_constructor (rtx, int);
+static void m68hc11_asm_out_destructor (rtx, int);
+static void m68hc11_file_start (void);
+static void m68hc11_encode_section_info (tree, rtx, int);
+static const char *m68hc11_strip_name_encoding (const char* str);
+static unsigned int m68hc11_section_type_flags (tree, const char*, int);
+static int autoinc_mode (rtx);
+static int m68hc11_make_autoinc_notes (rtx *, void *);
+static void m68hc11_init_libfuncs (void);
+static rtx m68hc11_struct_value_rtx (tree, int);
+static bool m68hc11_return_in_memory (const_tree, const_tree);
+static bool m68hc11_can_eliminate (const int, const int);
+static void m68hc11_conditional_register_usage (void);
+static void m68hc11_trampoline_init (rtx, tree, rtx);
+
+static rtx m68hc11_function_arg (CUMULATIVE_ARGS*, enum machine_mode,
+ const_tree, bool);
+static void m68hc11_function_arg_advance (CUMULATIVE_ARGS*, enum machine_mode,
+ const_tree, bool);
+
+/* Must be set to 1 to produce debug messages. */
+int debug_m6811 = 0;
+
+extern FILE *asm_out_file;
+
+rtx ix_reg;
+rtx iy_reg;
+rtx d_reg;
+rtx m68hc11_soft_tmp_reg;
+static GTY(()) rtx stack_push_word;
+static GTY(()) rtx stack_pop_word;
+static GTY(()) rtx z_reg;
+static GTY(()) rtx z_reg_qi;
+static int regs_inited = 0;
+
+/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
+int current_function_interrupt;
+
+/* Set to 1 by expand_prologue() when the function is a trap handler. */
+int current_function_trap;
+
+/* Set to 1 when the current function is placed in 68HC12 banked
+ memory and must return with rtc. */
+int current_function_far;
+
+/* Min offset that is valid for the indirect addressing mode. */
+HOST_WIDE_INT m68hc11_min_offset = 0;
+
+/* Max offset that is valid for the indirect addressing mode. */
+HOST_WIDE_INT m68hc11_max_offset = 256;
+
+/* The class value for base registers. */
+enum reg_class m68hc11_base_reg_class = A_REGS;
+
+/* The class value for index registers. This is NO_REGS for 68HC11. */
+enum reg_class m68hc11_index_reg_class = NO_REGS;
+
+enum reg_class m68hc11_tmp_regs_class = NO_REGS;
+
+/* Tables that tell whether a given hard register is valid for
+ a base or an index register. It is filled at init time depending
+ on the target processor. */
+unsigned char m68hc11_reg_valid_for_base[FIRST_PSEUDO_REGISTER];
+unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER];
+
+/* A correction offset which is applied to the stack pointer.
+ This is 1 for 68HC11 and 0 for 68HC12. */
+int m68hc11_sp_correction;
+
+int m68hc11_addr_mode;
+int m68hc11_mov_addr_mode;
+
+
+const struct processor_costs *m68hc11_cost;
+
+/* Costs for a 68HC11. */
+static const struct processor_costs m6811_cost = {
+ /* add */
+ COSTS_N_INSNS (2),
+ /* logical */
+ COSTS_N_INSNS (2),
+ /* non-constant shift */
+ COSTS_N_INSNS (20),
+ /* shiftQI const */
+ { COSTS_N_INSNS (0), COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+ COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
+ COSTS_N_INSNS (2), COSTS_N_INSNS (1) },
+
+ /* shiftHI const */
+ { COSTS_N_INSNS (0), COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ COSTS_N_INSNS (6), COSTS_N_INSNS (8), COSTS_N_INSNS (6),
+ COSTS_N_INSNS (4), COSTS_N_INSNS (2),
+ COSTS_N_INSNS (2), COSTS_N_INSNS (4),
+ COSTS_N_INSNS (6), COSTS_N_INSNS (8), COSTS_N_INSNS (10),
+ COSTS_N_INSNS (8), COSTS_N_INSNS (6), COSTS_N_INSNS (4)
+ },
+ /* mulQI */
+ COSTS_N_INSNS (20),
+ /* mulHI */
+ COSTS_N_INSNS (20 * 4),
+ /* mulSI */
+ COSTS_N_INSNS (20 * 16),
+ /* divQI */
+ COSTS_N_INSNS (20),
+ /* divHI */
+ COSTS_N_INSNS (80),
+ /* divSI */
+ COSTS_N_INSNS (100)
+};
+
+/* Costs for a 68HC12. */
+static const struct processor_costs m6812_cost = {
+ /* add */
+ COSTS_N_INSNS (2),
+ /* logical */
+ COSTS_N_INSNS (2),
+ /* non-constant shift */
+ COSTS_N_INSNS (20),
+ /* shiftQI const */
+ { COSTS_N_INSNS (0), COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+ COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
+ COSTS_N_INSNS (2), COSTS_N_INSNS (1) },
+
+ /* shiftHI const */
+ { COSTS_N_INSNS (0), COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ COSTS_N_INSNS (6), COSTS_N_INSNS (8), COSTS_N_INSNS (6),
+ COSTS_N_INSNS (4), COSTS_N_INSNS (2),
+ COSTS_N_INSNS (2), COSTS_N_INSNS (4), COSTS_N_INSNS (6),
+ COSTS_N_INSNS (8), COSTS_N_INSNS (10), COSTS_N_INSNS (8),
+ COSTS_N_INSNS (6), COSTS_N_INSNS (4)
+ },
+ /* mulQI */
+ COSTS_N_INSNS (3),
+ /* mulHI */
+ COSTS_N_INSNS (3),
+ /* mulSI */
+ COSTS_N_INSNS (3 * 4),
+ /* divQI */
+ COSTS_N_INSNS (12),
+ /* divHI */
+ COSTS_N_INSNS (12),
+ /* divSI */
+ COSTS_N_INSNS (100)
+};
+
+/* M68HC11 specific attributes. */
+
+static const struct attribute_spec m68hc11_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt", 0, 0, false, true, true, m68hc11_handle_fntype_attribute },
+ { "trap", 0, 0, false, true, true, m68hc11_handle_fntype_attribute },
+ { "far", 0, 0, false, true, true, m68hc11_handle_fntype_attribute },
+ { "near", 0, 0, false, true, true, m68hc11_handle_fntype_attribute },
+ { "page0", 0, 0, false, false, false, m68hc11_handle_page0_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
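+
+/* For illustration (not part of this file): these attributes are used
+   from C source with the standard attribute syntax, e.g.
+
+     void __attribute__((interrupt)) timer_isr (void);
+     void __attribute__((trap)) monitor (void);
+     int  __attribute__((far)) banked_func (int);
+
+   where `timer_isr', `monitor' and `banked_func' are hypothetical
+   application names.  */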
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE m68hc11_attribute_table
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND m68hc11_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS m68hc11_print_operand_address
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE m68hc11_output_function_epilogue
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START m68hc11_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO m68hc11_encode_section_info
+
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS m68hc11_section_type_flags
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS m68hc11_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST m68hc11_address_cost
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG m68hc11_reorg
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS m68hc11_init_libfuncs
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG m68hc11_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE m68hc11_function_arg_advance
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX m68hc11_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY m68hc11_return_in_memory
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_callee_copies_named
+
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING m68hc11_strip_name_encoding
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P m68hc11_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE m68hc11_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE m68hc11_conditional_register_usage
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P m68hc11_class_likely_spilled_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT m68hc11_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE m68hc11_option_override
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+static void
+m68hc11_option_override (void)
+{
+ memset (m68hc11_reg_valid_for_index, 0,
+ sizeof (m68hc11_reg_valid_for_index));
+ memset (m68hc11_reg_valid_for_base, 0, sizeof (m68hc11_reg_valid_for_base));
+
+  /* Compilation with -fpic generates wrong code.  */
+ if (flag_pic)
+ {
+ warning (0, "-f%s ignored for 68HC11/68HC12 (not supported)",
+ (flag_pic > 1) ? "PIC" : "pic");
+ flag_pic = 0;
+ }
+
+ /* Do not enable -fweb because it breaks the 32-bit shift patterns
+ by breaking the match_dup of those patterns. The shift patterns
+ will no longer be recognized after that. */
+ flag_web = 0;
+
+ /* Configure for a 68hc11 processor. */
+ if (TARGET_M6811)
+ {
+ target_flags &= ~(TARGET_AUTO_INC_DEC | TARGET_MIN_MAX);
+ m68hc11_cost = &m6811_cost;
+ m68hc11_min_offset = 0;
+ m68hc11_max_offset = 256;
+ m68hc11_index_reg_class = NO_REGS;
+ m68hc11_base_reg_class = A_REGS;
+ m68hc11_reg_valid_for_base[HARD_X_REGNUM] = 1;
+ m68hc11_reg_valid_for_base[HARD_Y_REGNUM] = 1;
+ m68hc11_reg_valid_for_base[HARD_Z_REGNUM] = 1;
+ m68hc11_sp_correction = 1;
+ m68hc11_tmp_regs_class = D_REGS;
+ m68hc11_addr_mode = ADDR_OFFSET;
+ m68hc11_mov_addr_mode = 0;
+ if (m68hc11_soft_reg_count < 0)
+ m68hc11_soft_reg_count = 4;
+ }
+
+ /* Configure for a 68hc12 processor. */
+ if (TARGET_M6812)
+ {
+ m68hc11_cost = &m6812_cost;
+ m68hc11_min_offset = -65536;
+ m68hc11_max_offset = 65536;
+ m68hc11_index_reg_class = D_REGS;
+ m68hc11_base_reg_class = A_OR_SP_REGS;
+ m68hc11_reg_valid_for_base[HARD_X_REGNUM] = 1;
+ m68hc11_reg_valid_for_base[HARD_Y_REGNUM] = 1;
+ m68hc11_reg_valid_for_base[HARD_Z_REGNUM] = 1;
+ m68hc11_reg_valid_for_base[HARD_SP_REGNUM] = 1;
+ m68hc11_reg_valid_for_index[HARD_D_REGNUM] = 1;
+ m68hc11_sp_correction = 0;
+ m68hc11_tmp_regs_class = TMP_REGS;
+ m68hc11_addr_mode = ADDR_INDIRECT | ADDR_OFFSET | ADDR_CONST
+ | (TARGET_AUTO_INC_DEC ? ADDR_INCDEC : 0);
+ m68hc11_mov_addr_mode = ADDR_OFFSET | ADDR_CONST
+ | (TARGET_AUTO_INC_DEC ? ADDR_INCDEC : 0);
+ target_flags |= MASK_NO_DIRECT_MODE;
+ if (m68hc11_soft_reg_count < 0)
+ m68hc11_soft_reg_count = 0;
+
+ if (TARGET_LONG_CALLS)
+ current_function_far = 1;
+ }
+}
+
+
+/* The soft registers are enabled or disabled according to the
+   -msoft-reg-count=<n> option.  */
+
+static void
+m68hc11_conditional_register_usage (void)
+{
+ int i;
+
+ if (m68hc11_soft_reg_count > SOFT_REG_LAST - SOFT_REG_FIRST)
+ m68hc11_soft_reg_count = SOFT_REG_LAST - SOFT_REG_FIRST;
+
+ for (i = SOFT_REG_FIRST + m68hc11_soft_reg_count; i < SOFT_REG_LAST; i++)
+ {
+ fixed_regs[i] = 1;
+ call_used_regs[i] = 1;
+ }
+
+ /* For 68HC12, the Z register emulation is not necessary when the
+ frame pointer is not used. The frame pointer is eliminated and
+ replaced by the stack register (which is a BASE_REG_CLASS). */
+ if (TARGET_M6812 && flag_omit_frame_pointer && optimize)
+ {
+ fixed_regs[HARD_Z_REGNUM] = 1;
+ }
+}
+
+
+/* Reload and register operations. */
+
+
+void
+create_regs_rtx (void)
+{
+ /* regs_inited = 1; */
+ ix_reg = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ iy_reg = gen_rtx_REG (HImode, HARD_Y_REGNUM);
+ d_reg = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_soft_tmp_reg = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+
+ stack_push_word = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ stack_pop_word = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+
+}
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+    - 8-bit values are stored anywhere (except the SP register).
+    - 16-bit values can be stored in any register whose size is 16 bits.
+    - 32-bit values can be stored in D, X registers or in a soft register
+      (except the last one because we need 2 soft registers).
+    - Values whose size is > 32 bit are not stored in real hard
+      registers.  They may be stored in soft registers if there are
+      enough of them.  */
+int
+hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 8:
+ return S_REGNO_P (regno) && m68hc11_soft_reg_count >= 4;
+
+ case 4:
+ return (X_REGNO_P (regno)
+ || (S_REGNO_P (regno) && m68hc11_soft_reg_count >= 2));
+
+ case 2:
+ return G_REGNO_P (regno);
+
+ case 1:
+ /* We have to accept a QImode in X or Y registers. Otherwise, the
+ reload pass will fail when some (SUBREG:QI (REG:HI X)) are defined
+ in the insns. Reload fails if the insn rejects the register class 'a'
+ as well as if it accepts it. Patterns that failed were
+ zero_extend_qihi2 and iorqi3. */
+
+ return G_REGNO_P (regno) && !SP_REGNO_P (regno);
+
+ default:
+ return 0;
+ }
+}
+
+int
+m68hc11_hard_regno_rename_ok (int reg1, int reg2)
+{
+  /* Don't accept renaming to the Z register.  We will replace it with
+     X, Y or D during the machine reorg pass.  */
+ if (reg2 == HARD_Z_REGNUM)
+ return 0;
+
+  /* Don't accept renaming D or X to the Y register as the code will be bigger.  */
+ if (TARGET_M6811 && reg2 == HARD_Y_REGNUM
+ && (D_REGNO_P (reg1) || X_REGNO_P (reg1)))
+ return 0;
+
+ return 1;
+}
+
+enum reg_class
+preferred_reload_class (rtx operand, enum reg_class rclass)
+{
+ enum machine_mode mode;
+
+ mode = GET_MODE (operand);
+
+ if (debug_m6811)
+ {
+ printf ("Preferred reload: (class=%s): ", reg_class_names[rclass]);
+ }
+
+ if (rclass == D_OR_A_OR_S_REGS && SP_REG_P (operand))
+ return m68hc11_base_reg_class;
+
+ if (rclass >= S_REGS && (GET_CODE (operand) == MEM
+ || GET_CODE (operand) == CONST_INT))
+ {
+      /* The S_REGS class must not be used.  The movhi template does not
+         work for moving a memory operand to a soft register.
+         Restrict to a hard reg.  */
+ switch (rclass)
+ {
+ default:
+ case G_REGS:
+ case D_OR_A_OR_S_REGS:
+ rclass = A_OR_D_REGS;
+ break;
+ case A_OR_S_REGS:
+ rclass = A_REGS;
+ break;
+ case D_OR_SP_OR_S_REGS:
+ rclass = D_OR_SP_REGS;
+ break;
+ case D_OR_Y_OR_S_REGS:
+ rclass = D_OR_Y_REGS;
+ break;
+ case D_OR_X_OR_S_REGS:
+ rclass = D_OR_X_REGS;
+ break;
+ case SP_OR_S_REGS:
+ rclass = SP_REGS;
+ break;
+ case Y_OR_S_REGS:
+ rclass = Y_REGS;
+ break;
+ case X_OR_S_REGS:
+ rclass = X_REGS;
+ break;
+ case D_OR_S_REGS:
+ rclass = D_REGS;
+ }
+ }
+ else if (rclass == Y_REGS && GET_CODE (operand) == MEM)
+ {
+ rclass = Y_REGS;
+ }
+ else if (rclass == A_OR_D_REGS && GET_MODE_SIZE (mode) == 4)
+ {
+ rclass = D_OR_X_REGS;
+ }
+ else if (rclass >= S_REGS && S_REG_P (operand))
+ {
+ switch (rclass)
+ {
+ default:
+ case G_REGS:
+ case D_OR_A_OR_S_REGS:
+ rclass = A_OR_D_REGS;
+ break;
+ case A_OR_S_REGS:
+ rclass = A_REGS;
+ break;
+ case D_OR_SP_OR_S_REGS:
+ rclass = D_OR_SP_REGS;
+ break;
+ case D_OR_Y_OR_S_REGS:
+ rclass = D_OR_Y_REGS;
+ break;
+ case D_OR_X_OR_S_REGS:
+ rclass = D_OR_X_REGS;
+ break;
+ case SP_OR_S_REGS:
+ rclass = SP_REGS;
+ break;
+ case Y_OR_S_REGS:
+ rclass = Y_REGS;
+ break;
+ case X_OR_S_REGS:
+ rclass = X_REGS;
+ break;
+ case D_OR_S_REGS:
+ rclass = D_REGS;
+ }
+ }
+ else if (rclass >= S_REGS)
+ {
+ if (debug_m6811)
+ {
+ printf ("Class = %s for: ", reg_class_names[rclass]);
+ fflush (stdout);
+ debug_rtx (operand);
+ }
+ }
+
+ if (debug_m6811)
+ {
+ printf (" => class=%s\n", reg_class_names[rclass]);
+ fflush (stdout);
+ debug_rtx (operand);
+ }
+
+ return rclass;
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
+
+static bool
+m68hc11_class_likely_spilled_p (reg_class_t rclass)
+{
+ switch (rclass)
+ {
+ case D_REGS:
+ case X_REGS:
+ case Y_REGS:
+ case A_REGS:
+ case SP_REGS:
+ case D_OR_X_REGS:
+ case D_OR_Y_REGS:
+ case X_OR_SP_REGS:
+ case Y_OR_SP_REGS:
+ case D_OR_SP_REGS:
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* Return 1 if the operand is a valid indexed addressing mode.
+   For 68hc11: n,r with n in [0..255] and r in the A_REGS class.
+   For 68hc12: n,r with no constraint on the constant, r in the A_REGS class.  */
+int
+m68hc11_valid_addressing_p (rtx operand, enum machine_mode mode, int addr_mode)
+{
+ rtx base, offset;
+
+ switch (GET_CODE (operand))
+ {
+ case MEM:
+ if ((addr_mode & ADDR_INDIRECT) && GET_MODE_SIZE (mode) <= 2)
+ return m68hc11_valid_addressing_p (XEXP (operand, 0), mode,
+ addr_mode & (ADDR_STRICT | ADDR_OFFSET));
+ return 0;
+
+ case POST_INC:
+ case PRE_INC:
+ case POST_DEC:
+ case PRE_DEC:
+ if (addr_mode & ADDR_INCDEC)
+ return m68hc11_valid_addressing_p (XEXP (operand, 0), mode,
+ addr_mode & ADDR_STRICT);
+ return 0;
+
+ case PLUS:
+ base = XEXP (operand, 0);
+ if (GET_CODE (base) == MEM)
+ return 0;
+
+ offset = XEXP (operand, 1);
+ if (GET_CODE (offset) == MEM)
+ return 0;
+
+ /* Indexed addressing mode with 2 registers. */
+ if (GET_CODE (base) == REG && GET_CODE (offset) == REG)
+ {
+ if (!(addr_mode & ADDR_INDEXED))
+ return 0;
+
+ addr_mode &= ADDR_STRICT;
+ if (REGNO_OK_FOR_BASE_P2 (REGNO (base), addr_mode)
+ && REGNO_OK_FOR_INDEX_P2 (REGNO (offset), addr_mode))
+ return 1;
+
+ if (REGNO_OK_FOR_BASE_P2 (REGNO (offset), addr_mode)
+ && REGNO_OK_FOR_INDEX_P2 (REGNO (base), addr_mode))
+ return 1;
+
+ return 0;
+ }
+
+ if (!(addr_mode & ADDR_OFFSET))
+ return 0;
+
+ if (GET_CODE (base) == REG)
+ {
+ if (!VALID_CONSTANT_OFFSET_P (offset, mode))
+ return 0;
+
+ if (!(addr_mode & ADDR_STRICT))
+ return 1;
+
+ return REGNO_OK_FOR_BASE_P2 (REGNO (base), 1);
+ }
+
+ if (GET_CODE (offset) == REG)
+ {
+ if (!VALID_CONSTANT_OFFSET_P (base, mode))
+ return 0;
+
+ if (!(addr_mode & ADDR_STRICT))
+ return 1;
+
+ return REGNO_OK_FOR_BASE_P2 (REGNO (offset), 1);
+ }
+ return 0;
+
+ case REG:
+ return REGNO_OK_FOR_BASE_P2 (REGNO (operand), addr_mode & ADDR_STRICT);
+
+ case CONST_INT:
+ if (addr_mode & ADDR_CONST)
+ return VALID_CONSTANT_OFFSET_P (operand, mode);
+ return 0;
+
+ default:
+ return 0;
+ }
+}
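+
+/* Examples (illustrative only): on 68HC11 an operand such as `12,x'
+   is valid when the constant fits in [0..255] and X is in A_REGS; on
+   68HC12 the constant offset is unrestricted, and the auto increment /
+   decrement forms are also accepted when ADDR_INCDEC is set in the
+   addressing mode mask.  */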
+
+/* Returns 1 if the operand fits in a 68HC11 indirect mode or in
+ a 68HC12 1-byte index addressing mode. */
+int
+m68hc11_small_indexed_indirect_p (rtx operand, enum machine_mode mode)
+{
+ rtx base, offset;
+ int addr_mode;
+
+ if (GET_CODE (operand) == REG && reload_in_progress
+ && REGNO (operand) >= FIRST_PSEUDO_REGISTER
+ && reg_equiv_memory_loc[REGNO (operand)])
+ {
+ operand = reg_equiv_memory_loc[REGNO (operand)];
+ operand = eliminate_regs (operand, VOIDmode, NULL_RTX);
+ }
+
+ if (GET_CODE (operand) != MEM)
+ return 0;
+
+ operand = XEXP (operand, 0);
+ if (CONSTANT_ADDRESS_P (operand))
+ return 1;
+
+ if (PUSH_POP_ADDRESS_P (operand))
+ return 1;
+
+ addr_mode = m68hc11_mov_addr_mode | (reload_completed ? ADDR_STRICT : 0);
+ if (!m68hc11_valid_addressing_p (operand, mode, addr_mode))
+ return 0;
+
+ if (TARGET_M6812 && GET_CODE (operand) == PLUS
+ && (reload_completed | reload_in_progress))
+ {
+ base = XEXP (operand, 0);
+ offset = XEXP (operand, 1);
+
+ /* The offset can be a symbol address and this is too big
+ for the operand constraint. */
+ if (GET_CODE (base) != CONST_INT && GET_CODE (offset) != CONST_INT)
+ return 0;
+
+ if (GET_CODE (base) == CONST_INT)
+ offset = base;
+
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 8:
+ if (INTVAL (offset) < -16 + 6 || INTVAL (offset) > 15 - 6)
+ return 0;
+ break;
+
+ case 4:
+ if (INTVAL (offset) < -16 + 2 || INTVAL (offset) > 15 - 2)
+ return 0;
+ break;
+
+ default:
+ if (INTVAL (offset) < -16 || INTVAL (offset) > 15)
+ return 0;
+ break;
+ }
+ }
+ return 1;
+}
+
+int
+m68hc11_register_indirect_p (rtx operand, enum machine_mode mode)
+{
+ int addr_mode;
+
+ if (GET_CODE (operand) == REG && reload_in_progress
+ && REGNO (operand) >= FIRST_PSEUDO_REGISTER
+ && reg_equiv_memory_loc[REGNO (operand)])
+ {
+ operand = reg_equiv_memory_loc[REGNO (operand)];
+ operand = eliminate_regs (operand, VOIDmode, NULL_RTX);
+ }
+ if (GET_CODE (operand) != MEM)
+ return 0;
+
+ operand = XEXP (operand, 0);
+ addr_mode = m68hc11_addr_mode | (reload_completed ? ADDR_STRICT : 0);
+ return m68hc11_valid_addressing_p (operand, mode, addr_mode);
+}
+
+static bool
+m68hc11_legitimate_address_p_1 (enum machine_mode mode, rtx operand,
+ bool strict)
+{
+ int addr_mode;
+
+ if (CONSTANT_ADDRESS_P (operand) && TARGET_M6812)
+ {
+      /* Reject global variables if they are too wide.  This forces their
+         address to be loaded in a register and generates smaller code.  */
+ if (GET_MODE_SIZE (mode) == 8)
+ return 0;
+
+ return 1;
+ }
+ addr_mode = m68hc11_addr_mode | (strict ? ADDR_STRICT : 0);
+ if (m68hc11_valid_addressing_p (operand, mode, addr_mode))
+ {
+ return 1;
+ }
+ if (PUSH_POP_ADDRESS_P (operand))
+ {
+ return 1;
+ }
+ if (symbolic_memory_operand (operand, mode))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+bool
+m68hc11_legitimate_address_p (enum machine_mode mode, rtx operand,
+ bool strict)
+{
+ int result;
+
+ if (debug_m6811)
+ {
+ printf ("Checking: ");
+ fflush (stdout);
+ debug_rtx (operand);
+ }
+
+ result = m68hc11_legitimate_address_p_1 (mode, operand, strict);
+
+ if (debug_m6811)
+ {
+ printf (" -> %s\n", result == 0 ? "NO" : "YES");
+ }
+
+ if (result == 0)
+ {
+ if (debug_m6811)
+ {
+ printf ("go_if_legitimate%s, ret 0: %d:",
+ (strict ? "_strict" : ""), mode);
+ fflush (stdout);
+ debug_rtx (operand);
+ }
+ }
+ return result;
+}
+
+
+int
+m68hc11_reload_operands (rtx operands[])
+{
+ enum machine_mode mode;
+
+ if (regs_inited == 0)
+ create_regs_rtx ();
+
+ mode = GET_MODE (operands[1]);
+
+ /* Input reload of indirect addressing (MEM (PLUS (REG) (CONST))). */
+ if (A_REG_P (operands[0]) && memory_reload_operand (operands[1], mode))
+ {
+ rtx big_offset = XEXP (XEXP (operands[1], 0), 1);
+ rtx base = XEXP (XEXP (operands[1], 0), 0);
+
+ if (GET_CODE (base) != REG)
+ {
+ rtx tmp = base;
+ base = big_offset;
+ big_offset = tmp;
+ }
+
+ /* If the offset is out of range, we have to compute the address
+ with a separate add instruction. We try to do this with an 8-bit
+ add on the A register. This is possible only if the lowest part
+ of the offset (i.e., big_offset % 256) is a valid constant offset
+ with respect to the mode. If it's not, we have to generate a
+ 16-bit add on the D register. From:
+
+         (SET (REG X) (MEM (PLUS (REG X) (CONST_INT 1000))))
+
+ we generate:
+
+ [(SET (REG D) (REG X)) (SET (REG X) (REG D))]
+ (SET (REG A) (PLUS (REG A) (CONST_INT 1000 / 256)))
+ [(SET (REG D) (REG X)) (SET (REG X) (REG D))]
+         (SET (REG X) (MEM (PLUS (REG X) (CONST_INT 1000 % 256))))
+
+         or, when the 8-bit add cannot be used:
+
+ (SET (REG X) (PLUS (REG X) (CONST_INT 1000 / 256 * 256)))
+ (SET (REG X) (MEM (PLUS (REG X) (CONST_INT 1000 % 256))))
+
+ */
+ if (!VALID_CONSTANT_OFFSET_P (big_offset, mode))
+ {
+ int vh, vl;
+ rtx reg = operands[0];
+ rtx offset;
+ int val = INTVAL (big_offset);
+
+
+          /* We use 'operands[0]' as a scratch register to compute the
+             address.  Make sure 'base' is in that register.  */
+ if (!rtx_equal_p (base, operands[0]))
+ {
+ emit_move_insn (reg, base);
+ }
+
+ if (val > 0)
+ {
+ vh = val >> 8;
+ vl = val & 0x0FF;
+ }
+ else
+ {
+ vh = (val >> 8) & 0x0FF;
+ vl = val & 0x0FF;
+ }
+
+ /* Create the lowest part offset that still remains to be added.
+ If it's not a valid offset, do a 16-bit add. */
+ offset = GEN_INT (vl);
+ if (!VALID_CONSTANT_OFFSET_P (offset, mode))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_PLUS (HImode, reg, big_offset)));
+ offset = const0_rtx;
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_PLUS (HImode, reg,
+ GEN_INT (vh << 8))));
+ }
+ emit_move_insn (operands[0],
+ gen_rtx_MEM (GET_MODE (operands[1]),
+ gen_rtx_PLUS (Pmode, reg, offset)));
+ return 1;
+ }
+ }
+
+ /* Use the normal gen_movhi pattern. */
+ return 0;
+}
+
+void
+m68hc11_emit_libcall (const char *name, enum rtx_code code,
+ enum machine_mode dmode, enum machine_mode smode,
+ int noperands, rtx *operands)
+{
+ rtx ret;
+ rtx insns;
+ rtx libcall;
+ rtx equiv;
+
+ start_sequence ();
+ libcall = gen_rtx_SYMBOL_REF (Pmode, name);
+ switch (noperands)
+ {
+ case 2:
+ ret = emit_library_call_value (libcall, NULL_RTX, LCT_CONST,
+ dmode, 1, operands[1], smode);
+ equiv = gen_rtx_fmt_e (code, dmode, operands[1]);
+ break;
+
+ case 3:
+ ret = emit_library_call_value (libcall, NULL_RTX,
+ LCT_CONST, dmode, 2,
+ operands[1], smode, operands[2],
+ smode);
+ equiv = gen_rtx_fmt_ee (code, dmode, operands[1], operands[2]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ insns = get_insns ();
+ end_sequence ();
+ emit_libcall_block (insns, operands[0], ret, equiv);
+}
+
+/* Returns true if X is a PRE/POST increment or decrement
+   (same as auto_inc_p () in rtlanal.c but does not take the
+   stack into account).  */
+int
+m68hc11_auto_inc_p (rtx x)
+{
+ return GET_CODE (x) == PRE_DEC
+ || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_INC;
+}
+
+
+/* Predicates for machine description. */
+
+int
+memory_reload_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return GET_CODE (operand) == MEM
+ && GET_CODE (XEXP (operand, 0)) == PLUS
+ && ((GET_CODE (XEXP (XEXP (operand, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (operand, 0), 1)) == CONST_INT)
+ || (GET_CODE (XEXP (XEXP (operand, 0), 1)) == REG
+ && GET_CODE (XEXP (XEXP (operand, 0), 0)) == CONST_INT));
+}
+
+int
+m68hc11_symbolic_p (rtx operand, enum machine_mode mode)
+{
+ if (GET_CODE (operand) == MEM)
+ {
+ rtx op = XEXP (operand, 0);
+
+ if (symbolic_memory_operand (op, mode))
+ return 1;
+ }
+ return 0;
+}
+
+int
+m68hc11_indirect_p (rtx operand, enum machine_mode mode)
+{
+ if (GET_CODE (operand) == MEM && GET_MODE (operand) == mode)
+ {
+ rtx op = XEXP (operand, 0);
+ int addr_mode;
+
+ if (m68hc11_page0_symbol_p (op))
+ return 1;
+
+ if (symbolic_memory_operand (op, mode))
+ return TARGET_M6812;
+
+ if (reload_in_progress)
+ return 1;
+
+ operand = XEXP (operand, 0);
+ addr_mode = m68hc11_addr_mode | (reload_completed ? ADDR_STRICT : 0);
+ return m68hc11_valid_addressing_p (operand, mode, addr_mode);
+ }
+ return 0;
+}
+
+int
+memory_indexed_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (operand) != MEM)
+ return 0;
+
+ operand = XEXP (operand, 0);
+ if (GET_CODE (operand) == PLUS)
+ {
+ if (GET_CODE (XEXP (operand, 0)) == REG)
+ operand = XEXP (operand, 0);
+ else if (GET_CODE (XEXP (operand, 1)) == REG)
+ operand = XEXP (operand, 1);
+ }
+ return GET_CODE (operand) == REG
+ && (REGNO (operand) >= FIRST_PSEUDO_REGISTER
+ || A_REGNO_P (REGNO (operand)));
+}
+
+int
+push_pop_operand_p (rtx operand)
+{
+ if (GET_CODE (operand) != MEM)
+ {
+ return 0;
+ }
+ operand = XEXP (operand, 0);
+ return PUSH_POP_ADDRESS_P (operand);
+}
+
+/* Returns 1 if OP is either a symbol reference or a sum of a symbol
+ reference and a constant. */
+
+int
+symbolic_memory_operand (rtx op, enum machine_mode mode)
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+
+ case CONST:
+ op = XEXP (op, 0);
+ return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+
+ /* ??? This clause seems to be irrelevant. */
+ case CONST_DOUBLE:
+ return GET_MODE (op) == mode;
+
+ case PLUS:
+ return symbolic_memory_operand (XEXP (op, 0), mode)
+ && symbolic_memory_operand (XEXP (op, 1), mode);
+
+ default:
+ return 0;
+ }
+}
+
+/* Emit the code to build the trampoline used to call a nested function.
+
+ 68HC11 68HC12
+
+ ldy #&CXT movw #&CXT,*_.d1
+ sty *_.d1 jmp FNADDR
+ jmp FNADDR
+
+*/
+static void
+m68hc11_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ const char *static_chain_reg = reg_names[STATIC_CHAIN_REGNUM];
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ /* Skip the '*'. */
+ if (*static_chain_reg == '*')
+ static_chain_reg++;
+ if (TARGET_M6811)
+ {
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT (0x18ce));
+ mem = adjust_address (m_tramp, HImode, 2);
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, HImode, 4);
+ emit_move_insn (mem, GEN_INT (0x18df));
+ mem = adjust_address (m_tramp, QImode, 6);
+ emit_move_insn (mem,
+ gen_rtx_CONST (QImode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ static_chain_reg)));
+ mem = adjust_address (m_tramp, QImode, 7);
+ emit_move_insn (mem, GEN_INT (0x7e));
+ mem = adjust_address (m_tramp, HImode, 8);
+ emit_move_insn (mem, fnaddr);
+ }
+ else
+ {
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT (0x1803));
+ mem = adjust_address (m_tramp, HImode, 2);
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, HImode, 4);
+ emit_move_insn (mem,
+ gen_rtx_CONST (HImode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ static_chain_reg)));
+ mem = adjust_address (m_tramp, QImode, 6);
+ emit_move_insn (mem, GEN_INT (0x06));
+ mem = adjust_address (m_tramp, HImode, 7);
+ emit_move_insn (mem, fnaddr);
+ }
+}
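+
+/* A usage sketch (illustrative, hypothetical code): GCC materializes
+   this trampoline when the address of a nested function escapes:
+
+     int outer (int x)
+     {
+       int inner (int y) { return x + y; }
+       return call_through (inner);
+     }
+
+   Here `call_through' is a hypothetical function taking a function
+   pointer; `inner' needs the static chain, so its address points to a
+   trampoline built by the code above.  */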
+
+/* Declaration of types. */
+
+/* Handle a "page0" attribute; arguments as in
+   struct attribute_spec.handler.  */
+static tree
+m68hc11_handle_page0_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree decl = *node;
+
+ if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ {
+ DECL_SECTION_NAME (decl) = build_string (6, ".page0");
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
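+
+/* Usage sketch (hypothetical variable name):
+
+     static unsigned char flags __attribute__((page0));
+
+   places `flags' in the .page0 section so that the direct addressing
+   mode can be used to access it.  */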
+
+/* Keep track of the symbol which has a `trap' attribute and which uses
+ the `swi' calling convention. Since there is only one trap, we only
+ record one such symbol. If there are several, a warning is reported. */
+static rtx trap_handler_symbol = 0;
+
+/* Handle an attribute requiring a FUNCTION_TYPE, FIELD_DECL or TYPE_DECL;
+ arguments as in struct attribute_spec.handler. */
+static tree
+m68hc11_handle_fntype_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+/* Undo the effects of the above. */
+
+static const char *
+m68hc11_strip_name_encoding (const char *str)
+{
+ return str + (*str == '*' || *str == '@' || *str == '&');
+}
+
+static void
+m68hc11_encode_label (tree decl)
+{
+ const char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+ int len = strlen (str);
+ char *newstr = XALLOCAVEC (char, len + 2);
+
+ newstr[0] = '@';
+ strcpy (&newstr[1], str);
+
+ XSTR (XEXP (DECL_RTL (decl), 0), 0) = ggc_alloc_string (newstr, len + 1);
+}
+
+/* Return 1 if this is a symbol in page0.  */
+int
+m68hc11_page0_symbol_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ return XSTR (x, 0) != 0 && XSTR (x, 0)[0] == '@';
+
+ case CONST:
+ return m68hc11_page0_symbol_p (XEXP (x, 0));
+
+ case PLUS:
+ if (!m68hc11_page0_symbol_p (XEXP (x, 0)))
+ return 0;
+
+ return GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) < 256
+ && INTVAL (XEXP (x, 1)) >= 0;
+
+ default:
+ return 0;
+ }
+}
+
+/* We want to recognize trap handlers so that we handle calls to traps
+ in a special manner (by issuing the trap). This information is stored
+ in SYMBOL_REF_FLAG. */
+
+static void
+m68hc11_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED)
+{
+ tree func_attr;
+ int trap_handler;
+ int is_far = 0;
+
+ if (TREE_CODE (decl) == VAR_DECL)
+ {
+ if (lookup_attribute ("page0", DECL_ATTRIBUTES (decl)) != 0)
+ m68hc11_encode_label (decl);
+ return;
+ }
+
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ return;
+
+ func_attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+
+
+ if (lookup_attribute ("far", func_attr) != NULL_TREE)
+ is_far = 1;
+ else if (lookup_attribute ("near", func_attr) == NULL_TREE)
+ is_far = TARGET_LONG_CALLS != 0;
+
+ trap_handler = lookup_attribute ("trap", func_attr) != NULL_TREE;
+ if (trap_handler && is_far)
+ {
+ warning (OPT_Wattributes, "%<trap%> and %<far%> attributes are "
+ "not compatible, ignoring %<far%>");
+ trap_handler = 0;
+ }
+ if (trap_handler)
+ {
+ if (trap_handler_symbol != 0)
+ warning (OPT_Wattributes, "%<trap%> attribute is already used");
+ else
+ trap_handler_symbol = XEXP (rtl, 0);
+ }
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = is_far;
+}
+
+static unsigned int
+m68hc11_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+
+ if (strncmp (name, ".eeprom", 7) == 0)
+ {
+ flags |= SECTION_WRITE | SECTION_CODE | SECTION_OVERRIDE;
+ }
+
+ return flags;
+}
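+
+/* Usage sketch: such a section can be populated from C with the
+   standard section attribute, e.g.
+
+     const char version[4] __attribute__((section(".eeprom"))) = "1.0";
+
+   The SECTION_WRITE | SECTION_CODE | SECTION_OVERRIDE flags above
+   force that classification regardless of the section contents.  */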
+
+int
+m68hc11_is_far_symbol (rtx sym)
+{
+ if (GET_CODE (sym) == MEM)
+ sym = XEXP (sym, 0);
+
+ return SYMBOL_REF_FLAG (sym);
+}
+
+int
+m68hc11_is_trap_symbol (rtx sym)
+{
+ if (GET_CODE (sym) == MEM)
+ sym = XEXP (sym, 0);
+
+ return trap_handler_symbol != 0 && rtx_equal_p (trap_handler_symbol, sym);
+}
+
+
+/* Argument support functions. */
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. Frame pointer elimination is automatically handled.
+
+ All other eliminations are valid. */
+
+bool
+m68hc11_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
+/* Define the offset between two registers, one to be eliminated, and the
+ other its replacement, at the start of a routine. */
+int
+m68hc11_initial_elimination_offset (int from, int to)
+{
+ int trap_handler;
+ tree func_attr;
+ int size;
+ int regno;
+
+ /* For a trap handler, we must take into account the registers which
+ are pushed on the stack during the trap (except the PC). */
+ func_attr = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ current_function_interrupt = lookup_attribute ("interrupt",
+ func_attr) != NULL_TREE;
+ trap_handler = lookup_attribute ("trap", func_attr) != NULL_TREE;
+
+ if (lookup_attribute ("far", func_attr) != 0)
+ current_function_far = 1;
+ else if (lookup_attribute ("near", func_attr) != 0)
+ current_function_far = 0;
+ else
+ current_function_far = (TARGET_LONG_CALLS != 0
+ && !current_function_interrupt
+ && !trap_handler);
+
+ if (trap_handler && from == ARG_POINTER_REGNUM)
+ size = 7;
+
+ /* For a function using 'call/rtc' we must take into account the
+ page register which is pushed in the call. */
+ else if (current_function_far && from == ARG_POINTER_REGNUM)
+ size = 1;
+ else
+ size = 0;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ {
+      /* 2 is for the saved frame pointer.
+         1 is for the 'sts' correction when creating the frame.  */
+ return get_frame_size () + 2 + m68hc11_sp_correction + size;
+ }
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ {
+ return m68hc11_sp_correction;
+ }
+
+  /* Account for any 2-byte soft (pseudo hard) registers that we need to save.  */
+ for (regno = SOFT_REG_FIRST; regno < SOFT_REG_LAST; regno++)
+ {
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ {
+ size += 2;
+ }
+ }
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_SP_REGNUM)
+ {
+ return get_frame_size () + size;
+ }
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_SP_REGNUM)
+ {
+ return size;
+ }
+ return 0;
+}
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+m68hc11_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname)
+{
+ tree ret_type;
+
+ z_replacement_completed = 0;
+ cum->words = 0;
+ cum->nregs = 0;
+
+ /* For a library call, we must find out the type of the return value.
+ When the return value is bigger than 4 bytes, it is returned in
+ memory. In that case, the first argument of the library call is a
+ pointer to the memory location. Because the first argument is passed in
+ register D, we have to identify this, so that the first function
+ parameter is not passed in D either. */
+ if (fntype == 0)
+ {
+ const char *name;
+ size_t len;
+
+ if (libname == 0 || GET_CODE (libname) != SYMBOL_REF)
+ return;
+
+      /* If the library name ends in 'di' or 'df', we assume it returns
+         some DImode or DFmode value, which is 64 bits wide.  */
+ name = XSTR (libname, 0);
+ len = strlen (name);
+ if (len > 3
+ && ((name[len - 2] == 'd'
+ && (name[len - 1] == 'f' || name[len - 1] == 'i'))
+ || (name[len - 3] == 'd'
+ && (name[len - 2] == 'i' || name[len - 2] == 'f'))))
+ {
+	  /* We have a match.  Mark the first parameter register as already used.  */
+ cum->words = 1;
+ cum->nregs = 1;
+ }
+ return;
+ }
+
+ ret_type = TREE_TYPE (fntype);
+
+ if (ret_type && aggregate_value_p (ret_type, fntype))
+ {
+ cum->words = 1;
+ cum->nregs = 1;
+ }
+}
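+
+/* For example, libgcc routines such as `__adddi3' and `__muldf3'
+   (with 'di'/'df' just before the final digit) match the name check
+   above: their 64-bit result is returned in memory, so register D
+   carries the pointer to the return slot and the first real argument
+   must not be passed in D.  */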
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+m68hc11_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (mode != BLKmode)
+ {
+ if (cum->words == 0 && GET_MODE_SIZE (mode) == 4)
+ {
+ cum->nregs = 2;
+ cum->words = GET_MODE_SIZE (mode);
+ }
+ else
+ {
+ cum->words += GET_MODE_SIZE (mode);
+ if (cum->words <= HARD_REG_SIZE)
+ cum->nregs = 1;
+ }
+ }
+ else
+ {
+ cum->words += int_size_in_bytes (type);
+ }
+ return;
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+m68hc11_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (cum->words != 0)
+ {
+ return NULL_RTX;
+ }
+
+ if (mode != BLKmode)
+ {
+ if (GET_MODE_SIZE (mode) == 2 * HARD_REG_SIZE)
+ return gen_rtx_REG (mode, HARD_X_REGNUM);
+
+ if (GET_MODE_SIZE (mode) > HARD_REG_SIZE)
+ {
+ return NULL_RTX;
+ }
+ return gen_rtx_REG (mode, HARD_D_REGNUM);
+ }
+ return NULL_RTX;
+}
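+
+/* Illustration (a sketch, not normative): with HARD_REG_SIZE == 2,
+   for `void f (long a, int b)' the 4-byte `a' is passed in the
+   register returned above, which the prologue code treats as the D+X
+   pair, and `b' goes on the stack because cum->words is then
+   non-zero; for `void g (int a, int b)', `a' goes in D and `b' on
+   the stack.  */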
+
+/* If defined, a C expression which determines whether, and in which direction,
+ to pad out an argument with extra space. The value should be of type
+ `enum direction': either `upward' to pad above the argument,
+ `downward' to pad below, or `none' to inhibit padding.
+
+ Structures are stored left shifted in their argument slot. */
+enum direction
+m68hc11_function_arg_padding (enum machine_mode mode, const_tree type)
+{
+ if (type != 0 && AGGREGATE_TYPE_P (type))
+ return upward;
+
+ /* Fall back to the default. */
+ return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
+}
+
+
+/* Function prologue and epilogue. */
+
+/* Emit a move after the reload pass has completed. This is used to
+ emit the prologue and epilogue. */
+static void
+emit_move_after_reload (rtx to, rtx from, rtx scratch)
+{
+ rtx insn;
+
+ if (TARGET_M6812 || H_REG_P (to) || H_REG_P (from))
+ {
+ insn = emit_move_insn (to, from);
+ }
+ else
+ {
+ emit_move_insn (scratch, from);
+ insn = emit_move_insn (to, scratch);
+ }
+
+ /* Put a REG_INC note to tell the flow analysis that the instruction
+ is necessary. */
+ if (IS_STACK_PUSH (to))
+ add_reg_note (insn, REG_INC, XEXP (XEXP (to, 0), 0));
+ else if (IS_STACK_POP (from))
+ add_reg_note (insn, REG_INC, XEXP (XEXP (from, 0), 0));
+
+  /* For 68HC11, put a REG_INC note on `sts _.frame' to prevent the cse
+     pass from thinking that sp == _.frame and later replacing an x = sp
+     with x = _.frame.  The problem is that we are lying to gcc and using
+     `txs' for x = sp (which is not really true because txs is really
+     x = sp + 1).  */
+ else if (TARGET_M6811 && SP_REG_P (from))
+ add_reg_note (insn, REG_INC, from);
+}
+
+int
+m68hc11_total_frame_size (void)
+{
+ int size;
+ int regno;
+
+ size = get_frame_size ();
+ if (current_function_interrupt)
+ {
+ size += 3 * HARD_REG_SIZE;
+ }
+ if (frame_pointer_needed)
+ size += HARD_REG_SIZE;
+
+ for (regno = SOFT_REG_FIRST; regno <= SOFT_REG_LAST; regno++)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ size += HARD_REG_SIZE;
+
+ return size;
+}
+
+static void
+m68hc11_output_function_epilogue (FILE *out ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* We catch the function epilogue generation to have a chance
+ to clear the z_replacement_completed flag. */
+ z_replacement_completed = 0;
+}
+
+void
+expand_prologue (void)
+{
+ tree func_attr;
+ int size;
+ int regno;
+ rtx scratch;
+
+ gcc_assert (reload_completed == 1);
+
+ size = get_frame_size ();
+
+ create_regs_rtx ();
+
+ /* Generate specific prologue for interrupt handlers. */
+ func_attr = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ current_function_interrupt = lookup_attribute ("interrupt",
+ func_attr) != NULL_TREE;
+ current_function_trap = lookup_attribute ("trap", func_attr) != NULL_TREE;
+ if (lookup_attribute ("far", func_attr) != NULL_TREE)
+ current_function_far = 1;
+ else if (lookup_attribute ("near", func_attr) != NULL_TREE)
+ current_function_far = 0;
+ else
+ current_function_far = (TARGET_LONG_CALLS != 0
+ && !current_function_interrupt
+ && !current_function_trap);
+
+  /* Get the scratch register to build the frame and push registers.
+     If the first argument is a 32-bit quantity, the D+X registers are
+     used for it, so use Y to compute the frame.  Otherwise, X is cheaper.
+     For 68HC12, this scratch register is not used.  */
+ if (crtl->args.info.nregs == 2)
+ scratch = iy_reg;
+ else
+ scratch = ix_reg;
+
+ /* Save current stack frame. */
+ if (frame_pointer_needed)
+ emit_move_after_reload (stack_push_word, hard_frame_pointer_rtx, scratch);
+
+  /* For an interrupt handler, we must preserve _.tmp, _.z and _.xy.
+     Other soft registers in page0 need not be saved because they
+     will be restored by C functions.  For a trap handler, we don't
+     need to preserve these registers because this is a synchronous call.  */
+ if (current_function_interrupt)
+ {
+ emit_move_after_reload (stack_push_word, m68hc11_soft_tmp_reg, scratch);
+ emit_move_after_reload (stack_push_word,
+ gen_rtx_REG (HImode, SOFT_Z_REGNUM), scratch);
+ emit_move_after_reload (stack_push_word,
+ gen_rtx_REG (HImode, SOFT_SAVED_XY_REGNUM),
+ scratch);
+ }
+
+ /* Allocate local variables. */
+ if (TARGET_M6812 && (size > 4 || size == 3))
+ {
+ emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (-size)));
+ }
+ else if ((!optimize_size && size > 8) || (optimize_size && size > 10))
+ {
+ rtx insn;
+
+ insn = gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (HImode,
+ stack_pointer_rtx,
+ GEN_INT (-size))),
+ gen_rtx_CLOBBER (VOIDmode, scratch)));
+ emit_insn (insn);
+ }
+ else
+ {
+ int i;
+
+ /* Allocate by pushing scratch values. */
+ for (i = 2; i <= size; i += 2)
+ emit_move_after_reload (stack_push_word, ix_reg, 0);
+
+ if (size & 1)
+ emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, constm1_rtx));
+ }
+
+ /* Create the frame pointer. */
+ if (frame_pointer_needed)
+ emit_move_after_reload (hard_frame_pointer_rtx,
+ stack_pointer_rtx, scratch);
+
+ /* Push any 2 byte pseudo hard registers that we need to save. */
+ for (regno = SOFT_REG_FIRST; regno <= SOFT_REG_LAST; regno++)
+ {
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ {
+ emit_move_after_reload (stack_push_word,
+ gen_rtx_REG (HImode, regno), scratch);
+ }
+ }
+}
+
+void
+expand_epilogue (void)
+{
+ int size;
+ register int regno;
+ int return_size;
+ rtx scratch;
+
+ gcc_assert (reload_completed == 1);
+
+ size = get_frame_size ();
+
+ /* If we are returning a value in two registers, we have to preserve the
+ X register and use the Y register to restore the stack and the saved
+ registers. Otherwise, use X because it's faster (and smaller). */
+ if (crtl->return_rtx == 0)
+ return_size = 0;
+ else if (GET_CODE (crtl->return_rtx) == MEM)
+ return_size = HARD_REG_SIZE;
+ else
+ return_size = GET_MODE_SIZE (GET_MODE (crtl->return_rtx));
+
+ if (return_size > HARD_REG_SIZE && return_size <= 2 * HARD_REG_SIZE)
+ scratch = iy_reg;
+ else
+ scratch = ix_reg;
+
+ /* Pop any 2 byte pseudo hard registers that we saved. */
+ for (regno = SOFT_REG_LAST; regno >= SOFT_REG_FIRST; regno--)
+ {
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ {
+ emit_move_after_reload (gen_rtx_REG (HImode, regno),
+ stack_pop_word, scratch);
+ }
+ }
+
+  /* De-allocate auto variables.  */
+ if (TARGET_M6812 && (size > 4 || size == 3))
+ {
+ emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (size)));
+ }
+ else if ((!optimize_size && size > 8) || (optimize_size && size > 10))
+ {
+ rtx insn;
+
+ insn = gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (HImode,
+ stack_pointer_rtx,
+ GEN_INT (size))),
+ gen_rtx_CLOBBER (VOIDmode, scratch)));
+ emit_insn (insn);
+ }
+ else
+ {
+ int i;
+
+ for (i = 2; i <= size; i += 2)
+ emit_move_after_reload (scratch, stack_pop_word, scratch);
+ if (size & 1)
+ emit_insn (gen_addhi3 (stack_pointer_rtx,
+ stack_pointer_rtx, const1_rtx));
+ }
+
+ /* For an interrupt handler, restore ZTMP, ZREG and XYREG. */
+ if (current_function_interrupt)
+ {
+ emit_move_after_reload (gen_rtx_REG (HImode, SOFT_SAVED_XY_REGNUM),
+ stack_pop_word, scratch);
+ emit_move_after_reload (gen_rtx_REG (HImode, SOFT_Z_REGNUM),
+ stack_pop_word, scratch);
+ emit_move_after_reload (m68hc11_soft_tmp_reg, stack_pop_word, scratch);
+ }
+
+ /* Restore previous frame pointer. */
+ if (frame_pointer_needed)
+ emit_move_after_reload (hard_frame_pointer_rtx, stack_pop_word, scratch);
+
+ /* If the trap handler returns some value, copy the value
+ in D, X onto the stack so that the rti will pop the return value
+ correctly. */
+ else if (current_function_trap && return_size != 0)
+ {
+ rtx addr_reg = stack_pointer_rtx;
+
+ if (!TARGET_M6812)
+ {
+ emit_move_after_reload (scratch, stack_pointer_rtx, 0);
+ addr_reg = scratch;
+ }
+ emit_move_after_reload (gen_rtx_MEM (HImode,
+ gen_rtx_PLUS (HImode, addr_reg,
+ const1_rtx)), d_reg, 0);
+ if (return_size > HARD_REG_SIZE)
+ emit_move_after_reload (gen_rtx_MEM (HImode,
+ gen_rtx_PLUS (HImode, addr_reg,
+ GEN_INT (3))), ix_reg, 0);
+ }
+
+ emit_jump_insn (gen_return ());
+}
+
+
+/* Low and High part extraction for 68HC11. These routines are
+ similar to gen_lowpart and gen_highpart but they have been
+ fixed to work for constants and 68HC11 specific registers. */
+
+rtx
+m68hc11_gen_lowpart (enum machine_mode mode, rtx x)
+{
+  /* We assume that the low part of an auto-inc operand is the same rtx
+     with only the mode changed, and that the caller splits the larger
+     mode in the correct order.  */
+ if (GET_CODE (x) == MEM && m68hc11_auto_inc_p (XEXP (x, 0)))
+ {
+ return gen_rtx_MEM (mode, XEXP (x, 0));
+ }
+
+ /* Note that a CONST_DOUBLE rtx could represent either an integer or a
+ floating-point constant. A CONST_DOUBLE is used whenever the
+ constant requires more than one word in order to be adequately
+ represented. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ long l[2];
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l[0]);
+ }
+ else
+ {
+ rtx first, second;
+
+ split_double (x, &first, &second);
+ return second;
+ }
+ if (mode == SImode)
+ return GEN_INT (l[0]);
+
+ return gen_int_mode (l[0], HImode);
+ }
+ else
+ {
+ l[0] = CONST_DOUBLE_LOW (x);
+ }
+ switch (mode)
+ {
+ case SImode:
+ return GEN_INT (l[0]);
+ case HImode:
+ gcc_assert (GET_MODE (x) == SFmode);
+ return gen_int_mode (l[0], HImode);
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ if (mode == QImode && D_REG_P (x))
+ return gen_rtx_REG (mode, HARD_B_REGNUM);
+
+ /* gen_lowpart crashes when it is called with a SUBREG. */
+ if (GET_CODE (x) == SUBREG && SUBREG_BYTE (x) != 0)
+ {
+ switch (mode)
+ {
+ case SImode:
+ return gen_rtx_SUBREG (mode, SUBREG_REG (x), SUBREG_BYTE (x) + 4);
+ case HImode:
+ return gen_rtx_SUBREG (mode, SUBREG_REG (x), SUBREG_BYTE (x) + 2);
+ default:
+ gcc_unreachable ();
+ }
+ }
+ x = gen_lowpart (mode, x);
+
+ /* Return a different rtx to avoid sharing it between several insns
+ (when used by a split pattern). Sharing addresses within
+ a MEM breaks the Z register replacement (and reloading). */
+ if (GET_CODE (x) == MEM)
+ x = copy_rtx (x);
+ return x;
+}
+
+rtx
+m68hc11_gen_highpart (enum machine_mode mode, rtx x)
+{
+ /* We assume that the high part of an auto-inc operand is the same
+ address with the mode changed, and that the caller splits the
+ larger mode in the correct order. */
+ if (GET_CODE (x) == MEM && m68hc11_auto_inc_p (XEXP (x, 0)))
+ {
+ return gen_rtx_MEM (mode, XEXP (x, 0));
+ }
+
+ /* Note that a CONST_DOUBLE rtx could represent either an integer or a
+ floating-point constant. A CONST_DOUBLE is used whenever the
+ constant requires more than one word in order to be adequately
+ represented. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ long l[2];
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l[1]);
+ }
+ else
+ {
+ rtx first, second;
+
+ split_double (x, &first, &second);
+ return first;
+ }
+ if (mode == SImode)
+ return GEN_INT (l[1]);
+
+ return gen_int_mode ((l[1] >> 16), HImode);
+ }
+ else
+ {
+ l[1] = CONST_DOUBLE_HIGH (x);
+ }
+
+ switch (mode)
+ {
+ case SImode:
+ return GEN_INT (l[1]);
+ case HImode:
+ gcc_assert (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT);
+ return gen_int_mode ((l[0] >> 16), HImode);
+ default:
+ gcc_unreachable ();
+ }
+ }
+ if (GET_CODE (x) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (x);
+
+ if (mode == QImode)
+ {
+ return gen_int_mode (val >> 8, QImode);
+ }
+ else if (mode == HImode)
+ {
+ return gen_int_mode (val >> 16, HImode);
+ }
+ else if (mode == SImode)
+ {
+ return gen_int_mode ((val >> 16) >> 16, SImode);
+ }
+ }
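+ /* The high 8 bits of the 16-bit D register live in the hard A register. */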
+ if (mode == QImode && D_REG_P (x))
+ return gen_rtx_REG (mode, HARD_A_REGNUM);
+
+ /* There is no way in GCC to represent the upper part of a word register.
+ To obtain the 8-bit upper part of a soft register, we change the
+ reg into a mem rtx. This is possible because soft registers are
+ physically located in memory. There is no offset because we are
+ big-endian. */
+ if (mode == QImode && S_REG_P (x))
+ {
+ int pos;
+
+ /* Avoid the '*' for direct addressing mode when this
+ addressing mode is disabled. */
+ pos = TARGET_NO_DIRECT_MODE ? 1 : 0;
+ return gen_rtx_MEM (QImode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ &reg_names[REGNO (x)][pos]));
+ }
+
+ /* gen_highpart crashes when it is called with a SUBREG. */
+ switch (GET_CODE (x))
+ {
+ case SUBREG:
+ return gen_rtx_SUBREG (mode, XEXP (x, 0), XINT (x, 1));
+ case REG:
+ if (REGNO (x) < FIRST_PSEUDO_REGISTER)
+ return gen_rtx_REG (mode, REGNO (x));
+ else
+ return gen_rtx_SUBREG (mode, x, 0);
+ case MEM:
+ x = change_address (x, mode, 0);
+
+ /* Return a different rtx to avoid sharing it between several insns
+ (when used by a split pattern). Sharing addresses within
+ a MEM breaks the Z register replacement (and reloading). */
+ if (GET_CODE (x) == MEM)
+ x = copy_rtx (x);
+ return x;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Obscure register manipulation. */
+
+/* Scan backward through the instructions to see whether register
+ 'reg' is dead. This is used when generating code to see if we can
+ use 'reg' as a scratch register, which allows us to generate better
+ code when we know that some register dies or can be clobbered. */
+
+int
+dead_register_here (rtx x, rtx reg)
+{
+ rtx x_reg;
+ rtx p;
+
+ if (D_REG_P (reg))
+ x_reg = gen_rtx_REG (SImode, HARD_X_REGNUM);
+ else
+ x_reg = 0;
+
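+ /* Scan backward to the closest label: the register is dead if we
+ find a REG_DEAD note for it before any use; stop at calls, jumps
+ or an insn that sets it. */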
+ for (p = PREV_INSN (x); p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
+ if (INSN_P (p))
+ {
+ rtx body;
+
+ /* Stop the backward scan at calls and jumps. */
+ if (GET_CODE (p) == CALL_INSN)
+ break;
+ if (GET_CODE (p) == JUMP_INSN)
+ break;
+
+ body = PATTERN (p);
+
+ if (GET_CODE (body) == SET)
+ {
+ rtx dst = XEXP (body, 0);
+
+ if (GET_CODE (dst) == REG && REGNO (dst) == REGNO (reg))
+ break;
+ if (x_reg && rtx_equal_p (dst, x_reg))
+ break;
+
+ if (find_regno_note (p, REG_DEAD, REGNO (reg)))
+ return 1;
+ }
+ else if (reg_mentioned_p (reg, p)
+ || (x_reg && reg_mentioned_p (x_reg, p)))
+ break;
+ }
+
+ /* Scan forward to see if the register is set in some insns and never
+ used since then. */
+ for (p = x /*NEXT_INSN (x) */ ; p; p = NEXT_INSN (p))
+ {
+ rtx body;
+
+ if (GET_CODE (p) == CODE_LABEL
+ || GET_CODE (p) == JUMP_INSN
+ || GET_CODE (p) == CALL_INSN || GET_CODE (p) == BARRIER)
+ break;
+
+ if (GET_CODE (p) != INSN)
+ continue;
+
+ body = PATTERN (p);
+ if (GET_CODE (body) == SET)
+ {
+ rtx src = XEXP (body, 1);
+ rtx dst = XEXP (body, 0);
+
+ if (GET_CODE (dst) == REG
+ && REGNO (dst) == REGNO (reg) && !reg_mentioned_p (reg, src))
+ return 1;
+ }
+
+ /* Register is used (may be in source or in dest). */
+ if (reg_mentioned_p (reg, p)
+ || (x_reg != 0 && GET_MODE (p) == SImode
+ && reg_mentioned_p (x_reg, p)))
+ break;
+ }
+ return p == 0 ? 1 : 0;
+}
+
+
+/* Code generation operations called from machine description file. */
+
+/* Print the name of register 'regno' in the assembly file. */
+static void
+asm_print_register (FILE *file, int regno)
+{
+ const char *name = reg_names[regno];
+
+ if (TARGET_NO_DIRECT_MODE && name[0] == '*')
+ name++;
+
+ fprintf (file, "%s", name);
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand X. X is an RTL
+ expression.
+
+ CODE is a value that can be used to specify one of several ways
+ of printing the operand. It is used when identical operands
+ must be printed differently depending on the context. CODE
+ comes from the `%' specification that was used to request
+ printing of the operand. If the specification was just `%DIGIT'
+ then CODE is 0; if the specification was `%LTR DIGIT' then CODE
+ is the ASCII code for LTR.
+
+ If X is a register, this macro should print the register's name.
+ The names can be found in an array `reg_names' whose type is
+ `char *[]'. `reg_names' is initialized from `REGISTER_NAMES'.
+
+ When the machine description has a specification `%PUNCT' (a `%'
+ followed by a punctuation character), this macro is called with
+ a null pointer for X and the punctuation character for CODE.
+
+ The M68HC11 specific codes are:
+
+ 'b' for the low part of the operand.
+ 'h' for the high part of the operand.
+ The 'b' or 'h' modifiers have no effect if the operand has
+ the QImode and is not a S_REG_P (soft register). If the
+ operand is a hard register, these two modifiers have no effect.
+ 't' generate the temporary scratch register. The operand is
+ ignored.
+ 'T' generate the low-part temporary scratch register. The operand is
+ ignored. */
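+
+/* For example, with a 16-bit operand 1, the template "ldab\t%b1"
+ loads its low byte while "%h1" designates its high byte. */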
+
+static void
+m68hc11_print_operand (FILE *file, rtx op, int letter)
+{
+ if (letter == 't')
+ {
+ asm_print_register (file, SOFT_TMP_REGNUM);
+ return;
+ }
+ else if (letter == 'T')
+ {
+ asm_print_register (file, SOFT_TMP_REGNUM);
+ fprintf (file, "+1");
+ return;
+ }
+ else if (letter == '#')
+ {
+ asm_fprintf (file, "%I");
+ }
+
+ if (GET_CODE (op) == REG)
+ {
+ if (letter == 'b' && S_REG_P (op))
+ {
+ asm_print_register (file, REGNO (op));
+ fprintf (file, "+1");
+ }
+ else if (letter == 'b' && D_REG_P (op))
+ {
+ asm_print_register (file, HARD_B_REGNUM);
+ }
+ else
+ {
+ asm_print_register (file, REGNO (op));
+ }
+ return;
+ }
+
+ if (GET_CODE (op) == SYMBOL_REF && (letter == 'b' || letter == 'h'))
+ {
+ if (letter == 'b')
+ asm_fprintf (file, "%I%%lo(");
+ else
+ asm_fprintf (file, "%I%%hi(");
+
+ output_addr_const (file, op);
+ fprintf (file, ")");
+ return;
+ }
+
+ /* Get the low or high part of the operand when the 'b' or 'h'
+ modifiers are specified. If the operand is already QImode, there
+ is nothing to do. */
+ if (GET_MODE (op) == HImode || GET_MODE (op) == VOIDmode)
+ {
+ if (letter == 'b')
+ {
+ op = m68hc11_gen_lowpart (QImode, op);
+ }
+ else if (letter == 'h')
+ {
+ op = m68hc11_gen_highpart (QImode, op);
+ }
+ }
+
+ if (GET_CODE (op) == MEM)
+ {
+ rtx base = XEXP (op, 0);
+ switch (GET_CODE (base))
+ {
+ case PRE_DEC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,-", GET_MODE_SIZE (GET_MODE (op)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ break;
+
+ case POST_DEC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,", GET_MODE_SIZE (GET_MODE (op)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ fprintf (file, "-");
+ break;
+
+ case POST_INC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,", GET_MODE_SIZE (GET_MODE (op)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ fprintf (file, "+");
+ break;
+
+ case PRE_INC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,+", GET_MODE_SIZE (GET_MODE (op)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ break;
+
+ case MEM:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "[");
+ m68hc11_print_operand_address (file, XEXP (base, 0));
+ fprintf (file, "]");
+ break;
+
+ default:
+ if (m68hc11_page0_symbol_p (base))
+ fprintf (file, "*");
+
+ output_address (base);
+ break;
+ }
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ asm_fprintf (file, "%I0x%lx", l);
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == DFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (op),
+ sizeof (dstr), 0, 1);
+ asm_fprintf (file, "%I0r%s", dstr);
+ }
+ else
+ {
+ int need_parenthesize = 0;
+
+ if (letter != 'i')
+ asm_fprintf (file, "%I");
+ else
+ need_parenthesize = must_parenthesize (op);
+
+ if (need_parenthesize)
+ fprintf (file, "(");
+
+ output_addr_const (file, op);
+ if (need_parenthesize)
+ fprintf (file, ")");
+ }
+}
+
+/* Return true if the operand 'op' must be printed with parentheses
+ around it, which is needed only when a symbol name collides with
+ a processor register name. */
+static int
+must_parenthesize (rtx op)
+{
+ const char *name;
+
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ name = XSTR (op, 0);
+ /* Avoid a conflict between symbol name and a possible
+ register. */
+ return (strcasecmp (name, "a") == 0
+ || strcasecmp (name, "b") == 0
+ || strcasecmp (name, "d") == 0
+ || strcasecmp (name, "x") == 0
+ || strcasecmp (name, "y") == 0
+ || strcasecmp (name, "ix") == 0
+ || strcasecmp (name, "iy") == 0
+ || strcasecmp (name, "pc") == 0
+ || strcasecmp (name, "sp") == 0
+ || strcasecmp (name, "ccr") == 0) ? 1 : 0;
+
+ case PLUS:
+ case MINUS:
+ return must_parenthesize (XEXP (op, 0))
+ || must_parenthesize (XEXP (op, 1));
+
+ case MEM:
+ case CONST:
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ return must_parenthesize (XEXP (op, 0));
+
+ case CONST_DOUBLE:
+ case CONST_INT:
+ case LABEL_REF:
+ case CODE_LABEL:
+ default:
+ return 0;
+ }
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression. */
+
+static void
+m68hc11_print_operand_address (FILE *file, rtx addr)
+{
+ rtx base;
+ rtx offset;
+ int need_parenthesis = 0;
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ gcc_assert (REG_P (addr) && REG_OK_FOR_BASE_STRICT_P (addr));
+
+ fprintf (file, "0,");
+ asm_print_register (file, REGNO (addr));
+ break;
+
+ case MEM:
+ base = XEXP (addr, 0);
+ switch (GET_CODE (base))
+ {
+ case PRE_DEC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,-", GET_MODE_SIZE (GET_MODE (addr)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ break;
+
+ case POST_DEC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,", GET_MODE_SIZE (GET_MODE (addr)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ fprintf (file, "-");
+ break;
+
+ case POST_INC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,", GET_MODE_SIZE (GET_MODE (addr)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ fprintf (file, "+");
+ break;
+
+ case PRE_INC:
+ gcc_assert (TARGET_M6812);
+ fprintf (file, "%u,+", GET_MODE_SIZE (GET_MODE (addr)));
+ asm_print_register (file, REGNO (XEXP (base, 0)));
+ break;
+
+ default:
+ need_parenthesis = must_parenthesize (base);
+ if (need_parenthesis)
+ fprintf (file, "(");
+
+ output_addr_const (file, base);
+ if (need_parenthesis)
+ fprintf (file, ")");
+ break;
+ }
+ break;
+
+ case PLUS:
+ base = XEXP (addr, 0);
+ offset = XEXP (addr, 1);
+ if (!G_REG_P (base) && G_REG_P (offset))
+ {
+ base = XEXP (addr, 1);
+ offset = XEXP (addr, 0);
+ }
+ if (CONSTANT_ADDRESS_P (base))
+ {
+ need_parenthesis = must_parenthesize (addr);
+
+ gcc_assert (CONSTANT_ADDRESS_P (offset));
+ if (need_parenthesis)
+ fprintf (file, "(");
+
+ output_addr_const (file, base);
+ fprintf (file, "+");
+ output_addr_const (file, offset);
+ if (need_parenthesis)
+ fprintf (file, ")");
+ }
+ else
+ {
+ gcc_assert (REG_P (base) && REG_OK_FOR_BASE_STRICT_P (base));
+ if (REG_P (offset))
+ {
+ gcc_assert (TARGET_M6812);
+ asm_print_register (file, REGNO (offset));
+ fprintf (file, ",");
+ asm_print_register (file, REGNO (base));
+ }
+ else
+ {
+ need_parenthesis = must_parenthesize (offset);
+ if (need_parenthesis)
+ fprintf (file, "(");
+
+ output_addr_const (file, offset);
+ if (need_parenthesis)
+ fprintf (file, ")");
+ fprintf (file, ",");
+ asm_print_register (file, REGNO (base));
+ }
+ }
+ break;
+
+ default:
+ if (GET_CODE (addr) == CONST_INT
+ && INTVAL (addr) < 0x8000 && INTVAL (addr) >= -0x8000)
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
+ }
+ else
+ {
+ need_parenthesis = must_parenthesize (addr);
+ if (need_parenthesis)
+ fprintf (file, "(");
+
+ output_addr_const (file, addr);
+ if (need_parenthesis)
+ fprintf (file, ")");
+ }
+ break;
+ }
+}
+
+
+/* Splitting of some instructions. */
+
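+/* Emit a cc0 compare of OP0 and OP1 and return the comparison rtx
+ (CODE applied to cc0) for use in a conditional branch. */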
+static rtx
+m68hc11_expand_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx ret = 0;
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) != MODE_FLOAT);
+ emit_insn (gen_rtx_SET (VOIDmode, cc0_rtx,
+ gen_rtx_COMPARE (VOIDmode, op0, op1)));
+ ret = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx);
+
+ return ret;
+}
+
+rtx
+m68hc11_expand_compare_and_branch (enum rtx_code code, rtx op0, rtx op1,
+ rtx label)
+{
+ rtx tmp;
+
+ switch (GET_MODE (op0))
+ {
+ case QImode:
+ case HImode:
+ tmp = m68hc11_expand_compare (code, op0, op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ return 0;
+#if 0
+
+ /* SCz: from i386.c */
+ case SFmode:
+ case DFmode:
+ /* Don't expand the comparison early, so that we get better code
+ when jump or whoever decides to reverse the comparison. */
+ {
+ rtvec vec;
+ int use_fcomi;
+
+ code = m68hc11_prepare_fp_compare_args (code, &m68hc11_compare_op0,
+ &m68hc11_compare_op1);
+
+ tmp = gen_rtx_fmt_ee (code, m68hc11_fp_compare_mode (code),
+ m68hc11_compare_op0, m68hc11_compare_op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+ use_fcomi = ix86_use_fcomi_compare (code);
+ vec = rtvec_alloc (3 + !use_fcomi);
+ RTVEC_ELT (vec, 0) = tmp;
+ RTVEC_ELT (vec, 1)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
+ RTVEC_ELT (vec, 2)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
+ if (!use_fcomi)
+ RTVEC_ELT (vec, 3)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+ return;
+ }
+#endif
+
+ case SImode:
+ /* Expand SImode branch into multiple compare+branch. */
+ {
+ rtx lo[2], hi[2], label2;
+ enum rtx_code code1, code2, code3;
+
+ if (CONSTANT_P (op0) && !CONSTANT_P (op1))
+ {
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+ lo[0] = m68hc11_gen_lowpart (HImode, op0);
+ lo[1] = m68hc11_gen_lowpart (HImode, op1);
+ hi[0] = m68hc11_gen_highpart (HImode, op0);
+ hi[1] = m68hc11_gen_highpart (HImode, op1);
+
+ /* If we are doing less-than and op1 is a constant whose low
+ word is zero, we can just examine the high word. */
+
+ if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
+ && (code == LT || code == LTU))
+ {
+ return m68hc11_expand_compare_and_branch (code, hi[0], hi[1],
+ label);
+ }
+
+ /* Otherwise, we need two or three jumps. */
+
+ label2 = gen_label_rtx ();
+
+ code1 = code;
+ code2 = swap_condition (code);
+ code3 = unsigned_condition (code);
+
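+ /* code1 branches to LABEL on the high words, code2 branches to the
+ local fall-through label LABEL2, and code3 is the unsigned variant
+ used for the final low-word comparison. The switch below refines
+ them for the composite conditions. */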
+ switch (code)
+ {
+ case LT:
+ case GT:
+ case LTU:
+ case GTU:
+ break;
+
+ case LE:
+ code1 = LT;
+ code2 = GT;
+ break;
+ case GE:
+ code1 = GT;
+ code2 = LT;
+ break;
+ case LEU:
+ code1 = LTU;
+ code2 = GTU;
+ break;
+ case GEU:
+ code1 = GTU;
+ code2 = LTU;
+ break;
+
+ case EQ:
+ code1 = UNKNOWN;
+ code2 = NE;
+ break;
+ case NE:
+ code2 = UNKNOWN;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* a < b =>
+ if (hi(a) < hi(b)) goto true;
+ if (hi(a) > hi(b)) goto false;
+ if (lo(a) < lo(b)) goto true;
+ false: */
+ if (code1 != UNKNOWN)
+ m68hc11_expand_compare_and_branch (code1, hi[0], hi[1], label);
+ if (code2 != UNKNOWN)
+ m68hc11_expand_compare_and_branch (code2, hi[0], hi[1], label2);
+
+ m68hc11_expand_compare_and_branch (code3, lo[0], lo[1], label);
+
+ if (code2 != UNKNOWN)
+ emit_label (label2);
+ return 0;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ return 0;
+}
+
+/* Return the auto increment/decrement code of X if it is a MEM with
+ such an address; return CONST for anything else. */
+static int
+autoinc_mode (rtx x)
+{
+ if (GET_CODE (x) != MEM)
+ return CONST;
+
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PRE_INC
+ || GET_CODE (x) == PRE_DEC
+ || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == POST_DEC)
+ return GET_CODE (x);
+
+ return CONST;
+}
+
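+/* for_each_rtx callback: add a REG_INC note to the insn passed in
+ DATA for each auto inc/dec address found in the pattern; return -1
+ so that the sub-rtxes of a match are not walked again. */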
+static int
+m68hc11_make_autoinc_notes (rtx *x, void *data)
+{
+ rtx insn;
+
+ switch (GET_CODE (*x))
+ {
+ case PRE_DEC:
+ case PRE_INC:
+ case POST_DEC:
+ case POST_INC:
+ insn = (rtx) data;
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC, XEXP (*x, 0),
+ REG_NOTES (insn));
+ return -1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Split a DI, SI or HI move into several smaller move operations.
+ The scratch register 'scratch' is used as a temporary to load and
+ store intermediate values. It must be a hard register. */
+void
+m68hc11_split_move (rtx to, rtx from, rtx scratch)
+{
+ rtx low_to, low_from;
+ rtx high_to, high_from;
+ rtx insn;
+ enum machine_mode mode;
+ int offset = 0;
+ int autoinc_from = autoinc_mode (from);
+ int autoinc_to = autoinc_mode (to);
+
+ mode = GET_MODE (to);
+
+ /* If TO and FROM use auto-increment modes that are not compatible
+ with each other (one is a pop and the other a push), we must change
+ one to an offsettable operand and generate an appropriate add at the end. */
+ if (TARGET_M6812 && GET_MODE_SIZE (mode) > 2)
+ {
+ rtx reg;
+ int code;
+
+ /* The source uses an autoinc mode which is not compatible with
+ a split (this would result in a word swap). */
+ if (autoinc_from == PRE_INC || autoinc_from == POST_DEC)
+ {
+ code = GET_CODE (XEXP (from, 0));
+ reg = XEXP (XEXP (from, 0), 0);
+ offset = GET_MODE_SIZE (GET_MODE (from));
+ if (code == POST_DEC)
+ offset = -offset;
+
+ if (code == PRE_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+
+ m68hc11_split_move (to, gen_rtx_MEM (GET_MODE (from), reg), scratch);
+ if (code == POST_DEC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+ return;
+ }
+
+ /* Likewise for destination. */
+ if (autoinc_to == PRE_INC || autoinc_to == POST_DEC)
+ {
+ code = GET_CODE (XEXP (to, 0));
+ reg = XEXP (XEXP (to, 0), 0);
+ offset = GET_MODE_SIZE (GET_MODE (to));
+ if (code == POST_DEC)
+ offset = -offset;
+
+ if (code == PRE_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+
+ m68hc11_split_move (gen_rtx_MEM (GET_MODE (to), reg), from, scratch);
+ if (code == POST_DEC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+ return;
+ }
+
+ /* The source and destination auto increment modes must be compatible
+ with each other: same direction. */
+ if ((autoinc_to != autoinc_from
+ && autoinc_to != CONST && autoinc_from != CONST)
+ /* The destination address register must not be used within
+ the source operand because the source address would change
+ while doing the copy. */
+ || (autoinc_to != CONST
+ && reg_mentioned_p (XEXP (XEXP (to, 0), 0), from)
+ && !IS_STACK_PUSH (to)))
+ {
+ /* Must change the destination. */
+ code = GET_CODE (XEXP (to, 0));
+ reg = XEXP (XEXP (to, 0), 0);
+ offset = GET_MODE_SIZE (GET_MODE (to));
+ if (code == PRE_DEC || code == POST_DEC)
+ offset = -offset;
+
+ if (code == PRE_DEC || code == PRE_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+ m68hc11_split_move (gen_rtx_MEM (GET_MODE (to), reg), from, scratch);
+ if (code == POST_DEC || code == POST_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+
+ return;
+ }
+
+ /* Likewise, the source address register must not be used within
+ the destination operand. */
+ if (autoinc_from != CONST
+ && reg_mentioned_p (XEXP (XEXP (from, 0), 0), to)
+ && !IS_STACK_PUSH (to))
+ {
+ /* Must change the source. */
+ code = GET_CODE (XEXP (from, 0));
+ reg = XEXP (XEXP (from, 0), 0);
+ offset = GET_MODE_SIZE (GET_MODE (from));
+ if (code == PRE_DEC || code == POST_DEC)
+ offset = -offset;
+
+ if (code == PRE_DEC || code == PRE_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+ m68hc11_split_move (to, gen_rtx_MEM (GET_MODE (from), reg), scratch);
+ if (code == POST_DEC || code == POST_INC)
+ emit_insn (gen_addhi3 (reg, reg, GEN_INT (offset)));
+
+ return;
+ }
+ }
+
+ if (GET_MODE_SIZE (mode) == 8)
+ mode = SImode;
+ else if (GET_MODE_SIZE (mode) == 4)
+ mode = HImode;
+ else
+ mode = QImode;
+
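+ /* When pushing a value whose source mentions SP, the push of the
+ first half moves SP, so the source address of the second half
+ must be compensated by OFFSET (see the adjust_address below). */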
+ if (TARGET_M6812
+ && IS_STACK_PUSH (to)
+ && reg_mentioned_p (gen_rtx_REG (HImode, HARD_SP_REGNUM), from))
+ {
+ if (mode == SImode)
+ {
+ offset = 4;
+ }
+ else if (mode == HImode)
+ {
+ offset = 2;
+ }
+ else
+ offset = 0;
+ }
+
+ low_to = m68hc11_gen_lowpart (mode, to);
+ high_to = m68hc11_gen_highpart (mode, to);
+
+ low_from = m68hc11_gen_lowpart (mode, from);
+ high_from = m68hc11_gen_highpart (mode, from);
+
+ if (offset)
+ {
+ high_from = adjust_address (high_from, mode, offset);
+ low_from = high_from;
+ }
+
+ /* When copying with a POST_INC mode, we must copy the
+ high part and then the low part to guarantee a correct
+ 32/64-bit copy. */
+ if (TARGET_M6812
+ && GET_MODE_SIZE (mode) >= 2
+ && autoinc_from != autoinc_to
+ && (autoinc_from == POST_INC || autoinc_to == POST_INC))
+ {
+ rtx swap;
+
+ swap = low_to;
+ low_to = high_to;
+ high_to = swap;
+
+ swap = low_from;
+ low_from = high_from;
+ high_from = swap;
+ }
+ if (mode == SImode)
+ {
+ m68hc11_split_move (low_to, low_from, scratch);
+ m68hc11_split_move (high_to, high_from, scratch);
+ }
+ else if (H_REG_P (to) || H_REG_P (from)
+ || (low_from == const0_rtx
+ && high_from == const0_rtx
+ && ! push_operand (to, GET_MODE (to))
+ && ! H_REG_P (scratch))
+ || (TARGET_M6812
+ && (!m68hc11_register_indirect_p (from, GET_MODE (from))
+ || m68hc11_small_indexed_indirect_p (from,
+ GET_MODE (from)))
+ && (!m68hc11_register_indirect_p (to, GET_MODE (to))
+ || m68hc11_small_indexed_indirect_p (to, GET_MODE (to)))))
+ {
+ insn = emit_move_insn (low_to, low_from);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+
+ insn = emit_move_insn (high_to, high_from);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+ }
+ else
+ {
+ insn = emit_move_insn (scratch, low_from);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+ insn = emit_move_insn (low_to, scratch);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+
+ insn = emit_move_insn (scratch, high_from);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+ insn = emit_move_insn (high_to, scratch);
+ for_each_rtx (&PATTERN (insn), m68hc11_make_autoinc_notes, insn);
+ }
+}
+
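+/* Simplify the constant operand of a logical operation: return 0 when
+ the operation is a no-op for that operand (IOR/XOR with 0, AND with
+ the full mask) and set *RESULT when the operand forces the result
+ (AND with 0 gives 0, IOR with the full mask gives all ones). */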
+static rtx
+simplify_logical (enum machine_mode mode, int code, rtx operand, rtx *result)
+{
+ int val;
+ int mask;
+
+ *result = 0;
+ if (GET_CODE (operand) != CONST_INT)
+ return operand;
+
+ if (mode == HImode)
+ mask = 0x0ffff;
+ else
+ mask = 0x0ff;
+
+ val = INTVAL (operand);
+ switch (code)
+ {
+ case IOR:
+ if ((val & mask) == 0)
+ return 0;
+ if ((val & mask) == mask)
+ *result = constm1_rtx;
+ break;
+
+ case AND:
+ if ((val & mask) == 0)
+ *result = const0_rtx;
+ if ((val & mask) == mask)
+ return 0;
+ break;
+
+ case XOR:
+ if ((val & mask) == 0)
+ return 0;
+ break;
+ }
+ return operand;
+}
+
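+/* Emit a QImode or HImode logical operation CODE on OPERANDS after
+ constant simplification; OPERANDS[3], when set, is a scratch used
+ when the destination is not a hard register. */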
+static void
+m68hc11_emit_logical (enum machine_mode mode, enum rtx_code code, rtx *operands)
+{
+ rtx result;
+ int need_copy;
+
+ need_copy = (rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2])) ? 0 : 1;
+
+ operands[1] = simplify_logical (mode, code, operands[1], &result);
+ operands[2] = simplify_logical (mode, code, operands[2], &result);
+
+ if (result && GET_CODE (result) == CONST_INT)
+ {
+ if (!H_REG_P (operands[0]) && operands[3]
+ && (INTVAL (result) != 0 || IS_STACK_PUSH (operands[0])))
+ {
+ emit_move_insn (operands[3], result);
+ emit_move_insn (operands[0], operands[3]);
+ }
+ else
+ {
+ emit_move_insn (operands[0], result);
+ }
+ }
+ else if (operands[1] != 0 && operands[2] != 0)
+ {
+ if (!H_REG_P (operands[0]) && operands[3])
+ {
+ emit_move_insn (operands[3], operands[1]);
+ emit_insn (gen_rtx_SET (mode,
+ operands[3],
+ gen_rtx_fmt_ee (code, mode,
+ operands[3], operands[2])));
+ emit_move_insn (operands[0], operands[3]);
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (mode, operands[0],
+ gen_rtx_fmt_ee (code, mode,
+ operands[0], operands[2])));
+ }
+ }
+
+ /* The logical operation is similar to a copy. */
+ else if (need_copy)
+ {
+ rtx src;
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ src = operands[2];
+ else
+ src = operands[1];
+
+ if (!H_REG_P (operands[0]) && !H_REG_P (src))
+ {
+ emit_move_insn (operands[3], src);
+ emit_move_insn (operands[0], operands[3]);
+ }
+ else
+ {
+ emit_move_insn (operands[0], src);
+ }
+ }
+}
+
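+/* Split a logical operation CODE into two operations on the low and
+ high parts of OPERANDS; MODE is the mode of each half and
+ OPERANDS[3] is a scratch register. */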
+void
+m68hc11_split_logical (enum machine_mode mode, enum rtx_code code,
+ rtx *operands)
+{
+ rtx low[4];
+ rtx high[4];
+
+ low[0] = m68hc11_gen_lowpart (mode, operands[0]);
+ low[1] = m68hc11_gen_lowpart (mode, operands[1]);
+ low[2] = m68hc11_gen_lowpart (mode, operands[2]);
+
+ high[0] = m68hc11_gen_highpart (mode, operands[0]);
+ high[1] = m68hc11_gen_highpart (mode, operands[1]);
+ high[2] = m68hc11_gen_highpart (mode, operands[2]);
+
+ low[3] = operands[3];
+ high[3] = operands[3];
+ if (mode == SImode)
+ {
+ m68hc11_split_logical (HImode, code, low);
+ m68hc11_split_logical (HImode, code, high);
+ return;
+ }
+
+ m68hc11_emit_logical (mode, code, low);
+ m68hc11_emit_logical (mode, code, high);
+}
+
+
+/* Code generation. */
+
+void
+m68hc11_output_swap (rtx insn ATTRIBUTE_UNUSED, rtx operands[])
+{
+ /* We have to be careful with the cc_status. An address register swap
+ is generated for some comparisons. The comparison is made with D
+ but the branch really uses the address register. See the split
+ pattern for compare. The xgdx/xgdy preserve the flags, but after
+ the exchange the flags reflect the value of X and not D. Record
+ this by setting the cc_status according to the cc_prev_status. */
+ if (X_REG_P (operands[1]) || X_REG_P (operands[0]))
+ {
+ if (cc_prev_status.value1 != 0
+ && (D_REG_P (cc_prev_status.value1)
+ || X_REG_P (cc_prev_status.value1)))
+ {
+ cc_status = cc_prev_status;
+ if (D_REG_P (cc_status.value1))
+ cc_status.value1 = gen_rtx_REG (GET_MODE (cc_status.value1),
+ HARD_X_REGNUM);
+ else
+ cc_status.value1 = gen_rtx_REG (GET_MODE (cc_status.value1),
+ HARD_D_REGNUM);
+ }
+ else
+ CC_STATUS_INIT;
+
+ output_asm_insn ("xgdx", operands);
+ }
+ else
+ {
+ if (cc_prev_status.value1 != 0
+ && (D_REG_P (cc_prev_status.value1)
+ || Y_REG_P (cc_prev_status.value1)))
+ {
+ cc_status = cc_prev_status;
+ if (D_REG_P (cc_status.value1))
+ cc_status.value1 = gen_rtx_REG (GET_MODE (cc_status.value1),
+ HARD_Y_REGNUM);
+ else
+ cc_status.value1 = gen_rtx_REG (GET_MODE (cc_status.value1),
+ HARD_D_REGNUM);
+ }
+ else
+ CC_STATUS_INIT;
+
+ output_asm_insn ("xgdy", operands);
+ }
+}
+
+/* Return 1 if the next insn after 'insn' is a test of the register 'reg'.
+ This is used to decide whether a move that sets the flags should be
+ used instead. */
+int
+next_insn_test_reg (rtx insn, rtx reg)
+{
+ rtx body;
+
+ insn = next_nonnote_insn (insn);
+ if (GET_CODE (insn) != INSN)
+ return 0;
+
+ body = PATTERN (insn);
+ if (sets_cc0_p (body) != 1)
+ return 0;
+
+ if (rtx_equal_p (XEXP (body, 1), reg) == 0)
+ return 0;
+
+ return 1;
+}
+
+/* Generate the code to move a 16-bit operand into another one. */
+
+void
+m68hc11_gen_movhi (rtx insn, rtx *operands)
+{
+ int reg;
+
+ /* Move a register or memory to the same location.
+ This is possible because such an insn can appear
+ when not optimizing. */
+ if (operands[0] == operands[1] || rtx_equal_p (operands[0], operands[1]))
+ {
+ cc_status = cc_prev_status;
+ return;
+ }
+
+ if (TARGET_M6812)
+ {
+ rtx from = operands[1];
+ rtx to = operands[0];
+
+ if (IS_STACK_PUSH (to) && H_REG_P (from))
+ {
+ cc_status = cc_prev_status;
+ switch (REGNO (from))
+ {
+ case HARD_X_REGNUM:
+ case HARD_Y_REGNUM:
+ case HARD_D_REGNUM:
+ output_asm_insn ("psh%1", operands);
+ break;
+ case HARD_SP_REGNUM:
+ output_asm_insn ("sts\t2,-sp", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ if (IS_STACK_POP (from) && H_REG_P (to))
+ {
+ cc_status = cc_prev_status;
+ switch (REGNO (to))
+ {
+ case HARD_X_REGNUM:
+ case HARD_Y_REGNUM:
+ case HARD_D_REGNUM:
+ output_asm_insn ("pul%0", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ if (H_REG_P (operands[0]) && H_REG_P (operands[1]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("tfr\t%1,%0", operands);
+ }
+ else if (H_REG_P (operands[0]))
+ {
+ if (SP_REG_P (operands[0]))
+ output_asm_insn ("lds\t%1", operands);
+ else
+ output_asm_insn ("ld%0\t%1", operands);
+ }
+ else if (H_REG_P (operands[1]))
+ {
+ if (SP_REG_P (operands[1]))
+ output_asm_insn ("sts\t%0", operands);
+ else
+ output_asm_insn ("st%1\t%0", operands);
+ }
+
+ /* The 68hc12 does not support (MEM:HI (MEM:HI)) with the movw
+ instruction. We have to use a scratch register as a temporary
+ location. Trying to use a specific pattern or constraint failed. */
+ else if (GET_CODE (to) == MEM && GET_CODE (XEXP (to, 0)) == MEM)
+ {
+ rtx ops[4];
+
+ ops[0] = to;
+ ops[2] = from;
+ ops[3] = 0;
+ if (dead_register_here (insn, d_reg))
+ ops[1] = d_reg;
+ else if (dead_register_here (insn, ix_reg))
+ ops[1] = ix_reg;
+ else if (dead_register_here (insn, iy_reg))
+ ops[1] = iy_reg;
+ else
+ {
+ ops[1] = d_reg;
+ ops[3] = d_reg;
+ output_asm_insn ("psh%3", ops);
+ }
+
+ ops[0] = to;
+ ops[2] = from;
+ output_asm_insn ("ld%1\t%2", ops);
+ output_asm_insn ("st%1\t%0", ops);
+ if (ops[3])
+ output_asm_insn ("pul%3", ops);
+ }
+
+ /* Store the constant 0 with two clr's. movw is used for non-null
+ constants, and also to clear a volatile memory reference, but
+ the latter is possible only if the memory reference has a small
+ offset or is an absolute address. */
+ else if (GET_CODE (from) == CONST_INT
+ && INTVAL (from) == 0
+ && (MEM_VOLATILE_P (to) == 0
+ || m68hc11_small_indexed_indirect_p (to, HImode) == 0))
+ {
+ output_asm_insn ("clr\t%h0", operands);
+ output_asm_insn ("clr\t%b0", operands);
+ }
+ else
+ {
+ if ((m68hc11_register_indirect_p (from, GET_MODE (from))
+ && !m68hc11_small_indexed_indirect_p (from, GET_MODE (from)))
+ || (m68hc11_register_indirect_p (to, GET_MODE (to))
+ && !m68hc11_small_indexed_indirect_p (to, GET_MODE (to))))
+ {
+ rtx ops[3];
+
+ if (operands[2])
+ {
+ ops[0] = operands[2];
+ ops[1] = from;
+ ops[2] = 0;
+ m68hc11_gen_movhi (insn, ops);
+ ops[0] = to;
+ ops[1] = operands[2];
+ m68hc11_gen_movhi (insn, ops);
+ return;
+ }
+ else
+ {
+ /* !!!! SCz wrong here. */
+ fatal_insn ("move insn not handled", insn);
+ }
+ }
+ else
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("movw\t%1,%0", operands);
+ }
+ }
+ return;
+ }
+
+ if (IS_STACK_POP (operands[1]) && H_REG_P (operands[0]))
+ {
+ cc_status = cc_prev_status;
+ switch (REGNO (operands[0]))
+ {
+ case HARD_X_REGNUM:
+ case HARD_Y_REGNUM:
+ output_asm_insn ("pul%0", operands);
+ break;
+ case HARD_D_REGNUM:
+ output_asm_insn ("pula", operands);
+ output_asm_insn ("pulb", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+ /* Some moves to a hard register are special. Not all of them
+ are really supported and we have to use a temporary
+ location to provide them (either the stack or a temp var). */
+ if (H_REG_P (operands[0]))
+ {
+ switch (REGNO (operands[0]))
+ {
+ case HARD_D_REGNUM:
+ if (X_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_X_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else if (next_insn_test_reg (insn, operands[0]))
+ {
+ output_asm_insn ("stx\t%t0\n\tldd\t%t0", operands);
+ }
+ else
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("pshx\n\tpula\n\tpulb", operands);
+ }
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_Y_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else
+ {
+ /* %t means *ZTMP scratch register. */
+ output_asm_insn ("sty\t%t1", operands);
+ output_asm_insn ("ldd\t%t1", operands);
+ }
+ }
+ else if (SP_REG_P (operands[1]))
+ {
+ CC_STATUS_INIT;
+ if (ix_reg == 0)
+ create_regs_rtx ();
+ if (optimize == 0 || dead_register_here (insn, ix_reg) == 0)
+ output_asm_insn ("xgdx", operands);
+ output_asm_insn ("tsx", operands);
+ output_asm_insn ("xgdx", operands);
+ }
+ else if (IS_STACK_POP (operands[1]))
+ {
+ output_asm_insn ("pula\n\tpulb", operands);
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) == 0)
+ {
+ output_asm_insn ("clra\n\tclrb", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldd\t%1", operands);
+ }
+ break;
+
+ case HARD_X_REGNUM:
+ if (D_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_D_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else if (next_insn_test_reg (insn, operands[0]))
+ {
+ output_asm_insn ("std\t%t0\n\tldx\t%t0", operands);
+ }
+ else
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("pshb", operands);
+ output_asm_insn ("psha", operands);
+ output_asm_insn ("pulx", operands);
+ }
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ /* When both D and Y are dead, use the sequence xgdy, xgdx
+ to move Y into X. The D and Y registers are modified. */
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_Y_REGNUM)
+ && dead_register_here (insn, d_reg))
+ {
+ output_asm_insn ("xgdy", operands);
+ output_asm_insn ("xgdx", operands);
+ CC_STATUS_INIT;
+ }
+ else if (!optimize_size)
+ {
+ output_asm_insn ("sty\t%t1", operands);
+ output_asm_insn ("ldx\t%t1", operands);
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ output_asm_insn ("pshy", operands);
+ output_asm_insn ("pulx", operands);
+ }
+ }
+ else if (SP_REG_P (operands[1]))
+ {
+ /* tsx, tsy preserve the flags */
+ cc_status = cc_prev_status;
+ output_asm_insn ("tsx", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldx\t%1", operands);
+ }
+ break;
+
+ case HARD_Y_REGNUM:
+ if (D_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_D_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else
+ {
+ output_asm_insn ("std\t%t1", operands);
+ output_asm_insn ("ldy\t%t1", operands);
+ }
+ }
+ else if (X_REG_P (operands[1]))
+ {
+ /* When both D and X are dead, use the sequence xgdx, xgdy
+ to move X into Y. The D and X registers are modified. */
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_X_REGNUM)
+ && dead_register_here (insn, d_reg))
+ {
+ output_asm_insn ("xgdx", operands);
+ output_asm_insn ("xgdy", operands);
+ CC_STATUS_INIT;
+ }
+ else if (!optimize_size)
+ {
+ output_asm_insn ("stx\t%t1", operands);
+ output_asm_insn ("ldy\t%t1", operands);
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ output_asm_insn ("pshx", operands);
+ output_asm_insn ("puly", operands);
+ }
+ }
+ else if (SP_REG_P (operands[1]))
+ {
+ /* tsx, tsy preserve the flags */
+ cc_status = cc_prev_status;
+ output_asm_insn ("tsy", operands);
+ }
+ else
+ {
+ output_asm_insn ("ldy\t%1", operands);
+ }
+ break;
+
+ case HARD_SP_REGNUM:
+ if (D_REG_P (operands[1]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("xgdx", operands);
+ output_asm_insn ("txs", operands);
+ output_asm_insn ("xgdx", operands);
+ }
+ else if (X_REG_P (operands[1]))
+ {
+ /* tys, txs preserve the flags */
+ cc_status = cc_prev_status;
+ output_asm_insn ("txs", operands);
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ /* tys, txs preserve the flags */
+ cc_status = cc_prev_status;
+ output_asm_insn ("tys", operands);
+ }
+ else
+ {
+ /* lds sets the flags but des does not. */
+ CC_STATUS_INIT;
+ output_asm_insn ("lds\t%1", operands);
+ output_asm_insn ("des", operands);
+ }
+ break;
+
+ default:
+ fatal_insn ("invalid register in the move instruction", insn);
+ break;
+ }
+ return;
+ }
+ if (SP_REG_P (operands[1]) && REG_P (operands[0])
+ && REGNO (operands[0]) == HARD_FRAME_POINTER_REGNUM)
+ {
+ output_asm_insn ("sts\t%0", operands);
+ return;
+ }
+
+ if (IS_STACK_PUSH (operands[0]) && H_REG_P (operands[1]))
+ {
+ cc_status = cc_prev_status;
+ switch (REGNO (operands[1]))
+ {
+ case HARD_X_REGNUM:
+ case HARD_Y_REGNUM:
+ output_asm_insn ("psh%1", operands);
+ break;
+ case HARD_D_REGNUM:
+ output_asm_insn ("pshb", operands);
+ output_asm_insn ("psha", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ }
+
+ /* Operand 1 must be a hard register. */
+ if (!H_REG_P (operands[1]))
+ {
+ fatal_insn ("invalid operand in the instruction", insn);
+ }
+
+ reg = REGNO (operands[1]);
+ switch (reg)
+ {
+ case HARD_D_REGNUM:
+ output_asm_insn ("std\t%0", operands);
+ break;
+
+ case HARD_X_REGNUM:
+ output_asm_insn ("stx\t%0", operands);
+ break;
+
+ case HARD_Y_REGNUM:
+ output_asm_insn ("sty\t%0", operands);
+ break;
+
+ case HARD_SP_REGNUM:
+ if (ix_reg == 0)
+ create_regs_rtx ();
+
+ if (REG_P (operands[0]) && REGNO (operands[0]) == SOFT_TMP_REGNUM)
+ {
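+ /* The destination is the ZTMP register itself, so it cannot be
+ used to save X; push X instead and compensate the two bytes of
+ the push with the two inx. */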
+ output_asm_insn ("pshx", operands);
+ output_asm_insn ("tsx", operands);
+ output_asm_insn ("inx", operands);
+ output_asm_insn ("inx", operands);
+ output_asm_insn ("stx\t%0", operands);
+ output_asm_insn ("pulx", operands);
+ }
+
+ else if (reg_mentioned_p (ix_reg, operands[0]))
+ {
+ output_asm_insn ("sty\t%t0", operands);
+ output_asm_insn ("tsy", operands);
+ output_asm_insn ("sty\t%0", operands);
+ output_asm_insn ("ldy\t%t0", operands);
+ }
+ else
+ {
+ output_asm_insn ("stx\t%t0", operands);
+ output_asm_insn ("tsx", operands);
+ output_asm_insn ("stx\t%0", operands);
+ output_asm_insn ("ldx\t%t0", operands);
+ }
+ CC_STATUS_INIT;
+ break;
+
+ default:
+ fatal_insn ("invalid register in the move instruction", insn);
+ break;
+ }
+}
+
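+/* Generate the code to move an 8-bit operand into another one. */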
+void
+m68hc11_gen_movqi (rtx insn, rtx *operands)
+{
+ /* Move a register or memory to the same location.
+ This is possible because such an insn can appear
+ when not optimizing. */
+ if (operands[0] == operands[1] || rtx_equal_p (operands[0], operands[1]))
+ {
+ cc_status = cc_prev_status;
+ return;
+ }
+
+ if (TARGET_M6812)
+ {
+
+ if (H_REG_P (operands[0]) && H_REG_P (operands[1]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("tfr\t%1,%0", operands);
+ }
+ else if (H_REG_P (operands[0]))
+ {
+ if (IS_STACK_POP (operands[1]))
+ output_asm_insn ("pul%b0", operands);
+ else if (Q_REG_P (operands[0]))
+ output_asm_insn ("lda%0\t%b1", operands);
+ else if (D_REG_P (operands[0]))
+ output_asm_insn ("ldab\t%b1", operands);
+ else
+ goto m6811_move;
+ }
+ else if (H_REG_P (operands[1]))
+ {
+ if (Q_REG_P (operands[1]))
+ output_asm_insn ("sta%1\t%b0", operands);
+ else if (D_REG_P (operands[1]))
+ output_asm_insn ("stab\t%b0", operands);
+ else
+ goto m6811_move;
+ }
+ else
+ {
+ rtx from = operands[1];
+ rtx to = operands[0];
+
+ if ((m68hc11_register_indirect_p (from, GET_MODE (from))
+ && !m68hc11_small_indexed_indirect_p (from, GET_MODE (from)))
+ || (m68hc11_register_indirect_p (to, GET_MODE (to))
+ && !m68hc11_small_indexed_indirect_p (to, GET_MODE (to))))
+ {
+ rtx ops[3];
+
+ if (operands[2])
+ {
+ ops[0] = operands[2];
+ ops[1] = from;
+ ops[2] = 0;
+ m68hc11_gen_movqi (insn, ops);
+ ops[0] = to;
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+ }
+ else
+ {
+ /* !!!! SCz wrong here. */
+ fatal_insn ("move insn not handled", insn);
+ }
+ }
+ else
+ {
+ if (GET_CODE (from) == CONST_INT && INTVAL (from) == 0)
+ {
+ output_asm_insn ("clr\t%b0", operands);
+ }
+ else
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn ("movb\t%b1,%b0", operands);
+ }
+ }
+ }
+ return;
+ }
+
+ m6811_move:
+ if (H_REG_P (operands[0]))
+ {
+ switch (REGNO (operands[0]))
+ {
+ case HARD_B_REGNUM:
+ case HARD_D_REGNUM:
+ if (X_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_X_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else
+ {
+ output_asm_insn ("stx\t%t1", operands);
+ output_asm_insn ("ldab\t%T0", operands);
+ }
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_Y_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else
+ {
+ output_asm_insn ("sty\t%t1", operands);
+ output_asm_insn ("ldab\t%T0", operands);
+ }
+ }
+ else if (!DB_REG_P (operands[1]) && !D_REG_P (operands[1])
+ && !DA_REG_P (operands[1]))
+ {
+ output_asm_insn ("ldab\t%b1", operands);
+ }
+ else if (DA_REG_P (operands[1]))
+ {
+ output_asm_insn ("tab", operands);
+ }
+ else
+ {
+ cc_status = cc_prev_status;
+ return;
+ }
+ break;
+
+ case HARD_A_REGNUM:
+ if (X_REG_P (operands[1]))
+ {
+ output_asm_insn ("stx\t%t1", operands);
+ output_asm_insn ("ldaa\t%T0", operands);
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ output_asm_insn ("sty\t%t1", operands);
+ output_asm_insn ("ldaa\t%T0", operands);
+ }
+ else if (!DB_REG_P (operands[1]) && !D_REG_P (operands[1])
+ && !DA_REG_P (operands[1]))
+ {
+ output_asm_insn ("ldaa\t%b1", operands);
+ }
+ else if (!DA_REG_P (operands[1]))
+ {
+ output_asm_insn ("tba", operands);
+ }
+ else
+ {
+ cc_status = cc_prev_status;
+ }
+ break;
+
+ case HARD_X_REGNUM:
+ if (D_REG_P (operands[1]))
+ {
+ if (optimize && find_regno_note (insn, REG_DEAD, HARD_D_REGNUM))
+ {
+ m68hc11_output_swap (insn, operands);
+ }
+ else
+ {
+ output_asm_insn ("stab\t%T1", operands);
+ output_asm_insn ("ldx\t%t1", operands);
+ }
+ CC_STATUS_INIT;
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ output_asm_insn ("sty\t%t0", operands);
+ output_asm_insn ("ldx\t%t0", operands);
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ output_asm_insn ("ldx\t%1", operands);
+ }
+ else if (dead_register_here (insn, d_reg))
+ {
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("xgdx", operands);
+ }
+ else if (!reg_mentioned_p (operands[0], operands[1]))
+ {
+ output_asm_insn ("xgdx", operands);
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("xgdx", operands);
+ }
+ else
+ {
+ output_asm_insn ("pshb", operands);
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("stab\t%T1", operands);
+ output_asm_insn ("ldx\t%t1", operands);
+ output_asm_insn ("pulb", operands);
+ CC_STATUS_INIT;
+ }
+ break;
+
+ case HARD_Y_REGNUM:
+ if (D_REG_P (operands[1]))
+ {
+ output_asm_insn ("stab\t%T1", operands);
+ output_asm_insn ("ldy\t%t1", operands);
+ CC_STATUS_INIT;
+ }
+ else if (X_REG_P (operands[1]))
+ {
+ output_asm_insn ("stx\t%t1", operands);
+ output_asm_insn ("ldy\t%t1", operands);
+ CC_STATUS_INIT;
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ output_asm_insn ("ldy\t%1", operands);
+ }
+ else if (dead_register_here (insn, d_reg))
+ {
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("xgdy", operands);
+ }
+ else if (!reg_mentioned_p (operands[0], operands[1]))
+ {
+ output_asm_insn ("xgdy", operands);
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("xgdy", operands);
+ }
+ else
+ {
+ output_asm_insn ("pshb", operands);
+ output_asm_insn ("ldab\t%b1", operands);
+ output_asm_insn ("stab\t%T1", operands);
+ output_asm_insn ("ldy\t%t1", operands);
+ output_asm_insn ("pulb", operands);
+ CC_STATUS_INIT;
+ }
+ break;
+
+ default:
+ fatal_insn ("invalid register in the instruction", insn);
+ break;
+ }
+ }
+ else if (H_REG_P (operands[1]))
+ {
+ switch (REGNO (operands[1]))
+ {
+ case HARD_D_REGNUM:
+ case HARD_B_REGNUM:
+ output_asm_insn ("stab\t%b0", operands);
+ break;
+
+ case HARD_A_REGNUM:
+ output_asm_insn ("staa\t%b0", operands);
+ break;
+
+ case HARD_X_REGNUM:
+ output_asm_insn ("xgdx\n\tstab\t%b0\n\txgdx", operands);
+ break;
+
+ case HARD_Y_REGNUM:
+ output_asm_insn ("xgdy\n\tstab\t%b0\n\txgdy", operands);
+ break;
+
+ default:
+ fatal_insn ("invalid register in the move instruction", insn);
+ break;
+ }
+ return;
+ }
+ else
+ {
+ fatal_insn ("operand 1 must be a hard register", insn);
+ }
+}
+
+/* Generate the code for a ROTATE or ROTATERT on a QI or HI mode.
+ The source and destination must be D or A and the shift must
+ be a constant. */
+void
+m68hc11_gen_rotate (enum rtx_code code, rtx insn, rtx operands[])
+{
+ int val;
+
+ if (GET_CODE (operands[2]) != CONST_INT
+ || (!D_REG_P (operands[0]) && !DA_REG_P (operands[0])))
+ fatal_insn ("invalid rotate insn", insn);
+
+ val = INTVAL (operands[2]);
+ if (code == ROTATERT)
+ val = GET_MODE_SIZE (GET_MODE (operands[0])) * BITS_PER_UNIT - val;
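+ /* From here on VAL is a left-rotation count; it may be made
+ negative below, in which case the rotation is done to the right. */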
+
+ if (GET_MODE (operands[0]) != QImode)
+ CC_STATUS_INIT;
+
+ /* Rotate by 8 bits if the shift is within [5..11]. */
+ if (val >= 5 && val <= 11)
+ {
+ if (TARGET_M6812)
+ output_asm_insn ("exg\ta,b", operands);
+ else
+ {
+ output_asm_insn ("psha", operands);
+ output_asm_insn ("tba", operands);
+ output_asm_insn ("pulb", operands);
+ }
+ val -= 8;
+ }
+
+ /* If the shift is big, invert the rotation. */
+ else if (val >= 12)
+ {
+ val = val - 16;
+ }
+
+ if (val > 0)
+ {
+ while (--val >= 0)
+ {
+ /* Set the carry to bit-15, but don't change D yet. */
+ if (GET_MODE (operands[0]) != QImode)
+ {
+ output_asm_insn ("asra", operands);
+ output_asm_insn ("rola", operands);
+ }
+
+ /* Rotate B first to move the carry to bit-0. */
+ if (D_REG_P (operands[0]))
+ output_asm_insn ("rolb", operands);
+
+ if (GET_MODE (operands[0]) != QImode || DA_REG_P (operands[0]))
+ output_asm_insn ("rola", operands);
+ }
+ }
+ else
+ {
+ while (++val <= 0)
+ {
+ /* Set the carry to bit-8 of D. */
+ if (GET_MODE (operands[0]) != QImode)
+ output_asm_insn ("tap", operands);
+
+ /* Rotate B first to move the carry to bit-7. */
+ if (D_REG_P (operands[0]))
+ output_asm_insn ("rorb", operands);
+
+ if (GET_MODE (operands[0]) != QImode || DA_REG_P (operands[0]))
+ output_asm_insn ("rora", operands);
+ }
+ }
+}
+
+
+
+/* Store in cc_status the expressions that the condition codes will
+ describe after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+void
+m68hc11_notice_update_cc (rtx exp, rtx insn ATTRIBUTE_UNUSED)
+{
+ /* Recognize SET insns. */
+ if (GET_CODE (exp) == SET)
+ {
+ /* Jumps do not alter the cc's. */
+ if (SET_DEST (exp) == pc_rtx)
+ ;
+
+ /* NOTE: most instructions don't affect the carry bit, but the
+ bhi/bls/bhs/blo instructions use it. This isn't mentioned in
+ the conditions.h header. */
+
+ /* Function calls clobber the cc's. */
+ else if (GET_CODE (SET_SRC (exp)) == CALL)
+ {
+ CC_STATUS_INIT;
+ }
+
+ /* Tests and compares set the cc's in predictable ways. */
+ else if (SET_DEST (exp) == cc0_rtx)
+ {
+ cc_status.flags = 0;
+ cc_status.value1 = XEXP (exp, 0);
+ if (GET_CODE (XEXP (exp, 1)) == COMPARE
+ && XEXP (XEXP (exp, 1), 1) == CONST0_RTX (GET_MODE (XEXP (XEXP (exp, 1), 0))))
+ cc_status.value2 = XEXP (XEXP (exp, 1), 0);
+ else
+ cc_status.value2 = XEXP (exp, 1);
+ }
+ else
+ {
+ /* All other instructions affect the condition codes. */
+ cc_status.flags = 0;
+ cc_status.value1 = XEXP (exp, 0);
+ cc_status.value2 = XEXP (exp, 1);
+ }
+ }
+ else
+ {
+ /* Default action if we haven't recognized something
+ and returned earlier. */
+ CC_STATUS_INIT;
+ }
+
+ if (cc_status.value2 != 0)
+ switch (GET_CODE (cc_status.value2))
+ {
+ /* These logical operations can generate several insns.
+ The flags are setup according to what is generated. */
+ case IOR:
+ case XOR:
+ case AND:
+ break;
+
+ /* The (not ...) generates several 'com' instructions for
+ non QImode. We have to invalidate the flags. */
+ case NOT:
+ if (GET_MODE (cc_status.value2) != QImode)
+ CC_STATUS_INIT;
+ break;
+
+ case PLUS:
+ case MINUS:
+ case MULT:
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ case NEG:
+ if (GET_MODE (cc_status.value2) != VOIDmode)
+ cc_status.flags |= CC_NO_OVERFLOW;
+ break;
+
+ /* The asl sets the overflow bit in such a way that the flags
+ become unusable for the next compare insn. */
+ case ASHIFT:
+ case ROTATE:
+ case ROTATERT:
+ if (GET_MODE (cc_status.value2) != VOIDmode)
+ cc_status.flags |= CC_NO_OVERFLOW;
+ break;
+
+ /* A load/store instruction does not affect the carry. */
+ case MEM:
+ case SYMBOL_REF:
+ case REG:
+ case CONST_INT:
+ cc_status.flags |= CC_NO_OVERFLOW;
+ break;
+
+ default:
+ break;
+ }
+ if (cc_status.value1 && GET_CODE (cc_status.value1) == REG
+ && cc_status.value2
+ && reg_overlap_mentioned_p (cc_status.value1, cc_status.value2))
+ cc_status.value2 = 0;
+
+ else if (cc_status.value1 && side_effects_p (cc_status.value1))
+ cc_status.value1 = 0;
+
+ else if (cc_status.value2 && side_effects_p (cc_status.value2))
+ cc_status.value2 = 0;
+}
+
+/* The current instruction does not affect the flags but changes
+ the register 'reg'. See if the previous flags can be kept for the
+ next instruction to avoid a comparison. */
+void
+m68hc11_notice_keep_cc (rtx reg)
+{
+ if (reg == 0
+ || cc_prev_status.value1 == 0
+ || rtx_equal_p (reg, cc_prev_status.value1)
+ || (cc_prev_status.value2
+ && reg_mentioned_p (reg, cc_prev_status.value2)))
+ CC_STATUS_INIT;
+ else
+ cc_status = cc_prev_status;
+}
+
+
+
+/* Machine Specific Reorg. */
+
+/* Z register replacement:
+
+ GCC treats the Z register as an index base address register like
+ X or Y. In general, it uses it during reload to compute the address
+ of some operand. This helps the reload pass avoid falling into a
+ register spill failure.
+
+ The Z register is in the A_REGS class. In the machine description,
+ the 'A' constraint matches it. The 'x' or 'y' constraints do not.
+
+ It can appear everywhere an X or Y register can appear, except for
+ some templates in the clobber section (when a clobber of X or Y is asked).
+ For a given instruction, the template must ensure that no more than
+ 2 'A' registers are used. Otherwise, the register replacement is not
+ possible.
+
+ To replace the Z register, the algorithm is not terrific:
+ 1. Insns that do not use the Z register are not changed
+ 2. When the Z register is used, we scan forward through the insns
+ looking for a potential replacement register: either X or Y, and
+ sometimes D. We stop when a call, a label or a branch is seen, or
+ when we detect that both X and Y are used (probably at different
+ times, but it does not matter).
+ 3. The register that will be used for the replacement of Z is saved
+ in a .page0 register or on the stack. If the first instruction that
+ uses Z uses it as an input, the value is loaded from another .page0
+ register. The replacement register is pushed on the stack in the
+ rare cases where a compare insn uses Z and we couldn't find if X/Y
+ are dead.
+ 4. The Z register is replaced in all instructions until we reach
+ the end of the Z-block, as detected by step 2.
+ 5. If we detect that Z is still alive, its value is saved.
+ If the replacement register is alive, its old value is loaded.
+
+ The Z register can be disabled with -ffixed-z.
+*/
+
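+/* State of one Z register replacement sequence, filled in by
+ m68hc11_check_z_replacement and used to emit the save/restore
+ code around the replaced insns. */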
+struct replace_info
+{
+ rtx first;
+ rtx replace_reg;
+ int need_save_z;
+ int must_load_z;
+ int must_save_reg;
+ int must_restore_reg;
+ rtx last;
+ int regno;
+ int x_used;
+ int y_used;
+ int can_use_d;
+ int found_call;
+ int z_died;
+ int z_set_count;
+ rtx z_value;
+ int must_push_reg;
+ int save_before_last;
+ int z_loaded_with_sp;
+};
+
+static int m68hc11_check_z_replacement (rtx, struct replace_info *);
+static void m68hc11_find_z_replacement (rtx, struct replace_info *);
+static void m68hc11_z_replacement (rtx);
+static void m68hc11_reassign_regs (rtx);
+
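+/* Nonzero once the Z register replacement pass has run. */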
+int z_replacement_completed = 0;
+
+/* Analyze the insn to find out which replacement register to use and
+ the boundaries of the replacement.
+ Returns 0 if we reached the last insn to be replaced, 1 if we can
+ continue replacement in next insns. */
+
+static int
+m68hc11_check_z_replacement (rtx insn, struct replace_info *info)
+{
+ int this_insn_uses_ix;
+ int this_insn_uses_iy;
+ int this_insn_uses_z;
+ int this_insn_uses_z_in_dst;
+ int this_insn_uses_d;
+ rtx body;
+ int z_dies_here;
+
+ /* A call is said to clobber the Z register, so we don't need
+ to save the value of Z. We also don't need to restore
+ the replacement register (unless it is used by the call). */
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ body = PATTERN (insn);
+
+ info->can_use_d = 0;
+
+ /* If the call is an indirect call with Z, we have to use the
+ Y register because X can be used as an input (D+X).
+ We also must not save Z nor restore Y. */
+ if (reg_mentioned_p (z_reg, body))
+ {
+ insn = NEXT_INSN (insn);
+ info->x_used = 1;
+ info->y_used = 0;
+ info->found_call = 1;
+ info->must_restore_reg = 0;
+ info->last = NEXT_INSN (insn);
+ }
+ info->need_save_z = 0;
+ return 0;
+ }
+ if (GET_CODE (insn) == CODE_LABEL
+ || GET_CODE (insn) == BARRIER || GET_CODE (insn) == ASM_INPUT)
+ return 0;
+
+ if (GET_CODE (insn) == JUMP_INSN)
+ {
+ if (reg_mentioned_p (z_reg, insn) == 0)
+ return 0;
+
+ info->can_use_d = 0;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ info->need_save_z = 0;
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+ if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
+ {
+ return 1;
+ }
+
+ /* Z register dies here. */
+ z_dies_here = find_regno_note (insn, REG_DEAD, HARD_Z_REGNUM) != NULL;
+
+ body = PATTERN (insn);
+ if (GET_CODE (body) == SET)
+ {
+ rtx src = XEXP (body, 1);
+ rtx dst = XEXP (body, 0);
+
+ /* The condition code is set here. We have to restore X/Y and
+ save Z before any test/compare insn because the save/restore
+ sequences themselves change the condition codes. When the
+ compare insn uses Z and we can't use X/Y, the comparison is made
+ with the *ZREG soft register (this is supported by the cmphi,
+ cmpqi, tsthi, tstqi patterns). */
+ if (dst == cc0_rtx)
+ {
+ if ((GET_CODE (src) == REG && REGNO (src) == HARD_Z_REGNUM)
+ || (GET_CODE (src) == COMPARE &&
+ ((rtx_equal_p (XEXP (src, 0), z_reg)
+ && H_REG_P (XEXP (src, 1)))
+ || (rtx_equal_p (XEXP (src, 1), z_reg)
+ && H_REG_P (XEXP (src, 0))))))
+ {
+ if (insn == info->first)
+ {
+ info->must_load_z = 0;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ info->need_save_z = 0;
+ info->found_call = 1;
+ info->regno = SOFT_Z_REGNUM;
+ info->last = NEXT_INSN (insn);
+ }
+ return 0;
+ }
+ if (reg_mentioned_p (z_reg, src) == 0)
+ {
+ info->can_use_d = 0;
+ return 0;
+ }
+
+ if (insn != info->first)
+ return 0;
+
+ /* Compare insn which uses Z. We have to save/restore the X/Y
+ register without modifying the condition codes. For this
+ we have to use a push/pop insn. */
+ info->must_push_reg = 1;
+ info->last = insn;
+ }
+
+ /* Z reg is set to something new. We don't need to load it. */
+ if (Z_REG_P (dst))
+ {
+ if (!reg_mentioned_p (z_reg, src))
+ {
+ /* Z is set without using its previous value. If this is
+ not the first insn of the sequence, stop here and let the
+ set start a new Z replacement sequence; otherwise Z's old
+ value need not be loaded. */
+ if (insn != info->first)
+ {
+ return 0;
+ }
+ info->must_load_z = 0;
+ }
+ info->z_set_count++;
+ info->z_value = src;
+ if (SP_REG_P (src))
+ info->z_loaded_with_sp = 1;
+ }
+ else if (reg_mentioned_p (z_reg, dst))
+ info->can_use_d = 0;
+
+ this_insn_uses_d = reg_mentioned_p (d_reg, src)
+ | reg_mentioned_p (d_reg, dst);
+ this_insn_uses_ix = reg_mentioned_p (ix_reg, src)
+ | reg_mentioned_p (ix_reg, dst);
+ this_insn_uses_iy = reg_mentioned_p (iy_reg, src)
+ | reg_mentioned_p (iy_reg, dst);
+ this_insn_uses_z = reg_mentioned_p (z_reg, src);
+
+ /* If z is used as an address operand (like (MEM (reg z))),
+ we can't replace it with d. */
+ if (this_insn_uses_z && !Z_REG_P (src)
+ && !(m68hc11_arith_operator (src, GET_MODE (src))
+ && Z_REG_P (XEXP (src, 0))
+ && !reg_mentioned_p (z_reg, XEXP (src, 1))
+ && insn == info->first
+ && dead_register_here (insn, d_reg)))
+ info->can_use_d = 0;
+
+ this_insn_uses_z_in_dst = reg_mentioned_p (z_reg, dst);
+ if (TARGET_M6812 && !z_dies_here
+ && ((this_insn_uses_z && side_effects_p (src))
+ || (this_insn_uses_z_in_dst && side_effects_p (dst))))
+ {
+ info->need_save_z = 1;
+ info->z_set_count++;
+ }
+ this_insn_uses_z |= this_insn_uses_z_in_dst;
+
+ if (this_insn_uses_z && this_insn_uses_ix && this_insn_uses_iy)
+ {
+ fatal_insn ("registers IX, IY and Z used in the same INSN", insn);
+ }
+
+ if (this_insn_uses_d)
+ info->can_use_d = 0;
+
+ /* IX and IY are used at the same time; we have to restore
+ the value of the scratch register before this insn. */
+ if (this_insn_uses_ix && this_insn_uses_iy)
+ {
+ return 0;
+ }
+
+ if (this_insn_uses_ix && X_REG_P (dst) && GET_MODE (dst) == SImode)
+ info->can_use_d = 0;
+
+ if (info->x_used == 0 && this_insn_uses_ix)
+ {
+ if (info->y_used)
+ {
+ /* We have a (set (REG:HI X) (REG:HI Z)).
+ Since X is used as the replacement register for Z, this
+ insn becomes a no-op and is no longer necessary. We turn it
+ into a note. We must not reload the old value of X. */
+ if (X_REG_P (dst) && rtx_equal_p (src, z_reg))
+ {
+ if (z_dies_here)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ }
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ info->found_call = 1;
+ info->can_use_d = 0;
+ SET_INSN_DELETED (insn);
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+
+ if (X_REG_P (dst)
+ && (rtx_equal_p (src, z_reg)
+ || (z_dies_here && !reg_mentioned_p (ix_reg, src))))
+ {
+ if (z_dies_here)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ }
+ info->last = NEXT_INSN (insn);
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ else if (X_REG_P (dst) && reg_mentioned_p (z_reg, src)
+ && !reg_mentioned_p (ix_reg, src))
+ {
+ if (z_dies_here)
+ {
+ info->z_died = 1;
+ info->need_save_z = 0;
+ }
+ else if (TARGET_M6812 && side_effects_p (src))
+ {
+ info->last = 0;
+ info->must_restore_reg = 0;
+ return 0;
+ }
+ else
+ {
+ info->save_before_last = 1;
+ }
+ info->must_restore_reg = 0;
+ info->last = NEXT_INSN (insn);
+ }
+ else if (info->can_use_d)
+ {
+ info->last = NEXT_INSN (insn);
+ info->x_used = 1;
+ }
+ return 0;
+ }
+ info->x_used = 1;
+ if (z_dies_here && !reg_mentioned_p (ix_reg, src)
+ && GET_CODE (dst) == REG && REGNO (dst) == HARD_X_REGNUM)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ info->last = NEXT_INSN (insn);
+ info->regno = HARD_X_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ return 0;
+ }
+ if (rtx_equal_p (src, z_reg) && rtx_equal_p (dst, ix_reg))
+ {
+ info->regno = HARD_X_REGNUM;
+ info->must_restore_reg = 0;
+ info->must_save_reg = 0;
+ return 0;
+ }
+ }
+ if (info->y_used == 0 && this_insn_uses_iy)
+ {
+ if (info->x_used)
+ {
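+ /* Mirror of the X case above, with Y as the replacement
+ register for Z. */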
+ if (Y_REG_P (dst) && rtx_equal_p (src, z_reg))
+ {
+ if (z_dies_here)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ }
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ info->found_call = 1;
+ info->can_use_d = 0;
+ SET_INSN_DELETED (insn);
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+
+ if (Y_REG_P (dst)
+ && (rtx_equal_p (src, z_reg)
+ || (z_dies_here && !reg_mentioned_p (iy_reg, src))))
+ {
+ if (z_dies_here)
+ {
+ info->z_died = 1;
+ info->need_save_z = 0;
+ }
+ info->last = NEXT_INSN (insn);
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ else if (Y_REG_P (dst) && reg_mentioned_p (z_reg, src)
+ && !reg_mentioned_p (iy_reg, src))
+ {
+ if (z_dies_here)
+ {
+ info->z_died = 1;
+ info->need_save_z = 0;
+ }
+ else if (TARGET_M6812 && side_effects_p (src))
+ {
+ info->last = 0;
+ info->must_restore_reg = 0;
+ return 0;
+ }
+ else
+ {
+ info->save_before_last = 1;
+ }
+ info->must_restore_reg = 0;
+ info->last = NEXT_INSN (insn);
+ }
+ else if (info->can_use_d)
+ {
+ info->last = NEXT_INSN (insn);
+ info->y_used = 1;
+ }
+
+ return 0;
+ }
+ info->y_used = 1;
+ if (z_dies_here && !reg_mentioned_p (iy_reg, src)
+ && GET_CODE (dst) == REG && REGNO (dst) == HARD_Y_REGNUM)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ info->last = NEXT_INSN (insn);
+ info->regno = HARD_Y_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ return 0;
+ }
+ if (rtx_equal_p (src, z_reg) && rtx_equal_p (dst, iy_reg))
+ {
+ info->regno = HARD_Y_REGNUM;
+ info->must_restore_reg = 0;
+ info->must_save_reg = 0;
+ return 0;
+ }
+ }
+ if (z_dies_here)
+ {
+ info->need_save_z = 0;
+ info->z_died = 1;
+ if (info->last == 0)
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+ return info->last != NULL_RTX ? 0 : 1;
+ }
+ if (GET_CODE (body) == PARALLEL)
+ {
+ int i;
+ char ix_clobber = 0;
+ char iy_clobber = 0;
+ char z_clobber = 0;
+ this_insn_uses_iy = 0;
+ this_insn_uses_ix = 0;
+ this_insn_uses_z = 0;
+
+ for (i = XVECLEN (body, 0) - 1; i >= 0; i--)
+ {
+ rtx x;
+ int uses_ix, uses_iy, uses_z;
+
+ x = XVECEXP (body, 0, i);
+
+ if (info->can_use_d && reg_mentioned_p (d_reg, x))
+ info->can_use_d = 0;
+
+ uses_ix = reg_mentioned_p (ix_reg, x);
+ uses_iy = reg_mentioned_p (iy_reg, x);
+ uses_z = reg_mentioned_p (z_reg, x);
+ if (GET_CODE (x) == CLOBBER)
+ {
+ ix_clobber |= uses_ix;
+ iy_clobber |= uses_iy;
+ z_clobber |= uses_z;
+ }
+ else
+ {
+ this_insn_uses_ix |= uses_ix;
+ this_insn_uses_iy |= uses_iy;
+ this_insn_uses_z |= uses_z;
+ }
+ if (uses_z && GET_CODE (x) == SET)
+ {
+ rtx dst = XEXP (x, 0);
+
+ if (Z_REG_P (dst))
+ info->z_set_count++;
+ }
+ if (TARGET_M6812 && uses_z && side_effects_p (x))
+ info->need_save_z = 1;
+
+ if (z_clobber)
+ info->need_save_z = 0;
+ }
+ if (debug_m6811)
+ {
+ printf ("Uses X:%d Y:%d Z:%d CX:%d CY:%d CZ:%d\n",
+ this_insn_uses_ix, this_insn_uses_iy,
+ this_insn_uses_z, ix_clobber, iy_clobber, z_clobber);
+ debug_rtx (insn);
+ }
+ if (this_insn_uses_z)
+ info->can_use_d = 0;
+
+ if (z_clobber && info->first != insn)
+ {
+ info->need_save_z = 0;
+ info->last = insn;
+ return 0;
+ }
+ if (z_clobber && info->x_used == 0 && info->y_used == 0)
+ {
+ if (this_insn_uses_z == 0 && insn == info->first)
+ {
+ info->must_load_z = 0;
+ }
+ if (dead_register_here (insn, d_reg))
+ {
+ info->regno = HARD_D_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ else if (dead_register_here (insn, ix_reg))
+ {
+ info->regno = HARD_X_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ else if (dead_register_here (insn, iy_reg))
+ {
+ info->regno = HARD_Y_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ if (info->regno >= 0)
+ {
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+ if (this_insn_uses_ix == 0)
+ {
+ info->regno = HARD_X_REGNUM;
+ info->must_save_reg = 1;
+ info->must_restore_reg = 1;
+ }
+ else if (this_insn_uses_iy == 0)
+ {
+ info->regno = HARD_Y_REGNUM;
+ info->must_save_reg = 1;
+ info->must_restore_reg = 1;
+ }
+ else
+ {
+ info->regno = HARD_D_REGNUM;
+ info->must_save_reg = 1;
+ info->must_restore_reg = 1;
+ }
+ info->last = NEXT_INSN (insn);
+ return 0;
+ }
+
+ if (((info->x_used || this_insn_uses_ix) && iy_clobber)
+ || ((info->y_used || this_insn_uses_iy) && ix_clobber))
+ {
+ if (this_insn_uses_z)
+ {
+ if (info->y_used == 0 && iy_clobber)
+ {
+ info->regno = HARD_Y_REGNUM;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ if (info->first != insn
+ && ((info->y_used && ix_clobber)
+ || (info->x_used && iy_clobber)))
+ info->last = insn;
+ else
+ info->last = NEXT_INSN (insn);
+ info->save_before_last = 1;
+ }
+ return 0;
+ }
+ if (this_insn_uses_ix && this_insn_uses_iy)
+ {
+ if (this_insn_uses_z)
+ {
+ fatal_insn ("cannot do z-register replacement", insn);
+ }
+ return 0;
+ }
+ if (info->x_used == 0 && (this_insn_uses_ix || ix_clobber))
+ {
+ if (info->y_used)
+ {
+ return 0;
+ }
+ info->x_used = 1;
+ if (iy_clobber || z_clobber)
+ {
+ info->last = NEXT_INSN (insn);
+ info->save_before_last = 1;
+ return 0;
+ }
+ }
+
+ if (info->y_used == 0 && (this_insn_uses_iy || iy_clobber))
+ {
+ if (info->x_used)
+ {
+ return 0;
+ }
+ info->y_used = 1;
+ if (ix_clobber || z_clobber)
+ {
+ info->last = NEXT_INSN (insn);
+ info->save_before_last = 1;
+ return 0;
+ }
+ }
+ if (z_dies_here)
+ {
+ info->z_died = 1;
+ info->need_save_z = 0;
+ }
+ return 1;
+ }
+ if (GET_CODE (body) == CLOBBER)
+ {
+ rtx dst = XEXP (body, 0);
+
+ this_insn_uses_ix = reg_mentioned_p (ix_reg, dst);
+ this_insn_uses_iy = reg_mentioned_p (iy_reg, dst);
+
+ /* IX and IY are used at the same time; we have to restore
+ the value of the scratch register before this insn. */
+ if (this_insn_uses_ix && this_insn_uses_iy)
+ {
+ return 0;
+ }
+ if (info->x_used == 0 && this_insn_uses_ix)
+ {
+ if (info->y_used)
+ {
+ return 0;
+ }
+ info->x_used = 1;
+ }
+ if (info->y_used == 0 && this_insn_uses_iy)
+ {
+ if (info->x_used)
+ {
+ return 0;
+ }
+ info->y_used = 1;
+ }
+ return 1;
+ }
+ return 1;
+}
+
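+/* Initialize INFO and scan forward from INSN to choose a replacement
+ register for Z and to find the boundaries of the block in which the
+ replacement can be done (see the stop conditions listed before the
+ scan loop below). */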
+static void
+m68hc11_find_z_replacement (rtx insn, struct replace_info *info)
+{
+ int reg;
+
+ info->replace_reg = NULL_RTX;
+ info->must_load_z = 1;
+ info->need_save_z = 1;
+ info->must_save_reg = 1;
+ info->must_restore_reg = 1;
+ info->first = insn;
+ info->x_used = 0;
+ info->y_used = 0;
+ info->can_use_d = TARGET_M6811 ? 1 : 0;
+ info->found_call = 0;
+ info->z_died = 0;
+ info->last = 0;
+ info->regno = -1;
+ info->z_set_count = 0;
+ info->z_value = NULL_RTX;
+ info->must_push_reg = 0;
+ info->save_before_last = 0;
+ info->z_loaded_with_sp = 0;
+
+ /* Scan the insn forward to find an address register that is not used.
+ Stop when:
+ - the flow of the program changes,
+ - when we detect that both X and Y are necessary,
+ - when the Z register dies,
+ - when the condition codes are set. */
+
+ for (; insn && info->z_died == 0; insn = NEXT_INSN (insn))
+ {
+ if (m68hc11_check_z_replacement (insn, info) == 0)
+ break;
+ }
+
+ /* Maybe we can use Y or X if they contain the same value as Z.
+ This happens very often after reload. */
+ if (info->z_set_count == 1)
+ {
+ rtx p = info->first;
+ rtx v = 0;
+
+ if (info->x_used)
+ {
+ v = find_last_value (iy_reg, &p, insn, 1);
+ }
+ else if (info->y_used)
+ {
+ v = find_last_value (ix_reg, &p, insn, 1);
+ }
+ if (v && (v != iy_reg && v != ix_reg) && rtx_equal_p (v, info->z_value))
+ {
+ if (info->x_used)
+ info->regno = HARD_Y_REGNUM;
+ else
+ info->regno = HARD_X_REGNUM;
+ info->must_load_z = 0;
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ info->found_call = 1;
+ }
+ }
+ if (info->z_set_count == 0)
+ info->need_save_z = 0;
+
+ if (insn == 0)
+ info->need_save_z = 0;
+
+ if (info->last == 0)
+ info->last = insn;
+
+ if (info->regno >= 0)
+ {
+ reg = info->regno;
+ info->replace_reg = gen_rtx_REG (HImode, reg);
+ }
+ else if (info->can_use_d)
+ {
+ reg = HARD_D_REGNUM;
+ info->replace_reg = d_reg;
+ }
+ else if (info->x_used)
+ {
+ reg = HARD_Y_REGNUM;
+ info->replace_reg = iy_reg;
+ }
+ else
+ {
+ reg = HARD_X_REGNUM;
+ info->replace_reg = ix_reg;
+ }
+ info->regno = reg;
+
+ if (info->must_save_reg && info->must_restore_reg)
+ {
+ if (insn && dead_register_here (insn, info->replace_reg))
+ {
+ info->must_save_reg = 0;
+ info->must_restore_reg = 0;
+ }
+ }
+}
+
+/* The insn uses the Z register. Find a replacement register for it
+ (either X or Y) and replace it in the insn and the next ones until
+ the flow changes or the replacement register is used. Instructions
+ are emitted before and after the Z-block to preserve the value of
+ Z and of the replacement register. */
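+
+/* Illustration (a sketch of the usual case, with X free over the
+ block): an insn such as
+ (set (mem:HI (reg:HI Z)) (reg:HI D))
+ is rewritten into
+ (set (reg:HI SOFT_SAVED_XY_REGNUM) (reg:HI X)) ; save X in page0
+ (set (reg:HI X) (reg:HI SOFT_Z_REGNUM)) ; load the Z value
+ (set (mem:HI (reg:HI X)) (reg:HI D)) ; original insn, Z -> X
+ (set (reg:HI X) (reg:HI SOFT_SAVED_XY_REGNUM)) ; restore X
+ The save, load and restore insns are omitted when the scan above
+ proves they are not needed. */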
+
+static void
+m68hc11_z_replacement (rtx insn)
+{
+ rtx replace_reg_qi;
+ rtx replace_reg;
+ struct replace_info info;
+
+ /* Find the trivial cases where we only need to replace Z with the
+ equivalent soft register. */
+ if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx body = PATTERN (insn);
+ rtx src = XEXP (body, 1);
+ rtx dst = XEXP (body, 0);
+
+ if (Z_REG_P (dst) && (H_REG_P (src) && !SP_REG_P (src)))
+ {
+ XEXP (body, 0) = gen_rtx_REG (GET_MODE (dst), SOFT_Z_REGNUM);
+ return;
+ }
+ else if (Z_REG_P (src)
+ && ((H_REG_P (dst) && !SP_REG_P (src)) || dst == cc0_rtx))
+ {
+ XEXP (body, 1) = gen_rtx_REG (GET_MODE (src), SOFT_Z_REGNUM);
+ return;
+ }
+ else if (D_REG_P (dst)
+ && m68hc11_arith_operator (src, GET_MODE (src))
+ && D_REG_P (XEXP (src, 0)) && Z_REG_P (XEXP (src, 1)))
+ {
+ XEXP (src, 1) = gen_rtx_REG (GET_MODE (src), SOFT_Z_REGNUM);
+ return;
+ }
+ else if (Z_REG_P (dst) && GET_CODE (src) == CONST_INT
+ && INTVAL (src) == 0)
+ {
+ XEXP (body, 0) = gen_rtx_REG (GET_MODE (dst), SOFT_Z_REGNUM);
+ /* Force it to be re-recognized. */
+ INSN_CODE (insn) = -1;
+ return;
+ }
+ }
+
+ m68hc11_find_z_replacement (insn, &info);
+
+ replace_reg = info.replace_reg;
+ replace_reg_qi = NULL_RTX;
+
+ /* Save the replacement register in a .page0 location. */
+ if (info.must_save_reg && !info.must_push_reg)
+ {
+ rtx dst;
+
+ if (info.must_push_reg && 0)
+ dst = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ else
+ dst = gen_rtx_REG (HImode, SOFT_SAVED_XY_REGNUM);
+
+ emit_insn_before (gen_movhi (dst,
+ gen_rtx_REG (HImode, info.regno)), insn);
+ }
+ if (info.must_load_z && !info.must_push_reg)
+ {
+ emit_insn_before (gen_movhi (gen_rtx_REG (HImode, info.regno),
+ gen_rtx_REG (HImode, SOFT_Z_REGNUM)),
+ insn);
+ }
+
+
+ /* Replace all occurrences of Z by replace_reg.
+ Stop when the last instruction to replace is reached.
+ Also stop when we detect a change in the flow (not strictly
+ necessary; just a safeguard). */
+
+ for (; insn && insn != info.last; insn = NEXT_INSN (insn))
+ {
+ rtx body;
+
+ if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == BARRIER)
+ break;
+
+ if (GET_CODE (insn) != INSN
+ && GET_CODE (insn) != CALL_INSN && GET_CODE (insn) != JUMP_INSN)
+ continue;
+
+ body = PATTERN (insn);
+ if (GET_CODE (body) == SET || GET_CODE (body) == PARALLEL
+ || GET_CODE (body) == ASM_OPERANDS
+ || GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ {
+ rtx note;
+
+ if (debug_m6811 && reg_mentioned_p (replace_reg, body))
+ {
+ printf ("Reg mentioned here...:\n");
+ fflush (stdout);
+ debug_rtx (insn);
+ }
+
+ /* Stack pointer was decremented by 2 due to the push.
+ Correct that by adding 2 to the destination. */
+ if (info.must_push_reg
+ && info.z_loaded_with_sp && GET_CODE (body) == SET)
+ {
+ rtx src, dst;
+
+ src = SET_SRC (body);
+ dst = SET_DEST (body);
+ if (SP_REG_P (src) && Z_REG_P (dst))
+ emit_insn_after (gen_addhi3 (dst, dst, const2_rtx), insn);
+ }
+
+ /* Replace any (REG:HI Z) occurrence by either X or Y. */
+ if (!validate_replace_rtx (z_reg, replace_reg, insn))
+ {
+ INSN_CODE (insn) = -1;
+ if (!validate_replace_rtx (z_reg, replace_reg, insn))
+ fatal_insn ("cannot do z-register replacement", insn);
+ }
+
+ /* Likewise for (REG:QI Z). */
+ if (reg_mentioned_p (z_reg, insn))
+ {
+ if (replace_reg_qi == NULL_RTX)
+ replace_reg_qi = gen_rtx_REG (QImode, REGNO (replace_reg));
+ validate_replace_rtx (z_reg_qi, replace_reg_qi, insn);
+ }
+
+ /* If there is a REG_INC note on Z, replace it with a
+ REG_INC note on the replacement register. This is necessary
+ to make sure that the flow pass will identify the change
+ and will not remove a possible insn that saves Z. */
+ for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+ {
+ if (REG_NOTE_KIND (note) == REG_INC
+ && GET_CODE (XEXP (note, 0)) == REG
+ && REGNO (XEXP (note, 0)) == REGNO (z_reg))
+ {
+ XEXP (note, 0) = replace_reg;
+ }
+ }
+ }
+ if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ break;
+ }
+
+ /* Save Z before restoring the old value. */
+ if (insn && info.need_save_z && !info.must_push_reg)
+ {
+ rtx save_pos_insn = insn;
+
+ /* If Z is clobbered by the last insn, we have to save its value
+ before the last instruction. */
+ if (info.save_before_last)
+ save_pos_insn = PREV_INSN (save_pos_insn);
+
+ emit_insn_before (gen_movhi (gen_rtx_REG (HImode, SOFT_Z_REGNUM),
+ gen_rtx_REG (HImode, info.regno)),
+ save_pos_insn);
+ }
+
+ if (info.must_push_reg && info.last)
+ {
+ rtx new_body, body;
+
+ body = PATTERN (info.last);
+ new_body = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, body,
+ gen_rtx_USE (VOIDmode,
+ replace_reg),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (HImode,
+ SOFT_Z_REGNUM))));
+ PATTERN (info.last) = new_body;
+
+ /* Force recognition on insn since we changed it. */
+ INSN_CODE (insn) = -1;
+
+ if (!validate_replace_rtx (z_reg, replace_reg, info.last))
+ {
+ fatal_insn ("invalid Z register replacement for insn", insn);
+ }
+ insn = NEXT_INSN (info.last);
+ }
+
+ /* Restore the replacement register unless it died. */
+ if (insn && info.must_restore_reg && !info.must_push_reg)
+ {
+ rtx dst;
+
+ if (info.must_push_reg && 0)
+ dst = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ else
+ dst = gen_rtx_REG (HImode, SOFT_SAVED_XY_REGNUM);
+
+ emit_insn_before (gen_movhi (gen_rtx_REG (HImode, info.regno),
+ dst), insn);
+ }
+
+}
+
+
+/* Scan all the insns and reassign some registers:
+ - The Z register (if it was used) is mapped onto X or Y depending
+ on the instruction. */
+
+static void
+m68hc11_reassign_regs (rtx first)
+{
+ rtx insn;
+
+ ix_reg = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ iy_reg = gen_rtx_REG (HImode, HARD_Y_REGNUM);
+ z_reg = gen_rtx_REG (HImode, HARD_Z_REGNUM);
+ z_reg_qi = gen_rtx_REG (QImode, HARD_Z_REGNUM);
+
+ /* Scan all insns to replace Z by X or Y preserving the old value
+ of X/Y and restoring it afterward. */
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx body;
+
+ if (GET_CODE (insn) == CODE_LABEL
+ || GET_CODE (insn) == NOTE || GET_CODE (insn) == BARRIER)
+ continue;
+
+ if (!INSN_P (insn))
+ continue;
+
+ body = PATTERN (insn);
+ if (GET_CODE (body) == CLOBBER || GET_CODE (body) == USE)
+ continue;
+
+ if (GET_CODE (body) == CONST_INT || GET_CODE (body) == ASM_INPUT
+ || GET_CODE (body) == ASM_OPERANDS
+ || GET_CODE (body) == UNSPEC || GET_CODE (body) == UNSPEC_VOLATILE)
+ continue;
+
+ if (GET_CODE (body) == SET || GET_CODE (body) == PARALLEL
+ || GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ {
+
+ /* If Z appears in this insn, replace it in the current insn
+ and the next ones until the flow changes or we have to
+ restore back the replacement register. */
+
+ if (reg_mentioned_p (z_reg, body))
+ {
+ m68hc11_z_replacement (insn);
+ }
+ }
+ else
+ {
+ printf ("insn not handled by Z replacement:\n");
+ fflush (stdout);
+ debug_rtx (insn);
+ }
+ }
+}
+
+
+/* Machine-dependent reorg pass.
+ Specific optimizations are defined here:
+ - this pass changes the Z register into either X or Y
+ (it preserves the previous X/Y values in a memory slot in page0).
+
+ When this pass is finished, the global variable
+ 'z_replacement_completed' is set to 2. */
+
+static void
+m68hc11_reorg (void)
+{
+ int split_done = 0;
+ rtx first;
+
+ z_replacement_completed = 0;
+ z_reg = gen_rtx_REG (HImode, HARD_Z_REGNUM);
+ first = get_insns ();
+
+ /* Some RTXs are shared at this point, which breaks the Z register
+ replacement; unshare everything. */
+ unshare_all_rtl_again (first);
+
+ /* Force a split of all splittable insns. This is necessary for the
+ Z register replacement mechanism, so that we end up with basic insns. */
+ split_all_insns_noflow ();
+ split_done = 1;
+
+ z_replacement_completed = 1;
+ m68hc11_reassign_regs (first);
+
+ if (optimize)
+ compute_bb_for_insn ();
+
+ /* After some splitting, there are some opportunities for the CSE pass.
+ This happens quite often when 32-bit or wider patterns are split. */
+ if (optimize > 0 && split_done)
+ {
+ reload_cse_regs (first);
+ }
+
+ /* Re-create the REG_DEAD notes. These notes are used in the machine
+ description to select the best assembly directives. */
+ if (optimize)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ df_remove_problem (df_note);
+ }
+
+ z_replacement_completed = 2;
+
+ /* If optimizing, then go ahead and split insns that must be
+ split after Z register replacement. This gives more opportunities
+ for the peephole pass (in particular for consecutive xgdx/xgdy). */
+ if (optimize > 0)
+ split_all_insns_noflow ();
+
+ /* Once insns are split after z_replacement_completed == 2,
+ we must not re-run life_analysis: the xgdx/xgdy patterns are
+ not recognized and the life_analysis pass would remove some
+ insns because it thinks some SETs are no-ops or dead stores
+ (which is false due to the swap).
+
+ Instead, do a simple pass to eliminate the no-op sets that the
+ final split could generate (they made the split definitions easier). */
+ {
+ rtx insn;
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx body;
+
+ if (INSN_DELETED_P (insn))
+ continue;
+ if (!INSN_P (insn))
+ continue;
+
+ /* Remove the (set (R) (R)) insns generated by some splits. */
+ body = PATTERN (insn);
+ if (GET_CODE (body) == SET
+ && rtx_equal_p (SET_SRC (body), SET_DEST (body)))
+ {
+ SET_INSN_DELETED (insn);
+ continue;
+ }
+ }
+ }
+}
+
+/* Override the memcpy, memcmp and memset library functions. */
+
+static void
+m68hc11_init_libfuncs (void)
+{
+ memcpy_libfunc = init_one_libfunc ("__memcpy");
+ memcmp_libfunc = init_one_libfunc ("__memcmp");
+ memset_libfunc = init_one_libfunc ("__memset");
+}
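+
+/* With the overrides above, the block move, compare and clear
+ operations that GCC expands as library calls go to __memcpy,
+ __memcmp and __memset instead of the default memcpy, memcmp
+ and memset entry points. */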
+
+
+
+/* Cost functions. */
+
+/* Cost of moving memory. */
+int
+m68hc11_memory_move_cost (enum machine_mode mode, enum reg_class rclass,
+ int in ATTRIBUTE_UNUSED)
+{
+ if (rclass <= H_REGS && rclass > NO_REGS)
+ {
+ if (GET_MODE_SIZE (mode) <= 2)
+ return COSTS_N_INSNS (1) + (reload_completed | reload_in_progress);
+ else
+ return COSTS_N_INSNS (2) + (reload_completed | reload_in_progress);
+ }
+ else
+ {
+ if (GET_MODE_SIZE (mode) <= 2)
+ return COSTS_N_INSNS (3);
+ else
+ return COSTS_N_INSNS (4);
+ }
+}
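+
+/* For example, an HImode move between memory and a hard register
+ class (NO_REGS < class <= H_REGS) costs COSTS_N_INSNS (1) before
+ reload, while the same move for a soft register class costs
+ COSTS_N_INSNS (3); during and after reload the hard register cost
+ is raised by one. */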
+
+
+/* Cost of moving data from a register of class 'from' to one in class 'to'.
+ Reload does not check the constraints of set insns when the two registers
+ have a move cost of 2. Setting a higher cost will force reload to check
+ the constraints. */
+int
+m68hc11_register_move_cost (enum machine_mode mode, enum reg_class from,
+ enum reg_class to)
+{
+ /* All costs are symmetric, so reduce cases by putting the
+ lower number class as the destination. */
+ if (from < to)
+ {
+ enum reg_class tmp = to;
+ to = from, from = tmp;
+ }
+ if (to >= S_REGS)
+ return m68hc11_memory_move_cost (mode, S_REGS, 0);
+ else if (from <= S_REGS)
+ return COSTS_N_INSNS (1) + (reload_completed | reload_in_progress);
+ else
+ return COSTS_N_INSNS (2);
+}
+
+
+/* Provide the costs of an addressing mode that contains ADDR.
+ If ADDR is not a valid address, its cost is irrelevant. */
+
+static int
+m68hc11_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ int cost = 4;
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* Make the cost of hard registers, especially SP and FP, small. */
+ if (REGNO (addr) < FIRST_PSEUDO_REGISTER)
+ cost = 0;
+ else
+ cost = 1;
+ break;
+
+ case SYMBOL_REF:
+ cost = 8;
+ break;
+
+ case LABEL_REF:
+ case CONST:
+ cost = 0;
+ break;
+
+ case PLUS:
+ {
+ register rtx plus0 = XEXP (addr, 0);
+ register rtx plus1 = XEXP (addr, 1);
+
+ if (GET_CODE (plus0) != REG)
+ break;
+
+ switch (GET_CODE (plus1))
+ {
+ case CONST_INT:
+ if (INTVAL (plus1) >= 2 * m68hc11_max_offset
+ || INTVAL (plus1) < m68hc11_min_offset)
+ cost = 3;
+ else if (INTVAL (plus1) >= m68hc11_max_offset)
+ cost = 2;
+ else
+ cost = 1;
+ if (REGNO (plus0) < FIRST_PSEUDO_REGISTER)
+ cost += 0;
+ else
+ cost += 1;
+ break;
+
+ case SYMBOL_REF:
+ cost = 8;
+ break;
+
+ case CONST:
+ case LABEL_REF:
+ cost = 0;
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+ case PRE_DEC:
+ case PRE_INC:
+ if (SP_REG_P (XEXP (addr, 0)))
+ cost = 1;
+ break;
+
+ default:
+ break;
+ }
+ if (debug_m6811)
+ {
+ printf ("Address cost: %d for :", cost);
+ fflush (stdout);
+ debug_rtx (addr);
+ }
+
+ return cost;
+}
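+
+/* Worked example: for (plus (reg:HI X) (const_int 5)), the CONST_INT
+ case gives cost 1 (assuming 5 is below m68hc11_max_offset) and the
+ hard register adds 0, so the address costs 1; a bare SYMBOL_REF
+ costs 8. */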
+
+static int
+m68hc11_shift_cost (enum machine_mode mode, rtx x, int shift)
+{
+ int total;
+
+ total = rtx_cost (x, SET, !optimize_size);
+ if (mode == QImode)
+ total += m68hc11_cost->shiftQI_const[shift % 8];
+ else if (mode == HImode)
+ total += m68hc11_cost->shiftHI_const[shift % 16];
+ else if (shift == 8 || shift == 16 || shift == 32)
+ total += m68hc11_cost->shiftHI_const[8];
+ else if (shift != 0 && shift != 16 && shift != 32)
+ {
+ total += m68hc11_cost->shiftHI_const[1] * shift;
+ }
+
+ /* For SI and others, the cost is higher. */
+ if (GET_MODE_SIZE (mode) > 2 && (shift % 16) != 0)
+ total *= GET_MODE_SIZE (mode) / 2;
+
+ /* When optimizing for size, make shifts more costly so that
+ multiplications are preferred. */
+ if (optimize_size && (shift % 8) != 0)
+ total *= 2;
+
+ return total;
+}
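+
+/* Example: a constant shift of an HImode register by 4 costs the
+ operand's rtx_cost plus shiftHI_const[4]; a shift by a non-constant
+ amount instead gets the shift_var cost added by m68hc11_rtx_costs_1
+ below. */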
+
+static int
+m68hc11_rtx_costs_1 (rtx x, enum rtx_code code,
+ enum rtx_code outer_code ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+ int extra_cost = 0;
+ int total;
+
+ switch (code)
+ {
+ case ROTATE:
+ case ROTATERT:
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ return m68hc11_shift_cost (mode, XEXP (x, 0), INTVAL (XEXP (x, 1)));
+ }
+
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
+ total += m68hc11_cost->shift_var;
+ return total;
+
+ case AND:
+ case XOR:
+ case IOR:
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
+ total += m68hc11_cost->logical;
+
+ /* Logical instructions are byte instructions only. */
+ total *= GET_MODE_SIZE (mode);
+ return total;
+
+ case MINUS:
+ case PLUS:
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
+ total += m68hc11_cost->add;
+ if (GET_MODE_SIZE (mode) > 2)
+ {
+ total *= GET_MODE_SIZE (mode) / 2;
+ }
+ return total;
+
+ case UDIV:
+ case DIV:
+ case MOD:
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size) + rtx_cost (XEXP (x, 1), code, !optimize_size);
+ switch (mode)
+ {
+ case QImode:
+ total += m68hc11_cost->divQI;
+ break;
+
+ case HImode:
+ total += m68hc11_cost->divHI;
+ break;
+
+ case SImode:
+ default:
+ total += m68hc11_cost->divSI;
+ break;
+ }
+ return total;
+
+ case MULT:
+ /* The mul instruction produces a 16-bit result. */
+ if (mode == HImode && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+ return m68hc11_cost->multQI
+ + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
+
+ /* The emul instruction produces a 32-bit result on the 68HC12. */
+ if (TARGET_M6812 && mode == SImode
+ && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+ return m68hc11_cost->multHI
+ + rtx_cost (XEXP (XEXP (x, 0), 0), code, !optimize_size)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), code, !optimize_size);
+
+ total = rtx_cost (XEXP (x, 0), code, !optimize_size)
+ + rtx_cost (XEXP (x, 1), code, !optimize_size);
+ switch (mode)
+ {
+ case QImode:
+ total += m68hc11_cost->multQI;
+ break;
+
+ case HImode:
+ total += m68hc11_cost->multHI;
+ break;
+
+ case SImode:
+ default:
+ total += m68hc11_cost->multSI;
+ break;
+ }
+ return total;
+
+ case NEG:
+ case SIGN_EXTEND:
+ extra_cost = COSTS_N_INSNS (2);
+
+ /* Fall through */
+ case NOT:
+ case COMPARE:
+ case ABS:
+ case ZERO_EXTEND:
+ case ZERO_EXTRACT:
+ total = extra_cost + rtx_cost (XEXP (x, 0), code, !optimize_size);
+ if (mode == QImode)
+ {
+ return total + COSTS_N_INSNS (1);
+ }
+ if (mode == HImode)
+ {
+ return total + COSTS_N_INSNS (2);
+ }
+ if (mode == SImode)
+ {
+ return total + COSTS_N_INSNS (4);
+ }
+ return total + COSTS_N_INSNS (8);
+
+ case IF_THEN_ELSE:
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ return COSTS_N_INSNS (1);
+
+ return COSTS_N_INSNS (1);
+
+ default:
+ return COSTS_N_INSNS (4);
+ }
+}
+
+static bool
+m68hc11_rtx_costs (rtx x, int codearg, int outer_code_arg, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum rtx_code code = (enum rtx_code) codearg;
+ enum rtx_code outer_code = (enum rtx_code) outer_code_arg;
+
+ switch (code)
+ {
+ /* Constants are cheap. Moving them in registers must be avoided
+ because most instructions do not handle two register operands. */
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_DOUBLE:
+ /* Logical and arithmetic operations with a constant operand are
+ better because they are not supported with two registers. */
+ /* 'clr' is slow */
+ if (outer_code == SET && x == const0_rtx)
+ /* After reload, the reload_cse pass checks the cost to change
+ a SET into a PLUS. Make const0 cheap then. */
+ *total = 1 - reload_completed;
+ else
+ *total = 0;
+ return true;
+
+ case ZERO_EXTRACT:
+ if (outer_code != COMPARE)
+ return false;
+
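+ /* Fall through */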
+ case ROTATE:
+ case ROTATERT:
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ case MINUS:
+ case PLUS:
+ case AND:
+ case XOR:
+ case IOR:
+ case UDIV:
+ case DIV:
+ case MOD:
+ case MULT:
+ case NEG:
+ case SIGN_EXTEND:
+ case NOT:
+ case COMPARE:
+ case ZERO_EXTEND:
+ case IF_THEN_ELSE:
+ *total = m68hc11_rtx_costs_1 (x, code, outer_code);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+/* Worker function for TARGET_ASM_FILE_START. */
+
+static void
+m68hc11_file_start (void)
+{
+ default_file_start ();
+
+ fprintf (asm_out_file, "\t.mode %s\n", TARGET_SHORT ? "mshort" : "mlong");
+}
+
+
+/* Worker function for TARGET_ASM_CONSTRUCTOR. */
+
+static void
+m68hc11_asm_out_constructor (rtx symbol, int priority)
+{
+ default_ctor_section_asm_out_constructor (symbol, priority);
+ fprintf (asm_out_file, "\t.globl\t__do_global_ctors\n");
+}
+
+/* Worker function for TARGET_ASM_DESTRUCTOR. */
+
+static void
+m68hc11_asm_out_destructor (rtx symbol, int priority)
+{
+ default_dtor_section_asm_out_destructor (symbol, priority);
+ fprintf (asm_out_file, "\t.globl\t__do_global_dtors\n");
+}
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+m68hc11_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, HARD_D_REGNUM);
+}
+
+/* Return true if type TYPE should be returned in memory.
+ Blocks and data types larger than 4 bytes cannot be returned
+ in registers (D + X = 4 bytes). */
+
+static bool
+m68hc11_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ if (TYPE_MODE (type) == BLKmode)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 4);
+ }
+ else
+ return GET_MODE_SIZE (TYPE_MODE (type)) > 4;
+}
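+
+/* Consequently a 32-bit scalar (4 bytes) is returned in the D + X
+ register pair, while a 6-byte structure or any BLKmode object of
+ unknown size is returned in memory. */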
+
+#include "gt-m68hc11.h"
diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h
new file mode 100644
index 000000000..8f6d06867
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11.h
@@ -0,0 +1,1382 @@
+/* Definitions of target machine for GNU compiler.
+ Motorola 68HC11 and 68HC12.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Stephane Carrez (stcarrez@nerim.fr)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Note:
+ A first 68HC11 port was made by Otto Lind (otto@coactive.com)
+ on gcc 2.6.3. I have used it as a starting point for this port.
+ However, this new port is a complete re-write. Its internal
+ design is completely different. The generated code is not
+ compatible with the gcc 2.6.3 port.
+
+ The gcc 2.6.3 port is available at:
+
+ ftp.unina.it/pub/electronics/motorola/68hc11/gcc/gcc-6811-fsf.tar.gz
+
+*/
+
+/*****************************************************************************
+**
+** Controlling the Compilation Driver, `gcc'
+**
+*****************************************************************************/
+
+#undef ENDFILE_SPEC
+
+/* Compile and assemble for a 68hc11 unless there is a -m68hc12 option. */
+#ifndef ASM_SPEC
+#define ASM_SPEC \
+"%{m68hc12:-m68hc12}" \
+"%{m68hcs12:-m68hcs12}" \
+"%{!m68hc12:%{!m68hcs12:-m68hc11}} " \
+"%{mshort:-mshort}%{!mshort:-mlong} " \
+"%{fshort-double:-mshort-double}%{!fshort-double:-mlong-double}"
+#endif
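+
+/* For example, 'gcc -m68hc12 -mshort' makes the driver pass
+ '-m68hc12 -mshort -mlong-double' to the assembler, while a plain
+ invocation passes '-m68hc11 -mlong -mlong-double'. */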
+
+/* We need to tell the linker the target ELF format. Just pass an
+ emulation option. This can be overridden by gcc's -Wl option. */
+#ifndef LINK_SPEC
+#define LINK_SPEC \
+"%{m68hc12:-m m68hc12elf}" \
+"%{m68hcs12:-m m68hc12elf}" \
+"%{!m68hc12:%{!m68hcs12:-m m68hc11elf}} " \
+"%{!mnorelax:%{!m68hc12:%{!m68hcs12:-relax}}}"
+#endif
+
+#ifndef LIB_SPEC
+#define LIB_SPEC ""
+#endif
+
+#ifndef CC1_SPEC
+#define CC1_SPEC ""
+#endif
+
+#ifndef CPP_SPEC
+#define CPP_SPEC \
+"%{mshort:-D__HAVE_SHORT_INT__ -D__INT__=16}\
+ %{!mshort:-D__INT__=32}\
+ %{m68hc12:-Dmc6812 -DMC6812 -Dmc68hc12}\
+ %{m68hcs12:-Dmc6812 -DMC6812 -Dmc68hcs12}\
+ %{!m68hc12:%{!m68hcs12:-Dmc6811 -DMC6811 -Dmc68hc11}}\
+ %{fshort-double:-D__HAVE_SHORT_DOUBLE__}\
+ %{mlong-calls:-D__USE_RTC__}"
+#endif
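+
+/* Thus -m68hc12 predefines mc6812, MC6812 and mc68hc12, while the
+ 68HC11 default predefines mc6811, MC6811 and mc68hc11, together
+ with __INT__=32 (or __INT__=16 and __HAVE_SHORT_INT__ with
+ -mshort). */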
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt1%O%s"
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("mc68hc1x"); \
+ } \
+ while (0)
+
+/* As an embedded target, we have no libc. */
+#ifndef inhibit_libc
+# define inhibit_libc
+#endif
+
+/* Forward type declarations for prototype definitions.
+ rtx_ptr is equivalent to rtx; we can't use the same name. */
+struct rtx_def;
+typedef struct rtx_def *rtx_ptr;
+
+union tree_node;
+typedef union tree_node *tree_ptr;
+
+/* We can't declare enum machine_mode forward nor include 'machmode.h' here.
+ Prototypes defined here will use an int instead. It's better than no
+ prototype at all. */
+typedef int enum_machine_mode;
+
+/*****************************************************************************
+**
+** Run-time Target Specification
+**
+*****************************************************************************/
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+extern short *reg_renumber; /* def in local_alloc.c */
+
+#define TARGET_OP_TIME (optimize && optimize_size == 0)
+#define TARGET_RELAX (TARGET_NO_DIRECT_MODE)
+
+/* Default target_flags if no switches specified. */
+#ifndef TARGET_DEFAULT
+# define TARGET_DEFAULT 0
+#endif
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'. */
+#ifndef MULTILIB_DEFAULTS
+# if TARGET_DEFAULT & MASK_M6811
+# define MULTILIB_DEFAULTS { "m68hc11" }
+# else
+# define MULTILIB_DEFAULTS { "m68hc12" }
+# endif
+#endif
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION fprintf (stderr, " (MC68HC11/MC68HC12/MC68HCS12)")
+
+
+/* Define cost parameters for a given processor variant. */
+struct processor_costs {
+ const int add; /* cost of an add instruction */
+ const int logical; /* cost of a logical instruction */
+ const int shift_var;
+ const int shiftQI_const[8];
+ const int shiftHI_const[16];
+ const int multQI;
+ const int multHI;
+ const int multSI;
+ const int divQI;
+ const int divHI;
+ const int divSI;
+};
+
+/* Costs for the current processor. */
+extern const struct processor_costs *m68hc11_cost;
+
+
+/* target machine storage layout */
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest numbered. */
+#define WORDS_BIG_ENDIAN 1
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 2
+
+/* Definition of size_t. This is really an unsigned short as the
+ 68hc11 only handles a 64K address space. */
+#define SIZE_TYPE "short unsigned int"
+
+/* A C expression for a string describing the name of the data type
+ to use for the result of subtracting two pointers. The typedef
+ name `ptrdiff_t' is defined using the contents of the string.
+ The 68hc11 only has a 64K address space. */
+#define PTRDIFF_TYPE "short int"
+
+/* Allocation boundary (bits) for storing pointers in memory. */
+#define POINTER_BOUNDARY 8
+
+/* Normal alignment required for function parameters on the stack, in bits.
+ This can't be less than BITS_PER_WORD. */
+#define PARM_BOUNDARY (BITS_PER_WORD)
+
+/* Boundary (bits) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 8
+
+/* Allocation boundary (bits) for the code of a function. */
+#define FUNCTION_BOUNDARY 8
+
+#define BIGGEST_ALIGNMENT 8
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 8
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Define this if instructions will fail to work if given data not
+ on the nominal alignment. If instructions will merely go slower
+ in that case, do not define this macro. */
+#define STRICT_ALIGNMENT 0
+
+/* An integer expression for the size in bits of the largest integer
+ machine mode that should actually be used. All integer machine modes of
+ this size or smaller can be used for structures and unions with the
+ appropriate sizes. */
+#define MAX_FIXED_MODE_SIZE 64
+
+/* target machine storage layout */
+
+/* Size (bits) of the type "int" on target machine
+ (If undefined, default is BITS_PER_WORD). */
+#define INT_TYPE_SIZE (TARGET_SHORT ? 16 : 32)
+
+/* Size (bits) of the type "short" on target machine */
+#define SHORT_TYPE_SIZE 16
+
+/* Size (bits) of the type "long" on target machine */
+#define LONG_TYPE_SIZE 32
+
+/* Size (bits) of the type "long long" on target machine */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* A C expression for the size in bits of the type `float' on the
+ target machine. If you don't define this, the default is one word.
+ Don't use the default: a word is only 16 bits. */
+#define FLOAT_TYPE_SIZE 32
+
+/* A C expression for the size in bits of the type double on the target
+ machine. If you don't define this, the default is two words.
+ Be IEEE compliant. */
+#define DOUBLE_TYPE_SIZE 64
+
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* Define these to avoid dependence on meaning of `int'.
+ Note that WCHAR_TYPE_SIZE is used in cexp.y,
+ where TARGET_SHORT is not available. */
+#define WCHAR_TYPE "short int"
+#define WCHAR_TYPE_SIZE 16
+
+
+/* Standard register usage. */
+
+#define HARD_REG_SIZE (UNITS_PER_WORD)
+
+/* Assign names to real MC68HC11 registers.
+ A and B registers are not really used (A+B = D).
+ The X register is first so that GCC allocates X+D for 32-bit integers
+ and the low part of that integer will be D. Having the low part in D
+ is better for 32<->16-bit conversions and for many arithmetic
+ operations. */
+#define HARD_X_REGNUM 0
+#define HARD_D_REGNUM 1
+#define HARD_Y_REGNUM 2
+#define HARD_SP_REGNUM 3
+#define HARD_PC_REGNUM 4
+#define HARD_A_REGNUM 5
+#define HARD_B_REGNUM 6
+#define HARD_CCR_REGNUM 7
+
+/* The Z register does not really exist in the 68HC11. It is a fake
+ register for GCC. It is treated exactly as an index register (X or Y).
+ It is only in the A_REGS class, which is the BASE_REG_CLASS for GCC.
+ Defining this register helps the reload pass of GCC. Otherwise,
+ reload often dies with register spill failures.
+
+ The Z register is replaced by either X or Y during the machine specific
+ reorg (m68hc11_reorg). It is saved in the SOFT_Z_REGNUM soft-register
+ when this is necessary.
+
+ It's possible to tell GCC not to use this register with -ffixed-z. */
+#define HARD_Z_REGNUM 8
+
+/* The frame pointer is a soft-register. It's treated as such by GCC:
+ it is not and must not be part of the BASE_REG_CLASS. */
+#define DEFAULT_HARD_FP_REGNUM (9)
+#define HARD_FP_REGNUM (9)
+#define HARD_AP_REGNUM (HARD_FP_REGNUM)
+
+/* Temporary soft-register used in some cases when an operand ended up
+ in a bad register class (D, X, Y, SP) and GCC failed to recognize
+ this. This register is never allocated by GCC. */
+#define SOFT_TMP_REGNUM 10
+
+/* The soft-register which is used to save the Z register
+ (see Z register replacement notes in m68hc11.c). */
+#define SOFT_Z_REGNUM 11
+
+/* The soft-register which is used to save either X or Y. */
+#define SOFT_SAVED_XY_REGNUM 12
+
+/* A fake clobber register for 68HC12 patterns. */
+#define FAKE_CLOBBER_REGNUM (13)
+
+/* Define 32 soft-registers of 16 bits each. By default,
+ only 12 of them are enabled and can be used by GCC. The
+ -msoft-reg-count=<n> option allows controlling the number of valid
+ soft-registers. GCC can put 32-bit values in them
+ by allocating consecutive registers. The first 3 soft-registers
+ are never allocated by GCC. They are used in case the insn template needs
+ a temporary register, or for the Z register replacement. */
+
+#define MAX_SOFT_REG_COUNT (32)
+#define SOFT_REG_FIXED 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1
+#define SOFT_REG_USED 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1
+#define SOFT_REG_ORDER \
+SOFT_REG_FIRST, SOFT_REG_FIRST+1,SOFT_REG_FIRST+2,SOFT_REG_FIRST+3,\
+SOFT_REG_FIRST+4, SOFT_REG_FIRST+5,SOFT_REG_FIRST+6,SOFT_REG_FIRST+7,\
+SOFT_REG_FIRST+8, SOFT_REG_FIRST+9,SOFT_REG_FIRST+10,SOFT_REG_FIRST+11,\
+SOFT_REG_FIRST+12, SOFT_REG_FIRST+13,SOFT_REG_FIRST+14,SOFT_REG_FIRST+15,\
+SOFT_REG_FIRST+16, SOFT_REG_FIRST+17,SOFT_REG_FIRST+18,SOFT_REG_FIRST+19,\
+SOFT_REG_FIRST+20, SOFT_REG_FIRST+21,SOFT_REG_FIRST+22,SOFT_REG_FIRST+23,\
+SOFT_REG_FIRST+24, SOFT_REG_FIRST+25,SOFT_REG_FIRST+26,SOFT_REG_FIRST+27,\
+SOFT_REG_FIRST+28, SOFT_REG_FIRST+29,SOFT_REG_FIRST+30,SOFT_REG_FIRST+31
+
+#define SOFT_REG_NAMES \
+"*_.d1", "*_.d2", "*_.d3", "*_.d4", \
+"*_.d5", "*_.d6", "*_.d7", "*_.d8", \
+"*_.d9", "*_.d10", "*_.d11", "*_.d12", \
+"*_.d13", "*_.d14", "*_.d15", "*_.d16", \
+"*_.d17", "*_.d18", "*_.d19", "*_.d20", \
+"*_.d21", "*_.d22", "*_.d23", "*_.d24", \
+"*_.d25", "*_.d26", "*_.d27", "*_.d28", \
+"*_.d29", "*_.d30", "*_.d31", "*_.d32"
+
+/* First available soft-register for GCC. */
+#define SOFT_REG_FIRST (SOFT_SAVED_XY_REGNUM+2)
+
+/* Last available soft-register for GCC. */
+#define SOFT_REG_LAST (SOFT_REG_FIRST+MAX_SOFT_REG_COUNT)
+#define SOFT_FP_REGNUM (SOFT_REG_LAST)
+#define SOFT_AP_REGNUM (SOFT_FP_REGNUM+1)
+
+/* Number of actual hardware registers. The hardware registers are assigned
+ numbers for the compiler from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers, even
+ those that are not normally considered general registers. */
+#define FIRST_PSEUDO_REGISTER (SOFT_REG_LAST+2)
+
+/* 1 for registers that have pervasive standard uses and are not available
+ for the register allocator. */
+#define FIXED_REGISTERS \
+ {0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,1, 1, SOFT_REG_FIXED, 1, 1}
+/* X, D, Y, SP,PC,A, B, CCR, Z, FP,ZTMP,ZR,XYR, FK, D1 - D32, SOFT-FP, AP */
+
+/* 1 for registers not available across function calls. For our pseudo
+ registers, all are available. */
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, SOFT_REG_USED, 1, 1}
+/* X, D, Y, SP,PC,A, B, CCR, Z, FP, ZTMP,ZR,XYR, D1 - 32, SOFT-FP, AP */
+
+
+/* List the order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. */
+#define REG_ALLOC_ORDER \
+{ HARD_D_REGNUM, HARD_X_REGNUM, HARD_Y_REGNUM, \
+ SOFT_REG_ORDER, HARD_Z_REGNUM, HARD_PC_REGNUM, HARD_A_REGNUM, \
+ HARD_B_REGNUM, HARD_CCR_REGNUM, HARD_FP_REGNUM, SOFT_FP_REGNUM, \
+ HARD_SP_REGNUM, SOFT_TMP_REGNUM, SOFT_Z_REGNUM, SOFT_SAVED_XY_REGNUM, \
+ SOFT_AP_REGNUM, FAKE_CLOBBER_REGNUM }
+
+/* A C expression for the number of consecutive hard registers,
+ starting at register number REGNO, required to hold a value of
+ mode MODE. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+((Q_REGNO_P (REGNO)) ? (GET_MODE_SIZE (MODE)) : \
+ ((GET_MODE_SIZE (MODE) + HARD_REG_SIZE - 1) / HARD_REG_SIZE))
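+
+/* For a 16-bit register, an SImode value thus needs
+ (4 + 2 - 1) / 2 = 2 consecutive registers, while for the 8-bit
+ A/B registers the count is simply the mode size in bytes. */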
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ - 8-bit values are stored anywhere (except the SP register).
+ - 16-bit values can be stored in any 16-bit register,
+ - 32-bit values can be stored in D, X registers or in a soft register
+ (except the last one because we need 2 soft registers),
+ - values whose size is > 32 bits are not stored in real hard
+ registers. They may be stored in soft registers if there are
+ enough of them. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ hard_regno_mode_ok (REGNO,MODE)
+
+/* Value is 1 if it is a good idea to tie two pseudo registers when one has
+ mode MODE1 and one has mode MODE2. If HARD_REGNO_MODE_OK could produce
+ different values for MODE1 and MODE2, for any hard reg, then this must be
+ 0 for correct output.
+
+ All modes are tieable except QImode. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (((MODE1) == (MODE2)) \
+ || ((MODE1) != QImode && (MODE2) != QImode))
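+
+/* For instance HImode and SImode are tieable with each other, while
+ QImode is tieable only with itself. */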
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The M68hc11 has so few registers that it's not possible for GCC to
+ do any register allocation without breaking. We extend the processor
+ registers by having soft registers. These registers are treated as
+ hard registers by GCC but they are located in memory and accessed by page0
+ accesses (IND mode). */
+enum reg_class
+{
+ NO_REGS,
+ D_REGS, /* 16-bit data register */
+ X_REGS, /* 16-bit X register */
+ Y_REGS, /* 16-bit Y register */
+ SP_REGS, /* 16-bit stack pointer */
+ DA_REGS, /* 8-bit A reg. */
+ DB_REGS, /* 8-bit B reg. */
+ Z_REGS, /* 16-bit fake Z register */
+ D8_REGS, /* 8-bit A or B reg. */
+ Q_REGS, /* 8-bit (byte (QI)) data (A, B or D) */
+ D_OR_X_REGS, /* D or X register */
+ D_OR_Y_REGS, /* D or Y register */
+ D_OR_SP_REGS, /* D or SP register */
+ X_OR_Y_REGS, /* IX or Y register */
+ A_REGS, /* 16-bit address register (X, Y, Z) */
+ X_OR_SP_REGS, /* X or SP register */
+ Y_OR_SP_REGS, /* Y or SP register */
+ X_OR_Y_OR_D_REGS, /* X, Y or D */
+ A_OR_D_REGS, /* X, Y, Z or D */
+ A_OR_SP_REGS, /* X, Y, Z or SP */
+ H_REGS, /* 16-bit hard register (D, X, Y, Z, SP) */
+ S_REGS, /* 16-bit soft register */
+ D_OR_S_REGS, /* 16-bit soft register or D register */
+ X_OR_S_REGS, /* 16-bit soft register or X register */
+ Y_OR_S_REGS, /* 16-bit soft register or Y register */
+ Z_OR_S_REGS, /* 16-bit soft register or Z register */
+ SP_OR_S_REGS, /* 16-bit soft register or SP register */
+ D_OR_X_OR_S_REGS, /* 16-bit soft register or D or X register */
+ D_OR_Y_OR_S_REGS, /* 16-bit soft register or D or Y register */
+ D_OR_SP_OR_S_REGS, /* 16-bit soft register or D or SP register */
+ A_OR_S_REGS, /* 16-bit soft register or X, Y registers */
+ D_OR_A_OR_S_REGS, /* 16-bit soft register or D, X, Y registers */
+ TMP_REGS, /* 16-bit fake scratch register */
+ D_OR_A_OR_TMP_REGS, /* General scratch register */
+ G_REGS, /* 16-bit general register
+ (H_REGS + soft registers) */
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+/* Alias GENERAL_REGS to G_REGS. */
+#define GENERAL_REGS G_REGS
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "D_REGS", \
+ "X_REGS", \
+ "Y_REGS", \
+ "SP_REGS", \
+ "DA_REGS", \
+ "DB_REGS", \
+ "D8_REGS", \
+ "Z_REGS", \
+ "Q_REGS", \
+ "D_OR_X_REGS", \
+ "D_OR_Y_REGS", \
+ "D_OR_SP_REGS", \
+ "X_OR_Y_REGS", \
+ "A_REGS", \
+ "X_OR_SP_REGS", \
+ "Y_OR_SP_REGS", \
+ "X_OR_Y_OR_D_REGS", \
+ "A_OR_D_REGS", \
+ "A_OR_SP_REGS", \
+ "H_REGS", \
+ "S_REGS", \
+ "D_OR_S_REGS", \
+ "X_OR_S_REGS", \
+ "Y_OR_S_REGS", \
+ "Z_OR_S_REGS", \
+ "SP_OR_S_REGS", \
+ "D_OR_X_OR_S_REGS", \
+ "D_OR_Y_OR_S_REGS", \
+ "D_OR_SP_OR_S_REGS", \
+ "A_OR_S_REGS", \
+ "D_OR_A_OR_S_REGS", \
+ "TMP_REGS", \
+ "D_OR_A_OR_TMP_REGS", \
+ "G_REGS", \
+ "ALL_REGS" }
+
+/* An initializer containing the contents of the register classes,
+ as integers which are bit masks. The Nth integer specifies the
+ contents of class N. The way the integer MASK is interpreted is
+ that register R is in the class if `MASK & (1 << R)' is 1. */
+
+/*--------------------------------------------------------------
+ X 0x00000001
+ D 0x00000002
+ Y 0x00000004
+ SP 0x00000008
+ PC 0x00000010
+ A 0x00000020
+ B 0x00000040
+ CCR 0x00000080
+ Z 0x00000100
+ FRAME 0x00000200
+ ZTMP 0x00000400
+ ZREG 0x00000800
+ XYREG 0x00001000
+ FAKE 0x00002000
+ Di 0xFFFFc000, 0x03FFF
+ SFRAME 0x00000000, 0x04000
+ AP 0x00000000, 0x08000
+
+ D_OR_X_REGS represents D+X. It is used for 32-bit numbers.
+ A_REGS represents a valid base register for indexing. It represents
+ X,Y and the Z register.
+ S_REGS represents the soft-registers. This includes the hard frame
+ and soft frame registers.
+--------------------------------------------------------------*/
+
+#define REG_CLASS_CONTENTS \
+/* NO_REGS */ {{ 0x00000000, 0x00000000 }, \
+/* D_REGS */ { 0x00000002, 0x00000000 }, /* D */ \
+/* X_REGS */ { 0x00000001, 0x00000000 }, /* X */ \
+/* Y_REGS */ { 0x00000004, 0x00000000 }, /* Y */ \
+/* SP_REGS */ { 0x00000008, 0x00000000 }, /* SP */ \
+/* DA_REGS */ { 0x00000020, 0x00000000 }, /* A */ \
+/* DB_REGS */ { 0x00000040, 0x00000000 }, /* B */ \
+/* Z_REGS */ { 0x00000100, 0x00000000 }, /* Z */ \
+/* D8_REGS */ { 0x00000060, 0x00000000 }, /* A B */ \
+/* Q_REGS */ { 0x00000062, 0x00000000 }, /* A B D */ \
+/* D_OR_X_REGS */ { 0x00000003, 0x00000000 }, /* D X */ \
+/* D_OR_Y_REGS */ { 0x00000006, 0x00000000 }, /* D Y */ \
+/* D_OR_SP_REGS */ { 0x0000000A, 0x00000000 }, /* D SP */ \
+/* X_OR_Y_REGS */ { 0x00000005, 0x00000000 }, /* X Y */ \
+/* A_REGS */ { 0x00000105, 0x00000000 }, /* X Y Z */ \
+/* X_OR_SP_REGS */ { 0x00000009, 0x00000000 }, /* X SP */ \
+/* Y_OR_SP_REGS */ { 0x0000000C, 0x00000000 }, /* Y SP */ \
+/* X_OR_Y_OR_D_REGS */ { 0x00000007, 0x00000000 }, /* D X Y */ \
+/* A_OR_D_REGS */ { 0x00000107, 0x00000000 }, /* D X Y Z */ \
+/* A_OR_SP_REGS */ { 0x0000010D, 0x00000000 }, /* X Y SP */ \
+/* H_REGS */ { 0x0000010F, 0x00000000 }, /* D X Y SP */ \
+/* S_REGS */ { 0xFFFFDE00, 0x00007FFF }, /* _.D,..,FP,Z* */ \
+/* D_OR_S_REGS */ { 0xFFFFDE02, 0x00007FFF }, /* D _.D */ \
+/* X_OR_S_REGS */ { 0xFFFFDE01, 0x00007FFF }, /* X _.D */ \
+/* Y_OR_S_REGS */ { 0xFFFFDE04, 0x00007FFF }, /* Y _.D */ \
+/* Z_OR_S_REGS */ { 0xFFFFDF00, 0x00007FFF }, /* Z _.D */ \
+/* SP_OR_S_REGS */ { 0xFFFFDE08, 0x00007FFF }, /* SP _.D */ \
+/* D_OR_X_OR_S_REGS */ { 0xFFFFDE03, 0x00007FFF }, /* D X _.D */ \
+/* D_OR_Y_OR_S_REGS */ { 0xFFFFDE06, 0x00007FFF }, /* D Y _.D */ \
+/* D_OR_SP_OR_S_REGS */ { 0xFFFFDE0A, 0x00007FFF }, /* D SP _.D */ \
+/* A_OR_S_REGS */ { 0xFFFFDF05, 0x00007FFF }, /* X Y _.D */ \
+/* D_OR_A_OR_S_REGS */ { 0xFFFFDF07, 0x00007FFF }, /* D X Y _.D */ \
+/* TMP_REGS */ { 0x00002000, 0x00000000 }, /* FAKE */ \
+/* D_OR_A_OR_TMP_REGS*/ { 0x00002107, 0x00000000 }, /* D X Y Z Fake */ \
+/* G_REGS */ { 0xFFFFFF1F, 0x00007FFF }, /* ? _.D D X Y */ \
+/* ALL_REGS*/ { 0xFFFFFFFF, 0x00007FFF }}
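+
+/* Consistency check: the D_REGS mask 0x00000002 is 1 << HARD_D_REGNUM
+ (regno 1), and the A_REGS mask 0x00000105 combines X (1 << 0),
+ Y (1 << 2) and Z (1 << 8). */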
+
+
+/* Predicates telling whether a register number (or a register rtx)
+ is a particular hard register. */
+#define Q_REGNO_P(REGNO) ((REGNO) == HARD_A_REGNUM \
+ || (REGNO) == HARD_B_REGNUM)
+#define Q_REG_P(X) (REG_P (X) && Q_REGNO_P (REGNO (X)))
+
+#define D_REGNO_P(REGNO) ((REGNO) == HARD_D_REGNUM)
+#define D_REG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X)))
+
+#define DB_REGNO_P(REGNO) ((REGNO) == HARD_B_REGNUM)
+#define DB_REG_P(X) (REG_P (X) && DB_REGNO_P (REGNO (X)))
+#define DA_REGNO_P(REGNO) ((REGNO) == HARD_A_REGNUM)
+#define DA_REG_P(X) (REG_P (X) && DA_REGNO_P (REGNO (X)))
+
+#define X_REGNO_P(REGNO) ((REGNO) == HARD_X_REGNUM)
+#define X_REG_P(X) (REG_P (X) && X_REGNO_P (REGNO (X)))
+
+#define Y_REGNO_P(REGNO) ((REGNO) == HARD_Y_REGNUM)
+#define Y_REG_P(X) (REG_P (X) && Y_REGNO_P (REGNO (X)))
+
+#define Z_REGNO_P(REGNO) ((REGNO) == HARD_Z_REGNUM)
+#define Z_REG_P(X) (REG_P (X) && Z_REGNO_P (REGNO (X)))
+
+#define SP_REGNO_P(REGNO) ((REGNO) == HARD_SP_REGNUM)
+#define SP_REG_P(X) (REG_P (X) && SP_REGNO_P (REGNO (X)))
+
+/* Address register. */
+#define A_REGNO_P(REGNO) ((REGNO) == HARD_X_REGNUM \
+ || (REGNO) == HARD_Y_REGNUM \
+ || (REGNO) == HARD_Z_REGNUM)
+#define A_REG_P(X) (REG_P (X) && A_REGNO_P (REGNO (X)))
+
+/* M68hc11 hard registers. */
+#define H_REGNO_P(REGNO) (D_REGNO_P (REGNO) || A_REGNO_P (REGNO) \
+ || SP_REGNO_P (REGNO) || Q_REGNO_P (REGNO))
+#define H_REG_P(X) (REG_P (X) && H_REGNO_P (REGNO (X)))
+
+#define FAKE_REGNO_P(REGNO) ((REGNO) == FAKE_CLOBBER_REGNUM)
+#define FAKE_REG_P(X) (REG_P (X) && FAKE_REGNO_P (REGNO (X)))
+
+/* Soft registers (or register emulation for GCC). The temporary register
+ used by insn templates must be part of the S_REGS class so that it
+ matches the 'u' constraint. */
+#define S_REGNO_P(REGNO) ((REGNO) >= SOFT_TMP_REGNUM \
+ && (REGNO) <= SOFT_REG_LAST \
+ && (REGNO) != FAKE_CLOBBER_REGNUM)
+#define S_REG_P(X) (REG_P (X) && S_REGNO_P (REGNO (X)))
+
+#define Z_REGNO_P(REGNO) ((REGNO) == HARD_Z_REGNUM)
+#define Z_REG_P(X) (REG_P (X) && Z_REGNO_P (REGNO (X)))
+
+/* General register. */
+#define G_REGNO_P(REGNO) (H_REGNO_P (REGNO) || S_REGNO_P (REGNO) \
+ || ((REGNO) == HARD_PC_REGNUM) \
+ || ((REGNO) == HARD_FP_REGNUM) \
+ || ((REGNO) == SOFT_FP_REGNUM) \
+ || ((REGNO) == FAKE_CLOBBER_REGNUM) \
+ || ((REGNO) == SOFT_AP_REGNUM))
+
+#define G_REG_P(X) (REG_P (X) && G_REGNO_P (REGNO (X)))
+
+#define REGNO_REG_CLASS(REGNO) \
+ (D_REGNO_P (REGNO) ? D_REGS : \
+ (X_REGNO_P (REGNO) ? X_REGS : \
+ (Y_REGNO_P (REGNO) ? Y_REGS : \
+ (SP_REGNO_P (REGNO) ? SP_REGS : \
+ (Z_REGNO_P (REGNO) ? Z_REGS : \
+ (H_REGNO_P (REGNO) ? H_REGS : \
+ (FAKE_REGNO_P (REGNO) ? TMP_REGS : \
+ (S_REGNO_P (REGNO) ? S_REGS : \
+ (DA_REGNO_P (REGNO) ? DA_REGS: \
+ (DB_REGNO_P (REGNO) ? DB_REGS: \
+ (G_REGNO_P (REGNO) ? G_REGS : ALL_REGS)))))))))))
+
+
+/* Get reg_class from a letter in the machine description. */
+
+extern enum reg_class m68hc11_tmp_regs_class;
+#define REG_CLASS_FROM_LETTER(C) \
+ ((C) == 'a' ? DA_REGS : \
+ (C) == 'A' ? A_REGS : \
+ (C) == 'b' ? DB_REGS : \
+ (C) == 'B' ? X_OR_Y_REGS : \
+ (C) == 'd' ? D_REGS : \
+ (C) == 'D' ? D_OR_X_REGS : \
+ (C) == 'q' ? Q_REGS : \
+ (C) == 'h' ? H_REGS : \
+ (C) == 't' ? TMP_REGS : \
+ (C) == 'u' ? S_REGS : \
+ (C) == 'v' ? m68hc11_tmp_regs_class : \
+ (C) == 'w' ? SP_REGS : \
+ (C) == 'x' ? X_REGS : \
+ (C) == 'y' ? Y_REGS : \
+ (C) == 'z' ? Z_REGS : NO_REGS)
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) preferred_reload_class(X,CLASS)
+
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* A C expression that is nonzero if hard register number REGNO2 can be
+ considered for use as a rename register for REGNO1 */
+
+#define HARD_REGNO_RENAME_OK(REGNO1,REGNO2) \
+ m68hc11_hard_regno_rename_ok ((REGNO1), (REGNO2))
+
+/* Return the maximum number of consecutive registers needed to represent
+ mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+(((CLASS) == DA_REGS || (CLASS) == DB_REGS \
+ || (CLASS) == D8_REGS || (CLASS) == Q_REGS) ? GET_MODE_SIZE (MODE) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* The letters I, K, L, M, N, O and P in a register constraint string
+ can be used to stand for particular ranges of immediate operands.
+ This macro defines what the ranges are.
+ C is the letter, and VALUE is a constant value.
+ Return 1 if VALUE is in the range specified by C.
+
+ `I' is for range -2 to 2.
+ `K' is for 0.
+ `L' is for range -65536 to 65535.
+ `M' is for values whose 16-bit low part is 0.
+ `N' is for +1 or -1.
+ `O' is for 16 (for rotate using swap).
+ `P' is for range -8 to 2 (used by addhi_sp).
+
+ `J' is not used. */
+
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'K' ? (VALUE) == 0 : \
+ (C) == 'L' ? ((VALUE) >= -65536 && (VALUE) <= 65535) : \
+ (C) == 'M' ? ((VALUE) & 0x0ffffL) == 0 : \
+ (C) == 'N' ? ((VALUE) == 1 || (VALUE) == -1) : \
+ (C) == 'I' ? ((VALUE) >= -2 && (VALUE) <= 2) : \
+ (C) == 'O' ? (VALUE) == 16 : \
+ (C) == 'P' ? ((VALUE) <= 2 && (VALUE) >= -8) : 0)
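+
+/* Examples: CONST_OK_FOR_LETTER_P (16, 'O') and
+ CONST_OK_FOR_LETTER_P (-1, 'N') are both 1, while
+ CONST_OK_FOR_LETTER_P (3, 'P') is 0 since 3 > 2. */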
+
+/* Similar, but for floating constants, and defining letters G and H.
+
+ `G' is for 0.0. */
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+ ((C) == 'G' ? (GET_MODE_CLASS (GET_MODE (VALUE)) == MODE_FLOAT \
+ && VALUE == CONST0_RTX (GET_MODE (VALUE))) : 0)
+
+/* 'U' represents a certain kind of memory indexed operand for 68HC12,
+ and any memory operand for 68HC11.
+ 'R' represents indexed addressing mode or access to page0 for 68HC11.
+ For 68HC12, it represents any memory operand.
+ 'Q' represents a memory operand whose address is constant
+ (a symbolic operand, see m68hc11_symbolic_p).
+ 'S' represents a memory operand which is not a push operand. */
+#define EXTRA_CONSTRAINT(OP, C) \
+((C) == 'U' ? m68hc11_small_indexed_indirect_p (OP, GET_MODE (OP)) \
+ : (C) == 'Q' ? m68hc11_symbolic_p (OP, GET_MODE (OP)) \
+ : (C) == 'R' ? m68hc11_indirect_p (OP, GET_MODE (OP)) \
+ : (C) == 'S' ? (memory_operand (OP, GET_MODE (OP)) \
+ && non_push_operand (OP, GET_MODE (OP))) : 0)
+
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame.
+
+ Define to 0 for 68HC11, the frame pointer is the bottom
+ of local variables. */
+#define FRAME_GROWS_DOWNWARD 0
+
+/* Define this if successive arguments to a function occupy decreasing
+ addresses in the stack. */
+/* #define ARGS_GROW_DOWNWARD */
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset of first parameter from the argument pointer register value. */
+
+#define FIRST_PARM_OFFSET(FNDECL) 2
+
+/* After the prologue, RA is at 0(AP) in the current frame. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \
+ : 0)
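+
+/* Hence __builtin_return_address (0) reads the word at 0(AP); deeper
+ frames (COUNT > 0) are not supported and yield 0 here. */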
+
+/* Before the prologue, the top of the frame is at 2(sp). */
+#define INCOMING_FRAME_SP_OFFSET 2
+
+/* Define this if functions should assume that stack space has been
+ allocated for arguments even when their values are passed in
+ registers.
+
+ The value of this macro is the size, in bytes, of the area reserved for
+ arguments passed in registers.
+
+ This space can either be allocated by the caller or be a part of the
+ machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE'
+ says which. */
+/* #define REG_PARM_STACK_SPACE(FNDECL) 2 */
+
+/* Define this macro if REG_PARM_STACK_SPACE is defined but stack
+ parameters don't skip the area specified by REG_PARM_STACK_SPACE.
+ Normally, when a parameter is not passed in registers, it is placed on
+ the stack beyond the REG_PARM_STACK_SPACE area. Defining this macro
+ suppresses this behavior and causes the parameter to be passed on the
+ stack in its natural location. */
+/* #define STACK_PARMS_IN_REG_PARM_AREA */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM HARD_SP_REGNUM
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM SOFT_FP_REGNUM
+
+#define HARD_FRAME_POINTER_REGNUM HARD_FP_REGNUM
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM SOFT_AP_REGNUM
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM SOFT_Z_REGNUM
+
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ We have two registers that are eliminated on the 6811. The pseudo arg
+ pointer and pseudo frame pointer registers can always be eliminated;
+ they are replaced with either the stack or the real frame pointer. */
+
+#define ELIMINABLE_REGS \
+{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ { OFFSET = m68hc11_initial_elimination_offset (FROM, TO); }
+
+
+/* Passing Function Arguments on the Stack. */
+
+/* If we generate an insn to push BYTES bytes, this says how many the
+ stack pointer really advances by. No rounding or alignment needed
+ for MC6811. */
+#define PUSH_ROUNDING(BYTES) (BYTES)
+
+/* Passing Arguments in Registers. */
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+typedef struct m68hc11_args
+{
+ int words;
+ int nregs;
+} CUMULATIVE_ARGS;
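+
+/* Informal reading (the authoritative usage is in m68hc11.c, see
+ m68hc11_init_cumulative_args): 'words' counts the argument words
+ scanned so far and 'nregs' the argument registers consumed. */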
+
+/* If defined, a C expression which determines whether, and in which direction,
+ to pad out an argument with extra space. The value should be of type
+ `enum direction': either `upward' to pad above the argument,
+ `downward' to pad below, or `none' to inhibit padding.
+
+ Structures are stored left shifted in their argument slot. */
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ m68hc11_function_arg_padding ((MODE), (TYPE))
+
+#undef PAD_VARARGS_DOWN
+#define PAD_VARARGS_DOWN \
+ (m68hc11_function_arg_padding (TYPE_MODE (type), type) == downward)
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a
+ function whose data type is FNTYPE. For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (m68hc11_init_cumulative_args (&CUM, FNTYPE, LIBNAME))
+
+/* Define the profitability of saving registers around calls.
+
+ Disable this because the saving instructions generated by
+ caller-save need a reload and, the way this is implemented,
+ all spill registers are forbidden at that point. Enabling
+ caller saving results in spill failure. */
+#define CALLER_SAVE_PROFITABLE(REFS,CALLS) 0
+
+/* 1 if N is a possible register number for function argument passing.
+ D is for 16-bit values, X is for 32-bit (X+D). */
+#define FUNCTION_ARG_REGNO_P(N) \
+ (((N) == HARD_D_REGNUM) || ((N) == HARD_X_REGNUM))
+
+/* All return values are in the D or X+D registers:
+ - 8 and 16-bit values are returned in D.
+ BLKmode values are returned in D as a pointer.
+ - 32-bit values are returned in X + D.
+ The high part is passed in X and the low part in D.
+ For GCC, the register number must be HARD_X_REGNUM. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG (TYPE_MODE (VALTYPE), \
+ ((TYPE_MODE (VALTYPE) == BLKmode \
+ || GET_MODE_SIZE (TYPE_MODE (VALTYPE)) <= 2) \
+ ? HARD_D_REGNUM : HARD_X_REGNUM))
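+
+/* For example: a function returning 'int' (HImode) returns it in D,
+ while one returning 'long' (SImode) returns the high word in X and
+ the low word in D. */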
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (MODE, \
+ (((MODE) == BLKmode || GET_MODE_SIZE (MODE) <= 2) \
+ ? HARD_D_REGNUM : HARD_X_REGNUM))
+
+/* 1 if N is a possible register number for a function value. */
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == HARD_D_REGNUM || (N) == HARD_X_REGNUM)
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in functions
+ that have frame pointers. No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 0
+
+
+/* Generating Code for Profiling. */
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fprintf (FILE, "\tldy\t.LP%d\n\tjsr mcount\n", (LABELNO))
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE (TARGET_M6811 ? 11 : 9)
+
+
+/* Addressing modes, and classification of registers for them. */
+
+#define ADDR_STRICT 0x01 /* Accept only registers in class A_REGS */
+#define ADDR_INCDEC 0x02 /* Post/Pre inc/dec */
+#define ADDR_INDEXED 0x04 /* D-reg index */
+#define ADDR_OFFSET 0x08
+#define ADDR_INDIRECT 0x10 /* Accept (mem (mem ...)) for [n,X] */
+#define ADDR_CONST 0x20 /* Accept const and symbol_ref */
+
+/* The 68HC12 has all the post/pre increment/decrement modes. */
+#define HAVE_POST_INCREMENT (TARGET_M6812 && TARGET_AUTO_INC_DEC)
+#define HAVE_PRE_INCREMENT (TARGET_M6812 && TARGET_AUTO_INC_DEC)
+#define HAVE_POST_DECREMENT (TARGET_M6812 && TARGET_AUTO_INC_DEC)
+#define HAVE_PRE_DECREMENT (TARGET_M6812 && TARGET_AUTO_INC_DEC)
+
+/* The class value for base registers. This depends on the target:
+ A_REGS for 68HC11 and A_OR_SP_REGS for 68HC12. The class value
+ is stored at init time. */
+extern enum reg_class m68hc11_base_reg_class;
+#define BASE_REG_CLASS m68hc11_base_reg_class
+
+/* The class value for index registers. This is NO_REGS for 68HC11. */
+
+extern enum reg_class m68hc11_index_reg_class;
+#define INDEX_REG_CLASS m68hc11_index_reg_class
+
+/* These assume that REGNO is a hard or pseudo reg number. They give nonzero
+ only if REGNO is a hard reg of the suitable class or a pseudo reg currently
+ allocated to a suitable hard reg. Since they use reg_renumber, they are
+ safe only once reg_renumber has been allocated, which happens in
+ local-alloc.c. */
+
+
+extern unsigned char m68hc11_reg_valid_for_base[FIRST_PSEUDO_REGISTER];
+#define REG_VALID_FOR_BASE_P(REGNO) \
+ ((REGNO) < FIRST_PSEUDO_REGISTER \
+ && m68hc11_reg_valid_for_base[REGNO])
+
+/* Internal macro, return 1 if REGNO is a valid index register. */
+extern unsigned char m68hc11_reg_valid_for_index[FIRST_PSEUDO_REGISTER];
+#define REG_VALID_FOR_INDEX_P(REGNO) \
+ ((REGNO) < FIRST_PSEUDO_REGISTER \
+ && m68hc11_reg_valid_for_index[REGNO])
+
+/* Internal macro, the nonstrict definition for REGNO_OK_FOR_BASE_P. */
+#define REGNO_OK_FOR_BASE_NONSTRICT_P(REGNO) \
+ ((REGNO) >= FIRST_PSEUDO_REGISTER \
+ || REG_VALID_FOR_BASE_P (REGNO) \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || (REGNO) == HARD_FRAME_POINTER_REGNUM \
+ || (REGNO) == ARG_POINTER_REGNUM \
+ || (reg_renumber && REG_VALID_FOR_BASE_P (reg_renumber[REGNO])))
+
+/* Internal macro, the nonstrict definition for REGNO_OK_FOR_INDEX_P. */
+#define REGNO_OK_FOR_INDEX_NONSTRICT_P(REGNO) \
+ (TARGET_M6812 \
+ && ((REGNO) >= FIRST_PSEUDO_REGISTER \
+ || REG_VALID_FOR_INDEX_P (REGNO) \
+ || (reg_renumber && REG_VALID_FOR_INDEX_P (reg_renumber[REGNO]))))
+
+/* Internal macro, the strict definition for REGNO_OK_FOR_BASE_P. */
+#define REGNO_OK_FOR_BASE_STRICT_P(REGNO) \
+ ((REGNO) < FIRST_PSEUDO_REGISTER ? REG_VALID_FOR_BASE_P (REGNO) \
+ : (reg_renumber && REG_VALID_FOR_BASE_P (reg_renumber[REGNO])))
+
+/* Internal macro, the strict definition for REGNO_OK_FOR_INDEX_P. */
+#define REGNO_OK_FOR_INDEX_STRICT_P(REGNO) \
+ (TARGET_M6812 \
+ && ((REGNO) < FIRST_PSEUDO_REGISTER ? REG_VALID_FOR_INDEX_P (REGNO) \
+ : (reg_renumber && REG_VALID_FOR_INDEX_P (reg_renumber[REGNO]))))
+
+#define REGNO_OK_FOR_BASE_P2(REGNO,STRICT) \
+ ((STRICT) ? (REGNO_OK_FOR_BASE_STRICT_P (REGNO)) \
+ : (REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO)))
+
+#define REGNO_OK_FOR_INDEX_P2(REGNO,STRICT) \
+ ((STRICT) ? (REGNO_OK_FOR_INDEX_STRICT_P (REGNO)) \
+ : (REGNO_OK_FOR_INDEX_NONSTRICT_P (REGNO)))
+
+#define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_BASE_STRICT_P (REGNO)
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_INDEX_STRICT_P (REGNO)
+
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_STRICT_P (REGNO (X))
+#define REG_OK_FOR_BASE_NONSTRICT_P(X) REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (X))
+#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_STRICT_P (REGNO (X))
+#define REG_OK_FOR_INDEX_NONSTRICT_P(X) REGNO_OK_FOR_INDEX_NONSTRICT_P (REGNO (X))
+
+/* See PUSH_POP_ADDRESS_P() below for an explanation of this. */
+#define IS_STACK_PUSH(operand) \
+ ((GET_CODE (operand) == MEM) \
+ && (GET_CODE (XEXP (operand, 0)) == PRE_DEC) \
+ && (SP_REG_P (XEXP (XEXP (operand, 0), 0))))
+
+#define IS_STACK_POP(operand) \
+ ((GET_CODE (operand) == MEM) \
+ && (GET_CODE (XEXP (operand, 0)) == POST_INC) \
+ && (SP_REG_P (XEXP (XEXP (operand, 0), 0))))
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression that is a
+ valid memory address for an instruction. The MODE argument is the
+ machine mode for the MEM expression that wants to use this address. */
+
+/*--------------------------------------------------------------
+ Valid addresses are either direct or indirect (MEM) versions
+ of the following forms:
+ constant N
+ register ,X
+ indexed N,X
+--------------------------------------------------------------*/
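+
+/* As RTL (an illustrative sketch, with Pmode == HImode):
+ (mem:HI (const_int 4096)) constant
+ (mem:HI (reg:HI x)) register
+ (mem:HI (plus:HI (reg:HI x) (const_int 4))) indexed
+ and the indirect forms wrap one of these in another MEM. */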
+
+/* The range of index that is allowed by indirect addressing. */
+
+#define VALID_MIN_OFFSET m68hc11_min_offset
+#define VALID_MAX_OFFSET m68hc11_max_offset
+
+/* The offset values which are allowed by the n,x and n,y addressing modes.
+ Take into account the size of the mode because we may have to add
+ a mode offset to access the lowest part of the data.
+ (For example, for an SImode, the last valid offset is 252.) */
+#define VALID_CONSTANT_OFFSET_P(X,MODE) \
+(((GET_CODE (X) == CONST_INT) && \
+ ((INTVAL (X) >= VALID_MIN_OFFSET) \
+ && ((INTVAL (X) <= VALID_MAX_OFFSET \
+ - (HOST_WIDE_INT) (GET_MODE_SIZE (MODE) + 1))))) \
+|| (TARGET_M6812 \
+ && ((GET_CODE (X) == SYMBOL_REF) \
+ || GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == CONST)))
+
+/* This is included to allow stack push/pop operations. Special hacks in the
+ md and m68hc11.c files exist to support this. */
+#define PUSH_POP_ADDRESS_P(X) \
+ (((GET_CODE (X) == PRE_DEC) || (GET_CODE (X) == POST_INC)) \
+ && SP_REG_P (XEXP (X, 0)))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx and check its
+ validity for a certain class. We have two alternate definitions for each
+ of them. The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs. The symbol
+ REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that they will
+ get allocated to the class that the insn wants them to be in. Source files
+ for reload pass need to be strict. After reload, it makes no difference,
+ since pseudo regs have been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P(X)
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P(X)
+#else
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P(X)
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P(X)
+#endif
+
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) \
+ m68hc11_notice_update_cc ((EXP), (INSN))
+
+/* Move costs between classes of registers */
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ (m68hc11_register_move_cost (MODE, CLASS1, CLASS2))
+
+/* Move cost between register and memory.
+ - Move to a 16-bit register is reasonable,
+ - Move to a soft register can be expensive. */
+#define MEMORY_MOVE_COST(MODE,CLASS,IN) \
+ m68hc11_memory_move_cost ((MODE),(CLASS),(IN))
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that.
+
+ Pretend branches are cheap because GCC generates sub-optimal code
+ for the default value. */
+#define BRANCH_COST(speed_p, predictable_p) 0
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 0
+
+/* It is as good to call a constant function address as to call an address
+ kept in a register. */
+#define NO_FUNCTION_CSE
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c.
+
+ For M68HC11, we handle large displacements of a base register
+ by splitting the addend across an addhi3 insn.
+
+ For M68HC12, the 64K offset range is available.
+ */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+do { \
+ /* We must recognize output that we have already generated ourselves. */ \
+ if (GET_CODE (X) == PLUS \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == REG \
+ && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT) \
+ { \
+ push_reload (XEXP (X, 0), NULL_RTX, &XEXP (X, 0), NULL, \
+ BASE_REG_CLASS, GET_MODE (X), VOIDmode, 0, 0, \
+ OPNUM, TYPE); \
+ goto WIN; \
+ } \
+ if (GET_CODE (X) == PLUS \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && GET_CODE (XEXP (X, 1)) == CONST_INT \
+ && !VALID_CONSTANT_OFFSET_P (XEXP (X, 1), MODE)) \
+ { \
+ HOST_WIDE_INT val = INTVAL (XEXP (X, 1)); \
+ HOST_WIDE_INT low, high; \
+ high = val & (~0x0FF); \
+ low = val & 0x00FF; \
+ if (low >= 256-15) { high += 16; low -= 16; } \
+ /* Reload the high part into a base reg; leave the low part \
+ in the mem directly. */ \
+ \
+ X = gen_rtx_PLUS (Pmode, \
+ gen_rtx_PLUS (Pmode, XEXP (X, 0), \
+ GEN_INT (high)), \
+ GEN_INT (low)); \
+ \
+ push_reload (XEXP (X, 0), NULL_RTX, &XEXP (X, 0), NULL, \
+ BASE_REG_CLASS, GET_MODE (X), VOIDmode, 0, 0, \
+ OPNUM, TYPE); \
+ goto WIN; \
+ } \
+} while (0)
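+
+/* Illustrative effect of the second case above: with an out-of-range
+ offset, (plus (reg:HI x) (const_int 0x1234)) is rewritten as
+ (plus (plus (reg:HI x) (const_int 0x1200)) (const_int 0x34)); the
+ inner PLUS is reloaded into a base register while the small offset
+ stays in the address. */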
+
+
+/* Defining the Output Assembler Language. */
+
+/* A default list of other sections which we might be "in" at any given
+ time. For targets that use additional sections (e.g. .tdesc) you
+ should override this definition in the target-specific file which
+ includes this file. */
+
+/* Output before read-only data. */
+#define TEXT_SECTION_ASM_OP ("\t.sect\t.text")
+
+/* Output before writable data. */
+#define DATA_SECTION_ASM_OP ("\t.sect\t.data")
+
+/* Output before uninitialized data. */
+#define BSS_SECTION_ASM_OP ("\t.sect\t.bss")
+
+/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
+
+ Same as config/elfos.h but don't mark these section SHF_WRITE since
+ there is no shared library problem. */
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"a\""
+
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"a\""
+
+#define TARGET_ASM_CONSTRUCTOR m68hc11_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR m68hc11_asm_out_destructor
+
+/* Comment character */
+#define ASM_COMMENT_START ";"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#define ASM_APP_ON "; Begin inline assembler code\n#APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#define ASM_APP_OFF "; End of inline assembler code\n#NO_APP\n"
+
+/* Write the extra assembler code needed to declare a function properly.
+ Some svr4 assemblers need to also have something extra said about the
+ function's return value. We allow for that here.
+
+ For 68HC12 we mark functions that return with 'rtc'. The linker
+ will ensure that a 'call' is really made (instead of 'jsr').
+ The debugger needs this information to correctly compute the stack frame.
+
+ For 68HC11/68HC12 we also mark interrupt handlers for gdb to
+ compute the correct stack frame. */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ fprintf (FILE, "%s", TYPE_ASM_OP); \
+ assemble_name (FILE, NAME); \
+ putc (',', FILE); \
+ fprintf (FILE, TYPE_OPERAND_FMT, "function"); \
+ putc ('\n', FILE); \
+ \
+ if (current_function_far) \
+ { \
+ fprintf (FILE, "\t.far\t"); \
+ assemble_name (FILE, NAME); \
+ putc ('\n', FILE); \
+ } \
+ else if (current_function_interrupt \
+ || current_function_trap) \
+ { \
+ fprintf (FILE, "\t.interrupt\t"); \
+ assemble_name (FILE, NAME); \
+ putc ('\n', FILE); \
+ } \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ } \
+ while (0)
+
+/* Output #ident as a .ident. */
+
+/* Output an external reference. */
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE,DECL,NAME) \
+ {fputs ("\t; extern\t", FILE); \
+ assemble_name (FILE, NAME); \
+ fputs ("\n", FILE);}
+
+/* How to refer to registers in assembler output. This sequence is indexed
+ by the compiler's hard-register-number (see above). */
+#define REGISTER_NAMES \
+{ "x", "d", "y", "sp", "pc", "a", "b", "ccr", "z", \
+ "*_.frame", "*_.tmp", "*_.z", "*_.xy", "*fake clobber", \
+ SOFT_REG_NAMES, "*sframe", "*ap"}
+
+/* This is how to output an insn to push/pop a register on the stack.
+ It need not be very fast code.
+
+ Don't define because we don't know how to handle that with
+ the STATIC_CHAIN_REGNUM (soft register). Saving the static
+ chain must be done inside FUNCTION_PROFILER. */
+
+#undef ASM_OUTPUT_REG_PUSH
+#undef ASM_OUTPUT_REG_POP
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t%s\tL%d-L%d\n", integer_asm_op (2, TRUE), VALUE, REL)
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t%s\t.L%d\n", integer_asm_op (2, TRUE), VALUE)
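+
+/* With a 2-byte integer_asm_op such as ".word" (an assumption; the
+ actual pseudo-op comes from the assembler configuration), these emit
+ lines like ".word L5-L2" and ".word .L5" respectively. */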
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { \
+ if ((LOG) > 1) \
+ fprintf ((FILE), "%s\n", ALIGN_ASM_OP); \
+ } while (0)
+
+
+/* Assembler Commands for Exception Regions. */
+
+/* Default values provided by GCC should be ok. Assuming that DWARF-2
+ frame unwind info is ok for this platform. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* For the support of memory banks we need addresses that indicate
+ the page number. */
+#define DWARF2_ADDR_SIZE 4
+
+/* SCz 2003-07-08: Don't use the assembler's dwarf2 .file/.loc directives
+ because the linker is doing relaxation and does not adjust the debug_line
+ sections when it shrinks the code. This results in invalid addresses
+ when debugging. This does not hurt the HC11/HC12 too much, as most
+ applications are embedded and small, so the debug info remains
+ reasonable. This problem is known for binutils 2.13, 2.14 and mainline. */
+#undef HAVE_AS_DWARF2_DEBUG_LINE
+
+/* The prefix for local labels. You should be able to define this as
+ an empty string, or any arbitrary string (such as ".", ".L%", etc)
+ without having to make any other changes to account for the specific
+ definition. Note it is a string literal, not interpreted by printf
+ and friends. */
+#define LOCAL_LABEL_PREFIX "."
+
+/* The prefix for immediate operands. */
+#define IMMEDIATE_PREFIX "#"
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+
+/* Miscellaneous Parameters. */
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE Pmode
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* Max number of bytes we can move from memory to memory in one
+ reasonably fast instruction. */
+#define MOVE_MAX 2
+
+/* MOVE_RATIO is the number of move instructions that is better than a
+ block move. Make this small on 6811, since the code size grows very
+ large with each move. */
+#define MOVE_RATIO(speed) 3
+
+/* Define if shifts truncate the shift count which implies one can omit
+ a sign-extension or zero-extension of a shift count. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have. After generation of rtl, the
+ compiler makes no further distinction between pointers and any other
+ objects of this machine mode. */
+#define Pmode HImode
+
+/* A function address in a call instruction is a byte address (for indexing
+ purposes) so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+extern int debug_m6811;
+extern int z_replacement_completed;
+extern int current_function_interrupt;
+extern int current_function_trap;
+extern int current_function_far;
+
+extern GTY(()) rtx m68hc11_soft_tmp_reg;
+extern GTY(()) rtx ix_reg;
+extern GTY(()) rtx iy_reg;
+extern GTY(()) rtx d_reg;
diff --git a/gcc/config/m68hc11/m68hc11.md b/gcc/config/m68hc11/m68hc11.md
new file mode 100644
index 000000000..f4ff3ebbb
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11.md
@@ -0,0 +1,7579 @@
+;;- Machine description file for Motorola 68HC11 and 68HC12.
+;;- Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
+;;- Free Software Foundation, Inc.
+;;- Contributed by Stephane Carrez (stcarrez@nerim.fr)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Note:
+;; A first 68HC11 port was made by Otto Lind (otto@coactive.com)
+;; on gcc 2.6.3. I have used it as a starting point for this port.
+;; However, this new port is a complete re-write. Its internal
+;; design is completely different. The generated code is not
+;; compatible with the gcc 2.6.3 port.
+;;
+;; The gcc 2.6.3 port is available at:
+;;
+;; ftp.unina.it/pub/electronics/motorola/68hc11/gcc/gcc-6811-fsf.tar.gz
+;;
+
+;;- Instruction patterns. When multiple patterns apply,
+;;- the first one in the file is chosen.
+;;-
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+;;-
+;;- cpp macro #define NOTICE_UPDATE_CC in file tm.h handles condition code
+;;- updates for most instructions.
+
+;;
+;; The following constraints are used:
+;;
+;; Single pair registers:
+;; a register 'a' 8-bit
+;; b register 'b' 8-bit
+;; d register 'd' 16-bit
+;; t pseudo soft register 'TMP' 16-bit
+;; v register 'd' for 68hc11, 16-bit
+;; NO_REG for 68hc12
+;; (used for scratch register)
+;; w register 'sp' 16-bit
+;; x register 'x' 16-bit
+;; y register 'y' 16-bit
+;; z register 'z' 16-bit (fake reg for 68HC11 and 68HC12)
+;; D register 'd+x' 32-bit
+;;
+;; Group of registers:
+;; q register 'a' or 'b' or 'd' 8-bit
+;; u pseudo soft register 16-bit
+;; A register 'x', 'y', 'z' 16-bit
+;; B register 'x', 'y' 16-bit
+;; h register 'd', 'x', 'y', 'z' 16-bit
+;;
+;; Other constraints:
+;;
+;; Q an operand which is in memory but whose address is constant
+;; (i.e., a (MEM (SYMBOL_REF x))). This constraint is used by
+;; bset/bclr instructions together with linker relaxation. The
+;; operand can be translated to a page0 addressing mode if the
+;; symbol address is in page0 (0..255).
+;;
+;; R an operand which is in memory and whose address is expressed
+;; with 68HC11/68HC12 indexed addressing mode. In general this
+;; is any valid (MEM) except a (MEM (SYMBOL_REF x)).
+;;
+;; U an operand which is in memory and if it uses the 68HC12 indexed
+;; addressing mode, the offset is in the range -16..+15. This is
+;; used by 68HC12 movb/movw instructions since they do not accept
+;; the full 16-bit offset range (as other insns do).
+;;
+;;
+;; Immediate integer operand constraints:
+;; `L' is for range -65536 to 65535
+;; `M' is for values whose 16-bit low part is 0
+;; 'N' is for +1 or -1.
+;; 'O' is for 16 (for rotate using swap).
+;; 'P' is for range -8 to 2 (used by addhi_sp)
+;;
+;; In many cases, it's not possible to use the 'g' or 'r' constraints.
+;;
+;; Operands modifiers:
+;;
+;; %b Get the low part of the operand (to obtain a QImode)
+;; This modifier must always be used for QImode operations
+;; because a correction must be applied when the operand
+;; is a soft register (ex: *ZD1). Otherwise, we generate
+;; *ZD1 and this is the high part of the register. For other
+;; kinds of operands, if the operand is already QImode, no
+;; additional correction is made.
+;; %h Get the high part of the operand (to obtain a QImode)
+;; %t Represents the temporary/scratch register *_.tmp
+;; The scratch register is used in some cases when GCC puts
+;; some values in bad registers.
+;;
+;; 32/64-bit Patterns:
+;; The 68HC11 does not support 32/64-bit operations. Most of the
+;; 32/64-bit patterns are defined to split the instruction into
+;; 16-bit patterns. Providing split patterns generates better code
+;; than letting GCC implement the 32/64-bit operation itself.
+;;
+;;
+;; Notes:
+;;
+;; o For iorqi3, andqi3, xorqi3 patterns, we must accept the 'A' constraint
+;; otherwise some insns are not satisfied.
+;;
+;; o Split patterns that create a swap_areg pattern (xgdx or xgdy) must
+;; be valid only when z_replacement_completed == 2 because once these
+;; swap instructions are generated, a flow/cse pass fails to handle
+;; them correctly (it would treat the X, Y or D register as dead sometimes).
+;;
+;; o Some split patterns generate instructions that operate on the 'a' or 'b'
+;; register directly (high part and low part of D respectively).
+;; Such split patterns must also be valid when z_replacement_completed == 2
+;; because flow/cse is not aware that D is composed of {a, b}.
+;;
+;; o Split patterns that generate a (mem:QI (symbol_ref _.dx)) to access
+;; the high part of a soft register must be expanded after z_replacement
+;; pass.
+;;
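+;; For example (an illustrative sketch, not a pattern from this file):
+;; the %b modifier matters for soft registers. For a QImode use of a
+;; 16-bit soft register such as *ZD1, "%b0" prints the location of its
+;; low byte, whereas a plain "%0" would name the high part.
+;;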
+;;---------------------------------------------------------------------------
+;; Constants
+
+(define_constants [
+ ;; Register numbers
+ (X_REGNUM 0) ; Index X register
+ (D_REGNUM 1) ; Data register
+ (Y_REGNUM 2) ; Index Y register
+ (SP_REGNUM 3) ; Stack pointer
+ (PC_REGNUM 4) ; Program counter
+ (A_REGNUM 5) ; A (high part of D)
+ (B_REGNUM 6) ; B (low part of D)
+ (CC_REGNUM 7) ; Condition code register
+ (SOFT_TMP_REGNUM 10) ; TMP soft register
+ (SOFT_Z_REGNUM 11) ; Z soft register
+ (SOFT_XY_REGNUM 12) ; XY soft register
+])
+
+(include "predicates.md")
+
+;;--------------------------------------------------------------------
+;;- Test
+;;--------------------------------------------------------------------
+;;
+;; The test and compare insns must not accept a memory operand with
+;; an auto-inc mode. If they did, reload could emit move insns
+;; after the test or compare. Such a move would set the flags and
+;; therefore break the comparison. This can happen if the auto-inc
+;; register does not happen to be a hard register (i.e., reloading
+;; occurs). An offsettable memory operand should be ok. The
+;; 'tst_operand' and 'cmp_operand' predicates take care of this rule.
+;;
+
+(define_insn "tsthi_1"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "tst_operand" "dx,*y")
+ (const_int 0)))]
+ ""
+ "*
+{
+ if (D_REG_P (operands[0]) && !TARGET_M6812)
+ return \"std\\t%t0\";
+ else
+ return \"cp%0\\t#0\";
+}")
+
+;;
+;; Split pattern for (tst:QI) on an address register.
+;;
+(define_split
+ [(set (cc0)
+ (compare (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (const_int 0)))]
+ "z_replacement_completed == 2 && GET_MODE (operands[0]) == QImode"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 1))
+ (set (match_dup 1) (reg:HI D_REGNUM))])
+ (set (cc0) (compare (reg:QI D_REGNUM)
+ (const_int 0)))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 1))
+ (set (match_dup 1) (reg:HI D_REGNUM))])]
+ "operands[1] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+(define_insn "tstqi_1"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "tst_operand" "m,d,*A,!u")
+ (const_int 0)))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ else if (D_REG_P (operands[0]))
+ return \"tstb\";
+
+ else if (dead_register_here (insn, d_reg))
+ return \"ldab\\t%b0\";
+
+ else
+ return \"tst\\t%b0\";
+}")
+
+;;
+;; tstqi_z_used, cmpqi_z_used and cmphi_z_used are patterns generated
+;; during the Z register replacement. They are used when an operand
+;; uses the Z register as an index register (i.e., (MEM:QI (REG:HI Z))).
+;; In that case, we have to preserve the value of the replacement
+;; register (as well as the CC0 since the insns are compare insns).
+;; To do this, the replacement register is pushed on the stack and
+;; restored after the real compare. A pattern+split is defined to
+;; avoid problems with the flow+cse register passes which run
+;; after Z register replacement.
+;;
+(define_insn_and_split "tstqi_z_used"
+ [(set (cc0) (compare (match_operand:QI 0 "tst_operand" "m")
+ (const_int 0)))
+ (use (match_operand:HI 1 "hard_reg_operand" "dxy"))
+ (use (reg:HI SOFT_Z_REGNUM))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 1))
+ (set (match_dup 1) (match_dup 2))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))
+ (set (match_dup 1) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
+ "operands[2] = gen_rtx_REG (HImode, SOFT_Z_REGNUM);")
+
+
+;;--------------------------------------------------------------------
+;;- Compare
+;;--------------------------------------------------------------------
+
+;;
+;; Comparison of one hard register with another is provided because
+;; it helps GCC avoid spilling a pseudo hard register.
+;; We use a temporary in page 0; this is equivalent to a pseudo hard reg,
+;; except that we lose the information that the value is saved in it.
+;;
+;; The split pattern transforms the comparison into a save of one hard
+;; register and a comparison with the temporary.
+;;
+(define_split
+ [(set (cc0)
+ (compare (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "hard_reg_operand" "")))]
+ "TARGET_M6811
+ && reload_completed && !(Z_REG_P (operands[0]) || Z_REG_P (operands[1]))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (cc0)
+ (compare (match_dup 0) (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);")
+
+(define_split
+ [(set (cc0)
+ (compare (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "hard_reg_operand" "")))]
+ "0 && TARGET_M6812
+ && reload_completed && !(Z_REG_P (operands[0]) || Z_REG_P (operands[1]))"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 1))
+ (set (cc0)
+ (compare (match_dup 0) (mem:HI (post_inc:HI (reg:HI SP_REGNUM)))))]
+ "")
+
+(define_insn "cmphi_1_hc12"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "tst_operand"
+ "d,?xy,xyd,?xy,d,m,!u,dxy,dxy")
+ (match_operand:HI 1 "general_operand"
+ "i,i,!u,m,m,dxy,dxy,?*d*A,!*w")))]
+ "TARGET_M6812"
+ "*
+{
+ if (H_REG_P (operands[1]) && !H_REG_P (operands[0]))
+ {
+ cc_status.flags |= CC_REVERSED;
+ return \"cp%1\\t%0\";
+ }
+ else if (SP_REG_P (operands[1]))
+ return \"sts\\t2,-sp\n\\tcp%0\\t2,sp+\";
+ else if (H_REG_P (operands[1]))
+ return \"psh%1\n\\tcp%0\\t2,sp+\";
+ else
+ return \"cp%0\\t%1\";
+}")
+
+(define_insn "cmphi_1_hc11"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "tst_operand"
+ "dx,y,xyd,?xy,d,m,m,dxy,dxy,?u*z,dxy,*z")
+ (match_operand:HI 1 "cmp_operand"
+ "i,i,!u,m,m,?xy,d,?*d*A,?u,dxy,!*w,i")))]
+ "TARGET_M6811"
+ "*
+{
+ if (H_REG_P (operands[1]) && !H_REG_P (operands[0]))
+ {
+ cc_status.flags |= CC_REVERSED;
+ return \"cp%1\\t%0\";
+ }
+ else if (H_REG_P (operands[1]))
+ return \"#\";
+ else
+ return \"cp%0\\t%1\";
+}")
+
+(define_insn_and_split "cmphi_z_used"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "tst_operand" "dxy,m")
+ (match_operand:HI 1 "cmp_operand" "mi,dxy")))
+ (use (match_operand:HI 2 "hard_reg_operand" "dxy,dxy"))
+ (use (reg:HI SOFT_Z_REGNUM))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
+ (set (match_dup 2) (match_dup 3))
+ (set (cc0) (compare (match_dup 0) (match_dup 1)))
+ (set (match_dup 2) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
+ "operands[3] = gen_rtx_REG (HImode, SOFT_Z_REGNUM);")
+
+;;
+;; 8-bit comparison with address register.
+;; There is no such comparison instruction, we have to temporarily switch
+;; the address register and the D register and do the comparison with D.
+;; The xgdx and xgdy instructions preserve the flags.
+;;
+(define_split
+ [(set (cc0)
+ (compare (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (match_operand:QI 1 "cmp_operand" "")))]
+ "z_replacement_completed == 2 && GET_MODE (operands[0]) == QImode"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])
+ (set (cc0)
+ (compare (reg:QI D_REGNUM) (match_dup 1)))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])]
+ "operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+(define_split
+ [(set (cc0)
+ (compare (match_operand:QI 0 "hard_reg_operand" "")
+ (match_operand:QI 1 "hard_reg_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (cc0)
+ (compare (match_dup 0) (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ operands[3] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[4] = gen_rtx_REG (HImode, REGNO (operands[1]));")
+
+(define_insn "bitcmpqi"
+ [(set (cc0)
+ (compare (and:QI (match_operand:QI 0 "tst_operand" "d,d,d,m,!u")
+ (match_operand:QI 1 "cmp_operand" "im,*B,u,d,d"))
+ (const_int 0)))]
+ ""
+ "@
+ bitb\\t%b1
+ #
+ bitb\\t%b1
+ bitb\\t%b0
+ bitb\\t%b0")
+
+(define_split /* "bitcmpqi" */
+ [(set (cc0)
+ (compare (and:QI (match_operand:QI 0 "tst_operand" "")
+ (match_operand:QI 1 "hard_addr_reg_operand" ""))
+ (const_int 0)))]
+ "z_replacement_completed == 2"
+ [(set (match_dup 3) (match_dup 2))
+ (set (cc0) (and:QI (match_dup 0) (match_dup 4)))]
+ "operands[2] = gen_rtx_REG (HImode, REGNO (operands[1]));
+ operands[3] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[4] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);")
+
+(define_insn_and_split "bitcmpqi_z_used"
+ [(set (cc0)
+ (compare (and:QI (match_operand:QI 0 "tst_operand" "d,m")
+ (match_operand:QI 1 "cmp_operand" "m,d"))
+ (const_int 0)))
+ (use (match_operand:HI 2 "hard_reg_operand" "xy,xy"))
+ (use (reg:HI SOFT_Z_REGNUM))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
+ (set (match_dup 2) (match_dup 3))
+ (set (cc0) (and:QI (match_dup 0) (match_dup 1)))
+ (set (match_dup 2) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
+ "operands[3] = gen_rtx_REG (HImode, SOFT_Z_REGNUM);")
+
+(define_insn "bitcmphi"
+ [(set (cc0)
+ (compare (and:HI (match_operand:HI 0 "tst_operand" "d")
+ (match_operand:HI 1 "const_int_operand" "i"))
+ (const_int 0)))]
+ "(INTVAL (operands[1]) & 0x0ff) == 0
+ || (INTVAL (operands[1]) & 0x0ff00) == 0"
+ "*
+{
+ if ((INTVAL (operands[1]) & 0x0ff) == 0)
+ return \"bita\\t%h1\";
+ else
+ return \"bitb\\t%1\";
+}")
+
+(define_insn "bitcmpqi_12"
+ [(set (cc0)
+ (compare (zero_extract:HI (match_operand:HI 0 "tst_operand" "d")
+ (match_operand:HI 1 "const_int_operand" "i")
+ (match_operand:HI 2 "const_int_operand" "i"))
+ (const_int 0)))]
+ "(unsigned) (INTVAL (operands[2]) + INTVAL (operands[1])) <= 8
+ || (((unsigned) (INTVAL (operands[2]) + INTVAL (operands[1])) <= 16)
+ && (unsigned) INTVAL (operands[2]) >= 8)"
+ "*
+{
+ rtx ops[1];
+ int mask;
+ int startpos = INTVAL (operands[2]);
+ int bitsize = INTVAL (operands[1]);
+
+ if (startpos >= 8)
+ {
+ startpos -= 8;
+ mask = (1 << (startpos + bitsize)) - 1;
+ mask &= ~((1 << startpos) - 1);
+
+ ops[0] = GEN_INT (mask);
+ output_asm_insn (\"bita\\t%0\", ops);
+ }
+ else
+ {
+ mask = (1 << (startpos + bitsize)) - 1;
+ mask &= ~((1 << startpos) - 1);
+
+ ops[0] = GEN_INT (mask);
+ output_asm_insn (\"bitb\\t%0\", ops);
+ }
+ return \"\";
+}")
+
+(define_insn "cmpqi_1"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "tst_operand" "d,m,d,!u,*B,d*B")
+ (match_operand:QI 1 "cmp_operand" "im,d,!u,d,dim*A,*u")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || A_REG_P (operands[1]))
+ {
+ return \"#\";
+ }
+ else if (D_REG_P (operands[0]))
+ {
+ return \"cmpb\\t%b1\";
+ }
+ cc_status.flags |= CC_REVERSED;
+ return \"cmpb\\t%b0\";
+}")
+
+(define_insn_and_split "cmpqi_z_used"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "tst_operand" "dxy,m")
+ (match_operand:QI 1 "cmp_operand" "m,dxy")))
+ (use (match_operand:HI 2 "hard_reg_operand" "dxy,dxy"))
+ (use (reg:HI SOFT_Z_REGNUM))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
+ (set (match_dup 2) (match_dup 3))
+ (set (cc0) (compare (match_dup 0) (match_dup 1)))
+ (set (match_dup 2) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
+ "operands[3] = gen_rtx_REG (HImode, SOFT_Z_REGNUM);")
+
+;;--------------------------------------------------------------------
+;;- Move strict_low_part
+;;--------------------------------------------------------------------
+;;
+;; The (strict_low_part ...) patterns are replaced by normal (set) patterns.
+;; The replacement must be made at the very end because we lose the
+;; (strict_low_part ...) information. This is correct for our machine
+;; description but not for GCC optimization passes.
+;;
+(define_insn_and_split "movstrictsi"
+ [(set (strict_low_part (match_operand:SI 0 "non_push_operand" "+um,D,D"))
+ (match_operand:SI 1 "general_operand" "D,Dim,uD"))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_insn_and_split "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "non_push_operand" "+um,dA,dA"))
+ (match_operand:HI 1 "general_operand" "dA,dAim,u"))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_insn_and_split "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "non_push_operand" "+mu,!dA"))
+ (match_operand:QI 1 "general_operand" "d,imudA"))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+;;--------------------------------------------------------------------
+;;- 64-bit Move Operations.
+;; The movdi and movdf patterns are identical except for the mode.
+;; They are also very similar to those for movsi and movsf.
+;;
+;; For 68HC11, we need a scratch register (either D, X, Y)
+;; because there are no memory->memory moves. It must be defined with
+;; earlyclobber (&) so that it does not appear in the source or destination
+;; address. Providing patterns for movdi/movdf allows GCC to generate
+;; better code. [For now, the scratch register is limited to D because
+;; otherwise we can run out of registers in the A_REGS class during reload.]
+;;
+;; For 68HC12, the scratch register is not necessary. To use the same
+;; pattern and same split, we use the 'v' constraint. This tells the
+;; reload to use the _.tmp register (which is not used at all).
+;; The insn will be split in one or several memory moves (movw).
+;; [SCz: this does not work?? So, I temporarily switched to the 'd' reg.]
+;;--------------------------------------------------------------------
+(define_expand "movdi"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))
+ (clobber (match_scratch:HI 2 ""))])]
+ ""
+ "
+ /* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_movdi_internal (operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+")
+
+;; Separate push from normal moves to avoid reloading problems.
+;; The 'clr' is not able to push on 68HC11 so we really need a scratch.
+;; We can also accept more scratch registers.
+(define_insn_and_split "*pushdi_internal"
+ [(set (match_operand:DI 0 "push_operand" "=<,<,<,<")
+ (match_operand:DI 1 "general_operand" "i,U,m,!u"))
+ (clobber (match_scratch:HI 2 "=&dA,&d,&d,&dA"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_insn_and_split "movdi_internal"
+ [(set (match_operand:DI 0 "non_push_operand" "=m!u,U,!u,U,m,m,!u")
+ (match_operand:DI 1 "general_operand" "K,iU,iU,!u,mi,!u,!mu"))
+ (clobber (match_scratch:HI 2 "=X,&d,&d,&d,&d,&d,&d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_expand "movdf"
+ [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))
+ (clobber (match_scratch:HI 2 ""))])]
+ ""
+ "/* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_movdf_internal (operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+")
+
+;; See pushdi_internal
+(define_insn_and_split "*pushdf_internal"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+ (match_operand:DF 1 "general_operand" "i,U,m,!u"))
+ (clobber (match_scratch:HI 2 "=&dA,&d,&d,&dA"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_insn_and_split "movdf_internal"
+ [(set (match_operand:DF 0 "non_push_operand" "=mu,U,m,!u,U,m,!u")
+ (match_operand:DF 1 "general_operand" "G,iU,mi,iU,!u,!u,!mu"))
+ (clobber (match_scratch:HI 2 "=X,&d,&d,&d,&d,&d,&d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+;;--------------------------------------------------------------------
+;;- 32-bit Move Operations.
+;; The movsi and movsf patterns are identical except for the mode.
+;; When we move to/from a hard register (d+x), we don't need a scratch.
+;; Otherwise, a scratch register is used as intermediate register for
+;; the move. The '&' constraint is necessary to make sure the reload
+;; pass does not give us a register that dies in the insn and is used
+;; for input/output operands.
+;;--------------------------------------------------------------------
+(define_expand "movsi"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))
+ (clobber (match_scratch:HI 2 ""))])]
+ ""
+ "/* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_movsi_internal (operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+")
+
+(define_insn_and_split "*pushsi_internal"
+ [(set (match_operand:SI 0 "push_operand" "=<,<,<,<,<")
+ (match_operand:SI 1 "general_operand" "!D,i,U,m,!u"))
+ (clobber (match_scratch:HI 2 "=X,&dA,&d,&d,&dA"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_insn_and_split "movsi_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mu,mu,?D,m,?D,?u,?u,!u,D")
+ (match_operand:SI 1 "general_operand" "K,imu,im,?D,!u,?D,mi,!u,!D"))
+ (clobber (match_scratch:HI 2 "=X,&d,X,X,X,X,&d,&d,X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_expand "movsf"
+ [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))
+ (clobber (match_scratch:HI 2 ""))])]
+ ""
+ "/* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_movsf_internal (operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+")
+
+(define_insn_and_split "*pushsf_internal"
+ [(set (match_operand:SF 0 "push_operand" "=<,<,<,<,<")
+ (match_operand:SF 1 "general_operand" "!D,i,U,m,!u"))
+ (clobber (match_scratch:HI 2 "=X,&dA,&d,&d,&dA"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+(define_insn_and_split "movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m!u,m,D,m,D,!u,!u,!u,D")
+ (match_operand:SF 1 "general_operand" "G,im,im,D,!u,D,mi,!u,!D"))
+ (clobber (match_scratch:HI 2 "=X,&d,X,X,X,X,&d,&d,X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (operands[0], operands[1], operands[2]);
+ DONE;")
+
+
+;;--------------------------------------------------------------------
+;;- 16-bit Move Operations.
+;; We don't need a scratch register.
+;;--------------------------------------------------------------------
+
+(define_insn "*movhi2_push"
+ [(set (match_operand:HI 0 "push_operand" "=<,<,<")
+ (match_operand:HI 1 "general_operand" "xy,?d,!z"))]
+ "TARGET_M6811 && !TARGET_M6812"
+ "*
+{
+ cc_status = cc_prev_status;
+ if (D_REG_P (operands[1]))
+ {
+ output_asm_insn (\"pshb\", operands);
+ return \"psha\";
+ }
+ else if (X_REG_P (operands[1]))
+ {
+ return \"pshx\";
+ }
+ else if (Y_REG_P (operands[1]))
+ {
+ return \"pshy\";
+ }
+ fatal_insn (\"Invalid register in the instruction\", insn);
+}")
+
+(define_insn "*movhi2_pop"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=xy,d")
+ (match_operand:HI 1 "pop_operand" ">,>"))]
+ "TARGET_M6811"
+ "*
+{
+ cc_status = cc_prev_status;
+ if (D_REG_P (operands[0]))
+ {
+ output_asm_insn (\"pula\", operands);
+ return \"pulb\";
+ }
+ else if (X_REG_P (operands[0]))
+ {
+ return \"pulx\";
+ }
+ else if (Y_REG_P (operands[0]))
+ {
+ return \"puly\";
+ }
+ fatal_insn (\"Invalid register in the instruction\", insn);
+}")
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (reload_in_progress)
+ {
+ if (m68hc11_reload_operands (operands))
+ {
+ DONE;
+ }
+ }
+ if (TARGET_M6811 && (reload_in_progress | reload_completed) == 0)
+ {
+ if (GET_CODE (operands[0]) == MEM &&
+ (GET_CODE (operands[1]) == MEM
+ || GET_CODE (operands[1]) == CONST_INT))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ }
+ else if (IS_STACK_PUSH (operands[0])
+ && GET_CODE (operands[1]) != REG)
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ }
+ }
+ /* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+}")
+
+(define_insn "*movhi_68hc12"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=U,dAw,dAw,m,U,U,m,!u")
+ (match_operand:HI 1 "general_operand" "U,dAwim,!u,K,dAwi,!u,dAw,riU"))]
+ "TARGET_M6812"
+ "*
+{
+ m68hc11_gen_movhi (insn, operands);
+ return \"\";
+}")
+
+(define_insn "movhi_const0"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,A,um")
+ (const_int 0))]
+ "TARGET_M6811"
+ "@
+ clra\\n\\tclrb
+ ld%0\\t#0
+ clr\\t%b0\\n\\tclr\\t%h0")
+
+(define_insn "*movhi_m68hc11"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dAw,!u,m,m,dAw,!*u")
+ (match_operand:HI 1 "general_operand" "dAwim,dAw,dA,?Aw,!*u,dAw"))]
+ "TARGET_M6811"
+ "*
+{
+ m68hc11_gen_movhi (insn, operands);
+ return \"\";
+}")
+
+;;--------------------------------------------------------------------
+;;- 8-bit Move Operations.
+;; We don't need a scratch register.
+;;--------------------------------------------------------------------
+;;
+;; The *a alternative also clears the high part of the register.
+;; This should be ok since this is not the (strict_low_part) set.
+;;
+(define_insn "movqi_const0"
+ [(set (match_operand:QI 0 "non_push_operand" "=d,m,!u,*A,!*q")
+ (const_int 0))]
+ ""
+ "@
+ clrb
+ clr\\t%b0
+ clr\\t%b0
+ ld%0\\t#0
+ clr%0")
+
+;;
+;; 8-bit operations on address registers.
+;;
+;; Temporarily switch to the D register and load the value in B.
+;; This is possible as long as the address register does not
+;; appear in the source operand.
+;;
+(define_split
+ [(set (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ "z_replacement_completed == 2
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !(D_REG_P (operands[1]) || Q_REG_P (operands[1]))"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 2))
+ (set (match_dup 2) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (match_dup 1))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 2))
+ (set (match_dup 2) (reg:HI D_REGNUM))])]
+ "operands[2] = gen_rtx_REG (HImode, REGNO (operands[0]));")
+
+;;
+;; 8-bit operations on address registers.
+;;
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "hard_addr_reg_operand" ""))]
+ "z_replacement_completed == 2
+ && !reg_mentioned_p (operands[1], operands[0])
+ && !(D_REG_P (operands[0]) || Q_REG_P (operands[0]))"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 2))
+ (set (match_dup 2) (reg:HI D_REGNUM))])
+ (set (match_dup 0) (reg:QI D_REGNUM))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 2))
+ (set (match_dup 2) (reg:HI D_REGNUM))])]
+ "operands[2] = gen_rtx_REG (HImode, REGNO (operands[1]));")
+
+(define_insn "*movqi2_push"
+ [(set (match_operand:QI 0 "push_operand" "=<,<")
+ (match_operand:QI 1 "general_operand" "d,!*A"))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[1]))
+ return \"#\";
+
+ cc_status = cc_prev_status;
+ return \"pshb\";
+}")
+
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (reload_in_progress)
+ {
+ if (m68hc11_reload_operands (operands))
+ {
+ DONE;
+ }
+ }
+ if (TARGET_M6811 && (reload_in_progress | reload_completed) == 0)
+ {
+ if (GET_CODE (operands[0]) == MEM
+ && (GET_CODE (operands[1]) == MEM
+ || GET_CODE (operands[1]) == CONST_INT))
+ {
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+ else if (IS_STACK_PUSH (operands[0])
+ && GET_CODE (operands[1]) != REG)
+ {
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+ }
+ /* For push/pop, emit a REG_INC note to make sure the reload
+ inheritance and reload CSE pass notice the change of the stack
+ pointer. */
+ if (IS_STACK_PUSH (operands[0]) || IS_STACK_POP (operands[1]))
+ {
+ rtx insn;
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ stack_pointer_rtx,
+ REG_NOTES (insn));
+ DONE;
+ }
+}")
+
+(define_insn "*movqi_68hc12"
+ [(set (match_operand:QI 0 "nonimmediate_operand"
+ "=U,d*AU*q,d*A*qU,d*A*q,m,?*u,m")
+ (match_operand:QI 1 "general_operand"
+ "U,*ri*q,U,m,d*q,*ri*qU,!*A"))]
+ "TARGET_M6812"
+ "*
+{
+ m68hc11_gen_movqi (insn, operands);
+ return \"\";
+}")
+
+(define_insn "*movqi_m68hc11"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d*A*q,m,m,d*A*q,*u")
+ (match_operand:QI 1 "general_operand" "d*Aim*q,d*q,!*A,*u,d*A*q"))]
+ "TARGET_M6811"
+ "*
+{
+ m68hc11_gen_movqi (insn, operands);
+ return \"\";
+}")
+
+;;--------------------------------------------------------------------
+;;- Swap registers
+;;--------------------------------------------------------------------
+;; Swapping registers is used for split patterns.
+(define_insn "swap_areg"
+ [(set (match_operand:HI 0 "hard_reg_operand" "=d,A")
+ (match_operand:HI 1 "hard_reg_operand" "=A,d"))
+ (set (match_dup 1) (match_dup 0))]
+ ""
+ "*
+{
+ m68hc11_output_swap (insn, operands);
+ return \"\";
+}")
+
+;;--------------------------------------------------------------------
+;;- Truncation insns.
+;;--------------------------------------------------------------------
+;;
+;; Truncation patterns are not necessary because GCC knows how to truncate,
+;; especially when values lie in consecutive registers.
+;;
+
+(define_expand "floatunssisf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unsigned_float:SF (match_operand:SI 1 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"__floatunsisf\", UNSIGNED_FLOAT,
+ SFmode, SImode, 2, operands);
+ DONE;")
+
+(define_expand "floatunssidf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (unsigned_float:DF (match_operand:SI 1 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"__floatunsidf\", UNSIGNED_FLOAT,
+ DFmode, SImode, 2, operands);
+ DONE;")
+
+;;--------------------------------------------------------------------
+;;- Zero extension insns.
+;;--------------------------------------------------------------------
+
+;;
+;; 64-bit extend. The insn will be split into 16-bit instructions just
+;; before the final pass. We need a scratch register for the split.
+;; The final value can be generated directly on the stack. This is more
+;; efficient and useful for conversions made during parameter passing.
+;;
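+;; A hedged sketch of the push split below, assuming the QI source is
+;; already in B and the scratch ends up allocated in D:
+;;	pshb			; push the value byte
+;;	ldd	#0		; clear the scratch
+;;	pshb			; zero byte completing the low word
+;;	pshb
+;;	psha			; three zero words follow
+;;	pshb
+;;	psha
+;;	pshb
+;;	psha
+;;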
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,!u,m,!u")
+ (zero_extend:DI
+ (match_operand:QI 1 "nonimmediate_operand" "m,dmu,*B,*B")))
+ (clobber (match_scratch:HI 2 "=&d,&dB,&d,&dB"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:HI 2 "=&dB"))]
+ "z_replacement_completed == 2"
+ [(const_int 0)]
+ "
+{
+ rtx low = m68hc11_gen_lowpart (SImode, operands[0]);
+ rtx push = m68hc11_gen_lowpart (HImode, low);
+ rtx src = operands[1];
+
+ /* Source operand must be in a hard register. */
+ if (!H_REG_P (src))
+ {
+ src = gen_rtx_REG (QImode, REGNO (operands[2]));
+ emit_move_insn (src, operands[1]);
+ }
+
+ /* The source is in D: we can push B, then one zero word, and then
+ apply a correction to the stack pointer. */
+ if (D_REG_P (src))
+ {
+ emit_move_insn (m68hc11_gen_lowpart (QImode, push), src);
+ emit_move_insn (operands[2], const0_rtx);
+ if (D_REG_P (operands[2]))
+ {
+ emit_move_insn (m68hc11_gen_lowpart (QImode, push), src);
+ }
+ else
+ {
+ emit_move_insn (push, operands[2]);
+ emit_insn (gen_addhi3 (gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ const1_rtx));
+ }
+ }
+ else
+ {
+ /* The source is in X or Y. It's better to push the 16-bit register
+ and then do some stack adjustment. */
+ src = gen_rtx_REG (HImode, REGNO (src));
+ emit_move_insn (push, src);
+ emit_move_insn (operands[2], const0_rtx);
+ emit_insn (gen_addhi3 (gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ const1_rtx));
+ emit_move_insn (push, operands[2]);
+ emit_insn (gen_addhi3 (gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ const1_rtx));
+ }
+ emit_move_insn (push, operands[2]);
+ emit_move_insn (push, operands[2]);
+ emit_move_insn (push, operands[2]);
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:HI 2 "=&dB"))]
+ "z_replacement_completed == 2"
+ [(const_int 0)]
+ "
+{
+ rtx low = m68hc11_gen_lowpart (SImode, operands[0]);
+ rtx low2 = m68hc11_gen_lowpart (HImode, low);
+ rtx src = operands[1];
+
+ /* Source operand must be in a hard register. */
+ if (!H_REG_P (src))
+ {
+ src = gen_rtx_REG (QImode, REGNO (operands[2]));
+ emit_move_insn (src, operands[1]);
+ }
+
+ emit_move_insn (m68hc11_gen_lowpart (QImode, low2), src);
+ emit_move_insn (operands[2], const0_rtx);
+ src = gen_rtx_REG (QImode, REGNO (operands[2]));
+ emit_move_insn (m68hc11_gen_highpart (QImode, low2), src);
+
+ emit_move_insn (m68hc11_gen_highpart (HImode, low), operands[2]);
+ low = m68hc11_gen_highpart (SImode, operands[0]);
+ emit_move_insn (m68hc11_gen_lowpart (HImode, low), operands[2]);
+ emit_move_insn (m68hc11_gen_highpart (HImode, low), operands[2]);
+ DONE;
+}")
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "non_push_operand" "=m,m,m,m,!u,!u")
+ (zero_extend:DI
+ (match_operand:HI 1 "nonimmediate_operand" "m,d,A,!u,dmA,!u")))
+ (clobber (match_scratch:HI 2 "=&d,&B,&d,&dB,&dB,&dB"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "non_push_operand" "")
+ (zero_extend:DI
+ (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:HI 2 ""))]
+ "z_replacement_completed == 2"
+ [(const_int 0)]
+ "
+{
+ rtx low = m68hc11_gen_lowpart (SImode, operands[0]);
+ rtx high = m68hc11_gen_highpart (SImode, operands[0]);
+ rtx src = operands[1];
+
+ /* Make sure the source is in a hard register. */
+ if (!H_REG_P (src))
+ {
+ src = operands[2];
+ emit_move_insn (src, operands[1]);
+ }
+
+ /* Move the low part first for the push. */
+ emit_move_insn (m68hc11_gen_lowpart (HImode, low), src);
+
+ /* Now, use the scratch register to fill in the zeros. */
+ emit_move_insn (operands[2], const0_rtx);
+ emit_move_insn (m68hc11_gen_highpart (HImode, low), operands[2]);
+ emit_move_insn (m68hc11_gen_lowpart (HImode, high), operands[2]);
+ emit_move_insn (m68hc11_gen_highpart (HImode, high), operands[2]);
+ DONE;
+}")
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,m,!u,!u")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "m,Du,m,Du")))
+ (clobber (match_scratch:HI 2 "=d,d,d,d"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:HI 2 ""))]
+ "z_replacement_completed == 2"
+ [(const_int 0)]
+ "
+{
+ rtx low = m68hc11_gen_lowpart (SImode, operands[0]);
+ rtx high = m68hc11_gen_highpart (SImode, operands[0]);
+
+ /* Move the low part first so that this is ok for a push. */
+ m68hc11_split_move (low, operands[1], operands[2]);
+
+ /* Use the scratch register to clear the high part of the destination. */
+ emit_move_insn (operands[2], const0_rtx);
+ emit_move_insn (m68hc11_gen_lowpart (HImode, high), operands[2]);
+ emit_move_insn (m68hc11_gen_highpart (HImode, high), operands[2]);
+ DONE;
+}")
+
+;;
+;; For 16->32-bit unsigned extension, we don't allow generation on the stack
+;; because it's less efficient.
+;;
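+;; For instance, with the (X,D) register pair as destination this is just:
+;;	ldd	src		; low 16 bits (hypothetical memory operand)
+;;	ldx	#0		; clear the high 16 bits
+;;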
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,m,u,m,m,!u,!u")
+ (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "dAmu,dA,dA,m,!u,m,!u")))
+ (clobber (match_scratch:HI 2 "=X,X,X,&d,&dB,&dB,&dB"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "non_push_operand" "")
+ (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:HI 2 ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx src = operands[1];
+
+ if (!H_REG_P (src) && !H_REG_P (operands[0]))
+ {
+ src = operands[2];
+ emit_move_insn (src, operands[1]);
+ }
+ emit_move_insn (m68hc11_gen_lowpart (HImode, operands[0]), src);
+ emit_move_insn (m68hc11_gen_highpart (HImode, operands[0]), const0_rtx);
+ DONE;
+}")
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,D,m,m,u")
+ (zero_extend:SI
+ (match_operand:QI 1 "nonimmediate_operand" "dmu,xy,d,xy,dxy")))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "non_push_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "reload_completed && !X_REG_P (operands[0])"
+ [(set (match_dup 2) (zero_extend:HI (match_dup 1)))
+ (set (match_dup 3) (const_int 0))]
+ "
+ operands[2] = m68hc11_gen_lowpart (HImode, operands[0]);
+ operands[3] = m68hc11_gen_highpart (HImode, operands[0]);")
+
+(define_split
+ [(set (match_operand:SI 0 "hard_reg_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "z_replacement_completed == 2 && X_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (const_int 0))
+ (set (match_dup 5) (zero_extend:HI (match_dup 6)))]
+ "
+ if (X_REG_P (operands[1]))
+ {
+ emit_insn (gen_swap_areg (gen_rtx_REG (HImode, HARD_D_REGNUM),
+ gen_rtx_REG (HImode, HARD_X_REGNUM)));
+ emit_insn (gen_zero_extendqihi2 (gen_rtx_REG (HImode, HARD_D_REGNUM),
+ gen_rtx_REG (QImode, HARD_D_REGNUM)));
+ emit_move_insn (gen_rtx_REG (HImode, HARD_X_REGNUM),
+ const0_rtx);
+ DONE;
+ }
+
+ if (reg_mentioned_p (gen_rtx_REG (HImode, HARD_X_REGNUM), operands[1]))
+ {
+ emit_insn (gen_zero_extendqihi2 (m68hc11_gen_lowpart (HImode,
+ operands[0]),
+ operands[1]));
+ emit_move_insn (gen_rtx_REG (HImode, HARD_X_REGNUM), const0_rtx);
+ DONE;
+ }
+ operands[4] = m68hc11_gen_highpart (HImode, operands[0]);
+ operands[5] = m68hc11_gen_lowpart (HImode, operands[0]);
+ if (A_REG_P (operands[1]))
+ {
+ operands[2] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[3] = gen_rtx_REG (HImode, REGNO (operands[1]));
+ operands[6] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ }
+ else
+ {
+ operands[5] = operands[2] =
+ operands[3] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ operands[6] = operands[1];
+ }
+")
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "non_push_operand" "=dm,d,*A,!*u,d,m,!*u")
+ (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "d,*A,d*Am,d,!um,*A,*A")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (H_REG_P (operands[0]))
+ {
+ output_asm_insn (\"clra\", operands);
+ if (operands[0] != operands[1]
+ && !(D_REG_P (operands[0]) && D_REG_P (operands[1])))
+ {
+ if (X_REG_P (operands[1])
+ || (D_REG_P (operands[1]) && X_REG_P (operands[0])))
+ {
+ output_asm_insn (\"stx\\t%t1\", operands);
+ output_asm_insn (\"ldab\\t%T0\", operands);
+ }
+ else if (Y_REG_P (operands[1])
+ || (D_REG_P (operands[1]) && Y_REG_P (operands[0])))
+ {
+ output_asm_insn (\"sty\\t%t1\", operands);
+ output_asm_insn (\"ldab\\t%T0\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ }
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ }
+ else
+ {
+ /* The status refers to the clra insn. It is ok for the other
+ cases since we have loaded the value in B. */
+ CC_STATUS_INIT;
+ }
+ return \"\";
+ }
+
+ if (A_REG_P (operands[1]))
+ {
+ output_asm_insn (\"st%1\\t%0\", operands);
+ output_asm_insn (\"clr\\t%h0\", operands);
+ CC_STATUS_INIT;
+ }
+ else
+ {
+ output_asm_insn (\"clr\\t%h0\", operands);
+ output_asm_insn (\"stab\\t%b0\", operands);
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ }
+
+ return \"\";
+}")
+
+
+;;--------------------------------------------------------------------
+;;- Sign extension insns.
+;;--------------------------------------------------------------------
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=D,m,u")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "dmux,d,d")))]
+ ""
+ "*
+{
+ rtx ops[3];
+ int need_tst = 0;
+
+ /* The 68HC12 has a sign-extension instruction. Use it when the
+ destination is the register pair (X,D). First sign-extend the low
+ part into D, then fill X with the sign extension of its high part. */
+ if (TARGET_M6812 && X_REG_P (operands[0]))
+ {
+ if (!D_REG_P (operands[1]))
+ {
+ ops[0] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movqi (insn, ops);
+ }
+ return \"sex\\tb,d\\n\\tsex\\ta,x\";
+ }
+
+ ops[2] = gen_label_rtx ();
+
+ if (X_REG_P (operands[1]))
+ {
+ output_asm_insn (\"xgdx\", operands);
+ need_tst = 1;
+ }
+ else if (X_REG_P (operands[0]))
+ {
+ /* X can be used for indexed addressing in the source.
+ Get the value before clearing it. */
+ if (reg_mentioned_p (ix_reg, operands[1]))
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ need_tst = 1;
+ }
+ output_asm_insn (\"ldx\\t#0\", operands);
+ }
+
+ output_asm_insn (\"clra\", operands);
+ if (!X_REG_P (operands[0]))
+ {
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[0]);
+ ops[1] = m68hc11_gen_lowpart (QImode, ops[0]);
+
+ if (IS_STACK_PUSH (operands[0]))
+ {
+ output_asm_insn (\"pshb\", ops);
+ output_asm_insn (\"tstb\", ops);
+ }
+ else
+ {
+ output_asm_insn (\"stab\\t%b1\", ops);
+ }
+ }
+ else if (D_REG_P (operands[1]) || need_tst)
+ {
+ output_asm_insn (\"tstb\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ }
+ output_asm_insn (\"bpl\\t%l2\", ops);
+ output_asm_insn (\"deca\", operands);
+ if (X_REG_P (operands[0]))
+ output_asm_insn (\"dex\", operands);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[2]));
+
+ if (!X_REG_P (operands[0]))
+ {
+ if (IS_STACK_PUSH (operands[0]))
+ {
+ output_asm_insn (\"psha\", ops);
+ output_asm_insn (\"psha\", ops);
+ output_asm_insn (\"psha\", ops);
+ }
+ else
+ {
+ output_asm_insn (\"staa\\t%h0\", ops);
+
+ ops[0] = m68hc11_gen_highpart (HImode, operands[0]);
+ if (dead_register_here (insn, d_reg))
+ {
+ output_asm_insn (\"tab\", ops);
+ output_asm_insn (\"std\\t%0\", ops);
+ }
+ else
+ {
+ output_asm_insn (\"staa\\t%b0\", ops);
+ output_asm_insn (\"staa\\t%h0\", ops);
+ }
+ }
+ }
+
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "non_push_operand" "=d,*x*ym,u")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "dum,0,0")))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ ops[0] = gen_label_rtx ();
+ if (D_REG_P (operands[0]))
+ {
+ if (TARGET_M6812)
+ {
+ if (!D_REG_P (operands[1]))
+ {
+ ops[0] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movqi (insn, ops);
+ }
+ return \"sex\\tb,d\";
+ }
+ output_asm_insn (\"clra\", operands);
+ if (H_REG_P (operands[1]))
+ {
+ output_asm_insn (\"tstb\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ }
+ output_asm_insn (\"bpl\\t%l0\", ops);
+ output_asm_insn (\"deca\", operands);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+ }
+ else
+ {
+ output_asm_insn (\"clr\\t%h0\", operands);
+ if (m68hc11_register_indirect_p (operands[1], HImode))
+ {
+ ops[1] = operands[1];
+ output_asm_insn (\"brclr\\t%b1 #0x80 %l0\", ops);
+ CC_STATUS_INIT;
+ }
+ else
+ {
+ output_asm_insn (\"tst\\t%b1\", operands);
+ output_asm_insn (\"bpl\\t%l0\", ops);
+ }
+ output_asm_insn (\"dec\\t%h0\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+ }
+
+ return \"\";
+}")
+
+;;
+;; Split the special case where the source of the sign extend is
+;; either Y or Z. In that case, we can't move the source into the D
+;; register directly. The movhi pattern handles this move by using
+;; a temporary scratch memory location.
+;;
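+;; For a Y source the split below effectively rewrites
+;;	(set (operand 0) (sign_extend:SI (reg:HI Y)))
+;; into
+;;	(set (reg:HI D) (reg:HI Y))	; movhi goes through the scratch location
+;;	(set (operand 0) (sign_extend:SI (reg:HI D)))
+;;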
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ "reload_completed && (Y_REG_P (operands[1]) || Z_REG_P (operands[1]))"
+ [(set (reg:HI D_REGNUM) (match_dup 1))
+ (set (match_dup 0) (sign_extend:SI (reg:HI D_REGNUM)))]
+ "")
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "m,!r,dA")))]
+ ""
+ "*
+{
+ rtx ops[2];
+ int x_reg_used;
+
+ if (Y_REG_P (operands[1]))
+ return \"#\";
+
+ if (X_REG_P (operands[1]))
+ {
+ output_asm_insn (\"xgdx\", operands);
+ x_reg_used = 1;
+ }
+ else
+ {
+ /* X can be used for indexed addressing in the source.
+ Get the value before clearing it. */
+ x_reg_used = reg_mentioned_p (ix_reg, operands[1]);
+ if (x_reg_used)
+ {
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ }
+ }
+
+ CC_STATUS_INIT;
+ if (TARGET_M6812 && 0)
+ {
+ /* This sequence of code is larger than the one for 68HC11.
+ Don't use it; keep it for documentation. */
+ if (!D_REG_P (operands[1]) && !x_reg_used)
+ {
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ }
+ output_asm_insn (\"sex\\ta,x\", operands);
+ output_asm_insn (\"xgdx\", operands);
+ output_asm_insn (\"sex\\ta,d\", operands);
+ return \"xgdx\";
+ }
+
+ output_asm_insn (\"ldx\\t#0\", operands);
+ if (D_REG_P (operands[1]) || x_reg_used)
+ {
+ output_asm_insn (\"tsta\", operands);
+ }
+ else
+ {
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ }
+
+ ops[0] = gen_label_rtx ();
+ output_asm_insn (\"bpl\\t%l0\", ops);
+ output_asm_insn (\"dex\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[0]));
+
+ return \"\";
+}")
+
+
+;;--------------------------------------------------------------------
+;;- Min and Max instructions (68HC12).
+;;--------------------------------------------------------------------
+(define_insn "uminqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,m")
+ (umin:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "m,d")))]
+ "TARGET_M6812 && TARGET_MIN_MAX"
+ "*
+{
+ /* Flags are set according to (sub:QI (operand 1) (operand 2)).
+ The mina/minm instructions use A as the source or destination;
+ A is the high part of D. There is no way to express that in the
+ pattern, so we must use 'exg a,b' to put the operand in the right
+ register. */
+ CC_STATUS_INIT;
+ if (D_REG_P (operands[0]))
+ {
+ return \"exg\\ta,b\\n\\tmina\\t%2\\n\\texg\\ta,b\";
+ }
+ else
+ {
+ return \"exg\\ta,b\\n\\tminm\\t%0\\n\\texg\\ta,b\";
+ }
+}")
+
+(define_insn "umaxqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,m")
+ (umax:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "m,d")))]
+ "TARGET_M6812 && TARGET_MIN_MAX"
+ "*
+{
+ /* Flags are set according to (sub:QI (operand 1) (operand 2)).
+ The maxa/maxm instructions use A as the source or destination;
+ A is the high part of D. There is no way to express that in the
+ pattern, so we must use 'exg a,b' to put the operand in the right
+ register. */
+ CC_STATUS_INIT;
+ if (D_REG_P (operands[0]))
+ {
+ return \"exg\\ta,b\\n\\tmaxa\\t%2\\n\\texg\\ta,b\";
+ }
+ else
+ {
+ return \"exg\\ta,b\\n\\tmaxm\\t%0\\n\\texg\\ta,b\";
+ }
+}")
+
+(define_insn "uminhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,m")
+ (umin:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "m,d")))]
+ "TARGET_M6812 && TARGET_MIN_MAX"
+ "*
+{
+ /* Flags are set according to (sub:HI (operand 1) (operand 2)). */
+ CC_STATUS_INIT;
+ if (D_REG_P (operands[0]))
+ {
+ return \"emind\\t%2\";
+ }
+ else
+ {
+ return \"eminm\\t%0\";
+ }
+}")
+
+(define_insn "umaxhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,m")
+ (umax:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "m,d")))]
+ "TARGET_M6812 && TARGET_MIN_MAX"
+ "*
+{
+ /* Flags are set according to (sub:HI (operand 1) (operand 2)). */
+ CC_STATUS_INIT;
+ if (D_REG_P (operands[0]))
+ {
+ return \"emaxd\\t%2\";
+ }
+ else
+ {
+ return \"emaxm\\t%0\";
+ }
+}")
+
+
+;;--------------------------------------------------------------------
+;;- Add instructions.
+;;--------------------------------------------------------------------
+;; 64-bit: Use a library call because what GCC generates is huge.
+;;
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"___adddi3\", PLUS, DImode, DImode, 3, operands);
+ DONE;")
+
+;;
+;; - 32-bit Add.
+;;
+(define_expand "addsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "")
+
+(define_insn "*addsi3_zero_extendhi"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D,D")
+ (plus:SI (zero_extend:SI
+ (match_operand:HI 1 "general_operand" "dxi,!u,mdxi,!u"))
+ (match_operand:SI 2 "general_operand" "mi,mi,D?u,!Du")))
+ (clobber (match_scratch:HI 3 "=X,X,X,X"))]
+ ""
+ "*
+{
+ rtx ops[3];
+
+ if (X_REG_P (operands[2]))
+ {
+ ops[0] = operands[1];
+ }
+ else
+ {
+ if (X_REG_P (operands[1]))
+ {
+ output_asm_insn (\"xgdx\", ops);
+ }
+ else if (!D_REG_P (operands[1]))
+ {
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ }
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[2]);
+ ops[1] = m68hc11_gen_highpart (HImode, operands[2]);
+ }
+ ops[2] = gen_label_rtx ();
+
+ /* ldx preserves the carry; propagate it by incrementing X directly. */
+ output_asm_insn (\"addd\\t%0\", ops);
+ if (!X_REG_P (operands[2]))
+ output_asm_insn (\"ldx\\t%1\", ops);
+
+ output_asm_insn (\"bcc\\t%l2\", ops);
+ output_asm_insn (\"inx\", ops);
+
+ CC_STATUS_INIT;
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[2]));
+ return \"\";
+}")
+
+
+(define_split /* "*addsi3_zero_extendqi" */
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (zero_extend:SI
+ (match_operand:QI 1 "general_operand" ""))
+ (match_operand:SI 2 "memory_operand" "")))
+ (clobber (match_scratch:HI 3 "=X,X"))]
+ "reload_completed"
+ [(set (reg:HI D_REGNUM) (zero_extend:HI (match_dup 1)))
+ (parallel [(set (match_dup 0)
+ (plus:SI (zero_extend:SI (reg:HI D_REGNUM)) (match_dup 2)))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn "*addsi3_zero_extendqi"
+ [(set (match_operand:SI 0 "register_operand" "=D,D")
+ (plus:SI (zero_extend:SI
+ (match_operand:QI 1 "general_operand" "dAmi,!dAmiu"))
+ (match_operand:SI 2 "general_operand" "miD,!muiD")))
+ (clobber (match_scratch:HI 3 "=X,X"))]
+ ""
+ "*
+{
+ rtx ops[4];
+
+ if (GET_CODE (operands[2]) == MEM)
+ return \"#\";
+
+ if (X_REG_P (operands[2]))
+ {
+ if (H_REG_P (operands[1]))
+ {
+ ops[0] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ ops[1] = gen_rtx_REG (HImode, REGNO (operands[1]));
+ m68hc11_gen_movhi (insn, ops);
+ }
+ else
+ {
+ ops[0] = operands[1];
+ }
+ ops[1] = const0_rtx;
+ }
+ else
+ {
+ if (X_REG_P (operands[1]))
+ {
+ output_asm_insn (\"xgdx\", ops);
+ }
+ else if (!D_REG_P (operands[1]))
+ {
+ ops[0] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ ops[1] = operands[1];
+ m68hc11_gen_movqi (insn, ops);
+ }
+
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[2]);
+ ops[1] = ops[0];
+ ops[2] = m68hc11_gen_highpart (HImode, operands[2]);
+ output_asm_insn (\"clra\", ops);
+ }
+
+ /* ldx preserves the carry; propagate it by incrementing X directly. */
+ output_asm_insn (\"addb\\t%b0\", ops);
+ output_asm_insn (\"adca\\t%h1\", ops);
+ if (!X_REG_P (operands[2]))
+ output_asm_insn (\"ldx\\t%2\", ops);
+
+ /* If the above adca was adding some constant, we don't need to propagate
+ the carry unless the byte it added was 0xff. */
+ if (X_REG_P (operands[2])
+ || GET_CODE (ops[1]) != CONST_INT
+ || ((INTVAL (ops[1]) & 0x0ff00) == 0x0ff00))
+ {
+ ops[3] = gen_label_rtx ();
+
+ output_asm_insn (\"bcc\\t%l3\", ops);
+ output_asm_insn (\"inx\", ops);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[3]));
+ }
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+(define_insn "*addsi3"
+ [(set (match_operand:SI 0 "non_push_operand" "=o,D,!u,?D,D")
+ (plus:SI (match_operand:SI 1 "non_push_operand" "%0,0,0,0,0")
+ (match_operand:SI 2 "general_operand" "ML,i,ML,?D,?oiu")))
+ (clobber (match_scratch:HI 3 "=d,X,d,X,X"))]
+ ""
+ "*
+{
+ rtx ops[3];
+ const char* add_insn;
+ const char* inc_insn;
+ const char* incb_mem;
+ const char* inch_mem;
+ HOST_WIDE_INT val;
+
+ if (which_alternative > 2)
+ {
+ return \"#\";
+ }
+
+ val = INTVAL (operands[2]);
+ if ((val & 0x0ffffL) == 0)
+ {
+ if (!H_REG_P (operands[0]))
+ {
+ ops[0] = m68hc11_gen_highpart (HImode, operands[0]);
+ ops[1] = m68hc11_gen_highpart (HImode, operands[2]);
+ output_asm_insn (\"ldd\\t%0\", ops);
+ output_asm_insn (\"addd\\t%1\", ops);
+ output_asm_insn (\"std\\t%0\", ops);
+ return \"\";
+ }
+ else if (val == 1)
+ {
+ return \"inx\";
+ }
+ else
+ {
+ return \"#\";
+ }
+ }
+ if ((val & 0xffff0000L) != 0 && (val & 0xffff0000L) != 0xffff0000L)
+ {
+ return \"#\";
+ }
+
+ if (val >= 0)
+ {
+ ops[1] = operands[2];
+ add_insn = \"addd\\t%1\";
+ inc_insn = \"inx\\t\";
+ incb_mem = \"inc\\t%b1\";
+ inch_mem = \"inc\\t%h1\";
+ }
+ else
+ {
+ ops[1] = GEN_INT (- val);
+ add_insn = \"subd\\t%1\";
+ inc_insn = \"dex\";
+ incb_mem = \"dec\\t%b1\";
+ inch_mem = \"dec\\t%h1\";
+ }
+
+ ops[2] = gen_label_rtx ();
+ if (!H_REG_P (operands[0]))
+ {
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[0]);
+ output_asm_insn (\"ldd\\t%0\", ops);
+ }
+ output_asm_insn (add_insn, ops);
+ if (!H_REG_P (operands[0]))
+ {
+ output_asm_insn (\"std\\t%0\", ops);
+ }
+ output_asm_insn (\"bcc\\t%l2\", ops);
+ if (H_REG_P (operands[0]))
+ {
+ output_asm_insn (inc_insn, ops);
+ }
+ else
+ {
+ ops[0] = m68hc11_gen_highpart (HImode, operands[0]);
+ ops[1] = ops[0];
+ if (INTVAL (operands[2]) < 0)
+ {
+ output_asm_insn (\"ldd\\t%1\", ops);
+ output_asm_insn (\"addd\\t#-1\", ops);
+ output_asm_insn (\"std\\t%1\", ops);
+ }
+ else
+ {
+ output_asm_insn (incb_mem, ops);
+ output_asm_insn (\"bne\\t%l2\", ops);
+ output_asm_insn (inch_mem, ops);
+ }
+ }
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[2]));
+
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (match_scratch:HI 3 ""))]
+ "reload_completed && z_replacement_completed == 2
+ && ((INTVAL (operands[2]) & 0x0FFFF) == 0)"
+ [(set (match_dup 5) (match_dup 6))
+ (set (reg:HI 0) (plus:HI (reg:HI 0) (match_dup 4)))
+ (set (match_dup 6) (match_dup 5))]
+ "operands[4] = m68hc11_gen_highpart (HImode, operands[2]);
+ if (X_REG_P (operands[0]))
+ {
+ operands[5] = operands[6] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ }
+ else
+ {
+ operands[6] = m68hc11_gen_highpart (HImode, operands[1]);
+ operands[5] = operands[3];
+ }
+ ")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "reload_completed && z_replacement_completed == 2
+ && (GET_CODE (operands[2]) != CONST_INT ||
+ (!(INTVAL (operands[2]) >= -65536 && INTVAL (operands[2]) <= 65535)))"
+ [(set (reg:HI D_REGNUM) (plus:HI (reg:HI D_REGNUM) (match_dup 3)))
+ (parallel [(set (reg:HI D_REGNUM) (reg:HI X_REGNUM))
+ (set (reg:HI X_REGNUM) (reg:HI D_REGNUM))])
+ (set (reg:QI B_REGNUM) (plus:QI (plus:QI (reg:QI CC_REGNUM) (reg:QI B_REGNUM)) (match_dup 4)))
+ (set (reg:QI A_REGNUM) (plus:QI (plus:QI (reg:QI CC_REGNUM) (reg:QI A_REGNUM)) (match_dup 5)))
+ (parallel [(set (reg:HI D_REGNUM) (reg:HI X_REGNUM))
+ (set (reg:HI X_REGNUM) (reg:HI D_REGNUM))])]
+ "operands[3] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[4] = m68hc11_gen_highpart (HImode, operands[2]);
+ operands[5] = m68hc11_gen_highpart (QImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (QImode, operands[4]);")
+
+;;
+;; Instruction generated to propagate the carry of a 16-bit add
+;; to the upper 16-bit part (in register X).
+;;
+(define_insn "*addsi_carry"
+ [(set (match_operand:HI 0 "register_operand" "=x")
+ (plus:HI (plus:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 0))
+ (reg:HI CC_REGNUM)))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = gen_label_rtx ();
+ output_asm_insn (\"bcc\\t%l0\", ops);
+ output_asm_insn (\"in%0\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[0]));
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+;;
+;; - 16-bit Add.
+;;
+(define_expand "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_M6811 && SP_REG_P (operands[0]))
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_PLUS (HImode,
+ operand1, operand2)),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (HImode)))));
+ DONE;
+ }
+}")
+
+(define_insn "*addhi3_68hc12"
+ [(set (match_operand:HI 0 "register_operand" "=d*A,d,xy*A*w,xy*A*w,xy*A")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,xy*Aw,0")
+ (match_operand:HI 2 "general_operand" "i,m*A*wu,id,id,!mu*A")))]
+ "TARGET_M6812"
+ "*
+{
+ int val;
+ const char* insn_code;
+
+ if (which_alternative >= 4)
+ {
+ if (A_REG_P (operands[2]))
+ {
+ CC_STATUS_INIT;
+ output_asm_insn (\"xgd%2\", operands);
+ output_asm_insn (\"lea%0 d,%0\", operands);
+ return \"xgd%2\";
+ }
+ return \"#\";
+ }
+
+ if (D_REG_P (operands[0]))
+ {
+ if (X_REG_P (operands[2]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn (\"xgdx\", operands);
+ output_asm_insn (\"leax\\td,%2\", operands);
+ return \"xgdx\";
+ }
+ else if (Y_REG_P (operands[2]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ output_asm_insn (\"xgdy\", operands);
+ output_asm_insn (\"leay\\td,%2\", operands);
+ return \"xgdy\";
+ }
+ else if (SP_REG_P (operands[2]))
+ {
+ output_asm_insn (\"sts\\t%t0\", operands);
+ return \"addd\\t%t0\";
+ }
+ return \"addd\\t%2\";
+ }
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ val = INTVAL (operands[2]);
+ else
+ val = 1000;
+
+ if ((val != -1 && val != 1) || !rtx_equal_p (operands[0], operands[1]))
+ {
+ m68hc11_notice_keep_cc (operands[0]);
+ switch (REGNO (operands[0]))
+ {
+ case HARD_X_REGNUM:
+ return \"leax\\t%i2,%1\";
+
+ case HARD_Y_REGNUM:
+ return \"leay\\t%i2,%1\";
+
+ case HARD_SP_REGNUM:
+ return \"leas\\t%i2,%1\";
+
+ default:
+ fatal_insn (\"Invalid operands in the instruction\", insn);
+ }
+ }
+ if (val > 0)
+ {
+ insn_code = X_REG_P (operands[0]) ? \"inx\"
+ : Y_REG_P (operands[0]) ? \"iny\" : \"ins\";
+ }
+ else
+ {
+ val = -val;
+ insn_code = X_REG_P (operands[0]) ? \"dex\"
+ : Y_REG_P (operands[0]) ? \"dey\" : \"des\";
+ }
+
+ /* For X and Y increment, the flags are not complete. Only the Z flag
+ is updated. For SP increment, flags are not changed. */
+ if (SP_REG_P (operands[0]))
+ {
+ cc_status = cc_prev_status;
+ if (INTVAL (operands[2]) < 0)
+ {
+ while (val > 2)
+ {
+ output_asm_insn (\"pshx\", operands);
+ val -= 2;
+ }
+ if (val == 0)
+ return \"\";
+ }
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ }
+
+ while (val)
+ {
+ output_asm_insn (insn_code, operands);
+ val--;
+ }
+ return \"\";
+}")
+
+;;
+;; Specific pattern to add to the stack pointer.
+;; We also take care of the clobbering of the IY register.
+;;
+(define_insn "addhi_sp"
+ [(set (match_operand:HI 0 "stack_register_operand" "=w,w,w,w")
+ (plus:HI (match_operand:HI 1 "stack_register_operand" "%0,0,0,0")
+ (match_operand:HI 2 "general_operand" "P,im,u,im")))
+ (clobber (match_scratch:HI 3 "=X,&y,&y,!&x"))]
+ "!TARGET_M6812"
+ "*
+{
+ HOST_WIDE_INT val;
+
+ if (optimize && Y_REG_P (operands[3])
+ && dead_register_here (insn, gen_rtx_REG (HImode, HARD_X_REGNUM)))
+ operands[3] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (val = INTVAL (operands[2])) != 0
+ && (CONST_OK_FOR_LETTER_P (val, 'P')
+ || (val > 0 && val <= 8)))
+ {
+ while (val > 1 || val < -1)
+ {
+ if (val > 0)
+ {
+ if (!H_REG_P (operands[3]))
+ break;
+
+ output_asm_insn (\"pul%3\", operands);
+ val -= 2;
+ }
+ else
+ {
+ output_asm_insn (\"pshx\", operands);
+ val += 2;
+ }
+ }
+ while (val != 0)
+ {
+ if (val > 0)
+ {
+ output_asm_insn (\"ins\", operands);
+ val--;
+ }
+ else
+ {
+ output_asm_insn (\"des\", operands);
+ val++;
+ }
+ }
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+
+ /* Need to transfer SP to X/Y and then to the D register.
+ Register X/Y is lost; this is specified by the (clobber) statement. */
+ output_asm_insn (\"ts%3\", operands);
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ((val = INTVAL (operands[2])) >= 0 && val < 0x100)
+ && dead_register_here (insn, gen_rtx_REG (HImode, HARD_D_REGNUM)))
+ {
+ output_asm_insn (\"ldab\\t%2\", operands);
+ output_asm_insn (\"ab%3\", operands);
+ CC_STATUS_INIT;
+ }
+ else
+ {
+ output_asm_insn (\"xgd%3\", operands);
+ output_asm_insn (\"addd\\t%2\", operands);
+ output_asm_insn (\"xgd%3\", operands);
+ }
+
+ /* The status flags correspond to the addd; xgdx/xgdy and txs/tys do not
+ modify the flags. */
+ return \"t%3s\";
+}")
+
+(define_insn "*addhi3"
+ [(set (match_operand:HI 0 "hard_reg_operand" "=A,dA,d,!A,d*A,d,!d*A")
+ (plus:HI (match_operand:HI 1 "general_operand" "%0,0,0,0,0,0,0")
+ (match_operand:HI 2 "general_operand" "N,I,i,I,mi*A*d,*u,!u*d*w")))]
+ "TARGET_M6811"
+ "*
+{
+ const char* insn_code;
+ int val;
+
+ if (D_REG_P (operands[0]) && SP_REG_P (operands[2]))
+ {
+ output_asm_insn (\"sts\\t%t0\", operands);
+ output_asm_insn (\"addd\\t%t0\", operands);
+ return \"addd\\t#1\";
+ }
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ /* Adding to an address register or with another/same register
+ is not possible. This must be replaced. */
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ return \"addd\\t%2\";
+ }
+ val = INTVAL (operands[2]);
+ if (!SP_REG_P (operands[0]))
+ {
+ if (D_REG_P (operands[0]))
+ {
+ if ((val & 0x0ff) == 0 && !next_insn_test_reg (insn, operands[0]))
+ {
+ CC_STATUS_INIT;
+ return \"adda\\t%h2\";
+ }
+ else
+ {
+ return \"addd\\t%2\";
+ }
+ }
+ else if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) < -4
+ || INTVAL (operands[2]) > 4)
+ return \"#\";
+ }
+ if (val > 0)
+ {
+ insn_code = X_REG_P (operands[0]) ? \"inx\"
+ : Y_REG_P (operands[0]) ? \"iny\" : \"ins\";
+ }
+ else
+ {
+ val = -val;
+ insn_code = X_REG_P (operands[0]) ? \"dex\"
+ : Y_REG_P (operands[0]) ? \"dey\" : \"des\";
+ }
+
+ /* For X and Y increment, the flags are not complete. Only the Z flag
+ is updated. For SP increment, flags are not changed. */
+ if (SP_REG_P (operands[0]))
+ {
+ cc_status = cc_prev_status;
+ if (INTVAL (operands[2]) < 0)
+ {
+ while (val >= 2)
+ {
+ output_asm_insn (\"pshx\", operands);
+ val -= 2;
+ }
+ }
+ else if (optimize && dead_register_here (insn, ix_reg))
+ {
+ while (val >= 2)
+ {
+ output_asm_insn (\"pulx\", operands);
+ val -= 2;
+ }
+ }
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ }
+
+ while (val)
+ {
+ output_asm_insn (insn_code, operands);
+ val--;
+ }
+ return \"\";
+}")
+
+(define_insn "*addhi3_zext"
+ [(set (match_operand:HI 0 "hard_reg_operand" "=A,d")
+ (plus:HI (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "d,um*A"))
+ (match_operand:HI 2 "general_operand" "0,0")))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (A_REG_P (operands[0]))
+ return \"ab%0\";
+ else if (A_REG_P (operands[1]))
+ return \"st%1\\t%t0\\n\\taddb\\t%T0\\n\\tadca\\t#0\";
+ else
+ return \"addb\\t%b1\\n\\tadca\\t#0\";
+}")
+
+;;
+;; Translate d = d + d into d = d << 1
+;; We have to do this because adding a register to itself is not possible.
+;; ??? It's not clear whether this is really necessary.
+;;
+(define_split
+ [(set (match_operand:QI 0 "hard_reg_operand" "")
+ (plus:QI (match_dup 0)
+ (match_dup 0)))]
+ "0 && reload_completed"
+ [(set (match_dup 0) (ashift:QI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_insn "addqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=!d*rm,dq,!*A")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "N,ium*A*d,ium*A*d")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) == 1)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ return \"inca\";
+ }
+ else if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ {
+ return \"incb\";
+
+ }
+ else if (A_REG_P (operands[0]))
+ {
+ /* This applies to the 16-bit register. This should be ok since
+ this is not a strict_low_part increment. */
+ return \"in%0\";
+ }
+ else
+ {
+ return \"inc\\t%b0\";
+ }
+ }
+ else if (INTVAL (operands[2]) == -1)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ return \"deca\";
+ }
+ else if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ {
+ return \"decb\";
+ }
+ else if (A_REG_P (operands[0]))
+ {
+ /* This applies to the 16-bit register. This should be ok since
+ this is not a strict_low_part decrement. */
+ return \"de%0\";
+ }
+ else
+ {
+ return \"dec\\t%b0\";
+ }
+ }
+ }
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+ else if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"addb\\t%b2\";
+ else
+ return \"adda\\t%b2\";
+}")
+
+;;
+;; add with carry is used for 32-bit add.
+;;
+(define_insn "*adcq"
+ [(set (match_operand:QI 0 "register_operand" "=q")
+ (plus:QI (plus:QI (reg:QI CC_REGNUM)
+ (match_operand:QI 1 "register_operand" "%0"))
+ (match_operand:QI 2 "general_operand" "ium")))]
+ ""
+ "adc%0\\t%b2")
+
+;;--------------------------------------------------------------------
+;;- Subtract instructions.
+;;--------------------------------------------------------------------
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"___subdi3\", MINUS, DImode, DImode, 3, operands);
+ DONE;")
+
+;;
+;; - 32-bit Subtract (see addsi3).
+;; Subtraction of a constant is handled by addsi3.
+;;
+(define_expand "subsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "")
+
+(define_insn "*subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D,D,!u")
+ (minus:SI (match_operand:SI 1 "general_operand" "0,oi,0,!u,0")
+ (match_operand:SI 2 "general_operand" "oi,D,!u,D,!oui")))
+ (clobber (match_scratch:HI 3 "=X,X,X,X,d"))]
+ ""
+ "#")
+
+(define_insn "*subsi3_zero_extendhi"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (zero_extend:SI (match_operand:HI 2 "general_operand" "dmui*A"))))
+ (clobber (match_scratch:HI 3 "=X"))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (A_REG_P (operands[2]))
+ {
+ if (TARGET_M6812)
+ ops[0] = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ else
+ ops[0] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+
+ ops[1] = operands[2];
+ m68hc11_gen_movhi (insn, ops);
+ if (TARGET_M6812)
+ operands[2] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ else
+ operands[2] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ }
+ ops[0] = gen_label_rtx ();
+ output_asm_insn (\"subd\\t%2\", operands);
+ output_asm_insn (\"bcc\\t%l0\", ops);
+ output_asm_insn (\"dex\", ops);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[0]));
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+(define_insn "*subsi3_zero_extendqi"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (zero_extend:SI (match_operand:QI 2 "general_operand" "dmui*A"))))
+ (clobber (match_scratch:HI 3 "=X"))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (A_REG_P (operands[2]))
+ {
+ ops[0] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movhi (insn, ops);
+ operands[2] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ }
+ ops[0] = gen_label_rtx ();
+ output_asm_insn (\"subb\\t%b2\", operands);
+ output_asm_insn (\"sbca\\t#0\", operands);
+ output_asm_insn (\"bcc\\t%l0\", ops);
+ output_asm_insn (\"dex\", ops);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[0]));
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+;;
+;; Hard register key for the splits below:
+;; reg:HI 1 -> D reg:QI 6 -> B
+;; reg:QI 7 -> CCR reg:QI 5 -> A
+;;
+(define_split /* "*subsi3" */
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "reload_completed && z_replacement_completed == 2
+ && X_REG_P (operands[1])"
+ [(set (reg:HI D_REGNUM) (minus:HI (reg:HI D_REGNUM) (match_dup 3)))
+ (parallel [(set (reg:HI X_REGNUM) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (reg:HI X_REGNUM))])
+ (set (reg:QI B_REGNUM) (minus:QI (minus:QI (reg:QI CC_REGNUM) (reg:QI B_REGNUM)) (match_dup 4)))
+ (set (reg:QI A_REGNUM) (minus:QI (minus:QI (reg:QI CC_REGNUM) (reg:QI A_REGNUM)) (match_dup 5)))
+ (parallel [(set (reg:HI X_REGNUM) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (reg:HI X_REGNUM))])]
+ "operands[3] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[4] = m68hc11_gen_highpart (HImode, operands[2]);
+ operands[5] = m68hc11_gen_highpart (QImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (QImode, operands[4]);")
+
+(define_split /* "*subsi3" */
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "reload_completed && z_replacement_completed == 2
+ && X_REG_P (operands[2])"
+ [(set (reg:HI D_REGNUM) (minus:HI (reg:HI D_REGNUM) (match_dup 3)))
+ (parallel [(set (reg:HI X_REGNUM) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (reg:HI X_REGNUM))])
+ (set (reg:QI B_REGNUM) (minus:QI (minus:QI (reg:QI CC_REGNUM) (reg:QI B_REGNUM)) (match_dup 4)))
+ (set (reg:QI A_REGNUM) (minus:QI (minus:QI (reg:QI CC_REGNUM) (reg:QI A_REGNUM)) (match_dup 5)))
+ (parallel [(set (reg:HI X_REGNUM) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (reg:HI X_REGNUM))])
+ (set (reg:SI 0) (neg:SI (reg:SI 0)))]
+ "operands[3] = m68hc11_gen_lowpart (HImode, operands[1]);
+ operands[4] = m68hc11_gen_highpart (HImode, operands[1]);
+ operands[5] = m68hc11_gen_highpart (QImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (QImode, operands[4]);")
+
+(define_split /* "*subsi3" */
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (minus:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 "=d"))]
+ "reload_completed && z_replacement_completed == 2
+ && !X_REG_P (operands[0])"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 3) (minus:HI (match_dup 3) (match_dup 5)))
+ (set (match_dup 4) (match_dup 3))
+ (set (match_dup 3) (match_dup 6))
+ (set (reg:QI 6) (minus:QI (minus:QI (reg:QI 7) (reg:QI 6)) (match_dup 7)))
+ (set (reg:QI 5) (minus:QI (minus:QI (reg:QI 7) (reg:QI 5)) (match_dup 8)))
+ (set (match_dup 6) (match_dup 3))]
+ "operands[4] = m68hc11_gen_lowpart (HImode, operands[1]);
+ operands[5] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[1]);
+ operands[7] = m68hc11_gen_highpart (HImode, operands[2]);
+ operands[8] = m68hc11_gen_highpart (QImode, operands[7]);
+ operands[7] = m68hc11_gen_lowpart (QImode, operands[7]);")
+
+;;
+;; - 16-bit Subtract.
+;;
+(define_expand "subhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))]
+ ""
+ "")
+
+;;
+;; Subtract from the stack pointer. This is handled better if we provide a pattern.
+;;
+(define_insn "*subhi3_sp"
+ [(set (match_operand:HI 0 "stack_register_operand" "=w,w")
+ (minus:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "im*d,!u*A")))
+ (clobber (match_scratch:HI 3 "=A*d,A*d"))]
+ ""
+ "*
+{
+ if (X_REG_P (operands[2]))
+ {
+ operands[2] = m68hc11_soft_tmp_reg;
+ output_asm_insn (\"stx\\t%2\", operands);
+ }
+ else if (Y_REG_P (operands[2]))
+ {
+ operands[2] = m68hc11_soft_tmp_reg;
+ output_asm_insn (\"sty\\t%2\", operands);
+ }
+ else if (D_REG_P (operands[2]))
+ {
+ operands[2] = m68hc11_soft_tmp_reg;
+ output_asm_insn (\"std\\t%2\", operands);
+ }
+
+ if (D_REG_P (operands[3]))
+ {
+ int save_x;
+
+ save_x = !dead_register_here (insn, ix_reg);
+ if (save_x)
+ output_asm_insn (\"xgdx\", operands);
+ output_asm_insn (\"tsx\", operands);
+ output_asm_insn (\"xgdx\", operands);
+ output_asm_insn (\"subd\\t%2\", operands);
+ output_asm_insn (\"xgdx\", operands);
+
+ /* The status flags correspond to the subd; xgdx/xgdy and txs/tys do not
+ modify the flags. */
+ output_asm_insn (\"txs\", operands);
+ if (save_x)
+ return \"xgdx\";
+ else
+ return \"\";
+ }
+
+ /* Need to transfer SP to X/Y and then to the D register.
+ Register X/Y is lost; this is specified by the (clobber) statement. */
+ output_asm_insn (\"ts%3\", operands);
+ output_asm_insn (\"xgd%3\", operands);
+ output_asm_insn (\"subd\\t%2\", operands);
+ output_asm_insn (\"xgd%3\", operands);
+
+ /* The status flags correspond to the subd; xgdx/xgdy and txs/tys do not
+ modify the flags. */
+ return \"t%3s\";
+}")
+
+
+(define_insn "*subhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,*A,d,*A")
+ (minus:HI (match_operand:HI 1 "general_operand" "0,0,0,0")
+ (match_operand:HI 2 "general_operand" "im*A*d,im*d*A,u,!u")))]
+ ""
+ "*
+{
+ /* Subtracting from an address register or subtracting another hard
+ register is not possible. This must be replaced. */
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ return \"subd\\t%2\";
+}")
+
+(define_insn "*subhi3_zext"
+ [(set (match_operand:HI 0 "hard_reg_operand" "=d,d")
+ (minus:HI (match_operand:HI 1 "general_operand" "0,0")
+ (zero_extend:HI (match_operand:QI 2 "general_operand" "mi*A,!u"))))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (A_REG_P (operands[2]))
+ {
+ rtx ops[2];
+
+ ops[0] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+ return \"subb\\t%T0\\n\\tsbca\\t#0\";
+ }
+ return \"subb\\t%b2\\n\\tsbca\\t#0\";
+}")
+
+(define_insn "subqi3"
+ [(set (match_operand:QI 0 "hard_reg_operand" "=dq,!*x*y")
+ (minus:QI (match_operand:QI 1 "general_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "uim*A*d,uim*A*d")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+ else if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"subb\\t%b2\";
+ else
+ return \"suba\\t%b2\";
+}")
+
+;;
+;; subtract with carry is used for 32-bit subtract.
+;;
+(define_insn "*subcq"
+ [(set (match_operand:QI 0 "register_operand" "=q")
+ (minus:QI (minus:QI (reg:QI CC_REGNUM)
+ (match_operand:QI 1 "register_operand" "0"))
+ (match_operand:QI 2 "general_operand" "ium")))]
+ ""
+ "sbc%0\\t%b2")
+
+;;--------------------------------------------------------------------
+;;- Multiply instructions.
+;;--------------------------------------------------------------------
+;;
+;; 32-bit and 64-bit multiplies are handled by the library.
+;;
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (mult:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"__mulsi3\", MULT, SImode, SImode, 3, operands);
+ DONE;")
+
+(define_expand "mulhi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "register_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "")
+
+(define_insn "mulhi3_m68hc11"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (mult:HI (match_operand:HI 1 "register_operand" "%0")
+ (match_operand:HI 2 "register_operand" "x")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "TARGET_M6811"
+ "*
+{
+ CC_STATUS_INIT;
+ /* D * X -> D (X and Y are preserved by this function call). */
+ return \"jsr\\t___mulhi3\";
+}")
+
+(define_insn "mulhi3_m68hc12"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (mult:HI (match_operand:HI 1 "register_operand" "%0,0")
+ (match_operand:HI 2 "register_operand" "y,x")))
+ (clobber (match_scratch:HI 3 "=2,2"))]
+ "TARGET_M6812"
+ "*
+{
+ CC_STATUS_INIT;
+ if (X_REG_P (operands[2]))
+ return \"exg\\tx,y\\n\\temul\\n\\texg\\tx,y\";
+ else
+ return \"emul\";
+}")
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "%d,d"))
+ (zero_extend:SI
+ (match_operand:HI 2 "register_operand" "y,x"))))
+ (clobber (match_scratch:HI 3 "=2,X"))]
+ "TARGET_M6812"
+ "*
+{
+ if (X_REG_P (operands [2]))
+ output_asm_insn (\"exg\\tx,y\", operands);
+
+ /* Can't use the carry after that; other flags are ok when testing
+ the 32-bit result. */
+ cc_status.flags |= CC_NO_OVERFLOW;
+ return \"emul\\n\\texg\\tx,y\";
+}")
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "%d,d"))
+ (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "y,x"))))
+ (clobber (match_scratch:HI 3 "=2,X"))]
+ "TARGET_M6812"
+ "*
+{
+ if (X_REG_P (operands [2]))
+ output_asm_insn (\"exg\\tx,y\", operands);
+
+ /* Can't use the carry after that; other flags are ok when testing
+ the 32-bit result. */
+ cc_status.flags |= CC_NO_OVERFLOW;
+ return \"emuls\\n\\texg\\tx,y\";
+}")
+
+(define_insn "umulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (mult:HI (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "dm*u"))
+ (zero_extend:HI
+ (match_operand:QI 2 "nonimmediate_operand" "dm*u*A"))))]
+ ""
+ "*
+{
+ if (D_REG_P (operands[1]) && D_REG_P (operands[2]))
+ {
+ output_asm_insn (\"tba\", operands);
+ }
+ else
+ {
+ rtx ops[2];
+
+ if (D_REG_P (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+ ops[0] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+
+ if (!D_REG_P (operands[1]))
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ }
+ }
+
+ CC_STATUS_INIT;
+ return \"mul\";
+}")
+
+(define_insn "mulqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,*x,*y")
+ (mult:QI (match_operand:QI 1 "general_operand" "%di*um,0,0")
+ (match_operand:QI 2 "general_operand" "di*um,*xium,*yium")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (D_REG_P (operands[1]) && D_REG_P (operands[2]))
+ {
+ output_asm_insn (\"tba\", operands);
+ }
+ else
+ {
+ if (D_REG_P (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+ output_asm_insn (\"ldaa\\t%b2\", operands);
+
+ if (!D_REG_P (operands[1]))
+ {
+ output_asm_insn (\"ldab\\t%b1\", operands);
+ }
+ }
+
+ CC_STATUS_INIT;
+ return \"mul\";
+}")
+
+(define_split
+ [(set (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (mult:QI (match_operand:QI 1 "general_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "z_replacement_completed == 2"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (mult:QI (match_dup 5) (match_dup 6)))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])]
+ "
+ operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ if (A_REG_P (operands[1]))
+ operands[5] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ else
+ operands[5] = operands[1];
+ if (A_REG_P (operands[2]))
+ operands[6] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ else
+ operands[6] = operands[2];
+ ")
+
+(define_insn "mulqihi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (mult:HI (sign_extend:HI
+ (match_operand:QI 1 "register_operand" "%0,0,0"))
+ (sign_extend:HI
+ (match_operand:QI 2 "general_operand" "mi*u,*A,0"))))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+
+ /* Special case when multiplying the register by itself. */
+ if (D_REG_P (operands[2]))
+ {
+ output_asm_insn (\"tba\", operands);
+ return \"mul\";
+ }
+
+ if (!H_REG_P (operands[2]))
+ {
+ output_asm_insn (\"ldaa\\t%b2\", operands);
+ }
+ else
+ {
+ rtx ops[2];
+
+ ops[0] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+ }
+ return \"jsr\\t___mulqi3\";
+}")
+
+;;--------------------------------------------------------------------
+;;- Divide instructions.
+;;--------------------------------------------------------------------
+
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (div:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "A,ium")))
+ (set (match_operand:HI 3 "register_operand" "=&x,&x")
+ (mod:HI (match_dup 1) (match_dup 2)))]
+ ""
+ "*
+{
+ if (!X_REG_P (operands[2]))
+ {
+ if (Y_REG_P (operands[2]))
+ {
+ output_asm_insn (\"sty\\t%t1\", operands);
+ output_asm_insn (\"ldx\\t%t1\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldx\\t%2\", operands);
+ }
+ }
+ if (TARGET_M6812)
+ {
+ /* Flags are ok after that. */
+ return \"idivs\\n\\txgdx\";
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ return \"bsr\\t__divmodhi4\";
+ }
+}")
+
+(define_insn "udivmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (udiv:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "A,ium")))
+ (set (match_operand:HI 3 "register_operand" "=x,x")
+ (umod:HI (match_dup 1) (match_dup 2)))]
+ ""
+ "*
+{
+ if (!X_REG_P (operands[2]))
+ {
+ if (Y_REG_P (operands[2]))
+ {
+ output_asm_insn (\"sty\\t%t1\", operands);
+ output_asm_insn (\"ldx\\t%t1\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldx\\t%2\", operands);
+ }
+ }
+
+ /* The Z, V and C flags are set but N is unchanged.
+ Since this is an unsigned divide the result is never negative,
+ so we can keep the flags and mark them as not-negative. */
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ return \"idiv\\n\\txgdx\";
+}")
+
+;;--------------------------------------------------------------------
+;;- and instructions.
+;;--------------------------------------------------------------------
+
+(define_insn_and_split "anddi3"
+ [(set (match_operand:DI 0 "reg_or_some_mem_operand" "=m,u")
+ (and:DI (match_operand:DI 1 "reg_or_some_mem_operand" "%imu,imu")
+ (match_operand:DI 2 "general_operand" "imu,imu")))
+ (clobber (match_scratch:HI 3 "=d,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (SImode, AND, operands);
+ DONE;")
+
+(define_insn_and_split "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,!u")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "Dimu,imu")))
+ (clobber (match_scratch:HI 3 "=X,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (HImode, AND, operands);
+ DONE;")
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*andhi3_mem"
+ [(set (match_operand:HI 0 "memory_operand" "=R,Q")
+ (and:HI (match_dup 0)
+ (match_operand:HI 1 "immediate_operand" "i,i")))
+ (clobber (match_scratch:HI 2 "=X,xy"))]
+ "TARGET_RELAX && !TARGET_M6812"
+ "*
+{
+ int val = INTVAL (operands[1]) & 0x0FFFF;
+
+ if (val == 0x0ffff)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+
+ CC_STATUS_INIT;
+
+ /* The bclr instruction uses an inverted mask. */
+ operands[1] = GEN_INT ((~val) & 0x0FFFF);
+
+ /* When the destination is a global variable, generate a .relax
+ directive and load the address into the clobber register. That load
+ can be eliminated by the linker if the address is in page0. */
+ if (which_alternative == 1)
+ {
+ rtx ops[3];
+
+ ops[0] = operands[2];
+ ops[1] = XEXP (operands[0], 0);
+ ops[2] = gen_label_rtx ();
+ output_asm_insn (\".relax\\t%l2\", ops);
+ m68hc11_gen_movhi (insn, ops);
+ if ((val & 0x0FF) != 0x0FF)
+ output_asm_insn (\"bclr\\t1,%2, %b1\", operands);
+
+ if ((val & 0x0FF00) != 0x0FF00)
+ output_asm_insn (\"bclr\\t0,%2, %h1\", operands);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[2]));
+ return \"\";
+ }
+
+ if ((val & 0x0FF) != 0x0FF)
+ output_asm_insn (\"bclr\\t%b0, %b1\", operands);
+
+ if ((val & 0x0FF00) != 0x0FF00)
+ output_asm_insn (\"bclr\\t%h0, %h1\", operands);
+
+ return \"\";
+}")
+
+(define_insn "*andhi3_const"
+ [(set (match_operand:HI 0 "reg_or_some_mem_operand" "=R,d,?*A")
+ (and:HI (match_operand:HI 1 "reg_or_some_mem_operand" "%0,0,0")
+ (match_operand:HI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int val = INTVAL (operands[2]) & 0x0FFFF;
+ int lowpart_zero = 0;
+ int highpart_zero = 0;
+ int lowpart_unknown = 0;
+ int highpart_unknown = 0;
+
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (val == 0x0ffff)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+
+ /* First, try to clear the low and high parts.
+ If that's possible, the second 'and' will set
+ the correct status flags and we can avoid a tsthi. */
+ if ((val & 0x0FF) == 0)
+ {
+ if (D_REG_P (operands[0]))
+ output_asm_insn (\"clrb\", operands);
+ else
+ output_asm_insn (\"clr\\t%b0\", operands);
+ lowpart_zero = 1;
+ }
+ if ((val & 0x0FF00) == 0)
+ {
+ if (D_REG_P (operands[0]))
+ output_asm_insn (\"clra\", operands);
+ else
+ output_asm_insn (\"clr\\t%h0\", operands);
+ highpart_zero = 1;
+ }
+
+ if ((val & 0x0FF) == 0x0FF)
+ {
+ lowpart_unknown = 1;
+ }
+ else if ((val & 0x0FF) != 0 && !H_REG_P (operands[0]))
+ {
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = GEN_INT ((~val) & 0x0FF);
+ output_asm_insn (\"bclr\\t%b0, %1\", ops);
+ }
+ else if ((val & 0x0FF) != 0)
+ {
+ output_asm_insn (\"andb\\t%b2\", operands);
+ }
+
+ if ((val & 0x0FF00) == 0x0FF00)
+ {
+ highpart_unknown = 1;
+ }
+ else if (((val & 0x0FF00) != 0) && !H_REG_P (operands[0]))
+ {
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = GEN_INT (((~val) & 0x0FF00) >> 8);
+ output_asm_insn (\"bclr\\t%h0, %1\", ops);
+ }
+ else if ((val & 0x0FF00) != 0)
+ {
+ output_asm_insn (\"anda\\t%h2\", operands);
+ }
+
+  /* The flags are only valid when at least one part was cleared
+     and nothing was left in an unknown state.  */
+  if (highpart_unknown || lowpart_unknown
+      || (highpart_zero == 0 && lowpart_zero == 0))
+    CC_STATUS_INIT;
+
+ return \"\";
+}")
+
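+;; Sketch of the clearing trick used by "*andhi3_const" above: with
+;; D as the destination and #0x00F0 as the mask (immediate spelling
+;; approximate), the whole operation is
+;;
+;;	clra			; high mask byte is zero
+;;	andb	#0xf0		; the and leaves valid flags
+;;
+;; so no trailing tsthi is required.
+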
+(define_insn "*andhi3_gen"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,!*A")
+ (and:HI (match_operand:HI 1 "splitable_operand" "%0,0,0")
+ (match_operand:HI 2 "splitable_operand" "mi,!u*A,!um*Ai")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ return \"anda\\t%h2\\n\\tandb\\t%b2\";
+}")
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (and:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*andqi3_mem"
+ [(set (match_operand:QI 0 "memory_operand" "=R,Q")
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "i,i")))
+ (clobber (match_scratch:HI 2 "=X,xy"))]
+ "TARGET_RELAX && !TARGET_M6812"
+ "*
+{
+ int val = INTVAL (operands[1]) & 0x0FF;
+
+ if (val == 0x0ff)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+
+ /* The bclr instruction uses an inverted mask. */
+ operands[1] = GEN_INT ((~val) & 0x0FF);
+
+  /* When the destination is a global variable, generate a .relax instruction
+     and load the address into the clobber register.  That load can be
+     eliminated by the linker if the address is in page0.  */
+ if (which_alternative == 1)
+ {
+ rtx ops[3];
+
+ ops[0] = operands[2];
+ ops[1] = XEXP (operands[0], 0);
+ ops[2] = gen_label_rtx ();
+ output_asm_insn (\".relax\\t%l2\", ops);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"bclr\\t0,%2, %1\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[2]));
+ return \"\";
+ }
+ return \"bclr\\t%b0, %1\";
+}")
+
+(define_insn "*andqi3_const"
+ [(set (match_operand:QI 0 "reg_or_some_mem_operand" "=R,d,?*A*q")
+ (and:QI (match_operand:QI 1 "reg_or_some_mem_operand" "%0,0,0")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int val = INTVAL (operands[2]) & 0x0FF;
+
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (val == 0x0ff)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+ if (!H_REG_P (operands[0]))
+ {
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = GEN_INT ((~val) & 0x0FF);
+ output_asm_insn (\"bclr\\t%b0, %b1\", ops);
+ return \"\";
+ }
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"andb\\t%b2\";
+ else if (DA_REG_P (operands[0]))
+ return \"anda\\t%b2\";
+ else
+ fatal_insn (\"Invalid operand in the instruction\", insn);
+}")
+
+(define_insn "*andqi3_gen"
+ [(set (match_operand:QI 0 "register_operand" "=d,d,d,?*A,?*A,!*q")
+ (and:QI (match_operand:QI 1 "general_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "mi,!*u,?*A,!*um,?*A*d,!*um*A")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"andb\\t%b2\";
+ else if (DA_REG_P (operands[0]))
+ return \"anda\\t%b2\";
+ else
+ fatal_insn (\"Invalid operand in the instruction\", insn);
+}")
+
+;;--------------------------------------------------------------------
+;;- Bit set or instructions.
+;;--------------------------------------------------------------------
+
+(define_insn_and_split "iordi3"
+ [(set (match_operand:DI 0 "reg_or_some_mem_operand" "=m,u")
+ (ior:DI (match_operand:DI 1 "reg_or_some_mem_operand" "%imu,imu")
+ (match_operand:DI 2 "general_operand" "imu,imu")))
+ (clobber (match_scratch:HI 3 "=d,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (SImode, IOR, operands);
+ DONE;")
+
+(define_insn_and_split "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,!u")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "Dimu,imu")))
+ (clobber (match_scratch:HI 3 "=X,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (HImode, IOR, operands);
+ DONE;")
+
+(define_expand "iorhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ior:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "splitable_operand" "")))]
+ ""
+ "")
+
+(define_insn "*iorhi3_mem"
+ [(set (match_operand:HI 0 "memory_operand" "=R,Q")
+ (ior:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_scratch:HI 2 "=X,xy"))]
+ "TARGET_RELAX && !TARGET_M6812"
+ "*
+{
+ int val = INTVAL (operands[1]) & 0x0FFFF;
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+ CC_STATUS_INIT;
+ if (which_alternative == 1)
+ {
+ rtx ops[3];
+
+ ops[0] = operands[2];
+ ops[1] = XEXP (operands[0], 0);
+ ops[2] = gen_label_rtx ();
+ output_asm_insn (\".relax\\t%l2\", ops);
+ m68hc11_gen_movhi (insn, ops);
+ if ((val & 0x0FF) != 0)
+ output_asm_insn (\"bset\\t1,%2, %b1\", operands);
+
+ if ((val & 0x0FF00) != 0)
+ output_asm_insn (\"bset\\t0,%2, %h1\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[2]));
+ return \"\";
+ }
+
+ if ((val & 0x0FF) != 0)
+ output_asm_insn (\"bset\\t%b0, %b1\", operands);
+
+ if ((val & 0x0FF00) != 0)
+ output_asm_insn (\"bset\\t%h0, %h1\", operands);
+
+ return \"\";
+}")
+
+(define_insn "*iorhi3_const"
+ [(set (match_operand:HI 0 "reg_or_some_mem_operand" "=R,d,?*A")
+ (ior:HI (match_operand:HI 1 "reg_or_some_mem_operand" "%0,0,0")
+ (match_operand:HI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int val = INTVAL (operands[2]) & 0x0FFFF;
+
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+
+ if ((val & 0x0FF) != 0)
+ {
+ if (!H_REG_P (operands[0]))
+ output_asm_insn (\"bset\\t%b0, %b2\", operands);
+ else
+ output_asm_insn (\"orab\\t%b2\", operands);
+ }
+
+ if ((val & 0x0FF00) != 0)
+ {
+ if (!H_REG_P (operands[0]))
+ output_asm_insn (\"bset\\t%h0, %h2\", operands);
+ else
+ output_asm_insn (\"oraa\\t%h2\", operands);
+ }
+
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+(define_insn "*iorhi3_gen"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,!*A")
+ (ior:HI (match_operand:HI 1 "splitable_operand" "%0,0,0")
+ (match_operand:HI 2 "splitable_operand" "mi,!u*A,!um*Ai")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ return \"oraa\\t%h2\\n\\torab\\t%b2\";
+}")
+
+(define_expand "iorqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ior:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*iorqi3_mem"
+ [(set (match_operand:QI 0 "memory_operand" "=R,Q")
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "")))
+ (clobber (match_scratch:HI 2 "=X,xy"))]
+ "TARGET_RELAX && !TARGET_M6812"
+ "*
+{
+ int val = INTVAL (operands[1]) & 0x0FF;
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+ if (which_alternative == 1)
+ {
+ rtx ops[3];
+
+ ops[0] = operands[2];
+ ops[1] = XEXP (operands[0], 0);
+ ops[2] = gen_label_rtx ();
+ output_asm_insn (\".relax\\t%l2\", ops);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"bset\\t0,%2, %1\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[2]));
+ return \"\";
+ }
+ return \"bset\\t%b0, %1\";
+}")
+
+(define_insn "*iorqi3_const"
+ [(set (match_operand:QI 0 "reg_or_some_mem_operand" "=R,d,?*A*q")
+ (ior:QI (match_operand:QI 1 "reg_or_some_mem_operand" "%0,0,0")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int val = INTVAL (operands[2]) & 0x0FF;
+
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+ if (!H_REG_P (operands[0]))
+ {
+ return \"bset\\t%b0, %2\";
+ }
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"orab\\t%b2\";
+ else if (DA_REG_P (operands[0]))
+ return \"oraa\\t%b2\";
+ else
+ fatal_insn (\"Invalid operand in the instruction\", insn);
+}")
+
+(define_insn "*iorqi3_gen"
+ [(set (match_operand:QI 0 "register_operand" "=d,d,d,?*A,?*A,!*q")
+ (ior:QI (match_operand:QI 1 "general_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "mi,!*u,!*A,!*um,?*A*d,!*um*A")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"orab\\t%b2\";
+ else if (DA_REG_P (operands[0]))
+ return \"oraa\\t%b2\";
+ else
+ fatal_insn (\"Invalid operand in the instruction\", insn);
+}")
+
+
+;;--------------------------------------------------------------------
+;;- xor instructions.
+;;--------------------------------------------------------------------
+
+(define_insn_and_split "xordi3"
+ [(set (match_operand:DI 0 "reg_or_some_mem_operand" "=m,u")
+ (xor:DI (match_operand:DI 1 "reg_or_some_mem_operand" "%imu,imu")
+ (match_operand:DI 2 "general_operand" "imu,imu")))
+ (clobber (match_scratch:HI 3 "=d,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (SImode, XOR, operands);
+ DONE;")
+
+(define_insn_and_split "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,!u")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "Dimu,imu")))
+ (clobber (match_scratch:HI 3 "=X,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_logical (HImode, XOR, operands);
+ DONE;")
+
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,!*A")
+ (xor:HI (match_operand:HI 1 "splitable_operand" "%0,0,0")
+ (match_operand:HI 2 "splitable_operand" "im,!u*A,!ium*A")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int val = INTVAL (operands[2]) & 0x0FFFF;
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+      /* A byte whose mask bits are all set is complemented in one
+	 instruction; test that case first since it is a subset of
+	 the nonzero test below.  */
+      if ((val & 0x0FF) == 0x0FF)
+	{
+	  output_asm_insn (\"comb\", operands);
+	}
+      else if ((val & 0x0FF) != 0)
+	{
+	  output_asm_insn (\"eorb\\t%b2\", operands);
+	}
+
+      if ((val & 0x0FF00) == 0x0FF00)
+	{
+	  output_asm_insn (\"coma\", operands);
+	}
+      else if ((val & 0x0FF00) != 0)
+	{
+	  output_asm_insn (\"eora\\t%h2\", operands);
+	}
+
+ CC_STATUS_INIT;
+ return \"\";
+ }
+
+ CC_STATUS_INIT;
+ return \"eora\\t%h2\\n\\teorb\\t%b2\";
+}")
+
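+;; Sketch: an HImode xor of D with #0x00FF comes out of "xorhi3"
+;; above as the single one-byte instruction
+;;
+;;	comb			; B ^= 0xFF, i.e. complement B
+;;
+;; instead of an "eorb #0xff" with an immediate operand.
+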
+(define_insn "xorqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,d,d,?*A,?*A,!*q")
+ (xor:QI (match_operand:QI 1 "general_operand" "%0,0,0,0,0,0")
+ (match_operand:QI 2 "general_operand" "im,!*u,!*A,!i*um,?*A*d,!i*um*A")))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]) || H_REG_P (operands[2]))
+ return \"#\";
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ int val = INTVAL (operands[2]) & 0x0FF;
+
+ if (val == 0)
+ {
+ cc_status = cc_prev_status;
+ return \"\";
+ }
+ if (val == 0x0FF)
+ {
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"comb\";
+ else
+ return \"coma\";
+ }
+ }
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ return \"eorb\\t%b2\";
+ else if (DA_REG_P (operands[0]))
+ return \"eora\\t%b2\";
+ else
+ fatal_insn (\"Invalid operand in the instruction\", insn);
+}")
+
+;;--------------------------------------------------------------------
+;;- Logical operations combined with zero_extend and shifts.
+;;--------------------------------------------------------------------
+
+(define_insn_and_split "*logicalsi3_zexthi"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (match_operator:SI 3 "m68hc11_logical_operator"
+ [(zero_extend:SI
+ (match_operand:HI 1 "general_operand" "imudA"))
+ (match_operand:SI 2 "general_operand" "Dimu")]))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:HI D_REGNUM) (match_dup 4))
+ (set (reg:HI D_REGNUM) (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 5)]))
+ (set (reg:HI X_REGNUM) (match_dup 6))]
+ "PUT_MODE (operands[3], HImode);
+ if (X_REG_P (operands[2]))
+ {
+ operands[5] = operands[1];
+ /* Make all the (set (REG:x) (REG:y)) a nop set. */
+ operands[4] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ }
+ else
+ {
+ operands[4] = operands[1];
+ operands[5] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[2]);
+ }
+ /* For an AND, make sure the high 16-bit part is cleared. */
+ if (GET_CODE (operands[3]) == AND)
+ {
+ operands[6] = const0_rtx;
+ }
+ ")
+
+(define_insn_and_split "*logicalsi3_zextqi"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D")
+ (match_operator:SI 3 "m68hc11_logical_operator"
+ [(zero_extend:SI
+ (match_operand:QI 1 "general_operand" "d,*A,imu"))
+ (match_operand:SI 2 "general_operand" "imu,imu,0")]))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (reg:QI A_REGNUM) (match_dup 4))
+ (set (reg:QI D_REGNUM) (match_dup 7))
+ (set (reg:QI B_REGNUM) (match_op_dup 3 [(reg:QI B_REGNUM) (match_dup 5)]))
+ (set (reg:HI X_REGNUM) (match_dup 6))]
+ "PUT_MODE (operands[3], QImode);
+ if (X_REG_P (operands[2]))
+ {
+ operands[5] = operands[1];
+ /* Make all the (set (REG:x) (REG:y)) a nop set. */
+ operands[4] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ operands[7] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ }
+ else
+ {
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[7] = operands[1];
+ operands[5] = m68hc11_gen_lowpart (QImode, operands[4]);
+ operands[4] = m68hc11_gen_highpart (QImode, operands[4]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[2]);
+ }
+ /* For an AND, make sure the high 24-bit part is cleared. */
+ if (GET_CODE (operands[3]) == AND)
+ {
+ operands[4] = const0_rtx;
+ operands[6] = const0_rtx;
+ }
+ ")
+
+(define_insn_and_split "*logicalhi3_zexthi_ashift8"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (match_operator:HI 3 "m68hc11_logical_operator"
+ [(zero_extend:HI
+ (match_operand:QI 1 "general_operand" "imud*A"))
+ (ashift:HI
+ (match_operand:HI 2 "general_operand" "imud*A")
+ (const_int 8))]))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (reg:QI A_REGNUM) (match_dup 4))
+ (set (reg:QI B_REGNUM) (match_dup 5))]
+ "
+ if (GET_CODE (operands[3]) == AND)
+ {
+ emit_insn (gen_movhi (operands[0], const0_rtx));
+ DONE;
+ }
+ else
+ {
+ operands[5] = operands[1];
+ if (D_REG_P (operands[2]))
+ {
+ operands[4] = gen_rtx_REG (QImode, HARD_B_REGNUM);
+ }
+ else
+ {
+ operands[4] = m68hc11_gen_lowpart (QImode, operands[2]);
+ }
+ }
+ ")
+
+(define_insn_and_split "*logicalhi3_zexthi"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (match_operator:HI 3 "m68hc11_logical_operator"
+ [(zero_extend:HI
+ (match_operand:QI 1 "general_operand" "imd*A,?u"))
+ (match_operand:HI 2 "general_operand" "dim,?dimu")]))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (reg:QI B_REGNUM) (match_dup 6))
+ (set (reg:QI A_REGNUM) (match_dup 4))
+ (set (reg:QI B_REGNUM) (match_op_dup 3 [(reg:QI B_REGNUM) (match_dup 5)]))]
+ "
+ PUT_MODE (operands[3], QImode);
+ if (D_REG_P (operands[2]))
+ {
+ operands[4] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ operands[5] = operands[1];
+ operands[6] = gen_rtx_REG (QImode, HARD_B_REGNUM);
+ }
+ else
+ {
+ operands[4] = m68hc11_gen_highpart (QImode, operands[2]);
+ operands[5] = m68hc11_gen_lowpart (QImode, operands[2]);
+ if (D_REG_P (operands[1]))
+ operands[6] = gen_rtx_REG (QImode, HARD_B_REGNUM);
+ else
+ operands[6] = operands[1];
+ }
+ /* For an AND, make sure the high 8-bit part is cleared. */
+ if (GET_CODE (operands[3]) == AND)
+ {
+ operands[4] = const0_rtx;
+ }
+ ")
+
+
+(define_insn_and_split "*logicalsi3_silshr16"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D,?D")
+ (match_operator:SI 3 "m68hc11_logical_operator"
+ [(lshiftrt:SI
+ (match_operand:SI 1 "general_operand" "uim,uim,0,0")
+ (const_int 16))
+ (match_operand:SI 2 "general_operand" "uim,0,uim,0")]))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:HI D_REGNUM) (match_dup 4))
+ (set (reg:HI D_REGNUM) (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 5)]))
+ (set (reg:HI X_REGNUM) (match_dup 6))]
+ "operands[5] = m68hc11_gen_highpart (HImode, operands[1]);
+ if (X_REG_P (operands[2]))
+ {
+ operands[4] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ }
+ else
+ {
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[2]);
+ }
+ PUT_MODE (operands[3], HImode);
+
+ /* For an AND, make sure the high 16-bit part is cleared. */
+ if (GET_CODE (operands[3]) == AND)
+ {
+ operands[6] = const0_rtx;
+ }
+")
+
+(define_insn_and_split "*logicalsi3_silshl16"
+ [(set (match_operand:SI 0 "register_operand" "=D,D")
+ (match_operator:SI 3 "m68hc11_logical_operator"
+ [(ashift:SI
+ (match_operand:SI 1 "general_operand" "uim,?D")
+ (const_int 16))
+ (match_operand:SI 2 "general_operand" "0,0")]))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (reg:HI X_REGNUM) (match_op_dup 3 [(reg:HI X_REGNUM) (match_dup 4)]))
+ (set (reg:HI D_REGNUM) (match_dup 5))]
+ "operands[4] = m68hc11_gen_lowpart (HImode, operands[1]);
+ PUT_MODE (operands[3], HImode);
+
+ if (GET_CODE (operands[3]) == AND)
+ operands[5] = const0_rtx;
+ else
+ operands[5] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ ")
+
+(define_insn_and_split "*logicalsi3_silshl16_zext"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D")
+ (match_operator:SI 3 "m68hc11_logical_operator"
+ [(ashift:SI
+ (zero_extend:SI
+ (match_operand:HI 1 "general_operand" "uim,udA,!dA"))
+ (const_int 16))
+ (zero_extend:SI (match_operand:HI 2 "general_operand" "uidA,um,!dA"))]))]
+ ""
+ "#"
+ ;; Must split before z register replacement
+ "reload_completed"
+ [(set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+ "
+ /* set (X_REGNUM) (d), set (D_REGNUM) (1) */
+  if (D_REG_P (operands[1])
+      && GET_CODE (operands[3]) != AND)
+ {
+      /* It is too early to split this particular case before
+	 Z register replacement because our cse-reg pass does
+	 not recognize 'swap_areg'.  It is ok to handle this
+	 case afterwards.  */
+ if (z_replacement_completed != 2)
+ {
+ FAIL;
+ }
+ emit_move_insn (gen_rtx_REG (HImode, HARD_X_REGNUM), operands[2]);
+ emit_insn (gen_swap_areg (gen_rtx_REG (HImode, HARD_D_REGNUM),
+ gen_rtx_REG (HImode, HARD_X_REGNUM)));
+ }
+ operands[4] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ operands[5] = operands[2];
+ operands[7] = operands[1];
+
+ if (GET_CODE (operands[3]) == AND)
+ operands[5] = operands[7] = const0_rtx;
+ ")
+
+;;--------------------------------------------------------------------
+;; 16-bit Arithmetic and logical operations on X and Y:
+;;
+;; PLUS MINUS AND IOR XOR ASHIFT ASHIFTRT LSHIFTRT ROTATE ROTATERT
+;;
+;; Operations on X or Y registers are split here. Instructions are
+;; changed into:
+;; - xgdx/xgdy instruction pattern,
+;; - The same operation on register D,
+;; - xgdx/xgdy instruction pattern.
+;; This should allow the peephole to merge consecutive xgdx/xgdy instructions.
+;; We also handle the case where the address register is used in both source
+;; operands, such as:
+;;
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (mem:HI (REG:HI X))))
+;; or
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (REG:HI X)))
+;;
+;;
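+;; For example (a sketch; 'var' is a hypothetical global), the insn
+;;
+;; (set (REG:HI X) (AND:HI (REG:HI X) (mem:HI (symbol_ref "var"))))
+;;
+;; executes after this split as roughly:
+;;
+;;	xgdx			; swap D and X
+;;	anda	var		; high byte
+;;	andb	var+1		; low byte
+;;	xgdx			; swap back
+;;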
+(define_split
+ [(set (match_operand:HI 0 "hard_addr_reg_operand" "")
+ (match_operator:HI 3 "m68hc11_arith_operator"
+ [(match_operand:HI 1 "hard_addr_reg_operand" "")
+ (match_operand:HI 2 "general_operand" "")]))]
+ "z_replacement_completed == 2
+ /* If we are adding a small constant to X or Y, it's
+ better to use one or several inx/iny instructions. */
+ && !(GET_CODE (operands[3]) == PLUS
+ && ((TARGET_M6812
+ && (immediate_operand (operands[2], HImode)
+ || hard_reg_operand (operands[2], HImode)))
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >= -4
+ && INTVAL (operands[2]) <= 4)))"
+ [(set (match_dup 9) (match_dup 0))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 8) (match_dup 7))
+ (set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM) (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 6)]))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "
+ operands[9] = operands[0];
+ /* For 68HC12, push the value on the stack and do the operation
+ with a pop. */
+ if (TARGET_M6812
+ && m68hc11_non_shift_operator (operands[3], HImode)
+ && (H_REG_P (operands[2])
+ || (m68hc11_small_indexed_indirect_p (operands[2], HImode)
+ && reg_mentioned_p (operands[0], operands[2]))))
+ {
+ operands[4] = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ operands[6] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ operands[5] = operands[2];
+ operands[8] = operands[7] = operands[0];
+ }
+  /* Save operand 2 in a temporary location and use it instead.  */
+ else if ((H_REG_P (operands[2])
+ || reg_mentioned_p (operands[0], operands[2]))
+ && !(SP_REG_P (operands[2]) && GET_CODE (operands[3]) == PLUS))
+ {
+ if (GET_CODE (operands[3]) == MINUS
+ && reg_mentioned_p (operands[0], operands[2]))
+ {
+ operands[9] = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ operands[1] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ operands[8] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[4] = operands[7] = operands[0];
+ operands[6] = operands[8];
+ operands[5] = operands[2];
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[6] = operands[4];
+ if (!H_REG_P (operands[2]))
+ {
+ operands[5] = operands[0];
+ operands[7] = operands[2];
+ operands[8] = operands[0];
+ }
+ else
+ {
+ operands[5] = operands[2];
+ operands[8] = operands[7] = operands[0];
+ }
+ }
+ }
+ else
+ {
+ operands[4] = operands[5] = operands[0];
+ operands[6] = operands[2];
+ operands[8] = operands[7] = operands[0];
+ }
+ ")
+
+(define_split
+ [(set (match_operand:HI 0 "hard_addr_reg_operand" "")
+ (match_operator:HI 3 "m68hc11_arith_operator"
+ [(match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" "")]))]
+ "z_replacement_completed == 2
+ /* If we are adding a small constant to X or Y, it's
+ better to use one or several inx/iny instructions. */
+ && !(GET_CODE (operands[3]) == PLUS
+ && ((TARGET_M6812
+ && (immediate_operand (operands[2], HImode)
+ || hard_reg_operand (operands[2], HImode)))
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >= -4
+ && INTVAL (operands[2]) <= 4)))"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM) (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 2)]))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "
+ ")
+
+;;
+;; Next split handles the logical operations on D register with
+;; another hard register for the second operand. For this, we
+;; have to save the second operand in a scratch location and use
+;; it instead. This must be supported because in some (rare) cases
+;; the second operand can arrive in a hard register, and the reload
+;; pass doesn't know how to reload it into a memory location.
+;;
+;; PLUS MINUS AND IOR XOR
+;;
+;; The shift operators are special and must not appear here.
+;;
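+;; A sketch, with 'tmp' standing for the SOFT_TMP_REGNUM soft
+;; register, of how (set (REG:HI D) (PLUS:HI (REG:HI D) (REG:HI X)))
+;; is rewritten by the split below:
+;;
+;; (set (REG:HI tmp) (REG:HI X))
+;; (set (REG:HI D) (PLUS:HI (REG:HI D) (REG:HI tmp)))
+;;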
+(define_split
+ [(set (match_operand:HI 0 "d_register_operand" "")
+ (match_operator:HI 3 "m68hc11_non_shift_operator"
+ [(match_operand:HI 1 "d_register_operand" "")
+ (match_operand:HI 2 "hard_reg_operand" "")]))]
+ "TARGET_M6811
+ && z_replacement_completed == 2 && !SP_REG_P (operands[2])"
+ [(set (match_dup 4) (match_dup 2))
+ (set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 4)]))]
+ "operands[4] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);")
+
+;;
+;; For 68HC12, push the operand[2] value on the stack and do the
+;; logical/arithmetic operation with a pop.
+;;
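+;; Sketch of the stack variant for the same addition; the mnemonics
+;; in the trailing comments are assumptions about the final assembly:
+;;
+;; (set (mem:HI (pre_dec SP)) (REG:HI X))		; pshx
+;; (set (REG:HI D) (PLUS:HI (REG:HI D)
+;;		    (mem:HI (post_inc SP))))		; addd 2,sp+
+;;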
+(define_split
+ [(set (match_operand:HI 0 "d_register_operand" "")
+ (match_operator:HI 3 "m68hc11_non_shift_operator"
+ [(match_operand:HI 1 "d_register_operand" "")
+ (match_operand:HI 2 "hard_reg_operand" "")]))]
+ "TARGET_M6812
+ && z_replacement_completed == 2 && !SP_REG_P (operands[2])"
+ [(set (match_dup 4) (match_dup 2))
+ (set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 5)]))]
+ "operands[4] = gen_rtx_MEM (HImode,
+ gen_rtx_PRE_DEC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ operands[5] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));
+ ")
+
+;;--------------------------------------------------------------------
+;; 16-bit Unary operations on X and Y:
+;;
+;; NOT NEG
+;;
+;; Operations on X or Y registers are split here. Instructions are
+;; changed into:
+;; - xgdx/xgdy instruction pattern,
+;; - The same operation on register D,
+;; - xgdx/xgdy instruction pattern.
+;; This should allow the peephole to merge consecutive xgdx/xgdy instructions.
+;; We also handle the case where the address register is used in both source
+;; operands, such as:
+;;
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (mem:HI (REG:HI X))))
+;; or
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (REG:HI X)))
+;;
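+;; For instance, "neghi2" earlier shows the wrapped form for
+;; (neg:HI (REG:HI X)):
+;;
+;;	xgdx			; swap D and X
+;;	coma			; complement the 16-bit value in D
+;;	comb
+;;	xgdx			; swap back
+;;	inx			; add 1 to finish the negation
+;;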
+(define_split
+ [(set (match_operand:HI 0 "hard_addr_reg_operand" "")
+ (match_operator:HI 2 "m68hc11_unary_operator"
+ [(match_operand 1 "general_operand" "")]))]
+ "z_replacement_completed == 2"
+ [(set (match_dup 4) (match_dup 5))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM) (match_op_dup 2 [(match_dup 3)]))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "
+{
+ if ((H_REG_P (operands[1])
+ && !rtx_equal_p (operands[0], operands[1]))
+ || reg_mentioned_p (operands[0], operands[1]))
+ {
+      /* Move to the destination register before the xgdx.  */
+ operands[4] = gen_rtx_REG (GET_MODE (operands[1]),
+ REGNO (operands[0]));
+ operands[5] = operands[1];
+
+ /* Apply the operation on D. */
+ operands[3] = gen_rtx_REG (GET_MODE (operands[1]), HARD_D_REGNUM);
+ }
+ else
+ {
+ /* Generate a copy to same register (nop). */
+ operands[4] = operands[5] = operands[0];
+ operands[3] = operands[1];
+ }
+}")
+
+;;
+;; 8-bit operations on address registers.
+;;
+;; We have to take care that the address register is not used for the
+;; source of operand2. If operand2 is the D register, we have to save
+;; that register in a temporary location.
+;;
+;; AND OR XOR PLUS MINUS ASHIFT ASHIFTRT LSHIFTRT ROTATE ROTATERT
+;;
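+;; A sketch, with 'tmp' standing for the SOFT_TMP_REGNUM soft
+;; register, of how (set (REG:QI X) (PLUS:QI (REG:QI X) (REG:QI D)))
+;; is split below:
+;;
+;; (set (REG:HI tmp) (REG:HI D))	; save D before the swap
+;; (swap of D and X)			; xgdx
+;; (set (REG:QI D) (PLUS:QI (REG:QI D) (REG:QI tmp)))
+;; (swap of D and X)			; xgdx
+;;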
+(define_split
+ [(set (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (match_operator:QI 3 "m68hc11_arith_operator"
+ [(match_operand:QI 1 "hard_addr_reg_operand" "")
+ (match_operand:QI 2 "general_operand" "")]))]
+ "z_replacement_completed == 2
+ /* Reject a (plus:QI (reg:QI X) (const_int 1|-1)) because the
+ incqi pattern generates a better code. */
+ && !(GET_CODE (operands[3]) == PLUS
+ && GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 1 || INTVAL (operands[2]) == -1))"
+ [(set (match_dup 5) (match_dup 6))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 4))
+ (set (match_dup 4) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (match_op_dup 3 [(reg:QI D_REGNUM) (match_dup 7)]))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 4))
+ (set (match_dup 4) (reg:HI D_REGNUM))])]
+ "operands[4] = gen_rtx_REG (HImode, REGNO (operands[0]));
+
+   /* If the second operand is a hard register or if the address
+      register appears in the source, we have to save the operand[2]
+      value in a temporary location and then use that temp.
+      Otherwise, it's ok and we generate a (set (D) (D)) that
+      will result in a nop.  */
+ if (H_REG_P (operands[2]))
+ {
+ operands[5] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, REGNO (operands[2]));
+ operands[7] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ }
+ else if (reg_mentioned_p (operands[0], operands[2]))
+ {
+ operands[5] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ operands[6] = operands[2];
+ operands[7] = operands[5];
+ }
+ else
+ {
+ operands[5] = operands[6] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ operands[7] = operands[2];
+ }
+ ")
+
+;;
+;; Next split handles the logical operations on D register with
+;; another hard register for the second operand. For this, we
+;; have to save the second operand in a scratch location and use
+;; it instead. This must be supported because in some (rare) cases
+;; the second operand can come in a hard register and the reload
+;; pass doesn't know how to reload it in a memory location.
+;;
+;; PLUS MINUS AND IOR XOR
+;;
+;; The shift operators are special and must not appear here.
+;;
+(define_split
+ [(set (match_operand:QI 0 "d_register_operand" "")
+ (match_operator:QI 3 "m68hc11_non_shift_operator"
+ [(match_operand:QI 1 "d_register_operand" "")
+ (match_operand:QI 2 "hard_reg_operand" "")]))]
+ "reload_completed"
+ [(set (match_dup 5) (match_dup 6))
+ (set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 4)]))]
+ "operands[4] = gen_rtx_REG (QImode, SOFT_TMP_REGNUM);
+ operands[5] = gen_rtx_REG (HImode, SOFT_TMP_REGNUM);
+ operands[6] = gen_rtx_REG (HImode, REGNO (operands[2]));")
+
+;;--------------------------------------------------------------------
+;; 8-bit Unary operations on X and Y:
+;;
+;; NOT NEG
+;;
+;; Operations on X or Y registers are split here. Instructions are
+;; changed into:
+;; - xgdx/xgdy instruction pattern,
+;; - The same operation on register D,
+;; - xgdx/xgdy instruction pattern.
+;; This should allow the peephole to merge consecutive xgdx/xgdy instructions.
+;; We also handle the case where the address register is used in both source
+;; operands, such as:
+;;
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (mem:HI (REG:HI X))))
+;; or
+;; (set (REG:HI X) (PLUS:HI (REG:HI X) (REG:HI X)))
+;;
+(define_split
+ [(set (match_operand:QI 0 "hard_addr_reg_operand" "")
+ (match_operator:QI 2 "m68hc11_unary_operator"
+ [(match_operand:QI 1 "general_operand" "")]))]
+ "z_replacement_completed == 2"
+ [(set (match_dup 4) (match_dup 5))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (match_op_dup 2 [(match_dup 6)]))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+ (set (match_dup 3) (reg:HI D_REGNUM))])]
+ "
+{
+ operands[3] = gen_rtx_REG (HImode, REGNO (operands[0]));
+ if ((H_REG_P (operands[1])
+ && !rtx_equal_p (operands[0], operands[1]))
+ || reg_mentioned_p (operands[0], operands[1]))
+ {
+      /* Move to the destination register before the xgdx.  */
+ operands[4] = operands[0];
+ operands[5] = operands[1];
+
+ /* Apply the operation on D. */
+ operands[6] = gen_rtx_REG (QImode, HARD_D_REGNUM);
+ }
+ else
+ {
+ operands[4] = operands[5] = operands[0];
+ operands[6] = operands[1];
+ }
+}")
+
+
+;;--------------------------------------------------------------------
+;;- Complements
+;;--------------------------------------------------------------------
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"__negdi2\", NEG, DImode, DImode, 2, operands);
+ DONE;")
+
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (neg:SI (match_operand:SI 1 "general_operand" "0")))]
+ ""
+ "*
+{
+ rtx ops[1];
+
+ CC_STATUS_INIT;
+
+ /* With -Os or without -O, use a special library call. */
+ if (optimize_size || optimize == 0)
+ return \"bsr\\t___negsi2\";
+
+ ops[0] = gen_label_rtx ();
+
+ /* 32-bit complement and add 1. */
+ output_asm_insn (\"comb\\n\\tcoma\\n\\txgdx\", operands);
+ output_asm_insn (\"comb\\n\\tcoma\\n\\tinx\\n\\txgdx\", operands);
+ output_asm_insn (\"bne\\t%l0\", ops);
+ output_asm_insn (\"inx\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (ops[0]));
+ return \"\";
+}")
+
+(define_insn "neghi2"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,x*y")
+ (neg:HI (match_operand:HI 1 "general_operand" "0,!duim,0")))]
+ ""
+ "@
+ coma\\n\\tcomb\\n\\taddd\\t#1
+ clra\\n\\tclrb\\n\\tsubd\\t%1
+ xgd%0\\n\\tcoma\\n\\tcomb\\n\\txgd%0\\n\\tin%0")
+
+(define_insn "negqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,m,!u,!*A")
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,0,0")))]
+ ""
+ "@
+ negb
+ neg\\t%b0
+ neg\\t%b0
+ #")
+
+;;
+;; - 64-bit complement.  GCC knows how to expand it, but providing a
+;; pattern generates better/smaller code.
+;;
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (not:DI (match_operand:DI 1 "general_operand" "")))]
+ ""
+ "m68hc11_emit_libcall (\"___notdi2\", NOT, DImode, DImode, 2, operands);
+ DONE;")
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,m,!u")
+ (not:SI (match_operand:SI 1 "general_operand" "0,m,0")))
+ (clobber (match_scratch:HI 2 "=X,d,X"))]
+ ""
+ "@
+ bsr\\t___one_cmplsi2
+ #
+ #")
+
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "non_push_operand" "=d,m,*A,u")
+ (not:HI (match_operand:HI 1 "general_operand" "0,0,0,0")))]
+ ""
+ "@
+ comb\\n\\tcoma
+ com\\t%b0\\n\\tcom\\t%h0
+ #
+ com\\t%b0\\n\\tcom\\t%h0")
+
+(define_insn "one_cmplqi2"
+ [(set (match_operand:QI 0 "non_push_operand" "=d,m,*A,u")
+ (not:QI (match_operand:QI 1 "general_operand" "0,0,0,0")))]
+ ""
+ "@
+ comb
+ com\\t%b0
+ #
+ com\\t%b0")
+
+(define_split /* "*one_cmplsi2" */
+ [(set (match_operand:SI 0 "non_push_operand" "")
+ (not:SI (match_dup 0)))
+ (clobber (match_scratch:HI 1 ""))]
+ "z_replacement_completed == 2
+ && (!X_REG_P (operands[0]) || (optimize && optimize_size == 0))"
+ [(set (match_dup 2) (not:HI (match_dup 2)))
+ (set (match_dup 3) (not:HI (match_dup 3)))]
+ "operands[2] = m68hc11_gen_lowpart (HImode, operands[0]);
+ operands[3] = m68hc11_gen_highpart (HImode, operands[0]);")
+
+(define_split /* "*one_cmplsi2" */
+ [(set (match_operand:SI 0 "non_push_operand" "")
+ (not:SI (match_operand:SI 1 "non_push_operand" "")))
+ (clobber (match_operand:HI 2 "d_register_operand" ""))]
+ "z_replacement_completed == 2
+ && (!X_REG_P (operands[0]) || (optimize && optimize_size == 0))"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 2) (not:HI (match_dup 2)))
+ (set (match_dup 4) (match_dup 2))
+ (set (match_dup 2) (match_dup 5))
+ (set (match_dup 2) (not:HI (match_dup 2)))
+ (set (match_dup 6) (match_dup 2))]
+ "operands[3] = m68hc11_gen_lowpart (HImode, operands[1]);
+ operands[5] = m68hc11_gen_highpart (HImode, operands[1]);
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[0]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[0]);")
+
+;;--------------------------------------------------------------------
+;;- arithmetic shifts
+;;--------------------------------------------------------------------
+;;
+;; Provide some 64-bit shift patterns.
+(define_expand "ashldi3"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (ashift:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT
+ || (INTVAL (operands[2]) != 32 && INTVAL (operands[2]) != 1))
+ {
+ FAIL;
+ }
+}")
+
+(define_insn_and_split "*ashldi3_const32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=<,m,u")
+ (ashift:DI (match_operand:DI 1 "general_operand" "umi,umi,umi")
+ (const_int 32)))
+ (clobber (match_scratch:HI 2 "=&A,d,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "/* Move the lowpart in the highpart first in case the shift
+ is applied on the source. */
+ if (IS_STACK_PUSH (operands[0]))
+ {
+ m68hc11_split_move (m68hc11_gen_lowpart (SImode, operands[0]),
+ const0_rtx, operands[2]);
+
+ /* Adjust first operand if it uses SP so that we take into
+ account the above push. Can occur only for 68HC12. */
+ if (reg_mentioned_p (gen_rtx_REG (HImode, HARD_SP_REGNUM),
+ operands[1]))
+ operands[1] = adjust_address (operands[1],
+ GET_MODE (operands[0]), 4);
+ }
+ m68hc11_split_move (m68hc11_gen_highpart (SImode, operands[0]),
+ m68hc11_gen_lowpart (SImode, operands[1]),
+ operands[2]);
+ if (!IS_STACK_PUSH (operands[0]))
+ {
+ m68hc11_split_move (m68hc11_gen_lowpart (SImode, operands[0]),
+ const0_rtx, operands[2]);
+ }
+ DONE;")
+
+(define_insn_and_split "*ashldi3_const1"
+ [(set (match_operand:DI 0 "non_push_operand" "=m,m,u")
+ (ashift:DI (match_operand:DI 1 "general_operand" "mi,u,umi")
+ (const_int 1)))
+ (clobber (match_scratch:HI 2 "=d,d,d"))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 2) (ashift:HI (match_dup 2) (const_int 1)))
+ (set (match_dup 4) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 5))
+ (parallel [(set (match_dup 2)
+ (rotate:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 6) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 7))
+ (parallel [(set (match_dup 2)
+ (rotate:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 8) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 9))
+ (parallel [(set (match_dup 2)
+ (rotate:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 10) (match_dup 2))]
+ "operands[3] = m68hc11_gen_lowpart (SImode, operands[1]);
+ operands[5] = m68hc11_gen_highpart (HImode, operands[3]);
+ operands[3] = m68hc11_gen_lowpart (HImode, operands[3]);
+
+ operands[4] = m68hc11_gen_lowpart (SImode, operands[0]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[4]);
+
+ operands[7] = m68hc11_gen_highpart (SImode, operands[1]);
+ operands[9] = m68hc11_gen_highpart (HImode, operands[7]);
+ operands[7] = m68hc11_gen_lowpart (HImode, operands[7]);
+
+ operands[8] = m68hc11_gen_highpart (SImode, operands[0]);
+ operands[10] = m68hc11_gen_highpart (HImode, operands[8]);
+ operands[8] = m68hc11_gen_lowpart (HImode, operands[8]);")
+
+(define_insn "addsi_silshr16"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,!D")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "general_operand" "!*uim,0,0")
+ (const_int 16))
+ (match_operand:SI 2 "general_operand" "0,m!*u,0")))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "general_operand" "")
+ (const_int 16))
+ (match_operand:SI 2 "general_operand" "")))]
+ "z_replacement_completed == 2 && !X_REG_P (operands[1])"
+ [(set (reg:HI D_REGNUM) (plus:HI (reg:HI D_REGNUM) (match_dup 3)))
+ (set (reg:HI X_REGNUM) (plus:HI (plus:HI (reg:HI X_REGNUM)
+ (const_int 0))
+ (reg:HI CC_REGNUM)))]
+ "operands[3] = m68hc11_gen_highpart (HImode, operands[1]);")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "general_operand" "")
+ (const_int 16))
+ (match_operand:SI 2 "general_operand" "")))]
+ "z_replacement_completed == 2 && X_REG_P (operands[1])"
+ [(set (reg:HI D_REGNUM) (match_dup 5))
+ (set (reg:HI X_REGNUM) (match_dup 3))
+ (set (reg:HI D_REGNUM) (plus:HI (reg:HI D_REGNUM) (match_dup 4)))
+ (set (reg:HI X_REGNUM) (plus:HI (plus:HI (reg:HI X_REGNUM)
+ (const_int 0))
+ (reg:HI CC_REGNUM)))]
+ "operands[3] = m68hc11_gen_highpart (HImode, operands[2]);
+ if (X_REG_P (operands[2]))
+ {
+ operands[4] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ operands[5] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ }
+ else
+ {
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[2]);
+ operands[5] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ }
+")
+
+(define_insn "addsi_ashift16"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "general_operand" "uim")
+ (const_int 65536))
+ (match_operand:SI 1 "general_operand" "0")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "0"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "general_operand" "")
+ (const_int 65536))
+ (match_operand:SI 1 "general_operand" "")))
+ (clobber (match_scratch:HI 3 "=X"))]
+ "0 && reload_completed && z_replacement_completed == 2"
+ [(set (reg:HI X_REGNUM) (plus:HI (reg:HI X_REGNUM) (match_dup 4)))]
+ "
+{
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[2]);
+}")
+
+(define_insn_and_split "addsi_andshr16"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (and:SI (match_operand:SI 1 "general_operand" "%uim")
+ (const_int 65535))
+ (match_operand:SI 2 "general_operand" "0")))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (reg:HI D_REGNUM) (plus:HI (reg:HI D_REGNUM) (match_dup 3)))
+ (set (reg:HI X_REGNUM) (plus:HI (plus:HI (reg:HI X_REGNUM) (const_int 0)) (reg:HI CC_REGNUM)))]
+ "operands[3] = m68hc11_gen_lowpart (HImode, operands[1]);")
+
+;;
+;; 32-bit shifts are made by a small library routine that uses
+;; a specific passing convention for parameters (for efficiency reasons).
+;;
+;; [D + X] -> Value to be shifted
+;; Y -> Shift count
+;;
+;; The shift count is clobbered by the routine.
+;;
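+;; A sketch of the resulting call sequence for a symbolic count
+;; (taken from the output templates below):
+;;
+;;	ldy	<count>
+;;	bsr	___ashlsi3	; shift the 32-bit value in [D + X]
+;;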
+(define_expand "ashlsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "general_operand" ""))
+ (clobber (scratch:HI))])
+ (parallel
+ [(set (match_dup 0) (ashift:SI (match_dup 0)
+ (match_operand:HI 2 "nonmemory_operand" "")))
+ (clobber (scratch:HI))])]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ashift:SI (match_operand:SI 1 "general_operand" "")
+ (const_int 16)))
+ (clobber (match_scratch:HI 3 ""))]
+ ""
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (const_int 0))]
+ "operands[2] = m68hc11_gen_highpart (HImode, operands[0]);
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[0]);
+ operands[3] = m68hc11_gen_lowpart (HImode, operands[1]);")
+
+(define_insn "*ashlsi3_const16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=D,m,*u")
+ (ashift:SI (match_operand:SI 1 "general_operand" "Duim,D,D")
+ (const_int 16)))
+ (clobber (match_scratch:HI 2 "=X,X,X"))]
+ ""
+ "#")
+
+(define_insn_and_split "*ashlsi3_const16_zexthi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=D")
+ (ashift:SI (zero_extend:HI
+ (match_operand:HI 1 "general_operand" "duim*A"))
+ (const_int 16)))
+ (clobber (match_scratch:HI 2 "=X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:HI X_REGNUM) (match_dup 1))
+ (set (reg:HI D_REGNUM) (const_int 0))]
+ "")
+
+(define_insn "*ashlsi3_const1"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,D,D,m,*u,*u")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,*u,m,*u,m")
+ (const_int 1)))
+ (clobber (match_scratch:HI 2 "=X,X,X,&d,&d,&d"))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (X_REG_P (operands[1]))
+ {
+ return \"lsld\\n\\txgdx\\n\\trolb\\n\\trola\\n\\txgdx\";
+ }
+ else
+ {
+ rtx ops[2];
+
+ ops[1] = m68hc11_gen_lowpart (HImode, operands[1]);
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"lsld\", ops);
+ if (!X_REG_P (operands[0]))
+ {
+ ops[1] = ops[0];
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[0]);
+ m68hc11_gen_movhi (insn, ops);
+ ops[0] = ops[1];
+ ops[1] = m68hc11_gen_highpart (HImode, operands[1]);
+ m68hc11_gen_movhi (insn, ops);
+ }
+ else
+ {
+	  /* Load the high part into X in case the source operand
+	     uses X as a memory pointer.  */
+ ops[0] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ ops[1] = m68hc11_gen_highpart (HImode, operands[1]);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"xgdx\", ops);
+ }
+ output_asm_insn (\"rolb\", ops);
+ output_asm_insn (\"rola\", ops);
+ if (!X_REG_P (operands[0]))
+ {
+ ops[1] = ops[0];
+ ops[0] = m68hc11_gen_highpart (HImode, operands[0]);
+ m68hc11_gen_movhi (insn, ops);
+ }
+ else
+ {
+ output_asm_insn (\"xgdx\", ops);
+ }
+ return \"\";
+ }
+}")
+
+(define_insn "*ashlsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "+D")
+ (ashift:SI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_scratch:HI 2 "=y"))]
+ "TARGET_M6811 /* See *ashlsi3 note. */"
+ "*
+{
+ CC_STATUS_INIT;
+ return \"ldy\\t%1\\n\\tbsr\\t___ashlsi3\";
+}")
+
+(define_insn "*ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "+D,D")
+ (ashift:SI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "y,mi")))
+ (clobber (match_scratch:HI 2 "=1,X"))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+
+ /* There is a reload problem if we don't accept 'm' for the shift value.
+ A RELOAD_OTHER reload can be generated for operand 0 (class A_REGS)
+     and this conflicts with all reloads.  Since X, Y, Z are used, there
+     are not enough registers in class A_REGS.
+
+     Assuming that 'operands[1]' does not refer to the stack (which
+     is true for 68hc11 only), we temporarily save the value of Y.
+
+ For 68HC12 we must also accept a constant because Z register is
+ disabled when compiling with -fomit-frame-pointer. We can come up
+ with a reload problem and the *lshrsi3_const pattern was disabled
+ for that reason. */
+ if (!Y_REG_P (operands[2]))
+ {
+ rtx ops[1];
+ int y_dead = dead_register_here (insn, iy_reg);
+
+ ops[0] = operands[1];
+ if (y_dead == 0)
+ {
+ output_asm_insn (\"pshy\", operands);
+ if (reg_mentioned_p (stack_pointer_rtx, operands[1]))
+ ops[0] = adjust_address (operands[1], GET_MODE (operands[1]), 2);
+ }
+ output_asm_insn (\"ldy\\t%0\", ops);
+ output_asm_insn (\"bsr\\t___ashlsi3\", operands);
+ return y_dead == 0 ? \"puly\" : \"\";
+ }
+ return \"bsr\\t___ashlsi3\";
+}")
+
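+;; When Y is still live at the shift, "*ashlsi3" above brackets the
+;; call (a sketch; <count> is symbolic):
+;;
+;;	pshy
+;;	ldy	<count>		; count, sp-adjusted if needed
+;;	bsr	___ashlsi3
+;;	puly
+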
+(define_expand "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ashift:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+ emit_move_insn (scratch, operands[2]);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_ASHIFT (HImode,
+ operand1, scratch)),
+ gen_rtx_CLOBBER (VOIDmode, scratch))));
+ DONE;
+ }
+}")
+
+(define_insn "*ashlhi3_const1"
+ [(set (match_operand:HI 0 "non_push_operand" "=dm,!*u*A")
+ (ashift:HI (match_operand:HI 1 "non_push_operand" "0,0")
+ (const_int 1)))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (D_REG_P (operands[0]))
+ {
+ return \"asld\";
+ }
+
+ output_asm_insn (\"asl\\t%b0\", operands);
+ output_asm_insn (\"rol\\t%h0\", operands);
+ CC_STATUS_INIT;
+ return \"\";
+}")
+
+
+(define_insn "*ashlhi3_2"
+ [(set (match_operand:HI 0 "register_operand" "=d,*x")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "register_operand" "+x,+d")))
+ (clobber (match_dup 2))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ return \"bsr\\t___lshlhi3\";
+}")
+
+(define_insn "*ashlhi3"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (ashift:HI (match_dup 0)
+ (match_operand:HI 1 "register_operand" "+x")))
+ (clobber (match_dup 1))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ return \"bsr\\t___lshlhi3\";
+}")
+
+(define_insn "*ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,!*A")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int i;
+
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ i = INTVAL (operands[2]);
+ if (i >= 8)
+ {
+ CC_STATUS_INIT;
+ output_asm_insn (\"tba\", operands);
+ if (i == 15)
+ {
+ output_asm_insn (\"rora\", operands);
+ output_asm_insn (\"anda\\t#0\", operands);
+ output_asm_insn (\"rora\", operands);
+ }
+ else
+	  while (i != 8)
+ {
+ output_asm_insn (\"asla\", operands);
+ i--;
+ }
+ return \"clrb\";
+ }
+ for (i = 0; i < INTVAL (operands[2]) - 1; i++)
+ {
+ output_asm_insn (\"asld\", operands);
+ }
+ return \"asld\";
+}")
+
+(define_expand "ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ashift:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*ashlqi3_const1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,m,!u,!*q,!*A")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,0,0,0")
+ (const_int 1)))]
+ ""
+ "@
+ aslb
+ asl\\t%b0
+ asl\\t%b0
+ asl%0
+ #")
+
+(define_insn "*ashlqi3_const"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (ashift:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int i;
+ const char* insn_code;
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ insn_code = \"aslb\";
+ else if (DA_REG_P (operands[0]))
+ insn_code = \"asla\";
+ else
+ return \"#\";
+
+ i = INTVAL (operands[2]);
+ if (i >= 8)
+ {
+ if (DA_REG_P (operands[0]))
+ return \"clra\";
+ else
+ return \"clrb\";
+ }
+ else if (i == 7)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ output_asm_insn (\"rora\", operands);
+ output_asm_insn (\"ldaa\\t#0\", operands);
+ return \"rora\";
+ }
+ else
+ {
+ output_asm_insn (\"rorb\", operands);
+ output_asm_insn (\"ldab\\t#0\", operands);
+ return \"rorb\";
+ }
+ }
+ else if (i == 6)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ output_asm_insn (\"rora\", operands);
+ output_asm_insn (\"rora\", operands);
+ output_asm_insn (\"rora\", operands);
+ return \"anda\\t#0xC0\";
+ }
+ else
+ {
+ output_asm_insn (\"rorb\", operands);
+ output_asm_insn (\"rorb\", operands);
+ output_asm_insn (\"rorb\", operands);
+ return \"andb\\t#0xC0\";
+ }
+ }
+ while (--i >= 0)
+ {
+ output_asm_insn (insn_code, operands);
+ }
+ return \"\";
+}")
+
+(define_insn "*ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (ashift:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "nonimmediate_operand"
+ "m*u*d*A,m*u*d*A,m*u")))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (!D_REG_P (operands[0]) && !Q_REG_P (operands[0]))
+ return \"#\";
+
+ ops[0] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+
+ CC_STATUS_INIT;
+ return \"bsr\\t___lshlqi3\";
+}")
+
+(define_expand "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+
+ emit_move_insn (scratch, operands[2]);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_ASHIFTRT (HImode,
+ operand1, scratch)),
+ gen_rtx_CLOBBER (VOIDmode, scratch))));
+ DONE;
+ }
+}")
+
+(define_insn "*ashrhi3_const1"
+ [(set (match_operand:HI 0 "non_push_operand" "=dm,!*u*A")
+ (ashiftrt:HI (match_operand:HI 1 "non_push_operand" "0,0")
+ (const_int 1)))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ if (D_REG_P (operands[0]))
+ {
+ return \"asra\\n\\trorb\";
+ }
+
+ output_asm_insn (\"asr\\t%h0\", operands);
+ output_asm_insn (\"ror\\t%b0\", operands);
+ return \"\";
+}")
+
+
+(define_insn "*ashrhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=d,!*A")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ rtx ops[2];
+ int val = INTVAL (operands[2]);
+
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (val >= 15)
+ {
+ ops[0] = gen_label_rtx ();
+
+ output_asm_insn (\"clrb\", operands);
+ output_asm_insn (\"rola\", operands);
+
+ /* Clear A without clearing the carry flag. */
+ output_asm_insn (\"tba\", operands);
+ output_asm_insn (\"bcc\\t%l0\", ops);
+ output_asm_insn (\"coma\", operands);
+ output_asm_insn (\"comb\", operands);
+
+ CC_STATUS_INIT;
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+ return \"\";
+ }
+ if (val >= 8)
+ {
+ ops[0] = gen_label_rtx ();
+
+ output_asm_insn (\"tab\", operands);
+ output_asm_insn (\"clra\", operands);
+ output_asm_insn (\"tstb\", operands);
+ output_asm_insn (\"bge\\t%l0\", ops);
+ output_asm_insn (\"deca\", operands);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+
+ val -= 8;
+
+ while (val > 0)
+ {
+ output_asm_insn (\"asrb\", operands);
+ val--;
+ }
+ /* Status is ok. */
+ return \"\";
+ }
+ if (val == 7)
+ {
+ ops[0] = gen_label_rtx ();
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"tab\", operands);
+ output_asm_insn (\"anda\\t#0\", operands);
+ output_asm_insn (\"bcc\\t%l0\", ops);
+ output_asm_insn (\"coma\", ops);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+ return \"\";
+ }
+ while (val > 0)
+ {
+ output_asm_insn (\"asra\", operands);
+ output_asm_insn (\"rorb\", operands);
+ val--;
+ }
+ CC_STATUS_INIT;
+
+ return \"\";
+}")
+
+(define_insn "*ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,*x")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "register_operand" "+x,+d")))
+ (clobber (match_dup 2))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ output_asm_insn (\"bsr\\t___ashrhi3\", operands);
+ return \"\";
+}")
+
+(define_expand "ashrsi3"
+ [(parallel
+ [(set (match_dup 0) (match_operand:SI 1 "general_operand" ""))
+ (clobber (scratch:HI))])
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (scratch:HI))])]
+ ""
+ "")
+
+(define_insn "*ashrsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "+D")
+ (ashiftrt:SI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_scratch:HI 2 "=y"))]
+ "TARGET_M6811 /* See *ashrsi3 note. */"
+ "*
+{
+ CC_STATUS_INIT;
+ return \"ldy\\t%1\\n\\tbsr\\t___ashrsi3\";
+}")
+
+(define_insn "*ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "+D,D")
+ (ashiftrt:SI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "y,mi")))
+ (clobber (match_scratch:HI 2 "=1,X"))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ /* There is a reload problem if we don't accept 'm' for the shift value.
+ A RELOAD_OTHER reload can be generated for operand 0 (class A_REGS)
+     and this conflicts with all reloads.  Since X, Y, Z are used, there
+     are not enough registers in class A_REGS.
+
+     Assuming that 'operands[1]' does not refer to the stack (which
+     is true for 68hc11 only), we temporarily save the value of Y.
+
+ For 68HC12 we must also accept a constant because Z register is
+ disabled when compiling with -fomit-frame-pointer. We can come up
+ with a reload problem and the *lshrsi3_const pattern was disabled
+ for that reason. */
+ if (!Y_REG_P (operands[2]))
+ {
+ rtx ops[1];
+ int y_dead = dead_register_here (insn, iy_reg);
+
+ ops[0] = operands[1];
+ if (y_dead == 0)
+ {
+ output_asm_insn (\"pshy\", operands);
+ if (reg_mentioned_p (stack_pointer_rtx, operands[1]))
+ ops[0] = adjust_address (operands[1], GET_MODE (operands[1]), 2);
+ }
+ output_asm_insn (\"ldy\\t%0\", ops);
+ output_asm_insn (\"bsr\\t___ashrsi3\", operands);
+ return y_dead == 0 ? \"puly\" : \"\";
+ }
+ return \"bsr\\t___ashrsi3\";
+}")
+
+(define_expand "ashrqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*ashrqi3_const1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,m,!u,!*q,!*A")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,0,0,0")
+ (const_int 1)))]
+ ""
+ "@
+ asrb
+ asr\\t%b0
+ asr\\t%b0
+ asr%0
+ #")
+
+(define_insn "*ashrqi3_const"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int i;
+ const char* insn_code;
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ insn_code = \"asrb\";
+ else if (DA_REG_P (operands[0]))
+ insn_code = \"asra\";
+ else
+ return \"#\";
+
+ i = INTVAL (operands[2]);
+ if (i > 8)
+ i = 8;
+ while (--i >= 0)
+ {
+ output_asm_insn (insn_code, operands);
+ }
+ return \"\";
+}")
+
+(define_insn "*ashrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "nonimmediate_operand"
+ "m*u*d*A,m*u*d*A,m*u")))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (!D_REG_P (operands[0]) && !Q_REG_P (operands[0]))
+ return \"#\";
+
+ ops[0] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+
+ CC_STATUS_INIT;
+ return \"bsr\\t___ashrqi3\";
+}")
+
+;;--------------------------------------------------------------------
+;; logical shift instructions
+;;--------------------------------------------------------------------
+(define_expand "lshrdi3"
+ [(parallel [(set (match_operand:DI 0 "general_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (match_scratch:HI 3 ""))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT
+ || (INTVAL (operands[2]) != 32 && INTVAL (operands[2]) < 48
+ && INTVAL (operands[2]) != 1))
+ {
+ FAIL;
+ }
+}")
+
+(define_insn_and_split "*lshrdi3_const32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=<,m,u")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "umi,umi,umi")
+ (const_int 32)))
+ (clobber (match_scratch:HI 2 "=&A,d,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "m68hc11_split_move (m68hc11_gen_lowpart (SImode, operands[0]),
+ m68hc11_gen_highpart (SImode, operands[1]),
+ operands[2]);
+ m68hc11_split_move (m68hc11_gen_highpart (SImode, operands[0]),
+ const0_rtx, operands[2]);
+ DONE;")
+
+(define_insn "*lshrdi3_const63"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,u")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "umi,umi")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (match_scratch:HI 3 "=d,d"))]
+ "INTVAL (operands[2]) >= 48"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (match_scratch:HI 3 "=d"))]
+ "z_replacement_completed && INTVAL (operands[2]) >= 56"
+ [(set (reg:QI D_REGNUM) (match_dup 9))
+ (set (reg:QI D_REGNUM) (lshiftrt:QI (reg:QI D_REGNUM) (match_dup 8)))
+ (set (reg:HI D_REGNUM) (zero_extend:HI (reg:QI D_REGNUM)))
+ (set (match_dup 4) (reg:HI D_REGNUM))
+ (set (reg:QI D_REGNUM) (const_int 0))
+ (set (match_dup 5) (reg:HI D_REGNUM))
+ (set (match_dup 6) (reg:HI D_REGNUM))
+ (set (match_dup 7) (reg:HI D_REGNUM))]
+ "operands[8] = GEN_INT (INTVAL (operands[2]) - 56);
+ operands[4] = m68hc11_gen_lowpart (SImode, operands[0]);
+ operands[5] = m68hc11_gen_highpart (HImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[4]);
+
+ operands[9] = m68hc11_gen_highpart (SImode, operands[1]);
+ operands[9] = m68hc11_gen_highpart (HImode, operands[9]);
+ operands[9] = m68hc11_gen_highpart (QImode, operands[9]);
+
+ operands[6] = m68hc11_gen_highpart (SImode, operands[0]);
+ operands[7] = m68hc11_gen_highpart (HImode, operands[6]);
+ operands[6] = m68hc11_gen_lowpart (HImode, operands[6]);")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (match_scratch:HI 3 "=d"))]
+ "z_replacement_completed && INTVAL (operands[2]) >= 48
+ && INTVAL (operands[2]) < 56"
+ [(set (reg:HI D_REGNUM) (match_dup 9))
+ (set (reg:HI D_REGNUM) (lshiftrt:HI (reg:HI D_REGNUM) (match_dup 8)))
+ (set (match_dup 4) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (const_int 0))
+ (set (match_dup 5) (reg:HI D_REGNUM))
+ (set (match_dup 6) (reg:HI D_REGNUM))
+ (set (match_dup 7) (reg:HI D_REGNUM))]
+ "operands[8] = GEN_INT (INTVAL (operands[2]) - 48);
+ operands[4] = m68hc11_gen_lowpart (SImode, operands[0]);
+ operands[5] = m68hc11_gen_highpart (HImode, operands[4]);
+ operands[4] = m68hc11_gen_lowpart (HImode, operands[4]);
+
+ operands[9] = m68hc11_gen_highpart (SImode, operands[1]);
+ operands[9] = m68hc11_gen_highpart (HImode, operands[9]);
+ operands[6] = m68hc11_gen_highpart (SImode, operands[0]);
+ operands[7] = m68hc11_gen_highpart (HImode, operands[6]);
+ operands[6] = m68hc11_gen_lowpart (HImode, operands[6]);")
+
+(define_insn_and_split "*lshrdi_const1"
+ [(set (match_operand:DI 0 "non_push_operand" "=m,u")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "umi,umi")
+ (const_int 1)))
+ (clobber (match_scratch:HI 2 "=d,d"))]
+ ""
+ "#"
+ "z_replacement_completed == 2"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 2) (lshiftrt:HI (match_dup 2) (const_int 1)))
+ (set (match_dup 4) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 5))
+ (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 6) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 7))
+ (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 8) (match_dup 2))
+
+ (set (match_dup 2) (match_dup 9))
+ (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 10) (match_dup 2))]
+ "operands[3] = m68hc11_gen_highpart (SImode, operands[1]);
+ operands[5] = m68hc11_gen_lowpart (HImode, operands[3]);
+ operands[3] = m68hc11_gen_highpart (HImode, operands[3]);
+
+ operands[4] = m68hc11_gen_highpart (SImode, operands[0]);
+ operands[6] = m68hc11_gen_lowpart (HImode, operands[4]);
+ operands[4] = m68hc11_gen_highpart (HImode, operands[4]);
+
+ operands[7] = m68hc11_gen_lowpart (SImode, operands[1]);
+ operands[9] = m68hc11_gen_lowpart (HImode, operands[7]);
+ operands[7] = m68hc11_gen_highpart (HImode, operands[7]);
+
+ operands[8] = m68hc11_gen_lowpart (SImode, operands[0]);
+ operands[10] = m68hc11_gen_lowpart (HImode, operands[8]);
+ operands[8] = m68hc11_gen_highpart (HImode, operands[8]);")
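+
+;; Illustration: the split above performs the 64-bit shift right by one
+;; as four 16-bit steps through D, rippling the carry from the most
+;; significant word downwards, roughly:
+;;
+;;    ldd  <hi>      ; most significant word
+;;    lsrd           ; shift it; bit 0 falls into the carry
+;;    std  <hi'>
+;;    ldd  <next>
+;;    rora           ; rotate the carry into the next word
+;;    rorb
+;;    std  <next'>   ; (repeated for the remaining two words)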
+
+(define_expand "lshrsi3"
+ [(parallel
+ [(set (match_dup 0) (match_operand:SI 1 "general_operand" ""))
+ (clobber (scratch:HI))])
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (scratch:HI))])]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "non_push_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "general_operand" "")
+ (const_int 16)))
+ (clobber (match_scratch:HI 3 ""))]
+ "reload_completed && !(X_REG_P (operands[0]) && X_REG_P (operands[1]))"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (const_int 0))]
+ "operands[4] = m68hc11_gen_highpart (HImode, operands[0]);
+ operands[2] = m68hc11_gen_lowpart (HImode, operands[0]);
+ operands[3] = m68hc11_gen_highpart (HImode, operands[1]);")
+
+(define_insn "*lshrsi3_const16"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,D,m,u")
+ (lshiftrt:SI (match_operand:SI 1 "general_operand" "uim,0,D,D")
+ (const_int 16)))
+ (clobber (match_scratch:HI 2 "=X,X,X,X"))]
+ ""
+ "@
+ #
+ xgdx\\n\\tldx\\t#0
+ #
+ #")
+
+(define_insn "*lshrsi3_const1"
+ [(set (match_operand:SI 0 "non_push_operand" "=D,D,D,m,*u,*u")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,*u,m,*u,m")
+ (const_int 1)))
+ (clobber (match_scratch:HI 2 "=X,X,X,&d,&d,&d"))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ if (X_REG_P (operands[1]))
+ {
+ return \"xgdx\\n\\tlsrd\\n\\txgdx\\n\\trora\\n\\trorb\";
+ }
+ else
+ {
+ rtx ops[2];
+
+ ops[1] = m68hc11_gen_highpart (HImode, operands[1]);
+ ops[0] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"lsrd\", ops);
+ if (!X_REG_P (operands[0]))
+ {
+ ops[1] = ops[0];
+ ops[0] = m68hc11_gen_highpart (HImode, operands[0]);
+ m68hc11_gen_movhi (insn, ops);
+ ops[0] = ops[1];
+ ops[1] = m68hc11_gen_lowpart (HImode, operands[1]);
+ m68hc11_gen_movhi (insn, ops);
+ }
+ else
+ {
+ /* Load the lowpart into X in case the operand is some N,x. */
+ ops[0] = gen_rtx_REG (HImode, HARD_X_REGNUM);
+ ops[1] = m68hc11_gen_lowpart (HImode, operands[1]);
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"xgdx\", ops);
+ }
+ output_asm_insn (\"rora\", ops);
+ output_asm_insn (\"rorb\", ops);
+ if (!X_REG_P (operands[0]))
+ {
+ ops[1] = ops[0];
+ ops[0] = m68hc11_gen_lowpart (HImode, operands[0]);
+ m68hc11_gen_movhi (insn, ops);
+ }
+ return \"\";
+ }
+}")
+
+(define_insn "*lshrsi3_const"
+ [(set (match_operand:SI 0 "register_operand" "+D")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (clobber (match_scratch:HI 2 "=y"))]
+ "TARGET_M6811 /* See *lshrsi3 note. */"
+ "*
+{
+ CC_STATUS_INIT;
+ return \"ldy\\t%1\\n\\tbsr\\t___lshrsi3\";
+}")
+
+(define_insn "*lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "+D,D")
+ (lshiftrt:SI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "y,mi")))
+ (clobber (match_scratch:HI 2 "=1,X"))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ /* There is a reload problem if we don't accept 'm' for the shift value.
+ A RELOAD_OTHER reload can be generated for operand 0 (class A_REGS)
+ and this conflicts with all reloads. Since X, Y, Z are used, there
+ are not enough registers in class A_REGS.
+
+ Assuming that 'operands[1]' does not refer to the stack (which
+ is true for the 68HC11 only), we temporarily save the value of Y.
+
+ For the 68HC12 we must also accept a constant because the Z register
+ is disabled when compiling with -fomit-frame-pointer. We can run into
+ a reload problem, and the *lshrsi3_const pattern was disabled
+ for that reason. */
+ if (!Y_REG_P (operands[2]))
+ {
+ rtx ops[1];
+ int y_dead = dead_register_here (insn, iy_reg);
+
+ ops[0] = operands[1];
+ if (y_dead == 0)
+ {
+ output_asm_insn (\"pshy\", operands);
+ if (reg_mentioned_p (stack_pointer_rtx, operands[1]))
+ ops[0] = adjust_address (operands[1], GET_MODE (operands[1]), 2);
+ }
+ output_asm_insn (\"ldy\\t%0\", ops);
+ output_asm_insn (\"bsr\\t___lshrsi3\", operands);
+ return y_dead == 0 ? \"puly\" : \"\";
+ }
+ return \"bsr\\t___lshrsi3\";
+}")
+
+(define_expand "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+ operand1 = force_reg (HImode, operand1);
+
+ emit_move_insn (scratch, operands[2]);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_LSHIFTRT (HImode,
+ operand1, scratch)),
+ gen_rtx_CLOBBER (VOIDmode, scratch))));
+ DONE;
+ }
+}")
+
+(define_insn "lshrhi3_const1"
+ [(set (match_operand:HI 0 "non_push_operand" "=dm,!*u*A")
+ (lshiftrt:HI (match_operand:HI 1 "non_push_operand" "0,0")
+ (const_int 1)))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (D_REG_P (operands[0]))
+ return \"lsrd\";
+
+ CC_STATUS_INIT;
+ return \"lsr\\t%h0\\n\\tror\\t%b0\";
+}")
+
+(define_insn "lshrhi3_const"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,!*A,!*A")
+ (lshiftrt:HI (match_operand:HI 1 "general_operand" "dm*A,!u,dm,!u")
+ (match_operand:HI 2 "const_int_operand" "i,i,i,i")))]
+ ""
+ "*
+{
+ int val = INTVAL (operands[2]);
+
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ if (val >= 8)
+ {
+ if (val == 8)
+ CC_STATUS_INIT;
+
+ if (!H_REG_P (operands[1]))
+ {
+ output_asm_insn (\"clra\", operands);
+ output_asm_insn (\"ldab\\t%h1\", operands);
+ }
+ else if (A_REG_P (operands[1]))
+ {
+ output_asm_insn (\"st%1\\t%t0\", operands);
+ output_asm_insn (\"ldab\\t%t0\", operands);
+ output_asm_insn (\"clra\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"tab\", operands);
+ output_asm_insn (\"clra\", operands);
+ }
+ val -= 8;
+ switch (val)
+ {
+ case 7:
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"tab\", operands);
+ output_asm_insn (\"rolb\", operands);
+ break;
+
+ case 6:
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"andb\\t#3\", operands);
+ break;
+
+ default:
+ while (val > 0)
+ {
+ val --;
+ output_asm_insn (\"lsrb\", operands);
+ }
+ break;
+ }
+ return \"\";
+ }
+
+ if (!D_REG_P (operands[1]))
+ m68hc11_gen_movhi (insn, operands);
+ switch (val)
+ {
+ case 7:
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"tab\", operands);
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"anda\\t#1\", operands);
+ CC_STATUS_INIT;
+ break;
+
+ default:
+ while (val > 0)
+ {
+ val --;
+ output_asm_insn (\"lsrd\", operands);
+ }
+ }
+ return \"\";
+}")
+
+(define_insn "*lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,*x")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "register_operand" "+x,+d")))
+ (clobber (match_dup 2))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ return \"bsr\\t___lshrhi3\";
+}")
+
+(define_expand "lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*lshrqi3_const1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=m,d,!u,!*q,!*A")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,0,0,0")
+ (const_int 1)))]
+ ""
+ "@
+ lsr\\t%b0
+ lsrb
+ lsr\\t%b0
+ lsr%0
+ #")
+
+(define_insn "*lshrqi3_const"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "const_int_operand" "")))]
+ ""
+ "*
+{
+ int i;
+ const char* insn_code;
+
+ if (D_REG_P (operands[0]) || DB_REG_P (operands[0]))
+ insn_code = \"lsrb\";
+ else if (DA_REG_P (operands[0]))
+ insn_code = \"lsra\";
+ else
+ return \"#\";
+
+ i = INTVAL (operands[2]);
+ if (i >= 8)
+ {
+ if (DA_REG_P (operands[0]))
+ return \"clra\";
+ else
+ return \"clrb\";
+ }
+ else if (i == 7)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"ldaa\\t#0\", operands);
+ return \"rola\";
+ }
+ else
+ {
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"ldab\\t#0\", operands);
+ return \"rolb\";
+ }
+ }
+ else if (i == 6)
+ {
+ if (DA_REG_P (operands[0]))
+ {
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"rola\", operands);
+ output_asm_insn (\"rola\", operands);
+ return \"anda\\t#3\";
+ }
+ else
+ {
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rolb\", operands);
+ output_asm_insn (\"rolb\", operands);
+ return \"andb\\t#3\";
+ }
+ }
+ while (--i >= 0)
+ {
+ output_asm_insn (insn_code, operands);
+ }
+ return \"\";
+}")
+
+(define_insn "*lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,!*q,!*A")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "0,0,0")
+ (match_operand:QI 2 "nonimmediate_operand"
+ "m*u*d*A,m*u*d*A,m*u")))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ if (!D_REG_P (operands[0]) && !Q_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ ops[0] = gen_rtx_REG (QImode, HARD_A_REGNUM);
+ ops[1] = operands[2];
+ m68hc11_gen_movqi (insn, ops);
+
+ if (!optimize || optimize_size)
+ {
+ return \"bsr\\t___lshrqi3\";
+ }
+
+ ops[0] = gen_label_rtx ();
+ ops[1] = gen_label_rtx ();
+ output_asm_insn (\"ble\\t%l1\", ops);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[0]));
+
+ output_asm_insn (\"lsrb\", operands);
+ output_asm_insn (\"deca\", operands);
+ output_asm_insn (\"bne\\t%l0\", ops);
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (ops[1]));
+ return \"\";
+}")
+
+(define_insn "*rotlqi3_with_carry"
+ [(set (match_operand:QI 0 "register_operand" "=d,!q")
+ (rotate:QI (match_operand:QI 1 "register_operand" "0,0")
+ (reg:QI CC_REGNUM)))]
+ ""
+ "*
+{
+ if (DA_REG_P (operands[0]))
+ return \"rola\";
+ else
+ return \"rolb\";
+}")
+
+(define_insn "*rotlhi3_with_carry"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotate:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ return \"rolb\\n\\trola\";
+}")
+
+(define_insn "*rotrhi3_with_carry"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotatert:HI (match_operand:HI 1 "register_operand" "0")
+ (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))]
+ ""
+ "*
+{
+ CC_STATUS_INIT;
+ return \"rora\\n\\trorb\";
+}")
+
+(define_insn "rotlqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,!q")
+ (rotate:QI (match_operand:QI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "const_int_operand" "i,i")))]
+ ""
+ "*
+{
+ m68hc11_gen_rotate (ROTATE, insn, operands);
+ return \"\";
+}")
+
+(define_insn "rotrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d,!q")
+ (rotatert:QI (match_operand:QI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "const_int_operand" "i,i")))]
+ ""
+ "*
+{
+ m68hc11_gen_rotate (ROTATERT, insn, operands);
+ return \"\";
+}")
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (rotate:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+ operand1 = force_reg (HImode, operand1);
+
+ emit_move_insn (scratch, operands[2]);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_ROTATE (HImode,
+ operand1, scratch)),
+ gen_rtx_CLOBBER (VOIDmode, scratch))));
+ DONE;
+ }
+}")
+
+(define_insn "rotlhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotate:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "i")))]
+ ""
+ "*
+{
+ m68hc11_gen_rotate (ROTATE, insn, operands);
+ return \"\";
+}")
+
+(define_insn "*rotlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,*x")
+ (rotate:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "+x,+d")))
+ (clobber (match_dup 2))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ return \"bsr\\t___rotlhi3\";
+}")
+
+(define_expand "rotrhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (rotatert:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx scratch = gen_reg_rtx (HImode);
+ operand1 = force_reg (HImode, operand1);
+
+ emit_move_insn (scratch, operands[2]);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_ROTATERT (HImode,
+ operand1, scratch)),
+ gen_rtx_CLOBBER (VOIDmode, scratch))));
+ DONE;
+ }
+}")
+
+(define_insn "rotrhi3_const"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotatert:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "i")))]
+ ""
+ "*
+{
+ m68hc11_gen_rotate (ROTATERT, insn, operands);
+ return \"\";
+}")
+
+(define_insn "*rotrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,*x")
+ (rotatert:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "+x,+d")))
+ (clobber (match_dup 2))]
+ ""
+ "*
+{
+ if (A_REG_P (operands[0]))
+ return \"#\";
+
+ return \"bsr\\t___rotrhi3\";
+}")
+
+;; Split a shift operation on an address register into a shift
+;; on D_REGNUM.
+(define_split /* "*rotrhi3_addr" */
+ [(set (match_operand:HI 0 "hard_addr_reg_operand" "")
+ (match_operator:HI 3 "m68hc11_shift_operator"
+ [(match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "register_operand" "")]))
+ (clobber (match_dup 2))]
+ "z_replacement_completed == 2"
+ [(parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (parallel [(set (reg:HI D_REGNUM)
+ (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 0)]))
+ (clobber (match_dup 0))])
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "")
+
+;;--------------------------------------------------------------------
+;;- 68HC12 Decrement/Increment and branch
+;;--------------------------------------------------------------------
+;; These patterns are used by loop optimization as well as peephole2.
+;; They must handle reloading themselves, and the scratch register
+;; is used for that. Even though we accept memory operands, we must not
+;; accept them in the predicate because that might create too many reloads
+;; (especially on HC12 due to its auto-inc/dec addressing modes).
+;;
+(define_expand "decrement_and_branch_until_zero"
+ [(parallel [(set (pc)
+ (if_then_else
+ (ne (plus:HI (match_operand:HI 0 "register_operand" "")
+ (const_int 0))
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))
+ (clobber (match_scratch:HI 2 ""))])]
+ "TARGET_M6812"
+ "")
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ "TARGET_M6812"
+ "
+{
+ /* Reject loops with a non-constant iteration count, since they
+ generate bigger code due to the handling of the loop register.
+ We can do better by using the peephole2 dbcc/ibcc patterns. */
+ if (INTVAL (operands[1]) == 0)
+ {
+ FAIL;
+ }
+
+ /* Note that for xxx_dbcc_dec_yy the gen_rtx_NE is only used to pass
+ the operator; its operands are not relevant. */
+ if (GET_MODE (operands[0]) == HImode)
+ {
+ emit_jump_insn (gen_m68hc12_dbcc_dec_hi (operands[0],
+ gen_rtx_NE (HImode,
+ operands[0],
+ const1_rtx),
+ operands[4]));
+ DONE;
+ }
+ if (GET_MODE (operands[0]) == QImode)
+ {
+ emit_jump_insn (gen_m68hc12_dbcc_dec_qi (operands[0],
+ gen_rtx_NE (QImode,
+ operands[0],
+ const1_rtx),
+ operands[4]));
+ DONE;
+ }
+
+ FAIL;
+}")
+
+;; Decrement-and-branch insns.
+(define_insn "m68hc12_dbcc_dec_hi"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "m68hc11_eq_compare_operator"
+ [(match_operand:HI 0 "register_operand" "+dxy,m*u*z")
+ (const_int 1)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0) (const_int -1)))
+ (clobber (match_scratch:HI 3 "=X,dxy"))]
+ "TARGET_M6812"
+ "*
+{
+ if (!H_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == EQ)
+ return \"dbeq\\t%0,%l2\";
+ else
+ return \"dbne\\t%0,%l2\";
+}")
+
+;; Increment-and-branch insns.
+(define_insn "m68hc12_dbcc_inc_hi"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "m68hc11_eq_compare_operator"
+ [(match_operand:HI 0 "register_operand" "+dxy,m*u*z")
+ (const_int -1)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0) (const_int 1)))
+ (clobber (match_scratch:HI 3 "=X,dxy"))]
+ "TARGET_M6812"
+ "*
+{
+ if (!H_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == EQ)
+ return \"ibeq\\t%0,%l2\";
+ else
+ return \"ibeq\\t%0,%l2\";
+}")
+
+;; Decrement-and-branch (QImode).
+(define_insn "m68hc12_dbcc_dec_qi"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "m68hc11_eq_compare_operator"
+ [(match_operand:QI 0 "register_operand" "+d,m*u*A")
+ (const_int 1)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:QI (match_dup 0) (const_int -1)))
+ (clobber (match_scratch:QI 3 "=X,d"))]
+ "TARGET_M6812"
+ "*
+{
+ if (!D_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == EQ)
+ return \"dbeq\\tb,%l2\";
+ else
+ return \"dbne\\tb,%l2\";
+}")
+
+;; Increment-and-branch (QImode).
+(define_insn "m68hc12_dbcc_inc_qi"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "m68hc11_eq_compare_operator"
+ [(match_operand:QI 0 "register_operand" "+d,m*u*A")
+ (const_int -1)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:QI (match_dup 0) (const_int 1)))
+ (clobber (match_scratch:QI 3 "=X,d"))]
+ "TARGET_M6812"
+ "*
+{
+ if (!D_REG_P (operands[0]))
+ return \"#\";
+
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == EQ)
+ return \"ibeq\\tb,%l2\";
+ else
+ return \"ibeq\\tb,%l2\";
+}")
+
+;; Split the above to handle the case where operand 0 is in memory
+;; (a pseudo register that could not get a hard register).
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "m68hc11_eq_compare_operator"
+ [(match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "const_int_operand" "")])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0) (match_operand 2 "const_int_operand" "")))
+ (clobber (match_operand:HI 5 "hard_reg_operand" ""))]
+ "TARGET_M6812 && reload_completed"
+ [(set (match_dup 5) (match_dup 0))
+ (set (match_dup 5) (plus:HI (match_dup 5) (match_dup 2)))
+ (set (match_dup 0) (match_dup 5))
+ (set (pc)
+ (if_then_else (match_op_dup 3
+ [(match_dup 5) (const_int 0)])
+ (label_ref (match_dup 4)) (pc)))]
+ "")
+
+;; Split the above to handle the case where operand 0 is in memory
+;; (a pseudo register that could not get a hard register).
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "m68hc11_eq_compare_operator"
+ [(match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "const_int_operand" "")])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:QI (match_dup 0) (match_operand 2 "const_int_operand" "")))
+ (clobber (match_operand:QI 5 "hard_reg_operand" ""))]
+ "TARGET_M6812 && reload_completed"
+ [(set (match_dup 5) (match_dup 0))
+ (set (match_dup 5) (plus:QI (match_dup 5) (match_dup 2)))
+ (set (match_dup 0) (match_dup 5))
+ (set (pc)
+ (if_then_else (match_op_dup 3
+ [(match_dup 5) (const_int 0)])
+ (label_ref (match_dup 4)) (pc)))]
+ "")
+
+;;--------------------------------------------------------------------
+;;- Jumps and transfers
+;;--------------------------------------------------------------------
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "bra\\t%l0")
+
+(define_expand "cbranchsi4"
+ [(set (cc0)
+ (compare (match_operand:SI 1 "tst_operand" "")
+ (match_operand:SI 2 "cmp_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+ operands[1] = force_reg (SImode, operands[1]);
+
+ m68hc11_expand_compare_and_branch (GET_CODE (operands[0]), operands[1],
+ operands[2], operands[3]);
+ DONE;
+}")
+
+(define_expand "cbranchhi4"
+ [(set (cc0)
+ (compare (match_operand:HI 1 "tst_operand" "")
+ (match_operand:HI 2 "cmp_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+
+ m68hc11_expand_compare_and_branch (GET_CODE (operands[0]), operands[1],
+ operands[2], operands[3]);
+ DONE;
+}")
+
+(define_expand "cbranchqi4"
+ [(set (cc0)
+ (compare (match_operand:QI 1 "tst_operand" "")
+ (match_operand:QI 2 "cmp_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
+ operands[1] = force_reg (QImode, operands[1]);
+
+ m68hc11_expand_compare_and_branch (GET_CODE (operands[0]), operands[1],
+ operands[2], operands[3]);
+ DONE;
+}")
+
+
+;;
+;; Test and branch instructions for 68HC12 for EQ and NE.
+;; 'z' must not appear in the constraints because the z replacement
+;; pass does not know how to restore the replacement register.
+;;
+(define_insn "*tbeq"
+ [(set (pc)
+ (if_then_else (eq (match_operand:HI 0 "register_operand" "dxy")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_M6812"
+ "*
+{
+ /* If the flags are already set correctly, use 'bne/beq' which are
+ smaller and a little bit faster. This happens quite often due
+ to reloading of operands[0]. In that case, flags are set correctly
+ due to the load instruction. */
+ if ((cc_status.value1 && rtx_equal_p (cc_status.value1, operands[0]))
+ || (cc_status.value2 && rtx_equal_p (cc_status.value2, operands[0])))
+ return \"beq\\t%l1\";
+ else
+ return \"tbeq\\t%0,%l1\";
+}")
+
+(define_insn "*tbne"
+ [(set (pc)
+ (if_then_else (ne (match_operand:HI 0 "register_operand" "dxy")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_M6812"
+ "*
+{
+ if ((cc_status.value1 && rtx_equal_p (cc_status.value1, operands[0]))
+ || (cc_status.value2 && rtx_equal_p (cc_status.value2, operands[0])))
+ return \"bne\\t%l1\";
+ else
+ return \"tbne\\t%0,%l1\";
+}")
+
+;;
+;; Test and branch with 8-bit register. Register must be B (or A).
+;;
+(define_insn "*tbeq8"
+ [(set (pc)
+ (if_then_else (eq (match_operand:QI 0 "register_operand" "d")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_M6812"
+ "*
+{
+ if ((cc_status.value1 && rtx_equal_p (cc_status.value1, operands[0]))
+ || (cc_status.value2 && rtx_equal_p (cc_status.value2, operands[0])))
+ return \"beq\\t%l1\";
+ else
+ return \"tbeq\\tb,%l1\";
+}")
+
+(define_insn "*tbne8"
+ [(set (pc)
+ (if_then_else (ne (match_operand:QI 0 "register_operand" "d")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_M6812"
+ "*
+{
+ if ((cc_status.value1 && rtx_equal_p (cc_status.value1, operands[0]))
+ || (cc_status.value2 && rtx_equal_p (cc_status.value2, operands[0])))
+ return \"bne\\t%l1\";
+ else
+ return \"tbne\\tb,%l1\";
+}")
+
+(define_insn "*beq"
+ [(set (pc)
+ (if_then_else (eq (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "beq\\t%l0")
+
+(define_insn "*bne"
+ [(set (pc)
+ (if_then_else (ne (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bne\\t%l0")
+
+(define_insn "*bgt"
+ [(set (pc)
+ (if_then_else (gt (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bgt\\t%l0")
+
+(define_insn "*bgtu"
+ [(set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bhi\\t%l0")
+
+(define_insn "*blt"
+ [(set (pc)
+ (if_then_else (lt (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bmi\\t%l0\";
+ else
+ return \"blt\\t%l0\";
+}")
+
+(define_insn "*bltu"
+ [(set (pc)
+ (if_then_else (ltu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "blo\\t%l0")
+
+(define_insn "*bge"
+ [(set (pc)
+ (if_then_else (ge (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bpl\\t%l0\";
+ else
+ return \"bge\\t%l0\";
+}")
+
+(define_insn "*bgeu"
+ [(set (pc)
+ (if_then_else (geu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bhs\\t%l0")
+
+(define_insn "*ble"
+ [(set (pc)
+ (if_then_else (le (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bmi\\t%l0\\n\\tbeq\\t%l0\";
+ else
+ return \"ble\\t%l0\";
+}")
+
+(define_insn "*bleu"
+ [(set (pc)
+ (if_then_else (leu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bls\\t%l0")
+
+;;--------------------------------------------------------------------
+;;- Negative test and branch
+;;--------------------------------------------------------------------
+(define_insn ""
+ [(set (pc)
+ (if_then_else (eq (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bne\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (ne (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "beq\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (gt (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bmi\\t%l0\\n\\tbeq\\t%l0\";
+ else
+ return \"ble\\t%l0\";
+}")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bls\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (lt (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bpl\\t%l0\";
+ else
+ return \"bge\\t%l0\";
+}")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (ltu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bhs\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (ge (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ return \"bmi\\t%l0\";
+ else
+ return \"blt\\t%l0\";
+}")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (geu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "blo\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (le (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bgt\\t%l0")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (leu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bhi\\t%l0")
+
+;;--------------------------------------------------------------------
+;;- Calls
+;;--------------------------------------------------------------------
+;;
+;;- Call a function that returns no value.
+(define_insn "call"
+ [(call (match_operand:QI 0 "memory_operand" "m")
+ (match_operand:SI 1 "general_operand" "g"))]
+ ;; Operand 1 not really used on the m68hc11.
+ ""
+ "*
+{
+ if (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
+ {
+ if (m68hc11_is_far_symbol (operands[0]))
+ {
+ if (TARGET_M6812)
+ {
+ output_asm_insn (\"call\\t%0\", operands);
+ return \"\";
+ }
+ else
+ {
+ output_asm_insn (\"pshb\", operands);
+ output_asm_insn (\"ldab\\t#%%page(%0)\", operands);
+ output_asm_insn (\"ldy\\t#%%addr(%0)\", operands);
+ return \"jsr\\t__call_a32\";
+ }
+ }
+ if (m68hc11_is_trap_symbol (operands[0]))
+ return \"swi\";
+ else
+ return \"bsr\\t%0\";
+ }
+ else
+ {
+ return \"jsr\\t%0\";
+ }
+}")
+
+(define_insn "call_value"
+ [(set (match_operand 0 "" "=g")
+ (call (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:SI 2 "general_operand" "g")))]
+ ""
+ "*
+{
+ if (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ if (m68hc11_is_far_symbol (operands[1]))
+ {
+ if (TARGET_M6812)
+ {
+ output_asm_insn (\"call\\t%1\", operands);
+ return \"\";
+ }
+ else
+ {
+ output_asm_insn (\"pshb\", operands);
+ output_asm_insn (\"ldab\\t#%%page(%1)\", operands);
+ output_asm_insn (\"ldy\\t#%%addr(%1)\", operands);
+ return \"jsr\\t__call_a32\";
+ }
+ }
+ if (m68hc11_is_trap_symbol (operands[1]))
+ return \"swi\";
+ else
+ return \"bsr\\t%1\";
+ }
+ else
+ {
+ return \"jsr\\t%1\";
+ }
+}")
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ "")
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "
+{
+ expand_prologue ();
+ DONE;
+}")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ expand_epilogue ();
+ DONE;
+}")
+
+;; Used for frameless functions which save no regs and allocate no locals.
+(define_expand "return"
+ [(return)]
+ "reload_completed && m68hc11_total_frame_size () == 0"
+ "
+{
+ int ret_size = 0;
+
+ if (crtl->return_rtx)
+ ret_size = GET_MODE_SIZE (GET_MODE (crtl->return_rtx));
+
+ /* Emit use notes only when HAVE_return is true. */
+ if (m68hc11_total_frame_size () != 0)
+ ret_size = 0;
+
+ if (ret_size && ret_size <= 2)
+ {
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (HImode, 1)))));
+ DONE;
+ }
+ if (ret_size)
+ {
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (SImode, 0)))));
+ DONE;
+ }
+}")
+
+(define_insn "*return_void"
+ [(return)]
+ "reload_completed"
+ "*
+{
+ rtx next = next_active_insn (insn);
+
+ if (next
+ && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == RETURN)
+ return \"\";
+ if (current_function_interrupt || current_function_trap)
+ return \"rti\";
+ else if (!current_function_far)
+ return \"rts\";
+ else if (TARGET_M6812)
+ return \"rtc\";
+ else
+ {
+ int ret_size = 0;
+
+ if (crtl->return_rtx)
+ ret_size = GET_MODE_SIZE (GET_MODE (crtl->return_rtx));
+
+ if (ret_size == 0)
+ return \"jmp\\t__return_void\";
+ if (ret_size <= 2)
+ return \"jmp\\t__return_16\";
+ if (ret_size <= 4)
+ return \"jmp\\t__return_32\";
+ return \"jmp\\t__return_16\";
+ }
+}")
+
+(define_insn "*return_16bit"
+ [(return)
+ (use (reg:HI D_REGNUM))]
+ "reload_completed && m68hc11_total_frame_size () == 0"
+ "*
+{
+ rtx next = next_active_insn (insn);
+
+ if (next
+ && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == RETURN)
+ return \"\";
+ if (current_function_interrupt || current_function_trap)
+ return \"rti\";
+ else if (!current_function_far)
+ return \"rts\";
+ else if (TARGET_M6812)
+ return \"rtc\";
+ else
+ return \"jmp\\t__return_16\";
+}")
+
+(define_insn "*return_32bit"
+ [(return)
+ (use (reg:SI 0))]
+ "reload_completed && m68hc11_total_frame_size () == 0"
+ "*
+{
+ rtx next = next_active_insn (insn);
+
+ if (next
+ && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == RETURN)
+ return \"\";
+ if (current_function_interrupt || current_function_trap)
+ return \"rti\";
+ else if (!current_function_far)
+ return \"rts\";
+ else if (TARGET_M6812)
+ return \"rtc\";
+ else
+ return \"jmp\\t__return_32\";
+}")
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:HI 0 "nonimmediate_operand" "xy"))]
+ ""
+ "jmp\\t0,%0")
+
+;;--------------------------------------------------------------------
+;;- Table jump
+;;--------------------------------------------------------------------
+;;
+;; Operand 0 is the address of the table element to use
+;; operand 1 is the CODE_LABEL for the table
+;;--------------------------------------------------------------------
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+ "")
+
+(define_insn "*jump_indirect"
+ [(parallel [
+ (set (pc) (match_operand:HI 0 "register_operand" "xy"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+ "jmp\\t0,%0")
+
+;;--------------------------------------------------------------------
+;;- Peepholes
+;;--------------------------------------------------------------------
+
+;;--------------------------------------------------------------------
+;;- 68HC12 dbcc/ibcc peepholes
+;;--------------------------------------------------------------------
+;;
+;; Replace: "addd #-1; bne L1" into "dbne d,L1"
+;; "addd #-1; beq L1" into "dbeq d,L1"
+;; "addd #1; bne L1" into "ibne d,L1"
+;; "addd #1; beq L1" into "ibeq d,L1"
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "m68hc11_eq_compare_operator"
+ [(match_dup 0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" "")) (pc)))]
+ "TARGET_M6812 && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)"
+ [(parallel [
+ (set (pc) (if_then_else (match_op_dup 2 [(match_dup 0) (match_dup 5)])
+ (label_ref (match_dup 3)) (pc)))
+ (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))
+ (clobber (match_dup 4))])]
+ "operands[4] = gen_rtx_SCRATCH(HImode);
+ operands[5] = GEN_INT (-INTVAL (operands[1]));")
+
+
+;;
+;; Replace: "addb #-1; bne L1" into "dbne b,L1"
+;; "addb #-1; beq L1" into "dbeq b,L1"
+;;
+(define_peephole2
+ [(set (match_operand:QI 0 "hard_reg_operand" "")
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 2 "m68hc11_eq_compare_operator"
+ [(match_dup 0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" "")) (pc)))]
+ "TARGET_M6812 && D_REG_P (operands[0])
+ && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)"
+ [(parallel [
+ (set (pc) (if_then_else (match_op_dup 2 [(match_dup 0) (match_dup 5)])
+ (label_ref (match_dup 3)) (pc)))
+ (set (match_dup 0) (plus:QI (match_dup 0) (match_dup 1)))
+ (clobber (match_dup 4))])]
+ "operands[4] = gen_rtx_SCRATCH(QImode);
+ operands[5] = GEN_INT (-INTVAL (operands[1]));")
+
+
+;;--------------------------------------------------------------------
+;;- Move peephole2
+;;--------------------------------------------------------------------
+
+;;
+;; Replace "leas 2,sp" with a "pulx" or a "puly".
+;; On 68HC12, this is one cycle slower but one byte smaller.
+;; PR target/6899: This peephole was not valid because a register CSE
+;; pass removes the pulx/puly. The 'use' clause ensures that the pulx is
+;; not removed.
+;;
+(define_peephole2
+ [(set (reg:HI SP_REGNUM) (plus:HI (reg:HI SP_REGNUM) (const_int 2)))
+ (match_scratch:HI 0 "xy")]
+ "TARGET_M6812 && optimize_size"
+ [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 0))]
+ "operands[1] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode,
+ gen_rtx_REG (HImode, HARD_SP_REGNUM)));")
+
+;; Replace: "pshx; tfr d,x; stx 0,sp" into "pshd; tfr d,x"
+;;
+;; PR 14542: emit a use to pretend we need the value of the initial register.
+;; Otherwise verify_local_live_at_start will die due to a live change
+;; of that register.
+;;
+(define_peephole2
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (match_operand:HI 0 "hard_reg_operand" ""))
+ (set (match_dup 0)
+ (match_operand:HI 1 "hard_reg_operand" ""))
+ (set (mem:HI (reg:HI SP_REGNUM))
+ (match_dup 0))]
+ "TARGET_M6812"
+ [(use (match_dup 0))
+ (set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (match_dup 1))
+ (set (match_dup 0) (match_dup 1))]
+ "")
+
+;;
+;; Change: "ldd 0,sp; pulx" into "puld"
+;; This sequence usually appears at the end of a function.
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (mem:HI (reg:HI SP_REGNUM)))
+ (use (match_dup 0))
+ (set (match_operand:HI 1 "hard_reg_operand" "")
+ (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
+ "peep2_reg_dead_p (2, operands[1])"
+ [(set (match_dup 0) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))
+ (use (match_dup 0))]
+ "")
+
+;; Replace: "pshx; clr 0,sp; clr 1,sp" by "clr 1,-sp; clr 1,-sp"
+;; This sequence appears when allocating local variables.
+(define_peephole2
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (match_operand:HI 0 "hard_reg_operand" ""))
+ (set (mem:QI (plus:HI (reg:HI SP_REGNUM) (const_int 1)))
+ (const_int 0))
+ (set (mem:QI (reg:HI SP_REGNUM))
+ (const_int 0))]
+ "TARGET_M6812"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (const_int 0))]
+ "")
+
+;; Likewise for HI mode
+(define_peephole2
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (match_operand:HI 0 "hard_reg_operand" ""))
+ (set (mem:HI (reg:HI SP_REGNUM))
+ (const_int 0))]
+ "TARGET_M6812"
+ [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (const_int 0))]
+ "")
+;;--------------------------------------------------------------------
+;;-
+;;--------------------------------------------------------------------
+;;
+;; Optimize memory<->memory moves when the value is also loaded in
+;; a register.
+;;
+(define_peephole2
+ [(set (match_operand:QI 0 "memory_operand" "")
+ (match_operand:QI 1 "memory_operand" ""))
+ (set (reg:QI D_REGNUM)
+ (match_operand:QI 2 "memory_operand" ""))]
+ "(rtx_equal_p (operands[0], operands[2]) && !side_effects_p (operands[0]))
+ || (GET_CODE (XEXP (operands[0], 0)) == REG
+ && GET_CODE (XEXP (operands[2], 0)) == POST_INC
+ && rtx_equal_p (XEXP (operands[0], 0), XEXP (XEXP (operands[2], 0), 0)))"
+ [(set (reg:QI D_REGNUM) (match_dup 1))
+ (set (match_dup 2) (reg:QI D_REGNUM))]
+ "")
+
+;;
+;; Remove a possible move before a compare instruction when that
+;; move targets a dead register; compare with the source instead.
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "hard_reg_operand" ""))
+ (set (cc0)
+ (compare (match_dup 0)
+ (match_operand:HI 2 "cmp_operand" "")))]
+ "(X_REG_P (operands[1]) || Y_REG_P (operands[1]))
+ && peep2_reg_dead_p (2, operands[0])
+ && !reg_mentioned_p (operands[0], operands[2])"
+ [(set (cc0) (compare (match_dup 1) (match_dup 2)))]
+ "")
+
+;;
+;; Optimize loading a constant to memory when that same constant
+;; is loaded to a hard register. Switch the two to use the register
+;; for memory initialization. In most cases, the constant is 0.
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))
+ (set (match_operand:HI 2 "hard_reg_operand" "")
+ (match_dup 1))]
+ "(D_REG_P (operands[2]) || X_REG_P (operands[2]) || Y_REG_P (operands[2]))
+ && !reg_mentioned_p (operands[2], operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;;
+;; Reorganize to optimize address computations.
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))]
+ "(INTVAL (operands[1]) >= -2 && INTVAL (operands[1]) <= 2)"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))]
+ "")
+
+;;
+;; Replace: "ldx #N; xgdx; addd <var>; xgdx" by "ldab #N; ldx <var>; abx"
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))
+ (match_scratch:QI 3 "d")]
+ "TARGET_M6811 && (INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0x0ff)"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:HI (zero_extend:HI (match_dup 3)) (match_dup 0)))]
+ "operands[4] = m68hc11_gen_lowpart (QImode, operands[1]);")
+
+;;
+;; Likewise for 68HC12: load the variable first, then add the constant
+;; (the addition can then use a constant-offset leax/leay form).
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_M6812"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))]
+ "")
+
+;;
+;; Optimize an address register increment and a compare to use
+;; a PRE_INC or PRE_DEC addressing mode (disabled on the tst insn
+;; before reload, but can be enabled after).
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (cc0)
+ (compare (match_operand:QI 2 "memory_operand" "")
+ (const_int 0)))]
+ "TARGET_AUTO_INC_DEC
+ && (INTVAL (operands[1]) == -1 || INTVAL (operands[1]) == 1)
+ && reg_mentioned_p (operands[0], operands[2])"
+ [(set (cc0)
+ (compare (match_dup 3)
+ (const_int 0)))]
+ "if (INTVAL (operands[1]) == 1)
+ operands[3] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_INC (HImode, operands[0]));
+ else
+ operands[3] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_DEC (HImode, operands[0]));
+ ")
+
+;;
+;; Likewise for compare.
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (cc0)
+ (compare (match_operand:QI 2 "hard_reg_operand" "")
+ (match_operand:QI 3 "memory_operand" "")))]
+ "TARGET_AUTO_INC_DEC
+ && (INTVAL (operands[1]) == -1 || INTVAL (operands[1]) == 1)
+ && reg_mentioned_p (operands[0], operands[3])"
+ [(set (cc0) (compare (match_dup 2) (match_dup 4)))]
+ "if (INTVAL (operands[1]) == 1)
+ operands[4] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_INC (HImode, operands[0]));
+ else
+ operands[4] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_DEC (HImode, operands[0]));
+ ")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "const_int_operand" "")))
+ (set (cc0)
+ (compare (match_operand:QI 2 "memory_operand" "")
+ (match_operand:QI 3 "hard_reg_operand" "")))]
+ "TARGET_AUTO_INC_DEC
+ && (INTVAL (operands[1]) == -1 || INTVAL (operands[1]) == 1)
+ && reg_mentioned_p (operands[0], operands[2])"
+ [(set (cc0) (compare (match_dup 4) (match_dup 3)))]
+ "if (INTVAL (operands[1]) == 1)
+ operands[4] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_INC (HImode, operands[0]));
+ else
+ operands[4] = gen_rtx_MEM (QImode,
+ gen_rtx_PRE_DEC (HImode, operands[0]));
+ ")
+
+;;
+;; Replace a "ldx #N; addx <sp>" with a "ldx <sp>; addx #n"
+;; (avoids many temporary moves because we can't add sp to another reg easily)
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0) (plus:HI (match_dup 0) (reg:HI SP_REGNUM)))]
+ ""
+ [(set (match_dup 0) (reg:HI SP_REGNUM))
+ (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))]
+ "")
+
+;;
+;; Replace "ldd #N; addd <op>" with "ldd <op>; addd #N".
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (match_operand:HI 2 "general_operand" "")))]
+ "(INTVAL (operands[1]) >= -2 && INTVAL (operands[1]) <= 2)"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))]
+ "")
+
+;;
+;; Compute a 32-bit shift left by one directly into the two 16-bit
+;; destinations: shift the low word in D, then rotate the carry into
+;; the high word.
+;;
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "hard_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "general_operand" "")
+ (const_int 1)))
+ (clobber (match_scratch:HI 2 ""))])
+ (set (match_operand:HI 3 "nonimmediate_operand" "") (reg:HI D_REGNUM))
+ (set (match_operand:HI 4 "nonimmediate_operand" "") (reg:HI X_REGNUM))]
+ "!X_REG_P (operands[1])
+ && peep2_reg_dead_p (2, gen_rtx_REG (HImode, D_REGNUM))
+ && peep2_reg_dead_p (3, gen_rtx_REG (HImode, X_REGNUM))"
+ [(set (reg:HI D_REGNUM) (match_dup 5))
+ (set (reg:HI D_REGNUM) (ashift:HI (reg:HI D_REGNUM) (const_int 1)))
+ (set (match_dup 3) (reg:HI D_REGNUM))
+ (set (reg:HI D_REGNUM) (match_dup 6))
+ (parallel [(set (reg:HI D_REGNUM)
+ (rotate:HI (reg:HI D_REGNUM) (const_int 1)))
+ (clobber (reg:HI CC_REGNUM))])
+ (set (match_dup 4) (reg:HI D_REGNUM))]
+ "operands[5] = m68hc11_gen_lowpart (HImode, operands[1]);
+ operands[6] = m68hc11_gen_highpart (HImode, operands[1]);")
+
+;;
+;; Replace a "ldd <mem>; psha; pshb" with a "ldx <mem>; pshx".
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM)))
+ (match_dup 0))
+ (match_scratch:HI 2 "x")]
+ "TARGET_M6811 && D_REG_P (operands[0]) && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))]
+ "")
+
+;;
+;; Remove one load when copying a value to/from memory and also
+;; to a register. Take care not to clobber a register possibly used
+;; by operand 2.
+;; Replace: "ldd 0,y; std 2,y; ldx 0,y" into "ldx 0,y; stx 2,y"
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "general_operand" ""))
+ (set (match_operand:HI 2 "nonimmediate_operand" "") (match_dup 0))
+ (set (match_operand:HI 3 "hard_reg_operand" "") (match_dup 1))]
+ "peep2_reg_dead_p (2, operands[0])
+ && !side_effects_p (operands[1])
+ && !side_effects_p (operands[2])
+ && !reg_mentioned_p (operands[3], operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "")
+
+;;
+;; Replace a "ldd <mem>; addd #N; std <mem>" into a
+;; "ldx <mem>; leax; stx <mem>" if we have a free X/Y register
+;; and the constant is small.
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "general_operand" ""))
+ (set (match_dup 0) (plus:HI (match_dup 0)
+ (match_operand:HI 2 "const_int_operand" "")))
+ (set (match_operand:HI 3 "nonimmediate_operand" "")
+ (match_dup 0))
+ (match_scratch:HI 4 "xy")]
+ "D_REG_P (operands[0])
+ && (TARGET_M6812
+ || (INTVAL (operands[2]) >= -2 && INTVAL (operands[2]) <= 2))
+ && peep2_reg_dead_p (3, operands[0])"
+ [(set (match_dup 4) (match_dup 1))
+ (set (match_dup 4) (plus:HI (match_dup 4) (match_dup 2)))
+ (set (match_dup 3) (match_dup 4))]
+ "if (reg_mentioned_p (operands[4], operands[1])) FAIL;
+ if (reg_mentioned_p (operands[4], operands[3])) FAIL;")
+
+;;--------------------------------------------------------------------
+;;- Bset peephole2
+;;--------------------------------------------------------------------
+;; These peepholes try to replace some logical sequences by 'bset' and 'bclr'.
+;;
+;; Replace 'ldab <mem>; orab #N; stab <mem>' by 'bset <mem> #N'.
+;; Register D must be dead and there must be no register side effects for mem.
+;; The <mem> *can* be volatile; this is why we must not use 'side_effects_p'.
+;; The good side effect is that it makes the sequence atomic.
+;;
+(define_peephole2
+ [(set (match_operand:QI 0 "hard_reg_operand" "")
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (set (match_dup 0) (ior:QI (match_dup 0)
+ (match_operand:QI 2 "const_int_operand" "")))
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_M6812 || m68hc11_indirect_p (operands[1], QImode))
+ && (GET_CODE (operands[1]) != MEM || !auto_inc_p (XEXP (operands[1], 0)))
+ && peep2_reg_dead_p (3, operands[0])"
+ [(set (match_dup 1) (ior:QI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "nonimmediate_operand" ""))
+ (set (match_dup 0) (ior:HI (match_dup 0)
+ (match_operand:HI 2 "const_int_operand" "")))
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_M6812 || m68hc11_indirect_p (operands[1], HImode))
+ && (GET_CODE (operands[1]) != MEM || !auto_inc_p (XEXP (operands[1], 0)))
+ && peep2_reg_dead_p (3, operands[0])"
+ [(set (match_dup 1) (ior:HI (match_dup 1) (match_dup 2)))]
+ "")
+
+;;--------------------------------------------------------------------
+;;- Bclr peephole2
+;;--------------------------------------------------------------------
+;; Replace 'ldab <mem>; andab #N; stab <mem>' by 'bclr <mem> #N'.
+;; See Bset peephole2.
+;;
+(define_peephole2
+ [(set (match_operand:QI 0 "hard_reg_operand" "")
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (set (match_dup 0) (and:QI (match_dup 0)
+ (match_operand:QI 2 "const_int_operand" "")))
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_M6812 || m68hc11_indirect_p (operands[1], QImode))
+ && (GET_CODE (operands[1]) != MEM || !auto_inc_p (XEXP (operands[1], 0)))
+ && peep2_reg_dead_p (3, operands[0])"
+ [(set (match_dup 1) (and:QI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "nonimmediate_operand" ""))
+ (set (match_dup 0) (and:HI (match_dup 0)
+ (match_operand:HI 2 "const_int_operand" "")))
+ (set (match_dup 1) (match_dup 0))]
+ "(TARGET_M6812 || m68hc11_indirect_p (operands[1], HImode))
+ && (GET_CODE (operands[1]) != MEM || !auto_inc_p (XEXP (operands[1], 0)))
+ && peep2_reg_dead_p (3, operands[0])"
+ [(set (match_dup 1) (and:HI (match_dup 1) (match_dup 2)))]
+ "")
+
+
+;;--------------------------------------------------------------------
+;;- Compare peephole2
+;;--------------------------------------------------------------------
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "hard_reg_operand" ""))
+ (set (match_dup 1) (plus:HI (match_dup 1)
+ (match_operand:HI 2 "const_int_operand" "")))
+ (set (cc0) (compare (match_dup 0)
+ (const_int 0)))]
+ "peep2_reg_dead_p (3, operands[0]) && !Z_REG_P (operands[1])"
+ [(set (match_dup 1) (plus:HI (match_dup 1) (match_dup 2)))
+ (set (cc0) (compare (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "hard_reg_operand" ""))
+ (set (match_operand:HI 2 "hard_reg_operand" "")
+ (plus:HI (match_dup 2)
+ (match_operand:HI 3 "const_int_operand" "")))
+ (set (match_operand:HI 4 "memory_operand" "") (match_dup 2))
+ (set (cc0) (compare (match_operand:HI 5 "hard_reg_operand" "")
+ (const_int 0)))]
+ "peep2_reg_dead_p (4, operands[5]) && !Z_REG_P (operands[2])
+ && !reg_mentioned_p (operands[2], operands[4])
+
+ && ((rtx_equal_p (operands[5], operands[0])
+ && rtx_equal_p (operands[2], operands[1]))
+
+ || (rtx_equal_p (operands[5], operands[1])
+ && rtx_equal_p (operands[2], operands[0])))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 2) (plus:HI (match_dup 2) (match_dup 3)))
+ (set (match_dup 4) (match_dup 2))
+ (set (cc0) (compare (match_dup 2) (match_dup 3)))]
+ "")
+
+
+;;--------------------------------------------------------------------
+;;- Load peephole2
+;;--------------------------------------------------------------------
+;;
+;; Optimize initialization of two hard registers from the same memory
+;; location.  Since X, Y and D cannot easily be copied to one another,
+;; load both registers from the same memory location instead.
+;;
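+;; An illustrative sketch (assumed label, not from the original source):
+;;
+;;      ldx   value                   ldx   value
+;;      <copy X to Y via D>    ==>    ldy   value
+;;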
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (set (match_operand:HI 2 "hard_reg_operand" "") (match_dup 0))]
+ "TARGET_M6811
+ && !side_effects_p (operands[1])
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 1))]
+ "")
+
+;; Replace "ldd #N; addd <op>" with "ldd <op>; addd #N".
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "nonimmediate_operand" "") (const_int 0))
+ (set (match_operand:HI 1 "nonimmediate_operand" "") (const_int 0))
+ (set (match_operand:HI 2 "nonimmediate_operand" "") (const_int 0))
+ (set (match_operand:HI 3 "nonimmediate_operand" "") (const_int 0))
+ (match_scratch:HI 4 "d")]
+ ""
+ [(set (match_dup 4) (const_int 0))
+ (set (match_dup 0) (match_dup 4))
+ (set (match_dup 1) (match_dup 4))
+ (set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 4))]
+ "")
+
+;;
+;; Replace "ldd #N; addd <op>" with "ldd <op>; addd #N".
+;;
+(define_peephole2
+ [(set (match_operand:HI 0 "nonimmediate_operand" "") (const_int 0))
+ (set (match_operand:HI 1 "nonimmediate_operand" "") (const_int 0))
+ (set (match_operand:HI 2 "nonimmediate_operand" "") (const_int 0))
+ (match_scratch:HI 3 "d")]
+ ""
+ [(set (match_dup 3) (const_int 0))
+ (set (match_dup 0) (match_dup 3))
+ (set (match_dup 1) (match_dup 3))
+ (set (match_dup 2) (match_dup 3))]
+ "")
+
+;;
+;; Replace "ldd #N; addd <op>" with "ldd <op>; addd #N".
+;;
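+;; An illustrative sketch (assumed code, not from the original source):
+;;
+;;      ldd   #0                      ldx   #0
+;;      pshb                          pshx
+;;      psha                   ==>    pshx
+;;      pshb                          pshx
+;;      psha
+;;      pshb
+;;      psha
+;;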
+(define_peephole2
+ [(set (match_operand:HI 0 "hard_reg_operand" "") (const_int 0))
+ (set (match_operand:HI 1 "push_operand" "") (match_dup 0))
+ (set (match_operand:HI 2 "push_operand" "") (match_dup 0))
+ (set (match_operand:HI 3 "push_operand" "") (match_dup 0))
+ (match_scratch:HI 4 "x")]
+ "TARGET_M6811 && D_REG_P (operands[0]) && peep2_reg_dead_p (4, operands[0])"
+ [(set (match_dup 4) (const_int 0))
+ (set (match_dup 1) (match_dup 4))
+ (set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 4))]
+ "")
+
+;;
+;; This peephole catches the address computations generated by the reload
+;; pass.
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "xy")
+ (match_operand:HI 1 "const_int_operand" ""))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM)
+ (plus (reg:HI D_REGNUM)
+ (match_operand:HI 2 "general_operand" "")))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "(INTVAL (operands[1]) & 0x0FF) == 0"
+ "*
+{
+ int value_loaded = 1;
+
+ if (X_REG_P (operands[0]) || SP_REG_P (operands[2]))
+ {
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = operands[2];
+ m68hc11_gen_movhi (insn, ops);
+ output_asm_insn (\"xgd%0\", operands);
+ }
+ else if (Y_REG_P (operands[0]))
+ {
+ if (reg_mentioned_p (iy_reg, operands[2]))
+ output_asm_insn (\"ldy\\t%2\", operands);
+ else
+ value_loaded = 0;
+ output_asm_insn (\"xgdy\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ldd\\t%2\", operands);
+ }
+
+ if (value_loaded == 0)
+ output_asm_insn (\"ldd\\t%2\", operands);
+ if ((INTVAL (operands[1]) & 0x0ff00) == 0x100)
+ output_asm_insn (\"inca\", operands);
+ else if ((INTVAL (operands[1]) & 0x0ff00) == 0xff00)
+ output_asm_insn (\"deca\", operands);
+ else if (INTVAL (operands[1]) != 0)
+ output_asm_insn (\"adda\\t%h1\", operands);
+
+ if (X_REG_P (operands[0]))
+ return \"xgdx\";
+ else if (Y_REG_P (operands[0]))
+ return \"xgdy\";
+ else
+ return \"\";
+}
+")
+
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "h")
+ (match_operand:HI 1 "non_push_operand" "g"))
+ (set (match_operand:HI 2 "hard_reg_operand" "h")
+ (match_dup 0))]
+ "find_regno_note (insn, REG_DEAD, REGNO (operands[0]))
+ && !S_REG_P (operands[2])"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[2];
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "h")
+ (match_operand:HI 1 "hard_reg_operand" "h"))
+ (set (match_operand:HI 2 "non_push_operand" "g")
+ (match_dup 0))]
+ "find_regno_note (insn, REG_DEAD, REGNO (operands[0]))
+ && !S_REG_P (operands[2])"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[2];
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+;;
+;; Catch a (set X/Y D) followed by a swap. In this form, D is dead after
+;; the set, so we don't need to emit anything. 'ins1' refers to the
+;; (set ...) insn.
+;;
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "A") (reg:HI D_REGNUM))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "find_regno_note (ins1, REG_DEAD, HARD_D_REGNUM)"
+ "*
+{
+ cc_status = cc_prev_status;
+ return \"\";
+}
+")
+
+;; Same as above, but a split may have left a no-op set between the
+;; two insns.
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "A") (reg:HI D_REGNUM))
+ (set (match_dup 0) (match_dup 0))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ "find_regno_note (ins1, REG_DEAD, HARD_D_REGNUM)"
+ "*
+{
+ cc_status = cc_prev_status;
+ return \"\";
+}
+")
+
+;;
+;; Catch a (set X/Y D) followed by an xgdx/xgdy.  D is not dead,
+;; so we must at least set up X/Y with the value of D.
+;;
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "A") (reg:HI D_REGNUM))
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+;;;
+;;; Catch an xgdx/xgdy followed by a (set D X/Y). If X/Y is dead, we don't
+;;; need to emit anything. Otherwise, we just need a copy of D to X/Y.
+;;;
+(define_peephole
+ [(parallel [(set (reg:HI D_REGNUM) (match_operand:HI 0 "hard_reg_operand" "A"))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM) (match_dup 0))]
+ "find_regno_note (insn, REG_DEAD, REGNO (operands[0]))"
+ "*
+{
+ cc_status = cc_prev_status;
+ return \"\";
+}
+")
+
+;;;
+;;; Same catch as above with a QImode set of D; X/Y is dead, so we
+;;; don't need to emit anything.
+;;;
+(define_peephole
+ [(parallel [(set (reg:HI D_REGNUM) (match_operand:HI 0 "hard_reg_operand" "A"))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (match_operand:QI 1 "hard_reg_operand" "A"))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[0]))"
+ "*
+{
+ cc_status = cc_prev_status;
+ return \"\";
+}
+")
+
+;;;
+;;; Catch an xgdx/xgdy followed by a (set D X/Y) where X/Y is not dead:
+;;; a single copy of D to X/Y is enough.
+;;;
+(define_peephole
+ [(parallel [(set (reg:HI D_REGNUM) (match_operand:HI 0 "hard_reg_operand" "A"))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:HI D_REGNUM) (match_dup 0))]
+ ""
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+;;;
+;;; Same peephole with a QImode set.  The copy is made 16-bit wide to
+;;; match the xgdx exchange.
+;;;
+(define_peephole
+ [(parallel [(set (reg:HI D_REGNUM) (match_operand:HI 0 "hard_reg_operand" "A"))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (set (reg:QI D_REGNUM) (match_operand:QI 1 "hard_reg_operand" "A"))]
+ "REGNO (operands[0]) == REGNO (operands[1])"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = gen_rtx_REG (HImode, HARD_D_REGNUM);
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+;;;
+;;; Catch two consecutive xgdx or xgdy, emit nothing.
+;;;
+(define_peephole
+ [(parallel [(set (reg:HI D_REGNUM) (match_operand:HI 0 "hard_reg_operand" "A"))
+ (set (match_dup 0) (reg:HI D_REGNUM))])
+ (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+ (set (match_dup 0) (reg:HI D_REGNUM))])]
+ ""
+ "*
+{
+ cc_status = cc_prev_status;
+ return \"\";
+}
+")
+
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "stack_register_operand" ""))
+ (set (match_operand:HI 2 "hard_reg_operand" "")
+ (match_operand:HI 3 "memory_operand" "m"))
+ (set (match_dup 0)
+ (match_operand:HI 4 "memory_operand" "m"))]
+ "IS_STACK_POP (operands[4])
+ && (GET_CODE (operands[3]) == MEM &&
+ rtx_equal_p (operands[0], XEXP (operands[3], 0)))"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[2];
+ ops[1] = gen_rtx_MEM (HImode,
+ gen_rtx_POST_INC (HImode, stack_pointer_rtx));
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}
+")
+
+;;
+;; Catch (d = -1) (d = d + sp) to avoid adjusting SP twice.
+;;
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "dA") (const_int -1))
+ (set (match_dup 0) (plus:HI (match_dup 0) (reg:HI SP_REGNUM)))]
+ "TARGET_M6811"
+ "*
+{
+ return \"sts\\t%t0\\n\\tld%0\\t%t0\";
+}
+")
+
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (set (match_operand:HI 2 "hard_reg_operand" "") (match_dup 0))]
+ "TARGET_M6811
+ && !side_effects_p (operands[1])
+ && !reg_mentioned_p (operands[0], operands[1])"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ ops[0] = operands[2];
+ m68hc11_gen_movhi (insn, ops);
+ return \"\";
+}")
+
+;; Peephole for Z register replacement.
+;; Avoid using the _.tmp register when comparing D and X if we can
+;; compare directly with the soft register.
+(define_peephole
+ [(set (match_operand:HI 0 "hard_reg_operand" "") (reg:HI SOFT_XY_REGNUM))
+ (set (reg:HI SOFT_TMP_REGNUM) (match_dup 0))
+ (set (cc0) (compare (match_operand:HI 2 "hard_reg_operand" "")
+ (reg:HI SOFT_TMP_REGNUM)))]
+ "X_REG_P (operands[0]) || Y_REG_P (operands[0])"
+ "*
+{
+ rtx ops[2];
+
+ ops[0] = operands[0];
+ ops[1] = operands[1];
+ m68hc11_gen_movhi (insn, ops);
+ return \"cp%2\\t%1\";
+}")
diff --git a/gcc/config/m68hc11/m68hc11.opt b/gcc/config/m68hc11/m68hc11.opt
new file mode 100644
index 000000000..f0f29f2a7
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc11.opt
@@ -0,0 +1,94 @@
+; Options for the Motorola 68HC11 and 68HC12 port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m6811
+Target RejectNegative InverseMask(M6812, M6811)
+Compile for a 68HC11
+
+m6812
+Target RejectNegative Mask(M6812)
+Compile for a 68HC12
+
+m68hc11
+Target RejectNegative InverseMask(M6812)
+Compile for a 68HC11
+
+m68hc12
+Target RejectNegative Mask(M6812) MaskExists
+Compile for a 68HC12
+
+; At the moment, there is no difference between the code generated
+; for -m68hc12 and -m68hcs12.
+m68hcs12
+Target RejectNegative Mask(M6812) MaskExists
+Compile for a 68HCS12
+
+m68s12
+Target RejectNegative Mask(M6812) MaskExists
+Compile for a 68HCS12
+
+mauto-incdec
+Target RejectNegative Report Mask(AUTO_INC_DEC)
+Automatic pre/post increment and decrement addressing allowed
+
+minmax
+Target RejectNegative Report Mask(MIN_MAX)
+Min/max instructions allowed
+
+mlong-calls
+Target RejectNegative Report Mask(LONG_CALLS)
+Use call and rtc for function calls and returns
+
+mnoauto-incdec
+Target RejectNegative Report InverseMask(AUTO_INC_DEC)
+Automatic pre/post increment and decrement addressing not allowed
+
+mnolong-calls
+Target RejectNegative Report InverseMask(LONG_CALLS)
+Use jsr and rts for function calls and returns
+
+mnominmax
+Target RejectNegative Report InverseMask(MIN_MAX)
+Min/max instructions not allowed
+
+mnorelax
+Target RejectNegative Report InverseMask(NO_DIRECT_MODE)
+Use direct addressing mode for soft registers
+
+mnoshort
+Target RejectNegative Report InverseMask(SHORT)
+Compile with 32-bit integer mode
+
+; Currently ignored.
+mreg-alloc=
+Target RejectNegative Joined
+Specify the register allocation order
+
+mrelax
+Target RejectNegative Report Mask(NO_DIRECT_MODE)
+Do not use direct addressing mode for soft registers
+
+mshort
+Target RejectNegative Report Mask(SHORT)
+Compile with 16-bit integer mode
+
+msoft-reg-count=
+Target RejectNegative Joined UInteger Var(m68hc11_soft_reg_count) Init(-1)
+Indicate the number of soft registers available
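+
+; For example (illustrative, not part of the original file),
+; 'gcc -m68hc12 -mshort -msoft-reg-count=4' selects 68HC12 code
+; generation with 16-bit integers and four soft registers.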
diff --git a/gcc/config/m68hc11/m68hc12.h b/gcc/config/m68hc11/m68hc12.h
new file mode 100644
index 000000000..22bdc008c
--- /dev/null
+++ b/gcc/config/m68hc11/m68hc12.h
@@ -0,0 +1,45 @@
+/* Definitions of target machine for GNU compiler, for m68hc12.
+ Copyright (C) 1999, 2000, 2001, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Stephane Carrez (stcarrez@nerim.fr).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Compile and assemble for a 68hc12 unless there is a -m68hc11 option. */
+#define ASM_SPEC \
+"%{m68hc11:-m68hc11}" \
+"%{m68hcs12:-m68hcs12}" \
+"%{!m68hc11:%{!m68hcs12:-m68hc12}}"
+#define LIB_SPEC ""
+#define CC1_SPEC ""
+
+/* We need to tell the linker the target ELF format.  Just pass an
+ emulation option.  This can be overridden with gcc's -Wl option.  */
+#define LINK_SPEC \
+"%{m68hc11:-m m68hc11elf}" \
+"%{m68hcs12:-m m68hc12elf}" \
+"%{!m68hc11:%{!m68hcs12:-m m68hc11elf}} %{mrelax:-relax}"
+
+#define CPP_SPEC \
+"%{mshort:-D__HAVE_SHORT_INT__ -D__INT__=16}\
+ %{!mshort:-D__INT__=32}\
+ %{m68hc11:-Dmc6811 -DMC6811 -Dmc68hc11}\
+ %{!m68hc11:%{!m68hc12:-Dmc6812 -DMC6812 -Dmc68hc12}}\
+ %{m68hcs12:-Dmc6812 -DMC6812 -Dmc68hcs12}\
+ %{fshort-double:-D__HAVE_SHORT_DOUBLE__}"
+
+/* Default target_flags if no switches specified. */
+#define TARGET_DEFAULT (MASK_M6812)
diff --git a/gcc/config/m68hc11/predicates.md b/gcc/config/m68hc11/predicates.md
new file mode 100644
index 000000000..77a524a0e
--- /dev/null
+++ b/gcc/config/m68hc11/predicates.md
@@ -0,0 +1,228 @@
+;; Predicate definitions for Motorola 68HC11 and 68HC12.
+;; Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if the operand is the stack pointer register.
+
+(define_predicate "stack_register_operand"
+ (match_code "subreg,reg")
+{
+ return SP_REG_P (op);
+})
+
+;; Return true if the operand is a pseudo register, the D register
+;; or, in QImode, the B register.
+
+(define_predicate "d_register_operand"
+ (match_code "subreg,reg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = XEXP (op, 0);
+
+ return GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO (op) == HARD_D_REGNUM
+ || (mode == QImode && REGNO (op) == HARD_B_REGNUM));
+})
+
+;; Return true if the operand is the X, Y or Z hard address register.
+
+(define_predicate "hard_addr_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = XEXP (op, 0);
+
+ return GET_CODE (op) == REG
+ && (REGNO (op) == HARD_X_REGNUM
+ || REGNO (op) == HARD_Y_REGNUM
+ || REGNO (op) == HARD_Z_REGNUM);
+})
+
+;; Return true if the operand is a pseudo register or a hard register.
+
+(define_predicate "hard_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = XEXP (op, 0);
+
+ return GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || H_REGNO_P (REGNO (op)));
+})
+
+;; Return true if op is a logical operator (and, ior, xor).
+
+(define_predicate "m68hc11_logical_operator"
+ (match_code "and,ior,xor")
+{
+ return GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR;
+})
+
+;; Return true if op is an arithmetic, logical or shift operator.
+
+(define_predicate "m68hc11_arith_operator"
+ (match_code "and,ior,xor,plus,minus,ashift,ashiftrt,lshiftrt,rotate,rotatert")
+{
+ return GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR
+ || GET_CODE (op) == PLUS || GET_CODE (op) == MINUS
+ || GET_CODE (op) == ASHIFT || GET_CODE (op) == ASHIFTRT
+ || GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ROTATE
+ || GET_CODE (op) == ROTATERT;
+})
+
+;; Return true if op is an arithmetic or logical operator, excluding
+;; shifts.
+
+(define_predicate "m68hc11_non_shift_operator"
+ (match_code "and,ior,xor,plus,minus")
+{
+ return GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR
+ || GET_CODE (op) == PLUS || GET_CODE (op) == MINUS;
+})
+
+;; Return true if op is a unary operator (neg, not, sign/zero extend).
+
+(define_predicate "m68hc11_unary_operator"
+ (match_code "neg,not,sign_extend,zero_extend")
+{
+ return GET_CODE (op) == NEG || GET_CODE (op) == NOT
+ || GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
+})
+
+;; Return true if op is a shift operator.
+
+(define_predicate "m68hc11_shift_operator"
+ (match_code "ashift,ashiftrt,lshiftrt,rotate,rotatert")
+{
+ return GET_CODE (op) == ROTATE || GET_CODE (op) == ROTATERT
+ || GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFT
+ || GET_CODE (op) == ASHIFTRT;
+})
+
+;; Return true if op is an equality comparison operator (eq or ne).
+
+(define_predicate "m68hc11_eq_compare_operator"
+ (match_code "eq,ne")
+{
+ return GET_CODE (op) == EQ || GET_CODE (op) == NE;
+})
+
+;; Return true for a general operand that is not a push operand.
+
+(define_predicate "non_push_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (general_operand (op, mode) == 0)
+ return 0;
+
+ if (push_operand (op, mode) == 1)
+ return 0;
+ return 1;
+})
+
+;; Return true for a general operand that is not a push operand and
+;; that can be split into low and high parts.
+
+(define_predicate "splitable_operand"
+ (match_code "subreg,reg,mem,symbol_ref,label_ref,const_int,const_double")
+{
+ if (general_operand (op, mode) == 0)
+ return 0;
+
+ if (push_operand (op, mode) == 1)
+ return 0;
+
+ /* Reject a (MEM (MEM X)) because the patterns that use this predicate
+ need to split such addresses to access the low and high parts, but it
+ is not possible to express a valid address for the low part. */
+ if (mode != QImode && GET_CODE (op) == MEM
+ && GET_CODE (XEXP (op, 0)) == MEM)
+ return 0;
+ return 1;
+})
+
+;; Return true for a register, or for a memory operand whose address
+;; is simple enough (symbolic, stack push, or valid and non-indirect).
+
+(define_predicate "reg_or_some_mem_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx op0 = XEXP (op, 0);
+ int addr_mode;
+
+ if (symbolic_memory_operand (op0, mode))
+ return 1;
+
+ if (IS_STACK_PUSH (op))
+ return 1;
+
+ if (GET_CODE (op) == REG && reload_in_progress
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && reg_equiv_memory_loc[REGNO (op)])
+ {
+ op = reg_equiv_memory_loc[REGNO (op)];
+ op = eliminate_regs (op, VOIDmode, NULL_RTX);
+ }
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ op0 = XEXP (op, 0);
+ addr_mode = m68hc11_addr_mode | (reload_completed ? ADDR_STRICT : 0);
+ addr_mode &= ~ADDR_INDIRECT;
+ return m68hc11_valid_addressing_p (op0, mode, addr_mode);
+ }
+
+ return register_operand (op, mode);
+})
+
+;; Return true for a nonimmediate operand that can be used by a tst
+;; instruction (no auto-increment address before reload).
+
+(define_predicate "tst_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (GET_CODE (op) == MEM && reload_completed == 0)
+ {
+ rtx addr = XEXP (op, 0);
+ if (m68hc11_auto_inc_p (addr))
+ return 0;
+ }
+ return nonimmediate_operand (op, mode);
+})
+
+;; Return true for a general operand that can be used in a compare
+;; (no auto-increment address).
+
+(define_predicate "cmp_operand"
+ (match_code "subreg,reg,mem,symbol_ref,label_ref,const_int,const_double")
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx addr = XEXP (op, 0);
+ if (m68hc11_auto_inc_p (addr))
+ return 0;
+ }
+ return general_operand (op, mode);
+})
diff --git a/gcc/config/m68hc11/t-m68hc11 b/gcc/config/m68hc11/t-m68hc11
new file mode 100644
index 000000000..5a8e6ade4
--- /dev/null
+++ b/gcc/config/m68hc11/t-m68hc11
@@ -0,0 +1,96 @@
+# Copyright (C) 2000, 2001, 2002, 2003, 2005,
+# 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+RANLIB_FOR_TARGET = ` \
+ if [ -f $(objdir)/../binutils/ranlib ] ; then \
+ echo $(objdir)/../binutils/ranlib ; \
+ else \
+ if [ "$(host)" = "$(target)" ] ; then \
+ echo ranlib; \
+ else \
+ if [ -f $(bindir)/$(target_noncanonical)-ranlib ] ; then \
+ echo $(bindir)/$(target_noncanonical)-ranlib ; \
+ else \
+ t='$(program_transform_cross_name)'; echo ranlib | sed -e $$t ; \
+ fi; \
+ fi; \
+ fi`
+
+LIB1ASMSRC = m68hc11/larith.asm
+LIB1ASMFUNCS = _mulsi3 \
+ _mulqi3 _ashlsi3 _ashrsi3 _lshrsi3 \
+ _divmodhi4 _mulhi3 _mulhi32 \
+ _memcpy _memset _negsi2 _one_cmplsi2 \
+ _regs_min _regs_frame _regs_d1_2 \
+ _regs_d3_4 _regs_d5_6 _regs_d7_8 _regs_d9_16 _regs_d17_32 \
+ _premain __exit _abort _cleanup \
+ _adddi3 _subdi3 _notdi2 _rotlhi3 _rotrhi3 \
+ _ashrhi3 _lshrhi3 _lshlhi3 _ashrqi3 _lshlqi3 _map_data _init_bss \
+ _ctor _dtor _far_tramp _call_far _return_far
+
+TARGET_LIBGCC2_CFLAGS = -DUSE_GAS -DIN_GCC -Dinhibit_libc
+
+# C implementation of 32-bit div/mod.
+LIB2FUNCS_EXTRA = $(srcdir)/config/udivmodsi4.c \
+ $(srcdir)/config/divmod.c $(srcdir)/config/udivmod.c
+
+# Don't compile with -g1; this reduces the size of some sections (.eh_frame).
+LIBGCC2_DEBUG_CFLAGS =-g
+LIBGCC2_CFLAGS = -Os -mrelax $(LIBGCC2_INCLUDES) $(TARGET_LIBGCC2_CFLAGS) $(LIBGCC2_DEBUG_CFLAGS) $(GTHREAD_FLAGS) -DIN_LIBGCC2
+
+MULTILIB_OPTIONS = m68hc11/m68hc12 mshort fshort-double
+MULTILIB_DIRNAMES =
+MULTILIB_MATCHES = m68hc11=m6811 m68hc12=m6812 m68hc12=m68hcs12
+MULTILIB_EXCEPTIONS = -mnoshort -mno68hc11
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# We want fine-grained libraries, so use the new code to build the
+# floating-point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define SMALL_MACHINE' > dp-bit.c
+ echo '#define CMPtype HItype' >> dp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#define CMPtype HItype' >> fp-bit.c
+ echo '#define SMALL_MACHINE' >> fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
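+
+# Note (not part of the original makefile): both generated files simply
+# prepend configuration macros to the generic $(srcdir)/config/fp-bit.c;
+# CMPtype is HItype because int may be only 16 bits wide on this target.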
+
+CRT0_S = $(srcdir)/config/m68hc11/m68hc11-crt0.S
+MCRT0_S= $(srcdir)/config/m68hc11/m68hc11-crt0.S
+
+CRT0STUFF_T_CFLAGS =
+
+# Assemble startup files.
+$(T)crt1.o: $(CRT0_S) $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(CRT0_S)
+
+EXTRA_MULTILIB_PARTS = crt1.o
diff --git a/gcc/config/m68k/cf.md b/gcc/config/m68k/cf.md
new file mode 100644
index 000000000..d6f1e92c3
--- /dev/null
+++ b/gcc/config/m68k/cf.md
@@ -0,0 +1,2250 @@
+;; ColdFire V1, V2, V3 and V4/V4e DFA description.
+;; Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery Inc., www.codesourcery.com
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Instruction Buffer
+(define_automaton "cfv123_ib")
+
+;; These pseudo units model the instruction buffer of ColdFire cores.
+;; An instruction of size N can be issued only when cf_ib_wN is available.
+(define_cpu_unit "cf_ib_w1, cf_ib_w2, cf_ib_w3" "cfv123_ib")
+
+;; Instruction occupies 1 word in the instruction buffer.
+(define_reservation "cf_ib1" "cf_ib_w1")
+;; Instruction occupies 2 words in the instruction buffer.
+(define_reservation "cf_ib2" "cf_ib_w1+cf_ib_w2")
+;; Instruction occupies 3 words in the instruction buffer.
+(define_reservation "cf_ib3" "cf_ib_w1+cf_ib_w2+cf_ib_w3")
+
+;; This reservation is used at the start of each cycle to set up the
+;; maximal length of instruction that can be issued in the current cycle.
+;; E.g., when this reservation is applied for the first time, the cf_ib_w3
+;; resource is marked busy, thus filtering out all 3-word insns.
+;;
+;; This reservation requires deterministic automaton.
+;;
+;; On each cycle, provided the memory bus is available (i.e., there is no
+;; pending memory operation), the instruction fetch pipeline (IFP)
+;; prefetches two instruction words into the instruction buffer (IB).
+(define_insn_reservation "cf_ib1" 0
+ (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "ib"))
+ "cf_ib_w3|cf_ib_w2|cf_ib_w1")
+
+;; Operand Execution Pipeline
+(define_automaton "cfv123_oep")
+
+(define_cpu_unit "cf_dsoc,cf_agex" "cfv123_oep")
+
+;; A memory unit that is referred to as 'certain hardware resources' in
+;; the ColdFire reference manuals.  This unit remains occupied for two
+;; cycles after the last dsoc cycle of a store, hence the 2-cycle delay
+;; between two consecutive stores.
+(define_automaton "cfv123_chr")
+
+(define_cpu_unit "cf_chr" "cfv123_chr")
+
+;; Memory bus
+(define_automaton "cfv123_mem")
+
+;; When the memory bus is subscribed, the instruction buffer won't get
+;; its portion this cycle.  To model that, we query whether the cf_mem
+;; unit is subscribed and adjust the number of prefetched instruction
+;; words accordingly.
+;;
+(define_query_cpu_unit "cf_mem1, cf_mem2" "cfv123_mem")
+
+(define_reservation "cf_mem" "cf_mem1+cf_mem2")
+
+(define_automaton "cf_mac")
+
+(define_cpu_unit "cf_mac1,cf_mac2,cf_mac3,cf_mac4"
+ "cf_mac")
+
+(define_automaton "cfv123_guess")
+
+(define_query_cpu_unit "cfv123_guess" "cfv123_guess")
+
+;; Register to register move.
+;; Takes 1 cycle.
+(define_reservation "cfv123_alu_00"
+ "cf_dsoc,cf_agex")
+
+;; Load from a memory location.
+;; Takes 3 cycles.
+(define_reservation "cfv12_alu_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex")
+;; Takes 2 cycles.
+(define_reservation "cfv12_omove_10"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem,cf_agex")
+;; Takes 4 cycles.
+(define_reservation "cfv3_alu_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex")
+;; Takes 3 cycles.
+(define_reservation "cfv3_omove_10"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex")
+
+;; Load from an indexed location.
+;; Takes 4 cycles.
+(define_reservation "cfv12_alu_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex")
+;; Takes 3 cycles.
+(define_reservation "cfv12_omove_i0"
+ "cf_dsoc+cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex")
+;; Takes 5 cycles.
+(define_reservation "cfv3_alu_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex")
+;; Takes 4 cycles.
+(define_reservation "cfv3_omove_i0"
+ "cf_dsoc+cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex")
+
+;; Store to a memory location.
+;; Takes 1 cycle.
+(define_reservation "cfv12_alu_01"
+ "cf_dsoc+cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 1 cycle.
+(define_reservation "cfv3_alu_01"
+ "cf_dsoc+cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Store to an indexed location.
+;; Takes 2 cycles.
+(define_reservation "cfv12_alu_0i"
+ "cf_dsoc+cf_agex,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 2 cycles.
+(define_reservation "cfv3_alu_0i"
+ "cf_dsoc+cf_agex,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Load from a memory location and store to a memory location.
+;; Takes 3 cycles
+(define_reservation "cfv12_alu_11"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 2 cycles.
+(define_reservation "cfv12_omove_11"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 4 cycles
+(define_reservation "cfv3_alu_11"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+;; Takes 3 cycles.
+(define_reservation "cfv3_omove_11"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Load from an indexed location and store to a memory location.
+;; Takes 4 cycles.
+(define_reservation "cfv12_alu_i1"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 3 cycles.
+(define_reservation "cfv12_omove_i1"
+ "cf_dsoc+cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 5 cycles.
+(define_reservation "cfv3_alu_i1"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+;; Takes 4 cycles.
+(define_reservation "cfv3_omove_i1"
+ "cf_dsoc+cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Load from a memory location and store to an indexed location.
+;; Takes 4 cycles.
+(define_reservation "cfv12_alu_1i"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 3 cycles.
+(define_reservation "cfv12_omove_1i"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem,cf_agex,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 5 cycles.
+(define_reservation "cfv3_alu_1i"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+;; Takes 4 cycles.
+(define_reservation "cfv3_omove_1i"
+ "cf_dsoc+cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Lea operation for a memory location.
+;; Takes 1 cycle.
+(define_reservation "cfv123_lea_10"
+ "cf_dsoc,cf_agex")
+
+;; Lea operation for an indexed location.
+;; Takes 2 cycles.
+(define_reservation "cfv123_lea_i0"
+ "cf_dsoc,cf_agex,cf_agex")
+
+;; Pea operation for a memory location.
+;; Takes 2 cycles.
+(define_reservation "cfv12_pea_11"
+ "cf_dsoc,cf_agex,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 2 cycles.
+(define_reservation "cfv3_pea_11"
+ "cf_dsoc,cf_agex,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Pea operation for an indexed location.
+;; Takes 3 cycles.
+(define_reservation "cfv12_pea_i1"
+ "cf_dsoc,cf_agex,cf_agex,cf_agex+cf_chr,cf_mem+cf_chr,cf_chr")
+;; Takes 3 cycles.
+(define_reservation "cfv3_pea_i1"
+ "cf_dsoc,cf_agex,cf_agex,cf_agex+cf_chr,cf_mem1+cf_chr,cf_mem2+cf_chr")
+
+;; Long multiplication with no mac.
+;; Takes 9-18 cycles.
+(define_reservation "cfv123_mul_l_00"
+ "cf_dsoc,(cf_agex+cf_dsoc)*17,cf_agex")
+
+;; Word multiplication with no mac.
+;; Takes 9 cycles.
+(define_reservation "cfv123_mul_w_00"
+ "cf_dsoc,(cf_agex+cf_dsoc)*8,cf_agex")
+
+;; Long multiplication with no mac.
+;; Takes 11-20 cycles.
+(define_reservation "cfv12_mul_l_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,(cf_agex+cf_dsoc)*17,cf_agex")
+;; Takes 12-21 cycles.
+(define_reservation "cfv3_mul_l_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,(cf_agex+cf_dsoc)*17,cf_agex")
+
+;; Word multiplication with no mac.
+;; Takes 11 cycles.
+(define_reservation "cfv12_mul_w_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,(cf_agex+cf_dsoc)*8,cf_agex")
+;; Takes 12 cycles.
+(define_reservation "cfv3_mul_w_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,(cf_agex+cf_dsoc)*8,cf_agex")
+
+;; Word multiplication with no mac.
+;; Takes 12 cycles.
+(define_reservation "cfv12_mul_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem,(cf_agex+cf_dsoc)*8,cf_agex")
+;; Takes 13 cycles.
+(define_reservation "cfv3_mul_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,(cf_agex+cf_dsoc)*8,cf_agex")
+
+;; Long multiplication with mac.
+;; Takes 5 cycles.
+(define_reservation "cfv123_mac_l_00"
+ "cf_dsoc,cf_agex,cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+
+;; Word multiplication with mac.
+;; Takes 3 cycles.
+(define_reservation "cfv123_mac_w_00"
+ "cf_dsoc,cf_agex,cf_mac1,cf_mac2")
+
+;; Long multiplication with mac.
+;; Takes 7 cycles.
+(define_reservation "cfv12_mac_l_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex,cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+;; Takes 8 cycles.
+(define_reservation "cfv3_mac_l_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex,cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+
+;; Word multiplication with mac.
+;; Takes 5 cycles.
+(define_reservation "cfv12_mac_w_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex,cf_mac1,cf_mac2")
+;; Takes 6 cycles.
+(define_reservation "cfv3_mac_w_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex,cf_mac1,cf_mac2")
+
+;; Word multiplication with mac.
+;; Takes 6 cycles.
+(define_reservation "cfv12_mac_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex,cf_mac1,cf_mac2")
+;; Takes 7 cycles.
+(define_reservation "cfv3_mac_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex,cf_mac1,cf_mac2")
+
+;; Multiplication with emac.
+;; Takes 4 cycles.
+(define_reservation "cfv123_emac_00"
+ "cf_dsoc,cf_agex+cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+
+;; Multiplication with emac.
+;; Takes 6 cycles.
+(define_reservation "cfv12_emac_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem,cf_agex+cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+;; Takes 7 cycles.
+(define_reservation "cfv3_emac_10"
+ "cf_dsoc,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+
+;; Word multiplication with emac.
+;; Takes 7 cycles.
+(define_reservation "cfv12_emac_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem,cf_agex+cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+;; Takes 8 cycles.
+(define_reservation "cfv3_emac_w_i0"
+ "cf_dsoc,cf_agex,cf_agex,cf_dsoc+cf_mem1,cf_dsoc+cf_mem2,cf_agex+cf_mac1,cf_mac2,cf_mac3,cf_mac4")
+
+;; Return instruction.
+;; ??? As return reads the target address from the stack, use a mem-read
+;; ??? reservation for it.
+;; ??? It's not clear what the core does during these 5 cycles.
+;; ??? Luckily, we don't care that much about an insn that won't be moved.
+;; Takes 5 cycles.
+(define_reservation "cfv12_rts" "cfv12_alu_10")
+;; Takes 8 cycles.
+(define_reservation "cfv3_rts" "cfv3_alu_10")
+
+;; Call instruction.
+;; ??? It's not clear what reservation is best to use for calls.
+;; ??? For now we use mem-write + return reservations to reflect the
+;; ??? pushing and popping of the return address on the stack.
+;; Takes 3 cycles.
+(define_reservation "cfv12_call" "cfv12_alu_01,cfv12_rts")
+;; Takes 1/5 cycles.
+(define_reservation "cfv3_call" "cfv3_alu_01,cfv3_rts")
+
+;; Conditional branch instruction.
+;; ??? Branch reservations are unclear to me so far. Luckily, we don't care
+;; ??? that much about branches.
+;; Takes 2 cycles.
+(define_reservation "cfv12_bcc" "cfv123_alu_00")
+;; Takes 1 cycle.
+(define_reservation "cfv3_bcc" "cfv123_alu_00")
+
+;; Unconditional branch instruction.
+;; Takes 2 cycles.
+(define_reservation "cfv12_bra" "cfv12_alu_01")
+;; Takes 1 cycle.
+(define_reservation "cfv3_bra" "cfv3_alu_01")
+
+;; Computed jump instruction.
+;; Takes 3 cycles.
+(define_reservation "cfv12_jmp"
+ "(cf_dsoc+cf_agex)*3")
+;; Takes 5 cycles.
+(define_reservation "cfv3_jmp"
+ "(cf_dsoc+cf_agex)*5")
+
+;; Instruction reservations.
+
+;; The reservations below are simple derivations of the reservations above.
+;; Each reservation above expands into three reservations below, one for
+;; each instruction size.
+;; The number at the end of a reservation's name is the size of the
+;; instruction.
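+;;
+;; For example (our reading of the naming scheme): "cfv3_alu_i1_2" is the
+;; reservation for a ColdFire V3 ALU instruction that loads from an
+;; indexed location ('i'), stores to a memory location ('1') and is
+;; 2 words long.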
+
+(define_insn_reservation "cfv123_alu_00_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "00"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_alu_00")
+
+(define_insn_reservation "cfv123_alu_00_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "00"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_alu_00")
+
+(define_insn_reservation "cfv123_alu_00_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "00"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_alu_00")
+
+(define_insn_reservation "cfv1_alu_10_1" 3
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_alu_10")
+
+(define_insn_reservation "cfv1_alu_10_2" 3
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_alu_10")
+
+(define_insn_reservation "cfv1_alu_10_3" 3
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_10")
+
+(define_insn_reservation "cfv1_omove_10_1" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_omove_10")
+
+(define_insn_reservation "cfv1_omove_10_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_omove_10")
+
+(define_insn_reservation "cfv1_omove_10_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_10")
+
+(define_insn_reservation "cfv2_alu_10_1" 3
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_alu_10")
+
+(define_insn_reservation "cfv2_alu_10_2" 3
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_alu_10")
+
+(define_insn_reservation "cfv2_alu_10_3" 3
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_10")
+
+(define_insn_reservation "cfv2_omove_10_1" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_omove_10")
+
+(define_insn_reservation "cfv2_omove_10_2" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_omove_10")
+
+(define_insn_reservation "cfv2_omove_10_3" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_10")
+
+(define_insn_reservation "cfv3_alu_10_1" 4
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_alu_10")
+
+(define_insn_reservation "cfv3_alu_10_2" 4
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_alu_10")
+
+(define_insn_reservation "cfv3_alu_10_3" 4
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_10")
+
+(define_insn_reservation "cfv3_omove_10_1" 3
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_omove_10")
+
+(define_insn_reservation "cfv3_omove_10_2" 3
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_omove_10")
+
+(define_insn_reservation "cfv3_omove_10_3" 3
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "10"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_omove_10")
+
+(define_insn_reservation "cfv1_alu_i0_2" 4
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_i0")
+
+(define_insn_reservation "cfv1_alu_i0_3" 4
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_i0")
+
+(define_insn_reservation "cfv1_omove_i0_2" 3
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_i0")
+
+(define_insn_reservation "cfv1_omove_i0_3" 3
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_i0")
+
+(define_insn_reservation "cfv2_alu_i0_2" 4
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_i0")
+
+(define_insn_reservation "cfv2_alu_i0_3" 4
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_i0")
+
+(define_insn_reservation "cfv2_omove_i0_2" 3
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_i0")
+
+(define_insn_reservation "cfv2_omove_i0_3" 3
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_i0")
+
+(define_insn_reservation "cfv3_alu_i0_2" 5
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_alu_i0")
+
+(define_insn_reservation "cfv3_alu_i0_3" 5
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_i0")
+
+(define_insn_reservation "cfv3_omove_i0_2" 4
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_omove_i0")
+
+(define_insn_reservation "cfv3_omove_i0_3" 4
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_omove_i0")
+
+(define_insn_reservation "cfv12_alu_01_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_alu_01")
+
+(define_insn_reservation "cfv12_alu_01_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_alu_01")
+
+(define_insn_reservation "cfv12_alu_01_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_01")
+
+(define_insn_reservation "cfv3_alu_01_1" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_alu_01")
+
+(define_insn_reservation "cfv3_alu_01_2" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_alu_01")
+
+(define_insn_reservation "cfv3_alu_01_3" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "01"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_01")
+
+(define_insn_reservation "cfv12_alu_0i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "0i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_0i")
+
+(define_insn_reservation "cfv12_alu_0i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_0i")
+
+(define_insn_reservation "cfv3_alu_0i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "0i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_alu_0i")
+
+(define_insn_reservation "cfv3_alu_0i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_0i")
+
+(define_insn_reservation "cfv1_alu_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_alu_11")
+
+(define_insn_reservation "cfv1_alu_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_alu_11")
+
+(define_insn_reservation "cfv1_alu_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_11")
+
+(define_insn_reservation "cfv1_omove_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_omove_11")
+
+(define_insn_reservation "cfv1_omove_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_omove_11")
+
+(define_insn_reservation "cfv1_omove_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_11")
+
+(define_insn_reservation "cfv2_alu_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_alu_11")
+
+(define_insn_reservation "cfv2_alu_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_alu_11")
+
+(define_insn_reservation "cfv2_alu_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_11")
+
+(define_insn_reservation "cfv2_omove_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_omove_11")
+
+(define_insn_reservation "cfv2_omove_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_omove_11")
+
+(define_insn_reservation "cfv2_omove_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_11")
+
+(define_insn_reservation "cfv3_alu_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_alu_11")
+
+(define_insn_reservation "cfv3_alu_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "size" "2"))
+ (eq_attr "op_mem" "11"))
+ "cf_ib2+cfv3_alu_11")
+
+(define_insn_reservation "cfv3_alu_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_11")
+
+(define_insn_reservation "cfv3_omove_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_omove_11")
+
+(define_insn_reservation "cfv3_omove_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "size" "2"))
+ (eq_attr "op_mem" "11"))
+ "cf_ib2+cfv3_omove_11")
+
+(define_insn_reservation "cfv3_omove_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_omove_11")
+
+(define_insn_reservation "cfv1_alu_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_i1")
+
+(define_insn_reservation "cfv1_alu_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_i1")
+
+(define_insn_reservation "cfv1_omove_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_i1")
+
+(define_insn_reservation "cfv1_omove_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_i1")
+
+(define_insn_reservation "cfv2_alu_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_i1")
+
+(define_insn_reservation "cfv2_alu_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_i1")
+
+(define_insn_reservation "cfv2_omove_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_i1")
+
+(define_insn_reservation "cfv2_omove_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_i1")
+
+(define_insn_reservation "cfv3_alu_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_alu_i1")
+
+(define_insn_reservation "cfv3_alu_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_i1")
+
+(define_insn_reservation "cfv3_omove_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_omove_i1")
+
+(define_insn_reservation "cfv3_omove_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_omove_i1")
+
+(define_insn_reservation "cfv1_alu_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_1i")
+
+(define_insn_reservation "cfv1_alu_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_1i")
+
+(define_insn_reservation "cfv1_omove_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_1i")
+
+(define_insn_reservation "cfv1_omove_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1")
+ (eq_attr "type" "
+clr,clr_l,mov3q_l,move,moveq_l,tst,
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_1i")
+
+(define_insn_reservation "cfv2_alu_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_alu_1i")
+
+(define_insn_reservation "cfv2_alu_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_alu_1i")
+
+(define_insn_reservation "cfv2_omove_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_omove_1i")
+
+(define_insn_reservation "cfv2_omove_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv2")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_omove_1i")
+
+(define_insn_reservation "cfv3_alu_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_alu_1i")
+
+(define_insn_reservation "cfv3_alu_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+alu_l,aluq_l,bitr,bitrw,cmp,cmp_l,alux_l,ext,neg_l,scc,shift,
+clr,clr_l,mov3q_l,move,moveq_l,tst"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_alu_1i")
+
+(define_insn_reservation "cfv3_omove_1i_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_omove_1i")
+
+(define_insn_reservation "cfv3_omove_1i_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "
+move_l,tst_l"))
+ (eq_attr "op_mem" "1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_omove_1i")
+
+(define_insn_reservation "cfv123_lea_10_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "lea"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_lea_10")
+
+(define_insn_reservation "cfv123_lea_10_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "lea"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_lea_10")
+
+(define_insn_reservation "cfv123_lea_10_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "lea"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_lea_10")
+
+(define_insn_reservation "cfv123_lea_i0_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "lea"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv123_lea_i0")
+
+(define_insn_reservation "cfv123_lea_i0_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "lea"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_lea_i0")
+
+(define_insn_reservation "cfv12_pea_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_pea_11")
+
+(define_insn_reservation "cfv12_pea_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_pea_11")
+
+(define_insn_reservation "cfv12_pea_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_pea_11")
+
+(define_insn_reservation "cfv3_pea_11_1" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_pea_11")
+
+(define_insn_reservation "cfv3_pea_11_2" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_pea_11")
+
+(define_insn_reservation "cfv3_pea_11_3" 1
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_pea_11")
+
+(define_insn_reservation "cfv12_pea_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_pea_i1")
+
+(define_insn_reservation "cfv12_pea_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_pea_i1")
+
+(define_insn_reservation "cfv3_pea_i1_2" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_pea_i1")
+
+(define_insn_reservation "cfv3_pea_i1_3" 2
+ (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_pea_i1")
+
+(define_insn_reservation "cfv123_mul_l_00_1" 18
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_mul_l_00")
+
+(define_insn_reservation "cfv123_mul_l_00_2" 18
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_mul_l_00")
+
+(define_insn_reservation "cfv123_mul_l_00_3" 18
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_mul_l_00")
+
+(define_insn_reservation "cfv123_mul_w_00_1" 9
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_mul_w_00")
+
+(define_insn_reservation "cfv123_mul_w_00_2" 9
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_mul_w_00")
+
+(define_insn_reservation "cfv123_mul_w_00_3" 9
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_mul_w_00")
+
+(define_insn_reservation "cfv12_mul_l_10_1" 20
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_mul_l_10")
+
+(define_insn_reservation "cfv12_mul_l_10_2" 20
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_mul_l_10")
+
+(define_insn_reservation "cfv12_mul_l_10_3" 20
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mul_l_10")
+
+(define_insn_reservation "cfv3_mul_l_10_1" 21
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_mul_l_10")
+
+(define_insn_reservation "cfv3_mul_l_10_2" 21
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_mul_l_10")
+
+(define_insn_reservation "cfv3_mul_l_10_3" 21
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mul_l_10")
+
+(define_insn_reservation "cfv12_mul_w_10_1" 11
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_mul_w_10")
+
+(define_insn_reservation "cfv12_mul_w_10_2" 11
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_mul_w_10")
+
+(define_insn_reservation "cfv12_mul_w_10_3" 11
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mul_w_10")
+
+(define_insn_reservation "cfv3_mul_w_10_1" 12
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_mul_w_10")
+
+(define_insn_reservation "cfv3_mul_w_10_2" 12
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_mul_w_10")
+
+(define_insn_reservation "cfv3_mul_w_10_3" 12
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mul_w_10")
+
+(define_insn_reservation "cfv12_mul_w_i0_2" 12
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_mul_w_i0")
+
+(define_insn_reservation "cfv12_mul_w_i0_3" 12
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mul_w_i0")
+
+(define_insn_reservation "cfv3_mul_w_i0_2" 13
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_mul_w_i0")
+
+(define_insn_reservation "cfv3_mul_w_i0_3" 13
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "no"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mul_w_i0")
+
+(define_insn_reservation "cfv123_mac_l_00_1" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_mac_l_00")
+
+(define_insn_reservation "cfv123_mac_l_00_2" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_mac_l_00")
+
+(define_insn_reservation "cfv123_mac_l_00_3" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_mac_l_00")
+
+(define_insn_reservation "cfv123_mac_w_00_1" 3
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_mac_w_00")
+
+(define_insn_reservation "cfv123_mac_w_00_2" 3
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_mac_w_00")
+
+(define_insn_reservation "cfv123_mac_w_00_3" 3
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_mac_w_00")
+
+(define_insn_reservation "cfv12_mac_l_10_1" 7
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_mac_l_10")
+
+(define_insn_reservation "cfv12_mac_l_10_2" 7
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_mac_l_10")
+
+(define_insn_reservation "cfv12_mac_l_10_3" 7
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mac_l_10")
+
+(define_insn_reservation "cfv3_mac_l_10_1" 8
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_mac_l_10")
+
+(define_insn_reservation "cfv3_mac_l_10_2" 8
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_mac_l_10")
+
+(define_insn_reservation "cfv3_mac_l_10_3" 8
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mac_l_10")
+
+(define_insn_reservation "cfv12_mac_w_10_1" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_mac_w_10")
+
+(define_insn_reservation "cfv12_mac_w_10_2" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_mac_w_10")
+
+(define_insn_reservation "cfv12_mac_w_10_3" 5
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mac_w_10")
+
+(define_insn_reservation "cfv3_mac_w_10_1" 6
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_mac_w_10")
+
+(define_insn_reservation "cfv3_mac_w_10_2" 6
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_mac_w_10")
+
+(define_insn_reservation "cfv3_mac_w_10_3" 6
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mac_w_10")
+
+(define_insn_reservation "cfv12_mac_w_i0_2" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_mac_w_i0")
+
+(define_insn_reservation "cfv12_mac_w_i0_3" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_mac_w_i0")
+
+(define_insn_reservation "cfv3_mac_w_i0_2" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_mac_w_i0")
+
+(define_insn_reservation "cfv3_mac_w_i0_3" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_mac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_mac_w_i0")
+
+(define_insn_reservation "cfv123_emac_00_1" 4
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l,mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv123_emac_00")
+
+(define_insn_reservation "cfv123_emac_00_2" 4
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l,mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv123_emac_00")
+
+(define_insn_reservation "cfv123_emac_00_3" 4
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l,mul_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv123_emac_00")
+
+(define_insn_reservation "cfv12_emac_l_10_1" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_emac_10")
+
+(define_insn_reservation "cfv12_emac_l_10_2" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_emac_10")
+
+(define_insn_reservation "cfv12_emac_l_10_3" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_emac_10")
+
+(define_insn_reservation "cfv3_emac_l_10_1" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_emac_10")
+
+(define_insn_reservation "cfv3_emac_l_10_2" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_emac_10")
+
+(define_insn_reservation "cfv3_emac_l_10_3" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_l"))
+ (eq_attr "op_mem" "10,i0,i1,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_emac_10")
+
+(define_insn_reservation "cfv12_emac_w_10_1" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_emac_10")
+
+(define_insn_reservation "cfv12_emac_w_10_2" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_emac_10")
+
+(define_insn_reservation "cfv12_emac_w_10_3" 6
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_emac_10")
+
+(define_insn_reservation "cfv3_emac_w_10_1" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_emac_10")
+
+(define_insn_reservation "cfv3_emac_w_10_2" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_emac_10")
+
+(define_insn_reservation "cfv3_emac_w_10_3" 7
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_emac_10")
+
+(define_insn_reservation "cfv12_emac_w_i0_2" 7
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv12_emac_w_i0")
+
+(define_insn_reservation "cfv12_emac_w_i0_3" 7
+ (and (and (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_emac_w_i0")
+
+(define_insn_reservation "cfv3_emac_w_i0_2" 8
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "1,2"))
+ "cf_ib2+cfv3_emac_w_i0")
+
+(define_insn_reservation "cfv3_emac_w_i0_3" 8
+ (and (and (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "mac" "cf_emac"))
+ (eq_attr "type" "mul_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_emac_w_i0")
+
+(define_insn_reservation "cfv12_rts" 5
+ (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "rts"))
+ "cf_ib1+cfv12_rts")
+
+(define_insn_reservation "cfv3_rts" 8
+ (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "rts"))
+ "cf_ib1+cfv3_rts")
+
+(define_insn_reservation "cfv12_call_1" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_call")
+
+(define_insn_reservation "cfv12_call_2" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_call")
+
+(define_insn_reservation "cfv12_call_3" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_call")
+
+(define_insn_reservation "cfv3_call_1" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_call")
+
+(define_insn_reservation "cfv3_call_2" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_call")
+
+(define_insn_reservation "cfv3_call_3" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bsr,jsr"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_call")
+
+(define_insn_reservation "cfv12_bcc_1" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_bcc")
+
+(define_insn_reservation "cfv12_bcc_2" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_bcc")
+
+(define_insn_reservation "cfv12_bcc_3" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_bcc")
+
+(define_insn_reservation "cfv3_bcc_1" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_bcc")
+
+(define_insn_reservation "cfv3_bcc_2" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_bcc")
+
+(define_insn_reservation "cfv3_bcc_3" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bcc"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_bcc")
+
+(define_insn_reservation "cfv12_bra_1" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_bra")
+
+(define_insn_reservation "cfv12_bra_2" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_bra")
+
+(define_insn_reservation "cfv12_bra_3" 2
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_bra")
+
+(define_insn_reservation "cfv3_bra_1" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_bra")
+
+(define_insn_reservation "cfv3_bra_2" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_bra")
+
+(define_insn_reservation "cfv3_bra_3" 1
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "bra"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_bra")
+
+(define_insn_reservation "cfv12_jmp_1" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv12_jmp")
+
+(define_insn_reservation "cfv12_jmp_2" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv12_jmp")
+
+(define_insn_reservation "cfv12_jmp_3" 3
+ (and (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv12_jmp")
+
+(define_insn_reservation "cfv3_jmp_1" 5
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "1"))
+ "cf_ib1+cfv3_jmp")
+
+(define_insn_reservation "cfv3_jmp_2" 5
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "2"))
+ "cf_ib2+cfv3_jmp")
+
+(define_insn_reservation "cfv3_jmp_3" 5
+ (and (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "jmp"))
+ (eq_attr "size" "3"))
+ "cf_ib3+cfv3_jmp")
+
+(define_insn_reservation "cfv12_unlk" 2
+ (and (eq_attr "cpu" "cfv1,cfv2")
+ (eq_attr "type" "unlk"))
+ "cf_ib1+cfv12_alu_10")
+
+(define_insn_reservation "cfv3_unlk" 3
+ (and (eq_attr "cpu" "cfv3")
+ (eq_attr "type" "unlk"))
+ "cf_ib1+cfv3_alu_10")
+
+;; Dummy reservation for instructions that are not handled.
+(define_insn_reservation "cfv123_guess" 3
+ (and (eq_attr "cpu" "cfv1,cfv2,cfv3")
+ (eq_attr "type" "falu,fbcc,fcmp,fdiv,fmove,fmul,fneg,fsqrt,ftst,
+ div_w,div_l,link,mvsz,nop,trap,unknown"))
+ "cf_ib3+cfv123_guess+cf_dsoc+cf_agex+cf_mem")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Below is the pipeline description of the ColdFire V4 core.
+;; It is substantially different from the descriptions of the V1, V2 and V3
+;; cores, primarily because there is no need to model the instruction buffer.
+;;
+;; The V4 pipeline model uses a completely separate set of cpu units.
+
+;; Operand Execution Pipeline.
+(define_automaton "cfv4_oep")
+
+(define_cpu_unit "cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_da"
+ "cfv4_oep")
+
+;; V4 has 3 cases of dual-issue.
+;; After a cfv4_pOEPx instruction has been issued, a cfv4_sOEPx instruction
+;; can be issued on the same cycle (see the final_presence_sets below).
+(define_cpu_unit "cfv4_pOEP1,cfv4_sOEP1,
+ cfv4_pOEP2,cfv4_sOEP2,
+ cfv4_pOEP3,cfv4_sOEP3" "cfv4_oep")
+
+(final_presence_set "cfv4_sOEP1" "cfv4_pOEP1")
+(final_presence_set "cfv4_sOEP2" "cfv4_pOEP2")
+(final_presence_set "cfv4_sOEP3" "cfv4_pOEP3")
+
+;; Reservation for instructions that don't allow dual-issue.
+(define_reservation "cfv4_ds" "cfv4_pOEP1+cfv4_sOEP1+
+ cfv4_pOEP2+cfv4_sOEP2+
+ cfv4_pOEP3+cfv4_sOEP3")
+
+;; Memory access resource.
+(define_automaton "cfv4_mem")
+
+(define_cpu_unit "cfv4_mem" "cfv4_mem")
+
+;; EMAC.
+(define_automaton "cfv4_emac")
+
+(define_cpu_unit "cfv4_emac" "cfv4_emac")
+
+;; FPU.
+(define_automaton "cfv4_fp")
+
+(define_cpu_unit "cfv4_fp" "cfv4_fp")
+
+;; Automaton for unknown instruction.
+(define_automaton "cfv4_guess")
+
+(define_query_cpu_unit "cfv4_guess" "cfv4_guess")
+
+;; This bypass allows 1st case of dual-issue.
+(define_bypass 0 "cfv4_00_oag_pOEP1,cfv4_10_pOEP1,cfv4_i0_pOEP1"
+ "cfv4_00_oag,cfv4_00_oag_pOEP3_sOEP12,cfv4_00_oag_pOEP1,
+ cfv4_00_oag_moveql,cfv4_00_ex_sOEP13")
+
+;; The following bypasses decrease the latency of a producer if it modifies
+;; its target register in the EX stage and the consumer also uses
+;; that register in the EX stage.
+(define_bypass 1 "cfv4_00_ex" "cfv4_00_ex,cfv4_00_ex_sOEP13")
+(define_bypass 1 "cfv4_00_ex" "cfv4_10,cfv4_10_pOEP1,cfv4_i0,cfv4_i0_pOEP1"
+ "!m68k_sched_address_bypass_p")
+
+;; Indexed loads with scale factors 2 and 4 require an update of the index
+;; register in the register file. Considering that the index register is
+;; only needed at the second cycle of address generation, we get
+;; a latency of 4.
+;; Producers for indexed loads with scale factor 1 should have
+;; a latency of 3. Since we're only allowed one bypass, we handle it
+;; in the adjust_cost hook.
+(define_bypass 4
+ "cfv4_00_oag,cfv4_00_oag_pOEP3_sOEP12,cfv4_00_oag_lea,cfv4_00_oag_pOEP1,
+ cfv4_00_oag_moveql"
+ "cfv4_i0,cfv4_i0_pOEP1"
+ "m68k_sched_indexed_address_bypass_p")
+
+;; First part of cfv4_00.
+;; If issued in a pair with cfv4_movel_?0, the cost should be increased.
+;; ??? Is it possible that a combined cfv4_movel_00 and cfv4_oag_00 pair
+;; has longer latency than the two instructions emitted sequentially?
+;; Due to register renaming, the result of the sequence would be available
+;; after 3 cycles, instead of 4 for the combined instruction.
+(define_insn_reservation "cfv4_00_oag" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,clr_l,cmp_l,mov3q_l,neg_l"))
+ (eq_attr "op_mem" "00"))
+ "cfv4_sOEP1|cfv4_sOEP3|(cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex)")
+
+(define_insn_reservation "cfv4_00_oag_pOEP3_sOEP12" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "move_l,mov3q_l,clr_l"))
+ (and (eq_attr "op_mem" "00")
+ (and (eq_attr "opx_type" "Rn")
+ (eq_attr "opy_type" "none,imm_q,imm_w,imm_l"))))
+ "cfv4_sOEP1|cfv4_sOEP2|(cfv4_pOEP3,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex)")
+
+(define_insn_reservation "cfv4_00_oag_lea" 1
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "lea"))
+ "cfv4_pOEP3,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_00_oag_pOEP1" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "move_l,mov3q_l,clr_l"))
+ (and (eq_attr "op_mem" "00")
+ (ior (eq_attr "opx_type" "!Rn")
+ (eq_attr "opy_type" "!none,imm_q,imm_w,imm_l"))))
+ "cfv4_sOEP1|(cfv4_pOEP1,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex)")
+
+(define_insn_reservation "cfv4_00_oag_moveql" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "moveq_l"))
+ (eq_attr "op_mem" "00"))
+ "cfv4_sOEP1|cfv4_sOEP2|cfv4_sOEP3|(cfv4_pOEP3,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex)")
+
+;; Second part of cfv4_00.
+;; Latency is either 1 or 4 depending on which stage the consumer
+;; will need the data.
+
+(define_insn_reservation "cfv4_00_ex" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "bitr,bitrw,clr,cmp,move,mvsz,scc,tst"))
+ (eq_attr "op_mem" "00"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_00_ex_sOEP13" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alux_l,ext,shift,tst_l"))
+ (eq_attr "op_mem" "00"))
+ "cfv4_sOEP1|cfv4_sOEP3|(cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex)")
+
+;; Several types mentioned in this reservation (e.g., ext and shift) don't
+;; support an implicit load.  But we handle them anyway because of the first
+;; scheduling pass, which operates on non-strict RTL.
+;;
+;; Latency is either 1 or 4 depending on which stage the consumer
+;; will need the data.
+(define_insn_reservation "cfv4_10" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,moveq_l,mvsz,neg_l,
+ shift,tst,tst_l"))
+ (eq_attr "op_mem" "10"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+;; Specialization of cfv4_10.
+;; move.l has an OC2-to-DS forwarding path, which saves one cycle of latency.
+(define_insn_reservation "cfv4_10_pOEP1" 3
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "move_l"))
+ (eq_attr "op_mem" "10"))
+ "cfv4_pOEP1,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+;; Same here, but with one extra cycle of latency due to the longer OAG stage.
+(define_insn_reservation "cfv4_i0" 5
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,moveq_l,mvsz,neg_l,
+ shift,tst,tst_l"))
+ (eq_attr "op_mem" "i0"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+;; ??? Does an indexed load trigger dual-issue?
+;; ??? Does the OC2-to-DS forwarding path save a cycle?
+(define_insn_reservation "cfv4_i0_pOEP1" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "move_l"))
+ (eq_attr "op_mem" "i0"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+;; This reservation is for moves and clr.  Arithmetic instructions
+;; don't write to memory unless they also read from it.
+;; But before reload we can have all sorts of things.
+;; Reserving cfv4_pOEP2 allows dual-issue for the type 2 cases.
+(define_insn_reservation "cfv4_01" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,move_l,moveq_l,mvsz,neg_l,
+ shift"))
+ (eq_attr "op_mem" "01"))
+ "cfv4_pOEP2,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+;; ??? Does an indexed store trigger dual-issue?
+(define_insn_reservation "cfv4_0i" 2
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,move_l,moveq_l,mvsz,neg_l,
+ shift"))
+ (eq_attr "op_mem" "0i"))
+ "cfv4_pOEP2,cfv4_oag,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+(define_insn_reservation "cfv4_11" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,move_l,moveq_l,mvsz,neg_l,
+ shift"))
+ (eq_attr "op_mem" "11"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+;; Latency is 2 due to long OAG stage.
+(define_insn_reservation "cfv4_i1" 2
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,move_l,moveq_l,mvsz,neg_l,
+ shift"))
+ (eq_attr "op_mem" "i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+;; This one is the same as cfv4_i1.
+;; ??? Should it be different?
+(define_insn_reservation "cfv4_1i" 2
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitr,bitrw,
+ clr,clr_l,cmp,cmp_l,ext,
+ mov3q_l,move,move_l,moveq_l,mvsz,neg_l,
+ shift"))
+ (eq_attr "op_mem" "1i"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+;; ??? Does pea indeed support case 2 of dual-issue?
+(define_insn_reservation "cfv4_11_pea" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "11,00,01,0i,10"))
+ "cfv4_pOEP2,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+;; ??? Does pea indeed support case 2 of dual-issue?
+;; ??? Does an indexed store trigger dual-issue?
+(define_insn_reservation "cfv4_i1_pea" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "pea"))
+ (eq_attr "op_mem" "i1,1i"))
+ "cfv4_pOEP2,cfv4_oag,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_da,cfv4_mem")
+
+(define_insn_reservation "cfv4_link" 2
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "link"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_ex,cfv4_da,cfv4_mem")
+
+(define_insn_reservation "cfv4_unlink" 2
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "unlk"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_divw_00" 20
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "div_w"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex*15")
+
+(define_insn_reservation "cfv4_divw_10" 20
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "div_w"))
+ (eq_attr "op_mem" "10,11,1i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex*15")
+
+(define_insn_reservation "cfv4_divw_i0" 21
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "div_w"))
+ (eq_attr "op_mem" "i0,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex*15")
+
+(define_insn_reservation "cfv4_divl_00" 35
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "div_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex*30")
+
+(define_insn_reservation "cfv4_divl_10" 35
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "div_l"))
+ (eq_attr "op_mem" "10,11,1i,i0,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex*30")
+
+(define_insn_reservation "cfv4_emac_mul_00" 7
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "mul_w,mul_l"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_emac")
+
+(define_insn_reservation "cfv4_emac_mul_10" 7
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "mul_w,mul_l"))
+ (eq_attr "op_mem" "10,11,1i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_emac")
+
+(define_insn_reservation "cfv4_emac_mul_i0" 8
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "mul_w,mul_l"))
+ (eq_attr "op_mem" "i0,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_emac")
+
+(define_insn_reservation "cfv4_falu_00" 7
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "falu,fcmp,fmul"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp")
+
+(define_insn_reservation "cfv4_falu_10" 7
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "falu,fcmp,fmul"))
+ (eq_attr "op_mem" "10,i0,11,1i,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_fp")
+
+(define_insn_reservation "cfv4_fneg_00" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fmove,fneg,ftst"))
+ (eq_attr "op_mem" "00"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp")
+
+(define_insn_reservation "cfv4_fmove_fneg_10" 4
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fmove,fneg,ftst"))
+ (eq_attr "op_mem" "10,i0,11,1i,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_fp")
+
+(define_insn_reservation "cfv4_fmove_01" 1
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fmove,fneg,ftst"))
+ (eq_attr "op_mem" "01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp,cfv4_da,cfv4_mem")
+
+(define_insn_reservation "cfv4_fdiv_00" 23
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fdiv"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp*17")
+
+(define_insn_reservation "cfv4_fdiv_10" 23
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fdiv"))
+ (eq_attr "op_mem" "10,i0,11,1i,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_fp*17")
+
+(define_insn_reservation "cfv4_fsqrt_00" 56
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fsqrt"))
+ (eq_attr "op_mem" "00,01,0i"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp*50")
+
+(define_insn_reservation "cfv4_fsqrt_10" 56
+ (and (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fsqrt"))
+ (eq_attr "op_mem" "10,i0,11,1i,i1"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_fp*50")
+
+(define_insn_reservation "cfv4_bcc" 0
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "bcc"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_fbcc" 2
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "fbcc"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex,cfv4_fp")
+
+;; ??? Why is bra said to write to memory: 1(0/1)?
+(define_insn_reservation "cfv4_bra_bsr" 1
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "bra,bsr"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_jmp_jsr" 5
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "jmp,jsr"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_rts" 2
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "rts"))
+ "cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex")
+
+(define_insn_reservation "cfv4_nop" 1
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "nop"))
+ "cfv4_ds+cfv4_oag+cfv4_oc1+cfv4_mem+cfv4_oc2+cfv4_ex")
+
+(define_insn_reservation "cfv4_guess" 10
+ (and (eq_attr "cpu" "cfv4")
+ (eq_attr "type" "trap,unknown"))
+ "cfv4_guess+cfv4_ds,cfv4_oag,cfv4_oc1+cfv4_mem,cfv4_oc2,cfv4_ex,cfv4_emac+cfv4_fp")
+
+(define_insn_reservation "ignore" 0
+ (eq_attr "type" "ignore")
+ "nothing")
diff --git a/gcc/config/m68k/constraints.md b/gcc/config/m68k/constraints.md
new file mode 100644
index 000000000..a4885cda6
--- /dev/null
+++ b/gcc/config/m68k/constraints.md
@@ -0,0 +1,164 @@
+;; Constraint definitions for m68k
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "a" "ADDR_REGS"
+ "Address register.")
+
+(define_register_constraint "d" "DATA_REGS"
+ "Data register.")
+
+(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
+ "Floating point register.")
+
+(define_constraint "I"
+ "Integer constant in the range 1 @dots 8, for immediate shift counts and addq."
+ (and (match_code "const_int")
+ (match_test "ival > 0 && ival <= 8")))
+
+(define_constraint "J"
+ "Signed 16-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x8000 && ival <= 0x7fff")))
+
+(define_constraint "K"
+ "Integer constant that moveq can't handle."
+ (and (match_code "const_int")
+ (match_test "ival < -0x80 || ival >= 0x80")))
+
+(define_constraint "L"
+ "Integer constant in the range -8 @dots -1, for subq."
+ (and (match_code "const_int")
+ (match_test "ival < 0 && ival >= -8")))
+
+(define_constraint "M"
+ "Integer constant that moveq+notb can't handle."
+ (and (match_code "const_int")
+ (match_test "ival < -0x100 || ival >= 0x100")))
+
+(define_constraint "N"
+ "Integer constant in the range 24 @dots 31, for rotatert:SI 8 to 1 expressed as rotate."
+ (and (match_code "const_int")
+ (match_test "ival >= 24 && ival <= 31")))
+
+(define_constraint "O"
+ "Integer constant 16, for rotate using swap."
+ (and (match_code "const_int")
+ (match_test "ival == 16")))
+
+(define_constraint "P"
+ "Integer constant in the range 8 @dots 15, for rotatert:HI 8 to 1 expressed as rotate."
+ (and (match_code "const_int")
+ (match_test "ival >= 8 && ival <= 15")))
+
+(define_constraint "R"
+ "Integer constant that mov3q can handle."
+ (and (match_code "const_int")
+ (match_test "valid_mov3q_const (ival)")))
+
+(define_constraint "G"
+ "Defines all of the floating constants that are *NOT* 68881
+ constants. This is so 68881 constants get reloaded and the fpmovecr
+ is used."
+ (and (match_code "const_double")
+ (match_test "!(TARGET_68881 && standard_68881_constant_p (op))")))
+
+(define_constraint "H"
+ "Defines a real zero constant."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+(define_constraint "S"
+ "Used for operands that satisfy 'm' when -mpcrel is in effect."
+ (and (match_code "mem")
+ (match_test "TARGET_PCREL
+ && (GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF
+ || GET_CODE (XEXP (op, 0)) == CONST)")))
+
+(define_constraint "T"
+ "Used for operands that satisfy 's' when -mpcrel is not in effect."
+ (and (match_code "symbol_ref,label_ref,const")
+ (match_test "!flag_pic")))
+
+(define_memory_constraint "Q"
+ "Means address register indirect addressing mode."
+ (and (match_code "mem")
+ (match_test "m68k_matches_q_p (op)")))
+
+(define_constraint "U"
+ "Used for register offset addressing."
+ (and (match_code "mem")
+ (match_test "m68k_matches_u_p (op)")))
+
+(define_constraint "W"
+ "Used for const_call_operands."
+ (match_operand 0 "const_call_operand"))
+
+(define_constraint "Cs"
+ "symbol_ref or const."
+ (match_code "symbol_ref,const"))
+
+(define_constraint "Ci"
+ "const_int."
+ (and (match_code "const_int")
+ (match_test "true")))
+
+(define_constraint "C0"
+ "const_int 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "Cj"
+ "Range of signed numbers that don't fit in 16 bits."
+ (and (match_code "const_int")
+ (match_test "ival < -0x8000 || ival > 0x7FFF")))
+
+(define_constraint "Cu"
+ "16-bit offset for wrapped symbols"
+ (and (match_code "const")
+ (match_test "m68k_unwrap_symbol (op, false) != op")))
+
+(define_constraint "CQ"
+ "Integers valid for mvq."
+ (and (match_code "const_int")
+ (match_test "m68k_const_method (ival) == MOVQ")))
+
+(define_constraint "CW"
+ "Integers valid for a moveq followed by a swap."
+ (and (match_code "const_int")
+ (match_test "m68k_const_method (ival) == SWAP")))
+
+(define_constraint "CZ"
+ "Integers valid for mvz."
+ (and (match_code "const_int")
+ (match_test "m68k_const_method (ival) == MVZ")))
+
+(define_constraint "CS"
+ "Integers valid for mvs."
+ (and (match_code "const_int")
+ (match_test "m68k_const_method (ival) == MVS")))
+
+(define_constraint "Ap"
+ "push_operand."
+ (match_operand 0 "push_operand"))
+
+(define_constraint "Ac"
+ "Non-register operands allowed in clr."
+ (and (match_operand 0 "movsi_const0_operand")
+ (match_test "!REG_P (op)")))
diff --git a/gcc/config/m68k/crti.s b/gcc/config/m68k/crti.s
new file mode 100644
index 000000000..12fb59f41
--- /dev/null
+++ b/gcc/config/m68k/crti.s
@@ -0,0 +1,44 @@
+/* Specialized code needed to support construction and destruction of
+ file-scope objects in C++ and Java code, and to support exception handling.
+ Copyright (C) 1999, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Charles-Antoine Gauthier (charles.gauthier@iit.nrc.ca).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * This file just supplies function prologues for the .init and .fini
+ * sections. It is linked in before crtbegin.o.
+ */
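+
+/*
+ * An illustrative sketch (assumed layout, not part of these sources):
+ * after the final link, the .init section reads roughly as
+ *
+ *   _init:   linkw %fp,#0         prologue from crti.o (below)
+ *            ...                  fragments from crtbegin.o and others
+ *            unlk %fp             epilogue from crtn.o
+ *            rts
+ *
+ * and .fini is assembled the same way.
+ */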
+
+ .ident "GNU C crti.o"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+_init:
+ linkw %fp,#0
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+_fini:
+ linkw %fp,#0
diff --git a/gcc/config/m68k/crtn.s b/gcc/config/m68k/crtn.s
new file mode 100644
index 000000000..b7d70f02e
--- /dev/null
+++ b/gcc/config/m68k/crtn.s
@@ -0,0 +1,40 @@
+/* Specialized code needed to support construction and destruction of
+ file-scope objects in C++ and Java code, and to support exception handling.
+ Copyright (C) 1999, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Charles-Antoine Gauthier (charles.gauthier@iit.nrc.ca).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ * This file supplies function epilogues for the .init and .fini sections.
+ * It is linked in after all other files.
+ */
+
+ .ident "GNU C crtn.o"
+
+ .section .init
+ unlk %fp
+ rts
+
+ .section .fini
+ unlk %fp
+ rts
diff --git a/gcc/config/m68k/fpgnulib.c b/gcc/config/m68k/fpgnulib.c
new file mode 100644
index 000000000..2a7f6c75d
--- /dev/null
+++ b/gcc/config/m68k/fpgnulib.c
@@ -0,0 +1,595 @@
+/* This is a stripped down version of floatlib.c.  It supplies only those
+   functions which exist in libgcc, but for which there are no assembly
+   language versions in m68k/lb1sf68.asm.
+
+ It also includes simplistic support for extended floats (by working in
+ double precision). You must compile this file again with -DEXTFLOAT
+ to get this support. */
+
+/*
+** gnulib support for software floating point.
+** Copyright (C) 1991 by Pipeline Associates, Inc. All rights reserved.
+** Permission is granted to do *anything* you want with this file,
+** commercial or otherwise, provided this message remains intact. So there!
+** I would appreciate receiving any updates/patches/changes that anyone
+** makes, and am willing to be the repository for said changes (am I
+** making a big mistake?).
+**
+** Pat Wood
+** Pipeline Associates, Inc.
+** pipeline!phw@motown.com or
+** sun!pipeline!phw or
+** uunet!motown!pipeline!phw
+**
+** 05/01/91 -- V1.0 -- first release to gcc mailing lists
+** 05/04/91 -- V1.1 -- added float and double prototypes and return values
+** -- fixed problems with adding and subtracting zero
+** -- fixed rounding in truncdfsf2
+** -- fixed SWAP define and tested on 386
+*/
+
+/*
+** The following are routines that replace the gnulib soft floating point
+** routines that are called automatically when -msoft-float is selected.
+** They support single and double precision IEEE format, with provisions
+** for byte-swapped machines (tested on 386). Some of the double-precision
+** routines work at full precision, but most of the hard ones simply punt
+** and call the single precision routines, producing a loss of accuracy.
+** long long support is not assumed or included.
+** Overall accuracy is close to IEEE (actually 68882) for single-precision
+** arithmetic. I think there may still be a 1 in 1000 chance of a bit
+** being rounded the wrong way during a multiply. I'm not fussy enough to
+** bother with it, but if anyone is, knock yourself out.
+**
+** Efficiency has only been addressed where it was obvious that something
+** would make a big difference. Anyone who wants to do this right for
+** best speed should go in and rewrite in assembler.
+**
+** I have tested this only on a 68030 workstation and 386/ix integrated
+** in with -msoft-float.
+*/
+
+/* the following deal with IEEE single-precision numbers */
+#define EXCESS 126L
+#define SIGNBIT 0x80000000L
+#define HIDDEN (1L << 23L)
+#define SIGN(fp) ((fp) & SIGNBIT)
+#define EXP(fp) (((fp) >> 23L) & 0xFF)
+#define MANT(fp) (((fp) & 0x7FFFFFL) | HIDDEN)
+#define PACK(s,e,m) ((s) | ((e) << 23L) | (m))
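+
+/* EXCESS is 126 rather than the usual IEEE single bias of 127 because
+   MANT keeps the hidden bit explicit; a value is thus treated as
+   0.1mmm... * 2^(e - 126) instead of 1.mmm... * 2^(e - 127).  */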
+
+/* the following deal with IEEE double-precision numbers */
+#define EXCESSD 1022L
+#define HIDDEND (1L << 20L)
+#define EXPDBITS 11
+#define EXPDMASK 0x7FFL
+#define EXPD(fp) (((fp.l.upper) >> 20L) & 0x7FFL)
+#define SIGND(fp) ((fp.l.upper) & SIGNBIT)
+#define MANTD(fp) (((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
+ (fp.l.lower >> 22))
+#define MANTDMASK 0xFFFFFL /* mask of upper part */
+
+/* the following deal with IEEE extended-precision numbers */
+#define EXCESSX 16382L
+#define HIDDENX (1L << 31L)
+#define EXPXBITS 15
+#define EXPXMASK 0x7FFF
+#define EXPX(fp) (((fp.l.upper) >> 16) & EXPXMASK)
+#define SIGNX(fp) ((fp.l.upper) & SIGNBIT)
+#define MANTXMASK 0x7FFFFFFFL /* mask of upper part */
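+
+/* In the m68k 96-bit extended format, the `upper' word carries the sign
+   bit and the 15-bit exponent in its top half (the low half is padding),
+   while `middle' and `lower' hold the 64-bit mantissa, whose explicit
+   integer bit is HIDDENX at the top of `middle'.  */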
+
+union double_long
+{
+ double d;
+ struct {
+ long upper;
+ unsigned long lower;
+ } l;
+};
+
+union float_long {
+ float f;
+ long l;
+};
+
+union long_double_long
+{
+ long double ld;
+ struct
+ {
+ long upper;
+ unsigned long middle;
+ unsigned long lower;
+ } l;
+};
+
+#ifndef EXTFLOAT
+
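+/* Return nonzero if either argument is a NaN, i.e. has an all-ones
+   exponent field and a nonzero mantissa.  */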
+int
+__unordsf2(float a, float b)
+{
+ union float_long fl;
+
+ fl.f = a;
+ if (EXP(fl.l) == EXP(~0u) && (MANT(fl.l) & ~HIDDEN) != 0)
+ return 1;
+ fl.f = b;
+ if (EXP(fl.l) == EXP(~0u) && (MANT(fl.l) & ~HIDDEN) != 0)
+ return 1;
+ return 0;
+}
+
+int
+__unorddf2(double a, double b)
+{
+ union double_long dl;
+
+ dl.d = a;
+ if (EXPD(dl) == EXPDMASK
+ && ((dl.l.upper & MANTDMASK) != 0 || dl.l.lower != 0))
+ return 1;
+ dl.d = b;
+ if (EXPD(dl) == EXPDMASK
+ && ((dl.l.upper & MANTDMASK) != 0 || dl.l.lower != 0))
+ return 1;
+ return 0;
+}
+
+/* convert unsigned int to double */
+double
+__floatunsidf (unsigned long a1)
+{
+ long exp = 32 + EXCESSD;
+ union double_long dl;
+
+ if (!a1)
+ {
+ dl.l.upper = dl.l.lower = 0;
+ return dl.d;
+ }
+
+ while (a1 < 0x2000000L)
+ {
+ a1 <<= 4;
+ exp -= 4;
+ }
+
+ while (a1 < 0x80000000L)
+ {
+ a1 <<= 1;
+ exp--;
+ }
+
+ /* pack up and go home */
+ dl.l.upper = exp << 20L;
+ dl.l.upper |= (a1 >> 11L) & ~HIDDEND;
+ dl.l.lower = a1 << 21L;
+
+ return dl.d;
+}
+
+/* convert int to double */
+double
+__floatsidf (long a1)
+{
+ long sign = 0, exp = 31 + EXCESSD;
+ union double_long dl;
+
+ if (!a1)
+ {
+ dl.l.upper = dl.l.lower = 0;
+ return dl.d;
+ }
+
+ if (a1 < 0)
+ {
+ sign = SIGNBIT;
+ a1 = (long)-(unsigned long)a1;
+ if (a1 < 0)
+ {
+ dl.l.upper = SIGNBIT | ((32 + EXCESSD) << 20L);
+ dl.l.lower = 0;
+ return dl.d;
+ }
+ }
+
+ while (a1 < 0x1000000L)
+ {
+ a1 <<= 4;
+ exp -= 4;
+ }
+
+ while (a1 < 0x40000000L)
+ {
+ a1 <<= 1;
+ exp--;
+ }
+
+ /* pack up and go home */
+ dl.l.upper = sign;
+ dl.l.upper |= exp << 20L;
+ dl.l.upper |= (a1 >> 10L) & ~HIDDEND;
+ dl.l.lower = a1 << 22L;
+
+ return dl.d;
+}
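+
+/* A worked trace for illustration: with a1 == 1 the two normalization
+   loops finish with a1 == 0x40000000 and exp == 1023, so dl.l.upper
+   becomes 0x3FF00000 and dl.l.lower 0, i.e. the IEEE double 1.0.  */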
+
+/* convert unsigned int to float */
+float
+__floatunsisf (unsigned long l)
+{
+ double foo = __floatunsidf (l);
+ return foo;
+}
+
+/* convert int to float */
+float
+__floatsisf (long l)
+{
+ double foo = __floatsidf (l);
+ return foo;
+}
+
+/* convert float to double */
+double
+__extendsfdf2 (float a1)
+{
+ register union float_long fl1;
+ register union double_long dl;
+ register long exp;
+ register long mant;
+
+ fl1.f = a1;
+
+ dl.l.upper = SIGN (fl1.l);
+ if ((fl1.l & ~SIGNBIT) == 0)
+ {
+ dl.l.lower = 0;
+ return dl.d;
+ }
+
+ exp = EXP(fl1.l);
+ mant = MANT (fl1.l) & ~HIDDEN;
+ if (exp == 0)
+ {
+      /* Denormal: renormalize by shifting the mantissa up and adjusting
+	 the exponent; the wider double exponent range absorbs the shift.  */
+ exp = 1;
+ while (!(mant & HIDDEN))
+ {
+ mant <<= 1;
+ exp--;
+ }
+ mant &= ~HIDDEN;
+ }
+ exp = exp - EXCESS + EXCESSD;
+ dl.l.upper |= exp << 20;
+ dl.l.upper |= mant >> 3;
+ dl.l.lower = mant << 29;
+
+ return dl.d;
+}
+
+/* convert double to float */
+float
+__truncdfsf2 (double a1)
+{
+ register long exp;
+ register long mant;
+ register union float_long fl;
+ register union double_long dl1;
+ int sticky;
+ int shift;
+
+ dl1.d = a1;
+
+ if ((dl1.l.upper & ~SIGNBIT) == 0 && !dl1.l.lower)
+ {
+ fl.l = SIGND(dl1);
+ return fl.f;
+ }
+
+ exp = EXPD (dl1) - EXCESSD + EXCESS;
+
+ sticky = dl1.l.lower & ((1 << 22) - 1);
+ mant = MANTD (dl1);
+ /* shift double mantissa 6 bits so we can round */
+ sticky |= mant & ((1 << 6) - 1);
+ mant >>= 6;
+
+ /* Check for underflow and denormals. */
+ if (exp <= 0)
+ {
+ if (exp < -24)
+ {
+ sticky |= mant;
+ mant = 0;
+ }
+ else
+ {
+ sticky |= mant & ((1 << (1 - exp)) - 1);
+ mant >>= 1 - exp;
+ }
+ exp = 0;
+ }
+
+  /* Round to nearest, ties to even: the low bit of MANT is the guard
+     bit; round up when it is set and either the sticky bits or the next
+     bit up (the result's LSB) are nonzero.  */
+ shift = 1;
+ if ((mant & 1) && (sticky || (mant & 2)))
+ {
+ int rounding = exp ? 2 : 1;
+
+ mant += 1;
+
+ /* did the round overflow? */
+ if (mant >= (HIDDEN << rounding))
+ {
+ exp++;
+ shift = rounding;
+ }
+ }
+ /* shift down */
+ mant >>= shift;
+
+ mant &= ~HIDDEN;
+
+ /* pack up and go home */
+ fl.l = PACK (SIGND (dl1), exp, mant);
+ return (fl.f);
+}
+
+/* convert double to int */
+long
+__fixdfsi (double a1)
+{
+ register union double_long dl1;
+ register long exp;
+ register long l;
+
+ dl1.d = a1;
+
+ if (!dl1.l.upper && !dl1.l.lower)
+ return 0;
+
+ exp = EXPD (dl1) - EXCESSD - 31;
+ l = MANTD (dl1);
+
+ if (exp > 0)
+ {
+ /* Return largest integer. */
+ return SIGND (dl1) ? 0x80000000L : 0x7fffffffL;
+ }
+
+ if (exp <= -32)
+ return 0;
+
+ /* shift down until exp = 0 */
+ if (exp < 0)
+ l >>= -exp;
+
+ return (SIGND (dl1) ? -l : l);
+}
+
+/* convert float to int */
+long
+__fixsfsi (float a1)
+{
+ double foo = a1;
+ return __fixdfsi (foo);
+}
+
+#else /* EXTFLOAT */
+
+/* We do not need these routines for ColdFire, as it has no extended
+   float format.  */
+#if !defined (__mcoldfire__)
+
+/* Primitive extended precision floating point support.
+
+ We assume all numbers are normalized, don't do any rounding, etc. */
+
+/* Prototypes for the above in case we use them. */
+double __floatunsidf (unsigned long);
+double __floatsidf (long);
+float __floatsisf (long);
+double __extendsfdf2 (float);
+float __truncdfsf2 (double);
+long __fixdfsi (double);
+long __fixsfsi (float);
+
+int
+__unordxf2(long double a, long double b)
+{
+ union long_double_long ldl;
+
+ ldl.ld = a;
+ if (EXPX(ldl) == EXPXMASK
+ && ((ldl.l.middle & MANTXMASK) != 0 || ldl.l.lower != 0))
+ return 1;
+ ldl.ld = b;
+ if (EXPX(ldl) == EXPXMASK
+ && ((ldl.l.middle & MANTXMASK) != 0 || ldl.l.lower != 0))
+ return 1;
+ return 0;
+}
+
+/* convert double to long double */
+long double
+__extenddfxf2 (double d)
+{
+ register union double_long dl;
+ register union long_double_long ldl;
+ register long exp;
+
+ dl.d = d;
+ /*printf ("dfxf in: %g\n", d);*/
+
+ ldl.l.upper = SIGND (dl);
+ if ((dl.l.upper & ~SIGNBIT) == 0 && !dl.l.lower)
+ {
+ ldl.l.middle = 0;
+ ldl.l.lower = 0;
+ return ldl.ld;
+ }
+
+ exp = EXPD (dl) - EXCESSD + EXCESSX;
+ ldl.l.upper |= exp << 16;
+ ldl.l.middle = HIDDENX;
+ /* 31-20: # mantissa bits in ldl.l.middle - # mantissa bits in dl.l.upper */
+ ldl.l.middle |= (dl.l.upper & MANTDMASK) << (31 - 20);
+ /* 1+20: explicit-integer-bit + # mantissa bits in dl.l.upper */
+ ldl.l.middle |= dl.l.lower >> (1 + 20);
+ /* 32 - 21: # bits of dl.l.lower in ldl.l.middle */
+ ldl.l.lower = dl.l.lower << (32 - 21);
+
+ /*printf ("dfxf out: %s\n", dumpxf (ldl.ld));*/
+ return ldl.ld;
+}
+
+/* convert long double to double */
+double
+__truncxfdf2 (long double ld)
+{
+ register long exp;
+ register union double_long dl;
+ register union long_double_long ldl;
+
+ ldl.ld = ld;
+ /*printf ("xfdf in: %s\n", dumpxf (ld));*/
+
+ dl.l.upper = SIGNX (ldl);
+ if ((ldl.l.upper & ~SIGNBIT) == 0 && !ldl.l.middle && !ldl.l.lower)
+ {
+ dl.l.lower = 0;
+ return dl.d;
+ }
+
+ exp = EXPX (ldl) - EXCESSX + EXCESSD;
+ /* ??? quick and dirty: keep `exp' sane */
+ if (exp >= EXPDMASK)
+ exp = EXPDMASK - 1;
+ dl.l.upper |= exp << (32 - (EXPDBITS + 1));
+ /* +1-1: add one for sign bit, but take one off for explicit-integer-bit */
+ dl.l.upper |= (ldl.l.middle & MANTXMASK) >> (EXPDBITS + 1 - 1);
+ dl.l.lower = (ldl.l.middle & MANTXMASK) << (32 - (EXPDBITS + 1 - 1));
+ dl.l.lower |= ldl.l.lower >> (EXPDBITS + 1 - 1);
+
+ /*printf ("xfdf out: %g\n", dl.d);*/
+ return dl.d;
+}
+
+/* convert a float to a long double */
+long double
+__extendsfxf2 (float f)
+{
+ long double foo = __extenddfxf2 (__extendsfdf2 (f));
+ return foo;
+}
+
+/* convert a long double to a float */
+float
+__truncxfsf2 (long double ld)
+{
+ float foo = __truncdfsf2 (__truncxfdf2 (ld));
+ return foo;
+}
+
+/* convert an int to a long double */
+long double
+__floatsixf (long l)
+{
+ double foo = __floatsidf (l);
+ return foo;
+}
+
+/* convert an unsigned int to a long double */
+long double
+__floatunsixf (unsigned long l)
+{
+ double foo = __floatunsidf (l);
+ return foo;
+}
+
+/* convert a long double to an int */
+long
+__fixxfsi (long double ld)
+{
+ long foo = __fixdfsi ((double) ld);
+ return foo;
+}
+
+/* The remaining routines provide crude math support by working in
+   double precision.  */
+
+long double
+__addxf3 (long double x1, long double x2)
+{
+ return (double) x1 + (double) x2;
+}
+
+long double
+__subxf3 (long double x1, long double x2)
+{
+ return (double) x1 - (double) x2;
+}
+
+long double
+__mulxf3 (long double x1, long double x2)
+{
+ return (double) x1 * (double) x2;
+}
+
+long double
+__divxf3 (long double x1, long double x2)
+{
+ return (double) x1 / (double) x2;
+}
+
+long double
+__negxf2 (long double x1)
+{
+ return - (double) x1;
+}
+
+long
+__cmpxf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__eqxf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__nexf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__ltxf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__lexf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__gtxf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
+
+long
+__gexf2 (long double x1, long double x2)
+{
+ return __cmpdf2 ((double) x1, (double) x2);
+}
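+
+/* All seven comparison entry points can share __cmpdf2 because the
+   compiler only inspects the sign and zeroness of the result.  Under
+   the usual libgcc convention a source-level test on long doubles is
+   lowered roughly as in this sketch:
+
+   if (__ltxf2 (x, y) < 0)	// x < y
+     ...
+   if (__eqxf2 (x, y) == 0)	// x == y
+     ...  */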
+
+#endif /* !__mcoldfire__ */
+#endif /* EXTFLOAT */
diff --git a/gcc/config/m68k/ieee.opt b/gcc/config/m68k/ieee.opt
new file mode 100644
index 000000000..1fd67d8b4
--- /dev/null
+++ b/gcc/config/m68k/ieee.opt
@@ -0,0 +1,24 @@
+; Extra IEEE options for the Motorola 68000 port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; This option is ignored by GCC.
+mieee-fp
+Target RejectNegative
+Use IEEE math for fp comparisons
diff --git a/gcc/config/m68k/lb1sf68.asm b/gcc/config/m68k/lb1sf68.asm
new file mode 100644
index 000000000..0339a092c
--- /dev/null
+++ b/gcc/config/m68k/lb1sf68.asm
@@ -0,0 +1,4116 @@
+/* libgcc routines for 68000 w/o floating-point hardware.
+ Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Use this one for any 680x0; assumes no floating point hardware.
+ The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk.
+ Some of this code comes from MINIX, via the folks at ericsson.
+ D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
+*/
+
+/* These are predefined by new versions of GNU cpp. */
+
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__ _
+#endif
+
+#ifndef __REGISTER_PREFIX__
+#define __REGISTER_PREFIX__
+#endif
+
+#ifndef __IMMEDIATE_PREFIX__
+#define __IMMEDIATE_PREFIX__ #
+#endif
+
+/* ANSI concatenation macros. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+/* Note that X is a function. */
+
+#ifdef __ELF__
+#define FUNC(x) .type SYM(x),function
+#else
+/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just
+ define this to the empty string for non-ELF systems, but defining it
+ to .proc means that the information is available to the assembler if
+ the need arises. */
+#define FUNC(x) .proc
+#endif
+
+/* Use the right prefix for registers. */
+
+#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x)
+
+/* Use the right prefix for immediate values. */
+
+#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x)
+
+#define d0 REG (d0)
+#define d1 REG (d1)
+#define d2 REG (d2)
+#define d3 REG (d3)
+#define d4 REG (d4)
+#define d5 REG (d5)
+#define d6 REG (d6)
+#define d7 REG (d7)
+#define a0 REG (a0)
+#define a1 REG (a1)
+#define a2 REG (a2)
+#define a3 REG (a3)
+#define a4 REG (a4)
+#define a5 REG (a5)
+#define a6 REG (a6)
+#define fp REG (fp)
+#define sp REG (sp)
+#define pc REG (pc)
+
+/* Provide a few macros to allow for PIC code support.
+ * With PIC, data is stored A5 relative, so we've got to take a bit of special
+ * care to ensure that all loads of global data go via A5.  PIC also requires
+ * jumps and subroutine calls to be PC relative rather than absolute.  We cheat
+ * a little on this and, in the PIC case, use short offset branches and
+ * hope that the final object code is within range (which it should be).
+ */
+#ifndef __PIC__
+
+ /* Non PIC (absolute/relocatable) versions */
+
+ .macro PICCALL addr
+ jbsr \addr
+ .endm
+
+ .macro PICJUMP addr
+ jmp \addr
+ .endm
+
+ .macro PICLEA sym, reg
+ lea \sym, \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ pea \sym
+ .endm
+
+#else /* __PIC__ */
+
+# if defined (__uClinux__)
+
+ /* Versions for uClinux */
+
+# if defined(__ID_SHARED_LIBRARY__)
+
+ /* -mid-shared-library versions */
+
+ .macro PICLEA sym, reg
+ movel a5@(_current_shared_library_a5_offset_), \reg
+ movel \sym@GOT(\reg), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel a5@(_current_shared_library_a5_offset_), \areg
+ movel \sym@GOT(\areg), sp@-
+ .endm
+
+ .macro PICCALL addr
+ PICLEA \addr,a0
+ jsr a0@
+ .endm
+
+ .macro PICJUMP addr
+ PICLEA \addr,a0
+ jmp a0@
+ .endm
+
+# else /* !__ID_SHARED_LIBRARY__ */
+
+ /* Versions for -msep-data */
+
+ .macro PICLEA sym, reg
+ movel \sym@GOT(a5), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel \sym@GOT(a5), sp@-
+ .endm
+
+ .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+ lea \addr-.-8,a0
+ jsr pc@(a0)
+#else
+ jbsr \addr
+#endif
+ .endm
+
+ .macro PICJUMP addr
+ /* ISA C has no bra.l instruction, and since this assembly file
+ gets assembled into multiple object files, we avoid the
+ bra instruction entirely. */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+ lea \addr-.-8,a0
+ jmp pc@(a0)
+#else
+ bra \addr
+#endif
+ .endm
+
+# endif
+
+# else /* !__uClinux__ */
+
+ /* Versions for Linux */
+
+ .macro PICLEA sym, reg
+ movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg
+ lea (-6, pc, \reg), \reg
+ movel \sym@GOT(\reg), \reg
+ .endm
+
+ .macro PICPEA sym, areg
+ movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg
+ lea (-6, pc, \areg), \areg
+ movel \sym@GOT(\areg), sp@-
+ .endm
+
+ .macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+ lea \addr-.-8,a0
+ jsr pc@(a0)
+#else
+ jbsr \addr
+#endif
+ .endm
+
+ .macro PICJUMP addr
+ /* ISA C has no bra.l instruction, and since this assembly file
+ gets assembled into multiple object files, we avoid the
+ bra instruction entirely. */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+ lea \addr-.-8,a0
+ jmp pc@(a0)
+#else
+ bra \addr
+#endif
+ .endm
+
+# endif
+#endif /* __PIC__ */
+
+
+#ifdef L_floatex
+
+| This is an attempt at a decent floating point (single, double and
+| extended double) code for the GNU C compiler. It should be easy to
+| adapt to other compilers (but beware of the local labels!).
+
+| Starting date: 21 October, 1990
+
+| It is convenient to introduce the notation (s,e,f) for a floating point
+| number, where s=sign, e=exponent, f=fraction. We will call a floating
+| point number fpn to abbreviate, independently of the precision.
+| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023
+| for doubles and 16383 for long doubles). We then have the following
+| different cases:
+| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to
+| (-1)^s x 1.f x 2^(e-bias-1).
+| 2. Denormalized fpns have e=0. They correspond to numbers of the form
+| (-1)^s x 0.f x 2^(-bias).
+| 3. +/-INFINITY have e=MAX_EXP, f=0.
+| 4. A quiet NaN (Not a Number) has all bits set.
+| 5. A signaling NaN has s=0, e=MAX_EXP, f=1.
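+|
+| As a C sketch, the same classification for single precision (the
+| standard IEEE reading; the constants naming the cases are
+| hypothetical):
+|
+|	int classify (unsigned long bits)	/* the 32 float bits */
+|	{
+|	  unsigned long e = (bits >> 23) & 0xff;	/* exponent */
+|	  unsigned long f = bits & 0x7fffff;		/* fraction */
+|	  if (e == 0xff) return f ? NANF : INFF;	/* cases 3-5 */
+|	  if (e == 0)    return f ? DENORMF : ZEROF;	/* case 2, or zero */
+|	  return NORMALF;				/* case 1 */
+|	}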
+
+|=============================================================================
+| exceptions
+|=============================================================================
+
+| This is the floating point condition code register (_fpCCR):
+|
+| struct {
+| short _exception_bits;
+| short _trap_enable_bits;
+| short _sticky_bits;
+| short _rounding_mode;
+| short _format;
+| short _last_operation;
+| union {
+| float sf;
+| double df;
+| } _operand1;
+| union {
+| float sf;
+| double df;
+| } _operand2;
+| } _fpCCR;
+
+ .data
+ .even
+
+ .globl SYM (_fpCCR)
+
+SYM (_fpCCR):
+__exception_bits:
+ .word 0
+__trap_enable_bits:
+ .word 0
+__sticky_bits:
+ .word 0
+__rounding_mode:
+ .word ROUND_TO_NEAREST
+__format:
+ .word NIL
+__last_operation:
+ .word NOOP
+__operand1:
+ .long 0
+ .long 0
+__operand2:
+ .long 0
+ .long 0
+
+| Offsets:
+EBITS = __exception_bits - SYM (_fpCCR)
+TRAPE = __trap_enable_bits - SYM (_fpCCR)
+STICK = __sticky_bits - SYM (_fpCCR)
+ROUND = __rounding_mode - SYM (_fpCCR)
+FORMT = __format - SYM (_fpCCR)
+LASTO = __last_operation - SYM (_fpCCR)
+OPER1 = __operand1 - SYM (_fpCCR)
+OPER2 = __operand2 - SYM (_fpCCR)
+
+| The following exception types are supported:
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+| The allowed rounding modes are:
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| The allowed values of format are:
+NIL = 0
+SINGLE_FLOAT = 1
+DOUBLE_FLOAT = 2
+LONG_FLOAT = 3
+
+| The allowed values for the last operation are:
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+|=============================================================================
+| __clear_sticky_bits
+|=============================================================================
+
+| The sticky bits are normally not cleared (thus the name), whereas the
+| exception type and exception value reflect the last computation.
+| This routine is provided to clear them (you can also write to _fpCCR,
+| since it is globally visible).
+
+ .globl SYM (__clear_sticky_bit)
+
+ .text
+ .even
+
+| void __clear_sticky_bit(void);
+SYM (__clear_sticky_bit):
+ PICLEA SYM (_fpCCR),a0
+#ifndef __mcoldfire__
+ movew IMM (0),a0@(STICK)
+#else
+ clr.w a0@(STICK)
+#endif
+ rts
+
+|=============================================================================
+| $_exception_handler
+|=============================================================================
+
+ .globl $_exception_handler
+
+ .text
+ .even
+
+| This is the common exit point if an exception occurs.
+| NOTE: it is NOT callable from C!
+| It expects the exception type in d7, the format (SINGLE_FLOAT,
+| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5.
+| It sets the corresponding exception and sticky bits, and the format.
+| Depending on the format, it fills the corresponding slots for the
+| operands which produced the exception (all this information is provided
+| so that if you write your own exception handlers you have enough information
+| to deal with the problem).
+| It then checks whether the corresponding exception is trap-enabled,
+| in which case it pushes the address of _fpCCR and traps through
+| trap FPTRAP (15 for the moment).
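+|
+| From C, user code could poll the sticky bits through a matching
+| declaration, as in this sketch (only the leading fields are
+| declared; 0x0004 is the OVERFLOW bit defined above):
+|
+|	extern struct { short exception, trap_enable, sticky, rounding;
+|			short format, last_op; } _fpCCR;
+|	extern void __clear_sticky_bit (void);
+|
+|	if (_fpCCR.sticky & 0x0004)	/* overflow since last clear? */
+|	  __clear_sticky_bit ();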
+
+FPTRAP = 15
+
+$_exception_handler:
+ PICLEA SYM (_fpCCR),a0
+ movew d7,a0@(EBITS) | set __exception_bits
+#ifndef __mcoldfire__
+ orw d7,a0@(STICK) | and __sticky_bits
+#else
+ movew a0@(STICK),d4
+ orl d7,d4
+ movew d4,a0@(STICK)
+#endif
+ movew d6,a0@(FORMT) | and __format
+ movew d5,a0@(LASTO) | and __last_operation
+
+| Now put the operands in place:
+#ifndef __mcoldfire__
+ cmpw IMM (SINGLE_FLOAT),d6
+#else
+ cmpl IMM (SINGLE_FLOAT),d6
+#endif
+ beq 1f
+ movel a6@(8),a0@(OPER1)
+ movel a6@(12),a0@(OPER1+4)
+ movel a6@(16),a0@(OPER2)
+ movel a6@(20),a0@(OPER2+4)
+ bra 2f
+1: movel a6@(8),a0@(OPER1)
+ movel a6@(12),a0@(OPER2)
+2:
+| And check whether the exception is trap-enabled:
+#ifndef __mcoldfire__
+ andw a0@(TRAPE),d7 | is exception trap-enabled?
+#else
+ clrl d6
+ movew a0@(TRAPE),d6
+ andl d6,d7
+#endif
+ beq 1f | no, exit
+ PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR
+ trap IMM (FPTRAP) | and trap
+#ifndef __mcoldfire__
+1: moveml sp@+,d2-d7 | restore data registers
+#else
+1: moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+#endif /* L_floatex */
+
+#ifdef L_mulsi3
+ .text
+ FUNC(__mulsi3)
+ .globl SYM (__mulsi3)
+SYM (__mulsi3):
+ movew sp@(4), d0 /* x0 -> d0 */
+ muluw sp@(10), d0 /* x0*y1 */
+ movew sp@(6), d1 /* x1 -> d1 */
+ muluw sp@(8), d1 /* x1*y0 */
+#ifndef __mcoldfire__
+ addw d1, d0
+#else
+ addl d1, d0
+#endif
+ swap d0
+ clrw d0
+ movew sp@(6), d1 /* x1 -> d1 */
+ muluw sp@(10), d1 /* x1*y1 */
+ addl d1, d0
+
+ rts
+#endif /* L_mulsi3 */
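+
+| What __mulsi3 computes, as a C sketch in 32-bit unsigned arithmetic:
+| split each operand into 16-bit halves; the x0*y0 partial product
+| would only affect bits 32-63, so it is never formed.
+|
+|	unsigned long mulsi3 (unsigned long a, unsigned long b)
+|	{
+|	  unsigned long x0 = a >> 16, x1 = a & 0xffff;
+|	  unsigned long y0 = b >> 16, y1 = b & 0xffff;
+|	  return ((x0 * y1 + x1 * y0) << 16) + x1 * y1;
+|	}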
+
+#ifdef L_udivsi3
+ .text
+ FUNC(__udivsi3)
+ .globl SYM (__udivsi3)
+SYM (__udivsi3):
+#ifndef __mcoldfire__
+ movel d2, sp@-
+ movel sp@(12), d1 /* d1 = divisor */
+ movel sp@(8), d0 /* d0 = dividend */
+
+ cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */
+ jcc L3 /* then try next algorithm */
+ movel d0, d2
+ clrw d2
+ swap d2
+ divu d1, d2 /* high quotient in lower word */
+ movew d2, d0 /* save high quotient */
+ swap d0
+ movew sp@(10), d2 /* get low dividend + high rest */
+ divu d1, d2 /* low quotient */
+ movew d2, d0
+ jra L6
+
+L3: movel d1, d2 /* use d2 as divisor backup */
+L4: lsrl IMM (1), d1 /* shift divisor */
+ lsrl IMM (1), d0 /* shift dividend */
+ cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */
+ jcc L4
+ divu d1, d0 /* now we have 16-bit divisor */
+	andl	IMM (0xffff), d0 /* mask out remainder, keep 16-bit quotient */
+
+/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of
+ the operand ranges, this might give a 33-bit product. If this product is
+ greater than the dividend, the tentative quotient was too large. */
+ movel d2, d1
+ mulu d0, d1 /* low part, 32 bits */
+ swap d2
+ mulu d0, d2 /* high part, at most 17 bits */
+ swap d2 /* align high part with low part */
+ tstw d2 /* high part 17 bits? */
+ jne L5 /* if 17 bits, quotient was too large */
+ addl d2, d1 /* add parts */
+ jcs L5 /* if sum is 33 bits, quotient was too large */
+ cmpl sp@(8), d1 /* compare the sum with the dividend */
+	jls	L6	/* if sum <= dividend, the quotient is correct */
+L5: subql IMM (1), d0 /* adjust quotient */
+
+L6: movel sp@+, d2
+ rts
+
+#else /* __mcoldfire__ */
+
+/* ColdFire implementation of non-restoring division algorithm from
+ Hennessy & Patterson, Appendix A. */
+ link a6,IMM (-12)
+ moveml d2-d4,sp@
+ movel a6@(8),d0
+ movel a6@(12),d1
+ clrl d2 | clear p
+ moveq IMM (31),d4
+L1: addl d0,d0 | shift reg pair (p,a) one bit left
+ addxl d2,d2
+ movl d2,d3 | subtract b from p, store in tmp.
+ subl d1,d3
+ jcs L2 | if no carry,
+ bset IMM (0),d0 | set the low order bit of a to 1,
+ movl d3,d2 | and store tmp in p.
+L2: subql IMM (1),d4
+ jcc L1
+ moveml sp@,d2-d4 | restore data registers
+ unlk a6 | and return
+ rts
+#endif /* __mcoldfire__ */
+
+#endif /* L_udivsi3 */
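+
+| Both __udivsi3 variants are shift-and-subtract division; the 68000
+| path just narrows the divisor to 16 bits first and then corrects the
+| tentative quotient by at most one.  The core loop, as a C sketch
+| with the (p,a) register pair widened to 64 bits for clarity:
+|
+|	unsigned int udivsi3 (unsigned int a, unsigned int b)
+|	{
+|	  unsigned long long pa = a;	/* pair (p,a), p in the top half */
+|	  int i;
+|	  for (i = 0; i < 32; i++)
+|	    {
+|	      pa <<= 1;			/* shift the pair left */
+|	      if ((pa >> 32) >= b)	/* does b go into p? */
+|		pa = pa - ((unsigned long long) b << 32) + 1;
+|	    }				/* subtract and set quotient bit */
+|	  return (unsigned int) pa;	/* low half: quotient */
+|	}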
+
+#ifdef L_divsi3
+ .text
+ FUNC(__divsi3)
+ .globl SYM (__divsi3)
+SYM (__divsi3):
+ movel d2, sp@-
+
+ moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */
+ movel sp@(12), d1 /* d1 = divisor */
+ jpl L1
+ negl d1
+#ifndef __mcoldfire__
+ negb d2 /* change sign because divisor <0 */
+#else
+ negl d2 /* change sign because divisor <0 */
+#endif
+L1: movel sp@(8), d0 /* d0 = dividend */
+ jpl L2
+ negl d0
+#ifndef __mcoldfire__
+ negb d2
+#else
+ negl d2
+#endif
+
+L2: movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */
+ addql IMM (8), sp
+
+ tstb d2
+ jpl L3
+ negl d0
+
+L3: movel sp@+, d2
+ rts
+#endif /* L_divsi3 */
+
+#ifdef L_umodsi3
+ .text
+ FUNC(__umodsi3)
+ .globl SYM (__umodsi3)
+SYM (__umodsi3):
+ movel sp@(8), d1 /* d1 = divisor */
+ movel sp@(4), d0 /* d0 = dividend */
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__udivsi3)
+ addql IMM (8), sp
+ movel sp@(8), d1 /* d1 = divisor */
+#ifndef __mcoldfire__
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */
+ addql IMM (8), sp
+#else
+ mulsl d1,d0
+#endif
+ movel sp@(4), d1 /* d1 = dividend */
+ subl d0, d1 /* d1 = a - (a/b)*b */
+ movel d1, d0
+ rts
+#endif /* L_umodsi3 */
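+
+| __umodsi3 (and __modsi3 below) derive the remainder from the
+| quotient, as in this C sketch:
+|
+|	unsigned long umodsi3 (unsigned long a, unsigned long b)
+|	{
+|	  return a - (a / b) * b;	/* a/b via __udivsi3, * via __mulsi3 */
+|	}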
+
+#ifdef L_modsi3
+ .text
+ FUNC(__modsi3)
+ .globl SYM (__modsi3)
+SYM (__modsi3):
+ movel sp@(8), d1 /* d1 = divisor */
+ movel sp@(4), d0 /* d0 = dividend */
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__divsi3)
+ addql IMM (8), sp
+ movel sp@(8), d1 /* d1 = divisor */
+#ifndef __mcoldfire__
+ movel d1, sp@-
+ movel d0, sp@-
+ PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */
+ addql IMM (8), sp
+#else
+ mulsl d1,d0
+#endif
+ movel sp@(4), d1 /* d1 = dividend */
+ subl d0, d1 /* d1 = a - (a/b)*b */
+ movel d1, d0
+ rts
+#endif /* L_modsi3 */
+
+
+#ifdef L_double
+
+ .globl SYM (_fpCCR)
+ .globl $_exception_handler
+
+QUIET_NaN = 0xffffffff
+
+D_MAX_EXP = 0x07ff
+D_BIAS = 1022
+DBL_MAX_EXP = D_MAX_EXP - D_BIAS
+DBL_MIN_EXP = 1 - D_BIAS
+DBL_MANT_DIG = 53
+
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+DOUBLE_FLOAT = 2
+
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| Entry points:
+
+ .globl SYM (__adddf3)
+ .globl SYM (__subdf3)
+ .globl SYM (__muldf3)
+ .globl SYM (__divdf3)
+ .globl SYM (__negdf2)
+ .globl SYM (__cmpdf2)
+ .globl SYM (__cmpdf2_internal)
+ .hidden SYM (__cmpdf2_internal)
+
+ .text
+ .even
+
+| These are common routines to return and signal exceptions.
+
+Ld$den:
+| Return and signal a denormalized number
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$infty:
+Ld$overflow:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (0x7ff00000),d0
+ movel IMM (0),d1
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+OVERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$underflow:
+| Return 0 and set the exception flags
+ movel IMM (0),d0
+ movel d0,d1
+ movew IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$inop:
+| Return a quiet NaN and set the exception flags
+ movel IMM (QUIET_NaN),d0
+ movel d0,d1
+ movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Ld$div$0:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (0x7ff00000),d0
+ movel IMM (0),d1
+ orl d7,d0
+ movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+| double precision routines
+|=============================================================================
+|=============================================================================
+
+| A double precision floating point number (double) has the format:
+|
+| struct _double {
+| unsigned int sign : 1; /* sign bit */
+| unsigned int exponent : 11; /* exponent, biased by D_BIAS (1022) */
+| unsigned int fraction : 52; /* fraction */
+| } double;
+|
+| Thus sizeof(double) = 8 (64 bits).
+|
+| All the routines are callable from C programs, and return the result
+| in the register pair d0-d1. They also preserve all registers except
+| d0-d1 and a0-a1.
+
+|=============================================================================
+| __subdf3
+|=============================================================================
+
+| double __subdf3(double, double);
+ FUNC(__subdf3)
+SYM (__subdf3):
+ bchg IMM (31),sp@(12) | change sign of second operand
+ | and fall through, so we always add
+|=============================================================================
+| __adddf3
+|=============================================================================
+
+| double __adddf3(double, double);
+ FUNC(__adddf3)
+SYM (__adddf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0) | everything will be done in registers
+	moveml	d2-d7,sp@-	| save data registers d2-d7 (but not d0-d1)
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | get second operand
+ movel a6@(20),d3 |
+
+ movel d0,d7 | get d0's sign bit in d7 '
+ addl d1,d1 | check and clear sign bit of a, and gain one
+ addxl d0,d0 | bit of extra precision
+ beq Ladddf$b | if zero return second operand
+
+ movel d2,d6 | save sign in d6
+ addl d3,d3 | get rid of sign bit and gain one bit of
+ addxl d2,d2 | extra precision
+ beq Ladddf$a | if zero return first operand
+
+ andl IMM (0x80000000),d7 | isolate a's sign bit '
+ swap d6 | and also b's sign bit '
+#ifndef __mcoldfire__
+ andw IMM (0x8000),d6 |
+ orw d6,d7 | and combine them into d7, so that a's sign '
+ | bit is in the high word and b's is in the '
+ | low word, so d6 is free to be used
+#else
+ andl IMM (0x8000),d6
+ orl d6,d7
+#endif
+ movel d7,a0 | now save d7 into a0, so d7 is free to
+ | be used also
+
+| Get the exponents and check for denormalized and/or infinity.
+
+ movel IMM (0x001fffff),d6 | mask for the fraction
+ movel IMM (0x00200000),d7 | mask to put hidden bit back
+
+ movel d0,d4 |
+ andl d6,d0 | get fraction in d0
+ notl d6 | make d6 into mask for the exponent
+ andl d6,d4 | get exponent in d4
+ beq Ladddf$a$den | branch if a is denormalized
+ cmpl d6,d4 | check for INFINITY or NaN
+ beq Ladddf$nf |
+ orl d7,d0 | and put hidden bit back
+Ladddf$1:
+ swap d4 | shift right exponent so that it starts
+#ifndef __mcoldfire__
+ lsrw IMM (5),d4 | in bit 0 and not bit 20
+#else
+ lsrl IMM (5),d4 | in bit 0 and not bit 20
+#endif
+| Now we have a's exponent in d4 and fraction in d0-d1 '
+ movel d2,d5 | save b to get exponent
+ andl d6,d5 | get exponent in d5
+ beq Ladddf$b$den | branch if b is denormalized
+ cmpl d6,d5 | check for INFINITY or NaN
+ beq Ladddf$nf
+ notl d6 | make d6 into mask for the fraction again
+ andl d6,d2 | and get fraction in d2
+ orl d7,d2 | and put hidden bit back
+Ladddf$2:
+ swap d5 | shift right exponent so that it starts
+#ifndef __mcoldfire__
+ lsrw IMM (5),d5 | in bit 0 and not bit 20
+#else
+ lsrl IMM (5),d5 | in bit 0 and not bit 20
+#endif
+
+| Now we have b's exponent in d5 and fraction in d2-d3. '
+
+| The situation now is as follows: the signs are combined in a0, the
+| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a)
+| and d5 (b). To do the rounding correctly we need to keep all the
+| bits until the end, so we need to use d0-d1-d2-d3 for the first number
+| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the
+| exponents in a2-a3.
+
+#ifndef __mcoldfire__
+ moveml a2-a3,sp@- | save the address registers
+#else
+ movel a2,sp@-
+ movel a3,sp@-
+ movel a4,sp@-
+#endif
+
+ movel d4,a2 | save the exponents
+ movel d5,a3 |
+
+ movel IMM (0),d7 | and move the numbers around
+ movel d7,d6 |
+ movel d3,d5 |
+ movel d2,d4 |
+ movel d7,d3 |
+ movel d7,d2 |
+
+| Here we shift the numbers until the exponents are the same, and put
+| the largest exponent in a2.
+#ifndef __mcoldfire__
+ exg d4,a2 | get exponents back
+ exg d5,a3 |
+ cmpw d4,d5 | compare the exponents
+#else
+ movel d4,a4 | get exponents back
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+ cmpl d4,d5 | compare the exponents
+#endif
+ beq Ladddf$3 | if equal don't shift '
+ bhi 9f | branch if second exponent is higher
+
+| Here we have a's exponent larger than b's, so we have to shift b. We do
+| this by using d2 as a counter:
+1: movew d4,d2 | move largest exponent to d2
+#ifndef __mcoldfire__
+ subw d5,d2 | and subtract second exponent
+ exg d4,a2 | get back the longs we saved
+ exg d5,a3 |
+#else
+ subl d5,d2 | and subtract second exponent
+ movel d4,a4 | get back the longs we saved
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (DBL_MANT_DIG+2),d2
+#else
+ cmpl IMM (DBL_MANT_DIG+2),d2
+#endif
+ bge Ladddf$b$small
+#ifndef __mcoldfire__
+ cmpw IMM (32),d2 | if difference >= 32, shift by longs
+#else
+ cmpl IMM (32),d2 | if difference >= 32, shift by longs
+#endif
+ bge 5f
+2:
+#ifndef __mcoldfire__
+ cmpw IMM (16),d2 | if difference >= 16, shift by words
+#else
+ cmpl IMM (16),d2 | if difference >= 16, shift by words
+#endif
+ bge 6f
+ bra 3f | enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d4
+ roxrl IMM (1),d5
+ roxrl IMM (1),d6
+ roxrl IMM (1),d7
+#else
+ lsrl IMM (1),d7
+ btst IMM (0),d6
+ beq 10f
+ bset IMM (31),d7
+10: lsrl IMM (1),d6
+ btst IMM (0),d5
+ beq 11f
+ bset IMM (31),d6
+11: lsrl IMM (1),d5
+ btst IMM (0),d4
+ beq 12f
+ bset IMM (31),d5
+12: lsrl IMM (1),d4
+#endif
+3:
+#ifndef __mcoldfire__
+ dbra d2,4b
+#else
+ subql IMM (1),d2
+ bpl 4b
+#endif
+ movel IMM (0),d2
+ movel d2,d3
+ bra Ladddf$4
+5:
+ movel d6,d7
+ movel d5,d6
+ movel d4,d5
+ movel IMM (0),d4
+#ifndef __mcoldfire__
+ subw IMM (32),d2
+#else
+ subl IMM (32),d2
+#endif
+ bra 2b
+6:
+ movew d6,d7
+ swap d7
+ movew d5,d6
+ swap d6
+ movew d4,d5
+ swap d5
+ movew IMM (0),d4
+ swap d4
+#ifndef __mcoldfire__
+ subw IMM (16),d2
+#else
+ subl IMM (16),d2
+#endif
+ bra 3b
+
+9:
+#ifndef __mcoldfire__
+ exg d4,d5
+ movew d4,d6
+ subw d5,d6 | keep d5 (largest exponent) in d4
+ exg d4,a2
+ exg d5,a3
+#else
+ movel d5,d6
+ movel d4,d5
+ movel d6,d4
+ subl d5,d6
+ movel d4,a4
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (DBL_MANT_DIG+2),d6
+#else
+ cmpl IMM (DBL_MANT_DIG+2),d6
+#endif
+ bge Ladddf$a$small
+#ifndef __mcoldfire__
+ cmpw IMM (32),d6 | if difference >= 32, shift by longs
+#else
+ cmpl IMM (32),d6 | if difference >= 32, shift by longs
+#endif
+ bge 5f
+2:
+#ifndef __mcoldfire__
+ cmpw IMM (16),d6 | if difference >= 16, shift by words
+#else
+ cmpl IMM (16),d6 | if difference >= 16, shift by words
+#endif
+ bge 6f
+ bra 3f | enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+#endif
+3:
+#ifndef __mcoldfire__
+ dbra d6,4b
+#else
+ subql IMM (1),d6
+ bpl 4b
+#endif
+ movel IMM (0),d7
+ movel d7,d6
+ bra Ladddf$4
+5:
+ movel d2,d3
+ movel d1,d2
+ movel d0,d1
+ movel IMM (0),d0
+#ifndef __mcoldfire__
+ subw IMM (32),d6
+#else
+ subl IMM (32),d6
+#endif
+ bra 2b
+6:
+ movew d2,d3
+ swap d3
+ movew d1,d2
+ swap d2
+ movew d0,d1
+ swap d1
+ movew IMM (0),d0
+ swap d0
+#ifndef __mcoldfire__
+ subw IMM (16),d6
+#else
+ subl IMM (16),d6
+#endif
+ bra 3b
+Ladddf$3:
+#ifndef __mcoldfire__
+ exg d4,a2
+ exg d5,a3
+#else
+ movel d4,a4
+ movel a2,d4
+ movel a4,a2
+ movel d5,a4
+ movel a3,d5
+ movel a4,a3
+#endif
+Ladddf$4:
+| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and
+| the signs in a0.
+
+| Here we have to decide whether to add or subtract the numbers:
+#ifndef __mcoldfire__
+ exg d7,a0 | get the signs
+ exg d6,a3 | a3 is free to be used
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ movel d7,d6 |
+ movew IMM (0),d7 | get a's sign in d7 '
+ swap d6 |
+ movew IMM (0),d6 | and b's sign in d6 '
+ eorl d7,d6 | compare the signs
+ bmi Lsubdf$0 | if the signs are different we have
+ | to subtract
+#ifndef __mcoldfire__
+ exg d7,a0 | else we add the numbers
+ exg d6,a3 |
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ addl d7,d3 |
+ addxl d6,d2 |
+ addxl d5,d1 |
+ addxl d4,d0 |
+
+ movel a2,d4 | return exponent to d4
+ movel a0,d7 |
+ andl IMM (0x80000000),d7 | d7 now has the sign
+
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set
+| one more bit we check this:
+ btst IMM (DBL_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+ lea pc@(Ladddf$5),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Ladddf$5:
+| Put back the exponent and check for overflow
+#ifndef __mcoldfire__
+ cmpw IMM (0x7ff),d4 | is the exponent big?
+#else
+ cmpl IMM (0x7ff),d4 | is the exponent big?
+#endif
+ bge 1f
+ bclr IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | put exponent back into position
+#else
+ lsll IMM (4),d4 | put exponent back into position
+#endif
+ swap d0 |
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ bra Ladddf$ret
+1:
+ moveq IMM (ADD),d5
+ bra Ld$overflow
+
+Lsubdf$0:
+| Here we do the subtraction.
+#ifndef __mcoldfire__
+ exg d7,a0 | put sign back in a0
+ exg d6,a3 |
+#else
+ movel d7,a4
+ movel a0,d7
+ movel a4,a0
+ movel d6,a4
+ movel a3,d6
+ movel a4,a3
+#endif
+ subl d7,d3 |
+ subxl d6,d2 |
+ subxl d5,d1 |
+ subxl d4,d0 |
+ beq Ladddf$ret$1 | if zero just exit
+ bpl 1f | if positive skip the following
+ movel a0,d7 |
+ bchg IMM (31),d7 | change sign bit in d7
+ movel d7,a0 |
+ negl d3 |
+ negxl d2 |
+ negxl d1 | and negate result
+ negxl d0 |
+1:
+ movel a2,d4 | return exponent to d4
+ movel a0,d7
+ andl IMM (0x80000000),d7 | isolate sign bit
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3 |
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| As in the addition (not in the subtraction!) we could have set
+| one more bit we check this:
+ btst IMM (DBL_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+ lea pc@(Lsubdf$1),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lsubdf$1:
+| Put back the exponent and sign (we don't have overflow). '
+ bclr IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | put exponent back into position
+#else
+ lsll IMM (4),d4 | put exponent back into position
+#endif
+ swap d0 |
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ bra Ladddf$ret
+
+| If one of the numbers was too small (difference of exponents >=
+| DBL_MANT_DIG+1) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Ladddf$a$small:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+ movel a6@(16),d0
+ movel a6@(20),d1
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Ladddf$b$small:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+ movel a6@(8),d0
+ movel a6@(12),d1
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Ladddf$a$den:
+ movel d7,d4 | d7 contains 0x00200000
+ bra Ladddf$1
+
+Ladddf$b$den:
+ movel d7,d5 | d7 contains 0x00200000
+ notl d6
+ bra Ladddf$2
+
+Ladddf$b:
+| Return b (if a is zero)
+ movel d2,d0
+ movel d3,d1
+ bne 1f | Check if b is -0
+ cmpl IMM (0x80000000),d0
+ bne 1f
+ andl IMM (0x80000000),d7 | Use the sign of a
+ clrl d0
+ bra Ladddf$ret
+Ladddf$a:
+ movel a6@(8),d0
+ movel a6@(12),d1
+1:
+ moveq IMM (ADD),d5
+| Check for NaN and +/-INFINITY.
+ movel d0,d7 |
+ andl IMM (0x80000000),d7 |
+ bclr IMM (31),d0 |
+ cmpl IMM (0x7ff00000),d0 |
+ bge 2f |
+ movel d0,d0 | check for zero, since we don't '
+ bne Ladddf$ret | want to return -0 by mistake
+ bclr IMM (31),d7 |
+ bra Ladddf$ret |
+2:
+ andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction)
+ orl d1,d0 |
+ bne Ld$inop |
+ bra Ld$infty |
+
+Ladddf$ret$1:
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3 | restore regs and exit
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+Ladddf$ret:
+| Normal exit.
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+ orl d7,d0 | put sign bit back
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Ladddf$ret$den:
+| Return a denormalized number.
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0 | shift right once more
+ roxrl IMM (1),d1 |
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+ bra Ladddf$ret
+
+Ladddf$nf:
+ moveq IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+ movel a6@(8),d0 | get the numbers back (remember that we
+ movel a6@(12),d1 | did some processing already)
+ movel a6@(16),d2 |
+ movel a6@(20),d3 |
+ movel IMM (0x7ff00000),d4 | useful constant (INFINITY)
+ movel d0,d7 | save sign bits
+ movel d2,d6 |
+ bclr IMM (31),d0 | clear sign bits
+ bclr IMM (31),d2 |
+| We know that one of them is either NaN or +/-INFINITY.
+| Check for NaN (if either one is NaN return NaN)
+ cmpl d4,d0 | check first a (d0)
+ bhi Ld$inop | if d0 > 0x7ff00000 or equal and
+ bne 2f
+ tstl d1 | d1 > 0, a is NaN
+ bne Ld$inop |
+2:	cmpl	d4,d2		| check now b (d2)
+ bhi Ld$inop |
+ bne 3f
+ tstl d3 |
+ bne Ld$inop |
+3:
+| Now comes the check for +/-INFINITY. We know that both are (maybe not
+| finite) numbers, but we have to check if both are infinite whether we
+| are adding or subtracting them.
+ eorl d7,d6 | to check sign bits
+ bmi 1f
+ andl IMM (0x80000000),d7 | get (common) sign bit
+ bra Ld$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they have to be infinite both, so we
+| return NaN).
+ cmpl d2,d0 | are both infinite?
+ bne 1f | if d0 <> d2 they are not equal
+ cmpl d3,d1 | if d0 == d2 test d3 and d1
+ beq Ld$inop | if equal return NaN
+1:
+ andl IMM (0x80000000),d7 | get a's sign bit '
+ cmpl d4,d0 | test now for infinity
+ beq Ld$infty | if a is INFINITY return with this sign
+ bchg IMM (31),d7 | else we know b is INFINITY and has
+ bra Ld$infty | the opposite sign
+
+|=============================================================================
+| __muldf3
+|=============================================================================
+
+| double __muldf3(double, double);
+ FUNC(__muldf3)
+SYM (__muldf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0-d1
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | and b into d2-d3
+ movel a6@(20),d3 |
+ movel d0,d7 | d7 will hold the sign of the product
+ eorl d2,d7 |
+ andl IMM (0x80000000),d7 |
+ movel d7,a0 | save sign bit into a0
+ movel IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+ movel d7,d6 | another (mask for fraction)
+ notl d6 |
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d4 |
+ orl d1,d4 |
+ beq Lmuldf$a$0 | branch if a is zero
+ movel d0,d4 |
+ bclr IMM (31),d2 | get rid of b's sign bit '
+ movel d2,d5 |
+ orl d3,d5 |
+ beq Lmuldf$b$0 | branch if b is zero
+ movel d2,d5 |
+ cmpl d7,d0 | is a big?
+ bhi Lmuldf$inop | if a is NaN return NaN
+ beq Lmuldf$a$nf | we still have to check d1 and b ...
+ cmpl d7,d2 | now compare b with INFINITY
+ bhi Lmuldf$inop | is b NaN?
+ beq Lmuldf$b$nf | we still have to check d3 ...
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5.
+ andl d7,d4 | isolate exponent in d4
+ beq Lmuldf$a$den | if exponent zero, have denormalized
+ andl d6,d0 | isolate fraction
+ orl IMM (0x00100000),d0 | and put hidden bit back
+ swap d4 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (4),d4 |
+#else
+ lsrl IMM (4),d4 |
+#endif
+Lmuldf$1:
+ andl d7,d5 |
+ beq Lmuldf$b$den |
+ andl d6,d2 |
+ orl IMM (0x00100000),d2 | and put hidden bit back
+ swap d5 |
+#ifndef __mcoldfire__
+ lsrw IMM (4),d5 |
+#else
+ lsrl IMM (4),d5 |
+#endif
+Lmuldf$2: |
+#ifndef __mcoldfire__
+ addw d5,d4 | add exponents
+ subw IMM (D_BIAS+1),d4 | and subtract bias (plus one)
+#else
+ addl d5,d4 | add exponents
+ subl IMM (D_BIAS+1),d4 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were
+| denormalized to start with!), which means that in the product bit 104
+| (which will correspond to bit 8 of the fourth long) is set.
+
+| Here we have to do the product.
+| To do it we have to juggle the registers back and forth, as there are not
+| enough to keep everything in them. So we use the address registers to keep
+| some intermediate data.
+
+#ifndef __mcoldfire__
+ moveml a2-a3,sp@- | save a2 and a3 for temporary use
+#else
+ movel a2,sp@-
+ movel a3,sp@-
+ movel a4,sp@-
+#endif
+ movel IMM (0),a2 | a2 is a null register
+ movel d4,a3 | and a3 will preserve the exponent
+
+| First, shift d2-d3 so bit 20 becomes bit 31:
+#ifndef __mcoldfire__
+ rorl IMM (5),d2 | rotate d2 5 places right
+ swap d2 | and swap it
+ rorl IMM (5),d3 | do the same thing with d3
+ swap d3 |
+ movew d3,d6 | get the rightmost 11 bits of d3
+ andw IMM (0x07ff),d6 |
+ orw d6,d2 | and put them into d2
+ andw IMM (0xf800),d3 | clear those bits in d3
+#else
+ moveq IMM (11),d7 | left shift d2 11 bits
+ lsll d7,d2
+ movel d3,d6 | get a copy of d3
+ lsll d7,d3 | left shift d3 11 bits
+ andl IMM (0xffe00000),d6 | get the top 11 bits of d3
+ moveq IMM (21),d7 | right shift them 21 bits
+ lsrl d7,d6
+ orl d6,d2 | stick them at the end of d2
+#endif
+
+ movel d2,d6 | move b into d6-d7
+ movel d3,d7 | move a into d4-d5
+ movel d0,d4 | and clear d0-d1-d2-d3 (to put result)
+ movel d1,d5 |
+ movel IMM (0),d3 |
+ movel d3,d2 |
+ movel d3,d1 |
+ movel d3,d0 |
+
+| We use a1 as counter:
+ movel IMM (DBL_MANT_DIG-1),a1
+#ifndef __mcoldfire__
+ exg d7,a1
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+#endif
+
+1:
+#ifndef __mcoldfire__
+ exg d7,a1 | put counter back in a1
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+#endif
+ addl d3,d3 | shift sum once left
+ addxl d2,d2 |
+ addxl d1,d1 |
+ addxl d0,d0 |
+ addl d7,d7 |
+ addxl d6,d6 |
+ bcc 2f | if bit clear skip the following
+#ifndef __mcoldfire__
+ exg d7,a2 |
+#else
+ movel d7,a4
+ movel a2,d7
+ movel a4,a2
+#endif
+ addl d5,d3 | else add a to the sum
+ addxl d4,d2 |
+ addxl d7,d1 |
+ addxl d7,d0 |
+#ifndef __mcoldfire__
+ exg d7,a2 |
+#else
+ movel d7,a4
+ movel a2,d7
+ movel a4,a2
+#endif
+2:
+#ifndef __mcoldfire__
+ exg d7,a1 | put counter in d7
+ dbf d7,1b | decrement and branch
+#else
+ movel d7,a4
+ movel a1,d7
+ movel a4,a1
+ subql IMM (1),d7
+ bpl 1b
+#endif
+
+ movel a3,d4 | restore exponent
+#ifndef __mcoldfire__
+ moveml sp@+,a2-a3
+#else
+ movel sp@+,a4
+ movel sp@+,a3
+ movel sp@+,a2
+#endif
+
+| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The
+| first thing to do now is to normalize it so bit 8 becomes bit
+| DBL_MANT_DIG-32 (to do the rounding); later we will shift right.
+ swap d0
+ swap d1
+ movew d1,d0
+ swap d2
+ movew d2,d1
+ swap d3
+ movew d3,d2
+ movew IMM (0),d3
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+#else
+ moveq IMM (29),d6
+ lsrl IMM (3),d3
+ movel d2,d7
+ lsll d6,d7
+ orl d7,d3
+ lsrl IMM (3),d2
+ movel d1,d7
+ lsll d6,d7
+ orl d7,d2
+ lsrl IMM (3),d1
+ movel d0,d7
+ lsll d6,d7
+ orl d7,d1
+ lsrl IMM (3),d0
+#endif
+
+| Now round, check for over- and underflow, and exit.
+ movel a0,d7 | get sign bit back into d7
+ moveq IMM (MULTIPLY),d5
+
+ btst IMM (DBL_MANT_DIG+1-32),d0
+ beq Lround$exit
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+ bra Lround$exit
+
+Lmuldf$inop:
+ moveq IMM (MULTIPLY),d5
+ bra Ld$inop
+
+Lmuldf$b$nf:
+ moveq IMM (MULTIPLY),d5
+ movel a0,d7 | get sign bit back into d7
+ tstl d3 | we know d2 == 0x7ff00000, so check d3
+ bne Ld$inop | if d3 <> 0 b is NaN
+ bra Ld$overflow | else we have overflow (since a is finite)
+
+Lmuldf$a$nf:
+ moveq IMM (MULTIPLY),d5
+ movel a0,d7 | get sign bit back into d7
+ tstl d1 | we know d0 == 0x7ff00000, so check d1
+ bne Ld$inop | if d1 <> 0 a is NaN
+ bra Ld$overflow | else signal overflow
+
+| If either number is zero return zero, unless the other is +/-INFINITY or
+| NaN, in which case we return NaN.
+Lmuldf$b$0:
+ moveq IMM (MULTIPLY),d5
+#ifndef __mcoldfire__
+ exg d2,d0 | put b (==0) into d0-d1
+ exg d3,d1 | and a (with sign bit cleared) into d2-d3
+ movel a0,d0 | set result sign
+#else
+ movel d0,d2 | put a into d2-d3
+ movel d1,d3
+ movel a0,d0 | put result zero into d0-d1
+	moveq	IMM (0),d1
+#endif
+ bra 1f
+Lmuldf$a$0:
+ movel a0,d0 | set result sign
+ movel a6@(16),d2 | put b into d2-d3 again
+ movel a6@(20),d3 |
+ bclr IMM (31),d2 | clear sign bit
+1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness
+ bge Ld$inop | in case NaN or +/-INFINITY return NaN
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| hidden bit back into the fraction; instead we shift left until bit 20
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+Lmuldf$a$den:
+ movel IMM (1),d4
+ andl d6,d0
+1: addl d1,d1 | shift a left until bit 20 is set
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ subw IMM (1),d4 | and adjust exponent
+#else
+ subl IMM (1),d4 | and adjust exponent
+#endif
+ btst IMM (20),d0 |
+ bne Lmuldf$1 |
+ bra 1b
+
+Lmuldf$b$den:
+ movel IMM (1),d5
+ andl d6,d2
+1: addl d3,d3 | shift b left until bit 20 is set
+ addxl d2,d2 |
+#ifndef __mcoldfire__
+ subw IMM (1),d5 | and adjust exponent
+#else
+ subql IMM (1),d5 | and adjust exponent
+#endif
+ btst IMM (20),d2 |
+ bne Lmuldf$2 |
+ bra 1b
+
+
+|=============================================================================
+| __divdf3
+|=============================================================================
+
+| double __divdf3(double, double);
+ FUNC(__divdf3)
+SYM (__divdf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0-d1
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | and b into d2-d3
+ movel a6@(20),d3 |
+ movel d0,d7 | d7 will hold the sign of the result
+ eorl d2,d7 |
+ andl IMM (0x80000000),d7
+ movel d7,a0 | save sign into a0
+ movel IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+ movel d7,d6 | another (mask for fraction)
+ notl d6 |
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d4 |
+ orl d1,d4 |
+ beq Ldivdf$a$0 | branch if a is zero
+ movel d0,d4 |
+ bclr IMM (31),d2 | get rid of b's sign bit '
+ movel d2,d5 |
+ orl d3,d5 |
+ beq Ldivdf$b$0 | branch if b is zero
+ movel d2,d5
+ cmpl d7,d0 | is a big?
+ bhi Ldivdf$inop | if a is NaN return NaN
+ beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1
+ cmpl d7,d2 | now compare b with INFINITY
+ bhi Ldivdf$inop | if b is NaN return NaN
+ beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5 and normalize the numbers to
+| ensure that the ratio of the fractions is around 1. We do this by
+| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit)
+| set, even if they were denormalized to start with.
+| Thus, the result will satisfy: 2 > result > 1/2.
+ andl d7,d4 | and isolate exponent in d4
+ beq Ldivdf$a$den | if exponent is zero we have a denormalized
+ andl d6,d0 | and isolate fraction
+ orl IMM (0x00100000),d0 | and put hidden bit back
+ swap d4 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (4),d4 |
+#else
+ lsrl IMM (4),d4 |
+#endif
+Ldivdf$1: |
+ andl d7,d5 |
+ beq Ldivdf$b$den |
+ andl d6,d2 |
+ orl IMM (0x00100000),d2
+ swap d5 |
+#ifndef __mcoldfire__
+ lsrw IMM (4),d5 |
+#else
+ lsrl IMM (4),d5 |
+#endif
+Ldivdf$2: |
+#ifndef __mcoldfire__
+ subw d5,d4 | subtract exponents
+ addw IMM (D_BIAS),d4 | and add bias
+#else
+ subl d5,d4 | subtract exponents
+ addl IMM (D_BIAS),d4 | and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit
+| DBL_MANT_DIG-1-32=1)
+| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1)
+| d4 holds the difference of the exponents, corrected by the bias
+| a0 holds the sign of the ratio
+
+| To do the rounding correctly we need to keep information about the
+| nonsignificant bits. One way to do this would be to do the division
+| using four registers; another is to use two registers (as originally
+| I did), but use a sticky bit to preserve information about the
+| fractional part. Note that we can keep that info in a1, which is not
+| used.
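+|
+| The two quotient loops below, as a C sketch (hypothetical; the
+| two-long fractions are widened to single integers and NBITS stands
+| for the number of quotient bits produced):
+|
+|	q = 0;
+|	for (i = NBITS; i-- > 0; )
+|	  {
+|	    if (a >= b)			/* subtraction fits: bit is 1 */
+|	      { a -= b; q |= 1ULL << i; }
+|	    a += a;			/* shift the dividend left */
+|	  }
+|	/* a != 0 afterwards means discarded bits: the sticky bit.  */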
+ movel IMM (0),d6 | d6-d7 will hold the result
+ movel d6,d7 |
+ movel IMM (0),a1 | and a1 will hold the sticky bit
+
+ movel IMM (DBL_MANT_DIG-32+1),d5
+
+1: cmpl d0,d2 | is a < b?
+ bhi 3f | if b > a skip the following
+ beq 4f | if d0==d2 check d1 and d3
+2: subl d3,d1 |
+ subxl d2,d0 | a <-- a - b
+ bset d5,d6 | set the corresponding bit in d6
+3: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ bra 5f
+4: cmpl d1,d3 | here d0==d2, so check d1 and d3
+	bhi	3b	| if d3 > d1 skip the subtraction
+ bra 2b | else go do it
+5:
+| Here we have to start setting the bits in the second long.
+ movel IMM (31),d5 | again d5 is counter
+
+1: cmpl d0,d2 | is a < b?
+ bhi 3f | if b > a skip the following
+ beq 4f | if d0==d2 check d1 and d3
+2: subl d3,d1 |
+ subxl d2,d0 | a <-- a - b
+ bset d5,d7 | set the corresponding bit in d7
+3: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ bra 5f
+4: cmpl d1,d3 | here d0==d2, so check d1 and d3
+	bhi	3b	| if d3 > d1 skip the subtraction
+ bra 2b | else go do it
+5:
+| Now go ahead checking until we hit a one, which we store in d2.
+ movel IMM (DBL_MANT_DIG),d5
+1: cmpl d2,d0 | is a < b?
+ bhi 4f | if b < a, exit
+ beq 3f | if d0==d2 check d1 and d3
+2: addl d1,d1 | shift a by 1
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d5,1b | and branch back
+#else
+ subql IMM (1), d5
+ bpl 1b
+#endif
+ movel IMM (0),d2 | here no sticky bit was found
+ movel d2,d3
+ bra 5f
+3: cmpl d1,d3 | here d0==d2, so check d1 and d3
+	bhi	2b	| if d3 > d1 go back
+4:
+| Here put the sticky bit in d2-d3 (in the position which actually corresponds
+| to it; if you don't do this the algorithm loses in some cases). '
+ movel IMM (0),d2
+ movel d2,d3
+#ifndef __mcoldfire__
+ subw IMM (DBL_MANT_DIG),d5
+ addw IMM (63),d5
+ cmpw IMM (31),d5
+#else
+ subl IMM (DBL_MANT_DIG),d5
+ addl IMM (63),d5
+ cmpl IMM (31),d5
+#endif
+ bhi 2f
+1: bset d5,d3
+ bra 5f
+#ifndef __mcoldfire__
+ subw IMM (32),d5
+#else
+ subl IMM (32),d5
+#endif
+2: bset d5,d2
+5:
+| Finally we are finished!  Move the quotient longs from d6-d7 to
+| their final destination in d0-d1:
+ movel d6,d0
+ movel d7,d1
+ movel IMM (0),d3
+
+| Here we have finished the division, with the result in d0-d1-d2-d3, with
+| 2^21 <= d0 < 2^23. Thus bit 23 is not set, but bit 22 could be set.
+| If it is not, then definitely bit 21 is set. Normalize so bit 22 is
+| not set:
+ btst IMM (DBL_MANT_DIG-32+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ roxrl IMM (1),d2
+ roxrl IMM (1),d3
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+ movel a0,d7 | restore sign bit to d7
+ moveq IMM (DIVIDE),d5
+ bra Lround$exit
+
+Ldivdf$inop:
+ moveq IMM (DIVIDE),d5
+ bra Ld$inop
+
+Ldivdf$a$0:
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+ moveq IMM (DIVIDE),d5
+ bclr IMM (31),d2 |
+ movel d2,d4 |
+ orl d3,d4 |
+ beq Ld$inop | if b is also zero return NaN
+ cmpl IMM (0x7ff00000),d2 | check for NaN
+ bhi Ld$inop |
+ blt 1f |
+ tstl d3 |
+ bne Ld$inop |
+1: movel a0,d0 | else return signed zero
+ moveq IMM(0),d1 |
+ PICLEA SYM (_fpCCR),a0 | clear exception flags
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7 |
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+Ldivdf$b$0:
+ moveq IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit
+| cleared already.
+ movel a0,d7 | put a's sign bit back in d7 '
+ cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY
+ bhi Ld$inop | if larger it is NaN
+ tstl d1 |
+ bne Ld$inop |
+ bra Ld$div$0 | else signal DIVIDE_BY_ZERO
+
+Ldivdf$b$nf:
+ moveq IMM (DIVIDE),d5
+| If d2 == 0x7ff00000 we have to check d3.
+ tstl d3 |
+ bne Ld$inop | if d3 <> 0, b is NaN
+ bra Ld$underflow | else b is +/-INFINITY, so signal underflow
+
+Ldivdf$a$nf:
+ moveq IMM (DIVIDE),d5
+| If d0 == 0x7ff00000 we have to check d1.
+ tstl d1 |
+ bne Ld$inop | if d1 <> 0, a is NaN
+| If a is INFINITY we have to check b
+ cmpl d7,d2 | compare b with INFINITY
+ bge Ld$inop | if b is NaN or INFINITY return NaN
+ tstl d3 |
+ bne Ld$inop |
+ bra Ld$overflow | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| bit back into the fraction.
+Ldivdf$a$den:
+ movel IMM (1),d4
+ andl d6,d0
+1: addl d1,d1 | shift a left until bit 20 is set
+ addxl d0,d0
+#ifndef __mcoldfire__
+ subw IMM (1),d4 | and adjust exponent
+#else
+ subl IMM (1),d4 | and adjust exponent
+#endif
+ btst IMM (DBL_MANT_DIG-32-1),d0
+ bne Ldivdf$1
+ bra 1b
+
+Ldivdf$b$den:
+ movel IMM (1),d5
+ andl d6,d2
+1: addl d3,d3 | shift b left until bit 20 is set
+ addxl d2,d2
+#ifndef __mcoldfire__
+ subw IMM (1),d5 | and adjust exponent
+#else
+ subql IMM (1),d5 | and adjust exponent
+#endif
+ btst IMM (DBL_MANT_DIG-32-1),d2
+ bne Ldivdf$2
+ bra 1b
+
+Lround$exit:
+| This is a common exit point for __muldf3 and __divdf3. When they enter
+| this point the sign of the result is in d7, the result in d0-d1, normalized
+| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4.
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+ cmpw IMM (-DBL_MANT_DIG-1),d4
+#else
+ cmpl IMM (-DBL_MANT_DIG-1),d4
+#endif
+ blt Ld$underflow
+| It could happen that the exponent is less than 1, in which case the
+| number is denormalized. In this case we shift right and adjust the
+| exponent until it becomes 1 or the fraction is zero (in the latter case
+| we signal underflow and return zero).
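+| A rough C sketch of the loop below (hypothetical names; the real code
+| collects the flushed bits in d6-d7 rather than a single sticky flag):
+|
+|   while (exp < 1)           /* shift right until the exponent is 1 */
+|     {
+|       sticky |= frac & 1;   /* remember every bit flushed out */
+|       frac >>= 1;
+|       exp++;
+|     }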
+ movel d7,a0 |
+ movel IMM (0),d6 | use d6-d7 to collect bits flushed right
+ movel d6,d7 | use d6-d7 to collect bits flushed right
+#ifndef __mcoldfire__
+ cmpw IMM (1),d4 | if the exponent is less than 1 we
+#else
+ cmpl IMM (1),d4 | if the exponent is less than 1 we
+#endif
+ bge 2f | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+ addw IMM (1),d4 | adjust the exponent
+ lsrl IMM (1),d0 | shift right once
+ roxrl IMM (1),d1 |
+ roxrl IMM (1),d2 |
+ roxrl IMM (1),d3 |
+ roxrl IMM (1),d6 |
+ roxrl IMM (1),d7 |
+ cmpw IMM (1),d4 | is the exponent 1 already?
+#else
+ addl IMM (1),d4 | adjust the exponent
+ lsrl IMM (1),d7
+ btst IMM (0),d6
+ beq 13f
+ bset IMM (31),d7
+13: lsrl IMM (1),d6
+ btst IMM (0),d3
+ beq 14f
+ bset IMM (31),d6
+14: lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d2
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 12f
+ bset IMM (31),d1
+12: lsrl IMM (1),d0
+ cmpl IMM (1),d4 | is the exponent 1 already?
+#endif
+	beq	2f		| if so we are done
+	bra	1b		| else loop back
+ bra Ld$underflow | safety check, shouldn't execute '
+2: orl d6,d2 | this is a trick so we don't lose '
+ orl d7,d3 | the bits which were flushed right
+ movel a0,d7 | get back sign bit into d7
+| Now call the rounding routine (which takes care of denormalized numbers):
+ lea pc@(Lround$0),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to '
+| check again for underflow!). We have to check for overflow or for a
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 0x7ff).
+#ifndef __mcoldfire__
+ cmpw IMM (0x07ff),d4
+#else
+ cmpl IMM (0x07ff),d4
+#endif
+ bge Ld$overflow
+| Now check for a denormalized number (exponent==0):
+ movew d4,d4
+ beq Ld$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+ lslw IMM (4),d4 | exponent back to fourth byte
+#else
+ lsll IMM (4),d4 | exponent back to fourth byte
+#endif
+ bclr IMM (DBL_MANT_DIG-32-1),d0
+ swap d0 | and put back exponent
+#ifndef __mcoldfire__
+ orw d4,d0 |
+#else
+ orl d4,d0 |
+#endif
+ swap d0 |
+ orl d7,d0 | and sign also
+
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+|=============================================================================
+| __negdf2
+|=============================================================================
+
+| double __negdf2(double);
+ FUNC(__negdf2)
+SYM (__negdf2):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (NEGATE),d5
+ movel a6@(8),d0 | get number to negate in d0-d1
+ movel a6@(12),d1 |
+ bchg IMM (31),d0 | negate
+ movel d0,d2 | make a positive copy (for the tests)
+ bclr IMM (31),d2 |
+ movel d2,d4 | check for zero
+ orl d1,d4 |
+ beq 2f | if zero (either sign) return +zero
+ cmpl IMM (0x7ff00000),d2 | compare to +INFINITY
+ blt 1f | if finite, return
+ bhi Ld$inop | if larger (fraction not zero) is NaN
+ tstl d1 | if d2 == 0x7ff00000 check d1
+ bne Ld$inop |
+ movel d0,d7 | else get sign and return INFINITY
+ andl IMM (0x80000000),d7
+ bra Ld$infty
+1: PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+2: bclr IMM (31),d0
+ bra 1b
+
+|=============================================================================
+| __cmpdf2
+|=============================================================================
+
+GREATER = 1
+LESS = -1
+EQUAL = 0
+
+| int __cmpdf2_internal(double, double, int);
+SYM (__cmpdf2_internal):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@- | save registers
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (COMPARE),d5
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 |
+ movel a6@(16),d2 | get second operand
+ movel a6@(20),d3 |
+| First check if a and/or b are (+/-) zero and in that case clear
+| the sign bit.
+ movel d0,d6 | copy signs into d6 (a) and d7(b)
+ bclr IMM (31),d0 | and clear signs in d0 and d2
+ movel d2,d7 |
+ bclr IMM (31),d2 |
+ cmpl IMM (0x7ff00000),d0 | check for a == NaN
+ bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN
+ beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1
+ movel d0,d4 | copy into d4 to test for zero
+ orl d1,d4 |
+ beq Lcmpdf$a$0 |
+Lcmpdf$0:
+ cmpl IMM (0x7ff00000),d2 | check for b == NaN
+ bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN
+ beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3
+ movel d2,d4 |
+ orl d3,d4 |
+ beq Lcmpdf$b$0 |
+Lcmpdf$1:
+| Check the signs
+ eorl d6,d7
+ bpl 1f
+| If the signs are not equal check if a >= 0
+ tstl d6
+ bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b
+ bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+ tstl d6
+ bpl 1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+ exg d0,d2
+ exg d1,d3
+#else
+ movel d0,d7
+ movel d2,d0
+ movel d7,d2
+ movel d1,d7
+ movel d3,d1
+ movel d7,d3
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
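+| (It does: with the sign bit cleared, IEEE bit patterns compare as
+| unsigned integers in the same order as the values they represent,
+| and denormalized numbers sort below all normalized ones.)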
+ cmpl d0,d2
+ bhi Lcmpdf$b$gt$a | |b| > |a|
+ bne Lcmpdf$a$gt$b | |b| < |a|
+| If we got here d0 == d2, so we compare d1 and d3.
+ cmpl d1,d3
+ bhi Lcmpdf$b$gt$a | |b| > |a|
+ bne Lcmpdf$a$gt$b | |b| < |a|
+| If we got here a == b.
+ movel IMM (EQUAL),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpdf$a$gt$b:
+ movel IMM (GREATER),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpdf$b$gt$a:
+ movel IMM (LESS),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Lcmpdf$a$0:
+ bclr IMM (31),d6
+ bra Lcmpdf$0
+Lcmpdf$b$0:
+ bclr IMM (31),d7
+ bra Lcmpdf$1
+
+Lcmpdf$a$nf:
+ tstl d1
+ bne Ld$inop
+ bra Lcmpdf$0
+
+Lcmpdf$b$nf:
+ tstl d3
+ bne Ld$inop
+ bra Lcmpdf$1
+
+Lcmpd$inop:
+ movl a6@(24),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (DOUBLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+| int __cmpdf2(double, double);
+ FUNC(__cmpdf2)
+SYM (__cmpdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+
+|=============================================================================
+| rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1-d2-d3, with the exponent in register d4. They assume that the
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d4.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1: btst IMM (DBL_MANT_DIG-32),d0
+ bne 2f | if set the number is normalized
+| Normalize by shifting left until bit #DBL_MANT_DIG-32 is set or the exponent
+| is one (remember that a denormalized number corresponds to an
+| exponent of -D_BIAS+1).
+#ifndef __mcoldfire__
+ cmpw IMM (1),d4 | remember that the exponent is at least one
+#else
+ cmpl IMM (1),d4 | remember that the exponent is at least one
+#endif
+ beq 2f | an exponent of one means denormalized
+ addl d3,d3 | else shift and adjust the exponent
+ addxl d2,d2 |
+ addxl d1,d1 |
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d4,1b |
+#else
+ subql IMM (1), d4
+ bpl 1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^54, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f.
+| If delta == 1, we make sure the rounded number will be even
+| (after shifting).
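+| A rough C model of this step (hypothetical names: f stands for the
+| fraction in d0-d1 with the guard bit in bit 0, sticky for the bits
+| collected in d2-d3):
+|
+|   if (f & 1)                 /* delta >= 1 */
+|     {
+|       if (sticky)            /* delta > 1: round up */
+|         f += 1;
+|       else                   /* delta == 1: tie, round to even */
+|         f += (f >> 1) & 1;
+|     }
+|   f >>= 1;                   /* drop the guard bit */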
+ btst IMM (0),d1 | is delta < 1?
+ beq 2f | if so, do not do anything
+	orl	d2,d3		| is delta == 1?
+	bne	1f		| if not (delta > 1) round up
+	movel	d1,d3		|
+	andl	IMM (2),d3	| bit 1 is the least significant bit after the shift
+ movel IMM (0),d2 |
+ addl d3,d1 |
+ addxl d2,d0 |
+ bra 2f |
+1: movel IMM (1),d3 | else add 1
+ movel IMM (0),d2 |
+ addl d3,d1 |
+ addxl d2,d0
+| Shift right once (because we used bit #DBL_MANT_DIG-32!).
+2:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+
+| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a
+| 'fraction overflow' ...).
+ btst IMM (DBL_MANT_DIG-32),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d4
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addl IMM (1),d4
+#endif
+1:
+| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we
+| have to put the exponent to zero and return a denormalized number.
+ btst IMM (DBL_MANT_DIG-32-1),d0
+ beq 1f
+ jmp a0@
+1: movel IMM (0),d4
+ jmp a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+ jmp a0@
+#endif /* L_double */
+
+#ifdef L_float
+
+ .globl SYM (_fpCCR)
+ .globl $_exception_handler
+
+QUIET_NaN = 0xffffffff
+SIGNL_NaN = 0x7f800001
+INFINITY = 0x7f800000
+
+F_MAX_EXP = 0xff
+F_BIAS = 126
+FLT_MAX_EXP = F_MAX_EXP - F_BIAS
+FLT_MIN_EXP = 1 - F_BIAS
+FLT_MANT_DIG = 24
+
+INEXACT_RESULT = 0x0001
+UNDERFLOW = 0x0002
+OVERFLOW = 0x0004
+DIVIDE_BY_ZERO = 0x0008
+INVALID_OPERATION = 0x0010
+
+SINGLE_FLOAT = 1
+
+NOOP = 0
+ADD = 1
+MULTIPLY = 2
+DIVIDE = 3
+NEGATE = 4
+COMPARE = 5
+EXTENDSFDF = 6
+TRUNCDFSF = 7
+
+UNKNOWN = -1
+ROUND_TO_NEAREST = 0 | round result to nearest representable value
+ROUND_TO_ZERO = 1 | round result towards zero
+ROUND_TO_PLUS = 2 | round result towards plus infinity
+ROUND_TO_MINUS = 3 | round result towards minus infinity
+
+| Entry points:
+
+ .globl SYM (__addsf3)
+ .globl SYM (__subsf3)
+ .globl SYM (__mulsf3)
+ .globl SYM (__divsf3)
+ .globl SYM (__negsf2)
+ .globl SYM (__cmpsf2)
+ .globl SYM (__cmpsf2_internal)
+ .hidden SYM (__cmpsf2_internal)
+
+| These are common routines to return and signal exceptions.
+
+ .text
+ .even
+
+Lf$den:
+| Return and signal a denormalized number
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$infty:
+Lf$overflow:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (INFINITY),d0
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+OVERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$underflow:
+| Return 0 and set the exception flags
+ moveq IMM (0),d0
+ moveq IMM (INEXACT_RESULT+UNDERFLOW),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$inop:
+| Return a quiet NaN and set the exception flags
+ movel IMM (QUIET_NaN),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+Lf$div$0:
+| Return a properly signed INFINITY and set the exception flags
+ movel IMM (INFINITY),d0
+ orl d7,d0
+ moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+|=============================================================================
+|=============================================================================
+| single precision routines
+|=============================================================================
+|=============================================================================
+
+| A single precision floating point number (float) has the format:
+|
+| struct _float {
+| unsigned int sign : 1; /* sign bit */
+| unsigned int exponent : 8; /* exponent, shifted by 126 */
+| unsigned int fraction : 23; /* fraction */
+| } float;
+|
+| Thus sizeof(float) = 4 (32 bits).
+|
+| All the routines are callable from C programs, and return the result
+| in the single register d0. They also preserve all registers except
+| d0-d1 and a0-a1.
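+| For reference, the fields can be picked apart in C like this (a
+| hypothetical sketch, not used by these routines):
+|
+|   unsigned int bits;         /* the raw image of the float */
+|   unsigned int sign     = bits >> 31;
+|   unsigned int exponent = (bits >> 23) & 0xff;
+|   unsigned int fraction = bits & 0x007fffff;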
+
+|=============================================================================
+| __subsf3
+|=============================================================================
+
+| float __subsf3(float, float);
+ FUNC(__subsf3)
+SYM (__subsf3):
+ bchg IMM (31),sp@(8) | change sign of second operand
+ | and fall through
+|=============================================================================
+| __addsf3
+|=============================================================================
+
+| float __addsf3(float, float);
+ FUNC(__addsf3)
+SYM (__addsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0) | everything will be done in registers
+ moveml d2-d7,sp@- | save all data registers but d0-d1
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 | get second operand
+ movel d0,a0 | get d0's sign bit '
+ addl d0,d0 | check and clear sign bit of a
+ beq Laddsf$b | if zero return second operand
+ movel d1,a1 | save b's sign bit '
+ addl d1,d1 | get rid of sign bit
+ beq Laddsf$a | if zero return first operand
+
+| Get the exponents and check for denormalized and/or infinity.
+
+ movel IMM (0x00ffffff),d4 | mask to get fraction
+ movel IMM (0x01000000),d5 | mask to put hidden bit back
+
+ movel d0,d6 | save a to get exponent
+ andl d4,d0 | get fraction in d0
+ notl d4 | make d4 into a mask for the exponent
+ andl d4,d6 | get exponent in d6
+ beq Laddsf$a$den | branch if a is denormalized
+ cmpl d4,d6 | check for INFINITY or NaN
+ beq Laddsf$nf
+ swap d6 | put exponent into first word
+ orl d5,d0 | and put hidden bit back
+Laddsf$1:
+| Now we have a's exponent in d6 (second byte) and the mantissa in d0. '
+ movel d1,d7 | get exponent in d7
+ andl d4,d7 |
+ beq Laddsf$b$den | branch if b is denormalized
+ cmpl d4,d7 | check for INFINITY or NaN
+ beq Laddsf$nf
+ swap d7 | put exponent into first word
+ notl d4 | make d4 into a mask for the fraction
+ andl d4,d1 | get fraction in d1
+ orl d5,d1 | and put hidden bit back
+Laddsf$2:
+| Now we have b's exponent in d7 (second byte) and the mantissa in d1. '
+
+| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we
+| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra
+| bit).
+
+ movel d1,d2 | move b to d2, since we want to use
+ | two registers to do the sum
+ movel IMM (0),d1 | and clear the new ones
+ movel d1,d3 |
+
+| Here we shift the numbers in registers d0 and d1 so the exponents are the
+| same, and put the largest exponent in d6. Note that we are using two
+| registers for each number (see the discussion by D. Knuth in "Seminumerical
+| Algorithms").
+#ifndef __mcoldfire__
+ cmpw d6,d7 | compare exponents
+#else
+ cmpl d6,d7 | compare exponents
+#endif
+ beq Laddsf$3 | if equal don't shift '
+ bhi 5f | branch if second exponent largest
+1:
+ subl d6,d7 | keep the largest exponent
+ negl d7
+#ifndef __mcoldfire__
+ lsrw IMM (8),d7 | put difference in lower byte
+#else
+ lsrl IMM (8),d7 | put difference in lower byte
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+ cmpw IMM (FLT_MANT_DIG+2),d7
+#else
+ cmpl IMM (FLT_MANT_DIG+2),d7
+#endif
+ bge Laddsf$b$small
+#ifndef __mcoldfire__
+ cmpw IMM (16),d7 | if difference >= 16 swap
+#else
+ cmpl IMM (16),d7 | if difference >= 16 swap
+#endif
+ bge 4f
+2:
+#ifndef __mcoldfire__
+ subw IMM (1),d7
+#else
+ subql IMM (1), d7
+#endif
+3:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d2 | shift right second operand
+ roxrl IMM (1),d3
+ dbra d7,3b
+#else
+ lsrl IMM (1),d3
+ btst IMM (0),d2
+ beq 10f
+ bset IMM (31),d3
+10: lsrl IMM (1),d2
+ subql IMM (1), d7
+ bpl 3b
+#endif
+ bra Laddsf$3
+4:
+ movew d2,d3
+ swap d3
+ movew d3,d2
+ swap d2
+#ifndef __mcoldfire__
+ subw IMM (16),d7
+#else
+ subl IMM (16),d7
+#endif
+ bne 2b | if still more bits, go back to normal case
+ bra Laddsf$3
+5:
+#ifndef __mcoldfire__
+ exg d6,d7 | exchange the exponents
+#else
+ eorl d6,d7
+ eorl d7,d6
+ eorl d6,d7
+#endif
+ subl d6,d7 | keep the largest exponent
+ negl d7 |
+#ifndef __mcoldfire__
+ lsrw IMM (8),d7 | put difference in lower byte
+#else
+ lsrl IMM (8),d7 | put difference in lower byte
+#endif
+| if difference is too large we don't shift (and exit!) '
+#ifndef __mcoldfire__
+ cmpw IMM (FLT_MANT_DIG+2),d7
+#else
+ cmpl IMM (FLT_MANT_DIG+2),d7
+#endif
+ bge Laddsf$a$small
+#ifndef __mcoldfire__
+ cmpw IMM (16),d7 | if difference >= 16 swap
+#else
+ cmpl IMM (16),d7 | if difference >= 16 swap
+#endif
+ bge 8f
+6:
+#ifndef __mcoldfire__
+ subw IMM (1),d7
+#else
+ subl IMM (1),d7
+#endif
+7:
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0 | shift right first operand
+ roxrl IMM (1),d1
+ dbra d7,7b
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ subql IMM (1),d7
+ bpl 7b
+#endif
+ bra Laddsf$3
+8:
+ movew d0,d1
+ swap d1
+ movew d1,d0
+ swap d0
+#ifndef __mcoldfire__
+ subw IMM (16),d7
+#else
+ subl IMM (16),d7
+#endif
+ bne 6b | if still more bits, go back to normal case
+ | otherwise we fall through
+
+| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the
+| signs are stored in a0 and a1).
+
+Laddsf$3:
+| Here we have to decide whether to add or subtract the numbers
+#ifndef __mcoldfire__
+ exg d6,a0 | get signs back
+ exg d7,a1 | and save the exponents
+#else
+ movel d6,d4
+ movel a0,d6
+ movel d4,a0
+ movel d7,d4
+ movel a1,d7
+ movel d4,a1
+#endif
+ eorl d6,d7 | combine sign bits
+ bmi Lsubsf$0 | if negative a and b have opposite
+ | sign so we actually subtract the
+ | numbers
+
+| Here we have both positive or both negative
+#ifndef __mcoldfire__
+ exg d6,a0 | now we have the exponent in d6
+#else
+ movel d6,d4
+ movel a0,d6
+ movel d4,a0
+#endif
+ movel a0,d7 | and sign in d7
+ andl IMM (0x80000000),d7
+| Here we do the addition.
+ addl d3,d1
+ addxl d2,d0
+| Note: now we have d2, d3, d4 and d5 to play with!
+
+| Put the exponent, in the first byte, in d2, to use the "standard" rounding
+| routines:
+ movel d6,d2
+#ifndef __mcoldfire__
+ lsrw IMM (8),d2
+#else
+ lsrl IMM (8),d2
+#endif
+
+| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| Since the addition (unlike the subtraction!) could have set
+| one more bit, we check for this:
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq 1f
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+#endif
+ addl IMM (1),d2
+1:
+ lea pc@(Laddsf$4),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Laddsf$4:
+| Put back the exponent, but check for overflow.
+#ifndef __mcoldfire__
+ cmpw IMM (0xff),d2
+#else
+ cmpl IMM (0xff),d2
+#endif
+ bhi 1f
+ bclr IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (7),d2
+#else
+ lsll IMM (7),d2
+#endif
+ swap d2
+ orl d2,d0
+ bra Laddsf$ret
+1:
+ moveq IMM (ADD),d5
+ bra Lf$overflow
+
+Lsubsf$0:
+| We are here if a > 0 and b < 0 (sign bits cleared).
+| Here we do the subtraction.
+ movel d6,d7 | put sign in d7
+ andl IMM (0x80000000),d7
+
+ subl d3,d1 | result in d0-d1
+ subxl d2,d0 |
+ beq Laddsf$ret | if zero just exit
+ bpl 1f | if positive skip the following
+ bchg IMM (31),d7 | change sign bit in d7
+ negl d1
+ negxl d0
+1:
+#ifndef __mcoldfire__
+ exg d2,a0 | now we have the exponent in d2
+ lsrw IMM (8),d2 | put it in the first byte
+#else
+ movel d2,d4
+ movel a0,d2
+ movel d4,a0
+ lsrl IMM (8),d2 | put it in the first byte
+#endif
+
+| Now d0-d1 is positive and the sign bit is in d7.
+
+| Note that we do not have to normalize, since in the subtraction bit
+| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by
+| the rounding routines themselves.
+ lea pc@(Lsubsf$1),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lsubsf$1:
+| Put back the exponent (we can't have overflow!). '
+ bclr IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+ lslw IMM (7),d2
+#else
+ lsll IMM (7),d2
+#endif
+ swap d2
+ orl d2,d0
+ bra Laddsf$ret
+
+| If one of the numbers was too small (difference of exponents >=
+| FLT_MANT_DIG+2) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Laddsf$a$small:
+ movel a6@(12),d0
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Laddsf$b$small:
+ movel a6@(8),d0
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+| If the numbers are denormalized remember to put exponent equal to 1.
+
+Laddsf$a$den:
+ movel d5,d6 | d5 contains 0x01000000
+ swap d6
+ bra Laddsf$1
+
+Laddsf$b$den:
+ movel d5,d7
+ swap d7
+ notl d4 | make d4 into a mask for the fraction
+ | (this was not executed after the jump)
+ bra Laddsf$2
+
+| The rest is mainly code for the different results which can be
+| returned (checking always for +/-INFINITY and NaN).
+
+Laddsf$b:
+| Return b (if a is zero).
+ movel a6@(12),d0
+ cmpl IMM (0x80000000),d0 | Check if b is -0
+ bne 1f
+ movel a0,d7
+ andl IMM (0x80000000),d7 | Use the sign of a
+ clrl d0
+ bra Laddsf$ret
+Laddsf$a:
+| Return a (if b is zero).
+ movel a6@(8),d0
+1:
+ moveq IMM (ADD),d5
+| We have to check for NaN and +/-infty.
+ movel d0,d7
+ andl IMM (0x80000000),d7 | put sign in d7
+ bclr IMM (31),d0 | clear sign
+ cmpl IMM (INFINITY),d0 | check for infty or NaN
+ bge 2f
+ movel d0,d0 | check for zero (we do this because we don't '
+	bne	Laddsf$ret	| want to return -0 by mistake)
+ bclr IMM (31),d7 | if zero be sure to clear sign
+ bra Laddsf$ret | if everything OK just return
+2:
+| The value to be returned is either +/-infty or NaN
+ andl IMM (0x007fffff),d0 | check for NaN
+ bne Lf$inop | if mantissa not zero is NaN
+ bra Lf$infty
+
+Laddsf$ret:
+| Normal exit (a and b nonzero, result is not NaN nor +/-infty).
+| We have to clear the exception flags (just the exception type).
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+ orl d7,d0 | put sign bit
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | restore data registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 | and return
+ rts
+
+Laddsf$ret$den:
+| Return a denormalized number (for addition we don't signal underflow) '
+ lsrl IMM (1),d0 | remember to shift right back once
+ bra Laddsf$ret | and return
+
+| Note: when adding two floats of the same sign if either one is
+| NaN we return NaN without regard to whether the other is finite or
+| not. When subtracting them (i.e., when adding two numbers of
+| opposite signs) things are more complicated: if both are INFINITY
+| we return NaN, if only one is INFINITY and the other is NaN we return
+| NaN, but if it is finite we return INFINITY with the corresponding sign.
+
+Laddsf$nf:
+ moveq IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+ movel a6@(8),d0 | get the numbers back (remember that we
+ movel a6@(12),d1 | did some processing already)
+ movel IMM (INFINITY),d4 | useful constant (INFINITY)
+ movel d0,d2 | save sign bits
+ movel d1,d3
+ bclr IMM (31),d0 | clear sign bits
+ bclr IMM (31),d1
+| We know that one of them is either NaN or +/-INFINITY.
+| Check for NaN (if either one is NaN return NaN)
+ cmpl d4,d0 | check first a (d0)
+ bhi Lf$inop
+ cmpl d4,d1 | check now b (d1)
+ bhi Lf$inop
+| Now comes the check for +/-INFINITY. If the signs are equal we can
+| return INFINITY with the common sign; but if we are subtracting them
+| (opposite signs) we first have to check whether both are infinite.
+ eorl d3,d2 | to check sign bits
+ bmi 1f
+ movel d0,d7
+ andl IMM (0x80000000),d7 | get (common) sign bit
+ bra Lf$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they must both be infinite, so we
+| return NaN).
+ cmpl d1,d0 | are both infinite?
+ beq Lf$inop | if so return NaN
+
+ movel d0,d7
+ andl IMM (0x80000000),d7 | get a's sign bit '
+ cmpl d4,d0 | test now for infinity
+ beq Lf$infty | if a is INFINITY return with this sign
+ bchg IMM (31),d7 | else we know b is INFINITY and has
+ bra Lf$infty | the opposite sign
+
+|=============================================================================
+| __mulsf3
+|=============================================================================
+
+| float __mulsf3(float, float);
+ FUNC(__mulsf3)
+SYM (__mulsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0
+ movel a6@(12),d1 | and b into d1
+ movel d0,d7 | d7 will hold the sign of the product
+ eorl d1,d7 |
+ andl IMM (0x80000000),d7
+ movel IMM (INFINITY),d6 | useful constant (+INFINITY)
+ movel d6,d5 | another (mask for fraction)
+ notl d5 |
+ movel IMM (0x00800000),d4 | this is to put hidden bit back
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d2 |
+ beq Lmulsf$a$0 | branch if a is zero
+ bclr IMM (31),d1 | get rid of b's sign bit '
+ movel d1,d3 |
+ beq Lmulsf$b$0 | branch if b is zero
+ cmpl d6,d0 | is a big?
+ bhi Lmulsf$inop | if a is NaN return NaN
+ beq Lmulsf$inf | if a is INFINITY we have to check b
+ cmpl d6,d1 | now compare b with INFINITY
+ bhi Lmulsf$inop | is b NaN?
+ beq Lmulsf$overflow | is b INFINITY?
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3.
+ andl d6,d2 | and isolate exponent in d2
+	beq	Lmulsf$a$den	| if exponent is zero we have a denormalized number
+ andl d5,d0 | and isolate fraction
+ orl d4,d0 | and put hidden bit back
+ swap d2 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (7),d2 |
+#else
+ lsrl IMM (7),d2 |
+#endif
+Lmulsf$1:			|
+ andl d6,d3 |
+ beq Lmulsf$b$den |
+ andl d5,d1 |
+ orl d4,d1 |
+ swap d3 |
+#ifndef __mcoldfire__
+ lsrw IMM (7),d3 |
+#else
+ lsrl IMM (7),d3 |
+#endif
+Lmulsf$2: |
+#ifndef __mcoldfire__
+ addw d3,d2 | add exponents
+ subw IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#else
+ addl d3,d2 | add exponents
+ subl IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit FLT_MANT_DIG-1 set (even if they were
+| denormalized to start with!), which means that in the product
+| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the
+| high long) is set.
+
+| To do the multiplication let us move the number a little bit around ...
+ movel d1,d6 | second operand in d6
+ movel d0,d5 | first operand in d4-d5
+ movel IMM (0),d4
+ movel d4,d1 | the sums will go in d0-d1
+ movel d4,d0
+
+| now bit FLT_MANT_DIG-1 becomes bit 31:
+ lsll IMM (31-FLT_MANT_DIG+1),d6
+
+| Start the loop (we loop #FLT_MANT_DIG times):
+ moveq IMM (FLT_MANT_DIG-1),d3
+1: addl d1,d1 | shift sum
+ addxl d0,d0
+ lsll IMM (1),d6 | get bit bn
+ bcc 2f | if not set skip sum
+ addl d5,d1 | add a
+ addxl d4,d0
+2:
+#ifndef __mcoldfire__
+ dbf d3,1b | loop back
+#else
+ subql IMM (1),d3
+ bpl 1b
+#endif
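+
+| The loop above is the classical shift-and-add multiplication; a rough
+| C equivalent (hypothetical names, unsigned 64-bit product assumed):
+|
+|   unsigned long long prod = 0;
+|   unsigned int bits = b << (31 - FLT_MANT_DIG + 1); /* bit 23 -> 31 */
+|   for (i = 0; i < FLT_MANT_DIG; i++)
+|     {
+|       prod <<= 1;              /* shift the partial sum */
+|       if (bits & 0x80000000)   /* next multiplier bit set? */
+|         prod += a;             /* then add the multiplicand */
+|       bits <<= 1;
+|     }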
+
+| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG
+| (mod 32) of d0 set. The first thing to do now is to normalize it so bit
+| FLT_MANT_DIG is set (to do the rounding).
+#ifndef __mcoldfire__
+ rorl IMM (6),d1
+ swap d1
+ movew d1,d3
+ andw IMM (0x03ff),d3
+ andw IMM (0xfd00),d1
+#else
+ movel d1,d3
+ lsll IMM (8),d1
+ addl d1,d1
+ addl d1,d1
+ moveq IMM (22),d5
+ lsrl d5,d3
+ orl d3,d1
+ andl IMM (0xfffffd00),d1
+#endif
+ lsll IMM (8),d0
+ addl d0,d0
+ addl d0,d0
+#ifndef __mcoldfire__
+ orw d3,d0
+#else
+ orl d3,d0
+#endif
+
+ moveq IMM (MULTIPLY),d5
+
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq Lround$exit
+#ifndef __mcoldfire__
+ lsrl IMM (1),d0
+ roxrl IMM (1),d1
+ addw IMM (1),d2
+#else
+ lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ addql IMM (1),d2
+#endif
+ bra Lround$exit
+
+Lmulsf$inop:
+ moveq IMM (MULTIPLY),d5
+ bra Lf$inop
+
+Lmulsf$overflow:
+ moveq IMM (MULTIPLY),d5
+ bra Lf$overflow
+
+Lmulsf$inf:
+ moveq IMM (MULTIPLY),d5
+| If either is NaN return NaN; else both are (maybe infinite) numbers, so
+| return INFINITY with the correct sign (which is in d7).
+ cmpl d6,d1 | is b NaN?
+ bhi Lf$inop | if so return NaN
+ bra Lf$overflow | else return +/-INFINITY
+
+| If either number is zero return zero, unless the other is +/-INFINITY,
+| or NaN, in which case we return NaN.
+Lmulsf$b$0:
+| Here d1 (==b) is zero.
+ movel a6@(8),d1 | get a again to check for non-finiteness
+ bra 1f
+Lmulsf$a$0:
+ movel a6@(12),d1 | get b again to check for non-finiteness
+1: bclr IMM (31),d1 | clear sign bit
+ cmpl IMM (INFINITY),d1 | and check for a large exponent
+ bge Lf$inop | if b is +/-INFINITY or NaN return NaN
+ movel d7,d0 | else return signed zero
+ PICLEA SYM (_fpCCR),a0 |
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| hidden bit back into the fraction; instead we shift left until bit 23
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+Lmulsf$a$den:
+ movel IMM (1),d2
+ andl d5,d0
+1: addl d0,d0 | shift a left (until bit 23 is set)
+#ifndef __mcoldfire__
+ subw IMM (1),d2 | and adjust exponent
+#else
+ subql IMM (1),d2 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d0
+ bne Lmulsf$1 |
+ bra 1b | else loop back
+
+Lmulsf$b$den:
+ movel IMM (1),d3
+ andl d5,d1
+1: addl d1,d1 | shift b left until bit 23 is set
+#ifndef __mcoldfire__
+ subw IMM (1),d3 | and adjust exponent
+#else
+ subql IMM (1),d3 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d1
+ bne Lmulsf$2 |
+ bra 1b | else loop back
+
+|=============================================================================
+| __divsf3
+|=============================================================================
+
+| float __divsf3(float, float);
+ FUNC(__divsf3)
+SYM (__divsf3):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ movel a6@(8),d0 | get a into d0
+ movel a6@(12),d1 | and b into d1
+ movel d0,d7 | d7 will hold the sign of the result
+ eorl d1,d7 |
+ andl IMM (0x80000000),d7 |
+ movel IMM (INFINITY),d6 | useful constant (+INFINITY)
+ movel d6,d5 | another (mask for fraction)
+ notl d5 |
+ movel IMM (0x00800000),d4 | this is to put hidden bit back
+ bclr IMM (31),d0 | get rid of a's sign bit '
+ movel d0,d2 |
+ beq Ldivsf$a$0 | branch if a is zero
+ bclr IMM (31),d1 | get rid of b's sign bit '
+ movel d1,d3 |
+ beq Ldivsf$b$0 | branch if b is zero
+ cmpl d6,d0 | is a big?
+ bhi Ldivsf$inop | if a is NaN return NaN
+ beq Ldivsf$inf | if a is INFINITY we have to check b
+ cmpl d6,d1 | now compare b with INFINITY
+ bhi Ldivsf$inop | if b is NaN return NaN
+ beq Ldivsf$underflow
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3 and normalize the numbers to
+| ensure that the ratio of the fractions is close to 1. We do this by
+| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set.
+ andl d6,d2 | and isolate exponent in d2
+ beq Ldivsf$a$den | if exponent is zero we have a denormalized
+ andl d5,d0 | and isolate fraction
+ orl d4,d0 | and put hidden bit back
+ swap d2 | I like exponents in the first byte
+#ifndef __mcoldfire__
+ lsrw IMM (7),d2 |
+#else
+ lsrl IMM (7),d2 |
+#endif
+Ldivsf$1: |
+ andl d6,d3 |
+ beq Ldivsf$b$den |
+ andl d5,d1 |
+ orl d4,d1 |
+ swap d3 |
+#ifndef __mcoldfire__
+ lsrw IMM (7),d3 |
+#else
+ lsrl IMM (7),d3 |
+#endif
+Ldivsf$2: |
+#ifndef __mcoldfire__
+ subw d3,d2 | subtract exponents
+ addw IMM (F_BIAS),d2 | and add bias
+#else
+ subl d3,d2 | subtract exponents
+ addl IMM (F_BIAS),d2 | and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1)
+| d1 holds b (second operand, bit FLT_MANT_DIG=1)
+| d2 holds the difference of the exponents, corrected by the bias
+| d7 holds the sign of the ratio
+| d4, d5, d6 hold some constants
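+| The loop below forms the quotient bit by bit, as in long division by
+| hand; a rough C model (hypothetical names, unsigned quantities
+| assumed):
+|
+|   unsigned int q = 0;
+|   for (i = FLT_MANT_DIG + 1; i >= 0; i--)
+|     {
+|       if (a >= b)              /* next quotient bit is 1 */
+|         {
+|           q |= 1u << i;
+|           a -= b;
+|         }
+|       a <<= 1;                 /* bring up the next bit of a */
+|     }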
+ movel d7,a0 | d6-d7 will hold the ratio of the fractions
+ movel IMM (0),d6 |
+ movel d6,d7
+
+ moveq IMM (FLT_MANT_DIG+1),d3
+1: cmpl d0,d1 | is a < b?
+ bhi 2f |
+ bset d3,d6 | set a bit in d6
+ subl d1,d0 | if a >= b a <-- a-b
+ beq 3f | if a is zero, exit
+2: addl d0,d0 | multiply a by 2
+#ifndef __mcoldfire__
+ dbra d3,1b
+#else
+ subql IMM (1),d3
+ bpl 1b
+#endif
+
+| Now we keep going to set the sticky bit ...
+ moveq IMM (FLT_MANT_DIG),d3
+1: cmpl d0,d1
+ ble 2f
+ addl d0,d0
+#ifndef __mcoldfire__
+ dbra d3,1b
+#else
+ subql IMM(1),d3
+ bpl 1b
+#endif
+ movel IMM (0),d1
+ bra 3f
+2: movel IMM (0),d1
+#ifndef __mcoldfire__
+ subw IMM (FLT_MANT_DIG),d3
+ addw IMM (31),d3
+#else
+ subl IMM (FLT_MANT_DIG),d3
+ addl IMM (31),d3
+#endif
+ bset d3,d1
+3:
+ movel d6,d0 | put the ratio in d0-d1
+ movel a0,d7 | get sign back
+
+| Because of the normalization we did before we are guaranteed that
+| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set,
+| bit 25 could be set, and if it is not set then bit 24 is necessarily set.
+ btst IMM (FLT_MANT_DIG+1),d0
+ beq 1f | if it is not set, then bit 24 is set
+ lsrl IMM (1),d0 |
+#ifndef __mcoldfire__
+ addw IMM (1),d2 |
+#else
+ addl IMM (1),d2 |
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+ moveq IMM (DIVIDE),d5
+ bra Lround$exit
+
+Ldivsf$inop:
+ moveq IMM (DIVIDE),d5
+ bra Lf$inop
+
+Ldivsf$overflow:
+ moveq IMM (DIVIDE),d5
+ bra Lf$overflow
+
+Ldivsf$underflow:
+ moveq IMM (DIVIDE),d5
+ bra Lf$underflow
+
+Ldivsf$a$0:
+ moveq IMM (DIVIDE),d5
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+ andl IMM (0x7fffffff),d1 | clear sign bit and test b
+ beq Lf$inop | if b is also zero return NaN
+ cmpl IMM (INFINITY),d1 | check for NaN
+ bhi Lf$inop |
+ movel d7,d0 | else return signed zero
+ PICLEA SYM (_fpCCR),a0 |
+ movew IMM (0),a0@ |
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 |
+#else
+ moveml sp@,d2-d7 |
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6 |
+ rts |
+
+Ldivsf$b$0:
+ moveq IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit
+| cleared already.
+ cmpl IMM (INFINITY),d0 | compare d0 with INFINITY
+ bhi Lf$inop | if larger it is NaN
+ bra Lf$div$0 | else signal DIVIDE_BY_ZERO
+
+Ldivsf$inf:
+ moveq IMM (DIVIDE),d5
+| If a is INFINITY we have to check b
+ cmpl IMM (INFINITY),d1 | compare b with INFINITY
+ bge Lf$inop | if b is NaN or INFINITY return NaN
+ bra Lf$overflow | else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| bit back into the fraction.
+Ldivsf$a$den:
+ movel IMM (1),d2
+ andl d5,d0
+1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set
+#ifndef __mcoldfire__
+ subw IMM (1),d2 | and adjust exponent
+#else
+ subl IMM (1),d2 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d0
+ bne Ldivsf$1
+ bra 1b
+
+Ldivsf$b$den:
+ movel IMM (1),d3
+ andl d5,d1
+1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set
+#ifndef __mcoldfire__
+ subw IMM (1),d3 | and adjust exponent
+#else
+ subl IMM (1),d3 | and adjust exponent
+#endif
+ btst IMM (FLT_MANT_DIG-1),d1
+ bne Ldivsf$2
+ bra 1b
+
+Lround$exit:
+| This is a common exit point for __mulsf3 and __divsf3.
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+ cmpw IMM (-FLT_MANT_DIG-1),d2
+#else
+ cmpl IMM (-FLT_MANT_DIG-1),d2
+#endif
+ blt Lf$underflow
+| It could happen that the exponent is less than 1, in which case the
+| number is denormalized. In this case we shift right and adjust the
+| exponent until it becomes 1 or the fraction is zero (in the latter case
+| we signal underflow and return zero).
+ movel IMM (0),d6 | d6 is used temporarily
+#ifndef __mcoldfire__
+ cmpw IMM (1),d2 | if the exponent is less than 1 we
+#else
+ cmpl IMM (1),d2 | if the exponent is less than 1 we
+#endif
+ bge 2f | have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+ addw IMM (1),d2 | adjust the exponent
+ lsrl IMM (1),d0 | shift right once
+ roxrl IMM (1),d1 |
+ roxrl IMM (1),d6 | d6 collect bits we would lose otherwise
+ cmpw IMM (1),d2 | is the exponent 1 already?
+#else
+ addql IMM (1),d2 | adjust the exponent
+ lsrl IMM (1),d6
+ btst IMM (0),d1
+ beq 11f
+ bset IMM (31),d6
+11: lsrl IMM (1),d1
+ btst IMM (0),d0
+ beq 10f
+ bset IMM (31),d1
+10: lsrl IMM (1),d0
+ cmpl IMM (1),d2 | is the exponent 1 already?
+#endif
+	beq	2f		| if so we are done
+	bra	1b		| else loop back
+ bra Lf$underflow | safety check, shouldn't execute '
+2: orl d6,d1 | this is a trick so we don't lose '
+ | the extra bits which were flushed right
+| Now call the rounding routine (which takes care of denormalized numbers):
+ lea pc@(Lround$0),a0 | to return from rounding routine
+ PICLEA SYM (_fpCCR),a1 | check the rounding mode
+#ifdef __mcoldfire__
+ clrl d6
+#endif
+ movew a1@(6),d6 | rounding mode in d6
+ beq Lround$to$nearest
+#ifndef __mcoldfire__
+ cmpw IMM (ROUND_TO_PLUS),d6
+#else
+ cmpl IMM (ROUND_TO_PLUS),d6
+#endif
+ bhi Lround$to$minus
+ blt Lround$to$zero
+ bra Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to '
+| check again for underflow!). We have to check for overflow or for a
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 255).
+#ifndef __mcoldfire__
+ cmpw IMM (0x00ff),d2
+#else
+ cmpl IMM (0x00ff),d2
+#endif
+ bge Lf$overflow
+| Now check for a denormalized number (exponent==0).
+ movew d2,d2
+ beq Lf$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+ lslw IMM (7),d2 | exponent back to fourth byte
+#else
+ lsll IMM (7),d2 | exponent back to fourth byte
+#endif
+ bclr IMM (FLT_MANT_DIG-1),d0
+ swap d0 | and put back exponent
+#ifndef __mcoldfire__
+ orw d2,d0 |
+#else
+ orl d2,d0
+#endif
+ swap d0 |
+ orl d7,d0 | and sign also
+
+ PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+|=============================================================================
+| __negsf2
+|=============================================================================
+
+| This is trivial and could be shorter if we didn't bother checking for NaN '
+| and +/-INFINITY.
+
+| float __negsf2(float);
+ FUNC(__negsf2)
+SYM (__negsf2):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@-
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (NEGATE),d5
+ movel a6@(8),d0 | get number to negate in d0
+ bchg IMM (31),d0 | negate
+ movel d0,d1 | make a positive copy
+ bclr IMM (31),d1 |
+ tstl d1 | check for zero
+ beq 2f | if zero (either sign) return +zero
+ cmpl IMM (INFINITY),d1 | compare to +INFINITY
+ blt 1f |
+ bhi Lf$inop | if larger (fraction not zero) is NaN
+ movel d0,d7 | else get sign and return INFINITY
+ andl IMM (0x80000000),d7
+ bra Lf$infty
+1: PICLEA SYM (_fpCCR),a0
+ movew IMM (0),a0@
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+2: bclr IMM (31),d0
+ bra 1b
+
+|=============================================================================
+| __cmpsf2
+|=============================================================================
+
+GREATER = 1
+LESS = -1
+EQUAL = 0
+
+| int __cmpsf2_internal(float, float, int);
+SYM (__cmpsf2_internal):
+#ifndef __mcoldfire__
+ link a6,IMM (0)
+ moveml d2-d7,sp@- | save registers
+#else
+ link a6,IMM (-24)
+ moveml d2-d7,sp@
+#endif
+ moveq IMM (COMPARE),d5
+ movel a6@(8),d0 | get first operand
+ movel a6@(12),d1 | get second operand
+| Check if either is NaN, and in that case return the caller-supplied
+| unordered value (the third argument) and signal INVALID_OPERATION.
+| Check also if either is zero, and clear the signs if necessary.
+ movel d0,d6
+ andl IMM (0x7fffffff),d0
+ beq Lcmpsf$a$0
+ cmpl IMM (0x7f800000),d0
+ bhi Lcmpf$inop
+Lcmpsf$1:
+ movel d1,d7
+ andl IMM (0x7fffffff),d1
+ beq Lcmpsf$b$0
+ cmpl IMM (0x7f800000),d1
+ bhi Lcmpf$inop
+Lcmpsf$2:
+| Check the signs
+ eorl d6,d7
+ bpl 1f
+| If the signs are not equal check if a >= 0
+ tstl d6
+ bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b
+ bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+ tstl d6
+ bpl 1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+ exg d0,d1
+#else
+ movel d0,d7
+ movel d1,d0
+ movel d7,d1
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+ cmpl d0,d1
+ bhi Lcmpsf$b$gt$a | |b| > |a|
+ bne Lcmpsf$a$gt$b | |b| < |a|
+| If we got here a == b.
+ movel IMM (EQUAL),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+#endif
+ unlk a6
+ rts
+Lcmpsf$a$gt$b:
+ movel IMM (GREATER),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+Lcmpsf$b$gt$a:
+ movel IMM (LESS),d0
+#ifndef __mcoldfire__
+ moveml sp@+,d2-d7 | put back the registers
+#else
+ moveml sp@,d2-d7
+ | XXX if frame pointer is ever removed, stack pointer must
+ | be adjusted here.
+#endif
+ unlk a6
+ rts
+
+Lcmpsf$a$0:
+ bclr IMM (31),d6
+ bra Lcmpsf$1
+Lcmpsf$b$0:
+ bclr IMM (31),d7
+ bra Lcmpsf$2
+
+Lcmpf$inop:
+ movl a6@(16),d0
+ moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+ moveq IMM (SINGLE_FLOAT),d6
+ PICJUMP $_exception_handler
+
+| int __cmpsf2(float, float);
+ FUNC(__cmpsf2)
+SYM (__cmpsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+
+|=============================================================================
+| rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1, with the exponent in register d2. They assume that the
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d2.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1: btst IMM (FLT_MANT_DIG),d0
+ bne 2f | if set the number is normalized
+| Normalize by shifting left until bit #FLT_MANT_DIG is set or the exponent
+| is one (remember that a denormalized number corresponds to an
+| exponent of -F_BIAS+1).
+#ifndef __mcoldfire__
+ cmpw IMM (1),d2 | remember that the exponent is at least one
+#else
+ cmpl IMM (1),d2 | remember that the exponent is at least one
+#endif
+ beq 2f | an exponent of one means denormalized
+ addl d1,d1 | else shift and adjust the exponent
+ addxl d0,d0 |
+#ifndef __mcoldfire__
+ dbra d2,1b |
+#else
+ subql IMM (1),d2
+ bpl 1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f.
+| If delta == 1, we make sure the rounded number will be even
+| (after shifting).
+ btst IMM (0),d0 | is delta < 1?
+ beq 2f | if so, do not do anything
+	tstl	d1		| is delta == 1?
+	bne	1f		| if not (delta > 1) round up
+	movel	d0,d1		|
+	andl	IMM (2),d1	| bit 1 is the least significant bit after the shift
+ addl d1,d0 |
+ bra 2f |
+1: movel IMM (1),d1 | else add 1
+ addl d1,d0 |
+| Shift right once (because we used bit #FLT_MANT_DIG!).
+2: lsrl IMM (1),d0
+| Now check again bit #FLT_MANT_DIG (rounding could have produced a
+| 'fraction overflow' ...).
+ btst IMM (FLT_MANT_DIG),d0
+ beq 1f
+ lsrl IMM (1),d0
+#ifndef __mcoldfire__
+ addw IMM (1),d2
+#else
+ addql IMM (1),d2
+#endif
+1:
+| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we
+| have to put the exponent to zero and return a denormalized number.
+ btst IMM (FLT_MANT_DIG-1),d0
+ beq 1f
+ jmp a0@
+1: movel IMM (0),d2
+ jmp a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+ jmp a0@
+#endif /* L_float */
+
+| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2,
+| __ledf2, __ltdf2 to all return the same value as a direct call to
+| __cmpdf2 would. In this implementation, each of these routines
+| simply calls __cmpdf2. It would be more efficient to give the
+| __cmpdf2 routine several names, but separating them out will make it
+| easier to write efficient versions of these routines someday.
+| If the operands compare unordered, __gtdf2 and __gedf2 return -1.
+| The other routines return 1.
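+| For example, the compiler expands a C comparison such as (a > b) into
+| roughly (__gtdf2 (a, b) > 0), so the -1 returned for unordered
+| operands makes the test fail, as IEEE requires when either operand
+| is a NaN.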
+
+#ifdef L_eqdf2
+ .text
+ FUNC(__eqdf2)
+ .globl SYM (__eqdf2)
+SYM (__eqdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_eqdf2 */
+
+#ifdef L_nedf2
+ .text
+ FUNC(__nedf2)
+ .globl SYM (__nedf2)
+SYM (__nedf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_nedf2 */
+
+#ifdef L_gtdf2
+ .text
+ FUNC(__gtdf2)
+ .globl SYM (__gtdf2)
+SYM (__gtdf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_gtdf2 */
+
+#ifdef L_gedf2
+ .text
+ FUNC(__gedf2)
+ .globl SYM (__gedf2)
+SYM (__gedf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_gedf2 */
+
+#ifdef L_ltdf2
+ .text
+ FUNC(__ltdf2)
+ .globl SYM (__ltdf2)
+SYM (__ltdf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_ltdf2 */
+
+#ifdef L_ledf2
+ .text
+ FUNC(__ledf2)
+ .globl SYM (__ledf2)
+SYM (__ledf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(20),sp@-
+ movl a6@(16),sp@-
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpdf2_internal)
+ unlk a6
+ rts
+#endif /* L_ledf2 */
+
+| The comments above about __eqdf2, et al., also apply to __eqsf2,
+| et al., except that the latter call __cmpsf2 rather than __cmpdf2.
+
+#ifdef L_eqsf2
+ .text
+ FUNC(__eqsf2)
+ .globl SYM (__eqsf2)
+SYM (__eqsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_eqsf2 */
+
+#ifdef L_nesf2
+ .text
+ FUNC(__nesf2)
+ .globl SYM (__nesf2)
+SYM (__nesf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_nesf2 */
+
+#ifdef L_gtsf2
+ .text
+ FUNC(__gtsf2)
+ .globl SYM (__gtsf2)
+SYM (__gtsf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_gtsf2 */
+
+#ifdef L_gesf2
+ .text
+ FUNC(__gesf2)
+ .globl SYM (__gesf2)
+SYM (__gesf2):
+ link a6,IMM (0)
+ pea -1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_gesf2 */
+
+#ifdef L_ltsf2
+ .text
+ FUNC(__ltsf2)
+ .globl SYM (__ltsf2)
+SYM (__ltsf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_ltsf2 */
+
+#ifdef L_lesf2
+ .text
+ FUNC(__lesf2)
+ .globl SYM (__lesf2)
+SYM (__lesf2):
+ link a6,IMM (0)
+ pea 1
+ movl a6@(12),sp@-
+ movl a6@(8),sp@-
+ PICCALL SYM (__cmpsf2_internal)
+ unlk a6
+ rts
+#endif /* L_lesf2 */
+
+#if defined (__ELF__) && defined (__linux__)
+ /* Make stack non-executable for ELF linux targets. */
+ .section .note.GNU-stack,"",@progbits
+#endif
diff --git a/gcc/config/m68k/linux-unwind.h b/gcc/config/m68k/linux-unwind.h
new file mode 100644
index 000000000..053c15558
--- /dev/null
+++ b/gcc/config/m68k/linux-unwind.h
@@ -0,0 +1,158 @@
+/* DWARF2 EH unwinding support for Linux/m68k.
+ Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+
+/* <sys/ucontext.h> is unfortunately broken right now. */
+struct uw_ucontext {
+ unsigned long uc_flags;
+ struct ucontext *uc_link;
+ stack_t uc_stack;
+ mcontext_t uc_mcontext;
+ unsigned long uc_filler[80];
+ __sigset_t uc_sigmask;
+};
+
+#define MD_FALLBACK_FRAME_STATE_FOR m68k_fallback_frame_state
+
+#ifdef __mcoldfire__
+#define M68K_FP_SIZE 8
+#else
+#define M68K_FP_SIZE 12
+#endif
+
+static _Unwind_Reason_Code
+m68k_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned short *pc = context->ra;
+ long cfa;
+
+ /* moveq #__NR_sigreturn,%d0; trap #0 */
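+  /* 0x7077 encodes "moveq #0x77,%d0" (0x77 is 119, __NR_sigreturn
+     on m68k) and 0x4e40 encodes "trap #0".  */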
+ if (pc[0] == 0x7077 && pc[1] == 0x4e40)
+ {
+ struct sigcontext *sc;
+
+ /* Context is passed as the 3rd argument. */
+ sc = *(struct sigcontext **) (context->cfa + 8);
+
+ cfa = sc->sc_usp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = cfa - (long) context->cfa;
+
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long) &sc->sc_d0 - cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long) &sc->sc_d1 - cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long) &sc->sc_a0 - cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long) &sc->sc_a1 - cfa;
+
+#ifdef __uClinux__
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long) &sc->sc_a5 - cfa;
+#endif
+
+ fs->regs.reg[24].how = REG_SAVED_OFFSET;
+ fs->regs.reg[24].loc.offset = (long) &sc->sc_pc - cfa;
+
+#ifndef __uClinux__
+ if (*(int *) sc->sc_fpstate)
+ {
+ int *fpregs = (int *) sc->sc_fpregs;
+
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long) &fpregs[0] - cfa;
+ fs->regs.reg[17].how = REG_SAVED_OFFSET;
+ fs->regs.reg[17].loc.offset = (long) &fpregs[M68K_FP_SIZE/4] - cfa;
+ }
+#elif defined __mcffpu__
+# error Implement this when uClinux kernel is ported to an FPU architecture
+#endif
+ }
+#ifdef __mcoldfire__
+ /* move.l #__NR_rt_sigreturn,%d0; trap #0 */
+ else if (pc[0] == 0x203c && pc[1] == 0x0000 &&
+ pc[2] == 0x00ad && pc[3] == 0x4e40)
+#else
+ /* moveq #~__NR_rt_sigreturn,%d0; not.b %d0; trap #0 */
+ else if (pc[0] == 0x7052 && pc[1] == 0x4600 && pc[2] == 0x4e40)
+#endif
+ {
+ struct uw_ucontext *uc;
+ greg_t *gregs;
+ int i;
+
+ /* Context is passed as the 3rd argument. */
+ uc = *(struct uw_ucontext **) (context->cfa + 8);
+
+ gregs = uc->uc_mcontext.gregs;
+ cfa = gregs[15];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = cfa - (long) context->cfa;
+
+ /* register %d0-%d7/%a0-%a6 */
+ for (i = 0; i <= 14; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = (long) &gregs[i] - cfa;
+ }
+
+ /* return address */
+ fs->regs.reg[24].how = REG_SAVED_OFFSET;
+ fs->regs.reg[24].loc.offset = (long) &gregs[16] - cfa;
+
+#define uc_fpstate uc_filler[0]
+
+ if (uc->uc_fpstate)
+ {
+ long fpregs = (long) uc->uc_mcontext.fpregs.f_fpregs;
+
+ /* register %fp0-%fp7 */
+ for (i = 16; i <= 23; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = fpregs - cfa;
+ fpregs += M68K_FP_SIZE;
+ }
+ }
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ fs->retaddr_column = 24;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+#endif /* ifndef inhibit_libc */
diff --git a/gcc/config/m68k/linux.h b/gcc/config/m68k/linux.h
new file mode 100644
index 000000000..82417b477
--- /dev/null
+++ b/gcc/config/m68k/linux.h
@@ -0,0 +1,242 @@
+/* Definitions for Motorola 68k running Linux-based GNU systems with
+ ELF format.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2006,
+ 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (68k GNU/Linux with ELF)");
+
+/* Add %(asm_cpu_spec) to a generic definition of ASM_SPEC. */
+#undef ASM_SPEC
+#define ASM_SPEC "%(asm_cpu_spec) %(asm_pcrel_spec)"
+
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY 32
+
+/* For 68k machines this only needs to be true for the 68000.  */
+
+#undef STRICT_ALIGNMENT
+#define STRICT_ALIGNMENT 0
+#undef M68K_HONOR_TARGET_STRICT_ALIGNMENT
+#define M68K_HONOR_TARGET_STRICT_ALIGNMENT 0
+
+/* Here are four prefixes that are used by asm_fprintf to
+ facilitate customization for alternate assembler syntaxes.
+ Machines with no likelihood of an alternate syntax need not
+ define these and need not use asm_fprintf. */
+
+/* The prefix for register names.  Note that REGISTER_NAMES
+   is supposed to include this prefix.  Also note that this is NOT an
+   fprintf format string; it is a literal string.  */
+
+#undef REGISTER_PREFIX
+#define REGISTER_PREFIX "%"
+
+/* The prefix for local (compiler generated) labels.
+ These labels will not appear in the symbol table. */
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+#define ASM_COMMENT_START "|"
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* Provide a LINK_SPEC appropriate for GNU/Linux.  Here we provide support
+   for the special GCC options -static and -shared, which allow us to
+   link things in one of three modes (normal, static, or shared) by
+   applying the appropriate combinations of options at link-time.
+
+   When the -shared link option is used, a final link is not performed.  */
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-m m68kelf %{shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static}}"
+
+/* For compatibility with linux/a.out */
+
+#undef PCC_BITFIELD_TYPE_MATTERS
+
+/* Currently, JUMP_TABLES_IN_TEXT_SECTION must be defined in order to
+ keep switch tables in the text section. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* Use the default action for outputting the case label. */
+#undef ASM_OUTPUT_CASE_LABEL
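+/* Jump through the dispatch table that follows the jump instruction.
+   ColdFire scaled-index addressing needs a long (32-bit) index, so a
+   data-register index is sign-extended first; other 68k CPUs can index
+   with a 16-bit word directly.  */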
+#define ASM_RETURN_CASE_JUMP \
+ do { \
+ if (TARGET_COLDFIRE) \
+ { \
+ if (ADDRESS_REG_P (operands[0])) \
+ return "jmp %%pc@(2,%0:l)"; \
+ else \
+ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \
+ } \
+ else \
+ return "jmp %%pc@(2,%0:w)"; \
+ } while (0)
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) > 0) \
+ fprintf ((FILE), "%s%u\n", ALIGN_ASM_OP, 1 << (LOG));
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as uninitialized global
+ data. */
+
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+#define NO_PROFILE_COUNTERS 1
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (flag_pic) \
+ fprintf (FILE, "\tbsr.l _mcount@PLTPC\n"); \
+ else \
+ fprintf (FILE, "\tjbsr _mcount\n"); \
+}
+
+/* Do not break .stabs pseudos into continuations. */
+
+#define DBX_CONTIN_LENGTH 0
+
+/* 1 if N is a possible register number for a function value. For
+ m68k/SVR4 allow d0, a0, or fp0 as return registers, for integral,
+ pointer, or floating types, respectively. Reject fp0 if not using
+ a 68881 coprocessor. */
+
+#undef FUNCTION_VALUE_REGNO_P
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == D0_REG || (N) == A0_REG || (TARGET_68881 && (N) == FP0_REG))
+
+/* Define this to be true when FUNCTION_VALUE_REGNO_P is true for
+ more than one register. */
+
+#undef NEEDS_UNTYPED_CALL
+#define NEEDS_UNTYPED_CALL 1
+
+/* Define how to generate (in the callee) the output value of a
+ function and how to find (in the caller) the value returned by a
+ function. VALTYPE is the data type of the value (as a tree). If
+ the precise function being called is known, FUNC is its
+ FUNCTION_DECL; otherwise, FUNC is 0. For m68k/SVR4 generate the
+ result in d0, a0, or fp0 as appropriate. */
+
+#undef FUNCTION_VALUE
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ m68k_function_value (VALTYPE, FUNC)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE.
+ For m68k/SVR4 look for integer values in d0, pointer values in d0
+ (returned in both d0 and a0), and floating values in fp0. */
+
+#undef LIBCALL_VALUE
+#define LIBCALL_VALUE(MODE) \
+ m68k_libcall_value (MODE)
+
+/* For m68k SVR4, structures are returned using the reentrant
+ technique. */
+#undef PCC_STATIC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Finalize the trampoline by flushing the insn cache. */
+
+#undef FINALIZE_TRAMPOLINE
+#define FINALIZE_TRAMPOLINE(TRAMP) \
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), \
+ LCT_NORMAL, VOIDmode, 2, TRAMP, Pmode, \
+ plus_constant (TRAMP, TRAMPOLINE_SIZE), Pmode);
+
+/* Clear the instruction cache from `beg' to `end'. This makes an
+ inline system call to SYS_cacheflush. The arguments are as
+ follows:
+
+ cacheflush (addr, scope, cache, len)
+
+ addr - the start address for the flush
+ scope - the scope of the flush (see the cpush insn)
+ cache - which cache to flush (see the cpush insn)
+ len - a factor relating to the number of flushes to perform:
+ len/16 lines, or len/4096 pages. */
+
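+/* The "+ 32" below pads the length so that the cache lines at the
+   region's boundaries are covered too (our reading; the kernel counts
+   LEN/16 lines, as noted above).  */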
+#define CLEAR_INSN_CACHE(BEG, END) \
+{ \
+ register unsigned long _beg __asm ("%d1") = (unsigned long) (BEG); \
+ unsigned long _end = (unsigned long) (END); \
+ register unsigned long _len __asm ("%d4") = (_end - _beg + 32); \
+ __asm __volatile \
+ ("move%.l #123, %/d0\n\t" /* system call nr */ \
+ "move%.l #1, %/d2\n\t" /* clear lines */ \
+ "move%.l #3, %/d3\n\t" /* insn+data caches */ \
+ "trap #0" \
+ : /* no outputs */ \
+ : "d" (_beg), "d" (_len) \
+ : "%d0", "%d2", "%d3"); \
+}
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define MD_UNWIND_SUPPORT "config/m68k/linux-unwind.h"
diff --git a/gcc/config/m68k/m68020-elf.h b/gcc/config/m68k/m68020-elf.h
new file mode 100644
index 000000000..299657cdc
--- /dev/null
+++ b/gcc/config/m68k/m68020-elf.h
@@ -0,0 +1,30 @@
+/* Definitions of target machine for GNU compiler. "naked" 68020,
+ elf object files and debugging, version.
+ Copyright (C) 1987, 1988, 1992, 1995, 1996, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This comment is here to see if it will keep Sun's cpp from dying. */
+
+/* We need to override the default specs from elfos.h. This suppresses the
+ loading of crt0.o by gcc's default linker spec. For embedded targets crt0
+ now comes from the linker script. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crtbegin.o%s"
+
+/* end of m68020-elf.h */
diff --git a/gcc/config/m68k/m68k-devices.def b/gcc/config/m68k/m68k-devices.def
new file mode 100644
index 000000000..4838fb062
--- /dev/null
+++ b/gcc/config/m68k/m68k-devices.def
@@ -0,0 +1,189 @@
+/* m68k device names -*- C -*-
+ Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Written by CodeSourcery
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file lists each target device that we support. It is used by
+ both C code and build scripts.
+
+ Following Freescale's lead, we group devices into families that share
+ the same core and extension units. Devices in these families differ
+ only in the set of peripherals they provide. We pick one device to
+ act as the representative of each family.
+
+ We further group device families into multilibs, again picking one
+ family (and its representative device) to represent each multilib.
+
+ Devices are declared using the construct:
+
+ M68K_DEVICE (NAME, ENUM_VALUE, FAMILY, MULTILIB, MICROARCH, ISA, FLAGS)
+
+ where the arguments are as follows:
+
+ NAME
+ The name of the device as a string. This string acts as the
+ device's -mcpu argument and is guaranteed to be unique.
+
+ ENUM_VALUE
+ The associated value in the target_device enumeration.
+ This value is also guaranteed to be unique.
+
+ FAMILY
+ The NAME field of the family's representative device.
+
+ MULTILIB
+ The NAME field of the multilib's representative device.
+
+ MICROARCH
+ The class of core used by devices in this family. The field
+ is a uarch enumeration value without the leading "u".
+
+ ISA
+ The ISA implemented by this family. The field is
+ an m68k_isa enumeration value.
+
+ FLAGS
+ The FL_* flags that apply to this family, excluding FL_FOR_isa_*.
+ See m68k.h for the full list.
+
+ There is a bit of duplication between devices in the same family,
+ but this approach makes scripting easier. We keep each entry on
+ a single line for the same reason.
+
+   As the compiler does not (currently) generate MAC or EMAC
+   instructions, we do not need separate multilibs for cores that only
+   differ in their MAC functionality.  */
+
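+/* As an illustration: the "68030" entry below is its own family
+   representative (FAMILY "68030") but reuses the "68020" multilib,
+   since both implement isa_20.  */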
+/* 680x0 series processors. */
+M68K_DEVICE ("68000", m68000, "68000", "68000", 68000, isa_00, 0)
+M68K_DEVICE ("68010", m68010, "68010", "68000", 68010, isa_10, 0)
+M68K_DEVICE ("68020", m68020, "68020", "68020", 68020, isa_20, FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("68030", m68030, "68030", "68020", 68030, isa_20, FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("68040", m68040, "68040", "68040", 68040, isa_40, FL_MMU)
+M68K_DEVICE ("68060", m68060, "68060", "68060", 68060, isa_40, FL_MMU)
+M68K_DEVICE ("68302", m68302, "68302", "68000", 68000, isa_00, FL_MMU)
+M68K_DEVICE ("68332", m68332, "68332", "cpu32", cpu32, isa_cpu32, FL_MMU)
+M68K_DEVICE ("cpu32", cpu32, "cpu32", "cpu32", cpu32, isa_cpu32, FL_MMU)
+
+/* ColdFire CFV1 processor. */
+/* For historical reasons, the 51 multilib is named 51qe. */
+M68K_DEVICE ("51", mcf51, "51", "51qe", cfv1, isa_c, FL_CF_USP)
+M68K_DEVICE ("51ac", mcf51ac, "51", "51qe", cfv1, isa_c, FL_CF_USP)
+M68K_DEVICE ("51cn", mcf51cn, "51", "51qe", cfv1, isa_c, FL_CF_USP)
+M68K_DEVICE ("51em", mcf51em, "51", "51qe", cfv1, isa_c, FL_CF_USP | FL_CF_MAC)
+M68K_DEVICE ("51jm", mcf51jm, "51", "51qe", cfv1, isa_c, FL_CF_USP)
+M68K_DEVICE ("51qe", mcf51qe, "51", "51qe", cfv1, isa_c, FL_CF_USP)
+
+/* ColdFire CFV2 processors. */
+M68K_DEVICE ("5202", mcf5202, "5206", "5206", cfv2, isa_a, 0)
+M68K_DEVICE ("5204", mcf5204, "5206", "5206", cfv2, isa_a, 0)
+M68K_DEVICE ("5206", mcf5206, "5206", "5206", cfv2, isa_a, 0)
+M68K_DEVICE ("5206e", mcf5206e, "5206e", "5206e", cfv2, isa_a, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5207", mcf5207, "5208", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5208", mcf5208, "5208", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5210a", mcf5210a, "5211a", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5211a", mcf5211a, "5211a", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5211", mcf5211, "5213", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5212", mcf5212, "5213", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5213", mcf5213, "5213", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5214", mcf5214, "5216", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5216", mcf5216, "5216", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5221x", mcf5221x, "5221x", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("52221", mcf52221, "52223", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("52223", mcf52223, "52223", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("52230", mcf52230, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52231", mcf52231, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52232", mcf52232, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52233", mcf52233, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52234", mcf52234, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52235", mcf52235, "52235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5224", mcf5224, "5225", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5225", mcf5225, "5225", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("52252", mcf52252, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52254", mcf52254, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52255", mcf52255, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52256", mcf52256, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52258", mcf52258, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52259", mcf52259, "52259", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52274", mcf52274, "52277", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("52277", mcf52277, "52277", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5232", mcf5232, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5233", mcf5233, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5234", mcf5234, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5235", mcf5235, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("523x", mcf523x, "5235", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5249", mcf5249, "5249", "5206e", cfv2, isa_a, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5250", mcf5250, "5250", "5206e", cfv2, isa_a, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5253", mcf5253, "5253", "5206e", cfv2, isa_a, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5270", mcf5270, "5271", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5271", mcf5271, "5271", "5208", cfv2, isa_aplus, FL_CF_HWDIV)
+M68K_DEVICE ("5272", mcf5272, "5272", "5206e", cfv2, isa_a, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5274", mcf5274, "5275", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5275", mcf5275, "5275", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5280", mcf5280, "5282", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5281", mcf5281, "5282", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5282", mcf5282, "5282", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("528x", mcf528x, "5282", "5208", cfv2, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+
+/* CFV3 processors. */
+M68K_DEVICE ("53011", mcf53011, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53012", mcf53012, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53013", mcf53013, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53014", mcf53014, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53015", mcf53015, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53016", mcf53016, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("53017", mcf53017, "53017", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5307", mcf5307, "5307", "5307", cfv3, isa_a, FL_CF_HWDIV | FL_CF_MAC)
+M68K_DEVICE ("5327", mcf5327, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5328", mcf5328, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5329", mcf5329, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("532x", mcf532x, "5329", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5372", mcf5372, "5373", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("5373", mcf5373, "5373", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+M68K_DEVICE ("537x", mcf537x, "5373", "5329", cfv3, isa_aplus, FL_CF_HWDIV | FL_CF_EMAC)
+
+/* CFV4/CFV4e processors. */
+M68K_DEVICE ("5407", mcf5407, "5407", "5407", cfv4, isa_b, FL_CF_MAC)
+M68K_DEVICE ("54410", mcf54410, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54415", mcf54415, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54416", mcf54416, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54417", mcf54417, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54418", mcf54418, "54418", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54450", mcf54450, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54451", mcf54451, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54452", mcf54452, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54453", mcf54453, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54454", mcf54454, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("54455", mcf54455, "54455", "54455", cfv4, isa_c, FL_CF_HWDIV | FL_CF_USP | FL_CF_EMAC | FL_MMU | FL_UCLINUX)
+M68K_DEVICE ("5470", mcf5470, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5471", mcf5471, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5472", mcf5472, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5473", mcf5473, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5474", mcf5474, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5475", mcf5475, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("547x", mcf547x, "5475", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5480", mcf5480, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5481", mcf5481, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5482", mcf5482, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5483", mcf5483, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5484", mcf5484, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("5485", mcf5485, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+M68K_DEVICE ("548x", mcf548x, "5485", "5475", cfv4e, isa_b, FL_CF_USP | FL_CF_EMAC | FL_CF_FPU | FL_MMU)
+
+/* Fido processor. */
+M68K_DEVICE ("fidoa", fidoa, "cpu32", "fidoa", cpu32, isa_cpu32, FL_FIDOA | FL_MMU)
diff --git a/gcc/config/m68k/m68k-modes.def b/gcc/config/m68k/m68k-modes.def
new file mode 100644
index 000000000..06297cad9
--- /dev/null
+++ b/gcc/config/m68k/m68k-modes.def
@@ -0,0 +1,25 @@
+/* M68k extra machine modes.
+ Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* These differ in the representation of the canonical NaN. */
+RESET_FLOAT_FORMAT (SF, motorola_single_format);
+RESET_FLOAT_FORMAT (DF, motorola_double_format);
+
+/* 80-bit floating point (IEEE extended, in a 96-bit field) */
+FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_motorola_format);
diff --git a/gcc/config/m68k/m68k-none.h b/gcc/config/m68k/m68k-none.h
new file mode 100644
index 000000000..8e7652885
--- /dev/null
+++ b/gcc/config/m68k/m68k-none.h
@@ -0,0 +1,19 @@
+/* Definitions of target machine for GNU compiler. "naked" 68020.
+ Copyright (C) 1994, 1996, 2003, 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
diff --git a/gcc/config/m68k/m68k-protos.h b/gcc/config/m68k/m68k-protos.h
new file mode 100644
index 000000000..ad0202630
--- /dev/null
+++ b/gcc/config/m68k/m68k-protos.h
@@ -0,0 +1,102 @@
+/* Definitions of target machine for GNU compiler. Sun 68000/68020 version.
+ Copyright (C) 2000, 2002, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Declare functions defined in m68k.c and used in templates.  */
+
+#ifdef RTX_CODE
+extern enum m68k_function_kind m68k_get_function_kind (tree);
+extern HOST_WIDE_INT m68k_initial_elimination_offset (int from, int to);
+
+extern void split_di (rtx[], int, rtx[], rtx[]);
+
+extern bool valid_mov3q_const (HOST_WIDE_INT);
+extern const char *output_move_simode (rtx *);
+extern const char *output_move_himode (rtx *);
+extern const char *output_move_qimode (rtx *);
+extern const char *output_move_stricthi (rtx *);
+extern const char *output_move_strictqi (rtx *);
+extern const char *output_move_double (rtx *);
+extern const char *output_move_const_single (rtx *);
+extern const char *output_move_const_double (rtx *);
+extern const char *output_btst (rtx *, rtx, rtx, rtx, int);
+extern const char *output_scc_di (rtx, rtx, rtx, rtx);
+extern const char *output_addsi3 (rtx *);
+extern const char *output_andsi3 (rtx *);
+extern const char *output_iorsi3 (rtx *);
+extern const char *output_xorsi3 (rtx *);
+extern const char *output_call (rtx);
+extern const char *output_sibcall (rtx);
+extern void output_dbcc_and_branch (rtx *);
+extern int floating_exact_log2 (rtx);
+extern bool strict_low_part_peephole_ok (enum machine_mode mode, rtx first_insn, rtx target);
+
+/* Functions from m68k.c used in macros. */
+extern int standard_68881_constant_p (rtx);
+extern void print_operand_address (FILE *, rtx);
+extern void print_operand (FILE *, rtx, int);
+extern bool m68k_output_addr_const_extra (FILE *, rtx);
+extern void notice_update_cc (rtx, rtx);
+extern bool m68k_legitimate_base_reg_p (rtx, bool);
+extern bool m68k_legitimate_index_reg_p (rtx, bool);
+extern bool m68k_illegitimate_symbolic_constant_p (rtx);
+extern bool m68k_matches_q_p (rtx);
+extern bool m68k_matches_u_p (rtx);
+extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern rtx m68k_legitimize_tls_address (rtx);
+extern bool m68k_tls_reference_p (rtx, bool);
+extern int valid_dbcc_comparison_p_2 (rtx, enum machine_mode);
+extern rtx m68k_libcall_value (enum machine_mode);
+extern rtx m68k_function_value (const_tree, const_tree);
+extern int emit_move_sequence (rtx *, enum machine_mode, rtx);
+extern bool m68k_movem_pattern_p (rtx, rtx, HOST_WIDE_INT, bool);
+extern const char *m68k_output_movem (rtx *, rtx, HOST_WIDE_INT, bool);
+extern void m68k_final_prescan_insn (rtx, rtx *, int);
+
+/* Functions from m68k.c used in constraints.md. */
+extern rtx m68k_unwrap_symbol (rtx, bool);
+
+/* Functions from m68k.c used in genattrtab. */
+#ifdef HAVE_ATTR_cpu
+extern enum attr_cpu m68k_sched_cpu;
+extern enum attr_mac m68k_sched_mac;
+
+extern enum attr_opx_type m68k_sched_attr_opx_type (rtx, int);
+extern enum attr_opy_type m68k_sched_attr_opy_type (rtx, int);
+extern enum attr_size m68k_sched_attr_size (rtx);
+extern enum attr_op_mem m68k_sched_attr_op_mem (rtx);
+extern enum attr_type m68k_sched_branch_type (rtx);
+#endif /* HAVE_ATTR_cpu */
+
+#endif /* RTX_CODE */
+
+extern bool m68k_regno_mode_ok (int, enum machine_mode);
+extern enum reg_class m68k_secondary_reload_class (enum reg_class,
+ enum machine_mode, rtx);
+extern enum reg_class m68k_preferred_reload_class (rtx, enum reg_class);
+extern int flags_in_68881 (void);
+extern void m68k_expand_prologue (void);
+extern bool m68k_use_return_insn (void);
+extern void m68k_expand_epilogue (bool);
+extern const char *m68k_cpp_cpu_ident (const char *);
+extern const char *m68k_cpp_cpu_family (const char *);
+extern void init_68881_table (void);
+extern rtx m68k_legitimize_call_address (rtx);
+extern rtx m68k_legitimize_sibcall_address (rtx);
+extern int m68k_hard_regno_rename_ok(unsigned int, unsigned int);
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
new file mode 100644
index 000000000..e5bd0119a
--- /dev/null
+++ b/gcc/config/m68k/m68k.c
@@ -0,0 +1,6615 @@
+/* Subroutines for insn-output.c for Motorola 68000 family.
+ Copyright (C) 1987, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "function.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "expr.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "debug.h"
+#include "flags.h"
+#include "df.h"
+/* ??? Need to add a dependency between m68k.o and sched-int.h. */
+#include "sched-int.h"
+#include "insn-codes.h"
+#include "ggc.h"
+
+enum reg_class regno_reg_class[] =
+{
+ DATA_REGS, DATA_REGS, DATA_REGS, DATA_REGS,
+ DATA_REGS, DATA_REGS, DATA_REGS, DATA_REGS,
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ ADDR_REGS
+};
+
+
+/* The minimum number of integer registers that we want to save with the
+ movem instruction. Using two movel instructions instead of a single
+ moveml is about 15% faster for the 68020 and 68030 at no expense in
+ code size. */
+#define MIN_MOVEM_REGS 3
+
+/* The minimum number of floating point registers that we want to save
+ with the fmovem instruction. */
+#define MIN_FMOVEM_REGS 1
+
+/* Structure describing stack frame layout. */
+struct m68k_frame
+{
+ /* Stack pointer to frame pointer offset. */
+ HOST_WIDE_INT offset;
+
+ /* Offset of FPU registers. */
+ HOST_WIDE_INT foffset;
+
+ /* Frame size in bytes (rounded up). */
+ HOST_WIDE_INT size;
+
+  /* Number and mask of saved data and address registers.  */
+ int reg_no;
+ unsigned int reg_mask;
+
+  /* Number and mask of saved FPU registers.  */
+ int fpu_no;
+ unsigned int fpu_mask;
+
+ /* Offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+
+ /* Function which the above information refers to. */
+ int funcdef_no;
+};
+
+/* Current frame information calculated by m68k_compute_frame_layout(). */
+static struct m68k_frame current_frame;
+
+/* Structure describing an m68k address.
+
+ If CODE is UNKNOWN, the address is BASE + INDEX * SCALE + OFFSET,
+ with null fields evaluating to 0. Here:
+
+ - BASE satisfies m68k_legitimate_base_reg_p
+ - INDEX satisfies m68k_legitimate_index_reg_p
+ - OFFSET satisfies m68k_legitimate_constant_address_p
+
+ INDEX is either HImode or SImode. The other fields are SImode.
+
+ If CODE is PRE_DEC, the address is -(BASE). If CODE is POST_INC,
+ the address is (BASE)+. */
+struct m68k_address {
+ enum rtx_code code;
+ rtx base;
+ rtx index;
+ rtx offset;
+ int scale;
+};
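+
+/* For example, the 68020 address 8(%a0,%d1.w*2) would be described by
+   BASE %a0, INDEX %d1 (HImode), SCALE 2, OFFSET 8 and CODE UNKNOWN.  */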
+
+static int m68k_sched_adjust_cost (rtx, rtx, rtx, int);
+static int m68k_sched_issue_rate (void);
+static int m68k_sched_variable_issue (FILE *, int, rtx, int);
+static void m68k_sched_md_init_global (FILE *, int, int);
+static void m68k_sched_md_finish_global (FILE *, int);
+static void m68k_sched_md_init (FILE *, int, int);
+static void m68k_sched_dfa_pre_advance_cycle (void);
+static void m68k_sched_dfa_post_advance_cycle (void);
+static int m68k_sched_first_cycle_multipass_dfa_lookahead (void);
+
+static bool m68k_can_eliminate (const int, const int);
+static void m68k_conditional_register_usage (void);
+static bool m68k_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool m68k_handle_option (size_t, const char *, int);
+static void m68k_option_override (void);
+static void m68k_override_options_after_change (void);
+static rtx find_addr_reg (rtx);
+static const char *singlemove_string (rtx *);
+static void m68k_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static rtx m68k_struct_value_rtx (tree, int);
+static tree m68k_handle_fndecl_attribute (tree *node, tree name,
+ tree args, int flags,
+ bool *no_add_attrs);
+static void m68k_compute_frame_layout (void);
+static bool m68k_save_reg (unsigned int regno, bool interrupt_handler);
+static bool m68k_ok_for_sibcall_p (tree, tree);
+static bool m68k_tls_symbol_p (rtx);
+static rtx m68k_legitimize_address (rtx, rtx, enum machine_mode);
+static bool m68k_rtx_costs (rtx, int, int, int *, bool);
+#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
+static bool m68k_return_in_memory (const_tree, const_tree);
+#endif
+static void m68k_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static void m68k_trampoline_init (rtx, tree, rtx);
+static int m68k_return_pops_args (tree, tree, int);
+static rtx m68k_delegitimize_address (rtx);
+static void m68k_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx m68k_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+
+
+/* Specify the identification number of the library being built */
+const char *m68k_library_id_string = "_current_shared_library_a5_offset_";
+
+/* Initialize the GCC target structure. */
+
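+/* Spell out the integer data directives to match the assembler dialect
+   selected by INT_OP_GROUP: ".word"/".long", bare "short"/"long", or
+   "dc.w"/"dc.l".  */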
+#if INT_OP_GROUP == INT_OP_DOT_WORD
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+#endif
+
+#if INT_OP_GROUP == INT_OP_NO_DOT
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP "\tbyte\t"
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\tshort\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\tlong\t"
+#endif
+
+#if INT_OP_GROUP == INT_OP_DC
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP "\tdc.b\t"
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\tdc.w\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\tdc.l\t"
+#endif
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK m68k_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_FILE_START_APP_OFF
+#define TARGET_ASM_FILE_START_APP_OFF true
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS m68k_legitimize_address
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST m68k_sched_adjust_cost
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE m68k_sched_issue_rate
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE m68k_sched_variable_issue
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL m68k_sched_md_init_global
+
+#undef TARGET_SCHED_FINISH_GLOBAL
+#define TARGET_SCHED_FINISH_GLOBAL m68k_sched_md_finish_global
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT m68k_sched_md_init
+
+#undef TARGET_SCHED_DFA_PRE_ADVANCE_CYCLE
+#define TARGET_SCHED_DFA_PRE_ADVANCE_CYCLE m68k_sched_dfa_pre_advance_cycle
+
+#undef TARGET_SCHED_DFA_POST_ADVANCE_CYCLE
+#define TARGET_SCHED_DFA_POST_ADVANCE_CYCLE m68k_sched_dfa_post_advance_cycle
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ m68k_sched_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION m68k_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE m68k_option_override
+
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE m68k_override_options_after_change
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS m68k_rtx_costs
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE m68k_attribute_table
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX m68k_struct_value_rtx
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM m68k_illegitimate_symbolic_constant_p
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL m68k_ok_for_sibcall_p
+
+#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY m68k_return_in_memory
+#endif
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS (true)
+
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL m68k_output_dwarf_dtprel
+#endif
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P m68k_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE m68k_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE m68k_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT m68k_trampoline_init
+
+#undef TARGET_RETURN_POPS_ARGS
+#define TARGET_RETURN_POPS_ARGS m68k_return_pops_args
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS m68k_delegitimize_address
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG m68k_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE m68k_function_arg_advance
+
+static const struct attribute_spec m68k_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt", 0, 0, true, false, false, m68k_handle_fndecl_attribute },
+ { "interrupt_handler", 0, 0, true, false, false, m68k_handle_fndecl_attribute },
+ { "interrupt_thread", 0, 0, true, false, false, m68k_handle_fndecl_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Base flags for 68k ISAs. */
+#define FL_FOR_isa_00 FL_ISA_68000
+#define FL_FOR_isa_10 (FL_FOR_isa_00 | FL_ISA_68010)
+/* FL_68881 controls the default setting of -m68881. gcc has traditionally
+ generated 68881 code for 68020 and 68030 targets unless explicitly told
+ not to. */
+#define FL_FOR_isa_20 (FL_FOR_isa_10 | FL_ISA_68020 \
+ | FL_BITFIELD | FL_68881)
+#define FL_FOR_isa_40 (FL_FOR_isa_20 | FL_ISA_68040)
+#define FL_FOR_isa_cpu32 (FL_FOR_isa_10 | FL_ISA_68020)
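+/* CPU32 implements most of the 68020 instruction set but lacks the
+   bitfield instructions and a 68881, so FL_BITFIELD and FL_68881 are
+   deliberately omitted above.  */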
+
+/* Base flags for ColdFire ISAs. */
+#define FL_FOR_isa_a (FL_COLDFIRE | FL_ISA_A)
+#define FL_FOR_isa_aplus (FL_FOR_isa_a | FL_ISA_APLUS | FL_CF_USP)
+/* Note ISA_B doesn't necessarily include USP (user stack pointer) support. */
+#define FL_FOR_isa_b (FL_FOR_isa_a | FL_ISA_B | FL_CF_HWDIV)
+/* ISA_C is not upwardly compatible with ISA_B. */
+#define FL_FOR_isa_c (FL_FOR_isa_a | FL_ISA_C | FL_CF_USP)
+
+enum m68k_isa
+{
+ /* Traditional 68000 instruction sets. */
+ isa_00,
+ isa_10,
+ isa_20,
+ isa_40,
+ isa_cpu32,
+ /* ColdFire instruction set variants. */
+ isa_a,
+ isa_aplus,
+ isa_b,
+ isa_c,
+ isa_max
+};
+
+/* Information about one of the -march, -mcpu or -mtune arguments. */
+struct m68k_target_selection
+{
+ /* The argument being described. */
+ const char *name;
+
+ /* For -mcpu, this is the device selected by the option.
+ For -mtune and -march, it is a representative device
+ for the microarchitecture or ISA respectively. */
+ enum target_device device;
+
+ /* The M68K_DEVICE fields associated with DEVICE. See the comment
+ in m68k-devices.def for details. FAMILY is only valid for -mcpu. */
+ const char *family;
+ enum uarch_type microarch;
+ enum m68k_isa isa;
+ unsigned long flags;
+};
+
+/* A list of all devices in m68k-devices.def. Used for -mcpu selection. */
+static const struct m68k_target_selection all_devices[] =
+{
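+  /* Each entry combines the family-specific FLAGS with the base flags
+     for its ISA.  */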
+#define M68K_DEVICE(NAME,ENUM_VALUE,FAMILY,MULTILIB,MICROARCH,ISA,FLAGS) \
+ { NAME, ENUM_VALUE, FAMILY, u##MICROARCH, ISA, FLAGS | FL_FOR_##ISA },
+#include "m68k-devices.def"
+#undef M68K_DEVICE
+ { NULL, unk_device, NULL, unk_arch, isa_max, 0 }
+};
+
+/* A list of all ISAs, mapping each one to a representative device.
+ Used for -march selection. */
+static const struct m68k_target_selection all_isas[] =
+{
+ { "68000", m68000, NULL, u68000, isa_00, FL_FOR_isa_00 },
+ { "68010", m68010, NULL, u68010, isa_10, FL_FOR_isa_10 },
+ { "68020", m68020, NULL, u68020, isa_20, FL_FOR_isa_20 },
+ { "68030", m68030, NULL, u68030, isa_20, FL_FOR_isa_20 },
+ { "68040", m68040, NULL, u68040, isa_40, FL_FOR_isa_40 },
+ { "68060", m68060, NULL, u68060, isa_40, FL_FOR_isa_40 },
+ { "cpu32", cpu32, NULL, ucpu32, isa_20, FL_FOR_isa_cpu32 },
+ { "isaa", mcf5206e, NULL, ucfv2, isa_a, (FL_FOR_isa_a
+ | FL_CF_HWDIV) },
+ { "isaaplus", mcf5271, NULL, ucfv2, isa_aplus, (FL_FOR_isa_aplus
+ | FL_CF_HWDIV) },
+ { "isab", mcf5407, NULL, ucfv4, isa_b, FL_FOR_isa_b },
+ { "isac", unk_device, NULL, ucfv4, isa_c, (FL_FOR_isa_c
+ | FL_CF_HWDIV) },
+ { NULL, unk_device, NULL, unk_arch, isa_max, 0 }
+};
+
+/* A list of all microarchitectures, mapping each one to a representative
+ device. Used for -mtune selection. */
+static const struct m68k_target_selection all_microarchs[] =
+{
+ { "68000", m68000, NULL, u68000, isa_00, FL_FOR_isa_00 },
+ { "68010", m68010, NULL, u68010, isa_10, FL_FOR_isa_10 },
+ { "68020", m68020, NULL, u68020, isa_20, FL_FOR_isa_20 },
+ { "68020-40", m68020, NULL, u68020_40, isa_20, FL_FOR_isa_20 },
+ { "68020-60", m68020, NULL, u68020_60, isa_20, FL_FOR_isa_20 },
+ { "68030", m68030, NULL, u68030, isa_20, FL_FOR_isa_20 },
+ { "68040", m68040, NULL, u68040, isa_40, FL_FOR_isa_40 },
+ { "68060", m68060, NULL, u68060, isa_40, FL_FOR_isa_40 },
+ { "cpu32", cpu32, NULL, ucpu32, isa_20, FL_FOR_isa_cpu32 },
+ { "cfv1", mcf51qe, NULL, ucfv1, isa_c, FL_FOR_isa_c },
+ { "cfv2", mcf5206, NULL, ucfv2, isa_a, FL_FOR_isa_a },
+ { "cfv3", mcf5307, NULL, ucfv3, isa_a, (FL_FOR_isa_a
+ | FL_CF_HWDIV) },
+ { "cfv4", mcf5407, NULL, ucfv4, isa_b, FL_FOR_isa_b },
+ { "cfv4e", mcf547x, NULL, ucfv4e, isa_b, (FL_FOR_isa_b
+ | FL_CF_USP
+ | FL_CF_EMAC
+ | FL_CF_FPU) },
+ { NULL, unk_device, NULL, unk_arch, isa_max, 0 }
+};
+
+/* The entries associated with the -mcpu, -march and -mtune settings,
+ or null for options that have not been used. */
+const struct m68k_target_selection *m68k_cpu_entry;
+const struct m68k_target_selection *m68k_arch_entry;
+const struct m68k_target_selection *m68k_tune_entry;
+
+/* Which CPU we are generating code for. */
+enum target_device m68k_cpu;
+
+/* Which microarchitecture to tune for. */
+enum uarch_type m68k_tune;
+
+/* Which FPU to use. */
+enum fpu_type m68k_fpu;
+
+/* The set of FL_* flags that apply to the target processor. */
+unsigned int m68k_cpu_flags;
+
+/* The set of FL_* flags that apply to the processor to be tuned for. */
+unsigned int m68k_tune_flags;
+
+/* Asm templates for calling or jumping to an arbitrary symbolic address,
+ or NULL if such calls or jumps are not supported. The address is held
+ in operand 0. */
+const char *m68k_symbolic_call;
+const char *m68k_symbolic_jump;
+
+/* Enum variable that corresponds to m68k_symbolic_call values. */
+enum M68K_SYMBOLIC_CALL m68k_symbolic_call_var;
+
+
+/* See whether TABLE has an entry with name NAME. Return true and
+ store the entry in *ENTRY if so, otherwise return false and
+ leave *ENTRY alone. */
+
+static bool
+m68k_find_selection (const struct m68k_target_selection **entry,
+ const struct m68k_target_selection *table,
+ const char *name)
+{
+ size_t i;
+
+ for (i = 0; table[i].name; i++)
+ if (strcmp (table[i].name, name) == 0)
+ {
+ *entry = table + i;
+ return true;
+ }
+ return false;
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+m68k_handle_option (size_t code, const char *arg, int value)
+{
+ switch (code)
+ {
+ case OPT_march_:
+ return m68k_find_selection (&m68k_arch_entry, all_isas, arg);
+
+ case OPT_mcpu_:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, arg);
+
+ case OPT_mtune_:
+ return m68k_find_selection (&m68k_tune_entry, all_microarchs, arg);
+
+ case OPT_m5200:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "5206");
+
+ case OPT_m5206e:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "5206e");
+
+ case OPT_m528x:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "528x");
+
+ case OPT_m5307:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "5307");
+
+ case OPT_m5407:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "5407");
+
+ case OPT_mcfv4e:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "547x");
+
+ case OPT_m68000:
+ case OPT_mc68000:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68000");
+
+ case OPT_m68010:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68010");
+
+ case OPT_m68020:
+ case OPT_mc68020:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68020");
+
+ case OPT_m68020_40:
+ return (m68k_find_selection (&m68k_tune_entry, all_microarchs,
+ "68020-40")
+ && m68k_find_selection (&m68k_cpu_entry, all_devices, "68020"));
+
+ case OPT_m68020_60:
+ return (m68k_find_selection (&m68k_tune_entry, all_microarchs,
+ "68020-60")
+ && m68k_find_selection (&m68k_cpu_entry, all_devices, "68020"));
+
+ case OPT_m68030:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68030");
+
+ case OPT_m68040:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68040");
+
+ case OPT_m68060:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68060");
+
+ case OPT_m68302:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68302");
+
+ case OPT_m68332:
+ case OPT_mcpu32:
+ return m68k_find_selection (&m68k_cpu_entry, all_devices, "68332");
+
+ case OPT_mshared_library_id_:
+ if (value > MAX_LIBRARY_ID)
+ error ("-mshared-library-id=%s is not between 0 and %d",
+ arg, MAX_LIBRARY_ID);
+ else
+ {
+ char *tmp;
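+          /* Library ID N becomes the A5-relative offset -4 * (N + 1);
+             each library's pointer is assumed to occupy one 4-byte slot
+             below %a5 (our reading of the uClinux ID shared library
+             scheme).  */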
+ asprintf (&tmp, "%d", (value * -4) - 4);
+ m68k_library_id_string = tmp;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+
+static void
+m68k_option_override (void)
+{
+ const struct m68k_target_selection *entry;
+ unsigned long target_mask;
+
+ /* User can choose:
+
+ -mcpu=
+ -march=
+ -mtune=
+
+ -march=ARCH should generate code that runs any processor
+ implementing architecture ARCH. -mcpu=CPU should override -march
+ and should generate code that runs on processor CPU, making free
+ use of any instructions that CPU understands. -mtune=UARCH applies
+ on top of -mcpu or -march and optimizes the code for UARCH. It does
+ not change the target architecture. */
+ if (m68k_cpu_entry)
+ {
+ /* Complain if the -march setting is for a different microarchitecture,
+ or includes flags that the -mcpu setting doesn't. */
+ if (m68k_arch_entry
+ && (m68k_arch_entry->microarch != m68k_cpu_entry->microarch
+ || (m68k_arch_entry->flags & ~m68k_cpu_entry->flags) != 0))
+ warning (0, "-mcpu=%s conflicts with -march=%s",
+ m68k_cpu_entry->name, m68k_arch_entry->name);
+
+ entry = m68k_cpu_entry;
+ }
+ else
+ entry = m68k_arch_entry;
+
+ if (!entry)
+ entry = all_devices + TARGET_CPU_DEFAULT;
+
+ m68k_cpu_flags = entry->flags;
+
+ /* Use the architecture setting to derive default values for
+ certain flags. */
+ target_mask = 0;
+
+ /* ColdFire is lenient about alignment. */
+ if (!TARGET_COLDFIRE)
+ target_mask |= MASK_STRICT_ALIGNMENT;
+
+ if ((m68k_cpu_flags & FL_BITFIELD) != 0)
+ target_mask |= MASK_BITFIELD;
+ if ((m68k_cpu_flags & FL_CF_HWDIV) != 0)
+ target_mask |= MASK_CF_HWDIV;
+ if ((m68k_cpu_flags & (FL_68881 | FL_CF_FPU)) != 0)
+ target_mask |= MASK_HARD_FLOAT;
+ target_flags |= target_mask & ~target_flags_explicit;
+
+ /* Set the directly-usable versions of the -mcpu and -mtune settings. */
+ m68k_cpu = entry->device;
+ if (m68k_tune_entry)
+ {
+ m68k_tune = m68k_tune_entry->microarch;
+ m68k_tune_flags = m68k_tune_entry->flags;
+ }
+#ifdef M68K_DEFAULT_TUNE
+ else if (!m68k_cpu_entry && !m68k_arch_entry)
+ {
+ enum target_device dev;
+ dev = all_microarchs[M68K_DEFAULT_TUNE].device;
+      m68k_tune_flags = all_devices[dev].flags;
+ }
+#endif
+ else
+ {
+ m68k_tune = entry->microarch;
+ m68k_tune_flags = entry->flags;
+ }
+
+ /* Set the type of FPU. */
+ m68k_fpu = (!TARGET_HARD_FLOAT ? FPUTYPE_NONE
+ : (m68k_cpu_flags & FL_COLDFIRE) != 0 ? FPUTYPE_COLDFIRE
+ : FPUTYPE_68881);
+
+  /* Sanity check to ensure that msep-data and mid-shared-library are not
+     both specified together.  Doing so simply doesn't make sense.  */
+ if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
+ error ("cannot specify both -msep-data and -mid-shared-library");
+
+  /* If we're generating code for a separate A5 relative data segment,
+     we've got to enable -fPIC as well.  This might be relaxable to
+     -fpic, but it hasn't been tested properly.  */
+ if (TARGET_SEP_DATA || TARGET_ID_SHARED_LIBRARY)
+ flag_pic = 2;
+
+ /* -mpcrel -fPIC uses 32-bit pc-relative displacements. Raise an
+ error if the target does not support them. */
+ if (TARGET_PCREL && !TARGET_68020 && flag_pic == 2)
+ error ("-mpcrel -fPIC is not currently supported on selected cpu");
+
+ /* ??? A historic way of turning on pic, or is this intended to
+ be an embedded thing that doesn't have the same name binding
+ significance that it does on hosted ELF systems? */
+ if (TARGET_PCREL && flag_pic == 0)
+ flag_pic = 1;
+
+ if (!flag_pic)
+ {
+ m68k_symbolic_call_var = M68K_SYMBOLIC_CALL_JSR;
+
+ m68k_symbolic_jump = "jra %a0";
+ }
+ else if (TARGET_ID_SHARED_LIBRARY)
+ /* All addresses must be loaded from the GOT. */
+ ;
+ else if (TARGET_68020 || TARGET_ISAB || TARGET_ISAC)
+ {
+ if (TARGET_PCREL)
+ m68k_symbolic_call_var = M68K_SYMBOLIC_CALL_BSR_C;
+ else
+ m68k_symbolic_call_var = M68K_SYMBOLIC_CALL_BSR_P;
+
+ if (TARGET_ISAC)
+ /* No unconditional long branch */;
+ else if (TARGET_PCREL)
+ m68k_symbolic_jump = "bra%.l %c0";
+ else
+ m68k_symbolic_jump = "bra%.l %p0";
+ /* Turn off function cse if we are doing PIC. We always want
+ function call to be done as `bsr foo@PLTPC'. */
+ /* ??? It's traditional to do this for -mpcrel too, but it isn't
+ clear how intentional that is. */
+ flag_no_function_cse = 1;
+ }
+
+ switch (m68k_symbolic_call_var)
+ {
+ case M68K_SYMBOLIC_CALL_JSR:
+ m68k_symbolic_call = "jsr %a0";
+ break;
+
+ case M68K_SYMBOLIC_CALL_BSR_C:
+ m68k_symbolic_call = "bsr%.l %c0";
+ break;
+
+ case M68K_SYMBOLIC_CALL_BSR_P:
+ m68k_symbolic_call = "bsr%.l %p0";
+ break;
+
+ case M68K_SYMBOLIC_CALL_NONE:
+ gcc_assert (m68k_symbolic_call == NULL);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+#ifndef ASM_OUTPUT_ALIGN_WITH_NOP
+ if (align_labels > 2)
+ {
+ warning (0, "-falign-labels=%d is not supported", align_labels);
+ align_labels = 0;
+ }
+ if (align_loops > 2)
+ {
+ warning (0, "-falign-loops=%d is not supported", align_loops);
+ align_loops = 0;
+ }
+#endif
+
+ SUBTARGET_OVERRIDE_OPTIONS;
+
+  /* Set up scheduling options.  */
+ if (TUNE_CFV1)
+ m68k_sched_cpu = CPU_CFV1;
+ else if (TUNE_CFV2)
+ m68k_sched_cpu = CPU_CFV2;
+ else if (TUNE_CFV3)
+ m68k_sched_cpu = CPU_CFV3;
+ else if (TUNE_CFV4)
+ m68k_sched_cpu = CPU_CFV4;
+ else
+ {
+ m68k_sched_cpu = CPU_UNKNOWN;
+ flag_schedule_insns = 0;
+ flag_schedule_insns_after_reload = 0;
+ flag_modulo_sched = 0;
+ }
+
+ if (m68k_sched_cpu != CPU_UNKNOWN)
+ {
+ if ((m68k_cpu_flags & (FL_CF_EMAC | FL_CF_EMAC_B)) != 0)
+ m68k_sched_mac = MAC_CF_EMAC;
+ else if ((m68k_cpu_flags & FL_CF_MAC) != 0)
+ m68k_sched_mac = MAC_CF_MAC;
+ else
+ m68k_sched_mac = MAC_NO;
+ }
+}
+
+/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
+
+static void
+m68k_override_options_after_change (void)
+{
+ if (m68k_sched_cpu == CPU_UNKNOWN)
+ {
+ flag_schedule_insns = 0;
+ flag_schedule_insns_after_reload = 0;
+ flag_modulo_sched = 0;
+ }
+}
+
+/* Generate a macro of the form __mPREFIX_cpu_NAME, where PREFIX is the
+ given argument and NAME is the argument passed to -mcpu. Return NULL
+ if -mcpu was not passed. */
+
+const char *
+m68k_cpp_cpu_ident (const char *prefix)
+{
+ if (!m68k_cpu_entry)
+ return NULL;
+ return concat ("__m", prefix, "_cpu_", m68k_cpu_entry->name, NULL);
+}
+
+/* Generate a macro of the form __mPREFIX_family_NAME, where PREFIX is the
+ given argument and NAME is the name of the representative device for
+ the -mcpu argument's family. Return NULL if -mcpu was not passed. */
+
+const char *
+m68k_cpp_cpu_family (const char *prefix)
+{
+ if (!m68k_cpu_entry)
+ return NULL;
+ return concat ("__m", prefix, "_family_", m68k_cpu_entry->family, NULL);
+}
+
+/* Return m68k_fk_interrupt_handler if FUNC has an "interrupt" or
+ "interrupt_handler" attribute and interrupt_thread if FUNC has an
+ "interrupt_thread" attribute. Otherwise, return
+ m68k_fk_normal_function. */
+
+enum m68k_function_kind
+m68k_get_function_kind (tree func)
+{
+ tree a;
+
+ gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
+
+ a = lookup_attribute ("interrupt", DECL_ATTRIBUTES (func));
+ if (a != NULL_TREE)
+ return m68k_fk_interrupt_handler;
+
+ a = lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (func));
+ if (a != NULL_TREE)
+ return m68k_fk_interrupt_handler;
+
+ a = lookup_attribute ("interrupt_thread", DECL_ATTRIBUTES (func));
+ if (a != NULL_TREE)
+ return m68k_fk_interrupt_thread;
+
+ return m68k_fk_normal_function;
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+ struct attribute_spec.handler. */
+static tree
+m68k_handle_fndecl_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ if (m68k_get_function_kind (*node) != m68k_fk_normal_function)
+ {
+ error ("multiple interrupt attributes not allowed");
+ *no_add_attrs = true;
+ }
+
+ if (!TARGET_FIDOA
+ && !strcmp (IDENTIFIER_POINTER (name), "interrupt_thread"))
+ {
+ error ("interrupt_thread is available only on fido");
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static void
+m68k_compute_frame_layout (void)
+{
+ int regno, saved;
+ unsigned int mask;
+ enum m68k_function_kind func_kind =
+ m68k_get_function_kind (current_function_decl);
+ bool interrupt_handler = func_kind == m68k_fk_interrupt_handler;
+ bool interrupt_thread = func_kind == m68k_fk_interrupt_thread;
+
+ /* Only compute the frame once per function.
+ Don't cache information until reload has been completed. */
+ if (current_frame.funcdef_no == current_function_funcdef_no
+ && reload_completed)
+ return;
+
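+  /* Round the raw frame size up to the next 4-byte boundary.  */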
+ current_frame.size = (get_frame_size () + 3) & -4;
+
+ mask = saved = 0;
+
+  /* An interrupt thread does not need to save any registers.  */
+ if (!interrupt_thread)
+ for (regno = 0; regno < 16; regno++)
+ if (m68k_save_reg (regno, interrupt_handler))
+ {
+ mask |= 1 << (regno - D0_REG);
+ saved++;
+ }
+ current_frame.offset = saved * 4;
+ current_frame.reg_no = saved;
+ current_frame.reg_mask = mask;
+
+ current_frame.foffset = 0;
+ mask = saved = 0;
+ if (TARGET_HARD_FLOAT)
+ {
+      /* An interrupt thread does not need to save any registers.  */
+ if (!interrupt_thread)
+ for (regno = 16; regno < 24; regno++)
+ if (m68k_save_reg (regno, interrupt_handler))
+ {
+ mask |= 1 << (regno - FP0_REG);
+ saved++;
+ }
+ current_frame.foffset = saved * TARGET_FP_REG_SIZE;
+ current_frame.offset += current_frame.foffset;
+ }
+ current_frame.fpu_no = saved;
+ current_frame.fpu_mask = mask;
+
+ /* Remember what function this frame refers to. */
+ current_frame.funcdef_no = current_function_funcdef_no;
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+m68k_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
+HOST_WIDE_INT
+m68k_initial_elimination_offset (int from, int to)
+{
+ int argptr_offset;
+ /* The arg pointer points 8 bytes before the start of the arguments,
+ as defined by FIRST_PARM_OFFSET. This makes it coincident with the
+ frame pointer in most frames. */
+ argptr_offset = frame_pointer_needed ? 0 : UNITS_PER_WORD;
+ if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ return argptr_offset;
+
+ m68k_compute_frame_layout ();
+
+ gcc_assert (to == STACK_POINTER_REGNUM);
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ return current_frame.offset + current_frame.size - argptr_offset;
+ case FRAME_POINTER_REGNUM:
+ return current_frame.offset + current_frame.size;
+ default:
+ gcc_unreachable ();
+ }
+}
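+
+/* For illustration, continuing the example above: with no frame
+ pointer, eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM
+ yields 24 + 12 - 4 = 32, while eliminating FRAME_POINTER_REGNUM
+ yields 24 + 12 = 36, UNITS_PER_WORD being 4 on m68k. */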
+
+/* Refer to the array `regs_ever_live' to determine which registers
+ to save; `regs_ever_live[I]' is nonzero if register number I
+ is ever used in the function. This function is responsible for
+ knowing which registers should not be saved even if used.
+ Return true if we need to save REGNO. */
+
+static bool
+m68k_save_reg (unsigned int regno, bool interrupt_handler)
+{
+ if (flag_pic && regno == PIC_REG)
+ {
+ if (crtl->saves_all_registers)
+ return true;
+ if (crtl->uses_pic_offset_table)
+ return true;
+ /* Reload may introduce constant pool references into a function
+ that previously didn't need a PIC register. Note that the test
+ above will not catch that case because we will only set
+ crtl->uses_pic_offset_table when emitting
+ the address reloads. */
+ if (crtl->uses_const_pool)
+ return true;
+ }
+
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i;
+ for (i = 0; ; i++)
+ {
+ unsigned int test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ return true;
+ }
+ }
+
+ /* Fixed regs we never touch. */
+ if (fixed_regs[regno])
+ return false;
+
+ /* The frame pointer (if it is such) is handled specially. */
+ if (regno == FRAME_POINTER_REGNUM && frame_pointer_needed)
+ return false;
+
+ /* Interrupt handlers must also save call_used_regs
+ if they are live or when calling nested functions. */
+ if (interrupt_handler)
+ {
+ if (df_regs_ever_live_p (regno))
+ return true;
+
+ if (!current_function_is_leaf && call_used_regs[regno])
+ return true;
+ }
+
+ /* Never need to save registers that aren't touched. */
+ if (!df_regs_ever_live_p (regno))
+ return false;
+
+ /* Otherwise save everything that isn't call-clobbered. */
+ return !call_used_regs[regno];
+}
+
+/* Emit RTL for a MOVEM or FMOVEM instruction. BASE + OFFSET represents
+ the lowest memory address. COUNT is the number of registers to be
+ moved, with register REGNO + I being moved if bit I of MASK is set.
+ STORE_P specifies the direction of the move and ADJUST_STACK_P says
+ whether this is a pre-decrement (if STORE_P) or post-increment
+ (if !STORE_P) operation. */
+
+static rtx
+m68k_emit_movem (rtx base, HOST_WIDE_INT offset,
+ unsigned int count, unsigned int regno,
+ unsigned int mask, bool store_p, bool adjust_stack_p)
+{
+ int i;
+ rtx body, addr, src, operands[2];
+ enum machine_mode mode;
+
+ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (adjust_stack_p + count));
+ mode = reg_raw_mode[regno];
+ i = 0;
+
+ if (adjust_stack_p)
+ {
+ src = plus_constant (base, (count
+ * GET_MODE_SIZE (mode)
+ * (HOST_WIDE_INT) (store_p ? -1 : 1)));
+ XVECEXP (body, 0, i++) = gen_rtx_SET (VOIDmode, base, src);
+ }
+
+ for (; mask != 0; mask >>= 1, regno++)
+ if (mask & 1)
+ {
+ addr = plus_constant (base, offset);
+ operands[!store_p] = gen_frame_mem (mode, addr);
+ operands[store_p] = gen_rtx_REG (mode, regno);
+ XVECEXP (body, 0, i++)
+ = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
+ offset += GET_MODE_SIZE (mode);
+ }
+ gcc_assert (i == XVECLEN (body, 0));
+
+ return emit_insn (body);
+}
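+
+/* For illustration: a pre-decrement store of %d2 and %d3, i.e.
+
+ m68k_emit_movem (stack_pointer_rtx, -8, 2, D0_REG, 0x0c, true, true),
+
+ emits the single PARALLEL
+
+ (parallel [(set %sp (plus %sp (const_int -8)))
+ (set (mem (plus %sp (const_int -8))) %d2)
+ (set (mem (plus %sp (const_int -4))) %d3)])
+
+ which the movem patterns in m68k.md then match as one instruction. */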
+
+/* Make INSN a frame-related instruction. */
+
+static void
+m68k_set_frame_related (rtx insn)
+{
+ rtx body;
+ int i;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ body = PATTERN (insn);
+ if (GET_CODE (body) == PARALLEL)
+ for (i = 0; i < XVECLEN (body, 0); i++)
+ RTX_FRAME_RELATED_P (XVECEXP (body, 0, i)) = 1;
+}
+
+/* Emit RTL for the "prologue" define_expand. */
+
+void
+m68k_expand_prologue (void)
+{
+ HOST_WIDE_INT fsize_with_regs;
+ rtx limit, src, dest;
+
+ m68k_compute_frame_layout ();
+
+ /* If the stack limit is a symbol, we can check it here,
+ before actually allocating the space. */
+ if (crtl->limit_stack
+ && GET_CODE (stack_limit_rtx) == SYMBOL_REF)
+ {
+ limit = plus_constant (stack_limit_rtx, current_frame.size + 4);
+ if (!LEGITIMATE_CONSTANT_P (limit))
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, D0_REG), limit);
+ limit = gen_rtx_REG (Pmode, D0_REG);
+ }
+ emit_insn (gen_ctrapsi4 (gen_rtx_LTU (VOIDmode,
+ stack_pointer_rtx, limit),
+ stack_pointer_rtx, limit,
+ const1_rtx));
+ }
+
+ fsize_with_regs = current_frame.size;
+ if (TARGET_COLDFIRE)
+ {
+ /* ColdFire's move multiple instructions do not allow pre-decrement
+ addressing. Add the size of movem saves to the initial stack
+ allocation instead. */
+ if (current_frame.reg_no >= MIN_MOVEM_REGS)
+ fsize_with_regs += current_frame.reg_no * GET_MODE_SIZE (SImode);
+ if (current_frame.fpu_no >= MIN_FMOVEM_REGS)
+ fsize_with_regs += current_frame.fpu_no * GET_MODE_SIZE (DFmode);
+ }
+
+ if (frame_pointer_needed)
+ {
+ if (fsize_with_regs == 0 && TUNE_68040)
+ {
+ /* On the 68040, two separate moves are faster than link.w 0. */
+ dest = gen_frame_mem (Pmode,
+ gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
+ m68k_set_frame_related (emit_move_insn (dest, frame_pointer_rtx));
+ m68k_set_frame_related (emit_move_insn (frame_pointer_rtx,
+ stack_pointer_rtx));
+ }
+ else if (fsize_with_regs < 0x8000 || TARGET_68020)
+ m68k_set_frame_related
+ (emit_insn (gen_link (frame_pointer_rtx,
+ GEN_INT (-4 - fsize_with_regs))));
+ else
+ {
+ m68k_set_frame_related
+ (emit_insn (gen_link (frame_pointer_rtx, GEN_INT (-4))));
+ m68k_set_frame_related
+ (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-fsize_with_regs))));
+ }
+
+ /* If the frame pointer is needed, emit a special barrier that
+ will prevent the scheduler from moving stores to the frame
+ before the stack adjustment. */
+ emit_insn (gen_stack_tie (stack_pointer_rtx, frame_pointer_rtx));
+ }
+ else if (fsize_with_regs != 0)
+ m68k_set_frame_related
+ (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-fsize_with_regs))));
+
+ if (current_frame.fpu_mask)
+ {
+ gcc_assert (current_frame.fpu_no >= MIN_FMOVEM_REGS);
+ if (TARGET_68881)
+ m68k_set_frame_related
+ (m68k_emit_movem (stack_pointer_rtx,
+ current_frame.fpu_no * -GET_MODE_SIZE (XFmode),
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, true, true));
+ else
+ {
+ int offset;
+
+ /* If we're using moveml to save the integer registers,
+ the stack pointer will point to the bottom of the moveml
+ save area. Find the stack offset of the first FP register. */
+ if (current_frame.reg_no < MIN_MOVEM_REGS)
+ offset = 0;
+ else
+ offset = current_frame.reg_no * GET_MODE_SIZE (SImode);
+ m68k_set_frame_related
+ (m68k_emit_movem (stack_pointer_rtx, offset,
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, true, false));
+ }
+ }
+
+ /* If the stack limit is not a symbol, check it here.
+ This has the disadvantage that it may be too late... */
+ if (crtl->limit_stack)
+ {
+ if (REG_P (stack_limit_rtx))
+ emit_insn (gen_ctrapsi4 (gen_rtx_LTU (VOIDmode, stack_pointer_rtx,
+ stack_limit_rtx),
+ stack_pointer_rtx, stack_limit_rtx,
+ const1_rtx));
+
+ else if (GET_CODE (stack_limit_rtx) != SYMBOL_REF)
+ warning (0, "stack limit expression is not supported");
+ }
+
+ if (current_frame.reg_no < MIN_MOVEM_REGS)
+ {
+ /* Store each register separately in the same order moveml does. */
+ int i;
+
+ for (i = 16; i-- > 0; )
+ if (current_frame.reg_mask & (1 << i))
+ {
+ src = gen_rtx_REG (SImode, D0_REG + i);
+ dest = gen_frame_mem (SImode,
+ gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
+ m68k_set_frame_related (emit_insn (gen_movsi (dest, src)));
+ }
+ }
+ else
+ {
+ if (TARGET_COLDFIRE)
+ /* The required register save space has already been allocated.
+ The first register should be stored at (%sp). */
+ m68k_set_frame_related
+ (m68k_emit_movem (stack_pointer_rtx, 0,
+ current_frame.reg_no, D0_REG,
+ current_frame.reg_mask, true, false));
+ else
+ m68k_set_frame_related
+ (m68k_emit_movem (stack_pointer_rtx,
+ current_frame.reg_no * -GET_MODE_SIZE (SImode),
+ current_frame.reg_no, D0_REG,
+ current_frame.reg_mask, true, true));
+ }
+
+ if (!TARGET_SEP_DATA
+ && crtl->uses_pic_offset_table)
+ emit_insn (gen_load_got (pic_offset_table_rtx));
+}
+
+/* Return true if a simple (return) instruction is sufficient for this
+ function (i.e. if no epilogue is needed). */
+
+bool
+m68k_use_return_insn (void)
+{
+ if (!reload_completed || frame_pointer_needed || get_frame_size () != 0)
+ return false;
+
+ m68k_compute_frame_layout ();
+ return current_frame.offset == 0;
+}
+
+/* Emit RTL for the "epilogue" or "sibcall_epilogue" define_expand;
+ SIBCALL_P says which.
+
+ The function epilogue should not depend on the current stack pointer!
+ It should use the frame pointer only, if there is a frame pointer.
+ This is mandatory because of alloca; we also take advantage of it to
+ omit stack adjustments before returning. */
+
+void
+m68k_expand_epilogue (bool sibcall_p)
+{
+ HOST_WIDE_INT fsize, fsize_with_regs;
+ bool big, restore_from_sp;
+
+ m68k_compute_frame_layout ();
+
+ fsize = current_frame.size;
+ big = false;
+ restore_from_sp = false;
+
+ /* FIXME: current_function_is_leaf below is too strong.
+ What we really need to know is whether there could be a pending
+ stack adjustment needed at that point. */
+ restore_from_sp = (!frame_pointer_needed
+ || (!cfun->calls_alloca
+ && current_function_is_leaf));
+
+ /* fsize_with_regs is the size we need to adjust the sp when
+ popping the frame. */
+ fsize_with_regs = fsize;
+ if (TARGET_COLDFIRE && restore_from_sp)
+ {
+ /* ColdFire's move multiple instructions do not allow post-increment
+ addressing. Add the size of movem loads to the final deallocation
+ instead. */
+ if (current_frame.reg_no >= MIN_MOVEM_REGS)
+ fsize_with_regs += current_frame.reg_no * GET_MODE_SIZE (SImode);
+ if (current_frame.fpu_no >= MIN_FMOVEM_REGS)
+ fsize_with_regs += current_frame.fpu_no * GET_MODE_SIZE (DFmode);
+ }
+
+ if (current_frame.offset + fsize >= 0x8000
+ && !restore_from_sp
+ && (current_frame.reg_mask || current_frame.fpu_mask))
+ {
+ if (TARGET_COLDFIRE
+ && (current_frame.reg_no >= MIN_MOVEM_REGS
+ || current_frame.fpu_no >= MIN_FMOVEM_REGS))
+ {
+ /* ColdFire's move multiple instructions do not support the
+ (d8,Ax,Xi) addressing mode, so we might as well use a normal
+ stack-based restore. */
+ emit_move_insn (gen_rtx_REG (Pmode, A1_REG),
+ GEN_INT (-(current_frame.offset + fsize)));
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ gen_rtx_REG (Pmode, A1_REG),
+ frame_pointer_rtx));
+ restore_from_sp = true;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, A1_REG), GEN_INT (-fsize));
+ fsize = 0;
+ big = true;
+ }
+ }
+
+ if (current_frame.reg_no < MIN_MOVEM_REGS)
+ {
+ /* Restore each register separately in the same order moveml does. */
+ int i;
+ HOST_WIDE_INT offset;
+
+ offset = current_frame.offset + fsize;
+ for (i = 0; i < 16; i++)
+ if (current_frame.reg_mask & (1 << i))
+ {
+ rtx addr;
+
+ if (big)
+ {
+ /* Generate the address -OFFSET(%fp,%a1.l). */
+ addr = gen_rtx_REG (Pmode, A1_REG);
+ addr = gen_rtx_PLUS (Pmode, addr, frame_pointer_rtx);
+ addr = plus_constant (addr, -offset);
+ }
+ else if (restore_from_sp)
+ addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+ else
+ addr = plus_constant (frame_pointer_rtx, -offset);
+ emit_move_insn (gen_rtx_REG (SImode, D0_REG + i),
+ gen_frame_mem (SImode, addr));
+ offset -= GET_MODE_SIZE (SImode);
+ }
+ }
+ else if (current_frame.reg_mask)
+ {
+ if (big)
+ m68k_emit_movem (gen_rtx_PLUS (Pmode,
+ gen_rtx_REG (Pmode, A1_REG),
+ frame_pointer_rtx),
+ -(current_frame.offset + fsize),
+ current_frame.reg_no, D0_REG,
+ current_frame.reg_mask, false, false);
+ else if (restore_from_sp)
+ m68k_emit_movem (stack_pointer_rtx, 0,
+ current_frame.reg_no, D0_REG,
+ current_frame.reg_mask, false,
+ !TARGET_COLDFIRE);
+ else
+ m68k_emit_movem (frame_pointer_rtx,
+ -(current_frame.offset + fsize),
+ current_frame.reg_no, D0_REG,
+ current_frame.reg_mask, false, false);
+ }
+
+ if (current_frame.fpu_no > 0)
+ {
+ if (big)
+ m68k_emit_movem (gen_rtx_PLUS (Pmode,
+ gen_rtx_REG (Pmode, A1_REG),
+ frame_pointer_rtx),
+ -(current_frame.foffset + fsize),
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, false, false);
+ else if (restore_from_sp)
+ {
+ if (TARGET_COLDFIRE)
+ {
+ int offset;
+
+ /* If we used moveml to restore the integer registers, the
+ stack pointer will still point to the bottom of the moveml
+ save area. Find the stack offset of the first FP
+ register. */
+ if (current_frame.reg_no < MIN_MOVEM_REGS)
+ offset = 0;
+ else
+ offset = current_frame.reg_no * GET_MODE_SIZE (SImode);
+ m68k_emit_movem (stack_pointer_rtx, offset,
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, false, false);
+ }
+ else
+ m68k_emit_movem (stack_pointer_rtx, 0,
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, false, true);
+ }
+ else
+ m68k_emit_movem (frame_pointer_rtx,
+ -(current_frame.foffset + fsize),
+ current_frame.fpu_no, FP0_REG,
+ current_frame.fpu_mask, false, false);
+ }
+
+ if (frame_pointer_needed)
+ emit_insn (gen_unlink (frame_pointer_rtx));
+ else if (fsize_with_regs)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (fsize_with_regs)));
+
+ if (crtl->calls_eh_return)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ if (!sibcall_p)
+ emit_jump_insn (gen_rtx_RETURN (VOIDmode));
+}
+
+/* Return true if X is a valid comparison operator for the dbcc
+ instruction.
+
+ Note it rejects floating point comparison operators.
+ (In the future we could use Fdbcc).
+
+ It also rejects some comparisons when CC_NO_OVERFLOW is set. */
+
+int
+valid_dbcc_comparison_p_2 (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (x))
+ {
+ case EQ: case NE: case GTU: case LTU:
+ case GEU: case LEU:
+ return 1;
+
+ /* Reject some when CC_NO_OVERFLOW is set. This may be overly
+ conservative. */
+ case GT: case LT: case GE: case LE:
+ return ! (cc_prev_status.flags & CC_NO_OVERFLOW);
+ default:
+ return 0;
+ }
+}
+
+/* Return nonzero if flags are currently in the 68881 flag register. */
+int
+flags_in_68881 (void)
+{
+ /* We could add support for these in the future. */
+ return cc_status.flags & CC_IN_68881;
+}
+
+/* Return true if PARALLEL contains register REGNO. */
+static bool
+m68k_reg_present_p (const_rtx parallel, unsigned int regno)
+{
+ int i;
+
+ if (REG_P (parallel) && REGNO (parallel) == regno)
+ return true;
+
+ if (GET_CODE (parallel) != PARALLEL)
+ return false;
+
+ for (i = 0; i < XVECLEN (parallel, 0); ++i)
+ {
+ const_rtx x;
+
+ x = XEXP (XVECEXP (parallel, 0, i), 0);
+ if (REG_P (x) && REGNO (x) == regno)
+ return true;
+ }
+
+ return false;
+}
+
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL_P. */
+
+static bool
+m68k_ok_for_sibcall_p (tree decl, tree exp)
+{
+ enum m68k_function_kind kind;
+
+ /* We cannot use sibcalls for nested functions because we use the
+ static chain register for indirect calls. */
+ if (CALL_EXPR_STATIC_CHAIN (exp))
+ return false;
+
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ {
+ /* Check that the return value locations are the same. For
+ example that we aren't returning a value from the sibling in
+ a D0 register but then need to transfer it to a A0 register. */
+ rtx cfun_value;
+ rtx call_value;
+
+ cfun_value = FUNCTION_VALUE (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl);
+ call_value = FUNCTION_VALUE (TREE_TYPE (exp), decl);
+
+ /* Check that the values are equal or that the result the callee
+ function returns is superset of what the current function returns. */
+ if (!(rtx_equal_p (cfun_value, call_value)
+ || (REG_P (cfun_value)
+ && m68k_reg_present_p (call_value, REGNO (cfun_value)))))
+ return false;
+ }
+
+ kind = m68k_get_function_kind (current_function_decl);
+ if (kind == m68k_fk_normal_function)
+ /* We can always sibcall from a normal function, because it's
+ undefined if it is calling an interrupt function. */
+ return true;
+
+ /* Otherwise we can only sibcall if the function kind is known to be
+ the same. */
+ if (decl && m68k_get_function_kind (decl) == kind)
+ return true;
+
+ return false;
+}
+
+/* On the m68k all args are always pushed. */
+
+static rtx
+m68k_function_arg (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+static void
+m68k_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += (mode != BLKmode
+ ? (GET_MODE_SIZE (mode) + 3) & ~3
+ : (int_size_in_bytes (type) + 3) & ~3);
+}
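+
+/* For illustration: a QImode argument advances CUM by 4 and a 6-byte
+ BLKmode structure by 8, both sizes being rounded up to a multiple
+ of 4. */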
+
+/* Convert X to a legitimate function call memory reference and return the
+ result. */
+
+rtx
+m68k_legitimize_call_address (rtx x)
+{
+ gcc_assert (MEM_P (x));
+ if (call_operand (XEXP (x, 0), VOIDmode))
+ return x;
+ return replace_equiv_address (x, force_reg (Pmode, XEXP (x, 0)));
+}
+
+/* Likewise for sibling calls. */
+
+rtx
+m68k_legitimize_sibcall_address (rtx x)
+{
+ gcc_assert (MEM_P (x));
+ if (sibcall_operand (XEXP (x, 0), VOIDmode))
+ return x;
+
+ emit_move_insn (gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM), XEXP (x, 0));
+ return replace_equiv_address (x, gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM));
+}
+
+/* Convert X to a legitimate address and return it if successful. Otherwise
+ return X.
+
+ For the 68000, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in an address reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in an address reg. */
+
+static rtx
+m68k_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ if (m68k_tls_symbol_p (x))
+ return m68k_legitimize_tls_address (x);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ int ch = (x) != (oldx);
+ int copied = 0;
+
+#define COPY_ONCE(Y) if (!copied) { Y = copy_rtx (Y); copied = ch = 1; }
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ COPY_ONCE (x);
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ }
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ COPY_ONCE (x);
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ }
+ if (ch)
+ {
+ if (GET_CODE (XEXP (x, 1)) == REG
+ && GET_CODE (XEXP (x, 0)) == REG)
+ {
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ COPY_ONCE (x);
+ x = force_operand (x, 0);
+ }
+ return x;
+ }
+ if (memory_address_p (mode, x))
+ return x;
+ }
+ if (GET_CODE (XEXP (x, 0)) == REG
+ || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), 0);
+ emit_move_insn (temp, val);
+ COPY_ONCE (x);
+ XEXP (x, 1) = temp;
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && GET_CODE (XEXP (x, 0)) == REG)
+ x = force_operand (x, 0);
+ }
+ else if (GET_CODE (XEXP (x, 1)) == REG
+ || (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
+ && GET_MODE (XEXP (XEXP (x, 1), 0)) == HImode))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), 0);
+ emit_move_insn (temp, val);
+ COPY_ONCE (x);
+ XEXP (x, 0) = temp;
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && GET_CODE (XEXP (x, 1)) == REG)
+ x = force_operand (x, 0);
+ }
+ }
+
+ return x;
+}
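+
+/* For illustration: (plus (symbol_ref foo) (reg %d0)) is handled by
+ the second branch above; the symbol is moved into a fresh pseudo R
+ (R standing for whatever register gen_reg_rtx returns) and the
+ address becomes (plus R (reg %d0)), a base+index form. */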
+
+
+/* Output a dbCC; jCC sequence. Note we do not handle the
+ floating point version of this sequence (Fdbcc). We also
+ do not handle alternative conditions when CC_NO_OVERFLOW is
+ set. It is assumed that valid_dbcc_comparison_p and flags_in_68881 will
+ kick those out before we get here. */
+
+void
+output_dbcc_and_branch (rtx *operands)
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ:
+ output_asm_insn ("dbeq %0,%l1\n\tjeq %l2", operands);
+ break;
+
+ case NE:
+ output_asm_insn ("dbne %0,%l1\n\tjne %l2", operands);
+ break;
+
+ case GT:
+ output_asm_insn ("dbgt %0,%l1\n\tjgt %l2", operands);
+ break;
+
+ case GTU:
+ output_asm_insn ("dbhi %0,%l1\n\tjhi %l2", operands);
+ break;
+
+ case LT:
+ output_asm_insn ("dblt %0,%l1\n\tjlt %l2", operands);
+ break;
+
+ case LTU:
+ output_asm_insn ("dbcs %0,%l1\n\tjcs %l2", operands);
+ break;
+
+ case GE:
+ output_asm_insn ("dbge %0,%l1\n\tjge %l2", operands);
+ break;
+
+ case GEU:
+ output_asm_insn ("dbcc %0,%l1\n\tjcc %l2", operands);
+ break;
+
+ case LE:
+ output_asm_insn ("dble %0,%l1\n\tjle %l2", operands);
+ break;
+
+ case LEU:
+ output_asm_insn ("dbls %0,%l1\n\tjls %l2", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If the decrement is to be done in SImode, then we have
+ to compensate for the fact that dbcc decrements in HImode. */
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ output_asm_insn ("clr%.w %0\n\tsubq%.l #1,%0\n\tjpl %l1", operands);
+ break;
+
+ case HImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
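+
+/* For illustration: for an SImode counter and an EQ test, the output
+ produced above is
+
+ dbeq %0,%l1
+ jeq %l2
+ clr%.w %0
+ subq%.l #1,%0
+ jpl %l1
+
+ where the last three instructions compensate for dbcc decrementing
+ only the low word of the counter. */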
+
+const char *
+output_scc_di (rtx op, rtx operand1, rtx operand2, rtx dest)
+{
+ rtx loperands[7];
+ enum rtx_code op_code = GET_CODE (op);
+
+ /* This does not produce a useful cc. */
+ CC_STATUS_INIT;
+
+ /* The m68k cmp.l instruction requires operand1 to be a reg as used
+ below. Swap the operands and change the op if these requirements
+ are not fulfilled. */
+ if (GET_CODE (operand2) == REG && GET_CODE (operand1) != REG)
+ {
+ rtx tmp = operand1;
+
+ operand1 = operand2;
+ operand2 = tmp;
+ op_code = swap_condition (op_code);
+ }
+ loperands[0] = operand1;
+ if (GET_CODE (operand1) == REG)
+ loperands[1] = gen_rtx_REG (SImode, REGNO (operand1) + 1);
+ else
+ loperands[1] = adjust_address (operand1, SImode, 4);
+ if (operand2 != const0_rtx)
+ {
+ loperands[2] = operand2;
+ if (GET_CODE (operand2) == REG)
+ loperands[3] = gen_rtx_REG (SImode, REGNO (operand2) + 1);
+ else
+ loperands[3] = adjust_address (operand2, SImode, 4);
+ }
+ loperands[4] = gen_label_rtx ();
+ if (operand2 != const0_rtx)
+ output_asm_insn ("cmp%.l %2,%0\n\tjne %l4\n\tcmp%.l %3,%1", loperands);
+ else
+ {
+ if (TARGET_68020 || TARGET_COLDFIRE || ! ADDRESS_REG_P (loperands[0]))
+ output_asm_insn ("tst%.l %0", loperands);
+ else
+ output_asm_insn ("cmp%.w #0,%0", loperands);
+
+ output_asm_insn ("jne %l4", loperands);
+
+ if (TARGET_68020 || TARGET_COLDFIRE || ! ADDRESS_REG_P (loperands[1]))
+ output_asm_insn ("tst%.l %1", loperands);
+ else
+ output_asm_insn ("cmp%.w #0,%1", loperands);
+ }
+
+ loperands[5] = dest;
+
+ switch (op_code)
+ {
+ case EQ:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("seq %5", loperands);
+ break;
+
+ case NE:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("sne %5", loperands);
+ break;
+
+ case GT:
+ loperands[6] = gen_label_rtx ();
+ output_asm_insn ("shi %5\n\tjra %l6", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("sgt %5", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[6]));
+ break;
+
+ case GTU:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("shi %5", loperands);
+ break;
+
+ case LT:
+ loperands[6] = gen_label_rtx ();
+ output_asm_insn ("scs %5\n\tjra %l6", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("slt %5", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[6]));
+ break;
+
+ case LTU:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("scs %5", loperands);
+ break;
+
+ case GE:
+ loperands[6] = gen_label_rtx ();
+ output_asm_insn ("scc %5\n\tjra %l6", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("sge %5", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[6]));
+ break;
+
+ case GEU:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("scc %5", loperands);
+ break;
+
+ case LE:
+ loperands[6] = gen_label_rtx ();
+ output_asm_insn ("sls %5\n\tjra %l6", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("sle %5", loperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[6]));
+ break;
+
+ case LEU:
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (loperands[4]));
+ output_asm_insn ("sls %5", loperands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
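+
+/* For illustration: a DImode EQ test of a register pair against zero
+ comes out as
+
+ tst%.l %0
+ jne %l4
+ tst%.l %1
+ %l4: seq %5
+
+ so the seq sees the high word's nonzero flags when the branch is
+ taken and the low word's flags otherwise. */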
+
+const char *
+output_btst (rtx *operands, rtx countop, rtx dataop, rtx insn, int signpos)
+{
+ operands[0] = countop;
+ operands[1] = dataop;
+
+ if (GET_CODE (countop) == CONST_INT)
+ {
+ register int count = INTVAL (countop);
+ /* If COUNT is bigger than the size of the storage unit in use,
+ advance to the containing unit of the same size. */
+ if (count > signpos)
+ {
+ int offset = (count & ~signpos) / 8;
+ count = count & signpos;
+ operands[1] = dataop = adjust_address (dataop, QImode, offset);
+ }
+ if (count == signpos)
+ cc_status.flags = CC_NOT_POSITIVE | CC_Z_IN_NOT_N;
+ else
+ cc_status.flags = CC_NOT_NEGATIVE | CC_Z_IN_NOT_N;
+
+ /* These three statements used to use next_insns_test_no...
+ but it appears that this should do the same job. */
+ if (count == 31
+ && next_insn_tests_no_inequality (insn))
+ return "tst%.l %1";
+ if (count == 15
+ && next_insn_tests_no_inequality (insn))
+ return "tst%.w %1";
+ if (count == 7
+ && next_insn_tests_no_inequality (insn))
+ return "tst%.b %1";
+ /* Try to use `movew to ccr' followed by the appropriate branch insn.
+ On some m68k variants unfortunately that's slower than btst.
+ On 68000 and higher, that should also work for all HImode operands. */
+ if (TUNE_CPU32 || TARGET_COLDFIRE || optimize_size)
+ {
+ if (count == 3 && DATA_REG_P (operands[1])
+ && next_insn_tests_no_inequality (insn))
+ {
+ cc_status.flags = CC_NOT_NEGATIVE | CC_Z_IN_NOT_N | CC_NO_OVERFLOW;
+ return "move%.w %1,%%ccr";
+ }
+ if (count == 2 && DATA_REG_P (operands[1])
+ && next_insn_tests_no_inequality (insn))
+ {
+ cc_status.flags = CC_NOT_NEGATIVE | CC_INVERTED | CC_NO_OVERFLOW;
+ return "move%.w %1,%%ccr";
+ }
+ /* count == 1 followed by bvc/bvs and
+ count == 0 followed by bcc/bcs are also possible, but need
+ m68k-specific CC_Z_IN_NOT_V and CC_Z_IN_NOT_C flags. */
+ }
+
+ cc_status.flags = CC_NOT_NEGATIVE;
+ }
+ return "btst %0,%1";
+}
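+
+/* For illustration: with the QImode memory form, where SIGNPOS is 7,
+ a btst of bit 39 is reduced above to OFFSET = 4 and COUNT = 7, i.e.
+ a sign-bit test of the byte four bytes further on; if the next insn
+ only tests for equality, this comes out as a plain tst%.b. */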
+
+/* Return true if X is a legitimate base register. STRICT_P says
+ whether we need strict checking. */
+
+bool
+m68k_legitimate_base_reg_p (rtx x, bool strict_p)
+{
+ /* Allow SUBREG everywhere we allow REG. This results in better code. */
+ if (!strict_p && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (REG_P (x)
+ && (strict_p
+ ? REGNO_OK_FOR_BASE_P (REGNO (x))
+ : REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x))));
+}
+
+/* Return true if X is a legitimate index register. STRICT_P says
+ whether we need strict checking. */
+
+bool
+m68k_legitimate_index_reg_p (rtx x, bool strict_p)
+{
+ if (!strict_p && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (REG_P (x)
+ && (strict_p
+ ? REGNO_OK_FOR_INDEX_P (REGNO (x))
+ : REGNO_OK_FOR_INDEX_NONSTRICT_P (REGNO (x))));
+}
+
+/* Return true if X is a legitimate index expression for a (d8,An,Xn) or
+ (bd,An,Xn) addressing mode. Fill in the INDEX and SCALE fields of
+ ADDRESS if so. STRICT_P says whether we need strict checking. */
+
+static bool
+m68k_decompose_index (rtx x, bool strict_p, struct m68k_address *address)
+{
+ int scale;
+
+ /* Check for a scale factor. */
+ scale = 1;
+ if ((TARGET_68020 || TARGET_COLDFIRE)
+ && GET_CODE (x) == MULT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) == 2
+ || INTVAL (XEXP (x, 1)) == 4
+ || (INTVAL (XEXP (x, 1)) == 8
+ && (TARGET_COLDFIRE_FPU || !TARGET_COLDFIRE))))
+ {
+ scale = INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
+
+ /* Check for a word extension. */
+ if (!TARGET_COLDFIRE
+ && GET_CODE (x) == SIGN_EXTEND
+ && GET_MODE (XEXP (x, 0)) == HImode)
+ x = XEXP (x, 0);
+
+ if (m68k_legitimate_index_reg_p (x, strict_p))
+ {
+ address->scale = scale;
+ address->index = x;
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if X is an illegitimate symbolic constant. */
+
+bool
+m68k_illegitimate_symbolic_constant_p (rtx x)
+{
+ rtx base, offset;
+
+ if (M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ return true;
+ }
+ return m68k_tls_reference_p (x, false);
+}
+
+/* Return true if X is a legitimate constant address that can reach
+ bytes in the range [X, X + REACH). STRICT_P says whether we need
+ strict checking. */
+
+static bool
+m68k_legitimate_constant_address_p (rtx x, unsigned int reach, bool strict_p)
+{
+ rtx base, offset;
+
+ if (!CONSTANT_ADDRESS_P (x))
+ return false;
+
+ if (flag_pic
+ && !(strict_p && TARGET_PCREL)
+ && symbolic_operand (x, VOIDmode))
+ return false;
+
+ if (M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P && reach > 1)
+ {
+ split_const (x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset) + reach - 1))
+ return false;
+ }
+
+ return !m68k_tls_reference_p (x, false);
+}
+
+/* Return true if X is a LABEL_REF for a jump table. Assume that unplaced
+ labels will become jump tables. */
+
+static bool
+m68k_jump_table_ref_p (rtx x)
+{
+ if (GET_CODE (x) != LABEL_REF)
+ return false;
+
+ x = XEXP (x, 0);
+ if (!NEXT_INSN (x) && !PREV_INSN (x))
+ return true;
+
+ x = next_nonnote_insn (x);
+ return x && JUMP_TABLE_DATA_P (x);
+}
+
+/* Return true if X is a legitimate address for values of mode MODE.
+ STRICT_P says whether strict checking is needed. If the address
+ is valid, describe its components in *ADDRESS. */
+
+static bool
+m68k_decompose_address (enum machine_mode mode, rtx x,
+ bool strict_p, struct m68k_address *address)
+{
+ unsigned int reach;
+
+ memset (address, 0, sizeof (*address));
+
+ if (mode == BLKmode)
+ reach = 1;
+ else
+ reach = GET_MODE_SIZE (mode);
+
+ /* Check for (An) (mode 2). */
+ if (m68k_legitimate_base_reg_p (x, strict_p))
+ {
+ address->base = x;
+ return true;
+ }
+
+ /* Check for -(An) and (An)+ (modes 3 and 4). */
+ if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC)
+ && m68k_legitimate_base_reg_p (XEXP (x, 0), strict_p))
+ {
+ address->code = GET_CODE (x);
+ address->base = XEXP (x, 0);
+ return true;
+ }
+
+ /* Check for (d16,An) (mode 5). */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && IN_RANGE (INTVAL (XEXP (x, 1)), -0x8000, 0x8000 - reach)
+ && m68k_legitimate_base_reg_p (XEXP (x, 0), strict_p))
+ {
+ address->base = XEXP (x, 0);
+ address->offset = XEXP (x, 1);
+ return true;
+ }
+
+ /* Check for GOT loads. These are (bd,An,Xn) addresses if
+ TARGET_68020 && flag_pic == 2, otherwise they are (d16,An)
+ addresses. */
+ if (GET_CODE (x) == PLUS
+ && XEXP (x, 0) == pic_offset_table_rtx)
+ {
+ /* As we are processing a PLUS, do not unwrap RELOC32 symbols --
+ they are invalid in this context. */
+ if (m68k_unwrap_symbol (XEXP (x, 1), false) != XEXP (x, 1))
+ {
+ address->base = XEXP (x, 0);
+ address->offset = XEXP (x, 1);
+ return true;
+ }
+ }
+
+ /* The ColdFire FPU only accepts addressing modes 2-5. */
+ if (TARGET_COLDFIRE_FPU && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return false;
+
+ /* Check for (xxx).w and (xxx).l. Also, in the TARGET_PCREL case,
+ check for (d16,PC) or (bd,PC,Xn) with a suppressed index register.
+ All these modes are variations of mode 7. */
+ if (m68k_legitimate_constant_address_p (x, reach, strict_p))
+ {
+ address->offset = x;
+ return true;
+ }
+
+ /* Check for (d8,PC,Xn), a mode 7 form. This case is needed for
+ tablejumps.
+
+ ??? do_tablejump creates these addresses before placing the target
+ label, so we have to assume that unplaced labels are jump table
+ references. It seems unlikely that we would ever generate indexed
+ accesses to unplaced labels in other cases. */
+ if (GET_CODE (x) == PLUS
+ && m68k_jump_table_ref_p (XEXP (x, 1))
+ && m68k_decompose_index (XEXP (x, 0), strict_p, address))
+ {
+ address->offset = XEXP (x, 1);
+ return true;
+ }
+
+ /* Everything hereafter deals with (d8,An,Xn.SIZE*SCALE) or
+ (bd,An,Xn.SIZE*SCALE) addresses. */
+
+ if (TARGET_68020)
+ {
+ /* Check for a nonzero base displacement. */
+ if (GET_CODE (x) == PLUS
+ && m68k_legitimate_constant_address_p (XEXP (x, 1), reach, strict_p))
+ {
+ address->offset = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+
+ /* Check for a suppressed index register. */
+ if (m68k_legitimate_base_reg_p (x, strict_p))
+ {
+ address->base = x;
+ return true;
+ }
+
+ /* Check for a suppressed base register. Do not allow this case
+ for non-symbolic offsets as it effectively gives gcc freedom
+ to treat data registers as base registers, which can generate
+ worse code. */
+ if (address->offset
+ && symbolic_operand (address->offset, VOIDmode)
+ && m68k_decompose_index (x, strict_p, address))
+ return true;
+ }
+ else
+ {
+ /* Check for a nonzero base displacement. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && IN_RANGE (INTVAL (XEXP (x, 1)), -0x80, 0x80 - reach))
+ {
+ address->offset = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+ }
+
+ /* We now expect the sum of a base and an index. */
+ if (GET_CODE (x) == PLUS)
+ {
+ if (m68k_legitimate_base_reg_p (XEXP (x, 0), strict_p)
+ && m68k_decompose_index (XEXP (x, 1), strict_p, address))
+ {
+ address->base = XEXP (x, 0);
+ return true;
+ }
+
+ if (m68k_legitimate_base_reg_p (XEXP (x, 1), strict_p)
+ && m68k_decompose_index (XEXP (x, 0), strict_p, address))
+ {
+ address->base = XEXP (x, 1);
+ return true;
+ }
+ }
+ return false;
+}
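+
+/* For illustration: (plus (reg %a0) (const_int 4)) decomposes into
+ BASE = %a0, OFFSET = 4 (mode 5), while on a 68020
+
+ (plus (plus (mult (reg %d1) (const_int 4)) (reg %a0)) (const_int 8))
+
+ decomposes into BASE = %a0, INDEX = %d1, SCALE = 4 and OFFSET = 8. */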
+
+/* Return true if X is a legitimate address for values of mode MODE.
+ STRICT_P says whether strict checking is needed. */
+
+bool
+m68k_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ struct m68k_address address;
+
+ return m68k_decompose_address (mode, x, strict_p, &address);
+}
+
+/* Return true if X is a memory, describing its address in ADDRESS if so.
+ Apply strict checking if called during or after reload. */
+
+static bool
+m68k_legitimate_mem_p (rtx x, struct m68k_address *address)
+{
+ return (MEM_P (x)
+ && m68k_decompose_address (GET_MODE (x), XEXP (x, 0),
+ reload_in_progress || reload_completed,
+ address));
+}
+
+/* Return true if X matches the 'Q' constraint. It must be a memory
+ with a base address and no constant offset or index. */
+
+bool
+m68k_matches_q_p (rtx x)
+{
+ struct m68k_address address;
+
+ return (m68k_legitimate_mem_p (x, &address)
+ && address.code == UNKNOWN
+ && address.base
+ && !address.offset
+ && !address.index);
+}
+
+/* Return true if X matches the 'U' constraint. It must be a base address
+ with a constant offset and no index. */
+
+bool
+m68k_matches_u_p (rtx x)
+{
+ struct m68k_address address;
+
+ return (m68k_legitimate_mem_p (x, &address)
+ && address.code == UNKNOWN
+ && address.base
+ && address.offset
+ && !address.index);
+}
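+
+/* For illustration: (mem (reg %a0)) matches 'Q' but not 'U', whereas
+ (mem (plus (reg %a0) (const_int 4))) matches 'U' but not 'Q'. */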
+
+/* Return the GOT pointer. */
+
+static rtx
+m68k_get_gp (void)
+{
+ if (pic_offset_table_rtx == NULL_RTX)
+ pic_offset_table_rtx = gen_rtx_REG (Pmode, PIC_REG);
+
+ crtl->uses_pic_offset_table = 1;
+
+ return pic_offset_table_rtx;
+}
+
+/* M68K relocations, used to distinguish GOT and TLS relocations in UNSPEC
+ wrappers. */
+enum m68k_reloc { RELOC_GOT, RELOC_TLSGD, RELOC_TLSLDM, RELOC_TLSLDO,
+ RELOC_TLSIE, RELOC_TLSLE };
+
+#define TLS_RELOC_P(RELOC) ((RELOC) != RELOC_GOT)
+
+/* Wrap symbol X into unspec representing relocation RELOC.
+ BASE_REG - register that should be added to the result.
+ TEMP_REG - if non-null, temporary register. */
+
+static rtx
+m68k_wrap_symbol (rtx x, enum m68k_reloc reloc, rtx base_reg, rtx temp_reg)
+{
+ bool use_x_p;
+
+ use_x_p = (base_reg == pic_offset_table_rtx) ? TARGET_XGOT : TARGET_XTLS;
+
+ if (TARGET_COLDFIRE && use_x_p)
+ /* When compiling with the -mxgot or -mxtls switch, the code will
+ look like this:
+
+ move.l <X>@<RELOC>,<TEMP_REG>
+ add.l <BASE_REG>,<TEMP_REG> */
+ {
+ /* Wrap X in UNSPEC_RELOC32 to prompt m68k_output_addr_const_extra
+ to put @RELOC after the reference. */
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (reloc)),
+ UNSPEC_RELOC32);
+ x = gen_rtx_CONST (Pmode, x);
+
+ if (temp_reg == NULL)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ temp_reg = gen_reg_rtx (Pmode);
+ }
+
+ emit_move_insn (temp_reg, x);
+ emit_insn (gen_addsi3 (temp_reg, temp_reg, base_reg));
+ x = temp_reg;
+ }
+ else
+ {
+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (reloc)),
+ UNSPEC_RELOC16);
+ x = gen_rtx_CONST (Pmode, x);
+
+ x = gen_rtx_PLUS (Pmode, base_reg, x);
+ }
+
+ return x;
+}
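+
+/* For illustration: in the default (non -mxgot/-mxtls) case, wrapping
+ a SYMBOL_REF foo with RELOC_GOT and the PIC register as BASE_REG
+ yields
+
+ (plus %a5 (const (unspec [foo GOT] UNSPEC_RELOC16)))
+
+ i.e. a (d16,An) address whose displacement is resolved from the GOT
+ relocation. */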
+
+/* Helper for m68k_unwrap_symbol.
+ Also, if unwrapping was successful (that is if (ORIG != <return value>)),
+ sets *RELOC_PTR to the relocation type of the symbol. */
+
+static rtx
+m68k_unwrap_symbol_1 (rtx orig, bool unwrap_reloc32_p,
+ enum m68k_reloc *reloc_ptr)
+{
+ if (GET_CODE (orig) == CONST)
+ {
+ rtx x;
+ enum m68k_reloc dummy;
+
+ x = XEXP (orig, 0);
+
+ if (reloc_ptr == NULL)
+ reloc_ptr = &dummy;
+
+ /* Handle an addend. */
+ if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS)
+ && CONST_INT_P (XEXP (x, 1)))
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_RELOC16:
+ orig = XVECEXP (x, 0, 0);
+ *reloc_ptr = (enum m68k_reloc) INTVAL (XVECEXP (x, 0, 1));
+ break;
+
+ case UNSPEC_RELOC32:
+ if (unwrap_reloc32_p)
+ {
+ orig = XVECEXP (x, 0, 0);
+ *reloc_ptr = (enum m68k_reloc) INTVAL (XVECEXP (x, 0, 1));
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return orig;
+}
+
+/* Unwrap symbol from UNSPEC_RELOC16 and, if unwrap_reloc32_p,
+ UNSPEC_RELOC32 wrappers. */
+
+rtx
+m68k_unwrap_symbol (rtx orig, bool unwrap_reloc32_p)
+{
+ return m68k_unwrap_symbol_1 (orig, unwrap_reloc32_p, NULL);
+}
+
+/* Helper for m68k_final_prescan_insn. */
+
+static int
+m68k_final_prescan_insn_1 (rtx *x_ptr, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *x_ptr;
+
+ if (m68k_unwrap_symbol (x, true) != x)
+ /* For the rationale behind this, see the comment in m68k_final_prescan_insn. */
+ {
+ rtx plus;
+
+ gcc_assert (GET_CODE (x) == CONST);
+ plus = XEXP (x, 0);
+
+ if (GET_CODE (plus) == PLUS || GET_CODE (plus) == MINUS)
+ {
+ rtx unspec;
+ rtx addend;
+
+ unspec = XEXP (plus, 0);
+ gcc_assert (GET_CODE (unspec) == UNSPEC);
+ addend = XEXP (plus, 1);
+ gcc_assert (CONST_INT_P (addend));
+
+ /* We now have all the pieces; rearrange them. */
+
+ /* Move symbol to plus. */
+ XEXP (plus, 0) = XVECEXP (unspec, 0, 0);
+
+ /* Move plus inside unspec. */
+ XVECEXP (unspec, 0, 0) = plus;
+
+ /* Move unspec to top level of const. */
+ XEXP (x, 0) = unspec;
+ }
+
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Prescan insn before outputting assembler for it. */
+
+void
+m68k_final_prescan_insn (rtx insn ATTRIBUTE_UNUSED,
+ rtx *operands, int n_operands)
+{
+ int i;
+
+ /* Combine and, possibly, other optimizations may do a good job of
+ converting
+ (const (unspec [(symbol)]))
+ into
+ (const (plus (unspec [(symbol)])
+ (const_int N))).
+ The problem with this is emitting @TLS or @GOT decorations.
+ The decoration is emitted when processing (unspec), so the
+ result would be "#symbol@TLSLE+N" instead of "#symbol+N@TLSLE".
+
+ It seems that the easiest solution to this is to convert such
+ operands to
+ (const (unspec [(plus (symbol)
+ (const_int N))])).
+ Note that the top level of the operand remains intact, so we don't
+ have to patch up anything outside the operand. */
+
+ for (i = 0; i < n_operands; ++i)
+ {
+ rtx op;
+
+ op = operands[i];
+
+ for_each_rtx (&op, m68k_final_prescan_insn_1, NULL);
+ }
+}
+
+/* Move X to a register and add REG_EQUAL note pointing to ORIG.
+ If REG is non-null, use it; generate new pseudo otherwise. */
+
+static rtx
+m68k_move_to_reg (rtx x, rtx orig, rtx reg)
+{
+ rtx insn;
+
+ if (reg == NULL_RTX)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ insn = emit_move_insn (reg, x);
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized
+ by the loop optimizer. */
+ set_unique_reg_note (insn, REG_EQUAL, orig);
+
+ return reg;
+}
+
+/* Does the same as m68k_wrap_symbol, but returns a memory reference to
+ GOT slot. */
+
+static rtx
+m68k_wrap_symbol_into_got_ref (rtx x, enum m68k_reloc reloc, rtx temp_reg)
+{
+ x = m68k_wrap_symbol (x, reloc, m68k_get_gp (), temp_reg);
+
+ x = gen_rtx_MEM (Pmode, x);
+ MEM_READONLY_P (x) = 1;
+
+ return x;
+}
+
+/* Legitimize PIC addresses. If the address is already
+ position-independent, we return ORIG. Newly generated
+ position-independent addresses go to REG. If we need more
+ than one register, we lose.
+
+ An address is legitimized by making an indirect reference
+ through the Global Offset Table with the name of the symbol
+ used as an offset.
+
+ The assembler and linker are responsible for placing the
+ address of the symbol in the GOT. The function prologue
+ is responsible for initializing a5 to the starting address
+ of the GOT.
+
+ The assembler is also responsible for translating a symbol name
+ into a constant displacement from the start of the GOT.
+
+ A quick example may make things a little clearer:
+
+ When not generating PIC code to store the value 12345 into _foo
+ we would generate the following code:
+
+ movel #12345, _foo
+
+ When generating PIC two transformations are made. First, the compiler
+ loads the address of foo into a register. So the first transformation makes:
+
+ lea _foo, a0
+ movel #12345, a0@
+
+ The code in movsi will intercept the lea instruction and call this
+ routine which will transform the instructions into:
+
+ movel a5@(_foo:w), a0
+ movel #12345, a0@
+
+
+ That (in a nutshell) is how *all* symbol and label references are
+ handled. */
+
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx reg)
+{
+ rtx pic_ref = orig;
+
+ /* First handle a simple SYMBOL_REF or LABEL_REF */
+ if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF)
+ {
+ gcc_assert (reg);
+
+ pic_ref = m68k_wrap_symbol_into_got_ref (orig, RELOC_GOT, reg);
+ pic_ref = m68k_move_to_reg (pic_ref, orig, reg);
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base;
+
+ /* Make sure this has not already been legitimized. */
+ if (m68k_unwrap_symbol (orig, true) != orig)
+ return orig;
+
+ gcc_assert (reg);
+
+ /* legitimize both operands of the PLUS */
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
+ base == reg ? 0 : reg);
+
+ if (GET_CODE (orig) == CONST_INT)
+ pic_ref = plus_constant (base, INTVAL (orig));
+ else
+ pic_ref = gen_rtx_PLUS (Pmode, base, orig);
+ }
+
+ return pic_ref;
+}
+
+/* The __tls_get_addr symbol. */
+static GTY(()) rtx m68k_tls_get_addr;
+
+/* Return SYMBOL_REF for __tls_get_addr. */
+
+static rtx
+m68k_get_tls_get_addr (void)
+{
+ if (m68k_tls_get_addr == NULL_RTX)
+ m68k_tls_get_addr = init_one_libfunc ("__tls_get_addr");
+
+ return m68k_tls_get_addr;
+}
+
+/* Return the libcall result in A0 instead of the usual D0. */
+static bool m68k_libcall_value_in_a0_p = false;
+
+/* Emit instruction sequence that calls __tls_get_addr. X is
+ the TLS symbol we are referencing and RELOC is the symbol type to use
+ (either TLSGD or TLSLDM). EQV is the REG_EQUAL note for the sequence
+ emitted. A pseudo register with result of __tls_get_addr call is
+ returned. */
+
+static rtx
+m68k_call_tls_get_addr (rtx x, rtx eqv, enum m68k_reloc reloc)
+{
+ rtx a0;
+ rtx insns;
+ rtx dest;
+
+ /* Emit the call sequence. */
+ start_sequence ();
+
+ /* FIXME: Unfortunately, emit_library_call_value does not
+ consider (plus (%a5) (const (unspec))) to be a good enough
+ operand for push, so it forces it into a register. The bad
+ thing about this is that combiner, due to copy propagation and other
+ optimizations, sometimes cannot fix this later. As a consequence,
+ an additional register may be allocated, resulting in a spill.
+ For reference, see args processing loops in
+ calls.c:emit_library_call_value_1.
+ For testcase, see gcc.target/m68k/tls-{gd, ld}.c */
+ x = m68k_wrap_symbol (x, reloc, m68k_get_gp (), NULL_RTX);
+
+ /* __tls_get_addr() is not a libcall, but emitting a libcall_value
+ is the simplest way of generating a call. Unlike an ordinary
+ libcall, which returns its result in D0, __tls_get_addr() returns
+ its result in A0. To work around this, we use
+ m68k_libcall_value_in_a0_p, which temporarily switches the return
+ register to A0. */
+
+ m68k_libcall_value_in_a0_p = true;
+ a0 = emit_library_call_value (m68k_get_tls_get_addr (), NULL_RTX, LCT_PURE,
+ Pmode, 1, x, Pmode);
+ m68k_libcall_value_in_a0_p = false;
+
+ insns = get_insns ();
+ end_sequence ();
+
+ gcc_assert (can_create_pseudo_p ());
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, a0, eqv);
+
+ return dest;
+}
+
+/* The __m68k_read_tp symbol. */
+static GTY(()) rtx m68k_read_tp;
+
+/* Return SYMBOL_REF for __m68k_read_tp. */
+
+static rtx
+m68k_get_m68k_read_tp (void)
+{
+ if (m68k_read_tp == NULL_RTX)
+ m68k_read_tp = init_one_libfunc ("__m68k_read_tp");
+
+ return m68k_read_tp;
+}
+
+/* Emit instruction sequence that calls __m68k_read_tp.
+ A pseudo register with result of __m68k_read_tp call is returned. */
+
+static rtx
+m68k_call_m68k_read_tp (void)
+{
+ rtx a0;
+ rtx eqv;
+ rtx insns;
+ rtx dest;
+
+ start_sequence ();
+
+ /* __m68k_read_tp() is not a libcall, but emitting a libcall_value
+ is the simplest way of generating a call. Unlike an ordinary
+ libcall, which returns its result in D0, __m68k_read_tp() returns
+ its result in A0. To work around this, we use
+ m68k_libcall_value_in_a0_p, which temporarily switches the return
+ register to A0. */
+
+ /* Emit the call sequence. */
+ m68k_libcall_value_in_a0_p = true;
+ a0 = emit_library_call_value (m68k_get_m68k_read_tp (), NULL_RTX, LCT_PURE,
+ Pmode, 0);
+ m68k_libcall_value_in_a0_p = false;
+ insns = get_insns ();
+ end_sequence ();
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the m68k_read_tp result with other IE/LE model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), UNSPEC_RELOC32);
+
+ gcc_assert (can_create_pseudo_p ());
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insns, dest, a0, eqv);
+
+ return dest;
+}
+
+/* Return a legitimized address for accessing TLS SYMBOL_REF X.
+ For explanations of the instruction sequences, see the TLS/NPTL ABI
+ for m68k and ColdFire. */
+
+rtx
+m68k_legitimize_tls_address (rtx orig)
+{
+ switch (SYMBOL_REF_TLS_MODEL (orig))
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ orig = m68k_call_tls_get_addr (orig, orig, RELOC_TLSGD);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ {
+ rtx eqv;
+ rtx a0;
+ rtx x;
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LDM result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_RELOC32);
+
+ a0 = m68k_call_tls_get_addr (orig, eqv, RELOC_TLSLDM);
+
+ x = m68k_wrap_symbol (orig, RELOC_TLSLDO, a0, NULL_RTX);
+
+ if (can_create_pseudo_p ())
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
+
+ orig = x;
+ break;
+ }
+
+ case TLS_MODEL_INITIAL_EXEC:
+ {
+ rtx a0;
+ rtx x;
+
+ a0 = m68k_call_m68k_read_tp ();
+
+ x = m68k_wrap_symbol_into_got_ref (orig, RELOC_TLSIE, NULL_RTX);
+ x = gen_rtx_PLUS (Pmode, x, a0);
+
+ if (can_create_pseudo_p ())
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
+
+ orig = x;
+ break;
+ }
+
+ case TLS_MODEL_LOCAL_EXEC:
+ {
+ rtx a0;
+ rtx x;
+
+ a0 = m68k_call_m68k_read_tp ();
+
+ x = m68k_wrap_symbol (orig, RELOC_TLSLE, a0, NULL_RTX);
+
+ if (can_create_pseudo_p ())
+ x = m68k_move_to_reg (x, orig, NULL_RTX);
+
+ orig = x;
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return orig;
+}
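+
+/* For illustration: under TLS_MODEL_LOCAL_EXEC the code above yields,
+ for a hypothetical TLS variable x, roughly
+
+ a0 = __m68k_read_tp ()
+ reg = (plus a0 (const (unspec [x TLSLE] UNSPEC_RELOC16)))
+
+ with the unspec later printed as x@TLSLE. */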
+
+/* Return true if X is a TLS symbol. */
+
+static bool
+m68k_tls_symbol_p (rtx x)
+{
+ if (!TARGET_HAVE_TLS)
+ return false;
+
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+
+ return SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+/* Helper for m68k_tls_reference_p. */
+
+static int
+m68k_tls_reference_p_1 (rtx *x_ptr, void *data ATTRIBUTE_UNUSED)
+{
+ /* Note: this is not the same as m68k_tls_symbol_p. */
+ if (GET_CODE (*x_ptr) == SYMBOL_REF)
+ return SYMBOL_REF_TLS_MODEL (*x_ptr) != 0 ? 1 : 0;
+
+ /* Don't recurse into legitimate TLS references. */
+ if (m68k_tls_reference_p (*x_ptr, true))
+ return -1;
+
+ return 0;
+}
+
+/* If !LEGITIMATE_P, return true if X is an illegitimate TLS symbol
+ reference.
+ If LEGITIMATE_P, return true if X is a legitimate TLS symbol reference. */
+
+bool
+m68k_tls_reference_p (rtx x, bool legitimate_p)
+{
+ if (!TARGET_HAVE_TLS)
+ return false;
+
+ if (!legitimate_p)
+ return for_each_rtx (&x, m68k_tls_reference_p_1, NULL) == 1 ? true : false;
+ else
+ {
+ enum m68k_reloc reloc = RELOC_GOT;
+
+ return (m68k_unwrap_symbol_1 (x, true, &reloc) != x
+ && TLS_RELOC_P (reloc));
+ }
+}
+
+
+
+#define USE_MOVQ(i) ((unsigned) ((i) + 128) <= 255)
+
+/* Return the type of move that should be used for integer I. */
+
+M68K_CONST_METHOD
+m68k_const_method (HOST_WIDE_INT i)
+{
+ unsigned u;
+
+ if (USE_MOVQ (i))
+ return MOVQ;
+
+ /* The ColdFire doesn't have byte or word operations. */
+ /* FIXME: This may not be useful for the m68060 either. */
+ if (!TARGET_COLDFIRE)
+ {
+ /* If -256 < N < 256 but N is not in range for a moveq,
+ N^0xff will be, so use moveq #N^0xff,dreg; not.b dreg. */
+ if (USE_MOVQ (i ^ 0xff))
+ return NOTB;
+ /* Likewise, try with not.w */
+ if (USE_MOVQ (i ^ 0xffff))
+ return NOTW;
+ /* This is the only value where neg.w is useful. */
+ if (i == -65408)
+ return NEGW;
+ }
+
+ /* Try also with swap. */
+ u = i;
+ if (USE_MOVQ ((u >> 16) | (u << 16)))
+ return SWAP;
+
+ if (TARGET_ISAB)
+ {
+ /* Try using MVZ/MVS with an immediate value to load constants. */
+ if (i >= 0 && i <= 65535)
+ return MVZ;
+ if (i >= -32768 && i <= 32767)
+ return MVS;
+ }
+
+ /* Otherwise, use move.l */
+ return MOVL;
+}
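+
+/* For illustration: 100 is loaded with a single moveq; 0xffff falls
+ through to NOTW (moveq #0,%dN; not.w %dN) and 0x10000 to SWAP
+ (moveq #1,%dN; swap %dN), %dN standing for the destination data
+ register. */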
+
+/* Return the cost of moving constant I into a data register. */
+
+static int
+const_int_cost (HOST_WIDE_INT i)
+{
+ switch (m68k_const_method (i))
+ {
+ case MOVQ:
+ /* Constants between -128 and 127 are cheap due to moveq. */
+ return 0;
+ case MVZ:
+ case MVS:
+ case NOTB:
+ case NOTW:
+ case NEGW:
+ case SWAP:
+ /* Constants easily generated by moveq + not.b/not.w/neg.w/swap. */
+ return 1;
+ case MOVL:
+ return 2;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static bool
+m68k_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ /* Constant zero is super cheap due to clr instruction. */
+ if (x == const0_rtx)
+ *total = 0;
+ else
+ *total = const_int_cost (INTVAL (x));
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = 3;
+ return true;
+
+ case CONST_DOUBLE:
+ /* Make 0.0 cheaper than other floating constants to
+ encourage creating tstsf and tstdf insns. */
+ if (outer_code == COMPARE
+ && (x == CONST0_RTX (SFmode) || x == CONST0_RTX (DFmode)))
+ *total = 4;
+ else
+ *total = 5;
+ return true;
+
+ /* These are vaguely right for a 68020. */
+ /* The costs for long multiply have been adjusted to work properly
+ in synth_mult on the 68020, relative to an average of the time
+ for add and the time for shift, taking away a little more because
+ sometimes move insns are needed. */
+ /* div?.w is relatively cheaper on 68000 counted in COSTS_N_INSNS
+ terms. */
+#define MULL_COST \
+ (TUNE_68060 ? 2 \
+ : TUNE_68040 ? 5 \
+ : (TUNE_CFV2 && TUNE_EMAC) ? 3 \
+ : (TUNE_CFV2 && TUNE_MAC) ? 4 \
+ : TUNE_CFV2 ? 8 \
+ : TARGET_COLDFIRE ? 3 : 13)
+
+#define MULW_COST \
+ (TUNE_68060 ? 2 \
+ : TUNE_68040 ? 3 \
+ : TUNE_68000_10 ? 5 \
+ : (TUNE_CFV2 && TUNE_EMAC) ? 3 \
+ : (TUNE_CFV2 && TUNE_MAC) ? 2 \
+ : TUNE_CFV2 ? 8 \
+ : TARGET_COLDFIRE ? 2 : 8)
+
+#define DIVW_COST \
+ (TARGET_CF_HWDIV ? 11 \
+ : TUNE_68000_10 || TARGET_COLDFIRE ? 12 : 27)
+
+ case PLUS:
+ /* An lea costs about three times as much as a simple add. */
+ if (GET_MODE (x) == SImode
+ && GET_CODE (XEXP (x, 1)) == REG
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && (INTVAL (XEXP (XEXP (x, 0), 1)) == 2
+ || INTVAL (XEXP (XEXP (x, 0), 1)) == 4
+ || INTVAL (XEXP (XEXP (x, 0), 1)) == 8))
+ {
+ /* lea an@(dx:l:i),am */
+ *total = COSTS_N_INSNS (TARGET_COLDFIRE ? 2 : 3);
+ return true;
+ }
+ return false;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (TUNE_68060)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ if (TUNE_68000_10)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (INTVAL (XEXP (x, 1)) < 16)
+ *total = COSTS_N_INSNS (2) + INTVAL (XEXP (x, 1)) / 2;
+ else
+ /* We're using clrw + swap for these cases. */
+ *total = COSTS_N_INSNS (4) + (INTVAL (XEXP (x, 1)) - 16) / 2;
+ }
+ else
+ *total = COSTS_N_INSNS (10); /* Worst case. */
+ return true;
+ }
+ /* A shift by a big integer takes an extra instruction. */
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) == 16))
+ {
+ *total = COSTS_N_INSNS (2); /* clrw;swap */
+ return true;
+ }
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && !(INTVAL (XEXP (x, 1)) > 0
+ && INTVAL (XEXP (x, 1)) <= 8))
+ {
+ *total = COSTS_N_INSNS (TARGET_COLDFIRE ? 1 : 3); /* lsr #i,dn */
+ return true;
+ }
+ return false;
+
+ case MULT:
+ if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+ && GET_MODE (x) == SImode)
+ *total = COSTS_N_INSNS (MULW_COST);
+ else if (GET_MODE (x) == QImode || GET_MODE (x) == HImode)
+ *total = COSTS_N_INSNS (MULW_COST);
+ else
+ *total = COSTS_N_INSNS (MULL_COST);
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (GET_MODE (x) == QImode || GET_MODE (x) == HImode)
+ *total = COSTS_N_INSNS (DIVW_COST); /* div.w */
+ else if (TARGET_CF_HWDIV)
+ *total = COSTS_N_INSNS (18);
+ else
+ *total = COSTS_N_INSNS (43); /* div.l */
+ return true;
+
+ case ZERO_EXTRACT:
+ if (outer_code == COMPARE)
+ *total = 0;
+ return false;
+
+ default:
+ return false;
+ }
+}
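+
+/* For illustration: with the tables above, a full 32-bit multiply
+ costs COSTS_N_INSNS (5) when tuning for the 68040 but
+ COSTS_N_INSNS (13) on a plain 68020; these relative costs are what
+ steer synth_mult's choice between multiply and shift/add
+ sequences. */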
+
+/* Return an instruction to move CONST_INT OPERANDS[1] into data register
+ OPERANDS[0]. */
+
+static const char *
+output_move_const_into_data_reg (rtx *operands)
+{
+ HOST_WIDE_INT i;
+
+ i = INTVAL (operands[1]);
+ switch (m68k_const_method (i))
+ {
+ case MVZ:
+ return "mvzw %1,%0";
+ case MVS:
+ return "mvsw %1,%0";
+ case MOVQ:
+ return "moveq %1,%0";
+ case NOTB:
+ CC_STATUS_INIT;
+ operands[1] = GEN_INT (i ^ 0xff);
+ return "moveq %1,%0\n\tnot%.b %0";
+ case NOTW:
+ CC_STATUS_INIT;
+ operands[1] = GEN_INT (i ^ 0xffff);
+ return "moveq %1,%0\n\tnot%.w %0";
+ case NEGW:
+ CC_STATUS_INIT;
+ return "moveq #-128,%0\n\tneg%.w %0";
+ case SWAP:
+ {
+ unsigned u = i;
+
+ operands[1] = GEN_INT ((u << 16) | (u >> 16));
+ return "moveq %1,%0\n\tswap %0";
+ }
+ case MOVL:
+ return "move%.l %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if I can be handled by ISA B's mov3q instruction. */
+
+bool
+valid_mov3q_const (HOST_WIDE_INT i)
+{
+ return TARGET_ISAB && (i == -1 || IN_RANGE (i, 1, 7));
+}
+
+/* Return an instruction to move CONST_INT OPERANDS[1] into OPERANDS[0].
+ I is the value of OPERANDS[1]. */
+
+static const char *
+output_move_simode_const (rtx *operands)
+{
+ rtx dest;
+ HOST_WIDE_INT src;
+
+ dest = operands[0];
+ src = INTVAL (operands[1]);
+ if (src == 0
+ && (DATA_REG_P (dest) || MEM_P (dest))
+ /* clr insns on 68000 read before writing. */
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(MEM_P (dest) && MEM_VOLATILE_P (dest))))
+ return "clr%.l %0";
+ else if (GET_MODE (dest) == SImode && valid_mov3q_const (src))
+ return "mov3q%.l %1,%0";
+ else if (src == 0 && ADDRESS_REG_P (dest))
+ return "sub%.l %0,%0";
+ else if (DATA_REG_P (dest))
+ return output_move_const_into_data_reg (operands);
+ else if (ADDRESS_REG_P (dest) && IN_RANGE (src, -0x8000, 0x7fff))
+ {
+ if (valid_mov3q_const (src))
+ return "mov3q%.l %1,%0";
+ return "move%.w %1,%0";
+ }
+ else if (MEM_P (dest)
+ && GET_CODE (XEXP (dest, 0)) == PRE_DEC
+ && REGNO (XEXP (XEXP (dest, 0), 0)) == STACK_POINTER_REGNUM
+ && IN_RANGE (src, -0x8000, 0x7fff))
+ {
+ if (valid_mov3q_const (src))
+ return "mov3q%.l %1,%-";
+ return "pea %a1";
+ }
+ return "move%.l %1,%0";
+}
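+
+/* For illustration: moving 0 into a data register comes out above as
+ "clr%.l %0", 0 into an address register as "sub%.l %0,%0", and 5
+ into an address register as "mov3q%.l %1,%0" on ISA B or
+ "move%.w %1,%0" elsewhere. */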
+
+const char *
+output_move_simode (rtx *operands)
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return output_move_simode_const (operands);
+ else if ((GET_CODE (operands[1]) == SYMBOL_REF
+ || GET_CODE (operands[1]) == CONST)
+ && push_operand (operands[0], SImode))
+ return "pea %a1";
+ else if ((GET_CODE (operands[1]) == SYMBOL_REF
+ || GET_CODE (operands[1]) == CONST)
+ && ADDRESS_REG_P (operands[0]))
+ return "lea %a1,%0";
+ return "move%.l %1,%0";
+}
+
+const char *
+output_move_himode (rtx *operands)
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (operands[1] == const0_rtx
+ && (DATA_REG_P (operands[0])
+ || GET_CODE (operands[0]) == MEM)
+ /* clr insns on 68000 read before writing. */
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(GET_CODE (operands[0]) == MEM
+ && MEM_VOLATILE_P (operands[0]))))
+ return "clr%.w %0";
+ else if (operands[1] == const0_rtx
+ && ADDRESS_REG_P (operands[0]))
+ return "sub%.l %0,%0";
+ else if (DATA_REG_P (operands[0])
+ && INTVAL (operands[1]) < 128
+ && INTVAL (operands[1]) >= -128)
+ return "moveq %1,%0";
+ else if (INTVAL (operands[1]) < 0x8000
+ && INTVAL (operands[1]) >= -0x8000)
+ return "move%.w %1,%0";
+ }
+ else if (CONSTANT_P (operands[1]))
+ return "move%.l %1,%0";
+ return "move%.w %1,%0";
+}
+
+const char *
+output_move_qimode (rtx *operands)
+{
+ /* 68k family always modifies the stack pointer by at least 2, even for
+ byte pushes. The 5200 (ColdFire) does not do this. */
+
+ /* This case is generated by pushqi1 pattern now. */
+ gcc_assert (!(GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ && XEXP (XEXP (operands[0], 0), 0) == stack_pointer_rtx
+ && ! ADDRESS_REG_P (operands[1])
+ && ! TARGET_COLDFIRE));
+
+ /* clr and st insns on 68000 read before writing. */
+ if (!ADDRESS_REG_P (operands[0])
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))))
+ {
+ if (operands[1] == const0_rtx)
+ return "clr%.b %0";
+ if ((!TARGET_COLDFIRE || DATA_REG_P (operands[0]))
+ && GET_CODE (operands[1]) == CONST_INT
+ && (INTVAL (operands[1]) & 255) == 255)
+ {
+ CC_STATUS_INIT;
+ return "st %0";
+ }
+ }
+ if (GET_CODE (operands[1]) == CONST_INT
+ && DATA_REG_P (operands[0])
+ && INTVAL (operands[1]) < 128
+ && INTVAL (operands[1]) >= -128)
+ return "moveq %1,%0";
+ if (operands[1] == const0_rtx && ADDRESS_REG_P (operands[0]))
+ return "sub%.l %0,%0";
+ if (GET_CODE (operands[1]) != CONST_INT && CONSTANT_P (operands[1]))
+ return "move%.l %1,%0";
+ /* 68k family (including the 5200 ColdFire) does not support byte moves to
+ or from address registers. */
+ if (ADDRESS_REG_P (operands[0]) || ADDRESS_REG_P (operands[1]))
+ return "move%.w %1,%0";
+ return "move%.b %1,%0";
+}
+
+const char *
+output_move_stricthi (rtx *operands)
+{
+ if (operands[1] == const0_rtx
+ /* clr insns on 68000 read before writing. */
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))))
+ return "clr%.w %0";
+ return "move%.w %1,%0";
+}
+
+const char *
+output_move_strictqi (rtx *operands)
+{
+ if (operands[1] == const0_rtx
+ /* clr insns on 68000 read before writing. */
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))))
+ return "clr%.b %0";
+ return "move%.b %1,%0";
+}
+
+/* Return the best assembler insn template
+ for moving operands[1] into operands[0] as a fullword. */
+
+static const char *
+singlemove_string (rtx *operands)
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return output_move_simode_const (operands);
+ return "move%.l %1,%0";
+}
+
+
+/* Output assembler or rtl code to perform a doubleword move insn
+ with operands OPERANDS.
+ Pointers to 3 helper functions should be specified:
+ HANDLE_REG_ADJUST to adjust a register by a small value,
+ HANDLE_COMPADR to compute an address and
+ HANDLE_MOVSI to move 4 bytes. */
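+
+/* As a sketch (register choices illustrative), a DImode move between
+ register pairs reduces to two SImode moves issued through HANDLE_MOVSI:
+
+ move.l %d0,%d2 | word in the low-numbered register
+ move.l %d1,%d3 | word in the high-numbered register */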
+
+static void
+handle_move_double (rtx operands[2],
+ void (*handle_reg_adjust) (rtx, int),
+ void (*handle_compadr) (rtx [2]),
+ void (*handle_movsi) (rtx [2]))
+{
+ enum
+ {
+ REGOP, OFFSOP, MEMOP, PUSHOP, POPOP, CNSTOP, RNDOP
+ } optype0, optype1;
+ rtx latehalf[2];
+ rtx middlehalf[2];
+ rtx xops[2];
+ rtx addreg0 = 0, addreg1 = 0;
+ int dest_overlapped_low = 0;
+ int size = GET_MODE_SIZE (GET_MODE (operands[0]));
+
+ middlehalf[0] = 0;
+ middlehalf[1] = 0;
+
+ /* First classify both operands. */
+
+ if (REG_P (operands[0]))
+ optype0 = REGOP;
+ else if (offsettable_memref_p (operands[0]))
+ optype0 = OFFSOP;
+ else if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+ optype0 = POPOP;
+ else if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ optype0 = PUSHOP;
+ else if (GET_CODE (operands[0]) == MEM)
+ optype0 = MEMOP;
+ else
+ optype0 = RNDOP;
+
+ if (REG_P (operands[1]))
+ optype1 = REGOP;
+ else if (CONSTANT_P (operands[1]))
+ optype1 = CNSTOP;
+ else if (offsettable_memref_p (operands[1]))
+ optype1 = OFFSOP;
+ else if (GET_CODE (XEXP (operands[1], 0)) == POST_INC)
+ optype1 = POPOP;
+ else if (GET_CODE (XEXP (operands[1], 0)) == PRE_DEC)
+ optype1 = PUSHOP;
+ else if (GET_CODE (operands[1]) == MEM)
+ optype1 = MEMOP;
+ else
+ optype1 = RNDOP;
+
+ /* Check for cases that the operand constraints are not supposed to
+ allow. Generating code for these cases is painful. */
+ gcc_assert (optype0 != RNDOP && optype1 != RNDOP);
+
+ /* If one operand is decrementing and one is incrementing, decrement
+ the decrementing operand's register explicitly and change that
+ operand into ordinary indexing. */
+
+ if (optype0 == PUSHOP && optype1 == POPOP)
+ {
+ operands[0] = XEXP (XEXP (operands[0], 0), 0);
+
+ handle_reg_adjust (operands[0], -size);
+
+ if (GET_MODE (operands[1]) == XFmode)
+ operands[0] = gen_rtx_MEM (XFmode, operands[0]);
+ else if (GET_MODE (operands[1]) == DFmode)
+ operands[0] = gen_rtx_MEM (DFmode, operands[0]);
+ else
+ operands[0] = gen_rtx_MEM (DImode, operands[0]);
+ optype0 = OFFSOP;
+ }
+ if (optype0 == POPOP && optype1 == PUSHOP)
+ {
+ operands[1] = XEXP (XEXP (operands[1], 0), 0);
+
+ handle_reg_adjust (operands[1], -size);
+
+ if (GET_MODE (operands[1]) == XFmode)
+ operands[1] = gen_rtx_MEM (XFmode, operands[1]);
+ else if (GET_MODE (operands[1]) == DFmode)
+ operands[1] = gen_rtx_MEM (DFmode, operands[1]);
+ else
+ operands[1] = gen_rtx_MEM (DImode, operands[1]);
+ optype1 = OFFSOP;
+ }
+
+ /* If an operand is an unoffsettable memory ref, find a register
+ we can increment temporarily to make it refer to the second word. */
+
+ if (optype0 == MEMOP)
+ addreg0 = find_addr_reg (XEXP (operands[0], 0));
+
+ if (optype1 == MEMOP)
+ addreg1 = find_addr_reg (XEXP (operands[1], 0));
+
+ /* Ok, we can do one word at a time.
+ Normally we do the low-numbered word first,
+ but if either operand is autodecrementing then we
+ do the high-numbered word first.
+
+ In either case, set up in LATEHALF the operands to use
+ for the high-numbered word and in some cases alter the
+ operands in OPERANDS to be suitable for the low-numbered word. */
+
+ if (size == 12)
+ {
+ if (optype0 == REGOP)
+ {
+ latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 2);
+ middlehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ }
+ else if (optype0 == OFFSOP)
+ {
+ middlehalf[0] = adjust_address (operands[0], SImode, 4);
+ latehalf[0] = adjust_address (operands[0], SImode, size - 4);
+ }
+ else
+ {
+ middlehalf[0] = adjust_address (operands[0], SImode, 0);
+ latehalf[0] = adjust_address (operands[0], SImode, 0);
+ }
+
+ if (optype1 == REGOP)
+ {
+ latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 2);
+ middlehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ }
+ else if (optype1 == OFFSOP)
+ {
+ middlehalf[1] = adjust_address (operands[1], SImode, 4);
+ latehalf[1] = adjust_address (operands[1], SImode, size - 4);
+ }
+ else if (optype1 == CNSTOP)
+ {
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[3];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ operands[1] = GEN_INT (l[0]);
+ middlehalf[1] = GEN_INT (l[1]);
+ latehalf[1] = GEN_INT (l[2]);
+ }
+ else
+ {
+ /* No non-CONST_DOUBLE constant should ever appear
+ here. */
+ gcc_assert (!CONSTANT_P (operands[1]));
+ }
+ }
+ else
+ {
+ middlehalf[1] = adjust_address (operands[1], SImode, 0);
+ latehalf[1] = adjust_address (operands[1], SImode, 0);
+ }
+ }
+ else
+ /* size is not 12: */
+ {
+ if (optype0 == REGOP)
+ latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else if (optype0 == OFFSOP)
+ latehalf[0] = adjust_address (operands[0], SImode, size - 4);
+ else
+ latehalf[0] = adjust_address (operands[0], SImode, 0);
+
+ if (optype1 == REGOP)
+ latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else if (optype1 == OFFSOP)
+ latehalf[1] = adjust_address (operands[1], SImode, size - 4);
+ else if (optype1 == CNSTOP)
+ split_double (operands[1], &operands[1], &latehalf[1]);
+ else
+ latehalf[1] = adjust_address (operands[1], SImode, 0);
+ }
+
+ /* If insn is effectively movd N(sp),-(sp) then we will do the
+ high word first. We should use the adjusted operand 1 (which is N+4(sp))
+ for the low word as well, to compensate for the first decrement of sp. */
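+ /* E.g. (a sketch) pushing the DImode value at 8(%sp):
+
+ move.l 12(%sp),-(%sp) | high-numbered word
+ move.l 12(%sp),-(%sp) | low-numbered word, now also at 12(%sp)
+
+ both pushes use the adjusted address because the first push moved sp. */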
+ if (optype0 == PUSHOP
+ && REGNO (XEXP (XEXP (operands[0], 0), 0)) == STACK_POINTER_REGNUM
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ operands[1] = middlehalf[1] = latehalf[1];
+
+ /* For (set (reg:DI N) (mem:DI ... (reg:SI N) ...)),
+ if the upper part of reg N does not appear in the MEM, arrange to
+ emit the move late-half first. Otherwise, compute the MEM address
+ into the upper part of N and use that as a pointer to the memory
+ operand. */
+ if (optype0 == REGOP
+ && (optype1 == OFFSOP || optype1 == MEMOP))
+ {
+ rtx testlow = gen_rtx_REG (SImode, REGNO (operands[0]));
+
+ if (reg_overlap_mentioned_p (testlow, XEXP (operands[1], 0))
+ && reg_overlap_mentioned_p (latehalf[0], XEXP (operands[1], 0)))
+ {
+ /* If both halves of dest are used in the src memory address,
+ compute the address into latehalf of dest.
+ Note that this can't happen if the dest is two data regs. */
+ compadr:
+ xops[0] = latehalf[0];
+ xops[1] = XEXP (operands[1], 0);
+
+ handle_compadr (xops);
+ if (GET_MODE (operands[1]) == XFmode)
+ {
+ operands[1] = gen_rtx_MEM (XFmode, latehalf[0]);
+ middlehalf[1] = adjust_address (operands[1], DImode, size - 8);
+ latehalf[1] = adjust_address (operands[1], DImode, size - 4);
+ }
+ else
+ {
+ operands[1] = gen_rtx_MEM (DImode, latehalf[0]);
+ latehalf[1] = adjust_address (operands[1], DImode, size - 4);
+ }
+ }
+ else if (size == 12
+ && reg_overlap_mentioned_p (middlehalf[0],
+ XEXP (operands[1], 0)))
+ {
+ /* Check for two regs used by both source and dest.
+ Note that this can't happen if the dest is all data regs.
+ It can happen if the dest is d6, d7, a0.
+ But in that case, latehalf is an addr reg, so
+ the code at compadr does ok. */
+
+ if (reg_overlap_mentioned_p (testlow, XEXP (operands[1], 0))
+ || reg_overlap_mentioned_p (latehalf[0], XEXP (operands[1], 0)))
+ goto compadr;
+
+ /* JRV says this can't happen: */
+ gcc_assert (!addreg0 && !addreg1);
+
+ /* Only the middle reg conflicts; simply put it last. */
+ handle_movsi (operands);
+ handle_movsi (latehalf);
+ handle_movsi (middlehalf);
+
+ return;
+ }
+ else if (reg_overlap_mentioned_p (testlow, XEXP (operands[1], 0)))
+ /* If the low half of dest is mentioned in the source memory
+ address, arrange to emit the move of the late half first. */
+ dest_overlapped_low = 1;
+ }
+
+ /* If one or both operands are autodecrementing,
+ do the two words, high-numbered first. */
+
+ /* Likewise, if the first move would clobber the source of the second
+ one, do them in the other order. This happens only for registers;
+ such overlap can't happen in memory unless the user explicitly
+ sets it up, and that is an undefined circumstance. */
+
+ if (optype0 == PUSHOP || optype1 == PUSHOP
+ || (optype0 == REGOP && optype1 == REGOP
+ && ((middlehalf[1] && REGNO (operands[0]) == REGNO (middlehalf[1]))
+ || REGNO (operands[0]) == REGNO (latehalf[1])))
+ || dest_overlapped_low)
+ {
+ /* Make any unoffsettable addresses point at high-numbered word. */
+ if (addreg0)
+ handle_reg_adjust (addreg0, size - 4);
+ if (addreg1)
+ handle_reg_adjust (addreg1, size - 4);
+
+ /* Do that word. */
+ handle_movsi (latehalf);
+
+ /* Undo the adds we just did. */
+ if (addreg0)
+ handle_reg_adjust (addreg0, -4);
+ if (addreg1)
+ handle_reg_adjust (addreg1, -4);
+
+ if (size == 12)
+ {
+ handle_movsi (middlehalf);
+
+ if (addreg0)
+ handle_reg_adjust (addreg0, -4);
+ if (addreg1)
+ handle_reg_adjust (addreg1, -4);
+ }
+
+ /* Do low-numbered word. */
+
+ handle_movsi (operands);
+ return;
+ }
+
+ /* Normal case: do the two words, low-numbered first. */
+
+ m68k_final_prescan_insn (NULL, operands, 2);
+ handle_movsi (operands);
+
+ /* Do the middle one of the three words for long doubles. */
+ if (size == 12)
+ {
+ if (addreg0)
+ handle_reg_adjust (addreg0, 4);
+ if (addreg1)
+ handle_reg_adjust (addreg1, 4);
+
+ m68k_final_prescan_insn (NULL, middlehalf, 2);
+ handle_movsi (middlehalf);
+ }
+
+ /* Make any unoffsettable addresses point at high-numbered word. */
+ if (addreg0)
+ handle_reg_adjust (addreg0, 4);
+ if (addreg1)
+ handle_reg_adjust (addreg1, 4);
+
+ /* Do that word. */
+ m68k_final_prescan_insn (NULL, latehalf, 2);
+ handle_movsi (latehalf);
+
+ /* Undo the adds we just did. */
+ if (addreg0)
+ handle_reg_adjust (addreg0, -(size - 4));
+ if (addreg1)
+ handle_reg_adjust (addreg1, -(size - 4));
+
+ return;
+}
+
+/* Output assembler code to adjust REG by N. */
+static void
+output_reg_adjust (rtx reg, int n)
+{
+ const char *s;
+
+ gcc_assert (GET_MODE (reg) == SImode
+ && -12 <= n && n != 0 && n <= 12);
+
+ switch (n)
+ {
+ case 12:
+ s = "add%.l #12,%0";
+ break;
+
+ case 8:
+ s = "addq%.l #8,%0";
+ break;
+
+ case 4:
+ s = "addq%.l #4,%0";
+ break;
+
+ case -12:
+ s = "sub%.l #12,%0";
+ break;
+
+ case -8:
+ s = "subq%.l #8,%0";
+ break;
+
+ case -4:
+ s = "subq%.l #4,%0";
+ break;
+
+ default:
+ gcc_unreachable ();
+ s = NULL;
+ }
+
+ output_asm_insn (s, &reg);
+}
+
+/* Emit rtl code to adjust REG by N. */
+static void
+emit_reg_adjust (rtx reg1, int n)
+{
+ rtx reg2;
+
+ gcc_assert (GET_MODE (reg1) == SImode
+ && -12 <= n && n != 0 && n <= 12);
+
+ reg1 = copy_rtx (reg1);
+ reg2 = copy_rtx (reg1);
+
+ if (n < 0)
+ emit_insn (gen_subsi3 (reg1, reg2, GEN_INT (-n)));
+ else if (n > 0)
+ emit_insn (gen_addsi3 (reg1, reg2, GEN_INT (n)));
+ else
+ gcc_unreachable ();
+}
+
+/* Output assembler code to load address OPERANDS[1] into register
+ OPERANDS[0]. */
+static void
+output_compadr (rtx operands[2])
+{
+ output_asm_insn ("lea %a1,%0", operands);
+}
+
+/* Output the best assembler insn for moving operands[1] into operands[0]
+ as a fullword. */
+static void
+output_movsi (rtx operands[2])
+{
+ output_asm_insn (singlemove_string (operands), operands);
+}
+
+/* Copy OP and change its mode to MODE. */
+static rtx
+copy_operand (rtx op, enum machine_mode mode)
+{
+ /* ??? This looks really ugly. There must be a better way
+ to change a mode on the operand. */
+ if (GET_MODE (op) != VOIDmode)
+ {
+ if (REG_P (op))
+ op = gen_rtx_REG (mode, REGNO (op));
+ else
+ {
+ op = copy_rtx (op);
+ PUT_MODE (op, mode);
+ }
+ }
+
+ return op;
+}
+
+/* Emit rtl code for moving operands[1] into operands[0] as a fullword. */
+static void
+emit_movsi (rtx operands[2])
+{
+ operands[0] = copy_operand (operands[0], SImode);
+ operands[1] = copy_operand (operands[1], SImode);
+
+ emit_insn (gen_movsi (operands[0], operands[1]));
+}
+
+/* Output assembler code to perform a doubleword move insn
+ with operands OPERANDS. */
+const char *
+output_move_double (rtx *operands)
+{
+ handle_move_double (operands,
+ output_reg_adjust, output_compadr, output_movsi);
+
+ return "";
+}
+
+/* Output rtl code to perform a doubleword move insn
+ with operands OPERANDS. */
+void
+m68k_emit_move_double (rtx operands[2])
+{
+ handle_move_double (operands, emit_reg_adjust, emit_movsi, emit_movsi);
+}
+
+/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
+ new rtx with the correct mode. */
+
+static rtx
+force_mode (enum machine_mode mode, rtx orig)
+{
+ if (mode == GET_MODE (orig))
+ return orig;
+
+ if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
+ abort ();
+
+ return gen_rtx_REG (mode, REGNO (orig));
+}
+
+static int
+fp_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return reg_renumber && FP_REG_P (op);
+}
+
+/* Emit insns to move operands[1] into operands[0].
+
+ Return 1 if we have written out everything that needs to be done to
+ do the move. Otherwise, return 0 and the caller will emit the move
+ normally.
+
+ Note SCRATCH_REG may not be in the proper mode depending on how it
+ will be used. This routine is responsible for creating a new copy
+ of SCRATCH_REG in the proper mode. */
+
+int
+emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
+{
+ register rtx operand0 = operands[0];
+ register rtx operand1 = operands[1];
+ register rtx tem;
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ operand0 = reg_equiv_mem[REGNO (operand0)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && GET_CODE (SUBREG_REG (operand0)) == REG
+ && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand0) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
+ SUBREG_BYTE (operand0));
+ operand0 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == REG
+ && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
+ operand1 = reg_equiv_mem[REGNO (operand1)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && GET_CODE (SUBREG_REG (operand1)) == REG
+ && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand1) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
+ SUBREG_BYTE (operand1));
+ operand1 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ && ((tem = find_replacement (&XEXP (operand0, 0)))
+ != XEXP (operand0, 0)))
+ operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
+ if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ && ((tem = find_replacement (&XEXP (operand1, 0)))
+ != XEXP (operand1, 0)))
+ operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
+
+ /* Handle secondary reloads for loads/stores of FP registers where
+ the address is symbolic by using the scratch register. */
+ if (fp_reg_operand (operand0, mode)
+ && ((GET_CODE (operand1) == MEM
+ && ! memory_address_p (DFmode, XEXP (operand1, 0)))
+ || ((GET_CODE (operand1) == SUBREG
+ && GET_CODE (XEXP (operand1, 0)) == MEM
+ && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
+ && scratch_reg)
+ {
+ if (GET_CODE (operand1) == SUBREG)
+ operand1 = XEXP (operand1, 0);
+
+ /* SCRATCH_REG will hold an address. We want
+ it in SImode regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (SImode, scratch_reg);
+
+ /* The displacement might be too large for the address to be
+ legitimate; in such cases load the offset into the scratch reg
+ and rebuild the address from it. */
+ if (!memory_address_p (Pmode, XEXP (operand1, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
+ Pmode,
+ XEXP (XEXP (operand1, 0), 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand1, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, operand0,
+ gen_rtx_MEM (mode, scratch_reg)));
+ return 1;
+ }
+ else if (fp_reg_operand (operand1, mode)
+ && ((GET_CODE (operand0) == MEM
+ && ! memory_address_p (DFmode, XEXP (operand0, 0)))
+ || ((GET_CODE (operand0) == SUBREG)
+ && GET_CODE (XEXP (operand0, 0)) == MEM
+ && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
+ && scratch_reg)
+ {
+ if (GET_CODE (operand0) == SUBREG)
+ operand0 = XEXP (operand0, 0);
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in SImode regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (SImode, scratch_reg);
+
+ /* The displacement might be too large for the address to be
+ legitimate; in such cases load the offset into the scratch reg
+ and rebuild the address from it. */
+ if (!memory_address_p (Pmode, XEXP (operand0, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
+ 0)),
+ Pmode,
+ XEXP (XEXP (operand0, 0),
+ 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg),
+ operand1));
+ return 1;
+ }
+ /* Handle secondary reloads for loads of FP registers from constant
+ expressions by forcing the constant into memory.
+
+ Use SCRATCH_REG to hold the address of the memory location.
+
+ The proper fix is to change PREFERRED_RELOAD_CLASS to return
+ NO_REGS when presented with a const_int and a register class
+ containing only FP registers. Doing so unfortunately creates
+ more problems than it solves. Fix this for 2.5. */
+ else if (fp_reg_operand (operand0, mode)
+ && CONSTANT_P (operand1)
+ && scratch_reg)
+ {
+ rtx xoperands[2];
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in SImode regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (SImode, scratch_reg);
+
+ /* Force the constant into memory and put the address of the
+ memory location into scratch_reg. */
+ xoperands[0] = scratch_reg;
+ xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
+ emit_insn (gen_rtx_SET (mode, scratch_reg, xoperands[1]));
+
+ /* Now load the destination register. */
+ emit_insn (gen_rtx_SET (mode, operand0,
+ gen_rtx_MEM (mode, scratch_reg)));
+ return 1;
+ }
+
+ /* Now have insn-emit do whatever it normally does. */
+ return 0;
+}
+
+/* Split one or more DImode RTL references into pairs of SImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
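+
+/* For instance (a sketch): for op = (reg:DI %d0), a hard register pair,
+ hi_half gets (reg:SI %d0) (byte offset 0, the most significant word on
+ this big-endian target) and lo_half gets (reg:SI %d1) (byte offset 4). */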
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+ /* simplify_subreg refuses to split volatile memory references,
+ but we still have to handle them. */
+ if (GET_CODE (op) == MEM)
+ {
+ lo_half[num] = adjust_address (op, SImode, 4);
+ hi_half[num] = adjust_address (op, SImode, 0);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 4);
+ hi_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 0);
+ }
+ }
+}
+
+/* Split X into a base and a constant offset, storing them in *BASE
+ and *OFFSET respectively. */
+
+static void
+m68k_split_offset (rtx x, rtx *base, HOST_WIDE_INT *offset)
+{
+ *offset = 0;
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ *offset += INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
+ *base = x;
+}
+
+/* Return true if PATTERN is a PARALLEL suitable for a movem or fmovem
+ instruction. STORE_P is true if the moves are stores and false if
+ they are loads.
+
+ If the instruction uses post-increment or pre-decrement addressing,
+ AUTOMOD_BASE is the base register and AUTOMOD_OFFSET is the total
+ adjustment. This adjustment will be made by the first element of
+ PARALLEL, with the loads or stores starting at element 1. If the
+ instruction does not use post-increment or pre-decrement addressing,
+ AUTOMOD_BASE is null, AUTOMOD_OFFSET is 0, and the loads or stores
+ start at element 0. */
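+
+/* A sketch of a pattern this function accepts (register numbers are
+ illustrative):
+
+ (parallel
+ [(set (reg:SI %sp) (plus:SI (reg:SI %sp) (const_int -12)))
+ (set (mem:SI (plus:SI (reg:SI %sp) (const_int -12))) (reg:SI %d2))
+ (set (mem:SI (plus:SI (reg:SI %sp) (const_int -8))) (reg:SI %d3))
+ (set (mem:SI (plus:SI (reg:SI %sp) (const_int -4))) (reg:SI %d4))])
+
+ i.e. a pre-decrement store of %d2-%d4 with AUTOMOD_BASE = %sp and
+ AUTOMOD_OFFSET = -12. */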
+
+bool
+m68k_movem_pattern_p (rtx pattern, rtx automod_base,
+ HOST_WIDE_INT automod_offset, bool store_p)
+{
+ rtx base, mem_base, set, mem, reg, last_reg;
+ HOST_WIDE_INT offset, mem_offset;
+ int i, first, len;
+ enum reg_class rclass;
+
+ len = XVECLEN (pattern, 0);
+ first = (automod_base != NULL);
+
+ if (automod_base)
+ {
+ /* Stores must be pre-decrement and loads must be post-increment. */
+ if (store_p != (automod_offset < 0))
+ return false;
+
+ /* Work out the base and offset for the lowest memory location. */
+ base = automod_base;
+ offset = (automod_offset < 0 ? automod_offset : 0);
+ }
+ else
+ {
+ /* Allow any valid base and offset in the first access. */
+ base = NULL;
+ offset = 0;
+ }
+
+ last_reg = NULL;
+ rclass = NO_REGS;
+ for (i = first; i < len; i++)
+ {
+ /* We need a plain SET. */
+ set = XVECEXP (pattern, 0, i);
+ if (GET_CODE (set) != SET)
+ return false;
+
+ /* Check that we have a memory location... */
+ mem = XEXP (set, !store_p);
+ if (!MEM_P (mem) || !memory_operand (mem, VOIDmode))
+ return false;
+
+ /* ...with the right address. */
+ if (base == NULL)
+ {
+ m68k_split_offset (XEXP (mem, 0), &base, &offset);
+ /* The ColdFire instruction only allows (An) and (d16,An) modes.
+ There are no mode restrictions for 680x0 besides the
+ automodification rules enforced above. */
+ if (TARGET_COLDFIRE
+ && !m68k_legitimate_base_reg_p (base, reload_completed))
+ return false;
+ }
+ else
+ {
+ m68k_split_offset (XEXP (mem, 0), &mem_base, &mem_offset);
+ if (!rtx_equal_p (base, mem_base) || offset != mem_offset)
+ return false;
+ }
+
+ /* Check that we have a register of the required mode and class. */
+ reg = XEXP (set, store_p);
+ if (!REG_P (reg)
+ || !HARD_REGISTER_P (reg)
+ || GET_MODE (reg) != reg_raw_mode[REGNO (reg)])
+ return false;
+
+ if (last_reg)
+ {
+ /* The register must belong to RCLASS and have a higher number
+ than the register in the previous SET. */
+ if (!TEST_HARD_REG_BIT (reg_class_contents[rclass], REGNO (reg))
+ || REGNO (last_reg) >= REGNO (reg))
+ return false;
+ }
+ else
+ {
+ /* Work out which register class we need. */
+ if (INT_REGNO_P (REGNO (reg)))
+ rclass = GENERAL_REGS;
+ else if (FP_REGNO_P (REGNO (reg)))
+ rclass = FP_REGS;
+ else
+ return false;
+ }
+
+ last_reg = reg;
+ offset += GET_MODE_SIZE (GET_MODE (reg));
+ }
+
+ /* If we have an automodification, check whether the final offset is OK. */
+ if (automod_base && offset != (automod_offset < 0 ? 0 : automod_offset))
+ return false;
+
+ /* Reject unprofitable cases. */
+ if (len < first + (rclass == FP_REGS ? MIN_FMOVEM_REGS : MIN_MOVEM_REGS))
+ return false;
+
+ return true;
+}
+
+/* Return the assembly code template for a movem or fmovem instruction
+ whose pattern is given by PATTERN. Store the template's operands
+ in OPERANDS.
+
+ If the instruction uses post-increment or pre-decrement addressing,
+ AUTOMOD_OFFSET is the total adjustment, otherwise it is 0. STORE_P
+ is true if this is a store instruction. */
+
+const char *
+m68k_output_movem (rtx *operands, rtx pattern,
+ HOST_WIDE_INT automod_offset, bool store_p)
+{
+ unsigned int mask;
+ int i, first;
+
+ gcc_assert (GET_CODE (pattern) == PARALLEL);
+ mask = 0;
+ first = (automod_offset != 0);
+ for (i = first; i < XVECLEN (pattern, 0); i++)
+ {
+ /* When using movem with pre-decrement addressing, register X + D0_REG
+ is controlled by bit 15 - X. For all other addressing modes,
+ register X + D0_REG is controlled by bit X. Confusingly, the
+ register mask for fmovem is in the opposite order to that for
+ movem. */
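+ /* E.g. (illustrative only) a pre-decrement store of %d2-%d4 sets
+ bits 15-2, 15-3 and 15-4, giving a mask of 0x3800. */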
+ unsigned int regno;
+
+ gcc_assert (MEM_P (XEXP (XVECEXP (pattern, 0, i), !store_p)));
+ gcc_assert (REG_P (XEXP (XVECEXP (pattern, 0, i), store_p)));
+ regno = REGNO (XEXP (XVECEXP (pattern, 0, i), store_p));
+ if (automod_offset < 0)
+ {
+ if (FP_REGNO_P (regno))
+ mask |= 1 << (regno - FP0_REG);
+ else
+ mask |= 1 << (15 - (regno - D0_REG));
+ }
+ else
+ {
+ if (FP_REGNO_P (regno))
+ mask |= 1 << (7 - (regno - FP0_REG));
+ else
+ mask |= 1 << (regno - D0_REG);
+ }
+ }
+ CC_STATUS_INIT;
+
+ if (automod_offset == 0)
+ operands[0] = XEXP (XEXP (XVECEXP (pattern, 0, first), !store_p), 0);
+ else if (automod_offset < 0)
+ operands[0] = gen_rtx_PRE_DEC (Pmode, SET_DEST (XVECEXP (pattern, 0, 0)));
+ else
+ operands[0] = gen_rtx_POST_INC (Pmode, SET_DEST (XVECEXP (pattern, 0, 0)));
+ operands[1] = GEN_INT (mask);
+ if (FP_REGNO_P (REGNO (XEXP (XVECEXP (pattern, 0, first), store_p))))
+ {
+ if (store_p)
+ return "fmovem %1,%a0";
+ else
+ return "fmovem %a0,%1";
+ }
+ else
+ {
+ if (store_p)
+ return "movem%.l %1,%a0";
+ else
+ return "movem%.l %a0,%1";
+ }
+}
+
+/* Return a REG that occurs in ADDR with coefficient 1.
+ ADDR can be effectively incremented by incrementing REG. */
+
+static rtx
+find_addr_reg (rtx addr)
+{
+ while (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ addr = XEXP (addr, 0);
+ else if (GET_CODE (XEXP (addr, 1)) == REG)
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 0)))
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 1)))
+ addr = XEXP (addr, 0);
+ else
+ gcc_unreachable ();
+ }
+ gcc_assert (GET_CODE (addr) == REG);
+ return addr;
+}
+
+/* Output assembler code to perform a 32-bit 3-operand add. */
+
+const char *
+output_addsi3 (rtx *operands)
+{
+ if (! operands_match_p (operands[0], operands[1]))
+ {
+ if (!ADDRESS_REG_P (operands[1]))
+ {
+ rtx tmp = operands[1];
+
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+
+ /* These insns can result from reloads to access
+ stack slots over 64k from the frame pointer. */
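+ /* E.g. (a sketch; registers illustrative):
+ move.l #100000,%a0
+ add.l %fp,%a0 */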
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) < -32768 || INTVAL (operands[2]) > 32767))
+ return "move%.l %2,%0\n\tadd%.l %1,%0";
+ if (GET_CODE (operands[2]) == REG)
+ return MOTOROLA ? "lea (%1,%2.l),%0" : "lea %1@(0,%2:l),%0";
+ return MOTOROLA ? "lea (%c2,%1),%0" : "lea %1@(%c2),%0";
+ }
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) <= 8)
+ return "addq%.l %2,%0";
+ if (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) >= -8)
+ {
+ operands[2] = GEN_INT (- INTVAL (operands[2]));
+ return "subq%.l %2,%0";
+ }
+ /* On the CPU32 it is faster to use two addql instructions to
+ add a small integer (8 < N <= 16) to a register.
+ Likewise for subql. */
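+ /* E.g. adding 11 comes out as (a sketch):
+ addq.l #8,%d0
+ addq.l #3,%d0 */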
+ if (TUNE_CPU32 && REG_P (operands[0]))
+ {
+ if (INTVAL (operands[2]) > 8
+ && INTVAL (operands[2]) <= 16)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 8);
+ return "addq%.l #8,%0\n\taddq%.l %2,%0";
+ }
+ if (INTVAL (operands[2]) < -8
+ && INTVAL (operands[2]) >= -16)
+ {
+ operands[2] = GEN_INT (- INTVAL (operands[2]) - 8);
+ return "subq%.l #8,%0\n\tsubq%.l %2,%0";
+ }
+ }
+ if (ADDRESS_REG_P (operands[0])
+ && INTVAL (operands[2]) >= -0x8000
+ && INTVAL (operands[2]) < 0x8000)
+ {
+ if (TUNE_68040)
+ return "add%.w %2,%0";
+ else
+ return MOTOROLA ? "lea (%c2,%0),%0" : "lea %0@(%c2),%0";
+ }
+ }
+ return "add%.l %2,%0";
+}
+
+/* Store in cc_status the expressions that the condition codes will
+ describe after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+/* On the 68000, all the insns to store in an address register fail to
+ set the cc's. However, in some cases these instructions can
+ invalidate the saved cc's. In those cases we clear out some or all of
+ the saved cc's so they won't be used. */
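+
+/* For instance (illustrative): "movea.l %d0,%a0" leaves the cc's
+ untouched, so any saved cc description that mentions %a0 must be
+ discarded. */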
+
+void
+notice_update_cc (rtx exp, rtx insn)
+{
+ if (GET_CODE (exp) == SET)
+ {
+ if (GET_CODE (SET_SRC (exp)) == CALL)
+ CC_STATUS_INIT;
+ else if (ADDRESS_REG_P (SET_DEST (exp)))
+ {
+ if (cc_status.value1 && modified_in_p (cc_status.value1, insn))
+ cc_status.value1 = 0;
+ if (cc_status.value2 && modified_in_p (cc_status.value2, insn))
+ cc_status.value2 = 0;
+ }
+ /* fmoves to memory or data registers do not set the condition
+ codes. Normal moves _do_ set the condition codes, but not in
+ a way that is appropriate for comparison with 0, because -0.0
+ would be treated as a negative nonzero number. Note that it
+ isn't appropriate to conditionalize this restriction on
+ HONOR_SIGNED_ZEROS because that macro merely indicates whether
+ we care about the difference between -0.0 and +0.0. */
+ else if (!FP_REG_P (SET_DEST (exp))
+ && SET_DEST (exp) != cc0_rtx
+ && (FP_REG_P (SET_SRC (exp))
+ || GET_CODE (SET_SRC (exp)) == FIX
+ || FLOAT_MODE_P (GET_MODE (SET_DEST (exp)))))
+ CC_STATUS_INIT;
+ /* A pair of move insns doesn't produce a useful overall cc. */
+ else if (!FP_REG_P (SET_DEST (exp))
+ && !FP_REG_P (SET_SRC (exp))
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (exp))) > 4
+ && (GET_CODE (SET_SRC (exp)) == REG
+ || GET_CODE (SET_SRC (exp)) == MEM
+ || GET_CODE (SET_SRC (exp)) == CONST_DOUBLE))
+ CC_STATUS_INIT;
+ else if (SET_DEST (exp) != pc_rtx)
+ {
+ cc_status.flags = 0;
+ cc_status.value1 = SET_DEST (exp);
+ cc_status.value2 = SET_SRC (exp);
+ }
+ }
+ else if (GET_CODE (exp) == PARALLEL
+ && GET_CODE (XVECEXP (exp, 0, 0)) == SET)
+ {
+ rtx dest = SET_DEST (XVECEXP (exp, 0, 0));
+ rtx src = SET_SRC (XVECEXP (exp, 0, 0));
+
+ if (ADDRESS_REG_P (dest))
+ CC_STATUS_INIT;
+ else if (dest != pc_rtx)
+ {
+ cc_status.flags = 0;
+ cc_status.value1 = dest;
+ cc_status.value2 = src;
+ }
+ }
+ else
+ CC_STATUS_INIT;
+ if (cc_status.value2 != 0
+ && ADDRESS_REG_P (cc_status.value2)
+ && GET_MODE (cc_status.value2) == QImode)
+ CC_STATUS_INIT;
+ if (cc_status.value2 != 0)
+ switch (GET_CODE (cc_status.value2))
+ {
+ case ASHIFT: case ASHIFTRT: case LSHIFTRT:
+ case ROTATE: case ROTATERT:
+ /* These instructions always clear the overflow bit, and set
+ the carry to the bit shifted out. */
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE | CC_NO_CARRY;
+ break;
+
+ case PLUS: case MINUS: case MULT:
+ case DIV: case UDIV: case MOD: case UMOD: case NEG:
+ if (GET_MODE (cc_status.value2) != VOIDmode)
+ cc_status.flags |= CC_NO_OVERFLOW;
+ break;
+ case ZERO_EXTEND:
+ /* (SET r1 (ZERO_EXTEND r2)) on this machine
+ ends with a move insn moving r2 in r2's mode.
+ Thus, the cc's are set for r2.
+ This can set N bit spuriously. */
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ break;
+
+ default:
+ break;
+ }
+ if (cc_status.value1 && GET_CODE (cc_status.value1) == REG
+ && cc_status.value2
+ && reg_overlap_mentioned_p (cc_status.value1, cc_status.value2))
+ cc_status.value2 = 0;
+ if (((cc_status.value1 && FP_REG_P (cc_status.value1))
+ || (cc_status.value2 && FP_REG_P (cc_status.value2))))
+ cc_status.flags = CC_IN_68881;
+ if (cc_status.value2 && GET_CODE (cc_status.value2) == COMPARE
+ && GET_MODE_CLASS (GET_MODE (XEXP (cc_status.value2, 0))) == MODE_FLOAT)
+ {
+ cc_status.flags = CC_IN_68881;
+ if (!FP_REG_P (XEXP (cc_status.value2, 0)))
+ cc_status.flags |= CC_REVERSED;
+ }
+}
+
+const char *
+output_move_const_double (rtx *operands)
+{
+ int code = standard_68881_constant_p (operands[1]);
+
+ if (code != 0)
+ {
+ static char buf[40];
+
+ sprintf (buf, "fmovecr #0x%x,%%0", code & 0xff);
+ return buf;
+ }
+ return "fmove%.d %1,%0";
+}
+
+const char *
+output_move_const_single (rtx *operands)
+{
+ int code = standard_68881_constant_p (operands[1]);
+
+ if (code != 0)
+ {
+ static char buf[40];
+
+ sprintf (buf, "fmovecr #0x%x,%%0", code & 0xff);
+ return buf;
+ }
+ return "fmove%.s %f1,%0";
+}
+
+/* Return nonzero if X, a CONST_DOUBLE, has a value that we can get
+ from the "fmovecr" instruction.
+ The returned value, anded with 0xff, gives the code to use in fmovecr
+ to get the desired constant. */
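+
+/* For example (a sketch): a CONST_DOUBLE of 1.0 yields 0x32 here, and
+ output_move_const_double would then emit "fmovecr #0x32,%fp0"
+ (destination register illustrative). */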
+
+/* This code has been fixed for cross-compilation. */
+
+static int inited_68881_table = 0;
+
+static const char *const strings_68881[7] = {
+ "0.0",
+ "1.0",
+ "10.0",
+ "100.0",
+ "10000.0",
+ "1e8",
+ "1e16"
+};
+
+static const int codes_68881[7] = {
+ 0x0f,
+ 0x32,
+ 0x33,
+ 0x34,
+ 0x35,
+ 0x36,
+ 0x37
+};
+
+REAL_VALUE_TYPE values_68881[7];
+
+/* Set up values_68881 array by converting the decimal values
+ strings_68881 to binary. */
+
+void
+init_68881_table (void)
+{
+ int i;
+ REAL_VALUE_TYPE r;
+ enum machine_mode mode;
+
+ mode = SFmode;
+ for (i = 0; i < 7; i++)
+ {
+ if (i == 6)
+ mode = DFmode;
+ r = REAL_VALUE_ATOF (strings_68881[i], mode);
+ values_68881[i] = r;
+ }
+ inited_68881_table = 1;
+}
+
+int
+standard_68881_constant_p (rtx x)
+{
+ REAL_VALUE_TYPE r;
+ int i;
+
+ /* fmovecr must be emulated on the 68040 and 68060, so it shouldn't be
+ used at all on those chips. */
+ if (TUNE_68040_60)
+ return 0;
+
+ if (! inited_68881_table)
+ init_68881_table ();
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ /* Use REAL_VALUES_IDENTICAL instead of REAL_VALUES_EQUAL so that -0.0
+ is rejected. */
+ for (i = 0; i < 6; i++)
+ {
+ if (REAL_VALUES_IDENTICAL (r, values_68881[i]))
+ return (codes_68881[i]);
+ }
+
+ if (GET_MODE (x) == SFmode)
+ return 0;
+
+ if (REAL_VALUES_EQUAL (r, values_68881[6]))
+ return (codes_68881[6]);
+
+ /* Larger powers of ten in the constant ROM are not used
+ because they are not equal to a `double' C constant. */
+ return 0;
+}
+
+/* If X is a floating-point constant, return the base-2 logarithm of X,
+ or 0 if X is not a power of 2. */
+
+int
+floating_exact_log2 (rtx x)
+{
+ REAL_VALUE_TYPE r, r1;
+ int exp;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ if (REAL_VALUES_LESS (r, dconst1))
+ return 0;
+
+ exp = real_exponent (&r);
+ real_2expN (&r1, exp, DFmode);
+ if (REAL_VALUES_EQUAL (r1, r))
+ return exp;
+
+ return 0;
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand X. X is an RTL
+ expression.
+
+ CODE is a value that can be used to specify one of several ways
+ of printing the operand. It is used when identical operands
+ must be printed differently depending on the context. CODE
+ comes from the `%' specification that was used to request
+ printing of the operand. If the specification was just `%DIGIT'
+ then CODE is 0; if the specification was `%LTR DIGIT' then CODE
+ is the ASCII code for LTR.
+
+ If X is a register, this macro should print the register's name.
+ The names can be found in an array `reg_names' whose type is
+ `char *[]'. `reg_names' is initialized from `REGISTER_NAMES'.
+
+ When the machine description has a specification `%PUNCT' (a `%'
+ followed by a punctuation character), this macro is called with
+ a null pointer for X and the punctuation character for CODE.
+
+ The m68k specific codes are:
+
+ '.' for dot needed in Motorola-style opcode names.
+ '-' for an operand pushing on the stack:
+ sp@-, -(sp) or -(%sp) depending on the style of syntax.
+ '+' for an operand pushing on the stack:
+ sp@+, (sp)+ or (%sp)+ depending on the style of syntax.
+ '@' for a reference to the top word on the stack:
+ sp@, (sp) or (%sp) depending on the style of syntax.
+ '#' for an immediate operand prefix (# in MIT and Motorola syntax
+ but & in SGS syntax).
+ '!' for the cc register (used in an `and to cc' insn).
+ '$' for the letter `s' in an op code, but only on the 68040.
+ '&' for the letter `d' in an op code, but only on the 68040.
+ '/' for register prefix needed by longlong.h.
+ '?' for m68k_library_id_string
+
+ 'b' for byte insn (no effect on the Sun; this is for the ISI).
+ 'd' to force memory addressing to be absolute, not relative.
+ 'f' for float insn (print a CONST_DOUBLE as a float rather than in hex)
+ 'x' for float insn (print a CONST_DOUBLE as a float rather than in hex),
+ or print pair of registers as rx:ry.
+ 'p' print an address with @PLTPC attached, but only if the operand
+ is not locally-bound. */
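+
+/* For example (a sketch, Motorola syntax): the template "move%.l %1,%-"
+ with an immediate operand 1 of 42 comes out as "move.l #42,-(%sp)". */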
+
+void
+print_operand (FILE *file, rtx op, int letter)
+{
+ if (letter == '.')
+ {
+ if (MOTOROLA)
+ fprintf (file, ".");
+ }
+ else if (letter == '#')
+ asm_fprintf (file, "%I");
+ else if (letter == '-')
+ asm_fprintf (file, MOTOROLA ? "-(%Rsp)" : "%Rsp@-");
+ else if (letter == '+')
+ asm_fprintf (file, MOTOROLA ? "(%Rsp)+" : "%Rsp@+");
+ else if (letter == '@')
+ asm_fprintf (file, MOTOROLA ? "(%Rsp)" : "%Rsp@");
+ else if (letter == '!')
+ asm_fprintf (file, "%Rfpcr");
+ else if (letter == '$')
+ {
+ if (TARGET_68040)
+ fprintf (file, "s");
+ }
+ else if (letter == '&')
+ {
+ if (TARGET_68040)
+ fprintf (file, "d");
+ }
+ else if (letter == '/')
+ asm_fprintf (file, "%R");
+ else if (letter == '?')
+ asm_fprintf (file, m68k_library_id_string);
+ else if (letter == 'p')
+ {
+ output_addr_const (file, op);
+ if (!(GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (op)))
+ fprintf (file, "@PLTPC");
+ }
+ else if (GET_CODE (op) == REG)
+ {
+ if (letter == 'R')
+ /* Print out the second register name of a register pair.
+ I.e., R (6) => 7. */
+ fputs (M68K_REGNAME(REGNO (op) + 1), file);
+ else
+ fputs (M68K_REGNAME(REGNO (op)), file);
+ }
+ else if (GET_CODE (op) == MEM)
+ {
+ output_address (XEXP (op, 0));
+ if (letter == 'd' && ! TARGET_68020
+ && CONSTANT_ADDRESS_P (XEXP (op, 0))
+ && !(GET_CODE (XEXP (op, 0)) == CONST_INT
+ && INTVAL (XEXP (op, 0)) < 0x8000
+ && INTVAL (XEXP (op, 0)) >= -0x8000))
+ fprintf (file, MOTOROLA ? ".l" : ":l");
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ asm_fprintf (file, "%I0x%lx", l & 0xFFFFFFFF);
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == XFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l[3];
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ asm_fprintf (file, "%I0x%lx%08lx%08lx", l[0] & 0xFFFFFFFF,
+ l[1] & 0xFFFFFFFF, l[2] & 0xFFFFFFFF);
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == DFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l[2];
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ asm_fprintf (file, "%I0x%lx%08lx", l[0] & 0xFFFFFFFF, l[1] & 0xFFFFFFFF);
+ }
+ else
+ {
+ /* Use `print_operand_address' instead of `output_addr_const'
+ to ensure that we print relevant PIC stuff. */
+ asm_fprintf (file, "%I");
+ if (TARGET_PCREL
+ && (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST))
+ print_operand_address (file, op);
+ else
+ output_addr_const (file, op);
+ }
+}
+
+/* Return string for TLS relocation RELOC. */
+
+static const char *
+m68k_get_reloc_decoration (enum m68k_reloc reloc)
+{
+ /* To my knowledge, !MOTOROLA assemblers don't support TLS. */
+ gcc_assert (MOTOROLA || reloc == RELOC_GOT);
+
+ switch (reloc)
+ {
+ case RELOC_GOT:
+ if (MOTOROLA)
+ {
+ if (flag_pic == 1 && TARGET_68020)
+ return "@GOT.w";
+ else
+ return "@GOT";
+ }
+ else
+ {
+ if (TARGET_68020)
+ {
+ switch (flag_pic)
+ {
+ case 1:
+ return ":w";
+ case 2:
+ return ":l";
+ default:
+ return "";
+ }
+ }
+ }
+ return "";
+
+ case RELOC_TLSGD:
+ return "@TLSGD";
+
+ case RELOC_TLSLDM:
+ return "@TLSLDM";
+
+ case RELOC_TLSLDO:
+ return "@TLSLDO";
+
+ case RELOC_TLSIE:
+ return "@TLSIE";
+
+ case RELOC_TLSLE:
+ return "@TLSLE";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* m68k implementation of OUTPUT_ADDR_CONST_EXTRA. */
+
+bool
+m68k_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_RELOC16:
+ case UNSPEC_RELOC32:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs (m68k_get_reloc_decoration
+ ((enum m68k_reloc) INTVAL (XVECEXP (x, 0, 1))), file);
+ return true;
+
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
+
+/* M68K implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
+
+static void
+m68k_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ gcc_assert (size == 4);
+ fputs ("\t.long\t", file);
+ output_addr_const (file, x);
+ fputs ("@TLSLDO+0x8000", file);
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize various UNSPEC sequences
+ and turn them back into a direct symbol reference. */
+
+static rtx
+m68k_delegitimize_address (rtx orig_x)
+{
+ rtx x;
+ struct m68k_address addr;
+ rtx unspec;
+
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+ x = orig_x;
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
+ return orig_x;
+
+ if (!m68k_decompose_address (GET_MODE (x), x, false, &addr)
+ || addr.offset == NULL_RTX
+ || GET_CODE (addr.offset) != CONST)
+ return orig_x;
+
+ unspec = XEXP (addr.offset, 0);
+ if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
+ unspec = XEXP (unspec, 0);
+ if (GET_CODE (unspec) != UNSPEC
+ || (XINT (unspec, 1) != UNSPEC_RELOC16
+ && XINT (unspec, 1) != UNSPEC_RELOC32))
+ return orig_x;
+ x = XVECEXP (unspec, 0, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF);
+ if (unspec != XEXP (addr.offset, 0))
+ x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.offset, 0), 1));
+ if (addr.index)
+ {
+ rtx idx = addr.index;
+ if (addr.scale != 1)
+ idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
+ x = gen_rtx_PLUS (Pmode, idx, x);
+ }
+ if (addr.base)
+ x = gen_rtx_PLUS (Pmode, addr.base, x);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+}
+
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression.
+
+ Note that this contains a kludge that knows that the only reason
+ we have an address (plus (label_ref...) (reg...)) when not generating
+ PIC code is in the insn before a tablejump, and we know that m68k.md
+ generates a label LInnn: on such an insn.
+
+ It is possible for PIC to generate a (plus (label_ref...) (reg...))
+ and we handle that just like we would a (plus (symbol_ref...) (reg...)).
+
+ This routine is responsible for distinguishing between -fpic and -fPIC
+ style relocations in an address. When generating -fpic code the
+ offset is output in word mode (e.g. movel a5@(_foo:w), a0). When generating
+ -fPIC code the offset is output in long mode (e.g. movel a5@(_foo:l), a0). */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ struct m68k_address address;
+
+ if (!m68k_decompose_address (QImode, addr, true, &address))
+ gcc_unreachable ();
+
+ if (address.code == PRE_DEC)
+ fprintf (file, MOTOROLA ? "-(%s)" : "%s@-",
+ M68K_REGNAME (REGNO (address.base)));
+ else if (address.code == POST_INC)
+ fprintf (file, MOTOROLA ? "(%s)+" : "%s@+",
+ M68K_REGNAME (REGNO (address.base)));
+ else if (!address.base && !address.index)
+ {
+ /* A constant address. */
+ gcc_assert (address.offset == addr);
+ if (GET_CODE (addr) == CONST_INT)
+ {
+ /* (xxx).w or (xxx).l. */
+ if (IN_RANGE (INTVAL (addr), -0x8000, 0x7fff))
+ fprintf (file, MOTOROLA ? "%d.w" : "%d:w", (int) INTVAL (addr));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
+ }
+ else if (TARGET_PCREL)
+ {
+ /* (d16,PC) or (bd,PC,Xn) (with suppressed index register). */
+ fputc ('(', file);
+ output_addr_const (file, addr);
+ asm_fprintf (file, flag_pic == 1 ? ":w,%Rpc)" : ":l,%Rpc)");
+ }
+ else
+ {
+ /* (xxx).l. We need a special case for SYMBOL_REF if the symbol
+ name ends in `.<letter>', as the last 2 characters can be
+ mistaken as a size suffix. Put the name in parentheses. */
+ if (GET_CODE (addr) == SYMBOL_REF
+ && strlen (XSTR (addr, 0)) > 2
+ && XSTR (addr, 0)[strlen (XSTR (addr, 0)) - 2] == '.')
+ {
+ putc ('(', file);
+ output_addr_const (file, addr);
+ putc (')', file);
+ }
+ else
+ output_addr_const (file, addr);
+ }
+ }
+ else
+ {
+ int labelno;
+
+ /* If ADDR is a (d8,pc,Xn) address, this is the number of the
+ label being accessed, otherwise it is -1. */
+ labelno = (address.offset
+ && !address.base
+ && GET_CODE (address.offset) == LABEL_REF
+ ? CODE_LABEL_NUMBER (XEXP (address.offset, 0))
+ : -1);
+ if (MOTOROLA)
+ {
+ /* Print the "offset(base" component. */
+ if (labelno >= 0)
+ asm_fprintf (file, "%LL%d(%Rpc,", labelno);
+ else
+ {
+ if (address.offset)
+ output_addr_const (file, address.offset);
+
+ putc ('(', file);
+ if (address.base)
+ fputs (M68K_REGNAME (REGNO (address.base)), file);
+ }
+ /* Print the ",index" component, if any. */
+ if (address.index)
+ {
+ if (address.base)
+ putc (',', file);
+ fprintf (file, "%s.%c",
+ M68K_REGNAME (REGNO (address.index)),
+ GET_MODE (address.index) == HImode ? 'w' : 'l');
+ if (address.scale != 1)
+ fprintf (file, "*%d", address.scale);
+ }
+ putc (')', file);
+ }
+ else /* !MOTOROLA */
+ {
+ if (!address.offset && !address.index)
+ fprintf (file, "%s@", M68K_REGNAME (REGNO (address.base)));
+ else
+ {
+ /* Print the "base@(offset" component. */
+ if (labelno >= 0)
+ asm_fprintf (file, "%Rpc@(%LL%d", labelno);
+ else
+ {
+ if (address.base)
+ fputs (M68K_REGNAME (REGNO (address.base)), file);
+ fprintf (file, "@(");
+ if (address.offset)
+ output_addr_const (file, address.offset);
+ }
+ /* Print the ",index" component, if any. */
+ if (address.index)
+ {
+ fprintf (file, ",%s:%c",
+ M68K_REGNAME (REGNO (address.index)),
+ GET_MODE (address.index) == HImode ? 'w' : 'l');
+ if (address.scale != 1)
+ fprintf (file, ":%d", address.scale);
+ }
+ putc (')', file);
+ }
+ }
+ }
+}
+
+/* Check for cases where a clr insn can be omitted from code using
+ strict_low_part sets. For example, the second clrl here is not needed:
+ clrl d0; movw a0@+,d0; use d0; clrl d0; movw a0@+,d0; use d0; ...
+
+ MODE is the mode of this STRICT_LOW_PART set. FIRST_INSN is the clear
+ insn we are checking for redundancy. TARGET is the register set by the
+ clear insn. */
+
+bool
+strict_low_part_peephole_ok (enum machine_mode mode, rtx first_insn,
+ rtx target)
+{
+ rtx p = first_insn;
+
+ while ((p = PREV_INSN (p)))
+ {
+ if (NOTE_INSN_BASIC_BLOCK_P (p))
+ return false;
+
+ if (NOTE_P (p))
+ continue;
+
+ /* If it isn't an insn, then give up. */
+ if (!INSN_P (p))
+ return false;
+
+ if (reg_set_p (target, p))
+ {
+ rtx set = single_set (p);
+ rtx dest;
+
+ /* If it isn't an easy to recognize insn, then give up. */
+ if (! set)
+ return false;
+
+ dest = SET_DEST (set);
+
+ /* If this sets the entire target register to zero, then our
+ first_insn is redundant. */
+ if (rtx_equal_p (dest, target)
+ && SET_SRC (set) == const0_rtx)
+ return true;
+ else if (GET_CODE (dest) == STRICT_LOW_PART
+ && GET_CODE (XEXP (dest, 0)) == REG
+ && REGNO (XEXP (dest, 0)) == REGNO (target)
+ && (GET_MODE_SIZE (GET_MODE (XEXP (dest, 0)))
+ <= GET_MODE_SIZE (mode)))
+ /* This is a strict low part set which modifies less than
+ we are using, so it is safe. */
+ ;
+ else
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/* Operand predicates for implementing asymmetric pc-relative addressing
+ on m68k. The m68k supports pc-relative addressing (mode 7, register 2)
+ when used as a source operand, but not as a destination operand.
+
+ We model this by restricting the meaning of the basic predicates
+ (general_operand, memory_operand, etc) to forbid the use of this
+ addressing mode, and then define the following predicates that permit
+ this addressing mode. These predicates can then be used for the
+ source operands of the appropriate instructions.
+
+ n.b. While it is theoretically possible to change all machine patterns
+ to use this addressing mode where permitted by the architecture,
+ it has only been implemented for "common" cases: SImode, HImode, and
+ QImode operands, and only for the principal operations that would
+ require this addressing mode: data movement and simple integer operations.
+
+ In parallel with these new predicates, two new constraint letters
+ were defined: 'S' and 'T'. 'S' is the -mpcrel analog of 'm'.
+ 'T' replaces 's' in the non-pcrel case. It is a no-op in the pcrel case.
+ In the pcrel case 's' is only valid in combination with 'a' registers.
+ See addsi3, subsi3, cmpsi, and movsi patterns for a better understanding
+ of how these constraints are used.
+
+ The use of these predicates is strictly optional, though patterns
+ that don't use them will cause an extra reload register to be
+ allocated where one was not necessary:
+
+ lea (abc:w,%pc),%a0 ; need to reload address
+ moveq &1,%d1 ; since write to pc-relative space
+ movel %d1,%a0@ ; is not allowed
+ ...
+ lea (abc:w,%pc),%a1 ; no need to reload address here
+ movel %a1@,%d0 ; since "movel (abc:w,%pc),%d0" is ok
+
+ For more info, consult tiemann@cygnus.com.
+
+
+ All of the ugliness with predicates and constraints is due to the
+ simple fact that the m68k does not allow a pc-relative addressing
+ mode as a destination. gcc does not distinguish between source and
+ destination addresses. Hence, if we claim that pc-relative address
+ modes are valid, e.g. TARGET_LEGITIMATE_ADDRESS_P accepts them, then we
+ end up with invalid code. To get around this problem, we left
+ pc-relative modes as invalid addresses, and then added special
+ predicates and constraints to accept them.
+
+ A cleaner way to handle this is to modify gcc to distinguish
+ between source and destination addresses. We can then say that
+ pc-relative is a valid source address but not a valid destination
+ address, and hopefully avoid a lot of the predicate and constraint
+ hackery. Unfortunately, this would be a pretty big change. It would
+ be a useful change for a number of ports, but there aren't any current
+ plans to undertake this.
+
+ ***************************************************************************/
+
+
+const char *
+output_andsi3 (rtx *operands)
+{
+ int logval;
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) | 0xffff) == -1
+ && (DATA_REG_P (operands[0])
+ || offsettable_memref_p (operands[0]))
+ && !TARGET_COLDFIRE)
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[0] = adjust_address (operands[0], HImode, 2);
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffff);
+ /* Do not delete a following tstl %0 insn; that would be incorrect. */
+ CC_STATUS_INIT;
+ if (operands[2] == const0_rtx)
+ return "clr%.w %0";
+ return "and%.w %2,%0";
+ }
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (logval = exact_log2 (~ INTVAL (operands[2]) & 0xffffffff)) >= 0
+ && (DATA_REG_P (operands[0])
+ || offsettable_memref_p (operands[0])))
+ {
+ if (DATA_REG_P (operands[0]))
+ operands[1] = GEN_INT (logval);
+ else
+ {
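+ /* Memory is addressed byte-wise here: e.g. (a sketch) clearing bit 12
+ of a longword uses byte offset 3 - 12/8 = 2 and bit number
+ 12 % 8 = 4 within that byte. */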
+ operands[0] = adjust_address (operands[0], SImode, 3 - (logval / 8));
+ operands[1] = GEN_INT (logval % 8);
+ }
+ /* This does not set condition codes in a standard way. */
+ CC_STATUS_INIT;
+ return "bclr %1,%0";
+ }
+ return "and%.l %2,%0";
+}
+
+const char *
+output_iorsi3 (rtx *operands)
+{
+ register int logval;
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >> 16 == 0
+ && (DATA_REG_P (operands[0])
+ || offsettable_memref_p (operands[0]))
+ && !TARGET_COLDFIRE)
+ {
+ if (GET_CODE (operands[0]) != REG)
+ operands[0] = adjust_address (operands[0], HImode, 2);
+ /* Do not delete a following tstl %0 insn; that would be incorrect. */
+ CC_STATUS_INIT;
+ if (INTVAL (operands[2]) == 0xffff)
+ return "mov%.w %2,%0";
+ return "or%.w %2,%0";
+ }
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (logval = exact_log2 (INTVAL (operands[2]) & 0xffffffff)) >= 0
+ && (DATA_REG_P (operands[0])
+ || offsettable_memref_p (operands[0])))
+ {
+ if (DATA_REG_P (operands[0]))
+ operands[1] = GEN_INT (logval);
+ else
+ {
+ operands[0] = adjust_address (operands[0], SImode, 3 - (logval / 8));
+ operands[1] = GEN_INT (logval % 8);
+ }
+ CC_STATUS_INIT;
+ return "bset %1,%0";
+ }
+ return "or%.l %2,%0";
+}
+
+const char *
+output_xorsi3 (rtx *operands)
+{
+ register int logval;
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) >> 16 == 0
+ && (offsettable_memref_p (operands[0]) || DATA_REG_P (operands[0]))
+ && !TARGET_COLDFIRE)
+ {
+ if (! DATA_REG_P (operands[0]))
+ operands[0] = adjust_address (operands[0], HImode, 2);
+ /* Do not delete a following tstl %0 insn; that would be incorrect. */
+ CC_STATUS_INIT;
+ if (INTVAL (operands[2]) == 0xffff)
+ return "not%.w %0";
+ return "eor%.w %2,%0";
+ }
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (logval = exact_log2 (INTVAL (operands[2]) & 0xffffffff)) >= 0
+ && (DATA_REG_P (operands[0])
+ || offsettable_memref_p (operands[0])))
+ {
+ if (DATA_REG_P (operands[0]))
+ operands[1] = GEN_INT (logval);
+ else
+ {
+ operands[0] = adjust_address (operands[0], SImode, 3 - (logval / 8));
+ operands[1] = GEN_INT (logval % 8);
+ }
+ CC_STATUS_INIT;
+ return "bchg %1,%0";
+ }
+ return "eor%.l %2,%0";
+}
+
+/* Return the instruction that should be used for a call to address X,
+ which is known to be in operand 0. */
+
+const char *
+output_call (rtx x)
+{
+ if (symbolic_operand (x, VOIDmode))
+ return m68k_symbolic_call;
+ else
+ return "jsr %a0";
+}
+
+/* Likewise sibling calls. */
+
+const char *
+output_sibcall (rtx x)
+{
+ if (symbolic_operand (x, VOIDmode))
+ return m68k_symbolic_jump;
+ else
+ return "jmp %a0";
+}
+
+static void
+m68k_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_slot, offset, addr, mem, insn, tmp;
+
+ /* Avoid clobbering the struct value reg by using the
+ static chain reg as a temporary. */
+ tmp = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
+
+ /* Pretend to be a post-reload pass while generating rtl. */
+ reload_completed = 1;
+
+ /* The "this" pointer is stored at 4(%sp). */
+ this_slot = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, 4));
+
+ /* Add DELTA to THIS. */
+ if (delta != 0)
+ {
+ /* Make the offset a legitimate operand for memory addition. */
+ offset = GEN_INT (delta);
+ if ((delta < -8 || delta > 8)
+ && (TARGET_COLDFIRE || USE_MOVQ (delta)))
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, D0_REG), offset);
+ offset = gen_rtx_REG (Pmode, D0_REG);
+ }
+ emit_insn (gen_add3_insn (copy_rtx (this_slot),
+ copy_rtx (this_slot), offset));
+ }
+
+ /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. */
+ if (vcall_offset != 0)
+ {
+ /* Set the static chain register to *THIS. */
+ emit_move_insn (tmp, this_slot);
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
+
+ /* Set ADDR to a legitimate address for *THIS + VCALL_OFFSET. */
+ addr = plus_constant (tmp, vcall_offset);
+ if (!m68k_legitimate_address_p (Pmode, addr, true))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, addr));
+ addr = tmp;
+ }
+
+ /* Load the offset into %d0 and add it to THIS. */
+ emit_move_insn (gen_rtx_REG (Pmode, D0_REG),
+ gen_rtx_MEM (Pmode, addr));
+ emit_insn (gen_add3_insn (copy_rtx (this_slot),
+ copy_rtx (this_slot),
+ gen_rtx_REG (Pmode, D0_REG)));
+ }
+
+ /* Jump to the target function. Use a sibcall if direct jumps are
+ allowed, otherwise load the address into a register first. */
+ mem = DECL_RTL (function);
+ if (!sibcall_operand (XEXP (mem, 0), VOIDmode))
+ {
+ gcc_assert (flag_pic);
+
+ if (!TARGET_SEP_DATA)
+ {
+ /* Use the static chain register as a temporary (call-clobbered)
+ GOT pointer for this function. We can use the static chain
+ register because it isn't live on entry to the thunk. */
+ SET_REGNO (pic_offset_table_rtx, STATIC_CHAIN_REGNUM);
+ emit_insn (gen_load_got (pic_offset_table_rtx));
+ }
+ legitimize_pic_address (XEXP (mem, 0), Pmode, tmp);
+ mem = replace_equiv_address (mem, tmp);
+ }
+ insn = emit_call_insn (gen_sibcall (mem, const0_rtx));
+ SIBLING_CALL_P (insn) = 1;
+
+ /* Run just enough of rest_of_compilation. */
+ insn = get_insns ();
+ split_all_insns_noflow ();
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ /* Clean up the vars set above. */
+ reload_completed = 0;
+
+ /* Restore the original PIC register. */
+ if (flag_pic)
+ SET_REGNO (pic_offset_table_rtx, PIC_REG);
+}
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+m68k_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, M68K_STRUCT_VALUE_REGNUM);
+}
+
+/* Return nonzero if register old_reg can be renamed to register new_reg. */
+int
+m68k_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+
+ if ((m68k_get_function_kind (current_function_decl)
+ == m68k_fk_interrupt_handler)
+ && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ return 1;
+}
+
+/* Value is true if hard register REGNO can hold a value of machine-mode
+   MODE.  On the 68000, we let the cpu registers hold any mode, but
+   restrict the 68881 registers to floating-point modes.  */
+
+bool
+m68k_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ if (DATA_REGNO_P (regno))
+ {
+      /* Data registers can hold an aggregate if it fits in their size.  */
+ if (regno + GET_MODE_SIZE (mode) / 4 <= 8)
+ return true;
+ }
+ else if (ADDRESS_REGNO_P (regno))
+ {
+ if (regno + GET_MODE_SIZE (mode) / 4 <= 16)
+ return true;
+ }
+ else if (FP_REGNO_P (regno))
+ {
+      /* FPU registers can hold a float or complex float of long double
+	 size or smaller.  */
+ if ((GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ && GET_MODE_UNIT_SIZE (mode) <= TARGET_FP_REG_SIZE)
+ return true;
+ }
+ return false;
+}
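+
+/* As a concrete example of the checks above: DImode (8 bytes) starting
+   at d6 is accepted (6 + 8/4 <= 8), while DImode starting at d7 is
+   rejected (7 + 8/4 > 8), since the value would spill past d7 into the
+   address registers.  */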
+
+/* Implement SECONDARY_RELOAD_CLASS. */
+
+enum reg_class
+m68k_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode, rtx x)
+{
+ int regno;
+
+ regno = true_regnum (x);
+
+ /* If one operand of a movqi is an address register, the other
+ operand must be a general register or constant. Other types
+ of operand must be reloaded through a data register. */
+ if (GET_MODE_SIZE (mode) == 1
+ && reg_classes_intersect_p (rclass, ADDR_REGS)
+ && !(INT_REGNO_P (regno) || CONSTANT_P (x)))
+ return DATA_REGS;
+
+ /* PC-relative addresses must be loaded into an address register first. */
+ if (TARGET_PCREL
+ && !reg_class_subset_p (rclass, ADDR_REGS)
+ && symbolic_operand (x, VOIDmode))
+ return ADDR_REGS;
+
+ return NO_REGS;
+}
+
+/* Implement PREFERRED_RELOAD_CLASS. */
+
+enum reg_class
+m68k_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum reg_class secondary_class;
+
+ /* If RCLASS might need a secondary reload, try restricting it to
+ a class that doesn't. */
+ secondary_class = m68k_secondary_reload_class (rclass, GET_MODE (x), x);
+ if (secondary_class != NO_REGS
+ && reg_class_subset_p (secondary_class, rclass))
+ return secondary_class;
+
+ /* Prefer to use moveq for in-range constants. */
+ if (GET_CODE (x) == CONST_INT
+ && reg_class_subset_p (DATA_REGS, rclass)
+ && IN_RANGE (INTVAL (x), -0x80, 0x7f))
+ return DATA_REGS;
+
+ /* ??? Do we really need this now? */
+ if (GET_CODE (x) == CONST_DOUBLE
+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && reg_class_subset_p (FP_REGS, rclass))
+ return FP_REGS;
+
+ return NO_REGS;
+ }
+
+ return rclass;
+}
+
+/* Return floating point values in a 68881 register.  This makes 68881 code
+   a little bit faster.  It also makes -msoft-float code incompatible with
+   hard-float code, so people have to be careful not to mix the two.
+   For ColdFire it was decided that the ABI incompatibility is undesirable.
+   If there is need for a hard-float ABI it is probably worth doing it
+   properly and also passing function arguments in FP registers.  */
+rtx
+m68k_libcall_value (enum machine_mode mode)
+{
+ switch (mode) {
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ if (TARGET_68881)
+ return gen_rtx_REG (mode, FP0_REG);
+ break;
+ default:
+ break;
+ }
+
+ return gen_rtx_REG (mode, m68k_libcall_value_in_a0_p ? A0_REG : D0_REG);
+}
+
+/* Location in which function value is returned.
+ NOTE: Due to differences in ABIs, don't call this function directly,
+ use FUNCTION_VALUE instead. */
+rtx
+m68k_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+
+ mode = TYPE_MODE (valtype);
+ switch (mode) {
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ if (TARGET_68881)
+ return gen_rtx_REG (mode, FP0_REG);
+ break;
+ default:
+ break;
+ }
+
+ /* If the function returns a pointer, push that into %a0. */
+ if (func && POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (func))))
+ /* For compatibility with the large body of existing code which
+ does not always properly declare external functions returning
+ pointer types, the m68k/SVR4 convention is to copy the value
+ returned for pointer functions from a0 to d0 in the function
+ epilogue, so that callers that have neglected to properly
+ declare the callee can still find the correct return value in
+ d0. */
+ return gen_rtx_PARALLEL
+ (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, A0_REG),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, D0_REG),
+ const0_rtx)));
+ else if (POINTER_TYPE_P (valtype))
+ return gen_rtx_REG (mode, A0_REG);
+ else
+ return gen_rtx_REG (mode, D0_REG);
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+#if M68K_HONOR_TARGET_STRICT_ALIGNMENT
+static bool
+m68k_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (mode == BLKmode)
+ return true;
+
+ /* If TYPE's known alignment is less than the alignment of MODE that
+ would contain the structure, then return in memory. We need to
+ do so to maintain the compatibility between code compiled with
+ -mstrict-align and that compiled with -mno-strict-align. */
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (mode))
+ return true;
+
+ return false;
+}
+#endif
+
+/* CPU to schedule the program for. */
+enum attr_cpu m68k_sched_cpu;
+
+/* MAC to schedule the program for. */
+enum attr_mac m68k_sched_mac;
+
+/* Operand type. */
+enum attr_op_type
+ {
+ /* No operand. */
+ OP_TYPE_NONE,
+
+ /* Integer register. */
+ OP_TYPE_RN,
+
+ /* FP register. */
+ OP_TYPE_FPN,
+
+ /* Implicit mem reference (e.g. stack). */
+ OP_TYPE_MEM1,
+
+ /* Memory without offset or indexing. EA modes 2, 3 and 4. */
+ OP_TYPE_MEM234,
+
+ /* Memory with offset but without indexing. EA mode 5. */
+ OP_TYPE_MEM5,
+
+ /* Memory with indexing. EA mode 6. */
+ OP_TYPE_MEM6,
+
+ /* Memory referenced by absolute address. EA mode 7. */
+ OP_TYPE_MEM7,
+
+ /* Immediate operand that doesn't require extension word. */
+ OP_TYPE_IMM_Q,
+
+ /* Immediate 16 bit operand. */
+ OP_TYPE_IMM_W,
+
+ /* Immediate 32 bit operand. */
+ OP_TYPE_IMM_L
+ };
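+
+/* For reference, in Motorola syntax the classification above maps as:
+   "(%a0)", "(%a0)+" and "-(%a0)" are EA modes 2, 3 and 4
+   (OP_TYPE_MEM234); "(d16,%a0)" is mode 5 (OP_TYPE_MEM5);
+   "(d8,%a0,%d1.l)" is mode 6 (OP_TYPE_MEM6); and the absolute forms
+   "(xxx).w" / "(xxx).l" are mode 7 (OP_TYPE_MEM7).  */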
+
+/* Return type of memory ADDR_RTX refers to. */
+static enum attr_op_type
+sched_address_type (enum machine_mode mode, rtx addr_rtx)
+{
+ struct m68k_address address;
+
+ if (symbolic_operand (addr_rtx, VOIDmode))
+ return OP_TYPE_MEM7;
+
+ if (!m68k_decompose_address (mode, addr_rtx,
+ reload_completed, &address))
+ {
+ gcc_assert (!reload_completed);
+      /* Reload will likely fix the address to be in a register.  */
+ return OP_TYPE_MEM234;
+ }
+
+ if (address.scale != 0)
+ return OP_TYPE_MEM6;
+
+ if (address.base != NULL_RTX)
+ {
+ if (address.offset == NULL_RTX)
+ return OP_TYPE_MEM234;
+
+ return OP_TYPE_MEM5;
+ }
+
+ gcc_assert (address.offset != NULL_RTX);
+
+ return OP_TYPE_MEM7;
+}
+
+/* Return X or Y (depending on OPX_P) operand of INSN. */
+static rtx
+sched_get_operand (rtx insn, bool opx_p)
+{
+ int i;
+
+ if (recog_memoized (insn) < 0)
+ gcc_unreachable ();
+
+ extract_constrain_insn_cached (insn);
+
+ if (opx_p)
+ i = get_attr_opx (insn);
+ else
+ i = get_attr_opy (insn);
+
+ if (i >= recog_data.n_operands)
+ return NULL;
+
+ return recog_data.operand[i];
+}
+
+/* Return type of INSN's operand X (if OPX_P) or operand Y (if !OPX_P).
+ If ADDRESS_P is true, return type of memory location operand refers to. */
+static enum attr_op_type
+sched_attr_op_type (rtx insn, bool opx_p, bool address_p)
+{
+ rtx op;
+
+ op = sched_get_operand (insn, opx_p);
+
+ if (op == NULL)
+ {
+ gcc_assert (!reload_completed);
+ return OP_TYPE_RN;
+ }
+
+ if (address_p)
+ return sched_address_type (QImode, op);
+
+ if (memory_operand (op, VOIDmode))
+ return sched_address_type (GET_MODE (op), XEXP (op, 0));
+
+ if (register_operand (op, VOIDmode))
+ {
+ if ((!reload_completed && FLOAT_MODE_P (GET_MODE (op)))
+ || (reload_completed && FP_REG_P (op)))
+ return OP_TYPE_FPN;
+
+ return OP_TYPE_RN;
+ }
+
+ if (GET_CODE (op) == CONST_INT)
+ {
+ int ival;
+
+ ival = INTVAL (op);
+
+ /* Check for quick constants. */
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALUQ_L:
+ if (IN_RANGE (ival, 1, 8) || IN_RANGE (ival, -8, -1))
+ return OP_TYPE_IMM_Q;
+
+ gcc_assert (!reload_completed);
+ break;
+
+ case TYPE_MOVEQ_L:
+ if (USE_MOVQ (ival))
+ return OP_TYPE_IMM_Q;
+
+ gcc_assert (!reload_completed);
+ break;
+
+ case TYPE_MOV3Q_L:
+ if (valid_mov3q_const (ival))
+ return OP_TYPE_IMM_Q;
+
+ gcc_assert (!reload_completed);
+ break;
+
+ default:
+ break;
+ }
+
+ if (IN_RANGE (ival, -0x8000, 0x7fff))
+ return OP_TYPE_IMM_W;
+
+ return OP_TYPE_IMM_L;
+ }
+
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ switch (GET_MODE (op))
+ {
+ case SFmode:
+ return OP_TYPE_IMM_W;
+
+ case VOIDmode:
+ case DFmode:
+ return OP_TYPE_IMM_L;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ if (GET_CODE (op) == CONST
+ || symbolic_operand (op, VOIDmode)
+ || LABEL_P (op))
+ {
+ switch (GET_MODE (op))
+ {
+ case QImode:
+ return OP_TYPE_IMM_Q;
+
+ case HImode:
+ return OP_TYPE_IMM_W;
+
+ case SImode:
+ return OP_TYPE_IMM_L;
+
+ default:
+ if (symbolic_operand (m68k_unwrap_symbol (op, false), VOIDmode))
+ /* Just a guess. */
+ return OP_TYPE_IMM_W;
+
+ return OP_TYPE_IMM_L;
+ }
+ }
+
+ gcc_assert (!reload_completed);
+
+ if (FLOAT_MODE_P (GET_MODE (op)))
+ return OP_TYPE_FPN;
+
+ return OP_TYPE_RN;
+}
+
+/* Implement opx_type attribute.
+ Return type of INSN's operand X.
+ If ADDRESS_P is true, return type of memory location operand refers to. */
+enum attr_opx_type
+m68k_sched_attr_opx_type (rtx insn, int address_p)
+{
+ switch (sched_attr_op_type (insn, true, address_p != 0))
+ {
+ case OP_TYPE_RN:
+ return OPX_TYPE_RN;
+
+ case OP_TYPE_FPN:
+ return OPX_TYPE_FPN;
+
+ case OP_TYPE_MEM1:
+ return OPX_TYPE_MEM1;
+
+ case OP_TYPE_MEM234:
+ return OPX_TYPE_MEM234;
+
+ case OP_TYPE_MEM5:
+ return OPX_TYPE_MEM5;
+
+ case OP_TYPE_MEM6:
+ return OPX_TYPE_MEM6;
+
+ case OP_TYPE_MEM7:
+ return OPX_TYPE_MEM7;
+
+ case OP_TYPE_IMM_Q:
+ return OPX_TYPE_IMM_Q;
+
+ case OP_TYPE_IMM_W:
+ return OPX_TYPE_IMM_W;
+
+ case OP_TYPE_IMM_L:
+ return OPX_TYPE_IMM_L;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement opy_type attribute.
+ Return type of INSN's operand Y.
+ If ADDRESS_P is true, return type of memory location operand refers to. */
+enum attr_opy_type
+m68k_sched_attr_opy_type (rtx insn, int address_p)
+{
+ switch (sched_attr_op_type (insn, false, address_p != 0))
+ {
+ case OP_TYPE_RN:
+ return OPY_TYPE_RN;
+
+ case OP_TYPE_FPN:
+ return OPY_TYPE_FPN;
+
+ case OP_TYPE_MEM1:
+ return OPY_TYPE_MEM1;
+
+ case OP_TYPE_MEM234:
+ return OPY_TYPE_MEM234;
+
+ case OP_TYPE_MEM5:
+ return OPY_TYPE_MEM5;
+
+ case OP_TYPE_MEM6:
+ return OPY_TYPE_MEM6;
+
+ case OP_TYPE_MEM7:
+ return OPY_TYPE_MEM7;
+
+ case OP_TYPE_IMM_Q:
+ return OPY_TYPE_IMM_Q;
+
+ case OP_TYPE_IMM_W:
+ return OPY_TYPE_IMM_W;
+
+ case OP_TYPE_IMM_L:
+ return OPY_TYPE_IMM_L;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return size of INSN as int. */
+static int
+sched_get_attr_size_int (rtx insn)
+{
+ int size;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IGNORE:
+ /* There should be no references to m68k_sched_attr_size for 'ignore'
+ instructions. */
+ gcc_unreachable ();
+ return 0;
+
+ case TYPE_MUL_L:
+ size = 2;
+ break;
+
+ default:
+ size = 1;
+ break;
+ }
+
+ switch (get_attr_opx_type (insn))
+ {
+ case OPX_TYPE_NONE:
+ case OPX_TYPE_RN:
+ case OPX_TYPE_FPN:
+ case OPX_TYPE_MEM1:
+ case OPX_TYPE_MEM234:
+    case OPX_TYPE_IMM_Q:
+ break;
+
+ case OPX_TYPE_MEM5:
+ case OPX_TYPE_MEM6:
+ /* Here we assume that most absolute references are short. */
+ case OPX_TYPE_MEM7:
+    case OPX_TYPE_IMM_W:
+ ++size;
+ break;
+
+    case OPX_TYPE_IMM_L:
+ size += 2;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (get_attr_opy_type (insn))
+ {
+ case OPY_TYPE_NONE:
+ case OPY_TYPE_RN:
+ case OPY_TYPE_FPN:
+ case OPY_TYPE_MEM1:
+ case OPY_TYPE_MEM234:
+ case OPY_TYPE_IMM_Q:
+ break;
+
+ case OPY_TYPE_MEM5:
+ case OPY_TYPE_MEM6:
+ /* Here we assume that most absolute references are short. */
+ case OPY_TYPE_MEM7:
+ case OPY_TYPE_IMM_W:
+ ++size;
+ break;
+
+ case OPY_TYPE_IMM_L:
+ size += 2;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (size > 3)
+ {
+ gcc_assert (!reload_completed);
+
+ size = 3;
+ }
+
+ return size;
+}
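+
+/* A worked example of the computation above, assuming operand X is the
+   destination and operand Y the source: "move.l #imm32,(%a0)" counts
+   1 (base) + 0 (opx OP_TYPE_MEM234) + 2 (opy OP_TYPE_IMM_L) = 3 words,
+   which matches the real encoding of one opcode word plus two
+   immediate extension words.  */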
+
+/* Return size of INSN as attribute enum value. */
+enum attr_size
+m68k_sched_attr_size (rtx insn)
+{
+ switch (sched_get_attr_size_int (insn))
+ {
+ case 1:
+ return SIZE_1;
+
+ case 2:
+ return SIZE_2;
+
+ case 3:
+ return SIZE_3;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the memory reference class (OP_TYPE_MEM1 or OP_TYPE_MEM6) of
+   INSN's operand X or Y (depending on OPX_P), or OP_TYPE_RN if that
+   operand does not reference memory.  */
+static enum attr_op_type
+sched_get_opxy_mem_type (rtx insn, bool opx_p)
+{
+ if (opx_p)
+ {
+ switch (get_attr_opx_type (insn))
+ {
+ case OPX_TYPE_NONE:
+ case OPX_TYPE_RN:
+ case OPX_TYPE_FPN:
+ case OPX_TYPE_IMM_Q:
+ case OPX_TYPE_IMM_W:
+ case OPX_TYPE_IMM_L:
+ return OP_TYPE_RN;
+
+ case OPX_TYPE_MEM1:
+ case OPX_TYPE_MEM234:
+ case OPX_TYPE_MEM5:
+ case OPX_TYPE_MEM7:
+ return OP_TYPE_MEM1;
+
+ case OPX_TYPE_MEM6:
+ return OP_TYPE_MEM6;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (get_attr_opy_type (insn))
+ {
+ case OPY_TYPE_NONE:
+ case OPY_TYPE_RN:
+ case OPY_TYPE_FPN:
+ case OPY_TYPE_IMM_Q:
+ case OPY_TYPE_IMM_W:
+ case OPY_TYPE_IMM_L:
+ return OP_TYPE_RN;
+
+ case OPY_TYPE_MEM1:
+ case OPY_TYPE_MEM234:
+ case OPY_TYPE_MEM5:
+ case OPY_TYPE_MEM7:
+ return OP_TYPE_MEM1;
+
+ case OPY_TYPE_MEM6:
+ return OP_TYPE_MEM6;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+}
+
+/* Implement op_mem attribute. */
+enum attr_op_mem
+m68k_sched_attr_op_mem (rtx insn)
+{
+ enum attr_op_type opx;
+ enum attr_op_type opy;
+
+ opx = sched_get_opxy_mem_type (insn, true);
+ opy = sched_get_opxy_mem_type (insn, false);
+
+ if (opy == OP_TYPE_RN && opx == OP_TYPE_RN)
+ return OP_MEM_00;
+
+ if (opy == OP_TYPE_RN && opx == OP_TYPE_MEM1)
+ {
+ switch (get_attr_opx_access (insn))
+ {
+ case OPX_ACCESS_R:
+ return OP_MEM_10;
+
+ case OPX_ACCESS_W:
+ return OP_MEM_01;
+
+ case OPX_ACCESS_RW:
+ return OP_MEM_11;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ if (opy == OP_TYPE_RN && opx == OP_TYPE_MEM6)
+ {
+ switch (get_attr_opx_access (insn))
+ {
+ case OPX_ACCESS_R:
+ return OP_MEM_I0;
+
+ case OPX_ACCESS_W:
+ return OP_MEM_0I;
+
+ case OPX_ACCESS_RW:
+ return OP_MEM_I1;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ if (opy == OP_TYPE_MEM1 && opx == OP_TYPE_RN)
+ return OP_MEM_10;
+
+ if (opy == OP_TYPE_MEM1 && opx == OP_TYPE_MEM1)
+ {
+ switch (get_attr_opx_access (insn))
+ {
+ case OPX_ACCESS_W:
+ return OP_MEM_11;
+
+ default:
+ gcc_assert (!reload_completed);
+ return OP_MEM_11;
+ }
+ }
+
+ if (opy == OP_TYPE_MEM1 && opx == OP_TYPE_MEM6)
+ {
+ switch (get_attr_opx_access (insn))
+ {
+ case OPX_ACCESS_W:
+ return OP_MEM_1I;
+
+ default:
+ gcc_assert (!reload_completed);
+ return OP_MEM_1I;
+ }
+ }
+
+ if (opy == OP_TYPE_MEM6 && opx == OP_TYPE_RN)
+ return OP_MEM_I0;
+
+ if (opy == OP_TYPE_MEM6 && opx == OP_TYPE_MEM1)
+ {
+ switch (get_attr_opx_access (insn))
+ {
+ case OPX_ACCESS_W:
+ return OP_MEM_I1;
+
+ default:
+ gcc_assert (!reload_completed);
+ return OP_MEM_I1;
+ }
+ }
+
+ gcc_assert (opy == OP_TYPE_MEM6 && opx == OP_TYPE_MEM6);
+ gcc_assert (!reload_completed);
+ return OP_MEM_I1;
+}
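+
+/* The OP_MEM_* values above appear to encode <memory reads><memory
+   writes> per insn, with 'I' in place of a digit when that access uses
+   an indexed (EA mode 6) address: e.g. OP_MEM_10 is one read and no
+   write, OP_MEM_0I is an indexed write, and OP_MEM_I1 is an indexed
+   read combined with a write.  */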
+
+/* Jump instruction types, indexed by INSN_UID.
+   The same rtl insn can be expanded into different asm instructions
+   depending on the cc0 status.  To determine the types of jump
+   instructions properly, we scan the instruction stream and record
+   the jump types in this array.  */
+static enum attr_type *sched_branch_type;
+
+/* Return the type of the jump insn. */
+enum attr_type
+m68k_sched_branch_type (rtx insn)
+{
+ enum attr_type type;
+
+ type = sched_branch_type[INSN_UID (insn)];
+
+ gcc_assert (type != 0);
+
+ return type;
+}
+
+/* Data for ColdFire V4 index bypass.
+ Producer modifies register that is used as index in consumer with
+ specified scale. */
+static struct
+{
+ /* Producer instruction. */
+ rtx pro;
+
+ /* Consumer instruction. */
+ rtx con;
+
+  /* Scale of the indexed memory access within the consumer,
+     or zero if the bypass should not be in effect at the moment.  */
+ int scale;
+} sched_cfv4_bypass_data;
+
+/* An empty state that is used in m68k_sched_adjust_cost. */
+static state_t sched_adjust_cost_state;
+
+/* Implement adjust_cost scheduler hook.
+ Return adjusted COST of dependency LINK between DEF_INSN and INSN. */
+static int
+m68k_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx def_insn,
+ int cost)
+{
+ int delay;
+
+ if (recog_memoized (def_insn) < 0
+ || recog_memoized (insn) < 0)
+ return cost;
+
+ if (sched_cfv4_bypass_data.scale == 1)
+ /* Handle ColdFire V4 bypass for indexed address with 1x scale. */
+ {
+ /* haifa-sched.c: insn_cost () calls bypass_p () just before
+ targetm.sched.adjust_cost (). Hence, we can be relatively sure
+ that the data in sched_cfv4_bypass_data is up to date. */
+ gcc_assert (sched_cfv4_bypass_data.pro == def_insn
+ && sched_cfv4_bypass_data.con == insn);
+
+ if (cost < 3)
+ cost = 3;
+
+ sched_cfv4_bypass_data.pro = NULL;
+ sched_cfv4_bypass_data.con = NULL;
+ sched_cfv4_bypass_data.scale = 0;
+ }
+ else
+ gcc_assert (sched_cfv4_bypass_data.pro == NULL
+ && sched_cfv4_bypass_data.con == NULL
+ && sched_cfv4_bypass_data.scale == 0);
+
+ /* Don't try to issue INSN earlier than DFA permits.
+ This is especially useful for instructions that write to memory,
+ as their true dependence (default) latency is better to be set to 0
+ to workaround alias analysis limitations.
+ This is, in fact, a machine independent tweak, so, probably,
+ it should be moved to haifa-sched.c: insn_cost (). */
+ delay = min_insn_conflict_delay (sched_adjust_cost_state, def_insn, insn);
+ if (delay > cost)
+ cost = delay;
+
+ return cost;
+}
+
+/* Return the maximum number of insns that can be scheduled in a single cycle.  */
+static int
+m68k_sched_issue_rate (void)
+{
+ switch (m68k_sched_cpu)
+ {
+ case CPU_CFV1:
+ case CPU_CFV2:
+ case CPU_CFV3:
+ return 1;
+
+ case CPU_CFV4:
+ return 2;
+
+ default:
+ gcc_unreachable ();
+ return 0;
+ }
+}
+
+/* Maximum length (in words) of an instruction for the current CPU;
+   e.g. it is 3 for any ColdFire core.  */
+static int max_insn_size;
+
+/* Data to model instruction buffer of CPU. */
+struct _sched_ib
+{
+  /* True if the instruction buffer is modeled for the current CPU.  */
+ bool enabled_p;
+
+ /* Size of the instruction buffer in words. */
+ int size;
+
+ /* Number of filled words in the instruction buffer. */
+ int filled;
+
+  /* Additional information about the instruction buffer for CPUs that
+     have a buffer of instruction records, rather than a plain buffer
+     of instruction words.  */
+ struct _sched_ib_records
+ {
+ /* Size of buffer in records. */
+ int n_insns;
+
+    /* Array to hold data on adjustments made to the size of the buffer.  */
+ int *adjust;
+
+    /* Current index into the above array.  */
+ int adjust_index;
+ } records;
+
+  /* An insn that reserves (marks as empty, i.e. not yet prefetched) one
+     word in the instruction buffer.  */
+ rtx insn;
+};
+
+static struct _sched_ib sched_ib;
+
+/* ID of memory unit. */
+static int sched_mem_unit_code;
+
+/* Implementation of the targetm.sched.variable_issue () hook.
+ It is called after INSN was issued. It returns the number of insns
+ that can possibly get scheduled on the current cycle.
+ It is used here to determine the effect of INSN on the instruction
+ buffer. */
+static int
+m68k_sched_variable_issue (FILE *sched_dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn, int can_issue_more)
+{
+ int insn_size;
+
+ if (recog_memoized (insn) >= 0 && get_attr_type (insn) != TYPE_IGNORE)
+ {
+ switch (m68k_sched_cpu)
+ {
+ case CPU_CFV1:
+ case CPU_CFV2:
+ insn_size = sched_get_attr_size_int (insn);
+ break;
+
+ case CPU_CFV3:
+ insn_size = sched_get_attr_size_int (insn);
+
+      /* ColdFire V3 and V4 cores have instruction buffers that can
+	 accumulate up to 8 instructions regardless of the instructions'
+	 sizes.  So we should take care not to "prefetch" 24 one-word
+	 or 12 two-word instructions.
+	 To model this behavior we temporarily decrease the size of the
+	 buffer by (max_insn_size - insn_size) for the next 7
+	 instructions.  */
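+      /* For example, issuing a one-word insn while max_insn_size is 3
+	 shrinks the modeled buffer by 2 words; the same 2 words are added
+	 back once 7 further instructions have been issued.  */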
+ {
+ int adjust;
+
+ adjust = max_insn_size - insn_size;
+ sched_ib.size -= adjust;
+
+ if (sched_ib.filled > sched_ib.size)
+ sched_ib.filled = sched_ib.size;
+
+ sched_ib.records.adjust[sched_ib.records.adjust_index] = adjust;
+ }
+
+ ++sched_ib.records.adjust_index;
+ if (sched_ib.records.adjust_index == sched_ib.records.n_insns)
+ sched_ib.records.adjust_index = 0;
+
+      /* Undo the adjustment we made 7 instructions ago.  */
+ sched_ib.size
+ += sched_ib.records.adjust[sched_ib.records.adjust_index];
+
+ break;
+
+ case CPU_CFV4:
+ gcc_assert (!sched_ib.enabled_p);
+ insn_size = 0;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (insn_size <= sched_ib.filled);
+ --can_issue_more;
+ }
+ else if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ insn_size = sched_ib.filled;
+ else
+ insn_size = 0;
+
+ sched_ib.filled -= insn_size;
+
+ return can_issue_more;
+}
+
+/* Return how many instructions the scheduler should look ahead to
+   choose the best one.  */
+static int
+m68k_sched_first_cycle_multipass_dfa_lookahead (void)
+{
+ return m68k_sched_issue_rate () - 1;
+}
+
+/* Implementation of targetm.sched.init_global () hook.
+ It is invoked once per scheduling pass and is used here
+ to initialize scheduler constants. */
+static void
+m68k_sched_md_init_global (FILE *sched_dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int n_insns ATTRIBUTE_UNUSED)
+{
+ /* Init branch types. */
+ {
+ rtx insn;
+
+ sched_branch_type = XCNEWVEC (enum attr_type, get_max_uid () + 1);
+
+ for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ if (JUMP_P (insn))
+ /* !!! FIXME: Implement real scan here. */
+ sched_branch_type[INSN_UID (insn)] = TYPE_BCC;
+ }
+ }
+
+#ifdef ENABLE_CHECKING
+ /* Check that all instructions have DFA reservations and
+ that all instructions can be issued from a clean state. */
+ {
+ rtx insn;
+ state_t state;
+
+ state = alloca (state_size ());
+
+ for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn) && recog_memoized (insn) >= 0)
+ {
+ gcc_assert (insn_has_dfa_reservation_p (insn));
+
+ state_reset (state);
+ if (state_transition (state, insn) >= 0)
+ gcc_unreachable ();
+ }
+ }
+ }
+#endif
+
+ /* Setup target cpu. */
+
+  /* ColdFire V4 has a set of features to keep its instruction buffer full
+     (e.g., a separate memory bus for instructions) and, hence, we do not
+     model the buffer for this CPU.  */
+ sched_ib.enabled_p = (m68k_sched_cpu != CPU_CFV4);
+
+ switch (m68k_sched_cpu)
+ {
+ case CPU_CFV4:
+ sched_ib.filled = 0;
+
+ /* FALLTHRU */
+
+ case CPU_CFV1:
+ case CPU_CFV2:
+ max_insn_size = 3;
+ sched_ib.records.n_insns = 0;
+ sched_ib.records.adjust = NULL;
+ break;
+
+ case CPU_CFV3:
+ max_insn_size = 3;
+ sched_ib.records.n_insns = 8;
+ sched_ib.records.adjust = XNEWVEC (int, sched_ib.records.n_insns);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ sched_mem_unit_code = get_cpu_unit_code ("cf_mem1");
+
+ sched_adjust_cost_state = xmalloc (state_size ());
+ state_reset (sched_adjust_cost_state);
+
+ start_sequence ();
+ emit_insn (gen_ib ());
+ sched_ib.insn = get_insns ();
+ end_sequence ();
+}
+
+/* Scheduling pass is now finished. Free/reset static variables. */
+static void
+m68k_sched_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED)
+{
+ sched_ib.insn = NULL;
+
+ free (sched_adjust_cost_state);
+ sched_adjust_cost_state = NULL;
+
+ sched_mem_unit_code = 0;
+
+ free (sched_ib.records.adjust);
+ sched_ib.records.adjust = NULL;
+ sched_ib.records.n_insns = 0;
+ max_insn_size = 0;
+
+ free (sched_branch_type);
+ sched_branch_type = NULL;
+}
+
+/* Implementation of the targetm.sched.init () hook.
+   It is invoked each time the scheduler starts on a new block (basic
+   block or extended basic block).  */
+static void
+m68k_sched_md_init (FILE *sched_dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int n_insns ATTRIBUTE_UNUSED)
+{
+ switch (m68k_sched_cpu)
+ {
+ case CPU_CFV1:
+ case CPU_CFV2:
+ sched_ib.size = 6;
+ break;
+
+ case CPU_CFV3:
+ sched_ib.size = sched_ib.records.n_insns * max_insn_size;
+
+ memset (sched_ib.records.adjust, 0,
+ sched_ib.records.n_insns * sizeof (*sched_ib.records.adjust));
+ sched_ib.records.adjust_index = 0;
+ break;
+
+ case CPU_CFV4:
+ gcc_assert (!sched_ib.enabled_p);
+ sched_ib.size = 0;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (sched_ib.enabled_p)
+    /* haifa-sched.c: schedule_block () calls advance_cycle () just before
+       the first cycle.  Work around that.  */
+ sched_ib.filled = -2;
+}
+
+/* Implementation of the targetm.sched.dfa_pre_advance_cycle () hook.
+   It is invoked just before the current cycle finishes and is used here
+   to track whether the instruction buffer got its two words this cycle.  */
+static void
+m68k_sched_dfa_pre_advance_cycle (void)
+{
+ if (!sched_ib.enabled_p)
+ return;
+
+ if (!cpu_unit_reservation_p (curr_state, sched_mem_unit_code))
+ {
+ sched_ib.filled += 2;
+
+ if (sched_ib.filled > sched_ib.size)
+ sched_ib.filled = sched_ib.size;
+ }
+}
+
+/* Implementation of the targetm.sched.dfa_post_advance_cycle () hook.
+   It is invoked just after a new cycle begins and is used here
+   to set up the number of filled words in the instruction buffer so that
+   instructions which won't have all their words prefetched will be
+   stalled for a cycle.  */
+static void
+m68k_sched_dfa_post_advance_cycle (void)
+{
+ int i;
+
+ if (!sched_ib.enabled_p)
+ return;
+
+  /* Set up the number of prefetched instruction words in the instruction
+     buffer.  */
+ i = max_insn_size - sched_ib.filled;
+
+ while (--i >= 0)
+ {
+ if (state_transition (curr_state, sched_ib.insn) >= 0)
+ gcc_unreachable ();
+ }
+}
+
+/* Return the X or Y (depending on OPX_P) operand of INSN,
+   if it is an integer register, or NULL otherwise.  */
+static rtx
+sched_get_reg_operand (rtx insn, bool opx_p)
+{
+ rtx op = NULL;
+
+ if (opx_p)
+ {
+ if (get_attr_opx_type (insn) == OPX_TYPE_RN)
+ {
+ op = sched_get_operand (insn, true);
+ gcc_assert (op != NULL);
+
+ if (!reload_completed && !REG_P (op))
+ return NULL;
+ }
+ }
+ else
+ {
+ if (get_attr_opy_type (insn) == OPY_TYPE_RN)
+ {
+ op = sched_get_operand (insn, false);
+ gcc_assert (op != NULL);
+
+ if (!reload_completed && !REG_P (op))
+ return NULL;
+ }
+ }
+
+ return op;
+}
+
+/* Return true if the X or Y (depending on OPX_P) operand of INSN
+   is a MEM.  */
+static bool
+sched_mem_operand_p (rtx insn, bool opx_p)
+{
+ switch (sched_get_opxy_mem_type (insn, opx_p))
+ {
+ case OP_TYPE_MEM1:
+ case OP_TYPE_MEM6:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Return the MEM operand of INSN that is read (if MUST_READ_P) or
+   written (if MUST_WRITE_P); abort if INSN has no such operand.  */
+static rtx
+sched_get_mem_operand (rtx insn, bool must_read_p, bool must_write_p)
+{
+ bool opx_p;
+ bool opy_p;
+
+ opx_p = false;
+ opy_p = false;
+
+ if (must_read_p)
+ {
+ opx_p = true;
+ opy_p = true;
+ }
+
+ if (must_write_p)
+ {
+ opx_p = true;
+ opy_p = false;
+ }
+
+ if (opy_p && sched_mem_operand_p (insn, false))
+ return sched_get_operand (insn, false);
+
+ if (opx_p && sched_mem_operand_p (insn, true))
+ return sched_get_operand (insn, true);
+
+ gcc_unreachable ();
+ return NULL;
+}
+
+/* Return non-zero if PRO modifies a register used as part of
+   an address in CON.  */
+int
+m68k_sched_address_bypass_p (rtx pro, rtx con)
+{
+ rtx pro_x;
+ rtx con_mem_read;
+
+ pro_x = sched_get_reg_operand (pro, true);
+ if (pro_x == NULL)
+ return 0;
+
+ con_mem_read = sched_get_mem_operand (con, true, false);
+ gcc_assert (con_mem_read != NULL);
+
+ if (reg_mentioned_p (pro_x, con_mem_read))
+ return 1;
+
+ return 0;
+}
+
+/* Helper function for m68k_sched_indexed_address_bypass_p.
+   If PRO modifies a register used as an index in CON, return the scale
+   of the indexed memory access in CON; return zero otherwise.  */
+static int
+sched_get_indexed_address_scale (rtx pro, rtx con)
+{
+ rtx reg;
+ rtx mem;
+ struct m68k_address address;
+
+ reg = sched_get_reg_operand (pro, true);
+ if (reg == NULL)
+ return 0;
+
+ mem = sched_get_mem_operand (con, true, false);
+ gcc_assert (mem != NULL && MEM_P (mem));
+
+ if (!m68k_decompose_address (GET_MODE (mem), XEXP (mem, 0), reload_completed,
+ &address))
+ gcc_unreachable ();
+
+ if (REGNO (reg) == REGNO (address.index))
+ {
+ gcc_assert (address.scale != 0);
+ return address.scale;
+ }
+
+ return 0;
+}
+
+/* Return non-zero if PRO modifies a register used
+   as an index with scale 2 or 4 in CON.  */
+int
+m68k_sched_indexed_address_bypass_p (rtx pro, rtx con)
+{
+ gcc_assert (sched_cfv4_bypass_data.pro == NULL
+ && sched_cfv4_bypass_data.con == NULL
+ && sched_cfv4_bypass_data.scale == 0);
+
+ switch (sched_get_indexed_address_scale (pro, con))
+ {
+ case 1:
+ /* We can't have a variable latency bypass, so
+ remember to adjust the insn cost in adjust_cost hook. */
+ sched_cfv4_bypass_data.pro = pro;
+ sched_cfv4_bypass_data.con = con;
+ sched_cfv4_bypass_data.scale = 1;
+ return 0;
+
+ case 2:
+ case 4:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* We generate a two-instruction sequence at M_TRAMP:
+	movea.l &CHAIN_VALUE,%a0
+	jmp FNADDR
+   where %a0 can be modified by changing STATIC_CHAIN_REGNUM.  */
+
+static void
+m68k_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ gcc_assert (ADDRESS_REGNO_P (STATIC_CHAIN_REGNUM));
+
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT(0x207C + ((STATIC_CHAIN_REGNUM-8) << 9)));
+ mem = adjust_address (m_tramp, SImode, 2);
+ emit_move_insn (mem, chain_value);
+
+ mem = adjust_address (m_tramp, HImode, 6);
+ emit_move_insn (mem, GEN_INT(0x4EF9));
+ mem = adjust_address (m_tramp, SImode, 8);
+ emit_move_insn (mem, fnaddr);
+
+ FINALIZE_TRAMPOLINE (XEXP (m_tramp, 0));
+}
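+
+/* For reference, the 12-byte trampoline built above has this layout
+   (derived directly from the stores in m68k_trampoline_init):
+     offset 0:  0x207c | (N << 9)   movea.l #<chain_value>,%aN
+     offset 2:  <chain_value>       32-bit immediate
+     offset 6:  0x4ef9              jmp <abs32>
+     offset 8:  <fnaddr>            32-bit absolute address
+   where N == STATIC_CHAIN_REGNUM - 8.  */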
+
+/* On the 68000, the RTS insn cannot pop anything.
+ On the 68010, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args. */
+
+static int
+m68k_return_pops_args (tree fundecl, tree funtype, int size)
+{
+ return ((TARGET_RTD
+ && (!fundecl
+ || TREE_CODE (fundecl) != IDENTIFIER_NODE)
+ && (!stdarg_p (funtype)))
+ ? size : 0);
+}
+
+/* Make sure everything's fine if we *don't* have a given processor.
+ This assumes that putting a register in fixed_regs will keep the
+ compiler's mitts completely off it. We don't bother to zero it out
+ of register classes. */
+
+static void
+m68k_conditional_register_usage (void)
+{
+ int i;
+ HARD_REG_SET x;
+ if (!TARGET_HARD_FLOAT)
+ {
+ COPY_HARD_REG_SET (x, reg_class_contents[(int)FP_REGS]);
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (x, i))
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+ if (flag_pic)
+ fixed_regs[PIC_REG] = call_used_regs[PIC_REG] = 1;
+}
+
+#include "gt-m68k.h"
diff --git a/gcc/config/m68k/m68k.h b/gcc/config/m68k/m68k.h
new file mode 100644
index 000000000..71b7c4f27
--- /dev/null
+++ b/gcc/config/m68k/m68k.h
@@ -0,0 +1,1034 @@
+/* Definitions of target machine for GCC for Motorola 680x0/ColdFire.
+ Copyright (C) 1987, 1988, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* We need to have MOTOROLA always defined (either 0 or 1) because we use
+ if-statements and ?: on it. This way we have compile-time error checking
+ for both the MOTOROLA and MIT code paths. We do rely on the host compiler
+ to optimize away all constant tests. */
+#if MOTOROLA /* Use the Motorola assembly syntax. */
+# define TARGET_VERSION fprintf (stderr, " (68k, Motorola syntax)")
+#else
+# define MOTOROLA 0 /* Use the MIT assembly syntax. */
+# define TARGET_VERSION fprintf (stderr, " (68k, MIT syntax)")
+#endif
+
+/* Handle --with-cpu default option from configure script. */
+#define OPTION_DEFAULT_SPECS \
+ { "cpu", "%{!mc68000:%{!m68000:%{!m68302:%{!m68010:%{!mc68020:%{!m68020:\
+%{!m68030:%{!m68040:%{!m68020-40:%{!m68020-60:%{!m68060:%{!mcpu32:\
+%{!m68332:%{!m5200:%{!m5206e:%{!m528x:%{!m5307:%{!m5407:%{!mcfv4e:\
+%{!mcpu=*:%{!march=*:-%(VALUE)}}}}}}}}}}}}}}}}}}}}}" },
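+
+/* For example, when the compiler is configured with a default CPU, the
+   spec above injects -%(VALUE) (the configured default) only if none of
+   the listed -m options, -mcpu= or -march= appear on the command
+   line.  */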
+
+/* Pass flags to gas indicating which type of processor we have. This
+ can be simplified when we can rely on the assembler supporting .cpu
+ and .arch directives. */
+
+#define ASM_CPU_SPEC "\
+%{m68851}%{mno-68851} %{m68881}%{mno-68881} %{msoft-float:-mno-float} \
+%{m68000}%{m68302}%{mc68000}%{m68010}%{m68020}%{mc68020}%{m68030}\
+%{m68040}%{m68020-40:-m68040}%{m68020-60:-m68040}\
+%{m68060}%{mcpu32}%{m68332}%{m5200}%{m5206e}%{m528x}%{m5307}%{m5407}%{mcfv4e}\
+%{mcpu=*:-mcpu=%*}%{march=*:-march=%*}\
+"
+#define ASM_PCREL_SPEC "%{fPIC|fpic|mpcrel:--pcrel} \
+ %{msep-data|mid-shared-library:--pcrel} \
+"
+
+#define ASM_SPEC "%(asm_cpu_spec) %(asm_pcrel_spec)"
+
+#define EXTRA_SPECS \
+ { "asm_cpu_spec", ASM_CPU_SPEC }, \
+ { "asm_pcrel_spec", ASM_PCREL_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#define SUBTARGET_EXTRA_SPECS
+
+/* Note that some other tm.h files include this one and then override
+ many of the definitions that relate to assembler syntax. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__m68k__"); \
+ builtin_define_std ("mc68000"); \
+ /* The other mc680x0 macros have traditionally been derived \
+ from the tuning setting. For example, -m68020-60 defines \
+ m68060, even though it generates pure 68020 code. */ \
+ switch (m68k_tune) \
+ { \
+ case u68010: \
+ builtin_define_std ("mc68010"); \
+ break; \
+ \
+ case u68020: \
+ builtin_define_std ("mc68020"); \
+ break; \
+ \
+ case u68030: \
+ builtin_define_std ("mc68030"); \
+ break; \
+ \
+ case u68040: \
+ builtin_define_std ("mc68040"); \
+ break; \
+ \
+ case u68060: \
+ builtin_define_std ("mc68060"); \
+ break; \
+ \
+ case u68020_60: \
+ builtin_define_std ("mc68060"); \
+ /* Fall through. */ \
+ case u68020_40: \
+ builtin_define_std ("mc68040"); \
+ builtin_define_std ("mc68030"); \
+ builtin_define_std ("mc68020"); \
+ break; \
+ \
+ case ucpu32: \
+ builtin_define_std ("mc68332"); \
+ builtin_define_std ("mcpu32"); \
+ builtin_define_std ("mc68020"); \
+ break; \
+ \
+ case ucfv1: \
+ builtin_define ("__mcfv1__"); \
+ break; \
+ \
+ case ucfv2: \
+ builtin_define ("__mcfv2__"); \
+ break; \
+ \
+ case ucfv3: \
+ builtin_define ("__mcfv3__"); \
+ break; \
+ \
+ case ucfv4: \
+ builtin_define ("__mcfv4__"); \
+ break; \
+ \
+ case ucfv4e: \
+ builtin_define ("__mcfv4e__"); \
+ break; \
+ \
+ case ucfv5: \
+ builtin_define ("__mcfv5__"); \
+ break; \
+ \
+ default: \
+ break; \
+ } \
+ \
+ if (TARGET_68881) \
+ builtin_define ("__HAVE_68881__"); \
+ \
+ if (TARGET_COLDFIRE) \
+ { \
+ const char *tmp; \
+ \
+ tmp = m68k_cpp_cpu_ident ("cf"); \
+ if (tmp) \
+ builtin_define (tmp); \
+ tmp = m68k_cpp_cpu_family ("cf"); \
+ if (tmp) \
+ builtin_define (tmp); \
+ builtin_define ("__mcoldfire__"); \
+ \
+ if (TARGET_ISAC) \
+ builtin_define ("__mcfisac__"); \
+ else if (TARGET_ISAB) \
+ { \
+ builtin_define ("__mcfisab__"); \
+ /* ISA_B: Legacy 5407 defines. */ \
+ builtin_define ("__mcf5400__"); \
+ builtin_define ("__mcf5407__"); \
+ } \
+ else if (TARGET_ISAAPLUS) \
+ { \
+ builtin_define ("__mcfisaaplus__"); \
+ /* ISA_A+: legacy defines. */ \
+ builtin_define ("__mcf528x__"); \
+ builtin_define ("__mcf5200__"); \
+ } \
+ else \
+ { \
+ builtin_define ("__mcfisaa__"); \
+ /* ISA_A: legacy defines. */ \
+ switch (m68k_tune) \
+ { \
+ case ucfv2: \
+ builtin_define ("__mcf5200__"); \
+ break; \
+ \
+ case ucfv3: \
+ builtin_define ("__mcf5307__"); \
+ builtin_define ("__mcf5300__"); \
+ break; \
+ \
+ default: \
+ break; \
+ } \
+ } \
+ } \
+ \
+ if (TARGET_COLDFIRE_FPU) \
+ builtin_define ("__mcffpu__"); \
+ \
+ if (TARGET_CF_HWDIV) \
+ builtin_define ("__mcfhwdiv__"); \
+ \
+ if (TARGET_FIDOA) \
+ builtin_define ("__mfido__"); \
+ \
+ builtin_assert ("cpu=m68k"); \
+ builtin_assert ("machine=m68k"); \
+ } \
+ while (0)
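+
+/* To illustrate builtin_define_std above: the u68040 case defines
+   __mc68040 and __mc68040__ unconditionally, plus plain mc68040 when
+   not compiling in strict-ISO mode, whereas plain builtin_define
+   entries such as "__mcoldfire__" are emitted exactly as written.  */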
+
+/* Classify the groups of pseudo-ops used to assemble QI, HI and SI
+ quantities. */
+#define INT_OP_STANDARD 0 /* .byte, .short, .long */
+#define INT_OP_DOT_WORD 1 /* .byte, .word, .long */
+#define INT_OP_NO_DOT 2 /* byte, short, long */
+#define INT_OP_DC 3 /* dc.b, dc.w, dc.l */
+
+/* Set the default. */
+#define INT_OP_GROUP INT_OP_DOT_WORD
+
+/* Bit values used by m68k-devices.def to identify processor capabilities. */
+#define FL_BITFIELD (1 << 0) /* Support bitfield instructions. */
+#define FL_68881 (1 << 1) /* (Default) support for 68881/2. */
+#define FL_COLDFIRE (1 << 2) /* ColdFire processor. */
+#define FL_CF_HWDIV (1 << 3) /* ColdFire hardware divide supported. */
+#define FL_CF_MAC (1 << 4) /* ColdFire MAC unit supported. */
+#define FL_CF_EMAC (1 << 5) /* ColdFire eMAC unit supported. */
+#define FL_CF_EMAC_B (1 << 6) /* ColdFire eMAC-B unit supported. */
+#define FL_CF_USP (1 << 7) /* ColdFire User Stack Pointer supported. */
+#define FL_CF_FPU (1 << 8) /* ColdFire FPU supported. */
+#define FL_ISA_68000 (1 << 9)
+#define FL_ISA_68010 (1 << 10)
+#define FL_ISA_68020 (1 << 11)
+#define FL_ISA_68040 (1 << 12)
+#define FL_ISA_A (1 << 13)
+#define FL_ISA_APLUS (1 << 14)
+#define FL_ISA_B (1 << 15)
+#define FL_ISA_C (1 << 16)
+#define FL_FIDOA (1 << 17)
+#define FL_MMU 0 /* Used by multilib machinery. */
+#define FL_UCLINUX 0 /* Used by multilib machinery. */
+
+#define TARGET_68010 ((m68k_cpu_flags & FL_ISA_68010) != 0)
+#define TARGET_68020 ((m68k_cpu_flags & FL_ISA_68020) != 0)
+#define TARGET_68040 ((m68k_cpu_flags & FL_ISA_68040) != 0)
+#define TARGET_COLDFIRE ((m68k_cpu_flags & FL_COLDFIRE) != 0)
+#define TARGET_COLDFIRE_FPU (m68k_fpu == FPUTYPE_COLDFIRE)
+#define TARGET_68881 (m68k_fpu == FPUTYPE_68881)
+#define TARGET_FIDOA ((m68k_cpu_flags & FL_FIDOA) != 0)
+
+/* Size (in bytes) of FPU registers. */
+#define TARGET_FP_REG_SIZE (TARGET_COLDFIRE ? 8 : 12)
+
+#define TARGET_ISAAPLUS ((m68k_cpu_flags & FL_ISA_APLUS) != 0)
+#define TARGET_ISAB ((m68k_cpu_flags & FL_ISA_B) != 0)
+#define TARGET_ISAC ((m68k_cpu_flags & FL_ISA_C) != 0)
+
+/* Some instructions are common to more than one ISA. */
+#define ISA_HAS_MVS_MVZ (TARGET_ISAB || TARGET_ISAC)
+#define ISA_HAS_FF1 (TARGET_ISAAPLUS || TARGET_ISAC)
+
+#define TUNE_68000 (m68k_tune == u68000)
+#define TUNE_68010 (m68k_tune == u68010)
+#define TUNE_68000_10 (TUNE_68000 || TUNE_68010)
+#define TUNE_68030 (m68k_tune == u68030 \
+ || m68k_tune == u68020_40 \
+ || m68k_tune == u68020_60)
+#define TUNE_68040 (m68k_tune == u68040 \
+ || m68k_tune == u68020_40 \
+ || m68k_tune == u68020_60)
+#define TUNE_68060 (m68k_tune == u68060 || m68k_tune == u68020_60)
+#define TUNE_68040_60 (TUNE_68040 || TUNE_68060)
+#define TUNE_CPU32 (m68k_tune == ucpu32)
+#define TUNE_CFV1 (m68k_tune == ucfv1)
+#define TUNE_CFV2 (m68k_tune == ucfv2)
+#define TUNE_CFV3 (m68k_tune == ucfv3)
+#define TUNE_CFV4 (m68k_tune == ucfv4 || m68k_tune == ucfv4e)
+
+#define TUNE_MAC ((m68k_tune_flags & FL_CF_MAC) != 0)
+#define TUNE_EMAC ((m68k_tune_flags & FL_CF_EMAC) != 0)
+
+/* These are meant to be redefined in the host dependent files.  */
+#define SUBTARGET_OVERRIDE_OPTIONS
+
+/* target machine storage layout */
+
+/* "long double" is the same as "double" on ColdFire and fido
+ targets. */
+
+#define LONG_DOUBLE_TYPE_SIZE \
+ ((TARGET_COLDFIRE || TARGET_FIDOA) ? 64 : 80)
+
+/* We need to know the size of long double at compile-time in libgcc2. */
+
+#if defined(__mcoldfire__) || defined(__mfido__)
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 80
+#endif
+
+/* Set the value of FLT_EVAL_METHOD in float.h. When using 68040 fp
+ instructions, we get proper intermediate rounding, otherwise we
+ get extended precision results. */
+#define TARGET_FLT_EVAL_METHOD ((TARGET_68040 || ! TARGET_68881) ? 0 : 2)
+
+#define BITS_BIG_ENDIAN 1
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+
+#define UNITS_PER_WORD 4
+
+#define PARM_BOUNDARY (TARGET_SHORT ? 16 : 32)
+#define STACK_BOUNDARY 16
+#define FUNCTION_BOUNDARY 16
+#define EMPTY_FIELD_BOUNDARY 16
+/* ColdFire and fido strongly prefer a 32-bit aligned stack. */
+#define PREFERRED_STACK_BOUNDARY \
+ ((TARGET_COLDFIRE || TARGET_FIDOA) ? 32 : 16)
+
+/* No data type wants to be aligned more strictly than this.
+   Most published ABIs say that ints should be aligned on 16-bit
+   boundaries, but CPUs with 32-bit busses get better performance
+   when aligned on 32-bit boundaries.  */
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_INT ? 32 : 16)
+
+#define STRICT_ALIGNMENT (TARGET_STRICT_ALIGNMENT)
+#define M68K_HONOR_TARGET_STRICT_ALIGNMENT 1
+
+#define DWARF_CIE_DATA_ALIGNMENT -2
+
+#define INT_TYPE_SIZE (TARGET_SHORT ? 16 : 32)
+
+/* Define these to avoid dependence on meaning of `int'. */
+#define WCHAR_TYPE "long int"
+#define WCHAR_TYPE_SIZE 32
+
+/* Maximum number of library IDs we permit with -mid-shared-library. */
+#define MAX_LIBRARY_ID 255
+
+
+/* Standard register usage. */
+
+/* For the m68k, we give the data registers numbers 0-7,
+ the address registers numbers 010-017 (8-15),
+ and the 68881 floating point registers numbers 020-027 (16-23).
+ We also have a fake `arg-pointer' register 030 (24) used for
+ register elimination. */
+#define FIRST_PSEUDO_REGISTER 25
+
+/* All m68k targets (except AmigaOS) use %a5 as the PIC register.  */
+#define PIC_OFFSET_TABLE_REGNUM \
+ (!flag_pic ? INVALID_REGNUM \
+ : reload_completed ? REGNO (pic_offset_table_rtx) \
+ : PIC_REG)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the m68k, only the stack pointer is such.
+ Our fake arg-pointer is obviously fixed as well. */
+#define FIXED_REGISTERS \
+ {/* Data registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ \
+ /* Address registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ \
+ /* Floating point registers \
+ (if available). */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ \
+ /* Arg pointer. */ \
+ 1 }
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+ {/* Data registers. */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, \
+ \
+ /* Address registers. */ \
+ 1, 1, 0, 0, 0, 0, 0, 1, \
+ \
+ /* Floating point registers \
+ (if available). */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, \
+ \
+ /* Arg pointer. */ \
+ 1 }
+
+#define REG_ALLOC_ORDER \
+{ /* d0/d1/a0/a1 */ \
+ 0, 1, 8, 9, \
+ /* d2-d7 */ \
+ 2, 3, 4, 5, 6, 7, \
+ /* a2-a7/arg */ \
+ 10, 11, 12, 13, 14, 15, 24, \
+ /* fp0-fp7 */ \
+ 16, 17, 18, 19, 20, 21, 22, 23\
+}
+
+
+/* On the m68k, ordinary registers hold 32 bits worth;
+ for the 68881 registers, a single register is always enough for
+ anything that can be stored in them at all. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((REGNO) >= 16 ? GET_MODE_NUNITS (MODE) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
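+
+/* E.g. HARD_REGNO_NREGS yields 2 for DImode in a data register
+   ((8 + 4 - 1) / 4), but 1 for XFmode in a 68881 register, since
+   GET_MODE_NUNITS (XFmode) == 1.  */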
+
+/* A C expression that is nonzero if hard register NEW_REG can be
+ considered for use as a rename register for OLD_REG register. */
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ m68k_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ m68k_regno_mode_ok ((REGNO), (MODE))
+
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ m68k_secondary_reload_class (CLASS, MODE, X)
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (! TARGET_HARD_FLOAT \
+ || ((GET_MODE_CLASS (MODE1) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT)))
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+#define STACK_POINTER_REGNUM SP_REG
+
+/* Most m68k targets use %a6 as a frame pointer. The AmigaOS
+ ABI uses %a6 for shared library calls, therefore the frame
+ pointer is shifted to %a5 on this target. */
+#define FRAME_POINTER_REGNUM A6_REG
+
+/* Base register for access to arguments of the function.
+   This isn't a hardware register; it will be eliminated to the
+   stack pointer or frame pointer.  */
+#define ARG_POINTER_REGNUM 24
+
+#define STATIC_CHAIN_REGNUM A0_REG
+#define M68K_STATIC_CHAIN_REG_NAME REGISTER_PREFIX "a0"
+
+/* Register in which address to store a structure value
+ is passed to a function. */
+#define M68K_STRUCT_VALUE_REGNUM A1_REG
+
+
+
+/* The m68k has three kinds of registers, so eight classes would be
+ a complete set. One of them is not needed. */
+enum reg_class {
+ NO_REGS, DATA_REGS,
+ ADDR_REGS, FP_REGS,
+ GENERAL_REGS, DATA_OR_FP_REGS,
+ ADDR_OR_FP_REGS, ALL_REGS,
+ LIM_REG_CLASSES };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+ { "NO_REGS", "DATA_REGS", \
+ "ADDR_REGS", "FP_REGS", \
+ "GENERAL_REGS", "DATA_OR_FP_REGS", \
+ "ADDR_OR_FP_REGS", "ALL_REGS" }
+
+#define REG_CLASS_CONTENTS \
+{ \
+ {0x00000000}, /* NO_REGS */ \
+ {0x000000ff}, /* DATA_REGS */ \
+ {0x0100ff00}, /* ADDR_REGS */ \
+ {0x00ff0000}, /* FP_REGS */ \
+ {0x0100ffff}, /* GENERAL_REGS */ \
+ {0x00ff00ff}, /* DATA_OR_FP_REGS */ \
+ {0x01ffff00}, /* ADDR_OR_FP_REGS */ \
+ {0x01ffffff}, /* ALL_REGS */ \
+}
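+
+/* Reading the masks above: bit N stands for hard register N, so
+   DATA_REGS (0x000000ff) covers d0-d7 (bits 0-7), FP_REGS (0x00ff0000)
+   covers fp0-fp7 (bits 16-23), and ADDR_REGS (0x0100ff00) covers a0-a7
+   (bits 8-15) plus the fake arg pointer (bit 24).  */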
+
+extern enum reg_class regno_reg_class[];
+#define REGNO_REG_CLASS(REGNO) (regno_reg_class[(REGNO)])
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS ADDR_REGS
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ m68k_preferred_reload_class (X, CLASS)
+
+/* On the m68k, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((CLASS) == FP_REGS ? 1 \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* Moves between fp regs and other regs are two insns. */
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ ((((CLASS1) == FP_REGS) != ((CLASS2) == FP_REGS)) ? 4 : 2)
+
+#define IRA_COVER_CLASSES \
+{ \
+ ALL_REGS, LIM_REG_CLASSES \
+}
+
+/* Stack layout; function entry, exit and calling. */
+
+#define STACK_GROWS_DOWNWARD 1
+#define FRAME_GROWS_DOWNWARD 1
+#define STARTING_FRAME_OFFSET 0
+
+/* On the 680x0, sp@- in a byte insn really pushes a word.
+ On the ColdFire, sp@- in a byte insn pushes just a byte. */
+#define PUSH_ROUNDING(BYTES) (TARGET_COLDFIRE ? BYTES : ((BYTES) + 1) & ~1)
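+
+/* E.g. pushing a single QImode byte with sp@- therefore adjusts the
+   stack pointer by 2 bytes on the 680x0 (PUSH_ROUNDING (1) == 2) but
+   by only 1 byte on ColdFire (PUSH_ROUNDING (1) == 1).  */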
+
+#define FIRST_PARM_OFFSET(FNDECL) 8
+
+/* On the m68k the return value defaults to D0. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG (TYPE_MODE (VALTYPE), D0_REG)
+
+/* On the m68k the return value defaults to D0. */
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, D0_REG)
+
+/* On the m68k, D0 is usually the only register used. */
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == D0_REG)
+
+/* Define this to be true when FUNCTION_VALUE_REGNO_P is true for
+ more than one register.
+ XXX This macro is m68k specific and used only for m68kemb.h. */
+#define NEEDS_UNTYPED_CALL 0
+
+/* On the m68k, all arguments are usually pushed on the stack. */
+#define FUNCTION_ARG_REGNO_P(N) 0
+
+/* On the m68k, this is a single integer, which is a number of bytes
+ of arguments scanned so far. */
+#define CUMULATIVE_ARGS int
+
+/* On the m68k, the offset starts at 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ asm_fprintf (FILE, "\tlea %LLP%d,%Ra0\n\tjsr mcount\n", (LABELNO))
+
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ On the m68k, the trampoline looks like this:
+ movl #STATIC,a0
+ jmp FUNCTION
+
+ WARNING: Targets that may run on 68040+ cpus must arrange for
+ the instruction cache to be flushed. Previous incarnations of
+ the m68k trampoline code attempted to get around this by either
+ using an out-of-line transfer function or pc-relative data, but
+ the fact remains that the code to jump to the transfer function
+ or the code to load the pc-relative data needs to be flushed
+ just as much as the "variable" portion of the trampoline.
+ Recognizing that a cache flush is going to be required anyway,
+ dispense with such notions and build a smaller trampoline.
+
+ Since more instructions are required to move a template into
+ place than to create it on the spot, don't use a template. */
+
+#define TRAMPOLINE_SIZE 12
+#define TRAMPOLINE_ALIGNMENT 16
+
+/* Targets redefine this to invoke code to either flush the cache,
+ or enable stack execution (or both). */
+#ifndef FINALIZE_TRAMPOLINE
+#define FINALIZE_TRAMPOLINE(TRAMP)
+#endif
+
+/* This is the library routine that is used to transfer control from the
+ trampoline to the actual nested function. It is defined for backward
+ compatibility, for linking with object code that used the old trampoline
+ definition.
+
+ A colon is used with no explicit operands to cause the template string
+ to be scanned for %-constructs.
+
+ The function name __transfer_from_trampoline is not actually used.
+ The function definition just permits use of "asm with operands"
+ (though the operand list is empty). */
+#define TRANSFER_FROM_TRAMPOLINE \
+void \
+__transfer_from_trampoline () \
+{ \
+ register char *a0 asm (M68K_STATIC_CHAIN_REG_NAME); \
+ asm (GLOBAL_ASM_OP "___trampoline"); \
+ asm ("___trampoline:"); \
+ asm volatile ("move%.l %0,%@" : : "m" (a0[22])); \
+ asm volatile ("move%.l %1,%0" : "=a" (a0) : "m" (a0[18])); \
+ asm ("rts":); \
+}
+
+/* There are two registers that can always be eliminated on the m68k.
+   The frame pointer and the arg pointer can be replaced by either the
+   hard frame pointer or the stack pointer, depending upon the
+   circumstances.  The hard frame pointer is not used before reload and
+   so it is not eligible for elimination.  */
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = m68k_initial_elimination_offset(FROM, TO)
+
+/* Addressing modes, and classification of registers for them. */
+
+#define HAVE_POST_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+
+/* Macros to check register numbers against specific register classes. */
+
+/* True for data registers, D0 through D7. */
+#define DATA_REGNO_P(REGNO) IN_RANGE (REGNO, 0, 7)
+
+/* True for address registers, A0 through A7. */
+#define ADDRESS_REGNO_P(REGNO) IN_RANGE (REGNO, 8, 15)
+
+/* True for integer registers, D0 through D7 and A0 through A7. */
+#define INT_REGNO_P(REGNO) IN_RANGE (REGNO, 0, 15)
+
+/* True for floating point registers, FP0 through FP7. */
+#define FP_REGNO_P(REGNO) IN_RANGE (REGNO, 16, 23)
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ (INT_REGNO_P (REGNO) \
+ || INT_REGNO_P (reg_renumber[REGNO]))
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (ADDRESS_REGNO_P (REGNO) \
+ || ADDRESS_REGNO_P (reg_renumber[REGNO]))
+
+#define REGNO_OK_FOR_INDEX_NONSTRICT_P(REGNO) \
+ (INT_REGNO_P (REGNO) \
+ || REGNO == ARG_POINTER_REGNUM \
+ || REGNO >= FIRST_PSEUDO_REGISTER)
+
+#define REGNO_OK_FOR_BASE_NONSTRICT_P(REGNO) \
+ (ADDRESS_REGNO_P (REGNO) \
+ || REGNO == ARG_POINTER_REGNUM \
+ || REGNO >= FIRST_PSEUDO_REGISTER)
+
+/* Now macros that check whether X is a register and also,
+ strictly, whether it is in a specified class.
+
+ These macros are specific to the m68k, and may be used only
+ in code for printing assembler insns and in conditions for
+ define_optimization. */
+
+/* 1 if X is a data register. */
+#define DATA_REG_P(X) (REG_P (X) && DATA_REGNO_P (REGNO (X)))
+
+/* 1 if X is an fp register. */
+#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
+
+/* 1 if X is an address register */
+#define ADDRESS_REG_P(X) (REG_P (X) && ADDRESS_REGNO_P (REGNO (X)))
+
+/* True if SYMBOL + OFFSET constants must refer to something within
+ SYMBOL's section. */
+#ifndef M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
+#define M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0
+#endif
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define CONSTANT_ADDRESS_P(X) \
+ ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \
+ || GET_CODE (X) == HIGH) \
+ && LEGITIMATE_CONSTANT_P (X))
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_MODE (X) != XFmode \
+ && !m68k_illegitimate_symbolic_constant_p (X))
+
+#ifndef REG_OK_STRICT
+#define REG_STRICT_P 0
+#else
+#define REG_STRICT_P 1
+#endif
+
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ (!symbolic_operand (X, VOIDmode) \
+ || (TARGET_PCREL && REG_STRICT_P) \
+ || m68k_tls_reference_p (X, true))
+
+#define REG_OK_FOR_BASE_P(X) \
+ m68k_legitimate_base_reg_p (X, REG_STRICT_P)
+
+#define REG_OK_FOR_INDEX_P(X) \
+ m68k_legitimate_index_reg_p (X, REG_STRICT_P)
+
+
+/* This address is OK as it stands. */
+#define PIC_CASE_VECTOR_ADDRESS(index) index
+#define CASE_VECTOR_MODE HImode
+#define CASE_VECTOR_PC_RELATIVE 1
+
+#define DEFAULT_SIGNED_CHAR 1
+#define MOVE_MAX 4
+#define SLOW_BYTE_ACCESS 0
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* The ColdFire FF1 instruction returns 32 for zero. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
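+
+/* That is, CLZ_DEFINED_VALUE_AT_ZERO (SImode, value) sets VALUE to 32
+ and evaluates to 1, telling the RTL optimizers that a CLZ of zero is
+ well defined and matches what the hardware ff1 instruction produces. */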
+
+#define STORE_FLAG_VALUE (-1)
+
+#define Pmode SImode
+#define FUNCTION_MODE QImode
+
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+/* Here we define machine-dependent flags and fields in cc_status
+ (see `conditions.h'). */
+
+/* Set if the cc value is actually in the 68881, so a floating point
+ conditional branch must be output. */
+#define CC_IN_68881 04000
+
+/* On the 68000, all the insns that store to an address register fail to
+ set the cc's. Moreover, in some cases these instructions can make the
+ saved cc's invalid. In those cases we clear out some or all of the
+ saved cc's so they won't be used. */
+#define NOTICE_UPDATE_CC(EXP,INSN) notice_update_cc (EXP, INSN)
+
+/* The shift instructions always clear the overflow bit. */
+#define CC_OVERFLOW_UNUSABLE 01000
+
+/* The shift instructions use the carry bit in a way not compatible with
+ conditional branches. conditions.h uses CC_NO_OVERFLOW for this purpose.
+ Rename it to something more understandable. */
+#define CC_NO_CARRY CC_NO_OVERFLOW
+
+#define OUTPUT_JUMP(NORMAL, FLOAT, NO_OV) \
+do { if (cc_prev_status.flags & CC_IN_68881) \
+ return FLOAT; \
+ if (cc_prev_status.flags & CC_NO_OVERFLOW) \
+ return NO_OV; \
+ return NORMAL; } while (0)
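+
+/* A conditional-branch output template typically invokes this as, e.g.,
+
+ OUTPUT_JUMP ("jeq %l0", "fjeq %l0", "jeq %l0");
+
+ returning the floating-point branch when the cc value lives in the
+ 68881 and the ordinary branch otherwise; the NO_OV variant is chosen
+ when the saved overflow bit cannot be trusted. */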
+
+/* Control the assembler format that we output. */
+
+#define ASM_APP_ON "#APP\n"
+#define ASM_APP_OFF "#NO_APP\n"
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define GLOBAL_ASM_OP "\t.globl\t"
+#define REGISTER_PREFIX ""
+#define LOCAL_LABEL_PREFIX ""
+#define USER_LABEL_PREFIX "_"
+#define IMMEDIATE_PREFIX "#"
+
+#define REGISTER_NAMES \
+{REGISTER_PREFIX"d0", REGISTER_PREFIX"d1", REGISTER_PREFIX"d2", \
+ REGISTER_PREFIX"d3", REGISTER_PREFIX"d4", REGISTER_PREFIX"d5", \
+ REGISTER_PREFIX"d6", REGISTER_PREFIX"d7", \
+ REGISTER_PREFIX"a0", REGISTER_PREFIX"a1", REGISTER_PREFIX"a2", \
+ REGISTER_PREFIX"a3", REGISTER_PREFIX"a4", REGISTER_PREFIX"a5", \
+ REGISTER_PREFIX"a6", REGISTER_PREFIX"sp", \
+ REGISTER_PREFIX"fp0", REGISTER_PREFIX"fp1", REGISTER_PREFIX"fp2", \
+ REGISTER_PREFIX"fp3", REGISTER_PREFIX"fp4", REGISTER_PREFIX"fp5", \
+ REGISTER_PREFIX"fp6", REGISTER_PREFIX"fp7", REGISTER_PREFIX"argptr" }
+
+#define M68K_FP_REG_NAME REGISTER_PREFIX"fp"
+
+/* Return a register name by index, handling %fp nicely.
+ We don't replace %fp for targets that don't map it to %a6
+ since it may confuse GAS. */
+#define M68K_REGNAME(r) ( \
+ ((FRAME_POINTER_REGNUM == A6_REG) \
+ && ((r) == FRAME_POINTER_REGNUM) \
+ && frame_pointer_needed) ? \
+ M68K_FP_REG_NAME : reg_names[(r)])
+
+/* On the Sun-3, the floating point registers have numbers
+ 18 to 25, not 16 to 23 as they do in the compiler. */
+#define DBX_REGISTER_NUMBER(REGNO) ((REGNO) < 16 ? (REGNO) : (REGNO) + 2)
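+
+/* Under this mapping d0-d7 and a0-a7 keep their numbers 0-15, while
+ fp0 (16) becomes 18 and fp7 (23) becomes 25, matching the Sun-3
+ numbering described above. */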
+
+/* Before the prologue, RA is at 0(%sp). */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM))
+
+/* After the prologue, RA is at 4(AP) in the current frame. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, UNITS_PER_WORD)) \
+ : gen_rtx_MEM (Pmode, plus_constant (FRAME, UNITS_PER_WORD)))
+
+/* We must not use the DBX register numbers for the DWARF 2 CFA column
+ numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER.
+ Instead use the identity mapping. */
+#define DWARF_FRAME_REGNUM(REG) \
+ (INT_REGNO_P (REG) || FP_REGNO_P (REG) ? (REG) : INVALID_REGNUM)
+
+/* The return column was originally 24, but gcc used 25 for a while too.
+ Define both registers 24 and 25 as Pmode ones and use 24 in our own
+ unwind information. */
+#define DWARF_FRAME_REGISTERS 25
+#define DWARF_FRAME_RETURN_COLUMN 24
+#define DWARF_ALT_FRAME_RETURN_COLUMN 25
+
+/* Before the prologue, the top of the frame is at 4(%sp). */
+#define INCOMING_FRAME_SP_OFFSET 4
+
+/* All registers are live on exit from an interrupt routine. */
+#define EPILOGUE_USES(REGNO) \
+ (reload_completed \
+ && (m68k_get_function_kind (current_function_decl) \
+ == m68k_fk_interrupt_handler))
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 2 ? (N) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, A0_REG)
+#define EH_RETURN_HANDLER_RTX \
+ gen_rtx_MEM (Pmode, \
+ gen_rtx_PLUS (Pmode, arg_pointer_rtx, \
+ plus_constant (EH_RETURN_STACKADJ_RTX, \
+ UNITS_PER_WORD)))
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ TARGET_ID_SHARED_LIBRARY and TARGET_SEP_DATA are designed to support
+ a read-only text segment without imposing a fixed gap between the
+ text and data segments. As a result, the text segment cannot refer
+ to anything in the data segment, even in PC-relative form. Because
+ .eh_frame refers to both code and data, it follows that .eh_frame
+ must be in the data segment itself, and that the offset between
+ .eh_frame and code will not be a link-time constant.
+
+ In theory, we could create a read-only .eh_frame by using DW_EH_PE_pcrel
+ | DW_EH_PE_indirect for all code references. However, gcc currently
+ handles indirect references using a per-TU constant pool. This means
+ that if a function and its eh_frame are removed by the linker, the
+ eh_frame's indirect references to the removed function will not be
+ removed, leading to an unresolved symbol error.
+
+ It isn't clear that any -msep-data or -mid-shared-library target
+ would benefit from a read-only .eh_frame anyway. In particular,
+ no known target that supports these options has a feature like
+ PT_GNU_RELRO. Without any such feature to motivate them, indirect
+ references would be unnecessary bloat, so we simply use an absolute
+ pointer for code and global references. We still use pc-relative
+ references to data, as this avoids a relocation. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (flag_pic \
+ && !((TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA) \
+ && ((GLOBAL) || (CODE))) \
+ ? ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 \
+ : DW_EH_PE_absptr)
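+
+/* With the definition above, a typical PIC compile therefore encodes
+ local data and code references as DW_EH_PE_pcrel | DW_EH_PE_sdata4,
+ adds DW_EH_PE_indirect for global symbols, and falls back to
+ DW_EH_PE_absptr both for non-PIC code and, per the reasoning above,
+ for code and global references under -msep-data or
+ -mid-shared-library. */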
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ asm_fprintf (FILE, "%U%s", NAME)
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long)(NUM))
+
+#define ASM_OUTPUT_REG_PUSH(FILE,REGNO) \
+ asm_fprintf (FILE, (MOTOROLA \
+ ? "\tmove.l %s,-(%Rsp)\n" \
+ : "\tmovel %s,%Rsp@-\n"), \
+ reg_names[REGNO])
+
+#define ASM_OUTPUT_REG_POP(FILE,REGNO) \
+ asm_fprintf (FILE, (MOTOROLA \
+ ? "\tmove.l (%Rsp)+,%s\n" \
+ : "\tmovel %Rsp@+,%s\n"), \
+ reg_names[REGNO])
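+
+/* With the empty REGISTER_PREFIX defined above, pushing d0 thus emits
+ "move.l d0,-(sp)" in Motorola syntax and "movel d0,sp@-" in MIT
+ syntax; the pop macro emits the mirror-image pop. */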
+
+/* The m68k does not use absolute case-vectors, but we must define this macro
+ anyway. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ asm_fprintf (FILE, "\t.long %LL%d\n", VALUE)
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ asm_fprintf (FILE, "\t.word %LL%d-%LL%d\n", VALUE, REL)
+
+/* We don't have a way to align to more than a two-byte boundary, so do the
+ best we can and don't complain. */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) >= 1) \
+ fprintf (FILE, "\t.even\n");
+
+#ifdef HAVE_GAS_BALIGN_AND_P2ALIGN
+/* Use "move.l %a4,%a4" to advance within code. */
+#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \
+ if ((LOG) > 0) \
+ fprintf ((FILE), "\t.balignw %u,0x284c\n", 1 << (LOG));
+#endif
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.skip %u\n", (int)(SIZE))
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ m68k_final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+/* On the 68000, we use several CODE characters:
+ '.' for dot needed in Motorola-style opcode names.
+ '-' for an operand pushing on the stack:
+ sp@-, -(sp) or -(%sp) depending on the style of syntax.
+ '+' for an operand pushing on the stack:
+ sp@+, (sp)+ or (%sp)+ depending on the style of syntax.
+ '@' for a reference to the top word on the stack:
+ sp@, (sp) or (%sp) depending on the style of syntax.
+ '#' for an immediate operand prefix (# in MIT and Motorola syntax
+ but & in SGS syntax).
+ '!' for the fpcr register (used in some float-to-fixed conversions).
+ '$' for the letter `s' in an op code, but only on the 68040.
+ '&' for the letter `d' in an op code, but only on the 68040.
+ '/' for register prefix needed by longlong.h.
+ '?' for m68k_library_id_string
+
+ 'b' for byte insn (no effect on the Sun; this is for the ISI).
+ 'd' to force memory addressing to be absolute, not relative.
+ 'f' for float insn (print a CONST_DOUBLE as a float rather than in hex)
+ 'x' for float insn (print a CONST_DOUBLE as a float rather than in hex),
+ or print pair of registers as rx:ry. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '.' || (CODE) == '#' || (CODE) == '-' \
+ || (CODE) == '+' || (CODE) == '@' || (CODE) == '!' \
+ || (CODE) == '$' || (CODE) == '&' || (CODE) == '/' || (CODE) == '?')
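+
+/* For instance, if operand 1 is d1, the template "move%.l %1,%-"
+ prints as "movel d1,sp@-" in MIT syntax or "move.l d1,-(sp)" in
+ Motorola syntax, with print_operand handling the '.' and '-' codes. */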
+
+
+/* See m68k.c for the m68k specific codes. */
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \
+do { \
+ if (! m68k_output_addr_const_extra (FILE, (X))) \
+ goto FAIL; \
+} while (0);
+
+/* Values used in the MICROARCH argument to M68K_DEVICE. */
+enum uarch_type
+{
+ u68000,
+ u68010,
+ u68020,
+ u68020_40,
+ u68020_60,
+ u68030,
+ u68040,
+ u68060,
+ ucpu32,
+ ucfv1,
+ ucfv2,
+ ucfv3,
+ ucfv4,
+ ucfv4e,
+ ucfv5,
+ unk_arch
+};
+
+/* An enumeration of all supported target devices. */
+enum target_device
+{
+#define M68K_DEVICE(NAME,ENUM_VALUE,FAMILY,MULTILIB,MICROARCH,ISA,FLAGS) \
+ ENUM_VALUE,
+#include "m68k-devices.def"
+#undef M68K_DEVICE
+ unk_device
+};
+
+enum fpu_type
+{
+ FPUTYPE_NONE,
+ FPUTYPE_68881,
+ FPUTYPE_COLDFIRE
+};
+
+enum m68k_function_kind
+{
+ m68k_fk_normal_function,
+ m68k_fk_interrupt_handler,
+ m68k_fk_interrupt_thread
+};
+
+/* Variables in m68k.c; see there for details. */
+extern const char *m68k_library_id_string;
+extern enum target_device m68k_cpu;
+extern enum uarch_type m68k_tune;
+extern enum fpu_type m68k_fpu;
+extern unsigned int m68k_cpu_flags;
+extern unsigned int m68k_tune_flags;
+extern const char *m68k_symbolic_call;
+extern const char *m68k_symbolic_jump;
+
+enum M68K_SYMBOLIC_CALL { M68K_SYMBOLIC_CALL_NONE, M68K_SYMBOLIC_CALL_JSR,
+ M68K_SYMBOLIC_CALL_BSR_C, M68K_SYMBOLIC_CALL_BSR_P };
+
+extern enum M68K_SYMBOLIC_CALL m68k_symbolic_call_var;
+
+/* ??? HOST_WIDE_INT is not being defined for auto-generated files.
+ Work around that. */
+#ifdef HOST_WIDE_INT
+typedef enum { MOVL, SWAP, NEGW, NOTW, NOTB, MOVQ, MVS, MVZ }
+ M68K_CONST_METHOD;
+
+extern M68K_CONST_METHOD m68k_const_method (HOST_WIDE_INT);
+#endif
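+
+/* m68k_const_method classifies how an SImode constant is best loaded;
+ for example, a value such as 42 fits a moveq and should yield MOVQ,
+ while 42 << 16 can be built with a moveq followed by a swap, i.e.
+ SWAP. */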
+
+extern void m68k_emit_move_double (rtx [2]);
+
+extern int m68k_sched_address_bypass_p (rtx, rtx);
+extern int m68k_sched_indexed_address_bypass_p (rtx, rtx);
+
+#define CPU_UNITS_QUERY 1
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
new file mode 100644
index 000000000..f89037f2e
--- /dev/null
+++ b/gcc/config/m68k/m68k.md
@@ -0,0 +1,7808 @@
+;;- Machine description for GNU compiler, Motorola 68000 Version
+;; Copyright (C) 1987, 1988, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001,
+;; 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- Information about MCF5200 port.
+
+;;- The MCF5200 "ColdFire" architecture is a reduced version of the
+;;- 68k ISA.  Differences include reduced support for byte and word
+;;- operands and the removal of BCD, bitfield, rotate, and integer
+;;- divide instructions.  The TARGET_COLDFIRE flag disables the use
+;;- of the removed opcodes and addressing modes.
+;;-
+
+
+;;- instruction definitions
+
+;;- @@The original PO technology requires these to be ordered by speed,
+;;- @@ so that the assigner will pick the fastest.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;;- When naming insns (operand 0 of define_insn), be careful about using
+;;- names from other targets' machine descriptions.
+
+;;- cpp macro #define NOTICE_UPDATE_CC in file tm.h handles condition code
+;;- updates for most instructions.
+
+;;- Operand classes for the register allocator:
+;;- 'a' one of the address registers can be used.
+;;- 'd' one of the data registers can be used.
+;;- 'f' one of the m68881/fpu registers can be used
+;;- 'r' either a data or an address register can be used.
+
+;;- Immediate Floating point operator constraints
+;;- 'G' a floating point constant that is *NOT* one of the standard
+;; 68881 constant values (to force calling output_move_const_double
+;; to get it from rom if it is a 68881 constant).
+;;
+;; See the functions standard_XXX_constant_p in output-m68k.c for more
+;; info.
+
+;;- Immediate integer operand constraints:
+;;- 'I' 1 .. 8
+;;- 'J' -32768 .. 32767
+;;- 'K' all integers EXCEPT -128 .. 127
+;;- 'L' -8 .. -1
+;;- 'M' all integers EXCEPT -256 .. 255
+;;- 'N' 24 .. 31
+;;- 'O' 16
+;;- 'P' 8 .. 15
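+
+;;- For example, addq and subq take immediates 1-8 only, so patterns
+;;- that generate them use 'I', while 'K' keeps constants that already
+;;- fit moveq's -128 .. 127 range out of alternatives meant for larger
+;;- values.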
+
+;;- Assembler specs:
+;;- "%." size separator ("." or "") move%.l d0,d1
+;;- "%-" push operand "sp@-" move%.l d0,%-
+;;- "%+" pop operand "sp@+" move%.l d0,%+
+;;- "%@" top of stack "sp@" move%.l d0,%@
+;;- "%!" fpcr register
+;;- "%$" single-precision fp specifier ("s" or "") f%$add.x fp0,fp1
+;;- "%&" double-precision fp specifier ("d" or "") f%&add.x fp0,fp1
+
+;;- Information about 68040 port.
+
+;;- The 68040 executes all 68030 and 68881/2 instructions, but some must
+;;- be emulated in software by the OS. It is faster to avoid these
+;;- instructions and issue a library call rather than trapping into
+;;- the kernel. The affected instructions are fintrz and fscale. The
+;;- TUNE_68040 flag turns the use of the opcodes off.
+
+;;- The '040 also implements a set of new floating-point instructions
+;;- which specify the rounding precision in the opcode. This finally
+;;- permits the 68k series to be truly IEEE compliant, and solves all
+;;- issues of excess precision accumulating in the extended registers.
+;;- By default, GCC does not use these instructions, since such code will
+;;- not run on an '030. To use these instructions, use the -m68040-only
+;;- switch.
+
+;;- These new instructions aren't directly in the md. They are brought
+;;- into play by defining "%$" and "%&" to expand to "s" and "d" rather
+;;- than "".
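+
+;;- For example, with -m68040-only the template "f%$move%.s %f1,%0"
+;;- used by movsf below assembles as fsmove.s, rounding the result to
+;;- single precision; without it, "%$" is empty and a plain fmove.s is
+;;- emitted.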
+
+;;- Information about 68060 port.
+
+;;- The 68060 executes all 68030 and 68881/2 instructions, but some must
+;;- be emulated in software by the OS. It is faster to avoid these
+;;- instructions and issue a library call rather than trapping into
+;;- the kernel. The affected instructions are: divs.l <ea>,Dr:Dq;
+;;- divu.l <ea>,Dr:Dq; muls.l <ea>,Dr:Dq; mulu.l <ea>,Dr:Dq; and
+;;- fscale. The TUNE_68060 flag turns the use of the opcodes off.
+
+;;- Some of these insns are composites of several m68000 op codes.
+;;- The assembler (or final @@??) ensures that the appropriate one is
+;;- selected.
+
+;; UNSPEC usage:
+
+(define_constants
+ [(UNSPEC_SIN 1)
+ (UNSPEC_COS 2)
+ (UNSPEC_GOT 3)
+ (UNSPEC_IB 4)
+ (UNSPEC_TIE 5)
+ (UNSPEC_RELOC16 6)
+ (UNSPEC_RELOC32 7)
+ ])
+
+;; UNSPEC_VOLATILE usage:
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ ])
+
+;; Registers by name.
+(define_constants
+ [(D0_REG 0)
+ (A0_REG 8)
+ (A1_REG 9)
+ (PIC_REG 13)
+ (A6_REG 14)
+ (SP_REG 15)
+ (FP0_REG 16)
+ ])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Attributes
+;; ::
+;; ::::::::::::::::::::
+
+;; Processor type.
+(define_attr "cpu" "cfv1, cfv2, cfv3, cfv4, unknown"
+ (const (symbol_ref "m68k_sched_cpu")))
+
+;; MAC type.
+(define_attr "mac" "no, cf_mac, cf_emac"
+ (const (symbol_ref "m68k_sched_mac")))
+
+;; Instruction type for use in scheduling description.
+;; _l and _w suffixes indicate size of the operands of instruction.
+;; alu - usual arithmetic or logic instruction.
+;; aluq - arithmetic or logic instruction which has a quick immediate (the one
+;; that is encoded in the instruction word) for its Y operand.
+;; alux - Arithmetic instruction that uses carry bit (e.g., addx and subx).
+;; bcc - conditional branch.
+;; bitr - bit operation that only updates flags.
+;; bitrw - bit operation that updates flags and output operand.
+;; bra, bsr, clr, cmp, div, ext - corresponding instruction.
+;; falu, fbcc, fcmp, fdiv, fmove, fmul, fneg, fsqrt, ftst - corresponding
+;; instruction.
+;; ib - fake instruction to reserve slots in the ColdFire V1,V2,V3
+;; instruction buffer.
+;; ignore - fake instruction.
+;; jmp, jsr, lea, link, mov3q, move, moveq, mul - corresponding instruction.
+;; mvsz - mvs or mvz instruction.
+;; neg, nop, pea, rts, scc - corresponding instruction.
+;; shift - arithmetic or logical shift instruction.
+;; trap, tst, unlk - corresponding instruction.
+(define_attr "type"
+ "alu_l,aluq_l,alux_l,bcc,bitr,bitrw,bra,bsr,clr,clr_l,cmp,cmp_l,
+ div_w,div_l,ext,
+ falu,fbcc,fcmp,fdiv,fmove,fmul,fneg,fsqrt,ftst,
+ ib,ignore,
+ jmp,jsr,lea,link,mov3q_l,move,move_l,moveq_l,mul_w,mul_l,mvsz,neg_l,nop,
+ pea,rts,scc,shift,
+ trap,tst,tst_l,unlk,
+ unknown"
+ (const_string "unknown"))
+
+;; Index of the X or Y operand in recog_data.operand[].
+;; Should be used only within opx_type and opy_type.
+(define_attr "opx" "" (const_int 0))
+(define_attr "opy" "" (const_int 1))
+
+;; Type of the Y operand.
+;; See m68k.c: enum attr_op_type.
+(define_attr "opy_type"
+ "none,Rn,FPn,mem1,mem234,mem5,mem6,mem7,imm_q,imm_w,imm_l"
+ (cond [(eq_attr "type" "ext,fbcc,ftst,neg_l,bcc,bra,bsr,clr,clr_l,ib,ignore,
+ jmp,jsr,nop,rts,scc,trap,tst,tst_l,
+ unlk,unknown") (const_string "none")
+ (eq_attr "type" "lea,pea")
+ (symbol_ref "m68k_sched_attr_opy_type (insn, 1)")]
+ (symbol_ref "m68k_sched_attr_opy_type (insn, 0)")))
+
+;; Type of the X operand.
+;; See m68k.c: enum attr_op_type.
+(define_attr "opx_type"
+ "none,Rn,FPn,mem1,mem234,mem5,mem6,mem7,imm_q,imm_w,imm_l"
+ (cond [(eq_attr "type" "ib,ignore,nop,rts,trap,unlk,
+ unknown") (const_string "none")
+ (eq_attr "type" "pea") (const_string "mem1")
+ (eq_attr "type" "jmp,jsr")
+ (symbol_ref "m68k_sched_attr_opx_type (insn, 1)")]
+ (symbol_ref "m68k_sched_attr_opx_type (insn, 0)")))
+
+;; Access to the X operand: none, read, write, read/write, unknown.
+;; Access to the Y operand is either none (if opy_type is none)
+;; or read otherwise.
+(define_attr "opx_access" "none, r, w, rw"
+ (cond [(eq_attr "type" "ib,ignore,nop,rts,trap,unlk,
+ unknown") (const_string "none")
+ (eq_attr "type" "bcc,bra,bsr,bitr,cmp,cmp_l,fbcc,fcmp,ftst,
+ jmp,jsr,tst,tst_l") (const_string "r")
+ (eq_attr "type" "clr,clr_l,fneg,fmove,lea,
+ mov3q_l,move,move_l,moveq_l,mvsz,
+ pea,scc") (const_string "w")
+ (eq_attr "type" "alu_l,aluq_l,alux_l,bitrw,div_w,div_l,ext,
+ falu,fdiv,fmul,fsqrt,link,mul_w,mul_l,
+ neg_l,shift") (const_string "rw")]
+ ;; Should never be used.
+ (symbol_ref "(gcc_unreachable (), OPX_ACCESS_NONE)")))
+
+;; Memory accesses of the insn.
+;; 00 - no memory references
+;; 10 - memory is read
+;; i0 - indexed memory is read
+;; 01 - memory is written
+;; 0i - indexed memory is written
+;; 11 - memory is read, memory is written
+;; i1 - indexed memory is read, memory is written
+;; 1i - memory is read, indexed memory is written
+(define_attr "op_mem" "00, 10, i0, 01, 0i, 11, i1, 1i"
+ (symbol_ref "m68k_sched_attr_op_mem (insn)"))
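+
+;; For example, "add.l (a0),d0" only reads memory, so its op_mem is 10,
+;; while "move.l d0,(a0,d1.l)" writes through an indexed address,
+;; giving 0i.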
+
+;; Instruction size in words.
+(define_attr "size" "1,2,3"
+ (symbol_ref "m68k_sched_attr_size (insn)"))
+
+;; Alternative is OK for ColdFire.
+(define_attr "ok_for_coldfire" "yes,no" (const_string "yes"))
+
+;; Define 'enabled' attribute.
+(define_attr "enabled" ""
+ (cond [(and (ne (symbol_ref "TARGET_COLDFIRE") (const_int 0))
+ (eq_attr "ok_for_coldfire" "no"))
+ (const_int 0)]
+ (const_int 1)))
+
+;; Mode macros for floating point operations.
+;; Valid floating point modes
+(define_mode_iterator FP [SF DF (XF "TARGET_68881")])
+;; Mnemonic infix to round result
+(define_mode_attr round [(SF "%$") (DF "%&") (XF "")])
+;; Mnemonic infix to round result for mul or div instruction
+(define_mode_attr round_mul [(SF "sgl") (DF "%&") (XF "")])
+;; Suffix specifying source operand format
+(define_mode_attr prec [(SF "s") (DF "d") (XF "x")])
+;; Allowable D registers
+(define_mode_attr dreg [(SF "d") (DF "") (XF "")])
+;; Allowable 68881 constant constraints
+(define_mode_attr const [(SF "F") (DF "G") (XF "")])
+
+
+(define_insn_and_split "*movdf_internal"
+ [(set (match_operand:DF 0 "push_operand" "=m, m")
+ (match_operand:DF 1 "general_operand" "f, ro<>E"))]
+ ""
+ "@
+ fmove%.d %f1,%0
+ #"
+ "&& reload_completed && (extract_constrain_insn_cached (insn), which_alternative == 1)"
+ [(const_int 0)]
+{
+ m68k_emit_move_double (operands);
+ DONE;
+}
+ [(set_attr "type" "fmove,*")])
+
+(define_insn_and_split "pushdi"
+ [(set (match_operand:DI 0 "push_operand" "=m")
+ (match_operand:DI 1 "general_operand" "ro<>Fi"))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ m68k_emit_move_double (operands);
+ DONE;
+})
+
+;; We don't want to allow a constant operand for test insns because
+;; (set (cc0) (const_int foo)) has no mode information. Such insns will
+;; be folded while optimizing anyway.
+
+(define_insn "tstdi"
+ [(set (cc0)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "am,d")
+ (const_int 0)))
+ (clobber (match_scratch:SI 1 "=X,d"))
+ (clobber (match_scratch:DI 2 "=d,X"))]
+ ""
+{
+ if (which_alternative == 0)
+ {
+ rtx xoperands[2];
+
+ xoperands[0] = operands[2];
+ xoperands[1] = operands[0];
+ output_move_double (xoperands);
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "neg%.l %R2\;negx%.l %2";
+ }
+ if (find_reg_note (insn, REG_DEAD, operands[0]))
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "neg%.l %R0\;negx%.l %0";
+ }
+ else
+ /*
+ 'sub' clears %1, and also clears the X cc bit
+ 'tst' sets the Z cc bit according to the low part of the DImode operand
+ 'subx %1' (i.e. subx #0) acts as a (non-existent) tstx on the high part.
+ */
+ return "sub%.l %1,%1\;tst%.l %R0\;subx%.l %1,%0";
+})
+
+;; If you think that the 68020 does not support tstl a0,
+;; reread page B-167 of the 68020 manual more carefully.
+(define_insn "*tstsi_internal_68020_cf"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "rm")
+ (const_int 0)))]
+ "TARGET_68020 || TARGET_COLDFIRE"
+ "tst%.l %0"
+ [(set_attr "type" "tst_l")])
+
+;; On an address reg, cmpw may replace cmpl.
+(define_insn "*tstsi_internal"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "dm,r")
+ (const_int 0)))]
+ "!(TARGET_68020 || TARGET_COLDFIRE)"
+ "@
+ tst%.l %0
+ cmp%.w #0,%0"
+ [(set_attr "type" "tst_l,cmp")])
+
+;; This can't use an address register, because comparisons
+;; with address registers as second operand always test the whole word.
+(define_insn "*tsthi_internal"
+ [(set (cc0)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "dm")
+ (const_int 0)))]
+ ""
+ "tst%.w %0"
+ [(set_attr "type" "tst")])
+
+(define_insn "*tstqi_internal"
+ [(set (cc0)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "dm")
+ (const_int 0)))]
+ ""
+ "tst%.b %0"
+ [(set_attr "type" "tst")])
+
+(define_insn "tst<mode>_68881"
+ [(set (cc0)
+ (compare (match_operand:FP 0 "general_operand" "f<FP:dreg>m")
+ (match_operand:FP 1 "const0_operand" "H")))]
+ "TARGET_68881"
+{
+ cc_status.flags = CC_IN_68881;
+ if (FP_REG_P (operands[0]))
+ return "ftst%.x %0";
+ return "ftst%.<FP:prec> %0";
+}
+ [(set_attr "type" "ftst")])
+
+(define_insn "tst<mode>_cf"
+ [(set (cc0)
+ (compare (match_operand:FP 0 "general_operand" "f<FP:dreg><Q>U")
+ (match_operand:FP 1 "const0_operand" "H")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ cc_status.flags = CC_IN_68881;
+ if (FP_REG_P (operands[0]))
+ return "ftst%.d %0";
+ return "ftst%.<FP:prec> %0";
+}
+ [(set_attr "type" "ftst")])
+
+
+;; compare instructions.
+
+(define_insn "*cmpdi_internal"
+ [(set (cc0)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0,d")
+ (match_operand:DI 2 "general_operand" "d,0")))
+ (clobber (match_scratch:DI 0 "=d,d"))]
+ ""
+{
+ if (rtx_equal_p (operands[0], operands[1]))
+ return "sub%.l %R2,%R0\;subx%.l %2,%0";
+ else
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "sub%.l %R1,%R0\;subx%.l %1,%0";
+ }
+})
+
+(define_insn "cmpdi"
+ [(set (cc0)
+ (compare (match_operand:DI 0 "nonimmediate_operand")
+ (match_operand:DI 1 "general_operand")))
+ (clobber (match_scratch:DI 2))]
+ ""
+ "")
+
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DI 1 "nonimmediate_operand")
+ (match_operand:DI 2 "general_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+{
+ if (operands[2] == const0_rtx)
+ emit_insn (gen_tstdi (operands[1]));
+ else
+ emit_insn (gen_cmpdi (operands[1], operands[2]));
+ operands[1] = cc0_rtx;
+ operands[2] = const0_rtx;
+})
+
+(define_expand "cstoredi4"
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(match_operand:DI 2 "nonimmediate_operand")
+ (match_operand:DI 3 "general_operand")]))]
+ ""
+{
+ if (operands[3] == const0_rtx)
+ emit_insn (gen_tstdi (operands[2]));
+ else
+ emit_insn (gen_cmpdi (operands[2], operands[3]));
+ operands[2] = cc0_rtx;
+ operands[3] = const0_rtx;
+})
+
+
+(define_expand "cbranchsi4"
+ [(set (cc0)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cstoresi4"
+ [(set (cc0)
+ (compare (match_operand:SI 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "general_operand" "")))
+ (set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+ "")
+
+
+;; A composite of the cmp, cmpa, cmpi & cmpm m68000 op codes.
+(define_insn ""
+ [(set (cc0)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "rKT,rKs,mSr,mSa,>")
+ (match_operand:SI 1 "general_src_operand" "mSr,mSa,KTr,Ksr,>")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ return "cmpm%.l %1,%0";
+ if (REG_P (operands[1])
+ || (!REG_P (operands[0]) && GET_CODE (operands[0]) != MEM))
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "cmp%.l %d0,%d1";
+ }
+ if (ADDRESS_REG_P (operands[0])
+ && GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) < 0x8000
+ && INTVAL (operands[1]) >= -0x8000)
+ return "cmp%.w %1,%0";
+ return "cmp%.l %d1,%d0";
+})
+
+(define_insn "*cmpsi_cf"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "mrKs,r")
+ (match_operand:SI 1 "general_operand" "r,mrKs")))]
+ "TARGET_COLDFIRE"
+{
+ if (REG_P (operands[1])
+ || (!REG_P (operands[0]) && GET_CODE (operands[0]) != MEM))
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "cmp%.l %d0,%d1";
+ }
+ return "cmp%.l %d1,%d0";
+}
+ [(set_attr "type" "cmp_l")])
+
+(define_expand "cbranchhi4"
+ [(set (cc0)
+ (compare (match_operand:HI 1 "nonimmediate_src_operand" "")
+ (match_operand:HI 2 "m68k_subword_comparison_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cstorehi4"
+ [(set (cc0)
+ (compare (match_operand:HI 2 "nonimmediate_operand" "")
+ (match_operand:HI 3 "m68k_subword_comparison_operand" "")))
+ (set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (cc0)
+ (compare (match_operand:HI 0 "nonimmediate_src_operand" "rnmS,d,n,mS,>")
+ (match_operand:HI 1 "general_src_operand" "d,rnmS,mS,n,>")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ return "cmpm%.w %1,%0";
+ if ((REG_P (operands[1]) && !ADDRESS_REG_P (operands[1]))
+ || (!REG_P (operands[0]) && GET_CODE (operands[0]) != MEM))
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "cmp%.w %d0,%d1";
+ }
+ return "cmp%.w %d1,%d0";
+})
+
+(define_expand "cbranchqi4"
+ [(set (cc0)
+ (compare (match_operand:QI 1 "nonimmediate_src_operand" "")
+ (match_operand:QI 2 "m68k_subword_comparison_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cstoreqi4"
+ [(set (cc0)
+ (compare (match_operand:QI 2 "nonimmediate_src_operand" "")
+ (match_operand:QI 3 "m68k_subword_comparison_operand" "")))
+ (set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 "ordered_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (cc0)
+ (compare (match_operand:QI 0 "nonimmediate_src_operand" "dn,dmS,>")
+ (match_operand:QI 1 "general_src_operand" "dmS,nd,>")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ return "cmpm%.b %1,%0";
+ if (REG_P (operands[1])
+ || (!REG_P (operands[0]) && GET_CODE (operands[0]) != MEM))
+ {
+ cc_status.flags |= CC_REVERSED; /*|*/
+ return "cmp%.b %d0,%d1";
+ }
+ return "cmp%.b %d1,%d0";
+})
+
+(define_expand "cbranch<mode>4"
+ [(set (cc0)
+ (compare (match_operand:FP 1 "register_operand" "")
+ (match_operand:FP 2 "fp_src_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_expand "cstore<mode>4"
+ [(set (cc0)
+ (compare (match_operand:FP 2 "register_operand" "")
+ (match_operand:FP 3 "fp_src_operand" "")))
+ (set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 "m68k_cstore_comparison_operator"
+ [(cc0) (const_int 0)]))]
+ "TARGET_HARD_FLOAT && !(TUNE_68060 || TARGET_COLDFIRE_FPU)"
+ "if (TARGET_COLDFIRE && operands[2] != const0_rtx)
+ FAIL;")
+
+(define_insn "*cmp<mode>_68881"
+ [(set (cc0)
+ (compare (match_operand:FP 0 "fp_src_operand" "f,f,<FP:dreg>mF")
+ (match_operand:FP 1 "fp_src_operand" "f,<FP:dreg>mF,f")))]
+ "TARGET_68881
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "@
+ fcmp%.x %1,%0
+ fcmp%.<FP:prec> %f1,%0
+ fcmp%.<FP:prec> %0,%f1"
+ [(set_attr "type" "fcmp")])
+
+(define_insn "*cmp<mode>_cf"
+ [(set (cc0)
+ (compare (match_operand:FP 0 "fp_src_operand" "f,f,<FP:dreg><Q>U")
+ (match_operand:FP 1 "fp_src_operand" "f,<FP:dreg><Q>U,f")))]
+ "TARGET_COLDFIRE_FPU
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "@
+ fcmp%.d %1,%0
+ fcmp%.<FP:prec> %f1,%0
+ fcmp%.<FP:prec> %0,%f1"
+ [(set_attr "type" "fcmp")])
+
+;; Recognizers for btst instructions.
+
+;; ColdFire/5200 only allows "<Q>" type addresses when the bit position is
+;; specified as a constant, so we must disable all patterns that may extract
+;; from a MEM at a constant bit position if we can't use this as a constraint.
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_src_operand" "oS")
+ (const_int 1)
+ (minus:SI (const_int 7)
+ (match_operand:SI 1 "general_operand" "di")))
+ (const_int 0)))]
+ "!TARGET_COLDFIRE"
+{
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+})
+
+;; This is the same as the above pattern except for the constraints. The 'i'
+;; has been deleted.
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_operand" "o")
+ (const_int 1)
+ (minus:SI (const_int 7)
+ (match_operand:SI 1 "general_operand" "d")))
+ (const_int 0)))]
+ "TARGET_COLDFIRE"
+{
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+})
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "d")
+ (const_int 1)
+ (minus:SI (const_int 31)
+ (match_operand:SI 1 "general_operand" "di")))
+ (const_int 0)))]
+ ""
+{
+ return output_btst (operands, operands[1], operands[0], insn, 31);
+})
+
+;; The following two patterns are like the previous two
+;; except that they use the fact that bit-number operands
+;; are automatically masked to 3 or 5 bits.
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_operand" "o")
+ (const_int 1)
+ (minus:SI (const_int 7)
+ (and:SI
+ (match_operand:SI 1 "register_operand" "d")
+ (const_int 7))))
+ (const_int 0)))]
+ ""
+{
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+})
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "d")
+ (const_int 1)
+ (minus:SI (const_int 31)
+ (and:SI
+ (match_operand:SI 1 "register_operand" "d")
+ (const_int 31))))
+ (const_int 0)))]
+ ""
+{
+ return output_btst (operands, operands[1], operands[0], insn, 31);
+})
+
+;; Nonoffsettable mem refs are ok in this one pattern
+;; since we don't try to adjust them.
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_operand" "m")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "(unsigned) INTVAL (operands[1]) < 8 && !TARGET_COLDFIRE"
+{
+ operands[1] = GEN_INT (7 - INTVAL (operands[1]));
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+})
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "do")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ operands[0] = adjust_address (operands[0], QImode,
+ INTVAL (operands[1]) / 8);
+ operands[1] = GEN_INT (7 - INTVAL (operands[1]) % 8);
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+ }
+ operands[1] = GEN_INT (31 - INTVAL (operands[1]));
+ return output_btst (operands, operands[1], operands[0], insn, 31);
+})
+
+;; This is the same as the above pattern except for the constraints.
+;; The 'o' has been replaced with 'Q'.
+
+(define_insn ""
+ [(set
+ (cc0)
+ (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "dQ")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ operands[0] = adjust_address (operands[0], QImode,
+ INTVAL (operands[1]) / 8);
+ operands[1] = GEN_INT (7 - INTVAL (operands[1]) % 8);
+ return output_btst (operands, operands[1], operands[0], insn, 7);
+ }
+ operands[1] = GEN_INT (31 - INTVAL (operands[1]));
+ return output_btst (operands, operands[1], operands[0], insn, 31);
+})
+
+
+;; move instructions
+
+;; A special case in which it is not desirable
+;; to reload the constant into a data register.
+(define_insn "pushexthisi_const"
+ [(set (match_operand:SI 0 "push_operand" "=m,m,m")
+ (match_operand:SI 1 "const_int_operand" "C0,R,J"))]
+ "INTVAL (operands[1]) >= -0x8000 && INTVAL (operands[1]) < 0x8000"
+ "@
+ clr%.l %0
+ mov3q%.l %1,%-
+ pea %a1"
+ [(set_attr "type" "clr_l,mov3q_l,pea")])
+
+;This is never used.
+;(define_insn "swapsi"
+; [(set (match_operand:SI 0 "nonimmediate_operand" "+r")
+; (match_operand:SI 1 "general_operand" "+r"))
+; (set (match_dup 1) (match_dup 0))]
+; ""
+; "exg %1,%0")
+
+;; Special case of fullword move when source is zero for 68000_10.
+;; moveq is faster on the 68000.
+(define_insn "*movsi_const0_68000_10"
+ [(set (match_operand:SI 0 "movsi_const0_operand" "=d,a,g")
+ (const_int 0))]
+ "TUNE_68000_10"
+ "@
+ moveq #0,%0
+ sub%.l %0,%0
+ clr%.l %0"
+ [(set_attr "type" "moveq_l,alu_l,clr_l")
+ (set_attr "opy" "*,0,*")])
+
+;; Special case of fullword move when source is zero for 68040_60.
+;; On the '040, 'subl an,an' takes 2 clocks while lea takes only 1
+(define_insn "*movsi_const0_68040_60"
+ [(set (match_operand:SI 0 "movsi_const0_operand" "=a,g")
+ (const_int 0))]
+ "TUNE_68040_60"
+{
+ if (which_alternative == 0)
+ return MOTOROLA ? "lea 0.w,%0" : "lea 0:w,%0";
+ else if (which_alternative == 1)
+ return "clr%.l %0";
+ else
+ {
+ gcc_unreachable ();
+ return "";
+ }
+}
+ [(set_attr "type" "lea,clr_l")])
+
+;; Special case of fullword move when source is zero.
+(define_insn "*movsi_const0"
+ [(set (match_operand:SI 0 "movsi_const0_operand" "=a,g")
+ (const_int 0))]
+ "!(TUNE_68000_10 || TUNE_68040_60)"
+ "@
+ sub%.l %0,%0
+ clr%.l %0"
+ [(set_attr "type" "alu_l,clr_l")
+ (set_attr "opy" "0,*")])
+
+;; General case of fullword move.
+;;
+;; This is the main "hook" for PIC code. When generating
+;; PIC, movsi is responsible for determining when the source address
+;; needs PIC relocation and appropriately calling legitimize_pic_address
+;; to perform the actual relocation.
+;;
+;; In both the PIC and non-PIC cases the patterns generated will be
+;; matched by the next define_insn.
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))]
+ ""
+{
+ rtx tmp, base, offset;
+
+ /* Recognize the case where operand[1] is a reference to thread-local
+ data and load its address to a register. */
+ if (!TARGET_PCREL && m68k_tls_reference_p (operands[1], false))
+ {
+ rtx tmp = operands[1];
+ rtx addend = NULL;
+
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (tmp, 0), 1);
+ tmp = XEXP (XEXP (tmp, 0), 0);
+ }
+
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0);
+
+ tmp = m68k_legitimize_tls_address (tmp);
+
+ if (addend)
+ {
+ if (!REG_P (tmp))
+ {
+ rtx reg;
+
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, tmp);
+ tmp = reg;
+ }
+
+ tmp = gen_rtx_PLUS (SImode, tmp, addend);
+ }
+
+ operands[1] = tmp;
+ }
+ else if (flag_pic && !TARGET_PCREL && symbolic_operand (operands[1], SImode))
+ {
+ /* The source is an address which requires PIC relocation.
+ Call legitimize_pic_address with the source, mode, and a relocation
+ register (a new pseudo, or the final destination if reload_in_progress
+ is set). Then fall through normally. */
+ rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+ operands[1] = legitimize_pic_address (operands[1], SImode, temp);
+ }
+ else if (flag_pic && TARGET_PCREL && ! reload_in_progress)
+ {
+ /* Don't allow writes to memory except via a register;
+ the m68k doesn't consider PC-relative addresses to be writable. */
+ if (symbolic_operand (operands[0], SImode))
+ operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+ else if (GET_CODE (operands[0]) == MEM
+ && symbolic_operand (XEXP (operands[0], 0), SImode))
+ operands[0] = gen_rtx_MEM (SImode,
+ force_reg (SImode, XEXP (operands[0], 0)));
+ }
+ if (M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ {
+ split_const (operands[1], &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF
+ && !offset_within_block_p (base, INTVAL (offset)))
+ {
+ tmp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (SImode);
+ emit_move_insn (tmp, base);
+ emit_insn (gen_addsi3 (operands[0], tmp, offset));
+ DONE;
+ }
+ }
+})
+
+;; General case of fullword move.
+(define_insn "*movsi_m68k"
+ ;; Notes: make sure no alternative allows g vs g.
+ ;; We don't allow f-regs since fixed point cannot go in them.
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g,d,a<")
+ (match_operand:SI 1 "general_src_operand" "damSnT,n,i"))]
+ "!TARGET_COLDFIRE && reload_completed"
+{
+ return output_move_simode (operands);
+})
+
+;; Before reload is completed the register constraints
+;; force integer constants in range for a moveq to be reloaded
+;; if they are headed for memory.
+(define_insn "*movsi_m68k2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g,d,a<")
+ (match_operand:SI 1 "general_src_operand" "damSKT,n,i"))]
+
+ "!TARGET_COLDFIRE"
+{
+ return output_move_simode (operands);
+})
+
+;; ColdFire move instructions can have at most one operand with addressing mode >= 6.
+(define_insn "*movsi_cf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g,d, d, d, d, d, a,Ap, a, r<Q>,g, U")
+ (match_operand:SI 1 "general_operand" " R,CQ,CW,CZ,CS,Ci,J,J Cs,Cs, g, Rr<Q>,U"))]
+ "TARGET_COLDFIRE"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov3q%.l %1,%0";
+
+ case 1:
+ return "moveq %1,%0";
+
+ case 2:
+ {
+ unsigned u = INTVAL (operands[1]);
+
+ operands[1] = GEN_INT ((u << 16) | (u >> 16)); /*|*/
+ return "moveq %1,%0\n\tswap %0";
+ }
+
+ case 3:
+ return "mvz%.w %1,%0";
+
+ case 4:
+ return "mvs%.w %1,%0";
+
+ case 5:
+ return "move%.l %1,%0";
+
+ case 6:
+ return "move%.w %1,%0";
+
+ case 7:
+ return "pea %a1";
+
+ case 8:
+ return "lea %a1,%0";
+
+ case 9:
+ case 10:
+ case 11:
+ return "move%.l %1,%0";
+
+ default:
+ gcc_unreachable ();
+ return "";
+ }
+}
+ [(set_attr "type" "mov3q_l,moveq_l,*,mvsz,mvsz,move_l,move,pea,lea,move_l,move_l,move_l")])
+
+;; Special case of fullword move, where we need to get a non-GOT PIC
+;; reference into an address register.
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=a<")
+ (match_operand:SI 1 "pcrel_address" ""))]
+ "TARGET_PCREL"
+{
+ if (push_operand (operands[0], SImode))
+ return "pea %a1";
+ return "lea %a1,%0";
+})
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=g")
+ (match_operand:HI 1 "general_src_operand" "gS"))]
+ "!TARGET_COLDFIRE"
+ "* return output_move_himode (operands);")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r<Q>,g,U")
+ (match_operand:HI 1 "general_operand" "g,r<Q>,U"))]
+ "TARGET_COLDFIRE"
+ "* return output_move_himode (operands);")
+
+(define_expand "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
+ (match_operand:HI 1 "general_src_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+dm"))
+ (match_operand:HI 1 "general_src_operand" "rmSn"))]
+ "!TARGET_COLDFIRE"
+ "* return output_move_stricthi (operands);")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+d,m"))
+ (match_operand:HI 1 "general_src_operand" "rmn,r"))]
+ "TARGET_COLDFIRE"
+ "* return output_move_stricthi (operands);")
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_src_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,*a,m")
+ (match_operand:QI 1 "general_src_operand" "dmSi*a,di*a,dmSi"))]
+ "!TARGET_COLDFIRE"
+ "* return output_move_qimode (operands);")
+
+(define_insn ""
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d<Q>,dm,U,d*a")
+ (match_operand:QI 1 "general_src_operand" "dmi,d<Q>,U,di*a"))]
+ "TARGET_COLDFIRE"
+ "* return output_move_qimode (operands);")
+
+(define_expand "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (match_operand:QI 1 "general_src_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+dm"))
+ (match_operand:QI 1 "general_src_operand" "dmSn"))]
+ "!TARGET_COLDFIRE"
+ "* return output_move_strictqi (operands);")
+
+(define_insn "*movstrictqi_cf"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+d, Ac, d,m"))
+ (match_operand:QI 1 "general_src_operand" "C0,C0, dmn,d"))]
+ "TARGET_COLDFIRE"
+ "@
+ clr%.b %0
+ clr%.b %0
+ move%.b %1,%0
+ move%.b %1,%0"
+ [(set_attr "type" "clr,clr,move,move")])
+
+(define_expand "pushqi1"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -2)))
+ (set (mem:QI (plus:SI (reg:SI SP_REG) (const_int 1)))
+ (match_operand:QI 0 "general_operand" ""))]
+ "!TARGET_COLDFIRE"
+ "")
+
+(define_expand "reload_insf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f")
+ (match_operand:SF 1 "general_operand" "mf"))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (emit_move_sequence (operands, SFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "reload_outsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "register_operand" "f"))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (emit_move_sequence (operands, SFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rmf")
+ (match_operand:SF 1 "general_operand" "rmfF"))]
+ "!TARGET_COLDFIRE"
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "f%$move%.x %1,%0";
+ else if (ADDRESS_REG_P (operands[1]))
+ return "move%.l %1,%-\;f%$move%.s %+,%0";
+ else if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return output_move_const_single (operands);
+ return "f%$move%.s %f1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ {
+ if (ADDRESS_REG_P (operands[0]))
+ return "fmove%.s %1,%-\;move%.l %+,%0";
+ return "fmove%.s %f1,%0";
+ }
+ if (operands[1] == CONST0_RTX (SFmode)
+ /* clr insns on 68000 read before writing. */
+ && ((TARGET_68010 || TARGET_COLDFIRE)
+ || !(GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))))
+ {
+ if (ADDRESS_REG_P (operands[0]))
+ {
+ /* On the '040, 'subl an,an' takes 2 clocks while lea takes only 1 */
+ if (TUNE_68040_60)
+ return MOTOROLA ? "lea 0.w,%0" : "lea 0:w,%0";
+ else
+ return "sub%.l %0,%0";
+ }
+ /* moveq is faster on the 68000. */
+ if (DATA_REG_P (operands[0]) && TUNE_68000_10)
+ return "moveq #0,%0";
+ return "clr%.l %0";
+ }
+ return "move%.l %1,%0";
+})
+
+(define_insn "movsf_cf_soft"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r<Q>,g,U")
+ (match_operand:SF 1 "general_operand" "g,r<Q>,U"))]
+ "TARGET_COLDFIRE && !TARGET_COLDFIRE_FPU"
+ "move%.l %1,%0"
+ [(set_attr "type" "move_l")])
+
+;; SFmode MEMs are restricted to modes 2-4 if TARGET_COLDFIRE_FPU.
+;; The move instructions can handle all combinations.
+(define_insn "movsf_cf_hard"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r<Q>U, f, f,mr,f,r<Q>,f,m")
+ (match_operand:SF 1 "general_operand" " f, r<Q>U,f,rm,F,F, m,f"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (which_alternative == 4 || which_alternative == 5) {
+ rtx xoperands[2];
+ REAL_VALUE_TYPE r;
+ long l;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ xoperands[0] = operands[0];
+ xoperands[1] = GEN_INT (l);
+ if (which_alternative == 5) {
+ if (l == 0) {
+ if (ADDRESS_REG_P (xoperands[0]))
+ output_asm_insn ("sub%.l %0,%0", xoperands);
+ else
+ output_asm_insn ("clr%.l %0", xoperands);
+ } else
+ if (GET_CODE (operands[0]) == MEM
+ && symbolic_operand (XEXP (operands[0], 0), SImode))
+ output_asm_insn ("move%.l %1,%-;move%.l %+,%0", xoperands);
+ else
+ output_asm_insn ("move%.l %1,%0", xoperands);
+ return "";
+ }
+ if (l != 0)
+ output_asm_insn ("move%.l %1,%-;fsmove%.s %+,%0", xoperands);
+ else
+ output_asm_insn ("clr%.l %-;fsmove%.s %+,%0", xoperands);
+ return "";
+ }
+ if (FP_REG_P (operands[0]))
+ {
+ if (ADDRESS_REG_P (operands[1]))
+ return "move%.l %1,%-;fsmove%.s %+,%0";
+ if (FP_REG_P (operands[1]))
+ return "fsmove%.d %1,%0";
+ return "fsmove%.s %f1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ {
+ if (ADDRESS_REG_P (operands[0]))
+ return "fmove%.s %1,%-;move%.l %+,%0";
+ return "fmove%.s %f1,%0";
+ }
+ if (operands[1] == CONST0_RTX (SFmode))
+ {
+ if (ADDRESS_REG_P (operands[0]))
+ return "sub%.l %0,%0";
+ return "clr%.l %0";
+ }
+ return "move%.l %1,%0";
+})
+
+(define_expand "reload_indf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f")
+ (match_operand:DF 1 "general_operand" "mf"))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (emit_move_sequence (operands, DFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "reload_outdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "register_operand" "f"))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (emit_move_sequence (operands, DFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_COLDFIRE_FPU)
+ if (emit_move_sequence (operands, DFmode, 0))
+ DONE;
+})
+
+(define_insn ""
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=rm,rf,rf,&rof<>")
+ (match_operand:DF 1 "general_operand" "*rf,m,0,*rofE<>"))]
+; [(set (match_operand:DF 0 "nonimmediate_operand" "=rm,&rf,&rof<>")
+; (match_operand:DF 1 "general_operand" "rf,m,rofF<>"))]
+ "!TARGET_COLDFIRE"
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "f%&move%.x %1,%0";
+ if (REG_P (operands[1]))
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ output_asm_insn ("move%.l %1,%-", operands);
+ return "f%&move%.d %+,%0";
+ }
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return output_move_const_double (operands);
+ return "f%&move%.d %f1,%0";
+ }
+ else if (FP_REG_P (operands[1]))
+ {
+ if (REG_P (operands[0]))
+ {
+ output_asm_insn ("fmove%.d %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ else
+ return "fmove%.d %f1,%0";
+ }
+ return output_move_double (operands);
+})
+
+(define_insn_and_split "movdf_cf_soft"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,g")
+ (match_operand:DF 1 "general_operand" "g,r"))]
+ "TARGET_COLDFIRE && !TARGET_COLDFIRE_FPU"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ m68k_emit_move_double (operands);
+ DONE;
+})
+
+(define_insn "movdf_cf_hard"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f, <Q>U,r,f,r,r,m,f")
+ (match_operand:DF 1 "general_operand" " f<Q>U,f, f,r,r,m,r,E"))]
+ "TARGET_COLDFIRE_FPU"
+{
+ rtx xoperands[3];
+ REAL_VALUE_TYPE r;
+ long l[2];
+
+ switch (which_alternative)
+ {
+ default:
+ return "fdmove%.d %1,%0";
+ case 1:
+ return "fmove%.d %1,%0";
+ case 2:
+ return "fmove%.d %1,%-;move%.l %+,%0;move%.l %+,%R0";
+ case 3:
+ return "move%.l %R1,%-;move%.l %1,%-;fdmove%.d %+,%0";
+ case 4: case 5: case 6:
+ return output_move_double (operands);
+ case 7:
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ xoperands[0] = operands[0];
+ xoperands[1] = GEN_INT (l[0]);
+ xoperands[2] = GEN_INT (l[1]);
+ if (operands[1] == CONST0_RTX (DFmode))
+ output_asm_insn ("clr%.l %-;clr%.l %-;fdmove%.d %+,%0",
+ xoperands);
+ else
+ if (l[1] == 0)
+ output_asm_insn ("clr%.l %-;move%.l %1,%-;fdmove%.d %+,%0",
+ xoperands);
+ else
+ output_asm_insn ("move%.l %2,%-;move%.l %1,%-;fdmove%.d %+,%0",
+ xoperands);
+ return "";
+ }
+})
+
+;; ??? The XFmode patterns are inconsistent about whether constants are
+;; allowed.  Most but not all have predicates and constraints that disallow
+;; constants. Most but not all have output templates that handle constants.
+;; See also LEGITIMATE_CONSTANT_P.
+
+(define_expand "movxf"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ ""
+{
+ /* We can't rewrite operands during reload. */
+ if (! reload_in_progress)
+ {
+ if (CONSTANT_P (operands[1]))
+ {
+ operands[1] = force_const_mem (XFmode, operands[1]);
+ if (! memory_address_p (XFmode, XEXP (operands[1], 0)))
+ operands[1] = adjust_address (operands[1], XFmode, 0);
+ }
+ if (flag_pic && TARGET_PCREL)
+ {
+ /* Don't allow writes to memory except via a register; the
+ m68k doesn't consider PC-relative addresses to be writable. */
+ if (GET_CODE (operands[0]) == MEM
+ && symbolic_operand (XEXP (operands[0], 0), SImode))
+ operands[0] = gen_rtx_MEM (XFmode,
+ force_reg (SImode, XEXP (operands[0], 0)));
+ }
+ }
+})
+
+(define_insn ""
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,!r,!f,!r,m,!r")
+ (match_operand:XF 1 "nonimmediate_operand" "m,f,f,f,r,!r,!r,m"))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "fmove%.x %1,%0";
+ if (REG_P (operands[1]))
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 2);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ output_asm_insn ("move%.l %1,%-", operands);
+ return "fmove%.x %+,%0";
+ }
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return "fmove%.x %1,%0";
+ return "fmove%.x %f1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ {
+ if (REG_P (operands[0]))
+ {
+ output_asm_insn ("fmove%.x %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ output_asm_insn ("move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ /* Must be memory destination. */
+ return "fmove%.x %f1,%0";
+ }
+ return output_move_double (operands);
+})
+
+(define_insn ""
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=rm,rf,&rof<>")
+ (match_operand:XF 1 "nonimmediate_operand" "rf,m,rof<>"))]
+ "! TARGET_68881 && ! TARGET_COLDFIRE"
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "fmove%.x %1,%0";
+ if (REG_P (operands[1]))
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 2);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ output_asm_insn ("move%.l %1,%-", operands);
+ return "fmove%.x %+,%0";
+ }
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return "fmove%.x %1,%0";
+ return "fmove%.x %f1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ {
+ if (REG_P (operands[0]))
+ {
+ output_asm_insn ("fmove%.x %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ output_asm_insn ("move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ else
+ return "fmove%.x %f1,%0";
+ }
+ return output_move_double (operands);
+})
+
+(define_insn ""
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=r,g")
+ (match_operand:XF 1 "nonimmediate_operand" "g,r"))]
+ "! TARGET_68881 && TARGET_COLDFIRE"
+ "* return output_move_double (operands);")
+
+(define_expand "movdi"
+ ;; Let's see if it really still needs to handle fp regs, and, if so, why.
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "")
+
+;; movdi can apply to fp regs in some cases
+(define_insn ""
+ ;; Let's see if it really still needs to handle fp regs, and, if so, why.
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r,&ro<>")
+ (match_operand:DI 1 "general_operand" "rF,m,roi<>F"))]
+; [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,&r,&ro<>,!&rm,!&f")
+; (match_operand:DI 1 "general_operand" "r,m,roi<>,fF"))]
+; [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,&rf,&ro<>,!&rm,!&f")
+; (match_operand:DI 1 "general_operand" "r,m,roi<>,fF,rfF"))]
+ "!TARGET_COLDFIRE"
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "fmove%.x %1,%0";
+ if (REG_P (operands[1]))
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ output_asm_insn ("move%.l %1,%-", operands);
+ return "fmove%.d %+,%0";
+ }
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return output_move_const_double (operands);
+ return "fmove%.d %f1,%0";
+ }
+ else if (FP_REG_P (operands[1]))
+ {
+ if (REG_P (operands[0]))
+ {
+ output_asm_insn ("fmove%.d %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ else
+ return "fmove%.d %f1,%0";
+ }
+ return output_move_double (operands);
+})
+
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,g")
+ (match_operand:DI 1 "general_operand" "g,r"))]
+ "TARGET_COLDFIRE"
+ "* return output_move_double (operands);")
+
+;; This goes after the move instructions
+;; because the move instructions are better (require no spilling)
+;; when they can apply. It goes before the add/sub insns
+;; so we will prefer it to them.
+
+(define_insn "pushasi"
+ [(set (match_operand:SI 0 "push_operand" "=m")
+ (match_operand:SI 1 "address_operand" "p"))]
+ ""
+ "pea %a1"
+ [(set_attr "type" "pea")])
+
+;; truncation instructions
+(define_insn "truncsiqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm,d")
+ (truncate:QI
+ (match_operand:SI 1 "general_src_operand" "doJS,i")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == REG)
+ {
+ /* Must clear condition codes, since the move.l bases them on
+ the entire 32 bits, not just the desired 8 bits. */
+ CC_STATUS_INIT;
+ return "move%.l %1,%0";
+ }
+ if (GET_CODE (operands[1]) == MEM)
+ operands[1] = adjust_address (operands[1], QImode, 3);
+ return "move%.b %1,%0";
+})
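+
+;; Why offset 3: the m68k is big-endian, so the least significant byte of
+;; a 32-bit value in memory sits at byte offset 3.  For a hypothetical
+;; memory-to-memory case the adjust_address call above turns a truncation
+;; of 4(%a0) into "move.b 7(%a0),(%a1)".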
+
+(define_insn "trunchiqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm,d")
+ (truncate:QI
+ (match_operand:HI 1 "general_src_operand" "doJS,i")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == REG
+ && (GET_CODE (operands[1]) == MEM
+ || GET_CODE (operands[1]) == CONST_INT))
+ {
+ /* Must clear condition codes, since the move.w bases them on
+ the entire 16 bits, not just the desired 8 bits. */
+ CC_STATUS_INIT;
+ return "move%.w %1,%0";
+ }
+ if (GET_CODE (operands[0]) == REG)
+ {
+ /* Must clear condition codes, since the move.l bases them on
+ the entire 32 bits, not just the desired 8 bits. */
+ CC_STATUS_INIT;
+ return "move%.l %1,%0";
+ }
+ if (GET_CODE (operands[1]) == MEM)
+ operands[1] = adjust_address (operands[1], QImode, 1);
+ return "move%.b %1,%0";
+})
+
+(define_insn "truncsihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm,d")
+ (truncate:HI
+ (match_operand:SI 1 "general_src_operand" "roJS,i")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == REG)
+ {
+ /* Must clear condition codes, since the move.l bases them on
+ the entire 32 bits, not just the desired 16 bits. */
+ CC_STATUS_INIT;
+ return "move%.l %1,%0";
+ }
+ if (GET_CODE (operands[1]) == MEM)
+ operands[1] = adjust_address (operands[1], HImode, 2);
+ return "move%.w %1,%0";
+})
+
+;; zero extension instructions
+
+;; two special patterns to match various post_inc/pre_dec patterns
+(define_insn_and_split "*zero_extend_inc"
+ [(set (match_operand 0 "post_inc_operand" "")
+ (zero_extend (match_operand 1 "register_operand" "")))]
+ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT &&
+ GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT &&
+ GET_MODE_SIZE (GET_MODE (operands[0])) == GET_MODE_SIZE (GET_MODE (operands[1])) * 2"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (const_int 0))
+ (set (match_dup 0)
+ (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], GET_MODE (operands[1]), 0);
+})
+
+(define_insn_and_split "*zero_extend_dec"
+ [(set (match_operand 0 "pre_dec_operand" "")
+ (zero_extend (match_operand 1 "register_operand" "")))]
+ "(GET_MODE (operands[0]) != HImode || XEXP (XEXP (operands[0], 0), 0) != stack_pointer_rtx) &&
+ GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT &&
+ GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT &&
+ GET_MODE_SIZE (GET_MODE (operands[0])) == GET_MODE_SIZE (GET_MODE (operands[1])) * 2"
+ "#"
+ ""
+ [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 0)
+ (const_int 0))]
+{
+ operands[0] = adjust_address (operands[0], GET_MODE (operands[1]), 0);
+})
+
+(define_insn_and_split "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_src_operand" "")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 2)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 3)
+ (const_int 0))]
+{
+ operands[2] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+})
+
+(define_insn_and_split "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "nonimmediate_src_operand" "")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 2)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 3)
+ (const_int 0))]
+{
+ operands[2] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+})
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_src_operand" "")))]
+ ""
+{
+ if (GET_CODE (operands[0]) == MEM
+ && GET_CODE (operands[1]) == MEM)
+ operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn_and_split "*zero_extendsidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_src_operand" "")))]
+ "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
+ "#"
+ ""
+ [(set (match_dup 2)
+ (match_dup 1))
+ (set (match_dup 3)
+ (const_int 0))]
+{
+ operands[2] = gen_lowpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[0]);
+})
+
+(define_insn "*zero_extendhisi2_cf"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_src_operand" "rmS")))]
+ "ISA_HAS_MVS_MVZ"
+ "mvz%.w %1,%0"
+ [(set_attr "type" "mvsz")])
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_src_operand" "rmS")))]
+ ""
+ "#")
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_src_operand" "")))]
+ "!TARGET_COLDFIRE"
+ "")
+
+(define_insn "*zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_src_operand" "dmS")))]
+ "!TARGET_COLDFIRE"
+ "#")
+
+(define_insn "*zero_extendqisi2_cfv4"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_src_operand" "dmS")))]
+ "ISA_HAS_MVS_MVZ"
+ "mvz%.b %1,%0"
+ [(set_attr "type" "mvsz")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_src_operand" "dmS")))]
+ ""
+ "#")
+
+;; these two patterns split everything else which isn't matched by
+;; something else above
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (zero_extend (match_operand 1 "nonimmediate_src_operand" "")))]
+ "!ISA_HAS_MVS_MVZ
+ && reload_completed
+ && reg_mentioned_p (operands[0], operands[1])"
+ [(set (strict_low_part (match_dup 2))
+ (match_dup 1))
+ (set (match_dup 0)
+ (match_op_dup 4 [(match_dup 0) (match_dup 3)]))]
+{
+ operands[2] = gen_lowpart (GET_MODE (operands[1]), operands[0]);
+ operands[3] = GEN_INT (GET_MODE_MASK (GET_MODE (operands[1])));
+ operands[4] = gen_rtx_AND (GET_MODE (operands[0]), operands[0], operands[3]);
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (zero_extend (match_operand 1 "nonimmediate_src_operand" "")))]
+ "!ISA_HAS_MVS_MVZ && reload_completed"
+ [(set (match_dup 0)
+ (const_int 0))
+ (set (strict_low_part (match_dup 2))
+ (match_dup 1))]
+{
+ operands[2] = gen_lowpart (GET_MODE (operands[1]), operands[0]);
+})
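+
+;; Roughly what the two splits produce (hypothetical registers): for a
+;; non-overlapping QI->SI extension the second split gives
+;;     clr.l %d1
+;;     move.b %d0,%d1
+;; while the overlapping case must use the first split's mask form,
+;;     move.b <src>,%d0
+;;     and.l #255,%d0
+;; since clearing the destination first would destroy the source.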
+
+;; sign extension instructions
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d")
+ (sign_extend:DI (match_operand:QI 1 "general_src_operand" "rmS")))]
+ ""
+{
+ CC_STATUS_INIT;
+ operands[2] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ if (ISA_HAS_MVS_MVZ)
+ return "mvs%.b %1,%2\;smi %0\;extb%.l %0";
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ {
+ if (ADDRESS_REG_P (operands[1]))
+ return "move%.w %1,%2\;extb%.l %2\;smi %0\;extb%.l %0";
+ else
+ return "move%.b %1,%2\;extb%.l %2\;smi %0\;extb%.l %0";
+ }
+ else
+ {
+ if (ADDRESS_REG_P (operands[1]))
+ return "move%.w %1,%2\;ext%.w %2\;ext%.l %2\;move%.l %2,%0\;smi %0";
+ else
+ return "move%.b %1,%2\;ext%.w %2\;ext%.l %2\;move%.l %2,%0\;smi %0";
+ }
+})
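+
+;; The smi idiom used in these widening patterns: smi writes 0xff to its
+;; byte destination when the N (minus) flag is set and 0x00 otherwise, so
+;; smi plus a sign extension of that byte manufactures the all-zeros or
+;; all-ones high word of the 64-bit result.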
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d")
+ (sign_extend:DI
+ (match_operand:HI 1 "general_src_operand" "rmS")))]
+ ""
+{
+ CC_STATUS_INIT;
+ operands[2] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ if (ISA_HAS_MVS_MVZ)
+ return "mvs%.w %1,%2\;smi %0\;extb%.l %0";
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "move%.w %1,%2\;ext%.l %2\;smi %0\;extb%.l %0";
+ else
+ return "move%.w %1,%2\;ext%.l %2\;smi %0\;ext%.w %0\;ext%.l %0";
+})
+
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,o,o,<")
+ (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_src_operand" "rm,rm,r<Q>,rm")))
+ (clobber (match_scratch:SI 2 "=X,d,d,d"))]
+ ""
+{
+ CC_STATUS_INIT;
+
+ if (which_alternative == 0)
+ /* Handle alternative 0. */
+ {
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "move%.l %1,%R0\;smi %0\;extb%.l %0";
+ else
+ return "move%.l %1,%R0\;smi %0\;ext%.w %0\;ext%.l %0";
+ }
+
+ /* Handle alternatives 1, 2 and 3. We don't need to adjust address by 4
+ in alternative 3 because autodecrement will do that for us. */
+ operands[3] = adjust_address (operands[0], SImode,
+ which_alternative == 3 ? 0 : 4);
+ operands[0] = adjust_address (operands[0], SImode, 0);
+
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "move%.l %1,%3\;smi %2\;extb%.l %2\;move%.l %2,%0";
+ else
+ return "move%.l %1,%3\;smi %2\;ext%.w %2\;ext%.l %2\;move%.l %2,%0";
+}
+ [(set_attr "ok_for_coldfire" "yes,no,yes,yes")])
+
+;; Special case in which the register clobber can be avoided: copy and
+;; test.  Maybe there is a way to make that the general case, by forcing
+;; the result of the SI tree to be in the lower register of the DI target.
+
+(define_insn "extendplussidi"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (plus:SI (match_operand:SI 1 "general_operand" "%rmn")
+ (match_operand:SI 2 "general_operand" "rmn"))))]
+ ""
+{
+ CC_STATUS_INIT;
+ operands[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ if (GET_CODE (operands[1]) == CONST_INT
+ && (unsigned) INTVAL (operands[1]) > 8)
+ {
+ rtx tmp = operands[1];
+
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+ if (GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) == REGNO (operands[3]))
+ output_asm_insn ("add%.l %2,%3", operands);
+ else
+ output_asm_insn ("move%.l %2,%3\;add%.l %1,%3", operands);
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "smi %0\;extb%.l %0";
+ else
+ return "smi %0\;ext%.w %0\;ext%.l %0";
+})
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "*cfv4_extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_src_operand" "rmS")))]
+ "ISA_HAS_MVS_MVZ"
+ "mvs%.w %1,%0"
+ [(set_attr "type" "mvsz")])
+
+(define_insn "*68k_extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,a")
+ (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_src_operand" "0,rmS")))]
+ "!ISA_HAS_MVS_MVZ"
+ "@
+ ext%.l %0
+ move%.w %1,%0"
+ [(set_attr "type" "ext,move")])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0")))]
+ ""
+ "ext%.w %0"
+ [(set_attr "type" "ext")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "TARGET_68020 || TARGET_COLDFIRE"
+ "")
+
+(define_insn "*cfv4_extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "rms")))]
+ "ISA_HAS_MVS_MVZ"
+ "mvs%.b %1,%0"
+ [(set_attr "type" "mvsz")])
+
+(define_insn "*68k_extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0")))]
+ "TARGET_68020 || (TARGET_COLDFIRE && !ISA_HAS_MVS_MVZ)"
+ "extb%.l %0"
+ [(set_attr "type" "ext")])
+
+;; Conversions between float and double.
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (float_extend:DF
+ (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=*fdm,f")
+ (float_extend:DF
+ (match_operand:SF 1 "general_operand" "f,dmF")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[0]) && FP_REG_P (operands[1]))
+ {
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+ /* Extending float to double in an fp-reg is a no-op.
+ NOTICE_UPDATE_CC has already assumed that the
+ cc will be set. So cancel what it did. */
+ cc_status = cc_prev_status;
+ return "";
+ }
+ return "f%&move%.x %1,%0";
+ }
+ if (FP_REG_P (operands[0]))
+ return "f%&move%.s %f1,%0";
+ if (DATA_REG_P (operands[0]) && FP_REG_P (operands[1]))
+ {
+ output_asm_insn ("fmove%.d %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ return "fmove%.d %f1,%0";
+})
+
+(define_insn "extendsfdf2_cf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f")
+ (float_extend:DF
+ (match_operand:SF 1 "general_operand" "f,<Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[0]) && FP_REG_P (operands[1]))
+ {
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+ /* Extending float to double in an fp-reg is a no-op.
+ NOTICE_UPDATE_CC has already assumed that the
+ cc will be set. So cancel what it did. */
+ cc_status = cc_prev_status;
+ return "";
+ }
+ return "fdmove%.d %1,%0";
+ }
+ return "fdmove%.s %f1,%0";
+})
+
+;; This cannot output into an f-reg because there is no way to be
+;; sure of truncating in that case.
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+;; On the '040 we can truncate in a register accurately and easily.
+(define_insn ""
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f")
+ (float_truncate:SF
+ (match_operand:DF 1 "general_operand" "fmG")))]
+ "TARGET_68881 && TARGET_68040"
+{
+ if (FP_REG_P (operands[1]))
+ return "f%$move%.x %1,%0";
+ return "f%$move%.d %f1,%0";
+})
+
+(define_insn "truncdfsf2_cf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,d<Q>U")
+ (float_truncate:SF
+ (match_operand:DF 1 "general_operand" "<Q>U,f")))]
+ "TARGET_COLDFIRE_FPU"
+ "@
+ fsmove%.d %1,%0
+ fmove%.s %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "*truncdfsf2_68881"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=dm")
+ (float_truncate:SF
+ (match_operand:DF 1 "general_operand" "f")))]
+ "TARGET_68881"
+ "fmove%.s %f1,%0"
+ [(set_attr "type" "fmove")])
+
+;; Conversion between fixed point and floating point.
+;; Note that among the fix-to-float insns
+;; the ones that start with SImode come first.
+;; That is so that an operand that is a CONST_INT
+;; (and therefore lacks a specific machine mode)
+;; will be recognized as SImode (which is always valid)
+;; rather than as QImode or HImode.
+
+(define_expand "floatsi<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (float:FP (match_operand:SI 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "floatsi<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:SI 1 "general_operand" "dmi")))]
+ "TARGET_68881"
+ "f<FP:round>move%.l %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "floatsi<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:SI 1 "general_operand" "d<Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+ "f<FP:prec>move%.l %1,%0"
+ [(set_attr "type" "fmove")])
+
+
+(define_expand "floathi<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (float:FP (match_operand:HI 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "floathi<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:HI 1 "general_operand" "dmn")))]
+ "TARGET_68881"
+ "fmove%.w %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "floathi<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:HI 1 "general_operand" "d<Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+ "fmove%.w %1,%0"
+ [(set_attr "type" "fmove")])
+
+
+(define_expand "floatqi<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (float:FP (match_operand:QI 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "floatqi<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:QI 1 "general_operand" "dmn")))]
+ "TARGET_68881"
+ "fmove%.b %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "floatqi<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (float:FP (match_operand:QI 1 "general_operand" "d<Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+ "fmove%.b %1,%0"
+ [(set_attr "type" "fmove")])
+
+
+;; New routines to convert floating-point values to integers
+;; to be used on the '040. These should be faster than trapping
+;; into the kernel to emulate fintrz. They should also be faster
+;; than calling the subroutines fixsfsi or fixdfsi.
+
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm")
+ (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f"))))
+ (clobber (match_scratch:SI 2 "=d"))
+ (clobber (match_scratch:SI 3 "=d"))]
+ "TARGET_68881 && TUNE_68040"
+{
+ CC_STATUS_INIT;
+ return "fmovem%.l %!,%2\;moveq #16,%3\;or%.l %2,%3\;and%.w #-33,%3\;fmovem%.l %3,%!\;fmove%.l %1,%0\;fmovem%.l %2,%!";
+})
+
+(define_insn "fix_truncdfhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm")
+ (fix:HI (fix:DF (match_operand:DF 1 "register_operand" "f"))))
+ (clobber (match_scratch:SI 2 "=d"))
+ (clobber (match_scratch:SI 3 "=d"))]
+ "TARGET_68881 && TUNE_68040"
+{
+ CC_STATUS_INIT;
+ return "fmovem%.l %!,%2\;moveq #16,%3\;or%.l %2,%3\;and%.w #-33,%3\;fmovem%.l %3,%!\;fmove%.w %1,%0\;fmovem%.l %2,%!";
+})
+
+(define_insn "fix_truncdfqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (fix:QI (fix:DF (match_operand:DF 1 "register_operand" "f"))))
+ (clobber (match_scratch:SI 2 "=d"))
+ (clobber (match_scratch:SI 3 "=d"))]
+ "TARGET_68881 && TUNE_68040"
+{
+ CC_STATUS_INIT;
+ return "fmovem%.l %!,%2\;moveq #16,%3\;or%.l %2,%3\;and%.w #-33,%3\;fmovem%.l %3,%!\;fmove%.b %1,%0\;fmovem%.l %2,%!";
+})
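+
+;; The fmovem%.l %! sequences above save the FPCR, force its rounding
+;; field to round-toward-zero (or.l #16 sets RND bit 4, and.w #-33
+;; clears bit 5, leaving the 01 "toward zero" encoding), convert with a
+;; plain fmove, and finally restore the caller's control register.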
+
+;; Convert a float to a float whose value is an integer.
+;; This is the first stage of converting it to an integer type.
+
+(define_expand "ftrunc<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (fix:FP (match_operand:FP 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT && !TUNE_68040"
+ "")
+
+(define_insn "ftrunc<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (fix:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m")))]
+ "TARGET_68881 && !TUNE_68040"
+{
+ if (FP_REG_P (operands[1]))
+ return "fintrz%.x %f1,%0";
+ return "fintrz%.<FP:prec> %f1,%0";
+}
+ [(set_attr "type" "falu")])
+
+(define_insn "ftrunc<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (fix:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[1]))
+ return "fintrz%.d %f1,%0";
+ return "fintrz%.<FP:prec> %f1,%0";
+}
+ [(set_attr "type" "falu")])
+
+;; Convert a float whose value is an integer
+;; to an actual integer. Second stage of converting float to integer type.
+(define_expand "fix<mode>qi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (fix:QI (match_operand:FP 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "fix<mode>qi2_68881"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (fix:QI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_68881"
+ "fmove%.b %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "fix<mode>qi2_cf"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d<Q>U")
+ (fix:QI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_COLDFIRE_FPU"
+ "fmove%.b %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "fix<mode>hi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (fix:HI (match_operand:FP 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "fix<mode>hi2_68881"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm")
+ (fix:HI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_68881"
+ "fmove%.w %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "fix<mode>hi2_cf"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d<Q>U")
+ (fix:HI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_COLDFIRE_FPU"
+ "fmove%.w %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_expand "fix<mode>si2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:FP 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "fix<mode>si2_68881"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm")
+ (fix:SI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_68881"
+ "fmove%.l %1,%0"
+ [(set_attr "type" "fmove")])
+
+(define_insn "fix<mode>si2_cf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d<Q>U")
+ (fix:SI (match_operand:FP 1 "general_operand" "f")))]
+ "TARGET_COLDFIRE_FPU"
+ "fmove%.l %1,%0"
+ [(set_attr "type" "fmove")])
+
+
+;; add instructions
+
+(define_insn "adddi_lshrdi_63"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d")
+ (plus:DI (lshiftrt:DI (match_operand:DI 1 "general_operand" "rm")
+ (const_int 63))
+ (match_dup 1)))
+ (clobber (match_scratch:SI 2 "=d"))]
+ ""
+{
+ operands[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ if (REG_P (operands[1]) && REGNO (operands[1]) == REGNO (operands[0]))
+ return
+ "move%.l %1,%2\;add%.l %2,%2\;subx%.l %2,%2\;sub%.l %2,%3\;subx%.l %2,%0";
+ if (GET_CODE (operands[1]) == REG)
+ operands[4] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else if (GET_CODE (XEXP (operands[1], 0)) == POST_INC
+ || GET_CODE (XEXP (operands[1], 0)) == PRE_DEC)
+ operands[4] = operands[1];
+ else
+ operands[4] = adjust_address (operands[1], SImode, 4);
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == PRE_DEC)
+ output_asm_insn ("move%.l %4,%3", operands);
+ output_asm_insn ("move%.l %1,%0\;smi %2", operands);
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ output_asm_insn ("extb%.l %2", operands);
+ else
+ output_asm_insn ("ext%.w %2\;ext%.l %2", operands);
+ if (GET_CODE (operands[1]) != MEM
+ || GET_CODE (XEXP (operands[1], 0)) != PRE_DEC)
+ output_asm_insn ("move%.l %4,%3", operands);
+ return "sub%.l %2,%3\;subx%.l %2,%0";
+})
+
+(define_insn "adddi_sexthishl32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o,a,*d,*d")
+ (plus:DI (ashift:DI (sign_extend:DI
+ (match_operand:HI 1 "general_operand" "rm,rm,rm,rm"))
+ (const_int 32))
+ (match_operand:DI 2 "general_operand" "0,0,0,0")))
+ (clobber (match_scratch:SI 3 "=&d,X,a,?d"))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (ADDRESS_REG_P (operands[0]))
+ return "add%.w %1,%0";
+ else if (ADDRESS_REG_P (operands[3]))
+ return "move%.w %1,%3\;add%.l %3,%0";
+ else
+ return "move%.w %1,%3\;ext%.l %3\;add%.l %3,%0";
+})
+
+(define_insn "*adddi_dilshr32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,o")
+ (plus:DI (lshiftrt:DI (match_operand:DI 1 "general_operand" "ro,d")
+ (const_int 32))
+ (match_operand:DI 2 "general_operand" "0,0")))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == REG)
+ operands[2] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[2] = adjust_address (operands[0], SImode, 4);
+ return "add%.l %1,%2\;negx%.l %0\;neg%.l %0";
+})
+
+(define_insn "*adddi_dilshr32_cf"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (plus:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "ro")
+ (const_int 32))
+ (match_operand:DI 2 "register_operand" "0")))]
+ "TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ return "add%.l %1,%R0\;negx%.l %0\;neg%.l %0";
+})
+
+(define_insn "adddi_dishl32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+;; (plus:DI (match_operand:DI 2 "general_operand" "%0")
+;; (ashift:DI (match_operand:DI 1 "general_operand" "ro")
+;; (const_int 32))))]
+ (plus:DI (ashift:DI (match_operand:DI 1 "general_operand" "ro,d")
+ (const_int 32))
+ (match_operand:DI 2 "general_operand" "0,0")))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == REG)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else
+ operands[1] = adjust_address (operands[1], SImode, 4);
+ return "add%.l %1,%0";
+}
+ [(set_attr "type" "alu_l")])
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o<>,d,d,d")
+ (plus:DI (match_operand:DI 1 "general_operand" "%0,0,0,0")
+ (match_operand:DI 2 "general_operand" "d,no>,d,a")))
+ (clobber (match_scratch:SI 3 "=&d,&d,X,&d"))]
+ ""
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ if (DATA_REG_P (operands[2]))
+ return "add%.l %R2,%R0\;addx%.l %2,%0";
+ else if (GET_CODE (operands[2]) == MEM
+ && GET_CODE (XEXP (operands[2], 0)) == POST_INC)
+ return "move%.l %2,%3\;add%.l %2,%R0\;addx%.l %3,%0";
+ else
+ {
+ rtx high, low;
+ rtx xoperands[2];
+
+ if (GET_CODE (operands[2]) == REG)
+ {
+ low = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+ high = operands[2];
+ }
+ else if (CONSTANT_P (operands[2]))
+ split_double (operands[2], &high, &low);
+ else
+ {
+ low = adjust_address (operands[2], SImode, 4);
+ high = operands[2];
+ }
+
+ operands[1] = low, operands[2] = high;
+ xoperands[0] = operands[3];
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) >= -8 && INTVAL (operands[1]) < 0)
+ xoperands[1] = GEN_INT (-INTVAL (operands[2]) - 1);
+ else
+ xoperands[1] = operands[2];
+
+ output_asm_insn (output_move_simode (xoperands), xoperands);
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) <= 8)
+ return "addq%.l %1,%R0\;addx%.l %3,%0";
+ else if (INTVAL (operands[1]) >= -8 && INTVAL (operands[1]) < 0)
+ {
+ operands[1] = GEN_INT (-INTVAL (operands[1]));
+ return "subq%.l %1,%R0\;subx%.l %3,%0";
+ }
+ }
+ return "add%.l %1,%R0\;addx%.l %3,%0";
+ }
+ }
+ else
+ {
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+ CC_STATUS_INIT;
+ if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+ {
+ operands[1] = gen_rtx_MEM (SImode,
+ plus_constant (XEXP(operands[0], 0), -8));
+ return "move%.l %0,%3\;add%.l %R2,%0\;addx%.l %2,%3\;move%.l %3,%1";
+ }
+ else if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ {
+ operands[1] = XEXP(operands[0], 0);
+ return "add%.l %R2,%0\;move%.l %0,%3\;addx%.l %2,%3\;move%.l %3,%1";
+ }
+ else
+ {
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "add%.l %R2,%1\;move%.l %0,%3\;addx%.l %2,%3\;move%.l %3,%0";
+ }
+ }
+})
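+
+;; Illustrative register-register case (hypothetical pairs): adding
+;; %d2/%d3 into %d0/%d1 emits "add.l %d3,%d1" then "addx.l %d2,%d0" --
+;; the low words add first and addx folds the carry into the high words.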
+
+(define_insn "addsi_lshrsi_31"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm,dm,d<Q>")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "general_operand" "rm,r<Q>,rm")
+ (const_int 31))
+ (match_dup 1)))]
+ ""
+{
+ operands[2] = operands[0];
+ operands[3] = gen_label_rtx();
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+ operands[0] = gen_rtx_MEM (SImode, XEXP (XEXP (operands[0], 0), 0));
+ else if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ operands[2] = gen_rtx_MEM (SImode, XEXP (XEXP (operands[0], 0), 0));
+ }
+ output_asm_insn ("move%.l %1,%0", operands);
+ output_asm_insn ("jpl %l3", operands);
+ output_asm_insn ("addq%.l #1,%2", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (operands[3]));
+ return "";
+}
+ [(set_attr "ok_for_coldfire" "no,yes,yes")])
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (plus:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_src_operand" "")))]
+ ""
+ "")
+
+;; Note that the middle two alternatives are near-duplicates
+;; in order to handle insns generated by reload.
+;; This is needed since they are not themselves reloaded,
+;; so commutativity won't apply to them.
+(define_insn "*addsi3_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?a,?a,d,a")
+ (plus:SI (match_operand:SI 1 "general_operand" "%0,a,rJK,0,0")
+ (match_operand:SI 2 "general_src_operand" "dIKLT,rJK,a,mSrIKLT,mSrIKLs")))]
+ "! TARGET_COLDFIRE"
+ "* return output_addsi3 (operands);")
+
+(define_insn_and_split "*addsi3_5200"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mr,mr,a, m,r, ?a, ?a,?a,?a")
+ (plus:SI (match_operand:SI 1 "general_operand" "%0, 0, 0, 0,0, a, a, r, a")
+ (match_operand:SI 2 "general_src_operand" " I, L, JCu,d,mrKi,Cj, r, a, JCu")))]
+ "TARGET_COLDFIRE"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "addq%.l %2,%0";
+
+ case 1:
+ operands[2] = GEN_INT (- INTVAL (operands[2]));
+ return "subq%.l %2,%0";
+
+ case 3:
+ case 4:
+ return "add%.l %2,%0";
+
+ case 5:
+ /* move%.l %2,%0\n\tadd%.l %1,%0 */
+ return "#";
+
+ case 6:
+ return MOTOROLA ? "lea (%1,%2.l),%0" : "lea %1@(0,%2:l),%0";
+
+ case 7:
+ return MOTOROLA ? "lea (%2,%1.l),%0" : "lea %2@(0,%1:l),%0";
+
+ case 2:
+ case 8:
+ return MOTOROLA ? "lea (%c2,%1),%0" : "lea %1@(%c2),%0";
+
+ default:
+ gcc_unreachable ();
+ return "";
+ }
+}
+ "&& reload_completed && (extract_constrain_insn_cached (insn), which_alternative == 5) && !operands_match_p (operands[0], operands[1])"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_dup 1)))]
+ ""
+ [(set_attr "type" "aluq_l,aluq_l,lea, alu_l,alu_l,*,lea, lea, lea")
+ (set_attr "opy" "2, 2, *, 2, 2, *,*, *, *")
+ (set_attr "opy_type" "*, *, mem5,*, *, *,mem6,mem6,mem5")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=a")
+ (plus:SI (match_operand:SI 1 "general_operand" "0")
+ (sign_extend:SI
+ (match_operand:HI 2 "nonimmediate_src_operand" "rmS"))))]
+ "!TARGET_COLDFIRE"
+ "add%.w %2,%0")
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=m,r")
+ (plus:HI (match_operand:HI 1 "general_operand" "%0,0")
+ (match_operand:HI 2 "general_src_operand" "dn,rmSn")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ /* If the constant would be a negative number when interpreted as
+ HImode, make it negative. This is usually, but not always, done
+ elsewhere in the compiler. First check for constants out of range,
+ which could confuse us. */
+
+ if (INTVAL (operands[2]) >= 32768)
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 65536);
+
+ if (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) <= 8)
+ return "addq%.w %2,%0";
+ if (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) >= -8)
+ {
+ operands[2] = GEN_INT (- INTVAL (operands[2]));
+ return "subq%.w %2,%0";
+ }
+ /* On the CPU32 it is faster to use two addqw instructions to
+ add a small integer (8 < N <= 16) to a register.
+ Likewise for subqw. */
+ if (TUNE_CPU32 && REG_P (operands[0]))
+ {
+ if (INTVAL (operands[2]) > 8
+ && INTVAL (operands[2]) <= 16)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 8);
+ return "addq%.w #8,%0\;addq%.w %2,%0";
+ }
+ if (INTVAL (operands[2]) < -8
+ && INTVAL (operands[2]) >= -16)
+ {
+ operands[2] = GEN_INT (- INTVAL (operands[2]) - 8);
+ return "subq%.w #8,%0\;subq%.w %2,%0";
+ }
+ }
+ if (ADDRESS_REG_P (operands[0]) && !TUNE_68040)
+ return MOTOROLA ? "lea (%c2,%0),%0" : "lea %0@(%c2),%0";
+ }
+ return "add%.w %2,%0";
+})
+
+;; These insns must use MATCH_DUP instead of the more expected
+;; use of a matching constraint because the "output" here is also
+;; an input, so you can't use the matching constraint. That also means
+;; that you can't use the "%", so you need patterns with the matched
+;; operand in both positions.
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (plus:HI (match_dup 0)
+ (match_operand:HI 1 "general_src_operand" "dn,rmSn")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ /* If the constant would be a negative number when interpreted as
+ HImode, make it negative. This is usually, but not always, done
+ elsewhere in the compiler. First check for constants out of range,
+ which could confuse us. */
+
+ if (INTVAL (operands[1]) >= 32768)
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 65536);
+
+ if (INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= 8)
+ return "addq%.w %1,%0";
+ if (INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -8)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ return "subq%.w %1,%0";
+ }
+ /* On the CPU32 it is faster to use two addqw instructions to
+ add a small integer (8 < N <= 16) to a register.
+ Likewise for subqw. */
+ if (TUNE_CPU32 && REG_P (operands[0]))
+ {
+ if (INTVAL (operands[1]) > 8
+ && INTVAL (operands[1]) <= 16)
+ {
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 8);
+ return "addq%.w #8,%0\;addq%.w %1,%0";
+ }
+ if (INTVAL (operands[1]) < -8
+ && INTVAL (operands[1]) >= -16)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]) - 8);
+ return "subq%.w #8,%0\;subq%.w %1,%0";
+ }
+ }
+ if (ADDRESS_REG_P (operands[0]) && !TUNE_68040)
+ return MOTOROLA ? "lea (%c1,%0),%0" : "lea %0@(%c1),%0";
+ }
+ return "add%.w %1,%0";
+})
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (plus:HI (match_operand:HI 1 "general_src_operand" "dn,rmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ /* If the constant would be a negative number when interpreted as
+ HImode, make it negative. This is usually, but not always, done
+ elsewhere in the compiler. First check for constants out of range,
+ which could confuse us. */
+
+ if (INTVAL (operands[1]) >= 32768)
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 65536);
+
+ if (INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= 8)
+ return "addq%.w %1,%0";
+ if (INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -8)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ return "subq%.w %1,%0";
+ }
+ /* On the CPU32 it is faster to use two addqw instructions to
+ add a small integer (8 < N <= 16) to a register.
+ Likewise for subqw. */
+ if (TUNE_CPU32 && REG_P (operands[0]))
+ {
+ if (INTVAL (operands[1]) > 8
+ && INTVAL (operands[1]) <= 16)
+ {
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 8);
+ return "addq%.w #8,%0\;addq%.w %1,%0";
+ }
+ if (INTVAL (operands[1]) < -8
+ && INTVAL (operands[1]) >= -16)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]) - 8);
+ return "subq%.w #8,%0\;subq%.w %1,%0";
+ }
+ }
+ if (ADDRESS_REG_P (operands[0]) && !TUNE_68040)
+ return MOTOROLA ? "lea (%c1,%0),%0" : "lea %0@(%c1),%0";
+ }
+ return "add%.w %1,%0";
+})
+
+(define_insn "addqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=m,d")
+ (plus:QI (match_operand:QI 1 "general_operand" "%0,0")
+ (match_operand:QI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) >= 128)
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 256);
+
+ if (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) <= 8)
+ return "addq%.b %2,%0";
+ if (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) >= -8)
+ {
+ operands[2] = GEN_INT (- INTVAL (operands[2]));
+ return "subq%.b %2,%0";
+ }
+ }
+ return "add%.b %2,%0";
+})
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) >= 128)
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 256);
+
+ if (INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= 8)
+ return "addq%.b %1,%0";
+ if (INTVAL (operands[1]) < 0 && INTVAL (operands[1]) >= -8)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ return "subq%.b %1,%0";
+ }
+ }
+ return "add%.b %1,%0";
+})
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (plus:QI (match_operand:QI 1 "general_src_operand" "dn,dmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) >= 128)
+ operands[1] = GEN_INT (INTVAL (operands[1]) - 256);
+
+ if (INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= 8)
+ return "addq%.b %1,%0";
+ if (INTVAL (operands[1]) < 0 && INTVAL (operands[1]) >= -8)
+ {
+ operands[1] = GEN_INT (- INTVAL (operands[1]));
+ return "subq%.b %1,%0";
+ }
+ }
+ return "add%.b %1,%0";
+})
+
+(define_expand "add<mode>3"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (plus:FP (match_operand:FP 1 "general_operand" "")
+ (match_operand:FP 2 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "add<mode>3_floatsi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (plus:FP (float:FP (match_operand:SI 2 "general_operand" "dmi"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+ "f<FP:round>add%.l %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "add<mode>3_floathi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (plus:FP (float:FP (match_operand:HI 2 "general_operand" "dmn"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+ "f<FP:round>add%.w %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "add<mode>3_floatqi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (plus:FP (float:FP (match_operand:QI 2 "general_operand" "dmn"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+ "f<FP:round>add%.b %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "add<mode>3_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (plus:FP (match_operand:FP 1 "general_operand" "%0")
+ (match_operand:FP 2 "general_operand" "f<FP:dreg>m<FP:const>")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:round>add%.x %2,%0";
+ return "f<FP:round>add%.<FP:prec> %f2,%0";
+}
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "add<mode>3_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (plus:FP (match_operand:FP 1 "general_operand" "%0")
+ (match_operand:FP 2 "general_operand" "f<FP:dreg><Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:prec>add%.d %2,%0";
+ return "f<FP:prec>add%.<FP:prec> %2,%0";
+}
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+;; subtract instructions
+
+(define_insn "subdi_sexthishl32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o,a,*d,*d")
+ (minus:DI (match_operand:DI 1 "general_operand" "0,0,0,0")
+ (ashift:DI (sign_extend:DI (match_operand:HI 2 "general_operand" "rm,rm,rm,rm"))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=&d,X,a,?d"))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (ADDRESS_REG_P (operands[0]))
+ return "sub%.w %2,%0";
+ else if (ADDRESS_REG_P (operands[3]))
+ return "move%.w %2,%3\;sub%.l %3,%0";
+ else
+ return "move%.w %2,%3\;ext%.l %3\;sub%.l %3,%0";
+})
+
+(define_insn "subdi_dishl32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+ro")
+ (minus:DI (match_dup 0)
+ (ashift:DI (match_operand:DI 1 "general_operand" "ro")
+ (const_int 32))))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[1]) == REG)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else
+ operands[1] = adjust_address (operands[1], SImode, 4);
+ return "sub%.l %1,%0";
+}
+ [(set_attr "type" "alu_l")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o<>,d,d,d")
+ (minus:DI (match_operand:DI 1 "general_operand" "0,0,0,0")
+ (match_operand:DI 2 "general_operand" "d,no>,d,a")))
+ (clobber (match_scratch:SI 3 "=&d,&d,X,&d"))]
+ ""
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ if (DATA_REG_P (operands[2]))
+ return "sub%.l %R2,%R0\;subx%.l %2,%0";
+ else if (GET_CODE (operands[2]) == MEM
+ && GET_CODE (XEXP (operands[2], 0)) == POST_INC)
+ {
+ return "move%.l %2,%3\;sub%.l %2,%R0\;subx%.l %3,%0";
+ }
+ else
+ {
+ rtx high, low;
+ rtx xoperands[2];
+
+ if (GET_CODE (operands[2]) == REG)
+ {
+ low = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+ high = operands[2];
+ }
+ else if (CONSTANT_P (operands[2]))
+ split_double (operands[2], &high, &low);
+ else
+ {
+ low = adjust_address (operands[2], SImode, 4);
+ high = operands[2];
+ }
+
+ operands[1] = low, operands[2] = high;
+ xoperands[0] = operands[3];
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) >= -8 && INTVAL (operands[1]) < 0)
+ xoperands[1] = GEN_INT (-INTVAL (operands[2]) - 1);
+ else
+ xoperands[1] = operands[2];
+
+ output_asm_insn (output_move_simode (xoperands), xoperands);
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) <= 8)
+ return "subq%.l %1,%R0\;subx%.l %3,%0";
+ else if (INTVAL (operands[1]) >= -8 && INTVAL (operands[1]) < 0)
+ {
+ operands[1] = GEN_INT (-INTVAL (operands[1]));
+ return "addq%.l %1,%R0\;addx%.l %3,%0";
+ }
+ }
+ return "sub%.l %1,%R0\;subx%.l %3,%0";
+ }
+ }
+ else
+ {
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+ CC_STATUS_INIT;
+ if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+ {
+ operands[1]
+ = gen_rtx_MEM (SImode, plus_constant (XEXP (operands[0], 0), -8));
+ return "move%.l %0,%3\;sub%.l %R2,%0\;subx%.l %2,%3\;move%.l %3,%1";
+ }
+ else if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ {
+ operands[1] = XEXP(operands[0], 0);
+ return "sub%.l %R2,%0\;move%.l %0,%3\;subx%.l %2,%3\;move%.l %3,%1";
+ }
+ else
+ {
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "sub%.l %R2,%1\;move%.l %0,%3\;subx%.l %2,%3\;move%.l %3,%0";
+ }
+ }
+})
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mda,m,d,a")
+ (minus:SI (match_operand:SI 1 "general_operand" "0,0,0,0")
+ (match_operand:SI 2 "general_src_operand" "I,dT,mSrT,mSrs")))]
+ ""
+ "@
+ subq%.l %2,%0
+ sub%.l %2,%0
+ sub%.l %2,%0
+ sub%.l %2,%0"
+ [(set_attr "type" "aluq_l,alu_l,alu_l,alu_l")
+ (set_attr "opy" "2")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=a")
+ (minus:SI (match_operand:SI 1 "general_operand" "0")
+ (sign_extend:SI
+ (match_operand:HI 2 "nonimmediate_src_operand" "rmS"))))]
+ "!TARGET_COLDFIRE"
+ "sub%.w %2,%0")
+
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=m,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "0,0")
+ (match_operand:HI 2 "general_src_operand" "dn,rmSn")))]
+ "!TARGET_COLDFIRE"
+ "sub%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (minus:HI (match_dup 0)
+ (match_operand:HI 1 "general_src_operand" "dn,rmSn")))]
+ "!TARGET_COLDFIRE"
+ "sub%.w %1,%0")
+
+(define_insn "subqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=m,d")
+ (minus:QI (match_operand:QI 1 "general_operand" "0,0")
+ (match_operand:QI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "sub%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "sub%.b %1,%0")
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (minus:FP (match_operand:FP 1 "general_operand" "")
+ (match_operand:FP 2 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "sub<mode>3_floatsi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (minus:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:SI 2 "general_operand" "dmi"))))]
+ "TARGET_68881"
+ "f<FP:round>sub%.l %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "sub<mode>3_floathi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (minus:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:HI 2 "general_operand" "dmn"))))]
+ "TARGET_68881"
+ "f<FP:round>sub%.w %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "sub<mode>3_floatqi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (minus:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:QI 2 "general_operand" "dmn"))))]
+ "TARGET_68881"
+ "f<FP:round>sub%.b %2,%0"
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "sub<mode>3_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (minus:FP (match_operand:FP 1 "general_operand" "0")
+ (match_operand:FP 2 "general_operand" "f<FP:dreg>m<FP:const>")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:round>sub%.x %2,%0";
+ return "f<FP:round>sub%.<FP:prec> %f2,%0";
+}
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+(define_insn "sub<mode>3_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (minus:FP (match_operand:FP 1 "general_operand" "0")
+ (match_operand:FP 2 "general_operand" "f<FP:dreg><Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:prec>sub%.d %2,%0";
+ return "f<FP:prec>sub%.<FP:prec> %2,%0";
+}
+ [(set_attr "type" "falu")
+ (set_attr "opy" "2")])
+
+;; multiply instructions
+
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d")
+ (mult:HI (match_operand:HI 1 "general_operand" "%0")
+ (match_operand:HI 2 "general_src_operand" "dmSn")))]
+ ""
+{
+ return MOTOROLA ? "muls%.w %2,%0" : "muls %2,%0";
+}
+ [(set_attr "type" "mul_w")
+ (set_attr "opy" "2")])
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:SI
+ (match_operand:HI 2 "nonimmediate_src_operand" "dmS"))))]
+ ""
+{
+ return MOTOROLA ? "muls%.w %2,%0" : "muls %2,%0";
+}
+ [(set_attr "type" "mul_w")
+ (set_attr "opy" "2")])
+
+(define_insn "*mulhisisi3_s"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "INTVAL (operands[2]) >= -0x8000 && INTVAL (operands[2]) <= 0x7fff"
+{
+ return MOTOROLA ? "muls%.w %2,%0" : "muls %2,%0";
+}
+ [(set_attr "type" "mul_w")
+ (set_attr "opy" "2")])
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (mult:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_68020 || TARGET_COLDFIRE"
+ "")
+
+(define_insn "*mulsi3_68020"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (match_operand:SI 1 "general_operand" "%0")
+ (match_operand:SI 2 "general_src_operand" "dmSTK")))]
+ "TARGET_68020"
+ "muls%.l %2,%0"
+ [(set_attr "type" "mul_l")
+ (set_attr "opy" "2")])
+
+(define_insn "*mulsi3_cf"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (match_operand:SI 1 "general_operand" "%0")
+ (match_operand:SI 2 "general_operand" "d<Q>")))]
+ "TARGET_COLDFIRE"
+ "muls%.l %2,%0"
+ [(set_attr "type" "mul_l")
+ (set_attr "opy" "2")])
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:SI
+ (match_operand:HI 2 "nonimmediate_src_operand" "dmS"))))]
+ ""
+{
+ return MOTOROLA ? "mulu%.w %2,%0" : "mulu %2,%0";
+}
+ [(set_attr "type" "mul_w")
+ (set_attr "opy" "2")])
+
+(define_insn "*mulhisisi3_z"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 0xffff"
+{
+ return MOTOROLA ? "mulu%.w %2,%0" : "mulu %2,%0";
+}
+ [(set_attr "type" "mul_w")
+ (set_attr "opy" "2")])
+
+;; We need a separate DEFINE_EXPAND for u?mulsidi3 to be able to use the
+;; proper matching constraint. This is because the matching is between
+;; the high-numbered word of the DImode operand[0] and operand[1].
+(define_expand "umulsidi3"
+ [(parallel
+ [(set (subreg:SI (match_operand:DI 0 "register_operand" "") 4)
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (subreg:SI (match_dup 0) 0)
+ (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_dup 1))
+ (zero_extend:DI (match_dup 2)))
+ (const_int 32))))])]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "nonimmediate_operand" "dm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_dup 1))
+ (zero_extend:DI (match_dup 2)))
+ (const_int 32))))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "mulu%.l %2,%3:%0")
+
+; Match immediate case. For 2.4 only match things < 2^31.
+; It's tricky with larger values in these patterns since we need to match
+; values between the two parallel multiplies, between a CONST_DOUBLE and
+; a CONST_INT.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_dup 1))
+ (match_dup 2))
+ (const_int 32))))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE
+ && (unsigned) INTVAL (operands[2]) <= 0x7fffffff"
+ "mulu%.l %2,%3:%0")
+
+(define_expand "mulsidi3"
+ [(parallel
+ [(set (subreg:SI (match_operand:DI 0 "register_operand" "") 4)
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (subreg:SI (match_dup 0) 0)
+ (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (const_int 32))))])]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "nonimmediate_operand" "dm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (const_int 32))))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "muls%.l %2,%3:%0")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_dup 1))
+ (match_dup 2))
+ (const_int 32))))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "muls%.l %2,%3:%0")
+
+(define_expand "umulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "general_operand" "")))
+ (const_int 32))))
+ (clobber (match_dup 3))])]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+{
+ operands[3] = gen_reg_rtx (SImode);
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = immed_double_const (INTVAL (operands[2]) & 0xffffffff,
+ 0, DImode);
+
+ /* We have to adjust the operand order for the matching constraints. */
+ emit_insn (gen_const_umulsi3_highpart (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "%1"))
+ (zero_extend:DI (match_operand:SI 3 "nonimmediate_operand" "dm")))
+ (const_int 32))))
+ (clobber (match_operand:SI 1 "register_operand" "=d"))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "mulu%.l %3,%0:%1")
+
+(define_insn "const_umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 2 "register_operand" "1"))
+ (match_operand:DI 3 "const_uint32_operand" "n"))
+ (const_int 32))))
+ (clobber (match_operand:SI 1 "register_operand" "=d"))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "mulu%.l %3,%0:%1")
+
+(define_expand "smulsi3_highpart"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "general_operand" "")))
+ (const_int 32))))
+ (clobber (match_dup 3))])]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+{
+ operands[3] = gen_reg_rtx (SImode);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ /* We have to adjust the operand order for the matching constraints. */
+ emit_insn (gen_const_smulsi3_highpart (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" "%1"))
+ (sign_extend:DI (match_operand:SI 3 "nonimmediate_operand" "dm")))
+ (const_int 32))))
+ (clobber (match_operand:SI 1 "register_operand" "=d"))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "muls%.l %3,%0:%1")
+
+(define_insn "const_smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 2 "register_operand" "1"))
+ (match_operand:DI 3 "const_sint32_operand" "n"))
+ (const_int 32))))
+ (clobber (match_operand:SI 1 "register_operand" "=d"))]
+ "TARGET_68020 && !TUNE_68060 && !TARGET_COLDFIRE"
+ "muls%.l %3,%0:%1")
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (mult:FP (match_operand:FP 1 "general_operand" "")
+ (match_operand:FP 2 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "mul<mode>3_floatsi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (mult:FP (float:FP (match_operand:SI 2 "general_operand" "dmi"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>mul%.l %2,%0"
+ : "f<FP:round_mul>mul%.l %2,%0";
+}
+ [(set_attr "type" "fmul")
+ (set_attr "opy" "2")])
+
+(define_insn "mul<mode>3_floathi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (mult:FP (float:FP (match_operand:HI 2 "general_operand" "dmn"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>mul%.w %2,%0"
+ : "f<FP:round_mul>mul%.w %2,%0";
+}
+ [(set_attr "type" "fmul")
+ (set_attr "opy" "2")])
+
+(define_insn "mul<mode>3_floatqi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (mult:FP (float:FP (match_operand:QI 2 "general_operand" "dmn"))
+ (match_operand:FP 1 "general_operand" "0")))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>mul%.b %2,%0"
+ : "f<FP:round_mul>mul%.b %2,%0";
+}
+ [(set_attr "type" "fmul")
+ (set_attr "opy" "2")])
+
+(define_insn "muldf_68881"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f")
+ (mult:DF (match_operand:DF 1 "general_operand" "%0")
+ (match_operand:DF 2 "general_operand" "fmG")))]
+ "TARGET_68881"
+{
+ if (GET_CODE (operands[2]) == CONST_DOUBLE
+ && floating_exact_log2 (operands[2]) && !TUNE_68040_60)
+ {
+ int i = floating_exact_log2 (operands[2]);
+ operands[2] = GEN_INT (i);
+ return "fscale%.l %2,%0";
+ }
+ if (REG_P (operands[2]))
+ return "f%&mul%.x %2,%0";
+ return "f%&mul%.d %f2,%0";
+})
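+
+;; fscale adds its integer operand to the exponent of the fp register,
+;; so a multiply by an exact power of two (floating_exact_log2) costs
+;; only an exponent adjustment.  It is avoided on the 68040/68060
+;; (!TUNE_68040_60), where fscale traps to software emulation and a
+;; plain fmul is faster.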
+
+(define_insn "mulsf_68881"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f")
+ (mult:SF (match_operand:SF 1 "general_operand" "%0")
+ (match_operand:SF 2 "general_operand" "fdmF")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[2]))
+ return (TARGET_68040
+ ? "fsmul%.x %2,%0"
+ : "fsglmul%.x %2,%0");
+ return (TARGET_68040
+ ? "fsmul%.s %f2,%0"
+ : "fsglmul%.s %f2,%0");
+})
+
+(define_insn "mulxf3_68881"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f")
+ (mult:XF (match_operand:XF 1 "nonimmediate_operand" "%0")
+ (match_operand:XF 2 "nonimmediate_operand" "fm")))]
+ "TARGET_68881"
+{
+ return "fmul%.x %f2,%0";
+})
+
+(define_insn "fmul<mode>3_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (mult:FP (match_operand:FP 1 "general_operand" "%0")
+ (match_operand:FP 2 "general_operand" "f<Q>U<FP:dreg>")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:prec>mul%.d %2,%0";
+ return "f<FP:prec>mul%.<FP:prec> %2,%0";
+}
+ [(set_attr "type" "fmul")
+ (set_attr "opy" "2")])
+
+;; divide instructions
+
+(define_expand "div<mode>3"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (div:FP (match_operand:FP 1 "general_operand" "")
+ (match_operand:FP 2 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "div<mode>3_floatsi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (div:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:SI 2 "general_operand" "dmi"))))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>div%.l %2,%0"
+ : "f<FP:round_mul>div%.l %2,%0";
+})
+
+(define_insn "div<mode>3_floathi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (div:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:HI 2 "general_operand" "dmn"))))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>div%.w %2,%0"
+ : "f<FP:round_mul>div%.w %2,%0";
+})
+
+(define_insn "div<mode>3_floatqi_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (div:FP (match_operand:FP 1 "general_operand" "0")
+ (float:FP (match_operand:QI 2 "general_operand" "dmn"))))]
+ "TARGET_68881"
+{
+ return TARGET_68040
+ ? "f<FP:round>div%.b %2,%0"
+ : "f<FP:round_mul>div%.b %2,%0";
+})
+
+(define_insn "div<mode>3_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (div:FP (match_operand:FP 1 "general_operand" "0")
+ (match_operand:FP 2 "general_operand" "f<FP:dreg>m<FP:const>")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[2]))
+ return (TARGET_68040
+ ? "f<FP:round>div%.x %2,%0"
+ : "f<FP:round_mul>div%.x %2,%0");
+ return (TARGET_68040
+ ? "f<FP:round>div%.<FP:prec> %f2,%0"
+ : "f<FP:round_mul>div%.<FP:prec> %f2,%0");
+})
+
+(define_insn "div<mode>3_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (div:FP (match_operand:FP 1 "general_operand" "0")
+ (match_operand:FP 2 "general_operand" "f<Q>U<FP:dreg>")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[2]))
+ return "f<FP:prec>div%.d %2,%0";
+ return "f<FP:prec>div%.<FP:prec> %2,%0";
+}
+ [(set_attr "type" "fdiv")
+ (set_attr "opy" "2")])
+
+;; Remainder instructions.
+
+(define_expand "divmodsi4"
+ [(parallel
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (div:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_src_operand" "")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))])]
+ "TARGET_68020 || TARGET_CF_HWDIV"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (div:SI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_src_operand" "d<Q>U")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=&d")
+ (mod:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_CF_HWDIV"
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return "divs%.l %2,%0";
+ else if (find_reg_note (insn, REG_UNUSED, operands[0]))
+ return "rems%.l %2,%3:%0";
+ else
+ return "rems%.l %2,%3:%0\;divs%.l %2,%0";
+}
+ [(set_attr "type" "div_l")
+ (set_attr "opy" "2")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (div:SI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_src_operand" "dmSTK")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=d")
+ (mod:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_68020"
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return "divs%.l %2,%0";
+ else
+ return "divsl%.l %2,%3:%0";
+})
+
+(define_expand "udivmodsi4"
+ [(parallel
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (udiv:SI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_src_operand" "dmSTK")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=d")
+ (umod:SI (match_dup 1) (match_dup 2)))])]
+ "TARGET_68020 || TARGET_CF_HWDIV"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (udiv:SI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_src_operand" "d<Q>U")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=&d")
+ (umod:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_CF_HWDIV"
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return "divu%.l %2,%0";
+ else if (find_reg_note (insn, REG_UNUSED, operands[0]))
+ return "remu%.l %2,%3:%0";
+ else
+ return "remu%.l %2,%3:%0\;divu%.l %2,%0";
+}
+ [(set_attr "type" "div_l")
+ (set_attr "opy" "2")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (udiv:SI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_src_operand" "dmSTK")))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=d")
+ (umod:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_68020 && !TARGET_COLDFIRE"
+{
+ if (find_reg_note (insn, REG_UNUSED, operands[3]))
+ return "divu%.l %2,%0";
+ else
+ return "divul%.l %2,%3:%0";
+})
+
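+;; divs.w leaves the quotient in the low word and the remainder in the
+;; upper word of the destination, which is why the remainder is picked
+;; up below with a move.l followed by swap (sketch):
+;;     divs.w %2,%0       | %0 = rem:quot
+;;     move.l %0,%3
+;;     swap %3            | %3 low word = remainder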
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d")
+ (div:HI (match_operand:HI 1 "general_operand" "0")
+ (match_operand:HI 2 "general_src_operand" "dmSKT")))
+ (set (match_operand:HI 3 "nonimmediate_operand" "=d")
+ (mod:HI (match_dup 1) (match_dup 2)))]
+ "!TARGET_COLDFIRE || TARGET_CF_HWDIV"
+{
+ output_asm_insn (MOTOROLA ?
+ "ext%.l %0\;divs%.w %2,%0" :
+ "extl %0\;divs %2,%0",
+ operands);
+ if (!find_reg_note (insn, REG_UNUSED, operands[3]))
+ {
+ CC_STATUS_INIT;
+ return "move%.l %0,%3\;swap %3";
+ }
+ else
+ return "";
+})
+
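+;; For the unsigned case the 32-bit dividend register must have its
+;; upper word cleared first: mvz.w does the zero-extension in one
+;; instruction where the ISA has it, otherwise an explicit
+;; and.l #0xFFFF mask is emitted (see the ISA_HAS_MVS_MVZ test below).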
+(define_insn "udivmodhi4"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d")
+ (udiv:HI (match_operand:HI 1 "general_operand" "0")
+ (match_operand:HI 2 "general_src_operand" "dmSKT")))
+ (set (match_operand:HI 3 "nonimmediate_operand" "=d")
+ (umod:HI (match_dup 1) (match_dup 2)))]
+ "!TARGET_COLDFIRE || TARGET_CF_HWDIV"
+{
+ if (ISA_HAS_MVS_MVZ)
+ output_asm_insn (MOTOROLA ?
+ "mvz%.w %0,%0\;divu%.w %2,%0" :
+ "mvz%.w %0,%0\;divu %2,%0",
+ operands);
+ else
+ output_asm_insn (MOTOROLA ?
+ "and%.l #0xFFFF,%0\;divu%.w %2,%0" :
+ "and%.l #0xFFFF,%0\;divu %2,%0",
+ operands);
+
+ if (!find_reg_note (insn, REG_UNUSED, operands[3]))
+ {
+ CC_STATUS_INIT;
+ return "move%.l %0,%3\;swap %3";
+ }
+ else
+ return "";
+})
+
+;; logical-and instructions
+
+;; "anddi3" is mainly here to help combine().
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o,d")
+ (and:DI (match_operand:DI 1 "general_operand" "%0,0")
+ (match_operand:DI 2 "general_operand" "dn,don")))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ /* We can get CONST_DOUBLE, but also const1_rtx etc. */
+ if (CONSTANT_P (operands[2]))
+ {
+ rtx hi, lo;
+
+ split_double (operands[2], &hi, &lo);
+
+ switch (INTVAL (hi))
+ {
+ case 0 :
+ output_asm_insn ("clr%.l %0", operands);
+ break;
+ case -1 :
+ break;
+ default :
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = hi;
+ output_asm_insn (output_andsi3 (xoperands), xoperands);
+ }
+ }
+ if (GET_CODE (operands[0]) == REG)
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[0] = adjust_address (operands[0], SImode, 4);
+ switch (INTVAL (lo))
+ {
+ case 0 :
+ output_asm_insn ("clr%.l %0", operands);
+ break;
+ case -1 :
+ break;
+ default :
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = lo;
+ output_asm_insn (output_andsi3 (xoperands), xoperands);
+ }
+ }
+ return "";
+ }
+ if (GET_CODE (operands[0]) != REG)
+ {
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "and%.l %2,%0\;and%.l %R2,%1";
+ }
+ if (GET_CODE (operands[2]) != REG)
+ {
+ operands[1] = adjust_address (operands[2], SImode, 4);
+ return "and%.l %2,%0\;and%.l %1,%R0";
+ }
+ return "and%.l %2,%0\;and%.l %R2,%R0";
+})
+
+;; Prevent AND from being made with sp. This doesn't exist in the machine
+;; and reload will cause inefficient code. Since sp is a FIXED_REG, we
+;; can't allocate pseudos into it.
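+;; (AND cannot take an address register as its destination anyway, so
+;; an AND into sp would have to be reloaded through a scratch register.)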
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "not_sp_operand" "")
+ (and:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_src_operand" "")))]
+ ""
+ "")
+
+;; produced by split operations after reload finished
+(define_insn "*andsi3_split"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (and:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "i")))]
+ "reload_completed && !TARGET_COLDFIRE"
+{
+ return output_andsi3 (operands);
+})
+
+(define_insn "andsi3_internal"
+ [(set (match_operand:SI 0 "not_sp_operand" "=m,d")
+ (and:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_src_operand" "dKT,dmSM")))]
+ "!TARGET_COLDFIRE"
+{
+ return output_andsi3 (operands);
+})
+
+(define_insn "andsi3_5200"
+ [(set (match_operand:SI 0 "not_sp_operand" "=m,d")
+ (and:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_src_operand" "d,dmsK")))]
+ "TARGET_COLDFIRE"
+{
+ if (ISA_HAS_MVS_MVZ
+ && DATA_REG_P (operands[0])
+ && GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) == 0x000000ff)
+ return "mvz%.b %0,%0";
+ else if (INTVAL (operands[2]) == 0x0000ffff)
+ return "mvz%.w %0,%0";
+ }
+ return output_andsi3 (operands);
+})
+
+(define_insn "andhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=m,d")
+ (and:HI (match_operand:HI 1 "general_operand" "%0,0")
+ (match_operand:HI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "and%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (and:HI (match_dup 0)
+ (match_operand:HI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "and%.w %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (and:HI (match_operand:HI 1 "general_src_operand" "dn,dmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "and%.w %1,%0")
+
+(define_insn "andqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=m,d")
+ (and:QI (match_operand:QI 1 "general_operand" "%0,0")
+ (match_operand:QI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "and%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "and%.b %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (and:QI (match_operand:QI 1 "general_src_operand" "dn,dmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "and%.b %1,%0")
+
+;; inclusive-or instructions
+
+(define_insn "iordi_zext"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o,d")
+ (ior:DI (zero_extend:DI (match_operand 1 "general_operand" "dn,dmn"))
+ (match_operand:DI 2 "general_operand" "0,0")))]
+ "!TARGET_COLDFIRE"
+{
+ int byte_mode;
+
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == REG)
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[0] = adjust_address (operands[0], SImode, 4);
+ if (GET_MODE (operands[1]) == SImode)
+ return "or%.l %1,%0";
+ byte_mode = (GET_MODE (operands[1]) == QImode);
+ if (GET_CODE (operands[0]) == MEM)
+ operands[0] = adjust_address (operands[0], byte_mode ? QImode : HImode,
+ byte_mode ? 3 : 2);
+ if (byte_mode)
+ return "or%.b %1,%0";
+ else
+ return "or%.w %1,%0";
+})
+
+;; "iordi3" is mainly here to help combine().
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=o,d")
+ (ior:DI (match_operand:DI 1 "general_operand" "%0,0")
+ (match_operand:DI 2 "general_operand" "dn,don")))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ /* We can get CONST_DOUBLE, but also const1_rtx etc. */
+ if (CONSTANT_P (operands[2]))
+ {
+ rtx hi, lo;
+
+ split_double (operands[2], &hi, &lo);
+
+ switch (INTVAL (hi))
+ {
+ case 0 :
+ break;
+ case -1 :
+ /* FIXME : a scratch register would be welcome here if operand[0]
+ is not a register */
+ output_asm_insn ("move%.l #-1,%0", operands);
+ break;
+ default :
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = hi;
+ output_asm_insn (output_iorsi3 (xoperands), xoperands);
+ }
+ }
+ if (GET_CODE (operands[0]) == REG)
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[0] = adjust_address (operands[0], SImode, 4);
+ switch (INTVAL (lo))
+ {
+ case 0 :
+ break;
+ case -1 :
+ /* FIXME : a scratch register would be welcome here if operand[0]
+ is not a register */
+ output_asm_insn ("move%.l #-1,%0", operands);
+ break;
+ default :
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = lo;
+ output_asm_insn (output_iorsi3 (xoperands), xoperands);
+ }
+ }
+ return "";
+ }
+ if (GET_CODE (operands[0]) != REG)
+ {
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "or%.l %2,%0\;or%.l %R2,%1";
+ }
+ if (GET_CODE (operands[2]) != REG)
+ {
+ operands[1] = adjust_address (operands[2], SImode, 4);
+ return "or%.l %2,%0\;or%.l %1,%R0";
+ }
+ return "or%.l %2,%0\;or%.l %R2,%R0";
+})
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ior:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_src_operand" "")))]
+ ""
+ "")
+
+(define_insn "iorsi3_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=m,d")
+ (ior:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_src_operand" "dKT,dmSMT")))]
+ "! TARGET_COLDFIRE"
+{
+ return output_iorsi3 (operands);
+})
+
+(define_insn "iorsi3_5200"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=m,d")
+ (ior:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_src_operand" "d,dmsK")))]
+ "TARGET_COLDFIRE"
+{
+ return output_iorsi3 (operands);
+})
+
+(define_insn "iorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=m,d")
+ (ior:HI (match_operand:HI 1 "general_operand" "%0,0")
+ (match_operand:HI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "or%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (ior:HI (match_dup 0)
+ (match_operand:HI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "or%.w %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+m,d"))
+ (ior:HI (match_operand:HI 1 "general_src_operand" "dn,dmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "or%.w %1,%0")
+
+(define_insn "iorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=m,d")
+ (ior:QI (match_operand:QI 1 "general_operand" "%0,0")
+ (match_operand:QI 2 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "or%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "general_src_operand" "dn,dmSn")))]
+ "!TARGET_COLDFIRE"
+ "or%.b %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+m,d"))
+ (ior:QI (match_operand:QI 1 "general_src_operand" "dn,dmSn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "or%.b %1,%0")
+
+;; On all 68k models, this makes faster code in a special case.
+;; See also ashlsi_16, ashrsi_16 and lshrsi_16.
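+;; The combination below turns (hi << 16) | (u16)lo into roughly:
+;;     move.w %2,%0       | fetch the high source (when not already there)
+;;     swap %0            | move it into the upper half
+;;     move.w %1,%0       | drop the zero-extended part into the low word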
+
+(define_insn "iorsi_zexthi_ashl16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=&d")
+ (ior:SI (zero_extend:SI (match_operand:HI 1 "general_operand" "rmn"))
+ (ashift:SI (match_operand:SI 2 "general_operand" "or")
+ (const_int 16))))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[2]) != REG)
+ operands[2] = adjust_address (operands[2], HImode, 2);
+ if (GET_CODE (operands[2]) != REG
+ || REGNO (operands[2]) != REGNO (operands[0]))
+ output_asm_insn ("move%.w %2,%0", operands);
+ return "swap %0\;mov%.w %1,%0";
+})
+
+(define_insn "iorsi_zext"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=o,d")
+ (ior:SI (zero_extend:SI (match_operand 1 "general_operand" "dn,dmn"))
+ (match_operand:SI 2 "general_operand" "0,0")))]
+ "!TARGET_COLDFIRE"
+{
+ int byte_mode;
+
+ CC_STATUS_INIT;
+ byte_mode = (GET_MODE (operands[1]) == QImode);
+ if (GET_CODE (operands[0]) == MEM)
+ operands[0] = adjust_address (operands[0], byte_mode ? QImode : HImode,
+ byte_mode ? 3 : 2);
+ if (byte_mode)
+ return "or%.b %1,%0";
+ else
+ return "or%.w %1,%0";
+})
+
+;; xor instructions
+
+;; "xordi3" is mainly here to help combine().
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=od")
+ (xor:DI (match_operand:DI 1 "general_operand" "%0")
+ (match_operand:DI 2 "general_operand" "dn")))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ /* We can get CONST_DOUBLE, but also const1_rtx etc. */
+
+ if (CONSTANT_P (operands[2]))
+ {
+ rtx hi, lo;
+
+ split_double (operands[2], &hi, &lo);
+
+ switch (INTVAL (hi))
+ {
+ case 0 :
+ break;
+ case -1 :
+ output_asm_insn ("not%.l %0", operands);
+ break;
+ default :
+ /* FIXME : a scratch register would be welcome here if
+ -128 <= INTVAL (hi) < -1 */
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = hi;
+ output_asm_insn (output_xorsi3 (xoperands), xoperands);
+ }
+ }
+ if (GET_CODE (operands[0]) == REG)
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[0] = adjust_address (operands[0], SImode, 4);
+ switch (INTVAL (lo))
+ {
+ case 0 :
+ break;
+ case -1 :
+ output_asm_insn ("not%.l %0", operands);
+ break;
+ default :
+ /* FIXME : a scratch register would be welcome here if
+ -128 <= INTVAL (lo) < -1 */
+ /* FIXME : this should be merged with xorsi3 */
+ {
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[2] = lo;
+ output_asm_insn (output_xorsi3 (xoperands), xoperands);
+ }
+ }
+ return "";
+ }
+ if (GET_CODE (operands[0]) != REG)
+ {
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "eor%.l %2,%0\;eor%.l %R2,%1";
+ }
+ if (GET_CODE (operands[2]) != REG)
+ {
+ operands[1] = adjust_address (operands[2], SImode, 4);
+ return "eor%.l %2,%0\;eor%.l %1,%R0";
+ }
+ return "eor%.l %2,%0\;eor%.l %R2,%R0";
+})
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (xor:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "xorsi3_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=do,m")
+ (xor:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "di,dKT")))]
+ "!TARGET_COLDFIRE"
+{
+ return output_xorsi3 (operands);
+})
+
+(define_insn "xorsi3_5200"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm,d")
+ (xor:SI (match_operand:SI 1 "general_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "d,Ks")))]
+ "TARGET_COLDFIRE"
+{
+ return output_xorsi3 (operands);
+})
+
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm")
+ (xor:HI (match_operand:HI 1 "general_operand" "%0")
+ (match_operand:HI 2 "general_operand" "dn")))]
+ "!TARGET_COLDFIRE"
+ "eor%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+dm"))
+ (xor:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dn")))]
+ "!TARGET_COLDFIRE"
+ "eor%.w %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+dm"))
+ (xor:HI (match_operand:HI 1 "general_operand" "dn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "eor%.w %1,%0")
+
+(define_insn "xorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (xor:QI (match_operand:QI 1 "general_operand" "%0")
+ (match_operand:QI 2 "general_operand" "dn")))]
+ "!TARGET_COLDFIRE"
+ "eor%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+dm"))
+ (xor:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dn")))]
+ "!TARGET_COLDFIRE"
+ "eor%.b %1,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+dm"))
+ (xor:QI (match_operand:QI 1 "general_operand" "dn")
+ (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "eor%.b %1,%0")
+
+;; negation instructions
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "general_operand" "")))]
+ ""
+{
+ if (TARGET_COLDFIRE)
+ emit_insn (gen_negdi2_5200 (operands[0], operands[1]));
+ else
+ emit_insn (gen_negdi2_internal (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "negdi2_internal"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=<,do,!*a")
+ (neg:DI (match_operand:DI 1 "general_operand" "0,0,0")))]
+ "!TARGET_COLDFIRE"
+{
+ if (which_alternative == 0)
+ return "neg%.l %0\;negx%.l %0";
+ if (GET_CODE (operands[0]) == REG)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ if (ADDRESS_REG_P (operands[0]))
+ return "exg %/d0,%1\;neg%.l %/d0\;exg %/d0,%1\;exg %/d0,%0\;negx%.l %/d0\;exg %/d0,%0";
+ else
+ return "neg%.l %1\;negx%.l %0";
+})
+
+(define_insn "negdi2_5200"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d")
+ (neg:DI (match_operand:DI 1 "general_operand" "0")))]
+ "TARGET_COLDFIRE"
+{
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "neg%.l %1\;negx%.l %0";
+})
+
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (neg:SI (match_operand:SI 1 "general_operand" "")))]
+ ""
+{
+ if (TARGET_COLDFIRE)
+ emit_insn (gen_negsi2_5200 (operands[0], operands[1]));
+ else
+ emit_insn (gen_negsi2_internal (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "negsi2_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm")
+ (neg:SI (match_operand:SI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "neg%.l %0"
+ [(set_attr "type" "neg_l")])
+
+(define_insn "negsi2_5200"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (neg:SI (match_operand:SI 1 "general_operand" "0")))]
+ "TARGET_COLDFIRE"
+ "neg%.l %0"
+ [(set_attr "type" "neg_l")])
+
+(define_insn "neghi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm")
+ (neg:HI (match_operand:HI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "neg%.w %0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+dm"))
+ (neg:HI (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "neg%.w %0")
+
+(define_insn "negqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (neg:QI (match_operand:QI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "neg%.b %0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+dm"))
+ (neg:QI (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "neg%.b %0")
+
+;; If using software floating point, just flip the sign bit.
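+;; The sign of an IEEE float is the most significant bit of its first
+;; word, so the expanders below XOR that word with 0x80000000
+;; (spelled -2147483647 - 1 so the constant fits in a signed int).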
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (neg:SF (match_operand:SF 1 "general_operand" "")))]
+ ""
+{
+ if (!TARGET_HARD_FLOAT)
+ {
+ rtx result;
+ rtx target;
+
+ target = operand_subword_force (operands[0], 0, SFmode);
+ result = expand_binop (SImode, xor_optab,
+ operand_subword_force (operands[1], 0, SFmode),
+ GEN_INT (-2147483647 - 1), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ /* Make a place for REG_EQUAL. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+ }
+})
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (neg:DF (match_operand:DF 1 "general_operand" "")))]
+ ""
+{
+ if (!TARGET_HARD_FLOAT)
+ {
+ rtx result;
+ rtx target;
+ rtx insns;
+
+ start_sequence ();
+ target = operand_subword (operands[0], 0, 1, DFmode);
+ result = expand_binop (SImode, xor_optab,
+ operand_subword_force (operands[1], 0, DFmode),
+ GEN_INT (-2147483647 - 1), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
+ operand_subword_force (operands[1], 1, DFmode));
+
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn (insns);
+ DONE;
+ }
+})
+
+(define_expand "negxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_68881)
+ {
+ rtx result;
+ rtx target;
+ rtx insns;
+
+ start_sequence ();
+ target = operand_subword (operands[0], 0, 1, XFmode);
+ result = expand_binop (SImode, xor_optab,
+ operand_subword_force (operands[1], 0, XFmode),
+ GEN_INT (-2147483647 - 1), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ emit_move_insn (operand_subword (operands[0], 1, 1, XFmode),
+ operand_subword_force (operands[1], 1, XFmode));
+ emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
+ operand_subword_force (operands[1], 2, XFmode));
+
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn (insns);
+ DONE;
+ }
+})
+
+(define_insn "neg<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
+ (neg:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m<FP:const>,0")))]
+ "TARGET_68881"
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ operands[1] = GEN_INT (31);
+ return "bchg %1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ return "f<FP:round>neg%.x %1,%0";
+ return "f<FP:round>neg%.<FP:prec> %f1,%0";
+})
+
+(define_insn "neg<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
+ (neg:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U,0")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ operands[1] = GEN_INT (31);
+ return "bchg %1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ return "f<FP:prec>neg%.d %1,%0";
+ return "f<FP:prec>neg%.<FP:prec> %1,%0";
+})
+
+;; Sqrt instruction for the 68881
+
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "")
+ (sqrt:FP (match_operand:FP 1 "general_operand" "")))]
+ "TARGET_HARD_FLOAT"
+ "")
+
+(define_insn "sqrt<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (sqrt:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[1]))
+ return "f<FP:round>sqrt%.x %1,%0";
+ return "f<FP:round>sqrt%.<FP:prec> %1,%0";
+}
+ [(set_attr "type" "fsqrt")])
+
+(define_insn "sqrt<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (sqrt:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (FP_REG_P (operands[1]))
+ return "f<FP:prec>sqrt%.d %1,%0";
+ return "f<FP:prec>sqrt%.<FP:prec> %1,%0";
+}
+ [(set_attr "type" "fsqrt")])
+
+;; Absolute value instructions
+;; If using software floating point, just zero the sign bit.
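+;; Same layout as for negation: ANDing the first word with 0x7fffffff
+;; clears the IEEE sign bit and leaves everything else untouched.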
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (abs:SF (match_operand:SF 1 "general_operand" "")))]
+ ""
+{
+ if (!TARGET_HARD_FLOAT)
+ {
+ rtx result;
+ rtx target;
+
+ target = operand_subword_force (operands[0], 0, SFmode);
+ result = expand_binop (SImode, and_optab,
+ operand_subword_force (operands[1], 0, SFmode),
+ GEN_INT (0x7fffffff), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ /* Make a place for REG_EQUAL. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+ }
+})
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (abs:DF (match_operand:DF 1 "general_operand" "")))]
+ ""
+{
+ if (!TARGET_HARD_FLOAT)
+ {
+ rtx result;
+ rtx target;
+ rtx insns;
+
+ start_sequence ();
+ target = operand_subword (operands[0], 0, 1, DFmode);
+ result = expand_binop (SImode, and_optab,
+ operand_subword_force (operands[1], 0, DFmode),
+ GEN_INT (0x7fffffff), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
+ operand_subword_force (operands[1], 1, DFmode));
+
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn (insns);
+ DONE;
+ }
+})
+
+(define_expand "absxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_68881)
+ {
+ rtx result;
+ rtx target;
+ rtx insns;
+
+ start_sequence ();
+ target = operand_subword (operands[0], 0, 1, XFmode);
+ result = expand_binop (SImode, and_optab,
+ operand_subword_force (operands[1], 0, XFmode),
+ GEN_INT (0x7fffffff), target, 0, OPTAB_WIDEN);
+ gcc_assert (result);
+
+ if (result != target)
+ emit_move_insn (result, target);
+
+ emit_move_insn (operand_subword (operands[0], 1, 1, XFmode),
+ operand_subword_force (operands[1], 1, XFmode));
+ emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
+ operand_subword_force (operands[1], 2, XFmode));
+
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn (insns);
+ DONE;
+ }
+})
+
+(define_insn "abs<mode>2_68881"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
+ (abs:FP (match_operand:FP 1 "general_operand" "f<FP:dreg>m<FP:const>,0")))]
+ "TARGET_68881"
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ operands[1] = GEN_INT (31);
+ return "bclr %1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ return "f<FP:round>abs%.x %1,%0";
+ return "f<FP:round>abs%.<FP:prec> %f1,%0";
+})
+
+(define_insn "abs<mode>2_cf"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f,d")
+ (abs:FP (match_operand:FP 1 "general_operand" "f<FP:dreg><Q>U,0")))]
+ "TARGET_COLDFIRE_FPU"
+{
+ if (DATA_REG_P (operands[0]))
+ {
+ operands[1] = GEN_INT (31);
+ return "bclr %1,%0";
+ }
+ if (FP_REG_P (operands[1]))
+ return "f<FP:prec>abs%.d %1,%0";
+ return "f<FP:prec>abs%.<FP:prec> %1,%0";
+}
+ [(set_attr "type" "bitrw,fneg")])
+
+;; bit indexing instructions
+
+;; ColdFire ff1 instruction implements clz.
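+;; ff1 counts from the most significant bit down to the first set bit
+;; and leaves that distance in the register, matching clz; e.g. ff1 on
+;; 0x00010000 should yield 15 (sketch).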
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (clz:SI (match_operand:SI 1 "register_operand" "0")))]
+ "ISA_HAS_FF1"
+ "ff1 %0"
+ [(set_attr "type" "ext")])
+
+;; one's complement instructions
+
+;; "one_cmpldi2" is mainly here to help combine().
+(define_insn "one_cmpldi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=dm")
+ (not:DI (match_operand:DI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == REG)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else if (GET_CODE (XEXP (operands[0], 0)) == POST_INC
+ || GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ operands[1] = operands[0];
+ else
+ operands[1] = adjust_address (operands[0], SImode, 4);
+ return "not%.l %1\;not%.l %0";
+})
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (not:SI (match_operand:SI 1 "general_operand" "")))]
+ ""
+{
+ if (TARGET_COLDFIRE)
+ emit_insn (gen_one_cmplsi2_5200 (operands[0], operands[1]));
+ else
+ emit_insn (gen_one_cmplsi2_internal (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "one_cmplsi2_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=dm")
+ (not:SI (match_operand:SI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "not%.l %0")
+
+(define_insn "one_cmplsi2_5200"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (not:SI (match_operand:SI 1 "general_operand" "0")))]
+ "TARGET_COLDFIRE"
+ "not%.l %0"
+ [(set_attr "type" "neg_l")])
+
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=dm")
+ (not:HI (match_operand:HI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "not%.w %0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+dm"))
+ (not:HI (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "not%.w %0")
+
+(define_insn "one_cmplqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (not:QI (match_operand:QI 1 "general_operand" "0")))]
+ "!TARGET_COLDFIRE"
+ "not%.b %0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+dm"))
+ (not:QI (match_dup 0)))]
+ "!TARGET_COLDFIRE"
+ "not%.b %0")
+
+;; arithmetic shift instructions
+;; We don't need the shift-memory-by-1-bit instruction
+
+(define_insn "ashldi_extsi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro")
+ (ashift:DI
+ (match_operator:DI 2 "extend_operator"
+ [(match_operand:SI 1 "general_operand" "rm")])
+ (const_int 32)))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == REG)
+ operands[2] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[2] = adjust_address (operands[0], SImode, 4);
+ if (ADDRESS_REG_P (operands[0]))
+ return "move%.l %1,%0\;sub%.l %2,%2";
+ else
+ return "move%.l %1,%0\;clr%.l %2";
+})
+
+(define_insn "ashldi_sexthi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,a*d")
+ (ashift:DI (sign_extend:DI (match_operand:HI 1 "general_operand" "rm,rm"))
+ (const_int 32)))
+ (clobber (match_scratch:SI 2 "=a,X"))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ if (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ return "clr%.l %0\;move%.w %1,%2\;move%.l %2,%0";
+ else if (GET_CODE (XEXP (operands[0], 0)) == POST_INC)
+ return "move%.w %1,%2\;move%.l %2,%0\;clr%.l %0";
+ else
+ {
+ operands[3] = adjust_address (operands[0], SImode, 4);
+ return "move%.w %1,%2\;move%.l %2,%0\;clr%.l %3";
+ }
+ }
+ else if (DATA_REG_P (operands[0]))
+ return "move%.w %1,%0\;ext%.l %0\;clr%.l %R0";
+ else
+ return "move%.w %1,%0\;sub%.l %R0,%R0";
+})
+
+(define_insn "*ashldi3_const1"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashift:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 1)))]
+ "!TARGET_COLDFIRE"
+ "add%.l %R0,%R0\;addx%.l %0,%0")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 2)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (ashift:DI (match_dup 1) (const_int 1)))
+ (set (match_dup 0)
+ (ashift:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 3)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (ashift:DI (match_dup 1) (const_int 2)))
+ (set (match_dup 0)
+ (ashift:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 2)
+ (rotate:SI (match_dup 2) (const_int 8)))
+ (set (match_dup 3)
+ (rotate:SI (match_dup 3) (const_int 8)))
+ (set (strict_low_part (subreg:QI (match_dup 0) 3))
+ (subreg:QI (match_dup 0) 7))
+ (set (strict_low_part (subreg:QI (match_dup 0) 7))
+ (const_int 0))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 16)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 2)
+ (rotate:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 3)
+ (rotate:SI (match_dup 3) (const_int 16)))
+ (set (strict_low_part (subreg:HI (match_dup 0) 2))
+ (subreg:HI (match_dup 0) 6))
+ (set (strict_low_part (subreg:HI (match_dup 0) 6))
+ (const_int 0))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "pre_dec_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 32)))]
+ "reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "post_inc_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 32)))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (const_int 0))]
+{
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+})
+
+(define_insn_and_split "*ashldi3_const32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro<>")
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "ro")
+ (const_int 32)))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (match_dup 3))
+ (set (match_dup 2) (const_int 0))]
+ "split_di(operands, 2, operands + 2, operands + 4);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "reload_completed && !TARGET_COLDFIRE
+ && INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 40"
+ [(set (match_dup 4) (ashift:SI (match_dup 4) (match_dup 2)))
+ (set (match_dup 3) (match_dup 4))
+ (set (match_dup 4) (const_int 0))]
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 48)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 2)
+ (rotate:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 3) (const_int 0))
+ (set (strict_low_part (subreg:HI (match_dup 0) 2))
+ (const_int 0))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "reload_completed && !TARGET_COLDFIRE
+ && INTVAL (operands[2]) > 40 && INTVAL (operands[2]) <= 63"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 4) (ashift:SI (match_dup 4) (match_dup 3)))
+ (set (match_dup 3) (match_dup 4))
+ (set (match_dup 4) (const_int 0))]
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_insn "*ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashift:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "n")))]
+ "!TARGET_COLDFIRE
+ && ((INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3)
+ || INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16
+ || (INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 63))"
+ "#")
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "!TARGET_COLDFIRE"
+{
+ /* ??? A named pattern like this is not allowed to FAIL based
+ on its operands. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || ((INTVAL (operands[2]) < 1 || INTVAL (operands[2]) > 3)
+ && INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 16
+ && (INTVAL (operands[2]) < 32 || INTVAL (operands[2]) > 63)))
+ FAIL;
+})
+
+;; On most 68k models, this makes faster code in a special case.
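+;; "x << 16" is just the two halves exchanged with the new low word
+;; cleared, e.g. (sketch):
+;;     swap %0            | 0x1234abcd -> 0xabcd1234
+;;     clr.w %0           | 0xabcd1234 -> 0xabcd0000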
+
+(define_insn "ashlsi_16"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 16)))]
+ "!TUNE_68060"
+{
+ CC_STATUS_INIT;
+ return "swap %0\;clr%.w %0";
+})
+
+;; ashift patterns: use lsl instead of asl, because lsl always clears the
+;; overflow bit, so we must not set CC_NO_OVERFLOW.
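+;; (asl and lsl shift in the same bits; they differ only in the V flag,
+;; which asl sets if the sign changes during the shift and lsl always
+;; clears.)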
+
+;; On the 68000, this makes faster code in a special case.
+
+(define_insn "ashlsi_17_24"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "TUNE_68000_10
+ && INTVAL (operands[2]) > 16
+ && INTVAL (operands[2]) <= 24"
+{
+ CC_STATUS_INIT;
+
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 16);
+ return "lsl%.w %2,%0\;swap %0\;clr%.w %0";
+})
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "dI")))]
+ ""
+{
+ if (operands[2] == const1_rtx)
+ {
+ cc_status.flags = CC_NO_OVERFLOW;
+ return "add%.l %0,%0";
+ }
+ return "lsl%.l %2,%0";
+})
+
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsl%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (ashift:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsl%.w %1,%0")
+
+(define_insn "ashlqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ashift:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsl%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d"))
+ (ashift:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsl%.b %1,%0")
+
+;; On most 68k models, this makes faster code in a special case.
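+;; An arithmetic "x >> 16" is swap plus a sign-extension of the new low
+;; word, e.g. (sketch):
+;;     swap %0            | 0xabcd1234 -> 0x1234abcd
+;;     ext.l %0           | 0x1234abcd -> 0xffffabcd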
+
+(define_insn "ashrsi_16"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 16)))]
+ "!TUNE_68060"
+ "swap %0\;ext%.l %0")
+
+;; On the 68000, this makes faster code in a special case.
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "TUNE_68000_10
+ && INTVAL (operands[2]) > 16
+ && INTVAL (operands[2]) <= 24"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 16);
+ return "swap %0\;asr%.w %2,%0\;ext%.l %0";
+})
+
+(define_insn "subreghi1ashrdi_const32"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (subreg:HI (ashiftrt:DI (match_operand:DI 1 "general_operand" "ro")
+ (const_int 32)) 6))]
+ ""
+{
+ if (GET_CODE (operands[1]) != REG)
+ operands[1] = adjust_address (operands[1], HImode, 2);
+ return "move%.w %1,%0";
+}
+ [(set_attr "type" "move")])
+
+(define_insn "subregsi1ashrdi_const32"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (subreg:SI (ashiftrt:DI (match_operand:DI 1 "general_operand" "ro")
+ (const_int 32)) 4))]
+ ""
+{
+ return "move%.l %1,%0";
+}
+ [(set_attr "type" "move_l")])
+
+(define_insn "*ashrdi3_const1"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 1)))]
+ "!TARGET_COLDFIRE"
+{
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "asr%.l #1,%0\;roxr%.l #1,%1";
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 2)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (ashiftrt:DI (match_dup 1) (const_int 1)))
+ (set (match_dup 0)
+ (ashiftrt:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 3)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (ashiftrt:DI (match_dup 1) (const_int 2)))
+ (set (match_dup 0)
+ (ashiftrt:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (strict_low_part (subreg:QI (match_dup 0) 7))
+ (subreg:QI (match_dup 0) 3))
+ (set (match_dup 2)
+ (ashiftrt:SI (match_dup 2) (const_int 8)))
+ (set (match_dup 3)
+ (rotatert:SI (match_dup 3) (const_int 8)))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 16)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (strict_low_part (subreg:HI (match_dup 0) 6))
+ (subreg:HI (match_dup 0) 2))
+ (set (match_dup 2)
+ (rotate:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 3)
+ (rotate:SI (match_dup 3) (const_int 16)))
+ (set (match_dup 2)
+ (sign_extend:SI (subreg:HI (match_dup 2) 2)))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_insn "*ashrdi_const32"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_src_operand" "ro")
+ (const_int 32)))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (TARGET_68020)
+ return "move%.l %1,%R0\;smi %0\;extb%.l %0";
+ else
+ return "move%.l %1,%R0\;smi %0\;ext%.w %0\;ext%.l %0";
+})
+
+(define_insn "*ashrdi_const32_mem"
+ [(set (match_operand:DI 0 "memory_operand" "=o,<")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_src_operand" "ro,ro")
+ (const_int 32)))
+ (clobber (match_scratch:SI 2 "=d,d"))]
+ ""
+{
+ CC_STATUS_INIT;
+ operands[3] = adjust_address (operands[0], SImode,
+ which_alternative == 0 ? 4 : 0);
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "move%.l %1,%3\;smi %2\;extb%.l %2\;move%.l %2,%0";
+ else
+ return "move%.l %1,%3\;smi %2\;ext%.w %2\;ext%.l %2\;move%.l %2,%0";
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 63)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 3)
+ (ashiftrt:SI (match_dup 3) (const_int 31)))
+ (set (match_dup 2)
+ (match_dup 3))]
+ "split_di(operands, 1, operands + 2, operands + 3);")
+
+;; The predicate below must be general_operand, because ashrdi3 allows that
+(define_insn "ashrdi_const"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "n")))]
+ "!TARGET_COLDFIRE
+ && ((INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3)
+ || INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16
+ || INTVAL (operands[2]) == 31
+ || (INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 63))"
+{
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ CC_STATUS_INIT;
+ if (INTVAL (operands[2]) == 48)
+ return "swap %0\;ext%.l %0\;move%.l %0,%1\;smi %0\;ext%.w %0";
+ if (INTVAL (operands[2]) == 31)
+ return "add%.l %1,%1\;addx%.l %0,%0\;move%.l %0,%1\;subx%.l %0,%0";
+ if (INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 63)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ output_asm_insn (INTVAL (operands[2]) <= 8 ? "asr%.l %2,%0" :
+ "moveq %2,%1\;asr%.l %1,%0", operands);
+ output_asm_insn ("mov%.l %0,%1\;smi %0", operands);
+ return INTVAL (operands[2]) >= 15 ? "ext%.w %d0" :
+ TARGET_68020 ? "extb%.l %0" : "ext%.w %0\;ext%.l %0";
+ }
+ return "#";
+})
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "!TARGET_COLDFIRE"
+{
+ /* ??? A named pattern like this is not allowed to FAIL based
+ on its operands. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || ((INTVAL (operands[2]) < 1 || INTVAL (operands[2]) > 3)
+ && INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 16
+ && (INTVAL (operands[2]) < 31 || INTVAL (operands[2]) > 63)))
+ FAIL;
+})
+
+;; On all 68k models, this makes faster code in a special case.
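+;; "x >> 31" broadcasts the sign bit: add.l copies bit 31 into the X
+;; flag and subx.l then computes %0 - %0 - X, i.e. 0 or -1 (sketch).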
+
+(define_insn "ashrsi_31"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 31)))]
+ ""
+{
+ return "add%.l %0,%0\;subx%.l %0,%0";
+})
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "dI")))]
+ ""
+ "asr%.l %2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "opy" "2")])
+
+(define_insn "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "asr%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (ashiftrt:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "asr%.w %1,%0")
+
+(define_insn "ashrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ashiftrt:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "asr%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "asr%.b %1,%0")
+
+;; logical shift instructions
+
+;; commented out because of reload problems in 950612-1.c
+;;(define_insn ""
+;; [(set (cc0)
+;; (subreg:SI (lshiftrt:DI (match_operand:DI 0 "general_operand" "ro")
+;; (const_int 32)) 4))
+;; (set (match_operand:SI 1 "nonimmediate_operand" "=dm")
+;; (subreg:SI (lshiftrt:DI (match_dup 0)
+;; (const_int 32)) 4))]
+;; ""
+;;{
+;; return "move%.l %0,%1";
+;;})
+;;
+;;(define_insn ""
+;; [(set (cc0)
+;; (subreg:SI (lshiftrt:DI (match_operand:DI 0 "general_operand" "ro")
+;; (const_int 32)) 0))
+;; (set (match_operand:DI 1 "nonimmediate_operand" "=do")
+;; (lshiftrt:DI (match_dup 0)
+;; (const_int 32)))]
+;; ""
+;;{
+;; if (GET_CODE (operands[1]) == REG)
+;; operands[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+;; else
+;; operands[2] = adjust_address (operands[1], SImode, 4);
+;; return "move%.l %0,%2\;clr%.l %1";
+;;})
+
+(define_insn "subreg1lshrdi_const32"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (subreg:SI (lshiftrt:DI (match_operand:DI 1 "general_operand" "ro")
+ (const_int 32)) 4))]
+ ""
+ "move%.l %1,%0"
+ [(set_attr "type" "move_l")])
+
+(define_insn "*lshrdi3_const1"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 1)))]
+ "!TARGET_COLDFIRE"
+ "lsr%.l #1,%0\;roxr%.l #1,%R0")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 2)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (lshiftrt:DI (match_dup 1) (const_int 1)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 3)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (match_dup 0)
+ (lshiftrt:DI (match_dup 1) (const_int 2)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0) (const_int 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (strict_low_part (subreg:QI (match_dup 0) 7))
+ (subreg:QI (match_dup 0) 3))
+ (set (match_dup 2)
+ (lshiftrt:SI (match_dup 2) (const_int 8)))
+ (set (match_dup 3)
+ (rotatert:SI (match_dup 3) (const_int 8)))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 16)))]
+ "reload_completed && !TARGET_COLDFIRE"
+ [(set (strict_low_part (subreg:HI (match_dup 0) 6))
+ (subreg:HI (match_dup 0) 2))
+ (set (strict_low_part (subreg:HI (match_dup 0) 2))
+ (const_int 0))
+ (set (match_dup 3)
+ (rotate:SI (match_dup 3) (const_int 16)))
+ (set (match_dup 2)
+ (rotate:SI (match_dup 2) (const_int 16)))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "pre_dec_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 32)))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (const_int 0))]
+{
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ operands[1] = gen_highpart (SImode, operands[1]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "post_inc_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 32)))]
+ "reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], SImode, 0);
+ operands[1] = gen_highpart (SImode, operands[1]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 32)))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 5))
+ (set (match_dup 4) (const_int 0))]
+ "split_di(operands, 2, operands + 2, operands + 4);")
+
+(define_insn "*lshrdi_const32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro<>")
+ (lshiftrt:DI (match_operand:DI 1 "general_operand" "ro")
+ (const_int 32)))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "reload_completed && !TARGET_COLDFIRE
+ && INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 40"
+ [(set (match_dup 3) (lshiftrt:SI (match_dup 3) (match_dup 2)))
+ (set (match_dup 4) (match_dup 3))
+ (set (match_dup 3) (const_int 0))]
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 48)))]
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 2))
+ (set (strict_low_part (subreg:HI (match_dup 0) 6))
+ (const_int 0))
+ (set (match_dup 2) (const_int 0))
+ (set (match_dup 3)
+ (rotate:SI (match_dup 3) (const_int 16)))]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "reload_completed && !TARGET_COLDFIRE
+ && INTVAL (operands[2]) > 40 && INTVAL (operands[2]) <= 62"
+ [(set (match_dup 4) (match_dup 2))
+ (set (match_dup 3) (lshiftrt:SI (match_dup 3) (match_dup 4)))
+ (set (match_dup 4) (match_dup 3))
+ (set (match_dup 3) (const_int 0))]
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+})
+
+(define_insn "*lshrdi_const63"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 63)))]
+ ""
+ "add%.l %0,%0\;clr%.l %0\;clr%.l %R1\;addx%.l %R1,%R1")
+
+(define_insn "*lshrdi3_const"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "n")))]
+ "(!TARGET_COLDFIRE
+ && ((INTVAL (operands[2]) >= 2 && INTVAL (operands[2]) <= 3)
+ || INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16
+ || (INTVAL (operands[2]) > 32 && INTVAL (operands[2]) <= 63)))"
+ "#")
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))]
+ "!TARGET_COLDFIRE"
+{
+ /* ??? A named pattern like this is not allowed to FAIL based
+ on its operands. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || ((INTVAL (operands[2]) < 1 || INTVAL (operands[2]) > 3)
+ && INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 16
+ && (INTVAL (operands[2]) < 32 || INTVAL (operands[2]) > 63)))
+ FAIL;
+})
+
+;; On all 68k models, this makes faster code in a special case.
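+;; A logical "x >> 31" extracts the sign bit as 0 or 1: the add/subx
+;; pair produces 0 or -1 as in ashrsi_31, and neg.l then maps -1 to 1
+;; (sketch).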
+
+(define_insn "lshrsi_31"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 31)))]
+ ""
+{
+ return "add%.l %0,%0\;subx%.l %0,%0\;neg%.l %0";
+})
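+;; The shift by 31 is done through the carry: "add.l %0,%0" moves the
+;; sign bit into X, "subx.l %0,%0" yields 0 - 0 - X (that is, 0 or -1),
+;; and "neg.l %0" turns that into the final 0 or 1.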
+
+;; On most 68k models, this makes faster code in a special case.
+
+(define_insn "lshrsi_16"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 16)))]
+ "!TUNE_68060"
+{
+ CC_STATUS_INIT;
+ return "clr%.w %0\;swap %0";
+})
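+;; "clr.w %0" zeroes the low 16 bits and "swap %0" exchanges the two
+;; halves, so the old high half lands in the low half with zeros above
+;; it, exactly a logical right shift by 16.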
+
+;; On the 68000, this makes faster code in a special case.
+
+(define_insn "lshrsi_17_24"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "TUNE_68000_10
+ && INTVAL (operands[2]) > 16
+ && INTVAL (operands[2]) <= 24"
+{
+ /* I think lsr%.w sets the CC properly. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 16);
+ return "clr%.w %0\;swap %0\;lsr%.w %2,%0";
+})
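+;; Shifts of 17..24 compose the shift-by-16 trick above with an
+;; "lsr.w" of the remaining 1..8 count, which fits the immediate form
+;; of the shift instruction.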
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "dI")))]
+ ""
+ "lsr%.l %2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "opy" "2")])
+
+(define_insn "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsr%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (lshiftrt:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsr%.w %1,%0")
+
+(define_insn "lshrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (lshiftrt:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsr%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "lsr%.b %1,%0")
+
+;; rotate instructions
+
+(define_insn "rotlsi_16"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (const_int 16)))]
+ ""
+ "swap %0"
+ [(set_attr "type" "shift")])
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "dINO")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 16)
+ return "swap %0";
+ else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) >= 16)
+ {
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+ return "ror%.l %2,%0";
+ }
+ else
+ return "rol%.l %2,%0";
+})
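+;; Immediate rotate counts only encode 1..8, hence the "dINO"
+;; constraint (1..8, 16, and 24..31): a count of 16 becomes "swap",
+;; and a count n above 16 becomes the equivalent "ror" by 32-n.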
+
+(define_insn "rotlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotate:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "dIP")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) >= 8)
+ {
+ operands[2] = GEN_INT (16 - INTVAL (operands[2]));
+ return "ror%.w %2,%0";
+ }
+ else
+ return "rol%.w %2,%0";
+})
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (rotate:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dIP")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) >= 8)
+ {
+ operands[1] = GEN_INT (16 - INTVAL (operands[1]));
+ return "ror%.w %1,%0";
+ }
+ else
+ return "rol%.w %1,%0";
+})
+
+(define_insn "rotlqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (rotate:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) >= 4)
+ {
+ operands[2] = GEN_INT (8 - INTVAL (operands[2]));
+ return "ror%.b %2,%0";
+ }
+ else
+ return "rol%.b %2,%0";
+})
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+{
+ if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) >= 4)
+ {
+ operands[1] = GEN_INT (8 - INTVAL (operands[1]));
+ return "ror%.b %1,%0";
+ }
+ else
+ return "rol%.b %1,%0";
+})
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "ror%.l %2,%0")
+
+(define_insn "rotrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (rotatert:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "ror%.w %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d"))
+ (rotatert:HI (match_dup 0)
+ (match_operand:HI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "ror%.w %1,%0")
+
+(define_insn "rotrqi3"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (rotatert:QI (match_operand:QI 1 "register_operand" "0")
+ (match_operand:QI 2 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "ror%.b %2,%0")
+
+(define_insn ""
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "dI")))]
+ "!TARGET_COLDFIRE"
+ "ror%.b %1,%0")
+
+
+;; Bit set/clear in memory byte.
+
+;; set bit, bit number is int
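+;; (The QImode subreg at byte offset 3 selects the least significant
+;; byte of the 32-bit mask 1<<n, byte 3 on this big-endian target,
+;; presumably the form the combiner produces for a byte-sized
+;; "x |= 1 << n".)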
+(define_insn "bsetmemqi"
+ [(set (match_operand:QI 0 "memory_operand" "+m")
+ (ior:QI (subreg:QI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "general_operand" "d")) 3)
+ (match_dup 0)))]
+ ""
+{
+ CC_STATUS_INIT;
+ return "bset %1,%0";
+}
+ [(set_attr "type" "bitrw")])
+
+;; set bit, bit number is (sign/zero)_extended from HImode/QImode
+(define_insn "*bsetmemqi_ext"
+ [(set (match_operand:QI 0 "memory_operand" "+m")
+ (ior:QI (subreg:QI (ashift:SI (const_int 1)
+ (match_operator:SI 2 "extend_operator"
+ [(match_operand 1 "general_operand" "d")])) 3)
+ (match_dup 0)))]
+ ""
+{
+ CC_STATUS_INIT;
+ return "bset %1,%0";
+}
+ [(set_attr "type" "bitrw")])
+
+;; clear bit, bit number is int
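+;; (The "7 - n" position converts between the zero_extract bit number,
+;; which counts from the most significant bit on this BITS_BIG_ENDIAN
+;; target, and the bclr bit number, which counts from the least
+;; significant bit of the byte.)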
+(define_insn "bclrmemqi"
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+m")
+ (const_int 1)
+ (minus:SI (const_int 7)
+ (match_operand:SI 1 "general_operand" "d")))
+ (const_int 0))]
+ ""
+{
+ CC_STATUS_INIT;
+ return "bclr %1,%0";
+}
+ [(set_attr "type" "bitrw")])
+
+;; clear bit, bit number is (sign/zero)_extended from HImode/QImode
+(define_insn "*bclrmemqi_ext"
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+m")
+ (const_int 1)
+ (minus:SI (const_int 7)
+ (match_operator:SI 2 "extend_operator"
+ [(match_operand 1 "general_operand" "d")])))
+ (const_int 0))]
+ ""
+{
+ CC_STATUS_INIT;
+ return "bclr %1,%0";
+}
+ [(set_attr "type" "bitrw")])
+
+;; Special cases of bit-field insns which we should
+;; recognize in preference to the general case.
+;; These handle aligned 8-bit and 16-bit fields,
+;; which can usually be done with move instructions.
+
+;
+; Special case for 32-bit field in memory. This only occurs when 32-bit
+; alignment of structure members is specified.
+;
+; The move is allowed to be odd byte aligned, because that's still faster
+; than an odd byte aligned bit-field instruction.
+;
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+o")
+ (const_int 32)
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (match_operand:SI 2 "general_src_operand" "rmSi"))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[1]) % 8) == 0
+ && ! mode_dependent_address_p (XEXP (operands[0], 0))"
+{
+ operands[0]
+ = adjust_address (operands[0], SImode, INTVAL (operands[1]) / 8);
+
+ return "move%.l %2,%0";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+do")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "register_operand" "d"))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[1]) == 8 || INTVAL (operands[1]) == 16)
+ && INTVAL (operands[2]) % INTVAL (operands[1]) == 0
+ && (GET_CODE (operands[0]) == REG
+ || ! mode_dependent_address_p (XEXP (operands[0], 0)))"
+{
+ if (REG_P (operands[0]))
+ {
+ if (INTVAL (operands[1]) + INTVAL (operands[2]) != 32)
+ return "bfins %3,%0{%b2:%b1}";
+ }
+ else
+ operands[0] = adjust_address (operands[0],
+ INTVAL (operands[1]) == 8 ? QImode : HImode,
+ INTVAL (operands[2]) / 8);
+
+ if (GET_CODE (operands[3]) == MEM)
+ operands[3] = adjust_address (operands[3],
+ INTVAL (operands[1]) == 8 ? QImode : HImode,
+ (32 - INTVAL (operands[1])) / 8);
+
+ if (INTVAL (operands[1]) == 8)
+ return "move%.b %3,%0";
+ return "move%.w %3,%0";
+})
+
+
+;
+; Special case for 32-bit field in memory. This only occurs when 32-bit
+; alignment of structure members is specified.
+;
+; The move is allowed to be odd byte aligned, because that's still faster
+; than an odd byte aligned bit-field instruction.
+;
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (zero_extract:SI (match_operand:QI 1 "memory_src_operand" "oS")
+ (const_int 32)
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[2]) % 8) == 0
+ && ! mode_dependent_address_p (XEXP (operands[1], 0))"
+{
+ operands[1]
+ = adjust_address (operands[1], SImode, INTVAL (operands[2]) / 8);
+
+ return "move%.l %1,%0";
+})
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=&d")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "do")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16)
+ && INTVAL (operands[3]) % INTVAL (operands[2]) == 0
+ && (GET_CODE (operands[1]) == REG
+ || ! mode_dependent_address_p (XEXP (operands[1], 0)))"
+{
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ if (REG_P (operands[1]))
+ {
+ if (INTVAL (operands[2]) + INTVAL (operands[3]) != 32)
+ return "bfextu %1{%b3:%b2},%0";
+ }
+ else
+ operands[1]
+ = adjust_address (operands[1], SImode, INTVAL (operands[3]) / 8);
+
+ output_asm_insn ("clr%.l %0", operands);
+ if (GET_CODE (operands[0]) == MEM)
+ operands[0] = adjust_address (operands[0],
+ INTVAL (operands[2]) == 8 ? QImode : HImode,
+ (32 - INTVAL (operands[2])) / 8);
+
+ if (INTVAL (operands[2]) == 8)
+ return "move%.b %1,%0";
+ return "move%.w %1,%0";
+})
+
+;
+; Special case for 32-bit field in memory. This only occurs when 32-bit
+; alignment of structure members is specified.
+;
+; The move is allowed to be odd byte aligned, because that's still faster
+; than an odd byte aligned bit-field instruction.
+;
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (sign_extract:SI (match_operand:QI 1 "memory_src_operand" "oS")
+ (const_int 32)
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[2]) % 8) == 0
+ && ! mode_dependent_address_p (XEXP (operands[1], 0))"
+{
+ operands[1]
+ = adjust_address (operands[1], SImode, INTVAL (operands[2]) / 8);
+
+ return "move%.l %1,%0";
+})
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "do")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16)
+ && INTVAL (operands[3]) % INTVAL (operands[2]) == 0
+ && (GET_CODE (operands[1]) == REG
+ || ! mode_dependent_address_p (XEXP (operands[1], 0)))"
+{
+ if (REG_P (operands[1]))
+ {
+ if (INTVAL (operands[2]) + INTVAL (operands[3]) != 32)
+ return "bfexts %1{%b3:%b2},%0";
+ }
+ else
+ operands[1]
+ = adjust_address (operands[1],
+ INTVAL (operands[2]) == 8 ? QImode : HImode,
+ INTVAL (operands[3]) / 8);
+
+ if (INTVAL (operands[2]) == 8)
+ return "move%.b %1,%0\;extb%.l %0";
+ return "move%.w %1,%0\;ext%.l %0";
+})
+
+;; Bit-field instructions, general cases.
+;; "o,d" constraint causes a nonoffsettable memref to match the "o"
+;; so that its address is reloaded.
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extract:SI (match_operand:QI 1 "memory_operand" "o")
+ (match_operand:SI 2 "nonmemory_operand" "dn")
+ (match_operand:SI 3 "nonmemory_operand" "dn")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "bfexts %1{%b3:%b2},%0")
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extract:SI (match_operand:QI 1 "memory_operand" "o")
+ (match_operand:SI 2 "nonmemory_operand" "dn")
+ (match_operand:SI 3 "nonmemory_operand" "dn")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) != 32)
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ }
+ return "bfextu %1{%b3:%b2},%0";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+o")
+ (match_operand:SI 1 "nonmemory_operand" "dn")
+ (match_operand:SI 2 "nonmemory_operand" "dn"))
+ (xor:SI (zero_extract:SI (match_dup 0) (match_dup 1) (match_dup 2))
+ (match_operand 3 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD
+ && (INTVAL (operands[3]) == -1
+ || (GET_CODE (operands[1]) == CONST_INT
+ && (~ INTVAL (operands[3]) & ((1 << INTVAL (operands[1])) - 1)) == 0))"
+{
+ CC_STATUS_INIT;
+ return "bfchg %0{%b2:%b1}";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+o")
+ (match_operand:SI 1 "nonmemory_operand" "dn")
+ (match_operand:SI 2 "nonmemory_operand" "dn"))
+ (const_int 0))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ CC_STATUS_INIT;
+ return "bfclr %0{%b2:%b1}";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+o")
+ (match_operand:SI 1 "general_operand" "dn")
+ (match_operand:SI 2 "general_operand" "dn"))
+ (const_int -1))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ CC_STATUS_INIT;
+ return "bfset %0{%b2:%b1}";
+})
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "register_operand" ""))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+o")
+ (match_operand:SI 1 "nonmemory_operand" "dn")
+ (match_operand:SI 2 "nonmemory_operand" "dn"))
+ (match_operand:SI 3 "register_operand" "d"))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "bfins %3,%0{%b2:%b1}")
+
+;; Now recognize bit-field insns that operate on registers
+;; (or at least were intended to do so).
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+ "bfexts %1{%b3:%b2},%0")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) != 32)
+ cc_status.flags |= CC_NOT_NEGATIVE;
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ }
+ return "bfextu %1{%b3:%b2},%0";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+d")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (const_int 0))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ CC_STATUS_INIT;
+ return "bfclr %0{%b2:%b1}";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+d")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (const_int -1))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ CC_STATUS_INIT;
+ return "bfset %0{%b2:%b1}";
+})
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+d")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "register_operand" "d"))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+#if 0
+ /* These special cases are now recognized by a specific pattern. */
+ if (GET_CODE (operands[1]) == CONST_INT && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[1]) == 16 && INTVAL (operands[2]) == 16)
+ return "move%.w %3,%0";
+ if (GET_CODE (operands[1]) == CONST_INT && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[1]) == 24 && INTVAL (operands[2]) == 8)
+ return "move%.b %3,%0";
+#endif
+ return "bfins %3,%0{%b2:%b1}";
+})
+
+;; Special patterns for optimizing bit-field instructions.
+
+(define_insn ""
+ [(set (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_operand" "o")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "general_operand" "dn"))
+ (const_int 0)))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ if (operands[1] == const1_rtx
+ && GET_CODE (operands[2]) == CONST_INT)
+ {
+ int width = GET_CODE (operands[0]) == REG ? 31 : 7;
+ return output_btst (operands,
+ GEN_INT (width - INTVAL (operands[2])),
+ operands[0], insn, 1000);
+ /* Pass 1000 as SIGNPOS argument so that btst will
+ not think we are testing the sign bit for an `and'
+ and assume that nonzero implies a negative result. */
+ }
+ if (INTVAL (operands[1]) != 32)
+ cc_status.flags = CC_NOT_NEGATIVE;
+ return "bftst %0{%b2:%b1}";
+})
+
+
+;; Now handle the register cases.
+(define_insn ""
+ [(set (cc0)
+ (compare (zero_extract:SI (match_operand:SI 0 "register_operand" "d")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "general_operand" "dn"))
+ (const_int 0)))]
+ "TARGET_68020 && TARGET_BITFIELD"
+{
+ if (operands[1] == const1_rtx
+ && GET_CODE (operands[2]) == CONST_INT)
+ {
+ int width = GET_CODE (operands[0]) == REG ? 31 : 7;
+ return output_btst (operands, GEN_INT (width - INTVAL (operands[2])),
+ operands[0], insn, 1000);
+ /* Pass 1000 as SIGNPOS argument so that btst will
+ not think we are testing the sign bit for an `and'
+ and assume that nonzero implies a negative result. */
+ }
+ if (INTVAL (operands[1]) != 32)
+ cc_status.flags = CC_NOT_NEGATIVE;
+ return "bftst %0{%b2:%b1}";
+})
+
+(define_insn "scc0_di"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm")
+ (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DI 2 "general_operand" "ro") (const_int 0)]))]
+ "! TARGET_COLDFIRE"
+{
+ return output_scc_di (operands[1], operands[2], const0_rtx, operands[0]);
+})
+
+(define_insn "scc0_di_5200"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d")
+ (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DI 2 "general_operand" "ro") (const_int 0)]))]
+ "TARGET_COLDFIRE"
+{
+ return output_scc_di (operands[1], operands[2], const0_rtx, operands[0]);
+})
+
+(define_insn "scc_di"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=dm,dm")
+ (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DI 2 "general_operand" "ro,r")
+ (match_operand:DI 3 "general_operand" "r,ro")]))]
+ "! TARGET_COLDFIRE"
+{
+ return output_scc_di (operands[1], operands[2], operands[3], operands[0]);
+})
+
+(define_insn "scc_di_5200"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d")
+ (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DI 2 "general_operand" "ro,r")
+ (match_operand:DI 3 "general_operand" "r,ro")]))]
+ "TARGET_COLDFIRE"
+{
+ return output_scc_di (operands[1], operands[2], operands[3], operands[0]);
+})
+
+;; Note that the hardware supports a memory operand 0 for an scc
+;; instruction, but we do not allow operand 0 to be in memory here:
+;; if its address needed reloading, the reload insns would clobber
+;; the condition codes between the compare and the scc.
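+;;
+;; The "cc_status = cc_prev_status" in the bodies below records that
+;; an scc instruction leaves the condition codes untouched, so the
+;; tracked CC state from before the insn stays valid after it.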
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (eq:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("seq %0", "fseq %0", "seq %0");
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ne:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("sne %0", "fsne %0", "sne %0");
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (gt:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("sgt %0", "fsgt %0", 0);
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (gtu:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ return "shi %0";
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (lt:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("slt %0", "fslt %0", "smi %0");
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ltu:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ return "scs %0";
+})
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ge:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("sge %0", "fsge %0", "spl %0");
+})
+
+(define_insn "*scc"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (geu:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ return "scc %0";
+}
+ [(set_attr "type" "scc")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (le:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ OUTPUT_JUMP ("sle %0", "fsle %0", 0);
+})
+
+(define_insn "*sls"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (leu:QI (cc0) (const_int 0)))]
+ ""
+{
+ cc_status = cc_prev_status;
+ return "sls %0";
+}
+ [(set_attr "type" "scc")])
+
+(define_insn "*sordered_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ordered:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsor %0";
+})
+
+(define_insn "*sunordered_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unordered:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsun %0";
+})
+
+(define_insn "*suneq_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (uneq:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsueq %0";
+})
+
+(define_insn "*sunge_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unge:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsuge %0";
+})
+
+(define_insn "*sungt_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ungt:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsugt %0";
+})
+
+(define_insn "*sunle_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unle:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsule %0";
+})
+
+(define_insn "*sunlt_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unlt:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsult %0";
+})
+
+(define_insn "*sltgt_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ltgt:QI (cc0) (const_int 0)))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsogl %0";
+})
+
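+;; RTL has no "ordered greater than" code, so the fsogt/fsoge/fsolt/
+;; fsole patterns below match the equivalent negated unordered form,
+;; e.g. (not (unle x y)) for fsogt.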
+(define_insn "*fsogt_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (not:QI (unle:QI (cc0) (const_int 0))))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsogt %0";
+})
+
+(define_insn "*fsoge_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (not:QI (unlt:QI (cc0) (const_int 0))))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsoge %0";
+})
+
+(define_insn "*fsolt_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (not:QI (unge:QI (cc0) (const_int 0))))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsolt %0";
+})
+
+(define_insn "*fsole_1"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (not:QI (ungt:QI (cc0) (const_int 0))))]
+ "TARGET_68881 && !TUNE_68060"
+{
+ cc_status = cc_prev_status;
+ return "fsole %0";
+})
+
+;; Basic conditional jump instructions.
+
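+;; There is no 64-bit compare, so the DImode zero tests below work on
+;; the 32-bit halves: when the operand is not in an address register
+;; the two halves are ORed into a scratch register and tested together,
+;; and otherwise each half is tested in turn ("cmp.w #0" before the
+;; 68020, since "tst.l" there does not accept an address register).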
+(define_insn "beq0_di"
+ [(set (pc)
+ (if_then_else (eq (match_operand:DI 0 "general_operand" "d*ao,<>")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ","))
+ (pc)))
+ (clobber (match_scratch:SI 2 "=d,d"))]
+ ""
+{
+ CC_STATUS_INIT;
+ if (which_alternative == 1)
+ return "move%.l %0,%2\;or%.l %0,%2\;jeq %l1";
+ if ((cc_prev_status.value1
+ && rtx_equal_p (cc_prev_status.value1, operands[0]))
+ || (cc_prev_status.value2
+ && rtx_equal_p (cc_prev_status.value2, operands[0])))
+ {
+ cc_status = cc_prev_status;
+ return "jeq %l1";
+ }
+ if (GET_CODE (operands[0]) == REG)
+ operands[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[3] = adjust_address (operands[0], SImode, 4);
+ if (! ADDRESS_REG_P (operands[0]))
+ {
+ if (reg_overlap_mentioned_p (operands[2], operands[0]))
+ {
+ if (reg_overlap_mentioned_p (operands[2], operands[3]))
+ return "or%.l %0,%2\;jeq %l1";
+ else
+ return "or%.l %3,%2\;jeq %l1";
+ }
+ return "move%.l %0,%2\;or%.l %3,%2\;jeq %l1";
+ }
+ operands[4] = gen_label_rtx ();
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ output_asm_insn ("tst%.l %0\;jne %l4\;tst%.l %3\;jeq %l1", operands);
+ else
+ output_asm_insn ("cmp%.w #0,%0\;jne %l4\;cmp%.w #0,%3\;jeq %l1", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (operands[4]));
+ return "";
+})
+
+(define_insn "bne0_di"
+ [(set (pc)
+ (if_then_else (ne (match_operand:DI 0 "general_operand" "do,*a")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ","))
+ (pc)))
+ (clobber (match_scratch:SI 2 "=d,X"))]
+ ""
+{
+ if ((cc_prev_status.value1
+ && rtx_equal_p (cc_prev_status.value1, operands[0]))
+ || (cc_prev_status.value2
+ && rtx_equal_p (cc_prev_status.value2, operands[0])))
+ {
+ cc_status = cc_prev_status;
+ return "jne %l1";
+ }
+ CC_STATUS_INIT;
+ if (GET_CODE (operands[0]) == REG)
+ operands[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else
+ operands[3] = adjust_address (operands[0], SImode, 4);
+ if (!ADDRESS_REG_P (operands[0]))
+ {
+ if (reg_overlap_mentioned_p (operands[2], operands[0]))
+ {
+ if (reg_overlap_mentioned_p (operands[2], operands[3]))
+ return "or%.l %0,%2\;jne %l1";
+ else
+ return "or%.l %3,%2\;jne %l1";
+ }
+ return "move%.l %0,%2\;or%.l %3,%2\;jne %l1";
+ }
+ if (TARGET_68020 || TARGET_COLDFIRE)
+ return "tst%.l %0\;jne %l1\;tst%.l %3\;jne %l1";
+ else
+ return "cmp%.w #0,%0\;jne %l1\;cmp%.w #0,%3\;jne %l1";
+})
+
+(define_insn "bge0_di"
+ [(set (pc)
+ (if_then_else (ge (match_operand:DI 0 "general_operand" "ro")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_prev_status.value1
+ && rtx_equal_p (cc_prev_status.value1, operands[0]))
+ || (cc_prev_status.value2
+ && rtx_equal_p (cc_prev_status.value2, operands[0])))
+ {
+ cc_status = cc_prev_status;
+ return cc_status.flags & CC_REVERSED ? "jle %l1" : "jpl %l1";
+ }
+ CC_STATUS_INIT;
+ if (TARGET_68020 || TARGET_COLDFIRE || ! ADDRESS_REG_P (operands[0]))
+ output_asm_insn("tst%.l %0", operands);
+ else
+ {
+ /* On an address reg, cmpw may replace cmpl. */
+ output_asm_insn("cmp%.w #0,%0", operands);
+ }
+ return "jpl %l1";
+})
+
+(define_insn "blt0_di"
+ [(set (pc)
+ (if_then_else (lt (match_operand:DI 0 "general_operand" "ro")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_prev_status.value1
+ && rtx_equal_p (cc_prev_status.value1, operands[0]))
+ || (cc_prev_status.value2
+ && rtx_equal_p (cc_prev_status.value2, operands[0])))
+ {
+ cc_status = cc_prev_status;
+ return cc_status.flags & CC_REVERSED ? "jgt %l1" : "jmi %l1";
+ }
+ CC_STATUS_INIT;
+ if (TARGET_68020 || TARGET_COLDFIRE || ! ADDRESS_REG_P (operands[0]))
+ output_asm_insn("tst%.l %0", operands);
+ else
+ {
+ /* On an address reg, cmpw may replace cmpl. */
+ output_asm_insn("cmp%.w #0,%0", operands);
+ }
+ return "jmi %l1";
+})
+
+(define_insn "beq"
+ [(set (pc)
+ (if_then_else (eq (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ OUTPUT_JUMP ("jeq %l0", "fjeq %l0", "jeq %l0");
+}
+ [(set (attr "type") (symbol_ref "m68k_sched_branch_type (insn)"))])
+
+(define_insn "bne"
+ [(set (pc)
+ (if_then_else (ne (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ OUTPUT_JUMP ("jne %l0", "fjne %l0", "jne %l0");
+}
+ [(set (attr "type") (symbol_ref "m68k_sched_branch_type (insn)"))])
+
+(define_insn "bgt"
+ [(set (pc)
+ (if_then_else (gt (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jgt %l0", "fjgt %l0", 0);
+}
+ [(set (attr "type") (symbol_ref "m68k_sched_branch_type (insn)"))])
+
+(define_insn "bgtu"
+ [(set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jhi %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "blt"
+ [(set (pc)
+ (if_then_else (lt (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jlt %l0", "fjlt %l0", "jmi %l0");
+}
+ [(set (attr "type") (symbol_ref "m68k_sched_branch_type (insn)"))])
+
+(define_insn "bltu"
+ [(set (pc)
+ (if_then_else (ltu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jcs %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "bge"
+ [(set (pc)
+ (if_then_else (ge (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jge %l0", "fjge %l0", "jpl %l0");
+})
+
+(define_insn "bgeu"
+ [(set (pc)
+ (if_then_else (geu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jcc %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "ble"
+ [(set (pc)
+ (if_then_else (le (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jle %l0", "fjle %l0", 0);
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "bleu"
+ [(set (pc)
+ (if_then_else (leu (cc0)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jls %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "bordered"
+ [(set (pc)
+ (if_then_else (ordered (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjor %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bunordered"
+ [(set (pc)
+ (if_then_else (unordered (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjun %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "buneq"
+ [(set (pc)
+ (if_then_else (uneq (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjueq %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bunge"
+ [(set (pc)
+ (if_then_else (unge (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjuge %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bungt"
+ [(set (pc)
+ (if_then_else (ungt (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjugt %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bunle"
+ [(set (pc)
+ (if_then_else (unle (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjule %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bunlt"
+ [(set (pc)
+ (if_then_else (unlt (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjult %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "bltgt"
+ [(set (pc)
+ (if_then_else (ltgt (cc0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjogl %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+;; Negated conditional jump instructions.
+
+(define_insn "*beq_rev"
+ [(set (pc)
+ (if_then_else (eq (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ OUTPUT_JUMP ("jne %l0", "fjne %l0", "jne %l0");
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bne_rev"
+ [(set (pc)
+ (if_then_else (ne (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ OUTPUT_JUMP ("jeq %l0", "fjeq %l0", "jeq %l0");
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bgt_rev"
+ [(set (pc)
+ (if_then_else (gt (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jle %l0", "fjngt %l0", 0);
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bgtu_rev"
+ [(set (pc)
+ (if_then_else (gtu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jls %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*blt_rev"
+ [(set (pc)
+ (if_then_else (lt (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jge %l0", "fjnlt %l0", "jpl %l0");
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bltu_rev"
+ [(set (pc)
+ (if_then_else (ltu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jcc %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bge_rev"
+ [(set (pc)
+ (if_then_else (ge (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jlt %l0", "fjnge %l0", "jmi %l0");
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bgeu_rev"
+ [(set (pc)
+ (if_then_else (geu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jcs %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*ble_rev"
+ [(set (pc)
+ (if_then_else (le (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ OUTPUT_JUMP ("jgt %l0", "fjnle %l0", 0);
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bleu_rev"
+ [(set (pc)
+ (if_then_else (leu (cc0)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0)
+ {
+ cc_status.flags &= ~CC_OVERFLOW_UNUSABLE;
+ return 0;
+ }
+
+ return "jhi %l0";
+}
+ [(set_attr "type" "bcc")])
+
+(define_insn "*bordered_rev"
+ [(set (pc)
+ (if_then_else (ordered (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjun %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bunordered_rev"
+ [(set (pc)
+ (if_then_else (unordered (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjor %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*buneq_rev"
+ [(set (pc)
+ (if_then_else (uneq (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjogl %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bunge_rev"
+ [(set (pc)
+ (if_then_else (unge (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjolt %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bungt_rev"
+ [(set (pc)
+ (if_then_else (ungt (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjole %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bunle_rev"
+ [(set (pc)
+ (if_then_else (unle (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjogt %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bunlt_rev"
+ [(set (pc)
+ (if_then_else (unlt (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjoge %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+(define_insn "*bltgt_rev"
+ [(set (pc)
+ (if_then_else (ltgt (cc0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ gcc_assert (cc_prev_status.flags & CC_IN_68881);
+ return "fjueq %l0";
+}
+ [(set_attr "type" "fbcc")])
+
+;; Unconditional and other jump instructions
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jra %l0"
+ [(set_attr "type" "bra")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+#ifdef CASE_VECTOR_PC_RELATIVE
+ operands[0] = gen_rtx_PLUS (SImode, pc_rtx,
+ gen_rtx_SIGN_EXTEND (SImode, operands[0]));
+#endif
+})
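+;; With CASE_VECTOR_PC_RELATIVE, dispatch tables hold pc-relative
+;; HImode offsets, so the expander rewrites the target address as pc
+;; plus the sign-extended table entry; the second pattern below emits
+;; the corresponding indexed pc-relative jump.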
+
+;; Jump to variable address from dispatch table of absolute addresses.
+(define_insn "*tablejump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+ return MOTOROLA ? "jmp (%0)" : "jmp %0@";
+}
+ [(set_attr "type" "jmp")])
+
+;; Jump to variable address from dispatch table of relative addresses.
+(define_insn ""
+ [(set (pc)
+ (plus:SI (pc)
+ (sign_extend:SI (match_operand:HI 0 "register_operand" "r"))))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+#ifdef ASM_RETURN_CASE_JUMP
+ ASM_RETURN_CASE_JUMP;
+#else
+ if (TARGET_COLDFIRE)
+ {
+ if (ADDRESS_REG_P (operands[0]))
+ return MOTOROLA ? "jmp (2,pc,%0.l)" : "jmp pc@(2,%0:l)";
+ else if (MOTOROLA)
+ return "ext%.l %0\;jmp (2,pc,%0.l)";
+ else
+ return "extl %0\;jmp pc@(2,%0:l)";
+ }
+ else
+ return MOTOROLA ? "jmp (2,pc,%0.w)" : "jmp pc@(2,%0:w)";
+#endif
+})
+
+;; Decrement-and-branch insns.
+(define_insn "*dbne_hi"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:HI 0 "nonimmediate_operand" "+d*g")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (DATA_REG_P (operands[0]))
+ return "dbra %0,%l1";
+ if (GET_CODE (operands[0]) == MEM)
+ return "subq%.w #1,%0\;jcc %l1";
+ return "subq%.w #1,%0\;cmp%.w #-1,%0\;jne %l1";
+})
+
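+;; The SImode variant can still use dbra, which only decrements the low
+;; word: when dbra falls through (the low word hit -1), the fixup
+;; "clr.w / subq.l #1 / jcc" propagates the borrow into the high word
+;; and re-enters the loop unless the full 32-bit count has run out.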
+(define_insn "*dbne_si"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 0 "nonimmediate_operand" "+d*g")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_COLDFIRE"
+{
+ CC_STATUS_INIT;
+ if (DATA_REG_P (operands[0]))
+ return "dbra %0,%l1\;clr%.w %0\;subq%.l #1,%0\;jcc %l1";
+ if (GET_CODE (operands[0]) == MEM)
+ return "subq%.l #1,%0\;jcc %l1";
+ return "subq%.l #1,%0\;cmp%.l #-1,%0\;jne %l1";
+})
+
+;; Two dbra patterns that use REG_NOTES info generated by strength_reduce.
+
+(define_insn "*dbge_hi"
+ [(set (pc)
+ (if_then_else
+ (ge (plus:HI (match_operand:HI 0 "nonimmediate_operand" "+d*am")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_COLDFIRE && find_reg_note (insn, REG_NONNEG, 0)"
+{
+ CC_STATUS_INIT;
+ if (DATA_REG_P (operands[0]))
+ return "dbra %0,%l1";
+ if (GET_CODE (operands[0]) == MEM)
+ return "subq%.w #1,%0\;jcc %l1";
+ return "subq%.w #1,%0\;cmp%.w #-1,%0\;jne %l1";
+})
+
+(define_expand "decrement_and_branch_until_zero"
+ [(parallel [(set (pc)
+ (if_then_else
+ (ge (plus:SI (match_operand:SI 0 "nonimmediate_operand" "")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))])]
+ ""
+ "")
+
+(define_insn "*dbge_si"
+ [(set (pc)
+ (if_then_else
+ (ge (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+d*am")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_COLDFIRE && find_reg_note (insn, REG_NONNEG, 0)"
+{
+ CC_STATUS_INIT;
+ if (DATA_REG_P (operands[0]))
+ return "dbra %0,%l1\;clr%.w %0\;subq%.l #1,%0\;jcc %l1";
+ if (GET_CODE (operands[0]) == MEM)
+ return "subq%.l #1,%0\;jcc %l1";
+ return "subq%.l #1,%0\;cmp%.l #-1,%0\;jne %l1";
+})
+
+(define_expand "sibcall"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ operands[0] = m68k_legitimize_sibcall_address (operands[0]);
+})
+
+(define_insn "*sibcall"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_operand" ""))
+ (match_operand:SI 1 "general_operand" ""))]
+ "SIBLING_CALL_P (insn)"
+{
+ return output_sibcall (operands[0]);
+})
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+{
+ operands[1] = m68k_legitimize_sibcall_address (operands[1]);
+})
+
+(define_insn "*sibcall_value"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:QI (match_operand:SI 1 "sibcall_operand" ""))
+ (match_operand:SI 2 "general_operand" "")))]
+ "SIBLING_CALL_P (insn)"
+{
+ operands[0] = operands[1];
+ return output_sibcall (operands[0]);
+})
+
+;; Call subroutine with no return value.
+(define_expand "call"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ;; Operand 1 not really used on the m68000.
+ ""
+{
+ operands[0] = m68k_legitimize_call_address (operands[0]);
+})
+
+(define_insn "*call"
+ [(call (mem:QI (match_operand:SI 0 "call_operand" "a,W"))
+ (match_operand:SI 1 "general_operand" "g,g"))]
+ ;; Operand 1 not really used on the m68000.
+ "!SIBLING_CALL_P (insn)"
+{
+ return output_call (operands[0]);
+}
+ [(set_attr "type" "jsr")])
+
+;; Call subroutine, returning value in operand 0
+;; (which must be a hard register).
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ;; Operand 2 not really used on the m68000.
+ ""
+{
+ operands[1] = m68k_legitimize_call_address (operands[1]);
+})
+
+(define_insn "*non_symbolic_call_value"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:QI (match_operand:SI 1 "non_symbolic_call_operand" "a,W"))
+ (match_operand:SI 2 "general_operand" "g,g")))]
+ ;; Operand 2 not really used on the m68000.
+ "!SIBLING_CALL_P (insn)"
+ "jsr %a1"
+ [(set_attr "type" "jsr")
+ (set_attr "opx" "1")])
+
+(define_insn "*symbolic_call_value_jsr"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:QI (match_operand:SI 1 "symbolic_operand" "a,W"))
+ (match_operand:SI 2 "general_operand" "g,g")))]
+ ;; Operand 2 not really used on the m68000.
+ "!SIBLING_CALL_P (insn) && m68k_symbolic_call_var == M68K_SYMBOLIC_CALL_JSR"
+{
+ operands[0] = operands[1];
+ return m68k_symbolic_call;
+}
+ [(set_attr "type" "jsr")
+ (set_attr "opx" "1")])
+
+(define_insn "*symbolic_call_value_bsr"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:QI (match_operand:SI 1 "symbolic_operand" "a,W"))
+ (match_operand:SI 2 "general_operand" "g,g")))]
+ ;; Operand 2 not really used on the m68000.
+ "!SIBLING_CALL_P (insn)
+ && (m68k_symbolic_call_var == M68K_SYMBOLIC_CALL_BSR_C
+ || m68k_symbolic_call_var == M68K_SYMBOLIC_CALL_BSR_P)"
+{
+ operands[0] = operands[1];
+ return m68k_symbolic_call;
+}
+ [(set_attr "type" "bsr")
+ (set_attr "opx" "1")])
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ "NEEDS_UNTYPED_CALL"
+{
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ "")
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "nop")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ m68k_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ m68k_expand_epilogue (false);
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+{
+ m68k_expand_epilogue (true);
+ DONE;
+})
+
+;; Used for frameless functions which save no regs and allocate no locals.
+(define_expand "return"
+ [(return)]
+ "m68k_use_return_insn ()"
+ "")
+
+(define_insn "*return"
+ [(return)]
+ ""
+{
+ switch (m68k_get_function_kind (current_function_decl))
+ {
+ case m68k_fk_interrupt_handler:
+ return "rte";
+
+ case m68k_fk_interrupt_thread:
+ return "sleep";
+
+ default:
+ if (crtl->args.pops_args)
+ {
+ operands[0] = GEN_INT (crtl->args.pops_args);
+ return "rtd %0";
+ }
+ else
+ return "rts";
+ }
+}
+ [(set_attr "type" "rts")])
+
+(define_insn "*m68k_store_multiple"
+ [(match_parallel 0 "" [(match_operand 1 "")])]
+ "m68k_movem_pattern_p (operands[0], NULL, 0, true)"
+{
+ return m68k_output_movem (operands, operands[0], 0, true);
+})
+
+(define_insn "*m68k_store_multiple_automod"
+ [(match_parallel 0 ""
+ [(set (match_operand:SI 1 "register_operand" "=a")
+ (plus:SI (match_operand:SI 2 "register_operand" "1")
+ (match_operand:SI 3 "const_int_operand")))])]
+ "m68k_movem_pattern_p (operands[0], operands[1], INTVAL (operands[3]), true)"
+{
+ return m68k_output_movem (operands, operands[0], INTVAL (operands[3]), true);
+})
+
+(define_insn "*m68k_load_multiple"
+ [(match_parallel 0 "" [(match_operand 1 "")])]
+ "m68k_movem_pattern_p (operands[0], NULL, 0, false)"
+{
+ return m68k_output_movem (operands, operands[0], 0, false);
+})
+
+(define_insn "*m68k_load_multiple_automod"
+ [(match_parallel 0 ""
+ [(set (match_operand:SI 1 "register_operand" "=a")
+ (plus:SI (match_operand:SI 2 "register_operand" "1")
+ (match_operand:SI 3 "const_int_operand")))])]
+ "m68k_movem_pattern_p (operands[0], operands[1],
+ INTVAL (operands[3]), false)"
+{
+ return m68k_output_movem (operands, operands[0],
+ INTVAL (operands[3]), false);
+})
+
+(define_expand "link"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (match_dup 2)
+ (match_dup 0))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 1 "const_int_operand")))])]
+ "TARGET_68020 || INTVAL (operands[1]) >= -0x8004"
+{
+ operands[2] = gen_frame_mem (SImode, plus_constant (stack_pointer_rtx, -4));
+})
+
+(define_insn "*link"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:SI (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (match_dup 0))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 1 "const_int_operand")))]
+ "TARGET_68020 || INTVAL (operands[1]) >= -0x8004"
+{
+ operands[1] = GEN_INT (INTVAL (operands[1]) + 4);
+ if (!MOTOROLA)
+ return "link %0,%1";
+ else if (INTVAL (operands[1]) >= -0x8000)
+ return "link.w %0,%1";
+ else
+ return "link.l %0,%1";
+}
+ [(set_attr "type" "link")])
+
+(define_expand "unlink"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (match_dup 1))
+ (set (reg:SI SP_REG)
+ (plus:SI (match_dup 0)
+ (const_int 4)))])]
+ ""
+{
+ operands[1] = gen_frame_mem (SImode, copy_rtx (operands[0]));
+})
+
+(define_insn "*unlink"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (mem:SI (match_dup 0)))
+ (set (reg:SI SP_REG)
+ (plus:SI (match_dup 0)
+ (const_int 4)))]
+ ""
+ "unlk %0"
+ [(set_attr "type" "unlk")])
+
+(define_insn "load_got"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_GOT))]
+ ""
+{
+ if (TARGET_ID_SHARED_LIBRARY)
+ {
+ operands[1] = gen_rtx_REG (Pmode, PIC_REG);
+ return MOTOROLA ? "move.l %?(%1),%0" : "movel %1@(%?), %0";
+ }
+ else if (MOTOROLA)
+ {
+ if (TARGET_COLDFIRE)
+ /* Load the full 32-bit PC-relative offset of
+ _GLOBAL_OFFSET_TABLE_ into the PIC register, then use it to
+ calculate the absolute value. The offset and "lea"
+ operation word together occupy 6 bytes. */
+ return ("move.l #_GLOBAL_OFFSET_TABLE_@GOTPC, %0\n\t"
+ "lea (-6, %%pc, %0), %0");
+ else
+ return "lea (%%pc, _GLOBAL_OFFSET_TABLE_@GOTPC), %0";
+ }
+ else
+ return ("movel #_GLOBAL_OFFSET_TABLE_, %0\n\t"
+ "lea %%pc@(0,%0:l),%0");
+})
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
+ ""
+ "jmp %a0"
+ [(set_attr "type" "jmp")])
+
+;; This should not be used unless the add/sub insns can't be.
+
+(define_insn "*lea"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=a")
+ (match_operand:QI 1 "address_operand" "p"))]
+ ""
+ "lea %a1,%0")
+
+;; This is the first machine-dependent peephole optimization.
+;; It is useful when a floating value is returned from a function call
+;; and then is moved into an FP register.
+;; But it is mainly intended to test the support for these optimizations.
+
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))]
+ "FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
+ [(set (mem:SI (reg:SI SP_REG)) (match_dup 1))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 2))
+ (set (match_dup 0) (mem:DF (post_inc:SI (reg:SI SP_REG))))]
+ "split_di(operands + 1, 1, operands + 1, operands + 2);")
+
+;; Optimize a stack-adjust followed by a push of an argument.
+;; This is said to happen frequently with -msoft-float
+;; when there are consecutive library calls.
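+;; E.g. "addq.l #4,%sp" followed by "move.l %d0,-(%sp)" collapses
+;; into a single "move.l %d0,(%sp)".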
+
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ "!reg_mentioned_p (stack_pointer_rtx, operands[0])"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);")
+
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "const_int_operand" "")))
+ (set (match_operand:SF 1 "push_operand" "")
+ (match_operand:SF 2 "general_operand" ""))]
+ "INTVAL (operands[0]) > 4
+ && !reg_mentioned_p (stack_pointer_rtx, operands[2])"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 0)))
+ (set (match_dup 1) (match_dup 2))]
+{
+ operands[0] = GEN_INT (INTVAL (operands[0]) - 4);
+ operands[1] = replace_equiv_address (operands[1], stack_pointer_rtx);
+})
+
+;; Speed up stack adjust followed by a fullword fixedpoint push.
+;; Constant operands need special care, as replacing a "pea X.w" with
+;; "move.l #X,(%sp)" is often not a win.
+
+;; Already handled by the earlier combine-stack-adjustments (csa)
+;; pass; kept for reference.
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (set (match_operand:SI 0 "push_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ "!reg_mentioned_p (stack_pointer_rtx, operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);")
+
+;; Try to use moveq, after stack push has been changed into a simple move.
+(define_peephole2
+ [(match_scratch:SI 2 "d")
+ (set (match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
+ && INTVAL (operands[1]) != 0
+ && IN_RANGE (INTVAL (operands[1]), -0x80, 0x7f)
+ && !valid_mov3q_const (INTVAL (operands[1]))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))])
+
+;; This sequence adds an instruction, but is two bytes shorter.
+(define_peephole2
+ [(match_scratch:SI 2 "d")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 12)))
+ (set (match_operand:SI 0 "push_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "INTVAL (operands[1]) != 0
+ && IN_RANGE (INTVAL (operands[1]), -0x80, 0x7f)
+ && !valid_mov3q_const (INTVAL (operands[1]))"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);")
+
+;; Changing pea X.w into a move.l is no real win here.
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "const_int_operand" "")))
+ (set (match_operand:SI 1 "push_operand" "")
+ (match_operand:SI 2 "general_operand" ""))]
+ "INTVAL (operands[0]) > 4
+ && !reg_mentioned_p (stack_pointer_rtx, operands[2])
+ && !(CONST_INT_P (operands[2]) && INTVAL (operands[2]) != 0
+ && IN_RANGE (INTVAL (operands[2]), -0x8000, 0x7fff)
+ && !valid_mov3q_const (INTVAL (operands[2])))"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 0)))
+ (set (match_dup 1) (match_dup 2))]
+{
+ operands[0] = GEN_INT (INTVAL (operands[0]) - 4);
+ operands[1] = replace_equiv_address (operands[1], stack_pointer_rtx);
+})
+
+;; Speed up pushing a single byte/two bytes but leaving four bytes of space
+;; (which differs slightly between m680x0 and ColdFire).
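+;; The 68k being big-endian, "move.l %dN,-(%sp)" leaves the low byte of
+;; %dN at 3(%sp) and its low word at 2(%sp), so these peepholes fold the
+;; separate stack adjustment and narrow store into one long push of the
+;; widened register value.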
+
+(define_peephole2
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (match_operand:QI 0 "memory_operand" "")
+ (match_operand:QI 1 "register_operand" ""))]
+ "!reg_mentioned_p (stack_pointer_rtx, operands[1])
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (operands[0], 0), 0), stack_pointer_rtx)
+ && CONST_INT_P (XEXP (XEXP (operands[0], 0), 1))
+ && INTVAL (XEXP (XEXP (operands[0], 0), 1)) == 3"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
+ operands[0] = adjust_automodify_address (operands[0], SImode, addr, -3);
+ operands[1] = simplify_gen_subreg (SImode, operands[1], QImode, 0);
+})
+
+(define_peephole2
+ [(set (match_operand:QI 0 "push_operand" "")
+ (match_operand:QI 1 "register_operand" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -3)))]
+ "!reg_mentioned_p (stack_pointer_rtx, operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_automodify_address (operands[0], SImode,
+ XEXP (operands[0], 0), -3);
+ operands[1] = simplify_gen_subreg (SImode, operands[1], QImode, 0);
+})
+
+(define_peephole2
+ [(set (match_operand:HI 0 "push_operand" "")
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -2)))]
+ "!reg_mentioned_p (stack_pointer_rtx, operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_automodify_address (operands[0], SImode,
+ XEXP (operands[0], 0), -2);
+ operands[1] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+})
+
+;; Optimize a series of strict_low_part assignments
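+;; For example (illustrative), in the sequence
+;;	clr.l %d0 ; move.w %d1,%d0 ; clr.l %d0 ; move.w %d2,%d0
+;; the second clr.l is redundant (only the low word of %d0 has changed
+;; since the first clear) and can be deleted.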
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (const_int 0))
+ (set (strict_low_part (match_operand:HI 1 "register_operand" ""))
+ (match_operand:HI 2 "general_operand" ""))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && strict_low_part_peephole_ok (HImode, insn, operands[0])"
+ [(set (strict_low_part (match_dup 1)) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (const_int 0))
+ (set (strict_low_part (match_operand:QI 1 "register_operand" ""))
+ (match_operand:QI 2 "general_operand" ""))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && strict_low_part_peephole_ok (QImode, insn, operands[0])"
+ [(set (strict_low_part (match_dup 1)) (match_dup 2))]
+ "")
+
+;; dbCC peepholes
+;;
+;; Turns
+;; loop:
+;; [ ... ]
+;; jCC label ; abnormal loop termination
+;; dbra dN, loop ; normal loop termination
+;;
+;; Into
+;; loop:
+;; [ ... ]
+;; dbCC dN, loop
+;; jCC label
+;;
+;; Which moves the jCC condition outside the inner loop for free.
+;;
+
+(define_peephole
+ [(set (pc) (if_then_else (match_operator 3 "valid_dbcc_comparison_p"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:HI 0 "register_operand" "")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))])]
+ "!TARGET_COLDFIRE && DATA_REG_P (operands[0]) && ! flags_in_68881 ()"
+{
+ CC_STATUS_INIT;
+ output_dbcc_and_branch (operands);
+ return "";
+})
+
+(define_peephole
+ [(set (pc) (if_then_else (match_operator 3 "valid_dbcc_comparison_p"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 0 "register_operand" "")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))])]
+ "!TARGET_COLDFIRE && DATA_REG_P (operands[0]) && ! flags_in_68881 ()"
+{
+ CC_STATUS_INIT;
+ output_dbcc_and_branch (operands);
+ return "";
+})
+
+(define_peephole
+ [(set (pc) (if_then_else (match_operator 3 "valid_dbcc_comparison_p"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ge (plus:HI (match_operand:HI 0 "register_operand" "")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))])]
+ "!TARGET_COLDFIRE && DATA_REG_P (operands[0]) && ! flags_in_68881 ()"
+{
+ CC_STATUS_INIT;
+ output_dbcc_and_branch (operands);
+ return "";
+})
+
+(define_peephole
+ [(set (pc) (if_then_else (match_operator 3 "valid_dbcc_comparison_p"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (parallel
+ [(set (pc)
+ (if_then_else
+ (ge (plus:SI (match_operand:SI 0 "register_operand" "")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))])]
+ "!TARGET_COLDFIRE && DATA_REG_P (operands[0]) && ! flags_in_68881 ()"
+{
+ CC_STATUS_INIT;
+ output_dbcc_and_branch (operands);
+ return "";
+})
+
+
+(define_insn "extendsfxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=fm,f")
+ (float_extend:XF (match_operand:SF 1 "general_operand" "f,rmF")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[0]) && FP_REG_P (operands[1]))
+ {
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+	  /* Extending float to extended precision in an fp-reg is a no-op.
+ NOTICE_UPDATE_CC has already assumed that the
+ cc will be set. So cancel what it did. */
+ cc_status = cc_prev_status;
+ return "";
+ }
+ return "f%$move%.x %1,%0";
+ }
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1]))
+ return "f%$move%.x %1,%0";
+ else if (ADDRESS_REG_P (operands[1]))
+ return "move%.l %1,%-\;f%$move%.s %+,%0";
+ else if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return output_move_const_single (operands);
+ return "f%$move%.s %f1,%0";
+ }
+ return "fmove%.x %f1,%0";
+})
+
+
+(define_insn "extenddfxf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=fm,f")
+ (float_extend:XF
+ (match_operand:DF 1 "general_operand" "f,rmE")))]
+ "TARGET_68881"
+{
+ if (FP_REG_P (operands[0]) && FP_REG_P (operands[1]))
+ {
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+	  /* Extending double to extended precision in an fp-reg is a no-op.
+ NOTICE_UPDATE_CC has already assumed that the
+ cc will be set. So cancel what it did. */
+ cc_status = cc_prev_status;
+ return "";
+ }
+ return "fmove%.x %1,%0";
+ }
+ if (FP_REG_P (operands[0]))
+ {
+ if (REG_P (operands[1]))
+ {
+ rtx xoperands[2];
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ output_asm_insn ("move%.l %1,%-", xoperands);
+ output_asm_insn ("move%.l %1,%-", operands);
+ return "f%&move%.d %+,%0";
+ }
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ return output_move_const_double (operands);
+ return "f%&move%.d %f1,%0";
+ }
+ return "fmove%.x %f1,%0";
+})
+
+(define_insn "truncxfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,!r")
+ (float_truncate:DF
+ (match_operand:XF 1 "general_operand" "f,f")))]
+ "TARGET_68881"
+{
+ if (REG_P (operands[0]))
+ {
+ output_asm_insn ("fmove%.d %f1,%-\;move%.l %+,%0", operands);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ return "move%.l %+,%0";
+ }
+ return "fmove%.d %f1,%0";
+})
+
+(define_insn "truncxfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=dm")
+ (float_truncate:SF
+ (match_operand:XF 1 "general_operand" "f")))]
+ "TARGET_68881"
+ "fmove%.s %f1,%0")
+
+(define_insn "sin<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (unspec:FP
+ [(match_operand:FP 1 "general_operand" "f<FP:dreg>m")] UNSPEC_SIN))]
+ "TARGET_68881 && flag_unsafe_math_optimizations"
+{
+ if (FP_REG_P (operands[1]))
+ return "fsin%.x %1,%0";
+ else
+ return "fsin%.<FP:prec> %1,%0";
+})
+
+(define_insn "cos<mode>2"
+ [(set (match_operand:FP 0 "nonimmediate_operand" "=f")
+ (unspec:FP
+ [(match_operand:FP 1 "general_operand" "f<FP:dreg>m")] UNSPEC_COS))]
+ "TARGET_68881 && flag_unsafe_math_optimizations"
+{
+ if (FP_REG_P (operands[1]))
+ return "fcos%.x %1,%0";
+ else
+ return "fcos%.<FP:prec> %1,%0";
+})
+
+;; Unconditional traps are assumed to have (const_int 1) for the condition.
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 7))]
+ ""
+ "trap #7"
+ [(set_attr "type" "trap")])
+
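+;; The ctrapMM4 standard patterns take four operands: the comparison
+;; operator, its two arguments, and the trap code.  No DImode compare fits
+;; in the trap_if template, so for ctrapdi4 the comparison (operands 1 and
+;; 2, which do not appear in the pattern) is emitted as a separate
+;; tstdi/cmpdi that sets cc0, which the trap_if then tests against zero.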
+(define_expand "ctrapdi4"
+ [(trap_if (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (match_operand:SI 3 "const1_operand" ""))]
+ "TARGET_68020"
+{
+ if (operands[2] == const0_rtx)
+ emit_insn (gen_tstdi (operands[1]));
+ else
+ emit_insn (gen_cmpdi (operands[1], operands[2]));
+ operands[1] = cc0_rtx;
+ operands[2] = const0_rtx;
+})
+
+(define_expand "ctrapsi4"
+ [(set (cc0)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (trap_if (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (match_operand:SI 3 "const1_operand" ""))]
+ "TARGET_68020"
+ "")
+
+(define_expand "ctraphi4"
+ [(set (cc0)
+ (compare (match_operand:HI 1 "nonimmediate_src_operand" "")
+ (match_operand:HI 2 "general_src_operand" "")))
+ (trap_if (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (match_operand:SI 3 "const1_operand" ""))]
+ "TARGET_68020"
+ "")
+
+(define_expand "ctrapqi4"
+ [(set (cc0)
+ (compare (match_operand:QI 1 "nonimmediate_src_operand" "")
+ (match_operand:QI 2 "general_src_operand" "")))
+ (trap_if (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (match_operand:SI 3 "const1_operand" ""))]
+ "TARGET_68020"
+ "")
+
+(define_insn "*conditional_trap"
+ [(trap_if (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (match_operand:SI 1 "const1_operand" "I"))]
+ "TARGET_68020 && ! flags_in_68881 ()"
+{
+ switch (GET_CODE (operands[0]))
+ {
+ case EQ: return "trapeq";
+ case NE: return "trapne";
+ case GT: return "trapgt";
+ case GTU: return "traphi";
+ case LT: return "traplt";
+ case LTU: return "trapcs";
+ case GE: return "trapge";
+ case GEU: return "trapcc";
+ case LE: return "traple";
+ case LEU: return "trapls";
+ default: gcc_unreachable ();
+ }
+})
+
+;; These are to prevent the scheduler from moving stores to the frame
+;; before the stack adjustment.
+(define_insn "stack_tie"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_TIE))]
+ ""
+ ""
+ [(set_attr "type" "ignore")])
+
+;; Instruction that occupies one word in the ColdFire instruction buffer.
+;; It is used by the scheduler only and should never appear in the
+;; actual instruction stream.
+(define_insn "ib"
+ [(unspec [(const_int 0)] UNSPEC_IB)]
+ ""
+ "#"
+ [(set_attr "type" "ib")])
+
+(include "cf.md")
diff --git a/gcc/config/m68k/m68k.opt b/gcc/config/m68k/m68k.opt
new file mode 100644
index 000000000..d5aa9fa76
--- /dev/null
+++ b/gcc/config/m68k/m68k.opt
@@ -0,0 +1,188 @@
+; Options for the Motorola 68000 port of the compiler.
+
+; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m5200
+Target RejectNegative
+Generate code for a 520X
+
+m5206e
+Target RejectNegative
+Generate code for a 5206e
+
+m528x
+Target RejectNegative
+Generate code for a 528x
+
+m5307
+Target RejectNegative
+Generate code for a 5307
+
+m5407
+Target RejectNegative
+Generate code for a 5407
+
+m68000
+Target RejectNegative
+Generate code for a 68000
+
+m68010
+Target RejectNegative
+Generate code for a 68010
+
+m68020
+Target RejectNegative
+Generate code for a 68020
+
+m68020-40
+Target RejectNegative
+Generate code for a 68040, without any new instructions
+
+m68020-60
+Target RejectNegative
+Generate code for a 68060, without any new instructions
+
+m68030
+Target RejectNegative
+Generate code for a 68030
+
+m68040
+Target RejectNegative
+Generate code for a 68040
+
+m68060
+Target RejectNegative
+Generate code for a 68060
+
+m68302
+Target RejectNegative
+Generate code for a 68302
+
+m68332
+Target RejectNegative
+Generate code for a 68332
+
+; Has no effect on gcc
+m68851
+Target
+Generate code for a 68851
+
+m68881
+Target RejectNegative Mask(HARD_FLOAT)
+Generate code that uses 68881 floating-point instructions
+
+malign-int
+Target Report Mask(ALIGN_INT)
+Align variables on a 32-bit boundary
+
+march=
+Target RejectNegative Joined
+Specify the name of the target architecture
+
+mbitfield
+Target Report Mask(BITFIELD)
+Use the bit-field instructions
+
+mc68000
+Target RejectNegative
+Generate code for a 68000
+
+mc68020
+Target RejectNegative
+Generate code for a 68020
+
+mcfv4e
+Target RejectNegative
+Generate code for a ColdFire v4e
+
+mcpu=
+Target RejectNegative Joined
+Specify the target CPU
+
+mcpu32
+Target RejectNegative
+Generate code for a cpu32
+
+mdiv
+Target Report Mask(CF_HWDIV)
+Use hardware division instructions on ColdFire
+
+mfidoa
+Target RejectNegative
+Generate code for a Fido A
+
+mhard-float
+Target RejectNegative Mask(HARD_FLOAT) MaskExists
+Generate code which uses hardware floating point instructions
+
+mid-shared-library
+Target Report Mask(ID_SHARED_LIBRARY)
+Enable ID based shared library
+
+mnobitfield
+Target RejectNegative InverseMask(BITFIELD)
+Do not use the bit-field instructions
+
+mnortd
+Target RejectNegative InverseMask(RTD)
+Use normal calling convention
+
+mnoshort
+Target RejectNegative InverseMask(SHORT)
+Consider type 'int' to be 32 bits wide
+
+mpcrel
+Target Report Mask(PCREL)
+Generate pc-relative code
+
+mrtd
+Target Report Mask(RTD)
+Use different calling convention using 'rtd'
+
+msep-data
+Target Report Mask(SEP_DATA)
+Enable separate data segment
+
+mshared-library-id=
+Target RejectNegative Joined UInteger
+ID of shared library to build
+
+mshort
+Target Report Mask(SHORT)
+Consider type 'int' to be 16 bits wide
+
+msoft-float
+Target RejectNegative InverseMask(HARD_FLOAT)
+Generate code with library calls for floating point
+
+mstrict-align
+Target Report Mask(STRICT_ALIGNMENT)
+Do not use unaligned memory references
+
+mtune=
+Target RejectNegative Joined
+Tune for the specified target CPU or architecture
+
+mxgot
+Target Report Mask(XGOT)
+Support more than 8192 GOT entries on ColdFire
+
+mxtls
+Target Report Mask(XTLS)
+Support TLS segment larger than 64K
diff --git a/gcc/config/m68k/m68kelf.h b/gcc/config/m68k/m68kelf.h
new file mode 100644
index 000000000..d3fc41a4e
--- /dev/null
+++ b/gcc/config/m68k/m68kelf.h
@@ -0,0 +1,164 @@
+/* m68kelf support, derived from m68kv4.h */
+
+/* Target definitions for GNU compiler for mc680x0 running System V.4
+ Copyright (C) 1991, 1993, 2000, 2002, 2003, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+
+ Written by Ron Guilmette (rfg@netcom.com) and Fred Fish (fnf@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#ifndef SWBEG_ASM_OP
+#define SWBEG_ASM_OP "\t.swbeg\t"
+#endif
+
+/* Here are three prefixes that are used by asm_fprintf to
+ facilitate customization for alternate assembler syntaxes.
+ Machines with no likelihood of an alternate syntax need not
+ define these and need not use asm_fprintf. */
+
+/* The prefix for register names. Note that REGISTER_NAMES
+ is supposed to include this prefix. Also note that this is NOT an
+   fprintf format string, it is a literal string.  */
+
+#undef REGISTER_PREFIX
+#define REGISTER_PREFIX "%"
+
+/* The prefix for local (compiler generated) labels.
+ These labels will not appear in the symbol table. */
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+/* config/m68k.md has an explicit reference to the program counter;
+   prefix it with the register prefix.  */
+
+#define ASM_RETURN_CASE_JUMP \
+ do { \
+ if (TARGET_COLDFIRE) \
+ { \
+ if (ADDRESS_REG_P (operands[0])) \
+ return "jmp %%pc@(2,%0:l)"; \
+ else \
+ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \
+ } \
+ else \
+ return "jmp %%pc@(2,%0:w)"; \
+ } while (0)
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+do { \
+ if ((LOG) > 0) \
+ fprintf ((FILE), "%s%u\n", ALIGN_ASM_OP, 1 << (LOG)); \
+} while (0)
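+/* For example, assuming the usual ELF ALIGN_ASM_OP of "\t.align\t",
+   ASM_OUTPUT_ALIGN (FILE, 3) emits ".align 8".  */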
+
+/* Register in which address to store a structure value is passed to a
+ function. The default in m68k.h is a1. For m68k/SVR4 it is a0. */
+
+#undef M68K_STRUCT_VALUE_REGNUM
+#define M68K_STRUCT_VALUE_REGNUM A0_REG
+
+/* The static chain regnum defaults to a0, but we use that for
+ structure return, so have to use a1 for the static chain. */
+
+#undef STATIC_CHAIN_REGNUM
+#define STATIC_CHAIN_REGNUM A1_REG
+#undef M68K_STATIC_CHAIN_REG_NAME
+#define M68K_STATIC_CHAIN_REG_NAME REGISTER_PREFIX "a1"
+
+#define ASM_COMMENT_START "|"
+
+/* Define how the m68k registers should be numbered for Dwarf output.
+ The numbering provided here should be compatible with the native
+ SVR4 SDB debugger in the m68k/SVR4 reference port, where d0-d7
+   are 0-7, a0-a7 are 8-15, and fp0-fp7 are 16-23.  */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#if 0
+/* The SVR4 m68k assembler chokes on `comm i,1,1', which asks for
+   1-byte alignment.  Not generating alignment for COMMON seems
+   safer until the assembler is fixed.  */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+/* Same problem with this one. */
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#endif
+
+#undef ASM_OUTPUT_COMMON
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(SIZE)))
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(SIZE)))
+
+/* Currently, JUMP_TABLES_IN_TEXT_SECTION must be defined in order to
+ keep switch tables in the text section. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* In m68k SVR4, using swbeg is the standard way to output a switch
+   table.  */
+#undef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE,PREFIX,NUM,TABLE) \
+ fprintf ((FILE), "%s&%d\n", SWBEG_ASM_OP, XVECLEN (PATTERN (TABLE), 1));
+/* end of stuff from m68kv4.h */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crtbegin.o%s"
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#ifndef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#endif
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+#endif
diff --git a/gcc/config/m68k/m68kemb.h b/gcc/config/m68k/m68kemb.h
new file mode 100644
index 000000000..5d917f91c
--- /dev/null
+++ b/gcc/config/m68k/m68kemb.h
@@ -0,0 +1,53 @@
+/* Definitions of target machine for GNU compiler. "embedded" 68XXX.
+ This is meant to be included after m68k.h.
+ Copyright (C) 1994, 1995, 1998, 1999, 2004, 2006
+ Free Software Foundation, Inc. */
+
+/* Override the SVR4 ABI for this target. */
+
+#define PTRDIFF_TYPE "long int"
+#define SIZE_TYPE "long unsigned int"
+
+/* In order for bitfields to work on a 68000, or with -mnobitfield, we must
+ define either PCC_BITFIELD_TYPE_MATTERS or STRUCTURE_SIZE_BOUNDARY.
+ Defining STRUCTURE_SIZE_BOUNDARY results in structure packing problems,
+ so we define PCC_BITFIELD_TYPE_MATTERS. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Don't default to pcc-struct-return, so that we can return small structures
+ and unions in registers, which is slightly more efficient. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#undef FUNCTION_VALUE
+#define FUNCTION_VALUE(VALTYPE,FUNC) LIBCALL_VALUE (TYPE_MODE (VALTYPE))
+
+#undef LIBCALL_VALUE
+#define LIBCALL_VALUE(MODE) \
+ m68k_libcall_value (MODE)
+
+#undef FUNCTION_VALUE_REGNO_P
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == D0_REG || (TARGET_68881 && (N) == FP0_REG))
+
+#undef NEEDS_UNTYPED_CALL
+#define NEEDS_UNTYPED_CALL 1
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__embedded__"); \
+ } \
+ while (0)
+
+/* Override the default LIB_SPEC from gcc.c. We don't currently support
+ profiling, or libg.a. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+/* Make this null, since we want crt0.o to come from the linker
+   script.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
diff --git a/gcc/config/m68k/math-68881.h b/gcc/config/m68k/math-68881.h
new file mode 100644
index 000000000..6d9f8b2d4
--- /dev/null
+++ b/gcc/config/m68k/math-68881.h
@@ -0,0 +1,529 @@
+/******************************************************************\
+* *
+* <math-68881.h> last modified: 23 May 1992. *
+* *
+* Copyright (C) 1989 by Matthew Self. *
+* You may freely distribute verbatim copies of this software *
+* provided that this copyright notice is retained in all copies. *
+* You may distribute modifications to this software under the *
+* conditions above if you also clearly note such modifications *
+* with their author and date. *
+* *
+* Note: errno is not set to EDOM when domain errors occur for *
+* most of these functions. Rather, it is assumed that the *
+* 68881's OPERR exception will be enabled and handled *
+* appropriately by the operating system. Similarly, overflow *
+* and underflow do not set errno to ERANGE. *
+* *
+* Send bugs to Matthew Self (self@bayes.arc.nasa.gov). *
+* *
+\******************************************************************/
+
+/* This file is NOT a part of GCC, just distributed with it. */
+
+/* If you find this in GCC,
+ please send bug reports to bug-gcc@prep.ai.mit.edu. */
+
+/* Changed by Richard Stallman:
+ May 1993, add conditional to prevent multiple inclusion.
+ % inserted before a #.
+ New function `hypot' added.
+ Nans written in hex to avoid 0rnan.
+ May 1992, use %! for fpcr register. Break lines before function names.
+ December 1989, add parens around `&' in pow.
+ November 1990, added alternate definition of HUGE_VAL for Sun. */
+
+/* Changed by Jim Wilson:
+ September 1993, Use #undef before HUGE_VAL instead of #ifdef/#endif. */
+
+/* Changed by Ian Lance Taylor:
+ September 1994, use extern inline instead of static inline. */
+
+#ifndef __math_68881
+#define __math_68881
+
+#include <errno.h>
+
+#undef HUGE_VAL
+#ifdef __sun__
+/* The Sun assembler fails to handle the hex constant in the usual defn. */
+#define HUGE_VAL \
+({ \
+ static union { int i[2]; double d; } u = { {0x7ff00000, 0} }; \
+ u.d; \
+})
+#else
+#define HUGE_VAL \
+({ \
+ double huge_val; \
+ \
+ __asm ("fmove%.d #0x7ff0000000000000,%0" /* Infinity */ \
+ : "=f" (huge_val) \
+ : /* no inputs */); \
+ huge_val; \
+})
+#endif
+
+__inline extern double
+sin (double x)
+{
+ double value;
+
+ __asm ("fsin%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+cos (double x)
+{
+ double value;
+
+ __asm ("fcos%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+tan (double x)
+{
+ double value;
+
+ __asm ("ftan%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+asin (double x)
+{
+ double value;
+
+ __asm ("fasin%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+acos (double x)
+{
+ double value;
+
+ __asm ("facos%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+atan (double x)
+{
+ double value;
+
+ __asm ("fatan%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+atan2 (double y, double x)
+{
+ double pi, pi_over_2;
+
+ __asm ("fmovecr%.x #0,%0" /* extended precision pi */
+ : "=f" (pi)
+ : /* no inputs */ );
+ __asm ("fscale%.b #-1,%0" /* no loss of accuracy */
+ : "=f" (pi_over_2)
+ : "0" (pi));
+ if (x > 0)
+ {
+ if (y > 0)
+ {
+ if (x > y)
+ return atan (y / x);
+ else
+ return pi_over_2 - atan (x / y);
+ }
+ else
+ {
+ if (x > -y)
+ return atan (y / x);
+ else
+ return - pi_over_2 - atan (x / y);
+ }
+ }
+ else
+ {
+ if (y < 0)
+ {
+ if (-x > -y)
+ return - pi + atan (y / x);
+ else
+ return - pi_over_2 - atan (x / y);
+ }
+ else
+ {
+ if (-x > y)
+ return pi + atan (y / x);
+ else if (y > 0)
+ return pi_over_2 - atan (x / y);
+ else
+ {
+ double value;
+
+ errno = EDOM;
+ __asm ("fmove%.d #0x7fffffffffffffff,%0" /* quiet NaN */
+ : "=f" (value)
+ : /* no inputs */);
+ return value;
+ }
+ }
+ }
+}
+
+__inline extern double
+sinh (double x)
+{
+ double value;
+
+ __asm ("fsinh%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+cosh (double x)
+{
+ double value;
+
+ __asm ("fcosh%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+tanh (double x)
+{
+ double value;
+
+ __asm ("ftanh%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+atanh (double x)
+{
+ double value;
+
+ __asm ("fatanh%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+exp (double x)
+{
+ double value;
+
+ __asm ("fetox%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+expm1 (double x)
+{
+ double value;
+
+ __asm ("fetoxm1%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+log (double x)
+{
+ double value;
+
+ __asm ("flogn%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+log1p (double x)
+{
+ double value;
+
+ __asm ("flognp1%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+log10 (double x)
+{
+ double value;
+
+ __asm ("flog10%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+sqrt (double x)
+{
+ double value;
+
+ __asm ("fsqrt%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
+__inline extern double
+hypot (double x, double y)
+{
+ return sqrt (x*x + y*y);
+}
+
+__inline extern double
+pow (double x, double y)
+{
+ if (x > 0)
+ return exp (y * log (x));
+ else if (x == 0)
+ {
+ if (y > 0)
+ return 0.0;
+ else
+ {
+ double value;
+
+ errno = EDOM;
+ __asm ("fmove%.d #0x7fffffffffffffff,%0" /* quiet NaN */
+ : "=f" (value)
+ : /* no inputs */);
+ return value;
+ }
+ }
+ else
+ {
+ double temp;
+
+ __asm ("fintrz%.x %1,%0"
+ : "=f" (temp) /* integer-valued float */
+ : "f" (y));
+ if (y == temp)
+ {
+ int i = (int) y;
+
+ if ((i & 1) == 0) /* even */
+ return exp (y * log (-x));
+ else
+ return - exp (y * log (-x));
+ }
+ else
+ {
+ double value;
+
+ errno = EDOM;
+ __asm ("fmove%.d #0x7fffffffffffffff,%0" /* quiet NaN */
+ : "=f" (value)
+ : /* no inputs */);
+ return value;
+ }
+ }
+}
+
+__inline extern double
+fabs (double x)
+{
+ double value;
+
+ __asm ("fabs%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ return value;
+}
+
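+/* The ceil/floor/rint routines below temporarily rewrite the rounding-mode
+   field of the 68881 FPCR (written %! in this asm dialect): bits 4-5
+   select the mode, with 00 = to nearest, 01 = toward zero, 10 = toward
+   minus infinity and 11 = toward plus infinity.  */
+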
+__inline extern double
+ceil (double x)
+{
+ int rounding_mode, round_up;
+ double value;
+
+ __asm volatile ("fmove%.l %!,%0"
+ : "=dm" (rounding_mode)
+ : /* no inputs */ );
+ round_up = rounding_mode | 0x30;
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (round_up));
+ __asm volatile ("fint%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (rounding_mode));
+ return value;
+}
+
+__inline extern double
+floor (double x)
+{
+ int rounding_mode, round_down;
+ double value;
+
+ __asm volatile ("fmove%.l %!,%0"
+ : "=dm" (rounding_mode)
+ : /* no inputs */ );
+ round_down = (rounding_mode & ~0x10)
+ | 0x20;
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (round_down));
+ __asm volatile ("fint%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (rounding_mode));
+ return value;
+}
+
+__inline extern double
+rint (double x)
+{
+ int rounding_mode, round_nearest;
+ double value;
+
+ __asm volatile ("fmove%.l %!,%0"
+ : "=dm" (rounding_mode)
+ : /* no inputs */ );
+ round_nearest = rounding_mode & ~0x30;
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (round_nearest));
+ __asm volatile ("fint%.x %1,%0"
+ : "=f" (value)
+ : "f" (x));
+ __asm volatile ("fmove%.l %0,%!"
+ : /* no outputs */
+ : "dmi" (rounding_mode));
+ return value;
+}
+
+__inline extern double
+fmod (double x, double y)
+{
+ double value;
+
+ __asm ("fmod%.x %2,%0"
+ : "=f" (value)
+ : "0" (x),
+ "f" (y));
+ return value;
+}
+
+__inline extern double
+drem (double x, double y)
+{
+ double value;
+
+ __asm ("frem%.x %2,%0"
+ : "=f" (value)
+ : "0" (x),
+ "f" (y));
+ return value;
+}
+
+__inline extern double
+scalb (double x, int n)
+{
+ double value;
+
+ __asm ("fscale%.l %2,%0"
+ : "=f" (value)
+ : "0" (x),
+ "dmi" (n));
+ return value;
+}
+
+__inline extern double
+logb (double x)
+{
+ double exponent;
+
+ __asm ("fgetexp%.x %1,%0"
+ : "=f" (exponent)
+ : "f" (x));
+ return exponent;
+}
+
+__inline extern double
+ldexp (double x, int n)
+{
+ double value;
+
+ __asm ("fscale%.l %2,%0"
+ : "=f" (value)
+ : "0" (x),
+ "dmi" (n));
+ return value;
+}
+
+__inline extern double
+frexp (double x, int *exp)
+{
+ double float_exponent;
+ int int_exponent;
+ double mantissa;
+
+ __asm ("fgetexp%.x %1,%0"
+ : "=f" (float_exponent) /* integer-valued float */
+ : "f" (x));
+ int_exponent = (int) float_exponent;
+ __asm ("fgetman%.x %1,%0"
+ : "=f" (mantissa) /* 1.0 <= mantissa < 2.0 */
+ : "f" (x));
+ if (mantissa != 0)
+ {
+ __asm ("fscale%.b #-1,%0"
+ : "=f" (mantissa) /* mantissa /= 2.0 */
+ : "0" (mantissa));
+ int_exponent += 1;
+ }
+ *exp = int_exponent;
+ return mantissa;
+}
+
+__inline extern double
+modf (double x, double *ip)
+{
+ double temp;
+
+ __asm ("fintrz%.x %1,%0"
+ : "=f" (temp) /* integer-valued float */
+ : "f" (x));
+ *ip = temp;
+ return x - temp;
+}
+
+#endif /* not __math_68881 */
diff --git a/gcc/config/m68k/netbsd-elf.h b/gcc/config/m68k/netbsd-elf.h
new file mode 100644
index 000000000..1238d26e7
--- /dev/null
+++ b/gcc/config/m68k/netbsd-elf.h
@@ -0,0 +1,315 @@
+/* Definitions of target machine for GNU compiler,
+ for m68k (including m68010) NetBSD platforms using the
+ ELF object format.
+ Copyright (C) 2002, 2003, 2004, 2006, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+   Contributed by Wasabi Systems, Inc.
+
+ This file is derived from <m68k/m68kv4.h>, <m68k/m68kelf.h>,
+ and <m68k/linux.h>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ builtin_define ("__m68k__"); \
+ builtin_define ("__SVR4_ABI__"); \
+ builtin_define ("__motorola__"); \
+ if (TARGET_HARD_FLOAT) \
+ builtin_define ("__HAVE_FPU__"); \
+ } \
+ while (0)
+
+/* Don't try using XFmode on the 68010. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_68020 ? 80 : 64)
+
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#ifdef __mc68010__
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 80
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION \
+ fprintf (stderr, \
+ TARGET_68010 \
+ ? " (NetBSD/68010 ELF)" \
+ : " (NetBSD/m68k ELF)");
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD m68k targets. Currently we
+ deal with the GCC option '-posix', as well as an indication as to
+ whether or not use of the FPU is allowed. */
+
+#undef CPP_SPEC
+#define CPP_SPEC NETBSD_CPP_SPEC
+
+
+/* Provide an ASM_SPEC appropriate for NetBSD m68k ELF targets. We need
+ to pass PIC code generation options. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "%(asm_cpu_spec) %{fpic|fpie:-k} %{fPIC|fPIE:-k -K}"
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/m68k ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC NETBSD_LINK_SPEC_ELF
+
+#define NETBSD_ENTRY_POINT "_start"
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function only. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+do \
+ { \
+ asm_fprintf (FILE, "\tlea (%LLP%d,%Rpc),%Ra1\n", (LABELNO)); \
+ if (flag_pic) \
+ fprintf (FILE, "\tbsr.l __mcount@PLTPC\n"); \
+ else \
+ fprintf (FILE, "\tjbsr __mcount\n"); \
+ } \
+while (0)
+
+
+/* Make gcc agree with <machine/ansi.h> */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+
+/* XXX
+ Here is a bunch of stuff lifted from m68kelf.h. We don't use that
+ file directly, because it has a lot of baggage we don't want. */
+
+
+/* The prefix for register names. Note that REGISTER_NAMES
+ is supposed to include this prefix. Also note that this is NOT an
+ fprintf format string, it is a literal string. */
+
+#undef REGISTER_PREFIX
+#define REGISTER_PREFIX "%"
+
+
+/* The prefix for local (compiler generated) labels.
+ These labels will not appear in the symbol table. */
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "|"
+
+
+/* Currently, JUMP_TABLES_IN_TEXT_SECTION must be defined in order to
+ keep switch tables in the text section. */
+
+#undef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+
+/* Use the default action for outputting the case label. */
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_RETURN_CASE_JUMP \
+ do { \
+ if (TARGET_COLDFIRE) \
+ { \
+ if (ADDRESS_REG_P (operands[0])) \
+ return "jmp %%pc@(2,%0:l)"; \
+ else \
+ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \
+ } \
+ else \
+ return "jmp %%pc@(2,%0:w)"; \
+ } while (0)
+
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+do \
+ { \
+ if ((LOG) > 0) \
+ fprintf ((FILE), "%s%u\n", ALIGN_ASM_OP, 1 << (LOG)); \
+ } \
+while (0)
+
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as uninitialized global
+ data. */
+
+#define BSS_SECTION_ASM_OP ".section\t.bss"
+
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+
+#undef ASM_OUTPUT_COMMON
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(SIZE)))
+
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(SIZE)))
+
+
+/* XXX
+ This is the end of the chunk lifted from m68kelf.h */
+
+
+/* XXX
+ The following chunk is more or less lifted from m68kv4.h.
+ We'd like to just #include that file, but it has not yet
+ been converted to the new include style.
+
+ Should there be a m68kv4-abi.h ?? */
+
+
+/* Register in which address to store a structure value is passed to a
+ function. The default in m68k.h is a1. For m68k/SVR4 it is a0. */
+
+#undef M68K_STRUCT_VALUE_REGNUM
+#define M68K_STRUCT_VALUE_REGNUM A0_REG
+
+
+/* Register in which static-chain is passed to a function. The
+   default in m68k.h is a0, but that is already the struct value
+ regnum. Make it a1 instead. */
+
+#undef STATIC_CHAIN_REGNUM
+#define STATIC_CHAIN_REGNUM A1_REG
+#undef M68K_STATIC_CHAIN_REG_NAME
+#define M68K_STATIC_CHAIN_REG_NAME REGISTER_PREFIX "a1"
+
+
+/* Now to renumber registers for dbx and gdb.
+ We use the Sun-3 convention, which is:
+ floating point registers have numbers 18 to 25, not
+ 16 to 23 as they do in the compiler. */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(REGNO) ((REGNO) < 16 ? (REGNO) : (REGNO) + 2)
+
+
+/* 1 if N is a possible register number for a function value. For
+ m68k/SVR4 allow d0, a0, or fp0 as return registers, for integral,
+ pointer, or floating types, respectively. Reject fp0 if not using
+ a 68881 coprocessor. */
+
+#undef FUNCTION_VALUE_REGNO_P
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == D0_REG || (N) == A0_REG || (TARGET_68881 && (N) == FP0_REG))
+
+
+/* Define this to be true when FUNCTION_VALUE_REGNO_P is true for
+ more than one register. */
+
+#undef NEEDS_UNTYPED_CALL
+#define NEEDS_UNTYPED_CALL 1
+
+
+/* Define how to generate (in the callee) the output value of a
+ function and how to find (in the caller) the value returned by a
+ function. VALTYPE is the data type of the value (as a tree). If
+ the precise function being called is known, FUNC is its
+ FUNCTION_DECL; otherwise, FUNC is 0. For m68k/SVR4 generate the
+ result in d0, a0, or fp0 as appropriate. */
+
+#undef FUNCTION_VALUE
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ m68k_function_value (VALTYPE, FUNC)
+
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE.
+ For m68k/SVR4 look for integer values in d0, pointer values in d0
+ (returned in both d0 and a0), and floating values in fp0. */
+
+#undef LIBCALL_VALUE
+#define LIBCALL_VALUE(MODE) \
+ m68k_libcall_value (MODE)
+
+
+/* Boundary (in *bits*) on which stack pointer should be aligned.
+ The m68k/SVR4 convention is to keep the stack pointer longword aligned. */
+
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY 32
+
+
+/* Alignment of field after `int : 0' in a structure.
+ For m68k/SVR4, this is the next longword boundary. */
+
+#undef EMPTY_FIELD_BOUNDARY
+#define EMPTY_FIELD_BOUNDARY 32
+
+
+/* No data type wants to be aligned rounder than this.
+   For m68k/SVR4, some types (doubles for example) are aligned on 8-byte
+   boundaries.  */
+
+#undef BIGGEST_ALIGNMENT
+#define BIGGEST_ALIGNMENT 64
+
+
+/* The svr4 ABI for the m68k says that records and unions are returned
+ in memory. */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* XXX
+ This is the end of the chunk lifted from m68kv4.h */
diff --git a/gcc/config/m68k/openbsd.h b/gcc/config/m68k/openbsd.h
new file mode 100644
index 000000000..8478855df
--- /dev/null
+++ b/gcc/config/m68k/openbsd.h
@@ -0,0 +1,89 @@
+/* Configuration file for an m68k OpenBSD target.
+ Copyright (C) 1999, 2002, 2003, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=OpenBSD"); \
+ } \
+ while (0)
+
+/* Define __HAVE_68881__ in preprocessor, unless -msoft-float is specified.
+ This will control the use of inline 68881 insns in certain macros. */
+#undef CPP_SPEC
+#define CPP_SPEC "%{!msoft-float:-D__HAVE_68881__ -D__HAVE_FPU__} %{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%(asm_cpu_spec) %{fpic|fpie:-k} %{fPIC|fPIE:-k -K}"
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+/* Storage layout. */
+
+/* Every structure or union's size must be a multiple of 2 bytes. */
+#define STRUCTURE_SIZE_BOUNDARY 16
+
+/* Specific options for DBX Output. */
+
+/* This is BSD, so it wants DBX format. */
+#define DBX_DEBUGGING_INFO 1
+
+/* Do not break .stabs pseudos into continuations. */
+#define DBX_CONTIN_LENGTH 0
+
+/* This is the char to use for continuation (in case we need to turn
+ continuation back on). */
+#define DBX_CONTIN_CHAR '?'
+
+/* Stack & calling: aggregate returns. */
+
+/* ??? This is traditional, but quite possibly wrong. It appears to
+ disagree with gdb. */
+#define PCC_STATIC_STRUCT_RETURN 1
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Assembler format: exception region output. */
+
+/* All configurations that don't use elf must be explicit about not using
+ dwarf unwind information. */
+#define DWARF2_UNWIND_INFO 0
diff --git a/gcc/config/m68k/predicates.md b/gcc/config/m68k/predicates.md
new file mode 100644
index 000000000..6ca261fb9
--- /dev/null
+++ b/gcc/config/m68k/predicates.md
@@ -0,0 +1,246 @@
+;; Predicate definitions for Motorola 68000.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Special case of a general operand that's used as a source
+;; operand. Use this to permit reads from PC-relative memory when
+;; -mpcrel is specified.
+
+(define_predicate "general_src_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,mem")
+{
+ if (TARGET_PCREL
+ && GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF
+ || GET_CODE (XEXP (op, 0)) == CONST))
+ return 1;
+ return general_operand (op, mode);
+})
+
+;; Special case of a nonimmediate operand that's used as a source. Use
+;; this to permit reads from PC-relative memory when -mpcrel is
+;; specified.
+
+(define_predicate "nonimmediate_src_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (TARGET_PCREL && GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF
+ || GET_CODE (XEXP (op, 0)) == CONST))
+ return 1;
+ return nonimmediate_operand (op, mode);
+})
+
+;; Special case of a memory operand that's used as a source. Use this
+;; to permit reads from PC-relative memory when -mpcrel is specified.
+
+(define_predicate "memory_src_operand"
+ (match_code "subreg,mem")
+{
+ if (TARGET_PCREL && GET_CODE (op) == MEM
+ && (GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF
+ || GET_CODE (XEXP (op, 0)) == CONST))
+ return 1;
+ return memory_operand (op, mode);
+})
+
+;; Similar to nonimmediate_operand, but exclude stack_pointer_rtx.
+
+(define_predicate "not_sp_operand"
+ (match_code "subreg,reg,mem")
+{
+ return op != stack_pointer_rtx && nonimmediate_operand (op, mode);
+})
+
+;; Predicate that accepts only a pc-relative address. This is needed
+;; because pc-relative addresses don't satisfy the predicate
+;; "general_src_operand".
+
+(define_predicate "pcrel_address"
+ (match_code "symbol_ref,label_ref,const"))
+
+;; Accept integer operands in the range 0..0xffffffff. We have to
+;; check the range carefully since this predicate is used in DImode
+;; contexts. Also, we need some extra crud to make it work when
+;; hosted on 64-bit machines.
+
+(define_predicate "const_uint32_operand"
+ (match_code "const_int,const_double")
+{
+ /* It doesn't make sense to ask this question with a mode that is
+ not larger than 32 bits. */
+ gcc_assert (GET_MODE_BITSIZE (mode) > 32);
+
+#if HOST_BITS_PER_WIDE_INT > 32
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= 0 && INTVAL (op) <= 0xffffffffL));
+#else
+ return (GET_CODE (op) == CONST_INT
+ || (GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_HIGH (op) == 0));
+#endif
+})
+
+;; Accept integer operands in the range -0x80000000..0x7fffffff. We
+;; have to check the range carefully since this predicate is used in
+;; DImode contexts.
+
+(define_predicate "const_sint32_operand"
+ (match_code "const_int")
+{
+ /* It doesn't make sense to ask this question with a mode that is
+ not larger than 32 bits. */
+ gcc_assert (GET_MODE_BITSIZE (mode) > 32);
+
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= (-0x7fffffff - 1) && INTVAL (op) <= 0x7fffffff));
+})
+
+;; Return true if X is a valid comparison operator for the dbcc
+;; instruction. Note it rejects floating point comparison
+;; operators. (In the future we could use Fdbcc). It also rejects
+;; some comparisons when CC_NO_OVERFLOW is set.
+
+(define_predicate "valid_dbcc_comparison_p"
+ (and (match_code "eq,ne,gtu,ltu,geu,leu,gt,lt,ge,le")
+ (match_test "valid_dbcc_comparison_p_2 (op, mode)")))
+
+(define_predicate "m68k_cstore_comparison_operator"
+ (if_then_else (match_test "TARGET_68881")
+ (match_operand 0 "comparison_operator")
+ (match_operand 0 "ordered_comparison_operator")))
+
+;; Check for sign_extend or zero_extend. Used for bit-count operands.
+
+(define_predicate "extend_operator"
+ (match_code "sign_extend,zero_extend"))
+
+;; Returns true if OP is either a symbol reference or a sum of a
+;; symbol reference and a constant. This predicate is for "raw"
+;; symbol references not yet processed by legitimize*_address,
+;; hence we do not handle UNSPEC_{XGOT, TLS, XTLS} here.
+
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ op = XEXP (op, 0);
+ return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+
+#if 0 /* Deleted, with corresponding change in m68k.h,
+ so as to fit the specs. No CONST_DOUBLE is ever symbolic. */
+ case CONST_DOUBLE:
+ return GET_MODE (op) == mode;
+#endif
+
+ default:
+ return false;
+ }
+})
+
+;; A constant that can be used as the address in a call insn.
+(define_predicate "const_call_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (and (match_test "m68k_symbolic_call != NULL")
+ (match_operand 0 "symbolic_operand"))))
+
+;; An operand that can be used as the address in a call insn.
+(define_predicate "call_operand"
+ (ior (match_operand 0 "const_call_operand")
+ (match_operand 0 "register_operand")))
+
+;; A constant that can be used as the address in a sibcall insn.
+(define_predicate "const_sibcall_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (and (match_test "m68k_symbolic_jump != NULL")
+ (match_operand 0 "symbolic_operand"))))
+
+;; An operand that can be used as the address in a sibcall insn.
+(define_predicate "sibcall_operand"
+ (ior (match_operand 0 "const_sibcall_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == STATIC_CHAIN_REGNUM"))))
+
+;; A memory operand whose address is a post-increment, e.g. (%a0)+.
+
+(define_predicate "post_inc_operand"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == POST_INC")))
+
+;; A memory operand whose address is a pre-decrement, e.g. -(%sp).
+
+(define_predicate "pre_dec_operand"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC")))
+
+;; A zero constant.
+(define_predicate "const0_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; A one constant (operand for conditional_trap).
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+;; A valid operand for a HImode or QImode conditional operation.
+;; ColdFire has tst patterns, but not cmp patterns.
+(define_predicate "m68k_subword_comparison_operand"
+ (if_then_else (match_test "TARGET_COLDFIRE")
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx"))
+ (match_operand 0 "general_src_operand")))
+
+;; An operand for movsi_const0 pattern.
+(define_predicate "movsi_const0_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (match_test "(TARGET_68010 || TARGET_COLDFIRE)
+ || !(MEM_P (op) && MEM_VOLATILE_P (op))")))
+
+;; A non-symbolic call operand.
+;; We need to special case 'const_int' to ignore its mode while matching.
+(define_predicate "non_symbolic_call_operand"
+ (and (match_operand 0 "call_operand")
+ (ior (and (match_code "const_int")
+ (match_test "!symbolic_operand (op, mode)"))
+	    (match_test "!symbolic_operand (op, mode)"))))
+
+;; Special case of general_src_operand, which rejects a few fp
+;; constants (which we prefer in registers) before reload.
+
+(define_predicate "fp_src_operand"
+ (match_operand 0 "general_src_operand")
+{
+ return !CONSTANT_P (op)
+ || (TARGET_68881
+ && (!standard_68881_constant_p (op)
+ || reload_in_progress
+ || reload_completed));
+})
diff --git a/gcc/config/m68k/print-sysroot-suffix.sh b/gcc/config/m68k/print-sysroot-suffix.sh
new file mode 100644
index 000000000..3cf1d8eb4
--- /dev/null
+++ b/gcc/config/m68k/print-sysroot-suffix.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# This script takes the following arguments:
+#
+# - the target sysroot
+# - the value of $(MULTILIB_MATCHES)
+# - the value of $(MULTILIB_OPTIONS)
+#
+# It uses these arguments to construct a definition of SYSROOT_SUFFIX_SPEC,
+# which it prints to the standard output. For each multilib directory FOO,
+# the script checks whether $sysroot has a subdirectory FOO, and if so will
+# use /FOO for all compatible command-line options. It will not add a
+# suffix for /FOO's options otherwise. These suffixes are concatenated,
+# with one subspec for each space-separated entry in $(MULTILIB_OPTIONS).
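+#
+# For example (hypothetical values): with MULTILIB_OPTIONS="mcpu=5206/mcpu=5208"
+# and a sysroot that contains an m5206 subdirectory but no m5208 one, the
+# script prints:
+#
+#   #undef SYSROOT_SUFFIX_SPEC
+#   #define SYSROOT_SUFFIX_SPEC "%{mcpu=5206:/m5206}"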
+set -e
+sysroot=$1
+matches=$2
+options=$3
+
+# For each multilib option OPT, add to $substs a sed command of the
+# form "-e 's/OPT/OPT/'".
+substs=""
+for option in `echo "$options" | tr '/' ' '`
+do
+ substs="$substs -e 's/$option/$option/g'"
+done
+
+# For each ALIAS=CANONICAL entry in $MULTILIB_MATCHES, look for sed
+# arguments in $substs of the form "-e 's/CANONICAL/.../'". Replace
+# such entries with "-e 's/CANONICAL/ALIAS|.../'". Both the ALIAS and
+# CANONICAL parts of $MULTILIB_MATCHES use '?' to stand for '='.
+#
+# After this loop, a command of the form "echo FOO | eval sed $substs"
+# will replace a canonical option FOO with a %{...}-style spec pattern.
+for match in $matches
+do
+ canonical=`echo "$match" | sed -e 's/=.*//' -e 's/?/=/g'`
+ alias=`echo "$match" | sed -e 's/.*=//' -e 's/?/=/g'`
+ substs=`echo "$substs" | sed -e "s,s/$canonical/,&$alias|,"`
+done
+
+# Build up the final SYSROOT_SUFFIX_SPEC in $spec.
+spec=
+for combo in $options
+do
+ # See which option alternatives in $combo have their own sysroot
+ # directory. Create a subspec of the form "%{PAT1:/DIR1;...;PATn:DIRn}"
+ # from each such option OPTi, where DIRi is the directory associated
+ # with OPTi and PATi is the result of passing OPTi through $substs.
+ subspec=
+ for option in `echo "$combo" | tr '/' ' '`
+ do
+ dir=`echo "$option" | sed 's/cpu=//'`
+ if test -d "$sysroot/$dir"; then
+ test -z "$subspec" || subspec="$subspec;"
+ subspec="$subspec"`echo "$option" | eval sed $substs`":/$dir"
+ fi
+ done
+ # Concatenate all the subspecs.
+ test -z "$subspec" || spec="$spec%{$subspec}"
+done
+if test -n "$spec"; then
+ echo "#undef SYSROOT_SUFFIX_SPEC"
+ echo "#define SYSROOT_SUFFIX_SPEC \"$spec\""
+fi
diff --git a/gcc/config/m68k/rtemself.h b/gcc/config/m68k/rtemself.h
new file mode 100644
index 000000000..20861fbfe
--- /dev/null
+++ b/gcc/config/m68k/rtemself.h
@@ -0,0 +1,33 @@
+/* Definitions for rtems targeting a Motorola m68k using elf.
+ Copyright (C) 1999, 2000, 2002 National Research Council of Canada.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Contributed by Charles-Antoine Gauthier (charles.gauthier@nrc.ca).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Target OS builtins. */
+#undef TARGET_OS_CPP_BUILTINS /* Defined in m68kemb.h. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("mc68000"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_define ("__rtems__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
diff --git a/gcc/config/m68k/t-cf b/gcc/config/m68k/t-cf
new file mode 100644
index 000000000..7bf8e11ee
--- /dev/null
+++ b/gcc/config/m68k/t-cf
@@ -0,0 +1,7 @@
+# Select only ColdFire-specific CPUs.
+
+M68K_MLIB_CPU += && (CPU ~ "^mcf")
+M68K_ARCH := cf
+# Do not stamp the multilibs with a MAC type, as we never use those
+# instructions in compiler-generated code.
+MULTILIB_EXTRA_OPTS += Wa,-mno-mac
diff --git a/gcc/config/m68k/t-crtstuff b/gcc/config/m68k/t-crtstuff
new file mode 100644
index 000000000..a8bdb502d
--- /dev/null
+++ b/gcc/config/m68k/t-crtstuff
@@ -0,0 +1,10 @@
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crti.o crtn.o
+
+# Add flags here as required.
+CRTSTUFF_T_CFLAGS =
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/m68k/crti.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o $(srcdir)/config/m68k/crti.s
+$(T)crtn.o: $(srcdir)/config/m68k/crtn.s $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o $(srcdir)/config/m68k/crtn.s
diff --git a/gcc/config/m68k/t-floatlib b/gcc/config/m68k/t-floatlib
new file mode 100644
index 000000000..2039d1d0d
--- /dev/null
+++ b/gcc/config/m68k/t-floatlib
@@ -0,0 +1,31 @@
+# Copyright (C) 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = m68k/lb1sf68.asm
+LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \
+ _double _float _floatex \
+ _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \
+ _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2
+
+LIB2FUNCS_EXTRA = fpgnulib.c xfgnulib.c
+
+fpgnulib.c: $(srcdir)/config/m68k/fpgnulib.c
+ cp $(srcdir)/config/m68k/fpgnulib.c fpgnulib.c
+xfgnulib.c: $(srcdir)/config/m68k/fpgnulib.c
+ echo '#define EXTFLOAT' > xfgnulib.c
+ cat $(srcdir)/config/m68k/fpgnulib.c >> xfgnulib.c
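+# i.e. xfgnulib.c is just fpgnulib.c recompiled with EXTFLOAT defined,
+# producing the extended-precision variants of the same routines.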
diff --git a/gcc/config/m68k/t-linux b/gcc/config/m68k/t-linux
new file mode 100644
index 000000000..d61f73bea
--- /dev/null
+++ b/gcc/config/m68k/t-linux
@@ -0,0 +1,33 @@
+# Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+
+# Only include multilibs for 680x0 CPUs with an MMU.
+M68K_MLIB_CPU += && (CPU ~ "^m680") && (FLAGS ~ "FL_MMU")
+
+ifeq ($(M68K_ARCH),m68k)
+MULTIARCH_DIRNAME = $(call if_multiarch,m68k-linux-gnu)
+endif
+
+# This rule uses MULTILIB_MATCHES to generate a definition of
+# SYSROOT_SUFFIX_SPEC.
+sysroot-suffix.h: $(srcdir)/config/m68k/print-sysroot-suffix.sh
+ $(SHELL) $(srcdir)/config/m68k/print-sysroot-suffix.sh \
+ "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \
+ "$(MULTILIB_OPTIONS)" > $@
diff --git a/gcc/config/m68k/t-m68k b/gcc/config/m68k/t-m68k
new file mode 100644
index 000000000..cbff34d65
--- /dev/null
+++ b/gcc/config/m68k/t-m68k
@@ -0,0 +1,4 @@
+# Select only 680x0-specific CPUs.
+
+M68K_MLIB_CPU += && (CPU !~ "^mcf")
+M68K_ARCH := m68k
diff --git a/gcc/config/m68k/t-m68kbare b/gcc/config/m68k/t-m68kbare
new file mode 100644
index 000000000..0cbaead7d
--- /dev/null
+++ b/gcc/config/m68k/t-m68kbare
@@ -0,0 +1,4 @@
+# Add soft-float multilibs.
+M68K_MLIB_DIRNAMES += softfp
+M68K_MLIB_OPTIONS += msoft-float
+
diff --git a/gcc/config/m68k/t-m68kelf b/gcc/config/m68k/t-m68kelf
new file mode 100644
index 000000000..bea01dc4f
--- /dev/null
+++ b/gcc/config/m68k/t-m68kelf
@@ -0,0 +1,4 @@
+# from ../t-svr4
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o
+# no pic for now
+#CRTSTUFF_T_CFLAGS=-fpic
diff --git a/gcc/config/m68k/t-mlibs b/gcc/config/m68k/t-mlibs
new file mode 100644
index 000000000..dcf681c32
--- /dev/null
+++ b/gcc/config/m68k/t-mlibs
@@ -0,0 +1,115 @@
+# multilibs -*- mode:Makefile -*-
+#
+# Copyright (C) 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# An awk command to extract lines from the m68k-devices.def file that
+# match $1 and then print the string defined by $2. Leading and
+# trailing whitespace is removed. $1 & $2 can make use of
+# CPU -- the cpu identifier (has leading 'm'/'mcf')
+# FLAGS -- the cpu capability flags
+# CPU_NAME -- the cpu name (has no leading m/mcf)
+# MLIB -- the multilib cpu name (no leading m/mcf)
+# This is intended to be used as $(call M68K_AWK,predicate,string)
+M68K_AWK = $(strip $(shell $(AWK) 'BEGIN { FS="[ \t]*[,()][ \t]*"; ORS=" " }; \
+ /^M68K_DEVICE/ { CPU=$$3; FLAGS=$$8; \
+ CPU_NAME=substr($$2,2,length($$2)-2); \
+ MLIB=substr($$5,2,length($$5)-2); \
+ if ($1) print $2 }' $(srcdir)/config/m68k/m68k-devices.def))
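+# For example, $(call M68K_AWK, CPU == "m68020", MLIB) prints the MLIB
+# field of the m68020 entry in m68k-devices.def.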
+
+# Add a multilib for each distinct architecture. M68K_MLIB_CPU, if defined,
+# adds additional restrictions.
+M68K_MLIB_CPUS := $(call M68K_AWK,\
+ (CPU_NAME == MLIB) $(M68K_MLIB_CPU), \
+ "m"MLIB)
+
+# Make the default cpu the default multilib.
+M68K_MLIB_DEFAULT := $(call M68K_AWK, CPU == "$(target_cpu_default)", MLIB)
+
+ifeq ($(filter m$(M68K_MLIB_DEFAULT),$(M68K_MLIB_CPUS)),)
+$(error Error default cpu '$(target_cpu_default)' is not in multilib set '$(M68K_MLIB_CPUS)')
+endif
+
+# Sed arguments that convert mcpu=* arguments into canonical forms.
+# We want to use the legacy m68* options instead of the new -mcpu=68*
+# options when compiling multilibs because the former are recognised
+# by older binutils.
+CANONICALIZE_OPTIONS = -e 's|mcpu=68|m68|g' -e 's|mcpu=cpu32|mcpu32|g'
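+# e.g. "mcpu=68040" canonicalizes to "m68040", and "mcpu=cpu32" to "mcpu32".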
+
+MULTILIB_DIRNAMES := $(filter-out m$(M68K_MLIB_DEFAULT),$(M68K_MLIB_CPUS))
+MULTILIB_OPTIONS := $(shell echo $(MULTILIB_DIRNAMES:m%=mcpu=%) \
+ | sed -e 's| |/|g' $(CANONICALIZE_OPTIONS))
+
+# Add subtarget specific options & dirs.
+MULTILIB_DIRNAMES += $(M68K_MLIB_DIRNAMES)
+MULTILIB_OPTIONS += $(M68K_MLIB_OPTIONS)
+
+MULTILIB_MATCHES :=
+
+ifneq ($(M68K_ARCH),cf)
+# Map the new-style options to the legacy m68k ones.
+MULTILIB_MATCHES += m68000=mcpu?68000 m68000=march?68000 m68000=mc68000 \
+ m68000=m68302 \
+ m68020=mcpu?68020 m68020=march?68020 m68020=mc68020 \
+ m68030=mcpu?68030 m68030=march?68030 \
+ m68040=mcpu?68040 m68040=march?68040 \
+ m68060=mcpu?68060 m68060=march?68060 \
+ mcpu32=mcpu?cpu32 mcpu32=march?cpu32 mcpu32=m68332
+endif
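+# (In MULTILIB_MATCHES, '?' stands for '=', so "m68020=mcpu?68020" above
+# maps -mcpu=68020 onto the -m68020 multilib.)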
+
+ifneq ($(M68K_ARCH),m68k)
+# Map the legacy ColdFire options to the new ones.
+MULTILIB_MATCHES += mcpu?5206=m5200 mcpu?5206e=m5206e mcpu?5208=m528x \
+ mcpu?5307=m5300 mcpu?5307=m5307 \
+ mcpu?5407=m5400 mcpu?5407=m5407 \
+ mcpu?5475=mcfv4e
+# Map -march=* options to the representative -mcpu=* option.
+MULTILIB_MATCHES += mcpu?5206e=march?isaa mcpu?5208=march?isaaplus \
+ mcpu?5407=march?isab
+endif
+
+# Match non-representative -mcpu options to their representative option.
+MULTILIB_MATCHES += \
+ $(call M68K_AWK, \
+ (CPU_NAME != MLIB) $(M68K_MLIB_CPU), \
+ (match(MLIB, "^68") || MLIB == "cpu32" \
+ ? "m"MLIB"=mcpu?"CPU_NAME \
+ : "mcpu?"MLIB"=mcpu?"CPU_NAME))
+
+MULTILIB_EXCEPTIONS :=
+
+ifeq ($(firstword $(M68K_MLIB_OPTIONS)),msoft-float)
+# Exclude soft-float multilibs for targets that default to soft-float anyway.
+MULTILIB_EXCEPTIONS += $(call M68K_AWK,\
+ (CPU_NAME == MLIB) $(M68K_MLIB_CPU) \
+ && (((CPU ~ "^mcf") && !match(FLAGS, "FL_CF_FPU")) \
+ || CPU == "cpu32" \
+ || CPU == "m68000"), \
+ "mcpu="MLIB"/msoft-float*")
+endif
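+# e.g. an FPU-less ColdFire CPU (no FL_CF_FPU flag) contributes an
+# exception of the form "mcpu=<mlib>/msoft-float*", since it is
+# soft-float whether or not the option is given.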
+
+# Remove the default CPU from the explicit exceptions.
+MULTILIB_EXCEPTIONS := \
+ $(patsubst mcpu=$(M68K_MLIB_DEFAULT)/%,%,$(MULTILIB_EXCEPTIONS))
+
+# Convert all options to canonical form.
+MULTILIB_EXCEPTIONS := $(shell echo $(MULTILIB_EXCEPTIONS) | \
+ sed $(CANONICALIZE_OPTIONS))
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/m68k/t-openbsd b/gcc/config/m68k/t-openbsd
new file mode 100644
index 000000000..b295608de
--- /dev/null
+++ b/gcc/config/m68k/t-openbsd
@@ -0,0 +1,4 @@
+# gdb gets confused if PIC code is linked with non-PIC code,
+# so we cope by building all variants of libgcc.
+M68K_MLIB_OPTIONS += fpic/fPIC
+M68K_MLIB_DIRNAMES += fpic fPIC
diff --git a/gcc/config/m68k/t-rtems b/gcc/config/m68k/t-rtems
new file mode 100644
index 000000000..0997afebc
--- /dev/null
+++ b/gcc/config/m68k/t-rtems
@@ -0,0 +1,9 @@
+# Custom multilibs for RTEMS
+M68K_MLIB_CPU += && (match(MLIB, "^68") \
+ || MLIB == "cpu32" \
+ || MLIB == "5206" \
+ || MLIB == "5208" \
+ || MLIB == "5307" \
+ || MLIB == "5329" \
+ || MLIB == "5407" \
+ || MLIB == "5475")
diff --git a/gcc/config/m68k/t-slibgcc-elf-ver b/gcc/config/m68k/t-slibgcc-elf-ver
new file mode 100644
index 000000000..6aac37cc0
--- /dev/null
+++ b/gcc/config/m68k/t-slibgcc-elf-ver
@@ -0,0 +1,3 @@
+# Bump the version number of the shared libgcc library
+
+SHLIB_SOVERSION = 2
diff --git a/gcc/config/m68k/t-uclinux b/gcc/config/m68k/t-uclinux
new file mode 100644
index 000000000..e1711a344
--- /dev/null
+++ b/gcc/config/m68k/t-uclinux
@@ -0,0 +1,36 @@
+# Copyright (C) 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# crti and crtn are provided by uClibc.
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o
+
+# Include multilibs for CPUs without an MMU or with FL_UCLINUX
+M68K_MLIB_CPU += && (!match(FLAGS, "FL_MMU") || match(FLAGS, "FL_UCLINUX"))
+
+# Add multilibs for execute-in-place and shared-library code.
+M68K_MLIB_OPTIONS += msep-data/mid-shared-library
+M68K_MLIB_DIRNAMES += msep-data mid-shared-library
+
+# This rule uses MULTILIB_MATCHES to generate a definition of
+# SYSROOT_SUFFIX_SPEC.
+sysroot-suffix.h: $(srcdir)/config/m68k/print-sysroot-suffix.sh
+ $(SHELL) $(srcdir)/config/m68k/print-sysroot-suffix.sh \
+ "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \
+ "$(MULTILIB_OPTIONS)" > $@
+
+generated_files += sysroot-suffix.h
diff --git a/gcc/config/m68k/uclinux-oldabi.h b/gcc/config/m68k/uclinux-oldabi.h
new file mode 100644
index 000000000..7ef202efb
--- /dev/null
+++ b/gcc/config/m68k/uclinux-oldabi.h
@@ -0,0 +1,70 @@
+/* Definitions of target machine for GCC. m68k/ColdFire based uClinux system
+ using ELF objects with special linker post-processing to produce FLAT
+ executables.
+
+ Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* The old uClinux ABI used 80-bit "long double"s for ColdFire too. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 80
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 80
+
+/* Undo the definition of STARTFILE_SPEC from m68kelf.h so we'll
+ pick the default from gcc.c (just link crt0.o from multilib dir). */
+#undef STARTFILE_SPEC
+
+/* Override the default LIB_SPEC from gcc.c. We don't currently support
+ profiling, or libg.a. */
+#undef LIB_SPEC
+#define LIB_SPEC "\
+%{mid-shared-library:-R libc.gdb%s -elf2flt -shared-lib-id 0} -lc \
+"
+
+/* We don't want a .eh_frame section. */
+#define EH_FRAME_IN_DATA_SECTION
+
+/* ??? Quick hack to get constructors working. Make this look more like a
+ COFF target, so the existing dejagnu/libgloss support works. A better
+ solution would be to make the necessary dejagnu and libgloss changes so
+ that we can use the normal ELF constructor mechanism. */
+#undef INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+/* Bring in standard linux defines. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("mc68000"); \
+ builtin_define ("__uClinux__"); \
+ builtin_define_std ("linux"); \
+ builtin_define_std ("unix"); \
+ builtin_define ("__gnu_linux__"); \
+ builtin_assert ("system=linux"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=posix"); \
+ if (TARGET_ID_SHARED_LIBRARY) \
+ builtin_define ("__ID_SHARED_LIBRARY__"); \
+ } \
+ while (0)
+
diff --git a/gcc/config/m68k/uclinux.h b/gcc/config/m68k/uclinux.h
new file mode 100644
index 000000000..1b21cafa0
--- /dev/null
+++ b/gcc/config/m68k/uclinux.h
@@ -0,0 +1,72 @@
+/* Definitions of target machine for GCC. m68k/ColdFire based uClinux system
+ using ELF objects with special linker post-processing to produce FLAT
+ executables.
+
+ Copyright (C) 2003, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (68k uClinux)");
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+"%{mshared-library-id=0|!mshared-library-id=*: crt1.o%s ;: Scrt1.o%s} \
+ crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+/* Override the default LIB_SPEC from gcc.c. We don't currently support
+ profiling, or libg.a. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+"%{mid-shared-library:%{!static-libc:-R libc.gdb%s}} %{pthread:-lpthread} -lc"
+
+/* Default to using -elf2flt with no options. */
+#undef LINK_SPEC
+#define LINK_SPEC \
+"%{!elf2flt*:-elf2flt} \
+ %{mid-shared-library: \
+ %{mshared-library-id=*:-shared-lib-id %*;:-shared-lib-id 0}}"
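+
+/* e.g. a plain link passes -elf2flt to the linker; an explicit -elf2flt
+ or -elf2flt=<options> on the command line overrides this default. */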
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS (); \
+ builtin_define ("__uClinux__"); \
+ if (TARGET_ID_SHARED_LIBRARY) \
+ { \
+ builtin_define ("__ID_SHARED_LIBRARY__"); \
+ /* Shared libraries and executables do not share \
+ typeinfo names. */ \
+ builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \
+ builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \
+ } \
+ } \
+ while (0)
+
+/* -msep-data is the default PIC mode on this target. */
+#define DRIVER_SELF_SPECS \
+ "%{fpie|fPIE|fpic|fPIC:%{!msep-data:%{!mid-shared-library: -msep-data}}}"
+
+/* The uclinux binary format relies on relocations against a segment being
+ within that segment. Conservatively apply this rule to individual
+ sections. */
+#undef M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
+#define M68K_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1
diff --git a/gcc/config/m68k/uclinux.opt b/gcc/config/m68k/uclinux.opt
new file mode 100644
index 000000000..537649952
--- /dev/null
+++ b/gcc/config/m68k/uclinux.opt
@@ -0,0 +1,36 @@
+; m68k/ColdFire uClinux options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+elf2flt
+Driver
+
+elf2flt=
+Driver JoinedOrMissing
+
+static-libc
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/mcore/constraints.md b/gcc/config/mcore/constraints.md
new file mode 100644
index 000000000..dee980775
--- /dev/null
+++ b/gcc/config/mcore/constraints.md
@@ -0,0 +1,112 @@
+;; Constraint definitions for the Motorola MCore
+;; Copyright (C) 2011
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+(define_register_constraint "a" "LRW_REGS"
+ "@internal")
+
+(define_register_constraint "b" "ONLYR1_REGS"
+ "@internal")
+
+(define_register_constraint "c" "C_REGS"
+ "@internal")
+
+(define_register_constraint "x" "ALL_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "An integer in the range 0 to 127."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 127)")))
+
+(define_constraint "J"
+ "An integer in the range 1 to 32."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 1, 32)")))
+
+(define_constraint "K"
+ "A shift operand, an integer in the range 0 to 31."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 31)")))
+
+(define_constraint "L"
+ "A negative arithmetic operand in the range -32 to -1."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -32, -1)")))
+
+(define_constraint "M"
+ "A constant loadable by bgeni."
+ (and (match_code "const_int")
+ (match_test "exact_log2 (ival) >= 0 && exact_log2 (ival) <= 30")))
+
+(define_constraint "N"
+ "A constant loadable by bmaskii, including -1."
+ (and (match_code "const_int")
+ (ior (match_test "ival == -1")
+ (and (match_test "exact_log2 (ival + 1) >= 0")
+ (match_test "exact_log2 (ival + 1) <= 30")))))
+
+(define_constraint "O"
+ "A constant allowed by cmov with two constants +/- 1 of each other."
+ (and (match_code "const_int")
+ (ior (match_test "insn_const_int_ok_for_constraint (ival, CONSTRAINT_I)")
+ (match_test "insn_const_int_ok_for_constraint (ival, CONSTRAINT_M)")
+ (match_test "insn_const_int_ok_for_constraint (ival, CONSTRAINT_N)")
+ (match_test "insn_const_int_ok_for_constraint (ival - 1, CONSTRAINT_M)")
+ (match_test "insn_const_int_ok_for_constraint (ival + 1, CONSTRAINT_N)"))))
+
+(define_constraint "P"
+ "A value that can be generated without an lrw instruction."
+ (and (match_code "const_int")
+ (match_test "mcore_const_ok_for_inline (ival)")))
+
+;; Floating-point constraints.
+(define_constraint "G"
+ "@internal"
+ (and (match_code "const_double")
+ (match_test "insn_const_int_ok_for_constraint (hval, CONSTRAINT_I)")
+ (match_test "insn_const_int_ok_for_constraint (ival, CONSTRAINT_I)")))
+
+;; Other constraints.
+(define_constraint "Q"
+ "The integer constant one."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "R"
+ "@internal"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == LABEL_REF")))
+
+(define_constraint "S"
+ "An integer constant with 0, 1, or 2 bits clear."
+ (and (match_code "const_int")
+ (match_test "mcore_num_zeros (ival) <= 2")))
+
+(define_constraint "T"
+ "An integer constant with 2 set bits."
+ (and (match_code "const_int")
+ (match_test "mcore_num_ones (ival) == 2")))
+
+(define_constraint "U"
+ "The integer constant zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
diff --git a/gcc/config/mcore/crti.asm b/gcc/config/mcore/crti.asm
new file mode 100644
index 000000000..03f592928
--- /dev/null
+++ b/gcc/config/mcore/crti.asm
@@ -0,0 +1,62 @@
+# crti.asm for ELF based systems
+
+# Copyright (C) 1992, 1998, 1999, 2008, 2009 Free Software Foundation, Inc.
+# Written By David Vinayak Henkel-Wallace, June 1992
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+ .section ".init"
+ .global _init
+ .type _init,@function
+ .align 4
+_init:
+ subi r0, 16
+ st.w r15, (r0, 12)
+
+ # These nops are here to align the end of this code with a 16 byte
+ # boundary. The linker will start inserting code into the .init
+ # section at such a boundary.
+
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+
+ .section ".fini"
+ .global _fini
+ .type _fini,@function
+ .align 4
+_fini:
+ subi r0, 16
+ st.w r15, (r0, 12)
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/gcc/config/mcore/crtn.asm b/gcc/config/mcore/crtn.asm
new file mode 100644
index 000000000..b764441e7
--- /dev/null
+++ b/gcc/config/mcore/crtn.asm
@@ -0,0 +1,44 @@
+# crtn.asm for ELF based systems
+
+# Copyright (C) 1992, 1999, 2000, 2008, 2009 Free Software Foundation, Inc.
+# Written By David Vinayak Henkel-Wallace, June 1992
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+ .section ".init"
+ .align 4
+
+ ldw r15,(r0, 12)
+ addi r0,16
+ jmp r15
+
+ .section ".fini"
+ .align 4
+
+ ldw r15, (r0, 12)
+ addi r0,16
+ jmp r15
+
+# Th-th-th-that is all folks!
+
diff --git a/gcc/config/mcore/lib1.asm b/gcc/config/mcore/lib1.asm
new file mode 100644
index 000000000..701762f2a
--- /dev/null
+++ b/gcc/config/mcore/lib1.asm
@@ -0,0 +1,303 @@
+/* libgcc routines for the MCore.
+ Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+
+#define SYM(x) CONCAT1 (__, x)
+
+#ifdef __ELF__
+#define TYPE(x) .type SYM (x),@function
+#define SIZE(x) .size SYM (x), . - SYM (x)
+#else
+#define TYPE(x)
+#define SIZE(x)
+#endif
+
+.macro FUNC_START name
+ .text
+ .globl SYM (\name)
+ TYPE (\name)
+SYM (\name):
+.endm
+
+.macro FUNC_END name
+ SIZE (\name)
+.endm
+
+#ifdef L_udivsi3
+FUNC_START udiv32
+FUNC_START udivsi32
+
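+ // Shift-and-subtract (restoring) division: r1:r2 is the working
+ // dividend pair and a sentinel bit seeded into r4 falls out of the
+ // quotient once all the dividend bits have been processed. The
+ // quotient comes back in r2, with the remainder piggybacked in r3.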
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+// appears to be wrong...
+// tested out incorrectly in our OS work...
+// mov r7,r3 // looking at divisor
+// ff1 r7 // I can move 32-r7 more bits to left.
+// addi r7,1 // ok, one short of that...
+// mov r1,r2
+// lsr r1,r7 // bits that came from low order...
+// rsubi r7,31 // r7 == "32-n" == LEFT distance
+// addi r7,1 // this is (32-n)
+// lsl r4,r7 // fixes the high 32 (quotient)
+// lsl r2,r7
+// cmpnei r4,0
+// bf 4f // the sentinel went away...
+
+ // run the remaining bits
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+
+4: mov r2,r4 // return quotient
+ mov r3,r1 // and piggyback the remainder
+ jmp r15
+FUNC_END udiv32
+FUNC_END udivsi32
+#endif
+
+#ifdef L_umodsi3
+FUNC_START urem32
+FUNC_START umodsi3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+ mov r2,r1 // return remainder
+ jmp r15
+FUNC_END urem32
+FUNC_END umodsi3
+#endif
+
+#ifdef L_divsi3
+FUNC_START div32
+FUNC_START divsi3
+ mov r5,r2 // calc sign of quotient
+ xor r5,r3
+ abs r2 // do unsigned divide
+ abs r3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+// tested out incorrectly in our OS work...
+// mov r7,r3 // looking at divisor
+// ff1 r7 // I can move 32-r7 more bits to left.
+// addi r7,1 // ok, one short of that...
+// mov r1,r2
+// lsr r1,r7 // bits that came from low order...
+// rsubi r7,31 // r7 == "32-n" == LEFT distance
+// addi r7,1 // this is (32-n)
+// lsl r4,r7 // fixes the high 32 (quotient)
+// lsl r2,r7
+// cmpnei r4,0
+// bf 4f // the sentinel went away...
+
+ // run the remaining bits
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+
+4: mov r2,r4 // return quotient
+ mov r3,r1 // piggyback the remainder
+ btsti r5,31 // after adjusting for sign
+ bf 3f
+ rsubi r2,0
+ rsubi r3,0
+3: jmp r15
+FUNC_END div32
+FUNC_END divsi3
+#endif
+
+#ifdef L_modsi3
+FUNC_START rem32
+FUNC_START modsi3
+ mov r5,r2 // calc sign of remainder
+ abs r2 // do unsigned divide
+ abs r3
+ movi r1,0 // r1-r2 form 64 bit dividend
+ movi r4,1 // r4 is quotient (1 for a sentinel)
+ cmpnei r3,0 // look for 0 divisor
+ bt 9f
+ trap 3 // divide by 0
+9:
+ // control iterations; skip across high order 0 bits in dividend
+ mov r7,r2
+ cmpnei r7,0
+ bt 8f
+ movi r2,0 // 0 dividend
+ jmp r15 // quick return
+8:
+ ff1 r7 // figure distance to skip
+ lsl r4,r7 // move the sentinel along (with 0's behind)
+ lsl r2,r7 // and the low 32 bits of numerator
+
+1: lslc r2,1 // 1 bit left shift of r1-r2
+ addc r1,r1
+ cmphs r1,r3 // upper 32 of dividend >= divisor?
+ bf 2f
+ sub r1,r3 // if yes, subtract divisor
+2: addc r4,r4 // shift by 1 and count subtracts
+ bf 1b // if sentinel falls out of quotient, stop
+ mov r2,r1 // return remainder
+ btsti r5,31 // after adjusting for sign
+ bf 3f
+ rsubi r2,0
+3: jmp r15
+FUNC_END rem32
+FUNC_END modsi3
+#endif
+
+
+/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2}
+ will behave as __cmpdf2. So, we stub the implementations to
+ jump on to __cmpdf2 and __cmpsf2.
+
+ All of these short-circuit the return path so that __cmp{sd}f2
+ will go directly back to the caller. */
+
+.macro COMPARE_DF_JUMP name
+ .import SYM (cmpdf2)
+FUNC_START \name
+ jmpi SYM (cmpdf2)
+FUNC_END \name
+.endm
+
+#ifdef L_eqdf2
+COMPARE_DF_JUMP eqdf2
+#endif /* L_eqdf2 */
+
+#ifdef L_nedf2
+COMPARE_DF_JUMP nedf2
+#endif /* L_nedf2 */
+
+#ifdef L_gtdf2
+COMPARE_DF_JUMP gtdf2
+#endif /* L_gtdf2 */
+
+#ifdef L_gedf2
+COMPARE_DF_JUMP gedf2
+#endif /* L_gedf2 */
+
+#ifdef L_ltdf2
+COMPARE_DF_JUMP ltdf2
+#endif /* L_ltdf2 */
+
+#ifdef L_ledf2
+COMPARE_DF_JUMP ledf2
+#endif /* L_ledf2 */
+
+/* SINGLE PRECISION FLOATING POINT STUBS */
+
+.macro COMPARE_SF_JUMP name
+ .import SYM (cmpsf2)
+FUNC_START \name
+ jmpi SYM (cmpsf2)
+FUNC_END \name
+.endm
+
+#ifdef L_eqsf2
+COMPARE_SF_JUMP eqsf2
+#endif /* L_eqsf2 */
+
+#ifdef L_nesf2
+COMPARE_SF_JUMP nesf2
+#endif /* L_nesf2 */
+
+#ifdef L_gtsf2
+COMPARE_SF_JUMP gtsf2
+#endif /* L_gtsf2 */
+
+#ifdef L_gesf2
+COMPARE_SF_JUMP gesf2
+#endif /* L_gesf2 */
+
+#ifdef L_ltsf2
+COMPARE_SF_JUMP ltsf2
+#endif /* L_ltsf2 */
+
+#ifdef L_lesf2
+COMPARE_SF_JUMP lesf2
+#endif /* L_lesf2 */
diff --git a/gcc/config/mcore/mcore-elf.h b/gcc/config/mcore/mcore-elf.h
new file mode 100644
index 000000000..98a3862cd
--- /dev/null
+++ b/gcc/config/mcore/mcore-elf.h
@@ -0,0 +1,129 @@
+/* Definitions of MCore target.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Cygnus Solutions.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef __MCORE_ELF_H__
+#define __MCORE_ELF_H__
+
+/* Run-time Target Specification. */
+#define TARGET_VERSION fputs (" (Motorola MCORE/elf)", stderr)
+
+/* Use DWARF2 debugging info. */
+#define DWARF2_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define MCORE_EXPORT_NAME(STREAM, NAME) \
+ do \
+ { \
+ fprintf (STREAM, "\t.section .exports\n"); \
+ fprintf (STREAM, "\t.ascii \" -export:%s\"\n", \
+ (* targetm.strip_name_encoding) (NAME)); \
+ in_section = NULL; \
+ } \
+ while (0);
+
+/* Write the extra assembler code needed to declare a function properly.
+ Some svr4 assemblers need to also have something extra said about the
+ function's return value. We allow for that here. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ if (mcore_dllexport_name_p (NAME)) \
+ { \
+ MCORE_EXPORT_NAME (FILE, NAME); \
+ switch_to_section (function_section (DECL)); \
+ } \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
+
+/* Write the extra assembler code needed to declare an object properly. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ if (mcore_dllexport_name_p (NAME)) \
+ { \
+ section *save_section = in_section; \
+ MCORE_EXPORT_NAME (FILE, NAME); \
+ switch_to_section (save_section); \
+ } \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ ASM_OUTPUT_LABEL(FILE, NAME); \
+ } \
+ while (0)
+
+/* Output the size directive for a decl in rest_of_decl_compilation
+ in the case where we did not do so before the initializer.
+ Once we find the error_mark_node, we know that the value of
+ size_directive_output was set
+ by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+ do \
+ { \
+ const char * name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } \
+ while (0)
+
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+
+/* Include the OS stub library, so that the code can be simulated.
+ This is not the right way to do this. Ideally this kind of thing
+ should be done in the linker script - but I have not worked out how
+ to specify the location of a linker script in a gcc command line yet. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!mno-lsim:-lsim} crtend.o%s crtn.o%s"
+
+/* The subroutine calls in the .init and .fini sections create literal
+ pools which must be jumped around.... */
+#define FORCE_CODE_SECTION_ALIGN asm ("br 1f ; .literals ; 1:");
+
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"aw\""
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"aw\""
+
+#endif /* __MCORE_ELF_H__ */
diff --git a/gcc/config/mcore/mcore-pe.h b/gcc/config/mcore/mcore-pe.h
new file mode 100644
index 000000000..ccd34e9c7
--- /dev/null
+++ b/gcc/config/mcore/mcore-pe.h
@@ -0,0 +1,102 @@
+/* Definitions of target machine for GNU compiler, for MCore using COFF/PE.
+ Copyright (C) 1994, 1999, 2000, 2002, 2003, 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#define TARGET_VERSION fputs (" (MCORE/pe)", stderr)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__pe__"); \
+ } \
+ while (0)
+
+/* The MCore ABI says that bitfields are unsigned by default. */
+/* The EPOC C++ environment does not support exceptions. */
+#undef CC1_SPEC
+#define CC1_SPEC "-funsigned-bitfields %{!DIN_GCC:-fno-rtti} %{!DIN_GCC:-fno-exceptions}"
+
+#undef SDB_DEBUGGING_INFO
+#define DBX_DEBUGGING_INFO 1
+
+/* Computed in toplev.c. */
+#undef PREFERRED_DEBUGGING_TYPE
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata"
+
+#define MCORE_EXPORT_NAME(STREAM, NAME) \
+ do \
+ { \
+ fprintf (STREAM, "\t.section .drectve\n"); \
+ fprintf (STREAM, "\t.ascii \" -export:%s\"\n", \
+ (* targetm.strip_name_encoding) (NAME)); \
+ in_section = NULL; \
+ } \
+ while (0);
+
+/* Output the label for an initialized variable. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ if (mcore_dllexport_name_p (NAME)) \
+ { \
+ section *save_section = in_section; \
+ MCORE_EXPORT_NAME (STREAM, NAME); \
+ switch_to_section (save_section); \
+ } \
+ ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
+ } \
+ while (0)
+
+/* Output a function label definition. */
+#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
+ do \
+ { \
+ if (mcore_dllexport_name_p (NAME)) \
+ { \
+ MCORE_EXPORT_NAME (STREAM, NAME); \
+ switch_to_section (function_section (DECL)); \
+ } \
+ ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
+ } \
+ while (0);
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+
+#define STARTFILE_SPEC "crt0.o%s"
+#define ENDFILE_SPEC "%{!mno-lsim:-lsim}"
+
+/* __CTOR_LIST__ and __DTOR_LIST__ must be defined by the linker script. */
+#define CTOR_LISTS_DEFINED_EXTERNALLY
+
+#undef DO_GLOBAL_CTORS_BODY
+#undef DO_GLOBAL_DTORS_BODY
+#undef INIT_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+
+#define SUPPORTS_ONE_ONLY 1
+
+/* Switch into a generic section. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_pe_asm_named_section
diff --git a/gcc/config/mcore/mcore-protos.h b/gcc/config/mcore/mcore-protos.h
new file mode 100644
index 000000000..b9f8f3307
--- /dev/null
+++ b/gcc/config/mcore/mcore-protos.h
@@ -0,0 +1,69 @@
+/* Prototypes for exported functions defined in mcore.c
+ Copyright (C) 2000, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Nick Clifton (nickc@redhat.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+extern const char * mcore_output_jump_label_table (void);
+extern void mcore_expand_prolog (void);
+extern void mcore_expand_epilog (void);
+extern int mcore_const_ok_for_inline (HOST_WIDE_INT);
+extern int mcore_num_ones (HOST_WIDE_INT);
+extern int mcore_num_zeros (HOST_WIDE_INT);
+extern int mcore_initial_elimination_offset (int, int);
+extern int mcore_byte_offset (unsigned int);
+extern int mcore_halfword_offset (unsigned int);
+extern int mcore_const_trick_uses_not (HOST_WIDE_INT);
+extern int mcore_dllexport_name_p (const char *);
+extern int mcore_dllimport_name_p (const char *);
+extern int mcore_naked_function_p (void);
+
+#ifdef TREE_CODE
+#ifdef HAVE_MACHINE_MODES
+extern int mcore_num_arg_regs (enum machine_mode, const_tree);
+#endif /* HAVE_MACHINE_MODES */
+
+#ifdef RTX_CODE
+extern rtx mcore_function_value (const_tree, const_tree);
+#endif /* RTX_CODE */
+#endif /* TREE_CODE */
+
+#ifdef RTX_CODE
+
+extern const char * mcore_output_bclri (rtx, int);
+extern const char * mcore_output_bseti (rtx, int);
+extern const char * mcore_output_cmov (rtx *, int, const char *);
+extern char * mcore_output_call (rtx *, int);
+extern int mcore_is_dead (rtx, rtx);
+extern int mcore_expand_insv (rtx *);
+extern bool mcore_expand_block_move (rtx *);
+extern const char * mcore_output_andn (rtx, rtx *);
+extern bool mcore_gen_compare (RTX_CODE, rtx, rtx);
+extern int mcore_symbolic_address_p (rtx);
+extern bool mcore_r15_operand_p (rtx);
+extern enum reg_class mcore_secondary_reload_class (enum reg_class, enum machine_mode, rtx);
+extern enum reg_class mcore_reload_class (rtx, enum reg_class);
+extern int mcore_is_same_reg (rtx, rtx);
+extern int mcore_arith_S_operand (rtx);
+
+#ifdef HAVE_MACHINE_MODES
+extern const char * mcore_output_move (rtx, rtx *, enum machine_mode);
+extern const char * mcore_output_movedouble (rtx *, enum machine_mode);
+extern int const_ok_for_mcore (HOST_WIDE_INT);
+#endif /* HAVE_MACHINE_MODES */
+#endif /* RTX_CODE */
diff --git a/gcc/config/mcore/mcore.c b/gcc/config/mcore/mcore.c
new file mode 100644
index 000000000..8118276a8
--- /dev/null
+++ b/gcc/config/mcore/mcore.c
@@ -0,0 +1,3206 @@
+/* Output routines for Motorola MCore processor
+ Copyright (C) 1993, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+ 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "mcore.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "obstack.h"
+#include "expr.h"
+#include "reload.h"
+#include "recog.h"
+#include "function.h"
+#include "ggc.h"
+#include "diagnostic-core.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+/* For dumping information about frame sizes. */
+char * mcore_current_function_name = 0;
+long mcore_current_compilation_timestamp = 0;
+
+/* Global variables for machine-dependent things. */
+
+/* Provides the class number of the smallest class containing
+ each register number. */
+const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+ GENERAL_REGS, ONLYR1_REGS, LRW_REGS, LRW_REGS,
+ LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS,
+ LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS,
+ LRW_REGS, LRW_REGS, LRW_REGS, GENERAL_REGS,
+ GENERAL_REGS, C_REGS, NO_REGS, NO_REGS,
+};
+
+struct mcore_frame
+{
+ int arg_size; /* Stdarg spills (bytes). */
+ int reg_size; /* Non-volatile reg saves (bytes). */
+ int reg_mask; /* Non-volatile reg saves. */
+ int local_size; /* Locals. */
+ int outbound_size; /* Arg overflow on calls out. */
+ int pad_outbound;
+ int pad_local;
+ int pad_reg;
+ /* Describe the steps we'll use to grow it. */
+#define MAX_STACK_GROWS 4 /* Gives us some spare space. */
+ int growth[MAX_STACK_GROWS];
+ int arg_offset;
+ int reg_offset;
+ int reg_growth;
+ int local_growth;
+};
+
+typedef enum
+{
+ COND_NO,
+ COND_MOV_INSN,
+ COND_CLR_INSN,
+ COND_INC_INSN,
+ COND_DEC_INSN,
+ COND_BRANCH_INSN
+}
+cond_type;
+
+static void output_stack_adjust (int, int);
+static int calc_live_regs (int *);
+static int try_constant_tricks (long, HOST_WIDE_INT *, HOST_WIDE_INT *);
+static const char * output_inline_const (enum machine_mode, rtx *);
+static void layout_mcore_frame (struct mcore_frame *);
+static void mcore_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
+static cond_type is_cond_candidate (rtx);
+static rtx emit_new_cond_insn (rtx, int);
+static rtx conditionalize_block (rtx);
+static void conditionalize_optimization (void);
+static void mcore_reorg (void);
+static rtx handle_structs_in_regs (enum machine_mode, const_tree, int);
+static void mcore_mark_dllexport (tree);
+static void mcore_mark_dllimport (tree);
+static int mcore_dllexport_p (tree);
+static int mcore_dllimport_p (tree);
+static tree mcore_handle_naked_attribute (tree *, tree, tree, int, bool *);
+#ifdef OBJECT_FORMAT_ELF
+static void mcore_asm_named_section (const char *,
+ unsigned int, tree);
+#endif
+static void mcore_print_operand (FILE *, rtx, int);
+static void mcore_print_operand_address (FILE *, rtx);
+static bool mcore_print_operand_punct_valid_p (unsigned char code);
+static void mcore_unique_section (tree, int);
+static void mcore_encode_section_info (tree, rtx, int);
+static const char *mcore_strip_name_encoding (const char *);
+static int mcore_const_costs (rtx, RTX_CODE);
+static int mcore_and_cost (rtx);
+static int mcore_ior_cost (rtx);
+static bool mcore_rtx_costs (rtx, int, int, int *, bool);
+static void mcore_external_libcall (rtx);
+static bool mcore_return_in_memory (const_tree, const_tree);
+static int mcore_arg_partial_bytes (CUMULATIVE_ARGS *,
+ enum machine_mode,
+ tree, bool);
+static rtx mcore_function_arg (CUMULATIVE_ARGS *,
+ enum machine_mode,
+ const_tree, bool);
+static void mcore_function_arg_advance (CUMULATIVE_ARGS *,
+ enum machine_mode,
+ const_tree, bool);
+static unsigned int mcore_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void mcore_asm_trampoline_template (FILE *);
+static void mcore_trampoline_init (rtx, tree, rtx);
+static void mcore_option_override (void);
+
+/* MCore specific attributes. */
+
+static const struct attribute_spec mcore_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "dllexport", 0, 0, true, false, false, NULL },
+ { "dllimport", 0, 0, true, false, false, NULL },
+ { "naked", 0, 0, true, false, false, mcore_handle_naked_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Options that we default to specific settings when -O* is given;
+ the user can subsequently override these settings.
+
+ Omitting the frame pointer is a very good idea on the MCore.
+ Scheduling isn't worth anything on the current MCore implementation. */
+
+static const struct default_options mcore_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_ffunction_cse, NULL, 0 },
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
+ { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
+ { OPT_LEVELS_ALL, OPT_fschedule_insns2, NULL, 0 },
+ { OPT_LEVELS_SIZE, OPT_mhardlit, NULL, 0 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_EXTERNAL_LIBCALL
+#define TARGET_ASM_EXTERNAL_LIBCALL mcore_external_libcall
+
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+#undef TARGET_MERGE_DECL_ATTRIBUTES
+#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#ifdef OBJECT_FORMAT_ELF
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+#endif
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND mcore_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS mcore_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P mcore_print_operand_punct_valid_p
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE mcore_attribute_table
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION mcore_unique_section
+#undef TARGET_ASM_FUNCTION_RODATA_SECTION
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO mcore_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING mcore_strip_name_encoding
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS mcore_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG mcore_reorg
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY mcore_return_in_memory
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES mcore_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG mcore_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE mcore_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY mcore_function_arg_boundary
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS mcore_setup_incoming_varargs
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE mcore_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT mcore_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE mcore_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE mcore_option_optimization_table
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Emit code to adjust the stack by SIZE bytes in the given DIRECTION. */
+static void
+output_stack_adjust (int direction, int size)
+{
+ /* If extending stack a lot, we do it incrementally. */
+ if (direction < 0 && size > mcore_stack_increment && mcore_stack_increment > 0)
+ {
+ rtx tmp = gen_rtx_REG (SImode, 1);
+ rtx memref;
+
+ emit_insn (gen_movsi (tmp, GEN_INT (mcore_stack_increment)));
+ do
+ {
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+ memref = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ MEM_VOLATILE_P (memref) = 1;
+ emit_insn (gen_movsi (memref, stack_pointer_rtx));
+ size -= mcore_stack_increment;
+ }
+ while (size > mcore_stack_increment);
+
+ /* SIZE is now the residual for the last adjustment,
+ which doesn't require a probe. */
+ }
+
+ if (size)
+ {
+ rtx insn;
+ rtx val = GEN_INT (size);
+
+ if (size > 32)
+ {
+ rtx nval = gen_rtx_REG (SImode, 1);
+ emit_insn (gen_movsi (nval, val));
+ val = nval;
+ }
+
+ if (direction > 0)
+ insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
+ else
+ insn = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
+
+ emit_insn (insn);
+ }
+}
+
+/* Work out the registers which need to be saved,
+ both as a mask and a count. */
+
+static int
+calc_live_regs (int * count)
+{
+ int reg;
+ int live_regs_mask = 0;
+
+ * count = 0;
+
+ for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
+ {
+ if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
+ {
+ (*count)++;
+ live_regs_mask |= (1 << reg);
+ }
+ }
+
+ return live_regs_mask;
+}
+
+/* Print the operand address in x to the stream. */
+
+static void
+mcore_print_operand_address (FILE * stream, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (stream, "(%s)", reg_names[REGNO (x)]);
+ break;
+
+ case PLUS:
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+
+ if (GET_CODE (base) != REG)
+ {
+ /* Ensure that BASE is a register (one of them must be). */
+ rtx temp = base;
+ base = index;
+ index = temp;
+ }
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ fprintf (stream, "(%s," HOST_WIDE_INT_PRINT_DEC ")",
+ reg_names[REGNO(base)], INTVAL (index));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ break;
+
+ default:
+ output_addr_const (stream, x);
+ break;
+ }
+}
+
+static bool
+mcore_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '.' || code == '#' || code == '*' || code == '^'
+ || code == '!');
+}
+
+/* Print operand x (an rtx) in assembler syntax to file stream
+ according to modifier code.
+
+ 'N' print log2 of (the constant + 1), or 32 for a constant of -1
+ 'R' print the next register or memory location along, i.e. the lsw in
+ a double word value
+ 'O' print a constant without the #
+ 'M' print a constant as its negative
+ 'P' print log2 of a power of two
+ 'Q' print log2 of an inverse of a power of two
+ 'U' print the register range for an ldm/stm instruction
+ 'x' print a constant in hexadecimal
+ 'X' print byte number for xtrbN instruction. */
+
+static void
+mcore_print_operand (FILE * stream, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'N':
+ if (INTVAL(x) == -1)
+ fprintf (asm_out_file, "32");
+ else
+ fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) + 1));
+ break;
+ case 'P':
+ fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) & 0xffffffff));
+ break;
+ case 'Q':
+ fprintf (asm_out_file, "%d", exact_log2 (~INTVAL (x)));
+ break;
+ case 'O':
+ fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+ case 'M':
+ fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, - INTVAL (x));
+ break;
+ case 'R':
+ /* Next location along in memory or register. */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x) + 1], (stream));
+ break;
+ case MEM:
+ mcore_print_operand_address
+ (stream, XEXP (adjust_address (x, SImode, 4), 0));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'U':
+ fprintf (asm_out_file, "%s-%s", reg_names[REGNO (x)],
+ reg_names[REGNO (x) + 3]);
+ break;
+ case 'x':
+ fprintf (asm_out_file, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
+ break;
+ case 'X':
+ fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 3 - INTVAL (x) / 8);
+ break;
+
+ default:
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x)], (stream));
+ break;
+ case MEM:
+ output_address (XEXP (x, 0));
+ break;
+ default:
+ output_addr_const (stream, x);
+ break;
+ }
+ break;
+ }
+}
+
+/* What does a constant cost? */
+
+static int
+mcore_const_costs (rtx exp, enum rtx_code code)
+{
+ HOST_WIDE_INT val = INTVAL (exp);
+
+ /* Easy constants. */
+ if ( CONST_OK_FOR_I (val)
+ || CONST_OK_FOR_M (val)
+ || CONST_OK_FOR_N (val)
+ || (code == PLUS && CONST_OK_FOR_L (val)))
+ return 1;
+ else if (code == AND
+ && ( CONST_OK_FOR_M (~val)
+ || CONST_OK_FOR_N (~val)))
+ return 2;
+ else if (code == PLUS
+ && ( CONST_OK_FOR_I (-val)
+ || CONST_OK_FOR_M (-val)
+ || CONST_OK_FOR_N (-val)))
+ return 2;
+
+ return 5;
+}
+
+/* What does an AND instruction cost?  We compute this because immediates
+ may have been relaxed; we want CSE to eliminate relaxed immediates,
+ otherwise we get bad code (multiple reloads of the same constant).  */
+
+static int
+mcore_and_cost (rtx x)
+{
+ HOST_WIDE_INT val;
+
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return 2;
+
+ val = INTVAL (XEXP (x, 1));
+
+ /* Do it directly. */
+ if (CONST_OK_FOR_K (val) || CONST_OK_FOR_M (~val))
+ return 2;
+ /* Takes one instruction to load. */
+ else if (const_ok_for_mcore (val))
+ return 3;
+ /* Takes two instructions to load. */
+ else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val))
+ return 4;
+
+ /* Takes a lrw to load. */
+ return 5;
+}
+
+/* What does an OR cost?  See mcore_and_cost above.  */
+
+static int
+mcore_ior_cost (rtx x)
+{
+ HOST_WIDE_INT val;
+
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return 2;
+
+ val = INTVAL (XEXP (x, 1));
+
+ /* Do it directly with bclri. */
+ if (CONST_OK_FOR_M (val))
+ return 2;
+ /* Takes one instruction to load. */
+ else if (const_ok_for_mcore (val))
+ return 3;
+ /* Takes two instructions to load. */
+ else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val))
+ return 4;
+
+ /* Takes a lrw to load. */
+ return 5;
+}
+
+static bool
+mcore_rtx_costs (rtx x, int code, int outer_code, int * total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ *total = mcore_const_costs (x, (enum rtx_code) outer_code);
+ return true;
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = 5;
+ return true;
+ case CONST_DOUBLE:
+ *total = 10;
+ return true;
+
+ case AND:
+ *total = COSTS_N_INSNS (mcore_and_cost (x));
+ return true;
+
+ case IOR:
+ *total = COSTS_N_INSNS (mcore_ior_cost (x));
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ case FLOAT:
+ case FIX:
+ *total = COSTS_N_INSNS (100);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Prepare the operands for a comparison. Return whether the branch/setcc
+ should reverse the operands. */
+
+bool
+mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx cc_reg = gen_rtx_REG (CCmode, CC_REG);
+ bool invert;
+
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (op1);
+
+ switch (code)
+ {
+ case GTU:
+ /* Unsigned > 0 is the same as != 0; everything else is converted
+ below to LEU (reversed cmphs). */
+ if (val == 0)
+ code = NE;
+ break;
+
+ /* Check whether (LE A imm) can become (LT A imm + 1),
+ or (GT A imm) can become (GE A imm + 1). */
+ case GT:
+ case LE:
+ if (CONST_OK_FOR_J (val + 1))
+ {
+ op1 = GEN_INT (val + 1);
+ code = code == LE ? LT : GE;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT)
+ op1 = force_reg (SImode, op1);
+
+ /* cmpnei: 0-31 (K immediate)
+ cmplti: 1-32 (J immediate, 0 using btsti x,31). */
+ invert = false;
+ switch (code)
+ {
+ case EQ: /* Use inverted condition, cmpne. */
+ code = NE;
+ invert = true;
+ /* Drop through. */
+
+ case NE: /* Use normal condition, cmpne. */
+ if (GET_CODE (op1) == CONST_INT && ! CONST_OK_FOR_K (INTVAL (op1)))
+ op1 = force_reg (SImode, op1);
+ break;
+
+ case LE: /* Use inverted condition, reversed cmplt. */
+ code = GT;
+ invert = true;
+ /* Drop through. */
+
+ case GT: /* Use normal condition, reversed cmplt. */
+ if (GET_CODE (op1) == CONST_INT)
+ op1 = force_reg (SImode, op1);
+ break;
+
+ case GE: /* Use inverted condition, cmplt. */
+ code = LT;
+ invert = true;
+ /* Drop through. */
+
+ case LT: /* Use normal condition, cmplt. */
+ if (GET_CODE (op1) == CONST_INT &&
+ /* covered by btsti x,31. */
+ INTVAL (op1) != 0 &&
+ ! CONST_OK_FOR_J (INTVAL (op1)))
+ op1 = force_reg (SImode, op1);
+ break;
+
+ case GTU: /* Use inverted condition, cmple. */
+ /* We coped with unsigned > 0 above. */
+ gcc_assert (GET_CODE (op1) != CONST_INT || INTVAL (op1) != 0);
+ code = LEU;
+ invert = true;
+ /* Drop through. */
+
+ case LEU: /* Use normal condition, reversed cmphs. */
+ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
+ op1 = force_reg (SImode, op1);
+ break;
+
+ case LTU: /* Use inverted condition, cmphs. */
+ code = GEU;
+ invert = true;
+ /* Drop through. */
+
+ case GEU: /* Use normal condition, cmphs. */
+ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
+ op1 = force_reg (SImode, op1);
+ break;
+
+ default:
+ break;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ cc_reg,
+ gen_rtx_fmt_ee (code, CCmode, op0, op1)));
+ return invert;
+}
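+
+/* For illustration: the rewriting above maps conditions the ISA lacks
+ onto the ones it has.  E.g. (le r2, r3) has no direct compare, so it
+ becomes (gt r2, r3) with INVERT returned as true, and the caller then
+ branches on the false condition rather than the true one.  With a
+ constant, (le r2, 9) is first narrowed to (lt r2, 10), since 10 fits
+ the J immediate range of cmplti.  */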
+
+int
+mcore_symbolic_address_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ x = XEXP (x, 0);
+ return ( (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (x, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT);
+ default:
+ return 0;
+ }
+}
+
+/* Functions to output assembly code for a function call. */
+
+char *
+mcore_output_call (rtx operands[], int index)
+{
+ static char buffer[20];
+ rtx addr = operands [index];
+
+ if (REG_P (addr))
+ {
+ if (TARGET_CG_DATA)
+ {
+ gcc_assert (mcore_current_function_name);
+
+ ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
+ "unknown", 1);
+ }
+
+ sprintf (buffer, "jsr\t%%%d", index);
+ }
+ else
+ {
+ if (TARGET_CG_DATA)
+ {
+ gcc_assert (mcore_current_function_name);
+ gcc_assert (GET_CODE (addr) == SYMBOL_REF);
+
+ ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
+ XSTR (addr, 0), 0);
+ }
+
+ sprintf (buffer, "jbsr\t%%%d", index);
+ }
+
+ return buffer;
+}
+
+/* Can we load a constant with a single instruction?  */
+
+int
+const_ok_for_mcore (HOST_WIDE_INT value)
+{
+ if (value >= 0 && value <= 127)
+ return 1;
+
+ /* Try exact power of two. */
+ if (CONST_OK_FOR_M (value))
+ return 1;
+
+ /* Try exact power of two - 1. */
+ if (CONST_OK_FOR_N (value) && value != -1)
+ return 1;
+
+ return 0;
+}
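+
+/* For illustration: single-instruction constants include 0..127 (movi),
+ any single set bit such as 0x00040000 (bgeni, the M constraint), and
+ any low-order run of ones such as 0x0000ffff (bmaski, the N constraint).
+ A value like 0x12345678 fails all three tests and needs the inline
+ tricks below or an lrw from the literal pool.  */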
+
+/* Can we load a constant inline with up to 2 instructions?  */
+
+int
+mcore_const_ok_for_inline (HOST_WIDE_INT value)
+{
+ HOST_WIDE_INT x, y;
+
+ return try_constant_tricks (value, & x, & y) > 0;
+}
+
+/* Are we loading the constant using a not?  */
+
+int
+mcore_const_trick_uses_not (HOST_WIDE_INT value)
+{
+ HOST_WIDE_INT x, y;
+
+ return try_constant_tricks (value, & x, & y) == 2;
+}
+
+/* Try tricks to load a constant inline and return the trick number on
+ success (0 means not inlinable).
+
+ 0: not inlinable
+ 1: single instruction (do the usual thing)
+ 2: single insn followed by a 'not'
+ 3: single insn followed by an addi
+ 4: single insn followed by a subi
+ 5: single insn followed by rsubi
+ 6: single insn followed by bseti
+ 7: single insn followed by bclri
+ 8: single insn followed by rotli
+ 9: single insn followed by lsli
+ 10: single insn followed by ixh
+ 11: single insn followed by ixw. */
+
+static int
+try_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT * x, HOST_WIDE_INT * y)
+{
+ HOST_WIDE_INT i;
+ unsigned HOST_WIDE_INT bit, shf, rot;
+
+ if (const_ok_for_mcore (value))
+ return 1; /* Do the usual thing. */
+
+ if (! TARGET_HARDLIT)
+ return 0;
+
+ if (const_ok_for_mcore (~value))
+ {
+ *x = ~value;
+ return 2;
+ }
+
+ for (i = 1; i <= 32; i++)
+ {
+ if (const_ok_for_mcore (value - i))
+ {
+ *x = value - i;
+ *y = i;
+
+ return 3;
+ }
+
+ if (const_ok_for_mcore (value + i))
+ {
+ *x = value + i;
+ *y = i;
+
+ return 4;
+ }
+ }
+
+ bit = 0x80000000ULL;
+
+ for (i = 0; i <= 31; i++)
+ {
+ if (const_ok_for_mcore (i - value))
+ {
+ *x = i - value;
+ *y = i;
+
+ return 5;
+ }
+
+ if (const_ok_for_mcore (value & ~bit))
+ {
+ *y = bit;
+ *x = value & ~bit;
+ return 6;
+ }
+
+ if (const_ok_for_mcore (value | bit))
+ {
+ *y = ~bit;
+ *x = value | bit;
+
+ return 7;
+ }
+
+ bit >>= 1;
+ }
+
+ shf = value;
+ rot = value;
+
+ for (i = 1; i < 31; i++)
+ {
+ int c;
+
+ /* MCore has rotate left. */
+ c = rot << 31;
+ rot >>= 1;
+ rot &= 0x7FFFFFFF;
+ rot |= c; /* Simulate rotate. */
+
+ if (const_ok_for_mcore (rot))
+ {
+ *y = i;
+ *x = rot;
+
+ return 8;
+ }
+
+ if (shf & 1)
+ shf = 0; /* Can't use logical shift, low order bit is one. */
+
+ shf >>= 1;
+
+ if (shf != 0 && const_ok_for_mcore (shf))
+ {
+ *y = i;
+ *x = shf;
+
+ return 9;
+ }
+ }
+
+ if ((value % 3) == 0 && const_ok_for_mcore (value / 3))
+ {
+ *x = value / 3;
+
+ return 10;
+ }
+
+ if ((value % 5) == 0 && const_ok_for_mcore (value / 5))
+ {
+ *x = value / 5;
+
+ return 11;
+ }
+
+ return 0;
+}
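+
+/* For illustration, two hand-checked selections by the routine above:
+
+ 130 -> trick 3: load 128 with bgeni (*x == 128), then addi 2 (*y == 2);
+ -16 (0xfffffff0) -> trick 2: movi 15 (*x == ~value), then a not.
+
+ The checks run in order, so the first trick whose X is loadable in a
+ single instruction wins.  */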
+
+/* Check whether REG is dead at insn FIRST.  This is done by searching
+ ahead for either the next use (i.e., REG is live), a death note, or a
+ set of REG.  Don't just use dead_or_set_p() since reload does not
+ always mark deaths (especially if PRESERVE_DEATH_NOTES_REGNO_P is not
+ defined).  We can ignore subregs by extracting the actual register.  BRC */
+
+int
+mcore_is_dead (rtx first, rtx reg)
+{
+ rtx insn;
+
+ /* For mcore, subregs can't live independently of their parent regs. */
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ /* Dies immediately. */
+ if (dead_or_set_p (first, reg))
+ return 1;
+
+ /* Look for conclusive evidence of live/death, otherwise we have
+ to assume that it is live. */
+ for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == JUMP_INSN)
+ return 0; /* We lose track, assume it is alive. */
+
+ else if (GET_CODE(insn) == CALL_INSN)
+ {
+ /* Calls might use it for target or register parms.  */
+ if (reg_referenced_p (reg, PATTERN (insn))
+ || find_reg_fusage (insn, USE, reg))
+ return 0;
+ else if (dead_or_set_p (insn, reg))
+ return 1;
+ }
+ else if (GET_CODE (insn) == INSN)
+ {
+ if (reg_referenced_p (reg, PATTERN (insn)))
+ return 0;
+ else if (dead_or_set_p (insn, reg))
+ return 1;
+ }
+ }
+
+ /* No conclusive evidence either way, we cannot take the chance
+ that control flow hid the use from us -- "I'm not dead yet". */
+ return 0;
+}
+
+/* Count the number of ones in mask. */
+
+int
+mcore_num_ones (HOST_WIDE_INT mask)
+{
+ /* A trick to count set bits recently posted on comp.compilers. */
+ mask = (mask >> 1 & 0x55555555) + (mask & 0x55555555);
+ mask = ((mask >> 2) & 0x33333333) + (mask & 0x33333333);
+ mask = ((mask >> 4) + mask) & 0x0f0f0f0f;
+ mask = ((mask >> 8) + mask);
+
+ return (mask + (mask >> 16)) & 0xff;
+}
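+
+/* For illustration, the parallel-sum reduction above traced on
+ mask == 0xf0 (four set bits):
+
+ pair sums: 0xa0 (each 2-bit field holds the popcount of its pair)
+ nibble sums: 0x40 (each 4-bit field holds its popcount)
+ byte sums: 0x04 (after the >> 4 fold and the 0x0f0f0f0f mask)
+ result: 4 (after folding halfwords and masking with 0xff).  */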
+
+/* Count the number of zeros in mask. */
+
+int
+mcore_num_zeros (HOST_WIDE_INT mask)
+{
+ return 32 - mcore_num_ones (mask);
+}
+
+/* Determine byte being masked. */
+
+int
+mcore_byte_offset (unsigned int mask)
+{
+ if (mask == 0x00ffffffL)
+ return 0;
+ else if (mask == 0xff00ffffL)
+ return 1;
+ else if (mask == 0xffff00ffL)
+ return 2;
+ else if (mask == 0xffffff00L)
+ return 3;
+
+ return -1;
+}
+
+/* Determine halfword being masked. */
+
+int
+mcore_halfword_offset (unsigned int mask)
+{
+ if (mask == 0x0000ffffL)
+ return 0;
+ else if (mask == 0xffff0000L)
+ return 1;
+
+ return -1;
+}
+
+/* Output a series of bseti's corresponding to mask. */
+
+const char *
+mcore_output_bseti (rtx dst, int mask)
+{
+ rtx out_operands[2];
+ int bit;
+
+ out_operands[0] = dst;
+
+ for (bit = 0; bit < 32; bit++)
+ {
+ if ((mask & 0x1) == 0x1)
+ {
+ out_operands[1] = GEN_INT (bit);
+
+ output_asm_insn ("bseti\t%0,%1", out_operands);
+ }
+ mask >>= 1;
+ }
+
+ return "";
+}
+
+/* Output a series of bclri's corresponding to mask. */
+
+const char *
+mcore_output_bclri (rtx dst, int mask)
+{
+ rtx out_operands[2];
+ int bit;
+
+ out_operands[0] = dst;
+
+ for (bit = 0; bit < 32; bit++)
+ {
+ if ((mask & 0x1) == 0x0)
+ {
+ out_operands[1] = GEN_INT (bit);
+
+ output_asm_insn ("bclri\t%0,%1", out_operands);
+ }
+
+ mask >>= 1;
+ }
+
+ return "";
+}
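+
+/* For illustration: mcore_output_bseti on mask 0x9 emits
+
+ bseti rD,0
+ bseti rD,3
+
+ while mcore_output_bclri on mask 0xfffffff9 (zero bits at positions 1
+ and 2) emits "bclri rD,1" and "bclri rD,2", where rD stands for
+ whatever register DST names.  */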
+
+/* Output a conditional move of two constants that differ by exactly 1
+ from each other.  See the "movtK" patterns in mcore.md.  I'm not sure
+ this is really worth the effort.  */
+
+const char *
+mcore_output_cmov (rtx operands[], int cmp_t, const char * test)
+{
+ HOST_WIDE_INT load_value;
+ HOST_WIDE_INT adjust_value;
+ rtx out_operands[4];
+
+ out_operands[0] = operands[0];
+
+ /* Check to see which constant is loadable. */
+ if (const_ok_for_mcore (INTVAL (operands[1])))
+ {
+ out_operands[1] = operands[1];
+ out_operands[2] = operands[2];
+ }
+ else if (const_ok_for_mcore (INTVAL (operands[2])))
+ {
+ out_operands[1] = operands[2];
+ out_operands[2] = operands[1];
+
+ /* Complement test since constants are swapped. */
+ cmp_t = (cmp_t == 0);
+ }
+ load_value = INTVAL (out_operands[1]);
+ adjust_value = INTVAL (out_operands[2]);
+
+ /* First output the test if folded into the pattern. */
+
+ if (test)
+ output_asm_insn (test, operands);
+
+ /* Load the constant - for now, only support constants that can be
+ generated with a single instruction.  Maybe add general inlinable
+ constants later (this would increase the number of patterns, since
+ the instruction sequence has a different length attribute).  */
+ if (load_value >= 0 && load_value <= 127)
+ output_asm_insn ("movi\t%0,%1", out_operands);
+ else if (CONST_OK_FOR_M (load_value))
+ output_asm_insn ("bgeni\t%0,%P1", out_operands);
+ else if (CONST_OK_FOR_N (load_value))
+ output_asm_insn ("bmaski\t%0,%N1", out_operands);
+
+ /* Output the constant adjustment. */
+ if (load_value > adjust_value)
+ {
+ if (cmp_t)
+ output_asm_insn ("decf\t%0", out_operands);
+ else
+ output_asm_insn ("dect\t%0", out_operands);
+ }
+ else
+ {
+ if (cmp_t)
+ output_asm_insn ("incf\t%0", out_operands);
+ else
+ output_asm_insn ("inct\t%0", out_operands);
+ }
+
+ return "";
+}
+
+/* Outputs the peephole for moving a constant that gets not'ed followed
+ by an and (i.e. combine the not and the and into andn). BRC */
+
+const char *
+mcore_output_andn (rtx insn ATTRIBUTE_UNUSED, rtx operands[])
+{
+ HOST_WIDE_INT x, y;
+ rtx out_operands[3];
+ const char * load_op;
+ char buf[256];
+ int trick_no;
+
+ trick_no = try_constant_tricks (INTVAL (operands[1]), &x, &y);
+ gcc_assert (trick_no == 2);
+
+ out_operands[0] = operands[0];
+ out_operands[1] = GEN_INT (x);
+ out_operands[2] = operands[2];
+
+ if (x >= 0 && x <= 127)
+ load_op = "movi\t%0,%1";
+
+ /* Try exact power of two. */
+ else if (CONST_OK_FOR_M (x))
+ load_op = "bgeni\t%0,%P1";
+
+ /* Try exact power of two - 1. */
+ else if (CONST_OK_FOR_N (x))
+ load_op = "bmaski\t%0,%N1";
+
+ else
+ {
+ load_op = "BADMOVI-andn\t%0, %1";
+ gcc_unreachable ();
+ }
+
+ sprintf (buf, "%s\n\tandn\t%%2,%%0", load_op);
+ output_asm_insn (buf, out_operands);
+
+ return "";
+}
+
+/* Output an inline constant. */
+
+static const char *
+output_inline_const (enum machine_mode mode, rtx operands[])
+{
+ HOST_WIDE_INT x = 0, y = 0;
+ int trick_no;
+ rtx out_operands[3];
+ char buf[256];
+ char load_op[256];
+ const char *dst_fmt;
+ HOST_WIDE_INT value;
+
+ value = INTVAL (operands[1]);
+
+ trick_no = try_constant_tricks (value, &x, &y);
+ /* lrw's are handled separately: Large inlinable constants never get
+ turned into lrw's. Our caller uses try_constant_tricks to back
+ off to an lrw rather than calling this routine. */
+ gcc_assert (trick_no != 0);
+
+ if (trick_no == 1)
+ x = value;
+
+ /* operands: 0 = dst, 1 = load immed., 2 = immed. adjustment. */
+ out_operands[0] = operands[0];
+ out_operands[1] = GEN_INT (x);
+
+ if (trick_no > 2)
+ out_operands[2] = GEN_INT (y);
+
+ /* Select dst format based on mode. */
+ if (mode == DImode && (! TARGET_LITTLE_END))
+ dst_fmt = "%R0";
+ else
+ dst_fmt = "%0";
+
+ if (x >= 0 && x <= 127)
+ sprintf (load_op, "movi\t%s,%%1", dst_fmt);
+
+ /* Try exact power of two. */
+ else if (CONST_OK_FOR_M (x))
+ sprintf (load_op, "bgeni\t%s,%%P1", dst_fmt);
+
+ /* Try exact power of two - 1. */
+ else if (CONST_OK_FOR_N (x))
+ sprintf (load_op, "bmaski\t%s,%%N1", dst_fmt);
+
+ else
+ {
+ sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt);
+ gcc_unreachable ();
+ }
+
+ switch (trick_no)
+ {
+ case 1:
+ strcpy (buf, load_op);
+ break;
+ case 2: /* not */
+ sprintf (buf, "%s\n\tnot\t%s\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 3: /* add */
+ sprintf (buf, "%s\n\taddi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 4: /* sub */
+ sprintf (buf, "%s\n\tsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 5: /* rsub */
+ /* Never happens unless -mrsubi, see try_constant_tricks(). */
+ sprintf (buf, "%s\n\trsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 6: /* bseti */
+ sprintf (buf, "%s\n\tbseti\t%s,%%P2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 7: /* bclr */
+ sprintf (buf, "%s\n\tbclri\t%s,%%Q2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 8: /* rotl */
+ sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 9: /* lsl */
+ sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value);
+ break;
+ case 10: /* ixh */
+ sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value);
+ break;
+ case 11: /* ixw */
+ sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value);
+ break;
+ default:
+ return "";
+ }
+
+ output_asm_insn (buf, out_operands);
+
+ return "";
+}
+
+/* Output a move of a word or less value. */
+
+const char *
+mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (GET_CODE (dst) == REG)
+ {
+ if (GET_CODE (src) == REG)
+ {
+ if (REGNO (src) == CC_REG) /* r-c */
+ return "mvc\t%0";
+ else
+ return "mov\t%0,%1"; /* r-r*/
+ }
+ else if (GET_CODE (src) == MEM)
+ {
+ if (GET_CODE (XEXP (src, 0)) == LABEL_REF)
+ return "lrw\t%0,[%1]"; /* a-R */
+ else
+ switch (GET_MODE (src)) /* r-m */
+ {
+ case SImode:
+ return "ldw\t%0,%1";
+ case HImode:
+ return "ld.h\t%0,%1";
+ case QImode:
+ return "ld.b\t%0,%1";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (src) == CONST_INT)
+ {
+ HOST_WIDE_INT x, y;
+
+ if (CONST_OK_FOR_I (INTVAL (src))) /* r-I */
+ return "movi\t%0,%1";
+ else if (CONST_OK_FOR_M (INTVAL (src))) /* r-M */
+ return "bgeni\t%0,%P1\t// %1 %x1";
+ else if (CONST_OK_FOR_N (INTVAL (src))) /* r-N */
+ return "bmaski\t%0,%N1\t// %1 %x1";
+ else if (try_constant_tricks (INTVAL (src), &x, &y)) /* R-P */
+ return output_inline_const (SImode, operands); /* 1-2 insns */
+ else
+ return "lrw\t%0,%x1\t// %1"; /* Get it from literal pool. */
+ }
+ else
+ return "lrw\t%0, %1"; /* Into the literal pool. */
+ }
+ else if (GET_CODE (dst) == MEM) /* m-r */
+ switch (GET_MODE (dst))
+ {
+ case SImode:
+ return "stw\t%1,%0";
+ case HImode:
+ return "st.h\t%1,%0";
+ case QImode:
+ return "st.b\t%1,%0";
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_unreachable ();
+}
+
+/* Return a sequence of instructions to perform DI or DF move.
+ Since the MCORE cannot move a DI or DF in one instruction, we have
+ to take care when we see overlapping source and dest registers. */
+
+const char *
+mcore_output_movedouble (rtx operands[], enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (GET_CODE (dst) == REG)
+ {
+ if (GET_CODE (src) == REG)
+ {
+ int dstreg = REGNO (dst);
+ int srcreg = REGNO (src);
+
+ /* Ensure the second source is not overwritten.  */
+ if (srcreg + 1 == dstreg)
+ return "mov %R0,%R1\n\tmov %0,%1";
+ else
+ return "mov %0,%1\n\tmov %R0,%R1";
+ }
+ else if (GET_CODE (src) == MEM)
+ {
+ rtx memexp = XEXP (src, 0);
+ int dstreg = REGNO (dst);
+ int basereg = -1;
+
+ if (GET_CODE (memexp) == LABEL_REF)
+ return "lrw\t%0,[%1]\n\tlrw\t%R0,[%R1]";
+ else if (GET_CODE (memexp) == REG)
+ basereg = REGNO (memexp);
+ else if (GET_CODE (memexp) == PLUS)
+ {
+ if (GET_CODE (XEXP (memexp, 0)) == REG)
+ basereg = REGNO (XEXP (memexp, 0));
+ else if (GET_CODE (XEXP (memexp, 1)) == REG)
+ basereg = REGNO (XEXP (memexp, 1));
+ else
+ gcc_unreachable ();
+ }
+ else
+ gcc_unreachable ();
+
+ /* ??? length attribute is wrong here. */
+ if (dstreg == basereg)
+ {
+ /* Just load them in reverse order. */
+ return "ldw\t%R0,%R1\n\tldw\t%0,%1";
+
+ /* XXX: alternative: move basereg to basereg+1
+ and then fall through. */
+ }
+ else
+ return "ldw\t%0,%1\n\tldw\t%R0,%R1";
+ }
+ else if (GET_CODE (src) == CONST_INT)
+ {
+ if (TARGET_LITTLE_END)
+ {
+ if (CONST_OK_FOR_I (INTVAL (src)))
+ output_asm_insn ("movi %0,%1", operands);
+ else if (CONST_OK_FOR_M (INTVAL (src)))
+ output_asm_insn ("bgeni %0,%P1", operands);
+ else if (CONST_OK_FOR_N (INTVAL (src)))
+ output_asm_insn ("bmaski %0,%N1", operands);
+ else
+ gcc_unreachable ();
+
+ if (INTVAL (src) < 0)
+ return "bmaski %R0,32";
+ else
+ return "movi %R0,0";
+ }
+ else
+ {
+ if (CONST_OK_FOR_I (INTVAL (src)))
+ output_asm_insn ("movi %R0,%1", operands);
+ else if (CONST_OK_FOR_M (INTVAL (src)))
+ output_asm_insn ("bgeni %R0,%P1", operands);
+ else if (CONST_OK_FOR_N (INTVAL (src)))
+ output_asm_insn ("bmaski %R0,%N1", operands);
+ else
+ gcc_unreachable ();
+
+ if (INTVAL (src) < 0)
+ return "bmaski %0,32";
+ else
+ return "movi %0,0";
+ }
+ }
+ else
+ gcc_unreachable ();
+ }
+ else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
+ return "stw\t%1,%0\n\tstw\t%R1,%R0";
+ else
+ gcc_unreachable ();
+}
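+
+/* For illustration of the overlap case above: copying the pair r2/r3
+ into r3/r4 has srcreg + 1 == dstreg, so the moves are emitted in
+ reverse register order ("mov r4,r3" then "mov r3,r2"); the other
+ order would clobber r3 before it had been read.  */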
+
+/* Predicates used by the templates. */
+
+int
+mcore_arith_S_operand (rtx op)
+{
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (~INTVAL (op)))
+ return 1;
+
+ return 0;
+}
+
+/* Expand insert bit field. BRC */
+
+int
+mcore_expand_insv (rtx operands[])
+{
+ int width = INTVAL (operands[1]);
+ int posn = INTVAL (operands[2]);
+ int mask;
+ rtx mreg, sreg, ereg;
+
+ /* To get width 1 insv, the test in store_bit_field() (expmed.c, line 191)
+ for width==1 must be removed. Look around line 368. This is something
+ we really want the md part to do. */
+ if (width == 1 && GET_CODE (operands[3]) == CONST_INT)
+ {
+ /* Do directly with bseti or bclri. */
+ /* RBE: 2/97 consider only low bit of constant. */
+ if ((INTVAL (operands[3]) & 1) == 0)
+ {
+ mask = ~(1 << posn);
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_AND (SImode, operands[0], GEN_INT (mask))));
+ }
+ else
+ {
+ mask = 1 << posn;
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_IOR (SImode, operands[0], GEN_INT (mask))));
+ }
+
+ return 1;
+ }
+
+ /* Look at some bit-field placements that we aren't interested
+ in handling ourselves, unless specifically directed to do so. */
+ if (! TARGET_W_FIELD)
+ return 0; /* Generally, give up about now. */
+
+ if (width == 8 && posn % 8 == 0)
+ /* Byte sized and aligned; let caller break it up. */
+ return 0;
+
+ if (width == 16 && posn % 16 == 0)
+ /* Short sized and aligned; let caller break it up. */
+ return 0;
+
+ /* The general case - we can do this a little bit better than what the
+ machine independent part tries. This will get rid of all the subregs
+ that mess up constant folding in combine when working with relaxed
+ immediates. */
+
+ /* If setting the entire field, do it directly. */
+ if (GET_CODE (operands[3]) == CONST_INT
+ && INTVAL (operands[3]) == ((1 << width) - 1))
+ {
+ mreg = force_reg (SImode, GEN_INT (INTVAL (operands[3]) << posn));
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_IOR (SImode, operands[0], mreg)));
+ return 1;
+ }
+
+ /* Generate the clear mask. */
+ mreg = force_reg (SImode, GEN_INT (~(((1 << width) - 1) << posn)));
+
+ /* Clear the field, to overlay it later with the source. */
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_AND (SImode, operands[0], mreg)));
+
+ /* If the source is constant 0, we've nothing to add back. */
+ if (GET_CODE (operands[3]) == CONST_INT && INTVAL (operands[3]) == 0)
+ return 1;
+
+ /* XXX: Should we worry about more games with constant values?
+ We've covered the high profile: set/clear single-bit and many-bit
+ fields. How often do we see "arbitrary bit pattern" constants? */
+ sreg = copy_to_mode_reg (SImode, operands[3]);
+
+ /* Extract src as same width as dst (needed for signed values).  We
+ always have to do this since we widen everything to SImode.
+ We don't have to mask if we're shifting this up against the
+ MSB of the register (e.g., the shift will push out any hi-order
+ bits).  */
+ if (width + posn != (int) GET_MODE_BITSIZE (SImode))
+ {
+ ereg = force_reg (SImode, GEN_INT ((1 << width) - 1));
+ emit_insn (gen_rtx_SET (SImode, sreg,
+ gen_rtx_AND (SImode, sreg, ereg)));
+ }
+
+ /* Insert source value in dest. */
+ if (posn != 0)
+ emit_insn (gen_rtx_SET (SImode, sreg,
+ gen_rtx_ASHIFT (SImode, sreg, GEN_INT (posn))));
+
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_IOR (SImode, operands[0], sreg)));
+
+ return 1;
+}
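+
+/* For illustration: with TARGET_W_FIELD, inserting a 3-bit register
+ field at bit position 4 expands to roughly this sequence:
+
+ rM <- 0xffffff8f (the clear mask, ~(7 << 4))
+ dst <- dst & rM (clear the field)
+ rS <- src & 7 (truncate the source to the field width)
+ rS <- rS << 4 (align it with the field position)
+ dst <- dst | rS (merge)
+
+ Single-bit constant fields short-circuit to one bseti/bclri above.  */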
+
+/* ??? Block move stuff stolen from m88k. This code has not been
+ verified for correctness. */
+
+/* Emit code to perform a block move. Choose the best method.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the source.
+ OPERANDS[2] is the size.
+ OPERANDS[3] is the alignment safe to use. */
+
+/* Emit code to perform a block move with an offset sequence of ldw/st
+ instructions (..., ldw 0, stw 1, ldw 1, stw 0, ...). SIZE and ALIGN are
+ known constants. DEST and SRC are registers. OFFSET is the known
+ starting point for the output pattern. */
+
+static const enum machine_mode mode_from_align[] =
+{
+ VOIDmode, QImode, HImode, VOIDmode, SImode,
+};
+
+static void
+block_move_sequence (rtx dst_mem, rtx src_mem, int size, int align)
+{
+ rtx temp[2];
+ enum machine_mode mode[2];
+ int amount[2];
+ bool active[2];
+ int phase = 0;
+ int next;
+ int offset_ld = 0;
+ int offset_st = 0;
+ rtx x;
+
+ x = XEXP (dst_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ dst_mem = replace_equiv_address (dst_mem, x);
+ }
+
+ x = XEXP (src_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ src_mem = replace_equiv_address (src_mem, x);
+ }
+
+ active[0] = active[1] = false;
+
+ do
+ {
+ next = phase;
+ phase ^= 1;
+
+ if (size > 0)
+ {
+ int next_amount;
+
+ next_amount = (size >= 4 ? 4 : (size >= 2 ? 2 : 1));
+ next_amount = MIN (next_amount, align);
+
+ amount[next] = next_amount;
+ mode[next] = mode_from_align[next_amount];
+ temp[next] = gen_reg_rtx (mode[next]);
+
+ x = adjust_address (src_mem, mode[next], offset_ld);
+ emit_insn (gen_rtx_SET (VOIDmode, temp[next], x));
+
+ offset_ld += next_amount;
+ size -= next_amount;
+ active[next] = true;
+ }
+
+ if (active[phase])
+ {
+ active[phase] = false;
+
+ x = adjust_address (dst_mem, mode[phase], offset_st);
+ emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase]));
+
+ offset_st += amount[phase];
+ }
+ }
+ while (active[next]);
+}
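+
+/* For illustration: a 7-byte copy with 4-byte alignment is emitted with
+ the loads running one phase ahead of the stores:
+
+ ldw t0,(src,0)
+ ld.h t1,(src,4)
+ stw t0,(dst,0)
+ ld.b t2,(src,6)
+ st.h t1,(dst,4)
+ st.b t2,(dst,6)
+
+ where t0/t1/t2 stand for the fresh temporaries allocated on each
+ iteration of the do-loop above.  */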
+
+bool
+mcore_expand_block_move (rtx *operands)
+{
+ HOST_WIDE_INT align, bytes, max;
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ return false;
+
+ bytes = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+
+ if (bytes <= 0)
+ return false;
+ if (align > 4)
+ align = 4;
+
+ switch (align)
+ {
+ case 4:
+ if (bytes & 1)
+ max = 4*4;
+ else if (bytes & 3)
+ max = 8*4;
+ else
+ max = 16*4;
+ break;
+ case 2:
+ max = 4*2;
+ break;
+ case 1:
+ max = 4*1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (bytes <= max)
+ {
+ block_move_sequence (operands[0], operands[1], bytes, align);
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Code to generate prologue and epilogue sequences. */
+static int number_of_regs_before_varargs;
+
+/* Set by TARGET_SETUP_INCOMING_VARARGS to indicate to prolog that this is
+ for a varargs function. */
+static int current_function_anonymous_args;
+
+#define STACK_BYTES (STACK_BOUNDARY/BITS_PER_UNIT)
+#define STORE_REACH (64) /* Maximum displace of word store + 4. */
+#define ADDI_REACH (32) /* Maximum addi operand. */
+
+static void
+layout_mcore_frame (struct mcore_frame * infp)
+{
+ int n;
+ unsigned int i;
+ int nbytes;
+ int regarg;
+ int localregarg;
+ int outbounds;
+ unsigned int growths;
+ int step;
+
+ /* Might have to spill bytes to re-assemble a big argument that
+ was passed partially in registers and partially on the stack. */
+ nbytes = crtl->args.pretend_args_size;
+
+ /* Determine how much space for spilled anonymous args (e.g., stdarg). */
+ if (current_function_anonymous_args)
+ nbytes += (NPARM_REGS - number_of_regs_before_varargs) * UNITS_PER_WORD;
+
+ infp->arg_size = nbytes;
+
+ /* How much space to save non-volatile registers we stomp. */
+ infp->reg_mask = calc_live_regs (& n);
+ infp->reg_size = n * 4;
+
+ /* And the rest of it... locals and space for overflowed outbounds. */
+ infp->local_size = get_frame_size ();
+ infp->outbound_size = crtl->outgoing_args_size;
+
+ /* Make sure we have a whole number of words for the locals. */
+ if (infp->local_size % STACK_BYTES)
+ infp->local_size = (infp->local_size + STACK_BYTES - 1) & ~ (STACK_BYTES -1);
+
+ /* The only thing we know we have to pad is the outbound space, since
+ we've aligned our locals assuming that the base of locals is aligned.  */
+ infp->pad_local = 0;
+ infp->pad_reg = 0;
+ infp->pad_outbound = 0;
+ if (infp->outbound_size % STACK_BYTES)
+ infp->pad_outbound = STACK_BYTES - (infp->outbound_size % STACK_BYTES);
+
+ /* Now we see how we want to stage the prologue so that it does
+ the most appropriate stack growth and register saves to either:
+ (1) run fast,
+ (2) reduce instruction space, or
+ (3) reduce stack space. */
+ for (i = 0; i < ARRAY_SIZE (infp->growth); i++)
+ infp->growth[i] = 0;
+
+ regarg = infp->reg_size + infp->arg_size;
+ localregarg = infp->local_size + regarg;
+ outbounds = infp->outbound_size + infp->pad_outbound;
+ growths = 0;
+
+ /* XXX: Consider a layout that takes localregarg + outbound into account too!  */
+
+ /* If the frame is <= 32 bytes and an stm would save <= 2 registers,
+ use stw's with offsets and buy the frame in one shot.  */
+ if (localregarg <= ADDI_REACH
+ && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000))
+ {
+ /* Make sure we'll be aligned. */
+ if (localregarg % STACK_BYTES)
+ infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES);
+
+ step = localregarg + infp->pad_reg;
+ infp->reg_offset = infp->local_size;
+
+ if (outbounds + step <= ADDI_REACH && !frame_pointer_needed)
+ {
+ step += outbounds;
+ infp->reg_offset += outbounds;
+ outbounds = 0;
+ }
+
+ infp->arg_offset = step - 4;
+ infp->growth[growths++] = step;
+ infp->reg_growth = growths;
+ infp->local_growth = growths;
+
+ /* If we haven't already folded it in. */
+ if (outbounds)
+ infp->growth[growths++] = outbounds;
+
+ goto finish;
+ }
+
+ /* Frame can't be done with a single subi, but can be done with 2
+ insns. If the 'stm' is getting <= 2 registers, we use stw's and
+ shift some of the stack purchase into the first subi, so both are
+ single instructions. */
+ if (localregarg <= STORE_REACH
+ && (infp->local_size > ADDI_REACH)
+ && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000))
+ {
+ int all;
+
+ /* Make sure we'll be aligned; use either pad_reg or pad_local. */
+ if (localregarg % STACK_BYTES)
+ infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES);
+
+ all = localregarg + infp->pad_reg + infp->pad_local;
+ step = ADDI_REACH; /* As much up front as we can. */
+ if (step > all)
+ step = all;
+
+ /* XXX: Consider whether step will still be aligned; we believe so. */
+ infp->arg_offset = step - 4;
+ infp->growth[growths++] = step;
+ infp->reg_growth = growths;
+ infp->reg_offset = step - infp->pad_reg - infp->reg_size;
+ all -= step;
+
+ /* Can we fold in any space required for outbounds? */
+ if (outbounds + all <= ADDI_REACH && !frame_pointer_needed)
+ {
+ all += outbounds;
+ outbounds = 0;
+ }
+
+ /* Get the rest of the locals in place. */
+ step = all;
+ infp->growth[growths++] = step;
+ infp->local_growth = growths;
+ all -= step;
+
+ gcc_assert (all == 0);
+
+ /* Finish off if we need to do so. */
+ if (outbounds)
+ infp->growth[growths++] = outbounds;
+
+ goto finish;
+ }
+
+ /* Registers + args is nicely aligned, so we'll buy that in one shot.
+ Then we buy the rest of the frame in 1 or 2 steps depending on
+ whether we need a frame pointer. */
+ if ((regarg % STACK_BYTES) == 0)
+ {
+ infp->growth[growths++] = regarg;
+ infp->reg_growth = growths;
+ infp->arg_offset = regarg - 4;
+ infp->reg_offset = 0;
+
+ if (infp->local_size % STACK_BYTES)
+ infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
+
+ step = infp->local_size + infp->pad_local;
+
+ if (!frame_pointer_needed)
+ {
+ step += outbounds;
+ outbounds = 0;
+ }
+
+ infp->growth[growths++] = step;
+ infp->local_growth = growths;
+
+ /* If there's any left to be done. */
+ if (outbounds)
+ infp->growth[growths++] = outbounds;
+
+ goto finish;
+ }
+
+ /* XXX: optimizations that we'll want to play with....
+ -- regarg is not aligned, but it's a small number of registers;
+ use some of localsize so that regarg is aligned and then
+ save the registers. */
+
+ /* Simple encoding; plods down the stack buying the pieces as it goes.
+ -- does not optimize space consumption.
+ -- does not attempt to optimize instruction counts.
+ -- but it is safe for all alignments. */
+ if (regarg % STACK_BYTES != 0)
+ infp->pad_reg = STACK_BYTES - (regarg % STACK_BYTES);
+
+ infp->growth[growths++] = infp->arg_size + infp->reg_size + infp->pad_reg;
+ infp->reg_growth = growths;
+ infp->arg_offset = infp->growth[0] - 4;
+ infp->reg_offset = 0;
+
+ if (frame_pointer_needed)
+ {
+ if (infp->local_size % STACK_BYTES != 0)
+ infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
+
+ infp->growth[growths++] = infp->local_size + infp->pad_local;
+ infp->local_growth = growths;
+
+ infp->growth[growths++] = outbounds;
+ }
+ else
+ {
+ if ((infp->local_size + outbounds) % STACK_BYTES != 0)
+ infp->pad_local = STACK_BYTES - ((infp->local_size + outbounds) % STACK_BYTES);
+
+ infp->growth[growths++] = infp->local_size + infp->pad_local + outbounds;
+ infp->local_growth = growths;
+ }
+
+ /* Anything else that we've forgotten?  Plus a few consistency checks.  */
+ finish:
+ gcc_assert (infp->reg_offset >= 0);
+ gcc_assert (growths <= MAX_STACK_GROWS);
+
+ for (i = 0; i < growths; i++)
+ gcc_assert (!(infp->growth[i] % STACK_BYTES));
+}
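+
+/* For illustration: a function with 8 bytes of locals, two call-saved
+ registers (reg_size == 8) and no outbound arguments has
+ localregarg == 16 <= ADDI_REACH, so the first strategy above applies:
+ growth[0] == 16 buys the whole frame with a single subi, the two
+ registers are stored at offsets 8 and 12 (reg_offset == local_size),
+ and the epilogue undoes it with a single addi.  */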
+
+/* Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
+
+int
+mcore_initial_elimination_offset (int from, int to)
+{
+ int above_frame;
+ int below_frame;
+ struct mcore_frame fi;
+
+ layout_mcore_frame (& fi);
+
+ /* fp to ap */
+ above_frame = fi.local_size + fi.pad_local + fi.reg_size + fi.pad_reg;
+ /* sp to fp */
+ below_frame = fi.outbound_size + fi.pad_outbound;
+
+ if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ return above_frame;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return above_frame + below_frame;
+
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return below_frame;
+
+ gcc_unreachable ();
+}
+
+/* Keep track of some information about varargs for the prolog. */
+
+static void
+mcore_setup_incoming_varargs (CUMULATIVE_ARGS *args_so_far,
+ enum machine_mode mode, tree type,
+ int * ptr_pretend_size ATTRIBUTE_UNUSED,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ current_function_anonymous_args = 1;
+
+ /* We need to know how many argument registers are used before
+ the varargs start, so that we can push the remaining argument
+ registers during the prologue. */
+ number_of_regs_before_varargs = *args_so_far + mcore_num_arg_regs (mode, type);
+
+ /* There is a bug somewhere in the arg handling code.
+ Until I can find it, this workaround always pushes the
+ last named argument onto the stack.  */
+ number_of_regs_before_varargs = *args_so_far;
+
+ /* The last named argument may be split between argument registers
+ and the stack. Allow for this here. */
+ if (number_of_regs_before_varargs > NPARM_REGS)
+ number_of_regs_before_varargs = NPARM_REGS;
+}
+
+void
+mcore_expand_prolog (void)
+{
+ struct mcore_frame fi;
+ int space_allocated = 0;
+ int growth = 0;
+
+ /* Find out what we're doing. */
+ layout_mcore_frame (&fi);
+
+ space_allocated = fi.arg_size + fi.reg_size + fi.local_size +
+ fi.outbound_size + fi.pad_outbound + fi.pad_local + fi.pad_reg;
+
+ if (TARGET_CG_DATA)
+ {
+ /* Emit a symbol for this routine's frame size. */
+ rtx x;
+
+ x = DECL_RTL (current_function_decl);
+
+ gcc_assert (GET_CODE (x) == MEM);
+
+ x = XEXP (x, 0);
+
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (mcore_current_function_name)
+ free (mcore_current_function_name);
+
+ mcore_current_function_name = xstrdup (XSTR (x, 0));
+
+ ASM_OUTPUT_CG_NODE (asm_out_file, mcore_current_function_name, space_allocated);
+
+ if (cfun->calls_alloca)
+ ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, "alloca", 1);
+
+ /* 970425: RBE:
+ We're looking at how the 8byte alignment affects stack layout
+ and where we had to pad things. This emits information we can
+ extract which tells us about frame sizes and the like. */
+ fprintf (asm_out_file,
+ "\t.equ\t__$frame$info$_%s_$_%d_%d_x%x_%d_%d_%d,0\n",
+ mcore_current_function_name,
+ fi.arg_size, fi.reg_size, fi.reg_mask,
+ fi.local_size, fi.outbound_size,
+ frame_pointer_needed);
+ }
+
+ if (mcore_naked_function_p ())
+ return;
+
+ /* Handle stdarg+regsaves in one shot: can't be more than 64 bytes. */
+ output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
+
+ /* If we have a parameter passed partially in regs and partially in memory,
+ the registers will have been stored to memory already in function.c. So
+ we only need to do something here for varargs functions. */
+ if (fi.arg_size != 0 && crtl->args.pretend_args_size == 0)
+ {
+ int offset;
+ int rn = FIRST_PARM_REG + NPARM_REGS - 1;
+ int remaining = fi.arg_size;
+
+ for (offset = fi.arg_offset; remaining >= 4; offset -= 4, rn--, remaining -= 4)
+ {
+ emit_insn (gen_movsi
+ (gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, offset)),
+ gen_rtx_REG (SImode, rn)));
+ }
+ }
+
+ /* Do we need another stack adjustment before we do the register saves? */
+ if (growth < fi.reg_growth)
+ output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
+
+ if (fi.reg_size != 0)
+ {
+ int i;
+ int offs = fi.reg_offset;
+
+ for (i = 15; i >= 0; i--)
+ {
+ if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
+ {
+ int first_reg = 15;
+
+ while (fi.reg_mask & (1 << first_reg))
+ first_reg--;
+ first_reg++;
+
+ emit_insn (gen_store_multiple (gen_rtx_MEM (SImode, stack_pointer_rtx),
+ gen_rtx_REG (SImode, first_reg),
+ GEN_INT (16 - first_reg)));
+
+ i -= (15 - first_reg);
+ offs += (16 - first_reg) * 4;
+ }
+ else if (fi.reg_mask & (1 << i))
+ {
+ emit_insn (gen_movsi
+ (gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, offs)),
+ gen_rtx_REG (SImode, i)));
+ offs += 4;
+ }
+ }
+ }
+
+ /* Figure the locals + outbounds. */
+ if (frame_pointer_needed)
+ {
+ /* If we haven't already purchased to 'fp'. */
+ if (growth < fi.local_growth)
+ output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
+
+ emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
+
+ /* ... and then go any remaining distance for outbounds, etc. */
+ if (fi.growth[growth])
+ output_stack_adjust (-1, fi.growth[growth++]);
+ }
+ else
+ {
+ if (growth < fi.local_growth)
+ output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
+ if (fi.growth[growth])
+ output_stack_adjust (-1, fi.growth[growth++]);
+ }
+}
+
+void
+mcore_expand_epilog (void)
+{
+ struct mcore_frame fi;
+ int i;
+ int offs;
+ int growth = MAX_STACK_GROWS - 1;
+
+ /* Find out what we're doing.  */
+ layout_mcore_frame (&fi);
+
+ if (mcore_naked_function_p ())
+ return;
+
+ /* If we had a frame pointer, restore the sp from that. */
+ if (frame_pointer_needed)
+ {
+ emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
+ growth = fi.local_growth - 1;
+ }
+ else
+ {
+ /* XXX: while loop should accumulate and do a single sell. */
+ while (growth >= fi.local_growth)
+ {
+ if (fi.growth[growth] != 0)
+ output_stack_adjust (1, fi.growth[growth]);
+ growth--;
+ }
+ }
+
+ /* Make sure we've shrunk stack back to the point where the registers
+ were laid down. This is typically 0/1 iterations. Then pull the
+ register save information back off the stack. */
+ while (growth >= fi.reg_growth)
+ output_stack_adjust ( 1, fi.growth[growth--]);
+
+ offs = fi.reg_offset;
+
+ for (i = 15; i >= 0; i--)
+ {
+ if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
+ {
+ int first_reg;
+
+ /* Find the starting register. */
+ first_reg = 15;
+
+ while (fi.reg_mask & (1 << first_reg))
+ first_reg--;
+
+ first_reg++;
+
+ emit_insn (gen_load_multiple (gen_rtx_REG (SImode, first_reg),
+ gen_rtx_MEM (SImode, stack_pointer_rtx),
+ GEN_INT (16 - first_reg)));
+
+ i -= (15 - first_reg);
+ offs += (16 - first_reg) * 4;
+ }
+ else if (fi.reg_mask & (1 << i))
+ {
+ emit_insn (gen_movsi
+ (gen_rtx_REG (SImode, i),
+ gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, offs))));
+ offs += 4;
+ }
+ }
+
+ /* Give back anything else. */
+ /* XXX: Should accumulate total and then give it back. */
+ while (growth >= 0)
+ output_stack_adjust ( 1, fi.growth[growth--]);
+}
+
+/* This code is borrowed from the SH port. */
+
+/* The MCORE cannot load a large constant into a register, constants have to
+ come from a pc relative load. The reference of a pc relative load
+ instruction must be less than 1k in front of the instruction. This
+ means that we often have to dump a constant inside a function, and
+ generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow things
+ down and make things bigger.
+
+ Worst case code looks like:
+
+ lrw L1,r0
+ br L2
+ align
+ L1: .long value
+ L2:
+ ..
+
+ lrw L3,r0
+ br L4
+ align
+ L3: .long value
+ L4:
+ ..
+
+ We fix this by performing a scan before scheduling, which notices which
+ instructions need to have their operands fetched from the constant table
+ and builds the table.
+
+ The algorithm is:
+
+ Scan: find an instruction which needs a pcrel move.  Look forward, find
+ the last barrier which is within MAX_COUNT bytes of the requirement.
+ If there isn't one, make one.  Process all the instructions between
+ the found instruction and the barrier.
+
+ In the above example, we can tell that L3 is within 1k of L1, so
+ the first move can be shrunk from the 2 insn+constant sequence into
+ just 1 insn, and the constant moved to L3 to make:
+
+ lrw L1,r0
+ ..
+ lrw L3,r0
+ bra L4
+ align
+ L3:.long value
+ L4:.long value
+
+ Then the second move becomes the target for the shortening process. */
+
+typedef struct
+{
+ rtx value; /* Value in table. */
+ rtx label; /* Label of value. */
+} pool_node;
+
+/* The maximum number of constants that can fit into one pool, since
+ the pc relative range is 0...1020 bytes and constants are at least 4
+ bytes long. We subtract 4 from the range to allow for the case where
+ we need to add a branch/align before the constant pool. */
+
+#define MAX_COUNT 1016
+#define MAX_POOL_SIZE (MAX_COUNT/4)
+static pool_node pool_vector[MAX_POOL_SIZE];
+static int pool_size;
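+
+/* For illustration: the lrw reach is 0..1020 bytes; keeping 4 bytes in
+ reserve for a possible branch/align ahead of the pool gives
+ MAX_COUNT == 1016, i.e. at most 1016/4 == 254 four-byte entries per
+ pool.  */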
+
+/* Dump out any constants accumulated in the final pass. These
+ will only be labels. */
+
+const char *
+mcore_output_jump_label_table (void)
+{
+ int i;
+
+ if (pool_size)
+ {
+ fprintf (asm_out_file, "\t.align 2\n");
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node * p = pool_vector + i;
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (p->label));
+
+ output_asm_insn (".long %0", &p->value);
+ }
+
+ pool_size = 0;
+ }
+
+ return "";
+}
+
+/* Check whether insn is a candidate for a conditional. */
+
+static cond_type
+is_cond_candidate (rtx insn)
+{
+ /* The only things we conditionalize are those that can be directly
+ changed into a conditional. Only bother with SImode items. If
+ we wanted to be a little more aggressive, we could also do other
+ modes such as DImode with reg-reg move or load 0. */
+ if (GET_CODE (insn) == INSN)
+ {
+ rtx pat = PATTERN (insn);
+ rtx src, dst;
+
+ if (GET_CODE (pat) != SET)
+ return COND_NO;
+
+ dst = XEXP (pat, 0);
+
+ if ((GET_CODE (dst) != REG &&
+ GET_CODE (dst) != SUBREG) ||
+ GET_MODE (dst) != SImode)
+ return COND_NO;
+
+ src = XEXP (pat, 1);
+
+ if ((GET_CODE (src) == REG ||
+ (GET_CODE (src) == SUBREG &&
+ GET_CODE (SUBREG_REG (src)) == REG)) &&
+ GET_MODE (src) == SImode)
+ return COND_MOV_INSN;
+ else if (GET_CODE (src) == CONST_INT &&
+ INTVAL (src) == 0)
+ return COND_CLR_INSN;
+ else if (GET_CODE (src) == PLUS &&
+ (GET_CODE (XEXP (src, 0)) == REG ||
+ (GET_CODE (XEXP (src, 0)) == SUBREG &&
+ GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) &&
+ GET_MODE (XEXP (src, 0)) == SImode &&
+ GET_CODE (XEXP (src, 1)) == CONST_INT &&
+ INTVAL (XEXP (src, 1)) == 1)
+ return COND_INC_INSN;
+ else if (((GET_CODE (src) == MINUS &&
+ GET_CODE (XEXP (src, 1)) == CONST_INT &&
+ INTVAL( XEXP (src, 1)) == 1) ||
+ (GET_CODE (src) == PLUS &&
+ GET_CODE (XEXP (src, 1)) == CONST_INT &&
+ INTVAL (XEXP (src, 1)) == -1)) &&
+ (GET_CODE (XEXP (src, 0)) == REG ||
+ (GET_CODE (XEXP (src, 0)) == SUBREG &&
+ GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) &&
+ GET_MODE (XEXP (src, 0)) == SImode)
+ return COND_DEC_INSN;
+
+ /* Some insns that we don't bother with:
+ (set (rx:DI) (ry:DI))
+ (set (rx:DI) (const_int 0))
+ */
+
+ }
+ else if (GET_CODE (insn) == JUMP_INSN &&
+ GET_CODE (PATTERN (insn)) == SET &&
+ GET_CODE (XEXP (PATTERN (insn), 1)) == LABEL_REF)
+ return COND_BRANCH_INSN;
+
+ return COND_NO;
+}
+
+/* Emit a conditional version of insn and replace the old insn with the
+ new one. Return the new insn if emitted. */
+
+static rtx
+emit_new_cond_insn (rtx insn, int cond)
+{
+ rtx c_insn = 0;
+ rtx pat, dst, src;
+ cond_type num;
+
+ if ((num = is_cond_candidate (insn)) == COND_NO)
+ return NULL;
+
+ pat = PATTERN (insn);
+
+ if (GET_CODE (insn) == INSN)
+ {
+ dst = SET_DEST (pat);
+ src = SET_SRC (pat);
+ }
+ else
+ {
+ dst = JUMP_LABEL (insn);
+ src = NULL_RTX;
+ }
+
+ switch (num)
+ {
+ case COND_MOV_INSN:
+ case COND_CLR_INSN:
+ if (cond)
+ c_insn = gen_movt0 (dst, src, dst);
+ else
+ c_insn = gen_movt0 (dst, dst, src);
+ break;
+
+ case COND_INC_INSN:
+ if (cond)
+ c_insn = gen_incscc (dst, dst);
+ else
+ c_insn = gen_incscc_false (dst, dst);
+ break;
+
+ case COND_DEC_INSN:
+ if (cond)
+ c_insn = gen_decscc (dst, dst);
+ else
+ c_insn = gen_decscc_false (dst, dst);
+ break;
+
+ case COND_BRANCH_INSN:
+ if (cond)
+ c_insn = gen_branch_true (dst);
+ else
+ c_insn = gen_branch_false (dst);
+ break;
+
+ default:
+ return NULL;
+ }
+
+ /* Only copy the notes if they exist. */
+ if (rtx_length [GET_CODE (c_insn)] >= 7 && rtx_length [GET_CODE (insn)] >= 7)
+ {
+ /* We really don't need to bother with the notes and links at this
+ point, but go ahead and save the notes. This will help is_dead()
+ when applying peepholes (links don't matter since they are not
+ used any more beyond this point for the mcore). */
+ REG_NOTES (c_insn) = REG_NOTES (insn);
+ }
+
+ if (num == COND_BRANCH_INSN)
+ {
+ /* For jumps, we need to be a little bit careful and emit the new jump
+ before the old one and to update the use count for the target label.
+ This way, the barrier following the old (uncond) jump will get
+ deleted, but the label won't. */
+ c_insn = emit_jump_insn_before (c_insn, insn);
+
+ ++ LABEL_NUSES (dst);
+
+ JUMP_LABEL (c_insn) = dst;
+ }
+ else
+ c_insn = emit_insn_after (c_insn, insn);
+
+ delete_insn (insn);
+
+ return c_insn;
+}
+
+/* Attempt to change a basic block into a series of conditional insns. This
+ works by taking the branch at the end of the 1st block and scanning for the
+ end of the 2nd block. If all instructions in the 2nd block have cond.
+ versions and the label at the start of block 3 is the same as the target
+ from the branch at block 1, then conditionalize all insns in block 2 using
+ the inverse condition of the branch at block 1. (Note I'm bending the
+ definition of basic block here.)
+
+ e.g., change:
+
+ bt L2 <-- end of block 1 (delete)
+ mov r7,r8
+ addu r7,1
+ br L3 <-- end of block 2
+
+ L2: ... <-- start of block 3 (NUSES==1)
+ L3: ...
+
+ to:
+
+ movf r7,r8
+ incf r7
+ bf L3
+
+ L3: ...
+
+ we can delete the L2 label if NUSES==1 and re-apply the optimization
+ starting at the last instruction of block 2. This may allow an entire
+ if-then-else statement to be conditionalized. BRC */
+static rtx
+conditionalize_block (rtx first)
+{
+ rtx insn;
+ rtx br_pat;
+ rtx end_blk_1_br = 0;
+ rtx end_blk_2_insn = 0;
+ rtx start_blk_3_lab = 0;
+ int cond;
+ int br_lab_num;
+ int blk_size = 0;
+
+ /* Check that the first insn is a candidate conditional jump. This is
+ the one that we'll eliminate. If not, advance to the next insn to
+ try. */
+ if (GET_CODE (first) != JUMP_INSN ||
+ GET_CODE (PATTERN (first)) != SET ||
+ GET_CODE (XEXP (PATTERN (first), 1)) != IF_THEN_ELSE)
+ return NEXT_INSN (first);
+
+ /* Extract some information we need. */
+ end_blk_1_br = first;
+ br_pat = PATTERN (end_blk_1_br);
+
+ /* Complement the condition since we use the reverse cond. for the insns. */
+ cond = (GET_CODE (XEXP (XEXP (br_pat, 1), 0)) == EQ);
+
+ /* Determine what kind of branch we have. */
+ if (GET_CODE (XEXP (XEXP (br_pat, 1), 1)) == LABEL_REF)
+ {
+ /* A normal branch, so extract label out of first arm. */
+ br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 1), 0));
+ }
+ else
+ {
+ /* An inverse branch, so extract the label out of the 2nd arm
+ and complement the condition. */
+ cond = (cond == 0);
+ br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 2), 0));
+ }
+
+ /* Scan forward for the start of block 2: it must start with a
+ label and that label must be the same as the branch target
+ label from block 1. We don't care about whether block 2 actually
+ ends with a branch or a label (an uncond. branch is
+ conditionalizable). */
+ for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
+ {
+ enum rtx_code code;
+
+ code = GET_CODE (insn);
+
+ /* Look for the label at the start of block 3. */
+ if (code == CODE_LABEL && CODE_LABEL_NUMBER (insn) == br_lab_num)
+ break;
+
+ /* Skip barriers, notes, and conditionalizable insns. If the
+ insn is not conditionalizable or makes this optimization fail,
+ just return the next insn so we can start over from that point. */
+ if (code != BARRIER && code != NOTE && !is_cond_candidate (insn))
+ return NEXT_INSN (insn);
+
+ /* Remember the last real insn before the label (i.e. end of block 2). */
+ if (code == JUMP_INSN || code == INSN)
+ {
+ blk_size ++;
+ end_blk_2_insn = insn;
+ }
+ }
+
+ if (!insn)
+ return insn;
+
+ /* It is possible for this optimization to slow performance if the blocks
+ are long. This really depends upon whether the branch is likely taken
+ or not.  If the branch is taken, we slow performance in many cases.  But
+ if the branch is not taken, we always help performance (for a single
+ block; for a double block, i.e. when the optimization is re-applied,
+ this is not true, since the 'right thing' depends on the overall length
+ of the collapsed block).  As a compromise, don't apply this optimization
+ on blocks larger than size 2 (unlikely for the mcore) when speed is
+ important.  The best threshold depends on the latencies of the
+ instructions (i.e., the branch penalty).  */
+ if (optimize > 1 && blk_size > 2)
+ return insn;
+
+ /* At this point, we've found the start of block 3 and we know that
+ it is the destination of the branch from block 1. Also, all
+ instructions in the block 2 are conditionalizable. So, apply the
+ conditionalization and delete the branch. */
+ start_blk_3_lab = insn;
+
+ for (insn = NEXT_INSN (end_blk_1_br); insn != start_blk_3_lab;
+ insn = NEXT_INSN (insn))
+ {
+ rtx newinsn;
+
+ if (INSN_DELETED_P (insn))
+ continue;
+
+ /* Try to form a conditional variant of the instruction and emit it. */
+ if ((newinsn = emit_new_cond_insn (insn, cond)))
+ {
+ if (end_blk_2_insn == insn)
+ end_blk_2_insn = newinsn;
+
+ insn = newinsn;
+ }
+ }
+
+ /* Note whether we will delete the label starting blk 3 when the jump
+ gets deleted. If so, we want to re-apply this optimization at the
+ last real instruction right before the label. */
+ if (LABEL_NUSES (start_blk_3_lab) == 1)
+ {
+ start_blk_3_lab = 0;
+ }
+
+ /* ??? We probably should redistribute the death notes for this insn,
+ especially the death of cc, but it doesn't really matter this late in
+ the game.  The peepholes all use is_dead() which will find the correct
+ death regardless of whether there is a note.  */
+ delete_insn (end_blk_1_br);
+
+ if (! start_blk_3_lab)
+ return end_blk_2_insn;
+
+ /* Return the insn right after the label at the start of block 3. */
+ return NEXT_INSN (start_blk_3_lab);
+}
+
+/* Apply the conditionalization of blocks optimization. This is the
+ outer loop that traverses through the insns scanning for a branch
+ that signifies an opportunity to apply the optimization. Note that
+ this optimization is applied late.  If we could apply it earlier,
+ say before cse 2, it might expose more optimization opportunities,
+ but the payback probably isn't really worth the effort (we'd have
+ to update all the reg/flow/notes/links/etc. to make it work, and
+ stick it in before cse 2).  */
+
+static void
+conditionalize_optimization (void)
+{
+ rtx insn;
+
+ for (insn = get_insns (); insn; insn = conditionalize_block (insn))
+ continue;
+}
+
+static int saved_warn_return_type = -1;
+static int saved_warn_return_type_count = 0;
+
+/* This is to handle loads from the constant pool. */
+
+static void
+mcore_reorg (void)
+{
+ /* Reset this variable. */
+ current_function_anonymous_args = 0;
+
+ /* Restore the warn_return_type if it has been altered. */
+ if (saved_warn_return_type != -1)
+ {
+ /* Only restore the value if we have reached another function.
+ The test of warn_return_type occurs in finish_function () in
+ c-decl.c a long time after the code for the function is generated,
+ so we need a counter to tell us when we have finished parsing that
+ function and can restore the flag. */
+ if (--saved_warn_return_type_count == 0)
+ {
+ warn_return_type = saved_warn_return_type;
+ saved_warn_return_type = -1;
+ }
+ }
+
+ if (optimize == 0)
+ return;
+
+ /* Conditionalize blocks where we can. */
+ conditionalize_optimization ();
+
+ /* Literal pool generation is now pushed off until the assembler. */
+}
+
+
+/* Return true if X is something that can be moved directly into r15. */
+
+bool
+mcore_r15_operand_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ return mcore_const_ok_for_inline (INTVAL (x));
+
+ case REG:
+ case SUBREG:
+ case MEM:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Implement SECONDARY_RELOAD_CLASS. If RCLASS contains r15, and we can't
+ directly move X into it, use r1-r14 as a temporary. */
+
+enum reg_class
+mcore_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+ if (TEST_HARD_REG_BIT (reg_class_contents[rclass], 15)
+ && !mcore_r15_operand_p (x))
+ return LRW_REGS;
+ return NO_REGS;
+}
+
+/* Return the reg_class to use when reloading the rtx X into the class
+ RCLASS. If X is too complex to move directly into r15, prefer to
+ use LRW_REGS instead. */
+
+enum reg_class
+mcore_reload_class (rtx x, enum reg_class rclass)
+{
+ if (reg_class_subset_p (LRW_REGS, rclass) && !mcore_r15_operand_p (x))
+ return LRW_REGS;
+
+ return rclass;
+}
+
+/* Tell me if a pair of reg/subreg rtx's actually refer to the same
+ register.  Note that the current version doesn't worry about whether
+ they are the same mode or not (e.g., a QImode in r2 matches an HImode
+ in r2 matches an SImode in r2).  We might think in the future about
+ whether we want to be able to say something about modes.  */
+
+int
+mcore_is_same_reg (rtx x, rtx y)
+{
+ /* Strip any and all of the subreg wrappers. */
+ while (GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ while (GET_CODE (y) == SUBREG)
+ y = SUBREG_REG (y);
+
+ if (GET_CODE(x) == REG && GET_CODE(y) == REG && REGNO(x) == REGNO(y))
+ return 1;
+
+ return 0;
+}
+
+static void
+mcore_option_override (void)
+{
+ /* Only the m340 supports little endian code. */
+ if (TARGET_LITTLE_END && ! TARGET_M340)
+ target_flags |= MASK_M340;
+}
+
+
+/* Compute the number of word sized registers needed to
+ hold a function argument of mode MODE and type TYPE. */
+
+int
+mcore_num_arg_regs (enum machine_mode mode, const_tree type)
+{
+ int size;
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return ROUND_ADVANCE (size);
+}
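+
+/* For illustration: assuming 4-byte words, a 6-byte BLKmode structure
+ needs ROUND_ADVANCE (6) == 2 argument words, so passing it consumes two
+ of the NPARM_REGS parameter registers (or stack slots if none remain).  */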
+
+static rtx
+handle_structs_in_regs (enum machine_mode mode, const_tree type, int reg)
+{
+ int size;
+
+ /* The MCore ABI defines that a structure whose size is not a whole multiple
+ of bytes is passed packed into registers (or spilled onto the stack if
+ not enough registers are available) with the last few bytes of the
+ structure being packed, left-justified, into the last register/stack slot.
+ GCC handles this correctly if the last word is in a stack slot, but we
+ have to generate a special, PARALLEL RTX if the last word is in an
+ argument register. */
+ if (type
+ && TYPE_MODE (type) == BLKmode
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (size = int_size_in_bytes (type)) > UNITS_PER_WORD
+ && (size % UNITS_PER_WORD != 0)
+ && (reg + mcore_num_arg_regs (mode, type) <= (FIRST_PARM_REG + NPARM_REGS)))
+ {
+ rtx arg_regs [NPARM_REGS];
+ int nregs;
+ rtx result;
+ rtvec rtvec;
+
+ for (nregs = 0; size > 0; size -= UNITS_PER_WORD)
+ {
+ arg_regs [nregs] =
+ gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, reg ++),
+ GEN_INT (nregs * UNITS_PER_WORD));
+ nregs ++;
+ }
+
+ /* We assume here that NPARM_REGS == 6. The assert checks this. */
+ gcc_assert (ARRAY_SIZE (arg_regs) == 6);
+ rtvec = gen_rtvec (nregs, arg_regs[0], arg_regs[1], arg_regs[2],
+ arg_regs[3], arg_regs[4], arg_regs[5]);
+
+ result = gen_rtx_PARALLEL (mode, rtvec);
+ return result;
+ }
+
+ return gen_rtx_REG (mode, reg);
+}
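+
+/* Worked example (illustrative): a 6-byte BLKmode structure starting
+ in argument register r2 produces the PARALLEL
+
+ (parallel [(expr_list (reg:SI 2) (const_int 0))
+ (expr_list (reg:SI 3) (const_int 4))])
+
+ i.e. bytes 0-3 in r2 and the last two bytes packed, left-justified,
+ into r3. */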
+
+rtx
+mcore_function_value (const_tree valtype, const_tree func)
+{
+ enum machine_mode mode;
+ int unsigned_p;
+
+ mode = TYPE_MODE (valtype);
+
+ /* Since we promote return types, we must promote the mode here too. */
+ mode = promote_function_mode (valtype, mode, &unsigned_p, func, 1);
+
+ return handle_structs_in_regs (mode, valtype, FIRST_RET_REG);
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On MCore the first args are normally in registers
+ and the rest are pushed. Any arg that starts within the first
+ NPARM_REGS words is at least partially passed in a register unless
+ its data type forbids. */
+
+static rtx
+mcore_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int arg_reg;
+
+ if (! named || mode == VOIDmode)
+ return 0;
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ arg_reg = ROUND_REG (*cum, mode);
+
+ if (arg_reg < NPARM_REGS)
+ return handle_structs_in_regs (mode, type, FIRST_PARM_REG + arg_reg);
+
+ return 0;
+}
+
+static void
+mcore_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ *cum = (ROUND_REG (*cum, mode)
+ + (int)named * mcore_num_arg_regs (mode, type));
+}
+
+static unsigned int
+mcore_function_arg_boundary (enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ /* Doubles must be aligned to an 8 byte boundary. */
+ return (mode != BLKmode && GET_MODE_SIZE (mode) == 8
+ ? BIGGEST_ALIGNMENT
+ : PARM_BOUNDARY);
+}
+
+/* Returns the number of bytes of argument registers required to hold *part*
+ of a parameter of machine mode MODE and type TYPE (which may be NULL if
+ the type is not known). If the argument fits entirely in the argument
+ registers, or entirely on the stack, then 0 is returned. CUM is the
+ number of argument registers already used by earlier parameters to
+ the function. */
+
+static int
+mcore_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int reg = ROUND_REG (*cum, mode);
+
+ if (named == 0)
+ return 0;
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ /* REG is not the *hardware* register number of the register that holds
+ the argument, it is the *argument* register number. So for example,
+ the first argument to a function goes in argument register 0, which
+ translates (for the MCore) into hardware register 2. The second
+ argument goes into argument register 1, which translates into hardware
+ register 3, and so on. NPARM_REGS is the number of argument registers
+ supported by the target, not the maximum hardware register number of
+ the target. */
+ if (reg >= NPARM_REGS)
+ return 0;
+
+ /* If the argument fits entirely in registers, return 0. */
+ if (reg + mcore_num_arg_regs (mode, type) <= NPARM_REGS)
+ return 0;
+
+ /* The argument overflows the number of available argument registers.
+ Compute how many argument registers have not yet been assigned to
+ hold an argument. */
+ reg = NPARM_REGS - reg;
+
+ /* Return partially in registers and partially on the stack. */
+ return reg * UNITS_PER_WORD;
+}
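+
+/* Worked example (illustrative): with NPARM_REGS == 6, a 12-byte
+ (three word) argument arriving when four argument words are already
+ in use has reg == 4; since 4 + 3 > 6, the first two words go in the
+ last two argument registers and 2 * UNITS_PER_WORD == 8 is returned;
+ the remaining word is passed on the stack. */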
+
+/* Return nonzero if SYMBOL is marked as being dllexport'd. */
+
+int
+mcore_dllexport_name_p (const char * symbol)
+{
+ return symbol[0] == '@' && symbol[1] == 'e' && symbol[2] == '.';
+}
+
+/* Return nonzero if SYMBOL is marked as being dllimport'd. */
+
+int
+mcore_dllimport_name_p (const char * symbol)
+{
+ return symbol[0] == '@' && symbol[1] == 'i' && symbol[2] == '.';
+}
+
+/* Mark a DECL as being dllexport'd. */
+
+static void
+mcore_mark_dllexport (tree decl)
+{
+ const char * oldname;
+ char * newname;
+ rtx rtlname;
+ tree idp;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+
+ if (GET_CODE (rtlname) == MEM)
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ if (mcore_dllexport_name_p (oldname))
+ return; /* Already done. */
+
+ newname = XALLOCAVEC (char, strlen (oldname) + 4);
+ sprintf (newname, "@e.%s", oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ /* ??? At least I think that's why we do this. */
+ idp = get_identifier (newname);
+
+ XEXP (DECL_RTL (decl), 0) =
+ gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+}
+
+/* Mark a DECL as being dllimport'd. */
+
+static void
+mcore_mark_dllimport (tree decl)
+{
+ const char * oldname;
+ char * newname;
+ tree idp;
+ rtx rtlname;
+ rtx newrtl;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+
+ if (GET_CODE (rtlname) == MEM)
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ gcc_assert (!mcore_dllexport_name_p (oldname));
+ if (mcore_dllimport_name_p (oldname))
+ return; /* Already done. */
+
+ /* ??? One can well ask why we're making these checks here,
+ and that would be a good question. */
+
+ /* Imported variables can't be initialized. */
+ if (TREE_CODE (decl) == VAR_DECL
+ && !DECL_VIRTUAL_P (decl)
+ && DECL_INITIAL (decl))
+ {
+ error ("initialized variable %q+D is marked dllimport", decl);
+ return;
+ }
+
+ /* `extern' needn't be specified with dllimport.
+ Specify `extern' now and hope for the best. Sigh. */
+ if (TREE_CODE (decl) == VAR_DECL
+ /* ??? Is this test for vtables needed? */
+ && !DECL_VIRTUAL_P (decl))
+ {
+ DECL_EXTERNAL (decl) = 1;
+ TREE_PUBLIC (decl) = 1;
+ }
+
+ newname = XALLOCAVEC (char, strlen (oldname) + 11);
+ sprintf (newname, "@i.__imp_%s", oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ /* ??? At least I think that's why we do this. */
+ idp = get_identifier (newname);
+
+ newrtl = gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (idp)));
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+}
+
+static int
+mcore_dllexport_p (tree decl)
+{
+ if ( TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+
+ return lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)) != 0;
+}
+
+static int
+mcore_dllimport_p (tree decl)
+{
+ if ( TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+
+ return lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)) != 0;
+}
+
+/* We must mark dll symbols specially. Definitions of dllexport'd objects
+ install some info in the .drectve (PE) or .exports (ELF) sections. */
+
+static void
+mcore_encode_section_info (tree decl, rtx rtl ATTRIBUTE_UNUSED, int first ATTRIBUTE_UNUSED)
+{
+ /* Mark the decl so we can tell from the rtl whether the object is
+ dllexport'd or dllimport'd. */
+ if (mcore_dllexport_p (decl))
+ mcore_mark_dllexport (decl);
+ else if (mcore_dllimport_p (decl))
+ mcore_mark_dllimport (decl);
+
+ /* It might be that DECL has already been marked as dllimport, but
+ a subsequent definition nullified that. The attribute is gone
+ but DECL_RTL still has @i.__imp_foo. We need to remove that. */
+ else if ((TREE_CODE (decl) == FUNCTION_DECL
+ || TREE_CODE (decl) == VAR_DECL)
+ && DECL_RTL (decl) != NULL_RTX
+ && GET_CODE (DECL_RTL (decl)) == MEM
+ && GET_CODE (XEXP (DECL_RTL (decl), 0)) == MEM
+ && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF
+ && mcore_dllimport_name_p (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0)))
+ {
+ const char * oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0);
+ tree idp = get_identifier (oldname + 9);
+ rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+
+ /* We previously set TREE_PUBLIC and DECL_EXTERNAL.
+ ??? We leave these alone for now. */
+ }
+}
+
+/* Undo the effects of the above. */
+
+static const char *
+mcore_strip_name_encoding (const char * str)
+{
+ return str + (str[0] == '@' ? 3 : 0);
+}
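+
+/* For example (illustrative): "foo" becomes "@e.foo" when dllexport'd
+ and "@i.__imp_foo" when dllimport'd; stripping removes the three
+ character "@x." prefix, yielding "foo" and "__imp_foo" respectively. */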
+
+/* MCore specific attribute support.
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+ naked - do not create a function prologue/epilogue. */
+
+/* Handle a "naked" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+mcore_handle_naked_attribute (tree * node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
+{
+ if (TREE_CODE (*node) == FUNCTION_DECL)
+ {
+ /* PR14310 - don't complain about lack of return statement
+ in naked functions. The solution here is a gross hack
+ but this is the only way to solve the problem without
+ adding a new feature to GCC. I did try submitting a patch
+ that would add such a new feature, but it was (rightfully)
+ rejected on the grounds that it was creeping featurism,
+ so hence this code. */
+ if (warn_return_type)
+ {
+ saved_warn_return_type = warn_return_type;
+ warn_return_type = 0;
+ saved_warn_return_type_count = 2;
+ }
+ else if (saved_warn_return_type_count)
+ saved_warn_return_type_count = 2;
+ }
+ else
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
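+
+/* Typical use (an illustrative sketch, not from the original source):
+
+ void __attribute__ ((naked))
+ start (void)
+ {
+ __asm__ ("jbr main");
+ }
+
+ No prologue/epilogue is emitted for such a function, and no
+ missing-return warning is issued for its body. */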
+
+/* ??? It looks like this is PE specific? Oh well, this is what the
+ old code did as well. */
+
+static void
+mcore_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
+{
+ int len;
+ const char * name;
+ char * string;
+ const char * prefix;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+
+ /* Strip off any encoding in name. */
+ name = (* targetm.strip_name_encoding) (name);
+
+ /* The object is put in, for example, section .text$foo.
+ The linker will then ultimately place it in .text
+ (everything from the $ on is stripped). */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ prefix = ".text$";
+ /* For compatibility with EPOC, we ignore the fact that the
+ section might have relocs against it. */
+ else if (decl_readonly_section (decl, 0))
+ prefix = ".rdata$";
+ else
+ prefix = ".data$";
+
+ len = strlen (name) + strlen (prefix);
+ string = XALLOCAVEC (char, len + 1);
+
+ sprintf (string, "%s%s", prefix, name);
+
+ DECL_SECTION_NAME (decl) = build_string (len, string);
+}
+
+int
+mcore_naked_function_p (void)
+{
+ return lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) != NULL_TREE;
+}
+
+#ifdef OBJECT_FORMAT_ELF
+static void
+mcore_asm_named_section (const char *name,
+ unsigned int flags ATTRIBUTE_UNUSED,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ fprintf (asm_out_file, "\t.section %s\n", name);
+}
+#endif /* OBJECT_FORMAT_ELF */
+
+/* Worker function for TARGET_ASM_EXTERNAL_LIBCALL. */
+
+static void
+mcore_external_libcall (rtx fun)
+{
+ fprintf (asm_out_file, "\t.import\t");
+ assemble_name (asm_out_file, XSTR (fun, 0));
+ fprintf (asm_out_file, "\n");
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+mcore_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ const HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 2 * UNITS_PER_WORD);
+}
+
+/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE.
+ Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ On the MCore, the trampoline looks like:
+ lrw r1, chain
+ lrw r13, function
+ jmp r13
+ or r0, r0
+ .literals */
+
+static void
+mcore_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\t.short 0x7102\n");
+ fprintf (f, "\t.short 0x7d02\n");
+ fprintf (f, "\t.short 0x00cd\n");
+ fprintf (f, "\t.short 0x1e00\n");
+ fprintf (f, "\t.long 0\n");
+ fprintf (f, "\t.long 0\n");
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT. */
+
+static void
+mcore_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, 8);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 12);
+ emit_move_insn (mem, fnaddr);
+}
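+
+/* Illustrative layout after initialization, assuming the standard
+ MCore pc-relative lrw encoding (offsets in bytes):
+
+ 0: 0x7102 lrw r1,[8]
+ 2: 0x7d02 lrw r13,[12]
+ 4: 0x00cd jmp r13
+ 6: 0x1e00 or r0,r0 (nop)
+ 8: <static chain value> -> r1 (STATIC_CHAIN_REGNUM)
+ 12: <target function address> -> r13, then jumped to. */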
diff --git a/gcc/config/mcore/mcore.h b/gcc/config/mcore/mcore.h
new file mode 100644
index 000000000..54c12ef3d
--- /dev/null
+++ b/gcc/config/mcore/mcore.h
@@ -0,0 +1,851 @@
+/* Definitions of target machine for GNU compiler,
+ for Motorola M*CORE Processor.
+ Copyright (C) 1993, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007,
+ 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_MCORE_H
+#define GCC_MCORE_H
+
+/* RBE: need to move these elsewhere. */
+#undef LIKE_PPC_ABI
+#define MCORE_STRUCT_ARGS
+/* RBE: end of "move elsewhere". */
+
+/* Run-time Target Specification. */
+#define TARGET_MCORE
+
+/* Get tree.c to declare a target-specific specialization of
+ merge_decl_attributes. */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__mcore__"); \
+ builtin_define ("__MCORE__"); \
+ if (TARGET_LITTLE_END) \
+ builtin_define ("__MCORELE__"); \
+ else \
+ builtin_define ("__MCOREBE__"); \
+ if (TARGET_M340) \
+ builtin_define ("__M340__"); \
+ else \
+ builtin_define ("__M210__"); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{m210:%{mlittle-endian:%ethe m210 does not have little endian support}}"
+
+/* We don't have a -lg library, so don't put it in the list. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared: %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{mbig-endian:-EB} %{m210:-cpu=210 -EB}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian:-EB} %{m210:-EB} -X"
+
+#define TARGET_DEFAULT \
+ (MASK_HARDLIT \
+ | MASK_DIV \
+ | MASK_RELAX_IMM \
+ | MASK_M340 \
+ | MASK_LITTLE_END)
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mlittle-endian", "m340" }
+#endif
+
+/* The ability to have 4 byte alignment is being suppressed for now.
+ If this ability is reenabled, you must disable the definition below
+ *and* edit t-mcore to enable multilibs for 4 byte alignment code. */
+#undef TARGET_8ALIGN
+#define TARGET_8ALIGN 1
+
+extern char * mcore_current_function_name;
+
+/* The MCore ABI says that bitfields are unsigned by default. */
+#define CC1_SPEC "-funsigned-bitfields"
+
+/* Target machine storage Layout. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ (MODE) = SImode; \
+ (UNSIGNEDP) = 1; \
+ }
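+
+/* For example (illustrative): a QImode or HImode scalar is widened to
+ SImode, and always as an unsigned (zero-extended) value, regardless
+ of the signedness of its type. */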
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (! TARGET_LITTLE_END)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN (! TARGET_LITTLE_END)
+
+#define MAX_BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* A C expression for the size in bits of the type `long long' on the
+ target machine. If you don't define this, the default is two
+ words. */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY (TARGET_8ALIGN ? 64 : 32)
+
+/* Largest increment in UNITS we allow the stack to grow in a single operation. */
+#define STACK_UNITS_MAXSTEP 4096
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY ((TARGET_OVERALIGN_FUNC) ? 32 : 16)
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT (TARGET_8ALIGN ? 64 : 32)
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 32
+
+/* Every structure's size must be a multiple of 8 bits. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Look at the fundamental type that is used for a bit-field and use
+ that to impose alignment on the enclosing structure.
+ struct s {int a:8}; should have the same alignment as "int", not "char". */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Largest integer machine mode for structures. If undefined, the default
+ is GET_MODE_SIZE(DImode). */
+#define MAX_FIXED_MODE_SIZE 32
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Standard register usage. */
+
+/* Register allocation for our first guess
+
+ r0 stack pointer
+ r1 scratch, target reg for xtrb?
+ r2-r7 arguments.
+ r8-r14 call saved
+ r15 link register
+ ap arg pointer (doesn't really exist, always eliminated)
+ c c bit
+ fp frame pointer (doesn't really exist, always eliminated)
+ x19 two control registers. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ MCore has 16 integer registers and 2 control registers + the arg
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 20
+
+#define R1_REG 1 /* Where literals are forced. */
+#define LK_REG 15 /* Overloaded on general register. */
+#define AP_REG 16 /* Fake arg pointer register. */
+/* RBE: mcore.md depends on CC_REG being set to 17. */
+#define CC_REG 17 /* Can't name it C_REG. */
+#define FP_REG 18 /* Fake frame pointer register. */
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+
+#undef PC_REGNUM /* Define this if the program counter is overloaded on a register. */
+#define STACK_POINTER_REGNUM 0 /* Register to use for pushing function arguments. */
+#define FRAME_POINTER_REGNUM 8 /* When we need FP, use r8. */
+
+/* The assembler's names for the registers. RFP need not always be used as
+ the Real framepointer; it can also be used as a normal general register.
+ Note that the name `fp' is horribly misleading since `fp' is in fact only
+ the argument-and-return-context pointer. */
+#define REGISTER_NAMES \
+{ \
+ "sp", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "apvirtual", "c", "fpvirtual", "x19" \
+}
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+#define FIXED_REGISTERS \
+ /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 ap c fp x19 */ \
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+/* RBE: r15 {link register} not available across calls,
+ But we don't mark it that way here.... */
+#define CALL_USED_REGISTERS \
+ /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 ap c fp x19 */ \
+ { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}
+
+/* The order in which registers should be allocated. */
+#define REG_ALLOC_ORDER \
+ /* r7 r6 r5 r4 r3 r2 r15 r14 r13 r12 r11 r10 r9 r8 r1 r0 ap c fp x19*/ \
+ { 7, 6, 5, 4, 3, 2, 15, 14, 13, 12, 11, 10, 9, 8, 1, 0, 16, 17, 18, 19}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the MCore regs are UNITS_PER_WORD bytes wide. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ We may keep double values in even registers. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((TARGET_8ALIGN && GET_MODE_SIZE (MODE) > UNITS_PER_WORD) ? (((REGNO) & 1) == 0) : ((REGNO) < 18))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) || GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
+
+/* Definitions for register eliminations.
+
+ We have two registers that can be eliminated on the MCore. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer. */
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register in which the static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM 1
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference. */
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM},}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = mcore_initial_elimination_offset (FROM, TO)
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The MCore has only general registers. There are
+ also some special purpose registers: the condition (c) bit register
+ and the link register. */
+enum reg_class
+{
+ NO_REGS,
+ ONLYR1_REGS,
+ LRW_REGS,
+ GENERAL_REGS,
+ C_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, C_REGS, LIM_REG_CLASSES \
+}
+
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "ONLYR1_REGS", \
+ "LRW_REGS", \
+ "GENERAL_REGS", \
+ "C_REGS", \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+/* ??? STACK_POINTER_REGNUM should be excluded from LRW_REGS. */
+#define REG_CLASS_CONTENTS \
+{ \
+ {0x000000}, /* NO_REGS */ \
+ {0x000002}, /* ONLYR1_REGS */ \
+ {0x007FFE}, /* LRW_REGS */ \
+ {0x01FFFF}, /* GENERAL_REGS */ \
+ {0x020000}, /* C_REGS */ \
+ {0x0FFFFF} /* ALL_REGS */ \
+}
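+
+/* Decoding the masks above (illustrative): bit N selects register N,
+ so LRW_REGS = 0x007FFE is r1-r14, GENERAL_REGS = 0x01FFFF is r0-r15
+ plus the fake arg pointer (reg 16), and C_REGS = 0x020000 is just
+ the condition bit (reg 17). */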
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) regno_reg_class[REGNO]
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS NO_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Convenience wrappers around insn_const_int_ok_for_constraint. */
+#define CONST_OK_FOR_I(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_I)
+#define CONST_OK_FOR_J(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_J)
+#define CONST_OK_FOR_L(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_L)
+#define CONST_OK_FOR_K(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_K)
+#define CONST_OK_FOR_M(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_M)
+#define CONST_OK_FOR_N(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_N)
+#define CONST_OK_FOR_O(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_O)
+#define CONST_OK_FOR_P(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_P)
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class. */
+#define PREFERRED_RELOAD_CLASS(X, CLASS) mcore_reload_class (X, CLASS)
+
+/* Return the register class of a scratch register needed to copy IN into
+ or out of a register in CLASS in MODE. If it can be done directly,
+ NO_REGS is returned. */
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ mcore_secondary_reload_class (CLASS, MODE, X)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+
+ On MCore this is the size of MODE in words. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (ROUND_ADVANCE (GET_MODE_SIZE (MODE)))
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define the number of registers that can hold parameters.
+ These macros are used only in other macro definitions below. */
+#define NPARM_REGS 6
+#define FIRST_PARM_REG 2
+#define FIRST_RET_REG 2
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If defined, the maximum amount of space required for outgoing arguments
+ will be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue should
+ increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) mcore_function_value (VALTYPE, FUNC)
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, FIRST_RET_REG)
+
+/* 1 if N is a possible register number for a function value.
+ On the MCore, only r2 (FIRST_RET_REG) can return results. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) ((REGNO) == FIRST_RET_REG)
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ ((REGNO) >= FIRST_PARM_REG && (REGNO) < (NPARM_REGS + FIRST_PARM_REG))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On MCore, this is a single integer, which is a number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address).
+ Thus NPARM_REGS or more means all following args should go on the stack. */
+#define CUMULATIVE_ARGS int
+
+#define ROUND_ADVANCE(SIZE) \
+ (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round a register number up to a proper boundary for an arg of mode
+ MODE.
+
+ We round to an even reg for things larger than a word. */
+#define ROUND_REG(X, MODE) \
+ ((TARGET_8ALIGN \
+ && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
+ ? ((X) + ((X) & 1)) : (X))
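+
+/* For example (illustrative): with TARGET_8ALIGN, a DImode argument
+ arriving when three words are in use is rounded up to start at word
+ four (an even register pair); word-sized and smaller arguments are
+ never rounded. */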
+
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On MCore, the offset always starts at 0: the first parm reg is always
+ the same reg. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+/* Call the function profiler with a given profile label. */
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ fprintf (STREAM, " trap 1\n"); \
+ fprintf (STREAM, " .align 2\n"); \
+ fprintf (STREAM, " .long LP%d\n", (LABELNO)); \
+}
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 0
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE 12
+
+/* Alignment required for a trampoline in bits. */
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) < AP_REG || (unsigned) reg_renumber[(REGNO)] < AP_REG)
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) 0
+
+/* Maximum number of registers that can appear in a valid memory
+ address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* Recognize any constant value that is a valid address. */
+#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.
+
+ On the MCore, allow anything but a double. */
+#define LEGITIMATE_CONSTANT_P(X) (GET_CODE (X) != CONST_DOUBLE \
+ && CONSTANT_P (X))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used. */
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) <= 16 || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) 0
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) \
+ REGNO_OK_FOR_BASE_P (REGNO (X))
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) 0
+
+#endif
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS. */
+#define BASE_REGISTER_RTX_P(X) \
+ (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X))
+
+#define INDEX_REGISTER_RTX_P(X) \
+ (GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X))
+
+
+/* Jump to LABEL if X is a valid address RTX. This must also take
+ REG_OK_STRICT into account when deciding about valid registers, but it uses
+ the above macros so we are in luck.
+
+ Allow REG
+ REG+disp
+
+ A legitimate index for a QI is 0..15, for HI is 0..30, for SI is 0..60,
+ and for DI is 0..56 because we use two SI loads, etc. */
+#define GO_IF_LEGITIMATE_INDEX(MODE, REGNO, OP, LABEL) \
+ do \
+ { \
+ if (GET_CODE (OP) == CONST_INT) \
+ { \
+ if (GET_MODE_SIZE (MODE) >= 4 \
+ && (((unsigned HOST_WIDE_INT) INTVAL (OP)) % 4) == 0 \
+ && ((unsigned HOST_WIDE_INT) INTVAL (OP)) \
+ <= (unsigned HOST_WIDE_INT) 64 - GET_MODE_SIZE (MODE)) \
+ goto LABEL; \
+ if (GET_MODE_SIZE (MODE) == 2 \
+ && (((unsigned HOST_WIDE_INT) INTVAL (OP)) % 2) == 0 \
+ && ((unsigned HOST_WIDE_INT) INTVAL (OP)) <= 30) \
+ goto LABEL; \
+ if (GET_MODE_SIZE (MODE) == 1 \
+ && ((unsigned HOST_WIDE_INT) INTVAL (OP)) <= 15) \
+ goto LABEL; \
+ } \
+ } \
+ while (0)
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+{ \
+ if (BASE_REGISTER_RTX_P (X)) \
+ goto LABEL; \
+ else if (GET_CODE (X) == PLUS || GET_CODE (X) == LO_SUM) \
+ { \
+ rtx xop0 = XEXP (X,0); \
+ rtx xop1 = XEXP (X,1); \
+ if (BASE_REGISTER_RTX_P (xop0)) \
+ GO_IF_LEGITIMATE_INDEX (MODE, REGNO (xop0), xop1, LABEL); \
+ if (BASE_REGISTER_RTX_P (xop1)) \
+ GO_IF_LEGITIMATE_INDEX (MODE, REGNO (xop1), xop0, LABEL); \
+ } \
+}
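+
+/* Examples (illustrative): (mem:SI (reg r2)) and
+ (mem:SI (plus (reg r2) (const_int 60))) are legitimate, while a
+ displacement of 64 is rejected (60 is the largest SImode offset)
+ and a displacement of 2 is rejected for SImode because it is not
+ a multiple of 4. */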
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE SImode
+
+/* 'char' is signed by default. */
+#define DEFAULT_SIGNED_CHAR 0
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS TARGET_SLOW_BYTES
+
+/* Shift counts are truncated to 6 bits (0 to 63) instead of the expected
+ 5 bits, so we cannot define SHIFT_COUNT_TRUNCATED to true for this
+ target. */
+#define SHIFT_COUNT_TRUNCATED 0
+
+/* All integers have the same format so truncation is easy. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) 1
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+/* Why is this defined??? -- dac */
+#define NO_FUNCTION_CSE 1
+
+/* The machine modes of pointers and functions. */
+#define Pmode SImode
+#define FUNCTION_MODE Pmode
+
+/* Compute extra cost of moving data between one register class
+ and another. All register moves are cheap. */
+#define REGISTER_MOVE_COST(MODE, SRCCLASS, DSTCLASS) 2
+
+/* Assembler output control. */
+#define ASM_COMMENT_START "\t//"
+
+#define ASM_APP_ON "// inline asm begin\n"
+#define ASM_APP_OFF "// inline asm end\n"
+
+#define FILE_ASM_OP "\t.file\n"
+
+/* Switch to the text or data segment. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* Switch into a generic section. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION mcore_asm_named_section
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, LK_REG)
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+#define ASM_OUTPUT_REG_PUSH(FILE,REGNO) \
+ fprintf (FILE, "\tsubi\t %s,%d\n\tstw\t %s,(%s)\n", \
+ reg_names[STACK_POINTER_REGNUM], \
+ (STACK_BOUNDARY / BITS_PER_UNIT), \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM])
+
+/* Length in instructions of the code output by ASM_OUTPUT_REG_PUSH. */
+#define REG_PUSH_LENGTH 2
+
+/* This is how to output an insn to pop a register from the stack. */
+#define ASM_OUTPUT_REG_POP(FILE,REGNO) \
+ fprintf (FILE, "\tldw\t %s,(%s)\n\taddi\t %s,%d\n", \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM], \
+ reg_names[STACK_POINTER_REGNUM], \
+ (STACK_BOUNDARY / BITS_PER_UNIT))
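+
+/* Illustrative output for REGNO == 2 with the default 8-byte
+ STACK_BOUNDARY:
+
+ push: subi sp,8 pop: ldw r2,(sp)
+ stw r2,(sp) addi sp,8 */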
+
+
+/* Output a reference to a label. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ fprintf (STREAM, "%s%s", USER_LABEL_PREFIX, \
+ (* targetm.strip_name_encoding) (NAME))
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align\t%d\n", LOG)
+
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
+
+#define MULTIPLE_SYMBOL_SPACES 1
+
+#define SUPPORTS_ONE_ONLY 1
+
+/* A pair of macros to output things for the callgraph data.
+ VALUE means (to the tools that read this info later):
+ 0 a call from src to dst
+ 1 the call is special (e.g. dst is "unknown" or "alloca")
+ 2 the call is special (e.g., the src is a table instead of routine)
+
+ Frame sizes are augmented with timestamps to help later tools
+ differentiate between static entities with same names in different
+ files. */
+extern long mcore_current_compilation_timestamp;
+#define ASM_OUTPUT_CG_NODE(FILE,SRCNAME,VALUE) \
+ do \
+ { \
+ if (mcore_current_compilation_timestamp == 0) \
+ mcore_current_compilation_timestamp = time (0); \
+ fprintf ((FILE),"\t.equ\t__$frame$size$_%s_$_%08lx,%d\n", \
+ (SRCNAME), mcore_current_compilation_timestamp, (VALUE)); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_CG_EDGE(FILE,SRCNAME,DSTNAME,VALUE) \
+ do \
+ { \
+ fprintf ((FILE),"\t.equ\t__$function$call$_%s_$_%s,%d\n", \
+ (SRCNAME), (DSTNAME), (VALUE)); \
+ } \
+ while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.export\t"
+
+/* The prefix to add to user-visible assembler symbols. */
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+/* Make an internal label into a string. */
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf (STRING, "*.%s%ld", PREFIX, (long) NUM)
+
+/* Jump tables must be 32 bit aligned. */
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(STREAM,PREFIX,NUM,TABLE) \
+ fprintf (STREAM, "\t.align 2\n.%s%d:\n", PREFIX, NUM);
+
+/* Output a relative address. Not needed since jump tables are absolute
+ but we must define it anyway. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \
+ fputs ("- - - ASM_OUTPUT_ADDR_DIFF_ELT called!\n", STREAM)
+
+/* Output an element of a dispatch table. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \
+ fprintf (STREAM, "\t.long\t.L%d\n", VALUE)
+
+/* Output various types of constants. */
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.fill %d, 1\n", (int)(SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol, with alignment information. */
+/* XXX - for now we ignore the alignment. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (mcore_dllexport_name_p (NAME)) \
+ MCORE_EXPORT_NAME (FILE, NAME) \
+ if (! mcore_dllimport_name_p (NAME)) \
+ { \
+ fputs ("\t.comm\t", FILE); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, ",%lu\n", (unsigned long)(SIZE)); \
+ } \
+ } \
+ while (0)
+
+/* This says how to output an assembler line
+ to define a local common symbol.... */
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+ (fputs ("\t.lcomm\t", FILE), \
+ assemble_name (FILE, NAME), \
+ fprintf (FILE, ",%d\n", (int)SIZE))
+
+/* ... and how to define a local common symbol whose alignment
+ we wish to specify. ALIGN comes in as bits, we have to turn
+ it into bytes. */
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ fputs ("\t.bss\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ",%d,%d\n", (int)(SIZE), (ALIGN) / BITS_PER_UNIT);\
+ } \
+ while (0)
+
+#endif /* ! GCC_MCORE_H */
diff --git a/gcc/config/mcore/mcore.md b/gcc/config/mcore/mcore.md
new file mode 100644
index 000000000..c56a0c6ae
--- /dev/null
+++ b/gcc/config/mcore/mcore.md
@@ -0,0 +1,3085 @@
+;; Machine description for the Motorola MCore
+;; Copyright (C) 1993, 1999, 2000, 2004, 2005, 2007, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Motorola.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+
+
+;; -------------------------------------------------------------------------
+;; Attributes
+;; -------------------------------------------------------------------------
+
+; Target CPU.
+
+(define_attr "type" "brcond,branch,jmp,load,store,move,alu,shift"
+ (const_string "alu"))
+
+;; If a branch destination is within -2048..2047 bytes of the
+;; instruction, it can be 2 bytes long. All other conditional branches
+;; are 10 bytes long, and all other unconditional branches are 8 bytes.
+;;
+;; The assembler handles the long-branch span case for us if we use
+;; the "jb*" mnemonics for jumps/branches. This pushes the span
+;; calculations and the literal table placement into the assembler,
+;; where their interactions can be managed in a single place.
+
+;; All MCORE instructions are two bytes long.
+
+(define_attr "length" "" (const_int 2))
+
+;; Scheduling. We only model a simple load latency.
+(define_insn_reservation "any_insn" 1
+ (eq_attr "type" "!load")
+ "nothing")
+(define_insn_reservation "memory" 2
+ (eq_attr "type" "load")
+ "nothing")
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; -------------------------------------------------------------------------
+;; Test and bit test
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (reg:SI 17)
+ (sign_extract:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "mcore_literal_K_operand" "K")))]
+ ""
+ "btsti %0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (reg:SI 17)
+ (zero_extract:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "mcore_literal_K_operand" "K")))]
+ ""
+ "btsti %0,%1"
+ [(set_attr "type" "shift")])
+
+;;; This is created by combine.
+(define_insn ""
+ [(set (reg:CC 17)
+ (ne:CC (zero_extract:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "mcore_literal_K_operand" "K"))
+ (const_int 0)))]
+ ""
+ "btsti %0,%1"
+ [(set_attr "type" "shift")])
+
+
+;; Created by combine from conditional patterns below (see sextb/btsti rx,31)
+
+(define_insn ""
+ [(set (reg:CC 17)
+ (ne:CC (lshiftrt:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 7))
+ (const_int 0)))]
+ "GET_CODE(operands[0]) == SUBREG &&
+ GET_MODE(SUBREG_REG(operands[0])) == QImode"
+ "btsti %0,7"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (reg:CC 17)
+ (ne:CC (lshiftrt:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 15))
+ (const_int 0)))]
+ "GET_CODE(operands[0]) == SUBREG &&
+ GET_MODE(SUBREG_REG(operands[0])) == HImode"
+ "btsti %0,15"
+ [(set_attr "type" "shift")])
+
+(define_split
+ [(set (pc)
+ (if_then_else (ne (eq:CC (zero_extract:SI
+ (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (const_int 1)
+ (match_operand:SI 1 "mcore_literal_K_operand" ""))
+ (const_int 0))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ [(set (reg:CC 17)
+ (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:CC 17) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+(define_split
+ [(set (pc)
+ (if_then_else (eq (ne:CC (zero_extract:SI
+ (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (const_int 1)
+ (match_operand:SI 1 "mcore_literal_K_operand" ""))
+ (const_int 0))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ [(set (reg:CC 17)
+ (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:CC 17) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+;; XXX - disabled by nickc because it fails on libiberty/fnmatch.c
+;;
+;; ; Experimental - relax immediates for and, andn, or, and tst to allow
+;; ; any immediate value (or an immediate at all -- or, andn, & tst).
+;; ; This is done to allow bit field masks to fold together in combine.
+;; ; The reload phase will force the immediate into a register at the
+;; ; very end. This helps in some cases, but hurts in others: we'd
+;; ; really like to cse these immediates. However, there is a phase
+;; ; ordering problem here. cse picks up individual masks and cse's
+;; ; those, but not folded masks (cse happens before combine). It's
+;; ; not clear what the best solution is because we really want cse
+;; ; before combine (leaving the bit field masks alone). To pick up
+;; ; relaxed immediates use -mrelax-immediates. It might take some
+;; ; experimenting to see which does better (i.e. regular imms vs.
+;; ; arbitrary imms) for a particular code. BRC
+;;
+;; (define_insn ""
+;; [(set (reg:CC 17)
+;; (ne:CC (and:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+;; (match_operand:SI 1 "mcore_arith_any_imm_operand" "rI"))
+;; (const_int 0)))]
+;; "TARGET_RELAX_IMM"
+;; "tst %0,%1")
+;;
+;; (define_insn ""
+;; [(set (reg:CC 17)
+;; (ne:CC (and:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+;; (match_operand:SI 1 "mcore_arith_M_operand" "r"))
+;; (const_int 0)))]
+;; "!TARGET_RELAX_IMM"
+;; "tst %0,%1")
+
+(define_insn ""
+ [(set (reg:CC 17)
+ (ne:CC (and:SI (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_M_operand" "r"))
+ (const_int 0)))]
+ ""
+ "tst %0,%1")
+
+
+(define_split
+ [(parallel[
+ (set (reg:CC 17)
+ (ne:CC (ne:SI (leu:CC (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "mcore_arith_reg_operand" ""))
+ (const_int 0))
+ (const_int 0)))
+ (clobber (match_operand:CC 2 "mcore_arith_reg_operand" ""))])]
+ ""
+ [(set (reg:CC 17) (ne:SI (match_dup 0) (const_int 0)))
+ (set (reg:CC 17) (leu:CC (match_dup 0) (match_dup 1)))])
+
+;; -------------------------------------------------------------------------
+;; SImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "decne_t"
+ [(set (reg:CC 17) (ne:CC (plus:SI (match_operand:SI 0 "mcore_arith_reg_operand" "+r")
+ (const_int -1))
+ (const_int 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ ""
+ "decne %0")
+
+;; The combiner seems to prefer the following form to the preceding one.
+;;
+(define_insn ""
+ [(set (reg:CC 17) (ne:CC (match_operand:SI 0 "mcore_arith_reg_operand" "+r")
+ (const_int 1)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ ""
+ "decne %0")
+
+(define_insn "cmpnesi_t"
+ [(set (reg:CC 17) (ne:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "cmpne %0,%1")
+
+(define_insn "cmpneisi_t"
+ [(set (reg:CC 17) (ne:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_K_operand" "K")))]
+ ""
+ "cmpnei %0,%1")
+
+(define_insn "cmpgtsi_t"
+ [(set (reg:CC 17) (gt:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "cmplt %1,%0")
+
+(define_insn ""
+ [(set (reg:CC 17) (gt:CC (plus:SI
+ (match_operand:SI 0 "mcore_arith_reg_operand" "+r")
+ (const_int -1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ ""
+ "decgt %0")
+
+(define_insn "cmpltsi_t"
+ [(set (reg:CC 17) (lt:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "cmplt %0,%1")
+
+; cmplti is 1-32
+(define_insn "cmpltisi_t"
+ [(set (reg:CC 17) (lt:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_J_operand" "J")))]
+ ""
+ "cmplti %0,%1")
+
+; covers cmplti x,0
+(define_insn ""
+ [(set (reg:CC 17) (lt:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 0)))]
+ ""
+ "btsti %0,31")
+
+(define_insn ""
+ [(set (reg:CC 17) (lt:CC (plus:SI
+ (match_operand:SI 0 "mcore_arith_reg_operand" "+r")
+ (const_int -1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ ""
+ "declt %0")
+
+;; -------------------------------------------------------------------------
+;; SImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeusi_t"
+ [(set (reg:CC 17) (geu:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "cmphs %0,%1")
+
+(define_insn "cmpgeusi_0"
+ [(set (reg:CC 17) (geu:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (const_int 0)))]
+ ""
+ "cmpnei %0, 0")
+
+(define_insn "cmpleusi_t"
+ [(set (reg:CC 17) (leu:CC (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "cmphs %1,%0")
+
+;; -------------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------------
+
+;; Logical AND clearing a single bit. andsi3 knows that we have this
+;; pattern and allows the constant literal to pass through.
+;;
+
+;; RBE 2/97: don't need this pattern any longer...
+;; RBE: I don't think we need both "S" and exact_log2() clauses.
+;;(define_insn ""
+;; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+;; (and:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+;; (match_operand:SI 2 "const_int_operand" "S")))]
+;; "mcore_arith_S_operand (operands[2])"
+;; "bclri %0,%Q2")
+;;
+
+(define_insn "andnsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (and:SI (not:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r"))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0")))]
+ ""
+ "andn %0,%1")
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (and:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0
+ && ! mcore_arith_S_operand (operands[2]))
+ {
+ HOST_WIDE_INT not_value = ~ INTVAL (operands[2]);
+
+ if ( CONST_OK_FOR_I (not_value)
+ || CONST_OK_FOR_M (not_value)
+ || CONST_OK_FOR_N (not_value))
+ {
+ operands[2] = copy_to_mode_reg (SImode, GEN_INT (not_value));
+ emit_insn (gen_andnsi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+ }
+
+ if (! mcore_arith_K_S_operand (operands[2], SImode))
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (and:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0,r,0")
+ (match_operand:SI 2 "mcore_arith_any_imm_operand" "r,K,0,S")))]
+ "TARGET_RELAX_IMM"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"and %0,%2\";
+ case 1: return \"andi %0,%2\";
+ case 2: return \"and %0,%1\";
+ /* case -1: return \"bclri %0,%Q2\"; will not happen */
+ case 3: return mcore_output_bclri (operands[0], INTVAL (operands[2]));
+ default: gcc_unreachable ();
+ }
+}")
+
+;; This was the old "S" which was "!(2^n)" */
+;; case -1: return \"bclri %0,%Q2\"; will not happen */
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (and:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0,r,0")
+ (match_operand:SI 2 "mcore_arith_K_S_operand" "r,K,0,S")))]
+ "!TARGET_RELAX_IMM"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"and %0,%2\";
+ case 1: return \"andi %0,%2\";
+ case 2: return \"and %0,%1\";
+ case 3: return mcore_output_bclri (operands[0], INTVAL (operands[2]));
+ default: gcc_unreachable ();
+ }
+}")
+
+;(define_insn "iorsi3"
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+; (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+; (match_operand:SI 2 "mcore_arith_reg_operand" "r")))]
+; ""
+; "or %0,%2")
+
+; Need an expand to resolve the ambiguity between the two iors below.
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ if (! mcore_arith_M_operand (operands[2], SImode))
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r")
+ (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0,0,0")
+ (match_operand:SI 2 "mcore_arith_any_imm_operand" "r,M,T")))]
+ "TARGET_RELAX_IMM"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"or %0,%2\";
+ case 1: return \"bseti %0,%P2\";
+ case 2: return mcore_output_bseti (operands[0], INTVAL (operands[2]));
+ default: gcc_unreachable ();
+ }
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r")
+ (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0,0,0")
+ (match_operand:SI 2 "mcore_arith_M_operand" "r,M,T")))]
+ "!TARGET_RELAX_IMM"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"or %0,%2\";
+ case 1: return \"bseti %0,%P2\";
+ case 2: return mcore_output_bseti (operands[0], INTVAL (operands[2]));
+ default: gcc_unreachable ();
+ }
+}")
+
+;(define_insn ""
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+; (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+; (match_operand:SI 2 "const_int_operand" "M")))]
+; "exact_log2 (INTVAL (operands[2])) >= 0"
+; "bseti %0,%P2")
+
+;(define_insn ""
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+; (ior:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+; (match_operand:SI 2 "const_int_operand" "i")))]
+; "mcore_num_ones (INTVAL (operands[2])) < 3"
+; "* return mcore_output_bseti (operands[0], INTVAL (operands[2]));")
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (xor:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "r")))]
+ ""
+ "xor %0,%2")
+
+; These patterns give better code than gcc invents when
+; left to its own devices.
+
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=r")
+ (and:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:DI 2 "mcore_arith_reg_operand" "r")))]
+ ""
+ "and %0,%2\;and %R0,%R2"
+ [(set_attr "length" "4")])
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=r")
+ (ior:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:DI 2 "mcore_arith_reg_operand" "r")))]
+ ""
+ "or %0,%2\;or %R0,%R2"
+ [(set_attr "length" "4")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=r")
+ (xor:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:DI 2 "mcore_arith_reg_operand" "r")))]
+ ""
+ "xor %0,%2\;xor %R0,%R2"
+ [(set_attr "length" "4")])
+
+;; -------------------------------------------------------------------------
+;; Shifts and rotates
+;; -------------------------------------------------------------------------
+
+;; Only allow these if the shift count is a convenient constant.
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (rotate:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "if (! mcore_literal_K_operand (operands[2], SImode))
+ FAIL;
+ ")
+
+;; We can only do constant rotates, which is what this pattern provides.
+;; The combiner will put it together for us when we do:
+;; (x << N) | (x >> (32 - N))
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+ (match_operand:SI 2 "mcore_literal_K_operand" "K")))]
+ ""
+ "rotli %0,%2"
+ [(set_attr "type" "shift")])
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0")
+ (match_operand:SI 2 "mcore_arith_K_operand_not_0" "r,K")))]
+ ""
+ "@
+ lsl %0,%2
+ lsli %0,%2"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (ashift:SI (const_int 1)
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "bgenr %0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0")
+ (match_operand:SI 2 "mcore_arith_K_operand_not_0" "r,K")))]
+ ""
+ "@
+ asr %0,%2
+ asri %0,%2"
+ [(set_attr "type" "shift")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r")
+ (lshiftrt:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0")
+ (match_operand:SI 2 "mcore_arith_K_operand_not_0" "r,K")))]
+ ""
+ "@
+ lsr %0,%2
+ lsri %0,%2"
+ [(set_attr "type" "shift")])
+
+;(define_expand "ashldi3"
+; [(parallel[(set (match_operand:DI 0 "mcore_arith_reg_operand" "")
+; (ashift:DI (match_operand:DI 1 "mcore_arith_reg_operand" "")
+; (match_operand:DI 2 "immediate_operand" "")))
+;
+; (clobber (reg:CC 17))])]
+;
+; ""
+; "
+;{
+; if (GET_CODE (operands[2]) != CONST_INT
+; || INTVAL (operands[2]) != 1)
+; FAIL;
+;}")
+;
+;(define_insn ""
+; [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=r")
+; (ashift:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")
+; (const_int 1)))
+; (clobber (reg:CC 17))]
+; ""
+; "lsli %R0,0\;rotli %0,0"
+; [(set_attr "length" "4") (set_attr "type" "shift")])
+
+;; -------------------------------------------------------------------------
+;; Index instructions
+;; -------------------------------------------------------------------------
+;; The second of each set of patterns is borrowed from the alpha.md file.
+;; These variants of the above insns can occur if the second operand
+;; is the frame pointer. This is a kludge, but there doesn't
+;; seem to be a way around it. Only recognize them while reloading.
+
+;; We must use reload_operand for some operands in case frame pointer
+;; elimination put a MEM with invalid address there. Otherwise,
+;; the result of the substitution will not match this pattern, and reload
+;; will not be able to correctly fix the result.
+
+;; indexing longlongs or doubles (8 bytes)
+
+(define_insn "indexdi_t"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 8))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0")))]
+ ""
+ "*
+ if (! mcore_is_same_reg (operands[1], operands[2]))
+ {
+ output_asm_insn (\"ixw\\t%0,%1\", operands);
+ output_asm_insn (\"ixw\\t%0,%1\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ixh\\t%0,%1\", operands);
+ output_asm_insn (\"ixh\\t%0,%1\", operands);
+ }
+ return \"\";
+ "
+;; If operands[1] == operands[2], the first option above is wrong! -- dac
+;; The template used to be simply this: -- dac
+;; ixw %0,%1\;ixw %0,%1
+
+ [(set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_reload_operand" "=r,r,r")
+ (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "mcore_reload_operand" "r,r,r")
+ (const_int 8))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0,0,0"))
+ (match_operand:SI 3 "mcore_addsub_operand" "r,J,L")))]
+ "reload_in_progress"
+ "@
+ ixw %0,%1\;ixw %0,%1\;addu %0,%3
+ ixw %0,%1\;ixw %0,%1\;addi %0,%3
+ ixw %0,%1\;ixw %0,%1\;subi %0,%M3"
+ [(set_attr "length" "6")])
+
+;; indexing longs (4 bytes)
+
+(define_insn "indexsi_t"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 4))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0")))]
+ ""
+ "ixw %0,%1")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_reload_operand" "=r,r,r")
+ (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "mcore_reload_operand" "r,r,r")
+ (const_int 4))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0,0,0"))
+ (match_operand:SI 3 "mcore_addsub_operand" "r,J,L")))]
+ "reload_in_progress"
+ "@
+ ixw %0,%1\;addu %0,%3
+ ixw %0,%1\;addi %0,%3
+ ixw %0,%1\;subi %0,%M3"
+ [(set_attr "length" "4")])
+
+;; indexing shorts (2 bytes)
+
+(define_insn "indexhi_t"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 2))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0")))]
+ ""
+ "ixh %0,%1")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_reload_operand" "=r,r,r")
+ (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "mcore_reload_operand" "r,r,r")
+ (const_int 2))
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0,0,0"))
+ (match_operand:SI 3 "mcore_addsub_operand" "r,J,L")))]
+ "reload_in_progress"
+ "@
+ ixh %0,%1\;addu %0,%3
+ ixh %0,%1\;addi %0,%3
+ ixh %0,%1\;subi %0,%M3"
+ [(set_attr "length" "4")])
+
+;;
+;; Other sizes may be handy for indexing.
+;; The tradeoffs to consider when adding these are
+;; code size, execution time [vs. a mul it is easy to win],
+;; register pressure [these patterns don't use an extra
+;; register to build the offset from the base],
+;; and whether the compiler will actually use them rather
+;; than coming up with some other idiom.
+;;
+
+;; -------------------------------------------------------------------------
+;; Addition, Subtraction instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ /* If this is an add to the frame pointer, then accept it as is so
+ that we can later fold in the fp/sp offset from frame pointer
+ elimination. */
+ if (flag_omit_frame_pointer
+ && GET_CODE (operands[1]) == REG
+ && (REGNO (operands[1]) == VIRTUAL_STACK_VARS_REGNUM
+ || REGNO (operands[1]) == FRAME_POINTER_REGNUM))
+ {
+ emit_insn (gen_addsi3_fp (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+
+ /* Convert adds to subtracts if this makes loading the constant cheaper.
+ But only if we are allowed to generate new pseudos. */
+ if (! (reload_in_progress || reload_completed)
+ && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < -32)
+ {
+ HOST_WIDE_INT neg_value = - INTVAL (operands[2]);
+
+ if ( CONST_OK_FOR_I (neg_value)
+ || CONST_OK_FOR_M (neg_value)
+ || CONST_OK_FOR_N (neg_value))
+ {
+ operands[2] = copy_to_mode_reg (SImode, GEN_INT (neg_value));
+ emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ }
+
+ if (! mcore_addsub_operand (operands[2], SImode))
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+}")
+
+;; RBE: for some constants that are not in the range allowing a
+;; single operation, we will try a paired addi/addi instead of a
+;; movi/addi.  This relieves some register pressure at the expense
+;; of giving away some potential constant reuse.
+;;
+;; RBE 6/17/97: this didn't buy us anything, but I keep the pattern
+;; for later reference
+;;
+;; (define_insn "addsi3_i2"
+;; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+;; (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+;; (match_operand:SI 2 "const_int_operand" "g")))]
+;; "GET_CODE(operands[2]) == CONST_INT
+;; && ((INTVAL (operands[2]) > 32 && INTVAL(operands[2]) <= 64)
+;; || (INTVAL (operands[2]) < -32 && INTVAL(operands[2]) >= -64))"
+;; "*
+;; {
+;; HOST_WIDE_INT n = INTVAL(operands[2]);
+;; if (n > 0)
+;; {
+;; operands[2] = GEN_INT(n - 32);
+;; return \"addi\\t%0,32\;addi\\t%0,%2\";
+;; }
+;; else
+;; {
+;; n = (-n);
+;; operands[2] = GEN_INT(n - 32);
+;; return \"subi\\t%0,32\;subi\\t%0,%2\";
+;; }
+;; }"
+;; [(set_attr "length" "4")])
+
+(define_insn "addsi3_i"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0,0,0")
+ (match_operand:SI 2 "mcore_addsub_operand" "r,J,L")))]
+ ""
+ "@
+ addu %0,%2
+ addi %0,%2
+ subi %0,%M2")
+
+;; This exists so that address computations based on the frame pointer
+;; can be folded in when frame pointer elimination occurs. Ordinarily
+;; this would be bad because it allows insns which would require reloading,
+;; but without it, we get multiple adds where one would do.
+
+(define_insn "addsi3_fp"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0,0,0")
+ (match_operand:SI 2 "immediate_operand" "r,J,L")))]
+ "flag_omit_frame_pointer
+ && (reload_in_progress || reload_completed || REGNO (operands[1]) == FRAME_POINTER_REGNUM)"
+ "@
+ addu %0,%2
+ addi %0,%2
+ subi %0,%M2")
+
+;; RBE: for some constants that are not in the range allowing a
+;; single operation, we will try a paired addi/addi instead of a
+;; movi/addi.  This relieves some register pressure at the expense
+;; of giving away some potential constant reuse.
+;;
+;; RBE 6/17/97: this didn't buy us anything, but I keep the pattern
+;; for later reference
+;;
+;; (define_insn "subsi3_i2"
+;; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+;; (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+;; (match_operand:SI 2 "const_int_operand" "g")))]
+;; "TARGET_RBETEST && GET_CODE(operands[2]) == CONST_INT
+;; && ((INTVAL (operands[2]) > 32 && INTVAL(operands[2]) <= 64)
+;; || (INTVAL (operands[2]) < -32 && INTVAL(operands[2]) >= -64))"
+;; "*
+;; {
+;; HOST_WIDE_INT n = INTVAL(operands[2]);
+;; if ( n > 0)
+;; {
+;; operands[2] = GEN_INT( n - 32);
+;; return \"subi\\t%0,32\;subi\\t%0,%2\";
+;; }
+;; else
+;; {
+;; n = (-n);
+;; operands[2] = GEN_INT(n - 32);
+;; return \"addi\\t%0,32\;addi\\t%0,%2\";
+;; }
+;; }"
+;; [(set_attr "length" "4")])
+
+;(define_insn "subsi3"
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+; (minus:SI (match_operand:SI 1 "mcore_arith_K_operand" "0,0,r,K")
+; (match_operand:SI 2 "mcore_arith_J_operand" "r,J,0,0")))]
+; ""
+; "@
+; sub %0,%2
+; subi %0,%2
+; rsub %0,%1
+; rsubi %0,%1")
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r")
+ (minus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,0,r")
+ (match_operand:SI 2 "mcore_arith_J_operand" "r,J,0")))]
+ ""
+ "@
+ subu %0,%2
+ subi %0,%2
+ rsub %0,%1")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (minus:SI (match_operand:SI 1 "mcore_literal_K_operand" "K")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "0")))]
+ ""
+ "rsubi %0,%1")
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:DI 2 "mcore_arith_reg_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "*
+ {
+ if (TARGET_LITTLE_END)
+ return \"cmplt %0,%0\;addc %0,%2\;addc %R0,%R2\";
+ return \"cmplt %R0,%R0\;addc %R0,%R2\;addc %0,%2\";
+ }"
+ [(set_attr "length" "6")])
+
+;; special case for "longlong += 1"
+(define_insn ""
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:CC 17))]
+ ""
+ "*
+ {
+ if (TARGET_LITTLE_END)
+ return \"addi %0,1\;cmpnei %0,0\;incf %R0\";
+ return \"addi %R0,1\;cmpnei %R0,0\;incf %0\";
+ }"
+ [(set_attr "length" "6")])
+
+;; special case for "longlong -= 1"
+(define_insn ""
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")
+ (const_int -1)))
+ (clobber (reg:CC 17))]
+ ""
+ "*
+ {
+ if (TARGET_LITTLE_END)
+ return \"cmpnei %0,0\;decf %R0\;subi %0,1\";
+ return \"cmpnei %R0,0\;decf %0\;subi %R0,1\";
+ }"
+ [(set_attr "length" "6")])
+
+;; Special case for "longlong += const_int".
+;; We have to use a register for the const_int because we don't
+;; have an unsigned compare immediate... only +/- 1 get to
+;; play the no-extra-register game because they compare with 0.
+;; This winds up working out for any literal that is synthesized
+;; with a single instruction.  The more complicated ones look
+;; like they get broken into subregs and initialized too soon
+;; for us to catch here.  -- RBE 4/25/96
+;; Only allow for-sure positive values.
+
+(define_insn ""
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "r")))
+ (clobber (reg:CC 17))]
+ "GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 0 && ! (INTVAL (operands[2]) & 0x80000000)"
+ "*
+{
+ gcc_assert (GET_MODE (operands[2]) == SImode);
+ if (TARGET_LITTLE_END)
+ return \"addu %0,%2\;cmphs %0,%2\;incf %R0\";
+ return \"addu %R0,%2\;cmphs %R0,%2\;incf %0\";
+}"
+ [(set_attr "length" "6")])
+
+;; optimize "long long" + "unsigned long"
+;; won't trigger because of how the extension is expanded upstream.
+;; (define_insn ""
+;; [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+;; (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+;; (zero_extend:DI (match_operand:SI 2 "mcore_arith_reg_operand" "r"))))
+;; (clobber (reg:CC 17))]
+;; "0"
+;; "cmplt %R0,%R0\;addc %R0,%2\;inct %0"
+;; [(set_attr "length" "6")])
+
+;; optimize "long long" + "signed long"
+;; won't trigger because of how the extension is expanded upstream.
+;; (define_insn ""
+;; [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+;; (plus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "%0")
+;; (sign_extend:DI (match_operand:SI 2 "mcore_arith_reg_operand" "r"))))
+;; (clobber (reg:CC 17))]
+;; "0"
+;; "cmplt %R0,%R0\;addc %R0,%2\;inct %0\;btsti %2,31\;dect %0"
+;; [(set_attr "length" "6")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (minus:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")
+ (match_operand:DI 2 "mcore_arith_reg_operand" "r")))
+ (clobber (reg:CC 17))]
+ ""
+ "*
+ {
+ if (TARGET_LITTLE_END)
+ return \"cmphs %0,%0\;subc %0,%2\;subc %R0,%R2\";
+ return \"cmphs %R0,%R0\;subc %R0,%R2\;subc %0,%2\";
+ }"
+ [(set_attr "length" "6")])
+
+;; -------------------------------------------------------------------------
+;; Multiplication instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (mult:SI (match_operand:SI 1 "mcore_arith_reg_operand" "%0")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "r")))]
+ ""
+ "mult %0,%2")
+
+;;
+;; 32/32 signed division -- added to the MCORE instruction set spring 1997
+;;
+;; Different constraints based on the architecture revision...
+;;
+(define_expand "divsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (div:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ "TARGET_DIV"
+ "")
+
+;; MCORE Revision 1.50: restricts the divisor to be in r1. (6/97)
+;;
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (div:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "b")))]
+ "TARGET_DIV"
+ "divs %0,%2")
+
+;;
+;; 32/32 unsigned division -- added to the MCORE instruction set spring 1997
+;;
+;; Different constraints based on the architecture revision...
+;;
+(define_expand "udivsi3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (udiv:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ "TARGET_DIV"
+ "")
+
+;; MCORE Revision 1.50: restricts the divisor to be in r1. (6/97)
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "b")))]
+ "TARGET_DIV"
+ "divu %0,%2")
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic
+;; -------------------------------------------------------------------------
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (neg:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "*
+{
+ return \"rsubi %0,0\";
+}")
+
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (abs:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "abs %0")
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=&r")
+ (neg:DI (match_operand:DI 1 "mcore_arith_reg_operand" "0")))
+ (clobber (reg:CC 17))]
+ ""
+ "*
+{
+ if (TARGET_LITTLE_END)
+ return \"cmpnei %0,0\\n\\trsubi %0,0\\n\\tnot %R0\\n\\tincf %R0\";
+ return \"cmpnei %R0,0\\n\\trsubi %R0,0\\n\\tnot %0\\n\\tincf %0\";
+}"
+ [(set_attr "length" "8")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (not:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "not %0")
+
+;; -------------------------------------------------------------------------
+;; Zero extension instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "mcore_arith_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "general_operand" "0,m")))]
+ ""
+ "@
+ zexth %0
+ ld.h %0,%1"
+ [(set_attr "type" "shift,load")])
+
+;; ldh gives us a free zero-extension. The combiner picks up on this.
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:SI (mem:HI (match_operand:SI 1 "mcore_arith_reg_operand" "r"))))]
+ ""
+ "ld.h %0,(%1)"
+ [(set_attr "type" "load")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:SI (mem:HI (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "")))))]
+ "(INTVAL (operands[2]) >= 0) &&
+ (INTVAL (operands[2]) < 32) &&
+ ((INTVAL (operands[2])&1) == 0)"
+ "ld.h %0,(%1,%2)"
+ [(set_attr "type" "load")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "general_operand" "")))]
+ ""
+ "")
+
+;; RBE: XXX: we don't recognize that the xtrb3 kills the CC register.
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,b,r")
+ (zero_extend:SI (match_operand:QI 1 "general_operand" "0,r,m")))]
+ ""
+ "@
+ zextb %0
+ xtrb3 %0,%1
+ ld.b %0,%1"
+ [(set_attr "type" "shift,shift,load")])
+
+;; ldb gives us a free zero-extension. The combiner picks up on this.
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:SI (mem:QI (match_operand:SI 1 "mcore_arith_reg_operand" "r"))))]
+ ""
+ "ld.b %0,(%1)"
+ [(set_attr "type" "load")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:SI (mem:QI (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "")))))]
+ "(INTVAL (operands[2]) >= 0) &&
+ (INTVAL (operands[2]) < 16)"
+ "ld.b %0,(%1,%2)"
+ [(set_attr "type" "load")])
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "mcore_arith_reg_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "general_operand" "")))]
+ ""
+ "")
+
+;; RBE: XXX: we don't recognize that the xtrb3 kills the CC register.
+(define_insn ""
+ [(set (match_operand:HI 0 "mcore_arith_reg_operand" "=r,b,r")
+ (zero_extend:HI (match_operand:QI 1 "general_operand" "0,r,m")))]
+ ""
+ "@
+ zextb %0
+ xtrb3 %0,%1
+ ld.b %0,%1"
+ [(set_attr "type" "shift,shift,load")])
+
+;; ldb gives us a free zero-extension.  The combiner picks up on this.
+;; This doesn't catch references into a structure.
+;; Note that normally the compiler uses the above insn, unless it turns
+;; out that we're dealing with a volatile...
+(define_insn ""
+ [(set (match_operand:HI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:HI (mem:QI (match_operand:SI 1 "mcore_arith_reg_operand" "r"))))]
+ ""
+ "ld.b %0,(%1)"
+ [(set_attr "type" "load")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "mcore_arith_reg_operand" "=r")
+ (zero_extend:HI (mem:QI (plus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "")))))]
+ "(INTVAL (operands[2]) >= 0) &&
+ (INTVAL (operands[2]) < 16)"
+ "ld.b %0,(%1,%2)"
+ [(set_attr "type" "load")])
+
+
+;; -------------------------------------------------------------------------
+;; Sign extension instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "mcore_arith_reg_operand" "=r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r"))]
+ ""
+ "
+ {
+ int low, high;
+
+ if (TARGET_LITTLE_END)
+ low = 0, high = 4;
+ else
+ low = 4, high = 0;
+
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], low),
+ operands[1]));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], high),
+ gen_rtx_ASHIFTRT (SImode,
+ gen_rtx_SUBREG (SImode, operands[0], low),
+ GEN_INT (31))));
+ DONE;
+ }"
+)
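+
+;; Net effect (a sketch, hypothetical registers): after the low word
+;; is copied from the source, the high word is filled with the sign,
+;; roughly
+;;
+;;	mov	rlo,rsrc
+;;	mov	rhi,rlo
+;;	asri	rhi,31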
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "sexth %0")
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "sextb %0")
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "mcore_arith_reg_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "sextb %0")
+
+;; -------------------------------------------------------------------------
+;; Move instructions
+;; -------------------------------------------------------------------------
+
+;; SImode
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SImode, operands[1]);
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_general_movdst_operand" "=r,r,a,r,a,r,m")
+ (match_operand:SI 1 "mcore_general_movsrc_operand" "r,P,i,c,R,m,r"))]
+ "(register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "* return mcore_output_move (insn, operands, SImode);"
+ [(set_attr "type" "move,move,move,move,load,load,store")])
+
+;;
+;; HImode
+;;
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (HImode, operands[1]);
+ else if (CONSTANT_P (operands[1])
+ && (GET_CODE (operands[1]) != CONST_INT
+ || (! CONST_OK_FOR_I (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_M (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_N (INTVAL (operands[1]))))
+ && ! reload_completed && ! reload_in_progress)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (reg, operands[1]));
+ operands[1] = gen_lowpart (HImode, reg);
+ }
+}")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "mcore_general_movdst_operand" "=r,r,a,r,r,m")
+ (match_operand:HI 1 "mcore_general_movsrc_operand" "r,P,i,c,m,r"))]
+ "(register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "* return mcore_output_move (insn, operands, HImode);"
+ [(set_attr "type" "move,move,move,move,load,store")])
+
+;;
+;; QImode
+;;
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (QImode, operands[1]);
+ else if (CONSTANT_P (operands[1])
+ && (GET_CODE (operands[1]) != CONST_INT
+ || (! CONST_OK_FOR_I (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_M (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_N (INTVAL (operands[1]))))
+ && ! reload_completed && ! reload_in_progress)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (reg, operands[1]));
+ operands[1] = gen_lowpart (QImode, reg);
+ }
+}")
+
+(define_insn ""
+ [(set (match_operand:QI 0 "mcore_general_movdst_operand" "=r,r,a,r,r,m")
+ (match_operand:QI 1 "mcore_general_movsrc_operand" "r,P,i,c,m,r"))]
+ "(register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "* return mcore_output_move (insn, operands, QImode);"
+ [(set_attr "type" "move,move,move,move,load,store")])
+
+
+;; DImode
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DImode, operands[1]);
+ else if (GET_CODE (operands[1]) == CONST_INT
+ && ! CONST_OK_FOR_I (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_M (INTVAL (operands[1]))
+ && ! CONST_OK_FOR_N (INTVAL (operands[1])))
+ {
+ int i;
+ for (i = 0; i < UNITS_PER_WORD * 2; i += UNITS_PER_WORD)
+ emit_move_insn (simplify_gen_subreg (SImode, operands[0], DImode, i),
+ simplify_gen_subreg (SImode, operands[1], DImode, i));
+ DONE;
+ }
+}")
+
+(define_insn "movdi_i"
+ [(set (match_operand:DI 0 "general_operand" "=r,r,r,r,a,r,m")
+ (match_operand:DI 1 "mcore_general_movsrc_operand" "I,M,N,r,R,m,r"))]
+ ""
+ "* return mcore_output_movedouble (operands, DImode);"
+ [(set_attr "length" "4") (set_attr "type" "move,move,move,move,load,load,store")])
+
+;; SFmode
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (SFmode, operands[1]);
+}")
+
+(define_insn "movsf_i"
+ [(set (match_operand:SF 0 "general_operand" "=r,r,m")
+ (match_operand:SF 1 "general_operand" "r,m,r"))]
+ ""
+ "@
+ mov %0,%1
+ ld.w %0,%1
+ st.w %1,%0"
+ [(set_attr "type" "move,load,store")])
+
+;; DFmode
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (DFmode, operands[1]);
+}")
+
+(define_insn "movdf_k"
+ [(set (match_operand:DF 0 "general_operand" "=r,r,m")
+ (match_operand:DF 1 "general_operand" "r,m,r"))]
+ ""
+ "* return mcore_output_movedouble (operands, DFmode);"
+ [(set_attr "length" "4") (set_attr "type" "move,load,store")])
+
+
+;; Load/store multiple
+
+;; ??? This is not currently used.
+(define_insn "ldm"
+ [(set (match_operand:TI 0 "mcore_arith_reg_operand" "=r")
+ (mem:TI (match_operand:SI 1 "mcore_arith_reg_operand" "r")))]
+ ""
+ "ldq %U0,(%1)")
+
+;; ??? This is not currently used.
+(define_insn "stm"
+ [(set (mem:TI (match_operand:SI 0 "mcore_arith_reg_operand" "r"))
+ (match_operand:TI 1 "mcore_arith_reg_operand" "r"))]
+ ""
+ "stq %U1,(%0)")
+
+(define_expand "load_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))])]
+ ""
+ "
+{
+ int regno, count, i;
+
+  /* Support only loading a constant number of registers from memory and
+     only if there are at least two of them.  The last register must be r15.  */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) < 2
+ || GET_CODE (operands[1]) != MEM
+ || XEXP (operands[1], 0) != stack_pointer_rtx
+ || GET_CODE (operands[0]) != REG
+ || REGNO (operands[0]) + INTVAL (operands[2]) != 16)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[0]);
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ for (i = 0; i < count; i++)
+ XVECEXP (operands[3], 0, i)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, regno + i),
+ gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx,
+ i * 4)));
+}")
+
+(define_insn ""
+ [(match_parallel 0 "mcore_load_multiple_operation"
+ [(set (match_operand:SI 1 "mcore_arith_reg_operand" "=r")
+ (mem:SI (match_operand:SI 2 "register_operand" "r")))])]
+ "GET_CODE (operands[2]) == REG && REGNO (operands[2]) == STACK_POINTER_REGNUM"
+ "ldm %1-r15,(%2)")
+
+(define_expand "store_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))])]
+ ""
+ "
+{
+ int regno, count, i;
+
+  /* Support only storing a constant number of registers to memory and
+     only if there are at least two of them.  The last register must be r15.  */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) < 2
+ || GET_CODE (operands[0]) != MEM
+ || XEXP (operands[0], 0) != stack_pointer_rtx
+ || GET_CODE (operands[1]) != REG
+ || REGNO (operands[1]) + INTVAL (operands[2]) != 16)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[1]);
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ for (i = 0; i < count; i++)
+ XVECEXP (operands[3], 0, i)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx,
+ i * 4)),
+ gen_rtx_REG (SImode, regno + i));
+}")
+
+(define_insn ""
+ [(match_parallel 0 "mcore_store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r"))])]
+ "GET_CODE (operands[2]) == REG && REGNO (operands[2]) == STACK_POINTER_REGNUM"
+ "stm %1-r15,(%2)")
+
+;; ------------------------------------------------------------------------
+;; Define the real conditional branch instructions.
+;; ------------------------------------------------------------------------
+
+;; At top level, condition tests are eq/ne, because we
+;; are comparing against the condition register (which
+;; holds the result of the true relational test).
+
+(define_insn "branch_true"
+ [(set (pc) (if_then_else (ne (reg:CC 17) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "jbt %l0"
+ [(set_attr "type" "brcond")])
+
+(define_insn "branch_false"
+ [(set (pc) (if_then_else (eq (reg:CC 17) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "jbf %l0"
+ [(set_attr "type" "brcond")])
+
+(define_insn "inverse_branch_true"
+ [(set (pc) (if_then_else (ne (reg:CC 17) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "jbf %l0"
+ [(set_attr "type" "brcond")])
+
+(define_insn "inverse_branch_false"
+ [(set (pc) (if_then_else (eq (reg:CC 17) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "jbt %l0"
+ [(set_attr "type" "brcond")])
+
+;; Conditional branch insns
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator:SI 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "mcore_compare_operand")
+ (match_operand:SI 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ "
+{
+ bool invert;
+ invert = mcore_gen_compare (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+
+ if (invert)
+ emit_jump_insn (gen_branch_false (operands[3]));
+ else
+ emit_jump_insn (gen_branch_true (operands[3]));
+ DONE;
+}")
+
+
+
+;; ------------------------------------------------------------------------
+;; Jump and linkage insns
+;; ------------------------------------------------------------------------
+
+(define_insn "jump_real"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jbr %l0"
+ [(set_attr "type" "branch")])
+
+(define_expand "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "
+{
+ emit_jump_insn (gen_jump_real (operand0));
+ DONE;
+}
+")
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "mcore_arith_reg_operand" "r"))]
+ ""
+ "jmp %0"
+ [(set_attr "type" "jmp")])
+
+(define_expand "call"
+ [(parallel[(call (match_operand:SI 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 15))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM
+ && ! register_operand (XEXP (operands[0], 0), SImode)
+ && ! mcore_symbolic_address_p (XEXP (operands[0], 0)))
+ operands[0] = gen_rtx_MEM (GET_MODE (operands[0]),
+ force_reg (Pmode, XEXP (operands[0], 0)));
+}")
+
+(define_insn "call_internal"
+ [(call (mem:SI (match_operand:SI 0 "mcore_call_address_operand" "riR"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 15))]
+ ""
+ "* return mcore_output_call (operands, 0);")
+
+(define_expand "call_value"
+ [(parallel[(set (match_operand 0 "register_operand" "")
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) == MEM
+ && ! register_operand (XEXP (operands[0], 0), SImode)
+ && ! mcore_symbolic_address_p (XEXP (operands[0], 0)))
+ operands[1] = gen_rtx_MEM (GET_MODE (operands[1]),
+ force_reg (Pmode, XEXP (operands[1], 0)));
+}")
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (match_operand:SI 1 "mcore_call_address_operand" "riR"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))]
+ ""
+ "* return mcore_output_call (operands, 1);")
+
+(define_insn "call_value_struct"
+ [(parallel [(set (match_parallel 0 ""
+ [(expr_list (match_operand 3 "register_operand" "") (match_operand 4 "immediate_operand" ""))
+ (expr_list (match_operand 5 "register_operand" "") (match_operand 6 "immediate_operand" ""))])
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))])]
+ ""
+ "* return mcore_output_call (operands, 1);"
+)
+
+
+;; ------------------------------------------------------------------------
+;; Misc insns
+;; ------------------------------------------------------------------------
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "or r0,r0")
+
+(define_insn "tablejump"
+ [(set (pc)
+ (match_operand:SI 0 "mcore_arith_reg_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp %0"
+ [(set_attr "type" "jmp")])
+
+(define_insn "*return"
+ [(return)]
+ "reload_completed && ! mcore_naked_function_p ()"
+ "jmp r15"
+ [(set_attr "type" "jmp")])
+
+(define_insn "*no_return"
+ [(return)]
+ "reload_completed && mcore_naked_function_p ()"
+ ""
+ [(set_attr "length" "0")]
+)
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "mcore_expand_prolog (); DONE;")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "mcore_expand_epilog ();")
+
+;; ------------------------------------------------------------------------
+;; Scc instructions
+;; ------------------------------------------------------------------------
+
+(define_insn "mvc"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (ne:SI (reg:CC 17) (const_int 0)))]
+ ""
+ "mvc %0"
+ [(set_attr "type" "move")])
+
+(define_insn "mvcv"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (eq:SI (reg:CC 17) (const_int 0)))]
+ ""
+ "mvcv %0"
+ [(set_attr "type" "move")])
+
+; in 0.97 use (LE 0) with (LT 1) and complement c. BRC
+(define_split
+ [(parallel[
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (ne:SI (gt:CC (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (const_int 0))
+ (const_int 0)))
+ (clobber (reg:SI 17))])]
+ ""
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 1) (const_int 1)))
+ (set (match_dup 0) (eq:SI (reg:CC 17) (const_int 0)))])
+
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "mcore_compare_operand" "")
+ (match_operand:SI 3 "nonmemory_operand" "")]))]
+ ""
+ "
+{
+ bool invert;
+ invert = mcore_gen_compare (GET_CODE (operands[1]),
+ operands[2], operands[3]);
+
+ if (invert)
+ emit_insn (gen_mvcv (operands[0]));
+ else
+ emit_insn (gen_mvc (operands[0]));
+ DONE;
+}")
+
+(define_insn "incscc"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (plus:SI (ne (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "inct %0")
+
+(define_insn "incscc_false"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (plus:SI (eq (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_reg_operand" "0")))]
+ ""
+ "incf %0")
+
+(define_insn "decscc"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (minus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+ (ne (reg:CC 17) (const_int 0))))]
+ ""
+ "dect %0")
+
+(define_insn "decscc_false"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (minus:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0")
+ (eq (reg:CC 17) (const_int 0))))]
+ ""
+ "decf %0")
+
+;; ------------------------------------------------------------------------
+;; Conditional move patterns.
+;; ------------------------------------------------------------------------
+
+(define_expand "smaxsi3"
+ [(set (reg:CC 17)
+ (lt:CC (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (smax:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ ""
+ [(set (reg:CC 17)
+ (lt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+; no tstgt in 0.97, so just use cmplti (btsti x,31) and reverse move
+; condition BRC
+(define_split
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (smax:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (const_int 0)))]
+ ""
+ [(set (reg:CC 17)
+ (lt:CC (match_dup 1) (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+ (match_dup 1) (const_int 0)))]
+ "")
+
+(define_expand "sminsi3"
+ [(set (reg:CC 17)
+ (lt:CC (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (if_then_else:SI (ne (reg:CC 17) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (smin:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ ""
+ [(set (reg:CC 17)
+ (lt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:SI (ne (reg:CC 17) (const_int 0))
+ (match_dup 1) (match_dup 2)))]
+ "")
+
+;(define_split
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+; (smin:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+; (const_int 0)))]
+; ""
+; [(set (reg:CC 17)
+; (gt:CC (match_dup 1) (const_int 0)))
+; (set (match_dup 0)
+; (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+; (match_dup 1) (const_int 0)))]
+; "")
+
+; Changed these unsigned patterns to use geu instead of ltu.  It appears
+; that the c-torture & ssrl test suites didn't catch these!  They only
+; showed up in friedman's clib work.  BRC 7/7/95
+
+(define_expand "umaxsi3"
+ [(set (reg:CC 17)
+ (geu:CC (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (umax:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ ""
+ [(set (reg:CC 17)
+ (geu:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:SI (eq (reg:CC 17) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_expand "uminsi3"
+ [(set (reg:CC 17)
+ (geu:CC (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (if_then_else:SI (ne (reg:CC 17) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (umin:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "mcore_arith_reg_operand" "")))]
+ ""
+ [(set (reg:CC 17)
+ (geu:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:SI (ne (reg:CC 17) (const_int 0))
+ (match_dup 2) (match_dup 1)))]
+ "")
+
+;; ------------------------------------------------------------------------
+;; Conditional move patterns really start here
+;; ------------------------------------------------------------------------
+
+;; the "movtK" patterns are experimental. they are intended to account for
+;; gcc's mucking on code such as:
+;;
+;; free_ent = ((block_compress) ? 257 : 256 );
+;;
+;; these patterns help to get a tstne/bgeni/inct (or equivalent) sequence
+;; when both arms have constants that are +/- 1 of each other.
+;;
+;; note in the following patterns that the "movtK" ones should be the first
+;; one defined in each sequence. this is because the general pattern also
+;; matches, so use ordering to determine priority (it's easier this way than
+;; adding conditions to the general patterns). BRC
+;;
+;; the U and Q constraints are necessary to ensure that reload does the
+;; 'right thing'. U constrains the operand to 0 and Q to 1 for use in the
+;; clrt & clrf and clrt/inct & clrf/incf patterns. BRC 6/26
+;;
+;; ??? there appears to be some problems with these movtK patterns for ops
+;; other than eq & ne. need to fix. 6/30 BRC
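+
+;; E.g. for the free_ent assignment above, since 257 and 256 differ by
+;; one, mcore_output_cmov can produce something like (a sketch)
+;;
+;;	cmpnei	rb,0
+;;	bgeni	r2,8		[r2 = 256]
+;;	inct	r2		[r2 = 257 when block_compress != 0]
+;;
+;; instead of materializing both constants and doing a full cmov.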
+
+;; ------------------------------------------------------------------------
+;; ne
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+
+(define_insn "movtK_1"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (ne (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ " GET_CODE (operands[1]) == CONST_INT
+ && GET_CODE (operands[2]) == CONST_INT
+ && ( (INTVAL (operands[1]) - INTVAL (operands[2]) == 1)
+ || (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov (operands, 1, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movt0"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (ne (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movt %0,%1
+ movf %0,%2
+ clrt %0
+ clrf %0")
+
+;; ------------------------------------------------------------------------
+;; eq
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (eq (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ " GET_CODE (operands[1]) == CONST_INT
+ && GET_CODE (operands[2]) == CONST_INT
+ && ( (INTVAL (operands[1]) - INTVAL (operands[2]) == 1)
+ || (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov (operands, 0, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movf0"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (eq (reg:CC 17) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movf %0,%1
+ movt %0,%2
+ clrf %0
+ clrt %0")
+
+; Turns lsli rx,imm/btsti rx,31 into btsti rx,imm.  Not done by a peephole
+; because the instructions are not adjacent (peepholes are related by
+; position, not by dataflow).  BRC
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (eq (zero_extract:SI
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 1)
+ (match_operand:SI 2 "mcore_literal_K_operand" "K,K,K,K"))
+ (const_int 0))
+ (match_operand:SI 3 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 4 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ btsti %1,%2\;movf %0,%3
+ btsti %1,%2\;movt %0,%4
+ btsti %1,%2\;clrf %0
+ btsti %1,%2\;clrt %0"
+ [(set_attr "length" "4")])
+
+; Turns sextb rx/btsti rx,31 into btsti rx,7.  Must be QImode to be safe.  BRC
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (eq (lshiftrt:SI
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 7))
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ "GET_CODE (operands[1]) == SUBREG &&
+ GET_MODE (SUBREG_REG (operands[1])) == QImode"
+ "@
+ btsti %1,7\;movf %0,%2
+ btsti %1,7\;movt %0,%3
+ btsti %1,7\;clrf %0
+ btsti %1,7\;clrt %0"
+ [(set_attr "length" "4")])
+
+
+;; ------------------------------------------------------------------------
+;; ne
+;; ------------------------------------------------------------------------
+
+;; Combine creates this from an andn instruction in a scc sequence.
+;; We must recognize it to get conditional moves generated.
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (ne (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")
+ (match_operand:SI 3 "mcore_arith_O_operand" "O")))]
+ " GET_CODE (operands[2]) == CONST_INT
+ && GET_CODE (operands[3]) == CONST_INT
+ && ( (INTVAL (operands[2]) - INTVAL (operands[3]) == 1)
+ || (INTVAL (operands[3]) - INTVAL (operands[2]) == 1))"
+ "*
+{
+ rtx out_operands[4];
+ out_operands[0] = operands[0];
+ out_operands[1] = operands[2];
+ out_operands[2] = operands[3];
+ out_operands[3] = operands[1];
+
+ return mcore_output_cmov (out_operands, 1, \"cmpnei %3,0\");
+
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "movt2"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (ne (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ cmpnei %1,0\;movt %0,%2
+ cmpnei %1,0\;movf %0,%3
+ cmpnei %1,0\;clrt %0
+ cmpnei %1,0\;clrf %0"
+ [(set_attr "length" "4")])
+
+; Turns lsli rx,imm/btsti rx,31 into btsti rx,imm.  Not done by a peephole
+; because the instructions are not adjacent (peepholes are related by
+; position, not by dataflow).  BRC
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (ne (zero_extract:SI
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 1)
+ (match_operand:SI 2 "mcore_literal_K_operand" "K,K,K,K"))
+ (const_int 0))
+ (match_operand:SI 3 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 4 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ btsti %1,%2\;movt %0,%3
+ btsti %1,%2\;movf %0,%4
+ btsti %1,%2\;clrt %0
+ btsti %1,%2\;clrf %0"
+ [(set_attr "length" "4")])
+
+; Turns sextb rx/btsti rx,31 into btsti rx,7.  Must be QImode to be safe.  BRC
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (ne (lshiftrt:SI
+ (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 7))
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ "GET_CODE (operands[1]) == SUBREG &&
+ GET_MODE (SUBREG_REG (operands[1])) == QImode"
+ "@
+ btsti %1,7\;movt %0,%2
+ btsti %1,7\;movf %0,%3
+ btsti %1,7\;clrt %0
+ btsti %1,7\;clrf %0"
+ [(set_attr "length" "4")])
+
+;; ------------------------------------------------------------------------
+;; eq/eq
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_4"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (eq (eq:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[1]) == CONST_INT &&
+ GET_CODE (operands[2]) == CONST_INT &&
+ ((INTVAL (operands[1]) - INTVAL (operands[2]) == 1) ||
+ (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov(operands, 1, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movt3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (eq (eq:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movt %0,%1
+ movf %0,%2
+ clrt %0
+ clrf %0")
+
+;; ------------------------------------------------------------------------
+;; eq/ne
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_5"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (eq (ne:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[1]) == CONST_INT &&
+ GET_CODE (operands[2]) == CONST_INT &&
+ ((INTVAL (operands[1]) - INTVAL (operands[2]) == 1) ||
+ (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov (operands, 0, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movf1"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (eq (ne:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movf %0,%1
+ movt %0,%2
+ clrf %0
+ clrt %0")
+
+;; ------------------------------------------------------------------------
+;; eq
+;; ------------------------------------------------------------------------
+
+;; Combine creates this from an andn instruction in a scc sequence.
+;; We must recognize it to get conditional moves generated.
+
+; experimental conditional move with two constants +/- 1 BRC
+
+(define_insn "movtK_6"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (eq (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")
+ (match_operand:SI 3 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[1]) == CONST_INT &&
+ GET_CODE (operands[2]) == CONST_INT &&
+ ((INTVAL (operands[2]) - INTVAL (operands[3]) == 1) ||
+ (INTVAL (operands[3]) - INTVAL (operands[2]) == 1))"
+ "*
+{
+ rtx out_operands[4];
+ out_operands[0] = operands[0];
+ out_operands[1] = operands[2];
+ out_operands[2] = operands[3];
+ out_operands[3] = operands[1];
+
+ return mcore_output_cmov (out_operands, 0, \"cmpnei %3,0\");
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "movf3"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (eq (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ cmpnei %1,0\;movf %0,%2
+ cmpnei %1,0\;movt %0,%3
+ cmpnei %1,0\;clrf %0
+ cmpnei %1,0\;clrt %0"
+ [(set_attr "length" "4")])
+
+;; ------------------------------------------------------------------------
+;; ne/eq
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_7"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (ne (eq:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[1]) == CONST_INT &&
+ GET_CODE (operands[2]) == CONST_INT &&
+ ((INTVAL (operands[1]) - INTVAL (operands[2]) == 1) ||
+ (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov (operands, 0, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movf4"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (ne (eq:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movf %0,%1
+ movt %0,%2
+ clrf %0
+ clrt %0")
+
+;; ------------------------------------------------------------------------
+;; ne/ne
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_8"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (ne (ne:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_O_operand" "O")
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[1]) == CONST_INT &&
+ GET_CODE (operands[2]) == CONST_INT &&
+ ((INTVAL (operands[1]) - INTVAL (operands[2]) == 1) ||
+ (INTVAL (operands[2]) - INTVAL (operands[1]) == 1))"
+ "* return mcore_output_cmov (operands, 1, NULL);"
+ [(set_attr "length" "4")])
+
+(define_insn "movt4"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (ne (ne:SI (reg:CC 17) (const_int 0)) (const_int 0))
+ (match_operand:SI 1 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 2 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ movt %0,%1
+ movf %0,%2
+ clrt %0
+ clrf %0")
+
+;; Also need patterns to recognize lt/ge, since otherwise the compiler will
+;; try to output not/asri/tstne/movf.
+
+;; ------------------------------------------------------------------------
+;; lt
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_9"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (lt (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")
+ (match_operand:SI 3 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[2]) == CONST_INT &&
+ GET_CODE (operands[3]) == CONST_INT &&
+ ((INTVAL (operands[2]) - INTVAL (operands[3]) == 1) ||
+ (INTVAL (operands[3]) - INTVAL (operands[2]) == 1))"
+ "*
+{
+ rtx out_operands[4];
+ out_operands[0] = operands[0];
+ out_operands[1] = operands[2];
+ out_operands[2] = operands[3];
+ out_operands[3] = operands[1];
+
+ return mcore_output_cmov (out_operands, 1, \"btsti %3,31\");
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "movt5"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (lt (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ btsti %1,31\;movt %0,%2
+ btsti %1,31\;movf %0,%3
+ btsti %1,31\;clrt %0
+ btsti %1,31\;clrf %0"
+ [(set_attr "length" "4")])
+
+
+;; ------------------------------------------------------------------------
+;; ge
+;; ------------------------------------------------------------------------
+
+; experimental conditional move with two constants +/- 1 BRC
+(define_insn "movtK_10"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (if_then_else:SI
+ (ge (match_operand:SI 1 "mcore_arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_O_operand" "O")
+ (match_operand:SI 3 "mcore_arith_O_operand" "O")))]
+ "GET_CODE (operands[2]) == CONST_INT &&
+ GET_CODE (operands[3]) == CONST_INT &&
+ ((INTVAL (operands[2]) - INTVAL (operands[3]) == 1) ||
+ (INTVAL (operands[3]) - INTVAL (operands[2]) == 1))"
+ "*
+{
+ rtx out_operands[4];
+ out_operands[0] = operands[0];
+ out_operands[1] = operands[2];
+ out_operands[2] = operands[3];
+ out_operands[3] = operands[1];
+
+ return mcore_output_cmov (out_operands, 0, \"btsti %3,31\");
+}"
+ [(set_attr "length" "6")])
+
+(define_insn "movf5"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,r,r,r")
+ (if_then_else:SI (ge (match_operand:SI 1 "mcore_arith_reg_operand" "r,r,r,r")
+ (const_int 0))
+ (match_operand:SI 2 "mcore_arith_imm_operand" "r,0,U,0")
+ (match_operand:SI 3 "mcore_arith_imm_operand" "0,r,0,U")))]
+ ""
+ "@
+ btsti %1,31\;movf %0,%2
+ btsti %1,31\;movt %0,%3
+ btsti %1,31\;clrf %0
+ btsti %1,31\;clrt %0"
+ [(set_attr "length" "4")])
+
+;; ------------------------------------------------------------------------
+;; Bitfield extract (xtrbN)
+;; ------------------------------------------------------------------------
+
+; Sometimes we're better off using QI/HI mode and letting the machine-
+; independent part expand insv and extv.
+;
+; e.g., sequences like the following [an insertion]:
+;
+; ldw r8,(r6)
+; movi r7,0x00ffffff
+; and r8,r7 r7 dead
+; stw r8,(r6) r8 dead
+;
+; become:
+;
+; movi r8,0
+; stb r8,(r6) r8 dead
+;
+; It looks like always using SI mode is a win except in this type of code
+; (when adjacent bit fields collapse on a byte or halfword boundary).  When
+; expanding with SI mode, non-adjacent bit-field masks fold, but with QI/HI
+; mode they do not.  One thought is to add some peepholes to cover cases
+; like the above, but this is not a general solution.
+;
+; -mword-bitfields expands/inserts using SI mode.  Otherwise, do it with
+; the smallest mode possible (using the machine-independent expansions). BRC
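+;
+; As an illustrative sketch (the struct is hypothetical, not taken from the
+; sources), on a big-endian layout the ldw/and/stw sequence above is what
+; clearing the leading 8-bit field of
+;
+;   struct s { unsigned a : 8; unsigned b : 24; };
+;
+; looks like: "p->a = 0" masks with 0x00ffffff, which collapses to storing
+; a single zero byte.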
+
+;(define_expand "extv"
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+; (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+; (match_operand:SI 2 "const_int_operand" "")
+; (match_operand:SI 3 "const_int_operand" "")))
+; (clobber (reg:CC 17))]
+; ""
+; "
+;{
+; if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) % 8 != 0)
+; {
+; if (TARGET_W_FIELD)
+; {
+; rtx lshft = GEN_INT (32 - (INTVAL (operands[2]) + INTVAL (operands[3])));
+; rtx rshft = GEN_INT (32 - INTVAL (operands[2]));
+;
+; emit_insn (gen_rtx_SET (SImode, operands[0], operands[1]));
+; emit_insn (gen_rtx_SET (SImode, operands[0],
+; gen_rtx_ASHIFT (SImode, operands[0], lshft)));
+; emit_insn (gen_rtx_SET (SImode, operands[0],
+; gen_rtx_ASHIFTRT (SImode, operands[0], rshft)));
+; DONE;
+; }
+; else
+; FAIL;
+; }
+;}")
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ ""
+ "
+{
+ if (INTVAL (operands[2]) == 8 && INTVAL (operands[3]) % 8 == 0)
+ {
+ /* 8-bit field, aligned properly, use the xtrb[0123]+sext sequence. */
+ /* not DONE, not FAIL, but let the RTL get generated.... */
+ }
+ else if (TARGET_W_FIELD)
+ {
+ /* Arbitrary placement; note that the tree->rtl generator will make
+ something close to this if we return FAIL */
+ rtx lshft = GEN_INT (32 - (INTVAL (operands[2]) + INTVAL (operands[3])));
+ rtx rshft = GEN_INT (32 - INTVAL (operands[2]));
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx tmp2 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (SImode, tmp1, operands[1]));
+ emit_insn (gen_rtx_SET (SImode, tmp2,
+ gen_rtx_ASHIFT (SImode, tmp1, lshft)));
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_ASHIFTRT (SImode, tmp2, rshft)));
+ DONE;
+ }
+ else
+ {
+ /* Let the caller choose an alternate sequence. */
+ FAIL;
+ }
+}")
+
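+; As a worked example (illustrative, tracing the arithmetic above): an
+; 8-bit signed field at bit position 4 misses the aligned xtrb case, so
+; under TARGET_W_FIELD lshft is 32 - (8 + 4) = 20 and rshft is 32 - 8 = 24:
+; a left shift by 20 followed by an arithmetic right shift by 24.
+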
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ ""
+ "
+{
+ if (INTVAL (operands[2]) == 8 && INTVAL (operands[3]) % 8 == 0)
+ {
+ /* 8-bit field, aligned properly, use the xtrb[0123] sequence. */
+ /* Let the template generate some RTL.... */
+ }
+ else if (CONST_OK_FOR_K ((1 << INTVAL (operands[2])) - 1))
+ {
+ /* A narrow bit-field (<=5 bits) means we can do a shift to put
+ it in place and then use an andi to extract it.
+ This is as good as a shiftleft/shiftright. */
+
+ rtx shifted;
+ rtx mask = GEN_INT ((1 << INTVAL (operands[2])) - 1);
+
+ if (INTVAL (operands[3]) == 0)
+ {
+ shifted = operands[1];
+ }
+ else
+ {
+ rtx rshft = GEN_INT (INTVAL (operands[3]));
+ shifted = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (SImode, shifted,
+ gen_rtx_LSHIFTRT (SImode, operands[1], rshft)));
+ }
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_AND (SImode, shifted, mask)));
+ DONE;
+ }
+ else if (TARGET_W_FIELD)
+ {
+ /* Arbitrary pattern; play shift/shift games to get it.
+ This is pretty much what the caller will do if we say FAIL. */
+ rtx lshft = GEN_INT (32 - (INTVAL (operands[2]) + INTVAL (operands[3])));
+ rtx rshft = GEN_INT (32 - INTVAL (operands[2]));
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx tmp2 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (SImode, tmp1, operands[1]));
+ emit_insn (gen_rtx_SET (SImode, tmp2,
+ gen_rtx_ASHIFT (SImode, tmp1, lshft)));
+ emit_insn (gen_rtx_SET (SImode, operands[0],
+ gen_rtx_LSHIFTRT (SImode, tmp2, rshft)));
+ DONE;
+ }
+ else
+ {
+ /* Make the compiler figure out some alternative mechanism. */
+ FAIL;
+ }
+
+ /* Emit the RTL pattern; something will match it later. */
+}")
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "general_operand" ""))
+ (clobber (reg:CC 17))]
+ ""
+ "
+{
+ if (mcore_expand_insv (operands))
+ {
+ DONE;
+ }
+ else
+ {
+ FAIL;
+ }
+}")
+
+;;
+;; The xtrb[0123] instructions handily get at 8-bit fields on nice boundaries,
+;; but they do force you through r1.
+;;
+;; The combiner will build such patterns for us, so we'll make them available
+;; for its use.
+;;
+;; Note that we have both SIGNED and UNSIGNED versions of these...
+;;
+
+;;
+;; These no longer worry about clobbering the CC bit; it is not clear this
+;; is good...
+;;
+;; The SIGNED versions of these:
+;;
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,b")
+ (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,r") (const_int 8) (const_int 24)))]
+ ""
+ "@
+ asri %0,24
+ xtrb0 %0,%1\;sextb %0"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=b")
+ (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r") (const_int 8) (const_int 16)))]
+ ""
+ "xtrb1 %0,%1\;sextb %0"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=b")
+ (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r") (const_int 8) (const_int 8)))]
+ ""
+ "xtrb2 %0,%1\;sextb %0"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0") (const_int 8) (const_int 0)))]
+ ""
+ "sextb %0"
+ [(set_attr "type" "shift")])
+
+;; The UNSIGNED uses of xtrb[0123]:
+;;
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,b")
+ (zero_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,r") (const_int 8) (const_int 24)))]
+ ""
+ "@
+ lsri %0,24
+ xtrb0 %0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=b")
+ (zero_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r") (const_int 8) (const_int 16)))]
+ ""
+ "xtrb1 %0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=b")
+ (zero_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "r") (const_int 8) (const_int 8)))]
+ ""
+ "xtrb2 %0,%1"
+ [(set_attr "type" "shift")])
+
+;; This can be peepholed if it follows a ldb ...
+(define_insn ""
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r,b")
+ (zero_extract:SI (match_operand:SI 1 "mcore_arith_reg_operand" "0,r") (const_int 8) (const_int 0)))]
+ ""
+ "@
+ zextb %0
+ xtrb3 %0,%1\;zextb %0"
+ [(set_attr "type" "shift")])
+
+
+;; ------------------------------------------------------------------------
+;; Block move - adapted from m88k.md
+;; ------------------------------------------------------------------------
+
+(define_expand "movmemsi"
+ [(parallel [(set (mem:BLK (match_operand:BLK 0 "" ""))
+ (mem:BLK (match_operand:BLK 1 "" "")))
+ (use (match_operand:SI 2 "general_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))])]
+ ""
+ "
+{
+ if (mcore_expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; ;;; ??? These patterns are meant to be generated from expand_block_move,
+;; ;;; but they currently are not.
+;;
+;; (define_insn ""
+;; [(set (match_operand:QI 0 "mcore_arith_reg_operand" "=r")
+;; (match_operand:BLK 1 "mcore_general_movsrc_operand" "m"))]
+;; ""
+;; "ld.b %0,%1"
+;; [(set_attr "type" "load")])
+;;
+;; (define_insn ""
+;; [(set (match_operand:HI 0 "mcore_arith_reg_operand" "=r")
+;; (match_operand:BLK 1 "mcore_general_movsrc_operand" "m"))]
+;; ""
+;; "ld.h %0,%1"
+;; [(set_attr "type" "load")])
+;;
+;; (define_insn ""
+;; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+;; (match_operand:BLK 1 "mcore_general_movsrc_operand" "m"))]
+;; ""
+;; "ld.w %0,%1"
+;; [(set_attr "type" "load")])
+;;
+;; (define_insn ""
+;; [(set (match_operand:BLK 0 "mcore_general_movdst_operand" "=m")
+;; (match_operand:QI 1 "mcore_arith_reg_operand" "r"))]
+;; ""
+;; "st.b %1,%0"
+;; [(set_attr "type" "store")])
+;;
+;; (define_insn ""
+;; [(set (match_operand:BLK 0 "mcore_general_movdst_operand" "=m")
+;; (match_operand:HI 1 "mcore_arith_reg_operand" "r"))]
+;; ""
+;; "st.h %1,%0"
+;; [(set_attr "type" "store")])
+;;
+;; (define_insn ""
+;; [(set (match_operand:BLK 0 "mcore_general_movdst_operand" "=m")
+;; (match_operand:SI 1 "mcore_arith_reg_operand" "r"))]
+;; ""
+;; "st.w %1,%0"
+;; [(set_attr "type" "store")])
+
+;; ------------------------------------------------------------------------
+;; Misc Optimizing quirks
+;; ------------------------------------------------------------------------
+
+;; A pair to catch constructs like (int *)((p+=4)-4), which happen
+;; in stdarg/varargs traversal.  This changes a 3-insn sequence into a
+;; 2-insn sequence. -- RBE 11/30/95
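+;;
+;; As an illustrative sketch, q = (int *)((p += 4) - 4) would naively emit
+;;
+;;     addi rP,4        ; p += 4
+;;     mov  rQ,rP       ; copy the new p
+;;     subi rQ,4        ; undo the offset in the copy
+;;
+;; while the pattern below captures the copy and the increment together and
+;; splits into just mov rQ,rP; addi rP,4.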
+(define_insn ""
+ [(parallel[
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "=r")
+ (match_operand:SI 1 "mcore_arith_reg_operand" "+r"))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_operand 2 "mcore_arith_any_imm_operand" "")))])]
+ "GET_CODE(operands[2]) == CONST_INT"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(parallel[
+ (set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "mcore_arith_reg_operand" ""))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_operand 2 "mcore_arith_any_imm_operand" "")))])]
+ "GET_CODE(operands[2]) == CONST_INT &&
+ operands[0] != operands[1]"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))])
+
+
+;;; Peepholes
+
+; Note: in the following patterns, use mcore_is_dead() to ensure that the
+; reg we may be trashing really is dead.  Reload doesn't always mark
+; deaths, so mcore_is_dead() (see mcore.c) scans forward to find its death. BRC
+
+;;; A peephole to convert the 3 instruction sequence generated by reload
+;;; to load a FP-offset address into a 2 instruction sequence.
+;;; ??? This probably never matches anymore.
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "J"))
+ (set (match_dup 0) (neg:SI (match_dup 0)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_operand:SI 2 "mcore_arith_reg_operand" "r")))]
+ "CONST_OK_FOR_J (INTVAL (operands[1]))"
+ "error\;mov %0,%2\;subi %0,%1")
+
+;; Moves of inlinable constants are done late, so when a 'not' is generated
+;; it is never combined with the following 'and' to generate an 'andn',
+;; because the combiner never sees it.  Use a peephole to pick up this case
+;; (happens mostly with bitfields).  BRC
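+;;
+;; e.g. (an illustrative sketch): anding with 0xffffff00, a constant whose
+;; inline trick goes through a 'not', would otherwise need the movi, the
+;; not, and the and; the peephole recognizes the movi/and pair and emits
+;; an andn against the dead scratch register instead.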
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "i"))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "r")
+ (and:SI (match_dup 2) (match_dup 0)))]
+ "mcore_const_trick_uses_not (INTVAL (operands[1])) &&
+ operands[0] != operands[2] &&
+ mcore_is_dead (insn, operands[0])"
+ "* return mcore_output_andn (insn, operands);")
+
+; When setting or clearing just two bits, it's cheapest to use two bseti's
+; or bclri's.  This only happens when relaxing immediates.  BRC
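+;
+; e.g. (illustrative): or'ing in 0x104, which has exactly two bits set
+; (bits 2 and 8), becomes the pair bseti rN,2; bseti rN,8.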
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+ (ior:SI (match_dup 2) (match_dup 0)))]
+ "TARGET_HARDLIT
+ && mcore_num_ones (INTVAL (operands[1])) == 2
+ && mcore_is_dead (insn, operands[0])"
+ "* return mcore_output_bseti (operands[2], INTVAL (operands[1]));")
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+ (and:SI (match_dup 2) (match_dup 0)))]
+ "TARGET_HARDLIT && mcore_num_zeros (INTVAL (operands[1])) == 2 &&
+ mcore_is_dead (insn, operands[0])"
+ "* return mcore_output_bclri (operands[2], INTVAL (operands[1]));")
+
+; Change an and with a mask that has a single cleared bit into a bclri.  This
+; handles QI and HI mode values using the knowledge that the most significant
+; bits don't matter.
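+;
+; e.g. (illustrative): for a QImode value, anding with 0xef leaves exactly
+; one zero bit (bit 4) once the ignored upper bits 0xffffff00 are filled
+; in, so it becomes a single bclri rN,4.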
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+ (and:SI (match_operand:SI 3 "mcore_arith_reg_operand" "")
+ (match_dup 0)))]
+ "GET_CODE (operands[3]) == SUBREG &&
+ GET_MODE (SUBREG_REG (operands[3])) == QImode &&
+ mcore_num_zeros (INTVAL (operands[1]) | 0xffffff00) == 1 &&
+ mcore_is_dead (insn, operands[0])"
+"*
+ if (! mcore_is_same_reg (operands[2], operands[3]))
+ output_asm_insn (\"mov\\t%2,%3\", operands);
+ return mcore_output_bclri (operands[2], INTVAL (operands[1]) | 0xffffff00);")
+
+/* Do not fold these together -- mode is lost at final output phase. */
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+ (and:SI (match_operand:SI 3 "mcore_arith_reg_operand" "")
+ (match_dup 0)))]
+ "GET_CODE (operands[3]) == SUBREG &&
+ GET_MODE (SUBREG_REG (operands[3])) == HImode &&
+ mcore_num_zeros (INTVAL (operands[1]) | 0xffff0000) == 1 &&
+ operands[2] == operands[3] &&
+ mcore_is_dead (insn, operands[0])"
+"*
+ if (! mcore_is_same_reg (operands[2], operands[3]))
+ output_asm_insn (\"mov\\t%2,%3\", operands);
+ return mcore_output_bclri (operands[2], INTVAL (operands[1]) | 0xffff0000);")
+
+; This peephole helps when using -mwide-bitfields to widen fields so they
+; collapse. This, however, has the effect that a narrower mode is not used
+; when desirable.
+;
+; e.g., sequences like:
+;
+; ldw r8,(r6)
+; movi r7,0x00ffffff
+; and r8,r7 r7 dead
+; stw r8,(r6) r8 dead
+;
+; get peepholed to become:
+;
+; movi r8,0
+; stb r8,(r6) r8 dead
+;
+; Do only easy addresses that have no offset. This peephole is also applied
+; to halfwords. We need to check that the load is non-volatile before we get
+; rid of it.
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
+ (set (match_operand:SI 4 "memory_operand" "") (match_dup 0))]
+ "mcore_is_dead (insn, operands[0]) &&
+ ! MEM_VOLATILE_P (operands[1]) &&
+ mcore_is_dead (insn, operands[2]) &&
+ (mcore_byte_offset (INTVAL (operands[3])) > -1 ||
+ mcore_halfword_offset (INTVAL (operands[3])) > -1) &&
+ ! MEM_VOLATILE_P (operands[4]) &&
+ GET_CODE (XEXP (operands[4], 0)) == REG"
+"*
+{
+ int ofs;
+ enum machine_mode mode;
+ rtx base_reg = XEXP (operands[4], 0);
+
+ if ((ofs = mcore_byte_offset (INTVAL (operands[3]))) > -1)
+ mode = QImode;
+ else if ((ofs = mcore_halfword_offset (INTVAL (operands[3]))) > -1)
+ mode = HImode;
+ else
+ gcc_unreachable ();
+
+ if (ofs > 0)
+ operands[4] = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (SImode, base_reg, GEN_INT(ofs)));
+ else
+ operands[4] = gen_rtx_MEM (mode, base_reg);
+
+ if (mode == QImode)
+ return \"movi %0,0\\n\\tst.b %0,%4\";
+
+ return \"movi %0,0\\n\\tst.h %0,%4\";
+}")
+
+; From sop11: get btsti's for (LT A 0) where A is a QI or HI value.
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (sign_extend:SI (match_operand:QI 1 "mcore_arith_reg_operand" "0")))
+ (set (reg:CC 17)
+ (lt:CC (match_dup 0)
+ (const_int 0)))]
+ "mcore_is_dead (insn, operands[0])"
+ "btsti %0,7")
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+ (sign_extend:SI (match_operand:HI 1 "mcore_arith_reg_operand" "0")))
+ (set (reg:CC 17)
+ (lt:CC (match_dup 0)
+ (const_int 0)))]
+ "mcore_is_dead (insn, operands[0])"
+ "btsti %0,15")
+
+; Pick up a tst. This combination happens because the immediate is not
+; allowed to fold into one of the operands of the tst. Does not happen
+; when relaxing immediates. BRC
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (match_operand:SI 1 "mcore_arith_reg_operand" ""))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 2 "mcore_literal_K_operand" "")))
+ (set (reg:CC 17) (ne:CC (match_dup 0) (const_int 0)))]
+ "mcore_is_dead (insn, operands[0])"
+ "movi %0,%2\;tst %1,%0")
+
+(define_peephole
+ [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+ (if_then_else:SI (ne (zero_extract:SI
+ (match_operand:SI 1 "mcore_arith_reg_operand" "")
+ (const_int 1)
+ (match_operand:SI 2 "mcore_literal_K_operand" ""))
+ (const_int 0))
+ (match_operand:SI 3 "mcore_arith_imm_operand" "")
+ (match_operand:SI 4 "mcore_arith_imm_operand" "")))
+ (set (reg:CC 17) (ne:CC (match_dup 0) (const_int 0)))]
+ ""
+"*
+{
+ unsigned int op0 = REGNO (operands[0]);
+
+ if (GET_CODE (operands[3]) == REG)
+ {
+ if (REGNO (operands[3]) == op0 && GET_CODE (operands[4]) == CONST_INT
+ && INTVAL (operands[4]) == 0)
+ return \"btsti %1,%2\\n\\tclrf %0\";
+ else if (GET_CODE (operands[4]) == REG)
+ {
+ if (REGNO (operands[4]) == op0)
+ return \"btsti %1,%2\\n\\tmovf %0,%3\";
+ else if (REGNO (operands[3]) == op0)
+ return \"btsti %1,%2\\n\\tmovt %0,%4\";
+ }
+
+ gcc_unreachable ();
+ }
+ else if (GET_CODE (operands[3]) == CONST_INT
+ && INTVAL (operands[3]) == 0
+ && GET_CODE (operands[4]) == REG)
+ return \"btsti %1,%2\\n\\tclrt %0\";
+
+ gcc_unreachable ();
+}")
+
+; Experimental: do the constant folding ourselves.  Note that this isn't
+; re-applied the way we'd really want; i.e., four ands collapse into two
+; instead of one.  This is because peepholes are applied as a sliding
+; window: the peephole pass does not generate new rtl, but instead slides
+; across the rtl generating machine instructions.  It would be nicer and
+; more flexible if the peephole optimizer were changed to re-apply patterns
+; and to generate new rtl.  The pattern below helps when we're
+; not using relaxed immediates.  BRC
+
+;(define_peephole
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "")
+; (match_operand:SI 1 "const_int_operand" ""))
+; (set (match_operand:SI 2 "mcore_arith_reg_operand" "")
+; (and:SI (match_dup 2) (match_dup 0)))
+; (set (match_dup 0)
+; (match_operand:SI 3 "const_int_operand" ""))
+; (set (match_dup 2)
+; (and:SI (match_dup 2) (match_dup 0)))]
+; "!TARGET_RELAX_IMM && mcore_is_dead (insn, operands[0]) &&
+; mcore_const_ok_for_inline (INTVAL (operands[1]) & INTVAL (operands[3]))"
+; "*
+;{
+; rtx out_operands[2];
+; out_operands[0] = operands[0];
+; out_operands[1] = GEN_INT (INTVAL (operands[1]) & INTVAL (operands[3]));
+;
+; output_inline_const (SImode, out_operands);
+;
+; output_asm_insn (\"and %2,%0\", operands);
+;
+; return \"\";
+;}")
+
+; BRC: for inlining get rid of extra test - experimental
+;(define_peephole
+; [(set (match_operand:SI 0 "mcore_arith_reg_operand" "r")
+; (ne:SI (reg:CC 17) (const_int 0)))
+; (set (reg:CC 17) (ne:CC (match_dup 0) (const_int 0)))
+; (set (pc)
+; (if_then_else (eq (reg:CC 17) (const_int 0))
+; (label_ref (match_operand 1 "" ""))
+; (pc)))]
+; ""
+; "*
+;{
+; if (get_attr_length (insn) == 10)
+; {
+; output_asm_insn (\"bt 2f\\n\\tjmpi [1f]\", operands);
+; output_asm_insn (\".align 2\\n1:\", operands);
+; output_asm_insn (\".long %1\\n2:\", operands);
+; return \"\";
+; }
+; return \"bf %l1\";
+;}")
+
+
+;;; Special patterns for dealing with the constant pool.
+
+;;; 4 byte integer in line.
+
+(define_insn "consttable_4"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")] 0)]
+ ""
+ "*
+{
+ assemble_integer (operands[0], 4, BITS_PER_WORD, 1);
+ return \"\";
+}"
+ [(set_attr "length" "4")])
+
+;;; Align to a four-byte boundary.
+
+(define_insn "align_4"
+ [(unspec_volatile [(const_int 0)] 1)]
+ ""
+ ".align 2")
+
+;;; Handle extra constant pool entries created during final pass.
+
+(define_insn "consttable_end"
+ [(unspec_volatile [(const_int 0)] 2)]
+ ""
+ "* return mcore_output_jump_label_table ();")
+
+;;
+;; Stack allocation -- in particular, for alloca().
+;; This is *not* what we use for entry into functions.
+;;
+;; This is how we allocate stack space.  If we are allocating a
+;; constant amount of space and we know it is less than 4096
+;; bytes, we need do nothing.
+;;
+;; If it is more than 4096 bytes, we need to probe the stack
+;; periodically.
+;;
+;; operands[1], the distance, is a POSITIVE number indicating that we
+;; are allocating stack space.
+;;
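+;; As a worked example (illustrative; assuming STACK_UNITS_MAXSTEP is 4096,
+;; per the 4096-byte threshold above): a constant request of 10000 bytes
+;; takes the unrolled path (10000 < 8 * 4096); two probe/subtract steps of
+;; 4096 leave 1808 bytes, which the final adjustment subtracts without a
+;; probe.
+;;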
+(define_expand "allocate_stack"
+ [(set (reg:SI 0)
+ (plus:SI (reg:SI 0)
+ (match_operand:SI 1 "general_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (match_dup 2))]
+ ""
+ "
+{
+ /* If the user wants no probing, just do the allocation directly. */
+ if (mcore_stack_increment == 0)
+ {
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,operands[1]));
+;; emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
+
+ /* For small constant growth, we unroll the code. */
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) < 8 * STACK_UNITS_MAXSTEP)
+ {
+ HOST_WIDE_INT left = INTVAL(operands[1]);
+
+ /* If it's a long way, get close enough for a last shot. */
+ if (left >= STACK_UNITS_MAXSTEP)
+ {
+ rtx tmp = gen_reg_rtx (Pmode);
+ emit_insn (gen_movsi (tmp, GEN_INT (STACK_UNITS_MAXSTEP)));
+ do
+ {
+ rtx memref = gen_rtx_MEM (SImode, stack_pointer_rtx);
+
+ MEM_VOLATILE_P (memref) = 1;
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+ emit_insn (gen_movsi (memref, stack_pointer_rtx));
+ left -= STACK_UNITS_MAXSTEP;
+ }
+ while (left > STACK_UNITS_MAXSTEP);
+ }
+ /* Perform the final adjustment. */
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-left)));
+;; emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+ }
+ else
+ {
+ rtx out_label = 0;
+ rtx loop_label = gen_label_rtx ();
+ rtx step = gen_reg_rtx (Pmode);
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx test, memref;
+
+#if 1
+ emit_insn (gen_movsi (tmp, operands[1]));
+ emit_insn (gen_movsi (step, GEN_INT (STACK_UNITS_MAXSTEP)));
+
+ if (GET_CODE (operands[1]) != CONST_INT)
+ {
+ out_label = gen_label_rtx ();
+ test = gen_rtx_GEU (VOIDmode, step, tmp); /* quick out */
+ emit_jump_insn (gen_cbranchsi4 (test, step, tmp, out_label));
+ }
+
+ /* Run a loop that steps it incrementally. */
+ emit_label (loop_label);
+
+ /* Extend a step, probe, and adjust remaining count. */
+ emit_insn(gen_subsi3(stack_pointer_rtx, stack_pointer_rtx, step));
+ memref = gen_rtx_MEM (SImode, stack_pointer_rtx);
+ MEM_VOLATILE_P (memref) = 1;
+ emit_insn(gen_movsi(memref, stack_pointer_rtx));
+ emit_insn(gen_subsi3(tmp, tmp, step));
+
+ /* Loop condition -- going back up. */
+ test = gen_rtx_LTU (VOIDmode, step, tmp);
+ emit_jump_insn (gen_cbranchsi4 (test, step, tmp, loop_label));
+
+ if (out_label)
+ emit_label (out_label);
+
+ /* Bump the residual. */
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+;; emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+#else
+ /* Simple one-shot: force the amount into a register and do a subtract.
+ This does NOT comply with the ABI. */
+ emit_insn (gen_movsi (tmp, operands[1]));
+ emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
+;; emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+#endif
+ }
+}")
diff --git a/gcc/config/mcore/mcore.opt b/gcc/config/mcore/mcore.opt
new file mode 100644
index 000000000..3e48cb22d
--- /dev/null
+++ b/gcc/config/mcore/mcore.opt
@@ -0,0 +1,75 @@
+; Options for the Motorola MCore port of the compiler.
+
+; Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m210
+Target RejectNegative Report InverseMask(M340)
+Generate code for the M*Core M210
+
+m340
+Target RejectNegative Report Mask(M340)
+Generate code for the M*Core M340
+
+m4byte-functions
+Target Report Mask(OVERALIGN_FUNC)
+Force functions to be aligned to a 4 byte boundary
+
+mbig-endian
+Target RejectNegative Report InverseMask(LITTLE_END)
+Generate big-endian code
+
+mcallgraph-data
+Target Report Mask(CG_DATA)
+Emit call graph information
+
+mdiv
+Target Report Mask(DIV)
+Use the divide instruction
+
+mhardlit
+Target Report Mask(HARDLIT)
+Inline constants if it can be done in 2 insns or fewer
+
+mlittle-endian
+Target RejectNegative Report Mask(LITTLE_END)
+Generate little-endian code
+
+; Not used by the compiler proper.
+mno-lsim
+Target RejectNegative
+Assume that run-time support has been provided, so omit -lsim from the linker command line
+
+mrelax-immediates
+Target Report Mask(RELAX_IMM)
+Use arbitrary sized immediates in bit operations
+
+mslow-bytes
+Target Report Mask(SLOW_BYTES)
+Prefer word accesses over byte accesses
+
+; Maximum size we are allowed to grow the stack in a single operation.
+; If we want more, we must do it in increments of at most this size.
+; If this value is 0, we don't check at all.
+mstack-increment=
+Target RejectNegative Joined UInteger Var(mcore_stack_increment) Init(STACK_UNITS_MAXSTEP)
+Set the maximum amount for a single stack increment operation
+
+mwide-bitfields
+Target Report Mask(W_FIELD)
+Always treat bitfields as int-sized
diff --git a/gcc/config/mcore/predicates.md b/gcc/config/mcore/predicates.md
new file mode 100644
index 000000000..1f5fc00dc
--- /dev/null
+++ b/gcc/config/mcore/predicates.md
@@ -0,0 +1,338 @@
+;; Predicate definitions for Motorola MCore.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Nonzero if OP is a normal arithmetic register.
+
+(define_predicate "mcore_arith_reg_operand"
+ (match_code "reg,subreg")
+{
+ if (! register_operand (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) == REG)
+ return REGNO (op) != CC_REG;
+
+ return 1;
+})
+
+;; Nonzero if OP can be source of a simple move operation.
+
+(define_predicate "mcore_general_movsrc_operand"
+ (match_code "mem,const_int,reg,subreg,symbol_ref,label_ref,const")
+{
+ /* Any (MEM LABEL_REF) is OK. That is a pc-relative load. */
+ if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ return 1;
+
+ return general_operand (op, mode);
+})
+
+;; Nonzero if OP can be destination of a simple move operation.
+
+(define_predicate "mcore_general_movdst_operand"
+ (match_code "mem,reg,subreg")
+{
+ if (GET_CODE (op) == REG && REGNO (op) == CC_REG)
+ return 0;
+
+ return general_operand (op, mode);
+})
+
+;; Nonzero if OP should be recognized during reload for an ixh/ixw
+;; operand. See the ixh/ixw patterns.
+
+(define_predicate "mcore_reload_operand"
+ (match_code "mem,reg,subreg")
+{
+ if (mcore_arith_reg_operand (op, mode))
+ return 1;
+
+ if (! reload_in_progress)
+ return 0;
+
+ return GET_CODE (op) == MEM;
+})
+
+;; Nonzero if OP is a valid source operand for an arithmetic insn.
+
+(define_predicate "mcore_arith_J_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for an arithmetic insn.
+
+(define_predicate "mcore_arith_K_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for a shift or rotate insn.
+
+(define_predicate "mcore_arith_K_operand_not_0"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if ( GET_CODE (op) == CONST_INT
+ && CONST_OK_FOR_K (INTVAL (op))
+ && INTVAL (op) != 0)
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a register or a constant satisfying constraint M.
+
+(define_predicate "mcore_arith_M_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a register, a constant satisfying constraint K, or a
+;; constant with at most two zero bits.
+
+(define_predicate "mcore_arith_K_S_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ {
+ if (CONST_OK_FOR_K (INTVAL (op)) || (mcore_num_zeros (INTVAL (op)) <= 2))
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for a cmov with two consts
+;; +/- 1.
+
+(define_predicate "mcore_arith_O_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for loading.
+
+(define_predicate "mcore_arith_imm_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && const_ok_for_mcore (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a register or any constant integer.
+
+(define_predicate "mcore_arith_any_imm_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for a btsti.
+
+(define_predicate "mcore_literal_K_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for an add/sub insn.
+
+(define_predicate "mcore_addsub_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ {
+ /* The following has been removed because it precludes large constants from
+ being returned as valid source operands for an add/sub insn.  While large
+ constants may not directly be used in an add/sub, they may be if first
+ loaded into a register.  Thus, this predicate should indicate that they are
+ valid, and the constraint in mcore.md should control whether an additional
+ load to a register is needed. (see mcore.md, addsi). -- DAC 4/2/1998
+
+ if (CONST_OK_FOR_J (INTVAL (op)) || CONST_OK_FOR_L (INTVAL (op)))
+ return 1;
+
+ However we do still need to check to make sure that the constant is not too
+ big, especially if we are running on a 64-bit OS... Nickc 8/1/07. */
+
+ if (trunc_int_for_mode (INTVAL (op), mode) != INTVAL (op))
+ return 0;
+
+ return 1;
+
+ }
+
+ return 0;
+})
+
+;; Nonzero if OP is a valid source operand for a compare operation.
+
+(define_predicate "mcore_compare_operand"
+ (match_code "const_int,reg,subreg")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT && INTVAL (op) == 0)
+ return 1;
+
+ return 0;
+})
+
+;; Return 1 if OP is a load multiple operation. It is known to be a
+;; PARALLEL and the first section will be tested.
+
+(define_predicate "mcore_load_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int dest_regno;
+ rtx src_addr;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM)
+ return 0;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0);
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != (unsigned) (dest_regno + i)
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != i * 4)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Similar, but tests for store multiple.
+
+(define_predicate "mcore_store_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int src_regno;
+ rtx dest_addr;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG)
+ return 0;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0)));
+ dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0);
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != (unsigned) (src_regno + i)
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
+ || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != i * 4)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Nonzero if OP is a register or a constant, i.e. anything usable as a
+;; call address.
+
+(define_predicate "mcore_call_address_operand"
+ (match_code "reg,subreg,const_int,symbol_ref")
+{
+ return register_operand (op, mode) || CONSTANT_P (op);
+})
diff --git a/gcc/config/mcore/t-mcore b/gcc/config/mcore/t-mcore
new file mode 100644
index 000000000..827723968
--- /dev/null
+++ b/gcc/config/mcore/t-mcore
@@ -0,0 +1,73 @@
+# Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = mcore/lib1.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mcore/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mcore/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mcore/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mcore/crtn.asm
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/mcore/t-mcore
+ rm -f dp-bit.c
+ echo '' > dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/mcore/t-mcore
+ rm -f fp-bit.c
+ echo '' > fp-bit.c
+ echo '#define FLOAT' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# could use -msifilter to be safe from interrupt/jmp interactions and others.
+TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter
+
+# We have values for float.h.
+CROSS_FLOAT_H = $(srcdir)/config/mcore/gfloat.h
+
+# If support for -m4align is ever re-enabled then comment out the
+# following line and uncomment the multilib lines below.
+
+EXTRA_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+# MULTILIB_OPTIONS = m8align/m4align
+# MULTILIB_DIRNAMES = align8 align4
+# MULTILIB_MATCHES =
+# MULTILIB_EXTRA_OPTS =
+# MULTILIB_EXCEPTIONS =
+# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+# LIBGCC = stmp-multilib
+# INSTALL_LIBGCC = install-multilib
+
+MULTILIB_OPTIONS = mbig-endian/mlittle-endian m210/m340
+MULTILIB_DIRNAMES = big little m210 m340
+
+EXTRA_PARTS =
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mcore/t-mcore-pe b/gcc/config/mcore/t-mcore-pe
new file mode 100644
index 000000000..37ad3b6f9
--- /dev/null
+++ b/gcc/config/mcore/t-mcore-pe
@@ -0,0 +1,56 @@
+# Copyright (C) 2000, 2001, 2002, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = mcore/lib1.asm
+LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/mcore/t-mcore
+ rm -f dp-bit.c
+ echo '' > dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/mcore/t-mcore
+ rm -f fp-bit.c
+ echo '' > fp-bit.c
+ echo '#define FLOAT' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# could use -msifilter to be safe from interrupt/jmp interactions and others.
+TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter
+
+# We have values for float.h.
+CROSS_FLOAT_H = $(srcdir)/config/mcore/gfloat.h
+
+MULTILIB_OPTIONS = mbig-endian/mlittle-endian m210/m340
+MULTILIB_DIRNAMES = big little m210 m340
+MULTILIB_MATCHES =
+MULTILIB_EXTRA_OPTS =
+MULTILIB_EXCEPTIONS =
+
+# EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# If EXTRA_MULTILIB_PARTS is not defined above then define EXTRA_PARTS here
+# EXTRA_PARTS = crtbegin.o crtend.o
+
diff --git a/gcc/config/memcmp.c b/gcc/config/memcmp.c
new file mode 100644
index 000000000..2348afe1d
--- /dev/null
+++ b/gcc/config/memcmp.c
@@ -0,0 +1,16 @@
+/* Public domain. */
+#include <stddef.h>
+
+int
+memcmp (const void *str1, const void *str2, size_t count)
+{
+ const unsigned char *s1 = str1;
+ const unsigned char *s2 = str2;
+
+ while (count-- > 0)
+ {
+ if (*s1++ != *s2++)
+ return s1[-1] < s2[-1] ? -1 : 1;
+ }
+ return 0;
+}
diff --git a/gcc/config/memcpy.c b/gcc/config/memcpy.c
new file mode 100644
index 000000000..58b1e4056
--- /dev/null
+++ b/gcc/config/memcpy.c
@@ -0,0 +1,12 @@
+/* Public domain. */
+#include <stddef.h>
+
+void *
+memcpy (void *dest, const void *src, size_t len)
+{
+ char *d = dest;
+ const char *s = src;
+ while (len--)
+ *d++ = *s++;
+ return dest;
+}
diff --git a/gcc/config/memmove.c b/gcc/config/memmove.c
new file mode 100644
index 000000000..13b340af6
--- /dev/null
+++ b/gcc/config/memmove.c
@@ -0,0 +1,20 @@
+/* Public domain. */
+#include <stddef.h>
+
+void *
+memmove (void *dest, const void *src, size_t len)
+{
+ char *d = dest;
+ const char *s = src;
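+  /* When dest precedes src, a forward copy is safe even if the regions
+     overlap; otherwise copy backward from the last byte.  */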
+ if (d < s)
+ while (len--)
+ *d++ = *s++;
+ else
+ {
+ const char *lasts = s + (len-1);
+ char *lastd = d + (len-1);
+ while (len--)
+ *lastd-- = *lasts--;
+ }
+ return dest;
+}
diff --git a/gcc/config/memset.c b/gcc/config/memset.c
new file mode 100644
index 000000000..3e7025ee3
--- /dev/null
+++ b/gcc/config/memset.c
@@ -0,0 +1,11 @@
+/* Public domain. */
+#include <stddef.h>
+
+void *
+memset (void *dest, int val, size_t len)
+{
+ unsigned char *ptr = dest;
+ while (len-- > 0)
+ *ptr++ = val;
+ return dest;
+}
diff --git a/gcc/config/mep/constraints.md b/gcc/config/mep/constraints.md
new file mode 100644
index 000000000..5aa2de69c
--- /dev/null
+++ b/gcc/config/mep/constraints.md
@@ -0,0 +1,162 @@
+;; Toshiba Media Processor Machine constraints
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Red Hat Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+
+(define_register_constraint "a" "SP_REGS"
+ "The $sp register.")
+
+(define_register_constraint "b" "TP_REGS"
+ "The $tp register.")
+
+(define_register_constraint "c" "CONTROL_REGS"
+ "Any control register.")
+
+(define_register_constraint "d" "HILO_REGS"
+ "Either the $hi or the $lo register.")
+
+(define_register_constraint "em" "LOADABLE_CR_REGS"
+ "Coprocessor registers that can be directly loaded ($c0-$c15).")
+
+(define_register_constraint "ex" "mep_have_copro_copro_moves_p ? CR_REGS : NO_REGS"
+ "Coprocessor registers that can be moved to each other.")
+
+(define_register_constraint "er" "mep_have_core_copro_moves_p ? CR_REGS : NO_REGS"
+ "Coprocessor registers that can be moved to core registers.")
+
+(define_register_constraint "h" "HI_REGS"
+ "The $hi register.")
+
+(define_register_constraint "j" "RPC_REGS"
+ "The $rpc register.")
+
+(define_register_constraint "l" "LO_REGS"
+ "The $lo register.")
+
+(define_register_constraint "t" "TPREL_REGS"
+ "Registers which can be used in $tp-relative addressing.")
+
+(define_register_constraint "v" "GP_REGS"
+ "The $gp register.")
+
+(define_register_constraint "x" "CR_REGS"
+ "The coprocessor registers.")
+
+(define_register_constraint "y" "CCR_REGS"
+ "The coprocessor control registers.")
+
+(define_register_constraint "z" "R0_REGS"
+ "The $0 register.")
+
+(define_register_constraint "A" "USER0_REGS"
+ "User-defined register set A.")
+
+(define_register_constraint "B" "USER1_REGS"
+ "User-defined register set B.")
+
+(define_register_constraint "C" "USER2_REGS"
+ "User-defined register set C.")
+
+(define_register_constraint "D" "USER3_REGS"
+ "User-defined register set D.")
+
+
+
+(define_constraint "I"
+ "Offsets for $gp-rel addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival < 32768")))
+
+(define_constraint "J"
+ "Constants that can be used directly with boolean insns."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival < 65536")))
+
+(define_constraint "K"
+ "Constants that can be moved directly to registers."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival < 0x01000000")))
+
+(define_constraint "L"
+ "Small constants that can be added to registers."
+ (and (match_code "const_int")
+ (match_test "ival >= -32 && ival < 32")))
+
+(define_constraint "M"
+ "Long shift counts."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival < 32")))
+
+(define_constraint "N"
+ "Small constants that can be compared to registers."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival < 16")))
+
+(define_constraint "O"
+ "Constants that can be loaded into the top half of registers."
+ (and (match_code "const_int")
+ (match_test "!(ival & 0xffff) && ival >= -2147483647-1 && ival <= 2147483647")))
+
+(define_constraint "S"
+ "Signed 8-bit immediates."
+ (and (match_code "const_int")
+ (match_test "ival >= -128 && ival < 127")))
+
+
+
+;; This must only be used with mep_call_address_operand() as the predicate.
+(define_constraint "R"
+ "@internal
+Near symbols that can be used as addresses for CALL."
+ (not (match_code "reg")))
+
+(define_constraint "T"
+ "Symbols encoded for $tp-rel or $gp-rel addressing."
+ (ior (ior
+ (and (match_code "unspec")
+ (match_code "symbol_ref" "a"))
+ (and (match_code "const")
+ (and (match_code "unspec" "0")
+ (match_code "symbol_ref" "0a"))))
+ (and (match_code "const")
+ (and (match_code "plus" "0")
+ (and (match_code "unspec" "00")
+ (match_code "symbol_ref" "00a"))))))
+
+(define_constraint "U"
+ "Non-constant addresses for loading/saving coprocessor registers."
+ (and (match_code "mem")
+ (match_test "! CONSTANT_P (XEXP (op, 0))")))
+
+(define_constraint "W"
+ "The top half of a symbol's value."
+ (and (match_code "high")
+ (match_code "symbol_ref" "0")))
+
+(define_constraint "Y"
+ "A register indirect address without offset."
+ (and (match_code "mem")
+ (match_code "reg" "0")))
+
+(define_constraint "Z"
+ "Symbolic references to the control bus."
+ (and (and (match_code "mem")
+ (match_code "symbol_ref" "0"))
+ (match_test "mep_section_tag (op) == 'c'")))
diff --git a/gcc/config/mep/default.h b/gcc/config/mep/default.h
new file mode 100644
index 000000000..f5359721e
--- /dev/null
+++ b/gcc/config/mep/default.h
@@ -0,0 +1,10 @@
+/* Header created by MeP-Integrator */
+#undef __section
+#define __section(_secname) __attribute__((section(#_secname)))
+#undef mep_nop
+#define mep_nop() __asm__ volatile ("nop")
+
+#pragma GCC coprocessor available $c0...$c31
+#pragma GCC coprocessor call_saved $c6...$c7
+
+#include <intrinsics.h>
diff --git a/gcc/config/mep/intrinsics.h b/gcc/config/mep/intrinsics.h
new file mode 100644
index 000000000..b18217a8d
--- /dev/null
+++ b/gcc/config/mep/intrinsics.h
@@ -0,0 +1,620 @@
+
+
+/* DO NOT EDIT: This file is automatically generated by CGEN.
+ Any changes you make will be discarded when it is next regenerated.
+*/
+
+/* GCC defines these internally, as follows...
+#if __MEP_CONFIG_CP_DATA_BUS_WIDTH == 64
+ typedef long long cp_data_bus_int;
+#else
+ typedef long cp_data_bus_int;
+#endif
+typedef char cp_v8qi __attribute__((vector_size(8)));
+typedef unsigned char cp_v8uqi __attribute__((vector_size(8)));
+typedef short cp_v4hi __attribute__((vector_size(8)));
+typedef unsigned short cp_v4uhi __attribute__((vector_size(8)));
+typedef int cp_v2si __attribute__((vector_size(8)));
+typedef unsigned int cp_v2usi __attribute__((vector_size(8)));
+*/
+
+
+// default
+void mep_cpfmadila1_h (cp_v4hi, cp_v4hi, long, long); // volatile
+void mep_cpfmadiua1_h (cp_v4hi, cp_v4hi, long, long); // volatile
+void mep_cpfmadia1_b (cp_v8qi, cp_v8qi, long, long); // volatile
+void mep_cpfmadia1u_b (cp_v8uqi, cp_v8uqi, long, long); // volatile
+void mep_cpfmulila1_h (cp_v4hi, cp_v4hi, long, long); // volatile
+void mep_cpfmuliua1_h (cp_v4hi, cp_v4hi, long, long); // volatile
+void mep_cpfmulia1_b (cp_v8qi, cp_v8qi, long, long); // volatile
+void mep_cpfmulia1u_b (cp_v8uqi, cp_v8uqi, long, long); // volatile
+void mep_cpamadila1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpamadiua1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpamadia1_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpamadia1u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpamulila1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpamuliua1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpamulia1_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpamulia1u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpfmadila1s1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmadiua1s1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmadia1s1_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpfmadia1s1u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpfmulila1s1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmuliua1s1_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmulia1s1_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpfmulia1s1u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpfmadila1s0_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmadiua1s0_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmadia1s0_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpfmadia1s0u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpfmulila1s0_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmuliua1s0_h (cp_v4hi, cp_v4hi, long); // volatile
+void mep_cpfmulia1s0_b (cp_v8qi, cp_v8qi, long); // volatile
+void mep_cpfmulia1s0u_b (cp_v8uqi, cp_v8uqi, long); // volatile
+void mep_cpacswp (); // volatile
+void mep_cpaccpa1 (); // volatile
+void mep_cpacsuma1 (); // volatile
+void mep_c1nop (); // volatile
+void mep_cpfacla0s1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfacua0s1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfaca0s1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpfaca0s1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpfsftbla0s1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfsftbua0s1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfsftba0s1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpfsftba0s1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpfacla0s0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfacua0s0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfaca0s0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpfaca0s0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpfsftbla0s0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfsftbua0s0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpfsftba0s0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpfsftba0s0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsllia0 (long); // volatile
+void mep_cpsraia0 (long); // volatile
+void mep_cpsrlia0 (long); // volatile
+void mep_cpslla0 (cp_data_bus_int); // volatile
+void mep_cpsraa0 (cp_data_bus_int); // volatile
+void mep_cpsrla0 (cp_data_bus_int); // volatile
+void mep_cpaccpa0 (); // volatile
+void mep_cpacsuma0 (); // volatile
+cp_v2si mep_cpmovhla0_w (); // volatile
+cp_v2si mep_cpmovhua0_w (); // volatile
+cp_v2si mep_cppackla0_w (); // volatile
+cp_v2si mep_cppackua0_w (); // volatile
+cp_v4hi mep_cppackla0_h (); // volatile
+cp_v4hi mep_cppackua0_h (); // volatile
+cp_v8qi mep_cppacka0_b (); // volatile
+cp_v8uqi mep_cppacka0u_b (); // volatile
+cp_v2si mep_cpmovlla0_w (); // volatile
+cp_v2si mep_cpmovlua0_w (); // volatile
+cp_v2si mep_cpmovula0_w (); // volatile
+cp_v2si mep_cpmovuua0_w (); // volatile
+cp_v4hi mep_cpmovla0_h (); // volatile
+cp_v4hi mep_cpmovua0_h (); // volatile
+cp_v8qi mep_cpmova0_b (); // volatile
+void mep_cpsetla0_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsetua0_w (cp_v2si, cp_v2si); // volatile
+void mep_cpseta0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsadla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsadua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsada0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsada0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpabsla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpabsua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpabsa0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpabsa0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsubacla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubacua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubaca0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsubaca0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsubla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsuba0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsuba0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpaddacla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddacua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddaca0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpaddaca0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpaddla0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddua0_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpadda0_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpadda0u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_c0nop (); // volatile
+void mep_cpsmsbslla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmsbslua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmsbslla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmsbslua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmadslla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmadslua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmadslla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmadslua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmulslla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmulslua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmulslla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmulslua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmsbla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmsbua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmsbla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmsbua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmadla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmadua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsmadla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsmadua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmsbla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmsbua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmsbla1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmsbua1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmsbla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmsbua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmadla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmadua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmadla1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmadua1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmadla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmadua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmada1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpmada1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpmulla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmulua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpmulla1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmulua1u_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpmulla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmulua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpmula1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpmula1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpssda1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpssda1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpssqa1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpssqa1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsllia1 (long); // volatile
+void mep_cpsraia1 (long); // volatile
+void mep_cpsrlia1 (long); // volatile
+void mep_cpslla1 (cp_data_bus_int); // volatile
+void mep_cpsraa1 (cp_data_bus_int); // volatile
+void mep_cpsrla1 (cp_data_bus_int); // volatile
+cp_v2si mep_cpmovhla1_w (); // volatile
+cp_v2si mep_cpmovhua1_w (); // volatile
+cp_v2si mep_cppackla1_w (); // volatile
+cp_v2si mep_cppackua1_w (); // volatile
+cp_v4hi mep_cppackla1_h (); // volatile
+cp_v4hi mep_cppackua1_h (); // volatile
+cp_v8qi mep_cppacka1_b (); // volatile
+cp_v8uqi mep_cppacka1u_b (); // volatile
+cp_v2si mep_cpmovlla1_w (); // volatile
+cp_v2si mep_cpmovlua1_w (); // volatile
+cp_v2si mep_cpmovula1_w (); // volatile
+cp_v2si mep_cpmovuua1_w (); // volatile
+cp_v4hi mep_cpmovla1_h (); // volatile
+cp_v4hi mep_cpmovua1_h (); // volatile
+cp_v8qi mep_cpmova1_b (); // volatile
+void mep_cpsetla1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpsetua1_w (cp_v2si, cp_v2si); // volatile
+void mep_cpseta1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsadla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsadua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsada1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsada1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpabsla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpabsua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpabsa1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpabsa1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsubacla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubacua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubaca1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsubaca1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpsubla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsubua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpsuba1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpsuba1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpaddacla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddacua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddaca1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpaddaca1u_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpaddla1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpaddua1_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpadda1_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpadda1u_b (cp_v8uqi, cp_v8uqi); // volatile
+cp_data_bus_int mep_cdmovi (long);
+cp_data_bus_int mep_cdmoviu (long);
+cp_v2si mep_cpmovi_w (long);
+cp_v2usi mep_cpmoviu_w (long);
+cp_v4hi mep_cpmovi_h (long);
+cp_v4uhi mep_cpmoviu_h (long);
+cp_v8qi mep_cpmovi_b (long);
+cp_data_bus_int mep_cdclipi3 (cp_data_bus_int, long);
+cp_data_bus_int mep_cdclipiu3 (cp_data_bus_int, long);
+cp_v2si mep_cpclipi3_w (cp_v2si, long);
+cp_v2si mep_cpclipiu3_w (cp_v2si, long);
+cp_v2si mep_cpslai3_w (cp_v2si, long); // volatile
+cp_v4hi mep_cpslai3_h (cp_v4hi, long); // volatile
+cp_data_bus_int mep_cdslli3 (cp_data_bus_int, long);
+cp_v2si mep_cpslli3_w (cp_v2si, long);
+cp_v4hi mep_cpslli3_h (cp_v4hi, long);
+cp_v8qi mep_cpslli3_b (cp_v8qi, long);
+cp_data_bus_int mep_cdsrai3 (cp_data_bus_int, long);
+cp_v2si mep_cpsrai3_w (cp_v2si, long);
+cp_v4hi mep_cpsrai3_h (cp_v4hi, long);
+cp_v8qi mep_cpsrai3_b (cp_v8qi, long);
+cp_data_bus_int mep_cdsrli3 (cp_data_bus_int, long);
+cp_v2si mep_cpsrli3_w (cp_v2si, long);
+cp_v4hi mep_cpsrli3_h (cp_v4hi, long);
+cp_v8qi mep_cpsrli3_b (cp_v8qi, long);
+void mep_cpocmpge_w (cp_v2si, cp_v2si); // volatile
+void mep_cpocmpgeu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpocmpge_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpocmpge_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpocmpgeu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpocmpgt_w (cp_v2si, cp_v2si); // volatile
+void mep_cpocmpgtu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpocmpgt_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpocmpgt_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpocmpgtu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpocmpne_w (cp_v2si, cp_v2si); // volatile
+void mep_cpocmpne_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpocmpne_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpocmpeq_w (cp_v2si, cp_v2si); // volatile
+void mep_cpocmpeq_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpocmpeq_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpacmpge_w (cp_v2si, cp_v2si); // volatile
+void mep_cpacmpgeu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpacmpge_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpacmpge_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpacmpgeu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpacmpgt_w (cp_v2si, cp_v2si); // volatile
+void mep_cpacmpgtu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpacmpgt_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpacmpgt_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpacmpgtu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpacmpne_w (cp_v2si, cp_v2si); // volatile
+void mep_cpacmpne_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpacmpne_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpacmpeq_w (cp_v2si, cp_v2si); // volatile
+void mep_cpacmpeq_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpacmpeq_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpcmpge_w (cp_v2si, cp_v2si); // volatile
+void mep_cpcmpgeu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpcmpge_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpcmpge_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpcmpgeu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpcmpgt_w (cp_v2si, cp_v2si); // volatile
+void mep_cpcmpgtu_w (cp_v2usi, cp_v2usi); // volatile
+void mep_cpcmpgt_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpcmpgt_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpcmpgtu_b (cp_v8uqi, cp_v8uqi); // volatile
+void mep_cpcmpne_w (cp_v2si, cp_v2si); // volatile
+void mep_cpcmpne_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpcmpne_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpcmpeq_w (cp_v2si, cp_v2si); // volatile
+void mep_cpcmpeq_h (cp_v4hi, cp_v4hi); // volatile
+void mep_cpcmpeq_b (cp_v8qi, cp_v8qi); // volatile
+void mep_cpcmpeqz_b (cp_v8qi, cp_v8qi); // volatile
+cp_data_bus_int mep_cdcastw (cp_data_bus_int);
+cp_data_bus_int mep_cdcastuw (cp_data_bus_int);
+cp_v2si mep_cpcasth_w (cp_v2si);
+cp_v2si mep_cpcastuh_w (cp_v2si);
+cp_v2si mep_cpcastb_w (cp_v2si);
+cp_v2si mep_cpcastub_w (cp_v2si);
+cp_v4hi mep_cpcastb_h (cp_v4hi);
+cp_v4hi mep_cpcastub_h (cp_v4hi);
+cp_v4hi mep_cpextl_h (cp_v4hi);
+cp_v4uhi mep_cpextlu_h (cp_v4uhi);
+cp_v8qi mep_cpextl_b (cp_v8qi);
+cp_v8uqi mep_cpextlu_b (cp_v8uqi);
+cp_v4uhi mep_cpextu_h (cp_v4uhi);
+cp_v4uhi mep_cpextuu_h (cp_v4uhi);
+cp_v8uqi mep_cpextu_b (cp_v8uqi);
+cp_v8uqi mep_cpextuu_b (cp_v8uqi);
+cp_v2si mep_cpbcast_w (cp_v2si);
+cp_v4hi mep_cpbcast_h (cp_v4hi);
+cp_v8qi mep_cpbcast_b (cp_v8qi);
+void mep_cpccadd_b (cp_v8qi*); // volatile
+cp_v2si mep_cphadd_w (cp_v2si);
+cp_v4hi mep_cphadd_h (cp_v4hi);
+cp_v8qi mep_cphadd_b (cp_v8qi);
+cp_v8uqi mep_cphaddu_b (cp_v8uqi);
+cp_v2si mep_cpnorm_w (cp_v2si);
+cp_v4hi mep_cpnorm_h (cp_v4hi);
+cp_v2si mep_cpldz_w (cp_v2si);
+cp_v4hi mep_cpldz_h (cp_v4hi);
+cp_v2si mep_cpabsz_w (cp_v2si);
+cp_v4hi mep_cpabsz_h (cp_v4hi);
+cp_v8qi mep_cpabsz_b (cp_v8qi);
+void mep_cpmovtocc (cp_data_bus_int); // volatile
+void mep_cpmovtocsar1 (cp_data_bus_int); // volatile
+void mep_cpmovtocsar0 (cp_data_bus_int); // volatile
+cp_data_bus_int mep_cpmovfrcc (); // volatile
+cp_data_bus_int mep_cpmovfrcsar1 (); // volatile
+cp_data_bus_int mep_cpmovfrcsar0 (); // volatile
+cp_v2si mep_cpmin3_w (cp_v2si, cp_v2si);
+cp_v2si mep_cpminu3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpmin3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpmin3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpminu3_b (cp_v8qi, cp_v8qi);
+cp_v2si mep_cpmax3_w (cp_v2si, cp_v2si);
+cp_v2si mep_cpmaxu3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpmax3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpmax3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpmaxu3_b (cp_v8qi, cp_v8qi);
+cp_v4hi mep_cpabs3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpabs3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpabsu3_b (cp_v8qi, cp_v8qi);
+cp_v2si mep_cpaddsr3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpaddsr3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpaddsr3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpaddsru3_b (cp_v8qi, cp_v8qi);
+cp_v2si mep_cpave3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpave3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpave3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpaveu3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextlsub3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextlsubu3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextusub3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextusubu3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextladd3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextladdu3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextuadd3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpextuaddu3_b (cp_v8qi, cp_v8qi);
+cp_v2si mep_cpssub3_w (cp_v2si, cp_v2si); // volatile
+cp_v4hi mep_cpssub3_h (cp_v4hi, cp_v4hi); // volatile
+cp_v2si mep_cpsadd3_w (cp_v2si, cp_v2si); // volatile
+cp_v4hi mep_cpsadd3_h (cp_v4hi, cp_v4hi); // volatile
+cp_v2si mep_cpsla3_w (cp_v2si, cp_v2si); // volatile
+cp_v4hi mep_cpsla3_h (cp_v4hi, cp_v4hi); // volatile
+cp_data_bus_int mep_cdsll3 (cp_data_bus_int, cp_data_bus_int);
+cp_v2si mep_cpssll3_w (cp_v2si, cp_v2si);
+cp_v2si mep_cpsll3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpssll3_h (cp_v4hi, cp_v4hi);
+cp_v4hi mep_cpsll3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpssll3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpsll3_b (cp_v8qi, cp_v8qi);
+cp_data_bus_int mep_cdsra3 (cp_data_bus_int, cp_data_bus_int);
+cp_v2si mep_cpssra3_w (cp_v2si, cp_v2si);
+cp_v2si mep_cpsra3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpssra3_h (cp_v4hi, cp_v4hi);
+cp_v4hi mep_cpsra3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpssra3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpsra3_b (cp_v8qi, cp_v8qi);
+cp_data_bus_int mep_cdsrl3 (cp_data_bus_int, cp_data_bus_int);
+cp_v2si mep_cpssrl3_w (cp_v2si, cp_v2si);
+cp_v2si mep_cpsrl3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpssrl3_h (cp_v4hi, cp_v4hi);
+cp_v4hi mep_cpsrl3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpssrl3_b (cp_v8qi, cp_v8qi);
+cp_v8qi mep_cpsrl3_b (cp_v8qi, cp_v8qi);
+cp_v4hi mep_cppack_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cppack_b (cp_v8qi, cp_v8qi);
+cp_v8uqi mep_cppacku_b (cp_v8uqi, cp_v8uqi);
+cp_v2si mep_cpunpackl_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpunpackl_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpunpackl_b (cp_v8qi, cp_v8qi);
+cp_v2usi mep_cpunpacku_w (cp_v2usi, cp_v2usi);
+cp_v4uhi mep_cpunpacku_h (cp_v4uhi, cp_v4uhi);
+cp_v8uqi mep_cpunpacku_b (cp_v8uqi, cp_v8uqi);
+cp_data_bus_int mep_cpfsftbs1 (cp_data_bus_int, cp_data_bus_int); // volatile
+cp_data_bus_int mep_cpfsftbs0 (cp_data_bus_int, cp_data_bus_int); // volatile
+cp_data_bus_int mep_cpfsftbi (cp_data_bus_int, cp_data_bus_int, long);
+cp_data_bus_int mep_cpsel (cp_data_bus_int, cp_data_bus_int); // volatile
+cp_vector mep_cpxor3 (cp_vector, cp_vector);
+cp_vector mep_cpnor3 (cp_vector, cp_vector);
+cp_vector mep_cpor3 (cp_vector, cp_vector);
+cp_vector mep_cpand3 (cp_vector, cp_vector);
+cp_data_bus_int mep_cdsub3 (cp_data_bus_int, cp_data_bus_int);
+cp_v2si mep_cpsub3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpsub3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpsub3_b (cp_v8qi, cp_v8qi);
+cp_data_bus_int mep_cdadd3 (cp_data_bus_int, cp_data_bus_int);
+cp_v2si mep_cpadd3_w (cp_v2si, cp_v2si);
+cp_v4hi mep_cpadd3_h (cp_v4hi, cp_v4hi);
+cp_v8qi mep_cpadd3_b (cp_v8qi, cp_v8qi);
+void mep_bsrv (void *);
+void mep_jsrv (long);
+void mep_synccp (); // volatile
+void mep_bcpaf (long, void *);
+void mep_bcpat (long, void *);
+void mep_bcpne (long, void *);
+void mep_bcpeq (long, void *);
+void mep_lmcpm1 (cp_data_bus_int*, long **, long);
+void mep_smcpm1 (cp_data_bus_int, long **, long);
+void mep_lwcpm1 (cp_data_bus_int*, long **, long);
+void mep_swcpm1 (cp_data_bus_int, long **, long);
+void mep_lhcpm1 (cp_data_bus_int*, long **, long);
+void mep_shcpm1 (cp_data_bus_int, long **, long);
+void mep_lbcpm1 (cp_data_bus_int*, long **, long);
+void mep_sbcpm1 (cp_data_bus_int, long **, long);
+void mep_lmcpm0 (cp_data_bus_int*, long **, long);
+void mep_smcpm0 (cp_data_bus_int, long **, long);
+void mep_lwcpm0 (cp_data_bus_int*, long **, long);
+void mep_swcpm0 (cp_data_bus_int, long **, long);
+void mep_lhcpm0 (cp_data_bus_int*, long **, long);
+void mep_shcpm0 (cp_data_bus_int, long **, long);
+void mep_lbcpm0 (cp_data_bus_int*, long **, long);
+void mep_sbcpm0 (cp_data_bus_int, long **, long);
+void mep_lmcpa (cp_data_bus_int*, long **, long);
+void mep_smcpa (cp_data_bus_int, long **, long);
+void mep_lwcpa (cp_data_bus_int*, long **, long);
+void mep_swcpa (cp_data_bus_int, long **, long);
+void mep_lhcpa (cp_data_bus_int*, long **, long);
+void mep_shcpa (cp_data_bus_int, long **, long);
+void mep_lbcpa (cp_data_bus_int*, long **, long);
+void mep_sbcpa (cp_data_bus_int, long **, long);
+void mep_lmcp16 (cp_data_bus_int*, long, long *);
+void mep_smcp16 (cp_data_bus_int, long, long *); // volatile
+void mep_lwcp16 (cp_data_bus_int*, long, long *);
+void mep_swcp16 (cp_data_bus_int, long, long *);
+void mep_lmcpi (cp_data_bus_int*, long **);
+void mep_smcpi (cp_data_bus_int, long **);
+void mep_lwcpi (cp_data_bus_int*, long **);
+void mep_swcpi (cp_data_bus_int, long **);
+void mep_lmcp (cp_data_bus_int*, long *);
+void mep_smcp (cp_data_bus_int, long *); // volatile
+void mep_lwcp (cp_data_bus_int*, long *);
+void mep_swcp (cp_data_bus_int, long *);
+void mep_ssubu (long*, long);
+void mep_saddu (long*, long);
+void mep_ssub (long*, long);
+void mep_sadd (long*, long);
+void mep_clipu (long*, long);
+void mep_clip (long*, long);
+void mep_maxu (long*, long);
+void mep_minu (long*, long);
+void mep_max (long*, long);
+void mep_min (long*, long);
+void mep_ave (long*, long);
+void mep_abs (long*, long);
+void mep_ldz (long*, long);
+void mep_dbreak (); // volatile
+void mep_dret ();
+void mep_divu (long, long);
+void mep_div (long, long);
+void mep_maddru (long*, long);
+void mep_maddr (long*, long);
+void mep_maddu (long, long);
+void mep_madd (long, long);
+void mep_mulru (long*, long);
+void mep_mulr (long*, long);
+void mep_mulu (long, long);
+void mep_mul (long, long);
+void mep_cache (long, long *); // volatile
+void mep_tas (long*, long *);
+void mep_btstm (long*, long *, long);
+void mep_bnotm (long *, long);
+void mep_bclrm (long *, long);
+void mep_bsetm (long *, long);
+void mep_ldcb (long*, long); // volatile
+void mep_stcb (long, long); // volatile
+void mep_syncm (); // volatile
+void mep_break (); // volatile
+void mep_swi (long); // volatile
+void mep_sleep (); // volatile
+void mep_halt (); // volatile
+void mep_reti ();
+void mep_ei (); // volatile
+void mep_di (); // volatile
+void mep_ldc (long*, long); // volatile
+void mep_ldc_lo (long*);
+void mep_ldc_hi (long*);
+void mep_ldc_lp (long*);
+void mep_stc (long, long); // volatile
+void mep_stc_lo (long);
+void mep_stc_hi (long);
+void mep_stc_lp (long);
+void mep_erepeat (void *);
+void mep_repeat (long, void *);
+void mep_ret ();
+void mep_jsr (long);
+void mep_jmp24 (void *);
+void mep_jmp (long);
+void mep_bsr24 (void *);
+void mep_bsr12 (void *);
+void mep_bne (long, long, void *);
+void mep_beq (long, long, void *);
+void mep_bgei (long, long, void *);
+void mep_blti (long, long, void *);
+void mep_bnei (long, long, void *);
+void mep_beqi (long, long, void *);
+void mep_bnez (long, void *);
+void mep_beqz (long, void *);
+void mep_bra (void *);
+void mep_fsft (long*, long); // volatile
+void mep_sll3 (long*, long, long);
+void mep_slli (long*, long);
+void mep_srli (long*, long);
+void mep_srai (long*, long);
+void mep_sll (long*, long);
+void mep_srl (long*, long);
+void mep_sra (long*, long);
+void mep_xor3 (long*, long, long);
+void mep_and3 (long*, long, long);
+void mep_or3 (long*, long, long);
+void mep_nor (long*, long);
+void mep_xor (long*, long);
+void mep_and (long*, long);
+void mep_or (long*, long);
+void mep_sltu3x (long*, long, long);
+void mep_slt3x (long*, long, long);
+void mep_add3x (long*, long, long);
+void mep_sl2ad3 (long*, long, long);
+void mep_sl1ad3 (long*, long, long);
+void mep_sltu3i (long*, long, long);
+void mep_slt3i (long*, long, long);
+void mep_sltu3 (long*, long, long);
+void mep_slt3 (long*, long, long);
+void mep_neg (long*, long);
+void mep_sbvck3 (long*, long, long);
+void mep_sub (long*, long);
+void mep_advck3 (long*, long, long);
+void mep_add3i (long*, long);
+void mep_add (long*, long);
+void mep_add3 (long*, long, long);
+void mep_movh (long*, long);
+void mep_movu16 (long*, long);
+void mep_movu24 (long*, long);
+void mep_movi16 (long*, long);
+void mep_movi8 (long*, long);
+void mep_mov (long*, long);
+void mep_ssarb (long, long); // volatile
+void mep_extuh (long*);
+void mep_extub (long*);
+void mep_exth (long*);
+void mep_extb (long*);
+void mep_lw24 (long*, long);
+void mep_sw24 (long, long);
+void mep_lhu16 (long*, long, long *);
+void mep_lbu16 (long*, long, long *);
+void mep_lw16 (long*, long, long *);
+void mep_lh16 (long*, long, long *);
+void mep_lb16 (long*, long, long *);
+void mep_sw16 (long, long, long *);
+void mep_sh16 (long, long, long *);
+void mep_sb16 (long, long, long *);
+void mep_lhu_tp (long*, long);
+void mep_lbu_tp (long*, long);
+void mep_lw_tp (long*, long);
+void mep_lh_tp (long*, long);
+void mep_lb_tp (long*, long);
+void mep_sw_tp (long, long);
+void mep_sh_tp (long, long);
+void mep_sb_tp (long, long);
+void mep_lw_sp (long*, long);
+void mep_sw_sp (long, long);
+void mep_lhu (long*, long *);
+void mep_lbu (long*, long *);
+void mep_lw (long*, long *);
+void mep_lh (long*, long *);
+void mep_lb (long*, long *);
+void mep_sw (long, long *);
+void mep_sh (long, long *);
+void mep_sb (long, long *);
+void mep_dsp1 (long*, long); // volatile
+void mep_dsp0 (long); // volatile
+void mep_dsp (long*, long, long); // volatile
+void mep_uci (long*, long, long); // volatile
+void mep_lhucpm1 (cp_data_bus_int*, long **, long);
+void mep_lbucpm1 (cp_data_bus_int*, long **, long);
+void mep_lhucpm0 (cp_data_bus_int*, long **, long);
+void mep_lbucpm0 (cp_data_bus_int*, long **, long);
+void mep_lhucpa (cp_data_bus_int*, long **, long);
+void mep_lbucpa (cp_data_bus_int*, long **, long);
+void mep_lhucp (cp_data_bus_int*, long, long *);
+void mep_lhcp (cp_data_bus_int*, long, long *);
+void mep_shcp (cp_data_bus_int, long, long *);
+void mep_lbucp (cp_data_bus_int*, long, long *);
+void mep_lbcp (cp_data_bus_int*, long, long *);
+void mep_sbcp (cp_data_bus_int, long, long *);
+void mep_casw3 (long*, long, long); // volatile
+void mep_cash3 (long*, long, long); // volatile
+void mep_casb3 (long*, long, long); // volatile
+void mep_prefd (long, long, long *); // volatile
+void mep_pref (long, long *); // volatile
+void mep_ldcb_r (long*, long *); // volatile
+void mep_stcb_r (long, long *); // volatile
+void mep_cmovh2 (long*, cp_data_bus_int);
+void mep_cmovh1 (cp_data_bus_int*, long);
+void mep_cmovc2 (long*, long); // volatile
+void mep_cmovc1 (long, long); // volatile
+void mep_cmov2 (long*, cp_data_bus_int);
+void mep_cmov1 (cp_data_bus_int*, long);
+cp_data_bus_int mep_cpmov (cp_data_bus_int);
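That closes the generated prototype listing for the MeP coprocessor intrinsics; each prototype corresponds one-to-one to a coprocessor instruction, and the trailing `// volatile` marks the ones the compiler must not reorder or delete. As a minimal sketch of how the three-operand vector intrinsics compose, assuming a mep-elf compiler configured with the matching coprocessor so that `cp_v4hi`, `mep_cpadd3_h`, and `mep_cpslli3_h` are available as built-ins (the function name and the pairing are illustrative, not from the source tree):

/* Hedged sketch: lane-wise add of two 4x16-bit coprocessor vectors,
   then a per-lane left shift, using two intrinsics prototyped above.
   Assumes a MeP toolchain where these names resolve to built-ins.  */
cp_v4hi add_then_shift (cp_v4hi a, cp_v4hi b)
{
  cp_v4hi sum = mep_cpadd3_h (a, b);   /* cpadd3.h: sum = a + b per lane   */
  return mep_cpslli3_h (sum, 1);       /* cpslli3.h: shift each lane left  */
}

Nothing here executes on a host compiler; it only shows the calling convention the prototypes imply.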
diff --git a/gcc/config/mep/intrinsics.md b/gcc/config/mep/intrinsics.md
new file mode 100644
index 000000000..44343d3b7
--- /dev/null
+++ b/gcc/config/mep/intrinsics.md
@@ -0,0 +1,21568 @@
+
+
+;; DO NOT EDIT: This file is automatically generated by CGEN.
+;; Any changes you make will be discarded when it is next regenerated.
+
+
+(define_predicate "cgen_h_sint_12a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -2048
+ && INTVAL (op) < 2048")))
+
+(define_predicate "cgen_h_uint_20a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 1048576")))
+
+(define_predicate "cgen_h_uint_7a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 128")))
+
+(define_predicate "cgen_h_uint_6a2_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 1) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 128")))
+
+(define_predicate "cgen_h_uint_22a4_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 3) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 33554432")))
+
+(define_predicate "cgen_h_sint_2a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -2
+ && INTVAL (op) < 2")))
+
+(define_predicate "cgen_h_uint_24a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 16777216")))
+
+(define_predicate "cgen_h_sint_6a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -32
+ && INTVAL (op) < 32")))
+
+(define_predicate "cgen_h_uint_5a4_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 3) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 256")))
+
+(define_predicate "cgen_h_uint_2a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 4")))
+
+(define_predicate "cgen_h_sint_10a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -512
+ && INTVAL (op) < 512")))
+
+(define_predicate "cgen_h_uint_4a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 16")))
+
+(define_predicate "cgen_h_uint_6a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 64")))
+
+(define_predicate "cgen_h_uint_16a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 65536")))
+
+(define_predicate "cgen_h_uint_8a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 256")))
+
+(define_predicate "cgen_h_sint_16a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -32768
+ && INTVAL (op) < 32768")))
+
+(define_predicate "cgen_h_uint_5a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 32")))
+
+(define_predicate "cgen_h_sint_8a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= -128
+ && INTVAL (op) < 128")))
+
+(define_predicate "cgen_h_uint_3a1_immediate"
+ (and (match_code "const_int")
+ (match_test "(INTVAL (op) & 0) == 0
+ && INTVAL (op) >= 0
+ && INTVAL (op) < 8")))
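These predicates are CGEN's stock encoding of "an N-bit signed or unsigned immediate with alignment A": the mask test rejects misaligned constants and the two comparisons bound the range. For the alignment-1 predicates the generated mask test `(INTVAL (op) & 0) == 0` is vacuously true and gets folded away, so only the range bounds matter there. A stand-alone C model of one concrete case, `cgen_h_uint_6a2_immediate` (even, non-negative, below 128); the helper name and the probe harness are illustrative only:

#include <stdio.h>

/* Mirrors the match_test of cgen_h_uint_6a2_immediate above:
   low bit clear, non-negative, and strictly below 128.  */
static int uint_6a2_ok (long v)
{
  return (v & 1) == 0 && v >= 0 && v < 128;
}

int main (void)
{
  long probe[] = { 0, 2, 63, 126, 127, 128, -2 };
  for (unsigned i = 0; i < sizeof probe / sizeof probe[0]; i++)
    printf ("%4ld -> %s\n", probe[i],
            uint_6a2_ok (probe[i]) ? "ok" : "reject");
  return 0;
}

Running the model accepts 0, 2, and 126 and rejects the odd, out-of-range, and negative probes, exactly as the RTL predicate would.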
+
+
+
+(define_insn "cgen_intrinsic_cpsmsbslla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2198))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2200))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2202))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2204))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2206))]
+ "CGEN_ENABLE_INSN_P (0)"
+ "cpsmsbslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2198))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2200))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2202))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2204))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2206))]
+ "CGEN_ENABLE_INSN_P (1)"
+ "cpsmsbslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2208))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2210))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2212))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2214))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2216))]
+ "CGEN_ENABLE_INSN_P (2)"
+ "cpsmsbslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2208))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2210))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2212))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2214))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2216))]
+ "CGEN_ENABLE_INSN_P (3)"
+ "cpsmsbslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2218))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2220))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2222))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2224))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2226))]
+ "CGEN_ENABLE_INSN_P (4)"
+ "cpsmsbslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2218))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2220))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2222))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2224))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2226))]
+ "CGEN_ENABLE_INSN_P (5)"
+ "cpsmsbslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2228))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2230))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2232))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2234))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2236))]
+ "CGEN_ENABLE_INSN_P (6)"
+ "cpsmsbslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbslua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2228))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2230))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2232))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2234))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2236))]
+ "CGEN_ENABLE_INSN_P (7)"
+ "cpsmsbslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2238))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2240))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2242))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2244))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2246))]
+ "CGEN_ENABLE_INSN_P (8)"
+ "cpsmadslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2238))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2240))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2242))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2244))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2246))]
+ "CGEN_ENABLE_INSN_P (9)"
+ "cpsmadslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2248))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2250))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2252))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2254))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2256))]
+ "CGEN_ENABLE_INSN_P (10)"
+ "cpsmadslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2248))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2250))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2252))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2254))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2256))]
+ "CGEN_ENABLE_INSN_P (11)"
+ "cpsmadslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2258))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2260))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2262))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2264))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2266))]
+ "CGEN_ENABLE_INSN_P (12)"
+ "cpsmadslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2258))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2260))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2262))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2264))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2266))]
+ "CGEN_ENABLE_INSN_P (13)"
+ "cpsmadslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2268))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2270))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2272))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2274))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2276))]
+ "CGEN_ENABLE_INSN_P (14)"
+ "cpsmadslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadslua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2268))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2270))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2272))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2274))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2276))]
+ "CGEN_ENABLE_INSN_P (15)"
+ "cpsmadslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2278))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2280))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2282))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2284))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2286))]
+ "CGEN_ENABLE_INSN_P (16)"
+ "cpmulslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2278))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2280))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2282))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2284))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2286))]
+ "CGEN_ENABLE_INSN_P (17)"
+ "cpmulslla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2288))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2290))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2292))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2294))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2296))]
+ "CGEN_ENABLE_INSN_P (18)"
+ "cpmulslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2288))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2290))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2292))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2294))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2296))]
+ "CGEN_ENABLE_INSN_P (19)"
+ "cpmulslua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2298))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2300))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2302))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2304))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2306))]
+ "CGEN_ENABLE_INSN_P (20)"
+ "cpmulslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2298))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2300))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2302))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2304))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2306))]
+ "CGEN_ENABLE_INSN_P (21)"
+ "cpmulslla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2308))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2310))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2312))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2314))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2316))]
+ "CGEN_ENABLE_INSN_P (22)"
+ "cpmulslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulslua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2308))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2310))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2312))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2314))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2316))]
+ "CGEN_ENABLE_INSN_P (23)"
+ "cpmulslua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2318))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2320))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2322))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2324))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2326))]
+ "CGEN_ENABLE_INSN_P (24)"
+ "cpsmsbla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2318))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2320))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2322))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2324))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2326))]
+ "CGEN_ENABLE_INSN_P (25)"
+ "cpsmsbla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2328))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2330))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2332))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2334))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2336))]
+ "CGEN_ENABLE_INSN_P (26)"
+ "cpsmsbua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2328))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2330))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2332))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2334))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2336))]
+ "CGEN_ENABLE_INSN_P (27)"
+ "cpsmsbua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2338))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2340))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2342))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2344))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2346))]
+ "CGEN_ENABLE_INSN_P (28)"
+ "cpsmsbla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2338))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2340))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2342))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2344))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2346))]
+ "CGEN_ENABLE_INSN_P (29)"
+ "cpsmsbla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2348))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2350))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2352))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2354))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2356))]
+ "CGEN_ENABLE_INSN_P (30)"
+ "cpsmsbua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmsbua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2348))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2350))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2352))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2354))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2356))]
+ "CGEN_ENABLE_INSN_P (31)"
+ "cpsmsbua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2358))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2360))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2362))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2364))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2366))]
+ "CGEN_ENABLE_INSN_P (32)"
+ "cpsmadla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2358))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2360))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2362))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2364))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2366))]
+ "CGEN_ENABLE_INSN_P (33)"
+ "cpsmadla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2368))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2370))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2372))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2374))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2376))]
+ "CGEN_ENABLE_INSN_P (34)"
+ "cpsmadua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2368))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2370))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2372))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2374))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2376))]
+ "CGEN_ENABLE_INSN_P (35)"
+ "cpsmadua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2378))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2380))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2382))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2384))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2386))]
+ "CGEN_ENABLE_INSN_P (36)"
+ "cpsmadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2378))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2380))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2382))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2384))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2386))]
+ "CGEN_ENABLE_INSN_P (37)"
+ "cpsmadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2388))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2390))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2392))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2394))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2396))]
+ "CGEN_ENABLE_INSN_P (38)"
+ "cpsmadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsmadua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2388))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2390))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2392))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2394))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2396))]
+ "CGEN_ENABLE_INSN_P (39)"
+ "cpsmadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2398))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2400))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2402))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2404))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2406))]
+ "CGEN_ENABLE_INSN_P (40)"
+ "cpmsbla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2398))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2400))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2402))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2404))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2406))]
+ "CGEN_ENABLE_INSN_P (41)"
+ "cpmsbla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2408))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2410))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2412))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2414))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2416))]
+ "CGEN_ENABLE_INSN_P (42)"
+ "cpmsbua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2408))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2410))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2412))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2414))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2416))]
+ "CGEN_ENABLE_INSN_P (43)"
+ "cpmsbua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1u_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2418))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2420))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2422))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2424))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2426))]
+ "CGEN_ENABLE_INSN_P (44)"
+ "cpmsbla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1u_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2418))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2420))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2422))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2424))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2426))]
+ "CGEN_ENABLE_INSN_P (45)"
+ "cpmsbla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1u_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2428))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2430))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2432))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2434))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2436))]
+ "CGEN_ENABLE_INSN_P (46)"
+ "cpmsbua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1u_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2428))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2430))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2432))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2434))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2436))]
+ "CGEN_ENABLE_INSN_P (47)"
+ "cpmsbua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2438))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2440))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2442))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2444))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2446))]
+ "CGEN_ENABLE_INSN_P (48)"
+ "cpmsbla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2438))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2440))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2442))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2444))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2446))]
+ "CGEN_ENABLE_INSN_P (49)"
+ "cpmsbla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2448))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2450))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2452))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2454))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2456))]
+ "CGEN_ENABLE_INSN_P (50)"
+ "cpmsbua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmsbua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2448))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2450))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2452))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2454))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2456))]
+ "CGEN_ENABLE_INSN_P (51)"
+ "cpmsbua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
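+;; cpmad*: the matching multiply-and-add intrinsics; same register
+;; groups and naming scheme as the cpmsb* patterns above.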
+(define_insn "cgen_intrinsic_cpmadla1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2458))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2460))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2462))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2464))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2466))]
+ "CGEN_ENABLE_INSN_P (52)"
+ "cpmadla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadla1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2458))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2460))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2462))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2464))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2466))]
+ "CGEN_ENABLE_INSN_P (53)"
+ "cpmadla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2468))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2470))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2472))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2474))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2476))]
+ "CGEN_ENABLE_INSN_P (54)"
+ "cpmadua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2468))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2470))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2472))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2474))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2476))]
+ "CGEN_ENABLE_INSN_P (55)"
+ "cpmadua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadla1u_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2478))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2480))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2482))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2484))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2486))]
+ "CGEN_ENABLE_INSN_P (56)"
+ "cpmadla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadla1u_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2478))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2480))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2482))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2484))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2486))]
+ "CGEN_ENABLE_INSN_P (57)"
+ "cpmadla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1u_w_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2488))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2490))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2492))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2494))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2496))]
+ "CGEN_ENABLE_INSN_P (58)"
+ "cpmadua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1u_w_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2488))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2490))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2492))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2494))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2496))]
+ "CGEN_ENABLE_INSN_P (59)"
+ "cpmadua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2498))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2500))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2502))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2504))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2506))]
+ "CGEN_ENABLE_INSN_P (60)"
+ "cpmadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2498))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2500))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2502))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2504))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2506))]
+ "CGEN_ENABLE_INSN_P (61)"
+ "cpmadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2508))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2510))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2512))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2514))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2516))]
+ "CGEN_ENABLE_INSN_P (62)"
+ "cpmadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmadua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2508))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2510))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2512))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2514))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2516))]
+ "CGEN_ENABLE_INSN_P (63)"
+ "cpmadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmada1_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2518))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2520))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2522))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2524))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2526))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2528))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2530))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2532))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2534))]
+ "CGEN_ENABLE_INSN_P (64)"
+ "cpmada1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmada1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2518))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2520))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2522))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2524))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2526))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2528))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2530))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2532))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2534))]
+ "CGEN_ENABLE_INSN_P (65)"
+ "cpmada1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmada1u_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2536))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2538))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2540))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2542))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2544))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2546))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2548))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2550))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2552))]
+ "CGEN_ENABLE_INSN_P (66)"
+ "cpmada1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmada1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2536))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2538))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2540))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2542))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2544))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2546))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2548))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2550))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2552))]
+ "CGEN_ENABLE_INSN_P (67)"
+ "cpmada1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
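+;; cpmul*: plain multiply intrinsics.  Unlike the cpmad*/cpmsb*
+;; patterns these omit the (set (reg:SI 87) ...) element and write
+;; only the four registers of one accumulator half (104-107 or
+;; 108-111), or all eight for the .b byte forms further below.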
+(define_insn "cgen_intrinsic_cpmulla1_w_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2554))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2556))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2558))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2560))]
+ "CGEN_ENABLE_INSN_P (68)"
+ "cpmulla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulla1_w_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2554))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2556))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2558))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2560))]
+ "CGEN_ENABLE_INSN_P (69)"
+ "cpmulla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1_w_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2562))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2564))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2566))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2568))]
+ "CGEN_ENABLE_INSN_P (70)"
+ "cpmulua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1_w_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2562))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2564))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2566))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2568))]
+ "CGEN_ENABLE_INSN_P (71)"
+ "cpmulua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulla1u_w_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2570))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2572))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2574))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2576))]
+ "CGEN_ENABLE_INSN_P (72)"
+ "cpmulla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulla1u_w_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2570))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2572))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2574))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2576))]
+ "CGEN_ENABLE_INSN_P (73)"
+ "cpmulla1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1u_w_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2578))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2580))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2582))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2584))]
+ "CGEN_ENABLE_INSN_P (74)"
+ "cpmulua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1u_w_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2578))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2580))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2582))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2584))]
+ "CGEN_ENABLE_INSN_P (75)"
+ "cpmulua1u.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulla1_h_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2586))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2588))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2590))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2592))]
+ "CGEN_ENABLE_INSN_P (76)"
+ "cpmulla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulla1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2586))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2588))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2590))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2592))]
+ "CGEN_ENABLE_INSN_P (77)"
+ "cpmulla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1_h_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2594))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2596))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2598))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2600))]
+ "CGEN_ENABLE_INSN_P (78)"
+ "cpmulua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmulua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2594))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2596))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2598))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2600))]
+ "CGEN_ENABLE_INSN_P (79)"
+ "cpmulua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmula1_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2602))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2604))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2606))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2608))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2610))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2612))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2614))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2616))]
+ "CGEN_ENABLE_INSN_P (80)"
+ "cpmula1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmula1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2602))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2604))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2606))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2608))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2610))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2612))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2614))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2616))]
+ "CGEN_ENABLE_INSN_P (81)"
+ "cpmula1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmula1u_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2618))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2620))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2622))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2624))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2626))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2628))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2630))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2632))]
+ "CGEN_ENABLE_INSN_P (82)"
+ "cpmula1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmula1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2618))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2620))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2622))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2624))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2626))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2628))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2630))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2632))]
+ "CGEN_ENABLE_INSN_P (83)"
+ "cpmula1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
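+;; cpssda1*/cpssqa1*: byte-wise sum-of-differences / sum-of-squares
+;; style accumulate intrinsics (going by the mnemonics); each writes
+;; all eight registers 104-111.  Note that for cpssda1.b and
+;; cpssda1u.b only the _C3 variant includes the (reg:SI 87) set; the
+;; _P1 twins and both cpssqa1* variants omit it.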
+(define_insn "cgen_intrinsic_cpssda1_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2634))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2636))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2638))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2640))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2642))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2644))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2646))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2648))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2650))]
+ "CGEN_ENABLE_INSN_P (84)"
+ "cpssda1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssda1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2634))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2636))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2638))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2640))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2642))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2644))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2646))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2648))]
+ "CGEN_ENABLE_INSN_P (85)"
+ "cpssda1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssda1u_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2650))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2652))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2654))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2656))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2658))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2660))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2662))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2664))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2666))]
+ "CGEN_ENABLE_INSN_P (86)"
+ "cpssda1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssda1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2650))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2652))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2654))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2656))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2658))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2660))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2662))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2664))]
+ "CGEN_ENABLE_INSN_P (87)"
+ "cpssda1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssqa1_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2666))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2668))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2670))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2672))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2674))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2676))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2678))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2680))]
+ "CGEN_ENABLE_INSN_P (88)"
+ "cpssqa1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssqa1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2666))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2668))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2670))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2672))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2674))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2676))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2678))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2680))]
+ "CGEN_ENABLE_INSN_P (89)"
+ "cpssqa1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssqa1u_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2682))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2684))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2686))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2688))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2690))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2692))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2694))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2696))]
+ "CGEN_ENABLE_INSN_P (90)"
+ "cpssqa1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssqa1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2682))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2684))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2686))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2688))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2690))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2692))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2694))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2696))]
+ "CGEN_ENABLE_INSN_P (91)"
+ "cpssqa1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
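+;; cpfmadi*/cpfmuli*: immediate forms, generated only as _P1
+;; variants.  They take two extra operands matched by the
+;; cgen_h_uint_3a1_immediate and cgen_h_sint_8a1_immediate
+;; predicates -- going by the predicate names, a 3-bit unsigned and
+;; an 8-bit signed immediate -- which are threaded through every set
+;; of the parallel via match_dup.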
+(define_insn "cgen_intrinsic_cpfmadila1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1000))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1002))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1004))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1006))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1008))]
+ "CGEN_ENABLE_INSN_P (92)"
+ "cpfmadila1.h\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadiua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1010))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1012))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1014))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1016))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1018))]
+ "CGEN_ENABLE_INSN_P (93)"
+ "cpfmadiua1.h\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1020))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1022))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1024))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1026))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1028))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1030))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1032))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1034))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1036))]
+ "CGEN_ENABLE_INSN_P (94)"
+ "cpfmadia1.b\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1038))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1040))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1042))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1044))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1046))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1048))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1050))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1052))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1054))]
+ "CGEN_ENABLE_INSN_P (95)"
+ "cpfmadia1u.b\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulila1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1056))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1058))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1060))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1062))]
+ "CGEN_ENABLE_INSN_P (96)"
+ "cpfmulila1.h\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmuliua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1064))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1066))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1068))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1070))]
+ "CGEN_ENABLE_INSN_P (97)"
+ "cpfmuliua1.h\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1072))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1074))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1076))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1078))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1080))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1082))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1084))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1086))]
+ "CGEN_ENABLE_INSN_P (98)"
+ "cpfmulia1.b\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")
+ ] 1088))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1090))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1092))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1094))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1096))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1098))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1100))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 1102))]
+ "CGEN_ENABLE_INSN_P (99)"
+ "cpfmulia1u.b\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
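+;; cpamadi*: as cpfmadi* above but with a single immediate operand
+;; (cgen_h_sint_8a1_immediate); likewise generated only as _P1
+;; variants.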
+(define_insn "cgen_intrinsic_cpamadila1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1104))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1106))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1108))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1110))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1112))]
+ "CGEN_ENABLE_INSN_P (100)"
+ "cpamadila1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamadiua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1114))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1116))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1118))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1120))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1122))]
+ "CGEN_ENABLE_INSN_P (101)"
+ "cpamadiua1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamadia1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1124))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1126))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1128))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1130))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1132))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1134))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1136))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1138))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1140))]
+ "CGEN_ENABLE_INSN_P (102)"
+ "cpamadia1.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamadia1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1142))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1144))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1146))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1148))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1150))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1152))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1154))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1156))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1158))]
+ "CGEN_ENABLE_INSN_P (103)"
+ "cpamadia1u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
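+;; cpamuli*: multiply counterparts of the cpamadi* patterns, with the
+;; same single cgen_h_sint_8a1_immediate operand and _P1-only
+;; slotting.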
+(define_insn "cgen_intrinsic_cpamulila1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1160))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1162))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1164))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1166))]
+ "CGEN_ENABLE_INSN_P (104)"
+ "cpamulila1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamuliua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1168))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1170))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1172))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1174))]
+ "CGEN_ENABLE_INSN_P (105)"
+ "cpamuliua1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamulia1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1176))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1178))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1180))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1182))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1184))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1186))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1188))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1190))]
+ "CGEN_ENABLE_INSN_P (106)"
+ "cpamulia1.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpamulia1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1192))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1194))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1196))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1198))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1200))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1202))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1204))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1206))]
+ "CGEN_ENABLE_INSN_P (107)"
+ "cpamulia1u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadila1s1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1208))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1210))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1212))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1214))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1216))]
+ "CGEN_ENABLE_INSN_P (108)"
+ "cpfmadila1s1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadiua1s1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1218))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1220))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1222))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1224))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1226))]
+ "CGEN_ENABLE_INSN_P (109)"
+ "cpfmadiua1s1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1s1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1228))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1230))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1232))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1234))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1236))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1238))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1240))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1242))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1244))]
+ "CGEN_ENABLE_INSN_P (110)"
+ "cpfmadia1s1.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1s1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1246))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1248))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1250))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1252))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1254))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1256))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1258))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1260))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1262))]
+ "CGEN_ENABLE_INSN_P (111)"
+ "cpfmadia1s1u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulila1s1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1264))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1266))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1268))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1270))]
+ "CGEN_ENABLE_INSN_P (112)"
+ "cpfmulila1s1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmuliua1s1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1272))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1274))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1276))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1278))]
+ "CGEN_ENABLE_INSN_P (113)"
+ "cpfmuliua1s1.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1s1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1280))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1282))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1284))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1286))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1288))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1290))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1292))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1294))]
+ "CGEN_ENABLE_INSN_P (114)"
+ "cpfmulia1s1.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1s1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1296))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1298))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1300))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1302))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1304))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1306))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1308))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1310))]
+ "CGEN_ENABLE_INSN_P (115)"
+ "cpfmulia1s1u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadila1s0_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1312))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1314))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1316))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1318))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1320))]
+ "CGEN_ENABLE_INSN_P (116)"
+ "cpfmadila1s0.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadiua1s0_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1322))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1324))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1326))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1328))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1330))]
+ "CGEN_ENABLE_INSN_P (117)"
+ "cpfmadiua1s0.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1s0_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1332))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1334))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1336))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1338))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1340))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1342))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1344))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1346))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1348))]
+ "CGEN_ENABLE_INSN_P (118)"
+ "cpfmadia1s0.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmadia1s0u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1350))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1352))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1354))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1356))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1358))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1360))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1362))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1364))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1366))]
+ "CGEN_ENABLE_INSN_P (119)"
+ "cpfmadia1s0u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulila1s0_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1368))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1370))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1372))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1374))]
+ "CGEN_ENABLE_INSN_P (120)"
+ "cpfmulila1s0.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmuliua1s0_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1376))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1378))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1380))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1382))]
+ "CGEN_ENABLE_INSN_P (121)"
+ "cpfmuliua1s0.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1s0_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1384))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1386))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1388))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1390))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1392))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1394))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1396))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1398))]
+ "CGEN_ENABLE_INSN_P (122)"
+ "cpfmulia1s0.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfmulia1s0u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:SI 2 "cgen_h_sint_8a1_immediate" "")
+ ] 1400))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1402))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1404))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1406))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1408))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1410))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1412))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ ] 1414))]
+ "CGEN_ENABLE_INSN_P (123)"
+ "cpfmulia1s0u.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
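+;; Shift-by-immediate accumulator patterns.  Operand 0 is a 5-bit
+;; unsigned immediate (cgen_h_uint_5a1_immediate) broadcast unchanged
+;; into every per-register unspec_volatile set.  Each insn appears
+;; twice with identical RTL and assembler output but a different
+;; "slots" attribute ("c3" vs "p1"), presumably so it can be issued in
+;; either VLIW slot; only the enable predicate and the slot constraint
+;; differ between the paired patterns.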
+(define_insn "cgen_intrinsic_cpsllia1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2698))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2700))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2702))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2704))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2706))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2708))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2710))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2712))]
+ "CGEN_ENABLE_INSN_P (124)"
+ "cpsllia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsllia1_1_p1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2698))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2700))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2702))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2704))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2706))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2708))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2710))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2712))]
+ "CGEN_ENABLE_INSN_P (125)"
+ "cpsllia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraia1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2714))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2716))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2718))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2720))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2722))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2724))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2726))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2728))]
+ "CGEN_ENABLE_INSN_P (126)"
+ "cpsraia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraia1_1_p1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2714))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2716))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2718))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2720))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2722))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2724))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2726))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2728))]
+ "CGEN_ENABLE_INSN_P (127)"
+ "cpsraia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrlia1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2730))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2732))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2734))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2736))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2738))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2740))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2742))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2744))]
+ "CGEN_ENABLE_INSN_P (128)"
+ "cpsrlia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrlia1_1_p1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 2730))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2732))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2734))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2736))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2738))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2740))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2742))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2744))]
+ "CGEN_ENABLE_INSN_P (129)"
+ "cpsrlia1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslla1_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2746))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2748))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2750))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2752))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2754))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2756))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2758))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2760))]
+ "CGEN_ENABLE_INSN_P (130)"
+ "cpslla1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslla1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2746))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2748))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2750))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2752))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2754))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2756))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2758))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2760))]
+ "CGEN_ENABLE_INSN_P (131)"
+ "cpslla1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraa1_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2762))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2764))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2766))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2768))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2770))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2772))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2774))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2776))]
+ "CGEN_ENABLE_INSN_P (132)"
+ "cpsraa1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraa1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2762))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2764))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2766))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2768))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2770))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2772))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2774))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2776))]
+ "CGEN_ENABLE_INSN_P (133)"
+ "cpsraa1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrla1_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2778))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2780))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2782))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2784))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2786))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2788))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2790))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2792))]
+ "CGEN_ENABLE_INSN_P (134)"
+ "cpsrla1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrla1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 2778))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2780))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2782))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2784))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2786))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2788))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2790))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 2792))]
+ "CGEN_ENABLE_INSN_P (135)"
+ "cpsrla1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
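+;; Operand-less accumulator operations.  These take no operands; the
+;; (const_int 0) inside each unspec_volatile is a conventional dummy,
+;; and the patterns exist purely to record which fixed registers the
+;; instruction overwrites (cpacswp touches the full 96-111 block,
+;; cpaccpa1 only 104-111, and cpacsuma1 additionally sets 87).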
+(define_insn "cgen_intrinsic_cpacswp_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1416))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1418))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1420))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1422))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1424))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1426))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1428))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1430))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1432))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1434))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1436))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1438))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1440))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1442))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1444))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1446))]
+ "CGEN_ENABLE_INSN_P (136)"
+ "cpacswp"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaccpa1_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1448))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1450))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1452))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1454))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1456))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1458))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1460))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1462))]
+ "CGEN_ENABLE_INSN_P (137)"
+ "cpaccpa1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacsuma1_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1464))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1466))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1468))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1470))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1472))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1474))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1476))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1478))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1480))]
+ "CGEN_ENABLE_INSN_P (138)"
+ "cpacsuma1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
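+;; Move/pack patterns.  Unlike the patterns above, these produce a
+;; value: operand 0 is a 64-bit destination register ("=x") written
+;; directly from the unspec_volatile, with no fixed-register side
+;; effects.  Each C3/P1 pair shares a single unspec number and differs
+;; only in its enable predicate and "slots" attribute.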
+(define_insn "cgen_intrinsic_cpmovhla1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2794))]
+ "CGEN_ENABLE_INSN_P (139)"
+ "cpmovhla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovhla1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2794))]
+ "CGEN_ENABLE_INSN_P (140)"
+ "cpmovhla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovhua1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2796))]
+ "CGEN_ENABLE_INSN_P (141)"
+ "cpmovhua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovhua1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2796))]
+ "CGEN_ENABLE_INSN_P (142)"
+ "cpmovhua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2798))]
+ "CGEN_ENABLE_INSN_P (143)"
+ "cppackla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2798))]
+ "CGEN_ENABLE_INSN_P (144)"
+ "cppackla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2800))]
+ "CGEN_ENABLE_INSN_P (145)"
+ "cppackua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2800))]
+ "CGEN_ENABLE_INSN_P (146)"
+ "cppackua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla1_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2802))]
+ "CGEN_ENABLE_INSN_P (147)"
+ "cppackla1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla1_h_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2802))]
+ "CGEN_ENABLE_INSN_P (148)"
+ "cppackla1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua1_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2804))]
+ "CGEN_ENABLE_INSN_P (149)"
+ "cppackua1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua1_h_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2804))]
+ "CGEN_ENABLE_INSN_P (150)"
+ "cppackua1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka1_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2806))]
+ "CGEN_ENABLE_INSN_P (151)"
+ "cppacka1.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka1_b_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2806))]
+ "CGEN_ENABLE_INSN_P (152)"
+ "cppacka1.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka1u_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2808))]
+ "CGEN_ENABLE_INSN_P (153)"
+ "cppacka1u.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka1u_b_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2808))]
+ "CGEN_ENABLE_INSN_P (154)"
+ "cppacka1u.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlla1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2810))]
+ "CGEN_ENABLE_INSN_P (155)"
+ "cpmovlla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlla1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2810))]
+ "CGEN_ENABLE_INSN_P (156)"
+ "cpmovlla1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlua1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2812))]
+ "CGEN_ENABLE_INSN_P (157)"
+ "cpmovlua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlua1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2812))]
+ "CGEN_ENABLE_INSN_P (158)"
+ "cpmovlua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovula1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2814))]
+ "CGEN_ENABLE_INSN_P (159)"
+ "cpmovula1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovula1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2814))]
+ "CGEN_ENABLE_INSN_P (160)"
+ "cpmovula1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovuua1_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2816))]
+ "CGEN_ENABLE_INSN_P (161)"
+ "cpmovuua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovuua1_w_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2816))]
+ "CGEN_ENABLE_INSN_P (162)"
+ "cpmovuua1.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovla1_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2818))]
+ "CGEN_ENABLE_INSN_P (163)"
+ "cpmovla1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovla1_h_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2818))]
+ "CGEN_ENABLE_INSN_P (164)"
+ "cpmovla1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovua1_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2820))]
+ "CGEN_ENABLE_INSN_P (165)"
+ "cpmovua1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovua1_h_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2820))]
+ "CGEN_ENABLE_INSN_P (166)"
+ "cpmovua1.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmova1_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2822))]
+ "CGEN_ENABLE_INSN_P (167)"
+ "cpmova1.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmova1_b_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 2822))]
+ "CGEN_ENABLE_INSN_P (168)"
+ "cpmova1.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
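+;; Two-source patterns (set, sum-of-absolute-differences, absolute
+;; difference).  These read two 64-bit register operands and, like the
+;; arithmetic patterns earlier, describe their results only as
+;; unspec_volatile stores into fixed hard registers; the cpsad*
+;; variants additionally set hard register 87, as the cpamad*
+;; patterns above do.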
+(define_insn "cgen_intrinsic_cpsetla1_w_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2824))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2826))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2828))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2830))]
+ "CGEN_ENABLE_INSN_P (169)"
+ "cpsetla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsetla1_w_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2824))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2826))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2828))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2830))]
+ "CGEN_ENABLE_INSN_P (170)"
+ "cpsetla1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsetua1_w_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2832))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2834))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2836))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2838))]
+ "CGEN_ENABLE_INSN_P (171)"
+ "cpsetua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsetua1_w_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2832))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2834))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2836))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2838))]
+ "CGEN_ENABLE_INSN_P (172)"
+ "cpsetua1.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpseta1_h_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2840))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2842))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2844))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2846))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2848))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2850))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2852))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2854))]
+ "CGEN_ENABLE_INSN_P (173)"
+ "cpseta1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpseta1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2840))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2842))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2844))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2846))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2848))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2850))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2852))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2854))]
+ "CGEN_ENABLE_INSN_P (174)"
+ "cpseta1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2856))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2858))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2860))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2862))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2864))]
+ "CGEN_ENABLE_INSN_P (175)"
+ "cpsadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2856))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2858))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2860))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2862))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2864))]
+ "CGEN_ENABLE_INSN_P (176)"
+ "cpsadla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2866))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2868))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2870))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2872))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2874))]
+ "CGEN_ENABLE_INSN_P (177)"
+ "cpsadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2866))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2868))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2870))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2872))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2874))]
+ "CGEN_ENABLE_INSN_P (178)"
+ "cpsadua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada1_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2876))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2878))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2880))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2882))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2884))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2886))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2888))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2890))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2892))]
+ "CGEN_ENABLE_INSN_P (179)"
+ "cpsada1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2876))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2878))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2880))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2882))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2884))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2886))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2888))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2890))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2892))]
+ "CGEN_ENABLE_INSN_P (180)"
+ "cpsada1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada1u_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2894))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2896))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2898))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2900))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2902))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2904))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2906))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2908))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2910))]
+ "CGEN_ENABLE_INSN_P (181)"
+ "cpsada1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2894))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2896))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2898))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2900))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2902))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2904))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2906))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2908))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2910))]
+ "CGEN_ENABLE_INSN_P (182)"
+ "cpsada1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
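+;; The cpabs*/cpsub*/cpadd* groups that follow touch only part of the
+;; register block: the ".h" l/u forms set registers 107-104 or 111-108
+;; respectively (presumably the low and high halves of the accumulator
+;; bank), the ".b" forms set all of 111-104, and the accumulating "ac"
+;; forms also set register 87.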
+(define_insn "cgen_intrinsic_cpabsla1_h_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2912))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2914))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2916))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2918))]
+ "CGEN_ENABLE_INSN_P (183)"
+ "cpabsla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsla1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2912))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2914))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2916))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2918))]
+ "CGEN_ENABLE_INSN_P (184)"
+ "cpabsla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsua1_h_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2920))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2922))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2924))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2926))]
+ "CGEN_ENABLE_INSN_P (185)"
+ "cpabsua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2920))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2922))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2924))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2926))]
+ "CGEN_ENABLE_INSN_P (186)"
+ "cpabsua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa1_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2928))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2930))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2932))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2934))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2936))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2938))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2940))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2942))]
+ "CGEN_ENABLE_INSN_P (187)"
+ "cpabsa1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2928))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2930))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2932))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2934))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2936))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2938))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2940))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2942))]
+ "CGEN_ENABLE_INSN_P (188)"
+ "cpabsa1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa1u_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2944))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2946))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2948))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2950))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2952))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2954))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2956))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2958))]
+ "CGEN_ENABLE_INSN_P (189)"
+ "cpabsa1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2944))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2946))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2948))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2950))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2952))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2954))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2956))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2958))]
+ "CGEN_ENABLE_INSN_P (190)"
+ "cpabsa1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2960))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2962))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2964))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2966))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2968))]
+ "CGEN_ENABLE_INSN_P (191)"
+ "cpsubacla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2960))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2962))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2964))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2966))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2968))]
+ "CGEN_ENABLE_INSN_P (192)"
+ "cpsubacla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2970))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2972))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2974))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2976))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2978))]
+ "CGEN_ENABLE_INSN_P (193)"
+ "cpsubacua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2970))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2972))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2974))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2976))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2978))]
+ "CGEN_ENABLE_INSN_P (194)"
+ "cpsubacua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca1_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2980))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2982))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2984))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2986))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2988))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2990))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2992))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2994))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2996))]
+ "CGEN_ENABLE_INSN_P (195)"
+ "cpsubaca1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2980))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2982))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2984))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2986))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2988))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2990))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2992))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2994))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2996))]
+ "CGEN_ENABLE_INSN_P (196)"
+ "cpsubaca1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca1u_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2998))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3000))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3002))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3004))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3006))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3008))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3010))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3012))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3014))]
+ "CGEN_ENABLE_INSN_P (197)"
+ "cpsubaca1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2998))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3000))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3002))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3004))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3006))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3008))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3010))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3012))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3014))]
+ "CGEN_ENABLE_INSN_P (198)"
+ "cpsubaca1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubla1_h_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3016))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3018))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3020))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3022))]
+ "CGEN_ENABLE_INSN_P (199)"
+ "cpsubla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubla1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3016))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3018))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3020))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3022))]
+ "CGEN_ENABLE_INSN_P (200)"
+ "cpsubla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubua1_h_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3024))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3026))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3028))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3030))]
+ "CGEN_ENABLE_INSN_P (201)"
+ "cpsubua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3024))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3026))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3028))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3030))]
+ "CGEN_ENABLE_INSN_P (202)"
+ "cpsubua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba1_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3032))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3034))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3036))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3038))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3040))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3042))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3044))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3046))]
+ "CGEN_ENABLE_INSN_P (203)"
+ "cpsuba1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3032))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3034))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3036))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3038))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3040))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3042))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3044))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3046))]
+ "CGEN_ENABLE_INSN_P (204)"
+ "cpsuba1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba1u_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3048))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3050))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3052))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3054))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3056))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3058))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3060))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3062))]
+ "CGEN_ENABLE_INSN_P (205)"
+ "cpsuba1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3048))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3050))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3052))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3054))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3056))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3058))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3060))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3062))]
+ "CGEN_ENABLE_INSN_P (206)"
+ "cpsuba1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacla1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3064))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3066))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3068))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3070))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3072))]
+ "CGEN_ENABLE_INSN_P (207)"
+ "cpaddacla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacla1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3064))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3066))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3068))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3070))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3072))]
+ "CGEN_ENABLE_INSN_P (208)"
+ "cpaddacla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacua1_h_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3074))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3076))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3078))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3080))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3082))]
+ "CGEN_ENABLE_INSN_P (209)"
+ "cpaddacua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacua1_h_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3074))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3076))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3078))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3080))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3082))]
+ "CGEN_ENABLE_INSN_P (210)"
+ "cpaddacua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca1_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3084))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3086))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3088))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3090))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3092))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3094))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3096))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3098))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3100))]
+ "CGEN_ENABLE_INSN_P (211)"
+ "cpaddaca1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca1_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3084))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3086))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3088))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3090))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3092))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3094))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3096))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3098))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3100))]
+ "CGEN_ENABLE_INSN_P (212)"
+ "cpaddaca1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca1u_b_C3"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3102))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3104))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3106))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3108))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3110))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3112))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3114))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3116))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3118))]
+ "CGEN_ENABLE_INSN_P (213)"
+ "cpaddaca1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca1u_b_P1"
+ [(set (reg:SI 87)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3102))
+ (set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3104))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3106))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3108))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3110))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3112))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3114))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3116))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3118))]
+ "CGEN_ENABLE_INSN_P (214)"
+ "cpaddaca1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddla1_h_C3"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3120))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3122))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3124))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3126))]
+ "CGEN_ENABLE_INSN_P (215)"
+ "cpaddla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddla1_h_P1"
+ [(set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3120))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3122))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3124))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3126))]
+ "CGEN_ENABLE_INSN_P (216)"
+ "cpaddla1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddua1_h_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3128))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3130))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3132))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3134))]
+ "CGEN_ENABLE_INSN_P (217)"
+ "cpaddua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddua1_h_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3128))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3130))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3132))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3134))]
+ "CGEN_ENABLE_INSN_P (218)"
+ "cpaddua1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda1_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3136))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3138))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3140))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3142))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3144))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3146))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3148))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3150))]
+ "CGEN_ENABLE_INSN_P (219)"
+ "cpadda1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda1_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3136))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3138))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3140))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3142))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3144))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3146))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3148))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3150))]
+ "CGEN_ENABLE_INSN_P (220)"
+ "cpadda1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda1u_b_C3"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3152))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3154))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3156))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3158))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3160))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3162))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3164))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3166))]
+ "CGEN_ENABLE_INSN_P (221)"
+ "cpadda1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda1u_b_P1"
+ [(set (reg:SI 111)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3152))
+ (set (reg:SI 110)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3154))
+ (set (reg:SI 109)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3156))
+ (set (reg:SI 108)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3158))
+ (set (reg:SI 107)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3160))
+ (set (reg:SI 106)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3162))
+ (set (reg:SI 105)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3164))
+ (set (reg:SI 104)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3166))]
+ "CGEN_ENABLE_INSN_P (222)"
+ "cpadda1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
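+;; From here on the patterns are immediate moves: plain unspecs whose
+;; operand 1 predicate encodes signedness and width, widening with the
+;; slot. For example, cdmovi takes an 8-bit signed immediate in the C3
+;; form but a 16-bit one in the P0_P1 form, going by the
+;; cgen_h_sint_8a1_immediate / cgen_h_sint_16a1_immediate names.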
+(define_insn "cgen_intrinsic_cpmovi_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3180))]
+ "CGEN_ENABLE_INSN_P (223)"
+ "cpmovi.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovi_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3180))]
+ "CGEN_ENABLE_INSN_P (224)"
+ "cpmovi.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_c1nop_P1"
+ [(unspec_volatile [
+ (const_int 0)
+ ] 1482)]
+ "CGEN_ENABLE_INSN_P (225)"
+ "c1nop"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdmovi_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3168))]
+ "CGEN_ENABLE_INSN_P (226)"
+ "cdmovi\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdmovi_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_16a1_immediate" "")
+ ] 3168))]
+ "CGEN_ENABLE_INSN_P (227)"
+ "cdmovi\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdmoviu_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_uint_8a1_immediate" "")
+ ] 3170))]
+ "CGEN_ENABLE_INSN_P (228)"
+ "cdmoviu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdmoviu_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3170))]
+ "CGEN_ENABLE_INSN_P (229)"
+ "cdmoviu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovi_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3172))]
+ "CGEN_ENABLE_INSN_P (230)"
+ "cpmovi.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovi_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_16a1_immediate" "")
+ ] 3172))]
+ "CGEN_ENABLE_INSN_P (231)"
+ "cpmovi.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmoviu_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_uint_8a1_immediate" "")
+ ] 3174))]
+ "CGEN_ENABLE_INSN_P (232)"
+ "cpmoviu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmoviu_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3174))]
+ "CGEN_ENABLE_INSN_P (233)"
+ "cpmoviu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovi_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3176))]
+ "CGEN_ENABLE_INSN_P (234)"
+ "cpmovi.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovi_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_16a1_immediate" "")
+ ] 3176))]
+ "CGEN_ENABLE_INSN_P (235)"
+ "cpmovi.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
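+;; The clip and shift-by-immediate patterns below bound the count by
+;; element size: judging by the cgen_h_uint_*a1_immediate predicate
+;; names, the 64-bit cd* forms take a 6-bit immediate, the .w forms
+;; 5 bits, the .h forms 4 bits and the .b forms 3 bits.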
+(define_insn "cgen_intrinsic_cdclipi3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3182))]
+ "CGEN_ENABLE_INSN_P (236)"
+ "cdclipi3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdclipi3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3182))]
+ "CGEN_ENABLE_INSN_P (237)"
+ "cdclipi3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdclipiu3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3184))]
+ "CGEN_ENABLE_INSN_P (238)"
+ "cdclipiu3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdclipiu3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3184))]
+ "CGEN_ENABLE_INSN_P (239)"
+ "cdclipiu3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpclipi3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3186))]
+ "CGEN_ENABLE_INSN_P (240)"
+ "cpclipi3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpclipi3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3186))]
+ "CGEN_ENABLE_INSN_P (241)"
+ "cpclipi3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpclipiu3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3188))]
+ "CGEN_ENABLE_INSN_P (242)"
+ "cpclipiu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpclipiu3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3188))]
+ "CGEN_ENABLE_INSN_P (243)"
+ "cpclipiu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
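+;; Unlike the neighbouring shifts, the cpslai3 forms wrap the operation
+;; in unspec_volatile, so they are never combined, hoisted or deleted;
+;; presumably the arithmetic left shift has a side effect (saturation
+;; state, for instance) that the RTL does not expose.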
+(define_insn "cgen_intrinsic_cpslai3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3190))]
+ "CGEN_ENABLE_INSN_P (244)"
+ "cpslai3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslai3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3190))]
+ "CGEN_ENABLE_INSN_P (245)"
+ "cpslai3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslai3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3192))]
+ "CGEN_ENABLE_INSN_P (246)"
+ "cpslai3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslai3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3192))]
+ "CGEN_ENABLE_INSN_P (247)"
+ "cpslai3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdslli3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3194))]
+ "CGEN_ENABLE_INSN_P (248)"
+ "cdslli3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdslli3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3194))]
+ "CGEN_ENABLE_INSN_P (249)"
+ "cdslli3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3196))]
+ "CGEN_ENABLE_INSN_P (250)"
+ "cpslli3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3196))]
+ "CGEN_ENABLE_INSN_P (251)"
+ "cpslli3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3198))]
+ "CGEN_ENABLE_INSN_P (252)"
+ "cpslli3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3198))]
+ "CGEN_ENABLE_INSN_P (253)"
+ "cpslli3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3200))]
+ "CGEN_ENABLE_INSN_P (254)"
+ "cpslli3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslli3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3200))]
+ "CGEN_ENABLE_INSN_P (255)"
+ "cpslli3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrai3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3202))]
+ "CGEN_ENABLE_INSN_P (256)"
+ "cdsrai3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrai3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3202))]
+ "CGEN_ENABLE_INSN_P (257)"
+ "cdsrai3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3204))]
+ "CGEN_ENABLE_INSN_P (258)"
+ "cpsrai3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3204))]
+ "CGEN_ENABLE_INSN_P (259)"
+ "cpsrai3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3206))]
+ "CGEN_ENABLE_INSN_P (260)"
+ "cpsrai3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3206))]
+ "CGEN_ENABLE_INSN_P (261)"
+ "cpsrai3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3208))]
+ "CGEN_ENABLE_INSN_P (262)"
+ "cpsrai3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrai3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3208))]
+ "CGEN_ENABLE_INSN_P (263)"
+ "cpsrai3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrli3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3210))]
+ "CGEN_ENABLE_INSN_P (264)"
+ "cdsrli3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrli3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_6a1_immediate" "")
+ ] 3210))]
+ "CGEN_ENABLE_INSN_P (265)"
+ "cdsrli3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3212))]
+ "CGEN_ENABLE_INSN_P (266)"
+ "cpsrli3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3212))]
+ "CGEN_ENABLE_INSN_P (267)"
+ "cpsrli3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3214))]
+ "CGEN_ENABLE_INSN_P (268)"
+ "cpsrli3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_4a1_immediate" "")
+ ] 3214))]
+ "CGEN_ENABLE_INSN_P (269)"
+ "cpsrli3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3216))]
+ "CGEN_ENABLE_INSN_P (270)"
+ "cpsrli3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrli3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "cgen_h_uint_3a1_immediate" "")
+ ] 3216))]
+ "CGEN_ENABLE_INSN_P (271)"
+ "cpsrli3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
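+;; The cpsla3 (shift-left-arithmetic) patterns below wrap the operation in
+;; unspec_volatile rather than unspec, so the optimizers will neither CSE
+;; two identical occurrences nor delete one whose result appears unused,
+;; presumably because the instruction has side effects not visible in the
+;; RTL.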
+(define_insn "cgen_intrinsic_cpsla3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3460))]
+ "CGEN_ENABLE_INSN_P (272)"
+ "cpsla3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsla3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3460))]
+ "CGEN_ENABLE_INSN_P (273)"
+ "cpsla3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsla3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3462))]
+ "CGEN_ENABLE_INSN_P (274)"
+ "cpsla3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsla3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3462))]
+ "CGEN_ENABLE_INSN_P (275)"
+ "cpsla3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsll3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3464))]
+ "CGEN_ENABLE_INSN_P (276)"
+ "cdsll3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsll3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3464))]
+ "CGEN_ENABLE_INSN_P (277)"
+ "cdsll3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3466))]
+ "CGEN_ENABLE_INSN_P (278)"
+ "cpssll3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3466))]
+ "CGEN_ENABLE_INSN_P (279)"
+ "cpssll3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3468))]
+ "CGEN_ENABLE_INSN_P (280)"
+ "cpsll3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3468))]
+ "CGEN_ENABLE_INSN_P (281)"
+ "cpsll3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3470))]
+ "CGEN_ENABLE_INSN_P (282)"
+ "cpssll3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3470))]
+ "CGEN_ENABLE_INSN_P (283)"
+ "cpssll3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3472))]
+ "CGEN_ENABLE_INSN_P (284)"
+ "cpsll3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3472))]
+ "CGEN_ENABLE_INSN_P (285)"
+ "cpsll3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3474))]
+ "CGEN_ENABLE_INSN_P (286)"
+ "cpssll3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssll3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3474))]
+ "CGEN_ENABLE_INSN_P (287)"
+ "cpssll3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3476))]
+ "CGEN_ENABLE_INSN_P (288)"
+ "cpsll3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsll3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3476))]
+ "CGEN_ENABLE_INSN_P (289)"
+ "cpsll3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsra3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3478))]
+ "CGEN_ENABLE_INSN_P (290)"
+ "cdsra3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsra3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3478))]
+ "CGEN_ENABLE_INSN_P (291)"
+ "cdsra3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3480))]
+ "CGEN_ENABLE_INSN_P (292)"
+ "cpssra3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3480))]
+ "CGEN_ENABLE_INSN_P (293)"
+ "cpssra3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3482))]
+ "CGEN_ENABLE_INSN_P (294)"
+ "cpsra3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3482))]
+ "CGEN_ENABLE_INSN_P (295)"
+ "cpsra3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3484))]
+ "CGEN_ENABLE_INSN_P (296)"
+ "cpssra3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3484))]
+ "CGEN_ENABLE_INSN_P (297)"
+ "cpssra3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3486))]
+ "CGEN_ENABLE_INSN_P (298)"
+ "cpsra3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3486))]
+ "CGEN_ENABLE_INSN_P (299)"
+ "cpsra3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3488))]
+ "CGEN_ENABLE_INSN_P (300)"
+ "cpssra3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssra3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3488))]
+ "CGEN_ENABLE_INSN_P (301)"
+ "cpssra3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3490))]
+ "CGEN_ENABLE_INSN_P (302)"
+ "cpsra3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsra3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3490))]
+ "CGEN_ENABLE_INSN_P (303)"
+ "cpsra3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrl3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3492))]
+ "CGEN_ENABLE_INSN_P (304)"
+ "cdsrl3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsrl3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3492))]
+ "CGEN_ENABLE_INSN_P (305)"
+ "cdsrl3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3494))]
+ "CGEN_ENABLE_INSN_P (306)"
+ "cpssrl3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3494))]
+ "CGEN_ENABLE_INSN_P (307)"
+ "cpssrl3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3496))]
+ "CGEN_ENABLE_INSN_P (308)"
+ "cpsrl3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3496))]
+ "CGEN_ENABLE_INSN_P (309)"
+ "cpsrl3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3498))]
+ "CGEN_ENABLE_INSN_P (310)"
+ "cpssrl3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3498))]
+ "CGEN_ENABLE_INSN_P (311)"
+ "cpssrl3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3500))]
+ "CGEN_ENABLE_INSN_P (312)"
+ "cpsrl3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3500))]
+ "CGEN_ENABLE_INSN_P (313)"
+ "cpsrl3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3502))]
+ "CGEN_ENABLE_INSN_P (314)"
+ "cpssrl3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssrl3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3502))]
+ "CGEN_ENABLE_INSN_P (315)"
+ "cpssrl3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3504))]
+ "CGEN_ENABLE_INSN_P (316)"
+ "cpsrl3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrl3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3504))]
+ "CGEN_ENABLE_INSN_P (317)"
+ "cpsrl3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
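+;; Element-wise minimum/maximum intrinsics.  The .w/.h/.b suffixes select
+;; 32-, 16- and 8-bit lanes of the 64-bit coprocessor register, and the
+;; "u" forms are presumably the unsigned comparisons; the RTL records only
+;; an opaque unspec number, not these semantics.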
+(define_insn "cgen_intrinsic_cpmin3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3390))]
+ "CGEN_ENABLE_INSN_P (318)"
+ "cpmin3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmin3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3390))]
+ "CGEN_ENABLE_INSN_P (319)"
+ "cpmin3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpminu3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3392))]
+ "CGEN_ENABLE_INSN_P (320)"
+ "cpminu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpminu3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3392))]
+ "CGEN_ENABLE_INSN_P (321)"
+ "cpminu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmin3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3394))]
+ "CGEN_ENABLE_INSN_P (322)"
+ "cpmin3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmin3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3394))]
+ "CGEN_ENABLE_INSN_P (323)"
+ "cpmin3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmin3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3396))]
+ "CGEN_ENABLE_INSN_P (324)"
+ "cpmin3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmin3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3396))]
+ "CGEN_ENABLE_INSN_P (325)"
+ "cpmin3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpminu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3398))]
+ "CGEN_ENABLE_INSN_P (326)"
+ "cpminu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpminu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3398))]
+ "CGEN_ENABLE_INSN_P (327)"
+ "cpminu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3400))]
+ "CGEN_ENABLE_INSN_P (328)"
+ "cpmax3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3400))]
+ "CGEN_ENABLE_INSN_P (329)"
+ "cpmax3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmaxu3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3402))]
+ "CGEN_ENABLE_INSN_P (330)"
+ "cpmaxu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmaxu3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3402))]
+ "CGEN_ENABLE_INSN_P (331)"
+ "cpmaxu3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3404))]
+ "CGEN_ENABLE_INSN_P (332)"
+ "cpmax3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3404))]
+ "CGEN_ENABLE_INSN_P (333)"
+ "cpmax3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3406))]
+ "CGEN_ENABLE_INSN_P (334)"
+ "cpmax3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmax3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3406))]
+ "CGEN_ENABLE_INSN_P (335)"
+ "cpmax3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmaxu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3408))]
+ "CGEN_ENABLE_INSN_P (336)"
+ "cpmaxu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmaxu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3408))]
+ "CGEN_ENABLE_INSN_P (337)"
+ "cpmaxu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
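+;; cppack/cppacku: the mnemonics suggest these pack narrower elements from
+;; the two source registers into the destination, in signed and unsigned
+;; (saturating) flavors; like the rest of the file they stay opaque to the
+;; optimizers via unspec.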
+(define_insn "cgen_intrinsic_cppack_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3506))]
+ "CGEN_ENABLE_INSN_P (338)"
+ "cppack.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppack_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3506))]
+ "CGEN_ENABLE_INSN_P (339)"
+ "cppack.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppack_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3508))]
+ "CGEN_ENABLE_INSN_P (340)"
+ "cppack.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppack_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3508))]
+ "CGEN_ENABLE_INSN_P (341)"
+ "cppack.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacku_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3510))]
+ "CGEN_ENABLE_INSN_P (342)"
+ "cppacku.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacku_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3510))]
+ "CGEN_ENABLE_INSN_P (343)"
+ "cppacku.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
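+;; Bitwise logical intrinsics.  Even cpxor3/cpnor3/cpor3/cpand3 are kept
+;; as unspecs instead of canonical (xor ...)/(ior ...)/(and ...) RTL, so
+;; the compiler passes them through verbatim rather than folding them or
+;; combining them with other bitwise operations.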
+(define_insn "cgen_intrinsic_cpxor3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3532))]
+ "CGEN_ENABLE_INSN_P (344)"
+ "cpxor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpxor3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3532))]
+ "CGEN_ENABLE_INSN_P (345)"
+ "cpxor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpnor3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3534))]
+ "CGEN_ENABLE_INSN_P (346)"
+ "cpnor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpnor3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3534))]
+ "CGEN_ENABLE_INSN_P (347)"
+ "cpnor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpor3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3536))]
+ "CGEN_ENABLE_INSN_P (348)"
+ "cpor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpor3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3536))]
+ "CGEN_ENABLE_INSN_P (349)"
+ "cpor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpand3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3538))]
+ "CGEN_ENABLE_INSN_P (350)"
+ "cpand3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpand3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3538))]
+ "CGEN_ENABLE_INSN_P (351)"
+ "cpand3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
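+;; cpabs/cpabsu take two register operands, which suggests a per-element
+;; absolute difference (|op1 - op2|) rather than a plain absolute value;
+;; the pattern itself does not encode which.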
+(define_insn "cgen_intrinsic_cpabs3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3410))]
+ "CGEN_ENABLE_INSN_P (352)"
+ "cpabs3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabs3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3410))]
+ "CGEN_ENABLE_INSN_P (353)"
+ "cpabs3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabs3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3412))]
+ "CGEN_ENABLE_INSN_P (354)"
+ "cpabs3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabs3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3412))]
+ "CGEN_ENABLE_INSN_P (355)"
+ "cpabs3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3414))]
+ "CGEN_ENABLE_INSN_P (356)"
+ "cpabsu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3414))]
+ "CGEN_ENABLE_INSN_P (357)"
+ "cpabsu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3416))]
+ "CGEN_ENABLE_INSN_P (358)"
+ "cpaddsr3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3416))]
+ "CGEN_ENABLE_INSN_P (359)"
+ "cpaddsr3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3418))]
+ "CGEN_ENABLE_INSN_P (360)"
+ "cpaddsr3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3418))]
+ "CGEN_ENABLE_INSN_P (361)"
+ "cpaddsr3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3420))]
+ "CGEN_ENABLE_INSN_P (362)"
+ "cpaddsr3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsr3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3420))]
+ "CGEN_ENABLE_INSN_P (363)"
+ "cpaddsr3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsru3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3422))]
+ "CGEN_ENABLE_INSN_P (364)"
+ "cpaddsru3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddsru3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3422))]
+ "CGEN_ENABLE_INSN_P (365)"
+ "cpaddsru3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3424))]
+ "CGEN_ENABLE_INSN_P (366)"
+ "cpave3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3424))]
+ "CGEN_ENABLE_INSN_P (367)"
+ "cpave3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3426))]
+ "CGEN_ENABLE_INSN_P (368)"
+ "cpave3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3426))]
+ "CGEN_ENABLE_INSN_P (369)"
+ "cpave3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3428))]
+ "CGEN_ENABLE_INSN_P (370)"
+ "cpave3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpave3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3428))]
+ "CGEN_ENABLE_INSN_P (371)"
+ "cpave3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaveu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3430))]
+ "CGEN_ENABLE_INSN_P (372)"
+ "cpaveu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaveu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3430))]
+ "CGEN_ENABLE_INSN_P (373)"
+ "cpaveu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
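+;; cpextl*/cpextu* intrinsics: judging by the mnemonics, these widen the
+;; lower (l) or upper (u) byte lanes before adding or subtracting, with a
+;; trailing "u" selecting the unsigned flavor.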
+(define_insn "cgen_intrinsic_cpextlsub3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3432))]
+ "CGEN_ENABLE_INSN_P (374)"
+ "cpextlsub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlsub3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3432))]
+ "CGEN_ENABLE_INSN_P (375)"
+ "cpextlsub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlsubu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3434))]
+ "CGEN_ENABLE_INSN_P (376)"
+ "cpextlsubu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlsubu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3434))]
+ "CGEN_ENABLE_INSN_P (377)"
+ "cpextlsubu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextusub3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3436))]
+ "CGEN_ENABLE_INSN_P (378)"
+ "cpextusub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextusub3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3436))]
+ "CGEN_ENABLE_INSN_P (379)"
+ "cpextusub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextusubu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3438))]
+ "CGEN_ENABLE_INSN_P (380)"
+ "cpextusubu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextusubu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3438))]
+ "CGEN_ENABLE_INSN_P (381)"
+ "cpextusubu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextladd3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3440))]
+ "CGEN_ENABLE_INSN_P (382)"
+ "cpextladd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextladd3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3440))]
+ "CGEN_ENABLE_INSN_P (383)"
+ "cpextladd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextladdu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3442))]
+ "CGEN_ENABLE_INSN_P (384)"
+ "cpextladdu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextladdu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3442))]
+ "CGEN_ENABLE_INSN_P (385)"
+ "cpextladdu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuadd3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3444))]
+ "CGEN_ENABLE_INSN_P (386)"
+ "cpextuadd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuadd3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3444))]
+ "CGEN_ENABLE_INSN_P (387)"
+ "cpextuadd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuaddu3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3446))]
+ "CGEN_ENABLE_INSN_P (388)"
+ "cpextuaddu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuaddu3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3446))]
+ "CGEN_ENABLE_INSN_P (389)"
+ "cpextuaddu3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
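+;; cpssub3.w/cpssub3.h are the only patterns in this block with a two-set
+;; PARALLEL: besides the DI result they set hard register 84 through a
+;; second unspec_volatile whose operands are match_dups of the sources,
+;; presumably modelling a saturation/condition flag that the instruction
+;; updates as a side effect.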
+(define_insn "cgen_intrinsic_cpssub3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3448))
+ (set (reg:SI 84)
+ (unspec_volatile:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3450))]
+ "CGEN_ENABLE_INSN_P (390)"
+ "cpssub3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssub3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3448))
+ (set (reg:SI 84)
+ (unspec_volatile:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3450))]
+ "CGEN_ENABLE_INSN_P (391)"
+ "cpssub3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssub3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3452))
+ (set (reg:SI 84)
+ (unspec_volatile:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3454))]
+ "CGEN_ENABLE_INSN_P (392)"
+ "cpssub3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpssub3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3452))
+ (set (reg:SI 84)
+ (unspec_volatile:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3454))]
+ "CGEN_ENABLE_INSN_P (393)"
+ "cpssub3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadd3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3456))]
+ "CGEN_ENABLE_INSN_P (394)"
+ "cpsadd3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadd3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3456))]
+ "CGEN_ENABLE_INSN_P (395)"
+ "cpsadd3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadd3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3458))]
+ "CGEN_ENABLE_INSN_P (396)"
+ "cpsadd3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadd3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3458))]
+ "CGEN_ENABLE_INSN_P (397)"
+ "cpsadd3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsub3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3540))]
+ "CGEN_ENABLE_INSN_P (398)"
+ "cdsub3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdsub3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3540))]
+ "CGEN_ENABLE_INSN_P (399)"
+ "cdsub3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3542))]
+ "CGEN_ENABLE_INSN_P (400)"
+ "cpsub3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_w_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3542))]
+ "CGEN_ENABLE_INSN_P (401)"
+ "cpsub3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3544))]
+ "CGEN_ENABLE_INSN_P (402)"
+ "cpsub3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_h_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3544))]
+ "CGEN_ENABLE_INSN_P (403)"
+ "cpsub3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3546))]
+ "CGEN_ENABLE_INSN_P (404)"
+ "cpsub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsub3_b_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3546))]
+ "CGEN_ENABLE_INSN_P (405)"
+ "cpsub3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdadd3_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3548))]
+ "CGEN_ENABLE_INSN_P (406)"
+ "cdadd3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdadd3_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3548))]
+ "CGEN_ENABLE_INSN_P (407)"
+ "cdadd3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
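+ ;; cpocmp*.{w,h,b}: packed compares whose results land in fixed registers
+ ;; (presumably condition/flag registers) rather than in an operand.  The _C3
+ ;; variants model this as unspec SETs of hard registers 81 and 113; the
+ ;; _P0_P1 variants have no outputs at all and rely on a bare unspec_volatile
+ ;; to keep the insn from being deleted.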
+(define_insn "cgen_intrinsic_cpocmpge_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3218))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3219))]
+ "CGEN_ENABLE_INSN_P (408)"
+ "cpocmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpge_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3218)]
+ "CGEN_ENABLE_INSN_P (409)"
+ "cpocmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgeu_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3220))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3221))]
+ "CGEN_ENABLE_INSN_P (410)"
+ "cpocmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgeu_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3220)]
+ "CGEN_ENABLE_INSN_P (411)"
+ "cpocmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpge_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3222))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3223))]
+ "CGEN_ENABLE_INSN_P (412)"
+ "cpocmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpge_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3222)]
+ "CGEN_ENABLE_INSN_P (413)"
+ "cpocmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpge_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3224))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3225))]
+ "CGEN_ENABLE_INSN_P (414)"
+ "cpocmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpge_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3224)]
+ "CGEN_ENABLE_INSN_P (415)"
+ "cpocmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgeu_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3226))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3227))]
+ "CGEN_ENABLE_INSN_P (416)"
+ "cpocmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgeu_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3226)]
+ "CGEN_ENABLE_INSN_P (417)"
+ "cpocmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3228))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3229))]
+ "CGEN_ENABLE_INSN_P (418)"
+ "cpocmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3228)]
+ "CGEN_ENABLE_INSN_P (419)"
+ "cpocmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgtu_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3230))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3231))]
+ "CGEN_ENABLE_INSN_P (420)"
+ "cpocmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgtu_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3230)]
+ "CGEN_ENABLE_INSN_P (421)"
+ "cpocmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3232))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3233))]
+ "CGEN_ENABLE_INSN_P (422)"
+ "cpocmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3232)]
+ "CGEN_ENABLE_INSN_P (423)"
+ "cpocmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3234))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3235))]
+ "CGEN_ENABLE_INSN_P (424)"
+ "cpocmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgt_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3234)]
+ "CGEN_ENABLE_INSN_P (425)"
+ "cpocmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgtu_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3236))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3237))]
+ "CGEN_ENABLE_INSN_P (426)"
+ "cpocmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpgtu_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3236)]
+ "CGEN_ENABLE_INSN_P (427)"
+ "cpocmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3238))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3239))]
+ "CGEN_ENABLE_INSN_P (428)"
+ "cpocmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3238)]
+ "CGEN_ENABLE_INSN_P (429)"
+ "cpocmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3240))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3241))]
+ "CGEN_ENABLE_INSN_P (430)"
+ "cpocmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3240)]
+ "CGEN_ENABLE_INSN_P (431)"
+ "cpocmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3242))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3243))]
+ "CGEN_ENABLE_INSN_P (432)"
+ "cpocmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpne_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3242)]
+ "CGEN_ENABLE_INSN_P (433)"
+ "cpocmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3244))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3245))]
+ "CGEN_ENABLE_INSN_P (434)"
+ "cpocmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3244)]
+ "CGEN_ENABLE_INSN_P (435)"
+ "cpocmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3246))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3247))]
+ "CGEN_ENABLE_INSN_P (436)"
+ "cpocmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3246)]
+ "CGEN_ENABLE_INSN_P (437)"
+ "cpocmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3248))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3249))]
+ "CGEN_ENABLE_INSN_P (438)"
+ "cpocmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpocmpeq_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3248)]
+ "CGEN_ENABLE_INSN_P (439)"
+ "cpocmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
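+ ;; cpacmp*: same shape as the cpocmp* patterns above; judging only by the
+ ;; "a" vs "o" in the mnemonics, these presumably AND-accumulate rather than
+ ;; OR-accumulate the compare result into the condition state.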
+(define_insn "cgen_intrinsic_cpacmpge_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3250))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3251))]
+ "CGEN_ENABLE_INSN_P (440)"
+ "cpacmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpge_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3250)]
+ "CGEN_ENABLE_INSN_P (441)"
+ "cpacmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgeu_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3252))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3253))]
+ "CGEN_ENABLE_INSN_P (442)"
+ "cpacmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgeu_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3252)]
+ "CGEN_ENABLE_INSN_P (443)"
+ "cpacmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpge_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3254))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3255))]
+ "CGEN_ENABLE_INSN_P (444)"
+ "cpacmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpge_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3254)]
+ "CGEN_ENABLE_INSN_P (445)"
+ "cpacmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpge_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3256))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3257))]
+ "CGEN_ENABLE_INSN_P (446)"
+ "cpacmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpge_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3256)]
+ "CGEN_ENABLE_INSN_P (447)"
+ "cpacmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgeu_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3258))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3259))]
+ "CGEN_ENABLE_INSN_P (448)"
+ "cpacmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgeu_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3258)]
+ "CGEN_ENABLE_INSN_P (449)"
+ "cpacmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3260))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3261))]
+ "CGEN_ENABLE_INSN_P (450)"
+ "cpacmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3260)]
+ "CGEN_ENABLE_INSN_P (451)"
+ "cpacmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgtu_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3262))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3263))]
+ "CGEN_ENABLE_INSN_P (452)"
+ "cpacmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgtu_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3262)]
+ "CGEN_ENABLE_INSN_P (453)"
+ "cpacmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3264))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3265))]
+ "CGEN_ENABLE_INSN_P (454)"
+ "cpacmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3264)]
+ "CGEN_ENABLE_INSN_P (455)"
+ "cpacmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3266))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3267))]
+ "CGEN_ENABLE_INSN_P (456)"
+ "cpacmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgt_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3266)]
+ "CGEN_ENABLE_INSN_P (457)"
+ "cpacmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgtu_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3268))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3269))]
+ "CGEN_ENABLE_INSN_P (458)"
+ "cpacmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpgtu_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3268)]
+ "CGEN_ENABLE_INSN_P (459)"
+ "cpacmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3270))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3271))]
+ "CGEN_ENABLE_INSN_P (460)"
+ "cpacmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3270)]
+ "CGEN_ENABLE_INSN_P (461)"
+ "cpacmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3272))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3273))]
+ "CGEN_ENABLE_INSN_P (462)"
+ "cpacmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3272)]
+ "CGEN_ENABLE_INSN_P (463)"
+ "cpacmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3274))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3275))]
+ "CGEN_ENABLE_INSN_P (464)"
+ "cpacmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpne_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3274)]
+ "CGEN_ENABLE_INSN_P (465)"
+ "cpacmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_w_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3276))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3277))]
+ "CGEN_ENABLE_INSN_P (466)"
+ "cpacmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_w_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3276)]
+ "CGEN_ENABLE_INSN_P (467)"
+ "cpacmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_h_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3278))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3279))]
+ "CGEN_ENABLE_INSN_P (468)"
+ "cpacmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_h_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3278)]
+ "CGEN_ENABLE_INSN_P (469)"
+ "cpacmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_b_C3"
+ [(set (reg:SI 81)
+ (unspec:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3280))
+ (set (reg:SI 113)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3281))]
+ "CGEN_ENABLE_INSN_P (470)"
+ "cpacmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacmpeq_b_P0_P1"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3280)]
+ "CGEN_ENABLE_INSN_P (471)"
+ "cpacmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
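+ ;; cpfsftbi: a three-input operation (presumably a funnel/byte shift by
+ ;; immediate, from the mnemonic).  Operand 3 uses the
+ ;; cgen_h_uint_3a1_immediate predicate with an empty constraint string, so
+ ;; it accepts only a small unsigned immediate (a 3-bit field, judging by
+ ;; the predicate name).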
+(define_insn "cgen_intrinsic_cpfsftbi_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ (match_operand:DI 3 "cgen_h_uint_3a1_immediate" "")
+ ] 3528))]
+ "CGEN_ENABLE_INSN_P (472)"
+ "cpfsftbi\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbi_P0_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ (match_operand:DI 3 "cgen_h_uint_3a1_immediate" "")
+ ] 3528))]
+ "CGEN_ENABLE_INSN_P (473)"
+ "cpfsftbi\\t%0,%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p1")
+ (set_attr "stall" "none")])
+
+
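+ ;; The *a0s{0,1} patterns below run only in the P0S slot and model their
+ ;; effect on the a0 accumulator as unspec_volatile SETs of fixed SI hard
+ ;; registers (86 and 96-103, in varying subsets), one numbered unspec per
+ ;; register, instead of exposing an accumulator operand.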
+(define_insn "cgen_intrinsic_cpfacla0s1_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1484))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1486))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1488))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1490))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1492))]
+ "CGEN_ENABLE_INSN_P (474)"
+ "cpfacla0s1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfacua0s1_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1494))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1496))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1498))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1500))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1502))]
+ "CGEN_ENABLE_INSN_P (475)"
+ "cpfacua0s1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfaca0s1_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1504))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1506))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1508))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1510))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1512))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1514))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1516))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1518))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1520))]
+ "CGEN_ENABLE_INSN_P (476)"
+ "cpfaca0s1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfaca0s1u_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1522))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1524))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1526))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1528))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1530))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1532))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1534))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1536))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1538))]
+ "CGEN_ENABLE_INSN_P (477)"
+ "cpfaca0s1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbla0s1_h_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1540))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1542))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1544))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1546))]
+ "CGEN_ENABLE_INSN_P (478)"
+ "cpfsftbla0s1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbua0s1_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1548))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1550))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1552))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1554))]
+ "CGEN_ENABLE_INSN_P (479)"
+ "cpfsftbua0s1.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftba0s1_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1556))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1558))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1560))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1562))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1564))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1566))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1568))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1570))]
+ "CGEN_ENABLE_INSN_P (480)"
+ "cpfsftba0s1.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftba0s1u_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1572))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1574))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1576))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1578))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1580))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1582))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1584))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1586))]
+ "CGEN_ENABLE_INSN_P (481)"
+ "cpfsftba0s1u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfacla0s0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1588))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1590))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1592))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1594))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1596))]
+ "CGEN_ENABLE_INSN_P (482)"
+ "cpfacla0s0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfacua0s0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1598))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1600))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1602))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1604))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1606))]
+ "CGEN_ENABLE_INSN_P (483)"
+ "cpfacua0s0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfaca0s0_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1608))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1610))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1612))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1614))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1616))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1618))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1620))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1622))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1624))]
+ "CGEN_ENABLE_INSN_P (484)"
+ "cpfaca0s0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfaca0s0u_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1626))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1628))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1630))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1632))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1634))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1636))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1638))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1640))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1642))]
+ "CGEN_ENABLE_INSN_P (485)"
+ "cpfaca0s0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbla0s0_h_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1644))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1646))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1648))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1650))]
+ "CGEN_ENABLE_INSN_P (486)"
+ "cpfsftbla0s0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbua0s0_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1652))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1654))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1656))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1658))]
+ "CGEN_ENABLE_INSN_P (487)"
+ "cpfsftbua0s0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftba0s0_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1660))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1662))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1664))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1666))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1668))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1670))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1672))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1674))]
+ "CGEN_ENABLE_INSN_P (488)"
+ "cpfsftba0s0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftba0s0u_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1676))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1678))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1680))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1682))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1684))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1686))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1688))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1690))]
+ "CGEN_ENABLE_INSN_P (489)"
+ "cpfsftba0s0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
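+ ;; cpsllia0/cpsraia0/cpsrlia0: shift the a0 accumulator (left logical,
+ ;; right arithmetic, right logical) by an immediate; the
+ ;; cgen_h_uint_5a1_immediate predicate suggests a 5-bit shift count.
+ ;; cpslla0/cpsraa0/cpsrla0 further below take the count from a coprocessor
+ ;; register instead.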
+(define_insn "cgen_intrinsic_cpsllia0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 1692))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1694))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1696))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1698))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1700))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1702))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1704))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1706))]
+ "CGEN_ENABLE_INSN_P (490)"
+ "cpsllia0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraia0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 1708))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1710))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1712))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1714))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1716))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1718))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1720))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1722))]
+ "CGEN_ENABLE_INSN_P (491)"
+ "cpsraia0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrlia0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_5a1_immediate" "")
+ ] 1724))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1726))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1728))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1730))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1732))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1734))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1736))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1738))]
+ "CGEN_ENABLE_INSN_P (492)"
+ "cpsrlia0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpslla0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 1740))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1742))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1744))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1746))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1748))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1750))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1752))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1754))]
+ "CGEN_ENABLE_INSN_P (493)"
+ "cpslla0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsraa0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 1756))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1758))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1760))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1762))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1764))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1766))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1768))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1770))]
+ "CGEN_ENABLE_INSN_P (494)"
+ "cpsraa0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsrla0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 1772))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1774))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1776))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1778))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1780))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1782))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1784))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ ] 1786))]
+ "CGEN_ENABLE_INSN_P (495)"
+ "cpsrla0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
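+ ;; cpaccpa0/cpacsuma0: operand-less accumulator operations; the
+ ;; (const_int 0) inside each unspec_volatile is only a placeholder input.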
+(define_insn "cgen_intrinsic_cpaccpa0_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1788))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1790))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1792))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1794))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1796))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1798))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1800))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1802))]
+ "CGEN_ENABLE_INSN_P (496)"
+ "cpaccpa0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpacsuma0_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1804))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1806))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1808))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1810))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1812))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1814))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1816))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1818))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 1820))]
+ "CGEN_ENABLE_INSN_P (497)"
+ "cpacsuma0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
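+ ;; The cpmov*a0/cppack*a0 patterns read the a0 accumulator back into a
+ ;; 64-bit coprocessor destination (DI mode, "x" constraint).  They are
+ ;; unspec_volatile with a (const_int 0) input, so the dependence on the
+ ;; accumulator state set above is implicit rather than modelled through
+ ;; the hard registers.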
+(define_insn "cgen_intrinsic_cpmovhla0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1822))]
+ "CGEN_ENABLE_INSN_P (498)"
+ "cpmovhla0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovhua0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1824))]
+ "CGEN_ENABLE_INSN_P (499)"
+ "cpmovhua0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1826))]
+ "CGEN_ENABLE_INSN_P (500)"
+ "cppackla0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1828))]
+ "CGEN_ENABLE_INSN_P (501)"
+ "cppackua0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackla0_h_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1830))]
+ "CGEN_ENABLE_INSN_P (502)"
+ "cppackla0.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppackua0_h_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1832))]
+ "CGEN_ENABLE_INSN_P (503)"
+ "cppackua0.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka0_b_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1834))]
+ "CGEN_ENABLE_INSN_P (504)"
+ "cppacka0.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cppacka0u_b_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1836))]
+ "CGEN_ENABLE_INSN_P (505)"
+ "cppacka0u.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlla0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1838))]
+ "CGEN_ENABLE_INSN_P (506)"
+ "cpmovlla0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovlua0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1840))]
+ "CGEN_ENABLE_INSN_P (507)"
+ "cpmovlua0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovula0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1842))]
+ "CGEN_ENABLE_INSN_P (508)"
+ "cpmovula0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovuua0_w_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1844))]
+ "CGEN_ENABLE_INSN_P (509)"
+ "cpmovuua0.w\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovla0_h_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1846))]
+ "CGEN_ENABLE_INSN_P (510)"
+ "cpmovla0.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovua0_h_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1848))]
+ "CGEN_ENABLE_INSN_P (511)"
+ "cpmovua0.h\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmova0_b_P0S"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 1850))]
+ "CGEN_ENABLE_INSN_P (512)"
+ "cpmova0.b\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsetla0_w_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1852))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1854))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1856))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1858))]
+ "CGEN_ENABLE_INSN_P (513)"
+ "cpsetla0.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsetua0_w_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1860))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1862))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1864))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1866))]
+ "CGEN_ENABLE_INSN_P (514)"
+ "cpsetua0.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpseta0_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1868))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1870))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1872))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1874))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1876))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1878))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1880))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1882))]
+ "CGEN_ENABLE_INSN_P (515)"
+ "cpseta0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadla0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1884))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1886))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1888))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1890))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1892))]
+ "CGEN_ENABLE_INSN_P (516)"
+ "cpsadla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsadua0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1894))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1896))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1898))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1900))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1902))]
+ "CGEN_ENABLE_INSN_P (517)"
+ "cpsadua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada0_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1904))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1906))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1908))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1910))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1912))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1914))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1916))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1918))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1920))]
+ "CGEN_ENABLE_INSN_P (518)"
+ "cpsada0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsada0u_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1922))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1924))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1926))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1928))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1930))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1932))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1934))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1936))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1938))]
+ "CGEN_ENABLE_INSN_P (519)"
+ "cpsada0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsla0_h_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1940))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1942))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1944))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1946))]
+ "CGEN_ENABLE_INSN_P (520)"
+ "cpabsla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsua0_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1948))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1950))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1952))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1954))]
+ "CGEN_ENABLE_INSN_P (521)"
+ "cpabsua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa0_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1956))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1958))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1960))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1962))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1964))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1966))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1968))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1970))]
+ "CGEN_ENABLE_INSN_P (522)"
+ "cpabsa0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsa0u_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1972))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1974))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1976))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1978))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1980))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1982))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1984))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1986))]
+ "CGEN_ENABLE_INSN_P (523)"
+ "cpabsa0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacla0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1988))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1990))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1992))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1994))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 1996))]
+ "CGEN_ENABLE_INSN_P (524)"
+ "cpsubacla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubacua0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 1998))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2000))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2002))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2004))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2006))]
+ "CGEN_ENABLE_INSN_P (525)"
+ "cpsubacua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca0_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2008))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2010))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2012))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2014))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2016))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2018))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2020))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2022))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2024))]
+ "CGEN_ENABLE_INSN_P (526)"
+ "cpsubaca0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubaca0u_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2026))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2028))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2030))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2032))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2034))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2036))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2038))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2040))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2042))]
+ "CGEN_ENABLE_INSN_P (527)"
+ "cpsubaca0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubla0_h_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2044))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2046))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2048))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2050))]
+ "CGEN_ENABLE_INSN_P (528)"
+ "cpsubla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsubua0_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2052))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2054))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2056))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2058))]
+ "CGEN_ENABLE_INSN_P (529)"
+ "cpsubua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba0_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2060))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2062))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2064))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2066))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2068))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2070))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2072))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2074))]
+ "CGEN_ENABLE_INSN_P (530)"
+ "cpsuba0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsuba0u_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2076))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2078))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2080))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2082))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2084))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2086))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2088))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2090))]
+ "CGEN_ENABLE_INSN_P (531)"
+ "cpsuba0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacla0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2092))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2094))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2096))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2098))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2100))]
+ "CGEN_ENABLE_INSN_P (532)"
+ "cpaddacla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddacua0_h_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2102))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2104))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2106))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2108))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2110))]
+ "CGEN_ENABLE_INSN_P (533)"
+ "cpaddacua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca0_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2112))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2114))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2116))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2118))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2120))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2122))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2124))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2126))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2128))]
+ "CGEN_ENABLE_INSN_P (534)"
+ "cpaddaca0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddaca0u_b_P0S"
+ [(set (reg:SI 86)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2130))
+ (set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2132))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2134))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2136))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2138))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2140))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2142))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2144))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2146))]
+ "CGEN_ENABLE_INSN_P (535)"
+ "cpaddaca0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddla0_h_P0S"
+ [(set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2148))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2150))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2152))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2154))]
+ "CGEN_ENABLE_INSN_P (536)"
+ "cpaddla0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpaddua0_h_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2156))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2158))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2160))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2162))]
+ "CGEN_ENABLE_INSN_P (537)"
+ "cpaddua0.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda0_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2164))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2166))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2168))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2170))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2172))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2174))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2176))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2178))]
+ "CGEN_ENABLE_INSN_P (538)"
+ "cpadda0.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadda0u_b_P0S"
+ [(set (reg:SI 103)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 2180))
+ (set (reg:SI 102)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2182))
+ (set (reg:SI 101)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2184))
+ (set (reg:SI 100)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2186))
+ (set (reg:SI 99)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2188))
+ (set (reg:SI 98)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2190))
+ (set (reg:SI 97)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2192))
+ (set (reg:SI 96)
+ (unspec_volatile:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 2194))]
+ "CGEN_ENABLE_INSN_P (539)"
+ "cpadda0u.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3282))]
+ "CGEN_ENABLE_INSN_P (540)"
+ "cpcmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3282))]
+ "CGEN_ENABLE_INSN_P (541)"
+ "cpcmpge.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgeu_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3284))]
+ "CGEN_ENABLE_INSN_P (542)"
+ "cpcmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgeu_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3284))]
+ "CGEN_ENABLE_INSN_P (543)"
+ "cpcmpgeu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_h_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3286))]
+ "CGEN_ENABLE_INSN_P (544)"
+ "cpcmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_h_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3286))]
+ "CGEN_ENABLE_INSN_P (545)"
+ "cpcmpge.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3288))]
+ "CGEN_ENABLE_INSN_P (546)"
+ "cpcmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpge_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3288))]
+ "CGEN_ENABLE_INSN_P (547)"
+ "cpcmpge.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgeu_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3290))]
+ "CGEN_ENABLE_INSN_P (548)"
+ "cpcmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgeu_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3290))]
+ "CGEN_ENABLE_INSN_P (549)"
+ "cpcmpgeu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3292))]
+ "CGEN_ENABLE_INSN_P (550)"
+ "cpcmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3292))]
+ "CGEN_ENABLE_INSN_P (551)"
+ "cpcmpgt.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgtu_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3294))]
+ "CGEN_ENABLE_INSN_P (552)"
+ "cpcmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgtu_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3294))]
+ "CGEN_ENABLE_INSN_P (553)"
+ "cpcmpgtu.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_h_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3296))]
+ "CGEN_ENABLE_INSN_P (554)"
+ "cpcmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_h_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3296))]
+ "CGEN_ENABLE_INSN_P (555)"
+ "cpcmpgt.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3298))]
+ "CGEN_ENABLE_INSN_P (556)"
+ "cpcmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgt_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3298))]
+ "CGEN_ENABLE_INSN_P (557)"
+ "cpcmpgt.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgtu_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3300))]
+ "CGEN_ENABLE_INSN_P (558)"
+ "cpcmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpgtu_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3300))]
+ "CGEN_ENABLE_INSN_P (559)"
+ "cpcmpgtu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3302))]
+ "CGEN_ENABLE_INSN_P (560)"
+ "cpcmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3302))]
+ "CGEN_ENABLE_INSN_P (561)"
+ "cpcmpne.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_h_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3304))]
+ "CGEN_ENABLE_INSN_P (562)"
+ "cpcmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_h_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3304))]
+ "CGEN_ENABLE_INSN_P (563)"
+ "cpcmpne.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3306))]
+ "CGEN_ENABLE_INSN_P (564)"
+ "cpcmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpne_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3306))]
+ "CGEN_ENABLE_INSN_P (565)"
+ "cpcmpne.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_w_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3308))]
+ "CGEN_ENABLE_INSN_P (566)"
+ "cpcmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_w_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3308))]
+ "CGEN_ENABLE_INSN_P (567)"
+ "cpcmpeq.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_h_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3310))]
+ "CGEN_ENABLE_INSN_P (568)"
+ "cpcmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_h_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3310))]
+ "CGEN_ENABLE_INSN_P (569)"
+ "cpcmpeq.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3312))]
+ "CGEN_ENABLE_INSN_P (570)"
+ "cpcmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeq_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3312))]
+ "CGEN_ENABLE_INSN_P (571)"
+ "cpcmpeq.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeqz_b_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3314))]
+ "CGEN_ENABLE_INSN_P (572)"
+ "cpcmpeqz.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcmpeqz_b_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3314))]
+ "CGEN_ENABLE_INSN_P (573)"
+ "cpcmpeqz.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocc_C3"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3378))]
+ "CGEN_ENABLE_INSN_P (574)"
+ "cpmovtocc\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocc_P0S_P1"
+ [(set (reg:SI 81)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3378))]
+ "CGEN_ENABLE_INSN_P (575)"
+ "cpmovtocc\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocsar1_C3"
+ [(set (reg:SI 95)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3380))]
+ "CGEN_ENABLE_INSN_P (576)"
+ "cpmovtocsar1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocsar1_P0S_P1"
+ [(set (reg:SI 95)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3380))]
+ "CGEN_ENABLE_INSN_P (577)"
+ "cpmovtocsar1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocsar0_C3"
+ [(set (reg:SI 80)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3382))]
+ "CGEN_ENABLE_INSN_P (578)"
+ "cpmovtocsar0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovtocsar0_P0S_P1"
+ [(set (reg:SI 80)
+ (unspec_volatile:SI [
+ (match_operand:DI 0 "general_operand" "x")
+ ] 3382))]
+ "CGEN_ENABLE_INSN_P (579)"
+ "cpmovtocsar0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcc_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3384))]
+ "CGEN_ENABLE_INSN_P (580)"
+ "cpmovfrcc\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcc_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3384))]
+ "CGEN_ENABLE_INSN_P (581)"
+ "cpmovfrcc\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcsar1_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3386))]
+ "CGEN_ENABLE_INSN_P (582)"
+ "cpmovfrcsar1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcsar1_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3386))]
+ "CGEN_ENABLE_INSN_P (583)"
+ "cpmovfrcsar1\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcsar0_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3388))]
+ "CGEN_ENABLE_INSN_P (584)"
+ "cpmovfrcsar0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmovfrcsar0_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (const_int 0)
+ ] 3388))]
+ "CGEN_ENABLE_INSN_P (585)"
+ "cpmovfrcsar0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdcastw_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3316))]
+ "CGEN_ENABLE_INSN_P (586)"
+ "cdcastw\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdcastw_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3316))]
+ "CGEN_ENABLE_INSN_P (587)"
+ "cdcastw\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdcastuw_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3318))]
+ "CGEN_ENABLE_INSN_P (588)"
+ "cdcastuw\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cdcastuw_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3318))]
+ "CGEN_ENABLE_INSN_P (589)"
+ "cdcastuw\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcasth_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3320))]
+ "CGEN_ENABLE_INSN_P (590)"
+ "cpcasth.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcasth_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3320))]
+ "CGEN_ENABLE_INSN_P (591)"
+ "cpcasth.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastuh_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3322))]
+ "CGEN_ENABLE_INSN_P (592)"
+ "cpcastuh.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastuh_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3322))]
+ "CGEN_ENABLE_INSN_P (593)"
+ "cpcastuh.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastb_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3324))]
+ "CGEN_ENABLE_INSN_P (594)"
+ "cpcastb.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastb_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3324))]
+ "CGEN_ENABLE_INSN_P (595)"
+ "cpcastb.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastub_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3326))]
+ "CGEN_ENABLE_INSN_P (596)"
+ "cpcastub.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastub_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3326))]
+ "CGEN_ENABLE_INSN_P (597)"
+ "cpcastub.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastb_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3328))]
+ "CGEN_ENABLE_INSN_P (598)"
+ "cpcastb.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastb_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3328))]
+ "CGEN_ENABLE_INSN_P (599)"
+ "cpcastb.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastub_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3330))]
+ "CGEN_ENABLE_INSN_P (600)"
+ "cpcastub.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpcastub_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3330))]
+ "CGEN_ENABLE_INSN_P (601)"
+ "cpcastub.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextl_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3332))]
+ "CGEN_ENABLE_INSN_P (602)"
+ "cpextl.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextl_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3332))]
+ "CGEN_ENABLE_INSN_P (603)"
+ "cpextl.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlu_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3334))]
+ "CGEN_ENABLE_INSN_P (604)"
+ "cpextlu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlu_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3334))]
+ "CGEN_ENABLE_INSN_P (605)"
+ "cpextlu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextl_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3336))]
+ "CGEN_ENABLE_INSN_P (606)"
+ "cpextl.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextl_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3336))]
+ "CGEN_ENABLE_INSN_P (607)"
+ "cpextl.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlu_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3338))]
+ "CGEN_ENABLE_INSN_P (608)"
+ "cpextlu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextlu_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3338))]
+ "CGEN_ENABLE_INSN_P (609)"
+ "cpextlu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextu_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3340))]
+ "CGEN_ENABLE_INSN_P (610)"
+ "cpextu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextu_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3340))]
+ "CGEN_ENABLE_INSN_P (611)"
+ "cpextu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuu_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3342))]
+ "CGEN_ENABLE_INSN_P (612)"
+ "cpextuu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuu_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3342))]
+ "CGEN_ENABLE_INSN_P (613)"
+ "cpextuu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextu_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3344))]
+ "CGEN_ENABLE_INSN_P (614)"
+ "cpextu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextu_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3344))]
+ "CGEN_ENABLE_INSN_P (615)"
+ "cpextu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuu_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3346))]
+ "CGEN_ENABLE_INSN_P (616)"
+ "cpextuu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpextuu_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3346))]
+ "CGEN_ENABLE_INSN_P (617)"
+ "cpextuu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
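+;; cpbcast.w/.h/.b: presumably broadcast a value across the word,
+;; halfword, or byte elements of the destination register.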
+(define_insn "cgen_intrinsic_cpbcast_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3348))]
+ "CGEN_ENABLE_INSN_P (618)"
+ "cpbcast.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpbcast_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3348))]
+ "CGEN_ENABLE_INSN_P (619)"
+ "cpbcast.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpbcast_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3350))]
+ "CGEN_ENABLE_INSN_P (620)"
+ "cpbcast.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpbcast_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3350))]
+ "CGEN_ENABLE_INSN_P (621)"
+ "cpbcast.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpbcast_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3352))]
+ "CGEN_ENABLE_INSN_P (622)"
+ "cpbcast.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpbcast_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3352))]
+ "CGEN_ENABLE_INSN_P (623)"
+ "cpbcast.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
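+;; cpccadd.b ties operand 1 to its destination ("0") and prints only the
+;; tied register: the instruction updates a register in place.  The
+;; unspec_volatile keeps the update from being CSEd or moved.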
+(define_insn "cgen_intrinsic_cpccadd_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "0")
+ ] 3354))]
+ "CGEN_ENABLE_INSN_P (624)"
+ "cpccadd.b\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpccadd_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "0")
+ ] 3354))]
+ "CGEN_ENABLE_INSN_P (625)"
+ "cpccadd.b\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
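+;; cphadd/cphaddu: presumably horizontal (across-element) adds, signed
+;; and unsigned, in the supported element widths.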
+(define_insn "cgen_intrinsic_cphadd_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3356))]
+ "CGEN_ENABLE_INSN_P (626)"
+ "cphadd.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphadd_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3356))]
+ "CGEN_ENABLE_INSN_P (627)"
+ "cphadd.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphadd_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3358))]
+ "CGEN_ENABLE_INSN_P (628)"
+ "cphadd.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphadd_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3358))]
+ "CGEN_ENABLE_INSN_P (629)"
+ "cphadd.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphadd_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3360))]
+ "CGEN_ENABLE_INSN_P (630)"
+ "cphadd.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphadd_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3360))]
+ "CGEN_ENABLE_INSN_P (631)"
+ "cphadd.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphaddu_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3362))]
+ "CGEN_ENABLE_INSN_P (632)"
+ "cphaddu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cphaddu_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3362))]
+ "CGEN_ENABLE_INSN_P (633)"
+ "cphaddu.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
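+;; cpnorm (presumably normalize), cpldz (presumably leading-zero count)
+;; and cpabsz (presumably absolute value) follow, per element width.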
+(define_insn "cgen_intrinsic_cpnorm_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3364))]
+ "CGEN_ENABLE_INSN_P (634)"
+ "cpnorm.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpnorm_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3364))]
+ "CGEN_ENABLE_INSN_P (635)"
+ "cpnorm.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpnorm_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3366))]
+ "CGEN_ENABLE_INSN_P (636)"
+ "cpnorm.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpnorm_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3366))]
+ "CGEN_ENABLE_INSN_P (637)"
+ "cpnorm.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpldz_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3368))]
+ "CGEN_ENABLE_INSN_P (638)"
+ "cpldz.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpldz_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3368))]
+ "CGEN_ENABLE_INSN_P (639)"
+ "cpldz.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpldz_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3370))]
+ "CGEN_ENABLE_INSN_P (640)"
+ "cpldz.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpldz_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3370))]
+ "CGEN_ENABLE_INSN_P (641)"
+ "cpldz.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3372))]
+ "CGEN_ENABLE_INSN_P (642)"
+ "cpabsz.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3372))]
+ "CGEN_ENABLE_INSN_P (643)"
+ "cpabsz.w\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3374))]
+ "CGEN_ENABLE_INSN_P (644)"
+ "cpabsz.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3374))]
+ "CGEN_ENABLE_INSN_P (645)"
+ "cpabsz.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3376))]
+ "CGEN_ENABLE_INSN_P (646)"
+ "cpabsz.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpabsz_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 3376))]
+ "CGEN_ENABLE_INSN_P (647)"
+ "cpabsz.b\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
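+;; Move and select operations.  cpmov copies one coprocessor register to
+;; another; cpfsftbs0/cpfsftbs1 and cpsel use unspec_volatile, presumably
+;; because their results depend on hidden coprocessor state.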
+(define_insn "cgen_intrinsic_cpmov_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4172))]
+ "CGEN_ENABLE_INSN_P (648)"
+ "cpmov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmov_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4172))]
+ "CGEN_ENABLE_INSN_P (649)"
+ "cpmov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbs1_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3524))]
+ "CGEN_ENABLE_INSN_P (650)"
+ "cpfsftbs1\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbs1_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3524))]
+ "CGEN_ENABLE_INSN_P (651)"
+ "cpfsftbs1\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbs0_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3526))]
+ "CGEN_ENABLE_INSN_P (652)"
+ "cpfsftbs0\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpfsftbs0_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3526))]
+ "CGEN_ENABLE_INSN_P (653)"
+ "cpfsftbs0\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsel_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3530))]
+ "CGEN_ENABLE_INSN_P (654)"
+ "cpsel\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpsel_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec_volatile:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3530))]
+ "CGEN_ENABLE_INSN_P (655)"
+ "cpsel\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
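+;; cpunpackl/cpunpacku: presumably interleave the low/high elements of
+;; the two source registers, in .w/.h/.b widths.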
+(define_insn "cgen_intrinsic_cpunpackl_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3512))]
+ "CGEN_ENABLE_INSN_P (656)"
+ "cpunpackl.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpackl_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3512))]
+ "CGEN_ENABLE_INSN_P (657)"
+ "cpunpackl.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpackl_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3514))]
+ "CGEN_ENABLE_INSN_P (658)"
+ "cpunpackl.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpackl_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3514))]
+ "CGEN_ENABLE_INSN_P (659)"
+ "cpunpackl.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpackl_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3516))]
+ "CGEN_ENABLE_INSN_P (660)"
+ "cpunpackl.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpackl_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3516))]
+ "CGEN_ENABLE_INSN_P (661)"
+ "cpunpackl.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3518))]
+ "CGEN_ENABLE_INSN_P (662)"
+ "cpunpacku.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3518))]
+ "CGEN_ENABLE_INSN_P (663)"
+ "cpunpacku.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3520))]
+ "CGEN_ENABLE_INSN_P (664)"
+ "cpunpacku.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3520))]
+ "CGEN_ENABLE_INSN_P (665)"
+ "cpunpacku.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3522))]
+ "CGEN_ENABLE_INSN_P (666)"
+ "cpunpacku.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpunpacku_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3522))]
+ "CGEN_ENABLE_INSN_P (667)"
+ "cpunpacku.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
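+;; cpadd3: element-wise add writing a distinct destination register
+;; rather than updating an input in place.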
+(define_insn "cgen_intrinsic_cpadd3_w_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3550))]
+ "CGEN_ENABLE_INSN_P (668)"
+ "cpadd3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadd3_w_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3550))]
+ "CGEN_ENABLE_INSN_P (669)"
+ "cpadd3.w\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadd3_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3552))]
+ "CGEN_ENABLE_INSN_P (670)"
+ "cpadd3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadd3_h_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3552))]
+ "CGEN_ENABLE_INSN_P (671)"
+ "cpadd3.h\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadd3_b_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3554))]
+ "CGEN_ENABLE_INSN_P (672)"
+ "cpadd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpadd3_b_P0S_P1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "x")
+ (match_operand:DI 2 "general_operand" "x")
+ ] 3554))]
+ "CGEN_ENABLE_INSN_P (673)"
+ "cpadd3.b\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0s_p1")
+ (set_attr "stall" "none")])
+
+
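+;; c0nop: a coprocessor no-op restricted to the P0/P0S slots, volatile so
+;; it is never deleted.  The lone cpmoviu.h pattern after it loads an
+;; immediate (8-bit unsigned, judging by the
+;; cgen_h_uint_8a1_immediate predicate).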
+(define_insn "cgen_intrinsic_c0nop_P0_P0S"
+ [(unspec_volatile [
+ (const_int 0)
+ ] 2196)]
+ "CGEN_ENABLE_INSN_P (674)"
+ "c0nop"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0_p0s")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cpmoviu_h_C3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_uint_8a1_immediate" "")
+ ] 3178))]
+ "CGEN_ENABLE_INSN_P (675)"
+ "cpmoviu.h\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
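+;; Core/coprocessor transfers.  cmov and cmovh move between a core
+;; register ("r") and a coprocessor register ("x"); cmovh presumably
+;; touches only half of the 64-bit coprocessor register, which is why its
+;; destination is also an input ("0").  cmovc uses the "y" class,
+;; presumably coprocessor control registers, and is volatile.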
+(define_insn "cgen_intrinsic_cmovh_rn_crm"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4156))]
+ "CGEN_ENABLE_INSN_P (676)"
+ "cmovh\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovh_rn_crm_p0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4156))]
+ "CGEN_ENABLE_INSN_P (677)"
+ "cmovh\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovh_crn_rm"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4158))]
+ "CGEN_ENABLE_INSN_P (678)"
+ "cmovh\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovh_crn_rm_p0"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4158))]
+ "CGEN_ENABLE_INSN_P (679)"
+ "cmovh\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovc_rn_ccrm"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "y")
+ ] 4160))]
+ "CGEN_ENABLE_INSN_P (680)"
+ "cmovc\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovc_rn_ccrm_p0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "y")
+ ] 4160))]
+ "CGEN_ENABLE_INSN_P (681)"
+ "cmovc\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovc_ccrn_rm"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4162))]
+ "CGEN_ENABLE_INSN_P (682)"
+ "cmovc\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmovc_ccrn_rm_p0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4162))]
+ "CGEN_ENABLE_INSN_P (683)"
+ "cmovc\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmov_rn_crm"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4164))]
+ "CGEN_ENABLE_INSN_P (684)"
+ "cmov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmov_rn_crm_p0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "x")
+ ] 4164))]
+ "CGEN_ENABLE_INSN_P (685)"
+ "cmov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmov_crn_rm"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:DI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4166))]
+ "CGEN_ENABLE_INSN_P (686)"
+ "cmov\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "c3")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cmov_crn_rm_p0"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=x")
+ (unspec:DI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4166))]
+ "CGEN_ENABLE_INSN_P (687)"
+ "cmov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "cop")
+ (set_attr "slots" "p0")
+ (set_attr "stall" "none")])
+
+
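+;; bsrv and jsrv: branch/jump-to-subroutine forms.  Besides changing the
+;; pc, both set registers 17 and 114 through parallel unspec sets,
+;; presumably the link register and a related internal register.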
+(define_insn "cgen_intrinsic_bsrv"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3556)
+ (const_int 0))
+ (match_dup 0)
+ (pc)))
+ (set (reg:SI 17)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3558))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3559))]
+ "CGEN_ENABLE_INSN_P (688)"
+ "bsrv\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_jsrv"
+ [(set (pc)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3560))
+ (set (reg:SI 17)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3562))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3563))]
+ "CGEN_ENABLE_INSN_P (689)"
+ "jsrv\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
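+;; synccp: presumably synchronizes the core with the coprocessor
+;; pipeline; volatile so it cannot be removed or reordered.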
+(define_insn "cgen_intrinsic_synccp"
+ [(unspec_volatile [
+ (const_int 0)
+ ] 3564)]
+ "CGEN_ENABLE_INSN_P (690)"
+ "synccp"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
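+;; bcpaf/bcpat/bcpne/bcpeq: conditional branches on coprocessor condition
+;; codes.  Each branch is an if_then_else on an unspec reading the fixed
+;; registers 32, 42 and 81, so the condition stays opaque to the
+;; optimizers.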
+(define_insn "cgen_intrinsic_bcpaf"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 81)
+ ] 3566)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (691)"
+ "bcpaf\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bcpat"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 81)
+ ] 3568)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (692)"
+ "bcpat\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bcpne"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 81)
+ ] 3570)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (693)"
+ "bcpne\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bcpeq"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 81)
+ ] 3572)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (694)"
+ "bcpeq\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
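+;; Coprocessor load/store, "m1" modulo-addressed forms (referencing the
+;; fixed registers 30 and 31).  The pointer operand is tied across the
+;; two sets to model the post-update, and (mem:SI (scratch:SI)) stands
+;; for an access at an unspecified address, preventing invalid reordering
+;; against other memory operations.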
+(define_insn "cgen_intrinsic_lmcpm1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:DI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3574))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3576))]
+ "CGEN_ENABLE_INSN_P (695)"
+ "lmcpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_smcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3578))]
+ "CGEN_ENABLE_INSN_P (696)"
+ "smcpm1\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lwcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3580))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3582))]
+ "CGEN_ENABLE_INSN_P (697)"
+ "lwcpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_swcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3584))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3586))]
+ "CGEN_ENABLE_INSN_P (698)"
+ "swcpm1\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lhcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3588))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3590))]
+ "CGEN_ENABLE_INSN_P (699)"
+ "lhcpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_shcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3592))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3594))]
+ "CGEN_ENABLE_INSN_P (700)"
+ "shcpm1\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lbcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3596))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 3598))]
+ "CGEN_ENABLE_INSN_P (701)"
+ "lbcpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sbcpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3600))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ ] 3602))]
+ "CGEN_ENABLE_INSN_P (702)"
+ "sbcpm1\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
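+;; The corresponding "m0" forms, identical except that they reference the
+;; fixed registers 28 and 29.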
+(define_insn "cgen_intrinsic_lmcpm0"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:DI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3604))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3606))]
+ "CGEN_ENABLE_INSN_P (703)"
+ "lmcpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_smcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3608))]
+ "CGEN_ENABLE_INSN_P (704)"
+ "smcpm0\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lwcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3610))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3612))]
+ "CGEN_ENABLE_INSN_P (705)"
+ "lwcpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_swcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3614))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3616))]
+ "CGEN_ENABLE_INSN_P (706)"
+ "swcpm0\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lhcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3618))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3620))]
+ "CGEN_ENABLE_INSN_P (707)"
+ "lhcpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_shcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3622))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3624))]
+ "CGEN_ENABLE_INSN_P (708)"
+ "shcpm0\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lbcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3626))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 3628))]
+ "CGEN_ENABLE_INSN_P (709)"
+ "lbcpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sbcpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3630))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ ] 3632))]
+ "CGEN_ENABLE_INSN_P (710)"
+ "sbcpm0\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
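+;; "a" forms: coprocessor load/store with a post-modify immediate
+;; displacement.  Unlike the modulo forms above, these carry a "load" or
+;; "store" stall class.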
+(define_insn "cgen_intrinsic_lmcpa"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:DI 3 "cgen_h_sint_10a1_immediate" "")
+ ] 3634))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ ] 3636))]
+ "CGEN_ENABLE_INSN_P (711)"
+ "lmcpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_smcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ ] 3638))]
+ "CGEN_ENABLE_INSN_P (712)"
+ "smcpa\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lwcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3640))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (mem:SI (scratch:SI))
+ ] 3642))]
+ "CGEN_ENABLE_INSN_P (713)"
+ "lwcpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_swcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ ] 3644))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 3646))]
+ "CGEN_ENABLE_INSN_P (714)"
+ "swcpa\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lhcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3648))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (mem:SI (scratch:SI))
+ ] 3650))]
+ "CGEN_ENABLE_INSN_P (715)"
+ "lhcpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_shcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ ] 3652))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 3654))]
+ "CGEN_ENABLE_INSN_P (716)"
+ "shcpa\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lbcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3656))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (mem:SI (scratch:SI))
+ ] 3658))]
+ "CGEN_ENABLE_INSN_P (717)"
+ "lbcpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sbcpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ ] 3660))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ ] 3662))]
+ "CGEN_ENABLE_INSN_P (718)"
+ "sbcpa\\t%1,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
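+;; 16-bit-displacement forms, taking a cgen_h_sint_16a1_immediate offset
+;; from a core base register.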
+(define_insn "cgen_intrinsic_lmcp16"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:DI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3664))]
+ "CGEN_ENABLE_INSN_P (719)"
+ "lmcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_smcp16"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "em")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3666)]
+ "CGEN_ENABLE_INSN_P (720)"
+ "smcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lwcp16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3668))]
+ "CGEN_ENABLE_INSN_P (721)"
+ "lwcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_swcp16"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "em")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3670))]
+ "CGEN_ENABLE_INSN_P (722)"
+ "swcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
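+;; Post-increment forms (lmcpi/smcpi/lwcpi/swcpi); note the 2-byte
+;; encodings ("length" "2").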
+(define_insn "cgen_intrinsic_lmcpi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:SI 2 "general_operand" "1")
+ ] 3672))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ ] 3674))]
+ "CGEN_ENABLE_INSN_P (723)"
+ "lmcpi\\t%0,(%2+)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_smcpi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:DI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ ] 3676))]
+ "CGEN_ENABLE_INSN_P (724)"
+ "smcpi\\t%1,(%2+)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lwcpi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (mem:SI (scratch:SI))
+ ] 3678))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (mem:SI (scratch:SI))
+ ] 3680))]
+ "CGEN_ENABLE_INSN_P (725)"
+ "lwcpi\\t%0,(%2+)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_swcpi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "em")
+ (match_operand:SI 2 "general_operand" "0")
+ ] 3682))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3684))]
+ "CGEN_ENABLE_INSN_P (726)"
+ "swcpi\\t%1,(%2+)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
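+;; Plain register-indirect forms, also 2-byte encodings.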
+(define_insn "cgen_intrinsic_lmcp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=em")
+ (unspec:DI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3686))]
+ "CGEN_ENABLE_INSN_P (727)"
+ "lmcp\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_smcp"
+ [(unspec_volatile [
+ (match_operand:DI 0 "general_operand" "em")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3688)]
+ "CGEN_ENABLE_INSN_P (728)"
+ "smcp\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lwcp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3690))]
+ "CGEN_ENABLE_INSN_P (729)"
+ "lwcp\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_swcp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "em")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3692))]
+ "CGEN_ENABLE_INSN_P (730)"
+ "swcp\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
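+;; Core DSP-style intrinsics: saturating add/subtract
+;; (sadd/ssub/saddu/ssubu), clip/clipu against a 5-bit immediate
+;; (judging by cgen_h_uint_5a1_immediate), max/min/ave/abs, and ldz
+;; (presumably leading-zero count).  These issue in the core slot with
+;; the "int2" stall class; most tie operand 1 to the destination.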
+(define_insn "cgen_intrinsic_ssubu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3694))]
+ "CGEN_ENABLE_INSN_P (731)"
+ "ssubu\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_saddu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3696))]
+ "CGEN_ENABLE_INSN_P (732)"
+ "saddu\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_ssub"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3698))]
+ "CGEN_ENABLE_INSN_P (733)"
+ "ssub\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_sadd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3700))]
+ "CGEN_ENABLE_INSN_P (734)"
+ "sadd\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_clipu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3702))]
+ "CGEN_ENABLE_INSN_P (735)"
+ "clipu\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_clip"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3704))]
+ "CGEN_ENABLE_INSN_P (736)"
+ "clip\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_maxu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3706))]
+ "CGEN_ENABLE_INSN_P (737)"
+ "maxu\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_minu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3708))]
+ "CGEN_ENABLE_INSN_P (738)"
+ "minu\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_max"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3710))]
+ "CGEN_ENABLE_INSN_P (739)"
+ "max\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_min"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3712))]
+ "CGEN_ENABLE_INSN_P (740)"
+ "min\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_ave"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3714))]
+ "CGEN_ENABLE_INSN_P (741)"
+ "ave\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_abs"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3716))]
+ "CGEN_ENABLE_INSN_P (742)"
+ "abs\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_ldz"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3718))]
+ "CGEN_ENABLE_INSN_P (743)"
+ "ldz\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
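+;; dbreak and dret are debug-support operations.  They are modelled as
+;; unspec_volatile, or as opaque sets of hard registers and (pc), so they
+;; are never moved or deleted.  Register 40 is presumably a debug
+;; status/control register; that mapping is inferred from the generated
+;; numbering, not stated anywhere in this file.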
+(define_insn "cgen_intrinsic_dbreak"
+ [(set (reg:SI 40)
+ (unspec_volatile:SI [
+ (reg:SI 40)
+ ] 3720))]
+ "CGEN_ENABLE_INSN_P (744)"
+ "dbreak"
+ [(set_attr "may_trap" "yes")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_dret"
+ [(set (pc)
+ (unspec:SI [
+ (reg:SI 41)
+ (reg:SI 40)
+ ] 3722))
+ (set (reg:SI 40)
+ (unspec:SI [
+ (reg:SI 41)
+ (reg:SI 40)
+ ] 3724))
+ (set (reg:SI 115)
+ (unspec:SI [
+ (reg:SI 41)
+ (reg:SI 40)
+ ] 3725))]
+ "CGEN_ENABLE_INSN_P (745)"
+ "dret"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
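+;; The divide patterns set (pc) in addition to their results: division can
+;; trap (may_trap "yes"), and including pc in the pattern apparently keeps
+;; the optimizers from moving the insn across points where the trap would
+;; be observable.  Registers 23/24 are $hi/$lo (see the ldc_hi/ldc_lo
+;; templates below); 116/117 look like shadow copies of the same state.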
+(define_insn "cgen_intrinsic_divu"
+ [(set (pc)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3726))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3728))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3729))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3730))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3731))]
+ "CGEN_ENABLE_INSN_P (746)"
+ "divu\\t%0,%1"
+ [(set_attr "may_trap" "yes")
+ (set_attr "latency" "34")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "div")])
+
+
+(define_insn "cgen_intrinsic_div"
+ [(set (pc)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3732))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3734))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3735))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3736))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3737))]
+ "CGEN_ENABLE_INSN_P (747)"
+ "div\\t%0,%1"
+ [(set_attr "may_trap" "yes")
+ (set_attr "latency" "34")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "div")])
+
+
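+;; Multiply-accumulate patterns: each reads and rewrites the $hi/$lo pair
+;; (registers 23/24) alongside the apparent shadow registers 116/117.  A
+;; distinct unspec number per set keeps the individual results opaque
+;; while still recording every register the instruction touches.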
+(define_insn "cgen_intrinsic_maddru"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3738))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3740))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3741))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3742))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3743))]
+ "CGEN_ENABLE_INSN_P (748)"
+ "maddru\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mulr")])
+
+
+(define_insn "cgen_intrinsic_maddr"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3744))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3746))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3747))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3748))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3749))]
+ "CGEN_ENABLE_INSN_P (749)"
+ "maddr\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mulr")])
+
+
+(define_insn "cgen_intrinsic_maddu"
+ [(set (reg:SI 24)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3750))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3751))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3752))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3753))]
+ "CGEN_ENABLE_INSN_P (750)"
+ "maddu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mul")])
+
+
+(define_insn "cgen_intrinsic_madd"
+ [(set (reg:SI 24)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3754))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3755))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3756))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 24)
+ (reg:SI 23)
+ ] 3757))]
+ "CGEN_ENABLE_INSN_P (751)"
+ "madd\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mul")])
+
+
+(define_insn "cgen_intrinsic_mulru"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3758))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3760))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3761))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3762))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3763))]
+ "CGEN_ENABLE_INSN_P (752)"
+ "mulru\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mulr")])
+
+
+(define_insn "cgen_intrinsic_mulr"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3764))
+ (set (reg:SI 24)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3766))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3767))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3768))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 1)
+ (match_dup 2)
+ ] 3769))]
+ "CGEN_ENABLE_INSN_P (753)"
+ "mulr\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mulr")])
+
+
+(define_insn "cgen_intrinsic_mulu"
+ [(set (reg:SI 24)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3770))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3771))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3772))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3773))]
+ "CGEN_ENABLE_INSN_P (754)"
+ "mulu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mul")])
+
+
+(define_insn "cgen_intrinsic_mul"
+ [(set (reg:SI 24)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3774))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3775))
+ (set (reg:SI 23)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3776))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ ] 3777))]
+ "CGEN_ENABLE_INSN_P (755)"
+ "mul\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "mul")])
+
+
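+;; cache is a pure side effect (unspec_volatile with no outputs).  tas and
+;; the btstm/bnotm/bclrm/bsetm bit operations use (mem:SI (scratch:SI)) to
+;; say "some memory is read and/or written" without naming an address,
+;; since the address computation is hidden inside the unspec.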
+(define_insn "cgen_intrinsic_cache"
+ [(unspec_volatile [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3778)]
+ "CGEN_ENABLE_INSN_P (756)"
+ "cache\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_tas"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3780))
+ (set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_dup 1)
+ (mem:SI (scratch:SI))
+ ] 3782))]
+ "CGEN_ENABLE_INSN_P (757)"
+ "tas\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_btstm"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_3a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3784))]
+ "CGEN_ENABLE_INSN_P (758)"
+ "btstm\\t$0,(%1),%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bnotm"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_3a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3786))]
+ "CGEN_ENABLE_INSN_P (759)"
+ "bnotm\\t(%0),%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bclrm"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_3a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3788))]
+ "CGEN_ENABLE_INSN_P (760)"
+ "bclrm\\t(%0),%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bsetm"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_3a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3790))]
+ "CGEN_ENABLE_INSN_P (761)"
+ "bsetm\\t(%0),%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_ldcb"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3792))]
+ "CGEN_ENABLE_INSN_P (762)"
+ "ldcb\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ldcb")])
+
+
+(define_insn "cgen_intrinsic_stcb"
+ [(unspec_volatile [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3794)]
+ "CGEN_ENABLE_INSN_P (763)"
+ "stcb\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "stcb")])
+
+
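+;; System and interrupt-control operations: syncm, break, swi, sleep,
+;; halt, reti, ei and di.  Most are unspec_volatile so they cannot be
+;; reordered or removed; register 32 is presumably the processor status
+;; word, as it is read or written by the interrupt enable/disable and
+;; return-from-interrupt patterns.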
+(define_insn "cgen_intrinsic_syncm"
+ [(unspec_volatile [
+ (const_int 0)
+ ] 3796)]
+ "CGEN_ENABLE_INSN_P (764)"
+ "syncm"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_break"
+ [(set (pc)
+ (unspec_volatile:SI [
+ (const_int 0)
+ ] 3798))]
+ "CGEN_ENABLE_INSN_P (765)"
+ "break"
+ [(set_attr "may_trap" "yes")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_swi"
+ [(set (reg:SI 36)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_uint_2a1_immediate" "")
+ (reg:SI 36)
+ ] 3800))]
+ "CGEN_ENABLE_INSN_P (766)"
+ "swi\\t%0"
+ [(set_attr "may_trap" "yes")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sleep"
+ [(unspec_volatile [
+ (const_int 0)
+ ] 3802)]
+ "CGEN_ENABLE_INSN_P (767)"
+ "sleep"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_halt"
+ [(unspec_volatile [
+ (reg:SI 32)
+ ] 3804)]
+ "CGEN_ENABLE_INSN_P (768)"
+ "halt"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_reti"
+ [(set (pc)
+ (unspec:SI [
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 39)
+ (reg:SI 35)
+ ] 3806))]
+ "CGEN_ENABLE_INSN_P (769)"
+ "reti"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ret")])
+
+
+(define_insn "cgen_intrinsic_ei"
+ [(set (reg:SI 32)
+ (unspec_volatile:SI [
+ (reg:SI 32)
+ ] 3808))]
+ "CGEN_ENABLE_INSN_P (770)"
+ "ei"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_di"
+ [(set (reg:SI 32)
+ (unspec_volatile:SI [
+ (reg:SI 32)
+ ] 3810))]
+ "CGEN_ENABLE_INSN_P (771)"
+ "di"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
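+;; Control-register moves.  The "c" constraint on ldc/stc presumably
+;; selects the control-register class; the ldc_lo/ldc_hi/ldc_lp variants
+;; pin down the numbering used throughout this file: $lp is register 17,
+;; $hi is 23 and $lo is 24, each apparently paired with a shadow register
+;; (114, 117, 116) in the corresponding stc patterns.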
+(define_insn "cgen_intrinsic_ldc"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "c")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3812))]
+ "CGEN_ENABLE_INSN_P (772)"
+ "ldc\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ldc")])
+
+
+(define_insn "cgen_intrinsic_ldc_lo"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (reg:SI 24)
+ ] 3814))]
+ "CGEN_ENABLE_INSN_P (773)"
+ "ldc\\t%0,$lo"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ldc")])
+
+
+(define_insn "cgen_intrinsic_ldc_hi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (reg:SI 23)
+ ] 3816))]
+ "CGEN_ENABLE_INSN_P (774)"
+ "ldc\\t%0,$hi"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ldc")])
+
+
+(define_insn "cgen_intrinsic_ldc_lp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (reg:SI 17)
+ ] 3818))]
+ "CGEN_ENABLE_INSN_P (775)"
+ "ldc\\t%0,$lp"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ldc")])
+
+
+(define_insn "cgen_intrinsic_stc"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=c")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3820))]
+ "CGEN_ENABLE_INSN_P (776)"
+ "stc\\t%1,%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "stc")])
+
+
+(define_insn "cgen_intrinsic_stc_lo"
+ [(set (reg:SI 24)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ ] 3822))
+ (set (reg:SI 116)
+ (unspec:SI [
+ (match_dup 0)
+ ] 3823))]
+ "CGEN_ENABLE_INSN_P (777)"
+ "stc\\t%0,$lo"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "stc")])
+
+
+(define_insn "cgen_intrinsic_stc_hi"
+ [(set (reg:SI 23)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ ] 3824))
+ (set (reg:SI 117)
+ (unspec:SI [
+ (match_dup 0)
+ ] 3825))]
+ "CGEN_ENABLE_INSN_P (778)"
+ "stc\\t%0,$hi"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "stc")])
+
+
+(define_insn "cgen_intrinsic_stc_lp"
+ [(set (reg:SI 17)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ ] 3826))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ ] 3827))]
+ "CGEN_ENABLE_INSN_P (779)"
+ "stc\\t%0,$lp"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "stc")])
+
+
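+;; Hardware-loop setup.  erepeat/repeat write registers 20-22 plus the
+;; shadows 118-120; these are presumably the repeat begin/end/count
+;; control registers, though their names are not recorded here.  The
+;; %l0/%l1 output modifiers print those operands as branch labels.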
+(define_insn "cgen_intrinsic_erepeat"
+ [(set (reg:SI 22)
+ (unspec:SI [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3828))
+ (set (reg:SI 118)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3829))
+ (set (reg:SI 21)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3830))
+ (set (reg:SI 119)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3831))
+ (set (reg:SI 20)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3832))
+ (set (reg:SI 120)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3833))]
+ "CGEN_ENABLE_INSN_P (780)"
+ "erepeat\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_repeat"
+ [(set (reg:SI 22)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3834))
+ (set (reg:SI 118)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3835))
+ (set (reg:SI 21)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3836))
+ (set (reg:SI 119)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3837))
+ (set (reg:SI 20)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3838))
+ (set (reg:SI 120)
+ (unspec:SI [
+ (match_dup 0)
+ (match_dup 1)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3839))]
+ "CGEN_ENABLE_INSN_P (781)"
+ "repeat\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_ret"
+ [(set (pc)
+ (unspec:SI [
+ (reg:SI 32)
+ (reg:SI 42)
+ (reg:SI 17)
+ ] 3840))]
+ "CGEN_ENABLE_INSN_P (782)"
+ "ret"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ret")])
+
+
+(define_insn "cgen_intrinsic_jsr"
+ [(set (pc)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3842))
+ (set (reg:SI 17)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3844))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3845))]
+ "CGEN_ENABLE_INSN_P (783)"
+ "jsr\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
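+;; The branch patterns from here on use an if_then_else whose condition is
+;; an opaque unspec compared against zero.  That keeps the actual
+;; comparison hidden from the optimizers while still exposing the label
+;; operand (via match_dup) as a genuine jump target, so basic-block
+;; construction and branch shortening can see where control may go.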
+(define_insn "cgen_intrinsic_jmp24"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3846)
+ (const_int 0))
+ (match_dup 0)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (784)"
+ "jmp\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_jmp"
+ [(set (pc)
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3848))]
+ "CGEN_ENABLE_INSN_P (785)"
+ "jmp\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bsr12"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3854)
+ (const_int 0))
+ (match_dup 0)
+ (pc)))
+ (set (reg:SI 17)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3856))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3857))]
+ "CGEN_ENABLE_INSN_P (786)"
+ "bsr\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bsr24"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3850)
+ (const_int 0))
+ (match_dup 0)
+ (pc)))
+ (set (reg:SI 17)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3852))
+ (set (reg:SI 114)
+ (unspec:SI [
+ (match_dup 0)
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3853))]
+ "CGEN_ENABLE_INSN_P (787)"
+ "bsr\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bne"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3858)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (788)"
+ "bne\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_beq"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3860)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (789)"
+ "beq\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bgei"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3862)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (790)"
+ "bgei\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_blti"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3864)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (791)"
+ "blti\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bnei"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3866)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (792)"
+ "bnei\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_beqi"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3868)
+ (const_int 0))
+ (match_dup 2)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (793)"
+ "beqi\\t%0,%1,%l2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bnez"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3870)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (794)"
+ "bnez\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_beqz"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3872)
+ (const_int 0))
+ (match_dup 1)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (795)"
+ "beqz\\t%0,%l1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_bra"
+ [(set (pc)
+ (if_then_else (eq (unspec [
+ (match_operand:SI 0 "immediate_operand" "")
+ (reg:SI 32)
+ (reg:SI 42)
+ ] 3874)
+ (const_int 0))
+ (match_dup 0)
+ (pc)))]
+ "CGEN_ENABLE_INSN_P (796)"
+ "bra\\t%l0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
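+;; fsft (funnel shift) reads register 18 in addition to its operands; the
+;; ssarb pattern further down writes the same register, so 18 is evidently
+;; the shift-amount register ($sar) that fsft consumes.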
+(define_insn "cgen_intrinsic_fsft"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (reg:SI 18)
+ ] 3876))]
+ "CGEN_ENABLE_INSN_P (797)"
+ "fsft\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "fsft")])
+
+
+(define_insn "cgen_intrinsic_sll3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3878))]
+ "CGEN_ENABLE_INSN_P (798)"
+ "sll3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
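+;; The immediate shift forms carry (set_attr "shiftop" "operand2") instead
+;; of a stall class; this apparently flags operand 2 as the shift amount
+;; for pipeline/bundling machinery defined elsewhere in the port.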
+(define_insn "cgen_intrinsic_slli"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3880))]
+ "CGEN_ENABLE_INSN_P (799)"
+ "sll\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "shiftop" "operand2")])
+
+
+(define_insn "cgen_intrinsic_srli"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3882))]
+ "CGEN_ENABLE_INSN_P (800)"
+ "srl\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "shiftop" "operand2")])
+
+
+(define_insn "cgen_intrinsic_srai"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3884))]
+ "CGEN_ENABLE_INSN_P (801)"
+ "sra\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "shiftop" "operand2")])
+
+
+(define_insn "cgen_intrinsic_sll"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3886))]
+ "CGEN_ENABLE_INSN_P (802)"
+ "sll\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_srl"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3888))]
+ "CGEN_ENABLE_INSN_P (803)"
+ "srl\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_sra"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3890))]
+ "CGEN_ENABLE_INSN_P (804)"
+ "sra\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_xor3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_16a1_immediate" "")
+ ] 3892))]
+ "CGEN_ENABLE_INSN_P (805)"
+ "xor3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_and3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_16a1_immediate" "")
+ ] 3894))]
+ "CGEN_ENABLE_INSN_P (806)"
+ "and3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_or3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_16a1_immediate" "")
+ ] 3896))]
+ "CGEN_ENABLE_INSN_P (807)"
+ "or3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_nor"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3898))]
+ "CGEN_ENABLE_INSN_P (808)"
+ "nor\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_xor"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3900))]
+ "CGEN_ENABLE_INSN_P (809)"
+ "xor\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_and"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3902))]
+ "CGEN_ENABLE_INSN_P (810)"
+ "and\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_or"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3904))]
+ "CGEN_ENABLE_INSN_P (811)"
+ "or\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sltu3x"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_16a1_immediate" "")
+ ] 3906))]
+ "CGEN_ENABLE_INSN_P (812)"
+ "sltu3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_slt3x"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_sint_16a1_immediate" "")
+ ] 3908))]
+ "CGEN_ENABLE_INSN_P (813)"
+ "slt3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_add3x"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_sint_16a1_immediate" "")
+ ] 3910))]
+ "CGEN_ENABLE_INSN_P (814)"
+ "add3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sl2ad3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3912))]
+ "CGEN_ENABLE_INSN_P (815)"
+ "sl2ad3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_sl1ad3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3914))]
+ "CGEN_ENABLE_INSN_P (816)"
+ "sl1ad3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "int2")])
+
+
+(define_insn "cgen_intrinsic_sltu3i"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3916))]
+ "CGEN_ENABLE_INSN_P (817)"
+ "sltu3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_slt3i"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "cgen_h_uint_5a1_immediate" "")
+ ] 3918))]
+ "CGEN_ENABLE_INSN_P (818)"
+ "slt3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sltu3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3920))]
+ "CGEN_ENABLE_INSN_P (819)"
+ "sltu3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_slt3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3922))]
+ "CGEN_ENABLE_INSN_P (820)"
+ "slt3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_neg"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3924))]
+ "CGEN_ENABLE_INSN_P (821)"
+ "neg\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_sbvck3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3926))]
+ "CGEN_ENABLE_INSN_P (822)"
+ "sbvck3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "advck")])
+
+
+(define_insn "cgen_intrinsic_sub"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3928))]
+ "CGEN_ENABLE_INSN_P (823)"
+ "sub\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_advck3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=z")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3930))]
+ "CGEN_ENABLE_INSN_P (824)"
+ "advck3\\t$0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "advck")])
+
+
+(define_insn "cgen_intrinsic_add3i"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_5a4_immediate" "")
+ (reg:SI 15)
+ ] 3932))]
+ "CGEN_ENABLE_INSN_P (825)"
+ "add3\\t%0,$sp,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_add"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_sint_6a1_immediate" "")
+ ] 3934))]
+ "CGEN_ENABLE_INSN_P (826)"
+ "add\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_add3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3936))]
+ "CGEN_ENABLE_INSN_P (827)"
+ "add3\\t%0,%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_movh"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3938))]
+ "CGEN_ENABLE_INSN_P (828)"
+ "movh\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_movu16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_16a1_immediate" "")
+ ] 3940))]
+ "CGEN_ENABLE_INSN_P (829)"
+ "movu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_movu24"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_24a1_immediate" "")
+ ] 3942))]
+ "CGEN_ENABLE_INSN_P (830)"
+ "movu\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_movi8"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_8a1_immediate" "")
+ ] 3946))]
+ "CGEN_ENABLE_INSN_P (831)"
+ "mov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_movi16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ ] 3944))]
+ "CGEN_ENABLE_INSN_P (832)"
+ "mov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_mov"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3948))]
+ "CGEN_ENABLE_INSN_P (833)"
+ "mov\\t%0,%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_ssarb"
+ [(set (reg:SI 18)
+ (unspec_volatile:SI [
+ (match_operand:SI 0 "cgen_h_sint_2a1_immediate" "")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 3950))]
+ "CGEN_ENABLE_INSN_P (834)"
+ "ssarb\\t%0(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "ssarb")])
+
+
+(define_insn "cgen_intrinsic_extuh"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ ] 3952))]
+ "CGEN_ENABLE_INSN_P (835)"
+ "extuh\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_extub"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ ] 3954))]
+ "CGEN_ENABLE_INSN_P (836)"
+ "extub\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_exth"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ ] 3956))]
+ "CGEN_ENABLE_INSN_P (837)"
+ "exth\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_extb"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ ] 3958))]
+ "CGEN_ENABLE_INSN_P (838)"
+ "extb\\t%1"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
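+;; Load/store intrinsics.  Addresses are operands of the unspec rather
+;; than of the mem, so every pattern uses (mem:SI (scratch:SI)) as a
+;; stand-in for the accessed location.  The _tp and _sp forms add a base
+;; register to the pattern: the templates show register 13 printed as $tp
+;; and register 15 as $sp.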
+(define_insn "cgen_intrinsic_lw24"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_22a4_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 3960))]
+ "CGEN_ENABLE_INSN_P (839)"
+ "lw\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sw24"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_22a4_immediate" "")
+ ] 3962))]
+ "CGEN_ENABLE_INSN_P (840)"
+ "sw\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lhu16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3964))]
+ "CGEN_ENABLE_INSN_P (841)"
+ "lhu\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lbu16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3966))]
+ "CGEN_ENABLE_INSN_P (842)"
+ "lbu\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lw16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3968))]
+ "CGEN_ENABLE_INSN_P (843)"
+ "lw\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lh16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3970))]
+ "CGEN_ENABLE_INSN_P (844)"
+ "lh\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lb16"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 3972))]
+ "CGEN_ENABLE_INSN_P (845)"
+ "lb\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sw16"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3974))]
+ "CGEN_ENABLE_INSN_P (846)"
+ "sw\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sh16"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3976))]
+ "CGEN_ENABLE_INSN_P (847)"
+ "sh\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sb16"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 3978))]
+ "CGEN_ENABLE_INSN_P (848)"
+ "sb\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lhu_tp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_6a2_immediate" "")
+ (reg:SI 13)
+ (mem:SI (scratch:SI))
+ ] 3980))]
+ "CGEN_ENABLE_INSN_P (849)"
+ "lhu\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lbu_tp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_7a1_immediate" "")
+ (reg:SI 13)
+ (mem:SI (scratch:SI))
+ ] 3982))]
+ "CGEN_ENABLE_INSN_P (850)"
+ "lbu\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lw_tp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_5a4_immediate" "")
+ (reg:SI 13)
+ (mem:SI (scratch:SI))
+ ] 3984))]
+ "CGEN_ENABLE_INSN_P (851)"
+ "lw\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lh_tp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_6a2_immediate" "")
+ (reg:SI 13)
+ (mem:SI (scratch:SI))
+ ] 3986))]
+ "CGEN_ENABLE_INSN_P (852)"
+ "lh\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lb_tp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=t")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_7a1_immediate" "")
+ (reg:SI 13)
+ (mem:SI (scratch:SI))
+ ] 3988))]
+ "CGEN_ENABLE_INSN_P (853)"
+ "lb\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sw_tp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "t")
+ (match_operand:SI 1 "cgen_h_uint_5a4_immediate" "")
+ (reg:SI 13)
+ ] 3990))]
+ "CGEN_ENABLE_INSN_P (854)"
+ "sw\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sh_tp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "t")
+ (match_operand:SI 1 "cgen_h_uint_6a2_immediate" "")
+ (reg:SI 13)
+ ] 3992))]
+ "CGEN_ENABLE_INSN_P (855)"
+ "sh\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sb_tp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "t")
+ (match_operand:SI 1 "cgen_h_uint_7a1_immediate" "")
+ (reg:SI 13)
+ ] 3994))]
+ "CGEN_ENABLE_INSN_P (856)"
+ "sb\\t%0,%1($tp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lw_sp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_uint_5a4_immediate" "")
+ (reg:SI 15)
+ (mem:SI (scratch:SI))
+ ] 3996))]
+ "CGEN_ENABLE_INSN_P (857)"
+ "lw\\t%0,%1($sp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sw_sp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "cgen_h_uint_5a4_immediate" "")
+ (reg:SI 15)
+ ] 3998))]
+ "CGEN_ENABLE_INSN_P (858)"
+ "sw\\t%0,%1($sp)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lhu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4000))]
+ "CGEN_ENABLE_INSN_P (859)"
+ "lhu\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lbu"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4002))]
+ "CGEN_ENABLE_INSN_P (860)"
+ "lbu\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lw"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4004))]
+ "CGEN_ENABLE_INSN_P (861)"
+ "lw\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lh"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4006))]
+ "CGEN_ENABLE_INSN_P (862)"
+ "lh\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lb"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4008))]
+ "CGEN_ENABLE_INSN_P (863)"
+ "lb\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "2")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_sw"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4010))]
+ "CGEN_ENABLE_INSN_P (864)"
+ "sw\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sh"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4012))]
+ "CGEN_ENABLE_INSN_P (865)"
+ "sh\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sb"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4014))]
+ "CGEN_ENABLE_INSN_P (866)"
+ "sb\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_dsp1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "cgen_h_uint_20a1_immediate" "")
+ ] 4016))]
+ "CGEN_ENABLE_INSN_P (867)"
+ "dsp1\\t%1,%2"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_dsp0"
+ [(unspec_volatile [
+ (match_operand:SI 0 "cgen_h_uint_24a1_immediate" "")
+ ] 4018)]
+ "CGEN_ENABLE_INSN_P (868)"
+ "dsp0\\t%0"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_dsp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (match_operand:SI 3 "cgen_h_uint_16a1_immediate" "")
+ ] 4020))]
+ "CGEN_ENABLE_INSN_P (869)"
+ "dsp\\t%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_uci"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (match_operand:SI 3 "cgen_h_uint_16a1_immediate" "")
+ ] 4022))]
+ "CGEN_ENABLE_INSN_P (870)"
+ "uci\\t%1,%2,%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lhucpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 4024))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 4026))]
+ "CGEN_ENABLE_INSN_P (871)"
+ "lhucpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lbucpm1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 4028))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 31)
+ (reg:SI 30)
+ (mem:SI (scratch:SI))
+ ] 4030))]
+ "CGEN_ENABLE_INSN_P (872)"
+ "lbucpm1\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lhucpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 4032))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 4034))]
+ "CGEN_ENABLE_INSN_P (873)"
+ "lhucpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lbucpm0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 4036))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (reg:SI 29)
+ (reg:SI 28)
+ (mem:SI (scratch:SI))
+ ] 4038))]
+ "CGEN_ENABLE_INSN_P (874)"
+ "lbucpm0\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_lhucpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 4040))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (mem:SI (scratch:SI))
+ ] 4042))]
+ "CGEN_ENABLE_INSN_P (875)"
+ "lhucpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lbucpa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 2 "general_operand" "1")
+ (match_operand:SI 3 "cgen_h_sint_10a1_immediate" "")
+ (mem:SI (scratch:SI))
+ ] 4044))
+ (set (match_operand:SI 1 "nonimmediate_operand" "=r")
+ (unspec:SI [
+ (match_dup 2)
+ (match_dup 3)
+ (mem:SI (scratch:SI))
+ ] 4046))]
+ "CGEN_ENABLE_INSN_P (876)"
+ "lbucpa\\t%0,(%2+),%3"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "load")])
+
+
+(define_insn "cgen_intrinsic_lhucp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4048))]
+ "CGEN_ENABLE_INSN_P (877)"
+ "lhucp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lhcp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4050))]
+ "CGEN_ENABLE_INSN_P (878)"
+ "lhcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_shcp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "em")
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4052))]
+ "CGEN_ENABLE_INSN_P (879)"
+ "shcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lbucp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4054))]
+ "CGEN_ENABLE_INSN_P (880)"
+ "lbucp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_lbcp"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=em")
+ (unspec:SI [
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ (mem:SI (scratch:SI))
+ ] 4056))]
+ "CGEN_ENABLE_INSN_P (881)"
+ "lbcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_sbcp"
+ [(set (mem:SI (scratch:SI))
+ (unspec:SI [
+ (match_operand:SI 0 "general_operand" "em")
+ (match_operand:SI 1 "cgen_h_sint_12a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4058))]
+ "CGEN_ENABLE_INSN_P (882)"
+ "sbcp\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "store")])
+
+
+(define_insn "cgen_intrinsic_casw3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (match_operand:SI 3 "general_operand" "r")
+ ] 4060))]
+ "CGEN_ENABLE_INSN_P (883)"
+ "casw3\\t%1,%2,(%3)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_cash3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (match_operand:SI 3 "general_operand" "r")
+ ] 4062))]
+ "CGEN_ENABLE_INSN_P (884)"
+ "cash3\\t%1,%2,(%3)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_casb3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "0")
+ (match_operand:SI 2 "general_operand" "r")
+ (match_operand:SI 3 "general_operand" "r")
+ ] 4064))]
+ "CGEN_ENABLE_INSN_P (885)"
+ "casb3\\t%1,%2,(%3)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_prefd"
+ [(unspec_volatile [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "cgen_h_sint_16a1_immediate" "")
+ (match_operand:SI 2 "general_operand" "r")
+ ] 4066)]
+ "CGEN_ENABLE_INSN_P (886)"
+ "pref\\t%0,%1(%2)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "4")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_pref"
+ [(unspec_volatile [
+ (match_operand:SI 0 "cgen_h_uint_4a1_immediate" "")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4068)]
+ "CGEN_ENABLE_INSN_P (887)"
+ "pref\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_ldcb_r"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (unspec_volatile:SI [
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4070))]
+ "CGEN_ENABLE_INSN_P (888)"
+ "ldcb\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "3")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
+
+(define_insn "cgen_intrinsic_stcb_r"
+ [(unspec_volatile [
+ (match_operand:SI 0 "general_operand" "r")
+ (match_operand:SI 1 "general_operand" "r")
+ ] 4072)]
+ "CGEN_ENABLE_INSN_P (889)"
+ "stcb\\t%0,(%1)"
+ [(set_attr "may_trap" "no")
+ (set_attr "latency" "0")
+ (set_attr "length" "2")
+ (set_attr "slot" "core")
+ (set_attr "slots" "core")
+ (set_attr "stall" "none")])
+
diff --git a/gcc/config/mep/ivc2-template.h b/gcc/config/mep/ivc2-template.h
new file mode 100644
index 000000000..da0440c0d
--- /dev/null
+++ b/gcc/config/mep/ivc2-template.h
@@ -0,0 +1,9 @@
+#undef __section
+#define __section(_secname) __attribute__((section(#_secname)))
+#undef mep_nop
+#define mep_nop() __asm__ volatile ("nop")
+
+#pragma GCC coprocessor available $c0...$c31
+#pragma GCC coprocessor call_saved $c6...$c7
+
+#include <intrinsics.h>
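
The template above only defines thin wrappers: __section attaches a named
linker section to a declaration, and mep_nop emits a literal nop.  A minimal
usage sketch follows (the section name "mydata" and the table are purely
illustrative, and the file builds only with a MeP toolchain that provides
intrinsics.h):

    #include "ivc2-template.h"   /* assumed to be on the include path */

    /* Place a lookup table in a dedicated linker section via __section.  */
    static int table[4] __section(mydata) = { 1, 2, 3, 4 };

    int
    main (void)
    {
      mep_nop ();                /* expands to __asm__ volatile ("nop") */
      return table[0];
    }
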
diff --git a/gcc/config/mep/mep-c5.cpu b/gcc/config/mep/mep-c5.cpu
new file mode 100644
index 000000000..fc81e90ed
--- /dev/null
+++ b/gcc/config/mep/mep-c5.cpu
@@ -0,0 +1,278 @@
+; Toshiba MeP C5 Core description. -*- scheme -*-
+; Copyright (C) 2009
+; Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+(dnf f-c5n4 "extended field" (all-mep-core-isas) 16 4)
+(dnf f-c5n5 "extended field" (all-mep-core-isas) 20 4)
+(dnf f-c5n6 "extended field" (all-mep-core-isas) 24 4)
+(dnf f-c5n7 "extended field" (all-mep-core-isas) 28 4)
+(dnf f-rl5 "register l c5" (all-mep-core-isas) 20 4)
+(df f-12s20 "extended field" (all-mep-core-isas) 20 12 INT #f #f)
+
+(dnop rl5 "register Rl c5" (all-mep-core-isas) h-gpr f-rl5)
+(dnop cdisp12 "copro addend (12 bits)" (all-mep-core-isas) h-sint f-12s20)
+
+(dnci stcb_r "store in control bus space" (VOLATILE (MACH c5))
+ "stcb $rn,($rma)"
+ (+ MAJ_7 rn rma (f-sub4 12))
+ (c-call VOID "do_stcb" rn (and rma #xffff))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-stcb))))
+
+(dnci ldcb_r "load from control bus space" (VOLATILE (MACH c5) (LATENCY 3))
+ "ldcb $rn,($rma)"
+ (+ MAJ_7 rn rma (f-sub4 13))
+ (set rn (c-call SI "do_ldcb" (and rma #xffff)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-ldcb)
+ (unit u-exec)
+ (unit u-ldcb-gpr (out loadreg rn)))))
+
+(dnci pref "cache prefetch" ((MACH c5) VOLATILE)
+ "pref $cimm4,($rma)"
+ (+ MAJ_7 cimm4 rma (f-sub4 5))
+ (sequence ()
+ (c-call VOID "check_option_dcache" pc)
+ (c-call VOID "do_cache_prefetch" cimm4 rma pc))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci prefd "cache prefetch" ((MACH c5) VOLATILE)
+ "pref $cimm4,$sdisp16($rma)"
+ (+ MAJ_15 cimm4 rma (f-sub4 3) sdisp16)
+ (sequence ()
+ (c-call VOID "check_option_dcache" pc)
+ (c-call VOID "do_cache_prefetch" cimm4 (add INT rma (ext SI sdisp16)) pc))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci casb3 "compare and swap byte 3" ((MACH c5) VOLATILE OPTIONAL_BIT_INSN)
+ "casb3 $rl5,$rn,($rm)"
+ (+ MAJ_15 rn rm (f-sub4 #x1) (f-c5n4 #x2) rl5 (f-c5n6 #x0) (f-c5n7 #x0))
+ (sequence ()
+ (c-call VOID "do_casb3" (index-of rl5) rn rm pc)
+ (set rl5 rl5)
+ )
+ ((mep (unit u-use-gpr (in usereg rl5))
+ (unit u-load-gpr (out loadreg rl5))
+ (unit u-exec))))
+
+(dnci cash3 "compare and swap halfword 3" ((MACH c5) VOLATILE OPTIONAL_BIT_INSN)
+ "cash3 $rl5,$rn,($rm)"
+ (+ MAJ_15 rn rm (f-sub4 #x1) (f-c5n4 #x2) rl5 (f-c5n6 #x0) (f-c5n7 #x1))
+ (sequence ()
+ (c-call VOID "do_cash3" (index-of rl5) rn rm pc)
+ (set rl5 rl5)
+ )
+ ((mep (unit u-use-gpr (in usereg rl5))
+ (unit u-load-gpr (out loadreg rl5))
+ (unit u-exec))))
+
+(dnci casw3 "compare and swap word 3" ((MACH c5) VOLATILE OPTIONAL_BIT_INSN)
+ "casw3 $rl5,$rn,($rm)"
+ (+ MAJ_15 rn rm (f-sub4 #x1) (f-c5n4 #x2) rl5 (f-c5n6 #x0) (f-c5n7 #x2))
+ (sequence ()
+ (c-call VOID "do_casw3" (index-of rl5) rn rm pc)
+ (set rl5 rl5)
+ )
+ ((mep (unit u-use-gpr (in usereg rl5))
+ (unit u-load-gpr (out loadreg rl5))
+ (unit u-exec))))
+
+
+
+(dnci sbcp "store byte coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "sbcp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 0) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (add rma (ext SI cdisp12)))
+ (set (mem QI (add rma (ext SI cdisp12))) (and crn #xff)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbcp "load byte coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "lbcp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 4) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem QI (add rma (ext SI cdisp12))))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbucp "load unsigned byte coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "lbucp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 12) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem QI (add rma (ext SI cdisp12))))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+
+(dnci shcp "store half-word coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "shcp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 1) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (add rma (ext SI cdisp12)))
+ (set (mem HI (add rma (ext SI cdisp12))) (and crn #xffff)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhcp "load half-word coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "lhcp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 5) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem HI (add rma (ext SI cdisp12))))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhucp "load unsigned half-word coprocessor" (OPTIONAL_CP_INSN (STALL STORE) (MACH c5))
+ "lhucp $crn,$cdisp12($rma)"
+ (+ MAJ_15 crn rma (f-sub4 6) (f-ext4 13) cdisp12)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem HI (add rma (ext SI cdisp12))))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+
+(dnci lbucpa "load unsigned byte coprocessor" (OPTIONAL_CP_INSN (STALL LOAD) (MACH c5))
+ "lbucpa $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xC) (f-ext62 #x0) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem QI rma)))
+ (set rma (add rma cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhucpa "load unsigned half-word coprocessor" (OPTIONAL_CP_INSN (STALL LOAD) (MACH c5))
+ "lhucpa $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xD) (f-ext62 #x0) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (add rma (ext SI cdisp10a2))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbucpm0 "lbucpm0" (OPTIONAL_CP_INSN (MACH c5))
+ "lbucpm0 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xc) (f-ext62 #x2) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem QI rma)))
+ (set rma (mod0 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhucpm0 "lhucpm0" (OPTIONAL_CP_INSN (MACH c5))
+ "lhucpm0 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xd) (f-ext62 #x2) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (mod0 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbucpm1 "lbucpm1" (OPTIONAL_CP_INSN (MACH c5))
+ "lbucpm1 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xc) (f-ext62 #x3) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem QI rma)))
+ (set rma (mod1 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhucpm1 "lhucpm1" (OPTIONAL_CP_INSN (MACH c5))
+ "lhucpm1 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #xd) (f-ext62 #x3) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (zext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (mod1 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci uci "uci" ((MACH c5) VOLATILE)
+ "uci $rn,$rm,$uimm16"
+  (+ MAJ_15 rn rm (f-sub4 2) uimm16)
+ (set rn (c-call SI "do_UCI" rn rm (zext SI uimm16) pc))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnf f-c5-rnm "register n/m" (all-mep-isas) 4 8)
+(dnf f-c5-rm "register m" (all-mep-isas) 8 4)
+(df f-c5-16u16 "general 16-bit u-val" (all-mep-isas) 16 16 UINT #f #f)
+
+(dnmf f-c5-rmuimm20 "20-bit immediate in Rm/Imm16" (all-mep-isas) UINT
+ (f-c5-rm f-c5-16u16)
+ (sequence () ; insert
+ (set (ifield f-c5-rm) (srl (ifield f-c5-rmuimm20) 16))
+ (set (ifield f-c5-16u16) (and (ifield f-c5-rmuimm20) #xffff))
+ )
+ (sequence () ; extract
+ (set (ifield f-c5-rmuimm20) (or (ifield f-c5-16u16)
+ (sll (ifield f-c5-rm) 16)))
+ )
+ )
+(dnop c5rmuimm20 "20-bit immediate in rm and imm16" (all-mep-core-isas) h-uint f-c5-rmuimm20)
+
+(dnmf f-c5-rnmuimm24 "24-bit immediate in Rm/Imm16" (all-mep-isas) UINT
+ (f-c5-rnm f-c5-16u16)
+ (sequence () ; insert
+ (set (ifield f-c5-rnm) (srl (ifield f-c5-rnmuimm24) 16))
+ (set (ifield f-c5-16u16) (and (ifield f-c5-rnmuimm24) #xffff))
+ )
+ (sequence () ; extract
+ (set (ifield f-c5-rnmuimm24) (or (ifield f-c5-16u16)
+ (sll (ifield f-c5-rnm) 16)))
+ )
+ )
+(dnop c5rnmuimm24 "24-bit immediate in rn, rm, and imm16" (all-mep-core-isas) h-uint f-c5-rnmuimm24)
+
+(dnci dsp "dsp" ((MACH c5) VOLATILE)
+ "dsp $rn,$rm,$uimm16"
+ (+ MAJ_15 rn rm (f-sub4 0) uimm16)
+ (set rn (c-call SI "do_DSP" rn rm (zext SI uimm16) pc))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci dsp0 "dsp0" ((MACH c5) VOLATILE NO-DIS ALIAS)
+ "dsp0 $c5rnmuimm24"
+ (+ MAJ_15 c5rnmuimm24 (f-sub4 0))
+ (c-call VOID "do_DSP" (zext SI c5rnmuimm24) pc)
+ ((mep (unit u-exec))))
+
+(dnci dsp1 "dsp1" ((MACH c5) VOLATILE NO-DIS ALIAS)
+ "dsp1 $rn,$c5rmuimm20"
+ (+ MAJ_15 rn (f-sub4 0) c5rmuimm20)
+ (set rn (c-call SI "do_DSP" rn (zext SI c5rmuimm20) pc))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
diff --git a/gcc/config/mep/mep-core.cpu b/gcc/config/mep/mep-core.cpu
new file mode 100644
index 000000000..cfcdd42da
--- /dev/null
+++ b/gcc/config/mep/mep-core.cpu
@@ -0,0 +1,3081 @@
+; Toshiba MeP Media Engine architecture description. -*- Scheme -*-
+; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2009
+; Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+(include "simplify.inc")
+
+(define-pmacro isa-enum ()
+ (isas mep
+; begin-isa-enum
+ ext_core1 ext_cop1_16 ext_cop1_32 ext_cop1_48 ext_cop1_64
+; end-isa-enum
+ )
+)
+
+(define-arch
+ (name mep)
+ (comment "Toshiba MeP Media Engine")
+ (insn-lsb0? #f) ;; work around cgen limitation
+ (machs mep h1 c5)
+ isa-enum
+)
+
+(define-isa
+ (name mep)
+ (comment "MeP core instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+; begin-isas
+(define-isa
+ (name ext_core1)
+ (comment "MeP core extension instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+(define-isa
+ (name ext_cop1_16)
+ (comment "MeP coprocessor instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+(define-isa
+ (name ext_cop1_32)
+ (comment "MeP coprocessor instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+(define-isa
+ (name ext_cop1_48)
+ (comment "MeP coprocessor instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+(define-isa
+ (name ext_cop1_64)
+ (comment "MeP coprocessor instruction set")
+ (default-insn-word-bitsize 32)
+ (default-insn-bitsize 32)
+ (base-insn-bitsize 32)
+)
+
+(define-pmacro all-mep-isas () (ISA mep,ext_core1,ext_cop1_16,ext_cop1_32,ext_cop1_48,ext_cop1_64))
+
+(define-pmacro all-mep-core-isas () (ISA mep,ext_core1,ext_cop1_32))
+
+(define-pmacro all-core-isa-list () mep,ext_core1)
+; end-isas
+
+(define-cpu
+ (name mepf)
+ (comment "MeP family")
+ (endian either)
+ (insn-chunk-bitsize 16)
+ (word-bitsize 32)
+)
+
+(define-mach
+ (name mep)
+ (comment "MeP media engine")
+ (cpu mepf)
+ isa-enum
+)
+
+(define-mach
+ (name h1)
+ (comment "H1 media engine")
+ (cpu mepf)
+ isa-enum
+)
+
+(define-mach
+ (name c5)
+ (comment "C5 media engine")
+ (cpu mepf)
+ isa-enum
+)
+
+(define-model
+ (name mep)
+ (comment "MeP media engine processor")
+ (mach c5) ; mach gets changed by MeP-Integrator
+
+ (unit u-exec "execution unit" ()
+ 1 1 ; issue done
+ () () () ())
+
+ ; Branch unit
+ (unit u-branch "Branch Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ ((pc)) ; outputs
+ () ; profile action (default)
+ )
+
+ ; Multiply unit
+ (unit u-multiply "Multiply Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Divide unit
+ (unit u-divide "Divide Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Stcb unit
+ (unit u-stcb "stcb Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Ldcb unit
+ (unit u-ldcb "ldcb Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Load gpr unit
+ (unit u-load-gpr "Load into GPR Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ ((loadreg INT -1)) ; outputs
+ () ; profile action (default)
+ )
+
+ (unit u-ldcb-gpr "Ldcb into GPR Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ ((loadreg INT -1)) ; outputs
+ () ; profile action (default)
+ )
+
+ ; Multiply into GPR unit
+ (unit u-mul-gpr "Multiply into GPR Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ ((resultreg INT -1)) ; outputs
+ () ; profile action (default)
+ )
+
+ ; Use gpr unit -- stalls if GPR not ready
+ (unit u-use-gpr "Use GPR Unit" ()
+ 0 0 ; issue done
+ () ; state
+ ((usereg INT -1)) ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Use ctrl-reg unit -- stalls if CTRL-REG not ready
+ (unit u-use-ctrl-reg "Use CTRL-REG Unit" ()
+ 0 0 ; issue done
+ () ; state
+ ((usereg INT -1)) ; inputs
+ () ; outputs
+ () ; profile action (default)
+ )
+
+ ; Store ctrl-reg unit -- stalls if CTRL-REG not ready
+ (unit u-store-ctrl-reg "Store CTRL-REG Unit" ()
+ 0 0 ; issue done
+ () ; state
+ () ; inputs
+ ((storereg INT -1)) ; outputs
+ () ; profile action (default)
+ )
+)
+
+; Hardware elements.
+
+(dnh h-pc "program counter" (PC PROFILE all-mep-isas) (pc) () () ())
+
+(define-hardware
+ (name h-gpr)
+ (comment "General purpose registers")
+ (attrs all-mep-isas CACHE-ADDR PROFILE)
+ (type register SI (16))
+ (indices keyword "$"
+ (("0" 0) ("1" 1) ("2" 2) ("3" 3) ("4" 4) ("5" 5)
+ ("6" 6) ("7" 7) ("8" 8) ("9" 9) ("10" 10) ("11" 11)
+ ; "$8" is the preferred name for register 8, but "$tp", "$gp"
+ ; and "$sp" are preferred for their respective registers.
+ (fp 8) (tp 13) (gp 14) (sp 15)
+ ("12" 12) ("13" 13) ("14" 14) ("15" 15)))
+)
+
+(define-hardware
+ (name h-csr)
+ (comment "Control/special registers")
+ (attrs all-mep-isas PROFILE)
+ (type register SI (32))
+ (indices keyword "$"
+ ((pc 0) (lp 1) (sar 2) (rpb 4) (rpe 5) (rpc 6)
+ (hi 7) (lo 8) (mb0 12) (me0 13) (mb1 14) (me1 15)
+ (psw 16) (id 17) (tmp 18) (epc 19) (exc 20) (cfg 21)
+ (npc 23) (dbg 24) (depc 25) (opt 26) (rcfg 27) (ccfg 28)
+; begin-extra-csr-registers
+ (vid 22)
+; end-extra-csr-registers
+ ))
+ (get (index) (c-call SI "cgen_get_csr_value" index))
+ (set (index newval) (c-call VOID "cgen_set_csr_value" index newval))
+)
+
+(define-pmacro (-reg-pair n) ((.sym n) n))
+(define-hardware
+ (name h-cr64)
+ (comment "64-bit coprocessor registers")
+ (attrs all-mep-isas)
+ ; This assumes that the data path of the co-pro is 64 bits.
+ (type register DI (32))
+ (indices keyword "$c" (.map -reg-pair (.iota 32)))
+ (set (index newval) (c-call VOID "h_cr64_queue_set" index newval))
+)
+(define-hardware
+ (name h-cr64-w)
+ (comment "64-bit coprocessor registers, pending writes")
+ (attrs all-mep-isas)
+ ; This assumes that the data path of the co-pro is 64 bits.
+ (type register DI (32))
+)
+
+(define-hardware
+ (name h-cr)
+ (comment "32-bit coprocessor registers")
+ (attrs all-mep-isas VIRTUAL)
+ (type register SI (32))
+ (indices keyword "$c" (.map -reg-pair (.iota 32)))
+ (set (index newval) (c-call VOID "h_cr64_set" index (ext DI newval)))
+ (get (index) (trunc SI (c-call DI "h_cr64_get" index)))
+)
+
+;; Given a coprocessor control register number N, expand to a
+;; name/index pair: ($ccrN N)
+(define-pmacro (-ccr-reg-pair n) ((.sym "$ccr" n) n))
+
+(define-hardware
+ (name h-ccr)
+ (comment "Coprocessor control registers")
+ (attrs all-mep-isas)
+ (type register SI (64))
+ (indices keyword "" (.map -ccr-reg-pair (.iota 64)))
+ (set (index newval) (c-call VOID "h_ccr_queue_set" index newval))
+)
+(define-hardware
+ (name h-ccr-w)
+ (comment "Coprocessor control registers, pending writes")
+ (attrs all-mep-isas)
+ (type register SI (64))
+)
+
+
+; Instruction fields. Bit numbering reversed.
+
+; Conventions:
+;
+; N = number of bits in value
+; A = alignment (2 or 4, omit for 1)
+; B = leftmost (i.e. closest to zero) bit position
+;
+; -- Generic Fields (f-*) --
+; N number of bits in *value* (1-24)
+; [us] signed vs unsigned
+; B position of left-most bit (4-16)
+; aA opt. alignment (2=drop 1 lsb, 4=drop 2 lsbs, etc)
+; n opt. for noncontiguous fields
+; f-foo-{hi,lo} msb/lsb parts of field f-foo
+;
+; -- Operands --
+; pcrelNaA PC-relative branch target (signed)
+; pcabsNaA Absolute branch target (unsigned)
+;
+; [us]dispNaA [un]signed displacement
+; [us]immN [un]signed immediate value
+; addrNaA absolute address (unsigned)
+;
+; Additional prefixes may be used for special cases.
+
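As a worked instance of the scheme above, f-7u9a2 (defined further below) is
a 7-bit unsigned displacement whose encoding begins at bit 9 with alignment
2: the always-zero low bit is dropped, so only 6 bits are stored.  In C the
insert/extract pair amounts to (illustrative sketch only):

    /* f-7u9a2: 7-bit unsigned value, alignment 2 -> store value >> 1,
       recover it as field << 1 (the dropped lsb is always zero).  */
    static unsigned encode_7u9a2 (unsigned value) { return value >> 1; }
    static unsigned decode_7u9a2 (unsigned field) { return field << 1; }
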
+(dnf f-major "major opcode" (all-mep-core-isas) 0 4)
+
+(dnf f-rn "register n" (all-mep-core-isas) 4 4)
+(dnf f-rn3 "register 0-7" (all-mep-core-isas) 5 3)
+(dnf f-rm "register m" (all-mep-core-isas) 8 4)
+(dnf f-rl "register l" (all-mep-core-isas) 12 4)
+(dnf f-sub2 "sub opcode (2 bits)" (all-mep-core-isas) 14 2)
+(dnf f-sub3 "sub opcode (3 bits)" (all-mep-core-isas) 13 3)
+(dnf f-sub4 "sub opcode (4 bits)" (all-mep-core-isas) 12 4)
+(dnf f-ext "extended field" (all-mep-core-isas) 16 8)
+(dnf f-ext4 "extended field 16:4" (all-mep-core-isas) 16 4)
+(dnf f-ext62 "extended field 20:2" (all-mep-core-isas) 20 2)
+(dnf f-crn "copro register n" (all-mep-core-isas) 4 4)
+
+(df f-csrn-hi "cr hi 1u15" (all-mep-core-isas) 15 1 UINT #f #f)
+(df f-csrn-lo "cr lo 4u8" (all-mep-core-isas) 8 4 UINT #f #f)
+(define-multi-ifield
+ (name f-csrn)
+ (comment "control reg")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-csrn-hi f-csrn-lo)
+ (insert (sequence ()
+ (set (ifield f-csrn-lo) (and (ifield f-csrn) #xf))
+ (set (ifield f-csrn-hi) (srl (ifield f-csrn) 4))))
+ (extract (set (ifield f-csrn)
+ (or (sll (ifield f-csrn-hi) 4) (ifield f-csrn-lo))))
+ )
+
+(df f-crnx-hi "crx hi 1u28" (all-mep-core-isas) 28 1 UINT #f #f)
+(df f-crnx-lo "crx lo 4u4" (all-mep-core-isas) 4 4 UINT #f #f)
+(define-multi-ifield
+ (name f-crnx)
+ (comment "copro register n (0-31)")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-crnx-hi f-crnx-lo)
+ (insert (sequence ()
+ (set (ifield f-crnx-lo) (and (ifield f-crnx) #xf))
+ (set (ifield f-crnx-hi) (srl (ifield f-crnx) 4))))
+ (extract (set (ifield f-crnx)
+ (or (sll (ifield f-crnx-hi) 4) (ifield f-crnx-lo))))
+ )
+
+; Miscellaneous fields.
+
+(define-pmacro (dnfb n)
+ (dnf (.sym f- n) (.str "bit " n) (all-mep-isas) n 1))
+
+; Define small fields used throughout the instruction set description.
+; Each field (eg. `f-N') is at single bit field at position N.
+
+(dnfb 0)
+(dnfb 1)
+(dnfb 2)
+(dnfb 3)
+(dnfb 4)
+(dnfb 5)
+(dnfb 6)
+(dnfb 7)
+(dnfb 8)
+(dnfb 9)
+(dnfb 10)
+(dnfb 11)
+(dnfb 12)
+(dnfb 13)
+(dnfb 14)
+(dnfb 15)
+(dnfb 16)
+(dnfb 17)
+(dnfb 18)
+(dnfb 19)
+(dnfb 20)
+(dnfb 21)
+(dnfb 22)
+(dnfb 23)
+(dnfb 24)
+(dnfb 25)
+(dnfb 26)
+(dnfb 27)
+(dnfb 28)
+(dnfb 29)
+(dnfb 30)
+(dnfb 31)
+
+; Branch/Jump target addresses
+
+(df f-8s8a2 "pc-rel addr (8 bits)" (all-mep-core-isas PCREL-ADDR) 8 7 INT
+ ((value pc) (sra SI (sub SI value pc) 1))
+ ((value pc) (add SI (sll SI value 1) pc)))
+
+(df f-12s4a2 "pc-rel addr (12 bits)" (all-mep-core-isas PCREL-ADDR) 4 11 INT
+ ((value pc) (sra SI (sub SI value pc) 1))
+ ((value pc) (add SI (sll SI value 1) pc)))
+
+(df f-17s16a2 "pc-rel addr (17 bits)" (all-mep-core-isas PCREL-ADDR) 16 16 INT
+ ((value pc) (sra SI (sub SI value pc) 1))
+ ((value pc) (add SI (sll SI value 1) pc)))
+
+(df f-24s5a2n-hi "24s5a2n hi 16s16" (all-mep-core-isas PCREL-ADDR) 16 16 INT #f #f)
+(df f-24s5a2n-lo "24s5a2n lo 7s5a2" (all-mep-core-isas PCREL-ADDR) 5 7 UINT #f #f)
+(define-multi-ifield
+ (name f-24s5a2n)
+ (comment "pc-rel addr (24 bits align 2)")
+ (attrs all-mep-core-isas PCREL-ADDR)
+ (mode INT)
+ (subfields f-24s5a2n-hi f-24s5a2n-lo)
+ (insert (sequence ()
+ (set (ifield f-24s5a2n)
+ (sub (ifield f-24s5a2n) pc))
+ (set (ifield f-24s5a2n-lo)
+ (srl (and (ifield f-24s5a2n) #xfe) 1))
+ (set (ifield f-24s5a2n-hi)
+ (sra INT (ifield f-24s5a2n) 8))))
+ (extract (set (ifield f-24s5a2n)
+ (add SI (or (sll (ifield f-24s5a2n-hi) 8)
+ (sll (ifield f-24s5a2n-lo) 1))
+ pc)))
+ )
+
+(df f-24u5a2n-hi "24u5a2n hi 16u16" (all-mep-core-isas) 16 16 UINT #f #f)
+(df f-24u5a2n-lo "24u5a2n lo 7u5a2" (all-mep-core-isas) 5 7 UINT #f #f)
+(define-multi-ifield
+ (name f-24u5a2n)
+ (comment "abs jump target (24 bits, alignment 2)")
+ (attrs all-mep-core-isas ABS-ADDR)
+ (mode UINT)
+ (subfields f-24u5a2n-hi f-24u5a2n-lo)
+ (insert (sequence ()
+ (set (ifield f-24u5a2n-lo)
+ (srl (and (ifield f-24u5a2n) #xff) 1))
+ (set (ifield f-24u5a2n-hi)
+ (srl (ifield f-24u5a2n) 8))
+ ))
+ (extract (set (ifield f-24u5a2n)
+ (or (sll (ifield f-24u5a2n-hi) 8)
+ (sll (ifield f-24u5a2n-lo) 1))))
+ )
+
+; Displacement fields.
+
+(df f-2u6 "SAR offset (2 bits)" (all-mep-core-isas) 6 2 UINT #f #f)
+(df f-7u9 "tp-rel b (7 bits)" (all-mep-core-isas) 9 7 UINT #f #f)
+(df f-7u9a2 "tp-rel h (7 bits)" (all-mep-core-isas) 9 6 UINT
+ ((value pc) (srl SI value 1))
+ ((value pc) (sll SI value 1)))
+(df f-7u9a4 "tp/sp-rel w (7 bits)" (all-mep-core-isas) 9 5 UINT
+ ((value pc) (srl SI value 2))
+ ((value pc) (sll SI value 2)))
+(df f-16s16 "general 16-bit s-val" (all-mep-core-isas) 16 16 INT #f #f)
+
+; Immediate fields.
+
+(df f-2u10 "swi level (2 bits)" (all-mep-core-isas) 10 2 UINT #f #f)
+(df f-3u5 "bit offset (3 bits)" (all-mep-core-isas) 5 3 UINT #f #f)
+(df f-4u8 "bCC const (4 bits)" (all-mep-core-isas) 8 4 UINT #f #f)
+(df f-5u8 "slt & shifts (5 bits)" (all-mep-core-isas) 8 5 UINT #f #f)
+(df f-5u24 "clip immediate (5 bits)" (all-mep-core-isas) 24 5 UINT #f #f)
+(df f-6s8 "add immediate (6 bits)" (all-mep-core-isas) 8 6 INT #f #f)
+(df f-8s8 "add imm (8 bits)" (all-mep-core-isas) 8 8 INT #f #f)
+(df f-16u16 "general 16-bit u-val" (all-mep-core-isas) 16 16 UINT #f #f)
+(df f-12u16 "cmov fixed 1" (all-mep-core-isas) 16 12 UINT #f #f)
+(df f-3u29 "cmov fixed 2" (all-mep-core-isas) 29 3 UINT #f #f)
+
+
+; These are all for the coprocessor opcodes
+
+; The field is like IJKiiiiiii where I and J are toggled if K is set,
+; for compatibility with older cores.
+(define-pmacro (compute-cdisp10 val)
+ (cond SI
+ ((and SI (cond SI ((and SI val #x80) (xor SI val #x300)) (else val)) #x200)
+ (sub (cond SI ((and SI val #x80) (xor SI val #x300)) (else val)) #x400))
+ (else
+ (cond SI ((and SI val #x80) (xor SI val #x300)) (else val)))
+ )
+ )
+(define-pmacro (extend-cdisp10 val)
+ (cond SI
+ ((and SI (compute-cdisp10 val) #x200)
+ (sub (and SI (compute-cdisp10 val) #x3ff) #x400))
+ (else
+ (and SI (compute-cdisp10 val) #x3ff))
+ )
+ )
+
+(df f-cdisp10 "cop imm10" (all-mep-core-isas) 22 10 INT
+ ((value pc) (extend-cdisp10 value))
+ ((value pc) (extend-cdisp10 value))
+ )
+
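To make the bit-toggling concrete, the decode path of these pmacros can be
rendered in C as follows: with the field laid out as IJKiiiiiii, bits I and
J are flipped whenever K is set, and the 10-bit result is then sign-extended.
This sketch is illustrative and not part of the port:

    #include <stdio.h>

    /* Decode a raw f-cdisp10 field, mirroring compute-cdisp10 and
       extend-cdisp10 above.  */
    static int
    decode_cdisp10 (unsigned raw)
    {
      unsigned t = (raw & 0x80) ? (raw ^ 0x300) : raw;  /* toggle I,J if K */
      t &= 0x3ff;                                       /* keep 10 bits    */
      return (t & 0x200) ? (int) t - 0x400 : (int) t;   /* sign-extend     */
    }

    int
    main (void)
    {
      printf ("%d\n", decode_cdisp10 (0x07f));          /* K clear: 127  */
      printf ("%d\n", decode_cdisp10 (0x080));          /* K set:   -128 */
      return 0;
    }
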
+; Non-contiguous fields.
+
+(df f-24u8a4n-hi "24u8a4n hi 16u16" (all-mep-core-isas) 16 16 UINT #f #f)
+(df f-24u8a4n-lo "24u8a4n lo 8u8a4" (all-mep-core-isas) 8 6 UINT #f #f)
+(define-multi-ifield
+ (name f-24u8a4n)
+ (comment "absolute 24-bit address")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-24u8a4n-hi f-24u8a4n-lo)
+ (insert (sequence ()
+ (set (ifield f-24u8a4n-hi) (srl (ifield f-24u8a4n) 8))
+ (set (ifield f-24u8a4n-lo) (srl (and (ifield f-24u8a4n) #xfc) 2))))
+ (extract (set (ifield f-24u8a4n)
+ (or (sll (ifield f-24u8a4n-hi) 8)
+ (sll (ifield f-24u8a4n-lo) 2))))
+ )
+
+(df f-24u8n-hi "24u8n hi 16u16" (all-mep-core-isas) 16 16 UINT #f #f)
+(df f-24u8n-lo "24u8n lo 8u8" (all-mep-core-isas) 8 8 UINT #f #f)
+(define-multi-ifield
+ (name f-24u8n)
+ (comment "24-bit constant")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-24u8n-hi f-24u8n-lo)
+ (insert (sequence ()
+ (set (ifield f-24u8n-hi) (srl (ifield f-24u8n) 8))
+ (set (ifield f-24u8n-lo) (and (ifield f-24u8n) #xff))))
+ (extract (set (ifield f-24u8n)
+ (or (sll (ifield f-24u8n-hi) 8)
+ (ifield f-24u8n-lo))))
+ )
+
+(df f-24u4n-hi "24u4n hi 8u4" (all-mep-core-isas) 4 8 UINT #f #f)
+(df f-24u4n-lo "24u4n lo 16u16" (all-mep-core-isas) 16 16 UINT #f #f)
+(define-multi-ifield
+ (name f-24u4n)
+ (comment "coprocessor code")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-24u4n-hi f-24u4n-lo)
+ (insert (sequence ()
+ (set (ifield f-24u4n-hi) (srl (ifield f-24u4n) 16))
+ (set (ifield f-24u4n-lo) (and (ifield f-24u4n) #xffff))))
+ (extract (set (ifield f-24u4n)
+ (or (sll (ifield f-24u4n-hi) 16)
+ (ifield f-24u4n-lo))))
+ )
+
+(define-multi-ifield
+ (name f-callnum)
+ (comment "system call number field")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-5 f-6 f-7 f-11)
+ (insert (sequence ()
+ (set (ifield f-5) (and (srl (ifield f-callnum) 3) 1))
+ (set (ifield f-6) (and (srl (ifield f-callnum) 2) 1))
+ (set (ifield f-7) (and (srl (ifield f-callnum) 1) 1))
+ (set (ifield f-11) (and (ifield f-callnum) 1))))
+ (extract (set (ifield f-callnum)
+ (or (sll (ifield f-5) 3)
+ (or (sll (ifield f-6) 2)
+ (or (sll (ifield f-7) 1)
+ (ifield f-11))))))
+ )
+
+(df f-ccrn-hi "ccrn hi 2u28" (all-mep-core-isas) 28 2 UINT #f #f)
+(df f-ccrn-lo "ccrn lo 4u4" (all-mep-core-isas) 4 4 UINT #f #f)
+(define-multi-ifield
+ (name f-ccrn)
+ (comment "Coprocessor register number field")
+ (attrs all-mep-core-isas)
+ (mode UINT)
+ (subfields f-ccrn-hi f-ccrn-lo)
+ (insert (sequence ()
+ (set (ifield f-ccrn-hi) (and (srl (ifield f-ccrn) 4) #x3))
+ (set (ifield f-ccrn-lo) (and (ifield f-ccrn) #xf))))
+ (extract (set (ifield f-ccrn)
+ (or (sll (ifield f-ccrn-hi) 4)
+ (ifield f-ccrn-lo))))
+ )
+
+; Operands.
+
+;; Only LABEL, REGNUM, FMAX_FLOAT and FMAX_INT are now relevant for correct
+;; operation. The others are mostly kept for backwards compatibility,
+;; although they do affect the dummy prototypes in
+;; gcc/config/mep/intrinsics.h.
+(define-attr
+ (type enum)
+ (for operand)
+ (name CDATA)
+ (comment "datatype to use for C intrinsics mapping")
+ (values LABEL REGNUM FMAX_FLOAT FMAX_INT
+ POINTER LONG ULONG SHORT USHORT CHAR UCHAR CP_DATA_BUS_INT)
+ (default LONG))
+
+(define-attr
+ (type enum)
+ (for insn)
+ (name CPTYPE)
+ (comment "datatype to use for coprocessor values")
+ (values CP_DATA_BUS_INT VECT V2SI V4HI V8QI V2USI V4UHI V8UQI)
+ (default CP_DATA_BUS_INT))
+
+(define-attr
+ (type enum)
+ (for insn)
+ (name CRET)
+ ;; VOID - all arguments are passed as parameters; if any are written, pointers to them are passed.
+ ;; FIRST - the first argument is the return value.
+ ;; FIRSTCOPY - the first argument is the return value, but a copy is also the first parameter.
+ (values VOID FIRST FIRSTCOPY)
+ (default VOID)
+ (comment "Insn's intrinsic returns void, or the first argument rather than (or in addition to) passing it."))
+
+(define-attr
+ (type integer)
+ (for operand)
+ (name ALIGN)
+ (comment "alignment of immediate operands")
+ (default 1))
+
+(define-attr
+ (for operand)
+ (type boolean)
+ (name RELOC_IMPLIES_OVERFLOW)
+ (comment "Operand should not be considered as a candidate for relocs"))
+
+(define-attr
+ (for hardware)
+ (type boolean)
+ (name IS_FLOAT)
+ (comment "Register contains a floating point value"))
+
+(define-pmacro (dpop name comment attrib hwr field func)
+  (define-full-operand name comment attrib
+    hwr DFLT field ((parse func)) () ()))
+(define-pmacro (dprp name comment attrib hwr field pafunc prfunc)
+  (define-full-operand name comment attrib
+    hwr DFLT field ((parse pafunc) (print prfunc)) () ()))
+
+(dnop r0 "register 0" (all-mep-core-isas) h-gpr 0)
+(dnop rn "register Rn" (all-mep-core-isas) h-gpr f-rn)
+(dnop rm "register Rm" (all-mep-core-isas) h-gpr f-rm)
+(dnop rl "register Rl" (all-mep-core-isas) h-gpr f-rl)
+(dnop rn3 "register 0-7" (all-mep-core-isas) h-gpr f-rn3)
+
+;; Variants of RM/RN with different CDATA attributes. See comment above
+;; CDATA for more details.
+
+(dnop rma "register Rm holding pointer" (all-mep-core-isas (CDATA POINTER)) h-gpr f-rm)
+
+(dnop rnc "register Rn holding char" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn)
+(dnop rnuc "register Rn holding unsigned char" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn)
+(dnop rns "register Rn holding short" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn)
+(dnop rnus "register Rn holding unsigned short" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn)
+(dnop rnl "register Rn holding long" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn)
+(dnop rnul "register Rn holding unsigned long" (all-mep-core-isas (CDATA ULONG)) h-gpr f-rn)
+
+(dnop rn3c "register 0-7 holding char" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn3)
+(dnop rn3uc "register 0-7 holding unsigned char" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn3)
+(dnop rn3s "register 0-7 holding short" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn3)
+(dnop rn3us "register 0-7 holding unsigned short" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn3)
+(dnop rn3l "register 0-7 holding long" (all-mep-core-isas (CDATA LONG)) h-gpr f-rn3)
+(dnop rn3ul "register 0-7 holding unsigned long" (all-mep-core-isas (CDATA ULONG)) h-gpr f-rn3)
+
+
+(dnop lp "link pointer" (all-mep-core-isas) h-csr 1)
+(dnop sar "shift amount register" (all-mep-core-isas) h-csr 2)
+(dnop hi "high result" (all-mep-core-isas) h-csr 7)
+(dnop lo "low result" (all-mep-core-isas) h-csr 8)
+(dnop mb0 "modulo begin register 0" (all-mep-core-isas) h-csr 12)
+(dnop me0 "modulo end register 0" (all-mep-core-isas) h-csr 13)
+(dnop mb1 "modulo begin register 1" (all-mep-core-isas) h-csr 14)
+(dnop me1 "modulo end register 1" (all-mep-core-isas) h-csr 15)
+(dnop psw "program status word" (all-mep-core-isas) h-csr 16)
+(dnop epc "exception prog counter" (all-mep-core-isas) h-csr 19)
+(dnop exc "exception cause" (all-mep-core-isas) h-csr 20)
+(dnop npc "nmi program counter" (all-mep-core-isas) h-csr 23)
+(dnop dbg "debug register" (all-mep-core-isas) h-csr 24)
+(dnop depc "debug exception pc" (all-mep-core-isas) h-csr 25)
+(dnop opt "option register" (all-mep-core-isas) h-csr 26)
+(dnop r1 "register 1" (all-mep-core-isas) h-gpr 1)
+(dnop tp "tiny data area pointer" (all-mep-core-isas) h-gpr 13)
+(dnop sp "stack pointer" (all-mep-core-isas) h-gpr 15)
+(dprp tpr "TP register" (all-mep-core-isas) h-gpr 13 "tpreg" "tpreg")
+(dprp spr "SP register" (all-mep-core-isas) h-gpr 15 "spreg" "spreg")
+
+(define-full-operand
+ csrn "control/special register" (all-mep-core-isas (CDATA REGNUM)) h-csr
+ DFLT f-csrn ((parse "csrn")) () ()
+)
+
+(dnop csrn-idx "control/special reg idx" (all-mep-core-isas) h-uint f-csrn)
+(dnop crn64 "copro Rn (64-bit)" (all-mep-core-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-crn)
+(dnop crn "copro Rn (32-bit)" (all-mep-core-isas (CDATA CP_DATA_BUS_INT)) h-cr f-crn)
+(dnop crnx64 "copro Rn (0-31, 64-bit)" (all-mep-core-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-crnx)
+(dnop crnx "copro Rn (0-31, 32-bit)" (all-mep-core-isas (CDATA CP_DATA_BUS_INT)) h-cr f-crnx)
+(dnop ccrn "copro control reg CCRn" (all-mep-core-isas (CDATA REGNUM)) h-ccr f-ccrn)
+(dnop cccc "copro flags" (all-mep-core-isas) h-uint f-rm)
+
+(dprp pcrel8a2 "pc-rel addr (8 bits)" (all-mep-core-isas (CDATA LABEL) RELAX) h-sint f-8s8a2 "mep_align" "address")
+(dprp pcrel12a2 "pc-rel addr (12 bits)" (all-mep-core-isas (CDATA LABEL) RELAX) h-sint f-12s4a2 "mep_align" "address")
+(dprp pcrel17a2 "pc-rel addr (17 bits)" (all-mep-core-isas (CDATA LABEL) RELAX) h-sint f-17s16a2 "mep_align" "address")
+(dprp pcrel24a2 "pc-rel addr (24 bits)" (all-mep-core-isas (CDATA LABEL)) h-sint f-24s5a2n "mep_align" "address")
+(dprp pcabs24a2 "pc-abs addr (24 bits)" (all-mep-core-isas (CDATA LABEL)) h-uint f-24u5a2n "mep_alignu" "address")
+
+(dpop sdisp16 "displacement (16 bits)" (all-mep-core-isas) h-sint f-16s16 "signed16")
+(dpop simm16 "signed imm (16 bits)" (all-mep-core-isas) h-sint f-16s16 "signed16")
+(dpop uimm16 "unsigned imm (16 bits)" (all-mep-core-isas) h-uint f-16u16 "unsigned16")
+(dnop code16 "uci/dsp code (16 bits)" (all-mep-core-isas) h-uint f-16u16)
+
+(dnop udisp2 "SSARB addend (2 bits)" (all-mep-core-isas) h-sint f-2u6)
+(dnop uimm2 "interrupt (2 bits)" (all-mep-core-isas) h-uint f-2u10)
+
+(dnop simm6 "add const (6 bits)" (all-mep-core-isas) h-sint f-6s8)
+(dnop simm8 "mov const (8 bits)" (all-mep-core-isas RELOC_IMPLIES_OVERFLOW)
+ h-sint f-8s8)
+
+(dpop addr24a4 "sw/lw addr (24 bits)" (all-mep-core-isas (ALIGN 4)) h-uint f-24u8a4n "mep_alignu")
+(dnop code24 "coprocessor code" (all-mep-core-isas) h-uint f-24u4n)
+
+(dnop callnum "system call number" (all-mep-core-isas) h-uint f-callnum)
+(dnop uimm3 "bit immediate (3 bits)" (all-mep-core-isas) h-uint f-3u5)
+(dnop uimm4 "bCC const (4 bits)" (all-mep-core-isas) h-uint f-4u8)
+(dnop uimm5 "bit/shift val (5 bits)" (all-mep-core-isas) h-uint f-5u8)
+
+(dpop udisp7 "tp-rel b (7 bits)" (all-mep-core-isas) h-uint f-7u9 "unsigned7")
+(dpop udisp7a2 "tp-rel h (7 bits)" (all-mep-core-isas (ALIGN 2)) h-uint f-7u9a2 "unsigned7")
+(dpop udisp7a4 "tp/sp-rel w (7 bits)" (all-mep-core-isas (ALIGN 4)) h-uint f-7u9a4 "unsigned7")
+(dpop uimm7a4 "sp w-addend (7 bits)" (all-mep-core-isas (ALIGN 4)) h-uint f-7u9a4 "mep_alignu")
+
+(dnop uimm24 "immediate (24 bits)" (all-mep-core-isas) h-uint f-24u8n)
+
+(dnop cimm4 "cache immed'te (4 bits)" (all-mep-core-isas) h-uint f-rn)
+(dnop cimm5 "clip immediate (5 bits)" (all-mep-core-isas) h-uint f-5u24)
+
+(dpop cdisp10 "copro addend (8/10 bits)" (all-mep-core-isas) h-sint f-cdisp10 "cdisp10")
+(dpop cdisp10a2 "copro addend (8/10 bits)" (all-mep-core-isas) h-sint f-cdisp10 "cdisp10")
+(dpop cdisp10a4 "copro addend (8/10 bits)" (all-mep-core-isas) h-sint f-cdisp10 "cdisp10")
+(dpop cdisp10a8 "copro addend (8/10 bits)" (all-mep-core-isas) h-sint f-cdisp10 "cdisp10")
+
+; Special operand representing the various ways that the literal zero can be
+; specified.
+(define-full-operand
+ zero "Zero operand" (all-mep-core-isas) h-sint DFLT f-nil
+ ((parse "zero")) () ()
+)
+
+; Attributes.
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_BIT_INSN)
+ (comment "optional bit manipulation instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_MUL_INSN)
+ (comment "optional 32-bit multiply instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_DIV_INSN)
+ (comment "optional 32-bit divide instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_DEBUG_INSN)
+ (comment "optional debug instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_LDZ_INSN)
+ (comment "optional leading zeroes instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_ABS_INSN)
+ (comment "optional absolute difference instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_AVE_INSN)
+ (comment "optional average instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_MINMAX_INSN)
+ (comment "optional min/max instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_CLIP_INSN)
+ (comment "optional clipping instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_SAT_INSN)
+ (comment "optional saturation instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_UCI_INSN)
+ (comment "optional UCI instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_DSP_INSN)
+ (comment "optional DSP instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_CP_INSN)
+ (comment "optional coprocessor-related instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_CP64_INSN)
+ (comment "optional coprocessor-related 64 data bit instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name OPTIONAL_VLIW64)
+ (comment "optional vliw64 mode (vliw32 is default)"))
+
+(define-attr
+ (for insn)
+ (type enum)
+ (name STALL)
+ (attrs META)
+ (values NONE SHIFTI INT2 LOAD STORE LDC STC LDCB STCB SSARB FSFT RET
+ ADVCK MUL MULR DIV)
+ (default NONE)
+ (comment "gcc stall attribute"))
+
+(define-attr
+ (for insn)
+ (type string)
+ (name INTRINSIC)
+ (attrs META)
+ (comment "gcc intrinsic name"))
+
+(define-attr
+ (for insn)
+ (type enum)
+ (name SLOT)
+ (attrs META)
+ (values NONE C3 V1 V3 P0S P0 P1)
+ (default NONE)
+ (comment "coprocessor slot type"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name MAY_TRAP)
+ (comment "instruction may generate an exception"))
+
+; Attributes for scheduling restrictions in vliw mode
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name VLIW_ALONE)
+ (comment "instruction can be scheduled alone in vliw mode"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name VLIW_NO_CORE_NOP)
+ (comment "there is no corresponding nop core instruction"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name VLIW_NO_COP_NOP)
+ (comment "there is no corresponding nop coprocessor instruction"))
+
+(define-attr
+  (for insn)
+  (type boolean)
+  (name VLIW64_NO_MATCHING_NOP)
+  (comment "there is no matching coprocessor nop available in vliw64 mode"))
+(define-attr
+  (for insn)
+  (type boolean)
+  (name VLIW32_NO_MATCHING_NOP)
+  (comment "there is no matching coprocessor nop available in vliw32 mode"))
+
+(define-attr
+ (for insn)
+ (type boolean)
+ (name VOLATILE)
+ (comment "Insn is volatile."))
+
+(define-attr
+ (for insn)
+ (type integer)
+ (name LATENCY)
+ (comment "The latency of this insn, used for scheduling as an intrinsic in gcc")
+ (default 0))
+
+; The MeP config tool will edit this.
+(define-attr
+ (type enum)
+ (for insn)
+ (name CONFIG)
+ (values NONE ; config-attr-start
+ default
+ ) ; config-attr-end
+)
+
+
+; Enumerations.
+
+(define-normal-insn-enum major "major opcodes" (all-mep-core-isas) MAJ_
+ f-major
+ (.map .str (.iota 16))
+)
+
+
+(define-pmacro (dni-isa xname xcomment xattrs xsyntax xformat xsemantics xtiming isa)
+ (define-insn
+ (name xname)
+ (comment xcomment)
+ (.splice attrs (.unsplice xattrs) (ISA isa))
+ (syntax xsyntax)
+ (format xformat)
+ (semantics xsemantics)
+ (.splice timing (.unsplice xtiming))
+ )
+)
+
+(define-pmacro (dnmi-isa xname xcomment xattrs xsyntax xemit isa)
+ (dnmi xname xcomment (.splice (.unsplice xattrs) (ISA isa)) xsyntax xemit)
+)
+
+; For making profiling calls and dynamic configuration
+(define-pmacro (cg-profile caller callee)
+ (c-call "cg_profile" caller callee)
+)
+; For dynamic configuration only
+(define-pmacro (cg-profile-jump caller callee)
+ (c-call "cg_profile_jump" caller callee)
+)
+
+; For defining Core Instructions
+(define-pmacro (dnci xname xcomment xattrs xsyntax xformat xsemantics xtiming)
+ (dni-isa xname xcomment xattrs xsyntax xformat xsemantics xtiming all-core-isa-list)
+)
+(define-pmacro (dncmi xname xcomment xattrs xsyntax xemit)
+ (dnmi-isa xname xcomment xattrs xsyntax xemit all-core-isa-list)
+)
+
+; For defining Coprocessor Instructions
+;(define-pmacro (dncpi xname xcomment xattrs xsyntax xformat xsemantics xtiming) (dni-isa xname xcomment xattrs xsyntax xformat xsemantics xtiming cop)
+;)
+
+;; flag setting macro
+(define-pmacro (set-bit xop xbitnum xval)
+ (set xop (or
+ (and xop (inv (sll 1 xbitnum)))
+ (and (sll 1 xbitnum) (sll xval xbitnum)))))
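
The pmacro clears the selected bit of its operand and then ORs in the new
value masked to that one bit position; a direct C equivalent, for
illustration only:

    /* C analogue of set-bit: clear bit `bitnum' of `reg', then OR in
       the low bit of `val' shifted into position.  */
    static unsigned
    set_bit (unsigned reg, unsigned bitnum, unsigned val)
    {
      return (reg & ~(1u << bitnum)) | ((1u << bitnum) & (val << bitnum));
    }

The set-psw.* pmacros below are this operation applied to fixed bits of the
raw $psw register (for example bit 12 for OM and bit 9 for NMI).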
+
+;; some flags we commonly use in vliw reasoning / mode-switching etc.
+(define-pmacro (get-opt.vliw64) (and (srl opt 6) 1))
+(define-pmacro (get-opt.vliw32) (and (srl opt 5) 1))
+(define-pmacro (get-rm.lsb) (and rm 1))
+(define-pmacro (get-psw.om) (and (srl psw 12) 1))
+(define-pmacro (get-psw.nmi) (and (srl psw 9) 1))
+(define-pmacro (get-psw.iep) (and (srl psw 1) 1))
+(define-pmacro (get-psw.ump) (and (srl psw 3) 1))
+(define-pmacro (get-epc.etom) (and epc 1))
+(define-pmacro (get-npc.ntom) (and npc 1))
+(define-pmacro (get-lp.ltom) (and lp 1))
+
+(define-pmacro (set-psw.om zval) (set-bit (raw-reg h-csr 16) 12 zval))
+(define-pmacro (set-psw.nmi zval) (set-bit (raw-reg h-csr 16) 9 zval))
+(define-pmacro (set-psw.umc zval) (set-bit (raw-reg h-csr 16) 2 zval))
+(define-pmacro (set-psw.iec zval) (set-bit (raw-reg h-csr 16) 0 zval))
+(define-pmacro (set-rpe.elr zval) (set-bit (raw-reg h-csr 5) 0 zval))
+
+
+;; the "3 way switch" depending on our current operating mode and vliw status flags
+(define-pmacro (core-vliw-switch core-rtl vliw32-rtl vliw64-rtl)
+ (cond
+ ((andif (get-psw.om) (get-opt.vliw64)) vliw64-rtl)
+ ((andif (get-psw.om) (get-opt.vliw32)) vliw32-rtl)
+ (else core-rtl)))
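
Spelled out in C, the dispatch reads PSW.OM (bit 12) and the OPT vliw
enables (bits 6 and 5) exactly as the getter pmacros above do.  Sketch only:

    static int
    core_vliw_switch (unsigned psw, unsigned opt,
                      int core_val, int vliw32_val, int vliw64_val)
    {
      int om     = (psw >> 12) & 1;   /* get-psw.om     */
      int vliw64 = (opt >> 6) & 1;    /* get-opt.vliw64 */
      int vliw32 = (opt >> 5) & 1;    /* get-opt.vliw32 */

      if (om && vliw64)
        return vliw64_val;
      if (om && vliw32)
        return vliw32_val;
      return core_val;
    }
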
+
+;; the varying-pcrel idiom
+(define-pmacro (set-vliw-modified-pcrel-offset xtarg xa xb xc)
+ (core-vliw-switch (set xtarg (add pc xa))
+ (set xtarg (add pc xb))
+ (set xtarg (add pc xc))))
+
+;; the increasing-alignment idiom in branch displacements
+(define-pmacro (set-vliw-alignment-modified xtarg zaddr)
+ (core-vliw-switch (set xtarg (and zaddr (inv 1)))
+ (set xtarg (and zaddr (inv 3)))
+ (set xtarg (and zaddr (inv 7)))))
+
+;; the increasing-alignment idiom in option-only form
+(define-pmacro (set-vliw-aliignment-modified-by-option xtarg zaddr)
+ (if (get-opt.vliw32)
+ (set xtarg (and zaddr (inv 3)))
+ (set xtarg (and zaddr (inv 7)))))
+
+
+
+; pmacros needed for coprocessor modulo addressing.
+
+; Taken from supplement ``The operation of the modulo addressing'' in
+; Toshiba documentation rev 2.2, p. 34.
+
+(define-pmacro (compute-mask0)
+ (sequence SI ((SI temp))
+ (set temp (or mb0 me0))
+ (srl (const SI -1) (c-call SI "do_ldz" temp))))
+
+(define-pmacro (mod0 immed)
+ (sequence SI ((SI modulo-mask))
+ (set modulo-mask (compute-mask0))
+ (if SI (eq (and rma modulo-mask) me0)
+ (or (and rma (inv modulo-mask)) mb0)
+ (add rma (ext SI immed)))))
+
+(define-pmacro (compute-mask1)
+ (sequence SI ((SI temp))
+ (set temp (or mb1 me1))
+ (srl (const SI -1) (c-call SI "do_ldz" temp))))
+
+(define-pmacro (mod1 immed)
+ (sequence SI ((SI modulo-mask))
+ (set modulo-mask (compute-mask1))
+ (if SI (eq (and rma modulo-mask) me1)
+ (or (and rma (inv modulo-mask)) mb1)
+ (add rma (ext SI immed)))))
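+; Worked example (the values are illustrative assumptions): with
+; mb0 = #x100 and me0 = #x1fc, (or mb0 me0) = #x1fc, do_ldz reports
+; 23 leading zeros, and the mask is #xffffffff >> 23 = #x1ff.  A
+; pointer rma that has reached the end marker ((and rma #x1ff) =
+; #x1fc) wraps to (or (and rma (inv #x1ff)) #x100); otherwise it just
+; advances by the sign-extended immediate.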
+
+
+; Instructions.
+
+; A pmacro for use in semantic bodies of unimplemented insns.
+(define-pmacro (unimp mnemonic) (nop))
+
+; Core specific instructions
+; (include "mep-h1.cpu") ; -- exposed by MeP-Integrator
+(include "mep-c5.cpu") ; -- exposed by MeP-Integrator
+
+; Load/store instructions.
+
+(dnci sb "store byte (register indirect)" ((STALL STORE))
+ "sb $rnc,($rma)"
+ (+ MAJ_0 rnc rma (f-sub4 8))
+ (sequence ()
+ (c-call VOID "check_write_to_text" rma)
+ (set (mem UQI rma) (and rnc #xff)))
+ ((mep (unit u-use-gpr (in usereg rnc))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sh "store half-word (register indirect)" ((STALL STORE))
+ "sh $rns,($rma)"
+ (+ MAJ_0 rns rma (f-sub4 9))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and rma (inv 1)))
+ (set (mem UHI (and rma (inv 1))) (and rns #xffff)))
+ ((mep (unit u-use-gpr (in usereg rns))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sw "store word (register indirect)" ((STALL STORE))
+ "sw $rnl,($rma)"
+ (+ MAJ_0 rnl rma (f-sub4 10))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and rma (inv 3)))
+ (set (mem USI (and rma (inv 3))) rnl))
+ ((mep (unit u-use-gpr (in usereg rnl))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lb "load byte (register indirect)" ((STALL LOAD) (LATENCY 2))
+ "lb $rnc,($rma)"
+ (+ MAJ_0 rnc rma (f-sub4 12))
+ (set rnc (ext SI (mem QI rma)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnc)))))
+
+(dnci lh "load half-word (register indirect)" ((STALL LOAD) (LATENCY 2))
+ "lh $rns,($rma)"
+ (+ MAJ_0 rns rma (f-sub4 13))
+ (set rns (ext SI (mem HI (and rma (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rns)))))
+
+(dnci lw "load word (register indirect)" ((STALL LOAD) (LATENCY 2))
+ "lw $rnl,($rma)"
+ (+ MAJ_0 rnl rma (f-sub4 14))
+ (set rnl (mem SI (and rma (inv 3))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnl)))))
+
+(dnci lbu "load unsigned byte (register indirect)" ((STALL LOAD) (LATENCY 2))
+ "lbu $rnuc,($rma)"
+ (+ MAJ_0 rnuc rma (f-sub4 11))
+ (set rnuc (zext SI (mem UQI rma)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnuc)))))
+
+(dnci lhu "load unsigned half-word (register indirect)" ((STALL LOAD) (LATENCY 2))
+ "lhu $rnus,($rma)"
+ (+ MAJ_0 rnus rma (f-sub4 15))
+ (set rnus (zext SI (mem UHI (and rma (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnus)))))
+
+(dnci sw-sp "store word (sp relative)" ((STALL STORE))
+ "sw $rnl,$udisp7a4($spr)"
+ (+ MAJ_4 rnl (f-8 0) udisp7a4 (f-sub2 2))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and (add udisp7a4 sp) (inv 3)))
+ (set (mem SI (and (add udisp7a4 sp) (inv 3))) rnl))
+ ((mep (unit u-use-gpr (in usereg rnl))
+ (unit u-use-gpr (in usereg sp))
+ (unit u-exec))))
+
+
+(dnci lw-sp "load word (sp relative)" ((STALL LOAD) (LATENCY 2))
+ "lw $rnl,$udisp7a4($spr)"
+ (+ MAJ_4 rnl (f-8 0) udisp7a4 (f-sub2 3))
+ (set rnl (mem SI (and (add udisp7a4 sp) (inv 3))))
+ ((mep (unit u-use-gpr (in usereg sp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnl)))))
+
+(dnci sb-tp "store byte (tp relative)" ((STALL STORE))
+ "sb $rn3c,$udisp7($tpr)"
+ (+ MAJ_8 (f-4 0) rn3c (f-8 0) udisp7)
+ (sequence ()
+ (c-call VOID "check_write_to_text" (add (zext SI udisp7) tp))
+ (set (mem QI (add (zext SI udisp7) tp)) (and rn3c #xff)))
+ ((mep (unit u-use-gpr (in usereg rn3c))
+ (unit u-use-gpr (in usereg tp))
+ (unit u-exec))))
+
+(dnci sh-tp "store half-word (tp relative)" ((STALL STORE))
+ "sh $rn3s,$udisp7a2($tpr)"
+ (+ MAJ_8 (f-4 0) rn3s (f-8 1) udisp7a2 (f-15 0))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and (add (zext SI udisp7a2) tp) (inv 1)))
+ (set (mem HI (and (add (zext SI udisp7a2) tp) (inv 1))) (and rn3s #xffff)))
+ ((mep (unit u-use-gpr (in usereg rn3s))
+ (unit u-use-gpr (in usereg tp))
+ (unit u-exec))))
+
+(dnci sw-tp "store word (tp relative)" ((STALL STORE))
+ "sw $rn3l,$udisp7a4($tpr)"
+ (+ MAJ_4 (f-4 0) rn3l (f-8 1) udisp7a4 (f-sub2 2))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and (add (zext SI udisp7a4) tp) (inv 3)))
+ (set (mem SI (and (add (zext SI udisp7a4) tp) (inv 3))) rn3l))
+ ((mep (unit u-use-gpr (in usereg rn3l))
+ (unit u-use-gpr (in usereg tp))
+ (unit u-exec))))
+
+(dnci lb-tp "load byte (tp relative)" ((STALL LOAD) (LATENCY 2))
+ "lb $rn3c,$udisp7($tpr)"
+ (+ MAJ_8 (f-4 1) rn3c (f-8 0) udisp7)
+ (set rn3c (ext SI (mem QI (add (zext SI udisp7) tp))))
+ ((mep (unit u-use-gpr (in usereg tp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn3c)))))
+
+(dnci lh-tp "load half-word (tp relative)" ((STALL LOAD) (LATENCY 2))
+ "lh $rn3s,$udisp7a2($tpr)"
+ (+ MAJ_8 (f-4 1) rn3s (f-8 1) udisp7a2 (f-15 0))
+ (set rn3s (ext SI (mem HI (and (add (zext SI udisp7a2) tp) (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg tp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn3s)))))
+
+(dnci lw-tp "load word (tp relative)" ((STALL LOAD) (LATENCY 2))
+ "lw $rn3l,$udisp7a4($tpr)"
+ (+ MAJ_4 (f-4 0) rn3l (f-8 1) udisp7a4 (f-sub2 3))
+ (set rn3l (mem SI (and (add (zext SI udisp7a4) tp) (inv 3))))
+ ((mep (unit u-use-gpr (in usereg tp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn3l)))))
+
+(dnci lbu-tp "load unsigned byte (tp relative)" ((STALL LOAD) (LATENCY 2))
+ "lbu $rn3uc,$udisp7($tpr)"
+ (+ MAJ_4 (f-4 1) rn3uc (f-8 1) udisp7)
+ (set rn3uc (zext SI (mem QI (add (zext SI udisp7) tp))))
+ ((mep (unit u-use-gpr (in usereg tp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn3uc)))))
+
+(dnci lhu-tp "load unsigned half-word (tp relative)" ((STALL LOAD) (LATENCY 2))
+ "lhu $rn3us,$udisp7a2($tpr)"
+ (+ MAJ_8 (f-4 1) rn3us (f-8 1) udisp7a2 (f-15 1))
+ (set rn3us (zext SI (mem HI (and (add (zext SI udisp7a2) tp) (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg tp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn3us)))))
+
+(dnci sb16 "store byte (16 bit displacement)" ((STALL STORE))
+ "sb $rnc,$sdisp16($rma)"
+ (+ MAJ_12 rnc rma (f-sub4 8) sdisp16)
+ (sequence ()
+ (c-call VOID "check_write_to_text" (add rma (ext SI sdisp16)))
+ (set (mem QI (add rma (ext SI sdisp16))) (and rnc #xff)))
+ ((mep (unit u-use-gpr (in usereg rnc))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sh16 "store half-word (16 bit displacement)" ((STALL STORE))
+ "sh $rns,$sdisp16($rma)"
+ (+ MAJ_12 rns rma (f-sub4 9) sdisp16)
+ (sequence ()
+ (c-call VOID "check_write_to_text" (and (add rma (ext SI sdisp16)) (inv 1)))
+ (set (mem HI (and (add rma (ext SI sdisp16)) (inv 1))) (and rns #xffff)))
+ ((mep (unit u-use-gpr (in usereg rns))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sw16 "store word (16 bit displacement)" ((STALL STORE))
+ "sw $rnl,$sdisp16($rma)"
+ (+ MAJ_12 rnl rma (f-sub4 10) sdisp16)
+ (sequence ()
+             (c-call VOID "check_write_to_text" (and (add rma (ext SI sdisp16)) (inv 3)))
+ (set (mem SI (and (add rma (ext SI sdisp16)) (inv 3))) rnl))
+ ((mep (unit u-use-gpr (in usereg rnl))
+ (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lb16 "load byte (16 bit displacement)" ((STALL LOAD) (LATENCY 2))
+ "lb $rnc,$sdisp16($rma)"
+ (+ MAJ_12 rnc rma (f-sub4 12) sdisp16)
+ (set rnc (ext SI (mem QI (add rma (ext SI sdisp16)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnc)))))
+
+(dnci lh16 "load half-word (16 bit displacement)" ((STALL LOAD) (LATENCY 2))
+ "lh $rns,$sdisp16($rma)"
+ (+ MAJ_12 rns rma (f-sub4 13) sdisp16)
+ (set rns (ext SI (mem HI (and (add rma (ext SI sdisp16)) (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rns)))))
+
+(dnci lw16 "load word (16 bit displacement)" ((STALL LOAD) (LATENCY 2))
+ "lw $rnl,$sdisp16($rma)"
+ (+ MAJ_12 rnl rma (f-sub4 14) sdisp16)
+ (set rnl (mem SI (and (add rma (ext SI sdisp16)) (inv 3))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnl)))))
+
+(dnci lbu16 "load unsigned byte (16 bit displacement)" ((STALL LOAD) (LATENCY 2))
+ "lbu $rnuc,$sdisp16($rma)"
+ (+ MAJ_12 rnuc rma (f-sub4 11) sdisp16)
+ (set rnuc (zext SI (mem QI (add rma (ext SI sdisp16)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnuc)))))
+
+(dnci lhu16 "load unsigned half-word (16 bit displacement)" ((STALL LOAD) (LATENCY 2))
+ "lhu $rnus,$sdisp16($rma)"
+ (+ MAJ_12 rnus rma (f-sub4 15) sdisp16)
+ (set rnus (zext SI (mem HI (and (add rma (ext SI sdisp16)) (inv 1)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rnus)))))
+
+(dnci sw24 "store word (24 bit absolute addressing)" ((STALL STORE))
+ "sw $rnl,($addr24a4)"
+ (+ MAJ_14 rnl addr24a4 (f-sub2 2))
+ (sequence ()
+ (c-call VOID "check_write_to_text" (zext SI addr24a4))
+ (set (mem SI (zext SI addr24a4)) rnl))
+ ((mep (unit u-use-gpr (in usereg rnl))
+ (unit u-exec))))
+
+(dnci lw24 "load word (24 bit absolute addressing)" ((STALL LOAD) (LATENCY 2))
+ "lw $rnl,($addr24a4)"
+ (+ MAJ_14 rnl addr24a4 (f-sub2 3))
+ (set rnl (mem SI (zext SI addr24a4)))
+ ((mep (unit u-exec)
+ (unit u-load-gpr (out loadreg rnl)))))
+
+
+; Extension instructions.
+
+(dnci extb "sign extend byte" ()
+ "extb $rn"
+ (+ MAJ_1 rn (f-rm 0) (f-sub4 13))
+ (set rn (ext SI (and QI rn #xff)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci exth "sign extend half-word" ()
+ "exth $rn"
+ (+ MAJ_1 rn (f-rm 2) (f-sub4 13))
+ (set rn (ext SI (and HI rn #xffff)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci extub "zero extend byte" ()
+ "extub $rn"
+ (+ MAJ_1 rn (f-rm 8) (f-sub4 13))
+ (set rn (zext SI (and rn #xff)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci extuh "zero extend half-word" ()
+ "extuh $rn"
+ (+ MAJ_1 rn (f-rm 10) (f-sub4 13))
+ (set rn (zext SI (and rn #xffff)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+
+; Shift amount manipulation instructions.
+
+(dnci ssarb "set sar to bytes" ((STALL SSARB) VOLATILE)
+ "ssarb $udisp2($rm)"
+ (+ MAJ_1 (f-4 0) (f-5 0) udisp2 rm (f-sub4 12))
+ (if (c-call BI "big_endian_p")
+ (set sar (zext SI (mul (and (add udisp2 rm) 3) 8)))
+ (set sar (sub 32 (zext SI (mul (and (add udisp2 rm) 3) 8)))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
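+; For example, on a little-endian configuration a byte offset of 1 in
+; (add udisp2 rm) gives sar = 32 - 1*8 = 24; a following fsft can then
+; shift the concatenated registers into place for the unaligned access.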
+
+
+; Move instructions.
+
+(dnci mov "move" ()
+ "mov $rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 0))
+ (set rn rm)
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci movi8 "move 8-bit immediate" ()
+ "mov $rn,$simm8"
+ (+ MAJ_5 rn simm8)
+ (set rn (ext SI simm8))
+ ())
+
+(dnci movi16 "move 16-bit immediate" ()
+ "mov $rn,$simm16"
+ (+ MAJ_12 rn (f-rm 0) (f-sub4 1) simm16)
+ (set rn (ext SI simm16))
+ ())
+
+(dnci movu24 "move 24-bit unsigned immediate" ()
+ "movu $rn3,$uimm24"
+ (+ MAJ_13 (f-4 0) rn3 uimm24)
+ (set rn3 (zext SI uimm24))
+ ())
+
+(dnci movu16 "move 16-bit unsigned immediate" ()
+ "movu $rn,$uimm16"
+ (+ MAJ_12 rn (f-rm 1) (f-sub4 1) uimm16)
+ (set rn (zext SI uimm16))
+ ())
+
+(dnci movh "move high 16-bit immediate" ()
+ "movh $rn,$uimm16"
+ (+ MAJ_12 rn (f-rm 2) (f-sub4 1) uimm16)
+ (set rn (sll uimm16 16))
+ ())
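+; A common idiom (illustrative, not part of the description): a full
+; 32-bit constant can be synthesized with movh followed by or3, e.g.
+;   movh $1,0xdead
+;   or3  $1,$1,0xbeef
+; which leaves 0xdeadbeef in $1.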
+
+
+; Arithmetic instructions.
+
+(dnci add3 "add three registers" ()
+ "add3 $rl,$rn,$rm"
+ (+ MAJ_9 rn rm rl)
+ (set rl (add rn rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci add "add" ()
+ "add $rn,$simm6"
+ (+ MAJ_6 rn simm6 (f-sub2 0))
+ (set rn (add rn (ext SI simm6)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci add3i "add two registers and immediate" ()
+ "add3 $rn,$spr,$uimm7a4"
+ (+ MAJ_4 rn (f-8 0) uimm7a4 (f-sub2 0))
+ (set rn (add sp (zext SI uimm7a4)))
+ ((mep (unit u-use-gpr (in usereg sp))
+ (unit u-exec))))
+
+(dnci advck3 "add overflow check" ((STALL ADVCK))
+ "advck3 \\$0,$rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 7))
+ (if (add-oflag rn rm 0)
+ (set r0 1)
+ (set r0 0))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci sub "subtract" ()
+ "sub $rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 4))
+ (set rn (sub rn rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm)))))
+
+(dnci sbvck3 "subtraction overflow check" ((STALL ADVCK))
+ "sbvck3 \\$0,$rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 5))
+ (if (sub-oflag rn rm 0)
+ (set r0 1)
+ (set r0 0))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci neg "negate" ()
+ "neg $rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 1))
+ (set rn (neg rm))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci slt3 "set if less than" ()
+ "slt3 \\$0,$rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 2))
+ (if (lt rn rm)
+ (set r0 1)
+ (set r0 0))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci sltu3 "set if less than unsigned" ()
+ "sltu3 \\$0,$rn,$rm"
+ (+ MAJ_0 rn rm (f-sub4 3))
+ (if (ltu rn rm)
+ (set r0 1)
+ (set r0 0))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci slt3i "set if less than immediate" ()
+ "slt3 \\$0,$rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 1))
+ (if (lt rn (zext SI uimm5))
+ (set r0 1)
+ (set r0 0))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci sltu3i "set if less than unsigned immediate" ()
+ "sltu3 \\$0,$rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 5))
+ (if (ltu rn (zext SI uimm5))
+ (set r0 1)
+ (set r0 0))
+ ())
+
+(dnci sl1ad3 "shift left one and add" ((STALL INT2))
+ "sl1ad3 \\$0,$rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 6))
+ (set r0 (add (sll rn 1) rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci sl2ad3 "shift left two and add" ((STALL INT2))
+ "sl2ad3 \\$0,$rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 7))
+ (set r0 (add (sll rn 2) rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci add3x "three operand add (extended)" ()
+ "add3 $rn,$rm,$simm16"
+ (+ MAJ_12 rn rm (f-sub4 0) simm16)
+ (set rn (add rm (ext SI simm16)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci slt3x "set if less than (extended)" ()
+ "slt3 $rn,$rm,$simm16"
+ (+ MAJ_12 rn rm (f-sub4 2) simm16)
+ (if (lt rm (ext SI simm16))
+ (set rn 1)
+ (set rn 0))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci sltu3x "set if less than unsigned (extended)" ()
+ "sltu3 $rn,$rm,$uimm16"
+ (+ MAJ_12 rn rm (f-sub4 3) uimm16)
+ (if (ltu rm (zext SI uimm16))
+ (set rn 1)
+ (set rn 0))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+
+; Logical instructions.
+
+(dnci or "bitwise or" ()
+ "or $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 0))
+ (set rn (or rn rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci and "bitwise and" ()
+ "and $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 1))
+ (set rn (and rn rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci xor "bitwise exclusive or" ()
+ "xor $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 2))
+ (set rn (xor rn rm))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci nor "bitwise negated or" ()
+ "nor $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 3))
+ (set rn (inv (or rn rm)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci or3 "or three operand" ()
+ "or3 $rn,$rm,$uimm16"
+ (+ MAJ_12 rn rm (f-sub4 4) uimm16)
+ (set rn (or rm (zext SI uimm16)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci and3 "and three operand" ()
+ "and3 $rn,$rm,$uimm16"
+ (+ MAJ_12 rn rm (f-sub4 5) uimm16)
+ (set rn (and rm (zext SI uimm16)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci xor3 "exclusive or three operand" ()
+ "xor3 $rn,$rm,$uimm16"
+ (+ MAJ_12 rn rm (f-sub4 6) uimm16)
+ (set rn (xor rm (zext SI uimm16)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+
+; Shift instructions.
+
+(dnci sra "shift right arithmetic" ((STALL INT2))
+ "sra $rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 13))
+ (set rn (sra rn (and rm #x1f)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci srl "shift right logical" ((STALL INT2))
+ "srl $rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 12))
+ (set rn (srl rn (and rm #x1f)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci sll "shift left logical" ((STALL INT2))
+ "sll $rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 14))
+ (set rn (sll rn (and rm #x1f)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+(dnci srai "shift right arithmetic (immediate)" ((STALL SHIFTI))
+ "sra $rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 3))
+ (set rn (sra rn uimm5))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci srli "shift right logical (immediate)" ((STALL SHIFTI))
+ "srl $rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 2))
+ (set rn (srl rn uimm5))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci slli "shift left logical (immediate)" ((STALL SHIFTI))
+ "sll $rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 6))
+ (set rn (sll rn uimm5))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci sll3 "three-register shift left logical" ((STALL INT2))
+ "sll3 \\$0,$rn,$uimm5"
+ (+ MAJ_6 rn uimm5 (f-sub3 7))
+ (set r0 (sll rn uimm5))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci fsft "field shift" ((STALL FSFT) VOLATILE)
+ "fsft $rn,$rm"
+ (+ MAJ_2 rn rm (f-sub4 15))
+ (sequence ((DI temp) (QI shamt))
+ (set shamt (and sar #x3f))
+ (set temp (sll (or (sll (zext DI rn) 32) (zext DI rm)) shamt))
+ (set rn (subword SI (srl temp 32) 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
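+; Worked example (illustrative): with sar = 8, rn = #x11223344 and
+; rm = #x55667788, temp = #x1122334455667788 << 8 = #x2233445566778800,
+; and rn receives bits 63..32 of temp, i.e. #x22334455.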
+
+
+; Branch/jump instructions.
+
+(dnci bra "branch" (RELAXABLE)
+ "bra $pcrel12a2"
+ (+ MAJ_11 pcrel12a2 (f-15 0))
+ (set-vliw-alignment-modified pc pcrel12a2)
+ ((mep (unit u-branch)
+ (unit u-exec))))
+
+(dnci beqz "branch if equal zero" (RELAXABLE)
+ "beqz $rn,$pcrel8a2"
+ (+ MAJ_10 rn pcrel8a2 (f-15 0))
+ (if (eq rn 0)
+ (set-vliw-alignment-modified pc pcrel8a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bnez "branch if not equal zero" (RELAXABLE)
+ "bnez $rn,$pcrel8a2"
+ (+ MAJ_10 rn pcrel8a2 (f-15 1))
+ (if (ne rn 0)
+ (set-vliw-alignment-modified pc pcrel8a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci beqi "branch equal immediate" (RELAXABLE)
+ "beqi $rn,$uimm4,$pcrel17a2"
+ (+ MAJ_14 rn uimm4 (f-sub4 0) pcrel17a2)
+ (if (eq rn (zext SI uimm4))
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bnei "branch not equal immediate" (RELAXABLE)
+ "bnei $rn,$uimm4,$pcrel17a2"
+ (+ MAJ_14 rn uimm4 (f-sub4 4) pcrel17a2)
+ (if (ne rn (zext SI uimm4))
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci blti "branch less than immediate" (RELAXABLE)
+ "blti $rn,$uimm4,$pcrel17a2"
+ (+ MAJ_14 rn uimm4 (f-sub4 12) pcrel17a2)
+ (if (lt rn (zext SI uimm4))
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bgei "branch greater than or equal immediate" (RELAXABLE)
+ "bgei $rn,$uimm4,$pcrel17a2"
+ (+ MAJ_14 rn uimm4 (f-sub4 8) pcrel17a2)
+ (if (ge rn (zext SI uimm4))
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci beq "branch equal" ()
+ "beq $rn,$rm,$pcrel17a2"
+ (+ MAJ_14 rn rm (f-sub4 1) pcrel17a2)
+ (if (eq rn rm)
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bne "branch not equal" ()
+ "bne $rn,$rm,$pcrel17a2"
+ (+ MAJ_14 rn rm (f-sub4 5) pcrel17a2)
+ (if (ne rn rm)
+ (set-vliw-alignment-modified pc pcrel17a2))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bsr12 "branch to subroutine (12 bit displacement)" (RELAXABLE)
+ "bsr $pcrel12a2"
+ (+ MAJ_11 pcrel12a2 (f-15 1))
+ (sequence ()
+ (cg-profile pc pcrel12a2)
+ (set-vliw-modified-pcrel-offset lp 2 4 8)
+ (set-vliw-alignment-modified pc pcrel12a2))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci bsr24 "branch to subroutine (24 bit displacement)" ()
+ "bsr $pcrel24a2"
+ (+ MAJ_13 (f-4 1) (f-sub4 9) pcrel24a2)
+ (sequence ()
+ (cg-profile pc pcrel24a2)
+ (set-vliw-modified-pcrel-offset lp 4 4 8)
+ (set-vliw-alignment-modified pc pcrel24a2))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci jmp "jump" ()
+ "jmp $rm"
+ (+ MAJ_1 (f-rn 0) rm (f-sub4 14))
+ (sequence ()
+ (if (eq (get-psw.om) 0)
+ ;; core mode
+ (if (get-rm.lsb)
+ (sequence ()
+ (set-psw.om 1) ;; enter VLIW mode
+			(set-vliw-alignment-modified-by-option pc rm))
+ (set pc (and rm (inv 1))))
+ ;; VLIW mode
+ (if (get-rm.lsb)
+ (sequence ()
+ (set-psw.om 0) ;; enter core mode
+ (set pc (and rm (inv 1))))
+		    (set-vliw-alignment-modified-by-option pc rm)))
+ (cg-profile-jump pc rm))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci jmp24 "jump (24 bit target)" ()
+ "jmp $pcabs24a2"
+ (+ MAJ_13 (f-4 1) (f-sub4 8) pcabs24a2)
+ (sequence ()
+ (set-vliw-alignment-modified pc (or (and pc #xf0000000) pcabs24a2))
+ (cg-profile-jump pc pcabs24a2))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci jsr "jump to subroutine" ()
+ "jsr $rm"
+ (+ MAJ_1 (f-rn 0) rm (f-sub4 15))
+ (sequence ()
+ (cg-profile pc rm)
+ (set-vliw-modified-pcrel-offset lp 2 4 8)
+ (set-vliw-alignment-modified pc rm))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci ret "return from subroutine" ((STALL RET))
+ "ret"
+ (+ MAJ_7 (f-rn 0) (f-rm 0) (f-sub4 2))
+ (sequence ()
+ (if (eq (get-psw.om) 0)
+ ;; core mode
+ (if (get-lp.ltom) ;; link-pointer "toggle mode" bit
+ (sequence ()
+ (set-psw.om 1) ;; enter VLIW mode
+			(set-vliw-alignment-modified-by-option pc lp))
+ (set pc (and lp (inv 1))))
+ ;; VLIW mode
+ (if (get-lp.ltom) ;; link-pointer "toggle mode" bit
+ (sequence ()
+		      (set-psw.om 0) ;; enter core mode
+ (set pc (and lp (inv 1))))
+		  (set-vliw-alignment-modified-by-option pc lp)))
+ (c-call VOID "notify_ret" pc))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+
+; Repeat instructions.
+
+(dnci repeat "repeat specified repeat block" ()
+ "repeat $rn,$pcrel17a2"
+ (+ MAJ_14 rn (f-rm 0) (f-sub4 9) pcrel17a2)
+ (sequence ()
+ (set-vliw-modified-pcrel-offset (reg h-csr 4) 4 4 8)
+ (set-vliw-alignment-modified (reg h-csr 5) pcrel17a2)
+ (set (reg h-csr 6) rn))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci erepeat "endless repeat" ()
+ "erepeat $pcrel17a2"
+ (+ MAJ_14 (f-rn 0) (f-rm 1) (f-sub4 9) pcrel17a2)
+ (sequence ()
+ (set-vliw-modified-pcrel-offset (reg h-csr 4) 4 4 8)
+ (set-vliw-alignment-modified (reg h-csr 5) pcrel17a2)
+ (set-rpe.elr 1)
+ ; rpc may be undefined for erepeat
+ ; use 1 to trigger repeat logic in the sim's main loop
+ (set (reg h-csr 6) 1))
+ ())
+
+
+; Control instructions.
+
+;; special store variants
+
+(dnci stc_lp "store to control register lp" ((STALL STC))
+ "stc $rn,\\$lp"
+ (+ MAJ_7 rn (f-csrn-lo 1) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 0))
+ (set lp rn)
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-store-ctrl-reg (out storereg lp))
+ (unit u-exec))))
+
+(dnci stc_hi "store to control register hi" ((STALL STC))
+ "stc $rn,\\$hi"
+ (+ MAJ_7 rn (f-csrn-lo 7) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 0))
+ (set hi rn)
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-store-ctrl-reg (out storereg hi))
+ (unit u-exec))))
+
+(dnci stc_lo "store to control register lo" ((STALL STC))
+ "stc $rn,\\$lo"
+ (+ MAJ_7 rn (f-csrn-lo 8) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 0))
+ (set lo rn)
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-store-ctrl-reg (out storereg lo))
+ (unit u-exec))))
+
+;; general store
+
+(dnci stc "store to control register" (VOLATILE (STALL STC))
+ "stc $rn,$csrn"
+ (+ MAJ_7 rn csrn (f-12 1) (f-13 0) (f-14 0))
+ (set csrn rn)
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-store-ctrl-reg (out storereg csrn))
+ (unit u-exec))))
+
+;; special load variants
+
+(dnci ldc_lp "load from control register lp" ((STALL LDC))
+ "ldc $rn,\\$lp"
+ (+ MAJ_7 rn (f-csrn-lo 1) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 1))
+ (set rn lp)
+ ((mep (unit u-use-ctrl-reg (in usereg lp))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn)))))
+
+
+(dnci ldc_hi "load from control register hi" ((STALL LDC))
+ "ldc $rn,\\$hi"
+ (+ MAJ_7 rn (f-csrn-lo 7) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 1))
+ (set rn hi)
+ ((mep (unit u-use-ctrl-reg (in usereg hi))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn)))))
+
+(dnci ldc_lo "load from control register lo" ((STALL LDC))
+ "ldc $rn,\\$lo"
+ (+ MAJ_7 rn (f-csrn-lo 8) (f-csrn-hi 0) (f-12 1) (f-13 0) (f-14 1))
+ (set rn lo)
+ ((mep (unit u-use-ctrl-reg (in usereg lo))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn)))))
+
+;; general load
+
+(dnci ldc "load from control register" (VOLATILE (STALL LDC) (LATENCY 2))
+ "ldc $rn,$csrn"
+ (+ MAJ_7 rn csrn (f-12 1) (f-13 0) (f-14 1))
+ (if (eq (ifield f-csrn) 0)
+ ;; loading from the pc
+ (set-vliw-modified-pcrel-offset rn 2 4 8)
+ ;; loading from something else
+ (set rn csrn))
+ ((mep (unit u-use-ctrl-reg (in usereg csrn))
+ (unit u-exec)
+ (unit u-load-gpr (out loadreg rn)))))
+
+(dnci di "disable interrupt" (VOLATILE)
+ "di"
+ (+ MAJ_7 (f-rn 0) (f-rm 0) (f-sub4 0))
+ ; clear psw.iec
+ (set psw (sll (srl psw 1) 1))
+ ())
+
+(dnci ei "enable interrupt" (VOLATILE)
+ "ei"
+ (+ MAJ_7 (f-rn 0) (f-rm 1) (f-sub4 0))
+ ; set psw.iec
+ (set psw (or psw 1))
+ ())
+
+(dnci reti "return from interrupt" ((STALL RET))
+ "reti"
+ (+ MAJ_7 (f-rn 0) (f-rm 1) (f-sub4 2))
+ (if (eq (get-psw.om) 0)
+ ;; core operation mode
+ (if (get-psw.nmi)
+ ;; return from NMI
+ (if (get-npc.ntom)
+ ;; return in VLIW operation mode
+ (sequence ()
+ (set-psw.om 1)
+		  (set-vliw-alignment-modified-by-option pc npc)
+ (set-psw.nmi 0))
+ ;; return in core mode
+ (sequence ()
+ (set pc (and npc (inv 1)))
+ (set-psw.nmi 0)))
+ ;; return from non-NMI
+ (if (get-epc.etom)
+ ;; return in VLIW mode
+ (sequence ()
+ (set-psw.om 1)
+		  (set-vliw-alignment-modified-by-option pc epc)
+ (set-psw.umc (get-psw.ump))
+ (set-psw.iec (get-psw.iep)))
+ ;; return in core mode
+ (sequence ()
+ (set pc (and epc (inv 1)))
+ (set-psw.umc (get-psw.ump))
+ (set-psw.iec (get-psw.iep)))))
+ ;; VLIW operation mode
+ ;; xxx undefined
+ (nop))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci halt "halt pipeline" (VOLATILE)
+ "halt"
+ (+ MAJ_7 (f-rn 0) (f-rm 2) (f-sub4 2))
+ ; set psw.halt
+ (set (raw-reg h-csr 16) (or psw (sll 1 11)))
+ ())
+
+(dnci sleep "sleep pipeline" (VOLATILE)
+ "sleep"
+ (+ MAJ_7 (f-rn 0) (f-rm 6) (f-sub4 2))
+ (c-call VOID "do_sleep")
+ ())
+
+(dnci swi "software interrupt" (MAY_TRAP VOLATILE)
+ "swi $uimm2"
+ (+ MAJ_7 (f-rn 0) (f-8 0) (f-9 0) uimm2 (f-sub4 6))
+ (cond
+ ((eq uimm2 0) (set exc (or exc (sll 1 4))))
+ ((eq uimm2 1) (set exc (or exc (sll 1 5))))
+ ((eq uimm2 2) (set exc (or exc (sll 1 6))))
+ ((eq uimm2 3) (set exc (or exc (sll 1 7)))))
+ ())
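+; The four cases above are equivalent to the single expression
+;   (set exc (or exc (sll 1 (add 4 (zext SI uimm2)))))
+; i.e. software interrupt n sets bit 4+n of EXC (an illustrative
+; rewrite only; the generated description is unchanged).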
+
+(dnci break "break exception" (MAY_TRAP VOLATILE)
+ "break"
+ (+ MAJ_7 (f-rn 0) (f-rm 3) (f-sub4 2))
+ (set pc (c-call USI "break_exception" pc))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci syncm "synchronise with memory" (VOLATILE)
+ "syncm"
+ (+ MAJ_7 (f-rn 0) (f-rm 1) (f-sub4 1))
+ (unimp "syncm")
+ ())
+
+(dnci stcb "store in control bus space" (VOLATILE (STALL STCB))
+ "stcb $rn,$uimm16"
+ (+ MAJ_15 rn (f-rm 0) (f-sub4 4) uimm16)
+ (c-call VOID "do_stcb" rn uimm16)
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec)
+ (unit u-stcb))))
+
+(dnci ldcb "load from control bus space" (VOLATILE (STALL LDCB) (LATENCY 3))
+ "ldcb $rn,$uimm16"
+ (+ MAJ_15 rn (f-rm 1) (f-sub4 4) uimm16)
+ (set rn (c-call SI "do_ldcb" uimm16))
+ ((mep (unit u-ldcb)
+ (unit u-exec)
+ (unit u-ldcb-gpr (out loadreg rn)))))
+
+
+; Bit manipulation instructions.
+; The following instructions become reserved instructions (RI) when
+; the bit manipulation option is off.
+
+(dnci bsetm "set bit in memory" (OPTIONAL_BIT_INSN)
+ "bsetm ($rma),$uimm3"
+ (+ MAJ_2 (f-4 0) uimm3 rma (f-sub4 0))
+ (sequence ()
+ (c-call "check_option_bit" pc)
+ (set (mem UQI rma) (or (mem UQI rma) (sll 1 uimm3))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci bclrm "clear bit in memory" (OPTIONAL_BIT_INSN)
+ "bclrm ($rma),$uimm3"
+ (+ MAJ_2 (f-4 0) uimm3 rma (f-sub4 1))
+ (sequence ()
+ (c-call "check_option_bit" pc)
+ (set (mem UQI rma) (and (mem UQI rma) (inv (sll 1 uimm3)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci bnotm "toggle bit in memory" (OPTIONAL_BIT_INSN)
+ "bnotm ($rma),$uimm3"
+ (+ MAJ_2 (f-4 0) uimm3 rma (f-sub4 2))
+ (sequence ()
+ (c-call "check_option_bit" pc)
+ (set (mem UQI rma) (xor (mem UQI rma) (sll 1 uimm3))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci btstm "test bit in memory" (OPTIONAL_BIT_INSN)
+ "btstm \\$0,($rma),$uimm3"
+ (+ MAJ_2 (f-4 0) uimm3 rma (f-sub4 3))
+ (sequence ()
+ (c-call "check_option_bit" pc)
+ (set r0 (zext SI (and UQI (mem UQI rma) (sll 1 uimm3)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci tas "test and set" (OPTIONAL_BIT_INSN)
+ "tas $rn,($rma)"
+ (+ MAJ_2 rn rma (f-sub4 4))
+ (sequence ((SI result))
+ (c-call "check_option_bit" pc)
+ (set result (zext SI (mem UQI rma)))
+ (set (mem UQI rma) 1)
+ (set rn result))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+
+; Data cache instruction.
+
+(dnci cache "cache operations" (VOLATILE)
+ "cache $cimm4,($rma)"
+ (+ MAJ_7 cimm4 rma (f-sub4 4))
+ (c-call VOID "do_cache" cimm4 rma pc)
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+
+; Multiply instructions.
+; These instructions become the RI when the 32-bit multiply
+; instruction option is off.
+
+(dnci mul "multiply" (OPTIONAL_MUL_INSN (STALL MUL))
+ "mul $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 4))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (mul (ext DI rn) (ext DI rm)))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply))))
+
+(dnci mulu "multiply unsigned" (OPTIONAL_MUL_INSN (STALL MUL))
+ "mulu $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 5))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (mul (zext UDI rn) (zext UDI rm)))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply))))
+
+(dnci mulr "multiply, lo -> reg" (OPTIONAL_MUL_INSN (STALL MULR) (LATENCY 3))
+ "mulr $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 6))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (mul (ext DI rn) (ext DI rm)))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1))
+ (set rn (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply)
+ (unit u-mul-gpr (out resultreg rn)))))
+
+(dnci mulru "multiply unsigned, lo -> reg" (OPTIONAL_MUL_INSN (STALL MULR) (LATENCY 3))
+ "mulru $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 7))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (mul (zext UDI rn) (zext UDI rm)))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1))
+ (set rn (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply)
+ (unit u-mul-gpr (out resultreg rn)))))
+
+(dnci madd "multiply accumulate" (OPTIONAL_MUL_INSN (STALL MUL))
+ "madd $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 #x3004))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (or (sll (zext DI hi) 32) (zext DI lo)))
+ (set result (add result (mul (ext DI rn) (ext DI rm))))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply))))
+
+(dnci maddu "multiply accumulate unsigned" (OPTIONAL_MUL_INSN (STALL MUL))
+ "maddu $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 #x3005))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (or (sll (zext DI hi) 32) (zext DI lo)))
+ (set result (add result (mul (zext UDI rn) (zext UDI rm))))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply))))
+
+
+(dnci maddr "multiply accumulate, lo -> reg" (OPTIONAL_MUL_INSN (STALL MULR) (LATENCY 3))
+ "maddr $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 #x3006))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (or (sll (zext DI hi) 32) (zext DI lo)))
+ (set result (add result (mul (ext DI rn) (ext DI rm))))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1))
+ (set rn (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply)
+ (unit u-mul-gpr (out resultreg rn)))))
+
+(dnci maddru "multiply accumulate unsigned, lo -> reg" (OPTIONAL_MUL_INSN (STALL MULR) (LATENCY 3))
+ "maddru $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 #x3007))
+ (sequence ((DI result))
+ (c-call "check_option_mul" pc)
+ (set result (or (sll (zext DI hi) 32) (zext DI lo)))
+ (set result (add result (mul (zext UDI rn) (zext UDI rm))))
+ (set hi (subword SI result 0))
+ (set lo (subword SI result 1))
+ (set rn (subword SI result 1)))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-multiply)
+ (unit u-mul-gpr (out resultreg rn)))))
+
+
+; Divide instructions.
+; These instructions become the RI when the 32-bit divide instruction
+; option is off.
+
+(dnci div "divide" (OPTIONAL_DIV_INSN (STALL DIV) (LATENCY 34) MAY_TRAP)
+ "div $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 8))
+ (sequence ()
+ (c-call "check_option_div" pc)
+ (if (eq rm 0)
+ (set pc (c-call USI "zdiv_exception" pc))
+ ; Special case described on p. 76.
+ (if (and (eq rn #x80000000)
+ (eq rm #xffffffff))
+ (sequence ()
+ (set lo #x80000000)
+ (set hi 0))
+ (sequence ()
+ (set lo (div rn rm))
+ (set hi (mod rn rm))))))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-divide)
+ (unit u-branch))))
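+; Note on the special case above: #x80000000 / #xffffffff is
+; -2^31 / -1 in two's complement; the true quotient 2^31 is not
+; representable in SI, so the result is defined as quotient
+; #x80000000 with remainder 0.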
+
+(dnci divu "divide unsigned" (OPTIONAL_DIV_INSN (STALL DIV) (LATENCY 34) MAY_TRAP)
+ "divu $rn,$rm"
+ (+ MAJ_1 rn rm (f-sub4 9))
+ (sequence ()
+ (c-call "check_option_div" pc)
+ (if (eq rm 0)
+ (set pc (c-call USI "zdiv_exception" pc))
+ (sequence ()
+ (set lo (udiv rn rm))
+ (set hi (umod rn rm)))))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-divide)
+ (unit u-branch))))
+
+
+; Debug functions.
+; These instructions become the RI when the debug function option is
+; off.
+
+(dnci dret "return from debug exception" (OPTIONAL_DEBUG_INSN)
+ "dret"
+ (+ MAJ_7 (f-rn 0) (f-rm 1) (f-sub4 3))
+ (sequence ()
+ (c-call "check_option_debug" pc)
+	     ; clear DBG.DM.
+ (set dbg (and dbg (inv (sll SI 1 15))))
+ (set pc depc))
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+(dnci dbreak "generate debug exception" (OPTIONAL_DEBUG_INSN MAY_TRAP VOLATILE)
+ "dbreak"
+ (+ MAJ_7 (f-rn 0) (f-rm 3) (f-sub4 3))
+ (sequence ()
+ (c-call "check_option_debug" pc)
+ ; set DBG.DPB.
+ (set dbg (or dbg 1)))
+ ())
+
+
+; Leading zero instruction.
+
+(dnci ldz "leading zeroes" (OPTIONAL_LDZ_INSN (STALL INT2))
+ "ldz $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 0))
+ (sequence ()
+ (c-call "check_option_ldz" pc)
+ (set rn (c-call SI "do_ldz" rm)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec))))
+
+
+; Absolute difference instruction.
+
+(dnci abs "absolute difference" (OPTIONAL_ABS_INSN (STALL INT2))
+ "abs $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 3))
+ (sequence ()
+ (c-call "check_option_abs" pc)
+ (set rn (abs (sub rn rm))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+
+; Average instruction.
+
+(dnci ave "average" (OPTIONAL_AVE_INSN (STALL INT2))
+ "ave $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 2))
+ (sequence ()
+ (c-call "check_option_ave" pc)
+ (set rn (sra (add (add rn rm) 1) 1)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
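+; For example, (3 + 4 + 1) >> 1 = 4: the added 1 makes the average of
+; two values round halves upward before the arithmetic right shift.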
+
+
+; MIN/MAX instructions.
+
+(dnci min "minimum" (OPTIONAL_MINMAX_INSN (STALL INT2))
+ "min $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 4))
+ (sequence ()
+ (c-call "check_option_minmax" pc)
+ (if (gt rn rm)
+ (set rn rm)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci max "maximum" (OPTIONAL_MINMAX_INSN (STALL INT2))
+ "max $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 5))
+ (sequence ()
+ (c-call "check_option_minmax" pc)
+ (if (lt rn rm)
+ (set rn rm)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci minu "minimum unsigned" (OPTIONAL_MINMAX_INSN (STALL INT2))
+ "minu $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 6))
+ (sequence ()
+ (c-call "check_option_minmax" pc)
+ (if (gtu rn rm)
+ (set rn rm)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci maxu "maximum unsigned" (OPTIONAL_MINMAX_INSN (STALL INT2))
+ "maxu $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 7))
+ (sequence ()
+ (c-call "check_option_minmax" pc)
+ (if (ltu rn rm)
+ (set rn rm)))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+
+; Clipping instruction.
+
+(dnci clip "clip" (OPTIONAL_CLIP_INSN (STALL INT2))
+ "clip $rn,$cimm5"
+ (+ MAJ_15 rn (f-rm 0) (f-sub4 1) (f-ext #x10) cimm5 (f-29 0) (f-30 0) (f-31 0))
+ (sequence ((SI min) (SI max))
+ (c-call "check_option_clip" pc)
+ (set max (sub (sll 1 (sub cimm5 1)) 1))
+ (set min (neg (sll 1 (sub cimm5 1))))
+ (cond
+ ((eq cimm5 0) (set rn 0))
+ ((gt rn max) (set rn max))
+ ((lt rn min) (set rn min))))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
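+; Worked example: with cimm5 = 8 the bounds are max = (1 << 7) - 1 =
+; 127 and min = -(1 << 7) = -128, i.e. rn is clipped to the signed
+; 8-bit range; cimm5 = 0 forces rn to 0.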
+
+(dnci clipu "clip unsigned" (OPTIONAL_CLIP_INSN (STALL INT2))
+ "clipu $rn,$cimm5"
+ (+ MAJ_15 rn (f-rm 0) (f-sub4 1) (f-ext #x10) cimm5 (f-29 0) (f-30 0) (f-31 1))
+ (sequence ((SI max))
+ (c-call "check_option_clip" pc)
+ (set max (sub (sll 1 cimm5) 1))
+ (cond
+ ((eq cimm5 0) (set rn 0))
+ ((gt rn max) (set rn max))
+ ((lt rn 0) (set rn 0))))
+ ((mep (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+
+; Saturation instructions.
+
+(dnci sadd "saturating addition" (OPTIONAL_SAT_INSN (STALL INT2))
+ "sadd $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 8))
+ (sequence ()
+ (c-call "check_option_sat" pc)
+ (if (add-oflag rn rm 0)
+ (if (nflag rn)
+ ; underflow
+ (set rn (neg (sll 1 31)))
+ ; overflow
+ (set rn (sub (sll 1 31) 1)))
+ (set rn (add rn rm))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
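+; For example, rn = #x7fffffff, rm = 1 overflows with rn positive, so
+; rn saturates to the maximum #x7fffffff instead of wrapping to
+; #x80000000.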
+
+(dnci ssub "saturating subtraction" (OPTIONAL_SAT_INSN (STALL INT2))
+ "ssub $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 10))
+ (sequence ()
+ (c-call "check_option_sat" pc)
+ (if (sub-oflag rn rm 0)
+ (if (nflag rn)
+ ; underflow
+ (set rn (neg (sll 1 31)))
+ ; overflow
+ (set rn (sub (sll 1 31) 1)))
+ (set rn (sub rn rm))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci saddu "saturating unsigned addition" (OPTIONAL_SAT_INSN (STALL INT2))
+ "saddu $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 9))
+ (sequence ()
+ (c-call "check_option_sat" pc)
+ (if (add-cflag rn rm 0)
+ (set rn (inv 0))
+ (set rn (add rn rm))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+(dnci ssubu "saturating unsigned subtraction" (OPTIONAL_SAT_INSN (STALL INT2))
+ "ssubu $rn,$rm"
+ (+ MAJ_15 rn rm (f-sub4 1) (f-16u16 11))
+ (sequence ()
+ (c-call "check_option_sat" pc)
+ (if (sub-cflag rn rm 0)
+ (set rn 0)
+ (set rn (sub rn rm))))
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-use-gpr (in usereg rn))
+ (unit u-exec))))
+
+
+; UCI and DSP options are defined in an external file.
+; See `mep-sample-ucidsp.cpu' for a sample.
+
+
+; Coprocessor instructions.
+
+(dnci swcp "store word coprocessor" (OPTIONAL_CP_INSN (STALL STORE))
+ "swcp $crn,($rma)"
+ (+ MAJ_3 crn rma (f-sub4 8))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 3)))
+ (set (mem SI (and rma (inv SI 3))) crn))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcp "load word coprocessor" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lwcp $crn,($rma)"
+ (+ MAJ_3 crn rma (f-sub4 9))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (mem SI (and rma (inv SI 3)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcp "smcp" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL STORE))
+ "smcp $crn64,($rma)"
+ (+ MAJ_3 crn64 rma (f-sub4 10))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (c-call "do_smcp" rma crn64 pc))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcp "lmcp" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL LOAD))
+ "lmcp $crn64,($rma)"
+ (+ MAJ_3 crn64 rma (f-sub4 11))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcp" rma pc)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci swcpi "swcp (post-increment)" (OPTIONAL_CP_INSN (STALL STORE))
+ "swcpi $crn,($rma+)"
+ (+ MAJ_3 crn rma (f-sub4 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 3)))
+ (set (mem SI (and rma (inv SI 3))) crn)
+ (set rma (add rma 4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcpi "lwcp (post-increment)" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lwcpi $crn,($rma+)"
+ (+ MAJ_3 crn rma (f-sub4 1))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (mem SI (and rma (inv SI 3))))
+ (set rma (add rma 4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcpi "smcp (post-increment)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL STORE))
+ "smcpi $crn64,($rma+)"
+ (+ MAJ_3 crn64 rma (f-sub4 2))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (c-call "do_smcpi" (index-of rma) crn64 pc)
+ (set rma rma)) ; reference as output for intrinsic generation
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcpi "lmcp (post-increment)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL LOAD))
+ "lmcpi $crn64,($rma+)"
+ (+ MAJ_3 crn64 rma (f-sub4 3))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcpi" (index-of rma) pc))
+ (set rma rma)) ; reference as output for intrinsic generation
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci swcp16 "swcp (16-bit displacement)" (OPTIONAL_CP_INSN (STALL STORE))
+ "swcp $crn,$sdisp16($rma)"
+ (+ MAJ_15 crn rma (f-sub4 12) sdisp16)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set (mem SI (and (add rma sdisp16) (inv SI 3))) crn))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcp16 "lwcp (16-bit displacement)" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lwcp $crn,$sdisp16($rma)"
+ (+ MAJ_15 crn rma (f-sub4 13) sdisp16)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (mem SI (and (add rma sdisp16) (inv SI 3)))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcp16 "smcp (16-bit displacement)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL STORE))
+ "smcp $crn64,$sdisp16($rma)"
+ (+ MAJ_15 crn64 rma (f-sub4 14) sdisp16)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (c-call "do_smcp16" rma sdisp16 crn64 pc))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcp16 "lmcp (16-bit displacement)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL LOAD))
+ "lmcp $crn64,$sdisp16($rma)"
+ (+ MAJ_15 crn64 rma (f-sub4 15) sdisp16)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcp16" rma sdisp16 pc)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sbcpa "store byte coprocessor" (OPTIONAL_CP_INSN (STALL STORE))
+ "sbcpa $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 0) (f-ext62 0) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (set (mem QI rma) (and crn #xff))
+ (set rma (add rma (ext SI cdisp10))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbcpa "load byte coprocessor" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lbcpa $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x4) (f-ext62 #x0) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem QI rma)))
+ (set rma (add rma (ext SI cdisp10))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci shcpa "store half-word coprocessor" (OPTIONAL_CP_INSN (STALL STORE))
+ "shcpa $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x1) (f-ext62 #x0) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 1)))
+ (set (mem HI (and rma (inv SI 1))) (and crn #xffff))
+ (set rma (add rma (ext SI cdisp10a2))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhcpa "load half-word coprocessor" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lhcpa $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x5) (f-ext62 #x0) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (add rma (ext SI cdisp10a2))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci swcpa "store word coprocessor" (OPTIONAL_CP_INSN (STALL STORE))
+ "swcpa $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x2) (f-ext62 #x0) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 3)))
+ (set (mem SI (and rma (inv SI 3))) crn)
+ (set rma (add rma (ext SI cdisp10a4))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcpa "load word coprocessor" (OPTIONAL_CP_INSN (STALL LOAD))
+ "lwcpa $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x6) (f-ext62 #x0) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (mem SI (and rma (inv SI 3))))
+ (set rma (add rma (ext SI cdisp10a4))))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcpa "smcpa" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL STORE))
+ "smcpa $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x3) (f-ext62 #x0) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (c-call "do_smcpa" (index-of rma) cdisp10a8 crn64 pc)
+ (set rma rma)) ; reference as output for intrinsic generation
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcpa "lmcpa" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN (STALL LOAD))
+ "lmcpa $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x7) (f-ext62 #x0) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcpa" (index-of rma) cdisp10a8 pc))
+ (set rma rma)) ; reference as output for intrinsic generation
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+
+(dnci sbcpm0 "sbcpm0" (OPTIONAL_CP_INSN)
+ "sbcpm0 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x0) (f-ext62 #x2) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (set (mem QI rma) (and crn #xff))
+ (set rma (mod0 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbcpm0 "lbcpm0" (OPTIONAL_CP_INSN)
+ "lbcpm0 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x4) (f-ext62 #x2) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem QI rma)))
+ (set rma (mod0 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci shcpm0 "shcpm0" (OPTIONAL_CP_INSN)
+ "shcpm0 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x1) (f-ext62 #x2) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 1)))
+ (set (mem HI (and rma (inv SI 1))) (and crn #xffff))
+ (set rma (mod0 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhcpm0 "lhcpm0" (OPTIONAL_CP_INSN)
+ "lhcpm0 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x5) (f-ext62 #x2) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (mod0 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci swcpm0 "swcpm0" (OPTIONAL_CP_INSN)
+ "swcpm0 $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x2) (f-ext62 #x2) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 3)))
+ (set (mem SI (and rma (inv SI 3))) crn)
+ (set rma (mod0 cdisp10a4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcpm0 "lwcpm0" (OPTIONAL_CP_INSN)
+ "lwcpm0 $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x6) (f-ext62 #x2) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (mem SI (and rma (inv SI 3))))
+ (set rma (mod0 cdisp10a4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcpm0 "smcpm0" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN)
+ "smcpm0 $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x3) (f-ext62 #x2) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (c-call "do_smcp" rma crn64 pc)
+ (set rma (mod0 cdisp10a8)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcpm0 "lmcpm0" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN)
+ "lmcpm0 $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x7) (f-ext62 #x2) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcp" rma pc))
+ (set rma (mod0 cdisp10a8)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci sbcpm1 "sbcpm1" (OPTIONAL_CP_INSN)
+ "sbcpm1 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x0) (f-ext62 #x3) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" rma)
+ (set (mem QI rma) (and crn #xff))
+ (set rma (mod1 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lbcpm1 "lbcpm1" (OPTIONAL_CP_INSN)
+ "lbcpm1 $crn,($rma+),$cdisp10"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x4) (f-ext62 #x3) cdisp10)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem QI rma)))
+ (set rma (mod1 cdisp10)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci shcpm1 "shcpm1" (OPTIONAL_CP_INSN)
+ "shcpm1 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x1) (f-ext62 #x3) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 1)))
+ (set (mem HI (and rma (inv SI 1))) (and crn #xffff))
+ (set rma (mod1 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lhcpm1 "lhcpm1" (OPTIONAL_CP_INSN)
+ "lhcpm1 $crn,($rma+),$cdisp10a2"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x5) (f-ext62 #x3) cdisp10a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem HI (and rma (inv SI 1)))))
+ (set rma (mod1 cdisp10a2)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci swcpm1 "swcpm1" (OPTIONAL_CP_INSN)
+ "swcpm1 $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x2) (f-ext62 #x3) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call VOID "check_write_to_text" (and rma (inv SI 3)))
+ (set (mem SI (and rma (inv SI 3))) crn)
+ (set rma (mod1 cdisp10a4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lwcpm1 "lwcpm1" (OPTIONAL_CP_INSN)
+ "lwcpm1 $crn,($rma+),$cdisp10a4"
+ (+ MAJ_15 crn rma (f-sub4 5) (f-ext4 #x6) (f-ext62 #x3) cdisp10a4)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crn (ext SI (mem SI (and rma (inv SI 3)))))
+ (set rma (mod1 cdisp10a4)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci smcpm1 "smcpm1" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN)
+ "smcpm1 $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x3) (f-ext62 #x3) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+	     (c-call VOID "check_write_to_text" rma)
+	     (c-call "do_smcp" rma crn64 pc)
+ (set rma (mod1 cdisp10a8)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnci lmcpm1 "lmcpm1" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN)
+ "lmcpm1 $crn64,($rma+),$cdisp10a8"
+ (+ MAJ_15 crn64 rma (f-sub4 5) (f-ext4 #x7) (f-ext62 #x3) cdisp10a8)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "check_option_cp64" pc)
+ (set crn64 (c-call DI "do_lmcp" rma pc))
+ (set rma (mod1 cdisp10a8)))
+ ((mep (unit u-use-gpr (in usereg rma))
+ (unit u-exec))))
+
+(dnop cp_flag "branch condition register" (all-mep-isas) h-ccr 1)
+
+(dnci bcpeq "branch coprocessor equal" (OPTIONAL_CP_INSN RELAXABLE)
+ "bcpeq $cccc,$pcrel17a2"
+ (+ MAJ_13 (f-rn 8) cccc (f-sub4 4) pcrel17a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (if (eq (xor cccc cp_flag) 0)
+ (set-vliw-alignment-modified pc pcrel17a2)))
+ ())
+
+(dnci bcpne "branch coprocessor not equal" (OPTIONAL_CP_INSN RELAXABLE)
+ "bcpne $cccc,$pcrel17a2"
+ (+ MAJ_13 (f-rn 8) cccc (f-sub4 5) pcrel17a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (if (ne (xor cccc cp_flag) 0)
+ (set-vliw-alignment-modified pc pcrel17a2)))
+ ())
+
+(dnci bcpat "branch coprocessor and true" (OPTIONAL_CP_INSN RELAXABLE)
+ "bcpat $cccc,$pcrel17a2"
+ (+ MAJ_13 (f-rn 8) cccc (f-sub4 6) pcrel17a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (if (ne (and cccc cp_flag) 0)
+ (set-vliw-alignment-modified pc pcrel17a2)))
+ ())
+
+(dnci bcpaf "branch coprocessor and false" (OPTIONAL_CP_INSN RELAXABLE)
+ "bcpaf $cccc,$pcrel17a2"
+ (+ MAJ_13 (f-rn 8) cccc (f-sub4 7) pcrel17a2)
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (if (eq (and cccc cp_flag) 0)
+ (set-vliw-alignment-modified pc pcrel17a2)))
+ ())
+
+(dnci synccp "synchronise with coprocessor" (OPTIONAL_CP_INSN)
+ "synccp"
+ (+ MAJ_7 (f-rn 0) (f-rm 2) (f-sub4 1))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (unimp "synccp"))
+ ())
+
+(dnci jsrv "jump to vliw subroutine" (OPTIONAL_CP_INSN)
+ "jsrv $rm"
+ (+ MAJ_1 (f-rn 8) rm (f-sub4 15))
+ (sequence ()
+ (cg-profile pc rm)
+ (c-call "check_option_cp" pc)
+ (core-vliw-switch
+
+ ;; in core operating mode
+ (sequence ()
+ (set lp (or (add pc 2) 1))
+	      (set-vliw-alignment-modified-by-option pc rm)
+ (set-psw.om 1)) ;; to VLIW operation mode
+
+ ;; in VLIW32 operating mode
+ (sequence ()
+ (set lp (or (add pc 4) 1))
+ (set pc (and rm (inv 1)))
+ (set-psw.om 0)) ;; to core operation mode
+
+ ;; in VLIW64 operating mode
+ (sequence ()
+ (set lp (or (add pc 8) 1))
+ (set pc (and rm (inv 1)))
+ (set-psw.om 0)))) ;; to core operation mode
+ ((mep (unit u-use-gpr (in usereg rm))
+ (unit u-exec)
+ (unit u-branch))))
+
+(dnci bsrv "branch to vliw subroutine" (OPTIONAL_CP_INSN)
+ "bsrv $pcrel24a2"
+ (+ MAJ_13 (f-4 1) (f-sub4 11) pcrel24a2)
+ (sequence ()
+ (cg-profile pc pcrel24a2)
+ (c-call "check_option_cp" pc)
+ (core-vliw-switch
+
+ ;; in core operating mode
+ (sequence ()
+ (set lp (or (add pc 4) 1))
+	      (set-vliw-alignment-modified-by-option pc pcrel24a2)
+ (set-psw.om 1)) ;; to VLIW operation mode
+
+ ;; in VLIW32 operating mode
+ (sequence ()
+ (set lp (or (add pc 4) 1))
+ (set pc (and pcrel24a2 (inv 1)))
+ (set-psw.om 0)) ;; to core operation mode
+
+ ;; in VLIW64 operating mode
+ (sequence ()
+ (set lp (or (add pc 8) 1))
+ (set pc (and pcrel24a2 (inv 1)))
+ (set-psw.om 0)))) ;; to core operation mode
+ ((mep (unit u-exec)
+ (unit u-branch))))
+
+
+; An instruction for test instrumentation.
+; Using a reserved opcode.
+
+(dnci sim-syscall "simulator system call" ()
+ "--syscall--"
+ (+ MAJ_7 (f-4 1) callnum (f-8 0) (f-9 0) (f-10 0) (f-sub4 0))
+ (c-call "do_syscall" pc callnum)
+ ())
+
+(define-pmacro (dnri n major minor)
+ (dnci (.sym ri- n) "reserved instruction" ()
+ "--reserved--"
+ (+ major rn rm (f-sub4 minor))
+ (set pc (c-call USI "ri_exception" pc))
+ ((mep (unit u-exec)
+ (unit u-branch)))))
+
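+; As a concrete illustration, (dnri 0 MAJ_0 6) below expands to an insn
+; named ri-0 whose only semantic effect is to raise the
+; reserved-instruction exception:
+;
+;   (dnci ri-0 "reserved instruction" ()
+;     "--reserved--"
+;     (+ MAJ_0 rn rm (f-sub4 6))
+;     (set pc (c-call USI "ri_exception" pc))
+;     ((mep (unit u-exec) (unit u-branch))))
+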
+(dnri 0 MAJ_0 6)
+(dnri 1 MAJ_1 10)
+(dnri 2 MAJ_1 11)
+(dnri 3 MAJ_2 5)
+(dnri 4 MAJ_2 8)
+(dnri 5 MAJ_2 9)
+(dnri 6 MAJ_2 10)
+(dnri 7 MAJ_2 11)
+(dnri 8 MAJ_3 4)
+(dnri 9 MAJ_3 5)
+(dnri 10 MAJ_3 6)
+(dnri 11 MAJ_3 7)
+(dnri 12 MAJ_3 12)
+(dnri 13 MAJ_3 13)
+(dnri 14 MAJ_3 14)
+(dnri 15 MAJ_3 15)
+(dnri 17 MAJ_7 7)
+(dnri 20 MAJ_7 14)
+(dnri 21 MAJ_7 15)
+(dnri 22 MAJ_12 7)
+(dnri 23 MAJ_14 13)
+;(dnri 24 MAJ_15 3)
+(dnri 26 MAJ_15 8)
+; begin core-specific reserved insns
+; end core-specific reserved insns
+
+
+; Macro instructions.
+
+(dnmi nop "nop"
+ ()
+ "nop"
+ (emit mov (rn 0) (rm 0)))
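+
+; Plain "nop" therefore assembles as the core move "mov $0,$0", which
+; presumably serves as the canonical no-op encoding.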
+
+; Emit the 16 bit form of these 32 bit insns when the displacement is zero.
+;
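+; For example, "sb $3,0($4)" matches sb16-0 below and is emitted as the
+; two-operand 16 bit "sb $3,($4)" instead of the 32 bit form with an
+; explicit displacement field.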
+(dncmi sb16-0 "store byte (explicit 16 bit displacement of zero)" (NO-DIS)
+ "sb $rnc,$zero($rma)"
+ (emit sb rnc rma))
+
+(dncmi sh16-0 "store half (explicit 16 bit displacement of zero)" (NO-DIS)
+ "sh $rns,$zero($rma)"
+ (emit sh rns rma))
+
+(dncmi sw16-0 "store word (explicit 16 bit displacement of zero)" (NO-DIS)
+ "sw $rnl,$zero($rma)"
+ (emit sw rnl rma))
+
+(dncmi lb16-0 "load byte (explicit 16 bit displacement of zero)" (NO-DIS)
+ "lb $rnc,$zero($rma)"
+ (emit lb rnc rma))
+
+(dncmi lh16-0 "load half (explicit 16 bit displacement of zero)" (NO-DIS)
+ "lh $rns,$zero($rma)"
+ (emit lh rns rma))
+
+(dncmi lw16-0 "load word (explicit 16 bit displacement of zero)" (NO-DIS)
+ "lw $rnl,$zero($rma)"
+ (emit lw rnl rma))
+
+(dncmi lbu16-0 "load unsigned byte (explicit 16 bit displacement of zero)" (NO-DIS)
+ "lbu $rnuc,$zero($rma)"
+ (emit lbu rnuc rma))
+
+(dncmi lhu16-0 "load unsigned half (explicit 16 bit displacement of zero)" (NO-DIS)
+ "lhu $rnus,$zero($rma)"
+ (emit lhu rnus rma))
+
+(dncmi swcp16-0 "swcp (explicit 16-bit displacement of zero)" (OPTIONAL_CP_INSN NO-DIS)
+ "swcp $crn,$zero($rma)"
+ (emit swcp crn rma))
+
+(dncmi lwcp16-0 "lwcp (explicit 16-bit displacement of zero)" (OPTIONAL_CP_INSN NO-DIS)
+ "lwcp $crn,$zero($rma)"
+ (emit lwcp crn rma))
+
+(dncmi smcp16-0 "smcp (explicit 16-bit displacement of zero)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN NO-DIS)
+ "smcp $crn64,$zero($rma)"
+ (emit smcp crn64 rma))
+
+(dncmi lmcp16-0 "lmcp (explicit 16-bit displacement of zero)" (OPTIONAL_CP_INSN OPTIONAL_CP64_INSN NO-DIS)
+ "lmcp $crn64,$zero($rma)"
+ (emit lmcp crn64 rma))
diff --git a/gcc/config/mep/mep-default.cpu b/gcc/config/mep/mep-default.cpu
new file mode 100644
index 000000000..54756f15d
--- /dev/null
+++ b/gcc/config/mep/mep-default.cpu
@@ -0,0 +1,26 @@
+; Toshiba MeP Media Engine architecture description. -*- Scheme -*-
+; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+; Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; This file serves as a wrapper to bring in the core description plus
+; sample implementations of the UCI and DSP instructions.
+
+(include "mep-core.cpu")
+(include "mep-ext-cop.cpu")
diff --git a/gcc/config/mep/mep-ext-cop.cpu b/gcc/config/mep/mep-ext-cop.cpu
new file mode 100644
index 000000000..d770d8daf
--- /dev/null
+++ b/gcc/config/mep/mep-ext-cop.cpu
@@ -0,0 +1,24 @@
+; Toshiba MeP IVC2 Coprocessor description. -*- scheme -*-
+; Copyright (C) 2003, 2009
+; Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; begin-user-isa-includes
+(include "mep-ivc2.cpu")
+;; end-user-isa-includes
diff --git a/gcc/config/mep/mep-intrin.h b/gcc/config/mep/mep-intrin.h
new file mode 100644
index 000000000..d556459b2
--- /dev/null
+++ b/gcc/config/mep/mep-intrin.h
@@ -0,0 +1,8933 @@
+
+
+/* DO NOT EDIT: This file is automatically generated by CGEN.
+ Any changes you make will be discarded when it is next regenerated. */
+
+#ifdef WANT_GCC_DECLARATIONS
+#define FIRST_SHADOW_REGISTER 113
+#define LAST_SHADOW_REGISTER 120
+#define FIXED_SHADOW_REGISTERS \
+ 1, 1, 1, 1, 1, 1, 1, 1
+#define CALL_USED_SHADOW_REGISTERS FIXED_SHADOW_REGISTERS
+#define SHADOW_REG_ALLOC_ORDER \
+ 113, 114, 115, 116, 117, 118, 119, 120
+#define SHADOW_REGISTER_NAMES \
+ "$shadow81", "$shadow17", "$shadow40", "$shadow24", "$shadow23", "$shadow22", "$shadow21", "$shadow20"
+
+
+
+#ifndef __MEP__
+enum {
+ mep_fcmpleis = 591,
+ mep_fcmplis = 593,
+ mep_fcmpes = 595,
+ mep_fcmpules = 597,
+ mep_fcmpuls = 599,
+ mep_fcmpues = 601,
+ mep_fcmpus = 603,
+ mep_fcvtsw = 605,
+ mep_ftruncws = 607,
+ mep_fnegs = 609,
+ mep_fabss = 611,
+ mep_fsqrts = 613,
+ mep_fdivs = 615,
+ mep_fmuls = 617,
+ mep_fsubs = 619,
+ mep_fadds = 621,
+ mep_fmovs = 623,
+ mep_cextb = 624,
+ mep_cexth = 625,
+ mep_cextub = 626,
+ mep_cextuh = 627,
+ mep_xmula0 = 628,
+ mep_cmula0 = 629,
+ mep_cneg = 630,
+ mep_cmovh2 = 632,
+ mep_cmovh1 = 633,
+ mep_cmovc2 = 634,
+ mep_cmovc1 = 635,
+ mep_cmov2 = 636,
+ mep_cmov1 = 637,
+ mep_cmovi = 638,
+ mep_cpmov = 640,
+ mep_cmov = 641,
+ mep_csrai3 = 642,
+ mep_csrai = 644,
+ mep_csra3 = 646,
+ mep_csra = 648,
+ mep_csrli3 = 650,
+ mep_csrli = 652,
+ mep_csrl3 = 654,
+ mep_csrl = 656,
+ mep_cslli3 = 658,
+ mep_cslli = 660,
+ mep_csll3 = 662,
+ mep_csll = 664,
+ mep_cxori3 = 666,
+ mep_cxori = 668,
+ mep_cxor3 = 670,
+ mep_cxor = 672,
+ mep_cnori3 = 674,
+ mep_cnori = 676,
+ mep_cnor3 = 678,
+ mep_cnor = 680,
+ mep_cori3 = 682,
+ mep_cori = 684,
+ mep_cor3 = 686,
+ mep_cor = 688,
+ mep_candi3 = 690,
+ mep_candi = 692,
+ mep_cand3 = 694,
+ mep_cand = 696,
+ mep_csubi3 = 698,
+ mep_csubi = 700,
+ mep_csub3 = 702,
+ mep_csub = 704,
+ mep_caddi3 = 706,
+ mep_caddi = 708,
+ mep_cadd3 = 710,
+ mep_cadd = 712
+};
+#endif /* ! defined (__MEP__) */
+
+
+enum cgen_regnum_operand_type {
+ cgen_regnum_operand_type_POINTER, /* long * */
+ cgen_regnum_operand_type_LABEL, /* void * */
+ cgen_regnum_operand_type_LONG, /* long */
+ cgen_regnum_operand_type_ULONG, /* unsigned long */
+ cgen_regnum_operand_type_SHORT, /* short */
+ cgen_regnum_operand_type_USHORT, /* unsigned short */
+ cgen_regnum_operand_type_CHAR, /* char */
+ cgen_regnum_operand_type_UCHAR, /* unsigned char */
+ cgen_regnum_operand_type_SI, /* __cop long */
+ cgen_regnum_operand_type_DI, /* __cop long long */
+ cgen_regnum_operand_type_CP_DATA_BUS_INT, /* cp_data_bus_int */
+ cgen_regnum_operand_type_VECTOR, /* opaque vector type */
+ cgen_regnum_operand_type_V8QI, /* V8QI vector type */
+ cgen_regnum_operand_type_V4HI, /* V4HI vector type */
+ cgen_regnum_operand_type_V2SI, /* V2SI vector type */
+ cgen_regnum_operand_type_V8UQI, /* V8UQI vector type */
+ cgen_regnum_operand_type_V4UHI, /* V4UHI vector type */
+ cgen_regnum_operand_type_V2USI, /* V2USI vector type */
+ cgen_regnum_operand_type_DEFAULT = cgen_regnum_operand_type_LONG
+};
+
+struct cgen_regnum_operand {
+ /* The number of addressable registers, 0 for non-regnum operands. */
+ unsigned char count;
+
+ /* The first register. */
+ unsigned char base;
+
+ /* The type of the operand. */
+ enum cgen_regnum_operand_type type;
+
+ /* Is it passed by reference? */
+ int reference_p;
+};
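+
+/* Illustrative reading (values hypothetical): an operand described by
+   { 32, 0, cgen_regnum_operand_type_SI, 0 } would name one of 32
+   registers starting at number 0, typed __cop long and passed by value,
+   while the { 0, 0, TYPE, 0 } entries in the tables below mark ordinary
+   non-regnum operands.  */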
+
+struct cgen_insn {
+ /* An index into cgen_intrinsics[]. */
+ unsigned int intrinsic;
+
+ /* A bitmask of the ISAs which include this instruction. */
+ unsigned int isas;
+
+ /* A bitmask of the target-specific groups to which this instruction
+ belongs. */
+ unsigned int groups;
+
+ /* The insn_code for this instruction. */
+ int icode;
+
+ /* The number of arguments to the intrinsic function. */
+ unsigned int num_args;
+
+ /* If true, the first argument is the return value. */
+ unsigned int cret_p;
+
+ /* Maps operand numbers to argument numbers. */
+ unsigned int op_mapping[10];
+
+ /* Array of regnum properties, indexed by argument number. */
+ struct cgen_regnum_operand regnums[10];
+
+ /* The length of the instruction, in bytes. */
+ int length;
+};
+
+extern const struct cgen_insn cgen_insns[];
+extern const char *const cgen_intrinsics[];
+
+/* Is the instruction described by cgen_insns[INDEX] enabled? */
+#define CGEN_ENABLE_INSN_P(INDEX) \
+ ((CGEN_CURRENT_ISAS & cgen_insns[INDEX].isas) != 0 \
+ && (CGEN_CURRENT_GROUP & cgen_insns[INDEX].groups) != 0)
+
+#define ISA_EXT1 1
+#define ISA_MEP 2
+
+#define GROUP_KNOWN_CODE 1
+#define GROUP_NORMAL 2
+#define GROUP_VLIW 4
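+
+/* Illustrative sketch only, not part of the generated interface: given
+   the number of entries in cgen_insns[] (N_INSNS below is a hypothetical
+   name) and a target intrinsic index WANTED, a consumer could select the
+   currently enabled variant like so:
+
+     unsigned int i;
+     for (i = 0; i < N_INSNS; i++)
+       if (CGEN_ENABLE_INSN_P (i) && cgen_insns[i].intrinsic == wanted)
+         return cgen_insns[i].icode;
+
+   CGEN_CURRENT_ISAS and CGEN_CURRENT_GROUP are expected to be supplied
+   by the file that includes this header.  */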
+
+#endif
+#ifdef WANT_GCC_DEFINITIONS
+struct cgen_immediate_predicate {
+ insn_operand_predicate_fn predicate;
+ int lower, upper, align;
+};
+
+const struct cgen_immediate_predicate cgen_immediate_predicates[] = {
+ { cgen_h_sint_12a1_immediate, -2048, 2048, 1 },
+ { cgen_h_uint_20a1_immediate, 0, 1048576, 1 },
+ { cgen_h_uint_7a1_immediate, 0, 128, 1 },
+ { cgen_h_uint_6a2_immediate, 0, 128, 2 },
+ { cgen_h_uint_22a4_immediate, 0, 33554432, 4 },
+ { cgen_h_sint_2a1_immediate, -2, 2, 1 },
+ { cgen_h_uint_24a1_immediate, 0, 16777216, 1 },
+ { cgen_h_sint_6a1_immediate, -32, 32, 1 },
+ { cgen_h_uint_5a4_immediate, 0, 256, 4 },
+ { cgen_h_uint_2a1_immediate, 0, 4, 1 },
+ { cgen_h_sint_10a1_immediate, -512, 512, 1 },
+ { cgen_h_uint_4a1_immediate, 0, 16, 1 },
+ { cgen_h_uint_6a1_immediate, 0, 64, 1 },
+ { cgen_h_uint_16a1_immediate, 0, 65536, 1 },
+ { cgen_h_uint_8a1_immediate, 0, 256, 1 },
+ { cgen_h_sint_16a1_immediate, -32768, 32768, 1 },
+ { cgen_h_uint_5a1_immediate, 0, 32, 1 },
+ { cgen_h_sint_8a1_immediate, -128, 128, 1 },
+ { cgen_h_uint_3a1_immediate, 0, 8, 1 }
+};
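+
+/* Reading the table above: each entry appears to describe the half-open
+   range [lower, upper) of values accepted by PREDICATE, constrained to
+   multiples of ALIGN.  E.g. cgen_h_sint_12a1_immediate covers the signed
+   12-bit values -2048 <= x < 2048, and cgen_h_uint_6a2_immediate covers
+   the even values 0 <= x < 128 (a 6-bit field scaled by 2).  */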
+
+const char *const cgen_intrinsics[] = {
+ "mep_cpfmadila1_h",
+ "mep_cpfmadiua1_h",
+ "mep_cpfmadia1_b",
+ "mep_cpfmadia1u_b",
+ "mep_cpfmulila1_h",
+ "mep_cpfmuliua1_h",
+ "mep_cpfmulia1_b",
+ "mep_cpfmulia1u_b",
+ "mep_cpamadila1_h",
+ "mep_cpamadiua1_h",
+ "mep_cpamadia1_b",
+ "mep_cpamadia1u_b",
+ "mep_cpamulila1_h",
+ "mep_cpamuliua1_h",
+ "mep_cpamulia1_b",
+ "mep_cpamulia1u_b",
+ "mep_cpfmadila1s1_h",
+ "mep_cpfmadiua1s1_h",
+ "mep_cpfmadia1s1_b",
+ "mep_cpfmadia1s1u_b",
+ "mep_cpfmulila1s1_h",
+ "mep_cpfmuliua1s1_h",
+ "mep_cpfmulia1s1_b",
+ "mep_cpfmulia1s1u_b",
+ "mep_cpfmadila1s0_h",
+ "mep_cpfmadiua1s0_h",
+ "mep_cpfmadia1s0_b",
+ "mep_cpfmadia1s0u_b",
+ "mep_cpfmulila1s0_h",
+ "mep_cpfmuliua1s0_h",
+ "mep_cpfmulia1s0_b",
+ "mep_cpfmulia1s0u_b",
+ "mep_cpacswp",
+ "mep_cpaccpa1",
+ "mep_cpacsuma1",
+ "mep_c1nop",
+ "mep_cpfacla0s1_h",
+ "mep_cpfacua0s1_h",
+ "mep_cpfaca0s1_b",
+ "mep_cpfaca0s1u_b",
+ "mep_cpfsftbla0s1_h",
+ "mep_cpfsftbua0s1_h",
+ "mep_cpfsftba0s1_b",
+ "mep_cpfsftba0s1u_b",
+ "mep_cpfacla0s0_h",
+ "mep_cpfacua0s0_h",
+ "mep_cpfaca0s0_b",
+ "mep_cpfaca0s0u_b",
+ "mep_cpfsftbla0s0_h",
+ "mep_cpfsftbua0s0_h",
+ "mep_cpfsftba0s0_b",
+ "mep_cpfsftba0s0u_b",
+ "mep_cpsllia0",
+ "mep_cpsraia0",
+ "mep_cpsrlia0",
+ "mep_cpslla0",
+ "mep_cpsraa0",
+ "mep_cpsrla0",
+ "mep_cpaccpa0",
+ "mep_cpacsuma0",
+ "mep_cpmovhla0_w",
+ "mep_cpmovhua0_w",
+ "mep_cppackla0_w",
+ "mep_cppackua0_w",
+ "mep_cppackla0_h",
+ "mep_cppackua0_h",
+ "mep_cppacka0_b",
+ "mep_cppacka0u_b",
+ "mep_cpmovlla0_w",
+ "mep_cpmovlua0_w",
+ "mep_cpmovula0_w",
+ "mep_cpmovuua0_w",
+ "mep_cpmovla0_h",
+ "mep_cpmovua0_h",
+ "mep_cpmova0_b",
+ "mep_cpsetla0_w",
+ "mep_cpsetua0_w",
+ "mep_cpseta0_h",
+ "mep_cpsadla0_h",
+ "mep_cpsadua0_h",
+ "mep_cpsada0_b",
+ "mep_cpsada0u_b",
+ "mep_cpabsla0_h",
+ "mep_cpabsua0_h",
+ "mep_cpabsa0_b",
+ "mep_cpabsa0u_b",
+ "mep_cpsubacla0_h",
+ "mep_cpsubacua0_h",
+ "mep_cpsubaca0_b",
+ "mep_cpsubaca0u_b",
+ "mep_cpsubla0_h",
+ "mep_cpsubua0_h",
+ "mep_cpsuba0_b",
+ "mep_cpsuba0u_b",
+ "mep_cpaddacla0_h",
+ "mep_cpaddacua0_h",
+ "mep_cpaddaca0_b",
+ "mep_cpaddaca0u_b",
+ "mep_cpaddla0_h",
+ "mep_cpaddua0_h",
+ "mep_cpadda0_b",
+ "mep_cpadda0u_b",
+ "mep_c0nop",
+ "mep_cpsmsbslla1_w",
+ "mep_cpsmsbslua1_w",
+ "mep_cpsmsbslla1_h",
+ "mep_cpsmsbslua1_h",
+ "mep_cpsmadslla1_w",
+ "mep_cpsmadslua1_w",
+ "mep_cpsmadslla1_h",
+ "mep_cpsmadslua1_h",
+ "mep_cpmulslla1_w",
+ "mep_cpmulslua1_w",
+ "mep_cpmulslla1_h",
+ "mep_cpmulslua1_h",
+ "mep_cpsmsbla1_w",
+ "mep_cpsmsbua1_w",
+ "mep_cpsmsbla1_h",
+ "mep_cpsmsbua1_h",
+ "mep_cpsmadla1_w",
+ "mep_cpsmadua1_w",
+ "mep_cpsmadla1_h",
+ "mep_cpsmadua1_h",
+ "mep_cpmsbla1_w",
+ "mep_cpmsbua1_w",
+ "mep_cpmsbla1u_w",
+ "mep_cpmsbua1u_w",
+ "mep_cpmsbla1_h",
+ "mep_cpmsbua1_h",
+ "mep_cpmadla1_w",
+ "mep_cpmadua1_w",
+ "mep_cpmadla1u_w",
+ "mep_cpmadua1u_w",
+ "mep_cpmadla1_h",
+ "mep_cpmadua1_h",
+ "mep_cpmada1_b",
+ "mep_cpmada1u_b",
+ "mep_cpmulla1_w",
+ "mep_cpmulua1_w",
+ "mep_cpmulla1u_w",
+ "mep_cpmulua1u_w",
+ "mep_cpmulla1_h",
+ "mep_cpmulua1_h",
+ "mep_cpmula1_b",
+ "mep_cpmula1u_b",
+ "mep_cpssda1_b",
+ "mep_cpssda1u_b",
+ "mep_cpssqa1_b",
+ "mep_cpssqa1u_b",
+ "mep_cpsllia1",
+ "mep_cpsraia1",
+ "mep_cpsrlia1",
+ "mep_cpslla1",
+ "mep_cpsraa1",
+ "mep_cpsrla1",
+ "mep_cpmovhla1_w",
+ "mep_cpmovhua1_w",
+ "mep_cppackla1_w",
+ "mep_cppackua1_w",
+ "mep_cppackla1_h",
+ "mep_cppackua1_h",
+ "mep_cppacka1_b",
+ "mep_cppacka1u_b",
+ "mep_cpmovlla1_w",
+ "mep_cpmovlua1_w",
+ "mep_cpmovula1_w",
+ "mep_cpmovuua1_w",
+ "mep_cpmovla1_h",
+ "mep_cpmovua1_h",
+ "mep_cpmova1_b",
+ "mep_cpsetla1_w",
+ "mep_cpsetua1_w",
+ "mep_cpseta1_h",
+ "mep_cpsadla1_h",
+ "mep_cpsadua1_h",
+ "mep_cpsada1_b",
+ "mep_cpsada1u_b",
+ "mep_cpabsla1_h",
+ "mep_cpabsua1_h",
+ "mep_cpabsa1_b",
+ "mep_cpabsa1u_b",
+ "mep_cpsubacla1_h",
+ "mep_cpsubacua1_h",
+ "mep_cpsubaca1_b",
+ "mep_cpsubaca1u_b",
+ "mep_cpsubla1_h",
+ "mep_cpsubua1_h",
+ "mep_cpsuba1_b",
+ "mep_cpsuba1u_b",
+ "mep_cpaddacla1_h",
+ "mep_cpaddacua1_h",
+ "mep_cpaddaca1_b",
+ "mep_cpaddaca1u_b",
+ "mep_cpaddla1_h",
+ "mep_cpaddua1_h",
+ "mep_cpadda1_b",
+ "mep_cpadda1u_b",
+ "mep_cdmovi",
+ "mep_cdmoviu",
+ "mep_cpmovi_w",
+ "mep_cpmoviu_w",
+ "mep_cpmovi_h",
+ "mep_cpmoviu_h",
+ "mep_cpmovi_b",
+ "mep_cdclipi3",
+ "mep_cdclipiu3",
+ "mep_cpclipi3_w",
+ "mep_cpclipiu3_w",
+ "mep_cpslai3_w",
+ "mep_cpslai3_h",
+ "mep_cdslli3",
+ "mep_cpslli3_w",
+ "mep_cpslli3_h",
+ "mep_cpslli3_b",
+ "mep_cdsrai3",
+ "mep_cpsrai3_w",
+ "mep_cpsrai3_h",
+ "mep_cpsrai3_b",
+ "mep_cdsrli3",
+ "mep_cpsrli3_w",
+ "mep_cpsrli3_h",
+ "mep_cpsrli3_b",
+ "mep_cpocmpge_w",
+ "mep_cpocmpgeu_w",
+ "mep_cpocmpge_h",
+ "mep_cpocmpge_b",
+ "mep_cpocmpgeu_b",
+ "mep_cpocmpgt_w",
+ "mep_cpocmpgtu_w",
+ "mep_cpocmpgt_h",
+ "mep_cpocmpgt_b",
+ "mep_cpocmpgtu_b",
+ "mep_cpocmpne_w",
+ "mep_cpocmpne_h",
+ "mep_cpocmpne_b",
+ "mep_cpocmpeq_w",
+ "mep_cpocmpeq_h",
+ "mep_cpocmpeq_b",
+ "mep_cpacmpge_w",
+ "mep_cpacmpgeu_w",
+ "mep_cpacmpge_h",
+ "mep_cpacmpge_b",
+ "mep_cpacmpgeu_b",
+ "mep_cpacmpgt_w",
+ "mep_cpacmpgtu_w",
+ "mep_cpacmpgt_h",
+ "mep_cpacmpgt_b",
+ "mep_cpacmpgtu_b",
+ "mep_cpacmpne_w",
+ "mep_cpacmpne_h",
+ "mep_cpacmpne_b",
+ "mep_cpacmpeq_w",
+ "mep_cpacmpeq_h",
+ "mep_cpacmpeq_b",
+ "mep_cpcmpge_w",
+ "mep_cpcmpgeu_w",
+ "mep_cpcmpge_h",
+ "mep_cpcmpge_b",
+ "mep_cpcmpgeu_b",
+ "mep_cpcmpgt_w",
+ "mep_cpcmpgtu_w",
+ "mep_cpcmpgt_h",
+ "mep_cpcmpgt_b",
+ "mep_cpcmpgtu_b",
+ "mep_cpcmpne_w",
+ "mep_cpcmpne_h",
+ "mep_cpcmpne_b",
+ "mep_cpcmpeq_w",
+ "mep_cpcmpeq_h",
+ "mep_cpcmpeq_b",
+ "mep_cpcmpeqz_b",
+ "mep_cdcastw",
+ "mep_cdcastuw",
+ "mep_cpcasth_w",
+ "mep_cpcastuh_w",
+ "mep_cpcastb_w",
+ "mep_cpcastub_w",
+ "mep_cpcastb_h",
+ "mep_cpcastub_h",
+ "mep_cpextl_h",
+ "mep_cpextlu_h",
+ "mep_cpextl_b",
+ "mep_cpextlu_b",
+ "mep_cpextu_h",
+ "mep_cpextuu_h",
+ "mep_cpextu_b",
+ "mep_cpextuu_b",
+ "mep_cpbcast_w",
+ "mep_cpbcast_h",
+ "mep_cpbcast_b",
+ "mep_cpccadd_b",
+ "mep_cphadd_w",
+ "mep_cphadd_h",
+ "mep_cphadd_b",
+ "mep_cphaddu_b",
+ "mep_cpnorm_w",
+ "mep_cpnorm_h",
+ "mep_cpldz_w",
+ "mep_cpldz_h",
+ "mep_cpabsz_w",
+ "mep_cpabsz_h",
+ "mep_cpabsz_b",
+ "mep_cpmovtocc",
+ "mep_cpmovtocsar1",
+ "mep_cpmovtocsar0",
+ "mep_cpmovfrcc",
+ "mep_cpmovfrcsar1",
+ "mep_cpmovfrcsar0",
+ "mep_cpmin3_w",
+ "mep_cpminu3_w",
+ "mep_cpmin3_h",
+ "mep_cpmin3_b",
+ "mep_cpminu3_b",
+ "mep_cpmax3_w",
+ "mep_cpmaxu3_w",
+ "mep_cpmax3_h",
+ "mep_cpmax3_b",
+ "mep_cpmaxu3_b",
+ "mep_cpabs3_h",
+ "mep_cpabs3_b",
+ "mep_cpabsu3_b",
+ "mep_cpaddsr3_w",
+ "mep_cpaddsr3_h",
+ "mep_cpaddsr3_b",
+ "mep_cpaddsru3_b",
+ "mep_cpave3_w",
+ "mep_cpave3_h",
+ "mep_cpave3_b",
+ "mep_cpaveu3_b",
+ "mep_cpextlsub3_b",
+ "mep_cpextlsubu3_b",
+ "mep_cpextusub3_b",
+ "mep_cpextusubu3_b",
+ "mep_cpextladd3_b",
+ "mep_cpextladdu3_b",
+ "mep_cpextuadd3_b",
+ "mep_cpextuaddu3_b",
+ "mep_cpssub3_w",
+ "mep_cpssub3_h",
+ "mep_cpsadd3_w",
+ "mep_cpsadd3_h",
+ "mep_cpsla3_w",
+ "mep_cpsla3_h",
+ "mep_cdsll3",
+ "mep_cpssll3_w",
+ "mep_cpsll3_w",
+ "mep_cpssll3_h",
+ "mep_cpsll3_h",
+ "mep_cpssll3_b",
+ "mep_cpsll3_b",
+ "mep_cdsra3",
+ "mep_cpssra3_w",
+ "mep_cpsra3_w",
+ "mep_cpssra3_h",
+ "mep_cpsra3_h",
+ "mep_cpssra3_b",
+ "mep_cpsra3_b",
+ "mep_cdsrl3",
+ "mep_cpssrl3_w",
+ "mep_cpsrl3_w",
+ "mep_cpssrl3_h",
+ "mep_cpsrl3_h",
+ "mep_cpssrl3_b",
+ "mep_cpsrl3_b",
+ "mep_cppack_h",
+ "mep_cppack_b",
+ "mep_cppacku_b",
+ "mep_cpunpackl_w",
+ "mep_cpunpackl_h",
+ "mep_cpunpackl_b",
+ "mep_cpunpacku_w",
+ "mep_cpunpacku_h",
+ "mep_cpunpacku_b",
+ "mep_cpfsftbs1",
+ "mep_cpfsftbs0",
+ "mep_cpfsftbi",
+ "mep_cpsel",
+ "mep_cpxor3",
+ "mep_cpnor3",
+ "mep_cpor3",
+ "mep_cpand3",
+ "mep_cdsub3",
+ "mep_cpsub3_w",
+ "mep_cpsub3_h",
+ "mep_cpsub3_b",
+ "mep_cdadd3",
+ "mep_cpadd3_w",
+ "mep_cpadd3_h",
+ "mep_cpadd3_b",
+ "mep_bsrv",
+ "mep_jsrv",
+ "mep_synccp",
+ "mep_bcpaf",
+ "mep_bcpat",
+ "mep_bcpne",
+ "mep_bcpeq",
+ "mep_lmcpm1",
+ "mep_smcpm1",
+ "mep_lwcpm1",
+ "mep_swcpm1",
+ "mep_lhcpm1",
+ "mep_shcpm1",
+ "mep_lbcpm1",
+ "mep_sbcpm1",
+ "mep_lmcpm0",
+ "mep_smcpm0",
+ "mep_lwcpm0",
+ "mep_swcpm0",
+ "mep_lhcpm0",
+ "mep_shcpm0",
+ "mep_lbcpm0",
+ "mep_sbcpm0",
+ "mep_lmcpa",
+ "mep_smcpa",
+ "mep_lwcpa",
+ "mep_swcpa",
+ "mep_lhcpa",
+ "mep_shcpa",
+ "mep_lbcpa",
+ "mep_sbcpa",
+ "mep_lmcp16",
+ "mep_smcp16",
+ "mep_lwcp16",
+ "mep_swcp16",
+ "mep_lmcpi",
+ "mep_smcpi",
+ "mep_lwcpi",
+ "mep_swcpi",
+ "mep_lmcp",
+ "mep_smcp",
+ "mep_lwcp",
+ "mep_swcp",
+ "mep_ssubu",
+ "mep_saddu",
+ "mep_ssub",
+ "mep_sadd",
+ "mep_clipu",
+ "mep_clip",
+ "mep_maxu",
+ "mep_minu",
+ "mep_max",
+ "mep_min",
+ "mep_ave",
+ "mep_abs",
+ "mep_ldz",
+ "mep_dbreak",
+ "mep_dret",
+ "mep_divu",
+ "mep_div",
+ "mep_maddru",
+ "mep_maddr",
+ "mep_maddu",
+ "mep_madd",
+ "mep_mulru",
+ "mep_mulr",
+ "mep_mulu",
+ "mep_mul",
+ "mep_cache",
+ "mep_tas",
+ "mep_btstm",
+ "mep_bnotm",
+ "mep_bclrm",
+ "mep_bsetm",
+ "mep_ldcb",
+ "mep_stcb",
+ "mep_syncm",
+ "mep_break",
+ "mep_swi",
+ "mep_sleep",
+ "mep_halt",
+ "mep_reti",
+ "mep_ei",
+ "mep_di",
+ "mep_ldc",
+ "mep_ldc_lo",
+ "mep_ldc_hi",
+ "mep_ldc_lp",
+ "mep_stc",
+ "mep_stc_lo",
+ "mep_stc_hi",
+ "mep_stc_lp",
+ "mep_erepeat",
+ "mep_repeat",
+ "mep_ret",
+ "mep_jsr",
+ "mep_jmp24",
+ "mep_jmp",
+ "mep_bsr24",
+ "mep_bsr12",
+ "mep_bne",
+ "mep_beq",
+ "mep_bgei",
+ "mep_blti",
+ "mep_bnei",
+ "mep_beqi",
+ "mep_bnez",
+ "mep_beqz",
+ "mep_bra",
+ "mep_fsft",
+ "mep_sll3",
+ "mep_slli",
+ "mep_srli",
+ "mep_srai",
+ "mep_sll",
+ "mep_srl",
+ "mep_sra",
+ "mep_xor3",
+ "mep_and3",
+ "mep_or3",
+ "mep_nor",
+ "mep_xor",
+ "mep_and",
+ "mep_or",
+ "mep_sltu3x",
+ "mep_slt3x",
+ "mep_add3x",
+ "mep_sl2ad3",
+ "mep_sl1ad3",
+ "mep_sltu3i",
+ "mep_slt3i",
+ "mep_sltu3",
+ "mep_slt3",
+ "mep_neg",
+ "mep_sbvck3",
+ "mep_sub",
+ "mep_advck3",
+ "mep_add3i",
+ "mep_add",
+ "mep_add3",
+ "mep_movh",
+ "mep_movu16",
+ "mep_movu24",
+ "mep_movi16",
+ "mep_movi8",
+ "mep_mov",
+ "mep_ssarb",
+ "mep_extuh",
+ "mep_extub",
+ "mep_exth",
+ "mep_extb",
+ "mep_lw24",
+ "mep_sw24",
+ "mep_lhu16",
+ "mep_lbu16",
+ "mep_lw16",
+ "mep_lh16",
+ "mep_lb16",
+ "mep_sw16",
+ "mep_sh16",
+ "mep_sb16",
+ "mep_lhu_tp",
+ "mep_lbu_tp",
+ "mep_lw_tp",
+ "mep_lh_tp",
+ "mep_lb_tp",
+ "mep_sw_tp",
+ "mep_sh_tp",
+ "mep_sb_tp",
+ "mep_lw_sp",
+ "mep_sw_sp",
+ "mep_lhu",
+ "mep_lbu",
+ "mep_lw",
+ "mep_lh",
+ "mep_lb",
+ "mep_sw",
+ "mep_sh",
+ "mep_sb",
+ "mep_dsp1",
+ "mep_dsp0",
+ "mep_dsp",
+ "mep_uci",
+ "mep_lhucpm1",
+ "mep_lbucpm1",
+ "mep_lhucpm0",
+ "mep_lbucpm0",
+ "mep_lhucpa",
+ "mep_lbucpa",
+ "mep_lhucp",
+ "mep_lhcp",
+ "mep_shcp",
+ "mep_lbucp",
+ "mep_lbcp",
+ "mep_sbcp",
+ "mep_casw3",
+ "mep_cash3",
+ "mep_casb3",
+ "mep_prefd",
+ "mep_pref",
+ "mep_ldcb_r",
+ "mep_stcb_r",
+ "mep_fcmpleis",
+ "mep_fcmpleis",
+ "mep_fcmplis",
+ "mep_fcmplis",
+ "mep_fcmpes",
+ "mep_fcmpes",
+ "mep_fcmpules",
+ "mep_fcmpules",
+ "mep_fcmpuls",
+ "mep_fcmpuls",
+ "mep_fcmpues",
+ "mep_fcmpues",
+ "mep_fcmpus",
+ "mep_fcmpus",
+ "mep_fcvtsw",
+ "mep_fcvtsw",
+ "mep_ftruncws",
+ "mep_ftruncws",
+ "mep_fnegs",
+ "mep_fnegs",
+ "mep_fabss",
+ "mep_fabss",
+ "mep_fsqrts",
+ "mep_fsqrts",
+ "mep_fdivs",
+ "mep_fdivs",
+ "mep_fmuls",
+ "mep_fmuls",
+ "mep_fsubs",
+ "mep_fsubs",
+ "mep_fadds",
+ "mep_fadds",
+ "mep_fmovs",
+ "mep_cextb",
+ "mep_cexth",
+ "mep_cextub",
+ "mep_cextuh",
+ "mep_xmula0",
+ "mep_cmula0",
+ "mep_cneg",
+ "mep_cneg",
+ "mep_cmovh2",
+ "mep_cmovh1",
+ "mep_cmovc2",
+ "mep_cmovc1",
+ "mep_cmov2",
+ "mep_cmov1",
+ "mep_cmovi",
+ "mep_cmovi",
+ "mep_cpmov",
+ "mep_cmov",
+ "mep_csrai3",
+ "mep_csrai3",
+ "mep_csrai",
+ "mep_csrai",
+ "mep_csra3",
+ "mep_csra3",
+ "mep_csra",
+ "mep_csra",
+ "mep_csrli3",
+ "mep_csrli3",
+ "mep_csrli",
+ "mep_csrli",
+ "mep_csrl3",
+ "mep_csrl3",
+ "mep_csrl",
+ "mep_csrl",
+ "mep_cslli3",
+ "mep_cslli3",
+ "mep_cslli",
+ "mep_cslli",
+ "mep_csll3",
+ "mep_csll3",
+ "mep_csll",
+ "mep_csll",
+ "mep_cxori3",
+ "mep_cxori3",
+ "mep_cxori",
+ "mep_cxori",
+ "mep_cxor3",
+ "mep_cxor3",
+ "mep_cxor",
+ "mep_cxor",
+ "mep_cnori3",
+ "mep_cnori3",
+ "mep_cnori",
+ "mep_cnori",
+ "mep_cnor3",
+ "mep_cnor3",
+ "mep_cnor",
+ "mep_cnor",
+ "mep_cori3",
+ "mep_cori3",
+ "mep_cori",
+ "mep_cori",
+ "mep_cor3",
+ "mep_cor3",
+ "mep_cor",
+ "mep_cor",
+ "mep_candi3",
+ "mep_candi3",
+ "mep_candi",
+ "mep_candi",
+ "mep_cand3",
+ "mep_cand3",
+ "mep_cand",
+ "mep_cand",
+ "mep_csubi3",
+ "mep_csubi3",
+ "mep_csubi",
+ "mep_csubi",
+ "mep_csub3",
+ "mep_csub3",
+ "mep_csub",
+ "mep_csub",
+ "mep_caddi3",
+ "mep_caddi3",
+ "mep_caddi",
+ "mep_caddi",
+ "mep_cadd3",
+ "mep_cadd3",
+ "mep_cadd",
+ "mep_cadd"
+};
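+
+/* Where an intrinsic exists in two instruction variants its name appears
+   twice in sequence above; e.g. indices 591 and 592 are both
+   "mep_fcmpleis", and the mep_* enum earlier in this file indexes the
+   first of each pair (mep_fcmpleis = 591).  */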
+
+const struct cgen_insn cgen_insns[] = {
+ { 103,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbslla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 103,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbslla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
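+  /* The two entries above are the core (GROUP_NORMAL) and VLIW
+     (GROUP_VLIW) variants of the same intrinsic: field 103 indexes
+     "mep_cpsmsbslla1_w" in cgen_intrinsics[], and CGEN_ENABLE_INSN_P
+     chooses between the _C3 and _P1 insn patterns at run time.  */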
+ { 104,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbslua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 104,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbslua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 105,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbslla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 105,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbslla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 106,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbslua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 106,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbslua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 107,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadslla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 107,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadslla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 108,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadslua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 108,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadslua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 109,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadslla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 109,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadslla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 110,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadslua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 110,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadslua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 111,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulslla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 111,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulslla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 112,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulslua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 112,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulslua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 113,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulslla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 113,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulslla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 114,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulslua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 114,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulslua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 115,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 115,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 116,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 116,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 117,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 117,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 118,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmsbua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 118,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmsbua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 119,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 119,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 120,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 120,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 121,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 121,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 122,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsmadua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 122,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsmadua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 123,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 123,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 124,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 124,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 125,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbla1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 125,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbla1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 126,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbua1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 126,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbua1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 127,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 127,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 128,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmsbua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 128,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmsbua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 129,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 129,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 130,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 130,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 131,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadla1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 131,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadla1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 132,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadua1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 132,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadua1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 133,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 133,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 134,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmadua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 134,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmadua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 135,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmada1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 135,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmada1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 136,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmada1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 136,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmada1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 137,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 137,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 138,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 138,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 139,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulla1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 139,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulla1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 140,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulua1u_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 140,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulua1u_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 141,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 141,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 142,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmulua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 142,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmulua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 143,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmula1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 143,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmula1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 144,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmula1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 144,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmula1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 145,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssda1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 145,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssda1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 146,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssda1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 146,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssda1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 147,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssqa1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 147,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssqa1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 148,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssqa1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 148,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssqa1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 0,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadila1_h_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 1,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadiua1_h_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 2,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1_b_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 3,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1u_b_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 4,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulila1_h_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 5,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmuliua1_h_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 6,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1_b_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 7,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1u_b_P1,
+ 4,
+ 0,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 8,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamadila1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 9,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamadiua1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 10,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamadia1_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 11,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamadia1u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 12,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamulila1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 13,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamuliua1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 14,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamulia1_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 15,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpamulia1u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 16,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadila1s1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 17,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadiua1s1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 18,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1s1_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 19,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1s1u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 20,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulila1s1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 21,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmuliua1s1_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 22,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1s1_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 23,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1s1u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 24,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadila1s0_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 25,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadiua1s0_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 26,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1s0_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 27,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmadia1s0u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 28,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulila1s0_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 29,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmuliua1s0_h_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 30,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1s0_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 31,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfmulia1s0u_b_P1,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 149,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsllia1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 149,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsllia1_1_p1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 150,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsraia1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 150,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsraia1_1_p1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 151,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrlia1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 151,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrlia1_1_p1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 152,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslla1_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 152,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslla1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 153,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsraa1_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 153,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsraa1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 154,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrla1_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 154,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrla1_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 32,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacswp_P1,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0} },
+ 4 },
+ { 33,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaccpa1_P1,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0} },
+ 4 },
+ { 34,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacsuma1_P1,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0} },
+ 4 },
+ { 155,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovhla1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 155,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovhla1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 156,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovhua1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 156,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovhua1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 157,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppackla1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 157,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackla1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 158,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppackua1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 158,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackua1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 159,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppackla1_h_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 159,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackla1_h_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 160,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppackua1_h_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 160,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackua1_h_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 161,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppacka1_b_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 161,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppacka1_b_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 162,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppacka1u_b_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 } },
+ 4 },
+ { 162,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppacka1u_b_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 } },
+ 4 },
+ { 163,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovlla1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 163,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovlla1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 164,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovlua1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 164,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovlua1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 165,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovula1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 165,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovula1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 166,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovuua1_w_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 166,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovuua1_w_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 167,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovla1_h_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 167,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovla1_h_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 168,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovua1_h_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 168,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovua1_h_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 169,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmova1_b_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 169,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmova1_b_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 170,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsetla1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 170,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsetla1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 171,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsetua1_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 171,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsetua1_w_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 172,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpseta1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 172,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpseta1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 173,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsadla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 173,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 174,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsadua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 174,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 175,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsada1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 175,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsada1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 176,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsada1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 176,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsada1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 177,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 177,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 178,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 178,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 179,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsa1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 179,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsa1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 180,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsa1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 180,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsa1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 181,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubacla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 181,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubacla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 182,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubacua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 182,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubacua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 183,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubaca1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 183,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubaca1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 184,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubaca1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 184,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubaca1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 185,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 185,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 186,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsubua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 186,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 187,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsuba1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 187,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsuba1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 188,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsuba1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 188,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsuba1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 189,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddacla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 189,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddacla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 190,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddacua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 190,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddacua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 191,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddaca1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 191,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddaca1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 192,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddaca1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 192,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddaca1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 193,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddla1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 193,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddla1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 194,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddua1_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 194,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddua1_h_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 195,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpadda1_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 195,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadda1_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 196,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpadda1u_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 196,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadda1u_b_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 203,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovi_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 203,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovi_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 35,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_c1nop_P1,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 197,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdmovi_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 197,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdmovi_P0_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 198,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdmoviu_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 198,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdmoviu_P0_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 199,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovi_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 199,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovi_w_P0_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 200,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmoviu_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 200,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmoviu_w_P0_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 201,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovi_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 201,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovi_h_P0_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 204,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdclipi3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 204,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdclipi3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 205,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdclipiu3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 205,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdclipiu3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 206,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpclipi3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 206,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpclipi3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 207,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpclipiu3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 207,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpclipiu3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 208,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslai3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 208,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslai3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 209,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslai3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 209,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslai3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 210,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdslli3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 210,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdslli3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 211,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslli3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 211,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslli3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 212,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslli3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 212,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslli3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 213,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpslli3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 213,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslli3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 214,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsrai3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 214,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsrai3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 215,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrai3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 215,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrai3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 216,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrai3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 216,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrai3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 217,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrai3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 217,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrai3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 218,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsrli3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 218,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsrli3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 219,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrli3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 219,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrli3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 220,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrli3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 220,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrli3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 221,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrli3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 221,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrli3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 341,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsla3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 341,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsla3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 342,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsla3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 342,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsla3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 343,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsll3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 343,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsll3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 344,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssll3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 344,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssll3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 345,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsll3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 345,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsll3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 346,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssll3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 346,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssll3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 347,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsll3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 347,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsll3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 348,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssll3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 348,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssll3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 349,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsll3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 349,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsll3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 350,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsra3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 350,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsra3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 351,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssra3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 351,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssra3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 352,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsra3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 352,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsra3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 353,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssra3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 353,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssra3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 354,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsra3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 354,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsra3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 355,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssra3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 355,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssra3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 356,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsra3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 356,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsra3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 357,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsrl3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 357,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsrl3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 358,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssrl3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 358,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssrl3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 359,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrl3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 359,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrl3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 360,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssrl3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 360,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssrl3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 361,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrl3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 361,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrl3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 362,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssrl3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 362,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssrl3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 363,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsrl3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 363,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrl3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 308,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmin3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 308,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmin3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 309,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpminu3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 309,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpminu3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 310,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmin3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 310,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmin3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 311,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmin3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 311,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmin3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 312,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpminu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 312,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpminu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 313,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmax3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 313,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmax3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 314,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmaxu3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 314,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmaxu3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 315,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmax3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 315,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmax3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 316,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmax3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 316,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmax3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 317,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmaxu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 317,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmaxu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 364,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppack_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 364,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppack_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 365,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppack_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 365,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppack_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 366,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cppacku_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 366,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppacku_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 377,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpxor3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 377,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpxor3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 378,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpnor3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 378,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpnor3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 379,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpor3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 379,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpor3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 380,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpand3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 380,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpand3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_VECTOR, 1 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 }, { 0, 0, cgen_regnum_operand_type_VECTOR, 0 } },
+ 4 },
+ { 318,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabs3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 318,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabs3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 319,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabs3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 319,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabs3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 320,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 320,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 321,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 321,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 322,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 322,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 323,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 323,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddsr3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 324,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaddsru3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 324,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddsru3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 325,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpave3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 325,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpave3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 326,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpave3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 326,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpave3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 327,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpave3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 327,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpave3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 328,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpaveu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 328,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaveu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 329,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextlsub3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 329,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextlsub3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 330,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextlsubu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 330,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextlsubu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 331,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextusub3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 331,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextusub3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 332,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextusubu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 332,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextusubu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 333,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextladd3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 333,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextladd3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 334,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextladdu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 334,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextladdu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 335,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextuadd3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 335,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextuadd3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 336,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextuaddu3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 336,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextuaddu3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 337,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssub3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 337,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssub3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 338,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpssub3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 338,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpssub3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 339,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsadd3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 339,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadd3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 340,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsadd3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 340,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadd3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 381,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdsub3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 381,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdsub3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 382,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsub3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 382,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsub3_w_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 383,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsub3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 383,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsub3_h_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 384,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsub3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 384,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsub3_b_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 385,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdadd3_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 385,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdadd3_P0_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 222,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpge_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 222,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpge_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 223,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgeu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 223,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgeu_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 224,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpge_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 224,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpge_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 225,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpge_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 225,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpge_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 226,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgeu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 226,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgeu_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 227,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 227,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 228,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgtu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 228,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgtu_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 229,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 229,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 230,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 230,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgt_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 231,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpgtu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 231,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpgtu_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 232,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpne_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 232,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpne_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 233,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpne_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 233,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpne_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 234,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpne_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 234,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpne_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 235,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 235,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 236,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 236,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 237,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 237,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpocmpeq_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 238,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpge_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 238,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpge_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 239,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgeu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 239,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgeu_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 240,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpge_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 240,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpge_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 241,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpge_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 241,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpge_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 242,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgeu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 242,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgeu_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 243,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 243,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 244,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgtu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 244,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgtu_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 245,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 245,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 246,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 246,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgt_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 247,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpgtu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 247,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpgtu_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 248,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpne_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 248,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpne_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 249,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpne_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 249,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpne_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 250,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpne_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 250,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpne_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 251,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 251,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_w_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 252,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 252,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_h_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 253,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 253,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacmpeq_b_P0_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 375,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpfsftbi_C3,
+ 4,
+ 1,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 375,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbi_P0_P1,
+ 4,
+ 1,
+ { 0, 1, 2, 3 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 36,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfacla0s1_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 37,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfacua0s1_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 38,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfaca0s1_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 39,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfaca0s1u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 40,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbla0s1_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 41,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbua0s1_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 42,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftba0s1_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 43,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftba0s1u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 44,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfacla0s0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 45,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfacua0s0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 46,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfaca0s0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 47,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfaca0s0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 48,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbla0s0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 49,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbua0s0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 50,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftba0s0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 51,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftba0s0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 52,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsllia0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 53,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsraia0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 54,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrlia0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 55,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpslla0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 56,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsraa0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 57,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsrla0_P0S,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 58,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaccpa0_P0S,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 59,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpacsuma0_P0S,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 60,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovhla0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 61,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovhua0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 62,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackla0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 63,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackua0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 64,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackla0_h_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 65,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppackua0_h_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 66,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppacka0_b_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 67,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cppacka0u_b_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 } },
+ 4 },
+ { 68,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovlla0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 69,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovlua0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 70,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovula0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 71,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovuua0_w_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 } },
+ 4 },
+ { 72,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovla0_h_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 73,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovua0_h_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 } },
+ 4 },
+ { 74,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmova0_b_P0S,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 75,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsetla0_w_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 76,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsetua0_w_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 77,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpseta0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 78,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 79,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsadua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 80,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsada0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 81,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsada0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 82,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 83,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 84,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsa0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 85,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsa0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 86,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubacla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 87,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubacua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 88,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubaca0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 89,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubaca0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 90,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 91,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsubua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 92,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsuba0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 93,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsuba0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 94,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddacla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 95,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddacua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 96,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddaca0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 97,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddaca0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 98,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddla0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 99,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpaddua0_h_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 100,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadda0_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 101,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadda0u_b_P0S,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 254,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpge_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 254,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpge_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 255,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgeu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 255,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgeu_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 256,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpge_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 256,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpge_h_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 257,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpge_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 257,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpge_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 258,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgeu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 258,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgeu_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 259,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 259,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 260,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgtu_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 260,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgtu_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 261,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 261,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_h_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 262,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 262,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgt_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 263,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpgtu_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 263,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpgtu_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 264,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpne_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 264,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpne_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 265,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpne_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 265,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpne_h_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 266,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpne_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 266,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpne_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 267,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_w_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 267,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_w_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 268,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_h_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 268,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_h_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 269,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 269,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpeq_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 270,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcmpeqz_b_C3,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 270,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcmpeqz_b_P0S_P1,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 302,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovtocc_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 302,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovtocc_P0S_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 303,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovtocsar1_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 303,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovtocsar1_P0S_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 304,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovtocsar0_C3,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 304,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovtocsar0_P0S_P1,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 305,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovfrcc_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 305,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovfrcc_P0S_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 306,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovfrcsar1_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 306,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovfrcsar1_P0S_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 307,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmovfrcsar0_C3,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 307,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmovfrcsar0_P0S_P1,
+ 1,
+ 1,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 } },
+ 4 },
+ { 271,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdcastw_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 271,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdcastw_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 272,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cdcastuw_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 272,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cdcastuw_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 273,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcasth_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 273,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcasth_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 274,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcastuh_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 274,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcastuh_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 275,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcastb_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 275,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcastb_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 276,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcastub_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 276,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcastub_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 277,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcastb_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 277,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcastb_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 278,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpcastub_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 278,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpcastub_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 279,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextl_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 279,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextl_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 280,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextlu_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 280,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextlu_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 281,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextl_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 281,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextl_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 282,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextlu_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 282,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextlu_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 283,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextu_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 283,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextu_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 284,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextuu_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 284,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextuu_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 285,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextu_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 285,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextu_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 286,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpextuu_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 286,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpextuu_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 287,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpbcast_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 287,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpbcast_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 288,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpbcast_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 288,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpbcast_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 289,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpbcast_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 289,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpbcast_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 290,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpccadd_b_C3,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 290,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpccadd_b_P0S_P1,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 } },
+ 4 },
+ { 291,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cphadd_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 291,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cphadd_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 292,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cphadd_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 292,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cphadd_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 293,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cphadd_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 293,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cphadd_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 294,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cphaddu_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 294,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cphaddu_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 295,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpnorm_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 295,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpnorm_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 296,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpnorm_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 296,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpnorm_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 297,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpldz_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 297,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpldz_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 298,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpldz_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 298,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpldz_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 299,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsz_w_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 299,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsz_w_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 300,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsz_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 300,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsz_h_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 301,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpabsz_b_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 301,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpabsz_b_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 640,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmov_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 640,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpmov_P0S_P1,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 373,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpfsftbs1_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 373,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbs1_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 374,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpfsftbs0_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 374,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpfsftbs0_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 376,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpsel_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 376,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpsel_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 367,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpackl_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 367,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpackl_w_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 368,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpackl_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 368,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpackl_h_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 369,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpackl_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 369,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpackl_b_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 370,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpacku_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 1 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 370,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpacku_w_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2USI, 1 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 }, { 0, 0, cgen_regnum_operand_type_V2USI, 0 } },
+ 4 },
+ { 371,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpacku_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 371,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpacku_h_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 }, { 0, 0, cgen_regnum_operand_type_V4UHI, 0 } },
+ 4 },
+ { 372,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpunpacku_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 372,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpunpacku_b_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8UQI, 1 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 }, { 0, 0, cgen_regnum_operand_type_V8UQI, 0 } },
+ 4 },
+ { 386,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpadd3_w_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 386,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadd3_w_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V2SI, 1 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 }, { 0, 0, cgen_regnum_operand_type_V2SI, 0 } },
+ 4 },
+ { 387,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpadd3_h_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 387,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadd3_h_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V4HI, 1 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 }, { 0, 0, cgen_regnum_operand_type_V4HI, 0 } },
+ 4 },
+ { 388,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpadd3_b_C3,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 388,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cpadd3_b_P0S_P1,
+ 3,
+ 1,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_V8QI, 1 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 }, { 0, 0, cgen_regnum_operand_type_V8QI, 0 } },
+ 4 },
+ { 102,
+ ISA_EXT1|ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_c0nop_P0_P0S,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 202,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cpmoviu_h_C3,
+ 2,
+ 1,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_V4UHI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 632,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmovh_rn_crm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 632,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmovh_rn_crm_p0,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 633,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmovh_crn_rm,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 633,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmovh_crn_rm_p0,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 634,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmovc_rn_ccrm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 64, 80, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 634,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmovc_rn_ccrm_p0,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 64, 80, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 4 },
+ { 635,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmovc_ccrn_rm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 64, 80, cgen_regnum_operand_type_DEFAULT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 635,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmovc_ccrn_rm_p0,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 64, 80, cgen_regnum_operand_type_DEFAULT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 636,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmov_rn_crm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 636,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmov_rn_crm_p0,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 } },
+ 4 },
+ { 637,
+ ISA_EXT1,
+ GROUP_NORMAL,
+ CODE_FOR_cgen_intrinsic_cmov_crn_rm,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 637,
+ ISA_EXT1,
+ GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cmov_crn_rm_p0,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 389,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bsrv,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 390,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_jsrv,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 391,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_synccp,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 392,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bcpaf,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 393,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bcpat,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 394,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bcpne,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 395,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bcpeq,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 396,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 397,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcpm1,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 398,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 399,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcpm1,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 400,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhcpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 401,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_shcpm1,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 402,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbcpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 403,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sbcpm1,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 404,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 405,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcpm0,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 406,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 407,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcpm0,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 408,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhcpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 409,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_shcpm0,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 410,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbcpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 411,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sbcpm0,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 412,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 413,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcpa,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 414,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 415,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcpa,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 416,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhcpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 417,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_shcpa,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 418,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbcpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 419,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sbcpa,
+ 3,
+ 0,
+ { 1, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 420,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcp16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 421,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcp16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 422,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcp16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 423,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcp16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 424,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcpi,
+ 2,
+ 0,
+ { 0, 1, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 } },
+ 2 },
+ { 425,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcpi,
+ 2,
+ 0,
+ { 1, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 } },
+ 2 },
+ { 426,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcpi,
+ 2,
+ 0,
+ { 0, 1, 1 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 } },
+ 2 },
+ { 427,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcpi,
+ 2,
+ 0,
+ { 1, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 } },
+ 2 },
+ { 428,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lmcp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 429,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_smcp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_CP_DATA_BUS_INT, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 430,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lwcp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 431,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swcp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 432,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ssubu,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 433,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_saddu,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 434,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ssub,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 435,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sadd,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 436,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_clipu,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 437,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_clip,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 438,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_maxu,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 439,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_minu,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 440,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_max,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 441,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_min,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 442,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ave,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 443,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_abs,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 444,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldz,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 445,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_dbreak,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 446,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_dret,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 447,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_divu,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 448,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_div,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 449,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_maddru,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 450,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_maddr,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 451,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_maddu,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 452,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_madd,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 453,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_mulru,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 454,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_mulr,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 455,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_mulu,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 456,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_mul,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 457,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cache,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 458,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_tas,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 459,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_btstm,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 460,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bnotm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_POINTER, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 461,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bclrm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_POINTER, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 462,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bsetm,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_POINTER, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 463,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldcb,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 464,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stcb,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 465,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_syncm,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 466,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_break,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 467,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_swi,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 468,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sleep,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 469,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_halt,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 470,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_reti,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 471,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ei,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 472,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_di,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 473,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldc,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 32, 16, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 474,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldc_lo,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 475,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldc_hi,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 476,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldc_lp,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 477,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stc,
+ 2,
+ 0,
+ { 1, 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 32, 16, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 478,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stc_lo,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 479,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stc_hi,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 480,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stc_lp,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 481,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_erepeat,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 482,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_repeat,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 483,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ret,
+ 0,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_DEFAULT, 0 } },
+ 2 },
+ { 484,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_jsr,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 485,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_jmp24,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 486,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_jmp,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 488,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bsr12,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 2 },
+ { 487,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bsr24,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 489,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bne,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 490,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_beq,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 491,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bgei,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 492,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_blti,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 493,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bnei,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 494,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_beqi,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 4 },
+ { 495,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bnez,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 2 },
+ { 496,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_beqz,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 2 },
+ { 497,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_bra,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LABEL, 0 } },
+ 2 },
+ { 498,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_fsft,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 499,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sll3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 500,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_slli,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 501,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_srli,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 502,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_srai,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 503,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sll,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 504,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_srl,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 505,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sra,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 506,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_xor3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 507,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_and3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 508,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_or3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 509,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_nor,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 510,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_xor,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 511,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_and,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 512,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_or,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 513,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sltu3x,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 514,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_slt3x,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 515,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_add3x,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 516,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sl2ad3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 517,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sl1ad3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 518,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sltu3i,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 519,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_slt3i,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 520,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sltu3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 521,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_slt3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 522,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_neg,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 523,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sbvck3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 524,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sub,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 525,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_advck3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 526,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_add3i,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 527,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_add,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 528,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_add3,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 529,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_movh,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 530,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_movu16,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 531,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_movu24,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 533,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_movi8,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 532,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_movi16,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 534,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_mov,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 535,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ssarb,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 536,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_extuh,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 537,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_extub,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 538,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_exth,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 539,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_extb,
+ 1,
+ 0,
+ { 0, 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 } },
+ 2 },
+ { 540,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lw24,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 541,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sw24,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 542,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhu16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 543,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbu16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 544,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lw16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 545,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lh16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 546,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lb16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 547,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sw16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 548,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sh16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 549,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sb16,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 550,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhu_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 551,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbu_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 552,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lw_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 553,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lh_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 554,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lb_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 555,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sw_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 556,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sh_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 557,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sb_tp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 558,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lw_sp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 559,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sw_sp,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 2 },
+ { 560,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhu,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 561,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbu,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 562,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lw,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 563,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lh,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 564,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lb,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 565,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sw,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 566,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sh,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 567,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sb,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 568,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_dsp1,
+ 2,
+ 0,
+ { 0, 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 569,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_dsp0,
+ 1,
+ 0,
+ { 0 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 570,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_dsp,
+ 3,
+ 0,
+ { 0, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 571,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_uci,
+ 3,
+ 0,
+ { 0, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 572,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhucpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 573,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbucpm1,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 574,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhucpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 575,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbucpm0,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 576,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhucpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 577,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbucpa,
+ 3,
+ 0,
+ { 0, 1, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 578,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhucp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 579,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lhcp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 580,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_shcp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 581,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbucp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 582,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_lbcp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 583,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_sbcp,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_SI, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 584,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_casw3,
+ 3,
+ 0,
+ { 0, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 585,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_cash3,
+ 3,
+ 0,
+ { 0, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 586,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_casb3,
+ 3,
+ 0,
+ { 0, 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 } },
+ 4 },
+ { 587,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_prefd,
+ 3,
+ 0,
+ { 0, 1, 2 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 4 },
+ { 588,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_pref,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 589,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_ldcb_r,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 1 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 },
+ { 590,
+ ISA_MEP|ISA_EXT1,
+ GROUP_NORMAL|GROUP_VLIW,
+ CODE_FOR_cgen_intrinsic_stcb_r,
+ 2,
+ 0,
+ { 0, 1 },
+ { { 0, 0, cgen_regnum_operand_type_LONG, 0 }, { 0, 0, cgen_regnum_operand_type_POINTER, 0 } },
+ 2 }
+};
+#endif
diff --git a/gcc/config/mep/mep-ivc2.cpu b/gcc/config/mep/mep-ivc2.cpu
new file mode 100644
index 000000000..1e0025185
--- /dev/null
+++ b/gcc/config/mep/mep-ivc2.cpu
@@ -0,0 +1,9776 @@
+; Toshiba MeP IVC2 Coprocessor description. -*- scheme -*-
+; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+; Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;------------------------------------------------------------------------------
+; MeP-Integrator will redefine the isa pmacros below to allow the bit widths
+; listed here for each ME_MODULE using this coprocessor.
+; IVC2 uses the isas as follows:
+; C3 32
+; P0S 16
+; P0 48
+; P1 64
+;------------------------------------------------------------------------------
+; begin-isas
+(define-pmacro ivc2-core-isa () (ISA ext_core1))
+(define-pmacro ivc2-16-isa () (ISA ext_cop1_16))
+(define-pmacro ivc2-32-isa () (ISA ext_cop1_32))
+(define-pmacro ivc2-48-isa () (ISA ext_cop1_48))
+(define-pmacro ivc2-64-isa () (ISA ext_cop1_64))
+(define-pmacro all-ivc2-isas () (ISA ext_core1,ext_cop1_16,ext_cop1_32,ext_cop1_48,ext_cop1_64))
+(define-pmacro ivc2-p0s-isa () (ISA ext_cop1_16))
+(define-pmacro ivc2-p0-isa () (ISA ext_cop1_48))
+(define-pmacro ivc2-p0s-p0-isa () (ISA ext_cop1_16,ext_cop1_48))
+(define-pmacro ivc2-p1-isa () (ISA ext_cop1_64))
+(define-pmacro ivc2-p0s-p1-isa () (ISA ext_cop1_16,ext_cop1_64))
+(define-pmacro ivc2-p0-p1-isa () (ISA ext_cop1_48,ext_cop1_64))
+(define-pmacro ivc2-p0s-p0-p1-isa () (ISA ext_cop1_16,ext_cop1_48,ext_cop1_64))
+(define-pmacro ivc2-c3-isa () (ISA ext_cop1_32))
+; end-isas
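+
+; For illustration: an instruction available in both the P0S and P1 slots
+; would carry (ivc2-p0s-p1-isa) in its attribute list, which per the
+; definitions above expands to (ISA ext_cop1_16,ext_cop1_64).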
+
+; register definitions
+; ---------------------
+; NOTE: This exists solely to provide the proper register names for this coprocessor.
+; GDB will use the hardware table generated from this declaration. The operands use h-cr
+; from mep-core.cpu so that SID's semantic trace will be consistent between
+; the core and the coprocessor, but use parse/print handlers which reference the
+; hardware table generated from this declaration.
+(define-hardware
+ (name h-cr-ivc2)
+ (comment "64-bit coprocessor registers for ivc2 coprocessor")
+ (attrs VIRTUAL all-mep-core-isas (CDATA CP_DATA_BUS_INT))
+ (type register DI (64))
+ (set (index newval) (c-call VOID "h_cr64_set" index newval))
+ (get (index) (c-call DI "h_cr64_get" index))
+ (indices keyword "$c" (.map -reg-pair (.iota 8)))
+)
+
+; NOTE: This exists solely to provide the proper register names for this coprocessor.
+; GDB will use the hardware table generated from this declaration. The operands use h-ccr
+; from mep-core.cpu so that SID's semantic trace will be consistent between
+; the core and the coprocessor, but use parse/print handlers which reference the
+; hardware table generated from this declaration.
+(define-hardware
+ (name h-ccr-ivc2)
+ (comment "Coprocessor control registers for ivc2 coprocessor")
+ (attrs VIRTUAL all-mep-isas)
+ (type register SI (32))
+ (set (index newval) (c-call VOID "h_ccr_set" index newval))
+ (get (index) (c-call SI "h_ccr_get" index))
+ (indices keyword ""
+ (.splice
+
+ ($csar0 0)
+ ($cc 1)
+ ($cofr0 4)
+ ($cofr1 5)
+ ($cofa0 6)
+ ($cofa1 7)
+
+ ($csar1 15)
+
+ ($acc0_0 16)
+ ($acc0_1 17)
+ ($acc0_2 18)
+ ($acc0_3 19)
+ ($acc0_4 20)
+ ($acc0_5 21)
+ ($acc0_6 22)
+ ($acc0_7 23)
+
+ ($acc1_0 24)
+ ($acc1_1 25)
+ ($acc1_2 26)
+ ($acc1_3 27)
+ ($acc1_4 28)
+ ($acc1_5 29)
+ ($acc1_6 30)
+ ($acc1_7 31)
+ (.unsplice (.map -ccr-reg-pair (.iota 32)))
+ )
+ )
+)
+
+(define-attr
+ (type bitset)
+ (for insn)
+ (name SLOTS)
+ (comment "slots for which this opcode is valid - c3, p0s, p0, p1")
+ (values CORE C3 P0S P0 P1)
+ (default CORE)
+ )
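+
+; For illustration: an IVC2 opcode valid only in the C3 slot would specify
+; (SLOTS C3) in its attributes, and one valid in the P0S, P0 and P1 slots
+; would specify (SLOTS P0S,P0,P1); an insn with no explicit SLOTS setting
+; defaults to CORE, per the define-attr above.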
+
+;-----------------------------------------------------------------------------
+; macros for standard opcodes for each slot type
+
+; C3
+(dnf f-ivc2-2u4 "sub opcode field" (all-mep-isas) 4 2)
+(dnf f-ivc2-3u4 "sub opcode field" (all-mep-isas) 4 3)
+(dnf f-ivc2-8u4 "sub opcode field" (all-mep-isas) 4 8)
+(df f-ivc2-8s4 "sub opcode field" (all-mep-isas) 4 8 INT #f #f)
+(dnf f-ivc2-1u6 "sub opcode field" (all-mep-isas) 6 1)
+(dnf f-ivc2-2u6 "sub opcode field" (all-mep-isas) 6 2)
+(dnf f-ivc2-3u6 "sub opcode field" (all-mep-isas) 6 3)
+(dnf f-ivc2-6u6 "sub opcode field" (all-mep-isas) 6 6)
+(dnf f-ivc2-5u7 "sub opcode field" (all-mep-isas) 7 5)
+(dnf f-ivc2-4u8 "sub opcode field" (all-mep-isas) 8 4)
+(dnf f-ivc2-3u9 "sub opcode field" (all-mep-isas) 9 3)
+(dnf f-ivc2-5u16 "sub opcode field" (all-mep-isas) 16 5)
+(dnf f-ivc2-5u21 "sub opcode field" (all-mep-isas) 21 5)
+(dnf f-ivc2-5u26 "sub opcode field" (all-mep-isas) 26 5)
+(dnf f-ivc2-1u31 "sub opcode field" (all-mep-isas) 31 1)
+
+(dnf f-ivc2-4u16 "sub opcode field" (all-mep-isas) 16 4)
+(dnf f-ivc2-4u20 "sub opcode field" (all-mep-isas) 20 4)
+(dnf f-ivc2-4u24 "sub opcode field" (all-mep-isas) 24 4)
+(dnf f-ivc2-4u28 "sub opcode field" (all-mep-isas) 28 4)
+
+; P0S/P0/P1
+(dnf f-ivc2-2u0 "sub opcode field" (all-mep-isas) 0 2)
+(dnf f-ivc2-3u0 "sub opcode field" (all-mep-isas) 0 3)
+(dnf f-ivc2-4u0 "sub opcode field" (all-mep-isas) 0 4)
+(dnf f-ivc2-5u0 "sub opcode field" (all-mep-isas) 0 5)
+(dnf f-ivc2-8u0 "sub opcode field" (all-mep-isas) 0 8)
+(df f-ivc2-8s0 "sub opcode field" (all-mep-isas) 0 8 INT #f #f)
+(dnf f-ivc2-6u2 "sub opcode field" (all-mep-isas) 2 6)
+(dnf f-ivc2-5u3 "sub opcode field" (all-mep-isas) 3 5)
+(dnf f-ivc2-4u4 "sub opcode field" (all-mep-isas) 4 4)
+(dnf f-ivc2-3u5 "sub opcode field" (all-mep-isas) 5 3)
+(dnf f-ivc2-5u8 "sub opcode field" (all-mep-isas) 8 5)
+(dnf f-ivc2-4u10 "sub opcode field" (all-mep-isas) 10 4)
+(dnf f-ivc2-3u12 "sub opcode field" (all-mep-isas) 12 3)
+(dnf f-ivc2-5u13 "sub opcode field" (all-mep-isas) 13 5)
+(dnf f-ivc2-2u18 "sub opcode field" (all-mep-isas) 18 2)
+(dnf f-ivc2-5u18 "sub opcode field" (all-mep-isas) 18 5)
+(dnf f-ivc2-8u20 "sub opcode field" (all-mep-isas) 20 8)
+(df f-ivc2-8s20 "sub opcode field" (all-mep-isas) 20 8 INT #f #f)
+(dnf f-ivc2-5u23 "sub opcode field" (all-mep-isas) 23 5)
+(dnf f-ivc2-2u23 "sub opcode field" (all-mep-isas) 23 2)
+(dnf f-ivc2-3u25 "sub opcode field" (all-mep-isas) 25 3)
+
+(dnmf f-ivc2-imm16p0 "16-bit immediate in P0/P1" (all-mep-isas) UINT
+ (f-ivc2-8u0 f-ivc2-8u20)
+ (sequence () ; insert
+ (set (ifield f-ivc2-8u0) (and (srl (ifield f-ivc2-imm16p0) 8) #xff))
+ (set (ifield f-ivc2-8u20) (and (ifield f-ivc2-imm16p0) #xff))
+ )
+ (sequence () ; extract
+ (set (ifield f-ivc2-imm16p0) (or (ifield f-ivc2-8u20)
+ (sll (ifield f-ivc2-8u0) 8)))
+ )
+ )
+
+(dnmf f-ivc2-simm16p0 "signed 16-bit immediate in P0/P1" (all-mep-isas) INT
+ (f-ivc2-8u0 f-ivc2-8u20)
+ (sequence () ; insert
+ (set (ifield f-ivc2-8u0) (and (srl (ifield f-ivc2-simm16p0) 8) #xff))
+ (set (ifield f-ivc2-8u20) (and (ifield f-ivc2-simm16p0) #xff))
+ )
+ (sequence () ; extract
+ (set (ifield f-ivc2-simm16p0) (or (ifield f-ivc2-8u20)
+ (sll (ifield f-ivc2-8u0) 8)))
+ )
+ )
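+
+; Worked example for the two multi-ifields above: inserting #x1234 into
+; f-ivc2-imm16p0 stores #x12 (the high byte) in f-ivc2-8u0 and #x34 (the
+; low byte) in f-ivc2-8u20; extraction recombines them as
+; (or #x34 (sll #x12 8)) = #x1234.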
+
+(dnop ivc2_csar0 "ivc2_csar0" (all-ivc2-isas) h-ccr-ivc2 0)
+(dnop ivc2_cc "ivc2_cc" (all-ivc2-isas) h-ccr-ivc2 1)
+(dnop ivc2_cofr0 "ivc2_cofr0" (all-ivc2-isas) h-ccr-ivc2 4)
+(dnop ivc2_cofr1 "ivc2_cofr1" (all-ivc2-isas) h-ccr-ivc2 5)
+(dnop ivc2_cofa0 "ivc2_cofa0" (all-ivc2-isas) h-ccr-ivc2 6)
+(dnop ivc2_cofa1 "ivc2_cofa1" (all-ivc2-isas) h-ccr-ivc2 7)
+
+(dnop ivc2_csar1 "ivc2_csar1" (all-ivc2-isas) h-ccr-ivc2 15)
+
+(dnop ivc2_acc0_0 "acc0_0" (all-ivc2-isas) h-ccr-ivc2 16)
+(dnop ivc2_acc0_1 "acc0_1" (all-ivc2-isas) h-ccr-ivc2 17)
+(dnop ivc2_acc0_2 "acc0_2" (all-ivc2-isas) h-ccr-ivc2 18)
+(dnop ivc2_acc0_3 "acc0_3" (all-ivc2-isas) h-ccr-ivc2 19)
+(dnop ivc2_acc0_4 "acc0_4" (all-ivc2-isas) h-ccr-ivc2 20)
+(dnop ivc2_acc0_5 "acc0_5" (all-ivc2-isas) h-ccr-ivc2 21)
+(dnop ivc2_acc0_6 "acc0_6" (all-ivc2-isas) h-ccr-ivc2 22)
+(dnop ivc2_acc0_7 "acc0_7" (all-ivc2-isas) h-ccr-ivc2 23)
+
+(dnop ivc2_acc1_0 "acc1_0" (all-ivc2-isas) h-ccr-ivc2 24)
+(dnop ivc2_acc1_1 "acc1_1" (all-ivc2-isas) h-ccr-ivc2 25)
+(dnop ivc2_acc1_2 "acc1_2" (all-ivc2-isas) h-ccr-ivc2 26)
+(dnop ivc2_acc1_3 "acc1_3" (all-ivc2-isas) h-ccr-ivc2 27)
+(dnop ivc2_acc1_4 "acc1_4" (all-ivc2-isas) h-ccr-ivc2 28)
+(dnop ivc2_acc1_5 "acc1_5" (all-ivc2-isas) h-ccr-ivc2 29)
+(dnop ivc2_acc1_6 "acc1_6" (all-ivc2-isas) h-ccr-ivc2 30)
+(dnop ivc2_acc1_7 "acc1_7" (all-ivc2-isas) h-ccr-ivc2 31)
+
+(dnop croc "$CRo C3" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u7)
+(dnop crqc "$CRq C3" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u21)
+(dnop crpc "$CRp C3" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u26)
+
+(dnop ivc-x-6-1 "filler" (all-mep-isas) h-uint f-ivc2-1u6)
+(dnop ivc-x-6-2 "filler" (all-mep-isas) h-uint f-ivc2-2u6)
+(dnop ivc-x-6-3 "filler" (all-mep-isas) h-uint f-ivc2-3u6)
+
+
+(dnop imm3p4 "Imm3p4" (all-mep-isas) h-uint f-ivc2-3u4)
+(dnop imm3p9 "Imm3p9" (all-mep-isas) h-uint f-ivc2-3u9)
+(dnop imm4p8 "Imm4p8" (all-mep-isas) h-uint f-ivc2-4u8)
+(dnop imm5p7 "Imm5p7" (all-mep-isas) h-uint f-ivc2-5u7)
+(dnop imm6p6 "Imm6p6" (all-mep-isas) h-uint f-ivc2-6u6)
+(dnop imm8p4 "Imm8p4" (all-mep-isas) h-uint f-ivc2-8u4)
+(dnop simm8p4 "sImm8p4" (all-mep-isas) h-sint f-ivc2-8s4)
+
+(dnop imm3p5 "Imm3p5" (all-mep-isas) h-uint f-ivc2-3u5)
+(dnop imm3p12 "Imm3p12" (all-mep-isas) h-uint f-ivc2-3u12)
+(dnop imm4p4 "Imm4p4" (all-mep-isas) h-uint f-ivc2-4u4)
+(dnop imm4p10 "Imm4p10" (all-mep-isas) h-uint f-ivc2-4u10)
+(dnop imm5p8 "Imm5p8" (all-mep-isas) h-uint f-ivc2-5u8)
+(dnop imm5p3 "Imm5p3" (all-mep-isas) h-uint f-ivc2-5u3)
+(dnop imm6p2 "Imm6p2" (all-mep-isas) h-uint f-ivc2-6u2)
+(dnop imm5p23 "Imm5p23" (all-mep-isas) h-uint f-ivc2-5u23)
+(dnop imm3p25 "Imm3p25" (all-mep-isas) h-uint f-ivc2-3u25)
+(dnop imm8p0 "Imm8p0" (all-mep-isas) h-uint f-ivc2-8u0)
+(dnop simm8p0 "sImm8p0" (all-mep-isas) h-sint f-ivc2-8s0)
+(dnop simm8p20 "sImm8p20" (all-mep-isas) h-sint f-ivc2-8s20)
+(dnop imm8p20 "Imm8p20" (all-mep-isas) h-uint f-ivc2-8u20)
+
+(dnop crop "$CRo Pn" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u23)
+(dnop crqp "$CRq Pn" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u13)
+(dnop crpp "$CRp Pn" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-5u18)
+
+(dnop ivc-x-0-2 "filler" (all-mep-isas) h-uint f-ivc2-2u0)
+(dnop ivc-x-0-3 "filler" (all-mep-isas) h-uint f-ivc2-3u0)
+(dnop ivc-x-0-4 "filler" (all-mep-isas) h-uint f-ivc2-4u0)
+(dnop ivc-x-0-5 "filler" (all-mep-isas) h-uint f-ivc2-5u0)
+
+(dpop imm16p0 "Imm16p0" (all-mep-isas) h-uint f-ivc2-imm16p0 "unsigned16_range")
+(dpop simm16p0 "sImm16p0" (all-mep-isas) h-sint f-ivc2-simm16p0 "signed16_range")
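+; The quoted names above select operand parse handlers
+; ("unsigned16_range", "signed16_range"); presumably they range-check
+; the 16-bit immediates on the assembler side, though that code lives
+; outside this file.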
+
+
+(df f-ivc2-ccrn-c3hi "ccrn hi 2u28" (all-mep-isas) 28 2 UINT #f #f)
+(df f-ivc2-ccrn-c3lo "ccrn lo 4u4" (all-mep-isas) 4 4 UINT #f #f)
+
+(df f-ivc2-crn "ivc2 crn" (all-mep-isas) 0 4 UINT #f #f)
+(df f-ivc2-crm "ivc2 crm" (all-mep-isas) 4 4 UINT #f #f)
+(df f-ivc2-ccrn-h1 "ccrx hi 1u20" (all-mep-isas) 20 1 UINT #f #f)
+(df f-ivc2-ccrn-h2 "ccrx hi 2u20" (all-mep-isas) 20 2 UINT #f #f)
+(df f-ivc2-ccrn-lo "ccrx lo 4u0" (all-mep-isas) 0 4 UINT #f #f)
+(df f-ivc2-cmov1 "ivc2 cmov op1" (all-mep-isas) 8 12 UINT #f #f)
+(df f-ivc2-cmov2 "ivc2 cmov op2" (all-mep-isas) 22 6 UINT #f #f)
+(df f-ivc2-cmov3 "ivc2 cmov op3" (all-mep-isas) 28 4 UINT #f #f)
+
+(define-multi-ifield
+ (name f-ivc2-ccrn-c3)
+ (comment "Coprocessor register number field")
+ (attrs all-mep-isas)
+ (mode UINT)
+ (subfields f-ivc2-ccrn-c3hi f-ivc2-ccrn-c3lo)
+ (insert (sequence ()
+ (set (ifield f-ivc2-ccrn-c3hi) (and (srl (ifield f-ivc2-ccrn-c3) 4) #x3))
+ (set (ifield f-ivc2-ccrn-c3lo) (and (ifield f-ivc2-ccrn-c3) #xf))))
+ (extract (set (ifield f-ivc2-ccrn-c3)
+ (or (sll (ifield f-ivc2-ccrn-c3hi) 4)
+ (ifield f-ivc2-ccrn-c3lo))))
+ )
+
+(define-multi-ifield
+ (name f-ivc2-ccrn)
+ (comment "Coprocessor control register number field")
+ (attrs all-mep-isas)
+ (mode UINT)
+ (subfields f-ivc2-ccrn-h2 f-ivc2-ccrn-lo)
+ (insert (sequence ()
+ (set (ifield f-ivc2-ccrn-h2) (and (srl (ifield f-ivc2-ccrn) 4) #x3))
+ (set (ifield f-ivc2-ccrn-lo) (and (ifield f-ivc2-ccrn) #xf))))
+ (extract (set (ifield f-ivc2-ccrn)
+ (or (sll (ifield f-ivc2-ccrn-h2) 4)
+ (ifield f-ivc2-ccrn-lo))))
+ )
+
+(define-multi-ifield
+ (name f-ivc2-crnx)
+ (comment "Coprocessor register number field")
+ (attrs all-mep-isas)
+ (mode UINT)
+ (subfields f-ivc2-ccrn-h1 f-ivc2-ccrn-lo)
+ (insert (sequence ()
+ (set (ifield f-ivc2-ccrn-h1) (and (srl (ifield f-ivc2-crnx) 4) #x1))
+ (set (ifield f-ivc2-ccrn-lo) (and (ifield f-ivc2-crnx) #xf))))
+ (extract (set (ifield f-ivc2-crnx)
+ (or (sll (ifield f-ivc2-ccrn-h1) 4)
+ (ifield f-ivc2-ccrn-lo))))
+ )
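+; Worked example for the split register-number fields (illustrative):
+; for f-ivc2-ccrn a register number of 45 (#b101101) inserts as
+; ccrn-h2 = #b10 and ccrn-lo = #b1101, and extract rebuilds
+; (#b10 << 4) | #b1101 = 45.  f-ivc2-crnx follows the same pattern
+; with a single high bit, covering coprocessor registers 0-31.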
+
+(dnop ivc2rm "reg Rm" (all-mep-isas) h-gpr f-ivc2-crm)
+(dnop ivc2crn "copro Rn (0-31, 64-bit)" (all-mep-isas (CDATA CP_DATA_BUS_INT)) h-cr64 f-ivc2-crnx)
+(dnop ivc2ccrn "copro control reg CCRn" (all-mep-isas (CDATA REGNUM)) h-ccr-ivc2 f-ivc2-ccrn)
+(dnop ivc2c3ccrn "copro control reg CCRn" (all-mep-isas (CDATA REGNUM)) h-ccr-ivc2 f-ivc2-ccrn-c3)
+
+; [--][--] [--][--] [--][--] [--]
+; 0----+-- --1----+ ----2--- -+--
+; 01234567 89012345 67890123 4567
+
+
+; 1111 nnnn mmmm 0111 1111 0000 0000 N000 cmov =crn,rm
+(dni cmov-crn-rm
+ "cmov CRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmov1"))
+ "cmov $crnx64,$rm"
+ (+ MAJ_15 crnx64 rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 0) (f-ivc2-4u24 0) (f-29 0) (f-30 0) (f-31 0))
+ (set crnx64 (or (zext DI rm) (and DI crnx64 #xffffffff00000000)))
+ ()
+)
+
+; 1111 nnnn mmmm 0111 1111 0000 0000 N001 cmov =rm,crn
+(dni cmov-rn-crm
+ "cmov Rm,CRn"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmov2"))
+ "cmov $rm,$crnx64"
+ (+ MAJ_15 crnx64 rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 0) (f-ivc2-4u24 0) (f-29 0) (f-30 0) (f-31 1))
+ (set rm crnx64)
+ ()
+)
+
+; 1111 nnnn mmmm 0111 1111 0000 0000 NN10 cmovc =ccrn,rm
+(dni cmovc-ccrn-rm
+ "cmovc CCRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmovc1"))
+ "cmovc $ivc2c3ccrn,$rm"
+ (+ MAJ_15 ivc2c3ccrn rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 0) (f-ivc2-4u24 0) (f-30 1) (f-31 0))
+ (set ivc2c3ccrn rm)
+ ()
+)
+
+; 1111 nnnn mmmm 0111 1111 0000 0000 NN11 cmovc =rm,ccrn
+(dni cmovc-rn-ccrm
+ "cmovc Rm,CCRn"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmovc2"))
+ "cmovc $rm,$ivc2c3ccrn"
+ (+ MAJ_15 ivc2c3ccrn rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 0) (f-ivc2-4u24 0) (f-30 1) (f-31 1))
+ (set rm ivc2c3ccrn)
+ ()
+)
+
+; 1111 nnnn mmmm 0111 1111 0001 0000 N000 cmovh =crn,rm
+(dni cmovh-crn-rm
+ "cmovh CRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmovh1"))
+ "cmovh $crnx64,$rm"
+ (+ MAJ_15 crnx64 rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 1) (f-ivc2-4u24 0) (f-29 0) (f-30 0) (f-31 0))
+ (set crnx64 (or (sll (zext DI rm) 32) (and DI crnx64 #xffffffff)))
+ ()
+)
+
+; 1111 nnnn mmmm 0111 1111 0001 0000 N001 cmovh =rm,crn
+(dni cmovh-rn-crm
+ "cmovh Rm,CRn"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cmovh2"))
+ "cmovh $rm,$crnx64"
+ (+ MAJ_15 crnx64 rm (f-sub4 #x7)
+ (f-ivc2-4u16 #xF) (f-ivc2-4u20 1) (f-ivc2-4u24 0) (f-29 0) (f-30 0) (f-31 1))
+ (set rm (srl crnx64 32))
+ ()
+)
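+
+; Summary of the C3 cmov family above: cmov zero-extends Rm into the
+; low 32 bits of the 64-bit CRn (high half preserved), cmovh shifts Rm
+; into the high 32 bits (low half preserved), and the =rm forms copy
+; the corresponding half back to the core register.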
+
+; nnnnmmmm 11110000 0000N000 0000 cmov =crn,rm
+(dni cmov-crn-rm-p0
+ "cmov CRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmov1"))
+ "cmov $ivc2crn,$ivc2rm"
+ (+ ivc2crn ivc2rm (f-ivc2-cmov1 #xf00) (f-21 0) (f-ivc2-cmov2 #x00) (f-ivc2-cmov3 0))
+ (set ivc2crn ivc2rm)
+ ()
+)
+
+; nnnnmmmm 11110000 0000N001 0000 cmov =rm,crn
+(dni cmov-rn-crm-p0
+ "cmov Rm,CRn"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmov2"))
+ "cmov $ivc2rm,$ivc2crn"
+ (+ ivc2crn ivc2rm (f-ivc2-cmov1 #xf00) (f-21 0) (f-ivc2-cmov2 #x10) (f-ivc2-cmov3 0))
+ (set ivc2rm ivc2crn)
+ ()
+)
+
+; nnnnmmmm 11110000 0000NN10 0000 cmovc =ccrn,rm
+(dni cmovc-ccrn-rm-p0
+ "cmovc CCRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmovc1"))
+ "cmovc $ivc2ccrn,$ivc2rm"
+ (+ ivc2ccrn ivc2rm (f-ivc2-cmov1 #xf00) (f-ivc2-cmov2 #x20) (f-ivc2-cmov3 0))
+ (set ivc2ccrn ivc2rm)
+ ()
+)
+
+; nnnnmmmm 11110000 0000NN11 0000 cmovc =rm,ccrn
+(dni cmovc-rn-ccrm-p0
+ "cmovc Rm,CCRn"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmovc2"))
+ "cmovc $ivc2rm,$ivc2ccrn"
+ (+ ivc2ccrn ivc2rm (f-ivc2-cmov1 #xf00) (f-ivc2-cmov2 #x30) (f-ivc2-cmov3 0))
+ (set ivc2rm ivc2ccrn)
+ ()
+)
+
+; nnnnmmmm 11110001 0000N000 0000 cmovh =crn,rm
+(dni cmovh-crn-rm-p0
+ "cmovh CRn,Rm"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmovh1"))
+ "cmovh $ivc2crn,$ivc2rm"
+ (+ ivc2crn ivc2rm (f-ivc2-cmov1 #xf10) (f-21 0) (f-ivc2-cmov2 #x00) (f-ivc2-cmov3 0))
+ (set ivc2crn (or (sll (zext DI ivc2rm) 32) (and DI ivc2crn #xffffffff)))
+ ()
+)
+
+; nnnnmmmm 11110001 0000N001 0000 cmovh =rm,crn
+(dni cmovh-rn-crm-p0
+ "cmovh Rm,CRn"
+ (OPTIONAL_CP_INSN ivc2-p0-isa (SLOTS P0) (INTRINSIC "cmovh2"))
+ "cmovh $ivc2rm,$ivc2crn"
+ (+ ivc2crn ivc2rm (f-ivc2-cmov1 #xf10) (f-21 0) (f-ivc2-cmov2 #x10) (f-ivc2-cmov3 0))
+ (set ivc2rm (srl ivc2crn 32))
+ ()
+)
+
+
+; 1111 000 ooooo 0111 00000 qqqqq ppppp 0 cpadd3.b =croc,crqc,crpc (c3_1)
+(dni cpadd3_b_C3 "cpadd3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpadd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpadd3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpadd3_b" pc crqc crpc)) )
+ ()
+ )
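+; The C3 ALU entries from here on share the template shown above:
+; f-ivc2-5u16 selects the opcode group, f-ivc2-3u4 the operation within
+; the group, check_option_cp guards for the coprocessor option, and the
+; arithmetic itself is delegated to an ivc2_* simulator helper via
+; c-call, with the DI result written to $croc.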
+
+; 1111 001 ooooo 0111 00000 qqqqq ppppp 0 cpadd3.h =croc,crqc,crpc (c3_1)
+(dni cpadd3_h_C3 "cpadd3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpadd3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpadd3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpadd3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00000 qqqqq ppppp 0 cpadd3.w =croc,crqc,crpc (c3_1)
+(dni cpadd3_w_C3 "cpadd3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpadd3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpadd3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpadd3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 00000 qqqqq ppppp 0 cdadd3 =croc,crqc,crpc (c3_1)
+(dni cdadd3_C3 "cdadd3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdadd3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdadd3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdadd3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00000 qqqqq ppppp 0 cpsub3.b =croc,crqc,crpc (c3_1)
+(dni cpsub3_b_C3 "cpsub3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsub3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsub3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00000 qqqqq ppppp 0 cpsub3.h =croc,crqc,crpc (c3_1)
+(dni cpsub3_h_C3 "cpsub3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsub3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsub3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsub3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00000 qqqqq ppppp 0 cpsub3.w =croc,crqc,crpc (c3_1)
+(dni cpsub3_w_C3 "cpsub3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsub3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsub3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsub3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 00000 qqqqq ppppp 0 cdsub3 =croc,crqc,crpc (c3_1)
+(dni cdsub3_C3 "cdsub3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsub3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsub3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdsub3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00001 qqqqq ppppp 0 cpand3 =croc,crqc,crpc (c3_1)
+(dni cpand3_C3 "cpand3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpand3") (CPTYPE VECT) (CRET FIRST))
+ "cpand3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpand3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 00001 qqqqq ppppp 0 cpor3 =croc,crqc,crpc (c3_1)
+(dni cpor3_C3 "cpor3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpor3") (CPTYPE VECT) (CRET FIRST))
+ "cpor3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpor3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00001 qqqqq ppppp 0 cpnor3 =croc,crqc,crpc (c3_1)
+(dni cpnor3_C3 "cpnor3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpnor3") (CPTYPE VECT) (CRET FIRST))
+ "cpnor3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpnor3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 00001 qqqqq ppppp 0 cpxor3 =croc,crqc,crpc (c3_1)
+(dni cpxor3_C3 "cpxor3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpxor3") (CPTYPE VECT) (CRET FIRST))
+ "cpxor3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpxor3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00001 qqqqq ppppp 0 cpsel =croc,crqc,crpc (c3_1)
+(dni cpsel_C3 "cpsel $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsel") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpsel $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsel" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 iii ooooo 0111 11101 qqqqq ppppp 0 cpfsftbi =croc,crqc,crpc,imm3p4 (c3_1)
+(dni cpfsftbi_C3 "cpfsftbi $croc,$crqc,$crpc,$imm3p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpfsftbi") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cpfsftbi $croc,$crqc,$crpc,$imm3p4"
+ (+ MAJ_15 imm3p4 croc (f-sub4 7)
+ (f-ivc2-5u16 #x1d) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpfsftbi" pc crqc crpc imm3p4)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00001 qqqqq ppppp 0 cpfsftbs0 =croc,crqc,crpc (c3_1)
+(dni cpfsftbs0_C3 "cpfsftbs0 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpfsftbs0") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpfsftbs0 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpfsftbs0" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 00001 qqqqq ppppp 0 cpfsftbs1 =croc,crqc,crpc (c3_1)
+(dni cpfsftbs1_C3 "cpfsftbs1 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpfsftbs1") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpfsftbs1 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpfsftbs1" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00010 qqqqq ppppp 0 cpunpacku.b =croc,crqc,crpc (c3_1)
+(dni cpunpacku_b_C3 "cpunpacku.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpacku_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpunpacku.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpacku_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 00010 qqqqq ppppp 0 cpunpacku.h =croc,crqc,crpc (c3_1)
+(dni cpunpacku_h_C3 "cpunpacku.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpacku_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpunpacku.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpacku_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00010 qqqqq ppppp 0 cpunpacku.w =croc,crqc,crpc (c3_1)
+(dni cpunpacku_w_C3 "cpunpacku.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpacku_w") (CPTYPE V2USI) (CRET FIRST))
+ "cpunpacku.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpacku_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00010 qqqqq ppppp 0 cpunpackl.b =croc,crqc,crpc (c3_1)
+(dni cpunpackl_b_C3 "cpunpackl.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpackl_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpunpackl.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpackl_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00010 qqqqq ppppp 0 cpunpackl.h =croc,crqc,crpc (c3_1)
+(dni cpunpackl_h_C3 "cpunpackl.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpackl_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpunpackl.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpackl_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00010 qqqqq ppppp 0 cpunpackl.w =croc,crqc,crpc (c3_1)
+(dni cpunpackl_w_C3 "cpunpackl.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpunpackl_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpunpackl.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpunpackl_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00011 qqqqq ppppp 0 cppacku.b =croc,crqc,crpc (c3_1)
+(dni cppacku_b_C3 "cppacku.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppacku_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cppacku.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x3) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppacku_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00011 qqqqq ppppp 0 cppack.b =croc,crqc,crpc (c3_1)
+(dni cppack_b_C3 "cppack.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppack_b") (CPTYPE V8QI) (CRET FIRST))
+ "cppack.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x3) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppack_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 00011 qqqqq ppppp 0 cppack.h =croc,crqc,crpc (c3_1)
+(dni cppack_h_C3 "cppack.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppack_h") (CPTYPE V4HI) (CRET FIRST))
+ "cppack.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #x3) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppack_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 qqqqq ppppp 0 cpsrl3.b =croc,crqc,crpc (c3_1)
+(dni cpsrl3_b_C3 "cpsrl3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrl3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrl3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsrl3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 00100 qqqqq ppppp 0 cpssrl3.b =croc,crqc,crpc (c3_1)
+(dni cpssrl3_b_C3 "cpssrl3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssrl3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssrl3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssrl3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00100 qqqqq ppppp 0 cpsrl3.h =croc,crqc,crpc (c3_1)
+(dni cpsrl3_h_C3 "cpsrl3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrl3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrl3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsrl3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 00100 qqqqq ppppp 0 cpssrl3.h =croc,crqc,crpc (c3_1)
+(dni cpssrl3_h_C3 "cpssrl3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssrl3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssrl3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssrl3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00100 qqqqq ppppp 0 cpsrl3.w =croc,crqc,crpc (c3_1)
+(dni cpsrl3_w_C3 "cpsrl3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrl3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrl3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsrl3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00100 qqqqq ppppp 0 cpssrl3.w =croc,crqc,crpc (c3_1)
+(dni cpssrl3_w_C3 "cpssrl3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssrl3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssrl3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssrl3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00100 qqqqq ppppp 0 cdsrl3 =croc,crqc,crpc (c3_1)
+(dni cdsrl3_C3 "cdsrl3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsrl3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrl3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdsrl3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00101 qqqqq ppppp 0 cpsra3.b =croc,crqc,crpc (c3_1)
+(dni cpsra3_b_C3 "cpsra3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsra3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsra3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsra3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 00101 qqqqq ppppp 0 cpssra3.b =croc,crqc,crpc (c3_1)
+(dni cpssra3_b_C3 "cpssra3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssra3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssra3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssra3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00101 qqqqq ppppp 0 cpsra3.h =croc,crqc,crpc (c3_1)
+(dni cpsra3_h_C3 "cpsra3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsra3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsra3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsra3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 00101 qqqqq ppppp 0 cpssra3.h =croc,crqc,crpc (c3_1)
+(dni cpssra3_h_C3 "cpssra3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssra3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssra3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssra3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00101 qqqqq ppppp 0 cpsra3.w =croc,crqc,crpc (c3_1)
+(dni cpsra3_w_C3 "cpsra3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsra3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsra3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsra3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00101 qqqqq ppppp 0 cpssra3.w =croc,crqc,crpc (c3_1)
+(dni cpssra3_w_C3 "cpssra3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssra3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssra3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssra3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00101 qqqqq ppppp 0 cdsra3 =croc,crqc,crpc (c3_1)
+(dni cdsra3_C3 "cdsra3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsra3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsra3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x5) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdsra3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00110 qqqqq ppppp 0 cpsll3.b =croc,crqc,crpc (c3_1)
+(dni cpsll3_b_C3 "cpsll3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsll3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsll3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsll3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 00110 qqqqq ppppp 0 cpssll3.b =croc,crqc,crpc (c3_1)
+(dni cpssll3_b_C3 "cpssll3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssll3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssll3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssll3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00110 qqqqq ppppp 0 cpsll3.h =croc,crqc,crpc (c3_1)
+(dni cpsll3_h_C3 "cpsll3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsll3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsll3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsll3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 00110 qqqqq ppppp 0 cpssll3.h =croc,crqc,crpc (c3_1)
+(dni cpssll3_h_C3 "cpssll3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssll3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssll3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssll3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00110 qqqqq ppppp 0 cpsll3.w =croc,crqc,crpc (c3_1)
+(dni cpsll3_w_C3 "cpsll3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsll3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsll3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsll3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 00110 qqqqq ppppp 0 cpssll3.w =croc,crqc,crpc (c3_1)
+(dni cpssll3_w_C3 "cpssll3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssll3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssll3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpssll3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 00110 qqqqq ppppp 0 cdsll3 =croc,crqc,crpc (c3_1)
+(dni cdsll3_C3 "cdsll3 $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsll3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsll3 $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x6) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdsll3" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 00111 qqqqq ppppp 0 cpsla3.h =croc,crqc,crpc (c3_1)
+(dni cpsla3_h_C3 "cpsla3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsla3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpsla3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x7) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsla3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 00111 qqqqq ppppp 0 cpsla3.w =croc,crqc,crpc (c3_1)
+(dni cpsla3_w_C3 "cpsla3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsla3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpsla3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x7) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsla3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 01000 qqqqq ppppp 0 cpsadd3.h =croc,crqc,crpc (c3_1)
+(dni cpsadd3_h_C3 "cpsadd3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsadd3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpsadd3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x8) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsadd3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 01000 qqqqq ppppp 0 cpsadd3.w =croc,crqc,crpc (c3_1)
+(dni cpsadd3_w_C3 "cpsadd3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsadd3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpsadd3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x8) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpsadd3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 01000 qqqqq ppppp 0 cpssub3.h =croc,crqc,crpc (c3_1)
+(dni cpssub3_h_C3 "cpssub3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssub3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpssub3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x8) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cofr0 0)
+ (set croc (c-call DI "ivc2_cpssub3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 01000 qqqqq ppppp 0 cpssub3.w =croc,crqc,crpc (c3_1)
+(dni cpssub3_w_C3 "cpssub3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssub3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpssub3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #x8) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cofr0 0)
+ (set croc (c-call DI "ivc2_cpssub3_w" pc crqc crpc)) )
+ ()
+ )
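+; Note: unlike the plain cpsub3 forms, the saturating cpssub3 variants
+; above also clear ivc2_cofr0 before delegating, presumably resetting
+; the overflow flag register ahead of the saturating operation.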
+
+; 1111 000 ooooo 0111 01001 qqqqq ppppp 0 cpextuaddu3.b =croc,crqc,crpc (c3_1)
+(dni cpextuaddu3_b_C3 "cpextuaddu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextuaddu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextuaddu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextuaddu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 01001 qqqqq ppppp 0 cpextuadd3.b =croc,crqc,crpc (c3_1)
+(dni cpextuadd3_b_C3 "cpextuadd3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextuadd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextuadd3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextuadd3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 01001 qqqqq ppppp 0 cpextladdu3.b =croc,crqc,crpc (c3_1)
+(dni cpextladdu3_b_C3 "cpextladdu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextladdu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextladdu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextladdu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 01001 qqqqq ppppp 0 cpextladd3.b =croc,crqc,crpc (c3_1)
+(dni cpextladd3_b_C3 "cpextladd3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextladd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextladd3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextladd3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 01001 qqqqq ppppp 0 cpextusubu3.b =croc,crqc,crpc (c3_1)
+(dni cpextusubu3_b_C3 "cpextusubu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextusubu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextusubu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextusubu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 01001 qqqqq ppppp 0 cpextusub3.b =croc,crqc,crpc (c3_1)
+(dni cpextusub3_b_C3 "cpextusub3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextusub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextusub3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextusub3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 01001 qqqqq ppppp 0 cpextlsubu3.b =croc,crqc,crpc (c3_1)
+(dni cpextlsubu3_b_C3 "cpextlsubu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextlsubu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextlsubu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextlsubu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 01001 qqqqq ppppp 0 cpextlsub3.b =croc,crqc,crpc (c3_1)
+(dni cpextlsub3_b_C3 "cpextlsub3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextlsub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextlsub3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #x9) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextlsub3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 01010 qqqqq ppppp 0 cpaveu3.b =croc,crqc,crpc (c3_1)
+(dni cpaveu3_b_C3 "cpaveu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaveu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaveu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpaveu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 01010 qqqqq ppppp 0 cpave3.b =croc,crqc,crpc (c3_1)
+(dni cpave3_b_C3 "cpave3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpave3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpave3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpave3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 01010 qqqqq ppppp 0 cpave3.h =croc,crqc,crpc (c3_1)
+(dni cpave3_h_C3 "cpave3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpave3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpave3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpave3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 01010 qqqqq ppppp 0 cpave3.w =croc,crqc,crpc (c3_1)
+(dni cpave3_w_C3 "cpave3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpave3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpave3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpave3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 01010 qqqqq ppppp 0 cpaddsru3.b =croc,crqc,crpc (c3_1)
+(dni cpaddsru3_b_C3 "cpaddsru3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddsru3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaddsru3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpaddsru3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 01010 qqqqq ppppp 0 cpaddsr3.b =croc,crqc,crpc (c3_1)
+(dni cpaddsr3_b_C3 "cpaddsr3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddsr3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaddsr3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpaddsr3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 110 ooooo 0111 01010 qqqqq ppppp 0 cpaddsr3.h =croc,crqc,crpc (c3_1)
+(dni cpaddsr3_h_C3 "cpaddsr3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddsr3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpaddsr3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x6) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpaddsr3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 111 ooooo 0111 01010 qqqqq ppppp 0 cpaddsr3.w =croc,crqc,crpc (c3_1)
+(dni cpaddsr3_w_C3 "cpaddsr3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddsr3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpaddsr3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x7) croc (f-sub4 7)
+ (f-ivc2-5u16 #xa) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpaddsr3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 01011 qqqqq ppppp 0 cpabsu3.b =croc,crqc,crpc (c3_1)
+(dni cpabsu3_b_C3 "cpabsu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabsu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #xb) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabsu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 01011 qqqqq ppppp 0 cpabs3.b =croc,crqc,crpc (c3_1)
+(dni cpabs3_b_C3 "cpabs3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabs3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabs3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #xb) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabs3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 010 ooooo 0111 01011 qqqqq ppppp 0 cpabs3.h =croc,crqc,crpc (c3_1)
+(dni cpabs3_h_C3 "cpabs3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabs3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpabs3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) croc (f-sub4 7)
+ (f-ivc2-5u16 #xb) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabs3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 01100 qqqqq ppppp 0 cpmaxu3.b =croc,crqc,crpc (c3_1)
+(dni cpmaxu3_b_C3 "cpmaxu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmaxu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmaxu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #xc) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmaxu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 01100 qqqqq ppppp 0 cpmax3.b =croc,crqc,crpc (c3_1)
+(dni cpmax3_b_C3 "cpmax3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmax3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmax3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #xc) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmax3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 01100 qqqqq ppppp 0 cpmax3.h =croc,crqc,crpc (c3_1)
+(dni cpmax3_h_C3 "cpmax3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmax3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmax3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #xc) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmax3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 01100 qqqqq ppppp 0 cpmaxu3.w =croc,crqc,crpc (c3_1)
+(dni cpmaxu3_w_C3 "cpmaxu3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmaxu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmaxu3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #xc) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmaxu3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 01100 qqqqq ppppp 0 cpmax3.w =croc,crqc,crpc (c3_1)
+(dni cpmax3_w_C3 "cpmax3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmax3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmax3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #xc) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmax3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 01101 qqqqq ppppp 0 cpminu3.b =croc,crqc,crpc (c3_1)
+(dni cpminu3_b_C3 "cpminu3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpminu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpminu3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #xd) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpminu3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 001 ooooo 0111 01101 qqqqq ppppp 0 cpmin3.b =croc,crqc,crpc (c3_1)
+(dni cpmin3_b_C3 "cpmin3.b $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmin3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmin3.b $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) croc (f-sub4 7)
+ (f-ivc2-5u16 #xd) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmin3_b" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 011 ooooo 0111 01101 qqqqq ppppp 0 cpmin3.h =croc,crqc,crpc (c3_1)
+(dni cpmin3_h_C3 "cpmin3.h $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmin3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmin3.h $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) croc (f-sub4 7)
+ (f-ivc2-5u16 #xd) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmin3_h" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 100 ooooo 0111 01101 qqqqq ppppp 0 cpminu3.w =croc,crqc,crpc (c3_1)
+(dni cpminu3_w_C3 "cpminu3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpminu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpminu3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x4) croc (f-sub4 7)
+ (f-ivc2-5u16 #xd) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpminu3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 101 ooooo 0111 01101 qqqqq ppppp 0 cpmin3.w =croc,crqc,crpc (c3_1)
+(dni cpmin3_w_C3 "cpmin3.w $croc,$crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmin3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmin3.w $croc,$crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x5) croc (f-sub4 7)
+ (f-ivc2-5u16 #xd) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmin3_w" pc crqc crpc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10000 00000 00000 0 cpmovfrcsar0 =croc (c3_1)
+(dni cpmovfrcsar0_C3 "cpmovfrcsar0 $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovfrcsar0") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcsar0 $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x10) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovfrcsar0" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10000 00000 01111 0 cpmovfrcsar1 =croc (c3_1)
+(dni cpmovfrcsar1_C3 "cpmovfrcsar1 $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovfrcsar1") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcsar1 $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x10) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #xf) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovfrcsar1" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10000 00000 00001 0 cpmovfrcc =croc (c3_1)
+(dni cpmovfrcc_C3 "cpmovfrcc $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovfrcc") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcc $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x10) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x1) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovfrcc" pc)) )
+ ()
+ )
+
+; 1111 0000 0000 0111 10000 qqqqq 10000 0 cpmovtocsar0 crqc (c3_1)
+(dni cpmovtocsar0_C3 "cpmovtocsar0 $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovtocsar0") VOLATILE)
+ "cpmovtocsar0 $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x10) crqc (f-ivc2-5u26 #x10) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_csar0 0)
+ (c-call "ivc2_cpmovtocsar0" pc crqc) )
+ ()
+ )
+
+; 1111 0000 0000 0111 10000 qqqqq 11111 0 cpmovtocsar1 crqc (c3_1)
+(dni cpmovtocsar1_C3 "cpmovtocsar1 $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovtocsar1") VOLATILE)
+ "cpmovtocsar1 $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x10) crqc (f-ivc2-5u26 #x1f) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_csar1 0)
+ (c-call "ivc2_cpmovtocsar1" pc crqc) )
+ ()
+ )
+
+; 1111 0000 0000 0111 10000 qqqqq 10001 0 cpmovtocc crqc (c3_1)
+(dni cpmovtocc_C3 "cpmovtocc $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovtocc") VOLATILE)
+ "cpmovtocc $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x10) crqc (f-ivc2-5u26 #x11) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpmovtocc" pc crqc) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00000 0 cpmov =croc,crqc (c3_1)
+(dni cpmov_C3 "cpmov $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmov") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cpmov $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmov" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00001 0 cpabsz.b =croc,crqc (c3_1)
+(dni cpabsz_b_C3 "cpabsz.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsz_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabsz.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabsz_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00010 0 cpabsz.h =croc,crqc (c3_1)
+(dni cpabsz_h_C3 "cpabsz.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsz_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpabsz.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x2) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabsz_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00011 0 cpabsz.w =croc,crqc (c3_1)
+(dni cpabsz_w_C3 "cpabsz.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsz_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpabsz.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x3) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpabsz_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00100 0 cpldz.h =croc,crqc (c3_1)
+(dni cpldz_h_C3 "cpldz.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpldz_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpldz.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x4) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpldz_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00101 0 cpldz.w =croc,crqc (c3_1)
+(dni cpldz_w_C3 "cpldz.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpldz_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpldz.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x5) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpldz_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00110 0 cpnorm.h =croc,crqc (c3_1)
+(dni cpnorm_h_C3 "cpnorm.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpnorm_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpnorm.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x6) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpnorm_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 00111 0 cpnorm.w =croc,crqc (c3_1)
+(dni cpnorm_w_C3 "cpnorm.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpnorm_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpnorm.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x7) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpnorm_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01000 0 cphaddu.b =croc,crqc (c3_1)
+(dni cphaddu_b_C3 "cphaddu.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cphaddu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cphaddu.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x8) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cphaddu_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01001 0 cphadd.b =croc,crqc (c3_1)
+(dni cphadd_b_C3 "cphadd.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cphadd_b") (CPTYPE V8QI) (CRET FIRST))
+ "cphadd.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x9) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cphadd_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01010 0 cphadd.h =croc,crqc (c3_1)
+(dni cphadd_h_C3 "cphadd.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cphadd_h") (CPTYPE V4HI) (CRET FIRST))
+ "cphadd.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xa) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cphadd_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01011 0 cphadd.w =croc,crqc (c3_1)
+(dni cphadd_w_C3 "cphadd.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cphadd_w") (CPTYPE V2SI) (CRET FIRST))
+ "cphadd.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xb) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cphadd_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01100 0 cpccadd.b +crqc (c3_1)
+(dni cpccadd_b_C3 "cpccadd.b $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpccadd_b") (CPTYPE V8QI) (CRET FIRSTCOPY) VOLATILE)
+ "cpccadd.b $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xc) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpccadd_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01101 0 cpbcast.b =croc,crqc (c3_1)
+(dni cpbcast_b_C3 "cpbcast.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpbcast_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpbcast.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xd) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpbcast_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01110 0 cpbcast.h =croc,crqc (c3_1)
+(dni cpbcast_h_C3 "cpbcast.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpbcast_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpbcast.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xe) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpbcast_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 01111 0 cpbcast.w =croc,crqc (c3_1)
+(dni cpbcast_w_C3 "cpbcast.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpbcast_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpbcast.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #xf) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpbcast_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10000 0 cpextuu.b =croc,crqc (c3_1)
+(dni cpextuu_b_C3 "cpextuu.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextuu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextuu.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x10) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextuu_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10001 0 cpextu.b =croc,crqc (c3_1)
+(dni cpextu_b_C3 "cpextu.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextu.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x11) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextu_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10010 0 cpextuu.h =croc,crqc (c3_1)
+(dni cpextuu_h_C3 "cpextuu.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextuu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextuu.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x12) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextuu_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10011 0 cpextu.h =croc,crqc (c3_1)
+(dni cpextu_h_C3 "cpextu.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextu.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x13) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextu_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10100 0 cpextlu.b =croc,crqc (c3_1)
+(dni cpextlu_b_C3 "cpextlu.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextlu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextlu.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x14) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextlu_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10101 0 cpextl.b =croc,crqc (c3_1)
+(dni cpextl_b_C3 "cpextl.b $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextl_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextl.b $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x15) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextl_b" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10110 0 cpextlu.h =croc,crqc (c3_1)
+(dni cpextlu_h_C3 "cpextlu.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextlu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextlu.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x16) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextlu_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 10111 0 cpextl.h =croc,crqc (c3_1)
+(dni cpextl_h_C3 "cpextl.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpextl_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpextl.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x17) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpextl_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11000 0 cpcastub.h =croc,crqc (c3_1)
+(dni cpcastub_h_C3 "cpcastub.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcastub_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpcastub.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x18) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcastub_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11001 0 cpcastb.h =croc,crqc (c3_1)
+(dni cpcastb_h_C3 "cpcastb.h $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcastb_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpcastb.h $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x19) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcastb_h" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11100 0 cpcastub.w =croc,crqc (c3_1)
+(dni cpcastub_w_C3 "cpcastub.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcastub_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastub.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1c) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcastub_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11101 0 cpcastb.w =croc,crqc (c3_1)
+(dni cpcastb_w_C3 "cpcastb.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcastb_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastb.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1d) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcastb_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11110 0 cpcastuh.w =croc,crqc (c3_1)
+(dni cpcastuh_w_C3 "cpcastuh.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcastuh_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastuh.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1e) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcastuh_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11111 0 cpcasth.w =croc,crqc (c3_1)
+(dni cpcasth_w_C3 "cpcasth.w $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcasth_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcasth.w $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1f) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpcasth_w" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11010 0 cdcastuw =croc,crqc (c3_1)
+(dni cdcastuw_C3 "cdcastuw $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdcastuw") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdcastuw $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1a) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdcastuw" pc crqc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 10001 qqqqq 11011 0 cdcastw =croc,crqc (c3_1)
+(dni cdcastw_C3 "cdcastw $croc,$crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdcastw") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdcastw $croc,$crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x11) crqc (f-ivc2-5u26 #x1b) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cdcastw" pc crqc)) )
+ ()
+ )
+
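+; In the compare block that follows, the croc field of the pattern is
+; repurposed as a fixed sub-opcode (f-ivc2-5u7).  Judging from the
+; values used below, it packs as: bits 4:3 = operation (00 eq/eqz,
+; 01 ne, 10 gt, 11 ge), bits 2:1 = element width (00 .b, 01 .h,
+; 10 .w), bit 0 = 1 for signed, 0 for unsigned or zero compare;
+; e.g. cpcmpgt.h uses #x13 = 10 01 1.  Each semantics clears ivc2_cc
+; before the helper call, so the helpers presumably deposit the
+; per-element results there.
+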
+; 1111 0000 0000 0111 10010 qqqqq ppppp 0 cpcmpeqz.b crqc,crpc (c3_1)
+(dni cpcmpeqz_b_C3 "cpcmpeqz.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpeqz_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpeqz.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeqz_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0001 0111 10010 qqqqq ppppp 0 cpcmpeq.b crqc,crpc (c3_1)
+(dni cpcmpeq_b_C3 "cpcmpeq.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpeq_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpeq.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0011 0111 10010 qqqqq ppppp 0 cpcmpeq.h crqc,crpc (c3_1)
+(dni cpcmpeq_h_C3 "cpcmpeq.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpeq_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpeq.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x3) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0101 0111 10010 qqqqq ppppp 0 cpcmpeq.w crqc,crpc (c3_1)
+(dni cpcmpeq_w_C3 "cpcmpeq.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpeq_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpeq.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x5) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1001 0111 10010 qqqqq ppppp 0 cpcmpne.b crqc,crpc (c3_1)
+(dni cpcmpne_b_C3 "cpcmpne.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpne_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpne.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x9) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1011 0111 10010 qqqqq ppppp 0 cpcmpne.h crqc,crpc (c3_1)
+(dni cpcmpne_h_C3 "cpcmpne.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpne_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpne.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1101 0111 10010 qqqqq ppppp 0 cpcmpne.w crqc,crpc (c3_1)
+(dni cpcmpne_w_C3 "cpcmpne.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpne_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpne.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xd) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0000 0111 10010 qqqqq ppppp 0 cpcmpgtu.b crqc,crpc (c3_1)
+(dni cpcmpgtu_b_C3 "cpcmpgtu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgtu_b") (CPTYPE V8UQI) VOLATILE)
+ "cpcmpgtu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x10) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgtu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0001 0111 10010 qqqqq ppppp 0 cpcmpgt.b crqc,crpc (c3_1)
+(dni cpcmpgt_b_C3 "cpcmpgt.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgt_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpgt.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x11) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0011 0111 10010 qqqqq ppppp 0 cpcmpgt.h crqc,crpc (c3_1)
+(dni cpcmpgt_h_C3 "cpcmpgt.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgt_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpgt.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0100 0111 10010 qqqqq ppppp 0 cpcmpgtu.w crqc,crpc (c3_1)
+(dni cpcmpgtu_w_C3 "cpcmpgtu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgtu_w") (CPTYPE V2USI) VOLATILE)
+ "cpcmpgtu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x14) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgtu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0101 0111 10010 qqqqq ppppp 0 cpcmpgt.w crqc,crpc (c3_1)
+(dni cpcmpgt_w_C3 "cpcmpgt.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgt_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpgt.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x15) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1000 0111 10010 qqqqq ppppp 0 cpcmpgeu.b crqc,crpc (c3_1)
+(dni cpcmpgeu_b_C3 "cpcmpgeu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgeu_b") (CPTYPE V8UQI) VOLATILE)
+ "cpcmpgeu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x18) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgeu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1001 0111 10010 qqqqq ppppp 0 cpcmpge.b crqc,crpc (c3_1)
+(dni cpcmpge_b_C3 "cpcmpge.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpge_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpge.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x19) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1011 0111 10010 qqqqq ppppp 0 cpcmpge.h crqc,crpc (c3_1)
+(dni cpcmpge_h_C3 "cpcmpge.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpge_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpge.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1100 0111 10010 qqqqq ppppp 0 cpcmpgeu.w crqc,crpc (c3_1)
+(dni cpcmpgeu_w_C3 "cpcmpgeu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpgeu_w") (CPTYPE V2USI) VOLATILE)
+ "cpcmpgeu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1c) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgeu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1101 0111 10010 qqqqq ppppp 0 cpcmpge.w crqc,crpc (c3_1)
+(dni cpcmpge_w_C3 "cpcmpge.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpcmpge_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpge.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1d) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0001 0111 10010 qqqqq ppppp 0 cpacmpeq.b crqc,crpc (c3_1)
+(dni cpacmpeq_b_C3 "cpacmpeq.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpeq_b") (CPTYPE V8QI))
+ "cpacmpeq.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpeq_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0011 0111 10010 qqqqq ppppp 0 cpacmpeq.h crqc,crpc (c3_1)
+(dni cpacmpeq_h_C3 "cpacmpeq.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpeq_h") (CPTYPE V4HI))
+ "cpacmpeq.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x3) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpeq_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0101 0111 10010 qqqqq ppppp 0 cpacmpeq.w crqc,crpc (c3_1)
+(dni cpacmpeq_w_C3 "cpacmpeq.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpeq_w") (CPTYPE V2SI))
+ "cpacmpeq.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x5) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpeq_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 1001 0111 10010 qqqqq ppppp 0 cpacmpne.b crqc,crpc (c3_1)
+(dni cpacmpne_b_C3 "cpacmpne.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpne_b") (CPTYPE V8QI))
+ "cpacmpne.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x9) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpne_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 1011 0111 10010 qqqqq ppppp 0 cpacmpne.h crqc,crpc (c3_1)
+(dni cpacmpne_h_C3 "cpacmpne.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpne_h") (CPTYPE V4HI))
+ "cpacmpne.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpne_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 1101 0111 10010 qqqqq ppppp 0 cpacmpne.w crqc,crpc (c3_1)
+(dni cpacmpne_w_C3 "cpacmpne.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpne_w") (CPTYPE V2SI))
+ "cpacmpne.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #xd) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpne_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0000 0111 10010 qqqqq ppppp 0 cpacmpgtu.b crqc,crpc (c3_1)
+(dni cpacmpgtu_b_C3 "cpacmpgtu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgtu_b") (CPTYPE V8UQI))
+ "cpacmpgtu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x10) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgtu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0001 0111 10010 qqqqq ppppp 0 cpacmpgt.b crqc,crpc (c3_1)
+(dni cpacmpgt_b_C3 "cpacmpgt.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgt_b") (CPTYPE V8QI))
+ "cpacmpgt.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x11) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgt_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0011 0111 10010 qqqqq ppppp 0 cpacmpgt.h crqc,crpc (c3_1)
+(dni cpacmpgt_h_C3 "cpacmpgt.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgt_h") (CPTYPE V4HI))
+ "cpacmpgt.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgt_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0100 0111 10010 qqqqq ppppp 0 cpacmpgtu.w crqc,crpc (c3_1)
+(dni cpacmpgtu_w_C3 "cpacmpgtu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgtu_w") (CPTYPE V2USI))
+ "cpacmpgtu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x14) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgtu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0101 0111 10010 qqqqq ppppp 0 cpacmpgt.w crqc,crpc (c3_1)
+(dni cpacmpgt_w_C3 "cpacmpgt.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgt_w") (CPTYPE V2SI))
+ "cpacmpgt.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x15) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgt_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1000 0111 10010 qqqqq ppppp 0 cpacmpgeu.b crqc,crpc (c3_1)
+(dni cpacmpgeu_b_C3 "cpacmpgeu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgeu_b") (CPTYPE V8UQI))
+ "cpacmpgeu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x18) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgeu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1001 0111 10010 qqqqq ppppp 0 cpacmpge.b crqc,crpc (c3_1)
+(dni cpacmpge_b_C3 "cpacmpge.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpge_b") (CPTYPE V8QI))
+ "cpacmpge.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x19) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpge_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1011 0111 10010 qqqqq ppppp 0 cpacmpge.h crqc,crpc (c3_1)
+(dni cpacmpge_h_C3 "cpacmpge.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpge_h") (CPTYPE V4HI))
+ "cpacmpge.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpge_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1100 0111 10010 qqqqq ppppp 0 cpacmpgeu.w crqc,crpc (c3_1)
+(dni cpacmpgeu_w_C3 "cpacmpgeu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpgeu_w") (CPTYPE V2USI))
+ "cpacmpgeu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1c) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpgeu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1101 0111 10010 qqqqq ppppp 0 cpacmpge.w crqc,crpc (c3_1)
+(dni cpacmpge_w_C3 "cpacmpge.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpacmpge_w") (CPTYPE V2SI))
+ "cpacmpge.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1d) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpacmpge_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 0001 0111 10010 qqqqq ppppp 0 cpocmpeq.b crqc,crpc (c3_1)
+(dni cpocmpeq_b_C3 "cpocmpeq.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpeq_b") (CPTYPE V8QI))
+ "cpocmpeq.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpeq_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 0011 0111 10010 qqqqq ppppp 0 cpocmpeq.h crqc,crpc (c3_1)
+(dni cpocmpeq_h_C3 "cpocmpeq.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpeq_h") (CPTYPE V4HI))
+ "cpocmpeq.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x3) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpeq_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 0101 0111 10010 qqqqq ppppp 0 cpocmpeq.w crqc,crpc (c3_1)
+(dni cpocmpeq_w_C3 "cpocmpeq.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpeq_w") (CPTYPE V2SI))
+ "cpocmpeq.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x5) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpeq_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1001 0111 10010 qqqqq ppppp 0 cpocmpne.b crqc,crpc (c3_1)
+(dni cpocmpne_b_C3 "cpocmpne.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpne_b") (CPTYPE V8QI))
+ "cpocmpne.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x9) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpne_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1011 0111 10010 qqqqq ppppp 0 cpocmpne.h crqc,crpc (c3_1)
+(dni cpocmpne_h_C3 "cpocmpne.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpne_h") (CPTYPE V4HI))
+ "cpocmpne.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpne_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1101 0111 10010 qqqqq ppppp 0 cpocmpne.w crqc,crpc (c3_1)
+(dni cpocmpne_w_C3 "cpocmpne.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpne_w") (CPTYPE V2SI))
+ "cpocmpne.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xd) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpne_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 0000 0111 10010 qqqqq ppppp 0 cpocmpgtu.b crqc,crpc (c3_1)
+(dni cpocmpgtu_b_C3 "cpocmpgtu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgtu_b") (CPTYPE V8UQI))
+ "cpocmpgtu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x10) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgtu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 0001 0111 10010 qqqqq ppppp 0 cpocmpgt.b crqc,crpc (c3_1)
+(dni cpocmpgt_b_C3 "cpocmpgt.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgt_b") (CPTYPE V8QI))
+ "cpocmpgt.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x11) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgt_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 0011 0111 10010 qqqqq ppppp 0 cpocmpgt.h crqc,crpc (c3_1)
+(dni cpocmpgt_h_C3 "cpocmpgt.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgt_h") (CPTYPE V4HI))
+ "cpocmpgt.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgt_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 0100 0111 10010 qqqqq ppppp 0 cpocmpgtu.w crqc,crpc (c3_1)
+(dni cpocmpgtu_w_C3 "cpocmpgtu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgtu_w") (CPTYPE V2USI))
+ "cpocmpgtu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x14) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgtu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 0101 0111 10010 qqqqq ppppp 0 cpocmpgt.w crqc,crpc (c3_1)
+(dni cpocmpgt_w_C3 "cpocmpgt.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgt_w") (CPTYPE V2SI))
+ "cpocmpgt.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x15) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgt_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 1000 0111 10010 qqqqq ppppp 0 cpocmpgeu.b crqc,crpc (c3_1)
+(dni cpocmpgeu_b_C3 "cpocmpgeu.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgeu_b") (CPTYPE V8UQI))
+ "cpocmpgeu.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x18) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgeu_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 1001 0111 10010 qqqqq ppppp 0 cpocmpge.b crqc,crpc (c3_1)
+(dni cpocmpge_b_C3 "cpocmpge.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpge_b") (CPTYPE V8QI))
+ "cpocmpge.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x19) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpge_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 1011 0111 10010 qqqqq ppppp 0 cpocmpge.h crqc,crpc (c3_1)
+(dni cpocmpge_h_C3 "cpocmpge.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpge_h") (CPTYPE V4HI))
+ "cpocmpge.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpge_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 1100 0111 10010 qqqqq ppppp 0 cpocmpgeu.w crqc,crpc (c3_1)
+(dni cpocmpgeu_w_C3 "cpocmpgeu.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpgeu_w") (CPTYPE V2USI))
+ "cpocmpgeu.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x1c) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpgeu_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0101 1101 0111 10010 qqqqq ppppp 0 cpocmpge.w crqc,crpc (c3_1)
+(dni cpocmpge_w_C3 "cpocmpge.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpocmpge_w") (CPTYPE V2SI))
+ "cpocmpge.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #x1d) (f-sub4 7)
+ (f-ivc2-5u16 #x12) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpocmpge_w" pc crqc crpc) )
+ ()
+ )
+
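+; The cpacmp* and cpocmp* families above reuse the cpcmp* sub-opcode
+; layout unchanged; only the (f-ivc2-3u4 ...) value differs (#x0 for
+; cpcmp*, #x1 for cpacmp*, #x2 for cpocmp*), presumably selecting how
+; the per-element compare results are combined into ivc2_cc.
+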
+; 1111 00xx xiii 0111 10100 qqqqq ppppp 0 cpsrli3.b =crqc,crpc,imm3p9 (c3_imm)
+(dni cpsrli3_b_C3 "cpsrli3.b $crqc,$crpc,imm3p9 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrli3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrli3.b $crqc,$crpc,$imm3p9"
+ (+ MAJ_15 ivc-x-6-3 (f-ivc2-2u4 #x0) imm3p9 (f-sub4 7)
+ (f-ivc2-5u16 #x14) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrli3_b" pc crpc imm3p9)) )
+ ()
+ )
+
+; 1111 01xx iiii 0111 10100 qqqqq ppppp 0 cpsrli3.h =crqc,crpc,imm4p8 (c3_imm)
+(dni cpsrli3_h_C3 "cpsrli3.h $crqc,$crpc,imm4p8 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrli3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrli3.h $crqc,$crpc,$imm4p8"
+ (+ MAJ_15 ivc-x-6-2 (f-ivc2-2u4 #x1) imm4p8 (f-sub4 7)
+ (f-ivc2-5u16 #x14) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrli3_h" pc crpc imm4p8)) )
+ ()
+ )
+
+; 1111 10xi iiii 0111 10100 qqqqq ppppp 0 cpsrli3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpsrli3_w_C3 "cpsrli3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrli3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrli3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x2) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x14) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrli3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 11ii iiii 0111 10100 qqqqq ppppp 0 cdsrli3 =crqc,crpc,imm6p6 (c3_imm)
+(dni cdsrli3_C3 "cdsrli3 $crqc,$crpc,imm6p6 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsrli3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrli3 $crqc,$crpc,$imm6p6"
+ (+ MAJ_15 (f-ivc2-2u4 #x3) imm6p6 (f-sub4 7)
+ (f-ivc2-5u16 #x14) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdsrli3" pc crpc imm6p6)) )
+ ()
+ )
+
+; 1111 00xx xiii 0111 10101 qqqqq ppppp 0 cpsrai3.b =crqc,crpc,imm3p9 (c3_imm)
+(dni cpsrai3_b_C3 "cpsrai3.b $crqc,$crpc,imm3p9 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrai3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrai3.b $crqc,$crpc,$imm3p9"
+ (+ MAJ_15 ivc-x-6-3 (f-ivc2-2u4 #x0) imm3p9 (f-sub4 7)
+ (f-ivc2-5u16 #x15) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrai3_b" pc crpc imm3p9)) )
+ ()
+ )
+
+; 1111 01xx iiii 0111 10101 qqqqq ppppp 0 cpsrai3.h =crqc,crpc,imm4p8 (c3_imm)
+(dni cpsrai3_h_C3 "cpsrai3.h $crqc,$crpc,imm4p8 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrai3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrai3.h $crqc,$crpc,$imm4p8"
+ (+ MAJ_15 ivc-x-6-2 (f-ivc2-2u4 #x1) imm4p8 (f-sub4 7)
+ (f-ivc2-5u16 #x15) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrai3_h" pc crpc imm4p8)) )
+ ()
+ )
+
+; 1111 10xi iiii 0111 10101 qqqqq ppppp 0 cpsrai3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpsrai3_w_C3 "cpsrai3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrai3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrai3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x2) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x15) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpsrai3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 11ii iiii 0111 10101 qqqqq ppppp 0 cdsrai3 =crqc,crpc,imm6p6 (c3_imm)
+(dni cdsrai3_C3 "cdsrai3 $crqc,$crpc,imm6p6 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdsrai3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrai3 $crqc,$crpc,$imm6p6"
+ (+ MAJ_15 (f-ivc2-2u4 #x3) imm6p6 (f-sub4 7)
+ (f-ivc2-5u16 #x15) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdsrai3" pc crpc imm6p6)) )
+ ()
+ )
+
+; 1111 00xx xiii 0111 10110 qqqqq ppppp 0 cpslli3.b =crqc,crpc,imm3p9 (c3_imm)
+(dni cpslli3_b_C3 "cpslli3.b $crqc,$crpc,imm3p9 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslli3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpslli3.b $crqc,$crpc,$imm3p9"
+ (+ MAJ_15 ivc-x-6-3 (f-ivc2-2u4 #x0) imm3p9 (f-sub4 7)
+ (f-ivc2-5u16 #x16) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpslli3_b" pc crpc imm3p9)) )
+ ()
+ )
+
+; 1111 01xx iiii 0111 10110 qqqqq ppppp 0 cpslli3.h =crqc,crpc,imm4p8 (c3_imm)
+(dni cpslli3_h_C3 "cpslli3.h $crqc,$crpc,imm4p8 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslli3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpslli3.h $crqc,$crpc,$imm4p8"
+ (+ MAJ_15 ivc-x-6-2 (f-ivc2-2u4 #x1) imm4p8 (f-sub4 7)
+ (f-ivc2-5u16 #x16) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpslli3_h" pc crpc imm4p8)) )
+ ()
+ )
+
+; 1111 10xi iiii 0111 10110 qqqqq ppppp 0 cpslli3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpslli3_w_C3 "cpslli3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslli3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpslli3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x2) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x16) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpslli3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 11ii iiii 0111 10110 qqqqq ppppp 0 cdslli3 =crqc,crpc,imm6p6 (c3_imm)
+(dni cdslli3_C3 "cdslli3 $crqc,$crpc,imm6p6 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdslli3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdslli3 $crqc,$crpc,$imm6p6"
+ (+ MAJ_15 (f-ivc2-2u4 #x3) imm6p6 (f-sub4 7)
+ (f-ivc2-5u16 #x16) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdslli3" pc crpc imm6p6)) )
+ ()
+ )
+
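+; In the shift-immediate groups above, the immediate width tracks the
+; element size: imm3p9 (3 bits) for .b, imm4p8 for .h, imm5p7 for .w,
+; and imm6p6 for the 64-bit cd* forms.  The x bits in the pattern
+; comments are don't-cares covered by the ivc-x-6-3/-2/-1 filler
+; fields that pad the narrower immediates.
+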
+; 1111 01xx iiii 0111 10111 qqqqq ppppp 0 cpslai3.h =crqc,crpc,imm4p8 (c3_imm)
+(dni cpslai3_h_C3 "cpslai3.h $crqc,$crpc,imm4p8 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslai3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpslai3.h $crqc,$crpc,$imm4p8"
+ (+ MAJ_15 ivc-x-6-2 (f-ivc2-2u4 #x1) imm4p8 (f-sub4 7)
+ (f-ivc2-5u16 #x17) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpslai3_h" pc crpc imm4p8)) )
+ ()
+ )
+
+; 1111 10xi iiii 0111 10111 qqqqq ppppp 0 cpslai3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpslai3_w_C3 "cpslai3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslai3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpslai3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x2) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x17) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpslai3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 00xi iiii 0111 11000 qqqqq ppppp 0 cpclipiu3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpclipiu3_w_C3 "cpclipiu3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpclipiu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpclipiu3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x0) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x18) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpclipiu3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 01xi iiii 0111 11000 qqqqq ppppp 0 cpclipi3.w =crqc,crpc,imm5p7 (c3_imm)
+(dni cpclipi3_w_C3 "cpclipi3.w $crqc,$crpc,imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpclipi3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpclipi3.w $crqc,$crpc,$imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x1) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x18) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpclipi3_w" pc crpc imm5p7)) )
+ ()
+ )
+
+; 1111 10ii iiii 0111 11000 qqqqq ppppp 0 cdclipiu3 =crqc,crpc,imm6p6 (c3_imm)
+(dni cdclipiu3_C3 "cdclipiu3 $crqc,$crpc,imm6p6 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdclipiu3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdclipiu3 $crqc,$crpc,$imm6p6"
+ (+ MAJ_15 (f-ivc2-2u4 #x2) imm6p6 (f-sub4 7)
+ (f-ivc2-5u16 #x18) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdclipiu3" pc crpc imm6p6)) )
+ ()
+ )
+
+; 1111 11ii iiii 0111 11000 qqqqq ppppp 0 cdclipi3 =crqc,crpc,imm6p6 (c3_imm)
+(dni cdclipi3_C3 "cdclipi3 $crqc,$crpc,imm6p6 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdclipi3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdclipi3 $crqc,$crpc,$imm6p6"
+ (+ MAJ_15 (f-ivc2-2u4 #x3) imm6p6 (f-sub4 7)
+ (f-ivc2-5u16 #x18) crqc crpc (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdclipi3" pc crpc imm6p6)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00000 0 cpmovi.b =crqc,simm8p4 (c3_imm)
+(dni cpmovi_b_C3 "cpmovi.b $crqc,simm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovi_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmovi.b $crqc,$simm8p4"
+ (+ MAJ_15 simm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpmovi_b" pc simm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00010 0 cpmoviu.h =crqc,imm8p4 (c3_imm)
+(dni cpmoviu_h_C3 "cpmoviu.h $crqc,imm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmoviu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpmoviu.h $crqc,$imm8p4"
+ (+ MAJ_15 imm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x2) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpmoviu_h" pc imm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00011 0 cpmovi.h =crqc,simm8p4 (c3_imm)
+(dni cpmovi_h_C3 "cpmovi.h $crqc,simm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovi_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmovi.h $crqc,$simm8p4"
+ (+ MAJ_15 simm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x3) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpmovi_h" pc simm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00100 0 cpmoviu.w =crqc,imm8p4 (c3_imm)
+(dni cpmoviu_w_C3 "cpmoviu.w $crqc,imm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmoviu_w") (CPTYPE V2USI) (CRET FIRST))
+ "cpmoviu.w $crqc,$imm8p4"
+ (+ MAJ_15 imm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x4) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpmoviu_w" pc imm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00101 0 cpmovi.w =crqc,simm8p4 (c3_imm)
+(dni cpmovi_w_C3 "cpmovi.w $crqc,simm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovi_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmovi.w $crqc,$simm8p4"
+ (+ MAJ_15 simm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x5) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cpmovi_w" pc simm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00110 0 cdmoviu =crqc,imm8p4 (c3_imm)
+(dni cdmoviu_C3 "cdmoviu $crqc,imm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdmoviu") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdmoviu $crqc,$imm8p4"
+ (+ MAJ_15 imm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x6) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdmoviu" pc imm8p4)) )
+ ()
+ )
+
+; 1111 iiii iiii 0111 11001 qqqqq 00111 0 cdmovi =crqc,simm8p4 (c3_imm)
+(dni cdmovi_C3 "cdmovi $crqc,simm8p4 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cdmovi") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdmovi $crqc,$simm8p4"
+ (+ MAJ_15 simm8p4 (f-sub4 7)
+ (f-ivc2-5u16 #x19) crqc (f-ivc2-5u26 #x7) (f-ivc2-1u31 #x0) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqc (c-call DI "ivc2_cdmovi" pc simm8p4)) )
+ ()
+ )
+
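+; The move-immediate sub-opcodes above follow a width/sign scheme:
+; bits 2:1 of (f-ivc2-5u26 ...) select the element width (00 .b,
+; 01 .h, 10 .w, 11 64-bit) and bit 0 selects the signed (simm8p4)
+; form, except that the sole .b variant, cpmovi.b, takes a signed
+; immediate yet occupies slot #x0; slot #x1 is unused.
+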
+; 1111 0000 0000 0111 00000 qqqqq ppppp 1 cpadda1u.b crqc,crpc (c3_1)
+(dni cpadda1u_b_C3 "cpadda1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpadda1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpadda1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpadda1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0001 0111 00000 qqqqq ppppp 1 cpadda1.b crqc,crpc (c3_1)
+(dni cpadda1_b_C3 "cpadda1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpadda1_b") (CPTYPE V8QI) VOLATILE)
+ "cpadda1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpadda1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0010 0111 00000 qqqqq ppppp 1 cpaddua1.h crqc,crpc (c3_1)
+(dni cpaddua1_h_C3 "cpaddua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x2) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpaddua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0011 0111 00000 qqqqq ppppp 1 cpaddla1.h crqc,crpc (c3_1)
+(dni cpaddla1_h_C3 "cpaddla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x3) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpaddla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0100 0111 00000 qqqqq ppppp 1 cpaddaca1u.b crqc,crpc (c3_1)
+(dni cpaddaca1u_b_C3 "cpaddaca1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddaca1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpaddaca1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x4) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddaca1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0101 0111 00000 qqqqq ppppp 1 cpaddaca1.b crqc,crpc (c3_1)
+(dni cpaddaca1_b_C3 "cpaddaca1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddaca1_b") (CPTYPE V8QI) VOLATILE)
+ "cpaddaca1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x5) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddaca1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0110 0111 00000 qqqqq ppppp 1 cpaddacua1.h crqc,crpc (c3_1)
+(dni cpaddacua1_h_C3 "cpaddacua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddacua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x6) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddacua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0111 0111 00000 qqqqq ppppp 1 cpaddacla1.h crqc,crpc (c3_1)
+(dni cpaddacla1_h_C3 "cpaddacla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpaddacla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x7) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddacla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1000 0111 00000 qqqqq ppppp 1 cpsuba1u.b crqc,crpc (c3_1)
+(dni cpsuba1u_b_C3 "cpsuba1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsuba1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsuba1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x8) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsuba1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1001 0111 00000 qqqqq ppppp 1 cpsuba1.b crqc,crpc (c3_1)
+(dni cpsuba1_b_C3 "cpsuba1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsuba1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsuba1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x9) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsuba1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1010 0111 00000 qqqqq ppppp 1 cpsubua1.h crqc,crpc (c3_1)
+(dni cpsubua1_h_C3 "cpsubua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xa) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsubua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1011 0111 00000 qqqqq ppppp 1 cpsubla1.h crqc,crpc (c3_1)
+(dni cpsubla1_h_C3 "cpsubla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpsubla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1100 0111 00000 qqqqq ppppp 1 cpsubaca1u.b crqc,crpc (c3_1)
+(dni cpsubaca1u_b_C3 "cpsubaca1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubaca1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsubaca1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xc) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubaca1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1101 0111 00000 qqqqq ppppp 1 cpsubaca1.b crqc,crpc (c3_1)
+(dni cpsubaca1_b_C3 "cpsubaca1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubaca1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsubaca1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xd) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubaca1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1110 0111 00000 qqqqq ppppp 1 cpsubacua1.h crqc,crpc (c3_1)
+(dni cpsubacua1_h_C3 "cpsubacua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubacua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xe) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubacua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1111 0111 00000 qqqqq ppppp 1 cpsubacla1.h crqc,crpc (c3_1)
+(dni cpsubacla1_h_C3 "cpsubacla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsubacla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xf) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubacla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0000 0111 00000 qqqqq ppppp 1 cpabsa1u.b crqc,crpc (c3_1)
+(dni cpabsa1u_b_C3 "cpabsa1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsa1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpabsa1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x10) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsa1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0001 0111 00000 qqqqq ppppp 1 cpabsa1.b crqc,crpc (c3_1)
+(dni cpabsa1_b_C3 "cpabsa1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsa1_b") (CPTYPE V8QI) VOLATILE)
+ "cpabsa1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x11) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsa1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0010 0111 00000 qqqqq ppppp 1 cpabsua1.h crqc,crpc (c3_1)
+(dni cpabsua1_h_C3 "cpabsua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x12) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0011 0111 00000 qqqqq ppppp 1 cpabsla1.h crqc,crpc (c3_1)
+(dni cpabsla1_h_C3 "cpabsla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpabsla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpabsla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0100 0111 00000 qqqqq ppppp 1 cpsada1u.b crqc,crpc (c3_1)
+(dni cpsada1u_b_C3 "cpsada1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsada1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsada1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x14) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsada1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0101 0111 00000 qqqqq ppppp 1 cpsada1.b crqc,crpc (c3_1)
+(dni cpsada1_b_C3 "cpsada1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsada1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsada1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x15) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsada1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0110 0111 00000 qqqqq ppppp 1 cpsadua1.h crqc,crpc (c3_1)
+(dni cpsadua1_h_C3 "cpsadua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x16) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsadua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0111 0111 00000 qqqqq ppppp 1 cpsadla1.h crqc,crpc (c3_1)
+(dni cpsadla1_h_C3 "cpsadla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x17) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsadla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0000 0111 00000 qqqqq ppppp 1 cpseta1.h crqc,crpc (c3_1)
+(dni cpseta1_h_C3 "cpseta1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpseta1_h") (CPTYPE V4HI) VOLATILE)
+ "cpseta1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpseta1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0010 0111 00000 qqqqq ppppp 1 cpsetua1.w crqc,crpc (c3_1)
+(dni cpsetua1_w_C3 "cpsetua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsetua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x2) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsetua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0010 0011 0111 00000 qqqqq ppppp 1 cpsetla1.w crqc,crpc (c3_1)
+(dni cpsetla1_w_C3 "cpsetla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsetla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x3) (f-sub4 7)
+ (f-ivc2-5u16 #x0) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpsetla1_w" pc crqc crpc) )
+ ()
+ )
+
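+; The a1 accumulator block above clears state in a fixed pattern: a
+; u directly before a1 in the mnemonic (e.g. cpaddua1.h) means only
+; the upper half, ivc2_acc1_4 .. ivc2_acc1_7, is reset; an l there
+; means only the lower half, acc1_0 .. acc1_3; all other forms reset
+; all eight (a trailing u, as in cpadda1u.b, marks unsignedness
+; instead).  The *ac* variants also reset the ivc2_cofa1 flag.
+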
+; 1111 000 ooooo 0111 00100 00000 00000 1 cpmova1.b =croc (c3_1)
+(dni cpmova1_b_C3 "cpmova1.b $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmova1_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cpmova1.b $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmova1_b" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00010 1 cpmovua1.h =croc (c3_1)
+(dni cpmovua1_h_C3 "cpmovua1.h $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovua1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovua1.h $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x2) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovua1_h" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00011 1 cpmovla1.h =croc (c3_1)
+(dni cpmovla1_h_C3 "cpmovla1.h $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovla1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovla1.h $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x3) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovla1_h" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00100 1 cpmovuua1.w =croc (c3_1)
+(dni cpmovuua1_w_C3 "cpmovuua1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovuua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovuua1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x4) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovuua1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00101 1 cpmovula1.w =croc (c3_1)
+(dni cpmovula1_w_C3 "cpmovula1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovula1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovula1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x5) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovula1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00110 1 cpmovlua1.w =croc (c3_1)
+(dni cpmovlua1_w_C3 "cpmovlua1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovlua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlua1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x6) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovlua1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 00111 1 cpmovlla1.w =croc (c3_1)
+(dni cpmovlla1_w_C3 "cpmovlla1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovlla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlla1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x7) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovlla1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10000 1 cppacka1u.b =croc (c3_1)
+(dni cppacka1u_b_C3 "cppacka1u.b $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppacka1u_b") (CPTYPE V8UQI) (CRET FIRST) VOLATILE)
+ "cppacka1u.b $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x10) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppacka1u_b" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10001 1 cppacka1.b =croc (c3_1)
+(dni cppacka1_b_C3 "cppacka1.b $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppacka1_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cppacka1.b $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x11) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppacka1_b" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10010 1 cppackua1.h =croc (c3_1)
+(dni cppackua1_h_C3 "cppackua1.h $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppackua1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackua1.h $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x12) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppackua1_h" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10011 1 cppackla1.h =croc (c3_1)
+(dni cppackla1_h_C3 "cppackla1.h $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppackla1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackla1.h $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x13) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppackla1_h" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10100 1 cppackua1.w =croc (c3_1)
+(dni cppackua1_w_C3 "cppackua1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppackua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackua1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x14) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppackua1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10101 1 cppackla1.w =croc (c3_1)
+(dni cppackla1_w_C3 "cppackla1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cppackla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackla1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x15) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cppackla1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10110 1 cpmovhua1.w =croc (c3_1)
+(dni cpmovhua1_w_C3 "cpmovhua1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovhua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhua1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x16) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovhua1_w" pc)) )
+ ()
+ )
+
+; 1111 000 ooooo 0111 00100 00000 10111 1 cpmovhla1.w =croc (c3_1)
+(dni cpmovhla1_w_C3 "cpmovhla1.w $croc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmovhla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhla1.w $croc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) croc (f-sub4 7)
+ (f-ivc2-5u16 #x4) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x17) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set croc (c-call DI "ivc2_cpmovhla1_w" pc)) )
+ ()
+ )
+
+; 1111 0000 0000 0111 00010 qqqqq 00000 1 cpsrla1 crqc (c3_1)
+(dni cpsrla1_C3 "cpsrla1 $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrla1") VOLATILE)
+ "cpsrla1 $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsrla1" pc crqc) )
+ ()
+ )
+
+; 1111 0000 0001 0111 00010 qqqqq 00000 1 cpsraa1 crqc (c3_1)
+(dni cpsraa1_C3 "cpsraa1 $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsraa1") VOLATILE)
+ "cpsraa1 $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsraa1" pc crqc) )
+ ()
+ )
+
+; 1111 0000 0010 0111 00010 qqqqq 00000 1 cpslla1 crqc (c3_1)
+(dni cpslla1_C3 "cpslla1 $crqc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpslla1") VOLATILE)
+ "cpslla1 $crqc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x2) (f-sub4 7)
+ (f-ivc2-5u16 #x2) crqc (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpslla1" pc crqc) )
+ ()
+ )
+
+; 1111 00xi iiii 0111 00011 00000 00000 1 cpsrlia1 imm5p7 (c3_imm)
+(dni cpsrlia1_C3 "cpsrlia1 $imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsrlia1") VOLATILE)
+ "cpsrlia1 $imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x0) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x3) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsrlia1" pc imm5p7) )
+ ()
+ )
+
+; 1111 01xi iiii 0111 00011 00000 00000 1 cpsraia1 imm5p7 (c3_imm)
+(dni cpsraia1_C3 "cpsraia1 $imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsraia1") VOLATILE)
+ "cpsraia1 $imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x1) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x3) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsraia1" pc imm5p7) )
+ ()
+ )
+
+; 1111 10xi iiii 0111 00011 00000 00000 1 cpsllia1 imm5p7 (c3_imm)
+(dni cpsllia1_C3 "cpsllia1 $imm5p7 C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsllia1") VOLATILE)
+ "cpsllia1 $imm5p7"
+ (+ MAJ_15 ivc-x-6-1 (f-ivc2-2u4 #x2) imm5p7 (f-sub4 7)
+ (f-ivc2-5u16 #x3) (f-ivc2-5u21 #x0) (f-ivc2-5u26 #x0) (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsllia1" pc imm5p7) )
+ ()
+ )
+
+; 1111 0000 0000 0111 00001 qqqqq ppppp 1 cpssqa1u.b crqc,crpc (c3_1)
+(dni cpssqa1u_b_C3 "cpssqa1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssqa1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpssqa1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x0) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssqa1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0001 0111 00001 qqqqq ppppp 1 cpssqa1.b crqc,crpc (c3_1)
+(dni cpssqa1_b_C3 "cpssqa1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssqa1_b") (CPTYPE V8QI) VOLATILE)
+ "cpssqa1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssqa1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0100 0111 00001 qqqqq ppppp 1 cpssda1u.b crqc,crpc (c3_1)
+(dni cpssda1u_b_C3 "cpssda1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssda1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpssda1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x4) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpssda1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 0101 0111 00001 qqqqq ppppp 1 cpssda1.b crqc,crpc (c3_1)
+(dni cpssda1_b_C3 "cpssda1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpssda1_b") (CPTYPE V8QI) VOLATILE)
+ "cpssda1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x5) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpssda1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1000 0111 00001 qqqqq ppppp 1 cpmula1u.b crqc,crpc (c3_1)
+(dni cpmula1u_b_C3 "cpmula1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmula1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpmula1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x8) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmula1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1001 0111 00001 qqqqq ppppp 1 cpmula1.b crqc,crpc (c3_1)
+(dni cpmula1_b_C3 "cpmula1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmula1_b") (CPTYPE V8QI) VOLATILE)
+ "cpmula1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x9) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmula1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1010 0111 00001 qqqqq ppppp 1 cpmulua1.h crqc,crpc (c3_1)
+(dni cpmulua1_h_C3 "cpmulua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xa) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1011 0111 00001 qqqqq ppppp 1 cpmulla1.h crqc,crpc (c3_1)
+(dni cpmulla1_h_C3 "cpmulla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1100 0111 00001 qqqqq ppppp 1 cpmulua1u.w crqc,crpc (c3_1)
+(dni cpmulua1u_w_C3 "cpmulua1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmulua1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xc) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1101 0111 00001 qqqqq ppppp 1 cpmulla1u.w crqc,crpc (c3_1)
+(dni cpmulla1u_w_C3 "cpmulla1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmulla1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xd) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1110 0111 00001 qqqqq ppppp 1 cpmulua1.w crqc,crpc (c3_1)
+(dni cpmulua1_w_C3 "cpmulua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xe) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0000 1111 0111 00001 qqqqq ppppp 1 cpmulla1.w crqc,crpc (c3_1)
+(dni cpmulla1_w_C3 "cpmulla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #xf) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0000 0111 00001 qqqqq ppppp 1 cpmada1u.b crqc,crpc (c3_1)
+(dni cpmada1u_b_C3 "cpmada1u.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmada1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpmada1u.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x10) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmada1u_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0001 0111 00001 qqqqq ppppp 1 cpmada1.b crqc,crpc (c3_1)
+(dni cpmada1_b_C3 "cpmada1.b $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmada1_b") (CPTYPE V8QI) VOLATILE)
+ "cpmada1.b $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x11) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmada1_b" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0010 0111 00001 qqqqq ppppp 1 cpmadua1.h crqc,crpc (c3_1)
+(dni cpmadua1_h_C3 "cpmadua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmadua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x12) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0011 0111 00001 qqqqq ppppp 1 cpmadla1.h crqc,crpc (c3_1)
+(dni cpmadla1_h_C3 "cpmadla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmadla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0100 0111 00001 qqqqq ppppp 1 cpmadua1u.w crqc,crpc (c3_1)
+(dni cpmadua1u_w_C3 "cpmadua1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmadua1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x14) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0101 0111 00001 qqqqq ppppp 1 cpmadla1u.w crqc,crpc (c3_1)
+(dni cpmadla1u_w_C3 "cpmadla1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmadla1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x15) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0110 0111 00001 qqqqq ppppp 1 cpmadua1.w crqc,crpc (c3_1)
+(dni cpmadua1_w_C3 "cpmadua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmadua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x16) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 0111 0111 00001 qqqqq ppppp 1 cpmadla1.w crqc,crpc (c3_1)
+(dni cpmadla1_w_C3 "cpmadla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmadla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmadla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x17) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1010 0111 00001 qqqqq ppppp 1 cpmsbua1.h crqc,crpc (c3_1)
+(dni cpmsbua1_h_C3 "cpmsbua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmsbua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1a) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1011 0111 00001 qqqqq ppppp 1 cpmsbla1.h crqc,crpc (c3_1)
+(dni cpmsbla1_h_C3 "cpmsbla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmsbla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1100 0111 00001 qqqqq ppppp 1 cpmsbua1u.w crqc,crpc (c3_1)
+(dni cpmsbua1u_w_C3 "cpmsbua1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmsbua1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1c) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1101 0111 00001 qqqqq ppppp 1 cpmsbla1u.w crqc,crpc (c3_1)
+(dni cpmsbla1u_w_C3 "cpmsbla1u.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmsbla1u.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1d) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1u_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1110 0111 00001 qqqqq ppppp 1 cpmsbua1.w crqc,crpc (c3_1)
+(dni cpmsbua1_w_C3 "cpmsbua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmsbua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1e) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0001 1111 0111 00001 qqqqq ppppp 1 cpmsbla1.w crqc,crpc (c3_1)
+(dni cpmsbla1_w_C3 "cpmsbla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmsbla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmsbla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x0) (f-ivc2-5u7 #x1f) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0010 0111 00001 qqqqq ppppp 1 cpsmadua1.h crqc,crpc (c3_1)
+(dni cpsmadua1_h_C3 "cpsmadua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x12) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0011 0111 00001 qqqqq ppppp 1 cpsmadla1.h crqc,crpc (c3_1)
+(dni cpsmadla1_h_C3 "cpsmadla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0110 0111 00001 qqqqq ppppp 1 cpsmadua1.w crqc,crpc (c3_1)
+(dni cpsmadua1_w_C3 "cpsmadua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x16) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 0111 0111 00001 qqqqq ppppp 1 cpsmadla1.w crqc,crpc (c3_1)
+(dni cpsmadla1_w_C3 "cpsmadla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x17) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1010 0111 00001 qqqqq ppppp 1 cpsmsbua1.h crqc,crpc (c3_1)
+(dni cpsmsbua1_h_C3 "cpsmsbua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1a) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1011 0111 00001 qqqqq ppppp 1 cpsmsbla1.h crqc,crpc (c3_1)
+(dni cpsmsbla1_h_C3 "cpsmsbla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1110 0111 00001 qqqqq ppppp 1 cpsmsbua1.w crqc,crpc (c3_1)
+(dni cpsmsbua1_w_C3 "cpsmsbua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1e) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0011 1111 0111 00001 qqqqq ppppp 1 cpsmsbla1.w crqc,crpc (c3_1)
+(dni cpsmsbla1_w_C3 "cpsmsbla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x1) (f-ivc2-5u7 #x1f) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1010 0111 00001 qqqqq ppppp 1 cpmulslua1.h crqc,crpc (c3_1)
+(dni cpmulslua1_h_C3 "cpmulslua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulslua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xa) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1011 0111 00001 qqqqq ppppp 1 cpmulslla1.h crqc,crpc (c3_1)
+(dni cpmulslla1_h_C3 "cpmulslla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulslla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xb) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1110 0111 00001 qqqqq ppppp 1 cpmulslua1.w crqc,crpc (c3_1)
+(dni cpmulslua1_w_C3 "cpmulslua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulslua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xe) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0100 1111 0111 00001 qqqqq ppppp 1 cpmulslla1.w crqc,crpc (c3_1)
+(dni cpmulslla1_w_C3 "cpmulslla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpmulslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulslla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x2) (f-ivc2-5u7 #xf) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 0010 0111 00001 qqqqq ppppp 1 cpsmadslua1.h crqc,crpc (c3_1)
+(dni cpsmadslua1_h_C3 "cpsmadslua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadslua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x12) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 0011 0111 00001 qqqqq ppppp 1 cpsmadslla1.h crqc,crpc (c3_1)
+(dni cpsmadslla1_h_C3 "cpsmadslla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadslla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x13) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 0110 0111 00001 qqqqq ppppp 1 cpsmadslua1.w crqc,crpc (c3_1)
+(dni cpsmadslua1_w_C3 "cpsmadslua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadslua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x16) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 0111 0111 00001 qqqqq ppppp 1 cpsmadslla1.w crqc,crpc (c3_1)
+(dni cpsmadslla1_w_C3 "cpsmadslla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmadslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadslla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x17) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslla1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 1010 0111 00001 qqqqq ppppp 1 cpsmsbslua1.h crqc,crpc (c3_1)
+(dni cpsmsbslua1_h_C3 "cpsmsbslua1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbslua1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x1a) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslua1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 1011 0111 00001 qqqqq ppppp 1 cpsmsbslla1.h crqc,crpc (c3_1)
+(dni cpsmsbslla1_h_C3 "cpsmsbslla1.h $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbslla1.h $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x1b) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslla1_h" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 1110 0111 00001 qqqqq ppppp 1 cpsmsbslua1.w crqc,crpc (c3_1)
+(dni cpsmsbslua1_w_C3 "cpsmsbslua1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbslua1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x1e) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslua1_w" pc crqc crpc) )
+ ()
+ )
+
+; 1111 0111 1111 0111 00001 qqqqq ppppp 1 cpsmsbslla1.w crqc,crpc (c3_1)
+(dni cpsmsbslla1_w_C3 "cpsmsbslla1.w $crqc,$crpc C3"
+ (OPTIONAL_CP_INSN ivc2-c3-isa (SLOTS C3) (INTRINSIC "cpsmsbslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbslla1.w $crqc,$crpc"
+ (+ MAJ_15 (f-ivc2-3u4 #x3) (f-ivc2-5u7 #x1f) (f-sub4 7)
+ (f-ivc2-5u16 #x1) crqc crpc (f-ivc2-1u31 #x1) )
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslla1_w" pc crqc crpc) )
+ ()
+ )
+
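+; End of the MAJ_15-encoded C3-slot entries.  The entries that
+; follow describe the P0, P0S and P1 slots: their encodings are
+; built from f-ivc2-* fields alone (no MAJ_15/f-sub4), and results
+; are typically written back with (set crop (c-call DI ...)) rather
+; than by clearing accumulator state.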
+; 00000 00000 00000 00000 c0nop (p0_1)
+(dni c0nop_P0_P0S "c0nop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p0-isa (SLOTS P0,P0S) (INTRINSIC "c0nop"))
+ "c0nop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x0) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x0) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_c0nop" pc) )
+ ()
+ )
+
+; 00001 qqqqq ppppp ooooo cpadd3.b =crop,crqp,crpp (p0_1)
+(dni cpadd3_b_P0S_P1 "cpadd3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpadd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpadd3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpadd3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010 qqqqq ppppp ooooo cpadd3.h =crop,crqp,crpp (p0_1)
+(dni cpadd3_h_P0S_P1 "cpadd3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpadd3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpadd3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x2) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpadd3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00011 qqqqq ppppp ooooo cpadd3.w =crop,crqp,crpp (p0_1)
+(dni cpadd3_w_P0S_P1 "cpadd3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpadd3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpadd3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x3) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpadd3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00101 qqqqq ppppp ooooo cpunpacku.b =crop,crqp,crpp (p0_1)
+(dni cpunpacku_b_P0S_P1 "cpunpacku.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpacku_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpunpacku.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x5) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpacku_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00110 qqqqq ppppp ooooo cpunpacku.h =crop,crqp,crpp (p0_1)
+(dni cpunpacku_h_P0S_P1 "cpunpacku.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpacku_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpunpacku.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x6) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpacku_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00111 qqqqq ppppp ooooo cpunpacku.w =crop,crqp,crpp (p0_1)
+(dni cpunpacku_w_P0S_P1 "cpunpacku.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpacku_w") (CPTYPE V2USI) (CRET FIRST))
+ "cpunpacku.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x7) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpacku_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01001 qqqqq ppppp ooooo cpunpackl.b =crop,crqp,crpp (p0_1)
+(dni cpunpackl_b_P0S_P1 "cpunpackl.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpackl_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpunpackl.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x9) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpackl_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01010 qqqqq ppppp ooooo cpunpackl.h =crop,crqp,crpp (p0_1)
+(dni cpunpackl_h_P0S_P1 "cpunpackl.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpackl_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpunpackl.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #xa) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpackl_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01011 qqqqq ppppp ooooo cpunpackl.w =crop,crqp,crpp (p0_1)
+(dni cpunpackl_w_P0S_P1 "cpunpackl.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpunpackl_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpunpackl.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #xb) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpunpackl_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00100 qqqqq ppppp ooooo cpsel =crop,crqp,crpp (p0_1)
+(dni cpsel_P0S_P1 "cpsel $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpsel") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpsel $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x4) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsel" pc crqp crpp)) )
+ ()
+ )
+
+; 01100 qqqqq ppppp ooooo cpfsftbs0 =crop,crqp,crpp (p0_1)
+(dni cpfsftbs0_P0S_P1 "cpfsftbs0 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpfsftbs0") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpfsftbs0 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #xc) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpfsftbs0" pc crqp crpp)) )
+ ()
+ )
+
+; 01101 qqqqq ppppp ooooo cpfsftbs1 =crop,crqp,crpp (p0_1)
+(dni cpfsftbs1_P0S_P1 "cpfsftbs1 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpfsftbs1") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpfsftbs1 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #xd) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpfsftbs1" pc crqp crpp)) )
+ ()
+ )
+
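+; Two-operand group: every entry from cpmov through cdcastw keeps
+; (f-ivc2-5u8 #x10) and selects the operation with the f-ivc2-5u18
+; subcode (#x0..#x1f).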
+; 10000 qqqqq 00000 ooooo cpmov =crop,crqp (p0_1)
+(dni cpmov_P0S_P1 "cpmov $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmov") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cpmov $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x0) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmov" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00001 ooooo cpabsz.b =crop,crqp (p0_1)
+(dni cpabsz_b_P0S_P1 "cpabsz.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpabsz_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabsz.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabsz_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00010 ooooo cpabsz.h =crop,crqp (p0_1)
+(dni cpabsz_h_P0S_P1 "cpabsz.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpabsz_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpabsz.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x2) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabsz_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00011 ooooo cpabsz.w =crop,crqp (p0_1)
+(dni cpabsz_w_P0S_P1 "cpabsz.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpabsz_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpabsz.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x3) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabsz_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00100 ooooo cpldz.h =crop,crqp (p0_1)
+(dni cpldz_h_P0S_P1 "cpldz.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpldz_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpldz.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x4) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpldz_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00101 ooooo cpldz.w =crop,crqp (p0_1)
+(dni cpldz_w_P0S_P1 "cpldz.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpldz_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpldz.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x5) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpldz_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00110 ooooo cpnorm.h =crop,crqp (p0_1)
+(dni cpnorm_h_P0S_P1 "cpnorm.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpnorm_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpnorm.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x6) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpnorm_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 00111 ooooo cpnorm.w =crop,crqp (p0_1)
+(dni cpnorm_w_P0S_P1 "cpnorm.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpnorm_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpnorm.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x7) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpnorm_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01000 ooooo cphaddu.b =crop,crqp (p0_1)
+(dni cphaddu_b_P0S_P1 "cphaddu.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cphaddu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cphaddu.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x8) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cphaddu_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01001 ooooo cphadd.b =crop,crqp (p0_1)
+(dni cphadd_b_P0S_P1 "cphadd.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cphadd_b") (CPTYPE V8QI) (CRET FIRST))
+ "cphadd.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x9) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cphadd_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01010 ooooo cphadd.h =crop,crqp (p0_1)
+(dni cphadd_h_P0S_P1 "cphadd.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cphadd_h") (CPTYPE V4HI) (CRET FIRST))
+ "cphadd.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xa) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cphadd_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01011 ooooo cphadd.w =crop,crqp (p0_1)
+(dni cphadd_w_P0S_P1 "cphadd.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cphadd_w") (CPTYPE V2SI) (CRET FIRST))
+ "cphadd.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xb) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cphadd_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01100 00000 cpccadd.b +crqp (p0_1)
+(dni cpccadd_b_P0S_P1 "cpccadd.b $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpccadd_b") (CPTYPE V8QI) (CRET FIRSTCOPY) VOLATILE)
+ "cpccadd.b $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xc) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cpccadd_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01101 ooooo cpbcast.b =crop,crqp (p0_1)
+(dni cpbcast_b_P0S_P1 "cpbcast.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpbcast_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpbcast.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xd) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpbcast_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01110 ooooo cpbcast.h =crop,crqp (p0_1)
+(dni cpbcast_h_P0S_P1 "cpbcast.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpbcast_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpbcast.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xe) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpbcast_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 01111 ooooo cpbcast.w =crop,crqp (p0_1)
+(dni cpbcast_w_P0S_P1 "cpbcast.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpbcast_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpbcast.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #xf) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpbcast_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10000 ooooo cpextuu.b =crop,crqp (p0_1)
+(dni cpextuu_b_P0S_P1 "cpextuu.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextuu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextuu.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x10) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextuu_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10001 ooooo cpextu.b =crop,crqp (p0_1)
+(dni cpextu_b_P0S_P1 "cpextu.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextu.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x11) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextu_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10010 ooooo cpextuu.h =crop,crqp (p0_1)
+(dni cpextuu_h_P0S_P1 "cpextuu.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextuu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextuu.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x12) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextuu_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10011 ooooo cpextu.h =crop,crqp (p0_1)
+(dni cpextu_h_P0S_P1 "cpextu.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextu.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x13) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextu_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10100 ooooo cpextlu.b =crop,crqp (p0_1)
+(dni cpextlu_b_P0S_P1 "cpextlu.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextlu_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cpextlu.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x14) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextlu_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10101 ooooo cpextl.b =crop,crqp (p0_1)
+(dni cpextl_b_P0S_P1 "cpextl.b $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextl_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextl.b $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x15) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextl_b" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10110 ooooo cpextlu.h =crop,crqp (p0_1)
+(dni cpextlu_h_P0S_P1 "cpextlu.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextlu_h") (CPTYPE V4UHI) (CRET FIRST))
+ "cpextlu.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x16) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextlu_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 10111 ooooo cpextl.h =crop,crqp (p0_1)
+(dni cpextl_h_P0S_P1 "cpextl.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpextl_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpextl.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x17) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextl_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11000 ooooo cpcastub.h =crop,crqp (p0_1)
+(dni cpcastub_h_P0S_P1 "cpcastub.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcastub_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpcastub.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x18) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcastub_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11001 ooooo cpcastb.h =crop,crqp (p0_1)
+(dni cpcastb_h_P0S_P1 "cpcastb.h $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcastb_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpcastb.h $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x19) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcastb_h" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11100 ooooo cpcastub.w =crop,crqp (p0_1)
+(dni cpcastub_w_P0S_P1 "cpcastub.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcastub_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastub.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1c) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcastub_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11101 ooooo cpcastb.w =crop,crqp (p0_1)
+(dni cpcastb_w_P0S_P1 "cpcastb.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcastb_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastb.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1d) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcastb_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11110 ooooo cpcastuh.w =crop,crqp (p0_1)
+(dni cpcastuh_w_P0S_P1 "cpcastuh.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcastuh_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcastuh.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1e) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcastuh_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11111 ooooo cpcasth.w =crop,crqp (p0_1)
+(dni cpcasth_w_P0S_P1 "cpcasth.w $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcasth_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpcasth.w $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1f) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpcasth_w" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11010 ooooo cdcastuw =crop,crqp (p0_1)
+(dni cdcastuw_P0S_P1 "cdcastuw $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cdcastuw") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdcastuw $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1a) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdcastuw" pc crqp)) )
+ ()
+ )
+
+; 10000 qqqqq 11011 ooooo cdcastw =crop,crqp (p0_1)
+(dni cdcastw_P0S_P1 "cdcastw $crop,$crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cdcastw") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdcastw $crop,$crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x10) crqp (f-ivc2-5u18 #x1b) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdcastw" pc crqp)) )
+ ()
+ )
+
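+; Control-register moves: cpmovfrcsar0/1 and cpmovfrcc copy CSAR0,
+; CSAR1 and CC into a coprocessor register; cpmovtocsar0/1 and
+; cpmovtocc go the other way, zeroing the modeled ivc2_csar0,
+; ivc2_csar1 or ivc2_cc state before the ivc2_* helper runs.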
+; 10001 00000 00000 ooooo cpmovfrcsar0 =crop (p0_1)
+(dni cpmovfrcsar0_P0S_P1 "cpmovfrcsar0 $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovfrcsar0") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcsar0 $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x0) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovfrcsar0" pc)) )
+ ()
+ )
+
+; 10001 00000 01111 ooooo cpmovfrcsar1 =crop (p0_1)
+(dni cpmovfrcsar1_P0S_P1 "cpmovfrcsar1 $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovfrcsar1") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcsar1 $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xf) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovfrcsar1" pc)) )
+ ()
+ )
+
+; 10001 00000 00001 ooooo cpmovfrcc =crop (p0_1)
+(dni cpmovfrcc_P0S_P1 "cpmovfrcc $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovfrcc") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST) VOLATILE)
+ "cpmovfrcc $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovfrcc" pc)) )
+ ()
+ )
+
+; 10001 qqqqq 10000 00000 cpmovtocsar0 crqp (p0_1)
+(dni cpmovtocsar0_P0S_P1 "cpmovtocsar0 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovtocsar0") VOLATILE)
+ "cpmovtocsar0 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) crqp (f-ivc2-5u18 #x10) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_csar0 0)
+ (c-call "ivc2_cpmovtocsar0" pc crqp) )
+ ()
+ )
+
+; 10001 qqqqq 11111 00000 cpmovtocsar1 crqp (p0_1)
+(dni cpmovtocsar1_P0S_P1 "cpmovtocsar1 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovtocsar1") VOLATILE)
+ "cpmovtocsar1 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) crqp (f-ivc2-5u18 #x1f) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_csar1 0)
+ (c-call "ivc2_cpmovtocsar1" pc crqp) )
+ ()
+ )
+
+; 10001 qqqqq 10001 00000 cpmovtocc crqp (p0_1)
+(dni cpmovtocc_P0S_P1 "cpmovtocc $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovtocc") VOLATILE)
+ "cpmovtocc $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x11) crqp (f-ivc2-5u18 #x11) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpmovtocc" pc crqp) )
+ ()
+ )
+
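+; Vector compares (major opcode #x12).  Each body runs check_option_cp,
+; then does (set ivc2_cc 0) before the helper call; the zero write
+; presumably just exposes the condition register to CGEN's dataflow,
+; with the actual comparison result computed inside the ivc2_cpcmp* C
+; helper.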
+; 10010 qqqqq ppppp 00000 cpcmpeqz.b crqp,crpp (p0_1)
+(dni cpcmpeqz_b_P0S_P1 "cpcmpeqz.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpeqz_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpeqz.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeqz_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 00001 cpcmpeq.b crqp,crpp (p0_1)
+(dni cpcmpeq_b_P0S_P1 "cpcmpeq.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpeq_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpeq.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 00011 cpcmpeq.h crqp,crpp (p0_1)
+(dni cpcmpeq_h_P0S_P1 "cpcmpeq.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpeq_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpeq.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_h" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 00101 cpcmpeq.w crqp,crpp (p0_1)
+(dni cpcmpeq_w_P0S_P1 "cpcmpeq.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpeq_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpeq.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpeq_w" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 01001 cpcmpne.b crqp,crpp (p0_1)
+(dni cpcmpne_b_P0S_P1 "cpcmpne.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpne_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpne.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 01011 cpcmpne.h crqp,crpp (p0_1)
+(dni cpcmpne_h_P0S_P1 "cpcmpne.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpne_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpne.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_h" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 01101 cpcmpne.w crqp,crpp (p0_1)
+(dni cpcmpne_w_P0S_P1 "cpcmpne.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpne_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpne.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpne_w" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 10000 cpcmpgtu.b crqp,crpp (p0_1)
+(dni cpcmpgtu_b_P0S_P1 "cpcmpgtu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgtu_b") (CPTYPE V8UQI) VOLATILE)
+ "cpcmpgtu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgtu_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 10001 cpcmpgt.b crqp,crpp (p0_1)
+(dni cpcmpgt_b_P0S_P1 "cpcmpgt.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgt_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpgt.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 10011 cpcmpgt.h crqp,crpp (p0_1)
+(dni cpcmpgt_h_P0S_P1 "cpcmpgt.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgt_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpgt.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_h" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 10100 cpcmpgtu.w crqp,crpp (p0_1)
+(dni cpcmpgtu_w_P0S_P1 "cpcmpgtu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgtu_w") (CPTYPE V2USI) VOLATILE)
+ "cpcmpgtu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgtu_w" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 10101 cpcmpgt.w crqp,crpp (p0_1)
+(dni cpcmpgt_w_P0S_P1 "cpcmpgt.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgt_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpgt.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgt_w" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 11000 cpcmpgeu.b crqp,crpp (p0_1)
+(dni cpcmpgeu_b_P0S_P1 "cpcmpgeu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgeu_b") (CPTYPE V8UQI) VOLATILE)
+ "cpcmpgeu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x18) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgeu_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 11001 cpcmpge.b crqp,crpp (p0_1)
+(dni cpcmpge_b_P0S_P1 "cpcmpge.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpge_b") (CPTYPE V8QI) VOLATILE)
+ "cpcmpge.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x19) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_b" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 11011 cpcmpge.h crqp,crpp (p0_1)
+(dni cpcmpge_h_P0S_P1 "cpcmpge.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpge_h") (CPTYPE V4HI) VOLATILE)
+ "cpcmpge.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_h" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 11100 cpcmpgeu.w crqp,crpp (p0_1)
+(dni cpcmpgeu_w_P0S_P1 "cpcmpgeu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpgeu_w") (CPTYPE V2USI) VOLATILE)
+ "cpcmpgeu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpgeu_w" pc crqp crpp) )
+ ()
+ )
+
+; 10010 qqqqq ppppp 11101 cpcmpge.w crqp,crpp (p0_1)
+(dni cpcmpge_w_P0S_P1 "cpcmpge.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpcmpge_w") (CPTYPE V2SI) VOLATILE)
+ "cpcmpge.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x12) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cc 0)
+ (c-call "ivc2_cpcmpge_w" pc crqp crpp) )
+ ()
+ )
+
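+; Accumulator-0 arithmetic (major opcode #x18, P0S slot only).  Each
+; entry zero-marks exactly the ivc2_acc0_* slices it writes: byte forms
+; touch acc0_0..7, the "u"/"l" halfword forms only the upper (acc0_4..7)
+; or lower (acc0_0..3) half, and forms with a carry side effect also
+; mark ivc2_cofa0.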
+; 11000 qqqqq ppppp 00000 cpadda0u.b crqp,crpp (p0_1)
+(dni cpadda0u_b_P0S "cpadda0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpadda0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpadda0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpadda0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00001 cpadda0.b crqp,crpp (p0_1)
+(dni cpadda0_b_P0S "cpadda0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpadda0_b") (CPTYPE V8QI) VOLATILE)
+ "cpadda0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpadda0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00010 cpaddua0.h crqp,crpp (p0_1)
+(dni cpaddua0_h_P0S "cpaddua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x2) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpaddua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00011 cpaddla0.h crqp,crpp (p0_1)
+(dni cpaddla0_h_P0S "cpaddla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpaddla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00100 cpaddaca0u.b crqp,crpp (p0_1)
+(dni cpaddaca0u_b_P0S "cpaddaca0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddaca0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpaddaca0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x4) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpaddaca0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00101 cpaddaca0.b crqp,crpp (p0_1)
+(dni cpaddaca0_b_P0S "cpaddaca0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddaca0_b") (CPTYPE V8QI) VOLATILE)
+ "cpaddaca0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpaddaca0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00110 cpaddacua0.h crqp,crpp (p0_1)
+(dni cpaddacua0_h_P0S "cpaddacua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddacua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x6) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpaddacua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 00111 cpaddacla0.h crqp,crpp (p0_1)
+(dni cpaddacla0_h_P0S "cpaddacla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaddacla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x7) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpaddacla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01000 cpsuba0u.b crqp,crpp (p0_1)
+(dni cpsuba0u_b_P0S "cpsuba0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsuba0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsuba0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x8) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsuba0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01001 cpsuba0.b crqp,crpp (p0_1)
+(dni cpsuba0_b_P0S "cpsuba0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsuba0_b") (CPTYPE V8QI) VOLATILE)
+ "cpsuba0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsuba0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01010 cpsubua0.h crqp,crpp (p0_1)
+(dni cpsubua0_h_P0S "cpsubua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsubua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01011 cpsubla0.h crqp,crpp (p0_1)
+(dni cpsubla0_h_P0S "cpsubla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpsubla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01100 cpsubaca0u.b crqp,crpp (p0_1)
+(dni cpsubaca0u_b_P0S "cpsubaca0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubaca0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsubaca0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xc) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsubaca0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01101 cpsubaca0.b crqp,crpp (p0_1)
+(dni cpsubaca0_b_P0S "cpsubaca0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubaca0_b") (CPTYPE V8QI) VOLATILE)
+ "cpsubaca0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsubaca0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01110 cpsubacua0.h crqp,crpp (p0_1)
+(dni cpsubacua0_h_P0S "cpsubacua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubacua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsubacua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 01111 cpsubacla0.h crqp,crpp (p0_1)
+(dni cpsubacla0_h_P0S "cpsubacla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsubacla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsubacla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10000 cpabsa0u.b crqp,crpp (p0_1)
+(dni cpabsa0u_b_P0S "cpabsa0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpabsa0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpabsa0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpabsa0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10001 cpabsa0.b crqp,crpp (p0_1)
+(dni cpabsa0_b_P0S "cpabsa0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpabsa0_b") (CPTYPE V8QI) VOLATILE)
+ "cpabsa0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpabsa0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10010 cpabsua0.h crqp,crpp (p0_1)
+(dni cpabsua0_h_P0S "cpabsua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpabsua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpabsua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10011 cpabsla0.h crqp,crpp (p0_1)
+(dni cpabsla0_h_P0S "cpabsla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpabsla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpabsla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10100 cpsada0u.b crqp,crpp (p0_1)
+(dni cpsada0u_b_P0S "cpsada0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsada0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsada0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsada0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10101 cpsada0.b crqp,crpp (p0_1)
+(dni cpsada0_b_P0S "cpsada0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsada0_b") (CPTYPE V8QI) VOLATILE)
+ "cpsada0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsada0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10110 cpsadua0.h crqp,crpp (p0_1)
+(dni cpsadua0_h_P0S "cpsadua0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsadua0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadua0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsadua0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 10111 cpsadla0.h crqp,crpp (p0_1)
+(dni cpsadla0_h_P0S "cpsadla0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsadla0_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadla0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpsadla0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 11011 cpseta0.h crqp,crpp (p0_1)
+(dni cpseta0_h_P0S "cpseta0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpseta0_h") (CPTYPE V4HI) VOLATILE)
+ "cpseta0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpseta0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 11100 cpsetua0.w crqp,crpp (p0_1)
+(dni cpsetua0_w_P0S "cpsetua0.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsetua0_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetua0.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsetua0_w" pc crqp crpp) )
+ ()
+ )
+
+; 11000 qqqqq ppppp 11101 cpsetla0.w crqp,crpp (p0_1)
+(dni cpsetla0_w_P0S "cpsetla0.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsetla0_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetla0.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpsetla0_w" pc crqp crpp) )
+ ()
+ )
+
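+; Accumulator-0 read-back and pack (major opcode #x19, minor #x1..#xf).
+; These return selected accumulator slices in $crop, so they carry
+; (CRET FIRST) and write no accumulator state.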
+; 11001 00000 00001 ooooo cpmova0.b =crop (p0_1)
+(dni cpmova0_b_P0S "cpmova0.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmova0_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cpmova0.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmova0_b" pc)) )
+ ()
+ )
+
+; 11001 00000 00010 ooooo cpmovua0.h =crop (p0_1)
+(dni cpmovua0_h_P0S "cpmovua0.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovua0_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovua0.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x2) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovua0_h" pc)) )
+ ()
+ )
+
+; 11001 00000 00011 ooooo cpmovla0.h =crop (p0_1)
+(dni cpmovla0_h_P0S "cpmovla0.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovla0_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovla0.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x3) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovla0_h" pc)) )
+ ()
+ )
+
+; 11001 00000 00100 ooooo cpmovuua0.w =crop (p0_1)
+(dni cpmovuua0_w_P0S "cpmovuua0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovuua0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovuua0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x4) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovuua0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 00101 ooooo cpmovula0.w =crop (p0_1)
+(dni cpmovula0_w_P0S "cpmovula0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovula0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovula0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x5) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovula0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 00110 ooooo cpmovlua0.w =crop (p0_1)
+(dni cpmovlua0_w_P0S "cpmovlua0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovlua0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlua0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x6) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovlua0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 00111 ooooo cpmovlla0.w =crop (p0_1)
+(dni cpmovlla0_w_P0S "cpmovlla0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovlla0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlla0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x7) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovlla0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 01000 ooooo cppacka0u.b =crop (p0_1)
+(dni cppacka0u_b_P0S "cppacka0u.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppacka0u_b") (CPTYPE V8UQI) (CRET FIRST) VOLATILE)
+ "cppacka0u.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x8) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppacka0u_b" pc)) )
+ ()
+ )
+
+; 11001 00000 01001 ooooo cppacka0.b =crop (p0_1)
+(dni cppacka0_b_P0S "cppacka0.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppacka0_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cppacka0.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x9) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppacka0_b" pc)) )
+ ()
+ )
+
+; 11001 00000 01010 ooooo cppackua0.h =crop (p0_1)
+(dni cppackua0_h_P0S "cppackua0.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppackua0_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackua0.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xa) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackua0_h" pc)) )
+ ()
+ )
+
+; 11001 00000 01011 ooooo cppackla0.h =crop (p0_1)
+(dni cppackla0_h_P0S "cppackla0.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppackla0_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackla0.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xb) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackla0_h" pc)) )
+ ()
+ )
+
+; 11001 00000 01100 ooooo cppackua0.w =crop (p0_1)
+(dni cppackua0_w_P0S "cppackua0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppackua0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackua0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xc) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackua0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 01101 ooooo cppackla0.w =crop (p0_1)
+(dni cppackla0_w_P0S "cppackla0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cppackla0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackla0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xd) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackla0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 01110 ooooo cpmovhua0.w =crop (p0_1)
+(dni cpmovhua0_w_P0S "cpmovhua0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovhua0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhua0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xe) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovhua0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 01111 ooooo cpmovhla0.w =crop (p0_1)
+(dni cpmovhla0_w_P0S "cpmovhla0.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpmovhla0_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhla0.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xf) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovhla0_w" pc)) )
+ ()
+ )
+
+; 11001 00000 10000 00000 cpacsuma0 (p0_1)
+(dni cpacsuma0_P0S "cpacsuma0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpacsuma0") VOLATILE)
+ "cpacsuma0"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x10) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpacsuma0" pc) )
+ ()
+ )
+
+; 11001 00000 10001 00000 cpaccpa0 (p0_1)
+(dni cpaccpa0_P0S "cpaccpa0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpaccpa0") VOLATILE)
+ "cpaccpa0"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x11) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpaccpa0" pc) )
+ ()
+ )
+
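+; Register-count accumulator shifts (minor opcodes #x18..#x1a): logical
+; right, arithmetic right and left shifts of accumulator 0 by $crqp.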
+; 11001 qqqqq 11000 00000 cpsrla0 crqp (p0_1)
+(dni cpsrla0_P0S "cpsrla0 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsrla0") VOLATILE)
+ "cpsrla0 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x18) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsrla0" pc crqp) )
+ ()
+ )
+
+; 11001 qqqqq 11001 00000 cpsraa0 crqp (p0_1)
+(dni cpsraa0_P0S "cpsraa0 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsraa0") VOLATILE)
+ "cpsraa0 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x19) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsraa0" pc crqp) )
+ ()
+ )
+
+; 11001 qqqqq 11010 00000 cpslla0 crqp (p0_1)
+(dni cpslla0_P0S "cpslla0 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpslla0") VOLATILE)
+ "cpslla0 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x1a) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpslla0" pc crqp) )
+ ()
+ )
+
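+; Immediate-count variants of the same shifts (minor opcodes #x1c..#x1e),
+; taking a 5-bit count from the imm5p23 field instead of a register.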
+; 11001 00000 11100 iiiii cpsrlia0 imm5p23 (p0_1)
+(dni cpsrlia0_P0S "cpsrlia0 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsrlia0") VOLATILE)
+ "cpsrlia0 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1c) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsrlia0" pc imm5p23) )
+ ()
+ )
+
+; 11001 00000 11101 iiiii cpsraia0 imm5p23 (p0_1)
+(dni cpsraia0_P0S "cpsraia0 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsraia0") VOLATILE)
+ "cpsraia0 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1d) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsraia0" pc imm5p23) )
+ ()
+ )
+
+; 11001 00000 11110 iiiii cpsllia0 imm5p23 (p0_1)
+(dni cpsllia0_P0S "cpsllia0 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpsllia0") VOLATILE)
+ "cpsllia0 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1e) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpsllia0" pc imm5p23) )
+ ()
+ )
+
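+; Funnel-shift-byte and fold/accumulate group (major opcode #x1f).  The
+; s0/s1 name pairs (minor #x0..#x7 versus #x8..#xf) presumably select the
+; shift amount from CSAR0 or CSAR1; the byte/halfword and signed/unsigned
+; variants otherwise follow the accumulator-write pattern above.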
+; 11111 qqqqq ppppp 00000 cpfsftba0s0u.b crqp,crpp (p0_1)
+(dni cpfsftba0s0u_b_P0S "cpfsftba0s0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftba0s0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfsftba0s0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftba0s0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00001 cpfsftba0s0.b crqp,crpp (p0_1)
+(dni cpfsftba0s0_b_P0S "cpfsftba0s0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftba0s0_b") (CPTYPE V8QI) VOLATILE)
+ "cpfsftba0s0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftba0s0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00010 cpfsftbua0s0.h crqp,crpp (p0_1)
+(dni cpfsftbua0s0_h_P0S "cpfsftbua0s0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftbua0s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfsftbua0s0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x2) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftbua0s0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00011 cpfsftbla0s0.h crqp,crpp (p0_1)
+(dni cpfsftbla0s0_h_P0S "cpfsftbla0s0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftbla0s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfsftbla0s0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpfsftbla0s0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00100 cpfaca0s0u.b crqp,crpp (p0_1)
+(dni cpfaca0s0u_b_P0S "cpfaca0s0u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfaca0s0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfaca0s0u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x4) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfaca0s0u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00101 cpfaca0s0.b crqp,crpp (p0_1)
+(dni cpfaca0s0_b_P0S "cpfaca0s0.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfaca0s0_b") (CPTYPE V8QI) VOLATILE)
+ "cpfaca0s0.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfaca0s0_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00110 cpfacua0s0.h crqp,crpp (p0_1)
+(dni cpfacua0s0_h_P0S "cpfacua0s0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfacua0s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfacua0s0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x6) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfacua0s0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 00111 cpfacla0s0.h crqp,crpp (p0_1)
+(dni cpfacla0s0_h_P0S "cpfacla0s0.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfacla0s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfacla0s0.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x7) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfacla0s0_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01000 cpfsftba0s1u.b crqp,crpp (p0_1)
+(dni cpfsftba0s1u_b_P0S "cpfsftba0s1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftba0s1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfsftba0s1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x8) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftba0s1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01001 cpfsftba0s1.b crqp,crpp (p0_1)
+(dni cpfsftba0s1_b_P0S "cpfsftba0s1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftba0s1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfsftba0s1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftba0s1_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01010 cpfsftbua0s1.h crqp,crpp (p0_1)
+(dni cpfsftbua0s1_h_P0S "cpfsftbua0s1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftbua0s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfsftbua0s1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (c-call "ivc2_cpfsftbua0s1_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01011 cpfsftbla0s1.h crqp,crpp (p0_1)
+(dni cpfsftbla0s1_h_P0S "cpfsftbla0s1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfsftbla0s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfsftbla0s1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (c-call "ivc2_cpfsftbla0s1_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01100 cpfaca0s1u.b crqp,crpp (p0_1)
+(dni cpfaca0s1u_b_P0S "cpfaca0s1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfaca0s1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfaca0s1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xc) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfaca0s1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01101 cpfaca0s1.b crqp,crpp (p0_1)
+(dni cpfaca0s1_b_P0S "cpfaca0s1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfaca0s1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfaca0s1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfaca0s1_b" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01110 cpfacua0s1.h crqp,crpp (p0_1)
+(dni cpfacua0s1_h_P0S "cpfacua0s1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfacua0s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfacua0s1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfacua0s1_h" pc crqp crpp) )
+ ()
+ )
+
+; 11111 qqqqq ppppp 01111 cpfacla0s1.h crqp,crpp (p0_1)
+(dni cpfacla0s1_h_P0S "cpfacla0s1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-isa (SLOTS P0S) (INTRINSIC "cpfacla0s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfacla0s1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_cofa0 0)
+ (c-call "ivc2_cpfacla0s1_h" pc crqp crpp) )
+ ()
+ )
+
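+; cpfsftbi: funnel shift with an explicit 3-bit immediate, encoded in
+; the iii bits of the leading byte (ivc-x-0-5 apparently covering the
+; don't-care x bits).  Unlike the P0S-only groups above, it issues on
+; slots P0,P1.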
+; xxxxxiii 01000 qqqqq ppppp ooooo cpfsftbi =crop,crqp,crpp,imm3p5 (p0_1)
+(dni cpfsftbi_P0_P1 "cpfsftbi $crop,$crqp,$crpp,imm3p5 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpfsftbi") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cpfsftbi $crop,$crqp,$crpp,$imm3p5"
+ (+ ivc-x-0-5 imm3p5 (f-ivc2-5u8 #x8) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpfsftbi" pc crqp crpp imm3p5)) )
+ ()
+ )
+
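+; cpacmp* (leading byte #x0, major opcode #x13) reuses the cpcmp minor
+; opcodes.  Unlike cpcmp*, these entries are not VOLATILE and do not
+; mark ivc2_cc, so any condition update happens entirely inside the
+; ivc2_cpacmp* helpers.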
+; 00000000 10011 qqqqq ppppp 00001 cpacmpeq.b crqp,crpp (p0_1)
+(dni cpacmpeq_b_P0_P1 "cpacmpeq.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpeq_b") (CPTYPE V8QI))
+ "cpacmpeq.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpeq_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 00011 cpacmpeq.h crqp,crpp (p0_1)
+(dni cpacmpeq_h_P0_P1 "cpacmpeq.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpeq_h") (CPTYPE V4HI))
+ "cpacmpeq.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpeq_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 00101 cpacmpeq.w crqp,crpp (p0_1)
+(dni cpacmpeq_w_P0_P1 "cpacmpeq.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpeq_w") (CPTYPE V2SI))
+ "cpacmpeq.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpeq_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 01001 cpacmpne.b crqp,crpp (p0_1)
+(dni cpacmpne_b_P0_P1 "cpacmpne.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpne_b") (CPTYPE V8QI))
+ "cpacmpne.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpne_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 01011 cpacmpne.h crqp,crpp (p0_1)
+(dni cpacmpne_h_P0_P1 "cpacmpne.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpne_h") (CPTYPE V4HI))
+ "cpacmpne.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpne_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 01101 cpacmpne.w crqp,crpp (p0_1)
+(dni cpacmpne_w_P0_P1 "cpacmpne.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpne_w") (CPTYPE V2SI))
+ "cpacmpne.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpne_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 10000 cpacmpgtu.b crqp,crpp (p0_1)
+(dni cpacmpgtu_b_P0_P1 "cpacmpgtu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgtu_b") (CPTYPE V8UQI))
+ "cpacmpgtu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgtu_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 10001 cpacmpgt.b crqp,crpp (p0_1)
+(dni cpacmpgt_b_P0_P1 "cpacmpgt.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgt_b") (CPTYPE V8QI))
+ "cpacmpgt.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgt_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 10011 cpacmpgt.h crqp,crpp (p0_1)
+(dni cpacmpgt_h_P0_P1 "cpacmpgt.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgt_h") (CPTYPE V4HI))
+ "cpacmpgt.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgt_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 10100 cpacmpgtu.w crqp,crpp (p0_1)
+(dni cpacmpgtu_w_P0_P1 "cpacmpgtu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgtu_w") (CPTYPE V2USI))
+ "cpacmpgtu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgtu_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 10101 cpacmpgt.w crqp,crpp (p0_1)
+(dni cpacmpgt_w_P0_P1 "cpacmpgt.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgt_w") (CPTYPE V2SI))
+ "cpacmpgt.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgt_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 11000 cpacmpgeu.b crqp,crpp (p0_1)
+(dni cpacmpgeu_b_P0_P1 "cpacmpgeu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgeu_b") (CPTYPE V8UQI))
+ "cpacmpgeu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x18) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgeu_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 11001 cpacmpge.b crqp,crpp (p0_1)
+(dni cpacmpge_b_P0_P1 "cpacmpge.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpge_b") (CPTYPE V8QI))
+ "cpacmpge.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x19) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpge_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 11011 cpacmpge.h crqp,crpp (p0_1)
+(dni cpacmpge_h_P0_P1 "cpacmpge.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpge_h") (CPTYPE V4HI))
+ "cpacmpge.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpge_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 11100 cpacmpgeu.w crqp,crpp (p0_1)
+(dni cpacmpgeu_w_P0_P1 "cpacmpgeu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpgeu_w") (CPTYPE V2USI))
+ "cpacmpgeu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpgeu_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 10011 qqqqq ppppp 11101 cpacmpge.w crqp,crpp (p0_1)
+(dni cpacmpge_w_P0_P1 "cpacmpge.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpacmpge_w") (CPTYPE V2SI))
+ "cpacmpge.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpacmpge_w" pc crqp crpp) )
+ ()
+ )
+
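+; cpocmp* mirrors the cpacmp* group exactly, distinguished only by the
+; leading byte (#x1 instead of #x0).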
+; 00000001 10011 qqqqq ppppp 00001 cpocmpeq.b crqp,crpp (p0_1)
+(dni cpocmpeq_b_P0_P1 "cpocmpeq.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpeq_b") (CPTYPE V8QI))
+ "cpocmpeq.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpeq_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 00011 cpocmpeq.h crqp,crpp (p0_1)
+(dni cpocmpeq_h_P0_P1 "cpocmpeq.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpeq_h") (CPTYPE V4HI))
+ "cpocmpeq.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpeq_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 00101 cpocmpeq.w crqp,crpp (p0_1)
+(dni cpocmpeq_w_P0_P1 "cpocmpeq.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpeq_w") (CPTYPE V2SI))
+ "cpocmpeq.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpeq_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 01001 cpocmpne.b crqp,crpp (p0_1)
+(dni cpocmpne_b_P0_P1 "cpocmpne.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpne_b") (CPTYPE V8QI))
+ "cpocmpne.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpne_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 01011 cpocmpne.h crqp,crpp (p0_1)
+(dni cpocmpne_h_P0_P1 "cpocmpne.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpne_h") (CPTYPE V4HI))
+ "cpocmpne.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpne_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 01101 cpocmpne.w crqp,crpp (p0_1)
+(dni cpocmpne_w_P0_P1 "cpocmpne.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpne_w") (CPTYPE V2SI))
+ "cpocmpne.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpne_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 10000 cpocmpgtu.b crqp,crpp (p0_1)
+(dni cpocmpgtu_b_P0_P1 "cpocmpgtu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgtu_b") (CPTYPE V8UQI))
+ "cpocmpgtu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgtu_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 10001 cpocmpgt.b crqp,crpp (p0_1)
+(dni cpocmpgt_b_P0_P1 "cpocmpgt.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgt_b") (CPTYPE V8QI))
+ "cpocmpgt.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgt_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 10011 cpocmpgt.h crqp,crpp (p0_1)
+(dni cpocmpgt_h_P0_P1 "cpocmpgt.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgt_h") (CPTYPE V4HI))
+ "cpocmpgt.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgt_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 10100 cpocmpgtu.w crqp,crpp (p0_1)
+(dni cpocmpgtu_w_P0_P1 "cpocmpgtu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgtu_w") (CPTYPE V2USI))
+ "cpocmpgtu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgtu_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 10101 cpocmpgt.w crqp,crpp (p0_1)
+(dni cpocmpgt_w_P0_P1 "cpocmpgt.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgt_w") (CPTYPE V2SI))
+ "cpocmpgt.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgt_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 11000 cpocmpgeu.b crqp,crpp (p0_1)
+(dni cpocmpgeu_b_P0_P1 "cpocmpgeu.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgeu_b") (CPTYPE V8UQI))
+ "cpocmpgeu.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x18) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgeu_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 11001 cpocmpge.b crqp,crpp (p0_1)
+(dni cpocmpge_b_P0_P1 "cpocmpge.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpge_b") (CPTYPE V8QI))
+ "cpocmpge.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x19) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpge_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 11011 cpocmpge.h crqp,crpp (p0_1)
+(dni cpocmpge_h_P0_P1 "cpocmpge.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpge_h") (CPTYPE V4HI))
+ "cpocmpge.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpge_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 11100 cpocmpgeu.w crqp,crpp (p0_1)
+(dni cpocmpgeu_w_P0_P1 "cpocmpgeu.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpgeu_w") (CPTYPE V2USI))
+ "cpocmpgeu.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpgeu_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 10011 qqqqq ppppp 11101 cpocmpge.w crqp,crpp (p0_1)
+(dni cpocmpge_w_P0_P1 "cpocmpge.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpocmpge_w") (CPTYPE V2SI))
+ "cpocmpge.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x13) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_cpocmpge_w" pc crqp crpp) )
+ ()
+ )
+
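+; The three-operand forms below write a destination register ($crop); the
+; (CRET FIRST) attribute apparently marks that first operand as the return
+; value of the generated intrinsic, and the semantics model the result as a
+; DI-mode value produced by the corresponding ivc2_* helper.
+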
+; 00000011 10100 qqqqq ppppp ooooo cdadd3 =crop,crqp,crpp (p0_1)
+(dni cdadd3_P0_P1 "cdadd3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdadd3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdadd3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdadd3" pc crqp crpp)) )
+ ()
+ )
+
+; 00000100 10100 qqqqq ppppp ooooo cpsub3.b =crop,crqp,crpp (p0_1)
+(dni cpsub3_b_P0_P1 "cpsub3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsub3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsub3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00000101 10100 qqqqq ppppp ooooo cpsub3.h =crop,crqp,crpp (p0_1)
+(dni cpsub3_h_P0_P1 "cpsub3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsub3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsub3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x5) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsub3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00000110 10100 qqqqq ppppp ooooo cpsub3.w =crop,crqp,crpp (p0_1)
+(dni cpsub3_w_P0_P1 "cpsub3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsub3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsub3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x6) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsub3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00000111 10100 qqqqq ppppp ooooo cdsub3 =crop,crqp,crpp (p0_1)
+(dni cdsub3_P0_P1 "cdsub3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsub3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsub3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x7) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsub3" pc crqp crpp)) )
+ ()
+ )
+
+; 00001010 10100 qqqqq ppppp ooooo cpsadd3.h =crop,crqp,crpp (p0_1)
+(dni cpsadd3_h_P0_P1 "cpsadd3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsadd3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpsadd3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #xa) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsadd3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00001011 10100 qqqqq ppppp ooooo cpsadd3.w =crop,crqp,crpp (p0_1)
+(dni cpsadd3_w_P0_P1 "cpsadd3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsadd3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpsadd3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #xb) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsadd3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00001110 10100 qqqqq ppppp ooooo cpssub3.h =crop,crqp,crpp (p0_1)
+(dni cpssub3_h_P0_P1 "cpssub3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssub3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpssub3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #xe) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cofr0 0)
+ (set crop (c-call DI "ivc2_cpssub3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00001111 10100 qqqqq ppppp ooooo cpssub3.w =crop,crqp,crpp (p0_1)
+(dni cpssub3_w_P0_P1 "cpssub3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssub3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpssub3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #xf) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_cofr0 0)
+ (set crop (c-call DI "ivc2_cpssub3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00010000 10100 qqqqq ppppp ooooo cpextuaddu3.b =crop,crqp,crpp (p0_1)
+(dni cpextuaddu3_b_P0_P1 "cpextuaddu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextuaddu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextuaddu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x10) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextuaddu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010001 10100 qqqqq ppppp ooooo cpextuadd3.b =crop,crqp,crpp (p0_1)
+(dni cpextuadd3_b_P0_P1 "cpextuadd3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextuadd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextuadd3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x11) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextuadd3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010010 10100 qqqqq ppppp ooooo cpextladdu3.b =crop,crqp,crpp (p0_1)
+(dni cpextladdu3_b_P0_P1 "cpextladdu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextladdu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextladdu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x12) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextladdu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010011 10100 qqqqq ppppp ooooo cpextladd3.b =crop,crqp,crpp (p0_1)
+(dni cpextladd3_b_P0_P1 "cpextladd3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextladd3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextladd3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x13) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextladd3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010100 10100 qqqqq ppppp ooooo cpextusubu3.b =crop,crqp,crpp (p0_1)
+(dni cpextusubu3_b_P0_P1 "cpextusubu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextusubu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextusubu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x14) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextusubu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010101 10100 qqqqq ppppp ooooo cpextusub3.b =crop,crqp,crpp (p0_1)
+(dni cpextusub3_b_P0_P1 "cpextusub3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextusub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextusub3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x15) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextusub3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010110 10100 qqqqq ppppp ooooo cpextlsubu3.b =crop,crqp,crpp (p0_1)
+(dni cpextlsubu3_b_P0_P1 "cpextlsubu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextlsubu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextlsubu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x16) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextlsubu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00010111 10100 qqqqq ppppp ooooo cpextlsub3.b =crop,crqp,crpp (p0_1)
+(dni cpextlsub3_b_P0_P1 "cpextlsub3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpextlsub3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpextlsub3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x17) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpextlsub3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00011000 10100 qqqqq ppppp ooooo cpaveu3.b =crop,crqp,crpp (p0_1)
+(dni cpaveu3_b_P0_P1 "cpaveu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpaveu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaveu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x18) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpaveu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00011001 10100 qqqqq ppppp ooooo cpave3.b =crop,crqp,crpp (p0_1)
+(dni cpave3_b_P0_P1 "cpave3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpave3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpave3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x19) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpave3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00011010 10100 qqqqq ppppp ooooo cpave3.h =crop,crqp,crpp (p0_1)
+(dni cpave3_h_P0_P1 "cpave3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpave3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpave3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1a) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpave3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00011011 10100 qqqqq ppppp ooooo cpave3.w =crop,crqp,crpp (p0_1)
+(dni cpave3_w_P0_P1 "cpave3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpave3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpave3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1b) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpave3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00011100 10100 qqqqq ppppp ooooo cpaddsru3.b =crop,crqp,crpp (p0_1)
+(dni cpaddsru3_b_P0_P1 "cpaddsru3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpaddsru3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaddsru3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1c) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpaddsru3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00011101 10100 qqqqq ppppp ooooo cpaddsr3.b =crop,crqp,crpp (p0_1)
+(dni cpaddsr3_b_P0_P1 "cpaddsr3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpaddsr3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpaddsr3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1d) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpaddsr3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00011110 10100 qqqqq ppppp ooooo cpaddsr3.h =crop,crqp,crpp (p0_1)
+(dni cpaddsr3_h_P0_P1 "cpaddsr3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpaddsr3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpaddsr3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1e) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpaddsr3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00011111 10100 qqqqq ppppp ooooo cpaddsr3.w =crop,crqp,crpp (p0_1)
+(dni cpaddsr3_w_P0_P1 "cpaddsr3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpaddsr3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpaddsr3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1f) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpaddsr3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00100000 10100 qqqqq ppppp ooooo cpabsu3.b =crop,crqp,crpp (p0_1)
+(dni cpabsu3_b_P0_P1 "cpabsu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpabsu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabsu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x20) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabsu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00100001 10100 qqqqq ppppp ooooo cpabs3.b =crop,crqp,crpp (p0_1)
+(dni cpabs3_b_P0_P1 "cpabs3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpabs3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpabs3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x21) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabs3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00100010 10100 qqqqq ppppp ooooo cpabs3.h =crop,crqp,crpp (p0_1)
+(dni cpabs3_h_P0_P1 "cpabs3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpabs3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpabs3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x22) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpabs3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00100100 10100 qqqqq ppppp ooooo cpand3 =crop,crqp,crpp (p0_1)
+(dni cpand3_P0_P1 "cpand3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpand3") (CPTYPE VECT) (CRET FIRST))
+ "cpand3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x24) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpand3" pc crqp crpp)) )
+ ()
+ )
+
+; 00100101 10100 qqqqq ppppp ooooo cpor3 =crop,crqp,crpp (p0_1)
+(dni cpor3_P0_P1 "cpor3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpor3") (CPTYPE VECT) (CRET FIRST))
+ "cpor3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x25) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpor3" pc crqp crpp)) )
+ ()
+ )
+
+; 00100110 10100 qqqqq ppppp ooooo cpnor3 =crop,crqp,crpp (p0_1)
+(dni cpnor3_P0_P1 "cpnor3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpnor3") (CPTYPE VECT) (CRET FIRST))
+ "cpnor3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x26) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpnor3" pc crqp crpp)) )
+ ()
+ )
+
+; 00100111 10100 qqqqq ppppp ooooo cpxor3 =crop,crqp,crpp (p0_1)
+(dni cpxor3_P0_P1 "cpxor3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpxor3") (CPTYPE VECT) (CRET FIRST))
+ "cpxor3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x27) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpxor3" pc crqp crpp)) )
+ ()
+ )
+
+; 00101100 10100 qqqqq ppppp ooooo cppacku.b =crop,crqp,crpp (p0_1)
+(dni cppacku_b_P0_P1 "cppacku.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cppacku_b") (CPTYPE V8UQI) (CRET FIRST))
+ "cppacku.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2c) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppacku_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00101101 10100 qqqqq ppppp ooooo cppack.b =crop,crqp,crpp (p0_1)
+(dni cppack_b_P0_P1 "cppack.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cppack_b") (CPTYPE V8QI) (CRET FIRST))
+ "cppack.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2d) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppack_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00101111 10100 qqqqq ppppp ooooo cppack.h =crop,crqp,crpp (p0_1)
+(dni cppack_h_P0_P1 "cppack.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cppack_h") (CPTYPE V4HI) (CRET FIRST))
+ "cppack.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2f) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppack_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00110000 10100 qqqqq ppppp ooooo cpmaxu3.b =crop,crqp,crpp (p0_1)
+(dni cpmaxu3_b_P0_P1 "cpmaxu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmaxu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmaxu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x30) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmaxu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00110001 10100 qqqqq ppppp ooooo cpmax3.b =crop,crqp,crpp (p0_1)
+(dni cpmax3_b_P0_P1 "cpmax3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmax3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmax3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x31) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmax3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00110011 10100 qqqqq ppppp ooooo cpmax3.h =crop,crqp,crpp (p0_1)
+(dni cpmax3_h_P0_P1 "cpmax3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmax3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmax3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x33) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmax3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00110100 10100 qqqqq ppppp ooooo cpmaxu3.w =crop,crqp,crpp (p0_1)
+(dni cpmaxu3_w_P0_P1 "cpmaxu3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmaxu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmaxu3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x34) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmaxu3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00110101 10100 qqqqq ppppp ooooo cpmax3.w =crop,crqp,crpp (p0_1)
+(dni cpmax3_w_P0_P1 "cpmax3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmax3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmax3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x35) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmax3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00111000 10100 qqqqq ppppp ooooo cpminu3.b =crop,crqp,crpp (p0_1)
+(dni cpminu3_b_P0_P1 "cpminu3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpminu3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpminu3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x38) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpminu3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00111001 10100 qqqqq ppppp ooooo cpmin3.b =crop,crqp,crpp (p0_1)
+(dni cpmin3_b_P0_P1 "cpmin3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmin3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmin3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x39) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmin3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 00111011 10100 qqqqq ppppp ooooo cpmin3.h =crop,crqp,crpp (p0_1)
+(dni cpmin3_h_P0_P1 "cpmin3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmin3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmin3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3b) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmin3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 00111100 10100 qqqqq ppppp ooooo cpminu3.w =crop,crqp,crpp (p0_1)
+(dni cpminu3_w_P0_P1 "cpminu3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpminu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpminu3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3c) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpminu3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 00111101 10100 qqqqq ppppp ooooo cpmin3.w =crop,crqp,crpp (p0_1)
+(dni cpmin3_w_P0_P1 "cpmin3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmin3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmin3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3d) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmin3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01000000 10100 qqqqq ppppp ooooo cpsrl3.b =crop,crqp,crpp (p0_1)
+(dni cpsrl3_b_P0_P1 "cpsrl3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrl3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrl3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x40) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrl3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01000001 10100 qqqqq ppppp ooooo cpssrl3.b =crop,crqp,crpp (p0_1)
+(dni cpssrl3_b_P0_P1 "cpssrl3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssrl3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssrl3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x41) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssrl3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01000010 10100 qqqqq ppppp ooooo cpsrl3.h =crop,crqp,crpp (p0_1)
+(dni cpsrl3_h_P0_P1 "cpsrl3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrl3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrl3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x42) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrl3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01000011 10100 qqqqq ppppp ooooo cpssrl3.h =crop,crqp,crpp (p0_1)
+(dni cpssrl3_h_P0_P1 "cpssrl3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssrl3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssrl3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x43) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssrl3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01000100 10100 qqqqq ppppp ooooo cpsrl3.w =crop,crqp,crpp (p0_1)
+(dni cpsrl3_w_P0_P1 "cpsrl3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrl3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrl3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x44) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrl3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01000101 10100 qqqqq ppppp ooooo cpssrl3.w =crop,crqp,crpp (p0_1)
+(dni cpssrl3_w_P0_P1 "cpssrl3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssrl3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssrl3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x45) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssrl3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01000110 10100 qqqqq ppppp ooooo cdsrl3 =crop,crqp,crpp (p0_1)
+(dni cdsrl3_P0_P1 "cdsrl3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsrl3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrl3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x46) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsrl3" pc crqp crpp)) )
+ ()
+ )
+
+; 01001000 10100 qqqqq ppppp ooooo cpsra3.b =crop,crqp,crpp (p0_1)
+(dni cpsra3_b_P0_P1 "cpsra3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsra3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsra3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x48) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsra3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01001001 10100 qqqqq ppppp ooooo cpssra3.b =crop,crqp,crpp (p0_1)
+(dni cpssra3_b_P0_P1 "cpssra3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssra3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssra3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x49) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssra3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01001010 10100 qqqqq ppppp ooooo cpsra3.h =crop,crqp,crpp (p0_1)
+(dni cpsra3_h_P0_P1 "cpsra3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsra3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsra3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4a) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsra3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01001011 10100 qqqqq ppppp ooooo cpssra3.h =crop,crqp,crpp (p0_1)
+(dni cpssra3_h_P0_P1 "cpssra3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssra3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssra3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4b) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssra3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01001100 10100 qqqqq ppppp ooooo cpsra3.w =crop,crqp,crpp (p0_1)
+(dni cpsra3_w_P0_P1 "cpsra3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsra3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsra3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4c) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsra3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01001101 10100 qqqqq ppppp ooooo cpssra3.w =crop,crqp,crpp (p0_1)
+(dni cpssra3_w_P0_P1 "cpssra3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssra3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssra3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4d) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssra3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01001110 10100 qqqqq ppppp ooooo cdsra3 =crop,crqp,crpp (p0_1)
+(dni cdsra3_P0_P1 "cdsra3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsra3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsra3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x4e) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsra3" pc crqp crpp)) )
+ ()
+ )
+
+; 01010000 10100 qqqqq ppppp ooooo cpsll3.b =crop,crqp,crpp (p0_1)
+(dni cpsll3_b_P0_P1 "cpsll3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsll3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsll3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x50) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsll3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01010001 10100 qqqqq ppppp ooooo cpssll3.b =crop,crqp,crpp (p0_1)
+(dni cpssll3_b_P0_P1 "cpssll3.b $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssll3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpssll3.b $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x51) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssll3_b" pc crqp crpp)) )
+ ()
+ )
+
+; 01010010 10100 qqqqq ppppp ooooo cpsll3.h =crop,crqp,crpp (p0_1)
+(dni cpsll3_h_P0_P1 "cpsll3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsll3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsll3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x52) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsll3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01010011 10100 qqqqq ppppp ooooo cpssll3.h =crop,crqp,crpp (p0_1)
+(dni cpssll3_h_P0_P1 "cpssll3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssll3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpssll3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x53) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssll3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01010100 10100 qqqqq ppppp ooooo cpsll3.w =crop,crqp,crpp (p0_1)
+(dni cpsll3_w_P0_P1 "cpsll3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsll3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsll3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x54) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsll3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01010101 10100 qqqqq ppppp ooooo cpssll3.w =crop,crqp,crpp (p0_1)
+(dni cpssll3_w_P0_P1 "cpssll3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpssll3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpssll3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x55) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpssll3_w" pc crqp crpp)) )
+ ()
+ )
+
+; 01010110 10100 qqqqq ppppp ooooo cdsll3 =crop,crqp,crpp (p0_1)
+(dni cdsll3_P0_P1 "cdsll3 $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsll3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsll3 $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x56) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsll3" pc crqp crpp)) )
+ ()
+ )
+
+; 01011010 10100 qqqqq ppppp ooooo cpsla3.h =crop,crqp,crpp (p0_1)
+(dni cpsla3_h_P0_P1 "cpsla3.h $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsla3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpsla3.h $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x5a) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsla3_h" pc crqp crpp)) )
+ ()
+ )
+
+; 01011100 10100 qqqqq ppppp ooooo cpsla3.w =crop,crqp,crpp (p0_1)
+(dni cpsla3_w_P0_P1 "cpsla3.w $crop,$crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsla3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpsla3.w $crop,$crqp,$crpp"
+ (+ (f-ivc2-8u0 #x5c) (f-ivc2-5u8 #x14) crqp crpp crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsla3_w" pc crqp crpp)) )
+ ()
+ )
+
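+; Immediate-shift forms: imm<K>p<B> names a K-bit immediate at bit offset B
+; (e.g. imm3p5 gives cpsrli3.b a 3-bit shift count), and ivc-x-0-<N> is
+; presumably a pmacro filling the N don't-care ("x") bits shown at the top
+; of each diagram.
+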
+; xxxxxiii 10101 qqqqq 00000 ooooo cpsrli3.b =crop,crqp,imm3p5 (p0_1)
+(dni cpsrli3_b_P0_P1 "cpsrli3.b $crop,$crqp,imm3p5 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrli3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrli3.b $crop,$crqp,$imm3p5"
+ (+ ivc-x-0-5 imm3p5 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x0) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrli3_b" pc crqp imm3p5)) )
+ ()
+ )
+
+; xxxxiiii 10101 qqqqq 00001 ooooo cpsrli3.h =crop,crqp,imm4p4 (p0_1)
+(dni cpsrli3_h_P0_P1 "cpsrli3.h $crop,$crqp,imm4p4 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrli3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrli3.h $crop,$crqp,$imm4p4"
+ (+ ivc-x-0-4 imm4p4 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x1) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrli3_h" pc crqp imm4p4)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 00010 ooooo cpsrli3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpsrli3_w_P0_P1 "cpsrli3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrli3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrli3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x2) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrli3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxiiiiii 10101 qqqqq 00011 ooooo cdsrli3 =crop,crqp,imm6p2 (p0_1)
+(dni cdsrli3_P0_P1 "cdsrli3 $crop,$crqp,imm6p2 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsrli3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrli3 $crop,$crqp,$imm6p2"
+ (+ ivc-x-0-2 imm6p2 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x3) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsrli3" pc crqp imm6p2)) )
+ ()
+ )
+
+; xxxxxiii 10101 qqqqq 00100 ooooo cpsrai3.b =crop,crqp,imm3p5 (p0_1)
+(dni cpsrai3_b_P0_P1 "cpsrai3.b $crop,$crqp,imm3p5 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrai3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpsrai3.b $crop,$crqp,$imm3p5"
+ (+ ivc-x-0-5 imm3p5 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x4) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrai3_b" pc crqp imm3p5)) )
+ ()
+ )
+
+; xxxxiiii 10101 qqqqq 00101 ooooo cpsrai3.h =crop,crqp,imm4p4 (p0_1)
+(dni cpsrai3_h_P0_P1 "cpsrai3.h $crop,$crqp,imm4p4 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrai3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpsrai3.h $crop,$crqp,$imm4p4"
+ (+ ivc-x-0-4 imm4p4 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x5) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrai3_h" pc crqp imm4p4)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 00110 ooooo cpsrai3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpsrai3_w_P0_P1 "cpsrai3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpsrai3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpsrai3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x6) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpsrai3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxiiiiii 10101 qqqqq 00111 ooooo cdsrai3 =crop,crqp,imm6p2 (p0_1)
+(dni cdsrai3_P0_P1 "cdsrai3 $crop,$crqp,imm6p2 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdsrai3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdsrai3 $crop,$crqp,$imm6p2"
+ (+ ivc-x-0-2 imm6p2 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x7) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdsrai3" pc crqp imm6p2)) )
+ ()
+ )
+
+; xxxxxiii 10101 qqqqq 01000 ooooo cpslli3.b =crop,crqp,imm3p5 (p0_1)
+(dni cpslli3_b_P0_P1 "cpslli3.b $crop,$crqp,imm3p5 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpslli3_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpslli3.b $crop,$crqp,$imm3p5"
+ (+ ivc-x-0-5 imm3p5 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x8) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpslli3_b" pc crqp imm3p5)) )
+ ()
+ )
+
+; xxxxiiii 10101 qqqqq 01001 ooooo cpslli3.h =crop,crqp,imm4p4 (p0_1)
+(dni cpslli3_h_P0_P1 "cpslli3.h $crop,$crqp,imm4p4 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpslli3_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpslli3.h $crop,$crqp,$imm4p4"
+ (+ ivc-x-0-4 imm4p4 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x9) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpslli3_h" pc crqp imm4p4)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 01010 ooooo cpslli3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpslli3_w_P0_P1 "cpslli3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpslli3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpslli3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #xa) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpslli3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxiiiiii 10101 qqqqq 01011 ooooo cdslli3 =crop,crqp,imm6p2 (p0_1)
+(dni cdslli3_P0_P1 "cdslli3 $crop,$crqp,imm6p2 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdslli3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdslli3 $crop,$crqp,$imm6p2"
+ (+ ivc-x-0-2 imm6p2 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #xb) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdslli3" pc crqp imm6p2)) )
+ ()
+ )
+
+; xxxxiiii 10101 qqqqq 01101 ooooo cpslai3.h =crop,crqp,imm4p4 (p0_1)
+(dni cpslai3_h_P0_P1 "cpslai3.h $crop,$crqp,imm4p4 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpslai3_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpslai3.h $crop,$crqp,$imm4p4"
+ (+ ivc-x-0-4 imm4p4 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #xd) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpslai3_h" pc crqp imm4p4)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 01110 ooooo cpslai3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpslai3_w_P0_P1 "cpslai3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpslai3_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpslai3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #xe) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpslai3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 10000 ooooo cpclipiu3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpclipiu3_w_P0_P1 "cpclipiu3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpclipiu3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpclipiu3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x10) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpclipiu3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxxiiiii 10101 qqqqq 10001 ooooo cpclipi3.w =crop,crqp,imm5p3 (p0_1)
+(dni cpclipi3_w_P0_P1 "cpclipi3.w $crop,$crqp,imm5p3 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpclipi3_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpclipi3.w $crop,$crqp,$imm5p3"
+ (+ ivc-x-0-3 imm5p3 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x11) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpclipi3_w" pc crqp imm5p3)) )
+ ()
+ )
+
+; xxiiiiii 10101 qqqqq 10010 ooooo cdclipiu3 =crop,crqp,imm6p2 (p0_1)
+(dni cdclipiu3_P0_P1 "cdclipiu3 $crop,$crqp,imm6p2 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdclipiu3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdclipiu3 $crop,$crqp,$imm6p2"
+ (+ ivc-x-0-2 imm6p2 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x12) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdclipiu3" pc crqp imm6p2)) )
+ ()
+ )
+
+; xxiiiiii 10101 qqqqq 10011 ooooo cdclipi3 =crop,crqp,imm6p2 (p0_1)
+(dni cdclipi3_P0_P1 "cdclipi3 $crop,$crqp,imm6p2 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdclipi3") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdclipi3 $crop,$crqp,$imm6p2"
+ (+ ivc-x-0-2 imm6p2 (f-ivc2-5u8 #x15) crqp (f-ivc2-5u18 #x13) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cdclipi3" pc crqp imm6p2)) )
+ ()
+ )
+
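+; Immediate-move forms: the i bits in each diagram are split across the word
+; and apparently gathered into one 16-bit operand (imm16p0 unsigned,
+; simm16p0 signed), so cpmovi.h/.w and cdmoviu/cdmovi take a full 16-bit
+; literal.
+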
+; iiiiiiii 10110 qqqqq 01iii iiiii cpmovi.h =crqp,simm16p0 (p0_i)
+(dni cpmovi_h_P0_P1 "cpmovi.h $crqp,simm16p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmovi_h") (CPTYPE V4HI) (CRET FIRST))
+ "cpmovi.h $crqp,$simm16p0"
+ (+ (f-ivc2-5u8 #x16) crqp (f-ivc2-2u18 #x1) simm16p0 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cpmovi_h16" pc simm16p0)) )
+ ()
+ )
+
+; iiiiiiii 10111 qqqqq 00iii iiiii cpmoviu.w =crqp,imm16p0 (p0_i)
+(dni cpmoviu_w_P0_P1 "cpmoviu.w $crqp,imm16p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmoviu_w") (CPTYPE V2USI) (CRET FIRST))
+ "cpmoviu.w $crqp,$imm16p0"
+ (+ (f-ivc2-5u8 #x17) crqp (f-ivc2-2u18 #x0) imm16p0 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cpmoviu_w16" pc imm16p0)) )
+ ()
+ )
+
+; iiiiiiii 10111 qqqqq 01iii iiiii cpmovi.w =crqp,simm16p0 (p0_i)
+(dni cpmovi_w_P0_P1 "cpmovi.w $crqp,simm16p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cpmovi_w") (CPTYPE V2SI) (CRET FIRST))
+ "cpmovi.w $crqp,$simm16p0"
+ (+ (f-ivc2-5u8 #x17) crqp (f-ivc2-2u18 #x1) simm16p0 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cpmovi_w16" pc simm16p0)) )
+ ()
+ )
+
+; iiiiiiii 10111 qqqqq 10iii iiiii cdmoviu =crqp,imm16p0 (p0_i)
+(dni cdmoviu_P0_P1 "cdmoviu $crqp,imm16p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdmoviu") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdmoviu $crqp,$imm16p0"
+ (+ (f-ivc2-5u8 #x17) crqp (f-ivc2-2u18 #x2) imm16p0 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cdmoviu16" pc imm16p0)) )
+ ()
+ )
+
+; iiiiiiii 10111 qqqqq 11iii iiiii cdmovi =crqp,simm16p0 (p0_i)
+(dni cdmovi_P0_P1 "cdmovi $crqp,simm16p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0-p1-isa (SLOTS P0,P1) (INTRINSIC "cdmovi") (CPTYPE CP_DATA_BUS_INT) (CRET FIRST))
+ "cdmovi $crqp,$simm16p0"
+ (+ (f-ivc2-5u8 #x17) crqp (f-ivc2-2u18 #x3) simm16p0 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cdmovi16" pc simm16p0)) )
+ ()
+ )
+
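+; c1nop is the all-zero encoding, restricted to the P1 slot (ivc2-p1-isa,
+; SLOTS P1); unlike the entries above it takes no operands, and its
+; semantics consist only of the option check and the helper call.
+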
+; 00000000 00000 00000 00000 00000 c1nop (p0_1)
+(dni c1nop_P1 "c1nop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "c1nop"))
+ "c1nop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x0) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x0) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (c-call "ivc2_c1nop" pc) )
+ ()
+ )
+
+; 00000000 10110 qqqqq 00iii iiiii cpmovi.b =crqp,simm8p20 (p0_i)
+(dni cpmovi_b_P0S_P1 "cpmovi.b $crqp,simm8p20 Pn"
+ (OPTIONAL_CP_INSN ivc2-p0s-p1-isa (SLOTS P0S,P1) (INTRINSIC "cpmovi_b") (CPTYPE V8QI) (CRET FIRST))
+ "cpmovi.b $crqp,$simm8p20"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x16) crqp (f-ivc2-2u18 #x0) simm8p20 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crqp (c-call DI "ivc2_cpmovi_b" pc simm8p20)) )
+ ()
+ )
+
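+; P1-slot accumulator ops: the explicit (set ivc2_acc1_N 0) and
+; (set ivc2_cofa1 0) lines zero the accumulator words the ivc2_* helper is
+; about to rewrite (the .h variants touch only the upper or lower half),
+; presumably also declaring them as outputs for CGEN's dataflow analysis.
+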
+; 00000000 11000 qqqqq ppppp 00000 cpadda1u.b crqp,crpp (p0_1)
+(dni cpadda1u_b_P1 "cpadda1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpadda1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpadda1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpadda1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00001 cpadda1.b crqp,crpp (p0_1)
+(dni cpadda1_b_P1 "cpadda1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpadda1_b") (CPTYPE V8QI) VOLATILE)
+ "cpadda1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpadda1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00010 cpaddua1.h crqp,crpp (p0_1)
+(dni cpaddua1_h_P1 "cpaddua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x2) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpaddua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00011 cpaddla1.h crqp,crpp (p0_1)
+(dni cpaddla1_h_P1 "cpaddla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpaddla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00100 cpaddaca1u.b crqp,crpp (p0_1)
+(dni cpaddaca1u_b_P1 "cpaddaca1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddaca1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpaddaca1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x4) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddaca1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00101 cpaddaca1.b crqp,crpp (p0_1)
+(dni cpaddaca1_b_P1 "cpaddaca1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddaca1_b") (CPTYPE V8QI) VOLATILE)
+ "cpaddaca1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddaca1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00110 cpaddacua1.h crqp,crpp (p0_1)
+(dni cpaddacua1_h_P1 "cpaddacua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddacua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x6) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddacua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 00111 cpaddacla1.h crqp,crpp (p0_1)
+(dni cpaddacla1_h_P1 "cpaddacla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaddacla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpaddacla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x7) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpaddacla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01000 cpsuba1u.b crqp,crpp (p0_1)
+(dni cpsuba1u_b_P1 "cpsuba1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsuba1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsuba1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x8) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsuba1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01001 cpsuba1.b crqp,crpp (p0_1)
+(dni cpsuba1_b_P1 "cpsuba1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsuba1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsuba1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsuba1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01010 cpsubua1.h crqp,crpp (p0_1)
+(dni cpsubua1_h_P1 "cpsubua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsubua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01011 cpsubla1.h crqp,crpp (p0_1)
+(dni cpsubla1_h_P1 "cpsubla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpsubla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01100 cpsubaca1u.b crqp,crpp (p0_1)
+(dni cpsubaca1u_b_P1 "cpsubaca1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubaca1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsubaca1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xc) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubaca1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01101 cpsubaca1.b crqp,crpp (p0_1)
+(dni cpsubaca1_b_P1 "cpsubaca1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubaca1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsubaca1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubaca1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01110 cpsubacua1.h crqp,crpp (p0_1)
+(dni cpsubacua1_h_P1 "cpsubacua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubacua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubacua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 01111 cpsubacla1.h crqp,crpp (p0_1)
+(dni cpsubacla1_h_P1 "cpsubacla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsubacla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsubacla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsubacla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10000 cpabsa1u.b crqp,crpp (p0_1)
+(dni cpabsa1u_b_P1 "cpabsa1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpabsa1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpabsa1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsa1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10001 cpabsa1.b crqp,crpp (p0_1)
+(dni cpabsa1_b_P1 "cpabsa1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpabsa1_b") (CPTYPE V8QI) VOLATILE)
+ "cpabsa1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsa1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10010 cpabsua1.h crqp,crpp (p0_1)
+(dni cpabsua1_h_P1 "cpabsua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpabsua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpabsua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10011 cpabsla1.h crqp,crpp (p0_1)
+(dni cpabsla1_h_P1 "cpabsla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpabsla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpabsla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpabsla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10100 cpsada1u.b crqp,crpp (p0_1)
+(dni cpsada1u_b_P1 "cpsada1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsada1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpsada1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsada1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10101 cpsada1.b crqp,crpp (p0_1)
+(dni cpsada1_b_P1 "cpsada1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsada1_b") (CPTYPE V8QI) VOLATILE)
+ "cpsada1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsada1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10110 cpsadua1.h crqp,crpp (p0_1)
+(dni cpsadua1_h_P1 "cpsadua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsadua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 10111 cpsadla1.h crqp,crpp (p0_1)
+(dni cpsadla1_h_P1 "cpsadla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsadla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsadla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 11011 cpseta1.h crqp,crpp (p0_1)
+(dni cpseta1_h_P1 "cpseta1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpseta1_h") (CPTYPE V4HI) VOLATILE)
+ "cpseta1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpseta1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 11100 cpsetua1.w crqp,crpp (p0_1)
+(dni cpsetua1_w_P1 "cpsetua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsetua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsetua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11000 qqqqq ppppp 11101 cpsetla1.w crqp,crpp (p0_1)
+(dni cpsetla1_w_P1 "cpsetla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsetla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsetla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x18) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpsetla1_w" pc crqp crpp) )
+ ()
+ )
+
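+; The cpmov*/cppack* entries that follow move accumulator 1 out into a
+; coprocessor register.  (CRET FIRST) evidently marks the first operand
+; ($crop, written =crop in the bit-pattern comments) as the intrinsic's
+; return value, and the c-call is made in DI mode to carry the 64-bit
+; result.
+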
+; 00000000 11001 00000 00001 ooooo cpmova1.b =crop (p0_1)
+(dni cpmova1_b_P1 "cpmova1.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmova1_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cpmova1.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmova1_b" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00010 ooooo cpmovua1.h =crop (p0_1)
+(dni cpmovua1_h_P1 "cpmovua1.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovua1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovua1.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x2) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovua1_h" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00011 ooooo cpmovla1.h =crop (p0_1)
+(dni cpmovla1_h_P1 "cpmovla1.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovla1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cpmovla1.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x3) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovla1_h" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00100 ooooo cpmovuua1.w =crop (p0_1)
+(dni cpmovuua1_w_P1 "cpmovuua1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovuua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovuua1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x4) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovuua1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00101 ooooo cpmovula1.w =crop (p0_1)
+(dni cpmovula1_w_P1 "cpmovula1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovula1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovula1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x5) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovula1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00110 ooooo cpmovlua1.w =crop (p0_1)
+(dni cpmovlua1_w_P1 "cpmovlua1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovlua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlua1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x6) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovlua1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 00111 ooooo cpmovlla1.w =crop (p0_1)
+(dni cpmovlla1_w_P1 "cpmovlla1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovlla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovlla1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x7) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovlla1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01000 ooooo cppacka1u.b =crop (p0_1)
+(dni cppacka1u_b_P1 "cppacka1u.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppacka1u_b") (CPTYPE V8UQI) (CRET FIRST) VOLATILE)
+ "cppacka1u.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x8) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppacka1u_b" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01001 ooooo cppacka1.b =crop (p0_1)
+(dni cppacka1_b_P1 "cppacka1.b $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppacka1_b") (CPTYPE V8QI) (CRET FIRST) VOLATILE)
+ "cppacka1.b $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x9) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppacka1_b" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01010 ooooo cppackua1.h =crop (p0_1)
+(dni cppackua1_h_P1 "cppackua1.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppackua1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackua1.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xa) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackua1_h" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01011 ooooo cppackla1.h =crop (p0_1)
+(dni cppackla1_h_P1 "cppackla1.h $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppackla1_h") (CPTYPE V4HI) (CRET FIRST) VOLATILE)
+ "cppackla1.h $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xb) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackla1_h" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01100 ooooo cppackua1.w =crop (p0_1)
+(dni cppackua1_w_P1 "cppackua1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppackua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackua1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xc) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackua1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01101 ooooo cppackla1.w =crop (p0_1)
+(dni cppackla1_w_P1 "cppackla1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cppackla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cppackla1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xd) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cppackla1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01110 ooooo cpmovhua1.w =crop (p0_1)
+(dni cpmovhua1_w_P1 "cpmovhua1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovhua1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhua1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xe) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovhua1_w" pc)) )
+ ()
+ )
+
+; 00000000 11001 00000 01111 ooooo cpmovhla1.w =crop (p0_1)
+(dni cpmovhla1_w_P1 "cpmovhla1.w $crop Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmovhla1_w") (CPTYPE V2SI) (CRET FIRST) VOLATILE)
+ "cpmovhla1.w $crop"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #xf) crop (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set crop (c-call DI "ivc2_cpmovhla1_w" pc)) )
+ ()
+ )
+
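+; cpacsuma1, cpaccpa1 and cpacswp take no register operands; every field
+; in their encodings is fixed.  cpacswp is the one entry that clobbers
+; both accumulators, consistent with its being an accumulator swap.
+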
+; 00000000 11001 00000 10000 00000 cpacsuma1 (p0_1)
+(dni cpacsuma1_P1 "cpacsuma1 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpacsuma1") VOLATILE)
+ "cpacsuma1"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x10) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpacsuma1" pc) )
+ ()
+ )
+
+; 00000000 11001 00000 10001 00000 cpaccpa1 (p0_1)
+(dni cpaccpa1_P1 "cpaccpa1 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpaccpa1") VOLATILE)
+ "cpaccpa1"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x11) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpaccpa1" pc) )
+ ()
+ )
+
+; 00000000 11001 00000 10010 00000 cpacswp (p0_1)
+(dni cpacswp_P1 "cpacswp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpacswp") VOLATILE)
+ "cpacswp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x12) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc0_0 0)
+ (set ivc2_acc0_1 0)
+ (set ivc2_acc0_2 0)
+ (set ivc2_acc0_3 0)
+ (set ivc2_acc0_4 0)
+ (set ivc2_acc0_5 0)
+ (set ivc2_acc0_6 0)
+ (set ivc2_acc0_7 0)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpacswp" pc) )
+ ()
+ )
+
+; 00000000 11001 qqqqq 11000 00000 cpsrla1 crqp (p0_1)
+(dni cpsrla1_P1 "cpsrla1 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsrla1") VOLATILE)
+ "cpsrla1 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x18) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsrla1" pc crqp) )
+ ()
+ )
+
+; 00000000 11001 qqqqq 11001 00000 cpsraa1 crqp (p0_1)
+(dni cpsraa1_P1 "cpsraa1 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsraa1") VOLATILE)
+ "cpsraa1 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x19) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsraa1" pc crqp) )
+ ()
+ )
+
+; 00000000 11001 qqqqq 11010 00000 cpslla1 crqp (p0_1)
+(dni cpslla1_P1 "cpslla1 $crqp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpslla1") VOLATILE)
+ "cpslla1 $crqp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) crqp (f-ivc2-5u18 #x1a) (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpslla1" pc crqp) )
+ ()
+ )
+
+; 00000000 11001 00000 11100 iiiii cpsrlia1 imm5p23 (p0_1)
+(dni cpsrlia1_1_p1 "cpsrlia1 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsrlia1") VOLATILE)
+ "cpsrlia1 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1c) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsrlia1" pc imm5p23) )
+ ()
+ )
+
+; 00000000 11001 00000 11101 iiiii cpsraia1 imm5p23 (p0_1)
+(dni cpsraia1_1_p1 "cpsraia1 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsraia1") VOLATILE)
+ "cpsraia1 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1d) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsraia1" pc imm5p23) )
+ ()
+ )
+
+; 00000000 11001 00000 11110 iiiii cpsllia1 imm5p23 (p0_1)
+(dni cpsllia1_1_p1 "cpsllia1 imm5p23 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsllia1") VOLATILE)
+ "cpsllia1 $imm5p23"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x19) (f-ivc2-5u13 #x0) (f-ivc2-5u18 #x1e) imm5p23 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpsllia1" pc imm5p23) )
+ ()
+ )
+
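+; In the groups below that take simm8p0, the top encoding byte
+; (iiiiiiii in the bit patterns) carries the 8-bit signed immediate, so
+; the encodings begin with the simm8p0 operand in place of the fixed
+; (f-ivc2-8u0 #x0) field.
+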
+; iiiiiiii 11111 qqqqq ppppp 00000 cpfmulia1s0u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulia1s0u_b_P1 "cpfmulia1s0u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1s0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmulia1s0u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1s0u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00001 cpfmulia1s0.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulia1s0_b_P1 "cpfmulia1s0.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1s0_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmulia1s0.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1s0_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00010 cpfmuliua1s0.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmuliua1s0_h_P1 "cpfmuliua1s0.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmuliua1s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmuliua1s0.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x2) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmuliua1s0_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00011 cpfmulila1s0.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulila1s0_h_P1 "cpfmulila1s0.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulila1s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmulila1s0.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x3) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpfmulila1s0_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00100 cpfmadia1s0u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadia1s0u_b_P1 "cpfmadia1s0u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1s0u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmadia1s0u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x4) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1s0u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00101 cpfmadia1s0.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadia1s0_b_P1 "cpfmadia1s0.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1s0_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmadia1s0.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1s0_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00110 cpfmadiua1s0.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadiua1s0_h_P1 "cpfmadiua1s0.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadiua1s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadiua1s0.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x6) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadiua1s0_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 00111 cpfmadila1s0.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadila1s0_h_P1 "cpfmadila1s0.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadila1s0_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadila1s0.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x7) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadila1s0_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01000 cpfmulia1s1u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulia1s1u_b_P1 "cpfmulia1s1u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1s1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmulia1s1u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x8) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1s1u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01001 cpfmulia1s1.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulia1s1_b_P1 "cpfmulia1s1.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1s1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmulia1s1.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1s1_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01010 cpfmuliua1s1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmuliua1s1_h_P1 "cpfmuliua1s1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmuliua1s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmuliua1s1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmuliua1s1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01011 cpfmulila1s1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmulila1s1_h_P1 "cpfmulila1s1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulila1s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmulila1s1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpfmulila1s1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01100 cpfmadia1s1u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadia1s1u_b_P1 "cpfmadia1s1u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1s1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmadia1s1u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xc) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1s1u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01101 cpfmadia1s1.b crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadia1s1_b_P1 "cpfmadia1s1.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1s1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmadia1s1.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1s1_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01110 cpfmadiua1s1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadiua1s1_h_P1 "cpfmadiua1s1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadiua1s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadiua1s1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadiua1s1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 01111 cpfmadila1s1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpfmadila1s1_h_P1 "cpfmadila1s1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadila1s1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadila1s1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadila1s1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10000 cpamulia1u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpamulia1u_b_P1 "cpamulia1u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamulia1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpamulia1u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpamulia1u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10001 cpamulia1.b crqp,crpp,simm8p0 (p0_1)
+(dni cpamulia1_b_P1 "cpamulia1.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamulia1_b") (CPTYPE V8QI) VOLATILE)
+ "cpamulia1.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpamulia1_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10010 cpamuliua1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpamuliua1_h_P1 "cpamuliua1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamuliua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpamuliua1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpamuliua1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10011 cpamulila1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpamulila1_h_P1 "cpamulila1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamulila1_h") (CPTYPE V4HI) VOLATILE)
+ "cpamulila1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpamulila1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10100 cpamadia1u.b crqp,crpp,simm8p0 (p0_1)
+(dni cpamadia1u_b_P1 "cpamadia1u.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamadia1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpamadia1u.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpamadia1u_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10101 cpamadia1.b crqp,crpp,simm8p0 (p0_1)
+(dni cpamadia1_b_P1 "cpamadia1.b $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamadia1_b") (CPTYPE V8QI) VOLATILE)
+ "cpamadia1.b $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpamadia1_b" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10110 cpamadiua1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpamadiua1_h_P1 "cpamadiua1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamadiua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpamadiua1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpamadiua1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11111 qqqqq ppppp 10111 cpamadila1.h crqp,crpp,simm8p0 (p0_1)
+(dni cpamadila1_h_P1 "cpamadila1.h $crqp,$crpp,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpamadila1_h") (CPTYPE V4HI) VOLATILE)
+ "cpamadila1.h $crqp,$crpp,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1f) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpamadila1_h" pc crqp crpp simm8p0) )
+ ()
+ )
+
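+; The eight cpfmul/cpfmad entries below additionally take imm3p25, a
+; 3-bit immediate in the III bits, and are tagged (cpfm) rather than
+; (p0_1) in the bit-pattern comments.
+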
+; iiiiiiii 11100 qqqqq ppppp 00 III cpfmulia1u.b crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmulia1u_b_P1 "cpfmulia1u.b $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmulia1u.b $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1c) crqp crpp (f-ivc2-2u23 #x0) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1u_b" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11100 qqqqq ppppp 01 III cpfmulia1.b crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmulia1_b_P1 "cpfmulia1.b $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulia1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmulia1.b $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1c) crqp crpp (f-ivc2-2u23 #x1) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmulia1_b" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11100 qqqqq ppppp 10 III cpfmuliua1.h crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmuliua1_h_P1 "cpfmuliua1.h $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmuliua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmuliua1.h $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1c) crqp crpp (f-ivc2-2u23 #x2) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpfmuliua1_h" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11100 qqqqq ppppp 11 III cpfmulila1.h crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmulila1_h_P1 "cpfmulila1.h $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmulila1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmulila1.h $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1c) crqp crpp (f-ivc2-2u23 #x3) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpfmulila1_h" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11101 qqqqq ppppp 00 III cpfmadia1u.b crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmadia1u_b_P1 "cpfmadia1u.b $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpfmadia1u.b $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1d) crqp crpp (f-ivc2-2u23 #x0) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1u_b" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11101 qqqqq ppppp 01 III cpfmadia1.b crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmadia1_b_P1 "cpfmadia1.b $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadia1_b") (CPTYPE V8QI) VOLATILE)
+ "cpfmadia1.b $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1d) crqp crpp (f-ivc2-2u23 #x1) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadia1_b" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11101 qqqqq ppppp 10 III cpfmadiua1.h crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmadiua1_h_P1 "cpfmadiua1.h $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadiua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadiua1.h $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1d) crqp crpp (f-ivc2-2u23 #x2) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadiua1_h" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; iiiiiiii 11101 qqqqq ppppp 11 III cpfmadila1.h crqp,crpp,imm3p25,simm8p0 (cpfm)
+(dni cpfmadila1_h_P1 "cpfmadila1.h $crqp,$crpp,imm3p25,simm8p0 Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpfmadila1_h") (CPTYPE V4HI) VOLATILE)
+ "cpfmadila1.h $crqp,$crpp,$imm3p25,$simm8p0"
+ (+ simm8p0 (f-ivc2-5u8 #x1d) crqp crpp (f-ivc2-2u23 #x3) imm3p25 (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpfmadila1_h" pc crqp crpp imm3p25 simm8p0) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 00000 cpssqa1u.b crqp,crpp (p0_1)
+(dni cpssqa1u_b_P1 "cpssqa1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpssqa1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpssqa1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x0) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssqa1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 00001 cpssqa1.b crqp,crpp (p0_1)
+(dni cpssqa1_b_P1 "cpssqa1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpssqa1_b") (CPTYPE V8QI) VOLATILE)
+ "cpssqa1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssqa1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 00100 cpssda1u.b crqp,crpp (p0_1)
+(dni cpssda1u_b_P1 "cpssda1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpssda1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpssda1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x4) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssda1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 00101 cpssda1.b crqp,crpp (p0_1)
+(dni cpssda1_b_P1 "cpssda1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpssda1_b") (CPTYPE V8QI) VOLATILE)
+ "cpssda1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x5) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpssda1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01000 cpmula1u.b crqp,crpp (p0_1)
+(dni cpmula1u_b_P1 "cpmula1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmula1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpmula1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x8) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmula1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01001 cpmula1.b crqp,crpp (p0_1)
+(dni cpmula1_b_P1 "cpmula1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmula1_b") (CPTYPE V8QI) VOLATILE)
+ "cpmula1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x9) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmula1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01010 cpmulua1.h crqp,crpp (p0_1)
+(dni cpmulua1_h_P1 "cpmulua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01011 cpmulla1.h crqp,crpp (p0_1)
+(dni cpmulla1_h_P1 "cpmulla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01100 cpmulua1u.w crqp,crpp (p0_1)
+(dni cpmulua1u_w_P1 "cpmulua1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmulua1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xc) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01101 cpmulla1u.w crqp,crpp (p0_1)
+(dni cpmulla1u_w_P1 "cpmulla1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmulla1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xd) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01110 cpmulua1.w crqp,crpp (p0_1)
+(dni cpmulua1_w_P1 "cpmulua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (c-call "ivc2_cpmulua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 01111 cpmulla1.w crqp,crpp (p0_1)
+(dni cpmulla1_w_P1 "cpmulla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (c-call "ivc2_cpmulla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10000 cpmada1u.b crqp,crpp (p0_1)
+(dni cpmada1u_b_P1 "cpmada1u.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmada1u_b") (CPTYPE V8UQI) VOLATILE)
+ "cpmada1u.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x10) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmada1u_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10001 cpmada1.b crqp,crpp (p0_1)
+(dni cpmada1_b_P1 "cpmada1.b $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmada1_b") (CPTYPE V8QI) VOLATILE)
+ "cpmada1.b $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x11) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmada1_b" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10010 cpmadua1.h crqp,crpp (p0_1)
+(dni cpmadua1_h_P1 "cpmadua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmadua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10011 cpmadla1.h crqp,crpp (p0_1)
+(dni cpmadla1_h_P1 "cpmadla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmadla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10100 cpmadua1u.w crqp,crpp (p0_1)
+(dni cpmadua1u_w_P1 "cpmadua1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmadua1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x14) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10101 cpmadla1u.w crqp,crpp (p0_1)
+(dni cpmadla1u_w_P1 "cpmadla1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmadla1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x15) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10110 cpmadua1.w crqp,crpp (p0_1)
+(dni cpmadua1_w_P1 "cpmadua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmadua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 10111 cpmadla1.w crqp,crpp (p0_1)
+(dni cpmadla1_w_P1 "cpmadla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmadla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmadla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmadla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11010 cpmsbua1.h crqp,crpp (p0_1)
+(dni cpmsbua1_h_P1 "cpmsbua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmsbua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1a) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11011 cpmsbla1.h crqp,crpp (p0_1)
+(dni cpmsbla1_h_P1 "cpmsbla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmsbla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11100 cpmsbua1u.w crqp,crpp (p0_1)
+(dni cpmsbua1u_w_P1 "cpmsbua1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbua1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmsbua1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1c) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11101 cpmsbla1u.w crqp,crpp (p0_1)
+(dni cpmsbla1u_w_P1 "cpmsbla1u.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbla1u_w") (CPTYPE V2USI) VOLATILE)
+ "cpmsbla1u.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1d) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1u_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11110 cpmsbua1.w crqp,crpp (p0_1)
+(dni cpmsbua1_w_P1 "cpmsbua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmsbua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1e) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000000 11110 qqqqq ppppp 11111 cpmsbla1.w crqp,crpp (p0_1)
+(dni cpmsbla1_w_P1 "cpmsbla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmsbla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmsbla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x0) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1f) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmsbla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 10010 cpsmadua1.h crqp,crpp (p0_1)
+(dni cpsmadua1_h_P1 "cpsmadua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 10011 cpsmadla1.h crqp,crpp (p0_1)
+(dni cpsmadla1_h_P1 "cpsmadla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 10110 cpsmadua1.w crqp,crpp (p0_1)
+(dni cpsmadua1_w_P1 "cpsmadua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 10111 cpsmadla1.w crqp,crpp (p0_1)
+(dni cpsmadla1_w_P1 "cpsmadla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 11010 cpsmsbua1.h crqp,crpp (p0_1)
+(dni cpsmsbua1_h_P1 "cpsmsbua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1a) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 11011 cpsmsbla1.h crqp,crpp (p0_1)
+(dni cpsmsbla1_h_P1 "cpsmsbla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 11110 cpsmsbua1.w crqp,crpp (p0_1)
+(dni cpsmsbua1_w_P1 "cpsmsbua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1e) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000001 11110 qqqqq ppppp 11111 cpsmsbla1.w crqp,crpp (p0_1)
+(dni cpsmsbla1_w_P1 "cpsmsbla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x1) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1f) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000010 11110 qqqqq ppppp 01010 cpmulslua1.h crqp,crpp (p0_1)
+(dni cpmulslua1_h_P1 "cpmulslua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulslua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xa) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000010 11110 qqqqq ppppp 01011 cpmulslla1.h crqp,crpp (p0_1)
+(dni cpmulslla1_h_P1 "cpmulslla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpmulslla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xb) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000010 11110 qqqqq ppppp 01110 cpmulslua1.w crqp,crpp (p0_1)
+(dni cpmulslua1_w_P1 "cpmulslua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulslua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xe) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000010 11110 qqqqq ppppp 01111 cpmulslla1.w crqp,crpp (p0_1)
+(dni cpmulslla1_w_P1 "cpmulslla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpmulslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpmulslla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x2) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #xf) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpmulslla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 10010 cpsmadslua1.h crqp,crpp (p0_1)
+(dni cpsmadslua1_h_P1 "cpsmadslua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadslua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x12) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 10011 cpsmadslla1.h crqp,crpp (p0_1)
+(dni cpsmadslla1_h_P1 "cpsmadslla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmadslla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x13) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 10110 cpsmadslua1.w crqp,crpp (p0_1)
+(dni cpsmadslua1_w_P1 "cpsmadslua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadslua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x16) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 10111 cpsmadslla1.w crqp,crpp (p0_1)
+(dni cpsmadslla1_w_P1 "cpsmadslla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmadslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmadslla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x17) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmadslla1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 11010 cpsmsbslua1.h crqp,crpp (p0_1)
+(dni cpsmsbslua1_h_P1 "cpsmsbslua1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbslua1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbslua1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1a) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslua1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 11011 cpsmsbslla1.h crqp,crpp (p0_1)
+(dni cpsmsbslla1_h_P1 "cpsmsbslla1.h $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbslla1_h") (CPTYPE V4HI) VOLATILE)
+ "cpsmsbslla1.h $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1b) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslla1_h" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 11110 cpsmsbslua1.w crqp,crpp (p0_1)
+(dni cpsmsbslua1_w_P1 "cpsmsbslua1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbslua1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbslua1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1e) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_4 0)
+ (set ivc2_acc1_5 0)
+ (set ivc2_acc1_6 0)
+ (set ivc2_acc1_7 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslua1_w" pc crqp crpp) )
+ ()
+ )
+
+; 00000011 11110 qqqqq ppppp 11111 cpsmsbslla1.w crqp,crpp (p0_1)
+(dni cpsmsbslla1_w_P1 "cpsmsbslla1.w $crqp,$crpp Pn"
+ (OPTIONAL_CP_INSN ivc2-p1-isa (SLOTS P1) (INTRINSIC "cpsmsbslla1_w") (CPTYPE V2SI) VOLATILE)
+ "cpsmsbslla1.w $crqp,$crpp"
+ (+ (f-ivc2-8u0 #x3) (f-ivc2-5u8 #x1e) crqp crpp (f-ivc2-5u23 #x1f) (f-ivc2-4u28 0))
+ (sequence ()
+ (c-call "check_option_cp" pc)
+ (set ivc2_acc1_0 0)
+ (set ivc2_acc1_1 0)
+ (set ivc2_acc1_2 0)
+ (set ivc2_acc1_3 0)
+ (set ivc2_cofa1 0)
+ (c-call "ivc2_cpsmsbslla1_w" pc crqp crpp) )
+ ()
+ )
+
diff --git a/gcc/config/mep/mep-lib1.asm b/gcc/config/mep/mep-lib1.asm
new file mode 100644
index 000000000..0a18913f9
--- /dev/null
+++ b/gcc/config/mep/mep-lib1.asm
@@ -0,0 +1,125 @@
+/* libgcc routines for Toshiba Media Processor.
+ Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
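+/* SAVEALL pushes core registers $0-$14 and leaves the caller's $lp,
+   rounded up to a 4-byte boundary, in $5; RESTOREALL stores $5 back
+   to $lp, pops the registers, and returns.  The stubs below sit
+   between instrumented code and the C helpers, so they must preserve
+   everything.  */
+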
+#define SAVEALL \
+ add3 $sp, $sp, -16*4 ; \
+ sw $0, ($sp) ; \
+ sw $1, 4($sp) ; \
+ sw $2, 8($sp) ; \
+ sw $3, 12($sp) ; \
+ sw $4, 16($sp) ; \
+ sw $5, 20($sp) ; \
+ sw $6, 24($sp) ; \
+ sw $7, 28($sp) ; \
+ sw $8, 32($sp) ; \
+ sw $9, 36($sp) ; \
+ sw $10, 40($sp) ; \
+ sw $11, 44($sp) ; \
+ sw $12, 48($sp) ; \
+ sw $13, 52($sp) ; \
+ sw $14, 56($sp) ; \
+ ldc $5, $lp ; \
+ add $5, 3 ; \
+ mov $6, -4 ; \
+ and $5, $6
+
+#define RESTOREALL \
+ stc $5, $lp ; \
+ lw $14, 56($sp) ; \
+ lw $13, 52($sp) ; \
+ lw $12, 48($sp) ; \
+ lw $11, 44($sp) ; \
+ lw $10, 40($sp) ; \
+ lw $9, 36($sp) ; \
+ lw $8, 32($sp) ; \
+ lw $7, 28($sp) ; \
+ lw $6, 24($sp) ; \
+ lw $5, 20($sp) ; \
+ lw $4, 16($sp) ; \
+ lw $3, 12($sp) ; \
+ lw $2, 8($sp) ; \
+ lw $1, 4($sp) ; \
+ lw $0, ($sp) ; \
+ add3 $sp, $sp, 16*4 ; \
+ ret
+
+#ifdef L_mep_profile
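+/* Profiling stub: pass the raw return address ($lp) in $1 and the
+   caller's $0 in $2 to the C helper __mep_mcount_2, then restore the
+   saved state.  */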
+ .text
+ .global __mep_mcount
+__mep_mcount:
+ SAVEALL
+ ldc $1, $lp
+ mov $2, $0
+ bsr __mep_mcount_2
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init_trace
+ .text
+ .global __mep_bb_init_trace_func
+__mep_bb_init_trace_func:
+ SAVEALL
+ lw $1, ($5)
+ lw $2, 4($5)
+ add $5, 8
+ bsr __bb_init_trace_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_init
+ .text
+ .global __mep_bb_init_func
+__mep_bb_init_func:
+ SAVEALL
+ lw $1, ($5)
+ add $5, 4
+ bsr __bb_init_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_trace
+ .text
+ .global __mep_bb_trace_func
+__mep_bb_trace_func:
+ SAVEALL
+ movu $3, __bb
+ lw $1, ($5)
+ sw $1, ($3)
+ lw $2, 4($5)
+ sw $2, 4($3)
+ add $5, 8
+ bsr __bb_trace_func
+ RESTOREALL
+#endif
+
+#ifdef L_mep_bb_increment
+ .text
+ .global __mep_bb_increment_func
+__mep_bb_increment_func:
+ SAVEALL
+ lw $1, ($5)
+ lw $0, ($1)
+ add $0, 1
+ sw $0, ($1)
+ add $5, 4
+ RESTOREALL
+#endif
diff --git a/gcc/config/mep/mep-lib2.c b/gcc/config/mep/mep-lib2.c
new file mode 100644
index 000000000..1dbf57d95
--- /dev/null
+++ b/gcc/config/mep/mep-lib2.c
@@ -0,0 +1,139 @@
+/* libgcc routines for MeP.
+ Copyright (C) 2001, 2002, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
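+/* Shift-and-add multiply: for each set bit of A, accumulate the
+   correspondingly shifted B into C; a software fallback for cores
+   without a hardware multiplier.  */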
+USItype
+__mulsi3 (USItype a, USItype b)
+{
+ USItype c = 0;
+
+ while (a != 0)
+ {
+ if (a & 1)
+ c += b;
+ a >>= 1;
+ b <<= 1;
+ }
+
+ return c;
+}
+
+
+
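+/* Restoring shift-subtract division: shift DEN up until it sits just
+   below NUM, then walk the quotient bits from high to low.  Returns
+   the remainder when MODWANTED is nonzero, otherwise the quotient.  */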
+USItype
+udivmodsi4(USItype num, USItype den, word_type modwanted)
+{
+ USItype bit = 1;
+ USItype res = 0;
+
+ while (den < num && bit && !(den & (1L<<31)))
+ {
+      den <<= 1;
+      bit <<= 1;
+ }
+ while (bit)
+ {
+ if (num >= den)
+ {
+ num -= den;
+ res |= bit;
+ }
+      bit >>= 1;
+      den >>= 1;
+ }
+ if (modwanted) return num;
+ return res;
+}
+
+
+
+SItype
+__divsi3 (SItype a, SItype b)
+{
+ word_type neg = 0;
+ SItype res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = !neg;
+ }
+
+ if (b < 0)
+ {
+ b = -b;
+ neg = !neg;
+ }
+
+ res = udivmodsi4 (a, b, 0);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+
+
+
+SItype
+__modsi3 (SItype a, SItype b)
+{
+ word_type neg = 0;
+ SItype res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = 1;
+ }
+
+ if (b < 0)
+ b = -b;
+
+ res = udivmodsi4 (a, b, 1);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
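+
+/* As C requires, the remainder takes the sign of the dividend:
+   __modsi3 (-7, 2) yields -1 and __modsi3 (7, -2) yields 1.  */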
+
+
+
+
+SItype
+__udivsi3 (SItype a, SItype b)
+{
+ return udivmodsi4 (a, b, 0);
+}
+
+
+
+SItype
+__umodsi3 (SItype a, SItype b)
+{
+ return udivmodsi4 (a, b, 1);
+}
diff --git a/gcc/config/mep/mep-pragma.c b/gcc/config/mep/mep-pragma.c
new file mode 100644
index 000000000..d9457ed6c
--- /dev/null
+++ b/gcc/config/mep/mep-pragma.c
@@ -0,0 +1,404 @@
+/* Definitions of Toshiba Media Processor
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009, 2010 Free
+ Software Foundation, Inc. Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "diagnostic-core.h"
+#include "c-family/c-pragma.h"
+#include "cpplib.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "mep-protos.h"
+#include "function.h"
+#define MAX_RECOG_OPERANDS 10
+#include "reload.h"
+#include "target.h"
+
+enum cw_which { CW_AVAILABLE, CW_CALL_SAVED };
+
+/* This is normally provided by rtl.h but we can't include that file
+ here. It's safe to copy the definition here because we're only
+ using it internally; the value isn't passed to functions outside
+ this file. */
+#ifndef INVALID_REGNUM
+#define INVALID_REGNUM (~(unsigned int) 0)
+#endif
+
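+/* Wrap pragma_lex, folding CPP_EOF into CPP_PRAGMA_EOL so the parsers
+   below only need to check for a single end-of-pragma token.  */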
+static enum cpp_ttype
+mep_pragma_lex (tree *valp)
+{
+ enum cpp_ttype t = pragma_lex (valp);
+ if (t == CPP_EOF)
+ t = CPP_PRAGMA_EOL;
+ return t;
+}
+
+static void
+mep_pragma_io_volatile (cpp_reader *reader ATTRIBUTE_UNUSED)
+{
+  /* Parse a single argument, "on" or "off".  */
+ tree val;
+ enum cpp_ttype type;
+ const char * str;
+
+ type = mep_pragma_lex (&val);
+ if (type == CPP_NAME)
+ {
+ str = IDENTIFIER_POINTER (val);
+
+ type = mep_pragma_lex (&val);
+ if (type != CPP_PRAGMA_EOL)
+ warning (0, "junk at end of #pragma io_volatile");
+
+ if (strcmp (str, "on") == 0)
+ {
+ target_flags |= MASK_IO_VOLATILE;
+ return;
+ }
+ if (strcmp (str, "off") == 0)
+ {
+ target_flags &= ~ MASK_IO_VOLATILE;
+ return;
+ }
+ }
+
+ error ("#pragma io_volatile takes only on or off");
+}
+
+static unsigned int
+parse_cr_reg (const char * str)
+{
+ unsigned int regno;
+
+ regno = decode_reg_name (str);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ return INVALID_REGNUM;
+
+ /* Verify that the regno is in CR_REGS. */
+ if (! TEST_HARD_REG_BIT (reg_class_contents[CR_REGS], regno))
+ return INVALID_REGNUM;
+ return regno;
+}
+
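+/* Parse the remainder of the pragma as a coprocessor register set and
+   record it in *SET.  The syntax is a comma-separated list in which
+   "..." between two registers denotes an inclusive range, e.g.
+   "$c0, $c4...$c7".  Emits a diagnostic and returns false on a
+   malformed set.  */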
+static bool
+parse_cr_set (HARD_REG_SET * set)
+{
+ tree val;
+ enum cpp_ttype type;
+ unsigned int last_regno = INVALID_REGNUM;
+ bool do_range = false;
+
+ CLEAR_HARD_REG_SET (*set);
+
+ while ((type = mep_pragma_lex (&val)) != CPP_PRAGMA_EOL)
+ {
+ if (type == CPP_COMMA)
+ {
+ last_regno = INVALID_REGNUM;
+ do_range = false;
+ }
+ else if (type == CPP_ELLIPSIS)
+ {
+ if (last_regno == INVALID_REGNUM)
+ {
+ error ("invalid coprocessor register range");
+ return false;
+ }
+ do_range = true;
+ }
+ else if (type == CPP_NAME || type == CPP_STRING)
+ {
+ const char *str;
+ unsigned int regno, i;
+
+ if (TREE_CODE (val) == IDENTIFIER_NODE)
+ str = IDENTIFIER_POINTER (val);
+ else if (TREE_CODE (val) == STRING_CST)
+ str = TREE_STRING_POINTER (val);
+ else
+ gcc_unreachable ();
+
+ regno = parse_cr_reg (str);
+ if (regno == INVALID_REGNUM)
+ {
+ error ("invalid coprocessor register %qE", val);
+ return false;
+ }
+
+ if (do_range)
+ {
+ if (last_regno > regno)
+ i = regno, regno = last_regno;
+ else
+ i = last_regno;
+ do_range = false;
+ }
+ else
+ last_regno = i = regno;
+
+ while (i <= regno)
+ {
+ SET_HARD_REG_BIT (*set, i);
+ i++;
+ }
+ }
+ else
+ {
+ error ("malformed coprocessor register");
+ return false;
+ }
+ }
+ return true;
+}
+
+static void
+mep_pragma_coprocessor_which (enum cw_which cw_which)
+{
+ HARD_REG_SET set;
+
+ /* Process the balance of the pragma and turn it into a hard reg set. */
+ if (! parse_cr_set (&set))
+ return;
+
+ /* Process the collected hard reg set. */
+ switch (cw_which)
+ {
+ case CW_AVAILABLE:
+ {
+ int i;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; ++i)
+ if (TEST_HARD_REG_BIT (set, i))
+ fixed_regs[i] = 0;
+ }
+ break;
+
+ case CW_CALL_SAVED:
+ {
+ int i;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; ++i)
+ if (TEST_HARD_REG_BIT (set, i))
+ fixed_regs[i] = call_used_regs[i] = 0;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Fix up register class hierarchy. */
+ mep_save_register_info ();
+ mep_reinit_regs ();
+
+ if (cfun == 0)
+ {
+ init_dummy_function_start ();
+ init_caller_save ();
+ expand_dummy_function_end ();
+ }
+ else
+ {
+ init_caller_save ();
+ }
+}
+
+static void
+mep_pragma_coprocessor_width (void)
+{
+ tree val;
+ enum cpp_ttype type;
+ HOST_WIDE_INT i;
+
+ type = mep_pragma_lex (&val);
+ switch (type)
+ {
+ case CPP_NUMBER:
+ if (! host_integerp (val, 1))
+ break;
+ i = tree_low_cst (val, 1);
+ /* This pragma no longer has any effect. */
+#if 0
+ if (i == 32)
+ target_flags &= ~MASK_64BIT_CR_REGS;
+ else if (i == 64)
+ target_flags |= MASK_64BIT_CR_REGS;
+ else
+ break;
+ targetm.init_builtins ();
+#else
+ if (i != 32 && i != 64)
+ break;
+#endif
+
+ type = mep_pragma_lex (&val);
+ if (type != CPP_PRAGMA_EOL)
+ warning (0, "junk at end of #pragma GCC coprocessor width");
+ return;
+
+ default:
+ break;
+ }
+
+ error ("#pragma GCC coprocessor width takes only 32 or 64");
+}
+
+static void
+mep_pragma_coprocessor_subclass (void)
+{
+ tree val;
+ enum cpp_ttype type;
+ HARD_REG_SET set;
+ int class_letter;
+ enum reg_class rclass;
+
+ type = mep_pragma_lex (&val);
+ if (type != CPP_CHAR)
+ goto syntax_error;
+ class_letter = tree_low_cst (val, 1);
+ if (class_letter >= 'A' && class_letter <= 'D')
+ switch (class_letter)
+ {
+ case 'A':
+ rclass = USER0_REGS;
+ break;
+ case 'B':
+ rclass = USER1_REGS;
+ break;
+ case 'C':
+ rclass = USER2_REGS;
+ break;
+ case 'D':
+ rclass = USER3_REGS;
+ break;
+ }
+ else
+ {
+ error ("#pragma GCC coprocessor subclass letter must be in [ABCD]");
+ return;
+ }
+ if (reg_class_size[rclass] > 0)
+ {
+ error ("#pragma GCC coprocessor subclass '%c' already defined",
+ class_letter);
+ return;
+ }
+
+ type = mep_pragma_lex (&val);
+ if (type != CPP_EQ)
+ goto syntax_error;
+
+ if (! parse_cr_set (&set))
+ return;
+
+ /* Fix up register class hierarchy. */
+ COPY_HARD_REG_SET (reg_class_contents[rclass], set);
+ mep_init_regs ();
+ return;
+
+ syntax_error:
+ error ("malformed #pragma GCC coprocessor subclass");
+}
+
+static void
+mep_pragma_disinterrupt (cpp_reader *reader ATTRIBUTE_UNUSED)
+{
+ tree val;
+ enum cpp_ttype type;
+ int saw_one = 0;
+
+ for (;;)
+ {
+ type = mep_pragma_lex (&val);
+ if (type == CPP_COMMA)
+ continue;
+ if (type != CPP_NAME)
+ break;
+ mep_note_pragma_disinterrupt (IDENTIFIER_POINTER (val));
+ saw_one = 1;
+ }
+ if (!saw_one || type != CPP_PRAGMA_EOL)
+ {
+ error ("malformed #pragma disinterrupt");
+ return;
+ }
+}
+
+static void
+mep_pragma_coprocessor (cpp_reader *reader ATTRIBUTE_UNUSED)
+{
+ tree val;
+ enum cpp_ttype type;
+
+ type = mep_pragma_lex (&val);
+ if (type != CPP_NAME)
+ {
+ error ("malformed #pragma GCC coprocessor");
+ return;
+ }
+
+ if (!TARGET_COP)
+ error ("coprocessor not enabled");
+
+ if (strcmp (IDENTIFIER_POINTER (val), "available") == 0)
+ mep_pragma_coprocessor_which (CW_AVAILABLE);
+ else if (strcmp (IDENTIFIER_POINTER (val), "call_saved") == 0)
+ mep_pragma_coprocessor_which (CW_CALL_SAVED);
+ else if (strcmp (IDENTIFIER_POINTER (val), "width") == 0)
+ mep_pragma_coprocessor_width ();
+ else if (strcmp (IDENTIFIER_POINTER (val), "subclass") == 0)
+ mep_pragma_coprocessor_subclass ();
+ else
+ error ("unknown #pragma GCC coprocessor %E", val);
+}
+
+static void
+mep_pragma_call (cpp_reader *reader ATTRIBUTE_UNUSED)
+{
+ tree val;
+ enum cpp_ttype type;
+ int saw_one = 0;
+
+ for (;;)
+ {
+ type = mep_pragma_lex (&val);
+ if (type == CPP_COMMA)
+ continue;
+ if (type != CPP_NAME)
+ break;
+ mep_note_pragma_call (IDENTIFIER_POINTER (val));
+ saw_one = 1;
+ }
+ if (!saw_one || type != CPP_PRAGMA_EOL)
+ {
+ error ("malformed #pragma call");
+ return;
+ }
+}
+
+void
+mep_register_pragmas (void)
+{
+ c_register_pragma ("custom", "io_volatile", mep_pragma_io_volatile);
+ c_register_pragma ("GCC", "coprocessor", mep_pragma_coprocessor);
+ c_register_pragma (0, "disinterrupt", mep_pragma_disinterrupt);
+ c_register_pragma (0, "call", mep_pragma_call);
+}
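+
+/* For reference, the pragmas registered above are used along these
+   lines (illustrative only; the function and register names here are
+   made up):
+
+     #pragma custom io_volatile on
+     #pragma GCC coprocessor available $c0...$c7
+     #pragma GCC coprocessor width 64
+     #pragma disinterrupt handler_a, handler_b
+     #pragma call fast_helper
+*/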
diff --git a/gcc/config/mep/mep-protos.h b/gcc/config/mep/mep-protos.h
new file mode 100644
index 000000000..12cef58a8
--- /dev/null
+++ b/gcc/config/mep/mep-protos.h
@@ -0,0 +1,133 @@
+/* Prototypes for exported functions defined in mep.c
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010 Free
+ Software Foundation, Inc.
+ Contributed by Red Hat Inc (dj@redhat.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern int mep_regno_reg_class (int);
+extern int mep_reg_class_from_constraint (int, const char *);
+extern bool mep_const_ok_for_letter_p (HOST_WIDE_INT, int);
+extern bool mep_extra_constraint (rtx, int);
+extern rtx mep_mulr_source (rtx, rtx, rtx, rtx);
+extern bool mep_reuse_lo_p (rtx, rtx, rtx, bool);
+extern bool mep_use_post_modify_p (rtx, rtx, rtx);
+extern bool mep_allow_clip (rtx, rtx, int);
+extern bool mep_bit_position_p (rtx, bool);
+extern bool mep_split_mov (rtx *, int);
+extern bool mep_vliw_mode_match (rtx);
+extern bool mep_vliw_jmp_match (rtx);
+extern bool mep_multi_slot (rtx);
+extern bool mep_legitimate_address (enum machine_mode, rtx, int);
+extern int mep_legitimize_address (rtx *, rtx, enum machine_mode);
+extern int mep_legitimize_reload_address (rtx *, enum machine_mode, int, /*enum reload_type*/ int, int);
+extern int mep_core_address_length (rtx, int);
+extern int mep_cop_address_length (rtx, int);
+extern bool mep_expand_mov (rtx *, enum machine_mode);
+extern bool mep_mov_ok (rtx *, enum machine_mode);
+extern void mep_split_wide_move (rtx *, enum machine_mode);
+#ifdef RTX_CODE
+extern bool mep_expand_setcc (rtx *);
+extern rtx mep_expand_cbranch (rtx *);
+extern bool mep_legitimate_constant_p (rtx);
+#endif
+extern const char *mep_emit_cbranch (rtx *, int);
+extern void mep_expand_call (rtx *, int);
+extern rtx mep_find_base_term (rtx);
+extern enum reg_class mep_secondary_input_reload_class (enum reg_class, enum machine_mode, rtx);
+extern enum reg_class mep_secondary_output_reload_class (enum reg_class, enum machine_mode, rtx);
+extern bool mep_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode);
+extern void mep_expand_reload (rtx *, enum machine_mode);
+extern enum reg_class mep_preferred_reload_class (rtx, enum reg_class);
+extern int mep_register_move_cost (enum machine_mode, enum reg_class, enum reg_class);
+extern void mep_init_expanders (void);
+extern rtx mep_return_addr_rtx (int);
+extern bool mep_epilogue_uses (int);
+extern int mep_elimination_offset (int, int);
+extern void mep_expand_prologue (void);
+extern void mep_expand_epilogue (void);
+extern void mep_expand_eh_return (rtx *);
+extern void mep_emit_eh_epilogue (rtx *);
+extern void mep_expand_sibcall_epilogue (void);
+extern rtx mep_return_stackadj_rtx (void);
+extern rtx mep_return_handler_rtx (void);
+extern void mep_function_profiler (FILE *);
+extern const char *mep_emit_bb_trace_ret (void);
+extern void mep_print_operand_address (FILE *, rtx);
+extern void mep_print_operand (FILE *, rtx, int);
+extern void mep_final_prescan_insn (rtx, rtx *, int);
+extern void mep_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+extern bool mep_return_in_memory (const_tree, const_tree);
+extern rtx mep_function_value (const_tree, const_tree);
+extern rtx mep_libcall_value (enum machine_mode);
+extern void mep_asm_output_opcode (FILE *, const char *);
+extern void mep_note_pragma_disinterrupt (const char *);
+extern void mep_note_pragma_call (const char *);
+extern void mep_file_cleanups (void);
+extern const char *mep_strip_name_encoding (const char *);
+extern void mep_output_aligned_common (FILE *, tree, const char *,
+ int, int, int);
+extern void mep_emit_doloop (rtx *, int);
+extern bool mep_vliw_function_p (tree);
+extern bool mep_store_data_bypass_p (rtx, rtx);
+extern bool mep_mul_hilo_bypass_p (rtx, rtx);
+extern bool mep_ipipe_ldc_p (rtx);
+extern bool mep_emit_intrinsic (int, const rtx *);
+extern bool mep_expand_unary_intrinsic (int, rtx *);
+extern bool mep_expand_binary_intrinsic (int, int, int, int, rtx *);
+extern int mep_intrinsic_length (int);
+
+extern void mep_register_pragmas (void);
+extern int mep_section_tag (rtx);
+extern bool mep_lookup_pragma_call (const char *);
+extern bool mep_have_core_copro_moves_p;
+extern bool mep_have_copro_copro_moves_p;
+
+extern bool mep_cannot_change_mode_class (enum machine_mode, enum machine_mode,
+ enum reg_class);
+
+/* These are called from mep-pragmas (front end) and then call into
+ the RTL layer to re-initialize the register tables once we're done
+ changing them via pragmas. */
+extern void mep_save_register_info (void);
+extern void mep_reinit_regs (void);
+extern void mep_init_regs (void);
+
+
+extern int cgen_h_uint_6a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_7a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_8a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_6a2_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_22a4_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_2a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_24a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_6a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_5a4_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_2a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_16a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_3a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_5a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_16a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_8a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_7a2_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_6a4_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_5a8_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_4a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_10a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_sint_12a1_immediate (rtx, enum machine_mode);
+extern int cgen_h_uint_20a1_immediate (rtx, enum machine_mode);
diff --git a/gcc/config/mep/mep-tramp.c b/gcc/config/mep/mep-tramp.c
new file mode 100644
index 000000000..bf484ca4e
--- /dev/null
+++ b/gcc/config/mep/mep-tramp.c
@@ -0,0 +1,103 @@
+/* Trampoline support for MeP
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Red Hat Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ 7a0a ldc $10,$pc
+ c0ae000a lw $0,10($10)
+ caae000e lw $10,14($10)
+ 10ae jmp $10
+ 00000000 static chain
+ 00000000 function address
+*/
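+
+/* The lw offsets 10 and 14 assume that reading $pc via ldc yields the
+   address of the *following* instruction: $10 then points at byte 2
+   of the trampoline, so the loads reach the literals at bytes 12 and
+   16 (tramp[3] and tramp[4] below).  */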
+
+static inline int
+cache_config_register (void)
+{
+ int rv;
+ asm ("ldc\t%0, $ccfg" : "=r" (rv));
+ return rv;
+}
+
+#define ICACHE_SIZE ((cache_config_register() >> 16) & 0x7f)
+#define DCACHE_SIZE (cache_config_register() & 0x7f)
+
+#define ICACHE_DATA_BASE 0x00300000
+#define ICACHE_TAG_BASE 0x00310000
+#define DCACHE_DATA_BASE 0x00320000
+#define DCACHE_TAG_BASE 0x00330000
+
+static inline void
+flush_dcache (int addr)
+{
+ asm volatile ("cache\t0, (%0)" : : "r" (addr));
+}
+
+void
+__mep_trampoline_helper (unsigned long *tramp,
+ int function_address,
+ int static_chain);
+
+void
+__mep_trampoline_helper (unsigned long *tramp,
+ int function_address,
+ int static_chain)
+{
+ int dsize, isize;
+
+#ifdef __LITTLE_ENDIAN__
+ tramp[0] = 0xc0ae7a0a;
+ tramp[1] = 0xcaae000a;
+ tramp[2] = 0x10ae000e;
+#else
+ tramp[0] = 0x7a0ac0ae;
+ tramp[1] = 0x000acaae;
+ tramp[2] = 0x000e10ae;
+#endif
+ tramp[3] = static_chain;
+ tramp[4] = function_address;
+
+ dsize = DCACHE_SIZE;
+ isize = ICACHE_SIZE;
+
+ if (dsize)
+ {
+ flush_dcache ((int)tramp);
+ flush_dcache ((int)tramp+16);
+ }
+
+ if (isize)
+ {
+ int imask = (isize * 1024) - 1;
+ int tmask = ~imask;
+ unsigned int i;
+ volatile unsigned int *tags;
+
+ imask &= 0xffe0;
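+
+      /* For each 16-byte line the trampoline touches, look up the
+         direct-mapped icache tag for that address; on a tag match,
+         clear bit 0 (presumably the valid bit) so the stale line is
+         refetched.  */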
+
+      for (i = (unsigned int) tramp; i < (unsigned int) tramp + 20; i += 16)
+ {
+ tags = (unsigned int *)(ICACHE_TAG_BASE + (i & imask));
+ if ((*tags & tmask) == (i & tmask))
+ *tags &= ~1;
+ }
+ }
+}
diff --git a/gcc/config/mep/mep.c b/gcc/config/mep/mep.c
new file mode 100644
index 000000000..913a30a75
--- /dev/null
+++ b/gcc/config/mep/mep.c
@@ -0,0 +1,7464 @@
+/* Definitions for Toshiba Media Processor
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "except.h"
+#include "function.h"
+#include "optabs.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "diagnostic-core.h"
+#include "integrate.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "df.h"
+#include "gimple.h"
+
+/* Structure of this file:
+
+ + Command Line Option Support
+ + Pattern support - constraints, predicates, expanders
+ + Reload Support
+ + Costs
+ + Functions to save and restore machine-specific function data.
+ + Frame/Epilog/Prolog Related
+ + Operand Printing
+ + Function args in registers
+ + Handle pipeline hazards
+ + Handle attributes
+ + Trampolines
+ + Machine-dependent Reorg
+ + Builtins. */
+
+/* Symbol encodings:
+
+ Symbols are encoded as @ <char> . <name> where <char> is one of these:
+
+ b - based
+ t - tiny
+ n - near
+ f - far
+ i - io, near
+ I - io, far
+ c - cb (control bus) */
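+
+/* For example, an object named "foo" placed in the tiny area is
+   emitted as "@t.foo"; mep_section_tag and mep_strip_name_encoding
+   parse this prefix back out.  */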
+
+struct GTY(()) machine_function
+{
+ int mep_frame_pointer_needed;
+
+ /* For varargs. */
+ int arg_regs_to_save;
+ int regsave_filler;
+ int frame_filler;
+ int frame_locked;
+
+ /* Records __builtin_return address. */
+ rtx eh_stack_adjust;
+
+ int reg_save_size;
+ int reg_save_slot[FIRST_PSEUDO_REGISTER];
+ unsigned char reg_saved[FIRST_PSEUDO_REGISTER];
+
+ /* 2 if the current function has an interrupt attribute, 1 if not, 0
+ if unknown. This is here because resource.c uses EPILOGUE_USES
+ which needs it. */
+ int interrupt_handler;
+
+ /* Likewise, for disinterrupt attribute. */
+ int disable_interrupts;
+
+ /* Number of doloop tags used so far. */
+ int doloop_tags;
+
+ /* True if the last tag was allocated to a doloop_end. */
+ bool doloop_tag_from_end;
+
+ /* True if reload changes $TP. */
+ bool reload_changes_tp;
+
+ /* 2 if there are asm()s without operands, 1 if not, 0 if unknown.
+ We only set this if the function is an interrupt handler. */
+ int asms_without_operands;
+};
+
+#define MEP_CONTROL_REG(x) \
+ (GET_CODE (x) == REG && ANY_CONTROL_REGNO_P (REGNO (x)))
+
+static GTY(()) section * based_section;
+static GTY(()) section * tinybss_section;
+static GTY(()) section * far_section;
+static GTY(()) section * farbss_section;
+static GTY(()) section * frodata_section;
+static GTY(()) section * srodata_section;
+
+static GTY(()) section * vtext_section;
+static GTY(()) section * vftext_section;
+static GTY(()) section * ftext_section;
+
+static void mep_set_leaf_registers (int);
+static bool symbol_p (rtx);
+static bool symbolref_p (rtx);
+static void encode_pattern_1 (rtx);
+static void encode_pattern (rtx);
+static bool const_in_range (rtx, int, int);
+static void mep_rewrite_mult (rtx, rtx);
+static void mep_rewrite_mulsi3 (rtx, rtx, rtx, rtx);
+static void mep_rewrite_maddsi3 (rtx, rtx, rtx, rtx, rtx);
+static bool mep_reuse_lo_p_1 (rtx, rtx, rtx, bool);
+static bool move_needs_splitting (rtx, rtx, enum machine_mode);
+static bool mep_expand_setcc_1 (enum rtx_code, rtx, rtx, rtx);
+static bool mep_nongeneral_reg (rtx);
+static bool mep_general_copro_reg (rtx);
+static bool mep_nonregister (rtx);
+static struct machine_function* mep_init_machine_status (void);
+static rtx mep_tp_rtx (void);
+static rtx mep_gp_rtx (void);
+static bool mep_interrupt_p (void);
+static bool mep_disinterrupt_p (void);
+static bool mep_reg_set_p (rtx, rtx);
+static bool mep_reg_set_in_function (int);
+static bool mep_interrupt_saved_reg (int);
+static bool mep_call_saves_register (int);
+static rtx F (rtx);
+static void add_constant (int, int, int, int);
+static rtx maybe_dead_move (rtx, rtx, bool);
+static void mep_reload_pointer (int, const char *);
+static void mep_start_function (FILE *, HOST_WIDE_INT);
+static bool mep_function_ok_for_sibcall (tree, tree);
+static int unique_bit_in (HOST_WIDE_INT);
+static int bit_size_for_clip (HOST_WIDE_INT);
+static int bytesize (const_tree, enum machine_mode);
+static tree mep_validate_based_tiny (tree *, tree, tree, int, bool *);
+static tree mep_validate_near_far (tree *, tree, tree, int, bool *);
+static tree mep_validate_disinterrupt (tree *, tree, tree, int, bool *);
+static tree mep_validate_interrupt (tree *, tree, tree, int, bool *);
+static tree mep_validate_io_cb (tree *, tree, tree, int, bool *);
+static tree mep_validate_vliw (tree *, tree, tree, int, bool *);
+static bool mep_function_attribute_inlinable_p (const_tree);
+static bool mep_can_inline_p (tree, tree);
+static bool mep_lookup_pragma_disinterrupt (const char *);
+static int mep_multiple_address_regions (tree, bool);
+static int mep_attrlist_to_encoding (tree, tree);
+static void mep_insert_attributes (tree, tree *);
+static void mep_encode_section_info (tree, rtx, int);
+static section * mep_select_section (tree, int, unsigned HOST_WIDE_INT);
+static void mep_unique_section (tree, int);
+static unsigned int mep_section_type_flags (tree, const char *, int);
+static void mep_asm_named_section (const char *, unsigned int, tree);
+static bool mep_mentioned_p (rtx, rtx, int);
+static void mep_reorg_regmove (rtx);
+static rtx mep_insert_repeat_label_last (rtx, rtx, bool, bool);
+static void mep_reorg_repeat (rtx);
+static bool mep_invertable_branch_p (rtx);
+static void mep_invert_branch (rtx, rtx);
+static void mep_reorg_erepeat (rtx);
+static void mep_jmp_return_reorg (rtx);
+static void mep_reorg_addcombine (rtx);
+static void mep_reorg (void);
+static void mep_init_intrinsics (void);
+static void mep_init_builtins (void);
+static void mep_intrinsic_unavailable (int);
+static bool mep_get_intrinsic_insn (int, const struct cgen_insn **);
+static bool mep_get_move_insn (int, const struct cgen_insn **);
+static rtx mep_convert_arg (enum machine_mode, rtx);
+static rtx mep_convert_regnum (const struct cgen_regnum_operand *, rtx);
+static rtx mep_legitimize_arg (const struct insn_operand_data *, rtx, int);
+static void mep_incompatible_arg (const struct insn_operand_data *, rtx, int, tree);
+static rtx mep_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static int mep_adjust_cost (rtx, rtx, rtx, int);
+static int mep_issue_rate (void);
+static rtx mep_find_ready_insn (rtx *, int, enum attr_slot, int);
+static void mep_move_ready_insn (rtx *, int, rtx);
+static int mep_sched_reorder (FILE *, int, rtx *, int *, int);
+static rtx mep_make_bundle (rtx, rtx);
+static void mep_bundle_insns (rtx);
+static bool mep_rtx_cost (rtx, int, int, int *, bool);
+static int mep_address_cost (rtx, bool);
+static void mep_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int *, int);
+static bool mep_pass_by_reference (CUMULATIVE_ARGS * cum, enum machine_mode,
+ const_tree, bool);
+static rtx mep_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void mep_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool mep_vector_mode_supported_p (enum machine_mode);
+static bool mep_handle_option (size_t, const char *, int);
+static rtx mep_allocate_initial_value (rtx);
+static void mep_asm_init_sections (void);
+static int mep_comp_type_attributes (const_tree, const_tree);
+static bool mep_narrow_volatile_bitfield (void);
+static rtx mep_expand_builtin_saveregs (void);
+static tree mep_build_builtin_va_list (void);
+static void mep_expand_va_start (tree, rtx);
+static tree mep_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static bool mep_can_eliminate (const int, const int);
+static void mep_conditional_register_usage (void);
+static void mep_trampoline_init (rtx, tree, rtx);
+
+#define WANT_GCC_DEFINITIONS
+#include "mep-intrin.h"
+#undef WANT_GCC_DEFINITIONS
+
+
+/* Command Line Option Support. */
+
+char mep_leaf_registers [FIRST_PSEUDO_REGISTER];
+
+/* True if we can use cmov instructions to move values back and forth
+ between core and coprocessor registers. */
+bool mep_have_core_copro_moves_p;
+
+/* True if we can use cmov instructions (or a work-alike) to move
+ values between coprocessor registers. */
+bool mep_have_copro_copro_moves_p;
+
+/* A table of all coprocessor instructions that can act like
+ a coprocessor-to-coprocessor cmov. */
+static const int mep_cmov_insns[] = {
+ mep_cmov,
+ mep_cpmov,
+ mep_fmovs,
+ mep_caddi3,
+ mep_csubi3,
+ mep_candi3,
+ mep_cori3,
+ mep_cxori3,
+ mep_cand3,
+ mep_cor3
+};
+
+static int option_mtiny_specified = 0;
+
+
+static void
+mep_set_leaf_registers (int enable)
+{
+ int i;
+
+ if (mep_leaf_registers[0] != enable)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ mep_leaf_registers[i] = enable;
+}
+
+static void
+mep_conditional_register_usage (void)
+{
+ int i;
+
+ if (!TARGET_OPT_MULT && !TARGET_OPT_DIV)
+ {
+ fixed_regs[HI_REGNO] = 1;
+ fixed_regs[LO_REGNO] = 1;
+ call_used_regs[HI_REGNO] = 1;
+ call_used_regs[LO_REGNO] = 1;
+ }
+
+ for (i = FIRST_SHADOW_REGISTER; i <= LAST_SHADOW_REGISTER; i++)
+ global_regs[i] = 1;
+}
+
+
+static const struct default_options mep_option_optimization_table[] =
+ {
+ /* The first scheduling pass often increases register pressure and
+ tends to result in more spill code. Only run it when
+ specifically asked. */
+ { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
+
+ /* Using $fp doesn't gain us much, even when debugging is
+ important. */
+ { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 1 },
+
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+static void
+mep_option_override (void)
+{
+ if (flag_pic == 1)
+ warning (OPT_fpic, "-fpic is not supported");
+ if (flag_pic == 2)
+ warning (OPT_fPIC, "-fPIC is not supported");
+ if (TARGET_S && TARGET_M)
+ error ("only one of -ms and -mm may be given");
+ if (TARGET_S && TARGET_L)
+ error ("only one of -ms and -ml may be given");
+ if (TARGET_M && TARGET_L)
+ error ("only one of -mm and -ml may be given");
+ if (TARGET_S && option_mtiny_specified)
+ error ("only one of -ms and -mtiny= may be given");
+ if (TARGET_M && option_mtiny_specified)
+ error ("only one of -mm and -mtiny= may be given");
+ if (TARGET_OPT_CLIP && ! TARGET_OPT_MINMAX)
+ warning (0, "-mclip currently has no effect without -mminmax");
+
+ if (mep_const_section)
+ {
+ if (strcmp (mep_const_section, "tiny") != 0
+ && strcmp (mep_const_section, "near") != 0
+ && strcmp (mep_const_section, "far") != 0)
+ error ("-mc= must be -mc=tiny, -mc=near, or -mc=far");
+ }
+
+ if (TARGET_S)
+ mep_tiny_cutoff = 65536;
+ if (TARGET_M)
+ mep_tiny_cutoff = 0;
+ if (TARGET_L && ! option_mtiny_specified)
+ mep_tiny_cutoff = 0;
+
+ if (TARGET_64BIT_CR_REGS)
+ flag_split_wide_types = 0;
+
+ init_machine_status = mep_init_machine_status;
+ mep_init_intrinsics ();
+}
+
+/* Pattern Support - constraints, predicates, expanders. */
+
+/* MEP has very few instructions that can refer to the span of
+ addresses used by symbols, so it's common to check for them. */
+
+static bool
+symbol_p (rtx x)
+{
+ int c = GET_CODE (x);
+
+ return (c == CONST_INT
+ || c == CONST
+ || c == SYMBOL_REF);
+}
+
+static bool
+symbolref_p (rtx x)
+{
+ int c;
+
+ if (GET_CODE (x) != MEM)
+ return false;
+
+ c = GET_CODE (XEXP (x, 0));
+ return (c == CONST_INT
+ || c == CONST
+ || c == SYMBOL_REF);
+}
+
+/* static const char *reg_class_names[] = REG_CLASS_NAMES; */
+
+#define GEN_REG(R, STRICT) \
+ (GR_REGNO_P (R) \
+ || (!STRICT \
+ && ((R) == ARG_POINTER_REGNUM \
+ || (R) >= FIRST_PSEUDO_REGISTER)))
+
+static char pattern[12], *patternp;
+static GTY(()) rtx patternr[12];
+#define RTX_IS(x) (strcmp (pattern, x) == 0)
+
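+/* Flatten rtx X into PATTERN, one character per node: for example
+   (plus (reg) (const_int)) encodes as "+ri" and (mem (symbol_ref))
+   as "ms".  RTX_IS matches against the result, and patternr[] keeps
+   the rtx seen at each position.  */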
+static void
+encode_pattern_1 (rtx x)
+{
+ int i;
+
+ if (patternp == pattern + sizeof (pattern) - 2)
+ {
+ patternp[-1] = '?';
+ return;
+ }
+
+ patternr[patternp-pattern] = x;
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ *patternp++ = 'r';
+ break;
+ case MEM:
+ *patternp++ = 'm';
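+      /* Fall through: encode the address expression too.  */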
+ case CONST:
+ encode_pattern_1 (XEXP(x, 0));
+ break;
+ case PLUS:
+ *patternp++ = '+';
+ encode_pattern_1 (XEXP(x, 0));
+ encode_pattern_1 (XEXP(x, 1));
+ break;
+ case LO_SUM:
+ *patternp++ = 'L';
+ encode_pattern_1 (XEXP(x, 0));
+ encode_pattern_1 (XEXP(x, 1));
+ break;
+ case HIGH:
+ *patternp++ = 'H';
+ encode_pattern_1 (XEXP(x, 0));
+ break;
+ case SYMBOL_REF:
+ *patternp++ = 's';
+ break;
+ case LABEL_REF:
+ *patternp++ = 'l';
+ break;
+ case CONST_INT:
+ case CONST_DOUBLE:
+ *patternp++ = 'i';
+ break;
+ case UNSPEC:
+ *patternp++ = 'u';
+ *patternp++ = '0' + XCINT(x, 1, UNSPEC);
+ for (i=0; i<XVECLEN (x, 0); i++)
+ encode_pattern_1 (XVECEXP (x, 0, i));
+ break;
+ case USE:
+ *patternp++ = 'U';
+ break;
+ default:
+ *patternp++ = '?';
+#if 0
+ fprintf (stderr, "can't encode pattern %s\n", GET_RTX_NAME(GET_CODE(x)));
+ debug_rtx (x);
+ gcc_unreachable ();
+#endif
+ break;
+ }
+}
+
+static void
+encode_pattern (rtx x)
+{
+ patternp = pattern;
+ encode_pattern_1 (x);
+ *patternp = 0;
+}
+
+int
+mep_section_tag (rtx x)
+{
+ const char *name;
+
+ while (1)
+ {
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ case CONST:
+ x = XEXP (x, 0);
+ break;
+ case UNSPEC:
+ x = XVECEXP (x, 0, 0);
+ break;
+ case PLUS:
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return 0;
+ x = XEXP (x, 0);
+ break;
+ default:
+ goto done;
+ }
+ }
+ done:
+ if (GET_CODE (x) != SYMBOL_REF)
+ return 0;
+ name = XSTR (x, 0);
+ if (name[0] == '@' && name[2] == '.')
+ {
+ if (name[1] == 'i' || name[1] == 'I')
+ {
+ if (name[1] == 'I')
+	    return 'f'; /* far */
+	  return 'n'; /* near */
+ }
+ return name[1];
+ }
+ return 0;
+}
+
+int
+mep_regno_reg_class (int regno)
+{
+ switch (regno)
+ {
+ case SP_REGNO: return SP_REGS;
+ case TP_REGNO: return TP_REGS;
+ case GP_REGNO: return GP_REGS;
+ case 0: return R0_REGS;
+ case HI_REGNO: return HI_REGS;
+ case LO_REGNO: return LO_REGS;
+ case ARG_POINTER_REGNUM: return GENERAL_REGS;
+ }
+
+ if (GR_REGNO_P (regno))
+ return regno < FIRST_GR_REGNO + 8 ? TPREL_REGS : GENERAL_REGS;
+ if (CONTROL_REGNO_P (regno))
+ return CONTROL_REGS;
+
+ if (CR_REGNO_P (regno))
+ {
+ int i, j;
+
+ /* Search for the register amongst user-defined subclasses of
+ the coprocessor registers. */
+ for (i = USER0_REGS; i <= USER3_REGS; ++i)
+ {
+ if (! TEST_HARD_REG_BIT (reg_class_contents[i], regno))
+ continue;
+ for (j = 0; j < N_REG_CLASSES; ++j)
+ {
+ enum reg_class sub = reg_class_subclasses[i][j];
+
+ if (sub == LIM_REG_CLASSES)
+ return i;
+ if (TEST_HARD_REG_BIT (reg_class_contents[sub], regno))
+ break;
+ }
+ }
+
+ return LOADABLE_CR_REGNO_P (regno) ? LOADABLE_CR_REGS : CR_REGS;
+ }
+
+ if (CCR_REGNO_P (regno))
+ return CCR_REGS;
+
+ gcc_assert (regno >= FIRST_SHADOW_REGISTER && regno <= LAST_SHADOW_REGISTER);
+ return NO_REGS;
+}
+
+#if 0
+int
+mep_reg_class_from_constraint (int c, const char *str)
+{
+ switch (c)
+ {
+ case 'a':
+ return SP_REGS;
+ case 'b':
+ return TP_REGS;
+ case 'c':
+ return CONTROL_REGS;
+ case 'd':
+ return HILO_REGS;
+ case 'e':
+ {
+ switch (str[1])
+ {
+ case 'm':
+ return LOADABLE_CR_REGS;
+ case 'x':
+ return mep_have_copro_copro_moves_p ? CR_REGS : NO_REGS;
+ case 'r':
+ return mep_have_core_copro_moves_p ? CR_REGS : NO_REGS;
+ default:
+ return NO_REGS;
+ }
+ }
+ case 'h':
+ return HI_REGS;
+ case 'j':
+ return RPC_REGS;
+ case 'l':
+ return LO_REGS;
+ case 't':
+ return TPREL_REGS;
+ case 'v':
+ return GP_REGS;
+ case 'x':
+ return CR_REGS;
+ case 'y':
+ return CCR_REGS;
+ case 'z':
+ return R0_REGS;
+
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ {
+ enum reg_class which = c - 'A' + USER0_REGS;
+ return (reg_class_size[which] > 0 ? which : NO_REGS);
+ }
+
+ default:
+ return NO_REGS;
+ }
+}
+
+bool
+mep_const_ok_for_letter_p (HOST_WIDE_INT value, int c)
+{
+ switch (c)
+ {
+ case 'I': return value >= -32768 && value < 32768;
+ case 'J': return value >= 0 && value < 65536;
+ case 'K': return value >= 0 && value < 0x01000000;
+ case 'L': return value >= -32 && value < 32;
+ case 'M': return value >= 0 && value < 32;
+ case 'N': return value >= 0 && value < 16;
+ case 'O':
+ if (value & 0xffff)
+ return false;
+ return value >= -2147483647-1 && value <= 2147483647;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+bool
+mep_extra_constraint (rtx value, int c)
+{
+ encode_pattern (value);
+
+ switch (c)
+ {
+ case 'R':
+ /* For near symbols, like what call uses. */
+ if (GET_CODE (value) == REG)
+ return 0;
+ return mep_call_address_operand (value, GET_MODE (value));
+
+ case 'S':
+ /* For signed 8-bit immediates. */
+ return (GET_CODE (value) == CONST_INT
+ && INTVAL (value) >= -128
+ && INTVAL (value) <= 127);
+
+ case 'T':
+ /* For tp/gp relative symbol values. */
+ return (RTX_IS ("u3s") || RTX_IS ("u2s")
+ || RTX_IS ("+u3si") || RTX_IS ("+u2si"));
+
+ case 'U':
+ /* Non-absolute memories. */
+ return GET_CODE (value) == MEM && ! CONSTANT_P (XEXP (value, 0));
+
+ case 'W':
+ /* %hi(sym) */
+ return RTX_IS ("Hs");
+
+ case 'Y':
+ /* Register indirect. */
+ return RTX_IS ("mr");
+
+ case 'Z':
+ return mep_section_tag (value) == 'c' && RTX_IS ("ms");
+ }
+
+ return false;
+}
+#endif
+
+#undef PASS
+#undef FAIL
+
+static bool
+const_in_range (rtx x, int minv, int maxv)
+{
+ return (GET_CODE (x) == CONST_INT
+ && INTVAL (x) >= minv
+ && INTVAL (x) <= maxv);
+}
+
+/* Given three integer registers DEST, SRC1 and SRC2, return an rtx X
+ such that "mulr DEST,X" will calculate DEST = SRC1 * SRC2. If a move
+ is needed, emit it before INSN if INSN is nonnull, otherwise emit it
+ at the end of the insn stream. */
+
+rtx
+mep_mulr_source (rtx insn, rtx dest, rtx src1, rtx src2)
+{
+ if (rtx_equal_p (dest, src1))
+ return src2;
+ else if (rtx_equal_p (dest, src2))
+ return src1;
+ else
+ {
+ if (insn == 0)
+ emit_insn (gen_movsi (copy_rtx (dest), src1));
+ else
+ emit_insn_before (gen_movsi (copy_rtx (dest), src1), insn);
+ return src2;
+ }
+}
+
+/* Replace INSN's pattern with PATTERN, a multiplication PARALLEL.
+ Change the last element of PATTERN from (clobber (scratch:SI))
+ to (clobber (reg:SI HI_REGNO)). */
+
+static void
+mep_rewrite_mult (rtx insn, rtx pattern)
+{
+ rtx hi_clobber;
+
+ hi_clobber = XVECEXP (pattern, 0, XVECLEN (pattern, 0) - 1);
+ XEXP (hi_clobber, 0) = gen_rtx_REG (SImode, HI_REGNO);
+ PATTERN (insn) = pattern;
+ INSN_CODE (insn) = -1;
+}
+
+/* Subroutine of mep_reuse_lo_p. Rewrite instruction INSN so that it
+ calculates SRC1 * SRC2 and stores the result in $lo. Also make it
+ store the result in DEST if nonnull. */
+
+static void
+mep_rewrite_mulsi3 (rtx insn, rtx dest, rtx src1, rtx src2)
+{
+ rtx lo, pattern;
+
+ lo = gen_rtx_REG (SImode, LO_REGNO);
+ if (dest)
+ pattern = gen_mulsi3r (lo, dest, copy_rtx (dest),
+ mep_mulr_source (insn, dest, src1, src2));
+ else
+ pattern = gen_mulsi3_lo (lo, src1, src2);
+ mep_rewrite_mult (insn, pattern);
+}
+
+/* Like mep_rewrite_mulsi3, but calculate SRC1 * SRC2 + SRC3. First copy
+ SRC3 into $lo, then use either madd or maddr. The move into $lo will
+ be deleted by a peephole2 if SRC3 is already in $lo. */
+
+static void
+mep_rewrite_maddsi3 (rtx insn, rtx dest, rtx src1, rtx src2, rtx src3)
+{
+ rtx lo, pattern;
+
+ lo = gen_rtx_REG (SImode, LO_REGNO);
+ emit_insn_before (gen_movsi (copy_rtx (lo), src3), insn);
+ if (dest)
+ pattern = gen_maddsi3r (lo, dest, copy_rtx (dest),
+ mep_mulr_source (insn, dest, src1, src2),
+ copy_rtx (lo));
+ else
+ pattern = gen_maddsi3_lo (lo, src1, src2, copy_rtx (lo));
+ mep_rewrite_mult (insn, pattern);
+}
+
+/* Return true if $lo has the same value as integer register GPR when
+ instruction INSN is reached. If necessary, rewrite the instruction
+ that sets $lo so that it uses a proper SET, not a CLOBBER. LO is an
+ rtx for (reg:SI LO_REGNO).
+
+ This function is intended to be used by the peephole2 pass. Since
+ that pass goes from the end of a basic block to the beginning, and
+ propagates liveness information on the way, there is no need to
+ update register notes here.
+
+ If GPR_DEAD_P is true on entry, and this function returns true,
+ then the caller will replace _every_ use of GPR in and after INSN
+ with LO. This means that if the instruction that sets $lo is a
+ mulr- or maddr-type instruction, we can rewrite it to use mul or
+   madd instead.  In combination with the copy propagation pass,
+ this allows us to replace sequences like:
+
+ mov GPR,R1
+ mulr GPR,R2
+
+ with:
+
+ mul R1,R2
+
+ if GPR is no longer used. */
+
+static bool
+mep_reuse_lo_p_1 (rtx lo, rtx gpr, rtx insn, bool gpr_dead_p)
+{
+ do
+ {
+ insn = PREV_INSN (insn);
+ if (INSN_P (insn))
+ switch (recog_memoized (insn))
+ {
+ case CODE_FOR_mulsi3_1:
+ extract_insn (insn);
+ if (rtx_equal_p (recog_data.operand[0], gpr))
+ {
+ mep_rewrite_mulsi3 (insn,
+ gpr_dead_p ? NULL : recog_data.operand[0],
+ recog_data.operand[1],
+ recog_data.operand[2]);
+ return true;
+ }
+ return false;
+
+ case CODE_FOR_maddsi3:
+ extract_insn (insn);
+ if (rtx_equal_p (recog_data.operand[0], gpr))
+ {
+ mep_rewrite_maddsi3 (insn,
+ gpr_dead_p ? NULL : recog_data.operand[0],
+ recog_data.operand[1],
+ recog_data.operand[2],
+ recog_data.operand[3]);
+ return true;
+ }
+ return false;
+
+ case CODE_FOR_mulsi3r:
+ case CODE_FOR_maddsi3r:
+ extract_insn (insn);
+ return rtx_equal_p (recog_data.operand[1], gpr);
+
+ default:
+ if (reg_set_p (lo, insn)
+ || reg_set_p (gpr, insn)
+ || volatile_insn_p (PATTERN (insn)))
+ return false;
+
+ if (gpr_dead_p && reg_referenced_p (gpr, PATTERN (insn)))
+ gpr_dead_p = false;
+ break;
+ }
+ }
+ while (!NOTE_INSN_BASIC_BLOCK_P (insn));
+ return false;
+}
+
+/* A wrapper around mep_reuse_lo_p_1 that preserves recog_data. */
+
+bool
+mep_reuse_lo_p (rtx lo, rtx gpr, rtx insn, bool gpr_dead_p)
+{
+ bool result = mep_reuse_lo_p_1 (lo, gpr, insn, gpr_dead_p);
+ extract_insn (insn);
+ return result;
+}
+
+/* Return true if SET can be turned into a post-modify load or store
+ that adds OFFSET to GPR. In other words, return true if SET can be
+ changed into:
+
+ (parallel [SET (set GPR (plus:SI GPR OFFSET))]).
+
+ It's OK to change SET to an equivalent operation in order to
+ make it match. */
+
+static bool
+mep_use_post_modify_for_set_p (rtx set, rtx gpr, rtx offset)
+{
+ rtx *reg, *mem;
+ unsigned int reg_bytes, mem_bytes;
+ enum machine_mode reg_mode, mem_mode;
+
+ /* Only simple SETs can be converted. */
+ if (GET_CODE (set) != SET)
+ return false;
+
+ /* Point REG to what we hope will be the register side of the set and
+ MEM to what we hope will be the memory side. */
+ if (GET_CODE (SET_DEST (set)) == MEM)
+ {
+ mem = &SET_DEST (set);
+ reg = &SET_SRC (set);
+ }
+ else
+ {
+ reg = &SET_DEST (set);
+ mem = &SET_SRC (set);
+ if (GET_CODE (*mem) == SIGN_EXTEND)
+ mem = &XEXP (*mem, 0);
+ }
+
+ /* Check that *REG is a suitable coprocessor register. */
+ if (GET_CODE (*reg) != REG || !LOADABLE_CR_REGNO_P (REGNO (*reg)))
+ return false;
+
+ /* Check that *MEM is a suitable memory reference. */
+ if (GET_CODE (*mem) != MEM || !rtx_equal_p (XEXP (*mem, 0), gpr))
+ return false;
+
+ /* Get the number of bytes in each operand. */
+ mem_bytes = GET_MODE_SIZE (GET_MODE (*mem));
+ reg_bytes = GET_MODE_SIZE (GET_MODE (*reg));
+
+ /* Check that OFFSET is suitably aligned. */
+ if (INTVAL (offset) & (mem_bytes - 1))
+ return false;
+
+ /* Convert *MEM to a normal integer mode. */
+ mem_mode = mode_for_size (mem_bytes * BITS_PER_UNIT, MODE_INT, 0);
+ *mem = change_address (*mem, mem_mode, NULL);
+
+ /* Adjust *REG as well. */
+ *reg = shallow_copy_rtx (*reg);
+ if (reg == &SET_DEST (set) && reg_bytes < UNITS_PER_WORD)
+ {
+ /* SET is a subword load. Convert it to an explicit extension. */
+ PUT_MODE (*reg, SImode);
+ *mem = gen_rtx_SIGN_EXTEND (SImode, *mem);
+ }
+ else
+ {
+ reg_mode = mode_for_size (reg_bytes * BITS_PER_UNIT, MODE_INT, 0);
+ PUT_MODE (*reg, reg_mode);
+ }
+ return true;
+}
+
+/* Return the effect of frame-related instruction INSN. */
+
+static rtx
+mep_frame_expr (rtx insn)
+{
+ rtx note, expr;
+
+ note = find_reg_note (insn, REG_FRAME_RELATED_EXPR, 0);
+ expr = (note != 0 ? XEXP (note, 0) : copy_rtx (PATTERN (insn)));
+ RTX_FRAME_RELATED_P (expr) = 1;
+ return expr;
+}
+
+/* Merge instructions INSN1 and INSN2 using a PARALLEL. Store the
+ new pattern in INSN1; INSN2 will be deleted by the caller. */
+
+static void
+mep_make_parallel (rtx insn1, rtx insn2)
+{
+ rtx expr;
+
+ if (RTX_FRAME_RELATED_P (insn2))
+ {
+ expr = mep_frame_expr (insn2);
+ if (RTX_FRAME_RELATED_P (insn1))
+ expr = gen_rtx_SEQUENCE (VOIDmode,
+ gen_rtvec (2, mep_frame_expr (insn1), expr));
+ set_unique_reg_note (insn1, REG_FRAME_RELATED_EXPR, expr);
+ RTX_FRAME_RELATED_P (insn1) = 1;
+ }
+
+ PATTERN (insn1) = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, PATTERN (insn1),
+ PATTERN (insn2)));
+ INSN_CODE (insn1) = -1;
+}
+
+/* SET_INSN is an instruction that adds OFFSET to REG. Go back through
+ the basic block to see if any previous load or store instruction can
+ be persuaded to do SET_INSN as a side-effect. Return true if so. */
+
+static bool
+mep_use_post_modify_p_1 (rtx set_insn, rtx reg, rtx offset)
+{
+ rtx insn;
+
+ insn = set_insn;
+ do
+ {
+ insn = PREV_INSN (insn);
+ if (INSN_P (insn))
+ {
+ if (mep_use_post_modify_for_set_p (PATTERN (insn), reg, offset))
+ {
+ mep_make_parallel (insn, set_insn);
+ return true;
+ }
+
+ if (reg_set_p (reg, insn)
+ || reg_referenced_p (reg, PATTERN (insn))
+ || volatile_insn_p (PATTERN (insn)))
+ return false;
+ }
+ }
+ while (!NOTE_INSN_BASIC_BLOCK_P (insn));
+ return false;
+}
+
+/* A wrapper around mep_use_post_modify_p_1 that preserves recog_data. */
+
+bool
+mep_use_post_modify_p (rtx insn, rtx reg, rtx offset)
+{
+ bool result = mep_use_post_modify_p_1 (insn, reg, offset);
+ extract_insn (insn);
+ return result;
+}
+
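+/* Return true if a clip instruction can implement the min/max pair
+   with upper bound UX and lower bound LX: for the signed case (S)
+   the bounds must be -2**i and 2**i - 1 for some i; for the unsigned
+   case the lower bound must be 0.  */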
+bool
+mep_allow_clip (rtx ux, rtx lx, int s)
+{
+ HOST_WIDE_INT u = INTVAL (ux);
+ HOST_WIDE_INT l = INTVAL (lx);
+ int i;
+
+ if (!TARGET_OPT_CLIP)
+ return false;
+
+ if (s)
+ {
+ for (i = 0; i < 30; i ++)
+ if ((u == ((HOST_WIDE_INT) 1 << i) - 1)
+ && (l == - ((HOST_WIDE_INT) 1 << i)))
+ return true;
+ }
+ else
+ {
+ if (l != 0)
+ return false;
+
+ for (i = 0; i < 30; i ++)
+ if ((u == ((HOST_WIDE_INT) 1 << i) - 1))
+ return true;
+ }
+ return false;
+}
+
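+/* Return whether the low byte of constant X is a single-bit mask
+   (when LOOKING_FOR) or the complement of one (when !LOOKING_FOR).  */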
+bool
+mep_bit_position_p (rtx x, bool looking_for)
+{
+ if (GET_CODE (x) != CONST_INT)
+ return false;
+ switch ((int) INTVAL(x) & 0xff)
+ {
+ case 0x01: case 0x02: case 0x04: case 0x08:
+ case 0x10: case 0x20: case 0x40: case 0x80:
+ return looking_for;
+ case 0xfe: case 0xfd: case 0xfb: case 0xf7:
+ case 0xef: case 0xdf: case 0xbf: case 0x7f:
+ return !looking_for;
+ }
+ return false;
+}
+
+static bool
+move_needs_splitting (rtx dest, rtx src,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int s = mep_section_tag (src);
+
+ while (1)
+ {
+ if (GET_CODE (src) == CONST
+ || GET_CODE (src) == MEM)
+ src = XEXP (src, 0);
+ else if (GET_CODE (src) == SYMBOL_REF
+ || GET_CODE (src) == LABEL_REF
+ || GET_CODE (src) == PLUS)
+ break;
+ else
+ return false;
+ }
+ if (s == 'f'
+ || (GET_CODE (src) == PLUS
+ && GET_CODE (XEXP (src, 1)) == CONST_INT
+ && (INTVAL (XEXP (src, 1)) < -65536
+ || INTVAL (XEXP (src, 1)) > 0xffffff))
+ || (GET_CODE (dest) == REG
+ && REGNO (dest) > 7 && REGNO (dest) < FIRST_PSEUDO_REGISTER))
+ return true;
+ return false;
+}
+
+bool
+mep_split_mov (rtx *operands, int symbolic)
+{
+ if (symbolic)
+ {
+ if (move_needs_splitting (operands[0], operands[1], SImode))
+ return true;
+ return false;
+ }
+
+ if (GET_CODE (operands[1]) != CONST_INT)
+ return false;
+
+ if (constraint_satisfied_p (operands[1], CONSTRAINT_I)
+ || constraint_satisfied_p (operands[1], CONSTRAINT_J)
+ || constraint_satisfied_p (operands[1], CONSTRAINT_O))
+ return false;
+
+ if (((!reload_completed && !reload_in_progress)
+ || (REG_P (operands[0]) && REGNO (operands[0]) < 8))
+ && constraint_satisfied_p (operands[1], CONSTRAINT_K))
+ return false;
+
+ return true;
+}
+
+/* Irritatingly, the "jsrv" insn *toggles* PSW.OM rather than setting
+   it to one specific value.  So the insn chosen depends on whether
+ the source and destination modes match. */
+
+bool
+mep_vliw_mode_match (rtx tgt)
+{
+ bool src_vliw = mep_vliw_function_p (cfun->decl);
+ bool tgt_vliw = INTVAL (tgt);
+
+ return src_vliw == tgt_vliw;
+}
+
+/* Like the above, but also test for near/far mismatches. */
+
+bool
+mep_vliw_jmp_match (rtx tgt)
+{
+ bool src_vliw = mep_vliw_function_p (cfun->decl);
+ bool tgt_vliw = INTVAL (tgt);
+
+ if (mep_section_tag (DECL_RTL (cfun->decl)) == 'f')
+ return false;
+
+ return src_vliw == tgt_vliw;
+}
+
+bool
+mep_multi_slot (rtx x)
+{
+ return get_attr_slot (x) == SLOT_MULTI;
+}
+
+
+bool
+mep_legitimate_constant_p (rtx x)
+{
+ /* We can't convert symbol values to gp- or tp-rel values after
+ reload, as reload might have used $gp or $tp for other
+ purposes. */
+ if (GET_CODE (x) == SYMBOL_REF && (reload_in_progress || reload_completed))
+ {
+ char e = mep_section_tag (x);
+ return (e != 't' && e != 'b');
+ }
+ return 1;
+}
+
+/* Be careful not to use macros that need to be compiled one way for
+ strict, and another way for not-strict, like REG_OK_FOR_BASE_P. */
+
+bool
+mep_legitimate_address (enum machine_mode mode, rtx x, int strict)
+{
+ int the_tag;
+
+#define DEBUG_LEGIT 0
+#if DEBUG_LEGIT
+ fprintf (stderr, "legit: mode %s strict %d ", mode_name[mode], strict);
+ debug_rtx (x);
+#endif
+
+ if (GET_CODE (x) == LO_SUM
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GEN_REG (REGNO (XEXP (x, 0)), strict)
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ if (GET_MODE_SIZE (mode) > 4)
+ {
+ /* We will end up splitting this, and lo_sums are not
+ offsettable for us. */
+#if DEBUG_LEGIT
+ fprintf(stderr, " - nope, %%lo(sym)[reg] not splittable\n");
+#endif
+ return false;
+ }
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, %%lo(sym)[reg]\n");
+#endif
+ return true;
+ }
+
+ if (GET_CODE (x) == REG
+ && GEN_REG (REGNO (x), strict))
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, [reg]\n");
+#endif
+ return true;
+ }
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GEN_REG (REGNO (XEXP (x, 0)), strict)
+ && const_in_range (XEXP (x, 1), -32768, 32767))
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, [reg+const]\n");
+#endif
+ return true;
+ }
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GEN_REG (REGNO (XEXP (x, 0)), strict)
+ && GET_CODE (XEXP (x, 1)) == CONST
+ && (GET_CODE (XEXP (XEXP (x, 1), 0)) == UNSPEC
+ || (GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == UNSPEC
+ && GET_CODE (XEXP (XEXP (XEXP (x, 1), 0), 1)) == CONST_INT)))
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, [reg+unspec]\n");
+#endif
+ return true;
+ }
+
+ the_tag = mep_section_tag (x);
+
+ if (the_tag == 'f')
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - nope, [far]\n");
+#endif
+ return false;
+ }
+
+ if (mode == VOIDmode
+ && GET_CODE (x) == SYMBOL_REF)
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, call [symbol]\n");
+#endif
+ return true;
+ }
+
+ if ((mode == SImode || mode == SFmode)
+ && CONSTANT_P (x)
+ && LEGITIMATE_CONSTANT_P (x)
+ && the_tag != 't' && the_tag != 'b')
+ {
+ if (GET_CODE (x) != CONST_INT
+ || (INTVAL (x) <= 0xfffff
+ && INTVAL (x) >= 0
+ && (INTVAL (x) % 4) == 0))
+ {
+#if DEBUG_LEGIT
+ fprintf (stderr, " - yup, [const]\n");
+#endif
+ return true;
+ }
+ }
+
+#if DEBUG_LEGIT
+ fprintf (stderr, " - nope.\n");
+#endif
+ return false;
+}
+
+int
+mep_legitimize_reload_address (rtx *x, enum machine_mode mode, int opnum,
+ int type_i,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ enum reload_type type = (enum reload_type) type_i;
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == MEM
+ && GET_CODE (XEXP (*x, 1)) == REG)
+ {
+ /* GCC will by default copy the MEM into a REG, which results in
+ an invalid address. For us, the best thing to do is move the
+ whole expression to a REG. */
+ push_reload (*x, NULL_RTX, x, NULL,
+ GENERAL_REGS, mode, VOIDmode,
+ 0, 0, opnum, type);
+ return 1;
+ }
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT)
+ {
+ char e = mep_section_tag (XEXP (*x, 0));
+
+ if (e != 't' && e != 'b')
+ {
+ /* GCC thinks that (sym+const) is a valid address. Well,
+ sometimes it is, this time it isn't. The best thing to
+ do is reload the symbol to a register, since reg+int
+ tends to work, and we can't just add the symbol and
+ constant anyway. */
+ push_reload (XEXP (*x, 0), NULL_RTX, &(XEXP(*x, 0)), NULL,
+ GENERAL_REGS, mode, VOIDmode,
+ 0, 0, opnum, type);
+ return 1;
+ }
+ }
+ return 0;
+}
+
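+/* Return the encoded length, 2 or 4 bytes, of the core load/store
+   INSN whose memory reference is operand OPN.  */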
+int
+mep_core_address_length (rtx insn, int opn)
+{
+ rtx set = single_set (insn);
+ rtx mem = XEXP (set, opn);
+ rtx other = XEXP (set, 1-opn);
+ rtx addr = XEXP (mem, 0);
+
+ if (register_operand (addr, Pmode))
+ return 2;
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx addend = XEXP (addr, 1);
+
+ gcc_assert (REG_P (XEXP (addr, 0)));
+
+ switch (REGNO (XEXP (addr, 0)))
+ {
+ case STACK_POINTER_REGNUM:
+ if (GET_MODE_SIZE (GET_MODE (mem)) == 4
+ && mep_imm7a4_operand (addend, VOIDmode))
+ return 2;
+ break;
+
+ case 13: /* TP */
+ gcc_assert (REG_P (other));
+
+ if (REGNO (other) >= 8)
+ break;
+
+ if (GET_CODE (addend) == CONST
+ && GET_CODE (XEXP (addend, 0)) == UNSPEC
+ && XINT (XEXP (addend, 0), 1) == UNS_TPREL)
+ return 2;
+
+ if (GET_CODE (addend) == CONST_INT
+ && INTVAL (addend) >= 0
+ && INTVAL (addend) <= 127
+ && INTVAL (addend) % GET_MODE_SIZE (GET_MODE (mem)) == 0)
+ return 2;
+ break;
+ }
+ }
+
+ return 4;
+}
+
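+/* Likewise for coprocessor loads and stores.  */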
+int
+mep_cop_address_length (rtx insn, int opn)
+{
+ rtx set = single_set (insn);
+ rtx mem = XEXP (set, opn);
+  rtx addr;
+
+  if (GET_CODE (mem) != MEM)
+    return 2;
+
+  addr = XEXP (mem, 0);
+
+ if (register_operand (addr, Pmode))
+ return 2;
+ if (GET_CODE (addr) == POST_INC)
+ return 2;
+
+ return 4;
+}
+
+#define DEBUG_EXPAND_MOV 0
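+/* Expand a move of mode MODE.  Return true if we emitted the whole
+   insn sequence ourselves, false if the caller should fall back on
+   the standard move pattern.  */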
+bool
+mep_expand_mov (rtx *operands, enum machine_mode mode)
+{
+ int i, t;
+ int tag[2];
+ rtx tpsym, tpoffs;
+ int post_reload = 0;
+
+ tag[0] = mep_section_tag (operands[0]);
+ tag[1] = mep_section_tag (operands[1]);
+
+ if (!reload_in_progress
+ && !reload_completed
+ && GET_CODE (operands[0]) != REG
+ && GET_CODE (operands[0]) != SUBREG
+ && GET_CODE (operands[1]) != REG
+ && GET_CODE (operands[1]) != SUBREG)
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+
+#if DEBUG_EXPAND_MOV
+ fprintf(stderr, "expand move %s %d\n", mode_name[mode],
+ reload_in_progress || reload_completed);
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+
+ if (mode == DImode || mode == DFmode)
+ return false;
+
+ if (reload_in_progress || reload_completed)
+ {
+ rtx r;
+
+ if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == TP_REGNO)
+ cfun->machine->reload_changes_tp = true;
+
+ if (tag[0] == 't' || tag[1] == 't')
+ {
+ r = has_hard_reg_initial_val (Pmode, GP_REGNO);
+ if (!r || GET_CODE (r) != REG || REGNO (r) != GP_REGNO)
+ post_reload = 1;
+ }
+ if (tag[0] == 'b' || tag[1] == 'b')
+ {
+ r = has_hard_reg_initial_val (Pmode, TP_REGNO);
+ if (!r || GET_CODE (r) != REG || REGNO (r) != TP_REGNO)
+ post_reload = 1;
+ }
+ if (cfun->machine->reload_changes_tp == true)
+ post_reload = 1;
+ }
+
+ if (!post_reload)
+ {
+ rtx n;
+ if (symbol_p (operands[1]))
+ {
+ t = mep_section_tag (operands[1]);
+ if (t == 'b' || t == 't')
+ {
+ if (GET_CODE (operands[1]) == SYMBOL_REF)
+ {
+ tpsym = operands[1];
+ n = gen_rtx_UNSPEC (mode,
+ gen_rtvec (1, operands[1]),
+ t == 'b' ? UNS_TPREL : UNS_GPREL);
+ n = gen_rtx_CONST (mode, n);
+ }
+ else if (GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT)
+ {
+ tpsym = XEXP (XEXP (operands[1], 0), 0);
+ tpoffs = XEXP (XEXP (operands[1], 0), 1);
+ n = gen_rtx_UNSPEC (mode,
+ gen_rtvec (1, tpsym),
+ t == 'b' ? UNS_TPREL : UNS_GPREL);
+ n = gen_rtx_PLUS (mode, n, tpoffs);
+ n = gen_rtx_CONST (mode, n);
+ }
+ else if (GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == UNSPEC)
+ return false;
+ else
+ {
+ error ("unusual TP-relative address");
+ return false;
+ }
+
+ n = gen_rtx_PLUS (mode, (t == 'b' ? mep_tp_rtx ()
+ : mep_gp_rtx ()), n);
+ n = emit_insn (gen_rtx_SET (mode, operands[0], n));
+#if DEBUG_EXPAND_MOV
+ fprintf(stderr, "mep_expand_mov emitting ");
+ debug_rtx(n);
+#endif
+ return true;
+ }
+ }
+
+ for (i=0; i < 2; i++)
+ {
+ t = mep_section_tag (operands[i]);
+ if (GET_CODE (operands[i]) == MEM && (t == 'b' || t == 't'))
+ {
+ rtx sym, n, r;
+ int u;
+
+ sym = XEXP (operands[i], 0);
+ if (GET_CODE (sym) == CONST
+ && GET_CODE (XEXP (sym, 0)) == UNSPEC)
+ sym = XVECEXP (XEXP (sym, 0), 0, 0);
+
+ if (t == 'b')
+ {
+ r = mep_tp_rtx ();
+ u = UNS_TPREL;
+ }
+ else
+ {
+ r = mep_gp_rtx ();
+ u = UNS_GPREL;
+ }
+
+ n = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), u);
+ n = gen_rtx_CONST (Pmode, n);
+ n = gen_rtx_PLUS (Pmode, r, n);
+ operands[i] = replace_equiv_address (operands[i], n);
+ }
+ }
+ }
+
+ if ((GET_CODE (operands[1]) != REG
+ && MEP_CONTROL_REG (operands[0]))
+ || (GET_CODE (operands[0]) != REG
+ && MEP_CONTROL_REG (operands[1])))
+ {
+ rtx temp;
+#if DEBUG_EXPAND_MOV
+ fprintf (stderr, "cr-mem, forcing op1 to reg\n");
+#endif
+ temp = gen_reg_rtx (mode);
+ emit_move_insn (temp, operands[1]);
+ operands[1] = temp;
+ }
+
+ if (symbolref_p (operands[0])
+ && (mep_section_tag (XEXP (operands[0], 0)) == 'f'
+ || (GET_MODE_SIZE (mode) != 4)))
+ {
+ rtx temp;
+
+ gcc_assert (!reload_in_progress && !reload_completed);
+
+ temp = force_reg (Pmode, XEXP (operands[0], 0));
+ operands[0] = replace_equiv_address (operands[0], temp);
+ emit_move_insn (operands[0], operands[1]);
+ return true;
+ }
+
+ if (!post_reload && (tag[1] == 't' || tag[1] == 'b'))
+ tag[1] = 0;
+
+ if (symbol_p (operands[1])
+ && (tag[1] == 'f' || tag[1] == 't' || tag[1] == 'b'))
+ {
+ emit_insn (gen_movsi_topsym_s (operands[0], operands[1]));
+ emit_insn (gen_movsi_botsym_s (operands[0], operands[0], operands[1]));
+ return true;
+ }
+
+ if (symbolref_p (operands[1])
+ && (tag[1] == 'f' || tag[1] == 't' || tag[1] == 'b'))
+ {
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ temp = operands[0];
+ else
+ temp = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_movsi_topsym_s (temp, operands[1]));
+ emit_insn (gen_movsi_botsym_s (temp, temp, operands[1]));
+ emit_move_insn (operands[0], replace_equiv_address (operands[1], temp));
+ return true;
+ }
+
+ return false;
+}
+
+/* Return false for cases where the move pattern can't be used at all.  */
+
+bool
+mep_mov_ok (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int i;
+
+#define DEBUG_MOV_OK 0
+#if DEBUG_MOV_OK
+ fprintf (stderr, "mep_mov_ok %s %c=%c\n", mode_name[mode], mep_section_tag (operands[0]),
+ mep_section_tag (operands[1]));
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+
+ /* We want the movh patterns to get these. */
+ if (GET_CODE (operands[1]) == HIGH)
+ return false;
+
+ /* We can't store a register to a far variable without using a
+ scratch register to hold the address. Using far variables should
+ be split by mep_emit_mov anyway. */
+ if (mep_section_tag (operands[0]) == 'f'
+ || mep_section_tag (operands[1]) == 'f')
+ {
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - no, f\n");
+#endif
+ return false;
+ }
+ i = mep_section_tag (operands[1]);
+ if ((i == 'b' || i == 't') && !reload_completed && !reload_in_progress)
+ /* These are supposed to be generated with adds of the appropriate
+ register. During and after reload, however, we allow them to
+ be accessed as normal symbols because adding a dependency on
+ the base register now might cause problems. */
+ {
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - no, bt\n");
+#endif
+ return false;
+ }
+
+ /* The only moves we can allow involve at least one general
+ register, so require it. */
+ for (i = 0; i < 2; i ++)
+ {
+ /* Allow subregs too, before reload. */
+ rtx x = operands[i];
+
+ if (GET_CODE (x) == SUBREG)
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == REG
+ && ! MEP_CONTROL_REG (x))
+ {
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - ok\n");
+#endif
+ return true;
+ }
+ }
+#if DEBUG_MOV_OK
+ fprintf (stderr, " - no, no gen reg\n");
+#endif
+ return false;
+}
+
+#define DEBUG_SPLIT_WIDE_MOVE 0
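+/* Split a doubleword move into word-sized pieces: operands 2 and 3
+   get the high parts of operands 0 and 1, operands 4 and 5 the low
+   parts, swapped if necessary so neither half clobbers the other.  */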
+void
+mep_split_wide_move (rtx *operands, enum machine_mode mode)
+{
+ int i;
+
+#if DEBUG_SPLIT_WIDE_MOVE
+ fprintf (stderr, "\n\033[34mmep_split_wide_move\033[0m mode %s\n", mode_name[mode]);
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+
+ for (i = 0; i <= 1; i++)
+ {
+ rtx op = operands[i], hi, lo;
+
+ switch (GET_CODE (op))
+ {
+ case REG:
+ {
+ unsigned int regno = REGNO (op);
+
+ if (TARGET_64BIT_CR_REGS && CR_REGNO_P (regno))
+ {
+ rtx i32;
+
+ lo = gen_rtx_REG (SImode, regno);
+ i32 = GEN_INT (32);
+ hi = gen_rtx_ZERO_EXTRACT (SImode,
+ gen_rtx_REG (DImode, regno),
+ i32, i32);
+ }
+ else
+ {
+ hi = gen_rtx_REG (SImode, regno + TARGET_LITTLE_ENDIAN);
+ lo = gen_rtx_REG (SImode, regno + TARGET_BIG_ENDIAN);
+ }
+ }
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case MEM:
+ hi = operand_subword (op, TARGET_LITTLE_ENDIAN, 0, mode);
+ lo = operand_subword (op, TARGET_BIG_ENDIAN, 0, mode);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* The high part of CR <- GPR moves must be done after the low part. */
+ operands [i + 4] = lo;
+ operands [i + 2] = hi;
+ }
+
+ if (reg_mentioned_p (operands[2], operands[5])
+ || GET_CODE (operands[2]) == ZERO_EXTRACT
+ || GET_CODE (operands[4]) == ZERO_EXTRACT)
+ {
+ rtx tmp;
+
+ /* Overlapping register pairs -- make sure we don't
+ early-clobber ourselves. */
+ tmp = operands[2];
+ operands[2] = operands[4];
+ operands[4] = tmp;
+ tmp = operands[3];
+ operands[3] = operands[5];
+ operands[5] = tmp;
+ }
+
+#if DEBUG_SPLIT_WIDE_MOVE
+ fprintf(stderr, "\033[34m");
+ debug_rtx (operands[2]);
+ debug_rtx (operands[3]);
+ debug_rtx (operands[4]);
+ debug_rtx (operands[5]);
+ fprintf(stderr, "\033[0m");
+#endif
+}
+
+/* Emit a setcc instruction in its entirety.  */
+
+static bool
+mep_expand_setcc_1 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case GT:
+ case GTU:
+ tmp = op1, op1 = op2, op2 = tmp;
+ code = swap_condition (code);
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ op1 = force_reg (SImode, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_fmt_ee (code, SImode, op1, op2)));
+ return true;
+
+ case EQ:
+ if (op2 != const0_rtx)
+ op1 = expand_binop (SImode, sub_optab, op1, op2, NULL, 1, OPTAB_WIDEN);
+ mep_expand_setcc_1 (LTU, dest, op1, const1_rtx);
+ return true;
+
+ case NE:
+ /* Branchful sequence:
+ mov dest, 0 16-bit
+ beq op1, op2, Lover 16-bit (op2 < 16), 32-bit otherwise
+ mov dest, 1 16-bit
+
+ Branchless sequence:
+ add3 tmp, op1, -op2 32-bit (or mov + sub)
+ sltu3 tmp, tmp, 1 16-bit
+ xor3 dest, tmp, 1 32-bit
+ */
+ if (optimize_size && op2 != const0_rtx)
+ return false;
+
+ if (op2 != const0_rtx)
+ op1 = expand_binop (SImode, sub_optab, op1, op2, NULL, 1, OPTAB_WIDEN);
+
+ op2 = gen_reg_rtx (SImode);
+ mep_expand_setcc_1 (LTU, op2, op1, const1_rtx);
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_XOR (SImode, op2, const1_rtx)));
+ return true;
+
+ case LE:
+ if (GET_CODE (op2) != CONST_INT
+      || INTVAL (op2) == 0x7fffffff)
+ return false;
+ op2 = GEN_INT (INTVAL (op2) + 1);
+ return mep_expand_setcc_1 (LT, dest, op1, op2);
+
+ case LEU:
+ if (GET_CODE (op2) != CONST_INT
+ || INTVAL (op2) == -1)
+ return false;
+ op2 = GEN_INT (trunc_int_for_mode (INTVAL (op2) + 1, SImode));
+ return mep_expand_setcc_1 (LTU, dest, op1, op2);
+
+ case GE:
+ if (GET_CODE (op2) != CONST_INT
+ || INTVAL (op2) == trunc_int_for_mode (0x80000000, SImode))
+ return false;
+ op2 = GEN_INT (INTVAL (op2) - 1);
+ return mep_expand_setcc_1 (GT, dest, op1, op2);
+
+ case GEU:
+ if (GET_CODE (op2) != CONST_INT
+ || op2 == const0_rtx)
+ return false;
+ op2 = GEN_INT (trunc_int_for_mode (INTVAL (op2) - 1, SImode));
+ return mep_expand_setcc_1 (GTU, dest, op1, op2);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
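+/* Expand a setcc pattern: set operands[0] to the result of comparing
+   operands[2] with operands[3] using the code in operands[1].  */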
+bool
+mep_expand_setcc (rtx *operands)
+{
+ rtx dest = operands[0];
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+
+ return mep_expand_setcc_1 (code, dest, op0, op1);
+}
+
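+/* Rewrite the comparison in operands[0] into a form the branch
+   patterns can handle, emitting setcc sequences where necessary, and
+   return the resulting comparison rtx.  */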
+rtx
+mep_expand_cbranch (rtx *operands)
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx tmp;
+
+ restart:
+ switch (code)
+ {
+ case LT:
+ if (mep_imm4_operand (op1, SImode))
+ break;
+
+ tmp = gen_reg_rtx (SImode);
+ gcc_assert (mep_expand_setcc_1 (LT, tmp, op0, op1));
+ code = NE;
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case GE:
+ if (mep_imm4_operand (op1, SImode))
+ break;
+
+ tmp = gen_reg_rtx (SImode);
+ gcc_assert (mep_expand_setcc_1 (LT, tmp, op0, op1));
+
+ code = EQ;
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case EQ:
+ case NE:
+ if (! mep_reg_or_imm4_operand (op1, SImode))
+ op1 = force_reg (SImode, op1);
+ break;
+
+ case LE:
+ case GT:
+ if (GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) != 0x7fffffff)
+ {
+ op1 = GEN_INT (INTVAL (op1) + 1);
+ code = (code == LE ? LT : GE);
+ goto restart;
+ }
+
+ tmp = gen_reg_rtx (SImode);
+ gcc_assert (mep_expand_setcc_1 (LT, tmp, op1, op0));
+
+ code = (code == LE ? EQ : NE);
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case LTU:
+ if (op1 == const1_rtx)
+ {
+ code = EQ;
+ op1 = const0_rtx;
+ break;
+ }
+
+ tmp = gen_reg_rtx (SImode);
+ gcc_assert (mep_expand_setcc_1 (LTU, tmp, op0, op1));
+ code = NE;
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case LEU:
+ tmp = gen_reg_rtx (SImode);
+ if (mep_expand_setcc_1 (LEU, tmp, op0, op1))
+ code = NE;
+ else if (mep_expand_setcc_1 (LTU, tmp, op1, op0))
+ code = EQ;
+ else
+ gcc_unreachable ();
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case GTU:
+ tmp = gen_reg_rtx (SImode);
+ gcc_assert (mep_expand_setcc_1 (GTU, tmp, op0, op1)
+ || mep_expand_setcc_1 (LTU, tmp, op1, op0));
+ code = NE;
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ case GEU:
+ tmp = gen_reg_rtx (SImode);
+ if (mep_expand_setcc_1 (GEU, tmp, op0, op1))
+ code = NE;
+ else if (mep_expand_setcc_1 (LTU, tmp, op0, op1))
+ code = EQ;
+ else
+ gcc_unreachable ();
+ op0 = tmp;
+ op1 = const0_rtx;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+}
+
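+/* Return the assembler template for a conditional branch; NE selects
+   the bne/bnez/bnei forms, otherwise beq/beqz/beqi.  */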
+const char *
+mep_emit_cbranch (rtx *operands, int ne)
+{
+ if (GET_CODE (operands[1]) == REG)
+ return ne ? "bne\t%0, %1, %l2" : "beq\t%0, %1, %l2";
+ else if (INTVAL (operands[1]) == 0 && !mep_vliw_function_p(cfun->decl))
+ return ne ? "bnez\t%0, %l2" : "beqz\t%0, %l2";
+ else
+ return ne ? "bnei\t%0, %1, %l2" : "beqi\t%0, %1, %l2";
+}
+
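+/* Expand a call pattern.  RETURNS_VALUE is nonzero for call_value,
+   where operand 0 is the result register.  */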
+void
+mep_expand_call (rtx *operands, int returns_value)
+{
+ rtx addr = operands[returns_value];
+ rtx tp = mep_tp_rtx ();
+ rtx gp = mep_gp_rtx ();
+
+ gcc_assert (GET_CODE (addr) == MEM);
+
+ addr = XEXP (addr, 0);
+
+ if (! mep_call_address_operand (addr, VOIDmode))
+ addr = force_reg (SImode, addr);
+
+ if (! operands[returns_value+2])
+ operands[returns_value+2] = const0_rtx;
+
+ if (returns_value)
+ emit_call_insn (gen_call_value_internal (operands[0], addr, operands[2],
+ operands[3], tp, gp));
+ else
+ emit_call_insn (gen_call_internal (addr, operands[1],
+ operands[2], tp, gp));
+}
+
+/* Aliasing Support. */
+
+/* If X is a machine specific address (i.e. a symbol or label being
+ referenced as a displacement from the GOT implemented using an
+ UNSPEC), then return the base term. Otherwise return X. */
+
+rtx
+mep_find_base_term (rtx x)
+{
+ rtx base, term;
+ int unspec;
+
+ if (GET_CODE (x) != PLUS)
+ return x;
+ base = XEXP (x, 0);
+ term = XEXP (x, 1);
+
+ if (has_hard_reg_initial_val(Pmode, TP_REGNO)
+ && base == mep_tp_rtx ())
+ unspec = UNS_TPREL;
+ else if (has_hard_reg_initial_val(Pmode, GP_REGNO)
+ && base == mep_gp_rtx ())
+ unspec = UNS_GPREL;
+ else
+ return x;
+
+ if (GET_CODE (term) != CONST)
+ return x;
+ term = XEXP (term, 0);
+
+ if (GET_CODE (term) != UNSPEC
+ || XINT (term, 1) != unspec)
+ return x;
+
+ return XVECEXP (term, 0, 0);
+}
+
+/* Reload Support. */
+
+/* Return true if the registers in REGCLASS cannot represent the
+   change from mode FROM to mode TO.  */
+
+bool
+mep_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class regclass)
+{
+ if (from == to)
+ return false;
+
+ /* 64-bit COP regs must remain 64-bit COP regs. */
+ if (TARGET_64BIT_CR_REGS
+ && (regclass == CR_REGS
+ || regclass == LOADABLE_CR_REGS)
+ && (GET_MODE_SIZE (to) < 8
+ || GET_MODE_SIZE (from) < 8))
+ return true;
+
+ return false;
+}
+
+#define MEP_NONGENERAL_CLASS(C) (!reg_class_subset_p (C, GENERAL_REGS))
+
+static bool
+mep_general_reg (rtx x)
+{
+ while (GET_CODE (x) == SUBREG)
+ x = XEXP (x, 0);
+ return GET_CODE (x) == REG && GR_REGNO_P (REGNO (x));
+}
+
+static bool
+mep_nongeneral_reg (rtx x)
+{
+ while (GET_CODE (x) == SUBREG)
+ x = XEXP (x, 0);
+ return (GET_CODE (x) == REG
+ && !GR_REGNO_P (REGNO (x)) && REGNO (x) < FIRST_PSEUDO_REGISTER);
+}
+
+static bool
+mep_general_copro_reg (rtx x)
+{
+ while (GET_CODE (x) == SUBREG)
+ x = XEXP (x, 0);
+ return (GET_CODE (x) == REG && CR_REGNO_P (REGNO (x)));
+}
+
+static bool
+mep_nonregister (rtx x)
+{
+ while (GET_CODE (x) == SUBREG)
+ x = XEXP (x, 0);
+ return (GET_CODE (x) != REG || REGNO (x) >= FIRST_PSEUDO_REGISTER);
+}
+
+#define DEBUG_RELOAD 0
+
+/* Return the secondary reload class needed for moving value X to or
+ from a register in coprocessor register class CLASS. */
+
+static enum reg_class
+mep_secondary_copro_reload_class (enum reg_class rclass, rtx x)
+{
+ if (mep_general_reg (x))
+ /* We can do the move directly if mep_have_core_copro_moves_p,
+ otherwise we need to go through memory. Either way, no secondary
+ register is needed. */
+ return NO_REGS;
+
+ if (mep_general_copro_reg (x))
+ {
+ /* We can do the move directly if mep_have_copro_copro_moves_p. */
+ if (mep_have_copro_copro_moves_p)
+ return NO_REGS;
+
+ /* Otherwise we can use a temporary if mep_have_core_copro_moves_p. */
+ if (mep_have_core_copro_moves_p)
+ return GENERAL_REGS;
+
+ /* Otherwise we need to do it through memory. No secondary
+ register is needed. */
+ return NO_REGS;
+ }
+
+ if (reg_class_subset_p (rclass, LOADABLE_CR_REGS)
+ && constraint_satisfied_p (x, CONSTRAINT_U))
+ /* X is a memory value that we can access directly. */
+ return NO_REGS;
+
+ /* We have to move X into a GPR first and then copy it to
+ the coprocessor register. The move from the GPR to the
+ coprocessor might be done directly or through memory,
+ depending on mep_have_core_copro_moves_p. */
+ return GENERAL_REGS;
+}
+
+/* Return the secondary reload class for copying X to a register in
+   RCLASS.  */
+
+enum reg_class
+mep_secondary_input_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ int rv = NO_REGS;
+
+#if DEBUG_RELOAD
+ fprintf (stderr, "secondary input reload copy to %s %s from ", reg_class_names[rclass], mode_name[mode]);
+ debug_rtx (x);
+#endif
+
+ if (reg_class_subset_p (rclass, CR_REGS))
+ rv = mep_secondary_copro_reload_class (rclass, x);
+ else if (MEP_NONGENERAL_CLASS (rclass)
+ && (mep_nonregister (x) || mep_nongeneral_reg (x)))
+ rv = GENERAL_REGS;
+
+#if DEBUG_RELOAD
+ fprintf (stderr, " - requires %s\n", reg_class_names[rv]);
+#endif
+ return (enum reg_class) rv;
+}
+
+/* Return the secondary reload class for copying a register in RCLASS
+   to X.  */
+
+enum reg_class
+mep_secondary_output_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ int rv = NO_REGS;
+
+#if DEBUG_RELOAD
+ fprintf (stderr, "secondary output reload copy from %s %s to ", reg_class_names[rclass], mode_name[mode]);
+ debug_rtx (x);
+#endif
+
+ if (reg_class_subset_p (rclass, CR_REGS))
+ rv = mep_secondary_copro_reload_class (rclass, x);
+ else if (MEP_NONGENERAL_CLASS (rclass)
+ && (mep_nonregister (x) || mep_nongeneral_reg (x)))
+ rv = GENERAL_REGS;
+
+#if DEBUG_RELOAD
+ fprintf (stderr, " - requires %s\n", reg_class_names[rv]);
+#endif
+
+ return (enum reg_class) rv;
+}
+
+/* Implement SECONDARY_MEMORY_NEEDED. */
+
+bool
+mep_secondary_memory_needed (enum reg_class rclass1, enum reg_class rclass2,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (!mep_have_core_copro_moves_p)
+ {
+ if (reg_classes_intersect_p (rclass1, CR_REGS)
+ && reg_classes_intersect_p (rclass2, GENERAL_REGS))
+ return true;
+ if (reg_classes_intersect_p (rclass2, CR_REGS)
+ && reg_classes_intersect_p (rclass1, GENERAL_REGS))
+ return true;
+ if (!mep_have_copro_copro_moves_p
+ && reg_classes_intersect_p (rclass1, CR_REGS)
+ && reg_classes_intersect_p (rclass2, CR_REGS))
+ return true;
+ }
+ return false;
+}
+
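+/* Expand a secondary reload; operand 2 is the scratch register
+   provided by the reload pattern.  */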
+void
+mep_expand_reload (rtx *operands, enum machine_mode mode)
+{
+ /* There are three cases for each direction:
+ register, farsym
+ control, farsym
+ control, nearsym */
+
+ int s0 = mep_section_tag (operands[0]) == 'f';
+ int s1 = mep_section_tag (operands[1]) == 'f';
+ int c0 = mep_nongeneral_reg (operands[0]);
+ int c1 = mep_nongeneral_reg (operands[1]);
+ int which = (s0 ? 20:0) + (c0 ? 10:0) + (s1 ? 2:0) + (c1 ? 1:0);
+
+#if DEBUG_RELOAD
+ fprintf (stderr, "expand_reload %s\n", mode_name[mode]);
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+#endif
+
+ switch (which)
+ {
+ case 00: /* Don't know why this gets here. */
+ case 02: /* general = far */
+ emit_move_insn (operands[0], operands[1]);
+ return;
+
+ case 10: /* cr = mem */
+ case 11: /* cr = cr */
+ case 01: /* mem = cr */
+ case 12: /* cr = far */
+ emit_move_insn (operands[2], operands[1]);
+ emit_move_insn (operands[0], operands[2]);
+ return;
+
+ case 20: /* far = general */
+ emit_move_insn (operands[2], XEXP (operands[1], 0));
+ emit_move_insn (operands[0], gen_rtx_MEM (mode, operands[2]));
+ return;
+
+ case 21: /* far = cr */
+ case 22: /* far = far */
+ default:
+ fprintf (stderr, "unsupported expand reload case %02d for mode %s\n",
+ which, mode_name[mode]);
+ debug_rtx (operands[0]);
+ debug_rtx (operands[1]);
+ gcc_unreachable ();
+ }
+}
+
+/* Implement PREFERRED_RELOAD_CLASS. See whether X is a constant that
+ can be moved directly into registers 0 to 7, but not into the rest.
+ If so, and if the required class includes registers 0 to 7, restrict
+ it to those registers. */
+
+enum reg_class
+mep_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ if (INTVAL (x) >= 0x10000
+ && INTVAL (x) < 0x01000000
+ && (INTVAL (x) & 0xffff) != 0
+ && reg_class_subset_p (TPREL_REGS, rclass))
+ rclass = TPREL_REGS;
+ break;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (mep_section_tag (x) != 'f'
+ && reg_class_subset_p (TPREL_REGS, rclass))
+ rclass = TPREL_REGS;
+ break;
+
+ default:
+ break;
+ }
+ return rclass;
+}
+
+/* Implement REGISTER_MOVE_COST. Return 2 for direct single-register
+ moves, 4 for direct double-register moves, and 1000 for anything
+ that requires a temporary register or temporary stack slot. */
+
+int
+mep_register_move_cost (enum machine_mode mode, enum reg_class from, enum reg_class to)
+{
+ if (mep_have_copro_copro_moves_p
+ && reg_class_subset_p (from, CR_REGS)
+ && reg_class_subset_p (to, CR_REGS))
+ {
+ if (TARGET_32BIT_CR_REGS && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return 4;
+ return 2;
+ }
+ if (reg_class_subset_p (from, CR_REGS)
+ && reg_class_subset_p (to, CR_REGS))
+ {
+ if (TARGET_32BIT_CR_REGS && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return 8;
+ return 4;
+ }
+ if (reg_class_subset_p (from, CR_REGS)
+ || reg_class_subset_p (to, CR_REGS))
+ {
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return 4;
+ return 2;
+ }
+ if (mep_secondary_memory_needed (from, to, mode))
+ return 1000;
+ if (MEP_NONGENERAL_CLASS (from) && MEP_NONGENERAL_CLASS (to))
+ return 1000;
+
+ if (GET_MODE_SIZE (mode) > 4)
+ return 4;
+
+ return 2;
+}
+
+
+/* Functions to save and restore machine-specific function data. */
+
+static struct machine_function *
+mep_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
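+/* Assign a stack save slot to the initial value of hard register REG,
+   as used by ALLOCATE_INITIAL_VALUE.  */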
+static rtx
+mep_allocate_initial_value (rtx reg)
+{
+ int rss;
+
+ if (GET_CODE (reg) != REG)
+ return NULL_RTX;
+
+ if (REGNO (reg) >= FIRST_PSEUDO_REGISTER)
+ return NULL_RTX;
+
+ /* In interrupt functions, the "initial" values of $gp and $tp are
+ provided by the prologue. They are not necessarily the same as
+ the values that the caller was using. */
+ if (REGNO (reg) == TP_REGNO || REGNO (reg) == GP_REGNO)
+ if (mep_interrupt_p ())
+ return NULL_RTX;
+
+ if (! cfun->machine->reg_save_slot[REGNO(reg)])
+ {
+ cfun->machine->reg_save_size += 4;
+ cfun->machine->reg_save_slot[REGNO(reg)] = cfun->machine->reg_save_size;
+ }
+
+ rss = cfun->machine->reg_save_slot[REGNO(reg)];
+ return gen_rtx_MEM (SImode, plus_constant (arg_pointer_rtx, -rss));
+}
+
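+/* Return the return address for frame COUNT; only the current frame's
+   return address, held in $lp, is available.  */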
+rtx
+mep_return_addr_rtx (int count)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, LP_REGNO);
+}
+
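+/* Return pseudos holding the values $tp and $gp had on entry to the
+   function.  */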
+static rtx
+mep_tp_rtx (void)
+{
+ return get_hard_reg_initial_val (Pmode, TP_REGNO);
+}
+
+static rtx
+mep_gp_rtx (void)
+{
+ return get_hard_reg_initial_val (Pmode, GP_REGNO);
+}
+
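+/* Return true if the current function has the "interrupt" attribute.
+   The result is cached in cfun->machine: 0 unknown, 1 no, 2 yes.  */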
+static bool
+mep_interrupt_p (void)
+{
+ if (cfun->machine->interrupt_handler == 0)
+ {
+ int interrupt_handler
+ = (lookup_attribute ("interrupt",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE);
+ cfun->machine->interrupt_handler = interrupt_handler ? 2 : 1;
+ }
+ return cfun->machine->interrupt_handler == 2;
+}
+
+static bool
+mep_disinterrupt_p (void)
+{
+ if (cfun->machine->disable_interrupts == 0)
+ {
+ int disable_interrupts
+ = (lookup_attribute ("disinterrupt",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE);
+ cfun->machine->disable_interrupts = disable_interrupts ? 2 : 1;
+ }
+ return cfun->machine->disable_interrupts == 2;
+}
+
+
+/* Frame/Epilog/Prolog Related. */
+
+static bool
+mep_reg_set_p (rtx reg, rtx insn)
+{
+  /* Similar to reg_set_p in rtlanal.c, but we ignore calls.  */
+ if (INSN_P (insn))
+ {
+ if (FIND_REG_INC_NOTE (insn, reg))
+ return true;
+ insn = PATTERN (insn);
+ }
+
+ if (GET_CODE (insn) == SET
+ && GET_CODE (XEXP (insn, 0)) == REG
+ && GET_CODE (XEXP (insn, 1)) == REG
+ && REGNO (XEXP (insn, 0)) == REGNO (XEXP (insn, 1)))
+ return false;
+
+ return set_of (reg, insn) != NULL_RTX;
+}
+
+
+#define MEP_SAVES_UNKNOWN 0
+#define MEP_SAVES_YES 1
+#define MEP_SAVES_MAYBE 2
+#define MEP_SAVES_NO 3
+
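+/* Return true if register REGNO may be modified somewhere in the
+   current function.  */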
+static bool
+mep_reg_set_in_function (int regno)
+{
+ rtx reg, insn;
+
+ if (mep_interrupt_p () && df_regs_ever_live_p(regno))
+ return true;
+
+ if (regno == LP_REGNO && (profile_arc_flag > 0 || profile_flag > 0))
+ return true;
+
+ push_topmost_sequence ();
+ insn = get_insns ();
+ pop_topmost_sequence ();
+
+ if (!insn)
+ return false;
+
+ reg = gen_rtx_REG (SImode, regno);
+
+ for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn) && mep_reg_set_p (reg, insn))
+ return true;
+ return false;
+}
+
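+/* Return true if the function contains a basic (operand-less) asm,
+   whose register usage is invisible to us; the answer is cached in
+   cfun->machine.  */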
+static bool
+mep_asm_without_operands_p (void)
+{
+ if (cfun->machine->asms_without_operands == 0)
+ {
+ rtx insn;
+
+ push_topmost_sequence ();
+ insn = get_insns ();
+ pop_topmost_sequence ();
+
+ cfun->machine->asms_without_operands = 1;
+ while (insn)
+ {
+ if (INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) == ASM_INPUT)
+ {
+ cfun->machine->asms_without_operands = 2;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ }
+ return cfun->machine->asms_without_operands == 2;
+}
+
+/* Interrupt functions save/restore every call-preserved register, and
+   any call-used registers they use (or all call-used registers if they
+   call any function, since those may get clobbered there too).  Here
+   we check which call-used registers need saving.  */
+
+#define IVC2_ISAVED_REG(r) (TARGET_IVC2 \
+ && (r == FIRST_CCR_REGNO + 1 \
+ || (r >= FIRST_CCR_REGNO + 8 && r <= FIRST_CCR_REGNO + 11) \
+ || (r >= FIRST_CCR_REGNO + 16 && r <= FIRST_CCR_REGNO + 31)))
+
+static bool
+mep_interrupt_saved_reg (int r)
+{
+ if (!mep_interrupt_p ())
+ return false;
+ if (r == REGSAVE_CONTROL_TEMP
+ || (TARGET_64BIT_CR_REGS && TARGET_COP && r == REGSAVE_CONTROL_TEMP+1))
+ return true;
+ if (mep_asm_without_operands_p ()
+ && (!fixed_regs[r]
+ || (r == RPB_REGNO || r == RPE_REGNO || r == RPC_REGNO || r == LP_REGNO)
+ || IVC2_ISAVED_REG (r)))
+ return true;
+ if (!current_function_is_leaf)
+ /* Function calls mean we need to save $lp. */
+ if (r == LP_REGNO || IVC2_ISAVED_REG (r))
+ return true;
+ if (!current_function_is_leaf || cfun->machine->doloop_tags > 0)
+ /* The interrupt handler might use these registers for repeat blocks,
+ or it might call a function that does so. */
+ if (r == RPB_REGNO || r == RPE_REGNO || r == RPC_REGNO)
+ return true;
+ if (current_function_is_leaf && call_used_regs[r] && !df_regs_ever_live_p(r))
+ return false;
+ /* Functions we call might clobber these. */
+ if (call_used_regs[r] && !fixed_regs[r])
+ return true;
+ /* Additional registers that need to be saved for IVC2. */
+ if (IVC2_ISAVED_REG (r))
+ return true;
+
+ return false;
+}
+
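+/* Return true if register R needs a save slot in the current function;
+   once the frame layout is locked, the cached answer is returned.  */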
+static bool
+mep_call_saves_register (int r)
+{
+ if (! cfun->machine->frame_locked)
+ {
+ int rv = MEP_SAVES_NO;
+
+ if (cfun->machine->reg_save_slot[r])
+ rv = MEP_SAVES_YES;
+ else if (r == LP_REGNO && (profile_arc_flag > 0 || profile_flag > 0))
+ rv = MEP_SAVES_YES;
+ else if (r == FRAME_POINTER_REGNUM && frame_pointer_needed)
+ rv = MEP_SAVES_YES;
+ else if ((!call_used_regs[r] || r == LP_REGNO) && df_regs_ever_live_p(r))
+ rv = MEP_SAVES_YES;
+ else if (crtl->calls_eh_return && (r == 10 || r == 11))
+ /* We need these to have stack slots so that they can be set during
+ unwinding. */
+ rv = MEP_SAVES_YES;
+ else if (mep_interrupt_saved_reg (r))
+ rv = MEP_SAVES_YES;
+ cfun->machine->reg_saved[r] = rv;
+ }
+ return cfun->machine->reg_saved[r] == MEP_SAVES_YES;
+}
+
+/* Return true if epilogue uses register REGNO. */
+
+bool
+mep_epilogue_uses (int regno)
+{
+ /* Since $lp is a call-saved register, the generic code will normally
+ mark it used in the epilogue if it needs to be saved and restored.
+ However, when profiling is enabled, the profiling code will implicitly
+ clobber $11. This case has to be handled specially both here and in
+ mep_call_saves_register. */
+ if (regno == LP_REGNO && (profile_arc_flag > 0 || profile_flag > 0))
+ return true;
+ /* Interrupt functions save/restore pretty much everything. */
+ return (reload_completed && mep_interrupt_saved_reg (regno));
+}
+
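+/* Return the number of bytes register REGNO occupies in the register
+   save area.  */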
+static int
+mep_reg_size (int regno)
+{
+ if (CR_REGNO_P (regno) && TARGET_64BIT_CR_REGS)
+ return 8;
+ return 4;
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+bool
+mep_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
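+/* Compute the offset between the two given elimination registers,
+   settling the frame layout (register save area, alignment filler,
+   locals, outgoing args) as a side effect.  */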
+int
+mep_elimination_offset (int from, int to)
+{
+ int reg_save_size;
+ int i;
+ int frame_size = get_frame_size () + crtl->outgoing_args_size;
+ int total_size;
+
+ if (!cfun->machine->frame_locked)
+ memset (cfun->machine->reg_saved, 0, sizeof (cfun->machine->reg_saved));
+
+ /* We don't count arg_regs_to_save in the arg pointer offset, because
+ gcc thinks the arg pointer has moved along with the saved regs.
+ However, we do count it when we adjust $sp in the prologue. */
+ reg_save_size = 0;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (mep_call_saves_register (i))
+ reg_save_size += mep_reg_size (i);
+
+ if (reg_save_size % 8)
+ cfun->machine->regsave_filler = 8 - (reg_save_size % 8);
+ else
+ cfun->machine->regsave_filler = 0;
+
+ /* This is what our total stack adjustment looks like. */
+ total_size = (reg_save_size + frame_size + cfun->machine->regsave_filler);
+
+ if (total_size % 8)
+ cfun->machine->frame_filler = 8 - (total_size % 8);
+ else
+ cfun->machine->frame_filler = 0;
+
+ if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ return reg_save_size + cfun->machine->regsave_filler;
+
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return cfun->machine->frame_filler + frame_size;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return reg_save_size + cfun->machine->regsave_filler + cfun->machine->frame_filler + frame_size;
+
+ gcc_unreachable ();
+}
+
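+/* Mark X as frame-related and return it.  */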
+static rtx
+F (rtx x)
+{
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Since the prologue/epilogue code is generated after optimization,
+ we can't rely on gcc to split constants for us. So, this code
+ captures all the ways to add a constant to a register in one logic
+ chunk, including optimizing away insns we just don't need. This
+ makes the prolog/epilog code easier to follow. */
+static void
+add_constant (int dest, int src, int value, int mark_frame)
+{
+ rtx insn;
+ int hi, lo;
+
+ if (src == dest && value == 0)
+ return;
+
+ if (value == 0)
+ {
+ insn = emit_move_insn (gen_rtx_REG (SImode, dest),
+ gen_rtx_REG (SImode, src));
+ if (mark_frame)
+ RTX_FRAME_RELATED_P(insn) = 1;
+ return;
+ }
+
+ if (value >= -32768 && value <= 32767)
+ {
+ insn = emit_insn (gen_addsi3 (gen_rtx_REG (SImode, dest),
+ gen_rtx_REG (SImode, src),
+ GEN_INT (value)));
+ if (mark_frame)
+ RTX_FRAME_RELATED_P(insn) = 1;
+ return;
+ }
+
+  /* Big constant, need to use a temp register.  We use
+     REGSAVE_CONTROL_TEMP because it's call-clobbered (the reg save
+     area is always small enough to add to directly).  */
+
+ hi = trunc_int_for_mode (value & 0xffff0000, SImode);
+ lo = value & 0xffff;
+
+ insn = emit_move_insn (gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ GEN_INT (hi));
+
+ if (lo)
+ {
+ insn = emit_insn (gen_iorsi3 (gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ GEN_INT (lo)));
+ }
+
+ insn = emit_insn (gen_addsi3 (gen_rtx_REG (SImode, dest),
+ gen_rtx_REG (SImode, src),
+ gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP)));
+ if (mark_frame)
+ {
+ RTX_FRAME_RELATED_P(insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (SImode,
+ gen_rtx_REG (SImode, dest),
+ gen_rtx_PLUS (SImode,
+ gen_rtx_REG (SImode, dest),
+ GEN_INT (value))));
+ }
+}
+
+/* Move SRC to DEST. Mark the move as being potentially dead if
+ MAYBE_DEAD_P. */
+
+static rtx
+maybe_dead_move (rtx dest, rtx src, bool ATTRIBUTE_UNUSED maybe_dead_p)
+{
+ rtx insn = emit_move_insn (dest, src);
+#if 0
+ if (maybe_dead_p)
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
+#endif
+ return insn;
+}
+
+/* Used for interrupt functions, which can't assume that $tp and $gp
+ contain the correct pointers. */
+
+static void
+mep_reload_pointer (int regno, const char *symbol)
+{
+ rtx reg, sym;
+
+ if (!df_regs_ever_live_p(regno) && current_function_is_leaf)
+ return;
+
+ reg = gen_rtx_REG (SImode, regno);
+ sym = gen_rtx_SYMBOL_REF (SImode, symbol);
+ emit_insn (gen_movsi_topsym_s (reg, sym));
+ emit_insn (gen_movsi_botsym_s (reg, reg, sym));
+}
+
+/* Assign save slots for any register not already saved. DImode
+ registers go at the end of the reg save area; the rest go at the
+ beginning. This is for alignment purposes. Returns true if a frame
+ is really needed. */
+static bool
+mep_assign_save_slots (int reg_save_size)
+{
+ bool really_need_stack_frame = false;
+ int di_ofs = 0;
+ int i;
+
+ for (i=0; i<FIRST_PSEUDO_REGISTER; i++)
+ if (mep_call_saves_register(i))
+ {
+ int regsize = mep_reg_size (i);
+
+ if ((i != TP_REGNO && i != GP_REGNO && i != LP_REGNO)
+ || mep_reg_set_in_function (i))
+ really_need_stack_frame = true;
+
+ if (cfun->machine->reg_save_slot[i])
+ continue;
+
+ if (regsize < 8)
+ {
+ cfun->machine->reg_save_size += regsize;
+ cfun->machine->reg_save_slot[i] = cfun->machine->reg_save_size;
+ }
+ else
+ {
+ cfun->machine->reg_save_slot[i] = reg_save_size - di_ofs;
+ di_ofs += 8;
+ }
+ }
+ cfun->machine->frame_locked = 1;
+ return really_need_stack_frame;
+}
+
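+/* Expand the prologue: adjust $sp, save the registers that need
+   saving, set up the frame pointer if needed, and reload $gp/$tp in
+   interrupt handlers.  */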
+void
+mep_expand_prologue (void)
+{
+ int i, rss, sp_offset = 0;
+ int reg_save_size;
+ int frame_size;
+ int really_need_stack_frame;
+
+ /* We must not allow register renaming in interrupt functions,
+ because that invalidates the correctness of the set of call-used
+ registers we're going to save/restore. */
+ mep_set_leaf_registers (mep_interrupt_p () ? 0 : 1);
+
+ if (mep_disinterrupt_p ())
+ emit_insn (gen_mep_disable_int ());
+
+ cfun->machine->mep_frame_pointer_needed = frame_pointer_needed;
+
+ reg_save_size = mep_elimination_offset (ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM);
+ frame_size = mep_elimination_offset (FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM);
+ really_need_stack_frame = frame_size;
+
+ really_need_stack_frame |= mep_assign_save_slots (reg_save_size);
+
+ sp_offset = reg_save_size;
+ if (sp_offset + frame_size < 128)
+    sp_offset += frame_size;
+
+ add_constant (SP_REGNO, SP_REGNO, -sp_offset, 1);
+
+ for (i=0; i<FIRST_PSEUDO_REGISTER; i++)
+ if (mep_call_saves_register(i))
+ {
+ rtx mem;
+ bool maybe_dead_p;
+ enum machine_mode rmode;
+
+ rss = cfun->machine->reg_save_slot[i];
+
+ if ((i == TP_REGNO || i == GP_REGNO || i == LP_REGNO)
+ && (!mep_reg_set_in_function (i)
+ && !mep_interrupt_p ()))
+ continue;
+
+ if (mep_reg_size (i) == 8)
+ rmode = DImode;
+ else
+ rmode = SImode;
+
+ /* If there is a pseudo associated with this register's initial value,
+ reload might have already spilt it to the stack slot suggested by
+ ALLOCATE_INITIAL_VALUE. The moves emitted here can then be safely
+ deleted as dead. */
+ mem = gen_rtx_MEM (rmode,
+ plus_constant (stack_pointer_rtx, sp_offset - rss));
+ maybe_dead_p = rtx_equal_p (mem, has_hard_reg_initial_val (rmode, i));
+
+ if (GR_REGNO_P (i) || LOADABLE_CR_REGNO_P (i))
+ F(maybe_dead_move (mem, gen_rtx_REG (rmode, i), maybe_dead_p));
+ else if (rmode == DImode)
+ {
+ rtx insn;
+ int be = TARGET_BIG_ENDIAN ? 4 : 0;
+
+ mem = gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, sp_offset - rss + be));
+
+ maybe_dead_move (gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ gen_rtx_REG (SImode, i),
+ maybe_dead_p);
+ maybe_dead_move (gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP+1),
+ gen_rtx_ZERO_EXTRACT (SImode,
+ gen_rtx_REG (DImode, i),
+ GEN_INT (32),
+ GEN_INT (32)),
+ maybe_dead_p);
+ insn = maybe_dead_move (mem,
+ gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ maybe_dead_p);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ copy_rtx (mem),
+ gen_rtx_REG (rmode, i)));
+ mem = gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, sp_offset - rss + (4-be)));
+ insn = maybe_dead_move (mem,
+ gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP+1),
+ maybe_dead_p);
+ }
+ else
+ {
+ rtx insn;
+ maybe_dead_move (gen_rtx_REG (rmode, REGSAVE_CONTROL_TEMP),
+ gen_rtx_REG (rmode, i),
+ maybe_dead_p);
+ insn = maybe_dead_move (mem,
+ gen_rtx_REG (rmode, REGSAVE_CONTROL_TEMP),
+ maybe_dead_p);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ copy_rtx (mem),
+ gen_rtx_REG (rmode, i)));
+ }
+ }
+
+ if (frame_pointer_needed)
+ {
+ /* We've already adjusted down by sp_offset. Total $sp change
+ is reg_save_size + frame_size. We want a net change here of
+ just reg_save_size. */
+ add_constant (FP_REGNO, SP_REGNO, sp_offset - reg_save_size, 1);
+ }
+
+ add_constant (SP_REGNO, SP_REGNO, sp_offset-(reg_save_size+frame_size), 1);
+
+ if (mep_interrupt_p ())
+ {
+ mep_reload_pointer(GP_REGNO, "__sdabase");
+ mep_reload_pointer(TP_REGNO, "__tpbase");
+ }
+}
+
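+/* Emit a commented summary of the frame layout at the start of the
+   function's assembler output.  */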
+static void
+mep_start_function (FILE *file, HOST_WIDE_INT hwi_local)
+{
+ int local = hwi_local;
+ int frame_size = local + crtl->outgoing_args_size;
+ int reg_save_size;
+ int ffill;
+ int i, sp, skip;
+ int sp_offset;
+ int slot_map[FIRST_PSEUDO_REGISTER], si, sj;
+
+ reg_save_size = mep_elimination_offset (ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM);
+ frame_size = mep_elimination_offset (FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM);
+ sp_offset = reg_save_size + frame_size;
+
+ ffill = cfun->machine->frame_filler;
+
+ if (cfun->machine->mep_frame_pointer_needed)
+ reg_names[FP_REGNO] = "$fp";
+ else
+ reg_names[FP_REGNO] = "$8";
+
+ if (sp_offset == 0)
+ return;
+
+ if (debug_info_level == DINFO_LEVEL_NONE)
+ {
+ fprintf (file, "\t# frame: %d", sp_offset);
+ if (reg_save_size)
+ fprintf (file, " %d regs", reg_save_size);
+ if (local)
+ fprintf (file, " %d locals", local);
+ if (crtl->outgoing_args_size)
+ fprintf (file, " %d args", crtl->outgoing_args_size);
+ fprintf (file, "\n");
+ return;
+ }
+
+ fprintf (file, "\t#\n");
+ fprintf (file, "\t# Initial Frame Information:\n");
+ if (sp_offset || !frame_pointer_needed)
+ fprintf (file, "\t# Entry ---------- 0\n");
+
+ /* Sort registers by save slots, so they're printed in the order
+ they appear in memory, not the order they're saved in. */
+ for (si=0; si<FIRST_PSEUDO_REGISTER; si++)
+ slot_map[si] = si;
+ for (si=0; si<FIRST_PSEUDO_REGISTER-1; si++)
+ for (sj=si+1; sj<FIRST_PSEUDO_REGISTER; sj++)
+ if (cfun->machine->reg_save_slot[slot_map[si]]
+ > cfun->machine->reg_save_slot[slot_map[sj]])
+ {
+ int t = slot_map[si];
+ slot_map[si] = slot_map[sj];
+ slot_map[sj] = t;
+ }
+
+ sp = 0;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ int rsize;
+ int r = slot_map[i];
+ int rss = cfun->machine->reg_save_slot[r];
+
+ if (!mep_call_saves_register (r))
+ continue;
+
+ if ((r == TP_REGNO || r == GP_REGNO || r == LP_REGNO)
+ && (!mep_reg_set_in_function (r)
+ && !mep_interrupt_p ()))
+ continue;
+
+ rsize = mep_reg_size(r);
+ skip = rss - (sp+rsize);
+ if (skip)
+ fprintf (file, "\t# %3d bytes for alignment\n", skip);
+ fprintf (file, "\t# %3d bytes for saved %-3s %3d($sp)\n",
+ rsize, reg_names[r], sp_offset - rss);
+ sp = rss;
+ }
+
+ skip = reg_save_size - sp;
+ if (skip)
+ fprintf (file, "\t# %3d bytes for alignment\n", skip);
+
+ if (frame_pointer_needed)
+ fprintf (file, "\t# FP ---> ---------- %d (sp-%d)\n", reg_save_size, sp_offset-reg_save_size);
+ if (local)
+ fprintf (file, "\t# %3d bytes for local vars\n", local);
+ if (ffill)
+ fprintf (file, "\t# %3d bytes for alignment\n", ffill);
+ if (crtl->outgoing_args_size)
+ fprintf (file, "\t# %3d bytes for outgoing args\n",
+ crtl->outgoing_args_size);
+ fprintf (file, "\t# SP ---> ---------- %d\n", sp_offset);
+ fprintf (file, "\t#\n");
+}
+
+
+static int mep_prevent_lp_restore = 0;
+static int mep_sibcall_epilogue = 0;
+
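+/* Expand the epilogue: restore saved registers, deallocate the frame,
+   and emit the appropriate return insn.  */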
+void
+mep_expand_epilogue (void)
+{
+ int i, sp_offset = 0;
+ int reg_save_size = 0;
+ int frame_size;
+ int lp_temp = LP_REGNO, lp_slot = -1;
+ int really_need_stack_frame = get_frame_size() + crtl->outgoing_args_size;
+ int interrupt_handler = mep_interrupt_p ();
+
+ if (profile_arc_flag == 2)
+ emit_insn (gen_mep_bb_trace_ret ());
+
+ reg_save_size = mep_elimination_offset (ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM);
+ frame_size = mep_elimination_offset (FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM);
+
+ really_need_stack_frame |= mep_assign_save_slots (reg_save_size);
+
+ if (frame_pointer_needed)
+ {
+      /* If we have a frame pointer, we won't have a reliable stack
+	 pointer (alloca may have moved it), so rebase $sp from $fp.  */
+ emit_move_insn (gen_rtx_REG (SImode, SP_REGNO),
+ gen_rtx_REG (SImode, FP_REGNO));
+ sp_offset = reg_save_size;
+ }
+ else
+ {
+ /* SP is right under our local variable space. Adjust it if
+ needed. */
+ sp_offset = reg_save_size + frame_size;
+ if (sp_offset >= 128)
+ {
+ add_constant (SP_REGNO, SP_REGNO, frame_size, 0);
+ sp_offset -= frame_size;
+ }
+ }
+
+ /* This is backwards so that we restore the control and coprocessor
+ registers before the temporary registers we use to restore
+ them. */
+ for (i=FIRST_PSEUDO_REGISTER-1; i>=1; i--)
+ if (mep_call_saves_register (i))
+ {
+ enum machine_mode rmode;
+ int rss = cfun->machine->reg_save_slot[i];
+
+ if (mep_reg_size (i) == 8)
+ rmode = DImode;
+ else
+ rmode = SImode;
+
+ if ((i == TP_REGNO || i == GP_REGNO || i == LP_REGNO)
+ && !(mep_reg_set_in_function (i) || interrupt_handler))
+ continue;
+ if (mep_prevent_lp_restore && i == LP_REGNO)
+ continue;
+ if (!mep_prevent_lp_restore
+ && !interrupt_handler
+ && (i == 10 || i == 11))
+ continue;
+
+ if (GR_REGNO_P (i) || LOADABLE_CR_REGNO_P (i))
+ emit_move_insn (gen_rtx_REG (rmode, i),
+ gen_rtx_MEM (rmode,
+ plus_constant (stack_pointer_rtx,
+ sp_offset-rss)));
+ else
+ {
+ if (i == LP_REGNO && !mep_sibcall_epilogue && !interrupt_handler)
+ /* Defer this one so we can jump indirect rather than
+ copying the RA to $lp and "ret". EH epilogues
+ automatically skip this anyway. */
+ lp_slot = sp_offset-rss;
+ else
+ {
+ emit_move_insn (gen_rtx_REG (rmode, REGSAVE_CONTROL_TEMP),
+ gen_rtx_MEM (rmode,
+ plus_constant (stack_pointer_rtx,
+ sp_offset-rss)));
+ emit_move_insn (gen_rtx_REG (rmode, i),
+ gen_rtx_REG (rmode, REGSAVE_CONTROL_TEMP));
+ }
+ }
+ }
+ if (lp_slot != -1)
+ {
+ /* Restore this one last so we know it will be in the temp
+ register when we return by jumping indirectly via the temp. */
+ emit_move_insn (gen_rtx_REG (SImode, REGSAVE_CONTROL_TEMP),
+ gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ lp_slot)));
+ lp_temp = REGSAVE_CONTROL_TEMP;
+ }
+
+ add_constant (SP_REGNO, SP_REGNO, sp_offset, 0);
+
+ if (crtl->calls_eh_return && mep_prevent_lp_restore)
+ emit_insn (gen_addsi3 (gen_rtx_REG (SImode, SP_REGNO),
+ gen_rtx_REG (SImode, SP_REGNO),
+ cfun->machine->eh_stack_adjust));
+
+ if (mep_sibcall_epilogue)
+ return;
+
+ if (mep_disinterrupt_p ())
+ emit_insn (gen_mep_enable_int ());
+
+ if (mep_prevent_lp_restore)
+ {
+ emit_jump_insn (gen_eh_return_internal ());
+ emit_barrier ();
+ }
+ else if (interrupt_handler)
+ emit_jump_insn (gen_mep_reti ());
+ else
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode, lp_temp)));
+}
+
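+/* Expand eh_return: force the handler address into $lp, then emit the
+   EH epilogue.  */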
+void
+mep_expand_eh_return (rtx *operands)
+{
+ if (GET_CODE (operands[0]) != REG || REGNO (operands[0]) != LP_REGNO)
+ {
+ rtx ra = gen_rtx_REG (Pmode, LP_REGNO);
+ emit_move_insn (ra, operands[0]);
+ operands[0] = ra;
+ }
+
+ emit_insn (gen_eh_epilogue (operands[0]));
+}
+
+void
+mep_emit_eh_epilogue (rtx *operands ATTRIBUTE_UNUSED)
+{
+ cfun->machine->eh_stack_adjust = gen_rtx_REG (Pmode, 0);
+ mep_prevent_lp_restore = 1;
+ mep_expand_epilogue ();
+ mep_prevent_lp_restore = 0;
+}
+
+void
+mep_expand_sibcall_epilogue (void)
+{
+ mep_sibcall_epilogue = 1;
+ mep_expand_epilogue ();
+ mep_sibcall_epilogue = 0;
+}
+
+static bool
+mep_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ if (decl == NULL)
+ return false;
+
+ if (mep_section_tag (DECL_RTL (decl)) == 'f')
+ return false;
+
+  /* Can't sibcall from an interrupt or disinterrupt function.  */
+ if (mep_interrupt_p () || mep_disinterrupt_p ())
+ return false;
+
+ return true;
+}
+
+rtx
+mep_return_stackadj_rtx (void)
+{
+ return gen_rtx_REG (SImode, 10);
+}
+
+rtx
+mep_return_handler_rtx (void)
+{
+ return gen_rtx_REG (SImode, LP_REGNO);
+}
+
+void
+mep_function_profiler (FILE *file)
+{
+ /* Always right at the beginning of the function. */
+ fprintf (file, "\t# mep function profiler\n");
+ fprintf (file, "\tadd\t$sp, -8\n");
+ fprintf (file, "\tsw\t$0, ($sp)\n");
+ fprintf (file, "\tldc\t$0, $lp\n");
+ fprintf (file, "\tsw\t$0, 4($sp)\n");
+ fprintf (file, "\tbsr\t__mep_mcount\n");
+ fprintf (file, "\tlw\t$0, 4($sp)\n");
+ fprintf (file, "\tstc\t$0, $lp\n");
+ fprintf (file, "\tlw\t$0, ($sp)\n");
+ fprintf (file, "\tadd\t$sp, 8\n\n");
+}
+
+const char *
+mep_emit_bb_trace_ret (void)
+{
+ fprintf (asm_out_file, "\t# end of block profiling\n");
+ fprintf (asm_out_file, "\tadd\t$sp, -8\n");
+ fprintf (asm_out_file, "\tsw\t$0, ($sp)\n");
+ fprintf (asm_out_file, "\tldc\t$0, $lp\n");
+ fprintf (asm_out_file, "\tsw\t$0, 4($sp)\n");
+ fprintf (asm_out_file, "\tbsr\t__bb_trace_ret\n");
+ fprintf (asm_out_file, "\tlw\t$0, 4($sp)\n");
+ fprintf (asm_out_file, "\tstc\t$0, $lp\n");
+ fprintf (asm_out_file, "\tlw\t$0, ($sp)\n");
+ fprintf (asm_out_file, "\tadd\t$sp, 8\n\n");
+ return "";
+}
+
+#undef SAVE
+#undef RESTORE
+
+/* Operand Printing. */
+
+void
+mep_print_operand_address (FILE *stream, rtx address)
+{
+ if (GET_CODE (address) == MEM)
+ address = XEXP (address, 0);
+ else
+ /* cf: gcc.dg/asm-4.c. */
+ gcc_assert (GET_CODE (address) == REG);
+
+ mep_print_operand (stream, address, 0);
+}
+
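+/* Operand-printing templates, indexed by the string encode_pattern
+   builds for an operand; digits in FORMAT index into patternr[].  */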
+static struct
+{
+ char code;
+ const char *pattern;
+ const char *format;
+}
+const conversions[] =
+{
+ { 0, "r", "0" },
+ { 0, "m+ri", "3(2)" },
+ { 0, "mr", "(1)" },
+ { 0, "ms", "(1)" },
+ { 0, "ml", "(1)" },
+ { 0, "mLrs", "%lo(3)(2)" },
+ { 0, "mLr+si", "%lo(4+5)(2)" },
+ { 0, "m+ru2s", "%tpoff(5)(2)" },
+ { 0, "m+ru3s", "%sdaoff(5)(2)" },
+ { 0, "m+r+u2si", "%tpoff(6+7)(2)" },
+ { 0, "m+ru2+si", "%tpoff(6+7)(2)" },
+ { 0, "m+r+u3si", "%sdaoff(6+7)(2)" },
+ { 0, "m+ru3+si", "%sdaoff(6+7)(2)" },
+ { 0, "mi", "(1)" },
+ { 0, "m+si", "(2+3)" },
+ { 0, "m+li", "(2+3)" },
+ { 0, "i", "0" },
+ { 0, "s", "0" },
+ { 0, "+si", "1+2" },
+ { 0, "+u2si", "%tpoff(3+4)" },
+ { 0, "+u3si", "%sdaoff(3+4)" },
+ { 0, "l", "0" },
+ { 'b', "i", "0" },
+ { 'B', "i", "0" },
+ { 'U', "i", "0" },
+ { 'h', "i", "0" },
+ { 'h', "Hs", "%hi(1)" },
+ { 'I', "i", "0" },
+ { 'I', "u2s", "%tpoff(2)" },
+ { 'I', "u3s", "%sdaoff(2)" },
+ { 'I', "+u2si", "%tpoff(3+4)" },
+ { 'I', "+u3si", "%sdaoff(3+4)" },
+ { 'J', "i", "0" },
+ { 'P', "mr", "(1\\+),\\0" },
+ { 'x', "i", "0" },
+ { 0, 0, 0 }
+};
+
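+/* Return the position of the unique set bit (or, for inverted masks,
+   the unique clear bit) in the low byte of I.  */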
+static int
+unique_bit_in (HOST_WIDE_INT i)
+{
+ switch (i & 0xff)
+ {
+ case 0x01: case 0xfe: return 0;
+ case 0x02: case 0xfd: return 1;
+ case 0x04: case 0xfb: return 2;
+ case 0x08: case 0xf7: return 3;
+    case 0x10: case 0xef: return 4;
+    case 0x20: case 0xdf: return 5;
+    case 0x40: case 0xbf: return 6;
+    case 0x80: case 0x7f: return 7;
+ default:
+ gcc_unreachable ();
+ }
+}
+
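+/* Return one more than the number of bits needed to represent I;
+   used when printing clip operands.  */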
+static int
+bit_size_for_clip (HOST_WIDE_INT i)
+{
+ int rv;
+
+ for (rv = 0; rv < 31; rv ++)
+ if (((HOST_WIDE_INT) 1 << rv) > i)
+ return rv + 1;
+ gcc_unreachable ();
+}
+
+/* Print an operand to an assembler instruction.  */
+
+void
+mep_print_operand (FILE *file, rtx x, int code)
+{
+ int i, j;
+ const char *real_name;
+
+ if (code == '<')
+ {
+ /* Print a mnemonic to do CR <- CR moves. Find out which intrinsic
+ we're using, then skip over the "mep_" part of its name. */
+ const struct cgen_insn *insn;
+
+ if (mep_get_move_insn (mep_cmov, &insn))
+ fputs (cgen_intrinsics[insn->intrinsic] + 4, file);
+ else
+ mep_intrinsic_unavailable (mep_cmov);
+ return;
+ }
+ if (code == 'L')
+ {
+ switch (GET_CODE (x))
+ {
+ case AND:
+ fputs ("clr", file);
+ return;
+ case IOR:
+ fputs ("set", file);
+ return;
+ case XOR:
+ fputs ("not", file);
+ return;
+ default:
+ output_operand_lossage ("invalid %%L code");
+ }
+ }
+ if (code == 'M')
+ {
+ /* Print the second operand of a CR <- CR move. If we're using
+ a two-operand instruction (i.e., a real cmov), then just print
+ the operand normally. If we're using a "reg, reg, immediate"
+ instruction such as caddi3, print the operand followed by a
+ zero field. If we're using a three-register instruction,
+ print the operand twice. */
+ const struct cgen_insn *insn;
+
+ mep_print_operand (file, x, 0);
+ if (mep_get_move_insn (mep_cmov, &insn)
+ && insn_data[insn->icode].n_operands == 3)
+ {
+ fputs (", ", file);
+ if (insn_data[insn->icode].operand[2].predicate (x, VOIDmode))
+ mep_print_operand (file, x, 0);
+ else
+ mep_print_operand (file, const0_rtx, 0);
+ }
+ return;
+ }
+
+ encode_pattern (x);
+ for (i = 0; conversions[i].pattern; i++)
+ if (conversions[i].code == code
+ && strcmp(conversions[i].pattern, pattern) == 0)
+ {
+ for (j = 0; conversions[i].format[j]; j++)
+ if (conversions[i].format[j] == '\\')
+ {
+ fputc (conversions[i].format[j+1], file);
+ j++;
+ }
+ else if (ISDIGIT(conversions[i].format[j]))
+ {
+ rtx r = patternr[conversions[i].format[j] - '0'];
+ switch (GET_CODE (r))
+ {
+ case REG:
+ fprintf (file, "%s", reg_names [REGNO (r)]);
+ break;
+ case CONST_INT:
+ switch (code)
+ {
+ case 'b':
+ fprintf (file, "%d", unique_bit_in (INTVAL (r)));
+ break;
+ case 'B':
+ fprintf (file, "%d", bit_size_for_clip (INTVAL (r)));
+ break;
+ case 'h':
+ fprintf (file, "0x%x", ((int) INTVAL (r) >> 16) & 0xffff);
+ break;
+ case 'U':
+ fprintf (file, "%d", bit_size_for_clip (INTVAL (r)) - 1);
+ break;
+ case 'J':
+ fprintf (file, "0x%x", (int) INTVAL (r) & 0xffff);
+ break;
+ case 'x':
+ if (INTVAL (r) & ~(HOST_WIDE_INT)0xff
+ && !(INTVAL (r) & 0xff))
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL(r));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL(r));
+ break;
+ case 'I':
+ if (INTVAL (r) & ~(HOST_WIDE_INT)0xff
+ && conversions[i].format[j+1] == 0)
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (r));
+ fprintf (file, " # 0x%x", (int) INTVAL(r) & 0xffff);
+ }
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL(r));
+ break;
+ default:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL(r));
+ break;
+ }
+ break;
+ case CONST_DOUBLE:
+ fprintf(file, "[const_double 0x%lx]",
+ (unsigned long) CONST_DOUBLE_HIGH(r));
+ break;
+ case SYMBOL_REF:
+ real_name = targetm.strip_name_encoding (XSTR (r, 0));
+ assemble_name (file, real_name);
+ break;
+ case LABEL_REF:
+ output_asm_label (r);
+ break;
+ default:
+ fprintf (stderr, "don't know how to print this operand:");
+ debug_rtx (r);
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (conversions[i].format[j] == '+'
+ && (!code || code == 'I')
+ && ISDIGIT (conversions[i].format[j+1])
+ && GET_CODE (patternr[conversions[i].format[j+1] - '0']) == CONST_INT
+ && INTVAL (patternr[conversions[i].format[j+1] - '0']) < 0)
+ continue;
+ fputc(conversions[i].format[j], file);
+ }
+ break;
+ }
+ if (!conversions[i].pattern)
+ {
+ error ("unconvertible operand %c %qs", code?code:'-', pattern);
+ debug_rtx(x);
+ }
+
+ return;
+}
+
+void
+mep_final_prescan_insn (rtx insn, rtx *operands ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ /* Despite the fact that MeP is perfectly capable of branching and
+ doing something else in the same bundle, gcc does jump
+ optimization *after* scheduling, so we cannot trust the bundling
+ flags on jump instructions. */
+ if (GET_MODE (insn) == BImode
+ && get_attr_slots (insn) != SLOTS_CORE)
+ fputc ('+', asm_out_file);
+}
+
+/* Function args in registers. */
+
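+/* Record how many of the four argument registers still need to be
+   saved for use by va_arg.  */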
+static void
+mep_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED, int *pretend_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ int nsave = 4 - (cum->nregs + 1);
+
+ if (nsave > 0)
+ cfun->machine->arg_regs_to_save = nsave;
+ *pretend_size = nsave * 4;
+}
+
+static int
+bytesize (const_tree type, enum machine_mode mode)
+{
+ if (mode == BLKmode)
+ return int_size_in_bytes (type);
+ return GET_MODE_SIZE (mode);
+}
+
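+/* Save the unnamed argument registers to a stack buffer and return its
+   address; for IVC2, the corresponding coprocessor registers are saved
+   as well.  */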
+static rtx
+mep_expand_builtin_saveregs (void)
+{
+ int bufsize, i, ns;
+ rtx regbuf;
+
+ ns = cfun->machine->arg_regs_to_save;
+ if (TARGET_IVC2)
+ {
+ bufsize = 8 * ((ns + 1) / 2) + 8 * ns;
+ regbuf = assign_stack_local (SImode, bufsize, 64);
+ }
+ else
+ {
+ bufsize = ns * 4;
+ regbuf = assign_stack_local (SImode, bufsize, 32);
+ }
+
+ move_block_from_reg (5-ns, regbuf, ns);
+
+ if (TARGET_IVC2)
+ {
+ rtx tmp = gen_rtx_MEM (DImode, XEXP (regbuf, 0));
+ int ofs = 8 * ((ns+1)/2);
+
+ for (i=0; i<ns; i++)
+ {
+ int rn = (4-ns) + i + 49;
+ rtx ptr;
+
+ ptr = offset_address (tmp, GEN_INT (ofs), 2);
+ emit_move_insn (ptr, gen_rtx_REG (DImode, rn));
+ ofs += 8;
+ }
+ }
+ return XEXP (regbuf, 0);
+}
+
+#define VECTOR_TYPE_P(t) (TREE_CODE(t) == VECTOR_TYPE)
+
+static tree
+mep_build_builtin_va_list (void)
+{
+ tree f_next_gp, f_next_gp_limit, f_next_cop, f_next_stack;
+ tree record;
+
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+
+ f_next_gp = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("__va_next_gp"), ptr_type_node);
+ f_next_gp_limit = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("__va_next_gp_limit"),
+ ptr_type_node);
+ f_next_cop = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__va_next_cop"),
+ ptr_type_node);
+ f_next_stack = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__va_next_stack"),
+ ptr_type_node);
+
+ DECL_FIELD_CONTEXT (f_next_gp) = record;
+ DECL_FIELD_CONTEXT (f_next_gp_limit) = record;
+ DECL_FIELD_CONTEXT (f_next_cop) = record;
+ DECL_FIELD_CONTEXT (f_next_stack) = record;
+
+ TYPE_FIELDS (record) = f_next_gp;
+ DECL_CHAIN (f_next_gp) = f_next_gp_limit;
+ DECL_CHAIN (f_next_gp_limit) = f_next_cop;
+ DECL_CHAIN (f_next_cop) = f_next_stack;
+
+ layout_type (record);
+
+ return record;
+}
+
+static void
+mep_expand_va_start (tree valist, rtx nextarg)
+{
+ tree f_next_gp, f_next_gp_limit, f_next_cop, f_next_stack;
+ tree next_gp, next_gp_limit, next_cop, next_stack;
+ tree t, u;
+ int ns;
+
+ ns = cfun->machine->arg_regs_to_save;
+
+ f_next_gp = TYPE_FIELDS (va_list_type_node);
+ f_next_gp_limit = DECL_CHAIN (f_next_gp);
+ f_next_cop = DECL_CHAIN (f_next_gp_limit);
+ f_next_stack = DECL_CHAIN (f_next_cop);
+
+ next_gp = build3 (COMPONENT_REF, TREE_TYPE (f_next_gp), valist, f_next_gp,
+ NULL_TREE);
+ next_gp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_gp_limit),
+ valist, f_next_gp_limit, NULL_TREE);
+ next_cop = build3 (COMPONENT_REF, TREE_TYPE (f_next_cop), valist, f_next_cop,
+ NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* va_list.next_gp = expand_builtin_saveregs (); */
+ u = make_tree (sizetype, expand_builtin_saveregs ());
+ u = fold_convert (ptr_type_node, u);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_gp, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* va_list.next_gp_limit = va_list.next_gp + 4 * ns; */
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
+ size_int (4 * ns));
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_gp_limit, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
+ size_int (8 * ((ns+1)/2)));
+ /* va_list.next_cop = ROUND_UP(va_list.next_gp_limit,8); */
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_cop, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* va_list.next_stack = nextarg; */
+ u = make_tree (ptr_type_node, nextarg);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+static tree
+mep_gimplify_va_arg_expr (tree valist, tree type,
+ gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size, rsize;
+ bool by_reference, ivc2_vec;
+ tree f_next_gp, f_next_gp_limit, f_next_cop, f_next_stack;
+ tree next_gp, next_gp_limit, next_cop, next_stack;
+ tree label_sover, label_selse;
+ tree tmp, res_addr;
+
+ ivc2_vec = TARGET_IVC2 && VECTOR_TYPE_P (type);
+
+ size = int_size_in_bytes (type);
+ by_reference = (size > (ivc2_vec ? 8 : 4)) || (size <= 0);
+
+ if (by_reference)
+ {
+ type = build_pointer_type (type);
+ size = 4;
+ }
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+
+ f_next_gp = TYPE_FIELDS (va_list_type_node);
+ f_next_gp_limit = DECL_CHAIN (f_next_gp);
+ f_next_cop = DECL_CHAIN (f_next_gp_limit);
+ f_next_stack = DECL_CHAIN (f_next_cop);
+
+ next_gp = build3 (COMPONENT_REF, TREE_TYPE (f_next_gp), valist, f_next_gp,
+ NULL_TREE);
+ next_gp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_gp_limit),
+ valist, f_next_gp_limit, NULL_TREE);
+ next_cop = build3 (COMPONENT_REF, TREE_TYPE (f_next_cop), valist, f_next_cop,
+ NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* if f_next_gp < f_next_gp_limit
+ IF (VECTOR_P && IVC2)
+ val = *f_next_cop;
+ ELSE
+ val = *f_next_gp;
+ f_next_gp += 4;
+ f_next_cop += 8;
+ else
+ label_selse:
+ val = *f_next_stack;
+ f_next_stack += rsize;
+ label_sover:
+ */
+
+ label_sover = create_artificial_label (UNKNOWN_LOCATION);
+ label_selse = create_artificial_label (UNKNOWN_LOCATION);
+ res_addr = create_tmp_var (ptr_type_node, NULL);
+
+ tmp = build2 (GE_EXPR, boolean_type_node, next_gp,
+ unshare_expr (next_gp_limit));
+ tmp = build3 (COND_EXPR, void_type_node, tmp,
+ build1 (GOTO_EXPR, void_type_node,
+ unshare_expr (label_selse)),
+ NULL_TREE);
+ gimplify_and_add (tmp, pre_p);
+
+ if (ivc2_vec)
+ {
+ tmp = build2 (MODIFY_EXPR, void_type_node, res_addr, next_cop);
+ gimplify_and_add (tmp, pre_p);
+ }
+ else
+ {
+ tmp = build2 (MODIFY_EXPR, void_type_node, res_addr, next_gp);
+ gimplify_and_add (tmp, pre_p);
+ }
+
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
+ unshare_expr (next_gp), size_int (4));
+ gimplify_assign (unshare_expr (next_gp), tmp, pre_p);
+
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
+ unshare_expr (next_cop), size_int (8));
+ gimplify_assign (unshare_expr (next_cop), tmp, pre_p);
+
+ tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (label_sover));
+ gimplify_and_add (tmp, pre_p);
+
+ /* - - */
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (label_selse));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build2 (MODIFY_EXPR, void_type_node, res_addr, unshare_expr (next_stack));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
+ unshare_expr (next_stack), size_int (rsize));
+ gimplify_assign (unshare_expr (next_stack), tmp, pre_p);
+
+ /* - - */
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (label_sover));
+ gimplify_and_add (tmp, pre_p);
+
+ res_addr = fold_convert (build_pointer_type (type), res_addr);
+
+ if (by_reference)
+ res_addr = build_va_arg_indirect_ref (res_addr);
+
+ return build_va_arg_indirect_ref (res_addr);
+}
+
+void
+mep_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED,
+ tree fndecl ATTRIBUTE_UNUSED)
+{
+ pcum->nregs = 0;
+
+ if (fntype && lookup_attribute ("vliw", TYPE_ATTRIBUTES (fntype)))
+ pcum->vliw = 1;
+ else
+ pcum->vliw = 0;
+}
+
+/* The ABI is thus: Arguments are in $1, $2, $3, $4, stack. Arguments
+ larger than 4 bytes are passed indirectly. Return value in 0,
+ unless bigger than 4 bytes, then the caller passes a pointer as the
+ first arg. For varargs, we copy $1..$4 to the stack. */
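+
+/* For example (illustrative; the prototype is hypothetical):
+
+ int f (int a, short b, long long c, int d);
+
+ a is passed in $1, b in $2, c (8 bytes, hence passed indirectly)
+ has its address in $3, and d in $4; a fifth argument would go on
+ the stack. */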
+
+static rtx
+mep_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ /* VOIDmode is a signal for the backend to pass data to the call
+ expander via the second operand to the call pattern. We use
+ this to determine whether to use "jsr" or "jsrv". */
+ if (mode == VOIDmode)
+ return GEN_INT (cum->vliw);
+
+ /* If we haven't run out of argument registers, return the next. */
+ if (cum->nregs < 4)
+ {
+ if (type && TARGET_IVC2 && VECTOR_TYPE_P (type))
+ return gen_rtx_REG (mode, cum->nregs + 49);
+ else
+ return gen_rtx_REG (mode, cum->nregs + 1);
+ }
+
+ /* Otherwise the argument goes on the stack. */
+ return NULL_RTX;
+}
+
+static bool
+mep_pass_by_reference (CUMULATIVE_ARGS * cum,
+ enum machine_mode mode,
+ const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ int size = bytesize (type, mode);
+
+ /* This is non-obvious, but yes, large values passed after we've run
+ out of registers are *still* passed by reference - we put the
+ address of the parameter on the stack, as well as putting the
+ parameter itself elsewhere on the stack. */
+
+ if (size <= 0 || size > 8)
+ return true;
+ if (size <= 4)
+ return false;
+ if (TARGET_IVC2 && cum->nregs < 4 && type != NULL_TREE && VECTOR_TYPE_P (type))
+ return false;
+ return true;
+}
+
+static void
+mep_function_arg_advance (CUMULATIVE_ARGS *pcum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ pcum->nregs += 1;
+}
+
+bool
+mep_return_in_memory (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
+{
+ int size = bytesize (type, BLKmode);
+ if (TARGET_IVC2 && VECTOR_TYPE_P (type))
+ return size > 0 && size <= 8 ? 0 : 1;
+ return size > 0 && size <= 4 ? 0 : 1;
+}
+
+static bool
+mep_narrow_volatile_bitfield (void)
+{
+ return true;
+}
+
+/* Implement FUNCTION_VALUE. All values are returned in $0. */
+
+rtx
+mep_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
+{
+ if (TARGET_IVC2 && VECTOR_TYPE_P (type))
+ return gen_rtx_REG (TYPE_MODE (type), 48);
+ return gen_rtx_REG (TYPE_MODE (type), RETURN_VALUE_REGNUM);
+}
+
+/* Implement LIBCALL_VALUE, using the same rules as mep_function_value. */
+
+rtx
+mep_libcall_value (enum machine_mode mode)
+{
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+/* Handle pipeline hazards. */
+
+typedef enum { op_none, op_stc, op_fsft, op_ret } op_num;
+static const char *opnames[] = { "", "stc", "fsft", "ret" };
+
+static int prev_opcode = 0;
+
+/* This isn't as optimal as it could be, because we don't know which
+ control register the STC opcode is storing to. We only need to add
+ the nop if it's the relevant register, but we add it for irrelevant
+ registers also. */
+
+void
+mep_asm_output_opcode (FILE *file, const char *ptr)
+{
+ int this_opcode = op_none;
+ const char *hazard = 0;
+
+ switch (*ptr)
+ {
+ case 'f':
+ if (strncmp (ptr, "fsft", 4) == 0 && !ISGRAPH (ptr[4]))
+ this_opcode = op_fsft;
+ break;
+ case 'r':
+ if (strncmp (ptr, "ret", 3) == 0 && !ISGRAPH (ptr[3]))
+ this_opcode = op_ret;
+ break;
+ case 's':
+ if (strncmp (ptr, "stc", 3) == 0 && !ISGRAPH (ptr[3]))
+ this_opcode = op_stc;
+ break;
+ }
+
+ if (prev_opcode == op_stc && this_opcode == op_fsft)
+ hazard = "nop";
+ if (prev_opcode == op_stc && this_opcode == op_ret)
+ hazard = "nop";
+
+ if (hazard)
+ fprintf (file, "%s\t# %s-%s hazard\n\t",
+ hazard, opnames[prev_opcode], opnames[this_opcode]);
+
+ prev_opcode = this_opcode;
+}
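+
+/* Illustrative effect of the hook above (the operands are made up):
+ for the instruction sequence
+
+ stc	$1, $hi
+ ret
+
+ the emitted assembly becomes
+
+ stc	$1, $hi
+ nop	# stc-ret hazard
+ ret */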
+
+/* Handle attributes. */
+
+static tree
+mep_validate_based_tiny (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ if (TREE_CODE (*node) != VAR_DECL
+ && TREE_CODE (*node) != POINTER_TYPE
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (0, "%qE attribute only applies to variables", name);
+ *no_add = true;
+ }
+ else if (args == NULL_TREE && TREE_CODE (*node) == VAR_DECL)
+ {
+ if (! (TREE_PUBLIC (*node) || TREE_STATIC (*node)))
+ {
+ warning (0, "address region attributes not allowed with auto storage class");
+ *no_add = true;
+ }
+ /* Ignore the storage attribute of the pointed-to variable: char __far * x; */
+ if (TREE_TYPE (*node) && TREE_CODE (TREE_TYPE (*node)) == POINTER_TYPE)
+ {
+ warning (0, "address region attributes on pointed-to types ignored");
+ *no_add = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+static int
+mep_multiple_address_regions (tree list, bool check_section_attr)
+{
+ tree a;
+ int count_sections = 0;
+ int section_attr_count = 0;
+
+ for (a = list; a; a = TREE_CHAIN (a))
+ {
+ if (is_attribute_p ("based", TREE_PURPOSE (a))
+ || is_attribute_p ("tiny", TREE_PURPOSE (a))
+ || is_attribute_p ("near", TREE_PURPOSE (a))
+ || is_attribute_p ("far", TREE_PURPOSE (a))
+ || is_attribute_p ("io", TREE_PURPOSE (a)))
+ count_sections ++;
+ if (check_section_attr)
+ section_attr_count += is_attribute_p ("section", TREE_PURPOSE (a));
+ }
+
+ if (check_section_attr)
+ return section_attr_count;
+ else
+ return count_sections;
+}
+
+#define MEP_ATTRIBUTES(decl) \
+ (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
+ : DECL_ATTRIBUTES (decl) \
+ ? (DECL_ATTRIBUTES (decl)) \
+ : TYPE_ATTRIBUTES (TREE_TYPE (decl))
+
+static tree
+mep_validate_near_far (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ if (TREE_CODE (*node) != VAR_DECL
+ && TREE_CODE (*node) != FUNCTION_DECL
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != POINTER_TYPE
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (0, "%qE attribute only applies to variables and functions",
+ name);
+ *no_add = true;
+ }
+ else if (args == NULL_TREE && TREE_CODE (*node) == VAR_DECL)
+ {
+ if (! (TREE_PUBLIC (*node) || TREE_STATIC (*node)))
+ {
+ warning (0, "address region attributes not allowed with auto storage class");
+ *no_add = true;
+ }
+ /* Ignore the storage attribute of the pointed-to variable: char __far * x; */
+ if (TREE_TYPE (*node) && TREE_CODE (TREE_TYPE (*node)) == POINTER_TYPE)
+ {
+ warning (0, "address region attributes on pointed-to types ignored");
+ *no_add = true;
+ }
+ }
+ else if (mep_multiple_address_regions (MEP_ATTRIBUTES (*node), false) > 0)
+ {
+ warning (0, "duplicate address region attribute %qE in declaration of %qE on line %d",
+ name, DECL_NAME (*node), DECL_SOURCE_LINE (*node));
+ DECL_ATTRIBUTES (*node) = NULL_TREE;
+ }
+ return NULL_TREE;
+}
+
+static tree
+mep_validate_disinterrupt (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL
+ && TREE_CODE (*node) != METHOD_TYPE)
+ {
+ warning (0, "%qE attribute only applies to functions", name);
+ *no_add = true;
+ }
+ return NULL_TREE;
+}
+
+static tree
+mep_validate_interrupt (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ tree function_type;
+
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (0, "%qE attribute only applies to functions", name);
+ *no_add = true;
+ return NULL_TREE;
+ }
+
+ if (DECL_DECLARED_INLINE_P (*node))
+ error ("cannot inline interrupt function %qE", DECL_NAME (*node));
+ DECL_UNINLINABLE (*node) = 1;
+
+ function_type = TREE_TYPE (*node);
+
+ if (TREE_TYPE (function_type) != void_type_node)
+ error ("interrupt function must have return type of void");
+
+ if (prototype_p (function_type)
+ && (TREE_VALUE (TYPE_ARG_TYPES (function_type)) != void_type_node
+ || TREE_CHAIN (TYPE_ARG_TYPES (function_type)) != NULL_TREE))
+ error ("interrupt function must have no arguments");
+
+ return NULL_TREE;
+}
+
+static tree
+mep_validate_io_cb (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ if (TREE_CODE (*node) != VAR_DECL)
+ {
+ warning (0, "%qE attribute only applies to variables", name);
+ *no_add = true;
+ }
+
+ if (args != NULL_TREE)
+ {
+ if (TREE_CODE (TREE_VALUE (args)) == NON_LVALUE_EXPR)
+ TREE_VALUE (args) = TREE_OPERAND (TREE_VALUE (args), 0);
+ if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ warning (0, "%qE attribute allows only an integer constant argument",
+ name);
+ *no_add = true;
+ }
+ }
+
+ if (*no_add == false && !TARGET_IO_NO_VOLATILE)
+ TREE_THIS_VOLATILE (*node) = 1;
+
+ return NULL_TREE;
+}
+
+static tree
+mep_validate_vliw (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != FUNCTION_DECL
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ static int gave_pointer_note = 0;
+ static int gave_array_note = 0;
+ static const char * given_type = NULL;
+
+ given_type = tree_code_name[TREE_CODE (*node)];
+ if (TREE_CODE (*node) == POINTER_TYPE)
+ given_type = "pointers";
+ if (TREE_CODE (*node) == ARRAY_TYPE)
+ given_type = "arrays";
+
+ if (given_type)
+ warning (0, "%qE attribute only applies to functions, not %s",
+ name, given_type);
+ else
+ warning (0, "%qE attribute only applies to functions",
+ name);
+ *no_add = true;
+
+ if (TREE_CODE (*node) == POINTER_TYPE
+ && !gave_pointer_note)
+ {
+ inform (input_location, "to describe a pointer to a VLIW function, use syntax like this:");
+ inform (input_location, " typedef int (__vliw *vfuncptr) ();");
+ gave_pointer_note = 1;
+ }
+
+ if (TREE_CODE (*node) == ARRAY_TYPE
+ && !gave_array_note)
+ {
+ inform (input_location, "to describe an array of VLIW function pointers, use syntax like this:");
+ inform (input_location, " typedef int (__vliw *vfuncptr[]) ();");
+ gave_array_note = 1;
+ }
+ }
+ if (!TARGET_VLIW)
+ error ("VLIW functions are not allowed without a VLIW configuration");
+ return NULL_TREE;
+}
+
+static const struct attribute_spec mep_attribute_table[] =
+{
+ /* name min max decl type func handler */
+ { "based", 0, 0, false, false, false, mep_validate_based_tiny },
+ { "tiny", 0, 0, false, false, false, mep_validate_based_tiny },
+ { "near", 0, 0, false, false, false, mep_validate_near_far },
+ { "far", 0, 0, false, false, false, mep_validate_near_far },
+ { "disinterrupt", 0, 0, false, false, false, mep_validate_disinterrupt },
+ { "interrupt", 0, 0, false, false, false, mep_validate_interrupt },
+ { "io", 0, 1, false, false, false, mep_validate_io_cb },
+ { "cb", 0, 1, false, false, false, mep_validate_io_cb },
+ { "vliw", 0, 0, false, true, false, mep_validate_vliw },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+static bool
+mep_function_attribute_inlinable_p (const_tree callee)
+{
+ tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (callee));
+ if (!attrs) attrs = DECL_ATTRIBUTES (callee);
+ return (lookup_attribute ("disinterrupt", attrs) == 0
+ && lookup_attribute ("interrupt", attrs) == 0);
+}
+
+static bool
+mep_can_inline_p (tree caller, tree callee)
+{
+ if (TREE_CODE (callee) == ADDR_EXPR)
+ callee = TREE_OPERAND (callee, 0);
+
+ if (!mep_vliw_function_p (caller)
+ && mep_vliw_function_p (callee))
+ {
+ return false;
+ }
+ return true;
+}
+
+#define FUNC_CALL 1
+#define FUNC_DISINTERRUPT 2
+
+
+struct GTY(()) pragma_entry {
+ int used;
+ int flag;
+ const char *funcname;
+};
+typedef struct pragma_entry pragma_entry;
+
+/* Hash table of farcall-tagged sections. */
+static GTY((param_is (pragma_entry))) htab_t pragma_htab;
+
+static int
+pragma_entry_eq (const void *p1, const void *p2)
+{
+ const pragma_entry *old = (const pragma_entry *) p1;
+ const char *new_name = (const char *) p2;
+
+ return strcmp (old->funcname, new_name) == 0;
+}
+
+static hashval_t
+pragma_entry_hash (const void *p)
+{
+ const pragma_entry *old = (const pragma_entry *) p;
+ return htab_hash_string (old->funcname);
+}
+
+static void
+mep_note_pragma_flag (const char *funcname, int flag)
+{
+ pragma_entry **slot;
+
+ if (!pragma_htab)
+ pragma_htab = htab_create_ggc (31, pragma_entry_hash,
+ pragma_entry_eq, NULL);
+
+ slot = (pragma_entry **)
+ htab_find_slot_with_hash (pragma_htab, funcname,
+ htab_hash_string (funcname), INSERT);
+
+ if (!*slot)
+ {
+ *slot = ggc_alloc_pragma_entry ();
+ (*slot)->flag = 0;
+ (*slot)->used = 0;
+ (*slot)->funcname = ggc_strdup (funcname);
+ }
+ (*slot)->flag |= flag;
+}
+
+static bool
+mep_lookup_pragma_flag (const char *funcname, int flag)
+{
+ pragma_entry **slot;
+
+ if (!pragma_htab)
+ return false;
+
+ if (funcname[0] == '@' && funcname[2] == '.')
+ funcname += 3;
+
+ slot = (pragma_entry **)
+ htab_find_slot_with_hash (pragma_htab, funcname,
+ htab_hash_string (funcname), NO_INSERT);
+ if (slot && *slot && ((*slot)->flag & flag))
+ {
+ (*slot)->used |= flag;
+ return true;
+ }
+ return false;
+}
+
+bool
+mep_lookup_pragma_call (const char *funcname)
+{
+ return mep_lookup_pragma_flag (funcname, FUNC_CALL);
+}
+
+void
+mep_note_pragma_call (const char *funcname)
+{
+ mep_note_pragma_flag (funcname, FUNC_CALL);
+}
+
+bool
+mep_lookup_pragma_disinterrupt (const char *funcname)
+{
+ return mep_lookup_pragma_flag (funcname, FUNC_DISINTERRUPT);
+}
+
+void
+mep_note_pragma_disinterrupt (const char *funcname)
+{
+ mep_note_pragma_flag (funcname, FUNC_DISINTERRUPT);
+}
+
+static int
+note_unused_pragma_disinterrupt (void **slot, void *data ATTRIBUTE_UNUSED)
+{
+ const pragma_entry *d = (const pragma_entry *)(*slot);
+
+ if ((d->flag & FUNC_DISINTERRUPT)
+ && !(d->used & FUNC_DISINTERRUPT))
+ warning (0, "\"#pragma disinterrupt %s\" not used", d->funcname);
+ return 1;
+}
+
+void
+mep_file_cleanups (void)
+{
+ if (pragma_htab)
+ htab_traverse (pragma_htab, note_unused_pragma_disinterrupt, NULL);
+}
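+
+/* Usage sketch (illustrative): a file containing
+
+ #pragma disinterrupt foo
+
+ causes the pragma handler (defined elsewhere in the port) to record
+ FUNC_DISINTERRUPT for "foo". If the entry is never looked up again
+ (for example, because no function named foo is defined), the
+ traversal above reports "#pragma disinterrupt foo" as unused. */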
+
+/* These three functions provide a bridge between the pragmas that
+ affect register classes, and the functions that maintain them. We
+ can't call those functions directly as pragma handling is part of
+ the front end and doesn't have direct access to them. */
+
+void
+mep_save_register_info (void)
+{
+ save_register_info ();
+}
+
+void
+mep_reinit_regs (void)
+{
+ reinit_regs ();
+}
+
+void
+mep_init_regs (void)
+{
+ init_regs ();
+}
+
+
+
+static int
+mep_attrlist_to_encoding (tree list, tree decl)
+{
+ if (mep_multiple_address_regions (list, false) > 1)
+ {
+ warning (0, "duplicate address region attribute %qE in declaration of %qE on line %d",
+ TREE_PURPOSE (TREE_CHAIN (list)),
+ DECL_NAME (decl),
+ DECL_SOURCE_LINE (decl));
+ TREE_CHAIN (list) = NULL_TREE;
+ }
+
+ while (list)
+ {
+ if (is_attribute_p ("based", TREE_PURPOSE (list)))
+ return 'b';
+ if (is_attribute_p ("tiny", TREE_PURPOSE (list)))
+ return 't';
+ if (is_attribute_p ("near", TREE_PURPOSE (list)))
+ return 'n';
+ if (is_attribute_p ("far", TREE_PURPOSE (list)))
+ return 'f';
+ if (is_attribute_p ("io", TREE_PURPOSE (list)))
+ {
+ if (TREE_VALUE (list)
+ && TREE_VALUE (TREE_VALUE (list))
+ && TREE_CODE (TREE_VALUE (TREE_VALUE (list))) == INTEGER_CST)
+ {
+ int location = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE(list)));
+ if (location >= 0
+ && location <= 0x1000000)
+ return 'i';
+ }
+ return 'I';
+ }
+ if (is_attribute_p ("cb", TREE_PURPOSE (list)))
+ return 'c';
+ list = TREE_CHAIN (list);
+ }
+ if (TARGET_TF
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_SECTION_NAME (decl) == 0)
+ return 'f';
+ return 0;
+}
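+
+/* Summary of the encoding letters returned above: 'b' based, 't'
+ tiny, 'n' near, 'f' far (also returned for functions under
+ TARGET_TF with no section name), 'i' io with a constant address in
+ [0, 0x1000000], 'I' io otherwise, 'c' cb; 0 means no address
+ region applies. */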
+
+static int
+mep_comp_type_attributes (const_tree t1, const_tree t2)
+{
+ int vliw1, vliw2;
+
+ vliw1 = (lookup_attribute ("vliw", TYPE_ATTRIBUTES (t1)) != 0);
+ vliw2 = (lookup_attribute ("vliw", TYPE_ATTRIBUTES (t2)) != 0);
+
+ if (vliw1 != vliw2)
+ return 0;
+
+ return 1;
+}
+
+static void
+mep_insert_attributes (tree decl, tree *attributes)
+{
+ int size;
+ const char *secname = 0;
+ tree attrib, attrlist;
+ char encoding;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ const char *funcname = IDENTIFIER_POINTER (DECL_NAME (decl));
+
+ if (mep_lookup_pragma_disinterrupt (funcname))
+ {
+ attrib = build_tree_list (get_identifier ("disinterrupt"), NULL_TREE);
+ *attributes = chainon (*attributes, attrib);
+ }
+ }
+
+ if (TREE_CODE (decl) != VAR_DECL
+ || ! (TREE_PUBLIC (decl) || TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
+ return;
+
+ if (TREE_READONLY (decl) && TARGET_DC)
+ /* -mdc means that const variables default to the near section,
+ regardless of the size cutoff. */
+ return;
+
+ /* User specified an attribute, so override the default.
+ Ignore the storage attribute of the pointed-to variable: char __far * x; */
+ if (! (TREE_TYPE (decl) && TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE))
+ {
+ if (TYPE_P (decl) && TYPE_ATTRIBUTES (decl) && *attributes)
+ TYPE_ATTRIBUTES (decl) = NULL_TREE;
+ else if (DECL_ATTRIBUTES (decl) && *attributes)
+ DECL_ATTRIBUTES (decl) = NULL_TREE;
+ }
+
+ attrlist = *attributes ? *attributes : DECL_ATTRIBUTES (decl);
+ encoding = mep_attrlist_to_encoding (attrlist, decl);
+ if (!encoding && TYPE_P (TREE_TYPE (decl)))
+ {
+ attrlist = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ encoding = mep_attrlist_to_encoding (attrlist, decl);
+ }
+ if (encoding)
+ {
+ /* This means that the declaration has a specific section
+ attribute, so we should not apply the default rules. */
+
+ if (encoding == 'i' || encoding == 'I')
+ {
+ tree attr = lookup_attribute ("io", attrlist);
+ if (attr
+ && TREE_VALUE (attr)
+ && TREE_VALUE (TREE_VALUE(attr)))
+ {
+ int location = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE(attr)));
+ static tree previous_value = 0;
+ static int previous_location = 0;
+ static tree previous_name = 0;
+
+ /* We take advantage of the fact that gcc will reuse the
+ same tree pointer when applying an attribute to a
+ list of decls, but produce a new tree for attributes
+ on separate source lines, even when they're textually
+ identical. This is the behavior we want. */
+ if (TREE_VALUE (attr) == previous_value
+ && location == previous_location)
+ {
+ warning (0, "__io address 0x%x is the same for %qE and %qE",
+ location, previous_name, DECL_NAME (decl));
+ }
+ previous_name = DECL_NAME (decl);
+ previous_location = location;
+ previous_value = TREE_VALUE (attr);
+ }
+ }
+ return;
+ }
+
+ /* Declarations of arrays can change size. Don't trust them. */
+ if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
+ size = 0;
+ else
+ size = int_size_in_bytes (TREE_TYPE (decl));
+
+ if (TARGET_RAND_TPGP && size <= 4 && size > 0)
+ {
+ if (TREE_PUBLIC (decl)
+ || DECL_EXTERNAL (decl)
+ || TREE_STATIC (decl))
+ {
+ const char *name = IDENTIFIER_POINTER (DECL_NAME (decl));
+ int key = 0;
+
+ while (*name)
+ key += *name++;
+
+ switch (key & 3)
+ {
+ case 0:
+ secname = "based";
+ break;
+ case 1:
+ secname = "tiny";
+ break;
+ case 2:
+ secname = "far";
+ break;
+ default:
+ ;
+ }
+ }
+ }
+ else
+ {
+ if (size <= mep_based_cutoff && size > 0)
+ secname = "based";
+ else if (size <= mep_tiny_cutoff && size > 0)
+ secname = "tiny";
+ else if (TARGET_L)
+ secname = "far";
+ }
+
+ if (mep_const_section && TREE_READONLY (decl))
+ {
+ if (strcmp (mep_const_section, "tiny") == 0)
+ secname = "tiny";
+ else if (strcmp (mep_const_section, "near") == 0)
+ return;
+ else if (strcmp (mep_const_section, "far") == 0)
+ secname = "far";
+ }
+
+ if (!secname)
+ return;
+
+ if (!mep_multiple_address_regions (*attributes, true)
+ && !mep_multiple_address_regions (DECL_ATTRIBUTES (decl), false))
+ {
+ attrib = build_tree_list (get_identifier (secname), NULL_TREE);
+
+ /* Chain the attribute directly onto the variable's DECL_ATTRIBUTES
+ in order to avoid the POINTER_TYPE bypasses in mep_validate_near_far
+ and mep_validate_based_tiny. */
+ DECL_ATTRIBUTES (decl) = chainon (DECL_ATTRIBUTES (decl), attrib);
+ }
+}
+
+static void
+mep_encode_section_info (tree decl, rtx rtl, int first)
+{
+ rtx rtlname;
+ const char *oldname;
+ const char *secname;
+ char encoding;
+ char *newname;
+ tree idp;
+ int maxsize;
+ tree type;
+ tree mep_attributes;
+
+ if (! first)
+ return;
+
+ if (TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return;
+
+ rtlname = XEXP (rtl, 0);
+ if (GET_CODE (rtlname) == SYMBOL_REF)
+ oldname = XSTR (rtlname, 0);
+ else if (GET_CODE (rtlname) == MEM
+ && GET_CODE (XEXP (rtlname, 0)) == SYMBOL_REF)
+ oldname = XSTR (XEXP (rtlname, 0), 0);
+ else
+ gcc_unreachable ();
+
+ type = TREE_TYPE (decl);
+ if (type == error_mark_node)
+ return;
+ mep_attributes = MEP_ATTRIBUTES (decl);
+
+ encoding = mep_attrlist_to_encoding (mep_attributes, decl);
+
+ if (encoding)
+ {
+ newname = (char *) alloca (strlen (oldname) + 4);
+ sprintf (newname, "@%c.%s", encoding, oldname);
+ idp = get_identifier (newname);
+ XEXP (rtl, 0) =
+ gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+ SYMBOL_REF_WEAK (XEXP (rtl, 0)) = DECL_WEAK (decl);
+ SET_SYMBOL_REF_DECL (XEXP (rtl, 0), decl);
+
+ switch (encoding)
+ {
+ case 'b':
+ maxsize = 128;
+ secname = "based";
+ break;
+ case 't':
+ maxsize = 65536;
+ secname = "tiny";
+ break;
+ case 'n':
+ maxsize = 0x1000000;
+ secname = "near";
+ break;
+ default:
+ maxsize = 0;
+ secname = 0;
+ break;
+ }
+ if (maxsize && int_size_in_bytes (TREE_TYPE (decl)) > maxsize)
+ {
+ warning (0, "variable %s (%ld bytes) is too large for the %s section (%d bytes)",
+ oldname,
+ (long) int_size_in_bytes (TREE_TYPE (decl)),
+ secname,
+ maxsize);
+ }
+ }
+}
+
+const char *
+mep_strip_name_encoding (const char *sym)
+{
+ while (1)
+ {
+ if (*sym == '*')
+ sym++;
+ else if (*sym == '@' && sym[2] == '.')
+ sym += 3;
+ else
+ return sym;
+ }
+}
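+
+/* Examples (illustrative): "@f.foo" strips to "foo", as does
+ "*@f.foo"; a symbol with no encoding is returned unchanged. */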
+
+static section *
+mep_select_section (tree decl, int reloc ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ int readonly = 1;
+ int encoding;
+
+ switch (TREE_CODE (decl))
+ {
+ case VAR_DECL:
+ if (!TREE_READONLY (decl)
+ || TREE_SIDE_EFFECTS (decl)
+ || !DECL_INITIAL (decl)
+ || (DECL_INITIAL (decl) != error_mark_node
+ && !TREE_CONSTANT (DECL_INITIAL (decl))))
+ readonly = 0;
+ break;
+ case CONSTRUCTOR:
+ if (! TREE_CONSTANT (decl))
+ readonly = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+
+ if (name[0] == '@' && name[2] == '.')
+ encoding = name[1];
+ else
+ encoding = 0;
+
+ if (flag_function_sections || DECL_ONE_ONLY (decl))
+ mep_unique_section (decl, 0);
+ else if (lookup_attribute ("vliw", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+ {
+ if (encoding == 'f')
+ return vftext_section;
+ else
+ return vtext_section;
+ }
+ else if (encoding == 'f')
+ return ftext_section;
+ else
+ return text_section;
+ }
+
+ if (TREE_CODE (decl) == VAR_DECL)
+ {
+ const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+
+ if (name[0] == '@' && name[2] == '.')
+ switch (name[1])
+ {
+ case 'b':
+ return based_section;
+
+ case 't':
+ if (readonly)
+ return srodata_section;
+ if (DECL_INITIAL (decl))
+ return sdata_section;
+ return tinybss_section;
+
+ case 'f':
+ if (readonly)
+ return frodata_section;
+ return far_section;
+
+ case 'i':
+ case 'I':
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "variable %D of type %<io%> must be uninitialized", decl);
+ return data_section;
+
+ case 'c':
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "variable %D of type %<cb%> must be uninitialized", decl);
+ return data_section;
+ }
+ }
+
+ if (readonly)
+ return readonly_data_section;
+
+ return data_section;
+}
+
+static void
+mep_unique_section (tree decl, int reloc)
+{
+ static const char *prefixes[][2] =
+ {
+ { ".text.", ".gnu.linkonce.t." },
+ { ".rodata.", ".gnu.linkonce.r." },
+ { ".data.", ".gnu.linkonce.d." },
+ { ".based.", ".gnu.linkonce.based." },
+ { ".sdata.", ".gnu.linkonce.s." },
+ { ".far.", ".gnu.linkonce.far." },
+ { ".ftext.", ".gnu.linkonce.ft." },
+ { ".frodata.", ".gnu.linkonce.frd." },
+ { ".srodata.", ".gnu.linkonce.srd." },
+ { ".vtext.", ".gnu.linkonce.v." },
+ { ".vftext.", ".gnu.linkonce.vf." }
+ };
+ int sec = 2; /* .data */
+ int len;
+ const char *name, *prefix;
+ char *string;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ if (DECL_RTL (decl))
+ name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ if (lookup_attribute ("vliw", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+ sec = 9; /* .vtext */
+ else
+ sec = 0; /* .text */
+ }
+ else if (decl_readonly_section (decl, reloc))
+ sec = 1; /* .rodata */
+
+ if (name[0] == '@' && name[2] == '.')
+ {
+ switch (name[1])
+ {
+ case 'b':
+ sec = 3; /* .based */
+ break;
+ case 't':
+ if (sec == 1)
+ sec = 8; /* .srodata */
+ else
+ sec = 4; /* .sdata */
+ break;
+ case 'f':
+ if (sec == 0)
+ sec = 6; /* .ftext */
+ else if (sec == 9)
+ sec = 10; /* .vftext */
+ else if (sec == 1)
+ sec = 7; /* .frodata */
+ else
+ sec = 5; /* .far */
+ break;
+ }
+ name += 3;
+ }
+
+ prefix = prefixes[sec][DECL_ONE_ONLY(decl)];
+ len = strlen (name) + strlen (prefix);
+ string = (char *) alloca (len + 1);
+
+ sprintf (string, "%s%s", prefix, name);
+
+ DECL_SECTION_NAME (decl) = build_string (len, string);
+}
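+
+/* Illustrative results of the naming scheme above: a plain function
+ foo gets ".text.foo" (".gnu.linkonce.t.foo" when DECL_ONE_ONLY); a
+ variable encoded "@b.x" gets ".based.x"; a read-only variable
+ encoded "@t.y" gets ".srodata.y". */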
+
+/* Given a decl, a section name, and whether the decl initializer
+ has relocs, choose attributes for the section. */
+
+#define SECTION_MEP_VLIW SECTION_MACH_DEP
+
+static unsigned int
+mep_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+
+ if (decl && TREE_CODE (decl) == FUNCTION_DECL
+ && lookup_attribute ("vliw", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+ flags |= SECTION_MEP_VLIW;
+
+ return flags;
+}
+
+/* Switch to an arbitrary section NAME with attributes as specified
+ by FLAGS. ALIGN specifies any known alignment requirements for
+ the section; 0 if the default should be used.
+
+ Differs from the standard ELF version only in support of VLIW mode. */
+
+static void
+mep_asm_named_section (const char *name, unsigned int flags, tree decl ATTRIBUTE_UNUSED)
+{
+ char flagchars[8], *f = flagchars;
+ const char *type;
+
+ if (!(flags & SECTION_DEBUG))
+ *f++ = 'a';
+ if (flags & SECTION_WRITE)
+ *f++ = 'w';
+ if (flags & SECTION_CODE)
+ *f++ = 'x';
+ if (flags & SECTION_SMALL)
+ *f++ = 's';
+ if (flags & SECTION_MEP_VLIW)
+ *f++ = 'v';
+ *f = '\0';
+
+ if (flags & SECTION_BSS)
+ type = "nobits";
+ else
+ type = "progbits";
+
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@%s\n",
+ name, flagchars, type);
+
+ if (flags & SECTION_CODE)
+ fputs ((flags & SECTION_MEP_VLIW ? "\t.vliw\n" : "\t.core\n"),
+ asm_out_file);
+}
+
+void
+mep_output_aligned_common (FILE *stream, tree decl, const char *name,
+ int size, int align, int global)
+{
+ /* We intentionally don't use mep_section_tag() here. */
+ if (name[0] == '@'
+ && (name[1] == 'i' || name[1] == 'I' || name[1] == 'c')
+ && name[2] == '.')
+ {
+ int location = -1;
+ tree attr = lookup_attribute ((name[1] == 'c' ? "cb" : "io"),
+ DECL_ATTRIBUTES (decl));
+ if (attr
+ && TREE_VALUE (attr)
+ && TREE_VALUE (TREE_VALUE(attr)))
+ location = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE(attr)));
+ if (location == -1)
+ return;
+ if (global)
+ {
+ fprintf (stream, "\t.globl\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+ }
+ assemble_name (stream, name);
+ fprintf (stream, " = %d\n", location);
+ return;
+ }
+ if (name[0] == '@' && name[2] == '.')
+ {
+ const char *sec = 0;
+ switch (name[1])
+ {
+ case 'b':
+ switch_to_section (based_section);
+ sec = ".based";
+ break;
+ case 't':
+ switch_to_section (tinybss_section);
+ sec = ".sbss";
+ break;
+ case 'f':
+ switch_to_section (farbss_section);
+ sec = ".farbss";
+ break;
+ }
+ if (sec)
+ {
+ const char *name2;
+ int p2align = 0;
+
+ while (align > BITS_PER_UNIT)
+ {
+ align /= 2;
+ p2align ++;
+ }
+ name2 = targetm.strip_name_encoding (name);
+ if (global)
+ fprintf (stream, "\t.globl\t%s\n", name2);
+ fprintf (stream, "\t.p2align %d\n", p2align);
+ fprintf (stream, "\t.type\t%s,@object\n", name2);
+ fprintf (stream, "\t.size\t%s,%d\n", name2, size);
+ fprintf (stream, "%s:\n\t.zero\t%d\n", name2, size);
+ return;
+ }
+ }
+
+ if (!global)
+ {
+ fprintf (stream, "\t.local\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+ }
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ fprintf (stream, ",%u,%u\n", size, align / BITS_PER_UNIT);
+}
+
+/* Trampolines. */
+
+static void
+mep_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
+{
+ rtx addr = XEXP (m_tramp, 0);
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__mep_trampoline_helper"),
+ LCT_NORMAL, VOIDmode, 3,
+ addr, Pmode,
+ fnaddr, Pmode,
+ static_chain, Pmode);
+}
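+
+/* The helper called above is expected to look roughly like this (an
+ illustrative prototype; the real definition lives in libgcc),
+ matching the three Pmode arguments and VOIDmode return passed to
+ emit_library_call:
+
+ void __mep_trampoline_helper (void *tramp, void *func,
+ void *static_chain); */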
+
+/* Experimental Reorg. */
+
+static bool
+mep_mentioned_p (rtx in,
+ rtx reg, /* NULL for mem */
+ int modes_too) /* if nonzero, modes must match also. */
+{
+ const char *fmt;
+ int i;
+ enum rtx_code code;
+
+ if (in == 0)
+ return false;
+ if (reg && GET_CODE (reg) != REG)
+ return false;
+
+ if (GET_CODE (in) == LABEL_REF)
+ return (reg == 0);
+
+ code = GET_CODE (in);
+
+ switch (code)
+ {
+ case MEM:
+ if (reg)
+ return mep_mentioned_p (XEXP (in, 0), reg, modes_too);
+ return true;
+
+ case REG:
+ if (!reg)
+ return false;
+ if (modes_too && (GET_MODE (in) != GET_MODE (reg)))
+ return false;
+ return (REGNO (in) == REGNO (reg));
+
+ case SCRATCH:
+ case CC0:
+ case PC:
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return false;
+
+ default:
+ break;
+ }
+
+ /* A SET's source should be read-only. */
+ if (code == SET && !reg)
+ return mep_mentioned_p (SET_DEST (in), reg, modes_too);
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (in, i) - 1; j >= 0; j--)
+ if (mep_mentioned_p (XVECEXP (in, i, j), reg, modes_too))
+ return true;
+ }
+ else if (fmt[i] == 'e'
+ && mep_mentioned_p (XEXP (in, i), reg, modes_too))
+ return true;
+ }
+ return false;
+}
+
+#define EXPERIMENTAL_REGMOVE_REORG 1
+
+#if EXPERIMENTAL_REGMOVE_REORG
+
+static int
+mep_compatible_reg_class (int r1, int r2)
+{
+ if (GR_REGNO_P (r1) && GR_REGNO_P (r2))
+ return 1;
+ if (CR_REGNO_P (r1) && CR_REGNO_P (r2))
+ return 1;
+ return 0;
+}
+
+static void
+mep_reorg_regmove (rtx insns)
+{
+ rtx insn, next, pat, follow, *where;
+ int count = 0, done = 0, replace, before = 0;
+
+ if (dump_file)
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ if (GET_CODE (insn) == INSN)
+ before++;
+
+ /* We're looking for (set r2 r1) moves where r1 dies, followed by a
+ set that uses r2 and in which r2 dies. We replace r2 with r1
+ and see if it's still a valid insn. If so, delete the first set.
+ Copied from reorg.c. */
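+
+ /* For instance (illustrative):
+
+ $2 = $1		; $1 dies here
+ $3 = $2 + 4	; $2 dies here
+
+ becomes
+
+ $3 = $1 + 4
+
+ with the first move deleted. */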
+
+ while (!done)
+ {
+ done = 1;
+ for (insn = insns; insn; insn = next)
+ {
+ next = NEXT_INSN (insn);
+ if (GET_CODE (insn) != INSN)
+ continue;
+ pat = PATTERN (insn);
+
+ replace = 0;
+
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == REG
+ && GET_CODE (SET_DEST (pat)) == REG
+ && find_regno_note (insn, REG_DEAD, REGNO (SET_SRC (pat)))
+ && mep_compatible_reg_class (REGNO (SET_SRC (pat)), REGNO (SET_DEST (pat))))
+ {
+ follow = next_nonnote_insn (insn);
+ if (dump_file)
+ fprintf (dump_file, "superfluous moves: considering %d\n", INSN_UID (insn));
+
+ while (follow && GET_CODE (follow) == INSN
+ && GET_CODE (PATTERN (follow)) == SET
+ && !dead_or_set_p (follow, SET_SRC (pat))
+ && !mep_mentioned_p (PATTERN (follow), SET_SRC (pat), 0)
+ && !mep_mentioned_p (PATTERN (follow), SET_DEST (pat), 0))
+ {
+ if (dump_file)
+ fprintf (dump_file, "\tskipping %d\n", INSN_UID (follow));
+ follow = next_nonnote_insn (follow);
+ }
+
+ if (dump_file && follow)
+ fprintf (dump_file, "\tfollow is %d\n", INSN_UID (follow));
+ if (follow && GET_CODE (follow) == INSN
+ && GET_CODE (PATTERN (follow)) == SET
+ && find_regno_note (follow, REG_DEAD, REGNO (SET_DEST (pat))))
+ {
+ if (GET_CODE (SET_DEST (PATTERN (follow))) == REG)
+ {
+ if (mep_mentioned_p (SET_SRC (PATTERN (follow)), SET_DEST (pat), 1))
+ {
+ replace = 1;
+ where = & SET_SRC (PATTERN (follow));
+ }
+ }
+ else if (GET_CODE (SET_DEST (PATTERN (follow))) == MEM)
+ {
+ if (mep_mentioned_p (PATTERN (follow), SET_DEST (pat), 1))
+ {
+ replace = 1;
+ where = & PATTERN (follow);
+ }
+ }
+ }
+ }
+
+ /* If so, FOLLOW is the corresponding insn. */
+ if (replace)
+ {
+ if (dump_file)
+ {
+ rtx x;
+
+ fprintf (dump_file, "----- Candidate for superfluous move deletion:\n\n");
+ for (x = insn; x ;x = NEXT_INSN (x))
+ {
+ print_rtl_single (dump_file, x);
+ if (x == follow)
+ break;
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (validate_replace_rtx_subexp (SET_DEST (pat), SET_SRC (pat),
+ follow, where))
+ {
+ count ++;
+ next = delete_insn (insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n----- Success! new insn:\n\n");
+ print_rtl_single (dump_file, follow);
+ }
+ done = 0;
+ }
+ }
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n%d insn%s deleted out of %d.\n\n", count, count == 1 ? "" : "s", before);
+ fprintf (dump_file, "=====\n");
+ }
+}
+#endif
+
+
+/* Figure out where to put LABEL, which is the label for a repeat loop.
+ If INCLUDING, LAST_INSN is the last instruction in the loop, otherwise
+ the loop ends just before LAST_INSN. If SHARED, insns other than the
+ "repeat" might use LABEL to jump to the loop's continuation point.
+
+ Return the last instruction in the adjusted loop. */
+
+static rtx
+mep_insert_repeat_label_last (rtx last_insn, rtx label, bool including,
+ bool shared)
+{
+ rtx next, prev;
+ int count = 0, code, icode;
+
+ if (dump_file)
+ fprintf (dump_file, "considering end of repeat loop at insn %d\n",
+ INSN_UID (last_insn));
+
+ /* Set PREV to the last insn in the loop. */
+ prev = last_insn;
+ if (!including)
+ prev = PREV_INSN (prev);
+
+ /* Set NEXT to the next insn after the repeat label. */
+ next = last_insn;
+ if (!shared)
+ while (prev != 0)
+ {
+ code = GET_CODE (prev);
+ if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
+ break;
+
+ if (INSN_P (prev))
+ {
+ if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ prev = XVECEXP (PATTERN (prev), 0, 1);
+
+ /* Other insns that should not be in the last two opcodes. */
+ icode = recog_memoized (prev);
+ if (icode < 0
+ || icode == CODE_FOR_repeat
+ || icode == CODE_FOR_erepeat
+ || get_attr_may_trap (prev) == MAY_TRAP_YES)
+ break;
+
+ /* That leaves JUMP_INSN and INSN. It will have BImode if it
+ is the second instruction in a VLIW bundle. In that case,
+ loop again: if the first instruction also satisfies the
+ conditions above then we will reach here again and put
+ both of them into the repeat epilogue. Otherwise both
+ should remain outside. */
+ if (GET_MODE (prev) != BImode)
+ {
+ count++;
+ next = prev;
+ if (dump_file)
+ print_rtl_single (dump_file, next);
+ if (count == 2)
+ break;
+ }
+ }
+ prev = PREV_INSN (prev);
+ }
+
+ /* See if we're adding the label immediately after the repeat insn.
+ If so, we need to separate them with a nop. */
+ prev = prev_real_insn (next);
+ if (prev)
+ switch (recog_memoized (prev))
+ {
+ case CODE_FOR_repeat:
+ case CODE_FOR_erepeat:
+ if (dump_file)
+ fprintf (dump_file, "Adding nop inside loop\n");
+ emit_insn_before (gen_nop (), next);
+ break;
+
+ default:
+ break;
+ }
+
+ /* Insert the label. */
+ emit_label_before (label, next);
+
+ /* Insert the nops. */
+ if (dump_file && count < 2)
+ fprintf (dump_file, "Adding %d nop%s\n\n",
+ 2 - count, count == 1 ? "" : "s");
+
+ for (; count < 2; count++)
+ if (including)
+ last_insn = emit_insn_after (gen_nop (), last_insn);
+ else
+ emit_insn_before (gen_nop (), last_insn);
+
+ return last_insn;
+}
+
+
+void
+mep_emit_doloop (rtx *operands, int is_end)
+{
+ rtx tag;
+
+ if (cfun->machine->doloop_tags == 0
+ || cfun->machine->doloop_tag_from_end == is_end)
+ {
+ cfun->machine->doloop_tags++;
+ cfun->machine->doloop_tag_from_end = is_end;
+ }
+
+ tag = GEN_INT (cfun->machine->doloop_tags - 1);
+ if (is_end)
+ emit_jump_insn (gen_doloop_end_internal (operands[0], operands[4], tag));
+ else
+ emit_insn (gen_doloop_begin_internal (operands[0], operands[0], tag));
+}
+
+
+/* Code for converting doloop_begins and doloop_ends into valid
+ MeP instructions. A doloop_begin is just a placeholder:
+
+ $count = unspec ($count)
+
+ where $count is initially the number of iterations - 1.
+ doloop_end has the form:
+
+ if ($count-- == 0) goto label
+
+ The counter variable is private to the doloop insns, nothing else
+ relies on its value.
+
+ There are three cases, in decreasing order of preference:
+
+ 1. A loop has exactly one doloop_begin and one doloop_end.
+ The doloop_end branches to the first instruction after
+ the doloop_begin.
+
+ In this case we can replace the doloop_begin with a repeat
+ instruction and remove the doloop_end. I.e.:
+
+ $count1 = unspec ($count1)
+ label:
+ ...
+ insn1
+ insn2
+ if ($count2-- == 0) goto label
+
+ becomes:
+
+ repeat $count1,repeat_label
+ label:
+ ...
+ repeat_label:
+ insn1
+ insn2
+ # end repeat
+
+ 2. As for (1), except there are several doloop_ends. One of them
+ (call it X) falls through to a label L. All the others fall
+ through to branches to L.
+
+ In this case, we remove X and replace the other doloop_ends
+ with branches to the repeat label. For example:
+
+ $count1 = unspec ($count1)
+ start:
+ ...
+ if ($count2-- == 0) goto label
+ end:
+ ...
+ if ($count3-- == 0) goto label
+ goto end
+
+ becomes:
+
+ repeat $count1,repeat_label
+ start:
+ ...
+ repeat_label:
+ nop
+ nop
+ # end repeat
+ end:
+ ...
+ goto repeat_label
+
+ 3. The fallback case. Replace doloop_begins with:
+
+ $count = $count + 1
+
+ Replace doloop_ends with the equivalent of:
+
+ $count = $count - 1
+ if ($count == 0) goto label
+
+ Note that this might need a scratch register if $count
+ is stored in memory. */
+
+/* A structure describing one doloop_begin. */
+struct mep_doloop_begin {
+ /* The next doloop_begin with the same tag. */
+ struct mep_doloop_begin *next;
+
+ /* The instruction itself. */
+ rtx insn;
+
+ /* The initial counter value. This is known to be a general register. */
+ rtx counter;
+};
+
+/* A structure describing a doloop_end. */
+struct mep_doloop_end {
+ /* The next doloop_end with the same loop tag. */
+ struct mep_doloop_end *next;
+
+ /* The instruction itself. */
+ rtx insn;
+
+ /* The first instruction after INSN when the branch isn't taken. */
+ rtx fallthrough;
+
+ /* The location of the counter value. Since doloop_end_internal is a
+ jump instruction, it has to allow the counter to be stored anywhere
+ (any non-fixed register or memory location). */
+ rtx counter;
+
+ /* The target label (the place where the insn branches when the counter
+ isn't zero). */
+ rtx label;
+
+ /* A scratch register. Only available when COUNTER isn't stored
+ in a general register. */
+ rtx scratch;
+};
+
+
+/* One do-while loop. */
+struct mep_doloop {
+ /* All the doloop_begins for this loop (in no particular order). */
+ struct mep_doloop_begin *begin;
+
+ /* All the doloop_ends. When there is more than one, arrange things
+ so that the first one is the most likely to be X in case (2) above. */
+ struct mep_doloop_end *end;
+};
+
+
+/* Return true if LOOP can be converted into repeat/repeat_end form
+ (that is, if it matches cases (1) or (2) above). */
+
+static bool
+mep_repeat_loop_p (struct mep_doloop *loop)
+{
+ struct mep_doloop_end *end;
+ rtx fallthrough;
+
+ /* There must be exactly one doloop_begin and at least one doloop_end. */
+ if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
+ return false;
+
+ /* The first doloop_end (X) must branch back to the insn after
+ the doloop_begin. */
+ if (prev_real_insn (loop->end->label) != loop->begin->insn)
+ return false;
+
+ /* All the other doloop_ends must branch to the same place as X.
+ When the branch isn't taken, they must jump to the instruction
+ after X. */
+ fallthrough = loop->end->fallthrough;
+ for (end = loop->end->next; end != 0; end = end->next)
+ if (end->label != loop->end->label
+ || !simplejump_p (end->fallthrough)
+ || next_real_insn (JUMP_LABEL (end->fallthrough)) != fallthrough)
+ return false;
+
+ return true;
+}
+
+
+/* The main repeat reorg function. See comment above for details. */
+
+static void
+mep_reorg_repeat (rtx insns)
+{
+ rtx insn;
+ struct mep_doloop *loops, *loop;
+ struct mep_doloop_begin *begin;
+ struct mep_doloop_end *end;
+
+ /* Quick exit if we haven't created any loops. */
+ if (cfun->machine->doloop_tags == 0)
+ return;
+
+ /* Create an array of mep_doloop structures. */
+ loops = (struct mep_doloop *) alloca (sizeof (loops[0]) * cfun->machine->doloop_tags);
+ memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
+
+ /* Search the function for do-while insns and group them by loop tag. */
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ switch (recog_memoized (insn))
+ {
+ case CODE_FOR_doloop_begin_internal:
+ insn_extract (insn);
+ loop = &loops[INTVAL (recog_data.operand[2])];
+
+ begin = (struct mep_doloop_begin *) alloca (sizeof (struct mep_doloop_begin));
+ begin->next = loop->begin;
+ begin->insn = insn;
+ begin->counter = recog_data.operand[0];
+
+ loop->begin = begin;
+ break;
+
+ case CODE_FOR_doloop_end_internal:
+ insn_extract (insn);
+ loop = &loops[INTVAL (recog_data.operand[2])];
+
+ end = (struct mep_doloop_end *) alloca (sizeof (struct mep_doloop_end));
+ end->insn = insn;
+ end->fallthrough = next_real_insn (insn);
+ end->counter = recog_data.operand[0];
+ end->label = recog_data.operand[1];
+ end->scratch = recog_data.operand[3];
+
+ /* If this insn falls through to an unconditional jump,
+ give it a lower priority than the others. */
+ if (loop->end != 0 && simplejump_p (end->fallthrough))
+ {
+ end->next = loop->end->next;
+ loop->end->next = end;
+ }
+ else
+ {
+ end->next = loop->end;
+ loop->end = end;
+ }
+ break;
+ }
+
+ /* Convert the insns for each loop in turn. */
+ for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
+ if (mep_repeat_loop_p (loop))
+ {
+ /* Case (1) or (2). */
+ rtx repeat_label, label_ref;
+
+ /* Create a new label for the repeat insn. */
+ repeat_label = gen_label_rtx ();
+
+ /* Replace the doloop_begin with a repeat. */
+ label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
+ emit_insn_before (gen_repeat (loop->begin->counter, label_ref),
+ loop->begin->insn);
+ delete_insn (loop->begin->insn);
+
+ /* Insert the repeat label before the first doloop_end.
+ Fill the gap with nops if there are other doloop_ends. */
+ mep_insert_repeat_label_last (loop->end->insn, repeat_label,
+ false, loop->end->next != 0);
+
+ /* Emit a repeat_end (to improve the readability of the output). */
+ emit_insn_before (gen_repeat_end (), loop->end->insn);
+
+ /* Delete the first doloop_end. */
+ delete_insn (loop->end->insn);
+
+ /* Replace the others with branches to REPEAT_LABEL. */
+ for (end = loop->end->next; end != 0; end = end->next)
+ {
+ emit_jump_insn_before (gen_jump (repeat_label), end->insn);
+ delete_insn (end->insn);
+ delete_insn (end->fallthrough);
+ }
+ }
+ else
+ {
+ /* Case (3). First replace all the doloop_begins with increment
+ instructions. */
+ for (begin = loop->begin; begin != 0; begin = begin->next)
+ {
+ emit_insn_before (gen_add3_insn (copy_rtx (begin->counter),
+ begin->counter, const1_rtx),
+ begin->insn);
+ delete_insn (begin->insn);
+ }
+
+ /* Replace all the doloop_ends with decrement-and-branch sequences. */
+ for (end = loop->end; end != 0; end = end->next)
+ {
+ rtx reg;
+
+ start_sequence ();
+
+ /* Load the counter value into a general register. */
+ reg = end->counter;
+ if (!REG_P (reg) || REGNO (reg) > 15)
+ {
+ reg = end->scratch;
+ emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
+ }
+
+ /* Decrement the counter. */
+ emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
+ constm1_rtx));
+
+ /* Copy it back to its original location. */
+ if (reg != end->counter)
+ emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
+
+ /* Jump back to the start label. */
+ insn = emit_jump_insn (gen_mep_bne_true (reg, const0_rtx,
+ end->label));
+ JUMP_LABEL (insn) = end->label;
+ LABEL_NUSES (end->label)++;
+
+ /* Emit the whole sequence before the doloop_end. */
+ insn = get_insns ();
+ end_sequence ();
+ emit_insn_before (insn, end->insn);
+
+ /* Delete the doloop_end. */
+ delete_insn (end->insn);
+ }
+ }
+}
+
+
+static bool
+mep_invertable_branch_p (rtx insn)
+{
+ rtx cond, set;
+ enum rtx_code old_code;
+ int i;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) != SET)
+ return false;
+ if (GET_CODE (XEXP (set, 1)) != IF_THEN_ELSE)
+ return false;
+ cond = XEXP (XEXP (set, 1), 0);
+ old_code = GET_CODE (cond);
+ switch (old_code)
+ {
+ case EQ:
+ PUT_CODE (cond, NE);
+ break;
+ case NE:
+ PUT_CODE (cond, EQ);
+ break;
+ case LT:
+ PUT_CODE (cond, GE);
+ break;
+ case GE:
+ PUT_CODE (cond, LT);
+ break;
+ default:
+ return false;
+ }
+ INSN_CODE (insn) = -1;
+ i = recog_memoized (insn);
+ PUT_CODE (cond, old_code);
+ INSN_CODE (insn) = -1;
+ return i >= 0;
+}
+
+static void
+mep_invert_branch (rtx insn, rtx after)
+{
+ rtx cond, set, label;
+ int i;
+
+ set = PATTERN (insn);
+
+ gcc_assert (GET_CODE (set) == SET);
+ gcc_assert (GET_CODE (XEXP (set, 1)) == IF_THEN_ELSE);
+
+ cond = XEXP (XEXP (set, 1), 0);
+ switch (GET_CODE (cond))
+ {
+ case EQ:
+ PUT_CODE (cond, NE);
+ break;
+ case NE:
+ PUT_CODE (cond, EQ);
+ break;
+ case LT:
+ PUT_CODE (cond, GE);
+ break;
+ case GE:
+ PUT_CODE (cond, LT);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ label = gen_label_rtx ();
+ emit_label_after (label, after);
+ for (i=1; i<=2; i++)
+ if (GET_CODE (XEXP (XEXP (set, 1), i)) == LABEL_REF)
+ {
+ rtx ref = XEXP (XEXP (set, 1), i);
+ if (LABEL_NUSES (XEXP (ref, 0)) == 1)
+ delete_insn (XEXP (ref, 0));
+ XEXP (ref, 0) = label;
+ LABEL_NUSES (label) ++;
+ JUMP_LABEL (insn) = label;
+ }
+ INSN_CODE (insn) = -1;
+ i = recog_memoized (insn);
+ gcc_assert (i >= 0);
+}
+
+static void
+mep_reorg_erepeat (rtx insns)
+{
+ rtx insn, prev, l, x;
+ int count;
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ if (JUMP_P (insn)
+ && ! JUMP_TABLE_DATA_P (insn)
+ && mep_invertable_branch_p (insn))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n------------------------------\n");
+ fprintf (dump_file, "erepeat: considering this jump:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ count = simplejump_p (insn) ? 0 : 1;
+ for (prev = PREV_INSN (insn); prev; prev = PREV_INSN (prev))
+ {
+ if (GET_CODE (prev) == CALL_INSN
+ || BARRIER_P (prev))
+ break;
+
+ if (prev == JUMP_LABEL (insn))
+ {
+ rtx newlast;
+ if (dump_file)
+ fprintf (dump_file, "found loop top, %d insns\n", count);
+
+ if (LABEL_NUSES (prev) == 1)
+ /* We're the only user, always safe */ ;
+ else if (LABEL_NUSES (prev) == 2)
+ {
+ /* See if there's a barrier before this label. If
+ so, we know nobody inside the loop uses it.
+ But we must be careful to put the erepeat
+ *after* the label. */
+ rtx barrier;
+ for (barrier = PREV_INSN (prev);
+ barrier && GET_CODE (barrier) == NOTE;
+ barrier = PREV_INSN (barrier))
+ ;
+ if (barrier && GET_CODE (barrier) != BARRIER)
+ break;
+ }
+ else
+ {
+ /* We don't know who else, within or without our loop, uses this label. */
+ if (dump_file)
+ fprintf (dump_file, "... but there are multiple users, too risky.\n");
+ break;
+ }
+
+ /* Generate a label to be used by the erepeat insn. */
+ l = gen_label_rtx ();
+
+ /* Insert the erepeat after INSN's target label. */
+ x = gen_erepeat (gen_rtx_LABEL_REF (VOIDmode, l));
+ LABEL_NUSES (l)++;
+ emit_insn_after (x, prev);
+
+ /* Insert the erepeat label. */
+ newlast = (mep_insert_repeat_label_last
+ (insn, l, !simplejump_p (insn), false));
+ if (simplejump_p (insn))
+ {
+ emit_insn_before (gen_erepeat_end (), insn);
+ delete_insn (insn);
+ }
+ else
+ {
+ mep_invert_branch (insn, newlast);
+ emit_insn_after (gen_erepeat_end (), newlast);
+ }
+ break;
+ }
+
+ if (LABEL_P (prev))
+ {
+ /* A label is OK if there is exactly one user, and we
+ can find that user before the next label. */
+ rtx user = 0;
+ int safe = 0;
+ if (LABEL_NUSES (prev) == 1)
+ {
+ for (user = PREV_INSN (prev);
+ user && (INSN_P (user) || GET_CODE (user) == NOTE);
+ user = PREV_INSN (user))
+ if (GET_CODE (user) == JUMP_INSN
+ && JUMP_LABEL (user) == prev)
+ {
+ safe = INSN_UID (user);
+ break;
+ }
+ }
+ if (!safe)
+ break;
+ if (dump_file)
+ fprintf (dump_file, "... ignoring jump from insn %d to %d\n",
+ safe, INSN_UID (prev));
+ }
+
+ if (INSN_P (prev))
+ {
+ count ++;
+ }
+ }
+ }
+ if (dump_file)
+ fprintf (dump_file, "\n==============================\n");
+}
+
+/* Replace a jump to a return, with a copy of the return. GCC doesn't
+ always do this on its own. */
+
+static void
+mep_jmp_return_reorg (rtx insns)
+{
+ rtx insn, label, ret;
+ int ret_code;
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ if (simplejump_p (insn))
+ {
+ /* Find the first real insn the jump jumps to. */
+ label = ret = JUMP_LABEL (insn);
+ while (ret
+ && (GET_CODE (ret) == NOTE
+ || GET_CODE (ret) == CODE_LABEL
+ || GET_CODE (PATTERN (ret)) == USE))
+ ret = NEXT_INSN (ret);
+
+ if (ret)
+ {
+ /* Is it a return? */
+ ret_code = recog_memoized (ret);
+ if (ret_code == CODE_FOR_return_internal
+ || ret_code == CODE_FOR_eh_return_internal)
+ {
+ /* It is. Replace the jump with a return. */
+ LABEL_NUSES (label) --;
+ if (LABEL_NUSES (label) == 0)
+ delete_insn (label);
+ PATTERN (insn) = copy_rtx (PATTERN (ret));
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+}
+
+
+static void
+mep_reorg_addcombine (rtx insns)
+{
+ rtx i, n;
+
+ for (i = insns; i; i = NEXT_INSN (i))
+ if (INSN_P (i)
+ && INSN_CODE (i) == CODE_FOR_addsi3
+ && GET_CODE (SET_DEST (PATTERN (i))) == REG
+ && GET_CODE (XEXP (SET_SRC (PATTERN (i)), 0)) == REG
+ && REGNO (SET_DEST (PATTERN (i))) == REGNO (XEXP (SET_SRC (PATTERN (i)), 0))
+ && GET_CODE (XEXP (SET_SRC (PATTERN (i)), 1)) == CONST_INT)
+ {
+ n = NEXT_INSN (i);
+ if (INSN_P (n)
+ && INSN_CODE (n) == CODE_FOR_addsi3
+ && GET_CODE (SET_DEST (PATTERN (n))) == REG
+ && GET_CODE (XEXP (SET_SRC (PATTERN (n)), 0)) == REG
+ && REGNO (SET_DEST (PATTERN (n))) == REGNO (XEXP (SET_SRC (PATTERN (n)), 0))
+ && GET_CODE (XEXP (SET_SRC (PATTERN (n)), 1)) == CONST_INT)
+ {
+ int ic = INTVAL (XEXP (SET_SRC (PATTERN (i)), 1));
+ int nc = INTVAL (XEXP (SET_SRC (PATTERN (n)), 1));
+ if (REGNO (SET_DEST (PATTERN (i))) == REGNO (SET_DEST (PATTERN (n)))
+ && ic + nc < 32767
+ && ic + nc > -32768)
+ {
+ XEXP (SET_SRC (PATTERN (i)), 1) = GEN_INT (ic + nc);
+ NEXT_INSN (i) = NEXT_INSN (n);
+ if (NEXT_INSN (i))
+ PREV_INSN (NEXT_INSN (i)) = i;
+ }
+ }
+ }
+}
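+
+/* Illustrative example of the combination above:
+
+ $2 = $2 + 12
+ $2 = $2 + 4
+
+ becomes
+
+ $2 = $2 + 16
+
+ provided the combined constant still passes the signed 16-bit
+ range check. */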
+
+/* If this insn adjusts the stack, return the adjustment, else return
+ zero. */
+static int
+add_sp_insn_p (rtx insn)
+{
+ rtx pat;
+
+ if (! single_set (insn))
+ return 0;
+ pat = PATTERN (insn);
+ if (GET_CODE (SET_DEST (pat)) != REG)
+ return 0;
+ if (REGNO (SET_DEST (pat)) != SP_REGNO)
+ return 0;
+ if (GET_CODE (SET_SRC (pat)) != PLUS)
+ return 0;
+ if (GET_CODE (XEXP (SET_SRC (pat), 0)) != REG)
+ return 0;
+ if (REGNO (XEXP (SET_SRC (pat), 0)) != SP_REGNO)
+ return 0;
+ if (GET_CODE (XEXP (SET_SRC (pat), 1)) != CONST_INT)
+ return 0;
+ return INTVAL (XEXP (SET_SRC (pat), 1));
+}
+
+/* Check for trivial functions that set up an unneeded stack
+ frame. */
+static void
+mep_reorg_noframe (rtx insns)
+{
+ rtx start_frame_insn;
+ rtx end_frame_insn = 0;
+ int sp_adjust, sp2;
+ rtx sp;
+
+ /* The first insn should be $sp = $sp + N */
+ while (insns && ! INSN_P (insns))
+ insns = NEXT_INSN (insns);
+ if (!insns)
+ return;
+
+ sp_adjust = add_sp_insn_p (insns);
+ if (sp_adjust == 0)
+ return;
+
+ start_frame_insn = insns;
+ sp = SET_DEST (PATTERN (start_frame_insn));
+
+ insns = next_real_insn (insns);
+
+ while (insns)
+ {
+ rtx next = next_real_insn (insns);
+ if (!next)
+ break;
+
+ sp2 = add_sp_insn_p (insns);
+ if (sp2)
+ {
+ if (end_frame_insn)
+ return;
+ end_frame_insn = insns;
+ if (sp2 != -sp_adjust)
+ return;
+ }
+ else if (mep_mentioned_p (insns, sp, 0))
+ return;
+ else if (CALL_P (insns))
+ return;
+
+ insns = next;
+ }
+
+ if (end_frame_insn)
+ {
+ delete_insn (start_frame_insn);
+ delete_insn (end_frame_insn);
+ }
+}
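+
+/* Illustrative case: a function whose body is
+
+ $sp = $sp - 8
+ ...		(no other reference to $sp, no calls)
+ $sp = $sp + 8
+
+ has both stack adjustments deleted by the pass above. */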
+
+static void
+mep_reorg (void)
+{
+ rtx insns = get_insns ();
+
+ /* We require accurate REG_DEAD notes. */
+ compute_bb_for_insn ();
+ df_note_add_problem ();
+ df_analyze ();
+
+ mep_reorg_addcombine (insns);
+#if EXPERIMENTAL_REGMOVE_REORG
+ /* VLIW packing has been done already, so we can't just delete things. */
+ if (!mep_vliw_function_p (cfun->decl))
+ mep_reorg_regmove (insns);
+#endif
+ mep_jmp_return_reorg (insns);
+ mep_bundle_insns (insns);
+ mep_reorg_repeat (insns);
+ if (optimize
+ && !profile_flag
+ && !profile_arc_flag
+ && TARGET_OPT_REPEAT
+ && (!mep_interrupt_p () || mep_interrupt_saved_reg (RPB_REGNO)))
+ mep_reorg_erepeat (insns);
+
+  /* This may delete the insn that INSNS points to, so make sure it
+     runs last.  */
+ mep_reorg_noframe (insns);
+
+ df_finish_pass (false);
+}
+
+
+
+/*----------------------------------------------------------------------*/
+/* Builtins */
+/*----------------------------------------------------------------------*/
+
+/* Element X gives the index into cgen_insns[] of the most general
+ implementation of intrinsic X. Unimplemented intrinsics are
+ mapped to -1. */
+int mep_intrinsic_insn[ARRAY_SIZE (cgen_intrinsics)];
+
+/* Element X gives the index of another instruction that is mapped to
+ the same intrinsic as cgen_insns[X]. It is -1 when there is no other
+ instruction.
+
+ Things are set up so that mep_intrinsic_chain[X] < X. */
+static int mep_intrinsic_chain[ARRAY_SIZE (cgen_insns)];
+
+/* The bitmask for the current ISA. The ISA masks are declared
+ in mep-intrin.h. */
+unsigned int mep_selected_isa;
+
+struct mep_config {
+ const char *config_name;
+ unsigned int isa;
+};
+
+static struct mep_config mep_configs[] = {
+#ifdef COPROC_SELECTION_TABLE
+ COPROC_SELECTION_TABLE,
+#endif
+ { 0, 0 }
+};
+
+/* Initialize the global intrinsics variables above. */
+
+static void
+mep_init_intrinsics (void)
+{
+ size_t i;
+
+ /* Set MEP_SELECTED_ISA to the ISA flag for this configuration. */
+ mep_selected_isa = mep_configs[0].isa;
+ if (mep_config_string != 0)
+ for (i = 0; mep_configs[i].config_name; i++)
+ if (strcmp (mep_config_string, mep_configs[i].config_name) == 0)
+ {
+ mep_selected_isa = mep_configs[i].isa;
+ break;
+ }
+
+ /* Assume all intrinsics are unavailable. */
+ for (i = 0; i < ARRAY_SIZE (mep_intrinsic_insn); i++)
+ mep_intrinsic_insn[i] = -1;
+
+ /* Build up the global intrinsic tables. */
+ for (i = 0; i < ARRAY_SIZE (cgen_insns); i++)
+ if ((cgen_insns[i].isas & mep_selected_isa) != 0)
+ {
+ mep_intrinsic_chain[i] = mep_intrinsic_insn[cgen_insns[i].intrinsic];
+ mep_intrinsic_insn[cgen_insns[i].intrinsic] = i;
+ }
+ /* See whether we can directly move values between one coprocessor
+ register and another. */
+ for (i = 0; i < ARRAY_SIZE (mep_cmov_insns); i++)
+ if (MEP_INTRINSIC_AVAILABLE_P (mep_cmov_insns[i]))
+ mep_have_copro_copro_moves_p = true;
+
+ /* See whether we can directly move values between core and
+ coprocessor registers. */
+ mep_have_core_copro_moves_p = (MEP_INTRINSIC_AVAILABLE_P (mep_cmov1)
+ && MEP_INTRINSIC_AVAILABLE_P (mep_cmov2));
+
+ mep_have_core_copro_moves_p = 1;
+}
+
+/* Declare all available intrinsic functions. Called once only. */
+
+static tree cp_data_bus_int_type_node;
+static tree opaque_vector_type_node;
+static tree v8qi_type_node;
+static tree v4hi_type_node;
+static tree v2si_type_node;
+static tree v8uqi_type_node;
+static tree v4uhi_type_node;
+static tree v2usi_type_node;
+
+static tree
+mep_cgen_regnum_to_type (enum cgen_regnum_operand_type cr)
+{
+ switch (cr)
+ {
+ case cgen_regnum_operand_type_POINTER: return ptr_type_node;
+ case cgen_regnum_operand_type_LONG: return long_integer_type_node;
+ case cgen_regnum_operand_type_ULONG: return long_unsigned_type_node;
+ case cgen_regnum_operand_type_SHORT: return short_integer_type_node;
+ case cgen_regnum_operand_type_USHORT: return short_unsigned_type_node;
+ case cgen_regnum_operand_type_CHAR: return char_type_node;
+ case cgen_regnum_operand_type_UCHAR: return unsigned_char_type_node;
+ case cgen_regnum_operand_type_SI: return intSI_type_node;
+ case cgen_regnum_operand_type_DI: return intDI_type_node;
+ case cgen_regnum_operand_type_VECTOR: return opaque_vector_type_node;
+ case cgen_regnum_operand_type_V8QI: return v8qi_type_node;
+ case cgen_regnum_operand_type_V4HI: return v4hi_type_node;
+ case cgen_regnum_operand_type_V2SI: return v2si_type_node;
+ case cgen_regnum_operand_type_V8UQI: return v8uqi_type_node;
+ case cgen_regnum_operand_type_V4UHI: return v4uhi_type_node;
+ case cgen_regnum_operand_type_V2USI: return v2usi_type_node;
+ case cgen_regnum_operand_type_CP_DATA_BUS_INT: return cp_data_bus_int_type_node;
+ default:
+ return void_type_node;
+ }
+}
+
+static void
+mep_init_builtins (void)
+{
+ size_t i;
+
+ if (TARGET_64BIT_CR_REGS)
+ cp_data_bus_int_type_node = long_long_integer_type_node;
+ else
+ cp_data_bus_int_type_node = long_integer_type_node;
+
+ opaque_vector_type_node = build_opaque_vector_type (intQI_type_node, 8);
+ v8qi_type_node = build_vector_type (intQI_type_node, 8);
+ v4hi_type_node = build_vector_type (intHI_type_node, 4);
+ v2si_type_node = build_vector_type (intSI_type_node, 2);
+ v8uqi_type_node = build_vector_type (unsigned_intQI_type_node, 8);
+ v4uhi_type_node = build_vector_type (unsigned_intHI_type_node, 4);
+ v2usi_type_node = build_vector_type (unsigned_intSI_type_node, 2);
+
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_data_bus_int"),
+ cp_data_bus_int_type_node));
+
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_vector"),
+ opaque_vector_type_node));
+
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v8qi"),
+ v8qi_type_node));
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v4hi"),
+ v4hi_type_node));
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v2si"),
+ v2si_type_node));
+
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v8uqi"),
+ v8uqi_type_node));
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v4uhi"),
+ v4uhi_type_node));
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("cp_v2usi"),
+ v2usi_type_node));
+
+ /* Intrinsics like mep_cadd3 are implemented with two groups of
+ instructions, one which uses UNSPECs and one which uses a specific
+ rtl code such as PLUS. Instructions in the latter group belong
+ to GROUP_KNOWN_CODE.
+
+ In such cases, the intrinsic will have two entries in the global
+ tables above. The unspec form is accessed using builtin functions
+ while the specific form is accessed using the mep_* enum in
+ mep-intrin.h.
+
+ The idea is that __cop arithmetic and builtin functions have
+ different optimization requirements. If mep_cadd3() appears in
+     the source code, the user will surely expect gcc to use cadd3
+ rather than a work-alike such as add3. However, if the user
+ just writes "a + b", where a or b are __cop variables, it is
+ reasonable for gcc to choose a core instruction rather than
+     cadd3 if it believes that is better.  */
+ for (i = 0; i < ARRAY_SIZE (cgen_insns); i++)
+ if ((cgen_insns[i].groups & GROUP_KNOWN_CODE) == 0
+ && mep_intrinsic_insn[cgen_insns[i].intrinsic] >= 0)
+ {
+ tree ret_type = void_type_node;
+ tree bi_type;
+
+ if (i > 0 && cgen_insns[i].intrinsic == cgen_insns[i-1].intrinsic)
+ continue;
+
+ if (cgen_insns[i].cret_p)
+ ret_type = mep_cgen_regnum_to_type (cgen_insns[i].regnums[0].type);
+
+ bi_type = build_function_type (ret_type, 0);
+ add_builtin_function (cgen_intrinsics[cgen_insns[i].intrinsic],
+ bi_type,
+ cgen_insns[i].intrinsic, BUILT_IN_MD, NULL, NULL);
+ }
+}
+
+/* Report the unavailability of the given intrinsic.  */
+
+#if 1
+static void
+mep_intrinsic_unavailable (int intrinsic)
+{
+ static int already_reported_p[ARRAY_SIZE (cgen_intrinsics)];
+
+ if (already_reported_p[intrinsic])
+ return;
+
+ if (mep_intrinsic_insn[intrinsic] < 0)
+ error ("coprocessor intrinsic %qs is not available in this configuration",
+ cgen_intrinsics[intrinsic]);
+ else if (CGEN_CURRENT_GROUP == GROUP_VLIW)
+ error ("%qs is not available in VLIW functions",
+ cgen_intrinsics[intrinsic]);
+ else
+ error ("%qs is not available in non-VLIW functions",
+ cgen_intrinsics[intrinsic]);
+
+ already_reported_p[intrinsic] = 1;
+}
+#endif
+
+
+/* See if any implementation of INTRINSIC is available to the
+ current function. If so, store the most general implementation
+ in *INSN_PTR and return true. Return false otherwise. */
+
+static bool
+mep_get_intrinsic_insn (int intrinsic ATTRIBUTE_UNUSED, const struct cgen_insn **insn_ptr ATTRIBUTE_UNUSED)
+{
+ int i;
+
+ i = mep_intrinsic_insn[intrinsic];
+ while (i >= 0 && !CGEN_ENABLE_INSN_P (i))
+ i = mep_intrinsic_chain[i];
+
+ if (i >= 0)
+ {
+ *insn_ptr = &cgen_insns[i];
+ return true;
+ }
+ return false;
+}
+
+
+/* Like mep_get_intrinsic_insn, but with extra handling for moves.
+ If INTRINSIC is mep_cmov, but there is no pure CR <- CR move insn,
+ try using a work-alike instead. In this case, the returned insn
+ may have three operands rather than two. */
+
+static bool
+mep_get_move_insn (int intrinsic, const struct cgen_insn **cgen_insn)
+{
+ size_t i;
+
+ if (intrinsic == mep_cmov)
+ {
+ for (i = 0; i < ARRAY_SIZE (mep_cmov_insns); i++)
+ if (mep_get_intrinsic_insn (mep_cmov_insns[i], cgen_insn))
+ return true;
+ return false;
+ }
+ return mep_get_intrinsic_insn (intrinsic, cgen_insn);
+}
+
+
+/* If ARG is a register operand that is the same size as MODE, convert it
+ to MODE using a subreg. Otherwise return ARG as-is. */
+
+static rtx
+mep_convert_arg (enum machine_mode mode, rtx arg)
+{
+ if (GET_MODE (arg) != mode
+ && register_operand (arg, VOIDmode)
+ && GET_MODE_SIZE (GET_MODE (arg)) == GET_MODE_SIZE (mode))
+ return simplify_gen_subreg (mode, arg, GET_MODE (arg), 0);
+ return arg;
+}
+
+
+/* Apply regnum conversions to ARG using the description given by REGNUM.
+ Return the new argument on success and null on failure. */
+
+static rtx
+mep_convert_regnum (const struct cgen_regnum_operand *regnum, rtx arg)
+{
+ if (regnum->count == 0)
+ return arg;
+
+ if (GET_CODE (arg) != CONST_INT
+ || INTVAL (arg) < 0
+ || INTVAL (arg) >= regnum->count)
+ return 0;
+
+ return gen_rtx_REG (SImode, INTVAL (arg) + regnum->base);
+}
+
+
+/* Try to make intrinsic argument ARG match the given operand.
+ UNSIGNED_P is true if the argument has an unsigned type. */
+
+static rtx
+mep_legitimize_arg (const struct insn_operand_data *operand, rtx arg,
+ int unsigned_p)
+{
+ if (GET_CODE (arg) == CONST_INT)
+ {
+ /* CONST_INTs can only be bound to integer operands. */
+ if (GET_MODE_CLASS (operand->mode) != MODE_INT)
+ return 0;
+ }
+ else if (GET_CODE (arg) == CONST_DOUBLE)
+ /* These hold vector constants. */;
+ else if (GET_MODE_SIZE (GET_MODE (arg)) != GET_MODE_SIZE (operand->mode))
+ {
+ /* If the argument is a different size from what's expected, we must
+ have a value in the right mode class in order to convert it. */
+ if (GET_MODE_CLASS (operand->mode) != GET_MODE_CLASS (GET_MODE (arg)))
+ return 0;
+
+ /* If the operand is an rvalue, promote or demote it to match the
+ operand's size. This might not need extra instructions when
+ ARG is a register value. */
+ if (operand->constraint[0] != '=')
+ arg = convert_to_mode (operand->mode, arg, unsigned_p);
+ }
+
+ /* If the operand is an lvalue, bind the operand to a new register.
+ The caller will copy this value into ARG after the main
+     instruction.  By doing this always, we produce slightly
+     better code.  */
+ /* But not for control registers. */
+ if (operand->constraint[0] == '='
+ && (! REG_P (arg)
+ || ! (CONTROL_REGNO_P (REGNO (arg))
+ || CCR_REGNO_P (REGNO (arg))
+ || CR_REGNO_P (REGNO (arg)))
+ ))
+ return gen_reg_rtx (operand->mode);
+
+ /* Try simple mode punning. */
+ arg = mep_convert_arg (operand->mode, arg);
+ if (operand->predicate (arg, operand->mode))
+ return arg;
+
+ /* See if forcing the argument into a register will make it match. */
+ if (GET_CODE (arg) == CONST_INT || GET_CODE (arg) == CONST_DOUBLE)
+ arg = force_reg (operand->mode, arg);
+ else
+ arg = mep_convert_arg (operand->mode, force_reg (GET_MODE (arg), arg));
+ if (operand->predicate (arg, operand->mode))
+ return arg;
+
+ return 0;
+}
+
+
+/* Report that ARG cannot be passed to argument ARGNUM of intrinsic
+ function FNNAME. OPERAND describes the operand to which ARGNUM
+ is mapped. */
+
+static void
+mep_incompatible_arg (const struct insn_operand_data *operand, rtx arg,
+ int argnum, tree fnname)
+{
+ size_t i;
+
+ if (GET_CODE (arg) == CONST_INT)
+ for (i = 0; i < ARRAY_SIZE (cgen_immediate_predicates); i++)
+ if (operand->predicate == cgen_immediate_predicates[i].predicate)
+ {
+ const struct cgen_immediate_predicate *predicate;
+ HOST_WIDE_INT argval;
+
+ predicate = &cgen_immediate_predicates[i];
+ argval = INTVAL (arg);
+ if (argval < predicate->lower || argval >= predicate->upper)
+ error ("argument %d of %qE must be in the range %d...%d",
+ argnum, fnname, predicate->lower, predicate->upper - 1);
+ else
+ error ("argument %d of %qE must be a multiple of %d",
+ argnum, fnname, predicate->align);
+ return;
+ }
+
+ error ("incompatible type for argument %d of %qE", argnum, fnname);
+}
+
+static rtx
+mep_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ rtx pat, op[10], arg[10];
+ unsigned int a;
+ int opindex, unsigned_p[10];
+ tree fndecl, args;
+ unsigned int n_args;
+ tree fnname;
+ const struct cgen_insn *cgen_insn;
+ const struct insn_data_d *idata;
+ unsigned int first_arg = 0;
+ unsigned int builtin_n_args;
+
+ fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ fnname = DECL_NAME (fndecl);
+
+ /* Find out which instruction we should emit. Note that some coprocessor
+ intrinsics may only be available in VLIW mode, or only in normal mode. */
+ if (!mep_get_intrinsic_insn (DECL_FUNCTION_CODE (fndecl), &cgen_insn))
+ {
+ mep_intrinsic_unavailable (DECL_FUNCTION_CODE (fndecl));
+ return NULL_RTX;
+ }
+ idata = &insn_data[cgen_insn->icode];
+
+ builtin_n_args = cgen_insn->num_args;
+
+ if (cgen_insn->cret_p)
+ {
+ if (cgen_insn->cret_p > 1)
+ builtin_n_args ++;
+ first_arg = 1;
+ mep_cgen_regnum_to_type (cgen_insn->regnums[0].type);
+ builtin_n_args --;
+ }
+
+ /* Evaluate each argument. */
+ n_args = call_expr_nargs (exp);
+
+ if (n_args < builtin_n_args)
+ {
+ error ("too few arguments to %qE", fnname);
+ return NULL_RTX;
+ }
+ if (n_args > builtin_n_args)
+ {
+ error ("too many arguments to %qE", fnname);
+ return NULL_RTX;
+ }
+
+ for (a = first_arg; a < builtin_n_args + first_arg; a++)
+ {
+ tree value;
+
+ args = CALL_EXPR_ARG (exp, a - first_arg);
+
+ value = args;
+
+#if 0
+ if (cgen_insn->regnums[a].reference_p)
+ {
+ if (TREE_CODE (value) != ADDR_EXPR)
+ {
+ debug_tree(value);
+ error ("argument %d of %qE must be an address", a+1, fnname);
+ return NULL_RTX;
+ }
+ value = TREE_OPERAND (value, 0);
+ }
+#endif
+
+ /* If the argument has been promoted to int, get the unpromoted
+ value. This is necessary when sub-int memory values are bound
+ to reference parameters. */
+ if (TREE_CODE (value) == NOP_EXPR
+ && TREE_TYPE (value) == integer_type_node
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (value, 0)))
+ && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (value, 0)))
+ < TYPE_PRECISION (TREE_TYPE (value))))
+ value = TREE_OPERAND (value, 0);
+
+ /* If the argument has been promoted to double, get the unpromoted
+ SFmode value. This is necessary for FMAX support, for example. */
+ if (TREE_CODE (value) == NOP_EXPR
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (value))
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (TREE_OPERAND (value, 0)))
+ && TYPE_MODE (TREE_TYPE (value)) == DFmode
+ && TYPE_MODE (TREE_TYPE (TREE_OPERAND (value, 0))) == SFmode)
+ value = TREE_OPERAND (value, 0);
+
+ unsigned_p[a] = TYPE_UNSIGNED (TREE_TYPE (value));
+ arg[a] = expand_expr (value, NULL, VOIDmode, EXPAND_NORMAL);
+ arg[a] = mep_convert_regnum (&cgen_insn->regnums[a], arg[a]);
+ if (cgen_insn->regnums[a].reference_p)
+ {
+ tree pointed_to = TREE_TYPE (TREE_TYPE (value));
+ enum machine_mode pointed_mode = TYPE_MODE (pointed_to);
+
+ arg[a] = gen_rtx_MEM (pointed_mode, arg[a]);
+ }
+ if (arg[a] == 0)
+ {
+ error ("argument %d of %qE must be in the range %d...%d",
+ a + 1, fnname, 0, cgen_insn->regnums[a].count - 1);
+ return NULL_RTX;
+ }
+ }
+
+ for (a = 0; a < first_arg; a++)
+ {
+ if (a == 0 && target && GET_MODE (target) == idata->operand[0].mode)
+ arg[a] = target;
+ else
+ arg[a] = gen_reg_rtx (idata->operand[0].mode);
+ }
+
+ /* Convert the arguments into a form suitable for the intrinsic.
+ Report an error if this isn't possible. */
+ for (opindex = 0; opindex < idata->n_operands; opindex++)
+ {
+ a = cgen_insn->op_mapping[opindex];
+ op[opindex] = mep_legitimize_arg (&idata->operand[opindex],
+ arg[a], unsigned_p[a]);
+ if (op[opindex] == 0)
+ {
+ mep_incompatible_arg (&idata->operand[opindex],
+ arg[a], a + 1 - first_arg, fnname);
+ return NULL_RTX;
+ }
+ }
+
+ /* Emit the instruction. */
+ pat = idata->genfun (op[0], op[1], op[2], op[3], op[4],
+ op[5], op[6], op[7], op[8], op[9]);
+
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_DEST (pat)) == PC
+ && GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
+ emit_jump_insn (pat);
+ else
+ emit_insn (pat);
+
+ /* Copy lvalues back to their final locations. */
+ for (opindex = 0; opindex < idata->n_operands; opindex++)
+ if (idata->operand[opindex].constraint[0] == '=')
+ {
+ a = cgen_insn->op_mapping[opindex];
+ if (a >= first_arg)
+ {
+ if (GET_MODE_CLASS (GET_MODE (arg[a]))
+ != GET_MODE_CLASS (GET_MODE (op[opindex])))
+ emit_move_insn (arg[a], gen_lowpart (GET_MODE (arg[a]),
+ op[opindex]));
+ else
+ {
+ /* First convert the operand to the right mode, then copy it
+ into the destination. Doing the conversion as a separate
+ step (rather than using convert_move) means that we can
+ avoid creating no-op moves when ARG[A] and OP[OPINDEX]
+ refer to the same register. */
+ op[opindex] = convert_to_mode (GET_MODE (arg[a]),
+ op[opindex], unsigned_p[a]);
+ if (!rtx_equal_p (arg[a], op[opindex]))
+ emit_move_insn (arg[a], op[opindex]);
+ }
+ }
+ }
+
+ if (first_arg > 0 && target && target != op[0])
+ {
+ emit_move_insn (target, op[0]);
+ }
+
+ return target;
+}
+
+static bool
+mep_vector_mode_supported_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+/* A subroutine of global_reg_mentioned_p, returns 1 if *LOC mentions
+ a global register. */
+
+static int
+global_reg_mentioned_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ int regno;
+ rtx x = *loc;
+
+ if (! x)
+ return 0;
+
+ switch (GET_CODE (x))
+ {
+ case SUBREG:
+ if (REG_P (SUBREG_REG (x)))
+ {
+ if (REGNO (SUBREG_REG (x)) < FIRST_PSEUDO_REGISTER
+ && global_regs[subreg_regno (x)])
+ return 1;
+ return 0;
+ }
+ break;
+
+ case REG:
+ regno = REGNO (x);
+ if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
+ return 1;
+ return 0;
+
+ case SCRATCH:
+ case PC:
+ case CC0:
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST:
+ case LABEL_REF:
+ return 0;
+
+ case CALL:
+ /* A non-constant call might use a global register. */
+ return 1;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Returns nonzero if X mentions a global register. */
+
+static int
+global_reg_mentioned_p (rtx x)
+{
+ if (INSN_P (x))
+ {
+ if (CALL_P (x))
+ {
+ if (! RTL_CONST_OR_PURE_CALL_P (x))
+ return 1;
+ x = CALL_INSN_FUNCTION_USAGE (x);
+ if (x == 0)
+ return 0;
+ }
+ else
+ x = PATTERN (x);
+ }
+
+ return for_each_rtx (&x, global_reg_mentioned_p_1, NULL);
+}
+
+/* Scheduling hooks for VLIW mode.
+
+ Conceptually this is very simple: we have a two-pack architecture
+ that takes one core insn and one coprocessor insn to make up either
+ a 32- or 64-bit instruction word (depending on the option bit set in
+ the chip). I.e. in VL32 mode, we can pack one 16-bit core insn and
+ one 16-bit cop insn; in VL64 mode we can pack one 16-bit core insn
+ and one 48-bit cop insn or two 32-bit core/cop insns.
+
+ In practice, instruction selection will be a bear. Consider in
+ VL64 mode the following insns
+
+ add $1, 1
+ cmov $cr0, $0
+
+ these cannot pack, since the add is a 16-bit core insn and cmov
+ is a 32-bit cop insn. However,
+
+ add3 $1, $1, 1
+ cmov $cr0, $0
+
+ packs just fine. For good VLIW code generation in VL64 mode, we
+ will have to have 32-bit alternatives for many of the common core
+ insns. Not implemented. */
+
+static int
+mep_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ int cost_specified;
+
+ if (REG_NOTE_KIND (link) != 0)
+ {
+ /* See whether INSN and DEP_INSN are intrinsics that set the same
+ hard register. If so, it is more important to free up DEP_INSN
+ than it is to free up INSN.
+
+ Note that intrinsics like mep_mulr are handled differently from
+ the equivalent mep.md patterns. In mep.md, if we don't care
+ about the value of $lo and $hi, the pattern will just clobber
+ the registers, not set them. Since clobbers don't count as
+ output dependencies, it is often possible to reorder two mulrs,
+ even after reload.
+
+ In contrast, mep_mulr() sets both $lo and $hi to specific values,
+ so any pair of mep_mulr()s will be inter-dependent. We should
+ therefore give the first mep_mulr() a higher priority. */
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
+ && global_reg_mentioned_p (PATTERN (insn))
+ && global_reg_mentioned_p (PATTERN (dep_insn)))
+ return 1;
+
+ /* If the dependence is an anti or output dependence, assume it
+ has no cost. */
+ return 0;
+ }
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (recog_memoized (dep_insn) < 0)
+ return cost;
+
+ /* The latency attribute doesn't apply to MeP-h1: we use the stall
+ attribute instead. */
+ if (!TARGET_H1)
+ {
+ cost_specified = get_attr_latency (dep_insn);
+ if (cost_specified != 0)
+ return cost_specified;
+ }
+
+ return cost;
+}
+
+/* ??? We don't properly compute the length of a load/store insn,
+ taking into account the addressing mode. */
+
+static int
+mep_issue_rate (void)
+{
+ return TARGET_IVC2 ? 3 : 2;
+}
+
+/* Return true if function DECL was declared with the vliw attribute. */
+
+bool
+mep_vliw_function_p (tree decl)
+{
+ return lookup_attribute ("vliw", TYPE_ATTRIBUTES (TREE_TYPE (decl))) != 0;
+}
+
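+/* Scan the ready list from the highest-priority end (the back of
+   READY) for an insn that can issue in SLOT and whose encoding is
+   LENGTH bytes.  Return NULL_RTX if there is none.  */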
+static rtx
+mep_find_ready_insn (rtx *ready, int nready, enum attr_slot slot, int length)
+{
+ int i;
+
+ for (i = nready - 1; i >= 0; --i)
+ {
+ rtx insn = ready[i];
+ if (recog_memoized (insn) >= 0
+ && get_attr_slot (insn) == slot
+ && get_attr_length (insn) == length)
+ return insn;
+ }
+
+ return NULL_RTX;
+}
+
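+/* Move INSN to the next-to-issue (last) position in the READY list,
+   shifting the insns that followed it down one slot.  INSN must
+   already be in the list.  */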
+static void
+mep_move_ready_insn (rtx *ready, int nready, rtx insn)
+{
+ int i;
+
+ for (i = 0; i < nready; ++i)
+ if (ready[i] == insn)
+ {
+ for (; i < nready - 1; ++i)
+ ready[i] = ready[i + 1];
+ ready[i] = insn;
+ return;
+ }
+
+ gcc_unreachable ();
+}
+
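+/* Dump a one-line summary of INSN (insn code, uid, pattern name and
+   the slots it can occupy) for scheduler debugging.  */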
+static void
+mep_print_sched_insn (FILE *dump, rtx insn)
+{
+ const char *slots = "none";
+ const char *name = NULL;
+ int code;
+ char buf[30];
+
+ if (GET_CODE (PATTERN (insn)) == SET
+ || GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ switch (get_attr_slots (insn))
+ {
+ case SLOTS_CORE: slots = "core"; break;
+ case SLOTS_C3: slots = "c3"; break;
+ case SLOTS_P0: slots = "p0"; break;
+ case SLOTS_P0_P0S: slots = "p0,p0s"; break;
+ case SLOTS_P0_P1: slots = "p0,p1"; break;
+ case SLOTS_P0S: slots = "p0s"; break;
+ case SLOTS_P0S_P1: slots = "p0s,p1"; break;
+ case SLOTS_P1: slots = "p1"; break;
+ default:
+ sprintf(buf, "%d", get_attr_slots (insn));
+ slots = buf;
+ break;
+ }
+ }
+ if (GET_CODE (PATTERN (insn)) == USE)
+ slots = "use";
+
+ code = INSN_CODE (insn);
+ if (code >= 0)
+ name = get_insn_name (code);
+ if (!name)
+ name = "{unknown}";
+
+ fprintf (dump,
+ "insn %4d %4d %8s %s\n",
+ code,
+ INSN_UID (insn),
+ name,
+ slots);
+}
+
+static int
+mep_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
+ int *pnready, int clock ATTRIBUTE_UNUSED)
+{
+ int nready = *pnready;
+ rtx core_insn, cop_insn;
+ int i;
+
+ if (dump && sched_verbose > 1)
+ {
+ fprintf (dump, "\nsched_reorder: clock %d nready %d\n", clock, nready);
+ for (i=0; i<nready; i++)
+ mep_print_sched_insn (dump, ready[i]);
+ fprintf (dump, "\n");
+ }
+
+ if (!mep_vliw_function_p (cfun->decl))
+ return 1;
+ if (nready < 2)
+ return 1;
+
+ /* IVC2 uses a DFA to determine what's ready and what's not. */
+ if (TARGET_IVC2)
+ return nready;
+
+ /* We can issue either a core or coprocessor instruction.
+ Look for a matched pair of insns to reorder. If we don't
+ find any, don't second-guess the scheduler's priorities. */
+
+ if ((core_insn = mep_find_ready_insn (ready, nready, SLOT_CORE, 2))
+ && (cop_insn = mep_find_ready_insn (ready, nready, SLOT_COP,
+ TARGET_OPT_VL64 ? 6 : 2)))
+ ;
+ else if (TARGET_OPT_VL64
+ && (core_insn = mep_find_ready_insn (ready, nready, SLOT_CORE, 4))
+ && (cop_insn = mep_find_ready_insn (ready, nready, SLOT_COP, 4)))
+ ;
+ else
+ /* We didn't find a pair. Issue the single insn at the head
+ of the ready list. */
+ return 1;
+
+ /* Reorder the two insns first. */
+ mep_move_ready_insn (ready, nready, core_insn);
+ mep_move_ready_insn (ready, nready - 1, cop_insn);
+ return 2;
+}
+
+/* A for_each_rtx callback. Return true if *X is a register that is
+ set by insn PREV. */
+
+static int
+mep_store_find_set (rtx *x, void *prev)
+{
+ return REG_P (*x) && reg_set_p (*x, (const_rtx) prev);
+}
+
+/* Like mep_store_bypass_p, but takes a pattern as the second argument,
+ not the containing insn. */
+
+static bool
+mep_store_data_bypass_1 (rtx prev, rtx pat)
+{
+ /* Cope with intrinsics like swcpa. */
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (mep_store_data_bypass_p (prev, XVECEXP (pat, 0, i)))
+ return true;
+
+ return false;
+ }
+
+ /* Check for some sort of store. */
+ if (GET_CODE (pat) != SET
+ || GET_CODE (SET_DEST (pat)) != MEM)
+ return false;
+
+ /* Intrinsics use patterns of the form (set (mem (scratch)) (unspec ...)).
+ The first operand to the unspec is the store data and the other operands
+ are used to calculate the address. */
+ if (GET_CODE (SET_SRC (pat)) == UNSPEC)
+ {
+ rtx src;
+ int i;
+
+ src = SET_SRC (pat);
+ for (i = 1; i < XVECLEN (src, 0); i++)
+ if (for_each_rtx (&XVECEXP (src, 0, i), mep_store_find_set, prev))
+ return false;
+
+ return true;
+ }
+
+ /* Otherwise just check that PREV doesn't modify any register mentioned
+ in the memory destination. */
+ return !for_each_rtx (&SET_DEST (pat), mep_store_find_set, prev);
+}
+
+/* Return true if INSN is a store instruction and if the store address
+ has no true dependence on PREV. */
+
+bool
+mep_store_data_bypass_p (rtx prev, rtx insn)
+{
+ return INSN_P (insn) ? mep_store_data_bypass_1 (prev, PATTERN (insn)) : false;
+}
+
+/* A for_each_rtx subroutine of mep_mul_hilo_bypass_p. Return 1 if *X
+ is a register other than LO or HI and if PREV sets *X. */
+
+static int
+mep_mul_hilo_bypass_1 (rtx *x, void *prev)
+{
+ return (REG_P (*x)
+ && REGNO (*x) != LO_REGNO
+ && REGNO (*x) != HI_REGNO
+ && reg_set_p (*x, (const_rtx) prev));
+}
+
+/* Return true if, apart from HI/LO, there are no true dependencies
+ between multiplication instructions PREV and INSN. */
+
+bool
+mep_mul_hilo_bypass_p (rtx prev, rtx insn)
+{
+ rtx pat;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ return (GET_CODE (pat) == SET
+ && !for_each_rtx (&SET_SRC (pat), mep_mul_hilo_bypass_1, prev));
+}
+
+/* Return true if INSN is an ldc instruction that issues to the
+ MeP-h1 integer pipeline. This is true for instructions that
+ read from PSW, LP, SAR, HI and LO. */
+
+bool
+mep_ipipe_ldc_p (rtx insn)
+{
+ rtx pat, src;
+
+ pat = PATTERN (insn);
+
+  /* Cope with intrinsics that set both a hard register and its shadow.
+ The set of the hard register comes first. */
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+
+ if (GET_CODE (pat) == SET)
+ {
+ src = SET_SRC (pat);
+
+ /* Cope with intrinsics. The first operand to the unspec is
+ the source register. */
+ if (GET_CODE (src) == UNSPEC || GET_CODE (src) == UNSPEC_VOLATILE)
+ src = XVECEXP (src, 0, 0);
+
+ if (REG_P (src))
+ switch (REGNO (src))
+ {
+ case PSW_REGNO:
+ case LP_REGNO:
+ case SAR_REGNO:
+ case HI_REGNO:
+ case LO_REGNO:
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Create a VLIW bundle from core instruction CORE and coprocessor
+ instruction COP. COP always satisfies INSN_P, but CORE can be
+ either a new pattern or an existing instruction.
+
+ Emit the bundle in place of COP and return it. */
+
+static rtx
+mep_make_bundle (rtx core, rtx cop)
+{
+ rtx insn;
+
+ /* If CORE is an existing instruction, remove it, otherwise put
+ the new pattern in an INSN harness. */
+ if (INSN_P (core))
+ remove_insn (core);
+ else
+ core = make_insn_raw (core);
+
+ /* Generate the bundle sequence and replace COP with it. */
+ insn = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec (2, core, cop));
+ insn = emit_insn_after (insn, cop);
+ remove_insn (cop);
+
+ /* Set up the links of the insns inside the SEQUENCE. */
+ PREV_INSN (core) = PREV_INSN (insn);
+ NEXT_INSN (core) = cop;
+ PREV_INSN (cop) = core;
+ NEXT_INSN (cop) = NEXT_INSN (insn);
+
+ /* Set the VLIW flag for the coprocessor instruction. */
+ PUT_MODE (core, VOIDmode);
+ PUT_MODE (cop, BImode);
+
+ /* Derive a location for the bundle. Individual instructions cannot
+ have their own location because there can be no assembler labels
+ between CORE and COP. */
+ INSN_LOCATOR (insn) = INSN_LOCATOR (INSN_LOCATOR (core) ? core : cop);
+ INSN_LOCATOR (core) = 0;
+ INSN_LOCATOR (cop) = 0;
+
+ return insn;
+}
+
+/* A helper routine for mep_insn_dependent_p called through note_stores.  */
+
+static void
+mep_insn_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ rtx * pinsn = (rtx *) data;
+
+ if (*pinsn && reg_mentioned_p (x, *pinsn))
+ *pinsn = NULL_RTX;
+}
+
+/* Return true if anything in insn X is (anti,output,true) dependent on
+ anything in insn Y. */
+
+static int
+mep_insn_dependent_p (rtx x, rtx y)
+{
+ rtx tmp;
+
+ gcc_assert (INSN_P (x));
+ gcc_assert (INSN_P (y));
+
+ tmp = PATTERN (y);
+ note_stores (PATTERN (x), mep_insn_dependent_p_1, &tmp);
+ if (tmp == NULL_RTX)
+ return 1;
+
+ tmp = PATTERN (x);
+ note_stores (PATTERN (y), mep_insn_dependent_p_1, &tmp);
+ if (tmp == NULL_RTX)
+ return 1;
+
+ return 0;
+}
+
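+/* Return 1 if INSN issues to the core slot; USEs never count as
+   core insns.  */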
+static int
+core_insn_p (rtx insn)
+{
+ if (GET_CODE (PATTERN (insn)) == USE)
+ return 0;
+ if (get_attr_slot (insn) == SLOT_CORE)
+ return 1;
+ return 0;
+}
+
+/* Mark coprocessor instructions that can be bundled together with
+   the immediately preceding core instruction.  This is later used
+ to emit the "+" that tells the assembler to create a VLIW insn.
+
+ For unbundled insns, the assembler will automatically add coprocessor
+ nops, and 16-bit core nops. Due to an apparent oversight in the
+ spec, the assembler will _not_ automatically add 32-bit core nops,
+ so we have to emit those here.
+
+ Called from mep_insn_reorg. */
+
+static void
+mep_bundle_insns (rtx insns)
+{
+ rtx insn, last = NULL_RTX, first = NULL_RTX;
+ int saw_scheduling = 0;
+
+ /* Only do bundling if we're in vliw mode. */
+ if (!mep_vliw_function_p (cfun->decl))
+ return;
+
+  /* The first insn in a bundle is TImode, the remainder are
+ VOIDmode. After this function, the first has VOIDmode and the
+ rest have BImode. */
+
+ /* Note: this doesn't appear to be true for JUMP_INSNs. */
+
+ /* First, move any NOTEs that are within a bundle, to the beginning
+ of the bundle. */
+ for (insn = insns; insn ; insn = NEXT_INSN (insn))
+ {
+ if (NOTE_P (insn) && first)
+ /* Don't clear FIRST. */;
+
+ else if (NONJUMP_INSN_P (insn) && GET_MODE (insn) == TImode)
+ first = insn;
+
+ else if (NONJUMP_INSN_P (insn) && GET_MODE (insn) == VOIDmode && first)
+ {
+ rtx note, prev;
+
+ /* INSN is part of a bundle; FIRST is the first insn in that
+ bundle. Move all intervening notes out of the bundle.
+ In addition, since the debug pass may insert a label
+ whenever the current line changes, set the location info
+ for INSN to match FIRST. */
+
+ INSN_LOCATOR (insn) = INSN_LOCATOR (first);
+
+ note = PREV_INSN (insn);
+ while (note && note != first)
+ {
+ prev = PREV_INSN (note);
+
+ if (NOTE_P (note))
+ {
+ /* Remove NOTE from here... */
+ PREV_INSN (NEXT_INSN (note)) = PREV_INSN (note);
+ NEXT_INSN (PREV_INSN (note)) = NEXT_INSN (note);
+ /* ...and put it in here. */
+ NEXT_INSN (note) = first;
+ PREV_INSN (note) = PREV_INSN (first);
+ NEXT_INSN (PREV_INSN (note)) = note;
+ PREV_INSN (NEXT_INSN (note)) = note;
+ }
+
+ note = prev;
+ }
+ }
+
+ else if (!NONJUMP_INSN_P (insn))
+ first = 0;
+ }
+
+ /* Now fix up the bundles. */
+ for (insn = insns; insn ; insn = NEXT_INSN (insn))
+ {
+ if (NOTE_P (insn))
+ continue;
+
+ if (!NONJUMP_INSN_P (insn))
+ {
+ last = 0;
+ continue;
+ }
+
+ /* If we're not optimizing enough, there won't be scheduling
+ info. We detect that here. */
+ if (GET_MODE (insn) == TImode)
+ saw_scheduling = 1;
+ if (!saw_scheduling)
+ continue;
+
+ if (TARGET_IVC2)
+ {
+ rtx core_insn = NULL_RTX;
+
+ /* IVC2 slots are scheduled by DFA, so we just accept
+ whatever the scheduler gives us. However, we must make
+ sure the core insn (if any) is the first in the bundle.
+ The IVC2 assembler can insert whatever NOPs are needed,
+ and allows a COP insn to be first. */
+
+ if (NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_MODE (insn) == TImode)
+ {
+ for (last = insn;
+ NEXT_INSN (last)
+ && GET_MODE (NEXT_INSN (last)) == VOIDmode
+ && NONJUMP_INSN_P (NEXT_INSN (last));
+ last = NEXT_INSN (last))
+ {
+ if (core_insn_p (last))
+ core_insn = last;
+ }
+ if (core_insn_p (last))
+ core_insn = last;
+
+ if (core_insn && core_insn != insn)
+ {
+ /* Swap core insn to first in the bundle. */
+
+ /* Remove core insn. */
+ if (PREV_INSN (core_insn))
+ NEXT_INSN (PREV_INSN (core_insn)) = NEXT_INSN (core_insn);
+ if (NEXT_INSN (core_insn))
+ PREV_INSN (NEXT_INSN (core_insn)) = PREV_INSN (core_insn);
+
+ /* Re-insert core insn. */
+ PREV_INSN (core_insn) = PREV_INSN (insn);
+ NEXT_INSN (core_insn) = insn;
+
+ if (PREV_INSN (core_insn))
+ NEXT_INSN (PREV_INSN (core_insn)) = core_insn;
+ PREV_INSN (insn) = core_insn;
+
+ PUT_MODE (core_insn, TImode);
+ PUT_MODE (insn, VOIDmode);
+ }
+ }
+
+	  /* The first insn has TImode, the rest have VOIDmode.  */
+ if (GET_MODE (insn) == TImode)
+ PUT_MODE (insn, VOIDmode);
+ else
+ PUT_MODE (insn, BImode);
+ continue;
+ }
+
+ PUT_MODE (insn, VOIDmode);
+ if (recog_memoized (insn) >= 0
+ && get_attr_slot (insn) == SLOT_COP)
+ {
+ if (GET_CODE (insn) == JUMP_INSN
+ || ! last
+ || recog_memoized (last) < 0
+ || get_attr_slot (last) != SLOT_CORE
+ || (get_attr_length (insn)
+ != (TARGET_OPT_VL64 ? 8 : 4) - get_attr_length (last))
+ || mep_insn_dependent_p (insn, last))
+ {
+ switch (get_attr_length (insn))
+ {
+ case 8:
+ break;
+ case 6:
+ insn = mep_make_bundle (gen_nop (), insn);
+ break;
+ case 4:
+ if (TARGET_OPT_VL64)
+ insn = mep_make_bundle (gen_nop32 (), insn);
+ break;
+ case 2:
+ if (TARGET_OPT_VL64)
+ error ("2 byte cop instructions are"
+ " not allowed in 64-bit VLIW mode");
+ else
+ insn = mep_make_bundle (gen_nop (), insn);
+ break;
+ default:
+ error ("unexpected %d byte cop instruction",
+ get_attr_length (insn));
+ break;
+ }
+ }
+ else
+ insn = mep_make_bundle (last, insn);
+ }
+
+ last = insn;
+ }
+}
+
+
+/* Try to instantiate INTRINSIC with the operands given in OPERANDS.
+ Return true on success. This function can fail if the intrinsic
+ is unavailable or if the operands don't satisfy their predicates. */
+
+bool
+mep_emit_intrinsic (int intrinsic, const rtx *operands)
+{
+ const struct cgen_insn *cgen_insn;
+ const struct insn_data_d *idata;
+ rtx newop[10];
+ int i;
+
+ if (!mep_get_intrinsic_insn (intrinsic, &cgen_insn))
+ return false;
+
+ idata = &insn_data[cgen_insn->icode];
+ for (i = 0; i < idata->n_operands; i++)
+ {
+ newop[i] = mep_convert_arg (idata->operand[i].mode, operands[i]);
+ if (!idata->operand[i].predicate (newop[i], idata->operand[i].mode))
+ return false;
+ }
+
+ emit_insn (idata->genfun (newop[0], newop[1], newop[2],
+ newop[3], newop[4], newop[5],
+ newop[6], newop[7], newop[8]));
+
+ return true;
+}
+
+
+/* Apply the given unary intrinsic to OPERANDS[1] and store the
+   result in OPERANDS[0].  Report an error if the instruction
+   could not be synthesized.  OPERANDS[1] is a register_operand.
+   For sign and zero extensions, it may be smaller than SImode.  */
+
+bool
+mep_expand_unary_intrinsic (int ATTRIBUTE_UNUSED intrinsic,
+ rtx * operands ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+
+/* Likewise, but apply a binary operation to OPERANDS[1] and
+ OPERANDS[2]. OPERANDS[1] is a register_operand, OPERANDS[2]
+ can be a general_operand.
+
+ IMMEDIATE and IMMEDIATE3 are intrinsics that take an immediate
+ third operand. REG and REG3 take register operands only. */
+
+bool
+mep_expand_binary_intrinsic (int ATTRIBUTE_UNUSED immediate,
+ int ATTRIBUTE_UNUSED immediate3,
+ int ATTRIBUTE_UNUSED reg,
+ int ATTRIBUTE_UNUSED reg3,
+ rtx * operands ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
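+/* Implement TARGET_RTX_COSTS.  Small constants are free, larger
+   ones and multiplications cost more.  */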
+static bool
+mep_rtx_cost (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, bool ATTRIBUTE_UNUSED speed_t)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ if (INTVAL (x) >= -128 && INTVAL (x) < 127)
+ *total = 0;
+ else if (INTVAL (x) >= -32768 && INTVAL (x) < 65536)
+ *total = 1;
+ else
+ *total = 3;
+ return true;
+
+ case SYMBOL_REF:
+ *total = optimize_size ? COSTS_N_INSNS (0) : COSTS_N_INSNS (1);
+ return true;
+
+ case MULT:
+ *total = (GET_CODE (XEXP (x, 1)) == CONST_INT
+ ? COSTS_N_INSNS (3)
+ : COSTS_N_INSNS (2));
+ return true;
+ }
+ return false;
+}
+
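+/* Implement TARGET_ADDRESS_COST.  All addresses are treated as
+   equally cheap.  */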
+static int
+mep_address_cost (rtx addr ATTRIBUTE_UNUSED, bool ATTRIBUTE_UNUSED speed_p)
+{
+ return 1;
+}
+
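+/* Implement TARGET_HANDLE_OPTION.  Selecting IVC2 also unfixes the
+   32 coprocessor registers and gives the coprocessor control
+   registers their IVC2 names.  */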
+static bool
+mep_handle_option (size_t code,
+ const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ int i;
+
+ switch (code)
+ {
+ case OPT_mall_opts:
+ target_flags |= MEP_ALL_OPTS;
+ break;
+
+ case OPT_mno_opts:
+ target_flags &= ~ MEP_ALL_OPTS;
+ break;
+
+ case OPT_mcop64:
+ target_flags |= MASK_COP;
+ target_flags |= MASK_64BIT_CR_REGS;
+ break;
+
+    case OPT_mtiny_:
+      option_mtiny_specified = 1;
+      break;
+
+ case OPT_mivc2:
+ target_flags |= MASK_COP;
+ target_flags |= MASK_64BIT_CR_REGS;
+ target_flags |= MASK_VLIW;
+ target_flags |= MASK_OPT_VL64;
+ target_flags |= MASK_IVC2;
+
+ for (i=0; i<32; i++)
+ fixed_regs[i+48] = 0;
+ for (i=0; i<32; i++)
+ call_used_regs[i+48] = 1;
+ for (i=6; i<8; i++)
+ call_used_regs[i+48] = 0;
+
+#define RN(n,s) reg_names[FIRST_CCR_REGNO + n] = s
+ RN (0, "$csar0");
+ RN (1, "$cc");
+ RN (4, "$cofr0");
+ RN (5, "$cofr1");
+ RN (6, "$cofa0");
+ RN (7, "$cofa1");
+ RN (15, "$csar1");
+
+ RN (16, "$acc0_0");
+ RN (17, "$acc0_1");
+ RN (18, "$acc0_2");
+ RN (19, "$acc0_3");
+ RN (20, "$acc0_4");
+ RN (21, "$acc0_5");
+ RN (22, "$acc0_6");
+ RN (23, "$acc0_7");
+
+ RN (24, "$acc1_0");
+ RN (25, "$acc1_1");
+ RN (26, "$acc1_2");
+ RN (27, "$acc1_3");
+ RN (28, "$acc1_4");
+ RN (29, "$acc1_5");
+ RN (30, "$acc1_6");
+ RN (31, "$acc1_7");
+#undef RN
+
+ break;
+
+ default:
+ break;
+ }
+ return TRUE;
+}
+
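+/* Implement TARGET_ASM_INIT_SECTIONS: create the MeP-specific
+   output sections (.based, .sbss, .sdata, .far, .vtext, etc.)
+   used by the section-selection hooks.  */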
+static void
+mep_asm_init_sections (void)
+{
+ based_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ "\t.section .based,\"aw\"");
+
+ tinybss_section
+ = get_unnamed_section (SECTION_WRITE | SECTION_BSS, output_section_asm_op,
+ "\t.section .sbss,\"aw\"");
+
+ sdata_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ "\t.section .sdata,\"aw\",@progbits");
+
+ far_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ "\t.section .far,\"aw\"");
+
+ farbss_section
+ = get_unnamed_section (SECTION_WRITE | SECTION_BSS, output_section_asm_op,
+ "\t.section .farbss,\"aw\"");
+
+ frodata_section
+ = get_unnamed_section (0, output_section_asm_op,
+ "\t.section .frodata,\"a\"");
+
+ srodata_section
+ = get_unnamed_section (0, output_section_asm_op,
+ "\t.section .srodata,\"a\"");
+
+ vtext_section
+ = get_unnamed_section (SECTION_CODE | SECTION_MEP_VLIW, output_section_asm_op,
+ "\t.section .vtext,\"axv\"\n\t.vliw");
+
+ vftext_section
+ = get_unnamed_section (SECTION_CODE | SECTION_MEP_VLIW, output_section_asm_op,
+ "\t.section .vftext,\"axv\"\n\t.vliw");
+
+ ftext_section
+ = get_unnamed_section (SECTION_CODE, output_section_asm_op,
+ "\t.section .ftext,\"ax\"\n\t.core");
+
+}
+
+/* Initialize the GCC target structure. */
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE mep_start_function
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE mep_attribute_table
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES mep_comp_type_attributes
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES mep_insert_attributes
+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P mep_function_attribute_inlinable_p
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P mep_can_inline_p
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS mep_section_type_flags
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION mep_asm_named_section
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS mep_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN mep_expand_builtin
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST mep_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE mep_issue_rate
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER mep_sched_reorder
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING mep_strip_name_encoding
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION mep_select_section
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION mep_unique_section
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO mep_encode_section_info
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL mep_function_ok_for_sibcall
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS mep_rtx_cost
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST mep_address_cost
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG mep_reorg
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS mep_setup_incoming_varargs
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE mep_pass_by_reference
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG mep_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE mep_function_arg_advance
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P mep_vector_mode_supported_p
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION mep_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE mep_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE mep_option_optimization_table
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+#undef TARGET_ALLOCATE_INITIAL_VALUE
+#define TARGET_ALLOCATE_INITIAL_VALUE mep_allocate_initial_value
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS mep_asm_init_sections
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY mep_return_in_memory
+#undef TARGET_NARROW_VOLATILE_BITFIELD
+#define TARGET_NARROW_VOLATILE_BITFIELD mep_narrow_volatile_bitfield
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS mep_expand_builtin_saveregs
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST mep_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START mep_expand_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR mep_gimplify_va_arg_expr
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE mep_can_eliminate
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE mep_conditional_register_usage
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT mep_trampoline_init
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-mep.h"
diff --git a/gcc/config/mep/mep.cpu b/gcc/config/mep/mep.cpu
new file mode 100644
index 000000000..c87c5d1b6
--- /dev/null
+++ b/gcc/config/mep/mep.cpu
@@ -0,0 +1,21 @@
+; Toshiba MeP Media Engine description. -*- Scheme -*-
+; Copyright (C) 2009 Free Software Foundation, Inc.
+; Contributed by Red Hat, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+(include "mep-default.cpu")
diff --git a/gcc/config/mep/mep.h b/gcc/config/mep/mep.h
new file mode 100644
index 000000000..5244bb38a
--- /dev/null
+++ b/gcc/config/mep/mep.h
@@ -0,0 +1,824 @@
+/* Definitions for Toshiba Media Processor
+ Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef CPP_SPEC
+#define CPP_SPEC "\
+-D__MEP__ -D__MeP__ \
+-D__section(_x)=__attribute__((section(_x))) \
+-D__align(_x)=__attribute__((aligned(_x))) \
+-D__io(_x)=__attribute__((io(_x))) \
+-D__cb(_x)=__attribute__((cb(_x))) \
+-D__based=__attribute__((based)) \
+-D__tiny=__attribute__((tiny)) \
+-D__near=__attribute__((near)) \
+-D__far=__attribute__((far)) \
+-D__vliw=__attribute__((vliw)) \
+-D__interrupt=__attribute__((interrupt)) \
+-D__disinterrupt=__attribute__((disinterrupt)) \
+%{!meb:%{!mel:-D__BIG_ENDIAN__}} \
+%{meb:-U__LITTLE_ENDIAN__ -D__BIG_ENDIAN__} \
+%{mel:-U__BIG_ENDIAN__ -D__LITTLE_ENDIAN__} \
+%{mconfig=*:-D__MEP_CONFIG_%*} \
+%{mivc2:-D__MEP_CONFIG_CP_DATA_BUS_WIDTH=64} \
+"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{!mlibrary:%(config_cc_spec)} \
+%{!.cc:%{O2:%{!funroll*:--param max-completely-peeled-insns=6 \
+ --param max-unrolled-insns=6 -funroll-loops}}}"
+
+#undef CC1PLUS_SPEC
+#define CC1PLUS_SPEC "%{!mlibrary:%(config_cc_spec)}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{mconfig=*} %{meb:-EB} %{mel:-EL} \
+%{mno-satur} %{msatur} %{mno-clip} %{mclip} %{mno-minmax} %{mminmax} \
+%{mno-absdiff} %{mabsdiff} %{mno-leadz} %{mleadz} %{mno-bitops} %{mbitops} \
+%{mno-div} %{mdiv} %{mno-mult} %{mmult} %{mno-average} %{maverage} \
+%{mcop32} %{mno-debug} %{mdebug} %{mlibrary}"
+
+/* The MeP config tool will edit this spec. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{msdram:%{msim:simsdram-crt0.o%s}} \
+%{mno-sdram:%{msim:sim-crt0.o%s}} \
+%{msdram:%{!msim*:sdram-crt0.o%s}} \
+%{mno-sdram:%{!msim*:crt0.o%s}} \
+%(config_start_spec) \
+%{msimnovec:simnovec-crt0.o%s} \
+crtbegin.o%s"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-( -lc %{msim*:-lsim}%{!msim*:-lnosys} -) %(config_link_spec)"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{meb:-EB} %{mel:-EL}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s %{msim*:sim-crtn.o%s}%{!msim*:crtn.o%s}"
+
+/* The MeP config tool will edit this spec. */
+#define CONFIG_CC_SPEC "\
+%{mconfig=default: -mbitops -mleadz -mabsdiff -maverage -mminmax -mclip -msatur -mvl64 -mvliw -mcop64 -D__MEP_CONFIG_CP_DATA_BUS_WIDTH=64 -mivc2}\
+"
+/* end-config-cc-spec */
+
+/* The MeP config tool will edit this spec. */
+#define CONFIG_LINK_SPEC "\
+%{mconfig=default: %{!T*:-Tdefault.ld}}\
+"
+/* end-config-link-spec */
+
+/* The MeP config tool will edit this spec. */
+#define CONFIG_START_SPEC "\
+%{!msdram:%{!mno-sdram:%{!msim*:crt0.o%s}}} \
+%{!msdram:%{!mno-sdram:%{msim:sim-crt0.o%s}}} \
+"
+/* end-config-start-spec */
+
+#define EXTRA_SPECS \
+ { "config_cc_spec", CONFIG_CC_SPEC }, \
+ { "config_link_spec", CONFIG_LINK_SPEC }, \
+ { "config_start_spec", CONFIG_START_SPEC },
+
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("mep"); \
+ builtin_assert ("machine=mep"); \
+ } \
+ while (0)
+
+/* Controlled by MeP-Integrator. */
+#define TARGET_H1 0
+
+#define MEP_ALL_OPTS (MASK_OPT_AVERAGE \
+ | MASK_OPT_MULT \
+ | MASK_OPT_DIV \
+ | MASK_OPT_BITOPS \
+ | MASK_OPT_LEADZ \
+ | MASK_OPT_ABSDIFF \
+ | MASK_OPT_MINMAX \
+ | MASK_OPT_CLIP \
+ | MASK_OPT_SATUR )
+
+#define TARGET_DEFAULT (MASK_IO_VOLATILE | MASK_OPT_REPEAT | MEP_ALL_OPTS | MASK_LITTLE_ENDIAN)
+
+#define TARGET_IO_NO_VOLATILE (! (target_flags & MASK_IO_VOLATILE))
+#define TARGET_OPT_NOREPEAT (! (target_flags & MASK_OPT_REPEAT))
+#define TARGET_32BIT_CR_REGS (! (target_flags & MASK_64BIT_CR_REGS))
+#define TARGET_BIG_ENDIAN (! (target_flags & MASK_LITTLE_ENDIAN))
+
+#define TARGET_COPRO_MULT 0
+
+#define TARGET_VERSION fprintf (stderr, " (Toshiba Media Processor (MeP))");
+
+/* The MeP config tool will replace this as appropriate. */
+#define DEFAULT_ENDIAN_SPEC "%{!meb: -mel}"
+
+/* The MeP config tool will replace this with an -mconfig= switch. */
+#define LIBRARY_CONFIG_SPEC "-mconfig=default"
+
+/* Don't add an endian option when building the libraries. */
+#define DRIVER_SELF_SPECS \
+ "%{!mlibrary:" DEFAULT_ENDIAN_SPEC "}", \
+ "%{mlibrary: " LIBRARY_CONFIG_SPEC " %{!mel:-meb}}", \
+ "%{mall-opts:-maverage -mmult -mdiv -mbitops -mleadz \
+ -mabsdiff -mminmax -mclip -msatur -mdebug} %<mall-opts", \
+ "%{mno-opts:-mno-average -mno-mult -mno-div -mno-bitops -mno-leadz \
+ -mno-absdiff -mno-minmax -mno-clip -mno-satur -mno-debug} %<mno-opts", \
+ "%{mfar:-ml -mtf -mc=far} %<mfar", \
+ "%{mconfig=default:-mmult -mdiv -D__MEP_CONFIG_ISA=1}"
+
+/* The MeP config tool will add COPROC_SELECTION_TABLE here. */
+/* start-coproc-selection-table */
+#define COPROC_SELECTION_TABLE \
+{"default", ISA_EXT1}
+/* end-coproc-selection-table */
+
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN ? 0 : 1)
+#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN ? 0 : 1)
+
+#define UNITS_PER_WORD 4
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ do \
+ { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode; \
+ } \
+ while (0)
+
+#define PARM_BOUNDARY 32
+#define STACK_BOUNDARY 32
+#define PREFERRED_STACK_BOUNDARY 64
+#define FUNCTION_BOUNDARY 16
+#define BIGGEST_ALIGNMENT 64
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define STRICT_ALIGNMENT 1
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+#define DEFAULT_VTABLE_THUNKS 1
+
+
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define CHAR_TYPE_SIZE 8
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Register numbers:
+ 0..15 core registers
+ 16..47 control registers
+ 48..79 coprocessor registers
+ 80..111 coprocessor control registers
+ 112 virtual arg pointer register */
+
+#define FIRST_PSEUDO_REGISTER (LAST_SHADOW_REGISTER + 1)
+
+ /* R12 is optionally FP. R13 is TP, R14 is GP, R15 is SP. */
+ /* hi and lo can be used as general registers. Others have
+ immutable bits. */
+/* A "1" here means the register is generally not available to gcc,
+ and is assumed to remain unchanged or unused throughout. */
+#define FIXED_REGISTERS { \
+ /* core registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, \
+ /* control registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* coprocessor registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* coprocessor control registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* virtual arg pointer */ \
+ 1, FIXED_SHADOW_REGISTERS \
+ }
+
+/* This is a call-clobbered register, not used for arguments or the
+   return value, that we use as a temporary for saving control
+   registers in the prologue and restoring them in the epilogue.  */
+#define REGSAVE_CONTROL_TEMP 11
+
+/* A "1" here means a register may be changed by a function without
+ needing to preserve its previous value. */
+#define CALL_USED_REGISTERS { \
+ /* core registers */ \
+ 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, \
+ /* control registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* coprocessor registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* coprocessor control registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* virtual arg pointer */ \
+ 1, CALL_USED_SHADOW_REGISTERS \
+ }
+
+#define REG_ALLOC_ORDER { \
+ /* core registers */ \
+ 3, 2, 1, 0, 9, 10, 11, 12, 4, 5, 6, 7, 8, 13, 14, 15, \
+ /* control registers */ \
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, \
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ /* coprocessor registers */ \
+ /* Prefer to use the non-loadable registers when looking for a \
+ member of CR_REGS (as opposed to LOADABLE_CR_REGS). */ \
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 48, 49, 50, 51, 52, 58, \
+ 59, 60, 61, 62, 63, 53, 54, 55, 56, 57, 74, 75, 76, 77, 78, 79, \
+ /* coprocessor control registers */ \
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, \
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, \
+ /* virtual arg pointer */ \
+ 112, SHADOW_REG_ALLOC_ORDER \
+ }
+
+/* We must somehow disable register remapping for interrupt functions. */
+extern char mep_leaf_registers[];
+#define LEAF_REGISTERS mep_leaf_registers
+#define LEAF_REG_REMAP(REG) (REG)
+
+
+#define FIRST_GR_REGNO 0
+#define FIRST_CONTROL_REGNO (FIRST_GR_REGNO + 16)
+#define FIRST_CR_REGNO (FIRST_CONTROL_REGNO + 32)
+#define FIRST_CCR_REGNO (FIRST_CR_REGNO + 32)
+
+#define GR_REGNO_P(REGNO) \
+ ((unsigned) ((REGNO) - FIRST_GR_REGNO) < 16)
+
+#define CONTROL_REGNO_P(REGNO) \
+ ((unsigned) ((REGNO) - FIRST_CONTROL_REGNO) < 32)
+
+#define LOADABLE_CR_REGNO_P(REGNO) \
+ ((unsigned) ((REGNO) - FIRST_CR_REGNO) < 16)
+
+#define CR_REGNO_P(REGNO) \
+ ((unsigned) ((REGNO) - FIRST_CR_REGNO) < 32)
+
+#define CCR_REGNO_P(REGNO) \
+ ((unsigned) ((REGNO) - FIRST_CCR_REGNO) < 32)
+
+#define ANY_CONTROL_REGNO_P(REGNO) \
+ (CONTROL_REGNO_P (REGNO) || CCR_REGNO_P (REGNO))
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((CR_REGNO_P (REGNO) && TARGET_64BIT_CR_REGS) \
+ ? (GET_MODE_SIZE (MODE) + 8 - 1) / 8 \
+ : (GET_MODE_SIZE (MODE) + 4 - 1) / 4)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ mep_cannot_change_mode_class (FROM, TO, CLASS)
+
+enum reg_class
+{
+ NO_REGS,
+ SP_REGS,
+ TP_REGS,
+ GP_REGS,
+ R0_REGS,
+ RPC_REGS,
+ HI_REGS,
+ LO_REGS,
+ HILO_REGS,
+ TPREL_REGS,
+ GENERAL_NOT_R0_REGS,
+ GENERAL_REGS,
+ CONTROL_REGS,
+ CONTROL_OR_GENERAL_REGS,
+ USER0_REGS,
+ USER1_REGS,
+ USER2_REGS,
+ USER3_REGS,
+ LOADABLE_CR_REGS,
+ CR_REGS,
+ CCR_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define REG_CLASS_NAMES { \
+ "NO_REGS", \
+ "SP_REGS", \
+ "TP_REGS", \
+ "GP_REGS", \
+ "R0_REGS", \
+ "RPC_REGS", \
+ "HI_REGS", \
+ "LO_REGS", \
+ "HILO_REGS", \
+ "TPREL_REGS", \
+ "GENERAL_NOT_R0_REGS", \
+ "GENERAL_REGS", \
+ "CONTROL_REGS", \
+ "CONTROL_OR_GENERAL_REGS", \
+ "USER0_REGS", \
+ "USER1_REGS", \
+ "USER2_REGS", \
+ "USER3_REGS", \
+ "LOADABLE_CR_REGS", \
+ "CR_REGS", \
+ "CCR_REGS", \
+ "ALL_REGS" }
+
+#define REG_CLASS_CONTENTS { \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00008000, 0x00000000, 0x00000000, 0x00000000 }, /* SP_REGS */ \
+ { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* TP_REGS */ \
+ { 0x00004000, 0x00000000, 0x00000000, 0x00000000 }, /* GP_REGS */ \
+ { 0x00000001, 0x00000000, 0x00000000, 0x00000000 }, /* R0_REGS */ \
+ { 0x00400000, 0x00000000, 0x00000000, 0x00000000 }, /* RPC_REGS */ \
+ { 0x00800000, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
+ { 0x01000000, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \
+ { 0x01800000, 0x00000000, 0x00000000, 0x00000000 }, /* HILO_REGS */ \
+ { 0x000000ff, 0x00000000, 0x00000000, 0x00000000 }, /* TPREL_REGS */ \
+ { 0x0000fffe, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_NOT_R0_REGS */ \
+ { 0x0000ffff, 0x00000000, 0x00000000, 0x00010000 }, /* GENERAL_REGS */ \
+ { 0xffff0000, 0x0000ffff, 0x00000000, 0x00000000 }, /* CONTROL_REGS */ \
+ { 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000 }, /* CONTROL_OR_GENERAL_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* USER0_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* USER1_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* USER2_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* USER3_REGS */ \
+ { 0x00000000, 0xffff0000, 0x00000000, 0x00000000 }, /* LOADABLE_CR_REGS */ \
+ { 0x00000000, 0xffff0000, 0x0000ffff, 0x00000000 }, /* CR_REGS */ \
+ { 0x00000000, 0x00000000, 0xffff0000, 0x0000ffff }, /* CCR_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0x0001ffff }, /* ALL_REGS */ \
+ }
+
+#define REGNO_REG_CLASS(REGNO) (enum reg_class) mep_regno_reg_class (REGNO)
+
+#define IRA_COVER_CLASSES { GENERAL_REGS, CONTROL_REGS, CR_REGS, CCR_REGS, LIM_REG_CLASSES }
+
+#define BASE_REG_CLASS GENERAL_REGS
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#if 0
+#define REG_CLASS_FROM_CONSTRAINT(CHAR, STRING) \
+ mep_reg_class_from_constraint (CHAR, STRING)
+#endif
+
+#define REGNO_OK_FOR_BASE_P(NUM) (GR_REGNO_P (NUM) \
+ || (NUM) == ARG_POINTER_REGNUM \
+ || (NUM) >= FIRST_PSEUDO_REGISTER)
+
+#define REGNO_OK_FOR_INDEX_P(NUM) REGNO_OK_FOR_BASE_P (NUM)
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) mep_preferred_reload_class (X, CLASS)
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mep_secondary_input_reload_class (CLASS, MODE, X)
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mep_secondary_output_reload_class (CLASS, MODE, X)
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ mep_secondary_memory_needed (CLASS1, CLASS2, MODE)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#if 0
+#define CONST_OK_FOR_LETTER_P(VALUE, C) mep_const_ok_for_letter_p (VALUE, C)
+
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) 0
+
+#define CONSTRAINT_LEN(C, STR) \
+ ((C) == 'e' ? 2 : DEFAULT_CONSTRAINT_LEN (C, STR))
+#define EXTRA_CONSTRAINT(VALUE, C) mep_extra_constraint (VALUE, C)
+#endif
+
+#define WANT_GCC_DECLARATIONS
+#include "mep-intrin.h"
+#undef WANT_GCC_DECLARATIONS
+
+extern int mep_intrinsic_insn[];
+extern unsigned int mep_selected_isa;
+
+/* True if intrinsic X is available. X is a mep_* value declared
+ in mep-intrin.h. */
+#define MEP_INTRINSIC_AVAILABLE_P(X) (mep_intrinsic_insn[X] >= 0)
+
+/* Used to define CGEN_ENABLE_INTRINSIC_P in mep-intrin.h. */
+#define CGEN_CURRENT_ISAS mep_selected_isa
+#define CGEN_CURRENT_GROUP \
+ (mep_vliw_function_p (cfun->decl) ? GROUP_VLIW : GROUP_NORMAL)
+
+
+
+#define STACK_GROWS_DOWNWARD 1
+#define FRAME_GROWS_DOWNWARD 1
+#define STARTING_FRAME_OFFSET 0
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+#define INCOMING_FRAME_SP_OFFSET 0
+
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) mep_return_addr_rtx (COUNT)
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, LP_REGNO)
+#define DWARF_FRAME_RETURN_COLUMN LP_REGNO
+
+#define STACK_POINTER_REGNUM 15
+#define FRAME_POINTER_REGNUM 8
+#define ARG_POINTER_REGNUM 112
+#define RETURN_ADDRESS_POINTER_REGNUM 17
+#define STATIC_CHAIN_REGNUM 0
+
+
+
+#define ELIMINABLE_REGS \
+{ \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \
+}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = mep_elimination_offset (FROM, TO)
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+
+#define FUNCTION_ARG_CALLEE_COPIES(CUM, MODE, TYPE, NAMED) 1
+
+typedef struct
+{
+ int nregs;
+ int vliw;
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ mep_init_cumulative_args (& (CUM), FNTYPE, LIBNAME, FNDECL)
+
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (((REGNO) >= 1 && (REGNO) <= 4) \
+ || ((REGNO) >= FIRST_CR_REGNO + 1 \
+ && (REGNO) <= FIRST_CR_REGNO + 4 \
+ && TARGET_COP))
+
+#define RETURN_VALUE_REGNUM 0
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) mep_function_value (VALTYPE, FUNC)
+#define LIBCALL_VALUE(MODE) mep_libcall_value (MODE)
+
+#define FUNCTION_VALUE_REGNO_P(REGNO) \
+ ((REGNO) == RETURN_VALUE_REGNUM)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define STRUCT_VALUE 0
+
+#define FUNCTION_OK_FOR_SIBCALL(DECL) mep_function_ok_for_sibcall(DECL)
+
+/* Prologue and epilogues are all handled via RTL. */
+
+#define EXIT_IGNORE_STACK 1
+
+#define EPILOGUE_USES(REGNO) mep_epilogue_uses (REGNO)
+
+/* Profiling is supported. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) mep_function_profiler (FILE);
+#define NO_PROFILE_COUNTERS 1
+
+/* Trampolines are built at run-time. The cache is invalidated at
+ run-time also. */
+
+#define TRAMPOLINE_SIZE 20
+
+
+#define MAX_REGS_PER_ADDRESS 1
+
+#ifdef REG_OK_STRICT
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+ if (mep_legitimate_address ((MODE), (X), 1)) goto LABEL
+#else
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+ if (mep_legitimate_address ((MODE), (X), 0)) goto LABEL
+#endif
+
+#ifdef REG_OK_STRICT
+#define REG_OK_FOR_BASE_P(X) GR_REGNO_P (REGNO (X))
+#else
+#define REG_OK_FOR_BASE_P(X) (GR_REGNO_P (REGNO (X)) \
+ || REGNO (X) == ARG_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+#endif
+
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
+ if (mep_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE), (IND_LEVELS))) \
+ goto WIN
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL)
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ mep_legitimate_constant_p(X)
+
+#define SELECT_CC_MODE(OP, X, Y) CCmode
+
+
+/* Moves between control regs need a scratch. */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) mep_register_move_cost (MODE, FROM, TO)
+
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant function
+ address than to call an address kept in a register. */
+#define NO_FUNCTION_CSE
+
+
+#define TEXT_SECTION_ASM_OP "\t.text\n\t.core"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP ".bss"
+
+#define USE_SELECT_SECTION_FOR_FUNCTIONS 1
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+#define TARGET_ASM_FILE_END mep_file_cleanups
+
+#define ASM_APP_ON "#APP\n"
+#define ASM_APP_OFF "#NO_APP\n"
+
+#define ASM_OUTPUT_DOUBLE(FILE, VALUE) \
+ do \
+ { \
+ long l[2]; \
+ \
+ REAL_VALUE_TO_TARGET_DOUBLE (VALUE, l); \
+ fprintf (FILE, "\t.long\t0x%lx,0x%lx\n", l[0], l[1]); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_FLOAT(FILE, VALUE) \
+ do \
+ { \
+ long l; \
+ \
+ REAL_VALUE_TO_TARGET_SINGLE (VALUE, l); \
+ fprintf ((FILE), "\t.long\t0x%lx\n", l); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_CHAR(FILE, VALUE) \
+ do \
+ { \
+ fprintf (FILE, "\t.byte\t"); \
+ output_addr_const (FILE, (VALUE)); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_SHORT(FILE, VALUE) \
+ do \
+ { \
+ fprintf (FILE, "\t.hword\t"); \
+ output_addr_const (FILE, (VALUE)); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_INT(FILE, VALUE) \
+ do \
+ { \
+ fprintf (FILE, "\t.word\t"); \
+ output_addr_const (FILE, (VALUE)); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_BYTE(STREAM, VALUE) \
+ fprintf (STREAM, "\t%s\t0x%x\n", ASM_BYTE_OP, (VALUE))
+
+/* Most of these are here to support based/tiny/far/io attributes. */
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ mep_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 1)
+
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ mep_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0)
+
+#define ASM_OUTPUT_LABEL(STREAM, NAME) \
+ do \
+ { \
+ assemble_name (STREAM, NAME); \
+ fputs (":\n", STREAM); \
+ } \
+ while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ asm_fprintf ((STREAM), "%U%s", mep_strip_name_encoding (NAME))
+
+#define ASM_FORMAT_PRIVATE_NAME(OUTVAR, NAME, NUMBER) \
+ do \
+ { \
+ (OUTVAR) = (char *) alloca (strlen ((NAME)) + 12); \
+ sprintf ((OUTVAR), "%s.%ld", (NAME), (long)(NUMBER)); \
+ } \
+ while (0)
+
+
+#define REGISTER_NAMES \
+{ \
+ /* Core registers. */ \
+ "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", \
+ "$8", "$9", "$10", "$11", "$12", "$tp", "$gp", "$sp", \
+ /* Control registers. */ \
+ "$pc", "$lp", "$sar", "3", "$rpb", "$rpe", "$rpc", "$hi", \
+ "$lo", "9", "10", "11", "$mb0", "$me0", "$mb1", "$me1", \
+ "$psw", "$id", "$tmp", "$epc", "$exc", "$cfg", "22", "$npc", \
+ "$dbg", "$depc", "$opt", "$rcfg", "$ccfg", "29", "30", "31", \
+ /* Coprocessor registers. */ \
+ "$c0", "$c1", "$c2", "$c3", "$c4", "$c5", "$c6", "$c7", \
+ "$c8", "$c9", "$c10", "$c11", "$c12", "$c13", "$c14", "$c15", \
+ "$c16", "$c17", "$c18", "$c19", "$c20", "$c21", "$c22", "$c23", \
+ "$c24", "$c25", "$c26", "$c27", "$c28", "$c29", "$c30", "$c31", \
+ /* Coprocessor control registers. */ \
+ "$ccr0", "$ccr1", "$ccr2", "$ccr3", "$ccr4", "$ccr5", "$ccr6", \
+ "$ccr7", "$ccr8", "$ccr9", "$ccr10", "$ccr11", "$ccr12", "$ccr13", \
+ "$ccr14", "$ccr15", "$ccr16", "$ccr17", "$ccr18", "$ccr19", "$ccr20", \
+ "$ccr21", "$ccr22", "$ccr23", "$ccr24", "$ccr25", "$ccr26", "$ccr27", \
+ "$ccr28", "$ccr29", "$ccr30", "$ccr31", \
+ /* Virtual arg pointer. */ \
+ "$argp", SHADOW_REGISTER_NAMES \
+}
+
+/* We duplicate some of the above names because we twiddle them
+ according to *how* the registers are used. Likewise, we include
+ the standard names for coprocessor control registers so that
+ coprocessor options can rename them in the default table. Note
+ that these are compared to stripped names (see REGISTER_PREFIX
+ below). */
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "8", 8 }, { "fp", 8 }, \
+ { "13", 13 }, { "tp", 13 }, \
+ { "14", 14 }, { "gp", 14 }, \
+ { "15", 15 }, { "sp", 15 }, \
+ { "ccr0", FIRST_CCR_REGNO + 0 }, \
+ { "ccr1", FIRST_CCR_REGNO + 1 }, \
+ { "ccr2", FIRST_CCR_REGNO + 2 }, \
+ { "ccr3", FIRST_CCR_REGNO + 3 }, \
+ { "ccr4", FIRST_CCR_REGNO + 4 }, \
+ { "ccr5", FIRST_CCR_REGNO + 5 }, \
+ { "ccr6", FIRST_CCR_REGNO + 6 }, \
+ { "ccr7", FIRST_CCR_REGNO + 7 }, \
+ { "ccr8", FIRST_CCR_REGNO + 8 }, \
+ { "ccr9", FIRST_CCR_REGNO + 9 }, \
+ { "ccr10", FIRST_CCR_REGNO + 10 }, \
+ { "ccr11", FIRST_CCR_REGNO + 11 }, \
+ { "ccr12", FIRST_CCR_REGNO + 12 }, \
+ { "ccr13", FIRST_CCR_REGNO + 13 }, \
+ { "ccr14", FIRST_CCR_REGNO + 14 }, \
+ { "ccr15", FIRST_CCR_REGNO + 15 }, \
+ { "ccr16", FIRST_CCR_REGNO + 16 }, \
+ { "ccr17", FIRST_CCR_REGNO + 17 }, \
+ { "ccr18", FIRST_CCR_REGNO + 18 }, \
+ { "ccr19", FIRST_CCR_REGNO + 19 }, \
+ { "ccr20", FIRST_CCR_REGNO + 20 }, \
+ { "ccr21", FIRST_CCR_REGNO + 21 }, \
+ { "ccr22", FIRST_CCR_REGNO + 22 }, \
+ { "ccr23", FIRST_CCR_REGNO + 23 }, \
+ { "ccr24", FIRST_CCR_REGNO + 24 }, \
+ { "ccr25", FIRST_CCR_REGNO + 25 }, \
+ { "ccr26", FIRST_CCR_REGNO + 26 }, \
+ { "ccr27", FIRST_CCR_REGNO + 27 }, \
+ { "ccr28", FIRST_CCR_REGNO + 28 }, \
+ { "ccr29", FIRST_CCR_REGNO + 29 }, \
+ { "ccr30", FIRST_CCR_REGNO + 30 }, \
+ { "ccr31", FIRST_CCR_REGNO + 31 } \
+}
+
+/* We watch for pipeline hazards with these macros. */
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) mep_asm_output_opcode (STREAM, PTR)
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) mep_final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+#define PRINT_OPERAND(STREAM, X, CODE) mep_print_operand (STREAM, X, CODE)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) ((CODE) == '!' || (CODE) == '<')
+
+#define PRINT_OPERAND_ADDRESS(STREAM, X) mep_print_operand_address (STREAM, X)
+
+#define REGISTER_PREFIX "$"
+#define LOCAL_LABEL_PREFIX "."
+#define USER_LABEL_PREFIX ""
+#define IMMEDIATE_PREFIX ""
+
+
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "\t.word .L%d\n", VALUE)
+
+
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_UNWIND_INFO 1
+
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 10 : INVALID_REGNUM)
+
+#define EH_RETURN_STACKADJ_RTX mep_return_stackadj_rtx ()
+#define EH_RETURN_HANDLER_RTX mep_return_handler_rtx ()
+
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ fprintf ((STREAM), "\t.p2align %d\n", (POWER))
+
+
+
+#define CASE_VECTOR_MODE SImode
+
+#define WORD_REGISTER_OPERATIONS
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+#define MOVE_MAX 4
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define STORE_FLAG_VALUE 1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE SImode
+
+#define REGISTER_TARGET_PRAGMAS() mep_register_pragmas ()
+
+/* If defined, a C expression to determine the base term of address X.
+ This macro is used in only one place: `find_base_term' in alias.c.
+
+ It is always safe for this macro to not be defined. It exists so
+ that alias analysis can understand machine-dependent addresses.
+
+ The typical use of this macro is to handle addresses containing
+ a label_ref or symbol_ref within an UNSPEC. */
+#define FIND_BASE_TERM(X) mep_find_base_term (X)
diff --git a/gcc/config/mep/mep.md b/gcc/config/mep/mep.md
new file mode 100644
index 000000000..773a9a0aa
--- /dev/null
+++ b/gcc/config/mep/mep.md
@@ -0,0 +1,2266 @@
+;; Toshiba Media Processor Machine description template
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009 Free
+;; Software Foundation, Inc.
+;; Contributed by Red Hat Inc
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+
+;; Constraints:
+;;
+;; a $sp
+;; b $tp
+;; c control regs
+;; h $hi ($23)
+;; l $lo ($24)
+;; d $hi/$lo pair (DImode)
+;; j $rpc ($22)
+;; r $0..$15
+;; t $0..$7
+;; v $gp
+;; x $c0..$c31
+;; ex coprocessor registers that can be moved to other coprocessor registers
+;; er coprocessor registers that can be moved to and from core registers
+;; em coprocessor registers that can be moved to and from memory
+;; y $ccr0..$ccr31
+;; z $0
+;;
+;; I sign imm16 mov/add
+;; J zero imm16 mov/add
+;; K zero imm24 mov
+;; L sign imm6 add
+;; M zero imm5 slt,shifts
+;; N zero imm4 bCC
+;; O high imm16 mov
+;;
+;; R near symbol
+;; S sign imm8 mov
+;; T tp or gp relative symbol
+;; U non-absolute memory
+;; W %hi(sym)
+;; Y (Rn)
+;; Z Control Bus Symbol
+;;
+;; Modifiers:
+;;
+;; b print unique bit in mask
+;; B print bits required for value (for clip)
+;; h print decimal >> 16.
+;; I print decimal, with hex comment if more than 8 bits
+;; J print unsigned hex
+;; L print set, clr or not (for bitops)
+;; P print memory as a post-inc with no increment
+;; U print bits required for value (for clipu)
+;; x print unsigned decimal or hex, depending on where set bits are
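+;;
+;; For example (illustrative; see the move patterns below): the
+;; movhi alternative constrained "r,S" uses the template
+;; "mov\t%0, %I1", pairing the signed-imm8 constraint S with the %I
+;; modifier, so the constant is printed in decimal with a hex
+;; comment if it needs more than 8 bits.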
+
+(define_constants [
+ (REGSAVE_CONTROL_TEMP 11)
+ (FP_REGNO 8)
+ (TP_REGNO 13)
+ (GP_REGNO 14)
+ (SP_REGNO 15)
+ (PSW_REGNO 16)
+ (LP_REGNO 17)
+ (SAR_REGNO 18)
+ (RPB_REGNO 20)
+ (RPE_REGNO 21)
+ (RPC_REGNO 22)
+ (HI_REGNO 23)
+ (LO_REGNO 24)
+ (CBCR_REGNO 81)
+ ])
+
+(define_constants [
+ (UNS_BLOCKAGE 0)
+ (UNS_TPREL 2)
+ (UNS_GPREL 3)
+ (UNS_REPEAT_BEG 4)
+ (UNS_REPEAT_END 5)
+ (UNS_EH_EPILOGUE 6)
+ (UNS_EREPEAT_BEG 7)
+ (UNS_EREPEAT_END 8)
+ (UNS_BB_TRACE_RET 9)
+ (UNS_DISABLE_INT 10)
+ (UNS_ENABLE_INT 11)
+ (UNS_RETI 12)
+ ])
+
+;; This attribute determines the VLIW packing mechanism. The IVC2
+;; coprocessor has two pipelines (P0 and P1), and a MeP+IVC2 can issue
+;; up to three insns at a time. Most IVC2 insns can run on either
+;; pipeline, however, scheduling some insns on P0 precludes packing a
+;; core insn with it, and only 16-bit core insns can pack with any P0
+;; insn.
+(define_attr "vliw" "basic,ivc2"
+ (const (symbol_ref "TARGET_IVC2")))
+
+;; This attribute describes the kind of memory operand present in the
+;; instruction. This is used to compute the length of the insn based
+;; on the addressing mode used.
+(define_attr "memop" "none,core0,core1,cop0,cop1"
+ (const_string "none"))
+
+(define_attr "intrinsic" "none,cmov,cmov1,cmov2,cmovc1,cmovc2,cmovh1,cmovh2"
+ (const_string "none"))
+
+;; This attribute describes how the instruction may be bundled in a
+;; VLIW instruction. Type MULTI is assumed to use both slots.
+(define_attr "slot" "core,cop,multi"
+ (cond [(eq_attr "intrinsic" "!none")
+ (const_string "cop")]
+ (const_string "core")))
+
+;; This attribute describes the latency of the opcode (ready delay).
+;; The 0 is used to indicate "unspecified". An instruction that
+;; completes immediately with no potential stalls would have a value
+;; of 1, a one cycle stall would be 2, etc.
+(define_attr "latency" ""
+ (const_int 0))
+
+(define_attr "shiftop" "none,operand2"
+ (const_string "none"))
+
+;; This attribute describes the size of the instruction in bytes.
+;; This *must* be exact unless the pattern is SLOT_MULTI, as this
+;; is used by the VLIW bundling code.
+(define_attr "length" ""
+ (cond [(eq_attr "memop" "core0")
+ (symbol_ref "mep_core_address_length (insn, 0)")
+ (eq_attr "memop" "core1")
+ (symbol_ref "mep_core_address_length (insn, 1)")
+ (eq_attr "memop" "cop0")
+ (symbol_ref "mep_cop_address_length (insn, 0)")
+ (eq_attr "memop" "cop1")
+ (symbol_ref "mep_cop_address_length (insn, 1)")
+ ]
+ ; Catch patterns that don't define the length properly.
+ (symbol_ref "(abort (), 0)")))
+
+;; This attribute describes a pipeline hazard seen in the insn.
+(define_attr "stall" "none,int2,ssarb,load,store,ldc,stc,ldcb,stcb,ssrab,fsft,ret,advck,mul,mulr,div"
+ (cond [(and (eq_attr "shiftop" "operand2")
+ (not (match_operand:SI 2 "mep_single_shift_operand" "")))
+ (const_string "int2")]
+ (const_string "none")))
+
+(define_attr "may_trap" "no,yes"
+ (const_string "no"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "length" "4")
+ (set_attr "slot" "multi")])
+
+;; Each IVC2 instruction uses one of these two pipelines. P0S insns
+;; use P0; C3 insns use P1.
+(define_automaton "mep_ivc2")
+(define_cpu_unit "ivc2_core,ivc2_p0,ivc2_p1" "mep_ivc2")
+
+;; Each core or IVC2 instruction is bundled into one of these slots.
+;; Supported bundlings:
+;;
+;; Core mode:
+;;
+;; C1 [-----core-----]
+;; C2 [-------------core-------------]
+;; C3 [--------------c3--------------]
+;;
+;; VLIW mode:
+;;
+;; V1 [-----core-----][--------p0s-------][------------p1------------]
+;; V2 [-------------core-------------]xxxx[------------p1------------]
+;; V3 1111[--p0--]0111[--------p0--------][------------p1------------]
+
+(define_attr "slots" "core,c3,p0,p0_p0s,p0_p1,p0s,p0s_p1,p1" (const_string "core"))
+
+(define_cpu_unit "ivc2_slot_c16,ivc2_slot_c32,ivc2_slot_c3,ivc2_slot_p0s,ivc2_slot_p0,ivc2_slot_p1" "mep_ivc2")
+
+(define_insn_reservation "ivc2_insn_core16" 1
+ (and (eq_attr "vliw" "ivc2")
+ (and (eq (symbol_ref "get_attr_length(insn)") (const_int 2))
+ (and (eq_attr "intrinsic" "none")
+ (eq_attr "slot" "!cop"))))
+ "ivc2_core+ivc2_slot_c16")
+
+(define_insn_reservation "ivc2_insn_core32" 1
+ (and (eq_attr "vliw" "ivc2")
+ (and (eq (symbol_ref "get_attr_length(insn)") (const_int 4))
+ (and (eq_attr "intrinsic" "none")
+ (eq_attr "slot" "!cop"))))
+ "ivc2_core+ivc2_slot_c32")
+
+;; These shouldn't happen when in VLIW mode.
+(define_insn_reservation "ivc2_insn_c3" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "c3"))
+ "ivc2_p1+ivc2_slot_c3")
+
+(define_insn_reservation "ivc2_insn_p0" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p0"))
+ "ivc2_p0+ivc2_slot_p0")
+
+(define_insn_reservation "ivc2_insn_p0_p0s" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p0_p0s"))
+ "ivc2_p0+ivc2_slot_p0|ivc2_p0+ivc2_slot_p0s")
+
+(define_insn_reservation "ivc2_insn_p0_p1" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p0_p1"))
+ "ivc2_p0+ivc2_slot_p0|ivc2_p1+ivc2_slot_p1")
+
+(define_insn_reservation "ivc2_insn_p0s" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p0s"))
+ "ivc2_p0+ivc2_slot_p0s")
+
+(define_insn_reservation "ivc2_insn_p0s_p1" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p0s_p1"))
+ "ivc2_p0+ivc2_slot_p0s|ivc2_p1+ivc2_slot_p1")
+
+(define_insn_reservation "ivc2_insn_p1" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "slots" "p1"))
+ "ivc2_p1+ivc2_slot_p1")
+
+;; These run in C3 also, but when we're doing VLIW scheduling, they
+;; only run in P0.
+(define_insn_reservation "ivc2_insn_cmov" 1
+ (and (eq_attr "vliw" "ivc2")
+ (eq_attr "intrinsic" "!none"))
+ "ivc2_p0+ivc2_slot_p0")
+
+
+(exclusion_set "ivc2_slot_c32"
+ "ivc2_slot_p0,ivc2_slot_p0s")
+(exclusion_set "ivc2_slot_p0"
+ "ivc2_slot_p0s")
+(exclusion_set "ivc2_slot_c16"
+ "ivc2_slot_p0")
+(exclusion_set "ivc2_slot_c16"
+ "ivc2_slot_c32")
+
+;; Non-IVC2 scheduling.
+(define_automaton "mep")
+(define_cpu_unit "core,cop" "mep")
+
+;; Latencies are the time between one insn entering the second pipeline
+;; stage (E2, LD, A2 or V2) and the next instruction entering the same
+;; stage. When an instruction assigns to general registers, the default
+;; latencies are for when the next instruction receives the register
+;; through bypass 1.
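+;;
+;; For example, "h1_load" below has a default latency of 4, and the
+;; define_bypass entries reduce that to 3 for consumers that can
+;; read the loaded value through the bypass network.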
+
+;; Arithmetic instructions that execute in a single stage.
+(define_insn_reservation "h1_int1" 2
+ (and (eq_attr "slot" "!cop")
+ (eq_attr "stall" "none"))
+ "core")
+(define_bypass 1 "h1_int1" "h1_int1,h1_ssarb")
+(define_bypass 1 "h1_int1" "h1_store" "mep_store_data_bypass_p")
+
+;; $sar can be read by an immediately following fsft or ldc.
+(define_insn_reservation "h1_ssarb" 1
+ (eq_attr "stall" "ssarb")
+ "core")
+
+;; Arithmetic instructions that execute in two stages.
+(define_insn_reservation "h1_int2" 2
+ (eq_attr "stall" "int2,fsft")
+ "core")
+(define_bypass 1 "h1_int2" "h1_int1,h1_ssarb")
+(define_bypass 1 "h1_int2" "h1_store" "mep_store_data_bypass_p")
+
+(define_insn_reservation "h1_load" 4
+ (eq_attr "stall" "load")
+ "core")
+(define_bypass 3 "h1_load" "h1_int1,h1_ssarb")
+(define_bypass 3 "h1_load" "h1_store" "mep_store_data_bypass_p")
+
+(define_insn_reservation "h1_store" 1
+ (eq_attr "stall" "store")
+ "core")
+
+(define_insn_reservation "h1_ipipe_ldc" 2
+ (and (eq_attr "stall" "ldc")
+ (ne (symbol_ref "mep_ipipe_ldc_p(insn)") (const_int 0)))
+ "core")
+(define_bypass 1 "h1_ipipe_ldc" "h1_int1,h1_ssarb")
+(define_bypass 1 "h1_ipipe_ldc" "h1_store" "mep_store_data_bypass_p")
+
+(define_insn_reservation "h1_apipe_ldc" 2
+ (and (eq_attr "stall" "ldc")
+ (eq (symbol_ref "mep_ipipe_ldc_p(insn)") (const_int 0)))
+ "core")
+
+;; 2 is correct for stc->ret and stc->fsft. The most important remaining
+;; case is stc->madd, which induces no stall.
+(define_insn_reservation "h1_stc" 2
+ (eq_attr "stall" "stc")
+ "core")
+(define_bypass 1 "h1_stc" "h1_mul")
+
+;; ??? Parameterised latency.
+(define_insn_reservation "h1_ldcb" 5
+ (eq_attr "stall" "ldcb")
+ "core")
+
+(define_insn_reservation "h1_stcb" 1
+ (eq_attr "stall" "stcb")
+ "core")
+
+(define_insn_reservation "h1_advck" 6
+ (eq_attr "stall" "advck")
+ "core")
+
+(define_insn_reservation "h1_mul" 5
+ (eq_attr "stall" "mul,mulr")
+ "core")
+(define_bypass 4 "h1_mul" "h1_int1,h1_ssarb")
+(define_bypass 4 "h1_mul" "h1_store" "mep_store_data_bypass_p")
+(define_bypass 1 "h1_mul" "h1_mul" "mep_mul_hilo_bypass_p")
+
+(define_insn_reservation "h1_div" 36
+ (eq_attr "stall" "div")
+ "core")
+
+(define_insn_reservation "h1_cop" 1
+ (eq_attr "slot" "cop")
+ "cop")
+
+(include "predicates.md")
+(include "constraints.md")
+(include "intrinsics.md")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Moves
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, QImode))
+ DONE;
+}")
+
+;; The idea here is to prefer the 16-bit tp-relative load, but to fall back
+;; to the general 32-bit load rather than do silly things with spill regs.
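+;; For example (hypothetical symbol x): with operand 0 in $0..$7 and
+;; $tp as the base, this matches the 2-byte form of
+;; "lb $0, %tpoff(x)($tp)"; the *r alternatives fall back to the
+;; 4-byte encoding.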
+(define_insn "*movqi_tprel_load"
+ [(set (match_operand:QI 0 "mep_tprel_operand" "=t,*r")
+ (mem:QI (plus:SI (match_operand:SI 1 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 2
+ "symbolic_operand" "s,s")]
+ UNS_TPREL)))))]
+ ""
+ "lb\\t%0, %%tpoff(%2)(%1)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "load")])
+
+(define_insn "*movqi_tprel_store"
+ [(set (mem:QI (plus:SI (match_operand:SI 0 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 1
+ "symbolic_operand" "s,s")]
+ UNS_TPREL))))
+ (match_operand:QI 2 "mep_tprel_operand" "t,*r"))]
+ ""
+ "sb\\t%2, %%tpoff(%1)(%0)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "store")])
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r, r,m,r,c,r,y,r,er,ex,em,Y")
+ (match_operand:QI 1 "general_operand" " r,n,rm,r,c,r,y,r,er,r,ex,Y,em"))]
+ "mep_mov_ok (operands, QImode)"
+ "@
+ mov\\t%0, %1
+ mov\\t%0, %1
+ lb\\t%0, %1
+ sb\\t%1, %0
+ ldc\\t%0, %1
+ stc\\t%1, %0
+ cmovc\\t%0, %1
+ cmovc\\t%0, %1
+ cmov\\t%0, %1
+ cmov\\t%0, %1
+ %<\\t%0, %M1
+ lbcpa\\t%0, %P1
+ sbcpa\\t%1, %P0"
+ [(set_attr "length" "2,2,*,*,2,2,4,4,4,4,*,4,4")
+ (set_attr "intrinsic" "*,*,*,*,*,*,cmovc2,cmovc1,cmov2,cmov1,cmov,*,*")
+ (set_attr "stall" "*,*,load,store,ldc,stc,*,*,*,*,*,load,store")
+ (set_attr "memop" "*,*,core1,core0,*,*,*,*,*,*,*,*,*")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, HImode))
+ DONE;
+}")
+
+(define_insn "*movhi_tprel_load"
+ [(set (match_operand:HI 0 "mep_tprel_operand" "=t,*r")
+ (mem:HI (plus:SI (match_operand:SI 1 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 2
+ "symbolic_operand" "s,s")]
+ UNS_TPREL)))))]
+ ""
+ "lh\\t%0, %%tpoff(%2)(%1)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "load")])
+
+(define_insn "*movhi_tprel_store"
+ [(set (mem:HI (plus:SI (match_operand:SI 0 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 1
+ "symbolic_operand" "s,s")]
+ UNS_TPREL))))
+ (match_operand:HI 2 "mep_tprel_operand" "t,*r"))]
+ ""
+ "sh\\t%2, %%tpoff(%1)(%0)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "store")])
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,c,r,y,r,er,ex,em,Y")
+ (match_operand:HI 1 "general_operand" " r,S,n,m,r,c,r,y,r,er,r,ex,Y,em"))]
+ "mep_mov_ok (operands, HImode)"
+ "@
+ mov\\t%0, %1
+ mov\\t%0, %I1
+ mov\\t%0, %I1
+ lh\\t%0, %1
+ sh\\t%1, %0
+ ldc\\t%0, %1
+ stc\\t%1, %0
+ cmovc\\t%0, %1
+ cmovc\\t%0, %1
+ cmov\\t%0, %1
+ cmov\\t%0, %1
+ %<\\t%0, %M1
+ lhcpa\\t%0, %P1
+ shcpa\\t%1, %P0"
+ [(set_attr "length" "2,2,4,*,*,2,2,4,4,4,4,*,4,4")
+ (set_attr "intrinsic" "*,*,*,*,*,*,*,cmovc2,cmovc1,cmov2,cmov1,cmov,*,*")
+ (set_attr "stall" "*,*,*,load,store,ldc,stc,*,*,*,*,*,load,store")
+ (set_attr "memop" "*,*,*,core1,core0,*,*,*,*,*,*,*,*,*")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, SImode))
+ DONE;
+}")
+
+(define_insn "*movsi_tprel_load"
+ [(set (match_operand:SI 0 "mep_tprel_operand" "=t,*r")
+ (mem:SI (plus:SI (match_operand:SI 1 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 2
+ "symbolic_operand" "s,s")]
+ UNS_TPREL)))))]
+ ""
+ "lw\\t%0, %%tpoff(%2)(%1)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "load")])
+
+(define_insn "*movsi_tprel_store"
+ [(set (mem:SI (plus:SI (match_operand:SI 0 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 1
+ "symbolic_operand" "s,s")]
+ UNS_TPREL))))
+ (match_operand:SI 2 "mep_tprel_operand" "t,*r"))]
+ ""
+ "sw\\t%2, %%tpoff(%1)(%0)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "store")])
+
+(define_insn "movsi_topsym_s"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand:SI 1 "symbolic_operand" "s")))]
+ ""
+ "movh\\t%0, %%hi(%1)"
+ [(set_attr "length" "4")])
+
+(define_insn "movsi_botsym_s"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "symbolic_operand" "s")))]
+ ""
+ "add3\\t%0, %1, %%lo(%2)"
+ [(set_attr "length" "4")])
+
+
+
+(define_insn "cmovh_getsub"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (subreg:SI (match_operand:DI 1 "register_operand" "er") 4))]
+ "0 && TARGET_64BIT_CR_REGS"
+ "cmovh\\t%0, %1"
+ [(set_attr "intrinsic" "cmovh2")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_internal"
+ [(set (match_operand:SI 0 "mep_movdest_operand"
+ "=r,r,r,r,r, t,t,r,r,r,Z,m,r,c,r,y,r, er,ex,em,U ")
+ (match_operand:SI 1 "general_operand"
+ " r,S,I,J,OW,K,s,i,Z,m,r,r,c,r,y,r,er,r, ex,U, em"))]
+ "mep_mov_ok (operands, SImode)"
+ "@
+ mov\\t%0, %1
+ mov\\t%0, %I1
+ mov\\t%0, %I1
+ movu\\t%0, %J1
+ movh\\t%0, %h1
+ movu\\t%0, %x1
+ movu\\t%0, %1
+ #
+ ldcb\\t%0, %1
+ lw\\t%0, %1
+ stcb\\t%1, %0
+ sw\\t%1, %0
+ ldc\\t%0, %1
+ stc\\t%1, %0
+ cmovc\\t%0, %1
+ cmovc\\t%0, %1
+ cmov\\t%0, %1
+ cmov\\t%0, %1
+ %<\\t%0, %M1
+ lwcp\\t%0, %1
+ swcp\\t%1, %0"
+ [(set_attr "length" "2,2,4,4,4,4,4,*,4,*,4,*,2,2,4,4,4,4,4,*,*")
+ (set_attr "intrinsic" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,cmovc2,cmovc1,cmov2,cmov1,cmov,*,*")
+ (set_attr "stall" "*,*,*,*,*,*,*,*,ldcb,load,stcb,store,ldc,stc,*,*,*,*,*,load,store")
+ (set_attr "memop" "*,*,*,*,*,*,*,*,*,core1,*,core0,*,*,*,*,*,*,*,cop1,cop0")
+ (set_attr "slot" "*,*,*,*,*,*,*,multi,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "mep_split_mov (operands, 0)"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (ior:SI (match_dup 0) (match_dup 3)))]
+ "
+{
+ HOST_WIDE_INT value;
+ int lo, hi;
+
+ value = INTVAL (operands[1]);
+
+ lo = value & 0xffff;
+ hi = trunc_int_for_mode (value & 0xffff0000, SImode);
+
+ operands[2] = GEN_INT (hi);
+ operands[3] = GEN_INT (lo);
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "mep_split_mov (operands, 1)"
+ [(set (match_dup 0) (high:SI (match_dup 1)))
+ (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))]
+ "")
+
+;; ??? What purpose do these two serve that high+lo_sum do not?
+(define_insn "movsi_topsym_u"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_operand:SI 1 "symbolic_operand" "s")
+ (const_int -65536)))]
+ ""
+ "movh\\t%0, %%uhi(%1)"
+ [(set_attr "length" "4")])
+
+(define_insn "movsi_botsym_u"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operand:SI 1 "register_operand" "0")
+ (and:SI (match_operand:SI 2 "symbolic_operand" "s")
+ (const_int 65535))))]
+ ""
+ "or3\\t%0, %1, %%lo(%2)"
+ [(set_attr "length" "4")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "" "")
+ (match_operand:DI 1 "" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, DImode))
+ DONE;
+}")
+
+(define_insn "*movdi_internal_32"
+ [(set (match_operand:DI 0 "mep_movdest_operand" "= r,m,r,c,r,er,ex,em,U")
+ (match_operand:DI 1 "general_operand" "rim,r,c,r,er,r,ex,U,em"))]
+ "TARGET_32BIT_CR_REGS && mep_mov_ok (operands, DImode)"
+ "#"
+ [(set_attr "slot" "multi")])
+
+(define_insn "*movdi_internal_64"
+ [(set (match_operand:DI 0 "mep_movdest_operand" "=r,r,m,r,c,r,er,ex,em,U")
+ (match_operand:DI 1 "general_operand" "r,im,r,c,r,er,r,ex,U,em"))]
+ "TARGET_64BIT_CR_REGS && mep_mov_ok (operands, DImode)"
+ "@
+ #
+ #
+ #
+ #
+ #
+ #
+ #
+ %<\\t%0, %M1
+ lmcp\\t%0, %1
+ smcp\\t%1, %0"
+ [(set_attr "slot" "multi,multi,multi,multi,multi,multi,multi,*,*,*")
+ (set_attr "intrinsic" "*,*,*,*,*,*,*,cmov,*,*")
+ (set_attr "memop" "*,*,*,*,*,*,*,cop0,cop1,cop0")
+ (set_attr "stall" "*,*,*,*,*,*,*,*,load,store")])
+
+(define_insn "*movdi_cop_postinc"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "=em")
+ (mem:DI (reg:SI SP_REGNO)))
+ (set (reg:SI SP_REGNO)
+ (plus:SI (reg:SI SP_REGNO)
+ (const_int 8)))
+ ]
+ )]
+ "TARGET_COP"
+ "lmcpi\\t%0,($sp+)"
+ [(set_attr "length" "2")])
+
+(define_insn "*movdi_cop_postinc"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "=em")
+ (mem:DI (match_operand:SI 2 "register_operand" "r")))
+ (set (match_operand:SI 1 "register_operand" "=0")
+ (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (const_int 8)))
+ ]
+ )]
+ "TARGET_COP"
+ "lmcpi\\t%0,(%1+)"
+ [(set_attr "length" "2")])
+
+(define_insn "*cmovh_set"
+ [(set (zero_extract:SI (match_operand:DI 0 "register_operand" "+er")
+ (const_int 32)
+ (const_int 32))
+ (match_operand:SI 1 "register_operand" "r"))]
+ "TARGET_64BIT_CR_REGS"
+ "cmovh\\t%0, %1"
+ [(set_attr "intrinsic" "cmovh1")
+ (set_attr "length" "4")])
+
+(define_insn "cmovh_get"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:DI 1 "register_operand" "er")
+ (const_int 32)
+ (const_int 32)))]
+ "TARGET_64BIT_CR_REGS"
+ "cmovh\\t%0, %1"
+ [(set_attr "intrinsic" "cmovh2")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:DI 0 "mep_movdest_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "reload_completed && mep_multi_slot (insn)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "mep_split_wide_move (operands, DImode);")
+
+;; Floating Point Moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, SFmode))
+ DONE;
+}")
+
+(define_insn "*movsf_tprel_load"
+ [(set (match_operand:SF 0 "mep_tprel_operand" "=t,*r")
+ (mem:SF (plus:SI (match_operand:SI 1 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 2
+ "symbolic_operand" "s,s")]
+ UNS_TPREL)))))]
+ ""
+ "lw\\t%0, %%tpoff(%2)(%1)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "load")])
+
+(define_insn "*movsf_tprel_store"
+ [(set (mem:SF (plus:SI (match_operand:SI 0 "mep_tp_operand" "b,*r")
+ (const:SI (unspec:SI [(match_operand:SI 1
+ "symbolic_operand" "s,s")]
+ UNS_TPREL))))
+ (match_operand:SF 2 "mep_tprel_operand" "t,*r"))]
+ ""
+ "sw\\t%2, %%tpoff(%1)(%0)"
+ [(set_attr "length" "2,4")
+ (set_attr "stall" "store")])
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "mep_movdest_operand"
+ "=r,r,r,r,Z,m,r,c,r,y,r,er,ex,em,U")
+ (match_operand:SF 1 "general_operand"
+ " r,F,Z,m,r,r,c,r,y,r,er,r,ex,U,em"))]
+ "mep_mov_ok (operands, SFmode)"
+ "@
+ mov\\t%0, %1
+ #
+ ldcb\\t%0, %1
+ lw\\t%0, %1
+ stcb\\t%1, %0
+ sw\\t%1, %0
+ ldc\\t%0, %1
+ stc\\t%1, %0
+ cmovc\\t%0, %1
+ cmovc\\t%0, %1
+ cmov\\t%0, %1
+ cmov\\t%0, %1
+ %<\\t%0, %M1
+ lwcp\\t%0, %1
+ swcp\\t%1, %0"
+ [(set_attr "length" "2,*,2,*,2,*,2,2,*,*,4,4,*,*,*")
+ (set_attr "intrinsic" "*,*,*,*,*,*,*,*,cmovc2,cmovc1,cmov2,cmov1,cmov,*,*")
+ (set_attr "stall" "*,*,ldcb,load,stcb,store,ldc,stc,*,*,*,*,*,load,store")
+ (set_attr "memop" "*,*,*,core1,*,core0,*,*,*,*,*,*,*,cop1,cop0")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "const_double_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ REAL_VALUE_TYPE rv;
+ HOST_WIDE_INT value;
+ HOST_WIDE_INT lo, hi;
+ rtx out;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, value);
+
+ lo = value & 0xffff;
+ hi = trunc_int_for_mode (value & 0xffff0000, SImode);
+
+ out = gen_rtx_REG (SImode, REGNO (operands[0]));
+ emit_move_insn (out, GEN_INT (hi));
+ if (lo != 0)
+ emit_insn (gen_iorsi3 (out, out, GEN_INT (lo)));
+ DONE;
+}")
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "" "")
+ (match_operand:DF 1 "" ""))]
+ ""
+ "
+{
+ if (mep_expand_mov (operands, DFmode))
+ DONE;
+}")
+
+(define_insn "*movdf_internal_32"
+ [(set (match_operand:DF 0 "mep_movdest_operand" "= r,m,r,c,r,er,ex,em,U")
+ (match_operand:DF 1 "general_operand" "rFm,r,c,r,er,r,ex,U,em"))]
+ "TARGET_32BIT_CR_REGS && mep_mov_ok (operands, DFmode)"
+ "#"
+ [(set_attr "slot" "multi")])
+
+(define_insn "*movdf_internal_64"
+ [(set (match_operand:DF 0 "mep_movdest_operand" "= r,m,r,c,r,er,ex,em,U")
+ (match_operand:DF 1 "general_operand" "rFm,r,c,r,er,r,ex,U,em"))]
+ "TARGET_64BIT_CR_REGS && mep_mov_ok (operands, DFmode)"
+ "@
+ #
+ #
+ #
+ #
+ #
+ #
+ %<\\t%0, %M1
+ lmcp\\t%0, %1
+ smcp\\t%1, %0"
+ [(set_attr "slot" "multi,multi,multi,multi,multi,multi,*,*,*")
+ (set_attr "intrinsic" "*,*,*,*,*,*,cmov,*,*")
+ (set_attr "memop" "*,*,*,*,*,*,*,cop1,cop0")
+ (set_attr "stall" "*,*,*,*,*,*,*,load,store")])
+
+(define_split
+ [(set (match_operand:DF 0 "mep_movdest_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed && mep_multi_slot (insn)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "mep_split_wide_move (operands, DFmode);")
+
+
+(define_insn "*lbcpa"
+ [(set (match_operand:SI 0 "register_operand" "=em")
+ (sign_extend:SI (mem:QI (match_operand:SI 2 "register_operand" "1"))))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI (match_dup 2)
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "lbcpa\t%0, (%1+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "load")])
+
+(define_insn "*sbcpa"
+ [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "em"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 3 "cgen_h_sint_8a1_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "sbcpa\t%2, (%0+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "store")])
+
+(define_insn "*lhcpa"
+ [(set (match_operand:SI 0 "register_operand" "=em")
+ (sign_extend:SI (mem:HI (match_operand:SI 2 "register_operand" "1"))))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI (match_dup 2)
+ (match_operand:SI 3 "cgen_h_sint_7a2_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "lhcpa\t%0, (%1+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "load")])
+
+(define_insn "*shcpa"
+ [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "em"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 3 "cgen_h_sint_7a2_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "shcpa\t%2, (%0+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "store")])
+
+(define_insn "*lwcpi"
+ [(set (match_operand:SI 0 "register_operand" "=em")
+ (mem:SI (match_operand:SI 2 "register_operand" "1")))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI (match_dup 2)
+ (const_int 4)))]
+ "TARGET_COP && reload_completed"
+ "lwcpi\t%0, (%1+)"
+ [(set_attr "length" "2")
+ (set_attr "stall" "load")])
+
+(define_insn "*lwcpa"
+ [(set (match_operand:SI 0 "register_operand" "=em")
+ (mem:SI (match_operand:SI 2 "register_operand" "1")))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI (match_dup 2)
+ (match_operand:SI 3 "cgen_h_sint_6a4_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "lwcpa\t%0, (%1+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "load")])
+
+(define_insn "*swcpi"
+ [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "em"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (const_int 4)))]
+ "TARGET_COP && reload_completed"
+ "swcpi\t%2, (%0+)"
+ [(set_attr "length" "2")
+ (set_attr "stall" "store")])
+
+(define_insn "*swcpa"
+ [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "em"))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 3 "cgen_h_sint_6a4_immediate" "")))]
+ "TARGET_COP && reload_completed"
+ "swcpa\t%2, (%0+), %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "store")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "cgen_h_sint_8a1_immediate" "")))]
+ "TARGET_COP && mep_use_post_modify_p (insn, operands[0], operands[1])"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+})
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Reloads
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "reload_insi"
+ [(set (match_operand:SI 0 "mep_reload_operand" "")
+ (match_operand:SI 1 "mep_reload_operand" "r"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ mep_expand_reload (operands, SImode);
+ DONE;
+}")
+
+(define_expand "reload_outsi"
+ [(set (match_operand:SI 0 "mep_reload_operand" "=r")
+ (match_operand:SI 1 "mep_reload_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ mep_expand_reload (operands, SImode);
+ DONE;
+}")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conversions
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,em")
+ (sign_extend:SI
+ (match_operand:QI 1 "nonimmediate_operand" "0,m,Y")))]
+ ""
+ "@
+ extb\\t%0
+ lb\\t%0, %1
+ lbcpa\\t%0, %P1"
+ [(set_attr "length" "2,*,*")
+ (set_attr "stall" "*,load,load")
+ (set_attr "memop" "*,core1,cop1")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,em")
+ (sign_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "0,m,Y")))]
+ ""
+ "@
+ exth\\t%0
+ lh\\t%0, %1
+ lhcpa\\t%0, %P1"
+ [(set_attr "length" "2,*,*")
+ (set_attr "stall" "*,load,load")
+ (set_attr "memop" "*,core1,cop1")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI
+ (match_operand:QI 1 "nonimmediate_operand" "0,r,m")))]
+ ""
+ "@
+ extub\\t%0
+ and3\\t%0, %1, 255
+ lbu\\t%0, %1"
+ [(set_attr "length" "2,4,*")
+ (set_attr "stall" "*,*,load")
+ (set_attr "memop" "*,*,core1")])
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "0,r,m")))]
+ ""
+ "@
+ extuh\\t%0
+ and3\\t%0, %1, 65535
+ lhu\\t%0, %1"
+ [(set_attr "length" "2,4,*")
+ (set_attr "stall" "*,*,load")
+ (set_attr "memop" "*,*,core1")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32 bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r,0,r")
+ (match_operand:SI 2 "mep_add_operand" "r,L,IT")))]
+ ""
+ "@
+ add3\\t%0, %1, %2
+ add\\t%0, %2
+ add3\\t%0, %1, %I2"
+ [(set (attr "length")
+ (if_then_else (eq_attr "alternative" "2")
+ (if_then_else (and (match_operand:SI 1 "mep_sp_operand" "")
+ (match_operand:SI 2 "mep_imm7a4_operand" ""))
+ (const_int 2)
+ (const_int 4))
+ (const_int 2)))])
+
+;; The intention here is to combine the 16-bit add with the 16-bit
+;; move to create a 32-bit add. It's the same size, but takes one
+;; less machine cycle. It will happen to match a 32-bit add with a
+;; 16-bit move also, but gcc shouldn't be doing that ;)
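+;;
+;; A sketch of the intended transformation (register numbers are
+;; hypothetical; $1 must be dead after the move):
+;;   add $1, 4       # 16-bit add
+;;   mov $2, $1      # 16-bit move
+;; becomes
+;;   add3 $2, $1, 4  # one 32-bit add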
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (match_operand:SI 4 "register_operand" ""))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && GR_REGNO_P (REGNO (operands[3]))
+ && dead_or_set_p (peep2_next_insn (1), operands[4])"
+ [(set (match_dup 3)
+ (plus:SI (match_dup 1)
+ (match_dup 2)))]
+ "")
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "sub\\t%0, %2"
+ [(set_attr "length" "2")])
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))]
+ "TARGET_OPT_MULT || TARGET_COPRO_MULT"
+{
+ emit_insn (gen_mulsi3_1 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Generated by mep_reuse_lo_p when no GPR destination is needed.
+(define_insn "mulsi3_lo"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (match_scratch:SI 3 "=h"))]
+ "TARGET_OPT_MULT && reload_completed"
+ "mul\\t%1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mul")])
+
+;; Generated by mep_reuse_lo_p when both destinations of a mulr
+;; are needed.
+(define_insn "mulsi3r"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (mult:SI (match_operand:SI 2 "register_operand" "1")
+ (match_operand:SI 3 "register_operand" "r")))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (mult:SI (match_dup 2)
+ (match_dup 3)))
+ (clobber (match_scratch:SI 4 "=h"))]
+ "TARGET_OPT_MULT && reload_completed"
+ "mulr\\t%2, %3"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mulr")])
+
+(define_insn "mulsi3_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (match_scratch:SI 3 "=l"))
+ (clobber (match_scratch:SI 4 "=h"))]
+ "TARGET_OPT_MULT"
+ "mulr\\t%1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mulr")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" ""))))]
+ "TARGET_OPT_MULT"
+ "
+{
+ rtx hi = gen_reg_rtx (SImode);
+ rtx lo = gen_reg_rtx (SImode);
+
+ emit_insn (gen_mulsidi3_i (hi, lo, operands[1], operands[2]));
+ emit_move_insn (gen_lowpart (SImode, operands[0]), lo);
+ emit_move_insn (gen_highpart (SImode, operands[0]), hi);
+ DONE;
+}")
+
+(define_insn "mulsidi3_i"
+ [(set (match_operand:SI 0 "mep_hi_operand" "=h")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (sign_extend:DI
+ (match_operand:SI 3 "register_operand" "r")))
+ (const_int 32))))
+ (set (match_operand:SI 1 "mep_lo_operand" "=l")
+ (mult:SI (match_dup 2)
+ (match_dup 3)))]
+ "TARGET_OPT_MULT"
+ "mul\\t%2, %3"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mul")])
+
+(define_insn "smulsi3_highpart"
+ [(set (match_operand:SI 0 "mep_hi_operand" "=h")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI LO_REGNO))]
+ "TARGET_OPT_MULT"
+ "mul\\t%1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mul")])
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "mep_hi_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" ""))))]
+ "TARGET_OPT_MULT"
+ "
+{
+ rtx hi = gen_reg_rtx (SImode);
+ rtx lo = gen_reg_rtx (SImode);
+
+ emit_insn (gen_umulsidi3_i (hi, lo, operands[1], operands[2]));
+ emit_move_insn (gen_lowpart (SImode, operands[0]), lo);
+ emit_move_insn (gen_highpart (SImode, operands[0]), hi);
+ DONE;
+}")
+
+(define_insn "umulsidi3_i"
+ [(set (match_operand:SI 0 "mep_hi_operand" "=h")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r"))
+ (zero_extend:DI
+ (match_operand:SI 3 "register_operand" "r")))
+ (const_int 32))))
+ (set (match_operand:SI 1 "mep_lo_operand" "=l")
+ (mult:SI (match_dup 2)
+ (match_dup 3)))]
+ "TARGET_OPT_MULT"
+ "mulu\\t%2, %3"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mul")])
+
+(define_insn "umulsi3_highpart"
+ [(set (match_operand:SI 0 "mep_hi_operand" "=h")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI LO_REGNO))]
+ "TARGET_OPT_MULT"
+ "mulu %1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "mul")])
+
+;; These two don't currently match because we don't have an adddi3 pattern.
+(define_insn "*smultdi_and_add"
+ [(set (match_operand:DI 0 "mep_hi_operand" "=d")
+ (plus:DI (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (match_operand:DI 3 "mep_hi_operand" "0")))]
+ "TARGET_OPT_MULT && TARGET_BIG_ENDIAN"
+ "maddu\\t%1, %2"
+ [(set_attr "length" "4")
+ (set_attr "stall" "mul")])
+
+(define_insn "*umultdi_and_add"
+ [(set (match_operand:DI 0 "mep_hi_operand" "=d")
+ (plus:DI (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "r"))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (match_operand:DI 3 "mep_hi_operand" "0")))]
+ "TARGET_OPT_MULT && TARGET_BIG_ENDIAN"
+ "madd\\t%1, %2"
+ [(set_attr "length" "4")
+ (set_attr "stall" "mul")])
+
+;; A pattern for 'r1 = r2 * r3 + r4'. There are three possible
+;; implementations:
+;;
+;; (1) 'mulr;add3'. This is usually the best choice if the instruction
+;; is not part of a natural multiply-accumulate chain. It has the
+;; same latency as 'stc;maddr' but doesn't tie up $lo for as long.
+;;
+;; (2) 'madd'. This is the best choice if the instruction is in the
+;; middle of a natural multiply-accumulate chain. r4 will already
+;; be in $lo and r1 will also be needed in $lo.
+;;
+;; (3) 'maddr'. This is the best choice if the instruction is at the
+;; end of a natural multiply-accumulate chain. r4 will be in $lo
+;; but r1 will be needed in a GPR.
+;;
+;; In theory, we could put all the alternatives into a single pattern and
+;; leave the register allocator to choose between them. However, this can
+;; sometimes produce poor results in practice.
+;;
+;; This pattern therefore describes a general GPR-to-GPR operation that
+;; has a slight preference for cases in which operands 0 and 1 are tied.
+;; After reload, we try to rewrite the patterns using peephole2s (if
+;; enabled), falling back on define_splits if that fails. See also
+;; mep_reuse_lo_p.
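+;;
+;; As a sketch (register numbers are hypothetical), r1 = r2 * r3 + r4
+;; could become:
+;; (1) "mulr $2, $3" then "add3 $1, $2, $4";
+;; (2) a single "madd $2, $3", with r4 already live in $lo and the
+;;     sum left there;
+;; (3) "stc $4, $lo" then "maddr $2, $3", leaving the sum in both
+;;     $lo and a GPR.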
+(define_insn "maddsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "register_operand" "r,r"))
+ (match_operand:SI 3 "register_operand" "r,r")))
+ (clobber (match_scratch:SI 4 "=l,l"))
+ (clobber (match_scratch:SI 5 "=h,h"))]
+ "TARGET_OPT_MULT"
+ "#"
+ [(set_attr "length" "8")
+ (set_attr "stall" "mulr")])
+
+;; Implement maddsi3 using maddr if operand 3 is already available in $lo.
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" ""))
+ (match_operand:SI 3 "register_operand" "")))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))])]
+ "TARGET_OPT_MULT
+ && reload_completed
+ && mep_reuse_lo_p (operands[4], operands[3], insn,
+ !rtx_equal_p (operands[1], operands[3])
+ && !rtx_equal_p (operands[2], operands[3])
+ && (rtx_equal_p (operands[0], operands[3])
+ || peep2_reg_dead_p (1, operands[3])))"
+ [(parallel
+ [(set (match_dup 4)
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 2))
+ (match_dup 4)))
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 2))
+ (match_dup 4)))
+ (clobber (match_dup 5))])]
+ "operands[2] = mep_mulr_source (0, operands[0], operands[1], operands[2]);")
+
+;; This splitter implements maddsi3 as "mulr;add3". It only works if
+;; operands 0 and 3 are distinct, since operand 0 is clobbered before
+;; operand 3 is used.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" ""))
+ (match_operand:SI 3 "register_operand" "")))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))]
+ "TARGET_OPT_MULT
+ && reload_completed
+ && !rtx_equal_p (operands[0], operands[3])"
+ [(parallel [(set (match_dup 0)
+ (mult:SI (match_dup 0)
+ (match_dup 2)))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))])
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_dup 3)))]
+ "operands[2] = mep_mulr_source (0, operands[0], operands[1], operands[2]);")
+
+;; This is the fallback splitter for maddsi3. It moves operand 3 into
+;; $lo and then uses maddr.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" ""))
+ (match_operand:SI 3 "register_operand" "")))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))]
+ "TARGET_OPT_MULT
+ && reload_completed"
+ [(parallel [(set (match_dup 4)
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 2))
+ (match_dup 4)))
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 0)
+ (match_dup 2))
+ (match_dup 4)))
+ (clobber (match_dup 5))])]
+{
+ emit_move_insn (operands[4], operands[3]);
+ operands[2] = mep_mulr_source (0, operands[0], operands[1], operands[2]);
+})
+
+;; Remove unnecessary stcs to $lo. This cleans up the moves generated
+;; by earlier calls to mep_reuse_lo_p.
+(define_peephole2
+ [(set (match_operand:SI 0 "mep_lo_operand" "")
+ (match_operand:SI 1 "register_operand" ""))]
+ "TARGET_OPT_MULT
+ && mep_reuse_lo_p (operands[0], operands[1], insn,
+ peep2_reg_dead_p (1, operands[1]))"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+})
+
+(define_insn "maddsi3_lo"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "mep_lo_operand" "0")))
+ (clobber (match_scratch:SI 4 "=h"))]
+ "TARGET_OPT_MULT && reload_completed"
+ "madd\\t%1, %2"
+ [(set_attr "length" "4")
+ (set_attr "stall" "mul")])
+
+(define_insn "maddsi3r"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "1")
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 4 "register_operand" "0")))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))
+ (clobber (match_scratch:SI 5 "=h"))]
+ "TARGET_OPT_MULT && reload_completed"
+ "maddr\\t%2, %3"
+ [(set_attr "length" "4")
+ (set_attr "stall" "mulr")])
+
+(define_insn "*shift_1_or_2_and_add"
+ [(set (match_operand:SI 0 "mep_r0_operand" "=z")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "mep_slad_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r")))]
+ ""
+ "sl%b2ad3\\t%0, %1, %3"
+ [(set_attr "length" "2")
+ (set_attr "stall" "int2")])
+
+(define_insn "divmodsi4"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (set (match_operand:SI 3 "mep_hi_operand" "=h")
+ (mod:SI (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_OPT_DIV"
+ "div\\t%1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "div")
+ (set_attr "may_trap" "yes")])
+
+(define_insn "udivmodsi4"
+ [(set (match_operand:SI 0 "mep_lo_operand" "=l")
+ (udiv:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (set (match_operand:SI 3 "mep_hi_operand" "=h")
+ (umod:SI (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_OPT_DIV"
+ "divu\\t%1, %2"
+ [(set_attr "length" "2")
+ (set_attr "stall" "div")
+ (set_attr "may_trap" "yes")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "neg\\t%0, %1"
+ [(set_attr "length" "2")])
+
+;; We have "absolute difference between two regs" which isn't quite
+;; what gcc is expecting.
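+;; Instead, abs(x) is open-coded as the absolute difference |x - 0|,
+;; with the zero materialized in a scratch register first.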
+(define_expand "abssi2"
+ [(set (match_dup 2) (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "")
+ (abs:SI (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_dup 2))
+ ))]
+ "TARGET_OPT_ABSDIFF"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+(define_insn "*absdiff"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (abs:SI (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_OPT_ABSDIFF"
+ "abs\\t%0, %2"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (abs:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ "!reload_completed"
+ [(set (match_dup 3)
+ (match_dup 4))
+ (set (match_operand:SI 0 "register_operand" "")
+ (abs:SI (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_dup 3))))]
+ "operands[4] = GEN_INT (-INTVAL (operands[2]));")
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smin:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "r")))]
+ "TARGET_OPT_MINMAX"
+ "min\\t%0, %2"
+ [(set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smax:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "r")))]
+ "TARGET_OPT_MINMAX"
+ "max\\t%0, %2"
+ [(set_attr "length" "4")])
+
+(define_insn "uminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umin:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "r")))]
+ "TARGET_OPT_MINMAX"
+ "minu\\t%0, %2"
+ [(set_attr "length" "4")])
+
+(define_insn "umaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umax:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "r")))]
+ "TARGET_OPT_MINMAX"
+ "maxu\\t%0, %2"
+ [(set_attr "length" "4")])
+
+;; Average: a = (b+c+1)>>1
+(define_insn "*averagesi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (plus:SI (plus:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r"))
+ (const_int 1))
+ (const_int 1)))]
+ "TARGET_OPT_AVERAGE"
+ "ave\\t%0, %2"
+ [(set_attr "length" "4")])
+
+;; clip support
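+;;
+;; clip/clipu clamp a value to a signed or unsigned power-of-two range.
+;; In C terms (illustrative): x = x > 127 ? 127 : (x < -128 ? -128 : x)
+;; has the smax/smin shape matched below and becomes a single clip.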
+
+(define_insn "clip_maxmin"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smax:SI (smin:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "n"))
+ (match_operand:SI 3 "immediate_operand" "n")))]
+ "mep_allow_clip (operands[2], operands[3], 1)"
+ "clip\\t%0, %B2"
+ [(set_attr "length" "4")])
+
+(define_insn "clip_minmax"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smin:SI (smax:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "n"))
+ (match_operand:SI 3 "immediate_operand" "n")))]
+ "mep_allow_clip (operands[3], operands[2], 1)"
+ "clip\\t%0, %B3"
+ [(set_attr "length" "4")])
+
+(define_insn "clipu_maxmin"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smax:SI (smin:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "n"))
+ (match_operand:SI 3 "immediate_operand" "n")))]
+ "mep_allow_clip (operands[2], operands[3], 0)"
+ "clipu\\t%0, %U2"
+ [(set_attr "length" "4")])
+
+(define_insn "clipu_minmax"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (smin:SI (smax:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "n"))
+ (match_operand:SI 3 "immediate_operand" "n")))]
+ "mep_allow_clip (operands[3], operands[2], 0)"
+ "clipu\\t%0, %U3"
+ [(set_attr "length" "4")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32 bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,z")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "nonmemory_operand" "rM,M")))]
+ ""
+ "@
+ sll\\t%0, %2
+ sll3\\t%0, %1, %2"
+ [(set_attr "length" "2,2")
+ (set_attr "shiftop" "operand2")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "rM")))]
+ ""
+ "sra\\t%0, %2"
+ [(set_attr "length" "2")
+ (set_attr "shiftop" "operand2")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "rM")))]
+ ""
+ "srl\\t%0, %2"
+ [(set_attr "length" "2")
+ (set_attr "shiftop" "operand2")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32 Bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,J")))]
+ ""
+ "@
+ and\\t%0, %2
+ and3\\t%0, %1, %J2"
+ [(set_attr "length" "2,4")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,J")))]
+ ""
+ "@
+ or\\t%0, %2
+ or3\\t%0, %1, %J2"
+ [(set_attr "length" "2,4")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,J")))]
+ ""
+ "@
+ xor\\t%0, %2
+ xor3\\t%0, %1, %J2"
+ [(set_attr "length" "2,4")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+ "operands[2] = operands[1];
+ ")
+
+;; No separate insn for this; use NOR
+(define_insn "*one_cmplsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "0")))]
+ ""
+ "nor\\t%0, %0"
+ [(set_attr "length" "2")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Bit Manipulation
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "*bitop_be"
+ [(set (match_operand:QI 0 "mep_Y_operand" "=Y")
+ (subreg:QI (match_operator:SI 3 "mep_bit_operator"
+ [(subreg:SI (match_operand:QI 1 "mep_Y_operand" "0") 0)
+ (match_operand 2 "immediate_operand" "n")])
+ 3)
+ )]
+ "TARGET_BIG_ENDIAN && TARGET_OPT_BITOPS
+ && rtx_equal_p (operands[0], operands[1])"
+ "b%L3m\\t%0, %b2"
+ [(set_attr "length" "2")])
+
+(define_insn "*bitop_le"
+ [(set (match_operand:QI 0 "mep_Y_operand" "=Y")
+ (subreg:QI (match_operator:SI 3 "mep_bit_operator"
+ [(subreg:SI (match_operand:QI 1 "mep_Y_operand" "0") 0)
+ (match_operand 2 "immediate_operand" "n")])
+ 0)
+ )]
+ "!TARGET_BIG_ENDIAN && TARGET_OPT_BITOPS
+ && rtx_equal_p (operands[0], operands[1])"
+ "b%L3m\\t%0, %b2"
+ [(set_attr "length" "2")])
+
+(define_insn "btstm"
+ [(set (match_operand:SI 0 "mep_r0_operand" "=z")
+ (and:SI (subreg:SI (match_operand:QI 1 "mep_Y_operand" "Y") 0)
+ (match_operand 2 "immediate_operand" "n"))
+ )]
+ "TARGET_OPT_BITOPS && mep_bit_position_p (operands[2], 1)"
+ "btstm\\t%0, %1, %b2"
+ [(set_attr "length" "2")])
+
+(define_insn "tas"
+ [(parallel [(set (match_operand:SI 0 "mep_r0_operand" "=z")
+ (zero_extend:SI (match_operand:QI 1 "mep_Y_operand" "+Y")))
+ (set (match_dup 1)
+ (const_int 1))
+ ]
+ )]
+ "TARGET_OPT_BITOPS"
+ "tas\\t%0, %1"
+ [(set_attr "length" "2")])
+
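+;; Fuse a zero-extending byte load followed by a store of 1 back to the
+;; same byte into a single tas (test-and-set) instruction.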
+(define_peephole2
+ [(set (match_operand:SI 0 "mep_r0_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "mep_Y_operand" "")))
+ (set (match_operand:QI 2 "register_operand" "")
+ (const_int 1))
+ (set (match_dup 1)
+ (match_dup 2))
+ ]
+ "TARGET_OPT_BITOPS"
+ [(parallel [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 1)
+ (const_int 1))
+ ])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "mep_r0_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "mep_Y_operand" "")))
+ (set (match_operand:QI 2 "register_operand" "")
+ (const_int 1))
+ (set (match_dup 1)
+ (match_dup 2))
+ ]
+ "TARGET_OPT_BITOPS"
+ [(parallel [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 1)
+ (const_int 1))
+ ])
+ (set (match_dup 0)
+ (sign_extend:SI (match_dup 3)))]
+ "operands[3] = gen_lowpart (QImode, operands[0]);")
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conditional branches and stores
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "emit_jump_insn (gen_branch_true (operands[3],
+ mep_expand_cbranch (operands)));
+ DONE;")
+
+(define_expand "branch_true"
+ [(set (pc)
+ (if_then_else (match_operand 1 "" "")
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "nonmemory_operand" "")]))]
+ ""
+ "if (mep_expand_setcc (operands)) DONE; else FAIL;")
+
+;; ------------------------------------------------------------
+
+(define_insn "*slt"
+ [(set (match_operand:SI 0 "register_operand" "=z,z,r")
+ (lt:SI (match_operand:SI 1 "register_operand" "r,r,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,M,I")))]
+ ""
+ "slt3\\t%0, %1, %2"
+ [(set_attr "length" "2,2,4")])
+
+(define_insn "*sltu"
+ [(set (match_operand:SI 0 "register_operand" "=z,z,r")
+ (ltu:SI (match_operand:SI 1 "register_operand" "r,r,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,M,J")))]
+ ""
+ "sltu3\\t%0, %1, %2"
+ [(set_attr "length" "2,2,4")])
+
+(define_insn "*bcpeq_true"
+ [(set (pc)
+ (if_then_else (eq:SI (reg:SI CBCR_REGNO)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bcpeq\t0, %l0"
+ [(set_attr "length" "4")])
+
+(define_insn "*bcpeq_false"
+ [(set (pc)
+ (if_then_else (eq:SI (reg:SI CBCR_REGNO)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bcpne\t0, %l0"
+ [(set_attr "length" "4")])
+
+(define_insn "*bcpne_true"
+ [(set (pc)
+ (if_then_else (ne:SI (reg:SI CBCR_REGNO)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "bcpne\t0, %l0"
+ [(set_attr "length" "4")])
+
+(define_insn "*bcpne_false"
+ [(set (pc)
+ (if_then_else (ne:SI (reg:SI CBCR_REGNO)
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "bcpeq\t0, %l0"
+ [(set_attr "length" "4")])
+
+;; ??? The lengths here aren't correct, since no attempt is made to
+;; find "beqz" in the 256-byte range. However, this should not affect
+;; bundling, since we never run core branches in parallel.
+
+(define_insn "mep_beq_true"
+ [(set (pc)
+ (if_then_else (eq (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_reg_or_imm4_operand" "rN"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "* return mep_emit_cbranch (operands, 0);"
+ [(set_attr "length" "4")] )
+
+(define_insn "*beq_false"
+ [(set (pc)
+ (if_then_else (eq (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_reg_or_imm4_operand" "rN"))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "* return mep_emit_cbranch (operands, 1);"
+ [(set_attr "length" "4")])
+
+(define_insn "mep_bne_true"
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_reg_or_imm4_operand" "rN"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "* return mep_emit_cbranch (operands, 1); "
+ [(set_attr "length" "4")])
+
+(define_insn "*bne_false"
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_reg_or_imm4_operand" "rN"))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "* return mep_emit_cbranch (operands, 0); "
+ [(set_attr "length" "4")])
+
+(define_insn "mep_blti"
+ [(set (pc)
+ (if_then_else (lt (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_imm4_operand" "N"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "blti\\t%0, %1, %l2"
+ [(set_attr "length" "4")])
+
+(define_insn "*bgei"
+ [(set (pc)
+ (if_then_else (ge (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "mep_imm4_operand" "N"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "bgei\\t%0, %1, %l2"
+ [(set_attr "length" "4")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Call and branch instructions
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "call"
+ [(parallel [(call (match_operand:QI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ])]
+ ""
+ "
+{
+ mep_expand_call (operands, 0);
+ DONE;
+}")
+
+(define_insn "call_internal"
+ [(call (mem (match_operand:SI 0 "mep_call_address_operand" "R,r"))
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "const_int_operand" ""))
+ (use (match_operand:SI 3 "mep_tp_operand" "b,b"))
+ (use (match_operand:SI 4 "mep_gp_operand" "v,v"))
+ (clobber (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ]
+ ""
+{
+ static char const pattern[2][2][8] =
+ {
+ { "bsrv\t%0", "jsrv\t%0" },
+ { "bsr\t%0", "jsr\t%0" }
+ };
+
+ return pattern[mep_vliw_mode_match (operands[2])][which_alternative];
+}
+ [(set_attr "length" "4,2")])
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand:QI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ])]
+ ""
+ "")
+
+(define_insn "*sibcall_internal"
+ [(call (mem (match_operand:SI 0 "mep_nearsym_operand" "s"))
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "const_int_operand" ""))
+ (use (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ]
+ "SIBLING_CALL_P (insn)"
+{
+ if (mep_vliw_jmp_match (operands[2]))
+ return "jmp\t%0";
+ else if (mep_vliw_mode_match (operands[2]))
+ return
+ "movu $0, %0\n\
+ jmp $0";
+ else
+ return
+ "ldc $12, $lp\n\
+ movh $11, %%hi(%0)\n\
+ xor3 $12, $12, 1\n\
+ add3 $11, $11, %%lo(%0+1)\n\
+ stc $12, $lp\n\
+ jmp $11";
+}
+ [(set_attr "length" "48")
+ (set_attr "slot" "multi")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ])]
+ ""
+ "
+{
+ mep_expand_call (operands, 1);
+ DONE;
+}")
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=rx,rx")
+ (call (mem:SI (match_operand:SI 1 "mep_call_address_operand" "R,r"))
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "mep_tp_operand" "b,b"))
+ (use (match_operand:SI 5 "mep_gp_operand" "v,v"))
+ (clobber (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ]
+ ""
+{
+ static char const pattern[2][2][8] =
+ {
+ { "bsrv\t%1", "jsrv\t%1" },
+ { "bsr\t%1", "jsr\t%1" }
+ };
+
+ return pattern[mep_vliw_mode_match (operands[3])][which_alternative];
+}
+ [(set_attr "length" "4,2")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))
+ (use (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ])]
+ ""
+ "")
+
+(define_insn "*sibcall_value_internal"
+ [(set (match_operand 0 "register_operand" "=rx")
+ (call (mem (match_operand:SI 1 "mep_nearsym_operand" "s"))
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "const_int_operand" ""))
+ (use (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ]
+ "SIBLING_CALL_P (insn)"
+{
+ if (mep_vliw_jmp_match (operands[3]))
+ return "jmp\t%1";
+ else if (mep_vliw_mode_match (operands[3]))
+ return
+ "movu $0, %1\n\
+ jmp $0";
+ else
+ return
+ "ldc $12, $lp\n\
+ movh $11, %%hi(%1)\n\
+ xor3 $12, $12, 1\n\
+ add3 $11, $11, %%lo(%1+1)\n\
+ stc $12, $lp\n\
+ jmp $11";
+}
+ [(set_attr "length" "48")
+ (set_attr "slot" "multi")])
+
+(define_insn "return_internal"
+ [(return)
+ (use (match_operand:SI 0 "register_operand" ""))]
+ ""
+ "* return (REGNO (operands[0]) == LP_REGNO) ? \"ret\" : \"jmp\\t%0\";"
+ [(set_attr "length" "2")
+ (set_attr "stall" "ret")])
+
+(define_insn "eh_return_internal"
+ [(return)
+ (use (reg:SI 10))
+ (use (reg:SI 11))
+ (use (reg:SI LP_REGNO))
+ (clobber (reg:SI REGSAVE_CONTROL_TEMP))
+ ]
+ ""
+ "ret"
+ [(set_attr "length" "2")
+ (set_attr "stall" "ret")])
+
+;; The assembler replaces short jumps with long jumps as needed.
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "bra\\t%l0"
+ [(set_attr "length" "4")])
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jmp\\t%0"
+ [(set_attr "length" "2")])
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp\\t%0"
+ [(set_attr "length" "2")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Low Overhead Looping
+;; ::
+;; ::::::::::::::::::::
+
+;; This insn is volatile because we'd like it to stay in its original
+;; position, just before the loop header. If it stays there, we might
+;; be able to convert it into a "repeat" insn.
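+;; A simple counted loop, e.g. "for (i = 0; i < n; i++) ...", can then
+;; run under repeat with no per-iteration decrement-and-branch.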
+(define_insn "doloop_begin_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "")] UNS_REPEAT_BEG))]
+ ""
+ { gcc_unreachable (); }
+ [(set_attr "length" "4")])
+
+(define_expand "doloop_begin"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))]
+ "!profile_arc_flag && TARGET_OPT_REPEAT"
+ "if (INTVAL (operands[3]) > 1)
+ FAIL;
+ mep_emit_doloop (operands, 0);
+ DONE;
+ ")
+
+(define_insn "doloop_end_internal"
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "+r,cxy,*m")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (unspec [(match_operand 2 "const_int_operand" "")] UNS_REPEAT_END)
+ (clobber (match_scratch:SI 3 "=X,&r,&r"))]
+ ""
+ { gcc_unreachable (); }
+ ;; Worst case length:
+ ;;
+ ;; lw <op3>,<op0> 4
+ ;; add <op3>,-1 2
+ ;; sw <op3>,<op0> 4
+ ;; jmp <op1> 4
+ ;; 1f:
+ [(set_attr "length" "14")
+ (set_attr "slot" "multi")])
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "nonimmediate_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (label_ref (match_operand 4 "" "")))]
+ "!profile_arc_flag && TARGET_OPT_REPEAT"
+ "if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_CODE (operands[0]) == REG && GET_MODE (operands[0]) != SImode)
+ FAIL;
+ mep_emit_doloop (operands, 1);
+ DONE;
+ ")
+
+(define_insn "repeat"
+ [(set (reg:SI RPC_REGNO)
+ (unspec:SI [(match_operand:SI 0 "mep_r0_15_operand" "r")
+ (match_operand:SI 1 "" "")]
+ UNS_REPEAT_BEG))]
+ ""
+ "repeat\\t%0,%l1"
+ [(set_attr "length" "4")])
+
+(define_insn "repeat_end"
+ [(unspec [(const_int 0)] UNS_REPEAT_END)]
+ ""
+ "# repeat end"
+ [(set_attr "length" "0")])
+
+(define_insn "erepeat"
+ [(unspec [(match_operand 0 "" "")] UNS_EREPEAT_BEG)]
+ ""
+ "erepeat\\t%l0"
+ [(set_attr "length" "4")])
+
+(define_insn "erepeat_end"
+ [(unspec [(const_int 0)] UNS_EREPEAT_END)]
+ ""
+ "# erepeat end"
+ [(set_attr "length" "0")
+ (set_attr "slot" "multi")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Prologue and Epilogue instructions
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ "
+{
+ mep_expand_prologue ();
+ DONE;
+}")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ mep_expand_epilogue ();
+ DONE;
+}")
+
+(define_expand "eh_return"
+ [(use (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "
+{
+ mep_expand_eh_return (operands);
+ DONE;
+}")
+
+(define_insn_and_split "eh_epilogue"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")] UNS_EH_EPILOGUE)
+ (use (reg:SI LP_REGNO))]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 1)]
+ "mep_emit_eh_epilogue (operands); DONE;"
+ [(set_attr "slot" "multi")])
+
+(define_expand "sibcall_epilogue"
+ [(const_int 0)]
+ ""
+ "
+{
+ mep_expand_sibcall_epilogue ();
+ DONE;
+}")
+
+(define_insn "mep_bb_trace_ret"
+ [(unspec_volatile [(const_int 0)] UNS_BB_TRACE_RET)]
+ ""
+ "* return mep_emit_bb_trace_ret ();"
+ [(set_attr "slot" "multi")])
+
+(define_insn "mep_disable_int"
+ [(unspec_volatile [(const_int 0)] UNS_DISABLE_INT)]
+ ""
+ "di"
+ [(set_attr "length" "2")])
+
+(define_insn "mep_enable_int"
+ [(unspec_volatile [(const_int 0)] UNS_ENABLE_INT)]
+ ""
+ "ei"
+ [(set_attr "length" "2")])
+
+(define_insn "mep_reti"
+ [(return)
+ (unspec_volatile [(const_int 0)] UNS_RETI)]
+ ""
+ "reti"
+ [(set_attr "length" "2")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Miscellaneous instructions
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "2")])
+
+(define_insn "nop32"
+ [(const_int 1)]
+ ""
+ "or3\\t$0, $0, 0"
+ [(set_attr "length" "4")])
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNS_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "slot" "multi")])
+
+
+(define_insn "djmark"
+ [(unspec_volatile [(const_int 0)] 999)]
+ ""
+ "# dj"
+ [(set_attr "length" "0")
+ (set_attr "slot" "multi")])
+
diff --git a/gcc/config/mep/mep.opt b/gcc/config/mep/mep.opt
new file mode 100644
index 000000000..9670d3b4f
--- /dev/null
+++ b/gcc/config/mep/mep.opt
@@ -0,0 +1,162 @@
+; Target specific command line options for the MEP port of the compiler.
+; Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+; Contributed by Red Hat Inc.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mabsdiff
+Target Mask(OPT_ABSDIFF)
+Enable absolute difference instructions
+
+mall-opts
+Target RejectNegative
+Enable all optional instructions
+
+maverage
+Target Mask(OPT_AVERAGE)
+Enable average instructions
+
+mbased=
+Target Joined Var(mep_based_cutoff) RejectNegative UInteger Init(0)
+Variables this size and smaller go in the based section. (default 0)
+
+mbitops
+Target Mask(OPT_BITOPS)
+Enable bit manipulation instructions
+
+mc=
+Target Joined Var(mep_const_section) RejectNegative
+Section to put all const variables in (tiny, near, far) (no default)
+
+mclip
+Target Mask(OPT_CLIP)
+Enable clip instructions
+
+mconfig=
+Target Joined Var(mep_config_string) RejectNegative
+Configuration name
+
+mcop
+Target Mask(COP)
+Enable MeP Coprocessor
+
+mcop32
+Target Mask(COP) MaskExists RejectNegative
+Enable MeP Coprocessor with 32-bit registers
+
+mcop64
+Target Mask(64BIT_CR_REGS) RejectNegative
+Enable MeP Coprocessor with 64-bit registers
+
+mivc2
+Target Mask(IVC2) RejectNegative
+Enable IVC2 scheduling
+
+mdc
+Target Mask(DC) RejectNegative
+Const variables default to the near section
+
+mdebug
+Target Disabled Undocumented
+
+mdiv
+Target Mask(OPT_DIV)
+Enable 32-bit divide instructions
+
+meb
+Target InverseMask(LITTLE_ENDIAN) RejectNegative
+Use big-endian byte order
+
+mel
+Target Mask(LITTLE_ENDIAN) RejectNegative
+Use little-endian byte order
+
+mfar
+Driver RejectNegative
+
+mio-volatile
+Target Mask(IO_VOLATILE)
+__io vars are volatile by default
+
+ml
+Target Mask(L) RejectNegative
+All variables default to the far section
+
+mleadz
+Target Mask(OPT_LEADZ)
+Enable leading zero instructions
+
+mlibrary
+Target Mask(LIBRARY) RejectNegative Undocumented
+
+mm
+Target Mask(M) RejectNegative
+All variables default to the near section
+
+mminmax
+Target Mask(OPT_MINMAX)
+Enable min/max instructions
+
+mmult
+Target Mask(OPT_MULT)
+Enable 32-bit multiply instructions
+
+mno-opts
+Target RejectNegative
+Disable all optional instructions
+
+mrand-tpgp
+Target Mask(RAND_TPGP) RejectNegative Undocumented
+
+mrepeat
+Target Mask(OPT_REPEAT)
+Allow gcc to use the repeat/erepeat instructions
+
+ms
+Target Mask(S) RejectNegative
+All variables default to the tiny section
+
+msatur
+Target Mask(OPT_SATUR)
+Enable saturation instructions
+
+msdram
+Target
+Use sdram version of runtime
+
+msim
+Target RejectNegative
+Use simulator runtime
+
+msimnovec
+Target RejectNegative
+Use simulator runtime without vectors
+
+mtf
+Target Mask(TF) RejectNegative
+All functions default to the far section
+
+mtiny=
+Target Joined Var(mep_tiny_cutoff) RejectNegative UInteger Init(4)
+Variables this size and smaller go in the tiny section. (default 4)
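+
+; For example, -mtiny=8 would place objects of 8 bytes or smaller in
+; the tiny section.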
+
+mvl32
+Target InverseMask(OPT_VL64) Undocumented RejectNegative
+
+mvl64
+Target Mask(OPT_VL64) Undocumented RejectNegative
+
+mvliw
+Target Mask(VLIW) Undocumented
diff --git a/gcc/config/mep/predicates.md b/gcc/config/mep/predicates.md
new file mode 100644
index 000000000..4ba3a6b82
--- /dev/null
+++ b/gcc/config/mep/predicates.md
@@ -0,0 +1,184 @@
+;; Toshiba Media Processor Machine predicates
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Red Hat Inc.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; (define_predicate "cgen_h_uint_7a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_6a2_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_22a4_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_sint_2a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_24a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_sint_6a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_5a4_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_2a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_16a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_3a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_5a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_sint_16a1_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_sint_5a8_immediate"
+;; (match_code "const_int"))
+;; (define_predicate "cgen_h_uint_4a1_immediate"
+;; (match_code "const_int"))
+
+(define_predicate "cgen_h_sint_7a2_immediate"
+ (match_code "const_int")
+ { int i = INTVAL (op);
+ return ((i & 1) == 0 && i >= -128 && i < 128);
+ })
+
+(define_predicate "cgen_h_sint_6a4_immediate"
+ (match_code "const_int")
+ { int i = INTVAL (op);
+ return ((i & 3) == 0 && i >= -256 && i < 256);
+ })
+
+;; This is used below to simplify the other predicates: it accepts a
+;; pseudo register, or a subreg of one, before reload completes.
+(define_predicate "mep_subreg_operand"
+ (ior
+ (and (and (and (match_code "subreg")
+ (match_code "reg" "0"))
+ (match_test "REGNO (SUBREG_REG (op)) >= FIRST_PSEUDO_REGISTER"))
+ (match_test "!(reload_completed || reload_in_progress)"))
+ (and (match_code "reg")
+ (match_test "REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
+
+(define_predicate "symbolic_operand"
+ (match_code "const,symbol_ref,label_ref"))
+
+(define_predicate "mep_farsym_operand"
+ (and (match_code "const,symbol_ref")
+ (match_test "mep_section_tag (op) == 'f'")))
+
+(define_predicate "mep_nearsym_operand"
+ (and (match_code "const,symbol_ref,label_ref")
+ (match_test "mep_section_tag (op) != 'f'")))
+
+(define_predicate "mep_movdest_operand"
+ (and (match_test "mep_section_tag (op) != 'f'")
+ (match_operand 0 "nonimmediate_operand")))
+
+(define_predicate "mep_r0_15_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "GR_REGNO_P (REGNO (op))"))))
+
+(define_predicate "mep_r0_operand"
+ (and (match_code "reg")
+ (ior (match_test "REGNO (op) == 0")
+ (match_test "!(reload_completed || reload_in_progress)
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
+
+(define_predicate "mep_hi_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == HI_REGNO"))))
+
+(define_predicate "mep_lo_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == LO_REGNO"))))
+
+(define_predicate "mep_tp_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == TP_REGNO"))))
+
+(define_predicate "mep_gp_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) == GP_REGNO"))))
+
+(define_predicate "mep_sp_operand"
+ (match_test "op == stack_pointer_rtx"))
+
+(define_predicate "mep_tprel_operand"
+ (ior (match_operand 0 "mep_subreg_operand")
+ (and (match_code "reg")
+ (match_test "REGNO (op) < 8"))))
+
+(define_predicate "mep_call_address_operand"
+ (and (match_test "mep_section_tag (op) != 'f'")
+ (and (ior (not (match_code "symbol_ref"))
+ (match_test "mep_section_tag (DECL_RTL (cfun->decl)) != 'f'
+ && !mep_lookup_pragma_call (XSTR (op, 0))"))
+ (match_code "symbol_ref,reg"))))
+
+(define_predicate "mep_Y_operand"
+ (and (match_code "mem")
+ (match_code "reg" "0")))
+
+(define_predicate "mep_imm4_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15")))
+
+(define_predicate "mep_reg_or_imm4_operand"
+ (ior (match_code "reg")
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))))
+
+(define_predicate "mep_imm7a4_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 128 && INTVAL (op) % 4 == 0")))
+
+(define_predicate "mep_slad_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2 || INTVAL (op) == 4")))
+
+(define_predicate "mep_add_operand"
+ (ior (and (match_code "const")
+ (and (match_operand 0 "symbolic_operand")
+ (and (match_test "mep_section_tag(op) == 'b' || mep_section_tag(op) == 't'")
+ (ior (match_code "unspec" "0")
+ (and (match_code "plus" "0")
+ (match_code "unspec" "00"))))))
+ (match_code "const_int,reg")))
+
+;; Return true if OP is an integer in the range 0..7 inclusive.
+;; On the MeP-h1, shifts by such constants execute in a single stage
+;; and shifts by larger values execute in two.
+(define_predicate "mep_single_shift_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7")))
+
+;; Return true if OP is an operation that can be performed using bsetm,
+;; bclrm or bnotm. The possibilities are:
+
+;; bsetm: (ior X Y), Y has one bit set
+;; bclrm: (and X Y), Y has one bit clear
+;; bnotm: (xor X Y), Y has one bit set.
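+;;
+;; In C terms (illustrative, for a byte object in memory): "x |= 0x40"
+;; maps to bsetm, "x &= ~0x40" to bclrm and "x ^= 0x40" to bnotm.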
+(define_predicate "mep_bit_operator"
+ (and (match_code "and,ior,xor")
+ (match_test "mep_bit_position_p (XEXP (op, 1), GET_CODE (op) != AND)")))
+
+(define_predicate "mep_reload_operand"
+ (ior (and (match_code "reg")
+ (match_test "!ANY_CONTROL_REGNO_P (REGNO (op))"))
+ (and (match_code "mem,symbol_ref")
+ (match_test "mep_section_tag (op) != 'f'"))))
diff --git a/gcc/config/mep/t-mep b/gcc/config/mep/t-mep
new file mode 100644
index 000000000..fac6dad42
--- /dev/null
+++ b/gcc/config/mep/t-mep
@@ -0,0 +1,105 @@
+# -*- makefile -*-
+# GCC makefile fragment for MeP
+# Copyright (C) 2001, 2002, 2003, 2005, 2007, 2009, 2010
+# Free Software Foundation, Inc.
+# Contributed by Red Hat Inc
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Force genpreds to be rebuilt in case MeP-Integrator changed the predicates
+
+GTM_H = tm.h $(tm_file_list) $(srcdir)/config/mep/mep-intrin.h insn-constants.h
+
+# Use -O0 instead of -O2 so we don't get complex relocations
+
+CRTSTUFF_CFLAGS = -O0 $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -g0 \
+ -finhibit-size-directive -fno-inline-functions -fno-exceptions \
+ -fno-zero-initialized-in-bss -fno-unit-at-a-time
+
+TCFLAGS = -mlibrary
+
+mep-pragma.o: $(srcdir)/config/mep/mep-pragma.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(TREE_H) $(RTL_H) $(C_PRAGMA_H) \
+ $(CPPLIB_H) hard-reg-set.h output.h $(srcdir)/config/mep/mep-protos.h \
+ function.h insn-config.h reload.h $(TARGET_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+# profiling support
+
+LIB1ASMSRC = mep/mep-lib1.asm
+
+LIB1ASMFUNCS = _mep_profile \
+ _mep_bb_init_trace \
+ _mep_bb_init \
+ _mep_bb_trace \
+ _mep_bb_increment
+
+# multiply and divide routines
+
+LIB2FUNCS_EXTRA = \
+ $(srcdir)/config/mep/mep-lib2.c \
+ $(srcdir)/config/mep/mep-tramp.c
+
+# floating point emulation libraries
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
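+
+# fp-bit.c (built with FLOAT defined) supplies the single-precision
+# routines; dp-bit.c, the same source without FLOAT, supplies the
+# double-precision ones.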
+
+MULTILIB_OPTIONS = mel mall-opts mfar
+MULTILIB_DIRNAMES = el allopt far
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+
+MD_INCLUDES = \
+ $(srcdir)/config/mep/intrinsics.md \
+ $(srcdir)/config/mep/predicates.md \
+ $(srcdir)/config/mep/constraints.md
+
+mep.o : $(srcdir)/config/mep/mep-intrin.h
+
+# begin-isas
+MEP_CORE = ext_core1
+MEP_COPRO = ext_cop1_16,ext_cop1_32,ext_cop1_48,ext_cop1_64
+# end-isas
+
+# To use this, you must have cgen and cgen/cpu in the same source tree as
+# gcc.
+cgen-maint :
+ S=`cd $(srcdir); pwd`; \
+ cd $$S/config/mep && \
+ guile -s $$S/../cgen/cgen-intrinsics.scm \
+ -s $$S/../cgen \
+ $(CGENFLAGS) \
+ -a $$S/../cgen/cpu/mep.cpu \
+ -m mep,c5 \
+ -i mep,$(MEP_CORE),$(MEP_COPRO) \
+ -K mep,$(MEP_CORE),$(MEP_COPRO) \
+ -M intrinsics.md \
+ -N mep-intrin.h \
+ -P intrinsics.h
+
+# start-extra-headers
+EXTRA_HEADERS = $(srcdir)/config/mep/intrinsics.h \
+ $(srcdir)/config/mep/default.h
+# end-extra-headers
diff --git a/gcc/config/microblaze/constraints.md b/gcc/config/microblaze/constraints.md
new file mode 100644
index 000000000..2abe30190
--- /dev/null
+++ b/gcc/config/microblaze/constraints.md
@@ -0,0 +1,72 @@
+;; Constraint definitions for Xilinx MicroBlaze processors.
+;; Copyright 2010 Free Software Foundation, Inc.
+
+;; Contributed by Michael Eager <eager@eagercon.com>.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "d" "GR_REGS"
+ "A general register.")
+
+(define_register_constraint "z" "ST_REGS"
+ "A status register.")
+
+;; Define integer constraints
+
+(define_constraint "I"
+ "A signed 16-bit constant."
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND (ival)")))
+
+(define_constraint "J"
+ "Integer zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "M"
+ "A constant which needs two instructions to load."
+ (and (match_code "const_int")
+ (match_test "LARGE_OPERAND (ival)")))
+
+(define_constraint "N"
+ "A constant in the range -65535 to -1 (inclusive)."
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) (ival + 0xffff) < 0xffff")))
+
+(define_constraint "P"
+ "A constant in the range 1 to 65535 (inclusive)."
+ (and (match_code "const_int")
+ (match_test "ival > 0 && ival < 0x10000")))
+
+;; Define floating point constraints
+
+(define_constraint "G"
+ "Floating-point zero."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Define memory constraints
+
+(define_memory_constraint "R"
+ "Memory operand which fits in single instruction."
+ (and (match_code "mem")
+ (match_test "simple_memory_operand (op, GET_MODE (op))")))
+
+(define_memory_constraint "T"
+ "Double word operand."
+ (and (match_code "mem")
+ (match_test "double_memory_operand (op, GET_MODE (op))")))
diff --git a/gcc/config/microblaze/crti.s b/gcc/config/microblaze/crti.s
new file mode 100644
index 000000000..3944443b4
--- /dev/null
+++ b/gcc/config/microblaze/crti.s
@@ -0,0 +1,39 @@
+/* crti.s for __init, __fini
+ This file supplies the prologue for __init and __fini routines
+
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .section .init, "ax"
+ .global __init
+ .align 2
+__init:
+ addik r1, r1, -8
+ sw r15, r0, r1
+
+ .section .fini, "ax"
+ .global __fini
+ .align 2
+__fini:
+ addik r1, r1, -8
+ sw r15, r0, r1
diff --git a/gcc/config/microblaze/crtn.s b/gcc/config/microblaze/crtn.s
new file mode 100644
index 000000000..7970dee1c
--- /dev/null
+++ b/gcc/config/microblaze/crtn.s
@@ -0,0 +1,35 @@
+/* crtn.s for __init, __fini
+ This file supplies the epilogue for __init and __fini routines
+
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .section .init, "ax"
+ lw r15, r0, r1
+ rtsd r15, 8
+ addik r1, r1, 8
+
+ .section .fini, "ax"
+ lw r15, r0, r1
+ rtsd r15, 8
+ addik r1, r1, 8
diff --git a/gcc/config/microblaze/linux.h b/gcc/config/microblaze/linux.h
new file mode 100644
index 000000000..3c13fabcb
--- /dev/null
+++ b/gcc/config/microblaze/linux.h
@@ -0,0 +1,35 @@
+/* Definitions for MicroBlaze running Linux.
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+#define DYNAMIC_LINKER "/lib/ld.so.1"
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "dynamic_linker", DYNAMIC_LINKER }
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(dynamic_linker)} \
+ %{static:-static}}"
+
diff --git a/gcc/config/microblaze/microblaze-c.c b/gcc/config/microblaze/microblaze-c.c
new file mode 100644
index 000000000..32c38da31
--- /dev/null
+++ b/gcc/config/microblaze/microblaze-c.c
@@ -0,0 +1,92 @@
+/* Subroutines used for the C front end for Xilinx MicroBlaze.
+ Copyright 2010 Free Software Foundation, Inc.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "c-family/c-common.h"
+#include "tm_p.h"
+#include "target.h"
+
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+#define builtin_assert(TXT) cpp_assert (pfile, TXT)
+
+/* Define preprocessor symbols for MicroBlaze.
+ Symbols which do not start with __ are deprecated. */
+
+void
+microblaze_cpp_define (cpp_reader *pfile)
+{
+ builtin_assert ("cpu=microblaze");
+ builtin_assert ("machine=microblaze");
+ builtin_define ("__MICROBLAZE__");
+ if (!TARGET_SOFT_MUL)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_MUL");
+ builtin_define ("__HAVE_HW_MUL__");
+ }
+ if (TARGET_MULTIPLY_HIGH)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_MUL_HIGH");
+ builtin_define ("__HAVE_HW_MUL_HIGH__");
+ }
+ if (!TARGET_SOFT_DIV)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_DIV");
+ builtin_define ("__HAVE_HW_DIV__");
+ }
+ if (TARGET_BARREL_SHIFT)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_BSHIFT");
+ builtin_define ("__HAVE_HW_BSHIFT__");
+ }
+ if (TARGET_PATTERN_COMPARE)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_PCMP");
+ builtin_define ("__HAVE_HW_PCMP__");
+ }
+ if (TARGET_HARD_FLOAT)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_FPU");
+ builtin_define ("__HAVE_HW_FPU__");
+ }
+ if (TARGET_FLOAT_CONVERT)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_FPU_CONVERT");
+ builtin_define ("__HAVE_HW_FPU_CONVERT__");
+ }
+ if (TARGET_FLOAT_SQRT)
+ {
+ if (!flag_iso)
+ builtin_define ("HAVE_HW_FPU_SQRT");
+ builtin_define ("__HAVE_HW_FPU_SQRT__");
+ }
+}
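+
+/* For example (illustrative only), user code can then pick between
+   hardware and software code paths at compile time:
+
+     #ifdef __HAVE_HW_MUL__
+       p = a * b;            // hardware multiplier available
+     #else
+       p = soft_mul (a, b);  // hypothetical software fallback
+     #endif
+*/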
diff --git a/gcc/config/microblaze/microblaze-protos.h b/gcc/config/microblaze/microblaze-protos.h
new file mode 100644
index 000000000..56dca55dc
--- /dev/null
+++ b/gcc/config/microblaze/microblaze-protos.h
@@ -0,0 +1,58 @@
+/* Definitions of target machine for GNU compiler, for Xilinx MicroBlaze.
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_MICROBLAZE_PROTOS_H
+#define GCC_MICROBLAZE_PROTOS_H
+
+#ifdef RTX_CODE
+extern int pic_address_needs_scratch (rtx);
+extern void expand_block_move (rtx *);
+extern void microblaze_expand_prologue (void);
+extern void microblaze_expand_epilogue (void);
+extern void override_options (void);
+extern int microblaze_expand_shift (rtx *);
+extern bool microblaze_expand_move (enum machine_mode, rtx *);
+extern bool microblaze_expand_block_move (rtx, rtx, rtx, rtx);
+extern void microblaze_expand_divide (rtx *);
+extern void microblaze_expand_conditional_branch (enum machine_mode, rtx *);
+extern void microblaze_expand_conditional_branch_sf (rtx *);
+extern int microblaze_can_use_return_insn (void);
+extern int microblaze_const_double_ok (rtx, enum machine_mode);
+extern void print_operand (FILE *, rtx, int);
+extern void print_operand_address (FILE *, rtx);
+extern void init_cumulative_args (CUMULATIVE_ARGS *,tree, rtx);
+extern bool microblaze_legitimate_address_p (enum machine_mode, rtx, bool);
+extern int microblaze_is_interrupt_handler (void);
+extern rtx microblaze_return_addr (int, rtx);
+extern int simple_memory_operand (rtx, enum machine_mode);
+extern int double_memory_operand (rtx, enum machine_mode);
+
+extern int microblaze_regno_ok_for_base_p (int, int);
+extern HOST_WIDE_INT microblaze_initial_elimination_offset (int, int);
+extern void microblaze_declare_object (FILE *, const char *, const char *,
+ const char *, int);
+extern void microblaze_asm_output_ident (FILE *, const char *);
+#endif /* RTX_CODE */
+
+/* Declare functions in microblaze-c.c. */
+extern void microblaze_cpp_define (struct cpp_reader *);
+
+#endif /* GCC_MICROBLAZE_PROTOS_H */
diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c
new file mode 100644
index 000000000..b50c7942e
--- /dev/null
+++ b/gcc/config/microblaze/microblaze.c
@@ -0,0 +1,3062 @@
+/* Subroutines used for code generation on Xilinx MicroBlaze.
+ Copyright 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "insn-attr.h"
+#include "integrate.h"
+#include "recog.h"
+#include "tree.h"
+#include "function.h"
+#include "expr.h"
+#include "flags.h"
+#include "reload.h"
+#include "output.h"
+#include "ggc.h"
+#include "hashtab.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm_p.h"
+#include "gstab.h"
+#include "df.h"
+#include "optabs.h"
+#include "diagnostic-core.h"
+
+#define MICROBLAZE_VERSION_COMPARE(VA,VB) strcasecmp (VA, VB)
+
+/* Classifies an address.
+
+   ADDRESS_INVALID
+   An invalid address.
+
+   ADDRESS_REG
+   A natural register or a register + const_int offset address.
+   The register satisfies microblaze_valid_base_register_p and the
+   offset is a const_arith_operand.
+
+   ADDRESS_REG_INDEX
+   A natural register offset by the index contained in an index
+   register.  The base register satisfies
+   microblaze_valid_base_register_p and the index register satisfies
+   microblaze_valid_index_register_p.
+
+   ADDRESS_CONST_INT
+   A signed 16/32-bit constant address.
+
+   ADDRESS_SYMBOLIC
+   A constant symbolic address or a (register + symbol). */
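+
+/* Illustrative examples: (reg r5) and (plus (reg r5) (const_int 8))
+   classify as ADDRESS_REG; (plus (reg r5) (reg r6)) classifies as
+   ADDRESS_REG_INDEX. */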
+
+enum microblaze_address_type
+{
+ ADDRESS_INVALID,
+ ADDRESS_REG,
+ ADDRESS_REG_INDEX,
+ ADDRESS_CONST_INT,
+ ADDRESS_SYMBOLIC,
+ ADDRESS_GOTOFF,
+ ADDRESS_PLT
+};
+
+/* Classifies symbols
+
+SYMBOL_TYPE_GENERAL
+
+A general symbol. */
+enum microblaze_symbol_type
+{
+ SYMBOL_TYPE_INVALID,
+ SYMBOL_TYPE_GENERAL
+};
+
+/* Classification of a MicroBlaze address. */
+struct microblaze_address_info
+{
+ enum microblaze_address_type type;
+ rtx regA; /* Contains valid values on ADDRESS_REG, ADDRESS_REG_INDEX,
+ ADDRESS_SYMBOLIC. */
+ rtx regB; /* Contains valid values on ADDRESS_REG_INDEX. */
+ rtx offset; /* Contains valid values on ADDRESS_CONST_INT and ADDRESS_REG. */
+ rtx symbol; /* Contains valid values on ADDRESS_SYMBOLIC. */
+ enum microblaze_symbol_type symbol_type;
+};
+
+/* Structure to be filled in by compute_frame_size with register
+ save masks, and offsets for the current function. */
+
+struct GTY(()) microblaze_frame_info {
+ long total_size; /* # bytes that the entire frame takes up. */
+ long var_size; /* # bytes that variables take up. */
+ long args_size; /* # bytes that outgoing arguments take up. */
+ int link_debug_size; /* # bytes for the link reg and back pointer. */
+ int gp_reg_size; /* # bytes needed to store gp regs. */
+ long gp_offset; /* offset from new sp to store gp registers. */
+ long mask; /* mask of saved gp registers. */
+ int initialized; /* != 0 if frame size already calculated. */
+ int num_gp; /* number of gp registers saved. */
+ long insns_len; /* length of insns. */
+ int alloc_stack; /* Flag indicating that the current function
+ must not create stack space (an optimization). */
+};
+
+/* Global variables for machine-dependent things. */
+
+/* Toggle which pipeline interface to use. */
+static GTY(()) int microblaze_sched_use_dfa = 0;
+
+/* Threshold for data being put into the small data/bss area, instead
+   of the normal data area (references to the small data/bss area take
+   1 instruction and use the global pointer; references to the normal
+   data area take 2 instructions). */
+int microblaze_section_threshold = -1;
+
+/* Prevent scheduling potentially exception causing instructions in
+ delay slots. -mcpu=v3.00.a or v4.00.a turns this on. */
+int microblaze_no_unsafe_delay;
+
+/* Which CPU pipeline do we use.  We haven't really standardized on a
+   CPU version having only a particular type of pipeline; there can
+   still be options on the CPU to scale pipeline features up or down.
+   So we let the MD file rely on the value of this variable instead,
+   making PIPE_5 the default; it should still behave acceptably on
+   PIPE_3 MicroBlazes. */
+enum pipeline_type microblaze_pipe = MICROBLAZE_PIPE_5;
+
+/* High and low marks for floating point values which we will accept
+ as legitimate constants for LEGITIMATE_CONSTANT_P. These are
+ initialized in override_options. */
+REAL_VALUE_TYPE dfhigh, dflow, sfhigh, sflow;
+
+/* Array giving truth value on whether or not a given hard register
+ can support a given mode. */
+char microblaze_hard_regno_mode_ok[(int)MAX_MACHINE_MODE]
+ [FIRST_PSEUDO_REGISTER];
+
+/* Current frame information calculated by compute_frame_size. */
+struct microblaze_frame_info current_frame_info;
+
+/* Zero structure to initialize current_frame_info. */
+struct microblaze_frame_info zero_frame_info;
+
+/* List of all MICROBLAZE punctuation characters used by print_operand. */
+char microblaze_print_operand_punct[256];
+
+/* Map GCC register number to debugger register number. */
+int microblaze_dbx_regno[FIRST_PSEUDO_REGISTER];
+
+/* Map hard register number to register class. */
+enum reg_class microblaze_regno_to_class[] =
+{
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ GR_REGS, GR_REGS, GR_REGS, GR_REGS,
+ ST_REGS, GR_REGS, GR_REGS, GR_REGS
+};
+
+/* MicroBlaze specific machine attributes.
+ interrupt_handler - Interrupt handler attribute to add interrupt prologue
+ and epilogue and use appropriate interrupt return.
+ save_volatiles - Similar to interrupt_handler, but uses a normal return. */
+int interrupt_handler;
+int save_volatiles;
+
+const struct attribute_spec microblaze_attribute_table[] = {
+ /* name min_len, max_len, decl_req, type_req, fn_type, req_handler */
+ {"interrupt_handler", 0, 0, true, false, false, NULL},
+ {"save_volatiles" , 0, 0, true, false, false, NULL},
+ { NULL, 0, 0, false, false, false, NULL}
+};
+
+static int microblaze_interrupt_function_p (tree);
+
+section *sdata2_section;
+
+/* Return truth value if a CONST_DOUBLE is ok to be a legitimate constant. */
+int
+microblaze_const_double_ok (rtx op, enum machine_mode mode)
+{
+ REAL_VALUE_TYPE d;
+
+ if (GET_CODE (op) != CONST_DOUBLE)
+ return 0;
+
+ if (mode == VOIDmode)
+ return 1;
+
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ if (op == CONST0_RTX (mode))
+ return 1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, op);
+
+ if (REAL_VALUE_ISNAN (d))
+ return FALSE;
+
+ if (REAL_VALUE_NEGATIVE (d))
+ d = real_value_negate (&d);
+
+ if (mode == DFmode)
+ {
+ if (REAL_VALUES_LESS (d, dfhigh) && REAL_VALUES_LESS (dflow, d))
+ return 1;
+ }
+ else
+ {
+ if (REAL_VALUES_LESS (d, sfhigh) && REAL_VALUES_LESS (sflow, d))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return truth value if a memory operand fits in a single instruction
+ (i.e., register + small offset) or (register + register). */
+
+int
+simple_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx addr, plus0, plus1;
+
+ /* Eliminate non-memory operations. */
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ /* dword operations really put out 2 instructions, so eliminate them. */
+ /* ??? This isn't strictly correct. It is OK to accept multiword modes
+ here, since the length attributes are being set correctly, but only
+ if the address is offsettable. */
+ if (GET_MODE_SIZE (GET_MODE (op)) > UNITS_PER_WORD)
+ return 0;
+
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+
+ {
+ case REG:
+ return 1;
+
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+
+ if (GET_CODE (plus0) == REG && GET_CODE (plus1) == CONST_INT
+ && SMALL_INT (plus1))
+ {
+ return 1;
+ }
+ else if (GET_CODE (plus1) == REG && GET_CODE (plus0) == CONST_INT)
+ {
+ return 1;
+ }
+ else if (GET_CODE (plus0) == REG && GET_CODE (plus1) == REG)
+ {
+ return 1;
+ }
+ else
+ return 0;
+
+ case SYMBOL_REF:
+ return 0;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Return nonzero for a memory address that can be used to load or store
+ a doubleword. */
+
+int
+double_memory_operand (rtx op, enum machine_mode mode)
+{
+ rtx addr;
+
+ if (GET_CODE (op) != MEM || !memory_operand (op, mode))
+ {
+ /* During reload, we accept a pseudo register if it has an
+ appropriate memory address. If we don't do this, we will
+ wind up reloading into a register, and then reloading that
+ register from memory, when we could just reload directly from
+ memory. */
+ if (reload_in_progress
+ && GET_CODE (op) == REG
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[REGNO (op)] < 0
+ && reg_equiv_mem[REGNO (op)] != 0
+ && double_memory_operand (reg_equiv_mem[REGNO (op)], mode))
+ return 1;
+ return 0;
+ }
+
+ /* Make sure that 4 added to the address is a valid memory address.
+ This essentially just checks for overflow in an added constant. */
+
+ addr = XEXP (op, 0);
+
+ if (CONSTANT_ADDRESS_P (addr))
+ return 1;
+
+ return memory_address_p ((GET_MODE_CLASS (mode) == MODE_INT
+ ? SImode : SFmode), plus_constant (addr, 4));
+}
+
+/* Implement REG_OK_FOR_BASE_P -and- REG_OK_FOR_INDEX_P. */
+int
+microblaze_regno_ok_for_base_p (int regno, int strict)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ if (!strict)
+ return true;
+ regno = reg_renumber[regno];
+ }
+
+ /* These fake registers will be eliminated to either the stack or
+ hard frame pointer, both of which are usually valid base registers.
+ Reload deals with the cases where the eliminated form isn't valid. */
+ if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
+ return true;
+
+ return GP_REG_P (regno);
+}
+
+/* Return true if X is a valid base register for the given mode.
+ Allow only hard registers if STRICT. */
+
+static bool
+microblaze_valid_base_register_p (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int strict)
+{
+ if (!strict && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (GET_CODE (x) == REG
+ && microblaze_regno_ok_for_base_p (REGNO (x), strict));
+}
+
+static bool
+microblaze_classify_unspec (struct microblaze_address_info *info, rtx x)
+{
+ info->symbol_type = SYMBOL_TYPE_GENERAL;
+ info->symbol = XVECEXP (x, 0, 0);
+
+ if (XINT (x, 1) == UNSPEC_GOTOFF)
+ {
+ info->regA = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
+ info->type = ADDRESS_GOTOFF;
+ }
+ else if (XINT (x, 1) == UNSPEC_PLT)
+ {
+ info->type = ADDRESS_PLT;
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+
+/* Return true if X is a valid index register for the given mode.
+ Allow only hard registers if STRICT. */
+
+static bool
+microblaze_valid_index_register_p (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int strict)
+{
+ if (!strict && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (GET_CODE (x) == REG
+ /* A base register is good enough to be an index register on MicroBlaze. */
+ && microblaze_regno_ok_for_base_p (REGNO (x), strict));
+}
+
+/* Get the base register for accessing a value in memory or via a
+ symbol ref. Used for the MicroBlaze Small Data Area Pointer
+ Optimization. */
+static int
+get_base_reg (rtx x)
+{
+ tree decl;
+ int base_reg = (flag_pic ? MB_ABI_PIC_ADDR_REGNUM : MB_ABI_BASE_REGNUM);
+
+ if (TARGET_XLGPOPT
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_SMALL_P (x) && (decl = SYMBOL_REF_DECL (x)) != NULL)
+ {
+ if (TREE_READONLY (decl))
+ base_reg = MB_ABI_GPRO_REGNUM;
+ else
+ base_reg = MB_ABI_GPRW_REGNUM;
+ }
+
+ return base_reg;
+}
+
+/* Return true if X is a valid address for machine mode MODE. If it is,
+ fill in INFO appropriately. STRICT is true if we should only accept
+ hard base registers.
+
+ type regA regB offset symbol
+
+ ADDRESS_INVALID NULL NULL NULL NULL
+
+ ADDRESS_REG %0 NULL const_0 / NULL
+ const_int
+ ADDRESS_REG_INDEX %0 %1 NULL NULL
+
+ ADDRESS_SYMBOLIC r0 / NULL NULL symbol
+ sda_base_reg
+
+ ADDRESS_CONST_INT r0 NULL const NULL
+
+ For modes spanning multiple registers (DFmode in 32-bit GPRs,
+ DImode, TImode), indexed addressing cannot be used because
+ adjacent memory cells are accessed by adding word-sized offsets
+ during assembly output. */
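+
+ /* Illustrative classifications: (reg r5) is ADDRESS_REG with offset 0;
+ (plus (reg r5) (const_int 8)) is ADDRESS_REG with offset 8;
+ (plus (reg r5) (reg r6)) is ADDRESS_REG_INDEX; a bare
+ (symbol_ref "foo") is ADDRESS_SYMBOLIC with regA set to r0 or an SDA
+ base register; and (const_int 64) is ADDRESS_CONST_INT. */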
+
+static bool
+microblaze_classify_address (struct microblaze_address_info *info, rtx x,
+ enum machine_mode mode, int strict)
+{
+ rtx xplus0;
+ rtx xplus1;
+
+ info->type = ADDRESS_INVALID;
+ info->regA = NULL;
+ info->regB = NULL;
+ info->offset = NULL;
+ info->symbol = NULL;
+ info->symbol_type = SYMBOL_TYPE_INVALID;
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ {
+ info->type = ADDRESS_REG;
+ info->regA = x;
+ info->offset = const0_rtx;
+ return microblaze_valid_base_register_p (info->regA, mode, strict);
+ }
+ case PLUS:
+ {
+ xplus0 = XEXP (x, 0);
+ xplus1 = XEXP (x, 1);
+
+ if (microblaze_valid_base_register_p (xplus0, mode, strict))
+ {
+ info->type = ADDRESS_REG;
+ info->regA = xplus0;
+
+ if (GET_CODE (xplus1) == CONST_INT)
+ {
+ info->offset = xplus1;
+ return true;
+ }
+ else if (GET_CODE (xplus1) == UNSPEC)
+ {
+ return microblaze_classify_unspec (info, xplus1);
+ }
+ else if ((GET_CODE (xplus1) == SYMBOL_REF ||
+ GET_CODE (xplus1) == LABEL_REF) && flag_pic == 2)
+ {
+ return false;
+ }
+ else if (GET_CODE (xplus1) == SYMBOL_REF ||
+ GET_CODE (xplus1) == LABEL_REF ||
+ GET_CODE (xplus1) == CONST)
+ {
+ if (GET_CODE (XEXP (xplus1, 0)) == UNSPEC)
+ return microblaze_classify_unspec (info, XEXP (xplus1, 0));
+ else if (flag_pic == 2)
+ {
+ return false;
+ }
+ info->type = ADDRESS_SYMBOLIC;
+ info->symbol = xplus1;
+ info->symbol_type = SYMBOL_TYPE_GENERAL;
+ return true;
+ }
+ else if (GET_CODE (xplus1) == REG
+ && microblaze_valid_index_register_p (xplus1, mode,
+ strict)
+ && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD))
+ {
+ /* Disallow indexed addressing for modes wider than a word. */
+ info->type = ADDRESS_REG_INDEX;
+ info->regB = xplus1;
+ return true;
+ }
+ }
+ break;
+ }
+ case CONST_INT:
+ {
+ info->regA = gen_rtx_raw_REG (mode, 0);
+ info->type = ADDRESS_CONST_INT;
+ info->offset = x;
+ return true;
+ }
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ {
+ info->type = ADDRESS_SYMBOLIC;
+ info->symbol_type = SYMBOL_TYPE_GENERAL;
+ info->symbol = x;
+ info->regA = gen_rtx_raw_REG (mode, get_base_reg (x));
+
+ if (GET_CODE (x) == CONST)
+ {
+ return !(flag_pic && pic_address_needs_scratch (x));
+ }
+ else if (flag_pic == 2)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ case UNSPEC:
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ return microblaze_classify_unspec (info, x);
+ }
+
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+/* This function is used to implement GO_IF_LEGITIMATE_ADDRESS. It
+ returns a nonzero value if X is a legitimate address for a memory
+ operand of the indicated MODE. STRICT is nonzero if this function
+ is called during reload. */
+
+bool
+microblaze_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ struct microblaze_address_info addr;
+
+ return microblaze_classify_address (&addr, x, mode, strict);
+}
+
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This is used from only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was
+ called. In some cases it is useful to look at this to decide what
+ needs to be done.
+
+ It is always safe for this function to do nothing. It exists to
+ recognize opportunities to optimize the output.
+
+ For the MicroBlaze, transform:
+
+ memory(X + <large int>)
+
+ into:
+
+ Y = <large int> & ~0x7fff;
+ Z = X + Y
+ memory (Z + (<large int> & 0x7fff));
+
+ This is for CSE to find several similar references, and only use one Z.
+
+ When PIC, convert addresses of the form memory (symbol+large int) to
+ memory (reg+large int). */
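+
+ /* Worked example: for memory (X + 0x12345) the masking below yields
+ Y = 0x12345 & ~0x7fff = 0x10000 and a residual offset of
+ 0x12345 & 0x7fff = 0x2345, so the access becomes
+ memory ((X + 0x10000) + 0x2345), where 0x2345 fits in a 16-bit
+ signed immediate. */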
+
+static rtx
+microblaze_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ register rtx xinsn = x, result;
+
+ if (GET_CODE (xinsn) == CONST
+ && flag_pic && pic_address_needs_scratch (xinsn))
+ {
+ rtx ptr_reg = gen_reg_rtx (Pmode);
+ rtx constant = XEXP (XEXP (xinsn, 0), 1);
+
+ emit_move_insn (ptr_reg, XEXP (XEXP (xinsn, 0), 0));
+
+ result = gen_rtx_PLUS (Pmode, ptr_reg, constant);
+ if (SMALL_INT (constant))
+ return result;
+ /* Otherwise we fall through so the code below will fix the
+ constant. */
+ xinsn = result;
+ }
+
+ if (GET_CODE (xinsn) == PLUS)
+ {
+ register rtx xplus0 = XEXP (xinsn, 0);
+ register rtx xplus1 = XEXP (xinsn, 1);
+ register enum rtx_code code0 = GET_CODE (xplus0);
+ register enum rtx_code code1 = GET_CODE (xplus1);
+
+ if (code0 != REG && code1 == REG)
+ {
+ xplus0 = XEXP (xinsn, 1);
+ xplus1 = XEXP (xinsn, 0);
+ code0 = GET_CODE (xplus0);
+ code1 = GET_CODE (xplus1);
+ }
+
+ if (code0 == REG && REG_OK_FOR_BASE_P (xplus0)
+ && code1 == CONST_INT && !SMALL_INT (xplus1))
+ {
+ rtx int_reg = gen_reg_rtx (Pmode);
+ rtx ptr_reg = gen_reg_rtx (Pmode);
+
+ emit_move_insn (int_reg, GEN_INT (INTVAL (xplus1) & ~0x7fff));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ ptr_reg,
+ gen_rtx_PLUS (Pmode, xplus0, int_reg)));
+
+ result = gen_rtx_PLUS (Pmode, ptr_reg,
+ GEN_INT (INTVAL (xplus1) & 0x7fff));
+ return result;
+ }
+
+ if (code0 == REG && REG_OK_FOR_BASE_P (xplus0) && flag_pic == 2)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ if (code1 == CONST)
+ {
+ xplus1 = XEXP (xplus1, 0);
+ code1 = GET_CODE (xplus1);
+ }
+ if (code1 == SYMBOL_REF)
+ {
+ result =
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xplus1), UNSPEC_GOTOFF);
+ result = gen_rtx_CONST (Pmode, result);
+ result = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, result);
+ result = gen_const_mem (Pmode, result);
+ result = gen_rtx_PLUS (Pmode, xplus0, result);
+ return result;
+ }
+ }
+ }
+
+ if (GET_CODE (xinsn) == SYMBOL_REF)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ result = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xinsn), UNSPEC_GOTOFF);
+ result = gen_rtx_CONST (Pmode, result);
+ result = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, result);
+ result = gen_const_mem (Pmode, result);
+ return result;
+ }
+
+ return x;
+}
+
+/* Block Moves. */
+
+#define MAX_MOVE_REGS 8
+#define MAX_MOVE_BYTES (MAX_MOVE_REGS * UNITS_PER_WORD)
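+
+ /* With 32-bit GPRs (UNITS_PER_WORD == 4), MAX_MOVE_BYTES is
+ 8 * 4 = 32 bytes per straight-line copy or loop iteration. */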
+
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+ Assume that the areas do not overlap. */
+
+static void
+microblaze_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+{
+ HOST_WIDE_INT offset, delta;
+ unsigned HOST_WIDE_INT bits;
+ int i;
+ enum machine_mode mode;
+ rtx *regs;
+
+ bits = BITS_PER_WORD;
+ mode = mode_for_size (bits, MODE_INT, 0);
+ delta = bits / BITS_PER_UNIT;
+
+ /* Allocate a buffer for the temporary registers. */
+ regs = XALLOCAVEC (rtx, length / delta);
+
+ /* Load as many BITS-sized chunks as possible; this path is only
+ used when the operands are sufficiently aligned for word loads. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ {
+ regs[i] = gen_reg_rtx (mode);
+ emit_move_insn (regs[i], adjust_address (src, mode, offset));
+ }
+
+ /* Copy the chunks to the destination. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ emit_move_insn (adjust_address (dest, mode, offset), regs[i]);
+
+ /* Mop up any left-over bytes. */
+ if (offset < length)
+ {
+ src = adjust_address (src, BLKmode, offset);
+ dest = adjust_address (dest, BLKmode, offset);
+ move_by_pieces (dest, src, length - offset,
+ MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0);
+ }
+}
+
+/* Helper function for doing a loop-based block operation on memory
+ reference MEM. Each iteration of the loop will operate on LENGTH
+ bytes of MEM.
+
+ Create a new base register for use within the loop and point it to
+ the start of MEM. Create a new memory reference that uses this
+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
+
+static void
+microblaze_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
+ rtx * loop_reg, rtx * loop_mem)
+{
+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+ /* Although the new mem does not refer to a known location,
+ it does keep up to LENGTH bytes of alignment. */
+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
+ set_mem_align (*loop_mem,
+ MIN ((HOST_WIDE_INT) MEM_ALIGN (mem),
+ length * BITS_PER_UNIT));
+}
+
+
+/* Move LENGTH bytes from SRC to DEST using a loop that moves MAX_MOVE_BYTES
+ per iteration. LENGTH must be at least MAX_MOVE_BYTES. Assume that the
+ memory regions do not overlap. */
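+
+ /* For example, with MAX_MOVE_BYTES == 32 a 100-byte copy runs the loop
+ over 96 bytes (three 32-byte iterations) and then moves the remaining
+ 4 bytes with straight-line code. */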
+
+static void
+microblaze_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length)
+{
+ rtx label, src_reg, dest_reg, final_src;
+ HOST_WIDE_INT leftover;
+
+ leftover = length % MAX_MOVE_BYTES;
+ length -= leftover;
+
+ /* Create registers and memory references for use within the loop. */
+ microblaze_adjust_block_mem (src, MAX_MOVE_BYTES, &src_reg, &src);
+ microblaze_adjust_block_mem (dest, MAX_MOVE_BYTES, &dest_reg, &dest);
+
+ /* Calculate the value that SRC_REG should have after the last iteration
+ of the loop. */
+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+ 0, 0, OPTAB_WIDEN);
+
+ /* Emit the start of the loop. */
+ label = gen_label_rtx ();
+ emit_label (label);
+
+ /* Emit the loop body. */
+ microblaze_block_move_straight (dest, src, MAX_MOVE_BYTES);
+
+ /* Move on to the next block. */
+ emit_move_insn (src_reg, plus_constant (src_reg, MAX_MOVE_BYTES));
+ emit_move_insn (dest_reg, plus_constant (dest_reg, MAX_MOVE_BYTES));
+
+ /* Emit the test & branch. */
+ emit_insn (gen_cbranchsi4 (gen_rtx_NE (SImode, src_reg, final_src),
+ src_reg, final_src, label));
+
+ /* Mop up any left-over bytes. */
+ if (leftover)
+ microblaze_block_move_straight (dest, src, leftover);
+}
+
+/* Expand a movmemsi instruction. */
+
+bool
+microblaze_expand_block_move (rtx dest, rtx src, rtx length, rtx align_rtx)
+{
+
+ if (GET_CODE (length) == CONST_INT)
+ {
+ HOST_WIDE_INT bytes = INTVAL (length);
+ int align = INTVAL (align_rtx);
+
+ if (align > UNITS_PER_WORD)
+ {
+ align = UNITS_PER_WORD; /* We can't do any better. */
+ }
+ else if (align < UNITS_PER_WORD)
+ {
+ if (INTVAL (length) <= MAX_MOVE_BYTES)
+ {
+ move_by_pieces (dest, src, bytes, align, 0);
+ return true;
+ }
+ else
+ return false;
+ }
+
+ if (INTVAL (length) <= 2 * MAX_MOVE_BYTES)
+ {
+ microblaze_block_move_straight (dest, src, INTVAL (length));
+ return true;
+ }
+ else if (optimize)
+ {
+ microblaze_block_move_loop (dest, src, INTVAL (length));
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool
+microblaze_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case MEM:
+ {
+ int num_words = (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ? 2 : 1;
+ if (simple_memory_operand (x, mode))
+ *total = COSTS_N_INSNS (2 * num_words);
+ else
+ *total = COSTS_N_INSNS (2 * (2 * num_words));
+
+ return true;
+ }
+ case NOT:
+ {
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (2);
+ }
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ case AND:
+ case IOR:
+ case XOR:
+ {
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (2);
+ }
+ else
+ *total = COSTS_N_INSNS (1);
+
+ return false;
+ }
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ {
+ if (TARGET_BARREL_SHIFT)
+ {
+ if (MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v5.00.a")
+ >= 0)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ }
+ else if (!TARGET_SOFT_MUL)
+ *total = COSTS_N_INSNS (1);
+ else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ /* Add 1 to make shift slightly more expensive than add. */
+ *total = COSTS_N_INSNS (INTVAL (XEXP (x, 1))) + 1;
+ /* Reduce shift costs in special circumstances. */
+ if (optimize_size && INTVAL (XEXP (x, 1)) > 5)
+ *total -= 2;
+ if (!optimize_size && INTVAL (XEXP (x, 1)) > 17)
+ *total -= 2;
+ }
+ else
+ /* Double the worst cost of shifts when there is no barrel shifter and
+ the shift amount is in a reg. */
+ *total = COSTS_N_INSNS (32 * 4);
+ return true;
+ }
+ case PLUS:
+ case MINUS:
+ {
+ if (mode == SFmode || mode == DFmode)
+ {
+ if (TARGET_HARD_FLOAT)
+ *total = COSTS_N_INSNS (6);
+ return true;
+ }
+ else if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+ else
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
+ return false;
+ }
+ case NEG:
+ {
+ if (mode == DImode)
+ *total = COSTS_N_INSNS (4);
+
+ return false;
+ }
+ case MULT:
+ {
+ if (mode == SFmode)
+ {
+ if (TARGET_HARD_FLOAT)
+ *total = COSTS_N_INSNS (6);
+ }
+ else if (!TARGET_SOFT_MUL)
+ {
+ if (MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v5.00.a")
+ >= 0)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (3);
+ }
+ else
+ *total = COSTS_N_INSNS (10);
+ return true;
+ }
+ case DIV:
+ case UDIV:
+ {
+ if (mode == SFmode)
+ {
+ if (TARGET_HARD_FLOAT)
+ *total = COSTS_N_INSNS (23);
+ }
+ return false;
+ }
+ case SIGN_EXTEND:
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ case ZERO_EXTEND:
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/* Return the number of instructions needed to load or store a value
+ of mode MODE at X. Return 0 if X isn't valid for MODE. */
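+
+ /* For instance, (plus (reg r5) (const_int 8)) needs one instruction,
+ while an offset outside the 16-bit signed immediate range needs two,
+ since an imm prefix instruction must supply the high half. */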
+
+static int
+microblaze_address_insns (rtx x, enum machine_mode mode)
+{
+ struct microblaze_address_info addr;
+
+ if (microblaze_classify_address (&addr, x, mode, false))
+ {
+ switch (addr.type)
+ {
+ case ADDRESS_REG:
+ if (SMALL_INT (addr.offset))
+ return 1;
+ else
+ return 2;
+ case ADDRESS_CONST_INT:
+ if (SMALL_INT (x))
+ return 1;
+ else
+ return 2;
+ case ADDRESS_REG_INDEX:
+ case ADDRESS_SYMBOLIC:
+ return 1;
+ case ADDRESS_GOTOFF:
+ return 2;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Provide the costs of an addressing mode that contains ADDR.
+ If ADDR is not a valid address, its cost is irrelevant. */
+static int
+microblaze_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ return COSTS_N_INSNS (microblaze_address_insns (addr, GET_MODE (addr)));
+}
+
+/* Return nonzero if X is an address which needs a temporary register when
+ reloaded while generating PIC code. */
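+
+ /* For example, (const (plus (symbol_ref "foo") (const_int 100000)))
+ needs a scratch register, since 100000 does not fit in a 16-bit
+ signed immediate; with flag_pic == 2 any symbol plus constant
+ needs one. */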
+
+int
+pic_address_needs_scratch (rtx x)
+{
+ /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && (flag_pic == 2 || !SMALL_INT (XEXP (XEXP (x, 0), 1))))
+ return 1;
+
+ return 0;
+}
+
+/* Argument support functions. */
+/* Initialize CUMULATIVE_ARGS for a function. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS * cum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED)
+{
+ static CUMULATIVE_ARGS zero_cum;
+ tree param, next_param;
+
+ *cum = zero_cum;
+
+ /* Determine if this function has variable arguments. A function
+ takes a fixed number of arguments only if the last entry of
+ TYPE_ARG_TYPES is 'void_type_node'. The standard MicroBlaze calling
+ sequence passes all arguments in the general purpose registers in
+ this case. */
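+
+ /* For example, for 'int f (int, int)' the argument list ends in
+ void_type_node, while for 'int f (int, ...)' it does not, so the
+ loop below sets gp_reg_found only in the varargs case. */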
+
+ for (param = fntype ? TYPE_ARG_TYPES (fntype) : 0;
+ param != 0; param = next_param)
+ {
+ next_param = TREE_CHAIN (param);
+ if (next_param == 0 && TREE_VALUE (param) != void_type_node)
+ cum->gp_reg_found = 1;
+ }
+}
+
+/* Advance the argument to the next argument position. */
+
+static void
+microblaze_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ cum->arg_number++;
+ switch (mode)
+ {
+ case VOIDmode:
+ break;
+
+ default:
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
+
+ cum->gp_reg_found = 1;
+ cum->arg_words += ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+ break;
+
+ case BLKmode:
+ cum->gp_reg_found = 1;
+ cum->arg_words += ((int_size_in_bytes (type) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+ break;
+
+ case SFmode:
+ cum->arg_words++;
+ if (!cum->gp_reg_found && cum->arg_number <= 2)
+ cum->fp_code += 1 << ((cum->arg_number - 1) * 2);
+ break;
+
+ case DFmode:
+ cum->arg_words += 2;
+ if (!cum->gp_reg_found && cum->arg_number <= 2)
+ cum->fp_code += 2 << ((cum->arg_number - 1) * 2);
+ break;
+
+ case DImode:
+ cum->gp_reg_found = 1;
+ cum->arg_words += 2;
+ break;
+
+ case QImode:
+ case HImode:
+ case SImode:
+ case TImode:
+ cum->gp_reg_found = 1;
+ cum->arg_words++;
+ break;
+ }
+}
+
+/* Return an RTL expression containing the register for the given mode,
+ or 0 if the argument is to be passed on the stack. */
+
+static rtx
+microblaze_function_arg (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ rtx ret;
+ int regbase = -1;
+ int *arg_words = &cum->arg_words;
+
+ cum->last_arg_fp = 0;
+ switch (mode)
+ {
+ case SFmode:
+ case DFmode:
+ case VOIDmode:
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode:
+ regbase = GP_ARG_FIRST;
+ break;
+ default:
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
+ /* Drops through. */
+ case BLKmode:
+ regbase = GP_ARG_FIRST;
+ break;
+ }
+
+ if (*arg_words >= MAX_ARGS_IN_REGISTERS)
+ ret = 0;
+ else
+ {
+ gcc_assert (regbase != -1);
+
+ ret = gen_rtx_REG (mode, regbase + *arg_words);
+ }
+
+ if (mode == VOIDmode)
+ {
+ if (cum->num_adjusts > 0)
+ ret = gen_rtx_PARALLEL ((enum machine_mode) cum->fp_code,
+ gen_rtvec_v (cum->num_adjusts, cum->adjust));
+ }
+
+ return ret;
+}
+
+/* Return number of bytes of argument to put in registers. */
+static int
+function_arg_partial_bytes (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if ((mode == BLKmode
+ || GET_MODE_CLASS (mode) != MODE_COMPLEX_INT
+ || GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
+ && cum->arg_words < MAX_ARGS_IN_REGISTERS)
+ {
+ int words;
+ if (mode == BLKmode)
+ words = ((int_size_in_bytes (type) + UNITS_PER_WORD - 1)
+ / UNITS_PER_WORD);
+ else
+ words = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (words + cum->arg_words <= MAX_ARGS_IN_REGISTERS)
+ return 0; /* structure fits in registers */
+
+ return (MAX_ARGS_IN_REGISTERS - cum->arg_words) * UNITS_PER_WORD;
+ }
+
+ else if (mode == DImode && cum->arg_words == MAX_ARGS_IN_REGISTERS - 1)
+ return UNITS_PER_WORD;
+
+ return 0;
+}
+
+/* Convert a version number of the form "vX.YY.Z" to an integer encoding
+ for easier range comparison. */
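+ /* For example, "v4.00.b" encodes to 40001 and "v5.00.a" to 50000, so
+ later versions compare greater as plain integers (majors are single
+ digits under this template). */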
+static int
+microblaze_version_to_int (const char *version)
+{
+ const char *p, *v;
+ const char *tmpl = "vX.YY.Z";
+ int iver = 0;
+
+ p = version;
+ v = tmpl;
+
+ while (*v)
+ {
+ if (*v == 'X')
+ { /* Looking for major */
+ if (!(*p >= '0' && *p <= '9'))
+ return -1;
+ iver += (int) (*p - '0');
+ iver *= 10;
+ }
+ else if (*v == 'Y')
+ { /* Looking for minor */
+ if (!(*p >= '0' && *p <= '9'))
+ return -1;
+ iver += (int) (*p - '0');
+ iver *= 10;
+ }
+ else if (*v == 'Z')
+ { /* Looking for compat */
+ if (!(*p >= 'a' && *p <= 'z'))
+ return -1;
+ iver *= 10;
+ iver += (int) (*p - 'a');
+ }
+ else
+ {
+ if (*p != *v)
+ return -1;
+ }
+
+ v++;
+ p++;
+ }
+
+ if (*p)
+ return -1;
+
+ return iver;
+}
+
+static bool
+microblaze_handle_option (size_t code,
+ const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mno_clearbss:
+ flag_zero_initialized_in_bss = 0;
+ warning (0, "-mno-clearbss is deprecated; use -fno-zero-initialized-in-bss");
+ break;
+ case OPT_mxl_stack_check:
+ warning (0, "-mxl_stack_check is deprecated; use -fstack-check");
+ break;
+ }
+ return true;
+}
+
+
+static void
+microblaze_option_override (void)
+{
+ register int i, start;
+ register int regno;
+ register enum machine_mode mode;
+ int ver;
+
+ microblaze_section_threshold = (global_options_set.x_g_switch_value
+ ? g_switch_value
+ : MICROBLAZE_DEFAULT_GVALUE);
+
+ /* Check the MicroBlaze CPU version for any special action to be done. */
+ if (microblaze_select_cpu == NULL)
+ microblaze_select_cpu = MICROBLAZE_DEFAULT_CPU;
+ ver = microblaze_version_to_int (microblaze_select_cpu);
+ if (ver == -1)
+ {
+ error ("%qs is an invalid argument to -mcpu=", microblaze_select_cpu);
+ }
+
+ ver = MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v3.00.a");
+ if (ver < 0)
+ {
+ /* No hardware exceptions in earlier versions. So no worries. */
+#if 0
+ microblaze_select_flags &= ~(MICROBLAZE_MASK_NO_UNSAFE_DELAY);
+#endif
+ microblaze_no_unsafe_delay = 0;
+ microblaze_pipe = MICROBLAZE_PIPE_3;
+ }
+ else if (ver == 0
+ || (MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v4.00.b")
+ == 0))
+ {
+#if 0
+ microblaze_select_flags |= (MICROBLAZE_MASK_NO_UNSAFE_DELAY);
+#endif
+ microblaze_no_unsafe_delay = 1;
+ microblaze_pipe = MICROBLAZE_PIPE_3;
+ }
+ else
+ {
+ /* Use the 5-stage pipeline model even on area-optimized 3-stage
+ variants. */
+#if 0
+ microblaze_select_flags &= ~(MICROBLAZE_MASK_NO_UNSAFE_DELAY);
+#endif
+ microblaze_no_unsafe_delay = 0;
+ microblaze_pipe = MICROBLAZE_PIPE_5;
+ if (MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v5.00.a") == 0
+ || MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu,
+ "v5.00.b") == 0
+ || MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu,
+ "v5.00.c") == 0)
+ {
+ /* Pattern compares are turned on by default only when compiling
+ for MB v5.00.a through v5.00.c. */
+ target_flags |= MASK_PATTERN_COMPARE;
+ }
+ }
+
+ ver = MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu, "v6.00.a");
+ if (ver < 0)
+ {
+ if (TARGET_MULTIPLY_HIGH)
+ warning (0,
+ "-mxl-multiply-high can be used only with -mcpu=v6.00.a or greater");
+ }
+
+ if (TARGET_MULTIPLY_HIGH && TARGET_SOFT_MUL)
+ error ("-mxl-multiply-high requires -mno-xl-soft-mul");
+
+ /* Always use DFA scheduler. */
+ microblaze_sched_use_dfa = 1;
+
+#if 0
+ microblaze_abicalls = MICROBLAZE_ABICALLS_NO;
+#endif
+
+ /* Initialize the high, low values for legit floating point constants. */
+ real_maxval (&dfhigh, 0, DFmode);
+ real_maxval (&dflow, 1, DFmode);
+ real_maxval (&sfhigh, 0, SFmode);
+ real_maxval (&sflow, 1, SFmode);
+
+ microblaze_print_operand_punct['?'] = 1;
+ microblaze_print_operand_punct['#'] = 1;
+ microblaze_print_operand_punct['&'] = 1;
+ microblaze_print_operand_punct['!'] = 1;
+ microblaze_print_operand_punct['*'] = 1;
+ microblaze_print_operand_punct['@'] = 1;
+ microblaze_print_operand_punct['.'] = 1;
+ microblaze_print_operand_punct['('] = 1;
+ microblaze_print_operand_punct[')'] = 1;
+ microblaze_print_operand_punct['['] = 1;
+ microblaze_print_operand_punct[']'] = 1;
+ microblaze_print_operand_punct['<'] = 1;
+ microblaze_print_operand_punct['>'] = 1;
+ microblaze_print_operand_punct['{'] = 1;
+ microblaze_print_operand_punct['}'] = 1;
+ microblaze_print_operand_punct['^'] = 1;
+ microblaze_print_operand_punct['$'] = 1;
+ microblaze_print_operand_punct['+'] = 1;
+
+ /* Set up array to map GCC register number to debug register number.
+ Ignore the special purpose register numbers. */
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ microblaze_dbx_regno[i] = -1;
+
+ start = GP_DBX_FIRST - GP_REG_FIRST;
+ for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++)
+ microblaze_dbx_regno[i] = i + start;
+
+ /* Set up array giving whether a given register can hold a given mode. */
+
+ for (mode = VOIDmode;
+ mode != MAX_MACHINE_MODE; mode = (enum machine_mode) ((int) mode + 1))
+ {
+ register int size = GET_MODE_SIZE (mode);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ register int ok;
+
+ if (mode == CCmode)
+ {
+ ok = (ST_REG_P (regno) || GP_REG_P (regno));
+ }
+ else if (GP_REG_P (regno))
+ ok = ((regno & 1) == 0 || size <= UNITS_PER_WORD);
+ else
+ ok = 0;
+
+ microblaze_hard_regno_mode_ok[(int) mode][regno] = ok;
+ }
+ }
+}
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options microblaze_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Return true if FUNC is an interrupt function as specified
+ by the "interrupt_handler" attribute. */
+
+static int
+microblaze_interrupt_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return true if FUNC is an interrupt function which uses
+ normal return, indicated by the "save_volatiles" attribute. */
+
+static int
+microblaze_save_volatiles (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("save_volatiles", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+/* Return whether the current function is tagged with the
+ 'interrupt_handler' attribute, i.e., whether it should use a
+ return-from-interrupt rather than a normal function return. */
+int
+microblaze_is_interrupt_handler (void)
+{
+ return interrupt_handler;
+}
+
+/* Determine whether a register must be saved/restored across a call. */
+static int
+microblaze_must_save_register (int regno)
+{
+ if (pic_offset_table_rtx &&
+ (regno == MB_ABI_PIC_ADDR_REGNUM) && df_regs_ever_live_p (regno))
+ return 1;
+
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ return 1;
+
+ if (frame_pointer_needed && (regno == HARD_FRAME_POINTER_REGNUM))
+ return 1;
+
+ if (!current_function_is_leaf)
+ {
+ if (regno == MB_ABI_SUB_RETURN_ADDR_REGNUM)
+ return 1;
+ if ((interrupt_handler || save_volatiles) &&
+ (regno >= 3 && regno <= 12))
+ return 1;
+ }
+
+ if (interrupt_handler)
+ {
+ if (df_regs_ever_live_p (regno)
+ || regno == MB_ABI_MSR_SAVE_REG
+ || regno == MB_ABI_ASM_TEMP_REGNUM
+ || regno == MB_ABI_EXCEPTION_RETURN_ADDR_REGNUM)
+ return 1;
+ }
+
+ if (save_volatiles)
+ {
+ if (df_regs_ever_live_p (regno)
+ || regno == MB_ABI_ASM_TEMP_REGNUM
+ || regno == MB_ABI_EXCEPTION_RETURN_ADDR_REGNUM)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return the bytes needed to compute the frame pointer from the current
+ stack pointer.
+
+ MicroBlaze stack frames look like:
+
+
+
+ Before call After call
+ +-----------------------+ +-----------------------+
+ high | | | |
+ mem. | local variables, | | local variables, |
+ | callee saved and | | callee saved and |
+ | temps | | temps |
+ +-----------------------+ +-----------------------+
+ | arguments for called | | arguments for called |
+ | subroutines | | subroutines |
+ | (optional) | | (optional) |
+ +-----------------------+ +-----------------------+
+ | Link register | | Link register |
+ SP->| | | |
+ +-----------------------+ +-----------------------+
+ | |
+ | local variables, |
+ | callee saved and |
+ | temps |
+ +-----------------------+
+ | MSR (optional if, |
+ | interrupt handler) |
+ +-----------------------+
+ | |
+ | alloca allocations |
+ | |
+ +-----------------------+
+ | |
+ | arguments for called |
+ | subroutines |
+ | (optional) |
+ | |
+ +-----------------------+
+ | Link register |
+ low FP,SP->| |
+ memory +-----------------------+
+
+*/
+
+static HOST_WIDE_INT
+compute_frame_size (HOST_WIDE_INT size)
+{
+ int regno;
+ HOST_WIDE_INT total_size; /* # bytes that the entire frame takes up. */
+ HOST_WIDE_INT var_size; /* # bytes that local variables take up. */
+ HOST_WIDE_INT args_size; /* # bytes that outgoing arguments take up. */
+ int link_debug_size; /* # bytes for link register. */
+ HOST_WIDE_INT gp_reg_size; /* # bytes needed to store callee-saved gp regs. */
+ long mask; /* mask of saved gp registers. */
+
+ interrupt_handler =
+ microblaze_interrupt_function_p (current_function_decl);
+ save_volatiles = microblaze_save_volatiles (current_function_decl);
+
+ gp_reg_size = 0;
+ mask = 0;
+ var_size = size;
+ args_size = crtl->outgoing_args_size;
+
+ if ((args_size == 0) && cfun->calls_alloca)
+ args_size = NUM_OF_ARGS * UNITS_PER_WORD;
+
+ total_size = var_size + args_size;
+
+ if (flag_pic == 2)
+ /* force setting GOT. */
+ df_set_regs_ever_live (MB_ABI_PIC_ADDR_REGNUM, true);
+
+ /* Calculate space needed for gp registers. */
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ {
+ if (microblaze_must_save_register (regno))
+ {
+
+ if (regno != MB_ABI_SUB_RETURN_ADDR_REGNUM)
+ /* Don't account for the link register; it is accounted for separately below. */
+ gp_reg_size += GET_MODE_SIZE (SImode);
+
+ mask |= (1L << (regno - GP_REG_FIRST));
+ }
+ }
+
+ total_size += gp_reg_size;
+
+ /* Add 4 bytes for MSR. */
+ if (interrupt_handler)
+ total_size += 4;
+
+ /* No space needs to be allocated for the link register in leaf
+ functions with no other stack requirements. */
+ if (total_size == 0 && current_function_is_leaf)
+ link_debug_size = 0;
+ else
+ link_debug_size = UNITS_PER_WORD;
+
+ total_size += link_debug_size;
+
+ /* Save other computed information. */
+ current_frame_info.total_size = total_size;
+ current_frame_info.var_size = var_size;
+ current_frame_info.args_size = args_size;
+ current_frame_info.gp_reg_size = gp_reg_size;
+ current_frame_info.mask = mask;
+ current_frame_info.initialized = reload_completed;
+ current_frame_info.num_gp = gp_reg_size / UNITS_PER_WORD;
+ current_frame_info.link_debug_size = link_debug_size;
+
+ if (mask)
+ /* Offset at which callee-saved GP regs are stored. */
+ current_frame_info.gp_offset = (total_size - gp_reg_size);
+ else
+ current_frame_info.gp_offset = 0;
+
+ /* Ok, we're done. */
+ return total_size;
+}
+
+/* Make sure that we're not trying to eliminate to the wrong hard frame
+ pointer. */
+
+static bool
+microblaze_can_eliminate (const int from, const int to)
+{
+ return ((from == RETURN_ADDRESS_POINTER_REGNUM && !leaf_function_p())
+ || (to == MB_ABI_SUB_RETURN_ADDR_REGNUM && leaf_function_p())
+ || (from != RETURN_ADDRESS_POINTER_REGNUM
+ && (to == HARD_FRAME_POINTER_REGNUM
+ || (to == STACK_POINTER_REGNUM && !frame_pointer_needed))));
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame
+ pointer or argument pointer or the return address pointer. TO is either
+ the stack pointer or hard frame pointer. */
+
+HOST_WIDE_INT
+microblaze_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset;
+
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ offset = 0;
+ break;
+ case ARG_POINTER_REGNUM:
+ if (to == STACK_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
+ offset = compute_frame_size (get_frame_size ());
+ else
+ gcc_unreachable ();
+ break;
+ case RETURN_ADDRESS_POINTER_REGNUM:
+ if (current_function_is_leaf)
+ offset = 0;
+ else
+ offset = current_frame_info.gp_offset +
+ ((UNITS_PER_WORD - (POINTER_SIZE / BITS_PER_UNIT)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return offset;
+}
+
+/* Print operands using format code.
+
+ The MicroBlaze specific codes are:
+
+ 'X' X is CONST_INT, prints 32 bits in hexadecimal format = "0x%08x",
+ 'x' X is CONST_INT, prints 16 bits in hexadecimal format = "0x%04x",
+ 'F' op is CONST_DOUBLE, print 32 bits in hex,
+ 'd' output integer constant in decimal,
+ 'z' if the operand is 0, use r0 instead of the normal operand.
+ 'D' print second register of double-word register operand.
+ 'L' print low-order register of double-word register operand.
+ 'M' print high-order register of double-word register operand.
+ 'C' print part of opcode for a branch condition.
+ 'N' print part of opcode for a branch condition, inverted.
+ 'S' X is CODE_LABEL, print with prefix of "LS" (for embedded switch).
+ 'B' print 'z' for EQ, 'n' for NE
+ 'b' print 'n' for EQ, 'z' for NE
+ 'T' print 'f' for EQ, 't' for NE
+ 't' print 't' for EQ, 'f' for NE
+ 'm' Print 1<<operand.
+ 'i' Print 'i' if MEM operand has immediate value
+ 'o' Print operand address+4
+ '?' Print 'd' if we use a branch with delay slot instead of normal branch.
+ 'h' Print high word of const_double (int or float) value as hex
+ 'j' Print low word of const_double (int or float) value as hex
+ 's' Print -1 if operand is negative, 0 if positive (sign extend)
+ '@' Print the name of the temporary register (rMB_ABI_ASM_TEMP_REGNUM).
+ '#' Print nop if the delay slot of a branch is not filled.
+*/
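+
+ /* For instance, an insn template could write "addk\t%0,%1,%z2" so that
+ a constant-zero third operand is emitted as r0 (which reads as zero)
+ rather than as a literal 0. */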
+
+void
+print_operand (FILE * file, rtx op, int letter)
+{
+ register enum rtx_code code;
+
+ if (PRINT_OPERAND_PUNCT_VALID_P (letter))
+ {
+ switch (letter)
+ {
+ case '?':
+ /* Conditionally add a 'd' to indicate filled delay slot. */
+ if (final_sequence != NULL)
+ fputs ("d", file);
+ break;
+
+ case '#':
+ /* Conditionally add a nop in unfilled delay slot. */
+ if (final_sequence == NULL)
+ fputs ("nop\t\t# Unfilled delay slot\n", file);
+ break;
+
+ case '@':
+ fputs (reg_names[GP_REG_FIRST + MB_ABI_ASM_TEMP_REGNUM], file);
+ break;
+
+ default:
+ output_operand_lossage ("unknown punctuation '%c'", letter);
+ break;
+ }
+
+ return;
+ }
+
+ if (!op)
+ {
+ output_operand_lossage ("null pointer");
+ return;
+ }
+
+ code = GET_CODE (op);
+
+ if (code == SIGN_EXTEND)
+ op = XEXP (op, 0), code = GET_CODE (op);
+
+ if (letter == 'C')
+ switch (code)
+ {
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case NE:
+ fputs ("ne", file);
+ break;
+ case GT:
+ case GTU:
+ fputs ("gt", file);
+ break;
+ case GE:
+ case GEU:
+ fputs ("ge", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ default:
+ fatal_insn ("PRINT_OPERAND, invalid insn for %%C", op);
+ }
+
+ else if (letter == 'N')
+ switch (code)
+ {
+ case EQ:
+ fputs ("ne", file);
+ break;
+ case NE:
+ fputs ("eq", file);
+ break;
+ case GT:
+ case GTU:
+ fputs ("le", file);
+ break;
+ case GE:
+ case GEU:
+ fputs ("lt", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("ge", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("gt", file);
+ break;
+ default:
+ fatal_insn ("PRINT_OPERAND, invalid insn for %%N", op);
+ }
+
+ else if (letter == 'S')
+ {
+ char buffer[100];
+
+ ASM_GENERATE_INTERNAL_LABEL (buffer, "LS", CODE_LABEL_NUMBER (op));
+ assemble_name (file, buffer);
+ }
+
+ /* Print 'i' for memory operands which have immediate values. */
+ else if (letter == 'i')
+ {
+ if (code == MEM)
+ {
+ struct microblaze_address_info info;
+
+ if (!microblaze_classify_address
+ (&info, XEXP (op, 0), GET_MODE (op), 1))
+ fatal_insn ("insn contains an invalid address !", op);
+
+ switch (info.type)
+ {
+ case ADDRESS_REG:
+ case ADDRESS_CONST_INT:
+ case ADDRESS_SYMBOLIC:
+ case ADDRESS_GOTOFF:
+ fputs ("i", file);
+ break;
+ case ADDRESS_REG_INDEX:
+ break;
+ case ADDRESS_INVALID:
+ case ADDRESS_PLT:
+ fatal_insn ("invalid address", op);
+ }
+ }
+ }
+
+ else if (code == REG || code == SUBREG)
+ {
+ register int regnum;
+
+ if (code == REG)
+ regnum = REGNO (op);
+ else
+ regnum = true_regnum (op);
+
+ if ((letter == 'M' && !WORDS_BIG_ENDIAN)
+ || (letter == 'L' && WORDS_BIG_ENDIAN) || letter == 'D')
+ regnum++;
+
+ fprintf (file, "%s", reg_names[regnum]);
+ }
+
+ else if (code == MEM)
+ if (letter == 'o')
+ {
+ rtx op4 = adjust_address (op, GET_MODE (op), 4);
+ output_address (XEXP (op4, 0));
+ }
+ else
+ output_address (XEXP (op, 0));
+
+ else if (letter == 'h' || letter == 'j')
+ {
+ long val[2];
+ if (code == CONST_DOUBLE)
+ {
+ if (GET_MODE (op) == DFmode)
+ {
+ REAL_VALUE_TYPE value;
+ REAL_VALUE_FROM_CONST_DOUBLE (value, op);
+ REAL_VALUE_TO_TARGET_DOUBLE (value, val);
+ }
+ else
+ {
+ val[0] = CONST_DOUBLE_HIGH (op);
+ val[1] = CONST_DOUBLE_LOW (op);
+ }
+ }
+ else if (code == CONST_INT)
+ {
+ val[0] = (INTVAL (op) & 0xffffffff00000000LL) >> 32;
+ val[1] = INTVAL (op) & 0x00000000ffffffffLL;
+ if (val[0] == 0 && val[1] < 0)
+ val[0] = -1;
+
+ }
+ fprintf (file, "0x%8.8lx", (letter == 'h') ? val[0] : val[1]);
+ }
+ else if (code == CONST_DOUBLE)
+ {
+ if (letter == 'F')
+ {
+ unsigned long value_long;
+ REAL_VALUE_TYPE value;
+ REAL_VALUE_FROM_CONST_DOUBLE (value, op);
+ REAL_VALUE_TO_TARGET_SINGLE (value, value_long);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, value_long);
+ }
+ else
+ {
+ char s[60];
+ real_to_decimal (s, CONST_DOUBLE_REAL_VALUE (op), sizeof (s), 0, 1);
+ fputs (s, file);
+ }
+ }
+
+ else if (code == UNSPEC)
+ {
+ print_operand_address (file, op);
+ }
+
+ else if (letter == 'x' && GET_CODE (op) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, 0xffff & INTVAL (op));
+
+ else if (letter == 'X' && GET_CODE (op) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
+
+ else if (letter == 'd' && GET_CODE (op) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (INTVAL (op)));
+
+ else if (letter == 'z' && GET_CODE (op) == CONST_INT && INTVAL (op) == 0)
+ fputs (reg_names[GP_REG_FIRST], file);
+
+ else if (letter == 's' && GET_CODE (op) == CONST_INT)
+ if (INTVAL (op) < 0)
+ fputs ("-1", file);
+ else
+ fputs ("0", file);
+
+ else if (letter == 'd' || letter == 'x' || letter == 'X' || letter == 's')
+ output_operand_lossage ("letter %c was found & insn was not CONST_INT", letter);
+
+ else if (letter == 'B')
+ fputs (code == EQ ? "z" : "n", file);
+ else if (letter == 'b')
+ fputs (code == EQ ? "n" : "z", file);
+ else if (letter == 'T')
+ fputs (code == EQ ? "f" : "t", file);
+ else if (letter == 't')
+ fputs (code == EQ ? "t" : "f", file);
+
+ else if (code == CONST && GET_CODE (XEXP (op, 0)) == REG)
+ {
+ print_operand (file, XEXP (op, 0), letter);
+ }
+ else if (letter == 'm')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (1L << INTVAL (op)));
+ else
+ output_addr_const (file, op);
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression.
+
+ Possible address classifications and output formats are,
+
+ ADDRESS_REG "%0, r0"
+
+ ADDRESS_REG with non-zero "%0, <addr_const>"
+ offset
+
+ ADDRESS_REG_INDEX "rA, rB"
+ (if rA is r0, rA and rB are swapped)
+
+ ADDRESS_CONST_INT "r0, <addr_const>"
+
+ ADDRESS_SYMBOLIC "rBase, <addr_const>"
+ (rBase is a base register suitable for the
+ symbol's type)
+*/
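+
+ /* For example, the address of (mem:SI (plus (reg r5) (const_int 8)))
+ prints as "r5,8", ready to drop into an instruction such as
+ "lwi r3,r5,8". */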
+
+void
+print_operand_address (FILE * file, rtx addr)
+{
+ struct microblaze_address_info info;
+ enum microblaze_address_type type;
+ if (!microblaze_classify_address (&info, addr, GET_MODE (addr), 1))
+ fatal_insn ("insn contains an invalid address !", addr);
+
+ type = info.type;
+ switch (info.type)
+ {
+ case ADDRESS_REG:
+ fprintf (file, "%s,", reg_names[REGNO (info.regA)]);
+ output_addr_const (file, info.offset);
+ break;
+ case ADDRESS_REG_INDEX:
+ if (REGNO (info.regA) == 0)
+ /* Make rB == r0 instead of rA == r0. This helps reduce read port
+ congestion. */
+ fprintf (file, "%s,%s", reg_names[REGNO (info.regB)],
+ reg_names[REGNO (info.regA)]);
+ else if (REGNO (info.regB) != 0)
+ /* This is a silly swap to help Dhrystone. */
+ fprintf (file, "%s,%s", reg_names[REGNO (info.regB)],
+ reg_names[REGNO (info.regA)]);
+ break;
+ case ADDRESS_CONST_INT:
+ fprintf (file, "%s,", reg_names[REGNO (info.regA)]);
+ output_addr_const (file, info.offset);
+ break;
+ case ADDRESS_SYMBOLIC:
+ case ADDRESS_GOTOFF:
+ case ADDRESS_PLT:
+ if (info.regA)
+ fprintf (file, "%s,", reg_names[REGNO (info.regA)]);
+ output_addr_const (file, info.symbol);
+ if (type == ADDRESS_GOTOFF)
+ {
+ fputs ("@GOT", file);
+ }
+ else if (type == ADDRESS_PLT)
+ {
+ fputs ("@PLT", file);
+ }
+ break;
+ case ADDRESS_INVALID:
+ fatal_insn ("invalid address", addr);
+ break;
+ }
+}
+
+/* Emit either a label, .comm, or .lcomm directive, and mark that the symbol
+ is used, so that we don't emit an .extern for it in
+ microblaze_asm_file_end. */
+
+void
+microblaze_declare_object (FILE * stream, const char *name,
+ const char *section, const char *fmt, int size)
+{
+
+ fputs (section, stream);
+ assemble_name (stream, name);
+ fprintf (stream, fmt, size);
+}
+
+/* Common code to emit the insns (or to write the instructions to a file)
+ to save/restore registers.
+
+ Other parts of the code assume that MICROBLAZE_TEMP1_REGNUM (aka large_reg)
+ is not modified within save_restore_insns. */
+
+#define BITSET_P(VALUE,BIT) (((VALUE) & (1L << (BIT))) != 0)
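+
+ /* For example, BITSET_P (0x80000, 19) is nonzero, so a frame mask of
+ 0x80000 means that r19 is to be saved or restored. */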
+
+/* Emit the instructions to save or restore registers, depending on
+ whether this is the prologue or the epilogue; PROLOGUE is 1 for the
+ prologue. */
+static void
+save_restore_insns (int prologue)
+{
+ rtx base_reg_rtx, reg_rtx, mem_rtx, /* msr_rtx, */ isr_reg_rtx =
+ 0, isr_mem_rtx = 0;
+ rtx isr_msr_rtx = 0, insn;
+ long mask = current_frame_info.mask;
+ HOST_WIDE_INT gp_offset;
+ int regno;
+
+ if (frame_pointer_needed
+ && !BITSET_P (mask, HARD_FRAME_POINTER_REGNUM - GP_REG_FIRST))
+ gcc_unreachable ();
+
+ if (mask == 0)
+ return;
+
+ /* Save registers starting from high to low. Debuggers prefer that at
+ least the return register be stored at func+4, and this also avoids
+ the need for a nop in the epilogue if at least one register is
+ reloaded in addition to the return address. */
+
+ /* Pick which pointer to use as a base register. For small frames, just
+ use the stack pointer. Otherwise, use a temporary register. Save 2
+ cycles if the save area is near the end of a large frame, by reusing
+ the constant created in the prologue/epilogue to adjust the stack
+ frame. */
+
+ gp_offset = current_frame_info.gp_offset;
+
+ gcc_assert (gp_offset > 0);
+
+ base_reg_rtx = stack_pointer_rtx;
+
+ /* For interrupt_handlers, need to save/restore the MSR. */
+ if (interrupt_handler)
+ {
+ isr_mem_rtx = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (Pmode, base_reg_rtx,
+ GEN_INT (current_frame_info.
+ gp_offset -
+ UNITS_PER_WORD)));
+
+ /* Do not optimize in flow analysis. */
+ MEM_VOLATILE_P (isr_mem_rtx) = 1;
+ isr_reg_rtx = gen_rtx_REG (SImode, MB_ABI_MSR_SAVE_REG);
+ isr_msr_rtx = gen_rtx_REG (SImode, ST_REG);
+ }
+
+ if (interrupt_handler && !prologue)
+ {
+ emit_move_insn (isr_reg_rtx, isr_mem_rtx);
+ emit_move_insn (isr_msr_rtx, isr_reg_rtx);
+ /* Do not optimize in flow analysis. */
+ emit_insn (gen_rtx_USE (SImode, isr_reg_rtx));
+ emit_insn (gen_rtx_USE (SImode, isr_msr_rtx));
+ }
+
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ {
+ if (BITSET_P (mask, regno - GP_REG_FIRST))
+ {
+ if (regno == MB_ABI_SUB_RETURN_ADDR_REGNUM)
+ /* Don't handle here. Already handled as the first register. */
+ continue;
+
+ reg_rtx = gen_rtx_REG (SImode, regno);
+ insn = gen_rtx_PLUS (Pmode, base_reg_rtx, GEN_INT (gp_offset));
+ mem_rtx = gen_rtx_MEM (SImode, insn);
+ if (interrupt_handler || save_volatiles)
+ /* Do not optimize in flow analysis. */
+ MEM_VOLATILE_P (mem_rtx) = 1;
+
+ if (prologue)
+ {
+ insn = emit_move_insn (mem_rtx, reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ insn = emit_move_insn (reg_rtx, mem_rtx);
+ }
+
+ gp_offset += GET_MODE_SIZE (SImode);
+ }
+ }
+
+ if (interrupt_handler && prologue)
+ {
+ emit_move_insn (isr_reg_rtx, isr_msr_rtx);
+ emit_move_insn (isr_mem_rtx, isr_reg_rtx);
+
+ /* Do not optimize in flow analysis. */
+ emit_insn (gen_rtx_USE (SImode, isr_reg_rtx));
+ emit_insn (gen_rtx_USE (SImode, isr_msr_rtx));
+ }
+
+ /* Done saving and restoring. */
+}
+
+
+/* Set up the stack and frame (if desired) for the function. */
+static void
+microblaze_function_prologue (FILE * file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ const char *fnname;
+ long fsiz = current_frame_info.total_size;
+
+ /* Get the function name the same way that toplev.c does before calling
+ assemble_start_function. This is needed so that the name used here
+ exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (!flag_inhibit_size_directive)
+ {
+ fputs ("\t.ent\t", file);
+ if (interrupt_handler && strcmp (INTERRUPT_HANDLER_NAME, fnname))
+ fputs ("_interrupt_handler", file);
+ else
+ assemble_name (file, fnname);
+ fputs ("\n", file);
+ if (!interrupt_handler)
+ ASM_OUTPUT_TYPE_DIRECTIVE (file, fnname, "function");
+ }
+
+ assemble_name (file, fnname);
+ fputs (":\n", file);
+
+ if (interrupt_handler && strcmp (INTERRUPT_HANDLER_NAME, fnname))
+ fputs ("_interrupt_handler:\n", file);
+
+ if (!flag_inhibit_size_directive)
+ {
+ /* .frame FRAMEREG, FRAMESIZE, RETREG. */
+ fprintf (file,
+ "\t.frame\t%s,%ld,%s\t\t# vars= %ld, regs= %d, args= %d\n",
+ (reg_names[(frame_pointer_needed)
+ ? HARD_FRAME_POINTER_REGNUM :
+ STACK_POINTER_REGNUM]), fsiz,
+ reg_names[MB_ABI_SUB_RETURN_ADDR_REGNUM + GP_REG_FIRST],
+ current_frame_info.var_size, current_frame_info.num_gp,
+ crtl->outgoing_args_size);
+ fprintf (file, "\t.mask\t0x%08lx\n", current_frame_info.mask);
+ }
+}
+
+/* Output extra assembler code at the end of a prologue. */
+static void
+microblaze_function_end_prologue (FILE * file)
+{
+ if (TARGET_STACK_CHECK)
+ {
+ fprintf (file, "\t# Stack Check Stub -- Start.\n\t");
+ fprintf (file, "ori\tr18,r0,_stack_end\n\t");
+ fprintf (file, "cmpu\tr18,r1,r18\n\t");
+ fprintf (file, "bgei\tr18,_stack_overflow_exit\n\t");
+ fprintf (file, "# Stack Check Stub -- End.\n");
+ }
+}
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+microblaze_expand_prologue (void)
+{
+ int regno;
+ HOST_WIDE_INT fsiz;
+ const char *arg_name = 0;
+ tree fndecl = current_function_decl;
+ tree fntype = TREE_TYPE (fndecl);
+ tree fnargs = DECL_ARGUMENTS (fndecl);
+ rtx next_arg_reg;
+ int i;
+ tree next_arg;
+ tree cur_arg;
+ CUMULATIVE_ARGS args_so_far;
+ rtx mem_rtx, reg_rtx;
+
+ /* If struct value address is treated as the first argument, make it so. */
+ if (aggregate_value_p (DECL_RESULT (fndecl), fntype)
+ && !cfun->returns_pcc_struct)
+ {
+ tree type = build_pointer_type (fntype);
+ tree function_result_decl = build_decl (BUILTINS_LOCATION, PARM_DECL,
+ NULL_TREE, type);
+
+ DECL_ARG_TYPE (function_result_decl) = type;
+ TREE_CHAIN (function_result_decl) = fnargs;
+ fnargs = function_result_decl;
+ }
+
+ /* Determine the last argument, and get its name. */
+
+ INIT_CUMULATIVE_ARGS (args_so_far, fntype, NULL_RTX, 0, 0);
+ regno = GP_ARG_FIRST;
+
+ for (cur_arg = fnargs; cur_arg != 0; cur_arg = next_arg)
+ {
+ tree passed_type = DECL_ARG_TYPE (cur_arg);
+ enum machine_mode passed_mode = TYPE_MODE (passed_type);
+ rtx entry_parm;
+
+ if (TREE_ADDRESSABLE (passed_type))
+ {
+ passed_type = build_pointer_type (passed_type);
+ passed_mode = Pmode;
+ }
+
+ entry_parm = targetm.calls.function_arg (&args_so_far, passed_mode,
+ passed_type, true);
+
+ if (entry_parm)
+ {
+ int words;
+
+ /* Passed in a register, so it will get homed automatically. */
+ if (GET_MODE (entry_parm) == BLKmode)
+ words = (int_size_in_bytes (passed_type) + 3) / 4;
+ else
+ words = (GET_MODE_SIZE (GET_MODE (entry_parm)) + 3) / 4;
+
+ regno = REGNO (entry_parm) + words - 1;
+ }
+ else
+ {
+ regno = GP_ARG_LAST + 1;
+ break;
+ }
+
+ targetm.calls.function_arg_advance (&args_so_far, passed_mode,
+ passed_type, true);
+
+ next_arg = TREE_CHAIN (cur_arg);
+ if (next_arg == 0)
+ {
+ if (DECL_NAME (cur_arg))
+ arg_name = IDENTIFIER_POINTER (DECL_NAME (cur_arg));
+
+ break;
+ }
+ }
+
+ /* Split parallel insn into a sequence of insns. */
+
+ next_arg_reg = targetm.calls.function_arg (&args_so_far, VOIDmode,
+ void_type_node, true);
+ if (next_arg_reg != 0 && GET_CODE (next_arg_reg) == PARALLEL)
+ {
+ rtvec adjust = XVEC (next_arg_reg, 0);
+ int num = GET_NUM_ELEM (adjust);
+
+ for (i = 0; i < num; i++)
+ {
+ rtx pattern = RTVEC_ELT (adjust, i);
+ emit_insn (pattern);
+ }
+ }
+
+ fsiz = compute_frame_size (get_frame_size ());
+
+ /* If this function is a varargs function, store any registers that
+ would normally hold arguments ($5 - $10) on the stack. */
+ if (((TYPE_ARG_TYPES (fntype) != 0
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
+ != void_type_node))
+ || (arg_name != 0
+ && ((arg_name[0] == '_'
+ && strcmp (arg_name, "__builtin_va_alist") == 0)
+ || (arg_name[0] == 'v'
+ && strcmp (arg_name, "va_alist") == 0)))))
+ {
+ int offset = (regno - GP_ARG_FIRST + 1) * UNITS_PER_WORD;
+ rtx ptr = stack_pointer_rtx;
+
+ /* If we are doing svr4-abi, sp has already been decremented by fsiz. */
+ for (; regno <= GP_ARG_LAST; regno++)
+ {
+ if (offset != 0)
+ ptr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+ emit_move_insn (gen_rtx_MEM (SImode, ptr),
+ gen_rtx_REG (SImode, regno));
+
+ offset += GET_MODE_SIZE (SImode);
+ }
+
+ }
+
+ if (fsiz > 0)
+ {
+ rtx fsiz_rtx = GEN_INT (fsiz);
+
+ rtx insn = NULL;
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ fsiz_rtx));
+ if (insn)
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Handle SUB_RETURN_ADDR_REGNUM specially at first. */
+ if (!current_function_is_leaf || interrupt_handler)
+ {
+ mem_rtx = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ const0_rtx));
+
+ if (interrupt_handler)
+ /* Do not optimize in flow analysis. */
+ MEM_VOLATILE_P (mem_rtx) = 1;
+
+ reg_rtx = gen_rtx_REG (SImode, MB_ABI_SUB_RETURN_ADDR_REGNUM);
+ insn = emit_move_insn (mem_rtx, reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* _save_ registers for prologue. */
+ save_restore_insns (1);
+
+ if (frame_pointer_needed)
+ {
+ rtx insn = 0;
+
+ insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+
+ if (insn)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (flag_pic == 2 && df_regs_ever_live_p (MB_ABI_PIC_ADDR_REGNUM))
+ {
+ SET_REGNO (pic_offset_table_rtx, MB_ABI_PIC_ADDR_REGNUM);
+ emit_insn (gen_set_got (pic_offset_table_rtx)); /* setting GOT. */
+ }
+
+ /* If we are profiling, make sure no instructions are scheduled before
+ the call to mcount. */
+
+ if (profile_flag)
+ emit_insn (gen_blockage ());
+}
+
+/* Do necessary cleanup after a function to restore stack, frame, and regs. */
+
+#define RA_MASK ((long) 0x80000000) /* 1 << 31 */
+#define PIC_OFFSET_TABLE_MASK (1 << (PIC_OFFSET_TABLE_REGNUM - GP_REG_FIRST))
+
+static void
+microblaze_function_epilogue (FILE * file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ const char *fnname;
+
+ /* Get the function name the same way that toplev.c does before calling
+ assemble_start_function. This is needed so that the name used here
+ exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+
+ if (!flag_inhibit_size_directive)
+ {
+ fputs ("\t.end\t", file);
+ if (interrupt_handler)
+ fputs ("_interrupt_handler", file);
+ else
+ assemble_name (file, fnname);
+ fputs ("\n", file);
+ }
+
+ /* Reset state info for each function. */
+ current_frame_info = zero_frame_info;
+
+ /* Restore the output file if optimizing the GP (optimizing the GP causes
+ the text to be diverted to a tempfile, so that data decls come before
+ references to the data). */
+}
+
+/* Expand the epilogue into a bunch of separate insns. */
+
+void
+microblaze_expand_epilogue (void)
+{
+ HOST_WIDE_INT fsiz = current_frame_info.total_size;
+ rtx fsiz_rtx = GEN_INT (fsiz);
+ rtx reg_rtx;
+ rtx mem_rtx;
+
+ /* In the case of interrupt handlers, use addki instead of addi to
+ adjust the stack pointer value. */
+
+ if (microblaze_can_use_return_insn ())
+ {
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode,
+ GP_REG_FIRST +
+ MB_ABI_SUB_RETURN_ADDR_REGNUM)));
+ return;
+ }
+
+ if (fsiz > 0)
+ {
+ /* Restore SUB_RETURN_ADDR_REGNUM first. This prevents a load
+ immediately followed by its use (in rtsd) in every epilogue, saving
+ a load-use stall cycle. This is also important for handling alloca;
+ see the comments at if (frame_pointer_needed) below. */
+
+ if (!current_function_is_leaf || interrupt_handler)
+ {
+ mem_rtx =
+ gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx));
+ if (interrupt_handler)
+ /* Do not optimize in flow analysis. */
+ MEM_VOLATILE_P (mem_rtx) = 1;
+ reg_rtx = gen_rtx_REG (SImode, MB_ABI_SUB_RETURN_ADDR_REGNUM);
+ emit_move_insn (reg_rtx, mem_rtx);
+ }
+
+ /* It is important that this is done after we restore the return address
+ register (above). When alloca is used, we want to restore the
+ sub-routine return address only from the current stack top and not
+ from the frame pointer (which we restore below). (frame_pointer + 0)
+ might have been over-written since alloca allocates memory on the
+ current stack. */
+ if (frame_pointer_needed)
+ emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+
+ /* _restore_ registers for epilogue. */
+ save_restore_insns (0);
+ emit_insn (gen_blockage ());
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, fsiz_rtx));
+ }
+
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, GP_REG_FIRST +
+ MB_ABI_SUB_RETURN_ADDR_REGNUM)));
+}
+
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+
+int
+microblaze_can_use_return_insn (void)
+{
+ if (!reload_completed)
+ return 0;
+
+ if (df_regs_ever_live_p (MB_ABI_SUB_RETURN_ADDR_REGNUM) || profile_flag)
+ return 0;
+
+ if (current_frame_info.initialized)
+ return current_frame_info.total_size == 0;
+
+ return compute_frame_size (get_frame_size ()) == 0;
+}
+
+/* Implement TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+microblaze_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x ATTRIBUTE_UNUSED,
+ reg_class_t rclass, enum machine_mode mode ATTRIBUTE_UNUSED,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ if (rclass == ST_REGS)
+ return GR_REGS;
+
+ return NO_REGS;
+}
+
+static void
+microblaze_globalize_label (FILE * stream, const char *name)
+{
+ fputs ("\t.globl\t", stream);
+ if (interrupt_handler && strcmp (name, INTERRUPT_HANDLER_NAME))
+ {
+ fputs (INTERRUPT_HANDLER_NAME, stream);
+ fputs ("\n\t.globl\t", stream);
+ }
+ assemble_name (stream, name);
+ fputs ("\n", stream);
+}
+
+/* Returns true if decl should be placed into a "small data" section. */
+static bool
+microblaze_elf_in_small_data_p (const_tree decl)
+{
+ HOST_WIDE_INT size;
+
+ if (!TARGET_XLGPOPT)
+ return false;
+
+ /* We want to merge strings, so we never consider them small data. */
+ if (TREE_CODE (decl) == STRING_CST)
+ return false;
+
+ /* Functions are never in the small data area. */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (strcmp (section, ".sdata") == 0
+ || strcmp (section, ".sdata2") == 0
+ || strcmp (section, ".sbss") == 0
+ || strcmp (section, ".sbss2") == 0)
+ return true;
+ }
+
+ size = int_size_in_bytes (TREE_TYPE (decl));
+
+ return (size > 0 && size <= microblaze_section_threshold);
+}
+
+
+static section *
+microblaze_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ /* MB binutils have various issues with mergeable string sections and
+ relaxation/relocation, so we currently turn mergeable string
+ sections into regular read-only sections. */
+
+ return readonly_data_section;
+ default:
+ return default_elf_select_section (decl, reloc, align);
+ }
+}
+
+/*
+ Encode info about sections into the RTL based on a symbol's declaration.
+ The default definition of this hook, default_encode_section_info in
+ `varasm.c', sets a number of commonly-useful bits in SYMBOL_REF_FLAGS. */
+
+static void
+microblaze_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+}
+
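+/* A note on what this helper builds (derived from the code below, not a
+   comment from upstream): the returned RTX is
+   (mem (plus pic_offset_table_rtx (const (unspec [op] UNSPEC_GOTOFF)))),
+   i.e. OP's address is loaded from its GOT slot relative to the PIC
+   register. */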
+static rtx
+expand_pic_symbol_ref (enum machine_mode mode ATTRIBUTE_UNUSED, rtx op)
+{
+ rtx result;
+ result = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), UNSPEC_GOTOFF);
+ result = gen_rtx_CONST (Pmode, result);
+ result = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, result);
+ result = gen_const_mem (Pmode, result);
+ return result;
+}
+
+bool
+microblaze_expand_move (enum machine_mode mode, rtx operands[])
+{
+ /* If operands[1] is a constant address invalid for pic, then we need to
+ handle it just like LEGITIMIZE_ADDRESS does. */
+ if (flag_pic)
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ rtx addr = XEXP (operands[0], 0);
+ if (GET_CODE (addr) == SYMBOL_REF)
+ {
+ rtx ptr_reg, result;
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = expand_pic_symbol_ref (mode, addr);
+ ptr_reg = gen_reg_rtx (Pmode);
+ emit_move_insn (ptr_reg, addr);
+ result = gen_rtx_MEM (mode, ptr_reg);
+ operands[0] = result;
+ }
+ }
+ if (GET_CODE (operands[1]) == SYMBOL_REF
+ || GET_CODE (operands[1]) == LABEL_REF)
+ {
+ rtx result;
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ result = expand_pic_symbol_ref (mode, operands[1]);
+ if (GET_CODE (operands[0]) != REG)
+ {
+ rtx ptr_reg = gen_reg_rtx (Pmode);
+ emit_move_insn (ptr_reg, result);
+ emit_move_insn (operands[0], ptr_reg);
+ }
+ else
+ {
+ emit_move_insn (operands[0], result);
+ }
+ return true;
+ }
+ else if (GET_CODE (operands[1]) == MEM &&
+ GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ rtx result;
+ rtx ptr_reg;
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ result = expand_pic_symbol_ref (mode, XEXP (operands[1], 0));
+
+ ptr_reg = gen_reg_rtx (Pmode);
+
+ emit_move_insn (ptr_reg, result);
+ result = gen_rtx_MEM (mode, ptr_reg);
+ emit_move_insn (operands[0], result);
+ return true;
+ }
+ else if (pic_address_needs_scratch (operands[1]))
+ {
+ rtx temp = force_reg (SImode, XEXP (XEXP (operands[1], 0), 0));
+ rtx temp2 = XEXP (XEXP (operands[1], 0), 1);
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ emit_move_insn (operands[0], gen_rtx_PLUS (SImode, temp, temp2));
+ return true;
+ }
+ }
+
+ if (!reload_in_progress && !reload_completed
+ && !register_operand (operands[0], SImode)
+ && !register_operand (operands[1], SImode)
+ && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0))
+ {
+ rtx temp = force_reg (SImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ return true;
+ }
+ return false;
+}
+
+/* Expand shift operations. */
+int
+microblaze_expand_shift (rtx operands[])
+{
+ gcc_assert ((GET_CODE (operands[2]) == CONST_INT)
+ || (GET_CODE (operands[2]) == REG)
+ || (GET_CODE (operands[2]) == SUBREG));
+
+ /* Shift by one -- generate pattern. */
+ if ((GET_CODE (operands[2]) == CONST_INT) && (INTVAL (operands[2]) == 1))
+ return 0;
+
+ /* Have barrel shifter and shift > 1: use it. */
+ if (TARGET_BARREL_SHIFT)
+ return 0;
+
+ gcc_assert ((GET_CODE (operands[0]) == REG)
+ || (GET_CODE (operands[0]) == SUBREG)
+ || (GET_CODE (operands[1]) == REG)
+ || (GET_CODE (operands[1]) == SUBREG));
+
+ /* Shift by zero -- copy regs if necessary. */
+ if ((GET_CODE (operands[2]) == CONST_INT) && (INTVAL (operands[2]) == 0))
+ {
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ emit_insn (gen_movsi (operands[0], operands[1]));
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return an RTX indicating where the return address to the
+ calling function can be found. */
+rtx
+microblaze_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return NULL_RTX;
+
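+ /* r15 holds the address of the call instruction itself; the real
+ return address is 8 bytes beyond it (branch plus delay slot), hence
+ the +8 below. (See also the INCOMING_RETURN_ADDR_RTX comment in
+ microblaze.h.) */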
+ return gen_rtx_PLUS (Pmode,
+ get_hard_reg_initial_val (Pmode,
+ MB_ABI_SUB_RETURN_ADDR_REGNUM),
+ GEN_INT (8));
+}
+
+/* Put the string into .sdata2 if it is below the threshold. */
+void
+microblaze_asm_output_ident (FILE *file ATTRIBUTE_UNUSED, const char *string)
+{
+ int size = strlen (string) + 1;
+ if (size <= microblaze_section_threshold)
+ switch_to_section (sdata2_section);
+ else
+ switch_to_section (readonly_data_section);
+ assemble_string (string, size);
+}
+
+static void
+microblaze_elf_asm_init_sections (void)
+{
+ sdata2_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ SDATA2_SECTION_ASM_OP);
+}
+
+/* Generate assembler code for constant parts of a trampoline. */
+
+static void
+microblaze_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\t.word\t0x03e00821\t\t# move $1,$31\n");
+ fprintf (f, "\t.word\t0x04110001\t\t# bgezal $0,.+8\n");
+ fprintf (f, "\t.word\t0x00000000\t\t# nop\n");
+ fprintf (f, "\t.word\t0x8fe30014\t\t# lw $3,20($31)\n");
+ fprintf (f, "\t.word\t0x8fe20018\t\t# lw $2,24($31)\n");
+ fprintf (f, "\t.word\t0x0060c821\t\t# move $25,$3 (abicalls)\n");
+ fprintf (f, "\t.word\t0x00600008\t\t# jr $3\n");
+ fprintf (f, "\t.word\t0x0020f821\t\t# move $31,$1\n");
+ /* fprintf (f, "\t.word\t0x00000000\t\t# <function address>\n"); */
+ /* fprintf (f, "\t.word\t0x00000000\t\t# <static chain value>\n"); */
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+
+static void
+microblaze_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (8*UNITS_PER_WORD), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, 8);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 12);
+ emit_move_insn (mem, fnaddr);
+}
+
+/* Emit instruction to perform compare.
+ cmp is (compare_op op0 op1). */
+static rtx
+microblaze_emit_compare (enum machine_mode mode, rtx cmp, enum rtx_code *cmp_code)
+{
+ rtx cmp_op0 = XEXP (cmp, 0);
+ rtx cmp_op1 = XEXP (cmp, 1);
+ rtx comp_reg = gen_reg_rtx (SImode);
+ enum rtx_code code = *cmp_code;
+
+ gcc_assert ((GET_CODE (cmp_op0) == REG) || (GET_CODE (cmp_op0) == SUBREG));
+
+ /* If comparing against zero, just test source reg. */
+ if (cmp_op1 == const0_rtx)
+ return cmp_op0;
+
+ if (code == EQ || code == NE)
+ {
+ if (TARGET_PATTERN_COMPARE && GET_CODE (cmp_op1) == REG)
+ {
+ if (code == EQ)
+ emit_insn (gen_seq_internal_pat (comp_reg, cmp_op0, cmp_op1));
+ else
+ {
+ emit_insn (gen_sne_internal_pat (comp_reg, cmp_op0, cmp_op1));
+ *cmp_code = EQ;
+ }
+ }
+ else
+ /* Use xor for equal/not-equal comparison. */
+ emit_insn (gen_xorsi3 (comp_reg, cmp_op0, cmp_op1));
+ }
+ else if (code == GT || code == GTU || code == LE || code == LEU)
+ {
+ /* MicroBlaze compare is not symmetrical. */
+ /* Swap argument order. */
+ cmp_op1 = force_reg (mode, cmp_op1);
+ if (code == GT || code == LE)
+ emit_insn (gen_signed_compare (comp_reg, cmp_op0, cmp_op1));
+ else
+ emit_insn (gen_unsigned_compare (comp_reg, cmp_op0, cmp_op1));
+ /* Translate test condition. */
+ *cmp_code = swap_condition (code);
+ }
+ else /* if (code == GE || code == GEU || code == LT || code == LTU) */
+ {
+ cmp_op1 = force_reg (mode, cmp_op1);
+ if (code == GE || code == LT)
+ emit_insn (gen_signed_compare (comp_reg, cmp_op1, cmp_op0));
+ else
+ emit_insn (gen_unsigned_compare (comp_reg, cmp_op1, cmp_op0));
+ }
+
+ return comp_reg;
+}
+
+/* Generate conditional branch -- first, generate test condition,
+ second, generate correct branch instruction. */
+
+void
+microblaze_expand_conditional_branch (enum machine_mode mode, rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx comp;
+ rtx condition;
+
+ comp = microblaze_emit_compare (mode, operands[0], &code);
+ condition = gen_rtx_fmt_ee (signed_condition (code), SImode, comp, const0_rtx);
+ emit_jump_insn (gen_condjump (condition, operands[3]));
+}
+
+void
+microblaze_expand_conditional_branch_sf (rtx operands[])
+{
+ rtx condition;
+ rtx cmp_op0 = XEXP (operands[0], 0);
+ rtx cmp_op1 = XEXP (operands[0], 1);
+ rtx comp_reg = gen_reg_rtx (SImode);
+
+ emit_insn (gen_cstoresf4 (comp_reg, operands[0], cmp_op0, cmp_op1));
+ condition = gen_rtx_NE (SImode, comp_reg, const0_rtx);
+ emit_jump_insn (gen_condjump (condition, operands[3]));
+}
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+static bool
+microblaze_frame_pointer_required (void)
+{
+ /* If the function contains dynamic stack allocations, we need to
+ use the frame pointer to access the static parts of the frame. */
+ if (cfun->calls_alloca)
+ return true;
+ return false;
+}
+
+void
+microblaze_expand_divide (rtx operands[])
+{
+ /* Table-lookup software divide. Works for all (nr/dr) where
+ 0 <= nr, dr <= 15. */
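+ /* Illustrative note (derived from the code below): the quotient byte
+ is looked up in _divsi3_table at index (nr << 4) + dr, e.g. 13/5
+ reads _divsi3_table[(13 << 4) + 5]. */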
+
+ rtx regt1 = gen_reg_rtx (SImode);
+ rtx reg18 = gen_rtx_REG (SImode, R_TMP);
+ rtx regqi = gen_reg_rtx (QImode);
+ rtx div_label = gen_label_rtx ();
+ rtx div_end_label = gen_label_rtx ();
+ rtx div_table_rtx = gen_rtx_SYMBOL_REF (QImode, "_divsi3_table");
+ rtx mem_rtx;
+ rtx ret;
+ rtx jump, cjump, insn;
+
+ insn = emit_insn (gen_iorsi3 (regt1, operands[1], operands[2]));
+ cjump = emit_jump_insn_after (gen_cbranchsi4 (
+ gen_rtx_GTU (SImode, regt1, GEN_INT (15)),
+ regt1, GEN_INT (15), div_label), insn);
+ LABEL_NUSES (div_label) = 1;
+ JUMP_LABEL (cjump) = div_label;
+ emit_insn (gen_rtx_CLOBBER (SImode, reg18));
+
+ emit_insn (gen_ashlsi3_bshift (regt1, operands[1], GEN_INT (4)));
+ emit_insn (gen_addsi3 (regt1, regt1, operands[2]));
+ mem_rtx = gen_rtx_MEM (QImode,
+ gen_rtx_PLUS (Pmode, regt1, div_table_rtx));
+
+ insn = emit_insn (gen_movqi (regqi, mem_rtx));
+ insn = emit_insn (gen_movsi (operands[0], gen_rtx_SUBREG (SImode, regqi, 0)));
+ jump = emit_jump_insn_after (gen_jump (div_end_label), insn);
+ JUMP_LABEL (jump) = div_end_label;
+ LABEL_NUSES (div_end_label) = 1;
+ emit_barrier ();
+
+ emit_label (div_label);
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__divsi3"),
+ operands[0], LCT_NORMAL,
+ GET_MODE (operands[0]), 2, operands[1],
+ GET_MODE (operands[1]), operands[2],
+ GET_MODE (operands[2]));
+ if (ret != operands[0])
+ emit_move_insn (operands[0], ret);
+
+ emit_label (div_end_label);
+ emit_insn (gen_blockage ());
+}
+
+/* Implement TARGET_FUNCTION_VALUE. */
+static rtx
+microblaze_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return LIBCALL_VALUE (TYPE_MODE (valtype));
+}
+
+/* Implement TARGET_SCHED_ADJUST_COST. */
+static int
+microblaze_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link,
+ rtx dep ATTRIBUTE_UNUSED, int cost)
+{
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+ return cost;
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+ return cost;
+}
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO microblaze_encode_section_info
+
+#undef TARGET_ASM_GLOBALIZE_LABEL
+#define TARGET_ASM_GLOBALIZE_LABEL microblaze_globalize_label
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE microblaze_function_prologue
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE microblaze_function_epilogue
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS microblaze_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST microblaze_address_cost
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE microblaze_attribute_table
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P microblaze_elf_in_small_data_p
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION microblaze_select_section
+
+#undef TARGET_HAVE_SRODATA_SECTION
+#define TARGET_HAVE_SRODATA_SECTION true
+
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE \
+ microblaze_function_end_prologue
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION microblaze_handle_option
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES function_arg_partial_bytes
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG microblaze_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE microblaze_function_arg_advance
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE microblaze_can_eliminate
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS microblaze_legitimize_address
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P microblaze_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED microblaze_frame_pointer_required
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE microblaze_asm_trampoline_template
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT microblaze_trampoline_init
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE microblaze_function_value
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD microblaze_secondary_reload
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST microblaze_adjust_cost
+
+#undef TARGET_ASM_INIT_SECTIONS
+#define TARGET_ASM_INIT_SECTIONS microblaze_elf_asm_init_sections
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE microblaze_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE microblaze_option_optimization_table
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-microblaze.h"
diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h
new file mode 100644
index 000000000..f60ab6bfa
--- /dev/null
+++ b/gcc/config/microblaze/microblaze.h
@@ -0,0 +1,938 @@
+/* Definitions of target machine for GNU compiler for Xilinx MicroBlaze.
+ Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ Contributed by Michael Eager <eager@eagercon.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Standard GCC variables that we reference. */
+
+/* MicroBlaze external variables defined in microblaze.c. */
+
+/* Which pipeline to schedule for. */
+enum pipeline_type
+{
+ MICROBLAZE_PIPE_3 = 0,
+ MICROBLAZE_PIPE_5 = 1
+};
+
+#define MICROBLAZE_MASK_NO_UNSAFE_DELAY 0x00000001
+
+/* print_operand punctuation chars */
+extern char microblaze_print_operand_punct[];
+
+/* # bytes of data/sdata cutoff */
+extern int microblaze_section_threshold;
+
+/* Map register # to debug register # */
+extern int microblaze_dbx_regno[];
+
+extern int microblaze_no_unsafe_delay;
+extern enum pipeline_type microblaze_pipe;
+
+#define OBJECT_FORMAT_ELF
+
+/* Default target_flags if no switches are specified */
+#define TARGET_DEFAULT (MASK_SOFT_MUL | MASK_SOFT_DIV | MASK_SOFT_FLOAT)
+
+/* The default setting for -mcpu=. We set it to v4.00.a even though
+ we are actually ahead. This is the safest version that generates
+ code compatible with the original ISA. */
+#define MICROBLAZE_DEFAULT_CPU "v4.00.a"
+
+/* Macros to decide whether certain features are available or not,
+ depending on the instruction set architecture level. */
+
+#define DRIVER_SELF_SPECS \
+ "%{mxl-soft-mul:%<mno-xl-soft-mul}", \
+ "%{mno-xl-barrel-shift:%<mxl-barrel-shift}", \
+ "%{mno-xl-pattern-compare:%<mxl-pattern-compare}", \
+ "%{mxl-soft-div:%<mno-xl-soft-div}", \
+ "%{msoft-float:%<mhard-float}"
+
+/* Tell collect what flags to pass to nm. */
+#ifndef NM_FLAGS
+#define NM_FLAGS "-Bn"
+#endif
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() microblaze_cpp_define (pfile)
+
+/* Assembler specs. */
+
+#define TARGET_ASM_SPEC ""
+
+#define ASM_SPEC "\
+%(target_asm_spec)"
+
+/* Extra switches sometimes passed to the linker. */
+/* -xl-mode-xmdstub translated to -Zxl-mode-xmdstub -- deprecated. */
+
+#define LINK_SPEC "%{shared:-shared} -N -relax \
+ %{Zxl-mode-xmdstub:-defsym _TEXT_START_ADDR=0x800} \
+ %{mxl-mode-xmdstub:-defsym _TEXT_START_ADDR=0x800} \
+ %{mxl-gp-opt:%{G*}} %{!mxl-gp-opt: -G 0} \
+ %{!T*: -dT xilinx.ld%s}"
+
+/* Specs for the compiler proper */
+
+#ifndef CC1_SPEC
+#define CC1_SPEC " \
+%{G*} \
+%(subtarget_cc1_spec) \
+%{mxl-multiply-high:-mcpu=v6.00.a} \
+"
+#endif
+
+#define EXTRA_SPECS \
+ { "target_asm_spec", TARGET_ASM_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+/* Print subsidiary information on the compiler version in use. */
+#define MICROBLAZE_VERSION MICROBLAZE_DEFAULT_CPU
+
+#ifndef MACHINE_TYPE
+#define MACHINE_TYPE "MicroBlaze/ELF"
+#endif
+
+#ifndef TARGET_VERSION_INTERNAL
+#define TARGET_VERSION_INTERNAL(STREAM) \
+ fprintf (STREAM, " %s %s", MACHINE_TYPE, MICROBLAZE_VERSION)
+#endif
+
+#ifndef TARGET_VERSION
+#define TARGET_VERSION TARGET_VERSION_INTERNAL (stderr)
+#endif
+
+/* Local compiler-generated symbols must have a prefix that the assembler
+ understands. */
+
+#ifndef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "$"
+#endif
+
+/* Fixed registers. */
+#define MB_ABI_BASE_REGNUM 0
+#define MB_ABI_STACK_POINTER_REGNUM 1
+#define MB_ABI_GPRO_REGNUM 2
+#define MB_ABI_GPRW_REGNUM 13
+#define MB_ABI_INTR_RETURN_ADDR_REGNUM 14
+#define MB_ABI_SUB_RETURN_ADDR_REGNUM 15
+#define MB_ABI_DEBUG_RETURN_ADDR_REGNUM 16
+#define MB_ABI_EXCEPTION_RETURN_ADDR_REGNUM 17
+/* This is our temp register. */
+#define MB_ABI_ASM_TEMP_REGNUM 18
+#define MB_ABI_FRAME_POINTER_REGNUM 19
+#define MB_ABI_PIC_ADDR_REGNUM 20
+#define MB_ABI_PIC_FUNC_REGNUM 21
+/* Volatile registers. */
+#define MB_ABI_INT_RETURN_VAL_REGNUM 3
+#define MB_ABI_INT_RETURN_VAL2_REGNUM 4
+#define MB_ABI_FIRST_ARG_REGNUM 5
+#define MB_ABI_LAST_ARG_REGNUM 10
+#define MB_ABI_MAX_ARG_REGS (MB_ABI_LAST_ARG_REGNUM \
+ - MB_ABI_FIRST_ARG_REGNUM + 1)
+#define MB_ABI_STATIC_CHAIN_REGNUM 3
+#define MB_ABI_TEMP1_REGNUM 11
+#define MB_ABI_TEMP2_REGNUM 12
+/* Volatile register used to save MSR in interrupt handlers. */
+#define MB_ABI_MSR_SAVE_REG 11
+
+
+/* Debug stuff. */
+
+/* How to renumber registers for dbx and gdb. */
+#define DBX_REGISTER_NUMBER(REGNO) microblaze_dbx_regno[(REGNO)]
+
+/* Generate DWARF exception handling info. */
+#define DWARF2_UNWIND_INFO 1
+
+/* Don't generate .loc operations. */
+#define DWARF2_ASM_LINE_DEBUG_INFO 0
+
+/* The DWARF 2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN \
+ (GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM)
+
+/* Initial state of return address on entry to func = R15.
+ Actually, the RA is at R15+8, but gcc doesn't know how
+ to generate this.
+ NOTE: GDB has a workaround and expects this incorrect value.
+ If this is fixed, a corresponding fix to GDB is needed. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_REG (VOIDmode, GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM)
+
+/* Use DWARF 2 debugging information by default. */
+#define DWARF2_DEBUGGING_INFO
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Target machine storage layout */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+#define BITS_PER_UNIT 8
+#define BITS_PER_WORD 32
+#define UNITS_PER_WORD 4
+#define MIN_UNITS_PER_WORD 4
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+#define POINTER_SIZE 32
+#define PARM_BOUNDARY 32
+#define FUNCTION_BOUNDARY 32
+#define EMPTY_FIELD_BOUNDARY 32
+#define STRUCTURE_SIZE_BOUNDARY 8
+#define BIGGEST_ALIGNMENT 32
+#define STRICT_ALIGNMENT 1
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD \
+ ? BITS_PER_WORD \
+ : (ALIGN))
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ (((TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode) \
+ && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN))
+
+#define WORD_REGISTER_OPERATIONS
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ (MODE) = SImode;
+
+/* Standard register usage. */
+
+/* On the MicroBlaze, we have 32 integer registers */
+
+#define FIRST_PSEUDO_REGISTER 36
+
+#define FIXED_REGISTERS \
+{ \
+ 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \
+ 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1 \
+}
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1 \
+}
+
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 31
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+#define GP_DBX_FIRST 0
+
+#define ST_REG 32
+#define AP_REG_NUM 33
+#define RAP_REG_NUM 34
+#define FRP_REG_NUM 35
+
+#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define ST_REG_P(REGNO) ((REGNO) == ST_REG)
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
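+
+/* Worked examples (illustrative): SImode gives (4 + 3) / 4 = 1 register;
+   DImode and DFmode give (8 + 3) / 4 = 2 consecutive registers. */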
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. In 32 bit mode, require that DImode and DFmode be in even
+ registers. For DImode, this makes some of the insns easier to
+ write, since you don't have to worry about a DImode value in
+ registers 3 & 4, producing a result in 4 & 5.
+
+ To make the code simpler HARD_REGNO_MODE_OK now just references an
+ array built in override_options. Because machmode.h is not yet
+ included before this file is processed, the MODE bound can't be
+ expressed here. */
+extern char microblaze_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ microblaze_hard_regno_mode_ok[ (int)(MODE) ][ (REGNO)]
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + MB_ABI_STACK_POINTER_REGNUM)
+
+#define STACK_POINTER_OFFSET FIRST_PARM_OFFSET(FNDECL)
+
+/* Base register for access to local variables of the function. We
+ pretend that the frame pointer is
+ MB_ABI_INTR_RETURN_ADDR_REGNUM, and then eliminate it
+ to HARD_FRAME_POINTER_REGNUM. We can get away with this because
+ MB_ABI_INTR_RETURN_ADDR_REGNUM is a fixed
+ register (the return address for interrupts) and will not be used
+ for anything else. */
+
+#define FRAME_POINTER_REGNUM FRP_REG_NUM
+#define HARD_FRAME_POINTER_REGNUM \
+ (GP_REG_FIRST + MB_ABI_FRAME_POINTER_REGNUM)
+#define ARG_POINTER_REGNUM AP_REG_NUM
+#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG_NUM
+#define STATIC_CHAIN_REGNUM \
+ (GP_REG_FIRST + MB_ABI_STATIC_CHAIN_REGNUM)
+
+/* Registers used in prologue/epilogue code when the stack frame
+ is larger than 32K bytes. These registers must come from the
+ scratch register set, and must not be used for passing or returning
+ arguments or any other information used in the calling sequence
+ (such as the PIC register). */
+
+#define MICROBLAZE_TEMP1_REGNUM \
+ (GP_REG_FIRST + MB_ABI_TEMP1_REGNUM)
+
+#define MICROBLAZE_TEMP2_REGNUM \
+ (GP_REG_FIRST + MB_ABI_TEMP2_REGNUM)
+
+#define NO_FUNCTION_CSE 1
+
+#define PIC_OFFSET_TABLE_REGNUM \
+ (flag_pic ? (GP_REG_FIRST + MB_ABI_PIC_ADDR_REGNUM) : \
+ INVALID_REGNUM)
+
+enum reg_class
+{
+ NO_REGS, /* no registers in set. */
+ GR_REGS, /* integer registers. */
+ ST_REGS, /* status register. */
+ ALL_REGS, /* all registers. */
+ LIM_REG_CLASSES /* max value + 1. */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS GR_REGS
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "GR_REGS", \
+ "ST_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000 }, /* no registers. */ \
+ { 0xffffffff, 0x00000000 }, /* integer registers. */ \
+ { 0x00000000, 0x00000001 }, /* status registers. */ \
+ { 0xffffffff, 0x0000000f } /* all registers. */ \
+}
+
+extern enum reg_class microblaze_regno_to_class[];
+
+#define REGNO_REG_CLASS(REGNO) microblaze_regno_to_class[ (REGNO) ]
+
+#define BASE_REG_CLASS GR_REGS
+
+#define INDEX_REG_CLASS GR_REGS
+
+#define GR_REG_CLASS_P(CLASS) ((CLASS) == GR_REGS)
+
+/* REGISTER AND CONSTANT CLASSES */
+
+#define SMALL_INT(X) ((unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000)
+#define LARGE_INT(X) \
+ (INTVAL (X) > 0 && UINTVAL (X) >= 0x80000000 && UINTVAL (X) <= 0xffffffff)
+#define PLT_ADDR_P(X) (GET_CODE (X) == UNSPEC && XINT (X,1) == UNSPEC_PLT)
+/* Test for a valid operand for a call instruction.
+ Don't allow the arg pointer register or virtual regs
+ since they may change into reg + const, which the patterns
+ can't handle yet. */
+#define CALL_INSN_OP(X) (CONSTANT_ADDRESS_P (X) \
+ || (GET_CODE (X) == REG && X != arg_pointer_rtx\
+ && ! (REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ && REGNO (X) <= LAST_VIRTUAL_REGISTER)))
+
+/* True if VALUE is a signed 16-bit number. */
+#define SMALL_OPERAND(VALUE) \
+ ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000)
+
+/* Constant which cannot be loaded in one instruction. */
+#define LARGE_OPERAND(VALUE) \
+ ((((VALUE) & ~0x0000ffff) != 0) \
+ && (((VALUE) & ~0x0000ffff) != ~0x0000ffff) \
+ && (((VALUE) & 0x0000ffff) != 0 \
+ || (((VALUE) & ~2147483647) != 0 \
+ && ((VALUE) & ~2147483647) != ~2147483647)))
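+
+/* Worked examples (illustrative, not from the source):
+   SMALL_OPERAND (0x7fff) and SMALL_OPERAND (-0x8000) hold, since the
+   biased values 0xffff and 0 are below 0x10000, so such constants fit
+   in one 16-bit immediate.  LARGE_OPERAND (0x12345678) holds -- the
+   high half 0x1234 is neither all-zeros nor all-ones and the low half
+   is nonzero -- so loading it takes an imm prefix plus a second
+   instruction. */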
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ ((CLASS) != ALL_REGS \
+ ? (CLASS) \
+ : ((GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT \
+ || GET_MODE_CLASS (GET_MODE (X)) == MODE_COMPLEX_FLOAT) \
+ ? (GR_REGS) \
+ : ((GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \
+ || GET_MODE (X) == VOIDmode) \
+ ? (GR_REGS) : (CLASS))))
+
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ (GET_MODE_CLASS (MODE) == MODE_INT)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + (UNITS_PER_WORD) - 1) / (UNITS_PER_WORD))
+
+/* Stack layout; function entry, exit and calling. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Changed the starting frame offset to include the new link area. */
+#define STARTING_FRAME_OFFSET \
+ (crtl->outgoing_args_size + FIRST_PARM_OFFSET(FNDECL))
+
+/* The return address for the current frame is in r15 if this is a leaf
+ function. Otherwise, it is on the stack. It is at a variable offset
+ from sp/fp/ap, so we define a fake hard register rap which is a
+ pointer to the return address on the stack. This always gets eliminated
+ during reload to be either the frame pointer or the stack pointer plus
+ an offset. */
+
+#define RETURN_ADDR_RTX(count, frame) \
+ microblaze_return_addr(count,frame)
+
+extern struct microblaze_frame_info current_frame_info;
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, GP_REG_FIRST + MB_ABI_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, \
+ GP_REG_FIRST + MB_ABI_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, \
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, GP_REG_FIRST + MB_ABI_FRAME_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = microblaze_initial_elimination_offset ((FROM), (TO))
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define FIRST_PARM_OFFSET(FNDECL) (UNITS_PER_WORD)
+
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 0
+
+#define REG_PARM_STACK_SPACE(FNDECL) (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD)
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+#define STACK_BOUNDARY 32
+
+#define NUM_OF_ARGS 6
+
+#define GP_RETURN (GP_REG_FIRST + MB_ABI_INT_RETURN_VAL_REGNUM)
+
+#define GP_ARG_FIRST (GP_REG_FIRST + MB_ABI_FIRST_ARG_REGNUM)
+#define GP_ARG_LAST (GP_REG_FIRST + MB_ABI_LAST_ARG_REGNUM)
+
+#define MAX_ARGS_IN_REGISTERS MB_ABI_MAX_ARG_REGS
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG ( \
+ ((GET_MODE_CLASS (MODE) != MODE_INT \
+ || GET_MODE_SIZE (MODE) >= 4) \
+ ? (MODE) \
+ : SImode), GP_RETURN)
+
+/* 1 if N is a possible register number for a function value.
+ On the MicroBlaze, R3 and R4 are the only registers thus used.
+ Currently, only R3 is implemented here (C has no complex type). */
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN)
+
+#define FUNCTION_ARG_REGNO_P(N) (((N) >= GP_ARG_FIRST && (N) <= GP_ARG_LAST))
+
+typedef struct microblaze_args
+{
+ int gp_reg_found; /* whether a gp register was found yet */
+ int arg_number; /* argument number */
+ int arg_words; /* # total words the arguments take */
+ int fp_arg_words; /* # words for FP args */
+ int last_arg_fp; /* nonzero if last arg was FP (EABI only) */
+ int fp_code; /* Mode of FP arguments */
+ int num_adjusts; /* number of adjustments made */
+ /* Adjustments made to args pass in regs. */
+ /* ??? The size is doubled to work around a bug in the code that sets the
+ adjustments in function_arg. */
+ struct rtx_def *adjust[MAX_ARGS_IN_REGISTERS * 2];
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
+ init_cumulative_args (&CUM, FNTYPE, LIBNAME)
+
+#define NO_PROFILE_COUNTERS 1
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ { \
+ fprintf (FILE, "\tbrki\tr16,_mcount\n"); \
+ }
+
+#define EXIT_IGNORE_STACK 1
+
+#define TRAMPOLINE_SIZE (32 + 8)
+
+#define TRAMPOLINE_ALIGNMENT 32
+
+#define REGNO_OK_FOR_BASE_P(regno) microblaze_regno_ok_for_base_p ((regno), 1)
+
+#define REGNO_OK_FOR_INDEX_P(regno) microblaze_regno_ok_for_base_p ((regno), 1)
+
+#ifndef REG_OK_STRICT
+#define REG_STRICT_FLAG 0
+#else
+#define REG_STRICT_FLAG 1
+#endif
+
+#define REG_OK_FOR_BASE_P(X) \
+ microblaze_regno_ok_for_base_p (REGNO (X), REG_STRICT_FLAG)
+
+#define REG_OK_FOR_INDEX_P(X) \
+ microblaze_regno_ok_for_base_p (REGNO (X), REG_STRICT_FLAG)
+
+#define MAX_REGS_PER_ADDRESS 2
+
+
+/* Identify valid constant addresses. Exclude if PIC addr which
+ needs scratch register. */
+#define CONSTANT_ADDRESS_P(X) \
+ (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT \
+ || (GET_CODE (X) == CONST \
+ && ! (flag_pic && pic_address_needs_scratch (X))))
+
+/* Define this, so that when PIC, reload won't try to reload invalid
+ addresses which require two reload registers. */
+#define LEGITIMATE_PIC_OPERAND_P(X) (!pic_address_needs_scratch (X))
+
+/* At present, GAS doesn't understand li.[sd], so don't allow it
+ to be generated. */
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_CODE (X) != CONST_DOUBLE \
+ || microblaze_const_double_ok (X, GET_MODE (X)))
+
+#define CASE_VECTOR_MODE (SImode)
+
+#ifndef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 1
+#endif
+
+#define MOVE_MAX 4
+#define MAX_MOVE_MAX 8
+
+#define SLOW_BYTE_ACCESS 1
+
+/* sCOND operations return 1. */
+#define STORE_FLAG_VALUE 1
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* This results in inefficient code for 64-bit to 32-bit conversions.
+ Something needs to be done about this. Perhaps not use any 32-bit
+ instructions? Perhaps use PROMOTE_MODE? */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE SImode
+
+/* Mode should always be SImode. */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) \
+ ( GR_REG_CLASS_P (FROM) && GR_REG_CLASS_P (TO) ? 2 \
+ : (FROM) == ST_REGS && GR_REG_CLASS_P (TO) ? 4 \
+ : 12)
+
+#define MEMORY_MOVE_COST(MODE,CLASS,TO_P) \
+ (4 + memory_move_secondary_cost ((MODE), (CLASS), (TO_P)))
+
+#define BRANCH_COST(speed_p, predictable_p) 2
+
+/* Control the assembler format that we output. */
+#define ASM_APP_ON " #APP\n"
+#define ASM_APP_OFF " #NO_APP\n"
+
+#define REGISTER_NAMES { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \
+ "rmsr", "$ap", "$rap", "$frp" }
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "r0", 0 + GP_REG_FIRST }, \
+ { "r1", 1 + GP_REG_FIRST }, \
+ { "r2", 2 + GP_REG_FIRST }, \
+ { "r3", 3 + GP_REG_FIRST }, \
+ { "r4", 4 + GP_REG_FIRST }, \
+ { "r5", 5 + GP_REG_FIRST }, \
+ { "r6", 6 + GP_REG_FIRST }, \
+ { "r7", 7 + GP_REG_FIRST }, \
+ { "r8", 8 + GP_REG_FIRST }, \
+ { "r9", 9 + GP_REG_FIRST }, \
+ { "r10", 10 + GP_REG_FIRST }, \
+ { "r11", 11 + GP_REG_FIRST }, \
+ { "r12", 12 + GP_REG_FIRST }, \
+ { "r13", 13 + GP_REG_FIRST }, \
+ { "r14", 14 + GP_REG_FIRST }, \
+ { "r15", 15 + GP_REG_FIRST }, \
+ { "r16", 16 + GP_REG_FIRST }, \
+ { "r17", 17 + GP_REG_FIRST }, \
+ { "r18", 18 + GP_REG_FIRST }, \
+ { "r19", 19 + GP_REG_FIRST }, \
+ { "r20", 20 + GP_REG_FIRST }, \
+ { "r21", 21 + GP_REG_FIRST }, \
+ { "r22", 22 + GP_REG_FIRST }, \
+ { "r23", 23 + GP_REG_FIRST }, \
+ { "r24", 24 + GP_REG_FIRST }, \
+ { "r25", 25 + GP_REG_FIRST }, \
+ { "r26", 26 + GP_REG_FIRST }, \
+ { "r27", 27 + GP_REG_FIRST }, \
+ { "r28", 28 + GP_REG_FIRST }, \
+ { "r29", 29 + GP_REG_FIRST }, \
+ { "r30", 30 + GP_REG_FIRST }, \
+ { "r31", 31 + GP_REG_FIRST }, \
+ { "rmsr", ST_REG} \
+}
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) microblaze_print_operand_punct[CODE]
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* ASM_OUTPUT_ALIGNED_COMMON and ASM_OUTPUT_ALIGNED_LOCAL
+
+ Unfortunately, we still need to set the section explicitly. Somehow,
+ our binutils assigns .comm and .lcomm variables to the "current" section
+ in the assembly file, rather than where they implicitly belong. We need
+ to remove this explicit setting in GCC when binutils can understand
+ sections better. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+do { \
+ if ((SIZE) > 0 && (SIZE) <= INT_MAX \
+ && (int) (SIZE) <= microblaze_section_threshold \
+ && TARGET_XLGPOPT) \
+ { \
+ switch_to_section (sbss_section); \
+ } \
+ else \
+ { \
+ switch_to_section (bss_section); \
+ } \
+ fprintf (FILE, "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+} while (0)
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+do { \
+ if ((SIZE) > 0 && (SIZE) <= INT_MAX \
+ && (int) (SIZE) <= microblaze_section_threshold \
+ && TARGET_XLGPOPT) \
+ { \
+ switch_to_section (sbss_section); \
+ } \
+ else \
+ { \
+ switch_to_section (bss_section); \
+ } \
+ fprintf (FILE, "%s", LCOMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+} while (0)
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+do { \
+ ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \
+} while (0)
+
+#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL) \
+{ \
+}
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM))
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ ".gpword", \
+ LOCAL_LABEL_PREFIX, VALUE)
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+do { \
+ if (flag_pic == 2) \
+ fprintf (STREAM, "\t%s\t%sL%d@GOTOFF\n", \
+ ".gpword", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+ else \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ ".gpword", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+} while (0)
+
+#define ASM_OUTPUT_ALIGN(STREAM,LOG) \
+ fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+#define ASM_OUTPUT_SKIP(STREAM,SIZE) \
+ fprintf (STREAM, "\t.space\t%lu\n", (SIZE))
+
+#define ASCII_DATA_ASM_OP "\t.ascii\t"
+#define STRING_ASM_OP "\t.asciz\t"
+
+#define ASM_OUTPUT_IDENT(FILE, STRING) \
+ microblaze_asm_output_ident (FILE, STRING)
+
+/* Default to -G 8 */
+#ifndef MICROBLAZE_DEFAULT_GVALUE
+#define MICROBLAZE_DEFAULT_GVALUE 8
+#endif
+
+/* Given a decl node or constant node, choose the section to output it in
+ and select that section. */
+
+/* Store in OUTPUT a string (made with alloca) containing
+ an assembler-name for a local static variable named NAME.
+ LABELNO is an integer which is different for each call. */
+#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \
+( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \
+ sprintf ((OUTPUT), "%s.%lu", (NAME), (unsigned long)(LABELNO)))
+
+/* How to start an assembler comment.
+ The leading space is important (the microblaze assembler requires it). */
+#ifndef ASM_COMMENT_START
+#define ASM_COMMENT_START " #"
+#endif
+
+#define BSS_VAR 1
+#define SBSS_VAR 2
+#define DATA_VAR 4
+#define SDATA_VAR 5
+#define RODATA_VAR 6
+#define SDATA2_VAR 7
+
+/* These definitions are used with the shift_type flag in the RTL. */
+#define SHIFT_CONST 1
+#define SHIFT_REG 2
+#define USE_ADDK 3
+
+/* Handle interrupt attribute. */
+extern int interrupt_handler;
+extern int save_volatiles;
+
+#define INTERRUPT_HANDLER_NAME "_interrupt_handler"
+
+/* These #defines were added for C++. */
+#define UNALIGNED_SHORT_ASM_OP ".data16"
+#define UNALIGNED_INT_ASM_OP ".data32"
+#define UNALIGNED_DOUBLE_INT_ASM_OP ".data8"
+
+#define ASM_BYTE_OP ".data8"
+
+/* The following #defines are used in the header files. Always retain
+ these. */
+
+/* Added for declaring size at the end of the function. */
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do { \
+ if (!flag_inhibit_size_directive) \
+ { \
+ char label[256]; \
+ static int labelno; \
+ labelno++; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "Lfe", labelno); \
+ (*targetm.asm_out.internal_label) (FILE, "Lfe", labelno); \
+ fprintf (FILE, "%s", SIZE_ASM_OP); \
+ assemble_name (FILE, (FNAME)); \
+ fprintf (FILE, ","); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "-"); \
+ assemble_name (FILE, (FNAME)); \
+ putc ('\n', FILE); \
+ } \
+ } while (0)
+
+#define GLOBAL_ASM_OP "\t.globl\t"
+#define TYPE_ASM_OP "\t.type\t"
+#define SIZE_ASM_OP "\t.size\t"
+#define COMMON_ASM_OP "\t.comm\t"
+#define LCOMMON_ASM_OP "\t.lcomm\t"
+
+#define MAX_OFILE_ALIGNMENT (32768*8)
+
+#define TYPE_OPERAND_FMT "@%s"
+
+/* Write the extra assembler code needed to declare an object properly. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do { \
+ fprintf (FILE, "%s", TYPE_ASM_OP); \
+ assemble_name (FILE, NAME); \
+ putc (',', FILE); \
+ fprintf (FILE, TYPE_OPERAND_FMT, "object"); \
+ putc ('\n', FILE); \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ fprintf (FILE, "%s", SIZE_ASM_OP); \
+ assemble_name (FILE, NAME); \
+ fprintf (FILE, "," HOST_WIDE_INT_PRINT_DEC "\n", \
+ int_size_in_bytes (TREE_TYPE (DECL))); \
+ } \
+ microblaze_declare_object (FILE, NAME, "", ":\n", 0); \
+ } while (0)
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+do { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ fprintf (FILE, "%s", SIZE_ASM_OP); \
+ assemble_name (FILE, name); \
+ fprintf (FILE, "," HOST_WIDE_INT_PRINT_DEC "\n", \
+ int_size_in_bytes (TREE_TYPE (DECL))); \
+ } \
+ } while (0)
+
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { fputc ( '\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs ( " = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ( '\n', FILE); \
+ } while (0)
+
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weakext\t", FILE); \
+ assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+ } while (0)
+
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+#undef UNIQUE_SECTION_P
+#define UNIQUE_SECTION_P(DECL) (DECL_ONE_ONLY (DECL))
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* Define the strings to put out for each section in the object file.
+
+ Note: For ctors/dtors, we want to give these sections the SHF_WRITE
+ attribute to allow shared libraries to patch/resolve addresses into
+ these locations. On Microblaze, there is no concept of shared libraries
+ yet, so this is for future use. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define READONLY_DATA_SECTION_ASM_OP \
+ "\t.rodata"
+#define BSS_SECTION_ASM_OP "\t.bss"
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"aw\""
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"aw\""
+#define INIT_SECTION_ASM_OP "\t.section\t.init,\"ax\""
+#define FINI_SECTION_ASM_OP "\t.section\t.fini,\"ax\""
+
+#define SDATA_SECTION_ASM_OP "\t.sdata" /* Small RW initialized data */
+#define SDATA2_SECTION_ASM_OP "\t.sdata2" /* Small RO initialized data */
+#define SBSS_SECTION_ASM_OP "\t.sbss" /* Small RW uninitialized data */
+#define SBSS2_SECTION_ASM_OP "\t.sbss2" /* Small RO uninitialized data */
+
+/* We do this to save a few tens of bytes of code space that would be
+ taken up by the call_FUNC () wrappers, used by the generic
+ CRT_CALL_STATIC_FUNCTION definition in crtstuff.c. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm ( SECTION_OP "\n" \
+ "\tbrlid r15, " #FUNC "\n\t nop\n" \
+ TEXT_SECTION_ASM_OP);
+
+/* We need to group -lm as well, since some Newlib math functions
+ reference __errno! */
+#undef LIB_SPEC
+#define LIB_SPEC \
+"%{!nostdlib: \
+%{pg:-start-group -lxilprofile -lgloss -lxil -lc -lm -end-group } \
+%{!pg:-start-group -lgloss -lxil -lc -lm -end-group }} "
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#define STARTFILE_EXECUTABLE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+#define STARTFILE_XMDSTUB_SPEC "crt1.o%s crti.o%s crtbegin.o%s"
+#define STARTFILE_BOOTSTRAP_SPEC "crt2.o%s crti.o%s crtbegin.o%s"
+#define STARTFILE_NOVECTORS_SPEC "crt3.o%s crti.o%s crtbegin.o%s"
+#define STARTFILE_CRTINIT_SPEC "%{!pg: %{!mno-clearbss: crtinit.o%s} \
+%{mno-clearbss: sim-crtinit.o%s}} \
+%{pg: %{!mno-clearbss: pgcrtinit.o%s} %{mno-clearbss: sim-pgcrtinit.o%s}}"
+
+#define STARTFILE_DEFAULT_SPEC STARTFILE_EXECUTABLE_SPEC
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "startfile_executable", STARTFILE_EXECUTABLE_SPEC }, \
+ { "startfile_xmdstub", STARTFILE_XMDSTUB_SPEC }, \
+ { "startfile_bootstrap", STARTFILE_BOOTSTRAP_SPEC }, \
+ { "startfile_novectors", STARTFILE_NOVECTORS_SPEC }, \
+ { "startfile_crtinit", STARTFILE_CRTINIT_SPEC }, \
+ { "startfile_default", STARTFILE_DEFAULT_SPEC },
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+%{Zxl-mode-executable : %(startfile_executable) ; \
+ mxl-mode-executable : %(startfile_executable) ; \
+ Zxl-mode-xmdstub : %(startfile_xmdstub) ; \
+ mxl-mode-xmdstub : %(startfile_xmdstub) ; \
+ Zxl-mode-bootstrap : %(startfile_bootstrap) ; \
+ mxl-mode-bootstrap : %(startfile_bootstrap) ; \
+ Zxl-mode-novectors : %(startfile_novectors) ; \
+ mxl-mode-novectors : %(startfile_novectors) ; \
+ Zxl-mode-xilkernel : %(startfile_xilkernel) ; \
+ mxl-mode-xilkernel : %(startfile_xilkernel) ; \
+ : %(startfile_default) \
+} \
+%(startfile_crtinit)"
diff --git a/gcc/config/microblaze/microblaze.md b/gcc/config/microblaze/microblaze.md
new file mode 100644
index 000000000..19b77f9bc
--- /dev/null
+++ b/gcc/config/microblaze/microblaze.md
@@ -0,0 +1,2231 @@
+;; microblaze.md -- Machine description for Xilinx MicroBlaze processors.
+;; Copyright 2009, 2010 Free Software Foundation, Inc.
+
+;; Contributed by Michael Eager <eager@eagercon.com>.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+(include "constraints.md")
+(include "predicates.md")
+
+;;----------------------------------------------------
+;; Constants
+;;----------------------------------------------------
+(define_constants [
+ (R_SP 1) ;; Stack pointer reg
+ (R_SR 15) ;; Sub-routine return addr reg
+ (R_IR 14) ;; Interrupt return addr reg
+ (R_DR 16) ;; Debug trap return addr reg
+ (R_ER 17) ;; Exception return addr reg
+ (R_TMP 18) ;; Assembler temporary reg
+ (R_GOT 20) ;; GOT ptr reg
+ (MB_PIPE_3 0) ;; Microblaze 3-stage pipeline
+ (MB_PIPE_5 1) ;; Microblaze 5-stage pipeline
+ (UNSPEC_SET_GOT 101) ;; set up GOT pointer
+ (UNSPEC_GOTOFF 102) ;; GOT offset
+ (UNSPEC_PLT 103) ;; jump table
+ (UNSPEC_CMP 104) ;; signed compare
+ (UNSPEC_CMPU 105) ;; unsigned compare
+])
+
+
+;;----------------------------------------------------
+;; Instruction Attributes
+;;----------------------------------------------------
+
+;; Classification of each insn.
+;; branch conditional branch
+;; jump unconditional jump
+;; call unconditional call
+;; load load instruction(s)
+;; store store instruction(s)
+;; move data movement within same register set
+;; arith integer arithmetic instruction
+;; darith double precision integer arithmetic instructions
+;; imul integer multiply
+;; idiv integer divide
+;; icmp integer compare
+;; fadd floating point add/subtract
+;; frsub floating point reverse subtract
+;; fmul floating point multiply
+;; fdiv floating point divide
+;; fcmp floating point compare
+;; fcvt floating point convert
+;; fsqrt floating point square root
+;; fsl Fast Simplex Link (FSL) operations
+;; multi multiword sequence (or user asm statements)
+;; nop no operation
+;; bshift Shift operations
+
+(define_attr "type"
+ "unknown,branch,jump,call,load,store,move,arith,darith,imul,idiv,icmp,multi,nop,no_delay_arith,no_delay_load,no_delay_store,no_delay_imul,no_delay_move,bshift,fadd,frsub,fmul,fdiv,fcmp,fsl,fsqrt,fcvt"
+ (const_string "unknown"))
+
+;; Main data type used by the insn
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,SF,DF" (const_string "unknown"))
+
+;; # instructions (4 bytes each)
+(define_attr "length" "" (const_int 4))
+
+;;----------------------------------------------------
+;; Attribute describing the processor.
+;;----------------------------------------------------
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")])
+
+;; whether or not generating calls to position independent functions
+(define_attr "abicalls" "no,yes"
+ (const (symbol_ref "microblaze_abicalls_attr")))
+
+;;----------------------------------------------------------------
+;; Microblaze DFA Pipeline description
+;;----------------------------------------------------------------
+
+;;-----------------------------------------------------------------
+/*
+ This is a description of pipeline hazards based on a DFA. The
+ following constructions can be used for this:
+
+ o (define_cpu_unit string [string]) describes cpu functional units
+ (separated by commas).
+
+ 1st operand: Names of cpu function units.
+ 2nd operand: Name of automaton (see comments for
+ DEFINE_AUTOMATON).
+
+ All define_reservations and define_cpu_units should have unique
+ names which can not be "nothing".
+
+ o (exclusion_set string string) means that each CPU function unit
+ in the first string can not be reserved simultaneously with any
+ unit whose name is in the second string, and vice versa. CPU
+ units in the strings are separated by commas. For example, it is
+ useful for describing a CPU with a fully pipelined floating point
+ functional unit which can simultaneously execute only single
+ precision or only double precision floating point insns.
+
+ o (presence_set string string) means that each CPU function unit in
+ the first string can not be reserved unless at least one of the
+ units whose names are in the second string is reserved. This is an
+ asymmetric relation. CPU units in the strings are separated by
+ commas. For example, it is useful for describing that slot1 is
+ reserved only after a slot0 reservation for a VLIW processor.
+
+ o (absence_set string string) means that each CPU function unit in
+ the first string can be reserved only if every unit whose name
+ is in the second string is not reserved. This is an asymmetric
+ relation (the exclusion set is analogous but symmetric). CPU
+ units in the strings are separated by commas. For example, it is
+ useful for describing that slot0 can not be reserved after a
+ slot1 or slot2 reservation for a VLIW processor.
+
+ o (define_bypass number out_insn_names in_insn_names) names bypass with
+ given latency (the first number) from insns given by the first
+ string (see define_insn_reservation) into insns given by the
+ second string. Insn names in the strings are separated by
+ commas.
+
+ o (define_automaton string) describes the names of the automata
+ generated and used for pipeline hazards recognition. The names
+ are separated by commas. It is actually possible to generate a
+ single automaton, but unfortunately it can be very large. If we
+ use more than one automaton, the total size of the automata is
+ usually smaller than that of a single one. The automaton name is
+ used in define_cpu_unit. All automata should have unique names.
+
+ o (define_reservation string string) names the reservation (the first
+ string) of cpu functional units (the 2nd string). Sometimes unit
+ reservations for different insns contain common parts. In such
+ a case, you describe the common part and use its name (the 1st
+ parameter) in the regular expressions in define_insn_reservation.
+ All define_reservations and define_cpu_units should have unique
+ names which can not be "nothing".
+
+ o (define_insn_reservation name default_latency condition regexpr)
+ describes the reservation of cpu functional units (the 3rd operand)
+ for an instruction which is selected by the condition (the 2nd
+ parameter). The first parameter is used for output of debugging
+ information. The reservations are described by a regular
+ expression according to the following syntax:
+
+ regexp = regexp "," oneof
+ | oneof
+
+ oneof = oneof "|" allof
+ | allof
+
+ allof = allof "+" repeat
+ | repeat
+
+ repeat = element "*" number
+ | element
+
+ element = cpu_function_name
+ | reservation_name
+ | result_name
+ | "nothing"
+ | "(" regexp ")"
+
+ 1. "," is used for describing start of the next cycle in
+ reservation.
+
+ 2. "|" is used for describing the reservation described by the
+ first regular expression *or* the reservation described by
+ the second regular expression *or* etc.
+
+ 3. "+" is used for describing the reservation described by the
+ first regular expression *and* the reservation described by
+ the second regular expression *and* etc.
+
+ 4. "*" is used for convenience and simply means sequence in
+ which the regular expression are repeated NUMBER times with
+ cycle advancing (see ",").
+
+ 5. A cpu function unit name means a reservation of that unit.
+
+ 6. A reservation name -- see define_reservation.
+
+ 7. The string "nothing" means no unit reservation.
+
+*/
+;;-----------------------------------------------------------------
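+
+;; An illustrative (hypothetical) reservation, to make the syntax above
+;; concrete -- the unit names here are made up and are not part of this
+;; port:
+;;
+;;   (define_insn_reservation "example-op" 3
+;;     (eq_attr "type" "imul")
+;;     "ex_issue, ex_unit*2")
+;;
+;; means: an imul-type insn reserves "ex_issue" on its first cycle and
+;; "ex_unit" on the next two cycles, and its result is ready after 3
+;; cycles.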
+
+
+;;----------------------------------------------------------------
+;; Microblaze 5-stage pipeline description (v5.00.a and later)
+;;----------------------------------------------------------------
+
+(define_automaton "mbpipe_5")
+(define_cpu_unit "mb_issue,mb_iu,mb_wb,mb_fpu,mb_fpu_2,mb_mul,mb_mul_2,mb_div,mb_div_2,mb_bs,mb_bs_2" "mbpipe_5")
+
+(define_insn_reservation "mb-integer" 1
+ (and (eq_attr "type" "branch,jump,call,arith,darith,icmp,nop,no_delay_arith")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_iu,mb_wb")
+
+(define_insn_reservation "mb-special-move" 2
+ (and (eq_attr "type" "move")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_iu*2,mb_wb")
+
+(define_insn_reservation "mb-mem-load" 3
+ (and (eq_attr "type" "load,no_delay_load")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_iu,mb_wb")
+
+(define_insn_reservation "mb-mem-store" 1
+ (and (eq_attr "type" "store,no_delay_store")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_iu,mb_wb")
+
+(define_insn_reservation "mb-mul" 3
+ (and (eq_attr "type" "imul,no_delay_imul")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_mul,mb_mul_2*2,mb_wb")
+
+(define_insn_reservation "mb-div" 34
+ (and (eq_attr "type" "idiv")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_div,mb_div_2*33,mb_wb")
+
+(define_insn_reservation "mb-bs" 2
+ (and (eq_attr "type" "bshift")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_bs,mb_bs_2,mb_wb")
+
+(define_insn_reservation "mb-fpu-add-sub-mul" 6
+ (and (eq_attr "type" "fadd,frsub,fmul")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_fpu,mb_fpu_2*5,mb_wb")
+
+(define_insn_reservation "mb-fpu-fcmp" 3
+ (and (eq_attr "type" "fcmp")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_fpu,mb_fpu*2,mb_wb")
+
+(define_insn_reservation "mb-fpu-div" 30
+ (and (eq_attr "type" "fdiv")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_fpu,mb_fpu_2*29,mb_wb")
+
+(define_insn_reservation "mb-fpu-sqrt" 30
+ (and (eq_attr "type" "fsqrt")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_fpu,mb_fpu_2*29,mb_wb")
+
+(define_insn_reservation "mb-fpu-fcvt" 4
+ (and (eq_attr "type" "fcvt")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_5)))
+ "mb_issue,mb_fpu,mb_fpu_2*3,mb_wb")
+
+;;----------------------------------------------------------------
+;; Microblaze 3-stage pipeline description (for v4.00.a and earlier)
+;;----------------------------------------------------------------
+
+(define_automaton "mbpipe_3")
+(define_cpu_unit "mb3_iu" "mbpipe_3")
+
+(define_insn_reservation "mb3-integer" 1
+ (and (eq_attr "type" "branch,jump,call,arith,darith,icmp,nop,no_delay_arith")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-special-move" 2
+ (and (eq_attr "type" "move")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu*2")
+
+(define_insn_reservation "mb3-mem-load" 2
+ (and (eq_attr "type" "load,no_delay_load")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-mem-store" 1
+ (and (eq_attr "type" "store,no_delay_store")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-mul" 3
+ (and (eq_attr "type" "imul,no_delay_imul")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-div" 34
+ (and (eq_attr "type" "idiv")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-bs" 2
+ (and (eq_attr "type" "bshift")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-fpu-add-sub-mul" 6
+ (and (eq_attr "type" "fadd,frsub,fmul")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-fpu-fcmp" 3
+ (and (eq_attr "type" "fcmp")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-fpu-div" 30
+ (and (eq_attr "type" "fdiv")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-fpu-sqrt" 30
+ (and (eq_attr "type" "fsqrt")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
+(define_insn_reservation "mb3-fpu-fcvt" 4
+ (and (eq_attr "type" "fcvt")
+ (eq (symbol_ref "microblaze_pipe") (const_int MB_PIPE_3)))
+ "mb3_iu")
+
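+;; Generator debugging aids (see the GCC internals documentation for
+;; automata_option): "v" dumps a description of the generated
+;; automaton, "time" prints generation-time statistics, and
+;; "progress" reports progress while genautomata runs.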
+(automata_option "v")
+(automata_option "time")
+(automata_option "progress")
+
+;;----------------------------------------------------------------
+;; Microblaze delay slot description
+;;----------------------------------------------------------------
+(define_delay (eq_attr "type" "branch,call,jump")
+ [(and (eq_attr "type" "!branch,call,jump,icmp,multi,no_delay_arith,no_delay_load,no_delay_store,no_delay_imul,no_delay_move,darith")
+ (ior (eq (symbol_ref "microblaze_no_unsafe_delay") (const_int 0))
+ (eq_attr "type" "!fadd,frsub,fmul,fdiv,fcmp,store,load")
+ ))
+ (nil) (nil)])
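+
+;; For example (illustrative register numbers), the shift loops later
+;; in this file emit
+;;     bneid   r18,.-4
+;;     addk    r3,r3,r3
+;; with the addk executing in the delay slot of the bneid branch.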
+
+
+;;----------------------------------------------------------------
+;; Microblaze FPU
+;;----------------------------------------------------------------
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (plus:SF (match_operand:SF 1 "register_operand" "d")
+ (match_operand:SF 2 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT"
+ "fadd\t%0,%1,%2"
+ [(set_attr "type" "fadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (minus:SF (match_operand:SF 1 "register_operand" "d")
+ (match_operand:SF 2 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT"
+ "frsub\t%0,%2,%1"
+ [(set_attr "type" "frsub")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (mult:SF (match_operand:SF 1 "register_operand" "d")
+ (match_operand:SF 2 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT"
+ "fmul\t%0,%1,%2"
+ [(set_attr "type" "fmul")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (div:SF (match_operand:SF 1 "register_operand" "d")
+ (match_operand:SF 2 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT"
+ "fdiv\t%0,%2,%1"
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT_SQRT"
+ "fsqrt\t%0,%1"
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=d")
+ (float:SF (match_operand:SI 1 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT_CONVERT"
+ "flt\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (fix:SI (match_operand:SF 1 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT_CONVERT"
+ "fint\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")])
+
+;;----------------------------------------------------------------
+;; Add
+;;----------------------------------------------------------------
+
+;; Add 2 SImode integers [ src1 = reg ; src2 = arith ; dest = reg ]
+;; Leave carry as is
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%dJ,dJ,dJ")
+ (match_operand:SI 2 "arith_operand" "d,I,i")))]
+ ""
+ "@
+ addk\t%0,%z1,%2
+ addik\t%0,%z1,%2
+ addik\t%0,%z1,%2"
+ [(set_attr "type" "arith,arith,no_delay_arith")
+ (set_attr "mode" "SI,SI,SI")
+ (set_attr "length" "4,4,8")])
+
+;;----------------------------------------------------------------
+;; Double Precision Additions
+;;----------------------------------------------------------------
+
+;; reg_DI_dest = reg_DI_src1 + DI_src2
+
+;; Add two DI operands: register/register or register/immediate.
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (plus:DI (match_operand:DI 1 "register_operand" "%d,d,d")
+ (match_operand:DI 2 "arith_operand32" "d,P,N")))]
+ ""
+ "@
+ add\t%L0,%L1,%L2\;addc\t%M0,%M1,%M2
+ addi\t%L0,%L1,%2\;addc\t%M0,%M1,r0
+ addi\t%L0,%L1,%2\;addc\t%M0,%M1,r0\;addi\t%M0,%M0,-1"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8,8,12")])
+
+;;----------------------------------------------------------------
+;; Subtraction
+;;----------------------------------------------------------------
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (minus:SI (match_operand:SI 1 "arith_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "d,n")))]
+ ""
+ "@
+ rsubk\t%0,%2,%z1
+ addik\t%0,%z1,-%2"
+ [(set_attr "type" "arith,no_delay_arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+
+;;----------------------------------------------------------------
+;; Double Precision Subtraction
+;;----------------------------------------------------------------
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (minus:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "arith_operand32" "d")))]
+ ""
+ "@
+ rsub\t%L0,%L2,%L1\;rsubc\t%M0,%M2,%M1"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+
+;;----------------------------------------------------------------
+;; Multiplication
+;;----------------------------------------------------------------
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (mult:SI (match_operand:SI 1 "register_operand" "d,d,d")
+ (match_operand:SI 2 "arith_operand" "d,I,i")))]
+ "!TARGET_SOFT_MUL"
+ "@
+ mul\t%0,%1,%2
+ muli\t%0,%1,%2
+ muli\t%0,%1,%2"
+ [(set_attr "type" "imul,imul,no_delay_imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8")])
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mul\t%L0,%1,%2\;mulh\t%M0,%1,%2"
+ [(set_attr "type" "no_delay_arith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "d"))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mul\t%L0,%1,%2\;mulhu\t%M0,%1,%2"
+ [(set_attr "type" "no_delay_arith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+(define_insn "usmulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mul\t%L0,%1,%2\;mulhsu\t%M0,%2,%1"
+ [(set_attr "type" "no_delay_arith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+(define_insn "*smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (const_int 32))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mulh\t%0,%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "*umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (const_int 32))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mulhu\t%0,%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "*usmulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (const_int 32))))]
+ "!TARGET_SOFT_MUL && TARGET_MULTIPLY_HIGH"
+ "mulhsu\t%0,%2,%1"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+
+;;----------------------------------------------------------------
+;; Division and remainder
+;;----------------------------------------------------------------
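+;; Three cases: with a hardware divider, divsi3_internal is emitted;
+;; with soft divide plus a barrel shifter and -msmall-divides, small
+;; signed divisions expand to a table lookup via
+;; microblaze_expand_divide; otherwise no pattern is provided and a
+;; libcall is used.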
+(define_expand "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (div:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ ]
+ "(!TARGET_SOFT_DIV) || (TARGET_BARREL_SHIFT && TARGET_SMALL_DIVIDES)"
+ {
+ if (TARGET_SOFT_DIV && TARGET_BARREL_SHIFT && TARGET_SMALL_DIVIDES)
+ {
+ microblaze_expand_divide (operands);
+ DONE;
+ }
+ else if (!TARGET_SOFT_DIV)
+ {
+ emit_insn (gen_divsi3_internal (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ }
+)
+
+
+(define_insn "divsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (div:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ ]
+ "!TARGET_SOFT_DIV"
+ "idiv\t%0,%2,%1"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (udiv:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ ]
+ "!TARGET_SOFT_DIV"
+ "idivu\t%0,%2,%1"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+
+;;----------------------------------------------------------------
+;; Negation and one's complement
+;;----------------------------------------------------------------
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "rsubk\t%0,%1,r0"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (match_operand:DI 1 "register_operand" "d")))]
+ ""
+ "rsub\t%L0,%L1,r0\;rsubc\t%M0,%M1,r0"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (not:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "xori\t%0,%1,-1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "*one_cmpldi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (not:DI (match_operand:DI 1 "register_operand" "d")))]
+ ""
+ "nor\t%M0,r0,%M1\;nor\t%L0,r0,%L1"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")]
+)
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (not:DI (match_operand:DI 1 "register_operand" "")))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))"
+
+ [(set (subreg:SI (match_dup 0) 0) (not:SI (subreg:SI (match_dup 1) 0)))
+ (set (subreg:SI (match_dup 0) 4) (not:SI (subreg:SI (match_dup 1) 4)))]
+ "")
+
+
+;;----------------------------------------------------------------
+;; Logical
+;;----------------------------------------------------------------
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (and:SI (match_operand:SI 1 "arith_operand" "%d,d,d,d")
+ (match_operand:SI 2 "arith_operand" "d,I,i,M")))]
+ ""
+ "@
+ and\t%0,%1,%2
+ andi\t%0,%1,%2 #and1
+ andi\t%0,%1,%2 #and2
+ andi\t%0,%1,%2 #and3"
+ [(set_attr "type" "arith,arith,no_delay_arith,no_delay_arith")
+ (set_attr "mode" "SI,SI,SI,SI")
+ (set_attr "length" "4,8,8,8")])
+
+
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (and:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))]
+ ""
+ "and\t%M0,%M1,%M2\;and\t%L0,%L1,%L2"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && GET_CODE (operands[2]) == REG && GP_REG_P (REGNO (operands[2]))"
+
+ [(set (subreg:SI (match_dup 0) 0) (and:SI (subreg:SI (match_dup 1) 0)
+ (subreg:SI (match_dup 2) 0)))
+ (set (subreg:SI (match_dup 0) 4) (and:SI (subreg:SI (match_dup 1) 4)
+ (subreg:SI (match_dup 2) 4)))]
+ "")
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (ior:SI (match_operand:SI 1 "arith_operand" "%d,d,d,d")
+ (match_operand:SI 2 "arith_operand" "d,I,M,i")))]
+ ""
+ "@
+ or\t%0,%1,%2
+ ori\t%0,%1,%2
+ ori\t%0,%1,%2
+ ori\t%0,%1,%2"
+ [(set_attr "type" "arith,no_delay_arith,no_delay_arith,no_delay_arith")
+ (set_attr "mode" "SI,SI,SI,SI")
+ (set_attr "length" "4,8,8,8")])
+
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ior:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))]
+ ""
+ "or\t%M0,%M1,%M2\;or\t%L0,%L1,%L2"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")]
+)
+
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ior:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && GET_CODE (operands[2]) == REG && GP_REG_P (REGNO (operands[2]))"
+
+ [(set (subreg:SI (match_dup 0) 0) (ior:SI (subreg:SI (match_dup 1) 0)
+ (subreg:SI (match_dup 2) 0)))
+ (set (subreg:SI (match_dup 0) 4) (ior:SI (subreg:SI (match_dup 1) 4)
+ (subreg:SI (match_dup 2) 4)))]
+ "")
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (xor:SI (match_operand:SI 1 "arith_operand" "%d,d,d")
+ (match_operand:SI 2 "arith_operand" "d,I,i")))]
+ ""
+ "@
+ xor\t%0,%1,%2
+ xori\t%0,%1,%2
+ xori\t%0,%1,%2"
+ [(set_attr "type" "arith,arith,no_delay_arith")
+ (set_attr "mode" "SI,SI,SI")
+ (set_attr "length" "4,8,8")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (xor:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))]
+ ""
+ "xor\t%M0,%M1,%M2\;xor\t%L0,%L1,%L2"
+ [(set_attr "type" "darith")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")]
+)
+
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && GET_CODE (operands[2]) == REG && GP_REG_P (REGNO (operands[2]))"
+
+ [(set (subreg:SI (match_dup 0) 0) (xor:SI (subreg:SI (match_dup 1) 0)
+ (subreg:SI (match_dup 2) 0)))
+ (set (subreg:SI (match_dup 0) 4) (xor:SI (subreg:SI (match_dup 1) 4)
+ (subreg:SI (match_dup 2) 4)))]
+ "")
+
+;;----------------------------------------------------------------
+;; Zero extension
+;;----------------------------------------------------------------
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "@
+ andi\t%0,%1,0xffff
+ lhu%i1\t%0,%1
+ lhu%i1\t%0,%1"
+ [(set_attr "type" "no_delay_arith,load,no_delay_load")
+ (set_attr "mode" "SI,SI,SI")
+ (set_attr "length" "8,4,8")])
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d,d,d")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "@
+ andi\t%0,%1,0x00ff
+ lbu%i1\t%0,%1
+ lbu%i1\t%0,%1"
+ [(set_attr "type" "arith,load,no_delay_load")
+ (set_attr "mode" "HI")
+ (set_attr "length" "4,4,8")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ "@
+ andi\t%0,%1,0x00ff
+ lbu%i1\t%0,%1
+ lbu%i1\t%0,%1"
+ [(set_attr "type" "arith,load,no_delay_load")
+ (set_attr "mode" "SI,SI,SI")
+ (set_attr "length" "4,4,8")])
+
+;;----------------------------------------------------------------
+;; Sign extension
+;;----------------------------------------------------------------
+
+;; Basic sign-extend operations.
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "sext8\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "sext16\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+;; Patterns with an integer source operand are ordered
+;; widest source type first.
+
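+;; Sign-extend SI to DI: copy (or load) the low word, shift its MSB
+;; into the carry flag ("add %0,%D0,%D0"), materialize the carry in
+;; the high word ("addc"), then overwrite the high word with all ones
+;; when the sign bit was set (the "beqi" skips the final "addi" when
+;; the carry was zero).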
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,R,m")))]
+ ""
+ {
+ if (which_alternative == 0)
+ output_asm_insn ("addk\t%D0,r0,%1", operands);
+ else
+ output_asm_insn ("lw%i1\t%D0,%1", operands);
+
+ output_asm_insn ("add\t%0,%D0,%D0", operands);
+ output_asm_insn ("addc\t%0,r0,r0", operands);
+ output_asm_insn ("beqi\t%0,.+8", operands);
+ return "addi\t%0,r0,0xffffffff";
+ }
+ [(set_attr "type" "multi,multi,multi")
+ (set_attr "mode" "DI")
+ (set_attr "length" "20,20,20")])
+
+;;----------------------------------------------------------------
+;; Data movement
+;;----------------------------------------------------------------
+
+;; 64-bit integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ {
+ /* If operands[1] is a constant address illegal for pic, then we need to
+ handle it just like microblaze_legitimize_address does. */
+ if (flag_pic && pic_address_needs_scratch (operands[1]))
+ {
+ rtx temp = force_reg (DImode, XEXP (XEXP (operands[1], 0), 0));
+ rtx temp2 = XEXP (XEXP (operands[1], 0), 1);
+ emit_move_insn (operands[0], gen_rtx_PLUS (DImode, temp, temp2));
+ DONE;
+ }
+
+
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], DImode)
+ && !register_operand (operands[1], DImode)
+ && (((GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+ && operands[1] != CONST0_RTX (DImode))))
+ {
+
+ rtx temp = force_reg (DImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+ }
+)
+
+
+
+(define_insn "*movdi_internal"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,d,d,R,m")
+ (match_operand:DI 1 "general_operand" " d,i,J,R,m,d,d"))]
+ ""
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return "addk\t%0,%1\n\taddk\t%D0,%d1";
+ case 1:
+ return "addik\t%0,r0,%h1\n\taddik\t%D0,r0,%j1 #li => la";
+ case 2:
+ return "addk\t%0,r0,r0\n\taddk\t%D0,r0,r0";
+ case 3:
+ case 4:
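+ /* Load the word that overwrites the base address register last,
+ so the second lwi still sees a valid address. */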
+ if (reg_mentioned_p (operands[0], operands[1]))
+ return "lwi\t%D0,%o1\n\tlwi\t%0,%1";
+ else
+ return "lwi\t%0,%1\n\tlwi\t%D0,%o1";
+ case 5:
+ case 6:
+ return "swi\t%1,%0\n\tswi\t%D1,%o0";
+ }
+ return "unreachable";
+ }
+ [(set_attr "type" "no_delay_move,no_delay_arith,no_delay_arith,no_delay_load,no_delay_load,no_delay_store,no_delay_store")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8,8,8,8,12,8,12")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && (REGNO(operands[0]) == (REGNO(operands[1]) + 1))"
+
+ [(set (subreg:SI (match_dup 0) 4) (subreg:SI (match_dup 1) 4))
+ (set (subreg:SI (match_dup 0) 0) (subreg:SI (match_dup 1) 0))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && (REGNO (operands[0]) != (REGNO (operands[1]) + 1))"
+
+ [(set (subreg:SI (match_dup 0) 0) (subreg:SI (match_dup 1) 0))
+ (set (subreg:SI (match_dup 0) 4) (subreg:SI (match_dup 1) 4))]
+ "")
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ {
+ if (microblaze_expand_move (SImode, operands)) DONE;
+ }
+)
+
+;; Added for status registers.
+(define_insn "movsi_status"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,z")
+ (match_operand:SI 1 "register_operand" "z,d,d"))]
+ "interrupt_handler"
+ "@
+ mfs\t%0,%1 #mfs
+ addk\t%0,%1,r0 #add movsi
+ mts\t%0,%1 #mts"
+ [(set_attr "type" "move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "12")])
+
+;; This move will not be moved to a delay slot.
+(define_insn "*movsi_internal3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d")
+ (match_operand:SI 1 "immediate_operand" "J,I,Mnis"))]
+ "(register_operand (operands[0], SImode) &&
+ (GET_CODE (operands[1]) == CONST_INT &&
+ (INTVAL (operands[1]) <= 32767 && INTVAL (operands[1]) >= -32768)))"
+ "@
+ addk\t%0,r0,r0
+ addik\t%0,r0,%1\t# %X1
+ addik\t%0,r0,%1\t# %X1"
+ [(set_attr "type" "arith,arith,no_delay_arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+;; This move may be used for a PLT label operand.
+(define_insn "*movsi_internal5_pltop"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (match_operand:SI 1 "call_insn_operand" ""))]
+ "(register_operand (operands[0], Pmode) &&
+ PLT_ADDR_P (operands[1]))"
+ {
+ gcc_unreachable ();
+ }
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_internal2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d, d,d,R, T")
+ (match_operand:SI 1 "move_operand" " d,I,Mnis,R,m,dJ,dJ"))]
+ "(register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)
+ || (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) == 0))
+ && (flag_pic != 2 || (GET_CODE (operands[1]) != SYMBOL_REF
+ && GET_CODE (operands[1]) != LABEL_REF))"
+ "@
+ addk\t%0,%1,r0
+ addik\t%0,r0,%1\t# %X1
+ addik\t%0,%a1
+ lw%i1\t%0,%1
+ lw%i1\t%0,%1
+ sw%i0\t%z1,%0
+ sw%i0\t%z1,%0"
+ [(set_attr "type" "load,load,no_delay_load,load,no_delay_load,store,no_delay_store")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8,4,8,4,8")])
+
+
+;; 16-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because BYTE_LOADS_ZERO_EXTEND is defined.
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ {
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], HImode)
+ && !register_operand (operands[1], HImode)
+ && ((GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0)))
+ {
+ rtx temp = force_reg (HImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+ }
+)
+
+(define_insn "*movhi_internal2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,R,m")
+ (match_operand:HI 1 "general_operand" "I,d,R,m,dJ,dJ"))]
+ ""
+ "@
+ addik\t%0,r0,%1\t# %X1
+ addk\t%0,%1,r0
+ lhui\t%0,%1
+ lhui\t%0,%1
+ sh%i0\t%z1,%0
+ sh%i0\t%z1,%0"
+ [(set_attr "type" "arith,move,load,no_delay_load,store,no_delay_store")
+ (set_attr "mode" "HI")
+ (set_attr "length" "4,4,4,8,8,8")])
+
+;; 8-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because BYTE_LOADS_ZERO_EXTEND is defined.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ {
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], QImode)
+ && !register_operand (operands[1], QImode)
+ && ((GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0)))
+ {
+ rtx temp = force_reg (QImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+ }
+)
+
+(define_insn "*movqi_internal2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,d,R,m")
+ (match_operand:QI 1 "general_operand" "J,I,d,R,m,dJ,dJ"))]
+ ""
+ "@
+ addk\t%0,r0,%z1
+ addik\t%0,r0,%1\t# %X1
+ addk\t%0,%1,r0
+ lbu%i1\t%0,%1
+ lbu%i1\t%0,%1
+ sb%i0\t%z1,%0
+ sbi\t%z1,%0"
+ [(set_attr "type" "arith,arith,move,load,no_delay_load,store,no_delay_store")
+ (set_attr "mode" "QI")
+ (set_attr "length" "4,4,8,4,8,4,8")])
+
+;; Block moves, see microblaze.c for more details.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
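+;;
+;; For example, a word-aligned copy such as "memcpy (dst, src, 64)"
+;; reaches this expander with operands[2] = 64 and operands[3] = 4
+;; (the shared alignment in bytes).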
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand")
+ (match_operand:BLK 1 "general_operand"))
+ (use (match_operand:SI 2 ""))
+ (use (match_operand:SI 3 "const_int_operand"))])]
+ ""
+ {
+ if (microblaze_expand_block_move (operands[0], operands[1],
+ operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+ }
+)
+
+;; 32-bit floating point moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ {
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], SFmode)
+ && !register_operand (operands[1], SFmode)
+ && ( ((GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+ && operands[1] != CONST0_RTX (SFmode))))
+ {
+ rtx temp = force_reg (SFmode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+ }
+)
+
+;; Applies to both TARGET_SOFT_FLOAT and TARGET_HARD_FLOAT
+;;
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,d,d,d,R,m")
+ (match_operand:SF 1 "general_operand" "G,d,R,F,m,d,d"))]
+ "(register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode)
+ || operands[1] == CONST0_RTX (SFmode))"
+ "@
+ addk\t%0,r0,r0
+ addk\t%0,%1,r0
+ lw%i1\t%0,%1
+ addik\t%0,r0,%F1
+ lw%i1\t%0,%1
+ sw%i0\t%z1,%0
+ swi\t%z1,%0"
+ [(set_attr "type" "move,no_delay_load,load,no_delay_load,no_delay_load,store,no_delay_store")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4,4,4,4,4,4,4")])
+
+;; 64-bit floating point moves
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ {
+ if (flag_pic == 2) {
+ if (GET_CODE (operands[1]) == MEM
+ && !microblaze_legitimate_address_p (DFmode, XEXP (operands[1],0), 0))
+ {
+ rtx ptr_reg;
+ rtx result;
+ ptr_reg = force_reg (Pmode, XEXP (operands[1],0));
+ result = gen_rtx_MEM (DFmode, ptr_reg);
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
+ }
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], DFmode)
+ && !register_operand (operands[1], DFmode)
+ && (((GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+ && operands[1] != CONST0_RTX (DFmode))))
+ {
+ rtx temp = force_reg (DFmode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+ }
+)
+
+;; movdf_internal
+;; Applies to both TARGET_SOFT_FLOAT and TARGET_HARD_FLOAT
+;;
+(define_insn "*movdf_internal"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=d,d,d,d,To")
+ (match_operand:DF 1 "general_operand" "dG,o,F,T,d"))]
+ ""
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return "addk\t%0,r0,r0\n\taddk\t%D0,r0,r0";
+ case 1:
+ case 3:
+ if (reg_mentioned_p (operands[0], operands[1]))
+ return "lwi\t%D0,%o1\n\tlwi\t%0,%1";
+ else
+ return "lwi\t%0,%1\n\tlwi\t%D0,%o1";
+ case 2:
+ {
+ return "addik\t%0,r0,%h1 \n\taddik\t%D0,r0,%j1 #Xfer Lo";
+ }
+ case 4:
+ return "swi\t%1,%0\n\tswi\t%D1,%o0";
+ }
+ gcc_unreachable ();
+ }
+ [(set_attr "type" "no_delay_move,no_delay_load,no_delay_load,no_delay_load,no_delay_store")
+ (set_attr "mode" "DF")
+ (set_attr "length" "4,8,8,16,8")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && (REGNO (operands[0]) == (REGNO (operands[1]) + 1))"
+ [(set (subreg:SI (match_dup 0) 4) (subreg:SI (match_dup 1) 4))
+ (set (subreg:SI (match_dup 0) 0) (subreg:SI (match_dup 1) 0))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))]
+ "reload_completed
+ && GET_CODE (operands[0]) == REG && GP_REG_P (REGNO (operands[0]))
+ && GET_CODE (operands[1]) == REG && GP_REG_P (REGNO (operands[1]))
+ && (REGNO (operands[0]) != (REGNO (operands[1]) + 1))"
+ [(set (subreg:SI (match_dup 0) 0) (subreg:SI (match_dup 1) 0))
+ (set (subreg:SI (match_dup 0) 4) (subreg:SI (match_dup 1) 4))]
+ "")
+
+;;----------------------------------------------------------------
+;; Shifts
+;;----------------------------------------------------------------
+
+;;----------------------------------------------------------------
+;; 32-bit left shifts
+;;----------------------------------------------------------------
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+ {
+ /* Avoid recursion for trivial cases. */
+ if (!((GET_CODE (operands [2]) == CONST_INT) && (INTVAL (operands[2]) == 1)))
+ if (microblaze_expand_shift (operands))
+ DONE;
+ }
+)
+
+;; Irrespective of whether we have a barrel shifter, we want to match
+;; shifts by 1 with a special pattern.  When a barrel shifter is
+;; present, this saves a cycle.  If not, it allows us to annotate the
+;; instruction for delay slot optimization.
+(define_insn "*ashlsi3_byone"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "I")))]
+ "(INTVAL (operands[2]) == 1)"
+ "addk\t%0,%1,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
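+
+;; For example (illustrative register numbers), "x << 1" is emitted
+;; as "addk r3,r4,r4" rather than "bslli r3,r4,1", matching the
+;; pattern above.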
+
+;; Barrel shift left
+(define_insn "ashlsi3_bshift"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")))]
+ "TARGET_BARREL_SHIFT"
+ "@
+ bslli\t%0,%1,%2
+ bsll\t%0,%1,%2"
+ [(set_attr "type" "bshift,bshift")
+ (set_attr "mode" "SI,SI")
+ (set_attr "length" "4,4")]
+)
+
+;; The following patterns apply when there is no barrel shifter present
+
+(define_insn "*ashlsi3_with_mul_delay"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ "!TARGET_SOFT_MUL
+ && ((1 << INTVAL (operands[2])) <= 32767 && (1 << INTVAL (operands[2])) >= -32768)"
+ "muli\t%0,%1,%m2"
+ ;; This MUL will not generate an imm. Can go into a delay slot.
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
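+
+;; For example (illustrative), a shift "x << 3" is emitted as a
+;; multiply by 8, since a left shift by N equals a multiply by 2^N.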
+
+(define_insn "*ashlsi3_with_mul_nodelay"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ "!TARGET_SOFT_MUL"
+ "muli\t%0,%1,%m2"
+ ;; This MUL will generate an IMM. Cannot go into a delay slot.
+ [(set_attr "type" "no_delay_arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "8")]
+)
+
+(define_insn "*ashlsi3_with_size_opt"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ "(INTVAL (operands[2]) > 5 && optimize_size)"
+ {
+ operands[3] = gen_rtx_REG (SImode, MB_ABI_ASM_TEMP_REGNUM);
+
+ output_asm_insn ("ori\t%3,r0,%2", operands);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,%1,r0", operands);
+
+ output_asm_insn ("addik\t%3,%3,-1", operands);
+ output_asm_insn ("bneid\t%3,.-4", operands);
+ return "addk\t%0,%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "20")]
+)
+
+(define_insn "*ashlsi3_with_rotate"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ "(INTVAL (operands[2]) > 17 && !optimize_size)"
+ {
+ int i, nshift;
+
+ nshift = INTVAL (operands[2]);
+ operands[3] = gen_int_mode (0xFFFFFFFF << nshift, SImode);
+
+ /* We do one extra shift so that the first bit (carry) coming into the
+ MSB will be masked out. */
+ output_asm_insn ("src\t%0,%1", operands);
+ for (i = 0; i < (32 - nshift); i++)
+ output_asm_insn ("src\t%0,%0", operands);
+
+ return "andi\t%0,%0,%3";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "80")]
+)
+
+(define_insn "*ashlsi_inline"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ ""
+ {
+ int i;
+ int nshift = INTVAL (operands[2]);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ output_asm_insn ("addk\t%0,%1,%1", operands);
+ for (i = 0; i < (nshift - 2); i++)
+ output_asm_insn ("addk\t%0,%0,%0", operands);
+ return "addk\t%0,%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "124")]
+)
+
+(define_insn "*ashlsi_reg"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ {
+ operands[3] = gen_rtx_REG (SImode, MB_ABI_ASM_TEMP_REGNUM);
+ output_asm_insn ("andi\t%3,%2,31", operands);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ /* Exit the loop if zero shift. */
+ output_asm_insn ("beqid\t%3,.+20", operands);
+ /* Emit the loop. */
+ output_asm_insn ("addk\t%0,%0,r0", operands);
+ output_asm_insn ("addik\t%3,%3,-1", operands);
+ output_asm_insn ("bneid\t%3,.-4", operands);
+ return "addk\t%0,%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "28")]
+)
+
+
+;;----------------------------------------------------------------
+;; 32-bit right shifts
+;;----------------------------------------------------------------
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+ {
+ /* Avoid recursion for trivial cases. */
+ if (!((GET_CODE (operands [2]) == CONST_INT) && (INTVAL (operands[2]) == 1)))
+ if (microblaze_expand_shift (operands))
+ DONE;
+ }
+)
+
+;; Irrespective of whether we have a barrel shifter, we want to match
+;; shifts by 1 with a special pattern.  When a barrel shifter is
+;; present, this saves a cycle.  If not, it allows us to annotate the
+;; instruction for delay slot optimization.
+(define_insn "*ashrsi3_byone"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "I")))]
+ "(INTVAL (operands[2]) == 1)"
+ "sra\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
+
+;; Barrel shift right arithmetic
+(define_insn "*ashrsi3_bshift"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")))]
+ "TARGET_BARREL_SHIFT"
+ "@
+ bsrai\t%0,%1,%2
+ bsra\t%0,%1,%2"
+ [(set_attr "type" "bshift,bshift")
+ (set_attr "mode" "SI,SI")
+ (set_attr "length" "4,4")]
+)
+
+(define_insn "*ashrsi_inline"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ ""
+ {
+ int i;
+ int nshift = INTVAL (operands[2]);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ output_asm_insn ("sra\t%0,%1", operands);
+ for (i = 0; i < (nshift - 2); i++)
+ output_asm_insn ("sra\t%0,%0", operands);
+ return "sra\t%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "124")]
+)
+
+(define_insn "*ashlri_reg"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ {
+ operands[3] = gen_rtx_REG (SImode, MB_ABI_ASM_TEMP_REGNUM);
+ output_asm_insn ("andi\t%3,%2,31", operands);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ /* Exit the loop if zero shift. */
+ output_asm_insn ("beqid\t%3,.+20", operands);
+ /* Emit the loop. */
+ output_asm_insn ("addk\t%0,%0,r0", operands);
+ output_asm_insn ("addik\t%3,%3,-1", operands);
+ output_asm_insn ("bneid\t%3,.-4", operands);
+ return "sra\t%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "28")]
+)
+
+;;----------------------------------------------------------------
+;; 32-bit right shifts (logical)
+;;----------------------------------------------------------------
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+ {
+ /* Avoid recursion for trivial cases. */
+ if (!((GET_CODE (operands [2]) == CONST_INT) && (INTVAL (operands[2]) == 1)))
+ if (microblaze_expand_shift (operands))
+ DONE;
+ }
+)
+
+;; Irrespective of whether we have a barrel shifter, we want to match
+;; shifts by 1 with a special pattern.  When a barrel shifter is
+;; present, this saves a cycle.  If not, it allows us to annotate the
+;; instruction for delay slot optimization.
+(define_insn "*lshrsi3_byone"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "I")))]
+ "(INTVAL (operands[2]) == 1)"
+ "srl\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
+
+;; Barrel shift right logical
+(define_insn "*lshrsi3_bshift"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")))]
+ "TARGET_BARREL_SHIFT"
+ "@
+ bsrli\t%0,%1,%2
+ bsrl\t%0,%1,%2"
+ [(set_attr "type" "bshift,bshift")
+ (set_attr "mode" "SI,SI")
+ (set_attr "length" "4,4")]
+)
+
+(define_insn "*lshrsi_inline"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ ""
+ {
+ int i;
+ int nshift = INTVAL (operands[2]);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ output_asm_insn ("srl\t%0,%1", operands);
+ for (i = 0; i < (nshift - 2); i++)
+ output_asm_insn ("srl\t%0,%0", operands);
+ return "srl\t%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "124")]
+)
+
+(define_insn "*lshlri_reg"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ ""
+ {
+ operands[3] = gen_rtx_REG (SImode, MB_ABI_ASM_TEMP_REGNUM);
+ output_asm_insn ("andi\t%3,%2,31", operands);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ output_asm_insn ("addk\t%0,r0,%1", operands);
+ /* Exit the loop if zero shift. */
+ output_asm_insn ("beqid\t%3,.+20", operands);
+ /* Emit the loop. */
+ output_asm_insn ("addk\t%0,%0,r0", operands);
+ output_asm_insn ("addik\t%3,%3,-1", operands);
+ output_asm_insn ("bneid\t%3,.-4", operands);
+ return "srl\t%0,%0";
+ }
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "28")]
+)
+
+;;----------------------------------------------------------------
+;; Setting a register from an integer comparison.
+;;----------------------------------------------------------------
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "register_operand")]))]
+ "TARGET_PATTERN_COMPARE"
+ "if (GET_CODE (operand1) != EQ && GET_CODE (operand1) != NE)
+ FAIL;
+ "
+)
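+
+;; Only EQ and NE have pcmp instructions; the expander FAILs for the
+;; other comparison codes, which then fall back to the generic cstore
+;; expansion.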
+
+(define_insn "seq_internal_pat"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (eq:SI
+ (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "TARGET_PATTERN_COMPARE"
+ "pcmpeq\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
+
+(define_insn "sne_internal_pat"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ne:SI
+ (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "TARGET_PATTERN_COMPARE"
+ "pcmpne\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")]
+)
+
+(define_insn "signed_compare"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec
+ [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")] UNSPEC_CMP))]
+ ""
+ "cmp\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+(define_insn "unsigned_compare"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec
+ [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")] UNSPEC_CMPU))]
+ ""
+ "cmpu\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4")])
+
+;;----------------------------------------------------------------
+;; Setting a register from a floating point comparison.
+;;----------------------------------------------------------------
+(define_insn "cstoresf4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SF 2 "register_operand")
+ (match_operand:SF 3 "register_operand")]))]
+ "TARGET_HARD_FLOAT"
+ "fcmp.%C1\t%0,%3,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "SF")
+ (set_attr "length" "4")]
+)
+
+;;----------------------------------------------------------------
+;; Conditional branches
+;;----------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+{
+ microblaze_expand_conditional_branch (SImode, operands);
+ DONE;
+})
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else (match_operator:SI 0 "ordered_comparison_operator"
+ [(match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "register_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ microblaze_expand_conditional_branch_sf (operands);
+ DONE;
+
+})
+
+;; Used to implement comparison instructions
+(define_expand "condjump"
+ [(set (pc)
+ (if_then_else (match_operand 0)
+ (label_ref (match_operand 1))
+ (pc)))])
+
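+;; Branch on a register compared against zero.  %C0 prints the branch
+;; condition and %N0 the inverted condition, chosen according to
+;; whether the taken target is operand 2 or operand 3 (the
+;; fall-through pc).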
+(define_insn "branch_zero"
+ [(set (pc)
+ (if_then_else (match_operator:SI 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "d")
+ (const_int 0)])
+ (match_operand:SI 2 "pc_or_label_operand" "")
+ (match_operand:SI 3 "pc_or_label_operand" "")))
+ ]
+ ""
+ {
+ if (operands[3] == pc_rtx)
+ return "b%C0i%?\t%z1,%2";
+ else
+ return "b%N0i%?\t%z1,%3";
+ }
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")]
+)
+
+;;----------------------------------------------------------------
+;; Unconditional branches
+;;----------------------------------------------------------------
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ {
+ if (GET_CODE (operands[0]) == REG)
+ return "br%?\t%0";
+ else
+ return "bri%?\t%l0";
+ }
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand" "d"))]
+ ""
+ {
+ rtx dest = operands[0];
+ if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode)
+ operands[0] = copy_to_mode_reg (Pmode, dest);
+
+ emit_jump_insn (gen_indirect_jump_internal1 (operands[0]));
+ DONE;
+ }
+)
+
+;; Indirect jumps: jump to register values, assumed to be absolute jumps.
+
+(define_insn "indirect_jump_internal1"
+ [(set (pc) (match_operand:SI 0 "register_operand" "d"))]
+ ""
+ "bra%?\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_expand "tablejump"
+ [(set (pc)
+ (match_operand 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ {
+ gcc_assert (GET_MODE (operands[0]) == Pmode);
+
+ if (!flag_pic)
+ emit_jump_insn (gen_tablejump_internal1 (operands[0], operands[1]));
+ else
+ emit_jump_insn (gen_tablejump_internal3 (operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "tablejump_internal1"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "bra%?\t%0 "
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_expand "tablejump_internal3"
+ [(parallel [(set (pc)
+ (plus:SI (match_operand:SI 0 "register_operand" "d")
+ (label_ref:SI (match_operand:SI 1 "" ""))))
+ (use (label_ref:SI (match_dup 1)))])]
+ ""
+ ""
+)
+
+;; Needs to be changed for MicroBlaze PIC.
+(define_insn ""
+ [(set (pc)
+ (plus:SI (match_operand:SI 0 "register_operand" "d")
+ (label_ref:SI (match_operand 1 "" ""))))
+ (use (label_ref:SI (match_dup 1)))]
+ "next_active_insn (insn) != 0
+ && GET_CODE (PATTERN (next_active_insn (insn))) == ADDR_DIFF_VEC
+ && PREV_INSN (next_active_insn (insn)) == operands[1]
+ && flag_pic"
+ {
+ output_asm_insn ("addk\t%0,%0,r20",operands);
+ return "bra%?\t%0";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_expand "tablejump_internal4"
+ [(parallel [(set (pc)
+ (plus:DI (match_operand:DI 0 "register_operand" "d")
+ (label_ref:DI (match_operand:SI 1 "" ""))))
+ (use (label_ref:DI (match_dup 1)))])]
+ ""
+ ""
+)
+
+;;----------------------------------------------------------------
+;; Function prologue/epilogue and stack allocation
+;;----------------------------------------------------------------
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ {
+ microblaze_expand_prologue ();
+ DONE;
+ }
+)
+
+(define_expand "epilogue"
+ [(use (const_int 0))]
+ ""
+ {
+ microblaze_expand_epilogue ();
+ DONE;
+ }
+)
+
+;; An insn to allocate new stack space for dynamic use (e.g., alloca).
+;; We copy the return address, decrement the stack pointer, and save
+;; the return address again at the new stack top.
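+;;
+;; A rough sketch (register names illustrative) of the sequence
+;; emitted for a constant request of 32 bytes:
+;;     lwi   r18,r1,0      # copy return address from the old stack top
+;;     addik r1,r1,-32     # decrement the stack pointer
+;;     swi   r18,r1,0      # re-save the return address at the new top
+;; The address of the new space is then copied into operand 0.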
+
+(define_expand "allocate_stack"
+ [(set (match_operand 0 "register_operand" "=r")
+ (minus (reg 1) (match_operand 1 "register_operand" "")))
+ (set (reg 1)
+ (minus (reg 1) (match_dup 1)))]
+ ""
+ {
+ rtx retaddr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ rtx rtmp = gen_rtx_REG (SImode, R_TMP);
+ rtx neg_op0;
+
+ emit_move_insn (rtmp, retaddr);
+ if (GET_CODE (operands[1]) != CONST_INT)
+ {
+ neg_op0 = gen_reg_rtx (Pmode);
+ emit_insn (gen_negsi2 (neg_op0, operands[1]));
+ } else
+ neg_op0 = GEN_INT (- INTVAL (operands[1]));
+
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, neg_op0));
+ emit_move_insn (gen_rtx_MEM (Pmode, stack_pointer_rtx), rtmp);
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ emit_insn (gen_rtx_CLOBBER (SImode, rtmp));
+ DONE;
+ }
+)
+
+;; Trivial return.  Make it look like a normal return insn as that
+;; allows jump optimizations to work better.
+(define_insn "return"
+ [(return)]
+ "microblaze_can_use_return_insn ()"
+ {
+ if (microblaze_is_interrupt_handler ())
+ return "rtid\tr14, 0\;%#";
+ else
+ return "rtsd\tr15, 8\;%#";
+ }
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+;; Normal return.
+;; We match any mode for the return address, so that this will work
+;; with both 32-bit and 64-bit targets.
+
+(define_insn "return_internal"
+ [(parallel [(use (match_operand:SI 0 "register_operand" ""))
+ (return)])]
+ ""
+ {
+ if (microblaze_is_interrupt_handler ())
+ return "rtid\tr14,0 \;%#";
+ else
+ return "rtsd\tr15,8 \;%#";
+ }
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+
+;; Block any insns from moving across this point.
+;; Useful for grouping sequences together.
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ ""
+ [(set_attr "type" "unknown")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
+
+
+;;----------------------------------------------------------------
+;; Function calls
+;;----------------------------------------------------------------
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "memory_operand" "m")
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI R_SR))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))])]
+ ""
+ {
+ rtx addr = XEXP (operands[0], 0);
+
+ if (flag_pic == 2 && GET_CODE (addr) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (addr))
+ {
+ rtx temp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PLT);
+ XEXP (operands[0], 0) = temp;
+ }
+
+ if ((GET_CODE (addr) != REG && !CONSTANT_ADDRESS_P (addr))
+ || !call_insn_operand (addr, VOIDmode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+
+ if (GET_CODE (XEXP (operands[0], 0)) == UNSPEC)
+ emit_call_insn (gen_call_internal_plt0 (operands[0], operands[1],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM),
+ pic_offset_table_rtx));
+ else
+ emit_call_insn (gen_call_internal0 (operands[0], operands[1],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM)));
+
+ DONE;
+ }
+)
+
+(define_expand "call_internal0"
+ [(parallel [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "" ""))])]
+ ""
+ {
+ }
+)
+
+(define_expand "call_internal_plt0"
+ [(parallel [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ {
+ }
+)
+
+(define_insn "call_internal_plt"
+ [(call (mem (match_operand:SI 0 "call_insn_plt_operand" ""))
+ (match_operand:SI 1 "" "i"))
+ (clobber (reg:SI R_SR))
+ (use (reg:SI R_GOT))]
+ "flag_pic"
+ {
+ register rtx target2 = gen_rtx_REG (Pmode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM);
+ gen_rtx_CLOBBER (VOIDmode, target2);
+ return "brlid\tr15,%0\;%#";
+ }
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_insn "call_internal1"
+ [(call (mem (match_operand:SI 0 "call_insn_operand" "ri"))
+ (match_operand:SI 1 "" "i"))
+ (clobber (reg:SI R_SR))]
+ ""
+ {
+ register rtx target = operands[0];
+ register rtx target2 = gen_rtx_REG (Pmode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM);
+ if (GET_CODE (target) == SYMBOL_REF) {
+ gen_rtx_CLOBBER (VOIDmode, target2);
+ return "brlid\tr15,%0\;%#";
+ } else if (GET_CODE (target) == CONST_INT)
+ return "la\t%@,r0,%0\;brald\tr15,%@\;%#";
+ else if (GET_CODE (target) == REG)
+ return "brald\tr15,%0\;%#";
+ else {
+ fprintf (stderr,"Unsupported call insn\n");
+ return NULL;
+ }
+ }
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+;; calls.c now passes a fourth argument; make saber happy.
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "register_operand" "=d")
+ (call (match_operand 1 "memory_operand" "m")
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI R_SR))
+ (use (match_operand 3 "" ""))])] ;; next_arg_reg
+ ""
+ {
+ rtx addr = XEXP (operands[1], 0);
+
+ if (flag_pic == 2 && GET_CODE (addr) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (addr))
+ {
+ rtx temp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PLT);
+ XEXP (operands[1], 0) = temp;
+ }
+
+ if ((GET_CODE (addr) != REG && !CONSTANT_ADDRESS_P (addr))
+ || !call_insn_operand (addr, VOIDmode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+
+ if (GET_CODE (XEXP (operands[1], 0)) == UNSPEC)
+ emit_call_insn (gen_call_value_intern_plt0 (operands[0], operands[1],
+ operands[2],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM),
+ pic_offset_table_rtx));
+ else
+ emit_call_insn (gen_call_value_internal (operands[0], operands[1],
+ operands[2],
+ gen_rtx_REG (SImode,
+ GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM)));
+
+ DONE;
+ }
+)
+
+
+(define_expand "call_value_internal"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "" ""))
+ ])]
+ ""
+ {}
+)
+
+(define_expand "call_value_intern_plt0"
+ [(parallel[(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (match_operand:SI 3 "" ""))
+ (use (match_operand:SI 4 "" ""))])]
+ "flag_pic"
+ {}
+)
+
+(define_insn "call_value_intern_plt"
+ [(set (match_operand:VOID 0 "register_operand" "=d")
+ (call (mem (match_operand:SI 1 "call_insn_plt_operand" ""))
+ (match_operand:SI 2 "" "i")))
+ (clobber (match_operand:SI 3 "register_operand" "=d"))
+ (use (match_operand:SI 4 "register_operand"))]
+ "flag_pic"
+ {
+ register rtx target2 = gen_rtx_REG (Pmode, GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM);
+
+ gen_rtx_CLOBBER (VOIDmode, target2);
+ return "brlid\tr15,%1\;%#";
+ }
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+(define_insn "call_value_intern"
+ [(set (match_operand:VOID 0 "register_operand" "=d")
+ (call (mem (match_operand:VOID 1 "call_insn_operand" "ri"))
+ (match_operand:SI 2 "" "i")))
+ (clobber (match_operand:SI 3 "register_operand" "=d"))]
+ ""
+ {
+    register rtx target = operands[1];
+    register rtx target2 = gen_rtx_REG (Pmode,
+			GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM);
+
+    if (GET_CODE (target) == SYMBOL_REF)
+      {
+        gen_rtx_CLOBBER (VOIDmode, target2);
+        return "brlid\tr15,%1\;%#";
+      }
+    else if (GET_CODE (target) == CONST_INT)
+      return "la\t%@,r0,%1\;brald\tr15,%@\;%#";
+    else if (GET_CODE (target) == REG)
+      return "brald\tr15,%1\;%#";
+    else
+      {
+        /* An unexpected operand is a bug, not an assembly template to
+           emit; report it and return no template, matching the
+           call_internal1 pattern above.  */
+        fprintf (stderr, "Unsupported call insn\n");
+        return NULL;
+      }
+ }
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+
+;; Call subroutine returning any type.
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ {
+ if (operands[0]) /* silence statement not reached warnings */
+ {
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ emit_insn (gen_blockage ());
+ DONE;
+ }
+ }
+)
+
+;;----------------------------------------------------------------
+;; Misc.
+;;----------------------------------------------------------------
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")
+ (set_attr "length" "4")])
+
+;; The insn to set GOT. The hardcoded number "8" accounts for $pc difference
+;; between "mfs" and "addik" instructions.
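+;;
+;; A sketch of the emitted sequence (assuming the assembler inserts an
+;; "imm" prefix for the 32-bit offset, which would explain the length
+;; attribute of 12 rather than 8):
+;;     mfs   rX,rpc                          # rX <- pc of the mfs
+;;     imm   ...                             # high half of the offset
+;;     addik rX,rX,_GLOBAL_OFFSET_TABLE_+8   # mfs-to-addik distance is 8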
+(define_insn "set_got"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))]
+ ""
+ "mfs\t%0,rpc\n\taddik\t%0,%0,_GLOBAL_OFFSET_TABLE_+8"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
diff --git a/gcc/config/microblaze/microblaze.opt b/gcc/config/microblaze/microblaze.opt
new file mode 100644
index 000000000..97f316142
--- /dev/null
+++ b/gcc/config/microblaze/microblaze.opt
@@ -0,0 +1,115 @@
+; Options for the MicroBlaze port of the compiler
+;
+; Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
+;
+; Contributed by Michael Eager <eager@eagercon.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Zxl-mode-bootstrap
+Driver
+
+Zxl-mode-executable
+Driver
+
+Zxl-mode-novectors
+Driver
+
+Zxl-mode-xilkernel
+Driver
+
+Zxl-mode-xmdstub
+Driver
+
+msoft-float
+Target Report RejectNegative Mask(SOFT_FLOAT)
+Use software emulation for floating point (default)
+
+mhard-float
+Target Report RejectNegative InverseMask(SOFT_FLOAT, HARD_FLOAT)
+Use hardware floating point instructions
+
+msmall-divides
+Target Mask(SMALL_DIVIDES)
+Use table lookup optimization for small signed integer divisions
+
+mcpu=
+Target RejectNegative Joined Var(microblaze_select_cpu)
+-mcpu=PROCESSOR Use features of and schedule code for given CPU
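+; Accepted values follow the vX.YY.Z version form, e.g. -mcpu=v5.00.a.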
+
+mmemcpy
+Target Mask(MEMCPY)
+Don't optimize block moves, use memcpy
+
+mxl-soft-mul
+Target Mask(SOFT_MUL)
+Use the soft multiply emulation (default)
+
+mxl-soft-div
+Target Mask(SOFT_DIV)
+Use the software emulation for divides (default)
+
+mxl-barrel-shift
+Target Mask(BARREL_SHIFT)
+Use the hardware barrel shifter instead of emulation
+
+mxl-pattern-compare
+Target Mask(PATTERN_COMPARE)
+Use pattern compare instructions
+
+mxl-stack-check
+Target Mask(STACK_CHECK)
+Check for stack overflow at runtime
+
+mxl-gp-opt
+Target Mask(XLGPOPT)
+Use GP relative sdata/sbss sections
+
+mno-clearbss
+Target RejectNegative
+Do not put zero-initialized data in the BSS (deprecated; use -fno-zero-initialized-in-bss)
+
+mxl-multiply-high
+Target Mask(MULTIPLY_HIGH)
+Use multiply high instructions for high part of 32x32 multiply
+
+mxl-float-convert
+Target Mask(FLOAT_CONVERT)
+Use hardware floating point conversion instructions
+
+mxl-float-sqrt
+Target Mask(FLOAT_SQRT)
+Use hardware floating point square root instruction
+
+mxl-mode-executable
+Target Mask(XL_MODE_EXECUTABLE)
+Use the standalone executable application model (default)
+
+mxl-mode-xmdstub
+Target Mask(XL_MODE_XMDSTUB)
+Use the XMD debug stub (xmdstub) application model
+
+mxl-mode-bootstrap
+Target Mask(XL_MODE_BOOTSTRAP)
+Use the bootstrap application model (image loaded by a bootloader)
+
+mxl-mode-novectors
+Target Mask(XL_MODE_NOVECTORS)
+Use the application model without reset and interrupt vectors
+
+mxl-mode-xilkernel
+Target
diff --git a/gcc/config/microblaze/predicates.md b/gcc/config/microblaze/predicates.md
new file mode 100644
index 000000000..ea2f1f080
--- /dev/null
+++ b/gcc/config/microblaze/predicates.md
@@ -0,0 +1,64 @@
+;; Predicate definitions for Xilinx MicroBlaze
+;; Copyright 2009, 2010 Free Software Foundation, Inc.
+;;
+;; Contributed by Michael Eager <eager@eagercon.com>.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Return whether OP can be used as an operand in arithmetic.
+(define_predicate "arith_operand"
+ (ior (match_code "const_int,const_double")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "arith_operand32"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int,const_double")
+ (match_test "LARGE_INT (op)"))))
+
+(define_predicate "const_0_operand"
+ (and (match_code "const_int,const_double")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+;; Return whether OP is a register or the constant 0.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "const_0_operand")
+ (match_operand 0 "register_operand")))
+
+;; Return if the operand is the PC, a label_ref, or the special
+;; "_stack_overflow_exit" symbol matched below.
+(define_special_predicate "pc_or_label_operand"
+ (ior (match_code "pc,label_ref")
+ (and (match_code "symbol_ref")
+ (match_test "!(strcmp ((XSTR (op, 0)), \"_stack_overflow_exit\"))"))))
+
+;; Test for valid call operand
+(define_predicate "call_insn_operand"
+ (match_test "CALL_INSN_OP (op)"))
+
+;; Return if OPERAND is valid as a source operand for a move instruction.
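+;; A (plus X Y) source is accepted only when exactly one of X and Y is a
+;; register (the XOR in the match_test below); reg+reg and const+const
+;; sums are rejected.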
+(define_predicate "move_operand"
+ (and (
+ not (
+ and (match_code "plus")
+ (not (match_test "(GET_CODE (XEXP (op, 0)) == REG) ^ (GET_CODE (XEXP (op,1)) == REG)"))
+ )
+ )
+ (match_operand 0 "general_operand")))
+
+;; Test for valid PIC call operand
+(define_predicate "call_insn_plt_operand"
+ (match_test "PLT_ADDR_P (op)"))
diff --git a/gcc/config/microblaze/t-microblaze b/gcc/config/microblaze/t-microblaze
new file mode 100644
index 000000000..021dbbe3e
--- /dev/null
+++ b/gcc/config/microblaze/t-microblaze
@@ -0,0 +1,33 @@
+# For C++ crtstuff
+EXTRA_MULTILIB_PARTS = crtbegin$(objext) crtend$(objext)
+
+EXTRA_PARTS += crti$(objext) crtn$(objext)
+
+MULTILIB_OPTIONS = mxl-barrel-shift mno-xl-soft-mul mxl-multiply-high
+MULTILIB_DIRNAMES = bs m mh
+MULTILIB_EXCEPTIONS = *mxl-barrel-shift/mxl-multiply-high mxl-multiply-high
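+# The exceptions drop mxl-multiply-high multilibs built without
+# mno-xl-soft-mul, since the high-part multiply instructions presumably
+# require the hardware multiplier that mno-xl-soft-mul selects.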
+
+# Extra files
+microblaze-c.o: $(srcdir)/config/microblaze/microblaze-c.c \
+ $(srcdir)/config/microblaze/microblaze-protos.h \
+ $(CONFIG_H) $(SYSTEM_H) $(CPPLIB_H) $(TM_P_H) $(TREE_H) errors.h $(TM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/microblaze/microblaze-c.c
+
+# Build soft FP routines.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
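+# fp-bit.c and dp-bit.c provide the libgcc software floating-point
+# routines; building config/fp-bit.c with FLOAT defined (below) yields
+# the single-precision set, and without it the double-precision set.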
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+# Assemble startup files
+$(T)crti$(objext): $(srcdir)/config/microblaze/crti.s
+ $(GCC_FOR_TARGET) -c $(srcdir)/config/microblaze/crti.s -o $(T)crti$(objext)
+
+$(T)crtn$(objext): $(srcdir)/config/microblaze/crtn.s
+ $(GCC_FOR_TARGET) -c $(srcdir)/config/microblaze/crtn.s -o $(T)crtn$(objext)
diff --git a/gcc/config/mips/10000.md b/gcc/config/mips/10000.md
new file mode 100644
index 000000000..ad21e9e93
--- /dev/null
+++ b/gcc/config/mips/10000.md
@@ -0,0 +1,253 @@
+;; DFA-based pipeline description for the VR1x000.
+;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
+;; until specific tuning for each is added.
+
+;; R10000 has an int queue, fp queue, address queue.
+;; The int queue feeds ALU1 and ALU2.
+;; The fp queue feeds the fp-adder and fp-multiplier.
+;; The addr queue feeds the Load/Store unit.
+;;
+;; However, we define the fp-adder and fp-multiplier as
+;; separate automatons, because the fp-multiplier is
+;; divided into fp-multiplier, fp-division, and
+;; fp-squareroot units, all of which share the same
+;; issue and completion logic, yet can operate in
+;; parallel.
+;;
+;; This is based on the model described in the R10K Manual
+;; and it helps to reduce the size of the automata.
+(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
+ r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")
+
+(define_cpu_unit "r10k_alu1" "r10k_a_int")
+(define_cpu_unit "r10k_alu2" "r10k_a_int")
+(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
+(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
+(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
+(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")
+(define_cpu_unit "r10k_loadstore" "r10k_a_addr")
+
+
+;; R10k Loads and Stores.
+(define_insn_reservation "r10k_load" 2
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "load,prefetch,prefetchx"))
+ "r10k_loadstore")
+
+(define_insn_reservation "r10k_store" 0
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "store,fpstore,fpidxstore"))
+ "r10k_loadstore")
+
+(define_insn_reservation "r10k_fpload" 3
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r10k_loadstore")
+
+
+;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
+;; Miscellaneous arith goes here too (this is a guess).
+(define_insn_reservation "r10k_arith" 1
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
+ "r10k_alu1 | r10k_alu2")
+
+;; We treat mfhilo differently, because we need to know when
+;; it's HI and when it's LO.
+(define_insn_reservation "r10k_mfhi" 1
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "mfhilo")
+ (not (match_operand 1 "lo_operand"))))
+ "r10k_alu1 | r10k_alu2")
+
+(define_insn_reservation "r10k_mflo" 1
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "mfhilo")
+ (match_operand 1 "lo_operand")))
+ "r10k_alu1 | r10k_alu2")
+
+
+;; ALU1 handles shifts, branch eval, and condmove.
+;;
+;; The brancher is part of ALU1 but can only handle
+;; one branch per cycle (is this even implementable?).
+;;
+;; Unsure if the brancher handles jumps and calls as well, but since
+;; they're related, we'll add them here for now.
+(define_insn_reservation "r10k_brancher" 1
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "shift,branch,jump,call"))
+ "r10k_alu1")
+
+(define_insn_reservation "r10k_int_cmove" 1
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SI,DI")))
+ "r10k_alu1")
+
+
+;; Coprocessor Moves.
+;; mtc1/dmtc1 are handled by ALU1.
+;; mfc1/dmfc1 are handled by the fp-multiplier.
+(define_insn_reservation "r10k_mt_xfer" 3
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "mtc"))
+ "r10k_alu1")
+
+(define_insn_reservation "r10k_mf_xfer" 2
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "mfc"))
+ "r10k_fpmpy")
+
+
+;; Only ALU2 does int multiplications and divisions.
+;;
+;; According to the Vr10000 series user manual,
+;; integer mult and div insns can be issued one
+;; cycle earlier if using register Lo. We model
+;; this by using the Lo value by default, as it
+;; is the more common value, and use a bypass
+;; for the Hi value when needed.
+;;
+;; Also of note, there are different latencies
+;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
+;; However, gcc does not have separate types
+;; for these insns. Thus to strike a balance,
+;; we use the Hi latency value for imul
+;; operations until the imul type can be split.
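+;;
+;; Concretely, the idiv reservations below advertise the Lo latencies
+;; (34 and 66 cycles), and the define_bypass entries after them raise
+;; those to 35 and 67 cycles when the consumer reads Hi.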
+(define_insn_reservation "r10k_imul_single" 6
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "SI")))
+ "r10k_alu2 * 6")
+
+(define_insn_reservation "r10k_imul_double" 10
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "r10k_alu2 * 10")
+
+;; Divides keep ALU2 busy.
+(define_insn_reservation "r10k_idiv_single" 34
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "r10k_alu2 * 35")
+
+(define_insn_reservation "r10k_idiv_double" 66
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "r10k_alu2 * 67")
+
+(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
+(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")
+
+
+;; Floating point add/sub, mul, abs value, neg, comp, & moves.
+(define_insn_reservation "r10k_fp_miscadd" 2
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "fadd,fabs,fneg,fcmp"))
+ "r10k_fpadd")
+
+(define_insn_reservation "r10k_fp_miscmul" 2
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "fmul,fmove"))
+ "r10k_fpmpy")
+
+(define_insn_reservation "r10k_fp_cmove" 2
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SF,DF")))
+ "r10k_fpmpy")
+
+
+;; The fcvt.s.[wl] insn has latency 4, repeat 2.
+;; All other fcvt insns have latency 2, repeat 1.
+(define_insn_reservation "r10k_fcvt_single" 4
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "I2S")))
+ "r10k_fpadd * 2")
+
+(define_insn_reservation "r10k_fcvt_other" 2
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "!I2S")))
+ "r10k_fpadd")
+
+
+;; Run the fmadd insn through fp-adder first, then fp-multiplier.
+;;
+;; The latency for fmadd is 2 cycles if the result is used
+;; by another fmadd instruction.
+(define_insn_reservation "r10k_fmadd" 4
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "fmadd"))
+ "r10k_fpadd, r10k_fpmpy")
+
+(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
+
+
+;; Floating point Divisions & square roots.
+(define_insn_reservation "r10k_fdiv_single" 12
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF")))
+ "r10k_fpdiv * 14")
+
+(define_insn_reservation "r10k_fdiv_double" 19
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF")))
+ "r10k_fpdiv * 21")
+
+(define_insn_reservation "r10k_fsqrt_single" 18
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "mode" "SF")))
+ "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_fsqrt_double" 33
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "mode" "DF")))
+ "r10k_fpsqrt * 35")
+
+(define_insn_reservation "r10k_frsqrt_single" 30
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_frsqrt_double" 52
+ (and (eq_attr "cpu" "r10000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r10k_fpsqrt * 35")
+
+
+;; Handle unknown/multi insns here (this is a guess).
+(define_insn_reservation "r10k_unknown" 1
+ (and (eq_attr "cpu" "r10000")
+ (eq_attr "type" "unknown,multi"))
+ "r10k_alu1 + r10k_alu2")
diff --git a/gcc/config/mips/20kc.md b/gcc/config/mips/20kc.md
new file mode 100644
index 000000000..1d3aadf69
--- /dev/null
+++ b/gcc/config/mips/20kc.md
@@ -0,0 +1,284 @@
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; .........................
+;;
+;; DFA-based pipeline description for MIPS64 model R20Kc.
+;; Contributed by Jason Eckhardt (jle@cygnus.com).
+;;
+;; The R20Kc is a dual-issue processor that can generally bundle
+;; instructions as follows:
+;; 1. integer with integer
+;; 2. integer with fp
+;; 3. fp with fpload/fpstore
+;;
+;; Of course, there are various restrictions.
+;; Reference:
+;; "Ruby (R20K) Technical Specification Rev. 1.2, December 28, 1999."
+;;
+;; .........................
+
+;; Use three automata to isolate long latency operations, reducing space.
+(define_automaton "r20kc_other, r20kc_fdiv, r20kc_idiv")
+
+;;
+;; Describe the resources.
+;;
+
+;; Global.
+(define_cpu_unit "r20kc_iss0, r20kc_iss1" "r20kc_other")
+
+;; Integer execution unit (pipeline A).
+(define_cpu_unit "r20kc_ixua_addsub_agen" "r20kc_other")
+(define_cpu_unit "r20kc_ixua_shift" "r20kc_other")
+
+(exclusion_set "r20kc_ixua_addsub_agen" "r20kc_ixua_shift")
+
+;; Integer execution unit (pipeline B).
+(define_cpu_unit "r20kc_ixub_addsub" "r20kc_other")
+(define_cpu_unit "r20kc_ixub_branch" "r20kc_other")
+(define_cpu_unit "r20kc_ixub_mpydiv" "r20kc_other")
+(define_cpu_unit "r20kc_ixub_mpydiv_iter" "r20kc_idiv")
+
+(exclusion_set "r20kc_ixub_addsub" "r20kc_ixub_branch, r20kc_ixub_mpydiv")
+(exclusion_set "r20kc_ixub_branch" "r20kc_ixub_mpydiv")
+
+;; Cache / memory interface.
+(define_cpu_unit "r20kc_cache" "r20kc_other")
+
+;; Floating-point unit.
+(define_cpu_unit "r20kc_fpu_add" "r20kc_other")
+(define_cpu_unit "r20kc_fpu_mpy" "r20kc_other")
+(define_cpu_unit "r20kc_fpu_mpy_iter" "r20kc_fdiv")
+(define_cpu_unit "r20kc_fpu_divsqrt" "r20kc_other")
+(define_cpu_unit "r20kc_fpu_divsqrt_iter" "r20kc_fdiv")
+
+(exclusion_set "r20kc_fpu_add" "r20kc_fpu_mpy, r20kc_fpu_divsqrt")
+(exclusion_set "r20kc_fpu_mpy" "r20kc_fpu_divsqrt")
+
+;; No insn can be issued in the same cycle after a branch.
+(absence_set "r20kc_iss0,r20kc_iss1" "r20kc_ixub_branch")
+
+;;
+;; Define reservations for unit name mnemonics or combinations.
+;;
+
+(define_reservation "r20kc_iss"
+ "r20kc_iss0|r20kc_iss1")
+(define_reservation "r20kc_single_dispatch"
+ "r20kc_iss0+r20kc_iss1")
+(define_reservation "r20kc_iaddsub"
+ "r20kc_iss+(r20kc_ixua_addsub_agen|r20kc_ixub_addsub)")
+(define_reservation "r20kc_ishift"
+ "r20kc_iss+r20kc_ixua_shift")
+(define_reservation "r20kc_fpmove"
+ "r20kc_iss+r20kc_ixua_addsub_agen")
+(define_reservation "r20kc_imem"
+ "r20kc_iss+r20kc_ixua_addsub_agen+r20kc_cache")
+(define_reservation "r20kc_icache"
+ "r20kc_cache")
+(define_reservation "r20kc_impydiv"
+ "r20kc_iss+r20kc_ixub_mpydiv")
+(define_reservation "r20kc_impydiv_iter"
+ "r20kc_ixub_mpydiv_iter")
+(define_reservation "r20kc_ibranch"
+ "r20kc_iss+r20kc_ixub_branch")
+
+(define_reservation "r20kc_fpadd"
+ "r20kc_iss+r20kc_fpu_add")
+(define_reservation "r20kc_fpmpy"
+ "r20kc_iss+r20kc_fpu_mpy")
+(define_reservation "r20kc_fpmpy_iter"
+ "r20kc_fpu_mpy_iter")
+(define_reservation "r20kc_fpdivsqrt"
+ "r20kc_iss+r20kc_fpu_divsqrt")
+(define_reservation "r20kc_fpdivsqrt_iter"
+ "r20kc_fpu_divsqrt_iter")
+
+;;
+;; Describe instruction reservations for integer operations.
+;;
+
+;; Conditional moves always force single-dispatch.
+(define_insn_reservation "r20kc_cond_move_int" 1
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "!SF,DF")))
+ "r20kc_single_dispatch")
+
+(define_insn_reservation "r20kc_cond_move_fp" 4
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SF,DF")))
+ "r20kc_single_dispatch")
+
+(define_insn_reservation "r20kc_int_other" 1
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "move,arith,const,nop"))
+ "r20kc_iaddsub")
+
+;; Shifts can only execute on ixu pipeline A.
+(define_insn_reservation "r20kc_int_shift" 1
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "shift"))
+ "r20kc_ishift")
+
+(define_insn_reservation "r20kc_ld" 2
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "load,prefetch,prefetchx"))
+ "r20kc_imem")
+
+
+;; A load immediately following a store will stall, so
+;; say that a store uses the cache for an extra cycle.
+(define_insn_reservation "r20kc_st" 2
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "store"))
+ "r20kc_imem,r20kc_icache")
+
+(define_insn_reservation "r20kc_fld" 3
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "fpload"))
+ "r20kc_imem")
+
+(define_insn_reservation "r20kc_ffst" 3
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "fpstore"))
+ "r20kc_imem,r20kc_icache*2")
+
+;; Integer divide latency is between 13 and 42 cycles for DIV[U] and between
+;; 13 and 72 cycles for DDIV[U]. This depends on the value of the inputs,
+;; so we just choose the worst-case latency.
+(define_insn_reservation "r20kc_idiv_si" 42
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "r20kc_impydiv+(r20kc_impydiv_iter*42)")
+
+(define_insn_reservation "r20kc_idiv_di" 72
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "r20kc_impydiv+(r20kc_impydiv_iter*72)")
+
+;; Integer multiply latency is 4 or 7 cycles for word and double-word
+;; respectively.
+(define_insn_reservation "r20kc_impy_si" 4
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "imadd,imul,imul3")
+ (eq_attr "mode" "SI")))
+ "r20kc_impydiv+(r20kc_impydiv_iter*2)")
+
+(define_insn_reservation "r20kc_impy_di" 7
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "imadd,imul,imul3")
+ (eq_attr "mode" "DI")))
+ "r20kc_impydiv+(r20kc_impydiv_iter*7)")
+
+;; Move to/from HI/LO.
+;; Moving to HI/LO has a 3-cycle latency, while moving from HI/LO has
+;; only a 1-cycle latency. The repeat rate is 3 for both.
+(define_insn_reservation "r20kc_imthilo" 3
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "mthilo"))
+ "r20kc_impydiv+(r20kc_impydiv_iter*3)")
+
+(define_insn_reservation "r20kc_imfhilo" 1
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "mfhilo"))
+ "r20kc_impydiv+(r20kc_impydiv_iter*3)")
+
+;; Move to fp coprocessor.
+(define_insn_reservation "r20kc_ixfer_mt" 3
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "mtc"))
+ "r20kc_fpmove")
+
+;; Move from fp coprocessor.
+(define_insn_reservation "r20kc_ixfer_mf" 2
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "mfc"))
+ "r20kc_fpmove")
+
+;; Assume branch predicted correctly.
+(define_insn_reservation "r20kc_ibr" 1
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "branch,jump,call"))
+ "r20kc_ibranch")
+
+;;
+;; Describe instruction reservations for the floating-point operations.
+;;
+(define_insn_reservation "r20kc_fp_other" 4
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "fmove,fadd,fabs,fneg,fcmp"))
+ "r20kc_fpadd")
+
+(define_insn_reservation "r20kc_fp_cvt_a" 4
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "I2S,I2D,S2D")))
+ "r20kc_fpadd")
+
+(define_insn_reservation "r20kc_fp_cvt_b" 5
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "D2S,S2I")))
+ "r20kc_fpadd")
+
+(define_insn_reservation "r20kc_fp_divsqrt_df" 32
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*32)")
+
+(define_insn_reservation "r20kc_fp_divsqrt_sf" 17
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)")
+
+(define_insn_reservation "r20kc_fp_rsqrt_df" 35
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*35)")
+
+(define_insn_reservation "r20kc_fp_rsqrt_sf" 17
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)")
+
+(define_insn_reservation "r20kc_fp_mpy_sf" 4
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r20kc_fpmpy+r20kc_fpmpy_iter")
+
+(define_insn_reservation "r20kc_fp_mpy_df" 5
+ (and (eq_attr "cpu" "20kc")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r20kc_fpmpy+(r20kc_fpmpy_iter*2)")
+
+;; Force single-dispatch for unknown or multi.
+(define_insn_reservation "r20kc_unknown" 1
+ (and (eq_attr "cpu" "20kc")
+ (eq_attr "type" "unknown,multi"))
+ "r20kc_single_dispatch")
diff --git a/gcc/config/mips/24k.md b/gcc/config/mips/24k.md
new file mode 100644
index 000000000..5df8a32dc
--- /dev/null
+++ b/gcc/config/mips/24k.md
@@ -0,0 +1,457 @@
+;; DFA-based pipeline descriptions for MIPS Technologies 24K core.
+;; Contributed by Chao-ying Fu (fu@mips.com), Nigel Stephens (nigel@mips.com)
+;; and David Ung (davidu@mips.com)
+;;
+;; The 24kf2_1 is a single-issue processor with a half-clocked fpu.
+;; The 24kf1_1 is a 24k with a 1:1 clocked fpu.
+;;
+;; References:
+;; "MIPS32 24K Processor Core Family Software User's Manual, Rev 3.04."
+;;
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "r24k_cpu, r24k_mdu, r24k_fpu")
+
+;; Integer execution unit.
+(define_cpu_unit "r24k_iss" "r24k_cpu")
+(define_cpu_unit "r24k_ixu_arith" "r24k_cpu")
+(define_cpu_unit "r24k_mul3a" "r24k_mdu")
+(define_cpu_unit "r24k_mul3b" "r24k_mdu")
+(define_cpu_unit "r24k_mul3c" "r24k_mdu")
+
+;; --------------------------------------------------------------
+;; Producers
+;; --------------------------------------------------------------
+
+;; 1. Loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
+(define_insn_reservation "r24k_int_load" 2
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "load"))
+ "r24k_iss+r24k_ixu_arith")
+
+
+;; 2. Arithmetic: add, addi, addiu, addiupc, addu, and, andi, clo, clz,
+;; ext, ins, lui, movn, movz, nor, or, ori, rotr, rotrv, seb, seh, sll,
+;; sllv, slt, slti, sltiu, sltu, sra, srav, srl, srlv, sub, subu, wsbh,
+;; xor, xori
+;; (movn/movz is not matched; we'll need to split condmove to
+;; differentiate between integer/float moves)
+(define_insn_reservation "r24k_int_arith" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt"))
+ "r24k_iss+r24k_ixu_arith")
+
+
+;; 3. Links: bgezal, bgezall, bltzal, bltzall, jal, jalr, jalx
+;; 3a. jr/jalr consumer
+(define_insn_reservation "r24k_int_jump" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "call,jump"))
+ "r24k_iss+r24k_ixu_arith")
+
+;; 3b. branch consumer
+(define_insn_reservation "r24k_int_branch" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "branch"))
+ "r24k_iss+r24k_ixu_arith")
+
+
+;; 4. MDU: fully pipelined multiplier
+;; mult - delivers result to hi/lo in 1 cycle (pipelined)
+(define_insn_reservation "r24k_int_mult" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "imul"))
+ "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
+
+;; madd, msub - delivers result to hi/lo in 1 cycle (pipelined)
+(define_insn_reservation "r24k_int_madd" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "imadd"))
+ "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
+
+;; mul - delivers result to gpr in 5 cycles
+(define_insn_reservation "r24k_int_mul3" 5
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "imul3"))
+ "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)*5")
+
+;; mfhi, mflo, mflhxu - deliver result to gpr in 5 cycles
+(define_insn_reservation "r24k_int_mfhilo" 5
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "mfhilo"))
+ "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
+
+;; mthi, mtlo, mtlhx - deliver result to hi/lo, thence madd, handled as bypass
+(define_insn_reservation "r24k_int_mthilo" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "mthilo"))
+ "r24k_iss+(r24k_mul3a|r24k_mul3b|r24k_mul3c)")
+
+;; div - default to 36 cycles for 32-bit operands. Faster for 24-bit, 16-bit
+;; and 8-bit operands, but those cases are tricky to identify.
+(define_insn_reservation "r24k_int_div" 36
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "idiv"))
+ "r24k_iss+(r24k_mul3a+r24k_mul3b+r24k_mul3c)*36")
+
+
+;; 5. Cop: cfc1, di, ei, mfc0, mtc0
+;; (Disabled until we add proper cop0 support)
+;;(define_insn_reservation "r24k_int_cop" 3
+;; (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+;; (eq_attr "type" "cop0"))
+;; "r24k_iss+r24k_ixu_arith")
+
+
+;; 6. Store
+(define_insn_reservation "r24k_int_store" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (and (eq_attr "type" "store")
+ (eq_attr "mode" "!unknown")))
+ "r24k_iss+r24k_ixu_arith")
+
+;; 6.1 Special case - matches the cprestore pattern, which doesn't set the
+;; mode attribute. This avoids classifying it as r24k_int_store and having
+;; it checked against store_data_bypass_p, which would then fail because
+;; cprestore does not have a normal SET pattern.
+(define_insn_reservation "r24k_unknown_store" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (and (eq_attr "type" "store")
+ (eq_attr "mode" "unknown")))
+ "r24k_iss+r24k_ixu_arith")
+
+
+;; 7. Multiple instructions
+(define_insn_reservation "r24k_int_multi" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "multi"))
+ "r24k_iss+r24k_ixu_arith+r24k_fpu_arith+(r24k_mul3a+r24k_mul3b+r24k_mul3c)")
+
+
+;; 8. Unknowns - Currently these include blockage, consttable and alignment
+;; RTLs. They do not really affect scheduling latency (blockage affects
+;; scheduling via log links, but that is not used here).
+(define_insn_reservation "r24k_int_unknown" 0
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "unknown"))
+ "r24k_iss")
+
+
+;; 9. Prefetch
+(define_insn_reservation "r24k_int_prefetch" 1
+ (and (eq_attr "cpu" "24kc,24kf2_1,24kf1_1")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "r24k_iss+r24k_ixu_arith")
+
+
+;; --------------------------------------------------------------
+;; Bypass to Consumer
+;; --------------------------------------------------------------
+
+;; load->next use : 2 cycles (Default)
+;; load->load base: 3 cycles
+;; load->store base: 3 cycles
+;; load->prefetch: 3 cycles
+(define_bypass 3 "r24k_int_load" "r24k_int_load")
+(define_bypass 3 "r24k_int_load" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 3 "r24k_int_load" "r24k_int_prefetch")
+
+;; arith->next use : 1 cycle (Default)
+;; arith->load base: 2 cycles
+;; arith->store base: 2 cycles
+;; arith->prefetch: 2 cycles
+(define_bypass 2 "r24k_int_arith" "r24k_int_load")
+(define_bypass 2 "r24k_int_arith" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 2 "r24k_int_arith" "r24k_int_prefetch")
+
+;; mul3->next use : 5 cycles (default)
+;; mul3->l/s base : 6 cycles
+;; mul3->prefetch : 6 cycles
+(define_bypass 6 "r24k_int_mul3" "r24k_int_load")
+(define_bypass 6 "r24k_int_mul3" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 6 "r24k_int_mul3" "r24k_int_prefetch")
+
+;; mul3->madd/msub : 1 cycle
+(define_bypass 1 "r24k_int_mul3" "r24k_int_madd" "mips_linked_madd_p")
+
+;; mfhilo->next use : 5 cycles (default)
+;; mfhilo->l/s base : 6 cycles
+;; mfhilo->prefetch : 6 cycles
+;; mthilo->madd/msub : 2 cycles (only for mthi/lo, not mfhi/lo)
+(define_bypass 6 "r24k_int_mfhilo" "r24k_int_load")
+(define_bypass 6 "r24k_int_mfhilo" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 6 "r24k_int_mfhilo" "r24k_int_prefetch")
+(define_bypass 2 "r24k_int_mthilo" "r24k_int_madd")
+
+;; cop->next use : 3 cycles (Default)
+;; cop->l/s base : 4 cycles
+;; (define_bypass 4 "r24k_int_cop" "r24k_int_load")
+;; (define_bypass 4 "r24k_int_cop" "r24k_int_store" "!store_data_bypass_p")
+
+;; multi->next use : 1 cycle (Default)
+;; multi->l/s base : 2 cycles
+;; multi->prefetch : 2 cycles
+(define_bypass 2 "r24k_int_multi" "r24k_int_load")
+(define_bypass 2 "r24k_int_multi" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 2 "r24k_int_multi" "r24k_int_prefetch")
+
+
+;; --------------------------------------------------------------
+;; Floating Point Instructions
+;; --------------------------------------------------------------
+
+(define_cpu_unit "r24k_fpu_arith" "r24k_fpu")
+
+;; The 24k is a single-issue cpu, and the fpu runs at half clock speed,
+;; so each fpu instruction ties up the shared instruction scheduler for
+;; 1 cycle, and the fpu scheduler for 2 cycles.
+;;
+;; These timings are therefore twice the values in the 24K manual,
+;; which are quoted in fpu clocks.
+;;
+;; The 24kf1_1 is a 24k configured with a 1:1 cpu/fpu clock ratio, so it
+;; uses the unscaled timings.
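+;;
+;; (For example, the 8-cycle r24kf2_1_fadd reservation below corresponds
+;; to 4 fpu clocks in the manual; the 24kf1_1 section further down uses
+;; the unscaled 4.)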
+
+(define_reservation "r24kf2_1_fpu_iss" "r24k_iss+(r24k_fpu_arith*2)")
+
+;; fadd, fabs, fneg
+(define_insn_reservation "r24kf2_1_fadd" 8
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r24kf2_1_fpu_iss")
+
+;; fmove, fcmove
+(define_insn_reservation "r24kf2_1_fmove" 8
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "fmove,condmove"))
+ "r24kf2_1_fpu_iss")
+
+;; fload
+(define_insn_reservation "r24kf2_1_fload" 6
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r24kf2_1_fpu_iss")
+
+;; fstore
+(define_insn_reservation "r24kf2_1_fstore" 2
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "fpstore"))
+ "r24kf2_1_fpu_iss")
+
+;; fmul, fmadd
+(define_insn_reservation "r24kf2_1_fmul_sf" 8
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r24kf2_1_fpu_iss")
+
+(define_insn_reservation "r24kf2_1_fmul_df" 10
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r24kf2_1_fpu_iss,(r24k_fpu_arith*2)")
+
+
+;; fdiv, fsqrt, frsqrt
+(define_insn_reservation "r24kf2_1_fdiv_sf" 34
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "r24kf2_1_fpu_iss,(r24k_fpu_arith*26)")
+
+(define_insn_reservation "r24kf2_1_fdiv_df" 64
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r24kf2_1_fpu_iss,(r24k_fpu_arith*56)")
+
+;; frsqrt
+(define_insn_reservation "r24kf2_1_frsqrt_df" 70
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r24kf2_1_fpu_iss,(r24k_fpu_arith*60)")
+
+;; fcmp
+(define_insn_reservation "r24kf2_1_fcmp" 4
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "fcmp"))
+ "r24kf2_1_fpu_iss")
+
+;; fcmp -> movf.fmt & movt.fmt bypass (dependency must be on the condition)
+(define_bypass 2 "r24kf2_1_fcmp" "r24kf2_1_fmove")
+
+;; fcvt (cvt.d.s, cvt.[sd].[wl])
+(define_insn_reservation "r24kf2_1_fcvt_i2f_s2d" 8
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "I2S,I2D,S2D")))
+ "r24kf2_1_fpu_iss")
+
+;; fcvt (cvt.s.d)
+(define_insn_reservation "r24kf2_1_fcvt_s2d" 12
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "D2S")))
+ "r24kf2_1_fpu_iss")
+
+;; fcvt (cvt.[wl].[sd], etc)
+(define_insn_reservation "r24kf2_1_fcvt_f2i" 10
+ (and (eq_attr "cpu" "24kf2_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "S2I,D2I")))
+ "r24kf2_1_fpu_iss")
+
+;; fxfer (mfc1, mfhc1, mtc1, mthc1)
+(define_insn_reservation "r24kf2_1_fxfer" 4
+ (and (eq_attr "cpu" "24kf2_1")
+ (eq_attr "type" "mfc,mtc"))
+ "r24kf2_1_fpu_iss")
+
+;; --------------------------------------------------------------
+;; Bypass to Consumer
+;; --------------------------------------------------------------
+;; r24kf2_1_fcvt_f2i->l/s base : 11 cycles
+;; r24kf2_1_fcvt_f2i->prefetch : 11 cycles
+(define_bypass 11 "r24kf2_1_fcvt_f2i" "r24k_int_load")
+(define_bypass 11 "r24kf2_1_fcvt_f2i" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 11 "r24kf2_1_fcvt_f2i" "r24k_int_prefetch")
+
+;; r24kf2_1_fxfer->l/s base : 5 cycles
+;; r24kf2_1_fxfer->prefetch : 5 cycles
+(define_bypass 5 "r24kf2_1_fxfer" "r24k_int_load")
+(define_bypass 5 "r24kf2_1_fxfer" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 5 "r24kf2_1_fxfer" "r24k_int_prefetch")
+
+;; --------------------------------------------------------------
+;; The 24kf1_1 is a 24k configured with a 1:1 cpu/fpu clock ratio, so it
+;; uses the unscaled timings.
+;; --------------------------------------------------------------
+
+(define_reservation "r24kf1_1_fpu_iss" "r24k_iss+r24k_fpu_arith")
+
+;; fadd, fabs, fneg
+(define_insn_reservation "r24kf1_1_fadd" 4
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r24kf1_1_fpu_iss")
+
+;; fmove, fcmove
+(define_insn_reservation "r24kf1_1_fmove" 4
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "fmove,condmove"))
+ "r24kf1_1_fpu_iss")
+
+;; fload
+(define_insn_reservation "r24kf1_1_fload" 3
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r24kf1_1_fpu_iss")
+
+;; fstore
+(define_insn_reservation "r24kf1_1_fstore" 1
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "fpstore"))
+ "r24kf1_1_fpu_iss")
+
+;; fmul, fmadd
+(define_insn_reservation "r24kf1_1_fmul_sf" 4
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r24kf1_1_fpu_iss")
+
+(define_insn_reservation "r24kf1_1_fmul_df" 5
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r24kf1_1_fpu_iss,r24k_fpu_arith")
+
+
+;; fdiv, fsqrt, frsqrt
+(define_insn_reservation "r24kf1_1_fdiv_sf" 17
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "r24kf1_1_fpu_iss,(r24k_fpu_arith*13)")
+
+(define_insn_reservation "r24kf1_1_fdiv_df" 32
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r24kf1_1_fpu_iss,(r24k_fpu_arith*28)")
+
+;; frsqrt
+(define_insn_reservation "r24kf1_1_frsqrt_df" 35
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r24kf1_1_fpu_iss,(r24k_fpu_arith*30)")
+
+;; fcmp
+(define_insn_reservation "r24kf1_1_fcmp" 2
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "fcmp"))
+ "r24kf1_1_fpu_iss")
+
+;; fcmp -> movf.fmt & movt.fmt bypass (dependency must be on the condition)
+(define_bypass 1 "r24kf1_1_fcmp" "r24kf1_1_fmove")
+
+;; fcvt (cvt.d.s, cvt.[sd].[wl])
+(define_insn_reservation "r24kf1_1_fcvt_i2f_s2d" 4
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "I2S,I2D,S2D")))
+ "r24kf1_1_fpu_iss")
+
+;; fcvt (cvt.s.d)
+(define_insn_reservation "r24kf1_1_fcvt_s2d" 6
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "D2S")))
+ "r24kf1_1_fpu_iss")
+
+;; fcvt (cvt.[wl].[sd], etc)
+(define_insn_reservation "r24kf1_1_fcvt_f2i" 5
+ (and (eq_attr "cpu" "24kf1_1")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "S2I,D2I")))
+ "r24kf1_1_fpu_iss")
+
+;; fxfer (mfc1, mfhc1, mtc1, mthc1)
+(define_insn_reservation "r24kf1_1_fxfer" 2
+ (and (eq_attr "cpu" "24kf1_1")
+ (eq_attr "type" "mfc,mtc"))
+ "r24kf1_1_fpu_iss")
+
+;; --------------------------------------------------------------
+;; Bypass to Consumer
+;; --------------------------------------------------------------
+;; r24kf1_1_fcvt_f2i->l/s base : 6 cycles
+;; r24kf1_1_fcvt_f2i->prefetch : 6 cycles
+(define_bypass 6 "r24kf1_1_fcvt_f2i" "r24k_int_load")
+(define_bypass 6 "r24kf1_1_fcvt_f2i" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 6 "r24kf1_1_fcvt_f2i" "r24k_int_prefetch")
+
+;; r24kf1_1_fxfer->l/s base : 3 cycles
+;; r24kf1_1_fxfer->prefetch : 3 cycles
+(define_bypass 3 "r24kf1_1_fxfer" "r24k_int_load")
+(define_bypass 3 "r24kf1_1_fxfer" "r24k_int_store" "!store_data_bypass_p")
+(define_bypass 3 "r24kf1_1_fxfer" "r24k_int_prefetch")
+
diff --git a/gcc/config/mips/3000.md b/gcc/config/mips/3000.md
new file mode 100644
index 000000000..64bdfe113
--- /dev/null
+++ b/gcc/config/mips/3000.md
@@ -0,0 +1,71 @@
+;; R3000 and TX39 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r3k_load" 2
+ (and (eq_attr "cpu" "r3000,r3900")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "alu")
+
+(define_insn_reservation "r3k_imul" 12
+ (and (eq_attr "cpu" "r3000,r3900")
+ (eq_attr "type" "imul,imul3,imadd"))
+ "imuldiv*12")
+
+(define_insn_reservation "r3k_idiv" 35
+ (and (eq_attr "cpu" "r3000,r3900")
+ (eq_attr "type" "idiv"))
+ "imuldiv*35")
+
+(define_insn_reservation "r3k_fmove" 1
+ (and (eq_attr "cpu" "r3000,r3900")
+ (eq_attr "type" "fabs,fneg,fmove"))
+ "alu")
+
+(define_insn_reservation "r3k_fadd" 2
+ (and (eq_attr "cpu" "r3000,r3900")
+ (eq_attr "type" "fcmp,fadd"))
+ "alu")
+
+(define_insn_reservation "r3k_fmul_single" 4
+ (and (eq_attr "cpu" "r3000,r3900")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r3k_fmul_double" 5
+ (and (eq_attr "cpu" "r3000,r3900")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "alu")
+
+(define_insn_reservation "r3k_fdiv_single" 12
+ (and (eq_attr "cpu" "r3000,r3900")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r3k_fdiv_double" 19
+ (and (eq_attr "cpu" "r3000,r3900")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF")))
+ "alu")
diff --git a/gcc/config/mips/4000.md b/gcc/config/mips/4000.md
new file mode 100644
index 000000000..e6e23289f
--- /dev/null
+++ b/gcc/config/mips/4000.md
@@ -0,0 +1,32 @@
+;; R4000 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r4k_imul" 10
+ (and (eq_attr "cpu" "r4000")
+ (eq_attr "type" "imul,imul3,imadd"))
+ "imuldiv*10")
+
+(define_insn_reservation "r4k_idiv" 69
+ (and (eq_attr "cpu" "r4000")
+ (eq_attr "type" "idiv"))
+ "imuldiv*69")
diff --git a/gcc/config/mips/4100.md b/gcc/config/mips/4100.md
new file mode 100644
index 000000000..dc36384c9
--- /dev/null
+++ b/gcc/config/mips/4100.md
@@ -0,0 +1,51 @@
+;; VR4100 and VR4120 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r4100_load" 2
+ (and (eq_attr "cpu" "r4100,r4120")
+ (eq_attr "type" "load,fpload,fpidxload,mfc,mtc"))
+ "alu")
+
+(define_insn_reservation "r4100_imul_si" 1
+ (and (eq_attr "cpu" "r4100,r4120")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "SI")))
+ "imuldiv")
+
+(define_insn_reservation "r4100_imul_di" 4
+ (and (eq_attr "cpu" "r4100,r4120")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "DI")))
+ "imuldiv*4")
+
+(define_insn_reservation "r4100_idiv_si" 35
+ (and (eq_attr "cpu" "r4100,r4120")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "imuldiv*35")
+
+(define_insn_reservation "r4100_idiv_di" 67
+ (and (eq_attr "cpu" "r4100,r4120")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "imuldiv*67")
diff --git a/gcc/config/mips/4130.md b/gcc/config/mips/4130.md
new file mode 100644
index 000000000..6de814fc7
--- /dev/null
+++ b/gcc/config/mips/4130.md
@@ -0,0 +1,153 @@
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Pipeline description for the VR4130 family.
+;;
+;; The processor issues each 8-byte aligned pair of instructions together,
+;; stalling the second instruction if it depends on the first. Thus, if we
+;; want two instructions to issue in parallel, we need to make sure that the
+;; first one is 8-byte aligned.
+;;
+;; For the purposes of this pipeline description, we treat the processor
+;; like a standard two-way superscalar architecture. If scheduling were
+;; the last pass to run, we could use the scheduler hooks to vary the
+;; issue rate depending on whether an instruction is at an aligned or
+;; unaligned address. Unfortunately, delayed branch scheduling and
+;; hazard avoidance are done after the final scheduling pass, and they
+;; can change the addresses of many instructions.
+;;
+;; We get around this in two ways:
+;;
+;; (1) By running an extra pass at the end of compilation. This pass goes
+;; through the function looking for pairs of instructions that could
+;; execute in parallel. It makes sure that the first instruction in
+;; each pair is suitably aligned, inserting nops if necessary. Doing
+;; this gives the same kind of pipeline behavior we would see on a
+;; normal superscalar target.
+;;
+;; This pass is generally a speed improvement, but the extra nops will
+;; obviously make the program bigger. It is therefore unsuitable for
+;; -Os (at the very least).
+;;
+;; (2) By modifying the scheduler hooks so that, where possible:
+;;
+;; (a) dependent instructions are separated by a non-dependent
+;; instruction;
+;;
+;; (b) instructions that use the multiplication unit are separated
+;; by non-multiplication instructions; and
+;;
+;; (c) memory access instructions are separated by non-memory
+;; instructions.
+;;
+;; The idea is to keep conflicting instructions apart wherever possible
+;; and thus make the schedule less dependent on alignment.
+
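+;; As an example of (1): if insns A and B could issue as a pair but A
+;; falls at an address with pc % 8 == 4, the pass inserts a nop so that
+;; A moves up to the next 8-byte boundary and B can issue alongside it:
+;;
+;;     nop        # fills the unaligned slot
+;;     A          # first of the pair, now 8-byte aligned
+;;     B          # issues together with A
+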
+(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")
+
+(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")
+(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")
+
+;; This is a fake unit for pre-reload scheduling of multiplications.
+;; It enforces the true post-reload repeat rate.
+(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")
+
+;; The scheduling hooks use this attribute for (b) above.
+(define_attr "vr4130_class" "mul,mem,alu"
+ (cond [(eq_attr "type" "load,store")
+ (const_string "mem")
+
+ (eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")
+ (const_string "mul")]
+ (const_string "alu")))
+
+(define_insn_reservation "vr4130_multi" 1
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "multi,unknown"))
+ "vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")
+
+(define_insn_reservation "vr4130_int" 1
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "arith,const,logical,move,nop,shift,signext,slt"))
+ "vr4130_alu1 | vr4130_alu2")
+
+(define_insn_reservation "vr4130_load" 3
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "load"))
+ "vr4130_dcache")
+
+(define_insn_reservation "vr4130_store" 1
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "store"))
+ "vr4130_dcache")
+
+(define_insn_reservation "vr4130_mfhilo" 3
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "mfhilo"))
+ "vr4130_muldiv")
+
+(define_insn_reservation "vr4130_mthilo" 1
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "mthilo"))
+ "vr4130_muldiv")
+
+;; The product is available in LO & HI after one cycle. Moving the result
+;; into an integer register will take an additional three cycles, see mflo
+;; & mfhi above. Note that the same latencies and repeat rates apply if we
+;; use "mtlo; macc" instead of "mult; mflo".
+(define_insn_reservation "vr4130_mulsi" 4
+ (and (eq_attr "cpu" "r4130")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "SI")))
+ "vr4130_muldiv + (vr4130_mulpre * 2)")
+
+;; As for vr4130_mulsi, but the product is available in LO and HI
+;; after 3 cycles.
+(define_insn_reservation "vr4130_muldi" 6
+ (and (eq_attr "cpu" "r4130")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")
+
+;; maccs can execute in consecutive cycles without stalling, but it
+;; is 3 cycles before the integer destination can be read.
+(define_insn_reservation "vr4130_macc" 3
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "imadd"))
+ "vr4130_muldiv")
+
+(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")
+(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")
+(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")
+
+(define_insn_reservation "vr4130_divsi" 36
+ (and (eq_attr "cpu" "r4130")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "vr4130_muldiv * 36")
+
+(define_insn_reservation "vr4130_divdi" 72
+ (and (eq_attr "cpu" "r4130")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "vr4130_muldiv * 72")
+
+(define_insn_reservation "vr4130_branch" 0
+ (and (eq_attr "cpu" "r4130")
+ (eq_attr "type" "branch,jump,call"))
+ "vr4130_alu1 | vr4130_alu2")
diff --git a/gcc/config/mips/4300.md b/gcc/config/mips/4300.md
new file mode 100644
index 000000000..b55180b0c
--- /dev/null
+++ b/gcc/config/mips/4300.md
@@ -0,0 +1,85 @@
+;; VR4300 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r4300_load" 2
+ (and (eq_attr "cpu" "r4300")
+ (eq_attr "type" "load,fpload,fpidxload,mfc,mtc"))
+ "alu")
+
+(define_insn_reservation "r4300_imul_si" 5
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "SI")))
+ "imuldiv*5")
+
+(define_insn_reservation "r4300_imul_di" 8
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "DI")))
+ "imuldiv*8")
+
+(define_insn_reservation "r4300_idiv_si" 37
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "imuldiv*37")
+
+(define_insn_reservation "r4300_idiv_di" 69
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "imuldiv*69")
+
+(define_insn_reservation "r4300_fmove" 1
+ (and (eq_attr "cpu" "r4300")
+ (eq_attr "type" "fcmp,fabs,fneg,fmove"))
+ "imuldiv")
+
+(define_insn_reservation "r4300_fadd" 3
+ (and (eq_attr "cpu" "r4300")
+ (eq_attr "type" "fadd"))
+ "imuldiv*3")
+
+(define_insn_reservation "r4300_fmul_single" 5
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "imuldiv*5")
+
+(define_insn_reservation "r4300_fmul_double" 8
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "imuldiv*8")
+
+(define_insn_reservation "r4300_fdiv_single" 29
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "imuldiv*29")
+
+(define_insn_reservation "r4300_fdiv_double" 58
+ (and (eq_attr "cpu" "r4300")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "DF")))
+ "imuldiv*58")
diff --git a/gcc/config/mips/4600.md b/gcc/config/mips/4600.md
new file mode 100644
index 000000000..c645cbc5d
--- /dev/null
+++ b/gcc/config/mips/4600.md
@@ -0,0 +1,87 @@
+;; R4600 and R4650 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+;;
+;; We handle the R4600 and R4650 in much the same way. The only difference
+;; is in the integer multiplication and division costs.
+
+(define_insn_reservation "r4600_imul" 10
+ (and (eq_attr "cpu" "r4600")
+ (eq_attr "type" "imul,imul3,imadd"))
+ "imuldiv*10")
+
+(define_insn_reservation "r4600_idiv" 42
+ (and (eq_attr "cpu" "r4600")
+ (eq_attr "type" "idiv"))
+ "imuldiv*42")
+
+
+(define_insn_reservation "r4650_imul" 4
+ (and (eq_attr "cpu" "r4650")
+ (eq_attr "type" "imul,imul3,imadd"))
+ "imuldiv*4")
+
+(define_insn_reservation "r4650_idiv" 36
+ (and (eq_attr "cpu" "r4650")
+ (eq_attr "type" "idiv"))
+ "imuldiv*36")
+
+
+(define_insn_reservation "r4600_load" 2
+ (and (eq_attr "cpu" "r4600,r4650")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "alu")
+
+(define_insn_reservation "r4600_fmove" 1
+ (and (eq_attr "cpu" "r4600,r4650")
+ (eq_attr "type" "fabs,fneg,fmove"))
+ "alu")
+
+(define_insn_reservation "r4600_fmul_single" 8
+ (and (eq_attr "cpu" "r4600,r4650")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r4600_fdiv_single" 32
+ (and (eq_attr "cpu" "r4600,r4650")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r4600_fdiv_double" 61
+ (and (eq_attr "cpu" "r4600,r4650")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF")))
+ "alu")
+
+(define_insn_reservation "r4600_fsqrt_single" 31
+ (and (eq_attr "cpu" "r4600,r4650")
+ (and (eq_attr "type" "fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r4600_fsqrt_double" 60
+ (and (eq_attr "cpu" "r4600,r4650")
+ (and (eq_attr "type" "fsqrt,frsqrt")
+ (eq_attr "mode" "DF")))
+ "alu")
diff --git a/gcc/config/mips/4k.md b/gcc/config/mips/4k.md
new file mode 100644
index 000000000..88cdbd195
--- /dev/null
+++ b/gcc/config/mips/4k.md
@@ -0,0 +1,153 @@
+;; DFA-based pipeline descriptions for MIPS32 4K processor family
+;; Contributed by Nigel Stephens (nigel@mips.com)
+;; and David Ung (davidu@mips.com)
+;;
+;; References:
+;; "MIPS32 4K Processor Core Family Software User's Manual,
+;; Doc no: MD00016, Rev 1.18, Nov 15, 2004."
+;;
+;; 4Kc - pipelined multiplier and translation lookaside buffer (TLB)
+;; 4Km - pipelined multiplier and block address translator (BAT)
+;; 4Kp - non-pipelined multiplier and block address translator (BAT)
+;;
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "r4k_cpu, r4k_mdu")
+
+;; Integer execution unit.
+(define_cpu_unit "r4k_ixu_arith" "r4k_cpu")
+(define_cpu_unit "r4k_ixu_mpydiv" "r4k_mdu")
+
+(define_insn_reservation "r4k_int_load" 2
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "load"))
+ "r4k_ixu_arith")
+
+(define_insn_reservation "r4k_int_prefetch" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "prefetch"))
+ "r4k_ixu_arith")
+
+(define_insn_reservation "r4k_int_store" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "store"))
+ "r4k_ixu_arith")
+
+;; 4Kc/4Km
+;; unsigned divide - 8/16/24/32-bit operands have latencies 9/17/25/33
+;; signed divide - 8/16/24/32-bit operands have latencies 10/18/26/34
+(define_insn_reservation "r4k_idiv_4kc" 34
+ (and (eq_attr "cpu" "4kc")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "!DI")))
+ "r4k_ixu_arith+(r4k_ixu_mpydiv*34)")
+
+;; 4Kp
+;; unsigned divide - 33
+;; signed divide - 33-35
+(define_insn_reservation "r4k_idiv_4kp" 35
+ (and (eq_attr "cpu" "4kp")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "!DI")))
+ "r4k_ixu_arith+(r4k_ixu_mpydiv*35)")
+
+;; 4Kc/4Km fast 32x32 multiply
+;; 16x32 is faster, but there's no way to detect this
+(define_insn_reservation "r4k_mult_4kc" 2
+ (and (eq_attr "cpu" "4kc")
+ (and (eq_attr "type" "imul,imadd")
+ (eq_attr "mode" "SI")))
+ "r4k_ixu_arith+(r4k_ixu_mpydiv*2)")
+
+;; 4Kc/4Km MUL has 2 cycle latency, but has the special property that it will
+;; stall the integer unit pipeline.  MUL 16x16 or 32x16 forces a 1-cycle
+;; stall, while MUL 32x32 forces a 2-cycle stall.  If the next insn uses
+;; the result, an additional stall is forced.
+(define_insn_reservation "r4k_mul_4kc" 4
+ (and (eq_attr "cpu" "4kc")
+ (and (eq_attr "type" "imul3")
+ (eq_attr "mode" "SI")))
+ "(r4k_ixu_arith+r4k_ixu_mpydiv)*3")
+
+;; 4Kp slow iterative 2-op MULT
+;; Latency of 32 if the next insn is MADD/MSUB or MFHI/MFLO.
+;; Repeat rate of 33 cycles.
+(define_insn_reservation "r4k_mult_4kp" 32
+ (and (eq_attr "cpu" "4kp")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "SI")))
+ "r4k_ixu_arith+(r4k_ixu_mpydiv*32)")
+
+;; 4Kp slow iterative 3-op MUL
+;; Latency of 32 cycles, but stalls the whole pipeline until complete.
+(define_insn_reservation "r4k_mul_4kp" 32
+ (and (eq_attr "cpu" "4kp")
+ (and (eq_attr "type" "imul3")
+ (eq_attr "mode" "SI")))
+ "(r4k_ixu_arith+r4k_ixu_mpydiv)*32")
+
+;; 4Kp slow iterative MADD
+;; Latency of 34 if the next insn to use the result is MADD/MSUB or MFHI/MFLO.
+;; Repeat rate of 35 cycles.
+(define_insn_reservation "r4k_madd_4kp" 34
+ (and (eq_attr "cpu" "4kp")
+ (and (eq_attr "type" "imadd")
+ (eq_attr "mode" "SI")))
+ "r4k_ixu_arith+(r4k_ixu_mpydiv*34)")
+
+;; Move to HI/LO -> MADD/MSUB,MFHI/MFLO has a 1 cycle latency.
+(define_insn_reservation "r4k_int_mthilo" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "mthilo"))
+ "r4k_ixu_arith+r4k_ixu_mpydiv")
+
+;; Move from HI/LO -> integer operation has a 2 cycle latency.
+(define_insn_reservation "r4k_int_mfhilo" 2
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "mfhilo"))
+ "r4k_ixu_arith+r4k_ixu_mpydiv")
+
+;; All other integer insns.
+(define_insn_reservation "r4k_int_alu" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "arith,condmove,const,logical,move,nop,shift,signext,slt"))
+ "r4k_ixu_arith")
+
+(define_insn_reservation "r4k_int_branch" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "branch"))
+ "r4k_ixu_arith")
+
+(define_insn_reservation "r4k_int_jump_4k" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "jump,call"))
+ "r4k_ixu_arith")
+
+;; mfcx/mtcx - non FPU
+;; (Disabled until we add cop0 support)
+;; (define_insn_reservation "r4k_int_cop" 2
+;; (and (eq_attr "cpu" "4kc,4kp")
+;; (eq_attr "type" "cop0"))
+;; "r4k_ixu_arith")
+
+;; Unknown or multi - single issue
+(define_insn_reservation "r4k_unknown" 1
+ (and (eq_attr "cpu" "4kc,4kp")
+ (eq_attr "type" "unknown,multi"))
+ "r4k_ixu_arith+r4k_ixu_mpydiv")
diff --git a/gcc/config/mips/5000.md b/gcc/config/mips/5000.md
new file mode 100644
index 000000000..0ad12ba7e
--- /dev/null
+++ b/gcc/config/mips/5000.md
@@ -0,0 +1,80 @@
+;; VR5000 pipeline description.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r5k_load" 2
+ (and (eq_attr "cpu" "r5000")
+ (eq_attr "type" "load,fpload,fpidxload,mfc,mtc"))
+ "alu")
+
+(define_insn_reservation "r5k_imul_si" 5
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "SI")))
+ "imuldiv*5")
+
+(define_insn_reservation "r5k_imul_di" 9
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "DI")))
+ "imuldiv*9")
+
+(define_insn_reservation "r5k_idiv_si" 36
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "imuldiv*36")
+
+(define_insn_reservation "r5k_idiv_di" 68
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "imuldiv*68")
+
+(define_insn_reservation "r5k_fmove" 1
+ (and (eq_attr "cpu" "r5000")
+ (eq_attr "type" "fcmp,fabs,fneg,fmove"))
+ "alu")
+
+(define_insn_reservation "r5k_fmul_single" 4
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r5k_fmul_double" 5
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "alu")
+
+(define_insn_reservation "r5k_fdiv_single" 21
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r5k_fsqrt_double" 36
+ (and (eq_attr "cpu" "r5000")
+ (and (eq_attr "type" "fsqrt,frsqrt")
+ (eq_attr "mode" "DF")))
+ "alu")
diff --git a/gcc/config/mips/5400.md b/gcc/config/mips/5400.md
new file mode 100644
index 000000000..362999d7b
--- /dev/null
+++ b/gcc/config/mips/5400.md
@@ -0,0 +1,184 @@
+;; Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; DFA-based pipeline description for 5400
+(define_automaton "vr54")
+(define_cpu_unit "vr54_dp0" "vr54")
+(define_cpu_unit "vr54_dp1" "vr54")
+(define_cpu_unit "vr54_mem" "vr54")
+(define_cpu_unit "vr54_mac" "vr54")
+
+;;
+;; The instruction-execution-path/resource-usage descriptions (also
+;; known as reservation RTL) are ordered roughly to follow the
+;; define_attr RTL for the "type" classification.  When modifying,
+;; remember that the first test that matches is the reservation used!
+;;
+
+(define_insn_reservation "ir_vr54_unknown" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "unknown"))
+ "vr54_dp0+vr54_dp1+vr54_mem+vr54_mac")
+
+;; Assume prediction fails.
+(define_insn_reservation "ir_vr54_branch" 3
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "branch,jump,call"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_load" 2
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "vr54_mem")
+
+(define_insn_reservation "ir_vr54_store" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "store"))
+ "vr54_mem")
+
+(define_insn_reservation "ir_vr54_fstore" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "vr54_mem")
+
+
+;; This reservation is for conditional move based on integer
+;; or floating point CC.
+(define_insn_reservation "ir_vr54_condmove" 4
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "condmove"))
+ "vr54_dp0|vr54_dp1")
+
+;; Move to/from FPU registers
+(define_insn_reservation "ir_vr54_xfer" 2
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "mfc,mtc"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_hilo" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "mthilo,mfhilo"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_arith" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,nop,trap"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_imul_si" 3
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "SI")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_imul_di" 4
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_imadd_si" 3
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "imul,imul3"))
+ "vr54_mac")
+
+(define_insn_reservation "ir_vr54_idiv_si" 42
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_idiv_di" 74
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fadd" 4
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "fadd"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fmul_sf" 5
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "SF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fmul_df" 6
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "DF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fmadd_sf" 9
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "SF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fmadd_df" 10
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "DF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fdiv_sf" 42
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fdiv_df" 72
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fabs" 2
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "fabs,fneg,fmove"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fcmp" 2
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "fcmp"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_fcvt" 6
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "fcvt"))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_frsqrt_sf" 61
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_frsqrt_df" 121
+ (and (eq_attr "cpu" "r5400")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "vr54_dp0|vr54_dp1")
+
+(define_insn_reservation "ir_vr54_multi" 1
+ (and (eq_attr "cpu" "r5400")
+ (eq_attr "type" "multi"))
+ "vr54_dp0+vr54_dp1+vr54_mem+vr54_mac")
diff --git a/gcc/config/mips/5500.md b/gcc/config/mips/5500.md
new file mode 100644
index 000000000..0b59af15d
--- /dev/null
+++ b/gcc/config/mips/5500.md
@@ -0,0 +1,227 @@
+;; Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; DFA-based pipeline description for 5500
+(define_automaton "vr55")
+(define_cpu_unit "vr55_dp0" "vr55")
+(define_cpu_unit "vr55_dp1" "vr55")
+(define_cpu_unit "vr55_mem" "vr55")
+(define_cpu_unit "vr55_mac" "vr55")
+(define_cpu_unit "vr55_fp" "vr55")
+(define_cpu_unit "vr55_bru" "vr55")
+
+;;
+;; The instruction-execution-path/resource-usage descriptions (also
+;; known as reservation RTL) are ordered roughly to follow the
+;; define_attr RTL for the "type" classification.  When modifying,
+;; remember that the first test that matches is the reservation used!
+;;
+
+(define_insn_reservation "ir_vr55_unknown" 1
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "unknown"))
+ "vr55_dp0+vr55_dp1+vr55_mem+vr55_mac+vr55_fp+vr55_bru")
+
+;; Assume prediction fails.
+(define_insn_reservation "ir_vr55_branch" 2
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "branch,jump,call"))
+ "vr55_bru")
+
+(define_insn_reservation "ir_vr55_load" 3
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "vr55_mem")
+
+(define_bypass 4
+ "ir_vr55_load"
+ "ir_vr55_mthilo,ir_vr55_imul_si,ir_vr55_imul_di,ir_vr55_imadd,
+ ir_vr55_idiv_si,ir_vr55_idiv_di")
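+;; (define_bypass overrides the producer's default latency for the
+;; listed consumers: a load feeding one of the MDU insns named above is
+;; seen as 4 cycles rather than ir_vr55_load's default 3.)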
+
+(define_insn_reservation "ir_vr55_store" 0
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "store,fpstore,fpidxstore"))
+ "vr55_mem")
+
+;; This reservation is for conditional move based on integer
+;; or floating point CC.
+(define_insn_reservation "ir_vr55_condmove" 2
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "condmove"))
+ "vr55_dp0|vr55_dp1")
+
+;; Move to/from FPU registers
+(define_insn_reservation "ir_vr55_xfer" 2
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "mfc,mtc"))
+ "vr55_dp0|vr55_dp1")
+
+(define_insn_reservation "ir_vr55_arith" 1
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,nop,trap"))
+ "vr55_dp0|vr55_dp1")
+
+(define_bypass 2
+ "ir_vr55_arith"
+ "ir_vr55_mthilo,ir_vr55_imul_si,ir_vr55_imul_di,ir_vr55_imadd,
+ ir_vr55_idiv_si,ir_vr55_idiv_di")
+
+(define_insn_reservation "ir_vr55_mthilo" 1
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "mthilo"))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_mfhilo" 5
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "mfhilo"))
+ "vr55_mac")
+
+;; The default latency is for the GPR result of a mul. Bypasses handle the
+;; latency of {mul,mult}->{mfhi,mflo}.
+(define_insn_reservation "ir_vr55_imul_si" 5
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "SI")))
+ "vr55_mac")
+
+;; The default latency is for pre-reload scheduling and handles the case
+;; where a pseudo destination will be stored in a GPR (as it usually is).
+;; The delay includes the latency of the dmult itself and the anticipated
+;; mflo or mfhi.
+;;
+;; Once the mflo or mfhi has been created, bypasses handle the latency
+;; between it and the dmult.
+(define_insn_reservation "ir_vr55_imul_di" 9
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "vr55_mac*4")
+
+;; The default latency is as for ir_vr55_imul_si.
+(define_insn_reservation "ir_vr55_imadd" 5
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "imadd"))
+ "vr55_mac")
+
+(define_bypass 1
+ "ir_vr55_imul_si,ir_vr55_imadd"
+ "ir_vr55_imadd"
+ "mips_linked_madd_p")
+
+(define_bypass 2
+ "ir_vr55_imul_si,ir_vr55_imadd"
+ "ir_vr55_mfhilo")
+
+(define_bypass 4
+ "ir_vr55_imul_di"
+ "ir_vr55_mfhilo")
+
+;; The divide algorithm is early-out, with a best-case latency of
+;; 7 pcycles.  Use the worst case for scheduling purposes.
+(define_insn_reservation "ir_vr55_idiv_si" 42
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_idiv_di" 74
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fadd" 4
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "fadd"))
+ "vr55_fp")
+
+(define_insn_reservation "ir_vr55_fmul_sf" 5
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "SF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fmul_df" 6
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "mode" "DF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fmadd_sf" 9
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "SF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fmadd_df" 10
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "mode" "DF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fdiv_sf" 30
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fdiv_df" 59
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_fabs" 2
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "fabs,fneg,fmove"))
+ "vr55_fp")
+
+(define_insn_reservation "ir_vr55_fcmp" 2
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "fcmp"))
+ "vr55_fp")
+
+(define_insn_reservation "ir_vr55_fcvt_sf" 4
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "mode" "SF")))
+ "vr55_fp")
+
+(define_insn_reservation "ir_vr55_fcvt_df" 6
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "mode" "DF")))
+ "vr55_fp")
+
+(define_insn_reservation "ir_vr55_frsqrt_sf" 60
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_frsqrt_df" 118
+ (and (eq_attr "cpu" "r5500")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "vr55_mac")
+
+(define_insn_reservation "ir_vr55_multi" 1
+ (and (eq_attr "cpu" "r5500")
+ (eq_attr "type" "multi"))
+ "vr55_dp0+vr55_dp1+vr55_mem+vr55_mac+vr55_fp+vr55_bru")
diff --git a/gcc/config/mips/5k.md b/gcc/config/mips/5k.md
new file mode 100644
index 000000000..ade06ec44
--- /dev/null
+++ b/gcc/config/mips/5k.md
@@ -0,0 +1,229 @@
+;; DFA-based pipeline descriptions for MIPS64 5K processor family
+;; Contributed by David Ung (davidu@mips.com)
+;; and Nigel Stephens (nigel@mips.com)
+;;
+;; References:
+;; "MIPS64 5K Processor Core Family Software User's Manual,
+;; Doc no: MD00012, Rev 2.09, Jan 28, 2005."
+;;
+;; 5Kc - single issue with no floating-point unit.
+;; 5Kf - separate floating-point pipe which can dual-issue with the
+;; integer pipe.
+;;
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "r5k_cpu, r5k_mdu, r5k_fpu")
+
+;; Integer execution unit.
+(define_cpu_unit "r5k_ixu_arith" "r5k_cpu")
+(define_cpu_unit "r5k_ixu_mpydiv" "r5k_mdu")
+(define_cpu_unit "r5kf_fpu_arith" "r5k_fpu")
+
+(define_insn_reservation "r5k_int_load" 2
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "load"))
+ "r5k_ixu_arith")
+
+(define_insn_reservation "r5k_int_prefetch" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "r5k_ixu_arith")
+
+(define_insn_reservation "r5k_int_store" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "store"))
+ "r5k_ixu_arith")
+
+;; Divides
+(define_insn_reservation "r5k_int_divsi" 34
+ (and (eq_attr "cpu" "5kc,5kf")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "!DI")))
+ "r5k_ixu_arith+(r5k_ixu_mpydiv*34)")
+
+(define_insn_reservation "r5k_int_divdi" 66
+ (and (eq_attr "cpu" "5kc,5kf")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "r5k_ixu_arith+(r5k_ixu_mpydiv*66)")
+
+;; 32x32 multiply
+;; 32x16 is faster, but there's no way to detect this
+(define_insn_reservation "r5k_int_mult" 2
+ (and (eq_attr "cpu" "5kc,5kf")
+ (and (eq_attr "type" "imul,imadd")
+ (eq_attr "mode" "SI")))
+ "r5k_ixu_arith+(r5k_ixu_mpydiv*2)")
+
+;; 64x64 multiply
+(define_insn_reservation "r5k_int_mult_64" 9
+ (and (eq_attr "cpu" "5kc,5kf")
+ (and (eq_attr "type" "imul,imadd")
+ (eq_attr "mode" "DI")))
+ "r5k_ixu_arith+(r5k_ixu_mpydiv*2)")
+
+;; 3 operand MUL 32x32
+(define_insn_reservation "r5k_int_mul" 4
+ (and (eq_attr "cpu" "5kc,5kf")
+ (and (eq_attr "type" "imul3")
+ (eq_attr "mode" "SI")))
+ "r5k_ixu_arith+(r5k_ixu_mpydiv*2)")
+
+;; Move to HI/LO -> MADD/MSUB,MFHI/MFLO has a 1 cycle latency.
+(define_insn_reservation "r5k_int_mthilo" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "mthilo"))
+ "r5k_ixu_arith+r5k_ixu_mpydiv")
+
+;; Move from HI/LO -> integer operation has a 2 cycle latency.
+(define_insn_reservation "r5k_int_mfhilo" 2
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "mfhilo"))
+ "r5k_ixu_arith+r5k_ixu_mpydiv")
+
+;; All other integer insns.
+(define_insn_reservation "r5k_int_alu" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "arith,condmove,const,logical,move,nop,shift,signext,slt"))
+ "r5k_ixu_arith")
+
+(define_insn_reservation "r5k_int_branch" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "branch"))
+ "r5k_ixu_arith")
+
+;; JR/JALR always cause one pipeline bubble because of interlock.
+(define_insn_reservation "r5k_int_jump" 2
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "jump,call"))
+ "r5k_ixu_arith")
+
+;; Any -> JR/JALR (without dependency) : 1 clock issue delay
+;; Any -> JR/JALR (with dependency) : 2 clock issue delay
+;; load -> JR/JALR (with dependency) : 3 clock issue delay
+;; mfhilo -> JR/JALR (with dependency) : 3 clock issue delay
+;; mul -> JR/JALR (with dependency) : 3 clock issue delay
+(define_bypass 2 "r5k_int_alu" "r5k_int_jump")
+(define_bypass 3 "r5k_int_load" "r5k_int_jump")
+(define_bypass 3 "r5k_int_mfhilo" "r5k_int_jump")
+(define_bypass 3 "r5k_int_mul" "r5k_int_jump")
+
+;; Unknown or multi - single issue
+(define_insn_reservation "r5k_int_unknown" 1
+ (and (eq_attr "cpu" "5kc,5kf")
+ (eq_attr "type" "unknown,multi"))
+ "r5k_ixu_arith+r5k_ixu_mpydiv")
+
+
+;; Floating Point Instructions
+;; The 5Kf is a partial dual-issue CPU that can issue an integer and a
+;; floating-point instruction in the same cycle.
+
+;; fadd, fabs, fneg
+(define_insn_reservation "r5kf_fadd" 4
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r5kf_fpu_arith")
+
+;; fmove, fcmove
+(define_insn_reservation "r5kf_fmove" 4
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "fmove"))
+ "r5kf_fpu_arith")
+
+;; fload
+(define_insn_reservation "r5kf_fload" 3
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r5kf_fpu_arith")
+
+;; fstore
+(define_insn_reservation "r5kf_fstore" 1
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "fpstore"))
+ "r5kf_fpu_arith")
+
+;; fmul, fmadd
+(define_insn_reservation "r5kf_fmul_sf" 4
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r5kf_fpu_arith")
+
+(define_insn_reservation "r5kf_fmul_df" 5
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r5kf_fpu_arith*2")
+
+;; fdiv, fsqrt, frsqrt
+(define_insn_reservation "r5kf_fdiv_sf" 17
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "r5kf_fpu_arith*14")
+
+(define_insn_reservation "r5kf_fdiv_df" 32
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r5kf_fpu_arith*29")
+
+;; frsqrt
+(define_insn_reservation "r5kf_frsqrt_df" 35
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r5kf_fpu_arith*31")
+
+;; fcmp
+(define_insn_reservation "r5kf_fcmp" 2
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "fcmp"))
+ "r5kf_fpu_arith")
+
+;; fcmp -> movf.fmt & movt.fmt bypass (dependency must be on condition)
+(define_bypass 1 "r5kf_fcmp" "r5kf_fmove")
+
+;; fcvt (cvt.d.s, cvt.[sd].[wl])
+(define_insn_reservation "r5kf_fcvt_d2s" 4
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "I2S,I2D,S2D")))
+ "r5kf_fpu_arith")
+
+;; fcvt (cvt.s.d)
+(define_insn_reservation "r5kf_fcvt_s2d" 6
+ (and (eq_attr "cpu" "5kc")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "D2S")))
+ "r5kf_fpu_arith")
+
+;; fcvt (cvt.[wl].[sd], etc)
+(define_insn_reservation "r5kf_fcvt_f2i" 5
+ (and (eq_attr "cpu" "5kf")
+ (and (eq_attr "type" "fcvt")
+ (eq_attr "cnv_mode" "S2I,D2I")))
+ "r5kf_fpu_arith")
+
+;; fxfer (mfc1, mfhc1, mtc1, mthc1) - single issue
+(define_insn_reservation "r5kf_fxfer" 2
+ (and (eq_attr "cpu" "5kf")
+ (eq_attr "type" "mfc,mtc"))
+ "r5k_ixu_arith+r5kf_fpu_arith")
diff --git a/gcc/config/mips/6000.md b/gcc/config/mips/6000.md
new file mode 100644
index 000000000..51730fb08
--- /dev/null
+++ b/gcc/config/mips/6000.md
@@ -0,0 +1,56 @@
+;; R6000 pipeline description.
+;; Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file overrides parts of generic.md. It is derived from the
+;; old define_function_unit description.
+
+(define_insn_reservation "r6k_fcmp" 2
+ (and (eq_attr "cpu" "r6000")
+ (eq_attr "type" "fcmp"))
+ "alu")
+
+(define_insn_reservation "r6k_fadd" 3
+ (and (eq_attr "cpu" "r6000")
+ (eq_attr "type" "fadd"))
+ "alu")
+
+(define_insn_reservation "r6k_fmul_single" 5
+ (and (eq_attr "cpu" "r6000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r6k_fmul_double" 6
+ (and (eq_attr "cpu" "r6000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "alu")
+
+(define_insn_reservation "r6k_fdiv_single" 15
+ (and (eq_attr "cpu" "r6000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF")))
+ "alu")
+
+(define_insn_reservation "r6k_fdiv_double" 16
+ (and (eq_attr "cpu" "r6000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF")))
+ "alu")
diff --git a/gcc/config/mips/7000.md b/gcc/config/mips/7000.md
new file mode 100644
index 000000000..6c91d0472
--- /dev/null
+++ b/gcc/config/mips/7000.md
@@ -0,0 +1,214 @@
+;; DFA-based pipeline description for the RM7000.
+;; Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; .........................
+;;
+;; The RM7000 is a dual-issue processor that can bundle instructions as:
+;; {arith|load|store}{arith|imul|idiv|branch|float}
+;;
+;; Reference:
+;; "RM7000 Family User Manual, PMC-2002296"
+;;
+;; .........................
+
+;; Use three automata to isolate long latency operations, reducing space.
+(define_automaton "rm7000_other, rm7000_fdiv, rm7000_idiv")
+
+;;
+;; Describe the resources.
+;;
+
+;; Global
+(define_cpu_unit "rm7_iss0,rm7_iss1" "rm7000_other")
+
+;; Integer execution unit (M-Pipe).
+(define_cpu_unit "ixum_addsub_agen" "rm7000_other")
+
+;; Integer execution unit (F-Pipe).
+(define_cpu_unit "ixuf_addsub" "rm7000_other")
+(define_cpu_unit "ixuf_branch" "rm7000_other")
+(define_cpu_unit "ixuf_mpydiv" "rm7000_other")
+(define_cpu_unit "ixuf_mpydiv_iter" "rm7000_idiv")
+;; Floating-point unit (F-Pipe).
+(define_cpu_unit "fxuf_add" "rm7000_other")
+(define_cpu_unit "fxuf_mpy" "rm7000_other")
+(define_cpu_unit "fxuf_mpy_iter" "rm7000_fdiv")
+(define_cpu_unit "fxuf_divsqrt" "rm7000_other")
+(define_cpu_unit "fxuf_divsqrt_iter" "rm7000_fdiv")
+
+(exclusion_set "ixuf_addsub"
+ "ixuf_branch,ixuf_mpydiv,fxuf_add,fxuf_mpy,fxuf_divsqrt")
+(exclusion_set "ixuf_branch" "ixuf_mpydiv,fxuf_add,fxuf_mpy,fxuf_divsqrt")
+(exclusion_set "ixuf_mpydiv" "fxuf_add,fxuf_mpy,fxuf_divsqrt")
+(exclusion_set "fxuf_add" "fxuf_mpy,fxuf_divsqrt")
+(exclusion_set "fxuf_mpy" "fxuf_divsqrt")
+
+;; No insn can be issued after a branch.
+(absence_set "rm7_iss0,rm7_iss1" "ixuf_branch")
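+;; (exclusion_set units can never be reserved in the same cycle, which
+;; models the shared F pipe; absence_set forbids reserving the issue
+;; units in a cycle where ixuf_branch is reserved, enforcing the
+;; comment above.)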
+
+;;
+;; Define reservations for unit name mnemonics or combinations.
+;;
+
+(define_reservation "rm7_iss" "rm7_iss0|rm7_iss1")
+(define_reservation "rm7_single_dispatch" "rm7_iss0+rm7_iss1")
+
+(define_reservation "rm7_iaddsub" "rm7_iss+(ixum_addsub_agen|ixuf_addsub)")
+(define_reservation "rm7_imem" "rm7_iss+ixum_addsub_agen")
+(define_reservation "rm7_impydiv" "rm7_iss+ixuf_mpydiv")
+(define_reservation "rm7_impydiv_iter" "ixuf_mpydiv_iter")
+(define_reservation "rm7_branch" "rm7_iss+ixuf_branch")
+
+(define_reservation "rm7_fpadd" "rm7_iss+fxuf_add")
+(define_reservation "rm7_fpmpy" "rm7_iss+fxuf_mpy")
+(define_reservation "rm7_fpmpy_iter" "fxuf_mpy_iter")
+(define_reservation "rm7_fpdivsqr" "rm7_iss+fxuf_divsqrt")
+(define_reservation "rm7_fpdivsqr_iter" "fxuf_divsqrt_iter")
+
+;;
+;; Describe instruction reservations for integer operations.
+;;
+
+(define_insn_reservation "rm7_int_other" 1
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "arith,shift,signext,slt,clz,const,condmove,logical,move,nop,trap"))
+ "rm7_iaddsub")
+
+(define_insn_reservation "rm7_ld" 2
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "rm7_imem")
+
+(define_insn_reservation "rm7_st" 1
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "store,fpstore,fpidxstore"))
+ "rm7_imem")
+
+(define_insn_reservation "rm7_idiv_si" 36
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "rm7_impydiv+(rm7_impydiv_iter*36)")
+
+(define_insn_reservation "rm7_idiv_di" 68
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "rm7_impydiv+(rm7_impydiv_iter*68)")
+
+(define_insn_reservation "rm7_impy_si_mult" 5
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "imul,imadd")
+ (eq_attr "mode" "SI")))
+ "rm7_impydiv+(rm7_impydiv_iter*3)")
+
+;; There are an additional 2 stall cycles.
+(define_insn_reservation "rm7_impy_si_mul" 2
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "imul3")
+ (eq_attr "mode" "SI")))
+ "rm7_impydiv")
+
+(define_insn_reservation "rm7_impy_di" 9
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "rm7_impydiv+(rm7_impydiv_iter*8)")
+
+;; Move to/from HI/LO.
+(define_insn_reservation "rm7_mthilo" 3
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "mthilo"))
+ "rm7_impydiv")
+
+(define_insn_reservation "rm7_mfhilo" 1
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "mfhilo"))
+ "rm7_impydiv")
+
+;; Move to/from fp coprocessor.
+(define_insn_reservation "rm7_ixfer" 2
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "mfc,mtc"))
+ "rm7_iaddsub")
+
+(define_insn_reservation "rm7_ibr" 3
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "branch,jump,call"))
+ "rm7_branch")
+
+;;
+;; Describe instruction reservations for the floating-point operations.
+;;
+(define_insn_reservation "rm7_fp_quick" 4
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "fneg,fcmp,fabs,fmove"))
+ "rm7_fpadd")
+
+(define_insn_reservation "rm7_fp_other" 4
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "fadd"))
+ "rm7_fpadd")
+
+(define_insn_reservation "rm7_fp_cvt" 4
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "fcvt"))
+ "rm7_fpadd")
+
+(define_insn_reservation "rm7_fp_divsqrt_df" 36
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "rm7_fpdivsqr+(rm7_fpdivsqr_iter*36)")
+
+(define_insn_reservation "rm7_fp_divsqrt_sf" 21
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "rm7_fpdivsqr+(rm7_fpdivsqr_iter*21)")
+
+(define_insn_reservation "rm7_fp_rsqrt_df" 68
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "rm7_fpdivsqr+(rm7_fpdivsqr_iter*68)")
+
+(define_insn_reservation "rm7_fp_rsqrt_sf" 38
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "rm7_fpdivsqr+(rm7_fpdivsqr_iter*38)")
+
+(define_insn_reservation "rm7_fp_mpy_sf" 4
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "rm7_fpmpy+rm7_fpmpy_iter")
+
+(define_insn_reservation "rm7_fp_mpy_df" 5
+ (and (eq_attr "cpu" "r7000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "rm7_fpmpy+(rm7_fpmpy_iter*2)")
+
+;; Force single-dispatch for unknown or multi.
+(define_insn_reservation "rm7_unknown" 1
+ (and (eq_attr "cpu" "r7000")
+ (eq_attr "type" "unknown,multi"))
+ "rm7_single_dispatch")
diff --git a/gcc/config/mips/74k.md b/gcc/config/mips/74k.md
new file mode 100644
index 000000000..b75bfc4b9
--- /dev/null
+++ b/gcc/config/mips/74k.md
@@ -0,0 +1,418 @@
+;; DFA-based pipeline description for MIPS32 model 74k.
+;; Contributed by MIPS Technologies and CodeSourcery.
+;;
+;; Reference:
+;; "MIPS32 74K Microarchitecure Specification Rev. 01.02 Jun 15, 2006"
+;; "MIPS32 74Kf Processor Core Datasheet Jun 2, 2006"
+;;
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "r74k_mdu_pipe, r74k_alu_pipe, r74k_agen_pipe, r74k_fpu")
+(define_cpu_unit "r74k_mul" "r74k_mdu_pipe")
+(define_cpu_unit "r74k_alu" "r74k_alu_pipe")
+(define_cpu_unit "r74k_agen" "r74k_agen_pipe")
+(define_cpu_unit "r74k_fpu_arith" "r74k_fpu")
+(define_cpu_unit "r74k_fpu_ldst" "r74k_fpu")
+
+;; --------------------------------------------------------------
+;; Producers
+;; --------------------------------------------------------------
+
+;; ALU: Logicals/Arithmetics
+;; - Logicals, move (addu/addiu with rt = 0), Set less than,
+;; sign extend - 1 cycle
+(define_insn_reservation "r74k_int_logical" 1
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "logical,move,signext,slt"))
+ "r74k_alu")
+
+;; - Arithmetics - 2 cycles
+(define_insn_reservation "r74k_int_arith" 2
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "arith,const,shift,clz"))
+ "r74k_alu")
+
+(define_insn_reservation "r74k_int_nop" 0
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "nop"))
+ "nothing")
+
+(define_insn_reservation "r74k_int_cmove" 4
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "condmove"))
+ "r74k_agen*2")
+
+;; MDU: fully pipelined multiplier
+;; mult - delivers result to hi/lo in 4 cycles (pipelined)
+(define_insn_reservation "r74k_int_mult" 4
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "imul"))
+ "r74k_alu+r74k_mul")
+
+;; madd, msub - deliver result to hi/lo in 4 cycles (pipelined)
+(define_insn_reservation "r74k_int_madd" 4
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "imadd"))
+ "r74k_alu+r74k_mul")
+
+;; mul - delivers result to general register in 7 cycles
+(define_insn_reservation "r74k_int_mul3" 7
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "imul3"))
+ "r74k_alu+r74k_mul")
+
+;; mfhi, mflo, mflhxu - deliver result to gpr in 7 cycles
+(define_insn_reservation "r74k_int_mfhilo" 7
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "mfhilo"))
+ "r74k_alu+r74k_mul")
+
+;; mthi, mtlo, mtlhx - deliver result to hi/lo; the madd case is handled as a bypass
+(define_insn_reservation "r74k_int_mthilo" 7
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "mthilo"))
+ "r74k_alu+r74k_mul")
+
+;; div - defaults to 50 cycles for 32-bit operands.  Faster for 8-bit
+;; operands, but that is tricky to identify.
+(define_insn_reservation "r74k_int_div" 50
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "idiv"))
+ "r74k_alu+r74k_mul*50")
+
+;; call
+(define_insn_reservation "r74k_int_call" 1
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "call"))
+ "r74k_agen")
+
+;; branch/jump
+(define_insn_reservation "r74k_int_jump" 1
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "branch,jump"))
+ "r74k_agen")
+
+;; loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
+;; prefetch: prefetch, prefetchx
+(define_insn_reservation "r74k_int_load" 3
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "load,prefetch,prefetchx"))
+ "r74k_agen")
+
+;; stores
+(define_insn_reservation "r74k_int_store" 1
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (and (eq_attr "type" "store")
+ (eq_attr "mode" "!unknown")))
+ "r74k_agen")
+
+
+;; Unknowns - currently these include blockage, consttable and
+;; alignment RTLs.  They do not really affect scheduling latency
+;; (blockage affects scheduling via log links, but that is not used
+;; here).
+;;
+(define_insn_reservation "r74k_unknown" 1
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "unknown"))
+ "r74k_alu")
+
+(define_insn_reservation "r74k_multi" 10
+ (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
+ (eq_attr "type" "multi"))
+ "(r74k_alu+r74k_agen)*10")
+
+;; --------------------------------------------------------------
+;; Bypass to Consumer
+;; --------------------------------------------------------------
+
+;; load->next use : 3 cycles (Default)
+;; load->load base: 4 cycles
+;; load->store base: 4 cycles
+(define_bypass 4 "r74k_int_load" "r74k_int_load")
+(define_bypass 4 "r74k_int_load" "r74k_int_store" "!store_data_bypass_p")
+
+;; logical/move/slt/signext->next use : 1 cycles (Default)
+;; logical/move/slt/signext->load base: 2 cycles
+;; logical/move/slt/signext->store base: 2 cycles
+(define_bypass 2 "r74k_int_logical" "r74k_int_load")
+(define_bypass 2 "r74k_int_logical" "r74k_int_store" "!store_data_bypass_p")
+
+;; arith->next use : 2 cycles (Default)
+;; arith->load base: 3 cycles
+;; arith->store base: 3 cycles
+(define_bypass 3 "r74k_int_arith" "r74k_int_load")
+(define_bypass 3 "r74k_int_arith" "r74k_int_store" "!store_data_bypass_p")
+
+;; cmove->next use : 4 cycles (Default)
+;; cmove->load base: 5 cycles
+;; cmove->store base: 5 cycles
+(define_bypass 5 "r74k_int_cmove" "r74k_int_load")
+(define_bypass 5 "r74k_int_cmove" "r74k_int_store" "!store_data_bypass_p")
+
+;; mult/madd/msub->int_mfhilo : 4 cycles (default)
+;; mult->madd/msub : 1 cycles
+;; madd/msub->madd/msub : 1 cycles
+(define_bypass 1 "r74k_int_mult,r74k_int_mul3" "r74k_int_madd"
+ "mips_linked_madd_p")
+(define_bypass 1 "r74k_int_madd" "r74k_int_madd"
+ "mips_linked_madd_p")
+
+;; --------------------------------------------------------------
+;; Floating Point Instructions
+;; --------------------------------------------------------------
+
+;; 74Kf FPU runs at 1:1 or 2:1 core/FPU clock ratio.
+
+;; fadd, fabs, fneg,
+(define_insn_reservation "r74kf1_1_fadd" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fadd" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fadd" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fadd,fabs,fneg"))
+ "r74k_fpu_arith")
+
+;; fmove, fcmove
+(define_insn_reservation "r74kf1_1_fmove" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fmove"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fmove" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fmove"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fmove" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fmove"))
+ "r74k_fpu_arith")
+
+;; fload
+(define_insn_reservation "r74kf1_1_fload" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r74k_agen+r74k_fpu_ldst")
+
+(define_insn_reservation "r74kf2_1_fload" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r74k_agen+(r74k_fpu_ldst*2)")
+
+(define_insn_reservation "r74kf3_2_fload" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fpload,fpidxload"))
+ "r74k_agen+r74k_fpu_ldst")
+
+;; fstore
+(define_insn_reservation "r74kf1_1_fstore" 1
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "r74k_agen+r74k_fpu_ldst")
+
+(define_insn_reservation "r74kf2_1_fstore" 2
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "r74k_agen+(r74k_fpu_ldst*2)")
+
+(define_insn_reservation "r74kf3_2_fstore" 1
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "r74k_agen+r74k_fpu_ldst")
+
+;; fmul, fmadd
+(define_insn_reservation "r74kf1_1_fmul_sf" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fmul_sf" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fmul_sf" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf1_1_fmul_df" 5
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf2_1_fmul_df" 10
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*4")
+
+(define_insn_reservation "r74kf3_2_fmul_df" 7
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*2")
+
+;; fdiv, fsqrt
+(define_insn_reservation "r74kf1_1_fdiv_sf" 17
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*14")
+
+(define_insn_reservation "r74kf2_1_fdiv_sf" 34
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*28")
+
+(define_insn_reservation "r74kf3_2_fdiv_sf" 25
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*14")
+
+(define_insn_reservation "r74kf1_1_fdiv_df" 32
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*29")
+
+(define_insn_reservation "r74kf2_1_fdiv_df" 64
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*58")
+
+(define_insn_reservation "r74kf3_2_fdiv_df" 48
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*29")
+
+;; frsqrt
+(define_insn_reservation "r74kf1_1_frsqrt_sf" 17
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*14")
+
+(define_insn_reservation "r74kf2_1_frsqrt_sf" 34
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*28")
+
+(define_insn_reservation "r74kf3_2_frsqrt_sf" 25
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "r74k_fpu_arith*14")
+
+(define_insn_reservation "r74kf1_1_frsqrt_df" 36
+ (and (eq_attr "cpu" "74kf1_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*31")
+
+(define_insn_reservation "r74kf2_1_frsqrt_df" 72
+ (and (eq_attr "cpu" "74kf2_1")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*62")
+
+(define_insn_reservation "r74kf3_2_frsqrt_df" 54
+ (and (eq_attr "cpu" "74kf3_2")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "r74k_fpu_arith*31")
+
+;; fcmp
+(define_insn_reservation "r74kf1_1_fcmp" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fcmp"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fcmp" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fcmp"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fcmp" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fcmp"))
+ "r74k_fpu_arith")
+
+;; fcvt
+(define_insn_reservation "r74kf1_1_fcvt" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "fcvt"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fcvt" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "fcvt"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fcvt" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "fcvt"))
+ "r74k_fpu_arith")
+
+;; fxfer (MTC1, DMTC1: latency is 4) (MFC1, DMFC1: latency is 1)
+(define_insn_reservation "r74kf1_1_fxfer_to_c1" 4
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "mtc"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fxfer_to_c1" 8
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "mtc"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fxfer_to_c1" 6
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "mtc"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf1_1_fxfer_from_c1" 1
+ (and (eq_attr "cpu" "74kf1_1")
+ (eq_attr "type" "mfc"))
+ "r74k_fpu_arith")
+
+(define_insn_reservation "r74kf2_1_fxfer_from_c1" 2
+ (and (eq_attr "cpu" "74kf2_1")
+ (eq_attr "type" "mfc"))
+ "r74k_fpu_arith*2")
+
+(define_insn_reservation "r74kf3_2_fxfer_from_c1" 1
+ (and (eq_attr "cpu" "74kf3_2")
+ (eq_attr "type" "mfc"))
+ "r74k_fpu_arith")
diff --git a/gcc/config/mips/9000.md b/gcc/config/mips/9000.md
new file mode 100644
index 000000000..c0c8d3ac8
--- /dev/null
+++ b/gcc/config/mips/9000.md
@@ -0,0 +1,151 @@
+;; DFA-based pipeline description for the RM9000.
+;; Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "rm9k_main, rm9k_imul, rm9k_fdiv")
+
+;; These units are for insns that can issue in either pipe. We don't
+;; want to use constructs like "rm9k_m | rm9k_f_int" since that would
+;; needlessly make an insn prefer the M pipe.
+(define_cpu_unit "rm9k_any1" "rm9k_main")
+(define_cpu_unit "rm9k_any2" "rm9k_main")
+
+;; F and M pipe units, for instructions that must be issued by a
+;; particular pipe. Split the F pipe into two units so that integer
+;; instructions can issue while the FPU is busy. We don't need to
+;; split M because it is only ever reserved for a single cycle.
+(define_cpu_unit "rm9k_m" "rm9k_main")
+(define_cpu_unit "rm9k_f_int" "rm9k_main")
+(define_cpu_unit "rm9k_f_float" "rm9k_main")
+
+(exclusion_set "rm9k_f_int" "rm9k_f_float")
+
+;; Multiply/divide units.
+(define_cpu_unit "rm9k_imul" "rm9k_imul")
+(define_cpu_unit "rm9k_fdiv" "rm9k_fdiv")
+
+(define_insn_reservation "rm9k_load" 3
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "load,fpload,fpidxload"))
+ "rm9k_m")
+
+(define_insn_reservation "rm9k_store" 1
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "store,fpstore,fpidxstore"))
+ "rm9k_m")
+
+(define_insn_reservation "rm9k_int" 1
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,nop,trap"))
+ "rm9k_any1 | rm9k_any2")
+
+(define_insn_reservation "rm9k_int_cmove" 2
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SI,DI")))
+ "rm9k_any1 | rm9k_any2")
+
+;; This applies to both 'mul' and 'mult'.
+(define_insn_reservation "rm9k_mulsi" 3
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "!DI")))
+ "rm9k_f_int")
+
+(define_insn_reservation "rm9k_muldi" 7
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "DI")))
+ "rm9k_f_int + rm9k_imul * 7")
+
+(define_insn_reservation "rm9k_divsi" 38
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "!DI")))
+ "rm9k_f_int + rm9k_imul * 38")
+
+(define_insn_reservation "rm9k_divdi" 70
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "rm9k_f_int + rm9k_imul * 70")
+
+(define_insn_reservation "rm9k_mfhilo" 1
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "mfhilo"))
+ "rm9k_f_int")
+
+(define_insn_reservation "rm9k_mthilo" 5
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "mthilo"))
+ "rm9k_f_int")
+
+(define_insn_reservation "rm9k_xfer" 2
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "mfc,mtc"))
+ "rm9k_m")
+
+(define_insn_reservation "rm9k_fquick" 2
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "fabs,fneg,fcmp,fmove"))
+ "rm9k_f_float")
+
+(define_insn_reservation "rm9k_fcmove" 2
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SF,DF")))
+ "rm9k_m")
+
+(define_insn_reservation "rm9k_fadd" 6
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "fadd,fcvt"))
+ "rm9k_f_float")
+
+(define_insn_reservation "rm9k_fmuls" 6
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "rm9k_f_float")
+
+(define_insn_reservation "rm9k_fmuld" 9
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "rm9k_f_float * 3")
+
+(define_insn_reservation "rm9k_fdivs" 22
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "rm9k_f_float + rm9k_fdiv * 22")
+
+(define_insn_reservation "rm9k_fdivd" 37
+ (and (eq_attr "cpu" "r9000")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "DF")))
+ "rm9k_f_float + rm9k_fdiv * 37")
+
+(define_insn_reservation "rm9k_branch" 2
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "branch,jump,call"))
+ "rm9k_any1 | rm9k_any2")
+
+(define_insn_reservation "rm9k_unknown" 1
+ (and (eq_attr "cpu" "r9000")
+ (eq_attr "type" "unknown,multi"))
+ "rm9k_m + rm9k_f_int + rm9k_any1 + rm9k_any2")
diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md
new file mode 100644
index 000000000..37eee875a
--- /dev/null
+++ b/gcc/config/mips/constraints.md
@@ -0,0 +1,233 @@
+;; Constraint definitions for MIPS.
+;; Copyright (C) 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints
+
+(define_register_constraint "d" "BASE_REG_CLASS"
+ "An address register. This is equivalent to @code{r} unless
+ generating MIPS16 code.")
+
+(define_register_constraint "t" "T_REG"
+ "@internal")
+
+(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
+ "A floating-point register (if available).")
+
+(define_register_constraint "h" "NO_REGS"
+ "Formerly the @code{hi} register. This constraint is no longer supported.")
+
+(define_register_constraint "l" "TARGET_BIG_ENDIAN ? MD1_REG : MD0_REG"
+ "The @code{lo} register. Use this register to store values that are
+ no bigger than a word.")
+
+(define_register_constraint "x" "MD_REGS"
+ "The concatenated @code{hi} and @code{lo} registers. Use this register
+ to store doubleword values.")
+
+(define_register_constraint "b" "ALL_REGS"
+ "@internal")
+
+;; MIPS16 code always calls through a MIPS16 register; see mips_emit_call_insn
+;; for details.
+(define_register_constraint "c" "TARGET_MIPS16 ? M16_REGS
+ : TARGET_USE_PIC_FN_ADDR_REG ? PIC_FN_ADDR_REG
+ : GR_REGS"
+ "A register suitable for use in an indirect jump. This will always be
+ @code{$25} for @option{-mabicalls}.")
+
+(define_register_constraint "e" "LEA_REGS"
+ "@internal")
+
+(define_register_constraint "j" "PIC_FN_ADDR_REG"
+ "@internal")
+
+;; Don't use this constraint in gcc code! It runs the risk of
+;; introducing a spill failure; see tls_get_tp_<mode>.
+(define_register_constraint "v" "V1_REG"
+ "Register @code{$3}. Do not use this constraint in new code;
+ it is retained only for compatibility with glibc.")
+
+(define_register_constraint "y" "GR_REGS"
+ "Equivalent to @code{r}; retained for backwards compatibility.")
+
+(define_register_constraint "z" "ST_REGS"
+ "A floating-point condition code register.")
+
+(define_register_constraint "A" "DSP_ACC_REGS"
+ "@internal")
+
+(define_register_constraint "a" "ACC_REGS"
+ "@internal")
+
+(define_register_constraint "B" "COP0_REGS"
+ "@internal")
+
+(define_register_constraint "C" "COP2_REGS"
+ "@internal")
+
+(define_register_constraint "D" "COP3_REGS"
+ "@internal")
+
+;; Registers that can be used as the target of multiply-accumulate
+;; instructions. The core MIPS32 ISA provides a hi/lo madd,
+;; but the DSP version allows any accumulator target.
+(define_register_constraint "ka" "ISA_HAS_DSP_MULT ? ACC_REGS : MD_REGS")
+
+(define_constraint "kf"
+ "@internal"
+ (match_operand 0 "force_to_mem_operand"))
+
+;; This is a normal rather than a register constraint because we can
+;; never use the stack pointer as a reload register.
+(define_constraint "ks"
+ "@internal"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == STACK_POINTER_REGNUM")))
+
+;; Integer constraints
+
+(define_constraint "I"
+ "A signed 16-bit constant (for arithmetic instructions)."
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND (ival)")))
+
+(define_constraint "J"
+ "Integer zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "K"
+ "An unsigned 16-bit constant (for logic instructions)."
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND_UNSIGNED (ival)")))
+
+(define_constraint "L"
+ "A signed 32-bit constant in which the lower 16 bits are zero.
+ Such constants can be loaded using @code{lui}."
+ (and (match_code "const_int")
+ (match_test "LUI_OPERAND (ival)")))
+
+(define_constraint "M"
+ "A constant that cannot be loaded using @code{lui}, @code{addiu}
+ or @code{ori}."
+ (and (match_code "const_int")
+ (match_test "!SMALL_OPERAND (ival)")
+ (match_test "!SMALL_OPERAND_UNSIGNED (ival)")
+ (match_test "!LUI_OPERAND (ival)")))
+
+(define_constraint "N"
+ "A constant in the range -65535 to -1 (inclusive)."
+ (and (match_code "const_int")
+ (match_test "ival >= -0xffff && ival < 0")))
+
+(define_constraint "O"
+ "A signed 15-bit constant."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x4000 && ival < 0x4000")))
+
+(define_constraint "P"
+ "A constant in the range 1 to 65535 (inclusive)."
+ (and (match_code "const_int")
+ (match_test "ival > 0 && ival < 0x10000")))
+
+;; Floating-point constraints
+
+(define_constraint "G"
+ "Floating-point zero."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; General constraints
+
+(define_constraint "Q"
+ "@internal"
+ (match_operand 0 "const_arith_operand"))
+
+(define_memory_constraint "R"
+ "An address that can be used in a non-macro load or store."
+ (and (match_code "mem")
+ (match_test "mips_address_insns (XEXP (op, 0), mode, false) == 1")))
+
+(define_constraint "S"
+ "@internal
+ A constant call address."
+ (and (match_operand 0 "call_insn_operand")
+ (match_test "CONSTANT_P (op)")))
+
+(define_constraint "T"
+ "@internal
+ A constant @code{move_operand} that cannot be safely loaded into @code{$25}
+ using @code{la}."
+ (and (match_operand 0 "move_operand")
+ (match_test "CONSTANT_P (op)")
+ (match_test "mips_dangerous_for_la25_p (op)")))
+
+(define_constraint "U"
+ "@internal
+ A constant @code{move_operand} that can be safely loaded into @code{$25}
+ using @code{la}."
+ (and (match_operand 0 "move_operand")
+ (match_test "CONSTANT_P (op)")
+ (match_test "!mips_dangerous_for_la25_p (op)")))
+
+(define_memory_constraint "W"
+ "@internal
+ A memory address based on a member of @code{BASE_REG_CLASS}. This is
+ true for all non-mips16 references (although it can sometimes be implicit
+ if @samp{!TARGET_EXPLICIT_RELOCS}). For MIPS16, it excludes stack and
+ constant-pool references."
+ (and (match_code "mem")
+ (match_operand 0 "memory_operand")
+ (ior (match_test "!TARGET_MIPS16")
+ (and (not (match_operand 0 "stack_operand"))
+ (not (match_test "CONSTANT_P (XEXP (op, 0))"))))))
+
+(define_constraint "YG"
+ "@internal
+ A vector zero."
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+(define_constraint "YA"
+ "@internal
+ An unsigned 6-bit constant."
+ (and (match_code "const_int")
+ (match_test "UIMM6_OPERAND (ival)")))
+
+(define_constraint "YB"
+ "@internal
+ A signed 10-bit constant."
+ (and (match_code "const_int")
+ (match_test "IMM10_OPERAND (ival)")))
+
+(define_constraint "Yb"
+ "@internal"
+ (match_operand 0 "qi_mask_operand"))
+
+(define_constraint "Yh"
+ "@internal"
+ (match_operand 0 "hi_mask_operand"))
+
+(define_constraint "Yw"
+ "@internal"
+ (match_operand 0 "si_mask_operand"))
+
+(define_constraint "Yx"
+ "@internal"
+ (match_operand 0 "low_bitmask_operand"))
diff --git a/gcc/config/mips/crtfastmath.c b/gcc/config/mips/crtfastmath.c
new file mode 100644
index 000000000..a9586b0a7
--- /dev/null
+++ b/gcc/config/mips/crtfastmath.c
@@ -0,0 +1,53 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License
+ and a copy of the GCC Runtime Library Exception along with this
+ program; see the files COPYING3 and COPYING.RUNTIME respectively.
+ If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef __mips_hard_float
+
+/* Flush denormalized numbers to zero. */
+#define _FPU_FLUSH_TZ 0x1000000
+
+/* Rounding control. */
+#define _FPU_RC_NEAREST 0x0 /* RECOMMENDED */
+#define _FPU_RC_ZERO 0x1
+#define _FPU_RC_UP 0x2
+#define _FPU_RC_DOWN 0x3
+
+/* Enable interrupts for IEEE exceptions. */
+#define _FPU_IEEE 0x00000F80
+
+/* Macros for accessing the hardware control word. */
+#define _FPU_GETCW(cw) __asm__ ("cfc1 %0,$31" : "=r" (cw))
+#define _FPU_SETCW(cw) __asm__ ("ctc1 %0,$31" : : "r" (cw))
+
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+ unsigned int fcr;
+
+ /* Flush to zero, round to nearest, IEEE exceptions disabled. */
+ fcr = _FPU_FLUSH_TZ | _FPU_RC_NEAREST;
+
+ _FPU_SETCW(fcr);
+}
+
+#endif /* __mips_hard_float */
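+
+/* Illustrative note, not upstream text: this object is pulled into the
+   link by an ENDFILE_SPEC fragment such as
+
+     %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}
+
+   (see the MIPS linux.h later in this change), so set_fast_math runs
+   as a constructor before main and enables flush-to-zero.  */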
diff --git a/gcc/config/mips/crti.asm b/gcc/config/mips/crti.asm
new file mode 100644
index 000000000..ac04271c5
--- /dev/null
+++ b/gcc/config/mips/crti.asm
@@ -0,0 +1,49 @@
+/* Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The frame provides 4 slots for the argument spill area, 1 for the
+   cpreturn slot and 1 for the stack.  The return address is spilled at
+   offset 40 (64-bit) or 20 (32-bit).  The frame is aligned to 16 bytes
+   for n32.  */
+
+ .section .init,"ax",@progbits
+ .globl _init
+ .type _init,@function
+_init:
+#ifdef __mips64
+ daddu $sp,$sp,-48
+ sd $31,40($sp)
+#else
+ addu $sp,$sp,-32
+ sw $31,20($sp)
+#endif
+
+ .section .fini,"ax",@progbits
+ .globl _fini
+ .type _fini,@function
+_fini:
+#ifdef __mips64
+ daddu $sp,$sp,-48
+ sd $31,40($sp)
+#else
+ addu $sp,$sp,-32
+ sw $31,20($sp)
+#endif
diff --git a/gcc/config/mips/crtn.asm b/gcc/config/mips/crtn.asm
new file mode 100644
index 000000000..03a6b68c9
--- /dev/null
+++ b/gcc/config/mips/crtn.asm
@@ -0,0 +1,52 @@
+/* Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The frame provides 4 slots for the argument spill area, 1 for the
+   cpreturn slot and 1 for the stack.  The return address is spilled at
+   offset 40 (64-bit) or 20 (32-bit).  The frame is aligned to 16 bytes
+   for n32.  */
+
+#ifdef __mips16
+#define RA $7
+#else
+#define RA $31
+#endif
+
+ .section .init,"ax",@progbits
+#ifdef __mips64
+ ld RA,40($sp)
+ daddu $sp,$sp,48
+#else
+ lw RA,20($sp)
+ addu $sp,$sp,32
+#endif
+ j RA
+
+ .section .fini,"ax",@progbits
+#ifdef __mips64
+ ld RA,40($sp)
+ daddu $sp,$sp,48
+#else
+ lw RA,20($sp)
+ addu $sp,$sp,32
+#endif
+ j RA
+
diff --git a/gcc/config/mips/driver-native.c b/gcc/config/mips/driver-native.c
new file mode 100644
index 000000000..1947d7198
--- /dev/null
+++ b/gcc/config/mips/driver-native.c
@@ -0,0 +1,81 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* This will be called by the spec parser in gcc.c when it sees
+ a %:local_cpu_detect(args) construct. Currently it will be called
+ with either "arch" or "tune" as argument depending on if -march=native
+ or -mtune=native is to be substituted.
+
+   It returns a string containing new command-line parameters to be
+   put in place of the above two options, depending on the CPU this
+   is executed on.  E.g. "-march=loongson2f" on a Loongson 2F for
+   -march=native.  If the routine can't detect a known processor,
+   the -march or -mtune option is discarded.
+
+ ARGC and ARGV are set depending on the actual arguments given
+ in the spec. */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu = NULL;
+ char buf[128];
+ FILE *f;
+ bool arch;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = strcmp (argv[0], "arch") == 0;
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+ f = fopen ("/proc/cpuinfo", "r");
+ if (f == NULL)
+ return NULL;
+
+ while (fgets (buf, sizeof (buf), f) != NULL)
+ if (strncmp (buf, "cpu model", sizeof ("cpu model") - 1) == 0)
+ {
+ if (strstr (buf, "Godson2 V0.2") != NULL
+ || strstr (buf, "Loongson-2 V0.2") != NULL)
+ cpu = "loongson2e";
+ else if (strstr (buf, "Godson2 V0.3") != NULL
+ || strstr (buf, "Loongson-2 V0.3") != NULL)
+ cpu = "loongson2f";
+ else if (strstr (buf, "SiByte SB1") != NULL)
+ cpu = "sb1";
+ else if (strstr (buf, "R5000") != NULL)
+ cpu = "r5000";
+ else if (strstr (buf, "Octeon") != NULL)
+ cpu = "octeon";
+ break;
+ }
+
+ fclose (f);
+
+ if (cpu == NULL)
+ return NULL;
+
+ return concat ("-m", argv[0], "=", cpu, NULL);
+}
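+
+/* Illustrative note, not upstream text: a target header hooks this
+   function up via EXTRA_SPEC_FUNCTIONS, e.g.
+
+     { "local_cpu_detect", host_detect_local_cpu },
+
+   and the driver specs then substitute it with
+
+     %{march=native:%<march=native %:local_cpu_detect(arch)}
+
+   as the MIPS linux.h later in this change does.  */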
diff --git a/gcc/config/mips/elf.h b/gcc/config/mips/elf.h
new file mode 100644
index 000000000..572553742
--- /dev/null
+++ b/gcc/config/mips/elf.h
@@ -0,0 +1,51 @@
+/* Target macros for mips*-elf targets.
+ Copyright (C) 1994, 1997, 1999, 2000, 2002, 2003, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* MIPS assemblers don't have the usual .set foo,bar construct;
+ .set is used for assembler options instead. */
+#undef SET_ASM_OP
+#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \
+ do \
+ { \
+ fputc ('\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs (" = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
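+
+/* For example, ASM_OUTPUT_DEF (file, "foo", "bar") emits the line
+
+	foo = bar
+
+   which MIPS assemblers accept in place of the usual .set foo,bar.  */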
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME mips_declare_object_name
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object
+
+/* Leave the linker script to choose the appropriate libraries. */
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crti%O%s crtbegin%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+#define NO_IMPLICIT_EXTERN_C 1
diff --git a/gcc/config/mips/elfoabi.h b/gcc/config/mips/elfoabi.h
new file mode 100644
index 000000000..a3d92bf87
--- /dev/null
+++ b/gcc/config/mips/elfoabi.h
@@ -0,0 +1,40 @@
+/* Target macros for mips*-elf targets that selected between o32 and o64
+ based on the target architecture.
+ Copyright (C) 1994, 1997, 1999, 2000, 2002, 2003, 2004, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ /* Make sure a -mips option is present. This helps us to pick \
+ the right multilib, and also makes the later specs easier \
+ to write. */ \
+ MIPS_ISA_LEVEL_SPEC, \
+ \
+ /* If no ABI option is specified, infer one from the ISA level \
+ or -mgp setting. */ \
+ "%{!mabi=*: %{" MIPS_32BIT_OPTION_SPEC ": -mabi=32;: -mabi=o64}}", \
+ \
+ /* Remove a redundant -mfp64 for -mabi=o64; we want the !mfp64 \
+ multilibs. There's no need to check whether the architecture \
+ is 64-bit; cc1 will complain if it isn't. */ \
+ "%{mabi=o64: %<mfp64}", \
+ \
+  /* Configuration-independent MIPS rules.  */ \
+ BASE_DRIVER_SELF_SPECS
+
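+/* Illustrative note, not upstream text: the "%{!mabi=*: %{COND: A;: B}}"
+   form above is the spec language's if/else; when no -mabi option is
+   given it injects -mabi=32 for 32-bit ISA levels and -mabi=o64
+   otherwise.  */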
diff --git a/gcc/config/mips/elforion.h b/gcc/config/mips/elforion.h
new file mode 100644
index 000000000..5560580c2
--- /dev/null
+++ b/gcc/config/mips/elforion.h
@@ -0,0 +1,20 @@
+/* Definitions of target machine for GNU compiler. MIPS ORION version.
+ Copyright (C) 1994, 1998, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define MIPS_CPU_STRING_DEFAULT "orion"
diff --git a/gcc/config/mips/generic.md b/gcc/config/mips/generic.md
new file mode 100644
index 000000000..d61511f33
--- /dev/null
+++ b/gcc/config/mips/generic.md
@@ -0,0 +1,105 @@
+;; Generic DFA-based pipeline description for MIPS targets
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file is derived from the old define_function_unit description.
+;; Each reservation can be overridden on a processor-by-processor basis.
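+;;
+;; Illustrative note, not upstream text: processor-specific files (such
+;; as the rm9000 description earlier in this change) override these
+;; defaults by guarding their reservations with (eq_attr "cpu" "..."),
+;; e.g.
+;;
+;;   (define_insn_reservation "rm9k_int" 1
+;;     (and (eq_attr "cpu" "r9000") ...)
+;;     "rm9k_any1 | rm9k_any2")
+;;
+;; while the generic reservations below carry no cpu test.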
+
+(define_insn_reservation "generic_alu" 1
+ (eq_attr "type" "unknown,prefetch,prefetchx,condmove,const,arith,
+ shift,slt,clz,trap,multi,nop,logical,signext,move")
+ "alu")
+
+(define_insn_reservation "generic_load" 3
+ (eq_attr "type" "load,fpload,fpidxload")
+ "alu")
+
+(define_insn_reservation "generic_store" 1
+ (eq_attr "type" "store,fpstore,fpidxstore")
+ "alu")
+
+(define_insn_reservation "generic_xfer" 2
+ (eq_attr "type" "mfc,mtc")
+ "alu")
+
+(define_insn_reservation "generic_branch" 1
+ (eq_attr "type" "branch,jump,call")
+ "alu")
+
+(define_insn_reservation "generic_hilo" 1
+ (eq_attr "type" "mfhilo,mthilo")
+ "imuldiv*3")
+
+(define_insn_reservation "generic_imul" 17
+ (eq_attr "type" "imul,imul3,imadd")
+ "imuldiv*17")
+
+(define_insn_reservation "generic_idiv" 38
+ (eq_attr "type" "idiv")
+ "imuldiv*38")
+
+(define_insn_reservation "generic_fcvt" 1
+ (eq_attr "type" "fcvt")
+ "alu")
+
+(define_insn_reservation "generic_fmove" 2
+ (eq_attr "type" "fabs,fneg,fmove")
+ "alu")
+
+(define_insn_reservation "generic_fcmp" 3
+ (eq_attr "type" "fcmp")
+ "alu")
+
+(define_insn_reservation "generic_fadd" 4
+ (eq_attr "type" "fadd")
+ "alu")
+
+(define_insn_reservation "generic_fmul_single" 7
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF"))
+ "alu")
+
+(define_insn_reservation "generic_fmul_double" 8
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF"))
+ "alu")
+
+(define_insn_reservation "generic_fdiv_single" 23
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF"))
+ "alu")
+
+(define_insn_reservation "generic_fdiv_double" 36
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF"))
+ "alu")
+
+(define_insn_reservation "generic_fsqrt_single" 54
+ (and (eq_attr "type" "fsqrt,frsqrt")
+ (eq_attr "mode" "SF"))
+ "alu")
+
+(define_insn_reservation "generic_fsqrt_double" 112
+ (and (eq_attr "type" "fsqrt,frsqrt")
+ (eq_attr "mode" "DF"))
+ "alu")
+
+(define_insn_reservation "generic_frecip_fsqrt_step" 5
+ (eq_attr "type" "frdiv1,frdiv2,frsqrt1,frsqrt2")
+ "alu")
diff --git a/gcc/config/mips/iris6.h b/gcc/config/mips/iris6.h
new file mode 100644
index 000000000..edf8020f5
--- /dev/null
+++ b/gcc/config/mips/iris6.h
@@ -0,0 +1,341 @@
+/* Definitions of target machine for GNU compiler. IRIX 6.5 version.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* We are compiling for IRIX 6 now. */
+#undef TARGET_IRIX6
+#define TARGET_IRIX6 1
+
+#undef MACHINE_TYPE
+#define MACHINE_TYPE "SGI running IRIX 6.5"
+
+/* Default to -mabi=n32 and -mips3. */
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mabi=n32" }
+
+/* Force the default ABI onto the command line in order to make the specs
+ easier to write. */
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ "%{!mabi=*: -mabi=n32}", \
+ /* Configuration-independent MIPS rules. */ \
+ BASE_DRIVER_SELF_SPECS
+
+/* IRIX 6.5 has the float and long double forms of math functions. */
+#define TARGET_C99_FUNCTIONS 1
+
+/* MIPS-specific debugging info.  */
+#define MIPS_DEBUGGING_INFO 1
+
+/* Force the generation of DWARF .debug_frame sections even when not
+   compiling with -g.  This guarantees that we can unwind the stack. */
+#define DWARF2_FRAME_INFO 1
+
+/* The system unwinder in libexc requires a specific dwarf return address
+ column to work. */
+#undef DWARF_FRAME_RETURN_COLUMN
+#define DWARF_FRAME_RETURN_COLUMN (FP_REG_LAST + 1)
+
+/* The size in bytes of a DWARF field indicating an offset or length
+ relative to a debug info section, specified to be 4 bytes in the DWARF-2
+ specification. The SGI/MIPS ABI defines it to be the same as PTR_SIZE. */
+#define DWARF_OFFSET_SIZE PTR_SIZE
+
+/* The size in bytes of the initial length field in a debug info
+ section. The DWARF 3 (draft) specification defines this to be
+ either 4 or 12 (with a 4-byte "escape" word when it's 12), but the
+ SGI/MIPS ABI predates this standard and defines it to be the same
+ as DWARF_OFFSET_SIZE. */
+#define DWARF_INITIAL_LENGTH_SIZE DWARF_OFFSET_SIZE
+
+/* MIPS assemblers don't have the usual .set foo,bar construct;
+ .set is used for assembler options instead. */
+#undef SET_ASM_OP
+#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \
+ do \
+ { \
+ fputc ('\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs (" = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX (TARGET_NEWABI ? "." : "$")
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME mips_declare_object_name
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT mips_finish_declare_object
+
+/* The native IRIX 6 linker does not support merging without a special
+ elspec(5) file. */
+#ifndef IRIX_USING_GNU_LD
+#undef HAVE_GAS_SHF_MERGE
+#define HAVE_GAS_SHF_MERGE 0
+#endif
+
+/* Specify wchar_t types. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (Pmode == DImode ? "int" : "long int")
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE INT_TYPE_SIZE
+
+/* Same for wint_t. */
+#undef WINT_TYPE
+#define WINT_TYPE (Pmode == DImode ? "int" : "long int")
+
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
+
+/* C99 stdint.h types. */
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long long int"
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long long int"
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "short int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long long int"
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "short unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+#define INTMAX_TYPE "long long int"
+#define UINTMAX_TYPE "long long unsigned int"
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* Plain char is unsigned in the SGI compiler. */
+#undef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 0
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("host_mips"); \
+ builtin_define_std ("sgi"); \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("SYSTYPE_SVR4"); \
+ builtin_define ("_MODERN_C"); \
+ builtin_define ("_SVR4_SOURCE"); \
+ builtin_define ("__DSO__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=svr4"); \
+ builtin_assert ("machine=sgi"); \
+ \
+ if (!ISA_MIPS1 && !ISA_MIPS2) \
+ builtin_define ("_COMPILER_VERSION=601"); \
+ \
+ /* We must always define _LONGLONG, even when -ansi is \
+ used, because IRIX 5 system header files require it. \
+ This is OK, because gcc never warns when long long \
+ is used in system header files. \
+ \
+ An alternative would be to support the SGI builtin \
+ type __long_long. */ \
+ builtin_define ("_LONGLONG"); \
+ \
+ /* IRIX 6.5.18 and above provide many ISO C99 \
+ features protected by the __c99 macro. \
+ libstdc++ v3 needs them as well. */ \
+ if (TARGET_IRIX6) \
+ if (flag_isoc99 || c_dialect_cxx ()) \
+ builtin_define ("__c99"); \
+ \
+ /* The GNU C++ standard library requires that \
+ __EXTENSIONS__ and _SGI_SOURCE be defined on at \
+ least IRIX 6.2 and probably all IRIX 6 prior to 6.5. \
+ We don't need this on IRIX 6.5 itself, but it \
+ shouldn't hurt other than the namespace pollution. */ \
+ if (!flag_iso || (TARGET_IRIX6 && c_dialect_cxx ())) \
+ { \
+ builtin_define ("__EXTENSIONS__"); \
+ builtin_define ("_SGI_SOURCE"); \
+ } \
+ } \
+ while (0)
+
+#undef SUBTARGET_CC1_SPEC
+#define SUBTARGET_CC1_SPEC "%{static: -mno-abicalls}"
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{pthread:-D_REENTRANT}"
+
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t.gcc_init,\"ax\",@progbits"
+
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t.gcc_fini,\"ax\",@progbits"
+
+#ifdef IRIX_USING_GNU_LD
+#define IRIX_NO_UNRESOLVED ""
+#else
+#define IRIX_NO_UNRESOLVED "-no_unresolved"
+#endif
+
+#ifdef IRIX_USING_GNU_LD
+#define SUBTARGET_DONT_WARN_UNUSED_SPEC ""
+#define SUBTARGET_WARN_UNUSED_SPEC ""
+#else
+#define SUBTARGET_DONT_WARN_UNUSED_SPEC "-dont_warn_unused"
+#define SUBTARGET_WARN_UNUSED_SPEC "-warn_unused"
+#endif
+
+/* Profiling is supported via libprof1.a not -lc_p as in IRIX 3. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{mabi=n32: \
+ %{mips4:%{pg:/usr/lib32/mips4/gcrt1.o%s} \
+ %{!pg:%{p:/usr/lib32/mips4/mcrt1.o%s /usr/lib32/mips4/libprof1.a%s} \
+ %{!p:/usr/lib32/mips4/crt1.o%s}}} \
+ %{!mips4:%{pg:/usr/lib32/mips3/gcrt1.o%s} \
+ %{!pg:%{p:/usr/lib32/mips3/mcrt1.o%s /usr/lib32/mips3/libprof1.a%s} \
+ %{!p:/usr/lib32/mips3/crt1.o%s}}}} \
+ %{mabi=64: \
+ %{mips4:%{pg:/usr/lib64/mips4/gcrt1.o} \
+ %{!pg:%{p:/usr/lib64/mips4/mcrt1.o /usr/lib64/mips4/libprof1.a} \
+ %{!p:/usr/lib64/mips4/crt1.o}}} \
+ %{!mips4:%{pg:/usr/lib64/mips3/gcrt1.o} \
+ %{!pg:%{p:/usr/lib64/mips3/mcrt1.o /usr/lib64/mips3/libprof1.a} \
+ %{!p:/usr/lib64/mips3/crt1.o}}}}} \
+ irix-crti.o%s crtbegin.o%s"
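+
+/* Illustrative note, not upstream text: for example, linking with
+   -mabi=n32 -mips4 -pg selects /usr/lib32/mips4/gcrt1.o above, while
+   irix-crti.o and crtbegin.o are added on every link.  */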
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{mabi=n32: %{mips4:-L/usr/lib32/mips4} %{!mips4:-L/usr/lib32/mips3} \
+ -L/usr/lib32} \
+ %{mabi=64: %{mips4:-L/usr/lib64/mips4} %{!mips4:-L/usr/lib64/mips3} \
+ -L/usr/lib64} \
+ %{!shared:" \
+ SUBTARGET_DONT_WARN_UNUSED_SPEC \
+ " %{pthread:-lpthread} %{p:libprof1.a%s}%{pg:libprof1.a%s} -lc " \
+ SUBTARGET_WARN_UNUSED_SPEC "}"
+
+/* Avoid getting two warnings for libgcc.a every time we link.  libgcc.a
+ contains references to copysignl, so link with libm to resolve them. */
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC \
+ SUBTARGET_DONT_WARN_UNUSED_SPEC " -lgcc -lm " SUBTARGET_WARN_UNUSED_SPEC
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "crtend.o%s irix-crtn.o%s \
+ %{!shared: \
+ %{mabi=n32:%{mips4:/usr/lib32/mips4/crtn.o%s}\
+ %{!mips4:/usr/lib32/mips3/crtn.o%s}}\
+ %{mabi=64:%{mips4:/usr/lib64/mips4/crtn.o%s}\
+ %{!mips4:/usr/lib64/mips3/crtn.o%s}}}"
+
+/* Generic part of the LINK_SPEC. */
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%{G*} %{EB} %{EL} %{mips1} %{mips2} %{mips3} %{mips4} \
+%{bestGnum} %{shared} %{non_shared} \
+%{call_shared} %{no_archive} %{exact_version} \
+%{!shared: \
+ %{!non_shared: %{!call_shared:%{!r: -call_shared " IRIX_NO_UNRESOLVED "}}}} \
+%{rpath} %{!r: -init __gcc_init -fini __gcc_fini} " IRIX_SUBTARGET_LINK_SPEC
+
+#ifdef IRIX_USING_GNU_LD
+#define IRIX_SUBTARGET_LINK_SPEC \
+ "%{mabi=n32: -melf32bmipn32}%{mabi=64: -melf64bmip}"
+#else
+ /* Explicitly hide crt symbols that would normally be marked with
+ a "hidden" visibility attribute.
+
+ We have traditionally disabled this attribute when using the
+ native linker because the native linker's visibility support is
+ not fully-compatible with the GNU linker's. In particular, the
+ native linker does not pull in archive objects purely to resolve
+ references to the object's hidden symbols, whereas the GNU
+ linker does.
+
+ The gcc build system currently hides symbols in some static
+ libraries (typically libgcov.a or libgcc.a) whenever visibility
+ attributes are supported. On targets with GNU semantics, this
+ makes sure that uses of libx.so symbols in one dynamic object are
+ not resolved to libx.a symbols in another dynamic object. But
+ on targets with IRIX semantics, hiding the symbols prevents the
+ static archive from working at all.
+
+   It would probably be better to enable visibility attributes for
+   IRIX ld and disable versioning of the static archives.  It shouldn't
+ make anything worse, since libx.a symbols are global by default
+ anyway. However, no-one has volunteered to do this yet. */
+
+#define IRIX_SUBTARGET_LINK_SPEC \
+ "%{w} -_SYSTYPE_SVR4 -woff 131 \
+ %{shared:-hidden_symbol __dso_handle} \
+ %{mabi=n32: -n32}%{mabi=64: -64}%{!mabi*: -n32}"
+#endif
+
+/* A linker error can empirically be avoided by removing duplicate
+ library search directories. */
+#define LINK_ELIMINATE_DUPLICATE_LDIRECTORIES 1
+
+/* The SGI linker doesn't understand constructor priorities. */
+#ifndef IRIX_USING_GNU_LD
+#define SUPPORTS_INIT_PRIORITY 0
+#endif
+
+/* Add -g to the mips.h default to avoid confusing gas with local
+   symbols generated from stabs info.  */
+#undef NM_FLAGS
+#define NM_FLAGS "-Bng"
+
+/* The system header files are C++ aware. */
+/* ??? Unfortunately, most but not all of the headers are C++ aware.
+ Specifically, curses.h is not, and as a consequence, defining this
+ used to prevent libg++ building. This is no longer the case so
+ define it again to prevent other problems, e.g. with getopt in
+ unistd.h. We still need some way to fix just those files that need
+ fixing. */
+#define NO_IMPLICIT_EXTERN_C 1
+
+/* -G is incompatible with -KPIC which is the default, so only allow objects
+ in the small data section if the user explicitly asks for it. */
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
+
+#define MIPS_TFMODE_FORMAT mips_extended_format
diff --git a/gcc/config/mips/iris6.opt b/gcc/config/mips/iris6.opt
new file mode 100644
index 000000000..05fc378c5
--- /dev/null
+++ b/gcc/config/mips/iris6.opt
@@ -0,0 +1,45 @@
+; IRIX 6.5 options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+bestGnum
+Driver
+
+call_shared
+Driver
+
+exact_version
+Driver
+
+no_archive
+Driver
+
+non_shared
+Driver
+
+pthread
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/mips/irix-crti.asm b/gcc/config/mips/irix-crti.asm
new file mode 100644
index 000000000..0e52e6194
--- /dev/null
+++ b/gcc/config/mips/irix-crti.asm
@@ -0,0 +1,81 @@
+/* Copyright (C) 2004, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .abicalls
+ .set noreorder
+ .set nomacro
+
+/* The GNU and SGI linkers differ in their implementation of -init and -fini.
+ With the GNU linker, there can only be a single -init option, and the
+ linker simply sets DT_INIT to that value. gcc's initialization and
+ finalization code can go directly in .init, with the prologue and
+ epilogue of the main initialization routine being provided by external
+ object files (*crti.o and *crtn.o in this case).
+
+ The SGI linker instead accepts several -init options. It will set DT_INIT
+ to a linker-created function (placed in .init) that calls each of the -init
+ functions in turn. If there is any user code in .init, this linker-created
+ function will be placed after it. Note that such user code is not treated
+ specially; it will only be called if the -init options arrange for it to
+ be called.
+
+ In theory, the SGI model should allow the crti, crtn and intermediate code
+ to go in .init, just like it can with the GNU linker. However, doing this
+ seems to confuse the linker and triggers an internal error:
+
+ ld32: FATAL 2 : Internal: at ../../ld/mips_code.c mips_code_fixup()
+ text section overflow!
+
+ (seen with MIPSpro 7.30). We therefore put everything in a special
+ .gcc_init section instead. */
+
+ .section .gcc_init,"ax",@progbits
+ .globl __gcc_init
+__gcc_init:
+#if _MIPS_SIM == _ABIO32
+ addiu $sp,$sp,-16
+ sw $31,0($sp)
+#else
+ daddiu $sp,$sp,-16
+ sd $31,0($sp)
+ sd $28,8($sp)
+#endif
+
+ .section .gcc_fini,"ax",@progbits
+ .globl __gcc_fini
+__gcc_fini:
+#if _MIPS_SIM == _ABIO32
+ addiu $sp,$sp,-16
+ sw $31,0($sp)
+#else
+ daddiu $sp,$sp,-16
+ sd $31,0($sp)
+ sd $28,8($sp)
+#endif
+
+/* This object will typically be included in the final link for both
+   shared libraries and executables, and we need to hide the symbols to
+ prevent possible symbol preemption warnings from the SGI linker. */
+.hidden __gcc_init
+.hidden __gcc_fini
+
diff --git a/gcc/config/mips/irix-crtn.asm b/gcc/config/mips/irix-crtn.asm
new file mode 100644
index 000000000..7c28c6ff4
--- /dev/null
+++ b/gcc/config/mips/irix-crtn.asm
@@ -0,0 +1,50 @@
+/* Copyright (C) 2004 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+ .abicalls
+ .set noreorder
+ .set nomacro
+
+ .section .gcc_init,"ax",@progbits
+#if _MIPS_SIM == _ABIO32
+ lw $31,0($sp)
+ jr $31
+ addiu $sp,$sp,16
+#else
+ ld $31,0($sp)
+ ld $28,8($sp)
+ jr $31
+ daddiu $sp,$sp,16
+#endif
+
+ .section .gcc_fini,"ax",@progbits
+#if _MIPS_SIM == _ABIO32
+ lw $31,0($sp)
+ jr $31
+ addiu $sp,$sp,16
+#else
+ ld $31,0($sp)
+ ld $28,8($sp)
+ jr $31
+ daddiu $sp,$sp,16
+#endif
diff --git a/gcc/config/mips/libgcc-mips16.ver b/gcc/config/mips/libgcc-mips16.ver
new file mode 100644
index 000000000..ddb23e7e7
--- /dev/null
+++ b/gcc/config/mips/libgcc-mips16.ver
@@ -0,0 +1,86 @@
+# Copyright (C) 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+GCC_4.4.0 {
+ __mips16_addsf3
+ __mips16_subsf3
+ __mips16_mulsf3
+ __mips16_divsf3
+ __mips16_eqsf2
+ __mips16_nesf2
+ __mips16_gtsf2
+ __mips16_gesf2
+ __mips16_lesf2
+ __mips16_ltsf2
+ __mips16_floatsisf
+ __mips16_floatunsisf
+ __mips16_fix_truncsfsi
+ __mips16_adddf3
+ __mips16_subdf3
+ __mips16_muldf3
+ __mips16_divdf3
+ __mips16_extendsfdf2
+ __mips16_truncdfsf2
+ __mips16_eqdf2
+ __mips16_nedf2
+ __mips16_gtdf2
+ __mips16_gedf2
+ __mips16_ledf2
+ __mips16_ltdf2
+ __mips16_floatsidf
+ __mips16_floatunsidf
+ __mips16_fix_truncdfsi
+ __mips16_ret_sf
+ __mips16_ret_sc
+ __mips16_ret_df
+ __mips16_ret_dc
+ __mips16_call_stub_1
+ __mips16_call_stub_5
+ __mips16_call_stub_2
+ __mips16_call_stub_6
+ __mips16_call_stub_9
+ __mips16_call_stub_10
+ __mips16_call_stub_sf_0
+ __mips16_call_stub_sf_1
+ __mips16_call_stub_sf_5
+ __mips16_call_stub_sf_2
+ __mips16_call_stub_sf_6
+ __mips16_call_stub_sf_9
+ __mips16_call_stub_sf_10
+ __mips16_call_stub_sc_0
+ __mips16_call_stub_sc_1
+ __mips16_call_stub_sc_5
+ __mips16_call_stub_sc_2
+ __mips16_call_stub_sc_6
+ __mips16_call_stub_sc_9
+ __mips16_call_stub_sc_10
+ __mips16_call_stub_df_0
+ __mips16_call_stub_df_1
+ __mips16_call_stub_df_5
+ __mips16_call_stub_df_2
+ __mips16_call_stub_df_6
+ __mips16_call_stub_df_9
+ __mips16_call_stub_df_10
+ __mips16_call_stub_dc_0
+ __mips16_call_stub_dc_1
+ __mips16_call_stub_dc_5
+ __mips16_call_stub_dc_2
+ __mips16_call_stub_dc_6
+ __mips16_call_stub_dc_9
+ __mips16_call_stub_dc_10
+}
diff --git a/gcc/config/mips/linux-unwind.h b/gcc/config/mips/linux-unwind.h
new file mode 100644
index 000000000..094ff58cb
--- /dev/null
+++ b/gcc/config/mips/linux-unwind.h
@@ -0,0 +1,121 @@
+/* DWARF2 EH unwinding support for MIPS Linux.
+ Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Free Software
+ Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef inhibit_libc
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <signal.h>
+#include <asm/unistd.h>
+
+/* The third parameter to the signal handler points to something with
+ * this structure defined in asm/ucontext.h, but the name clashes with
+ * struct ucontext from sys/ucontext.h so this private copy is used. */
+typedef struct _sig_ucontext {
+ unsigned long uc_flags;
+ struct _sig_ucontext *uc_link;
+ stack_t uc_stack;
+ struct sigcontext uc_mcontext;
+ sigset_t uc_sigmask;
+} _sig_ucontext_t;
+
+#define MD_FALLBACK_FRAME_STATE_FOR mips_fallback_frame_state
+
+static _Unwind_Reason_Code
+mips_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ u_int32_t *pc = (u_int32_t *) context->ra;
+ struct sigcontext *sc;
+ _Unwind_Ptr new_cfa, reg_offset;
+ int i;
+
+ /* 24021061 li v0, 0x1061 (rt_sigreturn)*/
+ /* 0000000c syscall */
+ /* or */
+ /* 24021017 li v0, 0x1017 (sigreturn) */
+ /* 0000000c syscall */
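+  /* Note (added commentary, not upstream text): 0x24020000 encodes
+     "addiu v0,$0,imm", i.e. "li v0,imm"; OR-ing in the syscall number
+     below reconstructs the trampoline instructions quoted above.  */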
+ if (pc[1] != 0x0000000c)
+ return _URC_END_OF_STACK;
+#if _MIPS_SIM == _ABIO32
+ if (pc[0] == (0x24020000 | __NR_sigreturn))
+ {
+ struct sigframe {
+ u_int32_t ass[4]; /* Argument save space for o32. */
+ u_int32_t trampoline[2];
+ struct sigcontext sigctx;
+ } *rt_ = context->cfa;
+ sc = &rt_->sigctx;
+ }
+ else
+#endif
+ if (pc[0] == (0x24020000 | __NR_rt_sigreturn))
+ {
+ struct rt_sigframe {
+ u_int32_t ass[4]; /* Argument save space for o32. */
+ u_int32_t trampoline[2];
+ siginfo_t info;
+ _sig_ucontext_t uc;
+ } *rt_ = context->cfa;
+ sc = &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = (_Unwind_Ptr) sc;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = STACK_POINTER_REGNUM;
+ fs->regs.cfa_offset = new_cfa - (_Unwind_Ptr) context->cfa;
+
+ /* On o32 Linux, the register save slots in the sigcontext are
+ eight bytes. We need the lower half of each register slot,
+ so slide our view of the structure back four bytes. */
+#if _MIPS_SIM == _ABIO32 && defined __MIPSEB__
+ reg_offset = 4;
+#else
+ reg_offset = 0;
+#endif
+
+ for (i = 0; i < 32; i++) {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = (_Unwind_Ptr)&(sc->sc_regs[i]) + reg_offset - new_cfa;
+ }
+ /* "PC & -2" points to the faulting instruction, but the unwind code
+ searches for "(ADDR & -2) - 1". (See MASK_RETURN_ADDR for the source
+ of the -2 mask.) Adding 2 here ensures that "(ADDR & -2) - 1" is the
+ address of the second byte of the faulting instruction.
+
+ Note that setting fs->signal_frame would not work. As the comment
+     above MASK_RETURN_ADDR explains, MIPS unwinders must search for an
+ odd-valued address. */
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_VAL_OFFSET;
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset
+ = (_Unwind_Ptr)(sc->sc_pc) + 2 - new_cfa;
+ fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN;
+
+ return _URC_NO_REASON;
+}
+#endif
diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h
new file mode 100644
index 000000000..a78f6bcbb
--- /dev/null
+++ b/gcc/config/mips/linux.h
@@ -0,0 +1,151 @@
+/* Definitions for MIPS running Linux-based GNU systems with ELF format.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME mips_declare_object_name
+
+#undef TARGET_VERSION
+#if TARGET_ENDIAN_DEFAULT == 0
+#define TARGET_VERSION fprintf (stderr, " (MIPSel GNU/Linux with ELF)");
+#else
+#define TARGET_VERSION fprintf (stderr, " (MIPS GNU/Linux with ELF)");
+#endif
+
+/* If we don't set MASK_ABICALLS, we can't default to PIC. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT MASK_ABICALLS
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ /* The GNU C++ standard library requires this. */ \
+ if (c_dialect_cxx ()) \
+ builtin_define ("_GNU_SOURCE"); \
+ } while (0)
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* A standard GNU/Linux mapping. On most targets, it is included in
+ CC1_SPEC itself by config/linux.h, but mips.h overrides CC1_SPEC
+ and provides this hook instead. */
+#undef SUBTARGET_CC1_SPEC
+#define SUBTARGET_CC1_SPEC "%{profile:-p}"
+
+/* From iris5.h */
+/* -G is incompatible with -KPIC which is the default, so only allow objects
+ in the small data section if the user explicitly asks for it. */
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+/* Borrowed from sparc/linux.h */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%(endian_spec) \
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC \
+ "%{!mno-abicalls:%{mplt:-call_nonpic;:-KPIC}}"
+
+/* The MIPS assembler has different syntax for .set. We set it to
+ .dummy to trap any errors. */
+#undef SET_ASM_OP
+#define SET_ASM_OP "\t.dummy\t"
+
+#undef ASM_OUTPUT_DEF
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { \
+ fputc ( '\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs ( " = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ( '\n', FILE); \
+ } while (0)
+
+/* The glibc _mcount stub will save $v0 for us. Don't mess with saving
+ it, since ASM_OUTPUT_REG_PUSH/ASM_OUTPUT_REG_POP do not work in the
+ presence of $gp-relative calls. */
+#undef ASM_OUTPUT_REG_PUSH
+#undef ASM_OUTPUT_REG_POP
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+%{pthread:-lpthread} \
+%{shared:-lc} \
+%{!shared: \
+ %{profile:-lc_p} %{!profile:-lc}}"
+
+#define MD_UNWIND_SUPPORT "config/mips/linux-unwind.h"
+
+#ifdef HAVE_AS_NO_SHARED
+/* Default to -mno-shared for non-PIC. */
+# define NO_SHARED_SPECS \
+ "%{mshared|mno-shared|fpic|fPIC|fpie|fPIE:;:-mno-shared}"
+#else
+# define NO_SHARED_SPECS ""
+#endif
+
+/* -march=native handling only makes sense with the compiler running on
+ a MIPS chip. */
+#if defined(__mips__)
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+# define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+
+# define MARCH_MTUNE_NATIVE_SPECS \
+ " %{march=native:%<march=native %:local_cpu_detect(arch)}" \
+ " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#else
+# define MARCH_MTUNE_NATIVE_SPECS ""
+#endif
+
+#define LINUX_DRIVER_SELF_SPECS \
+ NO_SHARED_SPECS \
+ MARCH_MTUNE_NATIVE_SPECS, \
+ /* -mplt has no effect without -mno-shared. Simplify later \
+ specs handling by removing a redundant option. */ \
+ "%{!mno-shared:%<mplt}", \
+ /* -mplt likewise has no effect for -mabi=64 without -msym32. */ \
+ "%{mabi=64:%{!msym32:%<mplt}}"
+
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ BASE_DRIVER_SELF_SPECS, \
+ LINUX_DRIVER_SELF_SPECS
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
diff --git a/gcc/config/mips/linux64.h b/gcc/config/mips/linux64.h
new file mode 100644
index 000000000..987a9915c
--- /dev/null
+++ b/gcc/config/mips/linux64.h
@@ -0,0 +1,70 @@
+/* Definitions for MIPS running Linux-based GNU systems with ELF format
+ using n32/64 abi.
+ Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Force the default endianness and ABI flags onto the command line
+ in order to make the other specs easier to write. */
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ BASE_DRIVER_SELF_SPECS, \
+ LINUX_DRIVER_SELF_SPECS \
+ " %{!EB:%{!EL:%(endian_spec)}}" \
+ " %{!mabi=*: -mabi=n32}"
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+%{pthread:-lpthread} \
+%{shared:-lc} \
+%{!shared: \
+ %{profile:-lc_p} %{!profile:-lc}}"
+
+#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
+#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld.so.1"
+#define GLIBC_DYNAMIC_LINKERN32 "/lib32/ld.so.1"
+#define UCLIBC_DYNAMIC_LINKERN32 "/lib32/ld-uClibc.so.0"
+#define BIONIC_DYNAMIC_LINKERN32 "/system/bin/linker32"
+#define LINUX_DYNAMIC_LINKERN32 \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKERN32, UCLIBC_DYNAMIC_LINKERN32, \
+ BIONIC_DYNAMIC_LINKERN32)
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%{G*} %{EB} %{EL} %{mips1} %{mips2} %{mips3} %{mips4} \
+%{shared} \
+ %(endian_spec) \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{mabi=n32: -dynamic-linker " LINUX_DYNAMIC_LINKERN32 "} \
+ %{mabi=64: -dynamic-linker " LINUX_DYNAMIC_LINKER64 "} \
+ %{mabi=32: -dynamic-linker " LINUX_DYNAMIC_LINKER32 "}} \
+ %{static:-static}} \
+%{mabi=n32:-melf32%{EB:b}%{EL:l}tsmipn32} \
+%{mabi=64:-melf64%{EB:b}%{EL:l}tsmip} \
+%{mabi=32:-melf32%{EB:b}%{EL:l}tsmip}"
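+
+/* Illustrative note, not upstream text: with -mabi=n32 -EB the spec
+   above expands to "-melf32btsmipn32", selecting the big-endian n32
+   linker emulation.  */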
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX (TARGET_OLDABI ? "$" : ".")
+
+/* GNU/Linux doesn't use the same floating-point format that IRIX uses
+ for long double. There's no need to override this here, since
+ ieee_quad_format is the default, but let's put this here to make
+ sure nobody thinks we just forgot to set it to something else. */
+#define MIPS_TFMODE_FORMAT mips_quad_format
diff --git a/gcc/config/mips/loongson.h b/gcc/config/mips/loongson.h
new file mode 100644
index 000000000..6bfd4d7e5
--- /dev/null
+++ b/gcc/config/mips/loongson.h
@@ -0,0 +1,690 @@
+/* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.
+
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _GCC_LOONGSON_H
+#define _GCC_LOONGSON_H
+
+#if !defined(__mips_loongson_vector_rev)
+# error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Vectors of unsigned bytes, halfwords and words. */
+typedef uint8_t uint8x8_t __attribute__((vector_size (8)));
+typedef uint16_t uint16x4_t __attribute__((vector_size (8)));
+typedef uint32_t uint32x2_t __attribute__((vector_size (8)));
+
+/* Vectors of signed bytes, halfwords and words. */
+typedef int8_t int8x8_t __attribute__((vector_size (8)));
+typedef int16_t int16x4_t __attribute__((vector_size (8)));
+typedef int32_t int32x2_t __attribute__((vector_size (8)));
+
+/* SIMD intrinsics.
+ Unless otherwise noted, calls to the functions below will expand into
+ precisely one machine instruction, modulo any moves required to
+ satisfy register allocation constraints. */
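+
+/* Illustrative usage sketch, not upstream text; the initializer syntax
+   follows GCC's generic vector extension:
+
+     int32x2_t a = {100000, -100000};
+     int16x4_t b = packsswh (a, a);
+
+   Each 32-bit element is saturated to the 16-bit signed range and the
+   results are packed into one int16x4_t.  */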
+
+/* Pack with signed saturation. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+packsswh (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_packsswh (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+packsshb (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_packsshb (s, t);
+}
+
+/* Pack with unsigned saturation. */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+packushb (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_packushb (s, t);
+}
+
+/* Vector addition, treating overflow by wraparound. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+paddw_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_paddw_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+paddh_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_paddh_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+paddb_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_paddb_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+paddw_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_paddw_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+paddh_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_paddh_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+paddb_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_paddb_s (s, t);
+}
+
+/* Addition of doubleword integers, treating overflow by wraparound. */
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+paddd_u (uint64_t s, uint64_t t)
+{
+ return __builtin_loongson_paddd_u (s, t);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+paddd_s (int64_t s, int64_t t)
+{
+ return __builtin_loongson_paddd_s (s, t);
+}
+
+/* Vector addition, treating overflow by signed saturation. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+paddsh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_paddsh (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+paddsb (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_paddsb (s, t);
+}
+
+/* Vector addition, treating overflow by unsigned saturation. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+paddush (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_paddush (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+paddusb (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_paddusb (s, t);
+}
+
+/* Logical AND NOT: each of these computes (~s) & t.  */
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+pandn_ud (uint64_t s, uint64_t t)
+{
+ return __builtin_loongson_pandn_ud (s, t);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+pandn_uw (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_pandn_uw (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pandn_uh (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pandn_uh (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pandn_ub (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pandn_ub (s, t);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+pandn_sd (int64_t s, int64_t t)
+{
+ return __builtin_loongson_pandn_sd (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+pandn_sw (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_pandn_sw (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pandn_sh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pandn_sh (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+pandn_sb (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_pandn_sb (s, t);
+}
+
+/* Average. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pavgh (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pavgh (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pavgb (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pavgb (s, t);
+}
+
+/* Equality test. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+pcmpeqw_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_pcmpeqw_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pcmpeqh_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pcmpeqh_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pcmpeqb_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pcmpeqb_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+pcmpeqw_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_pcmpeqw_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pcmpeqh_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pcmpeqh_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+pcmpeqb_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_pcmpeqb_s (s, t);
+}
+
+/* Greater-than test. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+pcmpgtw_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_pcmpgtw_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pcmpgth_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pcmpgth_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pcmpgtb_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pcmpgtb_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+pcmpgtw_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_pcmpgtw_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pcmpgth_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pcmpgth_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+pcmpgtb_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_pcmpgtb_s (s, t);
+}
+
+/* Extract halfword. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pextrh_u (uint16x4_t s, int field /* 0--3 */)
+{
+ return __builtin_loongson_pextrh_u (s, field);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pextrh_s (int16x4_t s, int field /* 0--3 */)
+{
+ return __builtin_loongson_pextrh_s (s, field);
+}
+
+/* Insert halfword. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pinsrh_0_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pinsrh_0_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pinsrh_1_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pinsrh_1_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pinsrh_2_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pinsrh_2_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pinsrh_3_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pinsrh_3_u (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pinsrh_0_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pinsrh_0_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pinsrh_1_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pinsrh_1_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pinsrh_2_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pinsrh_2_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pinsrh_3_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pinsrh_3_s (s, t);
+}
+
+/* Multiply and add. */
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+pmaddhw (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pmaddhw (s, t);
+}
+
+/* Maximum of signed halfwords. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pmaxsh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pmaxsh (s, t);
+}
+
+/* Maximum of unsigned bytes. */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pmaxub (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pmaxub (s, t);
+}
+
+/* Minimum of signed halfwords. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pminsh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pminsh (s, t);
+}
+
+/* Minimum of unsigned bytes. */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pminub (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pminub (s, t);
+}
+
+/* Move byte mask. */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pmovmskb_u (uint8x8_t s)
+{
+ return __builtin_loongson_pmovmskb_u (s);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+pmovmskb_s (int8x8_t s)
+{
+ return __builtin_loongson_pmovmskb_s (s);
+}
+
+/* Multiply unsigned integers and store high result. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pmulhuh (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_pmulhuh (s, t);
+}
+
+/* Multiply signed integers and store high result. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pmulhh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pmulhh (s, t);
+}
+
+/* Multiply signed integers and store low result. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pmullh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_pmullh (s, t);
+}
+
+/* Multiply unsigned word integers. */
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+pmuluw (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_pmuluw (s, t);
+}
+
+/* Absolute difference. */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+pasubub (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_pasubub (s, t);
+}
+
+/* Sum of unsigned byte integers. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+biadd (uint8x8_t s)
+{
+ return __builtin_loongson_biadd (s);
+}
+
+/* Sum of absolute differences.
+ Note that this intrinsic expands into two machine instructions:
+ PASUBUB followed by BIADD. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psadbh (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_psadbh (s, t);
+}
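+
+/* Equivalently, psadbh (s, t) computes biadd (pasubub (s, t)):
+   elementwise absolute differences followed by a horizontal sum.  */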
+
+/* Shuffle halfwords. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
+{
+ return __builtin_loongson_pshufh_u (dest, s, order);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
+{
+ return __builtin_loongson_pshufh_s (dest, s, order);
+}
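+
+/* Note: assuming pshufw-style semantics, ORDER is interpreted as four
+   2-bit fields, with field i selecting which halfword of S supplies
+   element i of the result; e.g. an order of 0x1b would reverse the
+   four halfwords.  */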
+
+/* Shift left logical. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psllh_u (uint16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psllh_u (s, amount);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+psllh_s (int16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psllh_s (s, amount);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+psllw_u (uint32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psllw_u (s, amount);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+psllw_s (int32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psllw_s (s, amount);
+}
+
+/* Shift right logical. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psrlh_u (uint16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrlh_u (s, amount);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+psrlh_s (int16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrlh_s (s, amount);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+psrlw_u (uint32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrlw_u (s, amount);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+psrlw_s (int32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrlw_s (s, amount);
+}
+
+/* Shift right arithmetic. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psrah_u (uint16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrah_u (s, amount);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+psrah_s (int16x4_t s, uint8_t amount)
+{
+ return __builtin_loongson_psrah_s (s, amount);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+psraw_u (uint32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psraw_u (s, amount);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+psraw_s (int32x2_t s, uint8_t amount)
+{
+ return __builtin_loongson_psraw_s (s, amount);
+}
+
+/* Vector subtraction, treating overflow by wraparound. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+psubw_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_psubw_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psubh_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_psubh_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+psubb_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_psubb_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+psubw_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_psubw_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+psubh_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_psubh_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+psubb_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_psubb_s (s, t);
+}
+
+/* Subtraction of doubleword integers, treating overflow by wraparound. */
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+psubd_u (uint64_t s, uint64_t t)
+{
+ return __builtin_loongson_psubd_u (s, t);
+}
+
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+psubd_s (int64_t s, int64_t t)
+{
+ return __builtin_loongson_psubd_s (s, t);
+}
+
+/* Vector subtraction, treating overflow by signed saturation. */
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+psubsh (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_psubsh (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+psubsb (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_psubsb (s, t);
+}
+
+/* Vector subtraction, treating overflow by unsigned saturation. */
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+psubush (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_psubush (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+psubusb (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_psubusb (s, t);
+}
+
+/* Unpack high data. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+punpckhwd_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_punpckhwd_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+punpckhhw_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_punpckhhw_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+punpckhbh_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_punpckhbh_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+punpckhwd_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_punpckhwd_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+punpckhhw_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_punpckhhw_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+punpckhbh_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_punpckhbh_s (s, t);
+}
+
+/* Unpack low data. */
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+punpcklwd_u (uint32x2_t s, uint32x2_t t)
+{
+ return __builtin_loongson_punpcklwd_u (s, t);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+punpcklhw_u (uint16x4_t s, uint16x4_t t)
+{
+ return __builtin_loongson_punpcklhw_u (s, t);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+punpcklbh_u (uint8x8_t s, uint8x8_t t)
+{
+ return __builtin_loongson_punpcklbh_u (s, t);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+punpcklwd_s (int32x2_t s, int32x2_t t)
+{
+ return __builtin_loongson_punpcklwd_s (s, t);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+punpcklhw_s (int16x4_t s, int16x4_t t)
+{
+ return __builtin_loongson_punpcklhw_s (s, t);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+punpcklbh_s (int8x8_t s, int8x8_t t)
+{
+ return __builtin_loongson_punpcklbh_s (s, t);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
new file mode 100644
index 000000000..225f4d16d
--- /dev/null
+++ b/gcc/config/mips/loongson.md
@@ -0,0 +1,529 @@
+;; Machine description for Loongson-specific patterns, such as those
+;; for the ST Microelectronics Loongson-2E/2F cores.
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec" [
+ UNSPEC_LOONGSON_PAVG
+ UNSPEC_LOONGSON_PCMPEQ
+ UNSPEC_LOONGSON_PCMPGT
+ UNSPEC_LOONGSON_PEXTR
+ UNSPEC_LOONGSON_PINSR_0
+ UNSPEC_LOONGSON_PINSR_1
+ UNSPEC_LOONGSON_PINSR_2
+ UNSPEC_LOONGSON_PINSR_3
+ UNSPEC_LOONGSON_PMADD
+ UNSPEC_LOONGSON_PMOVMSK
+ UNSPEC_LOONGSON_PMULHU
+ UNSPEC_LOONGSON_PMULH
+ UNSPEC_LOONGSON_PMULU
+ UNSPEC_LOONGSON_PASUBUB
+ UNSPEC_LOONGSON_BIADD
+ UNSPEC_LOONGSON_PSADBH
+ UNSPEC_LOONGSON_PSHUFH
+ UNSPEC_LOONGSON_PUNPCKH
+ UNSPEC_LOONGSON_PUNPCKL
+ UNSPEC_LOONGSON_PADDD
+ UNSPEC_LOONGSON_PSUBD
+])
+
+;; Mode iterators and attributes.
+
+;; 64-bit vectors of bytes.
+(define_mode_iterator VB [V8QI])
+
+;; 64-bit vectors of halfwords.
+(define_mode_iterator VH [V4HI])
+
+;; 64-bit vectors of words.
+(define_mode_iterator VW [V2SI])
+
+;; 64-bit vectors of halfwords and bytes.
+(define_mode_iterator VHB [V4HI V8QI])
+
+;; 64-bit vectors of words and halfwords.
+(define_mode_iterator VWH [V2SI V4HI])
+
+;; 64-bit vectors of words, halfwords and bytes.
+(define_mode_iterator VWHB [V2SI V4HI V8QI])
+
+;; 64-bit vectors of words, halfwords and bytes; and DImode.
+(define_mode_iterator VWHBDI [V2SI V4HI V8QI DI])
+
+;; The Loongson instruction suffixes corresponding to the modes in the
+;; VWHBDI iterator.
+(define_mode_attr V_suffix [(V2SI "w") (V4HI "h") (V8QI "b") (DI "d")])
+
+;; Given a vector type T, the mode of a vector half the size of T
+;; and with the same number of elements.
+(define_mode_attr V_squash [(V2SI "V2HI") (V4HI "V4QI")])
+
+;; Given a vector type T, the mode of a vector the same size as T
+;; but with half as many elements.
+(define_mode_attr V_stretch_half [(V2SI "DI") (V4HI "V2SI") (V8QI "V4HI")])
+
+;; The Loongson instruction suffixes corresponding to the transformation
+;; expressed by V_stretch_half.
+(define_mode_attr V_stretch_half_suffix [(V2SI "wd") (V4HI "hw") (V8QI "bh")])
+
+;; Given a vector type T, the mode of a vector the same size as T
+;; but with twice as many elements.
+(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")])
+
+;; The Loongson instruction suffixes corresponding to the conversions
+;; specified by V_squash_double.
+(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")])
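+
+;; For example, instantiating vec_pack_ssat_<mode> below with V2SI uses
+;; <V_squash_double> = V4HI and <V_squash_double_suffix> = "wh", giving
+;; the packsswh instruction.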
+
+;; Move patterns.
+
+;; Expander to legitimize moves involving values of vector modes.
+(define_expand "mov<mode>"
+ [(set (match_operand:VWHB 0)
+ (match_operand:VWHB 1))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ if (mips_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+;; Handle legitimized moves between values of vector modes.
+(define_insn "mov<mode>_internal"
+ [(set (match_operand:VWHB 0 "nonimmediate_operand" "=m,f,d,f, d, m, d")
+ (match_operand:VWHB 1 "move_operand" "f,m,f,dYG,dYG,dYG,m"))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "fpstore,fpload,mfc,mtc,move,store,load")
+ (set_attr "mode" "DI")])
+
+;; Initialization of a vector.
+
+(define_expand "vec_init<mode>"
+ [(set (match_operand:VWHB 0 "register_operand")
+ (match_operand 1 ""))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Instruction patterns for SIMD instructions.
+
+;; Pack with signed saturation.
+(define_insn "vec_pack_ssat_<mode>"
+ [(set (match_operand:<V_squash_double> 0 "register_operand" "=f")
+ (vec_concat:<V_squash_double>
+ (ss_truncate:<V_squash>
+ (match_operand:VWH 1 "register_operand" "f"))
+ (ss_truncate:<V_squash>
+ (match_operand:VWH 2 "register_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "packss<V_squash_double_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Pack with unsigned saturation.
+(define_insn "vec_pack_usat_<mode>"
+ [(set (match_operand:<V_squash_double> 0 "register_operand" "=f")
+ (vec_concat:<V_squash_double>
+ (us_truncate:<V_squash>
+ (match_operand:VH 1 "register_operand" "f"))
+ (us_truncate:<V_squash>
+ (match_operand:VH 2 "register_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "packus<V_squash_double_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Addition, treating overflow by wraparound.
+(define_insn "add<mode>3"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (plus:VWHB (match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "padd<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Addition of doubleword integers stored in FP registers.
+;; Overflow is treated by wraparound.
+;; We use 'unspec' instead of 'plus' here to avoid a clash with
+;; mips.md::add<mode>3.  If 'plus' were used, such an instruction
+;; would be recognized as adddi3 and reload would make it use
+;; GPRs instead of FPRs.
+(define_insn "loongson_paddd"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "f")
+ (match_operand:DI 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PADDD))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "paddd\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Addition, treating overflow by signed saturation.
+(define_insn "ssadd<mode>3"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (ss_plus:VHB (match_operand:VHB 1 "register_operand" "f")
+ (match_operand:VHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "padds<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Addition, treating overflow by unsigned saturation.
+(define_insn "usadd<mode>3"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (us_plus:VHB (match_operand:VHB 1 "register_operand" "f")
+ (match_operand:VHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "paddus<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Logical AND NOT.
+(define_insn "loongson_pandn_<V_suffix>"
+ [(set (match_operand:VWHBDI 0 "register_operand" "=f")
+ (and:VWHBDI
+ (not:VWHBDI (match_operand:VWHBDI 1 "register_operand" "f"))
+ (match_operand:VWHBDI 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pandn\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Average.
+(define_insn "loongson_pavg<V_suffix>"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (unspec:VHB [(match_operand:VHB 1 "register_operand" "f")
+ (match_operand:VHB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PAVG))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pavg<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Equality test.
+(define_insn "loongson_pcmpeq<V_suffix>"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PCMPEQ))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pcmpeq<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Greater-than test.
+(define_insn "loongson_pcmpgt<V_suffix>"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PCMPGT))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pcmpgt<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Extract halfword.
+(define_insn "loongson_pextr<V_suffix>"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PEXTR))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pextr<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Insert halfword.
+(define_insn "loongson_pinsr<V_suffix>_0"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PINSR_0))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pinsr<V_suffix>_0\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+(define_insn "loongson_pinsr<V_suffix>_1"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PINSR_1))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pinsr<V_suffix>_1\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+(define_insn "loongson_pinsr<V_suffix>_2"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PINSR_2))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pinsr<V_suffix>_2\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+(define_insn "loongson_pinsr<V_suffix>_3"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PINSR_3))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pinsr<V_suffix>_3\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+;; Multiply and add packed integers.
+(define_insn "loongson_pmadd<V_stretch_half_suffix>"
+ [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
+ (unspec:<V_stretch_half> [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PMADD))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmadd<V_stretch_half_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Maximum of signed halfwords.
+(define_insn "smax<mode>3"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (smax:VH (match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmaxs<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Maximum of unsigned bytes.
+(define_insn "umax<mode>3"
+ [(set (match_operand:VB 0 "register_operand" "=f")
+ (umax:VB (match_operand:VB 1 "register_operand" "f")
+ (match_operand:VB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmaxu<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Minimum of signed halfwords.
+(define_insn "smin<mode>3"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (smin:VH (match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmins<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Minimum of unsigned bytes.
+(define_insn "umin<mode>3"
+ [(set (match_operand:VB 0 "register_operand" "=f")
+ (umin:VB (match_operand:VB 1 "register_operand" "f")
+ (match_operand:VB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pminu<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Move byte mask.
+(define_insn "loongson_pmovmsk<V_suffix>"
+ [(set (match_operand:VB 0 "register_operand" "=f")
+ (unspec:VB [(match_operand:VB 1 "register_operand" "f")]
+ UNSPEC_LOONGSON_PMOVMSK))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmovmsk<V_suffix>\t%0,%1"
+ [(set_attr "type" "fabs")])
+
+;; Multiply unsigned integers and store high result.
+(define_insn "umul<mode>3_highpart"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PMULHU))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmulhu<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Multiply signed integers and store high result.
+(define_insn "smul<mode>3_highpart"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PMULH))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmulh<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Multiply signed integers and store low result.
+(define_insn "mul<mode>3"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (mult:VH (match_operand:VH 1 "register_operand" "f")
+ (match_operand:VH 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmull<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Multiply unsigned word integers.
+(define_insn "loongson_pmulu<V_suffix>"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unspec:DI [(match_operand:VW 1 "register_operand" "f")
+ (match_operand:VW 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PMULU))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pmulu<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Absolute difference.
+(define_insn "loongson_pasubub"
+ [(set (match_operand:VB 0 "register_operand" "=f")
+ (unspec:VB [(match_operand:VB 1 "register_operand" "f")
+ (match_operand:VB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PASUBUB))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pasubub\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Sum of unsigned byte integers.
+(define_insn "loongson_biadd"
+ [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
+ (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f")]
+ UNSPEC_LOONGSON_BIADD))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "biadd\t%0,%1"
+ [(set_attr "type" "fabs")])
+
+;; Sum of absolute differences.
+(define_insn "loongson_psadbh"
+ [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
+ (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f")
+ (match_operand:VB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PSADBH))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pasubub\t%0,%1,%2;biadd\t%0,%0"
+ [(set_attr "type" "fadd")])
+
+;; Shuffle halfwords.
+(define_insn "loongson_pshufh"
+ [(set (match_operand:VH 0 "register_operand" "=f")
+ (unspec:VH [(match_operand:VH 1 "register_operand" "0")
+ (match_operand:VH 2 "register_operand" "f")
+ (match_operand:SI 3 "register_operand" "f")]
+ UNSPEC_LOONGSON_PSHUFH))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "pshufh\t%0,%2,%3"
+ [(set_attr "type" "fmul")])
+
+;; Shift left logical.
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VWH 0 "register_operand" "=f")
+ (ashift:VWH (match_operand:VWH 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psll<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fmul")])
+
+;; Shift right arithmetic.
+(define_insn "ashr<mode>3"
+ [(set (match_operand:VWH 0 "register_operand" "=f")
+ (ashiftrt:VWH (match_operand:VWH 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psra<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+;; Shift right logical.
+(define_insn "lshr<mode>3"
+ [(set (match_operand:VWH 0 "register_operand" "=f")
+ (lshiftrt:VWH (match_operand:VWH 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psrl<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+;; Subtraction, treating overflow by wraparound.
+(define_insn "sub<mode>3"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (minus:VWHB (match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psub<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Subtraction of doubleword integers stored in FP registers.
+;; Overflow is treated by wraparound.
+;; See loongson_paddd for the reason we use 'unspec' rather than
+;; 'minus' here.
+(define_insn "loongson_psubd"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "f")
+ (match_operand:DI 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PSUBD))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psubd\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Subtraction, treating overflow by signed saturation.
+(define_insn "sssub<mode>3"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (ss_minus:VHB (match_operand:VHB 1 "register_operand" "f")
+ (match_operand:VHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psubs<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Subtraction, treating overflow by unsigned saturation.
+(define_insn "ussub<mode>3"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (us_minus:VHB (match_operand:VHB 1 "register_operand" "f")
+ (match_operand:VHB 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "psubus<V_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fadd")])
+
+;; Unpack high data.
+(define_insn "vec_interleave_high<mode>"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PUNPCKH))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpckh<V_stretch_half_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+;; Unpack low data.
+(define_insn "vec_interleave_low<mode>"
+ [(set (match_operand:VWHB 0 "register_operand" "=f")
+ (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f")
+ (match_operand:VWHB 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_PUNPCKL))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpckl<V_stretch_half_suffix>\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
+;; Integer division and modulus. For integer multiplication, see mips.md.
+
+(define_insn "<u>div<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (any_div:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A"
+ {
+ if (TARGET_LOONGSON_2EF)
+ return mips_output_division ("<d>div<u>.g\t%0,%1,%2", operands);
+ else
+ return mips_output_division ("gs<d>div<u>\t%0,%1,%2", operands);
+ }
+ [(set_attr "type" "idiv3")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<u>mod<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (any_mod:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A"
+ {
+ if (TARGET_LOONGSON_2EF)
+ return mips_output_division ("<d>mod<u>.g\t%0,%1,%2", operands);
+ else
+ return mips_output_division ("gs<d>mod<u>\t%0,%1,%2", operands);
+ }
+ [(set_attr "type" "idiv3")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/config/mips/loongson2ef.md b/gcc/config/mips/loongson2ef.md
new file mode 100644
index 000000000..fa5ae7e9f
--- /dev/null
+++ b/gcc/config/mips/loongson2ef.md
@@ -0,0 +1,252 @@
+;; Pipeline model for ST Microelectronics Loongson-2E/2F cores.
+
+;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec" [
+ UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN
+ UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN
+ UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN
+ UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN
+])
+
+;; Automaton for integer instructions.
+(define_automaton "ls2_alu")
+
+;; ALU1 and ALU2.
+;; We need to query these units to adjust round-robin counter.
+(define_query_cpu_unit "ls2_alu1_core,ls2_alu2_core" "ls2_alu")
+
+;; Pseudo units to help modeling of ALU1/2 round-robin dispatch strategy.
+(define_cpu_unit "ls2_alu1_turn,ls2_alu2_turn" "ls2_alu")
+
+;; Pseudo units to enable/disable ls2_alu[12]_turn units.
+;; ls2_alu[12]_turn unit can be subscribed only after ls2_alu[12]_turn_enabled
+;; unit is subscribed.
+(define_cpu_unit "ls2_alu1_turn_enabled,ls2_alu2_turn_enabled" "ls2_alu")
+(presence_set "ls2_alu1_turn" "ls2_alu1_turn_enabled")
+(presence_set "ls2_alu2_turn" "ls2_alu2_turn_enabled")
+
+;; Reservations for ALU1 (ALU2) instructions.
+;; An instruction goes to ALU1 (ALU2) and causes the next ALU1/2
+;; instruction to be dispatched to ALU2 (ALU1).
+(define_reservation "ls2_alu1"
+ "(ls2_alu1_core+ls2_alu2_turn_enabled)|ls2_alu1_core")
+(define_reservation "ls2_alu2"
+ "(ls2_alu2_core+ls2_alu1_turn_enabled)|ls2_alu2_core")
+
+;; Reservation for ALU1/2 instructions.
+;; An instruction goes to ALU1 iff ls2_alu1_turn_enabled has been
+;; subscribed, in which case it switches the turn to ALU2 by
+;; subscribing ls2_alu2_turn_enabled; otherwise it goes to ALU2.
+(define_reservation "ls2_alu"
+ "(ls2_alu1_core+ls2_alu1_turn+ls2_alu2_turn_enabled)
+ |(ls2_alu1_core+ls2_alu1_turn)
+ |(ls2_alu2_core+ls2_alu2_turn+ls2_alu1_turn_enabled)
+ |(ls2_alu2_core+ls2_alu2_turn)")
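+
+;; For example, an instruction that claims ls2_alu1_core together with
+;; ls2_alu1_turn also subscribes ls2_alu2_turn_enabled, which makes
+;; ls2_alu2_turn available, so the next ALU1/2 instruction is steered
+;; to ALU2.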
+
+;; Automaton for floating-point instructions.
+(define_automaton "ls2_falu")
+
+;; FALU1 and FALU2.
+;; We need to query these units to adjust round-robin counter.
+(define_query_cpu_unit "ls2_falu1_core,ls2_falu2_core" "ls2_falu")
+
+;; Pseudo units to help modeling of FALU1/2 round-robin dispatch strategy.
+(define_cpu_unit "ls2_falu1_turn,ls2_falu2_turn" "ls2_falu")
+
+;; Pseudo units to enable/disable ls2_falu[12]_turn units.
+;; ls2_falu[12]_turn unit can be subscribed only after
+;; ls2_falu[12]_turn_enabled unit is subscribed.
+(define_cpu_unit "ls2_falu1_turn_enabled,ls2_falu2_turn_enabled" "ls2_falu")
+(presence_set "ls2_falu1_turn" "ls2_falu1_turn_enabled")
+(presence_set "ls2_falu2_turn" "ls2_falu2_turn_enabled")
+
+;; Reservations for FALU1 (FALU2) instructions.
+;; An instruction goes to FALU1 (FALU2) and causes the next FALU1/2
+;; instruction to be dispatched to FALU2 (FALU1).
+(define_reservation "ls2_falu1"
+ "(ls2_falu1_core+ls2_falu2_turn_enabled)|ls2_falu1_core")
+(define_reservation "ls2_falu2"
+ "(ls2_falu2_core+ls2_falu1_turn_enabled)|ls2_falu2_core")
+
+;; Reservation for FALU1/2 instructions.
+;; An instruction goes to FALU1 iff ls2_falu1_turn_enabled has been
+;; subscribed, in which case it switches the turn to FALU2 by
+;; subscribing ls2_falu2_turn_enabled; otherwise it goes to FALU2.
+(define_reservation "ls2_falu"
+ "(ls2_falu1+ls2_falu1_turn+ls2_falu2_turn_enabled)
+ |(ls2_falu1+ls2_falu1_turn)
+ |(ls2_falu2+ls2_falu2_turn+ls2_falu1_turn_enabled)
+ |(ls2_falu2+ls2_falu2_turn)")
+
+;; The following 4 instructions each subscribe one of
+;; ls2_[f]alu{1,2}_turn_enabled units according to this attribute.
+;; These instructions are used in mips.c: sched_ls2_dfa_post_advance_cycle.
+
+(define_attr "ls2_turn_type" "alu1,alu2,falu1,falu2,unknown"
+ (const_string "unknown"))
+
+;; Subscribe ls2_alu1_turn_enabled.
+(define_insn "ls2_alu1_turn_enabled_insn"
+ [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU1_TURN_ENABLED_INSN)]
+ "TUNE_LOONGSON_2EF"
+ { gcc_unreachable (); }
+ [(set_attr "ls2_turn_type" "alu1")])
+
+(define_insn_reservation "ls2_alu1_turn_enabled" 0
+ (eq_attr "ls2_turn_type" "alu1")
+ "ls2_alu1_turn_enabled")
+
+;; Subscribe ls2_alu2_turn_enabled.
+(define_insn "ls2_alu2_turn_enabled_insn"
+ [(unspec [(const_int 0)] UNSPEC_LOONGSON_ALU2_TURN_ENABLED_INSN)]
+ "TUNE_LOONGSON_2EF"
+ { gcc_unreachable (); }
+ [(set_attr "ls2_turn_type" "alu2")])
+
+(define_insn_reservation "ls2_alu2_turn_enabled" 0
+ (eq_attr "ls2_turn_type" "alu2")
+ "ls2_alu2_turn_enabled")
+
+;; Subscribe ls2_falu1_turn_enabled.
+(define_insn "ls2_falu1_turn_enabled_insn"
+ [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU1_TURN_ENABLED_INSN)]
+ "TUNE_LOONGSON_2EF"
+ { gcc_unreachable (); }
+ [(set_attr "ls2_turn_type" "falu1")])
+
+(define_insn_reservation "ls2_falu1_turn_enabled" 0
+ (eq_attr "ls2_turn_type" "falu1")
+ "ls2_falu1_turn_enabled")
+
+;; Subscribe ls2_falu2_turn_enabled.
+(define_insn "ls2_falu2_turn_enabled_insn"
+ [(unspec [(const_int 0)] UNSPEC_LOONGSON_FALU2_TURN_ENABLED_INSN)]
+ "TUNE_LOONGSON_2EF"
+ { gcc_unreachable (); }
+ [(set_attr "ls2_turn_type" "falu2")])
+
+(define_insn_reservation "ls2_falu2_turn_enabled" 0
+ (eq_attr "ls2_turn_type" "falu2")
+ "ls2_falu2_turn_enabled")
+
+;; Automaton for memory operations.
+(define_automaton "ls2_mem")
+
+;; Memory unit.
+(define_query_cpu_unit "ls2_mem" "ls2_mem")
+
+;; Reservation for integer instructions.
+(define_insn_reservation "ls2_alu" 2
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "arith,condmove,const,logical,mfhilo,move,
+ mthilo,nop,shift,signext,slt"))
+ "ls2_alu")
+
+;; Reservation for branch instructions.
+(define_insn_reservation "ls2_branch" 2
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "branch,jump,call,trap"))
+ "ls2_alu1")
+
+;; Reservation for integer multiplication instructions.
+(define_insn_reservation "ls2_imult" 5
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "imul,imul3nc"))
+ "ls2_alu2,ls2_alu2_core")
+
+;; Reservation for integer division / remainder instructions.
+;; These instructions use the SRT algorithm and hence take 2-38 cycles.
+(define_insn_reservation "ls2_idiv" 20
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "idiv,idiv3"))
+ "ls2_alu2,ls2_alu2_core*18")
+
+;; Reservation for memory load instructions.
+(define_insn_reservation "ls2_load" 5
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "load,fpload,mfc,mtc"))
+ "ls2_mem")
+
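+;; Reservation for prefetch instructions.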
+(define_insn_reservation "ls2_prefetch" 0
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "ls2_mem")
+
+;; Reservation for memory store instructions.
+;; We assume stores do not alias with dependent loads, so their latency
+;; is set to zero.
+(define_insn_reservation "ls2_store" 0
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "store,fpstore"))
+ "ls2_mem")
+
+;; Reservation for floating-point instructions of latency 3.
+(define_insn_reservation "ls2_fp3" 3
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "fabs,fneg,fcmp,fmove"))
+ "ls2_falu1")
+
+;; Reservation for floating-point instructions of latency 5.
+(define_insn_reservation "ls2_fp5" 5
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "fcvt"))
+ "ls2_falu1")
+
+;; Reservation for floating-point instructions that can go
+;; to either of FALU1/2 units.
+(define_insn_reservation "ls2_falu" 7
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "fadd,fmul,fmadd"))
+ "ls2_falu")
+
+;; Reservation for floating-point division / remainder instructions.
+;; These instructions use the SRT algorithm and hence take a variable
+;; number of cycles:
+;; div.s takes 5-11 cycles
+;; div.d takes 5-18 cycles
+(define_insn_reservation "ls2_fdiv" 9
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "fdiv"))
+ "ls2_falu2,ls2_falu2_core*7")
+
+;; Reservation for floating-point sqrt instructions.
+;; These instructions use the SRT algorithm and hence take a variable
+;; number of cycles:
+;; sqrt.s takes 5-17 cycles
+;; sqrt.d takes 5-32 cycles
+(define_insn_reservation "ls2_fsqrt" 15
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "fsqrt"))
+ "ls2_falu2,ls2_falu2_core*13")
+
+;; Two consecutive ALU instructions.
+(define_insn_reservation "ls2_multi" 4
+ (and (eq_attr "cpu" "loongson_2e,loongson_2f")
+ (eq_attr "type" "multi"))
+ "(ls2_alu1,ls2_alu2_core)|(ls2_alu2,ls2_alu1_core)")
+
+;; Reservation for everything else. Normally, this reservation
+;; will only be used to handle cases like compiling for non-loongson
+;; CPUs with -mtune=loongson2?.
+;;
+;; This reservation depends upon the fact that DFA will check
+;; reservations in the same order as they appear in the file.
+(define_insn_reservation "ls2_unknown" 1
+ (eq_attr "cpu" "loongson_2e,loongson_2f")
+ "ls2_alu1_core+ls2_alu2_core+ls2_falu1_core+ls2_falu2_core+ls2_mem")
diff --git a/gcc/config/mips/loongson3a.md b/gcc/config/mips/loongson3a.md
new file mode 100644
index 000000000..c584f42f0
--- /dev/null
+++ b/gcc/config/mips/loongson3a.md
@@ -0,0 +1,137 @@
+;; Pipeline model for Loongson-3A cores.
+
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Uncomment the following line to output automata for debugging.
+;; (automata_option "v")
+
+;; Automaton for integer instructions.
+(define_automaton "ls3a_a_alu")
+
+;; Automaton for floating-point instructions.
+(define_automaton "ls3a_a_falu")
+
+;; Automaton for memory operations.
+(define_automaton "ls3a_a_mem")
+
+;; Describe the resources.
+
+(define_cpu_unit "ls3a_alu1" "ls3a_a_alu")
+(define_cpu_unit "ls3a_alu2" "ls3a_a_alu")
+(define_cpu_unit "ls3a_mem" "ls3a_a_mem")
+(define_cpu_unit "ls3a_falu1" "ls3a_a_falu")
+(define_cpu_unit "ls3a_falu2" "ls3a_a_falu")
+
+;; Describe instruction reservations.
+
+(define_insn_reservation "ls3a_arith" 1
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "arith,clz,const,logical,
+ move,nop,shift,signext,slt"))
+ "ls3a_alu1 | ls3a_alu2")
+
+(define_insn_reservation "ls3a_branch" 1
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "branch,jump,call,condmove,trap"))
+ "ls3a_alu1")
+
+(define_insn_reservation "ls3a_mfhilo" 1
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "mfhilo,mthilo"))
+ "ls3a_alu2")
+
+;; Operation imul3nc is fully pipelined.
+(define_insn_reservation "ls3a_imul3nc" 5
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "imul3nc"))
+ "ls3a_alu2")
+
+(define_insn_reservation "ls3a_imul" 7
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "imul,imadd"))
+ "ls3a_alu2 * 7")
+
+(define_insn_reservation "ls3a_idiv_si" 12
+ (and (eq_attr "cpu" "loongson_3a")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "ls3a_alu2 * 12")
+
+(define_insn_reservation "ls3a_idiv_di" 25
+ (and (eq_attr "cpu" "loongson_3a")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "ls3a_alu2 * 25")
+
+(define_insn_reservation "ls3a_load" 3
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "load"))
+ "ls3a_mem")
+
+(define_insn_reservation "ls3a_fpload" 4
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "load,mfc,mtc"))
+ "ls3a_mem")
+
+(define_insn_reservation "ls3a_prefetch" 0
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "ls3a_mem")
+
+(define_insn_reservation "ls3a_store" 0
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "store,fpstore,fpidxstore"))
+ "ls3a_mem")
+
+;; All FP operations can execute in FALU1; only FP add, sub, mul and
+;; madd can also execute in FALU2, so try FALU2 first.
+(define_insn_reservation "ls3a_fadd" 6
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "fadd,fmul,fmadd"))
+ "ls3a_falu2 | ls3a_falu1")
+
+(define_insn_reservation "ls3a_fcmp" 2
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "fabs,fcmp,fmove,fneg"))
+ "ls3a_falu1")
+
+(define_insn_reservation "ls3a_fcvt" 4
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "fcvt"))
+ "ls3a_falu1")
+
+(define_insn_reservation "ls3a_fdiv_sf" 12
+ (and (eq_attr "cpu" "loongson_3a")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "SF")))
+ "ls3a_falu1 * 12")
+
+(define_insn_reservation "ls3a_fdiv_df" 19
+ (and (eq_attr "cpu" "loongson_3a")
+ (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt")
+ (eq_attr "mode" "DF")))
+ "ls3a_falu1 * 19")
+
+;; Force single-dispatch for unknown or multi.
+(define_insn_reservation "ls3a_unknown" 1
+ (and (eq_attr "cpu" "loongson_3a")
+ (eq_attr "type" "unknown,multi"))
+ "ls3a_alu1 + ls3a_alu2 + ls3a_falu1 + ls3a_falu2 + ls3a_mem")
+
+;; End of DFA-based pipeline description for loongson_3a
diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md
new file mode 100644
index 000000000..4f518cbc6
--- /dev/null
+++ b/gcc/config/mips/mips-dsp.md
@@ -0,0 +1,1198 @@
+;; Copyright (C) 2005, 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; MIPS DSP ASE Revision 0.98 3/24/2005
+(define_c_enum "unspec" [
+ UNSPEC_ADDQ
+ UNSPEC_ADDQ_S
+ UNSPEC_SUBQ
+ UNSPEC_SUBQ_S
+ UNSPEC_ADDSC
+ UNSPEC_ADDWC
+ UNSPEC_MODSUB
+ UNSPEC_RADDU_W_QB
+ UNSPEC_ABSQ_S
+ UNSPEC_PRECRQ_QB_PH
+ UNSPEC_PRECRQ_PH_W
+ UNSPEC_PRECRQ_RS_PH_W
+ UNSPEC_PRECRQU_S_QB_PH
+ UNSPEC_PRECEQ_W_PHL
+ UNSPEC_PRECEQ_W_PHR
+ UNSPEC_PRECEQU_PH_QBL
+ UNSPEC_PRECEQU_PH_QBR
+ UNSPEC_PRECEQU_PH_QBLA
+ UNSPEC_PRECEQU_PH_QBRA
+ UNSPEC_PRECEU_PH_QBL
+ UNSPEC_PRECEU_PH_QBR
+ UNSPEC_PRECEU_PH_QBLA
+ UNSPEC_PRECEU_PH_QBRA
+ UNSPEC_SHLL
+ UNSPEC_SHLL_S
+ UNSPEC_SHRL_QB
+ UNSPEC_SHRA_PH
+ UNSPEC_SHRA_R
+ UNSPEC_MULEU_S_PH_QBL
+ UNSPEC_MULEU_S_PH_QBR
+ UNSPEC_MULQ_RS_PH
+ UNSPEC_MULEQ_S_W_PHL
+ UNSPEC_MULEQ_S_W_PHR
+ UNSPEC_DPAU_H_QBL
+ UNSPEC_DPAU_H_QBR
+ UNSPEC_DPSU_H_QBL
+ UNSPEC_DPSU_H_QBR
+ UNSPEC_DPAQ_S_W_PH
+ UNSPEC_DPSQ_S_W_PH
+ UNSPEC_MULSAQ_S_W_PH
+ UNSPEC_DPAQ_SA_L_W
+ UNSPEC_DPSQ_SA_L_W
+ UNSPEC_MAQ_S_W_PHL
+ UNSPEC_MAQ_S_W_PHR
+ UNSPEC_MAQ_SA_W_PHL
+ UNSPEC_MAQ_SA_W_PHR
+ UNSPEC_BITREV
+ UNSPEC_INSV
+ UNSPEC_REPL_QB
+ UNSPEC_REPL_PH
+ UNSPEC_CMP_EQ
+ UNSPEC_CMP_LT
+ UNSPEC_CMP_LE
+ UNSPEC_CMPGU_EQ_QB
+ UNSPEC_CMPGU_LT_QB
+ UNSPEC_CMPGU_LE_QB
+ UNSPEC_PICK
+ UNSPEC_PACKRL_PH
+ UNSPEC_EXTR_W
+ UNSPEC_EXTR_R_W
+ UNSPEC_EXTR_RS_W
+ UNSPEC_EXTR_S_H
+ UNSPEC_EXTP
+ UNSPEC_EXTPDP
+ UNSPEC_SHILO
+ UNSPEC_MTHLIP
+ UNSPEC_WRDSP
+ UNSPEC_RDDSP
+])
+
+(define_constants
+ [(CCDSP_PO_REGNUM 182)
+ (CCDSP_SC_REGNUM 183)
+ (CCDSP_CA_REGNUM 184)
+ (CCDSP_OU_REGNUM 185)
+ (CCDSP_CC_REGNUM 186)
+ (CCDSP_EF_REGNUM 187)])
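+
+;; These hard register numbers model the individual fields of the DSP
+;; control register (position, scount, carry, overflow, condition code
+;; and EFI, respectively), letting patterns describe which fields an
+;; instruction reads or sets.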
+
+;; This mode iterator allows si, v2hi, v4qi for all possible modes in DSP ASE.
+(define_mode_iterator DSP [(SI "ISA_HAS_DSP")
+ (V2HI "ISA_HAS_DSP")
+ (V4QI "ISA_HAS_DSP")])
+
+;; This mode iterator allows v2hi, v4qi for vector/SIMD data.
+(define_mode_iterator DSPV [(V2HI "ISA_HAS_DSP")
+ (V4QI "ISA_HAS_DSP")])
+
+;; This mode iterator allows si, v2hi for Q31 and V2Q15 fixed-point data.
+(define_mode_iterator DSPQ [(SI "ISA_HAS_DSP")
+ (V2HI "ISA_HAS_DSP")])
+
+;; DSP instructions use q for fixed-point data, and u for integer in the infix.
+(define_mode_attr dspfmt1 [(SI "q") (V2HI "q") (V4QI "u")])
+
+;; DSP instructions use nothing for fixed-point data, and u for integer in
+;; the infix.
+(define_mode_attr dspfmt1_1 [(SI "") (V2HI "") (V4QI "u")])
+
+;; DSP instructions use w, ph, qb in the postfix.
+(define_mode_attr dspfmt2 [(SI "w") (V2HI "ph") (V4QI "qb")])
+
+;; DSP shift masks for SI, V2HI, V4QI.
+(define_mode_attr dspshift_mask [(SI "0x1f") (V2HI "0xf") (V4QI "0x7")])
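+
+;; For example, the add<DSPV:mode>3 pattern below instantiated for V2HI
+;; uses <dspfmt1> = "q" and <dspfmt2> = "ph", giving addq.ph; for V4QI
+;; it gives addu.qb.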
+
+;; MIPS DSP ASE Revision 0.98 3/24/2005
+;; Table 2-1. MIPS DSP ASE Instructions: Arithmetic
+;; ADDQ*
+(define_insn "add<DSPV:mode>3"
+ [(parallel
+ [(set (match_operand:DSPV 0 "register_operand" "=d")
+ (plus:DSPV (match_operand:DSPV 1 "register_operand" "d")
+ (match_operand:DSPV 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ))])]
+ "ISA_HAS_DSP"
+ "add<DSPV:dspfmt1>.<DSPV:dspfmt2>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_add<DSP:dspfmt1>_s_<DSP:dspfmt2>"
+ [(parallel
+ [(set (match_operand:DSP 0 "register_operand" "=d")
+ (unspec:DSP [(match_operand:DSP 1 "register_operand" "d")
+ (match_operand:DSP 2 "register_operand" "d")]
+ UNSPEC_ADDQ_S))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ_S))])]
+ "ISA_HAS_DSP"
+ "add<DSP:dspfmt1>_s.<DSP:dspfmt2>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; SUBQ*
+(define_insn "sub<DSPV:mode>3"
+ [(parallel
+ [(set (match_operand:DSPV 0 "register_operand" "=d")
+ (minus:DSPV (match_operand:DSPV 1 "register_operand" "d")
+ (match_operand:DSPV 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ))])]
+ "ISA_HAS_DSP"
+ "sub<DSPV:dspfmt1>.<DSPV:dspfmt2>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_sub<DSP:dspfmt1>_s_<DSP:dspfmt2>"
+ [(parallel
+ [(set (match_operand:DSP 0 "register_operand" "=d")
+ (unspec:DSP [(match_operand:DSP 1 "register_operand" "d")
+ (match_operand:DSP 2 "register_operand" "d")]
+ UNSPEC_SUBQ_S))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ_S))])]
+ "ISA_HAS_DSP"
+ "sub<DSP:dspfmt1>_s.<DSP:dspfmt2>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; ADDSC
+(define_insn "mips_addsc"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_ADDSC))
+ (set (reg:CCDSP CCDSP_CA_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDSC))])]
+ "ISA_HAS_DSP"
+ "addsc\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; ADDWC
+(define_insn "mips_addwc"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")
+ (reg:CCDSP CCDSP_CA_REGNUM)]
+ UNSPEC_ADDWC))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDWC))])]
+ "ISA_HAS_DSP"
+ "addwc\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; MODSUB
+(define_insn "mips_modsub"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_MODSUB))]
+ "ISA_HAS_DSP"
+ "modsub\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; RADDU*
+(define_insn "mips_raddu_w_qb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_RADDU_W_QB))]
+ "ISA_HAS_DSP"
+ "raddu.w.qb\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; ABSQ*
+(define_insn "mips_absq_s_<DSPQ:dspfmt2>"
+ [(parallel
+ [(set (match_operand:DSPQ 0 "register_operand" "=d")
+ (unspec:DSPQ [(match_operand:DSPQ 1 "register_operand" "d")]
+ UNSPEC_ABSQ_S))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1)] UNSPEC_ABSQ_S))])]
+ "ISA_HAS_DSP"
+ "absq_s.<DSPQ:dspfmt2>\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PRECRQ*
+(define_insn "mips_precrq_qb_ph"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_PRECRQ_QB_PH))]
+ "ISA_HAS_DSP"
+ "precrq.qb.ph\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precrq_ph_w"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_PRECRQ_PH_W))]
+ "ISA_HAS_DSP"
+ "precrq.ph.w\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precrq_rs_ph_w"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_PRECRQ_RS_PH_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)]
+ UNSPEC_PRECRQ_RS_PH_W))])]
+ "ISA_HAS_DSP"
+ "precrq_rs.ph.w\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PRECRQU*
+(define_insn "mips_precrqu_s_qb_ph"
+ [(parallel
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_PRECRQU_S_QB_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)]
+ UNSPEC_PRECRQU_S_QB_PH))])]
+ "ISA_HAS_DSP"
+ "precrqu_s.qb.ph\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PRECEQ*
+(define_insn "mips_preceq_w_phl"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "d")]
+ UNSPEC_PRECEQ_W_PHL))]
+ "ISA_HAS_DSP"
+ "preceq.w.phl\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_preceq_w_phr"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "d")]
+ UNSPEC_PRECEQ_W_PHR))]
+ "ISA_HAS_DSP"
+ "preceq.w.phr\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PRECEQU*
+(define_insn "mips_precequ_ph_qbl"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEQU_PH_QBL))]
+ "ISA_HAS_DSP"
+ "precequ.ph.qbl\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precequ_ph_qbr"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEQU_PH_QBR))]
+ "ISA_HAS_DSP"
+ "precequ.ph.qbr\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precequ_ph_qbla"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEQU_PH_QBLA))]
+ "ISA_HAS_DSP"
+ "precequ.ph.qbla\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precequ_ph_qbra"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEQU_PH_QBRA))]
+ "ISA_HAS_DSP"
+ "precequ.ph.qbra\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PRECEU*
+(define_insn "mips_preceu_ph_qbl"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEU_PH_QBL))]
+ "ISA_HAS_DSP"
+ "preceu.ph.qbl\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_preceu_ph_qbr"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEU_PH_QBR))]
+ "ISA_HAS_DSP"
+ "preceu.ph.qbr\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_preceu_ph_qbla"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEU_PH_QBLA))]
+ "ISA_HAS_DSP"
+ "preceu.ph.qbla\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_preceu_ph_qbra"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")]
+ UNSPEC_PRECEU_PH_QBRA))]
+ "ISA_HAS_DSP"
+ "preceu.ph.qbra\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; Table 2-2. MIPS DSP ASE Instructions: Shift
+;; SHLL*
+(define_insn "mips_shll_<DSPV:dspfmt2>"
+ [(parallel
+ [(set (match_operand:DSPV 0 "register_operand" "=d,d")
+ (unspec:DSPV [(match_operand:DSPV 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHLL))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SHLL))])]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2])
+ & ~(unsigned HOST_WIDE_INT) <DSPV:dspshift_mask>)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & <DSPV:dspshift_mask>);
+ return "shll.<DSPV:dspfmt2>\t%0,%1,%2";
+ }
+ return "shllv.<DSPV:dspfmt2>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_shll_s_<DSPQ:dspfmt2>"
+ [(parallel
+ [(set (match_operand:DSPQ 0 "register_operand" "=d,d")
+ (unspec:DSPQ [(match_operand:DSPQ 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHLL_S))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SHLL_S))])]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2])
+ & ~(unsigned HOST_WIDE_INT) <DSPQ:dspshift_mask>)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & <DSPQ:dspshift_mask>);
+ return "shll_s.<DSPQ:dspfmt2>\t%0,%1,%2";
+ }
+ return "shllv_s.<DSPQ:dspfmt2>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+;; SHRL*
+(define_insn "mips_shrl_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d,d")
+ (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRL_QB))]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x7)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x7);
+ return "shrl.qb\t%0,%1,%2";
+ }
+ return "shrlv.qb\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+;; SHRA*
+(define_insn "mips_shra_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d")
+ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRA_PH))]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0xf)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xf);
+ return "shra.ph\t%0,%1,%2";
+ }
+ return "shrav.ph\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_shra_r_<DSPQ:dspfmt2>"
+ [(set (match_operand:DSPQ 0 "register_operand" "=d,d")
+ (unspec:DSPQ [(match_operand:DSPQ 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRA_R))]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2])
+ & ~(unsigned HOST_WIDE_INT) <DSPQ:dspshift_mask>)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & <DSPQ:dspshift_mask>);
+ return "shra_r.<DSPQ:dspfmt2>\t%0,%1,%2";
+ }
+ return "shrav_r.<DSPQ:dspfmt2>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+;; Table 2-3. MIPS DSP ASE Instructions: Multiply
+;; MULEU*
+(define_insn "mips_muleu_s_ph_qbl"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_MULEU_S_PH_QBL))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULEU_S_PH_QBL))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSP"
+ "muleu_s.ph.qbl\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_muleu_s_ph_qbr"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V4QI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_MULEU_S_PH_QBR))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULEU_S_PH_QBR))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSP"
+ "muleu_s.ph.qbr\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+;; MULQ*
+(define_insn "mips_mulq_rs_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_MULQ_RS_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULQ_RS_PH))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSP"
+ "mulq_rs.ph\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+;; MULEQ*
+(define_insn "mips_muleq_s_w_phl"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_MULEQ_S_W_PHL))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULEQ_S_W_PHL))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSP"
+ "muleq_s.w.phl\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_muleq_s_w_phr"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_MULEQ_S_W_PHR))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULEQ_S_W_PHR))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSP"
+ "muleq_s.w.phr\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+;; DPAU*
+(define_insn "mips_dpau_h_qbl"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4QI 2 "register_operand" "d")
+ (match_operand:V4QI 3 "register_operand" "d")]
+ UNSPEC_DPAU_H_QBL))]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpau.h.qbl\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpau_h_qbr"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4QI 2 "register_operand" "d")
+ (match_operand:V4QI 3 "register_operand" "d")]
+ UNSPEC_DPAU_H_QBR))]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpau.h.qbr\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; DPSU*
+(define_insn "mips_dpsu_h_qbl"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4QI 2 "register_operand" "d")
+ (match_operand:V4QI 3 "register_operand" "d")]
+ UNSPEC_DPSU_H_QBL))]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpsu.h.qbl\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpsu_h_qbr"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V4QI 2 "register_operand" "d")
+ (match_operand:V4QI 3 "register_operand" "d")]
+ UNSPEC_DPSU_H_QBR))]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpsu.h.qbr\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; DPAQ*
+(define_insn "mips_dpaq_s_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_DPAQ_S_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPAQ_S_W_PH))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpaq_s.w.ph\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; DPSQ*
+(define_insn "mips_dpsq_s_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_DPSQ_S_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPSQ_S_W_PH))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpsq_s.w.ph\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; MULSAQ*
+(define_insn "mips_mulsaq_s_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_MULSAQ_S_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MULSAQ_S_W_PH))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "mulsaq_s.w.ph\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; DPAQ_SA*
+(define_insn "mips_dpaq_sa_l_w"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")]
+ UNSPEC_DPAQ_SA_L_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPAQ_SA_L_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpaq_sa.l.w\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; DPSQ_SA*
+(define_insn "mips_dpsq_sa_l_w"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")]
+ UNSPEC_DPSQ_SA_L_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPSQ_SA_L_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpsq_sa.l.w\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; MAQ*
+(define_insn "mips_maq_s_w_phl"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_MAQ_S_W_PHL))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MAQ_S_W_PHL))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "maq_s.w.phl\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_maq_s_w_phr"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_MAQ_S_W_PHR))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MAQ_S_W_PHR))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "maq_s.w.phr\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; MAQ_SA*
+(define_insn "mips_maq_sa_w_phl"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_MAQ_SA_W_PHL))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MAQ_SA_W_PHL))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "maq_sa.w.phl\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_maq_sa_w_phr"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "register_operand" "d")
+ (match_operand:V2HI 3 "register_operand" "d")]
+ UNSPEC_MAQ_SA_W_PHR))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_MAQ_SA_W_PHR))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "maq_sa.w.phr\t%q0,%2,%3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; Table 2-4. MIPS DSP ASE Instructions: General Bit/Manipulation
+;; BITREV
+(define_insn "mips_bitrev"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_BITREV))]
+ "ISA_HAS_DSP"
+ "bitrev\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; INSV
+(define_insn "mips_insv"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "d")
+ (reg:CCDSP CCDSP_SC_REGNUM)
+ (reg:CCDSP CCDSP_PO_REGNUM)]
+ UNSPEC_INSV))]
+ "ISA_HAS_DSP"
+ "insv\t%0,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; REPL*
+(define_insn "mips_repl_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d,d")
+ (unspec:V4QI [(match_operand:SI 1 "arith_operand" "I,d")]
+ UNSPEC_REPL_QB))]
+ "ISA_HAS_DSP"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[1]) & ~(unsigned HOST_WIDE_INT) 0xff)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "repl.qb\t%0,%1";
+ }
+ return "replv.qb\t%0,%1";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_repl_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d")
+ (unspec:V2HI [(match_operand:SI 1 "reg_imm10_operand" "YB,d")]
+ UNSPEC_REPL_PH))]
+ "ISA_HAS_DSP"
+ "@
+ repl.ph\t%0,%1
+ replv.ph\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; Table 2-5. MIPS DSP ASE Instructions: Compare-Pick
+;; CMPU.* CMP.*
+(define_insn "mips_cmp<DSPV:dspfmt1_1>_eq_<DSPV:dspfmt2>"
+ [(set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_operand:DSPV 0 "register_operand" "d")
+ (match_operand:DSPV 1 "register_operand" "d")
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMP_EQ))]
+ "ISA_HAS_DSP"
+ "cmp<DSPV:dspfmt1_1>.eq.<DSPV:dspfmt2>\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmp<DSPV:dspfmt1_1>_lt_<DSPV:dspfmt2>"
+ [(set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_operand:DSPV 0 "register_operand" "d")
+ (match_operand:DSPV 1 "register_operand" "d")
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMP_LT))]
+ "ISA_HAS_DSP"
+ "cmp<DSPV:dspfmt1_1>.lt.<DSPV:dspfmt2>\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmp<DSPV:dspfmt1_1>_le_<DSPV:dspfmt2>"
+ [(set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_operand:DSPV 0 "register_operand" "d")
+ (match_operand:DSPV 1 "register_operand" "d")
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMP_LE))]
+ "ISA_HAS_DSP"
+ "cmp<DSPV:dspfmt1_1>.le.<DSPV:dspfmt2>\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgu_eq_qb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "register_operand" "d")
+ (match_operand:V4QI 2 "register_operand" "d")]
+ UNSPEC_CMPGU_EQ_QB))]
+ "ISA_HAS_DSP"
+ "cmpgu.eq.qb\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgu_lt_qb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "register_operand" "d")
+ (match_operand:V4QI 2 "register_operand" "d")]
+ UNSPEC_CMPGU_LT_QB))]
+ "ISA_HAS_DSP"
+ "cmpgu.lt.qb\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgu_le_qb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "register_operand" "d")
+ (match_operand:V4QI 2 "register_operand" "d")]
+ UNSPEC_CMPGU_LE_QB))]
+ "ISA_HAS_DSP"
+ "cmpgu.le.qb\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PICK*
+(define_insn "mips_pick_<DSPV:dspfmt2>"
+ [(set (match_operand:DSPV 0 "register_operand" "=d")
+ (unspec:DSPV [(match_operand:DSPV 1 "register_operand" "d")
+ (match_operand:DSPV 2 "register_operand" "d")
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_PICK))]
+ "ISA_HAS_DSP"
+ "pick.<DSPV:dspfmt2>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; PACKRL*
+(define_insn "mips_packrl_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")]
+ UNSPEC_PACKRL_PH))]
+ "ISA_HAS_DSP"
+ "packrl.ph\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; Table 2-6. MIPS DSP ASE Instructions: Accumulator and DSPControl Access
+;; EXTR*
+(define_insn "mips_extr_w"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_EXTR_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTR_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extr.w\t%0,%q1,%2";
+ }
+ return "extrv.w\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_extr_r_w"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_EXTR_R_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTR_R_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extr_r.w\t%0,%q1,%2";
+ }
+ return "extrv_r.w\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_extr_rs_w"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_EXTR_RS_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTR_RS_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extr_rs.w\t%0,%q1,%2";
+ }
+ return "extrv_rs.w\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+;; EXTR*_S.H
+(define_insn "mips_extr_s_h"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_EXTR_S_H))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTR_S_H))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extr_s.h\t%0,%q1,%2";
+ }
+ return "extrv_s.h\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+;; EXTP*
+(define_insn "mips_extp"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")
+ (reg:CCDSP CCDSP_PO_REGNUM)]
+ UNSPEC_EXTP))
+ (set (reg:CCDSP CCDSP_EF_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTP))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extp\t%0,%q1,%2";
+ }
+ return "extpv\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_extpdp"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "a,a")
+ (match_operand:SI 2 "arith_operand" "I,d")
+ (reg:CCDSP CCDSP_PO_REGNUM)]
+ UNSPEC_EXTPDP))
+ (set (reg:CCDSP CCDSP_PO_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)
+ (reg:CCDSP CCDSP_PO_REGNUM)] UNSPEC_EXTPDP))
+ (set (reg:CCDSP CCDSP_EF_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_EXTPDP))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0x1f)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "extpdp\t%0,%q1,%2";
+ }
+ return "extpdpv\t%0,%q1,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+;; SHILO*
+(define_insn "mips_shilo"
+ [(set (match_operand:DI 0 "register_operand" "=a,a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHILO))]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) < -32 || INTVAL (operands[2]) > 31)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+ return "shilo\t%q0,%2";
+ }
+ return "shilov\t%q0,%2";
+}
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+;; MTHLIP*
+(define_insn "mips_mthlip"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "d")
+ (reg:CCDSP CCDSP_PO_REGNUM)]
+ UNSPEC_MTHLIP))
+ (set (reg:CCDSP CCDSP_PO_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)
+ (reg:CCDSP CCDSP_PO_REGNUM)] UNSPEC_MTHLIP))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "mthlip\t%2,%q0"
+ [(set_attr "type" "mfhilo")
+ (set_attr "mode" "SI")])
+
+;; WRDSP
+(define_insn "mips_wrdsp"
+ [(parallel
+ [(set (reg:CCDSP CCDSP_PO_REGNUM)
+ (unspec:CCDSP [(match_operand:SI 0 "register_operand" "d")
+ (match_operand:SI 1 "const_uimm6_operand" "YA")]
+ UNSPEC_WRDSP))
+ (set (reg:CCDSP CCDSP_SC_REGNUM)
+ (unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))
+ (set (reg:CCDSP CCDSP_CA_REGNUM)
+ (unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))
+ (set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))
+ (set (reg:CCDSP CCDSP_EF_REGNUM)
+ (unspec:CCDSP [(match_dup 0) (match_dup 1)] UNSPEC_WRDSP))])]
+ "ISA_HAS_DSP"
+ "wrdsp\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; RDDSP
+(define_insn "mips_rddsp"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "const_uimm6_operand" "YA")
+ (reg:CCDSP CCDSP_PO_REGNUM)
+ (reg:CCDSP CCDSP_SC_REGNUM)
+ (reg:CCDSP CCDSP_CA_REGNUM)
+ (reg:CCDSP CCDSP_OU_REGNUM)
+ (reg:CCDSP CCDSP_CC_REGNUM)
+ (reg:CCDSP CCDSP_EF_REGNUM)]
+ UNSPEC_RDDSP))]
+ "ISA_HAS_DSP"
+ "rddsp\t%0,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+;; Table 2-7. MIPS DSP ASE Instructions: Indexed-Load
+;; L*X
+(define_expand "mips_lbux"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand:SI 2 "register_operand")]
+ "ISA_HAS_DSP"
+{
+ operands[2] = convert_to_mode (Pmode, operands[2], false);
+ if (Pmode == SImode)
+ emit_insn (gen_mips_lbux_si (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_mips_lbux_di (operands[0], operands[1], operands[2]));
+ DONE;
+})
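+
+;; Usage sketch, assuming the documented __builtin_mips_lbux built-in:
+;; the expander converts the SI index to Pmode and then emits the _si or
+;; _di pattern, so the same built-in serves 32- and 64-bit pointers.
+;;
+;;   int f (unsigned char *p, int i)
+;;   {
+;;     return __builtin_mips_lbux (p, i);   /* emits lbux */
+;;   }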
+
+(define_insn "mips_lbux_<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI
+ (mem:QI (plus:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "register_operand" "d")))))]
+ "ISA_HAS_DSP"
+ "lbux\t%0,%2(%1)"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")])
+
+(define_expand "mips_lhx"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand:SI 2 "register_operand")]
+ "ISA_HAS_DSP"
+{
+ operands[2] = convert_to_mode (Pmode, operands[2], false);
+ if (Pmode == SImode)
+ emit_insn (gen_mips_lhx_si (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_mips_lhx_di (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mips_lhx_<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI
+ (mem:HI (plus:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "register_operand" "d")))))]
+ "ISA_HAS_DSP"
+ "lhx\t%0,%2(%1)"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")])
+
+(define_expand "mips_lwx"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand:SI 2 "register_operand")]
+ "ISA_HAS_DSP"
+{
+ operands[2] = convert_to_mode (Pmode, operands[2], false);
+ if (Pmode == SImode)
+ emit_insn (gen_mips_lwx_si (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_mips_lwx_di (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mips_lwx_<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (mem:SI (plus:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "register_operand" "d"))))]
+ "ISA_HAS_DSP"
+ "lwx\t%0,%2(%1)"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")])
+
+;; Table 2-8. MIPS DSP ASE Instructions: Branch
+;; BPOSGE32
+(define_insn "mips_bposge"
+ [(set (pc)
+ (if_then_else (ge (reg:CCDSP CCDSP_PO_REGNUM)
+ (match_operand:SI 1 "immediate_operand" "I"))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "ISA_HAS_DSP"
+ "%*bposge%1\t%0%/"
+ [(set_attr "type" "branch")])
+
+(define_expand "mips_madd<u>"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI
+ (mult:DI (any_extend:DI (match_operand:SI 2 "register_operand"))
+ (any_extend:DI (match_operand:SI 3 "register_operand")))
+ (match_operand:DI 1 "register_operand")))]
+ "ISA_HAS_DSP && !TARGET_64BIT")
+
+(define_expand "mips_msub<u>"
+ [(set (match_operand:DI 0 "register_operand")
+ (minus:DI
+ (match_operand:DI 1 "register_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 2 "register_operand"))
+ (any_extend:DI (match_operand:SI 3 "register_operand")))))]
+ "ISA_HAS_DSP && !TARGET_64BIT")
diff --git a/gcc/config/mips/mips-dspr2.md b/gcc/config/mips/mips-dspr2.md
new file mode 100644
index 000000000..5ae902f01
--- /dev/null
+++ b/gcc/config/mips/mips-dspr2.md
@@ -0,0 +1,623 @@
+;; Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; MIPS DSP ASE Rev 2, Revision 0.02, 11/24/2006
+
+(define_c_enum "unspec" [
+ UNSPEC_ABSQ_S_QB
+ UNSPEC_ADDU_PH
+ UNSPEC_ADDU_S_PH
+ UNSPEC_ADDUH_QB
+ UNSPEC_ADDUH_R_QB
+ UNSPEC_APPEND
+ UNSPEC_BALIGN
+ UNSPEC_CMPGDU_EQ_QB
+ UNSPEC_CMPGDU_LT_QB
+ UNSPEC_CMPGDU_LE_QB
+ UNSPEC_DPA_W_PH
+ UNSPEC_DPS_W_PH
+ UNSPEC_MADD
+ UNSPEC_MADDU
+ UNSPEC_MSUB
+ UNSPEC_MSUBU
+ UNSPEC_MUL_PH
+ UNSPEC_MUL_S_PH
+ UNSPEC_MULQ_RS_W
+ UNSPEC_MULQ_S_PH
+ UNSPEC_MULQ_S_W
+ UNSPEC_MULSA_W_PH
+ UNSPEC_MULT
+ UNSPEC_MULTU
+ UNSPEC_PRECR_QB_PH
+ UNSPEC_PRECR_SRA_PH_W
+ UNSPEC_PRECR_SRA_R_PH_W
+ UNSPEC_PREPEND
+ UNSPEC_SHRA_QB
+ UNSPEC_SHRA_R_QB
+ UNSPEC_SHRL_PH
+ UNSPEC_SUBU_PH
+ UNSPEC_SUBU_S_PH
+ UNSPEC_SUBUH_QB
+ UNSPEC_SUBUH_R_QB
+ UNSPEC_ADDQH_PH
+ UNSPEC_ADDQH_R_PH
+ UNSPEC_ADDQH_W
+ UNSPEC_ADDQH_R_W
+ UNSPEC_SUBQH_PH
+ UNSPEC_SUBQH_R_PH
+ UNSPEC_SUBQH_W
+ UNSPEC_SUBQH_R_W
+ UNSPEC_DPAX_W_PH
+ UNSPEC_DPSX_W_PH
+ UNSPEC_DPAQX_S_W_PH
+ UNSPEC_DPAQX_SA_W_PH
+ UNSPEC_DPSQX_S_W_PH
+ UNSPEC_DPSQX_SA_W_PH
+])
+
+(define_insn "mips_absq_s_qb"
+ [(parallel
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")]
+ UNSPEC_ABSQ_S_QB))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1)] UNSPEC_ABSQ_S_QB))])]
+ "ISA_HAS_DSPR2"
+ "absq_s.qb\t%0,%z1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addu_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (plus:V2HI (match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDU_PH))])]
+ "ISA_HAS_DSPR2"
+ "addu.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addu_s_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_ADDU_S_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDU_S_PH))])]
+ "ISA_HAS_DSPR2"
+ "addu_s.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_adduh_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_ADDUH_QB))]
+ "ISA_HAS_DSPR2"
+ "adduh.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_adduh_r_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_ADDUH_R_QB))]
+ "ISA_HAS_DSPR2"
+ "adduh_r.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_append"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_APPEND))]
+ "ISA_HAS_DSPR2"
+{
+ if (INTVAL (operands[3]) & ~(unsigned HOST_WIDE_INT) 31)
+    operands[3] = GEN_INT (INTVAL (operands[3]) & 31);
+ return "append\t%0,%z2,%3";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_balign"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_BALIGN))]
+ "ISA_HAS_DSPR2"
+{
+ if (INTVAL (operands[3]) & ~(unsigned HOST_WIDE_INT) 3)
+    operands[3] = GEN_INT (INTVAL (operands[3]) & 3);
+ return "balign\t%0,%z2,%3";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgdu_eq_qb"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_CMPGDU_EQ_QB))
+ (set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMPGDU_EQ_QB))])]
+ "ISA_HAS_DSPR2"
+ "cmpgdu.eq.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgdu_lt_qb"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_CMPGDU_LT_QB))
+ (set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMPGDU_LT_QB))])]
+ "ISA_HAS_DSPR2"
+ "cmpgdu.lt.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_cmpgdu_le_qb"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_CMPGDU_LE_QB))
+ (set (reg:CCDSP CCDSP_CC_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)
+ (reg:CCDSP CCDSP_CC_REGNUM)]
+ UNSPEC_CMPGDU_LE_QB))])]
+ "ISA_HAS_DSPR2"
+ "cmpgdu.le.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpa_w_ph"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPA_W_PH))]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpa.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dps_w_ph"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPS_W_PH))]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dps.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mulv2hi3"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (mult:V2HI (match_operand:V2HI 1 "register_operand" "d")
+ (match_operand:V2HI 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MUL_PH))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSPR2"
+ "mul.ph\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_mul_s_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_MUL_S_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MUL_S_PH))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSPR2"
+ "mul_s.ph\t%0,%z1,%z2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_mulq_rs_w"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_MULQ_RS_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULQ_RS_W))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSPR2"
+ "mulq_rs.w\t%0,%z1,%z2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_mulq_s_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_MULQ_S_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULQ_S_PH))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSPR2"
+ "mulq_s.ph\t%0,%z1,%z2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_mulq_s_w"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_MULQ_S_W))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULQ_S_W))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ "ISA_HAS_DSPR2"
+ "mulq_s.w\t%0,%z1,%z2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_mulsa_w_ph"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_MULSA_W_PH))]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "mulsa.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precr_qb_ph"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_PRECR_QB_PH))]
+ "ISA_HAS_DSPR2"
+ "precr.qb.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precr_sra_ph_w"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_PRECR_SRA_PH_W))]
+ "ISA_HAS_DSPR2"
+{
+ if (INTVAL (operands[3]) & ~(unsigned HOST_WIDE_INT) 31)
+    operands[3] = GEN_INT (INTVAL (operands[3]) & 31);
+ return "precr_sra.ph.w\t%0,%z2,%3";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_precr_sra_r_ph_w"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_PRECR_SRA_R_PH_W))]
+ "ISA_HAS_DSPR2"
+{
+ if (INTVAL (operands[3]) & ~(unsigned HOST_WIDE_INT) 31)
+    operands[3] = GEN_INT (INTVAL (operands[3]) & 31);
+ return "precr_sra_r.ph.w\t%0,%z2,%3";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_prepend"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_PREPEND))]
+ "ISA_HAS_DSPR2"
+{
+ if (INTVAL (operands[3]) & ~(unsigned HOST_WIDE_INT) 31)
+    operands[3] = GEN_INT (INTVAL (operands[3]) & 31);
+ return "prepend\t%0,%z2,%3";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_shra_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d,d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG,dYG")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRA_QB))]
+ "ISA_HAS_DSPR2"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 7)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 7);
+ return "shra.qb\t%0,%z1,%2";
+ }
+ return "shrav.qb\t%0,%z1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_shra_r_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d,d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG,dYG")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRA_R_QB))]
+ "ISA_HAS_DSPR2"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 7)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 7);
+ return "shra_r.qb\t%0,%z1,%2";
+ }
+ return "shrav_r.qb\t%0,%z1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_shrl_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d,d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG,dYG")
+ (match_operand:SI 2 "arith_operand" "I,d")]
+ UNSPEC_SHRL_PH))]
+ "ISA_HAS_DSPR2"
+{
+ if (which_alternative == 0)
+ {
+ if (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 15)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 15);
+ return "shrl.ph\t%0,%z1,%2";
+ }
+ return "shrlv.ph\t%0,%z1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subu_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBU_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBU_PH))])]
+ "ISA_HAS_DSPR2"
+ "subu.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subu_s_ph"
+ [(parallel
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBU_S_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBU_S_PH))])]
+ "ISA_HAS_DSPR2"
+ "subu_s.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subuh_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBUH_QB))]
+ "ISA_HAS_DSPR2"
+ "subuh.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subuh_r_qb"
+ [(set (match_operand:V4QI 0 "register_operand" "=d")
+ (unspec:V4QI [(match_operand:V4QI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V4QI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBUH_R_QB))]
+ "ISA_HAS_DSPR2"
+ "subuh_r.qb\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addqh_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_ADDQH_PH))]
+ "ISA_HAS_DSPR2"
+ "addqh.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addqh_r_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_ADDQH_R_PH))]
+ "ISA_HAS_DSPR2"
+ "addqh_r.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addqh_w"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_ADDQH_W))]
+ "ISA_HAS_DSPR2"
+ "addqh.w\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_addqh_r_w"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_ADDQH_R_W))]
+ "ISA_HAS_DSPR2"
+ "addqh_r.w\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subqh_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBQH_PH))]
+ "ISA_HAS_DSPR2"
+ "subqh.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subqh_r_ph"
+ [(set (match_operand:V2HI 0 "register_operand" "=d")
+ (unspec:V2HI [(match_operand:V2HI 1 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")]
+ UNSPEC_SUBQH_R_PH))]
+ "ISA_HAS_DSPR2"
+ "subqh_r.ph\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subqh_w"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_SUBQH_W))]
+ "ISA_HAS_DSPR2"
+ "subqh.w\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_subqh_r_w"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "reg_or_0_operand" "dJ")
+ (match_operand:SI 2 "reg_or_0_operand" "dJ")]
+ UNSPEC_SUBQH_R_W))]
+ "ISA_HAS_DSPR2"
+ "subqh_r.w\t%0,%z1,%z2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpax_w_ph"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPAX_W_PH))]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpax.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpsx_w_ph"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPSX_W_PH))]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpsx.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpaqx_s_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPAQX_S_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPAQX_S_W_PH))])]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpaqx_s.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpaqx_sa_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPAQX_SA_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPAQX_SA_W_PH))])]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpaqx_sa.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpsqx_s_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPSQX_S_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPSQX_S_W_PH))])]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpsqx_s.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "mips_dpsqx_sa_w_ph"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:V2HI 2 "reg_or_0_operand" "dYG")
+ (match_operand:V2HI 3 "reg_or_0_operand" "dYG")]
+ UNSPEC_DPSQX_SA_W_PH))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPSQX_SA_W_PH))])]
+ "ISA_HAS_DSPR2 && !TARGET_64BIT"
+ "dpsqx_sa.w.ph\t%q0,%z2,%z3"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
diff --git a/gcc/config/mips/mips-fixed.md b/gcc/config/mips/mips-fixed.md
new file mode 100644
index 000000000..d5dd909b1
--- /dev/null
+++ b/gcc/config/mips/mips-fixed.md
@@ -0,0 +1,156 @@
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; This file contains MIPS instructions that support fixed-point operations.
+
+;; All supported fixed-point modes
+(define_mode_iterator FIXED [(QQ "") (HQ "") (SQ "") (DQ "TARGET_64BIT")
+ (UQQ "") (UHQ "") (USQ "") (UDQ "TARGET_64BIT")
+ (HA "") (SA "") (DA "TARGET_64BIT")
+ (UHA "") (USA "") (UDA "TARGET_64BIT")])
+
+;; For signed add/sub with saturation
+(define_mode_iterator ADDSUB [(HQ "") (SQ "") (HA "") (SA "") (V2HQ "")
+ (V2HA "")])
+(define_mode_attr addsubfmt [(HQ "ph") (SQ "w") (HA "ph") (SA "w")
+ (V2HQ "ph") (V2HA "ph")])
+
+;; For unsigned add/sub with saturation
+(define_mode_iterator UADDSUB [(UQQ "ISA_HAS_DSP") (UHQ "ISA_HAS_DSPR2")
+ (UHA "ISA_HAS_DSPR2") (V4UQQ "ISA_HAS_DSP")
+ (V2UHQ "ISA_HAS_DSPR2") (V2UHA "ISA_HAS_DSPR2")])
+(define_mode_attr uaddsubfmt [(UQQ "qb") (UHQ "ph") (UHA "ph")
+ (V4UQQ "qb") (V2UHQ "ph") (V2UHA "ph")])
+
+;; For signed multiplication with saturation
+(define_mode_iterator MULQ [(V2HQ "ISA_HAS_DSP") (HQ "ISA_HAS_DSP")
+ (SQ "ISA_HAS_DSPR2")])
+(define_mode_attr mulqfmt [(V2HQ "ph") (HQ "ph") (SQ "w")])
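+
+;; Illustrative expansion of the MULQ iterator: ssmul<mode>3 yields
+;; ssmulv2hq3 and ssmulhq3 ("mulq_rs.ph") whenever ISA_HAS_DSP, but
+;; ssmulsq3 ("mulq_rs.w") only under ISA_HAS_DSPR2, MULQ_RS.W being a
+;; REV 2 instruction.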
+
+(define_insn "add<mode>3"
+ [(set (match_operand:FIXED 0 "register_operand" "=d")
+ (plus:FIXED (match_operand:FIXED 1 "register_operand" "d")
+ (match_operand:FIXED 2 "register_operand" "d")))]
+ ""
+ "<d>addu\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "usadd<mode>3"
+ [(parallel
+ [(set (match_operand:UADDSUB 0 "register_operand" "=d")
+ (us_plus:UADDSUB (match_operand:UADDSUB 1 "register_operand" "d")
+ (match_operand:UADDSUB 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ_S))])]
+ ""
+ "addu_s.<uaddsubfmt>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "ssadd<mode>3"
+ [(parallel
+ [(set (match_operand:ADDSUB 0 "register_operand" "=d")
+ (ss_plus:ADDSUB (match_operand:ADDSUB 1 "register_operand" "d")
+ (match_operand:ADDSUB 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_ADDQ_S))])]
+ "ISA_HAS_DSP"
+ "addq_s.<addsubfmt>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:FIXED 0 "register_operand" "=d")
+ (minus:FIXED (match_operand:FIXED 1 "register_operand" "d")
+ (match_operand:FIXED 2 "register_operand" "d")))]
+ ""
+ "<d>subu\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "ussub<mode>3"
+ [(parallel
+ [(set (match_operand:UADDSUB 0 "register_operand" "=d")
+ (us_minus:UADDSUB (match_operand:UADDSUB 1 "register_operand" "d")
+ (match_operand:UADDSUB 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ_S))])]
+ ""
+ "subu_s.<uaddsubfmt>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "sssub<mode>3"
+ [(parallel
+ [(set (match_operand:ADDSUB 0 "register_operand" "=d")
+ (ss_minus:ADDSUB (match_operand:ADDSUB 1 "register_operand" "d")
+ (match_operand:ADDSUB 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_SUBQ_S))])]
+ "ISA_HAS_DSP"
+ "subq_s.<addsubfmt>\t%0,%1,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "ssmul<mode>3"
+ [(parallel
+ [(set (match_operand:MULQ 0 "register_operand" "=d")
+ (ss_mult:MULQ (match_operand:MULQ 1 "register_operand" "d")
+ (match_operand:MULQ 2 "register_operand" "d")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2)] UNSPEC_MULQ_RS_PH))
+ (clobber (match_scratch:DI 3 "=x"))])]
+ ""
+ "mulq_rs.<mulqfmt>\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "<IMODE>")])
+
+(define_insn "ssmaddsqdq4"
+ [(parallel
+ [(set (match_operand:DQ 0 "register_operand" "=a")
+ (ss_plus:DQ
+ (ss_mult:DQ (sat_fract:DQ (match_operand:SQ 1
+ "register_operand" "d"))
+ (sat_fract:DQ (match_operand:SQ 2
+ "register_operand" "d")))
+ (match_operand:DQ 3 "register_operand" "0")))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPAQ_SA_L_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpaq_sa.l.w\t%q0,%1,%2"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "ssmsubsqdq4"
+ [(parallel
+ [(set (match_operand:DQ 0 "register_operand" "=a")
+ (ss_minus:DQ
+ (match_operand:DQ 3 "register_operand" "0")
+ (ss_mult:DQ (sat_fract:DQ (match_operand:SQ 1
+ "register_operand" "d"))
+ (sat_fract:DQ (match_operand:SQ 2
+ "register_operand" "d")))))
+ (set (reg:CCDSP CCDSP_OU_REGNUM)
+ (unspec:CCDSP [(match_dup 1) (match_dup 2) (match_dup 3)]
+ UNSPEC_DPSQ_SA_L_W))])]
+ "ISA_HAS_DSP && !TARGET_64BIT"
+ "dpsq_sa.l.w\t%q0,%1,%2"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
diff --git a/gcc/config/mips/mips-ftypes.def b/gcc/config/mips/mips-ftypes.def
new file mode 100644
index 000000000..0357aa649
--- /dev/null
+++ b/gcc/config/mips/mips-ftypes.def
@@ -0,0 +1,126 @@
+/* Definitions of prototypes for MIPS built-in functions. -*- C -*-
+ Copyright (C) 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Invoke DEF_MIPS_FTYPE (NARGS, LIST) for each prototype used by
+ MIPS built-in functions, where:
+
+ NARGS is the number of arguments.
+ LIST contains the return-type code followed by the codes for each
+ argument type.
+
+ Argument- and return-type codes are either modes or one of the following:
+
+ VOID for void_type_node
+ INT for integer_type_node
+ POINTER for ptr_type_node
+
+ (we don't use PTR because that's an ANSI-compatibility macro).
+
+ Please keep this list lexicographically sorted by the LIST argument. */
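+
+/* Illustrative sketch (not part of this file): a consumer defines
+   DEF_MIPS_FTYPE before including it.  mips.c, later in this patch,
+   builds the mips_function_type enumeration this way, so that
+   DEF_MIPS_FTYPE (2, (SI, SI, SI)) becomes the enumerator
+   MIPS_SI_FTYPE_SI_SI:
+
+     enum mips_function_type {
+     #define DEF_MIPS_FTYPE(NARGS, LIST) MIPS_FTYPE_NAME##NARGS LIST,
+     #include "config/mips/mips-ftypes.def"
+     #undef DEF_MIPS_FTYPE
+       MIPS_MAX_FTYPE_MAX
+     };  */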
+DEF_MIPS_FTYPE (1, (DF, DF))
+DEF_MIPS_FTYPE (2, (DF, DF, DF))
+
+DEF_MIPS_FTYPE (2, (DI, DI, DI))
+DEF_MIPS_FTYPE (2, (DI, DI, SI))
+DEF_MIPS_FTYPE (3, (DI, DI, SI, SI))
+DEF_MIPS_FTYPE (3, (DI, DI, USI, USI))
+DEF_MIPS_FTYPE (3, (DI, DI, V2HI, V2HI))
+DEF_MIPS_FTYPE (3, (DI, DI, V4QI, V4QI))
+DEF_MIPS_FTYPE (2, (DI, SI, SI))
+DEF_MIPS_FTYPE (2, (DI, USI, USI))
+
+DEF_MIPS_FTYPE (2, (INT, DF, DF))
+DEF_MIPS_FTYPE (2, (INT, SF, SF))
+DEF_MIPS_FTYPE (2, (INT, V2SF, V2SF))
+DEF_MIPS_FTYPE (4, (INT, V2SF, V2SF, V2SF, V2SF))
+
+DEF_MIPS_FTYPE (2, (SI, DI, SI))
+DEF_MIPS_FTYPE (2, (SI, POINTER, SI))
+DEF_MIPS_FTYPE (1, (SI, SI))
+DEF_MIPS_FTYPE (2, (SI, SI, SI))
+DEF_MIPS_FTYPE (3, (SI, SI, SI, SI))
+DEF_MIPS_FTYPE (1, (SI, V2HI))
+DEF_MIPS_FTYPE (2, (SI, V2HI, V2HI))
+DEF_MIPS_FTYPE (1, (SI, V4QI))
+DEF_MIPS_FTYPE (2, (SI, V4QI, V4QI))
+DEF_MIPS_FTYPE (1, (SI, VOID))
+
+DEF_MIPS_FTYPE (1, (SF, SF))
+DEF_MIPS_FTYPE (2, (SF, SF, SF))
+DEF_MIPS_FTYPE (1, (SF, V2SF))
+
+DEF_MIPS_FTYPE (2, (UDI, UDI, UDI))
+DEF_MIPS_FTYPE (2, (UDI, UV2SI, UV2SI))
+
+DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UQI))
+DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UV2SI))
+
+DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, UQI))
+DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, USI))
+DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, UQI))
+DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, USI))
+DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, UV4HI))
+DEF_MIPS_FTYPE (1, (UV4HI, UV8QI))
+DEF_MIPS_FTYPE (2, (UV4HI, UV8QI, UV8QI))
+
+DEF_MIPS_FTYPE (2, (UV8QI, UV4HI, UV4HI))
+DEF_MIPS_FTYPE (1, (UV8QI, UV8QI))
+DEF_MIPS_FTYPE (2, (UV8QI, UV8QI, UV8QI))
+
+DEF_MIPS_FTYPE (1, (V2HI, SI))
+DEF_MIPS_FTYPE (2, (V2HI, SI, SI))
+DEF_MIPS_FTYPE (3, (V2HI, SI, SI, SI))
+DEF_MIPS_FTYPE (1, (V2HI, V2HI))
+DEF_MIPS_FTYPE (2, (V2HI, V2HI, SI))
+DEF_MIPS_FTYPE (2, (V2HI, V2HI, V2HI))
+DEF_MIPS_FTYPE (1, (V2HI, V4QI))
+DEF_MIPS_FTYPE (2, (V2HI, V4QI, V2HI))
+
+DEF_MIPS_FTYPE (2, (V2SF, SF, SF))
+DEF_MIPS_FTYPE (1, (V2SF, V2SF))
+DEF_MIPS_FTYPE (2, (V2SF, V2SF, V2SF))
+DEF_MIPS_FTYPE (3, (V2SF, V2SF, V2SF, INT))
+DEF_MIPS_FTYPE (4, (V2SF, V2SF, V2SF, V2SF, V2SF))
+
+DEF_MIPS_FTYPE (2, (V2SI, V2SI, UQI))
+DEF_MIPS_FTYPE (2, (V2SI, V2SI, V2SI))
+DEF_MIPS_FTYPE (2, (V2SI, V4HI, V4HI))
+
+DEF_MIPS_FTYPE (2, (V4HI, V2SI, V2SI))
+DEF_MIPS_FTYPE (2, (V4HI, V4HI, UQI))
+DEF_MIPS_FTYPE (2, (V4HI, V4HI, USI))
+DEF_MIPS_FTYPE (2, (V4HI, V4HI, V4HI))
+DEF_MIPS_FTYPE (3, (V4HI, V4HI, V4HI, UQI))
+DEF_MIPS_FTYPE (3, (V4HI, V4HI, V4HI, USI))
+
+DEF_MIPS_FTYPE (1, (V4QI, SI))
+DEF_MIPS_FTYPE (2, (V4QI, V2HI, V2HI))
+DEF_MIPS_FTYPE (1, (V4QI, V4QI))
+DEF_MIPS_FTYPE (2, (V4QI, V4QI, SI))
+DEF_MIPS_FTYPE (2, (V4QI, V4QI, V4QI))
+
+DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI))
+DEF_MIPS_FTYPE (1, (V8QI, V8QI))
+DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI))
+
+DEF_MIPS_FTYPE (2, (VOID, SI, CVPOINTER))
+DEF_MIPS_FTYPE (2, (VOID, SI, SI))
+DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
+DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def
new file mode 100644
index 000000000..b9c508b5c
--- /dev/null
+++ b/gcc/config/mips/mips-modes.def
@@ -0,0 +1,48 @@
+/* MIPS extra machine modes.
+ Copyright (C) 2003, 2004, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* MIPS has a quirky almost-IEEE format for all its
+ floating point. */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+
+/* Irix6 will override this via MIPS_TFMODE_FORMAT. */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+
+VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
+VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
+VECTOR_MODES (ACCUM, 4); /* V2HA */
+VECTOR_MODES (UACCUM, 4); /* V2UHA */
+
+/* Paired single comparison instructions use 2 or 4 CC registers. */
+CC_MODE (CCV2);
+ADJUST_BYTESIZE (CCV2, 8);
+ADJUST_ALIGNMENT (CCV2, 8);
+
+CC_MODE (CCV4);
+ADJUST_BYTESIZE (CCV4, 16);
+ADJUST_ALIGNMENT (CCV4, 16);
+
+/* For MIPS DSP control registers. */
+CC_MODE (CCDSP);
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
new file mode 100644
index 000000000..611459884
--- /dev/null
+++ b/gcc/config/mips/mips-protos.h
@@ -0,0 +1,341 @@
+/* Prototypes of target machine for GNU compiler. MIPS version.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by A. Lichnewsky (lich@inria.inria.fr).
+ Changed by Michael Meissner (meissner@osf.org).
+ 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
+ Brendan Eich (brendan@microunity.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_MIPS_PROTOS_H
+#define GCC_MIPS_PROTOS_H
+
+/* Describes how a symbol is used.
+
+ SYMBOL_CONTEXT_CALL
+ The symbol is used as the target of a call instruction.
+
+ SYMBOL_CONTEXT_LEA
+ The symbol is used in a load-address operation.
+
+ SYMBOL_CONTEXT_MEM
+ The symbol is used as the address in a MEM. */
+enum mips_symbol_context {
+ SYMBOL_CONTEXT_CALL,
+ SYMBOL_CONTEXT_LEA,
+ SYMBOL_CONTEXT_MEM
+};
+
+/* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address.
+
+ SYMBOL_ABSOLUTE
+ The symbol's value will be calculated using absolute relocations,
+ such as %hi and %lo.
+
+ SYMBOL_GP_RELATIVE
+ The symbol's value will be calculated by adding a 16-bit offset
+ from $gp.
+
+ SYMBOL_PC_RELATIVE
+ The symbol's value will be calculated using a MIPS16 PC-relative
+ calculation.
+
+ SYMBOL_FORCE_TO_MEM
+ The symbol's value must be forced to memory and loaded from there.
+
+ SYMBOL_GOT_PAGE_OFST
+ The symbol's value will be calculated by loading an address
+ from the GOT and then applying a 16-bit offset.
+
+ SYMBOL_GOT_DISP
+ The symbol's value will be loaded directly from the GOT.
+
+ SYMBOL_GOTOFF_PAGE
+ An UNSPEC wrapper around a SYMBOL_GOT_PAGE_OFST. It represents the
+ offset from _gp of the GOT entry.
+
+ SYMBOL_GOTOFF_DISP
+ An UNSPEC wrapper around a SYMBOL_GOT_DISP. It represents the
+ offset from _gp of the symbol's GOT entry.
+
+ SYMBOL_GOTOFF_CALL
+ Like SYMBOL_GOTOFF_DISP, but used when calling a global function.
+ The GOT entry is allowed to point to a stub rather than to the
+ function itself.
+
+ SYMBOL_GOTOFF_LOADGP
+ An UNSPEC wrapper around a function's address. It represents the
+ offset of _gp from the start of the function.
+
+ SYMBOL_TLS
+ A thread-local symbol.
+
+ SYMBOL_TLSGD
+ SYMBOL_TLSLDM
+ SYMBOL_DTPREL
+ SYMBOL_GOTTPREL
+ SYMBOL_TPREL
+ UNSPEC wrappers around SYMBOL_TLS, corresponding to the
+ thread-local storage relocation operators.
+
+ SYMBOL_32_HIGH
+ For a 32-bit symbolic address X, this is the value of %hi(X).
+
+ SYMBOL_64_HIGH
+ For a 64-bit symbolic address X, this is the value of
+ (%highest(X) << 16) + %higher(X).
+
+ SYMBOL_64_MID
+ For a 64-bit symbolic address X, this is the value of
+ (%higher(X) << 16) + %hi(X).
+
+ SYMBOL_64_LOW
+ For a 64-bit symbolic address X, this is the value of
+ (%hi(X) << 16) + %lo(X).
+
+ SYMBOL_HALF
+ An UNSPEC wrapper around any kind of address. It represents the
+ low 16 bits of that address. */
+enum mips_symbol_type {
+ SYMBOL_ABSOLUTE,
+ SYMBOL_GP_RELATIVE,
+ SYMBOL_PC_RELATIVE,
+ SYMBOL_FORCE_TO_MEM,
+ SYMBOL_GOT_PAGE_OFST,
+ SYMBOL_GOT_DISP,
+ SYMBOL_GOTOFF_PAGE,
+ SYMBOL_GOTOFF_DISP,
+ SYMBOL_GOTOFF_CALL,
+ SYMBOL_GOTOFF_LOADGP,
+ SYMBOL_TLS,
+ SYMBOL_TLSGD,
+ SYMBOL_TLSLDM,
+ SYMBOL_DTPREL,
+ SYMBOL_GOTTPREL,
+ SYMBOL_TPREL,
+ SYMBOL_32_HIGH,
+ SYMBOL_64_HIGH,
+ SYMBOL_64_MID,
+ SYMBOL_64_LOW,
+ SYMBOL_HALF
+};
+#define NUM_SYMBOL_TYPES (SYMBOL_HALF + 1)
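+
+/* Illustrative sketch (an assumption, not from the upstream sources):
+   the SYMBOL_64_* values above let a 64-bit absolute address X be
+   rebuilt in 16-bit pieces, conceptually
+
+     X == (%highest(X) << 48) + (%higher(X) << 32)
+          + (%hi(X) << 16) + %lo(X)
+
+   modulo the sign extension performed when each 16-bit piece is
+   added in. */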
+
+/* Identifies a style of $gp initialization sequence.
+
+ LOADGP_NONE
+ No initialization sequence is needed.
+
+ LOADGP_OLDABI
+ The o32 and o64 PIC sequence (the kind traditionally generated
+ by .cpload).
+
+ LOADGP_NEWABI
+ The n32 and n64 PIC sequence (the kind traditionally generated
+ by .cpsetup).
+
+ LOADGP_ABSOLUTE
+ The GNU absolute sequence, as generated by loadgp_absolute.
+
+ LOADGP_RTP
+ The VxWorks RTP PIC sequence, as generated by loadgp_rtp. */
+enum mips_loadgp_style {
+ LOADGP_NONE,
+ LOADGP_OLDABI,
+ LOADGP_NEWABI,
+ LOADGP_ABSOLUTE,
+ LOADGP_RTP
+};
+
+struct mips16e_save_restore_info;
+
+/* Classifies a type of call.
+
+ MIPS_CALL_NORMAL
+ A normal call or call_value pattern.
+
+ MIPS_CALL_SIBCALL
+ A sibcall or sibcall_value pattern.
+
+ MIPS_CALL_EPILOGUE
+ A call inserted in the epilogue. */
+enum mips_call_type {
+ MIPS_CALL_NORMAL,
+ MIPS_CALL_SIBCALL,
+ MIPS_CALL_EPILOGUE
+};
+
+extern bool mips_symbolic_constant_p (rtx, enum mips_symbol_context,
+ enum mips_symbol_type *);
+extern int mips_regno_mode_ok_for_base_p (int, enum machine_mode, bool);
+extern bool mips_stack_address_p (rtx, enum machine_mode);
+extern int mips_address_insns (rtx, enum machine_mode, bool);
+extern int mips_const_insns (rtx);
+extern int mips_split_const_insns (rtx);
+extern int mips_load_store_insns (rtx, rtx);
+extern int mips_idiv_insns (void);
+extern rtx mips_emit_move (rtx, rtx);
+extern rtx mips_pic_base_register (rtx);
+extern rtx mips_got_load (rtx, rtx, enum mips_symbol_type);
+extern bool mips_split_symbol (rtx, rtx, enum machine_mode, rtx *);
+extern rtx mips_unspec_address (rtx, enum mips_symbol_type);
+extern void mips_move_integer (rtx, rtx, unsigned HOST_WIDE_INT);
+extern bool mips_legitimize_move (enum machine_mode, rtx, rtx);
+
+extern int m16_uimm3_b (rtx, enum machine_mode);
+extern int m16_simm4_1 (rtx, enum machine_mode);
+extern int m16_nsimm4_1 (rtx, enum machine_mode);
+extern int m16_simm5_1 (rtx, enum machine_mode);
+extern int m16_nsimm5_1 (rtx, enum machine_mode);
+extern int m16_uimm5_4 (rtx, enum machine_mode);
+extern int m16_nuimm5_4 (rtx, enum machine_mode);
+extern int m16_simm8_1 (rtx, enum machine_mode);
+extern int m16_nsimm8_1 (rtx, enum machine_mode);
+extern int m16_uimm8_1 (rtx, enum machine_mode);
+extern int m16_nuimm8_1 (rtx, enum machine_mode);
+extern int m16_uimm8_m1_1 (rtx, enum machine_mode);
+extern int m16_uimm8_4 (rtx, enum machine_mode);
+extern int m16_nuimm8_4 (rtx, enum machine_mode);
+extern int m16_simm8_8 (rtx, enum machine_mode);
+extern int m16_nsimm8_8 (rtx, enum machine_mode);
+
+extern rtx mips_subword (rtx, bool);
+extern bool mips_split_64bit_move_p (rtx, rtx);
+extern void mips_split_doubleword_move (rtx, rtx);
+extern const char *mips_output_move (rtx, rtx);
+extern bool mips_cfun_has_cprestore_slot_p (void);
+extern bool mips_cprestore_address_p (rtx, bool);
+extern void mips_save_gp_to_cprestore_slot (rtx, rtx, rtx, rtx);
+extern void mips_restore_gp_from_cprestore_slot (rtx);
+#ifdef RTX_CODE
+extern void mips_expand_scc (rtx *);
+extern void mips_expand_conditional_branch (rtx *);
+extern void mips_expand_vcondv2sf (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
+extern void mips_expand_conditional_move (rtx *);
+extern void mips_expand_conditional_trap (rtx);
+#endif
+extern bool mips_use_pic_fn_addr_reg_p (const_rtx);
+extern rtx mips_expand_call (enum mips_call_type, rtx, rtx, rtx, rtx, bool);
+extern void mips_split_call (rtx, rtx);
+extern bool mips_get_pic_call_symbol (rtx *, int);
+extern void mips_expand_fcc_reload (rtx, rtx, rtx);
+extern void mips_set_return_address (rtx, rtx);
+extern bool mips_expand_block_move (rtx, rtx, rtx);
+extern void mips_expand_synci_loop (rtx, rtx);
+
+extern void mips_init_cumulative_args (CUMULATIVE_ARGS *, tree);
+extern bool mips_pad_arg_upward (enum machine_mode, const_tree);
+extern bool mips_pad_reg_upward (enum machine_mode, tree);
+
+extern bool mips_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern bool mips_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern bool mips_mem_fits_mode_p (enum machine_mode mode, rtx x);
+extern void mips_order_regs_for_local_alloc (void);
+extern HOST_WIDE_INT mips_debugger_offset (rtx, HOST_WIDE_INT);
+
+extern void mips_push_asm_switch (struct mips_asm_switch *);
+extern void mips_pop_asm_switch (struct mips_asm_switch *);
+extern void mips_output_external (FILE *, tree, const char *);
+extern void mips_output_ascii (FILE *, const char *, size_t);
+extern void mips_output_aligned_decl_common (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void mips_declare_common_object (FILE *, const char *,
+ const char *, unsigned HOST_WIDE_INT,
+ unsigned int, bool);
+extern void mips_declare_object (FILE *, const char *, const char *,
+ const char *, ...) ATTRIBUTE_PRINTF_4;
+extern void mips_declare_object_name (FILE *, const char *, tree);
+extern void mips_finish_declare_object (FILE *, tree, int, int);
+
+extern bool mips_small_data_pattern_p (rtx);
+extern rtx mips_rewrite_small_data (rtx);
+extern HOST_WIDE_INT mips_initial_elimination_offset (int, int);
+extern rtx mips_return_addr (int, rtx);
+extern bool mips_must_initialize_gp_p (void);
+extern enum mips_loadgp_style mips_current_loadgp_style (void);
+extern void mips_emit_save_slot_move (rtx, rtx, rtx);
+extern void mips_expand_prologue (void);
+extern void mips_expand_before_return (void);
+extern void mips_expand_epilogue (bool);
+extern bool mips_can_use_return_insn (void);
+extern rtx mips_function_value (const_tree, const_tree, enum machine_mode);
+
+extern bool mips_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode, enum reg_class);
+extern bool mips_dangerous_for_la25_p (rtx);
+extern bool mips_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern enum reg_class mips_secondary_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx, bool);
+extern int mips_class_max_nregs (enum reg_class, enum machine_mode);
+
+extern int mips_adjust_insn_length (rtx, int);
+extern void mips_output_load_label (rtx);
+extern const char *mips_output_conditional_branch (rtx, rtx *, const char *,
+ const char *);
+extern const char *mips_output_order_conditional_branch (rtx, rtx *, bool);
+extern const char *mips_output_sync (void);
+extern const char *mips_output_sync_loop (rtx, rtx *);
+extern unsigned int mips_sync_loop_insns (rtx, rtx *);
+extern const char *mips_output_division (const char *, rtx *);
+extern unsigned int mips_hard_regno_nregs (int, enum machine_mode);
+extern bool mips_linked_madd_p (rtx, rtx);
+extern bool mips_store_data_bypass_p (rtx, rtx);
+extern rtx mips_prefetch_cookie (rtx, rtx);
+
+extern void irix_asm_output_align (FILE *, unsigned);
+extern const char *current_section_name (void);
+extern unsigned int current_section_flags (void);
+extern bool mips_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+
+extern const char *mips16e_output_save_restore (rtx, HOST_WIDE_INT);
+extern bool mips16e_save_restore_pattern_p (rtx, HOST_WIDE_INT,
+ struct mips16e_save_restore_info *);
+
+extern bool mask_low_and_shift_p (enum machine_mode, rtx, rtx, int);
+extern int mask_low_and_shift_len (enum machine_mode, rtx, rtx);
+extern bool and_operands_ok (enum machine_mode, rtx, rtx);
+
+union mips_gen_fn_ptrs
+{
+ rtx (*fn_6) (rtx, rtx, rtx, rtx, rtx, rtx);
+ rtx (*fn_5) (rtx, rtx, rtx, rtx, rtx);
+ rtx (*fn_4) (rtx, rtx, rtx, rtx);
+};
+
+extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
+ rtx, rtx, rtx, rtx);
+
+extern void mips_expand_vector_init (rtx, rtx);
+
+extern bool mips_eh_uses (unsigned int);
+extern bool mips_epilogue_uses (unsigned int);
+extern void mips_final_prescan_insn (rtx, rtx *, int);
+extern int mips_trampoline_code_size (void);
+extern void mips_function_profiler (FILE *);
+
+typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx);
+#ifdef RTX_CODE
+extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
+#endif
+
+#endif /* ! GCC_MIPS_PROTOS_H */
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
new file mode 100644
index 000000000..8e942307c
--- /dev/null
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -0,0 +1,635 @@
+;; MIPS Paired-Single Floating and MIPS-3D Instructions.
+;; Copyright (C) 2004, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec" [
+ UNSPEC_MOVE_TF_PS
+ UNSPEC_C
+
+ ;; MIPS64/MIPS32R2 alnv.ps
+ UNSPEC_ALNV_PS
+
+ ;; MIPS-3D instructions
+ UNSPEC_CABS
+
+ UNSPEC_ADDR_PS
+ UNSPEC_CVT_PW_PS
+ UNSPEC_CVT_PS_PW
+ UNSPEC_MULR_PS
+ UNSPEC_ABS_PS
+
+ UNSPEC_RSQRT1
+ UNSPEC_RSQRT2
+ UNSPEC_RECIP1
+ UNSPEC_RECIP2
+ UNSPEC_SINGLE_CC
+ UNSPEC_SCC
+])
+
+(define_insn "*movcc_v2sf_<mode>"
+ [(set (match_operand:V2SF 0 "register_operand" "=f,f")
+ (if_then_else:V2SF
+ (match_operator:GPR 4 "equality_operator"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (const_int 0)])
+ (match_operand:V2SF 2 "register_operand" "f,0")
+ (match_operand:V2SF 3 "register_operand" "0,f")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "@
+ mov%T4.ps\t%0,%2,%1
+ mov%t4.ps\t%0,%3,%1"
+ [(set_attr "type" "condmove")
+ (set_attr "mode" "SF")])
+
+(define_insn "mips_cond_move_tf_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f,f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f,0")
+ (match_operand:V2SF 2 "register_operand" "0,f")
+ (match_operand:CCV2 3 "register_operand" "z,z")]
+ UNSPEC_MOVE_TF_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "@
+ movt.ps\t%0,%1,%3
+ movf.ps\t%0,%2,%3"
+ [(set_attr "type" "condmove")
+ (set_attr "mode" "SF")])
+
+(define_expand "movv2sfcc"
+ [(set (match_dup 4) (match_operand 1 "comparison_operator"))
+ (set (match_operand:V2SF 0 "register_operand")
+ (if_then_else:V2SF (match_dup 5)
+ (match_operand:V2SF 2 "register_operand")
+ (match_operand:V2SF 3 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ /* We can only support MOVN.PS and MOVZ.PS.
+ NOTE: MOVT.PS and MOVF.PS have different semantics from MOVN.PS and
+ MOVZ.PS. MOVT.PS and MOVF.PS depend on two CC values and move
+ each item independently. */
+
+ if (GET_MODE_CLASS (GET_MODE (XEXP (operands[1], 0))) != MODE_INT)
+ FAIL;
+
+ mips_expand_conditional_move (operands);
+ DONE;
+})
+
+; pul.ps - Pair Upper Lower
+(define_insn "mips_pul_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (vec_merge:V2SF
+ (match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (const_int 2)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "pul.ps\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "SF")])
+
+; puu.ps - Pair upper upper
+(define_insn "mips_puu_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (vec_merge:V2SF
+ (match_operand:V2SF 1 "register_operand" "f")
+ (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 2)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "puu.ps\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "SF")])
+
+; pll.ps - Pair Lower Lower
+(define_insn "mips_pll_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (vec_merge:V2SF
+ (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (match_operand:V2SF 2 "register_operand" "f")
+ (const_int 2)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "pll.ps\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "SF")])
+
+; plu.ps - Pair Lower Upper
+(define_insn "mips_plu_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (vec_merge:V2SF
+ (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 2)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "plu.ps\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "SF")])
+
+; vec_init
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "register_operand")
+ (match_operand:V2SF 1 "")]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0));
+ rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1));
+ emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1));
+ DONE;
+})
+
+(define_insn "vec_initv2sf_internal"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "cvt.ps.s\t%0,%1,%2";
+ else
+ return "cvt.ps.s\t%0,%2,%1";
+}
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")])
+
+;; ??? This is only generated if we perform a vector operation that has to be
+;; emulated. There is no other way to get a vector mode bitfield extract
+;; currently.
+
+(define_insn "vec_extractv2sf"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (vec_select:SF (match_operand:V2SF 1 "register_operand" "f")
+ (parallel
+ [(match_operand 2 "const_0_or_1_operand" "")])))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN)
+ return "cvt.s.pu\t%0,%1";
+ else
+ return "cvt.s.pl\t%0,%1";
+}
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")])
+
+;; ??? This is only generated if we disable the vec_init pattern. There is
+;; no other way to get a vector mode bitfield store currently.
+
+(define_expand "vec_setv2sf"
+ [(match_operand:V2SF 0 "register_operand")
+ (match_operand:SF 1 "register_operand")
+ (match_operand 2 "const_0_or_1_operand")]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ rtx temp;
+
+ /* We don't have an insert instruction, so we duplicate the float, and
+ then use a PUL instruction. */
+ temp = gen_reg_rtx (V2SFmode);
+ emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
+ if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN)
+ emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0]));
+ else
+ emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp));
+ DONE;
+})
+
+; cvt.ps.s - Floating Point Convert Pair to Paired Single
+(define_expand "mips_cvt_ps_s"
+ [(match_operand:V2SF 0 "register_operand")
+ (match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "register_operand")]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2],
+ operands[1]));
+ DONE;
+})
+
+; cvt.s.pl - Floating Point Convert Pair Lower to Single Floating Point
+(define_expand "mips_cvt_s_pl"
+ [(set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_operand:V2SF 1 "register_operand")
+ (parallel [(match_dup 2)])))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ { operands[2] = GEN_INT (BYTES_BIG_ENDIAN); })
+
+; cvt.s.pu - Floating Point Convert Pair Upper to Single Floating Point
+(define_expand "mips_cvt_s_pu"
+ [(set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_operand:V2SF 1 "register_operand")
+ (parallel [(match_dup 2)])))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ { operands[2] = GEN_INT (!BYTES_BIG_ENDIAN); })
+
+; alnv.ps - Floating Point Align Variable
+(define_insn "mips_alnv_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (match_operand:SI 3 "register_operand" "d")]
+ UNSPEC_ALNV_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "alnv.ps\t%0,%1,%2,%3"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "SF")])
+
+; addr.ps - Floating Point Reduction Add
+(define_insn "mips_addr_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")]
+ UNSPEC_ADDR_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "addr.ps\t%0,%1,%2"
+ [(set_attr "type" "fadd")
+ (set_attr "mode" "SF")])
+
+; cvt.pw.ps - Floating Point Convert Paired Single to Paired Word
+(define_insn "mips_cvt_pw_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")]
+ UNSPEC_CVT_PW_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "cvt.pw.ps\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")])
+
+; cvt.ps.pw - Floating Point Convert Paired Word to Paired Single
+(define_insn "mips_cvt_ps_pw"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")]
+ UNSPEC_CVT_PS_PW))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "cvt.ps.pw\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")])
+
+; mulr.ps - Floating Point Reduction Multiply
+(define_insn "mips_mulr_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")]
+ UNSPEC_MULR_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "mulr.ps\t%0,%1,%2"
+ [(set_attr "type" "fmul")
+ (set_attr "mode" "SF")])
+
+; abs.ps
+(define_expand "mips_abs_ps"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand")]
+ UNSPEC_ABS_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ /* If we can ignore NaNs, this operation is equivalent to the
+ rtl ABS code. */
+ if (!HONOR_NANS (V2SFmode))
+ {
+ emit_insn (gen_absv2sf2 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*mips_abs_ps"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f")]
+ UNSPEC_ABS_PS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "abs.ps\t%0,%1"
+ [(set_attr "type" "fabs")
+ (set_attr "mode" "SF")])
+
+;----------------------------------------------------------------------------
+; Floating Point Comparisons for Scalars
+;----------------------------------------------------------------------------
+
+(define_insn "mips_cabs_cond_<fmt>"
+ [(set (match_operand:CC 0 "register_operand" "=z")
+ (unspec:CC [(match_operand:SCALARF 1 "register_operand" "f")
+ (match_operand:SCALARF 2 "register_operand" "f")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_CABS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "cabs.%Y3.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+
+;----------------------------------------------------------------------------
+; Floating Point Comparisons for Four Singles
+;----------------------------------------------------------------------------
+
+(define_insn_and_split "mips_c_cond_4s"
+ [(set (match_operand:CCV4 0 "register_operand" "=z")
+ (unspec:CCV4 [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (match_operand:V2SF 3 "register_operand" "f")
+ (match_operand:V2SF 4 "register_operand" "f")
+ (match_operand 5 "const_int_operand" "")]
+ UNSPEC_C))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 6)
+ (unspec:CCV2 [(match_dup 1)
+ (match_dup 2)
+ (match_dup 5)]
+ UNSPEC_C))
+ (set (match_dup 7)
+ (unspec:CCV2 [(match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_C))]
+{
+ operands[6] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 0);
+ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
+}
+ [(set_attr "type" "fcmp")
+ (set_attr "length" "8")
+ (set_attr "mode" "FPSW")])
+
+(define_insn_and_split "mips_cabs_cond_4s"
+ [(set (match_operand:CCV4 0 "register_operand" "=z")
+ (unspec:CCV4 [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (match_operand:V2SF 3 "register_operand" "f")
+ (match_operand:V2SF 4 "register_operand" "f")
+ (match_operand 5 "const_int_operand" "")]
+ UNSPEC_CABS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 6)
+ (unspec:CCV2 [(match_dup 1)
+ (match_dup 2)
+ (match_dup 5)]
+ UNSPEC_CABS))
+ (set (match_dup 7)
+ (unspec:CCV2 [(match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_CABS))]
+{
+ operands[6] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 0);
+ operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
+}
+ [(set_attr "type" "fcmp")
+ (set_attr "length" "8")
+ (set_attr "mode" "FPSW")])
+
+
+;----------------------------------------------------------------------------
+; Floating Point Comparisons for Paired Singles
+;----------------------------------------------------------------------------
+
+(define_insn "mips_c_cond_ps"
+ [(set (match_operand:CCV2 0 "register_operand" "=z")
+ (unspec:CCV2 [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_C))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "c.%Y3.ps\t%0,%1,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+(define_insn "mips_cabs_cond_ps"
+ [(set (match_operand:CCV2 0 "register_operand" "=z")
+ (unspec:CCV2 [(match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_CABS))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "cabs.%Y3.ps\t%0,%1,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+;; An expander for generating an scc operation.
+(define_expand "scc_ps"
+ [(set (match_operand:CCV2 0)
+ (unspec:CCV2 [(match_operand 1)] UNSPEC_SCC))])
+
+(define_insn "s<code>_ps"
+ [(set (match_operand:CCV2 0 "register_operand" "=z")
+ (unspec:CCV2
+ [(fcond (match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f"))]
+ UNSPEC_SCC))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "c.<fcond>.ps\t%0,%1,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+(define_insn "s<code>_ps"
+ [(set (match_operand:CCV2 0 "register_operand" "=z")
+ (unspec:CCV2
+ [(swapped_fcond (match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f"))]
+ UNSPEC_SCC))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "c.<swapped_fcond>.ps\t%0,%2,%1"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+;----------------------------------------------------------------------------
+; Floating Point Branch Instructions.
+;----------------------------------------------------------------------------
+
+; Branch on Any of Four Floating Point Condition Codes True
+(define_insn "bc1any4t"
+ [(set (pc)
+ (if_then_else (ne (match_operand:CCV4 1 "register_operand" "z")
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "%*bc1any4t\t%1,%0%/"
+ [(set_attr "type" "branch")])
+
+; Branch on Any of Four Floating Point Condition Codes False
+(define_insn "bc1any4f"
+ [(set (pc)
+ (if_then_else (ne (match_operand:CCV4 1 "register_operand" "z")
+ (const_int -1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "%*bc1any4f\t%1,%0%/"
+ [(set_attr "type" "branch")])
+
+; Branch on Any of Two Floating Point Condition Codes True
+(define_insn "bc1any2t"
+ [(set (pc)
+ (if_then_else (ne (match_operand:CCV2 1 "register_operand" "z")
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "%*bc1any2t\t%1,%0%/"
+ [(set_attr "type" "branch")])
+
+; Branch on Any of Two Floating Point Condition Codes False
+(define_insn "bc1any2f"
+ [(set (pc)
+ (if_then_else (ne (match_operand:CCV2 1 "register_operand" "z")
+ (const_int -1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "%*bc1any2f\t%1,%0%/"
+ [(set_attr "type" "branch")])
+
+; Used to access one register in a CCV2 pair. Operand 0 is the register
+; pair and operand 1 is the index of the register we want (a CONST_INT).
+(define_expand "single_cc"
+ [(ne (unspec:CC [(match_operand 0) (match_operand 1)] UNSPEC_SINGLE_CC)
+ (const_int 0))])
+
+; This is a normal floating-point branch pattern, but rather than check
+; a single CCmode register, it checks one register in a CCV2 pair.
+; Operand 2 is the register pair and operand 3 is the index of the
+; register we want.
+(define_insn "*branch_upper_lower"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(unspec:CC [(match_operand:CCV2 2 "register_operand" "z")
+ (match_operand 3 "const_int_operand")]
+ UNSPEC_SINGLE_CC)
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[2]
+ = gen_rtx_REG (CCmode, REGNO (operands[2]) + INTVAL (operands[3]));
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%F1", "%2,%0"),
+ MIPS_BRANCH ("b%W1", "%2,%0"));
+}
+ [(set_attr "type" "branch")])
+
+; As above, but with the sense of the condition reversed.
+(define_insn "*branch_upper_lower_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(unspec:CC [(match_operand:CCV2 2 "register_operand" "z")
+ (match_operand 3 "const_int_operand")]
+ UNSPEC_SINGLE_CC)
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[2]
+ = gen_rtx_REG (CCmode, REGNO (operands[2]) + INTVAL (operands[3]));
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%W1", "%2,%0"),
+ MIPS_BRANCH ("b%F1", "%2,%0"));
+}
+ [(set_attr "type" "branch")])
+
+;----------------------------------------------------------------------------
+; Floating Point Reduced Precision Reciprocal Square Root Instructions.
+;----------------------------------------------------------------------------
+
+(define_insn "mips_rsqrt1_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RSQRT1))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "rsqrt1.<fmt>\t%0,%1"
+ [(set_attr "type" "frsqrt1")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "mips_rsqrt2_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")]
+ UNSPEC_RSQRT2))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "rsqrt2.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "frsqrt2")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "mips_recip1_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RECIP1))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "recip1.<fmt>\t%0,%1"
+ [(set_attr "type" "frdiv1")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "mips_recip2_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")]
+ UNSPEC_RECIP2))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "recip2.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "frdiv2")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "vcondv2sf"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (if_then_else:V2SF
+ (match_operator 3 ""
+ [(match_operand:V2SF 4 "register_operand")
+ (match_operand:V2SF 5 "register_operand")])
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ mips_expand_vcondv2sf (operands[0], operands[1], operands[2],
+ GET_CODE (operands[3]), operands[4], operands[5]);
+ DONE;
+})
+
+(define_expand "sminv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (smin:V2SF (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ mips_expand_vcondv2sf (operands[0], operands[1], operands[2],
+ LE, operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "smaxv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (smax:V2SF (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ mips_expand_vcondv2sf (operands[0], operands[1], operands[2],
+ LE, operands[2], operands[1]);
+ DONE;
+})
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
new file mode 100644
index 000000000..87bf18e6c
--- /dev/null
+++ b/gcc/config/mips/mips.c
@@ -0,0 +1,16662 @@
+/* Subroutines used for MIPS code generation.
+ Copyright (C) 1989, 1990, 1991, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+ 2011
+ Free Software Foundation, Inc.
+ Contributed by A. Lichnewsky, lich@inria.inria.fr.
+ Changes by Michael Meissner, meissner@osf.org.
+ 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and
+ Brendan Eich, brendan@microunity.com.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "output.h"
+#include "tree.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "gstab.h"
+#include "hashtab.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "integrate.h"
+#include "langhooks.h"
+#include "cfglayout.h"
+#include "sched-int.h"
+#include "gimple.h"
+#include "bitmap.h"
+#include "diagnostic.h"
+#include "target-globals.h"
+
+/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */
+#define UNSPEC_ADDRESS_P(X) \
+ (GET_CODE (X) == UNSPEC \
+ && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
+ && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
+
+/* Extract the symbol or label from UNSPEC wrapper X. */
+#define UNSPEC_ADDRESS(X) \
+ XVECEXP (X, 0, 0)
+
+/* Extract the symbol type from UNSPEC wrapper X. */
+#define UNSPEC_ADDRESS_TYPE(X) \
+ ((enum mips_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
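+
+/* For illustration (a sketch under the assumption that address unspecs
+   are built as UNSPEC_ADDRESS_FIRST plus the symbol type): a
+   $gp-relative reference to "foo" would look like
+
+     (unspec:SI [(symbol_ref:SI "foo")]
+                (UNSPEC_ADDRESS_FIRST + (int) SYMBOL_GP_RELATIVE))
+
+   for which UNSPEC_ADDRESS_P is true, UNSPEC_ADDRESS extracts the
+   SYMBOL_REF, and UNSPEC_ADDRESS_TYPE yields SYMBOL_GP_RELATIVE. */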
+
+/* The maximum distance between the top of the stack frame and the
+ value $sp has when we save and restore registers.
+
+ The value for normal-mode code must be a SMALL_OPERAND and must
+ preserve the maximum stack alignment. We therefore use a value
+ of 0x7ff0 in this case.
+
+ MIPS16e SAVE and RESTORE instructions can adjust the stack pointer by
+ up to 0x7f8 bytes and can usually save or restore all the registers
+ that we need to save or restore. (Note that we can only use these
+ instructions for o32, for which the stack alignment is 8 bytes.)
+
+ We use a maximum gap of 0x100 or 0x400 for MIPS16 code when SAVE and
+ RESTORE are not available. We can then use unextended instructions
+ to save and restore registers, and to allocate and deallocate the top
+ part of the frame. */
+#define MIPS_MAX_FIRST_STACK_STEP \
+ (!TARGET_MIPS16 ? 0x7ff0 \
+ : GENERATE_MIPS16E_SAVE_RESTORE ? 0x7f8 \
+ : TARGET_64BIT ? 0x100 : 0x400)
+
+/* True if INSN is a mips.md pattern or asm statement. */
+#define USEFUL_INSN_P(INSN) \
+ (NONDEBUG_INSN_P (INSN) \
+ && GET_CODE (PATTERN (INSN)) != USE \
+ && GET_CODE (PATTERN (INSN)) != CLOBBER \
+ && GET_CODE (PATTERN (INSN)) != ADDR_VEC \
+ && GET_CODE (PATTERN (INSN)) != ADDR_DIFF_VEC)
+
+/* If INSN is a delayed branch sequence, return the first instruction
+ in the sequence, otherwise return INSN itself. */
+#define SEQ_BEGIN(INSN) \
+ (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \
+ ? XVECEXP (PATTERN (INSN), 0, 0) \
+ : (INSN))
+
+/* Likewise for the last instruction in a delayed branch sequence. */
+#define SEQ_END(INSN) \
+ (INSN_P (INSN) && GET_CODE (PATTERN (INSN)) == SEQUENCE \
+ ? XVECEXP (PATTERN (INSN), 0, XVECLEN (PATTERN (INSN), 0) - 1) \
+ : (INSN))
+
+/* Execute the following loop body with SUBINSN set to each instruction
+ between SEQ_BEGIN (INSN) and SEQ_END (INSN) inclusive. */
+#define FOR_EACH_SUBINSN(SUBINSN, INSN) \
+ for ((SUBINSN) = SEQ_BEGIN (INSN); \
+ (SUBINSN) != NEXT_INSN (SEQ_END (INSN)); \
+ (SUBINSN) = NEXT_INSN (SUBINSN))
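+
+/* Illustrative only (note_subinsn is a hypothetical callback): walking
+   every instruction in a function, including those wrapped inside
+   delayed-branch SEQUENCEs, typically looks like
+
+     rtx insn, subinsn;
+     for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
+       FOR_EACH_SUBINSN (subinsn, insn)
+         if (USEFUL_INSN_P (subinsn))
+           note_subinsn (subinsn);  */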
+
+/* True if bit BIT is set in VALUE. */
+#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)
+
+/* Return the opcode for a ptr_mode load of the form:
+
+ l[wd] DEST, OFFSET(BASE). */
+#define MIPS_LOAD_PTR(DEST, OFFSET, BASE) \
+ (((ptr_mode == DImode ? 0x37 : 0x23) << 26) \
+ | ((BASE) << 21) \
+ | ((DEST) << 16) \
+ | (OFFSET))
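+
+/* Worked example (for illustration): on a 32-bit target,
+   MIPS_LOAD_PTR (2, 8, 29) yields
+
+     (0x23 << 26) | (29 << 21) | (2 << 16) | 8 == 0x8fa20008,
+
+   the encoding of "lw $2, 8($29)". */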
+
+/* Return the opcode to move register SRC into register DEST. */
+#define MIPS_MOVE(DEST, SRC) \
+ ((TARGET_64BIT ? 0x2d : 0x21) \
+ | ((DEST) << 11) \
+ | ((SRC) << 21))
+
+/* Return the opcode for:
+
+ lui DEST, VALUE. */
+#define MIPS_LUI(DEST, VALUE) \
+ ((0xf << 26) | ((DEST) << 16) | (VALUE))
+
+/* Return the opcode to jump to register DEST. */
+#define MIPS_JR(DEST) \
+ (((DEST) << 21) | 0x8)
+
+/* Return the opcode for:
+
+ bal . + (1 + OFFSET) * 4. */
+#define MIPS_BAL(OFFSET) \
+ ((0x1 << 26) | (0x11 << 16) | (OFFSET))
+
+/* Return the usual opcode for a nop. */
+#define MIPS_NOP 0
+
+/* Classifies an address.
+
+ ADDRESS_REG
+ A natural register + offset address. The register satisfies
+ mips_valid_base_register_p and the offset is a const_arith_operand.
+
+ ADDRESS_LO_SUM
+ A LO_SUM rtx. The first operand is a valid base register and
+ the second operand is a symbolic address.
+
+ ADDRESS_CONST_INT
+ A signed 16-bit constant address.
+
+ ADDRESS_SYMBOLIC:
+ A constant symbolic address. */
+enum mips_address_type {
+ ADDRESS_REG,
+ ADDRESS_LO_SUM,
+ ADDRESS_CONST_INT,
+ ADDRESS_SYMBOLIC
+};
+
+/* Enumerates the setting of the -mr10k-cache-barrier option. */
+enum mips_r10k_cache_barrier_setting {
+ R10K_CACHE_BARRIER_NONE,
+ R10K_CACHE_BARRIER_STORE,
+ R10K_CACHE_BARRIER_LOAD_STORE
+};
+
+/* Macros to create an enumeration identifier for a function prototype. */
+#define MIPS_FTYPE_NAME1(A, B) MIPS_##A##_FTYPE_##B
+#define MIPS_FTYPE_NAME2(A, B, C) MIPS_##A##_FTYPE_##B##_##C
+#define MIPS_FTYPE_NAME3(A, B, C, D) MIPS_##A##_FTYPE_##B##_##C##_##D
+#define MIPS_FTYPE_NAME4(A, B, C, D, E) MIPS_##A##_FTYPE_##B##_##C##_##D##_##E
+
+/* Classifies the prototype of a built-in function. */
+enum mips_function_type {
+#define DEF_MIPS_FTYPE(NARGS, LIST) MIPS_FTYPE_NAME##NARGS LIST,
+#include "config/mips/mips-ftypes.def"
+#undef DEF_MIPS_FTYPE
+ MIPS_MAX_FTYPE_MAX
+};
+
+/* Specifies how a built-in function should be converted into rtl. */
+enum mips_builtin_type {
+ /* The function corresponds directly to an .md pattern. The return
+ value is mapped to operand 0 and the arguments are mapped to
+ operands 1 and above. */
+ MIPS_BUILTIN_DIRECT,
+
+ /* The function corresponds directly to an .md pattern. There is no return
+ value and the arguments are mapped to operands 0 and above. */
+ MIPS_BUILTIN_DIRECT_NO_TARGET,
+
+ /* The function corresponds to a comparison instruction followed by
+ a mips_cond_move_tf_ps pattern. The first two arguments are the
+ values to compare and the second two arguments are the vector
+ operands for the movt.ps or movf.ps instruction (in assembly order). */
+ MIPS_BUILTIN_MOVF,
+ MIPS_BUILTIN_MOVT,
+
+ /* The function corresponds to a V2SF comparison instruction. Operand 0
+ of this instruction is the result of the comparison, which has mode
+ CCV2 or CCV4. The function arguments are mapped to operands 1 and
+ above. The function's return value is an SImode boolean that is
+ true under the following conditions:
+
+ MIPS_BUILTIN_CMP_ANY: one of the registers is true
+ MIPS_BUILTIN_CMP_ALL: all of the registers are true
+ MIPS_BUILTIN_CMP_LOWER: the first register is true
+ MIPS_BUILTIN_CMP_UPPER: the second register is true. */
+ MIPS_BUILTIN_CMP_ANY,
+ MIPS_BUILTIN_CMP_ALL,
+ MIPS_BUILTIN_CMP_UPPER,
+ MIPS_BUILTIN_CMP_LOWER,
+
+ /* As above, but the instruction only sets a single $fcc register. */
+ MIPS_BUILTIN_CMP_SINGLE,
+
+ /* For generating bposge32 branch instructions in MIPS32 DSP ASE. */
+ MIPS_BUILTIN_BPOSGE32
+};
+
+/* Invoke MACRO (COND) for each C.cond.fmt condition. */
+#define MIPS_FP_CONDITIONS(MACRO) \
+ MACRO (f), \
+ MACRO (un), \
+ MACRO (eq), \
+ MACRO (ueq), \
+ MACRO (olt), \
+ MACRO (ult), \
+ MACRO (ole), \
+ MACRO (ule), \
+ MACRO (sf), \
+ MACRO (ngle), \
+ MACRO (seq), \
+ MACRO (ngl), \
+ MACRO (lt), \
+ MACRO (nge), \
+ MACRO (le), \
+ MACRO (ngt)
+
+/* Enumerates the codes above as MIPS_FP_COND_<X>. */
+#define DECLARE_MIPS_COND(X) MIPS_FP_COND_ ## X
+enum mips_fp_condition {
+ MIPS_FP_CONDITIONS (DECLARE_MIPS_COND)
+};
+
+/* Index X provides the string representation of MIPS_FP_COND_<X>. */
+#define STRINGIFY(X) #X
+static const char *const mips_fp_conditions[] = {
+ MIPS_FP_CONDITIONS (STRINGIFY)
+};
+
+/* Information about a function's frame layout. */
+struct GTY(()) mips_frame_info {
+ /* The size of the frame in bytes. */
+ HOST_WIDE_INT total_size;
+
+ /* The number of bytes allocated to variables. */
+ HOST_WIDE_INT var_size;
+
+ /* The number of bytes allocated to outgoing function arguments. */
+ HOST_WIDE_INT args_size;
+
+ /* The number of bytes allocated to the .cprestore slot, or 0 if there
+ is no such slot. */
+ HOST_WIDE_INT cprestore_size;
+
+ /* Bit X is set if the function saves or restores GPR X. */
+ unsigned int mask;
+
+ /* Likewise FPR X. */
+ unsigned int fmask;
+
+ /* Likewise doubleword accumulator X ($acX). */
+ unsigned int acc_mask;
+
+ /* The number of GPRs, FPRs, doubleword accumulators and COP0
+ registers saved. */
+ unsigned int num_gp;
+ unsigned int num_fp;
+ unsigned int num_acc;
+ unsigned int num_cop0_regs;
+
+ /* The offset of the topmost GPR, FPR, accumulator and COP0-register
+ save slots from the top of the frame, or zero if no such slots are
+ needed. */
+ HOST_WIDE_INT gp_save_offset;
+ HOST_WIDE_INT fp_save_offset;
+ HOST_WIDE_INT acc_save_offset;
+ HOST_WIDE_INT cop0_save_offset;
+
+ /* Likewise, but giving offsets from the bottom of the frame. */
+ HOST_WIDE_INT gp_sp_offset;
+ HOST_WIDE_INT fp_sp_offset;
+ HOST_WIDE_INT acc_sp_offset;
+ HOST_WIDE_INT cop0_sp_offset;
+
+ /* Similar, but the value passed to _mcount. */
+ HOST_WIDE_INT ra_fp_offset;
+
+ /* The offset of arg_pointer_rtx from the bottom of the frame. */
+ HOST_WIDE_INT arg_pointer_offset;
+
+ /* The offset of hard_frame_pointer_rtx from the bottom of the frame. */
+ HOST_WIDE_INT hard_frame_pointer_offset;
+};
+
+struct GTY(()) machine_function {
+ /* The register returned by mips16_gp_pseudo_reg; see there for details. */
+ rtx mips16_gp_pseudo_rtx;
+
+ /* The number of extra stack bytes taken up by register varargs.
+ This area is allocated by the callee at the very top of the frame. */
+ int varargs_size;
+
+ /* The current frame information, calculated by mips_compute_frame_info. */
+ struct mips_frame_info frame;
+
+ /* The register to use as the function's global pointer, or INVALID_REGNUM
+ if the function doesn't need one. */
+ unsigned int global_pointer;
+
+ /* How many instructions it takes to load a label into $AT, or 0 if
+ this property hasn't yet been calculated. */
+ unsigned int load_label_num_insns;
+
+ /* True if mips_adjust_insn_length should ignore an instruction's
+ hazard attribute. */
+ bool ignore_hazard_length_p;
+
+ /* True if the whole function is suitable for .set noreorder and
+ .set nomacro. */
+ bool all_noreorder_p;
+
+ /* True if the function has "inflexible" and "flexible" references
+ to the global pointer. See mips_cfun_has_inflexible_gp_ref_p
+ and mips_cfun_has_flexible_gp_ref_p for details. */
+ bool has_inflexible_gp_insn_p;
+ bool has_flexible_gp_insn_p;
+
+ /* True if the function's prologue must load the global pointer
+ value into pic_offset_table_rtx and store the same value in
+ the function's cprestore slot (if any). Even if this value
+ is currently false, we may decide to set it to true later;
+ see mips_must_initialize_gp_p () for details. */
+ bool must_initialize_gp_p;
+
+ /* True if the current function must restore $gp after any potential
+ clobber. This value is only meaningful during the first post-epilogue
+ split_insns pass; see mips_must_initialize_gp_p () for details. */
+ bool must_restore_gp_when_clobbered_p;
+
+ /* True if this is an interrupt handler. */
+ bool interrupt_handler_p;
+
+ /* True if this is an interrupt handler that uses shadow registers. */
+ bool use_shadow_register_set_p;
+
+ /* True if this is an interrupt handler that should keep interrupts
+ masked. */
+ bool keep_interrupts_masked_p;
+
+ /* True if this is an interrupt handler that should use DERET
+ instead of ERET. */
+ bool use_debug_exception_return_p;
+};
+
+/* Information about a single argument. */
+struct mips_arg_info {
+ /* True if the argument is passed in a floating-point register, or
+ would have been if we hadn't run out of registers. */
+ bool fpr_p;
+
+ /* The number of words passed in registers, rounded up. */
+ unsigned int reg_words;
+
+ /* For EABI, the offset of the first register from GP_ARG_FIRST or
+ FP_ARG_FIRST. For other ABIs, the offset of the first register from
+ the start of the ABI's argument structure (see the CUMULATIVE_ARGS
+ comment for details).
+
+ The value is MAX_ARGS_IN_REGISTERS if the argument is passed entirely
+ on the stack. */
+ unsigned int reg_offset;
+
+ /* The number of words that must be passed on the stack, rounded up. */
+ unsigned int stack_words;
+
+ /* The offset from the start of the stack overflow area of the argument's
+ first stack word. Only meaningful when STACK_WORDS is nonzero. */
+ unsigned int stack_offset;
+};
+
+/* Information about an address described by mips_address_type.
+
+ ADDRESS_CONST_INT
+ No fields are used.
+
+ ADDRESS_REG
+ REG is the base register and OFFSET is the constant offset.
+
+ ADDRESS_LO_SUM
+ REG and OFFSET are the operands to the LO_SUM and SYMBOL_TYPE
+ is the type of symbol it references.
+
+ ADDRESS_SYMBOLIC
+ SYMBOL_TYPE is the type of symbol that the address references. */
+struct mips_address_info {
+ enum mips_address_type type;
+ rtx reg;
+ rtx offset;
+ enum mips_symbol_type symbol_type;
+};
+
+/* One stage in a constant building sequence. These sequences have
+ the form:
+
+ A = VALUE[0]
+ A = A CODE[1] VALUE[1]
+ A = A CODE[2] VALUE[2]
+ ...
+
+ where A is an accumulator, each CODE[i] is a binary rtl operation
+ and each VALUE[i] is a constant integer. CODE[0] is undefined. */
+struct mips_integer_op {
+ enum rtx_code code;
+ unsigned HOST_WIDE_INT value;
+};
+
+/* The largest number of operations needed to load an integer constant.
+ The worst accepted case for 64-bit constants is LUI,ORI,SLL,ORI,SLL,ORI.
+ When the lowest bit is clear, we can try, but reject a sequence with
+ an extra SLL at the end. */
+#define MIPS_MAX_INTEGER_OPS 7
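+
+/* Worked example (illustrative): the worst-case sequence above builds
+   a 64-bit constant 16 bits at a time.  For 0x1234567876543210:
+
+     lui   A, 0x1234        A = 0x12340000
+     ori   A, A, 0x5678     A = 0x12345678
+     dsll  A, A, 16         A = 0x123456780000
+     ori   A, A, 0x7654     A = 0x123456787654
+     dsll  A, A, 16         A = 0x1234567876540000
+     ori   A, A, 0x3210     A = 0x1234567876543210  */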
+
+/* Information about a MIPS16e SAVE or RESTORE instruction. */
+struct mips16e_save_restore_info {
+ /* The number of argument registers saved by a SAVE instruction.
+ 0 for RESTORE instructions. */
+ unsigned int nargs;
+
+ /* Bit X is set if the instruction saves or restores GPR X. */
+ unsigned int mask;
+
+ /* The total number of bytes to allocate. */
+ HOST_WIDE_INT size;
+};
+
+/* Costs of various operations on the different architectures. */
+
+struct mips_rtx_cost_data
+{
+ unsigned short fp_add;
+ unsigned short fp_mult_sf;
+ unsigned short fp_mult_df;
+ unsigned short fp_div_sf;
+ unsigned short fp_div_df;
+ unsigned short int_mult_si;
+ unsigned short int_mult_di;
+ unsigned short int_div_si;
+ unsigned short int_div_di;
+ unsigned short branch_cost;
+ unsigned short memory_latency;
+};
+
+/* Global variables for machine-dependent things. */
+
+/* The -G setting, or the configuration's default small-data limit if
+ no -G option is given. */
+static unsigned int mips_small_data_threshold;
+
+/* The number of file directives written by mips_output_filename. */
+int num_source_filenames;
+
+/* The name that appeared in the last .file directive written by
+ mips_output_filename, or "" if mips_output_filename hasn't
+ written anything yet. */
+const char *current_function_file = "";
+
+/* A label counter used by PUT_SDB_BLOCK_START and PUT_SDB_BLOCK_END. */
+int sdb_label_count;
+
+/* Arrays that map GCC register numbers to debugger register numbers. */
+int mips_dbx_regno[FIRST_PSEUDO_REGISTER];
+int mips_dwarf_regno[FIRST_PSEUDO_REGISTER];
+
+/* Assembler switches controlled by the PRINT_OPERAND '%(', '%<' and '%['
+   constructs; each records its ".set" name and current nesting depth.  */
+struct mips_asm_switch mips_noreorder = { "reorder", 0 };
+struct mips_asm_switch mips_nomacro = { "macro", 0 };
+struct mips_asm_switch mips_noat = { "at", 0 };
+
+/* True if we're writing out a branch-likely instruction rather than a
+ normal branch. */
+static bool mips_branch_likely;
+
+/* The current instruction-set architecture. */
+enum processor mips_arch;
+const struct mips_cpu_info *mips_arch_info;
+
+/* The processor that we should tune the code for. */
+enum processor mips_tune;
+const struct mips_cpu_info *mips_tune_info;
+
+/* The ISA level associated with mips_arch. */
+int mips_isa;
+
+/* The architecture selected by -mipsN, or null if -mipsN wasn't used. */
+static const struct mips_cpu_info *mips_isa_option_info;
+
+/* Which ABI to use. */
+int mips_abi = MIPS_ABI_DEFAULT;
+
+/* Which cost information to use. */
+static const struct mips_rtx_cost_data *mips_cost;
+
+/* The ambient target flags, excluding MASK_MIPS16. */
+static int mips_base_target_flags;
+
+/* True if MIPS16 is the default mode. */
+bool mips_base_mips16;
+
+/* The ambient values of other global variables. */
+static int mips_base_schedule_insns; /* flag_schedule_insns */
+static int mips_base_reorder_blocks_and_partition; /* flag_reorder... */
+static int mips_base_move_loop_invariants; /* flag_move_loop_invariants */
+static int mips_base_align_loops; /* align_loops */
+static int mips_base_align_jumps; /* align_jumps */
+static int mips_base_align_functions; /* align_functions */
+
+/* The -mcode-readable setting. */
+enum mips_code_readable_setting mips_code_readable = CODE_READABLE_YES;
+
+/* The -mr10k-cache-barrier setting. */
+static enum mips_r10k_cache_barrier_setting mips_r10k_cache_barrier;
+
+/* Index [M][R] is true if register R is allowed to hold a value of mode M. */
+bool mips_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER];
+
+/* Index C is true if character C is a valid PRINT_OPERAND punctuation
+   character.  */
+static bool mips_print_operand_punct[256];
+
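+/* Nonzero until mips_output_filename has written its first .file
+   directive (assumed from the variable's name and initializer).  */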
+static GTY (()) int mips_output_filename_first_time = 1;
+
+/* mips_split_p[X] is true if symbols of type X can be split by
+ mips_split_symbol. */
+bool mips_split_p[NUM_SYMBOL_TYPES];
+
+/* mips_split_hi_p[X] is true if the high parts of symbols of type X
+ can be split by mips_split_symbol. */
+bool mips_split_hi_p[NUM_SYMBOL_TYPES];
+
+/* mips_lo_relocs[X] is the relocation to use when a symbol of type X
+ appears in a LO_SUM. It can be null if such LO_SUMs aren't valid or
+ if they are matched by a special .md file pattern. */
+static const char *mips_lo_relocs[NUM_SYMBOL_TYPES];
+
+/* Likewise for HIGHs. */
+static const char *mips_hi_relocs[NUM_SYMBOL_TYPES];
+
+/* Target state for MIPS16. */
+struct target_globals *mips16_globals;
+
+/* Cached value of can_issue_more. This is cached in mips_variable_issue hook
+ and returned from mips_sched_reorder2. */
+static int cached_can_issue_more;
+
+/* Index R is the smallest register class that contains register R. */
+const enum reg_class mips_regno_to_class[FIRST_PSEUDO_REGISTER] = {
+ LEA_REGS, LEA_REGS, M16_REGS, V1_REG,
+ M16_REGS, M16_REGS, M16_REGS, M16_REGS,
+ LEA_REGS, LEA_REGS, LEA_REGS, LEA_REGS,
+ LEA_REGS, LEA_REGS, LEA_REGS, LEA_REGS,
+ M16_REGS, M16_REGS, LEA_REGS, LEA_REGS,
+ LEA_REGS, LEA_REGS, LEA_REGS, LEA_REGS,
+ T_REG, PIC_FN_ADDR_REG, LEA_REGS, LEA_REGS,
+ LEA_REGS, LEA_REGS, LEA_REGS, LEA_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ MD0_REG, MD1_REG, NO_REGS, ST_REGS,
+ ST_REGS, ST_REGS, ST_REGS, ST_REGS,
+ ST_REGS, ST_REGS, ST_REGS, NO_REGS,
+ NO_REGS, FRAME_REGS, FRAME_REGS, NO_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP0_REGS, COP0_REGS, COP0_REGS, COP0_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP2_REGS, COP2_REGS, COP2_REGS, COP2_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ COP3_REGS, COP3_REGS, COP3_REGS, COP3_REGS,
+ DSP_ACC_REGS, DSP_ACC_REGS, DSP_ACC_REGS, DSP_ACC_REGS,
+ DSP_ACC_REGS, DSP_ACC_REGS, ALL_REGS, ALL_REGS,
+ ALL_REGS, ALL_REGS, ALL_REGS, ALL_REGS
+};
+
+/* The value of TARGET_ATTRIBUTE_TABLE. */
+static const struct attribute_spec mips_attribute_table[] = {
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "long_call", 0, 0, false, true, true, NULL },
+ { "far", 0, 0, false, true, true, NULL },
+ { "near", 0, 0, false, true, true, NULL },
+ /* We would really like to treat "mips16" and "nomips16" as type
+ attributes, but GCC doesn't provide the hooks we need to support
+ the right conversion rules. As declaration attributes, they affect
+ code generation but don't carry other semantics. */
+ { "mips16", 0, 0, true, false, false, NULL },
+ { "nomips16", 0, 0, true, false, false, NULL },
+  /* Allow functions to be specified as interrupt handlers.  */
+ { "interrupt", 0, 0, false, true, true, NULL },
+ { "use_shadow_register_set", 0, 0, false, true, true, NULL },
+ { "keep_interrupts_masked", 0, 0, false, true, true, NULL },
+ { "use_debug_exception_return", 0, 0, false, true, true, NULL },
+ { NULL, 0, 0, false, false, false, NULL }
+};
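+
+/* For illustration (a hypothetical declaration, not from this file), the
+   table above is what allows source code such as
+
+       void f (void) __attribute__ ((interrupt, use_shadow_register_set));
+
+   to attach MIPS-specific semantics to a function type.  */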
+
+/* A table describing all the processors GCC knows about. Names are
+ matched in the order listed. The first mention of an ISA level is
+ taken as the canonical name for that ISA.
+
+ To ease comparison, please keep this table in the same order
+ as GAS's mips_cpu_info_table. Please also make sure that
+ MIPS_ISA_LEVEL_SPEC and MIPS_ARCH_FLOAT_SPEC handle all -march
+ options correctly. */
+static const struct mips_cpu_info mips_cpu_info_table[] = {
+ /* Entries for generic ISAs. */
+ { "mips1", PROCESSOR_R3000, 1, 0 },
+ { "mips2", PROCESSOR_R6000, 2, 0 },
+ { "mips3", PROCESSOR_R4000, 3, 0 },
+ { "mips4", PROCESSOR_R8000, 4, 0 },
+ /* Prefer not to use branch-likely instructions for generic MIPS32rX
+ and MIPS64rX code. The instructions were officially deprecated
+ in revisions 2 and earlier, but revision 3 is likely to downgrade
+ that to a recommendation to avoid the instructions in code that
+ isn't tuned to a specific processor. */
+ { "mips32", PROCESSOR_4KC, 32, PTF_AVOID_BRANCHLIKELY },
+ { "mips32r2", PROCESSOR_M4K, 33, PTF_AVOID_BRANCHLIKELY },
+ { "mips64", PROCESSOR_5KC, 64, PTF_AVOID_BRANCHLIKELY },
+ /* ??? For now just tune the generic MIPS64r2 for 5KC as well. */
+ { "mips64r2", PROCESSOR_5KC, 65, PTF_AVOID_BRANCHLIKELY },
+
+ /* MIPS I processors. */
+ { "r3000", PROCESSOR_R3000, 1, 0 },
+ { "r2000", PROCESSOR_R3000, 1, 0 },
+ { "r3900", PROCESSOR_R3900, 1, 0 },
+
+ /* MIPS II processors. */
+ { "r6000", PROCESSOR_R6000, 2, 0 },
+
+ /* MIPS III processors. */
+ { "r4000", PROCESSOR_R4000, 3, 0 },
+ { "vr4100", PROCESSOR_R4100, 3, 0 },
+ { "vr4111", PROCESSOR_R4111, 3, 0 },
+ { "vr4120", PROCESSOR_R4120, 3, 0 },
+ { "vr4130", PROCESSOR_R4130, 3, 0 },
+ { "vr4300", PROCESSOR_R4300, 3, 0 },
+ { "r4400", PROCESSOR_R4000, 3, 0 },
+ { "r4600", PROCESSOR_R4600, 3, 0 },
+ { "orion", PROCESSOR_R4600, 3, 0 },
+ { "r4650", PROCESSOR_R4650, 3, 0 },
+ /* ST Loongson 2E/2F processors. */
+ { "loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY },
+ { "loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY },
+
+ /* MIPS IV processors. */
+ { "r8000", PROCESSOR_R8000, 4, 0 },
+ { "r10000", PROCESSOR_R10000, 4, 0 },
+ { "r12000", PROCESSOR_R10000, 4, 0 },
+ { "r14000", PROCESSOR_R10000, 4, 0 },
+ { "r16000", PROCESSOR_R10000, 4, 0 },
+ { "vr5000", PROCESSOR_R5000, 4, 0 },
+ { "vr5400", PROCESSOR_R5400, 4, 0 },
+ { "vr5500", PROCESSOR_R5500, 4, PTF_AVOID_BRANCHLIKELY },
+ { "rm7000", PROCESSOR_R7000, 4, 0 },
+ { "rm9000", PROCESSOR_R9000, 4, 0 },
+
+ /* MIPS32 processors. */
+ { "4kc", PROCESSOR_4KC, 32, 0 },
+ { "4km", PROCESSOR_4KC, 32, 0 },
+ { "4kp", PROCESSOR_4KP, 32, 0 },
+ { "4ksc", PROCESSOR_4KC, 32, 0 },
+
+ /* MIPS32 Release 2 processors. */
+ { "m4k", PROCESSOR_M4K, 33, 0 },
+ { "4kec", PROCESSOR_4KC, 33, 0 },
+ { "4kem", PROCESSOR_4KC, 33, 0 },
+ { "4kep", PROCESSOR_4KP, 33, 0 },
+ { "4ksd", PROCESSOR_4KC, 33, 0 },
+
+ { "24kc", PROCESSOR_24KC, 33, 0 },
+ { "24kf2_1", PROCESSOR_24KF2_1, 33, 0 },
+ { "24kf", PROCESSOR_24KF2_1, 33, 0 },
+ { "24kf1_1", PROCESSOR_24KF1_1, 33, 0 },
+ { "24kfx", PROCESSOR_24KF1_1, 33, 0 },
+ { "24kx", PROCESSOR_24KF1_1, 33, 0 },
+
+ { "24kec", PROCESSOR_24KC, 33, 0 }, /* 24K with DSP. */
+ { "24kef2_1", PROCESSOR_24KF2_1, 33, 0 },
+ { "24kef", PROCESSOR_24KF2_1, 33, 0 },
+ { "24kef1_1", PROCESSOR_24KF1_1, 33, 0 },
+ { "24kefx", PROCESSOR_24KF1_1, 33, 0 },
+ { "24kex", PROCESSOR_24KF1_1, 33, 0 },
+
+ { "34kc", PROCESSOR_24KC, 33, 0 }, /* 34K with MT/DSP. */
+ { "34kf2_1", PROCESSOR_24KF2_1, 33, 0 },
+ { "34kf", PROCESSOR_24KF2_1, 33, 0 },
+ { "34kf1_1", PROCESSOR_24KF1_1, 33, 0 },
+ { "34kfx", PROCESSOR_24KF1_1, 33, 0 },
+ { "34kx", PROCESSOR_24KF1_1, 33, 0 },
+
+ { "74kc", PROCESSOR_74KC, 33, 0 }, /* 74K with DSPr2. */
+ { "74kf2_1", PROCESSOR_74KF2_1, 33, 0 },
+ { "74kf", PROCESSOR_74KF2_1, 33, 0 },
+ { "74kf1_1", PROCESSOR_74KF1_1, 33, 0 },
+ { "74kfx", PROCESSOR_74KF1_1, 33, 0 },
+ { "74kx", PROCESSOR_74KF1_1, 33, 0 },
+ { "74kf3_2", PROCESSOR_74KF3_2, 33, 0 },
+
+ { "1004kc", PROCESSOR_24KC, 33, 0 }, /* 1004K with MT/DSP. */
+ { "1004kf2_1", PROCESSOR_24KF2_1, 33, 0 },
+ { "1004kf", PROCESSOR_24KF2_1, 33, 0 },
+ { "1004kf1_1", PROCESSOR_24KF1_1, 33, 0 },
+
+ /* MIPS64 processors. */
+ { "5kc", PROCESSOR_5KC, 64, 0 },
+ { "5kf", PROCESSOR_5KF, 64, 0 },
+ { "20kc", PROCESSOR_20KC, 64, PTF_AVOID_BRANCHLIKELY },
+ { "sb1", PROCESSOR_SB1, 64, PTF_AVOID_BRANCHLIKELY },
+ { "sb1a", PROCESSOR_SB1A, 64, PTF_AVOID_BRANCHLIKELY },
+ { "sr71000", PROCESSOR_SR71000, 64, PTF_AVOID_BRANCHLIKELY },
+ { "xlr", PROCESSOR_XLR, 64, 0 },
+ { "loongson3a", PROCESSOR_LOONGSON_3A, 64, PTF_AVOID_BRANCHLIKELY },
+
+ /* MIPS64 Release 2 processors. */
+ { "octeon", PROCESSOR_OCTEON, 65, PTF_AVOID_BRANCHLIKELY }
+};
+
+/* Default costs.  If these are used for a processor, we should look
+   up that processor's actual costs.  */
+#define DEFAULT_COSTS COSTS_N_INSNS (6), /* fp_add */ \
+ COSTS_N_INSNS (7), /* fp_mult_sf */ \
+ COSTS_N_INSNS (8), /* fp_mult_df */ \
+ COSTS_N_INSNS (23), /* fp_div_sf */ \
+ COSTS_N_INSNS (36), /* fp_div_df */ \
+ COSTS_N_INSNS (10), /* int_mult_si */ \
+ COSTS_N_INSNS (10), /* int_mult_di */ \
+ COSTS_N_INSNS (69), /* int_div_si */ \
+ COSTS_N_INSNS (69), /* int_div_di */ \
+ 2, /* branch_cost */ \
+ 4 /* memory_latency */
+
+/* Floating-point costs for processors without an FPU. Just assume that
+ all floating-point libcalls are very expensive. */
+#define SOFT_FP_COSTS COSTS_N_INSNS (256), /* fp_add */ \
+ COSTS_N_INSNS (256), /* fp_mult_sf */ \
+ COSTS_N_INSNS (256), /* fp_mult_df */ \
+ COSTS_N_INSNS (256), /* fp_div_sf */ \
+ COSTS_N_INSNS (256) /* fp_div_df */
+
+/* Costs to use when optimizing for size. */
+static const struct mips_rtx_cost_data mips_rtx_cost_optimize_size = {
+ COSTS_N_INSNS (1), /* fp_add */
+ COSTS_N_INSNS (1), /* fp_mult_sf */
+ COSTS_N_INSNS (1), /* fp_mult_df */
+ COSTS_N_INSNS (1), /* fp_div_sf */
+ COSTS_N_INSNS (1), /* fp_div_df */
+ COSTS_N_INSNS (1), /* int_mult_si */
+ COSTS_N_INSNS (1), /* int_mult_di */
+ COSTS_N_INSNS (1), /* int_div_si */
+ COSTS_N_INSNS (1), /* int_div_di */
+ 2, /* branch_cost */
+ 4 /* memory_latency */
+};
+
+/* Costs to use when optimizing for speed, indexed by processor. */
+static const struct mips_rtx_cost_data
+ mips_rtx_cost_data[NUM_PROCESSOR_VALUES] = {
+ { /* R3000 */
+ COSTS_N_INSNS (2), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (12), /* fp_div_sf */
+ COSTS_N_INSNS (19), /* fp_div_df */
+ COSTS_N_INSNS (12), /* int_mult_si */
+ COSTS_N_INSNS (12), /* int_mult_di */
+ COSTS_N_INSNS (35), /* int_div_si */
+ COSTS_N_INSNS (35), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 4KC */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (6), /* int_mult_si */
+ COSTS_N_INSNS (6), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (36), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 4KP */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (36), /* int_mult_si */
+ COSTS_N_INSNS (36), /* int_mult_di */
+ COSTS_N_INSNS (37), /* int_div_si */
+ COSTS_N_INSNS (37), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 5KC */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (11), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (68), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 5KF */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (17), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (11), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (68), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 20KC */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (17), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (7), /* int_mult_di */
+ COSTS_N_INSNS (42), /* int_div_si */
+ COSTS_N_INSNS (72), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 24KC */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 24KF2_1 */
+ COSTS_N_INSNS (8), /* fp_add */
+ COSTS_N_INSNS (8), /* fp_mult_sf */
+ COSTS_N_INSNS (10), /* fp_mult_df */
+ COSTS_N_INSNS (34), /* fp_div_sf */
+ COSTS_N_INSNS (64), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 24KF1_1 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (17), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 74KC */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 74KF2_1 */
+ COSTS_N_INSNS (8), /* fp_add */
+ COSTS_N_INSNS (8), /* fp_mult_sf */
+ COSTS_N_INSNS (10), /* fp_mult_df */
+ COSTS_N_INSNS (34), /* fp_div_sf */
+ COSTS_N_INSNS (64), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 74KF1_1 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (17), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* 74KF3_2 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (6), /* fp_mult_sf */
+ COSTS_N_INSNS (7), /* fp_mult_df */
+ COSTS_N_INSNS (25), /* fp_div_sf */
+ COSTS_N_INSNS (48), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (41), /* int_div_si */
+ COSTS_N_INSNS (41), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* Loongson-2E */
+ DEFAULT_COSTS
+ },
+ { /* Loongson-2F */
+ DEFAULT_COSTS
+ },
+ { /* Loongson-3A */
+ DEFAULT_COSTS
+ },
+ { /* M4k */
+ DEFAULT_COSTS
+ },
+  { /* Octeon */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (72), /* int_div_si */
+ COSTS_N_INSNS (72), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R3900 */
+ COSTS_N_INSNS (2), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (12), /* fp_div_sf */
+ COSTS_N_INSNS (19), /* fp_div_df */
+ COSTS_N_INSNS (2), /* int_mult_si */
+ COSTS_N_INSNS (2), /* int_mult_di */
+ COSTS_N_INSNS (35), /* int_div_si */
+ COSTS_N_INSNS (35), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R6000 */
+ COSTS_N_INSNS (3), /* fp_add */
+ COSTS_N_INSNS (5), /* fp_mult_sf */
+ COSTS_N_INSNS (6), /* fp_mult_df */
+ COSTS_N_INSNS (15), /* fp_div_sf */
+ COSTS_N_INSNS (16), /* fp_div_df */
+ COSTS_N_INSNS (17), /* int_mult_si */
+ COSTS_N_INSNS (17), /* int_mult_di */
+ COSTS_N_INSNS (38), /* int_div_si */
+ COSTS_N_INSNS (38), /* int_div_di */
+ 2, /* branch_cost */
+ 6 /* memory_latency */
+ },
+ { /* R4000 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (7), /* fp_mult_sf */
+ COSTS_N_INSNS (8), /* fp_mult_df */
+ COSTS_N_INSNS (23), /* fp_div_sf */
+ COSTS_N_INSNS (36), /* fp_div_df */
+ COSTS_N_INSNS (10), /* int_mult_si */
+ COSTS_N_INSNS (10), /* int_mult_di */
+ COSTS_N_INSNS (69), /* int_div_si */
+ COSTS_N_INSNS (69), /* int_div_di */
+ 2, /* branch_cost */
+ 6 /* memory_latency */
+ },
+ { /* R4100 */
+ DEFAULT_COSTS
+ },
+ { /* R4111 */
+ DEFAULT_COSTS
+ },
+ { /* R4120 */
+ DEFAULT_COSTS
+ },
+ { /* R4130 */
+    /* The only costs that appear to be updated here are those
+       for integer multiplication.  */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (6), /* int_mult_di */
+ COSTS_N_INSNS (69), /* int_div_si */
+ COSTS_N_INSNS (69), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R4300 */
+ DEFAULT_COSTS
+ },
+ { /* R4600 */
+ DEFAULT_COSTS
+ },
+ { /* R4650 */
+ DEFAULT_COSTS
+ },
+ { /* R5000 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (23), /* fp_div_sf */
+ COSTS_N_INSNS (36), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (36), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R5400 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (5), /* fp_mult_sf */
+ COSTS_N_INSNS (6), /* fp_mult_df */
+ COSTS_N_INSNS (30), /* fp_div_sf */
+ COSTS_N_INSNS (59), /* fp_div_df */
+ COSTS_N_INSNS (3), /* int_mult_si */
+ COSTS_N_INSNS (4), /* int_mult_di */
+ COSTS_N_INSNS (42), /* int_div_si */
+ COSTS_N_INSNS (74), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R5500 */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (5), /* fp_mult_sf */
+ COSTS_N_INSNS (6), /* fp_mult_df */
+ COSTS_N_INSNS (30), /* fp_div_sf */
+ COSTS_N_INSNS (59), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (9), /* int_mult_di */
+ COSTS_N_INSNS (42), /* int_div_si */
+ COSTS_N_INSNS (74), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R7000 */
+    /* The only costs that are changed here are those for
+       integer multiplication.  */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (7), /* fp_mult_sf */
+ COSTS_N_INSNS (8), /* fp_mult_df */
+ COSTS_N_INSNS (23), /* fp_div_sf */
+ COSTS_N_INSNS (36), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (9), /* int_mult_di */
+ COSTS_N_INSNS (69), /* int_div_si */
+ COSTS_N_INSNS (69), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R8000 */
+ DEFAULT_COSTS
+ },
+ { /* R9000 */
+    /* The only costs that are changed here are those for
+       integer multiplication.  */
+ COSTS_N_INSNS (6), /* fp_add */
+ COSTS_N_INSNS (7), /* fp_mult_sf */
+ COSTS_N_INSNS (8), /* fp_mult_df */
+ COSTS_N_INSNS (23), /* fp_div_sf */
+ COSTS_N_INSNS (36), /* fp_div_df */
+ COSTS_N_INSNS (3), /* int_mult_si */
+ COSTS_N_INSNS (8), /* int_mult_di */
+ COSTS_N_INSNS (69), /* int_div_si */
+ COSTS_N_INSNS (69), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* R1x000 */
+ COSTS_N_INSNS (2), /* fp_add */
+ COSTS_N_INSNS (2), /* fp_mult_sf */
+ COSTS_N_INSNS (2), /* fp_mult_df */
+ COSTS_N_INSNS (12), /* fp_div_sf */
+ COSTS_N_INSNS (19), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (9), /* int_mult_di */
+ COSTS_N_INSNS (34), /* int_div_si */
+ COSTS_N_INSNS (66), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* SB1 */
+ /* These costs are the same as the SB-1A below. */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (4), /* fp_mult_df */
+ COSTS_N_INSNS (24), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (3), /* int_mult_si */
+ COSTS_N_INSNS (4), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (68), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* SB1-A */
+ /* These costs are the same as the SB-1 above. */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (4), /* fp_mult_df */
+ COSTS_N_INSNS (24), /* fp_div_sf */
+ COSTS_N_INSNS (32), /* fp_div_df */
+ COSTS_N_INSNS (3), /* int_mult_si */
+ COSTS_N_INSNS (4), /* int_mult_di */
+ COSTS_N_INSNS (36), /* int_div_si */
+ COSTS_N_INSNS (68), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
+ { /* SR71000 */
+ DEFAULT_COSTS
+ },
+ { /* XLR */
+ SOFT_FP_COSTS,
+ COSTS_N_INSNS (8), /* int_mult_si */
+ COSTS_N_INSNS (8), /* int_mult_di */
+ COSTS_N_INSNS (72), /* int_div_si */
+ COSTS_N_INSNS (72), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ }
+};
+
+static rtx mips_find_pic_call_symbol (rtx, rtx, bool);
+static int mips_register_move_cost (enum machine_mode, reg_class_t,
+ reg_class_t);
+static unsigned int mips_function_arg_boundary (enum machine_mode, const_tree);
+
+/* This hash table keeps track of implicit "mips16" and "nomips16" attributes
+   for -mflip-mips16.  It maps decl names onto a boolean mode setting.  */
+struct GTY (()) mflip_mips16_entry {
+ const char *name;
+ bool mips16_p;
+};
+static GTY ((param_is (struct mflip_mips16_entry))) htab_t mflip_mips16_htab;
+
+/* Hash table callbacks for mflip_mips16_htab. */
+
+static hashval_t
+mflip_mips16_htab_hash (const void *entry)
+{
+ return htab_hash_string (((const struct mflip_mips16_entry *) entry)->name);
+}
+
+static int
+mflip_mips16_htab_eq (const void *entry, const void *name)
+{
+ return strcmp (((const struct mflip_mips16_entry *) entry)->name,
+ (const char *) name) == 0;
+}
+
+/* True if -mflip-mips16 should next add an attribute for the default MIPS16
+ mode, false if it should next add an attribute for the opposite mode. */
+static GTY(()) bool mips16_flipper;
+
+/* DECL is a function that needs a default "mips16" or "nomips16" attribute
+ for -mflip-mips16. Return true if it should use "mips16" and false if
+ it should use "nomips16". */
+
+static bool
+mflip_mips16_use_mips16_p (tree decl)
+{
+ struct mflip_mips16_entry *entry;
+ const char *name;
+ hashval_t hash;
+ void **slot;
+
+ /* Use the opposite of the command-line setting for anonymous decls. */
+ if (!DECL_NAME (decl))
+ return !mips_base_mips16;
+
+ if (!mflip_mips16_htab)
+ mflip_mips16_htab = htab_create_ggc (37, mflip_mips16_htab_hash,
+ mflip_mips16_htab_eq, NULL);
+
+ name = IDENTIFIER_POINTER (DECL_NAME (decl));
+ hash = htab_hash_string (name);
+ slot = htab_find_slot_with_hash (mflip_mips16_htab, name, hash, INSERT);
+ entry = (struct mflip_mips16_entry *) *slot;
+ if (!entry)
+ {
+ mips16_flipper = !mips16_flipper;
+ entry = ggc_alloc_mflip_mips16_entry ();
+ entry->name = name;
+ entry->mips16_p = mips16_flipper ? !mips_base_mips16 : mips_base_mips16;
+ *slot = entry;
+ }
+ return entry->mips16_p;
+}
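+
+/* To illustrate the effect (an assumed compilation order, not taken from
+   the code above): with -mflip-mips16 and a "nomips16" default, the first
+   named function seen is given "mips16", the second "nomips16", and so
+   on, while duplicate declarations of the same name reuse the cached
+   entry.  */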
+
+/* Predicates to test for presence of "near" and "far"/"long_call"
+ attributes on the given TYPE. */
+
+static bool
+mips_near_type_p (const_tree type)
+{
+ return lookup_attribute ("near", TYPE_ATTRIBUTES (type)) != NULL;
+}
+
+static bool
+mips_far_type_p (const_tree type)
+{
+ return (lookup_attribute ("long_call", TYPE_ATTRIBUTES (type)) != NULL
+ || lookup_attribute ("far", TYPE_ATTRIBUTES (type)) != NULL);
+}
+
+/* Similar predicates for "mips16"/"nomips16" function attributes. */
+
+static bool
+mips_mips16_decl_p (const_tree decl)
+{
+ return lookup_attribute ("mips16", DECL_ATTRIBUTES (decl)) != NULL;
+}
+
+static bool
+mips_nomips16_decl_p (const_tree decl)
+{
+ return lookup_attribute ("nomips16", DECL_ATTRIBUTES (decl)) != NULL;
+}
+
+/* Check if the interrupt attribute is set for a function. */
+
+static bool
+mips_interrupt_type_p (tree type)
+{
+ return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
+}
+
+/* Check if the attribute to use shadow register set is set for a function. */
+
+static bool
+mips_use_shadow_register_set_p (tree type)
+{
+ return lookup_attribute ("use_shadow_register_set",
+ TYPE_ATTRIBUTES (type)) != NULL;
+}
+
+/* Check if the attribute to keep interrupts masked is set for a function. */
+
+static bool
+mips_keep_interrupts_masked_p (tree type)
+{
+ return lookup_attribute ("keep_interrupts_masked",
+ TYPE_ATTRIBUTES (type)) != NULL;
+}
+
+/* Check if the attribute to use debug exception return is set for
+ a function. */
+
+static bool
+mips_use_debug_exception_return_p (tree type)
+{
+ return lookup_attribute ("use_debug_exception_return",
+ TYPE_ATTRIBUTES (type)) != NULL;
+}
+
+/* Return true if function DECL is a MIPS16 function. Return the ambient
+ setting if DECL is null. */
+
+static bool
+mips_use_mips16_mode_p (tree decl)
+{
+ if (decl)
+ {
+ /* Nested functions must use the same frame pointer as their
+ parent and must therefore use the same ISA mode. */
+ tree parent = decl_function_context (decl);
+ if (parent)
+ decl = parent;
+ if (mips_mips16_decl_p (decl))
+ return true;
+ if (mips_nomips16_decl_p (decl))
+ return false;
+ }
+ return mips_base_mips16;
+}
+
+/* Implement TARGET_COMP_TYPE_ATTRIBUTES. */
+
+static int
+mips_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ /* Disallow mixed near/far attributes. */
+ if (mips_far_type_p (type1) && mips_near_type_p (type2))
+ return 0;
+ if (mips_near_type_p (type1) && mips_far_type_p (type2))
+ return 0;
+ return 1;
+}
+
+/* Implement TARGET_INSERT_ATTRIBUTES. */
+
+static void
+mips_insert_attributes (tree decl, tree *attributes)
+{
+ const char *name;
+ bool mips16_p, nomips16_p;
+
+ /* Check for "mips16" and "nomips16" attributes. */
+ mips16_p = lookup_attribute ("mips16", *attributes) != NULL;
+ nomips16_p = lookup_attribute ("nomips16", *attributes) != NULL;
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ {
+ if (mips16_p)
+ error ("%qs attribute only applies to functions", "mips16");
+ if (nomips16_p)
+ error ("%qs attribute only applies to functions", "nomips16");
+ }
+ else
+ {
+ mips16_p |= mips_mips16_decl_p (decl);
+ nomips16_p |= mips_nomips16_decl_p (decl);
+ if (mips16_p || nomips16_p)
+ {
+ /* DECL cannot be simultaneously "mips16" and "nomips16". */
+ if (mips16_p && nomips16_p)
+ error ("%qE cannot have both %<mips16%> and "
+ "%<nomips16%> attributes",
+ DECL_NAME (decl));
+ }
+ else if (TARGET_FLIP_MIPS16 && !DECL_ARTIFICIAL (decl))
+ {
+ /* Implement -mflip-mips16. If DECL has neither a "nomips16" nor a
+ "mips16" attribute, arbitrarily pick one. We must pick the same
+ setting for duplicate declarations of a function. */
+ name = mflip_mips16_use_mips16_p (decl) ? "mips16" : "nomips16";
+ *attributes = tree_cons (get_identifier (name), NULL, *attributes);
+ }
+ }
+}
+
+/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
+
+static tree
+mips_merge_decl_attributes (tree olddecl, tree newdecl)
+{
+ /* The decls' "mips16" and "nomips16" attributes must match exactly. */
+ if (mips_mips16_decl_p (olddecl) != mips_mips16_decl_p (newdecl))
+ error ("%qE redeclared with conflicting %qs attributes",
+ DECL_NAME (newdecl), "mips16");
+ if (mips_nomips16_decl_p (olddecl) != mips_nomips16_decl_p (newdecl))
+ error ("%qE redeclared with conflicting %qs attributes",
+ DECL_NAME (newdecl), "nomips16");
+
+ return merge_attributes (DECL_ATTRIBUTES (olddecl),
+ DECL_ATTRIBUTES (newdecl));
+}
+
+/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
+ and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */
+
+static void
+mips_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
+{
+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+ {
+ *base_ptr = XEXP (x, 0);
+ *offset_ptr = INTVAL (XEXP (x, 1));
+ }
+ else
+ {
+ *base_ptr = x;
+ *offset_ptr = 0;
+ }
+}
+
+static unsigned int mips_build_integer (struct mips_integer_op *,
+ unsigned HOST_WIDE_INT);
+
+/* A subroutine of mips_build_integer, with the same interface.
+ Assume that the final action in the sequence should be a left shift. */
+
+static unsigned int
+mips_build_shift (struct mips_integer_op *codes, HOST_WIDE_INT value)
+{
+ unsigned int i, shift;
+
+ /* Shift VALUE right until its lowest bit is set. Shift arithmetically
+ since signed numbers are easier to load than unsigned ones. */
+ shift = 0;
+ while ((value & 1) == 0)
+ value /= 2, shift++;
+
+ i = mips_build_integer (codes, value);
+ codes[i].code = ASHIFT;
+ codes[i].value = shift;
+ return i + 1;
+}
+
+/* As for mips_build_shift, but assume that the final action will be
+ an IOR or PLUS operation. */
+
+static unsigned int
+mips_build_lower (struct mips_integer_op *codes, unsigned HOST_WIDE_INT value)
+{
+ unsigned HOST_WIDE_INT high;
+ unsigned int i;
+
+ high = value & ~(unsigned HOST_WIDE_INT) 0xffff;
+ if (!LUI_OPERAND (high) && (value & 0x18000) == 0x18000)
+ {
+ /* The constant is too complex to load with a simple LUI/ORI pair,
+ so we want to give the recursive call as many trailing zeros as
+ possible. In this case, we know bit 16 is set and that the
+ low 16 bits form a negative number. If we subtract that number
+ from VALUE, we will clear at least the lowest 17 bits, maybe more. */
+ i = mips_build_integer (codes, CONST_HIGH_PART (value));
+ codes[i].code = PLUS;
+ codes[i].value = CONST_LOW_PART (value);
+ }
+ else
+ {
+ /* Either this is a simple LUI/ORI pair, or clearing the lowest 16
+ bits gives a value with at least 17 trailing zeros. */
+ i = mips_build_integer (codes, high);
+ codes[i].code = IOR;
+ codes[i].value = value & 0xffff;
+ }
+ return i + 1;
+}
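+
+/* A worked example of the PLUS case above (illustrative values only):
+   for VALUE == 0x123459876, bits 15 and 16 are both set and the high
+   part is not a LUI operand, so we recurse on CONST_HIGH_PART (VALUE)
+   == 0x123460000, which has 17 trailing zeros, and finish with
+   A = A PLUS -0x678a.  */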
+
+/* Fill CODES with a sequence of rtl operations to load VALUE.
+ Return the number of operations needed. */
+
+static unsigned int
+mips_build_integer (struct mips_integer_op *codes,
+ unsigned HOST_WIDE_INT value)
+{
+ if (SMALL_OPERAND (value)
+ || SMALL_OPERAND_UNSIGNED (value)
+ || LUI_OPERAND (value))
+ {
+ /* The value can be loaded with a single instruction. */
+ codes[0].code = UNKNOWN;
+ codes[0].value = value;
+ return 1;
+ }
+ else if ((value & 1) != 0 || LUI_OPERAND (CONST_HIGH_PART (value)))
+ {
+ /* Either the constant is a simple LUI/ORI combination or its
+ lowest bit is set. We don't want to shift in this case. */
+ return mips_build_lower (codes, value);
+ }
+ else if ((value & 0xffff) == 0)
+ {
+ /* The constant will need at least three actions. The lowest
+ 16 bits are clear, so the final action will be a shift. */
+ return mips_build_shift (codes, value);
+ }
+ else
+ {
+ /* The final action could be a shift, add or inclusive OR.
+ Rather than use a complex condition to select the best
+ approach, try both mips_build_shift and mips_build_lower
+ and pick the one that gives the shortest sequence.
+ Note that this case is only used once per constant. */
+ struct mips_integer_op alt_codes[MIPS_MAX_INTEGER_OPS];
+ unsigned int cost, alt_cost;
+
+ cost = mips_build_shift (codes, value);
+ alt_cost = mips_build_lower (alt_codes, value);
+ if (alt_cost < cost)
+ {
+ memcpy (codes, alt_codes, alt_cost * sizeof (codes[0]));
+ cost = alt_cost;
+ }
+ return cost;
+ }
+}
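+
+/* For example (an illustrative trace, assuming a 64-bit HOST_WIDE_INT):
+   VALUE == 0x1234500000 has its low 16 bits clear, so mips_build_shift
+   strips the 20 trailing zeros and the recursive call loads 0x12345,
+   giving:
+
+       A = 0x10000             (LUI)
+       A = A IOR 0x2345        (ORI)
+       A = A ASHIFT 20         (DSLL)  */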
+
+/* Return true if symbols of type TYPE require a GOT access. */
+
+static bool
+mips_got_symbol_type_p (enum mips_symbol_type type)
+{
+ switch (type)
+ {
+ case SYMBOL_GOT_PAGE_OFST:
+ case SYMBOL_GOT_DISP:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if X is a thread-local symbol. */
+
+static bool
+mips_tls_symbol_p (rtx x)
+{
+ return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+/* Return true if SYMBOL_REF X is associated with a global symbol
+ (in the STB_GLOBAL sense). */
+
+static bool
+mips_global_symbol_p (const_rtx x)
+{
+ const_tree decl = SYMBOL_REF_DECL (x);
+
+ if (!decl)
+ return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
+
+ /* Weakref symbols are not TREE_PUBLIC, but their targets are global
+ or weak symbols. Relocations in the object file will be against
+ the target symbol, so it's that symbol's binding that matters here. */
+ return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
+}
+
+/* Return true if function X is a libgcc MIPS16 stub function. */
+
+static bool
+mips16_stub_function_p (const_rtx x)
+{
+ return (GET_CODE (x) == SYMBOL_REF
+ && strncmp (XSTR (x, 0), "__mips16_", 9) == 0);
+}
+
+/* Return true if function X is a locally-defined and locally-binding
+ MIPS16 function. */
+
+static bool
+mips16_local_function_p (const_rtx x)
+{
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_LOCAL_P (x)
+ && !SYMBOL_REF_EXTERNAL_P (x)
+ && mips_use_mips16_mode_p (SYMBOL_REF_DECL (x)));
+}
+
+/* Return true if SYMBOL_REF X binds locally. */
+
+static bool
+mips_symbol_binds_local_p (const_rtx x)
+{
+ return (SYMBOL_REF_DECL (x)
+ ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
+ : SYMBOL_REF_LOCAL_P (x));
+}
+
+/* Return true if rtx constants of mode MODE should be put into a small
+ data section. */
+
+static bool
+mips_rtx_constant_in_small_data_p (enum machine_mode mode)
+{
+ return (!TARGET_EMBEDDED_DATA
+ && TARGET_LOCAL_SDATA
+ && GET_MODE_SIZE (mode) <= mips_small_data_threshold);
+}
+
+/* Return true if X should not be moved directly into register $25.
+ We need this because many versions of GAS will treat "la $25,foo" as
+ part of a call sequence and so allow a global "foo" to be lazily bound. */
+
+bool
+mips_dangerous_for_la25_p (rtx x)
+{
+ return (!TARGET_EXPLICIT_RELOCS
+ && TARGET_USE_GOT
+ && GET_CODE (x) == SYMBOL_REF
+ && mips_global_symbol_p (x));
+}
+
+/* Return true if calls to X might need $25 to be valid on entry. */
+
+bool
+mips_use_pic_fn_addr_reg_p (const_rtx x)
+{
+ if (!TARGET_USE_PIC_FN_ADDR_REG)
+ return false;
+
+ /* MIPS16 stub functions are guaranteed not to use $25. */
+ if (mips16_stub_function_p (x))
+ return false;
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ /* If PLTs and copy relocations are available, the static linker
+ will make sure that $25 is valid on entry to the target function. */
+ if (TARGET_ABICALLS_PIC0)
+ return false;
+
+ /* Locally-defined functions use absolute accesses to set up
+ the global pointer. */
+ if (TARGET_ABSOLUTE_ABICALLS
+ && mips_symbol_binds_local_p (x)
+ && !SYMBOL_REF_EXTERNAL_P (x))
+ return false;
+ }
+
+ return true;
+}
+
+/* Return the method that should be used to access SYMBOL_REF or
+ LABEL_REF X in context CONTEXT. */
+
+static enum mips_symbol_type
+mips_classify_symbol (const_rtx x, enum mips_symbol_context context)
+{
+ if (TARGET_RTP_PIC)
+ return SYMBOL_GOT_DISP;
+
+ if (GET_CODE (x) == LABEL_REF)
+ {
+ /* Only return SYMBOL_PC_RELATIVE if we are generating MIPS16
+ code and if we know that the label is in the current function's
+ text section. LABEL_REFs are used for jump tables as well as
+ text labels, so we must check whether jump tables live in the
+ text section. */
+ if (TARGET_MIPS16_SHORT_JUMP_TABLES
+ && !LABEL_REF_NONLOCAL_P (x))
+ return SYMBOL_PC_RELATIVE;
+
+ if (TARGET_ABICALLS && !TARGET_ABSOLUTE_ABICALLS)
+ return SYMBOL_GOT_PAGE_OFST;
+
+ return SYMBOL_ABSOLUTE;
+ }
+
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return SYMBOL_TLS;
+
+ if (CONSTANT_POOL_ADDRESS_P (x))
+ {
+ if (TARGET_MIPS16_TEXT_LOADS)
+ return SYMBOL_PC_RELATIVE;
+
+ if (TARGET_MIPS16_PCREL_LOADS && context == SYMBOL_CONTEXT_MEM)
+ return SYMBOL_PC_RELATIVE;
+
+ if (mips_rtx_constant_in_small_data_p (get_pool_mode (x)))
+ return SYMBOL_GP_RELATIVE;
+ }
+
+ /* Do not use small-data accesses for weak symbols; they may end up
+ being zero. */
+ if (TARGET_GPOPT && SYMBOL_REF_SMALL_P (x) && !SYMBOL_REF_WEAK (x))
+ return SYMBOL_GP_RELATIVE;
+
+ /* Don't use GOT accesses for locally-binding symbols when -mno-shared
+ is in effect. */
+ if (TARGET_ABICALLS_PIC2
+ && !(TARGET_ABSOLUTE_ABICALLS && mips_symbol_binds_local_p (x)))
+ {
+ /* There are three cases to consider:
+
+ - o32 PIC (either with or without explicit relocs)
+ - n32/n64 PIC without explicit relocs
+ - n32/n64 PIC with explicit relocs
+
+ In the first case, both local and global accesses will use an
+ R_MIPS_GOT16 relocation. We must correctly predict which of
+ the two semantics (local or global) the assembler and linker
+ will apply. The choice depends on the symbol's binding rather
+ than its visibility.
+
+ In the second case, the assembler will not use R_MIPS_GOT16
+ relocations, but it chooses between local and global accesses
+ in the same way as for o32 PIC.
+
+ In the third case we have more freedom since both forms of
+ access will work for any kind of symbol. However, there seems
+ little point in doing things differently. */
+ if (mips_global_symbol_p (x))
+ return SYMBOL_GOT_DISP;
+
+ return SYMBOL_GOT_PAGE_OFST;
+ }
+
+ if (TARGET_MIPS16_PCREL_LOADS && context != SYMBOL_CONTEXT_CALL)
+ return SYMBOL_FORCE_TO_MEM;
+
+ return SYMBOL_ABSOLUTE;
+}
+
+/* Classify the base of symbolic expression X, given that X appears in
+ context CONTEXT. */
+
+static enum mips_symbol_type
+mips_classify_symbolic_expression (rtx x, enum mips_symbol_context context)
+{
+ rtx offset;
+
+ split_const (x, &x, &offset);
+ if (UNSPEC_ADDRESS_P (x))
+ return UNSPEC_ADDRESS_TYPE (x);
+
+ return mips_classify_symbol (x, context);
+}
+
+/* Return true if OFFSET is within the range [0, ALIGN), where ALIGN
+ is the alignment in bytes of SYMBOL_REF X. */
+
+static bool
+mips_offset_within_alignment_p (rtx x, HOST_WIDE_INT offset)
+{
+ HOST_WIDE_INT align;
+
+ align = SYMBOL_REF_DECL (x) ? DECL_ALIGN_UNIT (SYMBOL_REF_DECL (x)) : 1;
+ return IN_RANGE (offset, 0, align - 1);
+}
+
+/* Return true if X is a symbolic constant that can be used in context
+ CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
+
+bool
+mips_symbolic_constant_p (rtx x, enum mips_symbol_context context,
+ enum mips_symbol_type *symbol_type)
+{
+ rtx offset;
+
+ split_const (x, &x, &offset);
+ if (UNSPEC_ADDRESS_P (x))
+ {
+ *symbol_type = UNSPEC_ADDRESS_TYPE (x);
+ x = UNSPEC_ADDRESS (x);
+ }
+ else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+ {
+ *symbol_type = mips_classify_symbol (x, context);
+ if (*symbol_type == SYMBOL_TLS)
+ return false;
+ }
+ else
+ return false;
+
+ if (offset == const0_rtx)
+ return true;
+
+ /* Check whether a nonzero offset is valid for the underlying
+ relocations. */
+ switch (*symbol_type)
+ {
+ case SYMBOL_ABSOLUTE:
+ case SYMBOL_FORCE_TO_MEM:
+ case SYMBOL_32_HIGH:
+ case SYMBOL_64_HIGH:
+ case SYMBOL_64_MID:
+ case SYMBOL_64_LOW:
+ /* If the target has 64-bit pointers and the object file only
+ supports 32-bit symbols, the values of those symbols will be
+ sign-extended. In this case we can't allow an arbitrary offset
+ in case the 32-bit value X + OFFSET has a different sign from X. */
+ if (Pmode == DImode && !ABI_HAS_64BIT_SYMBOLS)
+ return offset_within_block_p (x, INTVAL (offset));
+
+ /* In other cases the relocations can handle any offset. */
+ return true;
+
+ case SYMBOL_PC_RELATIVE:
+ /* Allow constant pool references to be converted to LABEL+CONSTANT.
+ In this case, we no longer have access to the underlying constant,
+ but the original symbol-based access was known to be valid. */
+ if (GET_CODE (x) == LABEL_REF)
+ return true;
+
+ /* Fall through. */
+
+ case SYMBOL_GP_RELATIVE:
+ /* Make sure that the offset refers to something within the
+ same object block. This should guarantee that the final
+ PC- or GP-relative offset is within the 16-bit limit. */
+ return offset_within_block_p (x, INTVAL (offset));
+
+ case SYMBOL_GOT_PAGE_OFST:
+ case SYMBOL_GOTOFF_PAGE:
+ /* If the symbol is global, the GOT entry will contain the symbol's
+ address, and we will apply a 16-bit offset after loading it.
+ If the symbol is local, the linker should provide enough local
+ GOT entries for a 16-bit offset, but larger offsets may lead
+ to GOT overflow. */
+ return SMALL_INT (offset);
+
+ case SYMBOL_TPREL:
+ case SYMBOL_DTPREL:
+ /* There is no carry between the HI and LO REL relocations, so the
+ offset is only valid if we know it won't lead to such a carry. */
+ return mips_offset_within_alignment_p (x, INTVAL (offset));
+
+ case SYMBOL_GOT_DISP:
+ case SYMBOL_GOTOFF_DISP:
+ case SYMBOL_GOTOFF_CALL:
+ case SYMBOL_GOTOFF_LOADGP:
+ case SYMBOL_TLSGD:
+ case SYMBOL_TLSLDM:
+ case SYMBOL_GOTTPREL:
+ case SYMBOL_TLS:
+ case SYMBOL_HALF:
+ return false;
+ }
+ gcc_unreachable ();
+}
+
+/* Like mips_symbol_insns, but treat extended MIPS16 instructions as a
+   single instruction.  We rely on the fact that, in the worst case,
+   all instructions involved in a MIPS16 address calculation are
+   extended ones.  */
+
+static int
+mips_symbol_insns_1 (enum mips_symbol_type type, enum machine_mode mode)
+{
+ switch (type)
+ {
+ case SYMBOL_ABSOLUTE:
+ /* When using 64-bit symbols, we need 5 preparatory instructions,
+ such as:
+
+ lui $at,%highest(symbol)
+ daddiu $at,$at,%higher(symbol)
+ dsll $at,$at,16
+ daddiu $at,$at,%hi(symbol)
+ dsll $at,$at,16
+
+ The final address is then $at + %lo(symbol). With 32-bit
+ symbols we just need a preparatory LUI for normal mode and
+ a preparatory LI and SLL for MIPS16. */
+ return ABI_HAS_64BIT_SYMBOLS ? 6 : TARGET_MIPS16 ? 3 : 2;
+
+ case SYMBOL_GP_RELATIVE:
+ /* Treat GP-relative accesses as taking a single instruction on
+ MIPS16 too; the copy of $gp can often be shared. */
+ return 1;
+
+ case SYMBOL_PC_RELATIVE:
+      /* PC-relative constants can only be used with ADDIUPC,
+ DADDIUPC, LWPC and LDPC. */
+ if (mode == MAX_MACHINE_MODE
+ || GET_MODE_SIZE (mode) == 4
+ || GET_MODE_SIZE (mode) == 8)
+ return 1;
+
+ /* The constant must be loaded using ADDIUPC or DADDIUPC first. */
+ return 0;
+
+ case SYMBOL_FORCE_TO_MEM:
+ /* LEAs will be converted into constant-pool references by
+ mips_reorg. */
+ if (mode == MAX_MACHINE_MODE)
+ return 1;
+
+ /* The constant must be loaded and then dereferenced. */
+ return 0;
+
+ case SYMBOL_GOT_DISP:
+ /* The constant will have to be loaded from the GOT before it
+ is used in an address. */
+ if (mode != MAX_MACHINE_MODE)
+ return 0;
+
+ /* Fall through. */
+
+ case SYMBOL_GOT_PAGE_OFST:
+ /* Unless -funit-at-a-time is in effect, we can't be sure whether the
+ local/global classification is accurate. The worst cases are:
+
+ (1) For local symbols when generating o32 or o64 code. The assembler
+ will use:
+
+ lw $at,%got(symbol)
+ nop
+
+ ...and the final address will be $at + %lo(symbol).
+
+ (2) For global symbols when -mxgot. The assembler will use:
+
+ lui $at,%got_hi(symbol)
+ (d)addu $at,$at,$gp
+
+ ...and the final address will be $at + %got_lo(symbol). */
+ return 3;
+
+ case SYMBOL_GOTOFF_PAGE:
+ case SYMBOL_GOTOFF_DISP:
+ case SYMBOL_GOTOFF_CALL:
+ case SYMBOL_GOTOFF_LOADGP:
+ case SYMBOL_32_HIGH:
+ case SYMBOL_64_HIGH:
+ case SYMBOL_64_MID:
+ case SYMBOL_64_LOW:
+ case SYMBOL_TLSGD:
+ case SYMBOL_TLSLDM:
+ case SYMBOL_DTPREL:
+ case SYMBOL_GOTTPREL:
+ case SYMBOL_TPREL:
+ case SYMBOL_HALF:
+ /* A 16-bit constant formed by a single relocation, or a 32-bit
+ constant formed from a high 16-bit relocation and a low 16-bit
+ relocation. Use mips_split_p to determine which. 32-bit
+ constants need an "lui; addiu" sequence for normal mode and
+ an "li; sll; addiu" sequence for MIPS16 mode. */
+ return !mips_split_p[type] ? 1 : TARGET_MIPS16 ? 3 : 2;
+
+ case SYMBOL_TLS:
+ /* We don't treat a bare TLS symbol as a constant. */
+ return 0;
+ }
+ gcc_unreachable ();
+}
+
+/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed
+ to load symbols of type TYPE into a register. Return 0 if the given
+ type of symbol cannot be used as an immediate operand.
+
+ Otherwise, return the number of instructions needed to load or store
+ values of mode MODE to or from addresses of type TYPE. Return 0 if
+ the given type of symbol is not valid in addresses.
+
+ In both cases, treat extended MIPS16 instructions as two instructions. */
+
+static int
+mips_symbol_insns (enum mips_symbol_type type, enum machine_mode mode)
+{
+ return mips_symbol_insns_1 (type, mode) * (TARGET_MIPS16 ? 2 : 1);
+}
+
+/* A for_each_rtx callback. Stop the search if *X references a
+ thread-local symbol. */
+
+static int
+mips_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ return mips_tls_symbol_p (*x);
+}
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
+
+static bool
+mips_cannot_force_const_mem (rtx x)
+{
+ enum mips_symbol_type type;
+ rtx base, offset;
+
+ /* There is no assembler syntax for expressing an address-sized
+ high part. */
+ if (GET_CODE (x) == HIGH)
+ return true;
+
+ /* As an optimization, reject constants that mips_legitimize_move
+ can expand inline.
+
+ Suppose we have a multi-instruction sequence that loads constant C
+ into register R. If R does not get allocated a hard register, and
+ R is used in an operand that allows both registers and memory
+ references, reload will consider forcing C into memory and using
+ one of the instruction's memory alternatives. Returning false
+ here will force it to use an input reload instead. */
+ if (CONST_INT_P (x) && LEGITIMATE_CONSTANT_P (x))
+ return true;
+
+ split_const (x, &base, &offset);
+ if (mips_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type)
+ && type != SYMBOL_FORCE_TO_MEM)
+ {
+ /* The same optimization as for CONST_INT. */
+ if (SMALL_INT (offset) && mips_symbol_insns (type, MAX_MACHINE_MODE) > 0)
+ return true;
+
+ /* If MIPS16 constant pools live in the text section, they should
+ not refer to anything that might need run-time relocation. */
+ if (TARGET_MIPS16_PCREL_LOADS && mips_got_symbol_type_p (type))
+ return true;
+ }
+
+ /* TLS symbols must be computed by mips_legitimize_move. */
+ if (for_each_rtx (&x, &mips_tls_symbol_ref_1, NULL))
+ return true;
+
+ return false;
+}
+
+/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. We can't use blocks for
+ constants when we're using a per-function constant pool. */
+
+static bool
+mips_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_rtx x ATTRIBUTE_UNUSED)
+{
+ return !TARGET_MIPS16_PCREL_LOADS;
+}
+
+/* Return true if register REGNO is a valid base register for mode MODE.
+ STRICT_P is true if REG_OK_STRICT is in effect. */
+
+int
+mips_regno_mode_ok_for_base_p (int regno, enum machine_mode mode,
+ bool strict_p)
+{
+ if (!HARD_REGISTER_NUM_P (regno))
+ {
+ if (!strict_p)
+ return true;
+ regno = reg_renumber[regno];
+ }
+
+ /* These fake registers will be eliminated to either the stack or
+ hard frame pointer, both of which are usually valid base registers.
+ Reload deals with the cases where the eliminated form isn't valid. */
+ if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
+ return true;
+
+ /* In MIPS16 mode, the stack pointer can only address word and doubleword
+ values, nothing smaller. There are two problems here:
+
+ (a) Instantiating virtual registers can introduce new uses of the
+ stack pointer. If these virtual registers are valid addresses,
+ the stack pointer should be too.
+
+ (b) Most uses of the stack pointer are not made explicit until
+ FRAME_POINTER_REGNUM and ARG_POINTER_REGNUM have been eliminated.
+ We don't know until that stage whether we'll be eliminating to the
+ stack pointer (which needs the restriction) or the hard frame
+ pointer (which doesn't).
+
+ All in all, it seems more consistent to only enforce this restriction
+ during and after reload. */
+ if (TARGET_MIPS16 && regno == STACK_POINTER_REGNUM)
+ return !strict_p || GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8;
+
+ return TARGET_MIPS16 ? M16_REG_P (regno) : GP_REG_P (regno);
+}
+
+/* Return true if X is a valid base register for mode MODE.
+ STRICT_P is true if REG_OK_STRICT is in effect. */
+
+static bool
+mips_valid_base_register_p (rtx x, enum machine_mode mode, bool strict_p)
+{
+ if (!strict_p && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (REG_P (x)
+ && mips_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
+}
+
+/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
+ can address a value of mode MODE. */
+
+static bool
+mips_valid_offset_p (rtx x, enum machine_mode mode)
+{
+ /* Check that X is a signed 16-bit number. */
+ if (!const_arith_operand (x, Pmode))
+ return false;
+
+ /* We may need to split multiword moves, so make sure that every word
+ is accessible. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
+ return false;
+
+ return true;
+}
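+
+/* For instance (an assumed 32-bit target, so DImode spans two words):
+   offset 0x7ffc would be rejected for DImode because the second word
+   would sit at 0x8000, outside the signed 16-bit range.  */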
+
+/* Return true if a LO_SUM can address a value of mode MODE when the
+ LO_SUM symbol has type SYMBOL_TYPE. */
+
+static bool
+mips_valid_lo_sum_p (enum mips_symbol_type symbol_type, enum machine_mode mode)
+{
+ /* Check that symbols of type SYMBOL_TYPE can be used to access values
+ of mode MODE. */
+ if (mips_symbol_insns (symbol_type, mode) == 0)
+ return false;
+
+ /* Check that there is a known low-part relocation. */
+ if (mips_lo_relocs[symbol_type] == NULL)
+ return false;
+
+ /* We may need to split multiword moves, so make sure that each word
+ can be accessed without inducing a carry. This is mainly needed
+ for o64, which has historically only guaranteed 64-bit alignment
+ for 128-bit types. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode))
+ return false;
+
+ return true;
+}
+
+/* Return true if X is a valid address for machine mode MODE. If it is,
+ fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
+ effect. */
+
+static bool
+mips_classify_address (struct mips_address_info *info, rtx x,
+ enum machine_mode mode, bool strict_p)
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ info->type = ADDRESS_REG;
+ info->reg = x;
+ info->offset = const0_rtx;
+ return mips_valid_base_register_p (info->reg, mode, strict_p);
+
+ case PLUS:
+ info->type = ADDRESS_REG;
+ info->reg = XEXP (x, 0);
+ info->offset = XEXP (x, 1);
+ return (mips_valid_base_register_p (info->reg, mode, strict_p)
+ && mips_valid_offset_p (info->offset, mode));
+
+ case LO_SUM:
+ info->type = ADDRESS_LO_SUM;
+ info->reg = XEXP (x, 0);
+ info->offset = XEXP (x, 1);
+ /* We have to trust the creator of the LO_SUM to do something vaguely
+ sane. Target-independent code that creates a LO_SUM should also
+ create and verify the matching HIGH. Target-independent code that
+ adds an offset to a LO_SUM must prove that the offset will not
+ induce a carry. Failure to do either of these things would be
+ a bug, and we are not required to check for it here. The MIPS
+ backend itself should only create LO_SUMs for valid symbolic
+ constants, with the high part being either a HIGH or a copy
+ of _gp. */
+ info->symbol_type
+ = mips_classify_symbolic_expression (info->offset, SYMBOL_CONTEXT_MEM);
+ return (mips_valid_base_register_p (info->reg, mode, strict_p)
+ && mips_valid_lo_sum_p (info->symbol_type, mode));
+
+ case CONST_INT:
+ /* Small-integer addresses don't occur very often, but they
+ are legitimate if $0 is a valid base register. */
+ info->type = ADDRESS_CONST_INT;
+ return !TARGET_MIPS16 && SMALL_INT (x);
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ info->type = ADDRESS_SYMBOLIC;
+ return (mips_symbolic_constant_p (x, SYMBOL_CONTEXT_MEM,
+ &info->symbol_type)
+ && mips_symbol_insns (info->symbol_type, mode) > 0
+ && !mips_split_p[info->symbol_type]);
+
+ default:
+ return false;
+ }
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P. */
+
+static bool
+mips_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+{
+ struct mips_address_info addr;
+
+ return mips_classify_address (&addr, x, mode, strict_p);
+}
+
+/* Return true if X is a legitimate $sp-based address for mode MODE.  */
+
+bool
+mips_stack_address_p (rtx x, enum machine_mode mode)
+{
+ struct mips_address_info addr;
+
+ return (mips_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG
+ && addr.reg == stack_pointer_rtx);
+}
+
+/* Return true if ADDR matches the pattern for the LWXS load scaled indexed
+ address instruction. Note that such addresses are not considered
+ legitimate in the TARGET_LEGITIMATE_ADDRESS_P sense, because their use
+ is so restricted. */
+
+static bool
+mips_lwxs_address_p (rtx addr)
+{
+ if (ISA_HAS_LWXS
+ && GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 1)))
+ {
+ rtx offset = XEXP (addr, 0);
+ if (GET_CODE (offset) == MULT
+ && REG_P (XEXP (offset, 0))
+ && CONST_INT_P (XEXP (offset, 1))
+ && INTVAL (XEXP (offset, 1)) == 4)
+ return true;
+ }
+ return false;
+}
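+
+/* Illustration (operand roles assumed from the test above): the accepted
+   form is (plus (mult index 4) base), i.e. a word-scaled indexed address
+   such as base[i] for a 32-bit element array, which LWXS can load in a
+   single instruction.  */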
+
+/* Return true if a value at OFFSET bytes from base register BASE can be
+ accessed using an unextended MIPS16 instruction. MODE is the mode of
+ the value.
+
+ Usually the offset in an unextended instruction is a 5-bit field.
+ The offset is unsigned and shifted left once for LH and SH, twice
+   for LW and SW, and so on.  The exceptions are LWSP and SWSP, which have
+ an 8-bit immediate field that's shifted left twice. */
+
+static bool
+mips16_unextended_reference_p (enum machine_mode mode, rtx base,
+ unsigned HOST_WIDE_INT offset)
+{
+ if (offset % GET_MODE_SIZE (mode) == 0)
+ {
+ if (GET_MODE_SIZE (mode) == 4 && base == stack_pointer_rtx)
+ return offset < 256U * GET_MODE_SIZE (mode);
+ return offset < 32U * GET_MODE_SIZE (mode);
+ }
+ return false;
+}
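+
+/* For example (limits derived from the checks above): an SImode access
+   is unextended for offsets 0, 4, ..., 124 from an ordinary base
+   register, or 0, 4, ..., 1020 from $sp.  */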
+
+/* Return the number of instructions needed to load or store a value
+ of mode MODE at address X. Return 0 if X isn't valid for MODE.
+ Assume that multiword moves may need to be split into word moves
+ if MIGHT_SPLIT_P, otherwise assume that a single load or store is
+ enough.
+
+ For MIPS16 code, count extended instructions as two instructions. */
+
+int
+mips_address_insns (rtx x, enum machine_mode mode, bool might_split_p)
+{
+ struct mips_address_info addr;
+ int factor;
+
+ /* BLKmode is used for single unaligned loads and stores and should
+ not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty
+ meaningless, so we have to single it out as a special case one way
+ or the other.) */
+ if (mode != BLKmode && might_split_p)
+ factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ else
+ factor = 1;
+
+ if (mips_classify_address (&addr, x, mode, false))
+ switch (addr.type)
+ {
+ case ADDRESS_REG:
+ if (TARGET_MIPS16
+ && !mips16_unextended_reference_p (mode, addr.reg,
+ UINTVAL (addr.offset)))
+ return factor * 2;
+ return factor;
+
+ case ADDRESS_LO_SUM:
+ return TARGET_MIPS16 ? factor * 2 : factor;
+
+ case ADDRESS_CONST_INT:
+ return factor;
+
+ case ADDRESS_SYMBOLIC:
+ return factor * mips_symbol_insns (addr.symbol_type, mode);
+ }
+ return 0;
+}
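+
+/* For example, a DImode access on a 32-bit target with MIGHT_SPLIT_P
+ true has FACTOR 2, so a simple register address costs 2 and a MIPS16
+ extended reference costs 4. */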
+
+/* Return the number of instructions needed to load constant X.
+ Return 0 if X isn't a valid constant. */
+
+int
+mips_const_insns (rtx x)
+{
+ struct mips_integer_op codes[MIPS_MAX_INTEGER_OPS];
+ enum mips_symbol_type symbol_type;
+ rtx offset;
+
+ switch (GET_CODE (x))
+ {
+ case HIGH:
+ if (!mips_symbolic_constant_p (XEXP (x, 0), SYMBOL_CONTEXT_LEA,
+ &symbol_type)
+ || !mips_split_p[symbol_type])
+ return 0;
+
+ /* This is simply an LUI for normal mode. It is an extended
+ LI followed by an extended SLL for MIPS16. */
+ return TARGET_MIPS16 ? 4 : 1;
+
+ case CONST_INT:
+ if (TARGET_MIPS16)
+ /* Unsigned 8-bit constants can be loaded using an unextended
+ LI instruction. Unsigned 16-bit constants can be loaded
+ using an extended LI. Negative constants must be loaded
+ using LI and then negated. */
+ return (IN_RANGE (INTVAL (x), 0, 255) ? 1
+ : SMALL_OPERAND_UNSIGNED (INTVAL (x)) ? 2
+ : IN_RANGE (-INTVAL (x), 0, 255) ? 2
+ : SMALL_OPERAND_UNSIGNED (-INTVAL (x)) ? 3
+ : 0);
+
+ return mips_build_integer (codes, INTVAL (x));
+
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ /* Allow zeros for normal mode, where we can use $0. */
+ return !TARGET_MIPS16 && x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+
+ case CONST:
+ if (CONST_GP_P (x))
+ return 1;
+
+ /* See if we can refer to X directly. */
+ if (mips_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type))
+ return mips_symbol_insns (symbol_type, MAX_MACHINE_MODE);
+
+ /* Otherwise try splitting the constant into a base and offset.
+ If the offset is a 16-bit value, we can load the base address
+ into a register and then use (D)ADDIU to add in the offset.
+ If the offset is larger, we can load the base and offset
+ into separate registers and add them together with (D)ADDU.
+ However, the latter is only possible before reload; during
+ and after reload, we must have the option of forcing the
+ constant into the pool instead. */
+ split_const (x, &x, &offset);
+ if (offset != 0)
+ {
+ int n = mips_const_insns (x);
+ if (n != 0)
+ {
+ if (SMALL_INT (offset))
+ return n + 1;
+ else if (!targetm.cannot_force_const_mem (x))
+ return n + 1 + mips_build_integer (codes, INTVAL (offset));
+ }
+ }
+ return 0;
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return mips_symbol_insns (mips_classify_symbol (x, SYMBOL_CONTEXT_LEA),
+ MAX_MACHINE_MODE);
+
+ default:
+ return 0;
+ }
+}
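+
+/* Worked MIPS16 examples of the CONST_INT rules above: 200 needs one
+ unextended LI; 40000 needs one extended LI (counted as 2); -5 needs
+ an LI of 5 followed by a negation (also 2). */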
+
+/* X is a doubleword constant that can be handled by splitting it into
+ two words and loading each word separately. Return the number of
+ instructions required to do this. */
+
+int
+mips_split_const_insns (rtx x)
+{
+ unsigned int low, high;
+
+ low = mips_const_insns (mips_subword (x, false));
+ high = mips_const_insns (mips_subword (x, true));
+ gcc_assert (low > 0 && high > 0);
+ return low + high;
+}
+
+/* Return the number of instructions needed to implement INSN,
+ given that it loads from or stores to MEM. Count extended
+ MIPS16 instructions as two instructions. */
+
+int
+mips_load_store_insns (rtx mem, rtx insn)
+{
+ enum machine_mode mode;
+ bool might_split_p;
+ rtx set;
+
+ gcc_assert (MEM_P (mem));
+ mode = GET_MODE (mem);
+
+ /* Try to prove that INSN does not need to be split. */
+ might_split_p = true;
+ if (GET_MODE_BITSIZE (mode) == 64)
+ {
+ set = single_set (insn);
+ if (set && !mips_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
+ might_split_p = false;
+ }
+
+ return mips_address_insns (XEXP (mem, 0), mode, might_split_p);
+}
+
+/* Return the number of instructions needed for an integer division. */
+
+int
+mips_idiv_insns (void)
+{
+ int count;
+
+ count = 1;
+ if (TARGET_CHECK_ZERO_DIV)
+ {
+ if (GENERATE_DIVIDE_TRAPS)
+ count++;
+ else
+ count += 2;
+ }
+
+ if (TARGET_FIX_R4000 || TARGET_FIX_R4400)
+ count++;
+ return count;
+}
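+
+/* For example, with -mcheck-zero-division on an ISA that supports
+ divide traps, the count is 2 (the divide plus a TEQ against $0);
+ without traps it is 3, roughly a divide, a branch and a BREAK. */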
+
+/* Emit a move from SRC to DEST. Assume that the move expanders can
+ handle all moves if !can_create_pseudo_p (). The distinction is
+ important because, unlike emit_move_insn, the move expanders know
+ how to force Pmode objects into the constant pool even when the
+ constant pool address is not itself legitimate. */
+
+rtx
+mips_emit_move (rtx dest, rtx src)
+{
+ return (can_create_pseudo_p ()
+ ? emit_move_insn (dest, src)
+ : emit_move_insn_1 (dest, src));
+}
+
+/* Emit an instruction of the form (set TARGET (CODE OP0)). */
+
+static void
+mips_emit_unary (enum rtx_code code, rtx target, rtx op0)
+{
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_fmt_e (code, GET_MODE (op0), op0)));
+}
+
+/* Compute (CODE OP0) and store the result in a new register of mode MODE.
+ Return that new register. */
+
+static rtx
+mips_force_unary (enum machine_mode mode, enum rtx_code code, rtx op0)
+{
+ rtx reg;
+
+ reg = gen_reg_rtx (mode);
+ mips_emit_unary (code, reg, op0);
+ return reg;
+}
+
+/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */
+
+static void
+mips_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1)
+{
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_fmt_ee (code, GET_MODE (target), op0, op1)));
+}
+
+/* Compute (CODE OP0 OP1) and store the result in a new register
+ of mode MODE. Return that new register. */
+
+static rtx
+mips_force_binary (enum machine_mode mode, enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx reg;
+
+ reg = gen_reg_rtx (mode);
+ mips_emit_binary (code, reg, op0, op1);
+ return reg;
+}
+
+/* Copy VALUE to a register and return that register. If new pseudos
+ are allowed, copy it into a new register, otherwise use DEST. */
+
+static rtx
+mips_force_temporary (rtx dest, rtx value)
+{
+ if (can_create_pseudo_p ())
+ return force_reg (Pmode, value);
+ else
+ {
+ mips_emit_move (dest, value);
+ return dest;
+ }
+}
+
+/* Emit a call sequence with call pattern PATTERN and return the call
+ instruction itself (which is not necessarily the last instruction
+ emitted). ORIG_ADDR is the original, unlegitimized address,
+ ADDR is the legitimized form, and LAZY_P is true if the call
+ address is lazily-bound. */
+
+static rtx
+mips_emit_call_insn (rtx pattern, rtx orig_addr, rtx addr, bool lazy_p)
+{
+ rtx insn, reg;
+
+ insn = emit_call_insn (pattern);
+
+ if (TARGET_MIPS16 && mips_use_pic_fn_addr_reg_p (orig_addr))
+ {
+ /* MIPS16 JALRs only take MIPS16 registers. If the target
+ function requires $25 to be valid on entry, we must copy it
+ there separately. The move instruction can be put in the
+ call's delay slot. */
+ reg = gen_rtx_REG (Pmode, PIC_FUNCTION_ADDR_REGNUM);
+ emit_insn_before (gen_move_insn (reg, addr), insn);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
+ }
+
+ if (lazy_p)
+ /* Lazy-binding stubs require $gp to be valid on entry. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+
+ if (TARGET_USE_GOT)
+ {
+ /* See the comment above load_call<mode> for details. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
+ gen_rtx_REG (Pmode, GOT_VERSION_REGNUM));
+ emit_insn (gen_update_got_version ());
+ }
+ return insn;
+}
+
+/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
+ then add CONST_INT OFFSET to the result. */
+
+static rtx
+mips_unspec_address_offset (rtx base, rtx offset,
+ enum mips_symbol_type symbol_type)
+{
+ base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
+ UNSPEC_ADDRESS_FIRST + symbol_type);
+ if (offset != const0_rtx)
+ base = gen_rtx_PLUS (Pmode, base, offset);
+ return gen_rtx_CONST (Pmode, base);
+}
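+
+/* The result of the routine above therefore has the shape
+ (const (plus (unspec [BASE] ...) OFFSET)), or simply
+ (const (unspec [BASE] ...)) when OFFSET is zero. */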
+
+/* Return an UNSPEC address with underlying address ADDRESS and symbol
+ type SYMBOL_TYPE. */
+
+rtx
+mips_unspec_address (rtx address, enum mips_symbol_type symbol_type)
+{
+ rtx base, offset;
+
+ split_const (address, &base, &offset);
+ return mips_unspec_address_offset (base, offset, symbol_type);
+}
+
+/* If OP is an UNSPEC address, return the address to which it refers,
+ otherwise return OP itself. */
+
+static rtx
+mips_strip_unspec_address (rtx op)
+{
+ rtx base, offset;
+
+ split_const (op, &base, &offset);
+ if (UNSPEC_ADDRESS_P (base))
+ op = plus_constant (UNSPEC_ADDRESS (base), INTVAL (offset));
+ return op;
+}
+
+/* If mips_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
+ high part to BASE and return the result. Just return BASE otherwise.
+ TEMP is as for mips_force_temporary.
+
+ The returned expression can be used as the first operand to a LO_SUM. */
+
+static rtx
+mips_unspec_offset_high (rtx temp, rtx base, rtx addr,
+ enum mips_symbol_type symbol_type)
+{
+ if (mips_split_p[symbol_type])
+ {
+ addr = gen_rtx_HIGH (Pmode, mips_unspec_address (addr, symbol_type));
+ addr = mips_force_temporary (temp, addr);
+ base = mips_force_temporary (temp, gen_rtx_PLUS (Pmode, addr, base));
+ }
+ return base;
+}
+
+/* Return an instruction that copies $gp into register REG. We want
+ GCC to treat the register's value as constant, so that its value
+ can be rematerialized on demand. */
+
+static rtx
+gen_load_const_gp (rtx reg)
+{
+ return (Pmode == SImode
+ ? gen_load_const_gp_si (reg)
+ : gen_load_const_gp_di (reg));
+}
+
+/* Return a pseudo register that contains the value of $gp throughout
+ the current function. Such registers are needed by MIPS16 functions,
+ for which $gp itself is not a valid base register or addition operand. */
+
+static rtx
+mips16_gp_pseudo_reg (void)
+{
+ if (cfun->machine->mips16_gp_pseudo_rtx == NULL_RTX)
+ {
+ rtx insn, scan;
+
+ cfun->machine->mips16_gp_pseudo_rtx = gen_reg_rtx (Pmode);
+
+ push_topmost_sequence ();
+
+ scan = get_insns ();
+ while (NEXT_INSN (scan) && !INSN_P (NEXT_INSN (scan)))
+ scan = NEXT_INSN (scan);
+
+ insn = gen_load_const_gp (cfun->machine->mips16_gp_pseudo_rtx);
+ emit_insn_after (insn, scan);
+
+ pop_topmost_sequence ();
+ }
+
+ return cfun->machine->mips16_gp_pseudo_rtx;
+}
+
+/* Return a base register that holds pic_offset_table_rtx.
+ TEMP, if nonnull, is a scratch Pmode base register. */
+
+rtx
+mips_pic_base_register (rtx temp)
+{
+ if (!TARGET_MIPS16)
+ return pic_offset_table_rtx;
+
+ if (currently_expanding_to_rtl)
+ return mips16_gp_pseudo_reg ();
+
+ if (can_create_pseudo_p ())
+ temp = gen_reg_rtx (Pmode);
+
+ if (TARGET_USE_GOT)
+ /* The first post-reload split exposes all references to $gp
+ (both uses and definitions). All references must remain
+ explicit after that point.
+
+ It is safe to introduce uses of $gp at any time, so for
+ simplicity, we do that before the split too. */
+ mips_emit_move (temp, pic_offset_table_rtx);
+ else
+ emit_insn (gen_load_const_gp (temp));
+ return temp;
+}
+
+/* Return the RHS of a load_call<mode> insn. */
+
+static rtx
+mips_unspec_call (rtx reg, rtx symbol)
+{
+ rtvec vec;
+
+ vec = gen_rtvec (3, reg, symbol, gen_rtx_REG (SImode, GOT_VERSION_REGNUM));
+ return gen_rtx_UNSPEC (Pmode, vec, UNSPEC_LOAD_CALL);
+}
+
+/* If SRC is the RHS of a load_call<mode> insn, return the underlying symbol
+ reference. Return NULL_RTX otherwise. */
+
+static rtx
+mips_strip_unspec_call (rtx src)
+{
+ if (GET_CODE (src) == UNSPEC && XINT (src, 1) == UNSPEC_LOAD_CALL)
+ return mips_strip_unspec_address (XVECEXP (src, 0, 1));
+ return NULL_RTX;
+}
+
+/* Create and return a GOT reference of type TYPE for address ADDR.
+ TEMP, if nonnull, is a scratch Pmode base register. */
+
+rtx
+mips_got_load (rtx temp, rtx addr, enum mips_symbol_type type)
+{
+ rtx base, high, lo_sum_symbol;
+
+ base = mips_pic_base_register (temp);
+
+ /* If we used the temporary register to load $gp, we can't use
+ it for the high part as well. */
+ if (temp != NULL && reg_overlap_mentioned_p (base, temp))
+ temp = NULL;
+
+ high = mips_unspec_offset_high (temp, base, addr, type);
+ lo_sum_symbol = mips_unspec_address (addr, type);
+
+ if (type == SYMBOL_GOTOFF_CALL)
+ return mips_unspec_call (high, lo_sum_symbol);
+ else
+ return (Pmode == SImode
+ ? gen_unspec_gotsi (high, lo_sum_symbol)
+ : gen_unspec_gotdi (high, lo_sum_symbol));
+}
+
+/* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
+ it appears in a MEM of that mode. Return true if ADDR is a legitimate
+ constant in that context and can be split into high and low parts.
+ If so, and if LOW_OUT is nonnull, emit the high part and store the
+ low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
+
+ TEMP is as for mips_force_temporary and is used to load the high
+ part into a register.
+
+ When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
+ a legitimate SET_SRC for an .md pattern, otherwise the low part
+ is guaranteed to be a legitimate address for mode MODE. */
+
+bool
+mips_split_symbol (rtx temp, rtx addr, enum machine_mode mode, rtx *low_out)
+{
+ enum mips_symbol_context context;
+ enum mips_symbol_type symbol_type;
+ rtx high;
+
+ context = (mode == MAX_MACHINE_MODE
+ ? SYMBOL_CONTEXT_LEA
+ : SYMBOL_CONTEXT_MEM);
+ if (GET_CODE (addr) == HIGH && context == SYMBOL_CONTEXT_LEA)
+ {
+ addr = XEXP (addr, 0);
+ if (mips_symbolic_constant_p (addr, context, &symbol_type)
+ && mips_symbol_insns (symbol_type, mode) > 0
+ && mips_split_hi_p[symbol_type])
+ {
+ if (low_out)
+ switch (symbol_type)
+ {
+ case SYMBOL_GOT_PAGE_OFST:
+ /* The high part of a page/ofst pair is loaded from the GOT. */
+ *low_out = mips_got_load (temp, addr, SYMBOL_GOTOFF_PAGE);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return true;
+ }
+ }
+ else
+ {
+ if (mips_symbolic_constant_p (addr, context, &symbol_type)
+ && mips_symbol_insns (symbol_type, mode) > 0
+ && mips_split_p[symbol_type])
+ {
+ if (low_out)
+ switch (symbol_type)
+ {
+ case SYMBOL_GOT_DISP:
+ /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
+ *low_out = mips_got_load (temp, addr, SYMBOL_GOTOFF_DISP);
+ break;
+
+ case SYMBOL_GP_RELATIVE:
+ high = mips_pic_base_register (temp);
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+ break;
+
+ default:
+ high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
+ high = mips_force_temporary (temp, high);
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Return a legitimate address for REG + OFFSET. TEMP is as for
+ mips_force_temporary; it is only needed when OFFSET is not a
+ SMALL_OPERAND. */
+
+static rtx
+mips_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
+{
+ if (!SMALL_OPERAND (offset))
+ {
+ rtx high;
+
+ if (TARGET_MIPS16)
+ {
+ /* Load the full offset into a register so that we can use
+ an unextended instruction for the address itself. */
+ high = GEN_INT (offset);
+ offset = 0;
+ }
+ else
+ {
+ /* Leave OFFSET as a 16-bit offset and put the excess in HIGH.
+ The addition inside the macro CONST_HIGH_PART may cause an
+ overflow, so we need to force a sign-extension check. */
+ high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
+ offset = CONST_LOW_PART (offset);
+ }
+ high = mips_force_temporary (temp, high);
+ reg = mips_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
+ }
+ return plus_constant (reg, offset);
+}
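+
+/* Worked example for the non-MIPS16 path above: OFFSET 0x12345 splits
+ into CONST_HIGH_PART 0x10000 (added to REG via the HIGH temporary) and
+ CONST_LOW_PART 0x2345, which remains as the small offset. */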
+
+/* The __tls_get_addr symbol. */
+static GTY(()) rtx mips_tls_symbol;
+
+/* Return an instruction sequence that calls __tls_get_addr. SYM is
+ the TLS symbol we are referencing and TYPE is the symbol type to use
+ (either global dynamic or local dynamic). V0 is an RTX for the
+ return value location. */
+
+static rtx
+mips_call_tls_get_addr (rtx sym, enum mips_symbol_type type, rtx v0)
+{
+ rtx insn, loc, a0;
+
+ a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
+
+ if (!mips_tls_symbol)
+ mips_tls_symbol = init_one_libfunc ("__tls_get_addr");
+
+ loc = mips_unspec_address (sym, type);
+
+ start_sequence ();
+
+ emit_insn (gen_rtx_SET (Pmode, a0,
+ gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, loc)));
+ insn = mips_expand_call (MIPS_CALL_NORMAL, v0, mips_tls_symbol,
+ const0_rtx, NULL_RTX, false);
+ RTL_CONST_CALL_P (insn) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
+ insn = get_insns ();
+
+ end_sequence ();
+
+ return insn;
+}
+
+/* Return a pseudo register that contains the current thread pointer. */
+
+static rtx
+mips_get_tp (void)
+{
+ rtx tp;
+
+ tp = gen_reg_rtx (Pmode);
+ if (Pmode == DImode)
+ emit_insn (gen_tls_get_tp_di (tp));
+ else
+ emit_insn (gen_tls_get_tp_si (tp));
+ return tp;
+}
+
+/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
+ its address. The return value will be both a valid address and a valid
+ SET_SRC (either a REG or a LO_SUM). */
+
+static rtx
+mips_legitimize_tls_address (rtx loc)
+{
+ rtx dest, insn, v0, tp, tmp1, tmp2, eqv;
+ enum tls_model model;
+
+ if (TARGET_MIPS16)
+ {
+ sorry ("MIPS16 TLS");
+ return gen_reg_rtx (Pmode);
+ }
+
+ model = SYMBOL_REF_TLS_MODEL (loc);
+ /* Only TARGET_ABICALLS code can have more than one module; other
+ code must be static and should not use a GOT. All TLS models
+ reduce to local exec in this situation. */
+ if (!TARGET_ABICALLS)
+ model = TLS_MODEL_LOCAL_EXEC;
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ v0 = gen_rtx_REG (Pmode, GP_RETURN);
+ insn = mips_call_tls_get_addr (loc, SYMBOL_TLSGD, v0);
+ dest = gen_reg_rtx (Pmode);
+ emit_libcall_block (insn, dest, v0, loc);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ v0 = gen_rtx_REG (Pmode, GP_RETURN);
+ insn = mips_call_tls_get_addr (loc, SYMBOL_TLSLDM, v0);
+ tmp1 = gen_reg_rtx (Pmode);
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LDM result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LDM);
+ emit_libcall_block (insn, tmp1, v0, eqv);
+
+ tmp2 = mips_unspec_offset_high (NULL, tmp1, loc, SYMBOL_DTPREL);
+ dest = gen_rtx_LO_SUM (Pmode, tmp2,
+ mips_unspec_address (loc, SYMBOL_DTPREL));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ tp = mips_get_tp ();
+ tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = mips_unspec_address (loc, SYMBOL_GOTTPREL);
+ if (Pmode == DImode)
+ emit_insn (gen_load_gotdi (tmp1, pic_offset_table_rtx, tmp2));
+ else
+ emit_insn (gen_load_gotsi (tmp1, pic_offset_table_rtx, tmp2));
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_add3_insn (dest, tmp1, tp));
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = mips_get_tp ();
+ tmp1 = mips_unspec_offset_high (NULL, tp, loc, SYMBOL_TPREL);
+ dest = gen_rtx_LO_SUM (Pmode, tmp1,
+ mips_unspec_address (loc, SYMBOL_TPREL));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return dest;
+}
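+
+/* Illustrative sketch, not emitted verbatim: for TLS_MODEL_LOCAL_EXEC
+ the code above amounts to roughly
+
+ rdhwr $3,$29 # thread pointer, via mips_get_tp
+ lui $2,%tprel_hi(x)
+ addu $2,$2,$3
+
+ with %tprel_lo(x)($2) used as the LO_SUM part of the final access. */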
+
+/* If X is not a valid address for mode MODE, force it into a register. */
+
+static rtx
+mips_force_address (rtx x, enum machine_mode mode)
+{
+ if (!mips_legitimate_address_p (mode, x, false))
+ x = force_reg (Pmode, x);
+ return x;
+}
+
+/* This function is used to implement LEGITIMIZE_ADDRESS. If X can
+ be legitimized in a way that the generic machinery might not expect,
+ return a new address, otherwise return NULL. MODE is the mode of
+ the memory being accessed. */
+
+static rtx
+mips_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ rtx base, addr;
+ HOST_WIDE_INT offset;
+
+ if (mips_tls_symbol_p (x))
+ return mips_legitimize_tls_address (x);
+
+ /* See if the address can split into a high part and a LO_SUM. */
+ if (mips_split_symbol (NULL, x, mode, &addr))
+ return mips_force_address (addr, mode);
+
+ /* Handle BASE + OFFSET using mips_add_offset. */
+ mips_split_plus (x, &base, &offset);
+ if (offset != 0)
+ {
+ if (!mips_valid_base_register_p (base, mode, false))
+ base = copy_to_mode_reg (Pmode, base);
+ addr = mips_add_offset (NULL, base, offset);
+ return mips_force_address (addr, mode);
+ }
+
+ return x;
+}
+
+/* Load VALUE into DEST. TEMP is as for mips_force_temporary. */
+
+void
+mips_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
+{
+ struct mips_integer_op codes[MIPS_MAX_INTEGER_OPS];
+ enum machine_mode mode;
+ unsigned int i, num_ops;
+ rtx x;
+
+ mode = GET_MODE (dest);
+ num_ops = mips_build_integer (codes, value);
+
+ /* Apply each binary operation to X. Invariant: X is a legitimate
+ source operand for a SET pattern. */
+ x = GEN_INT (codes[0].value);
+ for (i = 1; i < num_ops; i++)
+ {
+ if (!can_create_pseudo_p ())
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, temp, x));
+ x = temp;
+ }
+ else
+ x = force_reg (mode, x);
+ x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+}
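+
+/* For example, VALUE 0x12345678 produces two operations: the leading
+ constant 0x12340000 (an LUI) followed by an IOR with 0x5678, i.e.
+ "lui $t,0x1234; ori $t,$t,0x5678" with $t an illustrative register. */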
+
+/* Subroutine of mips_legitimize_move. Move constant SRC into register
+ DEST given that SRC satisfies immediate_operand but doesn't satisfy
+ move_operand. */
+
+static void
+mips_legitimize_const_move (enum machine_mode mode, rtx dest, rtx src)
+{
+ rtx base, offset;
+
+ /* Split moves of big integers into smaller pieces. */
+ if (splittable_const_int_operand (src, mode))
+ {
+ mips_move_integer (dest, dest, INTVAL (src));
+ return;
+ }
+
+ /* Split moves of symbolic constants into high/low pairs. */
+ if (mips_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest, src));
+ return;
+ }
+
+ /* Generate the appropriate access sequences for TLS symbols. */
+ if (mips_tls_symbol_p (src))
+ {
+ mips_emit_move (dest, mips_legitimize_tls_address (src));
+ return;
+ }
+
+ /* If we have (const (plus symbol offset)), and that expression cannot
+ be forced into memory, load the symbol first and add in the offset.
+ In non-MIPS16 mode, prefer to do this even if the constant _can_ be
+ forced into memory, as it usually produces better code. */
+ split_const (src, &base, &offset);
+ if (offset != const0_rtx
+ && (targetm.cannot_force_const_mem (src)
+ || (!TARGET_MIPS16 && can_create_pseudo_p ())))
+ {
+ base = mips_force_temporary (dest, base);
+ mips_emit_move (dest, mips_add_offset (NULL, base, INTVAL (offset)));
+ return;
+ }
+
+ src = force_const_mem (mode, src);
+
+ /* When using explicit relocs, constant pool references are sometimes
+ not legitimate addresses. */
+ mips_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
+ mips_emit_move (dest, src);
+}
+
+/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
+ sequence that is valid. */
+
+bool
+mips_legitimize_move (enum machine_mode mode, rtx dest, rtx src)
+{
+ if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
+ {
+ mips_emit_move (dest, force_reg (mode, src));
+ return true;
+ }
+
+ /* We need to deal with constants that would be legitimate
+ immediate_operands but aren't legitimate move_operands. */
+ if (CONSTANT_P (src) && !move_operand (src, mode))
+ {
+ mips_legitimize_const_move (mode, dest, src);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
+ return true;
+ }
+ return false;
+}
+
+/* Return true if value X in context CONTEXT is a small-data address
+ that can be rewritten as a LO_SUM. */
+
+static bool
+mips_rewrite_small_data_p (rtx x, enum mips_symbol_context context)
+{
+ enum mips_symbol_type symbol_type;
+
+ return (mips_lo_relocs[SYMBOL_GP_RELATIVE]
+ && !mips_split_p[SYMBOL_GP_RELATIVE]
+ && mips_symbolic_constant_p (x, context, &symbol_type)
+ && symbol_type == SYMBOL_GP_RELATIVE);
+}
+
+/* A for_each_rtx callback for mips_small_data_pattern_p. DATA is the
+ containing MEM, or null if none. */
+
+static int
+mips_small_data_pattern_1 (rtx *loc, void *data)
+{
+ enum mips_symbol_context context;
+
+ if (GET_CODE (*loc) == LO_SUM)
+ return -1;
+
+ if (MEM_P (*loc))
+ {
+ if (for_each_rtx (&XEXP (*loc, 0), mips_small_data_pattern_1, *loc))
+ return 1;
+ return -1;
+ }
+
+ context = data ? SYMBOL_CONTEXT_MEM : SYMBOL_CONTEXT_LEA;
+ return mips_rewrite_small_data_p (*loc, context);
+}
+
+/* Return true if OP refers to small data symbols directly, not through
+ a LO_SUM. */
+
+bool
+mips_small_data_pattern_p (rtx op)
+{
+ return for_each_rtx (&op, mips_small_data_pattern_1, NULL);
+}
+
+/* A for_each_rtx callback, used by mips_rewrite_small_data.
+ DATA is the containing MEM, or null if none. */
+
+static int
+mips_rewrite_small_data_1 (rtx *loc, void *data)
+{
+ enum mips_symbol_context context;
+
+ if (MEM_P (*loc))
+ {
+ for_each_rtx (&XEXP (*loc, 0), mips_rewrite_small_data_1, *loc);
+ return -1;
+ }
+
+ context = data ? SYMBOL_CONTEXT_MEM : SYMBOL_CONTEXT_LEA;
+ if (mips_rewrite_small_data_p (*loc, context))
+ *loc = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, *loc);
+
+ if (GET_CODE (*loc) == LO_SUM)
+ return -1;
+
+ return 0;
+}
+
+/* Rewrite instruction pattern PATTERN so that it refers to small data
+ using explicit relocations. */
+
+rtx
+mips_rewrite_small_data (rtx pattern)
+{
+ pattern = copy_insn (pattern);
+ for_each_rtx (&pattern, mips_rewrite_small_data_1, NULL);
+ return pattern;
+}
+
+/* We need a lot of little routines to check the range of MIPS16 immediate
+ operands. */
+
+static int
+m16_check_op (rtx op, int low, int high, int mask)
+{
+ return (CONST_INT_P (op)
+ && IN_RANGE (INTVAL (op), low, high)
+ && (INTVAL (op) & mask) == 0);
+}
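+
+/* The predicate names below encode the accepted range: for example,
+ m16_simm8_1 accepts signed 8-bit values with no alignment requirement,
+ while m16_uimm8_4 accepts unsigned 8-bit values scaled by 4. */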
+
+int
+m16_uimm3_b (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, 0x1, 0x8, 0);
+}
+
+int
+m16_simm4_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x8, 0x7, 0);
+}
+
+int
+m16_nsimm4_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x7, 0x8, 0);
+}
+
+int
+m16_simm5_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x10, 0xf, 0);
+}
+
+int
+m16_nsimm5_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0xf, 0x10, 0);
+}
+
+int
+m16_uimm5_4 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x10 << 2, 0xf << 2, 3);
+}
+
+int
+m16_nuimm5_4 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0xf << 2, 0x10 << 2, 3);
+}
+
+int
+m16_simm8_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x80, 0x7f, 0);
+}
+
+int
+m16_nsimm8_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x7f, 0x80, 0);
+}
+
+int
+m16_uimm8_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, 0x0, 0xff, 0);
+}
+
+int
+m16_nuimm8_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0xff, 0x0, 0);
+}
+
+int
+m16_uimm8_m1_1 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x1, 0xfe, 0);
+}
+
+int
+m16_uimm8_4 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, 0x0, 0xff << 2, 3);
+}
+
+int
+m16_nuimm8_4 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0xff << 2, 0x0, 3);
+}
+
+int
+m16_simm8_8 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x80 << 3, 0x7f << 3, 7);
+}
+
+int
+m16_nsimm8_8 (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return m16_check_op (op, -0x7f << 3, 0x80 << 3, 7);
+}
+
+/* The cost of loading values from the constant pool. It should be
+ larger than the cost of any constant we want to synthesize inline. */
+#define CONSTANT_POOL_COST COSTS_N_INSNS (TARGET_MIPS16 ? 4 : 8)
+
+/* Return the cost of X when used as an operand to the MIPS16 instruction
+ that implements CODE. Return -1 if there is no such instruction, or if
+ X is not a valid immediate operand for it. */
+
+static int
+mips16_constant_cost (int code, HOST_WIDE_INT x)
+{
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* Shifts by between 1 and 8 bits (inclusive) are unextended,
+ other shifts are extended. The shift patterns truncate the shift
+ count to the right size, so there are no out-of-range values. */
+ if (IN_RANGE (x, 1, 8))
+ return 0;
+ return COSTS_N_INSNS (1);
+
+ case PLUS:
+ if (IN_RANGE (x, -128, 127))
+ return 0;
+ if (SMALL_OPERAND (x))
+ return COSTS_N_INSNS (1);
+ return -1;
+
+ case LEU:
+ /* Like LE, but reject the always-true case. */
+ if (x == -1)
+ return -1;
+ /* Fall through. */
+ case LE:
+ /* We add 1 to the immediate and use SLT. */
+ x += 1;
+ /* Fall through. */
+ case XOR:
+ /* We can use CMPI for an xor with an unsigned 16-bit X. */
+ case LT:
+ case LTU:
+ if (IN_RANGE (x, 0, 255))
+ return 0;
+ if (SMALL_OPERAND_UNSIGNED (x))
+ return COSTS_N_INSNS (1);
+ return -1;
+
+ case EQ:
+ case NE:
+ /* Equality comparisons with 0 are cheap. */
+ if (x == 0)
+ return 0;
+ return -1;
+
+ default:
+ return -1;
+ }
+}
+
+/* Return true if there is a non-MIPS16 instruction that implements CODE
+ and if that instruction accepts X as an immediate operand. */
+
+static int
+mips_immediate_operand_p (int code, HOST_WIDE_INT x)
+{
+ switch (code)
+ {
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* All shift counts are truncated to a valid constant. */
+ return true;
+
+ case ROTATE:
+ case ROTATERT:
+ /* Likewise rotates, if the target supports rotates at all. */
+ return ISA_HAS_ROR;
+
+ case AND:
+ case IOR:
+ case XOR:
+ /* These instructions take 16-bit unsigned immediates. */
+ return SMALL_OPERAND_UNSIGNED (x);
+
+ case PLUS:
+ case LT:
+ case LTU:
+ /* These instructions take 16-bit signed immediates. */
+ return SMALL_OPERAND (x);
+
+ case EQ:
+ case NE:
+ case GT:
+ case GTU:
+ /* The "immediate" forms of these instructions are really
+ implemented as comparisons with register 0. */
+ return x == 0;
+
+ case GE:
+ case GEU:
+ /* Likewise, meaning that the only valid immediate operand is 1. */
+ return x == 1;
+
+ case LE:
+ /* We add 1 to the immediate and use SLT. */
+ return SMALL_OPERAND (x + 1);
+
+ case LEU:
+ /* Likewise SLTU, but reject the always-true case. */
+ return SMALL_OPERAND (x + 1) && x + 1 != 0;
+
+ case SIGN_EXTRACT:
+ case ZERO_EXTRACT:
+ /* The bit position and size are immediate operands. */
+ return ISA_HAS_EXT_INS;
+
+ default:
+ /* By default assume that $0 can be used for 0. */
+ return x == 0;
+ }
+}
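+
+/* For example, (plus x 0x7fff) can use ADDIU directly because 0x7fff
+ is a signed 16-bit SMALL_OPERAND, whereas (plus x 0x8000) cannot and
+ must be synthesized in a register first. */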
+
+/* Return the cost of binary operation X, given that the instruction
+ sequence for a word-sized or smaller operation has cost SINGLE_COST
+ and that the sequence of a double-word operation has cost DOUBLE_COST.
+ If SPEED is true, optimize for speed otherwise optimize for size. */
+
+static int
+mips_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
+{
+ int cost;
+
+ if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
+ cost = double_cost;
+ else
+ cost = single_cost;
+ return (cost
+ + rtx_cost (XEXP (x, 0), SET, speed)
+ + rtx_cost (XEXP (x, 1), GET_CODE (x), speed));
+}
+
+/* Return the cost of floating-point multiplications of mode MODE. */
+
+static int
+mips_fp_mult_cost (enum machine_mode mode)
+{
+ return mode == DFmode ? mips_cost->fp_mult_df : mips_cost->fp_mult_sf;
+}
+
+/* Return the cost of floating-point divisions of mode MODE. */
+
+static int
+mips_fp_div_cost (enum machine_mode mode)
+{
+ return mode == DFmode ? mips_cost->fp_div_df : mips_cost->fp_div_sf;
+}
+
+/* Return the cost of sign-extending OP to mode MODE, not including the
+ cost of OP itself. */
+
+static int
+mips_sign_extend_cost (enum machine_mode mode, rtx op)
+{
+ if (MEM_P (op))
+ /* Extended loads are as cheap as unextended ones. */
+ return 0;
+
+ if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode)
+ /* A sign extension from SImode to DImode in 64-bit mode is free. */
+ return 0;
+
+ if (ISA_HAS_SEB_SEH || GENERATE_MIPS16E)
+ /* We can use SEB or SEH. */
+ return COSTS_N_INSNS (1);
+
+ /* We need to use a shift left and a shift right. */
+ return COSTS_N_INSNS (TARGET_MIPS16 ? 4 : 2);
+}
+
+/* Return the cost of zero-extending OP to mode MODE, not including the
+ cost of OP itself. */
+
+static int
+mips_zero_extend_cost (enum machine_mode mode, rtx op)
+{
+ if (MEM_P (op))
+ /* Extended loads are as cheap as unextended ones. */
+ return 0;
+
+ if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode)
+ /* We need a shift left by 32 bits and a shift right by 32 bits. */
+ return COSTS_N_INSNS (TARGET_MIPS16 ? 4 : 2);
+
+ if (GENERATE_MIPS16E)
+ /* We can use ZEB or ZEH. */
+ return COSTS_N_INSNS (1);
+
+ if (TARGET_MIPS16)
+ /* We need to load 0xff or 0xffff into a register and use AND. */
+ return COSTS_N_INSNS (GET_MODE (op) == QImode ? 2 : 3);
+
+ /* We can use ANDI. */
+ return COSTS_N_INSNS (1);
+}
+
+/* Implement TARGET_RTX_COSTS. */
+
+static bool
+mips_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+ bool float_mode_p = FLOAT_MODE_P (mode);
+ int cost;
+ rtx addr;
+
+ /* The cost of a COMPARE is hard to define for MIPS. COMPAREs don't
+ appear in the instruction stream, and the cost of a comparison is
+ really the cost of the branch or scc condition. At the time of
+ writing, GCC only uses an explicit outer COMPARE code when optabs
+ is testing whether a constant is expensive enough to force into a
+ register. We want optabs to pass such constants through the MIPS
+ expanders instead, so make all constants very cheap here. */
+ if (outer_code == COMPARE)
+ {
+ gcc_assert (CONSTANT_P (x));
+ *total = 0;
+ return true;
+ }
+
+ switch (code)
+ {
+ case CONST_INT:
+ /* Treat *clear_upper32-style ANDs as having zero cost in the
+ second operand. The cost is entirely in the first operand.
+
+ ??? This is needed because we would otherwise try to CSE
+ the constant operand. Although that's the right thing for
+ instructions that continue to be a register operation throughout
+ compilation, it is disastrous for instructions that could
+ later be converted into a memory operation. */
+ if (TARGET_64BIT
+ && outer_code == AND
+ && UINTVAL (x) == 0xffffffff)
+ {
+ *total = 0;
+ return true;
+ }
+
+ if (TARGET_MIPS16)
+ {
+ cost = mips16_constant_cost (outer_code, INTVAL (x));
+ if (cost >= 0)
+ {
+ *total = cost;
+ return true;
+ }
+ }
+ else
+ {
+ /* When not optimizing for size, we care more about the cost
+ of hot code, and hot code is often in a loop. If a constant
+ operand needs to be forced into a register, we will often be
+ able to hoist the constant load out of the loop, so the load
+ should not contribute to the cost. */
+ if (speed || mips_immediate_operand_p (outer_code, INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ }
+ /* Fall through. */
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST_DOUBLE:
+ if (force_to_mem_operand (x, VOIDmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ cost = mips_const_insns (x);
+ if (cost > 0)
+ {
+ /* If the constant is likely to be stored in a GPR, SETs of
+ single-insn constants are as cheap as register sets; we
+ never want to CSE them.
+
+ Don't reduce the cost of storing a floating-point zero in
+ FPRs. If we have a zero in an FPR for other reasons, we
+ can get better cfg-cleanup and delayed-branch results by
+ using it consistently, rather than using $0 sometimes and
+ an FPR at other times. Also, moves between floating-point
+ registers are sometimes cheaper than (D)MTC1 $0. */
+ if (cost == 1
+ && outer_code == SET
+ && !(float_mode_p && TARGET_HARD_FLOAT))
+ cost = 0;
+ /* When non-MIPS16 code loads a constant N>1 times, we rarely
+ want to CSE the constant itself. It is usually better to
+ have N copies of the last operation in the sequence and one
+ shared copy of the other operations. (Note that this is
+ not true for MIPS16 code, where the final operation in the
+ sequence is often an extended instruction.)
+
+ Also, if we have a CONST_INT, we don't know whether it is
+ for a word or doubleword operation, so we cannot rely on
+ the result of mips_build_integer. */
+ else if (!TARGET_MIPS16
+ && (outer_code == SET || mode == VOIDmode))
+ cost = 1;
+ *total = COSTS_N_INSNS (cost);
+ return true;
+ }
+ /* The value will need to be fetched from the constant pool. */
+ *total = CONSTANT_POOL_COST;
+ return true;
+
+ case MEM:
+ /* If the address is legitimate, return the number of
+ instructions it needs. */
+ addr = XEXP (x, 0);
+ cost = mips_address_insns (addr, mode, true);
+ if (cost > 0)
+ {
+ *total = COSTS_N_INSNS (cost + 1);
+ return true;
+ }
+ /* Check for a scaled indexed address. */
+ if (mips_lwxs_address_p (addr))
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ /* Otherwise use the default handling. */
+ return false;
+
+ case FFS:
+ *total = COSTS_N_INSNS (6);
+ return false;
+
+ case NOT:
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
+ return false;
+
+ case AND:
+ /* Check for a *clear_upper32 pattern and treat it like a zero
+ extension. See the pattern's comment for details. */
+ if (TARGET_64BIT
+ && mode == DImode
+ && CONST_INT_P (XEXP (x, 1))
+ && UINTVAL (XEXP (x, 1)) == 0xffffffff)
+ {
+ *total = (mips_zero_extend_cost (mode, XEXP (x, 0))
+ + rtx_cost (XEXP (x, 0), SET, speed));
+ return true;
+ }
+ /* Fall through. */
+
+ case IOR:
+ case XOR:
+ /* Double-word operations use two single-word operations. */
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+ speed);
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ if (CONSTANT_P (XEXP (x, 1)))
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ speed);
+ else
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (12),
+ speed);
+ return true;
+
+ case ABS:
+ if (float_mode_p)
+ *total = mips_cost->fp_add;
+ else
+ *total = COSTS_N_INSNS (4);
+ return false;
+
+ case LO_SUM:
+ /* Low-part immediates need an extended MIPS16 instruction. */
+ *total = (COSTS_N_INSNS (TARGET_MIPS16 ? 2 : 1)
+ + rtx_cost (XEXP (x, 0), SET, speed));
+ return true;
+
+ case LT:
+ case LTU:
+ case LE:
+ case LEU:
+ case GT:
+ case GTU:
+ case GE:
+ case GEU:
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case LTGT:
+ /* Branch comparisons have VOIDmode, so use the first operand's
+ mode instead. */
+ mode = GET_MODE (XEXP (x, 0));
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = mips_cost->fp_add;
+ return false;
+ }
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+ speed);
+ return true;
+
+ case MINUS:
+ if (float_mode_p
+ && (ISA_HAS_NMADD4_NMSUB4 (mode) || ISA_HAS_NMADD3_NMSUB3 (mode))
+ && TARGET_FUSED_MADD
+ && !HONOR_NANS (mode)
+ && !HONOR_SIGNED_ZEROS (mode))
+ {
+ /* See if we can use NMADD or NMSUB. See mips.md for the
+ associated patterns. */
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == MULT && GET_CODE (XEXP (op0, 0)) == NEG)
+ {
+ *total = (mips_fp_mult_cost (mode)
+ + rtx_cost (XEXP (XEXP (op0, 0), 0), SET, speed)
+ + rtx_cost (XEXP (op0, 1), SET, speed)
+ + rtx_cost (op1, SET, speed));
+ return true;
+ }
+ if (GET_CODE (op1) == MULT)
+ {
+ *total = (mips_fp_mult_cost (mode)
+ + rtx_cost (op0, SET, speed)
+ + rtx_cost (XEXP (op1, 0), SET, speed)
+ + rtx_cost (XEXP (op1, 1), SET, speed));
+ return true;
+ }
+ }
+ /* Fall through. */
+
+ case PLUS:
+ if (float_mode_p)
+ {
+ /* If this is part of a MADD or MSUB, treat the PLUS as
+ being free. */
+ if (ISA_HAS_FP4
+ && TARGET_FUSED_MADD
+ && GET_CODE (XEXP (x, 0)) == MULT)
+ *total = 0;
+ else
+ *total = mips_cost->fp_add;
+ return false;
+ }
+
+ /* Double-word operations require three single-word operations and
+ an SLTU. The MIPS16 version then needs to move the result of
+ the SLTU from $24 to a MIPS16 register. */
+ *total = mips_binary_cost (x, COSTS_N_INSNS (1),
+ COSTS_N_INSNS (TARGET_MIPS16 ? 5 : 4),
+ speed);
+ return true;
+
+ case NEG:
+ if (float_mode_p
+ && (ISA_HAS_NMADD4_NMSUB4 (mode) || ISA_HAS_NMADD3_NMSUB3 (mode))
+ && TARGET_FUSED_MADD
+ && !HONOR_NANS (mode)
+ && HONOR_SIGNED_ZEROS (mode))
+ {
+ /* See if we can use NMADD or NMSUB. See mips.md for the
+ associated patterns. */
+ rtx op = XEXP (x, 0);
+ if ((GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
+ && GET_CODE (XEXP (op, 0)) == MULT)
+ {
+ *total = (mips_fp_mult_cost (mode)
+ + rtx_cost (XEXP (XEXP (op, 0), 0), SET, speed)
+ + rtx_cost (XEXP (XEXP (op, 0), 1), SET, speed)
+ + rtx_cost (XEXP (op, 1), SET, speed));
+ return true;
+ }
+ }
+
+ if (float_mode_p)
+ *total = mips_cost->fp_add;
+ else
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
+ return false;
+
+ case MULT:
+ if (float_mode_p)
+ *total = mips_fp_mult_cost (mode);
+ else if (mode == DImode && !TARGET_64BIT)
+ /* Synthesized from 2 mulsi3s, 1 mulsidi3 and two additions,
+ where the mulsidi3 always includes an MFHI and an MFLO. */
+ *total = (speed
+ ? mips_cost->int_mult_si * 3 + 6
+ : COSTS_N_INSNS (ISA_HAS_MUL3 ? 7 : 9));
+ else if (!speed)
+ *total = (ISA_HAS_MUL3 ? 1 : 2);
+ else if (mode == DImode)
+ *total = mips_cost->int_mult_di;
+ else
+ *total = mips_cost->int_mult_si;
+ return false;
+
+ case DIV:
+ /* Check for a reciprocal. */
+ if (float_mode_p
+ && ISA_HAS_FP4
+ && flag_unsafe_math_optimizations
+ && XEXP (x, 0) == CONST1_RTX (mode))
+ {
+ if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
+ /* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the
+ division as being free. */
+ *total = rtx_cost (XEXP (x, 1), SET, speed);
+ else
+ *total = (mips_fp_div_cost (mode)
+ + rtx_cost (XEXP (x, 1), SET, speed));
+ return true;
+ }
+ /* Fall through. */
+
+ case SQRT:
+ case MOD:
+ if (float_mode_p)
+ {
+ *total = mips_fp_div_cost (mode);
+ return false;
+ }
+ /* Fall through. */
+
+ case UDIV:
+ case UMOD:
+ if (!speed)
+ {
+ /* It is our responsibility to make division by a power of 2
+ as cheap as 2 register additions if we want the division
+ expanders to be used for such operations; see the setting
+ of sdiv_pow2_cheap in optabs.c. Using (D)DIV for MIPS16
+ should always produce shorter code than using
+ expand_sdiv2_pow2. */
+ if (TARGET_MIPS16
+ && CONST_INT_P (XEXP (x, 1))
+ && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), SET, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (mips_idiv_insns ());
+ }
+ else if (mode == DImode)
+ *total = mips_cost->int_div_di;
+ else
+ *total = mips_cost->int_div_si;
+ return false;
+
+ case SIGN_EXTEND:
+ *total = mips_sign_extend_cost (mode, XEXP (x, 0));
+ return false;
+
+ case ZERO_EXTEND:
+ *total = mips_zero_extend_cost (mode, XEXP (x, 0));
+ return false;
+
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ case FIX:
+ case FLOAT_EXTEND:
+ case FLOAT_TRUNCATE:
+ *total = mips_cost->fp_add;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Implement TARGET_ADDRESS_COST. */
+
+static int
+mips_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ return mips_address_insns (addr, SImode, false);
+}
+
+/* Information about a single instruction in a multi-instruction
+ asm sequence. */
+struct mips_multi_member {
+ /* True if this is a label, false if it is code. */
+ bool is_label_p;
+
+ /* The output_asm_insn format of the instruction. */
+ const char *format;
+
+ /* The operands to the instruction. */
+ rtx operands[MAX_RECOG_OPERANDS];
+};
+typedef struct mips_multi_member mips_multi_member;
+
+/* Vector definitions for the above. */
+DEF_VEC_O(mips_multi_member);
+DEF_VEC_ALLOC_O(mips_multi_member, heap);
+
+/* The instructions that make up the current multi-insn sequence. */
+static VEC (mips_multi_member, heap) *mips_multi_members;
+
+/* How many instructions (as opposed to labels) are in the current
+ multi-insn sequence. */
+static unsigned int mips_multi_num_insns;
+
+/* Start a new multi-insn sequence. */
+
+static void
+mips_multi_start (void)
+{
+ VEC_truncate (mips_multi_member, mips_multi_members, 0);
+ mips_multi_num_insns = 0;
+}
+
+/* Add a new, uninitialized member to the current multi-insn sequence. */
+
+static struct mips_multi_member *
+mips_multi_add (void)
+{
+ return VEC_safe_push (mips_multi_member, heap, mips_multi_members, 0);
+}
+
+/* Add a normal insn with the given asm format to the current multi-insn
+ sequence. The other arguments are a null-terminated list of operands. */
+
+static void
+mips_multi_add_insn (const char *format, ...)
+{
+ struct mips_multi_member *member;
+ va_list ap;
+ unsigned int i;
+ rtx op;
+
+ member = mips_multi_add ();
+ member->is_label_p = false;
+ member->format = format;
+ va_start (ap, format);
+ i = 0;
+ while ((op = va_arg (ap, rtx)))
+ member->operands[i++] = op;
+ va_end (ap);
+ mips_multi_num_insns++;
+}
+
+/* Add the given label definition to the current multi-insn sequence.
+ The definition should include the colon. */
+
+static void
+mips_multi_add_label (const char *label)
+{
+ struct mips_multi_member *member;
+
+ member = mips_multi_add ();
+ member->is_label_p = true;
+ member->format = label;
+}
+
+/* Return the index of the last member of the current multi-insn sequence. */
+
+static unsigned int
+mips_multi_last_index (void)
+{
+ return VEC_length (mips_multi_member, mips_multi_members) - 1;
+}
+
+/* Add a copy of an existing instruction to the current multi-insn
+ sequence. I is the index of the instruction that should be copied. */
+
+static void
+mips_multi_copy_insn (unsigned int i)
+{
+ struct mips_multi_member *member;
+
+ member = mips_multi_add ();
+ memcpy (member, VEC_index (mips_multi_member, mips_multi_members, i),
+ sizeof (*member));
+ gcc_assert (!member->is_label_p);
+}
+
+/* Change the operand of an existing instruction in the current
+ multi-insn sequence. I is the index of the instruction,
+ OP is the index of the operand, and X is the new value. */
+
+static void
+mips_multi_set_operand (unsigned int i, unsigned int op, rtx x)
+{
+ VEC_index (mips_multi_member, mips_multi_members, i)->operands[op] = x;
+}
+
+/* Write out the asm code for the current multi-insn sequence. */
+
+static void
+mips_multi_write (void)
+{
+ struct mips_multi_member *member;
+ unsigned int i;
+
+ FOR_EACH_VEC_ELT (mips_multi_member, mips_multi_members, i, member)
+ if (member->is_label_p)
+ fprintf (asm_out_file, "%s\n", member->format);
+ else
+ output_asm_insn (member->format, member->operands);
+}
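+
+/* Typical usage sketch (illustrative): a caller runs mips_multi_start,
+ adds instructions with mips_multi_add_insn and labels with
+ mips_multi_add_label, and finally prints the whole sequence with
+ mips_multi_write; mips_multi_num_insns then feeds length attributes. */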
+
+/* Return one word of double-word value OP, taking into account the fixed
+ endianness of certain registers. HIGH_P is true to select the high part,
+ false to select the low part. */
+
+rtx
+mips_subword (rtx op, bool high_p)
+{
+ unsigned int byte, offset;
+ enum machine_mode mode;
+
+ mode = GET_MODE (op);
+ if (mode == VOIDmode)
+ mode = TARGET_64BIT ? TImode : DImode;
+
+ if (TARGET_BIG_ENDIAN ? !high_p : high_p)
+ byte = UNITS_PER_WORD;
+ else
+ byte = 0;
+
+ if (FP_REG_RTX_P (op))
+ {
+ /* Paired FPRs are always ordered little-endian. */
+ offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0);
+ return gen_rtx_REG (word_mode, REGNO (op) + offset);
+ }
+
+ if (MEM_P (op))
+ return mips_rewrite_small_data (adjust_address (op, word_mode, byte));
+
+ return simplify_gen_subreg (word_mode, op, mode, byte);
+}
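+
+/* For example, on a 32-bit big-endian target the high word of a DImode
+ GPR value lives at byte 0 and the low word at byte 4, so HIGH_P
+ selects byte 0 there; the opposite holds for little-endian. */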
+
+/* Return true if a 64-bit move from SRC to DEST should be split into two. */
+
+bool
+mips_split_64bit_move_p (rtx dest, rtx src)
+{
+ if (TARGET_64BIT)
+ return false;
+
+ /* FPR-to-FPR moves can be done in a single instruction, if they're
+ allowed at all. */
+ if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+ return false;
+
+ /* Check for floating-point loads and stores. */
+ if (ISA_HAS_LDC1_SDC1)
+ {
+ if (FP_REG_RTX_P (dest) && MEM_P (src))
+ return false;
+ if (FP_REG_RTX_P (src) && MEM_P (dest))
+ return false;
+ }
+ return true;
+}
+
+/* Split a doubleword move from SRC to DEST. On 32-bit targets,
+ this function handles 64-bit moves for which mips_split_64bit_move_p
+ holds. For 64-bit targets, this function handles 128-bit moves. */
+
+void
+mips_split_doubleword_move (rtx dest, rtx src)
+{
+ rtx low_dest;
+
+ if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
+ {
+ if (!TARGET_64BIT && GET_MODE (dest) == DImode)
+ emit_insn (gen_move_doubleword_fprdi (dest, src));
+ else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
+ emit_insn (gen_move_doubleword_fprdf (dest, src));
+ else if (!TARGET_64BIT && GET_MODE (dest) == V2SFmode)
+ emit_insn (gen_move_doubleword_fprv2sf (dest, src));
+ else if (!TARGET_64BIT && GET_MODE (dest) == V2SImode)
+ emit_insn (gen_move_doubleword_fprv2si (dest, src));
+ else if (!TARGET_64BIT && GET_MODE (dest) == V4HImode)
+ emit_insn (gen_move_doubleword_fprv4hi (dest, src));
+ else if (!TARGET_64BIT && GET_MODE (dest) == V8QImode)
+ emit_insn (gen_move_doubleword_fprv8qi (dest, src));
+ else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
+ emit_insn (gen_move_doubleword_fprtf (dest, src));
+ else
+ gcc_unreachable ();
+ }
+ else if (REG_P (dest) && REGNO (dest) == MD_REG_FIRST)
+ {
+ low_dest = mips_subword (dest, false);
+ mips_emit_move (low_dest, mips_subword (src, false));
+ if (TARGET_64BIT)
+ emit_insn (gen_mthidi_ti (dest, mips_subword (src, true), low_dest));
+ else
+ emit_insn (gen_mthisi_di (dest, mips_subword (src, true), low_dest));
+ }
+ else if (REG_P (src) && REGNO (src) == MD_REG_FIRST)
+ {
+ mips_emit_move (mips_subword (dest, false), mips_subword (src, false));
+ if (TARGET_64BIT)
+ emit_insn (gen_mfhidi_ti (mips_subword (dest, true), src));
+ else
+ emit_insn (gen_mfhisi_di (mips_subword (dest, true), src));
+ }
+ else
+ {
+ /* The operation can be split into two normal moves. Decide in
+ which order to do them. */
+ low_dest = mips_subword (dest, false);
+ if (REG_P (low_dest)
+ && reg_overlap_mentioned_p (low_dest, src))
+ {
+ mips_emit_move (mips_subword (dest, true), mips_subword (src, true));
+ mips_emit_move (low_dest, mips_subword (src, false));
+ }
+ else
+ {
+ mips_emit_move (low_dest, mips_subword (src, false));
+ mips_emit_move (mips_subword (dest, true), mips_subword (src, true));
+ }
+ }
+}
+
+/* Return the appropriate instructions to move SRC into DEST. Assume
+ that SRC is operand 1 and DEST is operand 0. */
+
+const char *
+mips_output_move (rtx dest, rtx src)
+{
+ enum rtx_code dest_code, src_code;
+ enum machine_mode mode;
+ enum mips_symbol_type symbol_type;
+ bool dbl_p;
+
+ dest_code = GET_CODE (dest);
+ src_code = GET_CODE (src);
+ mode = GET_MODE (dest);
+ dbl_p = (GET_MODE_SIZE (mode) == 8);
+
+ if (dbl_p && mips_split_64bit_move_p (dest, src))
+ return "#";
+
+ if ((src_code == REG && GP_REG_P (REGNO (src)))
+ || (!TARGET_MIPS16 && src == CONST0_RTX (mode)))
+ {
+ if (dest_code == REG)
+ {
+ if (GP_REG_P (REGNO (dest)))
+ return "move\t%0,%z1";
+
+ /* Moves to HI are handled by special .md insns. */
+ if (REGNO (dest) == LO_REGNUM)
+ return "mtlo\t%z1";
+
+ if (DSP_ACC_REG_P (REGNO (dest)))
+ {
+ static char retval[] = "mt__\t%z1,%q0";
+
+ retval[2] = reg_names[REGNO (dest)][4];
+ retval[3] = reg_names[REGNO (dest)][5];
+ return retval;
+ }
+
+ if (FP_REG_P (REGNO (dest)))
+ return dbl_p ? "dmtc1\t%z1,%0" : "mtc1\t%z1,%0";
+
+ if (ALL_COP_REG_P (REGNO (dest)))
+ {
+ static char retval[] = "dmtc_\t%z1,%0";
+
+ retval[4] = COPNUM_AS_CHAR_FROM_REGNUM (REGNO (dest));
+ return dbl_p ? retval : retval + 1;
+ }
+ }
+ if (dest_code == MEM)
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1: return "sb\t%z1,%0";
+ case 2: return "sh\t%z1,%0";
+ case 4: return "sw\t%z1,%0";
+ case 8: return "sd\t%z1,%0";
+ }
+ }
+ if (dest_code == REG && GP_REG_P (REGNO (dest)))
+ {
+ if (src_code == REG)
+ {
+ /* Moves from HI are handled by special .md insns. */
+ if (REGNO (src) == LO_REGNUM)
+ {
+ /* When generating VR4120 or VR4130 code, we use MACC and
+ DMACC instead of MFLO. This avoids both the normal
+ MIPS III HI/LO hazards and the errata related to
+ -mfix-vr4130. */
+ if (ISA_HAS_MACCHI)
+ return dbl_p ? "dmacc\t%0,%.,%." : "macc\t%0,%.,%.";
+ return "mflo\t%0";
+ }
+
+ if (DSP_ACC_REG_P (REGNO (src)))
+ {
+ static char retval[] = "mf__\t%0,%q1";
+
+ retval[2] = reg_names[REGNO (src)][4];
+ retval[3] = reg_names[REGNO (src)][5];
+ return retval;
+ }
+
+ if (FP_REG_P (REGNO (src)))
+ return dbl_p ? "dmfc1\t%0,%1" : "mfc1\t%0,%1";
+
+ if (ALL_COP_REG_P (REGNO (src)))
+ {
+ static char retval[] = "dmfc_\t%0,%1";
+
+ retval[4] = COPNUM_AS_CHAR_FROM_REGNUM (REGNO (src));
+ return dbl_p ? retval : retval + 1;
+ }
+
+ if (ST_REG_P (REGNO (src)) && ISA_HAS_8CC)
+ return "lui\t%0,0x3f80\n\tmovf\t%0,%.,%1";
+ }
+
+ if (src_code == MEM)
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1: return "lbu\t%0,%1";
+ case 2: return "lhu\t%0,%1";
+ case 4: return "lw\t%0,%1";
+ case 8: return "ld\t%0,%1";
+ }
+
+ if (src_code == CONST_INT)
+ {
+ /* Don't use the X format for the operand itself, because that
+ will give out-of-range numbers for 64-bit hosts and 32-bit
+ targets. */
+ if (!TARGET_MIPS16)
+ return "li\t%0,%1\t\t\t# %X1";
+
+ if (SMALL_OPERAND_UNSIGNED (INTVAL (src)))
+ return "li\t%0,%1";
+
+ if (SMALL_OPERAND_UNSIGNED (-INTVAL (src)))
+ return "#";
+ }
+
+ if (src_code == HIGH)
+ return TARGET_MIPS16 ? "#" : "lui\t%0,%h1";
+
+ if (CONST_GP_P (src))
+ return "move\t%0,%1";
+
+ if (mips_symbolic_constant_p (src, SYMBOL_CONTEXT_LEA, &symbol_type)
+ && mips_lo_relocs[symbol_type] != 0)
+ {
+ /* A signed 16-bit constant formed by applying a relocation
+ operator to a symbolic address. */
+ gcc_assert (!mips_split_p[symbol_type]);
+ return "li\t%0,%R1";
+ }
+
+ if (symbolic_operand (src, VOIDmode))
+ {
+ gcc_assert (TARGET_MIPS16
+ ? TARGET_MIPS16_TEXT_LOADS
+ : !TARGET_EXPLICIT_RELOCS);
+ return dbl_p ? "dla\t%0,%1" : "la\t%0,%1";
+ }
+ }
+ if (src_code == REG && FP_REG_P (REGNO (src)))
+ {
+ if (dest_code == REG && FP_REG_P (REGNO (dest)))
+ {
+ if (GET_MODE (dest) == V2SFmode)
+ return "mov.ps\t%0,%1";
+ else
+ return dbl_p ? "mov.d\t%0,%1" : "mov.s\t%0,%1";
+ }
+
+ if (dest_code == MEM)
+ return dbl_p ? "sdc1\t%1,%0" : "swc1\t%1,%0";
+ }
+ if (dest_code == REG && FP_REG_P (REGNO (dest)))
+ {
+ if (src_code == MEM)
+ return dbl_p ? "ldc1\t%0,%1" : "lwc1\t%0,%1";
+ }
+ if (dest_code == REG && ALL_COP_REG_P (REGNO (dest)) && src_code == MEM)
+ {
+ static char retval[] = "l_c_\t%0,%1";
+
+ retval[1] = (dbl_p ? 'd' : 'w');
+ retval[3] = COPNUM_AS_CHAR_FROM_REGNUM (REGNO (dest));
+ return retval;
+ }
+ if (dest_code == MEM && src_code == REG && ALL_COP_REG_P (REGNO (src)))
+ {
+ static char retval[] = "s_c_\t%1,%0";
+
+ retval[1] = (dbl_p ? 'd' : 'w');
+ retval[3] = COPNUM_AS_CHAR_FROM_REGNUM (REGNO (src));
+ return retval;
+ }
+ gcc_unreachable ();
+}
+
+/* Return true if CMP1 is a suitable second operand for integer ordering
+ test CODE. See also the *sCC patterns in mips.md. */
+
+static bool
+mips_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
+{
+ switch (code)
+ {
+ case GT:
+ case GTU:
+ return reg_or_0_operand (cmp1, VOIDmode);
+
+ case GE:
+ case GEU:
+ return !TARGET_MIPS16 && cmp1 == const1_rtx;
+
+ case LT:
+ case LTU:
+ return arith_operand (cmp1, VOIDmode);
+
+ case LE:
+ return sle_operand (cmp1, VOIDmode);
+
+ case LEU:
+ return sleu_operand (cmp1, VOIDmode);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if *CMP1 (of mode MODE) is a valid second operand for
+ integer ordering test *CODE, or if an equivalent combination can
+ be formed by adjusting *CODE and *CMP1. When returning true, update
+ *CODE and *CMP1 with the chosen code and operand, otherwise leave
+ them alone. */
+
+static bool
+mips_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
+ enum machine_mode mode)
+{
+ HOST_WIDE_INT plus_one;
+
+ if (mips_int_order_operand_ok_p (*code, *cmp1))
+ return true;
+
+ if (CONST_INT_P (*cmp1))
+ switch (*code)
+ {
+ case LE:
+ plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
+ if (INTVAL (*cmp1) < plus_one)
+ {
+ *code = LT;
+ *cmp1 = force_reg (mode, GEN_INT (plus_one));
+ return true;
+ }
+ break;
+
+ case LEU:
+ plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
+ if (plus_one != 0)
+ {
+ *code = LTU;
+ *cmp1 = force_reg (mode, GEN_INT (plus_one));
+ return true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return false;
+}
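+
+/* For example, (le reg 0x7fff) fails the sle_operand test, so the code
+ above rewrites it as (lt reg 0x8000) with 0x8000 forced into a
+ register; the LEU case similarly guards against the +1 wrapping to 0. */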
+
+/* Compare CMP0 and CMP1 using ordering test CODE and store the result
+ in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
+ is nonnull, it's OK to set TARGET to the inverse of the result and
+ flip *INVERT_PTR instead. */
+
+static void
+mips_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
+ rtx target, rtx cmp0, rtx cmp1)
+{
+ enum machine_mode mode;
+
+ /* First see if there is a MIPS instruction that can do this operation.
+ If not, try doing the same for the inverse operation. If that also
+ fails, force CMP1 into a register and try again. */
+ mode = GET_MODE (cmp0);
+ if (mips_canonicalize_int_order_test (&code, &cmp1, mode))
+ mips_emit_binary (code, target, cmp0, cmp1);
+ else
+ {
+ enum rtx_code inv_code = reverse_condition (code);
+ if (!mips_canonicalize_int_order_test (&inv_code, &cmp1, mode))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ mips_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
+ }
+ else if (invert_ptr == 0)
+ {
+ rtx inv_target;
+
+ inv_target = mips_force_binary (GET_MODE (target),
+ inv_code, cmp0, cmp1);
+ mips_emit_binary (XOR, target, inv_target, const1_rtx);
+ }
+ else
+ {
+ *invert_ptr = !*invert_ptr;
+ mips_emit_binary (inv_code, target, cmp0, cmp1);
+ }
+ }
+}
+
+/* Return a register that is zero iff CMP0 and CMP1 are equal.
+ The register will have the same mode as CMP0. */
+
+static rtx
+mips_zero_if_equal (rtx cmp0, rtx cmp1)
+{
+ if (cmp1 == const0_rtx)
+ return cmp0;
+
+ if (uns_arith_operand (cmp1, VOIDmode))
+ return expand_binop (GET_MODE (cmp0), xor_optab,
+ cmp0, cmp1, 0, 0, OPTAB_DIRECT);
+
+ return expand_binop (GET_MODE (cmp0), sub_optab,
+ cmp0, cmp1, 0, 0, OPTAB_DIRECT);
+}
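+
+/* Illustration (assuming uns_arith_operand accepts unsigned 16-bit
+ immediates): for "a == b" with a small constant b, the XOR form above
+ produces a register holding a ^ b, which is zero exactly when a == b;
+ when b is too wide for xori's immediate field, the subtraction a - b
+ is used instead, which is likewise zero iff the operands are equal. */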
+
+/* Convert *CODE into a code that can be used in a floating-point
+ scc instruction (C.cond.fmt). Return true if the values of
+ the condition code registers will be inverted, with 0 indicating
+ that the condition holds. */
+
+static bool
+mips_reversed_fp_cond (enum rtx_code *code)
+{
+ switch (*code)
+ {
+ case NE:
+ case LTGT:
+ case ORDERED:
+ *code = reverse_condition_maybe_unordered (*code);
+ return true;
+
+ default:
+ return false;
+ }
+}
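+
+/* Example (illustrative): "a != b" cannot be tested directly by
+ C.cond.fmt, so the NE case above replaces *CODE with its reverse and
+ returns true; the caller then emits the equality comparison and
+ branches on the condition-code register being false (bc1f rather than
+ bc1t). */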
+
+/* Convert a comparison into something that can be used in a branch or
+ conditional move. On entry, *OP0 and *OP1 are the values being
+ compared and *CODE is the code used to compare them.
+
+ Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
+ If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are possible,
+ otherwise any standard branch condition can be used. The standard branch
+ conditions are:
+
+ - EQ or NE between two registers.
+ - any comparison between a register and zero. */
+
+static void
+mips_emit_compare (enum rtx_code *code, rtx *op0, rtx *op1, bool need_eq_ne_p)
+{
+ rtx cmp_op0 = *op0;
+ rtx cmp_op1 = *op1;
+
+ if (GET_MODE_CLASS (GET_MODE (*op0)) == MODE_INT)
+ {
+ if (!need_eq_ne_p && *op1 == const0_rtx)
+ ;
+ else if (*code == EQ || *code == NE)
+ {
+ if (need_eq_ne_p)
+ {
+ *op0 = mips_zero_if_equal (cmp_op0, cmp_op1);
+ *op1 = const0_rtx;
+ }
+ else
+ *op1 = force_reg (GET_MODE (cmp_op0), cmp_op1);
+ }
+ else
+ {
+ /* The comparison needs a separate scc instruction. Store the
+ result of the scc in *OP0 and compare it against zero. */
+ bool invert = false;
+ *op0 = gen_reg_rtx (GET_MODE (cmp_op0));
+ mips_emit_int_order_test (*code, &invert, *op0, cmp_op0, cmp_op1);
+ *code = (invert ? EQ : NE);
+ *op1 = const0_rtx;
+ }
+ }
+ else if (ALL_FIXED_POINT_MODE_P (GET_MODE (cmp_op0)))
+ {
+ *op0 = gen_rtx_REG (CCDSPmode, CCDSP_CC_REGNUM);
+ mips_emit_binary (*code, *op0, cmp_op0, cmp_op1);
+ *code = NE;
+ *op1 = const0_rtx;
+ }
+ else
+ {
+ enum rtx_code cmp_code;
+
+ /* Floating-point tests use a separate C.cond.fmt comparison to
+ set a condition code register. The branch or conditional move
+ will then compare that register against zero.
+
+ Set CMP_CODE to the code of the comparison instruction and
+ *CODE to the code that the branch or move should use. */
+ cmp_code = *code;
+ *code = mips_reversed_fp_cond (&cmp_code) ? EQ : NE;
+ *op0 = (ISA_HAS_8CC
+ ? gen_reg_rtx (CCmode)
+ : gen_rtx_REG (CCmode, FPSW_REGNUM));
+ *op1 = const0_rtx;
+ mips_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
+ }
+}
+
+/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2]
+ and OPERANDS[3]. Store the result in OPERANDS[0].
+
+ On 64-bit targets, the mode of the comparison and target will always be
+ SImode, thus possibly narrower than that of the comparison's operands. */
+
+void
+mips_expand_scc (rtx operands[])
+{
+ rtx target = operands[0];
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT);
+
+ if (code == EQ || code == NE)
+ {
+ if (ISA_HAS_SEQ_SNE
+ && reg_imm10_operand (op1, GET_MODE (op1)))
+ mips_emit_binary (code, target, op0, op1);
+ else
+ {
+ rtx zie = mips_zero_if_equal (op0, op1);
+ mips_emit_binary (code, target, zie, const0_rtx);
+ }
+ }
+ else
+ mips_emit_int_order_test (code, 0, target, op0, op1);
+}
+
+/* Compare OPERANDS[1] with OPERANDS[2] using comparison code
+ CODE and jump to OPERANDS[3] if the condition holds. */
+
+void
+mips_expand_conditional_branch (rtx *operands)
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx condition;
+
+ mips_emit_compare (&code, &op0, &op1, TARGET_MIPS16);
+ condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+ emit_jump_insn (gen_condjump (condition, operands[3]));
+}
+
+/* Implement:
+
+ (set temp (COND:CCV2 CMP_OP0 CMP_OP1))
+ (set DEST (unspec [TRUE_SRC FALSE_SRC temp] UNSPEC_MOVE_TF_PS)) */
+
+void
+mips_expand_vcondv2sf (rtx dest, rtx true_src, rtx false_src,
+ enum rtx_code cond, rtx cmp_op0, rtx cmp_op1)
+{
+ rtx cmp_result;
+ bool reversed_p;
+
+ reversed_p = mips_reversed_fp_cond (&cond);
+ cmp_result = gen_reg_rtx (CCV2mode);
+ emit_insn (gen_scc_ps (cmp_result,
+ gen_rtx_fmt_ee (cond, VOIDmode, cmp_op0, cmp_op1)));
+ if (reversed_p)
+ emit_insn (gen_mips_cond_move_tf_ps (dest, false_src, true_src,
+ cmp_result));
+ else
+ emit_insn (gen_mips_cond_move_tf_ps (dest, true_src, false_src,
+ cmp_result));
+}
+
+/* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0]
+ if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */
+
+void
+mips_expand_conditional_move (rtx *operands)
+{
+ rtx cond;
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ mips_emit_compare (&code, &op0, &op1, true);
+ cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond,
+ operands[2], operands[3])));
+}
+
+/* Perform the comparison in COMPARISON, then trap if the condition holds. */
+
+void
+mips_expand_conditional_trap (rtx comparison)
+{
+ rtx op0, op1;
+ enum machine_mode mode;
+ enum rtx_code code;
+
+ /* MIPS conditional trap instructions don't have GT or LE flavors,
+ so we must swap the operands and convert to LT and GE respectively. */
+ code = GET_CODE (comparison);
+ switch (code)
+ {
+ case GT:
+ case LE:
+ case GTU:
+ case LEU:
+ code = swap_condition (code);
+ op0 = XEXP (comparison, 1);
+ op1 = XEXP (comparison, 0);
+ break;
+
+ default:
+ op0 = XEXP (comparison, 0);
+ op1 = XEXP (comparison, 1);
+ break;
+ }
+
+ mode = GET_MODE (XEXP (comparison, 0));
+ op0 = force_reg (mode, op0);
+ if (!arith_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ emit_insn (gen_rtx_TRAP_IF (VOIDmode,
+ gen_rtx_fmt_ee (code, mode, op0, op1),
+ const0_rtx));
+}
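+
+/* Worked example (illustrative): a conditional trap on "a > b" has no
+ GT-flavored trap instruction, so the code above swaps it to "b < a",
+ which can be emitted as a "tlt" instruction; a trap on "a < 100" keeps
+ its constant operand, since arith_operand accepts it as an
+ immediate. */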
+
+/* Initialize *CUM for a call to a function of type FNTYPE. */
+
+void
+mips_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype)
+{
+ memset (cum, 0, sizeof (*cum));
+ cum->prototype = (fntype && prototype_p (fntype));
+ cum->gp_reg_found = (cum->prototype && stdarg_p (fntype));
+}
+
+/* Fill INFO with information about a single argument. CUM is the
+ cumulative state for earlier arguments. MODE is the mode of this
+ argument and TYPE is its type (if known). NAMED is true if this
+ is a named (fixed) argument rather than a variable one. */
+
+static void
+mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
+ enum machine_mode mode, const_tree type, bool named)
+{
+ bool doubleword_aligned_p;
+ unsigned int num_bytes, num_words, max_regs;
+
+ /* Work out the size of the argument. */
+ num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* Decide whether it should go in a floating-point register, assuming
+ one is free. Later code checks for availability.
+
+ The checks against UNITS_PER_FPVALUE handle the soft-float and
+ single-float cases. */
+ switch (mips_abi)
+ {
+ case ABI_EABI:
+ /* The EABI conventions have traditionally been defined in terms
+ of TYPE_MODE, regardless of the actual type. */
+ info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
+ break;
+
+ case ABI_32:
+ case ABI_O64:
+ /* Only leading floating-point scalars are passed in
+ floating-point registers. We also handle vector floats the same
+ way, which is OK because they are not covered by the standard ABI. */
+ info->fpr_p = (!cum->gp_reg_found
+ && cum->arg_number < 2
+ && (type == 0
+ || SCALAR_FLOAT_TYPE_P (type)
+ || VECTOR_FLOAT_TYPE_P (type))
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
+ break;
+
+ case ABI_N32:
+ case ABI_64:
+ /* Scalar, complex and vector floating-point types are passed in
+ floating-point registers, as long as this is a named rather
+ than a variable argument. */
+ info->fpr_p = (named
+ && (type == 0 || FLOAT_TYPE_P (type))
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE);
+
+ /* ??? According to the ABI documentation, the real and imaginary
+ parts of complex floats should be passed in individual registers.
+ The real and imaginary parts of stack arguments are supposed
+ to be contiguous and there should be an extra word of padding
+ at the end.
+
+ This has two problems. First, it makes it impossible to use a
+ single "void *" va_list type, since register and stack arguments
+ are passed differently. (At the time of writing, MIPSpro cannot
+ handle complex float varargs correctly.) Second, it's unclear
+ what should happen when there is only one register free.
+
+ For now, we assume that named complex floats should go into FPRs
+ if there are two FPRs free, otherwise they should be passed in the
+ same way as a struct containing two floats. */
+ if (info->fpr_p
+ && GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
+ && GET_MODE_UNIT_SIZE (mode) < UNITS_PER_FPVALUE)
+ {
+ if (cum->num_gprs >= MAX_ARGS_IN_REGISTERS - 1)
+ info->fpr_p = false;
+ else
+ num_words = 2;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* See whether the argument has doubleword alignment. */
+ doubleword_aligned_p = (mips_function_arg_boundary (mode, type)
+ > BITS_PER_WORD);
+
+ /* Set REG_OFFSET to the register count we're interested in.
+ The EABI allocates the floating-point registers separately,
+ but the other ABIs allocate them like integer registers. */
+ info->reg_offset = (mips_abi == ABI_EABI && info->fpr_p
+ ? cum->num_fprs
+ : cum->num_gprs);
+
+ /* Advance to an even register if the argument is doubleword-aligned. */
+ if (doubleword_aligned_p)
+ info->reg_offset += info->reg_offset & 1;
+
+ /* Work out the offset of a stack argument. */
+ info->stack_offset = cum->stack_words;
+ if (doubleword_aligned_p)
+ info->stack_offset += info->stack_offset & 1;
+
+ max_regs = MAX_ARGS_IN_REGISTERS - info->reg_offset;
+
+ /* Partition the argument between registers and stack. */
+ info->reg_words = MIN (num_words, max_regs);
+ info->stack_words = num_words - info->reg_words;
+}
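+
+/* Worked example (illustrative, assuming the o32 ABI where
+ MAX_ARGS_IN_REGISTERS is 4): a 16-byte structure arriving after two
+ argument words are already in use gets reg_offset 2, reg_words 2 and
+ stack_words 2, i.e. it is split between the last two argument
+ registers and the stack. */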
+
+/* INFO describes a register argument that has the normal format for the
+ argument's mode. Return the register it uses, assuming that FPRs are
+ available if HARD_FLOAT_P. */
+
+static unsigned int
+mips_arg_regno (const struct mips_arg_info *info, bool hard_float_p)
+{
+ if (!info->fpr_p || !hard_float_p)
+ return GP_ARG_FIRST + info->reg_offset;
+ else if (mips_abi == ABI_32 && TARGET_DOUBLE_FLOAT && info->reg_offset > 0)
+ /* In o32, the second argument is always passed in $f14
+ for TARGET_DOUBLE_FLOAT, regardless of whether the
+ first argument was a word or doubleword. */
+ return FP_ARG_FIRST + 2;
+ else
+ return FP_ARG_FIRST + info->reg_offset;
+}
+
+/* Implement TARGET_STRICT_ARGUMENT_NAMING. */
+
+static bool
+mips_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
+{
+ return !TARGET_OLDABI;
+}
+
+/* Implement TARGET_FUNCTION_ARG. */
+
+static rtx
+mips_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ struct mips_arg_info info;
+
+ /* We will be called with a mode of VOIDmode after the last argument
+ has been seen. Whatever we return will be passed to the call expander.
+ If we need a MIPS16 fp_code, return a REG with the code stored as
+ the mode. */
+ if (mode == VOIDmode)
+ {
+ if (TARGET_MIPS16 && cum->fp_code != 0)
+ return gen_rtx_REG ((enum machine_mode) cum->fp_code, 0);
+ else
+ return NULL;
+ }
+
+ mips_get_arg_info (&info, cum, mode, type, named);
+
+ /* Return straight away if the whole argument is passed on the stack. */
+ if (info.reg_offset == MAX_ARGS_IN_REGISTERS)
+ return NULL;
+
+ /* The n32 and n64 ABIs say that if any 64-bit chunk of the structure
+ contains a double in its entirety, then that 64-bit chunk is passed
+ in a floating-point register. */
+ if (TARGET_NEWABI
+ && TARGET_HARD_FLOAT
+ && named
+ && type != 0
+ && TREE_CODE (type) == RECORD_TYPE
+ && TYPE_SIZE_UNIT (type)
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ {
+ tree field;
+
+ /* First check to see if there is any such field. */
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (field))
+ && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD
+ && host_integerp (bit_position (field), 0)
+ && int_bit_position (field) % BITS_PER_WORD == 0)
+ break;
+
+ if (field != 0)
+ {
+ /* Now handle the special case by returning a PARALLEL
+ indicating where each 64-bit chunk goes. INFO.REG_WORDS
+ chunks are passed in registers. */
+ unsigned int i;
+ HOST_WIDE_INT bitpos;
+ rtx ret;
+
+ /* assign_parms checks the mode of ENTRY_PARM, so we must
+ use the actual mode here. */
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (info.reg_words));
+
+ bitpos = 0;
+ field = TYPE_FIELDS (type);
+ for (i = 0; i < info.reg_words; i++)
+ {
+ rtx reg;
+
+ for (; field; field = DECL_CHAIN (field))
+ if (TREE_CODE (field) == FIELD_DECL
+ && int_bit_position (field) >= bitpos)
+ break;
+
+ if (field
+ && int_bit_position (field) == bitpos
+ && SCALAR_FLOAT_TYPE_P (TREE_TYPE (field))
+ && TYPE_PRECISION (TREE_TYPE (field)) == BITS_PER_WORD)
+ reg = gen_rtx_REG (DFmode, FP_ARG_FIRST + info.reg_offset + i);
+ else
+ reg = gen_rtx_REG (DImode, GP_ARG_FIRST + info.reg_offset + i);
+
+ XVECEXP (ret, 0, i)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg,
+ GEN_INT (bitpos / BITS_PER_UNIT));
+
+ bitpos += BITS_PER_WORD;
+ }
+ return ret;
+ }
+ }
+
+ /* Handle the n32/n64 conventions for passing complex floating-point
+ arguments in FPR pairs. The real part goes in the lower register
+ and the imaginary part goes in the upper register. */
+ if (TARGET_NEWABI
+ && info.fpr_p
+ && GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ {
+ rtx real, imag;
+ enum machine_mode inner;
+ unsigned int regno;
+
+ inner = GET_MODE_INNER (mode);
+ regno = FP_ARG_FIRST + info.reg_offset;
+ if (info.reg_words * UNITS_PER_WORD == GET_MODE_SIZE (inner))
+ {
+ /* Real part in registers, imaginary part on stack. */
+ gcc_assert (info.stack_words == info.reg_words);
+ return gen_rtx_REG (inner, regno);
+ }
+ else
+ {
+ gcc_assert (info.stack_words == 0);
+ real = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (inner, regno),
+ const0_rtx);
+ imag = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (inner,
+ regno + info.reg_words / 2),
+ GEN_INT (GET_MODE_SIZE (inner)));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, real, imag));
+ }
+ }
+
+ return gen_rtx_REG (mode, mips_arg_regno (&info, TARGET_HARD_FLOAT));
+}
+
+/* Implement TARGET_FUNCTION_ARG_ADVANCE. */
+
+static void
+mips_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ struct mips_arg_info info;
+
+ mips_get_arg_info (&info, cum, mode, type, named);
+
+ if (!info.fpr_p)
+ cum->gp_reg_found = true;
+
+ /* See the comment above the CUMULATIVE_ARGS structure in mips.h for
+ an explanation of what this code does. It assumes that we're using
+ either the o32 or the o64 ABI, both of which pass at most 2 arguments
+ in FPRs. */
+ if (cum->arg_number < 2 && info.fpr_p)
+ cum->fp_code += (mode == SFmode ? 1 : 2) << (cum->arg_number * 2);
+
+ /* Advance the register count. This has the effect of setting
+ num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
+ argument required us to skip the final GPR and pass the whole
+ argument on the stack. */
+ if (mips_abi != ABI_EABI || !info.fpr_p)
+ cum->num_gprs = info.reg_offset + info.reg_words;
+ else if (info.reg_words > 0)
+ cum->num_fprs += MAX_FPRS_PER_FMT;
+
+ /* Advance the stack word count. */
+ if (info.stack_words > 0)
+ cum->stack_words = info.stack_offset + info.stack_words;
+
+ cum->arg_number++;
+}
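+
+/* Example of the fp_code encoding updated above (illustrative): for a
+ function whose first two arguments are (double, float), argument 0
+ contributes 2 << 0 and argument 1 contributes 1 << 2, giving an
+ fp_code of 6; mips_output_args_xfer later decodes the same two-bit
+ fields in the same order. */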
+
+/* Implement TARGET_ARG_PARTIAL_BYTES. */
+
+static int
+mips_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode, tree type, bool named)
+{
+ struct mips_arg_info info;
+
+ mips_get_arg_info (&info, cum, mode, type, named);
+ return info.stack_words > 0 ? info.reg_words * UNITS_PER_WORD : 0;
+}
+
+/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
+ least PARM_BOUNDARY bits of alignment, but will be given anything up
+ to STACK_BOUNDARY bits if the type requires it. */
+
+static unsigned int
+mips_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ unsigned int alignment;
+
+ alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
+ if (alignment < PARM_BOUNDARY)
+ alignment = PARM_BOUNDARY;
+ if (alignment > STACK_BOUNDARY)
+ alignment = STACK_BOUNDARY;
+ return alignment;
+}
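+
+/* Illustration (assuming a 32-bit ABI where PARM_BOUNDARY is 32 and
+ STACK_BOUNDARY is 64): an int argument stays at 32-bit alignment,
+ while a double keeps its 64-bit alignment and is therefore treated as
+ doubleword-aligned by mips_get_arg_info. */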
+
+/* Return true if FUNCTION_ARG_PADDING (MODE, TYPE) should return
+ upward rather than downward. In other words, return true if the
+ first byte of the stack slot has useful data, false if the last
+ byte does. */
+
+bool
+mips_pad_arg_upward (enum machine_mode mode, const_tree type)
+{
+ /* On little-endian targets, the first byte of every stack argument
+ is passed in the first byte of the stack slot. */
+ if (!BYTES_BIG_ENDIAN)
+ return true;
+
+ /* Otherwise, integral types are padded downward: the last byte of a
+ stack argument is passed in the last byte of the stack slot. */
+ if (type != 0
+ ? (INTEGRAL_TYPE_P (type)
+ || POINTER_TYPE_P (type)
+ || FIXED_POINT_TYPE_P (type))
+ : (SCALAR_INT_MODE_P (mode)
+ || ALL_SCALAR_FIXED_POINT_MODE_P (mode)))
+ return false;
+
+ /* Big-endian o64 pads floating-point arguments downward. */
+ if (mips_abi == ABI_O64)
+ if (type != 0 ? FLOAT_TYPE_P (type) : GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return false;
+
+ /* Other types are padded upward for o32, o64, n32 and n64. */
+ if (mips_abi != ABI_EABI)
+ return true;
+
+ /* Arguments smaller than a stack slot are padded downward. */
+ if (mode != BLKmode)
+ return GET_MODE_BITSIZE (mode) >= PARM_BOUNDARY;
+ else
+ return int_size_in_bytes (type) >= (PARM_BOUNDARY / BITS_PER_UNIT);
+}
+
+/* Likewise BLOCK_REG_PADDING (MODE, TYPE, ...). Return !BYTES_BIG_ENDIAN
+ if the least significant byte of the register has useful data. Return
+ the opposite if the most significant byte does. */
+
+bool
+mips_pad_reg_upward (enum machine_mode mode, tree type)
+{
+ /* No shifting is required for floating-point arguments. */
+ if (type != 0 ? FLOAT_TYPE_P (type) : GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return !BYTES_BIG_ENDIAN;
+
+ /* Otherwise, apply the same padding to register arguments as we do
+ to stack arguments. */
+ return mips_pad_arg_upward (mode, type);
+}
+
+/* Return nonzero when an argument must be passed by reference. */
+
+static bool
+mips_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (mips_abi == ABI_EABI)
+ {
+ int size;
+
+ /* ??? How should SCmode be handled? */
+ if (mode == DImode || mode == DFmode
+ || mode == DQmode || mode == UDQmode
+ || mode == DAmode || mode == UDAmode)
+ return 0;
+
+ size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ return size == -1 || size > UNITS_PER_WORD;
+ }
+ else
+ {
+ /* If we have a variable-sized parameter, we have no choice. */
+ return targetm.calls.must_pass_in_stack (mode, type);
+ }
+}
+
+/* Implement TARGET_CALLEE_COPIES. */
+
+static bool
+mips_callee_copies (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED, bool named)
+{
+ return mips_abi == ABI_EABI && named;
+}
+
+/* See whether VALTYPE is a record whose fields should be returned in
+ floating-point registers. If so, return the number of fields and
+ list them in FIELDS (which should have two elements). Return 0
+ otherwise.
+
+ For n32 & n64, a structure with one or two fields is returned in
+ floating-point registers as long as every field has a floating-point
+ type. */
+
+static int
+mips_fpr_return_fields (const_tree valtype, tree *fields)
+{
+ tree field;
+ int i;
+
+ if (!TARGET_NEWABI)
+ return 0;
+
+ if (TREE_CODE (valtype) != RECORD_TYPE)
+ return 0;
+
+ i = 0;
+ for (field = TYPE_FIELDS (valtype); field != 0; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (field)))
+ return 0;
+
+ if (i == 2)
+ return 0;
+
+ fields[i++] = field;
+ }
+ return i;
+}
+
+/* Implement TARGET_RETURN_IN_MSB. For n32 & n64, we should return
+ a value in the most significant part of $2/$3 if:
+
+ - the target is big-endian;
+
+ - the value has a structure or union type (we generalize this to
+ cover aggregates from other languages too); and
+
+ - the structure is not returned in floating-point registers. */
+
+static bool
+mips_return_in_msb (const_tree valtype)
+{
+ tree fields[2];
+
+ return (TARGET_NEWABI
+ && TARGET_BIG_ENDIAN
+ && AGGREGATE_TYPE_P (valtype)
+ && mips_fpr_return_fields (valtype, fields) == 0);
+}
+
+/* Return true if the function return value MODE will get returned in a
+ floating-point register. */
+
+static bool
+mips_return_mode_in_fpr_p (enum machine_mode mode)
+{
+ return ((GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE);
+}
+
+/* Return the representation of an FPR return register when the
+ value being returned in FP_RETURN has mode VALUE_MODE and the
+ return type itself has mode TYPE_MODE. On NewABI targets,
+ the two modes may be different for structures like:
+
+ struct __attribute__((packed)) foo { float f; }
+
+ where we return the SFmode value of "f" in FP_RETURN, but where
+ the structure itself has mode BLKmode. */
+
+static rtx
+mips_return_fpr_single (enum machine_mode type_mode,
+ enum machine_mode value_mode)
+{
+ rtx x;
+
+ x = gen_rtx_REG (value_mode, FP_RETURN);
+ if (type_mode != value_mode)
+ {
+ x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
+ x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
+ }
+ return x;
+}
+
+/* Return a composite value in a pair of floating-point registers.
+ MODE1 and OFFSET1 are the mode and byte offset for the first value,
+ likewise MODE2 and OFFSET2 for the second. MODE is the mode of the
+ complete value.
+
+ For n32 & n64, $f0 always holds the first value and $f2 the second.
+ Otherwise the values are packed together as closely as possible. */
+
+static rtx
+mips_return_fpr_pair (enum machine_mode mode,
+ enum machine_mode mode1, HOST_WIDE_INT offset1,
+ enum machine_mode mode2, HOST_WIDE_INT offset2)
+{
+ int inc;
+
+ inc = (TARGET_NEWABI ? 2 : MAX_FPRS_PER_FMT);
+ return gen_rtx_PARALLEL
+ (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode1, FP_RETURN),
+ GEN_INT (offset1)),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode2, FP_RETURN + inc),
+ GEN_INT (offset2))));
+
+}
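+
+/* Example (illustrative): on n32/n64 a complex double is returned with
+ its real part in FP_RETURN ($f0) at offset 0 and its imaginary part in
+ FP_RETURN + 2 ($f2) at offset 8, matching the INC of 2 chosen above
+ for TARGET_NEWABI. */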
+
+/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
+ VALTYPE is the return type and MODE is VOIDmode. For libcalls,
+ VALTYPE is null and MODE is the mode of the return value. */
+
+rtx
+mips_function_value (const_tree valtype, const_tree func, enum machine_mode mode)
+{
+ if (valtype)
+ {
+ tree fields[2];
+ int unsigned_p;
+
+ mode = TYPE_MODE (valtype);
+ unsigned_p = TYPE_UNSIGNED (valtype);
+
+ /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+ return values, promote the mode here too. */
+ mode = promote_function_mode (valtype, mode, &unsigned_p, func, 1);
+
+ /* Handle structures whose fields are returned in $f0/$f2. */
+ switch (mips_fpr_return_fields (valtype, fields))
+ {
+ case 1:
+ return mips_return_fpr_single (mode,
+ TYPE_MODE (TREE_TYPE (fields[0])));
+
+ case 2:
+ return mips_return_fpr_pair (mode,
+ TYPE_MODE (TREE_TYPE (fields[0])),
+ int_byte_position (fields[0]),
+ TYPE_MODE (TREE_TYPE (fields[1])),
+ int_byte_position (fields[1]));
+ }
+
+ /* If a value is passed in the most significant part of a register, see
+ whether we have to round the mode up to a whole number of words. */
+ if (mips_return_in_msb (valtype))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (valtype);
+ if (size % UNITS_PER_WORD != 0)
+ {
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD;
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+ }
+
+ /* For EABI, the class of return register depends entirely on MODE.
+ For example, "struct { some_type x; }" and "union { some_type x; }"
+ are returned in the same way as a bare "some_type" would be.
+ Other ABIs only use FPRs for scalar, complex or vector types. */
+ if (mips_abi != ABI_EABI && !FLOAT_TYPE_P (valtype))
+ return gen_rtx_REG (mode, GP_RETURN);
+ }
+
+ if (!TARGET_MIPS16)
+ {
+ /* Handle long doubles for n32 & n64. */
+ if (mode == TFmode)
+ return mips_return_fpr_pair (mode,
+ DImode, 0,
+ DImode, GET_MODE_SIZE (mode) / 2);
+
+ if (mips_return_mode_in_fpr_p (mode))
+ {
+ if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ return mips_return_fpr_pair (mode,
+ GET_MODE_INNER (mode), 0,
+ GET_MODE_INNER (mode),
+ GET_MODE_SIZE (mode) / 2);
+ else
+ return gen_rtx_REG (mode, FP_RETURN);
+ }
+ }
+
+ return gen_rtx_REG (mode, GP_RETURN);
+}
+
+/* Implement TARGET_RETURN_IN_MEMORY. Under the o32 and o64 ABIs,
+ all BLKmode objects are returned in memory. Under the n32, n64
+ and embedded ABIs, small structures are returned in a register.
+ Objects with varying size must still be returned in memory, of
+ course. */
+
+static bool
+mips_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ return (TARGET_OLDABI
+ ? TYPE_MODE (type) == BLKmode
+ : !IN_RANGE (int_size_in_bytes (type), 0, 2 * UNITS_PER_WORD));
+}
+
+/* Implement TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+mips_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS local_cum;
+ int gp_saved, fp_saved;
+
+ /* The caller has advanced CUM up to, but not beyond, the last named
+ argument. Advance a local copy of CUM past the last "real" named
+ argument, to find out how many registers are left over. */
+ local_cum = *cum;
+ mips_function_arg_advance (&local_cum, mode, type, true);
+
+ /* Find out how many registers we need to save. */
+ gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
+ fp_saved = (EABI_FLOAT_VARARGS_P
+ ? MAX_ARGS_IN_REGISTERS - local_cum.num_fprs
+ : 0);
+
+ if (!no_rtl)
+ {
+ if (gp_saved > 0)
+ {
+ rtx ptr, mem;
+
+ ptr = plus_constant (virtual_incoming_args_rtx,
+ REG_PARM_STACK_SPACE (cfun->decl)
+ - gp_saved * UNITS_PER_WORD);
+ mem = gen_frame_mem (BLKmode, ptr);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+
+ move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
+ mem, gp_saved);
+ }
+ if (fp_saved > 0)
+ {
+ /* We can't use move_block_from_reg, because it will use
+ the wrong mode. */
+ enum machine_mode mode;
+ int off, i;
+
+ /* Set OFF to the offset from virtual_incoming_args_rtx of
+ the first float register. The FP save area lies below
+ the integer one, and is aligned to UNITS_PER_FPVALUE bytes. */
+ off = (-gp_saved * UNITS_PER_WORD) & -UNITS_PER_FPVALUE;
+ off -= fp_saved * UNITS_PER_FPREG;
+
+ mode = TARGET_SINGLE_FLOAT ? SFmode : DFmode;
+
+ for (i = local_cum.num_fprs; i < MAX_ARGS_IN_REGISTERS;
+ i += MAX_FPRS_PER_FMT)
+ {
+ rtx ptr, mem;
+
+ ptr = plus_constant (virtual_incoming_args_rtx, off);
+ mem = gen_frame_mem (mode, ptr);
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+ mips_emit_move (mem, gen_rtx_REG (mode, FP_ARG_FIRST + i));
+ off += UNITS_PER_HWFPVALUE;
+ }
+ }
+ }
+ if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
+ cfun->machine->varargs_size = (gp_saved * UNITS_PER_WORD
+ + fp_saved * UNITS_PER_FPREG);
+}
+
+/* Implement TARGET_BUILTIN_VA_LIST. */
+
+static tree
+mips_build_builtin_va_list (void)
+{
+ if (EABI_FLOAT_VARARGS_P)
+ {
+ /* We keep three pointers and two offsets.
+
+ Two pointers are to the overflow area, which starts at the CFA.
+ One of these is constant, for addressing into the GPR save area
+ below it. The other is advanced up the stack through the
+ overflow region.
+
+ The third pointer is to the bottom of the GPR save area.
+ Since the FPR save area is just below it, we can address
+ FPR slots off this pointer.
+
+ We also keep two one-byte offsets, which are to be subtracted
+ from the constant pointers to yield addresses in the GPR and
+ FPR save areas. These are downcounted as float or non-float
+ arguments are used, and when they get to zero, the argument
+ must be obtained from the overflow region. */
+ tree f_ovfl, f_gtop, f_ftop, f_goff, f_foff, f_res, record;
+ tree array, index;
+
+ record = lang_hooks.types.make_type (RECORD_TYPE);
+
+ f_ovfl = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__overflow_argptr"),
+ ptr_type_node);
+ f_gtop = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__gpr_top"),
+ ptr_type_node);
+ f_ftop = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__fpr_top"),
+ ptr_type_node);
+ f_goff = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__gpr_offset"),
+ unsigned_char_type_node);
+ f_foff = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__fpr_offset"),
+ unsigned_char_type_node);
+ /* Explicitly pad to the size of a pointer, so that -Wpadded won't
+ warn on every user file. */
+ index = build_int_cst (NULL_TREE, GET_MODE_SIZE (ptr_mode) - 2 - 1);
+ array = build_array_type (unsigned_char_type_node,
+ build_index_type (index));
+ f_res = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__reserved"), array);
+
+ DECL_FIELD_CONTEXT (f_ovfl) = record;
+ DECL_FIELD_CONTEXT (f_gtop) = record;
+ DECL_FIELD_CONTEXT (f_ftop) = record;
+ DECL_FIELD_CONTEXT (f_goff) = record;
+ DECL_FIELD_CONTEXT (f_foff) = record;
+ DECL_FIELD_CONTEXT (f_res) = record;
+
+ TYPE_FIELDS (record) = f_ovfl;
+ DECL_CHAIN (f_ovfl) = f_gtop;
+ DECL_CHAIN (f_gtop) = f_ftop;
+ DECL_CHAIN (f_ftop) = f_goff;
+ DECL_CHAIN (f_goff) = f_foff;
+ DECL_CHAIN (f_foff) = f_res;
+
+ layout_type (record);
+ return record;
+ }
+ else if (TARGET_IRIX6)
+ /* On IRIX 6, this type is 'char *'. */
+ return build_pointer_type (char_type_node);
+ else
+ /* Otherwise, we use 'void *'. */
+ return ptr_type_node;
+}
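+
+/* A C-level sketch of the EABI record built above (illustrative; the
+ trailing array pads the structure to pointer size, so it has
+ sizeof (void *) - 2 elements):
+
+ struct {
+ void *__overflow_argptr;
+ void *__gpr_top;
+ void *__fpr_top;
+ unsigned char __gpr_offset;
+ unsigned char __fpr_offset;
+ unsigned char __reserved[sizeof (void *) - 2];
+ }; */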
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
+
+static void
+mips_va_start (tree valist, rtx nextarg)
+{
+ if (EABI_FLOAT_VARARGS_P)
+ {
+ const CUMULATIVE_ARGS *cum;
+ tree f_ovfl, f_gtop, f_ftop, f_goff, f_foff;
+ tree ovfl, gtop, ftop, goff, foff;
+ tree t;
+ int gpr_save_area_size;
+ int fpr_save_area_size;
+ int fpr_offset;
+
+ cum = &crtl->args.info;
+ gpr_save_area_size
+ = (MAX_ARGS_IN_REGISTERS - cum->num_gprs) * UNITS_PER_WORD;
+ fpr_save_area_size
+ = (MAX_ARGS_IN_REGISTERS - cum->num_fprs) * UNITS_PER_FPREG;
+
+ f_ovfl = TYPE_FIELDS (va_list_type_node);
+ f_gtop = DECL_CHAIN (f_ovfl);
+ f_ftop = DECL_CHAIN (f_gtop);
+ f_goff = DECL_CHAIN (f_ftop);
+ f_foff = DECL_CHAIN (f_goff);
+
+ ovfl = build3 (COMPONENT_REF, TREE_TYPE (f_ovfl), valist, f_ovfl,
+ NULL_TREE);
+ gtop = build3 (COMPONENT_REF, TREE_TYPE (f_gtop), valist, f_gtop,
+ NULL_TREE);
+ ftop = build3 (COMPONENT_REF, TREE_TYPE (f_ftop), valist, f_ftop,
+ NULL_TREE);
+ goff = build3 (COMPONENT_REF, TREE_TYPE (f_goff), valist, f_goff,
+ NULL_TREE);
+ foff = build3 (COMPONENT_REF, TREE_TYPE (f_foff), valist, f_foff,
+ NULL_TREE);
+
+ /* Emit code to initialize OVFL, which points to the next varargs
+ stack argument. CUM->STACK_WORDS gives the number of stack
+ words used by named arguments. */
+ t = make_tree (TREE_TYPE (ovfl), virtual_incoming_args_rtx);
+ if (cum->stack_words > 0)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovfl), t,
+ size_int (cum->stack_words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovfl), ovfl, t);
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Emit code to initialize GTOP, the top of the GPR save area. */
+ t = make_tree (TREE_TYPE (gtop), virtual_incoming_args_rtx);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gtop), gtop, t);
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Emit code to initialize FTOP, the top of the FPR save area.
+ This address is gpr_save_area_size bytes below GTOP, rounded
+ down to the next fp-aligned boundary. */
+ t = make_tree (TREE_TYPE (ftop), virtual_incoming_args_rtx);
+ fpr_offset = gpr_save_area_size + UNITS_PER_FPVALUE - 1;
+ fpr_offset &= -UNITS_PER_FPVALUE;
+ if (fpr_offset)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ftop), t,
+ size_int (-fpr_offset));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ftop), ftop, t);
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Emit code to initialize GOFF, the offset from GTOP of the
+ next GPR argument. */
+ t = build2 (MODIFY_EXPR, TREE_TYPE (goff), goff,
+ build_int_cst (TREE_TYPE (goff), gpr_save_area_size));
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Likewise emit code to initialize FOFF, the offset from FTOP
+ of the next FPR argument. */
+ t = build2 (MODIFY_EXPR, TREE_TYPE (foff), foff,
+ build_int_cst (TREE_TYPE (foff), fpr_save_area_size));
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+ else
+ {
+ nextarg = plus_constant (nextarg, -cfun->machine->varargs_size);
+ std_expand_builtin_va_start (valist, nextarg);
+ }
+}
+
+/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
+
+static tree
+mips_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ tree addr;
+ bool indirect_p;
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
+ if (indirect_p)
+ type = build_pointer_type (type);
+
+ if (!EABI_FLOAT_VARARGS_P)
+ addr = std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+ else
+ {
+ tree f_ovfl, f_gtop, f_ftop, f_goff, f_foff;
+ tree ovfl, top, off, align;
+ HOST_WIDE_INT size, rsize, osize;
+ tree t, u;
+
+ f_ovfl = TYPE_FIELDS (va_list_type_node);
+ f_gtop = DECL_CHAIN (f_ovfl);
+ f_ftop = DECL_CHAIN (f_gtop);
+ f_goff = DECL_CHAIN (f_ftop);
+ f_foff = DECL_CHAIN (f_goff);
+
+ /* Let:
+
+ TOP be the top of the GPR or FPR save area;
+ OFF be the offset from TOP of the next register;
+ ADDR_RTX be the address of the argument;
+ SIZE be the number of bytes in the argument type;
+ RSIZE be the number of bytes used to store the argument
+ when it's in the register save area; and
+ OSIZE be the number of bytes used to store it when it's
+ in the stack overflow area.
+
+ The code we want is:
+
+ 1: off &= -rsize; // round down
+ 2: if (off != 0)
+ 3: {
+ 4: addr_rtx = top - off + (BYTES_BIG_ENDIAN ? RSIZE - SIZE : 0);
+ 5: off -= rsize;
+ 6: }
+ 7: else
+ 8: {
+ 9: ovfl = ((intptr_t) ovfl + osize - 1) & -osize;
+ 10: addr_rtx = ovfl + (BYTES_BIG_ENDIAN ? OSIZE - SIZE : 0);
+ 11: ovfl += osize;
+ 12: }
+
+ [1] and [9] can sometimes be optimized away. */
+
+ ovfl = build3 (COMPONENT_REF, TREE_TYPE (f_ovfl), valist, f_ovfl,
+ NULL_TREE);
+ size = int_size_in_bytes (type);
+
+ if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT
+ && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FPVALUE)
+ {
+ top = build3 (COMPONENT_REF, TREE_TYPE (f_ftop),
+ unshare_expr (valist), f_ftop, NULL_TREE);
+ off = build3 (COMPONENT_REF, TREE_TYPE (f_foff),
+ unshare_expr (valist), f_foff, NULL_TREE);
+
+ /* When va_start saves FPR arguments to the stack, each slot
+ takes up UNITS_PER_HWFPVALUE bytes, regardless of the
+ argument's precision. */
+ rsize = UNITS_PER_HWFPVALUE;
+
+ /* Overflow arguments are padded to UNITS_PER_WORD bytes
+ (= PARM_BOUNDARY bits). This can be different from RSIZE
+ in two cases:
+
+ (1) On 32-bit targets when TYPE is a structure such as:
+
+ struct s { float f; };
+
+ Such structures are passed in paired FPRs, so RSIZE
+ will be 8 bytes. However, the structure only takes
+ up 4 bytes of memory, so OSIZE will only be 4.
+
+ (2) In combinations such as -mgp64 -msingle-float
+ -fshort-double. Doubles passed in registers will then take
+ up 4 (UNITS_PER_HWFPVALUE) bytes, but those passed on the
+ stack take up UNITS_PER_WORD bytes. */
+ osize = MAX (GET_MODE_SIZE (TYPE_MODE (type)), UNITS_PER_WORD);
+ }
+ else
+ {
+ top = build3 (COMPONENT_REF, TREE_TYPE (f_gtop),
+ unshare_expr (valist), f_gtop, NULL_TREE);
+ off = build3 (COMPONENT_REF, TREE_TYPE (f_goff),
+ unshare_expr (valist), f_goff, NULL_TREE);
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+ if (rsize > UNITS_PER_WORD)
+ {
+ /* [1] Emit code for: off &= -rsize. */
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (off), unshare_expr (off),
+ build_int_cst (TREE_TYPE (off), -rsize));
+ gimplify_assign (unshare_expr (off), t, pre_p);
+ }
+ osize = rsize;
+ }
+
+ /* [2] Emit code to branch if off == 0. */
+ t = build2 (NE_EXPR, boolean_type_node, off,
+ build_int_cst (TREE_TYPE (off), 0));
+ addr = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
+
+ /* [5] Emit code for: off -= rsize. We do this as a form of
+ post-decrement not available to C. */
+ t = fold_convert (TREE_TYPE (off), build_int_cst (NULL_TREE, rsize));
+ t = build2 (POSTDECREMENT_EXPR, TREE_TYPE (off), off, t);
+
+ /* [4] Emit code for:
+ addr_rtx = top - off + (BYTES_BIG_ENDIAN ? RSIZE - SIZE : 0). */
+ t = fold_convert (sizetype, t);
+ t = fold_build1 (NEGATE_EXPR, sizetype, t);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (top), top, t);
+ if (BYTES_BIG_ENDIAN && rsize > size)
+ {
+ u = size_int (rsize - size);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, u);
+ }
+ COND_EXPR_THEN (addr) = t;
+
+ if (osize > UNITS_PER_WORD)
+ {
+ /* [9] Emit: ovfl = ((intptr_t) ovfl + osize - 1) & -osize. */
+ u = size_int (osize - 1);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovfl),
+ unshare_expr (ovfl), u);
+ t = fold_convert (sizetype, t);
+ u = size_int (-osize);
+ t = build2 (BIT_AND_EXPR, sizetype, t, u);
+ t = fold_convert (TREE_TYPE (ovfl), t);
+ align = build2 (MODIFY_EXPR, TREE_TYPE (ovfl),
+ unshare_expr (ovfl), t);
+ }
+ else
+ align = NULL;
+
+ /* [10, 11] Emit code for:
+ addr_rtx = ovfl + (BYTES_BIG_ENDIAN ? OSIZE - SIZE : 0)
+ ovfl += osize. */
+ u = fold_convert (TREE_TYPE (ovfl), build_int_cst (NULL_TREE, osize));
+ t = build2 (POSTINCREMENT_EXPR, TREE_TYPE (ovfl), ovfl, u);
+ if (BYTES_BIG_ENDIAN && osize > size)
+ {
+ u = size_int (osize - size);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, u);
+ }
+
+ /* String [9] and [10, 11] together. */
+ if (align)
+ t = build2 (COMPOUND_EXPR, TREE_TYPE (t), align, t);
+ COND_EXPR_ELSE (addr) = t;
+
+ addr = fold_convert (build_pointer_type (type), addr);
+ addr = build_va_arg_indirect_ref (addr);
+ }
+
+ if (indirect_p)
+ addr = build_va_arg_indirect_ref (addr);
+
+ return addr;
+}
+
+/* Start a definition of function NAME. MIPS16_P indicates whether the
+ function contains MIPS16 code. */
+
+static void
+mips_start_function_definition (const char *name, bool mips16_p)
+{
+ if (mips16_p)
+ fprintf (asm_out_file, "\t.set\tmips16\n");
+ else
+ fprintf (asm_out_file, "\t.set\tnomips16\n");
+
+ if (!flag_inhibit_size_directive)
+ {
+ fputs ("\t.ent\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ }
+
+ ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function");
+
+ /* Start the definition proper. */
+ assemble_name (asm_out_file, name);
+ fputs (":\n", asm_out_file);
+}
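+
+/* For a non-MIPS16 function "foo", and assuming size directives are not
+ inhibited, the directives emitted above look roughly like:
+
+ .set nomips16
+ .ent foo
+ .type foo, @function
+ foo: */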
+
+/* End a function definition started by mips_start_function_definition. */
+
+static void
+mips_end_function_definition (const char *name)
+{
+ if (!flag_inhibit_size_directive)
+ {
+ fputs ("\t.end\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ }
+}
+
+/* Return true if calls to X can use R_MIPS_CALL* relocations. */
+
+static bool
+mips_ok_for_lazy_binding_p (rtx x)
+{
+ return (TARGET_USE_GOT
+ && GET_CODE (x) == SYMBOL_REF
+ && !SYMBOL_REF_BIND_NOW_P (x)
+ && !mips_symbol_binds_local_p (x));
+}
+
+/* Load function address ADDR into register DEST. TYPE is as for
+ mips_expand_call. Return true if we used an explicit lazy-binding
+ sequence. */
+
+static bool
+mips_load_call_address (enum mips_call_type type, rtx dest, rtx addr)
+{
+ /* If we're generating PIC, and this call is to a global function,
+ try to allow its address to be resolved lazily. This isn't
+ possible for sibcalls when $gp is call-saved because the value
+ of $gp on entry to the stub would be our caller's gp, not ours. */
+ if (TARGET_EXPLICIT_RELOCS
+ && !(type == MIPS_CALL_SIBCALL && TARGET_CALL_SAVED_GP)
+ && mips_ok_for_lazy_binding_p (addr))
+ {
+ addr = mips_got_load (dest, addr, SYMBOL_GOTOFF_CALL);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, addr));
+ return true;
+ }
+ else
+ {
+ mips_emit_move (dest, addr);
+ return false;
+ }
+}
+
+/* Each locally-defined hard-float MIPS16 function has a local symbol
+ associated with it. This hash table maps the function symbol (FUNC)
+ to the local symbol (LOCAL). */
+struct GTY(()) mips16_local_alias {
+ rtx func;
+ rtx local;
+};
+static GTY ((param_is (struct mips16_local_alias))) htab_t mips16_local_aliases;
+
+/* Hash table callbacks for mips16_local_aliases. */
+
+static hashval_t
+mips16_local_aliases_hash (const void *entry)
+{
+ const struct mips16_local_alias *alias;
+
+ alias = (const struct mips16_local_alias *) entry;
+ return htab_hash_string (XSTR (alias->func, 0));
+}
+
+static int
+mips16_local_aliases_eq (const void *entry1, const void *entry2)
+{
+ const struct mips16_local_alias *alias1, *alias2;
+
+ alias1 = (const struct mips16_local_alias *) entry1;
+ alias2 = (const struct mips16_local_alias *) entry2;
+ return rtx_equal_p (alias1->func, alias2->func);
+}
+
+/* FUNC is the symbol for a locally-defined hard-float MIPS16 function.
+ Return a local alias for it, creating a new one if necessary. */
+
+static rtx
+mips16_local_alias (rtx func)
+{
+ struct mips16_local_alias *alias, tmp_alias;
+ void **slot;
+
+ /* Create the hash table if this is the first call. */
+ if (mips16_local_aliases == NULL)
+ mips16_local_aliases = htab_create_ggc (37, mips16_local_aliases_hash,
+ mips16_local_aliases_eq, NULL);
+
+ /* Look up the function symbol, creating a new entry if need be. */
+ tmp_alias.func = func;
+ slot = htab_find_slot (mips16_local_aliases, &tmp_alias, INSERT);
+ gcc_assert (slot != NULL);
+
+ alias = (struct mips16_local_alias *) *slot;
+ if (alias == NULL)
+ {
+ const char *func_name, *local_name;
+ rtx local;
+
+ /* Create a new SYMBOL_REF for the local symbol. The choice of
+ __fn_local_* is based on the __fn_stub_* names that we've
+ traditionally used for the non-MIPS16 stub. */
+ func_name = targetm.strip_name_encoding (XSTR (func, 0));
+ local_name = ACONCAT (("__fn_local_", func_name, NULL));
+ local = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (local_name));
+ SYMBOL_REF_FLAGS (local) = SYMBOL_REF_FLAGS (func) | SYMBOL_FLAG_LOCAL;
+
+ /* Create a new structure to represent the mapping. */
+ alias = ggc_alloc_mips16_local_alias ();
+ alias->func = func;
+ alias->local = local;
+ *slot = alias;
+ }
+ return alias->local;
+}
+
+/* A chained list of functions for which mips16_build_call_stub has already
+ generated a stub. NAME is the name of the function and FP_RET_P is true
+ if the function returns a value in floating-point registers. */
+struct mips16_stub {
+ struct mips16_stub *next;
+ char *name;
+ bool fp_ret_p;
+};
+static struct mips16_stub *mips16_stubs;
+
+/* Return a SYMBOL_REF for a MIPS16 function called NAME. */
+
+static rtx
+mips16_stub_function (const char *name)
+{
+ rtx x;
+
+ x = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ SYMBOL_REF_FLAGS (x) |= (SYMBOL_FLAG_EXTERNAL | SYMBOL_FLAG_FUNCTION);
+ return x;
+}
+
+/* Return the two-character string that identifies floating-point
+ return mode MODE in the name of a MIPS16 function stub. */
+
+static const char *
+mips16_call_stub_mode_suffix (enum machine_mode mode)
+{
+ if (mode == SFmode)
+ return "sf";
+ else if (mode == DFmode)
+ return "df";
+ else if (mode == SCmode)
+ return "sc";
+ else if (mode == DCmode)
+ return "dc";
+ else if (mode == V2SFmode)
+ return "df";
+ else
+ gcc_unreachable ();
+}
+
+/* Write instructions to move a 32-bit value between general register
+ GPREG and floating-point register FPREG. DIRECTION is 't' to move
+ from GPREG to FPREG and 'f' to move in the opposite direction. */
+
+static void
+mips_output_32bit_xfer (char direction, unsigned int gpreg, unsigned int fpreg)
+{
+ fprintf (asm_out_file, "\tm%cc1\t%s,%s\n", direction,
+ reg_names[gpreg], reg_names[fpreg]);
+}
+
+/* Likewise for 64-bit values. */
+
+static void
+mips_output_64bit_xfer (char direction, unsigned int gpreg, unsigned int fpreg)
+{
+ if (TARGET_64BIT)
+ fprintf (asm_out_file, "\tdm%cc1\t%s,%s\n", direction,
+ reg_names[gpreg], reg_names[fpreg]);
+ else if (TARGET_FLOAT64)
+ {
+ fprintf (asm_out_file, "\tm%cc1\t%s,%s\n", direction,
+ reg_names[gpreg + TARGET_BIG_ENDIAN], reg_names[fpreg]);
+ fprintf (asm_out_file, "\tm%chc1\t%s,%s\n", direction,
+ reg_names[gpreg + TARGET_LITTLE_ENDIAN], reg_names[fpreg]);
+ }
+ else
+ {
+ /* Move the least-significant word. */
+ fprintf (asm_out_file, "\tm%cc1\t%s,%s\n", direction,
+ reg_names[gpreg + TARGET_BIG_ENDIAN], reg_names[fpreg]);
+ /* ...then the most significant word. */
+ fprintf (asm_out_file, "\tm%cc1\t%s,%s\n", direction,
+ reg_names[gpreg + TARGET_LITTLE_ENDIAN], reg_names[fpreg + 1]);
+ }
+}
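+
+/* Example (illustrative register choice): moving a 64-bit value from
+ $4/$5 into $f12 on a little-endian target with 32-bit FPRs takes two
+ transfers, "mtc1 $4,$f12" then "mtc1 $5,$f13"; with TARGET_FLOAT64 the
+ second transfer becomes "mthc1 $5,$f12", writing the high half of the
+ same register. */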
+
+/* Write out code to move floating-point arguments into or out of
+ general registers. FP_CODE is the code describing which arguments
+ are present (see the comment above the definition of CUMULATIVE_ARGS
+ in mips.h). DIRECTION is as for mips_output_32bit_xfer. */
+
+static void
+mips_output_args_xfer (int fp_code, char direction)
+{
+ unsigned int gparg, fparg, f;
+ CUMULATIVE_ARGS cum;
+
+ /* This code only works for o32 and o64. */
+ gcc_assert (TARGET_OLDABI);
+
+ mips_init_cumulative_args (&cum, NULL);
+
+ for (f = (unsigned int) fp_code; f != 0; f >>= 2)
+ {
+ enum machine_mode mode;
+ struct mips_arg_info info;
+
+ if ((f & 3) == 1)
+ mode = SFmode;
+ else if ((f & 3) == 2)
+ mode = DFmode;
+ else
+ gcc_unreachable ();
+
+ mips_get_arg_info (&info, &cum, mode, NULL, true);
+ gparg = mips_arg_regno (&info, false);
+ fparg = mips_arg_regno (&info, true);
+
+ if (mode == SFmode)
+ mips_output_32bit_xfer (direction, gparg, fparg);
+ else
+ mips_output_64bit_xfer (direction, gparg, fparg);
+
+ mips_function_arg_advance (&cum, mode, NULL, true);
+ }
+}
+
+/* Write a MIPS16 stub for the current function. This stub is used
+ for functions which take arguments in the floating-point registers.
+ It is normal-mode code that moves the floating-point arguments
+ into the general registers and then jumps to the MIPS16 code. */
+
+static void
+mips16_build_function_stub (void)
+{
+ const char *fnname, *alias_name, *separator;
+ char *secname, *stubname;
+ tree stubdecl;
+ unsigned int f;
+ rtx symbol, alias;
+
+ /* Create the name of the stub, and its unique section. */
+ symbol = XEXP (DECL_RTL (current_function_decl), 0);
+ alias = mips16_local_alias (symbol);
+
+ fnname = targetm.strip_name_encoding (XSTR (symbol, 0));
+ alias_name = targetm.strip_name_encoding (XSTR (alias, 0));
+ secname = ACONCAT ((".mips16.fn.", fnname, NULL));
+ stubname = ACONCAT (("__fn_stub_", fnname, NULL));
+
+ /* Build a decl for the stub. */
+ stubdecl = build_decl (BUILTINS_LOCATION,
+ FUNCTION_DECL, get_identifier (stubname),
+ build_function_type (void_type_node, NULL_TREE));
+ DECL_SECTION_NAME (stubdecl) = build_string (strlen (secname), secname);
+ DECL_RESULT (stubdecl) = build_decl (BUILTINS_LOCATION,
+ RESULT_DECL, NULL_TREE, void_type_node);
+
+ /* Output a comment. */
+ fprintf (asm_out_file, "\t# Stub function for %s (",
+ current_function_name ());
+ separator = "";
+ for (f = (unsigned int) crtl->args.info.fp_code; f != 0; f >>= 2)
+ {
+ fprintf (asm_out_file, "%s%s", separator,
+ (f & 3) == 1 ? "float" : "double");
+ separator = ", ";
+ }
+ fprintf (asm_out_file, ")\n");
+
+ /* Start the function definition. */
+ assemble_start_function (stubdecl, stubname);
+ mips_start_function_definition (stubname, false);
+
+ /* If generating pic2 code, either set up the global pointer or
+ switch to pic0. */
+ if (TARGET_ABICALLS_PIC2)
+ {
+ if (TARGET_ABSOLUTE_ABICALLS)
+ fprintf (asm_out_file, "\t.option\tpic0\n");
+ else
+ {
+ output_asm_insn ("%(.cpload\t%^%)", NULL);
+ /* Emit an R_MIPS_NONE relocation to tell the linker what the
+ target function is. Use a local GOT access when loading the
+ symbol, to cut down on the number of unnecessary GOT entries
+ for stubs that aren't needed. */
+ output_asm_insn (".reloc\t0,R_MIPS_NONE,%0", &symbol);
+ symbol = alias;
+ }
+ }
+
+ /* Load the address of the MIPS16 function into $25. Do this first so
+ that targets with coprocessor interlocks can use an MFC1 to fill the
+ delay slot. */
+ output_asm_insn ("la\t%^,%0", &symbol);
+
+ /* Move the arguments from floating-point registers to general registers. */
+ mips_output_args_xfer (crtl->args.info.fp_code, 'f');
+
+ /* Jump to the MIPS16 function. */
+ output_asm_insn ("jr\t%^", NULL);
+
+ if (TARGET_ABICALLS_PIC2 && TARGET_ABSOLUTE_ABICALLS)
+ fprintf (asm_out_file, "\t.option\tpic2\n");
+
+ mips_end_function_definition (stubname);
+
+ /* If the linker needs to create a dynamic symbol for the target
+ function, it will associate the symbol with the stub (which,
+ unlike the target function, follows the proper calling conventions).
+ It is therefore useful to have a local alias for the target function,
+ so that it can still be identified as MIPS16 code. As an optimization,
+ this symbol can also be used for indirect MIPS16 references from
+ within this file. */
+ ASM_OUTPUT_DEF (asm_out_file, alias_name, fnname);
+
+ switch_to_section (function_section (current_function_decl));
+}
+
+/* The current function is a MIPS16 function that returns a value in an FPR.
+ Copy the return value from its soft-float to its hard-float location.
+ libgcc2 has special non-MIPS16 helper functions for each case. */
+
+static void
+mips16_copy_fpr_return_value (void)
+{
+ rtx fn, insn, retval;
+ tree return_type;
+ enum machine_mode return_mode;
+ const char *name;
+
+ return_type = DECL_RESULT (current_function_decl);
+ return_mode = DECL_MODE (return_type);
+
+ name = ACONCAT (("__mips16_ret_",
+ mips16_call_stub_mode_suffix (return_mode),
+ NULL));
+ fn = mips16_stub_function (name);
+
+ /* The function takes arguments in $2 (and possibly $3), so calls
+ to it cannot be lazily bound. */
+ SYMBOL_REF_FLAGS (fn) |= SYMBOL_FLAG_BIND_NOW;
+
+ /* Model the call as something that takes the GPR return value as
+ argument and returns an "updated" value. */
+ retval = gen_rtx_REG (return_mode, GP_RETURN);
+ insn = mips_expand_call (MIPS_CALL_EPILOGUE, retval, fn,
+ const0_rtx, NULL_RTX, false);
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), retval);
+}
+
+/* Consider building a stub for a MIPS16 call to function *FN_PTR.
+ RETVAL is the location of the return value, or null if this is
+ a "call" rather than a "call_value". ARGS_SIZE is the size of the
+ arguments and FP_CODE is the code built by mips_function_arg;
+ see the comment before the fp_code field in CUMULATIVE_ARGS for details.
+
+ There are three alternatives:
+
+ - If a stub was needed, emit the call and return the call insn itself.
+
+ - If we can avoid using a stub by redirecting the call, set *FN_PTR
+ to the new target and return null.
+
+ - If *FN_PTR doesn't need a stub, return null and leave *FN_PTR
+ unmodified.
+
+ A stub is needed for calls to functions that, in normal mode,
+ receive arguments in FPRs or return values in FPRs. The stub
+ copies the arguments from their soft-float positions to their
+ hard-float positions, calls the real function, then copies the
+ return value from its hard-float position to its soft-float
+ position.
+
+ We can emit a JAL to *FN_PTR even when *FN_PTR might need a stub.
+ If *FN_PTR turns out to be to a non-MIPS16 function, the linker
+ automatically redirects the JAL to the stub, otherwise the JAL
+ continues to call FN directly. */
+
+static rtx
+mips16_build_call_stub (rtx retval, rtx *fn_ptr, rtx args_size, int fp_code)
+{
+ const char *fnname;
+ bool fp_ret_p;
+ struct mips16_stub *l;
+ rtx insn, fn;
+
+ /* We don't need to do anything if we aren't in MIPS16 mode, or if
+ we were invoked with the -msoft-float option. */
+ if (!TARGET_MIPS16 || TARGET_SOFT_FLOAT_ABI)
+ return NULL_RTX;
+
+ /* Figure out whether the value might come back in a floating-point
+ register. */
+ fp_ret_p = retval && mips_return_mode_in_fpr_p (GET_MODE (retval));
+
+ /* We don't need to do anything if there were no floating-point
+ arguments and the value will not be returned in a floating-point
+ register. */
+ if (fp_code == 0 && !fp_ret_p)
+ return NULL_RTX;
+
+ /* We don't need to do anything if this is a call to a special
+ MIPS16 support function. */
+ fn = *fn_ptr;
+ if (mips16_stub_function_p (fn))
+ return NULL_RTX;
+
+ /* This code will only work for the o32 and o64 ABIs. The other ABIs
+ require more sophisticated support. */
+ gcc_assert (TARGET_OLDABI);
+
+ /* If we're calling via a function pointer, use one of the magic
+ libgcc.a stubs provided for each (FP_CODE, FP_RET_P) combination.
+ Each stub expects the function address to arrive in register $2. */
+ if (GET_CODE (fn) != SYMBOL_REF
+ || !call_insn_operand (fn, VOIDmode))
+ {
+ char buf[30];
+ rtx stub_fn, insn, addr;
+ bool lazy_p;
+
+ /* If this is a locally-defined and locally-binding function,
+ avoid the stub by calling the local alias directly. */
+ if (mips16_local_function_p (fn))
+ {
+ *fn_ptr = mips16_local_alias (fn);
+ return NULL_RTX;
+ }
+
+ /* Create a SYMBOL_REF for the libgcc.a function. */
+ if (fp_ret_p)
+ sprintf (buf, "__mips16_call_stub_%s_%d",
+ mips16_call_stub_mode_suffix (GET_MODE (retval)),
+ fp_code);
+ else
+ sprintf (buf, "__mips16_call_stub_%d", fp_code);
+ stub_fn = mips16_stub_function (buf);
+
+ /* The function uses $2 as an argument, so calls to it
+ cannot be lazily bound. */
+ SYMBOL_REF_FLAGS (stub_fn) |= SYMBOL_FLAG_BIND_NOW;
+
+ /* Load the target function into $2. */
+ addr = gen_rtx_REG (Pmode, GP_REG_FIRST + 2);
+ lazy_p = mips_load_call_address (MIPS_CALL_NORMAL, addr, fn);
+
+ /* Emit the call. */
+ insn = mips_expand_call (MIPS_CALL_NORMAL, retval, stub_fn,
+ args_size, NULL_RTX, lazy_p);
+
+ /* Tell GCC that this call does indeed use the value of $2. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), addr);
+
+ /* If we are handling a floating-point return value, we need to
+ save $18 in the function prologue. Putting a note on the
+ call will mean that df_regs_ever_live_p ($18) will be true if the
+ call is not eliminated, and we can check that in the prologue
+ code. */
+ if (fp_ret_p)
+ CALL_INSN_FUNCTION_USAGE (insn) =
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (word_mode, 18)),
+ CALL_INSN_FUNCTION_USAGE (insn));
+
+ return insn;
+ }
+
+ /* We know the function we are going to call. If we have already
+ built a stub, we don't need to do anything further. */
+ fnname = targetm.strip_name_encoding (XSTR (fn, 0));
+ for (l = mips16_stubs; l != NULL; l = l->next)
+ if (strcmp (l->name, fnname) == 0)
+ break;
+
+ if (l == NULL)
+ {
+ const char *separator;
+ char *secname, *stubname;
+ tree stubid, stubdecl;
+ unsigned int f;
+
+ /* If the function does not return in FPRs, the special stub
+ section is named
+ .mips16.call.FNNAME
+
+ If the function does return in FPRs, the stub section is named
+ .mips16.call.fp.FNNAME
+
+ Build a decl for the stub. */
+ secname = ACONCAT ((".mips16.call.", fp_ret_p ? "fp." : "",
+ fnname, NULL));
+ stubname = ACONCAT (("__call_stub_", fp_ret_p ? "fp_" : "",
+ fnname, NULL));
+ stubid = get_identifier (stubname);
+ stubdecl = build_decl (BUILTINS_LOCATION,
+ FUNCTION_DECL, stubid,
+ build_function_type (void_type_node, NULL_TREE));
+ DECL_SECTION_NAME (stubdecl) = build_string (strlen (secname), secname);
+ DECL_RESULT (stubdecl) = build_decl (BUILTINS_LOCATION,
+ RESULT_DECL, NULL_TREE,
+ void_type_node);
+
+ /* Output a comment. */
+ fprintf (asm_out_file, "\t# Stub function to call %s%s (",
+ (fp_ret_p
+ ? (GET_MODE (retval) == SFmode ? "float " : "double ")
+ : ""),
+ fnname);
+ separator = "";
+ for (f = (unsigned int) fp_code; f != 0; f >>= 2)
+ {
+ fprintf (asm_out_file, "%s%s", separator,
+ (f & 3) == 1 ? "float" : "double");
+ separator = ", ";
+ }
+ fprintf (asm_out_file, ")\n");
+
+ /* Start the function definition. */
+ assemble_start_function (stubdecl, stubname);
+ mips_start_function_definition (stubname, false);
+
+ if (!fp_ret_p)
+ {
+ /* Load the address of the MIPS16 function into $25. Do this
+ first so that targets with coprocessor interlocks can use
+ an MFC1 to fill the delay slot. */
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ output_asm_insn ("lui\t%^,%%hi(%0)", &fn);
+ output_asm_insn ("addiu\t%^,%^,%%lo(%0)", &fn);
+ }
+ else
+ output_asm_insn ("la\t%^,%0", &fn);
+ }
+
+ /* Move the arguments from general registers to floating-point
+ registers. */
+ mips_output_args_xfer (fp_code, 't');
+
+ if (!fp_ret_p)
+ {
+ /* Jump to the previously-loaded address. */
+ output_asm_insn ("jr\t%^", NULL);
+ }
+ else
+ {
+ /* Save the return address in $18 and call the non-MIPS16 function.
+ The stub's caller knows that $18 might be clobbered, even though
+ $18 is usually a call-saved register. */
+ fprintf (asm_out_file, "\tmove\t%s,%s\n",
+ reg_names[GP_REG_FIRST + 18], reg_names[RETURN_ADDR_REGNUM]);
+ output_asm_insn (MIPS_CALL ("jal", &fn, 0, -1), &fn);
+
+ /* Move the result from floating-point registers to
+ general registers. */
+ switch (GET_MODE (retval))
+ {
+ case SCmode:
+ mips_output_32bit_xfer ('f', GP_RETURN + TARGET_BIG_ENDIAN,
+ TARGET_BIG_ENDIAN
+ ? FP_REG_FIRST + MAX_FPRS_PER_FMT
+ : FP_REG_FIRST);
+ mips_output_32bit_xfer ('f', GP_RETURN + TARGET_LITTLE_ENDIAN,
+ TARGET_LITTLE_ENDIAN
+ ? FP_REG_FIRST + MAX_FPRS_PER_FMT
+ : FP_REG_FIRST);
+ if (GET_MODE (retval) == SCmode && TARGET_64BIT)
+ {
+ /* On 64-bit targets, complex floats are returned in
+ a single GPR, such that "sd" on a suitably-aligned
+ target would store the value correctly. */
+ fprintf (asm_out_file, "\tdsll\t%s,%s,32\n",
+ reg_names[GP_RETURN + TARGET_BIG_ENDIAN],
+ reg_names[GP_RETURN + TARGET_BIG_ENDIAN]);
+ fprintf (asm_out_file, "\tdsll\t%s,%s,32\n",
+ reg_names[GP_RETURN + TARGET_LITTLE_ENDIAN],
+ reg_names[GP_RETURN + TARGET_LITTLE_ENDIAN]);
+ fprintf (asm_out_file, "\tdsrl\t%s,%s,32\n",
+ reg_names[GP_RETURN + TARGET_BIG_ENDIAN],
+ reg_names[GP_RETURN + TARGET_BIG_ENDIAN]);
+ fprintf (asm_out_file, "\tor\t%s,%s,%s\n",
+ reg_names[GP_RETURN],
+ reg_names[GP_RETURN],
+ reg_names[GP_RETURN + 1]);
+ }
+ break;
+
+ case SFmode:
+ mips_output_32bit_xfer ('f', GP_RETURN, FP_REG_FIRST);
+ break;
+
+ case DCmode:
+ mips_output_64bit_xfer ('f', GP_RETURN + (8 / UNITS_PER_WORD),
+ FP_REG_FIRST + MAX_FPRS_PER_FMT);
+ /* Fall through. */
+ case DFmode:
+ case V2SFmode:
+ mips_output_64bit_xfer ('f', GP_RETURN, FP_REG_FIRST);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ fprintf (asm_out_file, "\tjr\t%s\n", reg_names[GP_REG_FIRST + 18]);
+ }
+
+#ifdef ASM_DECLARE_FUNCTION_SIZE
+ ASM_DECLARE_FUNCTION_SIZE (asm_out_file, stubname, stubdecl);
+#endif
+
+ mips_end_function_definition (stubname);
+
+ /* Record this stub. */
+ l = XNEW (struct mips16_stub);
+ l->name = xstrdup (fnname);
+ l->fp_ret_p = fp_ret_p;
+ l->next = mips16_stubs;
+ mips16_stubs = l;
+ }
+
+ /* If we expect a floating-point return value, but we've built a
+ stub which does not expect one, then we're in trouble. We can't
+ use the existing stub, because it won't handle the floating-point
+ value. We can't build a new stub, because the linker won't know
+ which stub to use for the various calls in this object file.
+ Fortunately, this case is illegal, since it means that a function
+ was declared in two different ways in a single compilation. */
+ if (fp_ret_p && !l->fp_ret_p)
+ error ("cannot handle inconsistent calls to %qs", fnname);
+
+ if (retval == NULL_RTX)
+ insn = gen_call_internal_direct (fn, args_size);
+ else
+ insn = gen_call_value_internal_direct (retval, fn, args_size);
+ insn = mips_emit_call_insn (insn, fn, fn, false);
+
+ /* If we are calling a stub which handles a floating-point return
+ value, we need to arrange to save $18 in the prologue. We do this
+ by marking the function call as using the register. The prologue
+ will later see that it is used, and emit code to save it. */
+ if (fp_ret_p)
+ CALL_INSN_FUNCTION_USAGE (insn) =
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (word_mode, 18)),
+ CALL_INSN_FUNCTION_USAGE (insn));
+
+ return insn;
+}
+
+/* Expand a call of type TYPE. RESULT is where the result will go (null
+ for "call"s and "sibcall"s), ADDR is the address of the function,
+ ARGS_SIZE is the size of the arguments and AUX is the value passed
+ to us by mips_function_arg. LAZY_P is true if this call already
+ involves a lazily-bound function address (such as when calling
+ functions through a MIPS16 hard-float stub).
+
+ Return the call itself. */
+
+rtx
+mips_expand_call (enum mips_call_type type, rtx result, rtx addr,
+ rtx args_size, rtx aux, bool lazy_p)
+{
+ rtx orig_addr, pattern, insn;
+ int fp_code;
+
+ fp_code = aux == 0 ? 0 : (int) GET_MODE (aux);
+ insn = mips16_build_call_stub (result, &addr, args_size, fp_code);
+ if (insn)
+ {
+ gcc_assert (!lazy_p && type == MIPS_CALL_NORMAL);
+ return insn;
+ }
+
+ orig_addr = addr;
+ if (!call_insn_operand (addr, VOIDmode))
+ {
+ if (type == MIPS_CALL_EPILOGUE)
+ addr = MIPS_EPILOGUE_TEMP (Pmode);
+ else
+ addr = gen_reg_rtx (Pmode);
+ lazy_p |= mips_load_call_address (type, addr, orig_addr);
+ }
+
+ if (result == 0)
+ {
+ rtx (*fn) (rtx, rtx);
+
+ if (type == MIPS_CALL_SIBCALL)
+ fn = gen_sibcall_internal;
+ else
+ fn = gen_call_internal;
+
+ pattern = fn (addr, args_size);
+ }
+ else if (GET_CODE (result) == PARALLEL && XVECLEN (result, 0) == 2)
+ {
+ /* Handle return values created by mips_return_fpr_pair. */
+ rtx (*fn) (rtx, rtx, rtx, rtx);
+ rtx reg1, reg2;
+
+ if (type == MIPS_CALL_SIBCALL)
+ fn = gen_sibcall_value_multiple_internal;
+ else
+ fn = gen_call_value_multiple_internal;
+
+ reg1 = XEXP (XVECEXP (result, 0, 0), 0);
+ reg2 = XEXP (XVECEXP (result, 0, 1), 0);
+ pattern = fn (reg1, addr, args_size, reg2);
+ }
+ else
+ {
+ rtx (*fn) (rtx, rtx, rtx);
+
+ if (type == MIPS_CALL_SIBCALL)
+ fn = gen_sibcall_value_internal;
+ else
+ fn = gen_call_value_internal;
+
+ /* Handle return values created by mips_return_fpr_single. */
+ if (GET_CODE (result) == PARALLEL && XVECLEN (result, 0) == 1)
+ result = XEXP (XVECEXP (result, 0, 0), 0);
+ pattern = fn (result, addr, args_size);
+ }
+
+ return mips_emit_call_insn (pattern, orig_addr, addr, lazy_p);
+}
+
+/* Split call instruction INSN into a $gp-clobbering call and
+ (where necessary) an instruction to restore $gp from its save slot.
+ CALL_PATTERN is the pattern of the new call. */
+
+void
+mips_split_call (rtx insn, rtx call_pattern)
+{
+ emit_call_insn (call_pattern);
+ if (!find_reg_note (insn, REG_NORETURN, 0))
+ /* Pick a temporary register that is suitable for both MIPS16 and
+ non-MIPS16 code. $4 and $5 are used for returning complex double
+ values in soft-float code, so $6 is the first suitable candidate. */
+ mips_restore_gp_from_cprestore_slot (gen_rtx_REG (Pmode, GP_ARG_FIRST + 2));
+}
+
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
+
+static bool
+mips_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_SIBCALLS)
+ return false;
+
+ /* Interrupt handlers need special epilogue code and therefore can't
+ use sibcalls. */
+ if (mips_interrupt_type_p (TREE_TYPE (current_function_decl)))
+ return false;
+
+ /* We can't do a sibcall if the called function is a MIPS16 function
+ because there is no direct "jx" instruction equivalent to "jalx" to
+ switch the ISA mode. We only care about cases where the sibling
+ and normal calls would both be direct. */
+ if (decl
+ && mips_use_mips16_mode_p (decl)
+ && const_call_insn_operand (XEXP (DECL_RTL (decl), 0), VOIDmode))
+ return false;
+
+ /* When -minterlink-mips16 is in effect, assume that non-locally-binding
+ functions could be MIPS16 ones unless an attribute explicitly tells
+ us otherwise. */
+ if (TARGET_INTERLINK_MIPS16
+ && decl
+ && (DECL_EXTERNAL (decl) || !targetm.binds_local_p (decl))
+ && !mips_nomips16_decl_p (decl)
+ && const_call_insn_operand (XEXP (DECL_RTL (decl), 0), VOIDmode))
+ return false;
+
+ /* Otherwise OK. */
+ return true;
+}
+
+/* Emit code to move general operand SRC into condition-code
+ register DEST given that SCRATCH is a scratch TFmode FPR.
+ The sequence is:
+
+ FP1 = SRC
+ FP2 = 0.0f
+ DEST = FP2 < FP1
+
+ where FP1 and FP2 are single-precision FPRs taken from SCRATCH. */
+
+void
+mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch)
+{
+ rtx fp1, fp2;
+
+ /* Change the source to SFmode. */
+ if (MEM_P (src))
+ src = adjust_address (src, SFmode, 0);
+ else if (REG_P (src) || GET_CODE (src) == SUBREG)
+ src = gen_rtx_REG (SFmode, true_regnum (src));
+
+ fp1 = gen_rtx_REG (SFmode, REGNO (scratch));
+ fp2 = gen_rtx_REG (SFmode, REGNO (scratch) + MAX_FPRS_PER_FMT);
+
+ mips_emit_move (copy_rtx (fp1), src);
+ mips_emit_move (copy_rtx (fp2), CONST0_RTX (SFmode));
+ emit_insn (gen_slt_sf (dest, fp2, fp1));
+}
+
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+ Assume that the areas do not overlap. */
+
+static void
+mips_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+{
+ HOST_WIDE_INT offset, delta;
+ unsigned HOST_WIDE_INT bits;
+ int i;
+ enum machine_mode mode;
+ rtx *regs;
+
+ /* Work out how many bits to move at a time. If both operands have
+ half-word alignment, it is usually better to move in half words.
+ For instance, lh/lh/sh/sh is usually better than lwl/lwr/swl/swr
+ and lw/lw/sw/sw is usually better than ldl/ldr/sdl/sdr.
+ Otherwise move word-sized chunks. */
+ if (MEM_ALIGN (src) == BITS_PER_WORD / 2
+ && MEM_ALIGN (dest) == BITS_PER_WORD / 2)
+ bits = BITS_PER_WORD / 2;
+ else
+ bits = BITS_PER_WORD;
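+
+ /* For example (illustrative): on a 32-bit target, two half-word-
+ aligned 6-byte operands are copied as three lh/sh pairs, whereas
+ word-aligned operands use lw/sw for each full word plus a mop-up
+ copy for any remainder. */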
+
+ mode = mode_for_size (bits, MODE_INT, 0);
+ delta = bits / BITS_PER_UNIT;
+
+ /* Allocate a buffer for the temporary registers. */
+ regs = XALLOCAVEC (rtx, length / delta);
+
+ /* Load as many BITS-sized chunks as possible. Use a normal load if
+ the source has enough alignment, otherwise use left/right pairs. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ {
+ regs[i] = gen_reg_rtx (mode);
+ if (MEM_ALIGN (src) >= bits)
+ mips_emit_move (regs[i], adjust_address (src, mode, offset));
+ else
+ {
+ rtx part = adjust_address (src, BLKmode, offset);
+ if (!mips_expand_ext_as_unaligned_load (regs[i], part, bits, 0))
+ gcc_unreachable ();
+ }
+ }
+
+ /* Copy the chunks to the destination. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ if (MEM_ALIGN (dest) >= bits)
+ mips_emit_move (adjust_address (dest, mode, offset), regs[i]);
+ else
+ {
+ rtx part = adjust_address (dest, BLKmode, offset);
+ if (!mips_expand_ins_as_unaligned_store (part, regs[i], bits, 0))
+ gcc_unreachable ();
+ }
+
+ /* Mop up any left-over bytes. */
+ if (offset < length)
+ {
+ src = adjust_address (src, BLKmode, offset);
+ dest = adjust_address (dest, BLKmode, offset);
+ move_by_pieces (dest, src, length - offset,
+ MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0);
+ }
+}
+
+/* Helper function for doing a loop-based block operation on memory
+ reference MEM. Each iteration of the loop will operate on LENGTH
+ bytes of MEM.
+
+ Create a new base register for use within the loop and point it to
+ the start of MEM. Create a new memory reference that uses this
+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
+
+static void
+mips_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
+ rtx *loop_reg, rtx *loop_mem)
+{
+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+ /* Although the new mem does not refer to a known location,
+ it does keep up to LENGTH bytes of alignment. */
+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
+ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
+ the memory regions do not overlap. */
+
+static void
+mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+ HOST_WIDE_INT bytes_per_iter)
+{
+ rtx label, src_reg, dest_reg, final_src, test;
+ HOST_WIDE_INT leftover;
+
+ leftover = length % bytes_per_iter;
+ length -= leftover;
+
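+ /* For example (illustrative): LENGTH 37 with BYTES_PER_ITER 16
+ runs two 16-byte loop iterations and mops up the remaining 5
+ bytes with a straight-line copy. */
+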
+ /* Create registers and memory references for use within the loop. */
+ mips_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+ mips_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+ /* Calculate the value that SRC_REG should have after the last iteration
+ of the loop. */
+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+ 0, 0, OPTAB_WIDEN);
+
+ /* Emit the start of the loop. */
+ label = gen_label_rtx ();
+ emit_label (label);
+
+ /* Emit the loop body. */
+ mips_block_move_straight (dest, src, bytes_per_iter);
+
+ /* Move on to the next block. */
+ mips_emit_move (src_reg, plus_constant (src_reg, bytes_per_iter));
+ mips_emit_move (dest_reg, plus_constant (dest_reg, bytes_per_iter));
+
+ /* Emit the loop condition. */
+ test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+ if (Pmode == DImode)
+ emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
+ else
+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+
+ /* Mop up any left-over bytes. */
+ if (leftover)
+ mips_block_move_straight (dest, src, leftover);
+}
+
+/* Expand a movmemsi instruction, which copies LENGTH bytes from
+ memory reference SRC to memory reference DEST. */
+
+bool
+mips_expand_block_move (rtx dest, rtx src, rtx length)
+{
+ if (CONST_INT_P (length))
+ {
+ if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)
+ {
+ mips_block_move_straight (dest, src, INTVAL (length));
+ return true;
+ }
+ else if (optimize)
+ {
+ mips_block_move_loop (dest, src, INTVAL (length),
+ MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Expand a loop of synci insns for the address range [BEGIN, END). */
+
+void
+mips_expand_synci_loop (rtx begin, rtx end)
+{
+ rtx inc, label, end_label, cmp_result, mask, length;
+
+ /* Create end_label. */
+ end_label = gen_label_rtx ();
+
+ /* Check if begin equals end. */
+ cmp_result = gen_rtx_EQ (VOIDmode, begin, end);
+ emit_jump_insn (gen_condjump (cmp_result, end_label));
+
+ /* Load INC with the cache line size (rdhwr INC,$1). */
+ inc = gen_reg_rtx (Pmode);
+ emit_insn (Pmode == SImode
+ ? gen_rdhwr_synci_step_si (inc)
+ : gen_rdhwr_synci_step_di (inc));
+
+ /* Check if inc is 0. */
+ cmp_result = gen_rtx_EQ (VOIDmode, inc, const0_rtx);
+ emit_jump_insn (gen_condjump (cmp_result, end_label));
+
+ /* Calculate mask. */
+ mask = mips_force_unary (Pmode, NEG, inc);
+
+ /* Align BEGIN down to a cache-line boundary by masking with MASK. */
+ begin = mips_force_binary (Pmode, AND, begin, mask);
+
+ /* Calculate length. */
+ length = mips_force_binary (Pmode, MINUS, end, begin);
+
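+ /* For example (illustrative): with a 32-byte cache line, MASK is
+ -32, so a BEGIN of 0x1005 rounds down to 0x1000 and LENGTH grows
+ by 5, ensuring the first synci covers the partial line. */
+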
+ /* Loop back to here. */
+ label = gen_label_rtx ();
+ emit_label (label);
+
+ emit_insn (gen_synci (begin));
+
+ /* Update length. */
+ mips_emit_binary (MINUS, length, length, inc);
+
+ /* Update begin. */
+ mips_emit_binary (PLUS, begin, begin, inc);
+
+ /* Check if length is greater than 0. */
+ cmp_result = gen_rtx_GT (VOIDmode, length, const0_rtx);
+ emit_jump_insn (gen_condjump (cmp_result, label));
+
+ emit_label (end_label);
+}
+
+/* Expand a QI or HI mode atomic memory operation.
+
+ GENERATOR contains a pointer to the gen_* function that generates
+ the SI mode underlying atomic operation using masks that we
+ calculate.
+
+ RESULT is the return register for the operation. Its value is NULL
+ if unused.
+
+ MEM is the location of the atomic access.
+
+ OLDVAL is the first operand for the operation.
+
+ NEWVAL is the optional second operand for the operation. Its value
+ is NULL if unused. */
+
+void
+mips_expand_atomic_qihi (union mips_gen_fn_ptrs generator,
+ rtx result, rtx mem, rtx oldval, rtx newval)
+{
+ rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask;
+ rtx unshifted_mask_reg, mask, inverted_mask, si_op;
+ rtx res = NULL;
+ enum machine_mode mode;
+
+ mode = GET_MODE (mem);
+
+ /* Compute the address of the containing SImode value. */
+ orig_addr = force_reg (Pmode, XEXP (mem, 0));
+ memsi_addr = mips_force_binary (Pmode, AND, orig_addr,
+ force_reg (Pmode, GEN_INT (-4)));
+
+ /* Create a memory reference for it. */
+ memsi = gen_rtx_MEM (SImode, memsi_addr);
+ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+ /* Work out the byte offset of the QImode or HImode value,
+ counting from the least significant byte. */
+ shift = mips_force_binary (Pmode, AND, orig_addr, GEN_INT (3));
+ if (TARGET_BIG_ENDIAN)
+ mips_emit_binary (XOR, shift, shift, GEN_INT (mode == QImode ? 3 : 2));
+
+ /* Multiply by eight to convert the shift value from bytes to bits. */
+ mips_emit_binary (ASHIFT, shift, shift, GEN_INT (3));
+
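+ /* For example (illustrative): a little-endian HImode access at
+ byte offset 2 within its word yields a 16-bit shift; on big-endian
+ targets the offset is first XORed with 2, so offset 0 also yields
+ a 16-bit shift. */
+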
+ /* Make the final shift an SImode value, so that it can be used in
+ SImode operations. */
+ shiftsi = force_reg (SImode, gen_lowpart (SImode, shift));
+
+ /* Set MASK to an inclusive mask of the QImode or HImode value. */
+ unshifted_mask = GEN_INT (GET_MODE_MASK (mode));
+ unshifted_mask_reg = force_reg (SImode, unshifted_mask);
+ mask = mips_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi);
+
+ /* Compute the equivalent exclusive mask. */
+ inverted_mask = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (VOIDmode, inverted_mask,
+ gen_rtx_NOT (SImode, mask)));
+
+ /* Shift the old value into place. */
+ if (oldval != const0_rtx)
+ {
+ oldval = convert_modes (SImode, mode, oldval, true);
+ oldval = force_reg (SImode, oldval);
+ oldval = mips_force_binary (SImode, ASHIFT, oldval, shiftsi);
+ }
+
+ /* Do the same for the new value. */
+ if (newval && newval != const0_rtx)
+ {
+ newval = convert_modes (SImode, mode, newval, true);
+ newval = force_reg (SImode, newval);
+ newval = mips_force_binary (SImode, ASHIFT, newval, shiftsi);
+ }
+
+ /* Do the SImode atomic access. */
+ if (result)
+ res = gen_reg_rtx (SImode);
+ if (newval)
+ si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, newval);
+ else if (result)
+ si_op = generator.fn_5 (res, memsi, mask, inverted_mask, oldval);
+ else
+ si_op = generator.fn_4 (memsi, mask, inverted_mask, oldval);
+
+ emit_insn (si_op);
+
+ if (result)
+ {
+ /* Shift and convert the result. */
+ mips_emit_binary (AND, res, res, mask);
+ mips_emit_binary (LSHIFTRT, res, res, shiftsi);
+ mips_emit_move (result, gen_lowpart (GET_MODE (result), res));
+ }
+}
+
+/* Return true if it is possible to use left/right accesses for a
+ bitfield of WIDTH bits starting BITPOS bits into *OP. When
+ returning true, update *OP, *LEFT and *RIGHT as follows:
+
+ *OP is a BLKmode reference to the whole field.
+
+ *LEFT is a QImode reference to the first byte if big endian or
+ the last byte if little endian. This address can be used in the
+ left-side instructions (LWL, SWL, LDL, SDL).
+
+ *RIGHT is a QImode reference to the opposite end of the field and
+ can be used in the partnering right-side instructions (LWR, SWR, LDR, SDR). */
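+
+/* For example (illustrative): for a 32-bit field at address A,
+ big-endian targets get *LEFT = A and *RIGHT = A + 3, while
+ little-endian targets get *LEFT = A + 3 and *RIGHT = A. */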
+
+static bool
+mips_get_unaligned_mem (rtx *op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos,
+ rtx *left, rtx *right)
+{
+ rtx first, last;
+
+ /* Check that the operand really is a MEM. Not all the extv and
+ extzv predicates are checked. */
+ if (!MEM_P (*op))
+ return false;
+
+ /* Check that the size is valid. */
+ if (width != 32 && (!TARGET_64BIT || width != 64))
+ return false;
+
+ /* We can only access byte-aligned values. Since we are always passed
+ a reference to the first byte of the field, it is not necessary to
+ do anything with BITPOS after this check. */
+ if (bitpos % BITS_PER_UNIT != 0)
+ return false;
+
+ /* Reject aligned bitfields: we want to use a normal load or store
+ instead of a left/right pair. */
+ if (MEM_ALIGN (*op) >= width)
+ return false;
+
+ /* Adjust *OP to refer to the whole field. This also has the effect
+ of legitimizing *OP's address for BLKmode, possibly simplifying it. */
+ *op = adjust_address (*op, BLKmode, 0);
+ set_mem_size (*op, GEN_INT (width / BITS_PER_UNIT));
+
+ /* Get references to both ends of the field. We deliberately don't
+ use the original QImode *OP for FIRST since the new BLKmode one
+ might have a simpler address. */
+ first = adjust_address (*op, QImode, 0);
+ last = adjust_address (*op, QImode, width / BITS_PER_UNIT - 1);
+
+ /* Allocate to LEFT and RIGHT according to endianness. LEFT should
+ correspond to the MSB and RIGHT to the LSB. */
+ if (TARGET_BIG_ENDIAN)
+ *left = first, *right = last;
+ else
+ *left = last, *right = first;
+
+ return true;
+}
+
+/* Try to use left/right loads to expand an "extv" or "extzv" pattern.
+ DEST, SRC, WIDTH and BITPOS are the operands passed to the expander;
+ the operation is the equivalent of:
+
+ (set DEST (*_extract SRC WIDTH BITPOS))
+
+ Return true on success. */
+
+bool
+mips_expand_ext_as_unaligned_load (rtx dest, rtx src, HOST_WIDE_INT width,
+ HOST_WIDE_INT bitpos)
+{
+ rtx left, right, temp;
+
+ /* If TARGET_64BIT, the destination of a 32-bit "extv" or "extzv" will
+ be a paradoxical word_mode subreg. This is the only case in which
+ we allow the destination to be larger than the source. */
+ if (GET_CODE (dest) == SUBREG
+ && GET_MODE (dest) == DImode
+ && GET_MODE (SUBREG_REG (dest)) == SImode)
+ dest = SUBREG_REG (dest);
+
+ /* After the above adjustment, the destination must be the same
+ width as the source. */
+ if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+ return false;
+
+ if (!mips_get_unaligned_mem (&src, width, bitpos, &left, &right))
+ return false;
+
+ temp = gen_reg_rtx (GET_MODE (dest));
+ if (GET_MODE (dest) == DImode)
+ {
+ emit_insn (gen_mov_ldl (temp, src, left));
+ emit_insn (gen_mov_ldr (dest, copy_rtx (src), right, temp));
+ }
+ else
+ {
+ emit_insn (gen_mov_lwl (temp, src, left));
+ emit_insn (gen_mov_lwr (dest, copy_rtx (src), right, temp));
+ }
+ return true;
+}
+
+/* Try to use left/right stores to expand an "ins" pattern. DEST, WIDTH,
+ BITPOS and SRC are the operands passed to the expander; the operation
+ is the equivalent of:
+
+ (set (zero_extract DEST WIDTH BITPOS) SRC)
+
+ Return true on success. */
+
+bool
+mips_expand_ins_as_unaligned_store (rtx dest, rtx src, HOST_WIDE_INT width,
+ HOST_WIDE_INT bitpos)
+{
+ rtx left, right;
+ enum machine_mode mode;
+
+ if (!mips_get_unaligned_mem (&dest, width, bitpos, &left, &right))
+ return false;
+
+ mode = mode_for_size (width, MODE_INT, 0);
+ src = gen_lowpart (mode, src);
+ if (mode == DImode)
+ {
+ emit_insn (gen_mov_sdl (dest, src, left));
+ emit_insn (gen_mov_sdr (copy_rtx (dest), copy_rtx (src), right));
+ }
+ else
+ {
+ emit_insn (gen_mov_swl (dest, src, left));
+ emit_insn (gen_mov_swr (copy_rtx (dest), copy_rtx (src), right));
+ }
+ return true;
+}
+
+/* Return true if X is a MEM with the same size as MODE. */
+
+bool
+mips_mem_fits_mode_p (enum machine_mode mode, rtx x)
+{
+ rtx size;
+
+ if (!MEM_P (x))
+ return false;
+
+ size = MEM_SIZE (x);
+ return size && INTVAL (size) == GET_MODE_SIZE (mode);
+}
+
+/* Return true if (zero_extract OP WIDTH BITPOS) can be used as the
+ source of an "ext" instruction or the destination of an "ins"
+ instruction. OP must be a register operand and the following
+ conditions must hold:
+
+ 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op))
+ 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
+ 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
+
+ Also reject lengths equal to a word as they are better handled
+ by the move patterns. */
+
+bool
+mips_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
+{
+ if (!ISA_HAS_EXT_INS
+ || !register_operand (op, VOIDmode)
+ || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD)
+ return false;
+
+ if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1))
+ return false;
+
+ if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op)))
+ return false;
+
+ return true;
+}
+
+/* Return true if MASK and SHIFT are valid for a mask-low-and-shift-left
+ operation, where MAXLEN is the maximum length of consecutive bits that
+ can make up MASK. MODE is the mode of the operation. See
+ mask_low_and_shift_len for the actual definition. */
+
+bool
+mask_low_and_shift_p (enum machine_mode mode, rtx mask, rtx shift, int maxlen)
+{
+ return IN_RANGE (mask_low_and_shift_len (mode, mask, shift), 1, maxlen);
+}
+
+/* Return true iff OP1 and OP2 are valid operands together for the
+ *and<MODE>3 and *and<MODE>3_mips16 patterns. For the cases to consider,
+ see the table in the comment before the pattern. */
+
+bool
+and_operands_ok (enum machine_mode mode, rtx op1, rtx op2)
+{
+ return (memory_operand (op1, mode)
+ ? and_load_operand (op2, mode)
+ : and_reg_operand (op2, mode));
+}
+
+/* The canonical form of a mask-low-and-shift-left operation is
+ (and (ashift X SHIFT) MASK) where MASK has the lower SHIFT number of bits
+ cleared. Thus we need to shift MASK to the right before checking if it
+ is a valid mask value. MODE is the mode of the operation. If the
+ shifted mask is valid, return the length of the mask, otherwise return -1. */
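+
+/* A worked example (illustrative values): in SImode, MASK = 0x3f00
+ with SHIFT = 8 gives (0x3f00 >> 8) + 1 == 0x40, a power of two, so
+ the length is 6; MASK = 0x5f00 fails the power-of-two test and -1
+ is returned. */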
+
+int
+mask_low_and_shift_len (enum machine_mode mode, rtx mask, rtx shift)
+{
+ HOST_WIDE_INT shval;
+
+ shval = INTVAL (shift) & (GET_MODE_BITSIZE (mode) - 1);
+ return exact_log2 ((UINTVAL (mask) >> shval) + 1);
+}
+
+/* Return true if -msplit-addresses is selected and should be honored.
+
+ -msplit-addresses is a half-way house between explicit relocations
+ and the traditional assembler macros. It can split absolute 32-bit
+ symbolic constants into a high/lo_sum pair but uses macros for other
+ sorts of access.
+
+ Like explicit relocation support for REL targets, it relies
+ on GNU extensions in the assembler and the linker.
+
+ Although this code should work for -O0, it has traditionally
+ been treated as an optimization. */
+
+static bool
+mips_split_addresses_p (void)
+{
+ return (TARGET_SPLIT_ADDRESSES
+ && optimize
+ && !TARGET_MIPS16
+ && !flag_pic
+ && !ABI_HAS_64BIT_SYMBOLS);
+}
+
+/* (Re-)Initialize mips_split_p, mips_split_hi_p, mips_hi_relocs and
+ mips_lo_relocs. */
+
+static void
+mips_init_relocs (void)
+{
+ memset (mips_split_p, '\0', sizeof (mips_split_p));
+ memset (mips_split_hi_p, '\0', sizeof (mips_split_hi_p));
+ memset (mips_hi_relocs, '\0', sizeof (mips_hi_relocs));
+ memset (mips_lo_relocs, '\0', sizeof (mips_lo_relocs));
+
+ if (ABI_HAS_64BIT_SYMBOLS)
+ {
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ mips_split_p[SYMBOL_64_HIGH] = true;
+ mips_hi_relocs[SYMBOL_64_HIGH] = "%highest(";
+ mips_lo_relocs[SYMBOL_64_HIGH] = "%higher(";
+
+ mips_split_p[SYMBOL_64_MID] = true;
+ mips_hi_relocs[SYMBOL_64_MID] = "%higher(";
+ mips_lo_relocs[SYMBOL_64_MID] = "%hi(";
+
+ mips_split_p[SYMBOL_64_LOW] = true;
+ mips_hi_relocs[SYMBOL_64_LOW] = "%hi(";
+ mips_lo_relocs[SYMBOL_64_LOW] = "%lo(";
+
+ mips_split_p[SYMBOL_ABSOLUTE] = true;
+ mips_lo_relocs[SYMBOL_ABSOLUTE] = "%lo(";
+ }
+ }
+ else
+ {
+ if (TARGET_EXPLICIT_RELOCS || mips_split_addresses_p () || TARGET_MIPS16)
+ {
+ mips_split_p[SYMBOL_ABSOLUTE] = true;
+ mips_hi_relocs[SYMBOL_ABSOLUTE] = "%hi(";
+ mips_lo_relocs[SYMBOL_ABSOLUTE] = "%lo(";
+
+ mips_lo_relocs[SYMBOL_32_HIGH] = "%hi(";
+ }
+ }
+
+ if (TARGET_MIPS16)
+ {
+ /* The high part is provided by a pseudo copy of $gp. */
+ mips_split_p[SYMBOL_GP_RELATIVE] = true;
+ mips_lo_relocs[SYMBOL_GP_RELATIVE] = "%gprel(";
+ }
+ else if (TARGET_EXPLICIT_RELOCS)
+ /* Small data constants are kept whole until after reload,
+ then lowered by mips_rewrite_small_data. */
+ mips_lo_relocs[SYMBOL_GP_RELATIVE] = "%gp_rel(";
+
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ mips_split_p[SYMBOL_GOT_PAGE_OFST] = true;
+ if (TARGET_NEWABI)
+ {
+ mips_lo_relocs[SYMBOL_GOTOFF_PAGE] = "%got_page(";
+ mips_lo_relocs[SYMBOL_GOT_PAGE_OFST] = "%got_ofst(";
+ }
+ else
+ {
+ mips_lo_relocs[SYMBOL_GOTOFF_PAGE] = "%got(";
+ mips_lo_relocs[SYMBOL_GOT_PAGE_OFST] = "%lo(";
+ }
+ if (TARGET_MIPS16)
+ /* Expose the use of $28 as soon as possible. */
+ mips_split_hi_p[SYMBOL_GOT_PAGE_OFST] = true;
+
+ if (TARGET_XGOT)
+ {
+ /* The HIGH and LO_SUM are matched by special .md patterns. */
+ mips_split_p[SYMBOL_GOT_DISP] = true;
+
+ mips_split_p[SYMBOL_GOTOFF_DISP] = true;
+ mips_hi_relocs[SYMBOL_GOTOFF_DISP] = "%got_hi(";
+ mips_lo_relocs[SYMBOL_GOTOFF_DISP] = "%got_lo(";
+
+ mips_split_p[SYMBOL_GOTOFF_CALL] = true;
+ mips_hi_relocs[SYMBOL_GOTOFF_CALL] = "%call_hi(";
+ mips_lo_relocs[SYMBOL_GOTOFF_CALL] = "%call_lo(";
+ }
+ else
+ {
+ if (TARGET_NEWABI)
+ mips_lo_relocs[SYMBOL_GOTOFF_DISP] = "%got_disp(";
+ else
+ mips_lo_relocs[SYMBOL_GOTOFF_DISP] = "%got(";
+ mips_lo_relocs[SYMBOL_GOTOFF_CALL] = "%call16(";
+ if (TARGET_MIPS16)
+ /* Expose the use of $28 as soon as possible. */
+ mips_split_p[SYMBOL_GOT_DISP] = true;
+ }
+ }
+
+ if (TARGET_NEWABI)
+ {
+ mips_split_p[SYMBOL_GOTOFF_LOADGP] = true;
+ mips_hi_relocs[SYMBOL_GOTOFF_LOADGP] = "%hi(%neg(%gp_rel(";
+ mips_lo_relocs[SYMBOL_GOTOFF_LOADGP] = "%lo(%neg(%gp_rel(";
+ }
+
+ mips_lo_relocs[SYMBOL_TLSGD] = "%tlsgd(";
+ mips_lo_relocs[SYMBOL_TLSLDM] = "%tlsldm(";
+
+ mips_split_p[SYMBOL_DTPREL] = true;
+ mips_hi_relocs[SYMBOL_DTPREL] = "%dtprel_hi(";
+ mips_lo_relocs[SYMBOL_DTPREL] = "%dtprel_lo(";
+
+ mips_lo_relocs[SYMBOL_GOTTPREL] = "%gottprel(";
+
+ mips_split_p[SYMBOL_TPREL] = true;
+ mips_hi_relocs[SYMBOL_TPREL] = "%tprel_hi(";
+ mips_lo_relocs[SYMBOL_TPREL] = "%tprel_lo(";
+
+ mips_lo_relocs[SYMBOL_HALF] = "%half(";
+}
+
+/* Print symbolic operand OP, which is part of a HIGH or LO_SUM
+ in context CONTEXT. RELOCS is the array of relocations to use. */
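+
+/* For example (illustrative): the NewABI LOADGP reloc string
+ "%hi(%neg(%gp_rel(" contains three opening parentheses, so the loop
+ below emits three closing ones, giving "%hi(%neg(%gp_rel(sym)))". */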
+
+static void
+mips_print_operand_reloc (FILE *file, rtx op, enum mips_symbol_context context,
+ const char **relocs)
+{
+ enum mips_symbol_type symbol_type;
+ const char *p;
+
+ symbol_type = mips_classify_symbolic_expression (op, context);
+ gcc_assert (relocs[symbol_type]);
+
+ fputs (relocs[symbol_type], file);
+ output_addr_const (file, mips_strip_unspec_address (op));
+ for (p = relocs[symbol_type]; *p != 0; p++)
+ if (*p == '(')
+ fputc (')', file);
+}
+
+/* Start a new block with the given asm switch enabled. If we need
+ to print a directive, emit PREFIX before it and SUFFIX after it. */
+
+static void
+mips_push_asm_switch_1 (struct mips_asm_switch *asm_switch,
+ const char *prefix, const char *suffix)
+{
+ if (asm_switch->nesting_level == 0)
+ fprintf (asm_out_file, "%s.set\tno%s%s", prefix, asm_switch->name, suffix);
+ asm_switch->nesting_level++;
+}
+
+/* Likewise, but end a block. */
+
+static void
+mips_pop_asm_switch_1 (struct mips_asm_switch *asm_switch,
+ const char *prefix, const char *suffix)
+{
+ gcc_assert (asm_switch->nesting_level);
+ asm_switch->nesting_level--;
+ if (asm_switch->nesting_level == 0)
+ fprintf (asm_out_file, "%s.set\t%s%s", prefix, asm_switch->name, suffix);
+}
+
+/* Wrappers around mips_push_asm_switch_1 and mips_pop_asm_switch_1
+ that either print a complete line or print nothing. */
+
+void
+mips_push_asm_switch (struct mips_asm_switch *asm_switch)
+{
+ mips_push_asm_switch_1 (asm_switch, "\t", "\n");
+}
+
+void
+mips_pop_asm_switch (struct mips_asm_switch *asm_switch)
+{
+ mips_pop_asm_switch_1 (asm_switch, "\t", "\n");
+}
+
+/* Print the text for PRINT_OPERAND punctuation character CH to FILE.
+ The punctuation characters are:
+
+ '(' Start a nested ".set noreorder" block.
+ ')' End a nested ".set noreorder" block.
+ '[' Start a nested ".set noat" block.
+ ']' End a nested ".set noat" block.
+ '<' Start a nested ".set nomacro" block.
+ '>' End a nested ".set nomacro" block.
+ '*' Behave like %(%< if generating a delayed-branch sequence.
+ '#' Print a nop if in a ".set noreorder" block.
+ '/' Like '#', but do nothing within a delayed-branch sequence.
+ '?' Print "l" if mips_branch_likely is true.
+ '~' Print a nop if mips_branch_likely is true.
+ '.' Print the name of the register with a hard-wired zero (zero or $0).
+ '@' Print the name of the assembler temporary register (at or $1).
+ '^' Print the name of the pic call-through register (t9 or $25).
+ '+' Print the name of the gp register (usually gp or $28).
+ '$' Print the name of the stack pointer register (sp or $29).
+
+ See also mips_init_print_operand_punct. */
+
+static void
+mips_print_operand_punctuation (FILE *file, int ch)
+{
+ switch (ch)
+ {
+ case '(':
+ mips_push_asm_switch_1 (&mips_noreorder, "", "\n\t");
+ break;
+
+ case ')':
+ mips_pop_asm_switch_1 (&mips_noreorder, "\n\t", "");
+ break;
+
+ case '[':
+ mips_push_asm_switch_1 (&mips_noat, "", "\n\t");
+ break;
+
+ case ']':
+ mips_pop_asm_switch_1 (&mips_noat, "\n\t", "");
+ break;
+
+ case '<':
+ mips_push_asm_switch_1 (&mips_nomacro, "", "\n\t");
+ break;
+
+ case '>':
+ mips_pop_asm_switch_1 (&mips_nomacro, "\n\t", "");
+ break;
+
+ case '*':
+ if (final_sequence != 0)
+ {
+ mips_print_operand_punctuation (file, '(');
+ mips_print_operand_punctuation (file, '<');
+ }
+ break;
+
+ case '#':
+ if (mips_noreorder.nesting_level > 0)
+ fputs ("\n\tnop", file);
+ break;
+
+ case '/':
+ /* Print an extra newline so that the delayed insn is separated
+ from the following ones. This looks neater and is consistent
+ with non-nop delayed sequences. */
+ if (mips_noreorder.nesting_level > 0 && final_sequence == 0)
+ fputs ("\n\tnop\n", file);
+ break;
+
+ case '?':
+ if (mips_branch_likely)
+ putc ('l', file);
+ break;
+
+ case '~':
+ if (mips_branch_likely)
+ fputs ("\n\tnop", file);
+ break;
+
+ case '.':
+ fputs (reg_names[GP_REG_FIRST + 0], file);
+ break;
+
+ case '@':
+ fputs (reg_names[AT_REGNUM], file);
+ break;
+
+ case '^':
+ fputs (reg_names[PIC_FUNCTION_ADDR_REGNUM], file);
+ break;
+
+ case '+':
+ fputs (reg_names[PIC_OFFSET_TABLE_REGNUM], file);
+ break;
+
+ case '$':
+ fputs (reg_names[STACK_POINTER_REGNUM], file);
+ break;
+
+ default:
+ gcc_unreachable ();
+ break;
+ }
+}
+
+/* Initialize mips_print_operand_punct. */
+
+static void
+mips_init_print_operand_punct (void)
+{
+ const char *p;
+
+ for (p = "()[]<>*#/?~.@^+$"; *p; p++)
+ mips_print_operand_punct[(unsigned char) *p] = true;
+}
+
+/* PRINT_OPERAND prefix LETTER refers to the integer branch instruction
+ associated with condition CODE. Print the condition part of the
+ opcode to FILE. */
+
+static void
+mips_print_int_branch_condition (FILE *file, enum rtx_code code, int letter)
+{
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ case GT:
+ case GE:
+ case LT:
+ case LE:
+ case GTU:
+ case GEU:
+ case LTU:
+ case LEU:
+ /* Conveniently, the MIPS names for these conditions are the same
+ as their RTL equivalents. */
+ fputs (GET_RTX_NAME (code), file);
+ break;
+
+ default:
+ output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
+ break;
+ }
+}
+
+/* Likewise floating-point branches. */
+
+static void
+mips_print_float_branch_condition (FILE *file, enum rtx_code code, int letter)
+{
+ switch (code)
+ {
+ case EQ:
+ fputs ("c1f", file);
+ break;
+
+ case NE:
+ fputs ("c1t", file);
+ break;
+
+ default:
+ output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
+ break;
+ }
+}
+
+/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
+
+static bool
+mips_print_operand_punct_valid_p (unsigned char code)
+{
+ return mips_print_operand_punct[code];
+}
+
+/* Implement TARGET_PRINT_OPERAND. The MIPS-specific operand codes are:
+
+ 'X' Print CONST_INT OP in hexadecimal format.
+ 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format.
+ 'd' Print CONST_INT OP in decimal.
+ 'm' Print one less than CONST_INT OP in decimal.
+ 'h' Print the high-part relocation associated with OP, after stripping
+ any outermost HIGH.
+ 'R' Print the low-part relocation associated with OP.
+ 'C' Print the integer branch condition for comparison OP.
+ 'N' Print the inverse of the integer branch condition for comparison OP.
+ 'F' Print the FPU branch condition for comparison OP.
+ 'W' Print the inverse of the FPU branch condition for comparison OP.
+ 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+ 'z' for (eq:?I ...), 'n' for (ne:?I ...).
+ 't' Like 'T', but with the EQ/NE cases reversed
+ 'Y' Print mips_fp_conditions[INTVAL (OP)]
+ 'Z' Print OP and a comma for ISA_HAS_8CC, otherwise print nothing.
+ 'q' Print a DSP accumulator register.
+ 'D' Print the second part of a double-word register or memory operand.
+ 'L' Print the low-order register in a double-word register operand.
+ 'M' Print the high-order register in a double-word register operand.
+ 'z' Print $0 if OP is zero, otherwise print OP normally. */
+
+static void
+mips_print_operand (FILE *file, rtx op, int letter)
+{
+ enum rtx_code code;
+
+ if (mips_print_operand_punct_valid_p (letter))
+ {
+ mips_print_operand_punctuation (file, letter);
+ return;
+ }
+
+ gcc_assert (op);
+ code = GET_CODE (op);
+
+ switch (letter)
+ {
+ case 'X':
+ if (CONST_INT_P (op))
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'x':
+ if (CONST_INT_P (op))
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'd':
+ if (CONST_INT_P (op))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'm':
+ if (CONST_INT_P (op))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'h':
+ if (code == HIGH)
+ op = XEXP (op, 0);
+ mips_print_operand_reloc (file, op, SYMBOL_CONTEXT_LEA, mips_hi_relocs);
+ break;
+
+ case 'R':
+ mips_print_operand_reloc (file, op, SYMBOL_CONTEXT_LEA, mips_lo_relocs);
+ break;
+
+ case 'C':
+ mips_print_int_branch_condition (file, code, letter);
+ break;
+
+ case 'N':
+ mips_print_int_branch_condition (file, reverse_condition (code), letter);
+ break;
+
+ case 'F':
+ mips_print_float_branch_condition (file, code, letter);
+ break;
+
+ case 'W':
+ mips_print_float_branch_condition (file, reverse_condition (code),
+ letter);
+ break;
+
+ case 'T':
+ case 't':
+ {
+ int truth = (code == NE) == (letter == 'T');
+ fputc ("zfnt"[truth * 2 + (GET_MODE (op) == CCmode)], file);
+ }
+ break;
+
+ case 'Y':
+ if (code == CONST_INT && UINTVAL (op) < ARRAY_SIZE (mips_fp_conditions))
+ fputs (mips_fp_conditions[UINTVAL (op)], file);
+ else
+ output_operand_lossage ("'%%%c' is not a valid operand prefix",
+ letter);
+ break;
+
+ case 'Z':
+ if (ISA_HAS_8CC)
+ {
+ mips_print_operand (file, op, 0);
+ fputc (',', file);
+ }
+ break;
+
+ case 'q':
+ if (code == REG && MD_REG_P (REGNO (op)))
+ fprintf (file, "$ac0");
+ else if (code == REG && DSP_ACC_REG_P (REGNO (op)))
+ fprintf (file, "$ac%c", reg_names[REGNO (op)][3]);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ default:
+ switch (code)
+ {
+ case REG:
+ {
+ unsigned int regno = REGNO (op);
+ if ((letter == 'M' && TARGET_LITTLE_ENDIAN)
+ || (letter == 'L' && TARGET_BIG_ENDIAN)
+ || letter == 'D')
+ regno++;
+ else if (letter && letter != 'z' && letter != 'M' && letter != 'L')
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ /* We need to print $0 .. $31 for COP0 registers. */
+ if (COP0_REG_P (regno))
+ fprintf (file, "$%s", &reg_names[regno][4]);
+ else
+ fprintf (file, "%s", reg_names[regno]);
+ }
+ break;
+
+ case MEM:
+ if (letter == 'D')
+ output_address (plus_constant (XEXP (op, 0), 4));
+ else if (letter && letter != 'z')
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ else
+ output_address (XEXP (op, 0));
+ break;
+
+ default:
+ if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
+ fputs (reg_names[GP_REG_FIRST], file);
+ else if (letter && letter != 'z')
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ else if (CONST_GP_P (op))
+ fputs (reg_names[GLOBAL_POINTER_REGNUM], file);
+ else
+ output_addr_const (file, mips_strip_unspec_address (op));
+ break;
+ }
+ }
+}
+
+/* Implement TARGET_PRINT_OPERAND_ADDRESS. */
+
+static void
+mips_print_operand_address (FILE *file, rtx x)
+{
+ struct mips_address_info addr;
+
+ if (mips_classify_address (&addr, x, word_mode, true))
+ switch (addr.type)
+ {
+ case ADDRESS_REG:
+ mips_print_operand (file, addr.offset, 0);
+ fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
+ return;
+
+ case ADDRESS_LO_SUM:
+ mips_print_operand_reloc (file, addr.offset, SYMBOL_CONTEXT_MEM,
+ mips_lo_relocs);
+ fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
+ return;
+
+ case ADDRESS_CONST_INT:
+ output_addr_const (file, x);
+ fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
+ return;
+
+ case ADDRESS_SYMBOLIC:
+ output_addr_const (file, mips_strip_unspec_address (x));
+ return;
+ }
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_ENCODE_SECTION_INFO. */
+
+static void
+mips_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ rtx symbol = XEXP (rtl, 0);
+ tree type = TREE_TYPE (decl);
+
+ /* Encode whether the symbol is short or long. */
+ if ((TARGET_LONG_CALLS && !mips_near_type_p (type))
+ || mips_far_type_p (type))
+ SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_LONG_CALL;
+ }
+}
+
+/* Implement TARGET_SELECT_RTX_SECTION. */
+
+static section *
+mips_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ /* ??? Consider using mergeable small data sections. */
+ if (mips_rtx_constant_in_small_data_p (mode))
+ return get_named_section (NULL, ".sdata", 0);
+
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
+
+ The complication here is that, with the combination TARGET_ABICALLS
+ && !TARGET_ABSOLUTE_ABICALLS && !TARGET_GPWORD, jump tables will use
+ absolute addresses, and should therefore not be included in the
+ read-only part of a DSO. Handle such cases by selecting a normal
+ data section instead of a read-only one. The logic apes that in
+ default_function_rodata_section. */
+
+static section *
+mips_function_rodata_section (tree decl)
+{
+ if (!TARGET_ABICALLS || TARGET_ABSOLUTE_ABICALLS || TARGET_GPWORD)
+ return default_function_rodata_section (decl);
+
+ if (decl && DECL_SECTION_NAME (decl))
+ {
+ const char *name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (DECL_ONE_ONLY (decl) && strncmp (name, ".gnu.linkonce.t.", 16) == 0)
+ {
+ char *rname = ASTRDUP (name);
+ rname[14] = 'd';
+ return get_section (rname, SECTION_LINKONCE | SECTION_WRITE, decl);
+ }
+ else if (flag_function_sections
+ && flag_data_sections
+ && strncmp (name, ".text.", 6) == 0)
+ {
+ char *rname = ASTRDUP (name);
+ memcpy (rname + 1, "data", 4);
+ return get_section (rname, SECTION_WRITE, decl);
+ }
+ }
+ return data_section;
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P. */
+
+static bool
+mips_in_small_data_p (const_tree decl)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ /* We don't yet generate small-data references for -mabicalls
+ or VxWorks RTP code. See the related -G handling in
+ mips_option_override. */
+ if (TARGET_ABICALLS || TARGET_VXWORKS_RTP)
+ return false;
+
+ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
+ {
+ const char *name;
+
+ /* Reject anything that isn't in a known small-data section. */
+ name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
+ return false;
+
+ /* If a symbol is defined externally, the assembler will use the
+ usual -G rules when deciding how to implement macros. */
+ if (mips_lo_relocs[SYMBOL_GP_RELATIVE] || !DECL_EXTERNAL (decl))
+ return true;
+ }
+ else if (TARGET_EMBEDDED_DATA)
+ {
+ /* Don't put constants into the small data section: we want them
+ to be in ROM rather than RAM. */
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+
+ if (TREE_READONLY (decl)
+ && !TREE_SIDE_EFFECTS (decl)
+ && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
+ return false;
+ }
+
+ /* Enforce -mlocal-sdata. */
+ if (!TARGET_LOCAL_SDATA && !TREE_PUBLIC (decl))
+ return false;
+
+ /* Enforce -mextern-sdata. */
+ if (!TARGET_EXTERN_SDATA && DECL_P (decl))
+ {
+ if (DECL_EXTERNAL (decl))
+ return false;
+ if (DECL_COMMON (decl) && DECL_INITIAL (decl) == NULL)
+ return false;
+ }
+
+ /* We have traditionally not treated zero-sized objects as small data,
+ so this is now effectively part of the ABI. */
+ size = int_size_in_bytes (TREE_TYPE (decl));
+ return size > 0 && size <= mips_small_data_threshold;
+}
+
+/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P. We don't want to use
+ anchors for small data: the GP register acts as an anchor in that
+ case. We also don't want to use them for PC-relative accesses,
+ where the PC acts as an anchor. */
+
+static bool
+mips_use_anchors_for_symbol_p (const_rtx symbol)
+{
+ switch (mips_classify_symbol (symbol, SYMBOL_CONTEXT_MEM))
+ {
+ case SYMBOL_PC_RELATIVE:
+ case SYMBOL_GP_RELATIVE:
+ return false;
+
+ default:
+ return default_use_anchors_for_symbol_p (symbol);
+ }
+}
+
+/* The MIPS debug format wants all automatic variables and arguments
+ to be in terms of the virtual frame pointer (stack pointer before
+ any adjustment in the function), while the MIPS 3.0 linker wants
+ the frame pointer to be the stack pointer after the initial
+ adjustment. So, we do the adjustment here. The arg pointer (which
+ is eliminated) points to the virtual frame pointer, while the frame
+ pointer (which may be eliminated) points to the stack pointer after
+ the initial adjustments. */
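+
+/* For example (illustrative): with a total frame size of 32 bytes, a
+ stack-pointer-relative offset of 8 is reported as -24 relative to
+ the virtual frame pointer. */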
+
+HOST_WIDE_INT
+mips_debugger_offset (rtx addr, HOST_WIDE_INT offset)
+{
+ rtx offset2 = const0_rtx;
+ rtx reg = eliminate_constant_term (addr, &offset2);
+
+ if (offset == 0)
+ offset = INTVAL (offset2);
+
+ if (reg == stack_pointer_rtx
+ || reg == frame_pointer_rtx
+ || reg == hard_frame_pointer_rtx)
+ {
+ offset -= cfun->machine->frame.total_size;
+ if (reg == hard_frame_pointer_rtx)
+ offset += cfun->machine->frame.hard_frame_pointer_offset;
+ }
+
+ /* sdbout_parms does not want this to crash for unrecognized cases. */
+#if 0
+ else if (reg != arg_pointer_rtx)
+ fatal_insn ("mips_debugger_offset called with non stack/frame/arg pointer",
+ addr);
+#endif
+
+ return offset;
+}
+
+/* Implement ASM_OUTPUT_EXTERNAL. */
+
+void
+mips_output_external (FILE *file, tree decl, const char *name)
+{
+ default_elf_asm_output_external (file, decl, name);
+
+ /* We output the name if and only if TREE_SYMBOL_REFERENCED is
+ set in order to avoid putting out names that are never really
+ used. */
+ if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+ {
+ if (!TARGET_EXPLICIT_RELOCS && mips_in_small_data_p (decl))
+ {
+ /* When using assembler macros, emit .extern directives for
+ all small-data externs so that the assembler knows how
+ big they are.
+
+ In most cases it would be safe (though pointless) to emit
+ .externs for other symbols too. One exception is when an
+ object is within the -G limit but declared by the user to
+ be in a section other than .sbss or .sdata. */
+ fputs ("\t.extern\t", file);
+ assemble_name (file, name);
+ fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n",
+ int_size_in_bytes (TREE_TYPE (decl)));
+ }
+ }
+}
+
+/* Implement TARGET_ASM_OUTPUT_SOURCE_FILENAME. */
+
+static void
+mips_output_filename (FILE *stream, const char *name)
+{
+ /* If we are emitting DWARF-2, let dwarf2out handle the ".file"
+ directives. */
+ if (write_symbols == DWARF2_DEBUG)
+ return;
+ else if (mips_output_filename_first_time)
+ {
+ mips_output_filename_first_time = 0;
+ num_source_filenames += 1;
+ current_function_file = name;
+ fprintf (stream, "\t.file\t%d ", num_source_filenames);
+ output_quoted_string (stream, name);
+ putc ('\n', stream);
+ }
+ /* If we are emitting stabs, let dbxout.c handle this (except for
+ the mips_output_filename_first_time case). */
+ else if (write_symbols == DBX_DEBUG)
+ return;
+ else if (name != current_function_file
+ && strcmp (name, current_function_file) != 0)
+ {
+ num_source_filenames += 1;
+ current_function_file = name;
+ fprintf (stream, "\t.file\t%d ", num_source_filenames);
+ output_quoted_string (stream, name);
+ putc ('\n', stream);
+ }
+}
+
+/* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */
+
+static void ATTRIBUTE_UNUSED
+mips_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ switch (size)
+ {
+ case 4:
+ fputs ("\t.dtprelword\t", file);
+ break;
+
+ case 8:
+ fputs ("\t.dtpreldword\t", file);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ output_addr_const (file, x);
+ fputs ("+0x8000", file);
+}
+
+/* Implement TARGET_DWARF_REGISTER_SPAN. */
+
+static rtx
+mips_dwarf_register_span (rtx reg)
+{
+ rtx high, low;
+ enum machine_mode mode;
+
+ /* By default, GCC maps increasing register numbers to increasing
+ memory locations, but paired FPRs are always little-endian,
+ regardless of the prevailing endianness. */
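+ /* For example (illustrative): on a big-endian target with 32-bit
+ FPRs, a DFmode value in $f20/$f21 is described as the span
+ ($f21, $f20), high-order word first. */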
+ mode = GET_MODE (reg);
+ if (FP_REG_P (REGNO (reg))
+ && TARGET_BIG_ENDIAN
+ && MAX_FPRS_PER_FMT > 1
+ && GET_MODE_SIZE (mode) > UNITS_PER_FPREG)
+ {
+ gcc_assert (GET_MODE_SIZE (mode) == UNITS_PER_HWFPVALUE);
+ high = mips_subword (reg, true);
+ low = mips_subword (reg, false);
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, high, low));
+ }
+
+ return NULL_RTX;
+}
+
+/* Implement ASM_OUTPUT_ASCII. */
+
+void
+mips_output_ascii (FILE *stream, const char *string, size_t len)
+{
+ size_t i;
+ int cur_pos;
+
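+ /* Column position just after "\t.ascii\t\"", assuming 8-column
+ tab stops. */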
+ cur_pos = 17;
+ fprintf (stream, "\t.ascii\t\"");
+ for (i = 0; i < len; i++)
+ {
+ int c;
+
+ c = (unsigned char) string[i];
+ if (ISPRINT (c))
+ {
+ if (c == '\\' || c == '\"')
+ {
+ putc ('\\', stream);
+ cur_pos++;
+ }
+ putc (c, stream);
+ cur_pos++;
+ }
+ else
+ {
+ fprintf (stream, "\\%03o", c);
+ cur_pos += 4;
+ }
+
+ if (cur_pos > 72 && i+1 < len)
+ {
+ cur_pos = 17;
+ fprintf (stream, "\"\n\t.ascii\t\"");
+ }
+ }
+ fprintf (stream, "\"\n");
+}
+
+/* Emit either a label, .comm, or .lcomm directive. When using assembler
+ macros, mark the symbol as written so that mips_asm_output_external
+ won't emit an .extern for it. STREAM is the output file, NAME is the
+ name of the symbol, INIT_STRING is the string that should be written
+ before the symbol and FINAL_STRING is the string that should be
+ written after it. FINAL_STRING is a printf format that consumes the
+ remaining arguments. */
+
+void
+mips_declare_object (FILE *stream, const char *name, const char *init_string,
+ const char *final_string, ...)
+{
+ va_list ap;
+
+ fputs (init_string, stream);
+ assemble_name (stream, name);
+ va_start (ap, final_string);
+ vfprintf (stream, final_string, ap);
+ va_end (ap);
+
+ if (!TARGET_EXPLICIT_RELOCS)
+ {
+ tree name_tree = get_identifier (name);
+ TREE_ASM_WRITTEN (name_tree) = 1;
+ }
+}
+
+/* Declare a common object of SIZE bytes using asm directive INIT_STRING.
+ NAME is the name of the object and ALIGN is the required alignment
+ in bits. TAKES_ALIGNMENT_P is true if the directive takes a third
+ alignment argument. */
+
+void
+mips_declare_common_object (FILE *stream, const char *name,
+ const char *init_string,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align, bool takes_alignment_p)
+{
+ if (!takes_alignment_p)
+ {
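+ /* Round SIZE up to a multiple of the alignment: for example
+ (illustrative), SIZE 10 with 64-bit alignment becomes 16. */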
+ size += (align / BITS_PER_UNIT) - 1;
+ size -= size % (align / BITS_PER_UNIT);
+ mips_declare_object (stream, name, init_string,
+ "," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
+ }
+ else
+ mips_declare_object (stream, name, init_string,
+ "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
+ size, align / BITS_PER_UNIT);
+}
+
+/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON. This is usually the same as the
+ elfos.h version, but we also need to handle -muninit-const-in-rodata. */
+
+void
+mips_output_aligned_decl_common (FILE *stream, tree decl, const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ /* If the target wants uninitialized const declarations in
+ .rdata then don't put them in .comm. */
+ if (TARGET_EMBEDDED_DATA
+ && TARGET_UNINIT_CONST_IN_RODATA
+ && TREE_CODE (decl) == VAR_DECL
+ && TREE_READONLY (decl)
+ && (DECL_INITIAL (decl) == 0 || DECL_INITIAL (decl) == error_mark_node))
+ {
+ if (TREE_PUBLIC (decl) && DECL_NAME (decl))
+ targetm.asm_out.globalize_label (stream, name);
+
+ switch_to_section (readonly_data_section);
+ ASM_OUTPUT_ALIGN (stream, floor_log2 (align / BITS_PER_UNIT));
+ mips_declare_object (stream, name, "",
+ ":\n\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n",
+ size);
+ }
+ else
+ mips_declare_common_object (stream, name, "\n\t.comm\t",
+ size, align, true);
+}
+
+#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
+extern int size_directive_output;
+
+/* Implement ASM_DECLARE_OBJECT_NAME. This is like most of the standard ELF
+ definitions except that it uses mips_declare_object to emit the label. */
+
+void
+mips_declare_object_name (FILE *stream, const char *name,
+ tree decl ATTRIBUTE_UNUSED)
+{
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
+#endif
+
+ size_directive_output = 0;
+ if (!flag_inhibit_size_directive && DECL_SIZE (decl))
+ {
+ HOST_WIDE_INT size;
+
+ size_directive_output = 1;
+ size = int_size_in_bytes (TREE_TYPE (decl));
+ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+ }
+
+ mips_declare_object (stream, name, "", ":\n");
+}
+
+/* Implement ASM_FINISH_DECLARE_OBJECT. This is generic ELF stuff. */
+
+void
+mips_finish_declare_object (FILE *stream, tree decl, int top_level, int at_end)
+{
+ const char *name;
+
+ name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+ if (!flag_inhibit_size_directive
+ && DECL_SIZE (decl) != 0
+ && !at_end
+ && top_level
+ && DECL_INITIAL (decl) == error_mark_node
+ && !size_directive_output)
+ {
+ HOST_WIDE_INT size;
+
+ size_directive_output = 1;
+ size = int_size_in_bytes (TREE_TYPE (decl));
+ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+ }
+}
+#endif
+
+/* Return the FOO in the name of the ".mdebug.FOO" section associated
+ with the current ABI. */
+
+static const char *
+mips_mdebug_abi_name (void)
+{
+ switch (mips_abi)
+ {
+ case ABI_32:
+ return "abi32";
+ case ABI_O64:
+ return "abiO64";
+ case ABI_N32:
+ return "abiN32";
+ case ABI_64:
+ return "abi64";
+ case ABI_EABI:
+ return TARGET_64BIT ? "eabi64" : "eabi32";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement TARGET_ASM_FILE_START. */
+
+static void
+mips_file_start (void)
+{
+ default_file_start ();
+
+ /* Generate a special section to describe the ABI switches used to
+ produce the resultant binary. This is unnecessary on IRIX and
+ causes unwanted warnings from the native linker. */
+ if (!TARGET_IRIX6)
+ {
+ /* Record the ABI itself. Modern versions of binutils encode
+ this information in the ELF header flags, but GDB needs the
+ information in order to correctly debug binaries produced by
+ older binutils. See the function mips_gdbarch_init in
+ gdb/mips-tdep.c. */
+ fprintf (asm_out_file, "\t.section .mdebug.%s\n\t.previous\n",
+ mips_mdebug_abi_name ());
+
+ /* There is no ELF header flag to distinguish long32 forms of the
+ EABI from long64 forms. Emit a special section to help tools
+ such as GDB. Do the same for o64, which is sometimes used with
+ -mlong64. */
+ if (mips_abi == ABI_EABI || mips_abi == ABI_O64)
+ fprintf (asm_out_file, "\t.section .gcc_compiled_long%d\n"
+ "\t.previous\n", TARGET_LONG64 ? 64 : 32);
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ {
+ int attr;
+
+ /* No floating-point operations, -mno-float. */
+ if (TARGET_NO_FLOAT)
+ attr = 0;
+ /* Soft-float code, -msoft-float. */
+ else if (!TARGET_HARD_FLOAT_ABI)
+ attr = 3;
+ /* Single-float code, -msingle-float. */
+ else if (!TARGET_DOUBLE_FLOAT)
+ attr = 2;
+ /* 64-bit FP registers on a 32-bit target, -mips32r2 -mfp64. */
+ else if (!TARGET_64BIT && TARGET_FLOAT64)
+ attr = 4;
+ /* Regular FP code, FP regs same size as GP regs, -mdouble-float. */
+ else
+ attr = 1;
+
+ fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", attr);
+ }
+#endif
+ }
+
+ /* If TARGET_ABICALLS, tell GAS to generate -KPIC code. */
+ if (TARGET_ABICALLS)
+ {
+ fprintf (asm_out_file, "\t.abicalls\n");
+ if (TARGET_ABICALLS_PIC0)
+ fprintf (asm_out_file, "\t.option\tpic0\n");
+ }
+
+ if (flag_verbose_asm)
+ fprintf (asm_out_file, "\n%s -G value = %d, Arch = %s, ISA = %d\n",
+ ASM_COMMENT_START,
+ mips_small_data_threshold, mips_arch_info->name, mips_isa);
+}
+
+/* Make the last instruction frame-related and note that it performs
+ the operation described by FRAME_PATTERN. */
+
+static void
+mips_set_frame_expr (rtx frame_pattern)
+{
+ rtx insn;
+
+ insn = get_last_insn ();
+ RTX_FRAME_RELATED_P (insn) = 1;
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ frame_pattern,
+ REG_NOTES (insn));
+}
+
+/* Return a frame-related rtx that stores REG at MEM.
+ REG must be a single register. */
+
+static rtx
+mips_frame_set (rtx mem, rtx reg)
+{
+ rtx set;
+
+ /* If we're saving the return address register and the DWARF return
+ address column differs from the hard register number, adjust the
+ note reg to refer to the former. */
+ if (REGNO (reg) == RETURN_ADDR_REGNUM
+ && DWARF_FRAME_RETURN_COLUMN != RETURN_ADDR_REGNUM)
+ reg = gen_rtx_REG (GET_MODE (reg), DWARF_FRAME_RETURN_COLUMN);
+
+ set = gen_rtx_SET (VOIDmode, mem, reg);
+ RTX_FRAME_RELATED_P (set) = 1;
+
+ return set;
+}
+
+/* If a MIPS16e SAVE or RESTORE instruction saves or restores register
+ mips16e_s2_s8_regs[X], it must also save the registers in indexes
+ X + 1 onwards. Likewise mips16e_a0_a3_regs. */
+static const unsigned char mips16e_s2_s8_regs[] = {
+ 30, 23, 22, 21, 20, 19, 18
+};
+static const unsigned char mips16e_a0_a3_regs[] = {
+ 4, 5, 6, 7
+};
+
+/* A list of the registers that can be saved by the MIPS16e SAVE instruction,
+ ordered from the uppermost in memory to the lowest in memory. */
+static const unsigned char mips16e_save_restore_regs[] = {
+ 31, 30, 23, 22, 21, 20, 19, 18, 17, 16, 7, 6, 5, 4
+};
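+
+/* Example of the constraint above (illustrative): if a SAVE needs
+ register 20 (mips16e_s2_s8_regs[4]), it must also include registers
+ 19 and 18, the entries at indexes 5 and 6. mips16e_mask_registers
+ below enforces this closure on a register mask. */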
+
+/* Return the index of the lowest X in the range [0, SIZE) for which
+ bit REGS[X] is set in MASK. Return SIZE if there is no such X. */
+
+static unsigned int
+mips16e_find_first_register (unsigned int mask, const unsigned char *regs,
+ unsigned int size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ if (BITSET_P (mask, regs[i]))
+ break;
+
+ return i;
+}
+
+/* *MASK_PTR is a mask of general-purpose registers and *NUM_REGS_PTR
+ is the number of set bits. If *MASK_PTR contains REGS[X] for some X
+ in [0, SIZE), adjust *MASK_PTR and *NUM_REGS_PTR so that the same
+ is true for every index in (X, SIZE), i.e. for all indexes above X. */
+
+static void
+mips16e_mask_registers (unsigned int *mask_ptr, const unsigned char *regs,
+ unsigned int size, unsigned int *num_regs_ptr)
+{
+ unsigned int i;
+
+ i = mips16e_find_first_register (*mask_ptr, regs, size);
+ for (i++; i < size; i++)
+ if (!BITSET_P (*mask_ptr, regs[i]))
+ {
+ *num_regs_ptr += 1;
+ *mask_ptr |= 1 << regs[i];
+ }
+}
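+
+/* Worked example (hypothetical mask): if *MASK_PTR initially contains
+ only register 21 from the $s2-$s8 group, the first set index in
+ mips16e_s2_s8_regs is 3, so the loop adds registers 20, 19 and 18,
+ setting three more mask bits and increasing *NUM_REGS_PTR by 3. */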
+
+/* Return a simplified form of X using the register values in REG_VALUES.
+ REG_VALUES[R] is the last value assigned to hard register R, or null
+ if R has not been modified.
+
+ This function is rather limited, but is good enough for our purposes. */
+
+static rtx
+mips16e_collect_propagate_value (rtx x, rtx *reg_values)
+{
+ x = avoid_constant_pool_reference (x);
+
+ if (UNARY_P (x))
+ {
+ rtx x0 = mips16e_collect_propagate_value (XEXP (x, 0), reg_values);
+ return simplify_gen_unary (GET_CODE (x), GET_MODE (x),
+ x0, GET_MODE (XEXP (x, 0)));
+ }
+
+ if (ARITHMETIC_P (x))
+ {
+ rtx x0 = mips16e_collect_propagate_value (XEXP (x, 0), reg_values);
+ rtx x1 = mips16e_collect_propagate_value (XEXP (x, 1), reg_values);
+ return simplify_gen_binary (GET_CODE (x), GET_MODE (x), x0, x1);
+ }
+
+ if (REG_P (x)
+ && reg_values[REGNO (x)]
+ && !rtx_unstable_p (reg_values[REGNO (x)]))
+ return reg_values[REGNO (x)];
+
+ return x;
+}
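+
+/* For example (hypothetical values): if REG_VALUES records that $5 was
+ last set to (plus:SI (reg $sp) (const_int 16)), propagating
+ (plus:SI (reg $5) (const_int 8)) yields
+ (plus:SI (reg $sp) (const_int 24)), which the caller below can then
+ split into a base register and a constant offset. */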
+
+/* Return true if (set DEST SRC) stores an argument register into its
+ caller-allocated save slot, storing the number of that argument
+ register in *REGNO_PTR if so. REG_VALUES is as for
+ mips16e_collect_propagate_value. */
+
+static bool
+mips16e_collect_argument_save_p (rtx dest, rtx src, rtx *reg_values,
+ unsigned int *regno_ptr)
+{
+ unsigned int argno, regno;
+ HOST_WIDE_INT offset, required_offset;
+ rtx addr, base;
+
+ /* Check that this is a word-mode store. */
+ if (!MEM_P (dest) || !REG_P (src) || GET_MODE (dest) != word_mode)
+ return false;
+
+ /* Check that the register being saved is an unmodified argument
+ register. */
+ regno = REGNO (src);
+ if (!IN_RANGE (regno, GP_ARG_FIRST, GP_ARG_LAST) || reg_values[regno])
+ return false;
+ argno = regno - GP_ARG_FIRST;
+
+ /* Check whether the address is an appropriate stack-pointer or
+ frame-pointer access. */
+ addr = mips16e_collect_propagate_value (XEXP (dest, 0), reg_values);
+ mips_split_plus (addr, &base, &offset);
+ required_offset = cfun->machine->frame.total_size + argno * UNITS_PER_WORD;
+ if (base == hard_frame_pointer_rtx)
+ required_offset -= cfun->machine->frame.hard_frame_pointer_offset;
+ else if (base != stack_pointer_rtx)
+ return false;
+ if (offset != required_offset)
+ return false;
+
+ *regno_ptr = regno;
+ return true;
+}
+
+/* A subroutine of mips_expand_prologue, called only when generating
+ MIPS16e SAVE instructions. Search the start of the function for any
+ instructions that save argument registers into their caller-allocated
+ save slots. Delete such instructions and return a value N such that
+ saving [GP_ARG_FIRST, GP_ARG_FIRST + N) would make all the deleted
+ instructions redundant. */
+
+static unsigned int
+mips16e_collect_argument_saves (void)
+{
+ rtx reg_values[FIRST_PSEUDO_REGISTER];
+ rtx insn, next, set, dest, src;
+ unsigned int nargs, regno;
+
+ push_topmost_sequence ();
+ nargs = 0;
+ memset (reg_values, 0, sizeof (reg_values));
+ for (insn = get_insns (); insn; insn = next)
+ {
+ next = NEXT_INSN (insn);
+ if (NOTE_P (insn) || DEBUG_INSN_P (insn))
+ continue;
+
+ if (!INSN_P (insn))
+ break;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) != SET)
+ break;
+
+ dest = SET_DEST (set);
+ src = SET_SRC (set);
+ if (mips16e_collect_argument_save_p (dest, src, reg_values, &regno))
+ {
+ if (!BITSET_P (cfun->machine->frame.mask, regno))
+ {
+ delete_insn (insn);
+ nargs = MAX (nargs, (regno - GP_ARG_FIRST) + 1);
+ }
+ }
+ else if (REG_P (dest) && GET_MODE (dest) == word_mode)
+ reg_values[REGNO (dest)]
+ = mips16e_collect_propagate_value (src, reg_values);
+ else
+ break;
+ }
+ pop_topmost_sequence ();
+
+ return nargs;
+}
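+
+/* Illustration (hypothetical): if the prologue of a function with one
+ integer argument contains a lone "sw $4, total_size($sp)" storing the
+ argument into its caller-allocated slot, and $4 is not in frame.mask,
+ the store is deleted and the function returns 1: saving
+ [GP_ARG_FIRST, GP_ARG_FIRST + 1) then makes the deleted store
+ redundant. */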
+
+/* Return a move between register REGNO and memory location SP + OFFSET.
+ Make the move a load if RESTORE_P, otherwise make it a frame-related
+ store. */
+
+static rtx
+mips16e_save_restore_reg (bool restore_p, HOST_WIDE_INT offset,
+ unsigned int regno)
+{
+ rtx reg, mem;
+
+ mem = gen_frame_mem (SImode, plus_constant (stack_pointer_rtx, offset));
+ reg = gen_rtx_REG (SImode, regno);
+ return (restore_p
+ ? gen_rtx_SET (VOIDmode, reg, mem)
+ : mips_frame_set (mem, reg));
+}
+
+/* Return RTL for a MIPS16e SAVE or RESTORE instruction; RESTORE_P says which.
+ The instruction must:
+
+ - Allocate or deallocate SIZE bytes in total; SIZE is known
+ to be nonzero.
+
+ - Save or restore as many registers in *MASK_PTR as possible.
+ The instruction saves the first registers at the top of the
+ allocated area, with the other registers below it.
+
+ - Save NARGS argument registers above the allocated area.
+
+ (NARGS is always zero if RESTORE_P.)
+
+ The SAVE and RESTORE instructions cannot save and restore all general
+ registers, so there may be some registers left over for the caller to
+ handle. Destructively modify *MASK_PTR so that it contains the registers
+ that still need to be saved or restored. The caller can save these
+ registers in the memory immediately below *OFFSET_PTR, which is a
+ byte offset from the bottom of the allocated stack area. */
+
+static rtx
+mips16e_build_save_restore (bool restore_p, unsigned int *mask_ptr,
+ HOST_WIDE_INT *offset_ptr, unsigned int nargs,
+ HOST_WIDE_INT size)
+{
+ rtx pattern, set;
+ HOST_WIDE_INT offset, top_offset;
+ unsigned int i, regno;
+ int n;
+
+ gcc_assert (cfun->machine->frame.num_fp == 0);
+
+ /* Calculate the number of elements in the PARALLEL. We need one element
+ for the stack adjustment, one for each argument register save, and one
+ for each additional register move. */
+ n = 1 + nargs;
+ for (i = 0; i < ARRAY_SIZE (mips16e_save_restore_regs); i++)
+ if (BITSET_P (*mask_ptr, mips16e_save_restore_regs[i]))
+ n++;
+
+ /* Create the final PARALLEL. */
+ pattern = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n));
+ n = 0;
+
+ /* Add the stack pointer adjustment. */
+ set = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ restore_p ? size : -size));
+ RTX_FRAME_RELATED_P (set) = 1;
+ XVECEXP (pattern, 0, n++) = set;
+
+ /* Stack offsets in the PARALLEL are relative to the old stack pointer. */
+ top_offset = restore_p ? size : 0;
+
+ /* Save the arguments. */
+ for (i = 0; i < nargs; i++)
+ {
+ offset = top_offset + i * UNITS_PER_WORD;
+ set = mips16e_save_restore_reg (restore_p, offset, GP_ARG_FIRST + i);
+ XVECEXP (pattern, 0, n++) = set;
+ }
+
+ /* Then fill in the other register moves. */
+ offset = top_offset;
+ for (i = 0; i < ARRAY_SIZE (mips16e_save_restore_regs); i++)
+ {
+ regno = mips16e_save_restore_regs[i];
+ if (BITSET_P (*mask_ptr, regno))
+ {
+ offset -= UNITS_PER_WORD;
+ set = mips16e_save_restore_reg (restore_p, offset, regno);
+ XVECEXP (pattern, 0, n++) = set;
+ *mask_ptr &= ~(1 << regno);
+ }
+ }
+
+ /* Tell the caller what offset it should use for the remaining registers. */
+ *offset_ptr = size + (offset - top_offset);
+
+ gcc_assert (n == XVECLEN (pattern, 0));
+
+ return pattern;
+}
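+
+/* Sketch of the result (illustrative figures): for a save with SIZE == 16,
+ NARGS == 0 and a mask containing $16 and $31, the PARALLEL would be:
+
+ (set $sp (plus $sp (const_int -16)))
+ (set (mem (plus $sp (const_int -4))) (reg $31))
+ (set (mem (plus $sp (const_int -8))) (reg $16))
+
+ with all offsets relative to the old stack pointer, leaving *MASK_PTR
+ empty and *OFFSET_PTR == 8. */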
+
+/* PATTERN is a PARALLEL whose first element adds ADJUST to the stack
+ pointer. Return true if PATTERN matches the kind of instruction
+ generated by mips16e_build_save_restore. If INFO is nonnull,
+ initialize it when returning true. */
+
+bool
+mips16e_save_restore_pattern_p (rtx pattern, HOST_WIDE_INT adjust,
+ struct mips16e_save_restore_info *info)
+{
+ unsigned int i, nargs, mask, extra;
+ HOST_WIDE_INT top_offset, save_offset, offset;
+ rtx set, reg, mem, base;
+ int n;
+
+ if (!GENERATE_MIPS16E_SAVE_RESTORE)
+ return false;
+
+ /* Stack offsets in the PARALLEL are relative to the old stack pointer. */
+ top_offset = adjust > 0 ? adjust : 0;
+
+ /* Interpret all other members of the PARALLEL. */
+ save_offset = top_offset - UNITS_PER_WORD;
+ mask = 0;
+ nargs = 0;
+ i = 0;
+ for (n = 1; n < XVECLEN (pattern, 0); n++)
+ {
+ /* Check that we have a SET. */
+ set = XVECEXP (pattern, 0, n);
+ if (GET_CODE (set) != SET)
+ return false;
+
+ /* Check that the SET is a load (if restoring) or a store
+ (if saving). */
+ mem = adjust > 0 ? SET_SRC (set) : SET_DEST (set);
+ if (!MEM_P (mem))
+ return false;
+
+ /* Check that the address is the sum of the stack pointer and a
+ possibly-zero constant offset. */
+ mips_split_plus (XEXP (mem, 0), &base, &offset);
+ if (base != stack_pointer_rtx)
+ return false;
+
+ /* Check that SET's other operand is a register. */
+ reg = adjust > 0 ? SET_DEST (set) : SET_SRC (set);
+ if (!REG_P (reg))
+ return false;
+
+ /* Check for argument saves. */
+ if (offset == top_offset + nargs * UNITS_PER_WORD
+ && REGNO (reg) == GP_ARG_FIRST + nargs)
+ nargs++;
+ else if (offset == save_offset)
+ {
+ while (mips16e_save_restore_regs[i++] != REGNO (reg))
+ if (i == ARRAY_SIZE (mips16e_save_restore_regs))
+ return false;
+
+ mask |= 1 << REGNO (reg);
+ save_offset -= UNITS_PER_WORD;
+ }
+ else
+ return false;
+ }
+
+ /* Check that the restrictions on register ranges are met. */
+ extra = 0;
+ mips16e_mask_registers (&mask, mips16e_s2_s8_regs,
+ ARRAY_SIZE (mips16e_s2_s8_regs), &extra);
+ mips16e_mask_registers (&mask, mips16e_a0_a3_regs,
+ ARRAY_SIZE (mips16e_a0_a3_regs), &extra);
+ if (extra != 0)
+ return false;
+
+ /* Make sure that the topmost argument register is not saved twice.
+ The checks above ensure that the same is then true for the other
+ argument registers. */
+ if (nargs > 0 && BITSET_P (mask, GP_ARG_FIRST + nargs - 1))
+ return false;
+
+ /* Pass back information, if requested. */
+ if (info)
+ {
+ info->nargs = nargs;
+ info->mask = mask;
+ info->size = (adjust > 0 ? adjust : -adjust);
+ }
+
+ return true;
+}
+
+/* Add a MIPS16e SAVE or RESTORE register-range argument to string S
+ for the register range [MIN_REG, MAX_REG]. Return a pointer to
+ the null terminator. */
+
+static char *
+mips16e_add_register_range (char *s, unsigned int min_reg,
+ unsigned int max_reg)
+{
+ if (min_reg != max_reg)
+ s += sprintf (s, ",%s-%s", reg_names[min_reg], reg_names[max_reg]);
+ else
+ s += sprintf (s, ",%s", reg_names[min_reg]);
+ return s;
+}
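+
+/* For example, mips16e_add_register_range (s, 4, 7) appends ",$4-$7"
+ with the default register names, while equal MIN_REG and MAX_REG
+ append just the single name. */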
+
+/* Return the assembly instruction for a MIPS16e SAVE or RESTORE instruction.
+ PATTERN and ADJUST are as for mips16e_save_restore_pattern_p. */
+
+const char *
+mips16e_output_save_restore (rtx pattern, HOST_WIDE_INT adjust)
+{
+ static char buffer[300];
+
+ struct mips16e_save_restore_info info;
+ unsigned int i, end;
+ char *s;
+
+ /* Parse the pattern. */
+ if (!mips16e_save_restore_pattern_p (pattern, adjust, &info))
+ gcc_unreachable ();
+
+ /* Add the mnemonic. */
+ s = strcpy (buffer, adjust > 0 ? "restore\t" : "save\t");
+ s += strlen (s);
+
+ /* Save the arguments. */
+ if (info.nargs > 1)
+ s += sprintf (s, "%s-%s,", reg_names[GP_ARG_FIRST],
+ reg_names[GP_ARG_FIRST + info.nargs - 1]);
+ else if (info.nargs == 1)
+ s += sprintf (s, "%s,", reg_names[GP_ARG_FIRST]);
+
+ /* Emit the amount of stack space to allocate or deallocate. */
+ s += sprintf (s, "%d", (int) info.size);
+
+ /* Save or restore $16. */
+ if (BITSET_P (info.mask, 16))
+ s += sprintf (s, ",%s", reg_names[GP_REG_FIRST + 16]);
+
+ /* Save or restore $17. */
+ if (BITSET_P (info.mask, 17))
+ s += sprintf (s, ",%s", reg_names[GP_REG_FIRST + 17]);
+
+ /* Save or restore registers in the range $s2...$s8, which
+ mips16e_s2_s8_regs lists in decreasing order. Note that this
+ is a software register range; the hardware registers are not
+ numbered consecutively. */
+ end = ARRAY_SIZE (mips16e_s2_s8_regs);
+ i = mips16e_find_first_register (info.mask, mips16e_s2_s8_regs, end);
+ if (i < end)
+ s = mips16e_add_register_range (s, mips16e_s2_s8_regs[end - 1],
+ mips16e_s2_s8_regs[i]);
+
+ /* Save or restore registers in the range $a0...$a3. */
+ end = ARRAY_SIZE (mips16e_a0_a3_regs);
+ i = mips16e_find_first_register (info.mask, mips16e_a0_a3_regs, end);
+ if (i < end)
+ s = mips16e_add_register_range (s, mips16e_a0_a3_regs[i],
+ mips16e_a0_a3_regs[end - 1]);
+
+ /* Save or restore $31. */
+ if (BITSET_P (info.mask, RETURN_ADDR_REGNUM))
+ s += sprintf (s, ",%s", reg_names[RETURN_ADDR_REGNUM]);
+
+ return buffer;
+}
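+
+/* As a hypothetical example, a prologue pattern that allocates 32 bytes,
+ saves two argument registers and also saves $16, $17 and $31 would be
+ output as
+
+ save $4-$5,32,$16,$17,$31
+
+ while the matching epilogue pattern (NARGS is always zero there)
+ would be output as "restore 32,$16,$17,$31". */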
+
+/* Return true if the current function returns its value in a floating-point
+ register in MIPS16 mode. */
+
+static bool
+mips16_cfun_returns_in_fpr_p (void)
+{
+ tree return_type = DECL_RESULT (current_function_decl);
+ return (TARGET_MIPS16
+ && TARGET_HARD_FLOAT_ABI
+ && !aggregate_value_p (return_type, current_function_decl)
+ && mips_return_mode_in_fpr_p (DECL_MODE (return_type)));
+}
+
+/* Return true if predicate PRED is true for at least one instruction.
+ Cache the result in *CACHE, and assume that the result is true
+ if *CACHE is already true. */
+
+static bool
+mips_find_gp_ref (bool *cache, bool (*pred) (rtx))
+{
+ rtx insn;
+
+ if (!*cache)
+ {
+ push_topmost_sequence ();
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (USEFUL_INSN_P (insn) && pred (insn))
+ {
+ *cache = true;
+ break;
+ }
+ pop_topmost_sequence ();
+ }
+ return *cache;
+}
+
+/* Return true if INSN refers to the global pointer in an "inflexible" way.
+ See mips_cfun_has_inflexible_gp_ref_p for details. */
+
+static bool
+mips_insn_has_inflexible_gp_ref_p (rtx insn)
+{
+ /* Uses of pic_offset_table_rtx in CALL_INSN_FUNCTION_USAGE
+ indicate that the target could be a traditional MIPS
+ lazily-binding stub. */
+ return find_reg_fusage (insn, USE, pic_offset_table_rtx);
+}
+
+/* Return true if the current function refers to the global pointer
+ in a way that forces $28 to be valid. This means that we can't
+ change the choice of global pointer, even for NewABI code.
+
+ One example of this (and one which needs several checks) is that
+ $28 must be valid when calling traditional MIPS lazy-binding stubs.
+ (This restriction does not apply to PLTs.) */
+
+static bool
+mips_cfun_has_inflexible_gp_ref_p (void)
+{
+ /* If the function has a nonlocal goto, $28 must hold the correct
+ global pointer for the target function. That is, the target
+ of the goto implicitly uses $28. */
+ if (crtl->has_nonlocal_goto)
+ return true;
+
+ if (TARGET_ABICALLS_PIC2)
+ {
+ /* Symbolic accesses implicitly use the global pointer unless
+ -mexplicit-relocs is in effect. JAL macros to symbolic addresses
+ might go to traditional MIPS lazy-binding stubs. */
+ if (!TARGET_EXPLICIT_RELOCS)
+ return true;
+
+ /* FUNCTION_PROFILER includes a JAL to _mcount, which again
+ can be lazily-bound. */
+ if (crtl->profile)
+ return true;
+
+ /* MIPS16 functions that return in FPRs need to call an
+ external libgcc routine. This call is only made explicit
+ during mips_expand_epilogue, and it too might be lazily bound. */
+ if (mips16_cfun_returns_in_fpr_p ())
+ return true;
+ }
+
+ return mips_find_gp_ref (&cfun->machine->has_inflexible_gp_insn_p,
+ mips_insn_has_inflexible_gp_ref_p);
+}
+
+/* Return true if INSN refers to the global pointer in a "flexible" way.
+ See mips_cfun_has_flexible_gp_ref_p for details. */
+
+static bool
+mips_insn_has_flexible_gp_ref_p (rtx insn)
+{
+ return (get_attr_got (insn) != GOT_UNSET
+ || mips_small_data_pattern_p (PATTERN (insn))
+ || reg_overlap_mentioned_p (pic_offset_table_rtx, PATTERN (insn)));
+}
+
+/* Return true if the current function references the global pointer,
+ but only in ways that do not inherently require the global pointer
+ to be $28. Assume !mips_cfun_has_inflexible_gp_ref_p (). */
+
+static bool
+mips_cfun_has_flexible_gp_ref_p (void)
+{
+ /* Reload can sometimes introduce constant pool references
+ into a function that otherwise didn't need them. For example,
+ suppose we have an instruction like:
+
+ (set (reg:DF R1) (float:DF (reg:SI R2)))
+
+ If R2 turns out to be a constant such as 1, the instruction may
+ have a REG_EQUAL note saying that R1 == 1.0. Reload then has
+ the option of using this constant if R2 doesn't get allocated
+ to a register.
+
+ In cases like these, reload will have added the constant to the
+ pool but no instruction will yet refer to it. */
+ if (TARGET_ABICALLS_PIC2 && !reload_completed && crtl->uses_const_pool)
+ return true;
+
+ return mips_find_gp_ref (&cfun->machine->has_flexible_gp_insn_p,
+ mips_insn_has_flexible_gp_ref_p);
+}
+
+/* Return the register that should be used as the global pointer
+ within this function. Return INVALID_REGNUM if the function
+ doesn't need a global pointer. */
+
+static unsigned int
+mips_global_pointer (void)
+{
+ unsigned int regno;
+
+ /* $gp is always available unless we're using a GOT. */
+ if (!TARGET_USE_GOT)
+ return GLOBAL_POINTER_REGNUM;
+
+ /* If there are inflexible references to $gp, we must use the
+ standard register. */
+ if (mips_cfun_has_inflexible_gp_ref_p ())
+ return GLOBAL_POINTER_REGNUM;
+
+ /* If there are no current references to $gp, then the only uses
+ we can introduce later are those involved in long branches. */
+ if (TARGET_ABSOLUTE_JUMPS && !mips_cfun_has_flexible_gp_ref_p ())
+ return INVALID_REGNUM;
+
+ /* If the global pointer is call-saved, try to use a call-clobbered
+ alternative. */
+ if (TARGET_CALL_SAVED_GP && current_function_is_leaf)
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (!df_regs_ever_live_p (regno)
+ && call_really_used_regs[regno]
+ && !fixed_regs[regno]
+ && regno != PIC_FUNCTION_ADDR_REGNUM)
+ return regno;
+
+ return GLOBAL_POINTER_REGNUM;
+}
+
+/* Return true if the current function's prologue must load the global
+ pointer value into pic_offset_table_rtx and store the same value in
+ the function's cprestore slot (if any).
+
+ One problem we have to deal with is that, when emitting GOT-based
+ position independent code, long-branch sequences will need to load
+ the address of the branch target from the GOT. We don't know until
+ the very end of compilation whether (and where) the function needs
+ long branches, so we must ensure that _any_ branch can access the
+ global pointer in some form. However, we do not want to pessimize
+ the usual case in which all branches are short.
+
+ We handle this as follows:
+
+ (1) During reload, we set cfun->machine->global_pointer to
+ INVALID_REGNUM if we _know_ that the current function
+ doesn't need a global pointer. This is only valid if
+ long branches don't need the GOT.
+
+ Otherwise, we assume that we might need a global pointer
+ and pick an appropriate register.
+
+ (2) If cfun->machine->global_pointer != INVALID_REGNUM,
+ we ensure that the global pointer is available at every
+ block boundary bar entry and exit. We do this in one of two ways:
+
+ - If the function has a cprestore slot, we ensure that this
+ slot is valid at every branch. However, as explained in
+ point (6) below, there is no guarantee that pic_offset_table_rtx
+ itself is valid if new uses of the global pointer are introduced
+ after the first post-epilogue split.
+
+ We guarantee that the cprestore slot is valid by loading it
+ into a fake register, CPRESTORE_SLOT_REGNUM. We then make
+ this register live at every block boundary bar function entry
+ and exit. It is then invalid to move the load (and thus the
+ preceding store) across a block boundary.
+
+ - If the function has no cprestore slot, we guarantee that
+ pic_offset_table_rtx itself is valid at every branch.
+
+ See mips_eh_uses for the handling of the register liveness.
+
+ (3) During prologue and epilogue generation, we emit "ghost"
+ placeholder instructions to manipulate the global pointer.
+
+ (4) During prologue generation, we set cfun->machine->must_initialize_gp_p
+ and cfun->machine->must_restore_gp_when_clobbered_p if we already know
+ that the function needs a global pointer. (There is no need to set
+ them earlier than this, and doing it as late as possible leads to
+ fewer false positives.)
+
+ (5) If cfun->machine->must_initialize_gp_p is true during a
+ split_insns pass, we split the ghost instructions into real
+ instructions. These split instructions can then be optimized in
+ the usual way. Otherwise, we keep the ghost instructions intact,
+ and optimize for the case where they aren't needed. We still
+ have the option of splitting them later, if we need to introduce
+ new uses of the global pointer.
+
+ For example, the scheduler ignores a ghost instruction that
+ stores $28 to the stack, but it handles the split form of
+ the ghost instruction as an ordinary store.
+
+ (6) [OldABI only.] If cfun->machine->must_restore_gp_when_clobbered_p
+ is true during the first post-epilogue split_insns pass, we split
+ calls and restore_gp patterns into instructions that explicitly
+ load pic_offset_table_rtx from the cprestore slot. Otherwise,
+ we split these patterns into instructions that _don't_ load from
+ the cprestore slot.
+
+ If cfun->machine->must_restore_gp_when_clobbered_p is true at the
+ time of the split, then any instructions that exist at that time
+ can make free use of pic_offset_table_rtx. However, if we want
+ to introduce new uses of the global pointer after the split,
+ we must explicitly load the value from the cprestore slot, since
+ pic_offset_table_rtx itself might not be valid at a given point
+ in the function.
+
+ The idea is that we want to be able to delete redundant
+ loads from the cprestore slot in the usual case where no
+ long branches are needed.
+
+ (7) If cfun->machine->must_initialize_gp_p is still false at the end
+ of md_reorg, we decide whether the global pointer is needed for
+ long branches. If so, we set cfun->machine->must_initialize_gp_p
+ to true and split the ghost instructions into real instructions
+ at that stage.
+
+ Note that the ghost instructions must have a zero length for three reasons:
+
+ - Giving the length of the underlying $gp sequence might cause
+ us to use long branches in cases where they aren't really needed.
+
+ - They would perturb things like alignment calculations.
+
+ - More importantly, the hazard detection in md_reorg relies on
+ empty instructions having a zero length.
+
+ If we find a long branch and split the ghost instructions at the
+ end of md_reorg, the split could introduce more long branches.
+ That isn't a problem though, because we still do the split before
+ the final shorten_branches pass.
+
+ This is extremely ugly, but it seems like the best compromise between
+ correctness and efficiency. */
+
+bool
+mips_must_initialize_gp_p (void)
+{
+ return cfun->machine->must_initialize_gp_p;
+}
+
+/* Return true if REGNO is a register that is ordinarily call-clobbered
+ but must nevertheless be preserved by an interrupt handler. */
+
+static bool
+mips_interrupt_extra_call_saved_reg_p (unsigned int regno)
+{
+ if (MD_REG_P (regno))
+ return true;
+
+ if (TARGET_DSP && DSP_ACC_REG_P (regno))
+ return true;
+
+ if (GP_REG_P (regno) && !cfun->machine->use_shadow_register_set_p)
+ {
+ /* $0 is hard-wired. */
+ if (regno == GP_REG_FIRST)
+ return false;
+
+ /* The interrupt handler can treat kernel registers as
+ scratch registers. */
+ if (KERNEL_REG_P (regno))
+ return false;
+
+ /* The function will return the stack pointer to its original value
+ anyway. */
+ if (regno == STACK_POINTER_REGNUM)
+ return false;
+
+ /* Otherwise, return true for registers that are ordinarily
+ call-clobbered, matching call_really_used_regs. */
+ return call_really_used_regs[regno];
+ }
+
+ return false;
+}
+
+/* Return true if the current function should treat register REGNO
+ as call-saved. */
+
+static bool
+mips_cfun_call_saved_reg_p (unsigned int regno)
+{
+ /* If the user makes an ordinarily-call-saved register global,
+ that register is no longer call-saved. */
+ if (global_regs[regno])
+ return false;
+
+ /* Interrupt handlers need to save extra registers. */
+ if (cfun->machine->interrupt_handler_p
+ && mips_interrupt_extra_call_saved_reg_p (regno))
+ return true;
+
+ /* call_insns preserve $28 unless they explicitly say otherwise,
+ so call_really_used_regs[] treats $28 as call-saved. However,
+ we want the ABI property rather than the default call_insn
+ property here. */
+ return (regno == GLOBAL_POINTER_REGNUM
+ ? TARGET_CALL_SAVED_GP
+ : !call_really_used_regs[regno]);
+}
+
+/* Return true if the function body might clobber register REGNO.
+ We know that REGNO is call-saved. */
+
+static bool
+mips_cfun_might_clobber_call_saved_reg_p (unsigned int regno)
+{
+ /* Some functions should be treated as clobbering all call-saved
+ registers. */
+ if (crtl->saves_all_registers)
+ return true;
+
+ /* DF handles cases where a register is explicitly referenced in
+ the rtl. Incoming values are passed in call-clobbered registers,
+ so we can assume that any live call-saved register is set within
+ the function. */
+ if (df_regs_ever_live_p (regno))
+ return true;
+
+ /* Check for registers that are clobbered by FUNCTION_PROFILER.
+ These clobbers are not explicit in the rtl. */
+ if (crtl->profile && MIPS_SAVE_REG_FOR_PROFILING_P (regno))
+ return true;
+
+ /* If we're using a call-saved global pointer, the function's
+ prologue will need to set it up. */
+ if (cfun->machine->global_pointer == regno)
+ return true;
+
+ /* The function's prologue will need to set the frame pointer if
+ frame_pointer_needed. */
+ if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
+ return true;
+
+ /* If a MIPS16 function returns a value in FPRs, its epilogue
+ will need to call an external libgcc routine. This yet-to-be
+ generated call_insn will clobber $31. */
+ if (regno == RETURN_ADDR_REGNUM && mips16_cfun_returns_in_fpr_p ())
+ return true;
+
+ /* If REGNO is ordinarily call-clobbered, we must assume that any
+ called function could modify it. */
+ if (cfun->machine->interrupt_handler_p
+ && !current_function_is_leaf
+ && mips_interrupt_extra_call_saved_reg_p (regno))
+ return true;
+
+ return false;
+}
+
+/* Return true if the current function must save register REGNO. */
+
+static bool
+mips_save_reg_p (unsigned int regno)
+{
+ if (mips_cfun_call_saved_reg_p (regno))
+ {
+ if (mips_cfun_might_clobber_call_saved_reg_p (regno))
+ return true;
+
+ /* Save both registers in an FPR pair if either one is used. This is
+ needed for the case when MIN_FPRS_PER_FMT == 1, which allows the odd
+ register to be used without the even register. */
+ if (FP_REG_P (regno)
+ && MAX_FPRS_PER_FMT == 2
+ && mips_cfun_might_clobber_call_saved_reg_p (regno + 1))
+ return true;
+ }
+
+ /* We need to save the incoming return address if __builtin_eh_return
+ is being used to set a different return address. */
+ if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
+ return true;
+
+ return false;
+}
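+
+/* Pairing example (illustrative, assuming an ABI in which $f20 is
+ call-saved and MAX_FPRS_PER_FMT == 2): if only the odd register $f21
+ is clobbered, mips_save_reg_p also returns true for $f20, so the
+ FPR pair is saved and restored as a unit. */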
+
+/* Populate the current function's mips_frame_info structure.
+
+ MIPS stack frames look like:
+
+ +-------------------------------+
+ | |
+ | incoming stack arguments |
+ | |
+ +-------------------------------+
+ | |
+ | caller-allocated save area |
+ A | for register arguments |
+ | |
+ +-------------------------------+ <-- incoming stack pointer
+ | |
+ | callee-allocated save area |
+ B | for arguments that are |
+ | split between registers and |
+ | the stack |
+ | |
+ +-------------------------------+ <-- arg_pointer_rtx
+ | |
+ C | callee-allocated save area |
+ | for register varargs |
+ | |
+ +-------------------------------+ <-- frame_pointer_rtx
+ | | + cop0_sp_offset
+ | COP0 reg save area | + UNITS_PER_WORD
+ | |
+ +-------------------------------+ <-- frame_pointer_rtx + acc_sp_offset
+ | | + UNITS_PER_WORD
+ | accumulator save area |
+ | |
+ +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
+ | | + UNITS_PER_HWFPVALUE
+ | FPR save area |
+ | |
+ +-------------------------------+ <-- stack_pointer_rtx + gp_sp_offset
+ | | + UNITS_PER_WORD
+ | GPR save area |
+ | |
+ +-------------------------------+ <-- frame_pointer_rtx with
+ | | \ -fstack-protector
+ | local variables | | var_size
+ | | /
+ +-------------------------------+
+ | | \
+ | $gp save area | | cprestore_size
+ | | /
+ P +-------------------------------+ <-- hard_frame_pointer_rtx for
+ | | \ MIPS16 code
+ | outgoing stack arguments | |
+ | | |
+ +-------------------------------+ | args_size
+ | | |
+ | caller-allocated save area | |
+ | for register arguments | |
+ | | /
+ +-------------------------------+ <-- stack_pointer_rtx
+ frame_pointer_rtx without
+ -fstack-protector
+ hard_frame_pointer_rtx for
+ non-MIPS16 code.
+
+ At least two of A, B and C will be empty.
+
+ Dynamic stack allocations such as alloca insert data at point P.
+ They decrease stack_pointer_rtx but leave frame_pointer_rtx and
+ hard_frame_pointer_rtx unchanged. */
+
+static void
+mips_compute_frame_info (void)
+{
+ struct mips_frame_info *frame;
+ HOST_WIDE_INT offset, size;
+ unsigned int regno, i;
+
+ /* Set this function's interrupt properties. */
+ if (mips_interrupt_type_p (TREE_TYPE (current_function_decl)))
+ {
+ if (!ISA_MIPS32R2)
+ error ("the %<interrupt%> attribute requires a MIPS32r2 processor");
+ else if (TARGET_HARD_FLOAT)
+ error ("the %<interrupt%> attribute requires %<-msoft-float%>");
+ else if (TARGET_MIPS16)
+ error ("interrupt handlers cannot be MIPS16 functions");
+ else
+ {
+ cfun->machine->interrupt_handler_p = true;
+ cfun->machine->use_shadow_register_set_p =
+ mips_use_shadow_register_set_p (TREE_TYPE (current_function_decl));
+ cfun->machine->keep_interrupts_masked_p =
+ mips_keep_interrupts_masked_p (TREE_TYPE (current_function_decl));
+ cfun->machine->use_debug_exception_return_p =
+ mips_use_debug_exception_return_p (TREE_TYPE
+ (current_function_decl));
+ }
+ }
+
+ frame = &cfun->machine->frame;
+ memset (frame, 0, sizeof (*frame));
+ size = get_frame_size ();
+
+ cfun->machine->global_pointer = mips_global_pointer ();
+
+ /* The first two blocks contain the outgoing argument area and the $gp save
+ slot. This area isn't needed in leaf functions, but if the
+ target-independent frame size is nonzero, we have already committed to
+ allocating these in STARTING_FRAME_OFFSET for !FRAME_GROWS_DOWNWARD. */
+ if ((size == 0 || FRAME_GROWS_DOWNWARD) && current_function_is_leaf)
+ {
+ /* The MIPS 3.0 linker does not like functions that dynamically
+ allocate the stack and have 0 for STACK_DYNAMIC_OFFSET: it looks
+ as though we are trying to create a second frame pointer for the
+ function. Allocate some stack space to keep it happy. */
+ if (cfun->calls_alloca)
+ frame->args_size = REG_PARM_STACK_SPACE (cfun->decl);
+ else
+ frame->args_size = 0;
+ frame->cprestore_size = 0;
+ }
+ else
+ {
+ frame->args_size = crtl->outgoing_args_size;
+ frame->cprestore_size = MIPS_GP_SAVE_AREA_SIZE;
+ }
+ offset = frame->args_size + frame->cprestore_size;
+
+ /* Move above the local variables. */
+ frame->var_size = MIPS_STACK_ALIGN (size);
+ offset += frame->var_size;
+
+ /* Find out which GPRs we need to save. */
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (mips_save_reg_p (regno))
+ {
+ frame->num_gp++;
+ frame->mask |= 1 << (regno - GP_REG_FIRST);
+ }
+
+ /* If this function calls eh_return, we must also save and restore the
+ EH data registers. */
+ if (crtl->calls_eh_return)
+ for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
+ {
+ frame->num_gp++;
+ frame->mask |= 1 << (EH_RETURN_DATA_REGNO (i) - GP_REG_FIRST);
+ }
+
+ /* The MIPS16e SAVE and RESTORE instructions have two ranges of registers:
+ $a3-$a0 and $s2-$s8. If we save one register in the range, we must
+ save all later registers too. */
+ if (GENERATE_MIPS16E_SAVE_RESTORE)
+ {
+ mips16e_mask_registers (&frame->mask, mips16e_s2_s8_regs,
+ ARRAY_SIZE (mips16e_s2_s8_regs), &frame->num_gp);
+ mips16e_mask_registers (&frame->mask, mips16e_a0_a3_regs,
+ ARRAY_SIZE (mips16e_a0_a3_regs), &frame->num_gp);
+ }
+
+ /* Move above the GPR save area. */
+ if (frame->num_gp > 0)
+ {
+ offset += MIPS_STACK_ALIGN (frame->num_gp * UNITS_PER_WORD);
+ frame->gp_sp_offset = offset - UNITS_PER_WORD;
+ }
+
+ /* Find out which FPRs we need to save. This loop must iterate over
+ the same space as its companion in mips_for_each_saved_gpr_and_fpr. */
+ if (TARGET_HARD_FLOAT)
+ for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno += MAX_FPRS_PER_FMT)
+ if (mips_save_reg_p (regno))
+ {
+ frame->num_fp += MAX_FPRS_PER_FMT;
+ frame->fmask |= ~(~0 << MAX_FPRS_PER_FMT) << (regno - FP_REG_FIRST);
+ }
+
+ /* Move above the FPR save area. */
+ if (frame->num_fp > 0)
+ {
+ offset += MIPS_STACK_ALIGN (frame->num_fp * UNITS_PER_FPREG);
+ frame->fp_sp_offset = offset - UNITS_PER_HWFPVALUE;
+ }
+
+ /* Add in space for the interrupt context information. */
+ if (cfun->machine->interrupt_handler_p)
+ {
+ /* Check HI/LO. */
+ if (mips_save_reg_p (LO_REGNUM) || mips_save_reg_p (HI_REGNUM))
+ {
+ frame->num_acc++;
+ frame->acc_mask |= (1 << 0);
+ }
+
+ /* Check accumulators 1, 2, 3. */
+ for (i = DSP_ACC_REG_FIRST; i <= DSP_ACC_REG_LAST; i += 2)
+ if (mips_save_reg_p (i) || mips_save_reg_p (i + 1))
+ {
+ frame->num_acc++;
+ frame->acc_mask |= 1 << (((i - DSP_ACC_REG_FIRST) / 2) + 1);
+ }
+
+ /* All interrupt context functions need space to preserve STATUS. */
+ frame->num_cop0_regs++;
+
+ /* If we don't keep interrupts masked, we need to save EPC. */
+ if (!cfun->machine->keep_interrupts_masked_p)
+ frame->num_cop0_regs++;
+ }
+
+ /* Move above the accumulator save area. */
+ if (frame->num_acc > 0)
+ {
+ /* Each accumulator needs 2 words. */
+ offset += frame->num_acc * 2 * UNITS_PER_WORD;
+ frame->acc_sp_offset = offset - UNITS_PER_WORD;
+ }
+
+ /* Move above the COP0 register save area. */
+ if (frame->num_cop0_regs > 0)
+ {
+ offset += frame->num_cop0_regs * UNITS_PER_WORD;
+ frame->cop0_sp_offset = offset - UNITS_PER_WORD;
+ }
+
+ /* Move above the callee-allocated varargs save area. */
+ offset += MIPS_STACK_ALIGN (cfun->machine->varargs_size);
+ frame->arg_pointer_offset = offset;
+
+ /* Move above the callee-allocated area for pretend stack arguments. */
+ offset += crtl->args.pretend_args_size;
+ frame->total_size = offset;
+
+ /* Work out the offsets of the save areas from the top of the frame. */
+ if (frame->gp_sp_offset > 0)
+ frame->gp_save_offset = frame->gp_sp_offset - offset;
+ if (frame->fp_sp_offset > 0)
+ frame->fp_save_offset = frame->fp_sp_offset - offset;
+ if (frame->acc_sp_offset > 0)
+ frame->acc_save_offset = frame->acc_sp_offset - offset;
+ if (frame->num_cop0_regs > 0)
+ frame->cop0_save_offset = frame->cop0_sp_offset - offset;
+
+ /* MIPS16 code offsets the frame pointer by the size of the outgoing
+ arguments. This tends to increase the chances of using unextended
+ instructions for local variables and incoming arguments. */
+ if (TARGET_MIPS16)
+ frame->hard_frame_pointer_offset = frame->args_size;
+}
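+
+/* Worked example (hypothetical o32 figures, UNITS_PER_WORD == 4, 8-byte
+ stack alignment): with 16 bytes of outgoing arguments, no cprestore
+ slot, 8 bytes of locals and two saved GPRs, OFFSET grows
+ 16 -> 24 -> 32, so gp_sp_offset is 28; with no other save areas,
+ total_size is 32 and gp_save_offset is 28 - 32 = -4, placing the
+ first GPR slot one word below the top of the frame. */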
+
+/* Return the style of GP load sequence that is being used for the
+ current function. */
+
+enum mips_loadgp_style
+mips_current_loadgp_style (void)
+{
+ if (!TARGET_USE_GOT || cfun->machine->global_pointer == INVALID_REGNUM)
+ return LOADGP_NONE;
+
+ if (TARGET_RTP_PIC)
+ return LOADGP_RTP;
+
+ if (TARGET_ABSOLUTE_ABICALLS)
+ return LOADGP_ABSOLUTE;
+
+ return TARGET_NEWABI ? LOADGP_NEWABI : LOADGP_OLDABI;
+}
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+static bool
+mips_frame_pointer_required (void)
+{
+ /* If the function contains dynamic stack allocations, we need to
+ use the frame pointer to access the static parts of the frame. */
+ if (cfun->calls_alloca)
+ return true;
+
+ /* In MIPS16 mode, we need a frame pointer for a large frame; otherwise,
+ reload may be unable to compute the address of a local variable,
+ since there is no way to add a large constant to the stack pointer
+ without using a second temporary register. */
+ if (TARGET_MIPS16)
+ {
+ mips_compute_frame_info ();
+ if (!SMALL_OPERAND (cfun->machine->frame.total_size))
+ return true;
+ }
+
+ return false;
+}
+
+/* Make sure that we're not trying to eliminate to the wrong hard frame
+ pointer. */
+
+static bool
+mips_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
+ or argument pointer. TO is either the stack pointer or hard frame
+ pointer. */
+
+HOST_WIDE_INT
+mips_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset;
+
+ mips_compute_frame_info ();
+
+ /* Set OFFSET to the offset from the end-of-prologue stack pointer. */
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ if (FRAME_GROWS_DOWNWARD)
+ offset = (cfun->machine->frame.args_size
+ + cfun->machine->frame.cprestore_size
+ + cfun->machine->frame.var_size);
+ else
+ offset = 0;
+ break;
+
+ case ARG_POINTER_REGNUM:
+ offset = cfun->machine->frame.arg_pointer_offset;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (to == HARD_FRAME_POINTER_REGNUM)
+ offset -= cfun->machine->frame.hard_frame_pointer_offset;
+
+ return offset;
+}
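+
+/* Example (hypothetical MIPS16 figures, no pretend arguments): if
+ total_size and arg_pointer_offset are both 32 and args_size, and
+ therefore hard_frame_pointer_offset, is 16, eliminating the argument
+ pointer gives an offset of 32 from the stack pointer but only
+ 32 - 16 = 16 from the hard frame pointer. */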
+
+/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */
+
+static void
+mips_extra_live_on_entry (bitmap regs)
+{
+ if (TARGET_USE_GOT)
+ {
+ /* PIC_FUNCTION_ADDR_REGNUM is live if we need it to set up
+ the global pointer. */
+ if (!TARGET_ABSOLUTE_ABICALLS)
+ bitmap_set_bit (regs, PIC_FUNCTION_ADDR_REGNUM);
+
+ /* The prologue may set MIPS16_PIC_TEMP_REGNUM to the value of
+ the global pointer. */
+ if (TARGET_MIPS16)
+ bitmap_set_bit (regs, MIPS16_PIC_TEMP_REGNUM);
+
+ /* See the comment above load_call<mode> for details. */
+ bitmap_set_bit (regs, GOT_VERSION_REGNUM);
+ }
+}
+
+/* Implement RETURN_ADDR_RTX. We do not support moving back to a
+ previous frame. */
+
+rtx
+mips_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
+}
+
+/* Emit code to change the current function's return address to
+ ADDRESS. SCRATCH is available as a scratch register, if needed.
+ ADDRESS and SCRATCH are both word-mode GPRs. */
+
+void
+mips_set_return_address (rtx address, rtx scratch)
+{
+ rtx slot_address;
+
+ gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
+ slot_address = mips_add_offset (scratch, stack_pointer_rtx,
+ cfun->machine->frame.gp_sp_offset);
+ mips_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
+}
+
+/* Return true if the current function has a cprestore slot. */
+
+bool
+mips_cfun_has_cprestore_slot_p (void)
+{
+ return (cfun->machine->global_pointer != INVALID_REGNUM
+ && cfun->machine->frame.cprestore_size > 0);
+}
+
+/* Fill *BASE and *OFFSET such that *BASE + *OFFSET refers to the
+ cprestore slot. LOAD_P is true if the caller wants to load from
+ the cprestore slot; it is false if the caller wants to store to
+ the slot. */
+
+static void
+mips_get_cprestore_base_and_offset (rtx *base, HOST_WIDE_INT *offset,
+ bool load_p)
+{
+ const struct mips_frame_info *frame;
+
+ frame = &cfun->machine->frame;
+ /* .cprestore always uses the stack pointer instead of the frame pointer.
+ We have a free choice for direct stores for non-MIPS16 functions,
+ and for MIPS16 functions whose cprestore slot is in range of the
+ stack pointer. Using the stack pointer would sometimes give more
+ (early) scheduling freedom, but using the frame pointer would
+ sometimes give more (late) scheduling freedom. It's hard to
+ predict which applies to a given function, so let's keep things
+ simple.
+
+ Loads must always use the frame pointer in functions that call
+ alloca, and there's little benefit to using the stack pointer
+ otherwise. */
+ if (frame_pointer_needed && !(TARGET_CPRESTORE_DIRECTIVE && !load_p))
+ {
+ *base = hard_frame_pointer_rtx;
+ *offset = frame->args_size - frame->hard_frame_pointer_offset;
+ }
+ else
+ {
+ *base = stack_pointer_rtx;
+ *offset = frame->args_size;
+ }
+}
+
+/* Return true if X is the load or store address of the cprestore slot;
+ LOAD_P says which. */
+
+bool
+mips_cprestore_address_p (rtx x, bool load_p)
+{
+ rtx given_base, required_base;
+ HOST_WIDE_INT given_offset, required_offset;
+
+ mips_split_plus (x, &given_base, &given_offset);
+ mips_get_cprestore_base_and_offset (&required_base, &required_offset, load_p);
+ return given_base == required_base && given_offset == required_offset;
+}
+
+/* Return a MEM rtx for the cprestore slot. LOAD_P is true if we are
+ going to load from it, false if we are going to store to it.
+ Use TEMP as a temporary register if need be. */
+
+static rtx
+mips_cprestore_slot (rtx temp, bool load_p)
+{
+ rtx base;
+ HOST_WIDE_INT offset;
+
+ mips_get_cprestore_base_and_offset (&base, &offset, load_p);
+ return gen_frame_mem (Pmode, mips_add_offset (temp, base, offset));
+}
+
+/* Emit instructions to save global pointer value GP into cprestore
+ slot MEM. OFFSET is the offset that MEM applies to the base register.
+
+ MEM may not be a legitimate address. If it isn't, TEMP is a
+ temporary register that can be used, otherwise it is a SCRATCH. */
+
+void
+mips_save_gp_to_cprestore_slot (rtx mem, rtx offset, rtx gp, rtx temp)
+{
+ if (TARGET_CPRESTORE_DIRECTIVE)
+ {
+ gcc_assert (gp == pic_offset_table_rtx);
+ emit_insn (gen_cprestore (mem, offset));
+ }
+ else
+ mips_emit_move (mips_cprestore_slot (temp, false), gp);
+}
+
+/* Restore $gp from its save slot, using TEMP as a temporary base register
+ if need be. This function is for o32 and o64 abicalls only.
+
+ See mips_must_initialize_gp_p for details about how we manage the
+ global pointer. */
+
+void
+mips_restore_gp_from_cprestore_slot (rtx temp)
+{
+ gcc_assert (TARGET_ABICALLS && TARGET_OLDABI && epilogue_completed);
+
+ if (!cfun->machine->must_restore_gp_when_clobbered_p)
+ {
+ emit_note (NOTE_INSN_DELETED);
+ return;
+ }
+
+ if (TARGET_MIPS16)
+ {
+ mips_emit_move (temp, mips_cprestore_slot (temp, true));
+ mips_emit_move (pic_offset_table_rtx, temp);
+ }
+ else
+ mips_emit_move (pic_offset_table_rtx, mips_cprestore_slot (temp, true));
+ if (!TARGET_EXPLICIT_RELOCS)
+ emit_insn (gen_blockage ());
+}
+
+/* A function to save or store a register. The first argument is the
+ register and the second is the stack slot. */
+typedef void (*mips_save_restore_fn) (rtx, rtx);
+
+/* Use FN to save or restore register REGNO. MODE is the register's
+ mode and OFFSET is the offset of its save slot from the current
+ stack pointer. */
+
+static void
+mips_save_restore_reg (enum machine_mode mode, int regno,
+ HOST_WIDE_INT offset, mips_save_restore_fn fn)
+{
+ rtx mem;
+
+ mem = gen_frame_mem (mode, plus_constant (stack_pointer_rtx, offset));
+ fn (gen_rtx_REG (mode, regno), mem);
+}
+
+/* Call FN for each accumulator that is saved by the current function.
+ SP_OFFSET is the offset of the current stack pointer from the start
+ of the frame. */
+
+static void
+mips_for_each_saved_acc (HOST_WIDE_INT sp_offset, mips_save_restore_fn fn)
+{
+ HOST_WIDE_INT offset;
+ int regno;
+
+ offset = cfun->machine->frame.acc_sp_offset - sp_offset;
+ if (BITSET_P (cfun->machine->frame.acc_mask, 0))
+ {
+ mips_save_restore_reg (word_mode, LO_REGNUM, offset, fn);
+ offset -= UNITS_PER_WORD;
+ mips_save_restore_reg (word_mode, HI_REGNUM, offset, fn);
+ offset -= UNITS_PER_WORD;
+ }
+
+ for (regno = DSP_ACC_REG_FIRST; regno <= DSP_ACC_REG_LAST; regno++)
+ if (BITSET_P (cfun->machine->frame.acc_mask,
+ ((regno - DSP_ACC_REG_FIRST) / 2) + 1))
+ {
+ mips_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Call FN for each register that is saved by the current function.
+ SP_OFFSET is the offset of the current stack pointer from the start
+ of the frame. */
+
+static void
+mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset,
+ mips_save_restore_fn fn)
+{
+ enum machine_mode fpr_mode;
+ HOST_WIDE_INT offset;
+ int regno;
+
+ /* Save registers starting from high to low. Debuggers prefer the
+ return register to be stored at func+4, and saving in this order
+ also avoids the need for a nop in the epilogue when at least one
+ register is reloaded in addition to the return address. */
+ offset = cfun->machine->frame.gp_sp_offset - sp_offset;
+ for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+ {
+ /* Record the ra offset for use by mips_function_profiler. */
+ if (regno == RETURN_ADDR_REGNUM)
+ cfun->machine->frame.ra_fp_offset = offset + sp_offset;
+ mips_save_restore_reg (word_mode, regno, offset, fn);
+ offset -= UNITS_PER_WORD;
+ }
+
+ /* This loop must iterate over the same space as its companion in
+ mips_compute_frame_info. */
+ offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+ fpr_mode = (TARGET_SINGLE_FLOAT ? SFmode : DFmode);
+ for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1;
+ regno >= FP_REG_FIRST;
+ regno -= MAX_FPRS_PER_FMT)
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+ {
+ mips_save_restore_reg (fpr_mode, regno, offset, fn);
+ offset -= GET_MODE_SIZE (fpr_mode);
+ }
+}
+
+/* Return true if a move between register REGNO and its save slot (MEM)
+ can be done in a single move. LOAD_P is true if we are loading
+ from the slot, false if we are storing to it. */
+
+static bool
+mips_direct_save_slot_move_p (unsigned int regno, rtx mem, bool load_p)
+{
+ /* There is a specific MIPS16 instruction for saving $31 to the stack. */
+ if (TARGET_MIPS16 && !load_p && regno == RETURN_ADDR_REGNUM)
+ return false;
+
+ return mips_secondary_reload_class (REGNO_REG_CLASS (regno),
+ GET_MODE (mem), mem, load_p) == NO_REGS;
+}
+
+/* Emit a move from SRC to DEST, given that one of them is a register
+ save slot and that the other is a register. TEMP is a temporary
+ GPR of the same mode that is available if need be. */
+
+void
+mips_emit_save_slot_move (rtx dest, rtx src, rtx temp)
+{
+ unsigned int regno;
+ rtx mem;
+
+ if (REG_P (src))
+ {
+ regno = REGNO (src);
+ mem = dest;
+ }
+ else
+ {
+ regno = REGNO (dest);
+ mem = src;
+ }
+
+ if (regno == cfun->machine->global_pointer && !mips_must_initialize_gp_p ())
+ {
+ /* We don't yet know whether we'll need this instruction or not.
+ Postpone the decision by emitting a ghost move. This move
+ is specifically not frame-related; only the split version is. */
+ if (TARGET_64BIT)
+ emit_insn (gen_move_gpdi (dest, src));
+ else
+ emit_insn (gen_move_gpsi (dest, src));
+ return;
+ }
+
+ if (regno == HI_REGNUM)
+ {
+ if (REG_P (dest))
+ {
+ mips_emit_move (temp, src);
+ if (TARGET_64BIT)
+ emit_insn (gen_mthisi_di (gen_rtx_REG (TImode, MD_REG_FIRST),
+ temp, gen_rtx_REG (DImode, LO_REGNUM)));
+ else
+ emit_insn (gen_mthisi_di (gen_rtx_REG (DImode, MD_REG_FIRST),
+ temp, gen_rtx_REG (SImode, LO_REGNUM)));
+ }
+ else
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_mfhidi_ti (temp,
+ gen_rtx_REG (TImode, MD_REG_FIRST)));
+ else
+ emit_insn (gen_mfhisi_di (temp,
+ gen_rtx_REG (DImode, MD_REG_FIRST)));
+ mips_emit_move (dest, temp);
+ }
+ }
+ else if (mips_direct_save_slot_move_p (regno, mem, mem == src))
+ mips_emit_move (dest, src);
+ else
+ {
+ gcc_assert (!reg_overlap_mentioned_p (dest, temp));
+ mips_emit_move (temp, src);
+ mips_emit_move (dest, temp);
+ }
+ if (MEM_P (dest))
+ mips_set_frame_expr (mips_frame_set (dest, src));
+}
+
+/* If we're generating n32 or n64 abicalls, and the current function
+ does not use $28 as its global pointer, emit a cplocal directive.
+ Use pic_offset_table_rtx as the argument to the directive. */
+
+static void
+mips_output_cplocal (void)
+{
+ if (!TARGET_EXPLICIT_RELOCS
+ && mips_must_initialize_gp_p ()
+ && cfun->machine->global_pointer != GLOBAL_POINTER_REGNUM)
+ output_asm_insn (".cplocal %+", 0);
+}
+
+/* Implement TARGET_OUTPUT_FUNCTION_PROLOGUE. */
+
+static void
+mips_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ const char *fnname;
+
+#ifdef SDB_DEBUGGING_INFO
+ if (debug_info_level != DINFO_LEVEL_TERSE && write_symbols == SDB_DEBUG)
+ SDB_OUTPUT_SOURCE_LINE (file, DECL_SOURCE_LINE (current_function_decl));
+#endif
+
+ /* In MIPS16 mode, we may need to generate a non-MIPS16 stub to handle
+ floating-point arguments. */
+ if (TARGET_MIPS16
+ && TARGET_HARD_FLOAT_ABI
+ && crtl->args.info.fp_code != 0)
+ mips16_build_function_stub ();
+
+ /* Get the function name the same way that toplev.c does before calling
+ assemble_start_function. This is needed so that the name used here
+ exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ mips_start_function_definition (fnname, TARGET_MIPS16);
+
+ /* Output MIPS-specific frame information. */
+ if (!flag_inhibit_size_directive)
+ {
+ const struct mips_frame_info *frame;
+
+ frame = &cfun->machine->frame;
+
+ /* .frame FRAMEREG, FRAMESIZE, RETREG. */
+ fprintf (file,
+ "\t.frame\t%s," HOST_WIDE_INT_PRINT_DEC ",%s\t\t"
+ "# vars= " HOST_WIDE_INT_PRINT_DEC
+ ", regs= %d/%d"
+ ", args= " HOST_WIDE_INT_PRINT_DEC
+ ", gp= " HOST_WIDE_INT_PRINT_DEC "\n",
+ reg_names[frame_pointer_needed
+ ? HARD_FRAME_POINTER_REGNUM
+ : STACK_POINTER_REGNUM],
+ (frame_pointer_needed
+ ? frame->total_size - frame->hard_frame_pointer_offset
+ : frame->total_size),
+ reg_names[RETURN_ADDR_REGNUM],
+ frame->var_size,
+ frame->num_gp, frame->num_fp,
+ frame->args_size,
+ frame->cprestore_size);
+
+ /* .mask MASK, OFFSET. */
+ fprintf (file, "\t.mask\t0x%08x," HOST_WIDE_INT_PRINT_DEC "\n",
+ frame->mask, frame->gp_save_offset);
+
+ /* .fmask MASK, OFFSET. */
+ fprintf (file, "\t.fmask\t0x%08x," HOST_WIDE_INT_PRINT_DEC "\n",
+ frame->fmask, frame->fp_save_offset);
+ }
+
+ /* Handle the initialization of $gp for SVR4 PIC, if applicable.
+ Also emit the ".set noreorder; .set nomacro" sequence for functions
+ that need it. */
+ if (mips_must_initialize_gp_p ()
+ && mips_current_loadgp_style () == LOADGP_OLDABI)
+ {
+ if (TARGET_MIPS16)
+ {
+ /* This is a fixed-form sequence. The position of the
+ first two instructions is important because of the
+ way _gp_disp is defined. */
+ output_asm_insn ("li\t$2,%%hi(_gp_disp)", 0);
+ output_asm_insn ("addiu\t$3,$pc,%%lo(_gp_disp)", 0);
+ output_asm_insn ("sll\t$2,16", 0);
+ output_asm_insn ("addu\t$2,$3", 0);
+ }
+ else
+ {
+ /* .cpload must be in a .set noreorder but not a
+ .set nomacro block. */
+ mips_push_asm_switch (&mips_noreorder);
+ output_asm_insn (".cpload\t%^", 0);
+ if (!cfun->machine->all_noreorder_p)
+ mips_pop_asm_switch (&mips_noreorder);
+ else
+ mips_push_asm_switch (&mips_nomacro);
+ }
+ }
+ else if (cfun->machine->all_noreorder_p)
+ {
+ mips_push_asm_switch (&mips_noreorder);
+ mips_push_asm_switch (&mips_nomacro);
+ }
+
+ /* Tell the assembler which register we're using as the global
+ pointer. This is needed for thunks, since they can use either
+ explicit relocs or assembler macros. */
+ mips_output_cplocal ();
+}
+
+/* Implement TARGET_OUTPUT_FUNCTION_EPILOGUE. */
+
+static void
+mips_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ const char *fnname;
+
+ /* Reinstate the normal $gp. */
+ SET_REGNO (pic_offset_table_rtx, GLOBAL_POINTER_REGNUM);
+ mips_output_cplocal ();
+
+ if (cfun->machine->all_noreorder_p)
+ {
+ mips_pop_asm_switch (&mips_nomacro);
+ mips_pop_asm_switch (&mips_noreorder);
+ }
+
+ /* Get the function name the same way that toplev.c does before calling
+ assemble_start_function. This is needed so that the name used here
+ exactly matches the name used in ASM_DECLARE_FUNCTION_NAME. */
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ mips_end_function_definition (fnname);
+}
+
+/* Save register REG to MEM. Make the instruction frame-related. */
+
+static void
+mips_save_reg (rtx reg, rtx mem)
+{
+ if (GET_MODE (reg) == DFmode && !TARGET_FLOAT64)
+ {
+ rtx x1, x2;
+
+ if (mips_split_64bit_move_p (mem, reg))
+ mips_split_doubleword_move (mem, reg);
+ else
+ mips_emit_move (mem, reg);
+
+ x1 = mips_frame_set (mips_subword (mem, false),
+ mips_subword (reg, false));
+ x2 = mips_frame_set (mips_subword (mem, true),
+ mips_subword (reg, true));
+ mips_set_frame_expr (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x1, x2)));
+ }
+ else
+ mips_emit_save_slot_move (mem, reg, MIPS_PROLOGUE_TEMP (GET_MODE (reg)));
+}
+
+/* The __gnu_local_gp symbol. */
+
+static GTY(()) rtx mips_gnu_local_gp;
+
+/* If we're generating n32 or n64 abicalls, emit instructions
+ to set up the global pointer. */
+
+static void
+mips_emit_loadgp (void)
+{
+ rtx addr, offset, incoming_address, base, index, pic_reg;
+
+ pic_reg = TARGET_MIPS16 ? MIPS16_PIC_TEMP : pic_offset_table_rtx;
+ switch (mips_current_loadgp_style ())
+ {
+ case LOADGP_ABSOLUTE:
+ if (mips_gnu_local_gp == NULL)
+ {
+ mips_gnu_local_gp = gen_rtx_SYMBOL_REF (Pmode, "__gnu_local_gp");
+ SYMBOL_REF_FLAGS (mips_gnu_local_gp) |= SYMBOL_FLAG_LOCAL;
+ }
+ emit_insn (Pmode == SImode
+ ? gen_loadgp_absolute_si (pic_reg, mips_gnu_local_gp)
+ : gen_loadgp_absolute_di (pic_reg, mips_gnu_local_gp));
+ break;
+
+ case LOADGP_OLDABI:
+ /* Added by mips_output_function_prologue. */
+ break;
+
+ case LOADGP_NEWABI:
+ addr = XEXP (DECL_RTL (current_function_decl), 0);
+ offset = mips_unspec_address (addr, SYMBOL_GOTOFF_LOADGP);
+ incoming_address = gen_rtx_REG (Pmode, PIC_FUNCTION_ADDR_REGNUM);
+ emit_insn (Pmode == SImode
+ ? gen_loadgp_newabi_si (pic_reg, offset, incoming_address)
+ : gen_loadgp_newabi_di (pic_reg, offset, incoming_address));
+ break;
+
+ case LOADGP_RTP:
+ base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (VXWORKS_GOTT_BASE));
+ index = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (VXWORKS_GOTT_INDEX));
+ emit_insn (Pmode == SImode
+ ? gen_loadgp_rtp_si (pic_reg, base, index)
+ : gen_loadgp_rtp_di (pic_reg, base, index));
+ break;
+
+ default:
+ return;
+ }
+
+ if (TARGET_MIPS16)
+ emit_insn (gen_copygp_mips16 (pic_offset_table_rtx, pic_reg));
+
+ /* Emit a blockage if there are implicit uses of the GP register.
+ This includes profiled functions, because FUNCTION_PROFILER uses
+ a jal macro. */
+ if (!TARGET_EXPLICIT_RELOCS || crtl->profile)
+ emit_insn (gen_loadgp_blockage ());
+}
+
+/* A for_each_rtx callback. Stop the search if *X is a kernel register. */
+
+static int
+mips_kernel_reg_p (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ return REG_P (*x) && KERNEL_REG_P (REGNO (*x));
+}
+
+/* Expand the "prologue" pattern. */
+
+void
+mips_expand_prologue (void)
+{
+ const struct mips_frame_info *frame;
+ HOST_WIDE_INT size;
+ unsigned int nargs;
+ rtx insn;
+
+ if (cfun->machine->global_pointer != INVALID_REGNUM)
+ {
+ /* Check whether an insn uses pic_offset_table_rtx, either explicitly
+ or implicitly. If so, we can commit to using a global pointer
+ straight away, otherwise we need to defer the decision. */
+ if (mips_cfun_has_inflexible_gp_ref_p ()
+ || mips_cfun_has_flexible_gp_ref_p ())
+ {
+ cfun->machine->must_initialize_gp_p = true;
+ cfun->machine->must_restore_gp_when_clobbered_p = true;
+ }
+
+ SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer);
+ }
+
+ frame = &cfun->machine->frame;
+ size = frame->total_size;
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = size;
+
+ /* Save the registers. Allocate up to MIPS_MAX_FIRST_STACK_STEP
+ bytes beforehand; this is enough to cover the register save area
+ without going out of range. */
+ if (((frame->mask | frame->fmask | frame->acc_mask) != 0)
+ || frame->num_cop0_regs > 0)
+ {
+ HOST_WIDE_INT step1;
+
+ step1 = MIN (size, MIPS_MAX_FIRST_STACK_STEP);
+ if (GENERATE_MIPS16E_SAVE_RESTORE)
+ {
+ HOST_WIDE_INT offset;
+ unsigned int mask, regno;
+
+ /* Try to merge argument stores into the save instruction. */
+ nargs = mips16e_collect_argument_saves ();
+
+ /* Build the save instruction. */
+ mask = frame->mask;
+ insn = mips16e_build_save_restore (false, &mask, &offset,
+ nargs, step1);
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ size -= step1;
+
+ /* Check if we need to save other registers. */
+ for (regno = GP_REG_FIRST; regno < GP_REG_LAST; regno++)
+ if (BITSET_P (mask, regno - GP_REG_FIRST))
+ {
+ offset -= UNITS_PER_WORD;
+ mips_save_restore_reg (word_mode, regno,
+ offset, mips_save_reg);
+ }
+ }
+ else
+ {
+ if (cfun->machine->interrupt_handler_p)
+ {
+ HOST_WIDE_INT offset;
+ rtx mem;
+
+ /* If this interrupt is using a shadow register set, we need to
+ get the stack pointer from the previous register set. */
+ if (cfun->machine->use_shadow_register_set_p)
+ emit_insn (gen_mips_rdpgpr (stack_pointer_rtx,
+ stack_pointer_rtx));
+
+ if (!cfun->machine->keep_interrupts_masked_p)
+ {
+ /* Move from COP0 Cause to K0. */
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K0_REG_NUM),
+ gen_rtx_REG (SImode,
+ COP0_CAUSE_REG_NUM)));
+ /* Move from COP0 EPC to K1. */
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K1_REG_NUM),
+ gen_rtx_REG (SImode,
+ COP0_EPC_REG_NUM)));
+ }
+
+ /* Allocate the first part of the frame. */
+ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-step1));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ size -= step1;
+
+ /* Start at the uppermost location for saving. */
+ offset = frame->cop0_sp_offset - size;
+ if (!cfun->machine->keep_interrupts_masked_p)
+ {
+ /* Push EPC into its stack slot. */
+ mem = gen_frame_mem (word_mode,
+ plus_constant (stack_pointer_rtx,
+ offset));
+ mips_emit_move (mem, gen_rtx_REG (word_mode, K1_REG_NUM));
+ offset -= UNITS_PER_WORD;
+ }
+
+ /* Move from COP0 Status to K1. */
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, K1_REG_NUM),
+ gen_rtx_REG (SImode,
+ COP0_STATUS_REG_NUM)));
+
+ /* Right justify the RIPL in k0. */
+ if (!cfun->machine->keep_interrupts_masked_p)
+ emit_insn (gen_lshrsi3 (gen_rtx_REG (SImode, K0_REG_NUM),
+ gen_rtx_REG (SImode, K0_REG_NUM),
+ GEN_INT (CAUSE_IPL)));
+
+ /* Push Status into its stack slot. */
+ mem = gen_frame_mem (word_mode,
+ plus_constant (stack_pointer_rtx, offset));
+ mips_emit_move (mem, gen_rtx_REG (word_mode, K1_REG_NUM));
+ offset -= UNITS_PER_WORD;
+
+ /* Insert the RIPL into our copy of SR (k1) as the new IPL. */
+ if (!cfun->machine->keep_interrupts_masked_p)
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
+ GEN_INT (6),
+ GEN_INT (SR_IPL),
+ gen_rtx_REG (SImode, K0_REG_NUM)));
+
+ if (!cfun->machine->keep_interrupts_masked_p)
+ /* Enable interrupts by clearing the KSU ERL and EXL bits.
+ IE is already the correct value, so we don't have to do
+ anything explicit. */
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
+ GEN_INT (4),
+ GEN_INT (SR_EXL),
+ gen_rtx_REG (SImode, GP_REG_FIRST)));
+ else
+ /* Disable interrupts by clearing the KSU, ERL, EXL,
+ and IE bits. */
+ emit_insn (gen_insvsi (gen_rtx_REG (SImode, K1_REG_NUM),
+ GEN_INT (5),
+ GEN_INT (SR_IE),
+ gen_rtx_REG (SImode, GP_REG_FIRST)));
+ }
+ else
+ {
+ insn = gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-step1));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ size -= step1;
+ }
+ mips_for_each_saved_acc (size, mips_save_reg);
+ mips_for_each_saved_gpr_and_fpr (size, mips_save_reg);
+ }
+ }
+
+ /* Allocate the rest of the frame. */
+ if (size > 0)
+ {
+ if (SMALL_OPERAND (-size))
+ RTX_FRAME_RELATED_P (emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-size)))) = 1;
+ else
+ {
+ mips_emit_move (MIPS_PROLOGUE_TEMP (Pmode), GEN_INT (size));
+ if (TARGET_MIPS16)
+ {
+ /* There are no instructions to add or subtract registers
+ from the stack pointer, so use the frame pointer as a
+ temporary. We should always be using a frame pointer
+ in this case anyway. */
+ gcc_assert (frame_pointer_needed);
+ mips_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx);
+ emit_insn (gen_sub3_insn (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx,
+ MIPS_PROLOGUE_TEMP (Pmode)));
+ mips_emit_move (stack_pointer_rtx, hard_frame_pointer_rtx);
+ }
+ else
+ emit_insn (gen_sub3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ MIPS_PROLOGUE_TEMP (Pmode)));
+
+ /* Describe the combined effect of the previous instructions. */
+ mips_set_frame_expr
+ (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -size)));
+ }
+ }
+
+ /* Set up the frame pointer, if we're using one. */
+ if (frame_pointer_needed)
+ {
+ HOST_WIDE_INT offset;
+
+ offset = frame->hard_frame_pointer_offset;
+ if (offset == 0)
+ {
+ insn = mips_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (SMALL_OPERAND (offset))
+ {
+ insn = gen_add3_insn (hard_frame_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (offset));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ }
+ else
+ {
+ mips_emit_move (MIPS_PROLOGUE_TEMP (Pmode), GEN_INT (offset));
+ mips_emit_move (hard_frame_pointer_rtx, stack_pointer_rtx);
+ emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx,
+ MIPS_PROLOGUE_TEMP (Pmode)));
+ mips_set_frame_expr
+ (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
+ plus_constant (stack_pointer_rtx, offset)));
+ }
+ }
+
+ mips_emit_loadgp ();
+
+ /* Initialize the $gp save slot. */
+ if (mips_cfun_has_cprestore_slot_p ())
+ {
+ rtx base, mem, gp, temp;
+ HOST_WIDE_INT offset;
+
+ mips_get_cprestore_base_and_offset (&base, &offset, false);
+ mem = gen_frame_mem (Pmode, plus_constant (base, offset));
+ gp = TARGET_MIPS16 ? MIPS16_PIC_TEMP : pic_offset_table_rtx;
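+ /* If the offset is too big to appear directly in a store address,
+ the cprestore pattern needs a real temporary register to build the
+ address; otherwise a scratch is enough. */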
+ temp = (SMALL_OPERAND (offset)
+ ? gen_rtx_SCRATCH (Pmode)
+ : MIPS_PROLOGUE_TEMP (Pmode));
+ emit_insn (gen_potential_cprestore (mem, GEN_INT (offset), gp, temp));
+
+ mips_get_cprestore_base_and_offset (&base, &offset, true);
+ mem = gen_frame_mem (Pmode, plus_constant (base, offset));
+ emit_insn (gen_use_cprestore (mem));
+ }
+
+ /* We need to search back to the last use of K0 or K1. */
+ if (cfun->machine->interrupt_handler_p)
+ {
+ for (insn = get_last_insn (); insn != NULL_RTX; insn = PREV_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), mips_kernel_reg_p, NULL))
+ break;
+ /* Emit a move from K1 to COP0 Status after insn. */
+ gcc_assert (insn != NULL_RTX);
+ emit_insn_after (gen_cop0_move (gen_rtx_REG (SImode, COP0_STATUS_REG_NUM),
+ gen_rtx_REG (SImode, K1_REG_NUM)),
+ insn);
+ }
+
+ /* If we are profiling, make sure no instructions are scheduled before
+ the call to mcount. */
+ if (crtl->profile)
+ emit_insn (gen_blockage ());
+}
+
+/* Emit instructions to restore register REG from slot MEM. */
+
+static void
+mips_restore_reg (rtx reg, rtx mem)
+{
+ /* There's no MIPS16 instruction to load $31 directly. Load into
+ $7 instead and adjust the return insn appropriately. */
+ if (TARGET_MIPS16 && REGNO (reg) == RETURN_ADDR_REGNUM)
+ reg = gen_rtx_REG (GET_MODE (reg), GP_REG_FIRST + 7);
+
+ mips_emit_save_slot_move (reg, mem, MIPS_EPILOGUE_TEMP (GET_MODE (reg)));
+}
+
+/* Emit any instructions needed before a return. */
+
+void
+mips_expand_before_return (void)
+{
+ /* When using a call-clobbered gp, we start out with unified call
+ insns that include instructions to restore the gp. We then split
+ these unified calls after reload. These split calls explicitly
+ clobber gp, so there is no need to define
+ PIC_OFFSET_TABLE_REG_CALL_CLOBBERED.
+
+ For consistency, we should also insert an explicit clobber of $28
+ before return insns, so that the post-reload optimizers know that
+ the register is not live on exit. */
+ if (TARGET_CALL_CLOBBERED_GP)
+ emit_clobber (pic_offset_table_rtx);
+}
+
+/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
+ says which. */
+
+void
+mips_expand_epilogue (bool sibcall_p)
+{
+ const struct mips_frame_info *frame;
+ HOST_WIDE_INT step1, step2;
+ rtx base, target, insn;
+
+ if (!sibcall_p && mips_can_use_return_insn ())
+ {
+ emit_jump_insn (gen_return ());
+ return;
+ }
+
+ /* In MIPS16 mode, if the return value should go into a floating-point
+ register, we need to call a helper routine to copy it over. */
+ if (mips16_cfun_returns_in_fpr_p ())
+ mips16_copy_fpr_return_value ();
+
+ /* Split the frame into two. STEP1 is the amount of stack we should
+ deallocate before restoring the registers. STEP2 is the amount we
+ should deallocate afterwards.
+
+ Start off by assuming that no registers need to be restored. */
+ frame = &cfun->machine->frame;
+ step1 = frame->total_size;
+ step2 = 0;
+
+ /* Work out which register holds the frame address. */
+ if (!frame_pointer_needed)
+ base = stack_pointer_rtx;
+ else
+ {
+ base = hard_frame_pointer_rtx;
+ step1 -= frame->hard_frame_pointer_offset;
+ }
+
+ /* If we need to restore registers, deallocate as much stack as
+ possible in the second step without going out of range. */
+ if ((frame->mask | frame->fmask | frame->acc_mask) != 0
+ || frame->num_cop0_regs > 0)
+ {
+ step2 = MIN (step1, MIPS_MAX_FIRST_STACK_STEP);
+ step1 -= step2;
+ }
+
+ /* Set TARGET to BASE + STEP1. */
+ target = base;
+ if (step1 > 0)
+ {
+ rtx adjust;
+
+ /* Get an rtx for STEP1 that we can add to BASE. */
+ adjust = GEN_INT (step1);
+ if (!SMALL_OPERAND (step1))
+ {
+ mips_emit_move (MIPS_EPILOGUE_TEMP (Pmode), adjust);
+ adjust = MIPS_EPILOGUE_TEMP (Pmode);
+ }
+
+ /* Normal mode code can copy the result straight into $sp. */
+ if (!TARGET_MIPS16)
+ target = stack_pointer_rtx;
+
+ emit_insn (gen_add3_insn (target, base, adjust));
+ }
+
+ /* Copy TARGET into the stack pointer. */
+ if (target != stack_pointer_rtx)
+ mips_emit_move (stack_pointer_rtx, target);
+
+ /* If we're using addressing macros, $gp is implicitly used by all
+ SYMBOL_REFs. We must emit a blockage insn before restoring $gp
+ from the stack. */
+ if (TARGET_CALL_SAVED_GP && !TARGET_EXPLICIT_RELOCS)
+ emit_insn (gen_blockage ());
+
+ if (GENERATE_MIPS16E_SAVE_RESTORE && frame->mask != 0)
+ {
+ unsigned int regno, mask;
+ HOST_WIDE_INT offset;
+ rtx restore;
+
+ /* Generate the restore instruction. */
+ mask = frame->mask;
+ restore = mips16e_build_save_restore (true, &mask, &offset, 0, step2);
+
+ /* Restore any other registers manually. */
+ for (regno = GP_REG_FIRST; regno < GP_REG_LAST; regno++)
+ if (BITSET_P (mask, regno - GP_REG_FIRST))
+ {
+ offset -= UNITS_PER_WORD;
+ mips_save_restore_reg (word_mode, regno, offset, mips_restore_reg);
+ }
+
+ /* Restore the remaining registers and deallocate the final bit
+ of the frame. */
+ emit_insn (restore);
+ }
+ else
+ {
+ /* Restore the registers. */
+ mips_for_each_saved_acc (frame->total_size - step2, mips_restore_reg);
+ mips_for_each_saved_gpr_and_fpr (frame->total_size - step2,
+ mips_restore_reg);
+
+ if (cfun->machine->interrupt_handler_p)
+ {
+ HOST_WIDE_INT offset;
+ rtx mem;
+
+ offset = frame->cop0_sp_offset - (frame->total_size - step2);
+ if (!cfun->machine->keep_interrupts_masked_p)
+ {
+ /* Restore the original EPC. */
+ mem = gen_frame_mem (word_mode,
+ plus_constant (stack_pointer_rtx, offset));
+ mips_emit_move (gen_rtx_REG (word_mode, K0_REG_NUM), mem);
+ offset -= UNITS_PER_WORD;
+
+ /* Move to COP0 EPC. */
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, COP0_EPC_REG_NUM),
+ gen_rtx_REG (SImode, K0_REG_NUM)));
+ }
+
+ /* Restore the original Status. */
+ mem = gen_frame_mem (word_mode,
+ plus_constant (stack_pointer_rtx, offset));
+ mips_emit_move (gen_rtx_REG (word_mode, K0_REG_NUM), mem);
+ offset -= UNITS_PER_WORD;
+
+ /* If we don't use a shadow register set, we need to update SP. */
+ if (!cfun->machine->use_shadow_register_set_p && step2 > 0)
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (step2)));
+
+ /* Move to COP0 Status. */
+ emit_insn (gen_cop0_move (gen_rtx_REG (SImode, COP0_STATUS_REG_NUM),
+ gen_rtx_REG (SImode, K0_REG_NUM)));
+ }
+ else
+ {
+ /* Deallocate the final bit of the frame. */
+ if (step2 > 0)
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (step2)));
+ }
+ }
+
+ /* Add in the __builtin_eh_return stack adjustment. We need to
+ use a temporary in MIPS16 code. */
+ if (crtl->calls_eh_return)
+ {
+ if (TARGET_MIPS16)
+ {
+ mips_emit_move (MIPS_EPILOGUE_TEMP (Pmode), stack_pointer_rtx);
+ emit_insn (gen_add3_insn (MIPS_EPILOGUE_TEMP (Pmode),
+ MIPS_EPILOGUE_TEMP (Pmode),
+ EH_RETURN_STACKADJ_RTX));
+ mips_emit_move (stack_pointer_rtx, MIPS_EPILOGUE_TEMP (Pmode));
+ }
+ else
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+ }
+
+ if (!sibcall_p)
+ {
+ mips_expand_before_return ();
+ if (cfun->machine->interrupt_handler_p)
+ {
+ /* Interrupt handlers generate eret or deret. */
+ if (cfun->machine->use_debug_exception_return_p)
+ emit_jump_insn (gen_mips_deret ());
+ else
+ emit_jump_insn (gen_mips_eret ());
+ }
+ else
+ {
+ unsigned int regno;
+
+ /* When generating MIPS16 code, the normal
+ mips_for_each_saved_gpr_and_fpr path will restore the return
+ address into $7 rather than $31. */
+ if (TARGET_MIPS16
+ && !GENERATE_MIPS16E_SAVE_RESTORE
+ && BITSET_P (frame->mask, RETURN_ADDR_REGNUM))
+ regno = GP_REG_FIRST + 7;
+ else
+ regno = RETURN_ADDR_REGNUM;
+ emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno)));
+ }
+ }
+
+ /* Search from the beginning to the first use of K0 or K1. */
+ if (cfun->machine->interrupt_handler_p
+ && !cfun->machine->keep_interrupts_masked_p)
+ {
+ for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), mips_kernel_reg_p, NULL))
+ break;
+ gcc_assert (insn != NULL_RTX);
+ /* Insert the interrupt-disable sequence before the first use of K0 or K1. */
+ emit_insn_before (gen_mips_di (), insn);
+ emit_insn_before (gen_mips_ehb (), insn);
+ }
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+ This allows the optimizer to omit jumps to jumps if no stack
+ was created. */
+
+bool
+mips_can_use_return_insn (void)
+{
+ /* Interrupt handlers need to go through the epilogue. */
+ if (cfun->machine->interrupt_handler_p)
+ return false;
+
+ if (!reload_completed)
+ return false;
+
+ if (crtl->profile)
+ return false;
+
+ /* In MIPS16 mode, a function that returns a floating-point value
+ needs to arrange to copy the return value into the floating-point
+ registers. */
+ if (mips16_cfun_returns_in_fpr_p ())
+ return false;
+
+ return cfun->machine->frame.total_size == 0;
+}
+
+/* Return true if register REGNO can store a value of mode MODE.
+ The result of this function is cached in mips_hard_regno_mode_ok. */
+
+static bool
+mips_hard_regno_mode_ok_p (unsigned int regno, enum machine_mode mode)
+{
+ unsigned int size;
+ enum mode_class mclass;
+
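+ /* Condition-code vectors must start at a suitably aligned ST register:
+ an even register for CCV2mode, a multiple of four for CCV4mode. */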
+ if (mode == CCV2mode)
+ return (ISA_HAS_8CC
+ && ST_REG_P (regno)
+ && (regno - ST_REG_FIRST) % 2 == 0);
+
+ if (mode == CCV4mode)
+ return (ISA_HAS_8CC
+ && ST_REG_P (regno)
+ && (regno - ST_REG_FIRST) % 4 == 0);
+
+ if (mode == CCmode)
+ {
+ if (!ISA_HAS_8CC)
+ return regno == FPSW_REGNUM;
+
+ return (ST_REG_P (regno)
+ || GP_REG_P (regno)
+ || FP_REG_P (regno));
+ }
+
+ size = GET_MODE_SIZE (mode);
+ mclass = GET_MODE_CLASS (mode);
+
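+ /* Values no wider than a word can go in any GPR; multiword values
+ must start at an even-numbered register. */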
+ if (GP_REG_P (regno))
+ return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
+
+ if (FP_REG_P (regno)
+ && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0
+ || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG)))
+ {
+ /* Allow TFmode for CCmode reloads. */
+ if (mode == TFmode && ISA_HAS_8CC)
+ return true;
+
+ /* Allow 64-bit vector modes for Loongson-2E/2F. */
+ if (TARGET_LOONGSON_VECTORS
+ && (mode == V2SImode
+ || mode == V4HImode
+ || mode == V8QImode
+ || mode == DImode))
+ return true;
+
+ if (mclass == MODE_FLOAT
+ || mclass == MODE_COMPLEX_FLOAT
+ || mclass == MODE_VECTOR_FLOAT)
+ return size <= UNITS_PER_FPVALUE;
+
+ /* Allow integer modes that fit into a single register. We need
+ to put integers into FPRs when using instructions like CVT
+ and TRUNC. There's no point allowing sizes smaller than a word,
+ because the FPU has no appropriate load/store instructions. */
+ if (mclass == MODE_INT)
+ return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
+ }
+
+ if (ACC_REG_P (regno)
+ && (INTEGRAL_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode)))
+ {
+ if (MD_REG_P (regno))
+ {
+ /* After a multiplication or division, clobbering HI makes
+ the value of LO unpredictable, and vice versa. This means
+ that, for all interesting cases, HI and LO are effectively
+ a single register.
+
+ We model this by requiring that any value that uses HI
+ also uses LO. */
+ if (size <= UNITS_PER_WORD * 2)
+ return regno == (size <= UNITS_PER_WORD ? LO_REGNUM : MD_REG_FIRST);
+ }
+ else
+ {
+ /* DSP accumulators do not have the same restrictions as
+ HI and LO, so we can treat them as normal doubleword
+ registers. */
+ if (size <= UNITS_PER_WORD)
+ return true;
+
+ if (size <= UNITS_PER_WORD * 2
+ && ((regno - DSP_ACC_REG_FIRST) & 1) == 0)
+ return true;
+ }
+ }
+
+ if (ALL_COP_REG_P (regno))
+ return mclass == MODE_INT && size <= UNITS_PER_WORD;
+
+ if (regno == GOT_VERSION_REGNUM)
+ return mode == SImode;
+
+ return false;
+}
+
+/* Implement HARD_REGNO_NREGS. */
+
+unsigned int
+mips_hard_regno_nregs (int regno, enum machine_mode mode)
+{
+ if (ST_REG_P (regno))
+ /* The size of FP status registers is always 4, because they only hold
+ CCmode values, and CCmode is always considered to be 4 bytes wide. */
+ return (GET_MODE_SIZE (mode) + 3) / 4;
+
+ if (FP_REG_P (regno))
+ return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
+
+ /* All other registers are word-sized. */
+ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
+/* Implement CLASS_MAX_NREGS, taking the maximum of the cases
+ in mips_hard_regno_nregs. */
+
+int
+mips_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
+{
+ int size;
+ HARD_REG_SET left;
+
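+ /* SIZE is the size of the smallest register that RCLASS can use;
+ start with a value larger than that of any real register. */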
+ size = 0x8000;
+ COPY_HARD_REG_SET (left, reg_class_contents[(int) rclass]);
+ if (hard_reg_set_intersect_p (left, reg_class_contents[(int) ST_REGS]))
+ {
+ size = MIN (size, 4);
+ AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) ST_REGS]);
+ }
+ if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
+ {
+ size = MIN (size, UNITS_PER_FPREG);
+ AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) FP_REGS]);
+ }
+ if (!hard_reg_set_empty_p (left))
+ size = MIN (size, UNITS_PER_WORD);
+ return (GET_MODE_SIZE (mode) + size - 1) / size;
+}
+
+/* Implement CANNOT_CHANGE_MODE_CLASS. */
+
+bool
+mips_cannot_change_mode_class (enum machine_mode from ATTRIBUTE_UNUSED,
+ enum machine_mode to ATTRIBUTE_UNUSED,
+ enum reg_class rclass)
+{
+ /* There are several problems with changing the modes of values
+ in floating-point registers:
+
+ - When a multi-word value is stored in paired floating-point
+ registers, the first register always holds the low word.
+ We therefore can't allow FPRs to change between single-word
+ and multi-word modes on big-endian targets.
+
+ - GCC assumes that each word of a multiword register can be accessed
+ individually using SUBREGs. This is not true for floating-point
+ registers if they are bigger than a word.
+
+ - Loading a 32-bit value into a 64-bit floating-point register
+ will not sign-extend the value, despite what LOAD_EXTEND_OP says.
+ We can't allow FPRs to change from SImode to a wider mode on
+ 64-bit targets.
+
+ - If the FPU has already interpreted a value in one format, we must
+ not ask it to treat the value as having a different format.
+
+ We therefore disallow all mode changes involving FPRs. */
+ return reg_classes_intersect_p (FP_REGS, rclass);
+}
+
+/* Implement target hook small_register_classes_for_mode_p. */
+
+static bool
+mips_small_register_classes_for_mode_p (enum machine_mode mode
+ ATTRIBUTE_UNUSED)
+{
+ return TARGET_MIPS16;
+}
+
+/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction. */
+
+static bool
+mips_mode_ok_for_mov_fmt_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case SFmode:
+ return TARGET_HARD_FLOAT;
+
+ case DFmode:
+ return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
+
+ case V2SFmode:
+ return TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT;
+
+ default:
+ return false;
+ }
+}
+
+/* Implement MODES_TIEABLE_P. */
+
+bool
+mips_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ /* FPRs allow no mode punning, so it's not worth tying modes if we'd
+ prefer to put one of them in FPRs. */
+ return (mode1 == mode2
+ || (!mips_mode_ok_for_mov_fmt_p (mode1)
+ && !mips_mode_ok_for_mov_fmt_p (mode2)));
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
+
+static reg_class_t
+mips_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (mips_dangerous_for_la25_p (x) && reg_class_subset_p (LEA_REGS, rclass))
+ return LEA_REGS;
+
+ if (reg_class_subset_p (FP_REGS, rclass)
+ && mips_mode_ok_for_mov_fmt_p (GET_MODE (x)))
+ return FP_REGS;
+
+ if (reg_class_subset_p (GR_REGS, rclass))
+ rclass = GR_REGS;
+
+ if (TARGET_MIPS16 && reg_class_subset_p (M16_REGS, rclass))
+ rclass = M16_REGS;
+
+ return rclass;
+}
+
+/* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
+ Return a "canonical" class to represent it in later calculations. */
+
+static reg_class_t
+mips_canonicalize_move_class (reg_class_t rclass)
+{
+ /* All moves involving accumulator registers have the same cost. */
+ if (reg_class_subset_p (rclass, ACC_REGS))
+ rclass = ACC_REGS;
+
+ /* Likewise promote subclasses of general registers to the most
+ interesting containing class. */
+ if (TARGET_MIPS16 && reg_class_subset_p (rclass, M16_REGS))
+ rclass = M16_REGS;
+ else if (reg_class_subset_p (rclass, GENERAL_REGS))
+ rclass = GENERAL_REGS;
+
+ return rclass;
+}
+
+/* Return the cost of moving a value of mode MODE from a register of
+ class FROM to a GPR. Return 0 for classes that are unions of other
+ classes handled by this function. */
+
+static int
+mips_move_to_gpr_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from)
+{
+ switch (from)
+ {
+ case GENERAL_REGS:
+ /* A MIPS16 MOVE instruction, or a non-MIPS16 MOVE macro. */
+ return 2;
+
+ case ACC_REGS:
+ /* MFLO and MFHI. */
+ return 6;
+
+ case FP_REGS:
+ /* MFC1, etc. */
+ return 4;
+
+ case ST_REGS:
+ /* LUI followed by MOVF. */
+ return 4;
+
+ case COP0_REGS:
+ case COP2_REGS:
+ case COP3_REGS:
+ /* This choice of value is historical. */
+ return 5;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return the cost of moving a value of mode MODE from a GPR to a
+ register of class TO. Return 0 for classes that are unions of
+ other classes handled by this function. */
+
+static int
+mips_move_from_gpr_cost (enum machine_mode mode, reg_class_t to)
+{
+ switch (to)
+ {
+ case GENERAL_REGS:
+ /* A MIPS16 MOVE instruction, or a non-MIPS16 MOVE macro. */
+ return 2;
+
+ case ACC_REGS:
+ /* MTLO and MTHI. */
+ return 6;
+
+ case FP_REGS:
+ /* MTC1, etc. */
+ return 4;
+
+ case ST_REGS:
+ /* A secondary reload through an FPR scratch. */
+ return (mips_register_move_cost (mode, GENERAL_REGS, FP_REGS)
+ + mips_register_move_cost (mode, FP_REGS, ST_REGS));
+
+ case COP0_REGS:
+ case COP2_REGS:
+ case COP3_REGS:
+ /* This choice of value is historical. */
+ return 5;
+
+ default:
+ return 0;
+ }
+}
+
+/* Implement TARGET_REGISTER_MOVE_COST. Return 0 for classes that are the
+ maximum of the move costs for subclasses; regclass will work out
+ the maximum for us. */
+
+static int
+mips_register_move_cost (enum machine_mode mode,
+ reg_class_t from, reg_class_t to)
+{
+ reg_class_t dregs;
+ int cost1, cost2;
+
+ from = mips_canonicalize_move_class (from);
+ to = mips_canonicalize_move_class (to);
+
+ /* Handle moves that can be done without using general-purpose registers. */
+ if (from == FP_REGS)
+ {
+ if (to == FP_REGS && mips_mode_ok_for_mov_fmt_p (mode))
+ /* MOV.FMT. */
+ return 4;
+ if (to == ST_REGS)
+ /* The sequence generated by mips_expand_fcc_reload. */
+ return 8;
+ }
+
+ /* Handle cases in which only one class deviates from the ideal. */
+ dregs = TARGET_MIPS16 ? M16_REGS : GENERAL_REGS;
+ if (from == dregs)
+ return mips_move_from_gpr_cost (mode, to);
+ if (to == dregs)
+ return mips_move_to_gpr_cost (mode, from);
+
+ /* Handle cases that require a GPR temporary. */
+ cost1 = mips_move_to_gpr_cost (mode, from);
+ if (cost1 != 0)
+ {
+ cost2 = mips_move_from_gpr_cost (mode, to);
+ if (cost2 != 0)
+ return cost1 + cost2;
+ }
+
+ return 0;
+}
+
+/* Implement TARGET_MEMORY_MOVE_COST. */
+
+static int
+mips_memory_move_cost (enum machine_mode mode, reg_class_t rclass, bool in)
+{
+ return (mips_cost->memory_latency
+ + memory_move_secondary_cost (mode, rclass, in));
+}
+
+/* Implement TARGET_IRA_COVER_CLASSES. */
+
+static const reg_class_t *
+mips_ira_cover_classes (void)
+{
+ static const reg_class_t acc_classes[] = {
+ GR_AND_ACC_REGS, FP_REGS, COP0_REGS, COP2_REGS, COP3_REGS,
+ ST_REGS, LIM_REG_CLASSES
+ };
+ static const reg_class_t no_acc_classes[] = {
+ GR_REGS, FP_REGS, COP0_REGS, COP2_REGS, COP3_REGS,
+ ST_REGS, LIM_REG_CLASSES
+ };
+
+ /* Don't allow the register allocators to use LO and HI in MIPS16 mode,
+ which has no MTLO or MTHI instructions. Also, using GR_AND_ACC_REGS
+ as a cover class only works well when we keep per-register costs.
+ Using it when not optimizing can cause us to think accumulators
+ have the same cost as GPRs in cases where GPRs are actually much
+ cheaper. */
+ return TARGET_MIPS16 || !optimize ? no_acc_classes : acc_classes;
+}
+
+/* Return the register class required for a secondary register when
+ copying between one of the registers in RCLASS and value X, which
+ has mode MODE. X is the source of the move if IN_P, otherwise it
+ is the destination. Return NO_REGS if no secondary register is
+ needed. */
+
+enum reg_class
+mips_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode, rtx x, bool in_p)
+{
+ int regno;
+
+ /* If X is a constant that cannot be loaded into $25, it must be loaded
+ into some other GPR. No other register class allows a direct move. */
+ if (mips_dangerous_for_la25_p (x))
+ return reg_class_subset_p (rclass, LEA_REGS) ? NO_REGS : LEA_REGS;
+
+ regno = true_regnum (x);
+ if (TARGET_MIPS16)
+ {
+ /* In MIPS16 mode, every move must involve a member of M16_REGS. */
+ if (!reg_class_subset_p (rclass, M16_REGS) && !M16_REG_P (regno))
+ return M16_REGS;
+
+ return NO_REGS;
+ }
+
+ /* Copying from accumulator registers to anywhere other than a general
+ register requires a temporary general register. */
+ if (reg_class_subset_p (rclass, ACC_REGS))
+ return GP_REG_P (regno) ? NO_REGS : GR_REGS;
+ if (ACC_REG_P (regno))
+ return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
+
+ /* We can only copy a value to a condition code register from a
+ floating-point register, and even then we require a scratch
+ floating-point register. We can only copy a value out of a
+ condition-code register into a general register. */
+ if (reg_class_subset_p (rclass, ST_REGS))
+ {
+ if (in_p)
+ return FP_REGS;
+ return GP_REG_P (regno) ? NO_REGS : GR_REGS;
+ }
+ if (ST_REG_P (regno))
+ {
+ if (!in_p)
+ return FP_REGS;
+ return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
+ }
+
+ if (reg_class_subset_p (rclass, FP_REGS))
+ {
+ if (MEM_P (x)
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8))
+ /* In this case we can use lwc1, swc1, ldc1 or sdc1. We'll use
+ pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */
+ return NO_REGS;
+
+ if (GP_REG_P (regno) || x == CONST0_RTX (mode))
+ /* In this case we can use mtc1, mfc1, dmtc1 or dmfc1. */
+ return NO_REGS;
+
+ if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (x))
+ /* We can force the constant to memory and use lwc1
+ and ldc1. As above, we will use pairs of lwc1s if
+ ldc1 is not supported. */
+ return NO_REGS;
+
+ if (FP_REG_P (regno) && mips_mode_ok_for_mov_fmt_p (mode))
+ /* In this case we can use mov.fmt. */
+ return NO_REGS;
+
+ /* Otherwise, we need to reload through an integer register. */
+ return GR_REGS;
+ }
+ if (FP_REG_P (regno))
+ return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
+
+ return NO_REGS;
+}
+
+/* Implement TARGET_MODE_REP_EXTENDED. */
+
+static int
+mips_mode_rep_extended (enum machine_mode mode, enum machine_mode mode_rep)
+{
+ /* On 64-bit targets, SImode register values are sign-extended to DImode. */
+ if (TARGET_64BIT && mode == SImode && mode_rep == DImode)
+ return SIGN_EXTEND;
+
+ return UNKNOWN;
+}
+
+/* Implement TARGET_VALID_POINTER_MODE. */
+
+static bool
+mips_valid_pointer_mode (enum machine_mode mode)
+{
+ return mode == SImode || (TARGET_64BIT && mode == DImode);
+}
+
+/* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
+
+static bool
+mips_vector_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V2SFmode:
+ return TARGET_PAIRED_SINGLE_FLOAT;
+
+ case V2HImode:
+ case V4QImode:
+ case V2HQmode:
+ case V2UHQmode:
+ case V2HAmode:
+ case V2UHAmode:
+ case V4QQmode:
+ case V4UQQmode:
+ return TARGET_DSP;
+
+ case V2SImode:
+ case V4HImode:
+ case V8QImode:
+ return TARGET_LOONGSON_VECTORS;
+
+ default:
+ return false;
+ }
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
+
+static bool
+mips_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (ALL_FIXED_POINT_MODE_P (mode)
+ && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
+ return true;
+
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
+
+static enum machine_mode
+mips_preferred_simd_mode (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (TARGET_PAIRED_SINGLE_FLOAT
+ && mode == SFmode)
+ return V2SFmode;
+ return word_mode;
+}
+
+/* Implement TARGET_INIT_LIBFUNCS. */
+
+static void
+mips_init_libfuncs (void)
+{
+ if (TARGET_FIX_VR4120)
+ {
+ /* Register the special divsi3 and modsi3 functions needed to work
+ around VR4120 division errata. */
+ set_optab_libfunc (sdiv_optab, SImode, "__vr4120_divsi3");
+ set_optab_libfunc (smod_optab, SImode, "__vr4120_modsi3");
+ }
+
+ if (TARGET_MIPS16 && TARGET_HARD_FLOAT_ABI)
+ {
+ /* Register the MIPS16 -mhard-float stubs. */
+ set_optab_libfunc (add_optab, SFmode, "__mips16_addsf3");
+ set_optab_libfunc (sub_optab, SFmode, "__mips16_subsf3");
+ set_optab_libfunc (smul_optab, SFmode, "__mips16_mulsf3");
+ set_optab_libfunc (sdiv_optab, SFmode, "__mips16_divsf3");
+
+ set_optab_libfunc (eq_optab, SFmode, "__mips16_eqsf2");
+ set_optab_libfunc (ne_optab, SFmode, "__mips16_nesf2");
+ set_optab_libfunc (gt_optab, SFmode, "__mips16_gtsf2");
+ set_optab_libfunc (ge_optab, SFmode, "__mips16_gesf2");
+ set_optab_libfunc (lt_optab, SFmode, "__mips16_ltsf2");
+ set_optab_libfunc (le_optab, SFmode, "__mips16_lesf2");
+ set_optab_libfunc (unord_optab, SFmode, "__mips16_unordsf2");
+
+ set_conv_libfunc (sfix_optab, SImode, SFmode, "__mips16_fix_truncsfsi");
+ set_conv_libfunc (sfloat_optab, SFmode, SImode, "__mips16_floatsisf");
+ set_conv_libfunc (ufloat_optab, SFmode, SImode, "__mips16_floatunsisf");
+
+ if (TARGET_DOUBLE_FLOAT)
+ {
+ set_optab_libfunc (add_optab, DFmode, "__mips16_adddf3");
+ set_optab_libfunc (sub_optab, DFmode, "__mips16_subdf3");
+ set_optab_libfunc (smul_optab, DFmode, "__mips16_muldf3");
+ set_optab_libfunc (sdiv_optab, DFmode, "__mips16_divdf3");
+
+ set_optab_libfunc (eq_optab, DFmode, "__mips16_eqdf2");
+ set_optab_libfunc (ne_optab, DFmode, "__mips16_nedf2");
+ set_optab_libfunc (gt_optab, DFmode, "__mips16_gtdf2");
+ set_optab_libfunc (ge_optab, DFmode, "__mips16_gedf2");
+ set_optab_libfunc (lt_optab, DFmode, "__mips16_ltdf2");
+ set_optab_libfunc (le_optab, DFmode, "__mips16_ledf2");
+ set_optab_libfunc (unord_optab, DFmode, "__mips16_unorddf2");
+
+ set_conv_libfunc (sext_optab, DFmode, SFmode,
+ "__mips16_extendsfdf2");
+ set_conv_libfunc (trunc_optab, SFmode, DFmode,
+ "__mips16_truncdfsf2");
+ set_conv_libfunc (sfix_optab, SImode, DFmode,
+ "__mips16_fix_truncdfsi");
+ set_conv_libfunc (sfloat_optab, DFmode, SImode,
+ "__mips16_floatsidf");
+ set_conv_libfunc (ufloat_optab, DFmode, SImode,
+ "__mips16_floatunsidf");
+ }
+ }
+
+ /* The MIPS16 ISA does not have an encoding for "sync", so we rely
+ on an external non-MIPS16 routine to implement __sync_synchronize. */
+ if (TARGET_MIPS16)
+ synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+}
+
+/* Build up a multi-insn sequence that loads label TARGET into $AT. */
+
+static void
+mips_process_load_label (rtx target)
+{
+ rtx base, gp, intop;
+ HOST_WIDE_INT offset;
+
+ mips_multi_start ();
+ switch (mips_abi)
+ {
+ case ABI_N32:
+ mips_multi_add_insn ("lw\t%@,%%got_page(%0)(%+)", target, 0);
+ mips_multi_add_insn ("addiu\t%@,%@,%%got_ofst(%0)", target, 0);
+ break;
+
+ case ABI_64:
+ mips_multi_add_insn ("ld\t%@,%%got_page(%0)(%+)", target, 0);
+ mips_multi_add_insn ("daddiu\t%@,%@,%%got_ofst(%0)", target, 0);
+ break;
+
+ default:
+ gp = pic_offset_table_rtx;
+ if (mips_cfun_has_cprestore_slot_p ())
+ {
+ gp = gen_rtx_REG (Pmode, AT_REGNUM);
+ mips_get_cprestore_base_and_offset (&base, &offset, true);
+ if (!SMALL_OPERAND (offset))
+ {
+ intop = GEN_INT (CONST_HIGH_PART (offset));
+ mips_multi_add_insn ("lui\t%0,%1", gp, intop, 0);
+ mips_multi_add_insn ("addu\t%0,%0,%1", gp, base, 0);
+
+ base = gp;
+ offset = CONST_LOW_PART (offset);
+ }
+ intop = GEN_INT (offset);
+ if (ISA_HAS_LOAD_DELAY)
+ mips_multi_add_insn ("lw\t%0,%1(%2)%#", gp, intop, base, 0);
+ else
+ mips_multi_add_insn ("lw\t%0,%1(%2)", gp, intop, base, 0);
+ }
+ if (ISA_HAS_LOAD_DELAY)
+ mips_multi_add_insn ("lw\t%@,%%got(%0)(%1)%#", target, gp, 0);
+ else
+ mips_multi_add_insn ("lw\t%@,%%got(%0)(%1)", target, gp, 0);
+ mips_multi_add_insn ("addiu\t%@,%@,%%lo(%0)", target, 0);
+ break;
+ }
+}
+
+/* Return the number of instructions needed to load a label into $AT. */
+
+static unsigned int
+mips_load_label_num_insns (void)
+{
+ if (cfun->machine->load_label_num_insns == 0)
+ {
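+ /* The number of instructions does not depend on the label itself,
+ so pc_rtx serves as a placeholder target here. */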
+ mips_process_load_label (pc_rtx);
+ cfun->machine->load_label_num_insns = mips_multi_num_insns;
+ }
+ return cfun->machine->load_label_num_insns;
+}
+
+/* Emit an asm sequence to start a noat block and load the address
+ of a label into $1. */
+
+void
+mips_output_load_label (rtx target)
+{
+ mips_push_asm_switch (&mips_noat);
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ mips_process_load_label (target);
+ mips_multi_write ();
+ }
+ else
+ {
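+ /* Without explicit relocs, use the "la" or "dla" macro and let
+ the assembler choose the actual sequence. */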
+ if (Pmode == DImode)
+ output_asm_insn ("dla\t%@,%0", &target);
+ else
+ output_asm_insn ("la\t%@,%0", &target);
+ }
+}
+
+/* Return the length of INSN. LENGTH is the initial length computed by
+ attributes in the machine-description file. */
+
+int
+mips_adjust_insn_length (rtx insn, int length)
+{
+ /* mips.md uses MAX_PIC_BRANCH_LENGTH as a placeholder for the length
+ of a PIC long-branch sequence. Substitute the correct value. */
+ if (length == MAX_PIC_BRANCH_LENGTH
+ && INSN_CODE (insn) >= 0
+ && get_attr_type (insn) == TYPE_BRANCH)
+ {
+ /* Add the branch-over instruction and its delay slot, if this
+ is a conditional branch. */
+ length = simplejump_p (insn) ? 0 : 8;
+
+ /* Load the label into $AT and jump to it. Ignore the delay
+ slot of the jump. */
+ length += 4 * mips_load_label_num_insns () + 4;
+ }
+
+ /* An unconditional jump has an unfilled delay slot if it is not part
+ of a sequence. A conditional jump normally has a delay slot, but
+ does not on MIPS16. */
+ if (CALL_P (insn) || (TARGET_MIPS16 ? simplejump_p (insn) : JUMP_P (insn)))
+ length += 4;
+
+ /* See how many nops might be needed to avoid hardware hazards. */
+ if (!cfun->machine->ignore_hazard_length_p && INSN_CODE (insn) >= 0)
+ switch (get_attr_hazard (insn))
+ {
+ case HAZARD_NONE:
+ break;
+
+ case HAZARD_DELAY:
+ length += 4;
+ break;
+
+ case HAZARD_HILO:
+ length += 8;
+ break;
+ }
+
+ /* In order to make it easier to share MIPS16 and non-MIPS16 patterns,
+ the .md file length attributes are 4-based for both modes.
+ Adjust the MIPS16 ones here. */
+ if (TARGET_MIPS16)
+ length /= 2;
+
+ return length;
+}
+
+/* Return the assembly code for INSN, which has the operands given by
+ OPERANDS, and which branches to OPERANDS[0] if some condition is true.
+ BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
+ is in range of a direct branch. BRANCH_IF_FALSE is an inverted
+ version of BRANCH_IF_TRUE. */
+
+const char *
+mips_output_conditional_branch (rtx insn, rtx *operands,
+ const char *branch_if_true,
+ const char *branch_if_false)
+{
+ unsigned int length;
+ rtx taken, not_taken;
+
+ gcc_assert (LABEL_P (operands[0]));
+
+ length = get_attr_length (insn);
+ if (length <= 8)
+ {
+ /* Just a simple conditional branch. */
+ mips_branch_likely = (final_sequence && INSN_ANNULLED_BRANCH_P (insn));
+ return branch_if_true;
+ }
+
+ /* Generate a reversed branch around a direct jump. This fallback does
+ not use branch-likely instructions. */
+ mips_branch_likely = false;
+ not_taken = gen_label_rtx ();
+ taken = operands[0];
+
+ /* Generate the reversed branch to NOT_TAKEN. */
+ operands[0] = not_taken;
+ output_asm_insn (branch_if_false, operands);
+
+ /* If INSN has a delay slot, we must provide delay slots for both the
+ branch to NOT_TAKEN and the conditional jump. We must also ensure
+ that INSN's delay slot is executed in the appropriate cases. */
+ if (final_sequence)
+ {
+ /* This first delay slot will always be executed, so use INSN's
+ delay slot if it is not annulled. */
+ if (!INSN_ANNULLED_BRANCH_P (insn))
+ {
+ final_scan_insn (XVECEXP (final_sequence, 0, 1),
+ asm_out_file, optimize, 1, NULL);
+ INSN_DELETED_P (XVECEXP (final_sequence, 0, 1)) = 1;
+ }
+ else
+ output_asm_insn ("nop", 0);
+ fprintf (asm_out_file, "\n");
+ }
+
+ /* Output the unconditional branch to TAKEN. */
+ if (TARGET_ABSOLUTE_JUMPS)
+ output_asm_insn (MIPS_ABSOLUTE_JUMP ("j\t%0%/"), &taken);
+ else
+ {
+ mips_output_load_label (taken);
+ output_asm_insn ("jr\t%@%]%/", 0);
+ }
+
+ /* Now deal with its delay slot; see above. */
+ if (final_sequence)
+ {
+ /* This delay slot will only be executed if the branch is taken.
+ Use INSN's delay slot if it is annulled. */
+ if (INSN_ANNULLED_BRANCH_P (insn))
+ {
+ final_scan_insn (XVECEXP (final_sequence, 0, 1),
+ asm_out_file, optimize, 1, NULL);
+ INSN_DELETED_P (XVECEXP (final_sequence, 0, 1)) = 1;
+ }
+ else
+ output_asm_insn ("nop", 0);
+ fprintf (asm_out_file, "\n");
+ }
+
+ /* Output NOT_TAKEN. */
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (not_taken));
+ return "";
+}
+
+/* Return the assembly code for INSN, which branches to OPERANDS[0]
+ if some ordering condition is true. The condition is given by
+ OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
+ OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
+ its second is always zero. */
+
+const char *
+mips_output_order_conditional_branch (rtx insn, rtx *operands, bool inverted_p)
+{
+ const char *branch[2];
+
+ /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
+ Make BRANCH[0] branch on the inverse condition. */
+ switch (GET_CODE (operands[1]))
+ {
+ /* These cases are equivalent to comparisons against zero. */
+ case LEU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GTU:
+ branch[!inverted_p] = MIPS_BRANCH ("bne", "%2,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("beq", "%2,%.,%0");
+ break;
+
+ /* These cases are always true or always false. */
+ case LTU:
+ inverted_p = !inverted_p;
+ /* Fall through. */
+ case GEU:
+ branch[!inverted_p] = MIPS_BRANCH ("beq", "%.,%.,%0");
+ branch[inverted_p] = MIPS_BRANCH ("bne", "%.,%.,%0");
+ break;
+
+ default:
+ branch[!inverted_p] = MIPS_BRANCH ("b%C1z", "%2,%0");
+ branch[inverted_p] = MIPS_BRANCH ("b%N1z", "%2,%0");
+ break;
+ }
+ return mips_output_conditional_branch (insn, operands, branch[1], branch[0]);
+}
+
+/* Start a block of code that needs access to the LL, SC and SYNC
+ instructions. */
+
+static void
+mips_start_ll_sc_sync_block (void)
+{
+ if (!ISA_HAS_LL_SC)
+ {
+ output_asm_insn (".set\tpush", 0);
+ output_asm_insn (".set\tmips2", 0);
+ }
+}
+
+/* End a block started by mips_start_ll_sc_sync_block. */
+
+static void
+mips_end_ll_sc_sync_block (void)
+{
+ if (!ISA_HAS_LL_SC)
+ output_asm_insn (".set\tpop", 0);
+}
+
+/* Output and/or return the asm template for a sync instruction. */
+
+const char *
+mips_output_sync (void)
+{
+ mips_start_ll_sc_sync_block ();
+ output_asm_insn ("sync", 0);
+ mips_end_ll_sc_sync_block ();
+ return "";
+}
+
+/* Return the asm template associated with sync_insn1 value TYPE.
+ IS_64BIT_P is true if we want a 64-bit rather than 32-bit operation. */
+
+static const char *
+mips_sync_insn1_template (enum attr_sync_insn1 type, bool is_64bit_p)
+{
+ switch (type)
+ {
+ case SYNC_INSN1_MOVE:
+ return "move\t%0,%z2";
+ case SYNC_INSN1_LI:
+ return "li\t%0,%2";
+ case SYNC_INSN1_ADDU:
+ return is_64bit_p ? "daddu\t%0,%1,%z2" : "addu\t%0,%1,%z2";
+ case SYNC_INSN1_ADDIU:
+ return is_64bit_p ? "daddiu\t%0,%1,%2" : "addiu\t%0,%1,%2";
+ case SYNC_INSN1_SUBU:
+ return is_64bit_p ? "dsubu\t%0,%1,%z2" : "subu\t%0,%1,%z2";
+ case SYNC_INSN1_AND:
+ return "and\t%0,%1,%z2";
+ case SYNC_INSN1_ANDI:
+ return "andi\t%0,%1,%2";
+ case SYNC_INSN1_OR:
+ return "or\t%0,%1,%z2";
+ case SYNC_INSN1_ORI:
+ return "ori\t%0,%1,%2";
+ case SYNC_INSN1_XOR:
+ return "xor\t%0,%1,%z2";
+ case SYNC_INSN1_XORI:
+ return "xori\t%0,%1,%2";
+ }
+ gcc_unreachable ();
+}
+
+/* Return the asm template associated with sync_insn2 value TYPE. */
+
+static const char *
+mips_sync_insn2_template (enum attr_sync_insn2 type)
+{
+ switch (type)
+ {
+ case SYNC_INSN2_NOP:
+ gcc_unreachable ();
+ case SYNC_INSN2_AND:
+ return "and\t%0,%1,%z2";
+ case SYNC_INSN2_XOR:
+ return "xor\t%0,%1,%z2";
+ case SYNC_INSN2_NOT:
+ return "nor\t%0,%1,%.";
+ }
+ gcc_unreachable ();
+}
+
+/* OPERANDS are the operands to a sync loop instruction and INDEX is
+ the value of one of the sync_* attributes. Return the operand
+ referred to by the attribute, or DEFAULT_VALUE if the insn doesn't
+ have the associated attribute. */
+
+static rtx
+mips_get_sync_operand (rtx *operands, int index, rtx default_value)
+{
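+ /* Attribute values are 1-based operand numbers; zero means that
+ the insn does not have the attribute. */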
+ if (index > 0)
+ default_value = operands[index - 1];
+ return default_value;
+}
+
+/* INSN is a sync loop with operands OPERANDS. Build up a multi-insn
+ sequence for it. */
+
+static void
+mips_process_sync_loop (rtx insn, rtx *operands)
+{
+ rtx at, mem, oldval, newval, inclusive_mask, exclusive_mask;
+ rtx required_oldval, insn1_op2, tmp1, tmp2, tmp3;
+ unsigned int tmp3_insn;
+ enum attr_sync_insn1 insn1;
+ enum attr_sync_insn2 insn2;
+ bool is_64bit_p;
+
+ /* Read an operand from the sync_WHAT attribute and store it in
+ variable WHAT. DEFAULT is the default value if no attribute
+ is specified. */
+#define READ_OPERAND(WHAT, DEFAULT) \
+ WHAT = mips_get_sync_operand (operands, (int) get_attr_sync_##WHAT (insn), \
+ DEFAULT)
+
+ /* Read the memory. */
+ READ_OPERAND (mem, 0);
+ gcc_assert (mem);
+ is_64bit_p = (GET_MODE_BITSIZE (GET_MODE (mem)) == 64);
+
+ /* Read the other attributes. */
+ at = gen_rtx_REG (GET_MODE (mem), AT_REGNUM);
+ READ_OPERAND (oldval, at);
+ READ_OPERAND (newval, at);
+ READ_OPERAND (inclusive_mask, 0);
+ READ_OPERAND (exclusive_mask, 0);
+ READ_OPERAND (required_oldval, 0);
+ READ_OPERAND (insn1_op2, 0);
+ insn1 = get_attr_sync_insn1 (insn);
+ insn2 = get_attr_sync_insn2 (insn);
+
+ mips_multi_start ();
+
+ /* Output the release side of the memory barrier. */
+ if (get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES)
+ {
+ if (required_oldval == 0 && TARGET_OCTEON)
+ {
+ /* Octeon doesn't reorder reads, so a full barrier can be
+ created by using SYNCW to order writes combined with the
+ write from the following SC. When the SC successfully
+ completes, we know that all preceding writes are also
+ committed to the coherent memory system. It is possible
+ for a single SYNCW to fail, but a pair of them will never
+ fail, so we use two. */
+ mips_multi_add_insn ("syncw", NULL);
+ mips_multi_add_insn ("syncw", NULL);
+ }
+ else
+ mips_multi_add_insn ("sync", NULL);
+ }
+
+ /* Output the branch-back label. */
+ mips_multi_add_label ("1:");
+
+ /* OLDVAL = *MEM. */
+ mips_multi_add_insn (is_64bit_p ? "lld\t%0,%1" : "ll\t%0,%1",
+ oldval, mem, NULL);
+
+ /* if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2. */
+ if (required_oldval)
+ {
+ if (inclusive_mask == 0)
+ tmp1 = oldval;
+ else
+ {
+ gcc_assert (oldval != at);
+ mips_multi_add_insn ("and\t%0,%1,%2",
+ at, oldval, inclusive_mask, NULL);
+ tmp1 = at;
+ }
+ mips_multi_add_insn ("bne\t%0,%z1,2f", tmp1, required_oldval, NULL);
+ }
+
+ /* $TMP1 = OLDVAL & EXCLUSIVE_MASK. */
+ if (exclusive_mask == 0)
+ tmp1 = const0_rtx;
+ else
+ {
+ gcc_assert (oldval != at);
+ mips_multi_add_insn ("and\t%0,%1,%z2",
+ at, oldval, exclusive_mask, NULL);
+ tmp1 = at;
+ }
+
+ /* $TMP2 = INSN1 (OLDVAL, INSN1_OP2).
+
+ We can omit the move when $TMP1 is nonzero or INSN2 is not a nop,
+ since at least one instruction will still be emitted in that case. */
+ if (insn1 == SYNC_INSN1_MOVE
+ && (tmp1 != const0_rtx || insn2 != SYNC_INSN2_NOP))
+ tmp2 = insn1_op2;
+ else
+ {
+ mips_multi_add_insn (mips_sync_insn1_template (insn1, is_64bit_p),
+ newval, oldval, insn1_op2, NULL);
+ tmp2 = newval;
+ }
+
+ /* $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK). */
+ if (insn2 == SYNC_INSN2_NOP)
+ tmp3 = tmp2;
+ else
+ {
+ mips_multi_add_insn (mips_sync_insn2_template (insn2),
+ newval, tmp2, inclusive_mask, NULL);
+ tmp3 = newval;
+ }
+ tmp3_insn = mips_multi_last_index ();
+
+ /* $AT = $TMP1 | $TMP3. */
+ if (tmp1 == const0_rtx || tmp3 == const0_rtx)
+ {
+ mips_multi_set_operand (tmp3_insn, 0, at);
+ tmp3 = at;
+ }
+ else
+ {
+ gcc_assert (tmp1 != tmp3);
+ mips_multi_add_insn ("or\t%0,%1,%2", at, tmp1, tmp3, NULL);
+ }
+
+ /* if (!commit (*MEM = $AT)) goto 1.
+
+ This will sometimes be a delayed branch; see the write code below
+ for details. */
+ mips_multi_add_insn (is_64bit_p ? "scd\t%0,%1" : "sc\t%0,%1", at, mem, NULL);
+ mips_multi_add_insn ("beq%?\t%0,%.,1b", at, NULL);
+
+ /* if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot]. */
+ if (insn1 != SYNC_INSN1_MOVE && insn1 != SYNC_INSN1_LI && tmp3 != newval)
+ {
+ mips_multi_copy_insn (tmp3_insn);
+ mips_multi_set_operand (mips_multi_last_index (), 0, newval);
+ }
+ else
+ mips_multi_add_insn ("nop", NULL);
+
+ /* Output the acquire side of the memory barrier. */
+ if (TARGET_SYNC_AFTER_SC)
+ mips_multi_add_insn ("sync", NULL);
+
+ /* Output the exit label, if needed. */
+ if (required_oldval)
+ mips_multi_add_label ("2:");
+
+#undef READ_OPERAND
+}
+
+/* Output and/or return the asm template for sync loop INSN, which has
+ the operands given by OPERANDS. */
+
+const char *
+mips_output_sync_loop (rtx insn, rtx *operands)
+{
+ mips_process_sync_loop (insn, operands);
+
+ /* Use branch-likely instructions to work around the LL/SC R10000
+ errata. */
+ mips_branch_likely = TARGET_FIX_R10000;
+
+ mips_push_asm_switch (&mips_noreorder);
+ mips_push_asm_switch (&mips_nomacro);
+ mips_push_asm_switch (&mips_noat);
+ mips_start_ll_sc_sync_block ();
+
+ mips_multi_write ();
+
+ mips_end_ll_sc_sync_block ();
+ mips_pop_asm_switch (&mips_noat);
+ mips_pop_asm_switch (&mips_nomacro);
+ mips_pop_asm_switch (&mips_noreorder);
+
+ return "";
+}
+
+/* Return the number of individual instructions in sync loop INSN,
+ which has the operands given by OPERANDS. */
+
+unsigned int
+mips_sync_loop_insns (rtx insn, rtx *operands)
+{
+ mips_process_sync_loop (insn, operands);
+ return mips_multi_num_insns;
+}
+
+/* Return the assembly code for DIV or DDIV instruction DIVISION, which has
+ the operands given by OPERANDS. Add in a divide-by-zero check if needed.
+
+ When working around R4000 and R4400 errata, we need to make sure that
+ the division is not immediately followed by a shift[1][2]. We also
+ need to stop the division from being put into a branch delay slot[3].
+ The easiest way to avoid both problems is to add a nop after the
+ division. When a divide-by-zero check is needed, this nop can be
+ used to fill the branch delay slot.
+
+ [1] If a double-word or a variable shift executes immediately
+ after starting an integer division, the shift may give an
+ incorrect result. See quotations of errata #16 and #28 from
+ "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0"
+ in mips.md for details.
+
+ [2] A similar bug to [1] exists for all revisions of the
+ R4000 and the R4400 when run in an MC configuration.
+ From "MIPS R4000MC Errata, Processor Revision 2.2 and 3.0":
+
+ "19. In this following sequence:
+
+ ddiv (or ddivu or div or divu)
+ dsll32 (or dsrl32, dsra32)
+
+ if an MPT stall occurs, while the divide is slipping the cpu
+ pipeline, then the following double shift would end up with an
+ incorrect result.
+
+ Workaround: The compiler needs to avoid generating any
+ sequence with divide followed by extended double shift."
+
+ This erratum is also present in "MIPS R4400MC Errata, Processor
+ Revision 1.0" and "MIPS R4400MC Errata, Processor Revision 2.0
+ & 3.0" as errata #10 and #4, respectively.
+
+ [3] From "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0"
+ (also valid for MIPS R4000MC processors):
+
+ "52. R4000SC: This bug does not apply for the R4000PC.
+
+ There are two flavors of this bug:
+
+ 1) If the instruction just after divide takes an RF exception
+ (tlb-refill, tlb-invalid) and gets an instruction cache
+ miss (both primary and secondary) and the line which is
+ currently in secondary cache at this index had the first
+ data word, where the bits 5..2 are set, then R4000 would
+ get a wrong result for the div.
+
+ ##1
+ nop
+ div r8, r9
+ ------------------- # end-of page. -tlb-refill
+ nop
+ ##2
+ nop
+ div r8, r9
+ ------------------- # end-of page. -tlb-invalid
+ nop
+
+ 2) If the divide is in the taken branch delay slot, where the
+ target takes RF exception and gets an I-cache miss for the
+ exception vector or where I-cache miss occurs for the
+ target address, under the above mentioned scenarios, the
+ div would get wrong results.
+
+ ##1
+ j r2 # to next page mapped or unmapped
+ div r8,r9 # this bug would be there as long
+ # as there is an ICache miss and
+ nop # the "data pattern" is present
+
+ ##2
+ beq r0, r0, NextPage # to Next page
+ div r8,r9
+ nop
+
+ This bug is present for div, divu, ddiv, and ddivu
+ instructions.
+
+ Workaround: For item 1), OS could make sure that the next page
+ after the divide instruction is also mapped. For item 2), the
+ compiler could make sure that the divide instruction is not in
+ the branch delay slot."
+
+ These processors have PRId values of 0x00004220 and 0x00004300 for
+ the R4000 and 0x00004400, 0x00004500 and 0x00004600 for the R4400. */
+
+const char *
+mips_output_division (const char *division, rtx *operands)
+{
+ const char *s;
+
+ s = division;
+ if (TARGET_FIX_R4000 || TARGET_FIX_R4400)
+ {
+ output_asm_insn (s, operands);
+ s = "nop";
+ }
+ if (TARGET_CHECK_ZERO_DIV)
+ {
+ if (TARGET_MIPS16)
+ {
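+ /* MIPS16 has no conditional trap instruction, so branch around an
+ explicit "break 7" (the divide-by-zero break code) instead. */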
+ output_asm_insn (s, operands);
+ s = "bnez\t%2,1f\n\tbreak\t7\n1:";
+ }
+ else if (GENERATE_DIVIDE_TRAPS)
+ {
+ /* Avoid long replay penalty on load miss by putting the trap before
+ the divide. */
+ if (TUNE_74K)
+ output_asm_insn ("teq\t%2,%.,7", operands);
+ else
+ {
+ output_asm_insn (s, operands);
+ s = "teq\t%2,%.,7";
+ }
+ }
+ else
+ {
+ output_asm_insn ("%(bne\t%2,%.,1f", operands);
+ output_asm_insn (s, operands);
+ s = "break\t7%)\n1:";
+ }
+ }
+ return s;
+}
+
+/* Return true if IN_INSN is a multiply-add or multiply-subtract
+ instruction and if OUT_INSN assigns to the accumulator operand. */
+
+bool
+mips_linked_madd_p (rtx out_insn, rtx in_insn)
+{
+ rtx x;
+
+ x = single_set (in_insn);
+ if (x == 0)
+ return false;
+
+ x = SET_SRC (x);
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && reg_set_p (XEXP (x, 1), out_insn))
+ return true;
+
+ if (GET_CODE (x) == MINUS
+ && GET_CODE (XEXP (x, 1)) == MULT
+ && reg_set_p (XEXP (x, 0), out_insn))
+ return true;
+
+ return false;
+}
+
+/* True if the dependency between OUT_INSN and IN_INSN is on the store
+ data rather than the address. We need this because the cprestore
+ pattern is type "store", but is defined using an UNSPEC_VOLATILE,
+ which causes the default routine to abort. We just return false
+ for that case. */
+
+bool
+mips_store_data_bypass_p (rtx out_insn, rtx in_insn)
+{
+ if (GET_CODE (PATTERN (in_insn)) == UNSPEC_VOLATILE)
+ return false;
+
+ return !store_data_bypass_p (out_insn, in_insn);
+}
+
+
+/* Variables and flags used in scheduler hooks when tuning for
+ Loongson 2E/2F. */
+static struct
+{
+ /* Variables to support Loongson 2E/2F round-robin [F]ALU1/2 dispatch
+ strategy. */
+
+ /* If true, the next ALU1/2 instruction will go to ALU1. */
+ bool alu1_turn_p;
+
+ /* If true, the next FALU1/2 instruction will go to FALU1. */
+ bool falu1_turn_p;
+
+ /* Codes to query if [f]alu{1,2}_core units are subscribed or not. */
+ int alu1_core_unit_code;
+ int alu2_core_unit_code;
+ int falu1_core_unit_code;
+ int falu2_core_unit_code;
+
+ /* True if the current cycle has a multi instruction.
+ This flag is used in mips_ls2_dfa_post_advance_cycle. */
+ bool cycle_has_multi_p;
+
+ /* Instructions to subscribe ls2_[f]alu{1,2}_turn_enabled units.
+ These are used in mips_ls2_dfa_post_advance_cycle to initialize
+ DFA state.
+ E.g., when alu1_turn_enabled_insn is issued, it makes the next ALU1/2
+ instruction go to ALU1. */
+ rtx alu1_turn_enabled_insn;
+ rtx alu2_turn_enabled_insn;
+ rtx falu1_turn_enabled_insn;
+ rtx falu2_turn_enabled_insn;
+} mips_ls2;
+
+/* Implement TARGET_SCHED_ADJUST_COST. We assume that anti and output
+ dependencies have no cost, except on the 20Kc where output-dependence
+ is treated like input-dependence. */
+
+static int
+mips_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link,
+ rtx dep ATTRIBUTE_UNUSED, int cost)
+{
+ if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
+ && TUNE_20KC)
+ return cost;
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+ return cost;
+}
+
+/* Return the number of instructions that can be issued per cycle. */
+
+static int
+mips_issue_rate (void)
+{
+ switch (mips_tune)
+ {
+ case PROCESSOR_74KC:
+ case PROCESSOR_74KF2_1:
+ case PROCESSOR_74KF1_1:
+ case PROCESSOR_74KF3_2:
+ /* The 74k is not strictly a quad-issue CPU, but can be seen as one
+ by the scheduler. It can issue 1 ALU, 1 AGEN and 2 FPU insns,
+ but in reality only a maximum of 3 insns can be issued as
+ floating-point loads and stores also require a slot in the
+ AGEN pipe. */
+ case PROCESSOR_R10000:
+ /* All R10K processors are quad-issue (being the first MIPS
+ processors to support this feature). */
+ return 4;
+
+ case PROCESSOR_20KC:
+ case PROCESSOR_R4130:
+ case PROCESSOR_R5400:
+ case PROCESSOR_R5500:
+ case PROCESSOR_R7000:
+ case PROCESSOR_R9000:
+ case PROCESSOR_OCTEON:
+ return 2;
+
+ case PROCESSOR_SB1:
+ case PROCESSOR_SB1A:
+ /* This is actually 4, but we get better performance if we claim 3.
+ This is partly because of unwanted speculative code motion with the
+ larger number, and partly because in most common cases we can't
+ reach the theoretical max of 4. */
+ return 3;
+
+ case PROCESSOR_LOONGSON_2E:
+ case PROCESSOR_LOONGSON_2F:
+ case PROCESSOR_LOONGSON_3A:
+ return 4;
+
+ default:
+ return 1;
+ }
+}
+
+/* Implement TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN hook for Loongson2. */
+
+static void
+mips_ls2_init_dfa_post_cycle_insn (void)
+{
+ start_sequence ();
+ emit_insn (gen_ls2_alu1_turn_enabled_insn ());
+ mips_ls2.alu1_turn_enabled_insn = get_insns ();
+ end_sequence ();
+
+ start_sequence ();
+ emit_insn (gen_ls2_alu2_turn_enabled_insn ());
+ mips_ls2.alu2_turn_enabled_insn = get_insns ();
+ end_sequence ();
+
+ start_sequence ();
+ emit_insn (gen_ls2_falu1_turn_enabled_insn ());
+ mips_ls2.falu1_turn_enabled_insn = get_insns ();
+ end_sequence ();
+
+ start_sequence ();
+ emit_insn (gen_ls2_falu2_turn_enabled_insn ());
+ mips_ls2.falu2_turn_enabled_insn = get_insns ();
+ end_sequence ();
+
+ mips_ls2.alu1_core_unit_code = get_cpu_unit_code ("ls2_alu1_core");
+ mips_ls2.alu2_core_unit_code = get_cpu_unit_code ("ls2_alu2_core");
+ mips_ls2.falu1_core_unit_code = get_cpu_unit_code ("ls2_falu1_core");
+ mips_ls2.falu2_core_unit_code = get_cpu_unit_code ("ls2_falu2_core");
+}
+
+/* Implement TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN hook.
+ Init data used in mips_dfa_post_advance_cycle. */
+
+static void
+mips_init_dfa_post_cycle_insn (void)
+{
+ if (TUNE_LOONGSON_2EF)
+ mips_ls2_init_dfa_post_cycle_insn ();
+}
+
+/* Initialize STATE when scheduling for Loongson 2E/2F.
+ Support round-robin dispatch scheme by enabling only one of
+ ALU1/ALU2 and one of FALU1/FALU2 units for ALU1/2 and FALU1/2 instructions
+ respectively. */
+
+static void
+mips_ls2_dfa_post_advance_cycle (state_t state)
+{
+ if (cpu_unit_reservation_p (state, mips_ls2.alu1_core_unit_code))
+ {
+ /* Though there are no non-pipelined ALU1 insns,
+ we can get an instruction of type 'multi' before reload. */
+ gcc_assert (mips_ls2.cycle_has_multi_p);
+ mips_ls2.alu1_turn_p = false;
+ }
+
+ mips_ls2.cycle_has_multi_p = false;
+
+ if (cpu_unit_reservation_p (state, mips_ls2.alu2_core_unit_code))
+ /* We have a non-pipelined ALU instruction in the core;
+ adjust the round-robin counter. */
+ mips_ls2.alu1_turn_p = true;
+
+ if (mips_ls2.alu1_turn_p)
+ {
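+ /* state_transition returns a negative value when the transition
+ succeeds, so a nonnegative result here would mean that the DFA
+ rejected the enabling instruction. */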
+ if (state_transition (state, mips_ls2.alu1_turn_enabled_insn) >= 0)
+ gcc_unreachable ();
+ }
+ else
+ {
+ if (state_transition (state, mips_ls2.alu2_turn_enabled_insn) >= 0)
+ gcc_unreachable ();
+ }
+
+ if (cpu_unit_reservation_p (state, mips_ls2.falu1_core_unit_code))
+ {
+ /* There are no non-pipelined FALU1 insns. */
+ gcc_unreachable ();
+ mips_ls2.falu1_turn_p = false;
+ }
+
+ if (cpu_unit_reservation_p (state, mips_ls2.falu2_core_unit_code))
+ /* We have a non-pipelined falu instruction in the core,
+ adjust round-robin counter. */
+ mips_ls2.falu1_turn_p = true;
+
+ if (mips_ls2.falu1_turn_p)
+ {
+ if (state_transition (state, mips_ls2.falu1_turn_enabled_insn) >= 0)
+ gcc_unreachable ();
+ }
+ else
+ {
+ if (state_transition (state, mips_ls2.falu2_turn_enabled_insn) >= 0)
+ gcc_unreachable ();
+ }
+}
+
+/* Implement TARGET_SCHED_DFA_POST_ADVANCE_CYCLE.
+ This hook is called at the start of each cycle. */
+
+static void
+mips_dfa_post_advance_cycle (void)
+{
+ if (TUNE_LOONGSON_2EF)
+ mips_ls2_dfa_post_advance_cycle (curr_state);
+}
+
+/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should
+ be as wide as the scheduling freedom in the DFA. */
+
+static int
+mips_multipass_dfa_lookahead (void)
+{
+ /* Can schedule up to 4 of the 6 function units in any one cycle. */
+ if (TUNE_SB1)
+ return 4;
+
+ if (TUNE_LOONGSON_2EF || TUNE_LOONGSON_3A)
+ return 4;
+
+ if (TUNE_OCTEON)
+ return 2;
+
+ return 0;
+}
+
+/* Remove the instruction at index LOWER from ready queue READY and
+ reinsert it in front of the instruction at index HIGHER. LOWER must
+ be <= HIGHER. */
+
+static void
+mips_promote_ready (rtx *ready, int lower, int higher)
+{
+ rtx new_head;
+ int i;
+
+ new_head = ready[lower];
+ for (i = lower; i < higher; i++)
+ ready[i] = ready[i + 1];
+ ready[i] = new_head;
+}
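+
+/* For example, if READY is {A, B, C, D}, with D at the highest index
+ and therefore due to issue first, then mips_promote_ready (ready, 1, 3)
+ rotates B into the issue slot and the queue becomes {A, C, D, B}.
+ (An illustrative sketch with made-up insns A..D; the scheduler issues
+ from the highest index downwards.) */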
+
+/* If the priority of the instruction at POS2 in the ready queue READY
+ is within LIMIT units of that of the instruction at POS1, swap the
+ two instructions, provided POS1 is strictly less than POS2. */
+
+static void
+mips_maybe_swap_ready (rtx *ready, int pos1, int pos2, int limit)
+{
+ if (pos1 < pos2
+ && INSN_PRIORITY (ready[pos1]) + limit >= INSN_PRIORITY (ready[pos2]))
+ {
+ rtx temp;
+
+ temp = ready[pos1];
+ ready[pos1] = ready[pos2];
+ ready[pos2] = temp;
+ }
+}
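+
+/* For instance, with LIMIT == 4 and POS1 < POS2, an instruction of
+ priority 9 at POS2 is swapped with an instruction of priority 6 at
+ POS1 (6 + 4 >= 9), but not with one of priority 3. The priority
+ values here are hypothetical; see mips_74k_agen_reorder below for
+ the real caller. */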
+
+/* Used by TUNE_MACC_CHAINS to record the last scheduled instruction
+ that may clobber hi or lo. */
+static rtx mips_macc_chains_last_hilo;
+
+/* A TUNE_MACC_CHAINS helper function. Record that instruction INSN has
+ been scheduled, updating mips_macc_chains_last_hilo appropriately. */
+
+static void
+mips_macc_chains_record (rtx insn)
+{
+ if (get_attr_may_clobber_hilo (insn))
+ mips_macc_chains_last_hilo = insn;
+}
+
+/* A TUNE_MACC_CHAINS helper function. Search ready queue READY, which
+ has NREADY elements, looking for a multiply-add or multiply-subtract
+ instruction that is cumulative with mips_macc_chains_last_hilo.
+ If there is one, promote it ahead of anything else that might
+ clobber hi or lo. */
+
+static void
+mips_macc_chains_reorder (rtx *ready, int nready)
+{
+ int i, j;
+
+ if (mips_macc_chains_last_hilo != 0)
+ for (i = nready - 1; i >= 0; i--)
+ if (mips_linked_madd_p (mips_macc_chains_last_hilo, ready[i]))
+ {
+ for (j = nready - 1; j > i; j--)
+ if (recog_memoized (ready[j]) >= 0
+ && get_attr_may_clobber_hilo (ready[j]))
+ {
+ mips_promote_ready (ready, i, j);
+ break;
+ }
+ break;
+ }
+}
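+
+/* As a rough example, a loop whose body is
+
+ acc += (long long) a[i] * b[i];
+
+ expands on a 32-bit madd target to a chain of madd instructions that
+ accumulate in hi/lo. Letting an unrelated mult issue between two
+ links of the chain would clobber hi/lo and force the running total
+ out into general registers, which is what the promotion above avoids.
+ (A rough sketch; the names are invented and the exact insns depend
+ on the tuning and options in use.) */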
+
+/* The last instruction to be scheduled. */
+static rtx vr4130_last_insn;
+
+/* A note_stores callback used by vr4130_true_reg_dependence_p. DATA
+ points to an rtx that is initially an instruction. Nullify the rtx
+ if the instruction uses the value of register X. */
+
+static void
+vr4130_true_reg_dependence_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+ void *data)
+{
+ rtx *insn_ptr;
+
+ insn_ptr = (rtx *) data;
+ if (REG_P (x)
+ && *insn_ptr != 0
+ && reg_referenced_p (x, PATTERN (*insn_ptr)))
+ *insn_ptr = 0;
+}
+
+/* Return true if there is true register dependence between vr4130_last_insn
+ and INSN. */
+
+static bool
+vr4130_true_reg_dependence_p (rtx insn)
+{
+ note_stores (PATTERN (vr4130_last_insn),
+ vr4130_true_reg_dependence_p_1, &insn);
+ return insn == 0;
+}
+
+/* A TUNE_MIPS4130 helper function. Given that INSN1 is at the head of
+ the ready queue and that INSN2 is the instruction after it, return
+ true if it is worth promoting INSN2 ahead of INSN1. Look for cases
+ in which INSN1 and INSN2 can probably issue in parallel, but for
+ which (INSN2, INSN1) should be less sensitive to instruction
+ alignment than (INSN1, INSN2). See 4130.md for more details. */
+
+static bool
+vr4130_swap_insns_p (rtx insn1, rtx insn2)
+{
+ sd_iterator_def sd_it;
+ dep_t dep;
+
+ /* Check for the following case:
+
+ 1) there is some other instruction X with an anti dependence on INSN1;
+ 2) X has a higher priority than INSN2; and
+ 3) X is an arithmetic instruction (and thus has no unit restrictions).
+
+ If INSN1 is the last instruction blocking X, it would be better to
+ choose (INSN1, X) over (INSN2, INSN1). */
+ FOR_EACH_DEP (insn1, SD_LIST_FORW, sd_it, dep)
+ if (DEP_TYPE (dep) == REG_DEP_ANTI
+ && INSN_PRIORITY (DEP_CON (dep)) > INSN_PRIORITY (insn2)
+ && recog_memoized (DEP_CON (dep)) >= 0
+ && get_attr_vr4130_class (DEP_CON (dep)) == VR4130_CLASS_ALU)
+ return false;
+
+ if (vr4130_last_insn != 0
+ && recog_memoized (insn1) >= 0
+ && recog_memoized (insn2) >= 0)
+ {
+ /* See whether INSN1 and INSN2 use different execution units,
+ or if they are both ALU-type instructions. If so, they can
+ probably execute in parallel. */
+ enum attr_vr4130_class class1 = get_attr_vr4130_class (insn1);
+ enum attr_vr4130_class class2 = get_attr_vr4130_class (insn2);
+ if (class1 != class2 || class1 == VR4130_CLASS_ALU)
+ {
+ /* If only one of the instructions has a dependence on
+ vr4130_last_insn, prefer to schedule the other one first. */
+ bool dep1_p = vr4130_true_reg_dependence_p (insn1);
+ bool dep2_p = vr4130_true_reg_dependence_p (insn2);
+ if (dep1_p != dep2_p)
+ return dep1_p;
+
+ /* Prefer to schedule INSN2 ahead of INSN1 if vr4130_last_insn
+ is not an ALU-type instruction and if INSN1 uses the same
+ execution unit. (Note that if this condition holds, we already
+ know that INSN2 uses a different execution unit.) */
+ if (class1 != VR4130_CLASS_ALU
+ && recog_memoized (vr4130_last_insn) >= 0
+ && class1 == get_attr_vr4130_class (vr4130_last_insn))
+ return true;
+ }
+ }
+ return false;
+}
+
+/* A TUNE_MIPS4130 helper function. (READY, NREADY) describes a ready
+ queue with at least two instructions. Swap the first two if
+ vr4130_swap_insns_p says that it could be worthwhile. */
+
+static void
+vr4130_reorder (rtx *ready, int nready)
+{
+ if (vr4130_swap_insns_p (ready[nready - 1], ready[nready - 2]))
+ mips_promote_ready (ready, nready - 2, nready - 1);
+}
+
+/* Record whether the last 74k AGEN instruction was a load or a store. */
+static enum attr_type mips_last_74k_agen_insn = TYPE_UNKNOWN;
+
+/* Initialize mips_last_74k_agen_insn from INSN. A null argument
+ resets the state to TYPE_UNKNOWN. */
+
+static void
+mips_74k_agen_init (rtx insn)
+{
+ if (!insn || CALL_P (insn) || JUMP_P (insn))
+ mips_last_74k_agen_insn = TYPE_UNKNOWN;
+ else
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_LOAD || type == TYPE_STORE)
+ mips_last_74k_agen_insn = type;
+ }
+}
+
+/* A TUNE_74K helper function. The 74K AGEN pipeline likes multiple
+ loads to be grouped together, and multiple stores to be grouped
+ together. Swap things around in the ready queue to make this happen. */
+
+static void
+mips_74k_agen_reorder (rtx *ready, int nready)
+{
+ int i;
+ int store_pos, load_pos;
+
+ store_pos = -1;
+ load_pos = -1;
+
+ for (i = nready - 1; i >= 0; i--)
+ {
+ rtx insn = ready[i];
+ if (USEFUL_INSN_P (insn))
+ switch (get_attr_type (insn))
+ {
+ case TYPE_STORE:
+ if (store_pos == -1)
+ store_pos = i;
+ break;
+
+ case TYPE_LOAD:
+ if (load_pos == -1)
+ load_pos = i;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (load_pos == -1 || store_pos == -1)
+ return;
+
+ switch (mips_last_74k_agen_insn)
+ {
+ case TYPE_UNKNOWN:
+ /* Prefer to schedule loads since they have a higher latency. */
+ case TYPE_LOAD:
+ /* Swap loads to the front of the queue. */
+ mips_maybe_swap_ready (ready, load_pos, store_pos, 4);
+ break;
+ case TYPE_STORE:
+ /* Swap stores to the front of the queue. */
+ mips_maybe_swap_ready (ready, store_pos, load_pos, 4);
+ break;
+ default:
+ break;
+ }
+}
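+
+/* For example, if mips_last_74k_agen_insn is TYPE_STORE and the ready
+ queue holds both a load and a store of comparable priority, the store
+ is moved to the issue end of the queue so that stores stay grouped
+ together. */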
+
+/* Implement TARGET_SCHED_INIT. */
+
+static void
+mips_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ mips_macc_chains_last_hilo = 0;
+ vr4130_last_insn = 0;
+ mips_74k_agen_init (NULL_RTX);
+
+ /* When scheduling for Loongson2, branch instructions go to ALU1, so a
+ basic block is most likely to start with the round-robin counter
+ pointing to ALU2. */
+ mips_ls2.alu1_turn_p = false;
+ mips_ls2.falu1_turn_p = true;
+}
+
+/* Subroutine used by TARGET_SCHED_REORDER and TARGET_SCHED_REORDER2. */
+
+static void
+mips_sched_reorder_1 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED)
+{
+ if (!reload_completed
+ && TUNE_MACC_CHAINS
+ && *nreadyp > 0)
+ mips_macc_chains_reorder (ready, *nreadyp);
+
+ if (reload_completed
+ && TUNE_MIPS4130
+ && !TARGET_VR4130_ALIGN
+ && *nreadyp > 1)
+ vr4130_reorder (ready, *nreadyp);
+
+ if (TUNE_74K)
+ mips_74k_agen_reorder (ready, *nreadyp);
+}
+
+/* Implement TARGET_SCHED_REORDER. */
+
+static int
+mips_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED)
+{
+ mips_sched_reorder_1 (file, verbose, ready, nreadyp, cycle);
+ return mips_issue_rate ();
+}
+
+/* Implement TARGET_SCHED_REORDER2. */
+
+static int
+mips_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED)
+{
+ mips_sched_reorder_1 (file, verbose, ready, nreadyp, cycle);
+ return cached_can_issue_more;
+}
+
+/* Update round-robin counters for ALU1/2 and FALU1/2. */
+
+static void
+mips_ls2_variable_issue (rtx insn)
+{
+ if (mips_ls2.alu1_turn_p)
+ {
+ if (cpu_unit_reservation_p (curr_state, mips_ls2.alu1_core_unit_code))
+ mips_ls2.alu1_turn_p = false;
+ }
+ else
+ {
+ if (cpu_unit_reservation_p (curr_state, mips_ls2.alu2_core_unit_code))
+ mips_ls2.alu1_turn_p = true;
+ }
+
+ if (mips_ls2.falu1_turn_p)
+ {
+ if (cpu_unit_reservation_p (curr_state, mips_ls2.falu1_core_unit_code))
+ mips_ls2.falu1_turn_p = false;
+ }
+ else
+ {
+ if (cpu_unit_reservation_p (curr_state, mips_ls2.falu2_core_unit_code))
+ mips_ls2.falu1_turn_p = true;
+ }
+
+ if (recog_memoized (insn) >= 0)
+ mips_ls2.cycle_has_multi_p |= (get_attr_type (insn) == TYPE_MULTI);
+}
+
+/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
+
+static int
+mips_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx insn, int more)
+{
+ /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */
+ if (USEFUL_INSN_P (insn))
+ {
+ if (get_attr_type (insn) != TYPE_GHOST)
+ more--;
+ if (!reload_completed && TUNE_MACC_CHAINS)
+ mips_macc_chains_record (insn);
+ vr4130_last_insn = insn;
+ if (TUNE_74K)
+ mips_74k_agen_init (insn);
+ else if (TUNE_LOONGSON_2EF)
+ mips_ls2_variable_issue (insn);
+ }
+
+ /* Instructions of type 'multi' should all be split before
+ the second scheduling pass. */
+ gcc_assert (!reload_completed
+ || recog_memoized (insn) < 0
+ || get_attr_type (insn) != TYPE_MULTI);
+
+ cached_can_issue_more = more;
+ return more;
+}
+
+/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
+ return the first operand of the associated PREF or PREFX insn. */
+
+rtx
+mips_prefetch_cookie (rtx write, rtx locality)
+{
+ /* store_streamed / load_streamed. */
+ if (INTVAL (locality) <= 0)
+ return GEN_INT (INTVAL (write) + 4);
+
+ /* store / load. */
+ if (INTVAL (locality) <= 2)
+ return write;
+
+ /* store_retained / load_retained. */
+ return GEN_INT (INTVAL (write) + 6);
+}
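+
+/* Concretely, for __builtin_prefetch (ADDR, WRITE, LOCALITY) this
+ yields the standard PREF hints: (0, 0) gives 4 (load_streamed),
+ (1, 0) gives 5 (store_streamed), (0, 1) and (0, 2) give 0 (load),
+ (1, 3) gives 7 (store_retained), and the one-argument form, whose
+ default locality is 3, gives 6 (load_retained). */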
+
+/* Flags that indicate when a built-in function is available.
+
+ BUILTIN_AVAIL_NON_MIPS16
+ The function is available on the current target, but only
+ in non-MIPS16 mode. */
+#define BUILTIN_AVAIL_NON_MIPS16 1
+
+/* Declare an availability predicate for built-in functions that
+ require non-MIPS16 mode and also require COND to be true.
+ NAME is the main part of the predicate's name. */
+#define AVAIL_NON_MIPS16(NAME, COND) \
+ static unsigned int \
+ mips_builtin_avail_##NAME (void) \
+ { \
+ return (COND) ? BUILTIN_AVAIL_NON_MIPS16 : 0; \
+ }
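+
+/* For example, AVAIL_NON_MIPS16 (dsp, TARGET_DSP) below expands to:
+
+ static unsigned int
+ mips_builtin_avail_dsp (void)
+ {
+ return (TARGET_DSP) ? BUILTIN_AVAIL_NON_MIPS16 : 0;
+ } */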
+
+/* This structure describes a single built-in function. */
+struct mips_builtin_description {
+ /* The code of the main .md file instruction. See mips_builtin_type
+ for more information. */
+ enum insn_code icode;
+
+ /* The floating-point comparison code to use with ICODE, if any. */
+ enum mips_fp_condition cond;
+
+ /* The name of the built-in function. */
+ const char *name;
+
+ /* Specifies how the function should be expanded. */
+ enum mips_builtin_type builtin_type;
+
+ /* The function's prototype. */
+ enum mips_function_type function_type;
+
+ /* Whether the function is available. */
+ unsigned int (*avail) (void);
+};
+
+AVAIL_NON_MIPS16 (paired_single, TARGET_PAIRED_SINGLE_FLOAT)
+AVAIL_NON_MIPS16 (sb1_paired_single, TARGET_SB1 && TARGET_PAIRED_SINGLE_FLOAT)
+AVAIL_NON_MIPS16 (mips3d, TARGET_MIPS3D)
+AVAIL_NON_MIPS16 (dsp, TARGET_DSP)
+AVAIL_NON_MIPS16 (dspr2, TARGET_DSPR2)
+AVAIL_NON_MIPS16 (dsp_32, !TARGET_64BIT && TARGET_DSP)
+AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2)
+AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS)
+AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
+
+/* Construct a mips_builtin_description from the given arguments.
+
+ INSN is the name of the associated instruction pattern, without the
+ leading CODE_FOR_mips_.
+
+ COND is the floating-point condition code associated with the
+ function. It can be 'f' if the field is not applicable.
+
+ NAME is the name of the function itself, without the leading
+ "__builtin_mips_".
+
+ BUILTIN_TYPE and FUNCTION_TYPE are mips_builtin_description fields.
+
+ AVAIL is the name of the availability predicate, without the leading
+ mips_builtin_avail_. */
+#define MIPS_BUILTIN(INSN, COND, NAME, BUILTIN_TYPE, \
+ FUNCTION_TYPE, AVAIL) \
+ { CODE_FOR_mips_ ## INSN, MIPS_FP_COND_ ## COND, \
+ "__builtin_mips_" NAME, BUILTIN_TYPE, FUNCTION_TYPE, \
+ mips_builtin_avail_ ## AVAIL }
+
+/* Define __builtin_mips_<INSN>, which is a MIPS_BUILTIN_DIRECT function
+ mapped to instruction CODE_FOR_mips_<INSN>, FUNCTION_TYPE and AVAIL
+ are as for MIPS_BUILTIN. */
+#define DIRECT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
+ MIPS_BUILTIN (INSN, f, #INSN, MIPS_BUILTIN_DIRECT, FUNCTION_TYPE, AVAIL)
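+
+/* So, for example, DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF,
+ paired_single) expands to the initializer:
+
+ { CODE_FOR_mips_pll_ps, MIPS_FP_COND_f, "__builtin_mips_pll_ps",
+ MIPS_BUILTIN_DIRECT, MIPS_V2SF_FTYPE_V2SF_V2SF,
+ mips_builtin_avail_paired_single } */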
+
+/* Define __builtin_mips_<INSN>_<COND>_{s,d} functions, both of which
+ are subject to mips_builtin_avail_<AVAIL>. */
+#define CMP_SCALAR_BUILTINS(INSN, COND, AVAIL) \
+ MIPS_BUILTIN (INSN ## _cond_s, COND, #INSN "_" #COND "_s", \
+ MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_SF_SF, AVAIL), \
+ MIPS_BUILTIN (INSN ## _cond_d, COND, #INSN "_" #COND "_d", \
+ MIPS_BUILTIN_CMP_SINGLE, MIPS_INT_FTYPE_DF_DF, AVAIL)
+
+/* Define __builtin_mips_{any,all,upper,lower}_<INSN>_<COND>_ps.
+ The lower and upper forms are subject to mips_builtin_avail_<AVAIL>
+ while the any and all forms are subject to mips_builtin_avail_mips3d. */
+#define CMP_PS_BUILTINS(INSN, COND, AVAIL) \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "any_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_CMP_ANY, MIPS_INT_FTYPE_V2SF_V2SF, \
+ mips3d), \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "all_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_CMP_ALL, MIPS_INT_FTYPE_V2SF_V2SF, \
+ mips3d), \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "lower_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_CMP_LOWER, MIPS_INT_FTYPE_V2SF_V2SF, \
+ AVAIL), \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "upper_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_CMP_UPPER, MIPS_INT_FTYPE_V2SF_V2SF, \
+ AVAIL)
+
+/* Define __builtin_mips_{any,all}_<INSN>_<COND>_4s. The functions
+ are subject to mips_builtin_avail_mips3d. */
+#define CMP_4S_BUILTINS(INSN, COND) \
+ MIPS_BUILTIN (INSN ## _cond_4s, COND, "any_" #INSN "_" #COND "_4s", \
+ MIPS_BUILTIN_CMP_ANY, \
+ MIPS_INT_FTYPE_V2SF_V2SF_V2SF_V2SF, mips3d), \
+ MIPS_BUILTIN (INSN ## _cond_4s, COND, "all_" #INSN "_" #COND "_4s", \
+ MIPS_BUILTIN_CMP_ALL, \
+ MIPS_INT_FTYPE_V2SF_V2SF_V2SF_V2SF, mips3d)
+
+/* Define __builtin_mips_mov{t,f}_<INSN>_<COND>_ps. The comparison
+ instruction requires mips_builtin_avail_<AVAIL>. */
+#define MOVTF_BUILTINS(INSN, COND, AVAIL) \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "movt_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_MOVT, MIPS_V2SF_FTYPE_V2SF_V2SF_V2SF_V2SF, \
+ AVAIL), \
+ MIPS_BUILTIN (INSN ## _cond_ps, COND, "movf_" #INSN "_" #COND "_ps", \
+ MIPS_BUILTIN_MOVF, MIPS_V2SF_FTYPE_V2SF_V2SF_V2SF_V2SF, \
+ AVAIL)
+
+/* Define all the built-in functions related to C.cond.fmt condition COND. */
+#define CMP_BUILTINS(COND) \
+ MOVTF_BUILTINS (c, COND, paired_single), \
+ MOVTF_BUILTINS (cabs, COND, mips3d), \
+ CMP_SCALAR_BUILTINS (cabs, COND, mips3d), \
+ CMP_PS_BUILTINS (c, COND, paired_single), \
+ CMP_PS_BUILTINS (cabs, COND, mips3d), \
+ CMP_4S_BUILTINS (c, COND), \
+ CMP_4S_BUILTINS (cabs, COND)
+
+/* Define __builtin_mips_<INSN>, which is a MIPS_BUILTIN_DIRECT_NO_TARGET
+ function mapped to instruction CODE_FOR_mips_<INSN>, FUNCTION_TYPE
+ and AVAIL are as for MIPS_BUILTIN. */
+#define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
+ MIPS_BUILTIN (INSN, f, #INSN, MIPS_BUILTIN_DIRECT_NO_TARGET, \
+ FUNCTION_TYPE, AVAIL)
+
+/* Define __builtin_mips_bposge<VALUE>. <VALUE> is 32 for the MIPS32 DSP
+ branch instruction. AVAIL is as for MIPS_BUILTIN. */
+#define BPOSGE_BUILTIN(VALUE, AVAIL) \
+ MIPS_BUILTIN (bposge, f, "bposge" #VALUE, \
+ MIPS_BUILTIN_BPOSGE ## VALUE, MIPS_SI_FTYPE_VOID, AVAIL)
+
+/* Define a Loongson MIPS_BUILTIN_DIRECT function __builtin_loongson_<FN_NAME>
+ for instruction CODE_FOR_loongson_<INSN>. FUNCTION_TYPE is a
+ builtin_description field. */
+#define LOONGSON_BUILTIN_ALIAS(INSN, FN_NAME, FUNCTION_TYPE) \
+ { CODE_FOR_loongson_ ## INSN, MIPS_FP_COND_f, \
+ "__builtin_loongson_" #FN_NAME, MIPS_BUILTIN_DIRECT, \
+ FUNCTION_TYPE, mips_builtin_avail_loongson }
+
+/* Define a Loongson MIPS_BUILTIN_DIRECT function __builtin_loongson_<INSN>
+ for instruction CODE_FOR_loongson_<INSN>. FUNCTION_TYPE is a
+ builtin_description field. */
+#define LOONGSON_BUILTIN(INSN, FUNCTION_TYPE) \
+ LOONGSON_BUILTIN_ALIAS (INSN, INSN, FUNCTION_TYPE)
+
+/* Like LOONGSON_BUILTIN, but add _<SUFFIX> to the end of the function name.
+ We use functions of this form when the same insn can be usefully applied
+ to more than one datatype. */
+#define LOONGSON_BUILTIN_SUFFIX(INSN, SUFFIX, FUNCTION_TYPE) \
+ LOONGSON_BUILTIN_ALIAS (INSN, INSN ## _ ## SUFFIX, FUNCTION_TYPE)
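+
+/* For instance, LOONGSON_BUILTIN_SUFFIX (paddw, u, ...) defines
+ __builtin_loongson_paddw_u but still uses CODE_FOR_loongson_paddw,
+ which the #defines below redirect to the generic addv2si3 pattern. */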
+
+#define CODE_FOR_mips_sqrt_ps CODE_FOR_sqrtv2sf2
+#define CODE_FOR_mips_addq_ph CODE_FOR_addv2hi3
+#define CODE_FOR_mips_addu_qb CODE_FOR_addv4qi3
+#define CODE_FOR_mips_subq_ph CODE_FOR_subv2hi3
+#define CODE_FOR_mips_subu_qb CODE_FOR_subv4qi3
+#define CODE_FOR_mips_mul_ph CODE_FOR_mulv2hi3
+#define CODE_FOR_mips_mult CODE_FOR_mulsidi3_32bit
+#define CODE_FOR_mips_multu CODE_FOR_umulsidi3_32bit
+
+#define CODE_FOR_loongson_packsswh CODE_FOR_vec_pack_ssat_v2si
+#define CODE_FOR_loongson_packsshb CODE_FOR_vec_pack_ssat_v4hi
+#define CODE_FOR_loongson_packushb CODE_FOR_vec_pack_usat_v4hi
+#define CODE_FOR_loongson_paddw CODE_FOR_addv2si3
+#define CODE_FOR_loongson_paddh CODE_FOR_addv4hi3
+#define CODE_FOR_loongson_paddb CODE_FOR_addv8qi3
+#define CODE_FOR_loongson_paddsh CODE_FOR_ssaddv4hi3
+#define CODE_FOR_loongson_paddsb CODE_FOR_ssaddv8qi3
+#define CODE_FOR_loongson_paddush CODE_FOR_usaddv4hi3
+#define CODE_FOR_loongson_paddusb CODE_FOR_usaddv8qi3
+#define CODE_FOR_loongson_pmaxsh CODE_FOR_smaxv4hi3
+#define CODE_FOR_loongson_pmaxub CODE_FOR_umaxv8qi3
+#define CODE_FOR_loongson_pminsh CODE_FOR_sminv4hi3
+#define CODE_FOR_loongson_pminub CODE_FOR_uminv8qi3
+#define CODE_FOR_loongson_pmulhuh CODE_FOR_umulv4hi3_highpart
+#define CODE_FOR_loongson_pmulhh CODE_FOR_smulv4hi3_highpart
+#define CODE_FOR_loongson_pmullh CODE_FOR_mulv4hi3
+#define CODE_FOR_loongson_psllh CODE_FOR_ashlv4hi3
+#define CODE_FOR_loongson_psllw CODE_FOR_ashlv2si3
+#define CODE_FOR_loongson_psrlh CODE_FOR_lshrv4hi3
+#define CODE_FOR_loongson_psrlw CODE_FOR_lshrv2si3
+#define CODE_FOR_loongson_psrah CODE_FOR_ashrv4hi3
+#define CODE_FOR_loongson_psraw CODE_FOR_ashrv2si3
+#define CODE_FOR_loongson_psubw CODE_FOR_subv2si3
+#define CODE_FOR_loongson_psubh CODE_FOR_subv4hi3
+#define CODE_FOR_loongson_psubb CODE_FOR_subv8qi3
+#define CODE_FOR_loongson_psubsh CODE_FOR_sssubv4hi3
+#define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3
+#define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3
+#define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3
+#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi
+#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi
+#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si
+#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi
+#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi
+#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si
+
+static const struct mips_builtin_description mips_builtins[] = {
+ DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
+ DIRECT_BUILTIN (pul_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
+ DIRECT_BUILTIN (plu_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
+ DIRECT_BUILTIN (puu_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single),
+ DIRECT_BUILTIN (cvt_ps_s, MIPS_V2SF_FTYPE_SF_SF, paired_single),
+ DIRECT_BUILTIN (cvt_s_pl, MIPS_SF_FTYPE_V2SF, paired_single),
+ DIRECT_BUILTIN (cvt_s_pu, MIPS_SF_FTYPE_V2SF, paired_single),
+ DIRECT_BUILTIN (abs_ps, MIPS_V2SF_FTYPE_V2SF, paired_single),
+
+ DIRECT_BUILTIN (alnv_ps, MIPS_V2SF_FTYPE_V2SF_V2SF_INT, paired_single),
+ DIRECT_BUILTIN (addr_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, mips3d),
+ DIRECT_BUILTIN (mulr_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, mips3d),
+ DIRECT_BUILTIN (cvt_pw_ps, MIPS_V2SF_FTYPE_V2SF, mips3d),
+ DIRECT_BUILTIN (cvt_ps_pw, MIPS_V2SF_FTYPE_V2SF, mips3d),
+
+ DIRECT_BUILTIN (recip1_s, MIPS_SF_FTYPE_SF, mips3d),
+ DIRECT_BUILTIN (recip1_d, MIPS_DF_FTYPE_DF, mips3d),
+ DIRECT_BUILTIN (recip1_ps, MIPS_V2SF_FTYPE_V2SF, mips3d),
+ DIRECT_BUILTIN (recip2_s, MIPS_SF_FTYPE_SF_SF, mips3d),
+ DIRECT_BUILTIN (recip2_d, MIPS_DF_FTYPE_DF_DF, mips3d),
+ DIRECT_BUILTIN (recip2_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, mips3d),
+
+ DIRECT_BUILTIN (rsqrt1_s, MIPS_SF_FTYPE_SF, mips3d),
+ DIRECT_BUILTIN (rsqrt1_d, MIPS_DF_FTYPE_DF, mips3d),
+ DIRECT_BUILTIN (rsqrt1_ps, MIPS_V2SF_FTYPE_V2SF, mips3d),
+ DIRECT_BUILTIN (rsqrt2_s, MIPS_SF_FTYPE_SF_SF, mips3d),
+ DIRECT_BUILTIN (rsqrt2_d, MIPS_DF_FTYPE_DF_DF, mips3d),
+ DIRECT_BUILTIN (rsqrt2_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, mips3d),
+
+ MIPS_FP_CONDITIONS (CMP_BUILTINS),
+
+ /* Built-in functions for the SB-1 processor. */
+ DIRECT_BUILTIN (sqrt_ps, MIPS_V2SF_FTYPE_V2SF, sb1_paired_single),
+
+ /* Built-in functions for the DSP ASE (32-bit and 64-bit). */
+ DIRECT_BUILTIN (addq_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (addq_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (addq_s_w, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (addu_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (addu_s_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (subq_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (subq_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (subq_s_w, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (subu_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (subu_s_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (addsc, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (addwc, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (modsub, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (raddu_w_qb, MIPS_SI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (absq_s_ph, MIPS_V2HI_FTYPE_V2HI, dsp),
+ DIRECT_BUILTIN (absq_s_w, MIPS_SI_FTYPE_SI, dsp),
+ DIRECT_BUILTIN (precrq_qb_ph, MIPS_V4QI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (precrq_ph_w, MIPS_V2HI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (precrq_rs_ph_w, MIPS_V2HI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (precrqu_s_qb_ph, MIPS_V4QI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (preceq_w_phl, MIPS_SI_FTYPE_V2HI, dsp),
+ DIRECT_BUILTIN (preceq_w_phr, MIPS_SI_FTYPE_V2HI, dsp),
+ DIRECT_BUILTIN (precequ_ph_qbl, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (precequ_ph_qbr, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (precequ_ph_qbla, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (precequ_ph_qbra, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (preceu_ph_qbl, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (preceu_ph_qbr, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (preceu_ph_qbla, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (preceu_ph_qbra, MIPS_V2HI_FTYPE_V4QI, dsp),
+ DIRECT_BUILTIN (shll_qb, MIPS_V4QI_FTYPE_V4QI_SI, dsp),
+ DIRECT_BUILTIN (shll_ph, MIPS_V2HI_FTYPE_V2HI_SI, dsp),
+ DIRECT_BUILTIN (shll_s_ph, MIPS_V2HI_FTYPE_V2HI_SI, dsp),
+ DIRECT_BUILTIN (shll_s_w, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (shrl_qb, MIPS_V4QI_FTYPE_V4QI_SI, dsp),
+ DIRECT_BUILTIN (shra_ph, MIPS_V2HI_FTYPE_V2HI_SI, dsp),
+ DIRECT_BUILTIN (shra_r_ph, MIPS_V2HI_FTYPE_V2HI_SI, dsp),
+ DIRECT_BUILTIN (shra_r_w, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (muleu_s_ph_qbl, MIPS_V2HI_FTYPE_V4QI_V2HI, dsp),
+ DIRECT_BUILTIN (muleu_s_ph_qbr, MIPS_V2HI_FTYPE_V4QI_V2HI, dsp),
+ DIRECT_BUILTIN (mulq_rs_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (muleq_s_w_phl, MIPS_SI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (muleq_s_w_phr, MIPS_SI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (bitrev, MIPS_SI_FTYPE_SI, dsp),
+ DIRECT_BUILTIN (insv, MIPS_SI_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (repl_qb, MIPS_V4QI_FTYPE_SI, dsp),
+ DIRECT_BUILTIN (repl_ph, MIPS_V2HI_FTYPE_SI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmpu_eq_qb, MIPS_VOID_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmpu_lt_qb, MIPS_VOID_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmpu_le_qb, MIPS_VOID_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (cmpgu_eq_qb, MIPS_SI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (cmpgu_lt_qb, MIPS_SI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (cmpgu_le_qb, MIPS_SI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmp_eq_ph, MIPS_VOID_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmp_lt_ph, MIPS_VOID_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (cmp_le_ph, MIPS_VOID_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (pick_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dsp),
+ DIRECT_BUILTIN (pick_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_BUILTIN (packrl_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dsp),
+ DIRECT_NO_TARGET_BUILTIN (wrdsp, MIPS_VOID_FTYPE_SI_SI, dsp),
+ DIRECT_BUILTIN (rddsp, MIPS_SI_FTYPE_SI, dsp),
+ DIRECT_BUILTIN (lbux, MIPS_SI_FTYPE_POINTER_SI, dsp),
+ DIRECT_BUILTIN (lhx, MIPS_SI_FTYPE_POINTER_SI, dsp),
+ DIRECT_BUILTIN (lwx, MIPS_SI_FTYPE_POINTER_SI, dsp),
+ BPOSGE_BUILTIN (32, dsp),
+
+ /* The following are for the MIPS DSP ASE REV 2 (32-bit and 64-bit). */
+ DIRECT_BUILTIN (absq_s_qb, MIPS_V4QI_FTYPE_V4QI, dspr2),
+ DIRECT_BUILTIN (addu_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (addu_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (adduh_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (adduh_r_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (append, MIPS_SI_FTYPE_SI_SI_SI, dspr2),
+ DIRECT_BUILTIN (balign, MIPS_SI_FTYPE_SI_SI_SI, dspr2),
+ DIRECT_BUILTIN (cmpgdu_eq_qb, MIPS_SI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (cmpgdu_lt_qb, MIPS_SI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (cmpgdu_le_qb, MIPS_SI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (mul_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (mul_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (mulq_rs_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+ DIRECT_BUILTIN (mulq_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (mulq_s_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+ DIRECT_BUILTIN (precr_qb_ph, MIPS_V4QI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (precr_sra_ph_w, MIPS_V2HI_FTYPE_SI_SI_SI, dspr2),
+ DIRECT_BUILTIN (precr_sra_r_ph_w, MIPS_V2HI_FTYPE_SI_SI_SI, dspr2),
+ DIRECT_BUILTIN (prepend, MIPS_SI_FTYPE_SI_SI_SI, dspr2),
+ DIRECT_BUILTIN (shra_qb, MIPS_V4QI_FTYPE_V4QI_SI, dspr2),
+ DIRECT_BUILTIN (shra_r_qb, MIPS_V4QI_FTYPE_V4QI_SI, dspr2),
+ DIRECT_BUILTIN (shrl_ph, MIPS_V2HI_FTYPE_V2HI_SI, dspr2),
+ DIRECT_BUILTIN (subu_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (subu_s_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (subuh_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (subuh_r_qb, MIPS_V4QI_FTYPE_V4QI_V4QI, dspr2),
+ DIRECT_BUILTIN (addqh_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (addqh_r_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (addqh_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+ DIRECT_BUILTIN (addqh_r_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+ DIRECT_BUILTIN (subqh_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (subqh_r_ph, MIPS_V2HI_FTYPE_V2HI_V2HI, dspr2),
+ DIRECT_BUILTIN (subqh_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+ DIRECT_BUILTIN (subqh_r_w, MIPS_SI_FTYPE_SI_SI, dspr2),
+
+ /* Built-in functions for the DSP ASE (32-bit only). */
+ DIRECT_BUILTIN (dpau_h_qbl, MIPS_DI_FTYPE_DI_V4QI_V4QI, dsp_32),
+ DIRECT_BUILTIN (dpau_h_qbr, MIPS_DI_FTYPE_DI_V4QI_V4QI, dsp_32),
+ DIRECT_BUILTIN (dpsu_h_qbl, MIPS_DI_FTYPE_DI_V4QI_V4QI, dsp_32),
+ DIRECT_BUILTIN (dpsu_h_qbr, MIPS_DI_FTYPE_DI_V4QI_V4QI, dsp_32),
+ DIRECT_BUILTIN (dpaq_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (dpsq_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (mulsaq_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (dpaq_sa_l_w, MIPS_DI_FTYPE_DI_SI_SI, dsp_32),
+ DIRECT_BUILTIN (dpsq_sa_l_w, MIPS_DI_FTYPE_DI_SI_SI, dsp_32),
+ DIRECT_BUILTIN (maq_s_w_phl, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (maq_s_w_phr, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (maq_sa_w_phl, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (maq_sa_w_phr, MIPS_DI_FTYPE_DI_V2HI_V2HI, dsp_32),
+ DIRECT_BUILTIN (extr_w, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (extr_r_w, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (extr_rs_w, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (extr_s_h, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (extp, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (extpdp, MIPS_SI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (shilo, MIPS_DI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (mthlip, MIPS_DI_FTYPE_DI_SI, dsp_32),
+ DIRECT_BUILTIN (madd, MIPS_DI_FTYPE_DI_SI_SI, dsp_32),
+ DIRECT_BUILTIN (maddu, MIPS_DI_FTYPE_DI_USI_USI, dsp_32),
+ DIRECT_BUILTIN (msub, MIPS_DI_FTYPE_DI_SI_SI, dsp_32),
+ DIRECT_BUILTIN (msubu, MIPS_DI_FTYPE_DI_USI_USI, dsp_32),
+ DIRECT_BUILTIN (mult, MIPS_DI_FTYPE_SI_SI, dsp_32),
+ DIRECT_BUILTIN (multu, MIPS_DI_FTYPE_USI_USI, dsp_32),
+
+ /* The following are for the MIPS DSP ASE REV 2 (32-bit only). */
+ DIRECT_BUILTIN (dpa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dps_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (mulsa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpax_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpsx_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpaqx_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpaqx_sa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpsqx_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+ DIRECT_BUILTIN (dpsqx_sa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32),
+
+ /* Built-in functions for the ST Microelectronics Loongson-2E/2F cores. */
+ LOONGSON_BUILTIN (packsswh, MIPS_V4HI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN (packsshb, MIPS_V8QI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (packushb, MIPS_UV8QI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (paddw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (paddh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (paddb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (paddw, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_SUFFIX (paddh, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (paddb, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (paddd, u, MIPS_UDI_FTYPE_UDI_UDI),
+ LOONGSON_BUILTIN_SUFFIX (paddd, s, MIPS_DI_FTYPE_DI_DI),
+ LOONGSON_BUILTIN (paddsh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (paddsb, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN (paddush, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN (paddusb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_ALIAS (pandn_d, pandn_ud, MIPS_UDI_FTYPE_UDI_UDI),
+ LOONGSON_BUILTIN_ALIAS (pandn_w, pandn_uw, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_ALIAS (pandn_h, pandn_uh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_ALIAS (pandn_b, pandn_ub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_ALIAS (pandn_d, pandn_sd, MIPS_DI_FTYPE_DI_DI),
+ LOONGSON_BUILTIN_ALIAS (pandn_w, pandn_sw, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_ALIAS (pandn_h, pandn_sh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_ALIAS (pandn_b, pandn_sb, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN (pavgh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN (pavgb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqw, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqh, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpeqb, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgtw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgth, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgtb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgtw, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgth, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (pcmpgtb, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (pextrh, u, MIPS_UV4HI_FTYPE_UV4HI_USI),
+ LOONGSON_BUILTIN_SUFFIX (pextrh, s, MIPS_V4HI_FTYPE_V4HI_USI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_0, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_1, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_2, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_3, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_0, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_1, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_2, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (pinsrh_3, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pmaddhw, MIPS_V2SI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pmaxsh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pmaxub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN (pminsh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pminub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pmovmskb, u, MIPS_UV8QI_FTYPE_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pmovmskb, s, MIPS_V8QI_FTYPE_V8QI),
+ LOONGSON_BUILTIN (pmulhuh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN (pmulhh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pmullh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (pmuluw, MIPS_UDI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI),
+ LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psllw, s, MIPS_V2SI_FTYPE_V2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrah, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrah, s, MIPS_V4HI_FTYPE_V4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psraw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psraw, s, MIPS_V2SI_FTYPE_V2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrlh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrlh, s, MIPS_V4HI_FTYPE_V4HI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrlw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psrlw, s, MIPS_V2SI_FTYPE_V2SI_UQI),
+ LOONGSON_BUILTIN_SUFFIX (psubw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (psubh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (psubb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (psubw, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_SUFFIX (psubh, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (psubb, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (psubd, u, MIPS_UDI_FTYPE_UDI_UDI),
+ LOONGSON_BUILTIN_SUFFIX (psubd, s, MIPS_DI_FTYPE_DI_DI),
+ LOONGSON_BUILTIN (psubsh, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN (psubsb, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN (psubush, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN (psubusb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhbh, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhhw, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (punpckhwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklbh, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklhw, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI),
+ LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
+
+ /* Sundry other built-in functions. */
+ DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache)
+};
+
+/* Index I is the function declaration for mips_builtins[I], or null if the
+ function isn't defined on this target. */
+static GTY(()) tree mips_builtin_decls[ARRAY_SIZE (mips_builtins)];
+
+/* MODE is a vector mode whose elements have type TYPE. Return the type
+ of the vector itself. */
+
+static tree
+mips_builtin_vector_type (tree type, enum machine_mode mode)
+{
+ static tree types[2 * (int) MAX_MACHINE_MODE];
+ int mode_index;
+
+ mode_index = (int) mode;
+
+ if (TREE_CODE (type) == INTEGER_TYPE && TYPE_UNSIGNED (type))
+ mode_index += MAX_MACHINE_MODE;
+
+ if (types[mode_index] == NULL_TREE)
+ types[mode_index] = build_vector_type_for_mode (type, mode);
+ return types[mode_index];
+}
+
+/* Return a type for 'const volatile void *'. */
+
+static tree
+mips_build_cvpointer_type (void)
+{
+ static tree cache;
+
+ if (cache == NULL_TREE)
+ cache = build_pointer_type (build_qualified_type
+ (void_type_node,
+ TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
+ return cache;
+}
+
+/* Source-level argument types. */
+#define MIPS_ATYPE_VOID void_type_node
+#define MIPS_ATYPE_INT integer_type_node
+#define MIPS_ATYPE_POINTER ptr_type_node
+#define MIPS_ATYPE_CVPOINTER mips_build_cvpointer_type ()
+
+/* Standard mode-based argument types. */
+#define MIPS_ATYPE_UQI unsigned_intQI_type_node
+#define MIPS_ATYPE_SI intSI_type_node
+#define MIPS_ATYPE_USI unsigned_intSI_type_node
+#define MIPS_ATYPE_DI intDI_type_node
+#define MIPS_ATYPE_UDI unsigned_intDI_type_node
+#define MIPS_ATYPE_SF float_type_node
+#define MIPS_ATYPE_DF double_type_node
+
+/* Vector argument types. */
+#define MIPS_ATYPE_V2SF mips_builtin_vector_type (float_type_node, V2SFmode)
+#define MIPS_ATYPE_V2HI mips_builtin_vector_type (intHI_type_node, V2HImode)
+#define MIPS_ATYPE_V2SI mips_builtin_vector_type (intSI_type_node, V2SImode)
+#define MIPS_ATYPE_V4QI mips_builtin_vector_type (intQI_type_node, V4QImode)
+#define MIPS_ATYPE_V4HI mips_builtin_vector_type (intHI_type_node, V4HImode)
+#define MIPS_ATYPE_V8QI mips_builtin_vector_type (intQI_type_node, V8QImode)
+#define MIPS_ATYPE_UV2SI \
+ mips_builtin_vector_type (unsigned_intSI_type_node, V2SImode)
+#define MIPS_ATYPE_UV4HI \
+ mips_builtin_vector_type (unsigned_intHI_type_node, V4HImode)
+#define MIPS_ATYPE_UV8QI \
+ mips_builtin_vector_type (unsigned_intQI_type_node, V8QImode)
+
+/* MIPS_FTYPE_ATYPESN takes a return-type code followed by N
+ argument-type codes and lists their associated MIPS_ATYPEs. */
+#define MIPS_FTYPE_ATYPES1(A, B) \
+ MIPS_ATYPE_##A, MIPS_ATYPE_##B
+
+#define MIPS_FTYPE_ATYPES2(A, B, C) \
+ MIPS_ATYPE_##A, MIPS_ATYPE_##B, MIPS_ATYPE_##C
+
+#define MIPS_FTYPE_ATYPES3(A, B, C, D) \
+ MIPS_ATYPE_##A, MIPS_ATYPE_##B, MIPS_ATYPE_##C, MIPS_ATYPE_##D
+
+#define MIPS_FTYPE_ATYPES4(A, B, C, D, E) \
+ MIPS_ATYPE_##A, MIPS_ATYPE_##B, MIPS_ATYPE_##C, MIPS_ATYPE_##D, \
+ MIPS_ATYPE_##E
+
+/* Return the function type associated with function prototype TYPE. */
+
+static tree
+mips_build_function_type (enum mips_function_type type)
+{
+ static tree types[(int) MIPS_MAX_FTYPE_MAX];
+
+ if (types[(int) type] == NULL_TREE)
+ switch (type)
+ {
+#define DEF_MIPS_FTYPE(NUM, ARGS) \
+ case MIPS_FTYPE_NAME##NUM ARGS: \
+ types[(int) type] \
+ = build_function_type_list (MIPS_FTYPE_ATYPES##NUM ARGS, \
+ NULL_TREE); \
+ break;
+#include "config/mips/mips-ftypes.def"
+#undef DEF_MIPS_FTYPE
+ default:
+ gcc_unreachable ();
+ }
+
+ return types[(int) type];
+}
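+
+/* For example, assuming the usual MIPS_FTYPE_NAME2 definition from
+ mips.h, the DEF_MIPS_FTYPE (2, (V2SF, V2SF, V2SF)) entry in
+ mips-ftypes.def expands the case above to:
+
+ case MIPS_V2SF_FTYPE_V2SF_V2SF:
+ types[(int) type]
+ = build_function_type_list (MIPS_ATYPE_V2SF, MIPS_ATYPE_V2SF,
+ MIPS_ATYPE_V2SF, NULL_TREE);
+ break; */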
+
+/* Implement TARGET_INIT_BUILTINS. */
+
+static void
+mips_init_builtins (void)
+{
+ const struct mips_builtin_description *d;
+ unsigned int i;
+
+ /* Iterate over the mips_builtins array, registering each built-in
+ function that is available on the current target. */
+ for (i = 0; i < ARRAY_SIZE (mips_builtins); i++)
+ {
+ d = &mips_builtins[i];
+ if (d->avail ())
+ mips_builtin_decls[i]
+ = add_builtin_function (d->name,
+ mips_build_function_type (d->function_type),
+ i, BUILT_IN_MD, NULL, NULL);
+ }
+}
+
+/* Implement TARGET_BUILTIN_DECL. */
+
+static tree
+mips_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= ARRAY_SIZE (mips_builtins))
+ return error_mark_node;
+ return mips_builtin_decls[code];
+}
+
+/* Take argument ARGNO from EXP's argument list and convert it into a
+ form suitable for input operand OPNO of instruction ICODE. Return the
+ value. */
+
+static rtx
+mips_prepare_builtin_arg (enum insn_code icode,
+ unsigned int opno, tree exp, unsigned int argno)
+{
+ tree arg;
+ rtx value;
+ enum machine_mode mode;
+
+ arg = CALL_EXPR_ARG (exp, argno);
+ value = expand_normal (arg);
+ mode = insn_data[icode].operand[opno].mode;
+ if (!insn_data[icode].operand[opno].predicate (value, mode))
+ {
+ /* We need to get the mode from ARG for two reasons:
+
+ - to cope with address operands, where MODE is the mode of the
+ memory, rather than of VALUE itself.
+
+ - to cope with special predicates like pmode_register_operand,
+ where MODE is VOIDmode. */
+ value = copy_to_mode_reg (TYPE_MODE (TREE_TYPE (arg)), value);
+
+ /* Check the predicate again. */
+ if (!insn_data[icode].operand[opno].predicate (value, mode))
+ {
+ error ("invalid argument to built-in function");
+ return const0_rtx;
+ }
+ }
+
+ return value;
+}
+
+/* Return an rtx suitable for output operand OP of instruction ICODE.
+ If TARGET is non-null, try to use it where possible. */
+
+static rtx
+mips_prepare_builtin_target (enum insn_code icode, unsigned int op, rtx target)
+{
+ enum machine_mode mode;
+
+ mode = insn_data[icode].operand[op].mode;
+ if (target == 0 || !insn_data[icode].operand[op].predicate (target, mode))
+ target = gen_reg_rtx (mode);
+
+ return target;
+}
+
+/* Expand a MIPS_BUILTIN_DIRECT or MIPS_BUILTIN_DIRECT_NO_TARGET function;
+ HAS_TARGET_P says which. EXP is the CALL_EXPR that calls the function
+ and ICODE is the code of the associated .md pattern. TARGET, if nonnull,
+ suggests a good place to put the result. */
+
+static rtx
+mips_expand_builtin_direct (enum insn_code icode, rtx target, tree exp,
+ bool has_target_p)
+{
+ rtx ops[MAX_RECOG_OPERANDS];
+ int opno, argno;
+
+ /* Map any target to operand 0. */
+ opno = 0;
+ if (has_target_p)
+ {
+ target = mips_prepare_builtin_target (icode, opno, target);
+ ops[opno] = target;
+ opno++;
+ }
+
+ /* Map the arguments to the other operands. The n_operands value
+ for an expander includes match_dups and match_scratches as well as
+ match_operands, so n_operands is only an upper bound on the number
+ of arguments to the expander function. */
+ gcc_assert (opno + call_expr_nargs (exp) <= insn_data[icode].n_operands);
+ for (argno = 0; argno < call_expr_nargs (exp); argno++, opno++)
+ ops[opno] = mips_prepare_builtin_arg (icode, opno, exp, argno);
+
+ switch (opno)
+ {
+ case 2:
+ emit_insn (GEN_FCN (icode) (ops[0], ops[1]));
+ break;
+
+ case 3:
+ emit_insn (GEN_FCN (icode) (ops[0], ops[1], ops[2]));
+ break;
+
+ case 4:
+ emit_insn (GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return target;
+}
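+
+/* As a user-level illustration, with
+
+ typedef float v2sf __attribute__ ((vector_size (8)));
+ v2sf x = __builtin_mips_pll_ps (a, b);
+
+ this function is entered with ICODE == CODE_FOR_mips_pll_ps and
+ HAS_TARGET_P true; OPNO ends up as 3 and the three-operand case
+ emits the insn. (The variable names are invented.) */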
+
+/* Expand a __builtin_mips_movt_*_ps or __builtin_mips_movf_*_ps
+ function; TYPE says which. EXP is the CALL_EXPR that calls the
+ function, ICODE is the instruction that should be used to compare
+ the first two arguments, and COND is the condition it should test.
+ TARGET, if nonnull, suggests a good place to put the result. */
+
+static rtx
+mips_expand_builtin_movtf (enum mips_builtin_type type,
+ enum insn_code icode, enum mips_fp_condition cond,
+ rtx target, tree exp)
+{
+ rtx cmp_result, op0, op1;
+
+ cmp_result = mips_prepare_builtin_target (icode, 0, 0);
+ op0 = mips_prepare_builtin_arg (icode, 1, exp, 0);
+ op1 = mips_prepare_builtin_arg (icode, 2, exp, 1);
+ emit_insn (GEN_FCN (icode) (cmp_result, op0, op1, GEN_INT (cond)));
+
+ icode = CODE_FOR_mips_cond_move_tf_ps;
+ target = mips_prepare_builtin_target (icode, 0, target);
+ if (type == MIPS_BUILTIN_MOVT)
+ {
+ op1 = mips_prepare_builtin_arg (icode, 2, exp, 2);
+ op0 = mips_prepare_builtin_arg (icode, 1, exp, 3);
+ }
+ else
+ {
+ op0 = mips_prepare_builtin_arg (icode, 1, exp, 2);
+ op1 = mips_prepare_builtin_arg (icode, 2, exp, 3);
+ }
+ emit_insn (gen_mips_cond_move_tf_ps (target, op0, op1, cmp_result));
+ return target;
+}
+
+/* Move VALUE_IF_TRUE into TARGET if CONDITION is true; move VALUE_IF_FALSE
+ into TARGET otherwise. Return TARGET. */
+
+static rtx
+mips_builtin_branch_and_move (rtx condition, rtx target,
+ rtx value_if_true, rtx value_if_false)
+{
+ rtx true_label, done_label;
+
+ true_label = gen_label_rtx ();
+ done_label = gen_label_rtx ();
+
+ /* First assume that CONDITION is false. */
+ mips_emit_move (target, value_if_false);
+
+ /* Branch to TRUE_LABEL if CONDITION is true and DONE_LABEL otherwise. */
+ emit_jump_insn (gen_condjump (condition, true_label));
+ emit_jump_insn (gen_jump (done_label));
+ emit_barrier ();
+
+ /* Fix TARGET if CONDITION is true. */
+ emit_label (true_label);
+ mips_emit_move (target, value_if_true);
+
+ emit_label (done_label);
+ return target;
+}
+
+/* Expand a comparison built-in function of type BUILTIN_TYPE. EXP is
+ the CALL_EXPR that calls the function, ICODE is the code of the
+ comparison instruction, and COND is the condition it should test.
+ TARGET, if nonnull, suggests a good place to put the boolean result. */
+
+static rtx
+mips_expand_builtin_compare (enum mips_builtin_type builtin_type,
+ enum insn_code icode, enum mips_fp_condition cond,
+ rtx target, tree exp)
+{
+ rtx offset, condition, cmp_result, args[MAX_RECOG_OPERANDS];
+ int argno;
+
+ if (target == 0 || GET_MODE (target) != SImode)
+ target = gen_reg_rtx (SImode);
+
+ /* The instruction should have a target operand, an operand for each
+ argument, and an operand for COND. */
+ gcc_assert (call_expr_nargs (exp) + 2 == insn_data[icode].n_operands);
+
+ /* Prepare the operands to the comparison. */
+ cmp_result = mips_prepare_builtin_target (icode, 0, 0);
+ for (argno = 0; argno < call_expr_nargs (exp); argno++)
+ args[argno] = mips_prepare_builtin_arg (icode, argno + 1, exp, argno);
+
+ switch (insn_data[icode].n_operands)
+ {
+ case 4:
+ emit_insn (GEN_FCN (icode) (cmp_result, args[0], args[1],
+ GEN_INT (cond)));
+ break;
+
+ case 6:
+ emit_insn (GEN_FCN (icode) (cmp_result, args[0], args[1],
+ args[2], args[3], GEN_INT (cond)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If the comparison sets more than one register, we define the result
+ to be 0 if all registers are false and -1 if all registers are true.
+ The value of the complete result is indeterminate otherwise. */
+ switch (builtin_type)
+ {
+ case MIPS_BUILTIN_CMP_ALL:
+ condition = gen_rtx_NE (VOIDmode, cmp_result, constm1_rtx);
+ return mips_builtin_branch_and_move (condition, target,
+ const0_rtx, const1_rtx);
+
+ case MIPS_BUILTIN_CMP_UPPER:
+ case MIPS_BUILTIN_CMP_LOWER:
+ offset = GEN_INT (builtin_type == MIPS_BUILTIN_CMP_UPPER);
+ condition = gen_single_cc (cmp_result, offset);
+ return mips_builtin_branch_and_move (condition, target,
+ const1_rtx, const0_rtx);
+
+ default:
+ condition = gen_rtx_NE (VOIDmode, cmp_result, const0_rtx);
+ return mips_builtin_branch_and_move (condition, target,
+ const1_rtx, const0_rtx);
+ }
+}
+
+/* Expand a bposge built-in function of type BUILTIN_TYPE. TARGET,
+ if nonnull, suggests a good place to put the boolean result. */
+
+static rtx
+mips_expand_builtin_bposge (enum mips_builtin_type builtin_type, rtx target)
+{
+ rtx condition, cmp_result;
+ int cmp_value;
+
+ if (target == 0 || GET_MODE (target) != SImode)
+ target = gen_reg_rtx (SImode);
+
+ cmp_result = gen_rtx_REG (CCDSPmode, CCDSP_PO_REGNUM);
+
+ if (builtin_type == MIPS_BUILTIN_BPOSGE32)
+ cmp_value = 32;
+ else
+ gcc_assert (0);
+
+ condition = gen_rtx_GE (VOIDmode, cmp_result, GEN_INT (cmp_value));
+ return mips_builtin_branch_and_move (condition, target,
+ const1_rtx, const0_rtx);
+}
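+
+/* In user code this is reached from, e.g.:
+
+ if (__builtin_mips_bposge32 ())
+ ...
+
+ which tests whether the pos field of the DSP control register is
+ greater than or equal to 32. */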
+
+/* Implement TARGET_EXPAND_BUILTIN. */
+
+static rtx
+mips_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode, int ignore)
+{
+ tree fndecl;
+ unsigned int fcode, avail;
+ const struct mips_builtin_description *d;
+
+ fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ fcode = DECL_FUNCTION_CODE (fndecl);
+ gcc_assert (fcode < ARRAY_SIZE (mips_builtins));
+ d = &mips_builtins[fcode];
+ avail = d->avail ();
+ gcc_assert (avail != 0);
+ if (TARGET_MIPS16)
+ {
+ error ("built-in function %qE not supported for MIPS16",
+ DECL_NAME (fndecl));
+ return ignore ? const0_rtx : CONST0_RTX (mode);
+ }
+ switch (d->builtin_type)
+ {
+ case MIPS_BUILTIN_DIRECT:
+ return mips_expand_builtin_direct (d->icode, target, exp, true);
+
+ case MIPS_BUILTIN_DIRECT_NO_TARGET:
+ return mips_expand_builtin_direct (d->icode, target, exp, false);
+
+ case MIPS_BUILTIN_MOVT:
+ case MIPS_BUILTIN_MOVF:
+ return mips_expand_builtin_movtf (d->builtin_type, d->icode,
+ d->cond, target, exp);
+
+ case MIPS_BUILTIN_CMP_ANY:
+ case MIPS_BUILTIN_CMP_ALL:
+ case MIPS_BUILTIN_CMP_UPPER:
+ case MIPS_BUILTIN_CMP_LOWER:
+ case MIPS_BUILTIN_CMP_SINGLE:
+ return mips_expand_builtin_compare (d->builtin_type, d->icode,
+ d->cond, target, exp);
+
+ case MIPS_BUILTIN_BPOSGE32:
+ return mips_expand_builtin_bposge (d->builtin_type, target);
+ }
+ gcc_unreachable ();
+}
+
+/* An entry in the MIPS16 constant pool. VALUE is the pool constant,
+ MODE is its mode, and LABEL is the CODE_LABEL associated with it. */
+struct mips16_constant {
+ struct mips16_constant *next;
+ rtx value;
+ rtx label;
+ enum machine_mode mode;
+};
+
+/* Information about an incomplete MIPS16 constant pool. FIRST is the
+ first constant, HIGHEST_ADDRESS is the highest address that the first
+ byte of the pool can have, and INSN_ADDRESS is the current instruction
+ address. */
+struct mips16_constant_pool {
+ struct mips16_constant *first;
+ int highest_address;
+ int insn_address;
+};
+
+/* Add constant VALUE to POOL and return its label. MODE is the
+ value's mode (used for CONST_INTs, etc.). */
+
+static rtx
+mips16_add_constant (struct mips16_constant_pool *pool,
+ rtx value, enum machine_mode mode)
+{
+ struct mips16_constant **p, *c;
+ bool first_of_size_p;
+
+ /* See whether the constant is already in the pool. If so, return the
+ existing label, otherwise leave P pointing to the place where the
+ constant should be added.
+
+ Keep the pool sorted in increasing order of mode size so that we can
+ reduce the number of alignments needed. */
+ first_of_size_p = true;
+ for (p = &pool->first; *p != 0; p = &(*p)->next)
+ {
+ if (mode == (*p)->mode && rtx_equal_p (value, (*p)->value))
+ return (*p)->label;
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE ((*p)->mode))
+ break;
+ if (GET_MODE_SIZE (mode) == GET_MODE_SIZE ((*p)->mode))
+ first_of_size_p = false;
+ }
+
+ /* In the worst case, the constant needed by the earliest instruction
+ will end up at the end of the pool. The entire pool must then be
+ accessible from that instruction.
+
+ When adding the first constant, set the pool's highest address to
+ the address of the first out-of-range byte. Adjust this address
+ downwards each time a new constant is added. */
+ if (pool->first == 0)
+ /* For LWPC, ADDIUPC and DADDIUPC, the base PC value is the address
+ of the instruction with the lowest two bits clear. The base PC
+ value for LDPC has the lowest three bits clear. Assume the worst
+ case here; namely that the PC-relative instruction occupies the
+ last 2 bytes in an aligned word. */
+ pool->highest_address = pool->insn_address - (UNITS_PER_WORD - 2) + 0x8000;
+ pool->highest_address -= GET_MODE_SIZE (mode);
+ if (first_of_size_p)
+ /* Take into account the worst possible padding due to alignment. */
+ pool->highest_address -= GET_MODE_SIZE (mode) - 1;
+
+ /* Create a new entry. */
+ c = XNEW (struct mips16_constant);
+ c->value = value;
+ c->mode = mode;
+ c->label = gen_label_rtx ();
+ c->next = *p;
+ *p = c;
+
+ return c->label;
+}
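+
+/* A worked example of the address arithmetic above: adding the first
+ constant, a 4-byte SImode value, at INSN_ADDRESS 100 on a 32-bit
+ target (UNITS_PER_WORD == 4) gives HIGHEST_ADDRESS
+ == 100 - 2 + 0x8000 - 4 - 3, the last two terms being the constant's
+ size and the worst-case alignment padding. */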
+
+/* Output constant VALUE after instruction INSN and return the last
+ instruction emitted. MODE is the mode of the constant. */
+
+static rtx
+mips16_emit_constants_1 (enum machine_mode mode, rtx value, rtx insn)
+{
+ if (SCALAR_INT_MODE_P (mode) || ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+ {
+ rtx size = GEN_INT (GET_MODE_SIZE (mode));
+ return emit_insn_after (gen_consttable_int (value, size), insn);
+ }
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ return emit_insn_after (gen_consttable_float (value), insn);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ int i;
+
+ for (i = 0; i < CONST_VECTOR_NUNITS (value); i++)
+ insn = mips16_emit_constants_1 (GET_MODE_INNER (mode),
+ CONST_VECTOR_ELT (value, i), insn);
+ return insn;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Dump out the constants in CONSTANTS after INSN. */
+
+static void
+mips16_emit_constants (struct mips16_constant *constants, rtx insn)
+{
+ struct mips16_constant *c, *next;
+ int align;
+
+ align = 0;
+ for (c = constants; c != NULL; c = next)
+ {
+ /* If necessary, increase the alignment of PC. */
+ if (align < GET_MODE_SIZE (c->mode))
+ {
+ int align_log = floor_log2 (GET_MODE_SIZE (c->mode));
+ insn = emit_insn_after (gen_align (GEN_INT (align_log)), insn);
+ }
+ align = GET_MODE_SIZE (c->mode);
+
+ insn = emit_label_after (c->label, insn);
+ insn = mips16_emit_constants_1 (c->mode, c->value, insn);
+
+ next = c->next;
+ free (c);
+ }
+
+ emit_barrier_after (insn);
+}
+
+/* Return the length of instruction INSN. */
+
+static int
+mips16_insn_length (rtx insn)
+{
+ if (JUMP_P (insn))
+ {
+ rtx body = PATTERN (insn);
+ if (GET_CODE (body) == ADDR_VEC)
+ return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, 0);
+ if (GET_CODE (body) == ADDR_DIFF_VEC)
+ return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, 1);
+ }
+ return get_attr_length (insn);
+}
+
+/* If *X is a symbolic constant that refers to the constant pool, add
+ the constant to POOL and rewrite *X to use the constant's label. */
+
+static void
+mips16_rewrite_pool_constant (struct mips16_constant_pool *pool, rtx *x)
+{
+ rtx base, offset, label;
+
+ split_const (*x, &base, &offset);
+ if (GET_CODE (base) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (base))
+ {
+ label = mips16_add_constant (pool, get_pool_constant (base),
+ get_pool_mode (base));
+ base = gen_rtx_LABEL_REF (Pmode, label);
+ *x = mips_unspec_address_offset (base, offset, SYMBOL_PC_RELATIVE);
+ }
+}
+
+/* This structure is used to communicate with mips16_rewrite_pool_refs.
+ INSN is the instruction we're rewriting and POOL points to the current
+ constant pool. */
+struct mips16_rewrite_pool_refs_info {
+ rtx insn;
+ struct mips16_constant_pool *pool;
+};
+
+/* Rewrite *X so that constant pool references refer to the constant's
+ label instead. DATA points to a mips16_rewrite_pool_refs_info
+ structure. */
+
+static int
+mips16_rewrite_pool_refs (rtx *x, void *data)
+{
+ struct mips16_rewrite_pool_refs_info *info =
+ (struct mips16_rewrite_pool_refs_info *) data;
+
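+  /* A note on the return values below: returning -1 from a
+     for_each_rtx callback skips the subexpressions of *X, while
+     returning 0 continues the walk.  */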
+ if (force_to_mem_operand (*x, Pmode))
+ {
+ rtx mem = force_const_mem (GET_MODE (*x), *x);
+ validate_change (info->insn, x, mem, false);
+ }
+
+ if (MEM_P (*x))
+ {
+ mips16_rewrite_pool_constant (info->pool, &XEXP (*x, 0));
+ return -1;
+ }
+
+ if (TARGET_MIPS16_TEXT_LOADS)
+ mips16_rewrite_pool_constant (info->pool, x);
+
+ return GET_CODE (*x) == CONST ? -1 : 0;
+}
+
+/* Return whether CFG is used in mips_reorg. */
+
+static bool
+mips_cfg_in_reorg (void)
+{
+ return (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE
+ || TARGET_RELAX_PIC_CALLS);
+}
+
+/* Build MIPS16 constant pools. */
+
+static void
+mips16_lay_out_constants (void)
+{
+ struct mips16_constant_pool pool;
+ struct mips16_rewrite_pool_refs_info info;
+ rtx insn, barrier;
+
+ if (!TARGET_MIPS16_PCREL_LOADS)
+ return;
+
+ if (mips_cfg_in_reorg ())
+ split_all_insns ();
+ else
+ split_all_insns_noflow ();
+ barrier = 0;
+ memset (&pool, 0, sizeof (pool));
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ /* Rewrite constant pool references in INSN. */
+ if (USEFUL_INSN_P (insn))
+ {
+ info.insn = insn;
+ info.pool = &pool;
+ for_each_rtx (&PATTERN (insn), mips16_rewrite_pool_refs, &info);
+ }
+
+ pool.insn_address += mips16_insn_length (insn);
+
+ if (pool.first != NULL)
+ {
+ /* If there are no natural barriers between the first user of
+ the pool and the highest acceptable address, we'll need to
+ create a new instruction to jump around the constant pool.
+ In the worst case, this instruction will be 4 bytes long.
+
+ If it's too late to do this transformation after INSN,
+ do it immediately before INSN. */
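+	  /* (The "+ 4" in the test below reserves room for that
+	     worst-case jump when deciding whether the pool is still
+	     in range.)  */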
+ if (barrier == 0 && pool.insn_address + 4 > pool.highest_address)
+ {
+ rtx label, jump;
+
+ label = gen_label_rtx ();
+
+ jump = emit_jump_insn_before (gen_jump (label), insn);
+ JUMP_LABEL (jump) = label;
+ LABEL_NUSES (label) = 1;
+ barrier = emit_barrier_after (jump);
+
+ emit_label_after (label, barrier);
+ pool.insn_address += 4;
+ }
+
+ /* See whether the constant pool is now out of range of the first
+ user. If so, output the constants after the previous barrier.
+ Note that any instructions between BARRIER and INSN (inclusive)
+ will use negative offsets to refer to the pool. */
+ if (pool.insn_address > pool.highest_address)
+ {
+ mips16_emit_constants (pool.first, barrier);
+ pool.first = NULL;
+ barrier = 0;
+ }
+ else if (BARRIER_P (insn))
+ barrier = insn;
+ }
+ }
+ mips16_emit_constants (pool.first, get_last_insn ());
+}
+
+/* Return true if it is worth r10k_simplify_address's while replacing
+ an address with X. We are looking for constants, and for addresses
+ at a known offset from the incoming stack pointer. */
+
+static bool
+r10k_simplified_address_p (rtx x)
+{
+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+ x = XEXP (x, 0);
+ return x == virtual_incoming_args_rtx || CONSTANT_P (x);
+}
+
+/* X is an expression that appears in INSN. Try to use the UD chains
+ to simplify it, returning the simplified form on success and the
+ original form otherwise. Replace the incoming value of $sp with
+ virtual_incoming_args_rtx (which should never occur in X otherwise). */
+
+static rtx
+r10k_simplify_address (rtx x, rtx insn)
+{
+ rtx newx, op0, op1, set, def_insn, note;
+ df_ref use, def;
+ struct df_link *defs;
+
+ newx = NULL_RTX;
+ if (UNARY_P (x))
+ {
+ op0 = r10k_simplify_address (XEXP (x, 0), insn);
+ if (op0 != XEXP (x, 0))
+ newx = simplify_gen_unary (GET_CODE (x), GET_MODE (x),
+ op0, GET_MODE (XEXP (x, 0)));
+ }
+ else if (BINARY_P (x))
+ {
+ op0 = r10k_simplify_address (XEXP (x, 0), insn);
+ op1 = r10k_simplify_address (XEXP (x, 1), insn);
+ if (op0 != XEXP (x, 0) || op1 != XEXP (x, 1))
+ newx = simplify_gen_binary (GET_CODE (x), GET_MODE (x), op0, op1);
+ }
+ else if (GET_CODE (x) == LO_SUM)
+ {
+ /* LO_SUMs can be offset from HIGHs, if we know they won't
+ overflow. See mips_classify_address for the rationale behind
+ the lax check. */
+ op0 = r10k_simplify_address (XEXP (x, 0), insn);
+ if (GET_CODE (op0) == HIGH)
+ newx = XEXP (x, 1);
+ }
+ else if (REG_P (x))
+ {
+ /* Uses are recorded by regno_reg_rtx, not X itself. */
+ use = df_find_use (insn, regno_reg_rtx[REGNO (x)]);
+ gcc_assert (use);
+ defs = DF_REF_CHAIN (use);
+
+ /* Require a single definition. */
+ if (defs && defs->next == NULL)
+ {
+ def = defs->ref;
+ if (DF_REF_IS_ARTIFICIAL (def))
+ {
+ /* Replace the incoming value of $sp with
+ virtual_incoming_args_rtx. */
+ if (x == stack_pointer_rtx
+ && DF_REF_BB (def) == ENTRY_BLOCK_PTR)
+ newx = virtual_incoming_args_rtx;
+ }
+ else if (dominated_by_p (CDI_DOMINATORS, DF_REF_BB (use),
+ DF_REF_BB (def)))
+ {
+ /* Make sure that DEF_INSN is a single set of REG. */
+ def_insn = DF_REF_INSN (def);
+ if (NONJUMP_INSN_P (def_insn))
+ {
+ set = single_set (def_insn);
+ if (set && rtx_equal_p (SET_DEST (set), x))
+ {
+ /* Prefer to use notes, since the def-use chains
+ are often shorter. */
+ note = find_reg_equal_equiv_note (def_insn);
+ if (note)
+ newx = XEXP (note, 0);
+ else
+ newx = SET_SRC (set);
+ newx = r10k_simplify_address (newx, def_insn);
+ }
+ }
+ }
+ }
+ }
+ if (newx && r10k_simplified_address_p (newx))
+ return newx;
+ return x;
+}
+
+/* Return true if ADDRESS is known to be an uncached address
+ on R10K systems. */
+
+static bool
+r10k_uncached_address_p (unsigned HOST_WIDE_INT address)
+{
+ unsigned HOST_WIDE_INT upper;
+
+ /* Check for KSEG1. */
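+  /* (KSEG1 spans 0xa0000000...0xbfffffff.  Assuming 32-bit addresses
+     are sign-extended, adding 0x60000000 wraps exactly that range to
+     [0, 0x20000000), so a single unsigned comparison suffices; for
+     example, 0xffffffffa0000004 + 0x60000000 wraps to 4.)  */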
+ if (address + 0x60000000 < 0x20000000)
+ return true;
+
+ /* Check for uncached XKPHYS addresses. */
+ if (Pmode == DImode)
+ {
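+      /* (Reading of the constants below, stated as an assumption:
+	 XKPHYS addresses have bits 63:62 == 0b10 and the cache
+	 coherence attribute in bits 61:59; after the shift and mask,
+	 0x900000 and 0xb80000 correspond to attributes 2 (uncached)
+	 and 7 (uncached accelerated).)  */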
+ upper = (address >> 40) & 0xf9ffff;
+ if (upper == 0x900000 || upper == 0xb80000)
+ return true;
+ }
+ return false;
+}
+
+/* Return true if we can prove that an access to address X in instruction
+ INSN would be safe from R10K speculation. This X is a general
+ expression; it might not be a legitimate address. */
+
+static bool
+r10k_safe_address_p (rtx x, rtx insn)
+{
+ rtx base, offset;
+ HOST_WIDE_INT offset_val;
+
+ x = r10k_simplify_address (x, insn);
+
+ /* Check for references to the stack frame. It doesn't really matter
+ how much of the frame has been allocated at INSN; -mr10k-cache-barrier
+     allows us to assume that accesses to any part of the eventual frame
+     are safe from speculation at any point in the function.  */
+ mips_split_plus (x, &base, &offset_val);
+ if (base == virtual_incoming_args_rtx
+ && offset_val >= -cfun->machine->frame.total_size
+ && offset_val < cfun->machine->frame.args_size)
+ return true;
+
+ /* Check for uncached addresses. */
+ if (CONST_INT_P (x))
+ return r10k_uncached_address_p (INTVAL (x));
+
+ /* Check for accesses to a static object. */
+ split_const (x, &base, &offset);
+ return offset_within_block_p (base, INTVAL (offset));
+}
+
+/* Return true if a MEM with MEM_EXPR EXPR and MEM_OFFSET OFFSET is
+ an in-range access to an automatic variable, or to an object with
+ a link-time-constant address. */
+
+static bool
+r10k_safe_mem_expr_p (tree expr, rtx offset)
+{
+ if (expr == NULL_TREE
+ || offset == NULL_RTX
+ || !CONST_INT_P (offset)
+ || INTVAL (offset) < 0
+ || INTVAL (offset) >= int_size_in_bytes (TREE_TYPE (expr)))
+ return false;
+
+ while (TREE_CODE (expr) == COMPONENT_REF)
+ {
+ expr = TREE_OPERAND (expr, 0);
+ if (expr == NULL_TREE)
+ return false;
+ }
+
+ return DECL_P (expr);
+}
+
+/* A for_each_rtx callback for which DATA points to the instruction
+ containing *X. Stop the search if we find a MEM that is not safe
+ from R10K speculation. */
+
+static int
+r10k_needs_protection_p_1 (rtx *loc, void *data)
+{
+ rtx mem;
+
+ mem = *loc;
+ if (!MEM_P (mem))
+ return 0;
+
+ if (r10k_safe_mem_expr_p (MEM_EXPR (mem), MEM_OFFSET (mem)))
+ return -1;
+
+ if (r10k_safe_address_p (XEXP (mem, 0), (rtx) data))
+ return -1;
+
+ return 1;
+}
+
+/* A note_stores callback for which DATA points to an instruction pointer.
+   If *DATA is nonnull, make it null if X contains a MEM that is not
+ safe from R10K speculation. */
+
+static void
+r10k_needs_protection_p_store (rtx x, const_rtx pat ATTRIBUTE_UNUSED,
+ void *data)
+{
+ rtx *insn_ptr;
+
+ insn_ptr = (rtx *) data;
+ if (*insn_ptr && for_each_rtx (&x, r10k_needs_protection_p_1, *insn_ptr))
+ *insn_ptr = NULL_RTX;
+}
+
+/* A for_each_rtx callback applied to the pattern of a CALL_INSN.  The
+   overall for_each_rtx walk returns nonzero if the call is not to a
+   declared function.  */
+
+static int
+r10k_needs_protection_p_call (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x;
+
+ x = *loc;
+ if (!MEM_P (x))
+ return 0;
+
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DECL (x))
+ return -1;
+
+ return 1;
+}
+
+/* Return true if instruction INSN needs to be protected by an R10K
+ cache barrier. */
+
+static bool
+r10k_needs_protection_p (rtx insn)
+{
+ if (CALL_P (insn))
+ return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_call, NULL);
+
+ if (mips_r10k_cache_barrier == R10K_CACHE_BARRIER_STORE)
+ {
+ note_stores (PATTERN (insn), r10k_needs_protection_p_store, &insn);
+ return insn == NULL_RTX;
+ }
+
+ return for_each_rtx (&PATTERN (insn), r10k_needs_protection_p_1, insn);
+}
+
+/* Return true if BB is only reached by blocks in PROTECTED_BBS and if every
+ edge is unconditional. */
+
+static bool
+r10k_protected_bb_p (basic_block bb, sbitmap protected_bbs)
+{
+ edge_iterator ei;
+ edge e;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (!single_succ_p (e->src)
+ || !TEST_BIT (protected_bbs, e->src->index)
+ || (e->flags & EDGE_COMPLEX) != 0)
+ return false;
+ return true;
+}
+
+/* Implement -mr10k-cache-barrier= for the current function. */
+
+static void
+r10k_insert_cache_barriers (void)
+{
+ int *rev_post_order;
+ unsigned int i, n;
+ basic_block bb;
+ sbitmap protected_bbs;
+ rtx insn, end, unprotected_region;
+
+ if (TARGET_MIPS16)
+ {
+ sorry ("%qs does not support MIPS16 code", "-mr10k-cache-barrier");
+ return;
+ }
+
+ /* Calculate dominators. */
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ /* Bit X of PROTECTED_BBS is set if the last operation in basic block
+ X is protected by a cache barrier. */
+ protected_bbs = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (protected_bbs);
+
+ /* Iterate over the basic blocks in reverse post-order. */
+ rev_post_order = XNEWVEC (int, last_basic_block);
+ n = pre_and_rev_post_order_compute (NULL, rev_post_order, false);
+ for (i = 0; i < n; i++)
+ {
+ bb = BASIC_BLOCK (rev_post_order[i]);
+
+ /* If this block is only reached by unconditional edges, and if the
+ source of every edge is protected, the beginning of the block is
+ also protected. */
+ if (r10k_protected_bb_p (bb, protected_bbs))
+ unprotected_region = NULL_RTX;
+ else
+ unprotected_region = pc_rtx;
+ end = NEXT_INSN (BB_END (bb));
+
+ /* UNPROTECTED_REGION is:
+
+ - null if we are processing a protected region,
+ - pc_rtx if we are processing an unprotected region but have
+	      not yet found the first instruction in it,
+ - the first instruction in an unprotected region otherwise. */
+ for (insn = BB_HEAD (bb); insn != end; insn = NEXT_INSN (insn))
+ {
+ if (unprotected_region && USEFUL_INSN_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_mips_cache)
+ /* This CACHE instruction protects the following code. */
+ unprotected_region = NULL_RTX;
+ else
+ {
+ /* See if INSN is the first instruction in this
+ unprotected region. */
+ if (unprotected_region == pc_rtx)
+ unprotected_region = insn;
+
+ /* See if INSN needs to be protected. If so,
+ we must insert a cache barrier somewhere between
+ PREV_INSN (UNPROTECTED_REGION) and INSN. It isn't
+ clear which position is better performance-wise,
+ but as a tie-breaker, we assume that it is better
+ to allow delay slots to be back-filled where
+ possible, and that it is better not to insert
+ barriers in the middle of already-scheduled code.
+ We therefore insert the barrier at the beginning
+ of the region. */
+ if (r10k_needs_protection_p (insn))
+ {
+ emit_insn_before (gen_r10k_cache_barrier (),
+ unprotected_region);
+ unprotected_region = NULL_RTX;
+ }
+ }
+ }
+
+ if (CALL_P (insn))
+ /* The called function is not required to protect the exit path.
+ The code that follows a call is therefore unprotected. */
+ unprotected_region = pc_rtx;
+ }
+
+ /* Record whether the end of this block is protected. */
+ if (unprotected_region == NULL_RTX)
+ SET_BIT (protected_bbs, bb->index);
+ }
+ XDELETEVEC (rev_post_order);
+
+ sbitmap_free (protected_bbs);
+
+ free_dominance_info (CDI_DOMINATORS);
+}
+
+/* If INSN is a call, return the underlying CALL expr. Return NULL_RTX
+ otherwise. If INSN has two call rtx, then store the second one in
+ SECOND_CALL. */
+
+static rtx
+mips_call_expr_from_insn (rtx insn, rtx *second_call)
+{
+ rtx x;
+ rtx x2;
+
+ if (!CALL_P (insn))
+ return NULL_RTX;
+
+ x = PATTERN (insn);
+ if (GET_CODE (x) == PARALLEL)
+ {
+ /* Calls returning complex values have two CALL rtx. Look for the second
+ one here, and return it via the SECOND_CALL arg. */
+ x2 = XVECEXP (x, 0, 1);
+ if (GET_CODE (x2) == SET)
+ x2 = XEXP (x2, 1);
+ if (GET_CODE (x2) == CALL)
+ *second_call = x2;
+
+ x = XVECEXP (x, 0, 0);
+ }
+ if (GET_CODE (x) == SET)
+ x = XEXP (x, 1);
+ gcc_assert (GET_CODE (x) == CALL);
+
+ return x;
+}
+
+/* REG is set in DEF. See if the definition is one of the ways we load a
+ register with a symbol address for a mips_use_pic_fn_addr_reg_p call.
+ If it is, return the symbol reference of the function, otherwise return
+ NULL_RTX.
+
+ If RECURSE_P is true, use mips_find_pic_call_symbol to interpret
+ the values of source registers, otherwise treat such registers as
+ having an unknown value. */
+
+static rtx
+mips_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p)
+{
+ rtx def_insn, set;
+
+ if (DF_REF_IS_ARTIFICIAL (def))
+ return NULL_RTX;
+
+ def_insn = DF_REF_INSN (def);
+ set = single_set (def_insn);
+ if (set && rtx_equal_p (SET_DEST (set), reg))
+ {
+ rtx note, src, symbol;
+
+ /* First, look at REG_EQUAL/EQUIV notes. */
+ note = find_reg_equal_equiv_note (def_insn);
+ if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF)
+ return XEXP (note, 0);
+
+ /* For %call16 references we don't have REG_EQUAL. */
+ src = SET_SRC (set);
+ symbol = mips_strip_unspec_call (src);
+ if (symbol)
+ {
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ return symbol;
+ }
+
+ /* Follow at most one simple register copy. Such copies are
+ interesting in cases like:
+
+ for (...)
+ {
+ locally_binding_fn (...);
+ }
+
+ and:
+
+ locally_binding_fn (...);
+ ...
+ locally_binding_fn (...);
+
+ where the load of locally_binding_fn can legitimately be
+ hoisted or shared. However, we do not expect to see complex
+ chains of copies, so a full worklist solution to the problem
+ would probably be overkill. */
+ if (recurse_p && REG_P (src))
+ return mips_find_pic_call_symbol (def_insn, src, false);
+ }
+
+ return NULL_RTX;
+}
+
+/* Find the definition of the use of REG in INSN. See if the definition
+ is one of the ways we load a register with a symbol address for a
+   mips_use_pic_fn_addr_reg_p call.  If it is, return the symbol reference
+ of the function, otherwise return NULL_RTX. RECURSE_P is as for
+ mips_pic_call_symbol_from_set. */
+
+static rtx
+mips_find_pic_call_symbol (rtx insn, rtx reg, bool recurse_p)
+{
+ df_ref use;
+ struct df_link *defs;
+ rtx symbol;
+
+ use = df_find_use (insn, regno_reg_rtx[REGNO (reg)]);
+ if (!use)
+ return NULL_RTX;
+ defs = DF_REF_CHAIN (use);
+ if (!defs)
+ return NULL_RTX;
+ symbol = mips_pic_call_symbol_from_set (defs->ref, reg, recurse_p);
+ if (!symbol)
+ return NULL_RTX;
+
+ /* If we have more than one definition, they need to be identical. */
+ for (defs = defs->next; defs; defs = defs->next)
+ {
+ rtx other;
+
+ other = mips_pic_call_symbol_from_set (defs->ref, reg, recurse_p);
+ if (!rtx_equal_p (symbol, other))
+ return NULL_RTX;
+ }
+
+ return symbol;
+}
+
+/* Replace the args_size operand of the call expression CALL with the
+ call-attribute UNSPEC and fill in SYMBOL as the function symbol. */
+
+static void
+mips_annotate_pic_call_expr (rtx call, rtx symbol)
+{
+ rtx args_size;
+
+ args_size = XEXP (call, 1);
+ XEXP (call, 1) = gen_rtx_UNSPEC (GET_MODE (args_size),
+ gen_rtvec (2, args_size, symbol),
+ UNSPEC_CALL_ATTR);
+}
+
+/* OPERANDS[ARGS_SIZE_OPNO] is the arg_size operand of a CALL expression.
+   See whether it instead holds the call attributes.  If so, set
+   OPERANDS[ARGS_SIZE_OPNO] to the function symbol recorded in those
+   attributes and return true.  Return false otherwise, and also when
+   ARGS_SIZE_OPNO is -1.  */
+
+bool
+mips_get_pic_call_symbol (rtx *operands, int args_size_opno)
+{
+ rtx args_size, symbol;
+
+ if (!TARGET_RELAX_PIC_CALLS || args_size_opno == -1)
+ return false;
+
+ args_size = operands[args_size_opno];
+ if (GET_CODE (args_size) != UNSPEC)
+ return false;
+ gcc_assert (XINT (args_size, 1) == UNSPEC_CALL_ATTR);
+
+ symbol = XVECEXP (args_size, 0, 1);
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+
+ operands[args_size_opno] = symbol;
+ return true;
+}
+
+/* Use DF to annotate PIC indirect calls with the function symbol they
+ dispatch to. */
+
+static void
+mips_annotate_pic_calls (void)
+{
+ basic_block bb;
+ rtx insn;
+
+ FOR_EACH_BB (bb)
+ FOR_BB_INSNS (bb, insn)
+ {
+ rtx call, reg, symbol, second_call;
+
+ second_call = 0;
+ call = mips_call_expr_from_insn (insn, &second_call);
+ if (!call)
+ continue;
+ gcc_assert (MEM_P (XEXP (call, 0)));
+ reg = XEXP (XEXP (call, 0), 0);
+ if (!REG_P (reg))
+ continue;
+
+ symbol = mips_find_pic_call_symbol (insn, reg, true);
+ if (symbol)
+ {
+ mips_annotate_pic_call_expr (call, symbol);
+ if (second_call)
+ mips_annotate_pic_call_expr (second_call, symbol);
+ }
+ }
+}
+
+/* A temporary variable used by for_each_rtx callbacks, etc. */
+static rtx mips_sim_insn;
+
+/* A structure representing the state of the processor pipeline.
+ Used by the mips_sim_* family of functions. */
+struct mips_sim {
+ /* The maximum number of instructions that can be issued in a cycle.
+ (Caches mips_issue_rate.) */
+ unsigned int issue_rate;
+
+ /* The current simulation time. */
+ unsigned int time;
+
+ /* How many more instructions can be issued in the current cycle. */
+ unsigned int insns_left;
+
+ /* LAST_SET[X].INSN is the last instruction to set register X.
+ LAST_SET[X].TIME is the time at which that instruction was issued.
+ INSN is null if no instruction has yet set register X. */
+ struct {
+ rtx insn;
+ unsigned int time;
+ } last_set[FIRST_PSEUDO_REGISTER];
+
+ /* The pipeline's current DFA state. */
+ state_t dfa_state;
+};
+
+/* Reset STATE to the initial simulation state. */
+
+static void
+mips_sim_reset (struct mips_sim *state)
+{
+ state->time = 0;
+ state->insns_left = state->issue_rate;
+ memset (&state->last_set, 0, sizeof (state->last_set));
+ state_reset (state->dfa_state);
+}
+
+/* Initialize STATE before its first use. DFA_STATE points to an
+ allocated but uninitialized DFA state. */
+
+static void
+mips_sim_init (struct mips_sim *state, state_t dfa_state)
+{
+ state->issue_rate = mips_issue_rate ();
+ state->dfa_state = dfa_state;
+ mips_sim_reset (state);
+}
+
+/* Advance STATE by one clock cycle. */
+
+static void
+mips_sim_next_cycle (struct mips_sim *state)
+{
+ state->time++;
+ state->insns_left = state->issue_rate;
+ state_transition (state->dfa_state, 0);
+}
+
+/* Advance simulation state STATE until instruction INSN can read
+ register REG. */
+
+static void
+mips_sim_wait_reg (struct mips_sim *state, rtx insn, rtx reg)
+{
+ unsigned int regno, end_regno;
+
+ end_regno = END_REGNO (reg);
+ for (regno = REGNO (reg); regno < end_regno; regno++)
+ if (state->last_set[regno].insn != 0)
+ {
+ unsigned int t;
+
+ t = (state->last_set[regno].time
+ + insn_latency (state->last_set[regno].insn, insn));
+ while (state->time < t)
+ mips_sim_next_cycle (state);
+ }
+}
+
+/* A for_each_rtx callback. If *X is a register, advance simulation state
+ DATA until mips_sim_insn can read the register's value. */
+
+static int
+mips_sim_wait_regs_2 (rtx *x, void *data)
+{
+ if (REG_P (*x))
+ mips_sim_wait_reg ((struct mips_sim *) data, mips_sim_insn, *x);
+ return 0;
+}
+
+/* Call mips_sim_wait_regs_2 (R, DATA) for each register R mentioned in *X. */
+
+static void
+mips_sim_wait_regs_1 (rtx *x, void *data)
+{
+ for_each_rtx (x, mips_sim_wait_regs_2, data);
+}
+
+/* Advance simulation state STATE until all of INSN's register
+ dependencies are satisfied. */
+
+static void
+mips_sim_wait_regs (struct mips_sim *state, rtx insn)
+{
+ mips_sim_insn = insn;
+ note_uses (&PATTERN (insn), mips_sim_wait_regs_1, state);
+}
+
+/* Advance simulation state STATE until the units required by
+ instruction INSN are available. */
+
+static void
+mips_sim_wait_units (struct mips_sim *state, rtx insn)
+{
+ state_t tmp_state;
+
+ tmp_state = alloca (state_size ());
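+  /* state_transition returns a negative value when the DFA can accept
+     INSN in its current state, so advance a cycle whenever the trial
+     transition on the copied state fails or no issue slots remain.  */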
+ while (state->insns_left == 0
+ || (memcpy (tmp_state, state->dfa_state, state_size ()),
+ state_transition (tmp_state, insn) >= 0))
+ mips_sim_next_cycle (state);
+}
+
+/* Advance simulation state STATE until INSN is ready to issue. */
+
+static void
+mips_sim_wait_insn (struct mips_sim *state, rtx insn)
+{
+ mips_sim_wait_regs (state, insn);
+ mips_sim_wait_units (state, insn);
+}
+
+/* mips_sim_insn has just set X. Update the LAST_SET array
+ in simulation state DATA. */
+
+static void
+mips_sim_record_set (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ struct mips_sim *state;
+
+ state = (struct mips_sim *) data;
+ if (REG_P (x))
+ {
+ unsigned int regno, end_regno;
+
+ end_regno = END_REGNO (x);
+ for (regno = REGNO (x); regno < end_regno; regno++)
+ {
+ state->last_set[regno].insn = mips_sim_insn;
+ state->last_set[regno].time = state->time;
+ }
+ }
+}
+
+/* Issue instruction INSN in scheduler state STATE. Assume that INSN
+ can issue immediately (i.e., that mips_sim_wait_insn has already
+ been called). */
+
+static void
+mips_sim_issue_insn (struct mips_sim *state, rtx insn)
+{
+ state_transition (state->dfa_state, insn);
+ state->insns_left--;
+
+ mips_sim_insn = insn;
+ note_stores (PATTERN (insn), mips_sim_record_set, state);
+}
+
+/* Simulate issuing a NOP in state STATE. */
+
+static void
+mips_sim_issue_nop (struct mips_sim *state)
+{
+ if (state->insns_left == 0)
+ mips_sim_next_cycle (state);
+ state->insns_left--;
+}
+
+/* Update simulation state STATE so that it's ready to accept the instruction
+ after INSN. INSN should be part of the main rtl chain, not a member of a
+ SEQUENCE. */
+
+static void
+mips_sim_finish_insn (struct mips_sim *state, rtx insn)
+{
+ /* If INSN is a jump with an implicit delay slot, simulate a nop. */
+ if (JUMP_P (insn))
+ mips_sim_issue_nop (state);
+
+ switch (GET_CODE (SEQ_BEGIN (insn)))
+ {
+ case CODE_LABEL:
+ case CALL_INSN:
+ /* We can't predict the processor state after a call or label. */
+ mips_sim_reset (state);
+ break;
+
+ case JUMP_INSN:
+ /* The delay slots of branch likely instructions are only executed
+ when the branch is taken. Therefore, if the caller has simulated
+ the delay slot instruction, STATE does not really reflect the state
+ of the pipeline for the instruction after the delay slot. Also,
+ branch likely instructions tend to incur a penalty when not taken,
+ so there will probably be an extra delay between the branch and
+ the instruction after the delay slot. */
+ if (INSN_ANNULLED_BRANCH_P (SEQ_BEGIN (insn)))
+ mips_sim_reset (state);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* The VR4130 pipeline issues aligned pairs of instructions together,
+ but it stalls the second instruction if it depends on the first.
+ In order to cut down the amount of logic required, this dependence
+ check is not based on a full instruction decode. Instead, any non-SPECIAL
+ instruction is assumed to modify the register specified by bits 20-16
+ (which is usually the "rt" field).
+
+ In BEQ, BEQL, BNE and BNEL instructions, the rt field is actually an
+ input, so we can end up with a false dependence between the branch
+ and its delay slot. If this situation occurs in instruction INSN,
+ try to avoid it by swapping rs and rt. */
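+
+/* For example, if the branch is "beq $4,$5,foo" and its delay slot
+   reads $5 but not $4, the pipeline would see a false dependence on
+   $5 (the rt field); rewriting the branch as "beq $5,$4,foo" avoids
+   the stall.  */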
+
+static void
+vr4130_avoid_branch_rt_conflict (rtx insn)
+{
+ rtx first, second;
+
+ first = SEQ_BEGIN (insn);
+ second = SEQ_END (insn);
+ if (JUMP_P (first)
+ && NONJUMP_INSN_P (second)
+ && GET_CODE (PATTERN (first)) == SET
+ && GET_CODE (SET_DEST (PATTERN (first))) == PC
+ && GET_CODE (SET_SRC (PATTERN (first))) == IF_THEN_ELSE)
+ {
+ /* Check for the right kind of condition. */
+ rtx cond = XEXP (SET_SRC (PATTERN (first)), 0);
+ if ((GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
+ && REG_P (XEXP (cond, 0))
+ && REG_P (XEXP (cond, 1))
+ && reg_referenced_p (XEXP (cond, 1), PATTERN (second))
+ && !reg_referenced_p (XEXP (cond, 0), PATTERN (second)))
+ {
+ /* SECOND mentions the rt register but not the rs register. */
+ rtx tmp = XEXP (cond, 0);
+ XEXP (cond, 0) = XEXP (cond, 1);
+ XEXP (cond, 1) = tmp;
+ }
+ }
+}
+
+/* Implement -mvr4130-align. Go through each basic block and simulate the
+ processor pipeline. If we find that a pair of instructions could execute
+ in parallel, and the first of those instructions is not 8-byte aligned,
+ insert a nop to make it aligned. */
+
+static void
+vr4130_align_insns (void)
+{
+ struct mips_sim state;
+ rtx insn, subinsn, last, last2, next;
+ bool aligned_p;
+
+ dfa_start ();
+
+ /* LAST is the last instruction before INSN to have a nonzero length.
+ LAST2 is the last such instruction before LAST. */
+ last = 0;
+ last2 = 0;
+
+ /* ALIGNED_P is true if INSN is known to be at an aligned address. */
+ aligned_p = true;
+
+ mips_sim_init (&state, alloca (state_size ()));
+ for (insn = get_insns (); insn != 0; insn = next)
+ {
+ unsigned int length;
+
+ next = NEXT_INSN (insn);
+
+ /* See the comment above vr4130_avoid_branch_rt_conflict for details.
+ This isn't really related to the alignment pass, but we do it on
+ the fly to avoid a separate instruction walk. */
+ vr4130_avoid_branch_rt_conflict (insn);
+
+ if (USEFUL_INSN_P (insn))
+ FOR_EACH_SUBINSN (subinsn, insn)
+ {
+ mips_sim_wait_insn (&state, subinsn);
+
+ /* If we want this instruction to issue in parallel with the
+ previous one, make sure that the previous instruction is
+ aligned. There are several reasons why this isn't worthwhile
+ when the second instruction is a call:
+
+ - Calls are less likely to be performance critical,
+ - There's a good chance that the delay slot can execute
+ in parallel with the call.
+ - The return address would then be unaligned.
+
+ In general, if we're going to insert a nop between instructions
+ X and Y, it's better to insert it immediately after X. That
+ way, if the nop makes Y aligned, it will also align any labels
+ between X and Y. */
+ if (state.insns_left != state.issue_rate
+ && !CALL_P (subinsn))
+ {
+ if (subinsn == SEQ_BEGIN (insn) && aligned_p)
+ {
+ /* SUBINSN is the first instruction in INSN and INSN is
+ aligned. We want to align the previous instruction
+ instead, so insert a nop between LAST2 and LAST.
+
+ Note that LAST could be either a single instruction
+ or a branch with a delay slot. In the latter case,
+ LAST, like INSN, is already aligned, but the delay
+ slot must have some extra delay that stops it from
+ issuing at the same time as the branch. We therefore
+ insert a nop before the branch in order to align its
+ delay slot. */
+ emit_insn_after (gen_nop (), last2);
+ aligned_p = false;
+ }
+ else if (subinsn != SEQ_BEGIN (insn) && !aligned_p)
+ {
+ /* SUBINSN is the delay slot of INSN, but INSN is
+ currently unaligned. Insert a nop between
+ LAST and INSN to align it. */
+ emit_insn_after (gen_nop (), last);
+ aligned_p = true;
+ }
+ }
+ mips_sim_issue_insn (&state, subinsn);
+ }
+ mips_sim_finish_insn (&state, insn);
+
+ /* Update LAST, LAST2 and ALIGNED_P for the next instruction. */
+ length = get_attr_length (insn);
+ if (length > 0)
+ {
+ /* If the instruction is an asm statement or multi-instruction
+	     mips.md pattern, the length is only an estimate.  Insert an
+	     8-byte alignment after it so that the following instructions
+ can be handled correctly. */
+ if (NONJUMP_INSN_P (SEQ_BEGIN (insn))
+ && (recog_memoized (insn) < 0 || length >= 8))
+ {
+ next = emit_insn_after (gen_align (GEN_INT (3)), insn);
+ next = NEXT_INSN (next);
+ mips_sim_next_cycle (&state);
+ aligned_p = true;
+ }
+ else if (length & 4)
+ aligned_p = !aligned_p;
+ last2 = last;
+ last = insn;
+ }
+
+ /* See whether INSN is an aligned label. */
+ if (LABEL_P (insn) && label_to_alignment (insn) >= 3)
+ aligned_p = true;
+ }
+ dfa_finish ();
+}
+
+/* This structure records that the current function has a LO_SUM
+ involving SYMBOL_REF or LABEL_REF BASE and that MAX_OFFSET is
+ the largest offset applied to BASE by all such LO_SUMs. */
+struct mips_lo_sum_offset {
+ rtx base;
+ HOST_WIDE_INT offset;
+};
+
+/* Return a hash value for SYMBOL_REF or LABEL_REF BASE. */
+
+static hashval_t
+mips_hash_base (rtx base)
+{
+ int do_not_record_p;
+
+ return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false);
+}
+
+/* Hash-table callbacks for mips_lo_sum_offsets. */
+
+static hashval_t
+mips_lo_sum_offset_hash (const void *entry)
+{
+ return mips_hash_base (((const struct mips_lo_sum_offset *) entry)->base);
+}
+
+static int
+mips_lo_sum_offset_eq (const void *entry, const void *value)
+{
+ return rtx_equal_p (((const struct mips_lo_sum_offset *) entry)->base,
+ (const_rtx) value);
+}
+
+/* Look up symbolic constant X in HTAB, which is a hash table of
+ mips_lo_sum_offsets. If OPTION is NO_INSERT, return true if X can be
+ paired with a recorded LO_SUM, otherwise record X in the table. */
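+
+/* For example, if the largest recorded LO_SUM offset for symbol FOO is
+   4, a NO_INSERT lookup of FOO + 8 returns false, and the corresponding
+   high part is treated as orphaned.  */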
+
+static bool
+mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option)
+{
+ rtx base, offset;
+ void **slot;
+ struct mips_lo_sum_offset *entry;
+
+ /* Split X into a base and offset. */
+ split_const (x, &base, &offset);
+ if (UNSPEC_ADDRESS_P (base))
+ base = UNSPEC_ADDRESS (base);
+
+ /* Look up the base in the hash table. */
+ slot = htab_find_slot_with_hash (htab, base, mips_hash_base (base), option);
+ if (slot == NULL)
+ return false;
+
+ entry = (struct mips_lo_sum_offset *) *slot;
+ if (option == INSERT)
+ {
+ if (entry == NULL)
+ {
+ entry = XNEW (struct mips_lo_sum_offset);
+ entry->base = base;
+ entry->offset = INTVAL (offset);
+ *slot = entry;
+ }
+ else
+ {
+ if (INTVAL (offset) > entry->offset)
+ entry->offset = INTVAL (offset);
+ }
+ }
+ return INTVAL (offset) <= entry->offset;
+}
+
+/* A for_each_rtx callback for which DATA is a mips_lo_sum_offset hash table.
+ Record every LO_SUM in *LOC. */
+
+static int
+mips_record_lo_sum (rtx *loc, void *data)
+{
+ if (GET_CODE (*loc) == LO_SUM)
+ mips_lo_sum_offset_lookup ((htab_t) data, XEXP (*loc, 1), INSERT);
+ return 0;
+}
+
+/* Return true if INSN is a SET of an orphaned high-part relocation.
+ HTAB is a hash table of mips_lo_sum_offsets that describes all the
+ LO_SUMs in the current function. */
+
+static bool
+mips_orphaned_high_part_p (htab_t htab, rtx insn)
+{
+ enum mips_symbol_type type;
+ rtx x, set;
+
+ set = single_set (insn);
+ if (set)
+ {
+ /* Check for %his. */
+ x = SET_SRC (set);
+ if (GET_CODE (x) == HIGH
+ && absolute_symbolic_operand (XEXP (x, 0), VOIDmode))
+ return !mips_lo_sum_offset_lookup (htab, XEXP (x, 0), NO_INSERT);
+
+ /* Check for local %gots (and %got_pages, which is redundant but OK). */
+ if (GET_CODE (x) == UNSPEC
+ && XINT (x, 1) == UNSPEC_LOAD_GOT
+ && mips_symbolic_constant_p (XVECEXP (x, 0, 1),
+ SYMBOL_CONTEXT_LEA, &type)
+ && type == SYMBOL_GOTOFF_PAGE)
+ return !mips_lo_sum_offset_lookup (htab, XVECEXP (x, 0, 1), NO_INSERT);
+ }
+ return false;
+}
+
+/* Subroutine of mips_reorg_process_insns. If there is a hazard between
+ INSN and a previous instruction, avoid it by inserting nops after
+ instruction AFTER.
+
+ *DELAYED_REG and *HILO_DELAY describe the hazards that apply at
+ this point. If *DELAYED_REG is non-null, INSN must wait a cycle
+ before using the value of that register. *HILO_DELAY counts the
+ number of instructions since the last hilo hazard (that is,
+ the number of instructions since the last MFLO or MFHI).
+
+ After inserting nops for INSN, update *DELAYED_REG and *HILO_DELAY
+ for the next instruction.
+
+ LO_REG is an rtx for the LO register, used in dependence checking. */
+
+static void
+mips_avoid_hazard (rtx after, rtx insn, int *hilo_delay,
+ rtx *delayed_reg, rtx lo_reg)
+{
+ rtx pattern, set;
+ int nops, ninsns;
+
+ pattern = PATTERN (insn);
+
+ /* Do not put the whole function in .set noreorder if it contains
+ an asm statement. We don't know whether there will be hazards
+ between the asm statement and the gcc-generated code. */
+ if (GET_CODE (pattern) == ASM_INPUT || asm_noperands (pattern) >= 0)
+ cfun->machine->all_noreorder_p = false;
+
+ /* Ignore zero-length instructions (barriers and the like). */
+ ninsns = get_attr_length (insn) / 4;
+ if (ninsns == 0)
+ return;
+
+ /* Work out how many nops are needed. Note that we only care about
+ registers that are explicitly mentioned in the instruction's pattern.
+ It doesn't matter that calls use the argument registers or that they
+ clobber hi and lo. */
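+  /* For example, immediately after an MFLO or MFHI (*HILO_DELAY == 0),
+     an instruction that sets LO needs two nops in front of it; with one
+     intervening instruction (*HILO_DELAY == 1), it needs only one.  */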
+ if (*hilo_delay < 2 && reg_set_p (lo_reg, pattern))
+ nops = 2 - *hilo_delay;
+ else if (*delayed_reg != 0 && reg_referenced_p (*delayed_reg, pattern))
+ nops = 1;
+ else
+ nops = 0;
+
+ /* Insert the nops between this instruction and the previous one.
+ Each new nop takes us further from the last hilo hazard. */
+ *hilo_delay += nops;
+ while (nops-- > 0)
+ emit_insn_after (gen_hazard_nop (), after);
+
+ /* Set up the state for the next instruction. */
+ *hilo_delay += ninsns;
+ *delayed_reg = 0;
+ if (INSN_CODE (insn) >= 0)
+ switch (get_attr_hazard (insn))
+ {
+ case HAZARD_NONE:
+ break;
+
+ case HAZARD_HILO:
+ *hilo_delay = 0;
+ break;
+
+ case HAZARD_DELAY:
+ set = single_set (insn);
+ gcc_assert (set);
+ *delayed_reg = SET_DEST (set);
+ break;
+ }
+}
+
+/* Go through the instruction stream and insert nops where necessary.
+ Also delete any high-part relocations whose partnering low parts
+ are now all dead. See if the whole function can then be put into
+ .set noreorder and .set nomacro. */
+
+static void
+mips_reorg_process_insns (void)
+{
+ rtx insn, last_insn, subinsn, next_insn, lo_reg, delayed_reg;
+ int hilo_delay;
+ htab_t htab;
+
+ /* Force all instructions to be split into their final form. */
+ split_all_insns_noflow ();
+
+ /* Recalculate instruction lengths without taking nops into account. */
+ cfun->machine->ignore_hazard_length_p = true;
+ shorten_branches (get_insns ());
+
+ cfun->machine->all_noreorder_p = true;
+
+  /* We don't track MIPS16 PC-relative offsets closely enough to do
+     a good job with ".set noreorder" code in MIPS16 mode.  */
+ if (TARGET_MIPS16)
+ cfun->machine->all_noreorder_p = false;
+
+ /* Code that doesn't use explicit relocs can't be ".set nomacro". */
+ if (!TARGET_EXPLICIT_RELOCS)
+ cfun->machine->all_noreorder_p = false;
+
+ /* Profiled functions can't be all noreorder because the profiler
+ support uses assembler macros. */
+ if (crtl->profile)
+ cfun->machine->all_noreorder_p = false;
+
+ /* Code compiled with -mfix-vr4120 can't be all noreorder because
+ we rely on the assembler to work around some errata. */
+ if (TARGET_FIX_VR4120)
+ cfun->machine->all_noreorder_p = false;
+
+ /* The same is true for -mfix-vr4130 if we might generate MFLO or
+ MFHI instructions. Note that we avoid using MFLO and MFHI if
+ the VR4130 MACC and DMACC instructions are available instead;
+ see the *mfhilo_{si,di}_macc patterns. */
+ if (TARGET_FIX_VR4130 && !ISA_HAS_MACCHI)
+ cfun->machine->all_noreorder_p = false;
+
+ htab = htab_create (37, mips_lo_sum_offset_hash,
+ mips_lo_sum_offset_eq, free);
+
+ /* Make a first pass over the instructions, recording all the LO_SUMs. */
+ for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
+ FOR_EACH_SUBINSN (subinsn, insn)
+ if (USEFUL_INSN_P (subinsn))
+ for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, htab);
+
+ last_insn = 0;
+ hilo_delay = 2;
+ delayed_reg = 0;
+ lo_reg = gen_rtx_REG (SImode, LO_REGNUM);
+
+ /* Make a second pass over the instructions. Delete orphaned
+ high-part relocations or turn them into NOPs. Avoid hazards
+ by inserting NOPs. */
+ for (insn = get_insns (); insn != 0; insn = next_insn)
+ {
+ next_insn = NEXT_INSN (insn);
+ if (USEFUL_INSN_P (insn))
+ {
+ if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ /* If we find an orphaned high-part relocation in a delay
+ slot, it's easier to turn that instruction into a NOP than
+ to delete it. The delay slot will be a NOP either way. */
+ FOR_EACH_SUBINSN (subinsn, insn)
+ if (INSN_P (subinsn))
+ {
+ if (mips_orphaned_high_part_p (htab, subinsn))
+ {
+ PATTERN (subinsn) = gen_nop ();
+ INSN_CODE (subinsn) = CODE_FOR_nop;
+ }
+ mips_avoid_hazard (last_insn, subinsn, &hilo_delay,
+ &delayed_reg, lo_reg);
+ }
+ last_insn = insn;
+ }
+ else
+ {
+ /* INSN is a single instruction. Delete it if it's an
+ orphaned high-part relocation. */
+ if (mips_orphaned_high_part_p (htab, insn))
+ delete_insn (insn);
+ /* Also delete cache barriers if the last instruction
+ was an annulled branch. INSN will not be speculatively
+ executed. */
+ else if (recog_memoized (insn) == CODE_FOR_r10k_cache_barrier
+ && last_insn
+ && INSN_ANNULLED_BRANCH_P (SEQ_BEGIN (last_insn)))
+ delete_insn (insn);
+ else
+ {
+ mips_avoid_hazard (last_insn, insn, &hilo_delay,
+ &delayed_reg, lo_reg);
+ last_insn = insn;
+ }
+ }
+ }
+ }
+
+ htab_delete (htab);
+}
+
+/* If we are using a GOT, but have not decided to use a global pointer yet,
+ see whether we need one to implement long branches. Convert the ghost
+ global-pointer instructions into real ones if so. */
+
+static bool
+mips_expand_ghost_gp_insns (void)
+{
+ rtx insn;
+ int normal_length;
+
+ /* Quick exit if we already know that we will or won't need a
+ global pointer. */
+ if (!TARGET_USE_GOT
+ || cfun->machine->global_pointer == INVALID_REGNUM
+ || mips_must_initialize_gp_p ())
+ return false;
+
+ shorten_branches (get_insns ());
+
+ /* Look for a branch that is longer than normal. The normal length for
+ non-MIPS16 branches is 8, because the length includes the delay slot.
+ It is 4 for MIPS16, because MIPS16 branches are extended instructions,
+ but they have no delay slot. */
+ normal_length = (TARGET_MIPS16 ? 4 : 8);
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (JUMP_P (insn)
+ && USEFUL_INSN_P (insn)
+ && get_attr_length (insn) > normal_length)
+ break;
+
+ if (insn == NULL_RTX)
+ return false;
+
+ /* We've now established that we need $gp. */
+ cfun->machine->must_initialize_gp_p = true;
+ split_all_insns_noflow ();
+
+ return true;
+}
+
+/* Subroutine of mips_reorg to manage passes that require DF. */
+
+static void
+mips_df_reorg (void)
+{
+ /* Create def-use chains. */
+ df_set_flags (DF_EQ_NOTES);
+ df_chain_add_problem (DF_UD_CHAIN);
+ df_analyze ();
+
+ if (TARGET_RELAX_PIC_CALLS)
+ mips_annotate_pic_calls ();
+
+ if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE)
+ r10k_insert_cache_barriers ();
+
+ df_finish_pass (false);
+}
+
+/* Implement TARGET_MACHINE_DEPENDENT_REORG. */
+
+static void
+mips_reorg (void)
+{
+  /* Restore the BLOCK_FOR_INSN pointers, which are needed by DF.  They
+     also matter during insn splitting in mips16_lay_out_constants,
+     where DF insn info is only kept up to date if the CFG is
+     available.  */
+ if (mips_cfg_in_reorg ())
+ compute_bb_for_insn ();
+ mips16_lay_out_constants ();
+ if (mips_cfg_in_reorg ())
+ {
+ mips_df_reorg ();
+ free_bb_for_insn ();
+ }
+
+ if (optimize > 0 && flag_delayed_branch)
+ {
+ cleanup_barriers ();
+ dbr_schedule (get_insns ());
+ }
+ mips_reorg_process_insns ();
+ if (!TARGET_MIPS16
+ && TARGET_EXPLICIT_RELOCS
+ && TUNE_MIPS4130
+ && TARGET_VR4130_ALIGN)
+ vr4130_align_insns ();
+ if (mips_expand_ghost_gp_insns ())
+ /* The expansion could invalidate some of the VR4130 alignment
+ optimizations, but this should be an extremely rare case anyhow. */
+ mips_reorg_process_insns ();
+}
+
+/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
+ in order to avoid duplicating too much logic from elsewhere. */
+
+static void
+mips_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, temp1, temp2, insn, fnaddr;
+ bool use_sibcall_p;
+
+ /* Pretend to be a post-reload pass while generating rtl. */
+ reload_completed = 1;
+
+ /* Mark the end of the (empty) prologue. */
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Determine if we can use a sibcall to call FUNCTION directly. */
+ fnaddr = XEXP (DECL_RTL (function), 0);
+ use_sibcall_p = (mips_function_ok_for_sibcall (function, NULL)
+ && const_call_insn_operand (fnaddr, Pmode));
+
+ /* Determine if we need to load FNADDR from the GOT. */
+ if (!use_sibcall_p
+ && (mips_got_symbol_type_p
+ (mips_classify_symbol (fnaddr, SYMBOL_CONTEXT_LEA))))
+ {
+ /* Pick a global pointer. Use a call-clobbered register if
+ TARGET_CALL_SAVED_GP. */
+ cfun->machine->global_pointer
+ = TARGET_CALL_SAVED_GP ? 15 : GLOBAL_POINTER_REGNUM;
+ cfun->machine->must_initialize_gp_p = true;
+ SET_REGNO (pic_offset_table_rtx, cfun->machine->global_pointer);
+
+ /* Set up the global pointer for n32 or n64 abicalls. */
+ mips_emit_loadgp ();
+ }
+
+ /* We need two temporary registers in some cases. */
+ temp1 = gen_rtx_REG (Pmode, 2);
+ temp2 = gen_rtx_REG (Pmode, 3);
+
+ /* Find out which register contains the "this" pointer. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
+ else
+ this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
+
+ /* Add DELTA to THIS_RTX. */
+ if (delta != 0)
+ {
+ rtx offset = GEN_INT (delta);
+ if (!SMALL_OPERAND (delta))
+ {
+ mips_emit_move (temp1, offset);
+ offset = temp1;
+ }
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
+ }
+
+ /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
+ if (vcall_offset != 0)
+ {
+ rtx addr;
+
+ /* Set TEMP1 to *THIS_RTX. */
+ mips_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
+
+ /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
+ addr = mips_add_offset (temp2, temp1, vcall_offset);
+
+ /* Load the offset and add it to THIS_RTX. */
+ mips_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
+ }
+
+ /* Jump to the target function. Use a sibcall if direct jumps are
+ allowed, otherwise load the address into a register first. */
+ if (use_sibcall_p)
+ {
+ insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
+ SIBLING_CALL_P (insn) = 1;
+ }
+ else
+ {
+ /* This is messy. GAS treats "la $25,foo" as part of a call
+ sequence and may allow a global "foo" to be lazily bound.
+ The general move patterns therefore reject this combination.
+
+ In this context, lazy binding would actually be OK
+ for TARGET_CALL_CLOBBERED_GP, but it's still wrong for
+ TARGET_CALL_SAVED_GP; see mips_load_call_address.
+ We must therefore load the address via a temporary
+ register if mips_dangerous_for_la25_p.
+
+ If we jump to the temporary register rather than $25,
+ the assembler can use the move insn to fill the jump's
+ delay slot.
+
+ We can use the same technique for MIPS16 code, where $25
+ is not a valid JR register. */
+ if (TARGET_USE_PIC_FN_ADDR_REG
+ && !TARGET_MIPS16
+ && !mips_dangerous_for_la25_p (fnaddr))
+ temp1 = gen_rtx_REG (Pmode, PIC_FUNCTION_ADDR_REGNUM);
+ mips_load_call_address (MIPS_CALL_SIBCALL, temp1, fnaddr);
+
+ if (TARGET_USE_PIC_FN_ADDR_REG
+ && REGNO (temp1) != PIC_FUNCTION_ADDR_REGNUM)
+ mips_emit_move (gen_rtx_REG (Pmode, PIC_FUNCTION_ADDR_REGNUM), temp1);
+ emit_jump_insn (gen_indirect_jump (temp1));
+ }
+
+ /* Run just enough of rest_of_compilation. This sequence was
+ "borrowed" from alpha.c. */
+ insn = get_insns ();
+ insn_locators_alloc ();
+ split_all_insns_noflow ();
+ mips16_lay_out_constants ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ /* Clean up the vars set above. Note that final_end_function resets
+ the global pointer for us. */
+ reload_completed = 0;
+}
+
+/* The last argument passed to mips_set_mips16_mode, or negative if the
+ function hasn't been called yet.
+
+ There are two copies of this information. One is saved and restored
+ by the PCH process while the other is specific to this compiler
+ invocation. The information calculated by mips_set_mips16_mode
+ is invalid unless the two variables are the same. */
+static int was_mips16_p = -1;
+static GTY(()) int was_mips16_pch_p = -1;
+
+/* Set up the target-dependent global state so that it matches the
+ current function's ISA mode. */
+
+static void
+mips_set_mips16_mode (int mips16_p)
+{
+ if (mips16_p == was_mips16_p
+ && mips16_p == was_mips16_pch_p)
+ return;
+
+ /* Restore base settings of various flags. */
+ target_flags = mips_base_target_flags;
+ flag_schedule_insns = mips_base_schedule_insns;
+ flag_reorder_blocks_and_partition = mips_base_reorder_blocks_and_partition;
+ flag_move_loop_invariants = mips_base_move_loop_invariants;
+ align_loops = mips_base_align_loops;
+ align_jumps = mips_base_align_jumps;
+ align_functions = mips_base_align_functions;
+
+ if (mips16_p)
+ {
+ /* Switch to MIPS16 mode. */
+ target_flags |= MASK_MIPS16;
+
+ /* Don't run the scheduler before reload, since it tends to
+ increase register pressure. */
+ flag_schedule_insns = 0;
+
+ /* Don't do hot/cold partitioning. mips16_lay_out_constants expects
+ the whole function to be in a single section. */
+ flag_reorder_blocks_and_partition = 0;
+
+ /* Don't move loop invariants, because it tends to increase
+ register pressure. It also introduces an extra move in cases
+ where the constant is the first operand in a two-operand binary
+	 instruction, or when it forms a register argument to a function
+ call. */
+ flag_move_loop_invariants = 0;
+
+ target_flags |= MASK_EXPLICIT_RELOCS;
+
+ /* Experiments suggest we get the best overall section-anchor
+ results from using the range of an unextended LW or SW. Code
+ that makes heavy use of byte or short accesses can do better
+ with ranges of 0...31 and 0...63 respectively, but most code is
+ sensitive to the range of LW and SW instead. */
+ targetm.min_anchor_offset = 0;
+ targetm.max_anchor_offset = 127;
+
+ targetm.const_anchor = 0;
+
+ /* MIPS16 has no BAL instruction. */
+ target_flags &= ~MASK_RELAX_PIC_CALLS;
+
+ if (flag_pic && !TARGET_OLDABI)
+ sorry ("MIPS16 PIC for ABIs other than o32 and o64");
+
+ if (TARGET_XGOT)
+ sorry ("MIPS16 -mxgot code");
+
+ if (TARGET_HARD_FLOAT_ABI && !TARGET_OLDABI)
+ sorry ("hard-float MIPS16 code for ABIs other than o32 and o64");
+ }
+ else
+ {
+ /* Switch to normal (non-MIPS16) mode. */
+ target_flags &= ~MASK_MIPS16;
+
+ /* Provide default values for align_* for 64-bit targets. */
+ if (TARGET_64BIT)
+ {
+ if (align_loops == 0)
+ align_loops = 8;
+ if (align_jumps == 0)
+ align_jumps = 8;
+ if (align_functions == 0)
+ align_functions = 8;
+ }
+
+ targetm.min_anchor_offset = -32768;
+ targetm.max_anchor_offset = 32767;
+
+ targetm.const_anchor = 0x8000;
+ }
+
+ /* (Re)initialize MIPS target internals for new ISA. */
+ mips_init_relocs ();
+
+ if (mips16_p)
+ {
+ if (!mips16_globals)
+ mips16_globals = save_target_globals ();
+ else
+ restore_target_globals (mips16_globals);
+ }
+ else
+ restore_target_globals (&default_target_globals);
+
+ was_mips16_p = mips16_p;
+ was_mips16_pch_p = mips16_p;
+}
+
+/* Implement TARGET_SET_CURRENT_FUNCTION. Decide whether the current
+ function should use the MIPS16 ISA and switch modes accordingly. */
+
+static void
+mips_set_current_function (tree fndecl)
+{
+ mips_set_mips16_mode (mips_use_mips16_mode_p (fndecl));
+}
+
+/* Allocate a chunk of memory for per-function machine-dependent data. */
+
+static struct machine_function *
+mips_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Return the processor associated with the given ISA level, or null
+ if the ISA isn't valid. */
+
+static const struct mips_cpu_info *
+mips_cpu_info_from_isa (int isa)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE (mips_cpu_info_table); i++)
+ if (mips_cpu_info_table[i].isa == isa)
+ return mips_cpu_info_table + i;
+
+ return NULL;
+}
+
+/* Return true if GIVEN is the same as CANONICAL, or if it is CANONICAL
+ with a final "000" replaced by "k". Ignore case.
+
+ Note: this function is shared between GCC and GAS. */
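+
+/* For example, "r4000" matches both itself and, via the "000" -> "k"
+   rule, "r4k".  */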
+
+static bool
+mips_strict_matching_cpu_name_p (const char *canonical, const char *given)
+{
+ while (*given != 0 && TOLOWER (*given) == TOLOWER (*canonical))
+ given++, canonical++;
+
+ return ((*given == 0 && *canonical == 0)
+ || (strcmp (canonical, "000") == 0 && strcasecmp (given, "k") == 0));
+}
+
+/* Return true if GIVEN matches CANONICAL, where GIVEN is a user-supplied
+ CPU name. We've traditionally allowed a lot of variation here.
+
+ Note: this function is shared between GCC and GAS. */
+
+static bool
+mips_matching_cpu_name_p (const char *canonical, const char *given)
+{
+ /* First see if the name matches exactly, or with a final "000"
+ turned into "k". */
+ if (mips_strict_matching_cpu_name_p (canonical, given))
+ return true;
+
+ /* If not, try comparing based on numerical designation alone.
+ See if GIVEN is an unadorned number, or 'r' followed by a number. */
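+  /* (For example, "4000" and "r4000" can both match the canonical
+     name "vr4000" this way.)  */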
+ if (TOLOWER (*given) == 'r')
+ given++;
+ if (!ISDIGIT (*given))
+ return false;
+
+ /* Skip over some well-known prefixes in the canonical name,
+ hoping to find a number there too. */
+ if (TOLOWER (canonical[0]) == 'v' && TOLOWER (canonical[1]) == 'r')
+ canonical += 2;
+ else if (TOLOWER (canonical[0]) == 'r' && TOLOWER (canonical[1]) == 'm')
+ canonical += 2;
+ else if (TOLOWER (canonical[0]) == 'r')
+ canonical += 1;
+
+ return mips_strict_matching_cpu_name_p (canonical, given);
+}
+
+/* Return the mips_cpu_info entry for the processor or ISA given
+ by CPU_STRING. Return null if the string isn't recognized.
+
+ A similar function exists in GAS. */
+
+static const struct mips_cpu_info *
+mips_parse_cpu (const char *cpu_string)
+{
+ unsigned int i;
+ const char *s;
+
+ /* In the past, we allowed upper-case CPU names, but it doesn't
+ work well with the multilib machinery. */
+ for (s = cpu_string; *s != 0; s++)
+ if (ISUPPER (*s))
+ {
+ warning (0, "CPU names must be lower case");
+ break;
+ }
+
+ /* 'from-abi' selects the most compatible architecture for the given
+ ABI: MIPS I for 32-bit ABIs and MIPS III for 64-bit ABIs. For the
+ EABIs, we have to decide whether we're using the 32-bit or 64-bit
+ version. */
+ if (strcasecmp (cpu_string, "from-abi") == 0)
+ return mips_cpu_info_from_isa (ABI_NEEDS_32BIT_REGS ? 1
+ : ABI_NEEDS_64BIT_REGS ? 3
+ : (TARGET_64BIT ? 3 : 1));
+
+ /* 'default' has traditionally been a no-op. Probably not very useful. */
+ if (strcasecmp (cpu_string, "default") == 0)
+ return NULL;
+
+ for (i = 0; i < ARRAY_SIZE (mips_cpu_info_table); i++)
+ if (mips_matching_cpu_name_p (mips_cpu_info_table[i].name, cpu_string))
+ return mips_cpu_info_table + i;
+
+ return NULL;
+}
+
+/* Set up globals to generate code for the ISA or processor
+ described by INFO. */
+
+static void
+mips_set_architecture (const struct mips_cpu_info *info)
+{
+ if (info != 0)
+ {
+ mips_arch_info = info;
+ mips_arch = info->cpu;
+ mips_isa = info->isa;
+ }
+}
+
+/* Likewise for tuning. */
+
+static void
+mips_set_tune (const struct mips_cpu_info *info)
+{
+ if (info != 0)
+ {
+ mips_tune_info = info;
+ mips_tune = info->cpu;
+ }
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+mips_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mabi_:
+ if (strcmp (arg, "32") == 0)
+ mips_abi = ABI_32;
+ else if (strcmp (arg, "o64") == 0)
+ mips_abi = ABI_O64;
+ else if (strcmp (arg, "n32") == 0)
+ mips_abi = ABI_N32;
+ else if (strcmp (arg, "64") == 0)
+ mips_abi = ABI_64;
+ else if (strcmp (arg, "eabi") == 0)
+ mips_abi = ABI_EABI;
+ else
+ return false;
+ return true;
+
+ case OPT_march_:
+ case OPT_mtune_:
+ return mips_parse_cpu (arg) != 0;
+
+ case OPT_mips:
+ mips_isa_option_info = mips_parse_cpu (ACONCAT (("mips", arg, NULL)));
+ return mips_isa_option_info != 0;
+
+ case OPT_mno_flush_func:
+ mips_cache_flush_func = NULL;
+ return true;
+
+ case OPT_mcode_readable_:
+ if (strcmp (arg, "yes") == 0)
+ mips_code_readable = CODE_READABLE_YES;
+ else if (strcmp (arg, "pcrel") == 0)
+ mips_code_readable = CODE_READABLE_PCREL;
+ else if (strcmp (arg, "no") == 0)
+ mips_code_readable = CODE_READABLE_NO;
+ else
+ return false;
+ return true;
+
+ case OPT_mr10k_cache_barrier_:
+ if (strcmp (arg, "load-store") == 0)
+ mips_r10k_cache_barrier = R10K_CACHE_BARRIER_LOAD_STORE;
+ else if (strcmp (arg, "store") == 0)
+ mips_r10k_cache_barrier = R10K_CACHE_BARRIER_STORE;
+ else if (strcmp (arg, "none") == 0)
+ mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE;
+ else
+ return false;
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+
+static void
+mips_option_override (void)
+{
+ int i, start, regno, mode;
+
+ /* Process flags as though we were generating non-MIPS16 code. */
+ mips_base_mips16 = TARGET_MIPS16;
+ target_flags &= ~MASK_MIPS16;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* -mno-float overrides -mhard-float and -msoft-float. */
+ if (TARGET_NO_FLOAT)
+ {
+ target_flags |= MASK_SOFT_FLOAT_ABI;
+ target_flags_explicit |= MASK_SOFT_FLOAT_ABI;
+ }
+
+ if (TARGET_FLIP_MIPS16)
+ TARGET_INTERLINK_MIPS16 = 1;
+
+ /* Set the small data limit. */
+ mips_small_data_threshold = (global_options_set.x_g_switch_value
+ ? g_switch_value
+ : MIPS_DEFAULT_GVALUE);
+
+ /* The following code determines the architecture and register size.
+ Similar code was added to GAS 2.14 (see tc-mips.c:md_after_parse_args()).
+ The GAS and GCC code should be kept in sync as much as possible. */
+
+ if (mips_arch_string != 0)
+ mips_set_architecture (mips_parse_cpu (mips_arch_string));
+
+ if (mips_isa_option_info != 0)
+ {
+ if (mips_arch_info == 0)
+ mips_set_architecture (mips_isa_option_info);
+ else if (mips_arch_info->isa != mips_isa_option_info->isa)
+ error ("%<-%s%> conflicts with the other architecture options, "
+ "which specify a %s processor",
+ mips_isa_option_info->name,
+ mips_cpu_info_from_isa (mips_arch_info->isa)->name);
+ }
+
+ if (mips_arch_info == 0)
+ {
+#ifdef MIPS_CPU_STRING_DEFAULT
+ mips_set_architecture (mips_parse_cpu (MIPS_CPU_STRING_DEFAULT));
+#else
+ mips_set_architecture (mips_cpu_info_from_isa (MIPS_ISA_DEFAULT));
+#endif
+ }
+
+ if (ABI_NEEDS_64BIT_REGS && !ISA_HAS_64BIT_REGS)
+ error ("%<-march=%s%> is not compatible with the selected ABI",
+ mips_arch_info->name);
+
+ /* Optimize for mips_arch, unless -mtune selects a different processor. */
+ if (mips_tune_string != 0)
+ mips_set_tune (mips_parse_cpu (mips_tune_string));
+
+ if (mips_tune_info == 0)
+ mips_set_tune (mips_arch_info);
+
+ if ((target_flags_explicit & MASK_64BIT) != 0)
+ {
+ /* The user specified the size of the integer registers. Make sure
+ it agrees with the ABI and ISA. */
+ if (TARGET_64BIT && !ISA_HAS_64BIT_REGS)
+ error ("%<-mgp64%> used with a 32-bit processor");
+ else if (!TARGET_64BIT && ABI_NEEDS_64BIT_REGS)
+ error ("%<-mgp32%> used with a 64-bit ABI");
+ else if (TARGET_64BIT && ABI_NEEDS_32BIT_REGS)
+ error ("%<-mgp64%> used with a 32-bit ABI");
+ }
+ else
+ {
+ /* Infer the integer register size from the ABI and processor.
+ Restrict ourselves to 32-bit registers if that's all the
+ processor has, or if the ABI cannot handle 64-bit registers. */
+ if (ABI_NEEDS_32BIT_REGS || !ISA_HAS_64BIT_REGS)
+ target_flags &= ~MASK_64BIT;
+ else
+ target_flags |= MASK_64BIT;
+ }
+
+ if ((target_flags_explicit & MASK_FLOAT64) != 0)
+ {
+ if (TARGET_SINGLE_FLOAT && TARGET_FLOAT64)
+ error ("unsupported combination: %s", "-mfp64 -msingle-float");
+ else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT && !TARGET_FLOAT64)
+ error ("unsupported combination: %s", "-mgp64 -mfp32 -mdouble-float");
+ else if (!TARGET_64BIT && TARGET_FLOAT64)
+ {
+ if (!ISA_HAS_MXHC1)
+ error ("%<-mgp32%> and %<-mfp64%> can only be combined if"
+ " the target supports the mfhc1 and mthc1 instructions");
+ else if (mips_abi != ABI_32)
+ error ("%<-mgp32%> and %<-mfp64%> can only be combined when using"
+ " the o32 ABI");
+ }
+ }
+ else
+ {
+ /* -msingle-float selects 32-bit float registers. Otherwise the
+ float registers should be the same size as the integer ones. */
+ if (TARGET_64BIT && TARGET_DOUBLE_FLOAT)
+ target_flags |= MASK_FLOAT64;
+ else
+ target_flags &= ~MASK_FLOAT64;
+ }
+
+ /* End of code shared with GAS. */
+
+ /* If no -mlong* option was given, infer it from the other options. */
+ if ((target_flags_explicit & MASK_LONG64) == 0)
+ {
+ if ((mips_abi == ABI_EABI && TARGET_64BIT) || mips_abi == ABI_64)
+ target_flags |= MASK_LONG64;
+ else
+ target_flags &= ~MASK_LONG64;
+ }
+
+ if (!TARGET_OLDABI)
+ flag_pcc_struct_return = 0;
+
+ /* Decide which rtx_costs structure to use. */
+ if (optimize_size)
+ mips_cost = &mips_rtx_cost_optimize_size;
+ else
+ mips_cost = &mips_rtx_cost_data[mips_tune];
+
+ /* If the user hasn't specified a branch cost, use the processor's
+ default. */
+ if (mips_branch_cost == 0)
+ mips_branch_cost = mips_cost->branch_cost;
+
+ /* If neither -mbranch-likely nor -mno-branch-likely was given
+ on the command line, set MASK_BRANCHLIKELY based on the target
+ architecture and tuning flags. Annulled delay slots are a
+ size win, so we only consider the processor-specific tuning
+ for !optimize_size. */
+ if ((target_flags_explicit & MASK_BRANCHLIKELY) == 0)
+ {
+ if (ISA_HAS_BRANCHLIKELY
+ && (optimize_size
+ || (mips_tune_info->tune_flags & PTF_AVOID_BRANCHLIKELY) == 0))
+ target_flags |= MASK_BRANCHLIKELY;
+ else
+ target_flags &= ~MASK_BRANCHLIKELY;
+ }
+ else if (TARGET_BRANCHLIKELY && !ISA_HAS_BRANCHLIKELY)
+ warning (0, "the %qs architecture does not support branch-likely"
+ " instructions", mips_arch_info->name);
+
+ /* The effect of -mabicalls isn't defined for the EABI. */
+ if (mips_abi == ABI_EABI && TARGET_ABICALLS)
+ {
+ error ("unsupported combination: %s", "-mabicalls -mabi=eabi");
+ target_flags &= ~MASK_ABICALLS;
+ }
+
+ if (TARGET_ABICALLS_PIC2)
+ /* We need to set flag_pic for executables as well as DSOs
+ because we may reference symbols that are not defined in
+ the final executable. (MIPS does not use things like
+ copy relocs, for example.)
+
+ There is a body of code that uses __PIC__ to distinguish
+ between -mabicalls and -mno-abicalls code. The non-__PIC__
+ variant is usually appropriate for TARGET_ABICALLS_PIC0, as
+ long as any indirect jumps use $25. */
+ flag_pic = 1;
+
+ /* -mvr4130-align is a "speed over size" optimization: it usually produces
+ faster code, but at the expense of more nops. Enable it at -O3 and
+ above. */
+ if (optimize > 2 && (target_flags_explicit & MASK_VR4130_ALIGN) == 0)
+ target_flags |= MASK_VR4130_ALIGN;
+
+ /* Prefer a call to memcpy over inline code when optimizing for size,
+ though see MOVE_RATIO in mips.h. */
+ if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0)
+ target_flags |= MASK_MEMCPY;
+
+ /* If we have a nonzero small-data limit, check that the -mgpopt
+ setting is consistent with the other target flags. */
+ if (mips_small_data_threshold > 0)
+ {
+ if (!TARGET_GPOPT)
+ {
+ if (!TARGET_EXPLICIT_RELOCS)
+ error ("%<-mno-gpopt%> needs %<-mexplicit-relocs%>");
+
+ TARGET_LOCAL_SDATA = false;
+ TARGET_EXTERN_SDATA = false;
+ }
+ else
+ {
+ if (TARGET_VXWORKS_RTP)
+ warning (0, "cannot use small-data accesses for %qs", "-mrtp");
+
+ if (TARGET_ABICALLS)
+ warning (0, "cannot use small-data accesses for %qs",
+ "-mabicalls");
+ }
+ }
+
+#ifdef MIPS_TFMODE_FORMAT
+ REAL_MODE_FORMAT (TFmode) = &MIPS_TFMODE_FORMAT;
+#endif
+
+ /* Make sure that the user didn't turn off paired single support when
+ MIPS-3D support is requested. */
+ if (TARGET_MIPS3D
+ && (target_flags_explicit & MASK_PAIRED_SINGLE_FLOAT)
+ && !TARGET_PAIRED_SINGLE_FLOAT)
+ error ("%<-mips3d%> requires %<-mpaired-single%>");
+
+ /* If TARGET_MIPS3D, enable MASK_PAIRED_SINGLE_FLOAT. */
+ if (TARGET_MIPS3D)
+ target_flags |= MASK_PAIRED_SINGLE_FLOAT;
+
+ /* Make sure that when TARGET_PAIRED_SINGLE_FLOAT is true, TARGET_FLOAT64
+ and TARGET_HARD_FLOAT_ABI are both true. */
+ if (TARGET_PAIRED_SINGLE_FLOAT && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI))
+ error ("%qs must be used with %qs",
+ TARGET_MIPS3D ? "-mips3d" : "-mpaired-single",
+ TARGET_HARD_FLOAT_ABI ? "-mfp64" : "-mhard-float");
+
+ /* Make sure that the ISA supports TARGET_PAIRED_SINGLE_FLOAT when it is
+ enabled. */
+ if (TARGET_PAIRED_SINGLE_FLOAT && !ISA_HAS_PAIRED_SINGLE)
+ warning (0, "the %qs architecture does not support paired-single"
+ " instructions", mips_arch_info->name);
+
+ if (mips_r10k_cache_barrier != R10K_CACHE_BARRIER_NONE
+ && !TARGET_CACHE_BUILTIN)
+ {
+ error ("%qs requires a target that provides the %qs instruction",
+ "-mr10k-cache-barrier", "cache");
+ mips_r10k_cache_barrier = R10K_CACHE_BARRIER_NONE;
+ }
+
+ /* If TARGET_DSPR2, enable MASK_DSP. */
+ if (TARGET_DSPR2)
+ target_flags |= MASK_DSP;
+
+ /* .eh_frame addresses should be the same width as a C pointer.
+ Most MIPS ABIs support only one pointer size, so the assembler
+ will usually know exactly how big an .eh_frame address is.
+
+ Unfortunately, this is not true of the 64-bit EABI. The ABI was
+ originally defined to use 64-bit pointers (i.e. it is LP64), and
+ this is still the default mode. However, we also support an n32-like
+ ILP32 mode, which is selected by -mlong32. The problem is that the
+ assembler has traditionally not had an -mlong option, so it has
+ traditionally not known whether we're using the ILP32 or LP64 form.
+
+ As it happens, gas versions up to and including 2.19 use _32-bit_
+ addresses for EABI64 .cfi_* directives. This is wrong for the
+ default LP64 mode, so we can't use the directives by default.
+ Moreover, since gas's current behavior is at odds with gcc's
+ default behavior, it seems unwise to rely on future versions
+ of gas behaving the same way. We therefore avoid using .cfi
+ directives for -mlong32 as well. */
+ if (mips_abi == ABI_EABI && TARGET_64BIT)
+ flag_dwarf2_cfi_asm = 0;
+
+ /* .cfi_* directives generate a read-only section, so fall back on
+ manual .eh_frame creation if we need the section to be writable. */
+ if (TARGET_WRITABLE_EH_FRAME)
+ flag_dwarf2_cfi_asm = 0;
+
+ mips_init_print_operand_punct ();
+
+ /* Set up array to map GCC register number to debug register number.
+ Ignore the special purpose register numbers. */
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ mips_dbx_regno[i] = INVALID_REGNUM;
+ if (GP_REG_P (i) || FP_REG_P (i) || ALL_COP_REG_P (i))
+ mips_dwarf_regno[i] = i;
+ else
+ mips_dwarf_regno[i] = INVALID_REGNUM;
+ }
+
+ start = GP_DBX_FIRST - GP_REG_FIRST;
+ for (i = GP_REG_FIRST; i <= GP_REG_LAST; i++)
+ mips_dbx_regno[i] = i + start;
+
+ start = FP_DBX_FIRST - FP_REG_FIRST;
+ for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
+ mips_dbx_regno[i] = i + start;
+
+ /* Accumulator debug registers use big-endian ordering. */
+ mips_dbx_regno[HI_REGNUM] = MD_DBX_FIRST + 0;
+ mips_dbx_regno[LO_REGNUM] = MD_DBX_FIRST + 1;
+ mips_dwarf_regno[HI_REGNUM] = MD_REG_FIRST + 0;
+ mips_dwarf_regno[LO_REGNUM] = MD_REG_FIRST + 1;
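+  /* TARGET_LITTLE_ENDIAN and TARGET_BIG_ENDIAN evaluate to complementary
+     0/1 values, so the loop below assigns each DSP accumulator pair its
+     DWARF numbers in big-endian order regardless of the target's actual
+     endianness.  */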
+ for (i = DSP_ACC_REG_FIRST; i <= DSP_ACC_REG_LAST; i += 2)
+ {
+ mips_dwarf_regno[i + TARGET_LITTLE_ENDIAN] = i;
+ mips_dwarf_regno[i + TARGET_BIG_ENDIAN] = i + 1;
+ }
+
+ /* Set up mips_hard_regno_mode_ok. */
+ for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ mips_hard_regno_mode_ok[mode][regno]
+ = mips_hard_regno_mode_ok_p (regno, (enum machine_mode) mode);
+
+ /* Function to allocate machine-dependent function status. */
+ init_machine_status = &mips_init_machine_status;
+
+ /* Default to working around R4000 errata only if the processor
+ was selected explicitly. */
+ if ((target_flags_explicit & MASK_FIX_R4000) == 0
+ && mips_matching_cpu_name_p (mips_arch_info->name, "r4000"))
+ target_flags |= MASK_FIX_R4000;
+
+ /* Default to working around R4400 errata only if the processor
+ was selected explicitly. */
+ if ((target_flags_explicit & MASK_FIX_R4400) == 0
+ && mips_matching_cpu_name_p (mips_arch_info->name, "r4400"))
+ target_flags |= MASK_FIX_R4400;
+
+ /* Default to working around R10000 errata only if the processor
+ was selected explicitly. */
+ if ((target_flags_explicit & MASK_FIX_R10000) == 0
+ && mips_matching_cpu_name_p (mips_arch_info->name, "r10000"))
+ target_flags |= MASK_FIX_R10000;
+
+  /* Make sure that branch-likely instructions are available when using
+ -mfix-r10000. The instructions are not available if either:
+
+ 1. -mno-branch-likely was passed.
+ 2. The selected ISA does not support branch-likely and
+ the command line does not include -mbranch-likely. */
+ if (TARGET_FIX_R10000
+ && ((target_flags_explicit & MASK_BRANCHLIKELY) == 0
+ ? !ISA_HAS_BRANCHLIKELY
+ : !TARGET_BRANCHLIKELY))
+ sorry ("%qs requires branch-likely instructions", "-mfix-r10000");
+
+ if (TARGET_SYNCI && !ISA_HAS_SYNCI)
+ {
+ warning (0, "the %qs architecture does not support the synci "
+ "instruction", mips_arch_info->name);
+ target_flags &= ~MASK_SYNCI;
+ }
+
+ /* Only optimize PIC indirect calls if they are actually required. */
+ if (!TARGET_USE_GOT || !TARGET_EXPLICIT_RELOCS)
+ target_flags &= ~MASK_RELAX_PIC_CALLS;
+
+ /* Save base state of options. */
+ mips_base_target_flags = target_flags;
+ mips_base_schedule_insns = flag_schedule_insns;
+ mips_base_reorder_blocks_and_partition = flag_reorder_blocks_and_partition;
+ mips_base_move_loop_invariants = flag_move_loop_invariants;
+ mips_base_align_loops = align_loops;
+ mips_base_align_jumps = align_jumps;
+ mips_base_align_functions = align_functions;
+
+ /* Now select the ISA mode.
+
+ Do all CPP-sensitive stuff in non-MIPS16 mode; we'll switch to
+ MIPS16 mode afterwards if need be. */
+ mips_set_mips16_mode (false);
+}
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options mips_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Swap the register information for registers I and I + 1, which
+ currently have the wrong endianness. Note that the registers'
+ fixedness and call-clobberedness might have been set on the
+ command line. */
+
+static void
+mips_swap_registers (unsigned int i)
+{
+ int tmpi;
+ const char *tmps;
+
+#define SWAP_INT(X, Y) (tmpi = (X), (X) = (Y), (Y) = tmpi)
+#define SWAP_STRING(X, Y) (tmps = (X), (X) = (Y), (Y) = tmps)
+
+ SWAP_INT (fixed_regs[i], fixed_regs[i + 1]);
+ SWAP_INT (call_used_regs[i], call_used_regs[i + 1]);
+ SWAP_INT (call_really_used_regs[i], call_really_used_regs[i + 1]);
+ SWAP_STRING (reg_names[i], reg_names[i + 1]);
+
+#undef SWAP_STRING
+#undef SWAP_INT
+}
+
+/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
+
+static void
+mips_conditional_register_usage (void)
+{
+ if (ISA_HAS_DSP)
+ {
+ /* These DSP control register fields are global. */
+ global_regs[CCDSP_PO_REGNUM] = 1;
+ global_regs[CCDSP_SC_REGNUM] = 1;
+ }
+ else
+ {
+ int regno;
+
+ for (regno = DSP_ACC_REG_FIRST; regno <= DSP_ACC_REG_LAST; regno++)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+ if (!TARGET_HARD_FLOAT)
+ {
+ int regno;
+
+ for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ for (regno = ST_REG_FIRST; regno <= ST_REG_LAST; regno++)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+  else if (!ISA_HAS_8CC)
+ {
+ int regno;
+
+ /* We only have a single condition-code register. We implement
+ this by fixing all the condition-code registers and generating
+ RTL that refers directly to ST_REG_FIRST. */
+ for (regno = ST_REG_FIRST; regno <= ST_REG_LAST; regno++)
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ }
+ /* In MIPS16 mode, we permit the $t temporary registers to be used
+ for reload. We prohibit the unused $s registers, since they
+ are call-saved, and saving them via a MIPS16 register would
+ probably waste more time than just reloading the value. */
+ if (TARGET_MIPS16)
+ {
+ fixed_regs[18] = call_used_regs[18] = 1;
+ fixed_regs[19] = call_used_regs[19] = 1;
+ fixed_regs[20] = call_used_regs[20] = 1;
+ fixed_regs[21] = call_used_regs[21] = 1;
+ fixed_regs[22] = call_used_regs[22] = 1;
+ fixed_regs[23] = call_used_regs[23] = 1;
+ fixed_regs[26] = call_used_regs[26] = 1;
+ fixed_regs[27] = call_used_regs[27] = 1;
+ fixed_regs[30] = call_used_regs[30] = 1;
+ }
+ /* $f20-$f23 are call-clobbered for n64. */
+ if (mips_abi == ABI_64)
+ {
+ int regno;
+ for (regno = FP_REG_FIRST + 20; regno < FP_REG_FIRST + 24; regno++)
+ call_really_used_regs[regno] = call_used_regs[regno] = 1;
+ }
+ /* Odd registers in the range $f21-$f31 (inclusive) are call-clobbered
+ for n32. */
+ if (mips_abi == ABI_N32)
+ {
+ int regno;
+      for (regno = FP_REG_FIRST + 21; regno <= FP_REG_FIRST + 31; regno += 2)
+ call_really_used_regs[regno] = call_used_regs[regno] = 1;
+ }
+ /* Make sure that double-register accumulator values are correctly
+ ordered for the current endianness. */
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ unsigned int regno;
+
+ mips_swap_registers (MD_REG_FIRST);
+ for (regno = DSP_ACC_REG_FIRST; regno <= DSP_ACC_REG_LAST; regno += 2)
+ mips_swap_registers (regno);
+ }
+}
+
+/* Initialize vector TARGET to VALS. */
+
+void
+mips_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode;
+ enum machine_mode inner;
+ unsigned int i, n_elts;
+ rtx mem;
+
+ mode = GET_MODE (target);
+ inner = GET_MODE_INNER (mode);
+ n_elts = GET_MODE_NUNITS (mode);
+
+ gcc_assert (VECTOR_MODE_P (mode));
+
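+  /* Build the vector in memory: store each element into a stack
+     temporary of the full vector mode, then load the result back
+     into TARGET with a single move.  */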
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
+ XVECEXP (vals, 0, i));
+
+ emit_move_insn (target, mem);
+}
+
+/* When generating MIPS16 code, we want to allocate $24 (T_REG) before
+ other registers for instructions for which it is possible. This
+ encourages the compiler to use CMP in cases where an XOR would
+ require some register shuffling. */
+
+void
+mips_order_regs_for_local_alloc (void)
+{
+ int i;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ reg_alloc_order[i] = i;
+
+ if (TARGET_MIPS16)
+ {
+ /* It really doesn't matter where we put register 0, since it is
+ a fixed register anyhow. */
+ reg_alloc_order[0] = 24;
+ reg_alloc_order[24] = 0;
+ }
+}
+
+/* Implement EH_USES. */
+
+bool
+mips_eh_uses (unsigned int regno)
+{
+ if (reload_completed && !TARGET_ABSOLUTE_JUMPS)
+ {
+ /* We need to force certain registers to be live in order to handle
+ PIC long branches correctly. See mips_must_initialize_gp_p for
+ details. */
+ if (mips_cfun_has_cprestore_slot_p ())
+ {
+ if (regno == CPRESTORE_SLOT_REGNUM)
+ return true;
+ }
+ else
+ {
+ if (cfun->machine->global_pointer == regno)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Implement EPILOGUE_USES. */
+
+bool
+mips_epilogue_uses (unsigned int regno)
+{
+ /* Say that the epilogue uses the return address register. Note that
+ in the case of sibcalls, the values "used by the epilogue" are
+ considered live at the start of the called function. */
+ if (regno == RETURN_ADDR_REGNUM)
+ return true;
+
+ /* If using a GOT, say that the epilogue also uses GOT_VERSION_REGNUM.
+ See the comment above load_call<mode> for details. */
+  if (TARGET_USE_GOT && regno == GOT_VERSION_REGNUM)
+ return true;
+
+ /* An interrupt handler must preserve some registers that are
+ ordinarily call-clobbered. */
+ if (cfun->machine->interrupt_handler_p
+ && mips_interrupt_extra_call_saved_reg_p (regno))
+ return true;
+
+ return false;
+}
+
+/* A for_each_rtx callback. Stop the search if *X is an AT register. */
+
+static int
+mips_at_reg_p (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ return REG_P (*x) && REGNO (*x) == AT_REGNUM;
+}
+
+/* Return true if INSN needs to be wrapped in ".set noat".
+ INSN has NOPERANDS operands, stored in OPVEC. */
+
+static bool
+mips_need_noat_wrapper_p (rtx insn, rtx *opvec, int noperands)
+{
+ int i;
+
+ if (recog_memoized (insn) >= 0)
+ for (i = 0; i < noperands; i++)
+ if (for_each_rtx (&opvec[i], mips_at_reg_p, NULL))
+ return true;
+ return false;
+}
+
+/* Implement FINAL_PRESCAN_INSN. */
+
+void
+mips_final_prescan_insn (rtx insn, rtx *opvec, int noperands)
+{
+ if (mips_need_noat_wrapper_p (insn, opvec, noperands))
+ mips_push_asm_switch (&mips_noat);
+}
+
+/* Implement TARGET_ASM_FINAL_POSTSCAN_INSN. */
+
+static void
+mips_final_postscan_insn (FILE *file ATTRIBUTE_UNUSED, rtx insn,
+ rtx *opvec, int noperands)
+{
+ if (mips_need_noat_wrapper_p (insn, opvec, noperands))
+ mips_pop_asm_switch (&mips_noat);
+}
+
+/* Return the function that is used to expand the <u>mulsidi3 pattern.
+ EXT_CODE is the code of the extension used. Return NULL if widening
+ multiplication shouldn't be used. */
+
+mulsidi3_gen_fn
+mips_mulsidi3_gen_fn (enum rtx_code ext_code)
+{
+ bool signed_p;
+
+ signed_p = ext_code == SIGN_EXTEND;
+ if (TARGET_64BIT)
+ {
+      /* Don't use widening multiplication with MULT when we have DMUL. Even
+	 with the extension of its input operands, DMUL is faster. Note that
+ the extension is not needed for signed multiplication. In order to
+ ensure that we always remove the redundant sign-extension in this
+ case we still expand mulsidi3 for DMUL. */
+ if (ISA_HAS_DMUL3)
+ return signed_p ? gen_mulsidi3_64bit_dmul : NULL;
+ if (TARGET_FIX_R4000)
+ return NULL;
+ return signed_p ? gen_mulsidi3_64bit : gen_umulsidi3_64bit;
+ }
+ else
+ {
+ if (TARGET_FIX_R4000 && !ISA_HAS_DSP)
+ return signed_p ? gen_mulsidi3_32bit_r4000 : gen_umulsidi3_32bit_r4000;
+ return signed_p ? gen_mulsidi3_32bit : gen_umulsidi3_32bit;
+ }
+}
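+/* Note that the gen_mulsidi3_* and gen_umulsidi3_* functions returned
+   above are the emit helpers that genemit creates from the matching
+   named patterns in mips.md; the caller simply invokes the returned
+   function on the operands to expand the multiplication.  */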
+
+/* Return the size in bytes of the trampoline code, padded to
+ TRAMPOLINE_ALIGNMENT bits. The static chain pointer and target
+ function address immediately follow. */
+
+int
+mips_trampoline_code_size (void)
+{
+ if (TARGET_USE_PIC_FN_ADDR_REG)
+ return 4 * 4;
+ else if (ptr_mode == DImode)
+ return 8 * 4;
+ else if (ISA_HAS_LOAD_DELAY)
+ return 6 * 4;
+ else
+ return 4 * 4;
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+
+static void
+mips_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx addr, end_addr, high, low, opcode, mem;
+ rtx trampoline[8];
+ unsigned int i, j;
+ HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;
+
+ /* Work out the offsets of the pointers from the start of the
+ trampoline code. */
+ end_addr_offset = mips_trampoline_code_size ();
+ static_chain_offset = end_addr_offset;
+ target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
+
+ /* Get pointers to the beginning and end of the code block. */
+ addr = force_reg (Pmode, XEXP (m_tramp, 0));
+ end_addr = mips_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));
+
+#define OP(X) gen_int_mode (X, SImode)
+
+ /* Build up the code in TRAMPOLINE. */
+ i = 0;
+ if (TARGET_USE_PIC_FN_ADDR_REG)
+ {
+ /* $25 contains the address of the trampoline. Emit code of the form:
+
+ l[wd] $1, target_function_offset($25)
+ l[wd] $static_chain, static_chain_offset($25)
+ jr $1
+ move $25,$1. */
+ trampoline[i++] = OP (MIPS_LOAD_PTR (AT_REGNUM,
+ target_function_offset,
+ PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset,
+ PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_JR (AT_REGNUM));
+ trampoline[i++] = OP (MIPS_MOVE (PIC_FUNCTION_ADDR_REGNUM, AT_REGNUM));
+ }
+ else if (ptr_mode == DImode)
+ {
+ /* It's too cumbersome to create the full 64-bit address, so let's
+ instead use:
+
+ move $1, $31
+ bal 1f
+ nop
+ 1: l[wd] $25, target_function_offset - 12($31)
+ l[wd] $static_chain, static_chain_offset - 12($31)
+ jr $25
+ move $31, $1
+
+ where 12 is the offset of "1:" from the start of the code block. */
+ trampoline[i++] = OP (MIPS_MOVE (AT_REGNUM, RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_BAL (1));
+ trampoline[i++] = OP (MIPS_NOP);
+ trampoline[i++] = OP (MIPS_LOAD_PTR (PIC_FUNCTION_ADDR_REGNUM,
+ target_function_offset - 12,
+ RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset - 12,
+ RETURN_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_MOVE (RETURN_ADDR_REGNUM, AT_REGNUM));
+ }
+ else
+ {
+ /* If the target has load delays, emit:
+
+ lui $1, %hi(end_addr)
+ lw $25, %lo(end_addr + ...)($1)
+ lw $static_chain, %lo(end_addr + ...)($1)
+ jr $25
+ nop
+
+ Otherwise emit:
+
+ lui $1, %hi(end_addr)
+ lw $25, %lo(end_addr + ...)($1)
+ jr $25
+ lw $static_chain, %lo(end_addr + ...)($1). */
+
+ /* Split END_ADDR into %hi and %lo values. Trampolines are aligned
+ to 64 bits, so the %lo value will have the bottom 3 bits clear. */
+ high = expand_simple_binop (SImode, PLUS, end_addr, GEN_INT (0x8000),
+ NULL, false, OPTAB_WIDEN);
+ high = expand_simple_binop (SImode, LSHIFTRT, high, GEN_INT (16),
+ NULL, false, OPTAB_WIDEN);
+ low = convert_to_mode (SImode, gen_lowpart (HImode, end_addr), true);
+
+ /* Emit the LUI. */
+ opcode = OP (MIPS_LUI (AT_REGNUM, 0));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, high,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the load of the target function. */
+ opcode = OP (MIPS_LOAD_PTR (PIC_FUNCTION_ADDR_REGNUM,
+ target_function_offset - end_addr_offset,
+ AT_REGNUM));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, low,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the JR here, if we can. */
+ if (!ISA_HAS_LOAD_DELAY)
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+
+ /* Emit the load of the static chain register. */
+ opcode = OP (MIPS_LOAD_PTR (STATIC_CHAIN_REGNUM,
+ static_chain_offset - end_addr_offset,
+ AT_REGNUM));
+ trampoline[i++] = expand_simple_binop (SImode, IOR, opcode, low,
+ NULL, false, OPTAB_WIDEN);
+
+ /* Emit the JR, if we couldn't above. */
+ if (ISA_HAS_LOAD_DELAY)
+ {
+ trampoline[i++] = OP (MIPS_JR (PIC_FUNCTION_ADDR_REGNUM));
+ trampoline[i++] = OP (MIPS_NOP);
+ }
+ }
+
+#undef OP
+
+ /* Copy the trampoline code. Leave any padding uninitialized. */
+ for (j = 0; j < i; j++)
+ {
+ mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
+ mips_emit_move (mem, trampoline[j]);
+ }
+
+ /* Set up the static chain pointer field. */
+ mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
+ mips_emit_move (mem, chain_value);
+
+ /* Set up the target function field. */
+ mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
+ mips_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
+
+ /* Flush the code part of the trampoline. */
+ emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
+ emit_insn (gen_clear_cache (addr, end_addr));
+}
+
+/* Implement FUNCTION_PROFILER. */
+
+void
+mips_function_profiler (FILE *file)
+{
+ if (TARGET_MIPS16)
+ sorry ("mips16 function profiling");
+ if (TARGET_LONG_CALLS)
+ {
+ /* For TARGET_LONG_CALLS use $3 for the address of _mcount. */
+ if (Pmode == DImode)
+ fprintf (file, "\tdla\t%s,_mcount\n", reg_names[3]);
+ else
+ fprintf (file, "\tla\t%s,_mcount\n", reg_names[3]);
+ }
+ mips_push_asm_switch (&mips_noat);
+ fprintf (file, "\tmove\t%s,%s\t\t# save current return address\n",
+ reg_names[AT_REGNUM], reg_names[RETURN_ADDR_REGNUM]);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[2],
+ reg_names[STATIC_CHAIN_REGNUM]);
+ if (TARGET_MCOUNT_RA_ADDRESS)
+ {
+      /* If TARGET_MCOUNT_RA_ADDRESS, load $12 with the address of the
+	 ra save location. */
+ if (cfun->machine->frame.ra_fp_offset == 0)
+ /* ra not saved, pass zero. */
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[12], reg_names[0]);
+ else
+ fprintf (file, "\t%s\t%s," HOST_WIDE_INT_PRINT_DEC "(%s)\n",
+ Pmode == DImode ? "dla" : "la", reg_names[12],
+ cfun->machine->frame.ra_fp_offset,
+ reg_names[STACK_POINTER_REGNUM]);
+ }
+ if (!TARGET_NEWABI)
+ fprintf (file,
+ "\t%s\t%s,%s,%d\t\t# _mcount pops 2 words from stack\n",
+ TARGET_64BIT ? "dsubu" : "subu",
+ reg_names[STACK_POINTER_REGNUM],
+ reg_names[STACK_POINTER_REGNUM],
+ Pmode == DImode ? 16 : 8);
+
+ if (TARGET_LONG_CALLS)
+ fprintf (file, "\tjalr\t%s\n", reg_names[3]);
+ else
+ fprintf (file, "\tjal\t_mcount\n");
+ mips_pop_asm_switch (&mips_noat);
+ /* _mcount treats $2 as the static chain register. */
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM],
+ reg_names[2]);
+}
+
+/* Implement TARGET_SHIFT_TRUNCATION_MASK. We want to keep the default
+ behaviour of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes even
+ when TARGET_LOONGSON_VECTORS is true. */
+
+static unsigned HOST_WIDE_INT
+mips_shift_truncation_mask (enum machine_mode mode)
+{
+ if (TARGET_LOONGSON_VECTORS && VECTOR_MODE_P (mode))
+ return 0;
+
+ return GET_MODE_BITSIZE (mode) - 1;
+}
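+/* For SImode, for example, the mask above is 31, so only the low five
+   bits of a shift count are significant; the zero returned for Loongson
+   vector modes makes no such promise.  */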
+
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE mips_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE mips_option_optimization_table
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS mips_legitimize_address
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE mips_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE mips_output_function_epilogue
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION mips_select_rtx_section
+#undef TARGET_ASM_FUNCTION_RODATA_SECTION
+#define TARGET_ASM_FUNCTION_RODATA_SECTION mips_function_rodata_section
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT mips_sched_init
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER mips_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 mips_sched_reorder2
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE mips_variable_issue
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST mips_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE mips_issue_rate
+#undef TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN
+#define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN mips_init_dfa_post_cycle_insn
+#undef TARGET_SCHED_DFA_POST_ADVANCE_CYCLE
+#define TARGET_SCHED_DFA_POST_ADVANCE_CYCLE mips_dfa_post_advance_cycle
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ mips_multipass_dfa_lookahead
+#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+ mips_small_register_classes_for_mode_p
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT \
+ | TARGET_CPU_DEFAULT \
+ | TARGET_ENDIAN_DEFAULT \
+ | TARGET_FP_EXCEPTIONS_DEFAULT \
+ | MASK_CHECK_ZERO_DIV \
+ | MASK_FUSED_MADD)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION mips_handle_option
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL mips_function_ok_for_sibcall
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES mips_insert_attributes
+#undef TARGET_MERGE_DECL_ATTRIBUTES
+#define TARGET_MERGE_DECL_ATTRIBUTES mips_merge_decl_attributes
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION mips_set_current_function
+
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE mips_valid_pointer_mode
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST mips_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST mips_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS mips_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST mips_address_cost
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P mips_in_small_data_p
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG mips_reorg
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS mips_preferred_reload_class
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START mips_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS mips_init_libfuncs
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST mips_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START mips_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR mips_gimplify_va_arg_expr
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY mips_return_in_memory
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB mips_return_in_msb
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK mips_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND mips_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS mips_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P mips_print_operand_punct_valid_p
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS mips_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING mips_strict_argument_naming
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE mips_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES mips_callee_copies
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES mips_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG mips_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE mips_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY mips_function_arg_boundary
+
+#undef TARGET_MODE_REP_EXTENDED
+#define TARGET_MODE_REP_EXTENDED mips_mode_rep_extended
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P mips_vector_mode_supported_p
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P mips_scalar_mode_supported_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS mips_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL mips_builtin_decl
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN mips_expand_builtin
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM mips_cannot_force_const_mem
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO mips_encode_section_info
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE mips_attribute_table
+/* All our function attributes are related to how out-of-line copies should
+ be compiled or called. They don't in themselves prevent inlining. */
+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
+
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY mips_extra_live_on_entry
+
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P mips_use_blocks_for_constant_p
+#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
+#define TARGET_USE_ANCHORS_FOR_SYMBOL_P mips_use_anchors_for_symbol_p
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES mips_comp_type_attributes
+
+#ifdef HAVE_AS_DTPRELWORD
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL mips_output_dwarf_dtprel
+#endif
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN mips_dwarf_register_span
+
+#undef TARGET_IRA_COVER_CLASSES
+#define TARGET_IRA_COVER_CLASSES mips_ira_cover_classes
+
+#undef TARGET_ASM_FINAL_POSTSCAN_INSN
+#define TARGET_ASM_FINAL_POSTSCAN_INSN mips_final_postscan_insn
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P mips_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED mips_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE mips_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE mips_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT mips_trampoline_init
+
+#undef TARGET_ASM_OUTPUT_SOURCE_FILENAME
+#define TARGET_ASM_OUTPUT_SOURCE_FILENAME mips_output_filename
+
+#undef TARGET_SHIFT_TRUNCATION_MASK
+#define TARGET_SHIFT_TRUNCATION_MASK mips_shift_truncation_mask
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-mips.h"
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
new file mode 100644
index 000000000..9600dcb28
--- /dev/null
+++ b/gcc/config/mips/mips.h
@@ -0,0 +1,2984 @@
+/* Definitions of target machine for GNU compiler. MIPS version.
+   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+   Free Software Foundation, Inc.
+ Contributed by A. Lichnewsky (lich@inria.inria.fr).
+ Changed by Michael Meissner (meissner@osf.org).
+ 64-bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
+ Brendan Eich (brendan@microunity.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "config/vxworks-dummy.h"
+
+#ifdef GENERATOR_FILE
+/* This is used in some insn conditions, so needs to be declared, but
+ does not need to be defined. */
+extern int target_flags_explicit;
+#endif
+
+/* MIPS external variables defined in mips.c. */
+
+/* Which ABI to use. ABI_32 (original 32, or o32), ABI_N32 (n32),
+ ABI_64 (n64) are all defined by SGI. ABI_O64 is o32 extended
+ to work on a 64-bit machine. */
+
+#define ABI_32 0
+#define ABI_N32 1
+#define ABI_64 2
+#define ABI_EABI 3
+#define ABI_O64 4
+
+/* Masks that affect tuning.
+
+ PTF_AVOID_BRANCHLIKELY
+ Set if it is usually not profitable to use branch-likely instructions
+ for this target, typically because the branches are always predicted
+ taken and so incur a large overhead when not taken. */
+#define PTF_AVOID_BRANCHLIKELY 0x1
+
+/* Information about one recognized processor. Defined here for the
+ benefit of TARGET_CPU_CPP_BUILTINS. */
+struct mips_cpu_info {
+ /* The 'canonical' name of the processor as far as GCC is concerned.
+ It's typically a manufacturer's prefix followed by a numerical
+ designation. It should be lowercase. */
+ const char *name;
+
+ /* The internal processor number that most closely matches this
+ entry. Several processors can have the same value, if there's no
+ difference between them from GCC's point of view. */
+ enum processor cpu;
+
+ /* The ISA level that the processor implements. */
+ int isa;
+
+ /* A mask of PTF_* values. */
+ unsigned int tune_flags;
+};
+
+/* Enumerates the setting of the -mcode-readable option. */
+enum mips_code_readable_setting {
+ CODE_READABLE_NO,
+ CODE_READABLE_PCREL,
+ CODE_READABLE_YES
+};
+
+/* Macros to silence warnings about numbers being signed in traditional
+ C and unsigned in ISO C when compiled on 32-bit hosts. */
+
+#define BITMASK_HIGH (((unsigned long)1) << 31) /* 0x80000000 */
+#define BITMASK_UPPER16 ((unsigned long)0xffff << 16) /* 0xffff0000 */
+#define BITMASK_LOWER16 ((unsigned long)0xffff) /* 0x0000ffff */
+
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+/* True if we are generating position-independent VxWorks RTP code. */
+#define TARGET_RTP_PIC (TARGET_VXWORKS_RTP && flag_pic)
+
+/* True if the output file is marked as ".abicalls; .option pic0"
+ (-call_nonpic). */
+#define TARGET_ABICALLS_PIC0 \
+ (TARGET_ABSOLUTE_ABICALLS && TARGET_PLT)
+
+/* True if the output file is marked as ".abicalls; .option pic2" (-KPIC). */
+#define TARGET_ABICALLS_PIC2 \
+ (TARGET_ABICALLS && !TARGET_ABICALLS_PIC0)
+
+/* True if the call patterns should be split into a jalr followed by
+ an instruction to restore $gp. It is only safe to split the load
+ from the call when every use of $gp is explicit.
+
+ See mips_must_initialize_gp_p for details about how we manage the
+ global pointer. */
+
+#define TARGET_SPLIT_CALLS \
+ (TARGET_EXPLICIT_RELOCS && TARGET_CALL_CLOBBERED_GP && epilogue_completed)
+
+/* True if we're generating a form of -mabicalls in which we can use
+ operators like %hi and %lo to refer to locally-binding symbols.
+ We can only do this for -mno-shared, and only then if we can use
+ relocation operations instead of assembly macros. It isn't really
+ worth using absolute sequences for 64-bit symbols because GOT
+ accesses are so much shorter. */
+
+#define TARGET_ABSOLUTE_ABICALLS \
+ (TARGET_ABICALLS \
+ && !TARGET_SHARED \
+ && TARGET_EXPLICIT_RELOCS \
+ && !ABI_HAS_64BIT_SYMBOLS)
+
+/* True if we can optimize sibling calls. For simplicity, we only
+ handle cases in which call_insn_operand will reject invalid
+ sibcall addresses. There are two cases in which this isn't true:
+
+ - TARGET_MIPS16. call_insn_operand accepts constant addresses
+ but there is no direct jump instruction. It isn't worth
+ using sibling calls in this case anyway; they would usually
+ be longer than normal calls.
+
+ - TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS. call_insn_operand
+ accepts global constants, but all sibcalls must be indirect. */
+#define TARGET_SIBCALLS \
+ (!TARGET_MIPS16 && (!TARGET_USE_GOT || TARGET_EXPLICIT_RELOCS))
+
+/* True if we need to use a global offset table to access some symbols. */
+#define TARGET_USE_GOT (TARGET_ABICALLS || TARGET_RTP_PIC)
+
+/* True if TARGET_USE_GOT and if $gp is a call-clobbered register. */
+#define TARGET_CALL_CLOBBERED_GP (TARGET_ABICALLS && TARGET_OLDABI)
+
+/* True if TARGET_USE_GOT and if $gp is a call-saved register. */
+#define TARGET_CALL_SAVED_GP (TARGET_USE_GOT && !TARGET_CALL_CLOBBERED_GP)
+
+/* True if we should use .cprestore to store to the cprestore slot.
+
+ We continue to use .cprestore for explicit-reloc code so that JALs
+ inside inline asms will work correctly. */
+#define TARGET_CPRESTORE_DIRECTIVE \
+ (TARGET_ABICALLS_PIC2 && !TARGET_MIPS16)
+
+/* True if we can use the J and JAL instructions. */
+#define TARGET_ABSOLUTE_JUMPS \
+ (!flag_pic || TARGET_ABSOLUTE_ABICALLS)
+
+/* True if indirect calls must use register class PIC_FN_ADDR_REG.
+ This is true for both the PIC and non-PIC VxWorks RTP modes. */
+#define TARGET_USE_PIC_FN_ADDR_REG (TARGET_ABICALLS || TARGET_VXWORKS_RTP)
+
+/* True if .gpword or .gpdword should be used for switch tables.
+
+ Although GAS does understand .gpdword, the SGI linker mishandles
+ the relocations GAS generates (R_MIPS_GPREL32 followed by R_MIPS_64).
+ We therefore disable GP-relative switch tables for n64 on IRIX targets. */
+#define TARGET_GPWORD \
+ (TARGET_ABICALLS \
+ && !TARGET_ABSOLUTE_ABICALLS \
+ && !(mips_abi == ABI_64 && TARGET_IRIX6))
+
+/* True if the output must have a writable .eh_frame.
+ See ASM_PREFERRED_EH_DATA_FORMAT for details. */
+#ifdef HAVE_LD_PERSONALITY_RELAXATION
+#define TARGET_WRITABLE_EH_FRAME 0
+#else
+#define TARGET_WRITABLE_EH_FRAME (flag_pic && TARGET_SHARED)
+#endif
+
+/* Test the assembler to set ISA_HAS_DSP_MULT to DSP Rev 1 or 2. */
+#ifdef HAVE_AS_DSPR1_MULT
+#define ISA_HAS_DSP_MULT ISA_HAS_DSP
+#else
+#define ISA_HAS_DSP_MULT ISA_HAS_DSPR2
+#endif
+
+/* Generate mips16 code. */
+#define TARGET_MIPS16 ((target_flags & MASK_MIPS16) != 0)
+/* Generate mips16e code. Default 16-bit ASE for mips32* and mips64*. */
+#define GENERATE_MIPS16E (TARGET_MIPS16 && mips_isa >= 32)
+/* Generate mips16e register save/restore sequences. */
+#define GENERATE_MIPS16E_SAVE_RESTORE (GENERATE_MIPS16E && mips_abi == ABI_32)
+
+/* True if we're generating a form of MIPS16 code in which general
+ text loads are allowed. */
+#define TARGET_MIPS16_TEXT_LOADS \
+ (TARGET_MIPS16 && mips_code_readable == CODE_READABLE_YES)
+
+/* True if we're generating a form of MIPS16 code in which PC-relative
+ loads are allowed. */
+#define TARGET_MIPS16_PCREL_LOADS \
+ (TARGET_MIPS16 && mips_code_readable >= CODE_READABLE_PCREL)
+
+/* Generic ISA defines. */
+#define ISA_MIPS1 (mips_isa == 1)
+#define ISA_MIPS2 (mips_isa == 2)
+#define ISA_MIPS3 (mips_isa == 3)
+#define ISA_MIPS4 (mips_isa == 4)
+#define ISA_MIPS32 (mips_isa == 32)
+#define ISA_MIPS32R2 (mips_isa == 33)
+#define ISA_MIPS64 (mips_isa == 64)
+#define ISA_MIPS64R2 (mips_isa == 65)
+
+/* Architecture target defines. */
+#define TARGET_LOONGSON_2E (mips_arch == PROCESSOR_LOONGSON_2E)
+#define TARGET_LOONGSON_2F (mips_arch == PROCESSOR_LOONGSON_2F)
+#define TARGET_LOONGSON_2EF (TARGET_LOONGSON_2E || TARGET_LOONGSON_2F)
+#define TARGET_LOONGSON_3A (mips_arch == PROCESSOR_LOONGSON_3A)
+#define TARGET_MIPS3900 (mips_arch == PROCESSOR_R3900)
+#define TARGET_MIPS4000 (mips_arch == PROCESSOR_R4000)
+#define TARGET_MIPS4120 (mips_arch == PROCESSOR_R4120)
+#define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130)
+#define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400)
+#define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500)
+#define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000)
+#define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000)
+#define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON)
+#define TARGET_SB1 (mips_arch == PROCESSOR_SB1 \
+ || mips_arch == PROCESSOR_SB1A)
+#define TARGET_SR71K (mips_arch == PROCESSOR_SR71000)
+
+/* Scheduling target defines. */
+#define TUNE_20KC (mips_tune == PROCESSOR_20KC)
+#define TUNE_24K (mips_tune == PROCESSOR_24KC \
+ || mips_tune == PROCESSOR_24KF2_1 \
+ || mips_tune == PROCESSOR_24KF1_1)
+#define TUNE_74K (mips_tune == PROCESSOR_74KC \
+ || mips_tune == PROCESSOR_74KF2_1 \
+ || mips_tune == PROCESSOR_74KF1_1 \
+ || mips_tune == PROCESSOR_74KF3_2)
+#define TUNE_LOONGSON_2EF (mips_tune == PROCESSOR_LOONGSON_2E \
+ || mips_tune == PROCESSOR_LOONGSON_2F)
+#define TUNE_LOONGSON_3A (mips_tune == PROCESSOR_LOONGSON_3A)
+#define TUNE_MIPS3000 (mips_tune == PROCESSOR_R3000)
+#define TUNE_MIPS3900 (mips_tune == PROCESSOR_R3900)
+#define TUNE_MIPS4000 (mips_tune == PROCESSOR_R4000)
+#define TUNE_MIPS4120 (mips_tune == PROCESSOR_R4120)
+#define TUNE_MIPS4130 (mips_tune == PROCESSOR_R4130)
+#define TUNE_MIPS5000 (mips_tune == PROCESSOR_R5000)
+#define TUNE_MIPS5400 (mips_tune == PROCESSOR_R5400)
+#define TUNE_MIPS5500 (mips_tune == PROCESSOR_R5500)
+#define TUNE_MIPS6000 (mips_tune == PROCESSOR_R6000)
+#define TUNE_MIPS7000 (mips_tune == PROCESSOR_R7000)
+#define TUNE_MIPS9000 (mips_tune == PROCESSOR_R9000)
+#define TUNE_OCTEON (mips_tune == PROCESSOR_OCTEON)
+#define TUNE_SB1 (mips_tune == PROCESSOR_SB1 \
+ || mips_tune == PROCESSOR_SB1A)
+
+/* Whether vector modes and intrinsics for ST Microelectronics
+ Loongson-2E/2F processors should be enabled. In o32 pairs of
+ floating-point registers provide 64-bit values. */
+#define TARGET_LOONGSON_VECTORS (TARGET_HARD_FLOAT_ABI \
+ && (TARGET_LOONGSON_2EF \
+ || TARGET_LOONGSON_3A))
+
+/* True if the pre-reload scheduler should try to create chains of
+ multiply-add or multiply-subtract instructions. For example,
+ suppose we have:
+
+ t1 = a * b
+ t2 = t1 + c * d
+ t3 = e * f
+ t4 = t3 - g * h
+
+ t1 will have a higher priority than t2 and t3 will have a higher
+ priority than t4. However, before reload, there is no dependence
+ between t1 and t3, and they can often have similar priorities.
+ The scheduler will then tend to prefer:
+
+ t1 = a * b
+ t3 = e * f
+ t2 = t1 + c * d
+ t4 = t3 - g * h
+
+ which stops us from making full use of macc/madd-style instructions.
+ This sort of situation occurs frequently in Fourier transforms and
+ in unrolled loops.
+
+ To counter this, the TUNE_MACC_CHAINS code will reorder the ready
+ queue so that chained multiply-add and multiply-subtract instructions
+ appear ahead of any other instruction that is likely to clobber lo.
+ In the example above, if t2 and t3 become ready at the same time,
+ the code ensures that t2 is scheduled first.
+
+ Multiply-accumulate instructions are a bigger win for some targets
+ than others, so this macro is defined on an opt-in basis. */
+#define TUNE_MACC_CHAINS (TUNE_MIPS5500 \
+ || TUNE_MIPS4120 \
+ || TUNE_MIPS4130 \
+ || TUNE_24K)
+
+#define TARGET_OLDABI (mips_abi == ABI_32 || mips_abi == ABI_O64)
+#define TARGET_NEWABI (mips_abi == ABI_N32 || mips_abi == ABI_64)
+
+/* TARGET_HARD_FLOAT and TARGET_SOFT_FLOAT reflect whether the FPU is
+ directly accessible, while the command-line options select
+ TARGET_HARD_FLOAT_ABI and TARGET_SOFT_FLOAT_ABI to reflect the ABI
+ in use. */
+#define TARGET_HARD_FLOAT (TARGET_HARD_FLOAT_ABI && !TARGET_MIPS16)
+#define TARGET_SOFT_FLOAT (TARGET_SOFT_FLOAT_ABI || TARGET_MIPS16)
+
+/* False if SC acts as a memory barrier with respect to itself,
+   otherwise a SYNC will be emitted after SC for atomic operations
+   that require ordering between the SC and following loads and
+   stores. It says nothing about the ordering of loads and stores
+   prior to and following the SC, only about the SC itself and
+   those loads and stores that follow it. */
+#define TARGET_SYNC_AFTER_SC (!TARGET_OCTEON)
+
+/* IRIX specific stuff. */
+#define TARGET_IRIX6 0
+
+/* Define preprocessor macros for the -march and -mtune options.
+ PREFIX is either _MIPS_ARCH or _MIPS_TUNE, INFO is the selected
+ processor. If INFO's canonical name is "foo", define PREFIX to
+ be "foo", and define an additional macro PREFIX_FOO. */
+#define MIPS_CPP_SET_PROCESSOR(PREFIX, INFO) \
+ do \
+ { \
+ char *macro, *p; \
+ \
+ macro = concat ((PREFIX), "_", (INFO)->name, NULL); \
+ for (p = macro; *p != 0; p++) \
+ *p = TOUPPER (*p); \
+ \
+ builtin_define (macro); \
+ builtin_define_with_value ((PREFIX), (INFO)->name, 1); \
+ free (macro); \
+ } \
+ while (0)
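+/* For example, MIPS_CPP_SET_PROCESSOR ("_MIPS_ARCH", INFO) for an INFO
+   whose canonical name is "r4000" defines the macro _MIPS_ARCH_R4000
+   and defines _MIPS_ARCH itself as the quoted string "r4000".  */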
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ /* Everyone but IRIX defines this to mips. */ \
+ if (!TARGET_IRIX6) \
+ builtin_assert ("machine=mips"); \
+ \
+ builtin_assert ("cpu=mips"); \
+ builtin_define ("__mips__"); \
+ builtin_define ("_mips"); \
+ \
+ /* We do this here because __mips is defined below and so we \
+ can't use builtin_define_std. We don't ever want to define \
+ "mips" for VxWorks because some of the VxWorks headers \
+ construct include filenames from a root directory macro, \
+ an architecture macro and a filename, where the architecture \
+ macro expands to 'mips'. If we define 'mips' to 1, the \
+ architecture macro expands to 1 as well. */ \
+ if (!flag_iso && !TARGET_VXWORKS) \
+ builtin_define ("mips"); \
+ \
+ if (TARGET_64BIT) \
+ builtin_define ("__mips64"); \
+ \
+ if (!TARGET_IRIX6) \
+ { \
+ /* Treat _R3000 and _R4000 like register-size \
+ defines, which is how they've historically \
+ been used. */ \
+ if (TARGET_64BIT) \
+ { \
+ builtin_define_std ("R4000"); \
+ builtin_define ("_R4000"); \
+ } \
+ else \
+ { \
+ builtin_define_std ("R3000"); \
+ builtin_define ("_R3000"); \
+ } \
+ } \
+ if (TARGET_FLOAT64) \
+ builtin_define ("__mips_fpr=64"); \
+ else \
+ builtin_define ("__mips_fpr=32"); \
+ \
+ if (mips_base_mips16) \
+ builtin_define ("__mips16"); \
+ \
+ if (TARGET_MIPS3D) \
+ builtin_define ("__mips3d"); \
+ \
+ if (TARGET_SMARTMIPS) \
+ builtin_define ("__mips_smartmips"); \
+ \
+ if (TARGET_DSP) \
+ { \
+ builtin_define ("__mips_dsp"); \
+ if (TARGET_DSPR2) \
+ { \
+ builtin_define ("__mips_dspr2"); \
+ builtin_define ("__mips_dsp_rev=2"); \
+ } \
+ else \
+ builtin_define ("__mips_dsp_rev=1"); \
+ } \
+ \
+ MIPS_CPP_SET_PROCESSOR ("_MIPS_ARCH", mips_arch_info); \
+ MIPS_CPP_SET_PROCESSOR ("_MIPS_TUNE", mips_tune_info); \
+ \
+ if (ISA_MIPS1) \
+ { \
+ builtin_define ("__mips=1"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS1"); \
+ } \
+ else if (ISA_MIPS2) \
+ { \
+ builtin_define ("__mips=2"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS2"); \
+ } \
+ else if (ISA_MIPS3) \
+ { \
+ builtin_define ("__mips=3"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS3"); \
+ } \
+ else if (ISA_MIPS4) \
+ { \
+ builtin_define ("__mips=4"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS4"); \
+ } \
+ else if (ISA_MIPS32) \
+ { \
+ builtin_define ("__mips=32"); \
+ builtin_define ("__mips_isa_rev=1"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS32"); \
+ } \
+ else if (ISA_MIPS32R2) \
+ { \
+ builtin_define ("__mips=32"); \
+ builtin_define ("__mips_isa_rev=2"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS32"); \
+ } \
+ else if (ISA_MIPS64) \
+ { \
+ builtin_define ("__mips=64"); \
+ builtin_define ("__mips_isa_rev=1"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS64"); \
+ } \
+ else if (ISA_MIPS64R2) \
+ { \
+ builtin_define ("__mips=64"); \
+ builtin_define ("__mips_isa_rev=2"); \
+ builtin_define ("_MIPS_ISA=_MIPS_ISA_MIPS64"); \
+ } \
+ \
+ switch (mips_abi) \
+ { \
+ case ABI_32: \
+ builtin_define ("_ABIO32=1"); \
+ builtin_define ("_MIPS_SIM=_ABIO32"); \
+ break; \
+ \
+ case ABI_N32: \
+ builtin_define ("_ABIN32=2"); \
+ builtin_define ("_MIPS_SIM=_ABIN32"); \
+ break; \
+ \
+ case ABI_64: \
+ builtin_define ("_ABI64=3"); \
+ builtin_define ("_MIPS_SIM=_ABI64"); \
+ break; \
+ \
+ case ABI_O64: \
+ builtin_define ("_ABIO64=4"); \
+ builtin_define ("_MIPS_SIM=_ABIO64"); \
+ break; \
+ } \
+ \
+ builtin_define_with_int_value ("_MIPS_SZINT", INT_TYPE_SIZE); \
+ builtin_define_with_int_value ("_MIPS_SZLONG", LONG_TYPE_SIZE); \
+ builtin_define_with_int_value ("_MIPS_SZPTR", POINTER_SIZE); \
+ builtin_define_with_int_value ("_MIPS_FPSET", \
+ 32 / MAX_FPRS_PER_FMT); \
+ \
+ /* These defines reflect the ABI in use, not whether the \
+ FPU is directly accessible. */ \
+ if (TARGET_NO_FLOAT) \
+ builtin_define ("__mips_no_float"); \
+ else if (TARGET_HARD_FLOAT_ABI) \
+ builtin_define ("__mips_hard_float"); \
+ else \
+ builtin_define ("__mips_soft_float"); \
+ \
+ if (TARGET_SINGLE_FLOAT) \
+ builtin_define ("__mips_single_float"); \
+ \
+ if (TARGET_PAIRED_SINGLE_FLOAT) \
+ builtin_define ("__mips_paired_single_float"); \
+ \
+ if (TARGET_BIG_ENDIAN) \
+ { \
+ builtin_define_std ("MIPSEB"); \
+ builtin_define ("_MIPSEB"); \
+ } \
+ else \
+ { \
+ builtin_define_std ("MIPSEL"); \
+ builtin_define ("_MIPSEL"); \
+ } \
+ \
+ /* Whether calls should go through $25. The separate __PIC__ \
+ macro indicates whether abicalls code might use a GOT. */ \
+ if (TARGET_ABICALLS) \
+ builtin_define ("__mips_abicalls"); \
+ \
+ /* Whether Loongson vector modes are enabled. */ \
+ if (TARGET_LOONGSON_VECTORS) \
+ builtin_define ("__mips_loongson_vector_rev"); \
+ \
+ /* Historical Octeon macro. */ \
+ if (TARGET_OCTEON) \
+ builtin_define ("__OCTEON__"); \
+ \
+ /* Macros dependent on the C dialect. */ \
+ if (preprocessing_asm_p ()) \
+ { \
+ builtin_define_std ("LANGUAGE_ASSEMBLY"); \
+ builtin_define ("_LANGUAGE_ASSEMBLY"); \
+ } \
+ else if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("_LANGUAGE_C_PLUS_PLUS"); \
+ builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \
+ builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \
+ } \
+ else \
+ { \
+ builtin_define_std ("LANGUAGE_C"); \
+ builtin_define ("_LANGUAGE_C"); \
+ } \
+ if (c_dialect_objc ()) \
+ { \
+ builtin_define ("_LANGUAGE_OBJECTIVE_C"); \
+ builtin_define ("__LANGUAGE_OBJECTIVE_C"); \
+ /* Bizarre, but needed at least for Irix. */ \
+ builtin_define_std ("LANGUAGE_C"); \
+ builtin_define ("_LANGUAGE_C"); \
+ } \
+ \
+ if (mips_abi == ABI_EABI) \
+ builtin_define ("__mips_eabi"); \
+ \
+ if (TARGET_CACHE_BUILTIN) \
+ builtin_define ("__GCC_HAVE_BUILTIN_MIPS_CACHE"); \
+ } \
+ while (0)
+
+/* Default target_flags if no switches are specified. */
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT 0
+#endif
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT 0
+#endif
+
+#ifndef TARGET_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT MASK_BIG_ENDIAN
+#endif
+
+#ifndef TARGET_FP_EXCEPTIONS_DEFAULT
+#define TARGET_FP_EXCEPTIONS_DEFAULT MASK_FP_EXCEPTIONS
+#endif
+
+/* 'from-abi' makes a good default: you get whatever the ABI requires. */
+#ifndef MIPS_ISA_DEFAULT
+#ifndef MIPS_CPU_STRING_DEFAULT
+#define MIPS_CPU_STRING_DEFAULT "from-abi"
+#endif
+#endif
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+/* Make this a compile-time constant for libgcc2. */
+#ifdef __mips64
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif /* IN_LIBGCC2 */
+
+/* Force the call stack unwinders in unwind.inc not to be MIPS16 code
+   when compiled with hardware floating point. This is because MIPS16
+   code cannot save and restore the floating-point registers, which
+   matters in a mixed MIPS16/non-MIPS16 environment. */
+
+#ifdef IN_LIBGCC2
+#if __mips_hard_float
+#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((__nomips16__))
+#endif
+#endif /* IN_LIBGCC2 */
+
+#define TARGET_LIBGCC_SDATA_SECTION ".sdata"
+
+#ifndef MULTILIB_ENDIAN_DEFAULT
+#if TARGET_ENDIAN_DEFAULT == 0
+#define MULTILIB_ENDIAN_DEFAULT "EL"
+#else
+#define MULTILIB_ENDIAN_DEFAULT "EB"
+#endif
+#endif
+
+#ifndef MULTILIB_ISA_DEFAULT
+# if MIPS_ISA_DEFAULT == 1
+# define MULTILIB_ISA_DEFAULT "mips1"
+# else
+# if MIPS_ISA_DEFAULT == 2
+# define MULTILIB_ISA_DEFAULT "mips2"
+# else
+# if MIPS_ISA_DEFAULT == 3
+# define MULTILIB_ISA_DEFAULT "mips3"
+# else
+# if MIPS_ISA_DEFAULT == 4
+# define MULTILIB_ISA_DEFAULT "mips4"
+# else
+# if MIPS_ISA_DEFAULT == 32
+# define MULTILIB_ISA_DEFAULT "mips32"
+# else
+# if MIPS_ISA_DEFAULT == 33
+# define MULTILIB_ISA_DEFAULT "mips32r2"
+# else
+# if MIPS_ISA_DEFAULT == 64
+# define MULTILIB_ISA_DEFAULT "mips64"
+# else
+# if MIPS_ISA_DEFAULT == 65
+# define MULTILIB_ISA_DEFAULT "mips64r2"
+# else
+# define MULTILIB_ISA_DEFAULT "mips1"
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#ifndef MIPS_ABI_DEFAULT
+#define MIPS_ABI_DEFAULT ABI_32
+#endif
+
+/* Use the most portable ABI flag for the ASM specs. */
+
+#if MIPS_ABI_DEFAULT == ABI_32
+#define MULTILIB_ABI_DEFAULT "mabi=32"
+#endif
+
+#if MIPS_ABI_DEFAULT == ABI_O64
+#define MULTILIB_ABI_DEFAULT "mabi=o64"
+#endif
+
+#if MIPS_ABI_DEFAULT == ABI_N32
+#define MULTILIB_ABI_DEFAULT "mabi=n32"
+#endif
+
+#if MIPS_ABI_DEFAULT == ABI_64
+#define MULTILIB_ABI_DEFAULT "mabi=64"
+#endif
+
+#if MIPS_ABI_DEFAULT == ABI_EABI
+#define MULTILIB_ABI_DEFAULT "mabi=eabi"
+#endif
+
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { MULTILIB_ENDIAN_DEFAULT, MULTILIB_ISA_DEFAULT, MULTILIB_ABI_DEFAULT }
+#endif
+
+/* We must pass -EL to the linker by default for little-endian embedded
+   targets using linker scripts with an OUTPUT_FORMAT line. Otherwise, the
+   linker will default to using big-endian output files. The OUTPUT_FORMAT
+   line must be in the linker script; without it, -EB/-EL will not work. */
+
+#ifndef ENDIAN_SPEC
+#if TARGET_ENDIAN_DEFAULT == 0
+#define ENDIAN_SPEC "%{!EB:%{!meb:-EL}} %{EB|meb:-EB}"
+#else
+#define ENDIAN_SPEC "%{!EL:%{!mel:-EB}} %{EL|mel:-EL}"
+#endif
+#endif
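+/* For example, with a little-endian default (TARGET_ENDIAN_DEFAULT == 0)
+   the spec passes -EL to the linker unless -EB or -meb was given, in
+   which case it passes -EB; the big-endian default is the mirror image. */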
+
+/* A spec condition that matches all non-mips16 -mips arguments. */
+
+#define MIPS_ISA_LEVEL_OPTION_SPEC \
+ "mips1|mips2|mips3|mips4|mips32*|mips64*"
+
+/* A spec condition that matches all non-mips16 architecture arguments. */
+
+#define MIPS_ARCH_OPTION_SPEC \
+ MIPS_ISA_LEVEL_OPTION_SPEC "|march=*"
+
+/* A spec that infers a -mips argument from an -march argument,
+ or injects the default if no architecture is specified. */
+
+#define MIPS_ISA_LEVEL_SPEC \
+ "%{" MIPS_ISA_LEVEL_OPTION_SPEC ":;: \
+ %{march=mips1|march=r2000|march=r3000|march=r3900:-mips1} \
+ %{march=mips2|march=r6000:-mips2} \
+ %{march=mips3|march=r4*|march=vr4*|march=orion|march=loongson2*:-mips3} \
+ %{march=mips4|march=r8000|march=vr5*|march=rm7000|march=rm9000 \
+ |march=r10000|march=r12000|march=r14000|march=r16000:-mips4} \
+ %{march=mips32|march=4kc|march=4km|march=4kp|march=4ksc:-mips32} \
+ %{march=mips32r2|march=m4k|march=4ke*|march=4ksd|march=24k* \
+ |march=34k*|march=74k*|march=1004k*: -mips32r2} \
+ %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000 \
+ |march=xlr|march=loongson3a: -mips64} \
+ %{march=mips64r2|march=octeon: -mips64r2} \
+ %{!march=*: -" MULTILIB_ISA_DEFAULT "}}"
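+/* For example, -march=r4600 matches the march=r4* pattern above and so
+   injects -mips3, while a command line with no -march or -mips option
+   gets the MULTILIB_ISA_DEFAULT level instead. */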
+
+/* A spec that infers a -mhard-float or -msoft-float setting from an
+ -march argument. Note that soft-float and hard-float code are not
+ link-compatible. */
+
+#define MIPS_ARCH_FLOAT_SPEC \
+ "%{mhard-float|msoft-float|march=mips*:; \
+ march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \
+ |march=34kc|march=74kc|march=1004kc|march=5kc \
+ |march=octeon|march=xlr: -msoft-float; \
+ march=*: -mhard-float}"
+
+/* A spec condition that matches 32-bit options. It only works if
+ MIPS_ISA_LEVEL_SPEC has been applied. */
+
+#define MIPS_32BIT_OPTION_SPEC \
+ "mips1|mips2|mips32*|mgp32"
+
+#if MIPS_ABI_DEFAULT == ABI_O64 \
+ || MIPS_ABI_DEFAULT == ABI_N32 \
+ || MIPS_ABI_DEFAULT == ABI_64
+#define OPT_ARCH64 "mabi=32|mgp32:;"
+#define OPT_ARCH32 "mabi=32|mgp32"
+#else
+#define OPT_ARCH64 "mabi=o64|mabi=n32|mabi=64|mgp64"
+#define OPT_ARCH32 "mabi=o64|mabi=n32|mabi=64|mgp64:;"
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+   --with-arch is ignored if -march or a -mips option (other than
+   -mips16) is specified; likewise --with-arch-32 and --with-arch-64.
+ --with-tune is ignored if -mtune is specified; likewise
+ --with-tune-32 and --with-tune-64.
+ --with-abi is ignored if -mabi is specified.
+ --with-float is ignored if -mhard-float or -msoft-float are
+ specified.
+ --with-divide is ignored if -mdivide-traps or -mdivide-breaks are
+ specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}" }, \
+ {"arch_32", "%{" OPT_ARCH32 ":%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \
+ {"arch_64", "%{" OPT_ARCH64 ":%{" MIPS_ARCH_OPTION_SPEC ":;: -march=%(VALUE)}}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:-mtune=%(VALUE)}}" }, \
+ {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }, \
+ {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \
+ {"llsc", "%{!mllsc:%{!mno-llsc:-m%(VALUE)}}" }, \
+ {"mips-plt", "%{!mplt:%{!mno-plt:-m%(VALUE)}}" }, \
+ {"synci", "%{!msynci:%{!mno-synci:-m%(VALUE)}}" }
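+
+/* For example, a compiler configured with --with-arch=mips32r2 makes the
+   driver add -march=mips32r2 unless the command line already contains
+   an -march or -mips option. */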
+
+
+/* A spec that infers the -mdsp setting from an -march argument. */
+#define BASE_DRIVER_SELF_SPECS \
+ "%{!mno-dsp: \
+ %{march=24ke*|march=34k*|march=1004k*: -mdsp} \
+ %{march=74k*:%{!mno-dspr2: -mdspr2 -mdsp}}}"
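+
+/* For example, with the spec above, "-march=74kf" implicitly adds
+   -mdspr2 -mdsp and "-march=34kc" adds -mdsp, unless the user passes
+   the corresponding -mno-dsp (or -mno-dspr2) option. */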
+
+#define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS
+
+#define GENERATE_DIVIDE_TRAPS (TARGET_DIVIDE_TRAPS \
+ && ISA_HAS_COND_TRAP)
+
+#define GENERATE_BRANCHLIKELY (TARGET_BRANCHLIKELY && !TARGET_MIPS16)
+
+/* True if the ABI can only work with 64-bit integer registers. We
+ generally allow ad-hoc variations for TARGET_SINGLE_FLOAT, but
+ otherwise floating-point registers must also be 64-bit. */
+#define ABI_NEEDS_64BIT_REGS (TARGET_NEWABI || mips_abi == ABI_O64)
+
+/* Likewise for 32-bit regs. */
+#define ABI_NEEDS_32BIT_REGS (mips_abi == ABI_32)
+
+/* True if the file format uses 64-bit symbols. At present, this is
+ only true for n64, which uses 64-bit ELF. */
+#define FILE_HAS_64BIT_SYMBOLS (mips_abi == ABI_64)
+
+/* True if symbols are 64 bits wide. This is usually determined by
+ the ABI's file format, but it can be overridden by -msym32. Note that
+ overriding the size with -msym32 changes the ABI of relocatable objects,
+ although it doesn't change the ABI of a fully-linked object. */
+#define ABI_HAS_64BIT_SYMBOLS (FILE_HAS_64BIT_SYMBOLS && !TARGET_SYM32)
+
+/* ISA has instructions for managing 64-bit fp and gp regs (e.g. mips3). */
+#define ISA_HAS_64BIT_REGS (ISA_MIPS3 \
+ || ISA_MIPS4 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2)
+
+/* ISA has branch likely instructions (e.g. mips2). */
+/* Disable branch-likely for tx39 until the compare code is rewritten;
+   such instructions have not been generated for it up to this point. */
+#define ISA_HAS_BRANCHLIKELY (!ISA_MIPS1)
+
+/* ISA has a three-operand multiplication instruction (usually spelt "mul"). */
+#define ISA_HAS_MUL3 ((TARGET_MIPS3900 \
+ || TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_MIPS7000 \
+ || TARGET_MIPS9000 \
+ || TARGET_MAD \
+ || ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA has a three-operand 64-bit multiplication instruction (dmul). */
+#define ISA_HAS_DMUL3 (TARGET_64BIT \
+ && TARGET_OCTEON \
+ && !TARGET_MIPS16)
+
+/* ISA has the floating-point conditional move instructions introduced
+ in mips4. */
+#define ISA_HAS_FP_CONDMOVE ((ISA_MIPS4 \
+ || ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS5500 \
+ && !TARGET_MIPS16)
+
+/* ISA has the integer conditional move instructions introduced in mips4 and
+ ST Loongson 2E/2F. */
+#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF)
+
+/* ISA has LDC1 and SDC1. */
+#define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16)
+
+/* ISA has the mips4 FP condition code instructions: FP-compare to CC,
+ branch on CC, and move (both FP and non-FP) on CC. */
+#define ISA_HAS_8CC (ISA_MIPS4 \
+ || ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2)
+
+/* This is a catch all for other mips4 instructions: indexed load, the
+ FP madd and msub instructions, and the FP recip and recip sqrt
+ instructions. */
+#define ISA_HAS_FP4 ((ISA_MIPS4 \
+ || (ISA_MIPS32R2 && TARGET_FLOAT64) \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA has paired-single instructions. */
+#define ISA_HAS_PAIRED_SINGLE (ISA_MIPS32R2 || ISA_MIPS64 || ISA_MIPS64R2)
+
+/* ISA has conditional trap instructions. */
+#define ISA_HAS_COND_TRAP (!ISA_MIPS1 \
+ && !TARGET_MIPS16)
+
+/* ISA has integer multiply-accumulate instructions, madd and msub. */
+#define ISA_HAS_MADD_MSUB ((ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* Integer multiply-accumulate instructions should be generated. */
+#define GENERATE_MADD_MSUB (ISA_HAS_MADD_MSUB && !TUNE_74K)
+
+/* ISA has floating-point madd and msub instructions 'd = a * b [+-] c'. */
+#define ISA_HAS_FP_MADD4_MSUB4 ISA_HAS_FP4
+
+/* ISA has floating-point madd and msub instructions 'c = a * b [+-] c'. */
+#define ISA_HAS_FP_MADD3_MSUB3 TARGET_LOONGSON_2EF
+
+/* ISA has floating-point nmadd and nmsub instructions
+ 'd = -((a * b) [+-] c)'. */
+#define ISA_HAS_NMADD4_NMSUB4(MODE) \
+ ((ISA_MIPS4 \
+ || (ISA_MIPS32R2 && (MODE) == V2SFmode) \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && (!TARGET_MIPS5400 || TARGET_MAD) \
+ && !TARGET_MIPS16)
+
+/* ISA has floating-point nmadd and nmsub instructions
+ 'c = -((a * b) [+-] c)'. */
+#define ISA_HAS_NMADD3_NMSUB3(MODE) \
+ TARGET_LOONGSON_2EF
+
+/* ISA has count leading zeroes/ones instruction (not implemented). */
+#define ISA_HAS_CLZ_CLO ((ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA has three operand multiply instructions that put
+ the high part in an accumulator: mulhi or mulhiu. */
+#define ISA_HAS_MULHI ((TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_SR71K) \
+ && !TARGET_MIPS16)
+
+/* ISA has three operand multiply instructions that
+   negate the result and put it in an accumulator. */
+#define ISA_HAS_MULS ((TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_SR71K) \
+ && !TARGET_MIPS16)
+
+/* ISA has three operand multiply instructions that subtract the
+   result from a 4th operand and put the result in an accumulator. */
+#define ISA_HAS_MSAC ((TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_SR71K) \
+ && !TARGET_MIPS16)
+
+/* ISA has three operand multiply instructions that add the result
+   to a 4th operand and put the result in an accumulator. */
+#define ISA_HAS_MACC ((TARGET_MIPS4120 \
+ || TARGET_MIPS4130 \
+ || TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_SR71K) \
+ && !TARGET_MIPS16)
+
+/* ISA has NEC VR-style MACC, MACCHI, DMACC and DMACCHI instructions. */
+#define ISA_HAS_MACCHI ((TARGET_MIPS4120 \
+ || TARGET_MIPS4130) \
+ && !TARGET_MIPS16)
+
+/* ISA has the "ror" (rotate right) instructions. */
+#define ISA_HAS_ROR ((ISA_MIPS32R2 \
+ || ISA_MIPS64R2 \
+ || TARGET_MIPS5400 \
+ || TARGET_MIPS5500 \
+ || TARGET_SR71K \
+ || TARGET_SMARTMIPS) \
+ && !TARGET_MIPS16)
+
+/* ISA has data prefetch instructions. This controls use of 'pref'. */
+#define ISA_HAS_PREFETCH ((ISA_MIPS4 \
+ || TARGET_LOONGSON_2EF \
+ || ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA has data indexed prefetch instructions. This controls use of
+ 'prefx', along with TARGET_HARD_FLOAT and TARGET_DOUBLE_FLOAT.
+ (prefx is a cop1x instruction, so can only be used if FP is
+ enabled.) */
+#define ISA_HAS_PREFETCHX ((ISA_MIPS4 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* True if trunc.w.s and trunc.w.d are real (not synthetic)
+ instructions. Both require TARGET_HARD_FLOAT, and trunc.w.d
+ also requires TARGET_DOUBLE_FLOAT. */
+#define ISA_HAS_TRUNC_W (!ISA_MIPS1)
+
+/* ISA includes the MIPS32r2 seb and seh instructions. */
+#define ISA_HAS_SEB_SEH ((ISA_MIPS32R2 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA includes the MIPS32/64 rev 2 ext and ins instructions. */
+#define ISA_HAS_EXT_INS ((ISA_MIPS32R2 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA has instructions for accessing top part of 64-bit fp regs. */
+#define ISA_HAS_MXHC1 (TARGET_FLOAT64 \
+ && (ISA_MIPS32R2 \
+ || ISA_MIPS64R2))
+
+/* ISA has lwxs instruction (load word with scaled index address). */
+#define ISA_HAS_LWXS (TARGET_SMARTMIPS && !TARGET_MIPS16)
+
+/* The DSP ASE is available. */
+#define ISA_HAS_DSP (TARGET_DSP && !TARGET_MIPS16)
+
+/* Revision 2 of the DSP ASE is available. */
+#define ISA_HAS_DSPR2 (TARGET_DSPR2 && !TARGET_MIPS16)
+
+/* True if the result of a load is not available to the next instruction.
+ A nop will then be needed between instructions like "lw $4,..."
+ and "addiu $4,$4,1". */
+#define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \
+ && !TARGET_MIPS3900 \
+ && !TARGET_MIPS16)
+
+/* Likewise mtc1 and mfc1. */
+#define ISA_HAS_XFER_DELAY (mips_isa <= 3 \
+ && !TARGET_LOONGSON_2EF)
+
+/* Likewise floating-point comparisons. */
+#define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \
+ && !TARGET_LOONGSON_2EF)
+
+/* True if mflo and mfhi can be immediately followed by instructions
+ which write to the HI and LO registers.
+
+ According to MIPS specifications, MIPS ISAs I, II, and III need
+ (at least) two instructions between the reads of HI/LO and
+ instructions which write them, and later ISAs do not. Contradicting
+ the MIPS specifications, some MIPS IV processor user manuals (e.g.
+ the UM for the NEC Vr5000) document needing the instructions between
+ HI/LO reads and writes, as well. Therefore, we declare only MIPS32,
+ MIPS64 and later ISAs to have the interlocks, plus any specific
+ earlier-ISA CPUs for which CPU documentation declares that the
+ instructions are really interlocked. */
+#define ISA_HAS_HILO_INTERLOCKS (ISA_MIPS32 \
+ || ISA_MIPS32R2 \
+ || ISA_MIPS64 \
+ || ISA_MIPS64R2 \
+ || TARGET_MIPS5500 \
+ || TARGET_LOONGSON_2EF)
+
+/* ISA includes synci, jr.hb and jalr.hb. */
+#define ISA_HAS_SYNCI ((ISA_MIPS32R2 \
+ || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
+/* ISA includes sync. */
+#define ISA_HAS_SYNC ((mips_isa >= 2 || TARGET_MIPS3900) && !TARGET_MIPS16)
+#define GENERATE_SYNC \
+ (target_flags_explicit & MASK_LLSC \
+ ? TARGET_LLSC && !TARGET_MIPS16 \
+ : ISA_HAS_SYNC)
+
+/* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC
+ because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC
+ instructions. */
+#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16)
+#define GENERATE_LL_SC \
+ (target_flags_explicit & MASK_LLSC \
+ ? TARGET_LLSC && !TARGET_MIPS16 \
+ : ISA_HAS_LL_SC)
+
+/* ISA includes the baddu instruction. */
+#define ISA_HAS_BADDU (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* ISA includes the bbit* instructions. */
+#define ISA_HAS_BBIT (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* ISA includes the cins instruction. */
+#define ISA_HAS_CINS (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* ISA includes the exts instruction. */
+#define ISA_HAS_EXTS (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* ISA includes the seq and sne instructions. */
+#define ISA_HAS_SEQ_SNE (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* ISA includes the pop instruction. */
+#define ISA_HAS_POP (TARGET_OCTEON && !TARGET_MIPS16)
+
+/* The CACHE instruction is available. */
+#define TARGET_CACHE_BUILTIN (mips_isa >= 3)
+
+/* The CACHE instruction is available in non-MIPS16 code. */
+#define ISA_HAS_CACHE (TARGET_CACHE_BUILTIN && !TARGET_MIPS16)
+
+/* Tell collect what flags to pass to nm. */
+#ifndef NM_FLAGS
+#define NM_FLAGS "-Bn"
+#endif
+
+
+/* SUBTARGET_ASM_OPTIMIZING_SPEC handles passing optimization options
+ to the assembler. It may be overridden by subtargets. */
+#ifndef SUBTARGET_ASM_OPTIMIZING_SPEC
+#define SUBTARGET_ASM_OPTIMIZING_SPEC "\
+%{noasmopt:-O0} \
+%{!noasmopt:%{O:-O2} %{O1:-O2} %{O2:-O2} %{O3:-O3}}"
+#endif
+
+/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to
+ the assembler. It may be overridden by subtargets.
+
+ Beginning with gas 2.13, -mdebug must be passed to correctly handle
+ COFF debugging info. */
+
+#ifndef SUBTARGET_ASM_DEBUGGING_SPEC
+#define SUBTARGET_ASM_DEBUGGING_SPEC "\
+%{g} %{g0} %{g1} %{g2} %{g3} \
+%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \
+%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \
+%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3} \
+%{gcoff:-g} %{gcoff0:-g0} %{gcoff1:-g1} %{gcoff2:-g2} %{gcoff3:-g3} \
+%{gcoff*:-mdebug} %{!gcoff*:-no-mdebug}"
+#endif
+
+/* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be
+ overridden by subtargets. */
+
+#ifndef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC ""
+#endif
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{G*} %(endian_spec) %{mips1} %{mips2} %{mips3} %{mips4} \
+%{mips32*} %{mips64*} \
+%{mips16} %{mno-mips16:-no-mips16} \
+%{mips3d} %{mno-mips3d:-no-mips3d} \
+%{mdmx} %{mno-mdmx:-no-mdmx} \
+%{mdsp} %{mno-dsp} \
+%{mdspr2} %{mno-dspr2} \
+%{msmartmips} %{mno-smartmips} \
+%{mmt} %{mno-mt} \
+%{mfix-vr4120} %{mfix-vr4130} \
+%(subtarget_asm_optimizing_spec) \
+%(subtarget_asm_debugging_spec) \
+%{mabi=*} %{!mabi=*: %(asm_abi_default_spec)} \
+%{mgp32} %{mgp64} %{march=*} %{mxgot:-xgot} \
+%{mfp32} %{mfp64} \
+%{mshared} %{mno-shared} \
+%{msym32} %{mno-sym32} \
+%{mtune=*} \
+%(subtarget_asm_spec)"
+
+/* Extra switches sometimes passed to the linker. */
+
+#ifndef LINK_SPEC
+#define LINK_SPEC "\
+%(endian_spec) \
+%{G*} %{mips1} %{mips2} %{mips3} %{mips4} %{mips32*} %{mips64*} \
+%{shared}"
+#endif /* LINK_SPEC defined */
+
+
+/* Specs for the compiler proper */
+
+/* SUBTARGET_CC1_SPEC is passed to the compiler proper. It may be
+ overridden by subtargets. */
+#ifndef SUBTARGET_CC1_SPEC
+#define SUBTARGET_CC1_SPEC ""
+#endif
+
+/* CC1_SPEC is the set of arguments to pass to the compiler proper. */
+
+#undef CC1_SPEC
+#define CC1_SPEC "\
+%{G*} %{EB:-meb} %{EL:-mel} %{EB:%{EL:%emay not use both -EB and -EL}} \
+%(subtarget_cc1_spec)"
+
+/* Preprocessor specs. */
+
+/* SUBTARGET_CPP_SPEC is passed to the preprocessor. It may be
+ overridden by subtargets. */
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#define CPP_SPEC "%(subtarget_cpp_spec)"
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name and a string constant that is used by the GCC
+   driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#define EXTRA_SPECS \
+ { "subtarget_cc1_spec", SUBTARGET_CC1_SPEC }, \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ { "subtarget_asm_optimizing_spec", SUBTARGET_ASM_OPTIMIZING_SPEC }, \
+ { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC }, \
+ { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \
+ { "asm_abi_default_spec", "-" MULTILIB_ABI_DEFAULT }, \
+ { "endian_spec", ENDIAN_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define DBX_DEBUGGING_INFO 1 /* generate stabs (OSF/rose) */
+#define DWARF2_DEBUGGING_INFO 1 /* dwarf2 debugging info */
+
+#ifndef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+
+/* The size of DWARF addresses should be the same as the size of symbols
+ in the target file format. They shouldn't depend on things like -msym32,
+ because many DWARF consumers do not allow the mixture of address sizes
+ that one would then get from linking -msym32 code with -msym64 code.
+
+ Note that the default POINTER_SIZE test is not appropriate for MIPS.
+ EABI64 has 64-bit pointers but uses 32-bit ELF. */
+#define DWARF2_ADDR_SIZE (FILE_HAS_64BIT_SYMBOLS ? 8 : 4)
+
+/* By default, turn on GDB extensions. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* Local compiler-generated symbols must have a prefix that the assembler
+ understands. By default, this is $, although some targets (e.g.,
+ NetBSD-ELF) need to override this. */
+
+#ifndef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "$"
+#endif
+
+/* By default on the mips, external symbols do not have an underscore
+ prepended, but some targets (e.g., NetBSD) require this. */
+
+#ifndef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+#endif
+
+/* On Sun 4, this limit is 2048. We use 1500 to be safe,
+ since the length can run past this up to a continuation point. */
+#undef DBX_CONTIN_LENGTH
+#define DBX_CONTIN_LENGTH 1500
+
+/* How to renumber registers for dbx and gdb. */
+#define DBX_REGISTER_NUMBER(REGNO) mips_dbx_regno[REGNO]
+
+/* The mapping from gcc register number to DWARF 2 CFA column number. */
+#define DWARF_FRAME_REGNUM(REGNO) mips_dwarf_regno[REGNO]
+
+/* The DWARF 2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM
+
+/* Before the prologue, RA lives in r31. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, RETURN_ADDR_REGNUM)
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < (TARGET_MIPS16 ? 2 : 4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM)
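+
+/* That is, the EH data registers are the first argument registers:
+   $4-$7, or $4-$5 for MIPS16. */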
+
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_REG_FIRST + 3)
+
+#define EH_USES(N) mips_eh_uses (N)
+
+/* Offsets recorded in opcodes are a multiple of this alignment factor.
+ The default for this in 64-bit mode is 8, which causes problems with
+ SFmode register saves. */
+#define DWARF_CIE_DATA_ALIGNMENT -4
+
+/* Correct the offset of automatic variables and arguments. Note that
+ the MIPS debug format wants all automatic variables and arguments
+ to be in terms of the virtual frame pointer (stack pointer before
+ any adjustment in the function), while the MIPS 3.0 linker wants
+ the frame pointer to be the stack pointer after the initial
+ adjustment. */
+
+#define DEBUGGER_AUTO_OFFSET(X) \
+ mips_debugger_offset (X, (HOST_WIDE_INT) 0)
+#define DEBUGGER_ARG_OFFSET(OFFSET, X) \
+ mips_debugger_offset (X, (HOST_WIDE_INT) OFFSET)
+
+/* Target machine storage layout */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifndef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* For MIPS, width of a floating point register. */
+#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4)
+
+/* The number of consecutive floating-point registers needed to store the
+ largest format supported by the FPU. */
+#define MAX_FPRS_PER_FMT (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2)
+
+/* The number of consecutive floating-point registers needed to store the
+ smallest format supported by the FPU. */
+#define MIN_FPRS_PER_FMT \
+ (ISA_MIPS32 || ISA_MIPS32R2 || ISA_MIPS64 || ISA_MIPS64R2 \
+ ? 1 : MAX_FPRS_PER_FMT)
+
+/* The largest size of value that can be held in floating-point
+ registers and moved with a single instruction. */
+#define UNITS_PER_HWFPVALUE \
+ (TARGET_SOFT_FLOAT_ABI ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG)
+
+/* The largest size of value that can be held in floating-point
+ registers. */
+#define UNITS_PER_FPVALUE \
+ (TARGET_SOFT_FLOAT_ABI ? 0 \
+ : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \
+ : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+
+/* The number of bytes in a double. */
+#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
+
+/* Set the sizes of the core types. */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE (TARGET_LONG64 ? 64 : 32)
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_NEWABI ? 128 : 64)
+
+/* Define the sizes of fixed-point types. */
+#define SHORT_FRACT_TYPE_SIZE 8
+#define FRACT_TYPE_SIZE 16
+#define LONG_FRACT_TYPE_SIZE 32
+#define LONG_LONG_FRACT_TYPE_SIZE 64
+
+#define SHORT_ACCUM_TYPE_SIZE 16
+#define ACCUM_TYPE_SIZE 32
+#define LONG_ACCUM_TYPE_SIZE 64
+/* FIXME. LONG_LONG_ACCUM_TYPE_SIZE should be 128 bits, but GCC
+ doesn't support 128-bit integers for MIPS32 currently. */
+#define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
+
+/* long double is not a fixed mode, but the idea is that, if we
+ support long double, we also want a 128-bit integer type. */
+#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
+
+#ifdef IN_LIBGCC2
+#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+ || (defined _ABI64 && _MIPS_SIM == _ABI64)
+# define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+# else
+# define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+# endif
+#endif
+
+/* Width in bits of a pointer. */
+#ifndef POINTER_SIZE
+#define POINTER_SIZE ((TARGET_LONG64 && TARGET_64BIT) ? 64 : 32)
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+/* 8 is observed to be right on a DECstation and on riscos 4.02. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* There is no point aligning anything to a rounder boundary than this. */
+#define BIGGEST_ALIGNMENT LONG_DOUBLE_TYPE_SIZE
+
+/* All accesses must be aligned. */
+#define STRICT_ALIGNMENT 1
+
+/* Define this if you wish to imitate the way many other C compilers
+ handle alignment of bitfields and the structures that contain
+ them.
+
+ The behavior is that the type written for a bit-field (`int',
+ `short', or other integer type) imposes an alignment for the
+ entire structure, as if the structure really did contain an
+ ordinary field of that type. In addition, the bit-field is placed
+ within the structure so that it would fit within such a field,
+ not crossing a boundary for it.
+
+ Thus, on most machines, a bit-field whose type is written as `int'
+ would not cross a four-byte boundary, and would force four-byte
+ alignment for the whole structure. (The alignment used may not
+ be four bytes; it is controlled by the other alignment
+ parameters.)
+
+ If the macro is defined, its definition should be a C expression;
+ a nonzero value for the expression enables this behavior. */
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* If defined, a C expression to compute the alignment given to a
+ constant that is being placed in memory. CONSTANT is the constant
+ and ALIGN is the alignment that the object would ordinarily have.
+ The value of this macro is used instead of that alignment to align
+ the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that `strcpy' calls that copy
+ constants can be done inline. */
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a static
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that `strcpy' calls
+ that copy constants to character arrays can be done inline. */
+
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* We need this for the same reason as DATA_ALIGNMENT, namely to cause
+ character arrays to be word-aligned so that `strcpy' calls that copy
+ constants to character arrays can be done inline, and 'strcmp' can be
+ optimised to use word loads. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ DATA_ALIGNMENT (TYPE, ALIGN)
+
+#define PAD_VARARGS_DOWN \
+ (FUNCTION_ARG_PADDING (TYPE_MODE (type), type) == downward)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* When in 64-bit mode, move insns will sign extend SImode and CCmode
+ moves. All other references are zero extended. */
+#define LOAD_EXTEND_OP(MODE) \
+ (TARGET_64BIT && ((MODE) == SImode || (MODE) == CCmode) \
+ ? SIGN_EXTEND : ZERO_EXTEND)
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ if ((MODE) == SImode) \
+ (UNSIGNEDP) = 0; \
+ (MODE) = Pmode; \
+ }
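+
+/* That is, narrow integer scalars are widened to Pmode; clearing
+   UNSIGNEDP for SImode means that when Pmode is DImode, 32-bit values
+   are kept sign-extended, matching LOAD_EXTEND_OP above. */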
+
+/* Pmode is always the same as ptr_mode, but not always the same as word_mode.
+ Extensions of pointers to word_mode must be signed. */
+#define POINTERS_EXTEND_UNSIGNED false
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* The [d]clz instructions have the natural values at 0. */
+
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
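+
+/* That is, [d]clz of zero yields the mode width (e.g. 32 for SImode),
+   and the result 2 tells the optimizers that this value also matches
+   the semantics of the __builtin_clz family at zero. */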
+
+/* Standard register usage. */
+
+/* Number of hardware registers. We have:
+
+ - 32 integer registers
+ - 32 floating point registers
+ - 8 condition code registers
+ - 2 accumulator registers (hi and lo)
+ - 32 registers each for coprocessors 0, 2 and 3
+ - 4 fake registers:
+ - ARG_POINTER_REGNUM
+ - FRAME_POINTER_REGNUM
+ - GOT_VERSION_REGNUM (see the comment above load_call<mode> for details)
+ - CPRESTORE_SLOT_REGNUM
+ - 2 dummy entries that were used at various times in the past.
+ - 6 DSP accumulator registers (3 hi-lo pairs) for MIPS DSP ASE
+ - 6 DSP control registers */
+
+#define FIRST_PSEUDO_REGISTER 188
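+
+/* That is: 32 GPRs + 32 FPRs + 8 CCs + 2 hi/lo + 3 * 32 coprocessor
+   + 4 fake + 2 dummy + 6 DSP accumulator + 6 DSP control = 188. */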
+
+/* By default, fix the kernel registers ($26 and $27), the global
+ pointer ($28) and the stack pointer ($29). This can change
+ depending on the command-line options.
+
+ Regarding coprocessor registers: without evidence to the contrary,
+ it's best to assume that each coprocessor register has a unique
+   use. This can be overridden in, e.g., mips_option_override or
+   TARGET_CONDITIONAL_REGISTER_USAGE, should the assumption be
+ inappropriate for a particular target. */
+
+#define FIXED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, \
+ /* COP0 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* COP2 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* COP3 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* 6 DSP accumulator registers & 6 control registers */ \
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 \
+}
+
+
+/* Set up this array for o32 by default.
+
+ Note that we don't mark $31 as a call-clobbered register. The idea is
+ that it's really the call instructions themselves which clobber $31.
+ We don't care what the called function does with it afterwards.
+
+ This approach makes it easier to implement sibcalls. Unlike normal
+ calls, sibcalls don't clobber $31, so the register reaches the
+   called function intact. EPILOGUE_USES says that $31 is useful
+ to the called function. */
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* COP0 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* COP2 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* COP3 registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* 6 DSP accumulator registers & 6 control registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \
+}
+
+
+/* Define this since $28, though fixed, is call-saved in many ABIs. */
+
+#define CALL_REALLY_USED_REGISTERS \
+{ /* General registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, \
+ /* Floating-point registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Others. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, \
+ /* COP0 registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* COP2 registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* COP3 registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* 6 DSP accumulator registers & 6 control registers */ \
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 \
+}
+
+/* Internal macros to classify a register number as to whether it's a
+ general purpose register, a floating point register, a
+ multiply/divide register, or a status register. */
+
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 31
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+#define GP_DBX_FIRST 0
+#define K0_REG_NUM (GP_REG_FIRST + 26)
+#define K1_REG_NUM (GP_REG_FIRST + 27)
+#define KERNEL_REG_P(REGNO) (IN_RANGE (REGNO, K0_REG_NUM, K1_REG_NUM))
+
+#define FP_REG_FIRST 32
+#define FP_REG_LAST 63
+#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1)
+#define FP_DBX_FIRST ((write_symbols == DBX_DEBUG) ? 38 : 32)
+
+#define MD_REG_FIRST 64
+#define MD_REG_LAST 65
+#define MD_REG_NUM (MD_REG_LAST - MD_REG_FIRST + 1)
+#define MD_DBX_FIRST (FP_DBX_FIRST + FP_REG_NUM)
+
+/* The DWARF 2 CFA column which tracks the return address from a
+ signal handler context. This means that to maintain backwards
+ compatibility, no hard register can be assigned this column if it
+ would need to be handled by the DWARF unwinder. */
+#define DWARF_ALT_FRAME_RETURN_COLUMN 66
+
+#define ST_REG_FIRST 67
+#define ST_REG_LAST 74
+#define ST_REG_NUM (ST_REG_LAST - ST_REG_FIRST + 1)
+
+
+/* FIXME: renumber. */
+#define COP0_REG_FIRST 80
+#define COP0_REG_LAST 111
+#define COP0_REG_NUM (COP0_REG_LAST - COP0_REG_FIRST + 1)
+
+#define COP0_STATUS_REG_NUM (COP0_REG_FIRST + 12)
+#define COP0_CAUSE_REG_NUM (COP0_REG_FIRST + 13)
+#define COP0_EPC_REG_NUM (COP0_REG_FIRST + 14)
+
+#define COP2_REG_FIRST 112
+#define COP2_REG_LAST 143
+#define COP2_REG_NUM (COP2_REG_LAST - COP2_REG_FIRST + 1)
+
+#define COP3_REG_FIRST 144
+#define COP3_REG_LAST 175
+#define COP3_REG_NUM (COP3_REG_LAST - COP3_REG_FIRST + 1)
+/* ALL_COP_REG_NUM assumes that COP0, 2, and 3 are numbered consecutively. */
+#define ALL_COP_REG_NUM (COP3_REG_LAST - COP0_REG_FIRST + 1)
+
+#define DSP_ACC_REG_FIRST 176
+#define DSP_ACC_REG_LAST 181
+#define DSP_ACC_REG_NUM (DSP_ACC_REG_LAST - DSP_ACC_REG_FIRST + 1)
+
+#define AT_REGNUM (GP_REG_FIRST + 1)
+#define HI_REGNUM (TARGET_BIG_ENDIAN ? MD_REG_FIRST : MD_REG_FIRST + 1)
+#define LO_REGNUM (TARGET_BIG_ENDIAN ? MD_REG_FIRST + 1 : MD_REG_FIRST)
+
+/* A few bitfield locations for the coprocessor registers. */
+/* Request Interrupt Priority Level is from bit 10 to bit 15 of
+ the cause register for the EIC interrupt mode. */
+#define CAUSE_IPL 10
+/* Interrupt Priority Level is from bit 10 to bit 15 of the status register. */
+#define SR_IPL 10
+/* Exception Level is at bit 1 of the status register. */
+#define SR_EXL 1
+/* Interrupt Enable is at bit 0 of the status register. */
+#define SR_IE 0
+
+/* FPSW_REGNUM is the single condition code used if !ISA_HAS_8CC.
+ If ISA_HAS_8CC, it should not be used, and an arbitrary ST_REG
+ should be used instead. */
+#define FPSW_REGNUM ST_REG_FIRST
+
+#define GP_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define M16_REG_P(REGNO) \
+ (((REGNO) >= 2 && (REGNO) <= 7) || (REGNO) == 16 || (REGNO) == 17)
+#define FP_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define MD_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - MD_REG_FIRST) < MD_REG_NUM)
+#define ST_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - ST_REG_FIRST) < ST_REG_NUM)
+#define COP0_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - COP0_REG_FIRST) < COP0_REG_NUM)
+#define COP2_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - COP2_REG_FIRST) < COP2_REG_NUM)
+#define COP3_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - COP3_REG_FIRST) < COP3_REG_NUM)
+#define ALL_COP_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - COP0_REG_FIRST) < ALL_COP_REG_NUM)
+/* Test if REGNO is one of the 6 new DSP accumulators. */
+#define DSP_ACC_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - DSP_ACC_REG_FIRST) < DSP_ACC_REG_NUM)
+/* Test if REGNO is hi, lo, or one of the 6 new DSP accumulators. */
+#define ACC_REG_P(REGNO) \
+ (MD_REG_P (REGNO) || DSP_ACC_REG_P (REGNO))
+
+#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X)))
+
+/* True if X is (const (unspec [(const_int 0)] UNSPEC_GP)). This is used
+ to initialize the mips16 gp pseudo register. */
+#define CONST_GP_P(X) \
+ (GET_CODE (X) == CONST \
+ && GET_CODE (XEXP (X, 0)) == UNSPEC \
+ && XINT (XEXP (X, 0), 1) == UNSPEC_GP)
+
+/* Return coprocessor number from register number. */
+
+#define COPNUM_AS_CHAR_FROM_REGNUM(REGNO) \
+ (COP0_REG_P (REGNO) ? '0' : COP2_REG_P (REGNO) ? '2' \
+ : COP3_REG_P (REGNO) ? '3' : '?')
+
+
+#define HARD_REGNO_NREGS(REGNO, MODE) mips_hard_regno_nregs (REGNO, MODE)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ mips_hard_regno_mode_ok[ (int)(MODE) ][ (REGNO) ]
+
+#define MODES_TIEABLE_P mips_modes_tieable_p
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + 29)
+
+/* These two registers don't really exist: they get eliminated to either
+ the stack or hard frame pointer. */
+#define ARG_POINTER_REGNUM 77
+#define FRAME_POINTER_REGNUM 78
+
+/* $30 is not available on the mips16, so we use $17 as the frame
+ pointer. */
+#define HARD_FRAME_POINTER_REGNUM \
+ (TARGET_MIPS16 ? GP_REG_FIRST + 17 : GP_REG_FIRST + 30)
+
+#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
+#define HARD_FRAME_POINTER_IS_ARG_POINTER 0
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 15)
+
+/* Registers used as temporaries in prologue/epilogue code:
+
+ - If a MIPS16 PIC function needs access to _gp, it first loads
+ the value into MIPS16_PIC_TEMP and then copies it to $gp.
+
+ - The prologue can use MIPS_PROLOGUE_TEMP as a general temporary
+ register. The register must not conflict with MIPS16_PIC_TEMP.
+
+ - The epilogue can use MIPS_EPILOGUE_TEMP as a general temporary
+ register.
+
+ If we're generating MIPS16 code, these registers must come from the
+ core set of 8. The prologue registers mustn't conflict with any
+ incoming arguments, the static chain pointer, or the frame pointer.
+ The epilogue temporary mustn't conflict with the return registers,
+ the PIC call register ($25), the frame pointer, the EH stack adjustment,
+ or the EH data registers.
+
+ If we're generating interrupt handlers, we use K0 as a temporary register
+ in prologue/epilogue code. */
+
+#define MIPS16_PIC_TEMP_REGNUM (GP_REG_FIRST + 2)
+#define MIPS_PROLOGUE_TEMP_REGNUM \
+ (cfun->machine->interrupt_handler_p ? K0_REG_NUM : GP_REG_FIRST + 3)
+#define MIPS_EPILOGUE_TEMP_REGNUM \
+ (cfun->machine->interrupt_handler_p \
+ ? K0_REG_NUM \
+ : GP_REG_FIRST + (TARGET_MIPS16 ? 6 : 8))
+
+#define MIPS16_PIC_TEMP gen_rtx_REG (Pmode, MIPS16_PIC_TEMP_REGNUM)
+#define MIPS_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, MIPS_PROLOGUE_TEMP_REGNUM)
+#define MIPS_EPILOGUE_TEMP(MODE) gen_rtx_REG (MODE, MIPS_EPILOGUE_TEMP_REGNUM)
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register. */
+#define NO_FUNCTION_CSE 1
+
+/* The ABI-defined global pointer. Sometimes we use a different
+ register in leaf functions: see PIC_OFFSET_TABLE_REGNUM. */
+#define GLOBAL_POINTER_REGNUM (GP_REG_FIRST + 28)
+
+/* We normally use $28 as the global pointer. However, when generating
+ n32/64 PIC, it is better for leaf functions to use a call-clobbered
+ register instead. They can then avoid saving and restoring $28
+ and perhaps avoid using a frame at all.
+
+ When a leaf function uses something other than $28, mips_expand_prologue
+ will modify pic_offset_table_rtx in place. Take the register number
+ from there after reload. */
+#define PIC_OFFSET_TABLE_REGNUM \
+ (reload_completed ? REGNO (pic_offset_table_rtx) : GLOBAL_POINTER_REGNUM)
+
+#define PIC_FUNCTION_ADDR_REGNUM (GP_REG_FIRST + 25)
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+enum reg_class
+{
+ NO_REGS, /* no registers in set */
+ M16_REGS, /* mips16 directly accessible registers */
+ T_REG, /* mips16 T register ($24) */
+ M16_T_REGS, /* mips16 registers plus T register */
+ PIC_FN_ADDR_REG, /* SVR4 PIC function address register */
+ V1_REG, /* Register $v1 ($3) used for TLS access. */
+ LEA_REGS, /* Every GPR except $25 */
+ GR_REGS, /* integer registers */
+ FP_REGS, /* floating point registers */
+ MD0_REG, /* first multiply/divide register */
+ MD1_REG, /* second multiply/divide register */
+ MD_REGS, /* multiply/divide registers (hi/lo) */
+ COP0_REGS, /* generic coprocessor classes */
+ COP2_REGS,
+ COP3_REGS,
+ ST_REGS, /* status registers (fp status) */
+ DSP_ACC_REGS, /* DSP accumulator registers */
+ ACC_REGS, /* Hi/Lo and DSP accumulator registers */
+ FRAME_REGS, /* $arg and $frame */
+ GR_AND_MD0_REGS, /* union classes */
+ GR_AND_MD1_REGS,
+ GR_AND_MD_REGS,
+ GR_AND_ACC_REGS,
+ ALL_REGS, /* all registers */
+ LIM_REG_CLASSES /* max value + 1 */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS GR_REGS
+
+/* An initializer containing the names of the register classes as C
+ string constants. These names are used in writing some of the
+ debugging dumps. */
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "M16_REGS", \
+ "T_REG", \
+ "M16_T_REGS", \
+ "PIC_FN_ADDR_REG", \
+ "V1_REG", \
+ "LEA_REGS", \
+ "GR_REGS", \
+ "FP_REGS", \
+ "MD0_REG", \
+ "MD1_REG", \
+ "MD_REGS", \
+ /* coprocessor registers */ \
+ "COP0_REGS", \
+ "COP2_REGS", \
+ "COP3_REGS", \
+ "ST_REGS", \
+ "DSP_ACC_REGS", \
+ "ACC_REGS", \
+ "FRAME_REGS", \
+ "GR_AND_MD0_REGS", \
+ "GR_AND_MD1_REGS", \
+ "GR_AND_MD_REGS", \
+ "GR_AND_ACC_REGS", \
+ "ALL_REGS" \
+}
+
+/* An initializer containing the contents of the register classes,
+ as integers which are bit masks. The Nth integer specifies the
+ contents of class N. The way the integer MASK is interpreted is
+ that register R is in the class if `MASK & (1 << R)' is 1.
+
+ When the machine has more than 32 registers, an integer does not
+ suffice. Then the integers are replaced by sub-initializers,
+ braced groupings containing several integers. Each
+ sub-initializer must be suitable as an initializer for the type
+ `HARD_REG_SET' which is defined in `hard-reg-set.h'. */
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x000300fc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* M16_REGS */ \
+ { 0x01000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* T_REG */ \
+ { 0x010300fc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* M16_T_REGS */ \
+ { 0x02000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* PIC_FN_ADDR_REG */ \
+ { 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* V1_REG */ \
+ { 0xfdffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* LEA_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* GR_REGS */ \
+ { 0x00000000, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* FP_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000 }, /* MD0_REG */ \
+ { 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000 }, /* MD1_REG */ \
+ { 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000 }, /* MD_REGS */ \
+ { 0x00000000, 0x00000000, 0xffff0000, 0x0000ffff, 0x00000000, 0x00000000 }, /* COP0_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0xffff0000, 0x0000ffff, 0x00000000 }, /* COP2_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffff0000, 0x0000ffff }, /* COP3_REGS */ \
+ { 0x00000000, 0x00000000, 0x000007f8, 0x00000000, 0x00000000, 0x00000000 }, /* ST_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x003f0000 }, /* DSP_ACC_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x003f0000 }, /* ACC_REGS */ \
+ { 0x00000000, 0x00000000, 0x00006000, 0x00000000, 0x00000000, 0x00000000 }, /* FRAME_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000 }, /* GR_AND_MD0_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000002, 0x00000000, 0x00000000, 0x00000000 }, /* GR_AND_MD1_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000 }, /* GR_AND_MD_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x003f0000 }, /* GR_AND_ACC_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xffff67ff, 0xffffffff, 0xffffffff, 0x0fffffff } /* ALL_REGS */ \
+}
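+
+/* For example, the M16_REGS mask 0x000300fc has bits 2-7, 16 and 17 set,
+   i.e. exactly the registers accepted by M16_REG_P above. */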
+
+
+/* A C expression whose value is a register class containing hard
+   register REGNO. In general there is more than one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+
+#define REGNO_REG_CLASS(REGNO) mips_regno_to_class[ (REGNO) ]
+
+/* A macro whose definition is the name of the class to which a
+ valid base register must belong. A base register is one used in
+ an address which is the register value plus a displacement. */
+
+#define BASE_REG_CLASS (TARGET_MIPS16 ? M16_REGS : GR_REGS)
+
+/* A macro whose definition is the name of the class to which a
+ valid index register must belong. An index register is one used
+ in an address where its value is either multiplied by a scale
+ factor or added to another register (as well as added to a
+ displacement). */
+
+#define INDEX_REG_CLASS NO_REGS
+
+/* We generally want to put call-clobbered registers ahead of
+ call-saved ones. (IRA expects this.) */
+
+#define REG_ALLOC_ORDER \
+{ /* Accumulator registers. When GPRs and accumulators have equal \
+ cost, we generally prefer to use accumulators. For example, \
+ a division of multiplication result is better allocated to LO, \
+ so that we put the MFLO at the point of use instead of at the \
+ point of definition. It's also needed if we're to take advantage \
+ of the extra accumulators available with -mdspr2. In some cases, \
+ it can also help to reduce register pressure. */ \
+ 64, 65,176,177,178,179,180,181, \
+ /* Call-clobbered GPRs. */ \
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+ 24, 25, 31, \
+ /* The global pointer. This is call-clobbered for o32 and o64 \
+ abicalls, call-saved for n32 and n64 abicalls, and a program \
+ invariant otherwise. Putting it between the call-clobbered \
+ and call-saved registers should cope with all eventualities. */ \
+ 28, \
+ /* Call-saved GPRs. */ \
+ 16, 17, 18, 19, 20, 21, 22, 23, 30, \
+ /* GPRs that can never be exposed to the register allocator. */ \
+ 0, 26, 27, 29, \
+ /* Call-clobbered FPRs. */ \
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, \
+ /* FPRs that are usually call-saved. The odd ones are actually \
+ call-clobbered for n32, but listing them ahead of the even \
+ registers might encourage the register allocator to fragment \
+ the available FPR pairs. We need paired FPRs to store long \
+ doubles, so it isn't clear that using a different order \
+ for n32 would be a win. */ \
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, \
+ /* None of the remaining classes have defined call-saved \
+ registers. */ \
+ 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, \
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, \
+ 96, 97, 98, 99, 100,101,102,103,104,105,106,107,108,109,110,111, \
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, \
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, \
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, \
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, \
+ 182,183,184,185,186,187 \
+}
+
+/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
+ to be rearranged based on a particular function. On the mips16, we
+ want to allocate $24 (T_REG) before other registers for
+ instructions for which it is possible. */
+
+#define ADJUST_REG_ALLOC_ORDER mips_order_regs_for_local_alloc ()
+
+/* True if VALUE is an unsigned 6-bit number. */
+
+#define UIMM6_OPERAND(VALUE) \
+ (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0)
+
+/* True if VALUE is a signed 10-bit number. */
+
+#define IMM10_OPERAND(VALUE) \
+ ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400)
+
+/* True if VALUE is a signed 16-bit number. */
+
+#define SMALL_OPERAND(VALUE) \
+ ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000)
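+
+/* The additions above are the usual biased-range trick: for
+   SMALL_OPERAND, VALUE + 0x8000 is below 0x10000 as an unsigned number
+   exactly when -0x8000 <= VALUE <= 0x7fff; likewise IMM10_OPERAND
+   accepts -0x200 <= VALUE <= 0x1ff. */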
+
+/* True if VALUE is an unsigned 16-bit number. */
+
+#define SMALL_OPERAND_UNSIGNED(VALUE) \
+ (((VALUE) & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
+
+/* True if VALUE can be loaded into a register using LUI. */
+
+#define LUI_OPERAND(VALUE) \
+ (((VALUE) | 0x7fff0000) == 0x7fff0000 \
+ || ((VALUE) | 0x7fff0000) + 0x10000 == 0)
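+
+/* The first test accepts nonnegative values with a zero low half, such
+   as 0x12340000; the second accepts sign-extended negative counterparts
+   such as -0x10000 (0xffff0000), whose low half is also zero. */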
+
+/* Return a value X with the low 16 bits clear, and such that
+ VALUE - X is a signed 16-bit value. */
+
+#define CONST_HIGH_PART(VALUE) \
+ (((VALUE) + 0x8000) & ~(unsigned HOST_WIDE_INT) 0xffff)
+
+#define CONST_LOW_PART(VALUE) \
+ ((VALUE) - CONST_HIGH_PART (VALUE))
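+
+/* For example, 0x1234abcd has CONST_HIGH_PART 0x12350000 and
+   CONST_LOW_PART -0x5433, matching a LUI of 0x1235 followed by a
+   sign-extending ADDIU of -0x5433. */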
+
+#define SMALL_INT(X) SMALL_OPERAND (INTVAL (X))
+#define SMALL_INT_UNSIGNED(X) SMALL_OPERAND_UNSIGNED (INTVAL (X))
+#define LUI_INT(X) LUI_OPERAND (INTVAL (X))
+
+/* The HI and LO registers can only be reloaded via the general
+ registers. Condition code registers can only be loaded to the
+ general registers, and from the floating point registers. */
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mips_secondary_reload_class (CLASS, MODE, X, true)
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mips_secondary_reload_class (CLASS, MODE, X, false)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) mips_class_max_nregs (CLASS, MODE)
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ mips_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+#define STACK_GROWS_DOWNWARD
+
+#define FRAME_GROWS_DOWNWARD flag_stack_protect
+
+/* Size of the area allocated in the frame to save the GP. */
+
+#define MIPS_GP_SAVE_AREA_SIZE \
+ (TARGET_CALL_CLOBBERED_GP ? MIPS_STACK_ALIGN (UNITS_PER_WORD) : 0)
+
+/* The offset of the first local variable from the frame pointer. See
+ mips_compute_frame_info for details about the frame layout. */
+
+#define STARTING_FRAME_OFFSET \
+ (FRAME_GROWS_DOWNWARD \
+ ? 0 \
+ : crtl->outgoing_args_size + MIPS_GP_SAVE_AREA_SIZE)
+
+#define RETURN_ADDR_RTX mips_return_addr
+
+/* Mask off the MIPS16 ISA bit in unwind addresses.
+
+ The reason for this is a little subtle. When unwinding a call,
+ we are given the call's return address, which on most targets
+ is the address of the following instruction. However, what we
+ actually want to find is the EH region for the call itself.
+ The target-independent unwind code therefore searches for "RA - 1".
+
+ In the MIPS16 case, RA is always an odd-valued (ISA-encoded) address.
+ RA - 1 is therefore the real (even-valued) start of the return
+ instruction. EH region labels are usually odd-valued MIPS16 symbols
+ too, so a search for an even address within a MIPS16 region would
+ usually work.
+
+ However, there is an exception. If the end of an EH region is also
+ the end of a function, the end label is allowed to be even. This is
+ necessary because a following non-MIPS16 function may also need EH
+ information for its first instruction.
+
+ Thus a MIPS16 region may be terminated by an ISA-encoded or a
+ non-ISA-encoded address. This probably isn't ideal, but it is
+ the traditional (legacy) behavior. It is therefore only safe
+ to search MIPS EH regions for an _odd-valued_ address.
+
+ Masking off the ISA bit means that the target-independent code
+ will search for "(RA & -2) - 1", which is guaranteed to be odd. */
+#define MASK_RETURN_ADDR GEN_INT (-2)
+
+
+/* Similarly, don't use the least-significant bit to distinguish
+   pointers to code from vtable indices. */
+
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta
+
+/* The eliminations to $17 are only used for mips16 code. See the
+ definition of HARD_FRAME_POINTER_REGNUM. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, GP_REG_FIRST + 30}, \
+ { ARG_POINTER_REGNUM, GP_REG_FIRST + 17}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, GP_REG_FIRST + 30}, \
+ { FRAME_POINTER_REGNUM, GP_REG_FIRST + 17}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = mips_initial_elimination_offset ((FROM), (TO))
+
+/* Allocate stack space for arguments at the beginning of each function. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* The argument pointer always points to the first argument. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* o32 and o64 reserve stack space for all argument registers. */
+#define REG_PARM_STACK_SPACE(FNDECL) \
+ (TARGET_OLDABI \
+ ? (MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD) \
+ : 0)
+
+/* Define this if it is the responsibility of the caller to
+ allocate the area reserved for arguments passed in registers.
+ If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect
+ of this macro is to determine whether the space is included in
+ `crtl->outgoing_args_size'. */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+#define STACK_BOUNDARY (TARGET_NEWABI ? 128 : 64)
+
+/* Symbolic macros for the registers used to return integer and floating
+ point values. */
+
+#define GP_RETURN (GP_REG_FIRST + 2)
+#define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0))
+
+#define MAX_ARGS_IN_REGISTERS (TARGET_OLDABI ? 4 : 8)
+
+/* Symbolic macros for the first/last argument registers. */
+
+#define GP_ARG_FIRST (GP_REG_FIRST + 4)
+#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
+#define FP_ARG_FIRST (FP_REG_FIRST + 12)
+#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
+
+#define LIBCALL_VALUE(MODE) \
+ mips_function_value (NULL_TREE, NULL_TREE, MODE)
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ mips_function_value (VALTYPE, FUNC, VOIDmode)
+
+/* 1 if N is a possible register number for a function value.
+   On the MIPS, R2, R3 and F0, F2 are the only registers thus used.
+   Currently, only R2 and F0 are implemented here (C has no complex type). */
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN \
+ || (LONG_DOUBLE_TYPE_SIZE == 128 && FP_RETURN != GP_RETURN \
+ && (N) == FP_RETURN + 2))
+
+/* 1 if N is a possible register number for function argument passing.
+ We have no FP argument registers when soft-float. When FP registers
+ are 32 bits, we can't directly reference the odd numbered ones. */
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((IN_RANGE((N), GP_ARG_FIRST, GP_ARG_LAST) \
+ || (IN_RANGE((N), FP_ARG_FIRST, FP_ARG_LAST))) \
+ && !fixed_regs[N])
+
+/* This structure has to cope with two different argument allocation
+ schemes. Most MIPS ABIs view the arguments as a structure, of which
+ the first N words go in registers and the rest go on the stack. If I
+   < N, the Ith word might go in the Ith integer argument register or in a
+ floating-point register. For these ABIs, we only need to remember
+ the offset of the current argument into the structure.
+
+ The EABI instead allocates the integer and floating-point arguments
+ separately. The first N words of FP arguments go in FP registers,
+ the rest go on the stack. Likewise, the first N words of the other
+ arguments go in integer registers, and the rest go on the stack. We
+ need to maintain three counts: the number of integer registers used,
+ the number of floating-point registers used, and the number of words
+ passed on the stack.
+
+ We could keep separate information for the two ABIs (a word count for
+ the standard ABIs, and three separate counts for the EABI). But it
+ seems simpler to view the standard ABIs as forms of EABI that do not
+ allocate floating-point registers.
+
+ So for the standard ABIs, the first N words are allocated to integer
+ registers, and mips_function_arg decides on an argument-by-argument
+ basis whether that argument should really go in an integer register,
+ or in a floating-point one. */
+
+typedef struct mips_args {
+ /* Always true for varargs functions. Otherwise true if at least
+ one argument has been passed in an integer register. */
+ int gp_reg_found;
+
+ /* The number of arguments seen so far. */
+ unsigned int arg_number;
+
+ /* The number of integer registers used so far. For all ABIs except
+ EABI, this is the number of words that have been added to the
+ argument structure, limited to MAX_ARGS_IN_REGISTERS. */
+ unsigned int num_gprs;
+
+ /* For EABI, the number of floating-point registers used so far. */
+ unsigned int num_fprs;
+
+ /* The number of words passed on the stack. */
+ unsigned int stack_words;
+
+ /* On the mips16, we need to keep track of which floating point
+ arguments were passed in general registers, but would have been
+ passed in the FP regs if this were a 32-bit function, so that we
+ can move them to the FP regs if we wind up calling a 32-bit
+ function. We record this information in fp_code, encoded in base
+     four.  A zero digit means no floating-point argument, a one digit
+     means an SFmode argument, a two digit means a DFmode argument, and
+     a three digit is not used.  The low-order digit is the first
+ argument. Thus 6 == 1 * 4 + 2 means a DFmode argument followed by
+ an SFmode argument. ??? A more sophisticated approach will be
+ needed if MIPS_ABI != ABI_32. */
+ int fp_code;
+
+ /* True if the function has a prototype. */
+ int prototype;
+} CUMULATIVE_ARGS;
+
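+/* A minimal sketch (not the port's actual code) of how the fp_code
+   field above can be decoded, consuming one base-four digit (two
+   bits) per argument from the low end:
+
+     unsigned int f;
+     for (f = (unsigned int) cum->fp_code; f != 0; f >>= 2)
+       switch (f & 3)
+         {
+         case 1: ...handle an SFmode argument...; break;
+         case 2: ...handle a DFmode argument...; break;
+         }
+
+   where "cum" stands for a CUMULATIVE_ARGS object.  */
+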
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ mips_init_cumulative_args (&CUM, FNTYPE)
+
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ (mips_pad_arg_upward (MODE, TYPE) ? upward : downward)
+
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ (mips_pad_reg_upward (MODE, TYPE) ? upward : downward)
+
+/* True if using EABI and varargs can be passed in floating-point
+ registers. Under these conditions, we need a more complex form
+ of va_list, which tracks GPR, FPR and stack arguments separately. */
+#define EABI_FLOAT_VARARGS_P \
+ (mips_abi == ABI_EABI && UNITS_PER_FPVALUE >= UNITS_PER_DOUBLE)
+
+
+#define EPILOGUE_USES(REGNO) mips_epilogue_uses (REGNO)
+
+/* Treat LOC as a byte offset from the stack pointer and round it up
+ to the next fully-aligned offset. */
+#define MIPS_STACK_ALIGN(LOC) \
+ (TARGET_NEWABI ? ((LOC) + 15) & -16 : ((LOC) + 7) & -8)
+
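+/* For example, with the 8-byte case above, MIPS_STACK_ALIGN (20)
+   evaluates to (20 + 7) & -8, i.e. 24.  */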
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) mips_function_profiler ((FILE))
+
+/* The profiler preserves all interesting registers, including $31. */
+#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) false
+
+/* No mips port has ever used the profiler counter word, so don't emit it
+ or the label for it. */
+
+#define NO_PROFILE_COUNTERS 1
+
+/* Define this macro if the code for function profiling should come
+ before the function prologue. Normally, the profiling code comes
+ after. */
+
+/* #define PROFILE_BEFORE_PROLOGUE */
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+
+/* Trampolines are a block of code followed by two pointers. */
+
+#define TRAMPOLINE_SIZE \
+ (mips_trampoline_code_size () + GET_MODE_SIZE (ptr_mode) * 2)
+
+/* Forcing a 64-bit alignment for 32-bit targets allows us to load two
+ pointers from a single LUI base. */
+
+#define TRAMPOLINE_ALIGNMENT 64
+
+/* mips_trampoline_init calls this library function to flush
+ program and data caches. */
+
+#ifndef CACHE_FLUSH_FUNC
+#define CACHE_FLUSH_FUNC "_flush_cache"
+#endif
+
+#define MIPS_ICACHE_SYNC(ADDR, SIZE) \
+ /* Flush both caches. We need to flush the data cache in case \
+ the system has a write-back cache. */ \
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func), \
+ LCT_NORMAL, VOIDmode, 3, ADDR, Pmode, SIZE, Pmode, \
+ GEN_INT (3), TYPE_MODE (integer_type_node))
+
+
+/* Addressing modes, and classification of registers for them. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) 0
+#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
+ mips_regno_mode_ok_for_base_p (REGNO, MODE, 1)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects them all.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Some source files that are used after register allocation
+ need to be strict. */
+
+#ifndef REG_OK_STRICT
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ mips_regno_mode_ok_for_base_p (REGNO (X), MODE, 0)
+#else
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ mips_regno_mode_ok_for_base_p (REGNO (X), MODE, 1)
+#endif
+
+#define REG_OK_FOR_INDEX_P(X) 0
+
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 1
+
+/* Check for constness inline but use mips_legitimate_address_p
+ to check whether a constant really is an address. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ (CONSTANT_P (X) && memory_address_p (SImode, X))
+
+#define LEGITIMATE_CONSTANT_P(X) (mips_const_insns (X) > 0)
+
+/* This handles the magic '..CURRENT_FUNCTION' symbol, which means
+ 'the start of the function that this code is output in'. */
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \
+ asm_fprintf ((FILE), "%U%s", \
+ XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); \
+ else \
+ asm_fprintf ((FILE), "%U%s", (NAME))
+
+/* Flag to mark a function decl symbol that requires a long call. */
+#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_LONG_CALL_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0)
+
+/* This flag marks functions that cannot be lazily bound. */
+#define SYMBOL_FLAG_BIND_NOW (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_REF_BIND_NOW_P(RTX) \
+ ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_BIND_NOW) != 0)
+
+/* True if we're generating a form of MIPS16 code in which jump tables
+ are stored in the text section and encoded as 16-bit PC-relative
+ offsets. This is only possible when general text loads are allowed,
+ since the table access itself will be an "lh" instruction. */
+/* ??? 16-bit offsets can overflow in large functions. */
+#define TARGET_MIPS16_SHORT_JUMP_TABLES TARGET_MIPS16_TEXT_LOADS
+
+#define JUMP_TABLES_IN_TEXT_SECTION TARGET_MIPS16_SHORT_JUMP_TABLES
+
+#define CASE_VECTOR_MODE (TARGET_MIPS16_SHORT_JUMP_TABLES ? HImode : ptr_mode)
+
+#define CASE_VECTOR_PC_RELATIVE TARGET_MIPS16_SHORT_JUMP_TABLES
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#ifndef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 1
+#endif
+
+/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets,
+ we generally don't want to use them for copying arbitrary data.
+ A single N-word move is usually the same cost as N single-word moves. */
+#define MOVE_MAX UNITS_PER_WORD
+#define MAX_MOVE_MAX 8
+
+/* Define this macro as a C expression which is nonzero if
+ accessing less than a word of memory (i.e. a `char' or a
+ `short') is no faster than accessing a word of memory, i.e., if
+   such accesses require more than one instruction or if there is no
+ difference in cost between byte and (aligned) word loads.
+
+ On RISC machines, it tends to generate better code to define
+ this as 1, since it avoids making a QI or HI mode register.
+
+   But generating word accesses for -mips16 is generally bad, as shifts
+ (often extended) would be needed for byte accesses. */
+#define SLOW_BYTE_ACCESS (!TARGET_MIPS16)
+
+/* Standard MIPS integer shifts truncate the shift amount to the
+ width of the shifted operand. However, Loongson vector shifts
+ do not truncate the shift amount at all. */
+#define SHIFT_COUNT_TRUNCATED (!TARGET_LOONGSON_VECTORS)
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) \
+ (TARGET_64BIT ? ((INPREC) <= 32 || (OUTPREC) > 32) : 1)
+
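+/* For example, on 64-bit targets TRULY_NOOP_TRUNCATION (32, 64) is 0:
+   truncating a 64-bit value to 32 bits needs an explicit operation,
+   because SImode values are kept in sign-extended form.  */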
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+
+#ifndef Pmode
+#define Pmode (TARGET_64BIT && TARGET_LONG64 ? DImode : SImode)
+#endif
+
+/* Give call MEMs SImode since it is the "most permissive" mode
+ for both 32-bit and 64-bit targets. */
+
+#define FUNCTION_MODE SImode
+
+
+
+/* Define if copies to/from condition code registers should be avoided.
+
+ This is needed for the MIPS because reload_outcc is not complete;
+ it needs to handle cases where the source is a general or another
+ condition code register. */
+#define AVOID_CCMODE_COPIES
+
+/* A C expression for the cost of a branch instruction. A value of
+ 1 is the default; other values are interpreted relative to that. */
+
+#define BRANCH_COST(speed_p, predictable_p) mips_branch_cost
+#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+
+/* If defined, modifies the length assigned to instruction INSN as a
+ function of the context in which it is used. LENGTH is an lvalue
+ that contains the initially computed length of the insn and should
+ be updated with the correct length of the insn. */
+#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
+ ((LENGTH) = mips_adjust_insn_length ((INSN), (LENGTH)))
+
+/* Return the asm template for a non-MIPS16 conditional branch instruction.
+ OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+ its operands. */
+#define MIPS_BRANCH(OPCODE, OPERANDS) \
+ "%*" OPCODE "%?\t" OPERANDS "%/"
+
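+/* For instance, MIPS_BRANCH ("beq", "%0,%1,%2") concatenates to the
+   template "%*beq%?\t%0,%1,%2%/", where %* and %/ handle the .set
+   noreorder/nomacro bookkeeping and the delay-slot nop, and %? adds
+   the "l" suffix for branch-likely variants.  */
+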
+/* Return an asm string that forces INSN to be treated as an absolute
+ J or JAL instruction instead of an assembler macro. */
+#define MIPS_ABSOLUTE_JUMP(INSN) \
+ (TARGET_ABICALLS_PIC2 \
+ ? ".option\tpic0\n\t" INSN "\n\t.option\tpic2" \
+ : INSN)
+
+/* Return the asm template for a call. INSN is the instruction's mnemonic
+ ("j" or "jal"), OPERANDS are its operands, TARGET_OPNO is the operand
+ number of the target. SIZE_OPNO is the operand number of the argument size
+ operand that can optionally hold the call attributes. If SIZE_OPNO is not
+ -1 and the call is indirect, use the function symbol from the call
+   attributes to attach an R_MIPS_JALR relocation to the call.
+
+ When generating GOT code without explicit relocation operators,
+ all calls should use assembly macros. Otherwise, all indirect
+ calls should use "jr" or "jalr"; we will arrange to restore $gp
+ afterwards if necessary. Finally, we can only generate direct
+ calls for -mabicalls by temporarily switching to non-PIC mode. */
+#define MIPS_CALL(INSN, OPERANDS, TARGET_OPNO, SIZE_OPNO) \
+ (TARGET_USE_GOT && !TARGET_EXPLICIT_RELOCS \
+ ? "%*" INSN "\t%" #TARGET_OPNO "%/" \
+ : (REG_P (OPERANDS[TARGET_OPNO]) \
+ && mips_get_pic_call_symbol (OPERANDS, SIZE_OPNO)) \
+ ? ("%*.reloc\t1f,R_MIPS_JALR,%" #SIZE_OPNO "\n" \
+ "1:\t" INSN "r\t%" #TARGET_OPNO "%/") \
+ : REG_P (OPERANDS[TARGET_OPNO]) \
+ ? "%*" INSN "r\t%" #TARGET_OPNO "%/" \
+ : MIPS_ABSOLUTE_JUMP ("%*" INSN "\t%" #TARGET_OPNO "%/"))
+
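+/* As an illustration, a direct call pattern's output function can
+   return MIPS_CALL ("jal", operands, 0, 1), letting the macro pick
+   between the jal, jalr and .reloc forms at output time.  */
+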
+/* Control the assembler format that we output. */
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#ifndef ASM_APP_ON
+#define ASM_APP_ON " #APP\n"
+#endif
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#ifndef ASM_APP_OFF
+#define ASM_APP_OFF " #NO_APP\n"
+#endif
+
+#define REGISTER_NAMES \
+{ "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", \
+ "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", \
+ "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \
+ "$24", "$25", "$26", "$27", "$28", "$sp", "$fp", "$31", \
+ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", \
+ "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \
+ "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \
+ "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \
+ "hi", "lo", "", "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4", \
+ "$fcc5","$fcc6","$fcc7","", "$cprestore", "$arg", "$frame", "$fakec", \
+ "$c0r0", "$c0r1", "$c0r2", "$c0r3", "$c0r4", "$c0r5", "$c0r6", "$c0r7", \
+ "$c0r8", "$c0r9", "$c0r10","$c0r11","$c0r12","$c0r13","$c0r14","$c0r15", \
+ "$c0r16","$c0r17","$c0r18","$c0r19","$c0r20","$c0r21","$c0r22","$c0r23", \
+ "$c0r24","$c0r25","$c0r26","$c0r27","$c0r28","$c0r29","$c0r30","$c0r31", \
+ "$c2r0", "$c2r1", "$c2r2", "$c2r3", "$c2r4", "$c2r5", "$c2r6", "$c2r7", \
+ "$c2r8", "$c2r9", "$c2r10","$c2r11","$c2r12","$c2r13","$c2r14","$c2r15", \
+ "$c2r16","$c2r17","$c2r18","$c2r19","$c2r20","$c2r21","$c2r22","$c2r23", \
+ "$c2r24","$c2r25","$c2r26","$c2r27","$c2r28","$c2r29","$c2r30","$c2r31", \
+ "$c3r0", "$c3r1", "$c3r2", "$c3r3", "$c3r4", "$c3r5", "$c3r6", "$c3r7", \
+ "$c3r8", "$c3r9", "$c3r10","$c3r11","$c3r12","$c3r13","$c3r14","$c3r15", \
+ "$c3r16","$c3r17","$c3r18","$c3r19","$c3r20","$c3r21","$c3r22","$c3r23", \
+ "$c3r24","$c3r25","$c3r26","$c3r27","$c3r28","$c3r29","$c3r30","$c3r31", \
+ "$ac1hi","$ac1lo","$ac2hi","$ac2lo","$ac3hi","$ac3lo","$dsp_po","$dsp_sc", \
+ "$dsp_ca","$dsp_ou","$dsp_cc","$dsp_ef" }
+
+/* List the "software" names for each register. Also list the numerical
+ names for $fp and $sp. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "$29", 29 + GP_REG_FIRST }, \
+ { "$30", 30 + GP_REG_FIRST }, \
+ { "at", 1 + GP_REG_FIRST }, \
+ { "v0", 2 + GP_REG_FIRST }, \
+ { "v1", 3 + GP_REG_FIRST }, \
+ { "a0", 4 + GP_REG_FIRST }, \
+ { "a1", 5 + GP_REG_FIRST }, \
+ { "a2", 6 + GP_REG_FIRST }, \
+ { "a3", 7 + GP_REG_FIRST }, \
+ { "t0", 8 + GP_REG_FIRST }, \
+ { "t1", 9 + GP_REG_FIRST }, \
+ { "t2", 10 + GP_REG_FIRST }, \
+ { "t3", 11 + GP_REG_FIRST }, \
+ { "t4", 12 + GP_REG_FIRST }, \
+ { "t5", 13 + GP_REG_FIRST }, \
+ { "t6", 14 + GP_REG_FIRST }, \
+ { "t7", 15 + GP_REG_FIRST }, \
+ { "s0", 16 + GP_REG_FIRST }, \
+ { "s1", 17 + GP_REG_FIRST }, \
+ { "s2", 18 + GP_REG_FIRST }, \
+ { "s3", 19 + GP_REG_FIRST }, \
+ { "s4", 20 + GP_REG_FIRST }, \
+ { "s5", 21 + GP_REG_FIRST }, \
+ { "s6", 22 + GP_REG_FIRST }, \
+ { "s7", 23 + GP_REG_FIRST }, \
+ { "t8", 24 + GP_REG_FIRST }, \
+ { "t9", 25 + GP_REG_FIRST }, \
+ { "k0", 26 + GP_REG_FIRST }, \
+ { "k1", 27 + GP_REG_FIRST }, \
+ { "gp", 28 + GP_REG_FIRST }, \
+ { "sp", 29 + GP_REG_FIRST }, \
+ { "fp", 30 + GP_REG_FIRST }, \
+ { "ra", 31 + GP_REG_FIRST }, \
+ ALL_COP_ADDITIONAL_REGISTER_NAMES \
+}
+
+/* This is meant to be redefined in the host dependent files. It is a
+ set of alternative names and regnums for mips coprocessors. */
+
+#define ALL_COP_ADDITIONAL_REGISTER_NAMES
+
+#define DBR_OUTPUT_SEQEND(STREAM) \
+do \
+ { \
+ /* Undo the effect of '%*'. */ \
+ mips_pop_asm_switch (&mips_nomacro); \
+ mips_pop_asm_switch (&mips_noreorder); \
+ /* Emit a blank line after the delay slot for emphasis. */ \
+ fputs ("\n", STREAM); \
+ } \
+while (0)
+
+/* mips-tfile does not understand .stabd directives. */
+#define DBX_OUTPUT_SOURCE_LINE(STREAM, LINE, COUNTER) do { \
+ dbxout_begin_stabn_sline (LINE); \
+ dbxout_stab_value_internal_label ("LM", &COUNTER); \
+} while (0)
+
+/* Use .loc directives for SDB line numbers. */
+#define SDB_OUTPUT_SOURCE_LINE(STREAM, LINE) \
+ fprintf (STREAM, "\t.loc\t%d %d\n", num_source_filenames, LINE)
+
+/* The MIPS implementation uses some labels for its own purpose. The
+ following lists what labels are created, and are all formed by the
+ pattern $L[a-z].*. The machine independent portion of GCC creates
+ labels matching: $L[A-Z][0-9]+ and $L[0-9]+.
+
+ LM[0-9]+ Silicon Graphics/ECOFF stabs label before each stmt.
+ $Lb[0-9]+ Begin blocks for MIPS debug support
+ $Lc[0-9]+ Label for use in s<xx> operation.
+ $Le[0-9]+ End blocks for MIPS debug support */
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+ mips_declare_object (STREAM, NAME, "", ":\n")
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* This says how to define a global common symbol. */
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON mips_output_aligned_decl_common
+
+/* This says how to define a local common symbol (i.e., not visible to
+ linker). */
+
+#ifndef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
+ mips_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, false)
+#endif
+
+/* This says how to output an external. It would be possible not to
+   output anything and let an undefined symbol become external.  However
+ the assembler uses length information on externals to allocate in
+ data/sdata bss/sbss, thereby saving exec time. */
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(STREAM,DECL,NAME) \
+ mips_output_external(STREAM,DECL,NAME)
+
+/* This is how to declare a function name. The actual work of
+ emitting the label is moved to function_prologue, so that we can
+ get the line number correctly emitted before the .ent directive,
+ and after any .file directives. Define as empty so that the function
+ is not declared before the .ent directive elsewhere. */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(STREAM,NAME,DECL)
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long)(NUM))
+
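+/* For example, with LOCAL_LABEL_PREFIX "$", PREFIX "L" and NUM 42 the
+   buffer receives "*$L42"; the leading "*" tells assemble_name to emit
+   the rest of the string verbatim.  */
+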
+/* Print debug labels as "foo = ." rather than "foo:" because they should
+ represent a byte pointer rather than an ISA-encoded address. This is
+ particularly important for code like:
+
+ $LFBxxx = .
+ .cfi_startproc
+ ...
+ .section .gcc_except_table,...
+ ...
+ .uleb128 foo-$LFBxxx
+
+   The .uleb128 requires $LFBxxx to match the FDE start address, which is
+ likewise a byte pointer rather than an ISA-encoded address.
+
+ At the time of writing, this hook is not used for the function end
+ label:
+
+ $LFExxx:
+ .end foo
+
+ But this doesn't matter, because GAS doesn't treat a pre-.end label
+ as a MIPS16 one anyway. */
+
+#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \
+ fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ ptr_mode == DImode ? ".dword" : ".word", \
+ LOCAL_LABEL_PREFIX, \
+ VALUE)
+
+/* This is how to output an element of a case-vector. We can make the
+ entries PC-relative in MIPS16 code and GP-relative when .gp(d)word
+ is supported. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+do { \
+ if (TARGET_MIPS16_SHORT_JUMP_TABLES) \
+ fprintf (STREAM, "\t.half\t%sL%d-%sL%d\n", \
+ LOCAL_LABEL_PREFIX, VALUE, LOCAL_LABEL_PREFIX, REL); \
+ else if (TARGET_GPWORD) \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ ptr_mode == DImode ? ".gpdword" : ".gpword", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+ else if (TARGET_RTP_PIC) \
+ { \
+ /* Make the entry relative to the start of the function. */ \
+ rtx fnsym = XEXP (DECL_RTL (current_function_decl), 0); \
+ fprintf (STREAM, "\t%s\t%sL%d-", \
+ Pmode == DImode ? ".dword" : ".word", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+ assemble_name (STREAM, XSTR (fnsym, 0)); \
+ fprintf (STREAM, "\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\t%s\t%sL%d\n", \
+ ptr_mode == DImode ? ".dword" : ".word", \
+ LOCAL_LABEL_PREFIX, VALUE); \
+} while (0)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(STREAM,LOG) \
+ fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
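+/* For example, LOG == 3 emits "\t.align\t3", requesting 2**3 == 8-byte
+   alignment.  */
+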
+/* This is how to output an assembler line to advance the location
+ counter by SIZE bytes. */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM,SIZE) \
+ fprintf (STREAM, "\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* This is how to output a string. */
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII mips_output_ascii
+
+/* Output #ident in the read-only data section.  */
+#undef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(FILE, STRING) \
+{ \
+ const char *p = STRING; \
+ int size = strlen (p) + 1; \
+ switch_to_section (readonly_data_section); \
+ assemble_string (p, size); \
+}
+
+/* Default to -G 8 */
+#ifndef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 8
+#endif
+
+/* Define the strings to put out for each section in the object file. */
+#define TEXT_SECTION_ASM_OP "\t.text" /* instructions */
+#define DATA_SECTION_ASM_OP "\t.data" /* large data */
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.rdata" /* read-only data */
+
+#define ASM_OUTPUT_REG_PUSH(STREAM,REGNO) \
+do \
+ { \
+ fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,0(%s)\n", \
+ TARGET_64BIT ? "daddiu" : "addiu", \
+ reg_names[STACK_POINTER_REGNUM], \
+ reg_names[STACK_POINTER_REGNUM], \
+ TARGET_64BIT ? "sd" : "sw", \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM]); \
+ } \
+while (0)
+
+#define ASM_OUTPUT_REG_POP(STREAM,REGNO) \
+do \
+ { \
+ mips_push_asm_switch (&mips_noreorder); \
+ fprintf (STREAM, "\t%s\t%s,0(%s)\n\t%s\t%s,%s,8\n", \
+ TARGET_64BIT ? "ld" : "lw", \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM], \
+ TARGET_64BIT ? "daddu" : "addu", \
+ reg_names[STACK_POINTER_REGNUM], \
+ reg_names[STACK_POINTER_REGNUM]); \
+ mips_pop_asm_switch (&mips_noreorder); \
+ } \
+while (0)
+
+/* How to start an assembler comment.
+ The leading space is important (the mips native assembler requires it). */
+#ifndef ASM_COMMENT_START
+#define ASM_COMMENT_START " #"
+#endif
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+
+/* The maximum number of bytes that can be copied by one iteration of
+ a movmemsi loop; see mips_block_move_loop. */
+#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
+ (UNITS_PER_WORD * 4)
+
+/* The maximum number of bytes that can be copied by a straight-line
+ implementation of movmemsi; see mips_block_move_straight. We want
+ to make sure that any loop-based implementation will iterate at
+ least twice. */
+#define MIPS_MAX_MOVE_BYTES_STRAIGHT \
+ (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+
+/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
+ values were determined experimentally by benchmarking with CSiBE.
+ In theory, the call overhead is higher for TARGET_ABICALLS (especially
+ for o32 where we have to restore $gp afterwards as well as make an
+ indirect call), but in practice, bumping this up higher for
+ TARGET_ABICALLS doesn't make much difference to code size. */
+
+#define MIPS_CALL_RATIO 8
+
+/* Any loop-based implementation of movmemsi will have at least
+ MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+ moves, so allow individual copies of fewer elements.
+
+ When movmemsi is not available, use a value approximating
+ the length of a memcpy call sequence, so that move_by_pieces
+ will generate inline code if it is shorter than a function call.
+ Since move_by_pieces_ninsns counts memory-to-memory moves, but
+ we'll have to generate a load/store pair for each, halve the
+ value of MIPS_CALL_RATIO to take that into account. */
+
+#define MOVE_RATIO(speed) \
+ (HAVE_movmemsi \
+ ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
+ : MIPS_CALL_RATIO / 2)
+
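+/* For example, on a 32-bit target (UNITS_PER_WORD == 4, MOVE_MAX == 4)
+   with movmemsi available, MIPS_MAX_MOVE_BYTES_STRAIGHT is 32, so
+   MOVE_RATIO evaluates to 32 / 4 == 8 word moves.  */
+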
+/* movmemsi is meant to generate code that is at least as good as
+ move_by_pieces. However, movmemsi effectively uses a by-pieces
+ implementation both for moves smaller than a word and for word-aligned
+ moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT bytes. We should
+ allow the tree-level optimisers to do such moves by pieces, as it
+ often exposes other optimization opportunities. We might as well
+ continue to use movmemsi at the rtl level though, as it produces
+ better code when scheduling is disabled (such as at -O). */
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+ (HAVE_movmemsi \
+ ? (!currently_expanding_to_rtl \
+ && ((ALIGN) < BITS_PER_WORD \
+ ? (SIZE) < UNITS_PER_WORD \
+ : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
+ : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+ < (unsigned int) MOVE_RATIO (false)))
+
+/* For CLEAR_RATIO, when optimizing for size, give a better estimate
+ of the length of a memset call, but use the default otherwise. */
+
+#define CLEAR_RATIO(speed)\
+ ((speed) ? 15 : MIPS_CALL_RATIO)
+
+/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
+ optimizing for size adjust the ratio to account for the overhead of
+ loading the constant and replicating it across the word. */
+
+#define SET_RATIO(speed) \
+ ((speed) ? 15 : MIPS_CALL_RATIO - 2)
+
+/* STORE_BY_PIECES_P can be used when copying a constant string, but
+ in that case each word takes 3 insns (lui, ori, sw), or more in
+ 64-bit mode, instead of 2 (lw, sw). For now we always fail this
+ and let the move_by_pieces code copy the string from read-only
+ memory. In the future, this could be tuned further for multi-issue
+ CPUs that can issue stores down one pipe and arithmetic instructions
+ down another; in that case, the lui/ori/sw combination would be a
+ win for long enough strings. */
+
+#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
+
+#ifndef __mips16
+/* Since the bits of the _init and _fini functions are spread across
+ many object files, each potentially with its own GP, we must assume
+ we need to load our GP. We don't preserve $gp or $ra, since each
+ init/fini chunk is supposed to initialize $gp, and crti/crtn
+ already take care of preserving $ra and, when appropriate, $gp. */
+#if (defined _ABIO32 && _MIPS_SIM == _ABIO32)
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ .set noreorder\n\
+ bal 1f\n\
+ nop\n\
+1: .cpload $31\n\
+ .set reorder\n\
+ jal " USER_LABEL_PREFIX #FUNC "\n\
+ " TEXT_SECTION_ASM_OP);
+#endif /* Switch to #elif when we're no longer limited by K&R C. */
+#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+ || (defined _ABI64 && _MIPS_SIM == _ABI64)
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ .set noreorder\n\
+ bal 1f\n\
+ nop\n\
+1: .set reorder\n\
+ .cpsetup $31, $2, 1b\n\
+ jal " USER_LABEL_PREFIX #FUNC "\n\
+ " TEXT_SECTION_ASM_OP);
+#endif
+#endif
+
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+#ifndef USED_FOR_TARGET
+/* Information about ".set noFOO; ...; .set FOO" blocks. */
+struct mips_asm_switch {
+ /* The FOO in the description above. */
+ const char *name;
+
+ /* The current block nesting level, or 0 if we aren't in a block. */
+ int nesting_level;
+};
+
+extern const enum reg_class mips_regno_to_class[];
+extern bool mips_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];
+extern const char *current_function_file; /* filename the current function is in */
+extern int num_source_filenames; /* current .file # */
+extern struct mips_asm_switch mips_noreorder;
+extern struct mips_asm_switch mips_nomacro;
+extern struct mips_asm_switch mips_noat;
+extern int mips_dbx_regno[];
+extern int mips_dwarf_regno[];
+extern bool mips_split_p[];
+extern bool mips_split_hi_p[];
+extern enum processor mips_arch; /* which cpu to codegen for */
+extern enum processor mips_tune; /* which cpu to schedule for */
+extern int mips_isa; /* architectural level */
+extern int mips_abi; /* which ABI to use */
+extern const struct mips_cpu_info *mips_arch_info;
+extern const struct mips_cpu_info *mips_tune_info;
+extern bool mips_base_mips16;
+extern enum mips_code_readable_setting mips_code_readable;
+extern GTY(()) struct target_globals *mips16_globals;
+#endif
+
+/* Enable querying of DFA units. */
+#define CPU_UNITS_QUERY 1
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ mips_final_prescan_insn (INSN, OPVEC, NOPERANDS)
+
+/* As on most targets, we want the .eh_frame section to be read-only where
+ possible. And as on most targets, this means two things:
+
+ (a) Non-locally-binding pointers must have an indirect encoding,
+ so that the addresses in the .eh_frame section itself become
+ locally-binding.
+
+ (b) A shared library's .eh_frame section must encode locally-binding
+ pointers in a relative (relocation-free) form.
+
+ However, MIPS has traditionally not allowed directives like:
+
+ .long x-.
+
+ in cases where "x" is in a different section, or is not defined in the
+ same assembly file. We are therefore unable to emit the PC-relative
+ form required by (b) at assembly time.
+
+ Fortunately, the linker is able to convert absolute addresses into
+ PC-relative addresses on our behalf. Unfortunately, only certain
+ versions of the linker know how to do this for indirect pointers,
+ and for personality data. We must fall back on using writable
+ .eh_frame sections for shared libraries if the linker does not
+ support this feature. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
+
+/* For switching between MIPS16 and non-MIPS16 modes. */
+#define SWITCHABLE_TARGET 1
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
new file mode 100644
index 000000000..e629db7a7
--- /dev/null
+++ b/gcc/config/mips/mips.md
@@ -0,0 +1,6486 @@
+;; Mips.md Machine Description for MIPS-based processors
+;; Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by A. Lichnewsky, lich@inria.inria.fr
+;; Changes by Michael Meissner, meissner@osf.org
+;; 64-bit r4000 support by Ian Lance Taylor, ian@cygnus.com, and
+;; Brendan Eich, brendan@microunity.com.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_enum "processor" [
+ r3000
+ 4kc
+ 4kp
+ 5kc
+ 5kf
+ 20kc
+ 24kc
+ 24kf2_1
+ 24kf1_1
+ 74kc
+ 74kf2_1
+ 74kf1_1
+ 74kf3_2
+ loongson_2e
+ loongson_2f
+ loongson_3a
+ m4k
+ octeon
+ r3900
+ r6000
+ r4000
+ r4100
+ r4111
+ r4120
+ r4130
+ r4300
+ r4600
+ r4650
+ r5000
+ r5400
+ r5500
+ r7000
+ r8000
+ r9000
+ r10000
+ sb1
+ sb1a
+ sr71000
+ xlr
+])
+
+(define_c_enum "unspec" [
+ ;; Unaligned accesses.
+ UNSPEC_LOAD_LEFT
+ UNSPEC_LOAD_RIGHT
+ UNSPEC_STORE_LEFT
+ UNSPEC_STORE_RIGHT
+
+ ;; Floating-point moves.
+ UNSPEC_LOAD_LOW
+ UNSPEC_LOAD_HIGH
+ UNSPEC_STORE_WORD
+ UNSPEC_MFHC1
+ UNSPEC_MTHC1
+
+ ;; HI/LO moves.
+ UNSPEC_MFHI
+ UNSPEC_MTHI
+ UNSPEC_SET_HILO
+
+ ;; GP manipulation.
+ UNSPEC_LOADGP
+ UNSPEC_COPYGP
+ UNSPEC_MOVE_GP
+ UNSPEC_POTENTIAL_CPRESTORE
+ UNSPEC_CPRESTORE
+ UNSPEC_RESTORE_GP
+ UNSPEC_EH_RETURN
+ UNSPEC_GP
+ UNSPEC_SET_GOT_VERSION
+ UNSPEC_UPDATE_GOT_VERSION
+
+ ;; Symbolic accesses.
+ UNSPEC_LOAD_CALL
+ UNSPEC_LOAD_GOT
+ UNSPEC_TLS_LDM
+ UNSPEC_TLS_GET_TP
+
+ ;; MIPS16 constant pools.
+ UNSPEC_ALIGN
+ UNSPEC_CONSTTABLE_INT
+ UNSPEC_CONSTTABLE_FLOAT
+
+ ;; Blockage and synchronisation.
+ UNSPEC_BLOCKAGE
+ UNSPEC_CLEAR_HAZARD
+ UNSPEC_RDHWR
+ UNSPEC_SYNCI
+ UNSPEC_SYNC
+
+ ;; Cache manipulation.
+ UNSPEC_MIPS_CACHE
+ UNSPEC_R10K_CACHE_BARRIER
+
+ ;; Interrupt handling.
+ UNSPEC_ERET
+ UNSPEC_DERET
+ UNSPEC_DI
+ UNSPEC_EHB
+ UNSPEC_RDPGPR
+ UNSPEC_COP0
+
+ ;; Used in a call expression in place of args_size. It's present for PIC
+ ;; indirect calls where it contains args_size and the function symbol.
+ UNSPEC_CALL_ATTR
+])
+
+(define_constants
+ [(TLS_GET_TP_REGNUM 3)
+ (RETURN_ADDR_REGNUM 31)
+ (CPRESTORE_SLOT_REGNUM 76)
+ (GOT_VERSION_REGNUM 79)
+
+ ;; PIC long branch sequences are never longer than 100 bytes.
+ (MAX_PIC_BRANCH_LENGTH 100)
+ ]
+)
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ....................
+;;
+;; Attributes
+;;
+;; ....................
+
+(define_attr "got" "unset,xgot_high,load"
+ (const_string "unset"))
+
+;; For jal instructions, this attribute is DIRECT when the target address
+;; is symbolic and INDIRECT when it is a register.
+(define_attr "jal" "unset,direct,indirect"
+ (const_string "unset"))
+
+;; This attribute is YES if the instruction is a jal macro (not a
+;; real jal instruction).
+;;
+;; jal is always a macro for TARGET_CALL_CLOBBERED_GP because it includes
+;; an instruction to restore $gp. Direct jals are also macros for
+;; !TARGET_ABSOLUTE_JUMPS because they first load the target address
+;; into a register.
+(define_attr "jal_macro" "no,yes"
+ (cond [(eq_attr "jal" "direct")
+ (symbol_ref "(TARGET_CALL_CLOBBERED_GP || !TARGET_ABSOLUTE_JUMPS
+ ? JAL_MACRO_YES : JAL_MACRO_NO)")
+ (eq_attr "jal" "indirect")
+ (symbol_ref "(TARGET_CALL_CLOBBERED_GP
+ ? JAL_MACRO_YES : JAL_MACRO_NO)")]
+ (const_string "no")))
+
+;; Classification of moves, extensions and truncations. Most values
+;; are as for "type" (see below) but there are also the following
+;; move-specific values:
+;;
+;; constN move an N-constraint integer into a MIPS16 register
+;; sll0 "sll DEST,SRC,0", which on 64-bit targets is guaranteed
+;; to produce a sign-extended DEST, even if SRC is not
+;; properly sign-extended
+;; ext_ins EXT, DEXT, INS or DINS instruction
+;; andi a single ANDI instruction
+;; loadpool move a constant into a MIPS16 register by loading it
+;; from the pool
+;; shift_shift a shift left followed by a shift right
+;; lui_movf an LUI followed by a MOVF (for d<-z CC moves)
+;;
+;; This attribute is used to determine the instruction's length and
+;; scheduling type. For doubleword moves, the attribute always describes
+;; the split instructions; in some cases, it is more appropriate for the
+;; scheduling type to be "multi" instead.
+(define_attr "move_type"
+ "unknown,load,fpload,store,fpstore,mtc,mfc,mthilo,mfhilo,move,fmove,
+ const,constN,signext,ext_ins,logical,arith,sll0,andi,loadpool,
+ shift_shift,lui_movf"
+ (const_string "unknown"))
+
+(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor"
+ (const_string "unknown"))
+
+;; Main data type used by the insn
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FPSW"
+ (const_string "unknown"))
+
+;; True if the main data type is twice the size of a word.
+(define_attr "dword_mode" "no,yes"
+ (cond [(and (eq_attr "mode" "DI,DF")
+ (eq (symbol_ref "TARGET_64BIT") (const_int 0)))
+ (const_string "yes")
+
+ (and (eq_attr "mode" "TI,TF")
+ (ne (symbol_ref "TARGET_64BIT") (const_int 0)))
+ (const_string "yes")]
+ (const_string "no")))
+
+;; Classification of each insn.
+;; branch conditional branch
+;; jump unconditional jump
+;; call unconditional call
+;; load load instruction(s)
+;; fpload floating point load
+;; fpidxload floating point indexed load
+;; store store instruction(s)
+;; fpstore floating point store
+;; fpidxstore floating point indexed store
+;; prefetch memory prefetch (register + offset)
+;; prefetchx memory indexed prefetch (register + register)
+;; condmove conditional moves
+;; mtc transfer to coprocessor
+;; mfc transfer from coprocessor
+;; mthilo transfer to hi/lo registers
+;; mfhilo transfer from hi/lo registers
+;; const load constant
+;; arith integer arithmetic instructions
+;; logical integer logical instructions
+;; shift integer shift instructions
+;; slt set less than instructions
+;; signext sign extend instructions
+;; clz the clz and clo instructions
+;; pop the pop instruction
+;; trap trap if instructions
+;; imul integer multiply 2 operands
+;; imul3 integer multiply 3 operands
+;; imul3nc integer multiply 3 operands without clobbering HI/LO
+;; imadd integer multiply-add
+;; idiv integer divide 2 operands
+;; idiv3 integer divide 3 operands
+;; move integer register move ({,D}ADD{,U} with rt = 0)
+;; fmove floating point register move
+;; fadd floating point add/subtract
+;; fmul floating point multiply
+;; fmadd floating point multiply-add
+;; fdiv floating point divide
+;; frdiv floating point reciprocal divide
+;; frdiv1 floating point reciprocal divide step 1
+;; frdiv2 floating point reciprocal divide step 2
+;; fabs floating point absolute value
+;; fneg floating point negation
+;; fcmp floating point compare
+;; fcvt floating point convert
+;; fsqrt floating point square root
+;; frsqrt floating point reciprocal square root
+;; frsqrt1 floating point reciprocal square root step1
+;; frsqrt2 floating point reciprocal square root step2
+;; multi multiword sequence (or user asm statements)
+;; nop no operation
+;; ghost an instruction that produces no real code
+(define_attr "type"
+ "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
+ prefetch,prefetchx,condmove,mtc,mfc,mthilo,mfhilo,const,arith,logical,
+ shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move,
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt,
+ frsqrt,frsqrt1,frsqrt2,multi,nop,ghost"
+ (cond [(eq_attr "jal" "!unset") (const_string "call")
+ (eq_attr "got" "load") (const_string "load")
+
+ (eq_attr "alu_type" "add,sub") (const_string "arith")
+
+ (eq_attr "alu_type" "not,nor,and,or,xor") (const_string "logical")
+
+ ;; If a doubleword move uses these expensive instructions,
+ ;; it is usually better to schedule them in the same way
+ ;; as the singleword form, rather than as "multi".
+ (eq_attr "move_type" "load") (const_string "load")
+ (eq_attr "move_type" "fpload") (const_string "fpload")
+ (eq_attr "move_type" "store") (const_string "store")
+ (eq_attr "move_type" "fpstore") (const_string "fpstore")
+ (eq_attr "move_type" "mtc") (const_string "mtc")
+ (eq_attr "move_type" "mfc") (const_string "mfc")
+ (eq_attr "move_type" "mthilo") (const_string "mthilo")
+ (eq_attr "move_type" "mfhilo") (const_string "mfhilo")
+
+ ;; These types of move are always single insns.
+ (eq_attr "move_type" "fmove") (const_string "fmove")
+ (eq_attr "move_type" "loadpool") (const_string "load")
+ (eq_attr "move_type" "signext") (const_string "signext")
+ (eq_attr "move_type" "ext_ins") (const_string "arith")
+ (eq_attr "move_type" "arith") (const_string "arith")
+ (eq_attr "move_type" "logical") (const_string "logical")
+ (eq_attr "move_type" "sll0") (const_string "shift")
+ (eq_attr "move_type" "andi") (const_string "logical")
+
+ ;; These types of move are always split.
+ (eq_attr "move_type" "constN,shift_shift")
+ (const_string "multi")
+
+ ;; These types of move are split for doubleword modes only.
+ (and (eq_attr "move_type" "move,const")
+ (eq_attr "dword_mode" "yes"))
+ (const_string "multi")
+ (eq_attr "move_type" "move") (const_string "move")
+ (eq_attr "move_type" "const") (const_string "const")]
+ ;; We classify "lui_movf" as "unknown" rather than "multi"
+ ;; because we don't split it. FIXME: we should split instead.
+ (const_string "unknown")))
+
+;; Mode for conversion types (fcvt)
+;; I2S integer to float single (SI/DI to SF)
+;; I2D integer to float double (SI/DI to DF)
+;; S2I float to integer (SF to SI/DI)
+;; D2I float to integer (DF to SI/DI)
+;; D2S double to float single
+;; S2D float single to double
+
+(define_attr "cnv_mode" "unknown,I2S,I2D,S2I,D2I,D2S,S2D"
+ (const_string "unknown"))
+
+;; Is this an extended instruction in mips16 mode?
+(define_attr "extended_mips16" "no,yes"
+ (if_then_else (ior (eq_attr "move_type" "sll0")
+ (eq_attr "type" "branch")
+ (eq_attr "jal" "direct"))
+ (const_string "yes")
+ (const_string "no")))
+
+;; Attributes describing a sync loop. These loops have the form:
+;;
+;; if (RELEASE_BARRIER == YES) sync
+;; 1: OLDVAL = *MEM
+;; if ((OLDVAL & INCLUSIVE_MASK) != REQUIRED_OLDVAL) goto 2
+;; $TMP1 = OLDVAL & EXCLUSIVE_MASK
+;; $TMP2 = INSN1 (OLDVAL, INSN1_OP2)
+;; $TMP3 = INSN2 ($TMP2, INCLUSIVE_MASK)
+;; $AT |= $TMP1 | $TMP3
+;; if (!commit (*MEM = $AT)) goto 1.
+;; if (INSN1 != MOVE && INSN1 != LI) NEWVAL = $TMP3 [delay slot]
+;; sync
+;; 2:
+;;
+;; where "$" values are temporaries and where the other values are
+;; specified by the attributes below. Values are specified as operand
+;; numbers and insns are specified as enums. If no operand number is
+;; specified, the following values are used instead:
+;;
+;; - OLDVAL: $AT
+;; - NEWVAL: $AT
+;; - INCLUSIVE_MASK: -1
+;; - REQUIRED_OLDVAL: OLDVAL & INCLUSIVE_MASK
+;; - EXCLUSIVE_MASK: 0
+;;
+;; MEM and INSN1_OP2 are required.
+;;
+;; Ideally, the operand attributes would be integers, with -1 meaning "none",
+;; but the gen* programs don't yet support that.
+(define_attr "sync_mem" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_oldval" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_newval" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_inclusive_mask" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_exclusive_mask" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_required_oldval" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_insn1_op2" "none,0,1,2,3,4,5" (const_string "none"))
+(define_attr "sync_insn1" "move,li,addu,addiu,subu,and,andi,or,ori,xor,xori"
+ (const_string "move"))
+(define_attr "sync_insn2" "nop,and,xor,not"
+ (const_string "nop"))
+(define_attr "sync_release_barrier" "yes,no"
+ (const_string "yes"))
+
+;; Length of instruction in bytes.
+(define_attr "length" ""
+ (cond [(and (eq_attr "extended_mips16" "yes")
+ (ne (symbol_ref "TARGET_MIPS16") (const_int 0)))
+ (const_int 8)
+
+ ;; Direct branch instructions have a range of [-0x20000,0x1fffc],
+ ;; relative to the address of the delay slot. If a branch is
+ ;; outside this range, we have a choice of two sequences.
+ ;; For PIC, an out-of-range branch like:
+ ;;
+ ;; bne r1,r2,target
+ ;; dslot
+ ;;
+ ;; becomes the equivalent of:
+ ;;
+ ;; beq r1,r2,1f
+ ;; dslot
+ ;; la $at,target
+ ;; jr $at
+ ;; nop
+ ;; 1:
+ ;;
+ ;; The non-PIC case is similar except that we use a direct
+ ;; jump instead of an la/jr pair. Since the target of this
+	 ;; jump is an absolute 28-bit address (the other bits
+ ;; coming from the address of the delay slot) this form cannot
+ ;; cross a 256MB boundary. We could provide the option of
+ ;; using la/jr in this case too, but we do not do so at
+ ;; present.
+ ;;
+ ;; Note that this value does not account for the delay slot
+ ;; instruction, whose length is added separately. If the RTL
+ ;; pattern has no explicit delay slot, mips_adjust_insn_length
+ ;; will add the length of the implicit nop. The values for
+ ;; forward and backward branches will be different as well.
+ (eq_attr "type" "branch")
+ (cond [(and (le (minus (match_dup 0) (pc)) (const_int 131064))
+ (le (minus (pc) (match_dup 0)) (const_int 131068)))
+ (const_int 4)
+
+ ;; The non-PIC case: branch, first delay slot, and J.
+ (ne (symbol_ref "TARGET_ABSOLUTE_JUMPS") (const_int 0))
+ (const_int 12)]
+
+ ;; Use MAX_PIC_BRANCH_LENGTH as a (gross) overestimate.
+ ;; mips_adjust_insn_length substitutes the correct length.
+ ;;
+ ;; Note that we can't simply use (symbol_ref ...) here
+ ;; because genattrtab needs to know the maximum length
+ ;; of an insn.
+ (const_int MAX_PIC_BRANCH_LENGTH))
+
+ ;; "Ghost" instructions occupy no space.
+ (eq_attr "type" "ghost")
+ (const_int 0)
+
+ (eq_attr "got" "load")
+ (if_then_else (ne (symbol_ref "TARGET_MIPS16") (const_int 0))
+ (const_int 8)
+ (const_int 4))
+ (eq_attr "got" "xgot_high")
+ (const_int 8)
+
+ ;; In general, constant-pool loads are extended instructions.
+ (eq_attr "move_type" "loadpool")
+ (const_int 8)
+
+ ;; LUI_MOVFs are decomposed into two separate instructions.
+ (eq_attr "move_type" "lui_movf")
+ (const_int 8)
+
+ ;; SHIFT_SHIFTs are decomposed into two separate instructions.
+ ;; They are extended instructions on MIPS16 targets.
+ (eq_attr "move_type" "shift_shift")
+ (if_then_else (ne (symbol_ref "TARGET_MIPS16") (const_int 0))
+ (const_int 16)
+ (const_int 8))
+
+ ;; Check for doubleword moves that are decomposed into two
+ ;; instructions.
+ (and (eq_attr "move_type" "mtc,mfc,mthilo,mfhilo,move")
+ (eq_attr "dword_mode" "yes"))
+ (const_int 8)
+
+ ;; Doubleword CONST{,N} moves are split into two word
+ ;; CONST{,N} moves.
+ (and (eq_attr "move_type" "const,constN")
+ (eq_attr "dword_mode" "yes"))
+ (symbol_ref "mips_split_const_insns (operands[1]) * 4")
+
+ ;; Otherwise, constants, loads and stores are handled by external
+ ;; routines.
+ (eq_attr "move_type" "const,constN")
+ (symbol_ref "mips_const_insns (operands[1]) * 4")
+ (eq_attr "move_type" "load,fpload")
+ (symbol_ref "mips_load_store_insns (operands[1], insn) * 4")
+ (eq_attr "move_type" "store,fpstore")
+ (symbol_ref "mips_load_store_insns (operands[0], insn) * 4")
+
+ ;; In the worst case, a call macro will take 8 instructions:
+ ;;
+ ;; lui $25,%call_hi(FOO)
+ ;; addu $25,$25,$28
+ ;; lw $25,%call_lo(FOO)($25)
+ ;; nop
+ ;; jalr $25
+ ;; nop
+ ;; lw $gp,X($sp)
+ ;; nop
+ (eq_attr "jal_macro" "yes")
+ (const_int 32)
+
+ ;; Various VR4120 errata require a nop to be inserted after a macc
+ ;; instruction. The assembler does this for us, so account for
+ ;; the worst-case length here.
+ (and (eq_attr "type" "imadd")
+ (ne (symbol_ref "TARGET_FIX_VR4120") (const_int 0)))
+ (const_int 8)
+
+ ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
+ ;; the result of the second one is missed. The assembler should work
+ ;; around this by inserting a nop after the first dmult.
+ (and (eq_attr "type" "imul,imul3")
+ (and (eq_attr "mode" "DI")
+ (ne (symbol_ref "TARGET_FIX_VR4120") (const_int 0))))
+ (const_int 8)
+
+ (eq_attr "type" "idiv,idiv3")
+ (symbol_ref "mips_idiv_insns () * 4")
+
+ (not (eq_attr "sync_mem" "none"))
+ (symbol_ref "mips_sync_loop_insns (insn, operands) * 4")
+ ] (const_int 4)))
+
+;; Attribute describing the processor.
+(define_enum_attr "cpu" "processor"
+ (const (symbol_ref "mips_tune")))
+
+;; The type of hardware hazard associated with this instruction.
+;; DELAY means that the next instruction cannot read the result
+;; of this one. HILO means that the next two instructions cannot
+;; write to HI or LO.
+(define_attr "hazard" "none,delay,hilo"
+ (cond [(and (eq_attr "type" "load,fpload,fpidxload")
+ (ne (symbol_ref "ISA_HAS_LOAD_DELAY") (const_int 0)))
+ (const_string "delay")
+
+ (and (eq_attr "type" "mfc,mtc")
+ (ne (symbol_ref "ISA_HAS_XFER_DELAY") (const_int 0)))
+ (const_string "delay")
+
+ (and (eq_attr "type" "fcmp")
+ (ne (symbol_ref "ISA_HAS_FCMP_DELAY") (const_int 0)))
+ (const_string "delay")
+
+ ;; The r4000 multiplication patterns include an mflo instruction.
+ (and (eq_attr "type" "imul")
+ (ne (symbol_ref "TARGET_FIX_R4000") (const_int 0)))
+ (const_string "hilo")
+
+ (and (eq_attr "type" "mfhilo")
+ (eq (symbol_ref "ISA_HAS_HILO_INTERLOCKS") (const_int 0)))
+ (const_string "hilo")]
+ (const_string "none")))
+
+;; Is it a single instruction?
+(define_attr "single_insn" "no,yes"
+ (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4)
+ ? SINGLE_INSN_YES : SINGLE_INSN_NO)"))
+
+;; Can the instruction be put into a delay slot?
+(define_attr "can_delay" "no,yes"
+ (if_then_else (and (eq_attr "type" "!branch,call,jump")
+ (and (eq_attr "hazard" "none")
+ (eq_attr "single_insn" "yes")))
+ (const_string "yes")
+ (const_string "no")))
+
+;; Attribute defining whether or not we can use the branch-likely
+;; instructions.
+(define_attr "branch_likely" "no,yes"
+ (if_then_else (ne (symbol_ref "GENERATE_BRANCHLIKELY") (const_int 0))
+ (const_string "yes")
+ (const_string "no")))
+
+;; True if an instruction might assign to hi or lo when reloaded.
+;; This is used by the TUNE_MACC_CHAINS code.
+(define_attr "may_clobber_hilo" "no,yes"
+ (if_then_else (eq_attr "type" "imul,imul3,imadd,idiv,mthilo")
+ (const_string "yes")
+ (const_string "no")))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")
+ (set_attr "can_delay" "no")])
+
+;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated
+;; from the same template.
+(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
+
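+;; For example, a pattern written over :GPR with the name "add<mode>3"
+;; generates addsi3 unconditionally and adddi3 only when TARGET_64BIT
+;; holds.
+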
+;; A copy of GPR that can be used when a pattern has two independent
+;; modes.
+(define_mode_iterator GPR2 [SI (DI "TARGET_64BIT")])
+
+;; This mode iterator allows :HILO to be used as the mode of the
+;; concatenated HI and LO registers.
+(define_mode_iterator HILO [(DI "!TARGET_64BIT") (TI "TARGET_64BIT")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; This mode iterator allows :MOVECC to be used anywhere that a
+;; conditional-move-type condition is needed.
+(define_mode_iterator MOVECC [SI (DI "TARGET_64BIT")
+ (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")])
+
+;; 32-bit integer moves for which we provide move patterns.
+(define_mode_iterator IMOVE32
+ [SI
+ (V2HI "TARGET_DSP")
+ (V4QI "TARGET_DSP")
+ (V2HQ "TARGET_DSP")
+ (V2UHQ "TARGET_DSP")
+ (V2HA "TARGET_DSP")
+ (V2UHA "TARGET_DSP")
+ (V4QQ "TARGET_DSP")
+ (V4UQQ "TARGET_DSP")])
+
+;; 64-bit modes for which we provide move patterns.
+(define_mode_iterator MOVE64
+ [DI DF
+ (V2SF "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT")
+ (V2SI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS")
+ (V4HI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS")
+ (V8QI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS")])
+
+;; 128-bit modes for which we provide move patterns on 64-bit targets.
+(define_mode_iterator MOVE128 [TI TF])
+
+;; This mode iterator allows the QI and HI extension patterns to be
+;; defined from the same template.
+(define_mode_iterator SHORT [QI HI])
+
+;; Likewise the 64-bit truncate-and-shift patterns.
+(define_mode_iterator SUBDI [QI HI SI])
+
+;; This mode iterator allows :ANYF to be used wherever a scalar or vector
+;; floating-point mode is allowed.
+(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
+ (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")
+ (V2SF "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT")])
+
+;; Like ANYF, but only applies to scalar modes.
+(define_mode_iterator SCALARF [(SF "TARGET_HARD_FLOAT")
+ (DF "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT")])
+
+;; A floating-point mode for which moves involving FPRs may need to be split.
+(define_mode_iterator SPLITF
+ [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+ (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+ (V2SF "!TARGET_64BIT && TARGET_PAIRED_SINGLE_FLOAT")
+ (V2SI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS")
+ (V4HI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS")
+ (V8QI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS")
+ (TF "TARGET_64BIT && TARGET_FLOAT64")])
+
+;; In GPR templates, a string like "<d>subu" will expand to "subu" in the
+;; 32-bit version and "dsubu" in the 64-bit version.
+(define_mode_attr d [(SI "") (DI "d")
+ (QQ "") (HQ "") (SQ "") (DQ "d")
+ (UQQ "") (UHQ "") (USQ "") (UDQ "d")
+ (HA "") (SA "") (DA "d")
+ (UHA "") (USA "") (UDA "d")])
+
+;; Same as d but upper-case.
+(define_mode_attr D [(SI "") (DI "D")
+ (QQ "") (HQ "") (SQ "") (DQ "D")
+ (UQQ "") (UHQ "") (USQ "") (UDQ "D")
+ (HA "") (SA "") (DA "D")
+ (UHA "") (USA "") (UDA "D")])
+
+;; This attribute gives the length suffix for a sign- or zero-extension
+;; instruction.
+(define_mode_attr size [(QI "b") (HI "h")])
+
+;; This attribute gives the mode mask of a SHORT.
+(define_mode_attr mask [(QI "0x00ff") (HI "0xffff")])
+
+;; Mode attributes for GPR loads.
+(define_mode_attr load [(SI "lw") (DI "ld")])
+;; Instruction names for stores.
+(define_mode_attr store [(QI "sb") (HI "sh") (SI "sw") (DI "sd")])
+
+;; Similarly for MIPS IV indexed FPR loads and stores.
+(define_mode_attr loadx [(SF "lwxc1") (DF "ldxc1") (V2SF "ldxc1")])
+(define_mode_attr storex [(SF "swxc1") (DF "sdxc1") (V2SF "sdxc1")])
+
+;; The unextended ranges of the MIPS16 addiu and daddiu instructions
+;; are different. Some forms of unextended addiu have an 8-bit immediate
+;; field but the equivalent daddiu has only a 5-bit field.
+(define_mode_attr si8_di5 [(SI "8") (DI "5")])
+
+;; This attribute gives the best constraint to use for registers of
+;; a given mode.
+(define_mode_attr reg [(SI "d") (DI "d") (CC "z")])
+
+;; This attribute gives the format suffix for floating-point operations.
+(define_mode_attr fmt [(SF "s") (DF "d") (V2SF "ps")])
+
+;; This attribute gives the upper-case mode name for one unit of a
+;; floating-point mode.
+(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF")])
+
+;; This attribute gives the integer mode that has the same size as a
+;; fixed-point mode.
+(define_mode_attr IMODE [(QQ "QI") (HQ "HI") (SQ "SI") (DQ "DI")
+ (UQQ "QI") (UHQ "HI") (USQ "SI") (UDQ "DI")
+ (HA "HI") (SA "SI") (DA "DI")
+ (UHA "HI") (USA "SI") (UDA "DI")
+ (V4UQQ "SI") (V2UHQ "SI") (V2UHA "SI")
+ (V2HQ "SI") (V2HA "SI")])
+
+;; This attribute gives the integer mode that has half the size of
+;; the controlling mode.
+(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI")
+ (V2SI "SI") (V4HI "SI") (V8QI "SI")
+ (TF "DI")])
+
+;; This attribute works around the early SB-1 rev2 core "F2" erratum:
+;;
+;; In certain cases, div.s and div.ps may have a rounding error
+;; and/or wrong inexact flag.
+;;
+;; Therefore, we only allow div.s if not working around SB-1 rev2
+;; errata or if a slight loss of precision is OK.
+(define_mode_attr divide_condition
+ [DF (SF "!TARGET_FIX_SB1 || flag_unsafe_math_optimizations")
+ (V2SF "TARGET_SB1 && (!TARGET_FIX_SB1 || flag_unsafe_math_optimizations)")])
+
+;; This attribute gives the conditions under which SQRT.fmt instructions
+;; can be used.
+(define_mode_attr sqrt_condition
+ [(SF "!ISA_MIPS1") (DF "!ISA_MIPS1") (V2SF "TARGET_SB1")])
+
+;; This attribute gives the conditions under which RECIP.fmt and RSQRT.fmt
+;; instructions can be used. The MIPS32 and MIPS64 ISAs say that RECIP.D
+;; and RSQRT.D are unpredictable when doubles are stored in pairs of FPRs,
+;; so for safety's sake, we apply this restriction to all targets.
+(define_mode_attr recip_condition
+ [(SF "ISA_HAS_FP4")
+ (DF "ISA_HAS_FP4 && TARGET_FLOAT64")
+ (V2SF "TARGET_SB1")])
+
+;; This code iterator allows signed and unsigned widening multiplications
+;; to use the same template.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; This code iterator allows the two right shift instructions to be
+;; generated from the same template.
+(define_code_iterator any_shiftrt [ashiftrt lshiftrt])
+
+;; This code iterator allows the three shift instructions to be generated
+;; from the same template.
+(define_code_iterator any_shift [ashift ashiftrt lshiftrt])
+
+;; This code iterator allows unsigned and signed division to be generated
+;; from the same template.
+(define_code_iterator any_div [div udiv])
+
+;; This code iterator allows unsigned and signed modulus to be generated
+;; from the same template.
+(define_code_iterator any_mod [mod umod])
+
+;; This code iterator allows all native floating-point comparisons to be
+;; generated from the same template.
+(define_code_iterator fcond [unordered uneq unlt unle eq lt le])
+
+;; This code iterator is used for comparisons that can be implemented
+;; by swapping the operands.
+(define_code_iterator swapped_fcond [ge gt unge ungt])
+
+;; Equality operators.
+(define_code_iterator equality_op [eq ne])
+
+;; These code iterators allow the signed and unsigned scc operations to use
+;; the same template.
+(define_code_iterator any_gt [gt gtu])
+(define_code_iterator any_ge [ge geu])
+(define_code_iterator any_lt [lt ltu])
+(define_code_iterator any_le [le leu])
+
+;; <u> expands to an empty string when doing a signed operation and
+;; "u" when doing an unsigned operation.
+(define_code_attr u [(sign_extend "") (zero_extend "u")
+ (div "") (udiv "u")
+ (mod "") (umod "u")
+ (gt "") (gtu "u")
+ (ge "") (geu "u")
+ (lt "") (ltu "u")
+ (le "") (leu "u")])
+
+;; <su> is like <u>, but the signed form expands to "s" rather than "".
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; <optab> expands to the name of the optab for a particular code.
+(define_code_attr optab [(ashift "ashl")
+ (ashiftrt "ashr")
+ (lshiftrt "lshr")
+ (ior "ior")
+ (xor "xor")
+ (and "and")
+ (plus "add")
+ (minus "sub")])
+
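+;; For example, combining <optab> with the any_shift iterator above in
+;; a template named "<optab><mode>3" yields the standard names
+;; ashlsi3, ashrsi3 and lshrsi3 (and DI variants on 64-bit targets).
+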
+;; <insn> expands to the name of the insn that implements a particular code.
+(define_code_attr insn [(ashift "sll")
+ (ashiftrt "sra")
+ (lshiftrt "srl")
+ (ior "or")
+ (xor "xor")
+ (and "and")
+ (plus "addu")
+ (minus "subu")])
+
+;; <immediate_insn> expands to the name of the insn that implements
+;; a particular code to operate on immediate values.
+(define_code_attr immediate_insn [(ior "ori")
+ (xor "xori")
+ (and "andi")])
+
+;; <fcond> is the c.cond.fmt condition associated with a particular code.
+(define_code_attr fcond [(unordered "un")
+ (uneq "ueq")
+ (unlt "ult")
+ (unle "ule")
+ (eq "eq")
+ (lt "lt")
+ (le "le")])
+
+;; Similar, but for swapped conditions.
+(define_code_attr swapped_fcond [(ge "le")
+ (gt "lt")
+ (unge "ule")
+ (ungt "ult")])
+
+;; The value of the bit when the branch is taken for branch_bit patterns.
+;; Comparison is always against zero so this depends on the operator.
+(define_code_attr bbv [(eq "0") (ne "1")])
+
+;; This is the inverse value of bbv.
+(define_code_attr bbinv [(eq "1") (ne "0")])
+
+;; .........................
+;;
+;; Branch, call and jump delay slots
+;;
+;; .........................
+
+(define_delay (and (eq_attr "type" "branch")
+ (eq (symbol_ref "TARGET_MIPS16") (const_int 0))
+ (eq_attr "branch_likely" "yes"))
+ [(eq_attr "can_delay" "yes")
+ (nil)
+ (eq_attr "can_delay" "yes")])
+
+;; Branches that don't have likely variants do not annul on false.
+(define_delay (and (eq_attr "type" "branch")
+ (eq (symbol_ref "TARGET_MIPS16") (const_int 0))
+ (eq_attr "branch_likely" "no"))
+ [(eq_attr "can_delay" "yes")
+ (nil)
+ (nil)])
+
+(define_delay (eq_attr "type" "jump")
+ [(eq_attr "can_delay" "yes")
+ (nil)
+ (nil)])
+
+(define_delay (and (eq_attr "type" "call")
+ (eq_attr "jal_macro" "no"))
+ [(eq_attr "can_delay" "yes")
+ (nil)
+ (nil)])
+
+;; Pipeline descriptions.
+;;
+;; generic.md provides a fallback for processors without a specific
+;; pipeline description. It is derived from the old define_function_unit
+;; version and uses the "alu" and "imuldiv" units declared below.
+;;
+;; Some of the processor-specific files are also derived from old
+;; define_function_unit descriptions and simply override the parts of
+;; generic.md that don't apply. The other processor-specific files
+;; are self-contained.
+(define_automaton "alu,imuldiv")
+
+(define_cpu_unit "alu" "alu")
+(define_cpu_unit "imuldiv" "imuldiv")
+
+;; Ghost instructions produce no real code and introduce no hazards.
+;; They exist purely to express an effect on dataflow.
+(define_insn_reservation "ghost" 0
+ (eq_attr "type" "ghost")
+ "nothing")
+
+(include "4k.md")
+(include "5k.md")
+(include "20kc.md")
+(include "24k.md")
+(include "74k.md")
+(include "3000.md")
+(include "4000.md")
+(include "4100.md")
+(include "4130.md")
+(include "4300.md")
+(include "4600.md")
+(include "5000.md")
+(include "5400.md")
+(include "5500.md")
+(include "6000.md")
+(include "7000.md")
+(include "9000.md")
+(include "10000.md")
+(include "loongson2ef.md")
+(include "loongson3a.md")
+(include "octeon.md")
+(include "sb1.md")
+(include "sr71k.md")
+(include "xlr.md")
+(include "generic.md")
+
+;;
+;; ....................
+;;
+;; CONDITIONAL TRAPS
+;;
+;; ....................
+;;
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+{
+ if (ISA_HAS_COND_TRAP)
+ return "teq\t$0,$0";
+ else if (TARGET_MIPS16)
+ return "break 0";
+ else
+ return "break";
+}
+ [(set_attr "type" "trap")])
+
+(define_expand "ctrap<mode>4"
+ [(trap_if (match_operator 0 "comparison_operator"
+ [(match_operand:GPR 1 "reg_or_0_operand")
+ (match_operand:GPR 2 "arith_operand")])
+ (match_operand 3 "const_0_operand"))]
+ "ISA_HAS_COND_TRAP"
+{
+ mips_expand_conditional_trap (operands[0]);
+ DONE;
+})
+
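+;; In the output template below, %C0 prints the trap condition for
+;; operand 0 ("eq", "ne", "lt", "ltu", ...) and %z1 prints $0 when
+;; operand 1 is a constant zero.  As an illustrative, hand-written
+;; example, an equality trap on registers $4 and $5 comes out as
+;; "teq $4,$5".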
+(define_insn "*conditional_trap<mode>"
+ [(trap_if (match_operator:GPR 0 "trap_comparison_operator"
+ [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
+ (match_operand:GPR 2 "arith_operand" "dI")])
+ (const_int 0))]
+ "ISA_HAS_COND_TRAP"
+ "t%C0\t%z1,%2"
+ [(set_attr "type" "trap")])
+
+;;
+;; ....................
+;;
+;; ADDITION
+;;
+;; ....................
+;;
+
+(define_insn "add<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (plus:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ ""
+ "add.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "add<mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (plus:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "arith_operand")))]
+ "")
+
+(define_insn "*add<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (plus:GPR (match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "arith_operand" "d,Q")))]
+ "!TARGET_MIPS16"
+ "@
+ <d>addu\t%0,%1,%2
+ <d>addiu\t%0,%1,%2"
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_mips16"
+ [(set (match_operand:GPR 0 "register_operand" "=ks,d,d,d,d")
+ (plus:GPR (match_operand:GPR 1 "register_operand" "ks,ks,0,d,d")
+ (match_operand:GPR 2 "arith_operand" "Q,Q,Q,O,d")))]
+ "TARGET_MIPS16"
+ "@
+ <d>addiu\t%0,%2
+ <d>addiu\t%0,%1,%2
+ <d>addiu\t%0,%2
+ <d>addiu\t%0,%1,%2
+ <d>addu\t%0,%1,%2"
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "<MODE>")
+ (set_attr_alternative "length"
+ [(if_then_else (match_operand 2 "m16_simm8_8")
+ (const_int 4)
+ (const_int 8))
+ (if_then_else (match_operand 2 "m16_uimm<si8_di5>_4")
+ (const_int 4)
+ (const_int 8))
+ (if_then_else (match_operand 2 "m16_simm<si8_di5>_1")
+ (const_int 4)
+ (const_int 8))
+ (if_then_else (match_operand 2 "m16_simm4_1")
+ (const_int 4)
+ (const_int 8))
+ (const_int 4)])])
+
+;; On the mips16, we can sometimes split an add of a constant (a
+;; 4-byte extended instruction) into two adds that are both 2-byte
+;; instructions.  There are two cases: one where we are adding a
+;; constant plus a register to another register, and one where we are
+;; simply adding a constant to a register.
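+;;
+;; As a worked example of adding a constant to a register in place:
+;; 200 does not fit in a signed 8-bit immediate, so "addiu $4,200"
+;; would need the extended 4-byte encoding, but since 200 = 127 + 73
+;; it can instead be done as "addiu $4,127; addiu $4,73", two
+;; unextended 2-byte instructions.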
+
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand")))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) > 0x7f
+ && INTVAL (operands[1]) <= 0x7f + 0x7f)
+ || (INTVAL (operands[1]) < - 0x80
+ && INTVAL (operands[1]) >= - 0x80 - 0x80))"
+ [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val >= 0)
+ {
+ operands[1] = GEN_INT (0x7f);
+ operands[2] = GEN_INT (val - 0x7f);
+ }
+ else
+ {
+ operands[1] = GEN_INT (- 0x80);
+ operands[2] = GEN_INT (val + 0x80);
+ }
+})
+
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (plus:SI (match_operand:SI 1 "d_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && ((INTVAL (operands[2]) > 0x7
+ && INTVAL (operands[2]) <= 0x7 + 0x7f)
+ || (INTVAL (operands[2]) < - 0x8
+ && INTVAL (operands[2]) >= - 0x8 - 0x80))"
+ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+
+ if (val >= 0)
+ {
+ operands[2] = GEN_INT (0x7);
+ operands[3] = GEN_INT (val - 0x7);
+ }
+ else
+ {
+ operands[2] = GEN_INT (- 0x8);
+ operands[3] = GEN_INT (val + 0x8);
+ }
+})
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (plus:DI (match_dup 0)
+ (match_operand:DI 1 "const_int_operand")))]
+ "TARGET_MIPS16 && TARGET_64BIT && reload_completed && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) > 0xf
+ && INTVAL (operands[1]) <= 0xf + 0xf)
+ || (INTVAL (operands[1]) < - 0x10
+ && INTVAL (operands[1]) >= - 0x10 - 0x10))"
+ [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val >= 0)
+ {
+ operands[1] = GEN_INT (0xf);
+ operands[2] = GEN_INT (val - 0xf);
+ }
+ else
+ {
+ operands[1] = GEN_INT (- 0x10);
+ operands[2] = GEN_INT (val + 0x10);
+ }
+})
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (plus:DI (match_operand:DI 1 "d_operand")
+ (match_operand:DI 2 "const_int_operand")))]
+ "TARGET_MIPS16 && TARGET_64BIT && reload_completed && !TARGET_DEBUG_D_MODE
+ && REGNO (operands[0]) != REGNO (operands[1])
+ && ((INTVAL (operands[2]) > 0x7
+ && INTVAL (operands[2]) <= 0x7 + 0xf)
+ || (INTVAL (operands[2]) < - 0x8
+ && INTVAL (operands[2]) >= - 0x8 - 0x10))"
+ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+
+ if (val >= 0)
+ {
+ operands[2] = GEN_INT (0x7);
+ operands[3] = GEN_INT (val - 0x7);
+ }
+ else
+ {
+ operands[2] = GEN_INT (- 0x8);
+ operands[3] = GEN_INT (val + 0x8);
+ }
+})
+
+(define_insn "*addsi3_extended"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (sign_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "d,Q"))))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+ "@
+ addu\t%0,%1,%2
+ addiu\t%0,%1,%2"
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "SI")])
+
+;; Split this insn so that the addiu splitters can have a crack at it.
+;; Use a conservative length estimate until the split.
+(define_insn_and_split "*addsi3_extended_mips16"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (sign_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "0,d,d")
+ (match_operand:SI 2 "arith_operand" "Q,O,d"))))]
+ "TARGET_64BIT && TARGET_MIPS16"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2)))]
+ { operands[3] = gen_lowpart (SImode, operands[0]); }
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "SI")
+ (set_attr "extended_mips16" "yes")])
+
+;; Combiner patterns for unsigned byte-add.
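+;;
+;; BADDU computes the low byte of the sum, zero-extended to the full
+;; register: rd = (rs + rt) & 0xff.  The big-endian and little-endian
+;; patterns below differ only in which subreg byte number (3 or 0)
+;; selects the least significant byte of the SImode sum.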
+
+(define_insn "*baddu_si_eb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI
+ (subreg:QI
+ (plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")) 3)))]
+ "ISA_HAS_BADDU && BYTES_BIG_ENDIAN"
+ "baddu\\t%0,%1,%2"
+ [(set_attr "alu_type" "add")])
+
+(define_insn "*baddu_si_el"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI
+ (subreg:QI
+ (plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")) 0)))]
+ "ISA_HAS_BADDU && !BYTES_BIG_ENDIAN"
+ "baddu\\t%0,%1,%2"
+ [(set_attr "alu_type" "add")])
+
+(define_insn "*baddu_di<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extend:GPR
+ (truncate:QI
+ (plus:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))))]
+ "ISA_HAS_BADDU && TARGET_64BIT"
+ "baddu\\t%0,%1,%2"
+ [(set_attr "alu_type" "add")])
+
+;;
+;; ....................
+;;
+;; SUBTRACTION
+;;
+;; ....................
+;;
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ ""
+ "sub.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (minus:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ ""
+ "<d>subu\t%0,%1,%2"
+ [(set_attr "alu_type" "sub")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi3_extended"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))))]
+ "TARGET_64BIT"
+ "subu\t%0,%1,%2"
+ [(set_attr "alu_type" "sub")
+ (set_attr "mode" "DI")])
+
+;;
+;; ....................
+;;
+;; MULTIPLICATION
+;;
+;; ....................
+;;
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:SCALARF 0 "register_operand")
+ (mult:SCALARF (match_operand:SCALARF 1 "register_operand")
+ (match_operand:SCALARF 2 "register_operand")))]
+ ""
+ "")
+
+(define_insn "*mul<mode>3"
+ [(set (match_operand:SCALARF 0 "register_operand" "=f")
+ (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f")
+ (match_operand:SCALARF 2 "register_operand" "f")))]
+ "!TARGET_4300_MUL_FIX"
+ "mul.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmul")
+ (set_attr "mode" "<MODE>")])
+
+;; Early VR4300 silicon has a CPU bug where multiplies with certain
+;; operands may corrupt immediately following multiplies.  The simple
+;; fix used here is to insert a NOP after each such multiply.
+
+(define_insn "*mul<mode>3_r4300"
+ [(set (match_operand:SCALARF 0 "register_operand" "=f")
+ (mult:SCALARF (match_operand:SCALARF 1 "register_operand" "f")
+ (match_operand:SCALARF 2 "register_operand" "f")))]
+ "TARGET_4300_MUL_FIX"
+ "mul.<fmt>\t%0,%1,%2\;nop"
+ [(set_attr "type" "fmul")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_insn "mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 1 "register_operand" "f")
+ (match_operand:V2SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+ "mul.ps\t%0,%1,%2"
+ [(set_attr "type" "fmul")
+ (set_attr "mode" "SF")])
+
+;; The original R4000 has a CPU bug. If a double-word or a variable
+;; shift executes while an integer multiplication is in progress, the
+;; shift may give an incorrect result. Avoid this by keeping the mflo
+;; with the mult on the R4000.
+;;
+;; From "MIPS R4000PC/SC Errata, Processor Revision 2.2 and 3.0"
+;; (also valid for MIPS R4000MC processors):
+;;
+;; "16. R4000PC, R4000SC: Please refer to errata 28 for an update to
+;; this errata description.
+;; The following code sequence causes the R4000 to incorrectly
+;; execute the Double Shift Right Arithmetic 32 (dsra32)
+;; instruction. If the dsra32 instruction is executed during an
+;; integer multiply, the dsra32 will only shift by the amount
+;; specified in the instruction rather than the amount plus 32
+;; bits.
+;; instruction 1: mult rs,rt integer multiply
+;; instruction 2-12: dsra32 rd,rt,rs doubleword shift
+;; right arithmetic + 32
+;; Workaround: A dsra32 instruction placed after an integer
+;; multiply should not be one of the 11 instructions after the
+;; multiply instruction."
+;;
+;; and:
+;;
+;; "28. R4000PC, R4000SC: The text from errata 16 should be replaced by
+;; the following description.
+;; All extended shifts (shift by n+32) and variable shifts (32 and
+;; 64-bit versions) may produce incorrect results under the
+;; following conditions:
+;; 1) An integer multiply is currently executing
+;; 2) These types of shift instructions are executed immediately
+;; following an integer divide instruction.
+;; Workaround:
+;; 1) Make sure no integer multiply is running when these
+;; instructions are executed. If this cannot be predicted at
+;; compile time, then insert a "mfhi" to R0 instruction
+;; immediately after the integer multiply instruction. This
+;; will cause the integer multiply to complete before the shift
+;; is executed.
+;; 2) Separate integer divide and these two classes of shift
+;; instructions by another instruction or a noop."
+;;
+;; These processors have PRId values of 0x00004220 and 0x00004300,
+;; respectively.
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (mult:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "register_operand")))]
+ ""
+{
+ if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A)
+ emit_insn (gen_mul<mode>3_mul3_loongson (operands[0], operands[1],
+ operands[2]));
+ else if (ISA_HAS_<D>MUL3)
+ emit_insn (gen_mul<mode>3_mul3 (operands[0], operands[1], operands[2]));
+ else if (TARGET_FIX_R4000)
+ emit_insn (gen_mul<mode>3_r4000 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn
+ (gen_mul<mode>3_internal (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mul<mode>3_mul3_loongson"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (mult:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A"
+{
+ if (TARGET_LOONGSON_2EF)
+ return "<d>multu.g\t%0,%1,%2";
+ else
+ return "gs<d>multu\t%0,%1,%2";
+}
+ [(set_attr "type" "imul3nc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3_mul3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,l")
+ (mult:GPR (match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "register_operand" "d,d")))
+ (clobber (match_scratch:GPR 3 "=l,X"))]
+ "ISA_HAS_<D>MUL3"
+{
+ if (which_alternative == 1)
+ return "<d>mult\t%1,%2";
+ if (<MODE>mode == SImode && TARGET_MIPS3900)
+ return "mult\t%0,%1,%2";
+ return "<d>mul\t%0,%1,%2";
+}
+ [(set_attr "type" "imul3,imul")
+ (set_attr "mode" "<MODE>")])
+
+;; If a register gets allocated to LO, and we spill to memory, the reload
+;; will include a move from LO to a GPR. Merge it into the multiplication
+;; if it can set the GPR directly.
+;;
+;; Operand 0: LO
+;; Operand 1: GPR (1st multiplication operand)
+;; Operand 2: GPR (2nd multiplication operand)
+;; Operand 3: GPR (destination)
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "lo_operand")
+ (mult:SI (match_operand:SI 1 "d_operand")
+ (match_operand:SI 2 "d_operand")))
+ (clobber (scratch:SI))])
+ (set (match_operand:SI 3 "d_operand")
+ (match_dup 0))]
+ "ISA_HAS_MUL3 && peep2_reg_dead_p (2, operands[0])"
+ [(parallel
+ [(set (match_dup 3)
+ (mult:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_dup 0))])])
+
+(define_insn "mul<mode>3_internal"
+ [(set (match_operand:GPR 0 "register_operand" "=l")
+ (mult:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "!TARGET_FIX_R4000"
+ "<d>mult\t%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3_r4000"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (mult:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))
+ (clobber (match_scratch:GPR 3 "=l"))]
+ "TARGET_FIX_R4000"
+ "<d>mult\t%1,%2\;mflo\t%0"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead
+;; of "mult; mflo". They have the same latency, but the first form gives
+;; us an extra cycle to compute the operands.
+
+;; Operand 0: LO
+;; Operand 1: GPR (1st multiplication operand)
+;; Operand 2: GPR (2nd multiplication operand)
+;; Operand 3: GPR (destination)
+(define_peephole2
+ [(set (match_operand:SI 0 "lo_operand")
+ (mult:SI (match_operand:SI 1 "d_operand")
+ (match_operand:SI 2 "d_operand")))
+ (set (match_operand:SI 3 "d_operand")
+ (match_dup 0))]
+ "ISA_HAS_MACC && !ISA_HAS_MUL3"
+ [(set (match_dup 0)
+ (const_int 0))
+ (parallel
+ [(set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 0)))
+ (set (match_dup 3)
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 0)))])])
+
+;; Multiply-accumulate patterns
+
+;; This pattern is first matched by combine, which tries to use the
+;; pattern wherever it can. We don't know until later whether it
+;; is actually profitable to use MADD over a "MUL; ADDIU" sequence,
+;; so we need to keep both options open.
+;;
+;; The second alternative has a "?" marker because it is generally
+;; one instruction more costly than the first alternative. This "?"
+;; marker is enough to convey the relative costs to the register
+;; allocator.
+;;
+;; However, reload counts reloads of operands 4 and 5 in the same way as
+;; reloads of the other operands, even though operands 4 and 5 need no
+;; copy instructions. Reload therefore thinks that the second alternative
+;; is two reloads more costly than the first. We add "*?*?" to the first
+;; alternative as a counterweight.
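+;;
+;; When the second alternative is selected, the output is "#" and the
+;; define_split further down breaks the insn into a separate
+;; multiplication and addition once reload has completed.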
+(define_insn "*mul_acc_si"
+ [(set (match_operand:SI 0 "register_operand" "=l*?*?,d?")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "register_operand" "d,d"))
+ (match_operand:SI 3 "register_operand" "0,d")))
+ (clobber (match_scratch:SI 4 "=X,l"))
+ (clobber (match_scratch:SI 5 "=X,&d"))]
+ "GENERATE_MADD_MSUB && !TARGET_MIPS16"
+ "@
+ madd\t%1,%2
+ #"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+;; The same idea applies here. The middle alternative needs one less
+;; clobber than the final alternative, so we add "*?" as a counterweight.
+(define_insn "*mul_acc_si_r3900"
+ [(set (match_operand:SI 0 "register_operand" "=l*?*?,d*?,d?")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d,d,d")
+ (match_operand:SI 2 "register_operand" "d,d,d"))
+ (match_operand:SI 3 "register_operand" "0,l,d")))
+ (clobber (match_scratch:SI 4 "=X,3,l"))
+ (clobber (match_scratch:SI 5 "=X,X,&d"))]
+ "TARGET_MIPS3900 && !TARGET_MIPS16"
+ "@
+ madd\t%1,%2
+ madd\t%0,%1,%2
+ #"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4,8")])
+
+;; Split *mul_acc_si if both the source and destination accumulator
+;; values are GPRs.
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (plus:SI (mult:SI (match_operand:SI 1 "d_operand")
+ (match_operand:SI 2 "d_operand"))
+ (match_operand:SI 3 "d_operand")))
+ (clobber (match_operand:SI 4 "lo_operand"))
+ (clobber (match_operand:SI 5 "d_operand"))]
+ "reload_completed"
+ [(parallel [(set (match_dup 5)
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0) (plus:SI (match_dup 5) (match_dup 3)))]
+ "")
+
+(define_insn "*macc"
+ [(set (match_operand:SI 0 "register_operand" "=l,d")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "register_operand" "d,d"))
+ (match_operand:SI 3 "register_operand" "0,l")))
+ (clobber (match_scratch:SI 4 "=X,3"))]
+ "ISA_HAS_MACC"
+{
+ if (which_alternative == 1)
+ return "macc\t%0,%1,%2";
+ else if (TARGET_MIPS5500)
+ return "madd\t%1,%2";
+ else
+ /* The VR4130 assumes that there is a two-cycle latency between a macc
+ that "writes" to $0 and an instruction that reads from it. We avoid
+ this by assigning to $1 instead. */
+ return "%[macc\t%@,%1,%2%]";
+}
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "*msac"
+ [(set (match_operand:SI 0 "register_operand" "=l,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,l")
+ (mult:SI (match_operand:SI 2 "register_operand" "d,d")
+ (match_operand:SI 3 "register_operand" "d,d"))))
+ (clobber (match_scratch:SI 4 "=X,1"))]
+ "ISA_HAS_MSAC"
+{
+ if (which_alternative == 1)
+ return "msac\t%0,%2,%3";
+ else if (TARGET_MIPS5500)
+ return "msub\t%2,%3";
+ else
+ return "msac\t$0,%2,%3";
+}
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; An msac-like instruction implemented using negation and a macc.
+(define_insn_and_split "*msac_using_macc"
+ [(set (match_operand:SI 0 "register_operand" "=l,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,l")
+ (mult:SI (match_operand:SI 2 "register_operand" "d,d")
+ (match_operand:SI 3 "register_operand" "d,d"))))
+ (clobber (match_scratch:SI 4 "=X,1"))
+ (clobber (match_scratch:SI 5 "=d,d"))]
+ "ISA_HAS_MACC && !ISA_HAS_MSAC"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 5)
+ (neg:SI (match_dup 3)))
+ (parallel
+ [(set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 2)
+ (match_dup 5))
+ (match_dup 1)))
+ (clobber (match_dup 4))])]
+ ""
+ [(set_attr "type" "imadd")
+ (set_attr "length" "8")])
+
+;; Patterns generated by the define_peephole2 below.
+
+(define_insn "*macc2"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))
+ (match_dup 0)))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 0)))]
+ "ISA_HAS_MACC && reload_completed"
+ "macc\t%3,%1,%2"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+(define_insn "*msac2"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (minus:SI (match_dup 0)
+ (mult:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (minus:SI (match_dup 0)
+ (mult:SI (match_dup 1)
+ (match_dup 2))))]
+ "ISA_HAS_MSAC && reload_completed"
+ "msac\t%3,%1,%2"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; Convert macc $0,<r1>,<r2> & mflo <r3> into macc <r3>,<r1>,<r2>
+;; Similarly msac.
+;;
+;; Operand 0: LO
+;; Operand 1: macc/msac
+;; Operand 2: GPR (destination)
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "lo_operand")
+ (match_operand:SI 1 "macc_msac_operand"))
+ (clobber (scratch:SI))])
+ (set (match_operand:SI 2 "d_operand")
+ (match_dup 0))]
+ ""
+ [(parallel [(set (match_dup 0)
+ (match_dup 1))
+ (set (match_dup 2)
+ (match_dup 1))])])
+
+;; When we have a three-address multiplication instruction, it should
+;; be faster to do a separate multiply and add, rather than moving
+;; something into LO in order to use a macc instruction.
+;;
+;; This peephole needs a scratch register to cater for the case when one
+;; of the multiplication operands is the same as the destination.
+;;
+;; Operand 0: GPR (scratch)
+;; Operand 1: LO
+;; Operand 2: GPR (addend)
+;; Operand 3: GPR (destination)
+;; Operand 4: macc/msac
+;; Operand 5: new multiplication
+;; Operand 6: new addition/subtraction
+(define_peephole2
+ [(match_scratch:SI 0 "d")
+ (set (match_operand:SI 1 "lo_operand")
+ (match_operand:SI 2 "d_operand"))
+ (match_dup 0)
+ (parallel
+ [(set (match_operand:SI 3 "d_operand")
+ (match_operand:SI 4 "macc_msac_operand"))
+ (clobber (match_dup 1))])]
+ "ISA_HAS_MUL3 && peep2_reg_dead_p (2, operands[1])"
+ [(parallel [(set (match_dup 0)
+ (match_dup 5))
+ (clobber (match_dup 1))])
+ (set (match_dup 3)
+ (match_dup 6))]
+{
+ operands[5] = XEXP (operands[4], GET_CODE (operands[4]) == PLUS ? 0 : 1);
+ operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[4]), SImode,
+ operands[2], operands[0]);
+})
+
+;; Same as above, except LO is the initial target of the macc.
+;;
+;; Operand 0: GPR (scratch)
+;; Operand 1: LO
+;; Operand 2: GPR (addend)
+;; Operand 3: macc/msac
+;; Operand 4: GPR (destination)
+;; Operand 5: new multiplication
+;; Operand 6: new addition/subtraction
+(define_peephole2
+ [(match_scratch:SI 0 "d")
+ (set (match_operand:SI 1 "lo_operand")
+ (match_operand:SI 2 "d_operand"))
+ (match_dup 0)
+ (parallel
+ [(set (match_dup 1)
+ (match_operand:SI 3 "macc_msac_operand"))
+ (clobber (scratch:SI))])
+ (match_dup 0)
+ (set (match_operand:SI 4 "d_operand")
+ (match_dup 1))]
+ "ISA_HAS_MUL3 && peep2_reg_dead_p (3, operands[1])"
+ [(parallel [(set (match_dup 0)
+ (match_dup 5))
+ (clobber (match_dup 1))])
+ (set (match_dup 4)
+ (match_dup 6))]
+{
+ operands[5] = XEXP (operands[3], GET_CODE (operands[3]) == PLUS ? 0 : 1);
+ operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+ operands[2], operands[0]);
+})
+
+;; See the comment above *mul_acc_si for details.
+(define_insn "*mul_sub_si"
+ [(set (match_operand:SI 0 "register_operand" "=l*?*?,d?")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,d")
+ (mult:SI (match_operand:SI 2 "register_operand" "d,d")
+ (match_operand:SI 3 "register_operand" "d,d"))))
+ (clobber (match_scratch:SI 4 "=X,l"))
+ (clobber (match_scratch:SI 5 "=X,&d"))]
+ "GENERATE_MADD_MSUB"
+ "@
+ msub\t%2,%3
+ #"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,8")])
+
+;; Split *mul_sub_si if both the source and destination accumulator
+;; values are GPRs.
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (minus:SI (match_operand:SI 1 "d_operand")
+ (mult:SI (match_operand:SI 2 "d_operand")
+ (match_operand:SI 3 "d_operand"))))
+ (clobber (match_operand:SI 4 "lo_operand"))
+ (clobber (match_operand:SI 5 "d_operand"))]
+ "reload_completed"
+ [(parallel [(set (match_dup 5)
+ (mult:SI (match_dup 2) (match_dup 3)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 5)))]
+ "")
+
+(define_insn "*muls"
+ [(set (match_operand:SI 0 "register_operand" "=l,d")
+ (neg:SI (mult:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "register_operand" "d,d"))))
+ (clobber (match_scratch:SI 3 "=X,l"))]
+ "ISA_HAS_MULS"
+ "@
+ muls\t$0,%1,%2
+ muls\t%0,%1,%2"
+ [(set_attr "type" "imul,imul3")
+ (set_attr "mode" "SI")])
+
+(define_expand "<u>mulsidi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand"))
+ (any_extend:DI (match_operand:SI 2 "register_operand"))))]
+ "mips_mulsidi3_gen_fn (<CODE>) != NULL"
+{
+ mulsidi3_gen_fn fn = mips_mulsidi3_gen_fn (<CODE>);
+ emit_insn (fn (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; As well as being named patterns, these instructions are used by the
+;; __builtin_mips_mult<u>() functions. We must always make those functions
+;; available if !TARGET_64BIT && ISA_HAS_DSP.
+(define_insn "<u>mulsidi3_32bit"
+ [(set (match_operand:DI 0 "register_operand" "=ka")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d"))))]
+ "!TARGET_64BIT && (!TARGET_FIX_R4000 || ISA_HAS_DSP)"
+{
+ if (ISA_HAS_DSP_MULT)
+ return "mult<u>\t%q0,%1,%2";
+ else
+ return "mult<u>\t%1,%2";
+}
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")])
+
+(define_insn "<u>mulsidi3_32bit_r4000"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d"))))
+ (clobber (match_scratch:DI 3 "=x"))]
+ "!TARGET_64BIT && TARGET_FIX_R4000"
+ "mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "12")])
+
+(define_insn "<u>mulsidi3_64bit"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d"))))
+ (clobber (match_scratch:TI 3 "=x"))
+ (clobber (match_scratch:DI 4 "=d"))]
+ "TARGET_64BIT && !TARGET_FIX_R4000 && !ISA_HAS_DMUL3"
+ "#"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "ISA_HAS_EXT_INS") (const_int 0))
+ (const_int 16)
+ (const_int 28)))])
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "d_operand"))
+ (any_extend:DI (match_operand:SI 2 "d_operand"))))
+ (clobber (match_operand:TI 3 "hilo_operand"))
+ (clobber (match_operand:DI 4 "d_operand"))]
+ "TARGET_64BIT && !TARGET_FIX_R4000 && ISA_HAS_EXT_INS && reload_completed"
+ [(set (match_dup 3)
+ (unspec:TI [(mult:DI (any_extend:DI (match_dup 1))
+ (any_extend:DI (match_dup 2)))]
+ UNSPEC_SET_HILO))
+
+ ;; OP0 <- LO, OP4 <- HI
+ (set (match_dup 0) (match_dup 5))
+ (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
+
+ (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 32))
+ (match_dup 4))]
+ { operands[5] = gen_rtx_REG (DImode, LO_REGNUM); })
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "d_operand"))
+ (any_extend:DI (match_operand:SI 2 "d_operand"))))
+ (clobber (match_operand:TI 3 "hilo_operand"))
+ (clobber (match_operand:DI 4 "d_operand"))]
+ "TARGET_64BIT && !TARGET_FIX_R4000 && !ISA_HAS_EXT_INS && reload_completed"
+ [(set (match_dup 3)
+ (unspec:TI [(mult:DI (any_extend:DI (match_dup 1))
+ (any_extend:DI (match_dup 2)))]
+ UNSPEC_SET_HILO))
+
+ ;; OP0 <- LO, OP4 <- HI
+ (set (match_dup 0) (match_dup 5))
+ (set (match_dup 4) (unspec:DI [(match_dup 3)] UNSPEC_MFHI))
+
+ ;; Zero-extend OP0.
+ (set (match_dup 0)
+ (ashift:DI (match_dup 0)
+ (const_int 32)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0)
+ (const_int 32)))
+
+ ;; Shift OP4 into place.
+ (set (match_dup 4)
+ (ashift:DI (match_dup 4)
+ (const_int 32)))
+
+ ;; OR the two halves together.
+ (set (match_dup 0)
+ (ior:DI (match_dup 0)
+ (match_dup 4)))]
+ { operands[5] = gen_rtx_REG (DImode, LO_REGNUM); })
+
+(define_insn "<u>mulsidi3_64bit_hilo"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI
+ [(mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))]
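+;; Each define_delay below lists, in order: the condition for an insn
+;; to be eligible for the delay slot, the condition under which the
+;; slot insn may be annulled when the branch is taken, and the
+;; condition under which it may be annulled when the branch falls
+;; through.  Branch-likely instructions annul on fall-through, hence
+;; the non-nil third entry in the first description.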
+ UNSPEC_SET_HILO))]
+ "TARGET_64BIT && !TARGET_FIX_R4000"
+ "mult<u>\t%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")])
+
+;; See comment before the ISA_HAS_DMUL3 case in mips_mulsidi3_gen_fn.
+(define_insn "mulsidi3_64bit_dmul"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))
+ (clobber (match_scratch:DI 3 "=l"))]
+ "TARGET_64BIT && ISA_HAS_DMUL3"
+ "dmul\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "DI")])
+
+;; Widening multiply with negation.
+(define_insn "*muls<u>_di"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (neg:DI
+ (mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))))]
+ "!TARGET_64BIT && ISA_HAS_MULS"
+ "muls<u>\t$0,%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")])
+
+;; As well as being named patterns, these instructions are used by the
+;; __builtin_mips_msub<u>() functions. We must always make those functions
+;; available if !TARGET_64BIT && ISA_HAS_DSP.
+;;
+;; This leads to a slight inconsistency. We honor any tuning overrides
+;; in GENERATE_MADD_MSUB for -mno-dsp, but always ignore them for -mdsp,
+;; even if !ISA_HAS_DSP_MULT.
+(define_insn "<u>msubsidi4"
+ [(set (match_operand:DI 0 "register_operand" "=ka")
+ (minus:DI
+ (match_operand:DI 3 "register_operand" "0")
+ (mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))))]
+ "!TARGET_64BIT && (ISA_HAS_MSAC || GENERATE_MADD_MSUB || ISA_HAS_DSP)"
+{
+ if (ISA_HAS_DSP_MULT)
+ return "msub<u>\t%q0,%1,%2";
+ else if (TARGET_MIPS5500 || GENERATE_MADD_MSUB)
+ return "msub<u>\t%1,%2";
+ else
+ return "msac<u>\t$0,%1,%2";
+}
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; _highpart patterns
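+;;
+;; These set the destination to the upper half of the double-width
+;; product, i.e. the value that the multiplication leaves in the HI
+;; register.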
+
+(define_expand "<su>mulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand"))
+ (any_extend:DI (match_operand:SI 2 "register_operand")))
+ (const_int 32))))]
+ ""
+{
+ if (ISA_HAS_MULHI)
+ emit_insn (gen_<su>mulsi3_highpart_mulhi_internal (operands[0],
+ operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_<su>mulsi3_highpart_internal (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "<su>mulsi3_highpart_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=l"))]
+ "!ISA_HAS_MULHI"
+ { return TARGET_FIX_R4000 ? "mult<u>\t%1,%2\n\tmfhi\t%0" : "#"; }
+ "&& reload_completed && !TARGET_FIX_R4000"
+ [(const_int 0)]
+{
+ rtx hilo;
+
+ if (TARGET_64BIT)
+ {
+ hilo = gen_rtx_REG (TImode, MD_REG_FIRST);
+ emit_insn (gen_<u>mulsidi3_64bit_hilo (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhisi_ti (operands[0], hilo));
+ }
+ else
+ {
+ hilo = gen_rtx_REG (DImode, MD_REG_FIRST);
+ emit_insn (gen_<u>mulsidi3_32bit (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhisi_di (operands[0], hilo));
+ }
+ DONE;
+}
+ [(set_attr "type" "imul")
+ (set_attr "mode" "SI")
+ (set_attr "length" "8")])
+
+(define_insn "<su>mulsi3_highpart_mulhi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=l"))]
+ "ISA_HAS_MULHI"
+ "mulhi<u>\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<su>mulsi3_highpart_neg_mulhi_internal"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (neg:DI
+ (mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d"))))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=l"))]
+ "ISA_HAS_MULHI"
+ "mulshi<u>\t%0,%1,%2"
+ [(set_attr "type" "imul3")
+ (set_attr "mode" "SI")])
+
+;; Disable unsigned multiplication for -mfix-vr4120. This is for VR4120
+;; errata MD(0), which says that dmultu does not always produce the
+;; correct result.
+(define_insn_and_split "<su>muldi3_highpart"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
+ (any_extend:TI (match_operand:DI 2 "register_operand" "d")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=l"))]
+ "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ { return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; }
+ "&& reload_completed && !TARGET_FIX_R4000"
+ [(const_int 0)]
+{
+ rtx hilo;
+
+ hilo = gen_rtx_REG (TImode, MD_REG_FIRST);
+ emit_insn (gen_<u>mulditi3_internal (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhidi_ti (operands[0], hilo));
+ DONE;
+}
+ [(set_attr "type" "imul")
+ (set_attr "mode" "DI")
+ (set_attr "length" "8")])
+
+(define_expand "<u>mulditi3"
+ [(set (match_operand:TI 0 "register_operand")
+ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
+ (any_extend:TI (match_operand:DI 2 "register_operand"))))]
+ "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+{
+ if (TARGET_FIX_R4000)
+ emit_insn (gen_<u>mulditi3_r4000 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_<u>mulditi3_internal (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+(define_insn "<u>mulditi3_internal"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
+ (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))]
+ "TARGET_64BIT
+ && !TARGET_FIX_R4000
+ && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "dmult<u>\t%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "DI")])
+
+(define_insn "<u>mulditi3_r4000"
+ [(set (match_operand:TI 0 "register_operand" "=d")
+ (mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
+ (any_extend:TI (match_operand:DI 2 "register_operand" "d"))))
+ (clobber (match_scratch:TI 3 "=x"))]
+ "TARGET_64BIT
+ && TARGET_FIX_R4000
+ && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "DI")
+ (set_attr "length" "12")])
+
+;; The R4650 supports a 32-bit multiply / 64-bit accumulate
+;; instruction. The HI/LO registers are used as a 64-bit accumulator.
+
+(define_insn "madsi"
+ [(set (match_operand:SI 0 "register_operand" "+l")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))
+ (match_dup 0)))]
+ "TARGET_MAD"
+ "mad\t%1,%2"
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; See the comment above <u>msubsidi4 for the relationship between
+;; ISA_HAS_DSP and ISA_HAS_DSP_MULT.
+(define_insn "<u>maddsidi4"
+ [(set (match_operand:DI 0 "register_operand" "=ka")
+ (plus:DI
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "d"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "d")))
+ (match_operand:DI 3 "register_operand" "0")))]
+ "(TARGET_MAD || ISA_HAS_MACC || GENERATE_MADD_MSUB || ISA_HAS_DSP)
+ && !TARGET_64BIT"
+{
+ if (TARGET_MAD)
+ return "mad<u>\t%1,%2";
+ else if (ISA_HAS_DSP_MULT)
+ return "madd<u>\t%q0,%1,%2";
+ else if (GENERATE_MADD_MSUB || TARGET_MIPS5500)
+ return "madd<u>\t%1,%2";
+ else
+ /* See comment in *macc. */
+ return "%[macc<u>\t%@,%1,%2%]";
+}
+ [(set_attr "type" "imadd")
+ (set_attr "mode" "SI")])
+
+;; Floating point multiply accumulate instructions.
+
+(define_insn "*madd4<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (plus:ANYF (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "f")))]
+ "ISA_HAS_FP_MADD4_MSUB4 && TARGET_FUSED_MADD"
+ "madd.<fmt>\t%0,%3,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*madd3<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (plus:ANYF (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "0")))]
+ "ISA_HAS_FP_MADD3_MSUB3 && TARGET_FUSED_MADD"
+ "madd.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*msub4<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "f")))]
+ "ISA_HAS_FP_MADD4_MSUB4 && TARGET_FUSED_MADD"
+ "msub.<fmt>\t%0,%3,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*msub3<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "0")))]
+ "ISA_HAS_FP_MADD3_MSUB3 && TARGET_FUSED_MADD"
+ "msub.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmadd4<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (neg:ANYF (plus:ANYF
+ (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "f"))))]
+ "ISA_HAS_NMADD4_NMSUB4 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmadd.<fmt>\t%0,%3,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmadd3<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (neg:ANYF (plus:ANYF
+ (mult:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "0"))))]
+ "ISA_HAS_NMADD3_NMSUB3 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmadd.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmadd4<mode>_fastmath"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF
+ (mult:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "f")))]
+ "ISA_HAS_NMADD4_NMSUB4 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmadd.<fmt>\t%0,%3,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmadd3<mode>_fastmath"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF
+ (mult:ANYF (neg:ANYF (match_operand:ANYF 1 "register_operand" "f"))
+ (match_operand:ANYF 2 "register_operand" "f"))
+ (match_operand:ANYF 3 "register_operand" "0")))]
+ "ISA_HAS_NMADD3_NMSUB3 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmadd.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmsub4<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (neg:ANYF (minus:ANYF
+ (mult:ANYF (match_operand:ANYF 2 "register_operand" "f")
+ (match_operand:ANYF 3 "register_operand" "f"))
+ (match_operand:ANYF 1 "register_operand" "f"))))]
+ "ISA_HAS_NMADD4_NMSUB4 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmsub.<fmt>\t%0,%1,%2,%3"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmsub3<mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (neg:ANYF (minus:ANYF
+ (mult:ANYF (match_operand:ANYF 2 "register_operand" "f")
+ (match_operand:ANYF 3 "register_operand" "f"))
+ (match_operand:ANYF 1 "register_operand" "0"))))]
+ "ISA_HAS_NMADD3_NMSUB3 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmsub.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmsub4<mode>_fastmath"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF
+ (match_operand:ANYF 1 "register_operand" "f")
+ (mult:ANYF (match_operand:ANYF 2 "register_operand" "f")
+ (match_operand:ANYF 3 "register_operand" "f"))))]
+ "ISA_HAS_NMADD4_NMSUB4 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmsub.<fmt>\t%0,%1,%2,%3"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "*nmsub3<mode>_fastmath"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (minus:ANYF
+ (match_operand:ANYF 1 "register_operand" "f")
+ (mult:ANYF (match_operand:ANYF 2 "register_operand" "f")
+ (match_operand:ANYF 3 "register_operand" "0"))))]
+ "ISA_HAS_NMADD3_NMSUB3 (<MODE>mode)
+ && TARGET_FUSED_MADD
+ && !HONOR_SIGNED_ZEROS (<MODE>mode)
+ && !HONOR_NANS (<MODE>mode)"
+ "nmsub.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "<UNITMODE>")])
+
+;;
+;; ....................
+;;
+;; DIVISION and REMAINDER
+;;
+;; ....................
+;;
+
+(define_expand "div<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand")
+ (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand")
+ (match_operand:ANYF 2 "register_operand")))]
+ "<divide_condition>"
+{
+ if (const_1_operand (operands[1], <MODE>mode))
+ if (!(<recip_condition> && flag_unsafe_math_optimizations))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+})
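+
+;; For example, with -ffast-math on a target that satisfies
+;; <recip_condition>, a division of 1.0 by a register keeps its
+;; constant operand here so that it can match "*recip<mode>3" below
+;; and be emitted as a single recip.fmt rather than a div.fmt.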
+
+;; These patterns work around the early SB-1 rev2 core "F1" erratum:
+;;
+;; If an mfc1 or dmfc1 happens to access the floating point register
+;; file at the same time a long latency operation (div, sqrt, recip,
+;; rsqrt) iterates an intermediate result back through the floating
+;; point register file bypass, then instead of returning the correct
+;; register value, the mfc1 or dmfc1 operation returns the
+;; intermediate result of the long latency operation.
+;;
+;; The workaround is to insert an unconditional 'mov' from/to the
+;; long latency op destination register.
+
+(define_insn "*div<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (div:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ "<divide_condition>"
+{
+ if (TARGET_FIX_SB1)
+ return "div.<fmt>\t%0,%1,%2\;mov.<fmt>\t%0,%0";
+ else
+ return "div.<fmt>\t%0,%1,%2";
+}
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "<UNITMODE>")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_FIX_SB1") (const_int 0))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*recip<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ "<recip_condition> && flag_unsafe_math_optimizations"
+{
+ if (TARGET_FIX_SB1)
+ return "recip.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0";
+ else
+ return "recip.<fmt>\t%0,%2";
+}
+ [(set_attr "type" "frdiv")
+ (set_attr "mode" "<UNITMODE>")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_FIX_SB1") (const_int 0))
+ (const_int 8)
+ (const_int 4)))])
+
+;; VR4120 errata MD(A1): signed division instructions do not work correctly
+;; with negative operands. We use special libgcc functions instead.
+(define_insn_and_split "divmod<mode>4"
+ [(set (match_operand:GPR 0 "register_operand" "=l")
+ (div:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))
+ (set (match_operand:GPR 3 "register_operand" "=d")
+ (mod:GPR (match_dup 1)
+ (match_dup 2)))]
+ "!TARGET_FIX_VR4120"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx hilo;
+
+ if (TARGET_64BIT)
+ {
+ hilo = gen_rtx_REG (TImode, MD_REG_FIRST);
+ emit_insn (gen_divmod<mode>4_hilo_ti (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhi<mode>_ti (operands[3], hilo));
+ }
+ else
+ {
+ hilo = gen_rtx_REG (DImode, MD_REG_FIRST);
+ emit_insn (gen_divmod<mode>4_hilo_di (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhi<mode>_di (operands[3], hilo));
+ }
+ DONE;
+}
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "udivmod<mode>4"
+ [(set (match_operand:GPR 0 "register_operand" "=l")
+ (udiv:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))
+ (set (match_operand:GPR 3 "register_operand" "=d")
+ (umod:GPR (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx hilo;
+
+ if (TARGET_64BIT)
+ {
+ hilo = gen_rtx_REG (TImode, MD_REG_FIRST);
+ emit_insn (gen_udivmod<mode>4_hilo_ti (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhi<mode>_ti (operands[3], hilo));
+ }
+ else
+ {
+ hilo = gen_rtx_REG (DImode, MD_REG_FIRST);
+ emit_insn (gen_udivmod<mode>4_hilo_di (hilo, operands[1], operands[2]));
+ emit_insn (gen_mfhi<mode>_di (operands[3], hilo));
+ }
+ DONE;
+}
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_insn "<u>divmod<GPR:mode>4_hilo_<HILO:mode>"
+ [(set (match_operand:HILO 0 "register_operand" "=x")
+ (unspec:HILO
+ [(any_div:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d"))]
+ UNSPEC_SET_HILO))]
+ ""
+ { return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); }
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;;
+;; ....................
+;;
+;; SQUARE ROOT
+;;
+;; ....................
+
+;; These patterns work around the early SB-1 rev2 core "F1" erratum (see
+;; "*div[sd]f3" comment for details).
+
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))]
+ "<sqrt_condition>"
+{
+ if (TARGET_FIX_SB1)
+ return "sqrt.<fmt>\t%0,%1\;mov.<fmt>\t%0,%0";
+ else
+ return "sqrt.<fmt>\t%0,%1";
+}
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "<UNITMODE>")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_FIX_SB1") (const_int 0))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*rsqrt<mode>a"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
+ (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))]
+ "<recip_condition> && flag_unsafe_math_optimizations"
+{
+ if (TARGET_FIX_SB1)
+ return "rsqrt.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0";
+ else
+ return "rsqrt.<fmt>\t%0,%2";
+}
+ [(set_attr "type" "frsqrt")
+ (set_attr "mode" "<UNITMODE>")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_FIX_SB1") (const_int 0))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*rsqrt<mode>b"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
+ (match_operand:ANYF 2 "register_operand" "f"))))]
+ "<recip_condition> && flag_unsafe_math_optimizations"
+{
+ if (TARGET_FIX_SB1)
+ return "rsqrt.<fmt>\t%0,%2\;mov.<fmt>\t%0,%0";
+ else
+ return "rsqrt.<fmt>\t%0,%2";
+}
+ [(set_attr "type" "frsqrt")
+ (set_attr "mode" "<UNITMODE>")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_FIX_SB1") (const_int 0))
+ (const_int 8)
+ (const_int 4)))])
+
+;;
+;; ....................
+;;
+;; ABSOLUTE VALUE
+;;
+;; ....................
+
+;; Do not use the integer abs macro instruction, since that signals an
+;; exception on -2147483648 (sigh).
+
+;; abs.fmt is an arithmetic instruction and treats all NaN inputs as
+;; invalid; it does not clear their sign bits. We therefore can't use
+;; abs.fmt if the signs of NaNs matter.
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (abs:ANYF (match_operand:ANYF 1 "register_operand" "f")))]
+ "!HONOR_NANS (<MODE>mode)"
+ "abs.<fmt>\t%0,%1"
+ [(set_attr "type" "fabs")
+ (set_attr "mode" "<UNITMODE>")])
+
+;;
+;; ...................
+;;
+;; Count leading zeroes.
+;;
+;; ...................
+;;
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (clz:GPR (match_operand:GPR 1 "register_operand" "d")))]
+ "ISA_HAS_CLZ_CLO"
+ "<d>clz\t%0,%1"
+ [(set_attr "type" "clz")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ...................
+;;
+;; Count number of set bits.
+;;
+;; ...................
+;;
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (popcount:GPR (match_operand:GPR 1 "register_operand" "d")))]
+ "ISA_HAS_POP"
+ "<d>pop\t%0,%1"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ....................
+;;
+;; NEGATION and ONE'S COMPLEMENT
+;;
+;; ....................
+
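+;; Plain MIPS has no neg or not machine instructions (the assembler
+;; forms are macros), so outside MIPS16 the templates below use
+;; register $0, printed by "%.": negation is "subu %0,$0,%1" (or
+;; "dsubu" for DImode) and one's complement is "nor %0,$0,%1".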
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+{
+ if (TARGET_MIPS16)
+ return "neg\t%0,%1";
+ else
+ return "subu\t%0,%.,%1";
+}
+ [(set_attr "alu_type" "sub")
+ (set_attr "mode" "SI")])
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+ "dsubu\t%0,%.,%1"
+ [(set_attr "alu_type" "sub")
+ (set_attr "mode" "DI")])
+
+;; neg.fmt is an arithmetic instruction and treats all NaN inputs as
+;; invalid; it does not flip their sign bit. We therefore can't use
+;; neg.fmt if the signs of NaNs matter.
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (neg:ANYF (match_operand:ANYF 1 "register_operand" "f")))]
+ "!HONOR_NANS (<MODE>mode)"
+ "neg.<fmt>\t%0,%1"
+ [(set_attr "type" "fneg")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (not:GPR (match_operand:GPR 1 "register_operand" "d")))]
+ ""
+{
+ if (TARGET_MIPS16)
+ return "not\t%0,%1";
+ else
+ return "nor\t%0,%.,%1";
+}
+ [(set_attr "alu_type" "not")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ....................
+;;
+;; LOGICAL
+;;
+;; ....................
+;;
+
+;; Many of these instructions use trivial define_expands, because we
+;; want to use a different set of constraints when TARGET_MIPS16.
+
+(define_expand "and<mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (and:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "and_reg_operand")))])
+
+;; The middle-end is not allowed to convert ANDing with 0xffff_ffff into a
+;; zero_extendsidi2 because of TRULY_NOOP_TRUNCATION, so handle these here.
+;; Note that this variant does not trigger for SI mode because we require
+;; a 64-bit HOST_WIDE_INT and 0xffff_ffff wouldn't be a canonical
+;; sign-extended SImode value.
+;;
+;; These are possible combinations for operand 1 and 2. The table
+;; includes both MIPS and MIPS16 cases. (r=register, mem=memory,
+;; 16=MIPS16, x=match, S=split):
+;;
+;; \ op1 r/EXT r/!EXT mem r/16 mem/16
+;; op2
+;;
+;; andi x x
+;; 0xff x x x x
+;; 0xffff x x x x
+;; 0xffff_ffff x S x S x
+;; low-bitmask x
+;; register x x
+;; register =op1 x
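+;;
+;; For example, the mem rows mean that combine can turn an AND of a
+;; memory operand with 0xff into a single "lbu" byte load (alternative
+;; 0 below), while a wide low-order bitmask such as 0x3fffffff is
+;; handled with an "ext" bitfield extract where that instruction
+;; exists.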
+
+(define_insn "*and<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d,d,d,d")
+ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "o,o,W,d,d,d,d")
+ (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,K,Yx,Yw,d")))]
+ "!TARGET_MIPS16 && and_operands_ok (<MODE>mode, operands[1], operands[2])"
+{
+ int len;
+
+ switch (which_alternative)
+ {
+ case 0:
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ return "lbu\t%0,%1";
+ case 1:
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ return "lhu\t%0,%1";
+ case 2:
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ return "lwu\t%0,%1";
+ case 3:
+ return "andi\t%0,%1,%x2";
+ case 4:
+ len = low_bitmask_len (<MODE>mode, INTVAL (operands[2]));
+ operands[2] = GEN_INT (len);
+ return "<d>ext\t%0,%1,0,%2";
+ case 5:
+ return "#";
+ case 6:
+ return "and\t%0,%1,%2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "move_type" "load,load,load,andi,ext_ins,shift_shift,logical")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*and<mode>3_mips16"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d,d")
+ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "%o,o,W,d,0")
+ (match_operand:GPR 2 "and_operand" "Yb,Yh,Yw,Yw,d")))]
+ "TARGET_MIPS16 && and_operands_ok (<MODE>mode, operands[1], operands[2])"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ return "lbu\t%0,%1";
+ case 1:
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ return "lhu\t%0,%1";
+ case 2:
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ return "lwu\t%0,%1";
+ case 3:
+ return "#";
+ case 4:
+ return "and\t%0,%2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "move_type" "load,load,load,shift_shift,logical")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (ior:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "uns_arith_operand")))]
+ ""
+{
+ if (TARGET_MIPS16)
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+})
+
+(define_insn "*ior<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (ior:GPR (match_operand:GPR 1 "register_operand" "%d,d")
+ (match_operand:GPR 2 "uns_arith_operand" "d,K")))]
+ "!TARGET_MIPS16"
+ "@
+ or\t%0,%1,%2
+ ori\t%0,%1,%x2"
+ [(set_attr "alu_type" "or")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ior<mode>3_mips16"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (ior:GPR (match_operand:GPR 1 "register_operand" "%0")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "TARGET_MIPS16"
+ "or\t%0,%2"
+ [(set_attr "alu_type" "or")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "xor<mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (xor:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "uns_arith_operand")))]
+ ""
+ "")
+
+(define_insn "*xor<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (xor:GPR (match_operand:GPR 1 "register_operand" "%d,d")
+ (match_operand:GPR 2 "uns_arith_operand" "d,K")))]
+ "!TARGET_MIPS16"
+ "@
+ xor\t%0,%1,%2
+ xori\t%0,%1,%x2"
+ [(set_attr "alu_type" "xor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*xor<mode>3_mips16"
+ [(set (match_operand:GPR 0 "register_operand" "=d,t,t")
+ (xor:GPR (match_operand:GPR 1 "register_operand" "%0,d,d")
+ (match_operand:GPR 2 "uns_arith_operand" "d,K,d")))]
+ "TARGET_MIPS16"
+ "@
+ xor\t%0,%2
+ cmpi\t%1,%2
+ cmp\t%1,%2"
+ [(set_attr "alu_type" "xor")
+ (set_attr "mode" "<MODE>")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand:VOID 2 "m16_uimm8_1")
+ (const_int 4)
+ (const_int 8))
+ (const_int 4)])])
+
+(define_insn "*nor<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "d"))
+ (not:GPR (match_operand:GPR 2 "register_operand" "d"))))]
+ "!TARGET_MIPS16"
+ "nor\t%0,%1,%2"
+ [(set_attr "alu_type" "nor")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ....................
+;;
+;; TRUNCATION
+;;
+;; ....................
+
+
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+ "cvt.s.d\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "cnv_mode" "D2S")
+ (set_attr "mode" "SF")])
+
+;; Integer truncation patterns. Truncating SImode values to smaller
+;; modes is a no-op, as it is for most other GCC ports. Truncating
+;; DImode values to SImode is not a no-op for TARGET_64BIT since we
+;; need to make sure that the lower 32 bits are properly sign-extended
+;; (see TRULY_NOOP_TRUNCATION). Truncating DImode values into modes
+;; smaller than SImode is equivalent to two separate truncations:
+;;
+;; A B
+;; DI ---> HI == DI ---> SI ---> HI
+;; DI ---> QI == DI ---> SI ---> QI
+;;
+;; Step A needs a real instruction but step B does not.
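+;;
+;; For example, truncating a DImode register to HImode only needs the
+;; DI -> SI step ("sll %0,%1,0", which re-canonicalizes the register
+;; as a sign-extended 32-bit value); the remaining SI -> HI step is
+;; free because sub-SI truncations are no-ops on this port.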
+
+(define_insn "truncdi<mode>2"
+ [(set (match_operand:SUBDI 0 "nonimmediate_operand" "=d,m")
+ (truncate:SUBDI (match_operand:DI 1 "register_operand" "d,d")))]
+ "TARGET_64BIT"
+ "@
+ sll\t%0,%1,0
+ <store>\t%1,%0"
+ [(set_attr "move_type" "sll0,store")
+ (set_attr "mode" "SI")])
+
+;; Combiner patterns to optimize shift/truncate combinations.
+
+(define_insn "*ashr_trunc<mode>"
+ [(set (match_operand:SUBDI 0 "register_operand" "=d")
+ (truncate:SUBDI
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "const_arith_operand" ""))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && IN_RANGE (INTVAL (operands[2]), 32, 63)"
+ "dsra\t%0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*lshr32_trunc<mode>"
+ [(set (match_operand:SUBDI 0 "register_operand" "=d")
+ (truncate:SUBDI
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "d")
+ (const_int 32))))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+ "dsra\t%0,%1,32"
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<MODE>")])
+
+;; Logical shift by more than 32 results in proper SI values, so the
+;; truncation is removed by the middle end. Note that a logical shift
+;; by exactly 32 is handled by the previous pattern.
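+;; For example, (truncate:SI (lshiftrt:DI x (const_int 8))) matches
+;; the pattern below and becomes "exts %0,%1,8,31", which extracts
+;; bits 8..39 and sign-extends them, i.e. the canonical SImode form
+;; of the truncated shift result.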
+(define_insn "*<optab>_trunc<mode>_exts"
+ [(set (match_operand:SUBDI 0 "register_operand" "=d")
+ (truncate:SUBDI
+ (any_shiftrt:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "const_arith_operand" ""))))]
+ "ISA_HAS_EXTS && TARGET_64BIT && UINTVAL (operands[2]) < 32"
+ "exts\t%0,%1,%2,31"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ....................
+;;
+;; ZERO EXTENSION
+;;
+;; ....................
+
+;; Extension insns.
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
+ "TARGET_64BIT")
+
+(define_insn_and_split "*zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,W")))]
+ "TARGET_64BIT && !ISA_HAS_EXT_INS"
+ "@
+ #
+ lwu\t%0,%1"
+ "&& reload_completed && REG_P (operands[1])"
+ [(set (match_dup 0)
+ (ashift:DI (match_dup 1) (const_int 32)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0) (const_int 32)))]
+ { operands[1] = gen_lowpart (DImode, operands[1]); }
+ [(set_attr "move_type" "shift_shift,load")
+ (set_attr "mode" "DI")])
+
+(define_insn "*zero_extendsidi2_dext"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,W")))]
+ "TARGET_64BIT && ISA_HAS_EXT_INS"
+ "@
+ dext\t%0,%1,0,32
+ lwu\t%0,%1"
+ [(set_attr "move_type" "arith,load")
+ (set_attr "mode" "DI")])
+
+;; See the comment before the *and<mode>3 pattern for why this is
+;; generated by combine.
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (match_operand:DI 1 "register_operand")
+ (const_int 4294967295)))]
+ "TARGET_64BIT && !ISA_HAS_EXT_INS && reload_completed"
+ [(set (match_dup 0)
+ (ashift:DI (match_dup 1) (const_int 32)))
+ (set (match_dup 0)
+ (lshiftrt:DI (match_dup 0) (const_int 32)))])
+
+(define_expand "zero_extend<SHORT:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand")
+ (zero_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))]
+ ""
+{
+ if (TARGET_MIPS16 && !GENERATE_MIPS16E
+ && !memory_operand (operands[1], <SHORT:MODE>mode))
+ {
+ emit_insn (gen_and<GPR:mode>3 (operands[0],
+ gen_lowpart (<GPR:MODE>mode, operands[1]),
+ force_reg (<GPR:MODE>mode,
+ GEN_INT (<SHORT:mask>))));
+ DONE;
+ }
+})
+
+(define_insn "*zero_extend<SHORT:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (zero_extend:GPR
+ (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))]
+ "!TARGET_MIPS16"
+ "@
+ andi\t%0,%1,<SHORT:mask>
+ l<SHORT:size>u\t%0,%1"
+ [(set_attr "move_type" "andi,load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*zero_extend<SHORT:mode><GPR:mode>2_mips16e"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extend:GPR (match_operand:SHORT 1 "register_operand" "0")))]
+ "GENERATE_MIPS16E"
+ "ze<SHORT:size>\t%0"
+ ;; This instruction is effectively a special encoding of ANDI.
+ [(set_attr "move_type" "andi")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*zero_extend<SHORT:mode><GPR:mode>2_mips16"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extend:GPR (match_operand:SHORT 1 "memory_operand" "m")))]
+ "TARGET_MIPS16"
+ "l<SHORT:size>u\t%0,%1"
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
+ ""
+{
+ if (TARGET_MIPS16 && !memory_operand (operands[1], QImode))
+ {
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "d,m")))]
+ "!TARGET_MIPS16"
+ "@
+ andi\t%0,%1,0x00ff
+ lbu\t%0,%1"
+ [(set_attr "move_type" "andi,load")
+ (set_attr "mode" "HI")])
+
+(define_insn "*zero_extendqihi2_mips16"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (zero_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
+ "TARGET_MIPS16"
+ "lbu\t%0,%1"
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "HI")])
+
+;; Combiner patterns to optimize truncate/zero_extend combinations.
+
+(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extend:GPR
+ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+{
+ operands[2] = GEN_INT (GET_MODE_MASK (<SHORT:MODE>mode));
+ return "andi\t%0,%1,%x2";
+}
+ [(set_attr "alu_type" "and")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*zero_extendhi_truncqi"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (zero_extend:HI
+ (truncate:QI (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+ "andi\t%0,%1,0xff"
+ [(set_attr "alu_type" "and")
+ (set_attr "mode" "HI")])
+
+;;
+;; ....................
+;;
+;; SIGN EXTENSION
+;;
+;; ....................
+
+;; Extension insns.
+;; Those with an integer source operand are ordered widest source type first.
+
+;; When TARGET_64BIT, all SImode integer registers should already be in
+;; sign-extended form (see TRULY_NOOP_TRUNCATION and truncdisi2). We can
+;; therefore get rid of register->register instructions if we constrain
+;; the source to be in the same register as the destination.
+;;
+;; The register alternative has type "arith" so that the pre-reload
+;; scheduler will treat it as a move. This reflects what happens if
+;; the register alternative needs a reload.
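+;;
+;; For example, once the source of an extendsidi2 has been allocated
+;; to the same register as the destination, the split below deletes
+;; the instruction entirely (emitting just a NOTE_INSN_DELETED),
+;; since the register already holds the sign-extended value.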
+(define_insn_and_split "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,m")))]
+ "TARGET_64BIT"
+ "@
+ #
+ lw\t%0,%1"
+ "&& reload_completed && register_operand (operands[1], VOIDmode)"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+}
+ [(set_attr "move_type" "move,load")
+ (set_attr "mode" "DI")])
+
+(define_expand "extend<SHORT:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand")
+ (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand")))]
+ "")
+
+(define_insn "*extend<SHORT:mode><GPR:mode>2_mips16e"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (sign_extend:GPR (match_operand:SHORT 1 "nonimmediate_operand" "0,m")))]
+ "GENERATE_MIPS16E"
+ "@
+ se<SHORT:size>\t%0
+ l<SHORT:size>\t%0,%1"
+ [(set_attr "move_type" "signext,load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn_and_split "*extend<SHORT:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (sign_extend:GPR
+ (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))]
+ "!ISA_HAS_SEB_SEH && !GENERATE_MIPS16E"
+ "@
+ #
+ l<SHORT:size>\t%0,%1"
+ "&& reload_completed && REG_P (operands[1])"
+ [(set (match_dup 0) (ashift:GPR (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ashiftrt:GPR (match_dup 0) (match_dup 2)))]
+{
+ operands[1] = gen_lowpart (<GPR:MODE>mode, operands[1]);
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<GPR:MODE>mode)
+ - GET_MODE_BITSIZE (<SHORT:MODE>mode));
+}
+ [(set_attr "move_type" "shift_shift,load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*extend<SHORT:mode><GPR:mode>2_se<SHORT:size>"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (sign_extend:GPR
+ (match_operand:SHORT 1 "nonimmediate_operand" "d,m")))]
+ "ISA_HAS_SEB_SEH"
+ "@
+ se<SHORT:size>\t%0,%1
+ l<SHORT:size>\t%0,%1"
+ [(set_attr "move_type" "signext,load")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
+ "")
+
+(define_insn "*extendqihi2_mips16e"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,m")))]
+ "GENERATE_MIPS16E"
+ "@
+ seb\t%0
+ lb\t%0,%1"
+ [(set_attr "move_type" "signext,load")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (sign_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "d,m")))]
+ "!ISA_HAS_SEB_SEH && !GENERATE_MIPS16E"
+ "@
+ #
+ lb\t%0,%1"
+ "&& reload_completed && REG_P (operands[1])"
+ [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 2)))]
+{
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (SImode)
+ - GET_MODE_BITSIZE (QImode));
+}
+ [(set_attr "move_type" "shift_shift,load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqihi2_seb"
+ [(set (match_operand:HI 0 "register_operand" "=d,d")
+ (sign_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" "d,m")))]
+ "ISA_HAS_SEB_SEH"
+ "@
+ seb\t%0,%1
+ lb\t%0,%1"
+ [(set_attr "move_type" "signext,load")
+ (set_attr "mode" "SI")])
+
+;; Combiner patterns for truncate/sign_extend combinations. The SI versions
+;; use the shift/truncate patterns.
+
+(define_insn_and_split "*extenddi_truncate<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI
+ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && !ISA_HAS_EXTS"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (ashift:DI (match_dup 1)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (ashiftrt:DI (match_dup 2)
+ (match_dup 3)))]
+{
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (<MODE>mode));
+}
+ [(set_attr "move_type" "shift_shift")
+ (set_attr "mode" "DI")])
+
+(define_insn_and_split "*extendsi_truncate<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI
+ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && !ISA_HAS_EXTS"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (ashift:DI (match_dup 1)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (truncate:SI (ashiftrt:DI (match_dup 2)
+ (match_dup 3))))]
+{
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ operands[3] = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (<MODE>mode));
+}
+ [(set_attr "move_type" "shift_shift")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*extendhi_truncateqi"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (sign_extend:HI
+ (truncate:QI (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && !ISA_HAS_EXTS"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (ashift:DI (match_dup 1)
+ (const_int 56)))
+ (set (match_dup 0)
+ (truncate:HI (ashiftrt:DI (match_dup 2)
+ (const_int 56))))]
+{
+ operands[2] = gen_lowpart (DImode, operands[0]);
+}
+ [(set_attr "move_type" "shift_shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extend<GPR:mode>_truncate<SHORT:mode>_exts"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (sign_extend:GPR
+ (truncate:SHORT (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && ISA_HAS_EXTS"
+{
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<SHORT:MODE>mode));
+ return "exts\t%0,%1,0,%m2";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*extendhi_truncateqi_exts"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (sign_extend:HI
+ (truncate:QI (match_operand:DI 1 "register_operand" "d"))))]
+ "TARGET_64BIT && !TARGET_MIPS16 && ISA_HAS_EXTS"
+ "exts\t%0,%1,0,7"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+ "cvt.d.s\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "cnv_mode" "S2D")
+ (set_attr "mode" "DF")])
+
+;;
+;; ....................
+;;
+;; CONVERSIONS
+;;
+;; ....................
+
+(define_expand "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (fix:SI (match_operand:DF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+{
+ if (!ISA_HAS_TRUNC_W)
+ {
+ emit_insn (gen_fix_truncdfsi2_macro (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "fix_truncdfsi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && ISA_HAS_TRUNC_W"
+ "trunc.w.d %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "DF")
+ (set_attr "cnv_mode" "D2I")])
+
+(define_insn "fix_truncdfsi2_macro"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (match_operand:DF 1 "register_operand" "f")))
+ (clobber (match_scratch:DF 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !ISA_HAS_TRUNC_W"
+{
+ if (mips_nomacro.nesting_level > 0)
+ return ".set\tmacro\;trunc.w.d %0,%1,%2\;.set\tnomacro";
+ else
+ return "trunc.w.d %0,%1,%2";
+}
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "DF")
+ (set_attr "cnv_mode" "D2I")
+ (set_attr "length" "36")])
+
+(define_expand "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (fix:SI (match_operand:SF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT"
+{
+ if (!ISA_HAS_TRUNC_W)
+ {
+ emit_insn (gen_fix_truncsfsi2_macro (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "fix_truncsfsi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && ISA_HAS_TRUNC_W"
+ "trunc.w.s %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "cnv_mode" "S2I")])
+
+(define_insn "fix_truncsfsi2_macro"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (match_operand:SF 1 "register_operand" "f")))
+ (clobber (match_scratch:SF 2 "=d"))]
+ "TARGET_HARD_FLOAT && !ISA_HAS_TRUNC_W"
+{
+ if (mips_nomacro.nesting_level > 0)
+ return ".set\tmacro\;trunc.w.s %0,%1,%2\;.set\tnomacro";
+ else
+ return "trunc.w.s %0,%1,%2";
+}
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "cnv_mode" "S2I")
+ (set_attr "length" "36")])
+
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (fix:DI (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT"
+ "trunc.l.d %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "DF")
+ (set_attr "cnv_mode" "D2I")])
+
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (fix:DI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT"
+ "trunc.l.s %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "cnv_mode" "S2I")])
+
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:SI 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+ "cvt.d.w\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "DF")
+ (set_attr "cnv_mode" "I2D")])
+
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:DI 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT"
+ "cvt.d.l\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "DF")
+ (set_attr "cnv_mode" "I2D")])
+
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "cvt.s.w\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "cnv_mode" "I2S")])
+
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:DI 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FLOAT64 && TARGET_DOUBLE_FLOAT"
+ "cvt.s.l\t%0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "SF")
+ (set_attr "cnv_mode" "I2S")])
+
+
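+;; Unsigned conversions are done by range-checking against 2^31 (or
+;; 2^63 for the DImode results): values below the bound use the signed
+;; trunc instruction directly; values at or above it have the bound
+;; subtracted first and the sign bit ORed back into the result.
+;;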
+(define_expand "fixuns_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (unsigned_fix:SI (match_operand:DF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+{
+ rtx reg1 = gen_reg_rtx (DFmode);
+ rtx reg2 = gen_reg_rtx (DFmode);
+ rtx reg3 = gen_reg_rtx (SImode);
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx test;
+ REAL_VALUE_TYPE offset;
+
+ real_2expN (&offset, 31, DFmode);
+
+ if (reg1) /* Turn off complaints about unreached code. */
+ {
+ mips_emit_move (reg1, CONST_DOUBLE_FROM_REAL_VALUE (offset, DFmode));
+ do_pending_stack_adjust ();
+
+ test = gen_rtx_GE (VOIDmode, operands[1], reg1);
+ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1));
+
+ emit_insn (gen_fix_truncdfsi2 (operands[0], operands[1]));
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, label2)));
+ emit_barrier ();
+
+ emit_label (label1);
+ mips_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1));
+ mips_emit_move (reg3, GEN_INT (trunc_int_for_mode
+ (BITMASK_HIGH, SImode)));
+
+ emit_insn (gen_fix_truncdfsi2 (operands[0], reg2));
+ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3));
+
+ emit_label (label2);
+
+ /* Allow REG_NOTES to be set on last insn (labels don't have enough
+ fields, and can't be used for REG_NOTES anyway). */
+ emit_use (stack_pointer_rtx);
+ DONE;
+ }
+})
+
+
+(define_expand "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand")
+ (unsigned_fix:DI (match_operand:DF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT"
+{
+ rtx reg1 = gen_reg_rtx (DFmode);
+ rtx reg2 = gen_reg_rtx (DFmode);
+ rtx reg3 = gen_reg_rtx (DImode);
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx test;
+ REAL_VALUE_TYPE offset;
+
+ real_2expN (&offset, 63, DFmode);
+
+ mips_emit_move (reg1, CONST_DOUBLE_FROM_REAL_VALUE (offset, DFmode));
+ do_pending_stack_adjust ();
+
+ test = gen_rtx_GE (VOIDmode, operands[1], reg1);
+ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1));
+
+ emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1]));
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, label2)));
+ emit_barrier ();
+
+ emit_label (label1);
+ mips_emit_move (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1));
+ mips_emit_move (reg3, GEN_INT (BITMASK_HIGH));
+ emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32)));
+
+ emit_insn (gen_fix_truncdfdi2 (operands[0], reg2));
+ emit_insn (gen_iordi3 (operands[0], operands[0], reg3));
+
+ emit_label (label2);
+
+ /* Allow REG_NOTES to be set on last insn (labels don't have enough
+ fields, and can't be used for REG_NOTES anyway). */
+ emit_use (stack_pointer_rtx);
+ DONE;
+})
+
+
+(define_expand "fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (unsigned_fix:SI (match_operand:SF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT"
+{
+ rtx reg1 = gen_reg_rtx (SFmode);
+ rtx reg2 = gen_reg_rtx (SFmode);
+ rtx reg3 = gen_reg_rtx (SImode);
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx test;
+ REAL_VALUE_TYPE offset;
+
+ real_2expN (&offset, 31, SFmode);
+
+ mips_emit_move (reg1, CONST_DOUBLE_FROM_REAL_VALUE (offset, SFmode));
+ do_pending_stack_adjust ();
+
+ test = gen_rtx_GE (VOIDmode, operands[1], reg1);
+ emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1));
+
+ emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1]));
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, label2)));
+ emit_barrier ();
+
+ emit_label (label1);
+ mips_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1));
+ mips_emit_move (reg3, GEN_INT (trunc_int_for_mode
+ (BITMASK_HIGH, SImode)));
+
+ emit_insn (gen_fix_truncsfsi2 (operands[0], reg2));
+ emit_insn (gen_iorsi3 (operands[0], operands[0], reg3));
+
+ emit_label (label2);
+
+ /* Allow REG_NOTES to be set on last insn (labels don't have enough
+ fields, and can't be used for REG_NOTES anyway). */
+ emit_use (stack_pointer_rtx);
+ DONE;
+})
+
+
+(define_expand "fixuns_truncsfdi2"
+ [(set (match_operand:DI 0 "register_operand")
+ (unsigned_fix:DI (match_operand:SF 1 "register_operand")))]
+ "TARGET_HARD_FLOAT && TARGET_64BIT && TARGET_DOUBLE_FLOAT"
+{
+ rtx reg1 = gen_reg_rtx (SFmode);
+ rtx reg2 = gen_reg_rtx (SFmode);
+ rtx reg3 = gen_reg_rtx (DImode);
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx test;
+ REAL_VALUE_TYPE offset;
+
+ real_2expN (&offset, 63, SFmode);
+
+ mips_emit_move (reg1, CONST_DOUBLE_FROM_REAL_VALUE (offset, SFmode));
+ do_pending_stack_adjust ();
+
+ test = gen_rtx_GE (VOIDmode, operands[1], reg1);
+ emit_jump_insn (gen_cbranchsf4 (test, operands[1], reg1, label1));
+
+ emit_insn (gen_fix_truncsfdi2 (operands[0], operands[1]));
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, label2)));
+ emit_barrier ();
+
+ emit_label (label1);
+ mips_emit_move (reg2, gen_rtx_MINUS (SFmode, operands[1], reg1));
+ mips_emit_move (reg3, GEN_INT (BITMASK_HIGH));
+ emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32)));
+
+ emit_insn (gen_fix_truncsfdi2 (operands[0], reg2));
+ emit_insn (gen_iordi3 (operands[0], operands[0], reg3));
+
+ emit_label (label2);
+
+ /* Allow REG_NOTES to be set on last insn (labels don't have enough
+ fields, and can't be used for REG_NOTES anyway). */
+ emit_use (stack_pointer_rtx);
+ DONE;
+})
+
+;;
+;; ....................
+;;
+;; DATA MOVEMENT
+;;
+;; ....................
+
+;; Bit field extract patterns which use lwl/lwr or ldl/ldr.
+
+(define_expand "extv"
+ [(set (match_operand 0 "register_operand")
+ (sign_extract (match_operand 1 "nonimmediate_operand")
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand")))]
+ "!TARGET_MIPS16"
+{
+ if (mips_expand_ext_as_unaligned_load (operands[0], operands[1],
+ INTVAL (operands[2]),
+ INTVAL (operands[3])))
+ DONE;
+ else if (register_operand (operands[1], GET_MODE (operands[0]))
+ && ISA_HAS_EXTS && UINTVAL (operands[2]) <= 32)
+ {
+ if (GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_extvdi (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ emit_insn (gen_extvsi (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn "extv<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (sign_extract:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "ISA_HAS_EXTS && UINTVAL (operands[2]) <= 32"
+ "exts\t%0,%1,%3,%m2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<MODE>")])
+
+
+(define_expand "extzv"
+ [(set (match_operand 0 "register_operand")
+ (zero_extract (match_operand 1 "nonimmediate_operand")
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand")))]
+ "!TARGET_MIPS16"
+{
+ if (mips_expand_ext_as_unaligned_load (operands[0], operands[1],
+ INTVAL (operands[2]),
+ INTVAL (operands[3])))
+ DONE;
+ else if (mips_use_ins_ext_p (operands[1], INTVAL (operands[2]),
+ INTVAL (operands[3])))
+ {
+ if (GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_extzvdi (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ emit_insn (gen_extzvsi (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn "extzv<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extract:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "mips_use_ins_ext_p (operands[1], INTVAL (operands[2]),
+ INTVAL (operands[3]))"
+ "<d>ext\t%0,%1,%3,%2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*extzv_truncsi_exts"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" ""))))]
+ "ISA_HAS_EXTS && TARGET_64BIT && IN_RANGE (INTVAL (operands[2]), 32, 63)"
+ "exts\t%0,%1,%3,31"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+
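+;; For example (registers illustrative), inserting the low 8 bits of
+;; $5 into bits 8..15 of $4, as in x = (x & ~0xff00) | ((y & 0xff) << 8),
+;; can be emitted as "ins $4,$5,8,8" when mips_use_ins_ext_p allows it.
+;;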
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand")
+ (match_operand 1 "immediate_operand")
+ (match_operand 2 "immediate_operand"))
+ (match_operand 3 "reg_or_0_operand"))]
+ "!TARGET_MIPS16"
+{
+ if (mips_expand_ins_as_unaligned_store (operands[0], operands[3],
+ INTVAL (operands[1]),
+ INTVAL (operands[2])))
+ DONE;
+ else if (mips_use_ins_ext_p (operands[0], INTVAL (operands[1]),
+ INTVAL (operands[2])))
+ {
+ if (GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_insvdi (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ emit_insn (gen_insvsi (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn "insv<mode>"
+ [(set (zero_extract:GPR (match_operand:GPR 0 "register_operand" "+d")
+ (match_operand:SI 1 "immediate_operand" "I")
+ (match_operand:SI 2 "immediate_operand" "I"))
+ (match_operand:GPR 3 "reg_or_0_operand" "dJ"))]
+ "mips_use_ins_ext_p (operands[0], INTVAL (operands[1]),
+ INTVAL (operands[2]))"
+ "<d>ins\t%0,%z3,%2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "<MODE>")])
+
+;; Combiner pattern for cins (clear and insert bit field). We can
+;; implement a mask-and-shift-left operation with this. Note that if
+;; the upper bit of the mask is set in an SImode operation, the mask
+;; itself will be sign-extended. mask_low_and_shift_len will
+;; therefore be greater than our threshold of 32.
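+;;
+;; For example, on a target with ISA_HAS_CINS (illustrative case),
+;; (and:DI (ashift:DI x (const_int 2)) (const_int 0x3fc)) is an
+;; 8-bit mask shifted left by 2 and is emitted as "cins %0,%1,2,7".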
+
+(define_insn "*cins<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (and:GPR
+ (ashift:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "const_int_operand" ""))
+ (match_operand:GPR 3 "const_int_operand" "")))]
+ "ISA_HAS_CINS
+ && mask_low_and_shift_p (<MODE>mode, operands[3], operands[2], 32)"
+{
+ operands[3] =
+ GEN_INT (mask_low_and_shift_len (<MODE>mode, operands[3], operands[2]));
+ return "cins\t%0,%1,%2,%m3";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<MODE>")])
+
+;; Unaligned word moves generated by the bit field patterns.
+;;
+;; As far as the rtl is concerned, both the left-part and right-part
+;; instructions can access the whole field. However, the real operand
+;; refers to just the first or the last byte (depending on endianness).
+;; We therefore use two memory operands to each instruction, one to
+;; describe the rtl effect and one to use in the assembly output.
+;;
+;; Operands 0 and 1 are the rtl-level target and source respectively.
+;; This allows us to use the standard length calculations for the "load"
+;; and "store" type attributes.
+
+(define_insn "mov_<load>l"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (unspec:GPR [(match_operand:BLK 1 "memory_operand" "m")
+ (match_operand:QI 2 "memory_operand" "m")]
+ UNSPEC_LOAD_LEFT))]
+ "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
+ "<load>l\t%0,%2"
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mov_<load>r"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (unspec:GPR [(match_operand:BLK 1 "memory_operand" "m")
+ (match_operand:QI 2 "memory_operand" "m")
+ (match_operand:GPR 3 "register_operand" "0")]
+ UNSPEC_LOAD_RIGHT))]
+ "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[1])"
+ "<load>r\t%0,%2"
+ [(set_attr "move_type" "load")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mov_<store>l"
+ [(set (match_operand:BLK 0 "memory_operand" "=m")
+ (unspec:BLK [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
+ (match_operand:QI 2 "memory_operand" "m")]
+ UNSPEC_STORE_LEFT))]
+ "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[0])"
+ "<store>l\t%z1,%2"
+ [(set_attr "move_type" "store")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mov_<store>r"
+ [(set (match_operand:BLK 0 "memory_operand" "+m")
+ (unspec:BLK [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
+ (match_operand:QI 2 "memory_operand" "m")
+ (match_dup 0)]
+ UNSPEC_STORE_RIGHT))]
+ "!TARGET_MIPS16 && mips_mem_fits_mode_p (<MODE>mode, operands[0])"
+ "<store>r\t%z1,%2"
+ [(set_attr "move_type" "store")
+ (set_attr "mode" "<MODE>")])
+
+;; An instruction to calculate the high part of a 64-bit SYMBOL_ABSOLUTE.
+;; The required value is:
+;;
+;; (%highest(op1) << 48) + (%higher(op1) << 32) + (%hi(op1) << 16)
+;;
+;; which translates to:
+;;
+;; lui op0,%highest(op1)
+;; daddiu op0,op0,%higher(op1)
+;; dsll op0,op0,16
+;; daddiu op0,op0,%hi(op1)
+;; dsll op0,op0,16
+;;
+;; The split is deferred until after flow2 to allow the peephole2 below
+;; to take effect.
+(define_insn_and_split "*lea_high64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (high:DI (match_operand:DI 1 "absolute_symbolic_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS && ABI_HAS_64BIT_SYMBOLS"
+ "#"
+ "&& epilogue_completed"
+ [(set (match_dup 0) (high:DI (match_dup 2)))
+ (set (match_dup 0) (lo_sum:DI (match_dup 0) (match_dup 2)))
+ (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 16)))
+ (set (match_dup 0) (lo_sum:DI (match_dup 0) (match_dup 3)))
+ (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 16)))]
+{
+ operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
+ operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID);
+}
+ [(set_attr "length" "20")])
+
+;; Use a scratch register to reduce the latency of the above pattern
+;; on superscalar machines. The optimized sequence is:
+;;
+;; lui op1,%highest(op2)
+;; lui op0,%hi(op2)
+;; daddiu op1,op1,%higher(op2)
+;; dsll32 op1,op1,0
+;; daddu op1,op1,op0
+(define_peephole2
+ [(set (match_operand:DI 1 "d_operand")
+ (high:DI (match_operand:DI 2 "absolute_symbolic_operand")))
+ (match_scratch:DI 0 "d")]
+ "TARGET_EXPLICIT_RELOCS && ABI_HAS_64BIT_SYMBOLS"
+ [(set (match_dup 1) (high:DI (match_dup 3)))
+ (set (match_dup 0) (high:DI (match_dup 4)))
+ (set (match_dup 1) (lo_sum:DI (match_dup 1) (match_dup 3)))
+ (set (match_dup 1) (ashift:DI (match_dup 1) (const_int 32)))
+ (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 0)))]
+{
+ operands[3] = mips_unspec_address (operands[2], SYMBOL_64_HIGH);
+ operands[4] = mips_unspec_address (operands[2], SYMBOL_64_LOW);
+})
+
+;; On most targets, the expansion of (lo_sum (high X) X) for a 64-bit
+;; SYMBOL_ABSOLUTE X will take 6 cycles. This next pattern allows combine
+;; to merge the HIGH and LO_SUM parts of a move if the HIGH part is only
+;; used once. We can then use the sequence:
+;;
+;; lui op0,%highest(op1)
+;; lui op2,%hi(op1)
+;; daddiu op0,op0,%higher(op1)
+;; daddiu op2,op2,%lo(op1)
+;; dsll32 op0,op0,0
+;; daddu op0,op0,op2
+;;
+;; which takes 4 cycles on most superscalar targets.
+(define_insn_and_split "*lea64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (match_operand:DI 1 "absolute_symbolic_operand" ""))
+ (clobber (match_scratch:DI 2 "=&d"))]
+ "TARGET_EXPLICIT_RELOCS && ABI_HAS_64BIT_SYMBOLS && cse_not_expected"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (high:DI (match_dup 3)))
+ (set (match_dup 2) (high:DI (match_dup 4)))
+ (set (match_dup 0) (lo_sum:DI (match_dup 0) (match_dup 3)))
+ (set (match_dup 2) (lo_sum:DI (match_dup 2) (match_dup 4)))
+ (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))]
+{
+ operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
+ operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW);
+}
+ [(set_attr "length" "24")])
+
+;; Split HIGHs into:
+;;
+;; li op0,%hi(sym)
+;; sll op0,16
+;;
+;; on MIPS16 targets.
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (high:SI (match_operand:SI 1 "absolute_symbolic_operand")))]
+ "TARGET_MIPS16 && reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (ashift:SI (match_dup 0) (const_int 16)))]
+{
+ operands[2] = mips_unspec_address (operands[1], SYMBOL_32_HIGH);
+})
+
+;; Insns to fetch a symbol from a big GOT.
+
+(define_insn_and_split "*xgot_hi<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (high:P (match_operand:P 1 "got_disp_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_XGOT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (high:P (match_dup 2)))
+ (set (match_dup 0) (plus:P (match_dup 0) (match_dup 3)))]
+{
+ operands[2] = mips_unspec_address (operands[1], SYMBOL_GOTOFF_DISP);
+ operands[3] = pic_offset_table_rtx;
+}
+ [(set_attr "got" "xgot_high")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*xgot_lo<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (lo_sum:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "got_disp_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS && TARGET_XGOT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:P [(match_dup 1) (match_dup 3)] UNSPEC_LOAD_GOT))]
+ { operands[3] = mips_unspec_address (operands[2], SYMBOL_GOTOFF_DISP); }
+ [(set_attr "got" "load")
+ (set_attr "mode" "<MODE>")])
+
+;; Insns to fetch a symbol from a normal GOT.
+
+(define_insn_and_split "*got_disp<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (match_operand:P 1 "got_disp_operand" ""))]
+ "TARGET_EXPLICIT_RELOCS && !mips_split_p[SYMBOL_GOT_DISP]"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 2))]
+ { operands[2] = mips_got_load (NULL, operands[1], SYMBOL_GOTOFF_DISP); }
+ [(set_attr "got" "load")
+ (set_attr "mode" "<MODE>")])
+
+;; Insns for loading the "page" part of a page/ofst address from the GOT.
+
+(define_insn_and_split "*got_page<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (high:P (match_operand:P 1 "got_page_ofst_operand" "")))]
+ "TARGET_EXPLICIT_RELOCS && !mips_split_hi_p[SYMBOL_GOT_PAGE_OFST]"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 2))]
+ { operands[2] = mips_got_load (NULL, operands[1], SYMBOL_GOTOFF_PAGE); }
+ [(set_attr "got" "load")
+ (set_attr "mode" "<MODE>")])
+
+;; Convenience expander that generates the rhs of a load_got<mode> insn.
+(define_expand "unspec_got<mode>"
+ [(unspec:P [(match_operand:P 0)
+ (match_operand:P 1)] UNSPEC_LOAD_GOT)])
+
+;; Lower-level instructions for loading an address from the GOT.
+;; We could use MEMs, but an unspec gives more optimization
+;; opportunities.
+
+(define_insn "load_got<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec:P [(match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "immediate_operand" "")]
+ UNSPEC_LOAD_GOT))]
+ ""
+ "<load>\t%0,%R2(%1)"
+ [(set_attr "got" "load")
+ (set_attr "mode" "<MODE>")])
+
+;; Instructions for adding the low 16 bits of an address to a register.
+;; Operand 2 is the address: mips_print_operand works out which relocation
+;; should be applied.
+
+(define_insn "*low<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (lo_sum:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "immediate_operand" "")))]
+ "!TARGET_MIPS16"
+ "<d>addiu\t%0,%1,%R2"
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*low<mode>_mips16"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (lo_sum:P (match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "immediate_operand" "")))]
+ "TARGET_MIPS16"
+ "<d>addiu\t%0,%R2"
+ [(set_attr "alu_type" "add")
+ (set_attr "mode" "<MODE>")
+ (set_attr "extended_mips16" "yes")])
+
+;; Expose MIPS16 uses of the global pointer after reload if the function
+;; is responsible for setting up the register itself.
+(define_split
+ [(set (match_operand:GPR 0 "d_operand")
+ (const:GPR (unspec:GPR [(const_int 0)] UNSPEC_GP)))]
+ "TARGET_MIPS16 && TARGET_USE_GOT && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ { operands[1] = pic_offset_table_rtx; })
+
+;; Allow combine to split complex const_int load sequences, using operand 2
+;; to store the intermediate results. See move_operand for details.
+(define_split
+ [(set (match_operand:GPR 0 "register_operand")
+ (match_operand:GPR 1 "splittable_const_int_operand"))
+ (clobber (match_operand:GPR 2 "register_operand"))]
+ ""
+ [(const_int 0)]
+{
+ mips_move_integer (operands[2], operands[0], INTVAL (operands[1]));
+ DONE;
+})
+
+;; Likewise, for symbolic operands.
+(define_split
+ [(set (match_operand:P 0 "register_operand")
+ (match_operand:P 1))
+ (clobber (match_operand:P 2 "register_operand"))]
+ "mips_split_symbol (operands[2], operands[1], MAX_MACHINE_MODE, NULL)"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ mips_split_symbol (operands[2], operands[1],
+ MAX_MACHINE_MODE, &operands[3]);
+})
+
+;; 64-bit integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "")
+ (match_operand:DI 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (DImode, operands[0], operands[1]))
+ DONE;
+})
+
+;; For mips16, we need a special case to handle storing $31 into
+;; memory, since we don't have a constraint to match $31. This
+;; instruction can be generated by save_restore_insns.
+
+(define_insn "*mov<mode>_ra"
+ [(set (match_operand:GPR 0 "stack_operand" "=m")
+ (reg:GPR RETURN_ADDR_REGNUM))]
+ "TARGET_MIPS16"
+ "<store>\t$31,%0"
+ [(set_attr "move_type" "store")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*movdi_32bit"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d,*f,*f,*d,*m,*B*C*D,*B*C*D,*d,*m")
+ (match_operand:DI 1 "move_operand" "d,i,m,d,*J*d,*a,*J*d,*m,*f,*f,*d,*m,*B*C*D,*B*C*D"))]
+ "!TARGET_64BIT && !TARGET_MIPS16
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,load,store,mthilo,mfhilo,mtc,fpload,mfc,fpstore,mtc,fpload,mfc,fpstore")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movdi_32bit_mips16"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d")
+ (match_operand:DI 1 "move_operand" "d,d,y,K,N,m,d,*x"))]
+ "!TARGET_64BIT && TARGET_MIPS16
+ && (register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,const,constN,load,store,mfhilo")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movdi_64bit"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,e,d,m,*f,*f,*d,*m,*a,*d,*B*C*D,*B*C*D,*d,*m")
+ (match_operand:DI 1 "move_operand" "d,U,T,m,dJ,*d*J,*m,*f,*f,*J*d,*a,*d,*m,*B*C*D,*B*C*D"))]
+ "TARGET_64BIT && !TARGET_MIPS16
+ && (register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,const,load,store,mtc,fpload,mfc,fpstore,mthilo,mfhilo,mtc,fpload,mfc,fpstore")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movdi_64bit_mips16"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,y,d,d,d,d,d,d,m,*d")
+ (match_operand:DI 1 "move_operand" "d,d,y,K,N,U,kf,m,d,*a"))]
+ "TARGET_64BIT && TARGET_MIPS16
+ && (register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,const,constN,const,loadpool,load,store,mfhilo")
+ (set_attr "mode" "DI")])
+
+;; On the mips16, we can split ld $r,N($r) into an add and a load
+;; when the original load is a 4-byte instruction but the add and the
+;; load are two 2-byte instructions.
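+;; For example, with N = 13 ("ld $2,13($2)", registers illustrative):
+;; off = 13 & 7 = 5, so the split below emits "daddiu $2,5" followed
+;; by "ld $2,8($2)".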
+
+(define_split
+ [(set (match_operand:DI 0 "d_operand")
+ (mem:DI (plus:DI (match_dup 0)
+ (match_operand:DI 1 "const_int_operand"))))]
+ "TARGET_64BIT && TARGET_MIPS16 && reload_completed
+ && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -0x10)
+ || (INTVAL (operands[1]) >= 32 * 8
+ && INTVAL (operands[1]) <= 31 * 8 + 0x8)
+ || (INTVAL (operands[1]) >= 0
+ && INTVAL (operands[1]) < 32 * 8
+ && (INTVAL (operands[1]) & 7) != 0))"
+ [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (mem:DI (plus:DI (match_dup 0) (match_dup 2))))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val < 0)
+ operands[2] = const0_rtx;
+ else if (val >= 32 * 8)
+ {
+ int off = val & 7;
+
+ operands[1] = GEN_INT (0x8 + off);
+ operands[2] = GEN_INT (val - off - 0x8);
+ }
+ else
+ {
+ int off = val & 7;
+
+ operands[1] = GEN_INT (off);
+ operands[2] = GEN_INT (val - off);
+ }
+})
+
+;; 32-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:IMOVE32 0 "")
+ (match_operand:IMOVE32 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+;; The difference between these two is whether or not ints are allowed
+;; in FP registers (off by default, use -mdebugh to enable).
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=d,d,e,d,m,*f,*f,*d,*m,*d,*z,*a,*d,*B*C*D,*B*C*D,*d,*m")
+ (match_operand:IMOVE32 1 "move_operand" "d,U,T,m,dJ,*d*J,*m,*f,*f,*z,*d,*J*d,*a,*d,*m,*B*C*D,*B*C*D"))]
+ "!TARGET_MIPS16
+ && (register_operand (operands[0], <MODE>mode)
+ || reg_or_0_operand (operands[1], <MODE>mode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,const,load,store,mtc,fpload,mfc,fpstore,mfc,mtc,mthilo,mfhilo,mtc,fpload,mfc,fpstore")
+ (set_attr "mode" "SI")])
+
+(define_insn "*mov<mode>_mips16"
+ [(set (match_operand:IMOVE32 0 "nonimmediate_operand" "=d,y,d,d,d,d,d,d,m,*d")
+ (match_operand:IMOVE32 1 "move_operand" "d,d,y,K,N,U,kf,m,d,*a"))]
+ "TARGET_MIPS16
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,const,constN,const,loadpool,load,store,mfhilo")
+ (set_attr "mode" "SI")])
+
+;; On the mips16, we can split lw $r,N($r) into an add and a load
+;; when the original load is a 4-byte instruction but the add and the
+;; load are two 2-byte instructions.
+
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (mem:SI (plus:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand"))))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -0x80)
+ || (INTVAL (operands[1]) >= 32 * 4
+ && INTVAL (operands[1]) <= 31 * 4 + 0x7c)
+ || (INTVAL (operands[1]) >= 0
+ && INTVAL (operands[1]) < 32 * 4
+ && (INTVAL (operands[1]) & 3) != 0))"
+ [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (mem:SI (plus:SI (match_dup 0) (match_dup 2))))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val < 0)
+ operands[2] = const0_rtx;
+ else if (val >= 32 * 4)
+ {
+ int off = val & 3;
+
+ operands[1] = GEN_INT (0x7c + off);
+ operands[2] = GEN_INT (val - off - 0x7c);
+ }
+ else
+ {
+ int off = val & 3;
+
+ operands[1] = GEN_INT (off);
+ operands[2] = GEN_INT (val - off);
+ }
+})
+
+;; On the mips16, we can split a load of certain constants into a load
+;; and an add. This turns a 4-byte instruction into two 2-byte
+;; instructions.
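+;; For example, loading 0x120 splits into "li $2,0xff" followed by
+;; "addiu $2,0x21" (registers illustrative), since 0x120 - 0xff =
+;; 0x21 fits in the short two-operand MIPS16 addiu.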
+
+(define_split
+ [(set (match_operand:SI 0 "d_operand")
+ (match_operand:SI 1 "const_int_operand"))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && INTVAL (operands[1]) >= 0x100
+ && INTVAL (operands[1]) <= 0xff + 0x7f"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))]
+{
+ int val = INTVAL (operands[1]);
+
+ operands[1] = GEN_INT (0xff);
+ operands[2] = GEN_INT (val - 0xff);
+})
+
+;; This insn handles moving CCmode values. It's really just a
+;; slightly simplified copy of *mov<mode>_internal, with additional
+;; cases to move a condition register to a general register and to
+;; move between the general registers and the floating point
+;; registers.
+
+(define_insn "movcc"
+ [(set (match_operand:CC 0 "nonimmediate_operand" "=d,*d,*d,*m,*d,*f,*f,*f,*m")
+ (match_operand:CC 1 "general_operand" "z,*d,*m,*d,*f,*d,*f,*m,*f"))]
+ "ISA_HAS_8CC && TARGET_HARD_FLOAT"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "lui_movf,move,load,store,mfc,mtc,fmove,fpload,fpstore")
+ (set_attr "mode" "SI")])
+
+;; Reload condition code registers. reload_incc and reload_outcc
+;; both handle moves from arbitrary operands into condition code
+;; registers. reload_incc handles the more common case in which
+;; a source operand is constrained to be in a condition-code
+;; register, but has not been allocated to one.
+;;
+;; Sometimes, such as in movcc, we have a CCmode destination whose
+;; constraints do not include 'z'. reload_outcc handles the case
+;; when such an operand is allocated to a condition-code register.
+;;
+;; Note that reloads from a condition code register to some
+;; other location can be done using ordinary moves. Moving
+;; into a GPR takes a single movcc, moving elsewhere takes
+;; two. We can leave these cases to the generic reload code.
+(define_expand "reload_incc"
+ [(set (match_operand:CC 0 "fcc_reload_operand" "=z")
+ (match_operand:CC 1 "general_operand" ""))
+ (clobber (match_operand:TF 2 "register_operand" "=&f"))]
+ "ISA_HAS_8CC && TARGET_HARD_FLOAT"
+{
+ mips_expand_fcc_reload (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "reload_outcc"
+ [(set (match_operand:CC 0 "fcc_reload_operand" "=z")
+ (match_operand:CC 1 "register_operand" ""))
+ (clobber (match_operand:TF 2 "register_operand" "=&f"))]
+ "ISA_HAS_8CC && TARGET_HARD_FLOAT"
+{
+ mips_expand_fcc_reload (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+;; MIPS4 supports loading and storing a floating point register from
+;; the sum of two general registers. We use two versions for each of
+;; these four instructions: one where the two general registers are
+;; SImode, and one where they are DImode. This is because general
+;; registers will be in SImode when they hold 32-bit values, but,
+;; since the 32-bit values are always sign extended, the [ls][wd]xc1
+;; instructions will still work correctly.
+
+;; ??? Perhaps it would be better to support these instructions by
+;; modifying TARGET_LEGITIMATE_ADDRESS_P and friends. However, since
+;; these instructions can only be used to load and store floating
+;; point registers, that would probably cause trouble in reload.
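+;;
+;; For example (registers illustrative), an SFmode load from the sum
+;; $4 + $5 matches the first pattern below and is emitted as
+;; "lwxc1 $f0,$4($5)"; the DFmode variant uses ldxc1, and the store
+;; patterns likewise use swxc1/sdxc1.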
+
+(define_insn "*<ANYF:loadx>_<P:mode>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "register_operand" "d"))))]
+ "ISA_HAS_FP4"
+ "<ANYF:loadx>\t%0,%1(%2)"
+ [(set_attr "type" "fpidxload")
+ (set_attr "mode" "<ANYF:UNITMODE>")])
+
+(define_insn "*<ANYF:storex>_<P:mode>"
+ [(set (mem:ANYF (plus:P (match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "register_operand" "d")))
+ (match_operand:ANYF 0 "register_operand" "f"))]
+ "ISA_HAS_FP4"
+ "<ANYF:storex>\t%0,%1(%2)"
+ [(set_attr "type" "fpidxstore")
+ (set_attr "mode" "<ANYF:UNITMODE>")])
+
+;; Scaled indexed address load.
+;; Per md.texi, we only need to look for a pattern with multiply in the
+;; address expression, not shift.
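+;; For example (registers illustrative), a word array access a[i]
+;; whose address is base + i * 4 can combine into a single
+;; "lwxs $2,$5($4)", with $5 holding the index and $4 the base.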
+
+(define_insn "*lwxs"
+ [(set (match_operand:IMOVE32 0 "register_operand" "=d")
+ (mem:IMOVE32
+ (plus:P (mult:P (match_operand:P 1 "register_operand" "d")
+ (const_int 4))
+ (match_operand:P 2 "register_operand" "d"))))]
+ "ISA_HAS_LWXS"
+ "lwxs\t%0,%1(%2)"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")])
+
+;; 16-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND.
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "")
+ (match_operand:HI 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (HImode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d")
+ (match_operand:HI 1 "move_operand" "d,I,m,dJ,*d*J,*a"))]
+ "!TARGET_MIPS16
+ && (register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,load,store,mthilo,mfhilo")
+ (set_attr "mode" "HI")])
+
+(define_insn "*movhi_mips16"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d")
+ (match_operand:HI 1 "move_operand" "d,d,y,K,N,m,d,*a"))]
+ "TARGET_MIPS16
+ && (register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,const,constN,load,store,mfhilo")
+ (set_attr "mode" "HI")])
+
+;; On the mips16, we can split lh $r,N($r) into an add and a load
+;; when the original load is a 4-byte instruction but the add and the
+;; load are two 2-byte instructions.
+
+(define_split
+ [(set (match_operand:HI 0 "d_operand")
+ (mem:HI (plus:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand"))))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -0x80)
+ || (INTVAL (operands[1]) >= 32 * 2
+ && INTVAL (operands[1]) <= 31 * 2 + 0x7e)
+ || (INTVAL (operands[1]) >= 0
+ && INTVAL (operands[1]) < 32 * 2
+ && (INTVAL (operands[1]) & 1) != 0))"
+ [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (mem:HI (plus:SI (match_dup 0) (match_dup 2))))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val < 0)
+ operands[2] = const0_rtx;
+ else if (val >= 32 * 2)
+ {
+ int off = val & 1;
+
+ operands[1] = GEN_INT (0x7e + off);
+ operands[2] = GEN_INT (val - off - 0x7e);
+ }
+ else
+ {
+ int off = val & 1;
+
+ operands[1] = GEN_INT (off);
+ operands[2] = GEN_INT (val - off);
+ }
+})
+
+;; 8-bit Integer moves
+
+;; Unlike most other insns, the move insns can't be split with
+;; different predicates, because register spilling and other parts of
+;; the compiler have memoized the insn number already.
+;; Unsigned loads are used because LOAD_EXTEND_OP returns ZERO_EXTEND.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "")
+ (match_operand:QI 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (QImode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d")
+ (match_operand:QI 1 "move_operand" "d,I,m,dJ,*d*J,*a"))]
+ "!TARGET_MIPS16
+ && (register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,load,store,mthilo,mfhilo")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movqi_mips16"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d")
+ (match_operand:QI 1 "move_operand" "d,d,y,K,N,m,d,*a"))]
+ "TARGET_MIPS16
+ && (register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,const,constN,load,store,mfhilo")
+ (set_attr "mode" "QI")])
+
+;; On the mips16, we can split lb $r,N($r) into an add and a load
+;; when the original load is a 4-byte instruction but the add and the
+;; load are two 2-byte instructions.
+
+(define_split
+ [(set (match_operand:QI 0 "d_operand")
+ (mem:QI (plus:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand"))))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && ((INTVAL (operands[1]) < 0
+ && INTVAL (operands[1]) >= -0x80)
+ || (INTVAL (operands[1]) >= 32
+ && INTVAL (operands[1]) <= 31 + 0x7f))"
+ [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (set (match_dup 0) (mem:QI (plus:SI (match_dup 0) (match_dup 2))))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+
+ if (val < 0)
+ operands[2] = const0_rtx;
+ else
+ {
+ operands[1] = GEN_INT (0x7f);
+ operands[2] = GEN_INT (val - 0x7f);
+ }
+})
+
+;; 32-bit floating point moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "")
+ (match_operand:SF 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (SFmode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movsf_hardfloat"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m")
+ (match_operand:SF 1 "move_operand" "f,G,m,f,G,*d,*f,*G*d,*m,*d"))]
+ "TARGET_HARD_FLOAT
+ && (register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "fmove,mtc,fpload,fpstore,store,mtc,mfc,move,load,store")
+ (set_attr "mode" "SF")])
+
+(define_insn "*movsf_softfloat"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,m")
+ (match_operand:SF 1 "move_operand" "Gd,m,d"))]
+ "TARGET_SOFT_FLOAT && !TARGET_MIPS16
+ && (register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,load,store")
+ (set_attr "mode" "SF")])
+
+(define_insn "*movsf_mips16"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,y,d,d,m")
+ (match_operand:SF 1 "move_operand" "d,d,y,m,d"))]
+ "TARGET_MIPS16
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,load,store")
+ (set_attr "mode" "SF")])
+
+;; 64-bit floating point moves
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "")
+ (match_operand:DF 1 ""))]
+ ""
+{
+ if (mips_legitimize_move (DFmode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movdf_hardfloat"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m")
+ (match_operand:DF 1 "move_operand" "f,G,m,f,G,*d,*f,*d*G,*m,*d"))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "fmove,mtc,fpload,fpstore,store,mtc,mfc,move,load,store")
+ (set_attr "mode" "DF")])
+
+(define_insn "*movdf_softfloat"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=d,d,m")
+ (match_operand:DF 1 "move_operand" "dG,m,dG"))]
+ "(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT) && !TARGET_MIPS16
+ && (register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,load,store")
+ (set_attr "mode" "DF")])
+
+(define_insn "*movdf_mips16"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=d,y,d,d,m")
+ (match_operand:DF 1 "move_operand" "d,d,y,m,d"))]
+ "TARGET_MIPS16
+ && (register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,move,move,load,store")
+ (set_attr "mode" "DF")])
+
+;; 128-bit integer moves
+
+(define_expand "movti"
+ [(set (match_operand:TI 0)
+ (match_operand:TI 1))]
+ "TARGET_64BIT"
+{
+ if (mips_legitimize_move (TImode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d")
+ (match_operand:TI 1 "move_operand" "d,i,m,dJ,*d*J,*a"))]
+ "TARGET_64BIT
+ && !TARGET_MIPS16
+ && (register_operand (operands[0], TImode)
+ || reg_or_0_operand (operands[1], TImode))"
+ "#"
+ [(set_attr "move_type" "move,const,load,store,mthilo,mfhilo")
+ (set_attr "mode" "TI")])
+
+(define_insn "*movti_mips16"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,y,d,d,d,d,m,*d")
+ (match_operand:TI 1 "move_operand" "d,d,y,K,N,m,d,*a"))]
+ "TARGET_64BIT
+ && TARGET_MIPS16
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+ "#"
+ [(set_attr "move_type" "move,move,move,const,constN,load,store,mfhilo")
+ (set_attr "mode" "TI")])
+
+;; 128-bit floating point moves
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0)
+ (match_operand:TF 1))]
+ "TARGET_64BIT"
+{
+ if (mips_legitimize_move (TFmode, operands[0], operands[1]))
+ DONE;
+})
+
+;; This pattern handles both hard- and soft-float cases.
+(define_insn "*movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=d,d,m,f,d,f,m")
+ (match_operand:TF 1 "move_operand" "dG,m,dG,dG,f,m,f"))]
+ "TARGET_64BIT
+ && !TARGET_MIPS16
+ && (register_operand (operands[0], TFmode)
+ || reg_or_0_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "move_type" "move,load,store,mtc,mfc,fpload,fpstore")
+ (set_attr "mode" "TF")])
+
+(define_insn "*movtf_mips16"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=d,y,d,d,m")
+ (match_operand:TF 1 "move_operand" "d,d,y,m,d"))]
+ "TARGET_64BIT
+ && TARGET_MIPS16
+ && (register_operand (operands[0], TFmode)
+ || register_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "move_type" "move,move,move,load,store")
+ (set_attr "mode" "TF")])
+
+(define_split
+ [(set (match_operand:MOVE64 0 "nonimmediate_operand")
+ (match_operand:MOVE64 1 "move_operand"))]
+ "reload_completed && !TARGET_64BIT
+ && mips_split_64bit_move_p (operands[0], operands[1])"
+ [(const_int 0)]
+{
+ mips_split_doubleword_move (operands[0], operands[1]);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MOVE128 0 "nonimmediate_operand")
+ (match_operand:MOVE128 1 "move_operand"))]
+ "TARGET_64BIT && reload_completed"
+ [(const_int 0)]
+{
+ mips_split_doubleword_move (operands[0], operands[1]);
+ DONE;
+})
+
+;; When generating mips16 code, split moves of negative constants into
+;; a positive "li" followed by a negation.
+(define_split
+ [(set (match_operand 0 "d_operand")
+ (match_operand 1 "const_int_operand"))]
+ "TARGET_MIPS16 && reload_completed && INTVAL (operands[1]) < 0"
+ [(set (match_dup 2)
+ (match_dup 3))
+ (set (match_dup 2)
+ (neg:SI (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (SImode, operands[0]);
+ operands[3] = GEN_INT (-INTVAL (operands[1]));
+})
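+
+;; Illustrative example (assumed register): loading -5 into $2 under
+;; MIPS16 becomes a positive load immediate followed by a negation, both
+;; of which have short encodings:
+;;
+;;	li	$2,5
+;;	neg	$2,$2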
+
+;; 64-bit paired-single floating point moves
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0)
+ (match_operand:V2SF 1))]
+ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
+{
+ if (mips_legitimize_move (V2SFmode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_insn "*movv2sf"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=f,f,f,m,m,*f,*d,*d,*d,*m")
+ (match_operand:V2SF 1 "move_operand" "f,YG,m,f,YG,*d,*f,*d*YG,*m,*d"))]
+ "TARGET_HARD_FLOAT
+ && TARGET_PAIRED_SINGLE_FLOAT
+ && (register_operand (operands[0], V2SFmode)
+ || reg_or_0_operand (operands[1], V2SFmode))"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "fmove,mtc,fpload,fpstore,store,mtc,mfc,move,load,store")
+ (set_attr "mode" "DF")])
+
+;; Extract the high part of a HI/LO value. See mips_hard_regno_mode_ok_p
+;; for the reason why we can't just use (reg:GPR HI_REGNUM).
+;;
+;; When generating VR4120 or VR4130 code, we use MACCHI and DMACCHI
+;; instead of MFHI. This avoids both the normal MIPS III hi/lo hazards
+;; and the errata related to -mfix-vr4130.
+(define_insn "mfhi<GPR:mode>_<HILO:mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (unspec:GPR [(match_operand:HILO 1 "register_operand" "x")]
+ UNSPEC_MFHI))]
+ ""
+ { return ISA_HAS_MACCHI ? "<GPR:d>macchi\t%0,%.,%." : "mfhi\t%0"; }
+ [(set_attr "move_type" "mfhilo")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;; Set the high part of a HI/LO value, given that the low part has
+;; already been set. See mips_hard_regno_mode_ok_p for the reason
+;; why we can't just use (reg:GPR HI_REGNUM).
+(define_insn "mthi<GPR:mode>_<HILO:mode>"
+ [(set (match_operand:HILO 0 "register_operand" "=x")
+ (unspec:HILO [(match_operand:GPR 1 "reg_or_0_operand" "dJ")
+ (match_operand:GPR 2 "register_operand" "l")]
+ UNSPEC_MTHI))]
+ ""
+ "mthi\t%z1"
+ [(set_attr "move_type" "mthilo")
+ (set_attr "mode" "SI")])
+
+;; Emit a doubleword move in which exactly one of the operands is
+;; a floating-point register. We can't just emit two normal moves
+;; because of the constraints imposed by the FPU register model;
+;; see mips_cannot_change_mode_class for details. Instead, we keep
+;; the FPR whole and use special patterns to refer to each word of
+;; the other operand.
+
+(define_expand "move_doubleword_fpr<mode>"
+ [(set (match_operand:SPLITF 0)
+ (match_operand:SPLITF 1))]
+ ""
+{
+ if (FP_REG_RTX_P (operands[0]))
+ {
+ rtx low = mips_subword (operands[1], 0);
+ rtx high = mips_subword (operands[1], 1);
+ emit_insn (gen_load_low<mode> (operands[0], low));
+ if (TARGET_FLOAT64 && !TARGET_64BIT)
+ emit_insn (gen_mthc1<mode> (operands[0], high, operands[0]));
+ else
+ emit_insn (gen_load_high<mode> (operands[0], high, operands[0]));
+ }
+ else
+ {
+ rtx low = mips_subword (operands[0], 0);
+ rtx high = mips_subword (operands[0], 1);
+ emit_insn (gen_store_word<mode> (low, operands[1], const0_rtx));
+ if (TARGET_FLOAT64 && !TARGET_64BIT)
+ emit_insn (gen_mfhc1<mode> (high, operands[1]));
+ else
+ emit_insn (gen_store_word<mode> (high, operands[1], const1_rtx));
+ }
+ DONE;
+})
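+
+;; A sketch of the expansion above (register numbers and word order are
+;; illustrative): with 32-bit FPRs, moving a DFmode value from $4/$5 into
+;; $f12 uses two word transfers,
+;;
+;;	mtc1	$4,$f12		# load_low
+;;	mtc1	$5,$f13		# load_high
+;;
+;; while with TARGET_FLOAT64 && !TARGET_64BIT the high word is placed in
+;; the upper half of the same 64-bit register:
+;;
+;;	mtc1	$4,$f12
+;;	mthc1	$5,$f12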
+
+;; Load the low word of operand 0 with operand 1.
+(define_insn "load_low<mode>"
+ [(set (match_operand:SPLITF 0 "register_operand" "=f,f")
+ (unspec:SPLITF [(match_operand:<HALFMODE> 1 "general_operand" "dJ,m")]
+ UNSPEC_LOAD_LOW))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[0] = mips_subword (operands[0], 0);
+ return mips_output_move (operands[0], operands[1]);
+}
+ [(set_attr "move_type" "mtc,fpload")
+ (set_attr "mode" "<HALFMODE>")])
+
+;; Load the high word of operand 0 from operand 1, preserving the value
+;; in the low word.
+(define_insn "load_high<mode>"
+ [(set (match_operand:SPLITF 0 "register_operand" "=f,f")
+ (unspec:SPLITF [(match_operand:<HALFMODE> 1 "general_operand" "dJ,m")
+ (match_operand:SPLITF 2 "register_operand" "0,0")]
+ UNSPEC_LOAD_HIGH))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[0] = mips_subword (operands[0], 1);
+ return mips_output_move (operands[0], operands[1]);
+}
+ [(set_attr "move_type" "mtc,fpload")
+ (set_attr "mode" "<HALFMODE>")])
+
+;; Store one word of operand 1 in operand 0. Operand 2 is 1 to store the
+;; high word and 0 to store the low word.
+(define_insn "store_word<mode>"
+ [(set (match_operand:<HALFMODE> 0 "nonimmediate_operand" "=d,m")
+ (unspec:<HALFMODE> [(match_operand:SPLITF 1 "register_operand" "f,f")
+ (match_operand 2 "const_int_operand")]
+ UNSPEC_STORE_WORD))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[1] = mips_subword (operands[1], INTVAL (operands[2]));
+ return mips_output_move (operands[0], operands[1]);
+}
+ [(set_attr "move_type" "mfc,fpstore")
+ (set_attr "mode" "<HALFMODE>")])
+
+;; Move operand 1 to the high word of operand 0 using mthc1, preserving the
+;; value in the low word.
+(define_insn "mthc1<mode>"
+ [(set (match_operand:SPLITF 0 "register_operand" "=f")
+ (unspec:SPLITF [(match_operand:<HALFMODE> 1 "reg_or_0_operand" "dJ")
+ (match_operand:SPLITF 2 "register_operand" "0")]
+ UNSPEC_MTHC1))]
+ "TARGET_HARD_FLOAT && ISA_HAS_MXHC1"
+ "mthc1\t%z1,%0"
+ [(set_attr "move_type" "mtc")
+ (set_attr "mode" "<HALFMODE>")])
+
+;; Move high word of operand 1 to operand 0 using mfhc1.
+(define_insn "mfhc1<mode>"
+ [(set (match_operand:<HALFMODE> 0 "register_operand" "=d")
+ (unspec:<HALFMODE> [(match_operand:SPLITF 1 "register_operand" "f")]
+ UNSPEC_MFHC1))]
+ "TARGET_HARD_FLOAT && ISA_HAS_MXHC1"
+ "mfhc1\t%0,%1"
+ [(set_attr "move_type" "mfc")
+ (set_attr "mode" "<HALFMODE>")])
+
+;; Move a constant that satisfies CONST_GP_P into operand 0.
+(define_expand "load_const_gp_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (const:P (unspec:P [(const_int 0)] UNSPEC_GP)))])
+
+;; Insn to initialize $gp for n32/n64 abicalls. Operand 0 is the offset
+;; of _gp from the start of this function. Operand 1 is the incoming
+;; function address.
+(define_insn_and_split "loadgp_newabi_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=&d")
+ (unspec:P [(match_operand:P 1)
+ (match_operand:P 2 "register_operand" "d")]
+ UNSPEC_LOADGP))]
+ "mips_current_loadgp_style () == LOADGP_NEWABI"
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "&& mips_must_initialize_gp_p ()"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))
+ (set (match_dup 0) (match_dup 5))]
+{
+ operands[3] = gen_rtx_HIGH (Pmode, operands[1]);
+ operands[4] = gen_rtx_PLUS (Pmode, operands[0], operands[2]);
+ operands[5] = gen_rtx_LO_SUM (Pmode, operands[0], operands[1]);
+}
+ [(set_attr "type" "ghost")])
+
+;; Likewise, for -mno-shared code. Operand 0 is the __gnu_local_gp symbol.
+(define_insn_and_split "loadgp_absolute_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec:P [(match_operand:P 1)] UNSPEC_LOADGP))]
+ "mips_current_loadgp_style () == LOADGP_ABSOLUTE"
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "&& mips_must_initialize_gp_p ()"
+ [(const_int 0)]
+{
+ mips_emit_move (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "type" "ghost")])
+
+;; This blockage instruction prevents the gp load from being
+;; scheduled after an implicit use of gp. It also prevents
+;; the load from being deleted as dead.
+(define_insn "loadgp_blockage"
+ [(unspec_volatile [(reg:SI 28)] UNSPEC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "type" "ghost")])
+
+;; Initialize $gp for RTP PIC. Operand 0 is the __GOTT_BASE__ symbol
+;; and operand 1 is the __GOTT_INDEX__ symbol.
+(define_insn_and_split "loadgp_rtp_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec:P [(match_operand:P 1 "symbol_ref_operand")
+ (match_operand:P 2 "symbol_ref_operand")]
+ UNSPEC_LOADGP))]
+ "mips_current_loadgp_style () == LOADGP_RTP"
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "&& mips_must_initialize_gp_p ()"
+ [(set (match_dup 0) (high:P (match_dup 3)))
+ (set (match_dup 0) (unspec:P [(match_dup 0)
+ (match_dup 3)] UNSPEC_LOAD_GOT))
+ (set (match_dup 0) (unspec:P [(match_dup 0)
+ (match_dup 4)] UNSPEC_LOAD_GOT))]
+{
+ operands[3] = mips_unspec_address (operands[1], SYMBOL_ABSOLUTE);
+ operands[4] = mips_unspec_address (operands[2], SYMBOL_HALF);
+}
+ [(set_attr "type" "ghost")])
+
+;; Initialize the global pointer for MIPS16 code. Operand 0 is the
+;; global pointer and operand 1 is the MIPS16 register that holds
+;; the required value.
+(define_insn_and_split "copygp_mips16"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_COPYGP))]
+ "TARGET_MIPS16"
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "&& mips_must_initialize_gp_p ()"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
+ [(set_attr "type" "ghost")])
+
+;; A placeholder for where the cprestore instruction should go,
+;; if we decide we need one. Operand 0 and operand 1 are as for
+;; "cprestore". Operand 2 is a register that holds the gp value.
+;;
+;; The "cprestore" pattern requires operand 2 to be pic_offset_table_rtx,
+;; otherwise any register that holds the correct value will do.
+(define_insn_and_split "potential_cprestore"
+ [(set (match_operand:SI 0 "cprestore_save_slot_operand" "=X,X")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "I,i")
+ (match_operand:SI 2 "register_operand" "d,d")]
+ UNSPEC_POTENTIAL_CPRESTORE))
+ (clobber (match_operand:SI 3 "scratch_operand" "=X,&d"))]
+ "!TARGET_CPRESTORE_DIRECTIVE || operands[2] == pic_offset_table_rtx"
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "mips_must_initialize_gp_p ()"
+ [(const_int 0)]
+{
+ mips_save_gp_to_cprestore_slot (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+}
+ [(set_attr "type" "ghost")])
+
+;; Emit a .cprestore directive, which normally expands to a single store
+;; instruction. Operand 0 is a (possibly illegitimate) sp-based MEM
+;; for the cprestore slot. Operand 1 is the offset of the slot from
+;; the stack pointer. (This is redundant with operand 0, but it makes
+;; things a little simpler.)
+(define_insn "cprestore"
+ [(set (match_operand:SI 0 "cprestore_save_slot_operand" "=X,X")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "I,i")
+ (reg:SI 28)]
+ UNSPEC_CPRESTORE))]
+ "TARGET_CPRESTORE_DIRECTIVE"
+{
+ if (mips_nomacro.nesting_level > 0 && which_alternative == 1)
+ return ".set\tmacro\;.cprestore\t%1\;.set\tnomacro";
+ else
+ return ".cprestore\t%1";
+}
+ [(set_attr "type" "store")
+ (set_attr "length" "4,12")])
+
+(define_insn "use_cprestore"
+ [(set (reg:SI CPRESTORE_SLOT_REGNUM)
+ (match_operand:SI 0 "cprestore_load_slot_operand"))]
+ ""
+ ""
+ [(set_attr "type" "ghost")])
+
+;; Expand in-line code to clear the instruction cache between operand[0] and
+;; operand[1].
+(define_expand "clear_cache"
+ [(match_operand 0 "pmode_register_operand")
+ (match_operand 1 "pmode_register_operand")]
+ ""
+ "
+{
+ if (TARGET_SYNCI)
+ {
+ mips_expand_synci_loop (operands[0], operands[1]);
+ emit_insn (gen_sync ());
+ emit_insn (Pmode == SImode
+ ? gen_clear_hazard_si ()
+ : gen_clear_hazard_di ());
+ }
+ else if (mips_cache_flush_func && mips_cache_flush_func[0])
+ {
+ rtx len = gen_reg_rtx (Pmode);
+ emit_insn (gen_sub3_insn (len, operands[1], operands[0]));
+ MIPS_ICACHE_SYNC (operands[0], len);
+ }
+ DONE;
+}")
+
+(define_insn "sync"
+ [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)]
+ "GENERATE_SYNC"
+ { return mips_output_sync (); })
+
+(define_insn "synci"
+ [(unspec_volatile [(match_operand 0 "pmode_register_operand" "d")]
+ UNSPEC_SYNCI)]
+ "TARGET_SYNCI"
+ "synci\t0(%0)")
+
+(define_insn "rdhwr_synci_step_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec_volatile [(const_int 1)]
+ UNSPEC_RDHWR))]
+ "ISA_HAS_SYNCI"
+ "rdhwr\t%0,$1")
+
+(define_insn "clear_hazard_<mode>"
+ [(unspec_volatile [(const_int 0)] UNSPEC_CLEAR_HAZARD)
+ (clobber (reg:P RETURN_ADDR_REGNUM))]
+ "ISA_HAS_SYNCI"
+{
+ return "%(%<bal\t1f\n"
+ "\tnop\n"
+ "1:\t<d>addiu\t$31,$31,12\n"
+ "\tjr.hb\t$31\n"
+ "\tnop%>%)";
+}
+ [(set_attr "length" "20")])
+
+;; Cache operations for R4000-style caches.
+(define_insn "mips_cache"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(match_operand:SI 0 "const_int_operand")
+ (match_operand:QI 1 "address_operand" "p")]
+ UNSPEC_MIPS_CACHE))]
+ "ISA_HAS_CACHE"
+ "cache\t%X0,%a1")
+
+;; Similar, but with the operands hard-coded to an R10K cache barrier
+;; operation. We keep the pattern distinct so that we can identify
+;; cache operations inserted by -mr10k-cache-barrier=, and so that
+;; the operation is never inserted into a delay slot.
+(define_insn "r10k_cache_barrier"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(const_int 0)] UNSPEC_R10K_CACHE_BARRIER))]
+ "ISA_HAS_CACHE"
+ "cache\t0x14,0(%$)"
+ [(set_attr "can_delay" "no")])
+
+;; Block moves, see mips.c for more details.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand")
+ (match_operand:BLK 1 "general_operand"))
+ (use (match_operand:SI 2 ""))
+ (use (match_operand:SI 3 "const_int_operand"))])]
+ "!TARGET_MIPS16 && !TARGET_MEMCPY"
+{
+ if (mips_expand_block_move (operands[0], operands[1], operands[2]))
+ DONE;
+ else
+ FAIL;
+})
+
+;;
+;; ....................
+;;
+;; SHIFTS
+;;
+;; ....................
+
+(define_expand "<optab><mode>3"
+ [(set (match_operand:GPR 0 "register_operand")
+ (any_shift:GPR (match_operand:GPR 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+{
+  /* On the mips16, a shift of more than 8 is a four-byte instruction,
+     so, for a shift between 8 and 16, it is just as fast to do two
+     shifts of 8 or less.  If there is a lot of shifting going on, we
+     may win in CSE.  Otherwise combine will put the shifts back
+     together again.  This can be called by mips_function_arg, so we
+     must be careful not to allocate a new register if we've reached
+     the reload pass.  */
+ if (TARGET_MIPS16
+ && optimize
+ && CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) > 8
+ && INTVAL (operands[2]) <= 16
+ && !reload_in_progress
+ && !reload_completed)
+ {
+ rtx temp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_<optab><mode>3 (temp, operands[1], GEN_INT (8)));
+ emit_insn (gen_<optab><mode>3 (operands[0], temp,
+ GEN_INT (INTVAL (operands[2]) - 8)));
+ DONE;
+ }
+})
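+
+;; For example (assumed registers): with -mips16 and optimization
+;; enabled, a shift by 13 is emitted as two short shifts rather than one
+;; 4-byte instruction:
+;;
+;;	sll	$2,$3,8
+;;	sll	$2,$2,5		# 5 = 13 - 8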
+
+(define_insn "*<optab><mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (any_shift:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ "!TARGET_MIPS16"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (INTVAL (operands[2])
+ & (GET_MODE_BITSIZE (<MODE>mode) - 1));
+
+ return "<d><insn>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<optab>si3_extend"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI
+ (any_shift:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI"))))]
+ "TARGET_64BIT && !TARGET_MIPS16"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+
+ return "<insn>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<optab>si3_mips16"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (any_shift:SI (match_operand:SI 1 "register_operand" "0,d")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ "TARGET_MIPS16"
+{
+ if (which_alternative == 0)
+ return "<insn>\t%0,%2";
+
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "<insn>\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "SI")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand 2 "m16_uimm3_b")
+ (const_int 4)
+ (const_int 8))])])
+
+;; We need separate DImode MIPS16 patterns because of the irregularity
+;; of right shifts.
+(define_insn "*ashldi3_mips16"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (ashift:DI (match_operand:DI 1 "register_operand" "0,d")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ "TARGET_64BIT && TARGET_MIPS16"
+{
+ if (which_alternative == 0)
+ return "dsll\t%0,%2";
+
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+ return "dsll\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "DI")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand 2 "m16_uimm3_b")
+ (const_int 4)
+ (const_int 8))])])
+
+(define_insn "*ashrdi3_mips16"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ "TARGET_64BIT && TARGET_MIPS16"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+
+ return "dsra\t%0,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "DI")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand 2 "m16_uimm3_b")
+ (const_int 4)
+ (const_int 8))])])
+
+(define_insn "*lshrdi3_mips16"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "arith_operand" "d,I")))]
+ "TARGET_64BIT && TARGET_MIPS16"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+
+ return "dsrl\t%0,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "DI")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand 2 "m16_uimm3_b")
+ (const_int 4)
+ (const_int 8))])])
+
+;; On the mips16, we can split a 4-byte shift into two 2-byte shifts.
+
+(define_split
+ [(set (match_operand:GPR 0 "d_operand")
+ (any_shift:GPR (match_operand:GPR 1 "d_operand")
+ (match_operand:GPR 2 "const_int_operand")))]
+ "TARGET_MIPS16 && reload_completed && !TARGET_DEBUG_D_MODE
+ && INTVAL (operands[2]) > 8
+ && INTVAL (operands[2]) <= 16"
+ [(set (match_dup 0) (any_shift:GPR (match_dup 1) (const_int 8)))
+ (set (match_dup 0) (any_shift:GPR (match_dup 0) (match_dup 2)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 8); })
+
+;; If we load a byte on the mips16 as a bitfield, the resulting
+;; sequence of instructions is too complicated for combine, because it
+;; involves four instructions: a load, a shift, a constant load into a
+;; register, and an and (the key problem here is that the mips16 does
+;; not have an "and immediate" instruction).  We recognize a shift of a
+;; load in order to make it simple enough for combine to understand.
+;;
+;; The length here is the worst case: the length of the split version
+;; will be more accurate.
+(define_insn_and_split ""
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "immediate_operand" "I")))]
+ "TARGET_MIPS16"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 2)))]
+ ""
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "16")])
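+
+;; Illustrative effect of the pattern above (registers assumed, not from
+;; the original sources): combine can now match a byte extracted from a
+;; loaded word as the two-insn pair
+;;
+;;	lw	$2,0($3)
+;;	srl	$2,$2,24
+;;
+;; rather than the four-instruction load/shift/li/and sequence described
+;; above.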
+
+(define_insn "rotr<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (rotatert:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:SI 2 "arith_operand" "dI")))]
+ "ISA_HAS_ROR"
+{
+ if (CONST_INT_P (operands[2]))
+ gcc_assert (INTVAL (operands[2]) >= 0
+ && INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode));
+
+ return "<d>ror\t%0,%1,%2";
+}
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<MODE>")])
+
+;;
+;; ....................
+;;
+;; CONDITIONAL BRANCHES
+;;
+;; ....................
+
+;; Conditional branches on floating-point equality tests.
+
+(define_insn "*branch_fp"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(match_operand:CC 2 "register_operand" "z")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%F1", "%Z2%0"),
+ MIPS_BRANCH ("b%W1", "%Z2%0"));
+}
+ [(set_attr "type" "branch")])
+
+(define_insn "*branch_fp_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(match_operand:CC 2 "register_operand" "z")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_HARD_FLOAT"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%W1", "%Z2%0"),
+ MIPS_BRANCH ("b%F1", "%Z2%0"));
+}
+ [(set_attr "type" "branch")])
+
+;; Conditional branches on ordered comparisons with zero.
+
+(define_insn "*branch_order<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "order_operator"
+ [(match_operand:GPR 2 "register_operand" "d")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "!TARGET_MIPS16"
+ { return mips_output_order_conditional_branch (insn, operands, false); }
+ [(set_attr "type" "branch")])
+
+(define_insn "*branch_order<mode>_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "order_operator"
+ [(match_operand:GPR 2 "register_operand" "d")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "!TARGET_MIPS16"
+ { return mips_output_order_conditional_branch (insn, operands, true); }
+ [(set_attr "type" "branch")])
+
+;; Conditional branch on equality comparison.
+
+(define_insn "*branch_equality<mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(match_operand:GPR 2 "register_operand" "d")
+ (match_operand:GPR 3 "reg_or_0_operand" "dJ")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "!TARGET_MIPS16"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%C1", "%2,%z3,%0"),
+ MIPS_BRANCH ("b%N1", "%2,%z3,%0"));
+}
+ [(set_attr "type" "branch")])
+
+(define_insn "*branch_equality<mode>_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "equality_operator"
+ [(match_operand:GPR 2 "register_operand" "d")
+ (match_operand:GPR 3 "reg_or_0_operand" "dJ")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "!TARGET_MIPS16"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("b%N1", "%2,%z3,%0"),
+ MIPS_BRANCH ("b%C1", "%2,%z3,%0"));
+}
+ [(set_attr "type" "branch")])
+
+;; MIPS16 branches
+
+(define_insn "*branch_equality<mode>_mips16"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "equality_operator"
+ [(match_operand:GPR 1 "register_operand" "d,t")
+ (const_int 0)])
+ (match_operand 2 "pc_or_label_operand" "")
+ (match_operand 3 "pc_or_label_operand" "")))]
+ "TARGET_MIPS16"
+{
+ if (operands[2] != pc_rtx)
+ {
+ if (which_alternative == 0)
+ return "b%C0z\t%1,%2";
+ else
+ return "bt%C0z\t%2";
+ }
+ else
+ {
+ if (which_alternative == 0)
+ return "b%N0z\t%1,%3";
+ else
+ return "bt%N0z\t%3";
+ }
+}
+ [(set_attr "type" "branch")])
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+{
+ mips_expand_conditional_branch (operands);
+ DONE;
+})
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SCALARF 1 "register_operand")
+ (match_operand:SCALARF 2 "register_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+{
+ mips_expand_conditional_branch (operands);
+ DONE;
+})
+
+;; Used to implement built-in functions.
+(define_expand "condjump"
+ [(set (pc)
+ (if_then_else (match_operand 0)
+ (label_ref (match_operand 1))
+ (pc)))])
+
+;; Branch if bit is set/clear.
+
+(define_insn "*branch_bit<bbv><mode>"
+ [(set (pc)
+ (if_then_else
+ (equality_op (zero_extract:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 0 ""))
+ (pc)))]
+ "ISA_HAS_BBIT && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
+{
+ return
+ mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("bbit<bbv>", "%1,%2,%0"),
+ MIPS_BRANCH ("bbit<bbinv>", "%1,%2,%0"));
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_likely" "no")])
+
+(define_insn "*branch_bit<bbv><mode>_inverted"
+ [(set (pc)
+ (if_then_else
+ (equality_op (zero_extract:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" ""))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 ""))))]
+ "ISA_HAS_BBIT && UINTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)"
+{
+ return
+ mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("bbit<bbinv>", "%1,%2,%0"),
+ MIPS_BRANCH ("bbit<bbv>", "%1,%2,%0"));
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_likely" "no")])
+
+;;
+;; ....................
+;;
+;; SETTING A REGISTER FROM A COMPARISON
+;;
+;; ....................
+
+;; Destination is always set in SI mode.
+
+(define_expand "cstore<mode>4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "mips_cstore_operator"
+ [(match_operand:GPR 2 "register_operand")
+ (match_operand:GPR 3 "nonmemory_operand")]))]
+ ""
+{
+ mips_expand_scc (operands);
+ DONE;
+})
+
+(define_insn "*seq_zero_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (eq:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (const_int 0)))]
+ "!TARGET_MIPS16 && !ISA_HAS_SEQ_SNE"
+ "sltu\t%0,%1,1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*seq_zero_<GPR:mode><GPR2:mode>_mips16"
+ [(set (match_operand:GPR2 0 "register_operand" "=t")
+ (eq:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (const_int 0)))]
+ "TARGET_MIPS16 && !ISA_HAS_SEQ_SNE"
+ "sltu\t%1,1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;; Generate sltiu unless using seq results in better code.
+(define_insn "*seq_<GPR:mode><GPR2:mode>_seq"
+ [(set (match_operand:GPR2 0 "register_operand" "=d,d,d")
+ (eq:GPR2 (match_operand:GPR 1 "register_operand" "%d,d,d")
+ (match_operand:GPR 2 "reg_imm10_operand" "d,J,YB")))]
+ "ISA_HAS_SEQ_SNE"
+ "@
+ seq\t%0,%1,%2
+ sltiu\t%0,%1,1
+ seqi\t%0,%1,%2"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*sne_zero_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (ne:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (const_int 0)))]
+ "!TARGET_MIPS16 && !ISA_HAS_SEQ_SNE"
+ "sltu\t%0,%.,%1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+;; Generate sltu unless using sne results in better code.
+(define_insn "*sne_<GPR:mode><GPR2:mode>_sne"
+ [(set (match_operand:GPR2 0 "register_operand" "=d,d,d")
+ (ne:GPR2 (match_operand:GPR 1 "register_operand" "%d,d,d")
+ (match_operand:GPR 2 "reg_imm10_operand" "d,J,YB")))]
+ "ISA_HAS_SEQ_SNE"
+ "@
+ sne\t%0,%1,%2
+ sltu\t%0,%.,%1
+ snei\t%0,%1,%2"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
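+
+;; The patterns above handle comparisons with zero or with small
+;; immediates; a general x == y is first reduced by mips_expand_scc,
+;; typically (assumed registers, for illustration) to
+;;
+;;	xor	$2,$4,$5	# $2 == 0 iff x == y
+;;	sltiu	$2,$2,1		# set $2 to ($2 == 0)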
+
+(define_insn "*sgt<u>_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "reg_or_0_operand" "dJ")))]
+ "!TARGET_MIPS16"
+ "slt<u>\t%0,%z2,%1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*sgt<u>_<GPR:mode><GPR2:mode>_mips16"
+ [(set (match_operand:GPR2 0 "register_operand" "=t")
+ (any_gt:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "register_operand" "d")))]
+ "TARGET_MIPS16"
+ "slt<u>\t%2,%1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*sge<u>_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (any_ge:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (const_int 1)))]
+ "!TARGET_MIPS16"
+ "slt<u>\t%0,%.,%1"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*slt<u>_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "arith_operand" "dI")))]
+ "!TARGET_MIPS16"
+ "slt<u>\t%0,%1,%2"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*slt<u>_<GPR:mode><GPR2:mode>_mips16"
+ [(set (match_operand:GPR2 0 "register_operand" "=t,t")
+ (any_lt:GPR2 (match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "arith_operand" "d,I")))]
+ "TARGET_MIPS16"
+ "slt<u>\t%1,%2"
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (if_then_else (match_operand 2 "m16_uimm8_1")
+ (const_int 4)
+ (const_int 8))])])
+
+(define_insn "*sle<u>_<GPR:mode><GPR2:mode>"
+ [(set (match_operand:GPR2 0 "register_operand" "=d")
+ (any_le:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "sle_operand" "")))]
+ "!TARGET_MIPS16"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ return "slt<u>\t%0,%1,%2";
+}
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*sle<u>_<GPR:mode><GPR2:mode>_mips16"
+ [(set (match_operand:GPR2 0 "register_operand" "=t")
+ (any_le:GPR2 (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "sle_operand" "")))]
+ "TARGET_MIPS16"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 1);
+ return "slt<u>\t%1,%2";
+}
+ [(set_attr "type" "slt")
+ (set_attr "mode" "<GPR:MODE>")
+ (set (attr "length") (if_then_else (match_operand 2 "m16_uimm8_m1_1")
+ (const_int 4)
+ (const_int 8)))])
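+
+;; Worked example for the sle patterns above (registers assumed): since
+;; x <= 15 is the same as x < 16, the output routine adds 1 to the
+;; immediate and emits a strict comparison:
+;;
+;;	slt	$2,$4,16	# 16 = 15 + 1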
+
+;;
+;; ....................
+;;
+;; FLOATING POINT COMPARISONS
+;;
+;; ....................
+
+(define_insn "s<code>_<mode>"
+ [(set (match_operand:CC 0 "register_operand" "=z")
+ (fcond:CC (match_operand:SCALARF 1 "register_operand" "f")
+ (match_operand:SCALARF 2 "register_operand" "f")))]
+ ""
+ "c.<fcond>.<fmt>\t%Z0%1,%2"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+(define_insn "s<code>_<mode>"
+ [(set (match_operand:CC 0 "register_operand" "=z")
+ (swapped_fcond:CC (match_operand:SCALARF 1 "register_operand" "f")
+ (match_operand:SCALARF 2 "register_operand" "f")))]
+ ""
+ "c.<swapped_fcond>.<fmt>\t%Z0%2,%1"
+ [(set_attr "type" "fcmp")
+ (set_attr "mode" "FPSW")])
+
+;;
+;; ....................
+;;
+;; UNCONDITIONAL BRANCHES
+;;
+;; ....................
+
+;; Unconditional branches.
+
+(define_expand "jump"
+ [(set (pc)
+ (label_ref (match_operand 0)))])
+
+(define_insn "*jump_absolute"
+ [(set (pc)
+ (label_ref (match_operand 0)))]
+ "!TARGET_MIPS16 && TARGET_ABSOLUTE_JUMPS"
+ { return MIPS_ABSOLUTE_JUMP ("%*j\t%l0%/"); }
+ [(set_attr "type" "jump")])
+
+(define_insn "*jump_pic"
+ [(set (pc)
+ (label_ref (match_operand 0)))]
+ "!TARGET_MIPS16 && !TARGET_ABSOLUTE_JUMPS"
+{
+ if (get_attr_length (insn) <= 8)
+ return "%*b\t%l0%/";
+ else
+ {
+ mips_output_load_label (operands[0]);
+ return "%*jr\t%@%/%]";
+ }
+}
+ [(set_attr "type" "branch")])
+
+;; We need a different insn for the mips16, because a mips16 branch
+;; does not have a delay slot.
+
+(define_insn "*jump_mips16"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_MIPS16"
+ "b\t%l0"
+ [(set_attr "type" "branch")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand"))]
+ ""
+{
+ operands[0] = force_reg (Pmode, operands[0]);
+ if (Pmode == SImode)
+ emit_jump_insn (gen_indirect_jumpsi (operands[0]));
+ else
+ emit_jump_insn (gen_indirect_jumpdi (operands[0]));
+ DONE;
+})
+
+(define_insn "indirect_jump<mode>"
+ [(set (pc) (match_operand:P 0 "register_operand" "d"))]
+ ""
+ "%*j\t%0%/"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+(define_expand "tablejump"
+ [(set (pc)
+ (match_operand 0 "register_operand"))
+ (use (label_ref (match_operand 1 "")))]
+ ""
+{
+ if (TARGET_MIPS16_SHORT_JUMP_TABLES)
+ operands[0] = expand_binop (Pmode, add_optab,
+ convert_to_mode (Pmode, operands[0], false),
+ gen_rtx_LABEL_REF (Pmode, operands[1]),
+ 0, 0, OPTAB_WIDEN);
+ else if (TARGET_GPWORD)
+ operands[0] = expand_binop (Pmode, add_optab, operands[0],
+ pic_offset_table_rtx, 0, 0, OPTAB_WIDEN);
+ else if (TARGET_RTP_PIC)
+ {
+ /* When generating RTP PIC, we use case table entries that are relative
+ to the start of the function. Add the function's address to the
+ value we loaded. */
+ rtx start = get_hard_reg_initial_val (Pmode, PIC_FUNCTION_ADDR_REGNUM);
+ operands[0] = expand_binop (ptr_mode, add_optab, operands[0],
+ start, 0, 0, OPTAB_WIDEN);
+ }
+
+ if (Pmode == SImode)
+ emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
+ else
+ emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "tablejump<mode>"
+ [(set (pc)
+ (match_operand:P 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "%*j\t%0%/"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well.
+;; While it is possible to either pull it off the stack (in the
+;; o32 case) or recalculate it given t9 and our target label,
+;; it takes 3 or 4 insns to do so.
+
+(define_expand "builtin_setjmp_setup"
+ [(use (match_operand 0 "register_operand"))]
+ "TARGET_USE_GOT"
+{
+ rtx addr;
+
+ addr = plus_constant (operands[0], GET_MODE_SIZE (Pmode) * 3);
+ mips_emit_move (gen_rtx_MEM (Pmode, addr), pic_offset_table_rtx);
+ DONE;
+})
+
+;; Restore the gp that we saved above. Despite the earlier comment, it seems
+;; that older code did recalculate the gp from $25. Continue to jump through
+;; $25 for compatibility (we lose nothing by doing so).
+
+(define_expand "builtin_longjmp"
+ [(use (match_operand 0 "register_operand"))]
+ "TARGET_USE_GOT"
+{
+ /* The elements of the buffer are, in order: */
+ int W = GET_MODE_SIZE (Pmode);
+ rtx fp = gen_rtx_MEM (Pmode, operands[0]);
+ rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0], 1*W));
+ rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0], 2*W));
+ rtx gpv = gen_rtx_MEM (Pmode, plus_constant (operands[0], 3*W));
+ rtx pv = gen_rtx_REG (Pmode, PIC_FUNCTION_ADDR_REGNUM);
+ /* Use gen_raw_REG to avoid being given pic_offset_table_rtx.
+ The target is bound to be using $28 as the global pointer
+ but the current function might not be. */
+ rtx gp = gen_raw_REG (Pmode, GLOBAL_POINTER_REGNUM);
+
+ /* This bit is similar to expand_builtin_longjmp except that it
+ restores $gp as well. */
+ mips_emit_move (hard_frame_pointer_rtx, fp);
+ mips_emit_move (pv, lab);
+ emit_stack_restore (SAVE_NONLOCAL, stack);
+ mips_emit_move (gp, gpv);
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+ emit_use (gp);
+ emit_indirect_jump (pv);
+ DONE;
+})
+
+;;
+;; ....................
+;;
+;; Function prologue/epilogue
+;;
+;; ....................
+;;
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+{
+ mips_expand_prologue ();
+ DONE;
+})
+
+;; Block any insns from being moved before this point, since the
+;; profiling call to mcount can use various registers that aren't
+;; saved or used to pass arguments.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "type" "ghost")
+ (set_attr "mode" "none")])
+
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+{
+ mips_expand_epilogue (false);
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(const_int 2)]
+ ""
+{
+ mips_expand_epilogue (true);
+ DONE;
+})
+
+;; Trivial return. Make it look like a normal return insn, as that
+;; allows jump optimizations to work better.
+
+(define_expand "return"
+ [(return)]
+ "mips_can_use_return_insn ()"
+ { mips_expand_before_return (); })
+
+(define_insn "*return"
+ [(return)]
+ "mips_can_use_return_insn ()"
+ "%*j\t$31%/"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+;; Normal return.
+
+(define_insn "return_internal"
+ [(return)
+ (use (match_operand 0 "pmode_register_operand" ""))]
+ ""
+ "%*j\t%0%/"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
+;; Exception return.
+(define_insn "mips_eret"
+ [(return)
+ (unspec_volatile [(const_int 0)] UNSPEC_ERET)]
+ ""
+ "eret"
+ [(set_attr "type" "trap")
+ (set_attr "mode" "none")])
+
+;; Debug exception return.
+(define_insn "mips_deret"
+ [(return)
+ (unspec_volatile [(const_int 0)] UNSPEC_DERET)]
+ ""
+ "deret"
+ [(set_attr "type" "trap")
+ (set_attr "mode" "none")])
+
+;; Disable interrupts.
+(define_insn "mips_di"
+ [(unspec_volatile [(const_int 0)] UNSPEC_DI)]
+ ""
+ "di"
+ [(set_attr "type" "trap")
+ (set_attr "mode" "none")])
+
+;; Execution hazard barrier.
+(define_insn "mips_ehb"
+ [(unspec_volatile [(const_int 0)] UNSPEC_EHB)]
+ ""
+ "ehb"
+ [(set_attr "type" "trap")
+ (set_attr "mode" "none")])
+
+;; Read GPR from previous shadow register set.
+(define_insn "mips_rdpgpr"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_RDPGPR))]
+ ""
+ "rdpgpr\t%0,%1"
+ [(set_attr "type" "move")
+ (set_attr "mode" "SI")])
+
+;; Move involving COP0 registers.
+(define_insn "cop0_move"
+ [(set (match_operand:SI 0 "register_operand" "=B,d")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "d,B")]
+ UNSPEC_COP0))]
+ ""
+{ return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "type" "mtc,mfc")
+ (set_attr "mode" "SI")])
+
+;; This is used in compiling the unwind routines.
+(define_expand "eh_return"
+ [(use (match_operand 0 "general_operand"))]
+ ""
+{
+ if (GET_MODE (operands[0]) != word_mode)
+ operands[0] = convert_to_mode (word_mode, operands[0], 0);
+ if (TARGET_64BIT)
+ emit_insn (gen_eh_set_lr_di (operands[0]));
+ else
+ emit_insn (gen_eh_set_lr_si (operands[0]));
+ DONE;
+})
+
+;; Clobber the return address on the stack. We can't expand this
+;; until we know where it will be put in the stack frame.
+
+(define_insn "eh_set_lr_si"
+ [(unspec [(match_operand:SI 0 "register_operand" "d")] UNSPEC_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&d"))]
+ "! TARGET_64BIT"
+ "#")
+
+(define_insn "eh_set_lr_di"
+ [(unspec [(match_operand:DI 0 "register_operand" "d")] UNSPEC_EH_RETURN)
+ (clobber (match_scratch:DI 1 "=&d"))]
+ "TARGET_64BIT"
+ "#")
+
+(define_split
+ [(unspec [(match_operand 0 "register_operand")] UNSPEC_EH_RETURN)
+ (clobber (match_scratch 1))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ mips_set_return_address (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "exception_receiver"
+ [(const_int 0)]
+ "TARGET_USE_GOT"
+{
+ /* See the comment above load_call<mode> for details. */
+ emit_insn (gen_set_got_version ());
+
+ /* If we have a call-clobbered $gp, restore it from its save slot. */
+ if (HAVE_restore_gp)
+ emit_insn (gen_restore_gp ());
+ DONE;
+})
+
+(define_expand "nonlocal_goto_receiver"
+ [(const_int 0)]
+ "TARGET_USE_GOT"
+{
+ /* See the comment above load_call<mode> for details. */
+ emit_insn (gen_set_got_version ());
+ DONE;
+})
+
+;; Restore $gp from its .cprestore stack slot. The instruction remains
+;; volatile until all uses of $28 are exposed.
+(define_insn_and_split "restore_gp"
+ [(set (reg:SI 28)
+ (unspec_volatile:SI [(const_int 0)] UNSPEC_RESTORE_GP))
+ (clobber (match_scratch:SI 0 "=&d"))]
+ "TARGET_CALL_CLOBBERED_GP"
+ "#"
+ "&& epilogue_completed"
+ [(const_int 0)]
+{
+ mips_restore_gp_from_cprestore_slot (operands[0]);
+ DONE;
+}
+ [(set_attr "type" "ghost")])
+
+;; Move between $gp and its register save slot.
+(define_insn_and_split "move_gp<mode>"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d,m")
+ (unspec:GPR [(match_operand:GPR 1 "move_operand" "m,d")]
+ UNSPEC_MOVE_GP))]
+ ""
+ { return mips_must_initialize_gp_p () ? "#" : ""; }
+ "mips_must_initialize_gp_p ()"
+ [(const_int 0)]
+{
+ mips_emit_move (operands[0], operands[1]);
+ DONE;
+}
+ [(set_attr "type" "ghost")])
+
+;;
+;; ....................
+;;
+;; FUNCTION CALLS
+;;
+;; ....................
+
+;; Instructions to load a call address from the GOT. The address might
+;; point to a function or to a lazy binding stub. In the latter case,
+;; the stub will use the dynamic linker to resolve the function, which
+;; in turn will change the GOT entry to point to the function's real
+;; address.
+;;
+;; This means that every call, even pure and constant ones, can
+;; potentially modify the GOT entry. And once a stub has been called,
+;; we must not call it again.
+;;
+;; We represent this restriction using an imaginary, fixed, call-saved
+;; register called GOT_VERSION_REGNUM. The idea is to make the register
+;; live throughout the function and to change its value after every
+;; potential call site. This stops any rtx value that uses the register
+;; from being computed before an earlier call. To do this, we:
+;;
+;; - Ensure that the register is live on entry to the function,
+;; so that it is never thought to be used uninitialized.
+;;
+;; - Ensure that the register is live on exit from the function,
+;; so that it is live throughout.
+;;
+;; - Make each call (lazily-bound or not) use the current value
+;; of GOT_VERSION_REGNUM, so that updates of the register are
+;; not moved across call boundaries.
+;;
+;; - Add "ghost" definitions of the register to the beginning of
+;; blocks reached by EH and ABNORMAL_CALL edges, because those
+;; edges may involve calls that normal paths don't. (E.g. the
+;; unwinding code that handles a non-call exception may change
+;; lazily-bound GOT entries.) We do this by making the
+;; exception_receiver and nonlocal_goto_receiver expanders emit
+;; a set_got_version instruction.
+;;
+;; - After each call (lazily-bound or not), use a "ghost"
+;; update_got_version instruction to change the register's value.
+;; This instruction mimics the _possible_ effect of the dynamic
+;; resolver during the call and it remains live even if the call
+;; itself becomes dead.
+;;
+;; - Leave GOT_VERSION_REGNUM out of all register classes.
+;; The register is therefore not a valid register_operand
+;; and cannot be moved to or from other registers.
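+;;
+;; As a sketch (assumed registers and relocation syntax, for illustration
+;; only), a lazily-bound o32 abicalls call to f loads the address through
+;; the GOT and jumps through $25:
+;;
+;;	lw	$25,%call16(f)($28)	# load_call<mode>
+;;	jalr	$25
+;;
+;; with a ghost update_got_version following the call in the RTL stream;
+;; neither the ghost insn nor GOT_VERSION_REGNUM emits any code.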
+
+(define_insn "load_call<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec:P [(match_operand:P 1 "register_operand" "d")
+ (match_operand:P 2 "immediate_operand" "")
+ (reg:SI GOT_VERSION_REGNUM)] UNSPEC_LOAD_CALL))]
+ "TARGET_USE_GOT"
+ "<load>\t%0,%R2(%1)"
+ [(set_attr "got" "load")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "set_got_version"
+ [(set (reg:SI GOT_VERSION_REGNUM)
+ (unspec_volatile:SI [(const_int 0)] UNSPEC_SET_GOT_VERSION))]
+ "TARGET_USE_GOT"
+ ""
+ [(set_attr "type" "ghost")])
+
+(define_insn "update_got_version"
+ [(set (reg:SI GOT_VERSION_REGNUM)
+ (unspec:SI [(reg:SI GOT_VERSION_REGNUM)] UNSPEC_UPDATE_GOT_VERSION))]
+ "TARGET_USE_GOT"
+ ""
+ [(set_attr "type" "ghost")])
+
+;; Sibling calls. All these patterns use jump instructions.
+
+;; If TARGET_SIBCALLS, call_insn_operand will only accept constant
+;; addresses if a direct jump is acceptable. Since the 'S' constraint
+;; is defined in terms of call_insn_operand, the same is true of the
+;; constraints.
+
+;; When we use an indirect jump, we need a register that will be
+;; preserved by the epilogue. Since TARGET_USE_PIC_FN_ADDR_REG forces
+;; us to use $25 for this purpose -- and $25 is never clobbered by the
+;; epilogue -- we might as well use it for !TARGET_USE_PIC_FN_ADDR_REG
+;; as well.
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "")
+ (match_operand 1 ""))
+ (use (match_operand 2 "")) ;; next_arg_reg
+ (use (match_operand 3 ""))])] ;; struct_value_size_rtx
+ "TARGET_SIBCALLS"
+{
+ mips_expand_call (MIPS_CALL_SIBCALL, NULL_RTX, XEXP (operands[0], 0),
+ operands[1], operands[2], false);
+ DONE;
+})
+
+(define_insn "sibcall_internal"
+ [(call (mem:SI (match_operand 0 "call_insn_operand" "j,S"))
+ (match_operand 1 "" ""))]
+ "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
+ { return MIPS_CALL ("j", operands, 0, 1); }
+ [(set_attr "type" "call")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "")
+ (call (match_operand 1 "")
+ (match_operand 2 "")))
+ (use (match_operand 3 ""))])] ;; next_arg_reg
+ "TARGET_SIBCALLS"
+{
+ mips_expand_call (MIPS_CALL_SIBCALL, operands[0], XEXP (operands[1], 0),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+(define_insn "sibcall_value_internal"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "j,S"))
+ (match_operand 2 "" "")))]
+ "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
+ { return MIPS_CALL ("j", operands, 1, 2); }
+ [(set_attr "type" "call")])
+
+(define_insn "sibcall_value_multiple_internal"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "j,S"))
+ (match_operand 2 "" "")))
+ (set (match_operand 3 "register_operand" "")
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))]
+ "TARGET_SIBCALLS && SIBLING_CALL_P (insn)"
+ { return MIPS_CALL ("j", operands, 1, 2); }
+ [(set_attr "type" "call")])
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "")
+ (match_operand 1 ""))
+ (use (match_operand 2 "")) ;; next_arg_reg
+ (use (match_operand 3 ""))])] ;; struct_value_size_rtx
+ ""
+{
+ mips_expand_call (MIPS_CALL_NORMAL, NULL_RTX, XEXP (operands[0], 0),
+ operands[1], operands[2], false);
+ DONE;
+})
+
+;; This instruction directly corresponds to an assembly-language "jal".
+;; There are four cases:
+;;
+;; - -mno-abicalls:
+;; Both symbolic and register destinations are OK. The pattern
+;; always expands to a single mips instruction.
+;;
+;; - -mabicalls/-mno-explicit-relocs:
+;; Again, both symbolic and register destinations are OK.
+;; The call is treated as a multi-instruction black box.
+;;
+;; - -mabicalls/-mexplicit-relocs with n32 or n64:
+;; Only "jal $25" is allowed. This expands to a single "jalr $25"
+;; instruction.
+;;
+;; - -mabicalls/-mexplicit-relocs with o32 or o64:
+;; Only "jal $25" is allowed. The call is actually two instructions:
+;; "jalr $25" followed by an insn to reload $gp.
+;;
+;; In the last case, we can generate the individual instructions with
+;; a define_split. There are several things to be wary of:
+;;
+;; - We can't expose the load of $gp before reload. If we did,
+;; it might get removed as dead, but reload can introduce new
+;; uses of $gp by rematerializing constants.
+;;
+;; - We shouldn't restore $gp after calls that never return.
+;; It isn't valid to insert instructions between a noreturn
+;; call and the following barrier.
+;;
+;; - The splitter deliberately changes the liveness of $gp. The unsplit
+;; instruction preserves $gp and so has no effect on its liveness.
+;; But once we generate the separate insns, it becomes obvious that
+;; $gp is not live on entry to the call.
+;;
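+;; As an illustration of the last case (the offset is assumed, for
+;; illustration only): under o32 -mabicalls -mexplicit-relocs the split
+;; turns a single call_internal into
+;;
+;;	jalr	$25
+;;	lw	$28,16($sp)	# reload $gp from the cprestore slot
+;;
+;; where 16 stands in for whatever cprestore offset the prologue chose.
+;;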
+(define_insn_and_split "call_internal"
+ [(call (mem:SI (match_operand 0 "call_insn_operand" "c,S"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
+ ""
+ { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, 1); }
+ "reload_completed && TARGET_SPLIT_CALLS"
+ [(const_int 0)]
+{
+ mips_split_call (curr_insn, gen_call_split (operands[0], operands[1]));
+ DONE;
+}
+ [(set_attr "jal" "indirect,direct")])
+
+(define_insn "call_split"
+ [(call (mem:SI (match_operand 0 "call_insn_operand" "cS"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
+ (clobber (reg:SI 28))]
+ "TARGET_SPLIT_CALLS"
+ { return MIPS_CALL ("jal", operands, 0, 1); }
+ [(set_attr "type" "call")])
+
+;; A pattern for calls that must be made directly. It is used for
+;; MIPS16 calls that the linker may need to redirect to a hard-float
+;; stub; the linker relies on the call relocation type to detect when
+;; such redirection is needed.
+(define_insn_and_split "call_internal_direct"
+ [(call (mem:SI (match_operand 0 "const_call_insn_operand"))
+ (match_operand 1))
+ (const_int 1)
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
+ ""
+ { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 0, -1); }
+ "reload_completed && TARGET_SPLIT_CALLS"
+ [(const_int 0)]
+{
+ mips_split_call (curr_insn,
+ gen_call_direct_split (operands[0], operands[1]));
+ DONE;
+}
+ [(set_attr "type" "call")])
+
+(define_insn "call_direct_split"
+ [(call (mem:SI (match_operand 0 "const_call_insn_operand"))
+ (match_operand 1))
+ (const_int 1)
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
+ (clobber (reg:SI 28))]
+ "TARGET_SPLIT_CALLS"
+ { return MIPS_CALL ("jal", operands, 0, -1); }
+ [(set_attr "type" "call")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "")
+ (call (match_operand 1 "")
+ (match_operand 2 "")))
+ (use (match_operand 3 ""))])] ;; next_arg_reg
+ ""
+{
+ mips_expand_call (MIPS_CALL_NORMAL, operands[0], XEXP (operands[1], 0),
+ operands[2], operands[3], false);
+ DONE;
+})
+
+;; See comment for call_internal.
+(define_insn_and_split "call_value_internal"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "c,S"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
+ ""
+ { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ "reload_completed && TARGET_SPLIT_CALLS"
+ [(const_int 0)]
+{
+ mips_split_call (curr_insn,
+ gen_call_value_split (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
+ [(set_attr "jal" "indirect,direct")])
+
+(define_insn "call_value_split"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "cS"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
+ (clobber (reg:SI 28))]
+ "TARGET_SPLIT_CALLS"
+ { return MIPS_CALL ("jal", operands, 1, 2); }
+ [(set_attr "type" "call")])
+
+;; See call_internal_direct.
+(define_insn_and_split "call_value_internal_direct"
+ [(set (match_operand 0 "register_operand")
+ (call (mem:SI (match_operand 1 "const_call_insn_operand"))
+ (match_operand 2)))
+ (const_int 1)
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
+ ""
+ { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, -1); }
+ "reload_completed && TARGET_SPLIT_CALLS"
+ [(const_int 0)]
+{
+ mips_split_call (curr_insn,
+ gen_call_value_direct_split (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
+ [(set_attr "type" "call")])
+
+(define_insn "call_value_direct_split"
+ [(set (match_operand 0 "register_operand")
+ (call (mem:SI (match_operand 1 "const_call_insn_operand"))
+ (match_operand 2)))
+ (const_int 1)
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
+ (clobber (reg:SI 28))]
+ "TARGET_SPLIT_CALLS"
+ { return MIPS_CALL ("jal", operands, 1, -1); }
+ [(set_attr "type" "call")])
+
+;; See comment for call_internal.
+(define_insn_and_split "call_value_multiple_internal"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "c,S"))
+ (match_operand 2 "" "")))
+ (set (match_operand 3 "register_operand" "")
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))]
+ ""
+ { return TARGET_SPLIT_CALLS ? "#" : MIPS_CALL ("jal", operands, 1, 2); }
+ "reload_completed && TARGET_SPLIT_CALLS"
+ [(const_int 0)]
+{
+ mips_split_call (curr_insn,
+ gen_call_value_multiple_split (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "jal" "indirect,direct")])
+
+(define_insn "call_value_multiple_split"
+ [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "call_insn_operand" "cS"))
+ (match_operand 2 "" "")))
+ (set (match_operand 3 "register_operand" "")
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:SI RETURN_ADDR_REGNUM))
+ (clobber (reg:SI 28))]
+ "TARGET_SPLIT_CALLS"
+ { return MIPS_CALL ("jal", operands, 1, 2); }
+ [(set_attr "type" "call")])
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "")
+ (const_int 0))
+ (match_operand 1 "")
+ (match_operand 2 "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ mips_emit_move (SET_DEST (set), SET_SRC (set));
+ }
+
+ emit_insn (gen_blockage ());
+ DONE;
+})
+
+;;
+;; ....................
+;;
+;; MISC.
+;;
+;; ....................
+;;
+
+
+(define_insn "prefetch"
+ [(prefetch (match_operand:QI 0 "address_operand" "p")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))]
+ "ISA_HAS_PREFETCH && TARGET_EXPLICIT_RELOCS"
+{
+ if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A)
+ /* Loongson 2[ef] and Loongson 3a use load to $0 to perform prefetching. */
+ return "ld\t$0,%a0";
+ operands[1] = mips_prefetch_cookie (operands[1], operands[2]);
+ return "pref\t%1,%a0";
+}
+ [(set_attr "type" "prefetch")])
+
+(define_insn "*prefetch_indexed_<mode>"
+ [(prefetch (plus:P (match_operand:P 0 "register_operand" "d")
+ (match_operand:P 1 "register_operand" "d"))
+ (match_operand 2 "const_int_operand" "n")
+ (match_operand 3 "const_int_operand" "n"))]
+ "ISA_HAS_PREFETCHX && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+{
+ operands[2] = mips_prefetch_cookie (operands[2], operands[3]);
+ return "prefx\t%2,%1(%0)";
+}
+ [(set_attr "type" "prefetchx")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "%(nop%)"
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")])
+
+;; Like nop, but commented out when outside a .set noreorder block.
+(define_insn "hazard_nop"
+ [(const_int 1)]
+ ""
+ {
+ if (mips_noreorder.nesting_level > 0)
+ return "nop";
+ else
+ return "#nop";
+ }
+ [(set_attr "type" "nop")])
+
+;; MIPS4 Conditional move instructions.
+
+(define_insn "*mov<GPR:mode>_on_<MOVECC:mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (if_then_else:GPR
+ (match_operator:MOVECC 4 "equality_operator"
+ [(match_operand:MOVECC 1 "register_operand" "<MOVECC:reg>,<MOVECC:reg>")
+ (const_int 0)])
+ (match_operand:GPR 2 "reg_or_0_operand" "dJ,0")
+ (match_operand:GPR 3 "reg_or_0_operand" "0,dJ")))]
+ "ISA_HAS_CONDMOVE"
+ "@
+ mov%T4\t%0,%z2,%1
+ mov%t4\t%0,%z3,%1"
+ [(set_attr "type" "condmove")
+ (set_attr "mode" "<GPR:MODE>")])
+
+(define_insn "*mov<SCALARF:mode>_on_<MOVECC:mode>"
+ [(set (match_operand:SCALARF 0 "register_operand" "=f,f")
+ (if_then_else:SCALARF
+ (match_operator:MOVECC 4 "equality_operator"
+ [(match_operand:MOVECC 1 "register_operand" "<MOVECC:reg>,<MOVECC:reg>")
+ (const_int 0)])
+ (match_operand:SCALARF 2 "register_operand" "f,0")
+ (match_operand:SCALARF 3 "register_operand" "0,f")))]
+ "ISA_HAS_FP_CONDMOVE"
+ "@
+ mov%T4.<fmt>\t%0,%2,%1
+ mov%t4.<fmt>\t%0,%3,%1"
+ [(set_attr "type" "condmove")
+ (set_attr "mode" "<SCALARF:MODE>")])
+
+;; These are the main define_expand's used to make conditional moves.
+
+(define_expand "mov<mode>cc"
+ [(set (match_dup 4) (match_operand 1 "comparison_operator"))
+ (set (match_operand:GPR 0 "register_operand")
+ (if_then_else:GPR (match_dup 5)
+ (match_operand:GPR 2 "reg_or_0_operand")
+ (match_operand:GPR 3 "reg_or_0_operand")))]
+ "ISA_HAS_CONDMOVE"
+{
+ mips_expand_conditional_move (operands);
+ DONE;
+})
+
+(define_expand "mov<mode>cc"
+ [(set (match_dup 4) (match_operand 1 "comparison_operator"))
+ (set (match_operand:SCALARF 0 "register_operand")
+ (if_then_else:SCALARF (match_dup 5)
+ (match_operand:SCALARF 2 "register_operand")
+ (match_operand:SCALARF 3 "register_operand")))]
+ "ISA_HAS_FP_CONDMOVE"
+{
+ mips_expand_conditional_move (operands);
+ DONE;
+})
+
+;;
+;; ....................
+;;
+;; mips16 inline constant tables
+;;
+;; ....................
+;;
+
+(define_insn "consttable_int"
+ [(unspec_volatile [(match_operand 0 "consttable_operand" "")
+ (match_operand 1 "const_int_operand" "")]
+ UNSPEC_CONSTTABLE_INT)]
+ "TARGET_MIPS16"
+{
+ assemble_integer (operands[0], INTVAL (operands[1]),
+ BITS_PER_UNIT * INTVAL (operands[1]), 1);
+ return "";
+}
+ [(set (attr "length") (symbol_ref "INTVAL (operands[1])"))])
+
+(define_insn "consttable_float"
+ [(unspec_volatile [(match_operand 0 "consttable_operand" "")]
+ UNSPEC_CONSTTABLE_FLOAT)]
+ "TARGET_MIPS16"
+{
+ REAL_VALUE_TYPE d;
+
+ gcc_assert (GET_CODE (operands[0]) == CONST_DOUBLE);
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]);
+ assemble_real (d, GET_MODE (operands[0]),
+ GET_MODE_BITSIZE (GET_MODE (operands[0])));
+ return "";
+}
+ [(set (attr "length")
+ (symbol_ref "GET_MODE_SIZE (GET_MODE (operands[0]))"))])
+
+(define_insn "align"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] UNSPEC_ALIGN)]
+ ""
+ ".align\t%0"
+ [(set (attr "length") (symbol_ref "(1 << INTVAL (operands[0])) - 1"))])
+
+(define_split
+ [(match_operand 0 "small_data_pattern")]
+ "reload_completed"
+ [(match_dup 0)]
+ { operands[0] = mips_rewrite_small_data (operands[0]); })
+
+;;
+;; ....................
+;;
+;; MIPS16e Save/Restore
+;;
+;; ....................
+;;
+
+(define_insn "*mips16e_save_restore"
+ [(match_parallel 0 ""
+ [(set (match_operand:SI 1 "register_operand")
+ (plus:SI (match_dup 1)
+ (match_operand:SI 2 "const_int_operand")))])]
+ "operands[1] == stack_pointer_rtx
+ && mips16e_save_restore_pattern_p (operands[0], INTVAL (operands[2]), NULL)"
+ { return mips16e_output_save_restore (operands[0], INTVAL (operands[2])); }
+ [(set_attr "type" "arith")
+ (set_attr "extended_mips16" "yes")])
+
+;; Thread-Local Storage
+
+;; The TLS base pointer is accessed via "rdhwr $3, $29". No current
+;; MIPS architecture defines this register, and no current
+;; implementation provides it; instead, any OS which supports TLS is
+;; expected to trap and emulate this instruction. rdhwr is part of the
+;; MIPS 32r2 specification, but we use it on any architecture because
+;; we expect it to be emulated. Use .set to force the assembler to
+;; accept it.
+;;
+;; We do not use a constraint to force the destination to be $3
+;; because $3 can appear explicitly as a function return value.
+;; If we leave the use of $3 implicit in the constraints until
+;; reload, we may end up making a $3 return value live across
+;; the instruction, leading to a spill failure when reloading it.
+(define_insn_and_split "tls_get_tp_<mode>"
+ [(set (match_operand:P 0 "register_operand" "=d")
+ (unspec:P [(const_int 0)] UNSPEC_TLS_GET_TP))
+ (clobber (reg:P TLS_GET_TP_REGNUM))]
+ "HAVE_AS_TLS && !TARGET_MIPS16"
+ "#"
+ "&& reload_completed"
+ [(set (reg:P TLS_GET_TP_REGNUM)
+ (unspec:P [(const_int 0)] UNSPEC_TLS_GET_TP))
+ (set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
+ ""
+ [(set_attr "type" "unknown")
+ ; Since rdhwr always generates a trap for now, putting it in a delay
+ ; slot would make the kernel's emulation of it much slower.
+ (set_attr "can_delay" "no")
+ (set_attr "mode" "<MODE>")
+ (set_attr "length" "8")])
+
+(define_insn "*tls_get_tp_<mode>_split"
+ [(set (reg:P TLS_GET_TP_REGNUM)
+ (unspec:P [(const_int 0)] UNSPEC_TLS_GET_TP))]
+ "HAVE_AS_TLS && !TARGET_MIPS16"
+ ".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop"
+ [(set_attr "type" "unknown")
+ ; See tls_get_tp_<mode>
+ (set_attr "can_delay" "no")
+ (set_attr "mode" "<MODE>")])
+
+;; Synchronization instructions.
+
+(include "sync.md")
+
+; The MIPS Paired-Single Floating Point and MIPS-3D Instructions.
+
+(include "mips-ps-3d.md")
+
+; The MIPS DSP Instructions.
+
+(include "mips-dsp.md")
+
+; The MIPS DSP REV 2 Instructions.
+
+(include "mips-dspr2.md")
+
+; MIPS fixed-point instructions.
+(include "mips-fixed.md")
+
+; ST-Microelectronics Loongson-2E/2F-specific patterns.
+(include "loongson.md")
+
+(define_c_enum "unspec" [
+ UNSPEC_ADDRESS_FIRST
+])
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
new file mode 100644
index 000000000..20b0b6cde
--- /dev/null
+++ b/gcc/config/mips/mips.opt
@@ -0,0 +1,310 @@
+; Options for the MIPS port of the compiler
+;
+; Copyright (C) 2005, 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
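+; Each record below gives the option name, a line of properties
+; (Target or Driver, plus annotations such as Mask(), Var() and
+; Joined), and an optional one-line help string.
+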
+EB
+Driver
+
+EL
+Driver
+
+mabi=
+Target RejectNegative Joined
+-mabi=ABI Generate code that conforms to the given ABI
+
+mabicalls
+Target Report Mask(ABICALLS)
+Generate code that can be used in SVR4-style dynamic objects
+
+mad
+Target Report Var(TARGET_MAD)
+Use PMC-style 'mad' instructions
+
+march=
+Target RejectNegative Joined Var(mips_arch_string)
+-march=ISA Generate code for the given ISA
+
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(mips_branch_cost)
+-mbranch-cost=COST Set the cost of branches to roughly COST instructions
+
+mbranch-likely
+Target Report Mask(BRANCHLIKELY)
+Use Branch Likely instructions, overriding the architecture default
+
+mflip-mips16
+Target Report Var(TARGET_FLIP_MIPS16)
+Switch on/off MIPS16 ASE on alternating functions for compiler testing
+
+mcheck-zero-division
+Target Report Mask(CHECK_ZERO_DIV)
+Trap on integer divide by zero
+
+mcode-readable=
+Target RejectNegative Joined
+-mcode-readable=SETTING Specify when instructions are allowed to access code
+
+mdivide-breaks
+Target Report RejectNegative Mask(DIVIDE_BREAKS)
+Use branch-and-break sequences to check for integer divide by zero
+
+mdivide-traps
+Target Report RejectNegative InverseMask(DIVIDE_BREAKS, DIVIDE_TRAPS)
+Use trap instructions to check for integer divide by zero
+
+mdmx
+Target Report RejectNegative Var(TARGET_MDMX)
+Allow the use of MDMX instructions
+
+mdouble-float
+Target Report RejectNegative InverseMask(SINGLE_FLOAT, DOUBLE_FLOAT)
+Allow hardware floating-point instructions to cover both 32-bit and 64-bit operations
+
+mdsp
+Target Report Mask(DSP)
+Use MIPS-DSP instructions
+
+mdspr2
+Target Report Mask(DSPR2)
+Use MIPS-DSP REV 2 instructions
+
+mdebug
+Target Var(TARGET_DEBUG_MODE) Undocumented
+
+mdebugd
+Target Var(TARGET_DEBUG_D_MODE) Undocumented
+
+meb
+Target Report RejectNegative Mask(BIG_ENDIAN)
+Use big-endian byte order
+
+mel
+Target Report RejectNegative InverseMask(BIG_ENDIAN, LITTLE_ENDIAN)
+Use little-endian byte order
+
+membedded-data
+Target Report Var(TARGET_EMBEDDED_DATA)
+Use ROM instead of RAM
+
+mexplicit-relocs
+Target Report Mask(EXPLICIT_RELOCS)
+Use NewABI-style %reloc() assembly operators
+
+mextern-sdata
+Target Report Var(TARGET_EXTERN_SDATA) Init(1)
+Use -G for data that is not defined by the current object
+
+mfix-r4000
+Target Report Mask(FIX_R4000)
+Work around certain R4000 errata
+
+mfix-r4400
+Target Report Mask(FIX_R4400)
+Work around certain R4400 errata
+
+mfix-r10000
+Target Report Mask(FIX_R10000)
+Work around certain R10000 errata
+
+mfix-sb1
+Target Report Var(TARGET_FIX_SB1)
+Work around errata for early SB-1 revision 2 cores
+
+mfix-vr4120
+Target Report Var(TARGET_FIX_VR4120)
+Work around certain VR4120 errata
+
+mfix-vr4130
+Target Report Var(TARGET_FIX_VR4130)
+Work around VR4130 mflo/mfhi errata
+
+mfix4300
+Target Report Var(TARGET_4300_MUL_FIX)
+Work around an early 4300 hardware bug
+
+mfp-exceptions
+Target Report Mask(FP_EXCEPTIONS)
+FP exceptions are enabled
+
+mfp32
+Target Report RejectNegative InverseMask(FLOAT64)
+Use 32-bit floating-point registers
+
+mfp64
+Target Report RejectNegative Mask(FLOAT64)
+Use 64-bit floating-point registers
+
+mflush-func=
+Target RejectNegative Joined Var(mips_cache_flush_func) Init(CACHE_FLUSH_FUNC)
+-mflush-func=FUNC Use FUNC to flush the cache before calling stack trampolines
+
+mfused-madd
+Target Report Mask(FUSED_MADD)
+Generate floating-point multiply-add instructions
+
+mgp32
+Target Report RejectNegative InverseMask(64BIT)
+Use 32-bit general registers
+
+mgp64
+Target Report RejectNegative Mask(64BIT)
+Use 64-bit general registers
+
+mgpopt
+Target Report Var(TARGET_GPOPT) Init(1)
+Use GP-relative addressing to access small data
+
+mplt
+Target Report Var(TARGET_PLT)
+When generating -mabicalls code, allow executables to use PLTs and copy relocations
+
+mhard-float
+Target Report RejectNegative InverseMask(SOFT_FLOAT_ABI, HARD_FLOAT_ABI)
+Allow the use of hardware floating-point ABI and instructions
+
+minterlink-mips16
+Target Report Var(TARGET_INTERLINK_MIPS16) Init(0)
+Generate code that can be safely linked with MIPS16 code.
+
+mips
+Target RejectNegative Joined
+-mipsN Generate code for ISA level N
+
+mips16
+Target Report RejectNegative Mask(MIPS16)
+Generate MIPS16 code
+
+mips3d
+Target Report RejectNegative Mask(MIPS3D)
+Use MIPS-3D instructions
+
+mllsc
+Target Report Mask(LLSC)
+Use ll, sc and sync instructions
+
+mlocal-sdata
+Target Report Var(TARGET_LOCAL_SDATA) Init(1)
+Use -G for object-local data
+
+mlong-calls
+Target Report Var(TARGET_LONG_CALLS)
+Use indirect calls
+
+mlong32
+Target Report RejectNegative InverseMask(LONG64, LONG32)
+Use a 32-bit long type
+
+mlong64
+Target Report RejectNegative Mask(LONG64)
+Use a 64-bit long type
+
+mmcount-ra-address
+Target Report Var(TARGET_MCOUNT_RA_ADDRESS)
+Pass the address of the ra save location to _mcount in $12
+
+mmemcpy
+Target Report Mask(MEMCPY)
+Don't optimize block moves
+
+mmips-tfile
+Target
+Use the mips-tfile postpass
+
+mmt
+Target Report Var(TARGET_MT)
+Allow the use of MT instructions
+
+mno-float
+Target Report RejectNegative Var(TARGET_NO_FLOAT) Condition(TARGET_SUPPORTS_NO_FLOAT)
+Prevent the use of all floating-point operations
+
+mno-flush-func
+Target RejectNegative
+Do not use a cache-flushing function before calling stack trampolines
+
+mno-mdmx
+Target Report RejectNegative Var(TARGET_MDMX, 0)
+Do not use MDMX instructions
+
+mno-mips16
+Target Report RejectNegative InverseMask(MIPS16)
+Generate normal-mode code
+
+mno-mips3d
+Target Report RejectNegative InverseMask(MIPS3D)
+Do not use MIPS-3D instructions
+
+mpaired-single
+Target Report Mask(PAIRED_SINGLE_FLOAT)
+Use paired-single floating-point instructions
+
+mr10k-cache-barrier=
+Target Joined RejectNegative
+-mr10k-cache-barrier=SETTING Specify when r10k cache barriers should be inserted
+
+mrelax-pic-calls
+Target Report Mask(RELAX_PIC_CALLS)
+Try to allow the linker to turn PIC calls into direct calls
+
+mshared
+Target Report Var(TARGET_SHARED) Init(1)
+When generating -mabicalls code, make the code suitable for use in shared libraries
+
+msingle-float
+Target Report RejectNegative Mask(SINGLE_FLOAT)
+Restrict the use of hardware floating-point instructions to 32-bit operations
+
+msmartmips
+Target Report Mask(SMARTMIPS)
+Use SmartMIPS instructions
+
+msoft-float
+Target Report RejectNegative Mask(SOFT_FLOAT_ABI)
+Prevent the use of all hardware floating-point instructions
+
+msplit-addresses
+Target Report Mask(SPLIT_ADDRESSES)
+Optimize lui/addiu address loads
+
+msym32
+Target Report Var(TARGET_SYM32)
+Assume all symbols have 32-bit values
+
+msynci
+Target Report Mask(SYNCI)
+Use synci instruction to invalidate i-cache
+
+mtune=
+Target RejectNegative Joined Var(mips_tune_string)
+-mtune=PROCESSOR Optimize the output for PROCESSOR
+
+muninit-const-in-rodata
+Target Report Var(TARGET_UNINIT_CONST_IN_RODATA)
+Put uninitialized constants in ROM (needs -membedded-data)
+
+mvr4130-align
+Target Report Mask(VR4130_ALIGN)
+Perform VR4130-specific alignment optimizations
+
+mxgot
+Target Report Var(TARGET_XGOT)
+Lift restrictions on GOT size
+
+noasmopt
+Driver
diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S
new file mode 100644
index 000000000..ec331b5f6
--- /dev/null
+++ b/gcc/config/mips/mips16.S
@@ -0,0 +1,712 @@
+/* mips16 floating point support code
+ Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Cygnus Support
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file contains mips16 floating point support functions. These
+ functions are called by mips16 code to handle floating point when
+ -msoft-float is not used. They accept the arguments and return
+ values using the soft-float calling convention, but do the actual
+ operation using the hard floating point instructions. */
+
+#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64)
+
+/* This file contains 32-bit assembly code. */
+ .set nomips16
+
+/* Start a function. */
+
+#define STARTFN(NAME) .globl NAME; .ent NAME; NAME:
+
+/* Finish a function. */
+
+#define ENDFN(NAME) .end NAME
+
+/* ARG1
+ The FPR that holds the first floating-point argument.
+
+ ARG2
+ The FPR that holds the second floating-point argument.
+
+ RET
+ The FPR that holds a floating-point return value. */
+
+#define RET $f0
+#define ARG1 $f12
+#ifdef __mips64
+#define ARG2 $f13
+#else
+#define ARG2 $f14
+#endif
+
+/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR
+ and so that its low 32 bits contain LOW_FPR. */
+#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \
+ .set noat; \
+ mfc1 $1, LOW_FPR; \
+ mfc1 GPR, HIGH_FPR; \
+ dsll $1, $1, 32; \
+ dsll GPR, GPR, 32; \
+ dsrl $1, $1, 32; \
+ or GPR, GPR, $1; \
+ .set at
+
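+/* For example, with 0x11111111 in HIGH_FPR and 0x22222222 in LOW_FPR,
+   MERGE_GPRf leaves GPR holding 0x1111111122222222.  */
+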
+/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of
+ GPR to LOW_FPR. */
+#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \
+ .set noat; \
+ dsrl $1, GPR, 32; \
+ mtc1 GPR, LOW_FPR; \
+ mtc1 $1, HIGH_FPR; \
+ .set at
+
+/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */
+#define DELAYt(T, OPCODE, OP2) \
+ .set noreorder; \
+ jr T; \
+ OPCODE, OP2; \
+ .set reorder
+
+/* Use "OPCODE. OP2" and jump to T. */
+#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T
+
+/* MOVE_SF_BYTE0(D)
+ Move the first single-precision floating-point argument between
+ GPRs and FPRs.
+
+ MOVE_SI_BYTE0(D)
+ Likewise the first single-precision integer argument.
+
+ MOVE_SF_BYTE4(D)
+ Move the second single-precision floating-point argument between
+ GPRs and FPRs, given that the first argument occupies 4 bytes.
+
+ MOVE_SF_BYTE8(D)
+ Move the second single-precision floating-point argument between
+ GPRs and FPRs, given that the first argument occupies 8 bytes.
+
+ MOVE_DF_BYTE0(D)
+ Move the first double-precision floating-point argument between
+ GPRs and FPRs.
+
+ MOVE_DF_BYTE8(D)
+ Likewise the second double-precision floating-point argument.
+
+ MOVE_SF_RET(D, T)
+ Likewise a single-precision floating-point return value,
+ then jump to T.
+
+ MOVE_SC_RET(D, T)
+ Likewise a complex single-precision floating-point return value.
+
+ MOVE_DF_RET(D, T)
+ Likewise a double-precision floating-point return value.
+
+ MOVE_DC_RET(D, T)
+ Likewise a complex double-precision floating-point return value.
+
+ MOVE_SI_RET(D, T)
+ Likewise a single-precision integer return value.
+
+ The D argument is "t" to move to FPRs and "f" to move from FPRs.
+ The return macros may assume that the target of the jump does not
+ use a floating-point register. */
+
+#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0)
+
+#if defined(__mips64) && defined(__MIPSEB__)
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T
+#elif defined(__mips64)
+/* The high 32 bits of $2 correspond to the second word in memory;
+ i.e. the imaginary part. */
+#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T
+#elif __mips_fpr == 64
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#else
+#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2)
+#endif
+
+#if defined(__mips64)
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13
+#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13
+#else
+#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12
+#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14
+#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14
+#endif
+#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D)
+
+#if defined(__mips64)
+#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12
+#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13
+#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0)
+#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64 && defined(__MIPSEB__)
+#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T)
+#elif __mips_fpr == 64
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T)
+#elif defined(__MIPSEB__)
+/* FPRs are little-endian. */
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T)
+#else
+#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13
+#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15
+#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1)
+#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T)
+#endif
+
+/* Single-precision math. */
+
+/* Define a function NAME that loads two single-precision values,
+ performs FPU operation OPCODE on them, and returns the single-
+ precision result. */
+
+#define OPSF3(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE RET,ARG1,ARG2; \
+ MOVE_SF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16addsf3
+OPSF3 (__mips16_addsf3, add.s)
+#endif
+#ifdef L_m16subsf3
+OPSF3 (__mips16_subsf3, sub.s)
+#endif
+#ifdef L_m16mulsf3
+OPSF3 (__mips16_mulsf3, mul.s)
+#endif
+#ifdef L_m16divsf3
+OPSF3 (__mips16_divsf3, div.s)
+#endif
+
+/* Define a function NAME that loads a single-precision value,
+ performs FPU operation OPCODE on it, and returns the single-
+ precision result. */
+
+#define OPSF2(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ OPCODE RET,ARG1; \
+ MOVE_SF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16negsf2
+OPSF2 (__mips16_negsf2, neg.s)
+#endif
+#ifdef L_m16abssf2
+OPSF2 (__mips16_abssf2, abs.s)
+#endif
+
+/* Single-precision comparisons. */
+
+/* Define a function NAME that loads two single-precision values,
+ performs floating point comparison OPCODE, and returns TRUE or
+ FALSE depending on the result. */
+
+#define CMPSF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE ARG1,ARG2; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+/* Like CMPSF, but reverse the comparison operands. */
+
+#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_SF_BYTE0 (t); \
+ MOVE_SF_BYTE4 (t); \
+ OPCODE ARG2,ARG1; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+#ifdef L_m16eqsf2
+CMPSF (__mips16_eqsf2, c.eq.s, 0, 1)
+#endif
+#ifdef L_m16nesf2
+CMPSF (__mips16_nesf2, c.eq.s, 0, 1)
+#endif
+#ifdef L_m16gtsf2
+REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0)
+#endif
+#ifdef L_m16gesf2
+REVCMPSF (__mips16_gesf2, c.le.s, 0, -1)
+#endif
+#ifdef L_m16lesf2
+CMPSF (__mips16_lesf2, c.le.s, 0, 1)
+#endif
+#ifdef L_m16ltsf2
+CMPSF (__mips16_ltsf2, c.lt.s, -1, 0)
+#endif
+#ifdef L_m16unordsf2
+CMPSF(__mips16_unordsf2, c.un.s, 1, 0)
+#endif
+
+
+/* Single-precision conversions. */
+
+#ifdef L_m16fltsisf
+STARTFN (__mips16_floatsisf)
+ MOVE_SF_BYTE0 (t)
+ cvt.s.w RET,ARG1
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_floatsisf)
+#endif
+
+#ifdef L_m16fltunsisf
+STARTFN (__mips16_floatunsisf)
+ .set noreorder
+ bltz $4,1f
+ MOVE_SF_BYTE0 (t)
+ .set reorder
+ cvt.s.w RET,ARG1
+ MOVE_SF_RET (f, $31)
+1:
+ and $2,$4,1
+ srl $3,$4,1
+ or $2,$2,$3
+ mtc1 $2,RET
+ cvt.s.w RET,RET
+ add.s RET,RET,RET
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_floatunsisf)
+#endif
+
+#ifdef L_m16fix_truncsfsi
+STARTFN (__mips16_fix_truncsfsi)
+ MOVE_SF_BYTE0 (t)
+ trunc.w.s RET,ARG1,$4
+ MOVE_SI_RET (f, $31)
+ ENDFN (__mips16_fix_truncsfsi)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+/* Double-precision math. */
+
+/* Define a function NAME that loads two double-precision values,
+ performs FPU operation OPCODE on them, and returns the double-
+ precision result. */
+
+#define OPDF3(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE RET,ARG1,ARG2; \
+ MOVE_DF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16adddf3
+OPDF3 (__mips16_adddf3, add.d)
+#endif
+#ifdef L_m16subdf3
+OPDF3 (__mips16_subdf3, sub.d)
+#endif
+#ifdef L_m16muldf3
+OPDF3 (__mips16_muldf3, mul.d)
+#endif
+#ifdef L_m16divdf3
+OPDF3 (__mips16_divdf3, div.d)
+#endif
+
+/* Define a function NAME that loads a double-precision value,
+ performs FPU operation OPCODE on it, and returns the double-
+ precision result. */
+
+#define OPDF2(NAME, OPCODE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ OPCODE RET,ARG1; \
+ MOVE_DF_RET (f, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16negdf2
+OPDF2 (__mips16_negdf2, neg.d)
+#endif
+#ifdef L_m16absdf2
+OPDF2 (__mips16_absdf2, abs.d)
+#endif
+
+/* Conversions between single and double precision. */
+
+#ifdef L_m16extsfdf2
+STARTFN (__mips16_extendsfdf2)
+ MOVE_SF_BYTE0 (t)
+ cvt.d.s RET,ARG1
+ MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_extendsfdf2)
+#endif
+
+#ifdef L_m16trdfsf2
+STARTFN (__mips16_truncdfsf2)
+ MOVE_DF_BYTE0 (t)
+ cvt.s.d RET,ARG1
+ MOVE_SF_RET (f, $31)
+ ENDFN (__mips16_truncdfsf2)
+#endif
+
+/* Double-precision comparisons. */
+
+/* Define a function NAME that loads two double-precision values,
+ performs floating point comparison OPCODE, and returns TRUE or
+ FALSE depending on the result. */
+
+#define CMPDF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE ARG1,ARG2; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+/* Like CMPDF, but reverse the comparison operands. */
+
+#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \
+STARTFN (NAME); \
+ MOVE_DF_BYTE0 (t); \
+ MOVE_DF_BYTE8 (t); \
+ OPCODE ARG2,ARG1; \
+ li $2,TRUE; \
+ bc1t 1f; \
+ li $2,FALSE; \
+1:; \
+ j $31; \
+ ENDFN (NAME)
+
+#ifdef L_m16eqdf2
+CMPDF (__mips16_eqdf2, c.eq.d, 0, 1)
+#endif
+#ifdef L_m16nedf2
+CMPDF (__mips16_nedf2, c.eq.d, 0, 1)
+#endif
+#ifdef L_m16gtdf2
+REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0)
+#endif
+#ifdef L_m16gedf2
+REVCMPDF (__mips16_gedf2, c.le.d, 0, -1)
+#endif
+#ifdef L_m16ledf2
+CMPDF (__mips16_ledf2, c.le.d, 0, 1)
+#endif
+#ifdef L_m16ltdf2
+CMPDF (__mips16_ltdf2, c.lt.d, -1, 0)
+#endif
+#ifdef L_m16unorddf2
+CMPDF(__mips16_unorddf2, c.un.d, 1, 0)
+#endif
+
+/* Double-precision conversions. */
+
+#ifdef L_m16fltsidf
+STARTFN (__mips16_floatsidf)
+ MOVE_SI_BYTE0 (t)
+ cvt.d.w RET,ARG1
+ MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_floatsidf)
+#endif
+
+#ifdef L_m16fltunsidf
+STARTFN (__mips16_floatunsidf)
+ MOVE_SI_BYTE0 (t)
+ cvt.d.w RET,ARG1
+ bgez $4,1f
+ li.d ARG1, 4.294967296e+9
+ add.d RET, RET, ARG1
+1: MOVE_DF_RET (f, $31)
+ ENDFN (__mips16_floatunsidf)
+#endif
+
+#ifdef L_m16fix_truncdfsi
+STARTFN (__mips16_fix_truncdfsi)
+ MOVE_DF_BYTE0 (t)
+ trunc.w.d RET,ARG1,$4
+ MOVE_SI_RET (f, $31)
+ ENDFN (__mips16_fix_truncdfsi)
+#endif
+#endif /* !__mips_single_float */
+
+/* Define a function NAME that moves a return value of mode MODE from
+ FPRs to GPRs. */
+
+#define RET_FUNCTION(NAME, MODE) \
+STARTFN (NAME); \
+ MOVE_##MODE##_RET (t, $31); \
+ ENDFN (NAME)
+
+#ifdef L_m16retsf
+RET_FUNCTION (__mips16_ret_sf, SF)
+#endif
+
+#ifdef L_m16retsc
+RET_FUNCTION (__mips16_ret_sc, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16retdf
+RET_FUNCTION (__mips16_ret_df, DF)
+#endif
+
+#ifdef L_m16retdc
+RET_FUNCTION (__mips16_ret_dc, DC)
+#endif
+#endif /* !__mips_single_float */
+
+/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument
+ code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2
+ classify the first and second arguments as follows:
+
+ 1: a single-precision argument
+ 2: a double-precision argument
+ 0: no argument, or not one of the above. */
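+
+/* For example, a function taking (sf, df) arguments has ARG1 = 1 and
+   ARG2 = 2, giving code 1 + 2 * 4 = 9; it therefore uses STUB_ARGS_9
+   below.  */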
+
+#define STUB_ARGS_0 /* () */
+#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */
+#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */
+#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */
+#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */
+#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */
+#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */
+
+/* These functions are used by 16-bit code when calling via a function
+ pointer. They must copy the floating point arguments from the GPRs
+ to FPRs and then call function $2. */
+
+#define CALL_STUB_NO_RET(NAME, CODE) \
+STARTFN (NAME); \
+ STUB_ARGS_##CODE; \
+ .set noreorder; \
+ jr $2; \
+ move $25,$2; \
+ .set reorder; \
+ ENDFN (NAME)
+
+#ifdef L_m16stub1
+CALL_STUB_NO_RET (__mips16_call_stub_1, 1)
+#endif
+
+#ifdef L_m16stub5
+CALL_STUB_NO_RET (__mips16_call_stub_5, 5)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+
+#ifdef L_m16stub2
+CALL_STUB_NO_RET (__mips16_call_stub_2, 2)
+#endif
+
+#ifdef L_m16stub6
+CALL_STUB_NO_RET (__mips16_call_stub_6, 6)
+#endif
+
+#ifdef L_m16stub9
+CALL_STUB_NO_RET (__mips16_call_stub_9, 9)
+#endif
+
+#ifdef L_m16stub10
+CALL_STUB_NO_RET (__mips16_call_stub_10, 10)
+#endif
+#endif /* !__mips_single_float */
+
+/* Now we have the same set of functions, except that this time the
+ function being called returns an SFmode, SCmode, DFmode or DCmode
+ value; we need to instantiate a set for each case. The calling
+ function will arrange to preserve $18, so these functions are free
+ to use it to hold the return address.
+
+ Note that we do not know whether the function we are calling is 16
+ bit or 32 bit. However, it does not matter, because 16-bit
+ functions always return floating point values in both the gp and
+ the fp regs. It would be possible to check whether the function
+ being called is 16 bits, in which case the copy is unnecessary;
+ however, it's faster to always do the copy. */
+
+#define CALL_STUB_RET(NAME, CODE, MODE) \
+STARTFN (NAME); \
+ move $18,$31; \
+ STUB_ARGS_##CODE; \
+ .set noreorder; \
+ jalr $2; \
+ move $25,$2; \
+ .set reorder; \
+ MOVE_##MODE##_RET (f, $18); \
+ ENDFN (NAME)
+
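+/* For illustration, on o32 with 32-bit FPRs, __mips16_call_stub_sf_1
+   comes out as roughly:
+
+	move	$18,$31
+	mtc1	$4,$f12
+	jalr	$2		# "move $25,$2" fills the delay slot
+	mfc1	$2,$f0
+	jr	$18
+
+   i.e. save the return address in $18, copy the argument into $f12,
+   call through $2 (with $25 holding the target as PIC requires), and
+   copy the result back before returning.  */
+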
+/* First, instantiate the single-float set. */
+
+#ifdef L_m16stubsf0
+CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF)
+#endif
+
+#ifdef L_m16stubsf1
+CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF)
+#endif
+
+#ifdef L_m16stubsf5
+CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsf2
+CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF)
+#endif
+
+#ifdef L_m16stubsf6
+CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF)
+#endif
+
+#ifdef L_m16stubsf9
+CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF)
+#endif
+
+#ifdef L_m16stubsf10
+CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Now we have the same set of functions again, except that this time
+   the function being called returns a DFmode value.  */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdf0
+CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF)
+#endif
+
+#ifdef L_m16stubdf1
+CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF)
+#endif
+
+#ifdef L_m16stubdf5
+CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF)
+#endif
+
+#ifdef L_m16stubdf2
+CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF)
+#endif
+
+#ifdef L_m16stubdf6
+CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF)
+#endif
+
+#ifdef L_m16stubdf9
+CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF)
+#endif
+
+#ifdef L_m16stubdf10
+CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Ho hum. Here we have the same set of functions again, this time
+ for when the function being called returns an SCmode value. */
+
+#ifdef L_m16stubsc0
+CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC)
+#endif
+
+#ifdef L_m16stubsc1
+CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC)
+#endif
+
+#ifdef L_m16stubsc5
+CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC)
+#endif
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubsc2
+CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC)
+#endif
+
+#ifdef L_m16stubsc6
+CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC)
+#endif
+
+#ifdef L_m16stubsc9
+CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC)
+#endif
+
+#ifdef L_m16stubsc10
+CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC)
+#endif
+#endif /* !__mips_single_float */
+
+
+/* Finally, another set of functions for DCmode. */
+
+#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT)
+#ifdef L_m16stubdc0
+CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC)
+#endif
+
+#ifdef L_m16stubdc1
+CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC)
+#endif
+
+#ifdef L_m16stubdc5
+CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC)
+#endif
+
+#ifdef L_m16stubdc2
+CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC)
+#endif
+
+#ifdef L_m16stubdc6
+CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC)
+#endif
+
+#ifdef L_m16stubdc9
+CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC)
+#endif
+
+#ifdef L_m16stubdc10
+CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC)
+#endif
+#endif /* !__mips_single_float */
+#endif
diff --git a/gcc/config/mips/netbsd.h b/gcc/config/mips/netbsd.h
new file mode 100644
index 000000000..82a0921ed
--- /dev/null
+++ b/gcc/config/mips/netbsd.h
@@ -0,0 +1,187 @@
+/* Definitions of target machine for GNU compiler, for MIPS NetBSD systems.
+ Copyright (C) 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2007, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Define default target values. */
+
+#undef MACHINE_TYPE
+#if TARGET_ENDIAN_DEFAULT != 0
+#define MACHINE_TYPE "NetBSD/mipseb ELF"
+#else
+#define MACHINE_TYPE "NetBSD/mipsel ELF"
+#endif
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ builtin_define ("__NO_LEADING_UNDERSCORES__"); \
+ builtin_define ("__GP_SUPPORT__"); \
+ if (TARGET_LONG64) \
+ builtin_define ("__LONG64"); \
+ \
+ if (TARGET_ABICALLS) \
+ builtin_define ("__ABICALLS__"); \
+ \
+ if (mips_abi == ABI_EABI) \
+ builtin_define ("__mips_eabi"); \
+ else if (mips_abi == ABI_N32) \
+ builtin_define ("__mips_n32"); \
+ else if (mips_abi == ABI_64) \
+ builtin_define ("__mips_n64"); \
+ else if (mips_abi == ABI_O64) \
+ builtin_define ("__mips_o64"); \
+ } \
+ while (0)
+
+/* The generic MIPS TARGET_CPU_CPP_BUILTINS are incorrect for NetBSD.
+ Specifically, they define too many namespace-invasive macros. Override
+ them here. Note this is structured for easy comparison to the version
+ in mips.h.
+
+ FIXME: This probably isn't the best solution. But in the absence
+ of something better, it will have to do, for now. */
+
+#undef TARGET_CPU_CPP_BUILTINS
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("cpu=mips"); \
+ builtin_define ("__mips__"); \
+ builtin_define ("_mips"); \
+ \
+ /* No _R3000 or _R4000. */ \
+ if (TARGET_64BIT) \
+ builtin_define ("__mips64"); \
+ \
+ if (TARGET_FLOAT64) \
+ builtin_define ("__mips_fpr=64"); \
+ else \
+ builtin_define ("__mips_fpr=32"); \
+ \
+ if (TARGET_MIPS16) \
+ builtin_define ("__mips16"); \
+ \
+ MIPS_CPP_SET_PROCESSOR ("_MIPS_ARCH", mips_arch_info); \
+ MIPS_CPP_SET_PROCESSOR ("_MIPS_TUNE", mips_tune_info); \
+ \
+ if (ISA_MIPS1) \
+ builtin_define ("__mips=1"); \
+ else if (ISA_MIPS2) \
+ builtin_define ("__mips=2"); \
+ else if (ISA_MIPS3) \
+ builtin_define ("__mips=3"); \
+ else if (ISA_MIPS4) \
+ builtin_define ("__mips=4"); \
+ else if (ISA_MIPS32) \
+ { \
+ builtin_define ("__mips=32"); \
+ builtin_define ("__mips_isa_rev=1"); \
+ } \
+ else if (ISA_MIPS32R2) \
+ { \
+ builtin_define ("__mips=32"); \
+ builtin_define ("__mips_isa_rev=2"); \
+ } \
+ else if (ISA_MIPS64) \
+ { \
+ builtin_define ("__mips=64"); \
+ builtin_define ("__mips_isa_rev=1"); \
+ } \
+ \
+ if (TARGET_HARD_FLOAT) \
+ builtin_define ("__mips_hard_float"); \
+ else if (TARGET_SOFT_FLOAT) \
+ builtin_define ("__mips_soft_float"); \
+ \
+ if (TARGET_SINGLE_FLOAT) \
+ builtin_define ("__mips_single_float"); \
+ \
+ if (TARGET_BIG_ENDIAN) \
+ builtin_define ("__MIPSEB__"); \
+ else \
+ builtin_define ("__MIPSEL__"); \
+ \
+ /* No language dialect defines. */ \
+ \
+ /* ABIs handled in TARGET_OS_CPP_BUILTINS. */ \
+ } \
+ while (0)
+
+
+/* Extra specs we need. */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+/* Provide a SUBTARGET_CPP_SPEC appropriate for NetBSD. */
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%(netbsd_cpp_spec)"
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/mips target.
+ This is a copy of LINK_SPEC from <netbsd-elf.h> tweaked for
+ the MIPS target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{EL:-m elf32lmip} \
+ %{EB:-m elf32bmip} \
+ %(endian_spec) \
+ %{G*} %{mips1} %{mips2} %{mips3} %{mips4} %{mips32} %{mips32r2} %{mips64} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "__start"
+
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC \
+ "%{!mno-abicalls: \
+ %{!fno-PIC:%{!fno-pic:-KPIC}}}"
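+
+/* That is: pass -KPIC to the assembler unless -mno-abicalls or one of
+   -fno-PIC/-fno-pic is given.  */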
+
+
+/* -G is incompatible with -KPIC which is the default, so only allow objects
+ in the small data section if the user explicitly asks for it. */
+
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
+
+
+#undef ASM_FINAL_SPEC
+#undef SET_ASM_OP
+
+
+/* NetBSD hasn't historically provided _flush_cache(), but rather
+ _cacheflush(), which takes the same arguments as the former. */
+#undef CACHE_FLUSH_FUNC
+#define CACHE_FLUSH_FUNC "_cacheflush"
+
+
+/* Make gcc agree with <machine/ansi.h>.  */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
diff --git a/gcc/config/mips/octeon.md b/gcc/config/mips/octeon.md
new file mode 100644
index 000000000..0d94e6eec
--- /dev/null
+++ b/gcc/config/mips/octeon.md
@@ -0,0 +1,88 @@
+;; Octeon pipeline description.
+;; Copyright (C) 2008
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;; Copyright (C) 2004, 2005, 2006 Cavium Networks.
+
+
+;; Octeon is a dual-issue processor that can issue all instructions on
+;; pipe0 and a subset on pipe1.
+
+(define_automaton "octeon_main, octeon_mult")
+
+(define_cpu_unit "octeon_pipe0" "octeon_main")
+(define_cpu_unit "octeon_pipe1" "octeon_main")
+(define_cpu_unit "octeon_mult" "octeon_mult")
+
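+;; In the reservations below, "|" lets an instruction issue on either
+;; pipe, "+" claims both units in the same cycle, and "," advances to
+;; the next cycle ("*N" repeats a unit for N cycles).
+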
+(define_insn_reservation "octeon_arith" 1
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "arith,const,logical,move,shift,signext,slt,nop"))
+ "octeon_pipe0 | octeon_pipe1")
+
+(define_insn_reservation "octeon_condmove" 2
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "condmove"))
+ "octeon_pipe0 | octeon_pipe1")
+
+(define_insn_reservation "octeon_load" 2
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "load,prefetch,mtc,mfc"))
+ "octeon_pipe0")
+
+(define_insn_reservation "octeon_store" 1
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "store"))
+ "octeon_pipe0")
+
+(define_insn_reservation "octeon_brj" 1
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "branch,jump,call,trap"))
+ "octeon_pipe0")
+
+(define_insn_reservation "octeon_imul3" 5
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "imul3,pop,clz"))
+ "(octeon_pipe0 | octeon_pipe1) + octeon_mult")
+
+(define_insn_reservation "octeon_imul" 2
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "imul,mthilo"))
+ "(octeon_pipe0 | octeon_pipe1) + octeon_mult, octeon_mult")
+
+(define_insn_reservation "octeon_mfhilo" 5
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "mfhilo"))
+ "(octeon_pipe0 | octeon_pipe1) + octeon_mult")
+
+(define_insn_reservation "octeon_imadd" 4
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "imadd"))
+ "(octeon_pipe0 | octeon_pipe1) + octeon_mult, octeon_mult*3")
+
+(define_insn_reservation "octeon_idiv" 72
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "idiv"))
+ "(octeon_pipe0 | octeon_pipe1) + octeon_mult, octeon_mult*71")
+
+;; Assume both pipes are needed for unknown and multiple-instruction
+;; patterns.
+
+(define_insn_reservation "octeon_unknown" 1
+ (and (eq_attr "cpu" "octeon")
+ (eq_attr "type" "unknown,multi"))
+ "octeon_pipe0 + octeon_pipe1")
diff --git a/gcc/config/mips/openbsd.h b/gcc/config/mips/openbsd.h
new file mode 100644
index 000000000..a392ca470
--- /dev/null
+++ b/gcc/config/mips/openbsd.h
@@ -0,0 +1,101 @@
+/* Configuration for a MIPS ABI32 OpenBSD target.
+ Copyright (C) 1999, 2003, 2004, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Definitions needed for OpenBSD, to avoid picking mips 'defaults'. */
+
+/* GAS must know this. */
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{fPIC|fPIE:-KPIC}"
+
+/* CPP specific OpenBSD specs. */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC OBSD_CPP_SPEC
+
+/* Needed for ELF (inspired by netbsd-elf). */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* The profiling lib spec here is not really correct but we leave
+ it as it is until we have some kind of profiling working. */
+#define LIB_SPEC OBSD_LIB_SPEC
+
+/* mips assembler uses .set for arcane purposes. __attribute__((alias))
+ and friends won't work until we get recent binutils with .weakext
+ support. */
+#undef SET_ASM_OP
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__SYSTYPE_BSD__"); \
+ builtin_define ("__NO_LEADING_UNDERSCORES__"); \
+ builtin_define ("__GP_SUPPORT__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=OpenBSD"); \
+} while (0)
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h>. */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+/* Controlling the compilation driver. */
+
+/* LINK_SPEC appropriate for OpenBSD: support for GCC options
+ -static, -assert, and -nostdlib. Dynamic loader control. */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{G*} %{EB} %{EL} %{mips1} %{mips2} %{mips3} \
+ %{shared} \
+ %{!shared: -non_shared} \
+ -dynamic-linker /usr/libexec/ld.so \
+ %{!nostdlib:%{!r:%{!e*:-e __start}}} -dc -dp \
+ %{static:-Bstatic} %{!static:-Bdynamic} %{assert*}"
+
+/* -G is incompatible with -KPIC which is the default, so only allow objects
+ in the small data section if the user explicitly asks for it. */
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
+
+
+/* Since gas and gld are standard on OpenBSD, we don't need these. */
+#undef ASM_FINAL_SPEC
+#undef STARTFILE_SPEC
+
+/* Switch into a generic section. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* MIPS specific debugging info */
+#define MIPS_DEBUGGING_INFO 1
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
new file mode 100644
index 000000000..7430dd32b
--- /dev/null
+++ b/gcc/config/mips/predicates.md
@@ -0,0 +1,342 @@
+;; Predicate definitions for MIPS.
+;; Copyright (C) 2004, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "const_uns_arith_operand"
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND_UNSIGNED (INTVAL (op))")))
+
+(define_predicate "uns_arith_operand"
+ (ior (match_operand 0 "const_uns_arith_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "const_arith_operand"
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND (INTVAL (op))")))
+
+(define_predicate "arith_operand"
+ (ior (match_operand 0 "const_arith_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "const_uimm6_operand"
+ (and (match_code "const_int")
+ (match_test "UIMM6_OPERAND (INTVAL (op))")))
+
+(define_predicate "const_imm10_operand"
+ (and (match_code "const_int")
+ (match_test "IMM10_OPERAND (INTVAL (op))")))
+
+(define_predicate "reg_imm10_operand"
+ (ior (match_operand 0 "const_imm10_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "sle_operand"
+ (and (match_code "const_int")
+ (match_test "SMALL_OPERAND (INTVAL (op) + 1)")))
+
+(define_predicate "sleu_operand"
+ (and (match_operand 0 "sle_operand")
+ (match_test "INTVAL (op) + 1 != 0")))
+
+(define_predicate "const_0_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+(define_predicate "reg_or_0_operand"
+ (ior (and (match_operand 0 "const_0_operand")
+ (match_test "!TARGET_MIPS16"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "const_1_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST1_RTX (GET_MODE (op))")))
+
+(define_predicate "reg_or_1_operand"
+ (ior (match_operand 0 "const_1_operand")
+ (match_operand 0 "register_operand")))
+
+;; This is used for indexing into vectors, and hence only accepts const_int.
+(define_predicate "const_0_or_1_operand"
+ (and (match_code "const_int")
+ (ior (match_test "op == CONST0_RTX (GET_MODE (op))")
+ (match_test "op == CONST1_RTX (GET_MODE (op))"))))
+
+(define_predicate "qi_mask_operand"
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) == 0xff")))
+
+(define_predicate "hi_mask_operand"
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) == 0xffff")))
+
+(define_predicate "si_mask_operand"
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) == 0xffffffff")))
+
+(define_predicate "and_load_operand"
+ (ior (match_operand 0 "qi_mask_operand")
+ (match_operand 0 "hi_mask_operand")
+ (match_operand 0 "si_mask_operand")))
+
+(define_predicate "low_bitmask_operand"
+ (and (match_test "ISA_HAS_EXT_INS")
+ (match_code "const_int")
+ (match_test "low_bitmask_len (mode, INTVAL (op)) > 16")))
+
+(define_predicate "and_reg_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_test "!TARGET_MIPS16")
+ (match_operand 0 "const_uns_arith_operand"))
+ (match_operand 0 "low_bitmask_operand")
+ (match_operand 0 "si_mask_operand")))
+
+(define_predicate "and_operand"
+ (ior (match_operand 0 "and_load_operand")
+ (match_operand 0 "and_reg_operand")))
+
+(define_predicate "d_operand"
+ (and (match_code "reg")
+ (match_test "TARGET_MIPS16
+ ? M16_REG_P (REGNO (op))
+ : GP_REG_P (REGNO (op))")))
+
+(define_predicate "lo_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == LO_REGNUM")))
+
+(define_predicate "hilo_operand"
+ (and (match_code "reg")
+ (match_test "MD_REG_P (REGNO (op))")))
+
+(define_predicate "fcc_reload_operand"
+ (and (match_code "reg,subreg")
+ (match_test "ST_REG_P (true_regnum (op))")))
+
+(define_special_predicate "pc_or_label_operand"
+ (match_code "pc,label_ref"))
+
+(define_predicate "const_call_insn_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type symbol_type;
+
+ if (!mips_symbolic_constant_p (op, SYMBOL_CONTEXT_CALL, &symbol_type))
+ return false;
+
+ switch (symbol_type)
+ {
+ case SYMBOL_ABSOLUTE:
+ /* We can only use direct calls if we're sure that the target
+ function does not need $25 to be valid on entry. */
+ if (mips_use_pic_fn_addr_reg_p (op))
+ return false;
+
+ /* If -mlong-calls or if this function has an explicit long_call
+ attribute, we must use register addressing. The
+ SYMBOL_FLAG_LONG_CALL bit is set by mips_encode_section_info. */
+ return !(GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_LONG_CALL_P (op));
+
+ case SYMBOL_GOT_DISP:
+ /* Without explicit relocs, there is no special syntax for
+ loading the address of a call destination into a register.
+ Using "la $25,foo; jal $25" would prevent the lazy binding
+ of "foo", so keep the address of global symbols with the
+ jal macro. */
+ return !TARGET_EXPLICIT_RELOCS;
+
+ default:
+ return false;
+ }
+})
+
+(define_predicate "call_insn_operand"
+ (ior (match_operand 0 "const_call_insn_operand")
+ (match_operand 0 "register_operand")))
+
+;; A legitimate CONST_INT operand that takes more than one instruction
+;; to load.
+(define_predicate "splittable_const_int_operand"
+ (match_code "const_int")
+{
+ /* When generating mips16 code, LEGITIMATE_CONSTANT_P rejects
+ CONST_INTs that can't be loaded using simple insns. */
+ if (TARGET_MIPS16)
+ return false;
+
+ /* Don't handle multi-word moves this way; we don't want to introduce
+ the individual word-mode moves until after reload. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return false;
+
+ /* Otherwise check whether the constant can be loaded in a single
+ instruction. */
+ return !LUI_INT (op) && !SMALL_INT (op) && !SMALL_INT_UNSIGNED (op);
+})
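+;; For example, 0x12345678 needs a lui/ori pair and is therefore
+;; splittable, whereas 0x7fff fits a single li/addiu and is not.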
+
+(define_predicate "move_operand"
+ (match_operand 0 "general_operand")
+{
+ enum mips_symbol_type symbol_type;
+
+ /* The thinking here is as follows:
+
+ (1) The move expanders should split complex load sequences into
+ individual instructions. Those individual instructions can
+ then be optimized by all rtl passes.
+
+ (2) The target of pre-reload load sequences should not be used
+ to store temporary results. If the target register is only
+ assigned one value, reload can rematerialize that value
+ on demand, rather than spill it to the stack.
+
+ (3) If we allowed pre-reload passes like combine and cse to recreate
+ complex load sequences, we would want to be able to split the
+ sequences before reload as well, so that the pre-reload scheduler
+ can see the individual instructions. This falls foul of (2);
+ the splitter would be forced to reuse the target register for
+ intermediate results.
+
+ (4) We want to define complex load splitters for combine. These
+ splitters can request a temporary scratch register, which avoids
+ the problem in (2). They allow things like:
+
+ (set (reg T1) (high SYM))
+ (set (reg T2) (low (reg T1) SYM))
+ (set (reg X) (plus (reg T2) (const_int OFFSET)))
+
+ to be combined into:
+
+ (set (reg T3) (high SYM+OFFSET))
+ (set (reg X) (lo_sum (reg T3) SYM+OFFSET))
+
+ if T2 is only used this once. */
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+ return !splittable_const_int_operand (op, mode);
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (CONST_GP_P (op))
+ return true;
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)
+ && !mips_split_p[symbol_type]);
+
+ case HIGH:
+ op = XEXP (op, 0);
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)
+ && !mips_split_hi_p[symbol_type]);
+
+ default:
+ return true;
+ }
+})
+
+(define_predicate "cprestore_save_slot_operand"
+ (and (match_code "mem")
+ (match_test "mips_cprestore_address_p (XEXP (op, 0), false)")))
+
+(define_predicate "cprestore_load_slot_operand"
+ (and (match_code "mem")
+ (match_test "mips_cprestore_address_p (XEXP (op, 0), true)")))
+
+(define_predicate "consttable_operand"
+ (match_test "CONSTANT_P (op)"))
+
+(define_predicate "symbolic_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type type;
+ return mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type);
+})
+
+(define_predicate "absolute_symbolic_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type type;
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type)
+ && type == SYMBOL_ABSOLUTE);
+})
+
+(define_predicate "force_to_mem_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type symbol_type;
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &symbol_type)
+ && symbol_type == SYMBOL_FORCE_TO_MEM);
+})
+
+(define_predicate "got_disp_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type type;
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type)
+ && type == SYMBOL_GOT_DISP);
+})
+
+(define_predicate "got_page_ofst_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum mips_symbol_type type;
+ return (mips_symbolic_constant_p (op, SYMBOL_CONTEXT_LEA, &type)
+ && type == SYMBOL_GOT_PAGE_OFST);
+})
+
+(define_predicate "symbol_ref_operand"
+ (match_code "symbol_ref"))
+
+(define_predicate "stack_operand"
+ (and (match_code "mem")
+ (match_test "mips_stack_address_p (XEXP (op, 0), GET_MODE (op))")))
+
+(define_predicate "macc_msac_operand"
+ (ior (and (match_code "plus") (match_test "ISA_HAS_MACC"))
+ (and (match_code "minus") (match_test "ISA_HAS_MSAC")))
+{
+ rtx mult = XEXP (op, GET_CODE (op) == PLUS ? 0 : 1);
+ rtx accum = XEXP (op, GET_CODE (op) == PLUS ? 1 : 0);
+ return (GET_CODE (mult) == MULT
+ && REG_P (XEXP (mult, 0))
+ && REG_P (XEXP (mult, 1))
+ && REG_P (accum));
+})
+
+
+(define_predicate "equality_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "extend_operator"
+ (match_code "zero_extend,sign_extend"))
+
+(define_predicate "trap_comparison_operator"
+ (match_code "eq,ne,lt,ltu,ge,geu"))
+
+(define_predicate "order_operator"
+ (match_code "lt,ltu,le,leu,ge,geu,gt,gtu"))
+
+;; For NE, cstore uses sltu instructions in which the first operand is $0.
+;; This isn't possible in mips16 code.
+
+(define_predicate "mips_cstore_operator"
+ (ior (match_code "eq,gt,gtu,ge,geu,lt,ltu,le,leu")
+ (and (match_code "ne") (match_test "!TARGET_MIPS16"))))
+
+(define_predicate "small_data_pattern"
+ (and (match_code "set,parallel,unspec,unspec_volatile,prefetch")
+ (match_test "mips_small_data_pattern_p (op)")))
diff --git a/gcc/config/mips/r3900.h b/gcc/config/mips/r3900.h
new file mode 100644
index 000000000..557ca3e47
--- /dev/null
+++ b/gcc/config/mips/r3900.h
@@ -0,0 +1,40 @@
+/* Definitions of MIPS sub target machine for GNU compiler.
+ Toshiba r3900. You should include mips.h after this.
+
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 2004,
+ 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Gavin Koch (gavin@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef MIPS_CPU_STRING_DEFAULT
+#define MIPS_CPU_STRING_DEFAULT "r3900"
+#define MIPS_ISA_DEFAULT 1
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { MULTILIB_ENDIAN_DEFAULT, "msoft-float" }
+
+/* We use the MIPS EABI by default. */
+#undef MIPS_ABI_DEFAULT
+#define MIPS_ABI_DEFAULT ABI_EABI
+
+/* By default (if not mips-something-else), produce code for the r3900. */
+#undef SUBTARGET_CC1_SPEC
+#define SUBTARGET_CC1_SPEC "\
+%{mhard-float:%e-mhard-float not supported} \
+%{msingle-float:%{msoft-float: \
+ %e-msingle-float and -msoft-float cannot both be specified}}"
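+
+/* For example (illustrative), passing -mhard-float makes the driver
+   report "-mhard-float not supported" via the %e spec directive, and
+   combining -msingle-float with -msoft-float is rejected likewise. */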
diff --git a/gcc/config/mips/rtems.h b/gcc/config/mips/rtems.h
new file mode 100644
index 000000000..74da4f706
--- /dev/null
+++ b/gcc/config/mips/rtems.h
@@ -0,0 +1,35 @@
+/* Definitions for rtems targeting a MIPS using ELF.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+do { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+} while (0)
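+
+/* RTEMS-specific code can then test for the target, e.g.
+   (illustrative):
+
+     #if defined(__rtems__) && defined(__mips__)
+     ...
+     #endif  */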
+
+/* No sdata.  The RTEMS BSPs expect -G0.  */
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
diff --git a/gcc/config/mips/sb1.md b/gcc/config/mips/sb1.md
new file mode 100644
index 000000000..8c0b7608e
--- /dev/null
+++ b/gcc/config/mips/sb1.md
@@ -0,0 +1,579 @@
+;; Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; DFA-based pipeline description for Broadcom SB-1
+;;
+
+;; The Broadcom SB-1 core is 4-way superscalar, in-order. It has 2 load/store
+;; pipes (one of which can support some ALU operations), 2 alu pipes, 2 FP
+;; pipes, and 1 MDMX pipe. It can issue 2 ls insns and 2 exe/fpu/mdmx insns
+;; each cycle.
+
+;; We model the 4-way issue by ordering unit choices. The possible choices are
+;; {ex1,fp1}|{ex0,fp0}|ls1|ls0. Instructions issue to the first eligible unit
+;; in the list in most cases. Non-indexed load/stores issue to ls0 first.
+;; Simple ALU operations issue to ls1 if it is still available and their
+;; operands are ready (no co-issue with loads); otherwise they issue to
+;; the first available ex unit.
+
+;; When exceptions are enabled, FP insns can only be issued to fp1. This is
+;; to ensure that instructions complete in order. The -mfp-exceptions option
+;; can be used to specify whether the system has FP exceptions enabled or not.
+
+;; In 32-bit mode, dependent FP can't co-issue with load, and only one FP exe
+;; insn can issue per cycle (fp1).
+
+;; The A1 MDMX pipe is separate from the FP pipes, but uses the same register
+;; file. As a result, once an MDMX insn is issued, no FP insns can be issued
+;; for 3 cycles. When an FP insn is issued, no MDMX insn can be issued for
+;; 5 cycles. This is currently not handled because there is no MDMX insn
+;; support as yet.
+
+;;
+;; We use two automata. sb1_cpu_div is for the integer divides, which are
+;; not pipelined. sb1_cpu is for everything else.
+;;
+(define_automaton "sb1_cpu, sb1_cpu_div")
+
+;; Load/store function units.
+(define_cpu_unit "sb1_ls0" "sb1_cpu")
+(define_cpu_unit "sb1_ls1" "sb1_cpu")
+
+;; CPU function units.
+(define_cpu_unit "sb1_ex0" "sb1_cpu")
+(define_cpu_unit "sb1_ex1" "sb1_cpu")
+
+;; The divide unit is not pipelined, and blocks hi/lo reads and writes.
+(define_cpu_unit "sb1_div" "sb1_cpu_div")
+;; DMULT blocks any multiply from issuing in the next cycle.
+(define_cpu_unit "sb1_mul" "sb1_cpu")
+
+;; Floating-point units.
+(define_cpu_unit "sb1_fp0" "sb1_cpu")
+(define_cpu_unit "sb1_fp1" "sb1_cpu")
+
+;; Can only issue to one of the ex and fp pipes at a time.
+(exclusion_set "sb1_ex0" "sb1_fp0")
+(exclusion_set "sb1_ex1" "sb1_fp1")
+
+;; Define an SB-1 specific attribute to simplify some FP descriptions.
+;; We can use 2 FP pipes only if we have 64-bit FP code, and exceptions are
+;; disabled.
+
+(define_attr "sb1_fp_pipes" "one,two"
+ (cond [(and (ne (symbol_ref "TARGET_FLOAT64") (const_int 0))
+ (eq (symbol_ref "TARGET_FP_EXCEPTIONS") (const_int 0)))
+ (const_string "two")]
+ (const_string "one")))
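+
+;; For example (illustrative), -mfp64 with FP exceptions disabled via
+;; -mno-fp-exceptions selects "two"; 32-bit FP registers, or FP
+;; exceptions left enabled, select "one".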
+
+;; Define reservations for common combinations.
+
+;; For long cycle operations, the FPU has a 4 cycle pipeline that repeats,
+;; effectively re-issuing the operation every 4 cycles. This means that we
+;; can have at most 4 long-cycle operations per pipe.
+
+;; ??? The fdiv operations should be e.g.
+;; sb1_fp1_4cycles*7" | "sb1_fp0_4cycles*7
+;; but the DFA is too large when we do that. Perhaps have to use scheduler
+;; hooks here.
+
+;; ??? Try limiting scheduler to 2 long latency operations, and see if this
+;; results in a usable DFA, and whether it helps code performance.
+
+;;(define_reservation "sb1_fp0_4cycles" "sb1_fp0, nothing*3")
+;;(define_reservation "sb1_fp1_4cycles" "sb1_fp1, nothing*3")
+
+;;
+;; The ordering of the instruction-execution-path/resource-usage
+;; descriptions (also known as reservation RTL) is roughly ordered
+;; based on the define attribute RTL for the "type" classification.
+;; When modifying, remember that the first test that matches is the
+;; reservation used!
+;;
+
+(define_insn_reservation "ir_sb1_unknown" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "unknown,multi"))
+ "sb1_ls0+sb1_ls1+sb1_ex0+sb1_ex1+sb1_fp0+sb1_fp1")
+
+;; A predicted taken branch causes a 2 cycle ifetch bubble, a predicted
+;; not taken branch causes a 0 cycle ifetch bubble, and a mispredicted
+;; branch causes an 8 cycle ifetch bubble. We assume all branches are
+;; predicted not taken.
+
+;; ??? This assumption that branches are predicted not taken should be
+;; investigated. Maybe using 2 here will give better results.
+
+(define_insn_reservation "ir_sb1_branch" 0
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "branch,jump,call"))
+ "sb1_ex0")
+
+;; ??? This is 1 cycle for ldl/ldr to ldl/ldr when they use the same data
+;; register as destination.
+
+;; ??? SB-1 can co-issue a load with a dependent arith insn if it executes on
+;; an EX unit. It cannot co-issue if the dependent insn executes on an LS unit.
+;; SB-1A can always co-issue here.
+
+;; A load normally has a latency of zero cycles. In some cases, dependent
+;; insns can be issued in the same cycle. However, a value of 1 gives
+;; better performance in empirical testing.
+
+(define_insn_reservation "ir_sb1_load" 1
+ (and (eq_attr "cpu" "sb1")
+ (eq_attr "type" "load,prefetch"))
+ "sb1_ls0 | sb1_ls1")
+
+(define_insn_reservation "ir_sb1a_load" 0
+ (and (eq_attr "cpu" "sb1a")
+ (eq_attr "type" "load,prefetch"))
+ "sb1_ls0 | sb1_ls1")
+
+;; Can not co-issue fpload with fp exe when in 32-bit mode.
+
+(define_insn_reservation "ir_sb1_fpload" 0
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fpload")
+ (ne (symbol_ref "TARGET_FLOAT64")
+ (const_int 0))))
+ "sb1_ls0 | sb1_ls1")
+
+(define_insn_reservation "ir_sb1_fpload_32bitfp" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fpload")
+ (eq (symbol_ref "TARGET_FLOAT64")
+ (const_int 0))))
+ "sb1_ls0 | sb1_ls1")
+
+;; Indexed loads can only execute on LS1 pipe.
+
+(define_insn_reservation "ir_sb1_fpidxload" 0
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fpidxload")
+ (ne (symbol_ref "TARGET_FLOAT64")
+ (const_int 0))))
+ "sb1_ls1")
+
+(define_insn_reservation "ir_sb1_fpidxload_32bitfp" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fpidxload")
+ (eq (symbol_ref "TARGET_FLOAT64")
+ (const_int 0))))
+ "sb1_ls1")
+
+;; prefx can only execute on the ls1 pipe.
+
+(define_insn_reservation "ir_sb1_prefetchx" 0
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "prefetchx"))
+ "sb1_ls1")
+
+;; ??? There is a 4.5 cycle latency if a store is followed by a load, and
+;; there is a RAW dependency.
+
+(define_insn_reservation "ir_sb1_store" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "store"))
+ "sb1_ls0+sb1_ex1 | sb1_ls0+sb1_ex0 | sb1_ls1+sb1_ex1 | sb1_ls1+sb1_ex0")
+
+(define_insn_reservation "ir_sb1_fpstore" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "fpstore"))
+ "sb1_ls0+sb1_fp1 | sb1_ls0+sb1_fp0 | sb1_ls1+sb1_fp1 | sb1_ls1+sb1_fp0")
+
+;; Indexed stores can only execute on LS1 pipe.
+
+(define_insn_reservation "ir_sb1_fpidxstore" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "fpidxstore"))
+ "sb1_ls1+sb1_fp1 | sb1_ls1+sb1_fp0")
+
+;; Load latencies are 3 cycles for one load to another load or store (address
+;; only). This is 0 cycles for one load to a store using it as the data
+;; written.
+
+;; This assumes that if a load is dependent on a previous insn, then it must
+;; be an address dependence.
+
+(define_bypass 3
+ "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
+ ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp"
+ "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
+ ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
+
+(define_bypass 3
+ "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
+ ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp"
+ "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
+ "mips_store_data_bypass_p")
+
+;; On SB-1, simple alu instructions can execute on the LS1 unit.
+
+;; ??? A simple alu insn issued on an LS unit has 0 cycle latency to an EX
+;; insn, to a store (for data), and to an xfer insn. It has 1 cycle latency to
+;; another LS insn (excluding store data). A simple alu insn issued on an EX
+;; unit has a latency of 5 cycles when the result goes to an LS unit (excluding
+;; store data), otherwise a latency of 1 cycle.
+
+;; ??? We cannot handle latencies properly for simple alu instructions
+;; within the DFA pipeline model. Latencies can be defined only from one
+;; insn reservation to another. We can't make them depend on which function
+;; unit was used. This isn't a DFA flaw. There is a conflict here, as we
+;; need to know the latency before we can determine which unit will be
+;; available, but we need to know which unit it is issued to before we can
+;; compute the latency. Perhaps this can be handled via scheduler hooks.
+;; This needs to be investigated.
+
+;; ??? Optimal scheduling taking the LS units into account seems to require
+;; a pre-scheduling pass. We need to determine which instructions feed results
+;; into store/load addresses, and thus benefit most from being issued to the
+;; LS unit. Also, we need to prune the list to ensure we don't overschedule
+;; insns to the LS unit, and that we don't conflict with insns that need LS1
+;; such as indexed loads. We then need to emit nops to ensure that simple
+;; alu instructions that are not supposed to be scheduled to LS1 don't
+;; accidentally end up there because LS1 is free when they are issued. This
+;; will be a lot of work, and it isn't clear how useful it will be.
+
+;; Empirical testing shows that 2 gives the best result.
+
+(define_insn_reservation "ir_sb1_simple_alu" 2
+ (and (eq_attr "cpu" "sb1")
+ (eq_attr "type" "const,arith,logical,move,signext"))
+ "sb1_ls1 | sb1_ex1 | sb1_ex0")
+
+;; On SB-1A, simple alu instructions cannot execute on the LS1 unit, and we
+;; have none of the above problems.
+
+(define_insn_reservation "ir_sb1a_simple_alu" 1
+ (and (eq_attr "cpu" "sb1a")
+ (eq_attr "type" "const,arith,logical,move,signext"))
+ "sb1_ex1 | sb1_ex0")
+
+;; ??? condmove also includes some FP instructions that execute on the FP
+;; units. This needs to be clarified.
+
+(define_insn_reservation "ir_sb1_alu" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "condmove,nop,shift"))
+ "sb1_ex1 | sb1_ex0")
+
+;; These are type arith/darith that only execute on the EX0 unit.
+
+(define_insn_reservation "ir_sb1_alu_0" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "slt,clz,trap"))
+ "sb1_ex0")
+
+;; An alu insn issued on an EX unit has a latency of 5 cycles when the
+;; result goes to an LS unit (excluding store data).
+
+;; This assumes that if a load is dependent on a previous insn, then it must
+;; be an address dependence.
+
+(define_bypass 5
+ "ir_sb1a_simple_alu,ir_sb1_alu,ir_sb1_alu_0,ir_sb1_mfhi,ir_sb1_mflo"
+ "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
+ ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
+
+(define_bypass 5
+ "ir_sb1a_simple_alu,ir_sb1_alu,ir_sb1_alu_0,ir_sb1_mfhi,ir_sb1_mflo"
+ "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
+ "mips_store_data_bypass_p")
+
+;; mf{hi,lo} is 1 cycle.
+
+(define_insn_reservation "ir_sb1_mfhi" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "mfhilo")
+ (not (match_operand 1 "lo_operand"))))
+ "sb1_ex1")
+
+(define_insn_reservation "ir_sb1_mflo" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "mfhilo")
+ (match_operand 1 "lo_operand")))
+ "sb1_ex1")
+
+;; mt{hi,lo} to mul/div is 4 cycles.
+
+(define_insn_reservation "ir_sb1_mthilo" 4
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "mthilo"))
+ "sb1_ex1")
+
+;; mt{hi,lo} to mf{hi,lo} is 3 cycles.
+
+(define_bypass 3 "ir_sb1_mthilo" "ir_sb1_mfhi,ir_sb1_mflo")
+
+;; multiply latency to an EX operation is 3 cycles.
+
+;; ??? Should check whether we need to make multiply conflict with moves
+;; to/from hilo registers.
+
+(define_insn_reservation "ir_sb1_mulsi" 3
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "SI")))
+ "sb1_ex1+sb1_mul")
+
+;; muldi to mfhi is 4 cycles.
+;; Blocks any other multiply insn issue for 1 cycle.
+
+(define_insn_reservation "ir_sb1_muldi" 4
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")))
+ "sb1_ex1+sb1_mul, sb1_mul")
+
+;; muldi to mflo is 3 cycles.
+
+(define_bypass 3 "ir_sb1_muldi" "ir_sb1_mflo")
+
+;; mul latency is 7 cycles if the result is used by any LS insn.
+
+;; This assumes that if a load is dependent on a previous insn, then it must
+;; be an address dependence.
+
+(define_bypass 7
+ "ir_sb1_mulsi,ir_sb1_muldi"
+ "ir_sb1_load,ir_sb1a_load,ir_sb1_fpload,ir_sb1_fpload_32bitfp,
+ ir_sb1_fpidxload,ir_sb1_fpidxload_32bitfp,ir_sb1_prefetchx")
+
+(define_bypass 7
+ "ir_sb1_mulsi,ir_sb1_muldi"
+ "ir_sb1_store,ir_sb1_fpstore,ir_sb1_fpidxstore"
+ "mips_store_data_bypass_p")
+
+;; The divide unit is not pipelined. Divide busy is asserted in the 4th
+;; cycle, and then deasserted on the latency cycle. So only one divide at
+;; a time, but the first/last 4 cycles can overlap.
+
+;; ??? All divides block writes to hi/lo regs. hi/lo regs are written 4 cycles
+;; after the latency cycle for divides (e.g. 40/72). dmult writes lo in
+;; cycle 7, and hi in cycle 8. All other insns write hi/lo regs in cycle 7.
+;; Default for output dependencies is the difference in latencies, which is
+;; only 1 cycle off here, e.g. div to mtlo stalls for 32 cycles, but should
+;; stall for 33 cycles. This does not seem significant enough to worry about.
+
+(define_insn_reservation "ir_sb1_divsi" 36
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "sb1_ex1, nothing*3, sb1_div*32")
+
+(define_insn_reservation "ir_sb1_divdi" 68
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "sb1_ex1, nothing*3, sb1_div*64")
+
+(define_insn_reservation "ir_sb1_fpu_2pipes" 4
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fmove,fadd,fmul,fabs,fneg,fcvt,frdiv1,frsqrt1")
+ (eq_attr "sb1_fp_pipes" "two")))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_fpu_1pipe" 4
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fmove,fadd,fmul,fabs,fneg,fcvt,frdiv1,frsqrt1")
+ (eq_attr "sb1_fp_pipes" "one")))
+ "sb1_fp1")
+
+(define_insn_reservation "ir_sb1_fpu_step2_2pipes" 8
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv2,frsqrt2")
+ (eq_attr "sb1_fp_pipes" "two")))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_fpu_step2_1pipe" 8
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv2,frsqrt2")
+ (eq_attr "sb1_fp_pipes" "one")))
+ "sb1_fp1")
+
+;; ??? madd/msub 4-cycle latency to itself (same fr?), but 8 cycle latency
+;; otherwise.
+
+;; ??? Blocks issue of another non-madd/msub after 4 cycles.
+
+(define_insn_reservation "ir_sb1_fmadd_2pipes" 8
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "sb1_fp_pipes" "two")))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_fmadd_1pipe" 8
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fmadd")
+ (eq_attr "sb1_fp_pipes" "one")))
+ "sb1_fp1")
+
+(define_insn_reservation "ir_sb1_fcmp" 4
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "fcmp"))
+ "sb1_fp1")
+
+;; mtc1 latency 5 cycles.
+
+(define_insn_reservation "ir_sb1_mtxfer" 5
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "mtc"))
+ "sb1_fp0")
+
+;; mfc1 latency 1 cycle.
+
+(define_insn_reservation "ir_sb1_mfxfer" 1
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (eq_attr "type" "mfc"))
+ "sb1_fp0")
+
+;; ??? Can deliver at most 1 result every 6 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_divsf_2pipes" 24
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_divsf_1pipe" 24
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 8 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_divdf_2pipes" 32
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_divdf_1pipe" 32
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fdiv")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 3 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_recipsf_2pipes" 12
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_recipsf_1pipe" 12
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 5 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_recipdf_2pipes" 20
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_recipdf_1pipe" 20
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frdiv")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 7 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_sqrtsf_2pipes" 28
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fsqrt")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_sqrtsf_1pipe" 28
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fsqrt")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 10 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_sqrtdf_2pipes" 40
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fsqrt")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_sqrtdf_1pipe" 40
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "fsqrt")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 4 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_rsqrtsf_2pipes" 16
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frsqrt")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_rsqrtsf_1pipe" 16
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frsqrt")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
+
+;; ??? Can deliver at most 1 result every 7 cycles because of issue
+;; restrictions.
+
+(define_insn_reservation "ir_sb1_rsqrtdf_2pipes" 28
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frsqrt")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "two"))))
+ "sb1_fp1 | sb1_fp0")
+
+(define_insn_reservation "ir_sb1_rsqrtdf_1pipe" 28
+ (and (eq_attr "cpu" "sb1,sb1a")
+ (and (eq_attr "type" "frsqrt")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "sb1_fp_pipes" "one"))))
+ "sb1_fp1")
diff --git a/gcc/config/mips/sdb.h b/gcc/config/mips/sdb.h
new file mode 100644
index 000000000..27a42df7a
--- /dev/null
+++ b/gcc/config/mips/sdb.h
@@ -0,0 +1,87 @@
+/* Generate SDB debugging info.
+ Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Note that no configuration uses sdb as its preferred format. */
+
+#define SDB_DEBUGGING_INFO 1
+
+/* Forward references to tags are allowed. */
+#define SDB_ALLOW_FORWARD_REFERENCES
+
+/* Unknown tags are also allowed. */
+#define SDB_ALLOW_UNKNOWN_REFERENCES
+
+/* Block start/end next label #. */
+extern int sdb_label_count;
+
+/* Starting line of current function. */
+extern int sdb_begin_function_line;
+
+/* For block start and end, we create labels, so that
+ later we can figure out where the correct offset is.
+ The normal .ent/.end serve well enough for functions,
+ so those are just commented out. */
+
+#define PUT_SDB_BLOCK_START(LINE) \
+do { \
+ fprintf (asm_out_file, \
+ "%sLb%d:\n\t.begin\t%sLb%d\t%d\n", \
+ LOCAL_LABEL_PREFIX, \
+ sdb_label_count, \
+ LOCAL_LABEL_PREFIX, \
+ sdb_label_count, \
+ (LINE)); \
+ sdb_label_count++; \
+} while (0)
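+
+/* Illustrative output for the first block, started at line 42,
+   assuming a "$" LOCAL_LABEL_PREFIX:
+
+       $Lb0:
+               .begin  $Lb0    42
+*/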
+
+#define PUT_SDB_BLOCK_END(LINE) \
+do { \
+ fprintf (asm_out_file, \
+ "%sLe%d:\n\t.bend\t%sLe%d\t%d\n", \
+ LOCAL_LABEL_PREFIX, \
+ sdb_label_count, \
+ LOCAL_LABEL_PREFIX, \
+ sdb_label_count, \
+ (LINE)); \
+ sdb_label_count++; \
+} while (0)
+
+#define PUT_SDB_FUNCTION_START(LINE)
+
+#define PUT_SDB_FUNCTION_END(LINE) \
+do { \
+ SDB_OUTPUT_SOURCE_LINE (asm_out_file, LINE + sdb_begin_function_line); \
+} while (0)
+
+#define PUT_SDB_EPILOGUE_END(NAME)
+
+/* We need to use .esize and .etype instead of .size and .type to
+ avoid conflicting with ELF directives. */
+#undef PUT_SDB_SIZE
+#define PUT_SDB_SIZE(a) \
+do { \
+ fprintf (asm_out_file, "\t.esize\t" HOST_WIDE_INT_PRINT_DEC ";", \
+ (HOST_WIDE_INT) (a)); \
+} while (0)
+
+#undef PUT_SDB_TYPE
+#define PUT_SDB_TYPE(a) \
+do { \
+ fprintf (asm_out_file, "\t.etype\t0x%x;", (a)); \
+} while (0)
diff --git a/gcc/config/mips/sde.h b/gcc/config/mips/sde.h
new file mode 100644
index 000000000..7194c1e54
--- /dev/null
+++ b/gcc/config/mips/sde.h
@@ -0,0 +1,135 @@
+/* Definitions of target machine for GNU compiler.
+ MIPS SDE version.
+ Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ /* Make sure a -mips option is present. This helps us to pick \
+ the right multilib, and also makes the later specs easier \
+ to write. */ \
+ MIPS_ISA_LEVEL_SPEC, \
+ \
+ /* Infer the default float setting from -march. */ \
+ MIPS_ARCH_FLOAT_SPEC, \
+ \
+ /* If no ABI option is specified, infer one from the ISA level \
+ or -mgp setting. */ \
+ "%{!mabi=*: %{" MIPS_32BIT_OPTION_SPEC ": -mabi=32;: -mabi=n32}}", \
+ \
+ /* Remove a redundant -mfp64 for -mabi=n32; we want the !mfp64 \
+ multilibs. There's no need to check whether the architecture \
+ is 64-bit; cc1 will complain if it isn't. */ \
+ "%{mabi=n32: %<mfp64}", \
+ \
+ /* Make sure that an endian option is always present. This makes \
+ things like LINK_SPEC easier to write. */ \
+ "%{!EB:%{!EL:%(endian_spec)}}", \
+ \
+ /* Configuration-independent MIPS rules. */ \
+ BASE_DRIVER_SELF_SPECS
+
+/* Use trap rather than break for all but MIPS I ISA. Force -no-mips16,
+ so that MIPS16 assembler code requires an explicit ".set mips16".
+ Very little hand-written MIPS16 assembler exists, and some build
+ systems expect code to be assembled as non-MIPS16 even if the
+ prevailing compiler flags select -mips16. */
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "\
+%{!mips1:--trap} \
+%{mips16:-no-mips16}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%(endian_spec) \
+%{G*} %{mips1} %{mips2} %{mips3} %{mips4} %{mips32*} %{mips64*} \
+%{shared} \
+%{mabi=n32:-melf32%{EB:b}%{EL:l}tsmipn32} \
+%{mabi=64:-melf64%{EB:b}%{EL:l}tsmip} \
+%{mabi=32:-melf32%{EB:b}%{EL:l}tsmip}"
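+
+/* For example, "-mabi=n32 -EL" passes -melf32ltsmipn32 to the linker,
+   while "-mabi=64 -EB" passes -melf64btsmip. */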
+
+#undef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 0
+
+/* SDE-MIPS won't ever support SDB debugging info. */
+#undef SDB_DEBUGGING_INFO
+
+/* Describe how we implement __builtin_eh_return. */
+
+/* At the moment, nothing appears to use more than 2 EH data registers.
+ The chosen registers must not clash with the return register ($2),
+ EH_RETURN_STACKADJ ($3), or MIPS_EPILOGUE_TEMP ($5), and they must
+ be general MIPS16 registers. Pick $6 and $7. */
+#undef EH_RETURN_DATA_REGNO
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 2 ? 7 - (N) : INVALID_REGNUM)
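+
+/* That is, EH data register 0 is $7 and register 1 is $6; any further
+   request yields INVALID_REGNUM. */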
+
+/* Use $5 as a temporary for both MIPS16 and non-MIPS16. */
+#undef MIPS_EPILOGUE_TEMP_REGNUM
+#define MIPS_EPILOGUE_TEMP_REGNUM \
+ (cfun->machine->interrupt_handler_p ? K0_REG_NUM : GP_REG_FIRST + 5)
+
+/* Using long will always be right for size_t and ptrdiff_t, since
+ sizeof(long) must equal sizeof(void *), following from the setting
+ of the -mlong64 option. */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Use standard ELF-style local labels (not '$' as on early Irix). */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Use periods rather than dollar signs in special g++ assembler names. */
+#define NO_DOLLAR_IN_LABEL
+
+/* Attach a special .ident directive to the end of the file to identify
+ the version of GCC which compiled this code. */
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP "\t.ident\t"
+
+/* Output #ident string into the ELF .comment section, so it doesn't
+ form part of the load image, and so that it can be stripped. */
+#undef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(STREAM, STRING) \
+ fprintf (STREAM, "%s\"%s\"\n", IDENT_ASM_OP, STRING);
+
+/* Currently we don't support 128-bit long doubles, so for now we force
+ n32 to be 64-bit. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#ifdef IN_LIBGCC2
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+/* Force all .init and .fini entries to be 32-bit, not mips16, so that
+ in a mixed environment they are all the same mode. The crti.asm and
+ crtn.asm files will also be compiled as 32-bit due to the
+ -no-mips16 flag in SUBTARGET_ASM_SPEC above. */
+#undef CRT_CALL_STATIC_FUNCTION
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ .set push\n\
+ .set nomips16\n\
+ jal " USER_LABEL_PREFIX #FUNC "\n\
+ .set pop\n\
+ " TEXT_SECTION_ASM_OP);
diff --git a/gcc/config/mips/sde.opt b/gcc/config/mips/sde.opt
new file mode 100644
index 000000000..61b0ebedd
--- /dev/null
+++ b/gcc/config/mips/sde.opt
@@ -0,0 +1,28 @@
+; MIPS SDE options.
+;
+; Copyright (C) 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; -mcode-xonly is a traditional alias for -mcode-readable=pcrel and
+; -mno-data-in-code is a traditional alias for -mcode-readable=no.
+
+mno-data-in-code
+Target RejectNegative Alias(mcode-readable=, no)
+
+mcode-xonly
+Target RejectNegative Alias(mcode-readable=, pcrel)
diff --git a/gcc/config/mips/sdemtk.h b/gcc/config/mips/sdemtk.h
new file mode 100644
index 000000000..16c3a6cba
--- /dev/null
+++ b/gcc/config/mips/sdemtk.h
@@ -0,0 +1,103 @@
+/* Definitions of target machine for GNU compiler.
+ MIPS SDE version, for use with the SDE C library rather than newlib.
+ Copyright (C) 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("system=sde"); \
+ builtin_assert ("system=posix"); \
+ builtin_define ("__SDE_MIPS__"); \
+ \
+ /* Deprecated: use __mips_isa_rev >= 2. */ \
+ if (ISA_MIPS32R2) \
+ builtin_define ("__mipsr2"); \
+ \
+ /* Deprecated: use __mips_fpr == 64. */ \
+ if (TARGET_FLOAT64) \
+ builtin_define ("__mipsfp64"); \
+ \
+ if (TARGET_NO_FLOAT) \
+ builtin_define ("__NO_FLOAT"); \
+ else if (TARGET_SOFT_FLOAT_ABI) \
+ builtin_define ("__SOFT_FLOAT"); \
+ else if (TARGET_SINGLE_FLOAT) \
+ builtin_define ("__SINGLE_FLOAT"); \
+ \
+ if (TARGET_BIG_ENDIAN) \
+ { \
+ builtin_assert ("endian=big"); \
+ builtin_assert ("cpu=mipseb"); \
+ } \
+ else \
+ { \
+ builtin_assert ("endian=little"); \
+ builtin_assert ("cpu=mipsel"); \
+ } \
+ } \
+ while (0)
+
+/* For __clear_cache in libgcc2.c. */
+#ifdef IN_LIBGCC2
+extern void mips_sync_icache (void *beg, unsigned long len);
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(beg, end) \
+ mips_sync_icache (beg, end - beg)
+#endif
+
+/* For mips_cache_flush_func in mips.opt. */
+#undef CACHE_FLUSH_FUNC
+#define CACHE_FLUSH_FUNC "mips_sync_icache"
+
+/* For inline code which needs to sync the icache and dcache,
+ noting that the SDE library takes arguments (address, size). */
+#undef MIPS_ICACHE_SYNC
+#define MIPS_ICACHE_SYNC(ADDR, SIZE) \
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mips_cache_flush_func), \
+ LCT_NORMAL, VOIDmode, 2, ADDR, Pmode, \
+ SIZE, TYPE_MODE (sizetype))
+
+/* This version of _mcount does not pop 2 words from the stack. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ { \
+ mips_push_asm_switch (&mips_noat); \
+ /* _mcount treats $2 as the static chain register. */ \
+ if (cfun->static_chain_decl != NULL) \
+ fprintf (FILE, "\tmove\t%s,%s\n", reg_names[2], \
+ reg_names[STATIC_CHAIN_REGNUM]); \
+ /* MIPS16 code passes saved $ra in $v1 instead of $at. */ \
+ fprintf (FILE, "\tmove\t%s,%s\n", \
+ reg_names[GP_REG_FIRST + (TARGET_MIPS16 ? 3 : 1)], \
+ reg_names[RETURN_ADDR_REGNUM]); \
+ fprintf (FILE, "\tjal\t_mcount\n"); \
+ mips_pop_asm_switch (&mips_noat); \
+ /* _mcount treats $2 as the static chain register. */ \
+ if (cfun->static_chain_decl != NULL) \
+ fprintf (FILE, "\tmove\t%s,%s\n", reg_names[STATIC_CHAIN_REGNUM], \
+ reg_names[2]); \
+ }
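+
+/* Illustrative non-MIPS16 output for a function without a static
+   chain (exact register names may differ):
+
+       .set    noat
+       move    $1,$31          # pass the saved return address to _mcount
+       jal     _mcount
+       .set    at
+*/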
+
+/* ...nor does the call sequence preserve $31. */
+#undef MIPS_SAVE_REG_FOR_PROFILING_P
+#define MIPS_SAVE_REG_FOR_PROFILING_P(REGNO) ((REGNO) == RETURN_ADDR_REGNUM)
+
+/* Compile in support for the -mno-float option. */
+#define TARGET_SUPPORTS_NO_FLOAT 1
diff --git a/gcc/config/mips/sr71k.md b/gcc/config/mips/sr71k.md
new file mode 100644
index 000000000..9b2a784b1
--- /dev/null
+++ b/gcc/config/mips/sr71k.md
@@ -0,0 +1,337 @@
+;; Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; .........................
+;;
+;; DFA-based pipeline description for Sandcraft SR3 (MIPS64 based)
+;;
+;; The SR3 is described as:
+;; - nine-stage pipeline, insn buffering with out-of-order issue to
+;; multiple function units, with an average dispatch rate of 2
+;; insns per cycle (max 6 insns: 2 fpu, 4 cpu).
+;;
+;; The details on this are scant except for a diagram in
+;; Chap. 6 of Rev. 1.0 SR3 Spec.
+;;
+;; The model employed below is designed to closely approximate the
+;; published latencies. Emulation of out-of-order issue and the insn
+;; buffering is done via a VLIW dispatch style (with a packing of 6 insns);
+;; the function unit reservations restrictions (define_*_set) are
+;; contrived to support published timings.
+;;
+;; Reference:
+;; "SR3 Microprocessor Specification, System development information,"
+;; Revision 1.0, 13 December 2000.
+;;
+;;
+;; Reservation model is based on:
+;; 1) Figure 6-1, from the 1.0 specification.
+;; 2) Chapter 19, from the 1.0 specification.
+;; 3) following questions (Red Hat)/answers (Sandcraft):
+;; RH> From Section 19.1
+;; RH> 1) In terms of figure 6-1, are all the instructions in
+;; RH> table 19-1 restricted to ALUx?  When ALUx is not in use
+;; RH> for an instruction in table 19-1, is it fully compatible
+;; RH> with all insns that issue to ALUy?
+;;
+;; Yes, all the instructions in Table 19-1 only go to ALUX, and all the
+;; instructions that can be issued to ALUY can also be issued to ALUX.
+;;
+;;
+;; RH> From Section 19.2
+;; RH> 2) Explain conditional moves execution path (in terms of
+;; RH> figure 6-1)
+;;
+;; Conditional move of integer registers (based on floating point condition
+;; codes or integer register value) go to ALUX or ALUY.
+;;
+;; RH> 3) Explain floating point store execution path (in terms of
+;; RH> figure 6-1)
+;;
+;; Floating point stores go to Ld/St and go to MOV in the floating point
+;; pipeline.
+;;
+;; Floating point loads go to Ld/St and go to LOAD in the floating point
+;; pipeline.
+;;
+;; RH> 4) Explain branch on floating condition (in terms of figure 6-1)
+;;
+;; Branch on floating condition go to BRU.
+;;
+;; RH> 5) Is the column for single RECIP instruction latency correct?
+;; RH> What about for RSQRT single and double?
+;;
+;; The latency/repeat for RECIP and RSQRT are correct.
+;;
+
+;;
+;; Use four automata to isolate long latency operations, and to
+;; reduce the complexity of cpu+fpu, reducing space.
+;;
+(define_automaton "sr71_cpu, sr71_cpu1, sr71_cp1, sr71_cp2, sr71_fextra, sr71_imacc")
+
+;; feeders for CPU function units and feeders for fpu (CP1 interface)
+(define_cpu_unit "sr_iss0,sr_iss1,sr_iss2,sr_iss3,sr_iss4,sr_iss5" "sr71_cpu")
+
+;; CPU function units
+(define_cpu_unit "ipu_bru" "sr71_cpu1")
+(define_cpu_unit "ipu_alux" "sr71_cpu1")
+(define_cpu_unit "ipu_aluy" "sr71_cpu1")
+(define_cpu_unit "ipu_ldst" "sr71_cpu1")
+(define_cpu_unit "ipu_macc_iter" "sr71_imacc")
+
+
+;; Floating-point unit (Co-processor interface 1).
+(define_cpu_unit "fpu_mov" "sr71_cp1")
+(define_cpu_unit "fpu_load" "sr71_cp1")
+(define_cpu_unit "fpu_fpu" "sr71_cp2")
+
+;; Fictitious unit to track long float insns with a separate automaton.
+(define_cpu_unit "fpu_iter" "sr71_fextra")
+
+
+;;
+;; Define common execution path (reservation) combinations
+;;
+
+;;
+(define_reservation "cpu_iss" "sr_iss0|sr_iss1|sr_iss2|sr_iss3")
+
+;; Two cycles are used for instructions using the fpu, since it runs
+;; at half the clock speed of the cpu.  By adding an extra cycle
+;; to the issue units, the default/minimum "repeat" dispatch delay
+;; is accounted for across all insns.
+(define_reservation "cp1_iss" "(sr_iss4*2)|(sr_iss5*2)")
+
+(define_reservation "serial_dispatch" "sr_iss0+sr_iss1+sr_iss2+sr_iss3+sr_iss4+sr_iss5")
+
+;; Simulate a 6 insn VLIW dispatch, 1 cycle in dispatch followed by
+;; reservation of function unit.
+(define_reservation "ri_insns" "cpu_iss,(ipu_alux|ipu_aluy)")
+(define_reservation "ri_mem" "cpu_iss,ipu_ldst")
+(define_reservation "ri_alux" "cpu_iss,ipu_alux")
+(define_reservation "ri_branch" "cpu_iss,ipu_bru")
+
+(define_reservation "rf_insn" "cp1_iss,fpu_fpu")
+(define_reservation "rf_ldmem" "cp1_iss,fpu_load")
+
+; Simultaneous reservation of the pseudo-unit keeps the cp1 fpu tied
+; up until the long cycle insn is finished...
+(define_reservation "rf_multi1" "rf_insn+fpu_iter")
+
+;;
+;; The ordering of the instruction-execution-path/resource-usage
+;; descriptions (also known as reservation RTL) is roughly ordered
+;; based on the define attribute RTL for the "type" classification.
+;; When modifying, remember that the first test that matches is the
+;; reservation used!
+;;
+
+
+(define_insn_reservation "ir_sr70_unknown" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "unknown"))
+ "serial_dispatch")
+
+
+;; Assume prediction fails.
+(define_insn_reservation "ir_sr70_branch" 6
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "branch,jump,call"))
+ "ri_branch")
+
+(define_insn_reservation "ir_sr70_load" 2
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "load"))
+ "ri_mem")
+
+(define_insn_reservation "ir_sr70_store" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "store"))
+ "ri_mem")
+
+
+;;
+;; float loads/stores flow through both cpu and cp1...
+;;
+(define_insn_reservation "ir_sr70_fload" 9
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "fpload,fpidxload"))
+ "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)")
+
+(define_insn_reservation "ir_sr70_fstore" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)")
+
+
+;; This reservation is for conditional move based on integer
+;; or floating point CC.
+(define_insn_reservation "ir_sr70_condmove" 4
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "condmove"))
+ "ri_insns")
+
+;; Try to discriminate move-from-cp1 versus move-to-cp1 as latencies
+;; are different. Like float load/store, these insns use multiple
+;; resources simultaneously.
+(define_insn_reservation "ir_sr70_xfer_from" 6
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "mfc"))
+ "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)")
+
+(define_insn_reservation "ir_sr70_xfer_to" 9
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "mtc"))
+ "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)")
+
+(define_insn_reservation "ir_sr70_hilo" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "mthilo,mfhilo"))
+ "ri_insns")
+
+(define_insn_reservation "ir_sr70_arith" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,trap"))
+ "ri_insns")
+
+;; Emulate repeat (dispatch stall) by spending extra cycle(s)
+;; in the iter unit.
+(define_insn_reservation "ir_sr70_imul_si" 4
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "SI")))
+ "ri_alux,ipu_alux,ipu_macc_iter")
+
+(define_insn_reservation "ir_sr70_imul_di" 6
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "imul,imul3,imadd")
+ (eq_attr "mode" "DI")))
+ "ri_alux,ipu_alux,(ipu_macc_iter*3)")
+
+;; Divide algorithm is early out with best latency of 7 pcycles.
+;; Use worst case for scheduling purposes.
+(define_insn_reservation "ir_sr70_idiv_si" 41
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "SI")))
+ "ri_alux,ipu_alux,(ipu_macc_iter*38)")
+
+(define_insn_reservation "ir_sr70_idiv_di" 73
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "mode" "DI")))
+ "ri_alux,ipu_alux,(ipu_macc_iter*70)")
+
+;; Extra reservations of fpu_fpu are for repeat latency.
+(define_insn_reservation "ir_sr70_fadd_sf" 8
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fadd")
+ (eq_attr "mode" "SF")))
+ "rf_insn,fpu_fpu")
+
+(define_insn_reservation "ir_sr70_fadd_df" 10
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fadd")
+ (eq_attr "mode" "DF")))
+ "rf_insn,fpu_fpu")
+
+;; Latencies for MADD, MSUB, NMADD, NMSUB assume the multiply is fused
+;; with the sub or add.
+(define_insn_reservation "ir_sr70_fmul_sf" 8
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "SF")))
+ "rf_insn,fpu_fpu")
+
+;; Tie up the fpu unit to emulate the balance for the "repeat
+;; rate" of 8 (2 cycles are spent in the iss unit).
+(define_insn_reservation "ir_sr70_fmul_df" 16
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fmul,fmadd")
+ (eq_attr "mode" "DF")))
+ "rf_insn,fpu_fpu*6")
+
+
+;; The RECIP insn uses the same type attr as div and, for SR3, has the
+;; same timings for double.  However, single RECIP has a latency of
+;; 28 -- the only way to fix this is to introduce new insn attrs.
+;; Cycles spent in the iter unit are designed to satisfy the balance
+;; of the "repeat" latency after the insn uses up the rf_multi1
+;; reservation.
+(define_insn_reservation "ir_sr70_fdiv_sf" 60
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "SF")))
+ "rf_multi1+(fpu_iter*51)")
+
+(define_insn_reservation "ir_sr70_fdiv_df" 120
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fdiv,frdiv")
+ (eq_attr "mode" "DF")))
+ "rf_multi1+(fpu_iter*109)")
+
+(define_insn_reservation "ir_sr70_fabs" 4
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "fabs,fneg,fmove"))
+ "rf_insn,fpu_fpu")
+
+(define_insn_reservation "ir_sr70_fcmp" 10
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "fcmp"))
+ "rf_insn,fpu_fpu")
+
+;; "fcvt" type attribute covers a number of diff insns, most have the same
+;; latency descriptions, a few vary. We use the
+;; most common timing (which is also worst case).
+(define_insn_reservation "ir_sr70_fcvt" 12
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "fcvt"))
+ "rf_insn,fpu_fpu*4")
+
+(define_insn_reservation "ir_sr70_fsqrt_sf" 62
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "mode" "SF")))
+ "rf_multi1+(fpu_iter*53)")
+
+(define_insn_reservation "ir_sr70_fsqrt_df" 122
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "fsqrt")
+ (eq_attr "mode" "DF")))
+ "rf_multi1+(fpu_iter*111)")
+
+(define_insn_reservation "ir_sr70_frsqrt_sf" 48
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "SF")))
+ "rf_multi1+(fpu_iter*39)")
+
+(define_insn_reservation "ir_sr70_frsqrt_df" 240
+ (and (eq_attr "cpu" "sr71000")
+ (and (eq_attr "type" "frsqrt")
+ (eq_attr "mode" "DF")))
+ "rf_multi1+(fpu_iter*229)")
+
+(define_insn_reservation "ir_sr70_multi" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "multi"))
+ "serial_dispatch")
+
+(define_insn_reservation "ir_sr70_nop" 1
+ (and (eq_attr "cpu" "sr71000")
+ (eq_attr "type" "nop"))
+ "ri_insns")
diff --git a/gcc/config/mips/st.h b/gcc/config/mips/st.h
new file mode 100644
index 000000000..363e797a9
--- /dev/null
+++ b/gcc/config/mips/st.h
@@ -0,0 +1,31 @@
+/* ST 2e / 2f GNU/Linux Configuration.
+ Copyright (C) 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The various C libraries each have their own subdirectory. */
+#undef SYSROOT_SUFFIX_SPEC
+#define SYSROOT_SUFFIX_SPEC \
+ "%{march=loongson2e:/2e ; \
+ march=loongson2f:/2f}"
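+
+/* For example, "-march=loongson2f" makes the driver search for headers
+   and libraries under the "/2f" suffix of the configured sysroot. */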
+
+#undef STARTFILE_PREFIX_SPEC
+#define STARTFILE_PREFIX_SPEC \
+ "%{mabi=32: /usr/local/lib/ /lib/ /usr/lib/} \
+ %{mabi=n32: /usr/local/lib32/ /lib32/ /usr/lib32/} \
+ %{mabi=64: /usr/local/lib64/ /lib64/ /usr/lib64/}"
diff --git a/gcc/config/mips/sync.md b/gcc/config/mips/sync.md
new file mode 100644
index 000000000..b3098b434
--- /dev/null
+++ b/gcc/config/mips/sync.md
@@ -0,0 +1,560 @@
+;; Machine Description for MIPS-based processor synchronization
+;; instructions.
+;; Copyright (C) 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec" [
+ UNSPEC_COMPARE_AND_SWAP
+ UNSPEC_COMPARE_AND_SWAP_12
+ UNSPEC_SYNC_OLD_OP
+ UNSPEC_SYNC_NEW_OP
+ UNSPEC_SYNC_NEW_OP_12
+ UNSPEC_SYNC_OLD_OP_12
+ UNSPEC_SYNC_EXCHANGE
+ UNSPEC_SYNC_EXCHANGE_12
+ UNSPEC_MEMORY_BARRIER
+])
+
+;; Atomic fetch bitwise operations.
+(define_code_iterator fetchop_bit [ior xor and])
+
+;; Atomic HI and QI operations
+(define_code_iterator atomic_hiqi_op [plus minus ior xor and])
+
+;; Atomic memory operations.
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "GENERATE_SYNC"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
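+
+;; The volatile BLKmode MEM created above acts as a wildcard memory
+;; reference, so other memory accesses cannot be moved across the
+;; barrier.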
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "GENERATE_SYNC"
+ { return mips_output_sync (); })
+
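+;; The sync_* patterns below are printed by mips_output_sync_loop as
+;; LL/SC retry loops.  For sync_compare_and_swap<mode> the emitted code
+;; is roughly (illustrative sketch; exact shape depends on the sync
+;; attributes):
+;;
+;;    1:  ll      <output>, <mem>
+;;        bne     <output>, <required oldval>, 2f
+;;        li/move <temp>, <operand 3>
+;;        sc      <temp>, <mem>
+;;        beqz    <temp>, 1b
+;;    2:
+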
+(define_insn "sync_compare_and_swap<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "dJ,dJ")
+ (match_operand:GPR 3 "arith_operand" "I,d")]
+ UNSPEC_COMPARE_AND_SWAP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "li,move")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_required_oldval" "2")
+ (set_attr "sync_insn1_op2" "3")])
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(match_operand:SHORT 0 "register_operand")
+ (match_operand:SHORT 1 "memory_operand")
+ (match_operand:SHORT 2 "general_operand")
+ (match_operand:SHORT 3 "general_operand")]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_6 = gen_compare_and_swap_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], operands[3]);
+ DONE;
+})
+
+;; Helper insn for mips_expand_atomic_qihi.
+(define_insn "compare_and_swap_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d,&d")
+ (match_operand:SI 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "d,d")
+ (match_operand:SI 3 "register_operand" "d,d")
+ (match_operand:SI 4 "reg_or_0_operand" "dJ,dJ")
+ (match_operand:SI 5 "reg_or_0_operand" "d,J")]
+ UNSPEC_COMPARE_AND_SWAP_12))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_required_oldval" "4")
+ (set_attr "sync_insn1_op2" "5")])
+
+(define_insn "sync_add<mode>"
+ [(set (match_operand:GPR 0 "memory_operand" "+R,R")
+ (unspec_volatile:GPR
+ [(plus:GPR (match_dup 0)
+ (match_operand:GPR 1 "arith_operand" "I,d"))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "addiu,addu")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_insn1_op2" "1")])
+
+(define_expand "sync_<optab><mode>"
+ [(set (match_operand:SHORT 0 "memory_operand")
+ (unspec_volatile:SHORT
+ [(atomic_hiqi_op:SHORT (match_dup 0)
+ (match_operand:SHORT 1 "general_operand"))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_4 = gen_sync_<optab>_12;
+ mips_expand_atomic_qihi (generator,
+ NULL, operands[0], operands[1], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_<optab><mode>
+(define_insn "sync_<optab>_12"
+ [(set (match_operand:SI 0 "memory_operand" "+R")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")
+ (atomic_hiqi_op:SI (match_dup 0)
+ (match_operand:SI 3 "register_operand" "dJ"))]
+ UNSPEC_SYNC_OLD_OP_12))
+ (clobber (match_scratch:SI 4 "=&d"))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<insn>")
+ (set_attr "sync_insn2" "and")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_inclusive_mask" "1")
+ (set_attr "sync_exclusive_mask" "2")
+ (set_attr "sync_insn1_op2" "3")
+ (set_attr "sync_oldval" "4")
+ (set_attr "sync_newval" "4")])
+
+(define_expand "sync_old_<optab><mode>"
+ [(parallel [
+ (set (match_operand:SHORT 0 "register_operand")
+ (match_operand:SHORT 1 "memory_operand"))
+ (set (match_dup 1)
+ (unspec_volatile:SHORT [(atomic_hiqi_op:SHORT
+ (match_dup 1)
+ (match_operand:SHORT 2 "general_operand"))]
+ UNSPEC_SYNC_OLD_OP))])]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_5 = gen_sync_old_<optab>_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_old_<optab><mode>
+(define_insn "sync_old_<optab>_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (match_operand:SI 1 "memory_operand" "+R"))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")
+ (atomic_hiqi_op:SI (match_dup 0)
+ (match_operand:SI 4 "register_operand" "dJ"))]
+ UNSPEC_SYNC_OLD_OP_12))
+ (clobber (match_scratch:SI 5 "=&d"))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<insn>")
+ (set_attr "sync_insn2" "and")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_insn1_op2" "4")
+ (set_attr "sync_newval" "5")])
+
+(define_expand "sync_new_<optab><mode>"
+ [(parallel [
+ (set (match_operand:SHORT 0 "register_operand")
+ (unspec_volatile:SHORT [(atomic_hiqi_op:SHORT
+ (match_operand:SHORT 1 "memory_operand")
+ (match_operand:SHORT 2 "general_operand"))]
+ UNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
+ UNSPEC_SYNC_NEW_OP))])]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_5 = gen_sync_new_<optab>_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_new_<optab><mode>
+(define_insn "sync_new_<optab>_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "memory_operand" "+R")
+ (match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")
+ (atomic_hiqi_op:SI (match_dup 0)
+ (match_operand:SI 4 "register_operand" "dJ"))]
+ UNSPEC_SYNC_NEW_OP_12))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)] UNSPEC_SYNC_NEW_OP_12))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<insn>")
+ (set_attr "sync_insn2" "and")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_insn1_op2" "4")])
+
+(define_expand "sync_nand<mode>"
+ [(set (match_operand:SHORT 0 "memory_operand")
+ (unspec_volatile:SHORT
+ [(match_dup 0)
+ (match_operand:SHORT 1 "general_operand")]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_4 = gen_sync_nand_12;
+ mips_expand_atomic_qihi (generator,
+ NULL, operands[0], operands[1], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_nand<mode>
+(define_insn "sync_nand_12"
+ [(set (match_operand:SI 0 "memory_operand" "+R")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")
+ (match_dup 0)
+ (match_operand:SI 3 "register_operand" "dJ")]
+ UNSPEC_SYNC_OLD_OP_12))
+ (clobber (match_scratch:SI 4 "=&d"))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "and")
+ (set_attr "sync_insn2" "xor")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_inclusive_mask" "1")
+ (set_attr "sync_exclusive_mask" "2")
+ (set_attr "sync_insn1_op2" "3")
+ (set_attr "sync_oldval" "4")
+ (set_attr "sync_newval" "4")])
+
+(define_expand "sync_old_nand<mode>"
+ [(parallel [
+ (set (match_operand:SHORT 0 "register_operand")
+ (match_operand:SHORT 1 "memory_operand"))
+ (set (match_dup 1)
+ (unspec_volatile:SHORT [(match_dup 1)
+ (match_operand:SHORT 2 "general_operand")]
+ UNSPEC_SYNC_OLD_OP))])]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_5 = gen_sync_old_nand_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_old_nand<mode>
+(define_insn "sync_old_nand_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (match_operand:SI 1 "memory_operand" "+R"))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")
+ (match_operand:SI 4 "register_operand" "dJ")]
+ UNSPEC_SYNC_OLD_OP_12))
+ (clobber (match_scratch:SI 5 "=&d"))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "and")
+ (set_attr "sync_insn2" "xor")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_insn1_op2" "4")
+ (set_attr "sync_newval" "5")])
+
+(define_expand "sync_new_nand<mode>"
+ [(parallel [
+ (set (match_operand:SHORT 0 "register_operand")
+ (unspec_volatile:SHORT [(match_operand:SHORT 1 "memory_operand")
+ (match_operand:SHORT 2 "general_operand")]
+ UNSPEC_SYNC_NEW_OP))
+ (set (match_dup 1)
+ (unspec_volatile:SHORT [(match_dup 1) (match_dup 2)]
+ UNSPEC_SYNC_NEW_OP))])]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_5 = gen_sync_new_nand_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], NULL);
+ DONE;
+})
+
+;; Helper insn for sync_new_nand<mode>
+(define_insn "sync_new_nand_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "memory_operand" "+R")
+ (match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")
+ (match_operand:SI 4 "register_operand" "dJ")]
+ UNSPEC_SYNC_NEW_OP_12))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)] UNSPEC_SYNC_NEW_OP_12))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "and")
+ (set_attr "sync_insn2" "xor")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_insn1_op2" "4")])
+
+(define_insn "sync_sub<mode>"
+ [(set (match_operand:GPR 0 "memory_operand" "+R")
+ (unspec_volatile:GPR
+ [(minus:GPR (match_dup 0)
+ (match_operand:GPR 1 "register_operand" "d"))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "subu")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_insn1_op2" "1")])
+
+(define_insn "sync_old_add<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(plus:GPR (match_dup 1)
+ (match_operand:GPR 2 "arith_operand" "I,d"))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "addiu,addu")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_old_sub<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (match_operand:GPR 1 "memory_operand" "+R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(minus:GPR (match_dup 1)
+ (match_operand:GPR 2 "register_operand" "d"))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "subu")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_new_add<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (plus:GPR (match_operand:GPR 1 "memory_operand" "+R,R")
+ (match_operand:GPR 2 "arith_operand" "I,d")))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(plus:GPR (match_dup 1) (match_dup 2))]
+ UNSPEC_SYNC_NEW_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "addiu,addu")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_new_sub<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (minus:GPR (match_operand:GPR 1 "memory_operand" "+R")
+ (match_operand:GPR 2 "register_operand" "d")))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(minus:GPR (match_dup 1) (match_dup 2))]
+ UNSPEC_SYNC_NEW_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "subu")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_<optab><mode>"
+ [(set (match_operand:GPR 0 "memory_operand" "+R,R")
+ (unspec_volatile:GPR
+ [(fetchop_bit:GPR (match_operand:GPR 1 "uns_arith_operand" "K,d")
+ (match_dup 0))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<immediate_insn>,<insn>")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_insn1_op2" "1")])
+
+(define_insn "sync_old_<optab><mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(fetchop_bit:GPR (match_operand:GPR 2 "uns_arith_operand" "K,d")
+ (match_dup 1))]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<immediate_insn>,<insn>")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_new_<optab><mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(fetchop_bit:GPR (match_operand:GPR 2 "uns_arith_operand" "K,d")
+ (match_dup 1))]
+ UNSPEC_SYNC_NEW_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "<immediate_insn>,<insn>")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_nand<mode>"
+ [(set (match_operand:GPR 0 "memory_operand" "+R,R")
+ (unspec_volatile:GPR [(match_operand:GPR 1 "uns_arith_operand" "K,d")]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "andi,and")
+ (set_attr "sync_insn2" "not")
+ (set_attr "sync_mem" "0")
+ (set_attr "sync_insn1_op2" "1")])
+
+(define_insn "sync_old_nand<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR [(match_operand:GPR 2 "uns_arith_operand" "K,d")]
+ UNSPEC_SYNC_OLD_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "andi,and")
+ (set_attr "sync_insn2" "not")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_new_nand<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR [(match_operand:GPR 2 "uns_arith_operand" "K,d")]
+ UNSPEC_SYNC_NEW_OP))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_insn1" "andi,and")
+ (set_attr "sync_insn2" "not")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_newval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_insn "sync_lock_test_and_set<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d,&d")
+ (match_operand:GPR 1 "memory_operand" "+R,R"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR [(match_operand:GPR 2 "arith_operand" "I,d")]
+ UNSPEC_SYNC_EXCHANGE))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_release_barrier" "no")
+ (set_attr "sync_insn1" "li,move")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ (set_attr "sync_insn1_op2" "2")])
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:SHORT 0 "register_operand")
+ (match_operand:SHORT 1 "memory_operand")
+ (match_operand:SHORT 2 "general_operand")]
+ "GENERATE_LL_SC"
+{
+ union mips_gen_fn_ptrs generator;
+ generator.fn_5 = gen_test_and_set_12;
+ mips_expand_atomic_qihi (generator,
+ operands[0], operands[1], operands[2], NULL);
+ DONE;
+})
+
+(define_insn "test_and_set_12"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (match_operand:SI 1 "memory_operand" "+R"))
+ (set (match_dup 1)
+ (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "d")
+ (match_operand:SI 3 "register_operand" "d")
+ (match_operand:SI 4 "arith_operand" "dJ")]
+ UNSPEC_SYNC_EXCHANGE_12))]
+ "GENERATE_LL_SC"
+ { return mips_output_sync_loop (insn, operands); }
+ [(set_attr "sync_release_barrier" "no")
+ (set_attr "sync_oldval" "0")
+ (set_attr "sync_mem" "1")
+ ;; Unused, but needed to give the number of operands expected by
+ ;; the expander.
+ (set_attr "sync_inclusive_mask" "2")
+ (set_attr "sync_exclusive_mask" "3")
+ (set_attr "sync_insn1_op2" "4")])
diff --git a/gcc/config/mips/t-elf b/gcc/config/mips/t-elf
new file mode 100644
index 000000000..4ed36da65
--- /dev/null
+++ b/gcc/config/mips/t-elf
@@ -0,0 +1,44 @@
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2006,
+# 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
+
+# We must build libgcc2.a with -G 0, in case the user wants to link
+# without the $gp register.
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+# Build the libraries for both hard and soft floating point
+
+MULTILIB_OPTIONS = msoft-float EL/EB
+MULTILIB_DIRNAMES = soft-float el eb
+MULTILIB_MATCHES = EL=mel EB=meb msingle-float=m4650
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mips/t-iris b/gcc/config/mips/t-iris
new file mode 100644
index 000000000..a200cf8c1
--- /dev/null
+++ b/gcc/config/mips/t-iris
@@ -0,0 +1,9 @@
+$(T)irix-crti.o: $(srcdir)/config/mips/irix-crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+$(T)irix-crtn.o: $(srcdir)/config/mips/irix-crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o irix-crti.o irix-crtn.o
diff --git a/gcc/config/mips/t-iris6 b/gcc/config/mips/t-iris6
new file mode 100644
index 000000000..49f16d7fc
--- /dev/null
+++ b/gcc/config/mips/t-iris6
@@ -0,0 +1,38 @@
+# Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005,
+# 2006, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS=mabi=n32/mabi=64
+MULTILIB_DIRNAMES=n32 64
+MULTILIB_MATCHES=
+MULTILIB_OSDIRNAMES=../lib32 ../lib64
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+TPBIT = tp-bit.c
+
+tp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __MIPSEL__' > tp-bit.c
+ echo '# define FLOAT_BIT_ORDER_MISMATCH' >> tp-bit.c
+ echo '#endif' >> tp-bit.c
+ echo '#define QUIET_NAN_NEGATED' >> tp-bit.c
+ echo '#if __LDBL_MANT_DIG__ == 106' >> tp-bit.c
+ echo '# define TFLOAT' >> tp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> tp-bit.c
+ echo '#endif' >> tp-bit.c
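
The tp-bit.c rule above does nothing more than prepend a short preprocessor
prologue to the generic $(srcdir)/config/fp-bit.c; reconstructed from the
echo lines, the generated file begins:

/* tp-bit.c as generated by the rule above (reconstruction).  */
#ifdef __MIPSEL__
# define FLOAT_BIT_ORDER_MISMATCH
#endif
#define QUIET_NAN_NEGATED
#if __LDBL_MANT_DIG__ == 106
# define TFLOAT
/* ...the whole of $(srcdir)/config/fp-bit.c is appended here... */
#endif

The #if guard means the file compiles to nothing unless long double has the
106-bit mantissa of the IRIX double-double format, so the TFmode emulation
routines are only built where they are meaningful.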
diff --git a/gcc/config/mips/t-isa3264 b/gcc/config/mips/t-isa3264
new file mode 100644
index 000000000..f6dce3255
--- /dev/null
+++ b/gcc/config/mips/t-isa3264
@@ -0,0 +1,54 @@
+# Copyright (C) 2001, 2002, 2003, 2004, 2007,
+# 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
+
+# We must build libgcc2.a with -G 0, in case the user wants to link
+# without the $gp register.
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+# Build the libraries for both hard and soft floating point
+
+ifneq ($(filter MIPS_ABI_DEFAULT=ABI_EABI,$(tm_defines)),)
+MULTILIB_OPTIONS = msoft-float EL/EB mips32/mips32r2/mips64/mips64r2
+MULTILIB_DIRNAMES = soft-float el eb mips32 mips32r2 mips64 mips64r2
+else
+MULTILIB_OPTIONS = msoft-float/mfp64 EL/EB mips32/mips32r2/mips64/mips64r2
+MULTILIB_DIRNAMES = soft-float fp64 el eb mips32 mips32r2 mips64 mips64r2
+ifneq ($(filter MIPS_ISA_DEFAULT=33,$(tm_defines)),)
+MULTILIB_EXCLUSIONS = mips32/mfp64 mips64/mfp64 mips64r2/mfp64
+else
+MULTILIB_EXCLUSIONS = !mips32r2/mfp64
+endif
+endif
+MULTILIB_MATCHES = EL=mel EB=meb
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mips/t-libgcc-mips16 b/gcc/config/mips/t-libgcc-mips16
new file mode 100644
index 000000000..fa3949a20
--- /dev/null
+++ b/gcc/config/mips/t-libgcc-mips16
@@ -0,0 +1,45 @@
+# Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = mips/mips16.S
+LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \
+ _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \
+ _m16unordsf2 \
+ _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \
+ _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \
+ _m16extsfdf2 _m16trdfsf2 \
+ _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \
+ _m16unorddf2 \
+ _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \
+ _m16retsf _m16retdf \
+ _m16retsc _m16retdc \
+ _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \
+ _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \
+ _m16stubsf9 _m16stubsf10 \
+ _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \
+ _m16stubdf9 _m16stubdf10 \
+ _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \
+ _m16stubsc9 _m16stubsc10 \
+ _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \
+ _m16stubdc9 _m16stubdc10
+
+LIBGCC_SYNC = yes
+LIBGCC_SYNC_CFLAGS = -mno-mips16
+
+# Version these symbols if building libgcc.so.
+SHLIB_MAPFILES += $(srcdir)/config/mips/libgcc-mips16.ver
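
The _m16* entries above build the out-of-line floating-point routines and
call stubs from mips16.S: MIPS16 code cannot encode FPU instructions, so
hard-float arithmetic and FP argument/return passing are routed through
these helpers, which are themselves assembled as normal (non-MIPS16) code.
A hypothetical example of code that ends up calling one:

/* Compiled with -mips16 against a hard-float ABI, the addition below is
   emitted as a call to an out-of-line helper built from the _m16addsf3
   entry above (conventionally named __mips16_addsf3), not as an inline
   add.s instruction.  */
float
add_floats (float a, float b)
{
  return a + b;
}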
diff --git a/gcc/config/mips/t-linux64 b/gcc/config/mips/t-linux64
new file mode 100644
index 000000000..7915f4d7d
--- /dev/null
+++ b/gcc/config/mips/t-linux64
@@ -0,0 +1,40 @@
+# Copyright (C) 2003, 2004, 2005, 2006, 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = mabi=n32/mabi=32/mabi=64
+MULTILIB_DIRNAMES = n32 32 64
+MIPS_EL = $(if $(filter %el, $(firstword $(subst -, ,$(target)))),el)
+MIPS_SOFT = $(if $(strip $(filter MASK_SOFT_FLOAT_ABI, $(target_cpu_default)) $(filter soft, $(with_float))),soft)
+MULTILIB_OSDIRNAMES = \
+ ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \
+ ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \
+ ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT))
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+
+TPBIT = tp-bit.c
+
+tp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __MIPSEL__' > tp-bit.c
+ echo '# define FLOAT_BIT_ORDER_MISMATCH' >> tp-bit.c
+ echo '#endif' >> tp-bit.c
+ echo '#if __LDBL_MANT_DIG__ == 113' >> tp-bit.c
+ echo '#define QUIET_NAN_NEGATED' >> tp-bit.c
+ echo '# define TFLOAT' >> tp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> tp-bit.c
+ echo '#endif' >> tp-bit.c
diff --git a/gcc/config/mips/t-mips b/gcc/config/mips/t-mips
new file mode 100644
index 000000000..d412da571
--- /dev/null
+++ b/gcc/config/mips/t-mips
@@ -0,0 +1,41 @@
+# Copyright (C) 2002, 2003, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# fp-bit and dp-bit are really part of libgcc1, but this will cause
+# them to be built correctly, so... [taken from t-sparclite]
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __MIPSEL__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ echo '#define QUIET_NAN_NEGATED' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __MIPSEL__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ echo '#define QUIET_NAN_NEGATED' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+LIB2_SIDITI_CONV_FUNCS=yes
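
As with tp-bit.c elsewhere in this directory, these rules only glue a
prologue onto the generic source; reconstructed from the echo lines, the
generated fp-bit.c begins:

/* fp-bit.c as generated by the rule above (reconstruction).  */
#define FLOAT
#ifdef __MIPSEL__
#define FLOAT_BIT_ORDER_MISMATCH
#endif
#define QUIET_NAN_NEGATED
/* ...the whole of $(srcdir)/config/fp-bit.c is appended here... */

Defining FLOAT selects the single-precision routines; the dp-bit.c variant
omits it and so builds the double-precision ones from the same source.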
diff --git a/gcc/config/mips/t-r3900 b/gcc/config/mips/t-r3900
new file mode 100644
index 000000000..2c4216399
--- /dev/null
+++ b/gcc/config/mips/t-r3900
@@ -0,0 +1,35 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+# 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We must build libgcc2.a with -G 0, in case the user wants to link
+# without the $gp register.
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# Build the libraries for both hard and soft floating point
+
+MULTILIB_OPTIONS = msoft-float EL/EB
+MULTILIB_DIRNAMES = soft-float el eb
+MULTILIB_MATCHES = EL=mel EB=meb
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mips/t-rtems b/gcc/config/mips/t-rtems
new file mode 100644
index 000000000..a2a185725
--- /dev/null
+++ b/gcc/config/mips/t-rtems
@@ -0,0 +1,34 @@
+# Custom multilibs for RTEMS
+#
+# Copyright (C) 2003, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# default is mips1 EB hard-float
+MULTILIB_OPTIONS = mips1/mips3/mips32 EB/EL msoft-float
+MULTILIB_DIRNAMES = mips1 mips3 mips32 eb el soft-float
+MULTILIB_MATCHES = EL=mel EB=meb
+
+MULTILIB_EXCEPTIONS =
+
+# Big endian only
+MULTILIB_EXCEPTIONS += EL*
+MULTILIB_EXCEPTIONS += mips32/EL*
+
+# Little endian only
+MULTILIB_EXCEPTIONS += mips3
+MULTILIB_EXCEPTIONS += mips3/msoft-float
diff --git a/gcc/config/mips/t-sb1 b/gcc/config/mips/t-sb1
new file mode 100644
index 000000000..6c4acb320
--- /dev/null
+++ b/gcc/config/mips/t-sb1
@@ -0,0 +1,62 @@
+# Copyright (C) 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GP-rel: G0 only
+#
+# Endianness: EB or EL
+#
+# ABIs: mabi=32
+# mabi=o64
+# mabi=o64/mlong64
+#
+# FPU: (default mhard-float)
+# msoft-float (only for mabi=32)
+#
+
+MULTILIB_EXTRA_OPTS = G0
+
+MULTILIB_OPTIONS = \
+ EB/EL \
+ mabi=32/mabi=o64 \
+ mips32/mips64 \
+ mlong64/msoft-float \
+
+MULTILIB_DIRNAMES = \
+ eb el \
+ o32 o64 \
+ mips32 mips64 \
+ long64 soft-float \
+
+MULTILIB_MATCHES = \
+ EB=meb EL=mel \
+
+MULTILIB_EXCEPTIONS = \
+ *mabi=32/*mlong64* \
+
+MULTILIB_EXCLUSIONS = \
+ mips32/!mabi=32 \
+ mabi=32/!mips32 \
+ msoft-float/!mabi=32 \
+
+# Small multilib list for quick builds and tests.
+# To use it, comment out either everything above these lines or everything
+# below them.
+
+#MULTILIB_OPTIONS = EB/EL msoft-float
+#MULTILIB_DIRNAMES = eb el soft-float
+#MULTILIB_MATCHES = EB=meb EL=mel
diff --git a/gcc/config/mips/t-sde b/gcc/config/mips/t-sde
new file mode 100644
index 000000000..0fa2277d5
--- /dev/null
+++ b/gcc/config/mips/t-sde
@@ -0,0 +1,53 @@
+# Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
+
+MULTILIB_OPTIONS = EL/EB mips32/mips32r2/mips64/mips64r2 mips16 msoft-float/mfp64 mcode-readable=no
+MULTILIB_DIRNAMES = el eb mips32 mips32r2 mips64 mips64r2 mips16 sof f64 spram
+MULTILIB_MATCHES = EL=mel EB=meb
+
+# The -mfp64 option is only valid in conjunction with -mips32r2.
+ifneq ($(filter MIPS_ISA_DEFAULT=33,$(tm_defines)),)
+MULTILIB_EXCLUSIONS := mips32/mfp64 mips64/mfp64 mips64r2/mfp64
+else
+MULTILIB_EXCLUSIONS := !mips32r2/mfp64
+endif
+
+# Don't build 64-bit MIPS16 multilibs.
+ifneq ($(filter MIPS_ISA_DEFAULT=6%,$(tm_defines)),)
+MULTILIB_EXCLUSIONS += !mips32/!mips32r2/mips16
+else
+MULTILIB_EXCLUSIONS += mips64/mips16 mips64r2/mips16
+endif
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+# Build the multilibs.
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mips/t-sdemtk b/gcc/config/mips/t-sdemtk
new file mode 100644
index 000000000..40a8294fd
--- /dev/null
+++ b/gcc/config/mips/t-sdemtk
@@ -0,0 +1,44 @@
+# Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Override newlib settings in t-sde and set up for building
+# against SDE header files and libraries.
+
+MULTILIB_OPTIONS = EL/EB mips32/mips32r2/mips64/mips64r2 mips16 msoft-float/mno-float/mfp64
+MULTILIB_DIRNAMES = el eb mips32 mips32r2 mips64 mips64r2 mips16 sof nof f64
+
+# Remove stdarg.h and stddef.h from USER_H.
+USER_H = $(srcdir)/ginclude/float.h \
+ $(srcdir)/ginclude/iso646.h \
+ $(srcdir)/ginclude/stdbool.h \
+ $(srcdir)/ginclude/varargs.h \
+ $(EXTRA_HEADERS)
+
+# Don't run fixincludes.
+STMP_FIXINC = stmp-sdefixinc
+stmp-sdefixinc: gsyslimits.h
+ rm -rf include; mkdir include
+ chmod a+rx include
+ rm -f include/syslimits.h
+ cp $(srcdir)/gsyslimits.h include/syslimits.h
+ chmod a+r include/syslimits.h
+ $(STAMP) stmp-sdefixinc
+
+# Don't build FPBIT and DPBIT; we'll be using the SDE soft-float library.
+FPBIT =
+DPBIT =
diff --git a/gcc/config/mips/t-slibgcc-irix b/gcc/config/mips/t-slibgcc-irix
new file mode 100644
index 000000000..6227ccef3
--- /dev/null
+++ b/gcc/config/mips/t-slibgcc-irix
@@ -0,0 +1,52 @@
+# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build a shared libgcc library.
+
+SHLIB_EXT = .so
+SHLIB_SOLINK = @shlib_base_name@.so
+SHLIB_SOVERSION = 1
+SHLIB_SONAME = @shlib_base_name@.so.$(SHLIB_SOVERSION)
+SHLIB_MAP = @shlib_map_file@
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+SHLIB_LC = -lc
+
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-soname,$(SHLIB_SONAME) \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) $(SHLIB_LC) && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+ $(LN_S) $(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/mips/t-sr71k b/gcc/config/mips/t-sr71k
new file mode 100644
index 000000000..5eb96a12e
--- /dev/null
+++ b/gcc/config/mips/t-sr71k
@@ -0,0 +1,67 @@
+# Copyright (C) 2002, 2003, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Suppress building libgcc1.a, since the MIPS compiler port is complete
+# and does not need anything from libgcc1.a.
+LIBGCC1 =
+CROSS_LIBGCC1 =
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
+
+# We must build libgcc2.a with -G 0, in case the user wants to link
+# without the $gp register.
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+# fp-bit and dp-bit are really part of libgcc1, but this will cause
+# them to be built correctly, so... [taken from t-sparclite]
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __MIPSEL__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __MIPSEL__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Build the libraries for both hard and soft floating point
+
+MULTILIB_OPTIONS = EL/EB msoft-float mips2
+MULTILIB_DIRNAMES = el eb soft-float mips2
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/mips/t-st b/gcc/config/mips/t-st
new file mode 100644
index 000000000..83115f6fd
--- /dev/null
+++ b/gcc/config/mips/t-st
@@ -0,0 +1,32 @@
+# Copyright (C) 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = march=loongson2e/march=loongson2f mabi=n32/mabi=32/mabi=64
+MULTILIB_DIRNAMES = 2e 2f lib32 lib lib64
+
+MULTILIB_OSDIRNAMES = march.loongson2e/mabi.n32=../lib32/2e
+MULTILIB_OSDIRNAMES += march.loongson2e/mabi.32=../lib/2e
+MULTILIB_OSDIRNAMES += march.loongson2e/mabi.64=../lib64/2e
+MULTILIB_OSDIRNAMES += march.loongson2f/mabi.n32=../lib32/2f
+MULTILIB_OSDIRNAMES += march.loongson2f/mabi.32=../lib/2f
+MULTILIB_OSDIRNAMES += march.loongson2f/mabi.64=../lib64/2f
+MULTILIB_OSDIRNAMES += mabi.n32=../lib32
+MULTILIB_OSDIRNAMES += mabi.32=../lib
+MULTILIB_OSDIRNAMES += mabi.64=../lib64
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
diff --git a/gcc/config/mips/t-vr b/gcc/config/mips/t-vr
new file mode 100644
index 000000000..81efef9b8
--- /dev/null
+++ b/gcc/config/mips/t-vr
@@ -0,0 +1,130 @@
+# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# BEGIN boiler-plate MIPS stuff
+
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS = -G 0
+
+# We must build libgcc2.a with -G 0, in case the user wants to link
+# without the $gp register.
+TARGET_LIBGCC2_CFLAGS = -G 0
+
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/mips/mips16.S \
+ $(srcdir)/config/mips/vr4120-div.S
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/mips/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/mips/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/mips/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/mips/crtn.asm
+
+# END boiler-plate
+
+# Main multilibs
+# --------------
+#
+# Endianness: EB or EL
+#
+# ABIs: mabi=32
+# mabi=o64
+# mabi=eabi
+# mabi=eabi/mlong32
+# mabi=eabi/mgp32
+# mabi=eabi/mgp32/mlong64
+#
+# Architecture: march=vr4120 with -mfix-vr4120
+# march=vr4130 with -mfix-vr4130 (default)
+# march=vr5000
+# march=vr5400
+# march=vr5500
+#
+# Total: 2 * 6 * 5 = 60 multilibs.
+#
+#
+# Extra vr4300 multilibs
+# ----------------------
+#
+# Endianness: EB or EL
+#
+# ABI: o64
+#
+# Architecture: vr4300.
+#
+# Total: 2 * 1 * 1 = 2 multilibs.
+#
+#
+# Extra MIPS16 multilibs
+# ----------------------
+#
+# Endianness: EB or EL
+#
+# ABIs: mabi=o64
+# mabi=eabi/mlong32
+# mabi=eabi/mgp32
+#
+# Architecture: march=vr4120 with -mfix-vr4120
+# march=vr4130 with -mfix-vr4130 (default)
+#
+# Total: 2 * 3 * 2 = 12 multilibs.
+MULTILIB_OPTIONS = \
+ EL/EB \
+ mabi=32/mabi=o64/mabi=eabi \
+ mgp32 \
+ mlong64 \
+ mips16 \
+ mfix-vr4120/mfix-vr4130/march=vr4300/march=vr5000/march=vr5400/march=vr5500
+
+MULTILIB_DIRNAMES = \
+ el eb \
+ o32 o64 eabi \
+ gp32 \
+ long64 \
+ mips16 \
+ vr4120 vr4130 vr4300 vr5000 vr5400 vr5500
+
+MULTILIB_MATCHES = EL=mel EB=meb mfix-vr4120=march?vr4120 \
+ mfix-vr4130=march?vr4130
+
+# Assume a 41xx-series processor is the default: we'd need a *mips16 entry if
+# the default processor didn't support mips16. Also assume the
+# default ABI is EABI64 -mlong32.
+MULTILIB_EXCEPTIONS = \
+ *mabi=32/mlong64* \
+ *mabi=32/mgp32* \
+ *mabi=o64/mgp32* \
+ *mabi=o64/mlong64* \
+ *mips16/march=vr5* \
+ *mips16/march=vr4300 \
+ $(MIPS16_EXCEPTIONS) \
+ $(VR4300_EXCEPTIONS)
+
+MIPS16_EXCEPTIONS = \
+ *mabi=32*mips16* \
+ *mlong64*mips16*
+
+VR4300_EXCEPTIONS = \
+ *mabi=32*march=vr4300 \
+ *mgp32*march=vr4300 \
+ *mlong64*march=vr4300 \
+ march=vr4300 \
+ E[LB]/march=vr4300
diff --git a/gcc/config/mips/t-vxworks b/gcc/config/mips/t-vxworks
new file mode 100644
index 000000000..ac2fa9d9f
--- /dev/null
+++ b/gcc/config/mips/t-vxworks
@@ -0,0 +1,35 @@
+# Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Multilibs for VxWorks. We want these 8 architecture combinations:
+#
+# {-mips2,-mips3} x {-EB,-EL} x {-mhard-float,-msoft-float}
+#
+# where the first option in each group is the default. The -mips2
+# multilibs use o32 and the -mips3 multilibs use o64.
+#
+# We want three multilibs for each architecture combination:
+# default (kernel mode), -mrtp and -mrtp/-fPIC.
+MULTILIB_OPTIONS = mabi=o64 mips3 EL msoft-float mrtp fPIC
+MULTILIB_DIRNAMES = o64 mips3 EL msoft-float mrtp pic
+MULTILIB_MATCHES = EL=mel fPIC=fpic
+MULTILIB_EXCEPTIONS = mips3* mabi=o64 fPIC \
+ $(addprefix mabi=o64/, EL* msoft-float* mrtp* fPIC*) \
+ $(addsuffix /fPIC, *mabi=o64 *mips3 *EL *msoft-float)
+
+MULTILIB_EXTRA_OPTS = -G 0 -mno-branch-likely
diff --git a/gcc/config/mips/vr.h b/gcc/config/mips/vr.h
new file mode 100644
index 000000000..3a89e69ce
--- /dev/null
+++ b/gcc/config/mips/vr.h
@@ -0,0 +1,58 @@
+/* Definitions of target machine for GNU compiler.
+ NEC VR Series Processors
+ Copyright (c) 2002, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define DEFAULT_VR_ARCH "mfix-vr4130"
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS \
+ { MULTILIB_ENDIAN_DEFAULT, \
+ MULTILIB_ABI_DEFAULT, \
+ DEFAULT_VR_ARCH }
+
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS \
+ /* Enforce the default architecture. This is mostly for \
+ the assembler's benefit. */ \
+ "%{!march=*:%{!mfix-vr4120:%{!mfix-vr4130:" \
+ "-" DEFAULT_VR_ARCH "}}}", \
+ \
+ /* Make -mfix-vr4120 imply -march=vr4120. This cuts down \
+ on command-line tautology and makes it easier for t-vr to \
+ provide a -mfix-vr4120 multilib. */ \
+ "%{mfix-vr4120:%{!march=*:-march=vr4120}}", \
+ \
+ /* Same idea for -mfix-vr4130. */ \
+ "%{mfix-vr4130:%{!march=*:-march=vr4130}}", \
+ \
+ /* Infer the default float setting from -march. */ \
+ MIPS_ARCH_FLOAT_SPEC, \
+ \
+ /* Make -mabi=eabi -mlong32 the default. */ \
+ "%{!mabi=*:-mabi=eabi %{!mlong*:-mlong32}}", \
+ \
+ /* Make sure -mlong64 multilibs are chosen when 64-bit longs \
+ are needed. */ \
+ "%{mabi=eabi:%{!mlong*:%{!mgp32:-mlong64}}}", \
+ \
+ /* Remove -mgp32 if it is redundant. */ \
+ "%{mabi=32:%<mgp32}", \
+ \
+ /* Configuration-independent MIPS rules. */ \
+ BASE_DRIVER_SELF_SPECS
diff --git a/gcc/config/mips/vr4120-div.S b/gcc/config/mips/vr4120-div.S
new file mode 100644
index 000000000..79ede3de9
--- /dev/null
+++ b/gcc/config/mips/vr4120-div.S
@@ -0,0 +1,74 @@
+/* Support file for -mfix-vr4120.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This file contains functions which implement divsi3 and modsi3 for
+ -mfix-vr4120. div and ddiv do not give the correct result when one
+ of the operands is negative. */
+
+ .set nomips16
+
+#define DIV \
+ xor $3,$4,$5 /* t = x ^ y */ ; \
+ li $2,0x80000000; \
+ .set noreorder; \
+ bgez $4,1f /* x >= 0 */; \
+ and $3,$3,$2 /* t = (x ^ y) & 0x80000000 in delay slot */ ;\
+ .set reorder; \
+ subu $4,$0,$4 /* x = -x */ ; \
+1:; \
+ .set noreorder; \
+ bgez $5,2f /* y >= 0 */ ; \
+ nop; \
+ subu $5,$0,$5 /* y = -y */ ; \
+ .set reorder; \
+2:; \
+ divu $0,$4,$5; /* we use divu because of INT_MIN */ \
+ .set noreorder; \
+ bne $5,$0,3f; \
+ nop; \
+ break 7 /* division by zero (y == 0) */ ; \
+3:; \
+ .set reorder; \
+ mflo $2 /* r = x / y */ ; \
+ .set noreorder; \
+ beq $3,$0,4f /* t == 0 */ ; \
+ nop; \
+ subu $2,$0,$2 /* r = -r */ ; \
+ .set reorder; \
+4:
+
+ .globl __vr4120_divsi3
+ .ent __vr4120_divsi3
+__vr4120_divsi3:
+ DIV
+ j $31
+ .end __vr4120_divsi3
+
+ .globl __vr4120_modsi3
+ .ent __vr4120_modsi3
+__vr4120_modsi3:
+ move $6,$4 # x1 = x
+ move $7,$5 # y1 = y
+ DIV
+ mult $2,$7 # r = r * y1
+ mflo $2
+ .set noreorder
+ j $31
+ subu $2,$6,$2 # r = x1 - r in delay slot
+ .end __vr4120_modsi3
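
In C terms, the DIV macro computes roughly the following (an illustrative
model only, not a drop-in replacement; the real code keeps everything in
fixed registers and raises break 7 on a zero divisor):

/* C model of __vr4120_divsi3: divide unsigned with divu, which is not
   affected by the negative-operand erratum, then fix up the sign.  */
int
vr4120_divsi3_model (int x, int y)
{
  unsigned int sign = (unsigned int) (x ^ y) & 0x80000000u; /* t */
  unsigned int ux = x < 0 ? -(unsigned int) x : (unsigned int) x;
  unsigned int uy = y < 0 ? -(unsigned int) y : (unsigned int) y;
  unsigned int q = ux / uy;  /* divu; break 7 here when y == 0 */
  /* The negation wraps for the INT_MIN case, matching the subu above.  */
  return sign ? -(int) q : (int) q;
}

__vr4120_modsi3 then recovers the remainder from saved copies of the
operands as x - (x / y) * y, which is what its final mult/subu pair
computes.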
diff --git a/gcc/config/mips/vxworks.h b/gcc/config/mips/vxworks.h
new file mode 100644
index 000000000..f6c84b9a6
--- /dev/null
+++ b/gcc/config/mips/vxworks.h
@@ -0,0 +1,82 @@
+/* Copyright (C) 1999, 2003, 2004, 2007, 2008, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (MIPS, VxWorks syntax)");
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{!G:-G 0} %{G*} %(endian_spec) %{mips1} %{mips2} %{mips3} %{mips4} \
+%{mips32} %{mips32r2} %{mips64} \
+%{mips16:%{!mno-mips16:-mips16}} %{mno-mips16:-no-mips16} \
+%(subtarget_asm_optimizing_spec) \
+%(subtarget_asm_debugging_spec) \
+%{mabi=*} %{!mabi*: %(asm_abi_default_spec)} \
+%{mgp32} %{mgp64} %{march=*} %{mxgot:-xgot} \
+%{mtune=*} \
+%(subtarget_asm_spec)"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%(endian_spec) \
+%{!G:-G 0} %{G*} %{mips1} %{mips2} %{mips3} %{mips4} %{mips32} %{mips64} " \
+VXWORKS_LINK_SPEC
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ if (TARGET_64BIT) \
+ builtin_define ("CPU=MIPS64"); \
+ else \
+ builtin_define ("CPU=MIPS32"); \
+ if (TARGET_BIG_ENDIAN) \
+ builtin_define ("MIPSEB"); \
+ else \
+ builtin_define ("MIPSEL"); \
+ if (TARGET_SOFT_FLOAT) \
+ builtin_define ("SOFT_FLOAT"); \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+
+/* No sdata. */
+#undef MIPS_DEFAULT_GVALUE
+#define MIPS_DEFAULT_GVALUE 0
+
+/* No _mcount profiling on VxWorks. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
+
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{mrtp:%{fPIC|fpic:-mvxworks-pic}}"
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS
+
+#undef DBX_REGISTER_NUMBER
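
For orientation, the macros defined by TARGET_OS_CPP_BUILTINS above are
visible to anything compiled for this target; a hypothetical consumer
(illustrative only, not part of the port) might use them like this:

/* Keying off the VxWorks-style builtins defined above.  */
#ifdef MIPSEB
# define BYTE_ORDER_NAME "big-endian"
#else
# define BYTE_ORDER_NAME "little-endian"
#endif

#ifdef SOFT_FLOAT
# define FP_NOTE "software floating point"
#else
# define FP_NOTE "hardware floating point"
#endif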
diff --git a/gcc/config/mips/x-native b/gcc/config/mips/x-native
new file mode 100644
index 000000000..5e31121ed
--- /dev/null
+++ b/gcc/config/mips/x-native
@@ -0,0 +1,3 @@
+driver-native.o : $(srcdir)/config/mips/driver-native.c \
+ $(CONFIG_H) $(SYSTEM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/mips/xlr.md b/gcc/config/mips/xlr.md
new file mode 100644
index 000000000..69913b7b2
--- /dev/null
+++ b/gcc/config/mips/xlr.md
@@ -0,0 +1,89 @@
+;; DFA-based pipeline description for the XLR.
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;;
+;; xlr.md Machine Description for the RMI XLR Microprocessor
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "xlr_main,xlr_muldiv")
+
+;; Definitions for xlr_main automaton.
+(define_cpu_unit "xlr_main_pipe" "xlr_main")
+
+(define_insn_reservation "ir_xlr_alu_slt" 2
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "slt"))
+ "xlr_main_pipe")
+
+;; Integer arithmetic instructions.
+(define_insn_reservation "ir_xlr_alu" 1
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "move,arith,shift,clz,logical,signext,const,unknown,multi,nop,trap"))
+ "xlr_main_pipe")
+
+;; Conditional move instructions.
+(define_insn_reservation "ir_xlr_condmove" 2
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "condmove"))
+ "xlr_main_pipe")
+
+;; Load/store instructions.
+(define_insn_reservation "ir_xlr_load" 4
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "load"))
+ "xlr_main_pipe")
+
+(define_insn_reservation "ir_xlr_store" 1
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "store"))
+ "xlr_main_pipe")
+
+(define_insn_reservation "ir_xlr_prefetch_x" 1
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "xlr_main_pipe")
+
+;; Branch instructions - use branch misprediction latency.
+(define_insn_reservation "ir_xlr_branch" 1
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "branch,jump,call"))
+ "xlr_main_pipe")
+
+;; Coprocessor move instructions.
+(define_insn_reservation "ir_xlr_xfer" 2
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "mtc,mfc"))
+ "xlr_main_pipe")
+
+(define_bypass 5 "ir_xlr_xfer" "ir_xlr_xfer")
+
+;; Definitions for the xlr_muldiv automaton.
+(define_cpu_unit "xlr_imuldiv_nopipe" "xlr_muldiv")
+
+(define_insn_reservation "ir_xlr_imul" 8
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "imul,imul3,imadd"))
+ "xlr_main_pipe,xlr_imuldiv_nopipe*6")
+
+(define_insn_reservation "ir_xlr_div" 68
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "idiv"))
+ "xlr_main_pipe,xlr_imuldiv_nopipe*67")
+
+(define_insn_reservation "xlr_hilo" 2
+ (and (eq_attr "cpu" "xlr")
+ (eq_attr "type" "mfhilo,mthilo"))
+ "xlr_imuldiv_nopipe")
diff --git a/gcc/config/mmix/crti.asm b/gcc/config/mmix/crti.asm
new file mode 100644
index 000000000..f5f4c5d68
--- /dev/null
+++ b/gcc/config/mmix/crti.asm
@@ -0,0 +1,116 @@
+/* Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson <hp@bitrange.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+% This is the crt0 equivalent for mmix-knuth-mmixware, for setting up
+% things for compiler-generated assembly-code and for setting up things
+% between where the simulator calls and main, and shutting things down on
+% the way back. There's an actual crt0.o elsewhere, but that's a dummy.
+
+% This file and the GCC output are supposed to be *reasonably*
+% mmixal-compatible to enable people to re-use output with Knuth's mmixal.
+% However, forward references are used more freely: we are using the
+% binutils tools. Users of mmixal beware; you will sometimes have to
+% re-order things or use temporary variables.
+
+% Users of mmixal will want to set up 8H and 9H to be .text and .data
+% respectively, so the compiler can switch between them pretending they're
+% segments.
+
+% This little treasure is here so the 32 lowest address bits of user data
+% will not be zero. Because of truncation, that would cause testcase
+% gcc.c-torture/execute/980701-1.c to incorrectly fail.
+
+ .data ! mmixal:= 8H LOC Data_Segment
+ .p2align 3
+ LOC @+(8-@)@7
+ OCTA 2009
+
+ .text ! mmixal:= 9H LOC 8B; LOC #100
+ .global Main
+
+% The __Stack_start symbol is provided by the link script.
+stackpp OCTA __Stack_start
+
+% "Main" is the magic symbol the simulator jumps to. We want to go
+% on to "main".
+% We need to set rG explicitly to avoid hard-to-debug situations.
+Main SETL $255,32
+ PUT rG,$255
+
+% Initialize the stack pointer. It is supposedly made a global
+% zero-initialized (allowed to change) register in crtn.asm; we use the
+% explicit number.
+ GETA $255,stackpp
+ LDOU $254,$255,0
+
+% Make sure we get more than one mem, to simplify counting cycles.
+ LDBU $255,$1,0
+ LDBU $255,$1,1
+
+ PUSHJ $2,_init
+
+#ifdef __MMIX_ABI_GNU__
+% Copy argc and argv from their initial position to argument registers
+% where necessary.
+ SET $231,$0
+ SET $232,$1
+#else
+% For the mmixware ABI, we need to move arguments. The return value will
+% appear in $0.
+ SET $2,$1
+ SET $1,$0
+#endif
+
+ PUSHJ $0,main
+ JMP exit
+
+% Provide the first part of _init and _fini. Save the return address on the
+% register stack. We eventually ignore the return address of these
+% PUSHJ:s, so it doesn't matter whether .init and .fini code calls
+% functions or where they store rJ. We shouldn't get there, so die
+% (TRAP Halt) if that happens.
+
+ .section .init,"ax",@progbits
+ .global _init
+_init:
+ GET $0,:rJ
+ PUSHJ $1,0F
+ SETL $255,255
+ TRAP 0,0,0
+0H IS @
+
+% Register _fini to be executed as the last atexit function.
+#ifdef __MMIX_ABI_GNU__
+ GETA $231,_fini
+#else
+ GETA $1,_fini
+#endif
+ PUSHJ $0,atexit
+
+ .section .fini,"ax",@progbits
+ .global _fini
+_fini:
+ GET $0,:rJ
+ PUSHJ $1,0F
+ SETL $255,255
+ TRAP 0,0,0
+0H IS @
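
In C terms, the startup path implemented above behaves roughly as follows
(an illustrative model only; the real sequence is the assembly above, and
the atexit registration of _fini happens inside the .init fragment):

/* Rough C model of Main and the _init/_fini plumbing in this file.  */
extern void _init (void);   /* first half above; closed in crtn.asm */
extern void _fini (void);
extern int atexit (void (*) (void));
extern int main (int argc, char **argv);
extern void exit (int);

void
Main_model (int argc, char **argv)
{
  _init ();                  /* PUSHJ $2,_init; this also runs atexit (_fini) */
  exit (main (argc, argv));  /* PUSHJ $0,main; JMP exit */
}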
diff --git a/gcc/config/mmix/crtn.asm b/gcc/config/mmix/crtn.asm
new file mode 100644
index 000000000..c109e54db
--- /dev/null
+++ b/gcc/config/mmix/crtn.asm
@@ -0,0 +1,87 @@
+/* Copyright (C) 2001, 2002, 2009 Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson <hp@bitrange.com>
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+% This must be the last file on the link-line, allocating global registers
+% from the top.
+
+% Register $254 is the stack-pointer.
+sp GREG
+
+% Register $253 is the frame-pointer. It's not supposed to be used by
+% most functions.
+fp GREG
+
+% $252 is the static chain register; nested functions receive the
+% context of the surrounding function through a pointer passed in this
+% register.
+static_chain GREG
+struct_value_reg GREG
+
+% These registers are used to pass state at an exceptional return (C++).
+eh_state_3 GREG
+eh_state_2 GREG
+eh_state_1 GREG
+eh_state_0 GREG
+
+#ifdef __MMIX_ABI_GNU__
+
+% Allocate global registers used by the GNU ABI.
+gnu_parm_reg_16 GREG
+gnu_parm_reg_15 GREG
+gnu_parm_reg_14 GREG
+gnu_parm_reg_13 GREG
+gnu_parm_reg_12 GREG
+gnu_parm_reg_11 GREG
+gnu_parm_reg_10 GREG
+gnu_parm_reg_9 GREG
+gnu_parm_reg_8 GREG
+gnu_parm_reg_7 GREG
+gnu_parm_reg_6 GREG
+gnu_parm_reg_5 GREG
+gnu_parm_reg_4 GREG
+gnu_parm_reg_3 GREG
+gnu_parm_reg_2 GREG
+gnu_parm_reg_1 GREG
+
+#endif /* __MMIX_ABI_GNU__ */
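+
+% Assuming GREG allocates strictly downward from $254, as the comment at
+% the top says, the registers come out as: sp = $254, fp = $253,
+% static_chain = $252, struct_value_reg = $251, eh_state_3..eh_state_0 =
+% $250..$247 and, for the GNU ABI, gnu_parm_reg_16..gnu_parm_reg_1 =
+% $246..$231 -- matching crti.asm's use of $231 and $232 for argc and
+% argv.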
+
+% Provide last part of _init and _fini.
+
+% The return address is stored in the topmost stored register in the
+% register-stack. We ignore the current value in rJ. It is probably
+% garbage because each fragment of _init and _fini may have its own idea
+% of the current stack frame, if it's cut out from a "real" function
+% like in gcc/crtstuff.c.
+
+ .section .init,"ax",@progbits
+ GETA $255,0F
+ PUT rJ,$255
+ POP 0,0
+0H PUT rJ,$0
+ POP 0,0
+
+ .section .fini,"ax",@progbits
+ GETA $255,0F
+ PUT rJ,$255
+ POP 0,0
+0H PUT rJ,$0
+ POP 0,0
diff --git a/gcc/config/mmix/mmix-modes.def b/gcc/config/mmix/mmix-modes.def
new file mode 100644
index 000000000..afec3e0d4
--- /dev/null
+++ b/gcc/config/mmix/mmix-modes.def
@@ -0,0 +1,49 @@
+/* Definitions of target machine for GNU compiler, for MMIX.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson (hp@bitrange.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Node: Condition Code */
+
+/* Like other non-CC0 ports, MMIX needs to encode in the condition mode
+ which combination of comparison insn and branch insn or
+ conditional-set insn to use. The CC mode depends partly on which
+ condition is used and partly on the type of the operands. */
+
+/* The "usual" CC mode is used for a signed operands integer comparison,
+ where the CMP insn is used and the result is (integer) -1, 0 or 1 for
+ respectively a < b, a == b and a > b. */
+
+/* The CC_UNS mode is for an integer comparison of unsigned operands,
+ using the CMPU insn. Result values correspond to those in CCmode. */
+CC_MODE (CC_UNS);
+
+/* The CC_FP mode is for a non-equality floating-point comparison, using
+ the FCMP or FCMPE insn. The result is (integer) -1 for a < b and 1
+ for a > b, otherwise 0. */
+CC_MODE (CC_FP);
+
+/* The CC_FPEQ mode is for an equality floating-point comparison, using
+ the FEQL or FEQLE insn. The result is (integer) 1 for a == b,
+ otherwise 0 (including when an operand is a NaN). */
+CC_MODE (CC_FPEQ);
+
+/* The CC_FUN mode is for an ordering comparison, using the FUN or FUNE
+ insn. The result is (integer) 1 if a and b are unordered, otherwise
+ the result is 0. */
+CC_MODE (CC_FUN);
diff --git a/gcc/config/mmix/mmix-protos.h b/gcc/config/mmix/mmix-protos.h
new file mode 100644
index 000000000..2e6abd85a
--- /dev/null
+++ b/gcc/config/mmix/mmix-protos.h
@@ -0,0 +1,97 @@
+/* Prototypes for exported functions defined in mmix.c
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson (hp@bitrange.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern void mmix_init_expanders (void);
+extern int mmix_eh_return_data_regno (int);
+extern int mmix_initial_elimination_offset (int, int);
+extern int mmix_starting_frame_offset (void);
+extern int mmix_function_arg_regno_p (int, int);
+extern void mmix_function_profiler (FILE *, int);
+extern int mmix_reversible_cc_mode (enum machine_mode);
+extern int mmix_register_move_cost
+ (enum machine_mode, enum reg_class, enum reg_class);
+extern const char *mmix_text_section_asm_op (void);
+extern const char *mmix_data_section_asm_op (void);
+extern void mmix_output_quoted_string (FILE *, const char *, int);
+extern void mmix_asm_output_source_line (FILE *, int);
+extern void mmix_asm_output_ascii (FILE *, const char *, int);
+extern void mmix_asm_output_label (FILE *, const char *);
+extern void mmix_asm_output_internal_label (FILE *, const char *);
+extern void mmix_asm_weaken_label (FILE *, const char *);
+extern void mmix_asm_output_labelref (FILE *, const char *);
+extern void mmix_asm_output_def (FILE *, const char *, const char *);
+extern int mmix_print_operand_punct_valid_p (int);
+extern void mmix_asm_output_reg_push (FILE *, int);
+extern void mmix_asm_output_reg_pop (FILE *, int);
+extern void mmix_asm_output_skip (FILE *, int);
+extern void mmix_asm_output_align (FILE *, int);
+extern int mmix_shiftable_wyde_value (unsigned HOST_WIDEST_INT);
+extern void mmix_output_register_setting (FILE *, int, HOST_WIDEST_INT, int);
+extern int mmix_opposite_regno (int, int);
+extern int mmix_local_regno (int);
+extern unsigned mmix_dbx_register_number (unsigned);
+extern int mmix_use_simple_return (void);
+extern void mmix_make_decl_one_only (tree);
+extern int mmix_data_alignment (tree, int);
+extern int mmix_constant_alignment (tree, int);
+extern unsigned mmix_local_alignment (tree, unsigned);
+extern void mmix_asm_output_pool_prologue (FILE *, const char *, tree, int);
+extern void mmix_asm_output_aligned_common (FILE *, const char *, int, int);
+extern void mmix_asm_output_aligned_local (FILE *, const char *, int, int);
+extern void mmix_asm_declare_register_global
+ (FILE *, tree, int, const char *);
+extern void mmix_asm_output_addr_diff_elt (FILE *, rtx, int, int);
+extern void mmix_asm_output_addr_vec_elt (FILE *, int);
+extern enum reg_class mmix_preferred_reload_class (rtx, enum reg_class);
+extern enum reg_class mmix_preferred_output_reload_class
+ (rtx, enum reg_class);
+extern enum reg_class mmix_secondary_reload_class
+ (enum reg_class, enum machine_mode, rtx, int);
+extern int mmix_const_ok_for_letter_p (HOST_WIDE_INT, int);
+extern int mmix_const_double_ok_for_letter_p (rtx, int);
+extern int mmix_extra_constraint (rtx, int, int);
+extern rtx mmix_dynamic_chain_address (rtx);
+extern rtx mmix_return_addr_rtx (int, rtx);
+extern rtx mmix_eh_return_stackadj_rtx (void);
+extern rtx mmix_eh_return_handler_rtx (void);
+extern int mmix_constant_address_p (rtx);
+extern int mmix_legitimate_constant_p (rtx);
+extern void mmix_print_operand (FILE *, rtx, int);
+extern void mmix_print_operand_address (FILE *, rtx);
+extern void mmix_expand_prologue (void);
+extern void mmix_expand_epilogue (void);
+extern rtx mmix_get_hard_reg_initial_val (enum machine_mode, int);
+extern int mmix_asm_preferred_eh_data_format (int, int);
+extern void mmix_setup_frame_addresses (void);
+
+#ifdef RTX_CODE
+/* Needs to be ifdef'd for the sake of enum rtx_code. */
+extern enum machine_mode mmix_select_cc_mode (enum rtx_code, rtx, rtx);
+extern void mmix_canonicalize_comparison (enum rtx_code *, rtx *, rtx *);
+extern rtx mmix_gen_compare_reg (enum rtx_code, rtx, rtx);
+#endif
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
new file mode 100644
index 000000000..c96fdcb66
--- /dev/null
+++ b/gcc/config/mmix/mmix.c
@@ -0,0 +1,2838 @@
+/* Definitions of target machine for GNU compiler, for MMIX.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
+ 2010
+ Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson (hp@bitrange.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "hashtab.h"
+#include "insn-config.h"
+#include "output.h"
+#include "basic-block.h"
+#include "flags.h"
+#include "tree.h"
+#include "function.h"
+#include "expr.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "ggc.h"
+#include "dwarf2.h"
+#include "debug.h"
+#include "tm_p.h"
+#include "integrate.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+/* First some local helper definitions. */
+#define MMIX_FIRST_GLOBAL_REGNUM 32
+
+/* We'd need a current_function_has_landing_pad; instead the flag lives
+ in cfun->machine and is set when a nonlocal_goto_receiver is
+ expanded. Not just a C++ thing, but mostly. */
+#define MMIX_CFUN_HAS_LANDING_PAD (cfun->machine->has_landing_pad != 0)
+
+/* We have no means to tell DWARF 2 about the register stack, so we need
+ to store the return address on the stack if an exception can get into
+ this function. FIXME: Narrow condition. Before any whole-function
+ analysis, df_regs_ever_live_p () isn't initialized. We know it's up-to-date
+ after reload_completed; it may contain incorrect information some time
+ before that. Within an RTL sequence (after a call to start_sequence,
+ such as in RTL expanders), leaf_function_p doesn't see all insns
+ (perhaps any insn). But regs_ever_live is up-to-date when
+ leaf_function_p () isn't, so we "or" them together to get accurate
+ information. FIXME: Some tweak to leaf_function_p might be
+ preferable. */
+#define MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS \
+ (flag_exceptions \
+ && ((reload_completed && df_regs_ever_live_p (MMIX_rJ_REGNUM)) \
+ || !leaf_function_p ()))
+
+#define IS_MMIX_EH_RETURN_DATA_REG(REGNO) \
+ (crtl->calls_eh_return \
+ && (EH_RETURN_DATA_REGNO (0) == REGNO \
+ || EH_RETURN_DATA_REGNO (1) == REGNO \
+ || EH_RETURN_DATA_REGNO (2) == REGNO \
+ || EH_RETURN_DATA_REGNO (3) == REGNO))
+
+/* For the default ABI, we rename registers at output-time to fill the gap
+ between the (statically partitioned) saved registers and call-clobbered
+ registers. In effect this lets unused call-saved registers be used
+ as call-clobbered registers. The benefit comes from keeping the number
+ of local registers (value of rL) low, since there's a cost of
+ increasing rL and clearing unused (unset) registers with lower numbers.
+ Don't translate while outputting the prologue. */
+#define MMIX_OUTPUT_REGNO(N) \
+ (TARGET_ABI_GNU \
+ || (int) (N) < MMIX_RETURN_VALUE_REGNUM \
+ || (int) (N) > MMIX_LAST_STACK_REGISTER_REGNUM \
+ || cfun == NULL \
+ || cfun->machine == NULL \
+ || cfun->machine->in_prologue \
+ ? (N) : ((N) - MMIX_RETURN_VALUE_REGNUM \
+ + cfun->machine->highest_saved_stack_register + 1))
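+
+/* A worked example, with illustrative numbers only: if
+ MMIX_RETURN_VALUE_REGNUM were 15 and
+ cfun->machine->highest_saved_stack_register 3, then under the mmixware
+ ABI $15 would be printed as $4, $16 as $5 and so on, packing the
+ registers in use directly above the saved ones. */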
+
+/* The %d in "POP %d,0". */
+#define MMIX_POP_ARGUMENT() \
+ ((! TARGET_ABI_GNU \
+ && crtl->return_rtx != NULL \
+ && ! cfun->returns_struct) \
+ ? (GET_CODE (crtl->return_rtx) == PARALLEL \
+ ? GET_NUM_ELEM (XVEC (crtl->return_rtx, 0)) : 1) \
+ : 0)
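+
+/* So a plain scalar return under the mmixware ABI gives "POP 1,0", a
+ PARALLEL return value pops one slot per element, and returning a
+ struct (or using the GNU ABI) gives "POP 0,0". */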
+
+/* The canonical saved comparison operands for non-cc0 machines, set in
+ the compare expander. */
+rtx mmix_compare_op0;
+rtx mmix_compare_op1;
+
+/* Declarations of locals. */
+
+/* Intermediate for insn output. */
+static int mmix_output_destination_register;
+
+static void mmix_option_override (void);
+static void mmix_asm_output_source_filename (FILE *, const char *);
+static void mmix_output_shiftvalue_op_from_str
+ (FILE *, const char *, HOST_WIDEST_INT);
+static void mmix_output_shifted_value (FILE *, HOST_WIDEST_INT);
+static void mmix_output_condition (FILE *, rtx, int);
+static HOST_WIDEST_INT mmix_intval (rtx);
+static void mmix_output_octa (FILE *, HOST_WIDEST_INT, int);
+static bool mmix_assemble_integer (rtx, unsigned int, int);
+static struct machine_function *mmix_init_machine_status (void);
+static void mmix_encode_section_info (tree, rtx, int);
+static const char *mmix_strip_name_encoding (const char *);
+static void mmix_emit_sp_add (HOST_WIDE_INT offset);
+static void mmix_target_asm_function_prologue (FILE *, HOST_WIDE_INT);
+static void mmix_target_asm_function_end_prologue (FILE *);
+static void mmix_target_asm_function_epilogue (FILE *, HOST_WIDE_INT);
+static bool mmix_legitimate_address_p (enum machine_mode, rtx, bool);
+static void mmix_reorg (void);
+static void mmix_asm_output_mi_thunk
+ (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+static void mmix_setup_incoming_varargs
+ (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
+static void mmix_file_start (void);
+static void mmix_file_end (void);
+static bool mmix_rtx_costs (rtx, int, int, int *, bool);
+static rtx mmix_struct_value_rtx (tree, int);
+static enum machine_mode mmix_promote_function_mode (const_tree,
+ enum machine_mode,
+ int *, const_tree, int);
+static void mmix_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx mmix_function_arg_1 (const CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool, bool);
+static rtx mmix_function_incoming_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx mmix_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx mmix_function_value (const_tree, const_tree, bool);
+static rtx mmix_libcall_value (enum machine_mode, const_rtx);
+static bool mmix_function_value_regno_p (const unsigned int);
+static bool mmix_pass_by_reference (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static bool mmix_frame_pointer_required (void);
+static void mmix_asm_trampoline_template (FILE *);
+static void mmix_trampoline_init (rtx, tree, rtx);
+static void mmix_conditional_register_usage (void);
+
+/* TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options mmix_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fregmove, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Target structure macros. Listed by node. See `Using and Porting GCC'
+ for a general description. */
+
+/* Node: Function Entry */
+
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP NULL
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP NULL
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP NULL
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP NULL
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER mmix_assemble_integer
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE mmix_target_asm_function_prologue
+
+#undef TARGET_ASM_FUNCTION_END_PROLOGUE
+#define TARGET_ASM_FUNCTION_END_PROLOGUE mmix_target_asm_function_end_prologue
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE mmix_target_asm_function_epilogue
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO mmix_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING mmix_strip_name_encoding
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK mmix_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START mmix_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END mmix_file_end
+#undef TARGET_ASM_OUTPUT_SOURCE_FILENAME
+#define TARGET_ASM_OUTPUT_SOURCE_FILENAME mmix_asm_output_source_filename
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE mmix_conditional_register_usage
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS mmix_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG mmix_reorg
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE mmix_promote_function_mode
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE mmix_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE mmix_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P mmix_function_value_regno_p
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG mmix_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG mmix_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE mmix_function_arg_advance
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX mmix_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS mmix_setup_incoming_varargs
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE mmix_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P mmix_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED mmix_frame_pointer_required
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE mmix_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT mmix_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE mmix_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE mmix_option_optimization_table
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Functions that are expansions for target macros.
+ See Target Macros in `Using and Porting GCC'. */
+
+/* TARGET_OPTION_OVERRIDE. */
+
+static void
+mmix_option_override (void)
+{
+ /* Should we err or should we warn? Hmm. At least we must neutralize
+ it. For example the wrong kind of case-tables will be generated with
+ PIC; we use absolute address items for mmixal compatibility. FIXME:
+ They could be relative if we just moved them to after all pertinent
+ labels. */
+ if (flag_pic)
+ {
+ warning (0, "-f%s not supported: ignored", (flag_pic > 1) ? "PIC" : "pic");
+ flag_pic = 0;
+ }
+}
+
+/* INIT_EXPANDERS. */
+
+void
+mmix_init_expanders (void)
+{
+ init_machine_status = mmix_init_machine_status;
+}
+
+/* Set the per-function data. */
+
+static struct machine_function *
+mmix_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* DATA_ALIGNMENT.
+ We have trouble getting the address of stuff that is located at other
+ than 32-bit alignments (GETA requirements), so try to give everything
+ at least 32-bit alignment. */
+
+int
+mmix_data_alignment (tree type ATTRIBUTE_UNUSED, int basic_align)
+{
+ if (basic_align < 32)
+ return 32;
+
+ return basic_align;
+}
+
+/* CONSTANT_ALIGNMENT. */
+
+int
+mmix_constant_alignment (tree constant ATTRIBUTE_UNUSED, int basic_align)
+{
+ if (basic_align < 32)
+ return 32;
+
+ return basic_align;
+}
+
+/* LOCAL_ALIGNMENT. */
+
+unsigned
+mmix_local_alignment (tree type ATTRIBUTE_UNUSED, unsigned basic_align)
+{
+ if (basic_align < 32)
+ return 32;
+
+ return basic_align;
+}
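+
+/* The effect of the three alignment hooks above: e.g. a single char
+ object whose basic alignment would be 8 bits still gets 32-bit
+ alignment, so its address can always be formed with GETA. */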
+
+/* TARGET_CONDITIONAL_REGISTER_USAGE. */
+
+static void
+mmix_conditional_register_usage (void)
+{
+ int i;
+
+ if (TARGET_ABI_GNU)
+ {
+ static const int gnu_abi_reg_alloc_order[]
+ = MMIX_GNU_ABI_REG_ALLOC_ORDER;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ reg_alloc_order[i] = gnu_abi_reg_alloc_order[i];
+
+ /* Change the default from the mmixware ABI. For the GNU ABI,
+ $15..$30 are call-saved just as $0..$14. There must be one
+ call-clobbered local register for the "hole" that holds the
+ number of local registers saved by PUSHJ/PUSHGO during the
+ function call and that receives the return value at return. So
+ it is best to use the highest one, $31. It's already marked
+ call-clobbered for the mmixware ABI. */
+ for (i = 15; i <= 30; i++)
+ call_used_regs[i] = 0;
+
+ /* "Unfix" the parameter registers. */
+ for (i = MMIX_RESERVED_GNU_ARG_0_REGNUM;
+ i < MMIX_RESERVED_GNU_ARG_0_REGNUM + MMIX_MAX_ARGS_IN_REGS;
+ i++)
+ fixed_regs[i] = 0;
+ }
+
+ /* Step over the ":" in special register names. */
+ if (! TARGET_TOPLEVEL_SYMBOLS)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (reg_names[i][0] == ':')
+ reg_names[i]++;
+}
+
+/* INCOMING_REGNO and OUTGOING_REGNO worker function.
+ Those two macros must only be applied to function argument
+ registers. FIXME: for their current use in gcc, it'd be better to
+ have an explicit additional FUNCTION_INCOMING_ARG_REGNO_P, a la
+ TARGET_FUNCTION_ARG / TARGET_FUNCTION_INCOMING_ARG, instead of
+ forcing the target to commit to a fixed mapping and to a meaning for
+ any unspecified register. */
+
+int
+mmix_opposite_regno (int regno, int incoming)
+{
+ if (!mmix_function_arg_regno_p (regno, incoming))
+ return regno;
+
+ return
+ regno - (incoming
+ ? MMIX_FIRST_INCOMING_ARG_REGNUM - MMIX_FIRST_ARG_REGNUM
+ : MMIX_FIRST_ARG_REGNUM - MMIX_FIRST_INCOMING_ARG_REGNUM);
+}
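+
+/* A sketch with hypothetical regnums: if MMIX_FIRST_ARG_REGNUM were 16
+ and MMIX_FIRST_INCOMING_ARG_REGNUM 0, incoming argument register 0
+ would map to outgoing register 16 and vice versa; any non-argument
+ regno is returned unchanged. */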
+
+/* LOCAL_REGNO.
+ All registers that are part of the register stack and that will be
+ saved are local. */
+
+int
+mmix_local_regno (int regno)
+{
+ return regno <= MMIX_LAST_STACK_REGISTER_REGNUM && !call_used_regs[regno];
+}
+
+/* PREFERRED_RELOAD_CLASS.
+ We need to extend the reload class of REMAINDER_REG and HIMULT_REG. */
+
+enum reg_class
+mmix_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ /* FIXME: Revisit. */
+ return GET_CODE (x) == MOD && GET_MODE (x) == DImode
+ ? REMAINDER_REG : rclass;
+}
+
+/* PREFERRED_OUTPUT_RELOAD_CLASS.
+ We need to extend the reload class of REMAINDER_REG and HIMULT_REG. */
+
+enum reg_class
+mmix_preferred_output_reload_class (rtx x,
+ enum reg_class rclass)
+{
+ /* FIXME: Revisit. */
+ return GET_CODE (x) == MOD && GET_MODE (x) == DImode
+ ? REMAINDER_REG : rclass;
+}
+
+/* SECONDARY_RELOAD_CLASS.
+ We need to reload regs of REMAINDER_REG and HIMULT_REG elsewhere. */
+
+enum reg_class
+mmix_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED,
+ int in_p ATTRIBUTE_UNUSED)
+{
+ if (rclass == REMAINDER_REG
+ || rclass == HIMULT_REG
+ || rclass == SYSTEM_REGS)
+ return GENERAL_REGS;
+
+ return NO_REGS;
+}
+
+/* CONST_OK_FOR_LETTER_P. */
+
+int
+mmix_const_ok_for_letter_p (HOST_WIDE_INT value, int c)
+{
+ return
+ (c == 'I' ? value >= 0 && value <= 255
+ : c == 'J' ? value >= 0 && value <= 65535
+ : c == 'K' ? value <= 0 && value >= -255
+ : c == 'L' ? mmix_shiftable_wyde_value (value)
+ : c == 'M' ? value == 0
+ : c == 'N' ? mmix_shiftable_wyde_value (~value)
+ : c == 'O' ? (value == 3 || value == 5 || value == 9
+ || value == 17)
+ : 0);
+}
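+
+/* The 'O' values 3, 5, 9 and 17 are presumably the multipliers that a
+ single 2ADDU, 4ADDU, 8ADDU or 16ADDU can implement (x*3 == 2*x + x,
+ and so on). */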
+
+/* CONST_DOUBLE_OK_FOR_LETTER_P. */
+
+int
+mmix_const_double_ok_for_letter_p (rtx value, int c)
+{
+ return
+ (c == 'G' ? value == CONST0_RTX (GET_MODE (value))
+ : 0);
+}
+
+/* EXTRA_CONSTRAINT.
+ We need this since our constants are not always expressible as
+ CONST_INTs, but often only as CONST_DOUBLEs. */
+
+int
+mmix_extra_constraint (rtx x, int c, int strict)
+{
+ HOST_WIDEST_INT value;
+
+ /* When checking for an address, we need to handle strict vs. non-strict
+ register checks. Don't use address_operand; instead use its callee
+ (which it is just a wrapper for), memory_address_p, and the strict
+ equivalent, strict_memory_address_p. */
+ if (c == 'U')
+ return
+ strict
+ ? strict_memory_address_p (Pmode, x)
+ : memory_address_p (Pmode, x);
+
+ /* R asks whether x is to be loaded with GETA or something else. Right
+ now, only a SYMBOL_REF and LABEL_REF can fit for
+ TARGET_BASE_ADDRESSES.
+
+ Only constant symbolic addresses apply. With TARGET_BASE_ADDRESSES,
+ we just allow straight LABEL_REF or SYMBOL_REFs with SYMBOL_REF_FLAG
+ set right now; only function addresses and code labels. If we change
+ to let SYMBOL_REF_FLAG be set on other symbols, we have to check
+ inside CONST expressions. When TARGET_BASE_ADDRESSES is not in
+ effect, a "raw" constant check together with mmix_constant_address_p
+ is all that's needed; we want all constant addresses to be loaded
+ with GETA then. */
+ if (c == 'R')
+ return
+ GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE
+ && mmix_constant_address_p (x)
+ && (! TARGET_BASE_ADDRESSES
+ || (GET_CODE (x) == LABEL_REF
+ || (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FLAG (x))));
+
+ if (GET_CODE (x) != CONST_DOUBLE || GET_MODE (x) != VOIDmode)
+ return 0;
+
+ value = mmix_intval (x);
+
+ /* We used to map Q->J, R->K, S->L, T->N, U->O, but we don't have to any
+ more ('U' taken for address_operand, 'R' similarly). Some letters map
+ outside of CONST_INT, though; we still use 'S' and 'T'. */
+ if (c == 'S')
+ return mmix_shiftable_wyde_value (value);
+ else if (c == 'T')
+ return mmix_shiftable_wyde_value (~value);
+ return 0;
+}
+
+/* DYNAMIC_CHAIN_ADDRESS. */
+
+rtx
+mmix_dynamic_chain_address (rtx frame)
+{
+ /* FIXME: the frame-pointer is stored at offset -8 from the current
+ frame-pointer. Unfortunately, the caller assumes that a
+ frame-pointer is present for *all* previous frames. There should be
+ a way to say that that cannot be done, like for RETURN_ADDR_RTX. */
+ return plus_constant (frame, -8);
+}
+
+/* STARTING_FRAME_OFFSET. */
+
+int
+mmix_starting_frame_offset (void)
+{
+ /* The old frame pointer is in the slot below the new one, so
+ FIRST_PARM_OFFSET does not need to depend on whether the
+ frame-pointer is needed or not. We have to adjust for the register
+ stack pointer being located below the saved frame pointer.
+ Similarly, we store the return address on the stack too, for
+ exception handling, and always if we save the register stack pointer. */
+ return
+ (-8
+ + (MMIX_CFUN_HAS_LANDING_PAD
+ ? -16 : (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS ? -8 : 0)));
+}
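+
+/* That is: -8 in the normal case, -24 with a landing pad, and -16 when
+ only the EH return address needs a stack slot. */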
+
+/* RETURN_ADDR_RTX. */
+
+rtx
+mmix_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ return count == 0
+ ? (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS
+ /* FIXME: Set frame_alias_set on the following. (Why?)
+ See mmix_initial_elimination_offset for the reason we can't use
+ get_hard_reg_initial_val for both. Always using a stack slot
+ and not a register would be suboptimal. */
+ ? validize_mem (gen_rtx_MEM (Pmode, plus_constant (frame_pointer_rtx, -16)))
+ : get_hard_reg_initial_val (Pmode, MMIX_INCOMING_RETURN_ADDRESS_REGNUM))
+ : NULL_RTX;
+}
+
+/* SETUP_FRAME_ADDRESSES. */
+
+void
+mmix_setup_frame_addresses (void)
+{
+ /* Nothing needed at the moment. */
+}
+
+/* The difference between the (imaginary) frame pointer and the stack
+ pointer. Used to eliminate the frame pointer. */
+
+int
+mmix_initial_elimination_offset (int fromreg, int toreg)
+{
+ int regno;
+ int fp_sp_offset
+ = (get_frame_size () + crtl->outgoing_args_size + 7) & ~7;
+
+ /* There is no actual offset between these two virtual values, but for
+ the frame-pointer, we have the old one in the stack position below
+ it, so the offset for the frame-pointer to the stack-pointer is one
+ octabyte larger. */
+ if (fromreg == MMIX_ARG_POINTER_REGNUM
+ && toreg == MMIX_FRAME_POINTER_REGNUM)
+ return 0;
+
+ /* The difference is the size of local variables plus the size of
+ outgoing function arguments that would normally be passed as
+ registers but must be passed on stack because we're out of
+ function-argument registers. Only global saved registers are
+ counted; the others go on the register stack.
+
+ The frame-pointer is counted too if it is what is eliminated, as we
+ need to balance the offset for it from STARTING_FRAME_OFFSET.
+
+ Also add in the slot for the register stack pointer we save if we
+ have a landing pad.
+
+ Unfortunately, we can't access $0..$14 from unwinder code easily, so
+ store the return address in a frame slot too. FIXME: Only for
+ non-leaf functions. FIXME: Always with a landing pad, because it's
+ hard to know whether we need the other at the time we know we need
+ the offset for one (and have to state it). It's a kludge until we
+ can express the register stack in the EH frame info.
+
+ We have to do alignment here; get_frame_size will not return a
+ multiple of STACK_BOUNDARY. FIXME: Add note in manual. */
+
+ for (regno = MMIX_FIRST_GLOBAL_REGNUM;
+ regno <= 255;
+ regno++)
+ if ((df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ fp_sp_offset += 8;
+
+ return fp_sp_offset
+ + (MMIX_CFUN_HAS_LANDING_PAD
+ ? 16 : (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS ? 8 : 0))
+ + (fromreg == MMIX_ARG_POINTER_REGNUM ? 0 : 8);
+}
+
+static void
+mmix_function_arg_advance (CUMULATIVE_ARGS *argsp, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int arg_size = MMIX_FUNCTION_ARG_SIZE (mode, type);
+
+ argsp->regs = ((targetm.calls.must_pass_in_stack (mode, type)
+ || (arg_size > 8
+ && !TARGET_LIBFUNC
+ && !argsp->lib))
+ ? (MMIX_MAX_ARGS_IN_REGS) + 1
+ : argsp->regs + (7 + arg_size) / 8);
+}
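+
+/* E.g. a DImode argument advances argsp->regs by one, a 16-byte TImode
+ libcall argument by two ((7 + 16) / 8); anything that must go on the
+ stack is bumped past MMIX_MAX_ARGS_IN_REGS so that no later argument
+ lands in a register either. */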
+
+/* Helper function for mmix_function_arg and mmix_function_incoming_arg. */
+
+static rtx
+mmix_function_arg_1 (const CUMULATIVE_ARGS *argsp,
+ enum machine_mode mode,
+ const_tree type,
+ bool named ATTRIBUTE_UNUSED,
+ bool incoming)
+{
+ /* Last-argument marker. */
+ if (type == void_type_node)
+ return (argsp->regs < MMIX_MAX_ARGS_IN_REGS)
+ ? gen_rtx_REG (mode,
+ (incoming
+ ? MMIX_FIRST_INCOMING_ARG_REGNUM
+ : MMIX_FIRST_ARG_REGNUM) + argsp->regs)
+ : NULL_RTX;
+
+ return (argsp->regs < MMIX_MAX_ARGS_IN_REGS
+ && !targetm.calls.must_pass_in_stack (mode, type)
+ && (GET_MODE_BITSIZE (mode) <= 64
+ || argsp->lib
+ || TARGET_LIBFUNC))
+ ? gen_rtx_REG (mode,
+ (incoming
+ ? MMIX_FIRST_INCOMING_ARG_REGNUM
+ : MMIX_FIRST_ARG_REGNUM)
+ + argsp->regs)
+ : NULL_RTX;
+}
+
+/* Return an rtx for a function argument to go in a register, and 0 for
+ one that must go on stack. */
+
+static rtx
+mmix_function_arg (CUMULATIVE_ARGS *argsp,
+ enum machine_mode mode,
+ const_tree type,
+ bool named)
+{
+ return mmix_function_arg_1 (argsp, mode, type, named, false);
+}
+
+static rtx
+mmix_function_incoming_arg (CUMULATIVE_ARGS *argsp,
+ enum machine_mode mode,
+ const_tree type,
+ bool named)
+{
+ return mmix_function_arg_1 (argsp, mode, type, named, true);
+}
+
+/* Returns nonzero for everything that goes by reference, 0 for
+ everything that goes by value. */
+
+static bool
+mmix_pass_by_reference (CUMULATIVE_ARGS *argsp, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* FIXME: Check: I'm not sure the must_pass_in_stack check is
+ necessary. */
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return true;
+
+ if (MMIX_FUNCTION_ARG_SIZE (mode, type) > 8
+ && !TARGET_LIBFUNC
+ && (!argsp || !argsp->lib))
+ return true;
+
+ return false;
+}
+
+/* Return nonzero if regno is a register number where a parameter is
+ passed, and 0 otherwise. */
+
+int
+mmix_function_arg_regno_p (int regno, int incoming)
+{
+ int first_arg_regnum
+ = incoming ? MMIX_FIRST_INCOMING_ARG_REGNUM : MMIX_FIRST_ARG_REGNUM;
+
+ return regno >= first_arg_regnum
+ && regno < first_arg_regnum + MMIX_MAX_ARGS_IN_REGS;
+}
+
+/* Implements TARGET_FUNCTION_VALUE. */
+
+static rtx
+mmix_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing)
+{
+ enum machine_mode mode = TYPE_MODE (valtype);
+ enum machine_mode cmode;
+ int first_val_regnum = MMIX_OUTGOING_RETURN_VALUE_REGNUM;
+ rtx vec[MMIX_MAX_REGS_FOR_VALUE];
+ int i;
+ int nregs;
+
+ if (!outgoing)
+ return gen_rtx_REG (mode, MMIX_RETURN_VALUE_REGNUM);
+
+ /* Return values that fit in a register need no special handling.
+ There's no register hole when parameters are passed in global
+ registers. */
+ if (TARGET_ABI_GNU
+ || GET_MODE_BITSIZE (mode) <= BITS_PER_WORD)
+ return
+ gen_rtx_REG (mode, MMIX_OUTGOING_RETURN_VALUE_REGNUM);
+
+ if (COMPLEX_MODE_P (mode))
+ /* A complex type, made up of components. */
+ cmode = TYPE_MODE (TREE_TYPE (valtype));
+ else
+ {
+ /* Of the other larger-than-register modes, we only support
+ scalar mode TImode. (At least, that's the only one that's
+ been rudimentarily tested.) Make sure we're alerted to
+ unexpected cases. */
+ if (mode != TImode)
+ sorry ("support for mode %qs", GET_MODE_NAME (mode));
+
+ /* In any case, we will fill registers to the natural size. */
+ cmode = DImode;
+ }
+
+ nregs = ((GET_MODE_BITSIZE (mode) + BITS_PER_WORD - 1) / BITS_PER_WORD);
+
+ /* We need to take care of the effect of the register hole on return
+ values of large sizes; the last register will appear as the first
+ register, with the rest shifted. (For complex modes, this is just
+ swapped registers.) */
+
+ if (nregs > MMIX_MAX_REGS_FOR_VALUE)
+ internal_error ("too large function value type, needs %d registers,\
+ have only %d registers for this", nregs, MMIX_MAX_REGS_FOR_VALUE);
+
+ /* FIXME: Maybe we should handle structure values like this too
+ (adjusted for BLKmode), perhaps for both ABIs. */
+ for (i = 0; i < nregs - 1; i++)
+ vec[i]
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (cmode, first_val_regnum + i),
+ GEN_INT ((i + 1) * BITS_PER_UNIT));
+
+ vec[nregs - 1]
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (cmode, first_val_regnum + nregs - 1),
+ const0_rtx);
+
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, vec));
+}
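+
+/* E.g. for a TImode value (nregs == 2) under the mmixware ABI, the
+ PARALLEL built above is { (first_val_regnum at byte offset 8),
+ (first_val_regnum + 1 at byte offset 0) }: the last register supplies
+ the lowest-addressed part, which is the register-hole swap described
+ in the comment above. */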
+
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+mmix_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, MMIX_RETURN_VALUE_REGNUM);
+}
+
+/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */
+
+static bool
+mmix_function_value_regno_p (const unsigned int regno)
+{
+ return regno == MMIX_RETURN_VALUE_REGNUM;
+}
+
+/* EH_RETURN_DATA_REGNO. */
+
+int
+mmix_eh_return_data_regno (int n)
+{
+ if (n >= 0 && n < 4)
+ return MMIX_EH_RETURN_DATA_REGNO_START + n;
+
+ return INVALID_REGNUM;
+}
+
+/* EH_RETURN_STACKADJ_RTX. */
+
+rtx
+mmix_eh_return_stackadj_rtx (void)
+{
+ return gen_rtx_REG (Pmode, MMIX_EH_RETURN_STACKADJ_REGNUM);
+}
+
+/* EH_RETURN_HANDLER_RTX. */
+
+rtx
+mmix_eh_return_handler_rtx (void)
+{
+ return gen_rtx_REG (Pmode, MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+}
+
+/* ASM_PREFERRED_EH_DATA_FORMAT. */
+
+int
+mmix_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED,
+ int global ATTRIBUTE_UNUSED)
+{
+ /* This is the default (was at 2001-07-20). Revisit when needed. */
+ return DW_EH_PE_absptr;
+}
+
+/* Make a note that we've seen the beginning of the prologue. This
+ matters to whether we'll translate register numbers as calculated by
+ mmix_reorg. */
+
+static void
+mmix_target_asm_function_prologue (FILE *stream ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT framesize ATTRIBUTE_UNUSED)
+{
+ cfun->machine->in_prologue = 1;
+}
+
+/* Make a note that we've seen the end of the prologue. */
+
+static void
+mmix_target_asm_function_end_prologue (FILE *stream ATTRIBUTE_UNUSED)
+{
+ cfun->machine->in_prologue = 0;
+}
+
+/* Implement TARGET_MACHINE_DEPENDENT_REORG. No actual rearrangements
+ done here; just virtually by calculating the highest saved stack
+ register number used to modify the register numbers at output time. */
+
+static void
+mmix_reorg (void)
+{
+ int regno;
+
+ /* We put the number of the highest saved register-file register in a
+ location convenient for the call-patterns to output. Note that we
+ don't tell dwarf2 about these registers, since it can't restore them
+ anyway. */
+ for (regno = MMIX_LAST_STACK_REGISTER_REGNUM;
+ regno >= 0;
+ regno--)
+ if ((df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ || (regno == MMIX_FRAME_POINTER_REGNUM && frame_pointer_needed))
+ break;
+
+ /* Regardless of whether they're saved (they might be just read), we
+ mustn't include registers that carry parameters. We could scan the
+ insns to see whether they're actually used (and indeed do other less
+ trivial register usage analysis and transformations), but it seems
+ wasteful to optimize for unused parameter registers. As of
+ 2002-04-30, df_regs_ever_live_p (n) seems to be set even for registers
+ that are only read, but that might change. */
+ if (!TARGET_ABI_GNU && regno < crtl->args.info.regs - 1)
+ {
+ regno = crtl->args.info.regs - 1;
+
+ /* We don't want this to push us over the limit, misnumbering
+ incoming parameter registers and treating the last parameter
+ register and the incoming return value register as call-saved.
+ Stop at the unmodified scheme. */
+ if (regno > MMIX_RETURN_VALUE_REGNUM - 1)
+ regno = MMIX_RETURN_VALUE_REGNUM - 1;
+ }
+
+ cfun->machine->highest_saved_stack_register = regno;
+}
+
+/* TARGET_ASM_FUNCTION_EPILOGUE. */
+
+static void
+mmix_target_asm_function_epilogue (FILE *stream,
+ HOST_WIDE_INT locals_size ATTRIBUTE_UNUSED)
+{
+ /* Emit an \n for readability of the generated assembly. */
+ fputc ('\n', stream);
+}
+
+/* TARGET_ASM_OUTPUT_MI_THUNK. */
+
+static void
+mmix_asm_output_mi_thunk (FILE *stream,
+ tree fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree func)
+{
+ /* If you define TARGET_STRUCT_VALUE_RTX that returns 0 (i.e. pass
+ location of structure to return as invisible first argument), you
+ need to tweak this code too. */
+ const char *regname = reg_names[MMIX_FIRST_INCOMING_ARG_REGNUM];
+
+ if (delta >= 0 && delta < 65536)
+ fprintf (stream, "\tINCL %s,%d\n", regname, (int)delta);
+ else if (delta < 0 && delta >= -255)
+ fprintf (stream, "\tSUBU %s,%s,%d\n", regname, regname, (int)-delta);
+ else
+ {
+ mmix_output_register_setting (stream, 255, delta, 1);
+ fprintf (stream, "\tADDU %s,%s,$255\n", regname, regname);
+ }
+
+ fprintf (stream, "\tJMP ");
+ assemble_name (stream, XSTR (XEXP (DECL_RTL (func), 0), 0));
+ fprintf (stream, "\n");
+}
+
+/* FUNCTION_PROFILER. */
+
+void
+mmix_function_profiler (FILE *stream ATTRIBUTE_UNUSED,
+ int labelno ATTRIBUTE_UNUSED)
+{
+ sorry ("function_profiler support for MMIX");
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. For the moment,
+ let's stick to pushing argument registers on the stack. Later, we
+ can keep all arguments in registers, to improve performance.
+
+static void
+mmix_setup_incoming_varargs (CUMULATIVE_ARGS *args_so_farp,
+ enum machine_mode mode,
+ tree vartype,
+ int *pretend_sizep,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ /* The last named variable has been handled, but
+ args_so_farp has not been advanced for it. */
+ if (args_so_farp->regs + 1 < MMIX_MAX_ARGS_IN_REGS)
+ *pretend_sizep = (MMIX_MAX_ARGS_IN_REGS - (args_so_farp->regs + 1)) * 8;
+
+ /* We assume that one argument takes up one register here. That should
+ be true until we start messing with multi-reg parameters. */
+ if ((7 + (MMIX_FUNCTION_ARG_SIZE (mode, vartype))) / 8 != 1)
+ internal_error ("MMIX Internal: Last named vararg would not fit in a register");
+}
+
+/* TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+
+static void
+mmix_asm_trampoline_template (FILE *stream)
+{
+ /* Read a value into the static-chain register and jump somewhere. The
+ static chain is stored at offset 16, and the function address is
+ stored at offset 24. */
+
+ fprintf (stream, "\tGETA $255,1F\n\t");
+ fprintf (stream, "LDOU %s,$255,0\n\t", reg_names[MMIX_STATIC_CHAIN_REGNUM]);
+ fprintf (stream, "LDOU $255,$255,8\n\t");
+ fprintf (stream, "GO $255,$255,0\n");
+ fprintf (stream, "1H\tOCTA 0\n\t");
+ fprintf (stream, "OCTA 0\n");
+}
+
+/* TARGET_TRAMPOLINE_INIT. */
+/* Set the static chain and function pointer fields in the trampoline.
+ We also SYNCID here to be sure (it doesn't matter in the simulator,
+ but some day it will). */
+
+static void
+mmix_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, DImode, 2*UNITS_PER_WORD);
+ emit_move_insn (mem, static_chain);
+ mem = adjust_address (m_tramp, DImode, 3*UNITS_PER_WORD);
+ emit_move_insn (mem, fnaddr);
+
+ mem = adjust_address (m_tramp, DImode, 0);
+ emit_insn (gen_sync_icache (mem, GEN_INT (TRAMPOLINE_SIZE - 1)));
+}
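+
+/* The resulting trampoline layout, with UNITS_PER_WORD == 8, is:
+ bytes 0..15: GETA $255,1F; LDOU <chain>,$255,0; LDOU $255,$255,8;
+ GO $255,$255,0 (copied from the template above);
+ bytes 16..23: OCTA <static chain value>;
+ bytes 24..31: OCTA <function address>;
+ the whole thing then being flushed through gen_sync_icache. */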
+
+/* We must exclude constant addresses whose addend is not a
+ multiple of four bytes because of restrictions of the GETA
+ instruction, unless TARGET_BASE_ADDRESSES. */
+
+int
+mmix_constant_address_p (rtx x)
+{
+ RTX_CODE code = GET_CODE (x);
+ int addend = 0;
+ /* When using "base addresses", anything constant goes. */
+ int constant_ok = TARGET_BASE_ADDRESSES != 0;
+
+ switch (code)
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return 1;
+
+ case HIGH:
+ /* FIXME: Don't know how to dissect these. Avoid them for now,
+ except we know they're constants. */
+ return constant_ok;
+
+ case CONST_INT:
+ addend = INTVAL (x);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) != VOIDmode)
+ /* Strange that we got here. FIXME: Check if we do. */
+ return constant_ok;
+ addend = CONST_DOUBLE_LOW (x);
+ break;
+
+ case CONST:
+ /* Note that expressions with arithmetic on forward references don't
+ work in mmixal. People using gcc assembly code with mmixal might
+ need to move arrays and such to before the point of use. */
+ if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ rtx x0 = XEXP (XEXP (x, 0), 0);
+ rtx x1 = XEXP (XEXP (x, 0), 1);
+
+ if ((GET_CODE (x0) == SYMBOL_REF
+ || GET_CODE (x0) == LABEL_REF)
+ && (GET_CODE (x1) == CONST_INT
+ || (GET_CODE (x1) == CONST_DOUBLE
+ && GET_MODE (x1) == VOIDmode)))
+ addend = mmix_intval (x1);
+ else
+ return constant_ok;
+ }
+ else
+ return constant_ok;
+ break;
+
+ default:
+ return 0;
+ }
+
+ return constant_ok || (addend & 3) == 0;
+}
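+
+/* So e.g. "sym" and "sym+4" are constant addresses here, while "sym+2"
+ is rejected because of the GETA restriction, unless
+ TARGET_BASE_ADDRESSES is in effect. */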
+
+/* Return 1 if the address is OK, otherwise 0. */
+
+bool
+mmix_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x,
+ bool strict_checking)
+{
+#define MMIX_REG_OK(X) \
+ ((strict_checking \
+ && (REGNO (X) <= MMIX_LAST_GENERAL_REGISTER \
+ || (reg_renumber[REGNO (X)] > 0 \
+ && reg_renumber[REGNO (X)] <= MMIX_LAST_GENERAL_REGISTER))) \
+ || (!strict_checking \
+ && (REGNO (X) <= MMIX_LAST_GENERAL_REGISTER \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ || REGNO (X) == ARG_POINTER_REGNUM)))
+
+ /* We only accept:
+ (mem reg)
+ (mem (plus reg reg))
+ (mem (plus reg 0..255)).
+ unless TARGET_BASE_ADDRESSES, in which case we accept all
+ (mem constant_address) too. */
+
+ /* (mem reg) */
+ if (REG_P (x) && MMIX_REG_OK (x))
+ return 1;
+
+ if (GET_CODE(x) == PLUS)
+ {
+ rtx x1 = XEXP (x, 0);
+ rtx x2 = XEXP (x, 1);
+
+ /* Try swapping the order. FIXME: Do we need this? */
+ if (! REG_P (x1))
+ {
+ rtx tem = x1;
+ x1 = x2;
+ x2 = tem;
+ }
+
+ /* (mem (plus (reg?) (?))) */
+ if (!REG_P (x1) || !MMIX_REG_OK (x1))
+ return TARGET_BASE_ADDRESSES && mmix_constant_address_p (x);
+
+ /* (mem (plus (reg) (reg?))) */
+ if (REG_P (x2) && MMIX_REG_OK (x2))
+ return 1;
+
+ /* (mem (plus (reg) (0..255?))) */
+ if (GET_CODE (x2) == CONST_INT
+ && CONST_OK_FOR_LETTER_P (INTVAL (x2), 'I'))
+ return 1;
+
+ return 0;
+ }
+
+ return TARGET_BASE_ADDRESSES && mmix_constant_address_p (x);
+}
+
+/* LEGITIMATE_CONSTANT_P. */
+
+int
+mmix_legitimate_constant_p (rtx x)
+{
+ RTX_CODE code = GET_CODE (x);
+
+ /* We must allow any number due to the way the cse passes work; if we
+ do not allow any number here, general_operand will fail, and insns
+ will fail recognition fatally instead of "softly". */
+ if (code == CONST_INT || code == CONST_DOUBLE)
+ return 1;
+
+ return CONSTANT_ADDRESS_P (x);
+}
+
+/* SELECT_CC_MODE. */
+
+enum machine_mode
+mmix_select_cc_mode (RTX_CODE op, rtx x, rtx y ATTRIBUTE_UNUSED)
+{
+ /* We use CCmode, CC_UNSmode, CC_FPmode, CC_FPEQmode and CC_FUNmode to
+ output different compare insns. Note that we do not check the
+ validity of the comparison here. */
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ if (op == ORDERED || op == UNORDERED || op == UNGE
+ || op == UNGT || op == UNLE || op == UNLT)
+ return CC_FUNmode;
+
+ if (op == EQ || op == NE)
+ return CC_FPEQmode;
+
+ return CC_FPmode;
+ }
+
+ if (op == GTU || op == LTU || op == GEU || op == LEU)
+ return CC_UNSmode;
+
+ return CCmode;
+}
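+
+/* For example, per the tests above: (lt:DI x y) gets plain CCmode,
+ (ltu:DI x y) CC_UNSmode, (eq:DF x y) CC_FPEQmode, (unlt:DF x y)
+ CC_FUNmode and (lt:DF x y) CC_FPmode. */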
+
+/* REVERSIBLE_CC_MODE. */
+
+int
+mmix_reversible_cc_mode (enum machine_mode mode)
+{
+ /* That is, all integer compares and the EQ, NE, ORDERED and
+ UNORDERED float compares. */
+ return mode != CC_FPmode;
+}
+
+/* TARGET_RTX_COSTS. */
+
+static bool
+mmix_rtx_costs (rtx x ATTRIBUTE_UNUSED,
+ int code ATTRIBUTE_UNUSED,
+ int outer_code ATTRIBUTE_UNUSED,
+ int *total ATTRIBUTE_UNUSED,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ /* For the time being, this is just a stub and we'll accept the
+ generic calculations, until we can do measurements, at least.
+ Say we did not modify any calculated costs. */
+ return false;
+}
+
+/* REGISTER_MOVE_COST. */
+
+int
+mmix_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class from,
+ enum reg_class to)
+{
+ return (from == GENERAL_REGS && from == to) ? 2 : 3;
+}
+
+/* Note that we don't have a TEXT_SECTION_ASM_OP, because it has to be a
+ compile-time constant; it's used in an asm in crtstuff.c, compiled for
+ the target. */
+
+/* DATA_SECTION_ASM_OP. */
+
+const char *
+mmix_data_section_asm_op (void)
+{
+ return "\t.data ! mmixal:= 8H LOC 9B";
+}
+
+static void
+mmix_encode_section_info (tree decl, rtx rtl, int first)
+{
+ /* Test for an external declaration, and do nothing if it is one. */
+ if ((TREE_CODE (decl) == VAR_DECL
+ && (DECL_EXTERNAL (decl) || TREE_PUBLIC (decl)))
+ || (TREE_CODE (decl) == FUNCTION_DECL && TREE_PUBLIC (decl)))
+ ;
+ else if (first && DECL_P (decl))
+ {
+ /* For non-visible declarations, add a "@" prefix, which we skip
+ when the label is output. If the label does not have this
+ prefix, a ":" is output if -mtoplevel-symbols.
+
+ Note that this does not work for data that is declared extern and
+ later defined as static. If there's code in between, that code
+ will refer to the extern declaration, and vice versa. This just
+ means that when -mtoplevel-symbols is in use, we can only handle
+ well-behaved ISO-compliant code. */
+
+ const char *str = XSTR (XEXP (rtl, 0), 0);
+ int len = strlen (str);
+ char *newstr = XALLOCAVEC (char, len + 2);
+ newstr[0] = '@';
+ strcpy (newstr + 1, str);
+ XSTR (XEXP (rtl, 0), 0) = ggc_alloc_string (newstr, len + 1);
+ }
+
+ /* Set SYMBOL_REF_FLAG for things that we want to access with GETA. We
+ may need different options to reach for different things with GETA.
+ For now, functions and things we know or have been told are constant. */
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ || TREE_CONSTANT (decl)
+ || (TREE_CODE (decl) == VAR_DECL
+ && TREE_READONLY (decl)
+ && !TREE_SIDE_EFFECTS (decl)
+ && (!DECL_INITIAL (decl)
+ || TREE_CONSTANT (DECL_INITIAL (decl)))))
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
+}
+
+static const char *
+mmix_strip_name_encoding (const char *name)
+{
+ for (; (*name == '@' || *name == '*'); name++)
+ ;
+
+ return name;
+}
+
+/* TARGET_ASM_FILE_START.
+ We just emit a little comment for the time being. */
+
+static void
+mmix_file_start (void)
+{
+ default_file_start ();
+
+ fputs ("! mmixal:= 8H LOC Data_Section\n", asm_out_file);
+
+ /* Make sure each file starts with the text section. */
+ switch_to_section (text_section);
+}
+
+/* TARGET_ASM_FILE_END. */
+
+static void
+mmix_file_end (void)
+{
+ /* Make sure each file ends with the data section. */
+ switch_to_section (data_section);
+}
+
+/* TARGET_ASM_OUTPUT_SOURCE_FILENAME. */
+
+static void
+mmix_asm_output_source_filename (FILE *stream, const char *name)
+{
+ fprintf (stream, "# 1 ");
+ OUTPUT_QUOTED_STRING (stream, name);
+ fprintf (stream, "\n");
+}
+
+/* OUTPUT_QUOTED_STRING. */
+
+void
+mmix_output_quoted_string (FILE *stream, const char *string, int length)
+{
+ const char * string_end = string + length;
+ static const char *const unwanted_chars = "\"[]\\";
+
+ /* Output "any character except newline and double quote character". We
+ play it safe and avoid all control characters too. We also do not
+ want [] as characters, should input be passed through m4 with [] as
+ quotes. Further, we avoid "\", because the GAS port handles it as a
+ quoting character. */
+ while (string < string_end)
+ {
+ if (*string
+ && (unsigned char) *string < 128
+ && !ISCNTRL (*string)
+ && strchr (unwanted_chars, *string) == NULL)
+ {
+ fputc ('"', stream);
+ while (*string
+ && (unsigned char) *string < 128
+ && !ISCNTRL (*string)
+ && strchr (unwanted_chars, *string) == NULL
+ && string < string_end)
+ {
+ fputc (*string, stream);
+ string++;
+ }
+ fputc ('"', stream);
+ if (string < string_end)
+ fprintf (stream, ",");
+ }
+ if (string < string_end)
+ {
+ fprintf (stream, "#%x", *string & 255);
+ string++;
+ if (string < string_end)
+ fprintf (stream, ",");
+ }
+ }
+}
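+
+/* For instance, the four input characters ab"c come out as
+ "ab",#22,"c": runs of safe printable characters are grouped within
+ quotes and everything else is emitted as a #hex byte, all
+ comma-separated. */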
+
+/* Target hook for assembling integer objects. Use mmix_print_operand
+ for WYDE and TETRA. Use mmix_output_octa to output 8-byte
+ CONST_DOUBLEs. */
+
+static bool
+mmix_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (aligned_p)
+ switch (size)
+ {
+ /* We handle a limited number of types of operands in here. But
+ that's ok, because we can punt to generic functions. We then
+ pretend that aligned data isn't needed, so the usual .<pseudo>
+ syntax is used (which works for aligned data too). We actually
+ *must* do that, since we say we don't have simple aligned
+ pseudos, causing this function to be called. We just try to
+ keep as much compatibility as possible with mmixal syntax for
+ normal cases (i.e. C only, without GNU extensions). */
+ case 1:
+ if (GET_CODE (x) != CONST_INT)
+ {
+ aligned_p = 0;
+ break;
+ }
+ fputs ("\tBYTE\t", asm_out_file);
+ mmix_print_operand (asm_out_file, x, 'B');
+ fputc ('\n', asm_out_file);
+ return true;
+
+ case 2:
+ if (GET_CODE (x) != CONST_INT)
+ {
+ aligned_p = 0;
+ break;
+ }
+ fputs ("\tWYDE\t", asm_out_file);
+ mmix_print_operand (asm_out_file, x, 'W');
+ fputc ('\n', asm_out_file);
+ return true;
+
+ case 4:
+ if (GET_CODE (x) != CONST_INT)
+ {
+ aligned_p = 0;
+ break;
+ }
+ fputs ("\tTETRA\t", asm_out_file);
+ mmix_print_operand (asm_out_file, x, 'L');
+ fputc ('\n', asm_out_file);
+ return true;
+
+ case 8:
+ /* We don't get here anymore for CONST_DOUBLE, because DImode
+ isn't expressed as CONST_DOUBLE, and DFmode is handled
+ elsewhere. */
+ gcc_assert (GET_CODE (x) != CONST_DOUBLE);
+ assemble_integer_with_op ("\tOCTA\t", x);
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
+
+/* ASM_OUTPUT_ASCII. */
+
+void
+mmix_asm_output_ascii (FILE *stream, const char *string, int length)
+{
+ while (length > 0)
+ {
+ int chunk_size = length > 60 ? 60 : length;
+ fprintf (stream, "\tBYTE ");
+ mmix_output_quoted_string (stream, string, chunk_size);
+ string += chunk_size;
+ length -= chunk_size;
+ fprintf (stream, "\n");
+ }
+}
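+
+/* So a 130-character string is emitted as three BYTE lines covering 60,
+ 60 and 10 characters respectively. */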
+
+/* ASM_OUTPUT_ALIGNED_COMMON. */
+
+void
+mmix_asm_output_aligned_common (FILE *stream,
+ const char *name,
+ int size,
+ int align)
+{
+ /* This is mostly the elfos.h one. There doesn't seem to be a way to
+ express this in a mmixal-compatible way. */
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ fprintf (stream, ",%u,%u ! mmixal-incompatible COMMON\n",
+ size, align / BITS_PER_UNIT);
+}
+
+/* ASM_OUTPUT_ALIGNED_LOCAL. */
+
+void
+mmix_asm_output_aligned_local (FILE *stream,
+ const char *name,
+ int size,
+ int align)
+{
+ switch_to_section (data_section);
+
+ ASM_OUTPUT_ALIGN (stream, exact_log2 (align/BITS_PER_UNIT));
+ assemble_name (stream, name);
+ fprintf (stream, "\tLOC @+%d\n", size);
+}
+
+/* ASM_OUTPUT_LABEL. */
+
+void
+mmix_asm_output_label (FILE *stream, const char *name)
+{
+ assemble_name (stream, name);
+ fprintf (stream, "\tIS @\n");
+}
+
+/* ASM_OUTPUT_INTERNAL_LABEL. */
+
+void
+mmix_asm_output_internal_label (FILE *stream, const char *name)
+{
+ assemble_name_raw (stream, name);
+ fprintf (stream, "\tIS @\n");
+}
+
+/* ASM_DECLARE_REGISTER_GLOBAL. */
+
+void
+mmix_asm_declare_register_global (FILE *stream ATTRIBUTE_UNUSED,
+ tree decl ATTRIBUTE_UNUSED,
+ int regno ATTRIBUTE_UNUSED,
+ const char *name ATTRIBUTE_UNUSED)
+{
+ /* Nothing to do here yet, but there *will* be, so the framework is
+ in place. */
+}
+
+/* ASM_WEAKEN_LABEL. */
+
+void
+mmix_asm_weaken_label (FILE *stream,
+ const char *name)
+{
+ fprintf (stream, "\t.weak ");
+ assemble_name (stream, name);
+ fprintf (stream, " ! mmixal-incompatible\n");
+}
+
+/* MAKE_DECL_ONE_ONLY. */
+
+void
+mmix_make_decl_one_only (tree decl)
+{
+ DECL_WEAK (decl) = 1;
+}
+
+/* ASM_OUTPUT_LABELREF.
+ Strip GCC's '*' and our own '@'. No order is assumed. */
+
+void
+mmix_asm_output_labelref (FILE *stream, const char *name)
+{
+ int is_extern = 1;
+
+ for (; (*name == '@' || *name == '*'); name++)
+ if (*name == '@')
+ is_extern = 0;
+
+ asm_fprintf (stream, "%s%U%s",
+ is_extern && TARGET_TOPLEVEL_SYMBOLS ? ":" : "",
+ name);
+}
+
+/* ASM_OUTPUT_DEF. */
+
+void
+mmix_asm_output_def (FILE *stream, const char *name, const char *value)
+{
+ assemble_name (stream, name);
+ fprintf (stream, "\tIS ");
+ assemble_name (stream, value);
+ fputc ('\n', stream);
+}
+
+/* PRINT_OPERAND. */
+
+void
+mmix_print_operand (FILE *stream, rtx x, int code)
+{
+ /* When we add support for different codes later, we can, when needed,
+ drop through to the main handler with a modified operand. */
+ rtx modified_x = x;
+ int regno = x != NULL_RTX && REG_P (x) ? REGNO (x) : 0;
+
+ switch (code)
+ {
+ /* Unrelated codes are in alphabetic order. */
+
+ case '+':
+ /* For conditional branches, output "P" for a probable branch. */
+ if (TARGET_BRANCH_PREDICT)
+ {
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x && INTVAL (XEXP (x, 0)) > REG_BR_PROB_BASE / 2)
+ putc ('P', stream);
+ }
+ return;
+
+ case '.':
+ /* For the %d in POP %d,0. */
+ fprintf (stream, "%d", MMIX_POP_ARGUMENT ());
+ return;
+
+ case 'B':
+ if (GET_CODE (x) != CONST_INT)
+ fatal_insn ("MMIX Internal: Expected a CONST_INT, not this", x);
+ fprintf (stream, "%d", (int) (INTVAL (x) & 0xff));
+ return;
+
+ case 'H':
+ /* Highpart. Must be a general register, and not the last one, as
+ that one cannot be part of a consecutive register pair. */
+ if (regno > MMIX_LAST_GENERAL_REGISTER - 1)
+ internal_error ("MMIX Internal: Bad register: %d", regno);
+
+ /* This is big-endian, so the high-part is the first one. */
+ fprintf (stream, "%s", reg_names[MMIX_OUTPUT_REGNO (regno)]);
+ return;
+
+ case 'L':
+ /* Lowpart. Must be a CONST_INT or a general register, and not the
+ last one, as that one cannot be part of a consecutive register pair. */
+ if (GET_CODE (x) == CONST_INT)
+ {
+ fprintf (stream, "#%lx",
+ (unsigned long) (INTVAL (x)
+ & ((unsigned int) 0x7fffffff * 2 + 1)));
+ return;
+ }
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ output_addr_const (stream, x);
+ return;
+ }
+
+ if (regno > MMIX_LAST_GENERAL_REGISTER - 1)
+ internal_error ("MMIX Internal: Bad register: %d", regno);
+
+ /* This is big-endian, so the low-part is + 1. */
+ fprintf (stream, "%s", reg_names[MMIX_OUTPUT_REGNO (regno) + 1]);
+ return;
+
+ /* Can't use 'a' because that's a generic modifier for address
+ output. */
+ case 'A':
+ mmix_output_shiftvalue_op_from_str (stream, "ANDN",
+ ~(unsigned HOST_WIDEST_INT)
+ mmix_intval (x));
+ return;
+
+ case 'i':
+ mmix_output_shiftvalue_op_from_str (stream, "INC",
+ (unsigned HOST_WIDEST_INT)
+ mmix_intval (x));
+ return;
+
+ case 'o':
+ mmix_output_shiftvalue_op_from_str (stream, "OR",
+ (unsigned HOST_WIDEST_INT)
+ mmix_intval (x));
+ return;
+
+ case 's':
+ mmix_output_shiftvalue_op_from_str (stream, "SET",
+ (unsigned HOST_WIDEST_INT)
+ mmix_intval (x));
+ return;
+
+ case 'd':
+ case 'D':
+ mmix_output_condition (stream, x, (code == 'D'));
+ return;
+
+ case 'e':
+ /* Output an extra "e" to make fcmpe, fune. */
+ if (TARGET_FCMP_EPSILON)
+ fprintf (stream, "e");
+ return;
+
+ case 'm':
+ /* Output the number minus 1. */
+ if (GET_CODE (x) != CONST_INT)
+ {
+ fatal_insn ("MMIX Internal: Bad value for 'm', not a CONST_INT",
+ x);
+ }
+ fprintf (stream, HOST_WIDEST_INT_PRINT_DEC,
+ (HOST_WIDEST_INT) (mmix_intval (x) - 1));
+ return;
+
+ case 'p':
+ /* Store the number of registers we want to save. This was set up
+ by the prologue. The actual operand contains the number of
+ registers to pass, but we don't use it currently. Anyway, we
+ need to output the number of saved registers here. */
+ fprintf (stream, "%d",
+ cfun->machine->highest_saved_stack_register + 1);
+ return;
+
+ case 'r':
+ /* Store the register to output a constant to. */
+ if (! REG_P (x))
+ fatal_insn ("MMIX Internal: Expected a register, not this", x);
+ mmix_output_destination_register = MMIX_OUTPUT_REGNO (regno);
+ return;
+
+ case 'I':
+ /* Output the constant. Note that we use this for floats as well. */
+ if (GET_CODE (x) != CONST_INT
+ && (GET_CODE (x) != CONST_DOUBLE
+ || (GET_MODE (x) != VOIDmode && GET_MODE (x) != DFmode
+ && GET_MODE (x) != SFmode)))
+ fatal_insn ("MMIX Internal: Expected a constant, not this", x);
+ mmix_output_register_setting (stream,
+ mmix_output_destination_register,
+ mmix_intval (x), 0);
+ return;
+
+ case 'U':
+ /* A 'U' for unsigned, if TARGET_ZERO_EXTEND. Ignore the operand. */
+ if (TARGET_ZERO_EXTEND)
+ putc ('U', stream);
+ return;
+
+ case 'v':
+ mmix_output_shifted_value (stream, (HOST_WIDEST_INT) mmix_intval (x));
+ return;
+
+ case 'V':
+ mmix_output_shifted_value (stream, (HOST_WIDEST_INT) ~mmix_intval (x));
+ return;
+
+ case 'W':
+ if (GET_CODE (x) != CONST_INT)
+ fatal_insn ("MMIX Internal: Expected a CONST_INT, not this", x);
+ fprintf (stream, "#%x", (int) (INTVAL (x) & 0xffff));
+ return;
+
+ case 0:
+ /* Nothing to do. */
+ break;
+
+ default:
+ /* Presumably there's a missing case above if we get here. */
+ internal_error ("MMIX Internal: Missing %qc case in mmix_print_operand", code);
+ }
+
+ switch (GET_CODE (modified_x))
+ {
+ case REG:
+ regno = REGNO (modified_x);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ internal_error ("MMIX Internal: Bad register: %d", regno);
+ fprintf (stream, "%s", reg_names[MMIX_OUTPUT_REGNO (regno)]);
+ return;
+
+ case MEM:
+ output_address (XEXP (modified_x, 0));
+ return;
+
+ case CONST_INT:
+ /* For -2147483648, mmixal complains that the constant does not fit
+ in 4 bytes, so let's output it as hex. Take care to handle hosts
+ where HOST_WIDE_INT is longer than an int.
+
+ Print small constants in the range -255..255 using decimal. */
+
+ if (INTVAL (modified_x) > -256 && INTVAL (modified_x) < 256)
+ fprintf (stream, "%d", (int) (INTVAL (modified_x)));
+ else
+ fprintf (stream, "#%x",
+ (int) (INTVAL (modified_x)) & (unsigned int) ~0);
+ return;
+
+ case CONST_DOUBLE:
+ /* Do somewhat as CONST_INT. */
+ mmix_output_octa (stream, mmix_intval (modified_x), 0);
+ return;
+
+ case CONST:
+ output_addr_const (stream, modified_x);
+ return;
+
+ default:
+ /* No need to test for all strange things. Let output_addr_const do
+ it for us. */
+ if (CONSTANT_P (modified_x)
+ /* Strangely enough, this is not included in CONSTANT_P.
+ FIXME: Ask/check about sanity here. */
+ || GET_CODE (modified_x) == CODE_LABEL)
+ {
+ output_addr_const (stream, modified_x);
+ return;
+ }
+
+ /* We need the original here. */
+ fatal_insn ("MMIX Internal: Cannot decode this operand", x);
+ }
+}
+
+/* PRINT_OPERAND_PUNCT_VALID_P. */
+
+int
+mmix_print_operand_punct_valid_p (int code ATTRIBUTE_UNUSED)
+{
+ /* A '+' is used for branch prediction, similar to other ports. */
+ return code == '+'
+ /* A '.' is used for the %d in the POP %d,0 return insn. */
+ || code == '.';
+}
+
+/* PRINT_OPERAND_ADDRESS. */
+
+void
+mmix_print_operand_address (FILE *stream, rtx x)
+{
+ if (REG_P (x))
+ {
+ /* I find the generated assembly code harder to read without
+ the ",0". */
+ fprintf (stream, "%s,0", reg_names[MMIX_OUTPUT_REGNO (REGNO (x))]);
+ return;
+ }
+ else if (GET_CODE (x) == PLUS)
+ {
+ rtx x1 = XEXP (x, 0);
+ rtx x2 = XEXP (x, 1);
+
+ if (REG_P (x1))
+ {
+ fprintf (stream, "%s,", reg_names[MMIX_OUTPUT_REGNO (REGNO (x1))]);
+
+ if (REG_P (x2))
+ {
+ fprintf (stream, "%s",
+ reg_names[MMIX_OUTPUT_REGNO (REGNO (x2))]);
+ return;
+ }
+ else if (GET_CODE (x2) == CONST_INT
+ && CONST_OK_FOR_LETTER_P (INTVAL (x2), 'I'))
+ {
+ output_addr_const (stream, x2);
+ return;
+ }
+ }
+ }
+
+ if (TARGET_BASE_ADDRESSES && mmix_legitimate_constant_p (x))
+ {
+ output_addr_const (stream, x);
+ return;
+ }
+
+ fatal_insn ("MMIX Internal: This is not a recognized address", x);
+}
+
+/* ASM_OUTPUT_REG_PUSH. */
+
+void
+mmix_asm_output_reg_push (FILE *stream, int regno)
+{
+ fprintf (stream, "\tSUBU %s,%s,8\n\tSTOU %s,%s,0\n",
+ reg_names[MMIX_STACK_POINTER_REGNUM],
+ reg_names[MMIX_STACK_POINTER_REGNUM],
+ reg_names[MMIX_OUTPUT_REGNO (regno)],
+ reg_names[MMIX_STACK_POINTER_REGNUM]);
+}
+
+/* ASM_OUTPUT_REG_POP. */
+
+void
+mmix_asm_output_reg_pop (FILE *stream, int regno)
+{
+ fprintf (stream, "\tLDOU %s,%s,0\n\tINCL %s,8\n",
+ reg_names[MMIX_OUTPUT_REGNO (regno)],
+ reg_names[MMIX_STACK_POINTER_REGNUM],
+ reg_names[MMIX_STACK_POINTER_REGNUM]);
+}
+
+/* ASM_OUTPUT_ADDR_DIFF_ELT. */
+
+void
+mmix_asm_output_addr_diff_elt (FILE *stream,
+ rtx body ATTRIBUTE_UNUSED,
+ int value,
+ int rel)
+{
+ fprintf (stream, "\tTETRA L%d-L%d\n", value, rel);
+}
+
+/* ASM_OUTPUT_ADDR_VEC_ELT. */
+
+void
+mmix_asm_output_addr_vec_elt (FILE *stream, int value)
+{
+ fprintf (stream, "\tOCTA L:%d\n", value);
+}
+
+/* ASM_OUTPUT_SKIP. */
+
+void
+mmix_asm_output_skip (FILE *stream, int nbytes)
+{
+ fprintf (stream, "\tLOC @+%d\n", nbytes);
+}
+
+/* ASM_OUTPUT_ALIGN. */
+
+void
+mmix_asm_output_align (FILE *stream, int power)
+{
+ /* We need to record the needed alignment of this section in the object,
+ so we have to output an alignment directive. Use a .p2align (not
+ .align) so people will never have to wonder about whether the
+ argument is in number of bytes or the log2 thereof. We do it in
+ addition to the LOC directive, so nothing needs tweaking when
+ copy-pasting assembly into mmixal. */
+ fprintf (stream, "\t.p2align %d\n", power);
+ fprintf (stream, "\tLOC @+(%d-@)&%d\n", 1 << power, (1 << power) - 1);
+}
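+
+/* For example, a POWER of 3 (8-byte alignment) would emit
+ .p2align 3
+ LOC @+(8-@)&7
+ where the LOC expression advances to the next 8-byte boundary. */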
+
+/* DBX_REGISTER_NUMBER. */
+
+unsigned
+mmix_dbx_register_number (unsigned regno)
+{
+ /* Adjust the register number to the one it will be output as, dammit.
+ It'd be nice if we could check the assumption that we're filling a
+ gap, but every register between the last saved register and parameter
+ registers might be a valid parameter register. */
+ regno = MMIX_OUTPUT_REGNO (regno);
+
+ /* We need to renumber registers to get the number of the return address
+ register in the range 0..255. It is also space-saving if registers
+ mentioned in the call-frame information (which uses this function by
+ defaulting DWARF_FRAME_REGNUM to DBX_REGISTER_NUMBER) are numbered
+ 0 .. 63. So map 224 .. 256+15 -> 0 .. 47 and 0 .. 223 -> 48..223+48. */
+ return regno >= 224 ? (regno - 224) : (regno + 48);
+}
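+
+/* As a worked example of the mapping above, after the MMIX_OUTPUT_REGNO
+ adjustment: 224 -> 0, 255 -> 31, 256+15 -> 47, 0 -> 48 and
+ 223 -> 271. */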
+
+/* End of target macro support functions.
+
+ Now the MMIX port's own functions. First the exported ones. */
+
+/* Wrapper for get_hard_reg_initial_val since integrate.h isn't included
+ from insn-emit.c. */
+
+rtx
+mmix_get_hard_reg_initial_val (enum machine_mode mode, int regno)
+{
+ return get_hard_reg_initial_val (mode, regno);
+}
+
+/* Nonzero when the function epilogue is simple enough that a single
+ "POP %d,0" should be used even within the function. */
+
+int
+mmix_use_simple_return (void)
+{
+ int regno;
+
+ int stack_space_to_allocate
+ = (crtl->outgoing_args_size
+ + crtl->args.pretend_args_size
+ + get_frame_size () + 7) & ~7;
+
+ if (!TARGET_USE_RETURN_INSN || !reload_completed)
+ return 0;
+
+ for (regno = 255;
+ regno >= MMIX_FIRST_GLOBAL_REGNUM;
+ regno--)
+ /* Note that we assume that the frame-pointer-register is one of these
+ registers, in which case we don't count it here. */
+ if ((((regno != MMIX_FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && df_regs_ever_live_p (regno) && !call_used_regs[regno]))
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ return 0;
+
+ if (frame_pointer_needed)
+ stack_space_to_allocate += 8;
+
+ if (MMIX_CFUN_HAS_LANDING_PAD)
+ stack_space_to_allocate += 16;
+ else if (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS)
+ stack_space_to_allocate += 8;
+
+ return stack_space_to_allocate == 0;
+}
+
+
+/* Expands the function prologue into RTX. */
+
+void
+mmix_expand_prologue (void)
+{
+ HOST_WIDE_INT locals_size = get_frame_size ();
+ int regno;
+ HOST_WIDE_INT stack_space_to_allocate
+ = (crtl->outgoing_args_size
+ + crtl->args.pretend_args_size
+ + locals_size + 7) & ~7;
+ HOST_WIDE_INT offset = -8;
+
+ /* Add room needed to save global non-register-stack registers. */
+ for (regno = 255;
+ regno >= MMIX_FIRST_GLOBAL_REGNUM;
+ regno--)
+ /* Note that we assume that the frame-pointer-register is one of these
+ registers, in which case we don't count it here. */
+ if ((((regno != MMIX_FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && df_regs_ever_live_p (regno) && !call_used_regs[regno]))
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ stack_space_to_allocate += 8;
+
+ /* If we do have a frame-pointer, add room for it. */
+ if (frame_pointer_needed)
+ stack_space_to_allocate += 8;
+
+ /* If we have a non-local label, we need to be able to unwind to it, so
+ store the current register stack pointer. Also store the return
+ address if we do that. */
+ if (MMIX_CFUN_HAS_LANDING_PAD)
+ stack_space_to_allocate += 16;
+ else if (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS)
+ /* If we do have a saved return-address slot, add room for it. */
+ stack_space_to_allocate += 8;
+
+ /* Make sure we don't get an unaligned stack. */
+ if ((stack_space_to_allocate % 8) != 0)
+ internal_error ("stack frame not a multiple of 8 bytes: %wd",
+ stack_space_to_allocate);
+
+ if (crtl->args.pretend_args_size)
+ {
+ int mmix_first_vararg_reg
+ = (MMIX_FIRST_INCOMING_ARG_REGNUM
+ + (MMIX_MAX_ARGS_IN_REGS
+ - crtl->args.pretend_args_size / 8));
+
+ for (regno
+ = MMIX_FIRST_INCOMING_ARG_REGNUM + MMIX_MAX_ARGS_IN_REGS - 1;
+ regno >= mmix_first_vararg_reg;
+ regno--)
+ {
+ if (offset < 0)
+ {
+ HOST_WIDE_INT stack_chunk
+ = stack_space_to_allocate > (256 - 8)
+ ? (256 - 8) : stack_space_to_allocate;
+
+ mmix_emit_sp_add (-stack_chunk);
+ offset += stack_chunk;
+ stack_space_to_allocate -= stack_chunk;
+ }
+
+ /* These registers aren't actually saved (as in "will be
+ restored"), so don't tell DWARF2 they're saved. */
+ emit_move_insn (gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ gen_rtx_REG (DImode, regno));
+ offset -= 8;
+ }
+ }
+
+ /* Store the frame-pointer. */
+
+ if (frame_pointer_needed)
+ {
+ rtx insn;
+
+ if (offset < 0)
+ {
+ /* Get 8 less than otherwise, since we need to reach offset + 8. */
+ HOST_WIDE_INT stack_chunk
+ = stack_space_to_allocate > (256 - 8 - 8)
+ ? (256 - 8 - 8) : stack_space_to_allocate;
+
+ mmix_emit_sp_add (-stack_chunk);
+
+ offset += stack_chunk;
+ stack_space_to_allocate -= stack_chunk;
+ }
+
+ insn = emit_move_insn (gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_adddi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (offset + 8)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset -= 8;
+ }
+
+ if (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS)
+ {
+ rtx tmpreg, retreg;
+ rtx insn;
+
+ /* Store the return-address, if one is needed on the stack. We
+ usually store it in a register when needed, but that doesn't work
+ with -fexceptions. */
+
+ if (offset < 0)
+ {
+ /* Get 8 less than otherwise, since we need to reach offset + 8. */
+ HOST_WIDE_INT stack_chunk
+ = stack_space_to_allocate > (256 - 8 - 8)
+ ? (256 - 8 - 8) : stack_space_to_allocate;
+
+ mmix_emit_sp_add (-stack_chunk);
+
+ offset += stack_chunk;
+ stack_space_to_allocate -= stack_chunk;
+ }
+
+ tmpreg = gen_rtx_REG (DImode, 255);
+ retreg = gen_rtx_REG (DImode, MMIX_rJ_REGNUM);
+
+ /* Dwarf2 code is confused by the use of a temporary register for
+ storing the return address, so we have to express it as a note,
+ which we attach to the actual store insn. */
+ emit_move_insn (tmpreg, retreg);
+
+ insn = emit_move_insn (gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ tmpreg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ retreg));
+
+ offset -= 8;
+ }
+ else if (MMIX_CFUN_HAS_LANDING_PAD)
+ offset -= 8;
+
+ if (MMIX_CFUN_HAS_LANDING_PAD)
+ {
+ /* Store the register defining the numbering of local registers, so
+ we know how long to unwind the register stack. */
+
+ if (offset < 0)
+ {
+ /* Get 8 less than otherwise, since we need to reach offset + 8. */
+ HOST_WIDE_INT stack_chunk
+ = stack_space_to_allocate > (256 - 8 - 8)
+ ? (256 - 8 - 8) : stack_space_to_allocate;
+
+ mmix_emit_sp_add (-stack_chunk);
+
+ offset += stack_chunk;
+ stack_space_to_allocate -= stack_chunk;
+ }
+
+ /* We don't tell dwarf2 about this one; we just have it to unwind
+ the register stack at landing pads. FIXME: It's a kludge because
+ we can't describe the effect of the PUSHJ and PUSHGO insns on the
+ register stack at the moment. Best thing would be to handle it
+ like stack-pointer offsets. Better: some hook into dwarf2out.c
+ to produce DW_CFA_expression:s that specify the increment of rO,
+ and unwind it at eh_return (preferred) or at the landing pad.
+ Then saves to $0..$G-1 could be specified through that register. */
+
+ emit_move_insn (gen_rtx_REG (DImode, 255),
+ gen_rtx_REG (DImode,
+ MMIX_rO_REGNUM));
+ emit_move_insn (gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx, offset)),
+ gen_rtx_REG (DImode, 255));
+ offset -= 8;
+ }
+
+ /* After the return-address and the frame-pointer, we have the local
+ variables. They're the ones that may have an "unaligned" size. */
+ offset -= (locals_size + 7) & ~7;
+
+ /* Now store all registers that are global, i.e. not saved by the
+ register file machinery.
+
+ It is assumed that the frame-pointer is one of these registers, so it
+ is explicitly excluded in the count. */
+
+ for (regno = 255;
+ regno >= MMIX_FIRST_GLOBAL_REGNUM;
+ regno--)
+ if (((regno != MMIX_FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ {
+ rtx insn;
+
+ if (offset < 0)
+ {
+ HOST_WIDE_INT stack_chunk
+ = (stack_space_to_allocate > (256 - offset - 8)
+ ? (256 - offset - 8) : stack_space_to_allocate);
+
+ mmix_emit_sp_add (-stack_chunk);
+ offset += stack_chunk;
+ stack_space_to_allocate -= stack_chunk;
+ }
+
+ insn = emit_move_insn (gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ gen_rtx_REG (DImode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset -= 8;
+ }
+
+ /* Finally, allocate room for outgoing args and local vars if room
+ wasn't allocated above. */
+ if (stack_space_to_allocate)
+ mmix_emit_sp_add (-stack_space_to_allocate);
+}
+
+/* Expands the function epilogue into RTX. */
+
+void
+mmix_expand_epilogue (void)
+{
+ HOST_WIDE_INT locals_size = get_frame_size ();
+ int regno;
+ HOST_WIDE_INT stack_space_to_deallocate
+ = (crtl->outgoing_args_size
+ + crtl->args.pretend_args_size
+ + locals_size + 7) & ~7;
+
+ /* The first address to access is beyond the outgoing_args area. */
+ HOST_WIDE_INT offset = crtl->outgoing_args_size;
+
+ /* Add the space for global non-register-stack registers.
+ It is assumed that the frame-pointer register can be one of these
+ registers, in which case it is excluded from the count when needed. */
+ for (regno = 255;
+ regno >= MMIX_FIRST_GLOBAL_REGNUM;
+ regno--)
+ if (((regno != MMIX_FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ stack_space_to_deallocate += 8;
+
+ /* Add in the space for the saved register stack-pointer, if any. When
+ that slot is present, always add room for the saved PC as well. */
+ if (MMIX_CFUN_HAS_LANDING_PAD)
+ stack_space_to_deallocate += 16;
+ else if (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS)
+ /* If we have a saved return-address slot, add it in. */
+ stack_space_to_deallocate += 8;
+
+ /* Add in the frame-pointer. */
+ if (frame_pointer_needed)
+ stack_space_to_deallocate += 8;
+
+ /* Make sure we don't get an unaligned stack. */
+ if ((stack_space_to_deallocate % 8) != 0)
+ internal_error ("stack frame not a multiple of octabyte: %wd",
+ stack_space_to_deallocate);
+
+ /* We will add back small offsets to the stack pointer as we go.
+ First, we restore all registers that are global, i.e. not saved by
+ the register file machinery. */
+
+ for (regno = MMIX_FIRST_GLOBAL_REGNUM;
+ regno <= 255;
+ regno++)
+ if (((regno != MMIX_FRAME_POINTER_REGNUM || !frame_pointer_needed)
+ && df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ || IS_MMIX_EH_RETURN_DATA_REG (regno))
+ {
+ if (offset > 255)
+ {
+ mmix_emit_sp_add (offset);
+ stack_space_to_deallocate -= offset;
+ offset = 0;
+ }
+
+ emit_move_insn (gen_rtx_REG (DImode, regno),
+ gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)));
+ offset += 8;
+ }
+
+ /* Here is where the local variables were. As in the prologue, they
+ might be of an unaligned size. */
+ offset += (locals_size + 7) & ~7;
+
+ /* The saved register stack pointer is just below the frame-pointer
+ register. We don't need to restore it "manually"; the POP
+ instruction does that. */
+ if (MMIX_CFUN_HAS_LANDING_PAD)
+ offset += 16;
+ else if (MMIX_CFUN_NEEDS_SAVED_EH_RETURN_ADDRESS)
+ /* The return-address slot is just below the frame-pointer register.
+ We don't need to restore it because we don't really use it. */
+ offset += 8;
+
+ /* Get back the old frame-pointer-value. */
+ if (frame_pointer_needed)
+ {
+ if (offset > 255)
+ {
+ mmix_emit_sp_add (offset);
+
+ stack_space_to_deallocate -= offset;
+ offset = 0;
+ }
+
+ emit_move_insn (hard_frame_pointer_rtx,
+ gen_rtx_MEM (DImode,
+ plus_constant (stack_pointer_rtx,
+ offset)));
+ offset += 8;
+ }
+
+ /* We do not need to restore pretended incoming args, just add back
+ offset to sp. */
+ if (stack_space_to_deallocate != 0)
+ mmix_emit_sp_add (stack_space_to_deallocate);
+
+ if (crtl->calls_eh_return)
+ /* Adjust the (normal) stack-pointer to that of the receiver.
+ FIXME: It would be nice if we could also adjust the register stack
+ here, but we need to express it through DWARF 2 too. */
+ emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
+ gen_rtx_REG (DImode,
+ MMIX_EH_RETURN_STACKADJ_REGNUM)));
+}
+
+/* Output an optimal sequence for setting a register to a specific
+ constant. Used in an alternative for const_ints in movdi, and when
+ using large stack-frame offsets.
+
+ Use do_begin_end to say whether a line-starting TAB before the first
+ insn and a newline after the last insn are wanted. */
+
+void
+mmix_output_register_setting (FILE *stream,
+ int regno,
+ HOST_WIDEST_INT value,
+ int do_begin_end)
+{
+ if (do_begin_end)
+ fprintf (stream, "\t");
+
+ if (mmix_shiftable_wyde_value ((unsigned HOST_WIDEST_INT) value))
+ {
+ /* First, the one-insn cases. */
+ mmix_output_shiftvalue_op_from_str (stream, "SET",
+ (unsigned HOST_WIDEST_INT)
+ value);
+ fprintf (stream, " %s,", reg_names[regno]);
+ mmix_output_shifted_value (stream, (unsigned HOST_WIDEST_INT) value);
+ }
+ else if (mmix_shiftable_wyde_value (-(unsigned HOST_WIDEST_INT) value))
+ {
+ /* We do this to get a bit more legible assembly code. The next
+ alternative is mostly redundant with this. */
+
+ mmix_output_shiftvalue_op_from_str (stream, "SET",
+ -(unsigned HOST_WIDEST_INT)
+ value);
+ fprintf (stream, " %s,", reg_names[regno]);
+ mmix_output_shifted_value (stream, -(unsigned HOST_WIDEST_INT) value);
+ fprintf (stream, "\n\tNEGU %s,0,%s", reg_names[regno],
+ reg_names[regno]);
+ }
+ else if (mmix_shiftable_wyde_value (~(unsigned HOST_WIDEST_INT) value))
+ {
+ /* Slightly more expensive, the two-insn cases. */
+
+ /* FIXME: We could of course also test if 0..255-N or ~(N | 1..255)
+ is shiftable, or any other one-insn transformation of the value.
+ FIXME: Check first whether the value is loadable with two insns
+ (two "shiftable" wyde groups), since that makes for more readable
+ assembly code (if anyone else cares). */
+
+ mmix_output_shiftvalue_op_from_str (stream, "SET",
+ ~(unsigned HOST_WIDEST_INT)
+ value);
+ fprintf (stream, " %s,", reg_names[regno]);
+ mmix_output_shifted_value (stream, ~(unsigned HOST_WIDEST_INT) value);
+ fprintf (stream, "\n\tNOR %s,%s,0", reg_names[regno],
+ reg_names[regno]);
+ }
+ else
+ {
+ /* The generic case. 2..4 insns. */
+ static const char *const higher_parts[] = {"L", "ML", "MH", "H"};
+ const char *op = "SET";
+ const char *line_begin = "";
+ int insns = 0;
+ int i;
+ HOST_WIDEST_INT tmpvalue = value;
+
+ /* Compute the number of insns needed to output this constant. */
+ for (i = 0; i < 4 && tmpvalue != 0; i++)
+ {
+ if (tmpvalue & 65535)
+ insns++;
+ tmpvalue >>= 16;
+ }
+ if (TARGET_BASE_ADDRESSES && insns == 3)
+ {
+ /* The number three is based on a static observation on
+ ghostscript-6.52. Two and four are excluded because there
+ are too many such constants, and each unique constant (maybe
+ offset by 1..255) was used only a few times compared to other
+ uses, e.g. addresses.
+
+ We use base-plus-offset addressing to force it into a global
+ register; we just use a "LDA reg,VALUE", which will cause the
+ assembler and linker to DTRT (for constants as well as
+ addresses). */
+ fprintf (stream, "LDA %s,", reg_names[regno]);
+ mmix_output_octa (stream, value, 0);
+ }
+ else
+ {
+ /* Output pertinent parts of the 4-wyde sequence.
+ Still more to do if we want this to be optimal, but hey...
+ Note that the zero case has been handled above. */
+ for (i = 0; i < 4 && value != 0; i++)
+ {
+ if (value & 65535)
+ {
+ fprintf (stream, "%s%s%s %s,#%x", line_begin, op,
+ higher_parts[i], reg_names[regno],
+ (int) (value & 65535));
+ /* The first one sets the rest of the bits to 0, the next
+ ones add set bits. */
+ op = "INC";
+ line_begin = "\n\t";
+ }
+
+ value >>= 16;
+ }
+ }
+ }
+
+ if (do_begin_end)
+ fprintf (stream, "\n");
+}
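+
+/* A sketch of the generic case above: for VALUE 0x1234000000000056 and
+ (say) REGNO 2, the loop would emit something like
+ SETL $2,#56
+ INCH $2,#1234
+ setting the low wyde first, then adding in the other nonzero wyde. */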
+
+/* Return 1 if value is 0..65535*2**(16*N) for N=0..3,
+ otherwise return 0. */
+
+int
+mmix_shiftable_wyde_value (unsigned HOST_WIDEST_INT value)
+{
+ /* Shift by 16 bits per group, stop when we've found two groups with
+ nonzero bits. */
+ int i;
+ int has_candidate = 0;
+
+ for (i = 0; i < 4; i++)
+ {
+ if (value & 65535)
+ {
+ if (has_candidate)
+ return 0;
+ else
+ has_candidate = 1;
+ }
+
+ value >>= 16;
+ }
+
+ return 1;
+}
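+
+/* For example, 0x5678 and 0x1234000000000000 (a single nonzero wyde
+ group) are shiftable, while 0x10001 (two nonzero groups) is not;
+ zero trivially is. */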
+
+/* X and Y are two things to compare using CODE. Return the rtx for
+ the cc-reg in the proper mode. */
+
+rtx
+mmix_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
+{
+ enum machine_mode ccmode = SELECT_CC_MODE (code, x, y);
+ return gen_reg_rtx (ccmode);
+}
+
+/* Local (static) helper functions. */
+
+static void
+mmix_emit_sp_add (HOST_WIDE_INT offset)
+{
+ rtx insn;
+
+ if (offset < 0)
+ {
+ /* Negative stack-pointer adjustments are allocations and appear in
+ the prologue only. We mark them as frame-related so unwind and
+ debug info is properly emitted for them. */
+ if (offset > -255)
+ insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+ else
+ {
+ rtx tmpr = gen_rtx_REG (DImode, 255);
+ RTX_FRAME_RELATED_P (emit_move_insn (tmpr, GEN_INT (offset))) = 1;
+ insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
+ stack_pointer_rtx, tmpr));
+ }
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ /* Positive adjustments are in the epilogue only. Don't mark them
+ as "frame-related" for unwind info. */
+ if (CONST_OK_FOR_LETTER_P (offset, 'L'))
+ emit_insn (gen_adddi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+ else
+ {
+ rtx tmpr = gen_rtx_REG (DImode, 255);
+ emit_move_insn (tmpr, GEN_INT (offset));
+ insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
+ stack_pointer_rtx, tmpr));
+ }
+ }
+}
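+
+/* For example, an allocation of 128 bytes fits the immediate form and
+ becomes a single add of -128, while an allocation of 512 bytes would
+ presumably go through the temporary register $255, the direct form
+ only handling offsets down to -254. */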
+
+/* Print an operator suitable for doing something with a shiftable
+ wyde. The type of operator is passed as an asm output modifier. */
+
+static void
+mmix_output_shiftvalue_op_from_str (FILE *stream,
+ const char *mainop,
+ HOST_WIDEST_INT value)
+{
+ static const char *const op_part[] = {"L", "ML", "MH", "H"};
+ int i;
+
+ if (! mmix_shiftable_wyde_value (value))
+ {
+ char s[sizeof ("0xffffffffffffffff")];
+ sprintf (s, HOST_WIDEST_INT_PRINT_HEX, value);
+ internal_error ("MMIX Internal: %s is not a shiftable int", s);
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ /* We know we're through when we find one-bits in the low
+ 16 bits. */
+ if (value & 0xffff)
+ {
+ fprintf (stream, "%s%s", mainop, op_part[i]);
+ return;
+ }
+ value >>= 16;
+ }
+
+ /* No bits set? Then it must have been zero. */
+ fprintf (stream, "%sL", mainop);
+}
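+
+/* For example, MAINOP "SET" with VALUE 0x12340000 would print "SETML",
+ the nonzero wyde being the second-lowest group, while VALUE 0 falls
+ through to the plain low variant "SETL". */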
+
+/* Print a 64-bit value, optionally prefixed by assembly pseudo. */
+
+static void
+mmix_output_octa (FILE *stream, HOST_WIDEST_INT value, int do_begin_end)
+{
+ /* Snipped from final.c:output_addr_const. We need to avoid the
+ presumed universal "0x" prefix. We can do it by replacing "0x" with
+ "#0" here; we must avoid a space in the operands, and no, the leading
+ zero won't cause the number to be read as octal. */
+ char hex_format[sizeof (HOST_WIDEST_INT_PRINT_HEX)];
+
+ if (do_begin_end)
+ fprintf (stream, "\tOCTA ");
+
+ strcpy (hex_format, HOST_WIDEST_INT_PRINT_HEX);
+ hex_format[0] = '#';
+ hex_format[1] = '0';
+
+ /* Provide a few alternative output formats depending on the number, to
+ improve legibility of assembler output. */
+ if ((value < (HOST_WIDEST_INT) 0 && value > (HOST_WIDEST_INT) -10000)
+ || (value >= (HOST_WIDEST_INT) 0 && value <= (HOST_WIDEST_INT) 16384))
+ fprintf (stream, "%d", (int) value);
+ else if (value > (HOST_WIDEST_INT) 0
+ && value < ((HOST_WIDEST_INT) 1 << 31) * 2)
+ fprintf (stream, "#%x", (unsigned int) value);
+ else
+ fprintf (stream, hex_format, value);
+
+ if (do_begin_end)
+ fprintf (stream, "\n");
+}
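+
+/* For example, 100 would print as "100", 0x12345 (74565) as "#12345",
+ and anything needing more than 32 bits as "#0..." through the
+ rewritten hex format above. */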
+
+/* Print the presumed shiftable wyde argument shifted into place (to
+ be output with an operand). */
+
+static void
+mmix_output_shifted_value (FILE *stream, HOST_WIDEST_INT value)
+{
+ int i;
+
+ if (! mmix_shiftable_wyde_value (value))
+ {
+ char s[16+2+1];
+ sprintf (s, HOST_WIDEST_INT_PRINT_HEX, value);
+ internal_error ("MMIX Internal: %s is not a shiftable int", s);
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ /* We know we're through when we find one-bits in the low 16 bits. */
+ if (value & 0xffff)
+ {
+ fprintf (stream, "#%x", (int) (value & 0xffff));
+ return;
+ }
+
+ value >>= 16;
+ }
+
+ /* No bits set? Then it must have been zero. */
+ fprintf (stream, "0");
+}
+
+/* Output an MMIX condition name corresponding to an operator
+ and operands:
+ (comparison_operator [(comparison_operator ...) (const_int 0)])
+ which means we have to look at *two* operators.
+
+ The argument "reversed" refers to reversal of the condition (not the
+ same as swapping the arguments). */
+
+static void
+mmix_output_condition (FILE *stream, rtx x, int reversed)
+{
+ struct cc_conv
+ {
+ RTX_CODE cc;
+
+ /* The normal output cc-code. */
+ const char *const normal;
+
+ /* The reversed cc-code, or NULL if invalid. */
+ const char *const reversed;
+ };
+
+ struct cc_type_conv
+ {
+ enum machine_mode cc_mode;
+
+ /* Terminated with {UNKNOWN, NULL, NULL} */
+ const struct cc_conv *const convs;
+ };
+
+#undef CCEND
+#define CCEND {UNKNOWN, NULL, NULL}
+
+ static const struct cc_conv cc_fun_convs[]
+ = {{ORDERED, "Z", "P"},
+ {UNORDERED, "P", "Z"},
+ CCEND};
+ static const struct cc_conv cc_fp_convs[]
+ = {{GT, "P", NULL},
+ {LT, "N", NULL},
+ CCEND};
+ static const struct cc_conv cc_fpeq_convs[]
+ = {{NE, "Z", "P"},
+ {EQ, "P", "Z"},
+ CCEND};
+ static const struct cc_conv cc_uns_convs[]
+ = {{GEU, "NN", "N"},
+ {GTU, "P", "NP"},
+ {LEU, "NP", "P"},
+ {LTU, "N", "NN"},
+ CCEND};
+ static const struct cc_conv cc_signed_convs[]
+ = {{NE, "NZ", "Z"},
+ {EQ, "Z", "NZ"},
+ {GE, "NN", "N"},
+ {GT, "P", "NP"},
+ {LE, "NP", "P"},
+ {LT, "N", "NN"},
+ CCEND};
+ static const struct cc_conv cc_di_convs[]
+ = {{NE, "NZ", "Z"},
+ {EQ, "Z", "NZ"},
+ {GE, "NN", "N"},
+ {GT, "P", "NP"},
+ {LE, "NP", "P"},
+ {LT, "N", "NN"},
+ {GTU, "NZ", "Z"},
+ {LEU, "Z", "NZ"},
+ CCEND};
+#undef CCEND
+
+ static const struct cc_type_conv cc_convs[]
+ = {{CC_FUNmode, cc_fun_convs},
+ {CC_FPmode, cc_fp_convs},
+ {CC_FPEQmode, cc_fpeq_convs},
+ {CC_UNSmode, cc_uns_convs},
+ {CCmode, cc_signed_convs},
+ {DImode, cc_di_convs}};
+
+ size_t i;
+ int j;
+
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ RTX_CODE cc = GET_CODE (x);
+
+ for (i = 0; i < ARRAY_SIZE (cc_convs); i++)
+ {
+ if (mode == cc_convs[i].cc_mode)
+ {
+ for (j = 0; cc_convs[i].convs[j].cc != UNKNOWN; j++)
+ if (cc == cc_convs[i].convs[j].cc)
+ {
+ const char *mmix_cc
+ = (reversed ? cc_convs[i].convs[j].reversed
+ : cc_convs[i].convs[j].normal);
+
+ if (mmix_cc == NULL)
+ fatal_insn ("MMIX Internal: Trying to output invalidly"
+ " reversed condition:", x);
+
+ fprintf (stream, "%s", mmix_cc);
+ return;
+ }
+
+ fatal_insn ("MMIX Internal: What's the CC of this?", x);
+ }
+ }
+
+ fatal_insn ("MMIX Internal: Unknown CC mode for this:", x);
+}
+
+/* Return the bit-value for a const_int or const_double. */
+
+static HOST_WIDEST_INT
+mmix_intval (rtx x)
+{
+ unsigned HOST_WIDEST_INT retval;
+
+ if (GET_CODE (x) == CONST_INT)
+ return INTVAL (x);
+
+ /* We make a little song and dance because converting to long long in
+ gcc-2.7.2 is broken. I still want people to be able to use it for
+ cross-compilation to MMIX. */
+ if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == VOIDmode)
+ {
+ if (sizeof (HOST_WIDE_INT) < sizeof (HOST_WIDEST_INT))
+ {
+ retval = (unsigned) CONST_DOUBLE_LOW (x) / 2;
+ retval *= 2;
+ retval |= CONST_DOUBLE_LOW (x) & 1;
+
+ retval |=
+ (unsigned HOST_WIDEST_INT) CONST_DOUBLE_HIGH (x)
+ << (HOST_BITS_PER_LONG)/2 << (HOST_BITS_PER_LONG)/2;
+ }
+ else
+ retval = CONST_DOUBLE_HIGH (x);
+
+ return retval;
+ }
+
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE value;
+
+ /* FIXME: This macro is not in the manual but should be. */
+ REAL_VALUE_FROM_CONST_DOUBLE (value, x);
+
+ if (GET_MODE (x) == DFmode)
+ {
+ long bits[2];
+
+ REAL_VALUE_TO_TARGET_DOUBLE (value, bits);
+
+ /* The double cast is necessary to avoid getting the long
+ sign-extended to unsigned long long(!) when they're of
+ different size (usually 32-bit hosts). */
+ return
+ ((unsigned HOST_WIDEST_INT) (unsigned long) bits[0]
+ << (unsigned HOST_WIDEST_INT) 32U)
+ | (unsigned HOST_WIDEST_INT) (unsigned long) bits[1];
+ }
+ else if (GET_MODE (x) == SFmode)
+ {
+ long bits;
+ REAL_VALUE_TO_TARGET_SINGLE (value, bits);
+
+ return (unsigned long) bits;
+ }
+ }
+
+ fatal_insn ("MMIX Internal: This is not a constant:", x);
+}
+
+/* Worker function for TARGET_PROMOTE_FUNCTION_MODE. */
+
+enum machine_mode
+mmix_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return)
+{
+ /* Apparently not doing TRT if int < register-size. FIXME: Perhaps
+ FUNCTION_VALUE and LIBCALL_VALUE need tweaking as some ports say. */
+ if (for_return == 1)
+ return mode;
+
+ /* Promotion of modes currently generates slow code, extending before
+ operation, so we do it only for arguments. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 8)
+ return DImode;
+ else
+ return mode;
+}
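+
+/* For example, an SImode (32-bit) argument would be widened to DImode
+ by the above, while an SImode return value would be left alone, as
+ the for_return == 1 case returns early. */
+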
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+mmix_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, MMIX_STRUCT_VALUE_REGNUM);
+}
+
+/* Worker function for TARGET_FRAME_POINTER_REQUIRED.
+
+ FIXME: Is this requirement built-in? Anyway, we should try to get rid
+ of it; we can deduce the value. */
+
+bool
+mmix_frame_pointer_required (void)
+{
+ return (cfun->has_nonlocal_label);
+}
+
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h
new file mode 100644
index 000000000..06632f6e0
--- /dev/null
+++ b/gcc/config/mmix/mmix.h
@@ -0,0 +1,899 @@
+/* Definitions of target machine for GNU compiler, for MMIX.
+ Copyright (C) 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Hans-Peter Nilsson (hp@bitrange.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_MMIX_H
+#define GCC_MMIX_H
+
+/* First, some local helper macros. Note that the "default" value of
+ FIXED_REGISTERS, CALL_USED_REGISTERS, REG_ALLOC_ORDER and
+ REG_CLASS_CONTENTS depend on these values. */
+#define MMIX_RESERVED_GNU_ARG_0_REGNUM 231
+#define MMIX_FIRST_ARG_REGNUM \
+ (TARGET_ABI_GNU ? MMIX_RESERVED_GNU_ARG_0_REGNUM : 16)
+#define MMIX_FIRST_INCOMING_ARG_REGNUM \
+ (TARGET_ABI_GNU ? MMIX_RESERVED_GNU_ARG_0_REGNUM : 0)
+#define MMIX_MAX_ARGS_IN_REGS 16
+
+/* FIXME: This one isn't fully implemented yet. Return values larger than
+ one register are passed by reference in MMIX_STRUCT_VALUE_REGNUM by the
+ caller, except for return values of type "complex". */
+#define MMIX_MAX_REGS_FOR_VALUE 16
+#define MMIX_RETURN_VALUE_REGNUM \
+ (TARGET_ABI_GNU ? MMIX_RESERVED_GNU_ARG_0_REGNUM : 15)
+#define MMIX_OUTGOING_RETURN_VALUE_REGNUM \
+ (TARGET_ABI_GNU ? MMIX_RESERVED_GNU_ARG_0_REGNUM : 0)
+#define MMIX_STRUCT_VALUE_REGNUM 251
+#define MMIX_STATIC_CHAIN_REGNUM 252
+#define MMIX_FRAME_POINTER_REGNUM 253
+#define MMIX_STACK_POINTER_REGNUM 254
+#define MMIX_LAST_GENERAL_REGISTER 255
+#define MMIX_INCOMING_RETURN_ADDRESS_REGNUM MMIX_rJ_REGNUM
+#define MMIX_HIMULT_REGNUM 258
+#define MMIX_REMAINDER_REGNUM MMIX_rR_REGNUM
+#define MMIX_ARG_POINTER_REGNUM 261
+#define MMIX_rO_REGNUM 262
+#define MMIX_LAST_STACK_REGISTER_REGNUM 31
+
+/* Four registers; "ideally, these registers should be call-clobbered", so
+ just grab a bunch of the common clobbered registers. FIXME: Last
+ registers of return-value should be used, with an error if there's a
+ return-value (that collides in size). */
+#define MMIX_EH_RETURN_DATA_REGNO_START (MMIX_STRUCT_VALUE_REGNUM - 4)
+
+/* Try to keep the definitions from running away on their own. */
+#if (MMIX_EH_RETURN_DATA_REGNO_START \
+ != MMIX_RESERVED_GNU_ARG_0_REGNUM + MMIX_MAX_ARGS_IN_REGS)
+ #error MMIX register definition inconsistency
+#endif
+
+#if (MMIX_MAX_REGS_FOR_VALUE + MMIX_MAX_ARGS_IN_REGS > 32)
+ #error MMIX parameters and return values bad, more than 32 registers
+#endif
+
+/* This is chosen as "a call-clobbered hard register that is otherwise
+ untouched by the epilogue". */
+#define MMIX_EH_RETURN_STACKADJ_REGNUM MMIX_STATIC_CHAIN_REGNUM
+
+#ifdef REG_OK_STRICT
+# define MMIX_REG_OK_STRICT 1
+#else
+# define MMIX_REG_OK_STRICT 0
+#endif
+
+#define MMIX_FUNCTION_ARG_SIZE(MODE, TYPE) \
+ ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) : int_size_in_bytes (TYPE))
+
+/* Per-function machine data. This is normally an opaque type just
+ defined and used in the tm.c file, but we need to see the definition in
+ mmix.md too. */
+struct GTY(()) machine_function
+ {
+ int has_landing_pad;
+ int highest_saved_stack_register;
+ int in_prologue;
+ };
+
+/* For these target macros, there is no generic documentation here. You
+ should read `Using and Porting GCC' for that. Only comments specific
+ to the MMIX target are here.
+
+ There are however references to the specific texinfo node (comments
+ with "Node:"), so there should be little or nothing amiss. Probably
+ the opposite, since we don't have to care about old littering and
+ soon outdated generic comments. */
+
+/* Node: Driver */
+
+/* User symbols are in the same name-space as built-in symbols, but we
+ don't need the built-in symbols, so remove those and instead apply
+ stricter operand checking. Don't warn when expanding insns. */
+#define ASM_SPEC "-no-predefined-syms -x"
+
+/* Pass on -mset-program-start=N and -mset-data-start=M to the linker.
+ Provide default program start 0x100 unless -mno-set-program-start.
+ Don't do this if linking relocatably, with -r. For a final link,
+ produce mmo, unless ELF is requested or when linking relocatably. */
+#define LINK_SPEC \
+ "%{mset-program-start=*:--defsym __.MMIX.start..text=%*}\
+ %{mset-data-start=*:--defsym __.MMIX.start..data=%*}\
+ %{!mset-program-start=*:\
+ %{!mno-set-program-start:\
+ %{!r:--defsym __.MMIX.start..text=0x100}}}\
+ %{!melf:%{!r:-m mmo}}%{melf|r:-m elf64mmix}"
+
+/* FIXME: There's no provision for profiling here. */
+#define STARTFILE_SPEC \
+ "crti%O%s crtbegin%O%s"
+
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+/* Node: Run-time Target */
+
+/* Define __LONG_MAX__, since we're advised not to change glimits.h. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__mmix__"); \
+ builtin_define ("__MMIX__"); \
+ if (TARGET_ABI_GNU) \
+ builtin_define ("__MMIX_ABI_GNU__"); \
+ else \
+ builtin_define ("__MMIX_ABI_MMIXWARE__"); \
+ } \
+ while (0)
+
+#define TARGET_DEFAULT \
+ (MASK_BRANCH_PREDICT | MASK_BASE_ADDRESSES | MASK_USE_RETURN_INSN)
+
+/* Unfortunately, this must not reference anything in "mmix.c". */
+#define TARGET_VERSION \
+ fprintf (stderr, " (MMIX)")
+
+
+/* Node: Per-Function Data */
+#define INIT_EXPANDERS mmix_init_expanders ()
+
+
+/* Node: Storage Layout */
+/* I see no bit-field instructions. Anyway, the common order is from low
+ to high bit numbers, bit N having value 2**N, hence little-endian. */
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+#define FLOAT_WORDS_BIG_ENDIAN 1
+#define UNITS_PER_WORD 8
+
+/* We need to align everything to 64 bits that can affect the alignment
+ of other types. Since address N is interpreted in MMIX as (N modulo
+ access_size), we must align. */
+#define PARM_BOUNDARY 64
+#define STACK_BOUNDARY 64
+#define FUNCTION_BOUNDARY 32
+#define BIGGEST_ALIGNMENT 64
+
+/* This one is only used in the Ada front end. */
+#define MINIMUM_ATOMIC_ALIGNMENT 8
+
+/* Copied from elfos.h. */
+#define MAX_OFILE_ALIGNMENT (32768 * 8)
+
+#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \
+ mmix_data_alignment (TYPE, BASIC_ALIGN)
+
+#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \
+ mmix_constant_alignment (CONSTANT, BASIC_ALIGN)
+
+#define LOCAL_ALIGNMENT(TYPE, BASIC_ALIGN) \
+ mmix_local_alignment (TYPE, BASIC_ALIGN)
+
+/* Following other ports, this seems to most commonly be the word-size,
+ so let's do that here too. */
+#define EMPTY_FIELD_BOUNDARY 64
+
+/* We chose to have this low solely for similarity with the alpha. It has
+ nothing to do with passing the tests dg/c99-scope-2 and
+ execute/align-1.c. Nothing. Though the tests seem wrong. Padding of
+ the structure is automatically added to get alignment when needed if we
+ set this to just byte-boundary. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* The lower bits are ignored. */
+#define STRICT_ALIGNMENT 1
+
+
+/* Node: Type Layout */
+
+/* It might seem more natural to have 64-bit ints on a 64-bit machine,
+ but then an occasional MMIX programmer needs to know how to put a lot
+ of __attribute__ stuff to get to the 8, 16 and 32-bit modes rather
+ than the "intuitive" char, short and int types. */
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+
+/* Node: Register Basics */
+/* We tell GCC about all 256 general registers, and we also include
+ rD, rE, rH, rJ, rR and rO (in that order) so we can describe what insns
+ clobber them. We use a faked register for the argument pointer. It is
+ always eliminated towards the frame-pointer or the stack-pointer, never
+ output in assembly. Any fixed register would do for this, like $255,
+ but future debugging is easier when using a separate register. It
+ counts as a global register for pseudorandom reasons. */
+#define FIRST_PSEUDO_REGISTER 263
+
+/* We treat general registers with no assigned purpose as fixed. The
+ stack pointer, $254, is also fixed. Register $255 is referred to as a
+ temporary register in the MMIX papers, and used as such in mmixal, so
+ it should not be used as a stack pointer. We set it to fixed, and use
+ it "manually" at times of despair. */
+#define FIXED_REGISTERS \
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, \
+ 1, 1, 0, 0, 0, 1, 1 \
+ }
+
+/* General registers are fixed and therefore "historically" marked
+ call-used. (FIXME: This has changed). Registers $15..$31 are
+ call-clobbered; we'll put arguments in $16 and up, and we need $15 for
+ the MMIX register-stack "hole". */
+#define CALL_USED_REGISTERS \
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1 \
+ }
+
+#define INCOMING_REGNO(OUT) mmix_opposite_regno (OUT, 0)
+
+#define OUTGOING_REGNO(IN) mmix_opposite_regno (IN, 1)
+
+/* Defining LOCAL_REGNO is necessary in the presence of the
+ prologue/epilogue; otherwise GCC will be confused that those
+ registers aren't saved and restored. */
+#define LOCAL_REGNO(REGNO) mmix_local_regno (REGNO)
+
+/* Node: Allocation Order */
+
+/* We should allocate registers from 0 to 31 by increasing number, because
+ I think that's what people expect. Beyond that, just use
+ call-clobbered global registers first, then call-clobbered special
+ registers. Last, the fixed registers. */
+#define MMIX_MMIXWARE_ABI_REG_ALLOC_ORDER \
+ { 0, 1, 2, 3, 4, 5, 6, 7, \
+ 8, 9, 10, 11, 12, 13, 14, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 24, 25, 26, 27, 28, 29, 30, 31, \
+ \
+ 252, 251, 250, 249, 248, 247, \
+ \
+ 253, \
+ \
+ 258, 260, 259, \
+ \
+ 32, 33, 34, 35, 36, 37, 38, 39, \
+ 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52, 53, 54, 55, \
+ 56, 57, 58, 59, 60, 61, 62, 63, \
+ 64, 65, 66, 67, 68, 69, 70, 71, \
+ 72, 73, 74, 75, 76, 77, 78, 79, \
+ 80, 81, 82, 83, 84, 85, 86, 87, \
+ 88, 89, 90, 91, 92, 93, 94, 95, \
+ 96, 97, 98, 99, 100, 101, 102, 103, \
+ 104, 105, 106, 107, 108, 109, 110, 111, \
+ 112, 113, 114, 115, 116, 117, 118, 119, \
+ 120, 121, 122, 123, 124, 125, 126, 127, \
+ 128, 129, 130, 131, 132, 133, 134, 135, \
+ 136, 137, 138, 139, 140, 141, 142, 143, \
+ 144, 145, 146, 147, 148, 149, 150, 151, \
+ 152, 153, 154, 155, 156, 157, 158, 159, \
+ 160, 161, 162, 163, 164, 165, 166, 167, \
+ 168, 169, 170, 171, 172, 173, 174, 175, \
+ 176, 177, 178, 179, 180, 181, 182, 183, \
+ 184, 185, 186, 187, 188, 189, 190, 191, \
+ 192, 193, 194, 195, 196, 197, 198, 199, \
+ 200, 201, 202, 203, 204, 205, 206, 207, \
+ 208, 209, 210, 211, 212, 213, 214, 215, \
+ 216, 217, 218, 219, 220, 221, 222, 223, \
+ 224, 225, 226, 227, 228, 229, 230, 231, \
+ 232, 233, 234, 235, 236, 237, 238, 239, \
+ 240, 241, 242, 243, 244, 245, 246, \
+ \
+ 254, 255, 256, 257, 261, 262 \
+ }
+
+/* As a convenience, we put this nearby, for ease of comparison.
+ First, call-clobbered registers in reverse order of assignment as
+ parameters (also the top ones; not because they're parameters, but
+ for continuity).
+
+ Second, saved registers that go on the register-stack.
+
+ Third, special registers rH, rR and rJ. They should not normally be
+ allocated, but since they're call-clobbered, it is cheaper to use one
+ of them than using a call-saved register for a call-clobbered use,
+ assuming it is referenced a very limited number of times. Other global
+ and fixed registers come next; they are never allocated. */
+#define MMIX_GNU_ABI_REG_ALLOC_ORDER \
+ { 252, 251, 250, 249, 248, 247, 246, \
+ 245, 244, 243, 242, 241, 240, 239, 238, \
+ 237, 236, 235, 234, 233, 232, 231, \
+ \
+ 0, 1, 2, 3, 4, 5, 6, 7, \
+ 8, 9, 10, 11, 12, 13, 14, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 24, 25, 26, 27, 28, 29, 30, 31, \
+ \
+ 253, \
+ \
+ 258, 260, 259, \
+ \
+ 32, 33, 34, 35, 36, 37, 38, 39, \
+ 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52, 53, 54, 55, \
+ 56, 57, 58, 59, 60, 61, 62, 63, \
+ 64, 65, 66, 67, 68, 69, 70, 71, \
+ 72, 73, 74, 75, 76, 77, 78, 79, \
+ 80, 81, 82, 83, 84, 85, 86, 87, \
+ 88, 89, 90, 91, 92, 93, 94, 95, \
+ 96, 97, 98, 99, 100, 101, 102, 103, \
+ 104, 105, 106, 107, 108, 109, 110, 111, \
+ 112, 113, 114, 115, 116, 117, 118, 119, \
+ 120, 121, 122, 123, 124, 125, 126, 127, \
+ 128, 129, 130, 131, 132, 133, 134, 135, \
+ 136, 137, 138, 139, 140, 141, 142, 143, \
+ 144, 145, 146, 147, 148, 149, 150, 151, \
+ 152, 153, 154, 155, 156, 157, 158, 159, \
+ 160, 161, 162, 163, 164, 165, 166, 167, \
+ 168, 169, 170, 171, 172, 173, 174, 175, \
+ 176, 177, 178, 179, 180, 181, 182, 183, \
+ 184, 185, 186, 187, 188, 189, 190, 191, \
+ 192, 193, 194, 195, 196, 197, 198, 199, \
+ 200, 201, 202, 203, 204, 205, 206, 207, \
+ 208, 209, 210, 211, 212, 213, 214, 215, \
+ 216, 217, 218, 219, 220, 221, 222, 223, \
+ 224, 225, 226, 227, 228, 229, 230, \
+ \
+ 254, 255, 256, 257, 261, 262 \
+ }
+
+/* The default one. */
+#define REG_ALLOC_ORDER MMIX_MMIXWARE_ABI_REG_ALLOC_ORDER
+
+/* Node: Values in Registers */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) \
+ / UNITS_PER_WORD)
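+
+/* For example, DImode (8 bytes) occupies a single register and TImode
+ (16 bytes) two, UNITS_PER_WORD being 8. */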
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+/* Note that no register can really be accessed in single-float mode, so
+ we *can* say 1 here. FIXME: Will TRT happen for single-float, or do
+ we have to punt to libgcc1.asm? */
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+
+/* Node: Leaf Functions */
+/* (empty) */
+
+
+/* Node: Register Classes */
+
+enum reg_class
+ {
+ NO_REGS, GENERAL_REGS, REMAINDER_REG, HIMULT_REG,
+ SYSTEM_REGS, ALL_REGS, LIM_REG_CLASSES
+ };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+ {"NO_REGS", "GENERAL_REGS", "REMAINDER_REG", "HIMULT_REG", \
+ "SYSTEM_REGS", "ALL_REGS"}
+
+/* Note that the contents of each item is always 32 bits. */
+#define REG_CLASS_CONTENTS \
+ {{0, 0, 0, 0, 0, 0, 0, 0, 0}, \
+ {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0x20}, \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0x10}, \
+ {0, 0, 0, 0, 0, 0, 0, 0, 4}, \
+ {0, 0, 0, 0, 0, 0, 0, 0, 0x7f}, \
+ {~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0x7f}}
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) <= MMIX_LAST_GENERAL_REGISTER \
+ || (REGNO) == MMIX_ARG_POINTER_REGNUM \
+ ? GENERAL_REGS \
+ : (REGNO) == MMIX_REMAINDER_REGNUM ? REMAINDER_REG \
+ : (REGNO) == MMIX_HIMULT_REGNUM ? HIMULT_REG : SYSTEM_REGS)
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#define REG_CLASS_FROM_LETTER(CHAR) \
+ ((CHAR) == 'x' ? SYSTEM_REGS \
+ : (CHAR) == 'y' ? REMAINDER_REG \
+ : (CHAR) == 'z' ? HIMULT_REG : NO_REGS)
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) <= MMIX_LAST_GENERAL_REGISTER \
+ || (REGNO) == MMIX_ARG_POINTER_REGNUM \
+ || (reg_renumber[REGNO] > 0 \
+ && reg_renumber[REGNO] <= MMIX_LAST_GENERAL_REGISTER))
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P (REGNO)
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ mmix_preferred_reload_class (X, CLASS)
+
+#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
+ mmix_preferred_output_reload_class (X, CLASS)
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mmix_secondary_reload_class (CLASS, MODE, X, 1)
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ mmix_secondary_reload_class (CLASS, MODE, X, 0)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) HARD_REGNO_NREGS (CLASS, MODE)
+
+#define CONST_OK_FOR_LETTER_P(VALUE, C) \
+ mmix_const_ok_for_letter_p (VALUE, C)
+
+#define EXTRA_CONSTRAINT(VALUE, C) \
+ mmix_extra_constraint (VALUE, C, MMIX_REG_OK_STRICT)
+
+/* Do we need anything serious here? Yes, any FLOT constant. */
+#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
+ mmix_const_double_ok_for_letter_p (VALUE, C)
+
+
+/* Node: Frame Layout */
+
+#define STACK_GROWS_DOWNWARD
+#define FRAME_GROWS_DOWNWARD 1
+
+#define STARTING_FRAME_OFFSET \
+ mmix_starting_frame_offset ()
+
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+#define DYNAMIC_CHAIN_ADDRESS(FRAMEADDR) \
+ mmix_dynamic_chain_address (FRAMEADDR)
+
+/* FIXME: It seems RETURN_ADDR_OFFSET is undocumented. */
+
+#define SETUP_FRAME_ADDRESSES() \
+ mmix_setup_frame_addresses ()
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ mmix_return_addr_rtx (COUNT, FRAME)
+
+/* It's in rJ before we store it somewhere. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_REG (Pmode, MMIX_INCOMING_RETURN_ADDRESS_REGNUM)
+
+/* FIXME: This does not seem properly documented or cross-indexed.
+ Nowhere except in the code does it say it *has* to be in the range
+ 0..255, or else it will be truncated. That goes for the default too. */
+#define DWARF_FRAME_RETURN_COLUMN \
+ DWARF_FRAME_REGNUM (MMIX_INCOMING_RETURN_ADDRESS_REGNUM)
+
+/* No return address is stored there. */
+#define INCOMING_FRAME_SP_OFFSET 0
+
+/* Node: Stack Checking */
+/* (empty) */
+
+
+/* Node: Exception Handling */
+
+#define EH_RETURN_DATA_REGNO(N) \
+ mmix_eh_return_data_regno (N)
+
+#define EH_RETURN_STACKADJ_RTX \
+ mmix_eh_return_stackadj_rtx ()
+
+#define EH_RETURN_HANDLER_RTX \
+ mmix_eh_return_handler_rtx ()
+
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ mmix_asm_preferred_eh_data_format (CODE, GLOBAL)
+
+/* Node: Frame Registers */
+#define STACK_POINTER_REGNUM MMIX_STACK_POINTER_REGNUM
+
+/* Perhaps we can use HARD_FRAME_POINTER_REGNUM and decide later on
+ what register we want to use. */
+#define FRAME_POINTER_REGNUM MMIX_FRAME_POINTER_REGNUM
+#define ARG_POINTER_REGNUM MMIX_ARG_POINTER_REGNUM
+
+#define STATIC_CHAIN_REGNUM MMIX_STATIC_CHAIN_REGNUM
+
+
+/* Node: Elimination */
+
+/* The frame-pointer is stored in a location that either counts to the
+ offset of incoming parameters, or that counts to the offset of the
+ frame, so we can't use a single offset. We therefore eliminate those
+ two separately. */
+#define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = mmix_initial_elimination_offset (FROM, TO)
+
+
+/* Node: Stack Arguments */
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+/* Node: Register Arguments */
+
+typedef struct { int regs; int lib; } CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM).regs = 0, (CUM).lib = ((LIBNAME) != 0))
+
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ mmix_function_arg_regno_p (REGNO, 0)
+
+
+/* Node: Caller Saves */
+/* (empty) */
+
+
+/* Node: Function Entry */
+
+/* See mmix.c for TARGET_ASM_FUNCTION_PROLOGUE and
+ TARGET_ASM_FUNCTION_EPILOGUE. */
+
+/* We need to say that the epilogue uses the return address, so the
+ initial-value machinery restores it. FIXME: Some targets
+ conditionalize on "reload_completed &&". Investigate difference.
+ FIXME: Not needed if nonlocal_goto_stack_level. */
+#define EPILOGUE_USES(REGNO) \
+ ((REGNO) == MMIX_INCOMING_RETURN_ADDRESS_REGNUM)
+
+/* Node: Profiling */
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ mmix_function_profiler (FILE, LABELNO)
+
+/* Node: Trampolines */
+
+#define TRAMPOLINE_SIZE (4*UNITS_PER_WORD)
+#define TRAMPOLINE_ALIGNMENT BITS_PER_WORD
+
+/* Node: Addressing Modes */
+
+#define CONSTANT_ADDRESS_P(X) \
+ mmix_constant_address_p (X)
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#ifndef REG_OK_STRICT
+# define REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) <= MMIX_LAST_GENERAL_REGISTER \
+ || REGNO (X) == MMIX_ARG_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+#else
+# define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#endif /* REG_OK_STRICT */
+
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ mmix_legitimate_constant_p (X)
+
+
+/* Node: Condition Code */
+
+#define SELECT_CC_MODE(OP, X, Y) \
+ mmix_select_cc_mode (OP, X, Y)
+
+/* A definition of CANONICALIZE_COMPARISON that changed LE and GT
+ comparisons with -1 to LT and GE respectively, and changed LT, LTU,
+ GE or GEU comparisons with 256 into the corresponding LE, LEU, GT or
+ GTU comparisons with 255, proved ineffective; the code path for
+ performing the changes did not trigger for the GCC testsuite, for
+ ghostscript-6.52, or for Knuth's mmix.tar.gz itself (core GCC
+ functionality supposedly handling it) with sources from 2002-06-06. */
+
+#define REVERSIBLE_CC_MODE(MODE) \
+ mmix_reversible_cc_mode (MODE)
+
+
+/* Node: Costs */
+
+/* The special registers can only move to and from general regs, and we
+ need to check that their constraints match, so say 3 for them. */
+/* WARNING: gcc-2.7.2.2 i686-pc-linux-gnulibc1 (as shipped with RH 4.2)
+ miscompiles reload1.c:reload_cse_simplify_set; a call to
+ reload_cse_regno_equal_p is missing when checking if a substitution of
+ a register setting is valid if this is defined to just the expression
+ in mmix_register_move_cost.
+
+ Symptom: a (all?) register setting is optimized away for e.g.
+ "char *p1(char *p) { return p+1; }" and the value of register zero ($0)
+ is returned.
+
+ We can work around this by making it a function call - unknown if
+ this causes dire speed effects. */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) \
+ mmix_register_move_cost (MODE, FROM, TO)
+
+#define SLOW_BYTE_ACCESS 0
+
+
+/* Node: Sections */
+
+/* This must be a constant string, since it's used in crtstuff.c. */
+#define TEXT_SECTION_ASM_OP \
+ "\t.text ! mmixal:= 9H LOC 8B"
+
+/* FIXME: Not documented. */
+#define DATA_SECTION_ASM_OP \
+ mmix_data_section_asm_op ()
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata"
+
+/* Node: PIC */
+/* (empty) */
+
+
+/* Node: File Framework */
+
+/* While any punctuation character other than ";" would do, we prefer "%"
+ or "!"; "!" is a unary operator and so will not be mistakenly included
+ in correctly formed expressions. The hash character adds mass and
+ catches the eye. We can't have it as a comment char by itself, since
+ it's a hex-number prefix. */
+#define ASM_COMMENT_START "!#"
+
+/* These aren't currently functional. We just keep them as markers. */
+#define ASM_APP_ON "%APP\n"
+#define ASM_APP_OFF "%NO_APP\n"
+
+#define OUTPUT_QUOTED_STRING(STREAM, STRING) \
+ mmix_output_quoted_string (STREAM, STRING, strlen (STRING))
+
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* Node: Data Output */
+
+#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN) \
+ mmix_asm_output_ascii (STREAM, PTR, LEN)
+
+/* Node: Uninitialized Data */
+
+#define ASM_OUTPUT_ALIGNED_COMMON(ST, N, S, A) \
+ mmix_asm_output_aligned_common (ST, N, S, A)
+
+#define ASM_OUTPUT_ALIGNED_LOCAL(ST, N, S, A) \
+ mmix_asm_output_aligned_local (ST, N, S, A)
+
+
+/* Node: Label Output */
+
+#define ASM_OUTPUT_LABEL(STREAM, NAME) \
+ mmix_asm_output_label (STREAM, NAME)
+
+#define ASM_OUTPUT_INTERNAL_LABEL(STREAM, NAME) \
+ mmix_asm_output_internal_label (STREAM, NAME)
+
+#define ASM_DECLARE_REGISTER_GLOBAL(STREAM, DECL, REGNO, NAME) \
+ mmix_asm_declare_register_global (STREAM, DECL, REGNO, NAME)
+
+#define GLOBAL_ASM_OP "\t.global "
+
+#define ASM_WEAKEN_LABEL(STREAM, NAME) \
+ mmix_asm_weaken_label (STREAM, NAME)
+
+#define MAKE_DECL_ONE_ONLY(DECL) \
+ mmix_make_decl_one_only (DECL)
+
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ mmix_asm_output_labelref (STREAM, NAME)
+
+/* We insert a ":" to disambiguate against user symbols like L5. */
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ sprintf (LABEL, "*%s:%ld", PREFIX, (long)(NUM))
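+
+/* For example, PREFIX "L" and NUM 5 yield the buffer "*L:5"; the
+ leading "*" marks the name as verbatim to the output machinery, and
+ the ":" keeps it distinct from a user symbol named "L5". */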
+
+/* Insert "::"; these are rarer than internal labels. FIXME: Make sure no
+ ":" is seen in the object file; we don't really want that mmixal
+ feature visible there. We don't want the default, which uses a dot;
+ that'd be incompatible with mmixal. */
+#define ASM_PN_FORMAT "%s::%lu"
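+
+/* For example, the private name for "foo" numbered 3 comes out as
+ "foo::3". */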
+
+#define ASM_OUTPUT_DEF(STREAM, NAME, VALUE) \
+ mmix_asm_output_def (STREAM, NAME, VALUE)
+
+/* Node: Macros for Initialization */
+/* We're compiling to ELF and linking to MMO; the fundamental ELF
+ features that GCC depends on are there. */
+
+/* These must be constant strings, since they're used in crtstuff.c. */
+#define INIT_SECTION_ASM_OP "\t.section .init,\"ax\" ! mmixal-incompatible"
+
+#define FINI_SECTION_ASM_OP "\t.section .fini,\"ax\" ! mmixal-incompatible"
+
+#define OBJECT_FORMAT_ELF
+
+
+/* Node: Instruction Output */
+
+/* The non-$ register names must be prefixed with ":", since they're
+ affected by PREFIX. We provide the non-colon names as additional
+ names. */
+#define REGISTER_NAMES \
+ {"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", \
+ "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", \
+ "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \
+ "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31", \
+ "$32", "$33", "$34", "$35", "$36", "$37", "$38", "$39", \
+ "$40", "$41", "$42", "$43", "$44", "$45", "$46", "$47", \
+ "$48", "$49", "$50", "$51", "$52", "$53", "$54", "$55", \
+ "$56", "$57", "$58", "$59", "$60", "$61", "$62", "$63", \
+ "$64", "$65", "$66", "$67", "$68", "$69", "$70", "$71", \
+ "$72", "$73", "$74", "$75", "$76", "$77", "$78", "$79", \
+ "$80", "$81", "$82", "$83", "$84", "$85", "$86", "$87", \
+ "$88", "$89", "$90", "$91", "$92", "$93", "$94", "$95", \
+ "$96", "$97", "$98", "$99", "$100", "$101", "$102", "$103", \
+ "$104", "$105", "$106", "$107", "$108", "$109", "$110", "$111", \
+ "$112", "$113", "$114", "$115", "$116", "$117", "$118", "$119", \
+ "$120", "$121", "$122", "$123", "$124", "$125", "$126", "$127", \
+ "$128", "$129", "$130", "$131", "$132", "$133", "$134", "$135", \
+ "$136", "$137", "$138", "$139", "$140", "$141", "$142", "$143", \
+ "$144", "$145", "$146", "$147", "$148", "$149", "$150", "$151", \
+ "$152", "$153", "$154", "$155", "$156", "$157", "$158", "$159", \
+ "$160", "$161", "$162", "$163", "$164", "$165", "$166", "$167", \
+ "$168", "$169", "$170", "$171", "$172", "$173", "$174", "$175", \
+ "$176", "$177", "$178", "$179", "$180", "$181", "$182", "$183", \
+ "$184", "$185", "$186", "$187", "$188", "$189", "$190", "$191", \
+ "$192", "$193", "$194", "$195", "$196", "$197", "$198", "$199", \
+ "$200", "$201", "$202", "$203", "$204", "$205", "$206", "$207", \
+ "$208", "$209", "$210", "$211", "$212", "$213", "$214", "$215", \
+ "$216", "$217", "$218", "$219", "$220", "$221", "$222", "$223", \
+ "$224", "$225", "$226", "$227", "$228", "$229", "$230", "$231", \
+ "$232", "$233", "$234", "$235", "$236", "$237", "$238", "$239", \
+ "$240", "$241", "$242", "$243", "$244", "$245", "$246", "$247", \
+ "$248", "$249", "$250", "$251", "$252", "$253", "$254", "$255", \
+ ":rD", ":rE", ":rH", ":rJ", ":rR", "ap_!BAD!", ":rO"}
+
+#define ADDITIONAL_REGISTER_NAMES \
+ {{"sp", 254}, {":sp", 254}, {"rD", 256}, {"rE", 257}, \
+ {"rH", 258}, {"rJ", MMIX_rJ_REGNUM}, {"rO", MMIX_rO_REGNUM}}
+
+#define PRINT_OPERAND(STREAM, X, CODE) \
+ mmix_print_operand (STREAM, X, CODE)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ mmix_print_operand_punct_valid_p (CODE)
+
+#define PRINT_OPERAND_ADDRESS(STREAM, X) \
+ mmix_print_operand_address (STREAM, X)
+
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
+ mmix_asm_output_reg_push (STREAM, REGNO)
+
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+ mmix_asm_output_reg_pop (STREAM, REGNO)
+
+
+/* Node: Dispatch Tables */
+
+/* We define both types, since SImode is the better one, but DImode is
+ the only one possible for mmixal, so that's the one actually used. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ mmix_asm_output_addr_diff_elt (STREAM, BODY, VALUE, REL)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ mmix_asm_output_addr_vec_elt (STREAM, VALUE)
+
+
+/* Node: Exception Region Output */
+/* (empty) */
+
+/* Node: Alignment Output */
+
+#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \
+ mmix_asm_output_skip (STREAM, NBYTES)
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ mmix_asm_output_align (STREAM, POWER)
+
+
+/* Node: All Debuggers */
+
+#define DBX_REGISTER_NUMBER(REGNO) \
+ mmix_dbx_register_number (REGNO)
+
+
+/* Node: DBX Options */
+/* (empty) */
+/* Node: DBX Hooks */
+/* (empty) */
+/* Node: File Names and DBX */
+/* (empty) */
+
+
+/* Node: SDB and DWARF */
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+/* Node: Misc */
+
+/* There's no way to get a PC-relative offset into tables for SImode, so
+ for the moment we have absolute entries in DImode.
+ When we're going ELF, these should be SImode and 1. */
+#define CASE_VECTOR_MODE DImode
+#define CASE_VECTOR_PC_RELATIVE 0
+
+#define WORD_REGISTER_OPERATIONS
+
+/* We have a choice, which makes this yet another parameter to tweak. The
+ gut feeling is currently that SIGN_EXTEND wins; "int" is more frequent
+ than "unsigned int", and we have signed characters. FIXME: measure. */
+#define LOAD_EXTEND_OP(MODE) (TARGET_ZERO_EXTEND ? ZERO_EXTEND : SIGN_EXTEND)
+
+#define MOVE_MAX 8
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* ??? MMIX allows a choice of STORE_FLAG_VALUE. Revisit later; we
+ don't have scc expanders yet. */
+
+#define Pmode DImode
+
+#define FUNCTION_MODE QImode
+
+#define NO_IMPLICIT_EXTERN_C
+
+/* These are checked. */
+#define DOLLARS_IN_IDENTIFIERS 0
+#define NO_DOLLAR_IN_LABEL
+#define NO_DOT_IN_LABEL
+
+#endif /* GCC_MMIX_H */
+/*
+ * Local variables:
+ * eval: (c-set-style "gnu")
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/gcc/config/mmix/mmix.md b/gcc/config/mmix/mmix.md
new file mode 100644
index 000000000..97ab5e131
--- /dev/null
+++ b/gcc/config/mmix/mmix.md
@@ -0,0 +1,1240 @@
+;; GCC machine description for MMIX
+;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Hans-Peter Nilsson (hp@bitrange.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The original PO technology requires these to be ordered by speed, so
+;; that the assigner will pick the fastest.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Uses of UNSPEC in this file:
+;; UNSPEC_VOLATILE:
+;;
+;; 0 sync_icache (sync icache before trampoline jump)
+;; 1 nonlocal_goto_receiver
+;;
+
+;; The order of insns is as in Node: Standard Names, with smaller modes
+;; before bigger modes.
+
+(define_constants
+ [(MMIX_rJ_REGNUM 259)
+ (MMIX_rR_REGNUM 260)
+ (MMIX_fp_rO_OFFSET -24)]
+)
+
+;; Operand and operator predicates.
+
+(include "predicates.md")
+
+;; FIXME: Can we remove the reg-to-reg for smaller modes? Shouldn't they
+;; be synthesized ok?
+(define_insn "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r ,r,x ,r,r,m,??r")
+ (match_operand:QI 1 "general_operand" "r,LS,K,rI,x,m,r,n"))]
+ ""
+ "@
+ SET %0,%1
+ %s1 %0,%v1
+ NEGU %0,0,%n1
+ PUT %0,%1
+ GET %0,%1
+ LDB%U0 %0,%1
+ STBU %1,%0
+ %r0%I1")
+
+(define_insn "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,x,r,r,m,??r")
+ (match_operand:HI 1 "general_operand" "r,LS,K,r,x,m,r,n"))]
+ ""
+ "@
+ SET %0,%1
+ %s1 %0,%v1
+ NEGU %0,0,%n1
+ PUT %0,%1
+ GET %0,%1
+ LDW%U0 %0,%1
+ STWU %1,%0
+ %r0%I1")
+
+;; gcc.c-torture/compile/920428-2.c fails if there's no "n".
+(define_insn "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r ,r,x,r,r,m,??r")
+ (match_operand:SI 1 "general_operand" "r,LS,K,r,x,m,r,n"))]
+ ""
+ "@
+ SET %0,%1
+ %s1 %0,%v1
+ NEGU %0,0,%n1
+ PUT %0,%1
+ GET %0,%1
+ LDT%U0 %0,%1
+ STTU %1,%0
+ %r0%I1")
+
+;; We assume all "s" are addresses. Does that hold?
+(define_insn "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r ,r,x,r,m,r,m,r,r,??r")
+ (match_operand:DI 1 "general_operand" "r,LS,K,r,x,I,m,r,R,s,n"))]
+ ""
+ "@
+ SET %0,%1
+ %s1 %0,%v1
+ NEGU %0,0,%n1
+ PUT %0,%1
+ GET %0,%1
+ STCO %1,%0
+ LDO %0,%1
+ STOU %1,%0
+ GETA %0,%1
+ LDA %0,%1
+ %r0%I1")
+
+;; Note that we move around the float as a collection of bits; no
+;; conversion to double.
+(define_insn "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,x,r,r,m,??r")
+ (match_operand:SF 1 "general_operand" "r,G,r,x,m,r,F"))]
+ ""
+ "@
+ SET %0,%1
+ SETL %0,0
+ PUT %0,%1
+ GET %0,%1
+ LDT %0,%1
+ STTU %1,%0
+ %r0%I1")
+
+(define_insn "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,x,r,r,m,??r")
+ (match_operand:DF 1 "general_operand" "r,G,r,x,m,r,F"))]
+ ""
+ "@
+ SET %0,%1
+ SETL %0,0
+ PUT %0,%1
+ GET %0,%1
+ LDO %0,%1
+ STOU %1,%0
+ %r0%I1")
+
+;; We need to be able to move around the values used as condition codes.
+;; The need was first spotted, as reported in
+;; <URL:http://gcc.gnu.org/ml/gcc-bugs/2003-03/msg00008.html>, after
+;; changes in loop optimization. The file machmode.def says they're of
+;; size 4 QI. Valid bit-patterns correspond to integers -1, 0 and 1, so
+;; we treat them as signed entities; see mmix-modes.def. The following
+;; expanders should cover all MODE_CC modes, and expand to this pattern.
+(define_insn "*movcc_expanded"
+ [(set (match_operand 0 "nonimmediate_operand" "=r,x,r,r,m")
+ (match_operand 1 "nonimmediate_operand" "r,r,x,m,r"))]
+ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_CC
+ && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
+ "@
+ SET %0,%1
+ PUT %0,%1
+ GET %0,%1
+ LDT %0,%1
+ STT %1,%0")
+
+(define_expand "movcc"
+ [(set (match_operand:CC 0 "nonimmediate_operand" "")
+ (match_operand:CC 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_expand "movcc_uns"
+ [(set (match_operand:CC_UNS 0 "nonimmediate_operand" "")
+ (match_operand:CC_UNS 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_expand "movcc_fp"
+ [(set (match_operand:CC_FP 0 "nonimmediate_operand" "")
+ (match_operand:CC_FP 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_expand "movcc_fpeq"
+ [(set (match_operand:CC_FPEQ 0 "nonimmediate_operand" "")
+ (match_operand:CC_FPEQ 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_expand "movcc_fun"
+ [(set (match_operand:CC_FUN 0 "nonimmediate_operand" "")
+ (match_operand:CC_FUN 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (plus:DI
+ (match_operand:DI 1 "register_operand" "%r,r,0")
+ (match_operand:DI 2 "mmix_reg_or_constant_operand" "rI,K,LS")))]
+ ""
+ "@
+ ADDU %0,%1,%2
+ SUBU %0,%1,%n2
+ %i2 %0,%v2")
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (plus:DF (match_operand:DF 1 "register_operand" "%r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FADD %0,%1,%2")
+
+;; Insn canonicalization *should* have removed the need for an integer
+;; in operand 2.
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "mmix_reg_or_8bit_operand" "r,I")
+ (match_operand:DI 2 "register_operand" "r,r")))]
+ ""
+ "@
+ SUBU %0,%1,%2
+ NEGU %0,%1,%2")
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (minus:DF (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FSUB %0,%1,%2")
+
+;; FIXME: Should we define_expand and match 2, 4, 8 (etc) with shift (or
+;; %{something}2ADDU %0,%1,0)? Hopefully GCC should still handle it, so
+;; we don't have to taint the machine description. If results are bad
+;; enough, we may have to do it anyway.
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (mult:DI (match_operand:DI 1 "register_operand" "%r,r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "O,rI")))
+ (clobber (match_scratch:DI 3 "=X,z"))]
+ ""
+ "@
+ %m2ADDU %0,%1,%1
+ MULU %0,%1,%2")
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (mult:DF (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FMUL %0,%1,%2")
+
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (div:DF (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FDIV %0,%1,%2")
+
+;; FIXME: Is "frem" doing the right operation for moddf3?
+(define_insn "moddf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (mod:DF (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FREM %0,%1,%2")
+
+;; FIXME: Should we define_expand for smin, smax, umin, umax using a
+;; nifty conditional sequence?
+
+;; FIXME: The cuter andn combinations don't get here, presumably because
+;; they ended up in the constant pool. Check whether that is still true.
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (and:DI
+ (match_operand:DI 1 "register_operand" "%r,0")
+ (match_operand:DI 2 "mmix_reg_or_constant_operand" "rI,NT")))]
+ ""
+ "@
+ AND %0,%1,%2
+ %A2 %0,%V2")
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (ior:DI (match_operand:DI 1 "register_operand" "%r,0")
+ (match_operand:DI 2 "mmix_reg_or_constant_operand" "rH,LS")))]
+ ""
+ "@
+ OR %0,%1,%2
+ %o2 %0,%v2")
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "XOR %0,%1,%2")
+
+;; FIXME: When TImode works for other reasons (like cross-compiling from
+;; a 32-bit host), add back umulditi3 and umuldi3_highpart here.
+
+;; FIXME: Check what's really reasonable for the mod part.
+
+;; One day we might persuade GCC to expand divisions with constants the
+;; way MMIX does: giving the remainder the sign of the divisor. But even
+;; then, it might be good to have an option to divide the way "everybody
+;; else" does. Perhaps that option could then be on by default. However,
+;; it's not likely to happen, because major (C, C++, Fortran) language
+;; standards in effect as of 2002-04-29 reportedly demand that the sign
+;; of the remainder follow the sign of the dividend.
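+;;
+;; A worked example of the difference (a sketch, not from the sources):
+;; for -7 divided by 2, MMIX's floored DIV gives quotient -4 and
+;; remainder 1 (the sign of the divisor), while truncated division as
+;; demanded by those standards gives quotient -3 and remainder -1 (the
+;; sign of the dividend).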
+
+(define_insn "divmoddi4"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (div:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))
+ (set (match_operand:DI 3 "register_operand" "=y")
+ (mod:DI (match_dup 1) (match_dup 2)))]
+ ;; Do the library stuff later.
+ "TARGET_KNUTH_DIVISION"
+ "DIV %0,%1,%2")
+
+(define_insn "udivmoddi4"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (udiv:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))
+ (set (match_operand:DI 3 "register_operand" "=y")
+ (umod:DI (match_dup 1) (match_dup 2)))]
+ ""
+ "DIVU %0,%1,%2")
+
+(define_expand "divdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (div:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")))
+ (clobber (scratch:DI))
+ (clobber (scratch:DI))
+ (clobber (reg:DI MMIX_rR_REGNUM))])]
+ "! TARGET_KNUTH_DIVISION"
+ "")
+
+;; The %2-is-%1 case is there just to make sure things don't fail. It
+;; could presumably happen with optimizations off; there is no evidence.
+(define_insn "*divdi3_nonknuth"
+ [(set (match_operand:DI 0 "register_operand" "=&r,&r")
+ (div:DI (match_operand:DI 1 "register_operand" "r,r")
+ (match_operand:DI 2 "register_operand" "1,r")))
+ (clobber (match_scratch:DI 3 "=1,1"))
+ (clobber (match_scratch:DI 4 "=2,2"))
+ (clobber (reg:DI MMIX_rR_REGNUM))]
+ "! TARGET_KNUTH_DIVISION"
+ "@
+ SETL %0,1
+ XOR $255,%1,%2\;NEGU %0,0,%2\;CSN %2,%2,%0\;NEGU %0,0,%1\;CSN %1,%1,%0\;\
+DIVU %0,%1,%2\;NEGU %1,0,%0\;CSN %0,$255,%1")
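+
+;; A reading of the second alternative above: the initial XOR leaves the
+;; sign of %1^%2 in $255, the two NEGU/CSN pairs replace %1 and %2 with
+;; their absolute values, DIVU forms the unsigned quotient, and the
+;; final NEGU/CSN negates the quotient iff the operand signs differed.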
+
+(define_expand "moddi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (mod:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")))
+ (clobber (scratch:DI))
+ (clobber (scratch:DI))
+ (clobber (reg:DI MMIX_rR_REGNUM))])]
+ "! TARGET_KNUTH_DIVISION"
+ "")
+
+;; The %2-is-%1 case is there just to make sure things don't fail. It
+;; could presumably happen with optimizations off; there is no evidence.
+(define_insn "*moddi3_nonknuth"
+ [(set (match_operand:DI 0 "register_operand" "=&r,&r")
+ (mod:DI (match_operand:DI 1 "register_operand" "r,r")
+ (match_operand:DI 2 "register_operand" "1,r")))
+ (clobber (match_scratch:DI 3 "=1,1"))
+ (clobber (match_scratch:DI 4 "=2,2"))
+ (clobber (reg:DI MMIX_rR_REGNUM))]
+ "! TARGET_KNUTH_DIVISION"
+ "@
+ SETL %0,0
+ NEGU %0,0,%2\;CSN %2,%2,%0\;NEGU $255,0,%1\;CSN %1,%1,$255\;\
+DIVU %1,%1,%2\;GET %0,:rR\;NEGU %2,0,%0\;CSNN %0,$255,%2")
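+
+;; Similarly for the second alternative above: the operands are replaced
+;; by their absolute values (with $255 holding the negated dividend),
+;; DIVU leaves the unsigned remainder in rR, and the final NEGU/CSNN
+;; give the remainder the sign of the dividend, matching truncated
+;; division.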
+
+(define_insn "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "SLU %0,%1,%2")
+
+(define_insn "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "SR %0,%1,%2")
+
+(define_insn "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "SRU %0,%1,%2")
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+ ""
+ "NEGU %0,0,%1")
+
+(define_expand "negdf2"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=r")
+ (neg:DF (match_operand:DF 1 "register_operand" "r")))
+ (use (match_dup 2))])]
+ ""
+{
+ /* Emit bit-flipping sequence to be IEEE-safe wrt. -+0. */
+ operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));
+})
+
+(define_insn "*expanded_negdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (neg:DF (match_operand:DF 1 "register_operand" "r")))
+ (use (match_operand:DI 2 "register_operand" "r"))]
+ ""
+ "XOR %0,%1,%2")
+
+;; FIXME: define_expand for absdi2?
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (abs:DF (match_operand:DF 1 "register_operand" "0")))]
+ ""
+ "ANDNH %0,#8000")
+
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "r")))]
+ ""
+ "FSQRT %0,%1")
+
+;; FIXME: define_expand for ffssi2? (not ffsdi2 since int is SImode).
+
+(define_insn "one_cmpldi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (match_operand:DI 1 "register_operand" "r")))]
+ ""
+ "NOR %0,%1,0")
+
+;; When the user-patterns expand, the resulting insns will match the
+;; patterns below.
+
+;; We can fold the signed-compare where the register value is
+;; already equal to (compare:CCTYPE (reg) (const_int 0)).
+;; We can't do that at all for floating-point, due to NaN, +0.0
+;; and -0.0, and we can only do it for the non/zero test of
+;; unsigned, so that has to be done another way.
+;; FIXME: Perhaps a peep2 changing CCcode to a new code, that
+;; gets folded here.
+(define_insn "*cmpdi_folded"
+ [(set (match_operand:CC 0 "register_operand" "=r")
+ (compare:CC
+ (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)))]
+ ;; FIXME: Can we test equivalence any other way?
+ ;; FIXME: Can we fold any other way?
+ "REG_P (operands[0]) && REG_P (operands[1])
+ && REGNO (operands[1]) == REGNO (operands[0])"
+ "%% folded: cmp %0,%1,0")
+
+(define_insn "*cmps"
+ [(set (match_operand:CC 0 "register_operand" "=r")
+ (compare:CC
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "CMP %0,%1,%2")
+
+(define_insn "*cmpu"
+ [(set (match_operand:CC_UNS 0 "register_operand" "=r")
+ (compare:CC_UNS
+ (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "CMPU %0,%1,%2")
+
+(define_insn "*fcmp"
+ [(set (match_operand:CC_FP 0 "register_operand" "=r")
+ (compare:CC_FP
+ (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FCMP%e0 %0,%1,%2")
+
+;; FIXME: for -mieee, add fsub %0,%1,%1\;fsub %0,%2,%2 before to
+;; make signalling compliant.
+(define_insn "*feql"
+ [(set (match_operand:CC_FPEQ 0 "register_operand" "=r")
+ (compare:CC_FPEQ
+ (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FEQL%e0 %0,%1,%2")
+
+(define_insn "*fun"
+ [(set (match_operand:CC_FUN 0 "register_operand" "=r")
+ (compare:CC_FUN
+ (match_operand:DF 1 "register_operand" "r")
+ (match_operand:DF 2 "register_operand" "r")))]
+ ""
+ "FUN%e0 %0,%1,%2")
+
+;; In order to get correct rounding, we have to use SFLOT and SFLOTU for
+;; conversion. They do not convert to SFmode; they convert to DFmode,
+;; with rounding as of SFmode. They are not usable as is, so we pretend
+;; we have a single instruction and emit two.
+
+;; Note that this will (somewhat unexpectedly) create an inexact
+;; exception if rounding is necessary - has to be masked off in crt0?
+(define_expand "floatdisf2"
+ [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (float:SF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))
+ ;; Let's use a DI scratch, since SF values don't generally get into
+ ;; registers. Dunno what's best; it's really a DF, but that doesn't
+ ;; logically follow from the operands in the pattern.
+ (clobber (match_scratch:DI 2 "=&r"))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) != MEM)
+ {
+ rtx stack_slot;
+
+ /* FIXME: This stack-slot remains even at -O3. There must be a
+ better way. */
+ stack_slot
+ = validize_mem (assign_stack_temp (SFmode,
+ GET_MODE_SIZE (SFmode), 0));
+ emit_insn (gen_floatdisf2 (stack_slot, operands[1]));
+ emit_move_insn (operands[0], stack_slot);
+ DONE;
+ }
+}")
+
+(define_insn "*floatdisf2_real"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float:SF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ ""
+ "SFLOT %2,%1\;STSF %2,%0")
+
+(define_expand "floatunsdisf2"
+ [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (unsigned_float:SF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))
+ ;; Let's use a DI scratch, since SF values don't generally get into
+ ;; registers. Dunno what's best; it's really a DF, but that doesn't
+ ;; logically follow from the operands in the pattern.
+ (clobber (scratch:DI))])]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) != MEM)
+ {
+ rtx stack_slot;
+
+ /* FIXME: This stack-slot remains even at -O3. Must be a better
+ way. */
+ stack_slot
+ = validize_mem (assign_stack_temp (SFmode,
+ GET_MODE_SIZE (SFmode), 0));
+ emit_insn (gen_floatunsdisf2 (stack_slot, operands[1]));
+ emit_move_insn (operands[0], stack_slot);
+ DONE;
+ }
+}")
+
+(define_insn "*floatunsdisf2_real"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (unsigned_float:SF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ ""
+ "SFLOTU %2,%1\;STSF %2,%0")
+
+;; Note that this will (somewhat unexpectedly) create an inexact
+;; exception if rounding is necessary - has to be masked off in crt0?
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (float:DF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "FLOT %0,%1")
+
+(define_insn "floatunsdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI")))]
+ ""
+ "FLOTU %0,%1")
+
+(define_insn "ftruncdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (fix:DF (match_operand:DF 1 "register_operand" "r")))]
+ ""
+ ;; ROUND_OFF
+ "FINT %0,1,%1")
+
+;; Note that this will (somewhat unexpectedly) create an inexact
+;; exception if rounding is necessary - has to be masked off in crt0?
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "r"))))]
+ ""
+ ;; ROUND_OFF
+ "FIX %0,1,%1")
+
+(define_insn "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unsigned_fix:DI
+ (fix:DF (match_operand:DF 1 "register_operand" "r"))))]
+ ""
+ ;; ROUND_OFF
+ "FIXU %0,1,%1")
+
+;; It doesn't seem possible to have memory_operand as a predicate here
+;; (testcase: libgcc2 floathisf). FIXME: Shouldn't it be possible to do
+;; that? Bug in GCC? Anyway, this used to be a simple pattern with a
+;; memory_operand predicate, but was split up into a define_expand with
+;; the old pattern as "anonymous".
+;; FIXME: Perhaps with SECONDARY_MEMORY_NEEDED?
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "memory_operand" "")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) != MEM)
+ {
+ /* FIXME: There should be a way to say: 'put this in operands[0]
+ but *after* the expanded insn'. */
+ rtx stack_slot;
+
+ /* There is no sane destination but a register here, if it wasn't
+ already MEM. (It's too hard to get fatal_insn to work here.) */
+ if (! REG_P (operands[0]))
+ internal_error (\"MMIX Internal: Bad truncdfsf2 expansion\");
+
+ /* FIXME: This stack-slot remains even at -O3. Must be a better
+ way. */
+ stack_slot
+ = validize_mem (assign_stack_temp (SFmode,
+ GET_MODE_SIZE (SFmode), 0));
+ emit_insn (gen_truncdfsf2 (stack_slot, operands[1]));
+ emit_move_insn (operands[0], stack_slot);
+ DONE;
+ }
+}")
+
+(define_insn "*truncdfsf2_real"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "r")))]
+ ""
+ "STSF %1,%0")
+
+;; Same comment as for truncdfsf2.
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (float_extend:DF (match_operand:SF 1 "memory_operand" "m")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) != MEM)
+ {
+ rtx stack_slot;
+
+ /* There is no sane destination but a register here, if it wasn't
+ already MEM. (It's too hard to get fatal_insn to work here.) */
+ if (! REG_P (operands[0]))
+ internal_error (\"MMIX Internal: Bad extendsfdf2 expansion\");
+
+ /* FIXME: This stack-slot remains even at -O3. There must be a
+ better way. */
+ stack_slot
+ = validize_mem (assign_stack_temp (SFmode,
+ GET_MODE_SIZE (SFmode), 0));
+ emit_move_insn (stack_slot, operands[1]);
+ emit_insn (gen_extendsfdf2 (operands[0], stack_slot));
+ DONE;
+ }
+}")
+
+(define_insn "*extendsfdf2_real"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (float_extend:DF (match_operand:SF 1 "memory_operand" "m")))]
+ ""
+ "LDSF %0,%1")
+
+;; Neither sign-extend nor zero-extend are necessary; gcc knows how to
+;; synthesize using shifts or and, except with a memory source and not
+;; completely optimal. FIXME: Actually, other bugs surface when those
+;; patterns are defined; fix later.
+
+;; Of the (int) 0..255 bit-patterns, 0 is the only sane value to use in
+;; movdfcc.
+
+(define_expand "movdfcc"
+ [(set (match_dup 4) (match_dup 5))
+ (set (match_operand:DF 0 "register_operand" "")
+ (if_then_else:DF
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:DF 2 "mmix_reg_or_0_operand" "")
+ (match_operand:DF 3 "mmix_reg_or_0_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ if (code == LE || code == GE)
+ FAIL;
+
+ operands[4] = mmix_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[4], const0_rtx);
+}")
+
+(define_expand "movdicc"
+ [(set (match_dup 4) (match_dup 5))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "")
+ (match_operand:DI 3 "mmix_reg_or_8bit_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ if (code == LE || code == GE)
+ FAIL;
+
+ operands[4] = mmix_gen_compare_reg (code, XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[4], const0_rtx);
+}")
+
+;; FIXME: Is this the right way to do "folding" of CCmode -> DImode?
+(define_insn "*movdicc_real_foldable"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:DI
+ (match_operator 2 "mmix_foldable_comparison_operator"
+ [(match_operand:DI 3 "register_operand" "r,r,r,r")
+ (const_int 0)])
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI,0 ,rI,GM")
+ (match_operand:DI 4 "mmix_reg_or_8bit_operand" "0 ,rI,GM,rI")))]
+ ""
+ "@
+ CS%d2 %0,%3,%1
+ CS%D2 %0,%3,%4
+ ZS%d2 %0,%3,%1
+ ZS%D2 %0,%3,%4")
+
+(define_insn "*movdicc_real_reversible"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r ,r ,r ,r")
+ (if_then_else:DI
+ (match_operator
+ 2 "mmix_comparison_operator"
+ [(match_operand 3 "mmix_reg_cc_operand" "r ,r ,r ,r")
+ (const_int 0)])
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI,0 ,rI,GM")
+ (match_operand:DI 4 "mmix_reg_or_8bit_operand" "0 ,rI,GM,rI")))]
+ "REVERSIBLE_CC_MODE (GET_MODE (operands[3]))"
+ "@
+ CS%d2 %0,%3,%1
+ CS%D2 %0,%3,%4
+ ZS%d2 %0,%3,%1
+ ZS%D2 %0,%3,%4")
+
+(define_insn "*movdicc_real_nonreversible"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r ,r")
+ (if_then_else:DI
+ (match_operator
+ 2 "mmix_comparison_operator"
+ [(match_operand 3 "mmix_reg_cc_operand" "r ,r")
+ (const_int 0)])
+ (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI,rI")
+ (match_operand:DI 4 "mmix_reg_or_0_operand" "0 ,GM")))]
+ "!REVERSIBLE_CC_MODE (GET_MODE (operands[3]))"
+ "@
+ CS%d2 %0,%3,%1
+ ZS%d2 %0,%3,%1")
+
+(define_insn "*movdfcc_real_foldable"
+ [(set
+ (match_operand:DF 0 "register_operand" "=r ,r ,r ,r")
+ (if_then_else:DF
+ (match_operator
+ 2 "mmix_foldable_comparison_operator"
+ [(match_operand:DI 3 "register_operand" "r ,r ,r ,r")
+ (const_int 0)])
+ (match_operand:DF 1 "mmix_reg_or_0_operand" "rGM,0 ,rGM,GM")
+ (match_operand:DF 4 "mmix_reg_or_0_operand" "0 ,rGM,GM ,rGM")))]
+ ""
+ "@
+ CS%d2 %0,%3,%1
+ CS%D2 %0,%3,%4
+ ZS%d2 %0,%3,%1
+ ZS%D2 %0,%3,%4")
+
+(define_insn "*movdfcc_real_reversible"
+ [(set
+ (match_operand:DF 0 "register_operand" "=r ,r ,r ,r")
+ (if_then_else:DF
+ (match_operator
+ 2 "mmix_comparison_operator"
+ [(match_operand 3 "mmix_reg_cc_operand" "r ,r ,r ,r")
+ (const_int 0)])
+ (match_operand:DF 1 "mmix_reg_or_0_operand" "rGM,0 ,rGM,GM")
+ (match_operand:DF 4 "mmix_reg_or_0_operand" "0 ,rGM,GM ,rGM")))]
+ "REVERSIBLE_CC_MODE (GET_MODE (operands[3]))"
+ "@
+ CS%d2 %0,%3,%1
+ CS%D2 %0,%3,%4
+ ZS%d2 %0,%3,%1
+ ZS%D2 %0,%3,%4")
+
+(define_insn "*movdfcc_real_nonreversible"
+ [(set
+ (match_operand:DF 0 "register_operand" "=r ,r")
+ (if_then_else:DF
+ (match_operator
+ 2 "mmix_comparison_operator"
+ [(match_operand 3 "mmix_reg_cc_operand" "r ,r")
+ (const_int 0)])
+ (match_operand:DF 1 "mmix_reg_or_0_operand" "rGM,rGM")
+ (match_operand:DF 4 "mmix_reg_or_0_operand" "0 ,GM")))]
+ "!REVERSIBLE_CC_MODE (GET_MODE (operands[3]))"
+ "@
+ CS%d2 %0,%3,%1
+ ZS%d2 %0,%3,%1")
+
+;; FIXME: scc insns will probably help, I just skip them
+;; right now. Revisit.
+
+(define_expand "cbranchdi4"
+ [(set (match_dup 4)
+ (match_op_dup 5
+ [(match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "")]))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(match_dup 4)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[4] = mmix_gen_compare_reg (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ operands[5] = gen_rtx_fmt_ee (COMPARE,
+ GET_MODE (operands[4]),
+ operands[1], operands[2]);
+}")
+
+(define_expand "cbranchdf4"
+ [(set (match_dup 4)
+ (match_op_dup 5
+ [(match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" "")]))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "float_comparison_operator"
+ [(match_dup 4)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ /* The head comment of optabs.c:can_compare_p says we're required to
+ implement this, so we have to clean up the mess here. */
+ if (GET_CODE (operands[0]) == LE || GET_CODE (operands[0]) == GE)
+ {
+ enum rtx_code ltgt_code = GET_CODE (operands[0]) == LE ? LT : GT;
+ emit_cmp_and_jump_insns (operands[1], operands[2], ltgt_code, NULL_RTX,
+ DFmode, 0, operands[3]);
+ emit_cmp_and_jump_insns (operands[1], operands[2], EQ, NULL_RTX,
+ DFmode, 0, operands[3]);
+ DONE;
+ }
+
+ operands[4] = mmix_gen_compare_reg (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ operands[5] = gen_rtx_fmt_ee (COMPARE,
+ GET_MODE (operands[4]),
+ operands[1], operands[2]);
+}")
+
+
+;; FIXME: we can emit an unordered-or-*not*-equal compare in one insn, but
+;; there's no RTL code for it. Maybe revisit in future.
+
+;; FIXME: Odd/Even matchers?
+(define_insn "*bCC_foldable"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "mmix_foldable_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "%+B%d1 %2,%0")
+
+(define_insn "*bCC"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "mmix_comparison_operator"
+ [(match_operand 2 "mmix_reg_cc_operand" "r")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "%+B%d1 %2,%0")
+
+(define_insn "*bCC_inverted_foldable"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "mmix_foldable_comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+;; REVERSIBLE_CC_MODE is checked by mmix_foldable_comparison_operator.
+ ""
+ "%+B%D1 %2,%0")
+
+(define_insn "*bCC_inverted"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "mmix_comparison_operator"
+ [(match_operand 2 "mmix_reg_cc_operand" "r")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "REVERSIBLE_CC_MODE (GET_MODE (operands[2]))"
+ "%+B%D1 %2,%0")
+
+(define_expand "call"
+ [(parallel [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))
+ (clobber (match_dup 4))])
+ (set (match_dup 4) (match_dup 3))]
+ ""
+ "
+{
+ /* The caller checks that the operand is generally valid as an
+ address, but at -O0 nothing makes sure that it's also a valid
+ call address for a *call*, i.e. a mmix_symbolic_or_address_operand.
+ Force it into a register if it isn't. */
+ if (!mmix_symbolic_or_address_operand (XEXP (operands[0], 0),
+ GET_MODE (XEXP (operands[0], 0))))
+ operands[0]
+ = replace_equiv_address (operands[0],
+ force_reg (Pmode, XEXP (operands[0], 0)));
+
+ /* Since the epilogue 'uses' the return address, and it is clobbered
+ in the call, and we set it back after every call (all but one setting
+ will be optimized away), integrity is maintained. */
+ operands[3]
+ = mmix_get_hard_reg_initial_val (Pmode,
+ MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+
+ /* FIXME: There's a bug in gcc which causes NULL to be passed as
+ operand[2] when we run out of registers, which later confuses gcc.
+ Work around it by replacing it with const_int 0. Possibly a
+ documentation error too. */
+ if (operands[2] == NULL_RTX)
+ operands[2] = const0_rtx;
+
+ operands[4] = gen_rtx_REG (DImode, MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+}")
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand 2 "general_operand" "")))
+ (use (match_operand 3 "general_operand" ""))
+ (clobber (match_dup 5))])
+ (set (match_dup 5) (match_dup 4))]
+ ""
+ "
+{
+ /* The caller checks that the operand is generally valid as an
+ address, but at -O0 nothing makes sure that it's also a valid
+ call address for a *call*, i.e. a mmix_symbolic_or_address_operand.
+ Force it into a register if it isn't. */
+ if (!mmix_symbolic_or_address_operand (XEXP (operands[1], 0),
+ GET_MODE (XEXP (operands[1], 0))))
+ operands[1]
+ = replace_equiv_address (operands[1],
+ force_reg (Pmode, XEXP (operands[1], 0)));
+
+ /* Since the epilogue 'uses' the return address, and it is clobbered
+ in the call, and we set it back after every call (all but one setting
+ will be optimized away), integrity is maintained. */
+ operands[4]
+ = mmix_get_hard_reg_initial_val (Pmode,
+ MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+
+ /* FIXME: See 'call'. */
+ if (operands[3] == NULL_RTX)
+ operands[3] = const0_rtx;
+
+ /* FIXME: Documentation bug: operands[3] (operands[2] for 'call') is the
+ *next* argument register, not the number of arguments in registers.
+ (There used to be code here where that mattered.) */
+
+ operands[5] = gen_rtx_REG (DImode, MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+}")
+
+;; Don't use 'p' here. A 'p' must stand first in constraints, or reload
+;; messes up and does not register the address for reload. Several C++
+;; testcases show this, including g++.brendan/crash40.C. FIXME: This is
+;; arguably a bug in gcc. Note line ~2612 in reload.c, which acts on the
+;; condition <<else if (constraints[i][0] == 'p')>> and the comment on
+;; ~3017 that says:
+;; << case 'p':
+;; /* All necessary reloads for an address_operand
+;; were handled in find_reloads_address. */>>
+;; Sorry, I have not dug deeper. If symbolic addresses are used rarely
+;; compared to addresses in registers, disparaging the first ("p")
+;; alternative by adding ? in the first operand might do the trick. We
+;; define 'U' as a synonym for 'p', but without the caveats (and very
+;; small advantages) of 'p'.
+(define_insn "*call_real"
+ [(call (mem:QI
+ (match_operand:DI 0 "mmix_symbolic_or_address_operand" "s,rU"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:DI MMIX_rJ_REGNUM))]
+ ""
+ "@
+ PUSHJ $%p2,%0
+ PUSHGO $%p2,%a0")
+
+(define_insn "*call_value_real"
+ [(set (match_operand 0 "register_operand" "=r,r")
+ (call (mem:QI
+ (match_operand:DI 1 "mmix_symbolic_or_address_operand" "s,rU"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:DI MMIX_rJ_REGNUM))]
+ ""
+ "@
+ PUSHJ $%p3,%1
+ PUSHGO $%p3,%a1")
+
+;; I hope untyped_call and untyped_return are not needed for MMIX.
+;; Users of Objective-C will notice.
+
+; Generated by GCC.
+(define_expand "return"
+ [(return)]
+ "mmix_use_simple_return ()"
+ "")
+
+; Generated by the epilogue expander.
+(define_insn "*expanded_return"
+ [(return)]
+ ""
+ "POP %.,0")
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "mmix_expand_prologue (); DONE;")
+
+; Note that the (return) from the expander itself is always the last insn
+; in the epilogue.
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "mmix_expand_epilogue ();")
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "SWYM 0,0,0")
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "JMP %0")
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand 0 "address_operand" "p"))]
+ ""
+ "GO $255,%a0")
+
+;; FIXME: This is just a jump, and should be expanded to one.
+(define_insn "tablejump"
+ [(set (pc) (match_operand:DI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "GO $255,%a0")
+
+;; The only peculiar thing is that the register stack has to be unwound at
+;; nonlocal_goto_receiver. At each function that has a nonlocal label, we
+;; save at function entry the location of the "alpha" register stack
+;; pointer, rO, in a stack slot known to that function (right below where
+;; the frame-pointer would be located).
+;; In the nonlocal goto receiver, we unwind the register stack by a series
+;; of "pop 0,0" until rO equals the saved value. (If it goes lower, we
+;; should die with a trap.)
+(define_expand "nonlocal_goto_receiver"
+ [(parallel [(unspec_volatile [(const_int 0)] 1)
+ (clobber (scratch:DI))
+ (clobber (reg:DI MMIX_rJ_REGNUM))])
+ (set (reg:DI MMIX_rJ_REGNUM) (match_dup 0))]
+ ""
+ "
+{
+ operands[0]
+ = mmix_get_hard_reg_initial_val (Pmode,
+ MMIX_INCOMING_RETURN_ADDRESS_REGNUM);
+
+ /* Mark this function as containing a landing-pad. */
+ cfun->machine->has_landing_pad = 1;
+}")
+
+;; GCC can insist on using saved registers to keep the slot address in a
+;; register "across" the exception, or (perhaps) to use saved registers
+;; in the address and re-use them after the register stack unwind, so
+;; it's best to form the address ourselves.
+(define_insn "*nonlocal_goto_receiver_expanded"
+ [(unspec_volatile [(const_int 0)] 1)
+ (clobber (match_scratch:DI 0 "=&r"))
+ (clobber (reg:DI MMIX_rJ_REGNUM))]
+ ""
+{
+ rtx temp_reg = operands[0];
+ rtx my_operands[2];
+ HOST_WIDEST_INT offs;
+ const char *my_template
+ = "GETA $255,0f\;PUT rJ,$255\;LDOU $255,%a0\n\
+0:\;GET %1,rO\;CMPU %1,%1,$255\;BNP %1,1f\;POP 0,0\n1:";
+
+ my_operands[1] = temp_reg;
+
+ /* If we have a frame-pointer (hence unknown stack-pointer offset),
+ just use the frame-pointer and the known offset. */
+ if (frame_pointer_needed)
+ {
+ my_operands[0] = GEN_INT (-MMIX_fp_rO_OFFSET);
+
+ output_asm_insn ("NEGU %1,0,%0", my_operands);
+ my_operands[0] = gen_rtx_PLUS (Pmode, frame_pointer_rtx, temp_reg);
+ }
+ else
+ {
+ /* We know the fp-based offset, so "eliminate" it to be sp-based. */
+ offs
+ = (mmix_initial_elimination_offset (MMIX_FRAME_POINTER_REGNUM,
+ MMIX_STACK_POINTER_REGNUM)
+ + MMIX_fp_rO_OFFSET);
+
+ if (offs >= 0 && offs <= 255)
+ my_operands[0]
+ = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offs));
+ else
+ {
+ mmix_output_register_setting (asm_out_file, REGNO (temp_reg),
+ offs, 1);
+ my_operands[0] = gen_rtx_PLUS (Pmode, stack_pointer_rtx, temp_reg);
+ }
+ }
+
+ output_asm_insn (my_template, my_operands);
+ return "";
+})
+
+(define_insn "*Naddu"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n"))
+ (match_operand:DI 3 "mmix_reg_or_8bit_operand" "rI")))]
+ "GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 4
+ || INTVAL (operands[2]) == 8
+ || INTVAL (operands[2]) == 16)"
+ "%2ADDU %0,%1,%3")
+
+(define_insn "*andn"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (not:DI (match_operand:DI 1 "mmix_reg_or_8bit_operand" "rI"))
+ (match_operand:DI 2 "register_operand" "r")))]
+ ""
+ "ANDN %0,%2,%1")
+
+(define_insn "*nand"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI
+ (not:DI (match_operand:DI 1 "register_operand" "%r"))
+ (not:DI (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI"))))]
+ ""
+ "NAND %0,%1,%2")
+
+(define_insn "*nor"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI
+ (not:DI (match_operand:DI 1 "register_operand" "%r"))
+ (not:DI (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI"))))]
+ ""
+ "NOR %0,%1,%2")
+
+(define_insn "*nxor"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI
+ (xor:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "mmix_reg_or_8bit_operand" "rI"))))]
+ ""
+ "NXOR %0,%1,%2")
+
+(define_insn "sync_icache"
+ [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")
+ (match_operand:DI 1 "const_int_operand" "I")] 0)]
+ ""
+ "SYNCID %1,%0")
+
+;; Local Variables:
+;; mode: lisp
+;; indent-tabs-mode: t
+;; End:
diff --git a/gcc/config/mmix/mmix.opt b/gcc/config/mmix/mmix.opt
new file mode 100644
index 000000000..9439471d6
--- /dev/null
+++ b/gcc/config/mmix/mmix.opt
@@ -0,0 +1,99 @@
+; Options for the MMIX port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; FIXME: Get rid of this one.
+mlibfuncs
+Target Report Mask(LIBFUNC)
+For intrinsics library: pass all parameters in registers
+
+mabi=mmixware
+Target Report RejectNegative InverseMask(ABI_GNU)
+Use register stack for parameters and return value
+
+mabi=gnu
+Target Report RejectNegative Mask(ABI_GNU)
+Use call-clobbered registers for parameters and return value
+
+; FIXME: Provide a way to *load* the epsilon register.
+mepsilon
+Target Report Mask(FCMP_EPSILON)
+Use epsilon-respecting floating point compare instructions
+
+mzero-extend
+Target Report Mask(ZERO_EXTEND)
+Use zero-extending memory loads, not sign-extending ones
+
+mknuthdiv
+Target Report Mask(KNUTH_DIVISION)
+Generate divide results with remainder having the same sign as the divisor (not the dividend)
+
+mtoplevel-symbols
+Target Report Mask(TOPLEVEL_SYMBOLS)
+Prepend global symbols with \":\" (for use with PREFIX)
+
+mno-set-program-start
+Target Report RejectNegative
+Do not provide a default start-address of 0x100 for the program
+
+melf
+Target Report RejectNegative
+Link to emit program in ELF format (rather than mmo)
+
+mbranch-predict
+Target Report RejectNegative Mask(BRANCH_PREDICT)
+Use P-mnemonics for branches statically predicted as taken
+
+mno-branch-predict
+Target Report RejectNegative InverseMask(BRANCH_PREDICT)
+Don't use P-mnemonics for branches
+
+; We use the term "base address" since that's what Knuth uses. The base
+; address goes in a global register. When addressing, it's more like
+; "base address plus offset", with the offset being 0..255 from the
+; base, which itself can be a symbol plus an offset. The effect is like
+; having a constant pool in global registers, with code offsetting from
+; those registers (automatically causing a request for a suitable
+; constant base-address register) without having to know the specific
+; register or the specific offset. The drawback is that there's a
+; limited number of registers, and you won't find out until link time
+; whether you should have compiled with -mno-base-addresses.
+mbase-addresses
+Target Report RejectNegative Mask(BASE_ADDRESSES)
+Use addresses that allocate global registers
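+
+; A hypothetical sketch of the effect: a reference to a symbol "x" can
+; then be emitted as "LDO $5,x", with the linker resolving it against a
+; global register holding a base address near "x", giving the
+; base-plus-offset form "LDO $5,$232,16" (register $232 and offset 16
+; are invented here for illustration).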
+
+mno-base-addresses
+Target Report RejectNegative InverseMask(BASE_ADDRESSES)
+Do not use addresses that allocate global registers
+
+msingle-exit
+Target Report RejectNegative InverseMask(USE_RETURN_INSN)
+Generate a single exit point for each function
+
+mno-single-exit
+Target Report RejectNegative Mask(USE_RETURN_INSN)
+Do not generate a single exit point for each function
+
+mset-program-start=
+Target Report RejectNegative Joined
+Set start-address of the program
+
+mset-data-start=
+Target Report RejectNegative Joined
+Set start-address of data
diff --git a/gcc/config/mmix/predicates.md b/gcc/config/mmix/predicates.md
new file mode 100644
index 000000000..b5773b87a
--- /dev/null
+++ b/gcc/config/mmix/predicates.md
@@ -0,0 +1,155 @@
+;; Operand and operator predicates for the GCC MMIX port.
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 if OP is a valid comparison operator for "cbranch" instructions.
+;; LE and GE are further lowered by the cbranchdf4 pattern.
+(define_predicate "float_comparison_operator"
+ (match_code "ne, eq, le, ge, lt, gt, ordered, unordered"))
+
+;; True if this is a foldable comparison operator
+;; - one where the result of (compare:CC (reg) (const_int 0)) can be
+;; replaced by (reg).
+
+(define_predicate "mmix_foldable_comparison_operator"
+ (match_code "ne, eq, ge, gt, le, lt, gtu, leu")
+{
+ RTX_CODE code = GET_CODE (op);
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* This little bit is why the body of this predicate is kept as C. */
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (op, 0));
+
+ return ((mode == CCmode || mode == DImode)
+ && (code == NE || code == EQ || code == GE || code == GT
+ || code == LE || code == LT))
+ /* FIXME: This may be a stupid trick. What happens when GCC wants to
+ reverse the condition? Can it do that by itself? Maybe it can
+ even reverse the condition to fit a foldable one in the first
+ place? */
+ || (mode == CC_UNSmode && (code == GTU || code == LEU));
+})
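+
+;; For example, after (set (reg:CC c) (compare:CC (reg:DI r) (const_int 0))),
+;; the CC value carries the same sign information as r itself, so a test
+;; like (gt ... (const_int 0)) can branch on r directly - see the
+;; "*bCC_foldable" pattern in mmix.md. Hence "foldable".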
+
+;; Like comparison_operator, but only true if this comparison operator is
+;; applied to a valid mode. Needed to avoid jump.c generating invalid
+;; code with -ffast-math (gcc.dg/20001228-1.c).
+
+(define_predicate "mmix_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ RTX_CODE code = GET_CODE (op);
+
+ /* Comparison operators usually don't have a mode, but let's try and get
+ one anyway for the day that changes. */
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* Get the mode from the first operand if we don't have one.
+ Also the reason why we do this in C. */
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (op, 0));
+
+ /* FIXME: This needs to be kept in sync with the tables in
+ mmix_output_condition. */
+ return
+ mode == VOIDmode
+ || (mode == CC_FUNmode
+ && (code == ORDERED || code == UNORDERED))
+ || (mode == CC_FPmode
+ && (code == GT || code == LT))
+ || (mode == CC_FPEQmode
+ && (code == NE || code == EQ))
+ || (mode == CC_UNSmode
+ && (code == GEU || code == GTU || code == LEU || code == LTU))
+ || (mode == CCmode
+ && (code == NE || code == EQ || code == GE || code == GT
+ || code == LE || code == LT))
+ || (mode == DImode
+ && (code == NE || code == EQ || code == GE || code == GT
+ || code == LE || code == LT || code == LEU || code == GTU));
+})
+
+;; True if this is a register with a condition-code mode.
+
+(define_predicate "mmix_reg_cc_operand"
+ (and (match_operand 0 "register_operand")
+ (ior (match_test "GET_MODE (op) == CCmode")
+ (ior (match_test "GET_MODE (op) == CC_UNSmode")
+ (ior (match_test "GET_MODE (op) == CC_FPmode")
+ (ior (match_test "GET_MODE (op) == CC_FPEQmode")
+ (match_test "GET_MODE (op) == CC_FUNmode")))))))
+
+;; True if this is an address_operand or a symbolic operand.
+
+(define_predicate "mmix_symbolic_or_address_operand"
+ (match_code "symbol_ref, label_ref, const, subreg, reg, plus")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ /* The reason why this body is still C. */
+ op = XEXP (op, 0);
+ if ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && (GET_CODE (XEXP (op, 1)) == CONST_INT
+ || (GET_CODE (XEXP (op, 1)) == CONST_DOUBLE
+ && GET_MODE (XEXP (op, 1)) == VOIDmode)))
+ return 1;
+ /* Fall through. */
+ default:
+ return address_operand (op, mode);
+ }
+})
+
+;; True if this is a register or CONST_INT (or CONST_DOUBLE for DImode).
+;; We could narrow the value down with a couple of predicates, but that
+;; doesn't seem to be worth it at the moment.
+
+(define_predicate "mmix_reg_or_constant_operand"
+ (ior (match_operand 0 "register_operand")
+ (ior (match_code "const_int")
+ (and (match_code "const_double")
+ (match_test "GET_MODE (op) == VOIDmode")))))
+
+;; True if this is a register or 0 (int or float).
+
+(define_predicate "mmix_reg_or_0_operand"
+ (ior
+ (match_operand 0 "register_operand")
+ (ior
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx"))
+ (and
+ (match_code "const_double")
+ ;; FIXME: Is mode calculation necessary and correct?
+ (match_test
+ "op == CONST0_RTX (mode == VOIDmode ? GET_MODE (op) : mode)")))))
+
+;; True if this is a register or an int 0..255.
+
+(define_predicate "mmix_reg_or_8bit_operand"
+ (ior
+ (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "CONST_OK_FOR_LETTER_P (INTVAL (op), 'I')"))))
diff --git a/gcc/config/mmix/t-mmix b/gcc/config/mmix/t-mmix
new file mode 100644
index 000000000..dc05c8e82
--- /dev/null
+++ b/gcc/config/mmix/t-mmix
@@ -0,0 +1,31 @@
+# Copyright (C) 2001, 2002, 2003, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# See "Target Fragment" in GCC info. That same order is used here.
+
+TARGET_LIBGCC2_CFLAGS = -mlibfuncs -O2
+
+# We need to turn off some assumptions of normality for code in
+# crtstuff.c and crt{i,n}.asm, specifically the assumption that
+# execution does not continue past the end of the section in the file
+# being compiled. Thus we must stop the assembler from generating
+# stubbable PUSHJ relocs, because those would add stubs at the end of
+# the current section when deemed necessary.
+CRTSTUFF_T_CFLAGS = -Wa,--no-stubs
+
+MULTILIB_OPTIONS = mabi=gnu
+MULTILIB_DIRNAMES = gnuabi
diff --git a/gcc/config/mn10300/constraints.md b/gcc/config/mn10300/constraints.md
new file mode 100644
index 000000000..c8ee2d4e2
--- /dev/null
+++ b/gcc/config/mn10300/constraints.md
@@ -0,0 +1,107 @@
+;; Constraint definitions for the MN10300.
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "d" "DATA_REGS"
+ "A data register.")
+
+(define_register_constraint "a" "ADDRESS_REGS"
+ "An address register.")
+
+;; This can be used for QI/HImode memory operations, and most arithmetic.
+;; AM33 supports these on all registers, whereas MN103 needs DATA_REGS.
+(define_register_constraint "D" "TARGET_AM33 ? GENERAL_REGS : DATA_REGS"
+ "A general register for AM33, and a data register otherwise.")
+
+;; Similarly for ADDRESS_REGS vs GENERAL_REGS.
+(define_register_constraint "A" "TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS"
+ "A general register for AM33, and an address register otherwise.")
+
+(define_register_constraint "y" "SP_REGS"
+ "An SP register (if available).")
+
+(define_register_constraint "z" "MDR_REGS"
+ "The MDR register.")
+
+(define_register_constraint "x" "TARGET_AM33 ? EXTENDED_REGS : NO_REGS"
+ "An extended register.")
+
+(define_register_constraint "f" "TARGET_AM33_2 ? FP_REGS : NO_REGS"
+ "A floating point register.")
+
+(define_register_constraint "c" "TARGET_AM33_2 ? FP_ACC_REGS : NO_REGS"
+ "A floating point accumulator register.")
+
+(define_memory_constraint "Q"
+ "@internal"
+ (and (match_code "mem")
+ (match_test "!CONSTANT_ADDRESS_P (XEXP (op, 0))")))
+
+(define_constraint "S"
+ "@internal"
+ (if_then_else (match_test "flag_pic")
+ (and (match_test "GET_CODE (op) == UNSPEC")
+ (ior (match_test "XINT (op, 1) == UNSPEC_PLT")
+ (match_test "XINT (op, 1) == UNSPEC_PIC")
+ (match_test "XINT (op, 1) == UNSPEC_GOTSYM_OFF")))
+ (match_test "GET_CODE (op) == SYMBOL_REF")))
+
+;; Integer constraints
+
+(define_constraint "I"
+ "An integer zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "J"
+ "An integer one."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "K"
+ "An integer two."
+ (and (match_code "const_int")
+ (match_test "ival == 2")))
+
+(define_constraint "L"
+ "An integer four."
+ (and (match_code "const_int")
+ (match_test "ival == 4")))
+
+(define_constraint "M"
+ "An integer three."
+ (and (match_code "const_int")
+ (match_test "ival == 3")))
+
+(define_constraint "N"
+ "An integer of either 255 or 65535."
+ (and (match_code "const_int")
+ (ior (match_test "ival == 255")
+ (match_test "ival == 65535"))))
+
+(define_constraint "O"
+ "An integer between -8 and +7 inclusive."
+ (and (match_code "const_int")
+ (and (match_test "ival >= -8")
+ (match_test "ival <= 7"))))
+
+;; Floating-point constraints
+(define_constraint "G"
+ "Floating-point zero."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (mode)")))
diff --git a/gcc/config/mn10300/linux.h b/gcc/config/mn10300/linux.h
new file mode 100644
index 000000000..ca0e10a65
--- /dev/null
+++ b/gcc/config/mn10300/linux.h
@@ -0,0 +1,90 @@
+/* Definitions of target machine for GNU compiler.
+ Matsushita AM33/2.0
+ Copyright 2001, 2002, 2005, 2006, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Alexandre Oliva <aoliva@redhat.com>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{mam33:-D__AM33__} %{!mam33:-D__AM33__=2 -D__AM33_2__} \
+ %{posix:-D_POSIX_SOURCE} \
+ %{pthread:-D_REENTRANT -D_PTHREADS}"
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mrelax:--relax} %{shared:-shared} \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}"
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_AM33_2
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (AM33/2.0 GNU/Linux)");
+
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+extern int mn10300_protect_label;
+
+#undef PRINT_OPERAND
+#define PRINT_OPERAND(FILE, X, CODE) \
+ do \
+ { \
+ mn10300_protect_label = 1; \
+ mn10300_print_operand ((FILE), (X), (CODE)); \
+ mn10300_protect_label = 0; \
+ } \
+ while (0)
+
+#undef PRINT_OPERAND_ADDRESS
+#define PRINT_OPERAND_ADDRESS(FILE, X) \
+ do \
+ { \
+ mn10300_protect_label = 1; \
+ mn10300_print_operand_address ((FILE), (X)); \
+ mn10300_protect_label = 0; \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ do \
+ { \
+ const char * real_name; \
+ \
+ real_name = (*targetm.strip_name_encoding) (NAME); \
+ if (mn10300_protect_label) \
+ asm_fprintf (FILE, "+"); \
+ asm_fprintf (FILE, "%U%s", real_name); \
+ } \
+ while (0)
+
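As a standalone illustration of the protect-label scheme used by the three macros above (a sketch, not part of the port): PRINT_OPERAND sets mn10300_protect_label around operand printing so that ASM_OUTPUT_LABELREF emits a '+' before symbol names, letting the assembler tell symbols from register names. The mock below mirrors that flow with hypothetical stand-ins for the GCC hooks.

#include <stdio.h>

static int protect_label;              /* stand-in for mn10300_protect_label */

static void output_labelref (const char *name)
{
  if (protect_label)
    putchar ('+');                     /* mark symbols, not registers */
  fputs (name, stdout);
}

static void print_operand (const char *sym)
{
  protect_label = 1;                   /* set around operand printing... */
  output_labelref (sym);
  protect_label = 0;                   /* ...and cleared afterwards */
}

int main (void)
{
  print_operand ("d0");                /* prints "+d0": a symbol spelled
                                          like a register name */
  putchar ('\n');
  return 0;
}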
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+#undef WCHAR_TYPE
+#undef WCHAR_TYPE_SIZE
diff --git a/gcc/config/mn10300/mn10300-modes.def b/gcc/config/mn10300/mn10300-modes.def
new file mode 100644
index 000000000..832663edb
--- /dev/null
+++ b/gcc/config/mn10300/mn10300-modes.def
@@ -0,0 +1,24 @@
+/* Definitions of target machine for GNU compiler, for MN10300.
+ Copyright (C) 2006 Free Software Foundation, Inc.
+ Contributed by Red Hat Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 2, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street - Fifth
+ Floor, Boston, MA 02110-1301, USA. */
+
+CC_MODE (CCZN);
+CC_MODE (CCZNC);
+CC_MODE (CC_FLOAT);
diff --git a/gcc/config/mn10300/mn10300-protos.h b/gcc/config/mn10300/mn10300-protos.h
new file mode 100644
index 000000000..058f5df87
--- /dev/null
+++ b/gcc/config/mn10300/mn10300-protos.h
@@ -0,0 +1,57 @@
+/* Definitions of target machine for GNU compiler. Matsushita MN10300 series
+ Copyright (C) 2000, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define Mmode enum machine_mode
+#define Cstar const char *
+#define Rclas enum reg_class
+
+#ifdef RTX_CODE
+extern rtx mn10300_legitimize_pic_address (rtx, rtx);
+extern int mn10300_legitimate_pic_operand_p (rtx);
+extern rtx mn10300_legitimize_reload_address (rtx, Mmode, int, int, int);
+extern bool mn10300_function_value_regno_p (const unsigned int);
+extern int mn10300_get_live_callee_saved_regs (void);
+extern bool mn10300_hard_regno_mode_ok (unsigned int, Mmode);
+extern bool mn10300_legitimate_constant_p (rtx);
+extern bool mn10300_modes_tieable (Mmode, Mmode);
+extern Cstar mn10300_output_add (rtx[3], bool);
+extern void mn10300_print_operand (FILE *, rtx, int);
+extern void mn10300_print_operand_address (FILE *, rtx);
+extern void mn10300_print_reg_list (FILE *, int);
+extern Mmode mn10300_select_cc_mode (enum rtx_code, rtx, rtx);
+extern int mn10300_store_multiple_operation (rtx, Mmode);
+extern int mn10300_symbolic_operand (rtx, Mmode);
+extern void mn10300_split_cbranch (Mmode, rtx, rtx);
+extern int mn10300_split_and_operand_count (rtx);
+extern bool mn10300_match_ccmode (rtx, Mmode);
+#endif /* RTX_CODE */
+
+extern bool mn10300_regno_in_class_p (unsigned, int, bool);
+extern bool mn10300_can_use_rets_insn (void);
+extern bool mn10300_can_use_retf_insn (void);
+extern void mn10300_expand_prologue (void);
+extern void mn10300_expand_epilogue (void);
+extern int mn10300_initial_offset (int, int);
+extern int mn10300_frame_size (void);
+
+#undef Mmode
+#undef Cstar
+#undef Rclas
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
new file mode 100644
index 000000000..e3c417e46
--- /dev/null
+++ b/gcc/config/mn10300/mn10300.c
@@ -0,0 +1,3254 @@
+/* Subroutines for insn-output.c for Matsushita MN10300 series
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "reload.h"
+#include "expr.h"
+#include "optabs.h"
+#include "function.h"
+#include "obstack.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+#include "tm-constrs.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+/* This is used in the am33_2.0-linux-gnu port, in which global symbol
+ names are not prefixed by underscores, to tell whether to prefix a
+ label with a plus sign or not, so that the assembler can tell
+ symbol names from register names. */
+int mn10300_protect_label;
+
+/* The selected processor. */
+enum processor_type mn10300_processor = PROCESSOR_DEFAULT;
+
+/* Processor type to select for tuning. */
+static const char * mn10300_tune_string = NULL;
+
+/* Selected processor type for tuning. */
+enum processor_type mn10300_tune_cpu = PROCESSOR_DEFAULT;
+
+/* The size of the callee register save area. Right now we save everything
+ on entry since it costs us nothing in code size. It does cost us from a
+ speed standpoint, so we want to optimize this sooner or later. */
+#define REG_SAVE_BYTES (4 * df_regs_ever_live_p (2) \
+ + 4 * df_regs_ever_live_p (3) \
+ + 4 * df_regs_ever_live_p (6) \
+ + 4 * df_regs_ever_live_p (7) \
+ + 16 * (df_regs_ever_live_p (14) \
+ || df_regs_ever_live_p (15) \
+ || df_regs_ever_live_p (16) \
+ || df_regs_ever_live_p (17)))
+
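A minimal standalone sketch of the REG_SAVE_BYTES arithmetic, with the df_regs_ever_live_p queries replaced by a plain liveness bitmask (an assumption, purely for illustration): d2, d3, a2 and a3 each cost 4 bytes, while registers 14-17 are saved as one 16-byte block if any of them is live.

#include <stdio.h>

static int live (unsigned mask, int regno) { return (mask >> regno) & 1; }

static int reg_save_bytes (unsigned mask)
{
  return 4 * live (mask, 2)
       + 4 * live (mask, 3)
       + 4 * live (mask, 6)
       + 4 * live (mask, 7)
       + 16 * (live (mask, 14) || live (mask, 15)
               || live (mask, 16) || live (mask, 17));
}

int main (void)
{
  /* d2 and one extended register (regno 14) live: 4 + 16 = 20 bytes.  */
  printf ("%d\n", reg_save_bytes ((1u << 2) | (1u << 14)));
  return 0;
}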
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options mn10300_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#define CC_FLAG_Z 1
+#define CC_FLAG_N 2
+#define CC_FLAG_C 4
+#define CC_FLAG_V 8
+
+static int cc_flags_for_mode(enum machine_mode);
+static int cc_flags_for_code(enum rtx_code);
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+mn10300_handle_option (size_t code,
+ const char *arg ATTRIBUTE_UNUSED,
+ int value)
+{
+ switch (code)
+ {
+ case OPT_mam33:
+ mn10300_processor = value ? PROCESSOR_AM33 : PROCESSOR_MN10300;
+ return true;
+
+ case OPT_mam33_2:
+ mn10300_processor = (value
+ ? PROCESSOR_AM33_2
+ : MIN (PROCESSOR_AM33, PROCESSOR_DEFAULT));
+ return true;
+
+ case OPT_mam34:
+ mn10300_processor = (value ? PROCESSOR_AM34 : PROCESSOR_DEFAULT);
+ return true;
+
+ case OPT_mtune_:
+ mn10300_tune_string = arg;
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+
+static void
+mn10300_option_override (void)
+{
+ if (TARGET_AM33)
+ target_flags &= ~MASK_MULT_BUG;
+ else
+ {
+ /* Disable scheduling for the MN10300 as we do
+ not have timing information available for it. */
+ flag_schedule_insns = 0;
+ flag_schedule_insns_after_reload = 0;
+
+ /* Force enable splitting of wide types, as otherwise it is trivial
+ to run out of registers. Indeed, this works so well that register
+ allocation problems are now more common *without* optimization,
+ when this flag is not enabled by default. */
+ flag_split_wide_types = 1;
+ }
+
+ if (mn10300_tune_string)
+ {
+ if (strcasecmp (mn10300_tune_string, "mn10300") == 0)
+ mn10300_tune_cpu = PROCESSOR_MN10300;
+ else if (strcasecmp (mn10300_tune_string, "am33") == 0)
+ mn10300_tune_cpu = PROCESSOR_AM33;
+ else if (strcasecmp (mn10300_tune_string, "am33-2") == 0)
+ mn10300_tune_cpu = PROCESSOR_AM33_2;
+ else if (strcasecmp (mn10300_tune_string, "am34") == 0)
+ mn10300_tune_cpu = PROCESSOR_AM34;
+ else
+ error ("-mtune= expects mn10300, am33, am33-2, or am34");
+ }
+}
+
+static void
+mn10300_file_start (void)
+{
+ default_file_start ();
+
+ if (TARGET_AM33_2)
+ fprintf (asm_out_file, "\t.am33_2\n");
+ else if (TARGET_AM33)
+ fprintf (asm_out_file, "\t.am33\n");
+}
+
+/* Note: This list must match the liw_op attribute in mn10300.md. */
+
+static const char *liw_op_names[] =
+{
+ "add", "cmp", "sub", "mov",
+ "and", "or", "xor",
+ "asr", "lsr", "asl",
+ "none", "max"
+};
+
+/* Print operand X using operand code CODE to assembly language output file
+ FILE. */
+
+void
+mn10300_print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'W':
+ {
+ unsigned int liw_op = UINTVAL (x);
+
+ gcc_assert (TARGET_ALLOW_LIW);
+ gcc_assert (liw_op < LIW_OP_MAX);
+ fputs (liw_op_names[liw_op], file);
+ break;
+ }
+
+ case 'b':
+ case 'B':
+ {
+ enum rtx_code cmp = GET_CODE (x);
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ const char *str;
+ int have_flags;
+
+ if (code == 'B')
+ cmp = reverse_condition (cmp);
+ have_flags = cc_flags_for_mode (mode);
+
+ switch (cmp)
+ {
+ case NE:
+ str = "ne";
+ break;
+ case EQ:
+ str = "eq";
+ break;
+ case GE:
+ /* bge is smaller than bnc. */
+ str = (have_flags & CC_FLAG_V ? "ge" : "nc");
+ break;
+ case LT:
+ str = (have_flags & CC_FLAG_V ? "lt" : "ns");
+ break;
+ case GT:
+ str = "gt";
+ break;
+ case LE:
+ str = "le";
+ break;
+ case GEU:
+ str = "cc";
+ break;
+ case GTU:
+ str = "hi";
+ break;
+ case LEU:
+ str = "ls";
+ break;
+ case LTU:
+ str = "cs";
+ break;
+ case ORDERED:
+ str = "lge";
+ break;
+ case UNORDERED:
+ str = "uo";
+ break;
+ case LTGT:
+ str = "lg";
+ break;
+ case UNEQ:
+ str = "ue";
+ break;
+ case UNGE:
+ str = "uge";
+ break;
+ case UNGT:
+ str = "ug";
+ break;
+ case UNLE:
+ str = "ule";
+ break;
+ case UNLT:
+ str = "ul";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_checking_assert ((cc_flags_for_code (cmp) & ~have_flags) == 0);
+ fputs (str, file);
+ }
+ break;
+
+ case 'C':
+ /* This is used for the operand to a call instruction;
+ if it's a REG, enclose it in parens, else output
+ the operand normally. */
+ if (REG_P (x))
+ {
+ fputc ('(', file);
+ mn10300_print_operand (file, x, 0);
+ fputc (')', file);
+ }
+ else
+ mn10300_print_operand (file, x, 0);
+ break;
+
+ case 'D':
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ fputc ('(', file);
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ break;
+
+ case REG:
+ fprintf (file, "fd%d", REGNO (x) - 18);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ /* This is the least significant word of a 64-bit value. */
+ case 'L':
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ fputc ('(', file);
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ break;
+
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ break;
+
+ case SUBREG:
+ fprintf (file, "%s", reg_names[subreg_regno (x)]);
+ break;
+
+ case CONST_DOUBLE:
+ {
+ long val[2];
+ REAL_VALUE_TYPE rv;
+
+ switch (GET_MODE (x))
+ {
+ case DFmode:
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, val);
+ fprintf (file, "0x%lx", val[0]);
+ break;
+ case SFmode:
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val[0]);
+ fprintf (file, "0x%lx", val[0]);
+ break;
+ case VOIDmode:
+ case DImode:
+ mn10300_print_operand_address (file,
+ GEN_INT (CONST_DOUBLE_LOW (x)));
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ case CONST_INT:
+ {
+ rtx low, high;
+ split_double (x, &low, &high);
+ fprintf (file, "%ld", (long)INTVAL (low));
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ /* Similarly, but for the most significant word. */
+ case 'H':
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ fputc ('(', file);
+ x = adjust_address (x, SImode, 4);
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ break;
+
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x) + 1]);
+ break;
+
+ case SUBREG:
+ fprintf (file, "%s", reg_names[subreg_regno (x) + 1]);
+ break;
+
+ case CONST_DOUBLE:
+ {
+ long val[2];
+ REAL_VALUE_TYPE rv;
+
+ switch (GET_MODE (x))
+ {
+ case DFmode:
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, val);
+ fprintf (file, "0x%lx", val[1]);
+ break;
+ case SFmode:
+ gcc_unreachable ();
+ case VOIDmode:
+ case DImode:
+ mn10300_print_operand_address (file,
+ GEN_INT (CONST_DOUBLE_HIGH (x)));
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ case CONST_INT:
+ {
+ rtx low, high;
+ split_double (x, &low, &high);
+ fprintf (file, "%ld", (long)INTVAL (high));
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'A':
+ fputc ('(', file);
+ if (REG_P (XEXP (x, 0)))
+ output_address (gen_rtx_PLUS (SImode, XEXP (x, 0), const0_rtx));
+ else
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ break;
+
+ case 'N':
+ gcc_assert (INTVAL (x) >= -128 && INTVAL (x) <= 255);
+ fprintf (file, "%d", (int)((~INTVAL (x)) & 0xff));
+ break;
+
+ case 'U':
+ gcc_assert (INTVAL (x) >= -128 && INTVAL (x) <= 255);
+ fprintf (file, "%d", (int)(INTVAL (x) & 0xff));
+ break;
+
+ /* For shift counts. The hardware ignores the upper bits of
+ any immediate, but the assembler will flag an out of range
+ shift count as an error. So we mask off the high bits
+ of the immediate here. */
+ case 'S':
+ if (CONST_INT_P (x))
+ {
+ fprintf (file, "%d", (int)(INTVAL (x) & 0x1f));
+ break;
+ }
+ /* FALL THROUGH */
+
+ default:
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ fputc ('(', file);
+ output_address (XEXP (x, 0));
+ fputc (')', file);
+ break;
+
+ case PLUS:
+ output_address (x);
+ break;
+
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ break;
+
+ case SUBREG:
+ fprintf (file, "%s", reg_names[subreg_regno (x)]);
+ break;
+
+ /* This will only be single precision.... */
+ case CONST_DOUBLE:
+ {
+ unsigned long val;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, "0x%lx", val);
+ break;
+ }
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ case CODE_LABEL:
+ case UNSPEC:
+ mn10300_print_operand_address (file, x);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+}
+
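A standalone sketch of the immediate masking performed by the 'N', 'U' and 'S' operand codes in the function above: 'U' keeps the low byte, 'N' prints its one's complement, and 'S' keeps only a 5-bit shift count so the assembler never sees an out-of-range shift.

#include <stdio.h>

int main (void)
{
  long val = -2;                           /* in the asserted range -128..255 */
  printf ("U: %d\n", (int)(val & 0xff));   /* 254 */
  printf ("N: %d\n", (int)(~val & 0xff));  /* 1 */
  printf ("S: %d\n", (int)(37 & 0x1f));    /* shift count 37 masked to 5 */
  return 0;
}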
+/* Output assembly language output for the address ADDR to FILE. */
+
+void
+mn10300_print_operand_address (FILE *file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case POST_INC:
+ mn10300_print_operand (file, XEXP (addr, 0), 0);
+ fputc ('+', file);
+ break;
+
+ case POST_MODIFY:
+ mn10300_print_operand (file, XEXP (addr, 0), 0);
+ fputc ('+', file);
+ fputc (',', file);
+ mn10300_print_operand (file, XEXP (addr, 1), 0);
+ break;
+
+ case REG:
+ mn10300_print_operand (file, addr, 0);
+ break;
+ case PLUS:
+ {
+ rtx base = XEXP (addr, 0);
+ rtx index = XEXP (addr, 1);
+
+ if (REG_P (index) && !REG_OK_FOR_INDEX_P (index))
+ {
+ rtx x = base;
+ base = index;
+ index = x;
+
+ gcc_assert (REG_P (index) && REG_OK_FOR_INDEX_P (index));
+ }
+ gcc_assert (REG_OK_FOR_BASE_P (base));
+
+ mn10300_print_operand (file, index, 0);
+ fputc (',', file);
+ mn10300_print_operand (file, base, 0);
+ break;
+ }
+ case SYMBOL_REF:
+ output_addr_const (file, addr);
+ break;
+ default:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.
+
+ Used for PIC-specific UNSPECs. */
+
+static bool
+mn10300_asm_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_PIC:
+ /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ break;
+ case UNSPEC_GOT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_PLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@PLT", file);
+ break;
+ case UNSPEC_GOTSYM_OFF:
+ assemble_name (file, GOT_SYMBOL_NAME);
+ fputs ("-(", file);
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("-.)", file);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Count the number of FP registers that have to be saved. */
+static int
+fp_regs_to_save (void)
+{
+ int i, n = 0;
+
+ if (! TARGET_AM33_2)
+ return 0;
+
+ for (i = FIRST_FP_REGNUM; i <= LAST_FP_REGNUM; ++i)
+ if (df_regs_ever_live_p (i) && ! call_really_used_regs[i])
+ ++n;
+
+ return n;
+}
+
+/* Print a set of registers in the format required by "movm" and "ret".
+ Register K is saved if bit K of MASK is set. The data and address
+ registers can be stored individually, but the extended registers cannot.
+ We assume that the mask already takes that into account. For instance,
+ bits 14 to 17 must have the same value. */
+
+void
+mn10300_print_reg_list (FILE *file, int mask)
+{
+ int need_comma;
+ int i;
+
+ need_comma = 0;
+ fputc ('[', file);
+
+ for (i = 0; i < FIRST_EXTENDED_REGNUM; i++)
+ if ((mask & (1 << i)) != 0)
+ {
+ if (need_comma)
+ fputc (',', file);
+ fputs (reg_names [i], file);
+ need_comma = 1;
+ }
+
+ if ((mask & 0x3c000) != 0)
+ {
+ gcc_assert ((mask & 0x3c000) == 0x3c000);
+ if (need_comma)
+ fputc (',', file);
+ fputs ("exreg1", file);
+ need_comma = 1;
+ }
+
+ fputc (']', file);
+}
+
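A standalone replay of the list format above (the register-name table is an illustrative stand-in for reg_names[], and the all-or-none completion of bits 14-17 is folded in from mn10300_get_live_callee_saved_regs below): any extended register in the mask pulls in the whole "exreg1" block.

#include <stdio.h>

int main (void)
{
  static const char *names[10] =
    { "d0", "d1", "d2", "d3", "a0", "a1", "a2", "a3", "ap", "sp" };
  int mask = (1 << 2) | (1 << 3) | (1 << 6) | (1 << 14);
  int i, need_comma = 0;

  if (mask & 0x3c000)          /* any extended reg forces the whole block */
    mask |= 0x3c000;

  putchar ('[');
  for (i = 0; i < 10; i++)
    if (mask & (1 << i))
      {
        if (need_comma) putchar (',');
        fputs (names[i], stdout);
        need_comma = 1;
      }
  if (mask & 0x3c000)
    {
      if (need_comma) putchar (',');
      fputs ("exreg1", stdout);
    }
  puts ("]");                  /* -> [d2,d3,a2,exreg1] */
  return 0;
}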
+/* If the MDR register is never clobbered, we can use the RETF instruction
+ which takes the address from the MDR register. This is 3 cycles faster
+ than having to load the address from the stack. */
+
+bool
+mn10300_can_use_retf_insn (void)
+{
+ /* Don't bother if we're not optimizing. In this case we won't
+ have proper access to df_regs_ever_live_p. */
+ if (!optimize)
+ return false;
+
+ /* EH returns alter the saved return address; MDR is not current. */
+ if (crtl->calls_eh_return)
+ return false;
+
+ /* Obviously not if MDR is ever clobbered. */
+ if (df_regs_ever_live_p (MDR_REG))
+ return false;
+
+ /* ??? Careful not to use this during expand_epilogue etc. */
+ gcc_assert (!in_sequence_p ());
+ return leaf_function_p ();
+}
+
+bool
+mn10300_can_use_rets_insn (void)
+{
+ return !mn10300_initial_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM);
+}
+
+/* Returns the set of live, callee-saved registers as a bitmask. The
+ callee-saved extended registers cannot be stored individually, so
+ all of them will be included in the mask if any one of them is used. */
+
+int
+mn10300_get_live_callee_saved_regs (void)
+{
+ int mask;
+ int i;
+
+ mask = 0;
+ for (i = 0; i <= LAST_EXTENDED_REGNUM; i++)
+ if (df_regs_ever_live_p (i) && ! call_really_used_regs[i])
+ mask |= (1 << i);
+ if ((mask & 0x3c000) != 0)
+ mask |= 0x3c000;
+
+ return mask;
+}
+
+static rtx
+F (rtx r)
+{
+ RTX_FRAME_RELATED_P (r) = 1;
+ return r;
+}
+
+/* Generate an instruction that pushes several registers onto the stack.
+ Register K will be saved if bit K in MASK is set. The function does
+ nothing if MASK is zero.
+
+ To be compatible with the "movm" instruction, the lowest-numbered
+ register must be stored in the lowest slot. If MASK is the set
+ { R1,...,RN }, where R1...RN are ordered least first, the generated
+ instruction will have the form:
+
+ (parallel
+ (set (reg:SI 9) (plus:SI (reg:SI 9) (const_int -N*4)))
+ (set (mem:SI (plus:SI (reg:SI 9)
+ (const_int -1*4)))
+ (reg:SI RN))
+ ...
+ (set (mem:SI (plus:SI (reg:SI 9)
+ (const_int -N*4)))
+ (reg:SI R1))) */
+
+static void
+mn10300_gen_multiple_store (unsigned int mask)
+{
+ /* The order in which registers are stored, from SP-4 through SP-N*4. */
+ static const unsigned int store_order[8] = {
+ /* e2, e3: never saved */
+ FIRST_EXTENDED_REGNUM + 4,
+ FIRST_EXTENDED_REGNUM + 5,
+ FIRST_EXTENDED_REGNUM + 6,
+ FIRST_EXTENDED_REGNUM + 7,
+ /* e0, e1, mdrq, mcrh, mcrl, mcvf: never saved. */
+ FIRST_DATA_REGNUM + 2,
+ FIRST_DATA_REGNUM + 3,
+ FIRST_ADDRESS_REGNUM + 2,
+ FIRST_ADDRESS_REGNUM + 3,
+ /* d0, d1, a0, a1, mdr, lir, lar: never saved. */
+ };
+
+ rtx x, elts[9];
+ unsigned int i;
+ int count;
+
+ if (mask == 0)
+ return;
+
+ for (i = count = 0; i < ARRAY_SIZE(store_order); ++i)
+ {
+ unsigned regno = store_order[i];
+
+ if (((mask >> regno) & 1) == 0)
+ continue;
+
+ ++count;
+ x = plus_constant (stack_pointer_rtx, count * -4);
+ x = gen_frame_mem (SImode, x);
+ x = gen_rtx_SET (VOIDmode, x, gen_rtx_REG (SImode, regno));
+ elts[count] = F(x);
+
+ /* Remove the register from the mask so that... */
+ mask &= ~(1u << regno);
+ }
+
+ /* ... we can make sure that we didn't try to use a register
+ not listed in the store order. */
+ gcc_assert (mask == 0);
+
+ /* Create the instruction that updates the stack pointer. */
+ x = plus_constant (stack_pointer_rtx, count * -4);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ elts[0] = F(x);
+
+ /* We need one PARALLEL element to update the stack pointer and
+ an additional element for each register that is stored. */
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (count + 1, elts));
+ F (emit_insn (x));
+}
+
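To make the slot assignment concrete, here is a standalone replay of the store_order walk above for a sample mask (the register numbers assume FIRST_DATA_REGNUM == 0, FIRST_ADDRESS_REGNUM == 4 and FIRST_EXTENDED_REGNUM == 10, stated here only for illustration). Note the slots follow the fixed movm store order, which is not monotonic in register number: the extended registers come first.

#include <stdio.h>

int main (void)
{
  static const unsigned store_order[8] = { 14, 15, 16, 17, 2, 3, 6, 7 };
  unsigned mask = (1u << 2) | (1u << 6) | (1u << 14);  /* d2, a2, one ext reg */
  int i, count = 0;

  for (i = 0; i < 8; i++)
    if ((mask >> store_order[i]) & 1)
      printf ("reg %u -> (sp - %d)\n", store_order[i], ++count * 4);
  /* reg 14 -> (sp - 4); reg 2 -> (sp - 8); reg 6 -> (sp - 12).  */
  return 0;
}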
+void
+mn10300_expand_prologue (void)
+{
+ HOST_WIDE_INT size = mn10300_frame_size ();
+
+ /* If we use any of the callee-saved registers, save them now. */
+ mn10300_gen_multiple_store (mn10300_get_live_callee_saved_regs ());
+
+ if (TARGET_AM33_2 && fp_regs_to_save ())
+ {
+ int num_regs_to_save = fp_regs_to_save (), i;
+ HOST_WIDE_INT xsize;
+ enum
+ {
+ save_sp_merge,
+ save_sp_no_merge,
+ save_sp_partial_merge,
+ save_a0_merge,
+ save_a0_no_merge
+ } strategy;
+ unsigned int strategy_size = (unsigned)-1, this_strategy_size;
+ rtx reg;
+
+ /* We have several different strategies to save FP registers.
+ We can store them using SP offsets, which is beneficial if
+ there are just a few registers to save, or we can use `a0' in
+ post-increment mode (`a0' is the only call-clobbered address
+ register that is never used to pass information to a
+ function). Furthermore, if we don't need a frame pointer, we
+ can merge the two SP adds into a single one, but this isn't
+ always beneficial; sometimes we can just split the two adds
+ so that we don't exceed a 16-bit constant size. The code
+ below will select which strategy to use, so as to generate the
+ smallest code. Ties are broken in favor of shorter sequences
+ (in terms of number of instructions). */
+
+#define SIZE_ADD_AX(S) ((((S) >= (1 << 15)) || ((S) < -(1 << 15))) ? 6 \
+ : (((S) >= (1 << 7)) || ((S) < -(1 << 7))) ? 4 : 2)
+#define SIZE_ADD_SP(S) ((((S) >= (1 << 15)) || ((S) < -(1 << 15))) ? 6 \
+ : (((S) >= (1 << 7)) || ((S) < -(1 << 7))) ? 4 : 3)
+
+/* We add 0 * (S) in two places to promote to the type of S,
+ so that all arms of the conditional have the same type. */
+#define SIZE_FMOV_LIMIT(S,N,L,SIZE1,SIZE2,ELSE) \
+ (((S) >= (L)) ? 0 * (S) + (SIZE1) * (N) \
+ : ((S) + 4 * (N) >= (L)) ? (((L) - (S)) / 4 * (SIZE2) \
+ + ((S) + 4 * (N) - (L)) / 4 * (SIZE1)) \
+ : 0 * (S) + (ELSE))
+#define SIZE_FMOV_SP_(S,N) \
+ (SIZE_FMOV_LIMIT ((S), (N), (1 << 24), 7, 6, \
+ SIZE_FMOV_LIMIT ((S), (N), (1 << 8), 6, 4, \
+ (S) ? 4 * (N) : 3 + 4 * ((N) - 1))))
+#define SIZE_FMOV_SP(S,N) (SIZE_FMOV_SP_ ((unsigned HOST_WIDE_INT)(S), (N)))
+
+ /* Consider alternative save_sp_merge only if we don't need the
+ frame pointer and size is nonzero. */
+ if (! frame_pointer_needed && size)
+ {
+ /* Insn: add -(size + 4 * num_regs_to_save), sp. */
+ this_strategy_size = SIZE_ADD_SP (-(size + 4 * num_regs_to_save));
+ /* Insn: fmov fs#, (##, sp), for each fs# to be saved. */
+ this_strategy_size += SIZE_FMOV_SP (size, num_regs_to_save);
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = save_sp_merge;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ /* Consider alternative save_sp_no_merge unconditionally. */
+ /* Insn: add -4 * num_regs_to_save, sp. */
+ this_strategy_size = SIZE_ADD_SP (-4 * num_regs_to_save);
+ /* Insn: fmov fs#, (##, sp), for each fs# to be saved. */
+ this_strategy_size += SIZE_FMOV_SP (0, num_regs_to_save);
+ if (size)
+ {
+ /* Insn: add -size, sp. */
+ this_strategy_size += SIZE_ADD_SP (-size);
+ }
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = save_sp_no_merge;
+ strategy_size = this_strategy_size;
+ }
+
+ /* Consider alternative save_sp_partial_merge only if we don't
+ need a frame pointer and size is reasonably large. */
+ if (! frame_pointer_needed && size + 4 * num_regs_to_save > 128)
+ {
+ /* Insn: add -128, sp. */
+ this_strategy_size = SIZE_ADD_SP (-128);
+ /* Insn: fmov fs#, (##, sp), for each fs# to be saved. */
+ this_strategy_size += SIZE_FMOV_SP (128 - 4 * num_regs_to_save,
+ num_regs_to_save);
+ if (size)
+ {
+ /* Insn: add 128-size, sp. */
+ this_strategy_size += SIZE_ADD_SP (128 - size);
+ }
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = save_sp_partial_merge;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ /* Consider alternative save_a0_merge only if we don't need a
+ frame pointer, size is nonzero and the user hasn't
+ changed the calling conventions of a0. */
+ if (! frame_pointer_needed && size
+ && call_really_used_regs [FIRST_ADDRESS_REGNUM]
+ && ! fixed_regs[FIRST_ADDRESS_REGNUM])
+ {
+ /* Insn: add -(size + 4 * num_regs_to_save), sp. */
+ this_strategy_size = SIZE_ADD_SP (-(size + 4 * num_regs_to_save));
+ /* Insn: mov sp, a0. */
+ this_strategy_size++;
+ if (size)
+ {
+ /* Insn: add size, a0. */
+ this_strategy_size += SIZE_ADD_AX (size);
+ }
+ /* Insn: fmov fs#, (a0+), for each fs# to be saved. */
+ this_strategy_size += 3 * num_regs_to_save;
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = save_a0_merge;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ /* Consider alternative save_a0_no_merge if the user hasn't
+ changed the calling conventions of a0. */
+ if (call_really_used_regs [FIRST_ADDRESS_REGNUM]
+ && ! fixed_regs[FIRST_ADDRESS_REGNUM])
+ {
+ /* Insn: add -4 * num_regs_to_save, sp. */
+ this_strategy_size = SIZE_ADD_SP (-4 * num_regs_to_save);
+ /* Insn: mov sp, a0. */
+ this_strategy_size++;
+ /* Insn: fmov fs#, (a0+), for each fs# to be saved. */
+ this_strategy_size += 3 * num_regs_to_save;
+ if (size)
+ {
+ /* Insn: add -size, sp. */
+ this_strategy_size += SIZE_ADD_SP (-size);
+ }
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = save_a0_no_merge;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ /* Emit the initial SP add, common to all strategies. */
+ switch (strategy)
+ {
+ case save_sp_no_merge:
+ case save_a0_no_merge:
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-4 * num_regs_to_save))));
+ xsize = 0;
+ break;
+
+ case save_sp_partial_merge:
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-128))));
+ xsize = 128 - 4 * num_regs_to_save;
+ size -= xsize;
+ break;
+
+ case save_sp_merge:
+ case save_a0_merge:
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-(size + 4 * num_regs_to_save)))));
+ /* We'll have to adjust FP register saves according to the
+ frame size. */
+ xsize = size;
+ /* Since we've already created the stack frame, don't do it
+ again at the end of the function. */
+ size = 0;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Now prepare register a0, if we have decided to use it. */
+ switch (strategy)
+ {
+ case save_sp_merge:
+ case save_sp_no_merge:
+ case save_sp_partial_merge:
+ reg = 0;
+ break;
+
+ case save_a0_merge:
+ case save_a0_no_merge:
+ reg = gen_rtx_REG (SImode, FIRST_ADDRESS_REGNUM);
+ F (emit_insn (gen_movsi (reg, stack_pointer_rtx)));
+ if (xsize)
+ F (emit_insn (gen_addsi3 (reg, reg, GEN_INT (xsize))));
+ reg = gen_rtx_POST_INC (SImode, reg);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Now actually save the FP registers. */
+ for (i = FIRST_FP_REGNUM; i <= LAST_FP_REGNUM; ++i)
+ if (df_regs_ever_live_p (i) && ! call_really_used_regs [i])
+ {
+ rtx addr;
+
+ if (reg)
+ addr = reg;
+ else
+ {
+ /* If we aren't using `a0', use an SP offset. */
+ if (xsize)
+ {
+ addr = gen_rtx_PLUS (SImode,
+ stack_pointer_rtx,
+ GEN_INT (xsize));
+ }
+ else
+ addr = stack_pointer_rtx;
+
+ xsize += 4;
+ }
+
+ F (emit_insn (gen_movsf (gen_rtx_MEM (SFmode, addr),
+ gen_rtx_REG (SFmode, i))));
+ }
+ }
+
+ /* Now put the frame pointer into the frame pointer register. */
+ if (frame_pointer_needed)
+ F (emit_move_insn (frame_pointer_rtx, stack_pointer_rtx));
+
+ /* Allocate stack for this frame. */
+ if (size)
+ F (emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-size))));
+
+ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ emit_insn (gen_load_pic ());
+}
+
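To make the size model concrete, here is a standalone sketch that evaluates the SIZE_ADD_* macros from the prologue above for a few displacements. Per the byte counts they encode, an add into an address register costs 2/4/6 bytes and an add into SP costs 3/4/6 bytes, depending on whether the displacement fits in 8 or 16 bits.

#include <stdio.h>

#define SIZE_ADD_AX(S) ((((S) >= (1 << 15)) || ((S) < -(1 << 15))) ? 6 \
                        : (((S) >= (1 << 7)) || ((S) < -(1 << 7))) ? 4 : 2)
#define SIZE_ADD_SP(S) ((((S) >= (1 << 15)) || ((S) < -(1 << 15))) ? 6 \
                        : (((S) >= (1 << 7)) || ((S) < -(1 << 7))) ? 4 : 3)

int main (void)
{
  printf ("add -64,sp   : %d bytes\n", SIZE_ADD_SP (-64));     /* 3 */
  printf ("add -300,sp  : %d bytes\n", SIZE_ADD_SP (-300));    /* 4 */
  printf ("add -40000,a0: %d bytes\n", SIZE_ADD_AX (-40000));  /* 6 */
  return 0;
}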
+void
+mn10300_expand_epilogue (void)
+{
+ HOST_WIDE_INT size = mn10300_frame_size ();
+ int reg_save_bytes = REG_SAVE_BYTES;
+
+ if (TARGET_AM33_2 && fp_regs_to_save ())
+ {
+ int num_regs_to_save = fp_regs_to_save (), i;
+ rtx reg = 0;
+
+ /* We have several options to restore FP registers. We could
+ load them from SP offsets, but, if there are enough FP
+ registers to restore, we win if we use a post-increment
+ addressing mode. */
+
+ /* If we have a frame pointer, it's the best option, because we
+ already know it has the value we want. */
+ if (frame_pointer_needed)
+ reg = gen_rtx_REG (SImode, FRAME_POINTER_REGNUM);
+ /* Otherwise, we may use `a1', since it's call-clobbered and
+ it's never used for return values. But only do so if it's
+ smaller than using SP offsets. */
+ else
+ {
+ enum { restore_sp_post_adjust,
+ restore_sp_pre_adjust,
+ restore_sp_partial_adjust,
+ restore_a1 } strategy;
+ unsigned int this_strategy_size, strategy_size = (unsigned)-1;
+
+ /* Consider using sp offsets before adjusting sp. */
+ /* Insn: fmov (##,sp),fs#, for each fs# to be restored. */
+ this_strategy_size = SIZE_FMOV_SP (size, num_regs_to_save);
+ /* If size is too large, we'll have to adjust SP with an
+ add. */
+ if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
+ {
+ /* Insn: add size + 4 * num_regs_to_save, sp. */
+ this_strategy_size += SIZE_ADD_SP (size + 4 * num_regs_to_save);
+ }
+ /* If we don't have to restore any non-FP registers,
+ we'll be able to save one byte by using rets. */
+ if (! reg_save_bytes)
+ this_strategy_size--;
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = restore_sp_post_adjust;
+ strategy_size = this_strategy_size;
+ }
+
+ /* Consider using sp offsets after adjusting sp. */
+ /* Insn: add size, sp. */
+ this_strategy_size = SIZE_ADD_SP (size);
+ /* Insn: fmov (##,sp),fs#, for each fs# to be restored. */
+ this_strategy_size += SIZE_FMOV_SP (0, num_regs_to_save);
+ /* We're going to use ret to release the FP registers
+ save area, so, no savings. */
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = restore_sp_pre_adjust;
+ strategy_size = this_strategy_size;
+ }
+
+ /* Consider using sp offsets after partially adjusting sp.
+ When size is close to 32Kb, we may be able to adjust SP
+ with an imm16 add instruction while still using fmov
+ (d8,sp). */
+ if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
+ {
+ /* Insn: add size + 4 * num_regs_to_save
+ + reg_save_bytes - 252,sp. */
+ this_strategy_size = SIZE_ADD_SP (size + 4 * num_regs_to_save
+ + reg_save_bytes - 252);
+ /* Insn: fmov (##,sp),fs#, for each fs# to be restored. */
+ this_strategy_size += SIZE_FMOV_SP (252 - reg_save_bytes
+ - 4 * num_regs_to_save,
+ num_regs_to_save);
+ /* We're going to use ret to release the FP registers
+ save area, so, no savings. */
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = restore_sp_partial_adjust;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ /* Consider using a1 in post-increment mode, as long as the
+ user hasn't changed the calling conventions of a1. */
+ if (call_really_used_regs [FIRST_ADDRESS_REGNUM + 1]
+ && ! fixed_regs[FIRST_ADDRESS_REGNUM+1])
+ {
+ /* Insn: mov sp,a1. */
+ this_strategy_size = 1;
+ if (size)
+ {
+ /* Insn: add size,a1. */
+ this_strategy_size += SIZE_ADD_AX (size);
+ }
+ /* Insn: fmov (a1+),fs#, for each fs# to be restored. */
+ this_strategy_size += 3 * num_regs_to_save;
+ /* If size is large enough, we may be able to save a
+ couple of bytes. */
+ if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
+ {
+ /* Insn: mov a1,sp. */
+ this_strategy_size += 2;
+ }
+ /* If we don't have to restore any non-FP registers,
+ we'll be able to save one byte by using rets. */
+ if (! reg_save_bytes)
+ this_strategy_size--;
+
+ if (this_strategy_size < strategy_size)
+ {
+ strategy = restore_a1;
+ strategy_size = this_strategy_size;
+ }
+ }
+
+ switch (strategy)
+ {
+ case restore_sp_post_adjust:
+ break;
+
+ case restore_sp_pre_adjust:
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (size)));
+ size = 0;
+ break;
+
+ case restore_sp_partial_adjust:
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (size + 4 * num_regs_to_save
+ + reg_save_bytes - 252)));
+ size = 252 - reg_save_bytes - 4 * num_regs_to_save;
+ break;
+
+ case restore_a1:
+ reg = gen_rtx_REG (SImode, FIRST_ADDRESS_REGNUM + 1);
+ emit_insn (gen_movsi (reg, stack_pointer_rtx));
+ if (size)
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Adjust the selected register, if any, for post-increment. */
+ if (reg)
+ reg = gen_rtx_POST_INC (SImode, reg);
+
+ for (i = FIRST_FP_REGNUM; i <= LAST_FP_REGNUM; ++i)
+ if (df_regs_ever_live_p (i) && ! call_really_used_regs [i])
+ {
+ rtx addr;
+
+ if (reg)
+ addr = reg;
+ else if (size)
+ {
+ /* If we aren't using a post-increment register, use an
+ SP offset. */
+ addr = gen_rtx_PLUS (SImode,
+ stack_pointer_rtx,
+ GEN_INT (size));
+ }
+ else
+ addr = stack_pointer_rtx;
+
+ size += 4;
+
+ emit_insn (gen_movsf (gen_rtx_REG (SFmode, i),
+ gen_rtx_MEM (SFmode, addr)));
+ }
+
+ /* If we were using the restore_a1 strategy and the number of
+ bytes to be released won't fit in the `ret' byte, copy `a1'
+ to `sp', to avoid having to use `add' to adjust it. */
+ if (! frame_pointer_needed && reg && size + reg_save_bytes > 255)
+ {
+ emit_move_insn (stack_pointer_rtx, XEXP (reg, 0));
+ size = 0;
+ }
+ }
+
+ /* Maybe cut back the stack, except for the register save area.
+
+ If the frame pointer exists, then use the frame pointer to
+ cut back the stack.
+
+ If the stack size + register save area is more than 255 bytes,
+ then the stack must be cut back here since the size + register
+ save size is too big for a ret/retf instruction.
+
+ Else leave it alone, it will be cut back as part of the
+ ret/retf instruction, or there wasn't any stack to begin with.
+
+ Under no circumstances should the register save area be
+ deallocated here, that would leave a window where an interrupt
+ could occur and trash the register save area. */
+ if (frame_pointer_needed)
+ {
+ emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+ size = 0;
+ }
+ else if (size + reg_save_bytes > 255)
+ {
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (size)));
+ size = 0;
+ }
+
+ /* Adjust the stack and restore callee-saved registers, if any. */
+ if (mn10300_can_use_rets_insn ())
+ emit_jump_insn (gen_rtx_RETURN (VOIDmode));
+ else
+ emit_jump_insn (gen_return_ret (GEN_INT (size + REG_SAVE_BYTES)));
+}
+
+/* Recognize the PARALLEL rtx generated by mn10300_gen_multiple_store().
+ This function is for MATCH_PARALLEL and so assumes OP is known to be
+ parallel. If OP is a multiple store, return a mask indicating which
+ registers it saves. Return 0 otherwise. */
+
+int
+mn10300_store_multiple_operation (rtx op,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int count;
+ int mask;
+ int i;
+ unsigned int last;
+ rtx elt;
+
+ count = XVECLEN (op, 0);
+ if (count < 2)
+ return 0;
+
+ /* Check that first instruction has the form (set (sp) (plus A B)) */
+ elt = XVECEXP (op, 0, 0);
+ if (GET_CODE (elt) != SET
+ || (! REG_P (SET_DEST (elt)))
+ || REGNO (SET_DEST (elt)) != STACK_POINTER_REGNUM
+ || GET_CODE (SET_SRC (elt)) != PLUS)
+ return 0;
+
+ /* Check that A is the stack pointer and B is the expected stack size.
+ For OP to match, each subsequent instruction should push a word onto
+ the stack. We therefore expect the first instruction to create
+ COUNT-1 stack slots. */
+ elt = SET_SRC (elt);
+ if ((! REG_P (XEXP (elt, 0)))
+ || REGNO (XEXP (elt, 0)) != STACK_POINTER_REGNUM
+ || (! CONST_INT_P (XEXP (elt, 1)))
+ || INTVAL (XEXP (elt, 1)) != -(count - 1) * 4)
+ return 0;
+
+ mask = 0;
+ for (i = 1; i < count; i++)
+ {
+ /* Check that element i is a (set (mem M) R). */
+ /* ??? Validate the register order a-la mn10300_gen_multiple_store.
+ Remember: the ordering is *not* monotonic. */
+ elt = XVECEXP (op, 0, i);
+ if (GET_CODE (elt) != SET
+ || (! MEM_P (SET_DEST (elt)))
+ || (! REG_P (SET_SRC (elt))))
+ return 0;
+
+ /* Remember which registers are to be saved. */
+ last = REGNO (SET_SRC (elt));
+ mask |= (1 << last);
+
+ /* Check that M has the form (plus (sp) (const_int -I*4)) */
+ elt = XEXP (SET_DEST (elt), 0);
+ if (GET_CODE (elt) != PLUS
+ || (! REG_P (XEXP (elt, 0)))
+ || REGNO (XEXP (elt, 0)) != STACK_POINTER_REGNUM
+ || (! CONST_INT_P (XEXP (elt, 1)))
+ || INTVAL (XEXP (elt, 1)) != -i * 4)
+ return 0;
+ }
+
+ /* All or none of the callee-saved extended registers must be in the set. */
+ if ((mask & 0x3c000) != 0
+ && (mask & 0x3c000) != 0x3c000)
+ return 0;
+
+ return mask;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
+
+static reg_class_t
+mn10300_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (x == stack_pointer_rtx && rclass != SP_REGS)
+ return (TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS);
+ else if (MEM_P (x)
+ || (REG_P (x)
+ && !HARD_REGISTER_P (x))
+ || (GET_CODE (x) == SUBREG
+ && REG_P (SUBREG_REG (x))
+ && !HARD_REGISTER_P (SUBREG_REG (x))))
+ return LIMIT_RELOAD_CLASS (GET_MODE (x), rclass);
+ else
+ return rclass;
+}
+
+/* Implement TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. */
+
+static reg_class_t
+mn10300_preferred_output_reload_class (rtx x, reg_class_t rclass)
+{
+ if (x == stack_pointer_rtx && rclass != SP_REGS)
+ return (TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS);
+ return rclass;
+}
+
+/* Implement TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+mn10300_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+ enum reg_class xclass = NO_REGS;
+ unsigned int xregno = INVALID_REGNUM;
+
+ if (REG_P (x))
+ {
+ xregno = REGNO (x);
+ if (xregno >= FIRST_PSEUDO_REGISTER)
+ xregno = true_regnum (x);
+ if (xregno != INVALID_REGNUM)
+ xclass = REGNO_REG_CLASS (xregno);
+ }
+
+ if (!TARGET_AM33)
+ {
+ /* Memory load/stores less than a full word wide can't have an
+ address or stack pointer destination. They must use a data
+ register as an intermediate register. */
+ if (rclass != DATA_REGS
+ && (mode == QImode || mode == HImode)
+ && xclass == NO_REGS)
+ return DATA_REGS;
+
+ /* We can only move SP to/from an address register. */
+ if (in_p
+ && rclass == SP_REGS
+ && xclass != ADDRESS_REGS)
+ return ADDRESS_REGS;
+ if (!in_p
+ && xclass == SP_REGS
+ && rclass != ADDRESS_REGS
+ && rclass != SP_OR_ADDRESS_REGS)
+ return ADDRESS_REGS;
+ }
+
+ /* We can't directly load sp + const_int into a register;
+ we must use an address register as a scratch. */
+ if (in_p
+ && rclass != SP_REGS
+ && rclass != SP_OR_ADDRESS_REGS
+ && rclass != SP_OR_GENERAL_REGS
+ && GET_CODE (x) == PLUS
+ && (XEXP (x, 0) == stack_pointer_rtx
+ || XEXP (x, 1) == stack_pointer_rtx))
+ {
+ sri->icode = CODE_FOR_reload_plus_sp_const;
+ return NO_REGS;
+ }
+
+ /* We can only move MDR to/from a data register. */
+ if (rclass == MDR_REGS && xclass != DATA_REGS)
+ return DATA_REGS;
+ if (xclass == MDR_REGS && rclass != DATA_REGS)
+ return DATA_REGS;
+
+ /* We can't load/store an FP register from a constant address. */
+ if (TARGET_AM33_2
+ && (rclass == FP_REGS || xclass == FP_REGS)
+ && (xclass == NO_REGS || rclass == NO_REGS))
+ {
+ rtx addr = NULL;
+
+ if (xregno >= FIRST_PSEUDO_REGISTER && xregno != INVALID_REGNUM)
+ {
+ addr = reg_equiv_mem [xregno];
+ if (addr)
+ addr = XEXP (addr, 0);
+ }
+ else if (MEM_P (x))
+ addr = XEXP (x, 0);
+
+ if (addr && CONSTANT_ADDRESS_P (addr))
+ return GENERAL_REGS;
+ }
+
+ /* Otherwise assume no secondary reloads are needed. */
+ return NO_REGS;
+}
+
+int
+mn10300_frame_size (void)
+{
+ /* size includes the fixed stack space needed for function calls. */
+ int size = get_frame_size () + crtl->outgoing_args_size;
+
+ /* And space for the return pointer. */
+ size += crtl->outgoing_args_size ? 4 : 0;
+
+ return size;
+}
+
+int
+mn10300_initial_offset (int from, int to)
+{
+ int diff = 0;
+
+ gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
+ gcc_assert (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
+
+ if (to == STACK_POINTER_REGNUM)
+ diff = mn10300_frame_size ();
+
+ /* The difference between the argument pointer and the frame pointer
+ is the size of the callee register save area. */
+ if (from == ARG_POINTER_REGNUM)
+ {
+ diff += REG_SAVE_BYTES;
+ diff += 4 * fp_regs_to_save ();
+ }
+
+ return diff;
+}
+
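A small worked sketch of these elimination offsets under assumed values (the frame size, save-area size and FP save count below are made up purely for illustration): eliminating FP to SP skips just the frame, while eliminating AP to SP also skips the callee save area.

#include <stdio.h>

int main (void)
{
  int frame_size = 32;      /* get_frame_size + outgoing args (assumed) */
  int reg_save_bytes = 20;  /* see REG_SAVE_BYTES above (assumed) */
  int fp_saves = 1;         /* saved FP registers (assumed) */

  printf ("FP->SP: %d\n", frame_size);                                 /* 32 */
  printf ("AP->SP: %d\n", frame_size + reg_save_bytes + 4 * fp_saves); /* 56 */
  return 0;
}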
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+mn10300_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ /* Return values > 8 bytes in length in memory. */
+ return (int_size_in_bytes (type) > 8
+ || int_size_in_bytes (type) == 0
+ || TYPE_MODE (type) == BLKmode);
+}
+
+/* Flush the argument registers to the stack for a stdarg function;
+ return the new argument pointer. */
+static rtx
+mn10300_builtin_saveregs (void)
+{
+ rtx offset, mem;
+ tree fntype = TREE_TYPE (current_function_decl);
+ int argadj = ((!stdarg_p (fntype))
+ ? UNITS_PER_WORD : 0);
+ alias_set_type set = get_varargs_alias_set ();
+
+ if (argadj)
+ offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
+ else
+ offset = crtl->args.arg_offset_rtx;
+
+ mem = gen_rtx_MEM (SImode, crtl->args.internal_arg_pointer);
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (SImode, 0));
+
+ mem = gen_rtx_MEM (SImode,
+ plus_constant (crtl->args.internal_arg_pointer, 4));
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (SImode, 1));
+
+ return copy_to_reg (expand_binop (Pmode, add_optab,
+ crtl->args.internal_arg_pointer,
+ offset, 0, 0, OPTAB_LIB_WIDEN));
+}
+
+static void
+mn10300_va_start (tree valist, rtx nextarg)
+{
+ nextarg = expand_builtin_saveregs ();
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Return true when a parameter should be passed by reference. */
+
+static bool
+mn10300_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return (size > 8 || size == 0);
+}
+
+/* Return an RTX to represent where an argument with mode MODE will be
+ passed to a function. If the result is NULL_RTX, the argument is pushed. */
+
+static rtx
+mn10300_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ rtx result = NULL_RTX;
+ int size;
+
+ /* We only support using 2 data registers as argument registers. */
+ int nregs = 2;
+
+ /* Figure out the size of the object to be passed. */
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ cum->nbytes = (cum->nbytes + 3) & ~3;
+
+ /* Don't pass this arg via a register if all the argument registers
+ are used up. */
+ if (cum->nbytes > nregs * UNITS_PER_WORD)
+ return result;
+
+ /* Don't pass this arg via a register if it would be split between
+ registers and memory. */
+ if (type == NULL_TREE
+ && cum->nbytes + size > nregs * UNITS_PER_WORD)
+ return result;
+
+ switch (cum->nbytes / UNITS_PER_WORD)
+ {
+ case 0:
+ result = gen_rtx_REG (mode, FIRST_ARGUMENT_REGNUM);
+ break;
+ case 1:
+ result = gen_rtx_REG (mode, FIRST_ARGUMENT_REGNUM + 1);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+mn10300_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ cum->nbytes += (mode != BLKmode
+ ? (GET_MODE_SIZE (mode) + 3) & ~3
+ : (int_size_in_bytes (type) + 3) & ~3);
+}
+
+/* Return the number of bytes of registers to use for an argument passed
+ partially in registers and partially in memory. */
+
+static int
+mn10300_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int size;
+
+ /* We only support using 2 data registers as argument registers. */
+ int nregs = 2;
+
+ /* Figure out the size of the object to be passed. */
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ cum->nbytes = (cum->nbytes + 3) & ~3;
+
+ /* Don't pass this arg via a register if all the argument registers
+ are used up. */
+ if (cum->nbytes > nregs * UNITS_PER_WORD)
+ return 0;
+
+ if (cum->nbytes + size <= nregs * UNITS_PER_WORD)
+ return 0;
+
+ /* Don't pass this arg via a register if it would be split between
+ registers and memory. */
+ if (type == NULL_TREE
+ && cum->nbytes + size > nregs * UNITS_PER_WORD)
+ return 0;
+
+ return nregs * UNITS_PER_WORD - cum->nbytes;
+}
+
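A standalone sketch of the argument-passing rules implemented by the three functions above: two data registers (8 bytes) are available, each slot is word-aligned, and an argument straddling the register/stack boundary is passed partially in registers. Argument sizes are given directly here (an assumption for illustration) rather than derived from trees or modes.

#include <stdio.h>

int main (void)
{
  int sizes[] = { 4, 8, 4 };              /* e.g. int, long long, int */
  int nbytes = 0, i;

  for (i = 0; i < 3; i++)
    {
      int size = (sizes[i] + 3) & ~3;     /* word-align, as in the code */
      if (nbytes >= 8)
        printf ("arg %d: stack\n", i);
      else if (nbytes + size <= 8)
        printf ("arg %d: d%d\n", i, nbytes / 4);
      else
        printf ("arg %d: %d bytes in regs, %d on stack\n",
                i, 8 - nbytes, size - (8 - nbytes));
      nbytes += size;
    }
  return 0;
}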
+/* Return the location of the function's value. This will be either
+ $d0 for integer functions, $a0 for pointers, or a PARALLEL of both
+ $d0 and $a0 if the -mreturn-pointer-on-d0 flag is set. Note that
+ we only return the PARALLEL for outgoing values; we do not want
+ callers relying on this extra copy. */
+
+static rtx
+mn10300_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing)
+{
+ rtx rv;
+ enum machine_mode mode = TYPE_MODE (valtype);
+
+ if (! POINTER_TYPE_P (valtype))
+ return gen_rtx_REG (mode, FIRST_DATA_REGNUM);
+ else if (! TARGET_PTR_A0D0 || ! outgoing
+ || cfun->returns_struct)
+ return gen_rtx_REG (mode, FIRST_ADDRESS_REGNUM);
+
+ rv = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
+ XVECEXP (rv, 0, 0)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, FIRST_ADDRESS_REGNUM),
+ GEN_INT (0));
+
+ XVECEXP (rv, 0, 1)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, FIRST_DATA_REGNUM),
+ GEN_INT (0));
+ return rv;
+}
+
+/* Implements TARGET_LIBCALL_VALUE. */
+
+static rtx
+mn10300_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, FIRST_DATA_REGNUM);
+}
+
+/* Implements FUNCTION_VALUE_REGNO_P. */
+
+bool
+mn10300_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == FIRST_DATA_REGNUM || regno == FIRST_ADDRESS_REGNUM);
+}
+
+/* Output an addition operation. */
+
+const char *
+mn10300_output_add (rtx operands[3], bool need_flags)
+{
+ rtx dest, src1, src2;
+ unsigned int dest_regnum, src1_regnum, src2_regnum;
+ enum reg_class src1_class, src2_class, dest_class;
+
+ dest = operands[0];
+ src1 = operands[1];
+ src2 = operands[2];
+
+ dest_regnum = true_regnum (dest);
+ src1_regnum = true_regnum (src1);
+
+ dest_class = REGNO_REG_CLASS (dest_regnum);
+ src1_class = REGNO_REG_CLASS (src1_regnum);
+
+ if (CONST_INT_P (src2))
+ {
+ gcc_assert (dest_regnum == src1_regnum);
+
+ if (src2 == const1_rtx && !need_flags)
+ return "inc %0";
+ if (INTVAL (src2) == 4 && !need_flags && dest_class != DATA_REGS)
+ return "inc4 %0";
+
+ gcc_assert (!need_flags || dest_class != SP_REGS);
+ return "add %2,%0";
+ }
+ else if (CONSTANT_P (src2))
+ return "add %2,%0";
+
+ src2_regnum = true_regnum (src2);
+ src2_class = REGNO_REG_CLASS (src2_regnum);
+
+ if (dest_regnum == src1_regnum)
+ return "add %2,%0";
+ if (dest_regnum == src2_regnum)
+ return "add %1,%0";
+
+ /* The rest of the cases are reg = reg+reg. For AM33, we can implement
+ this directly, as below, but when optimizing for space we can sometimes
+ do better by using a mov+add. For MN103, we claimed that we could
+ implement a three-operand add because the various move and add insns
+ change sizes across register classes, and we can often do better than
+ reload in choosing which operand to move. */
+ if (TARGET_AM33 && optimize_insn_for_speed_p ())
+ return "add %2,%1,%0";
+
+ /* Catch cases where no extended register was used. */
+ if (src1_class != EXTENDED_REGS
+ && src2_class != EXTENDED_REGS
+ && dest_class != EXTENDED_REGS)
+ {
+ /* We have to copy one of the sources into the destination, then
+ add the other source to the destination.
+
+ Carefully select which source to copy to the destination; a
+ naive implementation will waste a byte when the source classes
+ are different and the destination is an address register.
+ Selecting the lowest cost register copy will optimize this
+ sequence. */
+ if (src1_class == dest_class)
+ return "mov %1,%0\n\tadd %2,%0";
+ else
+ return "mov %2,%0\n\tadd %1,%0";
+ }
+
+ /* At least one register is an extended register. */
+
+ /* The three operand add instruction on the am33 is a win iff the
+ output register is an extended register, or if both source
+ registers are extended registers. */
+ if (dest_class == EXTENDED_REGS || src1_class == src2_class)
+ return "add %2,%1,%0";
+
+ /* It is better to copy one of the sources to the destination, then
+ perform a two-address add. In this case the destination must be
+ an address or data register, one of the sources must be an
+ extended register, and the remaining source must not be an
+ extended register.
+
+ The best code for this case is to copy the extended reg to the
+ destination, then emit a two address add. */
+ if (src1_class == EXTENDED_REGS)
+ return "mov %1,%0\n\tadd %2,%0";
+ else
+ return "mov %2,%0\n\tadd %1,%0";
+}
+
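A standalone sketch of the template selection above for the reg = reg + reg cases (the dest == source shortcuts are omitted), using a toy register-class enum in place of REGNO_REG_CLASS; EXT marks the AM33 extended registers. This is an illustration of the decision tree, not the port's code.

#include <stdio.h>

enum rclass { DATA, ADDR, EXT };

static const char *
pick_add (enum rclass dst, enum rclass s1, enum rclass s2, int am33_speed)
{
  if (am33_speed)                          /* 3-operand form, AM33 only */
    return "add %2,%1,%0";
  if (s1 != EXT && s2 != EXT && dst != EXT)
    /* No extended register: copy the class-matching source first.  */
    return s1 == dst ? "mov %1,%0; add %2,%0" : "mov %2,%0; add %1,%0";
  if (dst == EXT || s1 == s2)              /* 3-operand add is a win */
    return "add %2,%1,%0";
  /* Copy the extended source to the destination, then 2-address add.  */
  return s1 == EXT ? "mov %1,%0; add %2,%0" : "mov %2,%0; add %1,%0";
}

int main (void)
{
  puts (pick_add (EXT, DATA, ADDR, 0));    /* add %2,%1,%0 */
  puts (pick_add (ADDR, ADDR, DATA, 0));   /* mov %1,%0; add %2,%0 */
  return 0;
}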
+/* Return 1 if X contains a symbolic expression. We know these
+ expressions will have one of a few well defined forms, so
+ we need only check those forms. */
+
+int
+mn10300_symbolic_operand (rtx op,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ op = XEXP (op, 0);
+ return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && CONST_INT_P (XEXP (op, 1)));
+ default:
+ return 0;
+ }
+}
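+
+/* For example, each of
+
+ (symbol_ref "foo")
+ (label_ref 42)
+ (const (plus (symbol_ref "foo") (const_int 4)))
+
+ is accepted, whereas (const (plus (symbol_ref "a") (symbol_ref "b")))
+ is rejected, since the second operand of the PLUS must be a
+ CONST_INT. */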
+
+/* Try machine dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ Normally it is always safe for this macro to do nothing. It exists to
+ recognize opportunities to optimize the output.
+
+ But on a few ports with segmented architectures and indexed addressing
+ (mn10300, hppa) it is used to rewrite certain problematical addresses. */
+
+static rtx
+mn10300_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (flag_pic && ! mn10300_legitimate_pic_operand_p (x))
+ x = mn10300_legitimize_pic_address (oldx, NULL_RTX);
+
+ /* Uh-oh. We might have an address for x[n-100000]. This needs
+ special handling to avoid creating an indexed memory address
+ with x-100000 as the base. */
+ if (GET_CODE (x) == PLUS
+ && mn10300_symbolic_operand (XEXP (x, 1), VOIDmode))
+ {
+ /* Ugly. We modify things here so that the address offset specified
+ by the index expression is computed first, then added to x to form
+ the entire address. */
+
+ rtx regx1, regy1, regy2, y;
+
+ /* Strip off any CONST. */
+ y = XEXP (x, 1);
+ if (GET_CODE (y) == CONST)
+ y = XEXP (y, 0);
+
+ if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
+ {
+ regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
+ regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
+ regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
+ regx1 = force_reg (Pmode,
+ gen_rtx_fmt_ee (GET_CODE (y), Pmode, regx1,
+ regy2));
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
+ }
+ }
+ return x;
+}
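+
+/* For illustration, an incoming address of the form
+
+ (plus (reg R) (const (plus (symbol_ref "x") (const_int -100000))))
+
+ is rewritten so that R + (-100000) is computed into a register first
+ and the symbol is added last (roughly t = R - 100000, then t + x),
+ rather than forming an indexed address whose base is the
+ out-of-segment value x - 100000. */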
+
+/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
+ @GOTOFF in `reg'. */
+
+rtx
+mn10300_legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx x;
+
+ if (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF
+ && (CONSTANT_POOL_ADDRESS_P (orig)
+ || ! MN10300_GLOBAL_P (orig))))
+ {
+ if (reg == NULL)
+ reg = gen_reg_rtx (Pmode);
+
+ x = gen_rtx_UNSPEC (SImode, gen_rtvec (1, orig), UNSPEC_GOTOFF);
+ x = gen_rtx_CONST (SImode, x);
+ emit_move_insn (reg, x);
+
+ x = emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx));
+ }
+ else if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ if (reg == NULL)
+ reg = gen_reg_rtx (Pmode);
+
+ x = gen_rtx_UNSPEC (SImode, gen_rtvec (1, orig), UNSPEC_GOT);
+ x = gen_rtx_CONST (SImode, x);
+ x = gen_rtx_PLUS (SImode, pic_offset_table_rtx, x);
+ x = gen_const_mem (SImode, x);
+
+ x = emit_move_insn (reg, x);
+ }
+ else
+ return orig;
+
+ set_unique_reg_note (x, REG_EQUAL, orig);
+ return reg;
+}
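+
+/* For illustration, when generating PIC a local symbol L becomes
+
+ reg = const (unspec [L] GOTOFF); reg = reg + pic_reg;
+
+ while a global symbol G is loaded through the GOT:
+
+ reg = mem (pic_reg + const (unspec [G] GOT));
+
+ and in both cases a REG_EQUAL note records the original address. */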
+
+/* Return zero if X references a SYMBOL_REF or LABEL_REF whose symbol
+ isn't protected by a PIC unspec; nonzero otherwise. */
+
+int
+mn10300_legitimate_pic_operand_p (rtx x)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+ return 0;
+
+ if (GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PIC
+ || XINT (x, 1) == UNSPEC_GOT
+ || XINT (x, 1) == UNSPEC_GOTOFF
+ || XINT (x, 1) == UNSPEC_PLT
+ || XINT (x, 1) == UNSPEC_GOTSYM_OFF))
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (! mn10300_legitimate_pic_operand_p (XVECEXP (x, i, j)))
+ return 0;
+ }
+ else if (fmt[i] == 'e'
+ && ! mn10300_legitimate_pic_operand_p (XEXP (x, i)))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Return TRUE if the address X, taken from a (MEM:MODE X) rtx, is
+ legitimate, and FALSE otherwise.
+
+ On the mn10300, the value in the address register must be
+ in the same memory space/segment as the effective address.
+
+ This is problematical for reload since it does not understand
+ that base+index != index+base in a memory reference.
+
+ Note it is still possible to use reg+reg addressing modes,
+ it's just much more difficult. For a discussion of a possible
+ workaround and solution, see the comments in pa.c before the
+ function record_unscaled_index_insn_codes. */
+
+static bool
+mn10300_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ rtx base, index;
+
+ if (CONSTANT_ADDRESS_P (x))
+ return !flag_pic || mn10300_legitimate_pic_operand_p (x);
+
+ if (RTX_OK_FOR_BASE_P (x, strict))
+ return true;
+
+ if (TARGET_AM33 && (mode == SImode || mode == SFmode || mode == HImode))
+ {
+ if (GET_CODE (x) == POST_INC)
+ return RTX_OK_FOR_BASE_P (XEXP (x, 0), strict);
+ if (GET_CODE (x) == POST_MODIFY)
+ return (RTX_OK_FOR_BASE_P (XEXP (x, 0), strict)
+ && CONSTANT_ADDRESS_P (XEXP (x, 1)));
+ }
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ base = XEXP (x, 0);
+ index = XEXP (x, 1);
+
+ if (!REG_P (base))
+ return false;
+ if (REG_P (index))
+ {
+ /* ??? Without AM33 generalized (Ri,Rn) addressing, reg+reg
+ addressing is hard to satisfy. */
+ if (!TARGET_AM33)
+ return false;
+
+ return (REGNO_GENERAL_P (REGNO (base), strict)
+ && REGNO_GENERAL_P (REGNO (index), strict));
+ }
+
+ if (!REGNO_STRICT_OK_FOR_BASE_P (REGNO (base), strict))
+ return false;
+
+ if (CONST_INT_P (index))
+ return IN_RANGE (INTVAL (index), -1 - 0x7fffffff, 0x7fffffff);
+
+ if (CONSTANT_ADDRESS_P (index))
+ return !flag_pic || mn10300_legitimate_pic_operand_p (index);
+
+ return false;
+}
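+
+/* Informally, the address forms accepted above are:
+
+ constant (non-PIC, or PIC-legitimate)
+ Rbase
+ (Rbase)+ and post_modify (AM33; SImode, SFmode or HImode only)
+ Rbase + Rindex (AM33 only; both general registers)
+ Rbase + const_int (32-bit signed displacement)
+ Rbase + symbolic (non-PIC, or PIC-legitimate) */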
+
+bool
+mn10300_regno_in_class_p (unsigned regno, int rclass, bool strict)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ if (!strict)
+ return true;
+ if (!reg_renumber)
+ return false;
+ regno = reg_renumber[regno];
+ if (regno == INVALID_REGNUM)
+ return false;
+ }
+ return TEST_HARD_REG_BIT (reg_class_contents[rclass], regno);
+}
+
+rtx
+mn10300_legitimize_reload_address (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ bool any_change = false;
+
+ /* See above re disabling reg+reg addressing for MN103. */
+ if (!TARGET_AM33)
+ return NULL_RTX;
+
+ if (GET_CODE (x) != PLUS)
+ return NULL_RTX;
+
+ if (XEXP (x, 0) == stack_pointer_rtx)
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ GENERAL_REGS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ any_change = true;
+ }
+ if (XEXP (x, 1) == stack_pointer_rtx)
+ {
+ push_reload (XEXP (x, 1), NULL_RTX, &XEXP (x, 1), NULL,
+ GENERAL_REGS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ any_change = true;
+ }
+
+ return any_change ? x : NULL_RTX;
+}
+
+/* Used by LEGITIMATE_CONSTANT_P(). Returns TRUE if X is a valid
+ constant. Note that some "constants" aren't valid, such as TLS
+ symbols and unconverted GOT-based references, so we eliminate
+ those here. */
+
+bool
+mn10300_legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (! CONST_INT_P (XEXP (x, 1)))
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_PIC:
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /* We must have drilled down to a symbol. */
+ if (! mn10300_symbolic_operand (x, Pmode))
+ return false;
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+/* Undo pic address legitimization for the benefit of debug info. */
+
+static rtx
+mn10300_delegitimize_address (rtx orig_x)
+{
+ rtx x = orig_x, ret, addend = NULL;
+ bool need_mem;
+
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+ if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
+ return orig_x;
+
+ if (XEXP (x, 0) == pic_offset_table_rtx)
+ ;
+ /* With the REG+REG addressing of AM33, var-tracking can re-assemble
+ some odd-looking "addresses" that were never valid in the first place.
+ We need to look harder to avoid warnings being emitted. */
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ rtx x0 = XEXP (x, 0);
+ rtx x00 = XEXP (x0, 0);
+ rtx x01 = XEXP (x0, 1);
+
+ if (x00 == pic_offset_table_rtx)
+ addend = x01;
+ else if (x01 == pic_offset_table_rtx)
+ addend = x00;
+ else
+ return orig_x;
+
+ }
+ else
+ return orig_x;
+ x = XEXP (x, 1);
+
+ if (GET_CODE (x) != CONST)
+ return orig_x;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) != UNSPEC)
+ return orig_x;
+
+ ret = XVECEXP (x, 0, 0);
+ if (XINT (x, 1) == UNSPEC_GOTOFF)
+ need_mem = false;
+ else if (XINT (x, 1) == UNSPEC_GOT)
+ need_mem = true;
+ else
+ return orig_x;
+
+ gcc_assert (GET_CODE (ret) == SYMBOL_REF);
+ if (need_mem != MEM_P (orig_x))
+ return orig_x;
+ if (need_mem && addend)
+ return orig_x;
+ if (addend)
+ ret = gen_rtx_PLUS (Pmode, addend, ret);
+ return ret;
+}
+
+/* For addresses, costs are relative to "MOV (Rm),Rn". For AM33 this is
+ the 3-byte fully general instruction; for MN103 this is the 2-byte form
+ with an address register. */
+
+static int
+mn10300_address_cost (rtx x, bool speed)
+{
+ HOST_WIDE_INT i;
+ rtx base, index;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ /* We assume all of these require a 32-bit constant, even though
+ some symbol and label references can be relaxed. */
+ return speed ? 1 : 4;
+
+ case REG:
+ case SUBREG:
+ case POST_INC:
+ return 0;
+
+ case POST_MODIFY:
+ /* Assume any symbolic offset is a 32-bit constant. */
+ i = (CONST_INT_P (XEXP (x, 1)) ? INTVAL (XEXP (x, 1)) : 0x12345678);
+ if (IN_RANGE (i, -128, 127))
+ return speed ? 0 : 1;
+ if (speed)
+ return 1;
+ if (IN_RANGE (i, -0x800000, 0x7fffff))
+ return 3;
+ return 4;
+
+ case PLUS:
+ base = XEXP (x, 0);
+ index = XEXP (x, 1);
+ if (register_operand (index, SImode))
+ {
+ /* Attempt to minimize the number of registers in the address.
+ This is similar to what other ports do. */
+ if (register_operand (base, SImode))
+ return 1;
+
+ base = XEXP (x, 1);
+ index = XEXP (x, 0);
+ }
+
+ /* Assume any symbolic offset is a 32-bit constant. */
+ i = (CONST_INT_P (XEXP (x, 1)) ? INTVAL (XEXP (x, 1)) : 0x12345678);
+ if (IN_RANGE (i, -128, 127))
+ return speed ? 0 : 1;
+ if (IN_RANGE (i, -32768, 32767))
+ return speed ? 0 : 2;
+ return speed ? 2 : 6;
+
+ default:
+ return rtx_cost (x, MEM, speed);
+ }
+}
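+
+/* Worked examples of the size costs above, in bytes beyond the base
+ "MOV (Rm),Rn" form:
+
+ (reg a0) -> 0
+ (plus (reg a0) (reg d0)) -> 1
+ (plus (reg a0) (const_int 8)) -> 1 (8-bit displacement)
+ (plus (reg a0) (const_int 1000)) -> 2 (16-bit displacement)
+ (symbol_ref "x") -> 4 (full 32-bit constant)
+
+ With SPEED set the same forms cost 0, 1, 0, 0 and 1. */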
+
+/* Implement the TARGET_REGISTER_MOVE_COST hook.
+
+ Recall that the base value of 2 is required by assumptions elsewhere
+ in the body of the compiler, and that a cost of 2 is special-cased as
+ an early exit from reload, meaning that no work is required. */
+
+static int
+mn10300_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t ifrom, reg_class_t ito)
+{
+ enum reg_class from = (enum reg_class) ifrom;
+ enum reg_class to = (enum reg_class) ito;
+ enum reg_class scratch, test;
+
+ /* Simplify the following code by unifying the fp register classes. */
+ if (to == FP_ACC_REGS)
+ to = FP_REGS;
+ if (from == FP_ACC_REGS)
+ from = FP_REGS;
+
+ /* Diagnose invalid moves by costing them as two moves. */
+
+ scratch = NO_REGS;
+ test = from;
+ if (to == SP_REGS)
+ scratch = (TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS);
+ else if (to == MDR_REGS)
+ scratch = DATA_REGS;
+ else if (to == FP_REGS && to != from)
+ scratch = GENERAL_REGS;
+ else
+ {
+ test = to;
+ if (from == SP_REGS)
+ scratch = (TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS);
+ else if (from == MDR_REGS)
+ scratch = DATA_REGS;
+ else if (from == FP_REGS && to != from)
+ scratch = GENERAL_REGS;
+ }
+ if (scratch != NO_REGS && !reg_class_subset_p (test, scratch))
+ return (mn10300_register_move_cost (VOIDmode, from, scratch)
+ + mn10300_register_move_cost (VOIDmode, scratch, to));
+
+ /* From here on, all we need consider are legal combinations. */
+
+ if (optimize_size)
+ {
+ /* The scale here is bytes * 2. */
+
+ if (from == to && (to == ADDRESS_REGS || to == DATA_REGS))
+ return 2;
+
+ if (from == SP_REGS)
+ return (to == ADDRESS_REGS ? 2 : 6);
+
+ /* For AM33, all remaining legal moves are two bytes. */
+ if (TARGET_AM33)
+ return 4;
+
+ if (to == SP_REGS)
+ return (from == ADDRESS_REGS ? 4 : 6);
+
+ if ((from == ADDRESS_REGS || from == DATA_REGS)
+ && (to == ADDRESS_REGS || to == DATA_REGS))
+ return 4;
+
+ if (to == EXTENDED_REGS)
+ return (to == from ? 6 : 4);
+
+ /* What's left are SP_REGS, FP_REGS, or combinations of the above. */
+ return 6;
+ }
+ else
+ {
+ /* The scale here is cycles * 2. */
+
+ if (to == FP_REGS)
+ return 8;
+ if (from == FP_REGS)
+ return 4;
+
+ /* All legal moves between integral registers are single cycle. */
+ return 2;
+ }
+}
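+
+/* For example, a DATA_REGS to SP_REGS move on MN103 has no direct
+ encoding, so the code above prices it as DATA_REGS -> ADDRESS_REGS
+ plus ADDRESS_REGS -> SP_REGS, steering the register allocator away
+ from choosing such a move in the first place. */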
+
+/* Implement the TARGET_MEMORY_MOVE_COST hook.
+
+ Since the form of the address is not known here, this cost must be
+ speed-relative, though it should never be less expensive than the
+ size-relative register move costs above. This is not a problem. */
+
+static int
+mn10300_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t iclass, bool in ATTRIBUTE_UNUSED)
+{
+ enum reg_class rclass = (enum reg_class) iclass;
+
+ if (rclass == FP_REGS)
+ return 8;
+ return 6;
+}
+
+/* Implement the TARGET_RTX_COSTS hook.
+
+ Speed-relative costs are relative to COSTS_N_INSNS, which is intended
+ to represent cycles. Size-relative costs are in bytes. */
+
+static bool
+mn10300_rtx_costs (rtx x, int code, int outer_code, int *ptotal, bool speed)
+{
+ /* This value is used for SYMBOL_REF etc where we want to pretend
+ we have a full 32-bit constant. */
+ HOST_WIDE_INT i = 0x12345678;
+ int total;
+
+ switch (code)
+ {
+ case CONST_INT:
+ i = INTVAL (x);
+ do_int_costs:
+ if (speed)
+ {
+ if (outer_code == SET)
+ {
+ /* 16-bit integer loads have latency 1, 32-bit loads 2. */
+ if (IN_RANGE (i, -32768, 32767))
+ total = COSTS_N_INSNS (1);
+ else
+ total = COSTS_N_INSNS (2);
+ }
+ else
+ {
+ /* 16-bit integer operands don't affect latency;
+ 24-bit and 32-bit operands add a cycle. */
+ if (IN_RANGE (i, -32768, 32767))
+ total = 0;
+ else
+ total = COSTS_N_INSNS (1);
+ }
+ }
+ else
+ {
+ if (outer_code == SET)
+ {
+ if (i == 0)
+ total = 1;
+ else if (IN_RANGE (i, -128, 127))
+ total = 2;
+ else if (IN_RANGE (i, -32768, 32767))
+ total = 3;
+ else
+ total = 6;
+ }
+ else
+ {
+ /* Reference here is ADD An,Dn, vs ADD imm,Dn. */
+ if (IN_RANGE (i, -128, 127))
+ total = 0;
+ else if (IN_RANGE (i, -32768, 32767))
+ total = 2;
+ else if (TARGET_AM33 && IN_RANGE (i, -0x01000000, 0x00ffffff))
+ total = 3;
+ else
+ total = 4;
+ }
+ }
+ goto alldone;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_DOUBLE:
+ /* We assume all of these require a 32-bit constant, even though
+ some symbol and label references can be relaxed. */
+ goto do_int_costs;
+
+ case UNSPEC:
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_PIC:
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLT:
+ case UNSPEC_GOTSYM_OFF:
+ /* The PIC unspecs also resolve to a 32-bit constant. */
+ goto do_int_costs;
+
+ default:
+ /* Assume any non-listed unspec is some sort of arithmetic. */
+ goto do_arith_costs;
+ }
+
+ case PLUS:
+ /* Notice the size difference of INC and INC4. */
+ if (!speed && outer_code == SET && CONST_INT_P (XEXP (x, 1)))
+ {
+ i = INTVAL (XEXP (x, 1));
+ if (i == 1 || i == 4)
+ {
+ total = 1 + rtx_cost (XEXP (x, 0), PLUS, speed);
+ goto alldone;
+ }
+ }
+ goto do_arith_costs;
+
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ case NEG:
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ case COMPARE:
+ case BSWAP:
+ case CLZ:
+ do_arith_costs:
+ total = (speed ? COSTS_N_INSNS (1) : 2);
+ break;
+
+ case ASHIFT:
+ /* Notice the size difference of ASL2 and variants. */
+ if (!speed && CONST_INT_P (XEXP (x, 1)))
+ switch (INTVAL (XEXP (x, 1)))
+ {
+ case 1:
+ case 2:
+ total = 1;
+ goto alldone;
+ case 3:
+ case 4:
+ total = 2;
+ goto alldone;
+ }
+ /* FALLTHRU */
+
+ case ASHIFTRT:
+ case LSHIFTRT:
+ total = (speed ? COSTS_N_INSNS (1) : 3);
+ goto alldone;
+
+ case MULT:
+ total = (speed ? COSTS_N_INSNS (3) : 2);
+ break;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ total = (speed ? COSTS_N_INSNS (39)
+ /* Include space to load+retrieve MDR. */
+ : code == MOD || code == UMOD ? 6 : 4);
+ break;
+
+ case MEM:
+ total = mn10300_address_cost (XEXP (x, 0), speed);
+ if (speed)
+ total = COSTS_N_INSNS (2 + total);
+ goto alldone;
+
+ default:
+ /* Probably not implemented. Assume external call. */
+ total = (speed ? COSTS_N_INSNS (10) : 7);
+ break;
+ }
+
+ *ptotal = total;
+ return false;
+
+ alldone:
+ *ptotal = total;
+ return true;
+}
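+
+/* For example, when optimizing for size, (set (reg) (const_int 100000))
+ costs 6 (a 32-bit immediate), while (set (reg) (const_int 0)) costs
+ only 1, so keeping a large constant in a register is much preferred
+ over rematerializing it at each use. */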
+
+/* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
+ may access it using GOTOFF instead of GOT. */
+
+static void
+mn10300_encode_section_info (tree decl, rtx rtl, int first)
+{
+ rtx symbol;
+
+ default_encode_section_info (decl, rtl, first);
+
+ if (! MEM_P (rtl))
+ return;
+
+ symbol = XEXP (rtl, 0);
+ if (GET_CODE (symbol) != SYMBOL_REF)
+ return;
+
+ if (flag_pic)
+ SYMBOL_REF_FLAG (symbol) = (*targetm.binds_local_p) (decl);
+}
+
+/* Dispatch tables on the mn10300 are extremely expensive in terms of code
+ and readonly data size. So we crank up the case threshold value to
+ encourage a series of if/else comparisons to implement many small switch
+ statements. In theory, this value could be increased much more if we
+ were solely optimizing for space, but we keep it "reasonable" to avoid
+ serious code efficiency lossage. */
+
+static unsigned int
+mn10300_case_values_threshold (void)
+{
+ return 6;
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT. */
+
+static void
+mn10300_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx mem, disp, fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ /* This is a strict alignment target, which means that we play
+ some games to make sure that the locations at which we need
+ to store <chain> and <disp> wind up at aligned addresses.
+
+ 0x28 0x00 add 0,d0
+ 0xfc 0xdd mov chain,a1
+ <chain>
+ 0xf8 0xed 0x00 btst 0,d1
+ 0xdc jmp fnaddr
+ <disp>
+
+ Note that the two extra insns are effectively nops; they
+ clobber the flags but do not affect the contents of D0 or D1. */
+
+ disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (XEXP (m_tramp, 0), 11),
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ mem = adjust_address (m_tramp, SImode, 0);
+ emit_move_insn (mem, gen_int_mode (0xddfc0028, SImode));
+ mem = adjust_address (m_tramp, SImode, 4);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 8);
+ emit_move_insn (mem, gen_int_mode (0xdc00edf8, SImode));
+ mem = adjust_address (m_tramp, SImode, 12);
+ emit_move_insn (mem, disp);
+}
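+
+/* For illustration: the mn10300 is little-endian, so the SImode store
+ of 0xddfc0028 lays down the bytes 28 00 fc dd, i.e. the "add 0,d0"
+ and "mov chain,a1" opcodes shown above, and 0xdc00edf8 likewise
+ produces f8 ed 00 dc for the "btst" and "jmp" opcodes. */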
+
+/* Output the assembler code for a C++ thunk function.
+ THUNK_DECL is the declaration for the thunk function itself, FUNCTION
+ is the decl for the target function. DELTA is an immediate constant
+ offset to be added to the THIS parameter. If VCALL_OFFSET is nonzero
+ the word at the adjusted address *(*THIS' + VCALL_OFFSET) should be
+ additionally added to THIS. Finally jump to the entry point of
+ FUNCTION. */
+
+static void
+mn10300_asm_output_mi_thunk (FILE * file,
+ tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ const char * _this;
+
+ /* Get the register holding the THIS parameter. Handle the case
+ where there is a hidden first argument for a returned structure. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ _this = reg_names [FIRST_ARGUMENT_REGNUM + 1];
+ else
+ _this = reg_names [FIRST_ARGUMENT_REGNUM];
+
+ fprintf (file, "\t%s Thunk Entry Point:\n", ASM_COMMENT_START);
+
+ if (delta)
+ fprintf (file, "\tadd %d, %s\n", (int) delta, _this);
+
+ if (vcall_offset)
+ {
+ const char * scratch = reg_names [FIRST_ADDRESS_REGNUM + 1];
+
+ fprintf (file, "\tmov %s, %s\n", _this, scratch);
+ fprintf (file, "\tmov (%s), %s\n", scratch, scratch);
+ fprintf (file, "\tadd %d, %s\n", (int) vcall_offset, scratch);
+ fprintf (file, "\tmov (%s), %s\n", scratch, scratch);
+ fprintf (file, "\tadd %s, %s\n", scratch, _this);
+ }
+
+ fputs ("\tjmp ", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ putc ('\n', file);
+}
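+
+/* For example, with a DELTA of 4 and no vcall offset, the thunk for a
+ function returning a scalar comes out roughly as
+
+ # Thunk Entry Point:
+ add 4, d0
+ jmp _function
+
+ with d1 in place of d0 when a hidden structure-return pointer shifts
+ the THIS parameter over by one register. */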
+
+/* Return true if mn10300_output_mi_thunk would be able to output the
+ assembler code for the thunk function specified by the arguments
+ it is passed, and false otherwise. */
+
+static bool
+mn10300_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ const_tree function ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
+bool
+mn10300_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ if (REGNO_REG_CLASS (regno) == FP_REGS
+ || REGNO_REG_CLASS (regno) == FP_ACC_REGS)
+ /* Do not store integer values in FP registers. */
+ return GET_MODE_CLASS (mode) == MODE_FLOAT && ((regno & 1) == 0);
+
+ if (((regno) & 1) == 0 || GET_MODE_SIZE (mode) == 4)
+ return true;
+
+ if (REGNO_REG_CLASS (regno) == DATA_REGS
+ || (TARGET_AM33 && REGNO_REG_CLASS (regno) == ADDRESS_REGS)
+ || REGNO_REG_CLASS (regno) == EXTENDED_REGS)
+ return GET_MODE_SIZE (mode) <= 4;
+
+ return false;
+}
+
+bool
+mn10300_modes_tieable (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (GET_MODE_CLASS (mode1) == MODE_FLOAT
+ && GET_MODE_CLASS (mode2) != MODE_FLOAT)
+ return false;
+
+ if (GET_MODE_CLASS (mode2) == MODE_FLOAT
+ && GET_MODE_CLASS (mode1) != MODE_FLOAT)
+ return false;
+
+ if (TARGET_AM33
+ || mode1 == mode2
+ || (GET_MODE_SIZE (mode1) <= 4 && GET_MODE_SIZE (mode2) <= 4))
+ return true;
+
+ return false;
+}
+
+static int
+cc_flags_for_mode (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case CCmode:
+ return CC_FLAG_Z | CC_FLAG_N | CC_FLAG_C | CC_FLAG_V;
+ case CCZNCmode:
+ return CC_FLAG_Z | CC_FLAG_N | CC_FLAG_C;
+ case CCZNmode:
+ return CC_FLAG_Z | CC_FLAG_N;
+ case CC_FLOATmode:
+ return -1;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static int
+cc_flags_for_code (enum rtx_code code)
+{
+ switch (code)
+ {
+ case EQ: /* Z */
+ case NE: /* ~Z */
+ return CC_FLAG_Z;
+
+ case LT: /* N */
+ case GE: /* ~N */
+ return CC_FLAG_N;
+
+ case GT: /* ~(Z|(N^V)) */
+ case LE: /* Z|(N^V) */
+ return CC_FLAG_Z | CC_FLAG_N | CC_FLAG_V;
+
+ case GEU: /* ~C */
+ case LTU: /* C */
+ return CC_FLAG_C;
+
+ case GTU: /* ~(C | Z) */
+ case LEU: /* C | Z */
+ return CC_FLAG_Z | CC_FLAG_C;
+
+ case ORDERED:
+ case UNORDERED:
+ case LTGT:
+ case UNEQ:
+ case UNGE:
+ case UNGT:
+ case UNLE:
+ case UNLT:
+ return -1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+enum machine_mode
+mn10300_select_cc_mode (enum rtx_code code, rtx x, rtx y ATTRIBUTE_UNUSED)
+{
+ int req;
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ return CC_FLOATmode;
+
+ req = cc_flags_for_code (code);
+
+ if (req & CC_FLAG_V)
+ return CCmode;
+ if (req & CC_FLAG_C)
+ return CCZNCmode;
+ return CCZNmode;
+}
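+
+/* For example, EQ and NE need only the Z flag and so select CCZNmode;
+ an unsigned GTU needs Z and C and selects CCZNCmode; a signed GT
+ additionally needs V and therefore selects the full CCmode. */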
+
+static inline bool
+is_load_insn (rtx insn)
+{
+ if (GET_CODE (PATTERN (insn)) != SET)
+ return false;
+
+ return MEM_P (SET_SRC (PATTERN (insn)));
+}
+
+static inline bool
+is_store_insn (rtx insn)
+{
+ if (GET_CODE (PATTERN (insn)) != SET)
+ return false;
+
+ return MEM_P (SET_DEST (PATTERN (insn)));
+}
+
+/* Update scheduling costs for situations that cannot be
+ described using the attributes and DFA machinery.
+ DEP is the insn being scheduled.
+ INSN is the previous insn.
+ COST is the current cycle cost for DEP. */
+
+static int
+mn10300_adjust_sched_cost (rtx insn, rtx link, rtx dep, int cost)
+{
+ int timings = get_attr_timings (insn);
+
+ if (!TARGET_AM33)
+ return 1;
+
+ if (GET_CODE (insn) == PARALLEL)
+ insn = XVECEXP (insn, 0, 0);
+
+ if (GET_CODE (dep) == PARALLEL)
+ dep = XVECEXP (dep, 0, 0);
+
+ /* For the AM34 a load instruction that follows a
+ store instruction incurs an extra cycle of delay. */
+ if (mn10300_tune_cpu == PROCESSOR_AM34
+ && is_load_insn (dep)
+ && is_store_insn (insn))
+ cost += 1;
+
+ /* For the AM34 a non-store, non-branch FPU insn that follows
+ another FPU insn incurs a one cycle throughput increase. */
+ else if (mn10300_tune_cpu == PROCESSOR_AM34
+ && ! is_store_insn (insn)
+ && ! JUMP_P (insn)
+ && GET_CODE (PATTERN (dep)) == SET
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_MODE_CLASS (GET_MODE (SET_SRC (PATTERN (dep)))) == MODE_FLOAT
+ && GET_MODE_CLASS (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
+ cost += 1;
+
+ /* Resolve the conflict described in section 1-7-4 of
+ Chapter 3 of the MN103E Series Instruction Manual
+ where it says:
+
+ "When the preceeding instruction is a CPU load or
+ store instruction, a following FPU instruction
+ cannot be executed until the CPU completes the
+ latency period even though there are no register
+ or flag dependencies between them." */
+
+ /* Only the AM33-2 (and later) CPUs have FPU instructions. */
+ if (! TARGET_AM33_2)
+ return cost;
+
+ /* If a data dependence already exists then the cost is correct. */
+ if (REG_NOTE_KIND (link) == 0)
+ return cost;
+
+ /* Check that the instruction about to be scheduled is an FPU instruction. */
+ if (GET_CODE (PATTERN (dep)) != SET)
+ return cost;
+
+ if (GET_MODE_CLASS (GET_MODE (SET_SRC (PATTERN (dep)))) != MODE_FLOAT)
+ return cost;
+
+ /* Now check to see if the previous instruction is a load or store. */
+ if (! is_load_insn (insn) && ! is_store_insn (insn))
+ return cost;
+
+ /* XXX: Verify: The text of 1-7-4 implies that the restriction
+ only applies when an INTEGER load/store precedes an FPU
+ instruction, but is this true? For now we assume that it is. */
+ if (GET_MODE_CLASS (GET_MODE (SET_SRC (PATTERN (insn)))) != MODE_INT)
+ return cost;
+
+ /* Extract the latency value from the timings attribute. */
+ return timings < 100 ? (timings % 10) : (timings % 100);
+}
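+
+/* For illustration, given the decimal encoding used by the timings
+ attribute, a value of 22 yields a latency of 2 and a value of 1210
+ yields 10: two-digit values keep their final digit, larger values
+ their final two digits. */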
+
+static void
+mn10300_conditional_register_usage (void)
+{
+ unsigned int i;
+
+ if (!TARGET_AM33)
+ {
+ for (i = FIRST_EXTENDED_REGNUM;
+ i <= LAST_EXTENDED_REGNUM; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+ if (!TARGET_AM33_2)
+ {
+ for (i = FIRST_FP_REGNUM;
+ i <= LAST_FP_REGNUM; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+ if (flag_pic)
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] =
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS.
+ We do this in the mn10300 backend to maintain source compatibility
+ with the old cc0-based compiler. */
+
+static tree
+mn10300_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
+ tree inputs ATTRIBUTE_UNUSED,
+ tree clobbers)
+{
+ clobbers = tree_cons (NULL_TREE, build_string (5, "EPSW"),
+ clobbers);
+ return clobbers;
+}
+
+/* A helper function for splitting cbranch patterns after reload. */
+
+void
+mn10300_split_cbranch (enum machine_mode cmp_mode, rtx cmp_op, rtx label_ref)
+{
+ rtx flags, x;
+
+ flags = gen_rtx_REG (cmp_mode, CC_REG);
+ x = gen_rtx_COMPARE (cmp_mode, XEXP (cmp_op, 0), XEXP (cmp_op, 1));
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ x = gen_rtx_fmt_ee (GET_CODE (cmp_op), VOIDmode, flags, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label_ref, pc_rtx);
+ x = gen_rtx_SET (VOIDmode, pc_rtx, x);
+ emit_jump_insn (x);
+}
+
+/* A helper function for matching parallels that set the flags. */
+
+bool
+mn10300_match_ccmode (rtx insn, enum machine_mode cc_mode)
+{
+ rtx op1, flags;
+ enum machine_mode flags_mode;
+
+ gcc_checking_assert (XVECLEN (PATTERN (insn), 0) == 2);
+
+ op1 = XVECEXP (PATTERN (insn), 0, 1);
+ gcc_checking_assert (GET_CODE (SET_SRC (op1)) == COMPARE);
+
+ flags = SET_DEST (op1);
+ flags_mode = GET_MODE (flags);
+
+ if (GET_MODE (SET_SRC (op1)) != flags_mode)
+ return false;
+ if (GET_MODE_CLASS (flags_mode) != MODE_CC)
+ return false;
+
+ /* Ensure that the mode of FLAGS is compatible with CC_MODE. */
+ if (cc_flags_for_mode (flags_mode) & ~cc_flags_for_mode (cc_mode))
+ return false;
+
+ return true;
+}
+
+int
+mn10300_split_and_operand_count (rtx op)
+{
+ HOST_WIDE_INT val = INTVAL (op);
+ int count;
+
+ if (val < 0)
+ {
+ /* High bit is set, look for bits clear at the bottom. */
+ count = exact_log2 (-val);
+ if (count < 0)
+ return 0;
+ /* This is only size win if we can use the asl2 insn. Otherwise we
+ would be replacing 1 6-byte insn with 2 3-byte insns. */
+ if (count > (optimize_insn_for_speed_p () ? 2 : 4))
+ return 0;
+ return -count;
+ }
+ else
+ {
+ /* High bit is clear, look for bits set at the bottom. */
+ count = exact_log2 (val + 1);
+ count = 32 - count;
+ /* Again, this is only a size win with asl2. */
+ if (count > (optimize_insn_for_speed_p () ? 2 : 4))
+ return 0;
+ return -count;
+ }
+}
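+
+/* Worked example: the mask 0xfffffffc (high bit set, two low bits
+ clear) yields 2, letting the AND be split into a pair of 2-bit
+ shifts that clear the low bits, while 0x3fffffff yields -2, the
+ sign telling the caller to shift the other way and clear the two
+ high bits instead. Counts beyond the asl2-friendly limit yield 0,
+ which leaves the AND alone. */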
+
+struct liw_data
+{
+ enum attr_liw slot;
+ enum attr_liw_op op;
+ rtx dest;
+ rtx src;
+};
+
+/* Decide if the given insn is a candidate for LIW bundling. If it is then
+ extract the operands and LIW attributes from the insn and use them to fill
+ in the liw_data structure. Return true upon success or false if the insn
+ cannot be bundled. */
+
+static bool
+extract_bundle (rtx insn, struct liw_data * pdata)
+{
+ bool allow_consts = true;
+ rtx p, s;
+
+ gcc_assert (pdata != NULL);
+
+ if (insn == NULL_RTX)
+ return false;
+ /* Make sure that we are dealing with a simple SET insn. */
+ p = single_set (insn);
+ if (p == NULL_RTX)
+ return false;
+
+ /* Make sure that it could go into one of the LIW pipelines. */
+ pdata->slot = get_attr_liw (insn);
+ if (pdata->slot == LIW_BOTH)
+ return false;
+
+ pdata->op = get_attr_liw_op (insn);
+
+ s = SET_SRC (p);
+
+ switch (pdata->op)
+ {
+ case LIW_OP_MOV:
+ pdata->dest = SET_DEST (p);
+ pdata->src = SET_SRC (p);
+ break;
+ case LIW_OP_CMP:
+ pdata->dest = XEXP (SET_SRC (p), 0);
+ pdata->src = XEXP (SET_SRC (p), 1);
+ break;
+ case LIW_OP_NONE:
+ return false;
+ case LIW_OP_AND:
+ case LIW_OP_OR:
+ case LIW_OP_XOR:
+ /* The AND, OR and XOR long instruction words only accept register arguments. */
+ allow_consts = false;
+ /* Fall through. */
+ default:
+ pdata->dest = SET_DEST (p);
+ pdata->src = XEXP (SET_SRC (p), 1);
+ break;
+ }
+
+ if (! REG_P (pdata->dest))
+ return false;
+
+ if (REG_P (pdata->src))
+ return true;
+
+ return allow_consts && satisfies_constraint_O (pdata->src);
+}
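+
+/* For example, a simple "mov d1,d0" extracts with op LIW_OP_MOV,
+ dest d0 and src d1, whereas a compare records both of its inputs
+ as "dest" and "src", since its real destination is the flags
+ register. */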
+
+/* Make sure that it is OK to execute LIW1 and LIW2 in parallel. GCC generated
+ the instructions on the assumption that LIW1 would be executed before LIW2,
+ so we must check for overlaps between their sources and destinations. */
+
+static bool
+check_liw_constraints (struct liw_data * pliw1, struct liw_data * pliw2)
+{
+ /* Check for slot conflicts. */
+ if (pliw2->slot == pliw1->slot && pliw1->slot != LIW_EITHER)
+ return false;
+
+ /* If either operation is a compare, then "dest" is really an input; the real
+ destination is CC_REG. So these instructions need different checks. */
+
+ /* Changing "CMP ; OP" into "CMP | OP" is OK because the comparison will
+ check its values prior to any changes made by OP. */
+ if (pliw1->op == LIW_OP_CMP)
+ {
+ /* Two sequential comparisons means dead code, which ought to
+ have been eliminated given that bundling only happens with
+ optimization. We cannot bundle them in any case. */
+ gcc_assert (pliw1->op != pliw2->op);
+ return true;
+ }
+
+ /* Changing "OP ; CMP" into "OP | CMP" does not work if the value being compared
+ is the destination of OP, as the CMP will look at the old value, not the new
+ one. */
+ if (pliw2->op == LIW_OP_CMP)
+ {
+ if (REGNO (pliw2->dest) == REGNO (pliw1->dest))
+ return false;
+
+ if (REG_P (pliw2->src))
+ return REGNO (pliw2->src) != REGNO (pliw1->dest);
+
+ return true;
+ }
+
+ /* Changing "OP1 ; OP2" into "OP1 | OP2" does not work if they both write to the
+ same destination register. */
+ if (REGNO (pliw2->dest) == REGNO (pliw1->dest))
+ return false;
+
+ /* Changing "OP1 ; OP2" into "OP1 | OP2" generally does not work if the destination
+ of OP1 is the source of OP2. The exception is when OP1 is a MOVE instruction when
+ we can replace the source in OP2 with the source of OP1. */
+ if (REG_P (pliw2->src) && REGNO (pliw2->src) == REGNO (pliw1->dest))
+ {
+ /* PLIW1's source is known to be a register at this point, so
+ forwarding it is safe even for the register-only AND, OR and
+ XOR long instruction words. */
+ if (pliw1->op == LIW_OP_MOV && REG_P (pliw1->src))
+ {
+ pliw2->src = pliw1->src;
+ return true;
+ }
+ return false;
+ }
+
+ /* Everything else is OK. */
+ return true;
+}
+
+/* Combine pairs of insns into LIW bundles. */
+
+static void
+mn10300_bundle_liw (void)
+{
+ rtx r;
+
+ for (r = get_insns (); r != NULL_RTX; r = next_nonnote_nondebug_insn (r))
+ {
+ rtx insn1, insn2;
+ struct liw_data liw1, liw2;
+
+ insn1 = r;
+ if (! extract_bundle (insn1, & liw1))
+ continue;
+
+ insn2 = next_nonnote_nondebug_insn (insn1);
+ if (! extract_bundle (insn2, & liw2))
+ continue;
+
+ /* Check for source/destination overlap. */
+ if (! check_liw_constraints (& liw1, & liw2))
+ continue;
+
+ if (liw1.slot == LIW_OP2 || liw2.slot == LIW_OP1)
+ {
+ struct liw_data temp;
+
+ temp = liw1;
+ liw1 = liw2;
+ liw2 = temp;
+ }
+
+ delete_insn (insn2);
+
+ if (liw1.op == LIW_OP_CMP)
+ insn2 = gen_cmp_liw (liw2.dest, liw2.src, liw1.dest, liw1.src,
+ GEN_INT (liw2.op));
+ else if (liw2.op == LIW_OP_CMP)
+ insn2 = gen_liw_cmp (liw1.dest, liw1.src, liw2.dest, liw2.src,
+ GEN_INT (liw1.op));
+ else
+ insn2 = gen_liw (liw1.dest, liw2.dest, liw1.src, liw2.src,
+ GEN_INT (liw1.op), GEN_INT (liw2.op));
+
+ insn2 = emit_insn_after (insn2, insn1);
+ delete_insn (insn1);
+ r = insn2;
+ }
+}
+
+static void
+mn10300_reorg (void)
+{
+ if (TARGET_AM33)
+ {
+ if (TARGET_ALLOW_LIW)
+ mn10300_bundle_liw ();
+ }
+}
+
+/* Initialize the GCC target structure. */
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG mn10300_reorg
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS mn10300_legitimize_address
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST mn10300_address_cost
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST mn10300_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST mn10300_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS mn10300_rtx_costs
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START mn10300_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA mn10300_asm_output_addr_const_extra
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS MASK_MULT_BUG | MASK_PTR_A0D0 | MASK_ALLOW_LIW
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION mn10300_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE mn10300_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE mn10300_option_optimization_table
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO mn10300_encode_section_info
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY mn10300_return_in_memory
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE mn10300_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES mn10300_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG mn10300_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE mn10300_function_arg_advance
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS mn10300_builtin_saveregs
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START mn10300_va_start
+
+#undef TARGET_CASE_VALUES_THRESHOLD
+#define TARGET_CASE_VALUES_THRESHOLD mn10300_case_values_threshold
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P mn10300_legitimate_address_p
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS mn10300_delegitimize_address
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS mn10300_preferred_reload_class
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS \
+ mn10300_preferred_output_reload_class
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD mn10300_secondary_reload
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT mn10300_trampoline_init
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE mn10300_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE mn10300_libcall_value
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK mn10300_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK mn10300_can_output_mi_thunk
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST mn10300_adjust_sched_cost
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE mn10300_conditional_register_usage
+
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS mn10300_md_asm_clobbers
+
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM CC_REG
+
+struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
new file mode 100644
index 000000000..2e97ca4ff
--- /dev/null
+++ b/gcc/config/mn10300/mn10300.h
@@ -0,0 +1,766 @@
+/* Definitions of target machine for GNU compiler.
+ Matsushita MN10300 series
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef ASM_SPEC
+#undef LIB_SPEC
+#undef ENDFILE_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC "%{mrelax:--relax}"
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!mno-crt0:%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}"
+
+/* Names to predefine in the preprocessor for this target machine. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__mn10300__"); \
+ builtin_define ("__MN10300__"); \
+ builtin_assert ("cpu=mn10300"); \
+ builtin_assert ("machine=mn10300"); \
+ \
+ if (TARGET_AM34) \
+ { \
+ builtin_define ("__AM33__=4"); \
+ builtin_define ("__AM34__"); \
+ } \
+ else if (TARGET_AM33_2) \
+ { \
+ builtin_define ("__AM33__=2"); \
+ builtin_define ("__AM33_2__"); \
+ } \
+ else if (TARGET_AM33) \
+ builtin_define ("__AM33__=1"); \
+ \
+ builtin_define (TARGET_ALLOW_LIW ? \
+ "__LIW__" : "__NO_LIW__");\
+ \
+ } \
+ while (0)
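+
+/* A minimal sketch of how user code typically tests these predefines
+ (illustrative only):
+
+ #if defined (__AM33__) && __AM33__ >= 2
+ // FPU instructions available (AM33/2.00 or later)
+ #else
+ // plain MN10300 or AM33/1.00 code
+ #endif
+*/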
+
+enum processor_type
+{
+ PROCESSOR_MN10300,
+ PROCESSOR_AM33,
+ PROCESSOR_AM33_2,
+ PROCESSOR_AM34
+};
+
+extern enum processor_type mn10300_processor;
+extern enum processor_type mn10300_tune_cpu;
+
+#define TARGET_AM33 (mn10300_processor >= PROCESSOR_AM33)
+#define TARGET_AM33_2 (mn10300_processor >= PROCESSOR_AM33_2)
+#define TARGET_AM34 (mn10300_processor >= PROCESSOR_AM34)
+
+#ifndef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_MN10300
+#endif
+
+/* Print subsidiary information on the compiler version in use. */
+
+#define TARGET_VERSION fprintf (stderr, " (MN10300)");
+
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields.
+ This is not true on the Matsushita MN10300. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* This is not true on the Matsushita MN10300. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered.
+ This is not true on the Matsushita MN10300. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* The stack goes in 32-bit lumps. */
+#define STACK_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function.
+ 8 is the minimum boundary; it's unclear if bigger alignments
+ would improve performance. */
+#define FUNCTION_BOUNDARY 8
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Define this if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+#define FIRST_PSEUDO_REGISTER 52
+
+/* Specify machine-specific register numbers. The commented out entries
+ are defined in mn10300.md. */
+#define FIRST_DATA_REGNUM 0
+#define LAST_DATA_REGNUM 3
+#define FIRST_ADDRESS_REGNUM 4
+/* #define PIC_REG 6 */
+#define LAST_ADDRESS_REGNUM 8
+/* #define SP_REG 9 */
+#define FIRST_EXTENDED_REGNUM 10
+#define LAST_EXTENDED_REGNUM 17
+#define FIRST_FP_REGNUM 18
+#define LAST_FP_REGNUM 49
+/* #define MDR_REG 50 */
+/* #define CC_REG 51 */
+#define FIRST_ARGUMENT_REGNUM 0
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM (LAST_ADDRESS_REGNUM + 1)
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM (LAST_ADDRESS_REGNUM - 1)
+
+/* Base register for access to arguments of the function. This
+ is a fake register and will be eliminated into either the frame
+ pointer or stack pointer. */
+#define ARG_POINTER_REGNUM LAST_ADDRESS_REGNUM
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM (FIRST_ADDRESS_REGNUM + 1)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+
+#define FIXED_REGISTERS \
+ { 0, 0, 0, 0, /* data regs */ \
+ 0, 0, 0, 0, /* addr regs */ \
+ 1, /* arg reg */ \
+ 1, /* sp reg */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, /* extended regs */ \
+ 0, 0, /* fp regs (18-19) */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* fp regs (20-29) */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* fp regs (30-39) */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* fp regs (40-49) */ \
+ 0, /* mdr reg */ \
+ 1 /* cc reg */ \
+ }
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you
+ like. */
+
+#define CALL_USED_REGISTERS \
+ { 1, 1, 0, 0, /* data regs */ \
+ 1, 1, 0, 0, /* addr regs */ \
+ 1, /* arg reg */ \
+ 1, /* sp reg */ \
+ 1, 1, 1, 1, 0, 0, 0, 0, /* extended regs */ \
+ 1, 1, /* fp regs (18-19) */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* fp regs (20-29) */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, /* fp regs (30-39) */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* fp regs (40-49) */ \
+ 1, /* mdr reg */ \
+ 1 /* cc reg */ \
+ }
+
+/* Note: The definition of CALL_REALLY_USED_REGISTERS is not
+ redundant. It is needed when compiling in PIC mode because
+ the a2 register becomes fixed (and hence must be marked as
+ call_used) but in order to preserve the ABI it is not marked
+ as call_really_used. */
+#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS
+
+#define REG_ALLOC_ORDER \
+ { 0, 1, 4, 5, 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 8, 9 \
+ , 42, 43, 44, 45, 46, 47, 48, 49, 34, 35, 36, 37, 38, 39, 40, 41 \
+ , 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 50, 51 \
+ }
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ mn10300_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ mn10300_modes_tieable ((MODE1), (MODE2))
+
+/* 4 data and effectively 3 address registers make for a small
+ register file as far as I'm concerned. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+enum reg_class
+{
+ NO_REGS, DATA_REGS, ADDRESS_REGS, SP_REGS, SP_OR_ADDRESS_REGS,
+ EXTENDED_REGS, FP_REGS, FP_ACC_REGS, CC_REGS, MDR_REGS,
+ GENERAL_REGS, SP_OR_GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "DATA_REGS", "ADDRESS_REGS", "SP_REGS", "SP_OR_ADDRESS_REGS", \
+ "EXTENDED_REGS", "FP_REGS", "FP_ACC_REGS", "CC_REGS", "MDR_REGS", \
+ "GENERAL_REGS", "SP_OR_GENERAL_REGS", "ALL_REGS", "LIM_REGS" \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ { 0, 0 }, /* No regs */ \
+ { 0x0000000f, 0 }, /* DATA_REGS */ \
+ { 0x000001f0, 0 }, /* ADDRESS_REGS */ \
+ { 0x00000200, 0 }, /* SP_REGS */ \
+ { 0x000003f0, 0 }, /* SP_OR_ADDRESS_REGS */ \
+ { 0x0003fc00, 0 }, /* EXTENDED_REGS */ \
+ { 0xfffc0000, 0x3ffff },/* FP_REGS */ \
+ { 0x03fc0000, 0 }, /* FP_ACC_REGS */ \
+ { 0x00000000, 0x80000 },/* CC_REGS */ \
+ { 0x00000000, 0x40000 },/* MDR_REGS */ \
+ { 0x0003fdff, 0 }, /* GENERAL_REGS */ \
+ { 0x0003ffff, 0 }, /* SP_OR_GENERAL_REGS */ \
+ { 0xffffffff, 0xfffff } /* ALL_REGS */ \
+}
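+
+/* Decoding the first word of the masks above: bits 0-3 are the data
+ registers, bits 4-8 the address registers (including the fake
+ argument pointer), bit 9 the stack pointer and bits 10-17 the
+ extended registers, matching the register numbers defined earlier
+ in this file. */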
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers in a cover class should be
+ cheaper than a load or store of the registers. The macro value is an
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FP_REGS, MDR_REGS, LIM_REG_CLASSES \
+}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) <= LAST_DATA_REGNUM ? DATA_REGS : \
+ (REGNO) <= LAST_ADDRESS_REGNUM ? ADDRESS_REGS : \
+ (REGNO) == STACK_POINTER_REGNUM ? SP_REGS : \
+ (REGNO) <= LAST_EXTENDED_REGNUM ? EXTENDED_REGS : \
+ (REGNO) <= LAST_FP_REGNUM ? FP_REGS : \
+ (REGNO) == MDR_REG ? MDR_REGS : \
+ (REGNO) == CC_REG ? CC_REGS : \
+ NO_REGS)
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS \
+ (TARGET_AM33 ? GENERAL_REGS : DATA_REGS)
+#define BASE_REG_CLASS \
+ (TARGET_AM33 ? SP_OR_GENERAL_REGS : SP_OR_ADDRESS_REGS)
+
+/* Macros to check register numbers against specific register classes. */
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#ifndef REG_OK_STRICT
+# define REG_STRICT 0
+#else
+# define REG_STRICT 1
+#endif
+
+#define REGNO_DATA_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, DATA_REGS, strict)
+#define REGNO_ADDRESS_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, ADDRESS_REGS, strict)
+#define REGNO_EXTENDED_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, EXTENDED_REGS, strict)
+#define REGNO_GENERAL_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, GENERAL_REGS, strict)
+
+#define REGNO_STRICT_OK_FOR_BASE_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, BASE_REG_CLASS, strict)
+#define REGNO_OK_FOR_BASE_P(regno) \
+ (REGNO_STRICT_OK_FOR_BASE_P ((regno), REG_STRICT))
+#define REG_OK_FOR_BASE_P(X) \
+ (REGNO_OK_FOR_BASE_P (REGNO (X)))
+
+#define REGNO_STRICT_OK_FOR_BIT_BASE_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, ADDRESS_REGS, strict)
+#define REGNO_OK_FOR_BIT_BASE_P(regno) \
+ (REGNO_STRICT_OK_FOR_BIT_BASE_P ((regno), REG_STRICT))
+#define REG_OK_FOR_BIT_BASE_P(X) \
+ (REGNO_OK_FOR_BIT_BASE_P (REGNO (X)))
+
+#define REGNO_STRICT_OK_FOR_INDEX_P(regno, strict) \
+ mn10300_regno_in_class_p (regno, INDEX_REG_CLASS, strict)
+#define REGNO_OK_FOR_INDEX_P(regno) \
+ (REGNO_STRICT_OK_FOR_INDEX_P ((regno), REG_STRICT))
+#define REG_OK_FOR_INDEX_P(X) \
+ (REGNO_OK_FOR_INDEX_P (REGNO (X)))
+
+#define LIMIT_RELOAD_CLASS(MODE, CLASS) \
+ (!TARGET_AM33 && (MODE == QImode || MODE == HImode) ? DATA_REGS : CLASS)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* A class that contains registers which the compiler must always
+ access in a mode that is the same size as the mode in which it
+ loaded the register. */
+#define CLASS_CANNOT_CHANGE_SIZE FP_REGS
+
+/* Return 1 if VALUE is in the range specified. */
+
+#define INT_8_BITS(VALUE) ((unsigned) (VALUE) + 0x80 < 0x100)
+#define INT_16_BITS(VALUE) ((unsigned) (VALUE) + 0x8000 < 0x10000)
+
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset of first parameter from the argument pointer register value. */
+/* This is equal to the size of the saved fp + pc, even when an fp
+ isn't saved, since the value is used before we know whether it
+ will be. */
+
+#define FIRST_PARM_OFFSET(FNDECL) 4
+
+/* But the CFA is at the arg pointer directly, not at the first argument. */
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 0
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = mn10300_initial_offset (FROM, TO)
+
+/* We use d0/d1 for passing parameters, so allocate 8 bytes of space
+ for a register flushback area. */
+#define REG_PARM_STACK_SPACE(DECL) 8
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* So we can allocate space for return pointers once for the function
+ instead of around every call. */
+#define STACK_POINTER_OFFSET 4
+
+/* 1 if N is a possible register number for function argument passing.
+ On the MN10300, d0 and d1 are used in this way. */
+
+#define FUNCTION_ARG_REGNO_P(N) ((N) <= 1)
+
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On the MN10300, this is a single integer, which is the number of
+ bytes of arguments scanned so far. */
+
+#define CUMULATIVE_ARGS struct cum_arg
+
+struct cum_arg
+{
+ int nbytes;
+};
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On the MN10300, the offset starts at 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM).nbytes = 0)
+
+#define FUNCTION_VALUE_REGNO_P(N) mn10300_function_value_regno_p (N)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) ;
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE 16
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+
+ On the mn10300, the return address is not at a constant location
+ due to the frame layout. Luckily, it is at a constant offset from
+ the argument pointer, so we define RETURN_ADDR_RTX to return a
+ MEM using arg_pointer_rtx. Reload will replace arg_pointer_rtx
+ with a reference to the stack/frame pointer + an appropriate offset. */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT == 0) \
+ ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \
+ : (rtx) 0)
+
+/* The return address is saved both in the stack and in MDR. Using
+ the stack location is handiest for what unwinding needs. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM))
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+
+/* We have post-increments. */
+#define HAVE_POST_INCREMENT TARGET_AM33
+#define HAVE_POST_MODIFY_DISP TARGET_AM33
+
+/* ... But we don't want to use them for block moves. Small offsets are
+ just as effective, at least for inline block move sizes, and appear
+ to produce cleaner code. */
+#define USE_LOAD_POST_INCREMENT(M) 0
+#define USE_STORE_POST_INCREMENT(M) 0
+
+/* Accept either REG or SUBREG where a register is valid. */
+
+#define RTX_OK_FOR_BASE_P(X, strict) \
+ ((REG_P (X) && REGNO_STRICT_OK_FOR_BASE_P (REGNO (X), \
+ (strict))) \
+ || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)) \
+ && REGNO_STRICT_OK_FOR_BASE_P (REGNO (SUBREG_REG (X)), \
+ (strict))))
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \
+do { \
+ rtx new_x = mn10300_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \
+ if (new_x) \
+ { \
+ X = new_x; \
+ goto WIN; \
+ } \
+} while (0)
+
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+#define LEGITIMATE_CONSTANT_P(X) mn10300_legitimate_constant_p (X)
+
+/* Zero if this needs fixing up to become PIC. */
+
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ mn10300_legitimate_pic_operand_p (X)
+
+/* Register to hold the addressing base for
+ position independent code access to data items. */
+#define PIC_OFFSET_TABLE_REGNUM PIC_REG
+
+/* The name of the pseudo-symbol representing the Global Offset Table. */
+#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
+
+#define SYMBOLIC_CONST_P(X) \
+((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == LABEL_REF) \
+ && ! LEGITIMATE_PIC_OPERAND_P (X))
+
+/* Non-global SYMBOL_REFs have SYMBOL_REF_FLAG enabled. */
+#define MN10300_GLOBAL_P(X) (! SYMBOL_REF_FLAG (X))
+
+#define SELECT_CC_MODE(OP, X, Y) mn10300_select_cc_mode (OP, X, Y)
+#define REVERSIBLE_CC_MODE(MODE) 0
+
+/* Nonzero if access to memory by bytes or half words is no faster
+ than accessing full words. */
+#define SLOW_BYTE_ACCESS 1
+
+#define NO_FUNCTION_CSE
+
+/* According to expr.c, a value of around 6 should minimize code size, and
+ for the MN10300 series, that's our primary concern. */
+#define MOVE_RATIO(speed) 6
+
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+#define DATA_SECTION_ASM_OP "\t.section .data"
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+#define ASM_COMMENT_START "#"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "#APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+/* This says how to output the assembler to define a global
+ uninitialized but not common symbol.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+/* This is how to output a reference to a user-level label named NAME.
+ `assemble_name' uses this. */
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ asm_fprintf (FILE, "%U%s", (*targetm.strip_name_encoding) (NAME))
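+
+/* For example, with USER_LABEL_PREFIX defined as "_" above, a reference
+ to the C-level name `foo' is emitted as `_foo'. */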
+
+/* This is how we tell the assembler that two symbols have the same value. */
+
+#define ASM_OUTPUT_DEF(FILE,NAME1,NAME2) \
+ do \
+ { \
+ assemble_name (FILE, NAME1); \
+ fputs (" = ", FILE); \
+ assemble_name (FILE, NAME2); \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
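+
+/* For example, ASM_OUTPUT_DEF (file, "_foo", "_bar") emits a line of
+ the form:
+
+ _foo = _bar */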
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by the compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{ "d0", "d1", "d2", "d3", "a0", "a1", "a2", "a3", "ap", "sp", \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7" \
+, "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7" \
+, "fs8", "fs9", "fs10", "fs11", "fs12", "fs13", "fs14", "fs15" \
+, "fs16", "fs17", "fs18", "fs19", "fs20", "fs21", "fs22", "fs23" \
+, "fs24", "fs25", "fs26", "fs27", "fs28", "fs29", "fs30", "fs31" \
+, "mdr", "EPSW" \
+}
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ {"r8", 4}, {"r9", 5}, {"r10", 6}, {"r11", 7}, \
+ {"r12", 0}, {"r13", 1}, {"r14", 2}, {"r15", 3}, \
+ {"e0", 10}, {"e1", 11}, {"e2", 12}, {"e3", 13}, \
+ {"e4", 14}, {"e5", 15}, {"e6", 16}, {"e7", 17} \
+, {"fd0", 18}, {"fd2", 20}, {"fd4", 22}, {"fd6", 24} \
+, {"fd8", 26}, {"fd10", 28}, {"fd12", 30}, {"fd14", 32} \
+, {"fd16", 34}, {"fd18", 36}, {"fd20", 38}, {"fd22", 40} \
+, {"fd24", 42}, {"fd26", 44}, {"fd28", 46}, {"fd30", 48} \
+, {"cc", CC_REG} \
+}
+
+/* Print an instruction operand X on file FILE.
+ See mn10300.c for details. */
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ mn10300_print_operand (FILE, X, CODE)
+
+/* Print a memory operand whose address is X, on file FILE.
+ This uses a function in mn10300.c. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ mn10300_print_operand_address (FILE, ADDR)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t%s .L%d\n", ".long", VALUE)
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t%s .L%d-.L%d\n", ".long", VALUE, REL)
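+
+/* For example, with VALUE 5 and REL 2, the two macros above emit
+ "\t.long .L5" and "\t.long .L5-.L2" respectively. */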
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
+
+/* We don't have to worry about dbx compatibility for the mn10300. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* Use dwarf2 debugging info by default. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE Pmode
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* The assembler op to output the .file directive. */
+
+#define FILE_ASM_OP "\t.file\n"
+
diff --git a/gcc/config/mn10300/mn10300.md b/gcc/config/mn10300/mn10300.md
new file mode 100644
index 000000000..3d8e91470
--- /dev/null
+++ b/gcc/config/mn10300/mn10300.md
@@ -0,0 +1,2154 @@
+;; GCC machine description for Matsushita MN10300
+;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Contributed by Jeff Law (law@cygnus.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+(define_constants [
+ (PIC_REG 6)
+ (SP_REG 9)
+ (MDR_REG 50)
+ (CC_REG 51)
+
+ (UNSPEC_PIC 1)
+ (UNSPEC_GOT 2)
+ (UNSPEC_GOTOFF 3)
+ (UNSPEC_PLT 4)
+ (UNSPEC_GOTSYM_OFF 5)
+
+ (UNSPEC_EXT 6)
+ (UNSPEC_BSCH 7)
+
+ ;; This is used to encode LIW patterns.
+ (UNSPEC_LIW 8)
+])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Processor type. This attribute must exactly match the processor_type
+;; enumeration in mn10300.h.
+(define_attr "cpu" "mn10300,am33,am33_2,am34"
+ (const (symbol_ref "(enum attr_cpu) mn10300_tune_cpu")))
+
+;; Used to control the "enabled" attribute on a per-instruction basis.
+(define_attr "isa" "base,am33,am33_2,am34"
+ (const_string "base"))
+
+(define_attr "enabled" ""
+ (cond [(eq_attr "isa" "base")
+ (const_int 1)
+
+ (and (eq_attr "isa" "am33")
+ (ne (symbol_ref "TARGET_AM33") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "isa" "am33_2")
+ (ne (symbol_ref "TARGET_AM33_2") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "isa" "am34")
+ (ne (symbol_ref "TARGET_AM34") (const_int 0)))
+ (const_int 1)
+ ]
+ (const_int 0))
+)
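+
+;; For example, an alternative carrying (set_attr "isa" "am33") is enabled
+;; only when TARGET_AM33 is in effect, while alternatives left at the
+;; default "base" are always available.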
+
+(define_mode_iterator INT [QI HI SI])
+
+
+;; Bundling of smaller insns into a long instruction word (LIW)
+(define_automaton "liw_bundling")
+(automata_option "ndfa")
+
+(define_cpu_unit "liw_op1_u,liw_op2_u" "liw_bundling")
+
+(define_attr "liw" "op1,op2,both,either"
+ (const_string "both"))
+;; Note: this list must match the one defined for liw_op_names[].
+(define_attr "liw_op" "add,cmp,sub,mov,and,or,xor,asr,lsr,asl,none,max"
+ (const_string "none"))
+
+(define_insn_reservation "liw_op1" 1
+ (and (ior (eq_attr "cpu" "am33")
+ (eq_attr "cpu" "am33_2")
+ (eq_attr "cpu" "am34"))
+ (eq_attr "liw" "op1"))
+ "liw_op1_u");
+(define_insn_reservation "liw_op2" 1
+ (and (ior (eq_attr "cpu" "am33")
+ (eq_attr "cpu" "am33_2")
+ (eq_attr "cpu" "am34"))
+ (eq_attr "liw" "op2"))
+ "liw_op2_u");
+(define_insn_reservation "liw_both" 1
+ (and (ior (eq_attr "cpu" "am33")
+ (eq_attr "cpu" "am33_2")
+ (eq_attr "cpu" "am34"))
+ (eq_attr "liw" "both"))
+ "liw_op1_u + liw_op2_u");
+(define_insn_reservation "liw_either" 1
+ (and (ior (eq_attr "cpu" "am33")
+ (eq_attr "cpu" "am33_2")
+ (eq_attr "cpu" "am34"))
+ (eq_attr "liw" "either"))
+ "liw_op1_u | liw_op2_u");
+
+;; ----------------------------------------------------------------------
+;; Pipeline description.
+;; ----------------------------------------------------------------------
+
+;; The AM33 only has a single pipeline. It has five stages (fetch,
+;; decode, execute, memory access, writeback) each of which normally
+;; takes a single CPU clock cycle.
+
+;; The timings attribute consists of two numbers, the first is the
+;; throughput, which is the number of cycles the instruction takes
+;; to execute and generate a result. The second is the latency
+;; which is the effective number of cycles the instruction takes to
+;; execute if its result is used by the following instruction. The
+;; latency is always greater than or equal to the throughput.
+;; These values were taken from the Appendix of the "MN103E Series
+;; Instruction Manual", together with the timings for the AM34.
+
+;; Note - it would be nice to use strings rather than integers for
+;; the possible values of this attribute, so that we can have the
+;; gcc build mechanism check for values that are not supported by
+;; the reservations below. But this will not work because the code
+;; in mn10300_adjust_sched_cost() needs integers, not strings.
+
+(define_attr "timings" "" (const_int 11))
+
+(define_automaton "pipelining")
+(define_cpu_unit "throughput" "pipelining")
+
+(define_insn_reservation "throughput__1_latency__1" 1
+ (eq_attr "timings" "11") "throughput")
+(define_insn_reservation "throughput__1_latency__2" 2
+ (eq_attr "timings" "12") "throughput,nothing")
+(define_insn_reservation "throughput__1_latency__3" 3
+ (eq_attr "timings" "13") "throughput,nothing*2")
+(define_insn_reservation "throughput__1_latency__4" 4
+ (eq_attr "timings" "14") "throughput,nothing*3")
+(define_insn_reservation "throughput__2_latency__2" 2
+ (eq_attr "timings" "22") "throughput*2")
+(define_insn_reservation "throughput__2_latency__3" 3
+ (eq_attr "timings" "23") "throughput*2,nothing")
+(define_insn_reservation "throughput__2_latency__4" 4
+ (eq_attr "timings" "24") "throughput*2,nothing*2")
+(define_insn_reservation "throughput__2_latency__5" 5
+ (eq_attr "timings" "25") "throughput*2,nothing*3")
+(define_insn_reservation "throughput__3_latency__3" 3
+ (eq_attr "timings" "33") "throughput*3")
+(define_insn_reservation "throughput__3_latency__7" 7
+ (eq_attr "timings" "37") "throughput*3,nothing*4")
+(define_insn_reservation "throughput__4_latency__4" 4
+ (eq_attr "timings" "44") "throughput*4")
+(define_insn_reservation "throughput__4_latency__7" 7
+ (eq_attr "timings" "47") "throughput*4,nothing*3")
+(define_insn_reservation "throughput__4_latency__8" 8
+ (eq_attr "timings" "48") "throughput*4,nothing*4")
+(define_insn_reservation "throughput__5_latency__5" 5
+ (eq_attr "timings" "55") "throughput*5")
+(define_insn_reservation "throughput__6_latency__6" 6
+ (eq_attr "timings" "66") "throughput*6")
+(define_insn_reservation "throughput__7_latency__7" 7
+ (eq_attr "timings" "77") "throughput*7")
+(define_insn_reservation "throughput__7_latency__8" 8
+ (eq_attr "timings" "78") "throughput*7,nothing")
+(define_insn_reservation "throughput__8_latency__8" 8
+ (eq_attr "timings" "88") "throughput*8")
+(define_insn_reservation "throughput__9_latency__9" 9
+ (eq_attr "timings" "99") "throughput*9")
+(define_insn_reservation "throughput__8_latency_14" 14
+ (eq_attr "timings" "814") "throughput*8,nothing*6")
+(define_insn_reservation "throughput__9_latency_10" 10
+ (eq_attr "timings" "910") "throughput*9,nothing")
+(define_insn_reservation "throughput_10_latency_10" 10
+ (eq_attr "timings" "1010") "throughput*10")
+(define_insn_reservation "throughput_12_latency_16" 16
+ (eq_attr "timings" "1216") "throughput*12,nothing*4")
+(define_insn_reservation "throughput_13_latency_13" 13
+ (eq_attr "timings" "1313") "throughput*13")
+(define_insn_reservation "throughput_14_latency_14" 14
+ (eq_attr "timings" "1414") "throughput*14")
+(define_insn_reservation "throughput_13_latency_17" 17
+ (eq_attr "timings" "1317") "throughput*13,nothing*4")
+(define_insn_reservation "throughput_23_latency_27" 27
+ (eq_attr "timings" "2327") "throughput*23,nothing*4")
+(define_insn_reservation "throughput_25_latency_31" 31
+ (eq_attr "timings" "2531") "throughput*25,nothing*6")
+(define_insn_reservation "throughput_38_latency_39" 39
+ (eq_attr "timings" "3839") "throughput*38,nothing")
+(define_insn_reservation "throughput_39_latency_40" 40
+ (eq_attr "timings" "3940") "throughput*39,nothing")
+(define_insn_reservation "throughput_40_latency_40" 40
+ (eq_attr "timings" "4040") "throughput*40")
+(define_insn_reservation "throughput_41_latency_42" 42
+ (eq_attr "timings" "4142") "throughput*41,nothing")
+(define_insn_reservation "throughput_42_latency_43" 44
+ (eq_attr "timings" "4243") "throughput*42,nothing")
+(define_insn_reservation "throughput_43_latency_44" 44
+ (eq_attr "timings" "4344") "throughput*43,nothing")
+(define_insn_reservation "throughput_45_latency_46" 46
+ (eq_attr "timings" "4546") "throughput*45,nothing")
+(define_insn_reservation "throughput_47_latency_53" 53
+ (eq_attr "timings" "4753") "throughput*47,nothing*6")
+
+;; Note - the conflict between memory load/store instructions
+;; and floating point instructions described in section 1-7-4
+;; of Chapter 3 of the MN103E Series Instruction Manual is
+;; handled by the mn10300_adjust_sched_cost function.
+
+;; ----------------------------------------------------------------------
+;; MOVE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; movqi
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand")
+ (match_operand:QI 1 "general_operand"))]
+ ""
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand0, QImode)
+ && !register_operand (operand1, QImode))
+ operands[1] = force_reg (QImode, operand1);
+})
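+
+;; For illustration (hypothetical registers): a QImode memory-to-memory
+;; copy is rewritten by the expander above into a load and a store via
+;; the insn below, roughly
+;; movbu (a0),d0
+;; movbu d0,(a1)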
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=*r,D*r,D*r,D,m")
+ (match_operand:QI 1 "general_operand" " 0,D*r, i,m,D"))]
+ "(register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ case 2:
+ return "mov %1,%0";
+ case 3:
+ case 4:
+ return "movbu %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr_alternative "timings"
+ [(const_int 11)
+ (const_int 11)
+ (const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 22))
+ ])]
+)
+
+;; movhi
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand")
+ (match_operand:HI 1 "general_operand"))]
+ ""
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, HImode)
+ && !register_operand (operand0, HImode))
+ operands[1] = force_reg (HImode, operand1);
+})
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=*r,D*r,D*r,D,m")
+ (match_operand:HI 1 "general_operand" " 0, i,D*r,m,D"))]
+ "(register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ /* Note that "MOV imm8,An" is already zero-extending, and is 2 bytes.
+ We have "MOV imm16,Dn" at 3 bytes. The only win for the 4 byte
+ movu is for an 8-bit unsigned move into Rn. */
+ if (TARGET_AM33
+ && CONST_INT_P (operands[1])
+ && IN_RANGE (INTVAL (operands[1]), 0x80, 0xff)
+ && REGNO_EXTENDED_P (REGNO (operands[0]), 1))
+ return "movu %1,%0";
+ /* FALLTHRU */
+ case 2:
+ return "mov %1,%0";
+ case 3:
+ case 4:
+ return "movhu %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr_alternative "timings"
+ [(const_int 11)
+ (const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 22))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 22))
+ ])]
+)
+
+;; movsi and helpers
+
+;; We use this to handle addition of two values when one operand is the
+;; stack pointer and the other is a memory reference of some kind. Reload
+;; does not handle them correctly without this expander.
+(define_expand "reload_plus_sp_const"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "impossible_plus_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&A"))]
+ ""
+{
+ rtx dest, scratch, other;
+
+ dest = operands[0];
+ scratch = operands[2];
+
+ other = XEXP (operands[1], 1);
+ if (other == stack_pointer_rtx)
+ other = XEXP (operands[1], 0);
+
+ if (true_regnum (other) == true_regnum (dest))
+ {
+ gcc_assert (true_regnum (scratch) != true_regnum (dest));
+ emit_move_insn (scratch, stack_pointer_rtx);
+ emit_insn (gen_addsi3 (dest, dest, scratch));
+ }
+ else if (TARGET_AM33 || REGNO_REG_CLASS (true_regnum (dest)) == ADDRESS_REGS)
+ {
+ emit_move_insn (dest, stack_pointer_rtx);
+ if (other == stack_pointer_rtx)
+ emit_insn (gen_addsi3 (dest, dest, dest));
+ else if (other != const0_rtx)
+ emit_insn (gen_addsi3 (dest, dest, other));
+ }
+ else
+ {
+ emit_move_insn (scratch, stack_pointer_rtx);
+ if (other == stack_pointer_rtx)
+ {
+ emit_move_insn (dest, scratch);
+ emit_insn (gen_addsi3 (dest, dest, dest));
+ }
+ else if (other != const0_rtx)
+ {
+ emit_move_insn (dest, other);
+ emit_insn (gen_addsi3 (dest, dest, scratch));
+ }
+ else
+ emit_move_insn (dest, scratch);
+ }
+ DONE;
+})
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "general_operand"))]
+ ""
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, SImode)
+ && !register_operand (operand0, SImode))
+ operands[1] = force_reg (SImode, operand1);
+ if (flag_pic)
+ {
+ rtx temp;
+ if (SYMBOLIC_CONST_P (operands[1]))
+ {
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (Pmode, operands[1]);
+ else
+ {
+ temp = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (Pmode));
+ operands[1] = mn10300_legitimize_pic_address (operands[1], temp);
+ }
+ }
+ else if (GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
+ {
+ temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ temp = mn10300_legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
+ temp);
+ operands[1] = expand_binop (SImode, add_optab, temp,
+ XEXP (XEXP (operands[1], 0), 1),
+ (!can_create_pseudo_p ()
+ ? temp
+ : gen_reg_rtx (Pmode)),
+ 0, OPTAB_LIB_WIDEN);
+ }
+ }
+})
+
+(define_insn "*movsi_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=r,r,r,r,m,r, A,*y,*y,*z,*d")
+ (match_operand:SI 1 "general_operand"
+ " 0,O,i,r,r,m,*y, A, i,*d,*z"))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1: /* imm-reg. */
+ case 2:
+ /* See movhi for a discussion of sizes for 8-bit movu. Note that the
+ 24-bit movu is 6 bytes, which is the same size as the full 32-bit
+ mov form for An and Dn. So again movu is only a win for Rn. */
+ if (TARGET_AM33
+ && CONST_INT_P (operands[1])
+ && REGNO_EXTENDED_P (REGNO (operands[0]), 1))
+ {
+ HOST_WIDE_INT val = INTVAL (operands[1]);
+ if (IN_RANGE (val, 0x80, 0xff)
+ || IN_RANGE (val, 0x800000, 0xffffff))
+ return "movu %1,%0";
+ }
+ /* FALLTHRU */
+ case 3: /* reg-reg */
+ case 4: /* reg-mem */
+ case 5: /* mem-reg */
+ case 6: /* sp-reg */
+ case 7: /* reg-sp */
+ case 8: /* imm-sp */
+ case 9: /* reg-mdr */
+ case 10: /* mdr-reg */
+ return "mov %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,*,*,*,*,*,*,*,am33,*,*")
+ (set_attr "liw" "*,either,*,either,*,*,*,*,*,*,*")
+ (set_attr "liw_op" "mov")
+ (set_attr_alternative "timings"
+ [(const_int 11)
+ (const_int 22)
+ (const_int 22)
+ (const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 22))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 22))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (const_int 11)
+ (const_int 11)
+ (const_int 11)
+ ])]
+)
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand")
+ (match_operand:SF 1 "general_operand"))]
+ "TARGET_AM33_2"
+{
+ /* One of the ops has to be in a register. */
+ if (!register_operand (operand1, SFmode)
+ && !register_operand (operand0, SFmode))
+ operands[1] = force_reg (SFmode, operand1);
+})
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rf,r,f,r,f,r,f,r,m,f,Q")
+ (match_operand:SF 1 "general_operand" " 0,F,F,r,f,f,r,m,r,Q,f"))]
+ "TARGET_AM33_2
+ && (register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ case 3:
+ case 7:
+ case 8:
+ return "mov %1,%0";
+ case 2:
+ case 4:
+ case 5:
+ case 6:
+ case 9:
+ case 10:
+ return "fmov %1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr_alternative "timings"
+ [(const_int 11)
+ (const_int 22)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 47) (const_int 25))
+ (const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 14))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 12))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 14))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ ])]
+)
+
+;; If the flags register is not live, generate CLR instead of MOV 0.
+;; For MN103, this is only legal for DATA_REGS; for AM33 this is legal
+;; but not a win for ADDRESS_REGS.
+(define_peephole2
+ [(set (match_operand:INT 0 "register_operand" "") (const_int 0))]
+ "peep2_regno_dead_p (0, CC_REG)
+ && (REGNO_DATA_P (REGNO (operands[0]), 1)
+ || REGNO_EXTENDED_P (REGNO (operands[0]), 1))"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (reg:CC CC_REG))])]
+)
+
+(define_insn "*mov<mode>_clr"
+ [(set (match_operand:INT 0 "register_operand" "=D")
+ (const_int 0))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "clr %0"
+)
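+
+;; For example, once the peephole2 above proves the flags register is dead,
+;; mov 0,d0
+;; is rewritten to the clr pattern above as
+;; clr d0
+;; (CLR clobbers the flags, hence the liveness test).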
+
+;; ----------------------------------------------------------------------
+;; ADD INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,!*y,!r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0, r")
+ (match_operand:SI 2 "nonmemory_operand" "r,O,i, i, r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ { return mn10300_output_add (operands, false); }
+ [(set_attr "timings" "11,11,11,11,22")
+ (set_attr "liw" "either,either,*,*,*")
+ (set_attr "liw_op" "add")]
+)
+
+;; Note that ADD IMM,SP does not set the flags, so omit that here.
+(define_insn "*addsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,!r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0, r")
+ (match_operand:SI 2 "nonmemory_operand" "ri, r")))
+ (set (reg CC_REG)
+ (compare (plus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNCmode)"
+ { return mn10300_output_add (operands, true); }
+ [(set_attr "timings" "11,22")]
+)
+
+;; A helper to expand the above, with the CC_MODE filled in.
+(define_expand "addsi3_flags"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))
+ (set (reg:CCZNC CC_REG)
+ (compare:CCZNC (plus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])]
+ ""
+)
+
+(define_insn "addc_internal"
+ [(set (match_operand:SI 0 "register_operand" "=D,r,r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC CC_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "%0,0,r"))
+ (match_operand:SI 2 "reg_or_am33_const_operand" " D,i,r")))
+ (clobber (reg:CC CC_REG))]
+ "reload_completed"
+ "@
+ addc %2,%0
+ addc %2,%0
+ addc %2,%1,%0"
+ [(set_attr "isa" "*,am33,am33")]
+)
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ rtx op0l, op0h, op1l, op1h, op2l, op2h;
+
+ op0l = gen_lowpart (SImode, operands[0]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ op0h = gen_highpart (SImode, operands[0]);
+ op1h = gen_highpart (SImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+
+ if (!reg_or_am33_const_operand (op2h, SImode))
+ op2h = force_reg (SImode, op2h);
+
+ emit_insn (gen_adddi3_internal (op0l, op0h, op1l, op2l, op1h, op2h));
+ DONE;
+})
+
+;; Note that reload only supports one commutative operand. Thus we cannot
+;; auto-swap both the high and low outputs with their matching constraints.
+;; For MN103, we're strapped for registers but thankfully the alternatives
+;; are few. For AM33, it is much easier not to represent the early
+;; clobber and the six permutations of immediate and three-operand adds,
+;; and instead allocate a scratch register and do the expansion by hand.
+
+(define_insn_and_split "adddi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r, r, r")
+ (plus:SI (match_operand:SI 2 "register_operand" "%0, 0, r")
+ (match_operand:SI 3 "nonmemory_operand" "ri,ri,ri")))
+ (set (match_operand:SI 1 "register_operand" "=D, D, r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (plus:SI (match_dup 2) (match_dup 3)) (match_dup 2))
+ (match_operand:SI 4 "register_operand" " 1, D, r"))
+ (match_operand:SI 5 "reg_or_am33_const_operand" " D, 1,ri")))
+ (clobber (match_scratch:SI 6 "=X, X,&r"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx op0l = operands[0];
+ rtx op0h = operands[1];
+ rtx op1l = operands[2];
+ rtx op2l = operands[3];
+ rtx op1h = operands[4];
+ rtx op2h = operands[5];
+ rtx scratch = operands[6];
+ rtx x;
+
+ if (reg_overlap_mentioned_p (op0l, op1h))
+ {
+ emit_move_insn (scratch, op0l);
+ op1h = scratch;
+ if (reg_overlap_mentioned_p (op0l, op2h))
+ op2h = scratch;
+ }
+ else if (reg_overlap_mentioned_p (op0l, op2h))
+ {
+ emit_move_insn (scratch, op0l);
+ op2h = scratch;
+ }
+
+ if (rtx_equal_p (op0l, op1l))
+ ;
+ else if (rtx_equal_p (op0l, op2l))
+ x = op1l, op1l = op2l, op2l = x;
+ else
+ {
+ gcc_assert (TARGET_AM33);
+ if (!REG_P (op2l))
+ {
+ emit_move_insn (op0l, op2l);
+ op2l = op1l;
+ op1l = op0l;
+ }
+ }
+ emit_insn (gen_addsi3_flags (op0l, op1l, op2l));
+
+ if (rtx_equal_p (op0h, op1h))
+ ;
+ else if (rtx_equal_p (op0h, op2h))
+ x = op1h, op1h = op2h, op2h = x;
+ else
+ {
+ gcc_assert (TARGET_AM33);
+ if (!REG_P (op2h))
+ {
+ emit_move_insn (op0h, op2h);
+ op2h = op1h;
+ op1h = op0h;
+ }
+ }
+ emit_insn (gen_addc_internal (op0h, op1h, op2h));
+ DONE;
+}
+ [(set_attr "isa" "*,*,am33")]
+)
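+
+;; For example (hypothetical register assignment), adding d3:d2 into
+;; d1:d0 splits into the pair, roughly
+;; add d2,d0 ; low word, sets the carry flag
+;; addc d3,d1 ; high word, consumes the carry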
+
+;; The following pattern is generated by combine when it proves that one
+;; of the inputs to the low-part of the double-word add is zero, and thus
+;; no carry is generated into the high-part.
+
+(define_insn_and_split "*adddi3_degenerate"
+ [(set (match_operand:SI 0 "register_operand" "=&r,&r")
+ (match_operand:SI 2 "nonmemory_operand" " 0, 0"))
+ (set (match_operand:SI 1 "register_operand" "=r , r")
+ (plus:SI (match_operand:SI 3 "register_operand" "%1 , r")
+ (match_operand:SI 4 "nonmemory_operand" "ri, r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ ""
+ [(const_int 0)]
+{
+ rtx scratch = NULL_RTX;
+ if (!rtx_equal_p (operands[0], operands[2]))
+ {
+ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]));
+ if (reg_overlap_mentioned_p (operands[0], operands[3])
+ || reg_overlap_mentioned_p (operands[0], operands[4]))
+ {
+ scratch = gen_reg_rtx (SImode);
+ emit_move_insn (scratch, operands[2]);
+ }
+ else
+ emit_move_insn (operands[0], operands[2]);
+ }
+ emit_insn (gen_addsi3 (operands[1], operands[3], operands[4]));
+ if (scratch)
+ emit_move_insn (operands[0], scratch);
+ DONE;
+})
+
+;; ----------------------------------------------------------------------
+;; SUBTRACT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,O,i,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ sub %2,%0
+ sub %2,%0
+ sub %2,%0
+ sub %2,%1,%0"
+ [(set_attr "isa" "*,*,*,am33")
+ (set_attr "liw" "either,either,*,*")
+ (set_attr "liw_op" "sub")
+ (set_attr "timings" "11,11,11,22")]
+)
+
+(define_insn "*subsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r, r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0, r")
+ (match_operand:SI 2 "nonmemory_operand" "ri,r")))
+ (set (reg CC_REG)
+ (compare (minus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNCmode)"
+ "@
+ sub %2,%0
+ sub %2,%1,%0"
+ [(set_attr "isa" "*,am33")
+ (set_attr "timings" "11,22")]
+)
+
+;; A helper to expand the above, with the CC_MODE filled in.
+(define_expand "subsi3_flags"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (minus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))
+ (set (reg:CCZNC CC_REG)
+ (compare:CCZNC (minus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])]
+ ""
+)
+
+(define_insn "subc_internal"
+ [(set (match_operand:SI 0 "register_operand" "=D,r,r")
+ (minus:SI
+ (minus:SI (match_operand:SI 1 "register_operand" " 0,0,r")
+ (match_operand:SI 2 "reg_or_am33_const_operand" " D,i,r"))
+ (geu:SI (reg:CC CC_REG) (const_int 0))))
+ (clobber (reg:CC CC_REG))]
+ "reload_completed"
+ "@
+ subc %2,%0
+ subc %2,%0
+ subc %2,%1,%0"
+ [(set_attr "isa" "*,am33,am33")]
+)
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))]
+ ""
+{
+ rtx op0l, op0h, op1l, op1h, op2l, op2h;
+
+ op0l = gen_lowpart (SImode, operands[0]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ op0h = gen_highpart (SImode, operands[0]);
+ op1h = gen_highpart (SImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+
+ if (!reg_or_am33_const_operand (op2h, SImode))
+ op2h = force_reg (SImode, op2h);
+
+ emit_insn (gen_subdi3_internal (op0l, op0h, op1l, op1h, op2l, op2h));
+ DONE;
+})
+
+;; As with adddi3, the use of the scratch register helps reduce the
+;; number of permutations for AM33.
+;; ??? The early clobber on op0 avoids a reload bug wherein both output
+;; registers are set the same. Consider negate, where op2 and op3 are
+;; both 0 and are CSEd to the same input register; reload then fails to
+;; undo the CSE when satisfying the matching constraints.
+
+(define_insn_and_split "subdi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=&r, r")
+ (minus:SI
+ (match_operand:SI 2 "register_operand" " 0, r")
+ (match_operand:SI 4 "nonmemory_operand" " ri,ri")))
+ (set (match_operand:SI 1 "register_operand" "=D , r")
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 3 "register_operand" " 1, r")
+ (match_operand:SI 5 "reg_or_am33_const_operand" " D,ri"))
+ (ltu:SI (match_dup 2) (match_dup 4))))
+ (clobber (match_scratch:SI 6 "=X ,&r"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx op0l = operands[0];
+ rtx op0h = operands[1];
+ rtx op1l = operands[2];
+ rtx op1h = operands[3];
+ rtx op2l = operands[4];
+ rtx op2h = operands[5];
+ rtx scratch = operands[6];
+
+ if (reg_overlap_mentioned_p (op0l, op1h))
+ {
+ emit_move_insn (scratch, op0l);
+ op1h = scratch;
+ if (reg_overlap_mentioned_p (op0l, op2h))
+ op2h = scratch;
+ }
+ else if (reg_overlap_mentioned_p (op0l, op2h))
+ {
+ emit_move_insn (scratch, op0l);
+ op2h = scratch;
+ }
+
+ if (!rtx_equal_p (op0l, op1l))
+ {
+ gcc_assert (TARGET_AM33);
+ if (!REG_P (op2l))
+ {
+ emit_move_insn (op0l, op1l);
+ op1l = op0l;
+ }
+ }
+ emit_insn (gen_subsi3_flags (op0l, op1l, op2l));
+
+ if (!rtx_equal_p (op0h, op1h))
+ {
+ gcc_assert (TARGET_AM33);
+ if (!REG_P (op2h))
+ {
+ emit_move_insn (op0h, op1h);
+ op1h = op0h;
+ }
+ }
+ emit_insn (gen_subc_internal (op0h, op1h, op2h));
+ DONE;
+}
+ [(set_attr "isa" "*,am33")]
+)
+
+;; The following pattern is generated by combine when it proves that one
+;; of the inputs to the low-part of the double-word sub is zero, and thus
+;; no carry is generated into the high-part.
+
+(define_insn_and_split "*subdi3_degenerate"
+ [(set (match_operand:SI 0 "register_operand" "=&r,&r")
+ (match_operand:SI 2 "nonmemory_operand" " 0, 0"))
+ (set (match_operand:SI 1 "register_operand" "=r , r")
+ (minus:SI (match_operand:SI 3 "register_operand" " 1, r")
+ (match_operand:SI 4 "nonmemory_operand" " ri, r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ ""
+ [(const_int 0)]
+{
+ rtx scratch = NULL_RTX;
+ if (!rtx_equal_p (operands[0], operands[2]))
+ {
+ gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]));
+ if (reg_overlap_mentioned_p (operands[0], operands[3])
+ || reg_overlap_mentioned_p (operands[0], operands[4]))
+ {
+ scratch = gen_reg_rtx (SImode);
+ emit_move_insn (scratch, operands[2]);
+ }
+ else
+ emit_move_insn (operands[0], operands[2]);
+ }
+ emit_insn (gen_subsi3 (operands[1], operands[3], operands[4]));
+ if (scratch)
+ emit_move_insn (operands[0], scratch);
+ DONE;
+})
+
+(define_insn_and_split "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,&r")
+ (neg:SI (match_operand:SI 1 "register_operand" " 0, r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ /* Recall that two's complement is one's complement plus one. When
+ allocated in DATA_REGS this is 2+1 bytes; otherwise (for am33)
+ this is 3+3 bytes.
+
+ For AM33, it would have been possible to load zero and use the
+ three-address subtract to have a total size of 3+4*N bytes for
+ multiple negations, plus increased throughput. Not attempted here. */
+
+ if (true_regnum (operands[0]) == true_regnum (operands[1]))
+ {
+ emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
+ emit_insn (gen_addsi3 (operands[0], operands[0], const1_rtx));
+ }
+ else
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ emit_insn (gen_subsi3 (operands[0], operands[0], operands[1]));
+ }
+ DONE;
+})
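+
+;; For example (hypothetical registers), negating d0 in place splits into
+;; not d0 ; one's complement
+;; add 1,d0 ; plus one (mn10300_output_add may pick an equivalent form)
+;; while negating r0 into a distinct register r1 splits into
+;; mov 0,r1
+;; sub r0,r1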
+
+;; ----------------------------------------------------------------------
+;; MULTIPLY INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; ??? Note that AM33 has a third multiply variant that puts the high part
+;; into the MDRQ register; however, this variant also constrains the inputs
+;; to be in DATA_REGS and thus isn't as helpful as it might be, considering
+;; the existence of the 4-operand multiply. Nor is there a set of divide
+;; insns that use MDRQ. Given that there is an IMM->MDRQ insn, this would
+;; have been very handy for starting udivmodsi4...
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" ""))))]
+ ""
+{
+ emit_insn (gen_mulsidi3_internal (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0]),
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mulsidi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=D,r")
+ (mult:SI (match_operand:SI 2 "register_operand" "%0,r")
+ (match_operand:SI 3 "register_operand" " D,r")))
+ (set (match_operand:SI 1 "register_operand" "=z,r")
+ (truncate:SI
+ (ashiftrt:DI
+ (mult:DI (sign_extend:DI (match_dup 2))
+ (sign_extend:DI (match_dup 3)))
+ (const_int 32))))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ if (which_alternative == 1)
+ return "mul %2,%3,%1,%0";
+ else if (TARGET_MULT_BUG)
+ return "nop\;nop\;mul %3,%0";
+ else
+ return "mul %3,%0";
+}
+ [(set_attr "isa" "*,am33")
+ (set (attr "timings")
+ (if_then_else (eq_attr "cpu" "am34") (const_int 24) (const_int 23)))]
+)
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ emit_insn (gen_umulsidi3_internal (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0]),
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "umulsidi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=D,r")
+ (mult:SI (match_operand:SI 2 "register_operand" "%0,r")
+ (match_operand:SI 3 "register_operand" " D,r")))
+ (set (match_operand:SI 1 "register_operand" "=z,r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI (match_dup 2))
+ (zero_extend:DI (match_dup 3)))
+ (const_int 32))))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ if (which_alternative == 1)
+ return "mulu %2,%3,%1,%0";
+ else if (TARGET_MULT_BUG)
+ return "nop\;nop\;mulu %3,%0";
+ else
+ return "mulu %3,%0";
+}
+ [(set_attr "isa" "*,am33")
+ (set (attr "timings")
+ (if_then_else (eq_attr "cpu" "am34") (const_int 24) (const_int 23)))]
+)
+
+(define_expand "mulsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (mult:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_am33_const_operand")))
+ (clobber (match_scratch:SI 3))
+ (clobber (reg:CC CC_REG))])]
+ ""
+)
+
+(define_insn "*mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D, r,r")
+ (mult:SI (match_operand:SI 2 "register_operand" "%0, 0,r")
+ (match_operand:SI 3 "reg_or_am33_const_operand" " D,ri,r")))
+ (clobber (match_scratch:SI 1 "=z, z,r"))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ if (which_alternative == 2)
+ return "mul %2,%3,%1,%0";
+ else if (TARGET_MULT_BUG)
+ return "nop\;nop\;mul %3,%0";
+ else
+ return "mul %3,%0";
+}
+ [(set_attr "isa" "*,am33,am33")
+ (set (attr "timings")
+ (if_then_else (eq_attr "cpu" "am34") (const_int 24) (const_int 23)))]
+)
+
+(define_expand "udivmodsi4"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (udiv:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "register_operand")))
+ (set (match_operand:SI 3 "register_operand")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (use (const_int 0))
+ (clobber (reg:CC CC_REG))])]
+ ""
+)
+
+;; Note the trick to get reload to put the zero into the MDR register,
+;; rather than exposing the load early and letting CSE or someone try
+;; to share the zeros between division insns, which tends to result
+;; in sequences like 0->r0->d0->mdr.
+
+(define_insn "*udivmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (udiv:SI (match_operand:SI 2 "register_operand" " 0")
+ (match_operand:SI 3 "register_operand" " D")))
+ (set (match_operand:SI 1 "register_operand" "=z")
+ (umod:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "nonmemory_operand" " 1"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "divu %3,%0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 3839) (const_int 4243)))]
+)
+
+(define_expand "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (use (match_dup 4))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+ operands[4] = gen_reg_rtx (SImode);
+ emit_insn (gen_ext_internal (operands[4], operands[1]));
+})
+
+;; ??? Ideally we'd represent this via shift, but it seems like adding a
+;; special-case pattern for (ashiftrt x 31) is just as likely to result
+;; in poor register allocation choices.
+(define_insn "ext_internal"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "D")] UNSPEC_EXT))]
+ ""
+ "ext %1"
+)
+
+(define_insn "*divmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (div:SI (match_operand:SI 2 "register_operand" " 0")
+ (match_operand:SI 3 "register_operand" " D")))
+ (set (match_operand:SI 1 "register_operand" "=z")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (use (match_operand:SI 4 "register_operand" " 1"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "div %3,%0";
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 3839) (const_int 4243)))]
+)
+
+
+;; ----------------------------------------------------------------------
+;; AND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ and %2,%0
+ and %2,%0
+ and %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "liw" "*,op1,*")
+ (set_attr "liw_op" "and")
+ (set_attr "timings" "22,11,11")]
+)
+
+(define_insn "*andsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (set (reg CC_REG)
+ (compare (and:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
+ "@
+ and %2,%0
+ and %2,%0
+ and %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "timings" "22,11,11")]
+)
+
+;; Make sure we generate extensions instead of ANDs.
+
+(define_split
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 255)))
+ (clobber (reg:CC CC_REG))])]
+ ""
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))]
+ { operands[1] = gen_lowpart (QImode, operands[1]); }
+)
+
+(define_split
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 65535)))
+ (clobber (reg:CC CC_REG))])]
+ ""
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))]
+ { operands[1] = gen_lowpart (HImode, operands[1]); }
+)
+
+;; Split AND by an appropriate constant into two shifts. Recall that
+;; operations with a full 32-bit immediate require an extra cycle, so
+;; this is a size optimization with no speed penalty. This only applies
+;; to DATA_REGS; the shift insns that AM33 adds are too large for a win.
+
+(define_split
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))
+ (clobber (reg:CC CC_REG))])]
+ "reload_completed
+ && REGNO_DATA_P (true_regnum (operands[0]), 1)
+ && mn10300_split_and_operand_count (operands[1]) != 0"
+ [(const_int 0)]
+{
+ int count = mn10300_split_and_operand_count (operands[1]);
+ if (count > 0)
+ {
+ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (count)));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (count)));
+ }
+ else
+ {
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (-count)));
+ emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (-count)));
+ }
+ DONE;
+})
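+
+;; For example, with d0 in DATA_REGS,
+;; and 0xfffffff0,d0
+;; splits into the smaller pair, roughly
+;; lsr 4,d0
+;; asl 4,d0
+;; (the shift patterns may pick shorter encodings such as asl2), clearing
+;; the low four bits without a 32-bit immediate.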
+
+;; ----------------------------------------------------------------------
+;; OR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ or %2,%0
+ or %2,%0
+ or %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "liw" "*,op1,*")
+ (set_attr "liw_op" "or")
+ (set_attr "timings" "22,11,11")]
+)
+
+(define_insn "*iorsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (set (reg CC_REG)
+ (compare (ior:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
+ "@
+ or %2,%0
+ or %2,%0
+ or %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "timings" "22,11,11")]
+)
+
+;; ----------------------------------------------------------------------
+;; XOR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ xor %2,%0
+ xor %2,%0
+ xor %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "liw" "*,op1,*")
+ (set_attr "liw_op" "xor")
+ (set_attr "timings" "22,11,11")]
+)
+
+(define_insn "*xorsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" " i,D,r")))
+ (set (reg CC_REG)
+ (compare (xor:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
+ "@
+ xor %2,%0
+ xor %2,%0
+ xor %2,%1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr "timings" "22,11,11")]
+)
+
+;; ----------------------------------------------------------------------
+;; NOT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (not:SI (match_operand:SI 1 "register_operand" " 0")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "not %0"
+)
+
+(define_insn "*one_cmplsi2_flags"
+ [(set (match_operand:SI 0 "register_operand" "=D")
+ (not:SI (match_operand:SI 1 "register_operand" " 0")))
+ (set (reg CC_REG)
+ (compare (not:SI (match_dup 1))
+ (const_int 0)))]
+ "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
+ "not %0"
+)
+
+;; ----------------------------------------------------------------------
+;; COMPARE AND BRANCH INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; We expand the comparison into a single insn so that it will not be split
+;; up by reload.
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+ ""
+)
+
+(define_insn_and_split "*cbranchsi4_cmp"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ordered_comparison_operator"
+ [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "nonmemory_operand" "ri")])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ mn10300_split_cbranch (CCmode, operands[3], operands[2]);
+ DONE;
+})
+
+(define_insn "*cmpsi"
+ [(set (reg CC_REG)
+ (compare (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,O,i")))]
+ "reload_completed"
+{
+ /* The operands of CMP must be distinct registers. In the case where
+ we've failed to optimize the comparison of a register to itself, we
+ must use another method to set the Z flag. We can achieve this
+ effect with a BTST 0,D0. This will not alter the contents of D0;
+ the use of d0 is arbitrary; any data register would work. */
+ if (rtx_equal_p (operands[0], operands[1]))
+ return "btst 0,d0";
+ else
+ return "cmp %1,%0";
+}
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34") (const_int 11) (const_int 22))
+ (if_then_else (eq_attr "cpu" "am34") (const_int 11) (const_int 22))
+ (if_then_else (eq_attr "cpu" "am34") (const_int 11) (const_int 22))])
+ (set_attr "liw" "either,either,*")
+ (set_attr "liw_op" "cmp")]
+)
+
+(define_insn "*integer_conditional_branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 2 "int_mode_flags" "")
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "reload_completed"
+ "b%b0 %1"
+)
+
+(define_insn_and_split "*cbranchsi4_btst"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "CCZN_comparison_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "D")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (const_int 0)])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ mn10300_split_cbranch (CCZNmode, operands[3], operands[2]);
+ DONE;
+})
+
+(define_insn "*btstsi"
+ [(set (reg:CCZN CC_REG)
+ (compare:CCZN
+ (and:SI (match_operand:SI 0 "register_operand" "D")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (const_int 0)))]
+ "reload_completed"
+ "btst %1,%0"
+)
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "nonmemory_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "TARGET_AM33_2"
+ ""
+)
+
+(define_insn_and_split "*cbranchsf4_cmp"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ordered_comparison_operator"
+ [(match_operand:SF 0 "register_operand" "f")
+ (match_operand:SF 1 "nonmemory_operand" "fF")])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))
+ ]
+ "TARGET_AM33_2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ mn10300_split_cbranch (CC_FLOATmode, operands[3], operands[2]);
+ DONE;
+})
+
+(define_insn "*am33_cmpsf"
+ [(set (reg:CC_FLOAT CC_REG)
+ (compare:CC_FLOAT (match_operand:SF 0 "register_operand" "f")
+ (match_operand:SF 1 "nonmemory_operand" "fF")))]
+ "TARGET_AM33_2 && reload_completed"
+ "fcmp %1, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 25)))]
+)
+
+(define_insn "*float_conditional_branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(reg:CC_FLOAT CC_REG) (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_AM33_2 && reload_completed"
+ "fb%b0 %1"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 44) (const_int 33)))]
+)
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmp %l0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 44)))]
+)
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))]
+ ""
+ "jmp (%0)"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 33)))]
+)
+
+(define_expand "builtin_setjmp_receiver"
+ [(match_operand 0 "" "")]
+ "flag_pic"
+{
+ emit_insn (gen_load_pic ());
+ DONE;
+})
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "immediate_operand")
+ (match_operand:SI 2 "immediate_operand")
+ (match_operand 3 "" "") (match_operand 4 "")]
+ ""
+{
+ rtx table = gen_reg_rtx (SImode);
+ rtx index = gen_reg_rtx (SImode);
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx test;
+
+ emit_move_insn (table, gen_rtx_LABEL_REF (VOIDmode, operands[3]));
+ emit_insn (gen_addsi3 (index, operands[0], GEN_INT (- INTVAL (operands[1]))));
+ test = gen_rtx_fmt_ee (GTU, VOIDmode, index, operands[2]);
+ emit_jump_insn (gen_cbranchsi4 (test, index, operands[2], operands[4]));
+
+ emit_insn (gen_ashlsi3 (index, index, const2_rtx));
+ emit_move_insn (addr, gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, table, index)));
+ if (flag_pic)
+ emit_insn (gen_addsi3 (addr, addr, table));
+
+ emit_jump_insn (gen_tablejump (addr, operands[3]));
+ DONE;
+})
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp (%0)"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 11) (const_int 33)))]
+)
+
+;; Call subroutine with no return value.
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "general_operand")
+ (match_operand:SI 1 "general_operand"))]
+ ""
+{
+ rtx fn = XEXP (operands[0], 0);
+
+ if (flag_pic && GET_CODE (fn) == SYMBOL_REF)
+ {
+ if (MN10300_GLOBAL_P (fn))
+ {
+ /* The PLT code won't run on AM30, but then, there's no
+ shared library support for AM30 either, so we just assume
+ the linker is going to adjust all @PLT relocs to the
+ actual symbols. */
+ emit_use (pic_offset_table_rtx);
+ fn = gen_rtx_UNSPEC (SImode, gen_rtvec (1, fn), UNSPEC_PLT);
+ }
+ else
+ fn = gen_rtx_UNSPEC (SImode, gen_rtvec (1, fn), UNSPEC_PIC);
+ }
+ if (! call_address_operand (fn, VOIDmode))
+ fn = force_reg (SImode, fn);
+
+ XEXP (operands[0], 0) = fn;
+})
+
+(define_insn "*call_internal"
+ [(call (mem:QI (match_operand:SI 0 "call_address_operand" "a,S"))
+ (match_operand:SI 1 "" ""))]
+ ""
+ "@
+ calls %C0
+ call %C0,[],0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 33) (const_int 44))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 55) (const_int 33))
+ ])
+ ]
+)
+
+;; Call subroutine, returning value in operand 0
+;; (which must be a hard register).
+
+(define_expand "call_value"
+ [(set (match_operand 0 "")
+ (call (match_operand:QI 1 "general_operand")
+ (match_operand:SI 2 "general_operand")))]
+ ""
+{
+ rtx fn = XEXP (operands[1], 0);
+
+ if (flag_pic && GET_CODE (fn) == SYMBOL_REF)
+ {
+ if (MN10300_GLOBAL_P (fn))
+ {
+ /* The PLT code won't run on AM30, but then, there's no
+ shared library support for AM30 either, so we just assume
+ the linker is going to adjust all @PLT relocs to the
+ actual symbols. */
+ emit_use (pic_offset_table_rtx);
+ fn = gen_rtx_UNSPEC (SImode, gen_rtvec (1, fn), UNSPEC_PLT);
+ }
+ else
+ fn = gen_rtx_UNSPEC (SImode, gen_rtvec (1, fn), UNSPEC_PIC);
+ }
+ if (! call_address_operand (fn, VOIDmode))
+ fn = force_reg (SImode, fn);
+
+ XEXP (operands[1], 0) = fn;
+})
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_address_operand" "a,S"))
+ (match_operand:SI 2 "" "")))]
+ ""
+ "@
+ calls %C1
+ call %C1,[],0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 33) (const_int 44))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 55) (const_int 33))
+ ])
+ ]
+)
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "")
+ (const_int 0))
+ (match_operand 1 "")
+ (match_operand 2 "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+ DONE;
+})
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+)
+
+;; ----------------------------------------------------------------------
+;; EXTEND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (zero_extend:SI
+ (match_operand:QI 1 "nonimmediate_operand" " 0,m,r")))]
+ ""
+ "@
+ extbu %0
+ movbu %1,%0
+ extbu %1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr_alternative "timings"
+ [(const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (const_int 11)
+ ])]
+)
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,r")
+ (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" " 0,m,r")))]
+ ""
+ "@
+ exthu %0
+ movhu %1,%0
+ exthu %1,%0"
+ [(set_attr "isa" "*,*,am33")
+ (set_attr_alternative "timings"
+ [(const_int 11)
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 24))
+ (const_int 11)])]
+)
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,r")
+ (sign_extend:SI
+ (match_operand:QI 1 "register_operand" "0,r")))]
+ ""
+ "@
+ extb %0
+ extb %1,%0"
+ [(set_attr "isa" "*,am33")]
+)
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=D,r")
+ (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "0,r")))]
+ ""
+ "@
+ exth %0
+ exth %1,%0"
+ [(set_attr "isa" "*,am33")]
+)
+
+;; ----------------------------------------------------------------------
+;; SHIFTS
+;; ----------------------------------------------------------------------
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,D,d,d,D,D,D,r")
+ (ashift:SI
+ (match_operand:SI 1 "register_operand" " 0,0,0,0,0,0,0,r")
+ (match_operand:QI 2 "nonmemory_operand" " J,K,M,L,D,O,i,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ add %0,%0
+ asl2 %0
+ asl2 %0\;add %0,%0
+ asl2 %0\;asl2 %0
+ asl %S2,%0
+ asl %S2,%0
+ asl %S2,%0
+ asl %2,%1,%0"
+ [(set_attr "isa" "*,*,*,*,*,*,*,am33")
+ (set_attr "liw" "op2,op2,op2,op2,op2,op2,*,*")
+ (set_attr "liw_op" "asl")
+ (set_attr "timings" "11,11,22,22,11,11,11,11")]
+)
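+
+;; The first four alternatives above presumably special-case small
+;; constant shift counts: x << 1 as a single add, x << 2 as one ASL2,
+;; and x << 3 / x << 4 as two-insn ASL2 combinations (hence their "22"
+;; timings); larger or variable counts fall through to plain ASL.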
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D,r")
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "0,0,0,r")
+ (match_operand:QI 2 "nonmemory_operand" "D,O,i,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ lsr %S2,%0
+ lsr %S2,%0
+ lsr %S2,%0
+ lsr %2,%1,%0"
+ [(set_attr "isa" "*,*,*,am33")
+ (set_attr "liw" "op2,op2,*,*")
+ (set_attr "liw_op" "lsr")]
+)
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,D,r")
+ (ashiftrt:SI
+ (match_operand:SI 1 "register_operand" "0,0,0,r")
+ (match_operand:QI 2 "nonmemory_operand" "D,O,i,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ asr %S2,%0
+ asr %S2,%0
+ asr %S2,%0
+ asr %2,%1,%0"
+ [(set_attr "isa" "*,*,*,am33")
+ (set_attr "liw" "op2,op2,*,*")
+ (set_attr "liw_op" "asr")]
+)
+
+;; ----------------------------------------------------------------------
+;; MISCELLANEOUS
+;; ----------------------------------------------------------------------
+
+(define_expand "clzsi2"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")
+ (const_int 0)] UNSPEC_BSCH))
+ (clobber (reg:CC CC_REG))])]
+ "TARGET_AM33"
+)
+
+(define_insn "*bsch"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "0")]
+ UNSPEC_BSCH))
+ (clobber (reg:CC CC_REG))]
+ "TARGET_AM33"
+ "bsch %1,%0"
+)
+
+;; ----------------------------------------------------------------------
+;; FP INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (abs:SF (match_operand:SF 1 "register_operand" "0,?f")))]
+ "TARGET_AM33_2"
+ "@
+ fabs %0
+ fabs %1, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 14)))]
+)
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (neg:SF (match_operand:SF 1 "register_operand" "0,?f")))]
+ "TARGET_AM33_2"
+ "@
+ fneg %0
+ fneg %1, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 14)))]
+)
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "")))]
+ "TARGET_AM33_2 && flag_unsafe_math_optimizations"
+{
+ rtx scratch = gen_reg_rtx (SFmode);
+ emit_insn (gen_rsqrtsf2 (scratch, operands[1], CONST1_RTX (SFmode)));
+ emit_insn (gen_divsf3 (operands[0], force_reg (SFmode, CONST1_RTX (SFmode)),
+ scratch));
+ DONE;
+})
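+
+;; FRSQRT computes 1/sqrt(x), so the expander above uses the identity
+;; sqrt(x) = 1 / (1/sqrt(x)): first rsqrtsf2 into a scratch, then a
+;; divide of 1.0 by the scratch.  The extra rounding step this
+;; introduces is why the expansion is only enabled under
+;; flag_unsafe_math_optimizations.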
+
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (div:SF (match_operand:SF 2 "const_1f_operand" "F,F")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "0,?f"))))
+ (clobber (reg:CC_FLOAT CC_REG))]
+ "TARGET_AM33_2"
+ "@
+ frsqrt %0
+ frsqrt %1, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 4753) (const_int 2327)))]
+)
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (plus:SF (match_operand:SF 1 "register_operand" "%0,f")
+ (match_operand:SF 2 "nonmemory_operand" "f,?fF")))
+ (clobber (reg:CC_FLOAT CC_REG))]
+ "TARGET_AM33_2"
+ "@
+ fadd %2, %0
+ fadd %2, %1, %0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 14))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 25))
+ ])]
+)
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (minus:SF (match_operand:SF 1 "register_operand" "0,f")
+ (match_operand:SF 2 "nonmemory_operand" "f,?fF")))
+ (clobber (reg:CC_FLOAT CC_REG))]
+ "TARGET_AM33_2"
+ "@
+ fsub %2, %0
+ fsub %2, %1, %0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 14))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 25))
+ ])]
+)
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (mult:SF (match_operand:SF 1 "register_operand" "%0,f")
+ (match_operand:SF 2 "nonmemory_operand" "f,?fF")))
+ (clobber (reg:CC_FLOAT CC_REG))
+ ]
+ "TARGET_AM33_2"
+ "@
+ fmul %2, %0
+ fmul %2, %1, %0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 14))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 25))
+ ])]
+)
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (div:SF (match_operand:SF 1 "register_operand" "0,f")
+ (match_operand:SF 2 "nonmemory_operand" "f,?fF")))
+ (clobber (reg:CC_FLOAT CC_REG))]
+ "TARGET_AM33_2"
+ "@
+ fdiv %2, %0
+ fdiv %2, %1, %0"
+ [(set_attr_alternative "timings"
+ [(if_then_else (eq_attr "cpu" "am34")
+ (const_int 2531) (const_int 1216))
+ (if_then_else (eq_attr "cpu" "am34")
+ (const_int 2531) (const_int 1317))
+ ])]
+)
+
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=c")
+ (fma:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "f")))
+ (clobber (reg:CC_FLOAT CC_REG))
+ ]
+ "TARGET_AM33_2"
+ "fmadd %1, %2, %3, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 24)))]
+)
+
+(define_insn "fmssf4"
+ [(set (match_operand:SF 0 "register_operand" "=c")
+ (fma:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")
+ (neg:SF (match_operand:SF 3 "register_operand" "f"))))
+ (clobber (reg:CC_FLOAT CC_REG))
+ ]
+ "TARGET_AM33_2"
+ "fmsub %1, %2, %3, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 24)))]
+)
+
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=c")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f"))
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "f")))
+ (clobber (reg:CC_FLOAT CC_REG))
+ ]
+ "TARGET_AM33_2"
+ "fnmadd %1, %2, %3, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 24)))]
+)
+
+(define_insn "fnmssf4"
+ [(set (match_operand:SF 0 "register_operand" "=c")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f"))
+ (match_operand:SF 2 "register_operand" "f")
+ (neg:SF (match_operand:SF 3 "register_operand" "f"))))
+ (clobber (reg:CC_FLOAT CC_REG))
+ ]
+ "TARGET_AM33_2"
+ "fnmsub %1, %2, %3, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 17) (const_int 24)))]
+)
+
+;; ----------------------------------------------------------------------
+;; PROLOGUE/EPILOGUE
+;; ----------------------------------------------------------------------
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ { mn10300_expand_prologue (); DONE; }
+)
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ { mn10300_expand_epilogue (); DONE; }
+)
+
+(define_insn "return"
+ [(return)]
+ "mn10300_can_use_rets_insn ()"
+{
+ /* The RETF insn is 4 cycles faster than RETS, though 1 byte larger. */
+ if (optimize_insn_for_speed_p () && mn10300_can_use_retf_insn ())
+ return "retf [],0";
+ else
+ return "rets";
+})
+
+(define_insn "return_ret"
+ [(return)
+ (use (match_operand:SI 0 "const_int_operand" ""))]
+ ""
+{
+ /* The RETF insn is up to 3 cycles faster than RET. */
+ fputs ((mn10300_can_use_retf_insn () ? "\tretf " : "\tret "), asm_out_file);
+ mn10300_print_reg_list (asm_out_file, mn10300_get_live_callee_saved_regs ());
+ fprintf (asm_out_file, ",%d\n", (int) INTVAL (operands[0]));
+ return "";
+})
+
+;; This instruction matches one generated by mn10300_gen_multiple_store()
+(define_insn "store_movm"
+ [(match_parallel 0 "mn10300_store_multiple_operation"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_operand 1 "" "")))])]
+ ""
+{
+ fputs ("\tmovm ", asm_out_file);
+ mn10300_print_reg_list (asm_out_file,
+ mn10300_store_multiple_operation (operands[0],
+ VOIDmode));
+ fprintf (asm_out_file, ",(sp)\n");
+ return "";
+}
+ ;; Assume that no more than 8 registers will be pushed.
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 99) (const_int 88)))]
+)
+
+(define_expand "load_pic"
+ [(const_int 0)]
+ "flag_pic"
+{
+ if (TARGET_AM33)
+ emit_insn (gen_am33_load_pic (pic_offset_table_rtx));
+ else if (mn10300_frame_size () == 0)
+ emit_insn (gen_mn10300_load_pic0 (pic_offset_table_rtx));
+ else
+ emit_insn (gen_mn10300_load_pic1 (pic_offset_table_rtx));
+ DONE;
+})
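+
+;; In other words: the AM33 form below reads the PC directly, while
+;; the pre-AM33 forms recover it from the return address that a CALLS
+;; to the next instruction pushes -- into a freshly allocated stack
+;; slot (mn10300_load_pic0) or, when the frame is non-empty, into
+;; stack space that may be clobbered (mn10300_load_pic1).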
+
+(define_insn "am33_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_GOT))
+ (clobber (reg:CC CC_REG))]
+ "TARGET_AM33"
+{
+ operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
+ return ".LPIC%=:\;mov pc,%0\;add %1-(.LPIC%=-.),%0";
+}
+ [(set_attr "timings" "33")]
+)
+
+;; Load pic register with push/pop of stack.
+(define_insn "mn10300_load_pic0"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_GOT))
+ (clobber (reg:SI MDR_REG))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
+ return ("add -4,sp\;"
+ "calls .LPIC%=\n"
+ ".LPIC%=:\;"
+ "movm (sp),[%0]\;"
+ "add %1-(.LPIC%=-.),%0");
+}
+ [(set_attr "timings" "88")]
+)
+
+;; Load pic register re-using existing stack space.
+(define_insn "mn10300_load_pic1"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_GOT))
+ (clobber (mem:SI (reg:SI SP_REG)))
+ (clobber (reg:SI MDR_REG))
+ (clobber (reg:CC CC_REG))]
+ ""
+{
+ operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
+ return ("calls .LPIC%=\n"
+ ".LPIC%=:\;"
+ "mov (sp),%0\;"
+ "add %1-(.LPIC%=-.),%0");
+}
+ [(set_attr "timings" "66")]
+)
+
+;; The mode on operand 3 has been deliberately omitted because it
+;; can be either SI (for arithmetic operations) or QI (for shifts).
+(define_insn "liw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_dup 0)
+ (match_operand 2 "liw_operand" "rO")
+ (match_operand:SI 4 "const_int_operand" "")]
+ UNSPEC_LIW))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (unspec:SI [(match_dup 1)
+ (match_operand 3 "liw_operand" "rO")
+ (match_operand:SI 5 "const_int_operand" "")]
+ UNSPEC_LIW))]
+ "TARGET_ALLOW_LIW"
+ "%W4_%W5 %2, %0, %3, %1"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 12)))]
+)
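+
+;; Operands 4 and 5 select the two bundled operations; the %W output
+;; modifier presumably prints each operation's mnemonic, so the
+;; template above expands to a combined opcode along the lines of
+;; "add_mov %2, %0, %3, %1" (illustrative only).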
+
+;; The mode on operand 1 has been deliberately omitted because it
+;; can be either SI (for arithmetic operations) or QI (for shifts).
+(define_insn "cmp_liw"
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "liw_operand" "rO")))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_dup 0)
+ (match_operand 1 "liw_operand" "rO")
+ (match_operand:SI 4 "const_int_operand" "")]
+ UNSPEC_LIW))]
+ "TARGET_ALLOW_LIW"
+ "cmp_%W4 %3, %2, %1, %0"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 12)))]
+)
+
+(define_insn "liw_cmp"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_dup 0)
+ (match_operand 1 "liw_operand" "rO")
+ (match_operand:SI 4 "const_int_operand" "")]
+ UNSPEC_LIW))
+ (set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "liw_operand" "rO")))]
+ "TARGET_ALLOW_LIW"
+ "%W4_cmp %1, %0, %3, %2"
+ [(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
+ (const_int 13) (const_int 12)))]
+)
diff --git a/gcc/config/mn10300/mn10300.opt b/gcc/config/mn10300/mn10300.opt
new file mode 100644
index 000000000..8909d8bd1
--- /dev/null
+++ b/gcc/config/mn10300/mn10300.opt
@@ -0,0 +1,56 @@
+; Options for the Matsushita MN10300 port of the compiler.
+
+; Copyright (C) 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mam33
+Target
+Target the AM33 processor
+
+mam33-2
+Target
+Target the AM33/2.0 processor
+
+mam34
+Target Report
+Target the AM34 processor
+
+mtune=
+Target RejectNegative Joined
+Tune code for the given processor
+
+mmult-bug
+Target Report Mask(MULT_BUG)
+Work around hardware multiply bug
+
+; Ignored by the compiler
+mno-crt0
+Target RejectNegative
+
+; Ignored by the compiler
+mrelax
+Target RejectNegative
+Enable linker relaxations
+
+mreturn-pointer-on-d0
+Target Report Mask(PTR_A0D0)
+Return pointers in both a0 and d0
+
+mliw
+Target Report Mask(ALLOW_LIW)
+Allow gcc to generate LIW instructions
diff --git a/gcc/config/mn10300/predicates.md b/gcc/config/mn10300/predicates.md
new file mode 100644
index 000000000..4c78c51e4
--- /dev/null
+++ b/gcc/config/mn10300/predicates.md
@@ -0,0 +1,69 @@
+;; Predicate definitions for Matsushita MN10300.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if the operand is the 1.0f constant.
+
+(define_predicate "const_1f_operand"
+ (match_code "const_int,const_double")
+{
+ return (op == CONST1_RTX (SFmode));
+})
+
+;; Return true if OP is a valid call operand.
+
+(define_predicate "call_address_operand"
+ (match_code "symbol_ref,reg,unspec")
+{
+ if (flag_pic)
+ return (satisfies_constraint_S (op) || GET_CODE (op) == REG);
+
+ return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG);
+})
+
+(define_predicate "impossible_plus_operand"
+ (match_code "plus")
+{
+ return XEXP (op, 0) == stack_pointer_rtx
+ || XEXP (op, 1) == stack_pointer_rtx;
+})
+
+(define_predicate "reg_or_am33_const_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_test "TARGET_AM33")
+ (match_operand 0 "immediate_operand"))))
+
+(define_predicate "label_ref_operand"
+ (match_code "label_ref"))
+
+(define_special_predicate "int_mode_flags"
+ (match_code "reg")
+{
+ if (REGNO (op) != CC_REG)
+ return false;
+ if (GET_MODE (op) == CC_FLOATmode)
+ return false;
+ return GET_MODE_CLASS (GET_MODE (op)) == MODE_CC;
+})
+
+(define_predicate "CCZN_comparison_operator"
+ (match_code "eq,ne,lt,ge"))
+
+(define_predicate "liw_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "satisfies_constraint_O (op)")))
diff --git a/gcc/config/mn10300/t-linux b/gcc/config/mn10300/t-linux
new file mode 100644
index 000000000..61ed88e66
--- /dev/null
+++ b/gcc/config/mn10300/t-linux
@@ -0,0 +1,29 @@
+# Copyright (C) 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
diff --git a/gcc/config/mn10300/t-mn10300 b/gcc/config/mn10300/t-mn10300
new file mode 100644
index 000000000..eeefeb602
--- /dev/null
+++ b/gcc/config/mn10300/t-mn10300
@@ -0,0 +1,36 @@
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001,
+# 2003, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+MULTILIB_OPTIONS = mam33/mam33-2/mam34
+MULTILIB_DIRNAMES = am33 am33-2 am34
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/moxie/constraints.md b/gcc/config/moxie/constraints.md
new file mode 100644
index 000000000..f76726813
--- /dev/null
+++ b/gcc/config/moxie/constraints.md
@@ -0,0 +1,56 @@
+;; Constraint definitions for Moxie
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Anthony Green <green@moxielogic.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; -------------------------------------------------------------------------
+;; Constraints
+;; -------------------------------------------------------------------------
+
+(define_constraint "A"
+ "An absolute address."
+ (and (match_code "mem")
+ (ior (match_test "GET_CODE (XEXP (op, 0)) == SYMBOL_REF")
+ (match_test "GET_CODE (XEXP (op, 0)) == LABEL_REF")
+ (match_test "GET_CODE (XEXP (op, 0)) == CONST"))))
+
+(define_constraint "B"
+ "An offset address."
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")))
+
+(define_constraint "W"
+ "A register indirect memory operand."
+ (and (match_code "mem")
+ (match_test "REG_P (XEXP (op, 0))
+ && REGNO_OK_FOR_BASE_P (REGNO (XEXP (op, 0)))")))
+
+(define_constraint "O"
+ "The constant zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "I"
+ "An 8-bit constant (0..255)"
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 255")))
+
+(define_constraint "N"
+ "A constant -(0..255)"
+ (and (match_code "const_int")
+ (match_test "ival >= -255 && ival <= 0")))
diff --git a/gcc/config/moxie/crti.asm b/gcc/config/moxie/crti.asm
new file mode 100644
index 000000000..f44582799
--- /dev/null
+++ b/gcc/config/moxie/crti.asm
@@ -0,0 +1,40 @@
+# crti.asm for moxie
+#
+# Copyright (C) 2009 Free Software Foundation
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+ .file "crti.asm"
+
+ .section ".init"
+ .global _init
+ .type _init, @function
+ .p2align 1
+_init:
+
+ .section ".fini"
+ .global _fini
+ .type _fini,@function
+ .p2align 1
+_fini:
diff --git a/gcc/config/moxie/crtn.asm b/gcc/config/moxie/crtn.asm
new file mode 100644
index 000000000..3ac9d31ee
--- /dev/null
+++ b/gcc/config/moxie/crtn.asm
@@ -0,0 +1,34 @@
+# crtn.asm for moxie
+#
+# Copyright (C) 2009 Free Software Foundation
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+ .file "crtn.asm"
+
+ .section ".init"
+ ret
+
+ .section ".fini"
+ ret
diff --git a/gcc/config/moxie/moxie-protos.h b/gcc/config/moxie/moxie-protos.h
new file mode 100644
index 000000000..7d939f588
--- /dev/null
+++ b/gcc/config/moxie/moxie-protos.h
@@ -0,0 +1,25 @@
+/* Prototypes for moxie.c functions used in the md file & elsewhere.
+ Copyright (C) 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern void moxie_expand_prologue (void);
+extern void moxie_expand_epilogue (void);
+extern int moxie_initial_elimination_offset (int, int);
+extern void moxie_print_operand (FILE *, rtx, int);
+extern void moxie_print_operand_address (FILE *, rtx);
diff --git a/gcc/config/moxie/moxie.c b/gcc/config/moxie/moxie.c
new file mode 100644
index 000000000..53b73d15f
--- /dev/null
+++ b/gcc/config/moxie/moxie.c
@@ -0,0 +1,592 @@
+/* Target Code for moxie
+ Copyright (C) 2008, 2009, 2010 Free Software Foundation
+ Contributed by Anthony Green.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "reload.h"
+#include "diagnostic-core.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "df.h"
+
+#define LOSE_AND_RETURN(msgid, x) \
+ do \
+ { \
+ moxie_operand_lossage (msgid, x); \
+ return; \
+ } while (0)
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+moxie_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ const HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > 2 * UNITS_PER_WORD);
+}
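+
+/* Illustrative: with UNITS_PER_WORD == 4, a struct of two ints
+   (size 8 == 2 * UNITS_PER_WORD) is returned in registers, while a
+   struct of three ints (size 12) or a variable-sized type (size -1)
+   is returned in memory.  */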
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its
+ FUNCTION_DECL; otherwise, FUNC is 0.
+
+ We always return values in register $r0 for moxie. */
+
+static rtx
+moxie_function_value (const_tree valtype,
+ const_tree fntype_or_decl ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), MOXIE_R0);
+}
+
+/* Define how to find the value returned by a library function.
+
+ We always return values in register $r0 for moxie. */
+
+static rtx
+moxie_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, MOXIE_R0);
+}
+
+/* Handle TARGET_FUNCTION_VALUE_REGNO_P.
+
+ We always return values in register $r0 for moxie. */
+
+static bool
+moxie_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == MOXIE_R0);
+}
+
+/* Emit an error message when we're in an asm, and a fatal error for
+ "normal" insns. Formatted output isn't easily implemented, since we
+ use output_operand_lossage to output the actual message and handle the
+ categorization of the error. */
+
+static void
+moxie_operand_lossage (const char *msgid, rtx op)
+{
+ debug_rtx (op);
+ output_operand_lossage ("%s", msgid);
+}
+
+/* The PRINT_OPERAND_ADDRESS worker. */
+
+void
+moxie_print_operand_address (FILE *file, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (file, "(%s)", reg_names[REGNO (x)]);
+ break;
+
+ case PLUS:
+ switch (GET_CODE (XEXP (x, 1)))
+ {
+ case CONST_INT:
+ fprintf (file, "%ld(%s)",
+		   INTVAL (XEXP (x, 1)), reg_names[REGNO (XEXP (x, 0))]);
+ break;
+ case SYMBOL_REF:
+ output_addr_const (file, XEXP (x, 1));
+ fprintf (file, "(%s)", reg_names[REGNO (XEXP (x, 0))]);
+ break;
+ case CONST:
+ {
+ rtx plus = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (XEXP (plus, 0)) == SYMBOL_REF
+ && CONST_INT_P (XEXP (plus, 1)))
+ {
+		output_addr_const (file, XEXP (plus, 0));
+		fprintf (file, "+%ld(%s)", INTVAL (XEXP (plus, 1)),
+			 reg_names[REGNO (XEXP (x, 0))]);
+	      }
+	    else
+	      abort ();
+	  }
+	  break;
+	default:
+	  abort ();
+ }
+ break;
+
+ default:
+ output_addr_const (file, x);
+ break;
+ }
+}
+
+/* The PRINT_OPERAND worker. */
+
+void
+moxie_print_operand (FILE *file, rtx x, int code)
+{
+ rtx operand = x;
+
+ /* New code entries should just be added to the switch below. If
+ handling is finished, just return. If handling was just a
+ modification of the operand, the modified operand should be put in
+ "operand", and then do a break to let default handling
+ (zero-modifier) output the operand. */
+
+ switch (code)
+ {
+ case 0:
+ /* No code, print as usual. */
+ break;
+
+ default:
+ LOSE_AND_RETURN ("invalid operand modifier letter", x);
+ }
+
+  /* Print the operand as if there were no modifier letter.  */
+ switch (GET_CODE (operand))
+ {
+ case REG:
+ if (REGNO (operand) > MOXIE_R13)
+ internal_error ("internal error: bad register: %d", REGNO (operand));
+ fprintf (file, "%s", reg_names[REGNO (operand)]);
+ return;
+
+ case MEM:
+ output_address (XEXP (operand, 0));
+ return;
+
+ default:
+ /* No need to handle all strange variants, let output_addr_const
+ do it for us. */
+ if (CONSTANT_P (operand))
+ {
+ output_addr_const (file, operand);
+ return;
+ }
+
+ LOSE_AND_RETURN ("unexpected operand", x);
+ }
+}
+
+/* Per-function machine data. */
+struct GTY(()) machine_function
+ {
+ /* Number of bytes saved on the stack for callee saved registers. */
+ int callee_saved_reg_size;
+
+ /* Number of bytes saved on the stack for local variables. */
+ int local_vars_size;
+
+  /* The number of bytes the prologue subtracts from the stack
+   * pointer: pretend args, padded local variables and outgoing
+   * arguments.  Used in expand_prologue ().  */
+ int size_for_adjusting_sp;
+ };
+
+/* Zero initialization is OK for all current fields. */
+
+static struct machine_function *
+moxie_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+
+/* The TARGET_OPTION_OVERRIDE worker.
+   All this currently does is set init_machine_status.  */
+static void
+moxie_option_override (void)
+{
+ /* Set the per-function-data initializer. */
+ init_machine_status = moxie_init_machine_status;
+}
+
+/* Compute the size of the local area and the size to be adjusted by the
+ * prologue and epilogue. */
+
+static void
+moxie_compute_frame (void)
+{
+ /* For aligning the local variables. */
+ int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+ int padding_locals;
+ int regno;
+
+  /* Start from the size of the function's local variables.  */
+ cfun->machine->local_vars_size = get_frame_size ();
+
+ /* Align to the stack alignment. */
+ padding_locals = cfun->machine->local_vars_size % stack_alignment;
+ if (padding_locals)
+ padding_locals = stack_alignment - padding_locals;
+
+ cfun->machine->local_vars_size += padding_locals;
+
+ cfun->machine->callee_saved_reg_size = 0;
+
+ /* Save callee-saved registers. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (df_regs_ever_live_p (regno) && (! call_used_regs[regno]))
+ cfun->machine->callee_saved_reg_size += 4;
+
+ cfun->machine->size_for_adjusting_sp =
+ crtl->args.pretend_args_size
+ + cfun->machine->local_vars_size
+ + (ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0);
+}
+
+void
+moxie_expand_prologue (void)
+{
+ int regno;
+ rtx insn;
+
+ moxie_compute_frame ();
+
+ /* Save callee-saved registers. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ if (!fixed_regs[regno] && df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ {
+ insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (cfun->machine->size_for_adjusting_sp > 0)
+ {
+ int i = cfun->machine->size_for_adjusting_sp;
+ while (i > 255)
+ {
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (255)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ i -= 255;
+ }
+ if (i > 0)
+ {
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (i)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+}
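+
+/* Note on the loop above: the stack adjustment is emitted in chunks
+   of at most 255 bytes, presumably because the port's subsi3
+   immediates are limited to 8-bit constants (cf. constraint "I",
+   0..255, in constraints.md).  */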
+
+void
+moxie_expand_epilogue (void)
+{
+ int regno;
+ rtx reg;
+
+ if (cfun->machine->callee_saved_reg_size != 0)
+ {
+ reg = gen_rtx_REG (Pmode, MOXIE_R5);
+ if (cfun->machine->callee_saved_reg_size <= 255)
+ {
+ emit_move_insn (reg, hard_frame_pointer_rtx);
+ emit_insn (gen_subsi3
+ (reg, reg,
+ GEN_INT (cfun->machine->callee_saved_reg_size)));
+ }
+ else
+ {
+ emit_move_insn (reg,
+ GEN_INT (-cfun->machine->callee_saved_reg_size));
+ emit_insn (gen_addsi3 (reg, reg, hard_frame_pointer_rtx));
+ }
+ for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
+ if (!fixed_regs[regno] && !call_used_regs[regno]
+ && df_regs_ever_live_p (regno))
+ {
+ rtx preg = gen_rtx_REG (Pmode, regno);
+ emit_insn (gen_movsi_pop (reg, preg));
+ }
+ }
+
+ emit_jump_insn (gen_returner ());
+}
+
+/* Implements the macro INITIAL_ELIMINATION_OFFSET; returns the offset.  */
+
+int
+moxie_initial_elimination_offset (int from, int to)
+{
+ int ret;
+
+ if ((from) == FRAME_POINTER_REGNUM && (to) == HARD_FRAME_POINTER_REGNUM)
+ {
+ /* Compute this since we need to use cfun->machine->local_vars_size. */
+ moxie_compute_frame ();
+ ret = -cfun->machine->callee_saved_reg_size;
+ }
+ else if ((from) == ARG_POINTER_REGNUM && (to) == HARD_FRAME_POINTER_REGNUM)
+ ret = 0x00;
+ else
+ abort ();
+
+ return ret;
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+moxie_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_size, int no_rtl)
+{
+ int regno;
+ int regs = 8 - *cum;
+
+ *pretend_size = regs < 0 ? 0 : GET_MODE_SIZE (SImode) * regs;
+
+ if (no_rtl)
+ return;
+
+ for (regno = *cum; regno < 8; regno++)
+ {
+ rtx reg = gen_rtx_REG (SImode, regno);
+ rtx slot = gen_rtx_PLUS (Pmode,
+ gen_rtx_REG (SImode, ARG_POINTER_REGNUM),
+ GEN_INT (UNITS_PER_WORD * (3 + (regno-2))));
+
+ emit_move_insn (gen_rtx_MEM (SImode, slot), reg);
+ }
+}
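+
+/* Slot arithmetic, worked through: hard register REGNO (counting from
+   MOXIE_R0 == 2) is spilled at ap + 4 * (3 + (REGNO - 2)) =
+   ap + 4 * (REGNO + 1); for $r0 that is ap + 12, matching
+   FIRST_PARM_OFFSET.  */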
+
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+moxie_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = CC_REG;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+/* Return the next register to be used to hold a function argument or
+ NULL_RTX if there's no more space. */
+
+static rtx
+moxie_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (*cum < 8)
+ return gen_rtx_REG (mode, *cum);
+ else
+ return NULL_RTX;
+}
+
+#define MOXIE_FUNCTION_ARG_SIZE(MODE, TYPE) \
+ ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) \
+ : (unsigned) int_size_in_bytes (TYPE))
+
+static void
+moxie_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum = (*cum < MOXIE_R6
+ ? *cum + ((3 + MOXIE_FUNCTION_ARG_SIZE (mode, type)) / 4)
+ : *cum);
+}
+
+/* Return non-zero if the function argument described by TYPE is to be
+ passed by reference. */
+
+static bool
+moxie_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ {
+ if (AGGREGATE_TYPE_P (type))
+ return true;
+ size = int_size_in_bytes (type);
+ }
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return size > 4*6;
+}
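+
+/* Illustrative: 4*6 == 24 bytes is the capacity of the six argument
+   registers $r0..$r5, so aggregates always go by reference and any
+   other argument wider than 24 bytes does too.  */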
+
+/* Some function arguments will only partially fit in the registers
+ that hold arguments. Given a new arg, return the number of bytes
+ that fit in argument passing registers. */
+
+static int
+moxie_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ tree type, bool named)
+{
+ int bytes_left, size;
+
+ if (*cum >= 8)
+ return 0;
+
+ if (moxie_pass_by_reference (cum, mode, type, named))
+ size = 4;
+ else if (type)
+ {
+ if (AGGREGATE_TYPE_P (type))
+ return 0;
+ size = int_size_in_bytes (type);
+ }
+ else
+ size = GET_MODE_SIZE (mode);
+
+ bytes_left = (4 * 6) - ((*cum - 2) * 4);
+
+ if (size > bytes_left)
+ return bytes_left;
+ else
+ return 0;
+}
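+
+/* Worked example (illustrative): with *cum == 6 (four argument words
+   already used, $r4 next) and a 16-byte argument, bytes_left =
+   24 - (6 - 2) * 4 = 8, so 8 bytes travel in $r4/$r5 and the
+   remaining 8 bytes go on the stack.  */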
+
+/* Worker function for TARGET_STATIC_CHAIN. */
+
+static rtx
+moxie_static_chain (const_tree fndecl, bool incoming_p)
+{
+ rtx addr, mem;
+
+ if (!DECL_STATIC_CHAIN (fndecl))
+ return NULL;
+
+ if (incoming_p)
+ addr = plus_constant (arg_pointer_rtx, 2 * UNITS_PER_WORD);
+ else
+ addr = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
+
+ mem = gen_rtx_MEM (Pmode, addr);
+ MEM_NOTRAP_P (mem) = 1;
+
+ return mem;
+}
+
+/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+
+static void
+moxie_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\tpush $sp, $r0\n");
+ fprintf (f, "\tldi.l $r0, 0x0\n");
+ fprintf (f, "\tsto.l 0x8($fp), $r0\n");
+ fprintf (f, "\tpop $sp, $r0\n");
+ fprintf (f, "\tnop\n");
+ fprintf (f, "\tjmpa 0x0\n");
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT. */
+
+static void
+moxie_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx mem, fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, 4);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 20);
+ emit_move_insn (mem, fnaddr);
+}
+
+/* The Global `targetm' Variable. */
+
+/* Initialize the GCC target structure. */
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY moxie_return_in_memory
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE moxie_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES moxie_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG moxie_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE moxie_function_arg_advance
+
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS moxie_setup_incoming_varargs
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS moxie_fixed_condition_code_regs
+
+/* Define this to return an RTX representing the place where a
+ function returns or receives a value of data type RET_TYPE, a tree
+   node representing a data type.  */
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE moxie_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE moxie_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P moxie_function_value_regno_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED hook_bool_void_true
+
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN moxie_static_chain
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE moxie_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT moxie_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE moxie_option_override
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-moxie.h"
diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h
new file mode 100644
index 000000000..41f66cb6b
--- /dev/null
+++ b/gcc/config/moxie/moxie.h
@@ -0,0 +1,500 @@
+/* Target Definitions for moxie.
+ Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Anthony Green.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_MOXIE_H
+#define GCC_MOXIE_H
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0%O%s crti.o%s crtbegin.o%s"
+
+/* Provide an ENDFILE_SPEC appropriate for svr4. Here we tack on our own
+ magical crtend.o file (see crtstuff.c) which provides part of the
+   support for getting C++ file-scope static objects constructed before
+ entering `main', followed by the normal svr3/svr4 "finalizer" file,
+ which is either `gcrtn.o' or `crtn.o'. */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+/* Provide a LIB_SPEC appropriate for svr4. Here we tack on the default
+ standard C library (unless we are building a shared library) and
+ the simulator BSP code. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:-lc}}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V} \
+ %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
+
+/* Layout of Source Language Data Types */
+
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Registers...
+
+ $fp - frame pointer
+ $sp - stack pointer
+ $r0 - general purpose 32-bit register.
+ $r1 - general purpose 32-bit register.
+ $r2 - general purpose 32-bit register.
+ $r3 - general purpose 32-bit register.
+ $r4 - general purpose 32-bit register.
+ $r5 - general purpose 32-bit register.
+ $r6 - general purpose 32-bit register.
+ $r7 - general purpose 32-bit register.
+ $r8 - general purpose 32-bit register.
+ $r9 - general purpose 32-bit register.
+ $r10 - general purpose 32-bit register.
+ $r11 - general purpose 32-bit register.
+ $r12 - general purpose 32-bit register.
+ $r13 - reserved for execution environment.
+
+ Special Registers...
+
+ $pc - 32-bit program counter.
+
+*/
+
+#define REGISTER_NAMES { \
+ "$fp", "$sp", "$r0", "$r1", \
+ "$r2", "$r3", "$r4", "$r5", \
+ "$r6", "$r7", "$r8", "$r9", \
+ "$r10", "$r11", "$r12", "$r13", \
+ "?fp", "?ap", "$pc", "?cc" }
+
+#define MOXIE_FP 0
+#define MOXIE_SP 1
+#define MOXIE_R0 2
+#define MOXIE_R1 3
+#define MOXIE_R2 4
+#define MOXIE_R3 5
+#define MOXIE_R4 6
+#define MOXIE_R5 7
+#define MOXIE_R6 8
+#define MOXIE_R7 9
+#define MOXIE_R8 10
+#define MOXIE_R9 11
+#define MOXIE_R10 12
+#define MOXIE_R11 13
+#define MOXIE_R12 14
+#define MOXIE_R13 15
+#define MOXIE_QFP 16
+#define MOXIE_QAP 17
+#define MOXIE_PC 18
+#define MOXIE_CC 19
+
+#define FIRST_PSEUDO_REGISTER 20
+
+enum reg_class
+{
+ NO_REGS,
+ GENERAL_REGS,
+ SPECIAL_REGS,
+ CC_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+
+/* The following macro defines cover classes for the Integrated
+   Register Allocator.  Cover classes are a set of non-intersecting
+   register classes covering all hard registers used for register
+   allocation purposes.  Any move between two registers of a cover
+   class should be cheaper than a load or store of those registers.
+   The macro value is an array of register classes with
+   LIM_REG_CLASSES used as the end marker.  */
+#define IRA_COVER_CLASSES { GENERAL_REGS, LIM_REG_CLASSES }
+
+#define REG_CLASS_CONTENTS \
+{ { 0x00000000 }, /* Empty */ \
+  { 0x0003FFFF }, /* $fp, $sp, $r0 to $r13, ?fp, ?ap */	\
+ { 0x00040000 }, /* $pc */ \
+ { 0x00080000 }, /* ?cc */ \
+ { 0x000FFFFF } /* All registers */ \
+}
+
+#define N_REG_CLASSES LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES {\
+ "NO_REGS", \
+ "GENERAL_REGS", \
+ "SPECIAL_REGS", \
+ "CC_REGS", \
+ "ALL_REGS" }
+
+#define FIXED_REGISTERS { 1, 1, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 1, \
+ 1, 1, 1, 1 }
+
+#define CALL_USED_REGISTERS { 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 0, 0, 0, 0, \
+ 0, 0, 1, 1, \
+ 1, 1, 1, 1 }
+
+/* We can't copy to or from our CC register. */
+#define AVOID_CCMODE_COPIES 1
+
+/* A C expression that is nonzero if it is permissible to store a
+ value of mode MODE in hard register number REGNO (or in several
+   registers starting with that one).  All our registers are
+   equivalent, so we can set this to 1.  */
+#define HARD_REGNO_MODE_OK(R,M) 1
+
+/* A C expression whose value is a register class containing hard
+ register REGNO. */
+#define REGNO_REG_CLASS(R) ((R < MOXIE_PC) ? GENERAL_REGS : \
+ (R == MOXIE_CC ? CC_REGS : SPECIAL_REGS))
+
+/* A C expression for the number of consecutive hard registers,
+ starting at register number REGNO, required to hold a value of mode
+ MODE. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) \
+ / UNITS_PER_WORD)
+
+/* A C expression that is nonzero if a value of mode MODE1 is
+ accessible in mode MODE2 without copying. */
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+/* A C expression for the maximum number of consecutive registers of
+ class CLASS needed to hold a value of mode MODE. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* The Overall Framework of an Assembler File */
+
+#undef ASM_SPEC
+#define ASM_COMMENT_START "#"
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+
+#define FILE_ASM_OP "\t.file\n"
+
+/* Switch to the text or data segment. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* Assembler Commands for Alignment */
+
+#define ASM_OUTPUT_ALIGN(STREAM,POWER) \
+ fprintf (STREAM, "\t.p2align\t%d\n", POWER);
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand X. */
+#define PRINT_OPERAND(STREAM, X, CODE) moxie_print_operand (STREAM, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(STREAM ,X) moxie_print_operand_address (STREAM, X)
+
+/* Output and Generation of Labels */
+
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* Passing Arguments in Registers */
+
+/* A C type for declaring a variable that is used as the first
+ argument of `FUNCTION_ARG' and other related values. */
+#define CUMULATIVE_ARGS unsigned int
+
+/* If defined, the maximum amount of space required for outgoing arguments
+ will be computed and placed into the variable
+ `current_function_outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue should
+ increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* A C statement (sans semicolon) for initializing the variable CUM
+ for the state at the beginning of the argument list.
+ For moxie, the first arg is passed in register 2 (aka $r0). */
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
+ (CUM = MOXIE_R0)
+
+/* How Scalar Function Values Are Returned */
+
+/* STACK AND CALLING */
+
+/* Define this macro if pushing a word onto the stack moves the stack
+ pointer to a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+#define INITIAL_FRAME_POINTER_OFFSET(DEPTH) (DEPTH) = 0
+
+/* Offset from the frame pointer to the first local variable slot to
+ be allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Define this if the above stack space is to be considered part of the
+ space allocated by the caller. */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+#define STACK_PARMS_IN_REG_PARM_AREA
+
+/* Define this if it is the responsibility of the caller to allocate
+ the area reserved for arguments passed in registers. */
+#define REG_PARM_STACK_SPACE(FNDECL) (6 * UNITS_PER_WORD)
+
+/* Offset from the argument pointer register to the first argument's
+ address. On some machines it may depend on the data type of the
+ function. */
+#define FIRST_PARM_OFFSET(F) 12
+
+/* Define this macro to nonzero value if the addresses of local variable slots
+ are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Define this macro as a C expression that is nonzero for registers that are
+ used by the epilogue or the return pattern. The stack and frame
+ pointer registers are already assumed to be used as needed. */
+#define EPILOGUE_USES(R) (R == MOXIE_R5)
+
+/* A C expression whose value is RTL representing the location of the
+ incoming return address at the beginning of any function, before
+ the prologue. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_frame_mem (Pmode, \
+ plus_constant (stack_pointer_rtx, UNITS_PER_WORD))
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N+2) : INVALID_REGNUM)
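+/* That is, the four EH data values live in $r0..$r3 (hard registers
+   2..5).  */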
+
+/* Store the return handler into the call frame. */
+#define EH_RETURN_HANDLER_RTX \
+ gen_frame_mem (Pmode, \
+ plus_constant (frame_pointer_rtx, UNITS_PER_WORD))
+
+/* Storage Layout */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+
+/* Alignment required for a function entry point, in bits. */
+#define FUNCTION_BOUNDARY 16
+
+/* Define this macro as a C expression which is nonzero if accessing
+ less than a word of memory (i.e. a `char' or a `short') is no
+ faster than accessing a word of memory. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Number of storage units in a word; normally the size of a
+   general-purpose register, a power of two between 1 and 8.  */
+#define UNITS_PER_WORD 4
+
+/* Define this macro to the minimum alignment enforced by hardware
+ for the stack pointer on this machine. The definition is a C
+ expression for the desired alignment (measured in bits). */
+#define STACK_BOUNDARY 32
+
+/* Normal alignment required for function parameters on the stack, in
+ bits. All stack parameters receive at least this much alignment
+ regardless of data type. */
+#define PARM_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 32
+
+/* Every structure's size must be a multiple of 8 bits.  */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Look at the fundamental type that is used for a bit-field and use
+ that to impose alignment on the enclosing structure.
+   struct s {int a:8}; should have the same alignment as "int", not "char".  */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Largest integer machine mode for structures. If undefined, the default
+ is GET_MODE_SIZE(DImode). */
+#define MAX_FIXED_MODE_SIZE 32
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Generating Code for Profiling */
+#define FUNCTION_PROFILER(FILE,LABELNO) (abort (), 0)
+
+/* Trampolines for Nested Functions. */
+#define TRAMPOLINE_SIZE (2 + 6 + 6 + 2 + 2 + 6)
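+/* The total is the byte length of the template emitted by
+   moxie_asm_trampoline_template: push (2) + ldi.l (6) + sto.l (6)
+   + pop (2) + nop (2) + jmpa (6) = 24 bytes.  moxie_trampoline_init
+   then patches the chain value and target address into what are
+   presumably the ldi.l and jmpa immediates, at offsets 4 and 20.  */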
+
+/* Alignment required for trampolines, in bits. */
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* An alias for the machine mode for pointers. */
+#define Pmode SImode
+
+/* An alias for the machine mode used for memory references to
+ functions being called, in `call' RTL expressions. */
+#define FUNCTION_MODE QImode
+
+/* The register number of the stack pointer register, which must also
+ be a fixed register according to `FIXED_REGISTERS'. */
+#define STACK_POINTER_REGNUM MOXIE_SP
+
+/* The register number of the frame pointer register, which is used to
+ access automatic variables in the stack frame. */
+#define FRAME_POINTER_REGNUM MOXIE_QFP
+
+/* The register number of the arg pointer register, which is used to
+ access the function's argument list. */
+#define ARG_POINTER_REGNUM MOXIE_QAP
+
+#define HARD_FRAME_POINTER_REGNUM MOXIE_FP
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It
+ specifies the initial difference between the specified pair of
+ registers. This macro must be defined if `ELIMINABLE_REGS' is
+ defined. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do { \
+ (OFFSET) = moxie_initial_elimination_offset ((FROM), (TO)); \
+ } while (0)
+
+/* A C expression that is nonzero if REGNO is the number of a hard
+ register in which function arguments are sometimes passed. */
+#define FUNCTION_ARG_REGNO_P(r) (r >= MOXIE_R0 && r <= MOXIE_R5)
+
+/* A macro whose definition is the name of the class to which a valid
+ base register must belong. A base register is one used in an
+ address which is the register value plus a displacement. */
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define INDEX_REG_CLASS NO_REGS
+
+#define HARD_REGNO_OK_FOR_BASE_P(NUM) \
+ ((unsigned) (NUM) < FIRST_PSEUDO_REGISTER \
+ && (REGNO_REG_CLASS(NUM) == GENERAL_REGS \
+ || (NUM) == HARD_FRAME_POINTER_REGNUM))
+
+/* A C expression which is nonzero if register number NUM is suitable
+ for use as a base register in operand addresses. */
+#ifdef REG_OK_STRICT
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ (HARD_REGNO_OK_FOR_BASE_P(NUM) \
+ || HARD_REGNO_OK_FOR_BASE_P(reg_renumber[(NUM)]))
+#else
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ ((NUM) >= FIRST_PSEUDO_REGISTER || HARD_REGNO_OK_FOR_BASE_P(NUM))
+#endif
+
+/* A C expression which is nonzero if register number NUM is suitable
+ for use as an index register in operand addresses. */
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+
+/* The maximum number of bytes that a single instruction can move
+ quickly between memory and registers or between two memory
+ locations. */
+#define MOVE_MAX 4
+
+/* Truncating an integer value to a narrower mode is always a no-op. */
+#define TRULY_NOOP_TRUNCATION(op,ip) 1
+
+/* All load operations zero extend. */
+#define LOAD_EXTEND_OP(MEM) ZERO_EXTEND
+
+/* A C expression that is nonzero if X is a legitimate constant for
+ an immediate operand on the target machine. */
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+/* A number, the maximum number of registers that can appear in a
+ valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* An alias for a machine mode name. This is the machine mode that
+ elements of a jump-table should have. */
+#define CASE_VECTOR_MODE SImode
+
+/* A C compound statement with a conditional `goto LABEL;' executed
+ if X (an RTX) is a legitimate memory address on the target machine
+ for a memory operand of mode MODE. */
+#define GO_IF_LEGITIMATE_ADDRESS(MODE,X,LABEL) \
+ do { \
+ if (GET_CODE(X) == PLUS) \
+ { \
+ rtx op1,op2; \
+ op1 = XEXP(X,0); \
+ op2 = XEXP(X,1); \
+ if (GET_CODE(op1) == REG \
+ && CONSTANT_ADDRESS_P(op2) \
+ && REGNO_OK_FOR_BASE_P(REGNO(op1))) \
+ goto LABEL; \
+ } \
+ if (REG_P (X) && REGNO_OK_FOR_BASE_P (REGNO (X))) \
+ goto LABEL; \
+ if (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == CONST) \
+ goto LABEL; \
+ } while (0)
+
+/* Run-time Target Specification */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ { \
+ builtin_define_std ("moxie"); \
+ builtin_define_std ("MOXIE"); \
+ }
+
+#define HAS_LONG_UNCOND_BRANCH true
+
+#endif /* GCC_MOXIE_H */
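
The GO_IF_LEGITIMATE_ADDRESS macro above accepts exactly three address
shapes: a bare base register, a base register plus a constant
displacement (MAX_REGS_PER_ADDRESS is 1, so there is no index register),
and a symbolic reference. A minimal standalone sketch of the same
decision in plain C; the struct is an illustrative stand-in for GCC's
RTL, not compiler code:

    #include <stdbool.h>
    #include <stdio.h>

    enum addr_kind { A_REG, A_REG_PLUS_CONST, A_SYMBOLIC };

    struct addr {
      enum addr_kind kind;
      bool base_ok;   /* stands in for REGNO_OK_FOR_BASE_P on the base */
    };

    static bool legitimate_address_p (struct addr a)
    {
      switch (a.kind)
        {
        case A_REG:            return a.base_ok;
        case A_REG_PLUS_CONST: return a.base_ok;  /* one reg + const only */
        case A_SYMBOLIC:       return true;  /* SYMBOL_REF/LABEL_REF/CONST */
        }
      return false;
    }

    int main (void)
    {
      struct addr a = { A_REG_PLUS_CONST, true };
      printf ("reg+const legitimate: %d\n", legitimate_address_p (a));
      return 0;
    }
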
diff --git a/gcc/config/moxie/moxie.md b/gcc/config/moxie/moxie.md
new file mode 100644
index 000000000..64f8395cb
--- /dev/null
+++ b/gcc/config/moxie/moxie.md
@@ -0,0 +1,450 @@
+;; Machine description for Moxie
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Anthony Green <green@moxielogic.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; -------------------------------------------------------------------------
+;; Moxie specific constraints, predicates and attributes
+;; -------------------------------------------------------------------------
+
+(include "constraints.md")
+(include "predicates.md")
+
+;; Most instructions are two bytes long.
+(define_attr "length" "" (const_int 2))
+
+;; -------------------------------------------------------------------------
+;; nop instruction
+;; -------------------------------------------------------------------------
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+;; -------------------------------------------------------------------------
+;; Arithmetic instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "0,0,0")
+ (match_operand:SI 2 "moxie_add_operand" "I,N,r")))]
+ ""
+ "@
+ inc %0, %2
+ dec %0, -%2
+ add.l %0, %2")
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI
+ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "moxie_sub_operand" "I,r")))]
+ ""
+ "@
+ dec %0, %2
+ sub.l %0, %2")
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "mul.l %0, %2")
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "div.l %0, %2")
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "udiv.l %0, %2")
+
+(define_insn "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mod:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "mod.l %0, %2")
+
+(define_insn "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (umod:SI
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "umod.l %0, %2")
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "neg %0, %1")
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "not %0, %1")
+
+;; -------------------------------------------------------------------------
+;; Logical operators
+;; -------------------------------------------------------------------------
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "and %0, %2";
+})
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "xor %0, %2";
+})
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "or %0, %2";
+})
+
+;; -------------------------------------------------------------------------
+;; Shifters
+;; -------------------------------------------------------------------------
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "ashl %0, %2";
+})
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "ashr %0, %2";
+})
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+{
+ return "lshr %0, %2";
+})
+
+;; -------------------------------------------------------------------------
+;; Move instructions
+;; -------------------------------------------------------------------------
+
+;; SImode
+
+;; Push a register onto the stack
+(define_insn "movsi_push"
+ [(set (mem:SI (pre_dec:SI (reg:SI 1)))
+ (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "push $sp, %0")
+
+;; Pop a register from the stack
+(define_insn "movsi_pop"
+ [(set (match_operand:SI 1 "register_operand" "=r")
+ (mem:SI (post_inc:SI (match_operand:SI 0 "register_operand" "r"))))]
+ ""
+ "pop %0, %1")
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* If this is a store, force the value into a register. */
+ if (! (reload_in_progress || reload_completed))
+ {
+ if (MEM_P (operands[0]))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ if (MEM_P (XEXP (operands[0], 0)))
+ operands[0] = gen_rtx_MEM (SImode, force_reg (SImode, XEXP (operands[0], 0)));
+ }
+ else
+ if (MEM_P (operands[1])
+ && MEM_P (XEXP (operands[1], 0)))
+ operands[1] = gen_rtx_MEM (SImode, force_reg (SImode, XEXP (operands[1], 0)));
+ }
+}")
+
+(define_insn "*movsi"
+ [(set (match_operand:SI 0 "general_operand" "=r,r,r,W,A,r,r,B,r")
+ (match_operand:SI 1 "moxie_general_movsrc_operand" "O,r,i,r,r,W,A,r,B"))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ xor %0, %0
+ mov %0, %1
+ ldi.l %0, %1
+ st.l %0, %1
+ sta.l %0, %1
+ ld.l %0, %1
+ lda.l %0, %1
+ sto.l %0, %1
+ ldo.l %0, %1"
+ [(set_attr "length" "2,2,6,2,6,2,6,6,6")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* If this is a store, force the value into a register. */
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (QImode, operands[1]);
+}")
+
+(define_insn "*movqi"
+ [(set (match_operand:QI 0 "general_operand" "=r,r,r,W,A,r,r,B,r")
+ (match_operand:QI 1 "moxie_general_movsrc_operand" "O,r,i,r,r,W,A,r,B"))]
+ "register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode)"
+ "@
+ xor %0, %0
+ mov %0, %1
+ ldi.b %0, %1
+ st.b %0, %1
+ sta.b %0, %1
+ ld.b %0, %1
+ lda.b %0, %1
+ sto.b %0, %1
+ ldo.b %0, %1"
+ [(set_attr "length" "2,2,6,2,6,2,6,6,6")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* If this is a store, force the value into a register. */
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (HImode, operands[1]);
+}")
+
+(define_insn "*movhi"
+ [(set (match_operand:HI 0 "general_operand" "=r,r,r,W,A,r,r,B,r")
+ (match_operand:HI 1 "moxie_general_movsrc_operand" "O,r,i,r,r,W,A,r,B"))]
+ "(register_operand (operands[0], HImode)
+ || register_operand (operands[1], HImode))"
+ "@
+ xor %0, %0
+ mov %0, %1
+ ldi.s %0, %1
+ st.s %0, %1
+ sta.s %0, %1
+ ld.s %0, %1
+ lda.s %0, %1
+ sto.s %0, %1
+ ldo.s %0, %1"
+ [(set_attr "length" "2,2,6,2,6,2,6,6,6")])
+
+;; -------------------------------------------------------------------------
+;; Compare instructions
+;; -------------------------------------------------------------------------
+
+(define_constants
+ [(CC_REG 11)])
+
+(define_expand "cbranchsi4"
+ [(set (reg:CC CC_REG)
+ (compare:CC
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (set (pc)
+ (if_then_else (match_operator:CC 0 "comparison_operator"
+ [(reg:CC CC_REG) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+ /* Force the compare operands into registers. */
+ if (GET_CODE (operands[1]) != REG)
+ operands[1] = force_reg (SImode, operands[1]);
+ if (GET_CODE (operands[2]) != REG)
+ operands[2] = force_reg (SImode, operands[2]);
+ ")
+
+(define_insn "*cmpsi"
+ [(set (reg:CC CC_REG)
+ (compare
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "cmp %0, %1")
+
+
+;; -------------------------------------------------------------------------
+;; Branch instructions
+;; -------------------------------------------------------------------------
+
+(define_code_iterator cond [ne eq lt ltu gt gtu ge le geu leu])
+(define_code_attr CC [(ne "ne") (eq "eq") (lt "lt") (ltu "ltu")
+ (gt "gt") (gtu "gtu") (ge "ge") (le "le")
+ (geu "geu") (leu "leu") ])
+(define_code_attr rCC [(ne "eq") (eq "ne") (lt "ge") (ltu "geu")
+ (gt "le") (gtu "leu") (ge "lt") (le "gt")
+ (geu "ltu") (leu "gtu") ])
+
+(define_insn "*b<cond:code>"
+ [(set (pc)
+ (if_then_else (cond (reg:CC CC_REG)
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+{
+ if (get_attr_length (insn) == 2)
+ return "b<CC> %l0";
+ else
+ return "b<rCC> .+6\n\tjmpa %l0";
+}
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 1022))
+ (const_int 2) (const_int 8)))])
+
+;; -------------------------------------------------------------------------
+;; Call and Jump instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+})
+
+(define_insn "*call"
+ [(call (mem:QI (match_operand:SI
+ 0 "nonmemory_operand" "i,r"))
+ (match_operand 1 "" ""))]
+ ""
+ "@
+ jsra %0
+ jsr %0"
+ [(set_attr "length" "6,2")])
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand 2 "" "")))]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+})
+
+(define_insn "*call_value"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:QI (match_operand:SI
+ 1 "immediate_operand" "i"))
+ (match_operand 2 "" "")))]
+ ""
+ "jsra %1"
+ [(set_attr "length" "6")])
+
+(define_insn "*call_value_indirect"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:QI (match_operand:SI
+ 1 "register_operand" "r"))
+ (match_operand 2 "" "")))]
+ ""
+ "jsr %1")
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "r"))]
+ ""
+ "jmp %0")
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmpa %l0"
+ [(set_attr "length" "6")])
+
+
+;; -------------------------------------------------------------------------
+;; Prologue & Epilogue
+;; -------------------------------------------------------------------------
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+ "
+{
+ moxie_expand_prologue ();
+ DONE;
+}
+")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ moxie_expand_epilogue ();
+ DONE;
+}
+")
+
+(define_insn "returner"
+ [(return)]
+ "reload_completed"
+ "ret")
diff --git a/gcc/config/moxie/predicates.md b/gcc/config/moxie/predicates.md
new file mode 100644
index 000000000..f0595c011
--- /dev/null
+++ b/gcc/config/moxie/predicates.md
@@ -0,0 +1,55 @@
+;; Predicate definitions for Moxie
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Anthony Green <green@moxielogic.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; -------------------------------------------------------------------------
+;; Predicates
+;; -------------------------------------------------------------------------
+
+;; Nonzero if OP can be source of a simple move operation.
+
+(define_predicate "moxie_general_movsrc_operand"
+ (match_code "mem,const_int,reg,subreg,symbol_ref,label_ref,const")
+{
+ /* Any (MEM LABEL_REF) is OK. That is a pc-relative load. */
+ if (MEM_P (op) && GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ return 1;
+
+ if (MEM_P (op)
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)
+ return 1;
+
+ return general_operand (op, mode);
+})
+
+;; Nonzero if OP can be an operand to an add/inc/dec instruction.
+
+(define_predicate "moxie_add_operand"
+ (ior (match_code "reg")
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -255, 255)"))))
+
+;; Nonzero if OP can be an operand to a sub/dec instruction.
+
+(define_predicate "moxie_sub_operand"
+ (ior (match_code "reg")
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 255)")))) \ No newline at end of file
diff --git a/gcc/config/moxie/rtems.h b/gcc/config/moxie/rtems.h
new file mode 100644
index 000000000..f42baf3f7
--- /dev/null
+++ b/gcc/config/moxie/rtems.h
@@ -0,0 +1,35 @@
+/* Definitions for rtems targeting the Moxie core.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by Anthony Green (green@moxielogic.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS preprocessor built-ins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("moxie"); \
+ builtin_define ("__rtems__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
+
+#undef LINK_SPEC
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+#undef WCHAR_TYPE
+#undef WCHAR_TYPE_SIZE
diff --git a/gcc/config/moxie/sfp-machine.h b/gcc/config/moxie/sfp-machine.h
new file mode 100644
index 000000000..98f9f1bf4
--- /dev/null
+++ b/gcc/config/moxie/sfp-machine.h
@@ -0,0 +1,57 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+# define __BYTE_ORDER __BIG_ENDIAN
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
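
The _FP_CHOOSENAN rule above (flagged "Someone please check this" in
the source) prefers Y in exactly one case: X is a quiet NaN and Y is
not; every other combination keeps X. An illustrative bit-level
restatement in C, using the single-precision quiet bit as an example
value:

    #include <stdio.h>

    #define QNANBIT 0x00400000u   /* SFmode quiet bit, for illustration */

    static unsigned choose_nan_frac (unsigned x, unsigned y)
    {
      if ((x & QNANBIT) && !(y & QNANBIT))
        return y;   /* X quiet, Y signaling: take Y */
      return x;     /* all other combinations keep X */
    }

    int main (void)
    {
      printf ("quiet,signaling -> %#x\n",
              choose_nan_frac (0x00400001u, 0x00000001u));
      printf ("signaling,quiet -> %#x\n",
              choose_nan_frac (0x00000001u, 0x00400001u));
      return 0;
    }
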
diff --git a/gcc/config/moxie/t-moxie b/gcc/config/moxie/t-moxie
new file mode 100644
index 000000000..5498ecbb3
--- /dev/null
+++ b/gcc/config/moxie/t-moxie
@@ -0,0 +1,20 @@
+# Target Makefile Fragment for moxie
+# Copyright (C) 2008 Free Software Foundation, Inc.
+# Contributed by Anthony Green.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
diff --git a/gcc/config/moxie/t-moxie-softfp b/gcc/config/moxie/t-moxie-softfp
new file mode 100644
index 000000000..61c575132
--- /dev/null
+++ b/gcc/config/moxie/t-moxie-softfp
@@ -0,0 +1,9 @@
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := moxie/sfp-machine.h
+softfp_exclude_libgcc2 := y
+
+# softfp seems to be missing a whole bunch of prototypes.
+TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
diff --git a/gcc/config/moxie/uclinux.h b/gcc/config/moxie/uclinux.h
new file mode 100644
index 000000000..0e5ee3d6b
--- /dev/null
+++ b/gcc/config/moxie/uclinux.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: crt1%O%s} crti%O%s crtbegin%O%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-elf2flt"
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+/* Like the definition in gcc.c, but for purposes of uClinux, every link is
+ static. */
+#define MFWRAP_SPEC " %{fmudflap|fmudflapth: \
+ --wrap=malloc --wrap=free --wrap=calloc --wrap=realloc\
+ --wrap=mmap --wrap=munmap --wrap=alloca\
+ %{fmudflapth: --wrap=pthread_create\
+}} %{fmudflap|fmudflapth: --wrap=main}"
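
MFWRAP_SPEC above hands the linker a series of --wrap=NAME options;
for each wrapped NAME, ld redirects calls to __wrap_NAME and exposes
the original symbol as __real_NAME. A minimal sketch of the mechanism
(our own logging wrapper, not mudflap's); build with
gcc main.c -Wl,--wrap=malloc:

    #include <stdio.h>
    #include <stdlib.h>

    void *__real_malloc (size_t size);   /* resolved by ld to real malloc */

    void *__wrap_malloc (size_t size)
    {
      fprintf (stderr, "malloc(%zu)\n", size);
      return __real_malloc (size);
    }

    int main (void)
    {
      free (malloc (16));   /* this malloc call goes via the wrapper */
      return 0;
    }
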
diff --git a/gcc/config/netbsd-aout.h b/gcc/config/netbsd-aout.h
new file mode 100644
index 000000000..7306c156e
--- /dev/null
+++ b/gcc/config/netbsd-aout.h
@@ -0,0 +1,196 @@
+/* Common configuration file for NetBSD a.out targets.
+ Copyright (C) 2002, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* TARGET_OS_CPP_BUILTINS() common to all NetBSD a.out targets. */
+#define NETBSD_OS_CPP_BUILTINS_AOUT() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_COMMON(); \
+ } \
+ while (0)
+
+
+/* Provide an ASM_SPEC appropriate for NetBSD. Currently we only deal
+ with the options for generating PIC code. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fpie:-k} %{fPIC|fPIE:-k -K}"
+
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+
+
+/* Provide a STARTFILE_SPEC appropriate for NetBSD a.out. Here we
+ provide support for the special GCC option -static. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt0%O%s} \
+ %{!pg: \
+ %{p:mcrt0%O%s} \
+ %{!p: \
+ %{!static:crt0%O%s} \
+ %{static:scrt0%O%s}}}}"
+
+/* Provide a LINK_SPEC appropriate for NetBSD a.out. Here we provide
+ support for the special GCC options -static, -assert, and -nostdlib. */
+
+#undef NETBSD_LINK_SPEC_AOUT
+#define NETBSD_LINK_SPEC_AOUT \
+ "%{nostdlib:-nostdlib} \
+ %{!shared: \
+ %{!nostdlib: \
+ %{!r: \
+ %{!e*:-e start}}} \
+ -dc -dp \
+ %{static:-Bstatic}} \
+ %{shared:-Bshareable} \
+ %{R*} \
+ %{assert*}"
+
+/* Default LINK_SPEC. */
+#undef LINK_SPEC
+#define LINK_SPEC NETBSD_LINK_SPEC_AOUT
+
+/* Define the strings used for the .type, .size, and .set directives.
+ These strings generally do not vary from one system running NetBSD
+ to another, but if a given system needs to use different pseudo-op
+ names for these, they may be overridden in the file included after
+ this one. */
+
+#undef TYPE_ASM_OP
+#undef SIZE_ASM_OP
+#undef SET_ASM_OP
+#define TYPE_ASM_OP "\t.type\t"
+#define SIZE_ASM_OP "\t.size\t"
+#define SET_ASM_OP "\t.set\t"
+
+
+/* This is how we tell the assembler that a symbol is weak. */
+
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do \
+ { \
+ fputs ("\t.globl\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+ fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+ } \
+ while (0)
+
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. Different svr4 assemblers
+ expect various different forms of this operand. The one given here
+ is just a default. You may need to override it in your machine-
+ specific tm.h file (depending on the particulars of your assembler). */
+
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+
+
+/* Write the extra assembler code needed to declare a function's result.
+ Most svr4 assemblers don't require any special declaration of the
+ result value, but there are exceptions. */
+
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
+
+
+/* These macros generate the special .type and .size directives which
+ are used to set the corresponding fields of the linker symbol table
+ entries in an ELF object file under SVR4 (and a.out on NetBSD).
+ These macros also output the starting labels for the relevant
+ functions/objects. */
+
+/* Write the extra assembler code needed to declare a function properly.
+   Some svr4 assemblers also need to have something extra said about the
+ function's return value. We allow for that here. */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \
+ } \
+ while (0)
+
+
+/* Write the extra assembler code needed to declare an object properly. */
+
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
+
+/* Output the size directive for a decl in rest_of_decl_compilation
+ in the case where we did not do so before the initializer.
+ Once we find the error_mark_node, we know that the value of
+ size_directive_output was set
+ by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */
+
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+ do \
+ { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } \
+ while (0)
+
+
+/* This is how to declare the size of a function. */
+
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do \
+ { \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ } \
+ while (0)
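
The nested %{...} selectors in STARTFILE_SPEC above amount to a small
decision tree over driver flags. Restated as plain C for readability
(an illustration of the spec, not driver code):

    #include <stdbool.h>
    #include <stdio.h>

    static const char *netbsd_aout_crt0 (bool shared, bool pg, bool p,
                                         bool is_static)
    {
      if (shared) return NULL;       /* no startfile for shared links */
      if (pg) return "gcrt0.o";      /* -pg profiling */
      if (p)  return "mcrt0.o";      /* -p profiling */
      return is_static ? "scrt0.o" : "crt0.o";
    }

    int main (void)
    {
      printf ("-static -> %s\n", netbsd_aout_crt0 (false, false, false, true));
      printf ("-pg     -> %s\n", netbsd_aout_crt0 (false, true, false, false));
      return 0;
    }
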
diff --git a/gcc/config/netbsd-elf.h b/gcc/config/netbsd-elf.h
new file mode 100644
index 000000000..4fd55c189
--- /dev/null
+++ b/gcc/config/netbsd-elf.h
@@ -0,0 +1,86 @@
+/* Common configuration file for NetBSD ELF targets.
+ Copyright (C) 2002, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* TARGET_OS_CPP_BUILTINS() common to all NetBSD ELF targets. */
+#define NETBSD_OS_CPP_BUILTINS_ELF() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_COMMON(); \
+ } \
+ while (0)
+
+/* Provide a STARTFILE_SPEC appropriate for NetBSD ELF. Here we
+ provide support for the special GCC option -static. On ELF
+ targets, we also add the crtbegin.o file, which provides part
+ of the support for getting C++ file-scope static objects
+ constructed before entering "main". */
+
+#define NETBSD_STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt0%O%s} \
+ %{!pg: \
+ %{p:gcrt0%O%s} \
+ %{!p:crt0%O%s}}} \
+ %:if-exists(crti%O%s) \
+ %{static:%:if-exists-else(crtbeginT%O%s crtbegin%O%s)} \
+ %{!static: \
+ %{!shared:crtbegin%O%s} %{shared:crtbeginS%O%s}}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC NETBSD_STARTFILE_SPEC
+
+
+/* Provide an ENDFILE_SPEC appropriate for NetBSD ELF. Here we
+ add crtend.o, which provides part of the support for getting
+ C++ file-scope static objects deconstructed after exiting "main". */
+
+#define NETBSD_ENDFILE_SPEC \
+ "%{!shared:crtend%O%s} %{shared:crtendS%O%s} \
+ %:if-exists(crtn%O%s)"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC NETBSD_ENDFILE_SPEC
+
+/* Provide a LINK_SPEC appropriate for NetBSD ELF. Here we provide
+ support for the special GCC options -assert, -R, -rpath, -shared,
+ -nostdlib, -static, -rdynamic, and -dynamic-linker.
+
+ Target-specific code can use this in conjunction with any other
+ target-specific LINK_SPEC options.
+
+ Target-specific code must provide the %(netbsd_entry_point) spec. */
+
+#define NETBSD_LINK_SPEC_ELF \
+ "%{assert*} %{R*} %{rpath*} \
+ %{shared:-shared} \
+ %{!shared: \
+ -dc -dp \
+ %{!nostdlib: \
+ %{!r: \
+ %{!e*:-e %(netbsd_entry_point)}}} \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /usr/libexec/ld.elf_so} \
+ %{static:-static}}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
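
NETBSD_LINK_SPEC_ELF likewise encodes a decision tree; the sketch
below restates its main branches in C (entry-point and -nostdlib
handling omitted; illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    static void netbsd_elf_link_args (bool shared, bool is_static,
                                      bool rdynamic)
    {
      if (shared) { puts ("-shared"); return; }
      puts ("-dc -dp");
      if (is_static)
        puts ("-static");
      else
        {
          if (rdynamic) puts ("-export-dynamic");
          puts ("-dynamic-linker /usr/libexec/ld.elf_so");
        }
    }

    int main (void)
    {
      netbsd_elf_link_args (false, false, true);  /* typical dynamic link */
      return 0;
    }
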
diff --git a/gcc/config/netbsd-elf.opt b/gcc/config/netbsd-elf.opt
new file mode 100644
index 000000000..1b468c631
--- /dev/null
+++ b/gcc/config/netbsd-elf.opt
@@ -0,0 +1,33 @@
+; NetBSD ELF-only options.
+
+; Copyright (C) 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+rdynamic
+Driver
+
+rpath=
+Driver JoinedOrMissing
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/netbsd.h b/gcc/config/netbsd.h
new file mode 100644
index 000000000..ddf6567dc
--- /dev/null
+++ b/gcc/config/netbsd.h
@@ -0,0 +1,222 @@
+/* Base configuration file for all NetBSD targets.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* TARGET_OS_CPP_BUILTINS() common to all NetBSD targets. */
+#define NETBSD_OS_CPP_BUILTINS_COMMON() \
+ do \
+ { \
+ builtin_define ("__NetBSD__"); \
+ builtin_define ("__unix__"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=NetBSD"); \
+ } \
+ while (0)
+
+/* CPP_SPEC parts common to all NetBSD targets. */
+#define NETBSD_CPP_SPEC \
+ "%{posix:-D_POSIX_SOURCE} \
+ %{pthread:-D_REENTRANT -D_PTHREADS}"
+
+/* NETBSD_NATIVE is defined when gcc is integrated into the NetBSD
+ source tree so it can be configured appropriately without using
+ the GNU configure/build mechanism. */
+
+#ifdef NETBSD_NATIVE
+
+/* Look for the include files in the system-defined places. */
+
+#undef GPLUSPLUS_INCLUDE_DIR
+#define GPLUSPLUS_INCLUDE_DIR "/usr/include/g++"
+
+#undef GCC_INCLUDE_DIR
+#define GCC_INCLUDE_DIR "/usr/include"
+
+#undef INCLUDE_DEFAULTS
+#define INCLUDE_DEFAULTS \
+ { \
+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1 }, \
+ { GCC_INCLUDE_DIR, "GCC", 0, 0 }, \
+ { 0, 0, 0, 0 } \
+ }
+
+/* Under NetBSD, the normal location of the compiler back ends is the
+ /usr/libexec directory. */
+
+#undef STANDARD_EXEC_PREFIX
+#define STANDARD_EXEC_PREFIX "/usr/libexec/"
+
+/* Under NetBSD, the normal location of the various *crt*.o files is the
+ /usr/lib directory. */
+
+#undef STANDARD_STARTFILE_PREFIX
+#define STANDARD_STARTFILE_PREFIX "/usr/lib/"
+
+#endif /* NETBSD_NATIVE */
+
+
+/* Provide a LIB_SPEC appropriate for NetBSD. Here we:
+
+ 1. Select the appropriate set of libs, depending on whether we're
+ profiling.
+
+ 2. Include the pthread library if -pthread is specified (only
+ if threads are enabled).
+
+ 3. Include the posix library if -posix is specified.
+
+ FIXME: Could eliminate the duplication here if we were allowed to
+ use string concatenation. */
+
+#ifdef NETBSD_ENABLE_PTHREADS
+#define NETBSD_LIB_SPEC \
+ "%{pthread: \
+ %{!p: \
+ %{!pg:-lpthread}} \
+ %{p:-lpthread_p} \
+ %{pg:-lpthread_p}} \
+ %{posix: \
+ %{!p: \
+ %{!pg:-lposix}} \
+ %{p:-lposix_p} \
+ %{pg:-lposix_p}} \
+ %{!shared: \
+ %{!symbolic: \
+ %{!p: \
+ %{!pg:-lc}} \
+ %{p:-lc_p} \
+ %{pg:-lc_p}}}"
+#else
+#define NETBSD_LIB_SPEC \
+ "%{posix: \
+ %{!p: \
+ %{!pg:-lposix}} \
+ %{p:-lposix_p} \
+ %{pg:-lposix_p}} \
+ %{!shared: \
+ %{!symbolic: \
+ %{!p: \
+ %{!pg:-lc}} \
+ %{p:-lc_p} \
+ %{pg:-lc_p}}}"
+#endif
+
+#undef LIB_SPEC
+#define LIB_SPEC NETBSD_LIB_SPEC
+
+/* Provide a LIBGCC_SPEC appropriate for NetBSD. We also want to exclude
+ libgcc with -symbolic. */
+
+#ifdef NETBSD_NATIVE
+#define NETBSD_LIBGCC_SPEC \
+ "%{!symbolic: \
+ %{!shared: \
+ %{!p: \
+ %{!pg: -lgcc}}} \
+ %{shared: -lgcc_pic} \
+ %{p: -lgcc_p} \
+ %{pg: -lgcc_p}}"
+#else
+#define NETBSD_LIBGCC_SPEC "%{!shared:%{!symbolic: -lgcc}}"
+#endif
+
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC NETBSD_LIBGCC_SPEC
+
+/* When building shared libraries, the initialization and finalization
+ functions for the library are .init and .fini respectively. */
+
+#define COLLECT_SHARED_INIT_FUNC(STREAM,FUNC) \
+ do { \
+ fprintf ((STREAM), "void __init() __asm__ (\".init\");"); \
+ fprintf ((STREAM), "void __init() {\n\t%s();\n}\n", (FUNC)); \
+ } while (0)
+
+#define COLLECT_SHARED_FINI_FUNC(STREAM,FUNC) \
+ do { \
+ fprintf ((STREAM), "void __fini() __asm__ (\".fini\");"); \
+ fprintf ((STREAM), "void __fini() {\n\t%s();\n}\n", (FUNC)); \
+ } while (0)
+
+#undef TARGET_POSIX_IO
+#define TARGET_POSIX_IO
+
+/* Don't assume anything about the header files. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C 1
+
+/* Define some types that are the same on all NetBSD platforms,
+ making them agree with <machine/ansi.h>. */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+
+/* Attempt to turn on execute permission for the stack. This may be
+ used by TARGET_TRAMPOLINE_INIT if the target needs it (that is,
+ if the target machine can change execute permissions on a page).
+
+ There is no way to query the execute permission of the stack, so
+ we always issue the mprotect() call.
+
+ Note that we go out of our way to use namespace-non-invasive calls
+ here. Unfortunately, there is no libc-internal name for mprotect().
+
+ Also note that no errors should be emitted by this code; it is considered
+ dangerous for library calls to send messages to stdout/stderr. */
+
+#define NETBSD_ENABLE_EXECUTE_STACK \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ extern int mprotect (void *, size_t, int); \
+ extern int __sysctl (int *, unsigned int, void *, size_t *, \
+ void *, size_t); \
+ \
+ static int size; \
+ static long mask; \
+ \
+ char *page, *end; \
+ \
+ if (size == 0) \
+ { \
+ int mib[2]; \
+ size_t len; \
+ \
+ mib[0] = 6; /* CTL_HW */ \
+ mib[1] = 7; /* HW_PAGESIZE */ \
+ len = sizeof (size); \
+ (void) __sysctl (mib, 2, &size, &len, NULL, 0); \
+ mask = ~((long) size - 1); \
+ } \
+ \
+ page = (char *) (((long) addr) & mask); \
+ end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \
+ \
+ /* 7 == PROT_READ | PROT_WRITE | PROT_EXEC */ \
+ (void) mprotect (page, end - page, 7); \
+}
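
The mask arithmetic in NETBSD_ENABLE_EXECUTE_STACK rounds the
trampoline's start down and its end up to page boundaries so the
mprotect call covers every page it touches. Worked standalone, with
TRAMPOLINE_SIZE taken from moxie.h earlier in this diff and a fixed
page size for illustration:

    #include <stdio.h>

    #define PAGE_SIZE 4096L
    #define TRAMPOLINE_SIZE 24L   /* 2+6+6+2+2+6, per moxie.h above */

    int main (void)
    {
      long addr = 0x20fff0;       /* trampoline straddles a page boundary */
      long mask = ~(PAGE_SIZE - 1);
      long page = addr & mask;
      long end  = ((addr + TRAMPOLINE_SIZE) & mask) + PAGE_SIZE;
      printf ("page=%#lx end=%#lx len=%ld\n", page, end, end - page);
      return 0;   /* prints page=0x20f000 end=0x211000 len=8192 */
    }
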
diff --git a/gcc/config/netbsd.opt b/gcc/config/netbsd.opt
new file mode 100644
index 000000000..f90fb029e
--- /dev/null
+++ b/gcc/config/netbsd.opt
@@ -0,0 +1,36 @@
+; NetBSD options.
+
+; Copyright (C) 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+assert=
+Driver JoinedOrMissing
+
+posix
+Driver
+
+pthread
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/newlib-stdint.h b/gcc/config/newlib-stdint.h
new file mode 100644
index 000000000..3bc8a5913
--- /dev/null
+++ b/gcc/config/newlib-stdint.h
@@ -0,0 +1,64 @@
+/* Definitions for <stdint.h> types on systems using newlib.
+ Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* newlib uses 32-bit long in certain cases for all non-SPU
+ targets. */
+#ifndef STDINT_LONG32
+#define STDINT_LONG32 (LONG_TYPE_SIZE == 32)
+#endif
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* The newlib logic actually checks for sizes greater than 32 rather
+ than equal to 64 for various 64-bit types. */
+
+#define INT8_TYPE (CHAR_TYPE_SIZE == 8 ? "signed char" : 0)
+#define INT16_TYPE (SHORT_TYPE_SIZE == 16 ? "short int" : INT_TYPE_SIZE == 16 ? "int" : CHAR_TYPE_SIZE == 16 ? "signed char" : 0)
+#define INT32_TYPE (STDINT_LONG32 ? "long int" : INT_TYPE_SIZE == 32 ? "int" : SHORT_TYPE_SIZE == 32 ? "short int" : CHAR_TYPE_SIZE == 32 ? "signed char" : 0)
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : LONG_LONG_TYPE_SIZE == 64 ? "long long int" : INT_TYPE_SIZE == 64 ? "int" : 0)
+#define UINT8_TYPE (CHAR_TYPE_SIZE == 8 ? "unsigned char" : 0)
+#define UINT16_TYPE (SHORT_TYPE_SIZE == 16 ? "short unsigned int" : INT_TYPE_SIZE == 16 ? "unsigned int" : CHAR_TYPE_SIZE == 16 ? "unsigned char" : 0)
+#define UINT32_TYPE (STDINT_LONG32 ? "long unsigned int" : INT_TYPE_SIZE == 32 ? "unsigned int" : SHORT_TYPE_SIZE == 32 ? "short unsigned int" : CHAR_TYPE_SIZE == 32 ? "unsigned char" : 0)
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : LONG_LONG_TYPE_SIZE == 64 ? "long long unsigned int" : INT_TYPE_SIZE == 64 ? "unsigned int" : 0)
+
+#define INT_LEAST8_TYPE (INT8_TYPE ? INT8_TYPE : INT16_TYPE ? INT16_TYPE : INT32_TYPE ? INT32_TYPE : INT64_TYPE ? INT64_TYPE : 0)
+#define INT_LEAST16_TYPE (INT16_TYPE ? INT16_TYPE : INT32_TYPE ? INT32_TYPE : INT64_TYPE ? INT64_TYPE : 0)
+#define INT_LEAST32_TYPE (INT32_TYPE ? INT32_TYPE : INT64_TYPE ? INT64_TYPE : 0)
+#define INT_LEAST64_TYPE INT64_TYPE
+#define UINT_LEAST8_TYPE (UINT8_TYPE ? UINT8_TYPE : UINT16_TYPE ? UINT16_TYPE : UINT32_TYPE ? UINT32_TYPE : UINT64_TYPE ? UINT64_TYPE : 0)
+#define UINT_LEAST16_TYPE (UINT16_TYPE ? UINT16_TYPE : UINT32_TYPE ? UINT32_TYPE : UINT64_TYPE ? UINT64_TYPE : 0)
+#define UINT_LEAST32_TYPE (UINT32_TYPE ? UINT32_TYPE : UINT64_TYPE ? UINT64_TYPE : 0)
+#define UINT_LEAST64_TYPE UINT64_TYPE
+
+#define INT_FAST8_TYPE (INT_TYPE_SIZE >= 8 ? "int" : INT_LEAST8_TYPE)
+#define INT_FAST16_TYPE (INT_TYPE_SIZE >= 16 ? "int" : INT_LEAST16_TYPE)
+#define INT_FAST32_TYPE (INT_TYPE_SIZE >= 32 ? "int" : INT_LEAST32_TYPE)
+#define INT_FAST64_TYPE (INT_TYPE_SIZE >= 64 ? "int" : INT_LEAST64_TYPE)
+#define UINT_FAST8_TYPE (INT_TYPE_SIZE >= 8 ? "unsigned int" : UINT_LEAST8_TYPE)
+#define UINT_FAST16_TYPE (INT_TYPE_SIZE >= 16 ? "unsigned int" : UINT_LEAST16_TYPE)
+#define UINT_FAST32_TYPE (INT_TYPE_SIZE >= 32 ? "unsigned int" : UINT_LEAST32_TYPE)
+#define UINT_FAST64_TYPE (INT_TYPE_SIZE >= 64 ? "unsigned int" : UINT_LEAST64_TYPE)
+
+/* Newlib uses the unsigned type corresponding to ptrdiff_t for
+ uintptr_t; this is the same as size_t for most newlib-using
+ targets. */
+#define INTPTR_TYPE PTRDIFF_TYPE
+#ifndef UINTPTR_TYPE
+#define UINTPTR_TYPE SIZE_TYPE
+#endif
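
Each *_TYPE macro above is a fallback chain: try long (when
STDINT_LONG32 holds), then int, short, and char, taking the first type
whose width matches. The same selection as a C function, fed a typical
ILP32 newlib configuration:

    #include <stdio.h>

    static const char *pick_int32 (int stdint_long32, int int_size,
                                   int short_size, int char_size)
    {
      if (stdint_long32)    return "long int";
      if (int_size == 32)   return "int";
      if (short_size == 32) return "short int";
      if (char_size == 32)  return "signed char";
      return NULL;          /* no 32-bit type available */
    }

    int main (void)
    {
      /* ILP32: long is 32 bits, so STDINT_LONG32 is true and long wins. */
      printf ("int32_t -> %s\n", pick_int32 (1, 32, 16, 8));
      return 0;
    }
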
diff --git a/gcc/config/openbsd-libpthread.h b/gcc/config/openbsd-libpthread.h
new file mode 100644
index 000000000..104e64528
--- /dev/null
+++ b/gcc/config/openbsd-libpthread.h
@@ -0,0 +1,22 @@
+/* LIB_SPEC appropriate for OpenBSD. Include -lpthread if -pthread is
+ specified on the command line. */
+/* Copyright (C) 2004 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define OBSD_LIB_SPEC "%{!shared:%{pthread:-lpthread} -lc}"
+
diff --git a/gcc/config/openbsd-oldgas.h b/gcc/config/openbsd-oldgas.h
new file mode 100644
index 000000000..e5750ec85
--- /dev/null
+++ b/gcc/config/openbsd-oldgas.h
@@ -0,0 +1,22 @@
+/* Generic settings for a.out OpenBSD systems.
+ Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define OBSD_OLD_GAS
diff --git a/gcc/config/openbsd-stdint.h b/gcc/config/openbsd-stdint.h
new file mode 100644
index 000000000..ab1f9cfff
--- /dev/null
+++ b/gcc/config/openbsd-stdint.h
@@ -0,0 +1,31 @@
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE "long long int"
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE "long long unsigned int"
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE "long long int"
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE "long long unsigned int"
+
+#define INT_FAST8_TYPE "int"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE "long long int"
+#define UINT_FAST8_TYPE "unsigned int"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE "long long unsigned int"
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
diff --git a/gcc/config/openbsd.h b/gcc/config/openbsd.h
new file mode 100644
index 000000000..ae08ed8b5
--- /dev/null
+++ b/gcc/config/openbsd.h
@@ -0,0 +1,300 @@
+/* Base configuration file for all OpenBSD targets.
+ Copyright (C) 1999, 2000, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Common OpenBSD configuration.
+ All OpenBSD architectures include this file, which is intended as
+ a repository for common defines.
+
+   Some defines are common to all architectures, while a few are
+   triggered by OBSD_* guards so that we won't override architecture
+   defaults by mistake.
+
+ OBSD_HAS_CORRECT_SPECS:
+ another mechanism provides correct specs already.
+ OBSD_NO_DYNAMIC_LIBRARIES:
+ no implementation of dynamic libraries.
+ OBSD_OLD_GAS:
+ older flavor of gas which needs help for PIC.
+ OBSD_HAS_DECLARE_FUNCTION_NAME, OBSD_HAS_DECLARE_FUNCTION_SIZE,
+ OBSD_HAS_DECLARE_OBJECT:
+ PIC support, FUNCTION_NAME/FUNCTION_SIZE are independent, whereas
+ the corresponding logic for OBJECTS is necessarily coupled.
+
+ There are also a few `default' defines such as ASM_WEAKEN_LABEL,
+   intended as common ground for arches that don't provide
+ anything suitable. */
+
+/* OPENBSD_NATIVE is defined only when gcc is configured as part of
+ the OpenBSD source tree, specifically through Makefile.bsd-wrapper.
+
+ In such a case the include path can be trimmed as there is no
+ distinction between system includes and gcc includes. */
+
+/* This configuration method, namely Makefile.bsd-wrapper and
+ OPENBSD_NATIVE is NOT recommended for building cross-compilers. */
+
+#ifdef OPENBSD_NATIVE
+
+/* The compiler is configured with ONLY the gcc/g++ standard headers. */
+#undef INCLUDE_DEFAULTS
+#define INCLUDE_DEFAULTS \
+ { \
+ { GPLUSPLUS_INCLUDE_DIR, "G++", 1, 1 }, \
+ { GPLUSPLUS_TOOL_INCLUDE_DIR, "G++", 1, 1 }, \
+ { GPLUSPLUS_BACKWARD_INCLUDE_DIR, "G++", 1, 1 }, \
+ { STANDARD_INCLUDE_DIR, STANDARD_INCLUDE_COMPONENT, 0, 0 }, \
+ { 0, 0, 0, 0 } \
+ }
+
+/* Under OpenBSD, the normal location of the various *crt*.o files is the
+   /usr/local/lib directory. */
+#undef STANDARD_STARTFILE_PREFIX
+#define STANDARD_STARTFILE_PREFIX "/usr/local/lib/"
+
+#endif
+
+
+/* Controlling the compilation driver. */
+/* TARGET_OS_CPP_BUILTINS() common to all OpenBSD targets. */
+#define OPENBSD_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__OpenBSD__"); \
+ builtin_define ("__unix__"); \
+ builtin_define ("__ANSI_COMPAT"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=OpenBSD"); \
+ } \
+ while (0)
+
+/* TARGET_OS_CPP_BUILTINS() common to all OpenBSD ELF targets. */
+#define OPENBSD_OS_CPP_BUILTINS_ELF() \
+ do \
+ { \
+ OPENBSD_OS_CPP_BUILTINS(); \
+ builtin_define ("__ELF__"); \
+ } \
+while (0)
+
+/* TARGET_OS_CPP_BUILTINS() common to all LP64 OpenBSD targets. */
+#define OPENBSD_OS_CPP_BUILTINS_LP64() \
+ do \
+ { \
+ builtin_define ("_LP64"); \
+ builtin_define ("__LP64__"); \
+ } \
+ while (0)
+
+/* CPP_SPEC appropriate for OpenBSD. We deal with -posix and -pthread.
+ XXX the way threads are handled currently is not very satisfying,
+ since all code must be compiled with -pthread to work.
+   These two-stage defines make it easy to pick the right spec for targets that
+ have subspecs. */
+#ifdef CPP_CPU_SPEC
+#define OBSD_CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
+#else
+#define OBSD_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
+#endif
+
+#undef LIB_SPEC
+#define LIB_SPEC OBSD_LIB_SPEC
+
+#ifndef OBSD_HAS_CORRECT_SPECS
+
+#undef CPP_SPEC
+#define CPP_SPEC OBSD_CPP_SPEC
+
+#ifdef OBSD_OLD_GAS
+/* ASM_SPEC appropriate for OpenBSD. For some architectures, OpenBSD
+ still uses a special flavor of gas that needs to be told when generating
+ pic code. */
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fpie:-k} %{fPIC|fPIE:-k -K}"
+#endif
+
+/* Since we use gas, stdin -> - is a good idea. */
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+
+#undef LIB_SPEC
+#define LIB_SPEC OBSD_LIB_SPEC
+
+#if defined(HAVE_LD_EH_FRAME_HDR)
+#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif
+
+#undef LIB_SPEC
+#define LIB_SPEC OBSD_LIB_SPEC
+#endif
+
+#define TARGET_POSIX_IO
+
+/* Runtime target specification. */
+
+/* Miscellaneous parameters. */
+
+/* Controlling debugging info: dbx options. */
+
+/* Don't use the `xsTAG;' construct in DBX output; OpenBSD systems that
+ use DBX don't support it. */
+#define DBX_NO_XREFS
+
+
+/* - we use . - _func instead of a local label,
+ - we put extra spaces in expressions such as
+ .type _func , @function
+ This is more readable for a human being and confuses c++filt less. */
+
+/* Assembler format: output and generation of labels. */
+
+/* Define the strings used for the .type and .size directives.
+ These strings generally do not vary from one system running OpenBSD
+ to another, but if a given system needs to use different pseudo-op
+ names for these, they may be overridden in the arch specific file. */
+
+/* The OpenBSD assembler is hacked to have .type & .size support even in a.out
+   format object files.  Function sizes are supported but not activated
+ yet (look for GRACE_PERIOD_EXPIRED in gas/config/obj-aout.c).
+ SET_ASM_OP is needed for attribute alias to work. */
+
+#undef TYPE_ASM_OP
+#undef SIZE_ASM_OP
+#undef SET_ASM_OP
+#undef GLOBAL_ASM_OP
+
+#define TYPE_ASM_OP "\t.type\t"
+#define SIZE_ASM_OP "\t.size\t"
+#define SET_ASM_OP "\t.set\t"
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. */
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+
+/* Provision if extra assembler code is needed to declare a function's result
+ (taken from svr4, not needed yet actually). */
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
+
+/* These macros generate the special .type and .size directives which
+ are used to set the corresponding fields of the linker symbol table
+ entries under OpenBSD. These macros also have to output the starting
+ labels for the relevant functions/objects. */
+
+#ifndef OBSD_HAS_DECLARE_FUNCTION_NAME
+/* Extra assembler code needed to declare a function properly.
+   Some assemblers may also need to have something extra said
+ about the function's return value. We allow for that here. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \
+ } while (0)
+#endif
+
+#ifndef OBSD_HAS_DECLARE_FUNCTION_SIZE
+/* Declare the size of a function. */
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do { \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ } while (0)
+#endif
+
+#ifndef OBSD_HAS_DECLARE_OBJECT
+/* Extra assembler code needed to declare an object properly. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do { \
+ HOST_WIDE_INT size; \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } while (0)
+
+/* Output the size directive for a decl in rest_of_decl_compilation
+ in the case where we did not do so before the initializer.
+ Once we find the error_mark_node, we know that the value of
+ size_directive_output was set by ASM_DECLARE_OBJECT_NAME
+ when it was run for the same decl. */
+#undef ASM_FINISH_DECLARE_OBJECT
+#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
+do { \
+ const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ HOST_WIDE_INT size; \
+ if (!flag_inhibit_size_directive && DECL_SIZE (DECL) \
+ && ! AT_END && TOP_LEVEL \
+ && DECL_INITIAL (DECL) == error_mark_node \
+ && !size_directive_output) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \
+ } \
+ } while (0)
+#endif
+
+
+/* These are `generic' ways to weaken/globalize a label.  We shouldn't need
+   to override a processor-specific definition, hence the #ifndef ASM_* guards.
+   In case overriding turns out to be needed, one can always #undef ASM_*
+   before including this file. */
+
+/* Tell the assembler that a symbol is weak. */
+/* Note: netbsd arm32 assembler needs a .globl here. An override may
+ be needed when/if we go for arm32 support. */
+#ifndef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+#endif
+
+/* Storage layout. */
+
+
+/* Stack is explicitly denied execution rights on OpenBSD platforms. */
+#define ENABLE_EXECUTE_STACK \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ long size = getpagesize (); \
+ long mask = ~(size-1); \
+ char *page = (char *) (((long) addr) & mask); \
+ char *end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \
+ \
+ if (mprotect (page, end - page, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) \
+ perror ("mprotect of trampoline code"); \
+}
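+
+/* Worked example of the page rounding above (illustrative, assuming a
+   4K page size): for addr == 0x1234 and a trampoline that ends below
+   0x2000, mask == ~0xfff, page == 0x1000 and end == 0x2000, so
+   mprotect covers exactly the pages the trampoline overlaps.  */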
+
+#include <sys/types.h>
+#include <sys/mman.h>
diff --git a/gcc/config/openbsd.opt b/gcc/config/openbsd.opt
new file mode 100644
index 000000000..b8c8fd416
--- /dev/null
+++ b/gcc/config/openbsd.opt
@@ -0,0 +1,36 @@
+; OpenBSD options.
+
+; Copyright (C) 2010, 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+assert=
+Driver JoinedOrMissing
+
+posix
+Driver
+
+pthread
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/pa/constraints.md b/gcc/config/pa/constraints.md
new file mode 100644
index 000000000..c1f3d5cd3
--- /dev/null
+++ b/gcc/config/pa/constraints.md
@@ -0,0 +1,140 @@
+;; Constraint definitions for pa
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;; Unused letters:
+;;; ABCDEF H V Y
+;;; bcde ghijklmnop stuvw z
+
+;; Register constraints.
+(define_register_constraint "a" "R1_REGS"
+ "General register 1.")
+
+(define_register_constraint "f" "FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "q" "SHIFT_REGS"
+ "Shift amount register.")
+
+;; Keep 'x' for backward compatibility with user asm.
+(define_register_constraint "x" "FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "y" "TARGET_64BIT ? FP_REGS : FPUPPER_REGS"
+ "Upper floating-point register.")
+
+(define_register_constraint "Z" "ALL_REGS"
+ "Any register.")
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "Signed 11-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "VAL_11_BITS_P (ival)")))
+
+(define_constraint "J"
+ "Signed 14-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "VAL_14_BITS_P (ival)")))
+
+(define_constraint "K"
+ "Integer constant that can be deposited with a zdepi instruction."
+ (and (match_code "const_int")
+ (match_test "zdepi_cint_p (ival)")))
+
+(define_constraint "L"
+ "Signed 5-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "VAL_5_BITS_P (ival)")))
+
+(define_constraint "M"
+ "Integer constant 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "N"
+ "Integer constant that can be loaded with a ldil instruction."
+ (and (match_code "const_int")
+ (match_test "ldil_cint_p (ival)")))
+
+(define_constraint "O"
+ "Integer constant such that ival+1 is a power of 2."
+ (and (match_code "const_int")
+ (match_test "(ival & (ival + 1)) == 0")))
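+
+;; For illustration: the test above accepts exactly the values whose
+;; binary form is a run of low-order one bits (0, 1, 3, 7, 15, ...),
+;; since for those ival + 1 clears every set bit, e.g. 7 & 8 == 0.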
+
+(define_constraint "P"
+ "Integer constant that can be used as an and mask in depi and
+ extru instructions."
+ (and (match_code "const_int")
+ (match_test "and_mask_p (ival)")))
+
+(define_constraint "S"
+ "Integer constant 31."
+ (and (match_code "const_int")
+ (match_test "ival == 31")))
+
+(define_constraint "U"
+ "Integer constant 63."
+ (and (match_code "const_int")
+ (match_test "ival == 63")))
+
+;; Floating-point constant constraints.
+(define_constraint "G"
+ "Floating-point constant 0."
+ (and (match_code "const_double")
+ (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT
+ && op == CONST0_RTX (mode)")))
+
+;; Extra constraints.
+(define_constraint "A"
+ "A LO_SUM DLT memory operand."
+ (and (match_code "mem")
+ (match_test "IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))")))
+
+(define_constraint "Q"
+ "A memory operand that can be used as the destination operand of an
+ integer store, or the source operand of an integer load. That is
+ any memory operand that isn't a symbolic, indexed or lo_sum memory
+ operand. Note that an unassigned pseudo register is such a memory
+ operand. We accept unassigned pseudo registers because reload
+ generates them and then doesn't re-recognize the insn, causing
+ constrain_operands to fail."
+ (match_test "integer_store_memory_operand (op, mode)"))
+
+(define_constraint "R"
+ "A scaled or unscaled indexed memory operand that can be used as the
+ source address in integer and floating-point loads."
+ (and (match_code "mem")
+ (match_test "IS_INDEX_ADDR_P (XEXP (op, 0))")))
+
+(define_constraint "T"
+ "A memory operand for floating-point loads and stores."
+ (and (match_code "mem")
+ (match_test "!IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
+ && !IS_INDEX_ADDR_P (XEXP (op, 0))
+ && memory_address_p ((GET_MODE_SIZE (mode) == 4
+ ? SFmode : DFmode),
+ XEXP (op, 0))")))
+
+;; We could allow short displacements but TARGET_LEGITIMATE_ADDRESS_P
+;; can't tell when a long displacement is valid.
+(define_constraint "W"
+ "A register indirect memory operand."
+ (and (match_code "mem")
+ (match_test "REG_P (XEXP (op, 0))
+ && REG_OK_FOR_BASE_P (XEXP (op, 0))")))
diff --git a/gcc/config/pa/elf.h b/gcc/config/pa/elf.h
new file mode 100644
index 000000000..1028206fd
--- /dev/null
+++ b/gcc/config/pa/elf.h
@@ -0,0 +1,92 @@
+/* Definitions for ELF assembler support.
+ Copyright (C) 1999, 2003, 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* So we can conditionalize small amounts of code in pa.c or pa.md. */
+#define OBJ_ELF
+
+#define ENDFILE_SPEC "crtend.o%s"
+
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\
+ crtbegin.o%s"
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+#define TARGET_ASM_FILE_START pa_elf_file_start
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+do { \
+ if (TREE_PUBLIC (DECL)) \
+ { \
+ fputs ("\t.EXPORT ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputs (",ENTRY\n", FILE); \
+ } \
+ } while (0)
+
+/* This is how to output a command to make the user-level label
+ named NAME defined for reference from other files. We use
+ assemble_name_raw instead of assemble_name since a symbol in
+ a .IMPORT directive that isn't otherwise referenced is not
+ placed in the symbol table of the assembled object.
+
+ Failure to import a function reference can cause the HP linker
+ to segmentation fault!
+
+ Note that the SOM based tools need the symbol imported as a
+ CODE symbol, while the ELF based tools require the symbol to
+ be imported as an ENTRY symbol. */
+
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ pa_hpux_asm_output_external ((FILE), (DECL), (NAME))
+#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \
+ do { fputs ("\t.IMPORT ", FILE); \
+ assemble_name_raw (FILE, NAME); \
+ if (FUNCTION_NAME_P (NAME)) \
+ fputs (",ENTRY\n", FILE); \
+ else \
+ fputs (",DATA\n", FILE); \
+ } while (0)
+
+/* The bogus HP assembler requires ALL external references to be
+ "imported", even library calls. They look a bit different, so
+ here's this macro.
+
+ Also note not all libcall names are passed to
+ targetm.encode_section_info (__main for example). To make sure all
+ libcall names have section info recorded in them, we do it here. */
+
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, RTL) \
+ do { fputs ("\t.IMPORT ", FILE); \
+ if (!function_label_operand (RTL, VOIDmode)) \
+ hppa_encode_label (RTL); \
+ assemble_name (FILE, XSTR ((RTL), 0)); \
+ fputs (",ENTRY\n", FILE); \
+ } while (0)
+
+/* Biggest alignment supported by the object file format of this
+ machine. Use this macro to limit the alignment which can be
+ specified using the `__attribute__ ((aligned (N)))' construct. If
+ not defined, the default value is `BIGGEST_ALIGNMENT'. */
+#define MAX_OFILE_ALIGNMENT (32768 * 8)
diff --git a/gcc/config/pa/fptr.c b/gcc/config/pa/fptr.c
new file mode 100644
index 000000000..320d18267
--- /dev/null
+++ b/gcc/config/pa/fptr.c
@@ -0,0 +1,131 @@
+/* Subroutine for function pointer canonicalization on PA-RISC with ELF32.
+ Copyright 2002, 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by John David Anglin (dave.anglin@nrc.ca).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* WARNING: The code in this function depends on internal and undocumented
+   details of the GNU linker and dynamic loader as implemented for PA-RISC
+   Linux.  */
+
+/* This MUST match the defines in sysdeps/hppa/dl-machine.h and
+   bfd/elf32-hppa.c.  */
+#define GOT_FROM_PLT_STUB (4*4)
+
+/* List of byte offsets in _dl_runtime_resolve to search for "bl" branches.
+ The first "bl" branch instruction found MUST be a call to fixup. See
+ the define for TRAMPOLINE_TEMPLATE in sysdeps/hppa/dl-machine.h. If
+ the trampoline template is changed, the list must be appropriately
+ updated. The offset of -4 allows for a magic branch at the start of
+ the template should it be necessary to change the current branch
+ position. */
+#define NOFFSETS 2
+static int fixup_branch_offset[NOFFSETS] = { 32, -4 };
+
+#define GET_FIELD(X, FROM, TO) \
+ ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1))
+#define SIGN_EXTEND(VAL,BITS) \
+ ((int) ((VAL) >> ((BITS) - 1) ? (-1 << (BITS)) | (VAL) : (VAL)))
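+
+/* Worked example (illustrative): SIGN_EXTEND (0x1ffff, 17) sees bit 16
+   set and ors in the high bits, yielding -1; a value with bit 16 clear,
+   such as 0xffff, is returned unchanged.  */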
+
+struct link_map;
+typedef int (*fptr_t) (void);
+typedef int (*fixup_t) (struct link_map *, unsigned int);
+extern unsigned int _GLOBAL_OFFSET_TABLE_;
+
+/* __canonicalize_funcptr_for_compare must be hidden so that it is not
+   placed in the dynamic symbol table.  Like millicode functions, it
+   must be linked into all binaries in order to access the GOT table of
+   that binary.  However, we don't use the millicode calling convention
+   and the routine must be a normal function so that it can be compiled
+   as PIC code.  */
+unsigned int __canonicalize_funcptr_for_compare (fptr_t)
+ __attribute__ ((visibility ("hidden")));
+
+unsigned int
+__canonicalize_funcptr_for_compare (fptr_t fptr)
+{
+ static unsigned int fixup_plabel[2];
+ static fixup_t fixup;
+ unsigned int *plabel, *got;
+
+ /* -1 and page 0 are special. -1 is used in crtend to mark the end of
+ a list of function pointers. Also return immediately if the plabel
+ bit is not set in the function pointer. In this case, the function
+ pointer points directly to the function. */
+ if ((int) fptr == -1 || (unsigned int) fptr < 4096 || !((int) fptr & 2))
+ return (unsigned int) fptr;
+
+ /* The function pointer points to a function descriptor (plabel). If
+ the plabel hasn't been resolved, the first word of the plabel points
+ to the entry of the PLT stub just before the global offset table.
+ The second word in the plabel contains the relocation offset for the
+ function. */
+ plabel = (unsigned int *) ((unsigned int) fptr & ~3);
+ got = (unsigned int *) (plabel[0] + GOT_FROM_PLT_STUB);
+
+ /* Return the address of the function if the plabel has been resolved. */
+ if (got != &_GLOBAL_OFFSET_TABLE_)
+ return plabel[0];
+
+ /* Initialize our plabel for calling fixup if we haven't done so already.
+ This code needs to be thread safe but we don't have to be too careful
+ as the result is invariant. */
+ if (!fixup)
+ {
+ int i;
+ unsigned int *iptr;
+
+ /* Find the first "bl" branch in the offset search list. This is a
+ call to fixup or a magic branch to fixup at the beginning of the
+ trampoline template. The fixup function does the actual runtime
+ resolution of function descriptors. We only look for "bl" branches
+ with a 17-bit pc-relative displacement. */
+ for (i = 0; i < NOFFSETS; i++)
+ {
+ iptr = (unsigned int *) (got[-2] + fixup_branch_offset[i]);
+ if ((*iptr & 0xfc00e000) == 0xe8000000)
+ break;
+ }
+
+ /* This should not happen... */
+ if (i == NOFFSETS)
+ return ~0;
+
+ /* Extract the 17-bit displacement from the instruction. */
+ iptr += SIGN_EXTEND (GET_FIELD (*iptr, 19, 28) |
+ GET_FIELD (*iptr, 29, 29) << 10 |
+ GET_FIELD (*iptr, 11, 15) << 11 |
+ GET_FIELD (*iptr, 31, 31) << 16, 17);
+
+ /* Build a plabel for an indirect call to fixup. */
+ fixup_plabel[0] = (unsigned int) iptr + 8; /* address of fixup */
+ fixup_plabel[1] = got[-1]; /* ltp for fixup */
+ fixup = (fixup_t) ((int) fixup_plabel | 3);
+ }
+
+ /* Call fixup to resolve the function address. got[1] contains the
+ link_map pointer and plabel[1] the relocation offset. */
+ fixup ((struct link_map *) got[1], plabel[1]);
+
+ return plabel[0];
+}
diff --git a/gcc/config/pa/hpux-unwind.h b/gcc/config/pa/hpux-unwind.h
new file mode 100644
index 000000000..92061ec36
--- /dev/null
+++ b/gcc/config/pa/hpux-unwind.h
@@ -0,0 +1,361 @@
+/* DWARF2 EH unwinding support for PA HP-UX.
+ Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+/* Don't use this if inhibit_libc is set.
+ The build for this target will fail trying to include missing headers. */
+#ifndef inhibit_libc
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+
+/* FIXME: We currently ignore the high halves of general, space and
+ control registers on PA 2.0 machines for applications using the
+ 32-bit runtime. We don't restore space registers or the floating
+ point status registers. */
+
+#define MD_FALLBACK_FRAME_STATE_FOR pa_fallback_frame_state
+
+/* HP-UX 10.X doesn't define GetSSReg. */
+#ifndef GetSSReg
+#define GetSSReg(ssp, ss_reg) \
+ ((UseWideRegs (ssp)) \
+ ? (ssp)->ss_wide.ss_32.ss_reg ## _lo \
+ : (ssp)->ss_narrow.ss_reg)
+#endif
+
+#if TARGET_64BIT
+#define GetSSRegAddr(ssp, ss_reg) ((long) &((ssp)->ss_wide.ss_64.ss_reg))
+#else
+#define GetSSRegAddr(ssp, ss_reg) \
+ ((UseWideRegs (ssp)) \
+ ? (long) &((ssp)->ss_wide.ss_32.ss_reg ## _lo) \
+ : (long) &((ssp)->ss_narrow.ss_reg))
+#endif
+
+#define UPDATE_FS_FOR_SAR(FS, N) \
+ (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \
+ (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_cr11) - new_cfa
+
+#define UPDATE_FS_FOR_GR(FS, GRN, N) \
+ (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \
+ (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_gr##GRN) - new_cfa
+
+#define UPDATE_FS_FOR_FR(FS, FRN, N) \
+ (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \
+ (FS)->regs.reg[N].loc.offset = (long) &(mc->ss_fr##FRN) - new_cfa;
+
+#define UPDATE_FS_FOR_PC(FS, N) \
+ (FS)->regs.reg[N].how = REG_SAVED_OFFSET; \
+ (FS)->regs.reg[N].loc.offset = GetSSRegAddr (mc, ss_pcoq_head) - new_cfa
+
+/* Extract bit field from word using HP's numbering (MSB = 0). */
+#define GET_FIELD(X, FROM, TO) \
+ ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1))
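+
+/* E.g., in HP numbering bit 0 is the MSB, so GET_FIELD (0x80000000, 0, 0)
+   == 1, and GET_FIELD (w, 19, 28) extracts a 10-bit field whose least
+   significant bit is HP bit 28 (bit 3 in LSB-0 numbering).  */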
+
+static inline int
+sign_extend (int x, int len)
+{
+ int signbit = (1 << (len - 1));
+ int mask = (signbit << 1) - 1;
+ return ((x & mask) ^ signbit) - signbit;
+}
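+
+/* For instance: sign_extend (0x1ffff, 17) masks to 17 bits, flips the
+   sign bit 0x10000 and subtracts it, giving -1, while
+   sign_extend (0xffff, 17) is just 0xffff.  */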
+
+/* Extract a 17-bit signed constant from branch instructions. */
+static inline int
+extract_17 (unsigned word)
+{
+ return sign_extend (GET_FIELD (word, 19, 28)
+ | GET_FIELD (word, 29, 29) << 10
+ | GET_FIELD (word, 11, 15) << 11
+ | (word & 0x1) << 16, 17);
+}
+
+/* Extract a 22-bit signed constant from branch instructions. */
+static inline int
+extract_22 (unsigned word)
+{
+ return sign_extend (GET_FIELD (word, 19, 28)
+ | GET_FIELD (word, 29, 29) << 10
+ | GET_FIELD (word, 11, 15) << 11
+ | GET_FIELD (word, 6, 10) << 16
+ | (word & 0x1) << 21, 22);
+}
+
+static _Unwind_Reason_Code
+pa_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ static long cpu;
+ unsigned int *pc = (unsigned int *) context->ra;
+
+ if (pc == 0)
+ return _URC_END_OF_STACK;
+
+ /* Check for relocation of the return value. */
+ if (!TARGET_64BIT
+ && *(pc + 0) == 0x2fd01224 /* fstd,ma fr4,8(sp) */
+ && *(pc + 1) == 0x0fd9109d /* ldw -4(sp),ret1 */
+ && *(pc + 2) == 0x0fd130bc) /* ldw,mb -8(sp),ret0 */
+ pc += 3;
+ else if (!TARGET_64BIT
+ && *(pc + 0) == 0x27d01224 /* fstw,ma fr4,8(sp) */
+ && *(pc + 1) == 0x0fd130bc) /* ldw,mb -8(sp),ret0 */
+ pc += 2;
+ else if (!TARGET_64BIT
+ && *(pc + 0) == 0x0fdc12b0 /* stw,ma ret0,8(sp) */
+ && *(pc + 1) == 0x0fdd1299 /* stw ret1,-4(sp) */
+ && *(pc + 2) == 0x2fd13024) /* fldd,mb -8(sp),fr4 */
+ pc += 3;
+ else if (!TARGET_64BIT
+ && *(pc + 0) == 0x0fdc12b0 /* stw,ma ret0,8(sp) */
+ && *(pc + 1) == 0x27d13024) /* fldw,mb -8(sp),fr4 */
+ pc += 2;
+
+ /* Check if the return address points to an export stub (PA 1.1 or 2.0). */
+ if ((!TARGET_64BIT
+ && *(pc + 0) == 0x4bc23fd1 /* ldw -18(sp),rp */
+ && *(pc + 1) == 0x004010a1 /* ldsid (rp),r1 */
+ && *(pc + 2) == 0x00011820 /* mtsp r1,sr0 */
+ && *(pc + 3) == 0xe0400002) /* be,n 0(sr0,rp) */
+ ||
+ (!TARGET_64BIT
+ && *(pc + 0) == 0x4bc23fd1 /* ldw -18(sp),rp */
+ && *(pc + 1) == 0xe840d002)) /* bve,n (rp) */
+ {
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = 0;
+
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = -24;
+
+ /* Update context to describe the stub frame. */
+ uw_update_context (context, fs);
+
+ /* Set up fs to describe the FDE for the caller of this stub. */
+ return uw_frame_state_for (context, fs);
+ }
+ /* Check if the return address points to a relocation stub. */
+ else if (!TARGET_64BIT
+ && *(pc + 0) == 0x0fd11082 /* ldw -8(sp),rp */
+ && (*(pc + 1) == 0xe840c002 /* bv,n r0(rp) */
+ || *(pc + 1) == 0xe840d002)) /* bve,n (rp) */
+ {
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = 0;
+
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = -8;
+
+ /* Update context to describe the stub frame. */
+ uw_update_context (context, fs);
+
+ /* Set up fs to describe the FDE for the caller of this stub. */
+ return uw_frame_state_for (context, fs);
+ }
+
+ /* Check if the return address is an export stub as signal handlers
+ may return via an export stub. */
+ if (!TARGET_64BIT
+ && (*pc & 0xffe0e002) == 0xe8400000 /* bl x,r2 */
+ && *(pc + 1) == 0x08000240 /* nop */
+ && *(pc + 2) == 0x4bc23fd1 /* ldw -18(sp),rp */
+ && *(pc + 3) == 0x004010a1 /* ldsid (rp),r1 */
+ && *(pc + 4) == 0x00011820 /* mtsp r1,sr0 */
+ && *(pc + 5) == 0xe0400002) /* be,n 0(sr0,rp) */
+ /* Extract target address from PA 1.x 17-bit branch. */
+ pc += extract_17 (*pc) + 2;
+ else if (!TARGET_64BIT
+ && (*pc & 0xfc00e002) == 0xe800a000 /* b,l x,r2 */
+ && *(pc + 1) == 0x08000240 /* nop */
+ && *(pc + 2) == 0x4bc23fd1 /* ldw -18(sp),rp */
+ && *(pc + 3) == 0xe840d002) /* bve,n (rp) */
+ /* Extract target address from PA 2.0 22-bit branch. */
+ pc += extract_22 (*pc) + 2;
+
+ /* Now check if the return address is one of the signal handler
+ returns, _sigreturn or _sigsetreturn. */
+ if ((TARGET_64BIT
+ && *(pc + 0) == 0x53db3f51 /* ldd -58(sp),dp */
+ && *(pc + 8) == 0x34160116 /* ldi 8b,r22 */
+ && *(pc + 9) == 0x08360ac1 /* shladd,l r22,3,r1,r1 */
+ && *(pc + 10) == 0x0c2010c1 /* ldd 0(r1),r1 */
+ && *(pc + 11) == 0xe4202000) /* be,l 0(sr4,r1) */
+ ||
+ (TARGET_64BIT
+ && *(pc + 0) == 0x36dc0000 /* ldo 0(r22),ret0 */
+ && *(pc + 6) == 0x341601c0 /* ldi e0,r22 */
+ && *(pc + 7) == 0x08360ac1 /* shladd,l r22,3,r1,r1 */
+ && *(pc + 8) == 0x0c2010c1 /* ldd 0(r1),r1 */
+ && *(pc + 9) == 0xe4202000) /* be,l 0(sr4,r1) */
+ ||
+ (!TARGET_64BIT
+ && *(pc + 0) == 0x379a0000 /* ldo 0(ret0),r26 */
+ && *(pc + 1) == 0x6bd33fc9 /* stw r19,-1c(sp) */
+ && *(pc + 2) == 0x20200801 /* ldil L%-40000000,r1 */
+ && *(pc + 3) == 0xe420e008 /* be,l 4(sr7,r1) */
+ && *(pc + 4) == 0x34160116) /* ldi 8b,r22 */
+ ||
+ (!TARGET_64BIT
+ && *(pc + 0) == 0x6bd33fc9 /* stw r19,-1c(sp) */
+ && *(pc + 1) == 0x20200801 /* ldil L%-40000000,r1 */
+ && *(pc + 2) == 0xe420e008 /* be,l 4(sr7,r1) */
+ && *(pc + 3) == 0x341601c0)) /* ldi e0,r22 */
+ {
+ /* The previous stack pointer is saved at (long *)SP - 1. The
+ ucontext structure is offset from the start of the previous
+ frame by the siglocal_misc structure. */
+ struct siglocalx *sl = (struct siglocalx *)
+ (*((long *) context->cfa - 1));
+ mcontext_t *mc = &(sl->sl_uc.uc_mcontext);
+
+ long new_cfa = GetSSReg (mc, ss_sp);
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ UPDATE_FS_FOR_GR (fs, 1, 1);
+ UPDATE_FS_FOR_GR (fs, 2, 2);
+ UPDATE_FS_FOR_GR (fs, 3, 3);
+ UPDATE_FS_FOR_GR (fs, 4, 4);
+ UPDATE_FS_FOR_GR (fs, 5, 5);
+ UPDATE_FS_FOR_GR (fs, 6, 6);
+ UPDATE_FS_FOR_GR (fs, 7, 7);
+ UPDATE_FS_FOR_GR (fs, 8, 8);
+ UPDATE_FS_FOR_GR (fs, 9, 9);
+ UPDATE_FS_FOR_GR (fs, 10, 10);
+ UPDATE_FS_FOR_GR (fs, 11, 11);
+ UPDATE_FS_FOR_GR (fs, 12, 12);
+ UPDATE_FS_FOR_GR (fs, 13, 13);
+ UPDATE_FS_FOR_GR (fs, 14, 14);
+ UPDATE_FS_FOR_GR (fs, 15, 15);
+ UPDATE_FS_FOR_GR (fs, 16, 16);
+ UPDATE_FS_FOR_GR (fs, 17, 17);
+ UPDATE_FS_FOR_GR (fs, 18, 18);
+ UPDATE_FS_FOR_GR (fs, 19, 19);
+ UPDATE_FS_FOR_GR (fs, 20, 20);
+ UPDATE_FS_FOR_GR (fs, 21, 21);
+ UPDATE_FS_FOR_GR (fs, 22, 22);
+ UPDATE_FS_FOR_GR (fs, 23, 23);
+ UPDATE_FS_FOR_GR (fs, 24, 24);
+ UPDATE_FS_FOR_GR (fs, 25, 25);
+ UPDATE_FS_FOR_GR (fs, 26, 26);
+ UPDATE_FS_FOR_GR (fs, 27, 27);
+ UPDATE_FS_FOR_GR (fs, 28, 28);
+ UPDATE_FS_FOR_GR (fs, 29, 29);
+ UPDATE_FS_FOR_GR (fs, 30, 30);
+ UPDATE_FS_FOR_GR (fs, 31, 31);
+
+ if (TARGET_64BIT)
+ {
+ UPDATE_FS_FOR_FR (fs, 4, 32);
+ UPDATE_FS_FOR_FR (fs, 5, 33);
+ UPDATE_FS_FOR_FR (fs, 6, 34);
+ UPDATE_FS_FOR_FR (fs, 7, 35);
+ UPDATE_FS_FOR_FR (fs, 8, 36);
+ UPDATE_FS_FOR_FR (fs, 9, 37);
+ UPDATE_FS_FOR_FR (fs, 10, 38);
+ UPDATE_FS_FOR_FR (fs, 11, 39);
+ UPDATE_FS_FOR_FR (fs, 12, 40);
+ UPDATE_FS_FOR_FR (fs, 13, 41);
+ UPDATE_FS_FOR_FR (fs, 14, 42);
+ UPDATE_FS_FOR_FR (fs, 15, 43);
+ UPDATE_FS_FOR_FR (fs, 16, 44);
+ UPDATE_FS_FOR_FR (fs, 17, 45);
+ UPDATE_FS_FOR_FR (fs, 18, 46);
+ UPDATE_FS_FOR_FR (fs, 19, 47);
+ UPDATE_FS_FOR_FR (fs, 20, 48);
+ UPDATE_FS_FOR_FR (fs, 21, 49);
+ UPDATE_FS_FOR_FR (fs, 22, 50);
+ UPDATE_FS_FOR_FR (fs, 23, 51);
+ UPDATE_FS_FOR_FR (fs, 24, 52);
+ UPDATE_FS_FOR_FR (fs, 25, 53);
+ UPDATE_FS_FOR_FR (fs, 26, 54);
+ UPDATE_FS_FOR_FR (fs, 27, 55);
+ UPDATE_FS_FOR_FR (fs, 28, 56);
+ UPDATE_FS_FOR_FR (fs, 29, 57);
+ UPDATE_FS_FOR_FR (fs, 30, 58);
+ UPDATE_FS_FOR_FR (fs, 31, 59);
+
+ UPDATE_FS_FOR_SAR (fs, 60);
+ }
+ else
+ {
+ UPDATE_FS_FOR_FR (fs, 4, 32);
+ UPDATE_FS_FOR_FR (fs, 5, 34);
+ UPDATE_FS_FOR_FR (fs, 6, 36);
+ UPDATE_FS_FOR_FR (fs, 7, 38);
+ UPDATE_FS_FOR_FR (fs, 8, 40);
+ UPDATE_FS_FOR_FR (fs, 9, 42);
+ UPDATE_FS_FOR_FR (fs, 10, 44);
+ UPDATE_FS_FOR_FR (fs, 11, 46);
+ UPDATE_FS_FOR_FR (fs, 12, 48);
+ UPDATE_FS_FOR_FR (fs, 13, 50);
+ UPDATE_FS_FOR_FR (fs, 14, 52);
+ UPDATE_FS_FOR_FR (fs, 15, 54);
+
+ if (!cpu)
+ cpu = sysconf (_SC_CPU_VERSION);
+
+ /* PA-RISC 1.0 only has 16 floating point registers. */
+ if (cpu != CPU_PA_RISC1_0)
+ {
+ UPDATE_FS_FOR_FR (fs, 16, 56);
+ UPDATE_FS_FOR_FR (fs, 17, 58);
+ UPDATE_FS_FOR_FR (fs, 18, 60);
+ UPDATE_FS_FOR_FR (fs, 19, 62);
+ UPDATE_FS_FOR_FR (fs, 20, 64);
+ UPDATE_FS_FOR_FR (fs, 21, 66);
+ UPDATE_FS_FOR_FR (fs, 22, 68);
+ UPDATE_FS_FOR_FR (fs, 23, 70);
+ UPDATE_FS_FOR_FR (fs, 24, 72);
+ UPDATE_FS_FOR_FR (fs, 25, 74);
+ UPDATE_FS_FOR_FR (fs, 26, 76);
+ UPDATE_FS_FOR_FR (fs, 27, 78);
+ UPDATE_FS_FOR_FR (fs, 28, 80);
+ UPDATE_FS_FOR_FR (fs, 29, 82);
+ UPDATE_FS_FOR_FR (fs, 30, 84);
+ UPDATE_FS_FOR_FR (fs, 31, 86);
+ }
+
+ UPDATE_FS_FOR_SAR (fs, 88);
+ }
+
+ fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN;
+ UPDATE_FS_FOR_PC (fs, DWARF_ALT_FRAME_RETURN_COLUMN);
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+ }
+
+ return _URC_END_OF_STACK;
+}
+#endif /* inhibit_libc */
diff --git a/gcc/config/pa/lib2funcs.asm b/gcc/config/pa/lib2funcs.asm
new file mode 100644
index 000000000..8aa398c87
--- /dev/null
+++ b/gcc/config/pa/lib2funcs.asm
@@ -0,0 +1,74 @@
+; Subroutines for calling unbound dynamic functions from within GDB for HPPA.
+; Subroutines for out-of-line prologues and epilogues for the HPPA.
+; Copyright (C) 1994, 1995, 1996, 2009 Free Software Foundation, Inc.
+
+; This file is part of GCC.
+
+; GCC is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 3, or (at your option)
+; any later version.
+
+; GCC is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+; <http://www.gnu.org/licenses/>.
+
+#if !defined(__pro__) && !defined(__rtems__)
+ .SPACE $PRIVATE$
+ .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
+ .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
+ .SPACE $TEXT$
+ .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
+ .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
+ .SUBSPA $MILLICODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=8
+#endif
+ .IMPORT $$dyncall,MILLICODE
+#if !defined(__pro__) && !defined(__rtems__)
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+#else
+ .text
+#endif
+
+; Simply call with the address of the desired import stub in %r22 and
+; arguments in the normal place (%r26-%r23 and stack slots).
+;
+ .align 4
+ .EXPORT __gcc_plt_call,ENTRY,PRIV_LEV=3,RTNVAL=GR
+__gcc_plt_call
+ .PROC
+ .CALLINFO
+ .ENTRY
+ ; Our return address comes in %r31, not %r2!
+ stw %r31,-8(%r30)
+
+ ; An inline version of dyncall so we don't have to worry
+ ; about long calls to millicode, PIC and other complexities.
+ bb,>=,n %r22,30,L$foo
+ depi 0,31,2,%r22
+ ldw 4(%r22),%r19
+ ldw 0(%r22),%r22
+L$foo
+ ldsid (%r22),%r1
+ mtsp %r1,%sr0
+ ble 0(%sr0,%r22)
+ copy %r31,%r2
+ ldw -8(%r30),%r2
+
+ ; We're going to be returning to a stack address, so we
+ ; need to do an intra-space return.
+ ldsid (%rp),%r1
+ mtsp %r1,%sr0
+ be,n 0(%sr0,%rp)
+ .EXIT
+ .PROCEND
diff --git a/gcc/config/pa/linux-atomic.c b/gcc/config/pa/linux-atomic.c
new file mode 100644
index 000000000..2ae242635
--- /dev/null
+++ b/gcc/config/pa/linux-atomic.c
@@ -0,0 +1,305 @@
+/* Linux-specific atomic operations for PA Linux.
+ Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Based on code contributed by CodeSourcery for ARM EABI Linux.
+ Modifications for PA Linux by Helge Deller <deller@gmx.de>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define EFAULT 14
+#define EBUSY 16
+#define ENOSYS 251
+
+/* All PA-RISC implementations supported by Linux have strongly
+   ordered loads and stores.  Only cache flushes and purges can be
+   delayed.  The data cache implementations are all globally
+   coherent.  Thus, there is no need to synchronize memory accesses.
+
+   GCC automatically issues an asm memory barrier when it encounters
+   a __sync_synchronize builtin.  Thus, we do not need to define this
+   builtin.
+
+ We implement byte, short and int versions of each atomic operation
+ using the kernel helper defined below. There is no support for
+ 64-bit operations yet. */
+
+/* A privileged instruction to crash a userspace program with SIGILL. */
+#define ABORT_INSTRUCTION asm ("iitlbp %r0,(%sr0, %r0)")
+
+/* Determine kernel LWS function call (0=32-bit, 1=64-bit userspace). */
+#define LWS_CAS (sizeof(unsigned long) == 4 ? 0 : 1)
+
+/* Kernel helper for compare-and-exchange a 32-bit value. */
+static inline long
+__kernel_cmpxchg (int oldval, int newval, int *mem)
+{
+ register unsigned long lws_mem asm("r26") = (unsigned long) (mem);
+ register long lws_ret asm("r28");
+ register long lws_errno asm("r21");
+ register int lws_old asm("r25") = oldval;
+ register int lws_new asm("r24") = newval;
+ asm volatile ( "ble 0xb0(%%sr2, %%r0) \n\t"
+ "ldi %5, %%r20 \n\t"
+ : "=r" (lws_ret), "=r" (lws_errno), "=r" (lws_mem),
+ "=r" (lws_old), "=r" (lws_new)
+ : "i" (LWS_CAS), "2" (lws_mem), "3" (lws_old), "4" (lws_new)
+ : "r1", "r20", "r22", "r23", "r29", "r31", "memory"
+ );
+ if (__builtin_expect (lws_errno == -EFAULT || lws_errno == -ENOSYS, 0))
+ ABORT_INSTRUCTION;
+
+ /* If the kernel LWS call succeeded (lws_errno == 0), lws_ret contains
+ the old value from memory. If this value is equal to OLDVAL, the
+ new value was written to memory. If not, return -EBUSY. */
+ if (!lws_errno && lws_ret != oldval)
+ lws_errno = -EBUSY;
+
+ return lws_errno;
+}
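+
+/* Every operation below is built from this one primitive: load the
+   current value, compute the new value, and retry __kernel_cmpxchg
+   until it reports success (a zero return).  */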
+
+#define HIDDEN __attribute__ ((visibility ("hidden")))
+
+/* Big endian masks */
+#define INVERT_MASK_1 24
+#define INVERT_MASK_2 16
+
+#define MASK_1 0xffu
+#define MASK_2 0xffffu
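+
+/* Worked example of the shift computed below (illustrative, big
+   endian): for a short at byte offset 2 in its word,
+   shift = (2 << 3) ^ INVERT_MASK_2 = 16 ^ 16 = 0, so the value sits in
+   the low half of the word; at offset 0 the shift is 16 and it sits in
+   the high half.  */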
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \
+ int HIDDEN \
+ __sync_fetch_and_##OP##_4 (int *ptr, int val) \
+ { \
+ int failure, tmp; \
+ \
+ do { \
+ tmp = *ptr; \
+ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
+ } while (failure != 0); \
+ \
+ return tmp; \
+ }
+
+FETCH_AND_OP_WORD (add, , +)
+FETCH_AND_OP_WORD (sub, , -)
+FETCH_AND_OP_WORD (or, , |)
+FETCH_AND_OP_WORD (and, , &)
+FETCH_AND_OP_WORD (xor, , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+ subword-sized quantities. */
+
+#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \
+ TYPE HIDDEN \
+ NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \
+ { \
+ int *wordptr = (int *) ((unsigned long) ptr & ~3); \
+ unsigned int mask, shift, oldval, newval; \
+ int failure; \
+ \
+ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ do { \
+ oldval = *wordptr; \
+ newval = ((PFX_OP (((oldval & mask) >> shift) \
+ INF_OP (unsigned int) val)) << shift) & mask; \
+ newval |= oldval & ~mask; \
+ failure = __kernel_cmpxchg (oldval, newval, wordptr); \
+ } while (failure != 0); \
+ \
+ return (RETURN & mask) >> shift; \
+ }
+
+SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+
+SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
+
+#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \
+ int HIDDEN \
+ __sync_##OP##_and_fetch_4 (int *ptr, int val) \
+ { \
+ int tmp, failure; \
+ \
+ do { \
+ tmp = *ptr; \
+ failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \
+ } while (failure != 0); \
+ \
+ return PFX_OP (tmp INF_OP val); \
+ }
+
+OP_AND_FETCH_WORD (add, , +)
+OP_AND_FETCH_WORD (sub, , -)
+OP_AND_FETCH_WORD (or, , |)
+OP_AND_FETCH_WORD (and, , &)
+OP_AND_FETCH_WORD (xor, , ^)
+OP_AND_FETCH_WORD (nand, ~, &)
+
+SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+
+SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+
+int HIDDEN
+__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+ int actual_oldval, fail;
+
+ while (1)
+ {
+ actual_oldval = *ptr;
+
+ if (__builtin_expect (oldval != actual_oldval, 0))
+ return actual_oldval;
+
+ fail = __kernel_cmpxchg (actual_oldval, newval, ptr);
+
+ if (__builtin_expect (!fail, 1))
+ return actual_oldval;
+ }
+}
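+
+/* Usage sketch: a caller learns whether the swap happened by comparing
+   the return value with its expected OLDVAL, as in
+
+	if (__sync_val_compare_and_swap_4 (&x, old, new) == old)
+	  ... the swap succeeded ...
+
+   This is exactly how SUBWORD_BOOL_CAS below layers the boolean form
+   on top of the value form.  */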
+
+#define SUBWORD_VAL_CAS(TYPE, WIDTH) \
+ TYPE HIDDEN \
+ __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
+ TYPE newval) \
+ { \
+ int *wordptr = (int *)((unsigned long) ptr & ~3), fail; \
+ unsigned int mask, shift, actual_oldval, actual_newval; \
+ \
+ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ while (1) \
+ { \
+ actual_oldval = *wordptr; \
+ \
+ if (__builtin_expect (((actual_oldval & mask) >> shift) \
+ != (unsigned int) oldval, 0)) \
+ return (actual_oldval & mask) >> shift; \
+ \
+ actual_newval = (actual_oldval & ~mask) \
+ | (((unsigned int) newval << shift) & mask); \
+ \
+ fail = __kernel_cmpxchg (actual_oldval, actual_newval, \
+ wordptr); \
+ \
+ if (__builtin_expect (!fail, 1)) \
+ return (actual_oldval & mask) >> shift; \
+ } \
+ }
+
+SUBWORD_VAL_CAS (unsigned short, 2)
+SUBWORD_VAL_CAS (unsigned char, 1)
+
+typedef unsigned char bool;
+
+bool HIDDEN
+__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+ int failure = __kernel_cmpxchg (oldval, newval, ptr);
+ return (failure == 0);
+}
+
+#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \
+ bool HIDDEN \
+ __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \
+ TYPE newval) \
+ { \
+ TYPE actual_oldval \
+ = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \
+ return (oldval == actual_oldval); \
+ }
+
+SUBWORD_BOOL_CAS (unsigned short, 2)
+SUBWORD_BOOL_CAS (unsigned char, 1)
+
+int HIDDEN
+__sync_lock_test_and_set_4 (int *ptr, int val)
+{
+ int failure, oldval;
+
+ do {
+ oldval = *ptr;
+ failure = __kernel_cmpxchg (oldval, val, ptr);
+ } while (failure != 0);
+
+ return oldval;
+}
+
+#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \
+ TYPE HIDDEN \
+ __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \
+ { \
+ int failure; \
+ unsigned int oldval, newval, shift, mask; \
+ int *wordptr = (int *) ((unsigned long) ptr & ~3); \
+ \
+ shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \
+ mask = MASK_##WIDTH << shift; \
+ \
+ do { \
+ oldval = *wordptr; \
+ newval = (oldval & ~mask) \
+ | (((unsigned int) val << shift) & mask); \
+ failure = __kernel_cmpxchg (oldval, newval, wordptr); \
+ } while (failure != 0); \
+ \
+ return (oldval & mask) >> shift; \
+ }
+
+SUBWORD_TEST_AND_SET (unsigned short, 2)
+SUBWORD_TEST_AND_SET (unsigned char, 1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
+ void HIDDEN \
+ __sync_lock_release_##WIDTH (TYPE *ptr) \
+ { \
+ *ptr = 0; \
+ }
+
+SYNC_LOCK_RELEASE (int, 4)
+SYNC_LOCK_RELEASE (short, 2)
+SYNC_LOCK_RELEASE (char, 1)
diff --git a/gcc/config/pa/linux-unwind.h b/gcc/config/pa/linux-unwind.h
new file mode 100644
index 000000000..38b4eda7a
--- /dev/null
+++ b/gcc/config/pa/linux-unwind.h
@@ -0,0 +1,141 @@
+/* DWARF2 EH unwinding support for PA Linux.
+ Copyright (C) 2004, 2005, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+/* Don't use this if inhibit_libc is set.
+ The build for this target will fail trying to include missing headers. */
+#ifndef inhibit_libc
+#include <signal.h>
+#include <sys/ucontext.h>
+
+/* Unfortunately, because of various bugs and changes to the kernel,
+ we have several cases to deal with.
+
+ In 2.4, the signal trampoline is 4 words, and (CONTEXT)->ra should
+ point directly at the beginning of the trampoline and struct rt_sigframe.
+
+ In <= 2.6.5-rc2-pa3, the signal trampoline is 9 words, and
+ (CONTEXT)->ra points at the 4th word in the trampoline structure. This
+ is wrong, it should point at the 5th word. This is fixed in 2.6.5-rc2-pa4.
+
+ To detect these cases, we first take (CONTEXT)->ra, align it to 64-bytes
+ to get the beginning of the signal frame, and then check offsets 0, 4
+ and 5 to see if we found the beginning of the trampoline. This will
+ tell us how to locate the sigcontext structure.
+
+ Note that with a 2.4 64-bit kernel, the signal context is not properly
+ passed back to userspace so the unwind will not work correctly. */
+
+#define MD_FALLBACK_FRAME_STATE_FOR pa32_fallback_frame_state
+
+static _Unwind_Reason_Code
+pa32_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned long sp = (unsigned long)context->ra & ~63;
+ unsigned int *pc = (unsigned int *)sp;
+ unsigned long off;
+ _Unwind_Ptr new_cfa;
+ int i;
+ struct sigcontext *sc;
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *frame;
+
+ /* rt_sigreturn trampoline:
+ 3419000x ldi 0, %r25 or ldi 1, %r25 (x = 0 or 2)
+ 3414015a ldi __NR_rt_sigreturn, %r20
+ e4008200 be,l 0x100(%sr2, %r0), %sr0, %r31
+ 08000240 nop */
+
+ if (pc[0] == 0x34190000 || pc[0] == 0x34190002)
+ off = 4*4;
+ else if (pc[4] == 0x34190000 || pc[4] == 0x34190002)
+ {
+ pc += 4;
+ off = 10 * 4;
+ }
+ else if (pc[5] == 0x34190000 || pc[5] == 0x34190002)
+ {
+ pc += 5;
+ off = 10 * 4;
+ }
+ else
+ {
+ /* We may have to unwind through an alternate signal stack.
+ We assume that the alignment of the alternate signal stack
+ is BIGGEST_ALIGNMENT (i.e., that it has been allocated using
+ malloc). As a result, we can't distinguish trampolines
+ used prior to 2.6.5-rc2-pa4. However after 2.6.5-rc2-pa4,
+ the return address of a signal trampoline will be on an odd
+ word boundary and we can then determine the frame offset. */
+ sp = (unsigned long)context->ra;
+ pc = (unsigned int *)sp;
+ if ((pc[0] == 0x34190000 || pc[0] == 0x34190002) && (sp & 4))
+ off = 5 * 4;
+ else
+ return _URC_END_OF_STACK;
+ }
+
+ if (pc[1] != 0x3414015a
+ || pc[2] != 0xe4008200
+ || pc[3] != 0x08000240)
+ return _URC_END_OF_STACK;
+
+ frame = (struct rt_sigframe *)(sp + off);
+ sc = &frame->uc.uc_mcontext;
+
+ new_cfa = sc->sc_gr[30];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+ for (i = 1; i <= 31; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = (long)&sc->sc_gr[i] - new_cfa;
+ }
+ for (i = 4; i <= 31; i++)
+ {
+ /* FP regs have left and right halves */
+ fs->regs.reg[2*i+24].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2*i+24].loc.offset
+ = (long)&sc->sc_fr[i] - new_cfa;
+ fs->regs.reg[2*i+24+1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2*i+24+1].loc.offset
+ = (long)&sc->sc_fr[i] + 4 - new_cfa;
+ }
+ fs->regs.reg[88].how = REG_SAVED_OFFSET;
+ fs->regs.reg[88].loc.offset = (long) &sc->sc_sar - new_cfa;
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].how = REG_SAVED_OFFSET;
+ fs->regs.reg[DWARF_ALT_FRAME_RETURN_COLUMN].loc.offset
+ = (long) &sc->sc_iaoq[0] - new_cfa;
+ fs->retaddr_column = DWARF_ALT_FRAME_RETURN_COLUMN;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+#endif /* inhibit_libc */
diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S
new file mode 100644
index 000000000..2e9c4f741
--- /dev/null
+++ b/gcc/config/pa/milli64.S
@@ -0,0 +1,2134 @@
+/* 32 and 64-bit millicode, original author Hewlett-Packard
+ adapted for gcc by Paul Bame <bame@debian.org>
+ and Alan Modra <alan@linuxcare.com.au>.
+
+ Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef pa64
+ .level 2.0w
+#endif
+
+/* Hardware General Registers. */
+r0: .reg %r0
+r1: .reg %r1
+r2: .reg %r2
+r3: .reg %r3
+r4: .reg %r4
+r5: .reg %r5
+r6: .reg %r6
+r7: .reg %r7
+r8: .reg %r8
+r9: .reg %r9
+r10: .reg %r10
+r11: .reg %r11
+r12: .reg %r12
+r13: .reg %r13
+r14: .reg %r14
+r15: .reg %r15
+r16: .reg %r16
+r17: .reg %r17
+r18: .reg %r18
+r19: .reg %r19
+r20: .reg %r20
+r21: .reg %r21
+r22: .reg %r22
+r23: .reg %r23
+r24: .reg %r24
+r25: .reg %r25
+r26: .reg %r26
+r27: .reg %r27
+r28: .reg %r28
+r29: .reg %r29
+r30: .reg %r30
+r31: .reg %r31
+
+/* Hardware Space Registers. */
+sr0: .reg %sr0
+sr1: .reg %sr1
+sr2: .reg %sr2
+sr3: .reg %sr3
+sr4: .reg %sr4
+sr5: .reg %sr5
+sr6: .reg %sr6
+sr7: .reg %sr7
+
+/* Hardware Floating Point Registers. */
+fr0: .reg %fr0
+fr1: .reg %fr1
+fr2: .reg %fr2
+fr3: .reg %fr3
+fr4: .reg %fr4
+fr5: .reg %fr5
+fr6: .reg %fr6
+fr7: .reg %fr7
+fr8: .reg %fr8
+fr9: .reg %fr9
+fr10: .reg %fr10
+fr11: .reg %fr11
+fr12: .reg %fr12
+fr13: .reg %fr13
+fr14: .reg %fr14
+fr15: .reg %fr15
+
+/* Hardware Control Registers. */
+cr11: .reg %cr11
+sar: .reg %cr11 /* Shift Amount Register */
+
+/* Software Architecture General Registers. */
+rp: .reg r2 /* return pointer */
+#ifdef pa64
+mrp: .reg r2 /* millicode return pointer */
+#else
+mrp: .reg r31 /* millicode return pointer */
+#endif
+ret0: .reg r28 /* return value */
+ret1: .reg r29 /* return value (high part of double) */
+sp: .reg r30 /* stack pointer */
+dp: .reg r27 /* data pointer */
+arg0: .reg r26 /* argument */
+arg1: .reg r25 /* argument or high part of double argument */
+arg2: .reg r24 /* argument */
+arg3: .reg r23 /* argument or high part of double argument */
+
+/* Software Architecture Space Registers. */
+/* sr0 ; return link from BLE */
+sret: .reg sr1 /* return value */
+sarg: .reg sr1 /* argument */
+/* sr4 ; PC SPACE tracker */
+/* sr5 ; process private data */
+
+/* Frame Offsets (millicode convention!).  Used when calling other
+ millicode routines. Stack unwinding is dependent upon these
+ definitions. */
+r31_slot: .equ -20 /* "current RP" slot */
+sr0_slot: .equ -16 /* "static link" slot */
+#if defined(pa64)
+mrp_slot: .equ -16 /* "current RP" slot */
+psp_slot: .equ -8 /* "previous SP" slot */
+#else
+mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
+#endif
+
+
+#define DEFINE(name,value)name: .EQU value
+#define RDEFINE(name,value)name: .REG value
+#ifdef milliext
+#define MILLI_BE(lbl) BE lbl(sr7,r0)
+#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
+#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
+#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
+#define MILLIRETN BE,n 0(sr0,mrp)
+#define MILLIRET BE 0(sr0,mrp)
+#define MILLI_RETN BE,n 0(sr0,mrp)
+#define MILLI_RET BE 0(sr0,mrp)
+#else
+#define MILLI_BE(lbl) B lbl
+#define MILLI_BEN(lbl) B,n lbl
+#define MILLI_BLE(lbl) BL lbl,mrp
+#define MILLI_BLEN(lbl) BL,n lbl,mrp
+#define MILLIRETN BV,n 0(mrp)
+#define MILLIRET BV 0(mrp)
+#define MILLI_RETN BV,n 0(mrp)
+#define MILLI_RET BV 0(mrp)
+#endif
+
+#ifdef __STDC__
+#define CAT(a,b) a##b
+#else
+#define CAT(a,b) a/**/b
+#endif
+
+#ifdef ELF
+#define SUBSPA_MILLI .section .text
+#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
+#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
+#define ATTR_MILLI
+#define SUBSPA_DATA .section .data
+#define ATTR_DATA
+#define GLOBAL $global$
+#define GSYM(sym) !sym:
+#define LSYM(sym) !CAT(.L,sym:)
+#define LREF(sym) CAT(.L,sym)
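+
+/* For example, under ELF: GSYM(foo) emits `foo:', LSYM(bar) emits the
+   local label `.Lbar:', and LREF(bar) expands to `.Lbar' for use as a
+   branch target.  */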
+
+#else
+
+#ifdef coff
+/* This used to be .milli, but since link32 places differently named
+   sections in different segments, millicode ends up a long way away
+   from .text (1 MB?).  This way they will be a lot closer.
+
+ The SUBSPA_MILLI_* specify locality sets for certain millicode
+ modules in order to ensure that modules that call one another are
+ placed close together. Without locality sets this is unlikely to
+ happen because of the Dynamite linker library search algorithm. We
+ want these modules close together so that short calls always reach
+ (we don't want to require long calls or use long call stubs). */
+
+#define SUBSPA_MILLI .subspa .text
+#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
+#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
+#define ATTR_MILLI .attr code,read,execute
+#define SUBSPA_DATA .subspa .data
+#define ATTR_DATA .attr init_data,read,write
+#define GLOBAL _gp
+#else
+#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
+#define SUBSPA_MILLI_DIV SUBSPA_MILLI
+#define SUBSPA_MILLI_MUL SUBSPA_MILLI
+#define ATTR_MILLI
+#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
+#define ATTR_DATA
+#define GLOBAL $global$
+#endif
+#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16
+
+#define GSYM(sym) !sym
+#define LSYM(sym) !CAT(L$,sym)
+#define LREF(sym) CAT(L$,sym)
+#endif
+
+#ifdef L_dyncall
+ SUBSPA_MILLI
+ ATTR_DATA
+GSYM($$dyncall)
+ .export $$dyncall,millicode
+ .proc
+ .callinfo millicode
+ .entry
+ bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
+ depi 0,31,2,%r22 ; clear the two least significant bits
+ ldw 4(%r22),%r19 ; load new LTP value
+ ldw 0(%r22),%r22 ; load address of target
+LSYM(1)
+#ifdef LINUX
+ bv %r0(%r22) ; branch to the real target
+#else
+ ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22
+ mtsp %r1,%sr0 ; move that space identifier into sr0
+ be 0(%sr0,%r22) ; branch to the real target
+#endif
+ stw %r2,-24(%r30) ; save return address into frame marker
+ .exit
+ .procend
+#endif
+
+#ifdef L_divI
+/* ROUTINES: $$divI, $$divoI
+
+ Single precision divide for signed binary integers.
+
+ The quotient is truncated towards zero.
+ The sign of the quotient is the XOR of the signs of the dividend and
+ divisor.
+ Divide by zero is trapped.
+ Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions:
+ . divisor is zero (traps with ADDIT,= 0,25,0)
+ . dividend==-2**31 and divisor==-1 and routine is $$divoI
+ . (traps with ADDO 26,25,0)
+ . Changes memory at the following places:
+ . NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Branches to other millicode routines using BE:
+ . $$divI_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
+ .
+ . For selected divisors, calls a divide by constant routine written by
+ . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
+ .
+ . The only overflow case is -2**31 divided by -1.
+ . Both routines return -2**31 but only $$divoI traps. */
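+
+/* Calling sketch (illustrative; the sequence the compiler actually
+   emits depends on the code model, cf. MILLI_BLE above):
+
+	copy a,%r26		; arg0 = dividend
+	copy b,%r25		; arg1 = divisor
+	bl $$divI,%r31		; millicode call, return pc in mrp
+	nop			; delay slot
+	copy %r29,q		; quotient in ret1  */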
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1) /* r29 */
+RDEFINE(temp1,arg0)
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+ .import $$divI_2,millicode
+ .import $$divI_3,millicode
+ .import $$divI_4,millicode
+ .import $$divI_5,millicode
+ .import $$divI_6,millicode
+ .import $$divI_7,millicode
+ .import $$divI_8,millicode
+ .import $$divI_9,millicode
+ .import $$divI_10,millicode
+ .import $$divI_12,millicode
+ .import $$divI_14,millicode
+ .import $$divI_15,millicode
+ .export $$divI,millicode
+ .export $$divoI,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$divoI)
+ comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
+GSYM($$divI)
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
+ addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
+ b,n LREF(neg_denom)
+LSYM(pow2)
+ addi,>= 0,arg0,retreg /* if numerator is negative, add the */
+ add arg0,temp,retreg /* (denominator - 1) to correct for shifts */
+ extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
+ extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
+ ldi 0xcc,temp1 /* setup 0xcc in temp1 */
+ extru,= arg1,23,8,temp /* test denominator with 0xff00 */
+ extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
+ ldi 0xaa,temp /* setup 0xaa in temp */
+ extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
+ extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
+ and,= arg1,temp1,r0 /* test denominator with 0xcc */
+ extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
+ and,= arg1,temp,r0 /* test denominator with 0xaa */
+ extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
+ MILLIRETN
+LSYM(neg_denom)
+ addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg1,temp /* make denominator positive */
+ comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
+ ldo -1(temp),retreg /* is there at most one bit set ? */
+ and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg0,retreg /* negate numerator */
+ comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
+ copy retreg,arg0 /* set up arg0, arg1 and temp */
+ copy temp,arg1 /* before branching to pow2 */
+ b LREF(pow2)
+ ldo -1(arg1),temp
+LSYM(regular_seq)
+ comib,>>=,n 15,arg1,LREF(small_divisor)
+ add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
+LSYM(normal)
+ subi 0,retreg,retreg /* make it positive */
+ sub 0,arg1,temp /* clear carry, */
+ /* negate the divisor */
+ ds 0,temp,0 /* set V-bit to the comple- */
+ /* ment of the divisor sign */
+ add retreg,retreg,retreg /* shift msb bit into carry */
+ ds r0,arg1,temp /* 1st divide step, if no carry */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 32nd divide step, */
+ addc retreg,retreg,retreg /* shift last retreg bit into retreg */
+ xor,>= arg0,arg1,0 /* get correct sign of quotient */
+ sub 0,retreg,retreg /* based on operand signs */
+ MILLIRETN
+ nop
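+
+/* The 32 ds/addc pairs above retire one quotient bit per step. A loose C
+   model of the same computation (plain restoring division; ds actually
+   performs a nonrestoring conditional add/subtract governed by the V-bit):
+
+	unsigned div_steps (unsigned n, unsigned d)
+	{
+	  unsigned q = 0, rem = 0;
+	  int i;
+	  for (i = 31; i >= 0; i--)
+	    {
+	      rem = (rem << 1) | ((n >> i) & 1);	// next dividend bit
+	      q <<= 1;
+	      if (rem >= d)				// one divide step
+		{
+		  rem -= d;
+		  q |= 1;
+		}
+	    }
+	  return q;
+	}
+*/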
+
+LSYM(small_divisor)
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register. We are working with */
+/* small divisors (and 32-bit integers). We must not be misled */
+/* by "1" bits left in the upper 32 bits. */
+ depd %r0,31,32,%r25
+#endif
+ blr,n arg1,r0
+ nop
+/* table for divisor == 0,1, ... ,15 */
+ addit,= 0,arg1,r0 /* trap if divisor == 0 */
+ nop
+ MILLIRET /* divisor == 1 */
+ copy arg0,retreg
+ MILLI_BEN($$divI_2) /* divisor == 2 */
+ nop
+ MILLI_BEN($$divI_3) /* divisor == 3 */
+ nop
+ MILLI_BEN($$divI_4) /* divisor == 4 */
+ nop
+ MILLI_BEN($$divI_5) /* divisor == 5 */
+ nop
+ MILLI_BEN($$divI_6) /* divisor == 6 */
+ nop
+ MILLI_BEN($$divI_7) /* divisor == 7 */
+ nop
+ MILLI_BEN($$divI_8) /* divisor == 8 */
+ nop
+ MILLI_BEN($$divI_9) /* divisor == 9 */
+ nop
+ MILLI_BEN($$divI_10) /* divisor == 10 */
+ nop
+ b LREF(normal) /* divisor == 11 */
+ add,>= 0,arg0,retreg
+ MILLI_BEN($$divI_12) /* divisor == 12 */
+ nop
+ b LREF(normal) /* divisor == 13 */
+ add,>= 0,arg0,retreg
+ MILLI_BEN($$divI_14) /* divisor == 14 */
+ nop
+ MILLI_BEN($$divI_15) /* divisor == 15 */
+ nop
+
+LSYM(negative1)
+ sub 0,arg0,retreg /* result is negation of dividend */
+ MILLIRET
+ addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_divU
+/* ROUTINE: $$divU
+ .
+ . Single precision divide for unsigned integers.
+ .
+ . Quotient is truncated towards zero.
+ . Traps on divide by zero.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions:
+ . divisor is zero
+ . Changes memory at the following places:
+ . NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Branches to other millicode routines using BE:
+ . $$divU_# for 3,5,6,7,9,10,12,14,15
+ .
+ . For selected small divisors, calls the special divide-by-constant
+ . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1) /* r29 */
+RDEFINE(temp1,arg0)
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+ .export $$divU,millicode
+ .import $$divU_3,millicode
+ .import $$divU_5,millicode
+ .import $$divU_6,millicode
+ .import $$divU_7,millicode
+ .import $$divU_9,millicode
+ .import $$divU_10,millicode
+ .import $$divU_12,millicode
+ .import $$divU_14,millicode
+ .import $$divU_15,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$divU)
+/* The subtract is not nullified since it does no harm and can be used
+ by the two cases that branch back to "normal". */
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,= arg1,temp,r0 /* if so, denominator is power of 2 */
+ b LREF(regular_seq)
+ addit,= 0,arg1,0 /* trap for zero dvr */
+ copy arg0,retreg
+ extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
+ extru retreg,15,16,retreg /* retreg = retreg >> 16 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
+ ldi 0xcc,temp1 /* setup 0xcc in temp1 */
+ extru,= arg1,23,8,temp /* test denominator with 0xff00 */
+ extru retreg,23,24,retreg /* retreg = retreg >> 8 */
+ or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
+ ldi 0xaa,temp /* setup 0xaa in temp */
+ extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
+ extru retreg,27,28,retreg /* retreg = retreg >> 4 */
+ and,= arg1,temp1,r0 /* test denominator with 0xcc */
+ extru retreg,29,30,retreg /* retreg = retreg >> 2 */
+ and,= arg1,temp,r0 /* test denominator with 0xaa */
+ extru retreg,30,31,retreg /* retreg = retreg >> 1 */
+ MILLIRETN
+ nop
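+
+/* A hedged C sketch of the shift computation above (illustrative; the
+   masks mirror the 0xffff0000, 0xff00, 0xf0, 0xcc and 0xaa tests):
+
+	unsigned divu_pow2 (unsigned n, unsigned d)	// d a power of two
+	{
+	  unsigned q = n;
+	  if (d & 0xffff0000) q >>= 16;
+	  d |= d >> 16;			// smear the set bit downward
+	  if (d & 0x0000ff00) q >>= 8;
+	  d |= d >> 8;
+	  if (d & 0x000000f0) q >>= 4;
+	  if (d & 0x000000cc) q >>= 2;	// bits 2,3 within each nibble
+	  if (d & 0x000000aa) q >>= 1;	// odd bit positions
+	  return q;
+	}
+*/
+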
+LSYM(regular_seq)
+ comib,>= 15,arg1,LREF(special_divisor)
+ subi 0,arg1,temp /* clear carry, negate the divisor */
+ ds r0,temp,r0 /* set V-bit to 1 */
+LSYM(normal)
+ add arg0,arg0,retreg /* shift msb bit into carry */
+ ds r0,arg1,temp /* 1st divide step, if no carry */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds temp,arg1,temp /* 32nd divide step, */
+ MILLIRET
+ addc retreg,retreg,retreg /* shift last retreg bit into retreg */
+
+/* Handle the cases where divisor is a small constant or has high bit on. */
+LSYM(special_divisor)
+/* blr arg1,r0 */
+/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
+
+/* Pratap 8/13/90. The 815 Stirling chip set has a bug (a problem in
+   nullification) that prevents us from generating such a blr, comib
+   sequence, so I rewrote this code. */
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register. We are working with
+   small divisors (and 32-bit unsigned integers). We must not be misled
+ by "1" bits left in the upper 32 bits. */
+ depd %r0,31,32,%r25
+#endif
+ comib,> 0,arg1,LREF(big_divisor)
+ nop
+ blr arg1,r0
+ nop
+
+LSYM(zero_divisor) /* this label is here to provide external visibility */
+ addit,= 0,arg1,0 /* trap for zero dvr */
+ nop
+ MILLIRET /* divisor == 1 */
+ copy arg0,retreg
+ MILLIRET /* divisor == 2 */
+ extru arg0,30,31,retreg
+ MILLI_BEN($$divU_3) /* divisor == 3 */
+ nop
+ MILLIRET /* divisor == 4 */
+ extru arg0,29,30,retreg
+ MILLI_BEN($$divU_5) /* divisor == 5 */
+ nop
+ MILLI_BEN($$divU_6) /* divisor == 6 */
+ nop
+ MILLI_BEN($$divU_7) /* divisor == 7 */
+ nop
+ MILLIRET /* divisor == 8 */
+ extru arg0,28,29,retreg
+ MILLI_BEN($$divU_9) /* divisor == 9 */
+ nop
+ MILLI_BEN($$divU_10) /* divisor == 10 */
+ nop
+ b LREF(normal) /* divisor == 11 */
+ ds r0,temp,r0 /* set V-bit to 1 */
+ MILLI_BEN($$divU_12) /* divisor == 12 */
+ nop
+ b LREF(normal) /* divisor == 13 */
+ ds r0,temp,r0 /* set V-bit to 1 */
+ MILLI_BEN($$divU_14) /* divisor == 14 */
+ nop
+ MILLI_BEN($$divU_15) /* divisor == 15 */
+ nop
+
+/* Handle the case where the high bit is on in the divisor.
+   Compute: if (dividend >= divisor) quotient = 1; else quotient = 0;
+   Note: dividend >= divisor iff dividend-divisor does not borrow,
+   and not borrow iff carry. */
+LSYM(big_divisor)
+ sub arg0,arg1,r0
+ MILLIRET
+ addc r0,r0,retreg
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_remI
+/* ROUTINE: $$remI
+
+ DESCRIPTION:
+ . $$remI returns the remainder of the division of two signed 32-bit
+ . integers. The sign of the remainder is the same as the sign of
+ . the dividend.
+
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = destroyed
+ . arg1 = destroyed
+ . ret1 = remainder
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: DIVIDE BY ZERO
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable
+ . Does not create a stack frame
+ . Is usable for internal or external millicode
+
+ DISCUSSION:
+ . Calls other millicode routines via mrp: NONE
+ . Calls other millicode routines: NONE */
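+
+/* A minimal C sketch of the contract above (helper name illustrative;
+   quotients truncate toward zero as in C99):
+
+	int rem_signed (int n, int d)
+	{
+	  unsigned un = n < 0 ? 0u - (unsigned) n : (unsigned) n;
+	  unsigned ud = d < 0 ? 0u - (unsigned) d : (unsigned) d;
+	  unsigned r = un % ud;
+	  return n < 0 ? -(int) r : (int) r;	// sign follows the dividend
+	}
+
+   For example, rem_signed (-7, 3) == -1 while rem_signed (7, -3) == 1. */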
+
+RDEFINE(tmp,r1)
+RDEFINE(retreg,ret1)
+
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$remI)
+GSYM($$remoI)
+ .export $$remI,MILLICODE
+ .export $$remoI,MILLICODE
+ ldo -1(arg1),tmp /* is there at most one bit set ? */
+ and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
+ addi,> 0,arg1,r0 /* if denominator > 0, use power */
+ /* of 2 */
+ b,n LREF(neg_denom)
+LSYM(pow2)
+ comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
+ and arg0,tmp,retreg /* get the result */
+ MILLIRETN
+LSYM(neg_num)
+ subi 0,arg0,arg0 /* negate numerator */
+ and arg0,tmp,retreg /* get the result */
+ subi 0,retreg,retreg /* negate result */
+ MILLIRETN
+LSYM(neg_denom)
+ addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
+ /* of 2 */
+ b,n LREF(regular_seq)
+ sub r0,arg1,tmp /* make denominator positive */
+ comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
+ ldo -1(tmp),retreg /* is there at most one bit set ? */
+ and,= tmp,retreg,r0 /* if not, go to regular_seq */
+ b,n LREF(regular_seq)
+ comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
+ and arg0,retreg,retreg
+ MILLIRETN
+LSYM(neg_num_2)
+ subi 0,arg0,tmp /* test against 0x80000000 */
+ and tmp,retreg,retreg
+ subi 0,retreg,retreg
+ MILLIRETN
+LSYM(regular_seq)
+ addit,= 0,arg1,0 /* trap if div by zero */
+ add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
+ sub 0,retreg,retreg /* make it positive */
+ sub 0,arg1, tmp /* clear carry, */
+ /* negate the divisor */
+ ds 0, tmp,0 /* set V-bit to the comple- */
+ /* ment of the divisor sign */
+ or 0,0, tmp /* clear tmp */
+ add retreg,retreg,retreg /* shift msb bit into carry */
+ ds tmp,arg1, tmp /* 1st divide step, if no carry */
+ /* out, msb of quotient = 0 */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+LSYM(t1)
+ ds tmp,arg1, tmp /* 2nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 3rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 4th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 5th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 6th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 7th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 8th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 9th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 10th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 11th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 12th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 13th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 14th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 15th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 16th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 17th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 18th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 19th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 20th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 21st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 22nd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 23rd divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 24th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 25th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 26th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 27th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 28th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 29th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 30th divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 31st divide step */
+ addc retreg,retreg,retreg /* shift retreg with/into carry */
+ ds tmp,arg1, tmp /* 32nd divide step, */
+ addc retreg,retreg,retreg /* shift last bit into retreg */
+ movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
+ add,< arg1,0,0 /* if arg1 > 0, add arg1 */
+ add,tr tmp,arg1,retreg /* for correcting remainder tmp */
+ sub tmp,arg1,retreg /* else add absolute value arg1 */
+LSYM(finish)
+ add,>= arg0,0,0 /* set sign of remainder */
+ sub 0,retreg,retreg /* to sign of dividend */
+ MILLIRET
+ nop
+ .exit
+ .procend
+#ifdef milliext
+ .origin 0x00000200
+#endif
+ .end
+#endif
+
+#ifdef L_remU
+/* ROUTINE: $$remU
+ . Single precision divide for remainder with unsigned binary integers.
+ .
+ . The remainder must be dividend-(dividend/divisor)*divisor.
+ . Divide by zero is trapped.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . arg1 == divisor
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = remainder
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: DIVIDE BY ZERO
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Calls other millicode routines using mrp: NONE
+ . Calls other millicode routines: NONE */
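+
+/* The entry sequence below relies on two power-of-two idioms; a hedged C
+   equivalent (name illustrative):
+
+	unsigned rem_pow2 (unsigned n, unsigned d)
+	{
+	  // (d & (d - 1)) == 0 exactly when d is zero or a power of two;
+	  // this is the "ldo -1(arg1),temp ; and,= arg1,temp,r0" test.
+	  if (d != 0 && (d & (d - 1)) == 0)
+	    return n & (d - 1);		// n mod 2**k == n & (2**k - 1)
+	  return n % d;			// otherwise take the long sequence
+	}
+*/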
+
+
+RDEFINE(temp,r1)
+RDEFINE(rmndr,ret1) /* r29 */
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .export $$remU,millicode
+ .proc
+ .callinfo millicode
+ .entry
+GSYM($$remU)
+ ldo -1(arg1),temp /* is there at most one bit set ? */
+ and,= arg1,temp,r0 /* if not, don't use power of 2 */
+ b LREF(regular_seq)
+ addit,= 0,arg1,r0 /* trap on div by zero */
+ and arg0,temp,rmndr /* get the result for power of 2 */
+ MILLIRETN
+LSYM(regular_seq)
+ comib,>=,n 0,arg1,LREF(special_case)
+ subi 0,arg1,rmndr /* clear carry, negate the divisor */
+ ds r0,rmndr,r0 /* set V-bit to 1 */
+ add arg0,arg0,temp /* shift msb bit into carry */
+ ds r0,arg1,rmndr /* 1st divide step, if no carry */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 2nd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 3rd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 4th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 5th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 6th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 7th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 8th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 9th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 10th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 11th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 12th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 13th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 14th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 15th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 16th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 17th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 18th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 19th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 20th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 21st divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 22nd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 23rd divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 24th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 25th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 26th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 27th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 28th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 29th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 30th divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 31st divide step */
+ addc temp,temp,temp /* shift temp with/into carry */
+ ds rmndr,arg1,rmndr /* 32nd divide step, */
+ comiclr,<= 0,rmndr,r0
+ add rmndr,arg1,rmndr /* correction */
+ MILLIRETN
+ nop
+
+/* Putting >= on the last DS and deleting COMICLR does not work! */
+LSYM(special_case)
+ sub,>>= arg0,arg1,rmndr
+ copy arg0,rmndr
+ MILLIRETN
+ nop
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_div_const
+/* ROUTINE: $$divI_2
+ . $$divI_3 $$divU_3
+ . $$divI_4
+ . $$divI_5 $$divU_5
+ . $$divI_6 $$divU_6
+ . $$divI_7 $$divU_7
+ . $$divI_8
+ . $$divI_9 $$divU_9
+ . $$divI_10 $$divU_10
+ .
+ . $$divI_12 $$divU_12
+ .
+ . $$divI_14 $$divU_14
+ . $$divI_15 $$divU_15
+ . $$divI_16
+ . $$divI_17 $$divU_17
+ .
+ . Divide by selected constants for single precision binary integers.
+
+ INPUT REGISTERS:
+ . arg0 == dividend
+ . mrp == return pc
+ . sr0 == return space when called externally
+
+ OUTPUT REGISTERS:
+ . arg0 = undefined
+ . arg1 = undefined
+ . ret1 = quotient
+
+ OTHER REGISTERS AFFECTED:
+ . r1 = undefined
+
+ SIDE EFFECTS:
+ . Causes a trap under the following conditions: NONE
+ . Changes memory at the following places: NONE
+
+ PERMISSIBLE CONTEXT:
+ . Unwindable.
+ . Does not create a stack frame.
+ . Suitable for internal or external millicode.
+ . Assumes the special millicode register conventions.
+
+ DISCUSSION:
+ . Calls other millicode routines using mrp: NONE
+ . Calls other millicode routines: NONE */
+
+
+/* TRUNCATED DIVISION BY SMALL INTEGERS
+
+ We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
+ (with y fixed).
+
+ Let a = floor(z/y), for some choice of z. Note that z will be
+ chosen so that division by z is cheap.
+
+ Let r be the remainder(z/y). In other words, r = z - ay.
+
+ Now, our method is to choose a value for b such that
+
+ q'(x) = floor((ax+b)/z)
+
+ is equal to q(x) over as large a range of x as possible. If the
+ two are equal over a sufficiently large range, and if it is easy to
+ form the product (ax), and it is easy to divide by z, then we can
+ perform the division much faster than the general division algorithm.
+
+ So, we want the following to be true:
+
+ . For x in the following range:
+ .
+ . ky <= x < (k+1)y
+ .
+ . implies that
+ .
+ . k <= (ax+b)/z < (k+1)
+
+ We want to determine b such that this is true for all k in the
+ range {0..K} for some maximum K.
+
+ Since (ax+b) is an increasing function of x, we can take each
+ bound separately to determine the "best" value for b.
+
+ (ax+b)/z < (k+1) implies
+
+ a((k+1)y-1)+b < (k+1)z implies
+
+ b < a + (k+1)(z-ay) implies
+
+ b < a + (k+1)r
+
+ This needs to be true for all k in the range {0..K}. In
+ particular, it is true for k = 0 and this leads to a maximum
+ acceptable value for b.
+
+ b < a+r or b <= a+r-1
+
+ Taking the other bound, we have
+
+ k <= (ax+b)/z implies
+
+ k <= (aky+b)/z implies
+
+ k(z-ay) <= b implies
+
+ kr <= b
+
+ Clearly, the largest range for k will be achieved by maximizing b,
+ when r is not zero. When r is zero, then the simplest choice for b
+ is 0. When r is not 0, set
+
+ . b = a+r-1
+
+ Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
+ for all x in the range:
+
+ . 0 <= x < (K+1)y
+
+ We need to determine what K is. Of our two bounds,
+
+ . b < a+(k+1)r is satisfied for all k >= 0, by construction.
+
+ The other bound is
+
+ . kr <= b
+
+ This is always true if r = 0. If r is not 0 (the usual case), then
+ K = floor((a+r-1)/r) is the maximum value for k.
+
+ Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
+ answer for q(x) = floor(x/y) when x is in the range
+
+ (0,(K+1)y-1) K = floor((a+r-1)/r)
+
+ To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
+ the formula for q'(x) yields the correct value of q(x) for all x
+ representable by a single word in HPPA.
+
+ We are also constrained in that computing the product (ax), adding
+ b, and dividing by z must all be done quickly, otherwise we will be
+ better off going through the general algorithm using the DS
+ instruction, which uses approximately 70 cycles.
+
+ For each y, there is a choice of z which satisfies the constraints
+ for (K+1)y >= 2**32. We may not, however, be able to satisfy the
+ timing constraints for arbitrary y. It seems that z being equal to
+ a power of 2 or a power of 2 minus 1 is as good as we can do, since
+ it minimizes the time to do division by z. We want the choice of z
+ to also result in a value for (a) that minimizes the computation of
+ the product (ax). This is best achieved if (a) has a regular bit
+ pattern (so the multiplication can be done with shifts and adds).
+ The value of (a) also needs to be less than 2**32 so the product is
+ always guaranteed to fit in 2 words.
+
+ In actual practice, the following should be done:
+
+ 1) For negative x, you should take the absolute value and remember
+ . the fact so that the result can be negated. This obviously does
+ . not apply in the unsigned case.
+ 2) For even y, you should factor out the power of 2 that divides y
+ . and divide x by it. You can then proceed by dividing by the
+ . odd factor of y.
+
+ Here is a table of some odd values of y, and corresponding choices
+ for z which are "good".
+
+ y z r a (hex) max x (hex)
+
+ 3 2**32 1 55555555 100000001
+ 5 2**32 1 33333333 100000003
+ 7 2**24-1 0 249249 (infinite)
+ 9 2**24-1 0 1c71c7 (infinite)
+ 11 2**20-1 0 1745d (infinite)
+ 13 2**24-1 0 13b13b (infinite)
+ 15 2**32 1 11111111 10000000d
+ 17 2**32 1 f0f0f0f 10000000f
+
+ If r is 1, then b = a+r-1 = a. This simplifies the computation
+ of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
+ then b = 0 is ok to use which simplifies (ax+b).
+
+ The bit patterns for 55555555, 33333333, and 11111111 are obviously
+ very regular. The bit patterns for the other values of a above are:
+
+ y (hex) (binary)
+
+ 7 249249 001001001001001001001001 << regular >>
+ 9 1c71c7 000111000111000111000111 << regular >>
+ 11 1745d 000000010111010001011101 << irregular >>
+ 13 13b13b 000100111011000100111011 << irregular >>
+
+ The bit patterns for (a) corresponding to (y) of 11 and 13 may be
+ too irregular to warrant using this method.
+
+ When z is a power of 2 minus 1, then the division by z is slightly
+ more complicated, involving an iterative solution.
+
+ The code presented here solves division by 1 through 17, except for
+ 11 and 13. There are algorithms for both signed and unsigned
+ quantities given.
+
+ TIMINGS (cycles)
+
+ divisor positive negative unsigned
+
+ . 1 2 2 2
+ . 2 4 4 2
+ . 3 19 21 19
+ . 4 4 4 2
+ . 5 18 22 19
+ . 6 19 22 19
+ . 8 4 4 2
+ . 10 18 19 17
+ . 12 18 20 18
+ . 15 16 18 16
+ . 16 4 4 2
+ . 17 16 18 16
+
+ Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
+ a loop body is executed until the tentative quotient is 0. The
+ number of times the loop body is executed varies depending on the
+ dividend, but is never more than two times. If the dividend is
+ less than the divisor, then the loop body is not executed at all.
+ Each iteration adds 4 cycles to the timings.
+
+ divisor positive negative unsigned
+
+ . 7 19+4n 20+4n 20+4n n = number of iterations
+ . 9 21+4n 22+4n 21+4n
+ . 14 21+4n 22+4n 20+4n
+
+ To give an idea of how the number of iterations varies, here is a
+ table of dividend versus number of iterations when dividing by 7.
+
+ smallest largest required
+ dividend dividend iterations
+
+ . 0 6 0
+ . 7 0x6ffffff 1
+ 0x1000006 0xffffffff 2
+
+ There is some overlap in the range of numbers requiring 1 and 2
+ iterations. */
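+
+/* Worked example as a hedged C sketch (not part of the millicode): for
+   y = 3 the table gives z = 2**32, a = 0x55555555, r = 1, so b = a+r-1 = a
+   and ax+b = a*(x+1). A 64-bit intermediate is assumed for clarity; the
+   millicode builds the product from shifts and adds instead.
+
+	unsigned div3 (unsigned x)
+	{
+	  unsigned long long a = 0x55555555ULL;		// floor(2**32 / 3)
+	  return (unsigned) ((a * (x + 1ULL)) >> 32);	// floor((ax+b)/z)
+	}
+
+   Since max x = 0x100000001 exceeds 2**32-1, the result is exact for
+   every 32-bit x. */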
+
+RDEFINE(t2,r1)
+RDEFINE(x2,arg0) /* r26 */
+RDEFINE(t1,arg1) /* r25 */
+RDEFINE(x1,ret1) /* r29 */
+
+ SUBSPA_MILLI_DIV
+ ATTR_MILLI
+
+ .proc
+ .callinfo millicode
+ .entry
+/* NONE of these routines require a stack frame
+ ALL of these routines are unwindable from millicode */
+
+GSYM($$divide_by_constant)
+ .export $$divide_by_constant,millicode
+/* Provides a "nice" label for the code covered by the unwind descriptor
+ for things like gprof. */
+
+/* DIVISION BY 2 (shift by 1) */
+GSYM($$divI_2)
+ .export $$divI_2,millicode
+ comclr,>= arg0,0,0
+ addi 1,arg0,arg0
+ MILLIRET
+ extrs arg0,30,31,ret1
+
+
+/* DIVISION BY 4 (shift by 2) */
+GSYM($$divI_4)
+ .export $$divI_4,millicode
+ comclr,>= arg0,0,0
+ addi 3,arg0,arg0
+ MILLIRET
+ extrs arg0,29,30,ret1
+
+
+/* DIVISION BY 8 (shift by 3) */
+GSYM($$divI_8)
+ .export $$divI_8,millicode
+ comclr,>= arg0,0,0
+ addi 7,arg0,arg0
+ MILLIRET
+ extrs arg0,28,29,ret1
+
+/* DIVISION BY 16 (shift by 4) */
+GSYM($$divI_16)
+ .export $$divI_16,millicode
+ comclr,>= arg0,0,0
+ addi 15,arg0,arg0
+ MILLIRET
+ extrs arg0,27,28,ret1
+
+/****************************************************************************
+*
+* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
+*
+* includes 3,5,15,17 and also 6,10,12
+*
+****************************************************************************/
+
+/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
+
+GSYM($$divI_3)
+ .export $$divI_3,millicode
+ comb,<,N x2,0,LREF(neg3)
+
+ addi 1,x2,x2 /* this cannot overflow */
+ extru x2,1,2,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,0,x1
+
+LSYM(neg3)
+ subi 1,x2,x2 /* this cannot overflow */
+ extru x2,1,2,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_3)
+ .export $$divU_3,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,30,t1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,t1,x1
+
+/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
+
+GSYM($$divI_5)
+ .export $$divI_5,millicode
+ comb,<,N x2,0,LREF(neg5)
+
+ addi 3,x2,t1 /* this cannot overflow */
+ sh1add x2,t1,x2 /* multiply by 3 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+LSYM(neg5)
+ sub 0,x2,x2 /* negate x2 */
+ addi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,31,x1 /* get top bit (can be 1) */
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_5)
+ .export $$divU_5,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,31,t1 /* multiply by 3 to get started */
+ sh1add x2,x2,x2
+ b LREF(pos)
+ addc t1,x1,x1
+
+/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
+GSYM($$divI_6)
+ .export $$divI_6,millicode
+ comb,<,N x2,0,LREF(neg6)
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
+ sh2add x2,t1,x2 /* multiply by 5 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+LSYM(neg6)
+ subi 2,x2,x2 /* negate, divide by 2, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,30,31,x2
+ shd 0,x2,30,x1
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+ b LREF(neg)
+ addc x1,0,x1
+
+GSYM($$divU_6)
+ .export $$divU_6,millicode
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 1,x2,x2 /* cannot carry */
+ shd 0,x2,30,x1 /* multiply by 5 to get started */
+ sh2add x2,x2,x2
+ b LREF(pos)
+ addc x1,0,x1
+
+/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
+GSYM($$divU_10)
+ .export $$divU_10,millicode
+ extru x2,30,31,x2 /* divide by 2 */
+ addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
+ sh1add x2,t1,x2 /* multiply by 3 to get started */
+ addc 0,0,x1
+LSYM(pos)
+ shd x1,x2,28,t1 /* multiply by 0x11 */
+ shd x2,0,28,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+LSYM(pos_for_17)
+ shd x1,x2,24,t1 /* multiply by 0x101 */
+ shd x2,0,24,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,16,t1 /* multiply by 0x10001 */
+ shd x2,0,16,t2
+ add x2,t2,x2
+ MILLIRET
+ addc x1,t1,x1
+
+GSYM($$divI_10)
+ .export $$divI_10,millicode
+ comb,< x2,0,LREF(neg10)
+ copy 0,x1
+ extru x2,30,31,x2 /* divide by 2 */
+ addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+
+LSYM(neg10)
+ subi 2,x2,x2 /* negate, divide by 2, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,30,31,x2
+ sh1add x2,x2,x2 /* multiply by 3 to get started */
+LSYM(neg)
+ shd x1,x2,28,t1 /* multiply by 0x11 */
+ shd x2,0,28,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+LSYM(neg_for_17)
+ shd x1,x2,24,t1 /* multiply by 0x101 */
+ shd x2,0,24,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,16,t1 /* multiply by 0x10001 */
+ shd x2,0,16,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+ MILLIRET
+ sub 0,x1,x1
+
+/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
+GSYM($$divI_12)
+ .export $$divI_12,millicode
+ comb,< x2,0,LREF(neg12)
+ copy 0,x1
+ extru x2,29,30,x2 /* divide by 4 */
+ addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+
+LSYM(neg12)
+ subi 4,x2,x2 /* negate, divide by 4, and add 1 */
+ /* negation and adding 1 are done */
+ /* at the same time by the SUBI */
+ extru x2,29,30,x2
+ b LREF(neg)
+ sh2add x2,x2,x2 /* multiply by 5 to get started */
+
+GSYM($$divU_12)
+ .export $$divU_12,millicode
+ extru x2,29,30,x2 /* divide by 4 */
+ addi 5,x2,t1 /* cannot carry */
+ sh2add x2,t1,x2 /* multiply by 5 to get started */
+ b LREF(pos)
+ addc 0,0,x1
+
+/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
+GSYM($$divI_15)
+ .export $$divI_15,millicode
+ comb,< x2,0,LREF(neg15)
+ copy 0,x1
+ addib,tr 1,x2,LREF(pos)+4
+ shd x1,x2,28,t1
+
+LSYM(neg15)
+ b LREF(neg)
+ subi 1,x2,x2
+
+GSYM($$divU_15)
+ .export $$divU_15,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ b LREF(pos)
+ addc 0,0,x1
+
+/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
+GSYM($$divI_17)
+ .export $$divI_17,millicode
+ comb,<,n x2,0,LREF(neg17)
+ addi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,28,t1 /* multiply by 0xf to get started */
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(pos_for_17)
+ subb t1,0,x1
+
+LSYM(neg17)
+ subi 1,x2,x2 /* this cannot overflow */
+ shd 0,x2,28,t1 /* multiply by 0xf to get started */
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(neg_for_17)
+ subb t1,0,x1
+
+GSYM($$divU_17)
+ .export $$divU_17,millicode
+ addi 1,x2,x2 /* this CAN overflow */
+ addc 0,0,x1
+ shd x1,x2,28,t1 /* multiply by 0xf to get started */
+LSYM(u17)
+ shd x2,0,28,t2
+ sub t2,x2,x2
+ b LREF(pos_for_17)
+ subb t1,x1,x1
+
+
+/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
+ includes 7,9 and also 14
+
+
+ z = 2**24-1
+ r = z mod x = 0
+
+ so choose b = 0
+
+ Also, in order to divide by z = 2**24-1, we approximate by dividing
+ by (z+1) = 2**24 (which is easy), and then correcting.
+
+ (ax) = (z+1)q' + r
+ . = zq' + (q'+r)
+
+ So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
+ Then the true remainder of (ax)/z is (q'+r). Repeat the process
+ with this new remainder, adding the tentative quotients together,
+ until a tentative quotient is 0 (and then we are done). There is
+ one last correction to be done. It is possible that (q'+r) = z.
+ If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
+ in fact, we need to add 1 more to the quotient. Now, it turns
+ out that this happens if and only if the original value x is
+ an exact multiple of y. So, to avoid a three instruction test at
+ the end, instead use 1 instruction to add 1 to x at the beginning. */
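+
+/* Worked example as a hedged C sketch (name illustrative; a 64-bit
+   intermediate stands in for the shift/add product):
+
+	unsigned div7 (unsigned x)
+	{
+	  // a = 0x249249 = (2**24 - 1)/7; the +1 applies the correction
+	  // described above for exact multiples of 7
+	  unsigned long long p = 0x249249ULL * (x + 1ULL);
+	  unsigned q = 0;
+	  unsigned lo = (unsigned) (p & 0xffffff);	// (ax) mod (z+1)
+	  unsigned hi = (unsigned) (p >> 24);		// tentative quotient
+	  while (hi)
+	    {
+	      q += hi;			// accumulate tentative quotients
+	      lo += hi;			// next value to divide is (q'+r)
+	      hi = lo >> 24;
+	      lo &= 0xffffff;
+	    }
+	  return q;
+	}
+
+   The loop body runs at most twice, matching the iteration counts
+   tabulated earlier. */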
+
+/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
+GSYM($$divI_7)
+ .export $$divI_7,millicode
+ comb,<,n x2,0,LREF(neg7)
+LSYM(7)
+ addi 1,x2,x2 /* cannot overflow */
+ shd 0,x2,29,x1
+ sh3add x2,x2,x2
+ addc x1,0,x1
+LSYM(pos7)
+ shd x1,x2,26,t1
+ shd x2,0,26,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,20,t1
+ shd x2,0,20,t2
+ add x2,t2,x2
+ addc x1,t1,t1
+
+ /* computed <t1,x2>. Now divide it by (2**24 - 1) */
+
+ copy 0,x1
+ shd,= t1,x2,24,t1 /* tentative quotient */
+LSYM(1)
+ addb,tr t1,x1,LREF(2) /* add to previous quotient */
+ extru x2,31,24,x2 /* new remainder (unadjusted) */
+
+ MILLIRETN
+
+LSYM(2)
+ addb,tr t1,x2,LREF(1) /* adjust remainder */
+ extru,= x2,7,8,t1 /* new quotient */
+
+LSYM(neg7)
+ subi 1,x2,x2 /* negate x2 and add 1 */
+LSYM(8)
+ shd 0,x2,29,x1
+ sh3add x2,x2,x2
+ addc x1,0,x1
+
+LSYM(neg7_shift)
+ shd x1,x2,26,t1
+ shd x2,0,26,t2
+ add x2,t2,x2
+ addc x1,t1,x1
+
+ shd x1,x2,20,t1
+ shd x2,0,20,t2
+ add x2,t2,x2
+ addc x1,t1,t1
+
+ /* computed <t1,x2>. Now divide it by (2**24 - 1) */
+
+ copy 0,x1
+ shd,= t1,x2,24,t1 /* tentative quotient */
+LSYM(3)
+ addb,tr t1,x1,LREF(4) /* add to previous quotient */
+ extru x2,31,24,x2 /* new remainder (unadjusted) */
+
+ MILLIRET
+ sub 0,x1,x1 /* negate result */
+
+LSYM(4)
+ addb,tr t1,x2,LREF(3) /* adjust remainder */
+ extru,= x2,7,8,t1 /* new quotient */
+
+GSYM($$divU_7)
+ .export $$divU_7,millicode
+ addi 1,x2,x2 /* can carry */
+ addc 0,0,x1
+ shd x1,x2,29,t1
+ sh3add x2,x2,x2
+ b LREF(pos7)
+ addc t1,x1,x1
+
+/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
+GSYM($$divI_9)
+ .export $$divI_9,millicode
+ comb,<,n x2,0,LREF(neg9)
+ addi 1,x2,x2 /* cannot overflow */
+ shd 0,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(pos7)
+ subb t1,0,x1
+
+LSYM(neg9)
+ subi 1,x2,x2 /* negate and add 1 */
+ shd 0,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(neg7_shift)
+ subb t1,0,x1
+
+GSYM($$divU_9)
+ .export $$divU_9,millicode
+ addi 1,x2,x2 /* can carry */
+ addc 0,0,x1
+ shd x1,x2,29,t1
+ shd x2,0,29,t2
+ sub t2,x2,x2
+ b LREF(pos7)
+ subb t1,x1,x1
+
+/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
+GSYM($$divI_14)
+ .export $$divI_14,millicode
+ comb,<,n x2,0,LREF(neg14)
+GSYM($$divU_14)
+ .export $$divU_14,millicode
+ b LREF(7) /* go to 7 case */
+ extru x2,30,31,x2 /* divide by 2 */
+
+LSYM(neg14)
+ subi 2,x2,x2 /* negate (and add 2) */
+ b LREF(8)
+ extru x2,30,31,x2 /* divide by 2 */
+ .exit
+ .procend
+ .end
+#endif
+
+#ifdef L_mulI
+/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
+/******************************************************************************
+This routine is used on PA2.0 processors when gcc -mno-fpregs is used
+
+ROUTINE: $$mulI
+
+
+DESCRIPTION:
+
+ $$mulI multiplies two single word integers, giving a single
+ word result.
+
+
+INPUT REGISTERS:
+
+ arg0 = Operand 1
+ arg1 = Operand 2
+ r31 == return pc
+ sr0 == return space when called externally
+
+
+OUTPUT REGISTERS:
+
+ arg0 = undefined
+ arg1 = undefined
+ ret1 = result
+
+OTHER REGISTERS AFFECTED:
+
+ r1 = undefined
+
+SIDE EFFECTS:
+
+ Causes a trap under the following conditions: NONE
+ Changes memory at the following places: NONE
+
+PERMISSIBLE CONTEXT:
+
+ Unwindable
+ Does not create a stack frame
+ Is usable for internal or external millicode
+
+DISCUSSION:
+
+ Calls other millicode routines via mrp: NONE
+ Calls other millicode routines: NONE
+
+***************************************************************************/
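+
+/* Overall structure as a hedged C sketch (the generic multiply stands in
+   for the 256-entry shift/add dispatch table that follows; unsigned
+   arithmetic shown, while the code below also special-cases negative
+   operands):
+
+	unsigned mulI (unsigned a, unsigned b)
+	{
+	  unsigned r = 0;
+	  if (b > a)			// swap so the smaller operand is
+	    {				// consumed 8 bits at a time
+	      unsigned t = a;
+	      a = b;
+	      b = t;
+	    }
+	  while (b)
+	    {
+	      r += a * (b & 0xff);	// one partial product per byte; the
+	      a <<= 8;			// millicode replaces this multiply
+	      b >>= 8;			// with an optimal shift/add sequence
+	    }
+	  return r;
+	}
+*/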
+
+
+#define a0 %arg0
+#define a1 %arg1
+#define t0 %r1
+#define r %ret1
+
+#define a0__128a0 zdep a0,24,25,a0
+#define a0__256a0 zdep a0,23,24,a0
+#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
+#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
+#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
+#define b_n_ret_t0 b,n LREF(ret_t0)
+#define b_e_shift b LREF(e_shift)
+#define b_e_t0ma0 b LREF(e_t0ma0)
+#define b_e_t0 b LREF(e_t0)
+#define b_e_t0a0 b LREF(e_t0a0)
+#define b_e_t02a0 b LREF(e_t02a0)
+#define b_e_t04a0 b LREF(e_t04a0)
+#define b_e_2t0 b LREF(e_2t0)
+#define b_e_2t0a0 b LREF(e_2t0a0)
+#define b_e_2t04a0 b LREF(e2t04a0)
+#define b_e_3t0 b LREF(e_3t0)
+#define b_e_4t0 b LREF(e_4t0)
+#define b_e_4t0a0 b LREF(e_4t0a0)
+#define b_e_4t08a0 b LREF(e4t08a0)
+#define b_e_5t0 b LREF(e_5t0)
+#define b_e_8t0 b LREF(e_8t0)
+#define b_e_8t0a0 b LREF(e_8t0a0)
+#define r__r_a0 add r,a0,r
+#define r__r_2a0 sh1add a0,r,r
+#define r__r_4a0 sh2add a0,r,r
+#define r__r_8a0 sh3add a0,r,r
+#define r__r_t0 add r,t0,r
+#define r__r_2t0 sh1add t0,r,r
+#define r__r_4t0 sh2add t0,r,r
+#define r__r_8t0 sh3add t0,r,r
+#define t0__3a0 sh1add a0,a0,t0
+#define t0__4a0 sh2add a0,0,t0
+#define t0__5a0 sh2add a0,a0,t0
+#define t0__8a0 sh3add a0,0,t0
+#define t0__9a0 sh3add a0,a0,t0
+#define t0__16a0 zdep a0,27,28,t0
+#define t0__32a0 zdep a0,26,27,t0
+#define t0__64a0 zdep a0,25,26,t0
+#define t0__128a0 zdep a0,24,25,t0
+#define t0__t0ma0 sub t0,a0,t0
+#define t0__t0_a0 add t0,a0,t0
+#define t0__t0_2a0 sh1add a0,t0,t0
+#define t0__t0_4a0 sh2add a0,t0,t0
+#define t0__t0_8a0 sh3add a0,t0,t0
+#define t0__2t0_a0 sh1add t0,a0,t0
+#define t0__3t0 sh1add t0,t0,t0
+#define t0__4t0 sh2add t0,0,t0
+#define t0__4t0_a0 sh2add t0,a0,t0
+#define t0__5t0 sh2add t0,t0,t0
+#define t0__8t0 sh3add t0,0,t0
+#define t0__8t0_a0 sh3add t0,a0,t0
+#define t0__9t0 sh3add t0,t0,t0
+#define t0__16t0 zdep t0,27,28,t0
+#define t0__32t0 zdep t0,26,27,t0
+#define t0__256a0 zdep a0,23,24,t0
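+
+/* Reading the macro names above: "t0__5a0" abbreviates "t0 := 5*a0",
+   implemented as sh2add a0,a0,t0, i.e. t0 = (a0 << 2) + a0; "r__r_8t0"
+   is "r := r + 8*t0" via sh3add; the zdep forms are left shifts by a
+   power of two. Each table entry below chains a few of these to build
+   one partial product. */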
+
+
+ SUBSPA_MILLI
+ ATTR_MILLI
+ .align 16
+ .proc
+ .callinfo millicode
+ .export $$mulI,millicode
+GSYM($$mulI)
+ combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
+ copy 0,r /* zero out the result */
+ xor a0,a1,a0 /* swap a0 & a1 using the */
+ xor a0,a1,a1 /* old xor trick */
+ xor a0,a1,a0
+LSYM(l4)
+ combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
+ zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
+ sub,> 0,a1,t0 /* otherwise negate both and */
+ combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
+ sub 0,a0,a1
+ movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
+
+LSYM(l0) r__r_t0 /* add in this partial product */
+LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
+LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
+LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
+ extru a1,23,24,a1 /* a1 >>= 8 ****************** */
+
+/*16 insts before this. */
+/* a0 <<= 8 ************************** */
+LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
+LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
+LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
+LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
+LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
+LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
+LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
+LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
+LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
+LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
+LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
+LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
+LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
+LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
+LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
+LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
+LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
+LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
+LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
+LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
+LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
+LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
+LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
+LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
+LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
+LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
+LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
+LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
+LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
+LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
+LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
+LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
+LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
+LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
+LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
+LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
+LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
+LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
+LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
+LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
+LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
+LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
+LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
+LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
+LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
+LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
+LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
+LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
+LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
+LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
+LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
+LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
+LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
+LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
+LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
+LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
+LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
+LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
+LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
+LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
+LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
+LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
+LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
+LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
+LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
+LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
+LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
+LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
+LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
+LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
+LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
+LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
+LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
+LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
+LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
+LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
+LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
+LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
+LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
+LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
+LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
+LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
+LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
+LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
+LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
+LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
+LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
+LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
+LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
+LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
+LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
+LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
+LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
+LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
+LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
+LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
+LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
+LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
+LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
+LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
+LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
+LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
+LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
+LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
+LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
+LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
+LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
+LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
+LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
+LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
+LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
+LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
+LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
+LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
+LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
+LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
+LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
+LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
+LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
+LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
+LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
+LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
+LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
+LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
+LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
+LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
+LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
+LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
+LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
+LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
+/* 1040 insts before this. */
+LSYM(ret_t0) MILLIRET
+LSYM(e_t0) r__r_t0
+LSYM(e_shift) a1_ne_0_b_l2
+ a0__256a0 /* a0 <<= 8 *********** */
+ MILLIRETN
+LSYM(e_t0ma0) a1_ne_0_b_l0
+ t0__t0ma0
+ MILLIRET
+ r__r_t0
+LSYM(e_t0a0) a1_ne_0_b_l0
+ t0__t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e_t02a0) a1_ne_0_b_l0
+ t0__t0_2a0
+ MILLIRET
+ r__r_t0
+LSYM(e_t04a0) a1_ne_0_b_l0
+ t0__t0_4a0
+ MILLIRET
+ r__r_t0
+LSYM(e_2t0) a1_ne_0_b_l1
+ r__r_2t0
+ MILLIRETN
+LSYM(e_2t0a0) a1_ne_0_b_l0
+ t0__2t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e2t04a0) t0__t0_2a0
+ a1_ne_0_b_l1
+ r__r_2t0
+ MILLIRETN
+LSYM(e_3t0) a1_ne_0_b_l0
+ t0__3t0
+ MILLIRET
+ r__r_t0
+LSYM(e_4t0) a1_ne_0_b_l1
+ r__r_4t0
+ MILLIRETN
+LSYM(e_4t0a0) a1_ne_0_b_l0
+ t0__4t0_a0
+ MILLIRET
+ r__r_t0
+LSYM(e4t08a0) t0__t0_2a0
+ a1_ne_0_b_l1
+ r__r_4t0
+ MILLIRETN
+LSYM(e_5t0) a1_ne_0_b_l0
+ t0__5t0
+ MILLIRET
+ r__r_t0
+LSYM(e_8t0) a1_ne_0_b_l1
+ r__r_8t0
+ MILLIRETN
+LSYM(e_8t0a0) a1_ne_0_b_l0
+ t0__8t0_a0
+ MILLIRET
+ r__r_t0
+
+ .procend
+ .end
+#endif
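
The LSYM(xN) entries above are multiply-by-constant recipes built from
shift-and-add primitives; the instruction written after each b_e_*
branch executes in the branch's delay slot before the shared epilogue
runs. A minimal sketch checking two of the recipes in C, assuming the
primitive naming reads t0__8a0 as t0 = 8*a0, t0__2t0_a0 as
t0 = 2*t0 + a0, t0__t0ma0 as t0 = t0 - a0, and e_5t0 as a
result = 5*t0 epilogue:

    #include <assert.h>

    int main (void)
    {
      long a = 7, t;

      /* x175: t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0  */
      t = 8 * a;        /* t0__8a0             */
      t = 2 * t + a;    /* t0__2t0_a0  -> 17a  */
      t = 2 * t + a;    /* delay slot  -> 35a  */
      t = 5 * t;        /* e_5t0       -> 175a */
      assert (t == 175 * a);

      /* x255: t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
         (a1_ne_0_b_l0 handles further multiplier words; assume a1 == 0).  */
      t = 256 * a;      /* t0__256a0           */
      t = t - a;        /* t0__t0ma0   -> 255a */
      assert (t == 255 * a);
      return 0;
    }
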
diff --git a/gcc/config/pa/pa-64.h b/gcc/config/pa/pa-64.h
new file mode 100644
index 000000000..67c8179c5
--- /dev/null
+++ b/gcc/config/pa/pa-64.h
@@ -0,0 +1,100 @@
+/* Definitions of target machine for GNU compiler, for HPs using the
+ 64bit runtime model.
+ Copyright (C) 1999, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The default sizes for basic datatypes provided by GCC are not
+ correct for the PA64 runtime architecture.
+
+ In PA64, basic types have the following sizes
+
+ char 1 byte
+ short 2 bytes
+ int 4 bytes
+ long 8 bytes
+ long long 8 bytes
+ pointer 8 bytes
+ float 4 bytes
+ double 8 bytes
+ long double 16 bytes
+ size_t 8 bytes
+ ptrdiff_t 8 bytes
+ wchar 4 bytes
+
+ Make GCC agree with types.h. */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* If it is not listed here, then the default selected by GCC is OK. */
+#undef SHORT_TYPE_SIZE
+#define SHORT_TYPE_SIZE 16
+#undef INT_TYPE_SIZE
+#define INT_TYPE_SIZE 32
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 64
+#undef LONG_LONG_TYPE_SIZE
+#define LONG_LONG_TYPE_SIZE 64
+#undef FLOAT_TYPE_SIZE
+#define FLOAT_TYPE_SIZE 32
+#undef DOUBLE_TYPE_SIZE
+#define DOUBLE_TYPE_SIZE 64
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 128
+
+/* Temporary until we figure out what to do with those *(&@$ 32bit
+ relocs which appear in stabs. */
+#undef DBX_DEBUGGING_INFO
+
+/* ?!? This needs to be made compile-time selectable.
+
+ The PA64 runtime model has arguments that grow to higher addresses
+ (like most other targets). The older runtime model has arguments
+ that grow to lower addresses. What fun. */
+#undef ARGS_GROW_DOWNWARD
+
+/* If defined, a C expression which determines whether the default
+ implementation of va_arg will attempt to pad down before reading the
+ next argument, if that argument is smaller than its aligned space as
+ controlled by PARM_BOUNDARY. If this macro is not defined, all such
+ arguments are padded down when BYTES_BIG_ENDIAN is true. We don't
+ want aggregates padded down. */
+
+#define PAD_VARARGS_DOWN (!AGGREGATE_TYPE_P (type))
+
+/* In the PA architecture, it is not possible to directly move data
+ between GENERAL_REGS and FP_REGS. On the 32-bit port, we use the
+ location at SP-16 because PA 1.X only supports 5-bit immediates for
+ floating-point loads and stores. We don't expose this location in
+ the RTL to avoid scheduling related problems. For example, the
+ store and load could be separated by a call to a pure or const
+ function which has no frame and this function might also use SP-16.
+ We have 14-bit immediates on the 64-bit port, so we use secondary
+ memory for the copies. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ (MAYBE_FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2) \
+ || MAYBE_FP_REG_CLASS_P (CLASS2) != FP_REG_CLASS_P (CLASS1))
+
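
The SECONDARY_MEMORY_NEEDED definition above is symmetric in its two
class arguments. A minimal model in C, with stand-in class names and
helpers (the real FP_REG_CLASS_P and MAYBE_FP_REG_CLASS_P are defined
elsewhere in the port), showing that only copies which cross the
general/floating-point boundary are routed through memory:

    #include <stdio.h>

    enum rclass { GENERAL_REGS, FP_REGS };

    static int fp_class_p (enum rclass c)       { return c == FP_REGS; }
    static int maybe_fp_class_p (enum rclass c) { return c == FP_REGS; }

    static int needs_secondary_memory (enum rclass c1, enum rclass c2)
    {
      return maybe_fp_class_p (c1) != fp_class_p (c2)
             || maybe_fp_class_p (c2) != fp_class_p (c1);
    }

    int main (void)
    {
      printf ("GR->GR: %d\n", needs_secondary_memory (GENERAL_REGS, GENERAL_REGS)); /* 0 */
      printf ("GR->FP: %d\n", needs_secondary_memory (GENERAL_REGS, FP_REGS));      /* 1 */
      printf ("FP->FP: %d\n", needs_secondary_memory (FP_REGS, FP_REGS));           /* 0 */
      return 0;
    }
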
diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h
new file mode 100644
index 000000000..f167e133e
--- /dev/null
+++ b/gcc/config/pa/pa-hpux.h
@@ -0,0 +1,119 @@
+/* Definitions of target machine for GNU compiler, for HP-UX.
+ Copyright (C) 1991, 1995, 1996, 2002, 2003, 2004, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* HP-UX UNIX features. */
+#undef TARGET_HPUX
+#define TARGET_HPUX 1
+
+#undef HPUX_LONG_DOUBLE_LIBRARY
+#define HPUX_LONG_DOUBLE_LIBRARY 1
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT MASK_BIG_SWITCH
+
+/* Make GCC agree with types.h. */
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+
+#define SIZE_TYPE "unsigned int"
+#define PTRDIFF_TYPE "int"
+
+#define LONG_DOUBLE_TYPE_SIZE 128
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode)
+
+/* GCC always defines __STDC__. HP C++ compilers don't define it. This
+ causes trouble when sys/stdsyms.h is included. As a workaround,
+ we define __STDC_EXT__. A similar situation exists with respect to
+ the definition of __cplusplus. We define _INCLUDE_LONGLONG
+ to prevent nlist.h from defining __STDC_32_MODE__ (no longlong
+ support). */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("system=hpux"); \
+ builtin_assert ("system=unix"); \
+ builtin_define ("__hp9000s800"); \
+ builtin_define ("__hp9000s800__"); \
+ builtin_define ("__hp9k8"); \
+ builtin_define ("__hp9k8__"); \
+ builtin_define ("__hpux"); \
+ builtin_define ("__hpux__"); \
+ builtin_define ("__unix"); \
+ builtin_define ("__unix__"); \
+ builtin_define ("__STDC_EXT__"); \
+ if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ builtin_define ("_INCLUDE_LONGLONG"); \
+ } \
+ else if (!flag_iso) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ if (preprocessing_trad_p ()) \
+ { \
+ builtin_define ("hp9000s800"); \
+ builtin_define ("hp9k8"); \
+ builtin_define ("hppa"); \
+ builtin_define ("hpux"); \
+ builtin_define ("unix"); \
+ builtin_define ("__CLASSIC_C__"); \
+ builtin_define ("_PWB"); \
+ builtin_define ("PWB"); \
+ } \
+ } \
+ if (TARGET_SIO) \
+ builtin_define ("_SIO"); \
+ else \
+ { \
+ builtin_define ("__hp9000s700"); \
+ builtin_define ("__hp9000s700__"); \
+ builtin_define ("_WSIO"); \
+ } \
+ } \
+ while (0)
+
+/* Like the default, except no -lg. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!p:%{!pg:-lc}}%{p: -L/lib/libp/ -lc}%{pg: -L/lib/libp/ -lc}}"
+
+#undef LINK_SPEC
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_PA_11)
+#define LINK_SPEC \
+ "%{!mpa-risc-1-0:%{!march=1.0:%{static:-L/lib/pa1.1 -L/usr/lib/pa1.1 }}}%{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{g*:-a archive} %{shared:-b}"
+#else
+#define LINK_SPEC \
+ "%{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{g*:-a archive} %{shared:-b}"
+#endif
+
+/* hpux8 and later have C++ compatible include files, so do not
+ pretend they are `extern "C"'. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* hpux11 and earlier don't have fputc_unlocked, so we must inhibit the
+ transformation of fputs_unlocked and fprintf_unlocked to fputc_unlocked. */
+#define DONT_HAVE_FPUTC_UNLOCKED
+
+/* We want the entry value of SP saved in the frame marker for
+ compatibility with the HP-UX unwind library. */
+#undef TARGET_HPUX_UNWIND_LIBRARY
+#define TARGET_HPUX_UNWIND_LIBRARY 1
+
+#define MD_UNWIND_SUPPORT "config/pa/hpux-unwind.h"
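
Tracing TARGET_OS_CPP_BUILTINS above for one concrete case (a sketch,
assuming a C++ compile without -msio): the assertions system=hpux and
system=unix are registered and the predefines come out as

    __hp9000s800 __hp9000s800__ __hp9k8 __hp9k8__ __hpux __hpux__
    __unix __unix__ __STDC_EXT__ _HPUX_SOURCE _INCLUDE_LONGLONG
    __hp9000s700 __hp9000s700__ _WSIO

With -msio the last three are replaced by _SIO, and the traditional-C
branch (hp9000s800, __CLASSIC_C__, _PWB, ...) fires only for non-ISO C.
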
diff --git a/gcc/config/pa/pa-hpux.opt b/gcc/config/pa/pa-hpux.opt
new file mode 100644
index 000000000..eaed8be2d
--- /dev/null
+++ b/gcc/config/pa/pa-hpux.opt
@@ -0,0 +1,37 @@
+; Options for the HP PA-RISC port of the compiler.
+
+; Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+msio
+Target RejectNegative Mask(SIO) MaskExists
+Generate cpp defines for server IO
+
+munix=93
+Target RejectNegative
+Specify UNIX standard for predefines and linking
+
+mwsio
+Target RejectNegative InverseMask(SIO)
+Generate cpp defines for workstation IO
+
+nolibdld
+Driver
+
+rdynamic
+Driver
diff --git a/gcc/config/pa/pa-hpux10.h b/gcc/config/pa/pa-hpux10.h
new file mode 100644
index 000000000..bfe09f247
--- /dev/null
+++ b/gcc/config/pa/pa-hpux10.h
@@ -0,0 +1,144 @@
+/* Definitions of target machine for GNU compiler, for HP PA-RISC
+ Copyright (C) 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
+ 2007, 2008, 2010 Free Software Foundation, Inc.
+ Contributed by Tim Moore (moore@defmacro.cs.utah.edu)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* GCC always defines __STDC__. HP C++ compilers don't define it. This
+ causes trouble when sys/stdsyms.h is included. As a workaround,
+ we define __STDC_EXT__. A similar situation exists with respect to
+ the definition of __cplusplus. We define _INCLUDE_LONGLONG
+ to prevent nlist.h from defining __STDC_32_MODE__ (no longlong
+ support). We define __STDCPP__ to get certain system headers
+ (notably assert.h) to assume standard preprocessor behavior in C++. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("system=hpux"); \
+ builtin_assert ("system=unix"); \
+ builtin_define ("__hp9000s800"); \
+ builtin_define ("__hp9000s800__"); \
+ builtin_define ("__hpux"); \
+ builtin_define ("__hpux__"); \
+ builtin_define ("__unix"); \
+ builtin_define ("__unix__"); \
+ builtin_define ("__STDC_EXT__"); \
+ if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ builtin_define ("_REENTRANT"); \
+ builtin_define ("_INCLUDE_LONGLONG"); \
+ builtin_define ("__STDCPP__"); \
+ } \
+ else if (!flag_iso) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ builtin_define ("_REENTRANT"); \
+ if (preprocessing_trad_p ()) \
+ { \
+ builtin_define ("hp9000s800"); \
+ builtin_define ("hppa"); \
+ builtin_define ("hpux"); \
+ builtin_define ("unix"); \
+ builtin_define ("__CLASSIC_C__"); \
+ builtin_define ("_PWB"); \
+ builtin_define ("PWB"); \
+ } \
+ } \
+ if (flag_pa_unix >= 1995) \
+ { \
+ builtin_define ("_XOPEN_UNIX"); \
+ builtin_define ("_XOPEN_SOURCE_EXTENDED"); \
+ } \
+ if (TARGET_SIO) \
+ builtin_define ("_SIO"); \
+ else \
+ { \
+ builtin_define ("__hp9000s700"); \
+ builtin_define ("__hp9000s700__"); \
+ builtin_define ("_WSIO"); \
+ } \
+ } \
+ while (0)
+
+#define CPP_SPEC "%{threads: -D_REENTRANT -D_DCE_THREADS}"
+
+/* We can debug dynamically linked executables on hpux9; we also want
+ dereferencing of a NULL pointer to cause a SEGV. */
+#undef LINK_SPEC
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_PA_11)
+#define LINK_SPEC \
+ "%{!mpa-risc-1-0:%{!march=1.0:%{static:-L/lib/pa1.1 -L/usr/lib/pa1.1 }}}\
+ %{!shared:%{p:-L/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{pg:-L/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{!static:%{rdynamic:-E}}}\
+ -z %{mlinker-opt:-O} %{!shared:-u main}\
+ %{static:-a archive} %{shared:-b}"
+#else
+#define LINK_SPEC \
+ "%{!shared:%{p:-L/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{pg:-L/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{!static:%{rdynamic:-E}}}\
+ -z %{mlinker-opt:-O} %{!shared:-u main}\
+ %{static:-a archive} %{shared:-b}"
+#endif
+
+/* Like the default, except no -lg. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{!shared:\
+ %{!p:%{!pg:\
+ %{!threads:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
+ %{threads:-lcma -lc}}}\
+ %{p:%{!pg:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
+ %{pg:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}"
+
+#undef THREAD_MODEL_SPEC
+#define THREAD_MODEL_SPEC "%{!threads:single}%{threads:dce}"
+
+/* Under hpux10, the normal location of the `ld' and `as' programs is the
+ /usr/ccs/bin directory. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin/"
+#endif
+
+/* Under hpux10, the normal location of the various *crt*.o files is
+ the /usr/ccs/lib directory. However, the profiling files are in
+ /opt/langtools/lib. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
+#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/"
+#endif
+
+/* hpux10 has the new HP assembler. It's still lousy, but it's a whole lot
+ better than the assembler shipped with older versions of hpux. */
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1
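
Decoding LIB_SPEC and THREAD_MODEL_SPEC above for a few invocations
(a sketch; only the library arguments contributed by this spec are
shown, and -p/-pg take the analogous branches further down the spec):

    default link:    -lc                          (thread model: single)
    -threads:        -lcma -lc                    (thread model: dce)
    -static:         -lc -a shared -ldld -a archive -lc
                     (the -ldld sequence is suppressed by -nolibdld)
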
diff --git a/gcc/config/pa/pa-hpux10.opt b/gcc/config/pa/pa-hpux10.opt
new file mode 100644
index 000000000..59056deba
--- /dev/null
+++ b/gcc/config/pa/pa-hpux10.opt
@@ -0,0 +1,22 @@
+; Options specific to HP-UX 10.
+
+; Copyright (C) 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+threads
+Driver
diff --git a/gcc/config/pa/pa-hpux1010.h b/gcc/config/pa/pa-hpux1010.h
new file mode 100644
index 000000000..dfda771fa
--- /dev/null
+++ b/gcc/config/pa/pa-hpux1010.h
@@ -0,0 +1,27 @@
+/* Definitions of target machine for GNU compiler, for HP PA-RISC
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* HP-UX 10.10 UNIX 95 features. */
+#undef TARGET_HPUX_10_10
+#define TARGET_HPUX_10_10 1
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \
+ %{!munix=93:unix95%O%s}}"
diff --git a/gcc/config/pa/pa-hpux1010.opt b/gcc/config/pa/pa-hpux1010.opt
new file mode 100644
index 000000000..f409e8404
--- /dev/null
+++ b/gcc/config/pa/pa-hpux1010.opt
@@ -0,0 +1,23 @@
+; Options for the HP PA-RISC port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+munix=95
+Target RejectNegative
+Specify UNIX standard for predefines and linking
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
new file mode 100644
index 000000000..81dfdf3d1
--- /dev/null
+++ b/gcc/config/pa/pa-hpux11.h
@@ -0,0 +1,189 @@
+/* Definitions of target machine for GNU compiler, for HP PA-RISC
+ Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* GCC always defines __STDC__. HP C++ compilers don't define it. This
+ causes trouble when sys/stdsyms.h is included. As a workaround,
+ we define __STDC_EXT__. A similar situation exists with respect to
+ the definition of __cplusplus. We define _INCLUDE_LONGLONG
+ to prevent nlist.h from defining __STDC_32_MODE__ (no longlong
+ support). We define __STDCPP__ to get certain system headers
+ (notably assert.h) to assume standard preprocessor behavior in C++. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("system=hpux"); \
+ builtin_assert ("system=unix"); \
+ builtin_define ("__hp9000s800"); \
+ builtin_define ("__hp9000s800__"); \
+ builtin_define ("__hpux"); \
+ builtin_define ("__hpux__"); \
+ builtin_define ("__unix"); \
+ builtin_define ("__unix__"); \
+ builtin_define ("__STDC_EXT__"); \
+ if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ builtin_define ("_INCLUDE_LONGLONG"); \
+ builtin_define ("__STDCPP__"); \
+ } \
+ else \
+ { \
+ if (!flag_iso) \
+ { \
+ builtin_define ("_HPUX_SOURCE"); \
+ if (preprocessing_trad_p ()) \
+ { \
+ builtin_define ("hp9000s800"); \
+ builtin_define ("hppa"); \
+ builtin_define ("hpux"); \
+ builtin_define ("unix"); \
+ builtin_define ("__CLASSIC_C__"); \
+ builtin_define ("_PWB"); \
+ builtin_define ("PWB"); \
+ } \
+ } \
+ } \
+ if (!TARGET_64BIT) \
+ builtin_define ("_ILP32"); \
+ if (flag_pa_unix >= 1995 && !flag_iso) \
+ { \
+ builtin_define ("_XOPEN_UNIX"); \
+ builtin_define ("_XOPEN_SOURCE_EXTENDED"); \
+ } \
+ if (TARGET_HPUX_11_11) \
+ { \
+ if (flag_pa_unix >= 1998) \
+ { \
+ if (flag_isoc94 || flag_isoc99 || c_dialect_cxx () \
+ || !flag_iso) \
+ builtin_define ("_INCLUDE__STDC_A1_SOURCE"); \
+ if (!flag_iso) \
+ builtin_define ("_INCLUDE_XOPEN_SOURCE_500"); \
+ } \
+ else if (flag_isoc94 || flag_isoc99 || c_dialect_cxx ()) \
+ warning (0, "-munix=98 option required for C89 " \
+ "Amendment 1 features.\n"); \
+ } \
+ if (TARGET_SIO) \
+ builtin_define ("_SIO"); \
+ else \
+ { \
+ builtin_define ("__hp9000s700"); \
+ builtin_define ("__hp9000s700__"); \
+ builtin_define ("_WSIO"); \
+ } \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC \
+ "%{mt|pthread:-D_REENTRANT -D_THREAD_SAFE -D_POSIX_C_SOURCE=199506L}"
+/* aCC also defines -DRWSTD_MULTI_THREAD and -DRW_MULTI_THREAD. These
+ affect only aCC's C++ library (Rogue Wave-derived), which we do not
+ use, and they violate the user's namespace. */
+
+/* We can debug dynamically linked executables on hpux11; we also
+ want dereferencing of a NULL pointer to cause a SEGV. */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{!shared:%{p:-L/lib/libp -L/usr/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{pg:-L/lib/libp -L/usr/lib/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{!static:%{rdynamic:-E}}}\
+ -z %{mlinker-opt:-O} %{!shared:-u main -u __gcc_plt_call}\
+ %{static:-a archive} %{shared:-b}"
+
+/* HP-UX 11 has posix threads. HP's shared libc contains pthread stubs
+ so that non-threaded applications can be linked with a thread-safe
+ libc without a subsequent loss of performance. For more details,
+ see <http://docs.hp.com/en/1896/pthreads.html>. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{!shared:\
+ %{fopenmp:%{static:-a archive_shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}\
+ %{shared:%{mt|pthread:-lpthread}}"
+
+/* The libgcc_stub.a library needs to come last. */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%G %L %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}}}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \
+ %{!munix=93:unix95%O%s}}"
+
+/* Under hpux11, the normal location of the `ld' and `as' programs is the
+ /usr/ccs/bin directory. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin/"
+#endif
+
+/* Under hpux11 the normal location of the various *crt*.o files is
+ the /usr/ccs/lib directory. However, the profiling files are in
+ /opt/langtools/lib. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
+#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/"
+#endif
+
+/* hpux11 has the new HP assembler. It's still lousy, but it's a whole lot
+ better than the assembler shipped with older versions of hpux. */
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1
+
+/* Make GCC agree with types.h. */
+#undef SIZE_TYPE
+#undef PTRDIFF_TYPE
+
+#define SIZE_TYPE "long unsigned int"
+#define PTRDIFF_TYPE "long int"
+
+/* HP-UX 11.0 and above provides initialization and finalization function
+ support from the linker command line. We don't need to invoke __main to run
+ constructors. We also don't need chatr to determine the dependencies of
+ dynamically linked executables and shared libraries. */
+#undef LDD_SUFFIX
+#undef PARSE_LDD_OUTPUT
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION 1
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "+init"
+#undef LD_FINI_SWITCH
+#define LD_FINI_SWITCH "+fini"
+
+/* The HP-UX 11.X SOM linker (ld32) can successfully link shared libraries
+ with secondary definition (weak) symbols. */
+#undef TARGET_SOM_SDEF
+#define TARGET_SOM_SDEF 1
+
+#undef TARGET_HPUX_11
+#define TARGET_HPUX_11 1
diff --git a/gcc/config/pa/pa-hpux1111.h b/gcc/config/pa/pa-hpux1111.h
new file mode 100644
index 000000000..e47d08c7e
--- /dev/null
+++ b/gcc/config/pa/pa-hpux1111.h
@@ -0,0 +1,27 @@
+/* Definitions of target machine for GNU compiler, for HP PA-RISC
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* HP-UX 11i multibyte and UNIX 98 extensions. */
+#undef TARGET_HPUX_11_11
+#define TARGET_HPUX_11_11 1
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \
+ %{munix=95:unix95%O%s}%{!munix=93:%{!munix=95:unix98%O%s}}}"
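
The STARTFILE_SPEC above selects startup objects as follows (a sketch
of the non-shared cases; %O%s means the object is located on the
startfile search path):

    (default)      crt0.o  + unix98.o
    -munix=95      crt0.o  + unix95.o
    -munix=93      crt0.o  (no unixNN object)
    -p / -pg       mcrt0.o / gcrt0.o instead of crt0.o

-shared links pull in none of these.
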
diff --git a/gcc/config/pa/pa-hpux1111.opt b/gcc/config/pa/pa-hpux1111.opt
new file mode 100644
index 000000000..b59f64a15
--- /dev/null
+++ b/gcc/config/pa/pa-hpux1111.opt
@@ -0,0 +1,23 @@
+; Options for the HP PA-RISC port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+munix=98
+Target RejectNegative
+Specify UNIX standard for predefines and linking
diff --git a/gcc/config/pa/pa-linux.h b/gcc/config/pa/pa-linux.h
new file mode 100644
index 000000000..64626e6fc
--- /dev/null
+++ b/gcc/config/pa/pa-linux.h
@@ -0,0 +1,138 @@
+/* Definitions for PA_RISC with ELF format
+ Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
+ 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ builtin_assert ("machine=bigendian"); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ ""
+
+/* Define this for shared library support because it isn't in the main
+ linux.h file. */
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+/* glibc's profiling functions don't need gcc to allocate counters. */
+#define NO_DEFERRED_PROFILE_COUNTERS 1
+
+/* Define the strings used for the special svr4 .type and .size directives.
+ These strings generally do not vary from one system running svr4 to
+ another, but if a given system (e.g. m88k running svr4) needs to use
+ different pseudo-op names for these, they may be overridden in the
+ file which includes this one. */
+
+#undef STRING_ASM_OP
+#define STRING_ASM_OP "\t.stringz\t"
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+#define TARGET_ASM_FILE_START pa_linux_file_start
+
+/* We want local labels to start with period if made with asm_fprintf. */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Define these to generate the Linux/ELF/SysV style of internal
+ labels all the time - i.e. to be compatible with
+ ASM_GENERATE_INTERNAL_LABEL in <elfos.h>. Compare these with the
+ ones in pa.h and note the lack of dollar signs in these. FIXME:
+ shouldn't we fix pa.h to use ASM_GENERATE_INTERNAL_LABEL instead? */
+
+#undef ASM_OUTPUT_ADDR_VEC_ELT
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ if (TARGET_BIG_SWITCH) \
+ fprintf (FILE, "\t.word .L%d\n", VALUE); \
+ else \
+ fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
+
+#undef ASM_OUTPUT_ADDR_DIFF_ELT
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ if (TARGET_BIG_SWITCH) \
+ fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL); \
+ else \
+ fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
+
+/* Use the default. */
+#undef ASM_OUTPUT_LABEL
+
+/* NOTE: (*targetm.asm_out.internal_label)() is defined for us by elfos.h, and
+ does what we want (i.e. uses colons). It must be compatible with
+ ASM_GENERATE_INTERNAL_LABEL(), so do not define it here. */
+
+/* Use the default. */
+#undef ASM_OUTPUT_INTERNAL_LABEL
+
+/* Use the default. */
+#undef TARGET_ASM_GLOBALIZE_LABEL
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP ".globl "
+
+/* FIXME: Hacked from the <elfos.h> one so that we avoid multiple
+ labels in a function declaration (since pa.c seems determined to do
+ it differently). */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+ } \
+ while (0)
+
+/* As well as globalizing the label, we need to encode the label
+ to ensure a plabel is generated in an indirect call. */
+
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+ do \
+ { \
+ if (!FUNCTION_NAME_P (XSTR (FUN, 0))) \
+ hppa_encode_label (FUN); \
+ (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)); \
+ } \
+ while (0)
+
+/* Linux always uses gas. */
+#undef TARGET_GAS
+#define TARGET_GAS 1
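
For a switch jump table, ASM_OUTPUT_ADDR_VEC_ELT above emits one of
two forms per case label (a sketch, using .L42 as an assumed label
number):

    with -mbig-switch:     .word .L42
    default:               b .L42
                           nop

ASM_OUTPUT_ADDR_DIFF_ELT behaves the same way, except the big-switch
form is a label difference such as .word .L42-.L10.
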
diff --git a/gcc/config/pa/pa-modes.def b/gcc/config/pa/pa-modes.def
new file mode 100644
index 000000000..6a2368c7a
--- /dev/null
+++ b/gcc/config/pa/pa-modes.def
@@ -0,0 +1,32 @@
+/* Definitions of target machine for GNU compiler, for the HP Spectrum.
+ Copyright (C) 2002, 2003, 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com) of Cygnus Support
+ and Tim Moore (moore@defmacro.cs.utah.edu) of the Center for
+ Software Science at the University of Utah.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* PA-RISC has the same reversed quiet bit as MIPS.
+ ??? Why is this called the MIPS format? */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
+/* HPPA floating comparisons produce distinct condition codes. */
+CC_MODE (CCFP);
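
"Reversed quiet bit" means that PA-RISC, like legacy MIPS, marks a
signaling NaN with the most-significant fraction bit set, the opposite
of the convention most targets use. A small sketch with illustrative
single-precision bit patterns (the constants below are assumptions for
the example, not taken from real.c):

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      uint32_t frac_msb  = 0x00400000;   /* top fraction bit (bit 22)   */
      uint32_t ieee_qnan = 0x7fc00000;   /* common quiet NaN: bit set   */
      uint32_t pa_qnan   = 0x7f800001;   /* PA/MIPS quiet NaN: bit clear */

      printf ("%d %d\n",
              (ieee_qnan & frac_msb) != 0,   /* 1 */
              (pa_qnan   & frac_msb) != 0);  /* 0 */
      return 0;
    }
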
diff --git a/gcc/config/pa/pa-protos.h b/gcc/config/pa/pa-protos.h
new file mode 100644
index 000000000..53dcda009
--- /dev/null
+++ b/gcc/config/pa/pa-protos.h
@@ -0,0 +1,172 @@
+/* Prototypes for pa.c functions used in the md file & elsewhere.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef RTX_CODE
+/* Prototype functions used in various macros. */
+extern int symbolic_operand (rtx, enum machine_mode);
+extern int tls_symbolic_operand (rtx);
+extern rtx pa_eh_return_handler_rtx (void);
+
+/* Used in insn-*.c. */
+extern int following_call (rtx);
+extern int function_label_operand (rtx, enum machine_mode);
+extern int lhs_lshift_cint_operand (rtx, enum machine_mode);
+
+/* Define functions in pa.c and used in insn-output.c. */
+
+extern const char *output_and (rtx *);
+extern const char *output_ior (rtx *);
+extern const char *output_move_double (rtx *);
+extern const char *output_fp_move_double (rtx *);
+extern const char *output_block_move (rtx *, int);
+extern const char *output_block_clear (rtx *, int);
+extern const char *output_cbranch (rtx *, int, rtx);
+extern const char *output_lbranch (rtx, rtx, int);
+extern const char *output_bb (rtx *, int, rtx, int);
+extern const char *output_bvb (rtx *, int, rtx, int);
+extern const char *output_dbra (rtx *, rtx, int);
+extern const char *output_movb (rtx *, rtx, int, int);
+extern const char *output_parallel_movb (rtx *, rtx);
+extern const char *output_parallel_addb (rtx *, rtx);
+extern const char *output_call (rtx, rtx, int);
+extern const char *output_indirect_call (rtx, rtx);
+extern const char *output_millicode_call (rtx, rtx);
+extern const char *output_mul_insn (int, rtx);
+extern const char *output_div_insn (rtx *, int, rtx);
+extern const char *output_mod_insn (int, rtx);
+extern const char *singlemove_string (rtx *);
+extern void output_arg_descriptor (rtx);
+extern void output_global_address (FILE *, rtx, int);
+extern void print_operand (FILE *, rtx, int);
+extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern void hppa_encode_label (rtx);
+extern int arith11_operand (rtx, enum machine_mode);
+extern int adddi3_operand (rtx, enum machine_mode);
+extern int indexed_memory_operand (rtx, enum machine_mode);
+extern int symbolic_expression_p (rtx);
+extern int symbolic_memory_operand (rtx, enum machine_mode);
+extern bool pa_tls_referenced_p (rtx);
+extern int pa_adjust_insn_length (rtx, int);
+extern int int11_operand (rtx, enum machine_mode);
+extern int reg_or_cint_move_operand (rtx, enum machine_mode);
+extern int arith5_operand (rtx, enum machine_mode);
+extern int uint5_operand (rtx, enum machine_mode);
+extern int pic_label_operand (rtx, enum machine_mode);
+extern int plus_xor_ior_operator (rtx, enum machine_mode);
+extern int borx_reg_operand (rtx, enum machine_mode);
+extern int shadd_operand (rtx, enum machine_mode);
+extern int arith_operand (rtx, enum machine_mode);
+extern int read_only_operand (rtx, enum machine_mode);
+extern int move_dest_operand (rtx, enum machine_mode);
+extern int move_src_operand (rtx, enum machine_mode);
+extern int prefetch_cc_operand (rtx, enum machine_mode);
+extern int prefetch_nocc_operand (rtx, enum machine_mode);
+extern int and_operand (rtx, enum machine_mode);
+extern int arith32_operand (rtx, enum machine_mode);
+extern int uint32_operand (rtx, enum machine_mode);
+extern int reg_before_reload_operand (rtx, enum machine_mode);
+extern int reg_or_0_operand (rtx, enum machine_mode);
+extern int reg_or_0_or_nonsymb_mem_operand (rtx, enum machine_mode);
+extern int pre_cint_operand (rtx, enum machine_mode);
+extern int post_cint_operand (rtx, enum machine_mode);
+extern int div_operand (rtx, enum machine_mode);
+extern int int5_operand (rtx, enum machine_mode);
+extern int movb_comparison_operator (rtx, enum machine_mode);
+extern int ireg_or_int5_operand (rtx, enum machine_mode);
+extern int fmpyaddoperands (rtx *);
+extern int fmpysuboperands (rtx *);
+extern int call_operand_address (rtx, enum machine_mode);
+extern void emit_bcond_fp (rtx[]);
+extern int emit_move_sequence (rtx *, enum machine_mode, rtx);
+extern int emit_hpdiv_const (rtx *, int);
+extern int is_function_label_plus_const (rtx);
+extern int jump_in_call_delay (rtx);
+extern int hppa_fpstore_bypass_p (rtx, rtx);
+extern int attr_length_millicode_call (rtx);
+extern int attr_length_call (rtx, int);
+extern int attr_length_indirect_call (rtx);
+extern int attr_length_save_restore_dltp (rtx);
+
+/* Declare functions defined in pa.c and used in templates. */
+
+extern struct rtx_def *return_addr_rtx (int, rtx);
+
+extern int fp_reg_operand (rtx, enum machine_mode);
+extern int arith_double_operand (rtx, enum machine_mode);
+extern int ireg_operand (rtx, enum machine_mode);
+extern int lhs_lshift_operand (rtx, enum machine_mode);
+extern int pc_or_label_operand (rtx, enum machine_mode);
+#ifdef ARGS_SIZE_RTX
+/* expr.h defines ARGS_SIZE_RTX and `enum direction' */
+#ifdef TREE_CODE
+extern enum direction function_arg_padding (enum machine_mode, const_tree);
+#endif
+#endif /* ARGS_SIZE_RTX */
+extern int non_hard_reg_operand (rtx, enum machine_mode);
+extern int eq_neq_comparison_operator (rtx, enum machine_mode);
+extern int insn_refs_are_delayed (rtx);
+extern rtx get_deferred_plabel (rtx);
+#endif /* RTX_CODE */
+
+extern int integer_store_memory_operand (rtx, enum machine_mode);
+extern int ldil_cint_p (HOST_WIDE_INT);
+extern int zdepi_cint_p (unsigned HOST_WIDE_INT);
+
+extern void output_ascii (FILE *, const char *, int);
+extern HOST_WIDE_INT compute_frame_size (HOST_WIDE_INT, int *);
+extern int and_mask_p (unsigned HOST_WIDE_INT);
+extern int cint_ok_for_move (HOST_WIDE_INT);
+extern void hppa_expand_prologue (void);
+extern void hppa_expand_epilogue (void);
+extern bool pa_can_use_return_insn (void);
+extern int ior_mask_p (unsigned HOST_WIDE_INT);
+extern void compute_zdepdi_operands (unsigned HOST_WIDE_INT,
+ unsigned *);
+#ifdef RTX_CODE
+extern const char * output_64bit_and (rtx *);
+extern const char * output_64bit_ior (rtx *);
+extern int cmpib_comparison_operator (rtx, enum machine_mode);
+#endif
+
+
+/* Miscellaneous functions in pa.c. */
+#ifdef TREE_CODE
+extern int reloc_needed (tree);
+extern bool pa_return_in_memory (const_tree, const_tree);
+#endif /* TREE_CODE */
+
+extern void pa_asm_output_aligned_bss (FILE *, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void pa_asm_output_aligned_common (FILE *, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void pa_asm_output_aligned_local (FILE *, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned int);
+extern void pa_hpux_asm_output_external (FILE *, tree, const char *);
+extern bool pa_cannot_change_mode_class (enum machine_mode, enum machine_mode,
+ enum reg_class);
+extern bool pa_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern HOST_WIDE_INT pa_initial_elimination_offset (int, int);
+
+extern const int magic_milli[];
+extern int shadd_constant_p (int);
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
new file mode 100644
index 000000000..8a4445fdc
--- /dev/null
+++ b/gcc/config/pa/pa.c
@@ -0,0 +1,10471 @@
+/* Subroutines for insn-output.c for HPPA.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "tree.h"
+#include "output.h"
+#include "except.h"
+#include "expr.h"
+#include "optabs.h"
+#include "reload.h"
+#include "integrate.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "recog.h"
+#include "predict.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "df.h"
+
+/* Return nonzero if there is a bypass for the output of
+ OUT_INSN and the fp store IN_INSN. */
+int
+hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
+{
+ enum machine_mode store_mode;
+ enum machine_mode other_mode;
+ rtx set;
+
+ if (recog_memoized (in_insn) < 0
+ || (get_attr_type (in_insn) != TYPE_FPSTORE
+ && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
+ || recog_memoized (out_insn) < 0)
+ return 0;
+
+ store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
+
+ set = single_set (out_insn);
+ if (!set)
+ return 0;
+
+ other_mode = GET_MODE (SET_SRC (set));
+
+ return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
+}
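+
+/* For example, an FP operation whose DFmode result feeds a DFmode
+   fpstore passes the size test above and can use the bypass; a mixed
+   SFmode/DFmode pair cannot.  */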
+
+
+#ifndef DO_FRAME_NOTES
+#ifdef INCOMING_RETURN_ADDR_RTX
+#define DO_FRAME_NOTES 1
+#else
+#define DO_FRAME_NOTES 0
+#endif
+#endif
+
+static void pa_option_override (void);
+static void copy_reg_pointer (rtx, rtx);
+static void fix_range (const char *);
+static bool pa_handle_option (size_t, const char *, int);
+static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
+ reg_class_t);
+static int hppa_address_cost (rtx, bool);
+static bool hppa_rtx_costs (rtx, int, int, int *, bool);
+static inline rtx force_mode (enum machine_mode, rtx);
+static void pa_reorg (void);
+static void pa_combine_instructions (void);
+static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
+static bool forward_branch_p (rtx);
+static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
+static int compute_movmem_length (rtx);
+static int compute_clrmem_length (rtx);
+static bool pa_assemble_integer (rtx, unsigned int, int);
+static void remove_useless_addtr_insns (int);
+static void store_reg (int, HOST_WIDE_INT, int);
+static void store_reg_modify (int, int, HOST_WIDE_INT);
+static void load_reg (int, HOST_WIDE_INT, int);
+static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
+static rtx pa_function_value (const_tree, const_tree, bool);
+static rtx pa_libcall_value (enum machine_mode, const_rtx);
+static bool pa_function_value_regno_p (const unsigned int);
+static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void update_total_code_bytes (unsigned int);
+static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static int pa_adjust_cost (rtx, rtx, rtx, int);
+static int pa_adjust_priority (rtx, int);
+static int pa_issue_rate (void);
+static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
+static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
+ ATTRIBUTE_UNUSED;
+static void pa_encode_section_info (tree, rtx, int);
+static const char *pa_strip_name_encoding (const char *);
+static bool pa_function_ok_for_sibcall (tree, tree);
+static void pa_globalize_label (FILE *, const char *)
+ ATTRIBUTE_UNUSED;
+static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+#if !defined(USE_COLLECT2)
+static void pa_asm_out_constructor (rtx, int);
+static void pa_asm_out_destructor (rtx, int);
+#endif
+static void pa_init_builtins (void);
+static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static rtx hppa_builtin_saveregs (void);
+static void hppa_va_start (tree, rtx);
+static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static bool pa_scalar_mode_supported_p (enum machine_mode);
+static bool pa_commutative_p (const_rtx x, int outer_code);
+static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
+static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
+static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
+static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
+static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
+static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
+static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
+static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
+static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
+static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
+static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
+static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
+static void output_deferred_plabels (void);
+static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
+#ifdef ASM_OUTPUT_EXTERNAL_REAL
+static void pa_hpux_file_end (void);
+#endif
+#if HPUX_LONG_DOUBLE_LIBRARY
+static void pa_hpux_init_libfuncs (void);
+#endif
+static rtx pa_struct_value_rtx (tree, int);
+static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
+static struct machine_function * pa_init_machine_status (void);
+static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ secondary_reload_info *);
+static void pa_extra_live_on_entry (bitmap);
+static enum machine_mode pa_promote_function_mode (const_tree,
+ enum machine_mode, int *,
+ const_tree, int);
+
+static void pa_asm_trampoline_template (FILE *);
+static void pa_trampoline_init (rtx, tree, rtx);
+static rtx pa_trampoline_adjust_address (rtx);
+static rtx pa_delegitimize_address (rtx);
+static bool pa_print_operand_punct_valid_p (unsigned char);
+static rtx pa_internal_arg_pointer (void);
+static bool pa_can_eliminate (const int, const int);
+static void pa_conditional_register_usage (void);
+static enum machine_mode pa_c_mode_for_suffix (char);
+static section *pa_function_section (tree, enum node_frequency, bool, bool);
+static unsigned int pa_section_type_flags (tree, const char *, int);
+
+/* The following extra sections are only used for SOM. */
+static GTY(()) section *som_readonly_data_section;
+static GTY(()) section *som_one_only_readonly_data_section;
+static GTY(()) section *som_one_only_data_section;
+
+/* Which cpu we are scheduling for. */
+enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;
+
+/* The UNIX standard to use for predefines and linking. */
+int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;
+
+/* Counts for the number of callee-saved general and floating point
+ registers which were saved by the current function's prologue. */
+static int gr_saved, fr_saved;
+
+/* Boolean indicating whether the return pointer was saved by the
+ current function's prologue. */
+static bool rp_saved;
+
+static rtx find_addr_reg (rtx);
+
+/* Keep track of the number of bytes we have output in the CODE subspace
+ during this compilation so we'll know when to emit inline long-calls. */
+unsigned long total_code_bytes;
+
+/* The last address of the previous function plus the number of bytes in
+ associated thunks that have been output. This is used to determine if
+ a thunk can use an IA-relative branch to reach its target function. */
+static unsigned int last_address;
+
+/* Variables to handle plabels that we discover are necessary at assembly
+ output time. They are output after the current function. */
+struct GTY(()) deferred_plabel
+{
+ rtx internal_label;
+ rtx symbol;
+};
+static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
+ deferred_plabels;
+static size_t n_deferred_plabels = 0;
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options pa_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+
+/* Initialize the GCC target structure. */
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE pa_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE pa_option_optimization_table
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER pa_assemble_integer
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE pa_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE pa_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
+
+#undef TARGET_COMMUTATIVE_P
+#define TARGET_COMMUTATIVE_P pa_commutative_p
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_ASM_FILE_END
+#ifdef ASM_OUTPUT_EXTERNAL_REAL
+#define TARGET_ASM_FILE_END pa_hpux_file_end
+#else
+#define TARGET_ASM_FILE_END output_deferred_plabels
+#endif
+
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
+
+#if !defined(USE_COLLECT2)
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
+#endif
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION pa_handle_option
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS pa_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN pa_expand_builtin
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS hppa_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hppa_address_cost
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
+
+#if HPUX_LONG_DOUBLE_LIBRARY
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
+#endif
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG pa_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD pa_secondary_reload
+
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
+#undef TARGET_INTERNAL_ARG_POINTER
+#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE pa_can_eliminate
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
+#undef TARGET_C_MODE_FOR_SUFFIX
+#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
+#undef TARGET_ASM_FUNCTION_SECTION
+#define TARGET_ASM_FUNCTION_SECTION pa_function_section
+
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Parse the -mfixed-range= option string. */
+
+static void
+fix_range (const char *const_str)
+{
+ int i, first, last;
+ char *str, *dash, *comma;
+
+ /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
+ REG2 are either register names or register numbers. The effect
+ of this option is to mark the registers in the range from REG1 to
+ REG2 as ``fixed'' so they won't be used by the compiler. This is
+ used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
+
+ i = strlen (const_str);
+ str = (char *) alloca (i + 1);
+ memcpy (str, const_str, i + 1);
+
+ while (1)
+ {
+ dash = strchr (str, '-');
+ if (!dash)
+ {
+ warning (0, "value of -mfixed-range must have form REG1-REG2");
+ return;
+ }
+ *dash = '\0';
+
+ comma = strchr (dash + 1, ',');
+ if (comma)
+ *comma = '\0';
+
+ first = decode_reg_name (str);
+ if (first < 0)
+ {
+ warning (0, "unknown register name: %s", str);
+ return;
+ }
+
+ last = decode_reg_name (dash + 1);
+ if (last < 0)
+ {
+ warning (0, "unknown register name: %s", dash + 1);
+ return;
+ }
+
+ *dash = '-';
+
+ if (first > last)
+ {
+ warning (0, "%s-%s is an empty range", str, dash + 1);
+ return;
+ }
+
+ for (i = first; i <= last; ++i)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ if (!comma)
+ break;
+
+ *comma = ',';
+ str = comma + 1;
+ }
+
+ /* Check if all floating point registers have been fixed. */
+ for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
+ if (!fixed_regs[i])
+ break;
+
+ if (i > FP_REG_LAST)
+ target_flags |= MASK_DISABLE_FPREGS;
+}
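+
+/* Illustrative trace of the parser above (register names follow the
+ fr4-fr31 convention used in the comment in fix_range): the string
+ "fr20-fr23,fr28-fr31" is split at the comma, each half is split at
+ the dash, decode_reg_name maps the names to register numbers, and
+ both ranges end up fixed and call-used. If that leaves no FP
+ register unfixed, the final scan sets MASK_DISABLE_FPREGS. */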
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mnosnake:
+ case OPT_mpa_risc_1_0:
+ case OPT_march_1_0:
+ target_flags &= ~(MASK_PA_11 | MASK_PA_20);
+ return true;
+
+ case OPT_msnake:
+ case OPT_mpa_risc_1_1:
+ case OPT_march_1_1:
+ target_flags &= ~MASK_PA_20;
+ target_flags |= MASK_PA_11;
+ return true;
+
+ case OPT_mpa_risc_2_0:
+ case OPT_march_2_0:
+ target_flags |= MASK_PA_11 | MASK_PA_20;
+ return true;
+
+ case OPT_mschedule_:
+ if (strcmp (arg, "8000") == 0)
+ pa_cpu = PROCESSOR_8000;
+ else if (strcmp (arg, "7100") == 0)
+ pa_cpu = PROCESSOR_7100;
+ else if (strcmp (arg, "700") == 0)
+ pa_cpu = PROCESSOR_700;
+ else if (strcmp (arg, "7100LC") == 0)
+ pa_cpu = PROCESSOR_7100LC;
+ else if (strcmp (arg, "7200") == 0)
+ pa_cpu = PROCESSOR_7200;
+ else if (strcmp (arg, "7300") == 0)
+ pa_cpu = PROCESSOR_7300;
+ else
+ return false;
+ return true;
+
+ case OPT_mfixed_range_:
+ fix_range (arg);
+ return true;
+
+#if TARGET_HPUX
+ case OPT_munix_93:
+ flag_pa_unix = 1993;
+ return true;
+#endif
+
+#if TARGET_HPUX_10_10
+ case OPT_munix_95:
+ flag_pa_unix = 1995;
+ return true;
+#endif
+
+#if TARGET_HPUX_11_11
+ case OPT_munix_98:
+ flag_pa_unix = 1998;
+ return true;
+#endif
+
+ default:
+ return true;
+ }
+}
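+
+/* For instance, -mschedule=7100LC takes the OPT_mschedule_ case above
+ and sets pa_cpu to PROCESSOR_7100LC, while an unrecognized argument
+ such as -mschedule=9000 makes the hook return false so the argument
+ is rejected. */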
+
+/* Implement the TARGET_OPTION_OVERRIDE hook. */
+
+static void
+pa_option_override (void)
+{
+ /* Unconditional branches in the delay slot are not compatible with dwarf2
+ call frame information. There is no benefit in using this optimization
+ on PA8000 and later processors. */
+ if (pa_cpu >= PROCESSOR_8000
+ || (targetm.except_unwind_info (&global_options) == UI_DWARF2
+ && flag_exceptions)
+ || flag_unwind_tables)
+ target_flags &= ~MASK_JUMP_IN_DELAY;
+
+ if (flag_pic && TARGET_PORTABLE_RUNTIME)
+ {
+ warning (0, "PIC code generation is not supported in the portable runtime model");
+ }
+
+ if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
+ {
+ warning (0, "PIC code generation is not compatible with fast indirect calls");
+ }
+
+ if (! TARGET_GAS && write_symbols != NO_DEBUG)
+ {
+ warning (0, "-g is only supported when using GAS on this processor,");
+ warning (0, "-g option disabled");
+ write_symbols = NO_DEBUG;
+ }
+
+ /* We only support the "big PIC" model now. And we always generate PIC
+ code when in 64bit mode. */
+ if (flag_pic == 1 || TARGET_64BIT)
+ flag_pic = 2;
+
+ /* Disable -freorder-blocks-and-partition as we don't support hot and
+ cold partitioning. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not work "
+ "on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+
+ /* We can't guarantee that .dword is available for 32-bit targets. */
+ if (UNITS_PER_WORD == 4)
+ targetm.asm_out.aligned_op.di = NULL;
+
+ /* The unaligned ops are only available when using GAS. */
+ if (!TARGET_GAS)
+ {
+ targetm.asm_out.unaligned_op.hi = NULL;
+ targetm.asm_out.unaligned_op.si = NULL;
+ targetm.asm_out.unaligned_op.di = NULL;
+ }
+
+ init_machine_status = pa_init_machine_status;
+}
+
+enum pa_builtins
+{
+ PA_BUILTIN_COPYSIGNQ,
+ PA_BUILTIN_FABSQ,
+ PA_BUILTIN_INFQ,
+ PA_BUILTIN_HUGE_VALQ,
+ PA_BUILTIN_max
+};
+
+static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
+
+static void
+pa_init_builtins (void)
+{
+#ifdef DONT_HAVE_FPUTC_UNLOCKED
+ built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
+ built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
+ implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
+ = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
+#endif
+#if TARGET_HPUX_11
+ if (built_in_decls [BUILT_IN_FINITE])
+ set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
+ if (built_in_decls [BUILT_IN_FINITEF])
+ set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
+#endif
+
+ if (HPUX_LONG_DOUBLE_LIBRARY)
+ {
+ tree decl, ftype;
+
+ /* Under HPUX, the __float128 type is a synonym for "long double". */
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float128");
+
+ /* TFmode support builtins. */
+ ftype = build_function_type_list (long_double_type_node,
+ long_double_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_fabsq", ftype,
+ PA_BUILTIN_FABSQ, BUILT_IN_MD,
+ "_U_Qfabs", NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ pa_builtins[PA_BUILTIN_FABSQ] = decl;
+
+ ftype = build_function_type_list (long_double_type_node,
+ long_double_type_node,
+ long_double_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_copysignq", ftype,
+ PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
+ "_U_Qfcopysign", NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
+
+ ftype = build_function_type (long_double_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_infq", ftype,
+ PA_BUILTIN_INFQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ pa_builtins[PA_BUILTIN_INFQ] = decl;
+
+ decl = add_builtin_function ("__builtin_huge_valq", ftype,
+ PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
+ }
+}
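+
+/* Hypothetical usage of the TFmode builtins registered above (assumes
+ an HP-UX long-double library target):
+
+ __float128 a = __builtin_fabsq (q); (calls _U_Qfabs)
+ __float128 s = __builtin_copysignq (x, y); (calls _U_Qfcopysign)
+
+ __builtin_infq and __builtin_huge_valq take no arguments; they are
+ materialized by pa_expand_builtin below rather than by a libcall. */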
+
+static rtx
+pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case PA_BUILTIN_FABSQ:
+ case PA_BUILTIN_COPYSIGNQ:
+ return expand_call (exp, target, ignore);
+
+ case PA_BUILTIN_INFQ:
+ case PA_BUILTIN_HUGE_VALQ:
+ {
+ enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
+
+ tmp = validize_mem (force_const_mem (target_mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (target_mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return NULL_RTX;
+}
+
+/* Function to init struct machine_function.
+ This will be called, via a pointer variable,
+ from push_function_context. */
+
+static struct machine_function *
+pa_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* If FROM is a probable pointer register, mark TO as a probable
+ pointer register with the same pointer alignment as FROM. */
+
+static void
+copy_reg_pointer (rtx to, rtx from)
+{
+ if (REG_POINTER (from))
+ mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
+}
+
+/* Return 1 if X contains a symbolic expression. We know these
+ expressions will have one of a few well defined forms, so
+ we need only check those forms. */
+int
+symbolic_expression_p (rtx x)
+{
+
+ /* Strip off any HIGH. */
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ return (symbolic_operand (x, VOIDmode));
+}
+
+/* Accept any constant that can be moved in one instruction into a
+ general register. */
+int
+cint_ok_for_move (HOST_WIDE_INT ival)
+{
+ /* OK if ldo, ldil, or zdepi, can be used. */
+ return (VAL_14_BITS_P (ival)
+ || ldil_cint_p (ival)
+ || zdepi_cint_p (ival));
+}
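+
+/* Illustrative constants for the three cases above: 8191 fits in 14
+ signed bits (ldi/ldo), 0x12345000 has its low 11 bits clear and does
+ not change sign when widened (ldil), and 0x00ff0000 is a contiguous
+ bit field that zdepi can deposit into a zeroed register. */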
+
+/* Return truth value of whether OP can be used as an operand in an
+ adddi3 insn. */
+int
+adddi3_operand (rtx op, enum machine_mode mode)
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT
+ && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
+}
+
+/* True iff the operand OP can be used as the destination operand of
+ an integer store. This also implies the operand could be used as
+ the source operand of an integer load. Symbolic, lo_sum and indexed
+ memory operands are not allowed. We accept reloading pseudos and
+ other memory operands. */
+int
+integer_store_memory_operand (rtx op, enum machine_mode mode)
+{
+ return ((reload_in_progress
+ && REG_P (op)
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber [REGNO (op)] < 0)
+ || (GET_CODE (op) == MEM
+ && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
+ && !symbolic_memory_operand (op, VOIDmode)
+ && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
+ && !IS_INDEX_ADDR_P (XEXP (op, 0))));
+}
+
+/* True iff ldil can be used to load this CONST_INT. The least
+ significant 11 bits of the value must be zero and the value must
+ not change sign when extended from 32 to 64 bits. */
+int
+ldil_cint_p (HOST_WIDE_INT ival)
+{
+ HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
+
+ return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
+}
+
+/* True iff zdepi can be used to generate this CONST_INT.
+ zdepi first sign extends a 5-bit signed number to a given field
+ length, then places this field anywhere in a zero. */
+int
+zdepi_cint_p (unsigned HOST_WIDE_INT x)
+{
+ unsigned HOST_WIDE_INT lsb_mask, t;
+
+ /* This might not be obvious, but it's at least fast.
+ This function is critical; we don't have the time loops would take. */
+ lsb_mask = x & -x;
+ t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
+ /* Return true iff t is a power of two. */
+ return ((t & (t - 1)) == 0);
+}
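+
+/* A worked instance of the test above (values illustrative): for
+ x = 0x00ff0000, lsb_mask = 0x00010000 and
+ t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1) = 0x00100000, a power of
+ two, so the field of ones can be built by depositing a sign-extended
+ 5-bit -1. For x = 0x00ff00f0 the computation leaves several bits set
+ in t and the test correctly fails. */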
+
+/* True iff depi or extru can be used to compute (reg & mask).
+ Accept bit pattern like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1 */
+int
+and_mask_p (unsigned HOST_WIDE_INT mask)
+{
+ mask = ~mask;
+ mask += mask & -mask;
+ return (mask & (mask - 1)) == 0;
+}
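+
+/* Sketch of the bit trick above: mask = 0xfffff00f (pattern 1..10..01..1)
+ inverts to 0x00000ff0; adding its lowest set bit gives 0x00001000, a
+ power of two, so the mask is accepted. mask = 0xff00ff00 inverts to
+ 0x00ff00ff, which becomes 0x00ff0100 and is rejected. ior_mask_p
+ below applies the same power-of-two test without the inversion. */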
+
+/* True iff depi can be used to compute (reg | MASK). */
+int
+ior_mask_p (unsigned HOST_WIDE_INT mask)
+{
+ mask += mask & -mask;
+ return (mask & (mask - 1)) == 0;
+}
+
+/* Legitimize PIC addresses. If the address is already
+ position-independent, we return ORIG. Newly generated
+ position-independent addresses go to REG. If we need more
+ than one register, we lose. */
+
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+{
+ rtx pic_ref = orig;
+
+ gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
+
+ /* Labels need special handling. */
+ if (pic_label_operand (orig, mode))
+ {
+ rtx insn;
+
+ /* We do not want to go through the movXX expanders here since that
+ would create recursion.
+
+ Nor do we really want to call a generator for a named pattern
+ since that requires multiple patterns if we want to support
+ multiple word sizes.
+
+ So instead we just emit the raw set, which avoids the movXX
+ expanders completely. */
+ mark_reg_pointer (reg, BITS_PER_UNIT);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
+
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
+ add_reg_note (insn, REG_EQUAL, orig);
+
+ /* During and after reload, we need to generate a REG_LABEL_OPERAND note
+ and update LABEL_NUSES because this is not done automatically. */
+ if (reload_in_progress || reload_completed)
+ {
+ /* Extract LABEL_REF. */
+ if (GET_CODE (orig) == CONST)
+ orig = XEXP (XEXP (orig, 0), 0);
+ /* Extract CODE_LABEL. */
+ orig = XEXP (orig, 0);
+ add_reg_note (insn, REG_LABEL_OPERAND, orig);
+ LABEL_NUSES (orig)++;
+ }
+ crtl->uses_pic_offset_table = 1;
+ return reg;
+ }
+ if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ rtx insn, tmp_reg;
+
+ gcc_assert (reg);
+
+ /* Before reload, allocate a temporary register for the intermediate
+ result. This allows the sequence to be deleted when the final
+ result is unused and the insns are trivially dead. */
+ tmp_reg = ((reload_in_progress || reload_completed)
+ ? reg : gen_reg_rtx (Pmode));
+
+ if (function_label_operand (orig, mode))
+ {
+ /* Force function label into memory in word mode. */
+ orig = XEXP (force_const_mem (word_mode, orig), 0);
+ /* Load plabel address from DLT. */
+ emit_move_insn (tmp_reg,
+ gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
+ gen_rtx_HIGH (word_mode, orig)));
+ pic_ref
+ = gen_const_mem (Pmode,
+ gen_rtx_LO_SUM (Pmode, tmp_reg,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, orig),
+ UNSPEC_DLTIND14R)));
+ emit_move_insn (reg, pic_ref);
+ /* Now load address of function descriptor. */
+ pic_ref = gen_rtx_MEM (Pmode, reg);
+ }
+ else
+ {
+ /* Load symbol reference from DLT. */
+ emit_move_insn (tmp_reg,
+ gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
+ gen_rtx_HIGH (word_mode, orig)));
+ pic_ref
+ = gen_const_mem (Pmode,
+ gen_rtx_LO_SUM (Pmode, tmp_reg,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, orig),
+ UNSPEC_DLTIND14R)));
+ }
+
+ crtl->uses_pic_offset_table = 1;
+ mark_reg_pointer (reg, BITS_PER_UNIT);
+ insn = emit_move_insn (reg, pic_ref);
+
+ /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
+ set_unique_reg_note (insn, REG_EQUAL, orig);
+
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
+ return orig;
+
+ gcc_assert (reg);
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
+ base == reg ? 0 : reg);
+
+ if (GET_CODE (orig) == CONST_INT)
+ {
+ if (INT_14_BITS (orig))
+ return plus_constant (base, INTVAL (orig));
+ orig = force_reg (Pmode, orig);
+ }
+ pic_ref = gen_rtx_PLUS (Pmode, base, orig);
+ /* Likewise, should we set special REG_NOTEs here? */
+ }
+
+ return pic_ref;
+}
+
+static GTY(()) rtx gen_tls_tga;
+
+static rtx
+gen_tls_get_addr (void)
+{
+ if (!gen_tls_tga)
+ gen_tls_tga = init_one_libfunc ("__tls_get_addr");
+ return gen_tls_tga;
+}
+
+static rtx
+hppa_tls_call (rtx arg)
+{
+ rtx ret;
+
+ ret = gen_reg_rtx (Pmode);
+ emit_library_call_value (gen_tls_get_addr (), ret,
+ LCT_CONST, Pmode, 1, arg, Pmode);
+
+ return ret;
+}
+
+static rtx
+legitimize_tls_address (rtx addr)
+{
+ rtx ret, insn, tmp, t1, t2, tp;
+ enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ tmp = gen_reg_rtx (Pmode);
+ if (flag_pic)
+ emit_insn (gen_tgd_load_pic (tmp, addr));
+ else
+ emit_insn (gen_tgd_load (tmp, addr));
+ ret = hppa_tls_call (tmp);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ ret = gen_reg_rtx (Pmode);
+ tmp = gen_reg_rtx (Pmode);
+ start_sequence ();
+ if (flag_pic)
+ emit_insn (gen_tld_load_pic (tmp, addr));
+ else
+ emit_insn (gen_tld_load (tmp, addr));
+ t1 = hppa_tls_call (tmp);
+ insn = get_insns ();
+ end_sequence ();
+ t2 = gen_reg_rtx (Pmode);
+ emit_libcall_block (insn, t2, t1,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLSLDBASE));
+ emit_insn (gen_tld_offset_load (ret, addr, t2));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ tp = gen_reg_rtx (Pmode);
+ tmp = gen_reg_rtx (Pmode);
+ ret = gen_reg_rtx (Pmode);
+ emit_insn (gen_tp_load (tp));
+ if (flag_pic)
+ emit_insn (gen_tie_load_pic (tmp, addr));
+ else
+ emit_insn (gen_tie_load (tmp, addr));
+ emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = gen_reg_rtx (Pmode);
+ ret = gen_reg_rtx (Pmode);
+ emit_insn (gen_tp_load (tp));
+ emit_insn (gen_tle_load (ret, addr, tp));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return ret;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the PA, transform:
+
+ memory(X + <large int>)
+
+ into:
+
+ if (<large int> & mask) >= (mask + 1) / 2
+ Y = (<large int> & ~mask) + mask + 1 Round up.
+ else
+ Y = (<large int> & ~mask) Round down.
+ Z = X + Y
+ memory (Z + (<large int> - Y));
+
+ This is for CSE to find several similar references, and only use one Z.
+
+ X can either be a SYMBOL_REF or REG, but because combine cannot
+ perform a 4->2 combination we do nothing for SYMBOL_REF + D where
+ D will not fit in 14 bits.
+
+ MODE_FLOAT references allow displacements which fit in 5 bits, so use
+ 0x1f as the mask.
+
+ MODE_INT references allow displacements which fit in 14 bits, so use
+ 0x3fff as the mask.
+
+ This relies on the fact that most mode MODE_FLOAT references will use FP
+ registers and most mode MODE_INT references will use integer registers.
+ (In the rare case of an FP register used in an integer MODE, we depend
+ on secondary reloads to clean things up.)
+
+
+ It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
+ manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
+ addressing modes to be used).
+
+ Put X and Z into registers. Then put the entire expression into
+ a register. */
+
+rtx
+hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ rtx orig = x;
+
+ /* We need to canonicalize the order of operands in unscaled indexed
+ addresses since the code that checks if an address is valid doesn't
+ always try both orders. */
+ if (!TARGET_NO_SPACE_REGS
+ && GET_CODE (x) == PLUS
+ && GET_MODE (x) == Pmode
+ && REG_P (XEXP (x, 0))
+ && REG_P (XEXP (x, 1))
+ && REG_POINTER (XEXP (x, 0))
+ && !REG_POINTER (XEXP (x, 1)))
+ return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
+
+ if (PA_SYMBOL_REF_TLS_P (x))
+ return legitimize_tls_address (x);
+ else if (flag_pic)
+ return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
+
+ /* Strip off CONST. */
+ if (GET_CODE (x) == CONST)
+ x = XEXP (x, 0);
+
+ /* Special case. Get the SYMBOL_REF into a register and use indexing.
+ That should always be safe. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
+ {
+ rtx reg = force_reg (Pmode, XEXP (x, 1));
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
+ }
+
+ /* Note we must reject symbols which represent function addresses
+ since the assembler/linker can't handle arithmetic on plabels. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+ && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
+ || GET_CODE (XEXP (x, 0)) == REG))
+ {
+ rtx int_part, ptr_reg;
+ int newoffset;
+ int offset = INTVAL (XEXP (x, 1));
+ int mask;
+
+ mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
+ ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
+
+ /* Choose which way to round the offset. Round up if we
+ are >= halfway to the next boundary. */
+ if ((offset & mask) >= ((mask + 1) / 2))
+ newoffset = (offset & ~ mask) + mask + 1;
+ else
+ newoffset = (offset & ~ mask);
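+
+ /* Numeric sketch (offset illustrative): with a MODE_INT reference,
+ mask = 0x3fff; offset = 0x12345 has (offset & mask) = 0x2345 >= 0x2000,
+ so we round up to newoffset = 0x14000, and the residual
+ 0x12345 - 0x14000 = -7355 still fits in a 14-bit displacement. */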
+
+ /* If the newoffset will not fit in 14 bits (ldo), then
+ handling this would take 4 or 5 instructions (2 to load
+ the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
+ add the new offset and the SYMBOL_REF.) Combine can
+ not handle 4->2 or 5->2 combinations, so do not create
+ them. */
+ if (! VAL_14_BITS_P (newoffset)
+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
+ {
+ rtx const_part = plus_constant (XEXP (x, 0), newoffset);
+ rtx tmp_reg
+ = force_reg (Pmode,
+ gen_rtx_HIGH (Pmode, const_part));
+ ptr_reg
+ = force_reg (Pmode,
+ gen_rtx_LO_SUM (Pmode,
+ tmp_reg, const_part));
+ }
+ else
+ {
+ if (! VAL_14_BITS_P (newoffset))
+ int_part = force_reg (Pmode, GEN_INT (newoffset));
+ else
+ int_part = GEN_INT (newoffset);
+
+ ptr_reg = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ force_reg (Pmode, XEXP (x, 0)),
+ int_part));
+ }
+ return plus_constant (ptr_reg, offset - newoffset);
+ }
+
+ /* Handle (plus (mult (a) (shadd_constant)) (b)). */
+
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
+ && (OBJECT_P (XEXP (x, 1))
+ || GET_CODE (XEXP (x, 1)) == SUBREG)
+ && GET_CODE (XEXP (x, 1)) != CONST)
+ {
+ int val = INTVAL (XEXP (XEXP (x, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = XEXP (x, 1);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+
+ /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
+
+ Only do so for floating point modes since this is more speculative
+ and we lose if it's an integer store. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
+ && (mode == SFmode || mode == DFmode))
+ {
+
+ /* First, try and figure out what to use as a base register. */
+ rtx reg1, reg2, base, idx;
+
+ reg1 = XEXP (XEXP (x, 0), 1);
+ reg2 = XEXP (x, 1);
+ base = NULL_RTX;
+ idx = NULL_RTX;
+
+ /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
+ then emit_move_sequence will turn on REG_POINTER so we'll know
+ it's a base register below. */
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ /* Figure out what the base and index are. */
+
+ if (GET_CODE (reg1) == REG
+ && REG_POINTER (reg1))
+ {
+ base = reg1;
+ idx = gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ XEXP (XEXP (XEXP (x, 0), 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 0), 1)),
+ XEXP (x, 1));
+ }
+ else if (GET_CODE (reg2) == REG
+ && REG_POINTER (reg2))
+ {
+ base = reg2;
+ idx = XEXP (x, 0);
+ }
+
+ if (base == 0)
+ return orig;
+
+ /* If the index adds a large constant, try to scale the
+ constant so that it can be loaded with only one insn. */
+ if (GET_CODE (XEXP (idx, 1)) == CONST_INT
+ && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
+ / INTVAL (XEXP (XEXP (idx, 0), 1)))
+ && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
+ {
+ /* Divide the CONST_INT by the scale factor, then add it to A. */
+ int val = INTVAL (XEXP (idx, 1));
+
+ val /= INTVAL (XEXP (XEXP (idx, 0), 1));
+ reg1 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
+
+ /* We can now generate a simple scaled indexed address. */
+ return
+ force_reg
+ (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg1,
+ XEXP (XEXP (idx, 0), 1)),
+ base));
+ }
+
+ /* If B + C is still a valid base register, then add them. */
+ if (GET_CODE (XEXP (idx, 1)) == CONST_INT
+ && INTVAL (XEXP (idx, 1)) <= 4096
+ && INTVAL (XEXP (idx, 1)) >= -4096)
+ {
+ int val = INTVAL (XEXP (XEXP (idx, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
+
+ reg2 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg2) != CONST_INT)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+
+ /* Get the index into a register, then add the base + index and
+ return a register holding the result. */
+
+ /* First get A into a register. */
+ reg1 = XEXP (XEXP (idx, 0), 0);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ /* And get B into a register. */
+ reg2 = XEXP (idx, 1);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ reg1 = force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, reg1,
+ XEXP (XEXP (idx, 0), 1)),
+ reg2));
+
+ /* Add the result to our base register and return. */
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
+
+ }
+
+ /* Uh-oh. We might have an address for x[n-100000]. This needs
+ special handling to avoid creating an indexed memory address
+ with x-100000 as the base.
+
+ If the constant part is small enough, then it's still safe because
+ there is a guard page at the beginning and end of the data segment.
+
+ Scaled references are common enough that we want to try and rearrange the
+ terms so that we can use indexing for these addresses too. Only
+ do the optimization for floating point modes. */
+
+ if (GET_CODE (x) == PLUS
+ && symbolic_expression_p (XEXP (x, 1)))
+ {
+ /* Ugly. We modify things here so that the address offset specified
+ by the index expression is computed first, then added to x to form
+ the entire address. */
+
+ rtx regx1, regx2, regy1, regy2, y;
+
+ /* Strip off any CONST. */
+ y = XEXP (x, 1);
+ if (GET_CODE (y) == CONST)
+ y = XEXP (y, 0);
+
+ if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
+ {
+ /* See if this looks like
+ (plus (mult (reg) (shadd_const))
+ (const (plus (symbol_ref) (const_int))))
+
+ Where const_int is small. In that case the const
+ expression is a valid pointer for indexing.
+
+ If const_int is big but can be divided evenly by shadd_const,
+ divide it by shadd_const and add the quotient to (reg) before
+ scaling. This allows more scaled indexed addresses. */
+ if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) >= -4096
+ && INTVAL (XEXP (y, 1)) <= 4095
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
+ {
+ int val = INTVAL (XEXP (XEXP (x, 0), 1));
+ rtx reg1, reg2;
+
+ reg1 = XEXP (x, 1);
+ if (GET_CODE (reg1) != REG)
+ reg1 = force_reg (Pmode, force_operand (reg1, 0));
+
+ reg2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (reg2) != REG)
+ reg2 = force_reg (Pmode, force_operand (reg2, 0));
+
+ return force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode,
+ reg2,
+ GEN_INT (val)),
+ reg1));
+ }
+ else if ((mode == DFmode || mode == SFmode)
+ && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
+ {
+ regx1
+ = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
+ / INTVAL (XEXP (XEXP (x, 0), 1))));
+ regx2 = XEXP (XEXP (x, 0), 0);
+ if (GET_CODE (regx2) != REG)
+ regx2 = force_reg (Pmode, force_operand (regx2, 0));
+ regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
+ regx2, regx1));
+ return
+ force_reg (Pmode,
+ gen_rtx_PLUS (Pmode,
+ gen_rtx_MULT (Pmode, regx2,
+ XEXP (XEXP (x, 0), 1)),
+ force_reg (Pmode, XEXP (y, 0))));
+ }
+ else if (GET_CODE (XEXP (y, 1)) == CONST_INT
+ && INTVAL (XEXP (y, 1)) >= -4096
+ && INTVAL (XEXP (y, 1)) <= 4095)
+ {
+ /* This is safe because of the guard page at the
+ beginning and end of the data space. Just
+ return the original address. */
+ return orig;
+ }
+ else
+ {
+ /* Doesn't look like one we can optimize. */
+ regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
+ regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
+ regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
+ regx1 = force_reg (Pmode,
+ gen_rtx_fmt_ee (GET_CODE (y), Pmode,
+ regx1, regy2));
+ return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
+ }
+ }
+ }
+
+ return orig;
+}
+
+/* Implement the TARGET_REGISTER_MOVE_COST hook.
+
+ Compute extra cost of moving data between one register class
+ and another.
+
+ Make moves from SAR so expensive they should never happen. We used to
+ have 0xffff here, but that generates overflow in rare cases.
+
+ Copies involving a FP register and a non-FP register are relatively
+ expensive because they must go through memory.
+
+ Other copies are reasonably cheap. */
+
+static int
+hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ if (from == SHIFT_REGS)
+ return 0x100;
+ else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
+ return 18;
+ else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
+ || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
+ return 16;
+ else
+ return 2;
+}
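+
+/* So, for example, a copy between a general register and an FP register
+ is charged 16 because it must be staged through memory, while any copy
+ whose source class is SHIFT_REGS (the SAR) is priced at 0x100 so the
+ allocator effectively never chooses it. */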
+
+/* For the HPPA, REG, REG+CONST and LO_SUM addresses cost 1,
+ addresses built from a HIGH (symbolic constants) cost 2, and
+ everything else costs 4.
+
+ PIC addresses are very expensive.
+
+ It is no coincidence that this has the same structure
+ as GO_IF_LEGITIMATE_ADDRESS. */
+
+static int
+hppa_address_cost (rtx X,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (GET_CODE (X))
+ {
+ case REG:
+ case PLUS:
+ case LO_SUM:
+ return 1;
+ case HIGH:
+ return 2;
+ default:
+ return 4;
+ }
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ if (INTVAL (x) == 0)
+ *total = 0;
+ else if (INT_14_BITS (x))
+ *total = 1;
+ else
+ *total = 2;
+ return true;
+
+ case HIGH:
+ *total = 2;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = 4;
+ return true;
+
+ case CONST_DOUBLE:
+ if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
+ && outer_code != SET)
+ *total = 0;
+ else
+ *total = 8;
+ return true;
+
+ case MULT:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ *total = COSTS_N_INSNS (3);
+ else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
+ *total = COSTS_N_INSNS (8);
+ else
+ *total = COSTS_N_INSNS (20);
+ return true;
+
+ case DIV:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ *total = COSTS_N_INSNS (14);
+ return true;
+ }
+ /* FALLTHRU */
+
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (60);
+ return true;
+
+ case PLUS: /* this includes shNadd insns */
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ *total = COSTS_N_INSNS (3);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
+ new rtx with the correct mode. */
+static inline rtx
+force_mode (enum machine_mode mode, rtx orig)
+{
+ if (mode == GET_MODE (orig))
+ return orig;
+
+ gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
+
+ return gen_rtx_REG (mode, REGNO (orig));
+}
+
+/* Return 1 if *X is a thread-local symbol. */
+
+static int
+pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ return PA_SYMBOL_REF_TLS_P (*x);
+}
+
+/* Return 1 if X contains a thread-local symbol. */
+
+bool
+pa_tls_referenced_p (rtx x)
+{
+ if (!TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
+}
+
+/* Emit insns to move operands[1] into operands[0].
+
+ Return 1 if we have written out everything that needs to be done to
+ do the move. Otherwise, return 0 and the caller will emit the move
+ normally.
+
+ Note SCRATCH_REG may not be in the proper mode depending on how it
+ will be used. This routine is responsible for creating a new copy
+ of SCRATCH_REG in the proper mode. */
+
+int
+emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
+{
+ register rtx operand0 = operands[0];
+ register rtx operand1 = operands[1];
+ register rtx tem;
+
+ /* We can only handle indexed addresses in the destination operand
+ of floating point stores. Thus, we need to break out indexed
+ addresses from the destination operand. */
+ if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
+ {
+ gcc_assert (can_create_pseudo_p ());
+
+ tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
+ operand0 = replace_equiv_address (operand0, tem);
+ }
+
+ /* On targets with non-equivalent space registers, break out unscaled
+ indexed addresses from the source operand before the final CSE.
+ We have to do this because the REG_POINTER flag is not correctly
+ carried through various optimization passes and CSE may substitute
+ a pseudo without the pointer set for one with the pointer set. As
+ a result, we lose various opportunities to create insns with
+ unscaled indexed addresses. */
+ if (!TARGET_NO_SPACE_REGS
+ && !cse_not_expected
+ && GET_CODE (operand1) == MEM
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && REG_P (XEXP (XEXP (operand1, 0), 0))
+ && REG_P (XEXP (XEXP (operand1, 0), 1)))
+ operand1
+ = replace_equiv_address (operand1,
+ copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ operand0 = reg_equiv_mem[REGNO (operand0)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && GET_CODE (SUBREG_REG (operand0)) == REG
+ && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand0) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
+ SUBREG_BYTE (operand0));
+ operand0 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == REG
+ && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
+ operand1 = reg_equiv_mem[REGNO (operand1)];
+ else if (scratch_reg
+ && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && GET_CODE (SUBREG_REG (operand1)) == REG
+ && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
+ {
+ /* We must not alter SUBREG_BYTE (operand0) since that would confuse
+ the code which tracks sets/uses for delete_output_reload. */
+ rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
+ reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
+ SUBREG_BYTE (operand1));
+ operand1 = alter_subreg (&temp);
+ }
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ && ((tem = find_replacement (&XEXP (operand0, 0)))
+ != XEXP (operand0, 0)))
+ operand0 = replace_equiv_address (operand0, tem);
+
+ if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ && ((tem = find_replacement (&XEXP (operand1, 0)))
+ != XEXP (operand1, 0)))
+ operand1 = replace_equiv_address (operand1, tem);
+
+ /* Handle secondary reloads for loads/stores of FP registers from
+ REG+D addresses where D does not fit in 5 or 14 bits, including
+ (subreg (mem (addr))) cases. */
+ if (scratch_reg
+ && fp_reg_operand (operand0, mode)
+ && ((GET_CODE (operand1) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
+ XEXP (operand1, 0)))
+ || ((GET_CODE (operand1) == SUBREG
+ && GET_CODE (XEXP (operand1, 0)) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4
+ ? SFmode : DFmode),
+ XEXP (XEXP (operand1, 0), 0))))))
+ {
+ if (GET_CODE (operand1) == SUBREG)
+ operand1 = XEXP (operand1, 0);
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in WORD_MODE regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (!memory_address_p (Pmode, XEXP (operand1, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+ emit_move_insn (scratch_reg,
+ gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
+ Pmode,
+ XEXP (XEXP (operand1, 0), 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand1, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, operand0,
+ replace_equiv_address (operand1, scratch_reg)));
+ return 1;
+ }
+ else if (scratch_reg
+ && fp_reg_operand (operand1, mode)
+ && ((GET_CODE (operand0) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4
+ ? SFmode : DFmode),
+ XEXP (operand0, 0)))
+ || ((GET_CODE (operand0) == SUBREG)
+ && GET_CODE (XEXP (operand0, 0)) == MEM
+ && !memory_address_p ((GET_MODE_SIZE (mode) == 4
+ ? SFmode : DFmode),
+ XEXP (XEXP (operand0, 0), 0)))))
+ {
+ if (GET_CODE (operand0) == SUBREG)
+ operand0 = XEXP (operand0, 0);
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in WORD_MODE regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (!memory_address_p (Pmode, XEXP (operand0, 0)))
+ {
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
+ 0)),
+ Pmode,
+ XEXP (XEXP (operand0, 0),
+ 0),
+ scratch_reg));
+ }
+ else
+ emit_move_insn (scratch_reg, XEXP (operand0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ replace_equiv_address (operand0, scratch_reg),
+ operand1));
+ return 1;
+ }
+ /* Handle secondary reloads for loads of FP registers from constant
+ expressions by forcing the constant into memory.
+
+ Use scratch_reg to hold the address of the memory location.
+
+ The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
+ NO_REGS when presented with a const_int and a register class
+ containing only FP registers. Doing so unfortunately creates
+ more problems than it solves. Fix this for 2.5. */
+ else if (scratch_reg
+ && CONSTANT_P (operand1)
+ && fp_reg_operand (operand0, mode))
+ {
+ rtx const_mem, xoperands[2];
+
+ /* SCRATCH_REG will hold an address and maybe the actual data. We want
+ it in WORD_MODE regardless of what mode it was originally given
+ to us. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+
+ /* Force the constant into memory and put the address of the
+ memory location into scratch_reg. */
+ const_mem = force_const_mem (mode, operand1);
+ xoperands[0] = scratch_reg;
+ xoperands[1] = XEXP (const_mem, 0);
+ emit_move_sequence (xoperands, Pmode, 0);
+
+ /* Now load the destination register. */
+ emit_insn (gen_rtx_SET (mode, operand0,
+ replace_equiv_address (const_mem, scratch_reg)));
+ return 1;
+ }
+ /* Handle secondary reloads for SAR. These occur when trying to load
+ the SAR from memory or a constant. */
+ else if (scratch_reg
+ && GET_CODE (operand0) == REG
+ && REGNO (operand0) < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
+ && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
+ {
+ /* D might not fit in 14 bits either; for such cases load D into
+ scratch reg. */
+ if (GET_CODE (operand1) == MEM
+ && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
+ {
+ /* We are reloading the address into the scratch register, so we
+ want to make sure the scratch register is a full register. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+
+ emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+ emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
+ 0)),
+ Pmode,
+ XEXP (XEXP (operand1, 0),
+ 0),
+ scratch_reg));
+
+ /* Now we are going to load the scratch register from memory,
+ we want to load it in the same width as the original MEM,
+ which must be the same as the width of the ultimate destination,
+ OPERAND0. */
+ scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
+
+ emit_move_insn (scratch_reg,
+ replace_equiv_address (operand1, scratch_reg));
+ }
+ else
+ {
+ /* We want to load the scratch register using the same mode as
+ the ultimate destination. */
+ scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
+
+ emit_move_insn (scratch_reg, operand1);
+ }
+
+ /* And emit the insn to set the ultimate destination. We know that
+ the scratch register has the same mode as the destination at this
+ point. */
+ emit_move_insn (operand0, scratch_reg);
+ return 1;
+ }
+ /* Handle the most common case: storing into a register. */
+ else if (register_operand (operand0, mode))
+ {
+ /* Legitimize TLS symbol references. This happens for references
+ that aren't a legitimate constant. */
+ if (PA_SYMBOL_REF_TLS_P (operand1))
+ operand1 = legitimize_tls_address (operand1);
+
+ if (register_operand (operand1, mode)
+ || (GET_CODE (operand1) == CONST_INT
+ && cint_ok_for_move (INTVAL (operand1)))
+ || (operand1 == CONST0_RTX (mode))
+ || (GET_CODE (operand1) == HIGH
+ && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
+ /* Only `general_operands' can come here, so MEM is ok. */
+ || GET_CODE (operand1) == MEM)
+ {
+ /* Various sets are created during RTL generation which don't
+ have the REG_POINTER flag correctly set. After the CSE pass,
+ instruction recognition can fail if we don't consistently
+ set this flag when performing register copies. This should
+ also improve the opportunities for creating insns that use
+ unscaled indexing. */
+ if (REG_P (operand0) && REG_P (operand1))
+ {
+ if (REG_POINTER (operand1)
+ && !REG_POINTER (operand0)
+ && !HARD_REGISTER_P (operand0))
+ copy_reg_pointer (operand0, operand1);
+ }
+
+ /* When MEMs are broken out, the REG_POINTER flag doesn't
+ get set. In some cases, we can set the REG_POINTER flag
+ from the declaration for the MEM. */
+ if (REG_P (operand0)
+ && GET_CODE (operand1) == MEM
+ && !REG_POINTER (operand0))
+ {
+ tree decl = MEM_EXPR (operand1);
+
+ /* Set the register pointer flag and register alignment
+ if the declaration for this memory reference is a
+ pointer type. */
+ if (decl)
+ {
+ tree type;
+
+ /* If this is a COMPONENT_REF, use the FIELD_DECL from
+ tree operand 1. */
+ if (TREE_CODE (decl) == COMPONENT_REF)
+ decl = TREE_OPERAND (decl, 1);
+
+ type = TREE_TYPE (decl);
+ type = strip_array_types (type);
+
+ if (POINTER_TYPE_P (type))
+ {
+ int align;
+
+ type = TREE_TYPE (type);
+ /* Using TYPE_ALIGN_OK is rather conservative as
+ only the Ada front end actually sets it. */
+ align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
+ : BITS_PER_UNIT);
+ mark_reg_pointer (operand0, align);
+ }
+ }
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
+ return 1;
+ }
+ }
+ else if (GET_CODE (operand0) == MEM)
+ {
+ if (mode == DFmode && operand1 == CONST0_RTX (mode)
+ && !(reload_in_progress || reload_completed))
+ {
+ rtx temp = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
+ return 1;
+ }
+ if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
+ {
+ /* Run this case quickly. */
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
+ return 1;
+ }
+ if (! (reload_in_progress || reload_completed))
+ {
+ operands[0] = validize_mem (operand0);
+ operands[1] = operand1 = force_reg (mode, operand1);
+ }
+ }
+
+ /* Simplify the source if we need to.
+ Note we do have to handle function labels here, even though we do
+ not consider them legitimate constants. Loop optimizations can
+ call the emit_move_xxx with one as a source. */
+ if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
+ || function_label_operand (operand1, mode)
+ || (GET_CODE (operand1) == HIGH
+ && symbolic_operand (XEXP (operand1, 0), mode)))
+ {
+ int ishighonly = 0;
+
+ if (GET_CODE (operand1) == HIGH)
+ {
+ ishighonly = 1;
+ operand1 = XEXP (operand1, 0);
+ }
+ if (symbolic_operand (operand1, mode))
+ {
+ /* Argh. The assembler and linker can't handle arithmetic
+ involving plabels.
+
+ So we force the plabel into memory, load operand0 from
+ the memory location, then add in the constant part. */
+ if ((GET_CODE (operand1) == CONST
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
+ || function_label_operand (operand1, mode))
+ {
+ rtx temp, const_part;
+
+ /* Figure out what (if any) scratch register to use. */
+ if (reload_in_progress || reload_completed)
+ {
+ scratch_reg = scratch_reg ? scratch_reg : operand0;
+ /* SCRATCH_REG will hold an address and maybe the actual
+ data. We want it in WORD_MODE regardless of what mode it
+ was originally given to us. */
+ scratch_reg = force_mode (word_mode, scratch_reg);
+ }
+ else if (flag_pic)
+ scratch_reg = gen_reg_rtx (Pmode);
+
+ if (GET_CODE (operand1) == CONST)
+ {
+ /* Save away the constant part of the expression. */
+ const_part = XEXP (XEXP (operand1, 0), 1);
+ gcc_assert (GET_CODE (const_part) == CONST_INT);
+
+ /* Force the function label into memory. */
+ temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
+ }
+ else
+ {
+ /* No constant part. */
+ const_part = NULL_RTX;
+
+ /* Force the function label into memory. */
+ temp = force_const_mem (mode, operand1);
+ }
+
+ /* Get the address of the memory location. PIC-ify it if
+ necessary. */
+ temp = XEXP (temp, 0);
+ if (flag_pic)
+ temp = legitimize_pic_address (temp, mode, scratch_reg);
+
+ /* Put the address of the memory location into our destination
+ register. */
+ operands[1] = temp;
+ emit_move_sequence (operands, mode, scratch_reg);
+
+ /* Now load from the memory location into our destination
+ register. */
+ operands[1] = gen_rtx_MEM (Pmode, operands[0]);
+ emit_move_sequence (operands, mode, scratch_reg);
+
+ /* And add back in the constant part. */
+ if (const_part != NULL_RTX)
+ expand_inc (operand0, const_part);
+
+ return 1;
+ }
+
+ if (flag_pic)
+ {
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ {
+ temp = scratch_reg ? scratch_reg : operand0;
+ /* TEMP will hold an address and maybe the actual
+ data. We want it in WORD_MODE regardless of what mode it
+ was originally given to us. */
+ temp = force_mode (word_mode, temp);
+ }
+ else
+ temp = gen_reg_rtx (Pmode);
+
+ /* (const (plus (symbol) (const_int))) must be forced to
+ memory during/after reload if the const_int will not fit
+ in 14 bits. */
+ if (GET_CODE (operand1) == CONST
+ && GET_CODE (XEXP (operand1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
+ && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
+ && (reload_completed || reload_in_progress)
+ && flag_pic)
+ {
+ rtx const_mem = force_const_mem (mode, operand1);
+ operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
+ mode, temp);
+ operands[1] = replace_equiv_address (const_mem, operands[1]);
+ emit_move_sequence (operands, mode, temp);
+ }
+ else
+ {
+ operands[1] = legitimize_pic_address (operand1, mode, temp);
+ if (REG_P (operand0) && REG_P (operands[1]))
+ copy_reg_pointer (operand0, operands[1]);
+ emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
+ }
+ }
+ /* On the HPPA, references to data space are supposed to use dp,
+ register 27, but showing it in the RTL inhibits various cse
+ and loop optimizations. */
+ else
+ {
+ rtx temp, set;
+
+ if (reload_in_progress || reload_completed)
+ {
+ temp = scratch_reg ? scratch_reg : operand0;
+ /* TEMP will hold an address and maybe the actual
+ data. We want it in WORD_MODE regardless of what mode it
+ was originally given to us. */
+ temp = force_mode (word_mode, temp);
+ }
+ else
+ temp = gen_reg_rtx (mode);
+
+ /* Loading a SYMBOL_REF into a register makes that register
+ safe to be used as the base in an indexed address.
+
+ Don't mark hard registers though. That loses. */
+ if (GET_CODE (operand0) == REG
+ && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
+ mark_reg_pointer (operand0, BITS_PER_UNIT);
+ if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
+ mark_reg_pointer (temp, BITS_PER_UNIT);
+
+ if (ishighonly)
+ set = gen_rtx_SET (mode, operand0, temp);
+ else
+ set = gen_rtx_SET (VOIDmode,
+ operand0,
+ gen_rtx_LO_SUM (mode, temp, operand1));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ temp,
+ gen_rtx_HIGH (mode, operand1)));
+ emit_insn (set);
+
+ }
+ return 1;
+ }
+ else if (pa_tls_referenced_p (operand1))
+ {
+ rtx tmp = operand1;
+ rtx addend = NULL;
+
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (tmp, 0), 1);
+ tmp = XEXP (XEXP (tmp, 0), 0);
+ }
+
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ tmp = legitimize_tls_address (tmp);
+ if (addend)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, addend);
+ tmp = force_operand (tmp, operands[0]);
+ }
+ operands[1] = tmp;
+ }
+ else if (GET_CODE (operand1) != CONST_INT
+ || !cint_ok_for_move (INTVAL (operand1)))
+ {
+ rtx insn, temp;
+ rtx op1 = operand1;
+ HOST_WIDE_INT value = 0;
+ HOST_WIDE_INT insv = 0;
+ int insert = 0;
+
+ if (GET_CODE (operand1) == CONST_INT)
+ value = INTVAL (operand1);
+
+ if (TARGET_64BIT
+ && GET_CODE (operand1) == CONST_INT
+ && HOST_BITS_PER_WIDE_INT > 32
+ && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
+ {
+ HOST_WIDE_INT nval;
+
+ /* Extract the low order 32 bits of the value and sign extend.
+ If the new value is the same as the original value, we can
+ use the original value as-is. If the new value is
+ different, we use it and insert the most-significant 32-bits
+ of the original value into the final result. */
+ nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
+ ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
+ if (value != nval)
+ {
+#if HOST_BITS_PER_WIDE_INT > 32
+ insv = value >= 0 ? value >> 32 : ~(~value >> 32);
+#endif
+ insert = 1;
+ value = nval;
+ operand1 = GEN_INT (nval);
+ }
+ }
+
+ if (reload_in_progress || reload_completed)
+ temp = scratch_reg ? scratch_reg : operand0;
+ else
+ temp = gen_reg_rtx (mode);
+
+ /* We don't directly split DImode constants on 32-bit targets
+ because PLUS uses an 11-bit immediate and the insn sequence
+ generated is not as efficient as the one using HIGH/LO_SUM. */
+ if (GET_CODE (operand1) == CONST_INT
+ && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
+ && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && !insert)
+ {
+ /* Directly break constant into high and low parts. This
+ provides better optimization opportunities because various
+ passes recognize constants split with PLUS but not LO_SUM.
+ We use a 14-bit signed low part except when the addition
+ of 0x4000 to the high part might change the sign of the
+ high part. */
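+
+ /* For instance (value illustrative): value = 0x12345678 gives
+ low = 0x1678 and high = 0x12344000; low < 0x2000, so no adjustment
+ is needed and the pair becomes, roughly, an ldil of 0x12344000
+ followed by an ldo of 0x1678. */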
+ HOST_WIDE_INT low = value & 0x3fff;
+ HOST_WIDE_INT high = value & ~ 0x3fff;
+
+ if (low >= 0x2000)
+ {
+ if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
+ high += 0x2000;
+ else
+ high += 0x4000;
+ }
+
+ low = value - high;
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
+ operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_HIGH (mode, operand1)));
+ operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
+ }
+
+ insn = emit_move_insn (operands[0], operands[1]);
+
+ /* Now insert the most significant 32 bits of the value
+ into the register. When we don't have a second register
+ available, it could take up to nine instructions to load
+ a 64-bit integer constant. Prior to reload, we force
+ constants that would take more than three instructions
+ to load to the constant pool. During and after reload,
+ we have to handle all possible values. */
+ if (insert)
+ {
+ /* Use a HIGH/LO_SUM/INSV sequence if we have a second
+ register and the value to be inserted is outside the
+ range that can be loaded with three depdi instructions. */
+ if (temp != operand0 && (insv >= 16384 || insv < -16384))
+ {
+ operand1 = GEN_INT (insv);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_HIGH (mode, operand1)));
+ emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
+ emit_insn (gen_insv (operand0, GEN_INT (32),
+ const0_rtx, temp));
+ }
+ else
+ {
+ int len = 5, pos = 27;
+
+ /* Insert the bits using the depdi instruction. */
+ while (pos >= 0)
+ {
+ HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
+ HOST_WIDE_INT sign = v5 < 0;
+
+ /* Left extend the insertion. */
+ insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
+ while (pos > 0 && (insv & 1) == sign)
+ {
+ insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
+ len += 1;
+ pos -= 1;
+ }
+
+ emit_insn (gen_insv (operand0, GEN_INT (len),
+ GEN_INT (pos), GEN_INT (v5)));
+
+ len = pos > 0 && pos < 5 ? pos : 5;
+ pos -= len;
+ }
+ }
+ }
+
+ set_unique_reg_note (insn, REG_EQUAL, op1);
+
+ return 1;
+ }
+ }
+ /* Now have insn-emit do whatever it normally does. */
+ return 0;
+}
+
+/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
+ it will need a link/runtime reloc). */
+
+int
+reloc_needed (tree exp)
+{
+ int reloc = 0;
+
+ switch (TREE_CODE (exp))
+ {
+ case ADDR_EXPR:
+ return 1;
+
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ reloc = reloc_needed (TREE_OPERAND (exp, 0));
+ reloc |= reloc_needed (TREE_OPERAND (exp, 1));
+ break;
+
+ CASE_CONVERT:
+ case NON_LVALUE_EXPR:
+ reloc = reloc_needed (TREE_OPERAND (exp, 0));
+ break;
+
+ case CONSTRUCTOR:
+ {
+ tree value;
+ unsigned HOST_WIDE_INT ix;
+
+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
+ if (value)
+ reloc |= reloc_needed (value);
+ }
+ break;
+
+ case ERROR_MARK:
+ break;
+
+ default:
+ break;
+ }
+ return reloc;
+}
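+
+/* As an illustration (hypothetical initializer): for a static
+ initializer such as &foo + 4, the ADDR_EXPR under the
+ POINTER_PLUS_EXPR makes reloc_needed return nonzero, while a plain
+ integer constant needs no link-time relocation. */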
+
+/* Does operand (which is a symbolic_operand) live in text space?
+ If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
+ will be true. */
+
+int
+read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (operand) == CONST)
+ operand = XEXP (XEXP (operand, 0), 0);
+ if (flag_pic)
+ {
+ if (GET_CODE (operand) == SYMBOL_REF)
+ return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
+ }
+ else
+ {
+ if (GET_CODE (operand) == SYMBOL_REF)
+ return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
+ }
+ return 1;
+}
+
+
+/* Return the best assembler insn template
+ for moving operands[1] into operands[0] as a fullword. */
+const char *
+singlemove_string (rtx *operands)
+{
+ HOST_WIDE_INT intval;
+
+ if (GET_CODE (operands[0]) == MEM)
+ return "stw %r1,%0";
+ if (GET_CODE (operands[1]) == MEM)
+ return "ldw %1,%0";
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ long i;
+ REAL_VALUE_TYPE d;
+
+ gcc_assert (GET_MODE (operands[1]) == SFmode);
+
+ /* Translate the CONST_DOUBLE to a CONST_INT with the same target
+ bit pattern. */
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (d, i);
+
+ operands[1] = GEN_INT (i);
+ /* Fall through to CONST_INT case. */
+ }
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ intval = INTVAL (operands[1]);
+
+ if (VAL_14_BITS_P (intval))
+ return "ldi %1,%0";
+ else if ((intval & 0x7ff) == 0)
+ return "ldil L'%1,%0";
+ else if (zdepi_cint_p (intval))
+ return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
+ else
+ return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
+ }
+ return "copy %1,%0";
+}
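+
+/* Sample templates chosen above (operands illustrative): 8191 yields
+ "ldi 8191,%r28"; 0x40000800 has its low 11 bits clear and yields
+ "ldil L'0x40000800,%r28"; a constant that fails all three tests is
+ emitted as the two-instruction ldil/ldo pair. */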
+
+
+/* Compute position (in OP[1]) and width (in OP[2])
+ useful for copying IMM to a register using the zdepi
+ instructions. Store the immediate value to insert in OP[0]. */
+static void
+compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
+{
+ int lsb, len;
+
+ /* Find the least significant set bit in IMM. */
+ for (lsb = 0; lsb < 32; lsb++)
+ {
+ if ((imm & 1) != 0)
+ break;
+ imm >>= 1;
+ }
+
+ /* Choose variants based on *sign* of the 5-bit field. */
+ if ((imm & 0x10) == 0)
+ len = (lsb <= 28) ? 4 : 32 - lsb;
+ else
+ {
+ /* Find the width of the bitstring in IMM. */
+ for (len = 5; len < 32 - lsb; len++)
+ {
+ if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
+ break;
+ }
+
+ /* Sign extend IMM as a 5-bit value. */
+ imm = (imm & 0xf) - 0x10;
+ }
+
+ op[0] = imm;
+ op[1] = 31 - lsb;
+ op[2] = len;
+}
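+
+/* Worked example (illustrative): IMM = 0x380 (bits 7..9 set) shifts
+   down to 0x7 with lsb = 7; bit 4 of the shifted value is clear, so
+   len = 4, giving op[0] = 7, op[1] = 31 - 7 = 24, op[2] = 4, i.e. a
+   zdepi that deposits the value 7 into a 4-bit field ending at PA
+   bit position 24.  */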
+
+/* Compute position (in OP[1]) and width (in OP[2])
+ useful for copying IMM to a register using the depdi,z
+ instructions. Store the immediate value to insert in OP[0]. */
+void
+compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
+{
+ int lsb, len, maxlen;
+
+ maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
+
+ /* Find the least significant set bit in IMM. */
+ for (lsb = 0; lsb < maxlen; lsb++)
+ {
+ if ((imm & 1) != 0)
+ break;
+ imm >>= 1;
+ }
+
+ /* Choose variants based on *sign* of the 5-bit field. */
+ if ((imm & 0x10) == 0)
+ len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
+ else
+ {
+ /* Find the width of the bitstring in IMM. */
+ for (len = 5; len < maxlen - lsb; len++)
+ {
+ if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
+ break;
+ }
+
+ /* Extend length if host is narrow and IMM is negative. */
+ if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
+ len += 32;
+
+ /* Sign extend IMM as a 5-bit value. */
+ imm = (imm & 0xf) - 0x10;
+ }
+
+ op[0] = imm;
+ op[1] = 63 - lsb;
+ op[2] = len;
+}
+
+/* Output assembler code to perform a doubleword move insn
+ with operands OPERANDS. */
+
+const char *
+output_move_double (rtx *operands)
+{
+ enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
+ rtx latehalf[2];
+ rtx addreg0 = 0, addreg1 = 0;
+
+ /* First classify both operands. */
+
+ if (REG_P (operands[0]))
+ optype0 = REGOP;
+ else if (offsettable_memref_p (operands[0]))
+ optype0 = OFFSOP;
+ else if (GET_CODE (operands[0]) == MEM)
+ optype0 = MEMOP;
+ else
+ optype0 = RNDOP;
+
+ if (REG_P (operands[1]))
+ optype1 = REGOP;
+ else if (CONSTANT_P (operands[1]))
+ optype1 = CNSTOP;
+ else if (offsettable_memref_p (operands[1]))
+ optype1 = OFFSOP;
+ else if (GET_CODE (operands[1]) == MEM)
+ optype1 = MEMOP;
+ else
+ optype1 = RNDOP;
+
+  /* Check for cases that the operand constraints are not supposed
+     to allow.  */
+ gcc_assert (optype0 == REGOP || optype1 == REGOP);
+
+ /* Handle copies between general and floating registers. */
+
+ if (optype0 == REGOP && optype1 == REGOP
+ && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
+ {
+ if (FP_REG_P (operands[0]))
+ {
+ output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
+ output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
+ return "{fldds|fldd} -16(%%sp),%0";
+ }
+ else
+ {
+ output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
+ output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
+ return "{ldws|ldw} -12(%%sp),%R0";
+ }
+ }
+
+  /* Handle auto decrementing and incrementing loads and stores
+     specifically, since the structure of the function doesn't work
+     for them without major modification.  Do this better when we
+     teach this port about the PA's general inc/dec addressing.
+     (This was written by tege.  Chide him if it doesn't work.)  */
+
+ if (optype0 == MEMOP)
+ {
+ /* We have to output the address syntax ourselves, since print_operand
+ doesn't deal with the addresses we want to use. Fix this later. */
+
+ rtx addr = XEXP (operands[0], 0);
+ if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
+
+ operands[0] = XEXP (addr, 0);
+ gcc_assert (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[0]) == REG);
+
+ gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
+
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == POST_INC)
+ return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
+ return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
+ }
+ else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
+
+ operands[0] = XEXP (addr, 0);
+ gcc_assert (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[0]) == REG);
+
+ gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to save a
+ register file writeback) */
+ if (GET_CODE (addr) == PRE_INC)
+ return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
+ return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
+ }
+ }
+ if (optype1 == MEMOP)
+ {
+ /* We have to output the address syntax ourselves, since print_operand
+ doesn't deal with the addresses we want to use. Fix this later. */
+
+ rtx addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ operands[1] = XEXP (addr, 0);
+ gcc_assert (GET_CODE (operands[0]) == REG
+ && GET_CODE (operands[1]) == REG);
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == POST_INC)
+ return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
+ return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
+ }
+ else
+ {
+ /* This is an undefined situation. We should load into the
+ address register *and* update that register. Probably
+ we don't need to handle this at all. */
+ if (GET_CODE (addr) == POST_INC)
+ return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
+ return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
+ }
+ }
+ else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ {
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ operands[1] = XEXP (addr, 0);
+ gcc_assert (GET_CODE (operands[0]) == REG
+ && GET_CODE (operands[1]) == REG);
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ /* No overlap between high target register and address
+ register. (We do this in a non-obvious way to
+ save a register file writeback) */
+ if (GET_CODE (addr) == PRE_INC)
+ return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
+ return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
+ }
+ else
+ {
+ /* This is an undefined situation. We should load into the
+ address register *and* update that register. Probably
+ we don't need to handle this at all. */
+ if (GET_CODE (addr) == PRE_INC)
+ return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
+ return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
+ }
+ }
+ else if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == MULT)
+ {
+ rtx xoperands[4];
+ rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
+
+ if (!reg_overlap_mentioned_p (high_reg, addr))
+ {
+ xoperands[0] = high_reg;
+ xoperands[1] = XEXP (addr, 1);
+ xoperands[2] = XEXP (XEXP (addr, 0), 0);
+ xoperands[3] = XEXP (XEXP (addr, 0), 1);
+ output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
+ xoperands);
+ return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
+ }
+ else
+ {
+ xoperands[0] = high_reg;
+ xoperands[1] = XEXP (addr, 1);
+ xoperands[2] = XEXP (XEXP (addr, 0), 0);
+ xoperands[3] = XEXP (XEXP (addr, 0), 1);
+ output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
+ xoperands);
+ return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
+ }
+ }
+ }
+
+ /* If an operand is an unoffsettable memory ref, find a register
+ we can increment temporarily to make it refer to the second word. */
+
+ if (optype0 == MEMOP)
+ addreg0 = find_addr_reg (XEXP (operands[0], 0));
+
+ if (optype1 == MEMOP)
+ addreg1 = find_addr_reg (XEXP (operands[1], 0));
+
+  /* Ok, we can do one word at a time.  Normally we do the
+     low-numbered word first, but the overlap cases below reverse
+     that order.
+
+     In either case, set up in LATEHALF the operands to use
+     for the high-numbered word and in some cases alter the
+     operands in OPERANDS to be suitable for the low-numbered word.  */
+
+ if (optype0 == REGOP)
+ latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ else if (optype0 == OFFSOP)
+ latehalf[0] = adjust_address (operands[0], SImode, 4);
+ else
+ latehalf[0] = operands[0];
+
+ if (optype1 == REGOP)
+ latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
+ else if (optype1 == OFFSOP)
+ latehalf[1] = adjust_address (operands[1], SImode, 4);
+ else if (optype1 == CNSTOP)
+ split_double (operands[1], &operands[1], &latehalf[1]);
+ else
+ latehalf[1] = operands[1];
+
+ /* If the first move would clobber the source of the second one,
+ do them in the other order.
+
+ This can happen in two cases:
+
+ mem -> register where the first half of the destination register
+ is the same register used in the memory's address. Reload
+ can create such insns.
+
+ mem in this case will be either register indirect or register
+ indirect plus a valid offset.
+
+ register -> register move where REGNO(dst) == REGNO(src + 1)
+ someone (Tim/Tege?) claimed this can happen for parameter loads.
+
+ Handle mem -> register case first. */
+ if (optype0 == REGOP
+ && (optype1 == MEMOP || optype1 == OFFSOP)
+ && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ {
+ /* Do the late half first. */
+ if (addreg1)
+ output_asm_insn ("ldo 4(%0),%0", &addreg1);
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+
+ /* Then clobber. */
+ if (addreg1)
+ output_asm_insn ("ldo -4(%0),%0", &addreg1);
+ return singlemove_string (operands);
+ }
+
+ /* Now handle register -> register case. */
+ if (optype0 == REGOP && optype1 == REGOP
+ && REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ {
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+ return singlemove_string (operands);
+ }
+
+ /* Normal case: do the two words, low-numbered first. */
+
+ output_asm_insn (singlemove_string (operands), operands);
+
+ /* Make any unoffsettable addresses point at high-numbered word. */
+ if (addreg0)
+ output_asm_insn ("ldo 4(%0),%0", &addreg0);
+ if (addreg1)
+ output_asm_insn ("ldo 4(%0),%0", &addreg1);
+
+ /* Do that word. */
+ output_asm_insn (singlemove_string (latehalf), latehalf);
+
+ /* Undo the adds we just did. */
+ if (addreg0)
+ output_asm_insn ("ldo -4(%0),%0", &addreg0);
+ if (addreg1)
+ output_asm_insn ("ldo -4(%0),%0", &addreg1);
+
+ return "";
+}
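+
+/* Example of the clobber ordering above (illustrative): a doubleword
+   load into %r4/%r5 from 0(%r4) must not write %r4 first, since %r4
+   is the base register; the late half is emitted first, giving
+   "ldw 4(%r4),%r5" followed by "ldw 0(%r4),%r4".  */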
+
+const char *
+output_fp_move_double (rtx *operands)
+{
+ if (FP_REG_P (operands[0]))
+ {
+ if (FP_REG_P (operands[1])
+ || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
+ output_asm_insn ("fcpy,dbl %f1,%0", operands);
+ else
+ output_asm_insn ("fldd%F1 %1,%0", operands);
+ }
+ else if (FP_REG_P (operands[1]))
+ {
+ output_asm_insn ("fstd%F0 %1,%0", operands);
+ }
+ else
+ {
+ rtx xoperands[2];
+
+ gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
+
+      /* This is a pain.  You have to be prepared to deal with an
+	 arbitrary address here including pre/post increment/decrement.
+
+	 So avoid this in the MD.  */
+ gcc_assert (GET_CODE (operands[0]) == REG);
+
+ xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+ xoperands[0] = operands[0];
+ output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
+ }
+ return "";
+}
+
+/* Return a REG that occurs in ADDR with coefficient 1.
+ ADDR can be effectively incremented by incrementing REG. */
+
+static rtx
+find_addr_reg (rtx addr)
+{
+ while (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ addr = XEXP (addr, 0);
+ else if (GET_CODE (XEXP (addr, 1)) == REG)
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 0)))
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 1)))
+ addr = XEXP (addr, 0);
+ else
+ gcc_unreachable ();
+ }
+ gcc_assert (GET_CODE (addr) == REG);
+ return addr;
+}
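+
+/* For instance (illustrative), ADDR = (plus (reg %r3) (const_int 12))
+   skips the constant arm and returns the %r3 rtx.  */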
+
+/* Emit code to perform a block move.
+
+ OPERANDS[0] is the destination pointer as a REG, clobbered.
+ OPERANDS[1] is the source pointer as a REG, clobbered.
+ OPERANDS[2] is a register for temporary storage.
+ OPERANDS[3] is a register for temporary storage.
+ OPERANDS[4] is the size as a CONST_INT
+ OPERANDS[5] is the alignment safe to use, as a CONST_INT.
+ OPERANDS[6] is another temporary register. */
+
+const char *
+output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
+{
+ int align = INTVAL (operands[5]);
+ unsigned long n_bytes = INTVAL (operands[4]);
+
+  /* We can't move more than a word at a time because the PA
+     has no integer move instructions wider than a word.
+     (Could use fp mem ops?)  */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* Note that we know each loop below will execute at least twice
+ (else we would have open-coded the copy). */
+ switch (align)
+ {
+ case 8:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 16);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("ldd,ma 8(%1),%3", operands);
+ output_asm_insn ("ldd,ma 8(%1),%6", operands);
+ output_asm_insn ("std,ma %3,8(%0)", operands);
+ output_asm_insn ("addib,>= -16,%2,.-12", operands);
+ output_asm_insn ("std,ma %6,8(%0)", operands);
+
+      /* Handle the residual.  There could be up to 15 bytes of
+	 residual to copy!  */
+ if (n_bytes % 16 != 0)
+ {
+ operands[4] = GEN_INT (n_bytes % 8);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("ldd,ma 8(%1),%3", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("ldd 0(%1),%6", operands);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("std,ma %3,8(%0)", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("stdby,e %6,%4(%0)", operands);
+ }
+ return "";
+
+ case 4:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 8);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
+ output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
+ output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
+ output_asm_insn ("addib,>= -8,%2,.-12", operands);
+ output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
+
+ /* Handle the residual. There could be up to 7 bytes of
+ residual to copy! */
+ if (n_bytes % 8 != 0)
+ {
+ operands[4] = GEN_INT (n_bytes % 4);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("ldw 0(%1),%6", operands);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
+ }
+ return "";
+
+ case 2:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 4);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
+ output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
+ output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
+ output_asm_insn ("addib,>= -4,%2,.-12", operands);
+ output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 4 != 0)
+ {
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("ldb 0(%1),%6", operands);
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %6,0(%0)", operands);
+ }
+ return "";
+
+ case 1:
+ /* Pre-adjust the loop counter. */
+ operands[4] = GEN_INT (n_bytes - 2);
+ output_asm_insn ("ldi %4,%2", operands);
+
+ /* Copying loop. */
+ output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
+ output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
+ output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
+ output_asm_insn ("addib,>= -2,%2,.-12", operands);
+ output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 2 != 0)
+ {
+ output_asm_insn ("ldb 0(%1),%3", operands);
+ output_asm_insn ("stb %3,0(%0)", operands);
+ }
+ return "";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Count the number of insns necessary to handle this block move.
+
+   Basic structure is the same as output_block_move, except that we
+   count insns rather than emit them.  */
+
+static int
+compute_movmem_length (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
+ unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
+ unsigned int n_insns = 0;
+
+  /* We can't move more than a word at a time because the PA
+     has no integer move instructions wider than a word.
+     (Could use fp mem ops?)  */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* The basic copying loop. */
+ n_insns = 6;
+
+ /* Residuals. */
+ if (n_bytes % (2 * align) != 0)
+ {
+ if ((n_bytes % (2 * align)) >= align)
+ n_insns += 2;
+
+ if ((n_bytes % align) != 0)
+ n_insns += 2;
+ }
+
+ /* Lengths are expressed in bytes now; each insn is 4 bytes. */
+ return n_insns * 4;
+}
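+
+/* Worked example (illustrative): align = 4 and n_bytes = 14 gives the
+   6-insn loop, plus 2 insns because 14 % 8 >= 4 and 2 more because
+   14 % 4 != 0, i.e. 10 insns or 40 bytes.  */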
+
+/* Emit code to perform a block clear.
+
+ OPERANDS[0] is the destination pointer as a REG, clobbered.
+ OPERANDS[1] is a register for temporary storage.
+ OPERANDS[2] is the size as a CONST_INT
+ OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
+
+const char *
+output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
+{
+ int align = INTVAL (operands[3]);
+ unsigned long n_bytes = INTVAL (operands[2]);
+
+  /* We can't clear more than a word at a time because the PA
+     has no integer store instructions wider than a word.  */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* Note that we know each loop below will execute at least twice
+ (else we would have open-coded the copy). */
+ switch (align)
+ {
+ case 8:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 16);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+ output_asm_insn ("addib,>= -16,%1,.-4", operands);
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+
+      /* Handle the residual.  There could be up to 15 bytes of
+	 residual to clear!  */
+ if (n_bytes % 16 != 0)
+ {
+ operands[2] = GEN_INT (n_bytes % 8);
+ if (n_bytes % 16 >= 8)
+ output_asm_insn ("std,ma %%r0,8(%0)", operands);
+ if (n_bytes % 8 != 0)
+ output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
+ }
+ return "";
+
+ case 4:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 8);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+ output_asm_insn ("addib,>= -8,%1,.-4", operands);
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+
+      /* Handle the residual.  There could be up to 7 bytes of
+	 residual to clear!  */
+ if (n_bytes % 8 != 0)
+ {
+ operands[2] = GEN_INT (n_bytes % 4);
+ if (n_bytes % 8 >= 4)
+ output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
+ if (n_bytes % 4 != 0)
+ output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
+ }
+ return "";
+
+ case 2:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 4);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+ output_asm_insn ("addib,>= -4,%1,.-4", operands);
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 4 != 0)
+ {
+ if (n_bytes % 4 >= 2)
+ output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %%r0,0(%0)", operands);
+ }
+ return "";
+
+ case 1:
+ /* Pre-adjust the loop counter. */
+ operands[2] = GEN_INT (n_bytes - 2);
+ output_asm_insn ("ldi %2,%1", operands);
+
+ /* Loop. */
+ output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+ output_asm_insn ("addib,>= -2,%1,.-4", operands);
+ output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
+
+ /* Handle the residual. */
+ if (n_bytes % 2 != 0)
+ output_asm_insn ("stb %%r0,0(%0)", operands);
+
+ return "";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Count the number of insns necessary to handle this block clear.
+
+   Basic structure is the same as output_block_clear, except that we
+   count insns rather than emit them.  */
+
+static int
+compute_clrmem_length (rtx insn)
+{
+ rtx pat = PATTERN (insn);
+ unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
+ unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
+ unsigned int n_insns = 0;
+
+  /* We can't clear more than a word at a time because the PA
+     has no integer store instructions wider than a word.  */
+ if (align > (TARGET_64BIT ? 8 : 4))
+ align = (TARGET_64BIT ? 8 : 4);
+
+ /* The basic loop. */
+ n_insns = 4;
+
+ /* Residuals. */
+ if (n_bytes % (2 * align) != 0)
+ {
+ if ((n_bytes % (2 * align)) >= align)
+ n_insns++;
+
+ if ((n_bytes % align) != 0)
+ n_insns++;
+ }
+
+ /* Lengths are expressed in bytes now; each insn is 4 bytes. */
+ return n_insns * 4;
+}
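+
+/* Worked example (illustrative): align = 4 and n_bytes = 10 gives the
+   4-insn loop plus 1 insn for the 10 % 4 != 0 residual, i.e. 5 insns
+   or 20 bytes.  */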
+
+
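+/* Return a string to perform a bitwise-and of operands[1] with operands[2]
+   storing the result in operands[0].  */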
+const char *
+output_and (rtx *operands)
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
+ {
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int ls0, ls1, ms0, p, len;
+
+ for (ls0 = 0; ls0 < 32; ls0++)
+ if ((mask & (1 << ls0)) == 0)
+ break;
+
+ for (ls1 = ls0; ls1 < 32; ls1++)
+ if ((mask & (1 << ls1)) != 0)
+ break;
+
+ for (ms0 = ls1; ms0 < 32; ms0++)
+ if ((mask & (1 << ms0)) == 0)
+ break;
+
+ gcc_assert (ms0 == 32);
+
+ if (ls1 == 32)
+ {
+ len = ls0;
+
+ gcc_assert (len);
+
+ operands[2] = GEN_INT (len);
+ return "{extru|extrw,u} %1,31,%2,%0";
+ }
+ else
+ {
+ /* We could use this `depi' for the case above as well, but `depi'
+ requires one more register file access than an `extru'. */
+
+ p = 31 - ls0;
+ len = ls1 - ls0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "{depi|depwi} 0,%2,%3,%0";
+ }
+ }
+ else
+ return "and %1,%2,%0";
+}
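+
+/* Two illustrative masks for the cases above: 0x3f has ls0 = 6 and
+   ls1 = 32, so the low six bits are extracted with "extru %1,31,6,%0";
+   0xffffff0f has ls0 = 4 and ls1 = 8, so the 4-bit hole is cleared
+   with "depi 0,27,4,%0".  */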
+
+/* Return a string to perform a bitwise-and of operands[1] with operands[2]
+ storing the result in operands[0]. */
+const char *
+output_64bit_and (rtx *operands)
+{
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
+ {
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int ls0, ls1, ms0, p, len;
+
+ for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
+ if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
+ break;
+
+ for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
+ if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
+ break;
+
+ for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
+ if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
+ break;
+
+ gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
+
+ if (ls1 == HOST_BITS_PER_WIDE_INT)
+ {
+ len = ls0;
+
+ gcc_assert (len);
+
+ operands[2] = GEN_INT (len);
+ return "extrd,u %1,63,%2,%0";
+ }
+ else
+ {
+	  /* We could use this `depdi' for the case above as well, but
+	     `depdi' requires one more register file access than an
+	     `extrd,u'.  */
+
+ p = 63 - ls0;
+ len = ls1 - ls0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "depdi 0,%2,%3,%0";
+ }
+ }
+ else
+ return "and %1,%2,%0";
+}
+
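+/* Return a string to perform a bitwise-or of operands[1] with operands[2]
+   storing the result in operands[0].  */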
+const char *
+output_ior (rtx *operands)
+{
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int bs0, bs1, p, len;
+
+ if (INTVAL (operands[2]) == 0)
+ return "copy %1,%0";
+
+ for (bs0 = 0; bs0 < 32; bs0++)
+ if ((mask & (1 << bs0)) != 0)
+ break;
+
+ for (bs1 = bs0; bs1 < 32; bs1++)
+ if ((mask & (1 << bs1)) == 0)
+ break;
+
+ gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
+
+ p = 31 - bs0;
+ len = bs1 - bs0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "{depi|depwi} -1,%2,%3,%0";
+}
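+
+/* Illustrative mask: 0x70 has bs0 = 4 and bs1 = 7, so bits 4..6 are
+   set with "depi -1,27,3,%0".  */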
+
+/* Return a string to perform a bitwise-or of operands[1] with operands[2]
+   storing the result in operands[0].  */
+const char *
+output_64bit_ior (rtx *operands)
+{
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ int bs0, bs1, p, len;
+
+ if (INTVAL (operands[2]) == 0)
+ return "copy %1,%0";
+
+ for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
+ if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
+ break;
+
+ for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
+ if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
+ break;
+
+ gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
+ || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
+
+ p = 63 - bs0;
+ len = bs1 - bs0;
+
+ operands[2] = GEN_INT (p);
+ operands[3] = GEN_INT (len);
+ return "depdi -1,%2,%3,%0";
+}
+
+/* Target hook for assembling integer objects. This code handles
+ aligned SI and DI integers specially since function references
+ must be preceded by P%. */
+
+static bool
+pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (size == UNITS_PER_WORD
+ && aligned_p
+ && function_label_operand (x, VOIDmode))
+ {
+      fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
+ output_addr_const (asm_out_file, x);
+ fputc ('\n', asm_out_file);
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
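+
+/* For example (illustrative), an aligned word-sized reference to a
+   function foo comes out as "\t.word\tP%foo" ("\t.dword\tP%foo" in
+   64-bit mode) rather than going through default_assemble_integer.  */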
+
+/* Output an ascii string. */
+void
+output_ascii (FILE *file, const char *p, int size)
+{
+ int i;
+ int chars_output;
+ unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
+
+ /* The HP assembler can only take strings of 256 characters at one
+ time. This is a limitation on input line length, *not* the
+ length of the string. Sigh. Even worse, it seems that the
+ restriction is in number of input characters (see \xnn &
+ \whatever). So we have to do this very carefully. */
+
+ fputs ("\t.STRING \"", file);
+
+ chars_output = 0;
+ for (i = 0; i < size; i += 4)
+ {
+ int co = 0;
+ int io = 0;
+ for (io = 0, co = 0; io < MIN (4, size - i); io++)
+ {
+ register unsigned int c = (unsigned char) p[i + io];
+
+ if (c == '\"' || c == '\\')
+ partial_output[co++] = '\\';
+ if (c >= ' ' && c < 0177)
+ partial_output[co++] = c;
+ else
+ {
+ unsigned int hexd;
+ partial_output[co++] = '\\';
+ partial_output[co++] = 'x';
+	      hexd = c / 16 + '0';
+	      if (hexd > '9')
+		hexd -= '9' - 'a' + 1;
+	      partial_output[co++] = hexd;
+	      hexd = c % 16 + '0';
+	      if (hexd > '9')
+		hexd -= '9' - 'a' + 1;
+	      partial_output[co++] = hexd;
+ }
+ }
+ if (chars_output + co > 243)
+ {
+ fputs ("\"\n\t.STRING \"", file);
+ chars_output = 0;
+ }
+ fwrite (partial_output, 1, (size_t) co, file);
+ chars_output += co;
+ co = 0;
+ }
+ fputs ("\"\n", file);
+}
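+
+/* Illustrative input: the bytes 'a', '"', '\n' are emitted as
+   a\"\x0a -- printable characters pass through, quote and backslash
+   get a backslash escape, and everything else becomes a two-digit
+   \xnn escape.  The output line is broken after at most 243 string
+   characters to stay under the assembler's 256-character
+   input-line limit.  */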
+
+/* Try to rewrite floating point comparisons & branches to avoid
+ useless add,tr insns.
+
+ CHECK_NOTES is nonzero if we should examine REG_DEAD notes
+ to see if FPCC is dead. CHECK_NOTES is nonzero for the
+ first attempt to remove useless add,tr insns. It is zero
+ for the second pass as reorg sometimes leaves bogus REG_DEAD
+ notes lying around.
+
+ When CHECK_NOTES is zero we can only eliminate add,tr insns
+ when there's a 1:1 correspondence between fcmp and ftest/fbranch
+ instructions. */
+static void
+remove_useless_addtr_insns (int check_notes)
+{
+ rtx insn;
+ static int pass = 0;
+
+ /* This is fairly cheap, so always run it when optimizing. */
+ if (optimize > 0)
+ {
+ int fcmp_count = 0;
+ int fbranch_count = 0;
+
+ /* Walk all the insns in this function looking for fcmp & fbranch
+ instructions. Keep track of how many of each we find. */
+ for (insn = get_insns (); insn; insn = next_insn (insn))
+ {
+ rtx tmp;
+
+ /* Ignore anything that isn't an INSN or a JUMP_INSN. */
+ if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
+ continue;
+
+ tmp = PATTERN (insn);
+
+ /* It must be a set. */
+ if (GET_CODE (tmp) != SET)
+ continue;
+
+ /* If the destination is CCFP, then we've found an fcmp insn. */
+ tmp = SET_DEST (tmp);
+ if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
+ {
+ fcmp_count++;
+ continue;
+ }
+
+ tmp = PATTERN (insn);
+ /* If this is an fbranch instruction, bump the fbranch counter. */
+ if (GET_CODE (tmp) == SET
+ && SET_DEST (tmp) == pc_rtx
+ && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
+ {
+ fbranch_count++;
+ continue;
+ }
+ }
+
+
+ /* Find all floating point compare + branch insns. If possible,
+ reverse the comparison & the branch to avoid add,tr insns. */
+ for (insn = get_insns (); insn; insn = next_insn (insn))
+ {
+ rtx tmp, next;
+
+ /* Ignore anything that isn't an INSN. */
+ if (GET_CODE (insn) != INSN)
+ continue;
+
+ tmp = PATTERN (insn);
+
+ /* It must be a set. */
+ if (GET_CODE (tmp) != SET)
+ continue;
+
+ /* The destination must be CCFP, which is register zero. */
+ tmp = SET_DEST (tmp);
+ if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
+ continue;
+
+ /* INSN should be a set of CCFP.
+
+ See if the result of this insn is used in a reversed FP
+ conditional branch. If so, reverse our condition and
+ the branch. Doing so avoids useless add,tr insns. */
+ next = next_insn (insn);
+ while (next)
+ {
+ /* Jumps, calls and labels stop our search. */
+ if (GET_CODE (next) == JUMP_INSN
+ || GET_CODE (next) == CALL_INSN
+ || GET_CODE (next) == CODE_LABEL)
+ break;
+
+ /* As does another fcmp insn. */
+ if (GET_CODE (next) == INSN
+ && GET_CODE (PATTERN (next)) == SET
+ && GET_CODE (SET_DEST (PATTERN (next))) == REG
+ && REGNO (SET_DEST (PATTERN (next))) == 0)
+ break;
+
+ next = next_insn (next);
+ }
+
+	  /* Is NEXT a branch insn?  */
+ if (next
+ && GET_CODE (next) == JUMP_INSN)
+ {
+ rtx pattern = PATTERN (next);
+
+	      /* If it is a reversed fp conditional branch (e.g. uses add,tr)
+		 and CCFP dies, then reverse our conditional and the branch
+		 to avoid the add,tr.  */
+ if (GET_CODE (pattern) == SET
+ && SET_DEST (pattern) == pc_rtx
+ && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
+ && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
+ && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
+ && (fcmp_count == fbranch_count
+ || (check_notes
+ && find_regno_note (next, REG_DEAD, 0))))
+ {
+ /* Reverse the branch. */
+ tmp = XEXP (SET_SRC (pattern), 1);
+ XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
+ XEXP (SET_SRC (pattern), 2) = tmp;
+ INSN_CODE (next) = -1;
+
+ /* Reverse our condition. */
+ tmp = PATTERN (insn);
+ PUT_CODE (XEXP (tmp, 1),
+ (reverse_condition_maybe_unordered
+ (GET_CODE (XEXP (tmp, 1)))));
+ }
+ }
+ }
+ }
+
+ pass = !pass;
+
+}
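+
+/* Roughly (illustrative): an fcmp whose result is only consumed by a
+   branch on the negated condition would otherwise need an ftest plus
+   an add,tr pair; swapping the IF_THEN_ELSE arms and applying
+   reverse_condition_maybe_unordered to the compare removes the need
+   for the add,tr.  */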
+
+/* You may have trouble believing this, but this is the 32 bit HP-PA
+ stack layout. Wow.
+
+ Offset Contents
+
+ Variable arguments (optional; any number may be allocated)
+
+ SP-(4*(N+9)) arg word N
+ : :
+ SP-56 arg word 5
+ SP-52 arg word 4
+
+ Fixed arguments (must be allocated; may remain unused)
+
+ SP-48 arg word 3
+ SP-44 arg word 2
+ SP-40 arg word 1
+ SP-36 arg word 0
+
+ Frame Marker
+
+ SP-32 External Data Pointer (DP)
+ SP-28 External sr4
+ SP-24 External/stub RP (RP')
+ SP-20 Current RP
+ SP-16 Static Link
+ SP-12 Clean up
+ SP-8 Calling Stub RP (RP'')
+ SP-4 Previous SP
+
+ Top of Frame
+
+ SP-0 Stack Pointer (points to next available address)
+
+*/
+
+/* This function saves registers as follows.  Registers marked with ' are
+   this function's registers (as opposed to the previous function's).
+   If a frame pointer isn't needed, %r3 is saved as a general register;
+   the space for the frame pointer is still allocated, though, to keep
+   things simple.
+
+
+ Top of Frame
+
+ SP (FP') Previous FP
+ SP + 4 Alignment filler (sigh)
+ SP + 8 Space for locals reserved here.
+ .
+ .
+ .
+   SP + n      All call saved registers used.
+ .
+ .
+ .
+ SP + o All call saved fp registers used.
+ .
+ .
+ .
+ SP + p (SP') points to next available address.
+
+*/
+
+/* Global variables set by hppa_expand_prologue().  */
+/* Size of frame. Need to know this to emit return insns from
+ leaf procedures. */
+static HOST_WIDE_INT actual_fsize, local_fsize;
+static int save_fregs;
+
+/* Emit RTL to store REG at the memory location specified by BASE+DISP.
+ Handle case where DISP > 8k by using the add_high_const patterns.
+
+   Note that in the DISP > 8k case, we will leave the high part of the
+   address in %r1.  There is code in hppa_expand_{prologue,epilogue}
+   that knows this.  */
+
+static void
+store_reg (int reg, HOST_WIDE_INT disp, int base)
+{
+ rtx insn, dest, src, basereg;
+
+ src = gen_rtx_REG (word_mode, reg);
+ basereg = gen_rtx_REG (Pmode, base);
+ if (VAL_14_BITS_P (disp))
+ {
+ dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
+ insn = emit_move_insn (dest, src);
+ }
+ else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
+ {
+ rtx delta = GEN_INT (disp);
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg, delta);
+ insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
+ if (DO_FRAME_NOTES)
+ {
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_PLUS (Pmode, basereg, delta)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ dest = gen_rtx_MEM (word_mode, tmpreg);
+ insn = emit_move_insn (dest, src);
+ }
+ else
+ {
+ rtx delta = GEN_INT (disp);
+ rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg, high);
+ dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
+ insn = emit_move_insn (dest, src);
+ if (DO_FRAME_NOTES)
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (word_mode,
+ gen_rtx_PLUS (word_mode,
+ basereg,
+ delta)),
+ src));
+ }
+
+ if (DO_FRAME_NOTES)
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
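+
+/* Sketch of the DISP > 8k case above (illustrative, 32-bit): for
+   DISP = 0x4000 and BASE = %r30 the HIGH/LO_SUM pair typically
+   assembles to "addil L'0x4000,%r30" followed by
+   "stw %src,R'0x4000(%r1)", leaving the high part of the address in
+   %r1 as the comment promises.  */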
+
+/* Emit RTL to store REG at the memory location specified by BASE and then
+ add MOD to BASE. MOD must be <= 8k. */
+
+static void
+store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
+{
+ rtx insn, basereg, srcreg, delta;
+
+ gcc_assert (VAL_14_BITS_P (mod));
+
+ basereg = gen_rtx_REG (Pmode, base);
+ srcreg = gen_rtx_REG (word_mode, reg);
+ delta = GEN_INT (mod);
+
+ insn = emit_insn (gen_post_store (basereg, srcreg, delta));
+ if (DO_FRAME_NOTES)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* RTX_FRAME_RELATED_P must be set on each frame related set
+ in a parallel with more than one element. */
+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
+ }
+}
+
+/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
+ where DISP > 8k by using the add_high_const patterns. NOTE indicates
+ whether to add a frame note or not.
+
+   In the DISP > 8k case, we leave the high part of the address in %r1.
+   There is code in hppa_expand_{prologue,epilogue} that knows about this.  */
+
+static void
+set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
+{
+ rtx insn;
+
+ if (VAL_14_BITS_P (disp))
+ {
+ insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
+ plus_constant (gen_rtx_REG (Pmode, base), disp));
+ }
+ else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
+ {
+ rtx basereg = gen_rtx_REG (Pmode, base);
+ rtx delta = GEN_INT (disp);
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg, delta);
+ insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
+ gen_rtx_PLUS (Pmode, tmpreg, basereg));
+ if (DO_FRAME_NOTES)
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_PLUS (Pmode, basereg, delta)));
+ }
+ else
+ {
+ rtx basereg = gen_rtx_REG (Pmode, base);
+ rtx delta = GEN_INT (disp);
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg,
+ gen_rtx_PLUS (Pmode, basereg,
+ gen_rtx_HIGH (Pmode, delta)));
+ insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
+ gen_rtx_LO_SUM (Pmode, tmpreg, delta));
+ }
+
+ if (DO_FRAME_NOTES && note)
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+HOST_WIDE_INT
+compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
+{
+ int freg_saved = 0;
+ int i, j;
+
+ /* The code in hppa_expand_prologue and hppa_expand_epilogue must
+ be consistent with the rounding and size calculation done here.
+ Change them at the same time. */
+
+ /* We do our own stack alignment. First, round the size of the
+ stack locals up to a word boundary. */
+ size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+
+ /* Space for previous frame pointer + filler. If any frame is
+ allocated, we need to add in the STARTING_FRAME_OFFSET. We
+ waste some space here for the sake of HP compatibility. The
+ first slot is only used when the frame pointer is needed. */
+ if (size || frame_pointer_needed)
+ size += STARTING_FRAME_OFFSET;
+
+ /* If the current function calls __builtin_eh_return, then we need
+ to allocate stack space for registers that will hold data for
+ the exception handler. */
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ unsigned int i;
+
+ for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
+ continue;
+ size += i * UNITS_PER_WORD;
+ }
+
+ /* Account for space used by the callee general register saves. */
+ for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
+ if (df_regs_ever_live_p (i))
+ size += UNITS_PER_WORD;
+
+ /* Account for space used by the callee floating point register saves. */
+ for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
+ if (df_regs_ever_live_p (i)
+ || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
+ {
+ freg_saved = 1;
+
+ /* We always save both halves of the FP register, so always
+ increment the frame size by 8 bytes. */
+ size += 8;
+ }
+
+ /* If any of the floating registers are saved, account for the
+ alignment needed for the floating point register save block. */
+ if (freg_saved)
+ {
+ size = (size + 7) & ~7;
+ if (fregs_live)
+ *fregs_live = 1;
+ }
+
+ /* The various ABIs include space for the outgoing parameters in the
+ size of the current function's stack frame. We don't need to align
+ for the outgoing arguments as their alignment is set by the final
+ rounding for the frame as a whole. */
+ size += crtl->outgoing_args_size;
+
+ /* Allocate space for the fixed frame marker. This space must be
+ allocated for any function that makes calls or allocates
+ stack space. */
+ if (!current_function_is_leaf || size)
+ size += TARGET_64BIT ? 48 : 32;
+
+ /* Finally, round to the preferred stack boundary. */
+ return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
+ & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
+}
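+
+/* Worked example (illustrative; assumes the 32-bit port's
+   UNITS_PER_WORD of 4, STARTING_FRAME_OFFSET of 8 and a 64-byte
+   PREFERRED_STACK_BOUNDARY): 20 bytes of locals in a non-leaf
+   function with no register saves and no outgoing arguments gives
+   20 + 8 + 32 = 60, which rounds up to a 64-byte frame.  */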
+
+/* Generate the assembly code for function entry. FILE is a stdio
+ stream to output the code to. SIZE is an int: how many units of
+ temporary storage to allocate.
+
+ Refer to the array `regs_ever_live' to determine which registers to
+ save; `regs_ever_live[I]' is nonzero if register number I is ever
+ used in the function. This function is responsible for knowing
+ which registers should not be saved even if used. */
+
+/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
+ of memory. If any fpu reg is used in the function, we allocate
+ such a block here, at the bottom of the frame, just in case it's needed.
+
+ If this function is a leaf procedure, then we may choose not
+ to do a "save" insn. The decision about whether or not
+ to do this is made in regclass.c. */
+
+static void
+pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* The function's label and associated .PROC must never be
+ separated and must be output *after* any profiling declarations
+ to avoid changing spaces/subspaces within a procedure. */
+ ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
+ fputs ("\t.PROC\n", file);
+
+ /* hppa_expand_prologue does the dirty work now. We just need
+ to output the assembler directives which denote the start
+ of a function. */
+ fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
+ if (current_function_is_leaf)
+ fputs (",NO_CALLS", file);
+ else
+ fputs (",CALLS", file);
+ if (rp_saved)
+ fputs (",SAVE_RP", file);
+
+ /* The SAVE_SP flag is used to indicate that register %r3 is stored
+ at the beginning of the frame and that it is used as the frame
+ pointer for the frame. We do this because our current frame
+ layout doesn't conform to that specified in the HP runtime
+ documentation and we need a way to indicate to programs such as
+ GDB where %r3 is saved. The SAVE_SP flag was chosen because it
+ isn't used by HP compilers but is supported by the assembler.
+ However, SAVE_SP is supposed to indicate that the previous stack
+ pointer has been saved in the frame marker. */
+ if (frame_pointer_needed)
+ fputs (",SAVE_SP", file);
+
+ /* Pass on information about the number of callee register saves
+ performed in the prologue.
+
+ The compiler is supposed to pass the highest register number
+ saved, the assembler then has to adjust that number before
+ entering it into the unwind descriptor (to account for any
+ caller saved registers with lower register numbers than the
+ first callee saved register). */
+ if (gr_saved)
+ fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
+
+ if (fr_saved)
+ fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
+
+ fputs ("\n\t.ENTRY\n", file);
+
+ remove_useless_addtr_insns (0);
+}
+
+void
+hppa_expand_prologue (void)
+{
+ int merge_sp_adjust_with_store = 0;
+ HOST_WIDE_INT size = get_frame_size ();
+ HOST_WIDE_INT offset;
+ int i;
+ rtx insn, tmpreg;
+
+ gr_saved = 0;
+ fr_saved = 0;
+ save_fregs = 0;
+
+ /* Compute total size for frame pointer, filler, locals and rounding to
+ the next word boundary. Similar code appears in compute_frame_size
+ and must be changed in tandem with this code. */
+ local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+ if (local_fsize || frame_pointer_needed)
+ local_fsize += STARTING_FRAME_OFFSET;
+
+ actual_fsize = compute_frame_size (size, &save_fregs);
+ if (flag_stack_usage)
+ current_function_static_stack_size = actual_fsize;
+
+ /* Compute a few things we will use often. */
+ tmpreg = gen_rtx_REG (word_mode, 1);
+
+ /* Save RP first. The calling conventions manual states RP will
+ always be stored into the caller's frame at sp - 20 or sp - 16
+ depending on which ABI is in use. */
+ if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
+ {
+ store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
+ rp_saved = true;
+ }
+ else
+ rp_saved = false;
+
+ /* Allocate the local frame and set up the frame pointer if needed. */
+ if (actual_fsize != 0)
+ {
+ if (frame_pointer_needed)
+ {
+ /* Copy the old frame pointer temporarily into %r1. Set up the
+ new stack pointer, then store away the saved old frame pointer
+ into the stack at sp and at the same time update the stack
+ pointer by actual_fsize bytes. Two versions, first
+ handles small (<8k) frames. The second handles large (>=8k)
+ frames. */
+ insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
+ if (DO_FRAME_NOTES)
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ if (DO_FRAME_NOTES)
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (VAL_14_BITS_P (actual_fsize))
+ store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
+ else
+ {
+ /* It is incorrect to store the saved frame pointer at *sp,
+ then increment sp (writes beyond the current stack boundary).
+
+ So instead use stwm to store at *sp and post-increment the
+ stack pointer as an atomic operation. Then increment sp to
+ finish allocating the new frame. */
+ HOST_WIDE_INT adjust1 = 8192 - 64;
+ HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
+
+ store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
+ set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
+ adjust2, 1);
+ }
+
+ /* We set SAVE_SP in frames that need a frame pointer. Thus,
+ we need to store the previous stack pointer (frame pointer)
+ into the frame marker on targets that use the HP unwind
+ library. This allows the HP unwind library to be used to
+ unwind GCC frames. However, we are not fully compatible
+ with the HP library because our frame layout differs from
+ that specified in the HP runtime specification.
+
+ We don't want a frame note on this instruction as the frame
+ marker moves during dynamic stack allocation.
+
+ This instruction also serves as a blockage to prevent
+ register spills from being scheduled before the stack
+ pointer is raised. This is necessary as we store
+ registers using the frame pointer as a base register,
+ and the frame pointer is set before sp is raised. */
+ if (TARGET_HPUX_UNWIND_LIBRARY)
+ {
+ rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (TARGET_64BIT ? -8 : -4));
+
+ emit_move_insn (gen_rtx_MEM (word_mode, addr),
+ hard_frame_pointer_rtx);
+ }
+ else
+ emit_insn (gen_blockage ());
+ }
+      /* No frame pointer needed.  */
+ else
+ {
+ /* In some cases we can perform the first callee register save
+ and allocating the stack frame at the same time. If so, just
+ make a note of it and defer allocating the frame until saving
+ the callee registers. */
+ if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
+ merge_sp_adjust_with_store = 1;
+	  /* Cannot optimize.  Adjust the stack frame by actual_fsize
+	     bytes.  */
+ else
+ set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
+ actual_fsize, 1);
+ }
+ }
+
+ /* Normal register save.
+
+ Do not save the frame pointer in the frame_pointer_needed case. It
+ was done earlier. */
+ if (frame_pointer_needed)
+ {
+ offset = local_fsize;
+
+ /* Saving the EH return data registers in the frame is the simplest
+ way to get the frame unwind information emitted. We put them
+ just before the general registers. */
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+
+ for (i = 18; i >= 4; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ gr_saved++;
+ }
+ /* Account for %r3 which is saved in a special place. */
+ gr_saved++;
+ }
+ /* No frame pointer needed. */
+ else
+ {
+ offset = local_fsize - actual_fsize;
+
+ /* Saving the EH return data registers in the frame is the simplest
+ way to get the frame unwind information emitted. */
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ /* If merge_sp_adjust_with_store is nonzero, then we can
+ optimize the first save. */
+ if (merge_sp_adjust_with_store)
+ {
+ store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
+ merge_sp_adjust_with_store = 0;
+ }
+ else
+ store_reg (regno, offset, STACK_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+
+ for (i = 18; i >= 3; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ /* If merge_sp_adjust_with_store is nonzero, then we can
+ optimize the first GR save. */
+ if (merge_sp_adjust_with_store)
+ {
+ store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
+ merge_sp_adjust_with_store = 0;
+ }
+ else
+ store_reg (i, offset, STACK_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ gr_saved++;
+ }
+
+ /* If we wanted to merge the SP adjustment with a GR save, but we never
+ did any GR saves, then just emit the adjustment here. */
+ if (merge_sp_adjust_with_store)
+ set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
+ actual_fsize, 1);
+ }
+
+ /* The hppa calling conventions say that %r19, the pic offset
+ register, is saved at sp - 32 (in this function's frame)
+ when generating PIC code. FIXME: What is the correct thing
+ to do for functions which make no calls and allocate no
+ frame? Do we need to allocate a frame, or can we just omit
+ the save? For now we'll just omit the save.
+
+ We don't want a note on this insn as the frame marker can
+ move if there is a dynamic stack allocation. */
+ if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
+ {
+ rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
+
+      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
+    }
+
+ /* Align pointer properly (doubleword boundary). */
+ offset = (offset + 7) & ~7;
+
+ /* Floating point register store. */
+ if (save_fregs)
+ {
+ rtx base;
+
+ /* First get the frame or stack pointer to the start of the FP register
+ save area. */
+ if (frame_pointer_needed)
+ {
+ set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
+ base = hard_frame_pointer_rtx;
+ }
+ else
+ {
+ set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
+ base = stack_pointer_rtx;
+ }
+
+ /* Now actually save the FP registers. */
+ for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
+ {
+ if (df_regs_ever_live_p (i)
+ || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
+ {
+ rtx addr, insn, reg;
+ addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
+ reg = gen_rtx_REG (DFmode, i);
+ insn = emit_move_insn (addr, reg);
+ if (DO_FRAME_NOTES)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (TARGET_64BIT)
+ {
+ rtx mem = gen_rtx_MEM (DFmode,
+ plus_constant (base, offset));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, mem, reg));
+ }
+ else
+ {
+ rtx meml = gen_rtx_MEM (SFmode,
+ plus_constant (base, offset));
+ rtx memr = gen_rtx_MEM (SFmode,
+ plus_constant (base, offset + 4));
+ rtx regl = gen_rtx_REG (SFmode, i);
+ rtx regr = gen_rtx_REG (SFmode, i + 1);
+ rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
+ rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
+ rtvec vec;
+
+ RTX_FRAME_RELATED_P (setl) = 1;
+ RTX_FRAME_RELATED_P (setr) = 1;
+ vec = gen_rtvec (2, setl, setr);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SEQUENCE (VOIDmode, vec));
+ }
+ }
+ offset += GET_MODE_SIZE (DFmode);
+ fr_saved++;
+ }
+ }
+ }
+}
+
+/* Emit RTL to load REG from the memory location specified by BASE+DISP.
+ Handle case where DISP > 8k by using the add_high_const patterns. */
+
+static void
+load_reg (int reg, HOST_WIDE_INT disp, int base)
+{
+ rtx dest = gen_rtx_REG (word_mode, reg);
+ rtx basereg = gen_rtx_REG (Pmode, base);
+ rtx src;
+
+ if (VAL_14_BITS_P (disp))
+ src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
+ else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
+ {
+ rtx delta = GEN_INT (disp);
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg, delta);
+ if (TARGET_DISABLE_INDEXING)
+ {
+ emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
+ src = gen_rtx_MEM (word_mode, tmpreg);
+ }
+ else
+ src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
+ }
+ else
+ {
+ rtx delta = GEN_INT (disp);
+ rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
+ rtx tmpreg = gen_rtx_REG (Pmode, 1);
+
+ emit_move_insn (tmpreg, high);
+ src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
+ }
+
+ emit_move_insn (dest, src);
+}
+
+/* Update the total code bytes output to the text section. */
+
+static void
+update_total_code_bytes (unsigned int nbytes)
+{
+ if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
+ && !IN_NAMED_SECTION_P (cfun->decl))
+ {
+ unsigned int old_total = total_code_bytes;
+
+ total_code_bytes += nbytes;
+
+ /* Be prepared to handle overflows. */
+ if (old_total > total_code_bytes)
+ total_code_bytes = UINT_MAX;
+ }
+}
+
+/* This function generates the assembly code for function exit.
+ Args are as for output_function_prologue ().
+
+ The function epilogue should not depend on the current stack
+ pointer! It should use the frame pointer only. This is mandatory
+ because of alloca; we also take advantage of it to omit stack
+ adjustments before returning. */
+
+static void
+pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ rtx insn = get_last_insn ();
+
+ last_address = 0;
+
+ /* hppa_expand_epilogue does the dirty work now. We just need
+ to output the assembler directives which denote the end
+ of a function.
+
+ To make debuggers happy, emit a nop if the epilogue was completely
+ eliminated due to a volatile call as the last insn in the
+ current function. That way the return address (in %r2) will
+ always point to a valid instruction in the current function. */
+
+ /* Get the last real insn. */
+ if (GET_CODE (insn) == NOTE)
+ insn = prev_real_insn (insn);
+
+ /* If it is a sequence, then look inside. */
+ if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, 0);
+
+ /* If insn is a CALL_INSN, then it must be a call to a volatile
+ function (otherwise there would be epilogue insns). */
+ if (insn && GET_CODE (insn) == CALL_INSN)
+ {
+ fputs ("\tnop\n", file);
+ last_address += 4;
+ }
+
+ fputs ("\t.EXIT\n\t.PROCEND\n", file);
+
+ if (TARGET_SOM && TARGET_GAS)
+ {
+      /* We're done with this subspace except possibly for some additional
+	 debug information.  Forget that we are in this subspace to ensure
+	 that the next function is output in its own subspace.  */
+ in_section = NULL;
+ cfun->machine->in_nsubspa = 2;
+ }
+
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ insn = get_last_nonnote_insn ();
+ last_address += INSN_ADDRESSES (INSN_UID (insn));
+ if (INSN_P (insn))
+ last_address += insn_default_length (insn);
+ last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
+ & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
+ }
+ else
+ last_address = UINT_MAX;
+
+ /* Finally, update the total number of code bytes output so far. */
+ update_total_code_bytes (last_address);
+}
+
+void
+hppa_expand_epilogue (void)
+{
+ rtx tmpreg;
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT ret_off = 0;
+ int i;
+ int merge_sp_adjust_with_load = 0;
+
+ /* We will use this often. */
+ tmpreg = gen_rtx_REG (word_mode, 1);
+
+ /* Try to restore RP early to avoid load/use interlocks when
+ RP gets used in the return (bv) instruction. This appears to still
+ be necessary even when we schedule the prologue and epilogue. */
+ if (rp_saved)
+ {
+ ret_off = TARGET_64BIT ? -16 : -20;
+ if (frame_pointer_needed)
+ {
+ load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
+ ret_off = 0;
+ }
+ else
+ {
+ /* No frame pointer, and stack is smaller than 8k. */
+ if (VAL_14_BITS_P (ret_off - actual_fsize))
+ {
+ load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
+ ret_off = 0;
+ }
+ }
+ }
+
+ /* General register restores. */
+ if (frame_pointer_needed)
+ {
+ offset = local_fsize;
+
+ /* If the current function calls __builtin_eh_return, then we need
+ to restore the saved EH data registers. */
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+
+ for (i = 18; i >= 4; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+ else
+ {
+ offset = local_fsize - actual_fsize;
+
+ /* If the current function calls __builtin_eh_return, then we need
+ to restore the saved EH data registers. */
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ /* Only for the first load.
+ merge_sp_adjust_with_load holds the register load
+ with which we will merge the sp adjustment. */
+ if (merge_sp_adjust_with_load == 0
+ && local_fsize == 0
+ && VAL_14_BITS_P (-actual_fsize))
+ merge_sp_adjust_with_load = regno;
+ else
+ load_reg (regno, offset, STACK_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+
+ for (i = 18; i >= 3; i--)
+ {
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ /* Only for the first load.
+ merge_sp_adjust_with_load holds the register load
+ with which we will merge the sp adjustment. */
+ if (merge_sp_adjust_with_load == 0
+ && local_fsize == 0
+ && VAL_14_BITS_P (-actual_fsize))
+ merge_sp_adjust_with_load = i;
+ else
+ load_reg (i, offset, STACK_POINTER_REGNUM);
+ offset += UNITS_PER_WORD;
+ }
+ }
+ }
+
+ /* Align pointer properly (doubleword boundary). */
+ offset = (offset + 7) & ~7;
+
+ /* FP register restores. */
+ if (save_fregs)
+ {
+ /* Adjust the register to index off of. */
+ if (frame_pointer_needed)
+ set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
+ else
+ set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
+
+ /* Actually do the restores now. */
+ for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
+ if (df_regs_ever_live_p (i)
+ || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
+ {
+ rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
+ rtx dest = gen_rtx_REG (DFmode, i);
+ emit_move_insn (dest, src);
+ }
+ }
+
+ /* Emit a blockage insn here to keep these insns from being moved to
+ an earlier spot in the epilogue, or into the main instruction stream.
+
+ This is necessary as we must not cut the stack back before all the
+ restores are finished. */
+ emit_insn (gen_blockage ());
+
+ /* Reset stack pointer (and possibly frame pointer). The stack
+ pointer is initially set to fp + 64 to avoid a race condition. */
+ if (frame_pointer_needed)
+ {
+ rtx delta = GEN_INT (-64);
+
+ set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
+ emit_insn (gen_pre_load (hard_frame_pointer_rtx,
+ stack_pointer_rtx, delta));
+ }
+ /* If we were deferring a callee register restore, do it now. */
+ else if (merge_sp_adjust_with_load)
+ {
+ rtx delta = GEN_INT (-actual_fsize);
+ rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
+
+ emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
+ }
+ else if (actual_fsize != 0)
+ set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
+ - actual_fsize, 0);
+
+ /* If we haven't restored %r2 yet (no frame pointer, and a stack
+ frame greater than 8k), do so now. */
+ if (ret_off != 0)
+ load_reg (2, ret_off, STACK_POINTER_REGNUM);
+
+ if (DO_FRAME_NOTES && crtl->calls_eh_return)
+ {
+ rtx sa = EH_RETURN_STACKADJ_RTX;
+
+ emit_insn (gen_blockage ());
+ emit_insn (TARGET_64BIT
+ ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
+ : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
+ }
+}
+
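+/* Return true if the current function can be exited with a simple
+ return instruction, i.e., when there is no epilogue work left to
+ do: reload has completed, no frame pointer is needed, %r2 (the
+ return pointer) was never clobbered, the function is not profiled,
+ and the computed frame size is zero. */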
+bool
+pa_can_use_return_insn (void)
+{
+ if (!reload_completed)
+ return false;
+
+ if (frame_pointer_needed)
+ return false;
+
+ if (df_regs_ever_live_p (2))
+ return false;
+
+ if (crtl->profile)
+ return false;
+
+ return compute_frame_size (get_frame_size (), 0) == 0;
+}
+
+rtx
+hppa_pic_save_rtx (void)
+{
+ return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
+}
+
+#ifndef NO_DEFERRED_PROFILE_COUNTERS
+#define NO_DEFERRED_PROFILE_COUNTERS 0
+#endif
+
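+/* Deferred profile counters: hppa_profile_hook records the funcdef
+ number of each profiled function in the vector below, and
+ output_deferred_profile_counters later emits one zero-initialized
+ counter word in the data section per recorded number. */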
+
+/* Vector of funcdef numbers. */
+static VEC(int,heap) *funcdef_nos;
+
+/* Output deferred profile counters. */
+static void
+output_deferred_profile_counters (void)
+{
+ unsigned int i;
+ int align, n;
+
+ if (VEC_empty (int, funcdef_nos))
+ return;
+
+ switch_to_section (data_section);
+ align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
+ ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
+
+ for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
+ {
+ targetm.asm_out.internal_label (asm_out_file, "LP", n);
+ assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
+ }
+
+ VEC_free (int, heap, funcdef_nos);
+}
+
+void
+hppa_profile_hook (int label_no)
+{
+ /* We use SImode for the address of the function in both 32 and
+ 64-bit code to avoid having to provide DImode versions of the
+ lcla2 and load_offset_label_address insn patterns. */
+ rtx reg = gen_reg_rtx (SImode);
+ rtx label_rtx = gen_label_rtx ();
+ rtx begin_label_rtx, call_insn;
+ char begin_label_name[16];
+
+ ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
+ label_no);
+ begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
+
+ if (TARGET_64BIT)
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+ GEN_INT (64)));
+
+ emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
+
+ /* The address of the function is loaded into %r25 with an instruction-
+ relative sequence that avoids the use of relocations. The sequence
+ is split so that the load_offset_label_address instruction can
+ occupy the delay slot of the call to _mcount. */
+ if (TARGET_PA_20)
+ emit_insn (gen_lcla2 (reg, label_rtx));
+ else
+ emit_insn (gen_lcla1 (reg, label_rtx));
+
+ emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
+ reg, begin_label_rtx, label_rtx));
+
+#if !NO_DEFERRED_PROFILE_COUNTERS
+ {
+ rtx count_label_rtx, addr, r24;
+ char count_label_name[16];
+
+ VEC_safe_push (int, heap, funcdef_nos, label_no);
+ ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
+ count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
+
+ addr = force_reg (Pmode, count_label_rtx);
+ r24 = gen_rtx_REG (Pmode, 24);
+ emit_move_insn (r24, addr);
+
+ call_insn =
+ emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ "_mcount")),
+ GEN_INT (TARGET_64BIT ? 24 : 12)));
+
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
+ }
+#else
+
+ call_insn =
+ emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ "_mcount")),
+ GEN_INT (TARGET_64BIT ? 16 : 8)));
+
+#endif
+
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
+
+ /* Indicate the _mcount call cannot throw, nor will it execute a
+ non-local goto. */
+ make_reg_eh_region_note_nothrow_nononlocal (call_insn);
+}
+
+/* Fetch the return address for the frame COUNT steps up from
+ the current frame, after the prologue. FRAMEADDR is the
+ frame pointer of the COUNT frame.
+
+ We want to ignore any export stub remnants here. To handle this,
+ we examine the code at the return address, and if it is an export
+ stub, we return a memory rtx for the stub return address stored
+ at frame-24.
+
+ The value returned is used in two different ways:
+
+ 1. To find a function's caller.
+
+ 2. To change the return address for a function.
+
+ This function handles most instances of case 1; however, it will
+ fail if there are two levels of stubs to execute on the return
+ path. The only way I believe that can happen is if the return value
+ needs a parameter relocation, which never happens for C code.
+
+ This function handles most instances of case 2; however, it will
+ fail if we did not originally have stub code on the return path
+ but will need stub code on the new return path. This can happen if
+ the caller & callee are both in the main program, but the new
+ return location is in a shared library. */
+
+rtx
+return_addr_rtx (int count, rtx frameaddr)
+{
+ rtx label;
+ rtx rp;
+ rtx saved_rp;
+ rtx ins;
+
+ /* The instruction stream at the return address of a PA1.X export stub is:
+
+ 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
+ 0x004010a1 | stub+12: ldsid (sr0,rp),r1
+ 0x00011820 | stub+16: mtsp r1,sr0
+ 0xe0400002 | stub+20: be,n 0(sr0,rp)
+
+ 0xe0400002 must be specified as -532676606 so that it won't be
+ rejected as an invalid immediate operand on 64-bit hosts.
+
+ The instruction stream at the return address of a PA2.0 export stub is:
+
+ 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
+ 0xe840d002 | stub+12: bve,n (rp)
+ */
+
+ HOST_WIDE_INT insns[4];
+ int i, len;
+
+ if (count != 0)
+ return NULL_RTX;
+
+ rp = get_hard_reg_initial_val (Pmode, 2);
+
+ if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
+ return rp;
+
+ /* If there is no export stub then just use the value saved from
+ the return pointer register. */
+
+ saved_rp = gen_reg_rtx (Pmode);
+ emit_move_insn (saved_rp, rp);
+
+ /* Get pointer to the instruction stream. We have to mask out the
+ privilege level from the two low order bits of the return address
+ pointer here so that ins will point to the start of the first
+ instruction that would have been executed if we returned. */
+ ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
+ label = gen_label_rtx ();
+
+ if (TARGET_PA_20)
+ {
+ insns[0] = 0x4bc23fd1;
+ insns[1] = -398405630;
+ len = 2;
+ }
+ else
+ {
+ insns[0] = 0x4bc23fd1;
+ insns[1] = 0x004010a1;
+ insns[2] = 0x00011820;
+ insns[3] = -532676606;
+ len = 4;
+ }
+
+ /* Check the instruction stream at the normal return address for the
+ export stub. If it is an export stub, then our return address is
+ really in -24[frameaddr]. */
+
+ for (i = 0; i < len; i++)
+ {
+ rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
+ rtx op1 = GEN_INT (insns[i]);
+ emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
+ }
+
+ /* Here we know that our return address points to an export
+ stub. We don't want to return the address of the export stub,
+ but rather the return address of the export stub. That return
+ address is stored at -24[frameaddr]. */
+
+ emit_move_insn (saved_rp,
+ gen_rtx_MEM (Pmode,
+ memory_address (Pmode,
+ plus_constant (frameaddr,
+ -24))));
+
+ emit_label (label);
+
+ return saved_rp;
+}
+
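+/* Emit a floating-point conditional branch. operands[0] holds the
+ comparison code, operands[1] and operands[2] are the values to
+ compare, and operands[3] is the branch label. The comparison
+ result is first placed in the FP status register (CCFP register 0);
+ the branch is then taken when that register is nonzero. */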
+void
+emit_bcond_fp (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx operand0 = operands[1];
+ rtx operand1 = operands[2];
+ rtx label = operands[3];
+
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
+ gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
+
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_fmt_ee (NE,
+ VOIDmode,
+ gen_rtx_REG (CCFPmode, 0),
+ const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx)));
+
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
+
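+/* For example, an anti dependent fpload whose destination register is
+ one of the sources of a preceding FP multiply is charged the
+ multiply's default latency minus one; the load may issue one cycle
+ before the multiply finishes. */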
+static int
+pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type attr_type;
+
+ /* Don't adjust costs for a pa8000 chip, also do not adjust any
+ true dependencies as they are described with bypasses now. */
+ if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
+ return cost;
+
+ if (! recog_memoized (insn))
+ return 0;
+
+ attr_type = get_attr_type (insn);
+
+ switch (REG_NOTE_KIND (link))
+ {
+ case REG_DEP_ANTI:
+ /* Anti dependency; DEP_INSN reads a register that INSN writes some
+ cycles later. */
+
+ if (attr_type == TYPE_FPLOAD)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPALU:
+ case TYPE_FPMULSGL:
+ case TYPE_FPMULDBL:
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* A fpload can't be issued until one cycle before a
+ preceding arithmetic operation has finished if
+ the target of the fpload is any of the sources
+ (or destination) of the arithmetic operation. */
+ return insn_default_latency (dep_insn) - 1;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (attr_type == TYPE_FPALU)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* An ALU flop can't be issued until two cycles before a
+ preceding divide or sqrt operation has finished if
+ the target of the ALU flop is any of the sources
+ (or destination) of the divide or sqrt operation. */
+ return insn_default_latency (dep_insn) - 2;
+
+ default:
+ return 0;
+ }
+ }
+ }
+
+ /* For other anti dependencies, the cost is 0. */
+ return 0;
+
+ case REG_DEP_OUTPUT:
+ /* Output dependency; DEP_INSN writes a register that INSN writes some
+ cycles later. */
+ if (attr_type == TYPE_FPLOAD)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPALU:
+ case TYPE_FPMULSGL:
+ case TYPE_FPMULDBL:
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* A fpload can't be issued until one cycle before a
+ preceding arithmetic operation has finished if
+ the target of the fpload is the destination of the
+ arithmetic operation.
+
+ Exception: For PA7100LC, PA7200 and PA7300, the cost
+ is 3 cycles, unless they bundle together. We also
+ pay the penalty if the second insn is a fpload. */
+ return insn_default_latency (dep_insn) - 1;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (attr_type == TYPE_FPALU)
+ {
+ rtx pat = PATTERN (insn);
+ rtx dep_pat = PATTERN (dep_insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ /* This happens for the fldXs,mb patterns. */
+ pat = XVECEXP (pat, 0, 0);
+ }
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return 0 for now. */
+ return 0;
+
+ if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
+ {
+ if (! recog_memoized (dep_insn))
+ return 0;
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_FPDIVSGL:
+ case TYPE_FPDIVDBL:
+ case TYPE_FPSQRTSGL:
+ case TYPE_FPSQRTDBL:
+ /* An ALU flop can't be issued until two cycles before a
+ preceding divide or sqrt operation has finished if
+ the target of the ALU flop is also the target of
+ the divide or sqrt operation. */
+ return insn_default_latency (dep_insn) - 2;
+
+ default:
+ return 0;
+ }
+ }
+ }
+
+ /* For other output dependencies, the cost is 0. */
+ return 0;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Adjust scheduling priorities. We use this to try and keep addil
+ and the next use of %r1 close together. */
+static int
+pa_adjust_priority (rtx insn, int priority)
+{
+ rtx set = single_set (insn);
+ rtx src, dest;
+ if (set)
+ {
+ src = SET_SRC (set);
+ dest = SET_DEST (set);
+ if (GET_CODE (src) == LO_SUM
+ && symbolic_operand (XEXP (src, 1), VOIDmode)
+ && ! read_only_operand (XEXP (src, 1), VOIDmode))
+ priority >>= 3;
+
+ else if (GET_CODE (src) == MEM
+ && GET_CODE (XEXP (src, 0)) == LO_SUM
+ && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
+ && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
+ priority >>= 1;
+
+ else if (GET_CODE (dest) == MEM
+ && GET_CODE (XEXP (dest, 0)) == LO_SUM
+ && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
+ && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
+ priority >>= 3;
+ }
+ return priority;
+}
+
+/* The 700 can only issue a single insn at a time.
+ The 7XXX processors can issue two insns at a time.
+ The 8000 can issue 4 insns at a time. */
+static int
+pa_issue_rate (void)
+{
+ switch (pa_cpu)
+ {
+ case PROCESSOR_700: return 1;
+ case PROCESSOR_7100: return 2;
+ case PROCESSOR_7100LC: return 2;
+ case PROCESSOR_7200: return 2;
+ case PROCESSOR_7300: return 2;
+ case PROCESSOR_8000: return 4;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+
+/* Return any length plus adjustment needed by INSN which already has
+ its length computed as LENGTH. Return LENGTH if no adjustment is
+ necessary.
+
+ Also compute the length of an inline block move here as it is too
+ complicated to express as a length attribute in pa.md. */
+int
+pa_adjust_insn_length (rtx insn, int length)
+{
+ rtx pat = PATTERN (insn);
+
+ /* If length is negative or undefined, provide initial length. */
+ if ((unsigned int) length >= INT_MAX)
+ {
+ if (GET_CODE (pat) == SEQUENCE)
+ insn = XVECEXP (pat, 0, 0);
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_MILLI:
+ length = attr_length_millicode_call (insn);
+ break;
+ case TYPE_CALL:
+ length = attr_length_call (insn, 0);
+ break;
+ case TYPE_SIBCALL:
+ length = attr_length_call (insn, 1);
+ break;
+ case TYPE_DYNCALL:
+ length = attr_length_indirect_call (insn);
+ break;
+ case TYPE_SH_FUNC_ADRS:
+ length = attr_length_millicode_call (insn) + 20;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Jumps inside switch tables which have unfilled delay slots need
+ adjustment. */
+ if (GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (pat) == PARALLEL
+ && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
+ length += 4;
+ /* Block move pattern. */
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
+ length += compute_movmem_length (insn) - 4;
+ /* Block clear pattern. */
+ else if (GET_CODE (insn) == INSN
+ && GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
+ && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
+ && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
+ length += compute_clrmem_length (insn) - 4;
+ /* Conditional branch with an unfilled delay slot. */
+ else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
+ {
+ /* Adjust a short backwards conditional with an unfilled delay slot. */
+ if (GET_CODE (pat) == SET
+ && length == 4
+ && JUMP_LABEL (insn) != NULL_RTX
+ && ! forward_branch_p (insn))
+ length += 4;
+ else if (GET_CODE (pat) == PARALLEL
+ && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
+ && length == 4)
+ length += 4;
+ /* Adjust dbra insn with short backwards conditional branch with
+ unfilled delay slot -- only for case where counter is in a
+ general register. */
+ else if (GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 1)) == SET
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
+ && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
+ && length == 4
+ && ! forward_branch_p (insn))
+ length += 4;
+ }
+ return length;
+}
+
+/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
+
+static bool
+pa_print_operand_punct_valid_p (unsigned char code)
+{
+ if (code == '@'
+ || code == '#'
+ || code == '*'
+ || code == '^')
+ return true;
+
+ return false;
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case '#':
+ /* Output a 'nop' if there's nothing for the delay slot. */
+ if (dbr_sequence_length () == 0)
+ fputs ("\n\tnop", file);
+ return;
+ case '*':
+ /* Output a nullification completer if there's nothing for the
+ delay slot or nullification is requested. */
+ if (dbr_sequence_length () == 0 ||
+ (final_sequence &&
+ INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
+ fputs (",n", file);
+ return;
+ case 'R':
+ /* Print out the second register name of a register pair.
+ I.e., R (6) => 7. */
+ fputs (reg_names[REGNO (x) + 1], file);
+ return;
+ case 'r':
+ /* A register or zero. */
+ if (x == const0_rtx
+ || (x == CONST0_RTX (DFmode))
+ || (x == CONST0_RTX (SFmode)))
+ {
+ fputs ("%r0", file);
+ return;
+ }
+ else
+ break;
+ case 'f':
+ /* A register or zero (floating point). */
+ if (x == const0_rtx
+ || (x == CONST0_RTX (DFmode))
+ || (x == CONST0_RTX (SFmode)))
+ {
+ fputs ("%fr0", file);
+ return;
+ }
+ else
+ break;
+ case 'A':
+ {
+ rtx xoperands[2];
+
+ xoperands[0] = XEXP (XEXP (x, 0), 0);
+ xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
+ output_global_address (file, xoperands[1], 0);
+ fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
+ return;
+ }
+
+ case 'C': /* Plain (C)ondition */
+ case 'X':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("=", file); break;
+ case NE:
+ fputs ("<>", file); break;
+ case GT:
+ fputs (">", file); break;
+ case GE:
+ fputs (">=", file); break;
+ case GEU:
+ fputs (">>=", file); break;
+ case GTU:
+ fputs (">>", file); break;
+ case LT:
+ fputs ("<", file); break;
+ case LE:
+ fputs ("<=", file); break;
+ case LEU:
+ fputs ("<<=", file); break;
+ case LTU:
+ fputs ("<<", file); break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ case 'N': /* Condition, (N)egated */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("<>", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs ("<=", file); break;
+ case GE:
+ fputs ("<", file); break;
+ case GEU:
+ fputs ("<<", file); break;
+ case GTU:
+ fputs ("<<=", file); break;
+ case LT:
+ fputs (">=", file); break;
+ case LE:
+ fputs (">", file); break;
+ case LEU:
+ fputs (">>", file); break;
+ case LTU:
+ fputs (">>=", file); break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ /* For floating point comparisons. Note that the output
+ predicates are the complement of the desired mode. The
+ conditions for GT, GE, LT, LE and LTGT cause an invalid
+ operation exception if the result is unordered and this
+ exception is enabled in the floating-point status register. */
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("!=", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs ("!>", file); break;
+ case GE:
+ fputs ("!>=", file); break;
+ case LT:
+ fputs ("!<", file); break;
+ case LE:
+ fputs ("!<=", file); break;
+ case LTGT:
+ fputs ("!<>", file); break;
+ case UNLE:
+ fputs ("!?<=", file); break;
+ case UNLT:
+ fputs ("!?<", file); break;
+ case UNGE:
+ fputs ("!?>=", file); break;
+ case UNGT:
+ fputs ("!?>", file); break;
+ case UNEQ:
+ fputs ("!?=", file); break;
+ case UNORDERED:
+ fputs ("!?", file); break;
+ case ORDERED:
+ fputs ("?", file); break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ case 'S': /* Condition, operands are (S)wapped. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("=", file); break;
+ case NE:
+ fputs ("<>", file); break;
+ case GT:
+ fputs ("<", file); break;
+ case GE:
+ fputs ("<=", file); break;
+ case GEU:
+ fputs ("<<=", file); break;
+ case GTU:
+ fputs ("<<", file); break;
+ case LT:
+ fputs (">", file); break;
+ case LE:
+ fputs (">=", file); break;
+ case LEU:
+ fputs (">>=", file); break;
+ case LTU:
+ fputs (">>", file); break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+ case 'B': /* Condition, (B)oth swapped and negate. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("<>", file); break;
+ case NE:
+ fputs ("=", file); break;
+ case GT:
+ fputs (">=", file); break;
+ case GE:
+ fputs (">", file); break;
+ case GEU:
+ fputs (">>", file); break;
+ case GTU:
+ fputs (">>=", file); break;
+ case LT:
+ fputs ("<=", file); break;
+ case LE:
+ fputs ("<", file); break;
+ case LEU:
+ fputs ("<<", file); break;
+ case LTU:
+ fputs ("<<=", file); break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
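+ /* The codes %k, %Q, %L, %O, %p and %P print values derived from
+ a CONST_INT operand; e.g. %Q prints 64 - (x & 63), so an
+ operand of 3 prints as 61. */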
+ case 'k':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
+ return;
+ case 'Q':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
+ return;
+ case 'L':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
+ return;
+ case 'O':
+ gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
+ fprintf (file, "%d", exact_log2 (INTVAL (x)));
+ return;
+ case 'p':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
+ return;
+ case 'P':
+ gcc_assert (GET_CODE (x) == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
+ return;
+ case 'I':
+ if (GET_CODE (x) == CONST_INT)
+ fputs ("i", file);
+ return;
+ case 'M':
+ case 'F':
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ case PRE_DEC:
+ case PRE_INC:
+ if (ASSEMBLER_DIALECT == 0)
+ fputs ("s,mb", file);
+ else
+ fputs (",mb", file);
+ break;
+ case POST_DEC:
+ case POST_INC:
+ if (ASSEMBLER_DIALECT == 0)
+ fputs ("s,ma", file);
+ else
+ fputs (",ma", file);
+ break;
+ case PLUS:
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
+ {
+ if (ASSEMBLER_DIALECT == 0)
+ fputs ("x", file);
+ }
+ else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
+ {
+ if (ASSEMBLER_DIALECT == 0)
+ fputs ("x,s", file);
+ else
+ fputs (",s", file);
+ }
+ else if (code == 'F' && ASSEMBLER_DIALECT == 0)
+ fputs ("s", file);
+ break;
+ default:
+ if (code == 'F' && ASSEMBLER_DIALECT == 0)
+ fputs ("s", file);
+ break;
+ }
+ return;
+ case 'G':
+ output_global_address (file, x, 0);
+ return;
+ case 'H':
+ output_global_address (file, x, 1);
+ return;
+ case 0: /* Don't do anything special */
+ break;
+ case 'Z':
+ {
+ unsigned op[3];
+ compute_zdepwi_operands (INTVAL (x), op);
+ fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
+ return;
+ }
+ case 'z':
+ {
+ unsigned op[3];
+ compute_zdepdi_operands (INTVAL (x), op);
+ fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
+ return;
+ }
+ case 'c':
+ /* We can get here from a .vtable_inherit due to our
+ CONSTANT_ADDRESS_P rejecting perfectly good constant
+ addresses. */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (GET_CODE (x) == REG)
+ {
+ fputs (reg_names [REGNO (x)], file);
+ if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
+ {
+ fputs ("R", file);
+ return;
+ }
+ if (FP_REG_P (x)
+ && GET_MODE_SIZE (GET_MODE (x)) <= 4
+ && (REGNO (x) & 1) == 0)
+ fputs ("L", file);
+ }
+ else if (GET_CODE (x) == MEM)
+ {
+ int size = GET_MODE_SIZE (GET_MODE (x));
+ rtx base = NULL_RTX;
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ case PRE_DEC:
+ case POST_DEC:
+ base = XEXP (XEXP (x, 0), 0);
+ fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
+ break;
+ case PRE_INC:
+ case POST_INC:
+ base = XEXP (XEXP (x, 0), 0);
+ fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
+ break;
+ case PLUS:
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
+ fprintf (file, "%s(%s)",
+ reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
+ reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
+ else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
+ fprintf (file, "%s(%s)",
+ reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
+ reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
+ else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
+ {
+ /* Because the REG_POINTER flag can get lost during reload,
+ GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
+ index and base registers in the combined move patterns. */
+ rtx base = XEXP (XEXP (x, 0), 1);
+ rtx index = XEXP (XEXP (x, 0), 0);
+
+ fprintf (file, "%s(%s)",
+ reg_names [REGNO (index)], reg_names [REGNO (base)]);
+ }
+ else
+ output_address (XEXP (x, 0));
+ break;
+ default:
+ output_address (XEXP (x, 0));
+ break;
+ }
+ }
+ else
+ output_addr_const (file, x);
+}
+
+/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
+
+void
+output_global_address (FILE *file, rtx x, int round_constant)
+{
+
+ /* Imagine (high (const (plus ...))). */
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
+ output_addr_const (file, x);
+ else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
+ {
+ output_addr_const (file, x);
+ fputs ("-$global$", file);
+ }
+ else if (GET_CODE (x) == CONST)
+ {
+ const char *sep = "";
+ int offset = 0; /* assembler wants -$global$ at end */
+ rtx base = NULL_RTX;
+
+ switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
+ {
+ case SYMBOL_REF:
+ base = XEXP (XEXP (x, 0), 0);
+ output_addr_const (file, base);
+ break;
+ case CONST_INT:
+ offset = INTVAL (XEXP (XEXP (x, 0), 0));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
+ {
+ case SYMBOL_REF:
+ base = XEXP (XEXP (x, 0), 1);
+ output_addr_const (file, base);
+ break;
+ case CONST_INT:
+ offset = INTVAL (XEXP (XEXP (x, 0), 1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* How bogus. The compiler is apparently responsible for
+ rounding the constant if it uses an LR field selector.
+
+ The linker and/or assembler seem a better place since
+ they have to do this kind of thing already.
+
+ If we fail to do this, HP's optimizing linker may eliminate
+ an addil, but not update the ldw/stw/ldo instruction that
+ uses the result of the addil. */
+ if (round_constant)
+ offset = ((offset + 0x1000) & ~0x1fff);
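+ /* For example, an offset of 0x2345 becomes
+ (0x2345 + 0x1000) & ~0x1fff == 0x2000; i.e., the offset is
+ rounded to the nearest multiple of 0x2000 (8k). */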
+
+ switch (GET_CODE (XEXP (x, 0)))
+ {
+ case PLUS:
+ if (offset < 0)
+ {
+ offset = -offset;
+ sep = "-";
+ }
+ else
+ sep = "+";
+ break;
+
+ case MINUS:
+ gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
+ sep = "-";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!read_only_operand (base, VOIDmode) && !flag_pic)
+ fputs ("-$global$", file);
+ if (offset)
+ fprintf (file, "%s%d", sep, offset);
+ }
+ else
+ output_addr_const (file, x);
+}
+
+/* Output boilerplate text to appear at the beginning of the file.
+ There are several possible versions. */
+#define aputs(x) fputs(x, asm_out_file)
+static inline void
+pa_file_start_level (void)
+{
+ if (TARGET_64BIT)
+ aputs ("\t.LEVEL 2.0w\n");
+ else if (TARGET_PA_20)
+ aputs ("\t.LEVEL 2.0\n");
+ else if (TARGET_PA_11)
+ aputs ("\t.LEVEL 1.1\n");
+ else
+ aputs ("\t.LEVEL 1.0\n");
+}
+
+static inline void
+pa_file_start_space (int sortspace)
+{
+ aputs ("\t.SPACE $PRIVATE$");
+ if (sortspace)
+ aputs (",SORT=16");
+ aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
+ "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
+ "\n\t.SPACE $TEXT$");
+ if (sortspace)
+ aputs (",SORT=8");
+ aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
+ "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
+}
+
+static inline void
+pa_file_start_file (int want_version)
+{
+ if (write_symbols != NO_DEBUG)
+ {
+ output_file_directive (asm_out_file, main_input_filename);
+ if (want_version)
+ aputs ("\t.version\t\"01.01\"\n");
+ }
+}
+
+static inline void
+pa_file_start_mcount (const char *aswhat)
+{
+ if (profile_flag)
+ fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
+}
+
+static void
+pa_elf_file_start (void)
+{
+ pa_file_start_level ();
+ pa_file_start_mcount ("ENTRY");
+ pa_file_start_file (0);
+}
+
+static void
+pa_som_file_start (void)
+{
+ pa_file_start_level ();
+ pa_file_start_space (0);
+ aputs ("\t.IMPORT $global$,DATA\n"
+ "\t.IMPORT $$dyncall,MILLICODE\n");
+ pa_file_start_mcount ("CODE");
+ pa_file_start_file (0);
+}
+
+static void
+pa_linux_file_start (void)
+{
+ pa_file_start_file (1);
+ pa_file_start_level ();
+ pa_file_start_mcount ("CODE");
+}
+
+static void
+pa_hpux64_gas_file_start (void)
+{
+ pa_file_start_level ();
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+ if (profile_flag)
+ ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
+#endif
+ pa_file_start_file (1);
+}
+
+static void
+pa_hpux64_hpas_file_start (void)
+{
+ pa_file_start_level ();
+ pa_file_start_space (1);
+ pa_file_start_mcount ("CODE");
+ pa_file_start_file (0);
+}
+#undef aputs
+
+/* Search the deferred plabel list for SYMBOL and return its internal
+ label. If an entry for SYMBOL is not found, a new entry is created. */
+
+rtx
+get_deferred_plabel (rtx symbol)
+{
+ const char *fname = XSTR (symbol, 0);
+ size_t i;
+
+ /* See if we have already put this function on the list of deferred
+ plabels. This list is generally small, so a linear search is not
+ too ugly. If it proves too slow, replace it with something faster. */
+ for (i = 0; i < n_deferred_plabels; i++)
+ if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
+ break;
+
+ /* If the deferred plabel list is empty, or this entry was not found
+ on the list, create a new entry on the list. */
+ if (deferred_plabels == NULL || i == n_deferred_plabels)
+ {
+ tree id;
+
+ if (deferred_plabels == 0)
+ deferred_plabels = ggc_alloc_deferred_plabel ();
+ else
+ deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
+ deferred_plabels,
+ n_deferred_plabels + 1);
+
+ i = n_deferred_plabels++;
+ deferred_plabels[i].internal_label = gen_label_rtx ();
+ deferred_plabels[i].symbol = symbol;
+
+ /* Gross. We have just implicitly taken the address of this
+ function. Mark it in the same manner as assemble_name. */
+ id = maybe_get_identifier (targetm.strip_name_encoding (fname));
+ if (id)
+ mark_referenced (id);
+ }
+
+ return deferred_plabels[i].internal_label;
+}
+
+static void
+output_deferred_plabels (void)
+{
+ size_t i;
+
+ /* If we have some deferred plabels, then we need to switch into the
+ data or readonly data section, and align it to a word boundary
+ (4 bytes, or 8 bytes in the 64-bit runtime) before outputting the
+ deferred plabels. */
+ if (n_deferred_plabels)
+ {
+ switch_to_section (flag_pic ? data_section : readonly_data_section);
+ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
+ }
+
+ /* Now output the deferred plabels. */
+ for (i = 0; i < n_deferred_plabels; i++)
+ {
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
+ assemble_integer (deferred_plabels[i].symbol,
+ TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
+ }
+}
+
+#if HPUX_LONG_DOUBLE_LIBRARY
+/* Initialize optabs to point to HPUX long double emulation routines. */
+static void
+pa_hpux_init_libfuncs (void)
+{
+ set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
+ set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
+ set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
+ set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
+ set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
+ set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
+ set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
+ set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
+ set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
+
+ set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
+ set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
+ set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
+ set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
+ set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
+ set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
+ set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
+
+ set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
+ ? "__U_Qfcnvfxt_quad_to_sgl"
+ : "_U_Qfcnvfxt_quad_to_sgl");
+ set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
+ set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
+
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
+ set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
+ set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
+ set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
+}
+#endif
+
+/* HP's millicode routines mean something special to the assembler.
+ Keep track of which ones we have used. */
+
+enum millicodes { remI, remU, divI, divU, mulI, end1000 };
+static void import_milli (enum millicodes);
+static char imported[(int) end1000];
+static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
+static const char import_string[] = ".IMPORT $$....,MILLICODE";
+#define MILLI_START 10
+
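+/* For example, import_milli (mulI) copies the template above and
+ overwrites the four dots at offset MILLI_START with the millicode
+ name, emitting ".IMPORT $$mulI,MILLICODE" exactly once per file. */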
+static void
+import_milli (enum millicodes code)
+{
+ char str[sizeof (import_string)];
+
+ if (!imported[(int) code])
+ {
+ imported[(int) code] = 1;
+ strcpy (str, import_string);
+ strncpy (str + MILLI_START, milli_names[(int) code], 4);
+ output_asm_insn (str, 0);
+ }
+}
+
+/* The register constraints have put the operands and return value in
+ the proper registers. */
+
+const char *
+output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
+{
+ import_milli (mulI);
+ return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
+}
+
+/* Emit the rtl for doing a division by a constant. */
+
+/* Do magic division millicodes exist for this value? */
+const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
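+/* The nonzero entries above (divisors 3, 5, 6, 7, 9, 10, 12, 14 and
+ 15) are the values for which the dedicated $$divI_<n> and
+ $$divU_<n> millicode routines are called; see output_div_insn. */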
+
+/* We'll use an array to keep track of the magic millicodes and
+ whether or not we've used them already. [n][0] is signed, [n][1] is
+ unsigned. */
+
+static int div_milli[16][2];
+
+int
+emit_hpdiv_const (rtx *operands, int unsignedp)
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) < 16
+ && magic_milli[INTVAL (operands[2])])
+ {
+ rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
+
+ emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
+ emit
+ (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
+ gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
+ SImode,
+ gen_rtx_REG (SImode, 26),
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode, operands[4]),
+ gen_rtx_CLOBBER (VOIDmode, operands[3]),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
+ gen_rtx_CLOBBER (VOIDmode, ret))));
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
+ return 1;
+ }
+ return 0;
+}
+
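+/* Output a division millicode call. For a "magic" constant divisor
+ such as 10, this imports and calls the dedicated $$divU_10 or
+ $$divI_10 routine; all other divisors go through the generic
+ $$divU / $$divI entry points. */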
+const char *
+output_div_insn (rtx *operands, int unsignedp, rtx insn)
+{
+ int divisor;
+
+ /* If the divisor is a constant, try to use one of the special
+ opcodes. */
+ if (GET_CODE (operands[0]) == CONST_INT)
+ {
+ static char buf[100];
+ divisor = INTVAL (operands[0]);
+ if (!div_milli[divisor][unsignedp])
+ {
+ div_milli[divisor][unsignedp] = 1;
+ if (unsignedp)
+ output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
+ else
+ output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
+ }
+ if (unsignedp)
+ {
+ sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (operands[0]));
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, buf));
+ }
+ else
+ {
+ sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (operands[0]));
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, buf));
+ }
+ }
+ /* Divisor isn't a special constant. */
+ else
+ {
+ if (unsignedp)
+ {
+ import_milli (divU);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$divU"));
+ }
+ else
+ {
+ import_milli (divI);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$divI"));
+ }
+ }
+}
+
+/* Output a $$rem millicode to do mod. */
+
+const char *
+output_mod_insn (int unsignedp, rtx insn)
+{
+ if (unsignedp)
+ {
+ import_milli (remU);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$remU"));
+ }
+ else
+ {
+ import_milli (remI);
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode, "$$remI"));
+ }
+}
+
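+/* Output the assembler's .CALL argument-relocation descriptor for
+ CALL_INSN, noting for each argument word whether it is passed in a
+ general ("GR") or floating-point ("FR"/"FU") register; e.g. two
+ word-sized integer arguments produce ".CALL ARGW0=GR,ARGW1=GR". */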
+void
+output_arg_descriptor (rtx call_insn)
+{
+ const char *arg_regs[4];
+ enum machine_mode arg_mode;
+ rtx link;
+ int i, output_flag = 0;
+ int regno;
+
+ /* We neither need nor want argument location descriptors for the
+ 64-bit runtime environment or the ELF32 environment. */
+ if (TARGET_64BIT || TARGET_ELF32)
+ return;
+
+ for (i = 0; i < 4; i++)
+ arg_regs[i] = 0;
+
+ /* Specify explicitly that no argument relocations should take place
+ if using the portable runtime calling conventions. */
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
+ asm_out_file);
+ return;
+ }
+
+ gcc_assert (GET_CODE (call_insn) == CALL_INSN);
+ for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
+ link; link = XEXP (link, 1))
+ {
+ rtx use = XEXP (link, 0);
+
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+ if (regno >= 23 && regno <= 26)
+ {
+ arg_regs[26 - regno] = "GR";
+ if (arg_mode == DImode)
+ arg_regs[25 - regno] = "GR";
+ }
+ else if (regno >= 32 && regno <= 39)
+ {
+ if (arg_mode == SFmode)
+ arg_regs[(regno - 32) / 2] = "FR";
+ else
+ {
+#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
+ arg_regs[(regno - 34) / 2] = "FR";
+ arg_regs[(regno - 34) / 2 + 1] = "FU";
+#else
+ arg_regs[(regno - 34) / 2] = "FU";
+ arg_regs[(regno - 34) / 2 + 1] = "FR";
+#endif
+ }
+ }
+ }
+ fputs ("\t.CALL ", asm_out_file);
+ for (i = 0; i < 4; i++)
+ {
+ if (arg_regs[i])
+ {
+ if (output_flag++)
+ fputc (',', asm_out_file);
+ fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
+ }
+ }
+ fputc ('\n', asm_out_file);
+}
+
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch or immediate register. Return the class
+ needed for the immediate register. */
+
+static reg_class_t
+pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ int regno;
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ /* Handle the easy stuff first. */
+ if (rclass == R1_REGS)
+ return NO_REGS;
+
+ if (REG_P (x))
+ {
+ regno = REGNO (x);
+ if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
+ return NO_REGS;
+ }
+ else
+ regno = -1;
+
+ /* If we have something like (mem (mem (...)), we can safely assume the
+ inner MEM will end up in a general register after reloading, so there's
+ no need for a secondary reload. */
+ if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
+ return NO_REGS;
+
+ /* Trying to load a constant into a FP register during PIC code
+ generation requires %r1 as a scratch register. */
+ if (flag_pic
+ && (mode == SImode || mode == DImode)
+ && FP_REG_CLASS_P (rclass)
+ && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
+ {
+ sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
+ : CODE_FOR_reload_indi_r1);
+ return NO_REGS;
+ }
+
+ /* Secondary reloads of symbolic operands require %r1 as a scratch
+ register when we're generating PIC code and when the operand isn't
+ readonly. */
+ if (symbolic_expression_p (x))
+ {
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ if (flag_pic || !read_only_operand (x, VOIDmode))
+ {
+ gcc_assert (mode == SImode || mode == DImode);
+ sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
+ : CODE_FOR_reload_indi_r1);
+ return NO_REGS;
+ }
+ }
+
+ /* Profiling showed the PA port spends about 1.3% of its compilation
+ time in true_regnum from calls inside pa_secondary_reload_class. */
+ if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ /* In order to allow 14-bit displacements in integer loads and stores,
+ we need to prevent reload from generating out of range integer mode
+ loads and stores to the floating point registers. Previously, we
+ used to call for a secondary reload and have emit_move_sequence()
+ fix the instruction sequence. However, reload occasionally wouldn't
+ generate the reload and we would end up with an invalid REG+D memory
+ address. So, now we use an intermediate general register for most
+ memory loads and stores. */
+ if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && FP_REG_CLASS_P (rclass))
+ {
+ /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
+ the secondary reload needed for a pseudo. It never passes a
+ REG+D address. */
+ if (GET_CODE (x) == MEM)
+ {
+ x = XEXP (x, 0);
+
+ /* We don't need an intermediate for indexed and LO_SUM DLT
+ memory addresses. When INT14_OK_STRICT is true, it might
+ appear that we could directly allow register indirect
+ memory addresses. However, this doesn't work because we
+ don't support SUBREGs in floating-point register copies
+ and reload doesn't tell us when it's going to use a SUBREG. */
+ if (IS_INDEX_ADDR_P (x)
+ || IS_LO_SUM_DLT_ADDR_P (x))
+ return NO_REGS;
+
+ /* Otherwise, we need an intermediate general register. */
+ return GENERAL_REGS;
+ }
+
+ /* Request a secondary reload with a general scratch register
+ for everything else. ??? Could symbolic operands be handled
+ directly when generating non-pic PA 2.0 code? */
+ sri->icode = (in_p
+ ? direct_optab_handler (reload_in_optab, mode)
+ : direct_optab_handler (reload_out_optab, mode));
+ return NO_REGS;
+ }
+
+ /* A SAR<->FP register copy requires an intermediate general register
+ and secondary memory. We need a secondary reload with a general
+ scratch register for spills. */
+ if (rclass == SHIFT_REGS)
+ {
+ /* Handle spill. */
+ if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
+ {
+ sri->icode = (in_p
+ ? direct_optab_handler (reload_in_optab, mode)
+ : direct_optab_handler (reload_out_optab, mode));
+ return NO_REGS;
+ }
+
+ /* Handle FP copy. */
+ if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
+ return GENERAL_REGS;
+ }
+
+ if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (regno) == SHIFT_REGS
+ && FP_REG_CLASS_P (rclass))
+ return GENERAL_REGS;
+
+ return NO_REGS;
+}
+
+/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
+ is only marked as live on entry by df-scan when it is a fixed
+ register. It isn't a fixed register in the 64-bit runtime,
+ so we need to mark it here. */
+
+static void
+pa_extra_live_on_entry (bitmap regs)
+{
+ if (TARGET_64BIT)
+ bitmap_set_bit (regs, ARG_POINTER_REGNUM);
+}
+
+/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
+ to prevent it from being deleted. */
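+/* The -16 (64-bit) or -20 (32-bit) displacement below addresses the
+ same frame-marker slot from which the epilogue code above reloads
+ %r2. */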
+
+rtx
+pa_eh_return_handler_rtx (void)
+{
+ rtx tmp;
+
+ tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
+ TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
+ tmp = gen_rtx_MEM (word_mode, tmp);
+ tmp->volatil = 1;
+ return tmp;
+}
+
+/* In the 32-bit runtime, arguments larger than eight bytes are passed
+ by invisible reference. As a GCC extension, we also pass anything
+ with a zero or variable size by reference.
+
+ The 64-bit runtime does not describe passing any types by invisible
+ reference. The internals of GCC can't currently handle passing
+ empty structures, and zero or variable length arrays when they are
+ not passed entirely on the stack or by reference. Thus, as a GCC
+ extension, we pass these types by reference. The HP compiler doesn't
+ support these types, so hopefully there shouldn't be any compatibility
+ issues. This may have to be revisited when HP releases a C99 compiler
+ or updates the ABI. */
+
+static bool
+pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (TARGET_64BIT)
+ return size <= 0;
+ else
+ return size <= 0 || size > 8;
+}
+
+enum direction
+function_arg_padding (enum machine_mode mode, const_tree type)
+{
+ if (mode == BLKmode
+ || (TARGET_64BIT
+ && type
+ && (AGGREGATE_TYPE_P (type)
+ || TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)))
+ {
+ /* Return none if justification is not required. */
+ if (type
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
+ return none;
+
+ /* The directions set here are ignored when a BLKmode argument larger
+ than a word is placed in a register. Different code is used for
+ the stack and registers. This makes it difficult to have a
+ consistent data representation for both the stack and registers.
+ For both runtimes, the justification and padding for arguments on
+ the stack and in registers should be identical. */
+ if (TARGET_64BIT)
+ /* The 64-bit runtime specifies left justification for aggregates. */
+ return upward;
+ else
+ /* The 32-bit runtime architecture specifies right justification.
+ When the argument is passed on the stack, the argument is padded
+ with garbage on the left. The HP compiler pads with zeros. */
+ return downward;
+ }
+
+ if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
+ return downward;
+ else
+ return none;
+}
+
+
+/* Do what is necessary for `va_start'. We look at the current function
+ to determine if stdargs or varargs is used and fill in an initial
+ va_list. A pointer to this constructor is returned. */
+
+static rtx
+hppa_builtin_saveregs (void)
+{
+ rtx offset, dest;
+ tree fntype = TREE_TYPE (current_function_decl);
+ int argadj = ((!stdarg_p (fntype))
+ ? UNITS_PER_WORD : 0);
+
+ if (argadj)
+ offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
+ else
+ offset = crtl->args.arg_offset_rtx;
+
+ if (TARGET_64BIT)
+ {
+ int i, off;
+
+ /* Adjust for varargs/stdarg differences. */
+ if (argadj)
+ offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
+ else
+ offset = crtl->args.arg_offset_rtx;
+
+ /* We need to save %r26 .. %r19 inclusive starting at offset -64
+ from the incoming arg pointer and growing to larger addresses. */
+ for (i = 26, off = -64; i >= 19; i--, off += 8)
+ emit_move_insn (gen_rtx_MEM (word_mode,
+ plus_constant (arg_pointer_rtx, off)),
+ gen_rtx_REG (word_mode, i));
+
+ /* The incoming args pointer points just beyond the flushback area;
+ normally this is not a serious concern. However, when we are doing
+ varargs/stdargs we want to make the arg pointer point to the start
+ of the incoming argument area. */
+ emit_move_insn (virtual_incoming_args_rtx,
+ plus_constant (arg_pointer_rtx, -64));
+
+ /* Now return a pointer to the first anonymous argument. */
+ return copy_to_reg (expand_binop (Pmode, add_optab,
+ virtual_incoming_args_rtx,
+ offset, 0, 0, OPTAB_LIB_WIDEN));
+ }
+
+ /* Store general registers on the stack. */
+ dest = gen_rtx_MEM (BLKmode,
+ plus_constant (crtl->args.internal_arg_pointer,
+ -16));
+ set_mem_alias_set (dest, get_varargs_alias_set ());
+ set_mem_align (dest, BITS_PER_WORD);
+ move_block_from_reg (23, dest, 4);
+
+ /* move_block_from_reg will emit code to store the argument registers
+ individually as scalar stores.
+
+ However, other insns may later load from the same addresses for
+ a structure load (passing a struct to a varargs routine).
+
+ The alias code assumes that such aliasing can never happen, so we
+ have to keep memory referencing insns from moving up beyond the
+ last argument register store. So we emit a blockage insn here. */
+ emit_insn (gen_blockage ());
+
+ return copy_to_reg (expand_binop (Pmode, add_optab,
+ crtl->args.internal_arg_pointer,
+ offset, 0, 0, OPTAB_LIB_WIDEN));
+}
+
+static void
+hppa_va_start (tree valist, rtx nextarg)
+{
+ nextarg = expand_builtin_saveregs ();
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
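+/* Implement va_arg. In the 32-bit runtime, arguments grow downward
+ and small arguments are right justified, so the generic routines
+ can't be used. As a worked example of the code below: fetching a
+ 1-byte char moves valist down by 1, rounds it down to a 4-byte
+ boundary, and then adds (8 - 1) % 4 == 3 so that the result points
+ at the right-justified byte within the argument slot. */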
+static tree
+hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ if (TARGET_64BIT)
+ {
+ /* Args grow upward. We can use the generic routines. */
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+ }
+ else /* !TARGET_64BIT */
+ {
+ tree ptr = build_pointer_type (type);
+ tree valist_type;
+ tree t, u;
+ unsigned int size, ofs;
+ bool indirect;
+
+ indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
+ if (indirect)
+ {
+ type = ptr;
+ ptr = build_pointer_type (type);
+ }
+ size = int_size_in_bytes (type);
+ valist_type = TREE_TYPE (valist);
+
+ /* Args grow down. Not handled by generic routines. */
+
+ u = fold_convert (sizetype, size_in_bytes (type));
+ u = fold_build1 (NEGATE_EXPR, sizetype, u);
+ t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
+
+ /* Align to 4 or 8 byte boundary depending on argument size. */
+
+ u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
+ t = fold_convert (valist_type, t);
+
+ t = build2 (MODIFY_EXPR, valist_type, valist, t);
+
+ ofs = (8 - size) % 4;
+ if (ofs != 0)
+ {
+ u = size_int (ofs);
+ t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
+ }
+
+ t = fold_convert (ptr, t);
+ t = build_va_arg_indirect_ref (t);
+
+ if (indirect)
+ t = build_va_arg_indirect_ref (t);
+
+ return t;
+ }
+}
+
+/* True if MODE is valid for the target. By "valid", we mean able to
+ be manipulated in non-trivial ways. In particular, this means all
+ the arithmetic is supported.
+
+ Currently, TImode is not valid as the HP 64-bit runtime
+ documentation doesn't specify the alignment and calling conventions
+ for this type. Thus, we return false when PRECISION is
+ 2 * BITS_PER_WORD and 2 * BITS_PER_WORD isn't equal to
+ LONG_LONG_TYPE_SIZE. */
+
+static bool
+pa_scalar_mode_supported_p (enum machine_mode mode)
+{
+ int precision = GET_MODE_PRECISION (mode);
+
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_PARTIAL_INT:
+ case MODE_INT:
+ if (precision == CHAR_TYPE_SIZE)
+ return true;
+ if (precision == SHORT_TYPE_SIZE)
+ return true;
+ if (precision == INT_TYPE_SIZE)
+ return true;
+ if (precision == LONG_TYPE_SIZE)
+ return true;
+ if (precision == LONG_LONG_TYPE_SIZE)
+ return true;
+ return false;
+
+ case MODE_FLOAT:
+ if (precision == FLOAT_TYPE_SIZE)
+ return true;
+ if (precision == DOUBLE_TYPE_SIZE)
+ return true;
+ if (precision == LONG_DOUBLE_TYPE_SIZE)
+ return true;
+ return false;
+
+ case MODE_DECIMAL_FLOAT:
+ return false;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
+ it branches into the delay slot. Otherwise, return FALSE. */
+
+static bool
+branch_to_delay_slot_p (rtx insn)
+{
+ rtx jump_insn;
+
+ if (dbr_sequence_length ())
+ return FALSE;
+
+ jump_insn = next_active_insn (JUMP_LABEL (insn));
+ while (insn)
+ {
+ insn = next_active_insn (insn);
+ if (jump_insn == insn)
+ return TRUE;
+
+ /* We can't rely on the length of asms, so we return FALSE when
+ the branch is followed by an asm. */
+ if (!insn
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || extract_asm_operands (PATTERN (insn)) != NULL_RTX
+ || get_attr_length (insn) > 0)
+ break;
+ }
+
+ return FALSE;
+}
+
+/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
+
+ This occurs when INSN has an unfilled delay slot and is followed
+ by an asm. Disaster can occur if the asm is empty and the jump
+ branches into the delay slot. So, we add a nop in the delay slot
+ when this occurs. */
+
+static bool
+branch_needs_nop_p (rtx insn)
+{
+ rtx jump_insn;
+
+ if (dbr_sequence_length ())
+ return FALSE;
+
+ jump_insn = next_active_insn (JUMP_LABEL (insn));
+ while (insn)
+ {
+ insn = next_active_insn (insn);
+ if (!insn || jump_insn == insn)
+ return TRUE;
+
+ if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
+ && get_attr_length (insn) > 0)
+ break;
+ }
+
+ return FALSE;
+}
+
+/* Return TRUE if INSN, a forward jump insn, can use nullification
+ to skip the following instruction. This avoids an extra cycle due
+ to a mis-predicted branch when we fall through. */
+
+static bool
+use_skip_p (rtx insn)
+{
+ rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
+
+ while (insn)
+ {
+ insn = next_active_insn (insn);
+
+ /* We can't rely on the length of asms, so we can't skip asms. */
+ if (!insn
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
+ break;
+ if (get_attr_length (insn) == 4
+ && jump_insn == next_active_insn (insn))
+ return TRUE;
+ if (get_attr_length (insn) > 0)
+ break;
+ }
+
+ return FALSE;
+}
+
+/* This routine handles all the normal conditional branch sequences we
+ might need to generate. It handles compare immediate vs compare
+ register, nullification of delay slots, varying length branches,
+ negated branches, and all combinations of the above. It returns the
+ output appropriate to emit the branch corresponding to all given
+ parameters. */
+
+const char *
+output_cbranch (rtx *operands, int negated, rtx insn)
+{
+ static char buf[100];
+ bool useskip;
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int length = get_attr_length (insn);
+ int xdelay;
+
+ /* A conditional branch to the following instruction (e.g. the delay slot)
+ is asking for a disaster. This can happen when not optimizing and
+ when jump optimization fails.
+
+ While it is usually safe to emit nothing, this can fail if the
+ preceding instruction is a nullified branch with an empty delay
+ slot and the same branch target as this branch. We could check
+ for this but jump optimization should eliminate nop jumps. It
+ is always safe to emit a nop. */
+ if (branch_to_delay_slot_p (insn))
+ return "nop";
+
+ /* The doubleword form of the cmpib instruction doesn't have the LEU
+ and GTU conditions while the cmpb instruction does. Since we accept
+ zero for cmpb, we must ensure that we use cmpb for the comparison. */
+ if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
+ operands[2] = gen_rtx_REG (DImode, 0);
+ if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
+ operands[1] = gen_rtx_REG (DImode, 0);
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* A forward branch over a single nullified insn can be done with a
+ comclr instruction. This avoids a single cycle penalty due to a
+ mis-predicted branch if we fall through (branch not taken). */
+ useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
+
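+ /* In the templates below, "{com...|cmp...}" selects between the
+ two assembler dialects (the com* and cmp* mnemonic families),
+ "%#" emits a nop when the delay slot is unfilled, and ",n"
+ nullifies the delay slot. */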
+ switch (length)
+ {
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
+ else
+ strcpy (buf, "{com%I2b,|cmp%I2b,}");
+ if (GET_MODE (operands[1]) == DImode)
+ strcat (buf, "*");
+ if (negated)
+ strcat (buf, "%B3");
+ else
+ strcat (buf, "%S3");
+ if (useskip)
+ strcat (buf, " %2,%r1,%%r0");
+ else if (nullify)
+ {
+ if (branch_needs_nop_p (insn))
+ strcat (buf, ",n %2,%r1,%0%#");
+ else
+ strcat (buf, ",n %2,%r1,%0");
+ }
+ else
+ strcat (buf, " %2,%r1,%0");
+ break;
+
+ /* All long conditionals. Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "{com%I2b,|cmp%I2b,}");
+ if (GET_MODE (operands[1]) == DImode)
+ strcat (buf, "*");
+ if (negated)
+ strcat (buf, "%S3");
+ else
+ strcat (buf, "%B3");
+ strcat (buf, ",n %2,%r1,.+12\n\tb %0");
+ }
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a comb;nop rather than comiclr;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && INSN_ADDRESSES_SET_P ()
+ && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
+ - INSN_ADDRESSES (INSN_UID (insn)) - 8))
+ {
+ strcpy (buf, "{com%I2b,|cmp%I2b,}");
+ if (GET_MODE (operands[1]) == DImode)
+ strcat (buf, "*");
+ if (negated)
+ strcat (buf, "%B3 %2,%r1,%0%#");
+ else
+ strcat (buf, "%S3 %2,%r1,%0%#");
+ }
+ else
+ {
+ strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
+ if (GET_MODE (operands[1]) == DImode)
+ strcat (buf, "*");
+ if (negated)
+ strcat (buf, "%S3");
+ else
+ strcat (buf, "%B3");
+ if (nullify)
+ strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
+ else
+ strcat (buf, " %2,%r1,%%r0\n\tb %0");
+ }
+ break;
+
+ default:
+ /* The reversed conditional branch must branch over one additional
+ instruction if the delay slot is filled and needs to be extracted
+ by output_lbranch. If the delay slot is empty or this is a
+ nullified forward branch, the instruction after the reversed
+ condition branch must be nullified. */
+ if (dbr_sequence_length () == 0
+ || (nullify && forward_branch_p (insn)))
+ {
+ nullify = 1;
+ xdelay = 0;
+ operands[4] = GEN_INT (length);
+ }
+ else
+ {
+ xdelay = 1;
+ operands[4] = GEN_INT (length + 4);
+ }
+
+ /* Create a reversed conditional branch which branches around
+ the following insns. */
+ if (GET_MODE (operands[1]) != DImode)
+ {
+ if (nullify)
+ {
+ if (negated)
+ strcpy (buf,
+ "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
+ else
+ strcpy (buf,
+ "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
+ }
+ else
+ {
+ if (negated)
+ strcpy (buf,
+ "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
+ else
+ strcpy (buf,
+ "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
+ }
+ }
+ else
+ {
+ if (nullify)
+ {
+ if (negated)
+ strcpy (buf,
+ "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
+ else
+ strcpy (buf,
+ "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
+ }
+ else
+ {
+ if (negated)
+ strcpy (buf,
+ "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
+ else
+ strcpy (buf,
+ "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
+ }
+ }
+
+ output_asm_insn (buf, operands);
+ return output_lbranch (operands[0], insn, xdelay);
+ }
+ return buf;
+}
+
+/* This routine handles output of long unconditional branches that
+ exceed the maximum range of a simple branch instruction. Since
+ we don't have a register available for the branch, we save register
+ %r1 in the frame marker, load the branch destination DEST into %r1,
+ execute the branch, and restore %r1 in the delay slot of the branch.
+
+ Since long branches may have an insn in the delay slot and the
+ delay slot is used to restore %r1, we in general need to extract
+ this insn and execute it before the branch. However, to facilitate
+ use of this function by conditional branches, we also provide an
+ option to not extract the delay insn so that it will be emitted
+ after the long branch. So, if there is an insn in the delay slot,
+ it is extracted if XDELAY is nonzero.
+
+ The lengths of the various long-branch sequences are 20, 16 and 24
+ bytes for the portable runtime, non-PIC and PIC cases, respectively. */
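+/* As a hand-written sketch of the 16-byte non-PIC case (the target
+ label is illustrative):
+
+    stw %r1,-20(%r30)		; save %r1 in the frame marker
+    ldil L'target,%r1		; left part of the target address
+    be R'target(%sr4,%r1)	; external branch to the target
+    ldw -20(%r30),%r1		; restore %r1 in the delay slot  */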
+
+const char *
+output_lbranch (rtx dest, rtx insn, int xdelay)
+{
+ rtx xoperands[2];
+
+ xoperands[0] = dest;
+
+ /* First, free up the delay slot. */
+ if (xdelay && dbr_sequence_length () != 0)
+ {
+ /* We can't handle a jump in the delay slot. */
+ gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
+
+ final_scan_insn (NEXT_INSN (insn), asm_out_file,
+ optimize, 0, NULL);
+
+ /* Now delete the delay insn. */
+ SET_INSN_DELETED (NEXT_INSN (insn));
+ }
+
+ /* Output an insn to save %r1. The runtime documentation doesn't
+ specify whether the "Clean Up" slot in the caller's frame can
+ be clobbered by the callee. It isn't copied by HP's builtin
+ alloca, so this suggests that it can be clobbered if necessary.
+ The "Static Link" location is copied by HP's builtin alloca, so
+ we avoid using it. Using the cleanup slot might be a problem
+ if we have to interoperate with languages that pass cleanup
+ information. However, it should be possible to handle these
+ situations with GCC's asm feature.
+
+ The "Current RP" slot is reserved for the called procedure, so
+ we try to use it when we don't have a frame of our own. It's
+ rather unlikely that we won't have a frame when we need to emit
+ a very long branch.
+
+ Really the way to go long term is a register scavenger; goto
+ the target of the jump and find a register which we can use
+ as a scratch to hold the value in %r1. Then, we wouldn't have
+ to free up the delay slot or clobber a slot that may be needed
+ for other purposes. */
+ if (TARGET_64BIT)
+ {
+ if (actual_fsize == 0 && !df_regs_ever_live_p (2))
+ /* Use the return pointer slot in the frame marker. */
+ output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
+ else
+ /* Use the slot at -40 in the frame marker since HP's builtin
+ alloca doesn't copy it. */
+ output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
+ }
+ else
+ {
+ if (actual_fsize == 0 && !df_regs_ever_live_p (2))
+ /* Use the return pointer slot in the frame marker. */
+ output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
+ else
+ /* Use the "Clean Up" slot in the frame marker. In GCC,
+ the only other use of this location is for copying a
+ floating point double argument from a floating-point
+ register to two general registers. The copy is done
+ as an "atomic" operation when outputting a call, so it
+ won't interfere with our using the location here. */
+ output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
+ }
+
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ }
+ else if (flag_pic)
+ {
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+ if (TARGET_SOM || !TARGET_GAS)
+ {
+ xoperands[1] = gen_label_rtx ();
+ output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
+ }
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ }
+ else
+ /* Now output a very long branch to the original target. */
+ output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
+
+ /* Now restore the value of %r1 in the delay slot. */
+ if (TARGET_64BIT)
+ {
+ if (actual_fsize == 0 && !df_regs_ever_live_p (2))
+ return "ldd -16(%%r30),%%r1";
+ else
+ return "ldd -40(%%r30),%%r1";
+ }
+ else
+ {
+ if (actual_fsize == 0 && !df_regs_ever_live_p (2))
+ return "ldw -20(%%r30),%%r1";
+ else
+ return "ldw -12(%%r30),%%r1";
+ }
+}
+
+/* This routine handles all the branch-on-bit conditional branch sequences we
+ might need to generate. It handles nullification of delay slots,
+ varying length branches, negated branches and all combinations of the
+ above. It returns the appropriate output template to emit the branch. */
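+/* For instance (hand-written, register and bit position illustrative),
+
+    bb,< %r26,5,L
+
+ branches to L when bit 5 of %r26 is set (PA numbers bits from the
+ most significant end), while bb,>= branches when it is clear. */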
+
+const char *
+output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
+{
+ static char buf[100];
+ bool useskip;
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int length = get_attr_length (insn);
+ int xdelay;
+
+ /* A conditional branch to the following instruction (e.g. the delay slot) is
+ asking for a disaster. I do not think this can happen as this pattern
+ is only used when optimizing; jump optimization should eliminate the
+ jump. But be prepared just in case. */
+
+ if (branch_to_delay_slot_p (insn))
+ return "nop";
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* A forward branch over a single nullified insn can be done with an
+ extrs instruction. This avoids a single cycle penalty due to a
+ mis-predicted branch if we fall through (branch not taken). */
+ useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
+
+ switch (length)
+ {
+
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "{extrs,|extrw,s,}");
+ else
+ strcpy (buf, "bb,");
+ if (useskip && GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "extrd,s,*");
+ else if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "bb,*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (useskip)
+ strcat (buf, " %0,%1,1,%%r0");
+ else if (nullify && negated)
+ {
+ if (branch_needs_nop_p (insn))
+ strcat (buf, ",n %0,%1,%3%#");
+ else
+ strcat (buf, ",n %0,%1,%3");
+ }
+ else if (nullify && ! negated)
+ {
+ if (branch_needs_nop_p (insn))
+ strcat (buf, ",n %0,%1,%2%#");
+ else
+ strcat (buf, ",n %0,%1,%2");
+ }
+ else if (! nullify && negated)
+ strcat (buf, " %0,%1,%3");
+ else if (! nullify && ! negated)
+ strcat (buf, " %0,%1,%2");
+ break;
+
+ /* All long conditionals. Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "bb,");
+ if (GET_MODE (operands[0]) == DImode)
+ strcat (buf, "*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (negated)
+ strcat (buf, ",n %0,%1,.+12\n\tb %3");
+ else
+ strcat (buf, ",n %0,%1,.+12\n\tb %2");
+ }
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a bb;nop rather than extrs;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && INSN_ADDRESSES_SET_P ()
+ && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
+ - INSN_ADDRESSES (INSN_UID (insn)) - 8))
+ {
+ strcpy (buf, "bb,");
+ if (GET_MODE (operands[0]) == DImode)
+ strcat (buf, "*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (negated)
+ strcat (buf, " %0,%1,%3%#");
+ else
+ strcat (buf, " %0,%1,%2%#");
+ }
+ else
+ {
+ if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "extrd,s,*");
+ else
+ strcpy (buf, "{extrs,|extrw,s,}");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify && negated)
+ strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
+ else if (nullify && ! negated)
+ strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
+ else if (negated)
+ strcat (buf, " %0,%1,1,%%r0\n\tb %3");
+ else
+ strcat (buf, " %0,%1,1,%%r0\n\tb %2");
+ }
+ break;
+
+ default:
+ /* The reversed conditional branch must branch over one additional
+ instruction if the delay slot is filled and needs to be extracted
+ by output_lbranch. If the delay slot is empty or this is a
+ nullified forward branch, the instruction after the reversed
+ condition branch must be nullified. */
+ if (dbr_sequence_length () == 0
+ || (nullify && forward_branch_p (insn)))
+ {
+ nullify = 1;
+ xdelay = 0;
+ operands[4] = GEN_INT (length);
+ }
+ else
+ {
+ xdelay = 1;
+ operands[4] = GEN_INT (length + 4);
+ }
+
+ if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "bb,*");
+ else
+ strcpy (buf, "bb,");
+ if ((which == 0 && negated)
+ || (which == 1 && !negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify)
+ strcat (buf, ",n %0,%1,.+%4");
+ else
+ strcat (buf, " %0,%1,.+%4");
+ output_asm_insn (buf, operands);
+ return output_lbranch (negated ? operands[3] : operands[2],
+ insn, xdelay);
+ }
+ return buf;
+}
+
+/* This routine handles all the branch-on-variable-bit conditional branch
+ sequences we might need to generate. It handles nullification of delay
+ slots, varying length branches, negated branches and all combinations
+ of the above. It returns the appropriate output template to emit the
+ branch. */
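+/* The variable bit lives in the shift amount register. On PA 1.x it is
+ tested implicitly with "bvb"; on PA 2.0 the same test is written as
+ "bb" with %sar as the explicit bit-position operand, hence the extra
+ %%sar operand in the second half of the templates below:
+
+    bvb,< %r26,L		; PA 1.x (register and label illustrative)
+    bb,< %r26,%sar,L		; PA 2.0 spelling of the same test  */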
+
+const char *
+output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
+{
+ static char buf[100];
+ bool useskip;
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int length = get_attr_length (insn);
+ int xdelay;
+
+ /* A conditional branch to the following instruction (e.g. the delay slot) is
+ asking for a disaster. I do not think this can happen as this pattern
+ is only used when optimizing; jump optimization should eliminate the
+ jump. But be prepared just in case. */
+
+ if (branch_to_delay_slot_p (insn))
+ return "nop";
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ /* A forward branch over a single nullified insn can be done with an
+ extrs instruction. This avoids a single cycle penalty due to a
+ mis-predicted branch if we fall through (branch not taken). */
+ useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
+
+ switch (length)
+ {
+
+ /* All short conditional branches except backwards with an unfilled
+ delay slot. */
+ case 4:
+ if (useskip)
+ strcpy (buf, "{vextrs,|extrw,s,}");
+ else
+ strcpy (buf, "{bvb,|bb,}");
+ if (useskip && GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "extrd,s,*");
+ else if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "bb,*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (useskip)
+ strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
+ else if (nullify && negated)
+ {
+ if (branch_needs_nop_p (insn))
+ strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
+ else
+ strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
+ }
+ else if (nullify && ! negated)
+ {
+ if (branch_needs_nop_p (insn))
+ strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
+ else
+ strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
+ }
+ else if (! nullify && negated)
+ strcat (buf, "{ %0,%3| %0,%%sar,%3}");
+ else if (! nullify && ! negated)
+ strcat (buf, "{ %0,%2| %0,%%sar,%2}");
+ break;
+
+ /* All long conditionals. Note a short backward branch with an
+ unfilled delay slot is treated just like a long backward branch
+ with an unfilled delay slot. */
+ case 8:
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ {
+ strcpy (buf, "{bvb,|bb,}");
+ if (GET_MODE (operands[0]) == DImode)
+ strcat (buf, "*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (negated)
+ strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
+ else
+ strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
+ }
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a bb;nop rather than extrs;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && INSN_ADDRESSES_SET_P ()
+ && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
+ - INSN_ADDRESSES (INSN_UID (insn)) - 8))
+ {
+ strcpy (buf, "{bvb,|bb,}");
+ if (GET_MODE (operands[0]) == DImode)
+ strcat (buf, "*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, ">=");
+ else
+ strcat (buf, "<");
+ if (negated)
+ strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
+ else
+ strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
+ }
+ else
+ {
+ strcpy (buf, "{vextrs,|extrw,s,}");
+ if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "extrd,s,*");
+ if ((which == 0 && negated)
+ || (which == 1 && ! negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify && negated)
+ strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
+ else if (nullify && ! negated)
+ strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
+ else if (negated)
+ strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
+ else
+ strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
+ }
+ break;
+
+ default:
+ /* The reversed conditional branch must branch over one additional
+ instruction if the delay slot is filled and needs to be extracted
+ by output_lbranch. If the delay slot is empty or this is a
+ nullified forward branch, the instruction after the reversed
+ condition branch must be nullified. */
+ if (dbr_sequence_length () == 0
+ || (nullify && forward_branch_p (insn)))
+ {
+ nullify = 1;
+ xdelay = 0;
+ operands[4] = GEN_INT (length);
+ }
+ else
+ {
+ xdelay = 1;
+ operands[4] = GEN_INT (length + 4);
+ }
+
+ if (GET_MODE (operands[0]) == DImode)
+ strcpy (buf, "bb,*");
+ else
+ strcpy (buf, "{bvb,|bb,}");
+ if ((which == 0 && negated)
+ || (which == 1 && !negated))
+ strcat (buf, "<");
+ else
+ strcat (buf, ">=");
+ if (nullify)
+ strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
+ else
+ strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
+ output_asm_insn (buf, operands);
+ return output_lbranch (negated ? operands[3] : operands[2],
+ insn, xdelay);
+ }
+ return buf;
+}
+
+/* Return the output template for emitting a dbra type insn.
+
+ Note it may perform some output operations on its own before
+ returning the final output string. */
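+/* A hand-written example of the common case (alternative 0, short
+ range; register, increment and label are illustrative):
+
+    addib,<> -1,%r26,L		; add -1 to %r26, branch to L if nonzero
+
+ i.e. the decrement and the conditional branch fuse into one insn. */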
+const char *
+output_dbra (rtx *operands, rtx insn, int which_alternative)
+{
+ int length = get_attr_length (insn);
+
+ /* A conditional branch to the following instruction (e.g. the delay slot) is
+ asking for a disaster. Be prepared! */
+
+ if (branch_to_delay_slot_p (insn))
+ {
+ if (which_alternative == 0)
+ return "ldo %1(%0),%0";
+ else if (which_alternative == 1)
+ {
+ output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
+ output_asm_insn ("ldw -16(%%r30),%4", operands);
+ output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
+ return "{fldws|fldw} -16(%%r30),%0";
+ }
+ else
+ {
+ output_asm_insn ("ldw %0,%4", operands);
+ return "ldo %1(%4),%4\n\tstw %4,%0";
+ }
+ }
+
+ if (which_alternative == 0)
+ {
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int xdelay;
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ switch (length)
+ {
+ case 4:
+ if (nullify)
+ {
+ if (branch_needs_nop_p (insn))
+ return "addib,%C2,n %1,%0,%3%#";
+ else
+ return "addib,%C2,n %1,%0,%3";
+ }
+ else
+ return "addib,%C2 %1,%0,%3";
+
+ case 8:
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ return "addib,%N2,n %1,%0,.+12\n\tb %3";
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a addb;nop rather than addi;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && INSN_ADDRESSES_SET_P ()
+ && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
+ - INSN_ADDRESSES (INSN_UID (insn)) - 8))
+ return "addib,%C2 %1,%0,%3%#";
+
+ /* Handle normal cases. */
+ if (nullify)
+ return "addi,%N2 %1,%0,%0\n\tb,n %3";
+ else
+ return "addi,%N2 %1,%0,%0\n\tb %3";
+
+ default:
+ /* The reversed conditional branch must branch over one additional
+ instruction if the delay slot is filled and needs to be extracted
+ by output_lbranch. If the delay slot is empty or this is a
+ nullified forward branch, the instruction after the reversed
+ condition branch must be nullified. */
+ if (dbr_sequence_length () == 0
+ || (nullify && forward_branch_p (insn)))
+ {
+ nullify = 1;
+ xdelay = 0;
+ operands[4] = GEN_INT (length);
+ }
+ else
+ {
+ xdelay = 1;
+ operands[4] = GEN_INT (length + 4);
+ }
+
+ if (nullify)
+ output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
+ else
+ output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
+
+ return output_lbranch (operands[3], insn, xdelay);
+ }
+
+ }
+ /* Deal with gross reload from FP register case. */
+ else if (which_alternative == 1)
+ {
+ /* Move loop counter from FP register to MEM then into a GR,
+ increment the GR, store the GR into MEM, and finally reload
+ the FP register from MEM from within the branch's delay slot. */
+ output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
+ operands);
+ output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
+ if (length == 24)
+ return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
+ else if (length == 28)
+ return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
+ else
+ {
+ operands[5] = GEN_INT (length - 16);
+ output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
+ output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
+ return output_lbranch (operands[3], insn, 0);
+ }
+ }
+ /* Deal with gross reload from memory case. */
+ else
+ {
+ /* Reload loop counter from memory, the store back to memory
+ happens in the branch's delay slot. */
+ output_asm_insn ("ldw %0,%4", operands);
+ if (length == 12)
+ return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
+ else if (length == 16)
+ return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
+ else
+ {
+ operands[5] = GEN_INT (length - 4);
+ output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
+ return output_lbranch (operands[3], insn, 0);
+ }
+ }
+}
+
+/* Return the output template for emitting a movb type insn.
+
+ Note it may perform some output operations on its own before
+ returning the final output string. */
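+/* A hand-written example of alternative 0 (registers and label are
+ illustrative):
+
+    movb,= %r26,%r28,L		; copy %r26 to %r28, branch if it is zero  */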
+const char *
+output_movb (rtx *operands, rtx insn, int which_alternative,
+ int reverse_comparison)
+{
+ int length = get_attr_length (insn);
+
+ /* A conditional branch to the following instruction (e.g. the delay slot) is
+ asking for a disaster. Be prepared! */
+
+ if (branch_to_delay_slot_p (insn))
+ {
+ if (which_alternative == 0)
+ return "copy %1,%0";
+ else if (which_alternative == 1)
+ {
+ output_asm_insn ("stw %1,-16(%%r30)", operands);
+ return "{fldws|fldw} -16(%%r30),%0";
+ }
+ else if (which_alternative == 2)
+ return "stw %1,%0";
+ else
+ return "mtsar %r1";
+ }
+
+ /* Support the second variant. */
+ if (reverse_comparison)
+ PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
+
+ if (which_alternative == 0)
+ {
+ int nullify = INSN_ANNULLED_BRANCH_P (insn);
+ int xdelay;
+
+ /* If this is a long branch with its delay slot unfilled, set `nullify'
+ as it can nullify the delay slot and save a nop. */
+ if (length == 8 && dbr_sequence_length () == 0)
+ nullify = 1;
+
+ /* If this is a short forward conditional branch which did not get
+ its delay slot filled, the delay slot can still be nullified. */
+ if (! nullify && length == 4 && dbr_sequence_length () == 0)
+ nullify = forward_branch_p (insn);
+
+ switch (length)
+ {
+ case 4:
+ if (nullify)
+ {
+ if (branch_needs_nop_p (insn))
+ return "movb,%C2,n %1,%0,%3%#";
+ else
+ return "movb,%C2,n %1,%0,%3";
+ }
+ else
+ return "movb,%C2 %1,%0,%3";
+
+ case 8:
+ /* Handle weird backwards branch with a filled delay slot
+ which is nullified. */
+ if (dbr_sequence_length () != 0
+ && ! forward_branch_p (insn)
+ && nullify)
+ return "movb,%N2,n %1,%0,.+12\n\tb %3";
+
+ /* Handle short backwards branch with an unfilled delay slot.
+ Using a movb;nop rather than or;bl saves 1 cycle for both
+ taken and untaken branches. */
+ else if (dbr_sequence_length () == 0
+ && ! forward_branch_p (insn)
+ && INSN_ADDRESSES_SET_P ()
+ && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
+ - INSN_ADDRESSES (INSN_UID (insn)) - 8))
+ return "movb,%C2 %1,%0,%3%#";
+ /* Handle normal cases. */
+ if (nullify)
+ return "or,%N2 %1,%%r0,%0\n\tb,n %3";
+ else
+ return "or,%N2 %1,%%r0,%0\n\tb %3";
+
+ default:
+ /* The reversed conditional branch must branch over one additional
+ instruction if the delay slot is filled and needs to be extracted
+ by output_lbranch. If the delay slot is empty or this is a
+ nullified forward branch, the instruction after the reversed
+ condition branch must be nullified. */
+ if (dbr_sequence_length () == 0
+ || (nullify && forward_branch_p (insn)))
+ {
+ nullify = 1;
+ xdelay = 0;
+ operands[4] = GEN_INT (length);
+ }
+ else
+ {
+ xdelay = 1;
+ operands[4] = GEN_INT (length + 4);
+ }
+
+ if (nullify)
+ output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
+ else
+ output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
+
+ return output_lbranch (operands[3], insn, xdelay);
+ }
+ }
+ /* Deal with gross reload for FP destination register case. */
+ else if (which_alternative == 1)
+ {
+ /* Move source register to MEM, perform the branch test, then
+ finally load the FP register from MEM from within the branch's
+ delay slot. */
+ output_asm_insn ("stw %1,-16(%%r30)", operands);
+ if (length == 12)
+ return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
+ else if (length == 16)
+ return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
+ else
+ {
+ operands[4] = GEN_INT (length - 4);
+ output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
+ output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
+ return output_lbranch (operands[3], insn, 0);
+ }
+ }
+ /* Deal with gross reload from memory case. */
+ else if (which_alternative == 2)
+ {
+ /* Reload loop counter from memory, the store back to memory
+ happens in the branch's delay slot. */
+ if (length == 8)
+ return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
+ else if (length == 12)
+ return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
+ else
+ {
+ operands[4] = GEN_INT (length);
+ output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
+ operands);
+ return output_lbranch (operands[3], insn, 0);
+ }
+ }
+ /* Handle SAR as a destination. */
+ else
+ {
+ if (length == 8)
+ return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
+ else if (length == 12)
+ return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
+ else
+ {
+ operands[4] = GEN_INT (length);
+ output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
+ operands);
+ return output_lbranch (operands[3], insn, 0);
+ }
+ }
+}
+
+/* Copy any FP arguments in INSN into integer registers. */
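+/* There is no direct copy path between the floating-point and general
+ register files on PA, so each copy goes through the frame marker.
+ For an SFmode argument this is roughly (registers illustrative)
+
+    fstw %fr4,-16(%sr0,%r30)
+    ldw -16(%sr0,%r30),%r26  */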
+static void
+copy_fp_args (rtx insn)
+{
+ rtx link;
+ rtx xoperands[2];
+
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+ {
+ int arg_mode, regno;
+ rtx use = XEXP (link, 0);
+
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+
+ /* Is it a floating point register? */
+ if (regno >= 32 && regno <= 39)
+ {
+ /* Copy the FP register into an integer register via memory. */
+ if (arg_mode == SFmode)
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
+ output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+ else
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
+ output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+ }
+ }
+}
+
+/* Compute length of the FP argument copy sequence for INSN. */
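+/* The lengths mirror copy_fp_args: an SFmode copy is a store plus a
+ load (8 bytes) and a DFmode copy is a store plus two loads (12
+ bytes). */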
+static int
+length_fp_args (rtx insn)
+{
+ int length = 0;
+ rtx link;
+
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+ {
+ int arg_mode, regno;
+ rtx use = XEXP (link, 0);
+
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+
+ /* Is it a floating point register? */
+ if (regno >= 32 && regno <= 39)
+ {
+ if (arg_mode == SFmode)
+ length += 8;
+ else
+ length += 12;
+ }
+ }
+
+ return length;
+}
+
+/* Return the attribute length for the millicode call instruction INSN.
+ The length must match the code generated by output_millicode_call.
+ We include the delay slot in the returned length as it is better to
+ overestimate the length than to underestimate it. */
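+/* The values below correspond to the sequences emitted by
+ output_millicode_call: 8 bytes for a reachable pc-relative call,
+ 12 for a long absolute call, 20 for the long 64-bit sequence, and
+ 24 for the portable runtime and long pic sequences. */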
+
+int
+attr_length_millicode_call (rtx insn)
+{
+ unsigned long distance = -1;
+ unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
+
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ distance = (total + insn_current_reference_address (insn));
+ if (distance < total)
+ distance = -1;
+ }
+
+ if (TARGET_64BIT)
+ {
+ if (!TARGET_LONG_CALLS && distance < 7600000)
+ return 8;
+
+ return 20;
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
+ return 24;
+ else
+ {
+ if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
+ return 8;
+
+ if (TARGET_LONG_ABS_CALL && !flag_pic)
+ return 12;
+
+ return 24;
+ }
+}
+
+/* INSN is a function call. It may have an unconditional jump
+ in its delay slot.
+
+ CALL_DEST is the routine we are calling. */
+
+const char *
+output_millicode_call (rtx insn, rtx call_dest)
+{
+ int attr_length = get_attr_length (insn);
+ int seq_length = dbr_sequence_length ();
+ int distance;
+ rtx seq_insn;
+ rtx xoperands[3];
+
+ xoperands[0] = call_dest;
+ xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
+
+ /* Handle the common case where we are sure that the branch will
+ reach the beginning of the $CODE$ subspace. The within-reach
+ form of the $$sh_func_adrs call has a length of 28. Because it
+ has an attribute type of sh_func_adrs, it never has a nonzero
+ sequence length (i.e., the delay slot is never filled). */
+ if (!TARGET_LONG_CALLS
+ && (attr_length == 8
+ || (attr_length == 28
+ && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
+ {
+ output_asm_insn ("{bl|b,l} %0,%2", xoperands);
+ }
+ else
+ {
+ if (TARGET_64BIT)
+ {
+ /* It might seem that one insn could be saved by accessing
+ the millicode function using the linkage table. However,
+ this doesn't work in shared libraries and other dynamically
+ loaded objects. Using a pc-relative sequence also avoids
+ problems related to the implicit use of the gp register. */
+ output_asm_insn ("b,l .+8,%%r1", xoperands);
+
+ if (TARGET_GAS)
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ xoperands[1] = gen_label_rtx ();
+ output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
+ }
+
+ output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
+ {
+ /* Pure portable runtime doesn't allow be/ble; we also don't
+ have PIC support in the assembler/linker, so this sequence
+ is needed. */
+
+ /* Get the address of our target into %r1. */
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
+
+ /* Get our return address into %r31. */
+ output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
+ output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
+
+ /* Jump to our target address in %r1. */
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ }
+ else if (!flag_pic)
+ {
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ if (TARGET_PA_20)
+ output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
+ else
+ output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+ output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
+
+ if (TARGET_SOM || !TARGET_GAS)
+ {
+ /* The HP assembler can generate relocations for the
+ difference of two symbols. GAS can do this for a
+ millicode symbol but not an arbitrary external
+ symbol when generating SOM output. */
+ xoperands[1] = gen_label_rtx ();
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
+ xoperands);
+ }
+
+ /* Jump to our target address in %r1. */
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ }
+ }
+
+ if (seq_length == 0)
+ output_asm_insn ("nop", xoperands);
+
+ /* We are done if there isn't a jump in the delay slot. */
+ if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
+ return "";
+
+ /* This call has an unconditional jump in its delay slot. */
+ xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+
+ /* See if the return address can be adjusted. Use the containing
+ sequence insn's address. */
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
+ distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
+ - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
+
+ if (VAL_14_BITS_P (distance))
+ {
+ xoperands[1] = gen_label_rtx ();
+ output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ }
+ else
+ /* ??? This branch may not reach its target. */
+ output_asm_insn ("nop\n\tb,n %0", xoperands);
+ }
+ else
+ /* ??? This branch may not reach its target. */
+ output_asm_insn ("nop\n\tb,n %0", xoperands);
+
+ /* Delete the jump. */
+ SET_INSN_DELETED (NEXT_INSN (insn));
+
+ return "";
+}
+
+/* Return the attribute length of the call instruction INSN. The SIBCALL
+ flag indicates whether INSN is a regular call or a sibling call. The
+ length returned must be longer than the code actually generated by
+ output_call. Since branch shortening is done before delay branch
+ sequencing, there is no way to determine whether or not the delay
+ slot will be filled during branch shortening. Even when the delay
+ slot is filled, we may have to add a nop if the delay slot contains
+ a branch that can't reach its target. Thus, we always have to include
+ the delay slot in the length estimate. This used to be done in
+ pa_adjust_insn_length but we do it here now as some sequences always
+ fill the delay slot and we can save four bytes in the estimate for
+ these sequences. */
+
+int
+attr_length_call (rtx insn, int sibcall)
+{
+ int local_call;
+ rtx call, call_dest;
+ tree call_decl;
+ int length = 0;
+ rtx pat = PATTERN (insn);
+ unsigned long distance = -1;
+
+ gcc_assert (GET_CODE (insn) == CALL_INSN);
+
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ unsigned long total;
+
+ total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
+ distance = (total + insn_current_reference_address (insn));
+ if (distance < total)
+ distance = -1;
+ }
+
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ /* Get the call rtx. */
+ call = XVECEXP (pat, 0, 0);
+ if (GET_CODE (call) == SET)
+ call = SET_SRC (call);
+
+ gcc_assert (GET_CODE (call) == CALL);
+
+ /* Determine if this is a local call. */
+ call_dest = XEXP (XEXP (call, 0), 0);
+ call_decl = SYMBOL_REF_DECL (call_dest);
+ local_call = call_decl && targetm.binds_local_p (call_decl);
+
+ /* pc-relative branch. */
+ if (!TARGET_LONG_CALLS
+ && ((TARGET_PA_20 && !sibcall && distance < 7600000)
+ || distance < MAX_PCREL17F_OFFSET))
+ length += 8;
+
+ /* 64-bit plabel sequence. */
+ else if (TARGET_64BIT && !local_call)
+ length += sibcall ? 28 : 24;
+
+ /* non-pic long absolute branch sequence. */
+ else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
+ length += 12;
+
+ /* long pc-relative branch sequence. */
+ else if (TARGET_LONG_PIC_SDIFF_CALL
+ || (TARGET_GAS && !TARGET_SOM
+ && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
+ {
+ length += 20;
+
+ if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
+ length += 8;
+ }
+
+ /* 32-bit plabel sequence. */
+ else
+ {
+ length += 32;
+
+ if (TARGET_SOM)
+ length += length_fp_args (insn);
+
+ if (flag_pic)
+ length += 4;
+
+ if (!TARGET_PA_20)
+ {
+ if (!sibcall)
+ length += 8;
+
+ if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
+ length += 8;
+ }
+ }
+
+ return length;
+}
+
+/* INSN is a function call. It may have an unconditional jump
+ in its delay slot.
+
+ CALL_DEST is the routine we are calling. */
+
+const char *
+output_call (rtx insn, rtx call_dest, int sibcall)
+{
+ int delay_insn_deleted = 0;
+ int delay_slot_filled = 0;
+ int seq_length = dbr_sequence_length ();
+ tree call_decl = SYMBOL_REF_DECL (call_dest);
+ int local_call = call_decl && targetm.binds_local_p (call_decl);
+ rtx xoperands[2];
+
+ xoperands[0] = call_dest;
+
+ /* Handle the common case where we're sure that the branch will reach
+ the beginning of the "$CODE$" subspace. This is the beginning of
+ the current function if we are in a named section. */
+ if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
+ {
+ xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
+ output_asm_insn ("{bl|b,l} %0,%1", xoperands);
+ }
+ else
+ {
+ if (TARGET_64BIT && !local_call)
+ {
+ /* ??? As far as I can tell, the HP linker doesn't support the
+ long pc-relative sequence described in the 64-bit runtime
+ architecture. So, we use a slightly longer indirect call. */
+ xoperands[0] = get_deferred_plabel (call_dest);
+ xoperands[1] = gen_label_rtx ();
+
+ /* If this isn't a sibcall, we put the load of %r27 into the
+ delay slot. We can't do this in a sibcall as we don't
+ have a second call-clobbered scratch register available. */
+ if (seq_length != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
+ && !sibcall)
+ {
+ final_scan_insn (NEXT_INSN (insn), asm_out_file,
+ optimize, 0, NULL);
+
+ /* Now delete the delay insn. */
+ SET_INSN_DELETED (NEXT_INSN (insn));
+ delay_insn_deleted = 1;
+ }
+
+ output_asm_insn ("addil LT'%0,%%r27", xoperands);
+ output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
+ output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
+
+ if (sibcall)
+ {
+ output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
+ output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
+ output_asm_insn ("bve (%%r1)", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
+ output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
+ output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
+ delay_slot_filled = 1;
+ }
+ }
+ else
+ {
+ int indirect_call = 0;
+
+ /* Emit a long call. There are several different sequences
+ of increasing length and complexity. In most cases,
+ they don't allow an instruction in the delay slot. */
+ if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
+ && !TARGET_LONG_PIC_SDIFF_CALL
+ && !(TARGET_GAS && !TARGET_SOM
+ && (TARGET_LONG_PIC_PCREL_CALL || local_call))
+ && !TARGET_64BIT)
+ indirect_call = 1;
+
+ if (seq_length != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
+ && !sibcall
+ && (!TARGET_PA_20
+ || indirect_call
+ || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
+ {
+ /* A non-jump insn in the delay slot. By definition we can
+ emit this insn before the call (and in fact before argument
+ relocating). */
+ final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
+ NULL);
+
+ /* Now delete the delay insn. */
+ SET_INSN_DELETED (NEXT_INSN (insn));
+ delay_insn_deleted = 1;
+ }
+
+ if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
+ {
+ /* This is the best sequence for making long calls in
+ non-pic code. Unfortunately, GNU ld doesn't provide
+ the stub needed for external calls, and GAS's support
+ for this with the SOM linker is buggy. It is safe
+ to use this for local calls. */
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ if (sibcall)
+ output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
+ else
+ {
+ if (TARGET_PA_20)
+ output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
+ xoperands);
+ else
+ output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
+
+ output_asm_insn ("copy %%r31,%%r2", xoperands);
+ delay_slot_filled = 1;
+ }
+ }
+ else
+ {
+ if (TARGET_LONG_PIC_SDIFF_CALL)
+ {
+ /* The HP assembler and linker can handle relocations
+ for the difference of two symbols. The HP assembler
+ recognizes the sequence as a pc-relative call and
+ the linker provides stubs when needed. */
+ xoperands[1] = gen_label_rtx ();
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+ output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
+ }
+ else if (TARGET_GAS && !TARGET_SOM
+ && (TARGET_LONG_PIC_PCREL_CALL || local_call))
+ {
+ /* GAS currently can't generate the relocations that
+ are needed for the SOM linker under HP-UX using this
+ sequence. The GNU linker doesn't generate the stubs
+ that are needed for external calls on TARGET_ELF32
+ with this sequence. For now, we have to use a
+ longer plabel sequence when using GAS. */
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
+ xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
+ xoperands);
+ }
+ else
+ {
+ /* Emit a long plabel-based call sequence. This is
+ essentially an inline implementation of $$dyncall.
+ We don't actually try to call $$dyncall as this is
+ as difficult as calling the function itself. */
+ xoperands[0] = get_deferred_plabel (call_dest);
+ xoperands[1] = gen_label_rtx ();
+
+ /* Since the call is indirect, FP arguments in registers
+ need to be copied to the general registers. Then, the
+ argument relocation stub will copy them back. */
+ if (TARGET_SOM)
+ copy_fp_args (insn);
+
+ if (flag_pic)
+ {
+ output_asm_insn ("addil LT'%0,%%r19", xoperands);
+ output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
+ output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil LR'%0-$global$,%%r27",
+ xoperands);
+ output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
+ xoperands);
+ }
+
+ output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
+ output_asm_insn ("depi 0,31,2,%%r1", xoperands);
+ output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
+
+ if (!sibcall && !TARGET_PA_20)
+ {
+ output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
+ if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
+ output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
+ else
+ output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
+ }
+ }
+
+ if (TARGET_PA_20)
+ {
+ if (sibcall)
+ output_asm_insn ("bve (%%r1)", xoperands);
+ else
+ {
+ if (indirect_call)
+ {
+ output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
+ output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
+ delay_slot_filled = 1;
+ }
+ else
+ output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
+ }
+ }
+ else
+ {
+ if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
+ output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
+ xoperands);
+
+ if (sibcall)
+ {
+ if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
+ output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
+ else
+ output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
+ }
+ else
+ {
+ if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
+ output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
+ else
+ output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
+
+ if (indirect_call)
+ output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
+ else
+ output_asm_insn ("copy %%r31,%%r2", xoperands);
+ delay_slot_filled = 1;
+ }
+ }
+ }
+ }
+ }
+
+ if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
+ output_asm_insn ("nop", xoperands);
+
+ /* We are done if there isn't a jump in the delay slot. */
+ if (seq_length == 0
+ || delay_insn_deleted
+ || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
+ return "";
+
+ /* A sibcall should never have a branch in the delay slot. */
+ gcc_assert (!sibcall);
+
+ /* This call has an unconditional jump in its delay slot. */
+ xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+
+ if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
+ {
+ /* See if the return address can be adjusted. Use the containing
+ sequence insn's address. This would break the regular call/return
+ relationship assumed by the table-based eh unwinder, so only do that
+ if the call is not possibly throwing. */
+ rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
+ int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
+ - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
+
+ if (VAL_14_BITS_P (distance)
+ && !(can_throw_internal (insn) || can_throw_external (insn)))
+ {
+ xoperands[1] = gen_label_rtx ();
+ output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[1]));
+ }
+ else
+ output_asm_insn ("nop\n\tb,n %0", xoperands);
+ }
+ else
+ output_asm_insn ("b,n %0", xoperands);
+
+ /* Delete the jump. */
+ SET_INSN_DELETED (NEXT_INSN (insn));
+
+ return "";
+}
+
+/* Return the attribute length of the indirect call instruction INSN.
+ The length must match the code generated by output_indirect_call.
+ The returned length includes the delay slot. Currently, the delay
+ slot of an indirect call sequence is not exposed and it is used by
+ the sequence itself. */
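+/* The values below mirror output_indirect_call: 12 bytes for the 64-bit
+ bve sequence, 8 when $$dyncall is reachable or fast indirect calls are
+ enabled, 24 for the long pic sequence, 20 for the portable runtime
+ sequence, and 12 for the out-of-reach ble sequence. */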
+
+int
+attr_length_indirect_call (rtx insn)
+{
+ unsigned long distance = -1;
+ unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
+
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ distance = (total + insn_current_reference_address (insn));
+ if (distance < total)
+ distance = -1;
+ }
+
+ if (TARGET_64BIT)
+ return 12;
+
+ if (TARGET_FAST_INDIRECT_CALLS
+ || (!TARGET_PORTABLE_RUNTIME
+ && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
+ || distance < MAX_PCREL17F_OFFSET)))
+ return 8;
+
+ if (flag_pic)
+ return 24;
+
+ if (TARGET_PORTABLE_RUNTIME)
+ return 20;
+
+ /* Out of reach, can use ble. */
+ return 12;
+}
+
+const char *
+output_indirect_call (rtx insn, rtx call_dest)
+{
+ rtx xoperands[1];
+
+ if (TARGET_64BIT)
+ {
+ xoperands[0] = call_dest;
+ output_asm_insn ("ldd 16(%0),%%r2", xoperands);
+ output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
+ return "";
+ }
+
+ /* First the special case for kernels, level 0 systems, etc. */
+ if (TARGET_FAST_INDIRECT_CALLS)
+ return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
+
+ /* Now the normal case -- we can reach $$dyncall directly or
+ we're sure that we can get there via a long-branch stub.
+
+ No need to check target flags as the length uniquely identifies
+ the remaining cases. */
+ if (attr_length_indirect_call (insn) == 8)
+ {
+ /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
+ $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
+ variant of the B,L instruction can't be used on the SOM target. */
+ if (TARGET_PA_20 && !TARGET_SOM)
+ return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
+ else
+ return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
+ }
+
+ /* Long millicode call, but we are not generating PIC or portable runtime
+ code. */
+ if (attr_length_indirect_call (insn) == 12)
+ return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
+
+ /* Long millicode call for portable runtime. */
+ if (attr_length_indirect_call (insn) == 20)
+ return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
+
+ /* We need a long PIC call to $$dyncall. */
+ xoperands[0] = NULL_RTX;
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+ if (TARGET_SOM || !TARGET_GAS)
+ {
+ xoperands[0] = gen_label_rtx ();
+ output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (xoperands[0]));
+ output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
+ xoperands);
+ }
+ output_asm_insn ("blr %%r0,%%r2", xoperands);
+ output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
+ return "";
+}
+
+/* Return the total length of the save and restore instructions needed for
+ the data linkage table pointer (i.e., the PIC register) across the call
+ instruction INSN. No-return calls do not require a save and restore.
+ In addition, we may be able to avoid the save and restore for calls
+ within the same translation unit. */
+
+int
+attr_length_save_restore_dltp (rtx insn)
+{
+ if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
+ return 0;
+
+ return 8;
+}
+
+/* In HPUX 8.0's shared library scheme, special relocations are needed
+ for function labels if they might be passed to a function
+ in a shared library (because shared libraries don't live in code
+ space), and special magic is needed to construct their address. */
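+/* For example, the name "foo" of a function is rewritten to "@foo".
+ FUNCTION_NAME_P keys off the '@' to recognize function labels, and
+ pa_strip_name_encoding below removes it again for output. */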
+
+void
+hppa_encode_label (rtx sym)
+{
+ const char *str = XSTR (sym, 0);
+ int len = strlen (str) + 1;
+ char *newstr, *p;
+
+ p = newstr = XALLOCAVEC (char, len + 1);
+ *p++ = '@';
+ strcpy (p, str);
+
+ XSTR (sym, 0) = ggc_alloc_string (newstr, len);
+}
+
+static void
+pa_encode_section_info (tree decl, rtx rtl, int first)
+{
+ int old_referenced = 0;
+
+ if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
+ old_referenced
+ = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
+
+ default_encode_section_info (decl, rtl, first);
+
+ if (first && TEXT_SPACE_P (decl))
+ {
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ hppa_encode_label (XEXP (rtl, 0));
+ }
+ else if (old_referenced)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
+}
+
+/* This is sort of the inverse of pa_encode_section_info. */
+
+static const char *
+pa_strip_name_encoding (const char *str)
+{
+ str += (*str == '@');
+ str += (*str == '*');
+ return str;
+}
+
+int
+function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
+}
+
+/* Returns 1 if OP is a function label involved in a simple addition
+ with a constant. Used to keep certain patterns from matching
+ during instruction combination. */
+int
+is_function_label_plus_const (rtx op)
+{
+ /* Strip off any CONST. */
+ if (GET_CODE (op) == CONST)
+ op = XEXP (op, 0);
+
+ return (GET_CODE (op) == PLUS
+ && function_label_operand (XEXP (op, 0), Pmode)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+}
+
+/* Output assembly code for a thunk to FUNCTION. */
+
+static void
+pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ static unsigned int current_thunk_number;
+ int val_14 = VAL_14_BITS_P (delta);
+ unsigned int old_last_address = last_address, nbytes = 0;
+ char label[16];
+ rtx xoperands[4];
+
+ xoperands[0] = XEXP (DECL_RTL (function), 0);
+ xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
+ xoperands[2] = GEN_INT (delta);
+
+ ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
+ fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
+
+ /* Output the thunk. We know that the function is in the same
+ translation unit (i.e., the same space) as the thunk, and that
+ thunks are output after their method. Thus, we don't need an
+ external branch to reach the function. With SOM and GAS,
+ functions and thunks are effectively in different sections.
+ Thus, we can always use an IA-relative branch and the linker
+ will add a long branch stub if necessary.
+
+ However, we have to be careful when generating PIC code on the
+ SOM port to ensure that the sequence does not transfer to an
+ import stub for the target function as this could clobber the
+ return value saved at SP-24. This would also apply to the
+ 32-bit linux port if the multi-space model is implemented. */
+ if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && !(flag_pic && TREE_PUBLIC (function))
+ && (TARGET_GAS || last_address < 262132))
+ || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
+ && ((targetm.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) != NULL
+ /* The GNU 64-bit linker has rather poor stub management.
+ So, we use a long branch from thunks that aren't in
+ the same section as the target function. */
+ && ((!TARGET_64BIT
+ && (DECL_SECTION_NAME (thunk_fndecl)
+ != DECL_SECTION_NAME (function)))
+ || ((DECL_SECTION_NAME (thunk_fndecl)
+ == DECL_SECTION_NAME (function))
+ && last_address < 262132)))
+ || (targetm.have_named_sections
+ && DECL_SECTION_NAME (thunk_fndecl) == NULL
+ && DECL_SECTION_NAME (function) == NULL
+ && last_address < 262132)
+ || (!targetm.have_named_sections && last_address < 262132))))
+ {
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("b %0", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 8;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 12;
+ }
+ }
+ else if (TARGET_64BIT)
+ {
+ /* We only have one call-clobbered scratch register, so we can't
+ make use of the delay slot if delta doesn't fit in 14 bits. */
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ }
+
+ output_asm_insn ("b,l .+8,%%r1", xoperands);
+
+ if (TARGET_GAS)
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
+ }
+ else
+ {
+ xoperands[3] = GEN_INT (val_14 ? 8 : 16);
+ output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
+ }
+
+ if (val_14)
+ {
+ output_asm_insn ("bv %%r0(%%r1)", xoperands);
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+ nbytes += 24;
+ }
+ }
+ else if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn ("ldil L'%0,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
+
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 16;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 20;
+ }
+ }
+ else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ /* The function is accessible from outside this module. The only
+ way to avoid an import stub between the thunk and function is to
+ call the function directly with an indirect sequence similar to
+ that used by $$dyncall. This is possible because $$dyncall acts
+ as the import stub in an indirect call. */
+ ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
+ xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
+ output_asm_insn ("addil LT'%3,%%r19", xoperands);
+ output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+ output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
+ output_asm_insn ("depi 0,31,2,%%r22", xoperands);
+ output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
+ output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
+
+ if (!val_14)
+ {
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+ nbytes += 4;
+ }
+
+ if (TARGET_PA_20)
+ {
+ output_asm_insn ("bve (%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else if (TARGET_NO_SPACE_REGS)
+ {
+ output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
+ nbytes += 36;
+ }
+ else
+ {
+ output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
+ output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
+ output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
+ nbytes += 44;
+ }
+
+ if (val_14)
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ else
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ }
+ else if (flag_pic)
+ {
+ output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+
+ if (TARGET_SOM || !TARGET_GAS)
+ {
+ output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
+ }
+ else
+ {
+ output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
+ output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
+ }
+
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("bv %%r0(%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 20;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 24;
+ }
+ }
+ else
+ {
+ if (!val_14)
+ output_asm_insn ("addil L'%2,%%r26", xoperands);
+
+ output_asm_insn ("ldil L'%0,%%r22", xoperands);
+ output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
+
+ if (val_14)
+ {
+ output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
+ nbytes += 12;
+ }
+ else
+ {
+ output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
+ nbytes += 16;
+ }
+ }
+
+ fprintf (file, "\t.EXIT\n\t.PROCEND\n");
+
+ if (TARGET_SOM && TARGET_GAS)
+ {
+ /* We're done with this subspace except possibly for some additional
+ debug information. Forget that we are in this subspace to ensure
+ that the next function is output in its own subspace. */
+ in_section = NULL;
+ cfun->machine->in_nsubspa = 2;
+ }
+
+ if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
+ {
+ switch_to_section (data_section);
+ output_asm_insn (".align 4", xoperands);
+ ASM_OUTPUT_LABEL (file, label);
+ output_asm_insn (".word P'%0", xoperands);
+ }
+
+ current_thunk_number++;
+ nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
+ & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
+ last_address += nbytes;
+ if (old_last_address > last_address)
+ last_address = UINT_MAX;
+ update_total_code_bytes (nbytes);
+}
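+
+/* Illustrative only: in the final fallback case above (32-bit, non-PIC,
+   delta fitting in 14 bits), the emitted thunk is roughly
+
+       ldil L'<function>,%r22
+       be R'<function>(%sr4,%r22)
+       ldo <delta>(%r26),%r26     ; adjust `this' in the delay slot
+
+   which is the 12-byte count added to NBYTES in that branch. */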
+
+/* Only direct calls to static functions are allowed to be sibling (tail)
+ call optimized.
+
+ This restriction is necessary because some linker generated stubs will,
+ in some cases, store return pointers into rp', which might clobber a
+ live value already in rp'.
+
+ In a sibcall the current function and the target function share stack
+ space. Thus, if the path to the current function and the path to the
+ target function save a value in rp', they save the value into the
+ same stack slot, which has undesirable consequences.
+
+ Because of the deferred binding nature of shared libraries, any function
+ with external scope could be in a different load module and thus require
+ rp' to be saved when calling that function. So sibcall optimizations
+ can only be safe for static functions.
+
+ Note that GCC never needs return value relocations, so we don't have to
+ worry about static calls with return value relocations (which require
+ saving rp').
+
+ It is safe to perform a sibcall optimization when the target function
+ will never return. */
+static bool
+pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ if (TARGET_PORTABLE_RUNTIME)
+ return false;
+
+ /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
+ single subspace mode and the call is not indirect. As far as I know,
+ there is no operating system support for the multiple subspace mode.
+ It might be possible to support indirect calls if we didn't use
+ $$dyncall (see the indirect sequence generated in output_call). */
+ if (TARGET_ELF32)
+ return (decl != NULL_TREE);
+
+ /* Sibcalls are not ok because the arg pointer register is not a fixed
+ register. This prevents the sibcall optimization from occurring. In
+ addition, there are problems with stub placement using GNU ld. This
+ is because a normal sibcall branch uses a 17-bit relocation while
+ a regular call branch uses a 22-bit relocation. As a result, more
+ care needs to be taken in the placement of long-branch stubs. */
+ if (TARGET_64BIT)
+ return false;
+
+ /* Sibcalls are only ok within a translation unit. */
+ return (decl && !TREE_PUBLIC (decl));
+}
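+
+/* Illustrative only (hypothetical functions): on a 32-bit target the
+   predicate above accepts a direct call to a static function but
+   rejects a call to a public one:
+
+     static int helper (int x) { return x + 1; }
+     int f (int x) { return helper (x); }    sibcall candidate
+
+     extern int lib_fn (int);
+     int g (int x) { return lib_fn (x); }    rejected: TREE_PUBLIC
+*/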
+
+/* ??? Addition is not commutative on the PA due to the weird implicit
+ space register selection rules for memory addresses. Therefore, we
+ don't consider a + b == b + a, as this might be inside a MEM. */
+static bool
+pa_commutative_p (const_rtx x, int outer_code)
+{
+ return (COMMUTATIVE_P (x)
+ && (TARGET_NO_SPACE_REGS
+ || (outer_code != UNKNOWN && outer_code != MEM)
+ || GET_CODE (x) != PLUS));
+}
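+
+/* For example (illustrative): in (mem:SI (plus:SI (reg r25) (reg r26)))
+   the implicit space register is selected from one particular operand of
+   the PLUS, so commuting the registers could change which space the
+   address references; the same PLUS outside a MEM commutes freely. */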
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpyadd instructions. */
+int
+fmpyaddoperands (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (! (mode == GET_MODE (operands[1])
+ && mode == GET_MODE (operands[2])
+ && mode == GET_MODE (operands[3])
+ && mode == GET_MODE (operands[4])
+ && mode == GET_MODE (operands[5])))
+ return 0;
+
+ /* All operands must be registers. */
+ if (! (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[4]) == REG
+ && GET_CODE (operands[5]) == REG))
+ return 0;
+
+ /* Only 2 real operands to the addition. One of the input operands must
+ be the same as the output operand. */
+ if (! rtx_equal_p (operands[3], operands[4])
+ && ! rtx_equal_p (operands[3], operands[5]))
+ return 0;
+
+ /* Inout operand of add cannot conflict with any operands from multiply. */
+ if (rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))
+ return 0;
+
+ /* The multiply result cannot feed into the addition operands. */
+ if (rtx_equal_p (operands[4], operands[0])
+ || rtx_equal_p (operands[5], operands[0]))
+ return 0;
+
+ /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
+ if (mode == SFmode
+ && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
+ return 0;
+
+ /* Passed. Operands are suitable for fmpyadd. */
+ return 1;
+}
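+
+/* Illustrative only (hypothetical DFmode register assignment): the set
+
+     operands[0..2] = fr8, fr9, fr10     fr8 = fr9 * fr10
+     operands[3..5] = fr11, fr11, fr12   fr11 = fr11 + fr12
+
+   passes every test above: the add reuses its destination as a source
+   and no operand of the add overlaps the multiply's registers. */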
+
+#if !defined(USE_COLLECT2)
+static void
+pa_asm_out_constructor (rtx symbol, int priority)
+{
+ if (!function_label_operand (symbol, VOIDmode))
+ hppa_encode_label (symbol);
+
+#ifdef CTORS_SECTION_ASM_OP
+ default_ctor_section_asm_out_constructor (symbol, priority);
+#else
+# ifdef TARGET_ASM_NAMED_SECTION
+ default_named_section_asm_out_constructor (symbol, priority);
+# else
+ default_stabs_asm_out_constructor (symbol, priority);
+# endif
+#endif
+}
+
+static void
+pa_asm_out_destructor (rtx symbol, int priority)
+{
+ if (!function_label_operand (symbol, VOIDmode))
+ hppa_encode_label (symbol);
+
+#ifdef DTORS_SECTION_ASM_OP
+ default_dtor_section_asm_out_destructor (symbol, priority);
+#else
+# ifdef TARGET_ASM_NAMED_SECTION
+ default_named_section_asm_out_destructor (symbol, priority);
+# else
+ default_stabs_asm_out_destructor (symbol, priority);
+# endif
+#endif
+}
+#endif
+
+/* This function places uninitialized global data in the bss section.
+ The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
+ function on the SOM port to prevent uninitialized global data from
+ being placed in the data section. */
+
+void
+pa_asm_output_aligned_bss (FILE *stream,
+ const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ switch_to_section (bss_section);
+ fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
+
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
+#endif
+
+#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
+ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+#endif
+
+ fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
+ ASM_OUTPUT_LABEL (stream, name);
+ fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+}
+
+/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
+ that doesn't allow the alignment of global common storage to be directly
+ specified. The SOM linker aligns common storage based on the rounded
+ value of the NUM_BYTES parameter in the .comm directive. It's not
+ possible to use the .align directive as it doesn't affect the alignment
+ of the label associated with a .comm directive. */
+
+void
+pa_asm_output_aligned_common (FILE *stream,
+ const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ unsigned int max_common_align;
+
+ max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
+ if (align > max_common_align)
+ {
+ warning (0, "alignment (%u) for %s exceeds maximum alignment "
+ "for global common data. Using %u",
+ align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
+ align = max_common_align;
+ }
+
+ switch_to_section (bss_section);
+
+ assemble_name (stream, name);
+ fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
+ MAX (size, align / BITS_PER_UNIT));
+}
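+
+/* Illustrative output for a hypothetical 4-byte symbol `foo' requesting
+   8-byte alignment: the size argument is bumped to MAX (4, 8), giving
+
+       foo	.comm 8
+
+   so the SOM linker rounds the storage to 8 bytes and thereby aligns
+   the label. */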
+
+/* We can't use .comm for local common storage as the SOM linker effectively
+ treats the symbol as universal and uses the same storage for local symbols
+ with the same name in different object files. The .block directive
+ reserves an uninitialized block of storage. However, it's not common
+ storage. Fortunately, GCC never requests common storage with the same
+ name in any given translation unit. */
+
+void
+pa_asm_output_aligned_local (FILE *stream,
+ const char *name,
+ unsigned HOST_WIDE_INT size,
+ unsigned int align)
+{
+ switch_to_section (bss_section);
+ fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
+
+#ifdef LOCAL_ASM_OP
+ fprintf (stream, "%s", LOCAL_ASM_OP);
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+#endif
+
+ ASM_OUTPUT_LABEL (stream, name);
+ fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
+}
+
+/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
+ use in fmpysub instructions. */
+int
+fmpysuboperands (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ /* Must be a floating point mode. */
+ if (mode != SFmode && mode != DFmode)
+ return 0;
+
+ /* All modes must be the same. */
+ if (! (mode == GET_MODE (operands[1])
+ && mode == GET_MODE (operands[2])
+ && mode == GET_MODE (operands[3])
+ && mode == GET_MODE (operands[4])
+ && mode == GET_MODE (operands[5])))
+ return 0;
+
+ /* All operands must be registers. */
+ if (! (GET_CODE (operands[1]) == REG
+ && GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG
+ && GET_CODE (operands[4]) == REG
+ && GET_CODE (operands[5]) == REG))
+ return 0;
+
+ /* Only 2 real operands to the subtraction. Subtraction is not a commutative
+ operation, so operands[4] must be the same as operands[3]. */
+ if (! rtx_equal_p (operands[3], operands[4]))
+ return 0;
+
+ /* The multiply result cannot feed into the subtraction. */
+ if (rtx_equal_p (operands[5], operands[0]))
+ return 0;
+
+ /* Inout operand of sub cannot conflict with any operands from multiply. */
+ if (rtx_equal_p (operands[3], operands[0])
+ || rtx_equal_p (operands[3], operands[1])
+ || rtx_equal_p (operands[3], operands[2]))
+ return 0;
+
+ /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
+ if (mode == SFmode
+ && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
+ || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
+ return 0;
+
+ /* Passed. Operands are suitable for fmpysub. */
+ return 1;
+}
+
+/* Return 1 if the given constant is 2, 4, or 8. These are the valid
+ constants for shadd instructions. */
+int
+shadd_constant_p (int val)
+{
+ return (val == 2 || val == 4 || val == 8);
+}
+
+/* Return 1 if OP is valid as a base or index register in a
+ REG+REG address. */
+
+int
+borx_reg_operand (rtx op, enum machine_mode mode)
+{
+ if (GET_CODE (op) != REG)
+ return 0;
+
+ /* We must reject virtual registers as the only expressions that
+ can be instantiated are REG and REG+CONST. */
+ if (op == virtual_incoming_args_rtx
+ || op == virtual_stack_vars_rtx
+ || op == virtual_stack_dynamic_rtx
+ || op == virtual_outgoing_args_rtx
+ || op == virtual_cfa_rtx)
+ return 0;
+
+ /* While it's always safe to index off the frame pointer, it's not
+ profitable to do so when the frame pointer is being eliminated. */
+ if (!reload_completed
+ && flag_omit_frame_pointer
+ && !cfun->calls_alloca
+ && op == frame_pointer_rtx)
+ return 0;
+
+ return register_operand (op, mode);
+}
+
+/* Return 1 if this operand is anything other than a hard register. */
+
+int
+non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
+}
+
+/* Return TRUE if INSN branches forward. */
+
+static bool
+forward_branch_p (rtx insn)
+{
+ rtx lab = JUMP_LABEL (insn);
+
+ /* The INSN must have a jump label. */
+ gcc_assert (lab != NULL_RTX);
+
+ if (INSN_ADDRESSES_SET_P ())
+ return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
+
+ while (insn)
+ {
+ if (insn == lab)
+ return true;
+ else
+ insn = NEXT_INSN (insn);
+ }
+
+ return false;
+}
+
+/* Return 1 if OP is an equality comparison, else return 0. */
+int
+eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
+}
+
+/* Return 1 if INSN is in the delay slot of a call instruction. */
+int
+jump_in_call_delay (rtx insn)
+{
+ if (GET_CODE (insn) != JUMP_INSN)
+ return 0;
+
+ if (PREV_INSN (insn)
+ && PREV_INSN (PREV_INSN (insn))
+ && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
+ {
+ rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
+
+ return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
+ && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
+
+ }
+ else
+ return 0;
+}
+
+/* Output an unconditional move and branch insn. */
+
+const char *
+output_parallel_movb (rtx *operands, rtx insn)
+{
+ int length = get_attr_length (insn);
+
+ /* These are the cases in which we win. */
+ if (length == 4)
+ return "mov%I1b,tr %1,%0,%2";
+
+ /* None of the following cases win, but they don't lose either. */
+ if (length == 8)
+ {
+ if (dbr_sequence_length () == 0)
+ {
+ /* Nothing in the delay slot, fake it by putting the combined
+ insn (the copy or add) in the delay slot of a bl. */
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return "b %2\n\tldi %1,%0";
+ else
+ return "b %2\n\tcopy %1,%0";
+ }
+ else
+ {
+ /* Something in the delay slot, but we've got a long branch. */
+ if (GET_CODE (operands[1]) == CONST_INT)
+ return "ldi %1,%0\n\tb %2";
+ else
+ return "copy %1,%0\n\tb %2";
+ }
+ }
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ output_asm_insn ("ldi %1,%0", operands);
+ else
+ output_asm_insn ("copy %1,%0", operands);
+ return output_lbranch (operands[2], insn, 1);
+}
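+
+/* Illustrative only (hypothetical operands %r25, %r26 and label L$5):
+   the three cases above produce
+
+       movb,tr %r25,%r26,L$5            4 bytes, move in the branch
+       b L$5  /  copy %r25,%r26         8 bytes, empty delay slot
+       copy %r25,%r26  /  b L$5         8 bytes, delay slot occupied
+
+   and anything longer falls through to output_lbranch. */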
+
+/* Output an unconditional add and branch insn. */
+
+const char *
+output_parallel_addb (rtx *operands, rtx insn)
+{
+ int length = get_attr_length (insn);
+
+ /* To make life easy we want operand0 to be the shared input/output
+ operand and operand1 to be the readonly operand. */
+ if (operands[0] == operands[1])
+ operands[1] = operands[2];
+
+ /* These are the cases in which we win. */
+ if (length == 4)
+ return "add%I1b,tr %1,%0,%3";
+
+ /* None of the following cases win, but they don't lose either. */
+ if (length == 8)
+ {
+ if (dbr_sequence_length () == 0)
+ /* Nothing in the delay slot, fake it by putting the combined
+ insn (the copy or add) in the delay slot of a bl. */
+ return "b %3\n\tadd%I1 %1,%0,%0";
+ else
+ /* Something in the delay slot, but we've got a long branch. */
+ return "add%I1 %1,%0,%0\n\tb %3";
+ }
+
+ output_asm_insn ("add%I1 %1,%0,%0", operands);
+ return output_lbranch (operands[3], insn, 1);
+}
+
+/* Return nonzero if INSN (a jump insn) immediately follows a call
+ to a named function. This is used to avoid filling the delay slot
+ of the jump since it can usually be eliminated by modifying RP in
+ the delay slot of the call. */
+
+int
+following_call (rtx insn)
+{
+ if (! TARGET_JUMP_IN_DELAY)
+ return 0;
+
+ /* Find the previous real insn, skipping NOTEs. */
+ insn = PREV_INSN (insn);
+ while (insn && GET_CODE (insn) == NOTE)
+ insn = PREV_INSN (insn);
+
+ /* Check for CALL_INSNs and millicode calls. */
+ if (insn
+ && ((GET_CODE (insn) == CALL_INSN
+ && get_attr_type (insn) != TYPE_DYNCALL)
+ || (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI)))
+ return 1;
+
+ return 0;
+}
+
+/* We use this hook to perform a PA specific optimization which is difficult
+ to do in earlier passes.
+
+ We want the delay slots of branches within jump tables to be filled.
+ None of the compiler passes at the moment even has the notion that a
+ PA jump table doesn't contain addresses, but instead contains actual
+ instructions!
+
+ Because we actually jump into the table, the addresses of each entry
+ must stay constant in relation to the beginning of the table (which
+ itself must stay constant relative to the instruction to jump into
+ it). I don't believe we can guarantee earlier passes of the compiler
+ will adhere to those rules.
+
+ So, late in the compilation process we find all the jump tables, and
+ expand them into real code -- e.g. each entry in the jump table vector
+ will get an appropriate label followed by a jump to the final target.
+
+ Reorg and the final jump pass can then optimize these branches and
+ fill their delay slots. We end up with smaller, more efficient code.
+
+ The jump instructions within the table are special; we must be able
+ to identify them during assembly output (if the jumps don't get filled
+ we need to emit a nop rather than nullifying the delay slot). We
+ identify jumps in switch tables by using insns with the attribute
+ type TYPE_BTABLE_BRANCH.
+
+ We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
+ insns. This serves two purposes, first it prevents jump.c from
+ noticing that the last N entries in the table jump to the instruction
+ immediately after the table and deleting the jumps. Second, those
+ insns mark where we should emit .begin_brtab and .end_brtab directives
+ when using GAS (allows for better link time optimizations). */
+
+static void
+pa_reorg (void)
+{
+ rtx insn;
+
+ remove_useless_addtr_insns (1);
+
+ if (pa_cpu < PROCESSOR_8000)
+ pa_combine_instructions ();
+
+ /* This is fairly cheap, so always run it if optimizing. */
+ if (optimize > 0 && !TARGET_BIG_SWITCH)
+ {
+ /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, tmp, location, label;
+ unsigned int length, i;
+
+ /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Emit marker for the beginning of the branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+
+ pattern = PATTERN (insn);
+ location = PREV_INSN (insn);
+ length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
+
+ for (i = 0; i < length; i++)
+ {
+ /* Emit a label before each jump to keep jump.c from
+ removing this code. */
+ tmp = gen_label_rtx ();
+ LABEL_NUSES (tmp) = 1;
+ emit_label_after (tmp, location);
+ location = NEXT_INSN (location);
+
+ if (GET_CODE (pattern) == ADDR_VEC)
+ label = XEXP (XVECEXP (pattern, 0, i), 0);
+ else
+ label = XEXP (XVECEXP (pattern, 1, i), 0);
+
+ tmp = gen_short_jump (label);
+
+ /* Emit the jump itself. */
+ tmp = emit_jump_insn_after (tmp, location);
+ JUMP_LABEL (tmp) = label;
+ LABEL_NUSES (label)++;
+ location = NEXT_INSN (location);
+
+ /* Emit a BARRIER after the jump. */
+ emit_barrier_after (location);
+ location = NEXT_INSN (location);
+ }
+
+ /* Emit marker for the end of the branch table. */
+ emit_insn_before (gen_end_brtab (), location);
+ location = NEXT_INSN (location);
+ emit_barrier_after (location);
+
+ /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
+ delete_insn (insn);
+ }
+ }
+ else
+ {
+ /* Still need brtab marker insns. FIXME: the presence of these
+ markers disables output of the branch table to readonly memory,
+ and any alignment directives that might be needed. Possibly,
+ the begin_brtab insn should be output before the label for the
+ table. This doesn't matter at the moment since the tables are
+ always output in the text section. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ /* Find an ADDR_VEC insn. */
+ if (GET_CODE (insn) != JUMP_INSN
+ || (GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
+ continue;
+
+ /* Now generate markers for the beginning and end of the
+ branch table. */
+ emit_insn_before (gen_begin_brtab (), insn);
+ emit_insn_after (gen_end_brtab (), insn);
+ }
+ }
+}
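+
+/* Illustrative only: exploding a two-entry ADDR_VEC as above replaces
+   the vector of addresses with real branch-table code along the lines of
+
+       begin_brtab marker
+   L$1:     b L10           ; delay slot left for reorg to fill
+   L$2:     b L20
+       end_brtab marker
+
+   so the final jump pass can optimize the new branches. */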
+
+/* The PA has a number of odd instructions which can perform multiple
+ tasks at once. On first generation PA machines (PA1.0 and PA1.1)
+ it may be profitable to combine two instructions into one instruction
+ with two outputs. It's not profitable on PA2.0 machines because the
+ two outputs would take two slots in the reorder buffers.
+
+ This routine finds instructions which can be combined and combines
+ them. We only support some of the potential combinations, and we
+ only try common ways to find suitable instructions.
+
+ * addb can add two registers or a register and a small integer
+ and jump to a nearby (+-8k) location. Normally the jump to the
+ nearby location is conditional on the result of the add, but by
+ using the "true" condition we can make the jump unconditional.
+ Thus addb can perform two independent operations in one insn.
+
+ * movb is similar to addb in that it can perform a reg->reg
+ or small immediate->reg copy and jump to a nearby (+-8k) location.
+
+ * fmpyadd and fmpysub can perform a FP multiply and either an
+ FP add or FP sub if the operands of the multiply and add/sub are
+ independent (there are other minor restrictions). Note both
+ the fmpy and fadd/fsub can in theory move to better spots according
+ to data dependencies, but for now we require the fmpy stay at a
+ fixed location.
+
+ * Many of the memory operations can perform pre & post updates
+ of index registers. GCC's pre/post increment/decrement addressing
+ is far too simple to take advantage of all the possibilities. This
+ pass may not be suitable since those insns may not be independent.
+
+ * comclr can compare two ints or an int and a register, nullify
+ the following instruction and zero some other register. This
+ is more difficult to use as it's harder to find an insn which
+ will generate a comclr than finding something like an unconditional
+ branch. (conditional moves & long branches create comclr insns).
+
+ * Most arithmetic operations can conditionally skip the next
+ instruction. They can be viewed as "perform this operation
+ and conditionally jump to this nearby location" (where nearby
+ is an insn away). These are difficult to use due to the
+ branch length restrictions. */
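+
+/* Illustrative only (hypothetical registers): an FMPY anchor such as
+
+       fmpy,dbl %fr5,%fr6,%fr7
+
+   and a nearby FADDSUB floater such as
+
+       fadd,dbl %fr8,%fr9,%fr10
+
+   have independent operands, so the pass below wraps both patterns in a
+   single PARALLEL that the machine description can match as one
+   two-output insn. */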
+
+static void
+pa_combine_instructions (void)
+{
+ rtx anchor, new_rtx;
+
+ /* This can get expensive since the basic algorithm is on the
+ order of O(n^2) (or worse). Only do it for -O2 or higher
+ levels of optimization. */
+ if (optimize < 2)
+ return;
+
+ /* Walk down the list of insns looking for "anchor" insns which
+ may be combined with "floating" insns. As the name implies,
+ "anchor" instructions don't move, while "floating" insns may
+ move around. */
+ new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
+ new_rtx = make_insn_raw (new_rtx);
+
+ for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
+ {
+ enum attr_pa_combine_type anchor_attr;
+ enum attr_pa_combine_type floater_attr;
+
+ /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
+ Also ignore any special USE insns. */
+ if ((GET_CODE (anchor) != INSN
+ && GET_CODE (anchor) != JUMP_INSN
+ && GET_CODE (anchor) != CALL_INSN)
+ || GET_CODE (PATTERN (anchor)) == USE
+ || GET_CODE (PATTERN (anchor)) == CLOBBER
+ || GET_CODE (PATTERN (anchor)) == ADDR_VEC
+ || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
+ continue;
+
+ anchor_attr = get_attr_pa_combine_type (anchor);
+ /* See if anchor is an insn suitable for combination. */
+ if (anchor_attr == PA_COMBINE_TYPE_FMPY
+ || anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+ && ! forward_branch_p (anchor)))
+ {
+ rtx floater;
+
+ for (floater = PREV_INSN (anchor);
+ floater;
+ floater = PREV_INSN (floater))
+ {
+ if (GET_CODE (floater) == NOTE
+ || (GET_CODE (floater) == INSN
+ && (GET_CODE (PATTERN (floater)) == USE
+ || GET_CODE (PATTERN (floater)) == CLOBBER)))
+ continue;
+
+ /* Anything except a regular INSN will stop our search. */
+ if (GET_CODE (floater) != INSN
+ || GET_CODE (PATTERN (floater)) == ADDR_VEC
+ || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+ {
+ floater = NULL_RTX;
+ break;
+ }
+
+ /* See if FLOATER is suitable for combination with the
+ anchor. */
+ floater_attr = get_attr_pa_combine_type (floater);
+ if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+ && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+ || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ && floater_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* If ANCHOR and FLOATER can be combined, then we're
+ done with this pass. */
+ if (pa_can_combine_p (new_rtx, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)), 0),
+ XEXP (SET_SRC (PATTERN (floater)), 1)))
+ break;
+ }
+
+ else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
+ && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
+ {
+ if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
+ {
+ if (pa_can_combine_p (new_rtx, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)), 0),
+ XEXP (SET_SRC (PATTERN (floater)), 1)))
+ break;
+ }
+ else
+ {
+ if (pa_can_combine_p (new_rtx, anchor, floater, 0,
+ SET_DEST (PATTERN (floater)),
+ SET_SRC (PATTERN (floater)),
+ SET_SRC (PATTERN (floater))))
+ break;
+ }
+ }
+ }
+
+ /* If we didn't find anything on the backwards scan try forwards. */
+ if (!floater
+ && (anchor_attr == PA_COMBINE_TYPE_FMPY
+ || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
+ {
+ for (floater = anchor; floater; floater = NEXT_INSN (floater))
+ {
+ if (GET_CODE (floater) == NOTE
+ || (GET_CODE (floater) == INSN
+ && (GET_CODE (PATTERN (floater)) == USE
+ || GET_CODE (PATTERN (floater)) == CLOBBER)))
+ continue;
+
+ /* Anything except a regular INSN will stop our search. */
+ if (GET_CODE (floater) != INSN
+ || GET_CODE (PATTERN (floater)) == ADDR_VEC
+ || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
+ {
+ floater = NULL_RTX;
+ break;
+ }
+
+ /* See if FLOATER is suitable for combination with the
+ anchor. */
+ floater_attr = get_attr_pa_combine_type (floater);
+ if ((anchor_attr == PA_COMBINE_TYPE_FMPY
+ && floater_attr == PA_COMBINE_TYPE_FADDSUB)
+ || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ && floater_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* If ANCHOR and FLOATER can be combined, then we're
+ done with this pass. */
+ if (pa_can_combine_p (new_rtx, anchor, floater, 1,
+ SET_DEST (PATTERN (floater)),
+ XEXP (SET_SRC (PATTERN (floater)),
+ 0),
+ XEXP (SET_SRC (PATTERN (floater)),
+ 1)))
+ break;
+ }
+ }
+ }
+
+ /* FLOATER will be nonzero if we found a suitable floating
+ insn for combination with ANCHOR. */
+ if (floater
+ && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
+ || anchor_attr == PA_COMBINE_TYPE_FMPY))
+ {
+ /* Emit the new instruction and delete the old anchor. */
+ emit_insn_before (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2, PATTERN (anchor),
+ PATTERN (floater))),
+ anchor);
+
+ SET_INSN_DELETED (anchor);
+
+ /* Emit a special USE insn for FLOATER, then delete
+ the floating insn. */
+ emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+ delete_insn (floater);
+
+ continue;
+ }
+ else if (floater
+ && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
+ {
+ rtx temp;
+ /* Emit the new_jump instruction and delete the old anchor. */
+ temp
+ = emit_jump_insn_before (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2, PATTERN (anchor),
+ PATTERN (floater))),
+ anchor);
+
+ JUMP_LABEL (temp) = JUMP_LABEL (anchor);
+ SET_INSN_DELETED (anchor);
+
+ /* Emit a special USE insn for FLOATER, then delete
+ the floating insn. */
+ emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
+ delete_insn (floater);
+ continue;
+ }
+ }
+ }
+}
+
+static int
+pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
+ rtx src1, rtx src2)
+{
+ int insn_code_number;
+ rtx start, end;
+
+ /* Create a PARALLEL with the patterns of ANCHOR and
+ FLOATER, try to recognize it, then test constraints
+ for the resulting pattern.
+
+ If the pattern doesn't match or the constraints
+ aren't met keep searching for a suitable floater
+ insn. */
+ XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
+ XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
+ INSN_CODE (new_rtx) = -1;
+ insn_code_number = recog_memoized (new_rtx);
+ if (insn_code_number < 0
+ || (extract_insn (new_rtx), ! constrain_operands (1)))
+ return 0;
+
+ if (reversed)
+ {
+ start = anchor;
+ end = floater;
+ }
+ else
+ {
+ start = floater;
+ end = anchor;
+ }
+
+ /* There are up to three operands to consider: one
+ output and two inputs.
+
+ The output must not be used between FLOATER & ANCHOR
+ exclusive. The inputs must not be set between
+ FLOATER and ANCHOR exclusive. */
+
+ if (reg_used_between_p (dest, start, end))
+ return 0;
+
+ if (reg_set_between_p (src1, start, end))
+ return 0;
+
+ if (reg_set_between_p (src2, start, end))
+ return 0;
+
+ /* If we get here, then everything is good. */
+ return 1;
+}
+
+/* Return nonzero if references for INSN are delayed.
+
+ Millicode insns are actually function calls with some special
+ constraints on arguments and register usage.
+
+ Millicode calls always expect their arguments in the integer argument
+ registers, and always return their result in %r29 (ret1). They
+ are expected to clobber their arguments, %r1, %r29, and the return
+ pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
+
+ This function tells reorg that the references to arguments and
+ millicode calls do not appear to happen until after the millicode call.
+ This allows reorg to put insns which set the argument registers into the
+ delay slot of the millicode call -- thus they act more like traditional
+ CALL_INSNs.
+
+ Note we cannot consider side effects of the insn to be delayed because
+ the branch and link insn will clobber the return pointer. If we happened
+ to use the return pointer in the delay slot of the call, then we lose.
+
+ get_attr_type will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+int
+insn_refs_are_delayed (rtx insn)
+{
+ return ((GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) != SEQUENCE
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && get_attr_type (insn) == TYPE_MILLI));
+}
+
+/* Promote the return value, but not the arguments. */
+
+static enum machine_mode
+pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return)
+{
+ if (for_return == 0)
+ return mode;
+ return promote_mode (type, mode, punsignedp);
+}
+
+/* On the HP-PA the value is found in register(s) 28(-29), unless
+ the mode is SF or DF. Then the value is returned in fr4 (32).
+
+ This must perform the same promotions as PROMOTE_MODE, else promoting
+ return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
+
+ Small structures must be returned in a PARALLEL on PA64 in order
+ to match the HP Compiler ABI. */
+
+static rtx
+pa_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode valmode;
+
+ if (AGGREGATE_TYPE_P (valtype)
+ || TREE_CODE (valtype) == COMPLEX_TYPE
+ || TREE_CODE (valtype) == VECTOR_TYPE)
+ {
+ if (TARGET_64BIT)
+ {
+ /* Aggregates with a size less than or equal to 128 bits are
+ returned in GR 28(-29). They are left justified. The pad
+ bits are undefined. Larger aggregates are returned in
+ memory. */
+ rtx loc[2];
+ int i, offset = 0;
+ int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
+
+ for (i = 0; i < ub; i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, 28 + i),
+ GEN_INT (offset));
+ offset += 8;
+ }
+
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
+ }
+ else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
+ {
+ /* Aggregates 5 to 8 bytes in size are returned in general
+ registers r28-r29 in the same manner as other non
+ floating-point objects. The data is right-justified and
+ zero-extended to 64 bits. This is opposite to the normal
+ justification used on big endian targets and requires
+ special treatment. */
+ rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, 28), const0_rtx);
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
+ }
+ }
+
+ if ((INTEGRAL_TYPE_P (valtype)
+ && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
+ || POINTER_TYPE_P (valtype))
+ valmode = word_mode;
+ else
+ valmode = TYPE_MODE (valtype);
+
+ if (TREE_CODE (valtype) == REAL_TYPE
+ && !AGGREGATE_TYPE_P (valtype)
+ && TYPE_MODE (valtype) != TFmode
+ && !TARGET_SOFT_FLOAT)
+ return gen_rtx_REG (valmode, 32);
+
+ return gen_rtx_REG (valmode, 28);
+}
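+
+/* Illustrative only: under the rules above
+
+     int f (void)       value in %r28, promoted to word_mode
+     double g (void)    value in %fr4 (register 32), unless -msoft-float
+     16-byte struct     on PA64, (parallel [%r28 %r29])
+
+   while larger aggregates are forced to memory by pa_return_in_memory. */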
+
+/* Implement the TARGET_LIBCALL_VALUE hook. */
+
+static rtx
+pa_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ if (! TARGET_SOFT_FLOAT
+ && (mode == SFmode || mode == DFmode))
+ return gen_rtx_REG (mode, 32);
+ else
+ return gen_rtx_REG (mode, 28);
+}
+
+/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
+
+static bool
+pa_function_value_regno_p (const unsigned int regno)
+{
+ if (regno == 28
+ || (! TARGET_SOFT_FLOAT && regno == 32))
+ return true;
+
+ return false;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int arg_size = FUNCTION_ARG_SIZE (mode, type);
+
+ cum->nargs_prototype--;
+ cum->words += (arg_size
+ + ((cum->words & 01)
+ && type != NULL_TREE
+ && arg_size > 1));
+}
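+
+/* Worked example (32-bit ABI): after one `int' argument cum->words is 1;
+   a following `double' (arg_size == 2) then advances cum->words by 2 plus
+   1 pad word because cum->words is odd, leaving cum->words == 4. For
+   libcalls (type == NULL_TREE) the padding term is skipped. */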
+
+/* Return the location of a parameter that is passed in a register or NULL
+ if the parameter has any component that is passed in memory.
+
+ This is new code and will be pushed into the net sources after
+ further testing.
+
+ ??? We might want to restructure this so that it looks more like other
+ ports. */
+static rtx
+pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int max_arg_words = (TARGET_64BIT ? 8 : 4);
+ int alignment = 0;
+ int arg_size;
+ int fpr_reg_base;
+ int gpr_reg_base;
+ rtx retval;
+
+ if (mode == VOIDmode)
+ return NULL_RTX;
+
+ arg_size = FUNCTION_ARG_SIZE (mode, type);
+
+ /* If this arg would be passed partially or totally on the stack, then
+ this routine should return zero. pa_arg_partial_bytes will
+ handle arguments which are split between regs and stack slots if
+ the ABI mandates split arguments. */
+ if (!TARGET_64BIT)
+ {
+ /* The 32-bit ABI does not split arguments. */
+ if (cum->words + arg_size > max_arg_words)
+ return NULL_RTX;
+ }
+ else
+ {
+ if (arg_size > 1)
+ alignment = cum->words & 1;
+ if (cum->words + alignment >= max_arg_words)
+ return NULL_RTX;
+ }
+
+ /* The 32bit ABIs and the 64bit ABIs are rather different,
+ particularly in their handling of FP registers. We might
+ be able to cleverly share code between them, but I'm not
+ going to bother in the hope that splitting them up results
+ in code that is more easily understood. */
+
+ if (TARGET_64BIT)
+ {
+ /* Advance the base registers to their current locations.
+
+ Remember, gprs grow towards smaller register numbers while
+ fprs grow to higher register numbers. Also remember that
+ although FP regs are 32-bit addressable, we pretend that
+ the registers are 64-bits wide. */
+ gpr_reg_base = 26 - cum->words;
+ fpr_reg_base = 32 + cum->words;
+
+ /* Arguments wider than one word and small aggregates need special
+ treatment. */
+ if (arg_size > 1
+ || mode == BLKmode
+ || (type && (AGGREGATE_TYPE_P (type)
+ || TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)))
+ {
+ /* Double-extended precision (80-bit), quad-precision (128-bit)
+ and aggregates including complex numbers are aligned on
+ 128-bit boundaries. The first eight 64-bit argument slots
+ are associated one-to-one, with general registers r26
+ through r19, and also with floating-point registers fr4
+ through fr11. Arguments larger than one word are always
+ passed in general registers.
+
+ Using a PARALLEL with a word mode register results in left
+ justified data on a big-endian target. */
+
+ rtx loc[8];
+ int i, offset = 0, ub = arg_size;
+
+ /* Align the base register. */
+ gpr_reg_base -= alignment;
+
+ ub = MIN (ub, max_arg_words - cum->words - alignment);
+ for (i = 0; i < ub; i++)
+ {
+ loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, gpr_reg_base),
+ GEN_INT (offset));
+ gpr_reg_base -= 1;
+ offset += 8;
+ }
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
+ }
+ }
+ else
+ {
+ /* If the argument is larger than a word, then we know precisely
+ which registers we must use. */
+ if (arg_size > 1)
+ {
+ if (cum->words)
+ {
+ gpr_reg_base = 23;
+ fpr_reg_base = 38;
+ }
+ else
+ {
+ gpr_reg_base = 25;
+ fpr_reg_base = 34;
+ }
+
+ /* Structures 5 to 8 bytes in size are passed in the general
+ registers in the same manner as other non floating-point
+ objects. The data is right-justified and zero-extended
+ to 64 bits. This is opposite to the normal justification
+ used on big endian targets and requires special treatment.
+ We now define BLOCK_REG_PADDING to pad these objects.
+ Aggregates, complex and vector types are passed in the same
+ manner as structures. */
+ if (mode == BLKmode
+ || (type && (AGGREGATE_TYPE_P (type)
+ || TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)))
+ {
+ rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, gpr_reg_base),
+ const0_rtx);
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
+ }
+ }
+ else
+ {
+ /* We have a single word (32 bits). A simple computation
+ will get us the register #s we need. */
+ gpr_reg_base = 26 - cum->words;
+ fpr_reg_base = 32 + 2 * cum->words;
+ }
+ }
+
+ /* Determine if the argument needs to be passed in both general and
+ floating point registers. */
+ if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
+ /* If we are doing soft-float with portable runtime, then there
+ is no need to worry about FP regs. */
+ && !TARGET_SOFT_FLOAT
+ /* The parameter must be some kind of scalar float, else we just
+ pass it in integer registers. */
+ && GET_MODE_CLASS (mode) == MODE_FLOAT
+ /* The target function must not have a prototype. */
+ && cum->nargs_prototype <= 0
+ /* libcalls do not need to pass items in both FP and general
+ registers. */
+ && type != NULL_TREE
+ /* All this hair applies to "outgoing" args only. This includes
+ sibcall arguments set up with FUNCTION_INCOMING_ARG. */
+ && !cum->incoming)
+ /* Also pass outgoing floating arguments in both registers in indirect
+ calls with the 32 bit ABI and the HP assembler since there is no
+ way to specify argument locations in static functions. */
+ || (!TARGET_64BIT
+ && !TARGET_GAS
+ && !cum->incoming
+ && cum->indirect
+ && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ {
+ retval
+ = gen_rtx_PARALLEL
+ (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, fpr_reg_base),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, gpr_reg_base),
+ const0_rtx)));
+ }
+ else
+ {
+ /* See if we should pass this parameter in a general register. */
+ if (TARGET_SOFT_FLOAT
+ /* Indirect calls in the normal 32bit ABI require all arguments
+ to be passed in general registers. */
+ || (!TARGET_PORTABLE_RUNTIME
+ && !TARGET_64BIT
+ && !TARGET_ELF32
+ && cum->indirect)
+ /* If the parameter is not a scalar floating-point parameter,
+ then it belongs in GPRs. */
+ || GET_MODE_CLASS (mode) != MODE_FLOAT
+ /* Structure with single SFmode field belongs in GPR. */
+ || (type && AGGREGATE_TYPE_P (type)))
+ retval = gen_rtx_REG (mode, gpr_reg_base);
+ else
+ retval = gen_rtx_REG (mode, fpr_reg_base);
+ }
+ return retval;
+}
+
+/* Arguments larger than one word are double word aligned. */
+
+static unsigned int
+pa_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ bool singleword = (type
+ ? (integer_zerop (TYPE_SIZE (type))
+ || !TREE_CONSTANT (TYPE_SIZE (type))
+ || int_size_in_bytes (type) <= UNITS_PER_WORD)
+ : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
+
+ return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
+}
+
+/* If this arg would be passed totally in registers or totally on the stack,
+ then this routine should return zero. */
+
+static int
+pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ unsigned int max_arg_words = 8;
+ unsigned int offset = 0;
+
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
+ offset = 1;
+
+ if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
+ /* Arg fits fully into registers. */
+ return 0;
+ else if (cum->words + offset >= max_arg_words)
+ /* Arg fully on the stack. */
+ return 0;
+ else
+ /* Arg is split. */
+ return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
+}
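+
+/* Worked example (PA64): a four-word argument starting at cum->words == 6
+   neither fits entirely in registers (6 + 4 > 8) nor starts on the stack
+   (6 < 8), so it is split: (8 - 6 - 0) * UNITS_PER_WORD == 16 bytes go in
+   registers and the rest goes on the stack. */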
+
+
+/* A get_unnamed_section callback for switching to the text section.
+
+ This function is only used with SOM. Because we don't support
+ named subspaces, we can only create a new subspace or switch back
+ to the default text subspace. */
+
+static void
+som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
+{
+ gcc_assert (TARGET_SOM);
+ if (TARGET_GAS)
+ {
+ if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
+ {
+ /* We only want to emit a .nsubspa directive once at the
+ start of the function. */
+ cfun->machine->in_nsubspa = 1;
+
+ /* Create a new subspace for the text. This provides
+ better stub placement and one-only functions. */
+ if (cfun->decl
+ && DECL_ONE_ONLY (cfun->decl)
+ && !DECL_WEAK (cfun->decl))
+ {
+ output_section_asm_op ("\t.SPACE $TEXT$\n"
+ "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
+ "ACCESS=44,SORT=24,COMDAT");
+ return;
+ }
+ }
+ else
+ {
+ /* There isn't a current function or the body of the current
+ function has been completed. So, we are changing to the
+ text section to output debugging information. Thus, we
+ need to forget that we are in the text section so that
+ varasm.c will call us when text_section is selected again. */
+ gcc_assert (!cfun || !cfun->machine
+ || cfun->machine->in_nsubspa == 2);
+ in_section = NULL;
+ }
+ output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
+ return;
+ }
+ output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
+}
+
+/* A get_unnamed_section callback for switching to comdat data
+ sections. This function is only used with SOM. */
+
+static void
+som_output_comdat_data_section_asm_op (const void *data)
+{
+ in_section = NULL;
+ output_section_asm_op (data);
+}
+
+/* Implement TARGET_ASM_INITIALIZE_SECTIONS */
+
+static void
+pa_som_asm_init_sections (void)
+{
+ text_section
+ = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
+
+ /* SOM puts readonly data in the default $LIT$ subspace when PIC code
+ is not being generated. */
+ som_readonly_data_section
+ = get_unnamed_section (0, output_section_asm_op,
+ "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
+
+ /* When secondary definitions are not supported, SOM makes readonly
+ data one-only by creating a new $LIT$ subspace in $TEXT$ with
+ the comdat flag. */
+ som_one_only_readonly_data_section
+ = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
+ "\t.SPACE $TEXT$\n"
+ "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
+ "ACCESS=0x2c,SORT=16,COMDAT");
+
+ /* When secondary definitions are not supported, SOM makes data one-only
+ by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
+ som_one_only_data_section
+ = get_unnamed_section (SECTION_WRITE,
+ som_output_comdat_data_section_asm_op,
+ "\t.SPACE $PRIVATE$\n"
+ "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
+ "ACCESS=31,SORT=24,COMDAT");
+
+ /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
+ which reference data within the $TEXT$ space (for example constant
+ strings in the $LIT$ subspace).
+
+ The assemblers (GAS and HP as) both have problems with handling
+ the difference of two symbols which is the other correct way to
+ reference constant data during PIC code generation.
+
+ So, there's no way to reference constant data which is in the
+ $TEXT$ space during PIC generation. Instead place all constant
+ data into the $PRIVATE$ subspace (this reduces sharing, but it
+ works correctly). */
+ readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
+
+ /* We must not have a reference to an external symbol defined in a
+ shared library in a readonly section, else the SOM linker will
+ complain.
+
+ So, we force exception information into the data section. */
+ exception_section = data_section;
+}
+
+/* On hpux10, the linker will give an error if we have a reference
+ in the read-only data section to a symbol defined in a shared
+ library. Therefore, expressions that might require a reloc can
+ not be placed in the read-only data section. */
+
+static section *
+pa_select_section (tree exp, int reloc,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (TREE_CODE (exp) == VAR_DECL
+ && TREE_READONLY (exp)
+ && !TREE_THIS_VOLATILE (exp)
+ && DECL_INITIAL (exp)
+ && (DECL_INITIAL (exp) == error_mark_node
+ || TREE_CONSTANT (DECL_INITIAL (exp)))
+ && !reloc)
+ {
+ if (TARGET_SOM
+ && DECL_ONE_ONLY (exp)
+ && !DECL_WEAK (exp))
+ return som_one_only_readonly_data_section;
+ else
+ return readonly_data_section;
+ }
+ else if (CONSTANT_CLASS_P (exp) && !reloc)
+ return readonly_data_section;
+ else if (TARGET_SOM
+ && TREE_CODE (exp) == VAR_DECL
+ && DECL_ONE_ONLY (exp)
+ && !DECL_WEAK (exp))
+ return som_one_only_data_section;
+ else
+ return data_section;
+}
+
+static void
+pa_globalize_label (FILE *stream, const char *name)
+{
+ /* We only handle DATA objects here, functions are globalized in
+ ASM_DECLARE_FUNCTION_NAME. */
+ if (! FUNCTION_NAME_P (name))
+ {
+ fputs ("\t.EXPORT ", stream);
+ assemble_name (stream, name);
+ fputs (",DATA\n", stream);
+ }
+}
+
+/* Worker function for TARGET_STRUCT_VALUE_RTX. */
+
+static rtx
+pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+bool
+pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ /* SOM ABI says that objects larger than 64 bits are returned in memory.
+ PA64 ABI says that objects larger than 128 bits are returned in memory.
+ Note, int_size_in_bytes can return -1 if the size of the object is
+ variable or larger than the maximum value that can be expressed as
+ a HOST_WIDE_INT. It can also return zero for an empty type. The
+ simplest way to handle variable and empty types is to pass them in
+ memory. This avoids problems in defining the boundaries of argument
+ slots, allocating registers, etc. */
+ return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
+ || int_size_in_bytes (type) <= 0);
+}
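+
+/* For example, a 12-byte structure is returned in memory on the 32-bit
+   target (12 > 8) but in registers on PA64 (12 <= 16), and an object of
+   variable size (int_size_in_bytes returns -1) always goes in memory. */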
+
+/* Structure to hold declaration and name of external symbols that are
+ emitted by GCC. We generate a vector of these symbols and output them
+ at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
+ This avoids putting out names that are never really used. */
+
+typedef struct GTY(()) extern_symbol
+{
+ tree decl;
+ const char *name;
+} extern_symbol;
+
+/* Define gc'd vector type for extern_symbol. */
+DEF_VEC_O(extern_symbol);
+DEF_VEC_ALLOC_O(extern_symbol,gc);
+
+/* Vector of extern_symbol pointers. */
+static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
+
+#ifdef ASM_OUTPUT_EXTERNAL_REAL
+/* Mark DECL (name NAME) as an external reference (assembler output
+ file FILE). This saves the names to output at the end of the file
+ if actually referenced. */
+
+void
+pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
+{
+ extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
+
+ gcc_assert (file == asm_out_file);
+ p->decl = decl;
+ p->name = name;
+}
+
+/* Output text required at the end of an assembler file.
+ This includes deferred plabels and .import directives for
+ all external symbols that were actually referenced. */
+
+static void
+pa_hpux_file_end (void)
+{
+ unsigned int i;
+ extern_symbol *p;
+
+ if (!NO_DEFERRED_PROFILE_COUNTERS)
+ output_deferred_profile_counters ();
+
+ output_deferred_plabels ();
+
+ for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
+ {
+ tree decl = p->decl;
+
+ if (!TREE_ASM_WRITTEN (decl)
+ && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
+ ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
+ }
+
+ VEC_free (extern_symbol, gc, extern_symbols);
+}
+#endif
+
+/* Return true if a change from mode FROM to mode TO for a register
+ in register class RCLASS is invalid. */
+
+bool
+pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class rclass)
+{
+ if (from == to)
+ return false;
+
+ /* Reject changes to/from complex and vector modes. */
+ if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
+ || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
+ return true;
+
+ if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
+ return false;
+
+ /* There is no way to load QImode or HImode values directly from
+ memory. SImode loads to the FP registers are not zero extended.
+ On the 64-bit target, this conflicts with the definition of
+ LOAD_EXTEND_OP. Thus, we can't allow changing between modes
+ with different sizes in the floating-point registers. */
+ if (MAYBE_FP_REG_CLASS_P (rclass))
+ return true;
+
+ /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
+ in specific sets of registers. Thus, we cannot allow changing
+ to a larger mode when it's larger than a word. */
+ if (GET_MODE_SIZE (to) > UNITS_PER_WORD
+ && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
+ return true;
+
+ return false;
+}
+
+/* Returns TRUE if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be FALSE for correct output.
+
+ We should return FALSE for QImode and HImode because these modes
+ are not ok in the floating-point registers. However, this prevents
+ tying these modes to SImode and DImode in the general registers.
+ So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
+ CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
+ in the floating-point registers. */
+
+bool
+pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ /* Don't tie modes in different classes. */
+ if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
+ return false;
+
+ return true;
+}
+
+
+/* Length in units of the trampoline instruction code. */
+
+#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
+
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts.
+
+ The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
+ and then branches to the specified routine.
+
+ This code template is copied from text segment to stack location
+ and then patched with pa_trampoline_init to contain valid values,
+ and then entered as a subroutine.
+
+ It is best to keep this as small as possible to avoid having to
+ flush multiple lines in the cache. */
+
+static void
+pa_asm_trampoline_template (FILE *f)
+{
+ if (!TARGET_64BIT)
+ {
+ fputs ("\tldw 36(%r22),%r21\n", f);
+ fputs ("\tbb,>=,n %r21,30,.+16\n", f);
+ if (ASSEMBLER_DIALECT == 0)
+ fputs ("\tdepi 0,31,2,%r21\n", f);
+ else
+ fputs ("\tdepwi 0,31,2,%r21\n", f);
+ fputs ("\tldw 4(%r21),%r19\n", f);
+ fputs ("\tldw 0(%r21),%r21\n", f);
+ if (TARGET_PA_20)
+ {
+ fputs ("\tbve (%r21)\n", f);
+ fputs ("\tldw 40(%r22),%r29\n", f);
+ fputs ("\t.word 0\n", f);
+ fputs ("\t.word 0\n", f);
+ }
+ else
+ {
+ fputs ("\tldsid (%r21),%r1\n", f);
+ fputs ("\tmtsp %r1,%sr0\n", f);
+ fputs ("\tbe 0(%sr0,%r21)\n", f);
+ fputs ("\tldw 40(%r22),%r29\n", f);
+ }
+ fputs ("\t.word 0\n", f);
+ fputs ("\t.word 0\n", f);
+ fputs ("\t.word 0\n", f);
+ fputs ("\t.word 0\n", f);
+ }
+ else
+ {
+ fputs ("\t.dword 0\n", f);
+ fputs ("\t.dword 0\n", f);
+ fputs ("\t.dword 0\n", f);
+ fputs ("\t.dword 0\n", f);
+ fputs ("\tmfia %r31\n", f);
+ fputs ("\tldd 24(%r31),%r1\n", f);
+ fputs ("\tldd 24(%r1),%r27\n", f);
+ fputs ("\tldd 16(%r1),%r1\n", f);
+ fputs ("\tbve (%r1)\n", f);
+ fputs ("\tldd 32(%r31),%r31\n", f);
+ fputs ("\t.dword 0 ; fptr\n", f);
+ fputs ("\t.dword 0 ; static link\n", f);
+ }
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function.
+
+ Move the function address to the trampoline template at offset 36.
+ Move the static chain value to trampoline template at offset 40.
+ Move the trampoline address to trampoline template at offset 44.
+ Move r19 to trampoline template at offset 48. The latter two
+ words create a plabel for the indirect call to the trampoline.
+
+ A similar sequence is used for the 64-bit port but the plabel is
+ at the beginning of the trampoline.
+
+ Finally, the cache entries for the trampoline code are flushed.
+ This is necessary to ensure that the trampoline instruction sequence
+ is written to memory prior to any attempts at prefetching the code
+ sequence. */
+
+static void
+pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx start_addr = gen_reg_rtx (Pmode);
+ rtx end_addr = gen_reg_rtx (Pmode);
+ rtx line_length = gen_reg_rtx (Pmode);
+ rtx r_tramp, tmp;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+ r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
+
+ if (!TARGET_64BIT)
+ {
+ tmp = adjust_address (m_tramp, Pmode, 36);
+ emit_move_insn (tmp, fnaddr);
+ tmp = adjust_address (m_tramp, Pmode, 40);
+ emit_move_insn (tmp, chain_value);
+
+ /* Create a fat pointer for the trampoline. */
+ tmp = adjust_address (m_tramp, Pmode, 44);
+ emit_move_insn (tmp, r_tramp);
+ tmp = adjust_address (m_tramp, Pmode, 48);
+ emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
+
+ /* fdc and fic only use registers for the address to flush,
+ they do not accept integer displacements. We align the
+ start and end addresses to the beginning of their respective
+ cache lines to minimize the number of lines flushed. */
+ emit_insn (gen_andsi3 (start_addr, r_tramp,
+ GEN_INT (-MIN_CACHELINE_SIZE)));
+ tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
+ emit_insn (gen_andsi3 (end_addr, tmp,
+ GEN_INT (-MIN_CACHELINE_SIZE)));
+ emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
+ emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
+ emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
+ gen_reg_rtx (Pmode),
+ gen_reg_rtx (Pmode)));
+ }
+ else
+ {
+ tmp = adjust_address (m_tramp, Pmode, 56);
+ emit_move_insn (tmp, fnaddr);
+ tmp = adjust_address (m_tramp, Pmode, 64);
+ emit_move_insn (tmp, chain_value);
+
+ /* Create a fat pointer for the trampoline. */
+ tmp = adjust_address (m_tramp, Pmode, 16);
+ emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
+ tmp = adjust_address (m_tramp, Pmode, 24);
+ emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
+
+ /* fdc and fic only use registers for the address to flush,
+ they do not accept integer displacements. We align the
+ start and end addresses to the beginning of their respective
+ cache lines to minimize the number of lines flushed. */
+ tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
+ emit_insn (gen_anddi3 (start_addr, tmp,
+ GEN_INT (-MIN_CACHELINE_SIZE)));
+ tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
+ emit_insn (gen_anddi3 (end_addr, tmp,
+ GEN_INT (-MIN_CACHELINE_SIZE)));
+ emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
+ emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
+ emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
+ gen_reg_rtx (Pmode),
+ gen_reg_rtx (Pmode)));
+ }
+}
+
+/* Perform any machine-specific adjustment in the address of the trampoline.
+ ADDR contains the address that was passed to pa_trampoline_init.
+ Adjust the trampoline address to point to the plabel at offset 44.
+ Two is added to that address, setting bit 30, which on this port
+ flags a pointer as a plabel (hence the 46 in the code below). */
+
+static rtx
+pa_trampoline_adjust_address (rtx addr)
+{
+ if (!TARGET_64BIT)
+ addr = memory_address (Pmode, plus_constant (addr, 46));
+ return addr;
+}
+
+static rtx
+pa_delegitimize_address (rtx orig_x)
+{
+ rtx x = delegitimize_mem_from_attrs (orig_x);
+
+ if (GET_CODE (x) == LO_SUM
+ && GET_CODE (XEXP (x, 1)) == UNSPEC
+ && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
+ return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
+ return x;
+}
+
+static rtx
+pa_internal_arg_pointer (void)
+{
+ /* The argument pointer and the hard frame pointer are the same in
+ the 32-bit runtime, so we don't need a copy. */
+ if (TARGET_64BIT)
+ return copy_to_reg (virtual_incoming_args_rtx);
+ else
+ return virtual_incoming_args_rtx;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is allowed.
+ Frame pointer elimination is automatically handled. */
+
+static bool
+pa_can_eliminate (const int from, const int to)
+{
+ /* The argument pointer cannot be eliminated in the 64-bit runtime. */
+ if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
+ return false;
+
+ return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
+/* Define the offset between two registers, FROM to be eliminated and its
+ replacement TO, at the start of a routine. */
+HOST_WIDE_INT
+pa_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset;
+
+ if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
+ && to == STACK_POINTER_REGNUM)
+ offset = -compute_frame_size (get_frame_size (), 0);
+ else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ offset = 0;
+ else
+ gcc_unreachable ();
+
+ return offset;
+}
+
+static void
+pa_conditional_register_usage (void)
+{
+ int i;
+
+ if (!TARGET_64BIT && !TARGET_PA_11)
+ {
+ for (i = 56; i <= FP_REG_LAST; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ for (i = 33; i < 56; i += 2)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+ if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
+ {
+ for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+ if (flag_pic)
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+}
+
+/* Target hook for c_mode_for_suffix. */
+
+static enum machine_mode
+pa_c_mode_for_suffix (char suffix)
+{
+ if (HPUX_LONG_DOUBLE_LIBRARY)
+ {
+ if (suffix == 'q')
+ return TFmode;
+ }
+
+ return VOIDmode;
+}
+
+/* Target hook for function_section. */
+
+static section *
+pa_function_section (tree decl, enum node_frequency freq,
+ bool startup, bool exit)
+{
+ /* Put functions in text section if target doesn't have named sections. */
+ if (!targetm.have_named_sections)
+ return text_section;
+
+ /* Force nested functions into the same section as the containing
+ function. */
+ if (decl
+ && DECL_SECTION_NAME (decl) == NULL_TREE
+ && DECL_CONTEXT (decl) != NULL_TREE
+ && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
+ && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
+ return function_section (DECL_CONTEXT (decl));
+
+ /* Otherwise, use the default function section. */
+ return default_function_section (decl, freq, startup, exit);
+}
+
+/* Implement TARGET_SECTION_TYPE_FLAGS. */
+
+static unsigned int
+pa_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags;
+
+ flags = default_section_type_flags (decl, name, reloc);
+
+ /* Function labels are placed in the constant pool. This can
+ cause a section conflict if decls are put in ".data.rel.ro"
+ or ".data.rel.ro.local" using the __attribute__ construct. */
+ if (strcmp (name, ".data.rel.ro") == 0
+ || strcmp (name, ".data.rel.ro.local") == 0)
+ flags |= SECTION_WRITE | SECTION_RELRO;
+
+ return flags;
+}
+
+#include "gt-pa.h"
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
new file mode 100644
index 000000000..f086daa36
--- /dev/null
+++ b/gcc/config/pa/pa.h
@@ -0,0 +1,1572 @@
+/* Definitions of target machine for GNU compiler, for the HP Spectrum.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com) of Cygnus Support
+ and Tim Moore (moore@defmacro.cs.utah.edu) of the Center for
+ Software Science at the University of Utah.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* For long call handling. */
+extern unsigned long total_code_bytes;
+
+/* Which processor to schedule for. */
+
+enum processor_type
+{
+ PROCESSOR_700,
+ PROCESSOR_7100,
+ PROCESSOR_7100LC,
+ PROCESSOR_7200,
+ PROCESSOR_7300,
+ PROCESSOR_8000
+};
+
+/* For -mschedule= option. */
+extern enum processor_type pa_cpu;
+
+/* For -munix= option. */
+extern int flag_pa_unix;
+
+#define pa_cpu_attr ((enum attr_cpu)pa_cpu)
+
+/* Print subsidiary information on the compiler version in use. */
+
+#define TARGET_VERSION fputs (" (hppa)", stderr);
+
+#define TARGET_PA_10 (!TARGET_PA_11 && !TARGET_PA_20)
+
+/* Generate code for the HPPA 2.0 architecture in 64bit mode. */
+#ifndef TARGET_64BIT
+#define TARGET_64BIT 0
+#endif
+
+/* Generate code for ELF32 ABI. */
+#ifndef TARGET_ELF32
+#define TARGET_ELF32 0
+#endif
+
+/* Generate code for SOM 32bit ABI. */
+#ifndef TARGET_SOM
+#define TARGET_SOM 0
+#endif
+
+/* HP-UX UNIX features. */
+#ifndef TARGET_HPUX
+#define TARGET_HPUX 0
+#endif
+
+/* HP-UX 10.10 UNIX 95 features. */
+#ifndef TARGET_HPUX_10_10
+#define TARGET_HPUX_10_10 0
+#endif
+
+/* HP-UX 11.* features (11.00, 11.11, 11.23, etc.) */
+#ifndef TARGET_HPUX_11
+#define TARGET_HPUX_11 0
+#endif
+
+/* HP-UX 11i multibyte and UNIX 98 extensions. */
+#ifndef TARGET_HPUX_11_11
+#define TARGET_HPUX_11_11 0
+#endif
+
+/* HP-UX long double library. */
+#ifndef HPUX_LONG_DOUBLE_LIBRARY
+#define HPUX_LONG_DOUBLE_LIBRARY 0
+#endif
+
+/* The following three defines are potential target switches. The current
+ defines are optimal given the current capabilities of GAS and GNU ld. */
+
+/* Define to a C expression evaluating to true to use long absolute calls.
+ Currently, only the HP assembler and SOM linker support long absolute
+ calls. They are used only in non-pic code. */
+#define TARGET_LONG_ABS_CALL (TARGET_SOM && !TARGET_GAS)
+
+/* Define to a C expression evaluating to true to use long PIC symbol
+ difference calls. Long PIC symbol difference calls are only used with
+ the HP assembler and linker. The HP assembler detects this instruction
+ sequence and treats it as a long pc-relative call. Currently, GAS only
+ allows a difference of two symbols in the same subspace, and it doesn't
+ detect the sequence as a pc-relative call. */
+#define TARGET_LONG_PIC_SDIFF_CALL (!TARGET_GAS && TARGET_HPUX)
+
+/* Define to a C expression evaluating to true to use long PIC
+ pc-relative calls. Long PIC pc-relative calls are only used with
+ GAS. Currently, they are usable for calls which bind local to a
+ module but not for external calls. */
+#define TARGET_LONG_PIC_PCREL_CALL 0
+
+/* Define to a C expression evaluating to true to use SOM secondary
+ definition symbols for weak support. Linker support for secondary
+ definition symbols is buggy prior to HP-UX 11.X. */
+#define TARGET_SOM_SDEF 0
+
+/* Define to a C expression evaluating to true to save the entry value
+ of SP in the current frame marker. This is normally unnecessary.
+ However, the HP-UX unwind library looks at the SAVE_SP callinfo flag.
+ HP compilers don't use this flag but it is supported by the assembler.
+ We set this flag to indicate that register %r3 has been saved at the
+ start of the frame. Thus, when the HP unwind library is used, we
+ need to generate additional code to save SP into the frame marker. */
+#define TARGET_HPUX_UNWIND_LIBRARY 0
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_GAS | MASK_JUMP_IN_DELAY | MASK_BIG_SWITCH)
+#endif
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT 0
+#endif
+
+#ifndef TARGET_SCHED_DEFAULT
+#define TARGET_SCHED_DEFAULT PROCESSOR_8000
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-schedule is ignored if -mschedule is specified.
+ --with-arch is ignored if -march is specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{!march=*:-march=%(VALUE)}" }, \
+ {"schedule", "%{!mschedule=*:-mschedule=%(VALUE)}" }
+
+/* Specify the dialect of assembler to use. The new mnemonics are dialect one
+ and the old mnemonics are dialect zero. */
+#define ASSEMBLER_DIALECT (TARGET_PA_20 ? 1 : 0)
+
+/* Override some settings from dbxelf.h. */
+
+/* We do not have to be compatible with dbx, so we enable gdb extensions
+ by default. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* This used to be zero (no max length), but big enums and such can
+ cause huge strings which killed gas.
+
+ We also have to avoid lossage in dbxout.c -- it does not compute the
+ string size accurately, so we are quite conservative here. */
+#undef DBX_CONTIN_LENGTH
+#define DBX_CONTIN_LENGTH 3000
+
+/* GDB always assumes the current function's frame begins at the value
+ of the stack pointer upon entry to the current function. Accessing
+ local variables and parameters passed on the stack is done using the
+ base of the frame + an offset provided by GCC.
+
+ For functions which have frame pointers this method works fine;
+ the (frame pointer) == (stack pointer at function entry) and GCC provides
+ an offset relative to the frame pointer.
+
+ This loses for functions without a frame pointer; GCC provides an offset
+ which is relative to the stack pointer after adjusting for the function's
+ frame size. GDB would prefer the offset to be relative to the value of
+ the stack pointer at the function's entry. Yuk! */
+#define DEBUGGER_AUTO_OFFSET(X) \
+ ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) \
+ + (frame_pointer_needed ? 0 : compute_frame_size (get_frame_size (), 0)))
+
+#define DEBUGGER_ARG_OFFSET(OFFSET, X) \
+ ((GET_CODE (X) == PLUS ? OFFSET : 0) \
+ + (frame_pointer_needed ? 0 : compute_frame_size (get_frame_size (), 0)))
+
+#define TARGET_CPU_CPP_BUILTINS() \
+do { \
+ builtin_assert("cpu=hppa"); \
+ builtin_assert("machine=hppa"); \
+ builtin_define("__hppa"); \
+ builtin_define("__hppa__"); \
+ if (TARGET_PA_20) \
+ builtin_define("_PA_RISC2_0"); \
+ else if (TARGET_PA_11) \
+ builtin_define("_PA_RISC1_1"); \
+ else \
+ builtin_define("_PA_RISC1_0"); \
+} while (0)
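+
+/* Illustrative note (not in the upstream sources): compiling for a
+ PA 2.0 target thus predefines __hppa, __hppa__ and _PA_RISC2_0,
+ while a plain PA 1.0 target gets _PA_RISC1_0 instead. */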
+
+/* An old set of OS defines for various BSD-like systems. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("REVARGV"); \
+ builtin_define_std ("hp800"); \
+ builtin_define_std ("hp9000"); \
+ builtin_define_std ("hp9k8"); \
+ if (!c_dialect_cxx () && !flag_iso) \
+ builtin_define ("hppa"); \
+ builtin_define_std ("spectrum"); \
+ builtin_define_std ("unix"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=unix"); \
+ } \
+ while (0)
+
+#define CC1_SPEC "%{pg:} %{p:}"
+
+#define LINK_SPEC "%{mlinker-opt:-O} %{!shared:-u main} %{shared:-b}"
+
+/* We don't want -lg. */
+#ifndef LIB_SPEC
+#define LIB_SPEC "%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}"
+#endif
+
+/* Make gcc agree with <machine/ansi.h> */
+
+#define SIZE_TYPE "unsigned int"
+#define PTRDIFF_TYPE "int"
+#define WCHAR_TYPE "unsigned int"
+#define WCHAR_TYPE_SIZE 32
+
+/* target machine storage layout */
+typedef struct GTY(()) machine_function
+{
+ /* Flag indicating that a .NSUBSPA directive has been output for
+ this function. */
+ int in_nsubspa;
+} machine_function;
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ (MODE) = word_mode;
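+
+/* Illustrative note (not in the upstream sources): with the definition
+ above, QImode and HImode scalars (and SImode scalars on the 64-bit
+ port) are held in word_mode registers; e.g. a `short' local lives in
+ a full 32-bit or 64-bit register, with its value constrained to the
+ range of the declared type. */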
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is true on the HP-PA. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN 1
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+
+/* Minimum number of units in a word. If this is undefined, the default
+ is UNITS_PER_WORD. Otherwise, it is the constant value that is the
+ smallest value that UNITS_PER_WORD can have at run-time.
+
+ FIXME: This needs to be 4 when TARGET_64BIT is true to suppress the
+ building of various TImode routines in libgcc. The HP runtime
+ specification doesn't provide the alignment requirements and calling
+ conventions for TImode variables. */
+#define MIN_UNITS_PER_WORD 4
+
+/* The widest floating point format supported by the hardware. Note that
+ setting this influences some Ada floating point type sizes, currently
+ required for GNAT to operate properly. */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Largest alignment required for any stack parameter, in bits.
+ Don't define this if it is equal to PARM_BOUNDARY */
+#define MAX_PARM_BOUNDARY BIGGEST_ALIGNMENT
+
+/* Boundary (in *bits*) on which stack pointer is always aligned;
+ certain optimizations in combine depend on this.
+
+ The HP-UX runtime documents mandate 64-byte and 16-byte alignment for
+ the stack on the 32 and 64-bit ports, respectively. However, we
+ are only guaranteed that the stack is aligned to BIGGEST_ALIGNMENT
+ in main. Thus, we treat the former as the preferred alignment. */
+#define STACK_BOUNDARY BIGGEST_ALIGNMENT
+#define PREFERRED_STACK_BOUNDARY (TARGET_64BIT ? 128 : 512)
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY BITS_PER_WORD
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT (2 * BITS_PER_WORD)
+
+/* Get around hp-ux assembler bug, and make strcpy of constants fast. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
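+
+/* Illustrative note (not in the upstream sources): with the two macros
+ above, a string constant such as "abc" or an object like
+ `static char buf[5];' is given BITS_PER_WORD alignment even though
+ its natural alignment is only 8 bits, so word instructions can be
+ used to copy it. */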
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ pa_modes_tieable_p (MODE1, MODE2)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* The HP-PA pc isn't overloaded on a register that the compiler knows about. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 30
+
+/* Fixed register for local variable access. Always eliminated. */
+#define FRAME_POINTER_REGNUM (TARGET_64BIT ? 61 : 89)
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 3
+
+/* Don't allow hard registers to be renamed into r2 unless r2
+ is already live or already being saved (due to eh). */
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ ((NEW_REG) != 2 || df_regs_ever_live_p (2) || crtl->calls_eh_return)
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM (TARGET_64BIT ? 29 : 3)
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? 31 : 29)
+
+/* Register used to address the offset table for position-independent
+ data references. */
+#define PIC_OFFSET_TABLE_REGNUM \
+ (flag_pic ? (TARGET_64BIT ? 27 : 19) : INVALID_REGNUM)
+
+#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED 1
+
+/* Function to return the rtx used to save the pic offset table register
+ across function calls. */
+extern struct rtx_def *hppa_pic_save_rtx (void);
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Register in which address to store a structure value
+ is passed to a function. */
+#define PA_STRUCT_VALUE_REGNUM 28
+
+/* Definitions for register eliminations.
+
+ We have two registers that can be eliminated. First, the frame pointer
+ register can often be eliminated in favor of the stack pointer register.
+ Secondly, the argument pointer register can always be eliminated in the
+ 32-bit runtimes. */
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ The argument pointer cannot be eliminated in the 64-bit runtime. It
+ is the same register as the hard frame pointer in the 32-bit runtime.
+ So, it does not need to be listed. */
+#define ELIMINABLE_REGS \
+{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM} }
+
+/* Define the offset between two registers, one to be eliminated,
+ and the other its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = pa_initial_elimination_offset(FROM, TO))
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 3 ? (N) + 20 : (N) == 3 ? 31 : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 29)
+#define EH_RETURN_HANDLER_RTX pa_eh_return_handler_rtx ()
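+
+/* Illustrative note (not in the upstream sources): EH_RETURN_DATA_REGNO
+ above maps the exception data values to %r20, %r21, %r22 and %r31:
+ N = 0 -> 20, N = 1 -> 21, N = 2 -> 22, N = 3 -> 31,
+ and any larger N yields INVALID_REGNUM. */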
+
+/* Offset from the frame pointer register value to the top of stack. */
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0
+
+/* The maximum number of hard registers that can be saved in the call
+ frame. The soft frame pointer is not included. */
+#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 1)
+
+/* A C expression whose value is RTL representing the location of the
+ incoming return address at the beginning of any function, before the
+ prologue. You only need to define this macro if you want to support
+ call frame debugging information like that provided by DWARF 2. */
+#define INCOMING_RETURN_ADDR_RTX (gen_rtx_REG (word_mode, 2))
+#define DWARF_FRAME_RETURN_COLUMN (DWARF_FRAME_REGNUM (2))
+
+/* A C expression whose value is an integer giving a DWARF 2 column
+ number that may be used as an alternate return column. This should
+ be defined only if DWARF_FRAME_RETURN_COLUMN is set to a general
+ register, but an alternate column needs to be used for signal frames.
+
+ Column 0 is not used but unfortunately its register size is set to
+ 4 bytes (sizeof CCmode) so it can't be used on 64-bit targets. */
+#define DWARF_ALT_FRAME_RETURN_COLUMN (FIRST_PSEUDO_REGISTER - 1)
+
+/* This macro chooses the encoding of pointers embedded in the exception
+ handling sections. If at all possible, this should be defined such
+ that the exception handling section will not require dynamic relocations,
+ and so may be read-only.
+
+ Because the HP assembler auto aligns, it is necessary to use
+ DW_EH_PE_aligned. It's not possible to make the data read-only
+ on the HP-UX SOM port since the linker requires fixups for label
+ differences in different sections to be word aligned. However,
+ the SOM linker can do unaligned fixups for absolute pointers.
+ We also need aligned pointers for global and function pointers.
+
+ Although the HP-UX 64-bit ELF linker can handle unaligned pc-relative
+ fixups, the runtime doesn't have a consistent relationship between
+ text and data for dynamically loaded objects. Thus, it's not possible
+ to use pc-relative encoding for pointers on this target. It may be
+ possible to use segment relative encodings but GAS doesn't currently
+ have a mechanism to generate these encodings. For other targets, we
+ use pc-relative encoding for pointers. If the pointer might require
+ dynamic relocation, we make it indirect. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (TARGET_GAS && !TARGET_HPUX \
+ ? (DW_EH_PE_pcrel \
+ | ((GLOBAL) || (CODE) == 2 ? DW_EH_PE_indirect : 0) \
+ | (TARGET_64BIT ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)) \
+ : (!TARGET_GAS || (GLOBAL) || (CODE) == 2 \
+ ? DW_EH_PE_aligned : DW_EH_PE_absptr))
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. We output pc-relative, and
+ indirect pc-relative ourself since we need some special magic to
+ generate pc-relative relocations, and to handle indirect function
+ pointers. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if (((ENCODING) & 0x70) == DW_EH_PE_pcrel) \
+ { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ if ((ENCODING) & DW_EH_PE_indirect) \
+ output_addr_const (FILE, get_deferred_plabel (ADDR)); \
+ else \
+ assemble_name (FILE, XSTR ((ADDR), 0)); \
+ fputs ("+8-$PIC_pcrel$0", FILE); \
+ goto DONE; \
+ } \
+ } while (0)
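+
+/* Illustrative sketch only (the exact directive string comes from
+ integer_asm_op and is assumed here, not taken from upstream): for a
+ 4-byte pc-relative, non-indirect encoding of symbol foo, the macro
+ above would emit something like
+
+ .word foo+8-$PIC_pcrel$0
+
+ while the indirect case routes foo through a deferred plabel first. */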
+
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define FP_REG_CLASS_P(CLASS) \
+ ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS)
+
+/* True if register is floating-point. */
+#define FP_REGNO_P(N) ((N) >= FP_REG_FIRST && (N) <= FP_REG_LAST)
+
+#define MAYBE_FP_REG_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), FP_REGS)
+
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+/* #define STACK_GROWS_DOWNWARD */
+
+/* Believe it or not. */
+#define ARGS_GROW_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 0
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated.
+
+ On the 32-bit ports, we reserve one slot for the previous frame
+ pointer and one fill slot. The fill slot is for compatibility
+ with HP compiled programs. On the 64-bit ports, we reserve one
+ slot for the previous frame pointer. */
+#define STARTING_FRAME_OFFSET 8
+
+/* Define STACK_ALIGNMENT_NEEDED to zero to disable final alignment
+ of the stack. The default is to align it to STACK_BOUNDARY. */
+#define STACK_ALIGNMENT_NEEDED 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On the HP-PA, don't define this because there are no push insns. */
+/* #define PUSH_ROUNDING(BYTES) */
+
+/* Offset of first parameter from the argument pointer register value.
+ This value will be negated because the arguments grow down.
+ Also note that on STACK_GROWS_UPWARD machines (such as this one)
+ this is the distance from the frame pointer to the end of the first
+ argument, not its beginning. To get the real offset of the first
+ argument, the size of the argument must be added. */
+
+#define FIRST_PARM_OFFSET(FNDECL) (TARGET_64BIT ? -64 : -32)
+
+/* When a parameter is passed in a register, stack space is still
+ allocated for it. */
+#define REG_PARM_STACK_SPACE(DECL) (TARGET_64BIT ? 64 : 16)
+
+/* Define this if the above stack space is to be considered part of the
+ space allocated by the caller. */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+/* Keep the stack pointer constant throughout the function.
+ This is both an optimization and a necessity: longjmp
+ doesn't behave itself when the stack pointer moves within
+ the function! */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* The weird HPPA calling conventions require a minimum of 48 bytes on
+ the stack: 16 bytes for register saves, and 32 bytes for magic.
+ This is the difference between the logical top of stack and the
+ actual sp.
+
+ On the 64-bit port, the HP C compiler allocates a 48-byte frame
+ marker, although the runtime documentation only describes a 16
+ byte marker. For compatibility, we allocate 48 bytes. */
+#define STACK_POINTER_OFFSET \
+ (TARGET_64BIT ? -(crtl->outgoing_args_size + 48): -32)
+
+#define STACK_DYNAMIC_OFFSET(FNDECL) \
+ (TARGET_64BIT \
+ ? (STACK_POINTER_OFFSET) \
+ : ((STACK_POINTER_OFFSET) - crtl->outgoing_args_size))
+
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On the HP-PA, the WORDS field holds the number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address). Thus, 4 or
+ more means all following args should go on the stack.
+
+ The INCOMING field tracks whether this is an "incoming" or
+ "outgoing" argument.
+
+ The INDIRECT field indicates whether this is an indirect
+ call or not.
+
+ The NARGS_PROTOTYPE field indicates that an argument does not
+ have a prototype when it is less than or equal to 0. */
+
+struct hppa_args {int words, nargs_prototype, incoming, indirect; };
+
+#define CUMULATIVE_ARGS struct hppa_args
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ (CUM).words = 0, \
+ (CUM).incoming = 0, \
+ (CUM).indirect = (FNTYPE) && !(FNDECL), \
+ (CUM).nargs_prototype = (FNTYPE && prototype_p (FNTYPE) \
+ ? (list_length (TYPE_ARG_TYPES (FNTYPE)) - 1 \
+ + (TYPE_MODE (TREE_TYPE (FNTYPE)) == BLKmode \
+ || pa_return_in_memory (TREE_TYPE (FNTYPE), 0))) \
+ : 0)
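+
+/* A worked example (illustrative, not part of upstream): for a direct
+ call to `double f (int, double);' the initializer above yields
+ words = 0, incoming = 0, indirect = 0, and, since the prototype list
+ int, double, void has length 3 and the return value is neither
+ BLKmode nor returned in memory,
+ nargs_prototype = 3 - 1 + 0 = 2. */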
+
+
+
+/* Similar, but when scanning the definition of a procedure. We always
+ set NARGS_PROTOTYPE large so we never return a PARALLEL. */
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM,FNTYPE,IGNORE) \
+ (CUM).words = 0, \
+ (CUM).incoming = 1, \
+ (CUM).indirect = 0, \
+ (CUM).nargs_prototype = 1000
+
+/* Figure out the size in words of the function argument. The size
+ returned by this macro should always be greater than zero because
+ we pass variable and zero sized objects by reference. */
+
+#define FUNCTION_ARG_SIZE(MODE, TYPE) \
+ ((((MODE) != BLKmode \
+ ? (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \
+ : int_size_in_bytes (TYPE)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
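+
+/* Illustrative note (not in the upstream sources): on the 32-bit port
+ the macro above yields 2 for DFmode (8 bytes) and 3 for a 10-byte
+ BLKmode struct, i.e. (10 + 4 - 1) / 4; the result is never zero
+ because variable and zero sized objects are passed by reference. */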
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On the HP-PA the first four words of args are normally in registers
+ and the rest are pushed. But any arg that won't entirely fit in regs
+ is pushed.
+
+ Arguments passed in registers are either 1 or 2 words long.
+
+ The caller must make a distinction between calls to explicitly named
+ functions and calls through pointers to functions -- the conventions
+ are different! Calls through pointers to functions only use general
+ registers for the first four argument words.
+
+ Of course all this is different for the portable runtime model
+ HP wants everyone to use for ELF. Ugh. Here's a quick description
+ of how it's supposed to work.
+
+ 1) callee side remains unchanged. It expects integer args to be
+ in the integer registers, float args in the float registers and
+ unnamed args in integer registers.
+
+ 2) caller side now depends on if the function being called has
+ a prototype in scope (rather than if it's being called indirectly).
+
+ 2a) If there is a prototype in scope, then arguments are passed
+ according to their type (ints in integer registers, floats in float
+ registers, unnamed args in integer registers).
+
+ 2b) If there is no prototype in scope, then floating point arguments
+ are passed in both integer and float registers. egad.
+
+ FYI: The portable parameter passing conventions are almost exactly like
+ the standard parameter passing conventions on the RS6000. That's why
+ you'll see lots of similar code in rs6000.h. */
+
+/* If defined, a C expression which determines whether, and in which
+ direction, to pad out an argument with extra space. */
+#define FUNCTION_ARG_PADDING(MODE, TYPE) function_arg_padding ((MODE), (TYPE))
+
+/* Specify padding for the last element of a block move between registers
+ and memory.
+
+ The 64-bit runtime specifies that objects need to be left justified
+ (i.e., the normal justification for a big endian target). The 32-bit
+ runtime specifies right justification for objects smaller than 64 bits.
+ We use a DImode register in the parallel for 5 to 7 byte structures
+ so that there is only one element. This allows the object to be
+ correctly padded. */
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ function_arg_padding ((MODE), (TYPE))
+
+
+/* On HPPA, we emit profiling code as rtl via PROFILE_HOOK rather than
+ as assembly via FUNCTION_PROFILER. Just output a local label.
+ We can't use the function label because the GAS SOM target can't
+ handle the difference of a global symbol and a local symbol. */
+
+#ifndef FUNC_BEGIN_PROLOG_LABEL
+#define FUNC_BEGIN_PROLOG_LABEL "LFBP"
+#endif
+
+#define FUNCTION_PROFILER(FILE, LABEL) \
+ (*targetm.asm_out.internal_label) (FILE, FUNC_BEGIN_PROLOG_LABEL, LABEL)
+
+#define PROFILE_HOOK(label_no) hppa_profile_hook (label_no)
+void hppa_profile_hook (int label_no);
+
+/* The profile counter if emitted must come before the prologue. */
+#define PROFILE_BEFORE_PROLOGUE 1
+
+/* We never want final.c to emit profile counters. When profile
+ counters are required, we have to defer emitting them to the end
+ of the current file. */
+#define NO_PROFILE_COUNTERS 1
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+extern int may_call_alloca;
+
+#define EXIT_IGNORE_STACK \
+ (get_frame_size () != 0 \
+ || cfun->calls_alloca || crtl->outgoing_args_size)
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
+
+/* Alignment required by the trampoline. */
+
+#define TRAMPOLINE_ALIGNMENT BITS_PER_WORD
+
+/* Minimum length of a cache line. A length of 16 will work on all
+ PA-RISC processors. All PA 1.1 processors have a cache line of
+ 32 bytes. Most but not all PA 2.0 processors have a cache line
+ of 64 bytes. As cache flushes are expensive and we don't support
+ PA 1.0, we use a minimum length of 32. */
+
+#define MIN_CACHELINE_SIZE 32
+
+
+/* Addressing modes, and classification of registers for them.
+
+ Using autoincrement addressing modes on PA8000 class machines is
+ not profitable. */
+
+#define HAVE_POST_INCREMENT (pa_cpu < PROCESSOR_8000)
+#define HAVE_POST_DECREMENT (pa_cpu < PROCESSOR_8000)
+
+#define HAVE_PRE_DECREMENT (pa_cpu < PROCESSOR_8000)
+#define HAVE_PRE_INCREMENT (pa_cpu < PROCESSOR_8000)
+
+/* Macros to check register numbers against specific register classes. */
+
+/* The following macros assume that X is a hard or pseudo reg number.
+ They give nonzero only if X is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(X) \
+ ((X) && ((X) < 32 \
+ || ((X) == FRAME_POINTER_REGNUM) \
+ || ((X) >= FIRST_PSEUDO_REGISTER \
+ && reg_renumber \
+ && (unsigned) reg_renumber[X] < 32)))
+#define REGNO_OK_FOR_BASE_P(X) \
+ ((X) && ((X) < 32 \
+ || ((X) == FRAME_POINTER_REGNUM) \
+ || ((X) >= FIRST_PSEUDO_REGISTER \
+ && reg_renumber \
+ && (unsigned) reg_renumber[X] < 32)))
+#define REGNO_OK_FOR_FP_P(X) \
+ (FP_REGNO_P (X) \
+ || (X >= FIRST_PSEUDO_REGISTER \
+ && reg_renumber \
+ && FP_REGNO_P (reg_renumber[X])))
+
+/* Now macros that check whether X is a register and also,
+ strictly, whether it is in a specified class.
+
+ These macros are specific to the HP-PA, and may be used only
+ in code for printing assembler insns and in conditions for
+ define_optimization. */
+
+/* 1 if X is an fp register. */
+
+#define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X)))
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Non-TLS symbolic references. */
+#define PA_SYMBOL_REF_TLS_P(RTX) \
+ (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0)
+
+/* Recognize any constant value that is a valid address except
+ for symbolic addresses. We get better CSE by rejecting them
+ here and allowing hppa_legitimize_address to break them up. We
+ use most of the constants accepted by CONSTANT_P, except CONST_DOUBLE. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ ((GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == SYMBOL_REF && !SYMBOL_REF_TLS_MODEL (X)) \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \
+ || GET_CODE (X) == HIGH) \
+ && (reload_in_progress || reload_completed || ! symbolic_expression_p (X)))
+
+/* A C expression that is nonzero if we are using the new HP assembler. */
+
+#ifndef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 0
+#endif
+
+/* The macros below define the immediate range for CONST_INTS on
+ the 64-bit port. Constants in this range can be loaded in three
+ instructions using a ldil/ldo/depdi sequence. Constants outside
+ this range are forced to the constant pool prior to reload. */
+
+#define MAX_LEGIT_64BIT_CONST_INT ((HOST_WIDE_INT) 32 << 31)
+#define MIN_LEGIT_64BIT_CONST_INT ((HOST_WIDE_INT) -32 << 31)
+#define LEGITIMATE_64BIT_CONST_INT_P(X) \
+ ((X) >= MIN_LEGIT_64BIT_CONST_INT && (X) < MAX_LEGIT_64BIT_CONST_INT)
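+
+/* Illustrative note (not in the upstream sources): the bounds above are
+ +/- 2^36, so 0x800000000 (2^35) satisfies
+ LEGITIMATE_64BIT_CONST_INT_P while 0x1000000000 (2^36) does not;
+ the range is half-open at the top. */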
+
+/* A C expression that is nonzero if X is a legitimate constant for an
+ immediate operand.
+
+ We include all constant integers and constant doubles, but not
+ floating-point, except for floating-point zero. We reject LABEL_REFs
+ if we're not using gas or the new HP assembler.
+
+ In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
+ that need more than three instructions to load prior to reload. This
+ limit is somewhat arbitrary. It takes three instructions to load a
+ CONST_INT from memory but two are memory accesses. It may be better
+ to increase the allowed range for CONST_INTS. We may also be able
+ to handle CONST_DOUBLES. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ ((GET_MODE_CLASS (GET_MODE (X)) != MODE_FLOAT \
+ || (X) == CONST0_RTX (GET_MODE (X))) \
+ && (NEW_HP_ASSEMBLER \
+ || TARGET_GAS \
+ || GET_CODE (X) != LABEL_REF) \
+ && !PA_SYMBOL_REF_TLS_P (X) \
+ && (!TARGET_64BIT \
+ || GET_CODE (X) != CONST_DOUBLE) \
+ && (!TARGET_64BIT \
+ || HOST_BITS_PER_WIDE_INT <= 32 \
+ || GET_CODE (X) != CONST_INT \
+ || reload_in_progress \
+ || reload_completed \
+ || LEGITIMATE_64BIT_CONST_INT_P (INTVAL (X)) \
+ || cint_ok_for_move (INTVAL (X))) \
+ && !function_label_operand (X, VOIDmode))
+
+/* Target flags set on a symbol_ref. */
+
+/* Set by ASM_OUTPUT_SYMBOL_REF when a symbol_ref is output. */
+#define SYMBOL_FLAG_REFERENCED (1 << SYMBOL_FLAG_MACH_DEP_SHIFT)
+#define SYMBOL_REF_REFERENCED_P(RTX) \
+ ((SYMBOL_REF_FLAGS (RTX) & SYMBOL_FLAG_REFERENCED) != 0)
+
+/* Defines for constraints.md. */
+
+/* Return 1 iff OP is a scaled or unscaled index address. */
+#define IS_INDEX_ADDR_P(OP) \
+ (GET_CODE (OP) == PLUS \
+ && GET_MODE (OP) == Pmode \
+ && (GET_CODE (XEXP (OP, 0)) == MULT \
+ || GET_CODE (XEXP (OP, 1)) == MULT \
+ || (REG_P (XEXP (OP, 0)) \
+ && REG_P (XEXP (OP, 1)))))
+
+/* Return 1 iff OP is a LO_SUM DLT address. */
+#define IS_LO_SUM_DLT_ADDR_P(OP) \
+ (GET_CODE (OP) == LO_SUM \
+ && GET_MODE (OP) == Pmode \
+ && REG_P (XEXP (OP, 0)) \
+ && REG_OK_FOR_BASE_P (XEXP (OP, 0)) \
+ && GET_CODE (XEXP (OP, 1)) == UNSPEC)
+
+/* Nonzero if 14-bit offsets can be used for all loads and stores.
+ This is not possible when generating PA 1.x code as floating point
+ loads and stores only support 5-bit offsets. Note that we do not
+ forbid the use of 14-bit offsets in GO_IF_LEGITIMATE_ADDRESS.
+ Instead, we use pa_secondary_reload() to reload integer mode
+ REG+D memory addresses used in floating point loads and stores.
+
+ FIXME: the ELF32 linker clobbers the LSB of the FP register number
+ in PA 2.0 floating-point insns with long displacements. This is
+ because R_PARISC_DPREL14WR and other relocations like it are not
+ yet supported by GNU ld. For now, we reject long displacements
+ on this target. */
+
+#define INT14_OK_STRICT \
+ (TARGET_SOFT_FLOAT \
+ || TARGET_DISABLE_FPREGS \
+ || (TARGET_PA_20 && !TARGET_ELF32))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) \
+ (REGNO (X) && (REGNO (X) < 32 \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) \
+ (REGNO (X) && (REGNO (X) < 32 \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a
+ valid memory address for an instruction. The MODE argument is the
+ machine mode for the MEM expression that wants to use this address.
+
+ On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
+ REG+REG, and REG+(REG*SCALE). The indexed address forms are only
+ available with floating point loads and stores, and integer loads.
+ We get better code by allowing indexed addresses in the initial
+ RTL generation.
+
+ The acceptance of indexed addresses as legitimate implies that we
+ must provide patterns for doing indexed integer stores, or the move
+ expanders must force the address of an indexed store to a register.
+ We have adopted the latter approach.
+
+ Another function of GO_IF_LEGITIMATE_ADDRESS is to ensure that
+ the base register is a valid pointer for indexed instructions.
+ On targets that have non-equivalent space registers, we have to
+ know at the time of assembler output which register in a REG+REG
+ pair is the base register. The REG_POINTER flag is sometimes lost
+ in reload and the following passes, so it can't be relied on during
+ code generation. Thus, we either have to canonicalize the order
+ of the registers in REG+REG indexed addresses, or treat REG+REG
+ addresses separately and provide patterns for both permutations.
+
+ The latter approach requires several hundred additional lines of
+ code in pa.md. The downside to canonicalizing is that a PLUS
+ in the wrong order can't combine to form a scaled indexed memory
+ operand. As we won't need to canonicalize the operands if the
+ REG_POINTER lossage can be fixed, it seems better to canonicalize.
+
+ We initially break out scaled indexed addresses in canonical order
+ in emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
+ scaled indexed addresses during RTL generation. However, fold_rtx
+ has its own opinion on how the operands of a PLUS should be ordered.
+ If one of the operands is equivalent to a constant, it will make
+ that operand the second operand. As the base register is likely to
+ be equivalent to a SYMBOL_REF, we have made it the second operand.
+
+ GO_IF_LEGITIMATE_ADDRESS accepts REG+REG as legitimate when the
+ operands are in the order INDEX+BASE on targets with non-equivalent
+ space registers, and in any order on targets with equivalent space
+ registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
+
+ We treat a SYMBOL_REF as legitimate if it is part of the current
+ function's constant-pool, because such addresses can actually be
+ output as REG+SMALLINT. */
+
+#define VAL_5_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x10 < 0x20)
+#define INT_5_BITS(X) VAL_5_BITS_P (INTVAL (X))
+
+#define VAL_U5_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) < 0x20)
+#define INT_U5_BITS(X) VAL_U5_BITS_P (INTVAL (X))
+
+#define VAL_11_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x400 < 0x800)
+#define INT_11_BITS(X) VAL_11_BITS_P (INTVAL (X))
+
+#define VAL_14_BITS_P(X) ((unsigned HOST_WIDE_INT)(X) + 0x2000 < 0x4000)
+#define INT_14_BITS(X) VAL_14_BITS_P (INTVAL (X))
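+
+/* Illustrative note (not in the upstream sources): each VAL_*_BITS_P
+ macro above tests a signed range with a single unsigned comparison by
+ biasing the value. VAL_14_BITS_P, for example, accepts exactly
+ -8192 .. 8191:
+ (unsigned) -8192 + 0x2000 == 0x0 < 0x4000
+ (unsigned) 8191 + 0x2000 == 0x3fff < 0x4000
+ (unsigned) 8192 + 0x2000 == 0x4000 (rejected). */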
+
+#if HOST_BITS_PER_WIDE_INT > 32
+#define VAL_32_BITS_P(X) \
+ ((unsigned HOST_WIDE_INT)(X) + ((unsigned HOST_WIDE_INT) 1 << 31) \
+ < (unsigned HOST_WIDE_INT) 2 << 31)
+#else
+#define VAL_32_BITS_P(X) 1
+#endif
+#define INT_32_BITS(X) VAL_32_BITS_P (INTVAL (X))
+
+/* These are the modes that we allow for scaled indexing. */
+#define MODE_OK_FOR_SCALED_INDEXING_P(MODE) \
+ ((TARGET_64BIT && (MODE) == DImode) \
+ || (MODE) == SImode \
+ || (MODE) == HImode \
+ || (MODE) == SFmode \
+ || (MODE) == DFmode)
+
+/* These are the modes that we allow for unscaled indexing. */
+#define MODE_OK_FOR_UNSCALED_INDEXING_P(MODE) \
+ ((TARGET_64BIT && (MODE) == DImode) \
+ || (MODE) == SImode \
+ || (MODE) == HImode \
+ || (MODE) == QImode \
+ || (MODE) == SFmode \
+ || (MODE) == DFmode)
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+{ \
+ if ((REG_P (X) && REG_OK_FOR_BASE_P (X)) \
+ || ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == POST_DEC \
+ || GET_CODE (X) == PRE_INC || GET_CODE (X) == POST_INC) \
+ && REG_P (XEXP (X, 0)) \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0)))) \
+ goto ADDR; \
+ else if (GET_CODE (X) == PLUS) \
+ { \
+ rtx base = 0, index = 0; \
+ if (REG_P (XEXP (X, 1)) \
+ && REG_OK_FOR_BASE_P (XEXP (X, 1))) \
+ base = XEXP (X, 1), index = XEXP (X, 0); \
+ else if (REG_P (XEXP (X, 0)) \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0))) \
+ base = XEXP (X, 0), index = XEXP (X, 1); \
+ if (base \
+ && GET_CODE (index) == CONST_INT \
+ && ((INT_14_BITS (index) \
+ && (((MODE) != DImode \
+ && (MODE) != SFmode \
+ && (MODE) != DFmode) \
+ /* The base register for DImode loads and stores \
+ with long displacements must be aligned because \
+ the lower three bits in the displacement are \
+ assumed to be zero. */ \
+ || ((MODE) == DImode \
+ && (!TARGET_64BIT \
+ || (INTVAL (index) % 8) == 0)) \
+ /* Similarly, the base register for SFmode/DFmode \
+ loads and stores with long displacements must \
+ be aligned. */ \
+ || (((MODE) == SFmode || (MODE) == DFmode) \
+ && INT14_OK_STRICT \
+ && (INTVAL (index) % GET_MODE_SIZE (MODE)) == 0))) \
+ || INT_5_BITS (index))) \
+ goto ADDR; \
+ if (!TARGET_DISABLE_INDEXING \
+ /* Only accept the "canonical" INDEX+BASE operand order \
+ on targets with non-equivalent space registers. */ \
+ && (TARGET_NO_SPACE_REGS \
+ ? (base && REG_P (index)) \
+ : (base == XEXP (X, 1) && REG_P (index) \
+ && (reload_completed \
+ || (reload_in_progress && HARD_REGISTER_P (base)) \
+ || REG_POINTER (base)) \
+ && (reload_completed \
+ || (reload_in_progress && HARD_REGISTER_P (index)) \
+ || !REG_POINTER (index)))) \
+ && MODE_OK_FOR_UNSCALED_INDEXING_P (MODE) \
+ && REG_OK_FOR_INDEX_P (index) \
+ && borx_reg_operand (base, Pmode) \
+ && borx_reg_operand (index, Pmode)) \
+ goto ADDR; \
+ if (!TARGET_DISABLE_INDEXING \
+ && base \
+ && GET_CODE (index) == MULT \
+ && MODE_OK_FOR_SCALED_INDEXING_P (MODE) \
+ && REG_P (XEXP (index, 0)) \
+ && GET_MODE (XEXP (index, 0)) == Pmode \
+ && REG_OK_FOR_INDEX_P (XEXP (index, 0)) \
+ && GET_CODE (XEXP (index, 1)) == CONST_INT \
+ && INTVAL (XEXP (index, 1)) \
+ == (HOST_WIDE_INT) GET_MODE_SIZE (MODE) \
+ && borx_reg_operand (base, Pmode)) \
+ goto ADDR; \
+ } \
+ else if (GET_CODE (X) == LO_SUM \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && CONSTANT_P (XEXP (X, 1)) \
+ && (TARGET_SOFT_FLOAT \
+ /* We can allow symbolic LO_SUM addresses for PA2.0. */ \
+ || (TARGET_PA_20 \
+ && !TARGET_ELF32 \
+ && GET_CODE (XEXP (X, 1)) != CONST_INT) \
+ || ((MODE) != SFmode \
+ && (MODE) != DFmode))) \
+ goto ADDR; \
+ else if (GET_CODE (X) == LO_SUM \
+ && GET_CODE (XEXP (X, 0)) == SUBREG \
+ && GET_CODE (SUBREG_REG (XEXP (X, 0))) == REG \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (XEXP (X, 0))) \
+ && CONSTANT_P (XEXP (X, 1)) \
+ && (TARGET_SOFT_FLOAT \
+ /* We can allow symbolic LO_SUM addresses for PA2.0. */ \
+ || (TARGET_PA_20 \
+ && !TARGET_ELF32 \
+ && GET_CODE (XEXP (X, 1)) != CONST_INT) \
+ || ((MODE) != SFmode \
+ && (MODE) != DFmode))) \
+ goto ADDR; \
+ else if (GET_CODE (X) == CONST_INT && INT_5_BITS (X)) \
+ goto ADDR; \
+ /* Needed for -fPIC */ \
+ else if (GET_CODE (X) == LO_SUM \
+ && GET_CODE (XEXP (X, 0)) == REG \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && GET_CODE (XEXP (X, 1)) == UNSPEC \
+ && (TARGET_SOFT_FLOAT \
+ || (TARGET_PA_20 && !TARGET_ELF32) \
+ || ((MODE) != SFmode \
+ && (MODE) != DFmode))) \
+ goto ADDR; \
+}
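+
+/* Illustrative examples (not part of upstream): for SImode the macro
+ above accepts, among other forms,
+ (reg %r4) a base register
+ (plus (reg %r4) (const_int 100)) REG+SMALLINT, 14-bit offset
+ (plus (reg %r5) (reg %r6)) REG+REG, unscaled index
+ (plus (mult (reg %r6) (const_int 4))
+ (reg %r5)) scaled index, base second
+ subject to the REG_POINTER, space register and
+ TARGET_DISABLE_INDEXING conditions spelled out above. */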
+
+/* Look for machine dependent ways to make the invalid address AD a
+ valid address.
+
+ For the PA, transform:
+
+ memory(X + <large int>)
+
+ into:
+
+ if (<large int> & mask) >= 16
+ Y = (<large int> & ~mask) + mask + 1 Round up.
+ else
+ Y = (<large int> & ~mask) Round down.
+ Z = X + Y
+ memory (Z + (<large int> - Y));
+
+ This makes reload inheritance and reload_cse work better since Z
+ can be reused.
+
+ There may be more opportunities to improve code with this hook. */
+#define LEGITIMIZE_RELOAD_ADDRESS(AD, MODE, OPNUM, TYPE, IND, WIN) \
+do { \
+ long offset, newoffset, mask; \
+ rtx new_rtx, temp = NULL_RTX; \
+ \
+ mask = (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff); \
+ \
+ if (optimize && GET_CODE (AD) == PLUS) \
+ temp = simplify_binary_operation (PLUS, Pmode, \
+ XEXP (AD, 0), XEXP (AD, 1)); \
+ \
+ new_rtx = temp ? temp : AD; \
+ \
+ if (optimize \
+ && GET_CODE (new_rtx) == PLUS \
+ && GET_CODE (XEXP (new_rtx, 0)) == REG \
+ && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT) \
+ { \
+ offset = INTVAL (XEXP ((new_rtx), 1)); \
+ \
+ /* Choose rounding direction. Round up if we are >= halfway. */ \
+ if ((offset & mask) >= ((mask + 1) / 2)) \
+ newoffset = (offset & ~mask) + mask + 1; \
+ else \
+ newoffset = offset & ~mask; \
+ \
+ /* Ensure that long displacements are aligned. */ \
+ if (mask == 0x3fff \
+ && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || (TARGET_64BIT && (MODE) == DImode))) \
+ newoffset &= ~(GET_MODE_SIZE (MODE) - 1); \
+ \
+ if (newoffset != 0 && VAL_14_BITS_P (newoffset)) \
+ { \
+ temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0), \
+ GEN_INT (newoffset)); \
+ AD = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));\
+ push_reload (XEXP (AD, 0), 0, &XEXP (AD, 0), 0, \
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, \
+ (OPNUM), (TYPE)); \
+ goto WIN; \
+ } \
+ } \
+} while (0)
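+
+/* A worked example with made-up values (not part of upstream):
+ reloading memory (%r4 + 0x12345) in SImode gives mask = 0x3fff and
+ offset = 0x12345. Since offset & mask = 0x2345 >= 0x2000, we round
+ up: newoffset = (0x12345 & ~0x3fff) + 0x3fff + 1 = 0x14000, and the
+ address is rewritten as (%r4 + 0x14000) + (-0x1cbb). The residual
+ -0x1cbb fits in 14 bits, and %r4 + 0x14000 is pushed as a reload
+ that later references can inherit. */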
+
+
+
+#define TARGET_ASM_SELECT_SECTION pa_select_section
+
+/* Return a nonzero value if DECL has a section attribute. */
+#define IN_NAMED_SECTION_P(DECL) \
+ ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
+ && DECL_SECTION_NAME (DECL) != NULL_TREE)
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in).
+
+ The macro definition, if any, is executed immediately after the
+ rtl for DECL or other node is created.
+ The value of the rtl will be a `mem' whose address is a
+ `symbol_ref'.
+
+ The usual thing for this macro to do is to set a flag in the
+ `symbol_ref' (such as `SYMBOL_REF_FLAG') or to store a modified
+ name string in the `symbol_ref' (if one bit is not enough
+ information).
+
+ On the HP-PA we use this to indicate if a symbol is in text or
+ data space. Also, function labels need special treatment. */
+
+#define TEXT_SPACE_P(DECL)\
+ (TREE_CODE (DECL) == FUNCTION_DECL \
+ || (TREE_CODE (DECL) == VAR_DECL \
+ && TREE_READONLY (DECL) && ! TREE_SIDE_EFFECTS (DECL) \
+ && (! DECL_INITIAL (DECL) || ! reloc_needed (DECL_INITIAL (DECL))) \
+ && !flag_pic) \
+ || CONSTANT_CLASS_P (DECL))
+
+#define FUNCTION_NAME_P(NAME) (*(NAME) == '@')
+
+/* Specify the machine mode that this machine uses for the index in the
+ tablejump instruction. For small tables, an element consists of an
+ ia-relative branch and its delay slot. When -mbig-switch is specified,
+ we use a 32-bit absolute address for non-pic code, and a 32-bit offset
+ for both 32 and 64-bit pic code. */
+#define CASE_VECTOR_MODE (TARGET_BIG_SWITCH ? SImode : DImode)
+
+/* Jump tables must be 32-bit aligned, no matter the size of the element. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 2
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 8
+
+/* Higher than the default as we prefer to use simple move insns
+ (better scheduling and delay slot filling) and because our
+ built-in block move is really a 2X unrolled loop.
+
+ Believe it or not, this has to be big enough to allow for copying all
+ arguments passed in registers to avoid infinite recursion during argument
+ setup for a function call. Why? Consider how we copy the stack slots
+ reserved for parameters when they may be trashed by a call. */
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode word_mode
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. For floating-point, CCFPmode
+ should be used. CC_NOOVmode should be used when the first operand is a
+ PLUS, MINUS, or NEG. CCmode should be used when no special processing is
+ needed. */
+#define SELECT_CC_MODE(OP,X,Y) \
+ (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT ? CCFPmode : CCmode)
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE SImode
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+#define NO_FUNCTION_CSE
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Adjust the cost of branches. */
+#define BRANCH_COST(speed_p, predictable_p) (pa_cpu == PROCESSOR_8000 ? 2 : 1)
+
+/* Handling the special cases is going to get too complicated for a macro;
+ just call `pa_adjust_insn_length' to do the real work. */
+#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
+ ((LENGTH) = pa_adjust_insn_length ((INSN), (LENGTH)))
+
+/* Millicode insns are actually function calls with some special
+ constraints on arguments and register usage.
+
+ Millicode calls always expect their arguments in the integer argument
+ registers, and always return their result in %r29 (ret1). They
+ are expected to clobber their arguments, %r1, %r29, and the return
+ pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
+
+ This macro tells reorg that the references to arguments and
+ millicode calls do not appear to happen until after the millicode call.
+ This allows reorg to put insns which set the argument registers into the
+ delay slot of the millicode call -- thus they act more like traditional
+ CALL_INSNs.
+
+ Note we cannot consider side effects of the insn to be delayed because
+ the branch and link insn will clobber the return pointer. If we happened
+ to use the return pointer in the delay slot of the call, then we lose.
+
+ get_attr_type will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+#define INSN_REFERENCES_ARE_DELAYED(X) (insn_refs_are_delayed (X))
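+
+/* For example (an illustrative sketch, not actual compiler output),
+ given
+
+ ldi 10,%r25 ; set up a millicode argument
+ bl $$mulI,%r31 ; millicode call; delay slot follows
+ nop
+
+ reorg may move the ldi into the delay slot, since this macro makes
+ the argument references appear to happen after the call:
+
+ bl $$mulI,%r31
+ ldi 10,%r25 ; argument set up in the delay slot */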
+
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+
+#define ASM_COMMENT_START ";"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF ""
+
+/* This is how to output the definition of a user-level label named NAME,
+ such as the label on a static function or variable NAME. */
+
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+ do { \
+ assemble_name ((FILE), (NAME)); \
+ if (TARGET_GAS) \
+ fputs (":\n", (FILE)); \
+ else \
+ fputc ('\n', (FILE)); \
+ } while (0)
+
+/* This is how to output a reference to a user-level label named NAME.
+ `assemble_name' uses this. */
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ do { \
+ const char *xname = (NAME); \
+ if (FUNCTION_NAME_P (NAME)) \
+ xname += 1; \
+ if (xname[0] == '*') \
+ xname += 1; \
+ else \
+ fputs (user_label_prefix, FILE); \
+ fputs (xname, FILE); \
+ } while (0)
+
+/* This is how we output the symbol_ref X. */
+
+#define ASM_OUTPUT_SYMBOL_REF(FILE,X) \
+ do { \
+ SYMBOL_REF_FLAGS (X) |= SYMBOL_FLAG_REFERENCED; \
+ assemble_name (FILE, XSTR (X, 0)); \
+ } while (0)
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%c$%s%04ld", (PREFIX)[0], (PREFIX) + 1, (long)(NUM))
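+
+/* For example, PREFIX "LC" and NUM 7 produce the string "*L$C0007".
+ The leading '*' makes ASM_OUTPUT_LABELREF above emit the name
+ verbatim, without prepending user_label_prefix. */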
+
+/* Output the definition of a compiler-generated label named NAME. */
+
+#define ASM_OUTPUT_INTERNAL_LABEL(FILE,NAME) \
+ do { \
+ assemble_name_raw ((FILE), (NAME)); \
+ if (TARGET_GAS) \
+ fputs (":\n", (FILE)); \
+ else \
+ fputc ('\n', (FILE)); \
+ } while (0)
+
+#define TARGET_ASM_GLOBALIZE_LABEL pa_globalize_label
+
+#define ASM_OUTPUT_ASCII(FILE, P, SIZE) \
+ output_ascii ((FILE), (P), (SIZE))
+
+/* Jump tables are always placed in the text section. Technically, it
+ is possible to put them in the readonly data section when -mbig-switch
+ is specified. This has the benefit of getting the table out of .text
+ and reducing branch lengths as a result. The downside is that an
+ additional insn (addil) is needed to access the table when generating
+ PIC code. The address difference table also has to use 32-bit
+ pc-relative relocations. Currently, GAS does not support these
+ relocations, although it could easily be modified to do so.
+ The table entries need to look like "$L1+(.+8-$L0)-$PIC_pcrel$0"
+ when using ELF GAS. A simple difference can be used when using
+ SOM GAS or the HP assembler. The final downside is that GDB
+ complains about the nesting of the label for the table when
+ debugging. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ if (TARGET_BIG_SWITCH) \
+ fprintf (FILE, "\t.word L$%04d\n", VALUE); \
+ else \
+ fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+ Since we always place jump tables in the text section, the difference
+ is absolute and requires no relocation. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ if (TARGET_BIG_SWITCH) \
+ fprintf (FILE, "\t.word L$%04d-L$%04d\n", VALUE, REL); \
+ else \
+ fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ fprintf (FILE, "\t.align %d\n", (1<<(LOG)))
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.blockz "HOST_WIDE_INT_PRINT_UNSIGNED"\n", \
+ (unsigned HOST_WIDE_INT)(SIZE))
+
+/* This says how to output an assembler line to define an uninitialized
+ global variable with size SIZE (in bytes) and alignment ALIGN (in bits).
+ This macro exists to properly support languages like C++ which do not
+ have common data. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ pa_asm_output_aligned_bss (FILE, NAME, SIZE, ALIGN)
+
+/* This says how to output an assembler line to define a global common symbol
+ with size SIZE (in bytes) and alignment ALIGN (in bits). */
+
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ pa_asm_output_aligned_common (FILE, NAME, SIZE, ALIGN)
+
+/* This says how to output an assembler line to define a local common symbol
+ with size SIZE (in bytes) and alignment ALIGN (in bits). This macro
+ controls how the assembler definitions of uninitialized static variables
+ are output. */
+
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+ pa_asm_output_aligned_local (FILE, NAME, SIZE, ALIGN)
+
+/* All HP assemblers use "!" to separate logical lines. */
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '!')
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null.
+
+ On the HP-PA, the CODE can be `r', meaning this is a register-only operand
+ and an immediate zero should be represented as `r0'.
+
+ Several % codes are defined:
+ O an operation
+ C compare conditions
+ N extract conditions
+ M modifier to handle preincrement addressing for memory refs.
+ F modifier to handle preincrement addressing for fp memory refs. */
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+{ rtx addr = ADDR; \
+ switch (GET_CODE (addr)) \
+ { \
+ case REG: \
+ fprintf (FILE, "0(%s)", reg_names [REGNO (addr)]); \
+ break; \
+ case PLUS: \
+ gcc_assert (GET_CODE (XEXP (addr, 1)) == CONST_INT); \
+ fprintf (FILE, "%d(%s)", (int)INTVAL (XEXP (addr, 1)), \
+ reg_names [REGNO (XEXP (addr, 0))]); \
+ break; \
+ case LO_SUM: \
+ if (!symbolic_operand (XEXP (addr, 1), VOIDmode)) \
+ fputs ("R'", FILE); \
+ else if (flag_pic == 0) \
+ fputs ("RR'", FILE); \
+ else \
+ fputs ("RT'", FILE); \
+ output_global_address (FILE, XEXP (addr, 1), 0); \
+ fputs ("(", FILE); \
+ output_operand (XEXP (addr, 0), 0); \
+ fputs (")", FILE); \
+ break; \
+ case CONST_INT: \
+ fprintf (FILE, HOST_WIDE_INT_PRINT_DEC "(%%r0)", INTVAL (addr)); \
+ break; \
+ default: \
+ output_addr_const (FILE, addr); \
+ }}
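+
+/* Illustrative outputs of the cases above: a plain REG %r4 prints as
+ "0(%r4)", a PLUS of %r4 and 16 prints as "16(%r4)", and a bare
+ CONST_INT 40 prints as "40(%r0)", using the always-zero %r0 as the
+ base register. */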
+
+
+/* Find the return address associated with the frame given by
+ FRAMEADDR. */
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
+ (return_addr_rtx (COUNT, FRAMEADDR))
+
+/* Used to mask out junk bits from the return address, such as
+ processor state, interrupt status, condition codes and the like. */
+#define MASK_RETURN_ADDR \
+ /* The privilege level is in the two low-order bits; mask them out \
+ of the return address. */ \
+ (GEN_INT (-4))
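+
+/* For example, -4 is ~3, so ANDing with it clears only the two
+ privilege-level bits: a raw return address of 0x40001003 masks to
+ 0x40001000. */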
+
+/* The number of Pmode words for the setjmp buffer. */
+#define JMP_BUF_SIZE 50
+
+/* We need a libcall to canonicalize function pointers on TARGET_ELF32. */
+#define CANONICALIZE_FUNCPTR_FOR_COMPARE_LIBCALL \
+ "__canonicalize_funcptr_for_compare"
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+
+/* The maximum offset in bytes for a PA 1.X pc-relative call to the
+ head of the preceding stub table. The selected offsets have been
+ chosen so that approximately one call stub is allocated for every
+ 86.7 instructions. A long branch stub is two instructions when
+ not generating PIC code. For HP-UX and ELF targets, PIC stubs are
+ seven and four instructions, respectively. */
+#define MAX_PCREL17F_OFFSET \
+ (flag_pic ? (TARGET_HPUX ? 198164 : 221312) : 240000)
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
new file mode 100644
index 000000000..7a032c8c0
--- /dev/null
+++ b/gcc/config/pa/pa.md
@@ -0,0 +1,9543 @@
+;;- Machine description for HP PA-RISC architecture for GCC compiler
+;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+;; 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by the Center for Software Science at the University
+;; of Utah.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This gcc Version 2 machine description is inspired by sparc.md and
+;; mips.md.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Uses of UNSPEC in this file:
+
+(define_constants
+ [(UNSPEC_CFFC 0) ; canonicalize_funcptr_for_compare
+ (UNSPEC_GOTO 1) ; indirect_goto
+ (UNSPEC_DLTIND14R 2) ;
+ (UNSPEC_TP 3)
+ (UNSPEC_TLSGD 4)
+ (UNSPEC_TLSLDM 5)
+ (UNSPEC_TLSLDO 6)
+ (UNSPEC_TLSLDBASE 7)
+ (UNSPEC_TLSIE 8)
+ (UNSPEC_TLSLE 9)
+ (UNSPEC_TLSGD_PIC 10)
+ (UNSPEC_TLSLDM_PIC 11)
+ (UNSPEC_TLSIE_PIC 12)
+ ])
+
+;; UNSPEC_VOLATILE:
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0) ; blockage
+ (UNSPECV_DCACHE 1) ; dcacheflush
+ (UNSPECV_ICACHE 2) ; icacheflush
+ (UNSPECV_OPC 3) ; outline_prologue_call
+ (UNSPECV_OEC 4) ; outline_epilogue_call
+ (UNSPECV_LONGJMP 5) ; builtin_longjmp
+ ])
+
+;; Maximum pc-relative branch offsets.
+
+;; These numbers are a bit smaller than the maximum allowable offsets
+;; so that a few instructions may be inserted before the actual branch.
+
+(define_constants
+ [(MAX_12BIT_OFFSET 8184) ; 12-bit branch
+ (MAX_17BIT_OFFSET 262100) ; 17-bit branch
+ ])
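+
+;; For example, a 12-bit conditional branch spans +-8192 bytes; using
+;; 8184 leaves headroom for two 4-byte insns to be emitted before the
+;; displacement overflows.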
+
+;; Mode and code iterators
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; This attribute defines the condition prefix for word and double word
+;; add, compare, subtract and logical instructions.
+(define_mode_attr dwc [(SI "") (DI "*")])
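+
+;; For example, a word compare-and-clear is written "cmpclr,<cond>"
+;; while the doubleword form is "cmpclr,*<cond>"; the "*" selects the
+;; 64-bit condition completer, as in the DImode scc patterns below.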
+
+;; Insn type. Used to default other attribute values.
+
+;; type "unary" insns have one input operand (1) and one output operand (0)
+;; type "binary" insns have two input operands (1,2) and one output (0)
+
+(define_attr "type"
+ "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload"
+ (const_string "binary"))
+
+(define_attr "pa_combine_type"
+ "fmpy,faddsub,uncond_branch,addmove,none"
+ (const_string "none"))
+
+;; Processor type (for scheduling, not code generation) -- this attribute
+;; must exactly match the processor_type enumeration in pa.h.
+;;
+;; FIXME: Add 800 scheduling for completeness?
+
+(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr")))
+
+;; Length (in # of bytes).
+(define_attr "length" ""
+ (cond [(eq_attr "type" "load,fpload")
+ (if_then_else (match_operand 1 "symbolic_memory_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "store,fpstore")
+ (if_then_else (match_operand 0 "symbolic_memory_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "binary,shift,nullshift")
+ (if_then_else (match_operand 2 "arith_operand" "")
+ (const_int 4) (const_int 12))
+
+ (eq_attr "type" "move,unary,shift,nullshift")
+ (if_then_else (match_operand 1 "arith_operand" "")
+ (const_int 4) (const_int 8))]
+
+ (const_int 4)))
+
+(define_asm_attributes
+ [(set_attr "length" "4")
+ (set_attr "type" "multi")])
+
+;; Attributes for instruction and branch scheduling
+
+;; For conditional branches.
+(define_attr "in_branch_delay" "false,true"
+ (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch")
+ (eq_attr "length" "4")
+ (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)")
+ (const_int 0)))
+ (const_string "true")
+ (const_string "false")))
+
+;; Disallow instructions which use the FPU since they will tie up the FPU
+;; even if the instruction is nullified.
+(define_attr "in_nullified_branch_delay" "false,true"
+ (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
+ (eq_attr "length" "4")
+ (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)")
+ (const_int 0)))
+ (const_string "true")
+ (const_string "false")))
+
+;; For calls and millicode calls. Allow unconditional branches in the
+;; delay slot.
+(define_attr "in_call_delay" "false,true"
+ (cond [(and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch")
+ (eq_attr "length" "4")
+ (eq (symbol_ref "RTX_FRAME_RELATED_P (insn)")
+ (const_int 0)))
+ (const_string "true")
+ (eq_attr "type" "uncond_branch")
+ (if_then_else (ne (symbol_ref "TARGET_JUMP_IN_DELAY")
+ (const_int 0))
+ (const_string "true")
+ (const_string "false"))]
+ (const_string "false")))
+
+
+;; Call delay slot description.
+(define_delay (eq_attr "type" "call")
+ [(eq_attr "in_call_delay" "true") (nil) (nil)])
+
+;; Sibcall delay slot description.
+(define_delay (eq_attr "type" "sibcall")
+ [(eq_attr "in_call_delay" "true") (nil) (nil)])
+
+;; Millicode call delay slot description.
+(define_delay (eq_attr "type" "milli")
+ [(eq_attr "in_call_delay" "true") (nil) (nil)])
+
+;; Return and other similar instructions.
+(define_delay (eq_attr "type" "btable_branch,branch,parallel_branch")
+ [(eq_attr "in_branch_delay" "true") (nil) (nil)])
+
+;; Floating point conditional branch delay slot description.
+(define_delay (eq_attr "type" "fbranch")
+ [(eq_attr "in_branch_delay" "true")
+ (eq_attr "in_nullified_branch_delay" "true")
+ (nil)])
+
+;; Integer conditional branch delay slot description.
+;; Nullification of conditional branches on the PA is dependent on the
+;; direction of the branch. Forward branches nullify true and
+;; backward branches nullify false. If the direction is unknown
+;; then nullification is not allowed.
+(define_delay (eq_attr "type" "cbranch")
+ [(eq_attr "in_branch_delay" "true")
+ (and (eq_attr "in_nullified_branch_delay" "true")
+ (attr_flag "forward"))
+ (and (eq_attr "in_nullified_branch_delay" "true")
+ (attr_flag "backward"))])
+
+(define_delay (and (eq_attr "type" "uncond_branch")
+ (eq (symbol_ref "following_call (insn)")
+ (const_int 0)))
+ [(eq_attr "in_branch_delay" "true") (nil) (nil)])
+
+;; Memory. Disregarding cache misses, the Mustang memory times are:
+;; load: 2, fpload: 3
+;; store, fpstore: 3 (during which no D-cache operations should be
+;; scheduled).
+
+;; The Timex (aka 700) has two floating-point units: ALU and MUL/DIV/SQRT.
+;; Timings:
+;; Instruction Time Unit Minimum Distance (unit contention)
+;; fcpy 3 ALU 2
+;; fabs 3 ALU 2
+;; fadd 3 ALU 2
+;; fsub 3 ALU 2
+;; fcmp 3 ALU 2
+;; fcnv 3 ALU 2
+;; fmpyadd 3 ALU,MPY 2
+;; fmpysub 3 ALU,MPY 2
+;; fmpycfxt 3 ALU,MPY 2
+;; fmpy 3 MPY 2
+;; fmpyi 3 MPY 2
+;; fdiv,sgl 10 MPY 10
+;; fdiv,dbl 12 MPY 12
+;; fsqrt,sgl 14 MPY 14
+;; fsqrt,dbl 18 MPY 18
+;;
+;; We don't model fmpyadd/fmpysub properly as those instructions
+;; keep both the FP ALU and MPY units busy. Given that these
+;; processors are obsolete, I'm not going to spend the time to
+;; model those instructions correctly.
+
+(define_automaton "pa700")
+(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")
+
+(define_insn_reservation "W0" 4
+ (and (eq_attr "type" "fpcc")
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W1" 3
+ (and (eq_attr "type" "fpalu")
+ (eq_attr "cpu" "700"))
+ "fpalu_700*2")
+
+(define_insn_reservation "W2" 3
+ (and (eq_attr "type" "fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*2")
+
+(define_insn_reservation "W3" 10
+ (and (eq_attr "type" "fpdivsgl")
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*10")
+
+(define_insn_reservation "W4" 12
+ (and (eq_attr "type" "fpdivdbl")
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*12")
+
+(define_insn_reservation "W5" 14
+ (and (eq_attr "type" "fpsqrtsgl")
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*14")
+
+(define_insn_reservation "W6" 18
+ (and (eq_attr "type" "fpsqrtdbl")
+ (eq_attr "cpu" "700"))
+ "fpmpy_700*18")
+
+(define_insn_reservation "W7" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+(define_insn_reservation "W8" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "700"))
+ "mem_700")
+
+(define_insn_reservation "W9" 3
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+(define_insn_reservation "W10" 3
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "700"))
+ "mem_700*3")
+
+(define_insn_reservation "W11" 5
+ (and (eq_attr "type" "fpstore_load")
+ (eq_attr "cpu" "700"))
+ "mem_700*5")
+
+(define_insn_reservation "W12" 6
+ (and (eq_attr "type" "store_fpload")
+ (eq_attr "cpu" "700"))
+ "mem_700*6")
+
+(define_insn_reservation "W13" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore,fpstore_load,store_fpload")
+ (eq_attr "cpu" "700"))
+ "dummy_700")
+
+;; We have a bypass for all computations in the FP unit which feed an
+;; FP store as long as the sizes are the same.
+(define_bypass 2 "W1,W2" "W10,W11" "hppa_fpstore_bypass_p")
+(define_bypass 9 "W3" "W10,W11" "hppa_fpstore_bypass_p")
+(define_bypass 11 "W4" "W10,W11" "hppa_fpstore_bypass_p")
+(define_bypass 13 "W5" "W10,W11" "hppa_fpstore_bypass_p")
+(define_bypass 17 "W6" "W10,W11" "hppa_fpstore_bypass_p")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+(define_bypass 4 "W8,W12" "W10,W11" "hppa_fpstore_bypass_p")
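+
+;; For example, an fpalu result (W1, latency 3) feeding an fpstore is
+;; cut to 2 cycles when hppa_fpstore_bypass_p allows it, while the
+;; anti-bypass stretches an fpload (W8, latency 2) feeding an fpstore
+;; to 4 cycles.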
+
+;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
+;; floating point computations with non-floating point computations (fp loads
+;; and stores are not fp computations).
+;;
+;; Memory. Disregarding cache misses, memory loads take two cycles; stores also
+;; take two cycles, during which no Dcache operations should be scheduled.
+;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
+;; all have the same memory characteristics if one disregards cache misses.
+;;
+;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
+;; There's no value in modeling the ALU and MUL separately though
+;; since there can never be a functional unit conflict given the
+;; latency and issue rates for those units.
+;;
+;; Timings:
+;; Instruction Time Unit Minimum Distance (unit contention)
+;; fcpy 2 ALU 1
+;; fabs 2 ALU 1
+;; fadd 2 ALU 1
+;; fsub 2 ALU 1
+;; fcmp 2 ALU 1
+;; fcnv 2 ALU 1
+;; fmpyadd 2 ALU,MPY 1
+;; fmpysub 2 ALU,MPY 1
+;; fmpycfxt 2 ALU,MPY 1
+;; fmpy 2 MPY 1
+;; fmpyi 2 MPY 1
+;; fdiv,sgl 8 DIV 8
+;; fdiv,dbl 15 DIV 15
+;; fsqrt,sgl 8 DIV 8
+;; fsqrt,dbl 15 DIV 15
+
+(define_automaton "pa7100")
+(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
+
+(define_insn_reservation "X0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100,fpmac_7100")
+
+(define_insn_reservation "X1" 8
+ (and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")
+
+(define_insn_reservation "X2" 15
+ (and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100"))
+ "f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
+
+(define_insn_reservation "X3" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+
+(define_insn_reservation "X4" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100")
+
+(define_insn_reservation "X5" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
+
+(define_insn_reservation "X6" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100")
+
+(define_insn_reservation "X7" 4
+ (and (eq_attr "type" "fpstore_load")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100*3")
+
+(define_insn_reservation "X8" 4
+ (and (eq_attr "type" "store_fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100+mem_7100,mem_7100*3")
+
+(define_insn_reservation "X9" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,fpstore_load,store_fpload")
+ (eq_attr "cpu" "7100"))
+ "i_7100")
+
+;; We have a bypass for all computations in the FP unit which feed an
+;; FP store as long as the sizes are the same.
+(define_bypass 1 "X0" "X6,X7" "hppa_fpstore_bypass_p")
+(define_bypass 7 "X1" "X6,X7" "hppa_fpstore_bypass_p")
+(define_bypass 14 "X2" "X6,X7" "hppa_fpstore_bypass_p")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+(define_bypass 3 "X4,X8" "X6,X7" "hppa_fpstore_bypass_p")
+
+;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
+;; There's no value in modeling the ALU and MUL separately though
+;; since there can never be a functional unit conflict that
+;; can be avoided given the latency, issue rates and mandatory
+;; one cycle cpu-wide lock for a double precision fp multiply.
+;;
+;; Timings:
+;; Instruction Time Unit Minimum Distance (unit contention)
+;; fcpy 2 ALU 1
+;; fabs 2 ALU 1
+;; fadd 2 ALU 1
+;; fsub 2 ALU 1
+;; fcmp 2 ALU 1
+;; fcnv 2 ALU 1
+;; fmpyadd,sgl 2 ALU,MPY 1
+;; fmpyadd,dbl 3 ALU,MPY 2
+;; fmpysub,sgl 2 ALU,MPY 1
+;; fmpysub,dbl 3 ALU,MPY 2
+;; fmpycfxt,sgl 2 ALU,MPY 1
+;; fmpycfxt,dbl 3 ALU,MPY 2
+;; fmpy,sgl 2 MPY 1
+;; fmpy,dbl 3 MPY 2
+;; fmpyi 3 MPY 2
+;; fdiv,sgl 8 DIV 8
+;; fdiv,dbl 15 DIV 15
+;; fsqrt,sgl 8 DIV 8
+;; fsqrt,dbl 15 DIV 15
+;;
+;; The PA7200 is just like the PA7100LC except that there is
+;; no store-store penalty.
+;;
+;; The PA7300 is just like the PA7200 except that there is
+;; no store-load penalty.
+;;
+;; Note that there are some aspects of the 7100LC we are not modeling
+;; at the moment. I'll be reviewing the 7100LC scheduling info
+;; shortly and updating this description. Not yet modeled:
+;;
+;; load-load pairs
+;; store-store pairs
+;; other issue modeling
+
+(define_automaton "pa7100lc")
+(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
+(define_cpu_unit "fpmac_7100lc" "pa7100lc")
+(define_cpu_unit "mem_7100lc" "pa7100lc")
+
+;; Double precision multiplies lock the entire CPU for one
+;; cycle. There is no way to avoid this lock and trying to
+;; schedule around the lock is pointless and thus there is no
+;; value in trying to model this lock.
+;;
+;; Not modeling the lock allows us to treat fp multiplies just
+;; like any other FP alu instruction. It allows for a smaller
+;; DFA and may reduce register pressure.
+(define_insn_reservation "Y0" 2
+ (and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc,fpmac_7100lc")
+
+;; fp division and sqrt instructions lock the entire CPU for
+;; 7 cycles (single precision) or 14 cycles (double precision).
+;; There is no way to avoid this lock and trying to schedule
+;; around the lock is pointless and thus there is no value in
+;; trying to model this lock. Not modeling the lock allows
+;; for a smaller DFA and may reduce register pressure.
+(define_insn_reservation "Y1" 1
+ (and (eq_attr "type" "fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "f_7100lc")
+
+(define_insn_reservation "Y2" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
+
+(define_insn_reservation "Y3" 2
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc+mem_7100lc")
+
+(define_insn_reservation "Y4" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y5" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y6" 4
+ (and (eq_attr "type" "fpstore_load")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc*3")
+
+(define_insn_reservation "Y7" 4
+ (and (eq_attr "type" "store_fpload")
+ (eq_attr "cpu" "7100LC"))
+ "i1_7100lc+mem_7100lc,mem_7100lc*3")
+
+(define_insn_reservation "Y8" 1
+ (and (eq_attr "type" "shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "i1_7100lc")
+
+(define_insn_reservation "Y9" 1
+ (and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
+ (eq_attr "cpu" "7100LC,7200,7300"))
+ "(i0_7100lc|i1_7100lc)")
+
+;; The 7200 has a store-load penalty
+(define_insn_reservation "Y10" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y11" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc")
+
+(define_insn_reservation "Y12" 4
+ (and (eq_attr "type" "fpstore_load")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc,i1_7100lc+mem_7100lc")
+
+(define_insn_reservation "Y13" 4
+ (and (eq_attr "type" "store_fpload")
+ (eq_attr "cpu" "7200"))
+ "i1_7100lc,mem_7100lc,i1_7100lc+mem_7100lc")
+
+;; The 7300 has no penalty for store-store or store-load
+(define_insn_reservation "Y14" 2
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc")
+
+(define_insn_reservation "Y15" 2
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc")
+
+(define_insn_reservation "Y16" 4
+ (and (eq_attr "type" "fpstore_load")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc,i1_7100lc+mem_7100lc")
+
+(define_insn_reservation "Y17" 4
+ (and (eq_attr "type" "store_fpload")
+ (eq_attr "cpu" "7300"))
+ "i1_7100lc,i1_7100lc+mem_7100lc")
+
+;; We have an "anti-bypass" for FP loads which feed an FP store.
+(define_bypass 3 "Y3,Y7,Y13,Y17" "Y5,Y6,Y11,Y12,Y15,Y16" "hppa_fpstore_bypass_p")
+
+;; Scheduling for the PA8000 is somewhat different from scheduling for a
+;; traditional architecture.
+;;
+;; The PA8000 has a large (56) entry reorder buffer that is split between
+;; memory and non-memory operations.
+;;
+;; The PA8000 can issue two memory and two non-memory operations per cycle to
+;; the function units, with the exception of branches and multi-output
+;; instructions. The PA8000 can retire two non-memory operations per cycle
+;; and two memory operations per cycle, only one of which may be a store.
+;;
+;; Given the large reorder buffer, the processor can hide most latencies.
+;; According to HP, the best results come from scheduling for retirement
+;; bandwidth, with limited latency scheduling for floating-point
+;; operations. Latency for integer operations and memory references is
+;; ignored.
+;;
+;;
+;; We claim floating point operations have a 2 cycle latency and are
+;; fully pipelined, except for div and sqrt which are not pipelined and
+;; take from 17 to 31 cycles to complete.
+;;
+;; It's worth noting that there is no way to saturate all the functional
+;; units on the PA8000 as there is not enough issue bandwidth.
+
+(define_automaton "pa8000")
+(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
+(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
+(define_cpu_unit "store_8000" "pa8000")
+(define_cpu_unit "f0_8000, f1_8000" "pa8000")
+(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")
+(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
+(define_reservation "im_8000" "im0_8000 | im1_8000")
+(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
+(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
+(define_reservation "f_8000" "f0_8000 | f1_8000")
+(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")
+
+;; We can issue any two memops per cycle, but we can only retire
+;; one memory store per cycle. We assume that the reorder buffer
+;; will hide any memory latencies per HP's recommendation.
+(define_insn_reservation "Z0" 0
+ (and
+ (eq_attr "type" "load,fpload")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000")
+
+(define_insn_reservation "Z1" 0
+ (and
+ (eq_attr "type" "store,fpstore")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000+store_8000")
+
+(define_insn_reservation "Z2" 0
+ (and (eq_attr "type" "fpstore_load,store_fpload")
+ (eq_attr "cpu" "8000"))
+ "im_8000,rm_8000+store_8000,im_8000,rm_8000")
+
+;; We can issue and retire two non-memory operations per cycle with
+;; a few exceptions (branches). This group catches those we want
+;; to assume have zero latency.
+(define_insn_reservation "Z3" 0
+ (and
+ (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl,fpstore_load,store_fpload")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,rnm_8000")
+
+;; Branches use both slots in the non-memory issue and
+;; retirement unit.
+(define_insn_reservation "Z4" 0
+ (and
+ (eq_attr "type" "uncond_branch,btable_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch")
+ (eq_attr "cpu" "8000"))
+ "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
+
+;; We use partial latency scheduling for the floating-point units.
+;; They can issue/retire two at a time in the non-memory
+;; units. We fix their latency at 2 cycles and they
+;; are fully pipelined.
+(define_insn_reservation "Z5" 1
+ (and
+ (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,f_8000,rnm_8000")
+
+;; The fdivsqrt units are not pipelined and have a very long latency.
+;; To keep the DFA from exploding, we do not show all the
+;; reservations for the divsqrt unit.
+(define_insn_reservation "Z6" 17
+ (and
+ (eq_attr "type" "fpdivsgl,fpsqrtsgl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
+
+(define_insn_reservation "Z7" 31
+ (and
+ (eq_attr "type" "fpdivdbl,fpsqrtdbl")
+ (eq_attr "cpu" "8000"))
+ "inm_8000,fdivsqrt_8000*6,rnm_8000")
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Compare instructions.
+;; This controls RTL generation and register allocation.
+
+(define_insn ""
+ [(set (reg:CCFP 0)
+ (match_operator:CCFP 2 "comparison_operator"
+ [(match_operand:SF 0 "reg_or_0_operand" "fG")
+ (match_operand:SF 1 "reg_or_0_operand" "fG")]))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,sgl,%Y2 %f0,%f1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
+(define_insn ""
+ [(set (reg:CCFP 0)
+ (match_operator:CCFP 2 "comparison_operator"
+ [(match_operand:DF 0 "reg_or_0_operand" "fG")
+ (match_operand:DF 1 "reg_or_0_operand" "fG")]))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,dbl,%Y2 %f0,%f1"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
+;; Provide a means to emit the movccfp0 and movccfp1 optimization
+;; placeholders. This is necessary in rare situations when a
+;; placeholder is re-emitted (see PR 8705).
+
+(define_expand "movccfp"
+ [(set (reg:CCFP 0)
+ (match_operand 0 "const_int_operand" ""))]
+ "! TARGET_SOFT_FLOAT"
+ "
+{
+ if ((unsigned HOST_WIDE_INT) INTVAL (operands[0]) > 1)
+ FAIL;
+}")
+
+;; The following patterns are optimization placeholders. In almost
+;; all cases, the insn that uses the condition code will be simplified
+;; and the original condition-code-setting insn should be eliminated.
+
+(define_insn "*movccfp0"
+ [(set (reg:CCFP 0)
+ (const_int 0))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,dbl,= %%fr0,%%fr0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
+(define_insn "*movccfp1"
+ [(set (reg:CCFP 0)
+ (const_int 1))]
+ "! TARGET_SOFT_FLOAT"
+ "fcmp,dbl,!= %%fr0,%%fr0"
+ [(set_attr "length" "4")
+ (set_attr "type" "fpcc")])
+
+;; scc insns.
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:SI 2 "reg_or_0_operand" "")
+ (match_operand:SI 3 "arith5_operand" "")]))]
+ "!TARGET_64BIT"
+ "")
+
+;; Instruction canonicalization puts immediate operands second, which
+;; is the reverse of what we want.
+
+(define_insn "scc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 3 "comparison_operator"
+ [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith11_operand" "rI")]))]
+ ""
+ "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi 1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
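+
+;; For example, the scc template above expands to a comclr/cmpclr that
+;; zeroes %0 and nullifies the following "ldi 1,%0" exactly when the
+;; comparison is false, so %0 receives the comparison's truth value
+;; without a branch.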
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 3 "comparison_operator"
+ [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith11_operand" "rI")]))]
+ "TARGET_64BIT"
+ "cmp%I2clr,*%B3 %2,%1,%0\;ldi 1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn "iorscc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operator:SI 3 "comparison_operator"
+ [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith11_operand" "rI")])
+ (match_operator:SI 6 "comparison_operator"
+ [(match_operand:SI 4 "register_operand" "r")
+ (match_operand:SI 5 "arith11_operand" "rI")])))]
+ ""
+ "{com%I2clr|cmp%I2clr},%S3 %2,%1,%%r0\;{com%I5clr|cmp%I5clr},%B6 %5,%4,%0\;ldi 1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (match_operator:DI 3 "comparison_operator"
+ [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith11_operand" "rI")])
+ (match_operator:DI 6 "comparison_operator"
+ [(match_operand:DI 4 "register_operand" "r")
+ (match_operand:DI 5 "arith11_operand" "rI")])))]
+ "TARGET_64BIT"
+ "cmp%I2clr,*%S3 %2,%1,%%r0\;cmp%I5clr,*%B6 %5,%4,%0\;ldi 1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "12")])
+
+;; Combiner patterns for common operations performed with the output
+;; from an scc insn (negscc and incscc).
+(define_insn "negscc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operator:SI 3 "comparison_operator"
+ [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith11_operand" "rI")])))]
+ ""
+ "{com%I2clr|cmp%I2clr},%B3 %2,%1,%0\;ldi -1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operator:DI 3 "comparison_operator"
+ [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith11_operand" "rI")])))]
+ "TARGET_64BIT"
+ "cmp%I2clr,*%B3 %2,%1,%0\;ldi -1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+;; Patterns for adding/subtracting the result of a boolean expression from
+;; a register. First we have special patterns that make use of the carry
+;; bit, and output only two instructions. For the cases we can't in
+;; general do in two instructions, the incscc pattern at the end outputs
+;; two or three instructions.
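+
+;; For example, in the first pattern below "sub b,a,%r0" sets the carry
+;; bit exactly when a <= b (unsigned), and "add,c %r0,c,t" then folds
+;; that carry into the sum, computing t = c + (a <=u b) in two insns.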
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (leu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "arith11_operand" "rI"))
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "sub%I3 %3,%2,%%r0\;{addc|add,c} %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (leu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "arith11_operand" "rI"))
+ (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "sub%I3 %3,%2,%%r0\;add,dc %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+; This need only accept registers for op3, since canonicalization
+; replaces geu with gtu when op3 is an integer.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (geu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "sub %2,%3,%%r0\;{addc|add,c} %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (geu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r"))
+ (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "sub %2,%3,%%r0\;add,dc %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+; Match only integers for op3 here. This is used as the canonical form of the
+; geu pattern when op3 is an integer. Don't match registers since we can't
+; make better code than the general incscc pattern.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (gtu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "int11_operand" "I"))
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "addi %k3,%2,%%r0\;{addc|add,c} %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (gtu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "int11_operand" "I"))
+ (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "addi %k3,%2,%%r0\;add,dc %%r0,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn "incscc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operator:SI 4 "comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "arith11_operand" "rI,rI")])
+ (match_operand:SI 1 "register_operand" "0,?r")))]
+ ""
+ "@
+ {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi 1,%0,%0
+ {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi,tr 1,%1,%0\;copy %1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "8,12")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operator:DI 4 "comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (match_operand:DI 3 "arith11_operand" "rI,rI")])
+ (match_operand:DI 1 "register_operand" "0,?r")))]
+ "TARGET_64BIT"
+ "@
+ cmp%I3clr,*%B4 %3,%2,%%r0\;addi 1,%0,%0
+ cmp%I3clr,*%B4 %3,%2,%%r0\;addi,tr 1,%1,%0\;copy %1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "8,12")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (gtu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "arith11_operand" "rI"))))]
+ ""
+ "sub%I3 %3,%2,%%r0\;{subb|sub,b} %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (gtu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "arith11_operand" "rI"))))]
+ "TARGET_64BIT"
+ "sub%I3 %3,%2,%%r0\;sub,db %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (gtu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "arith11_operand" "rI")))
+ (match_operand:SI 4 "register_operand" "r")))]
+ ""
+ "sub%I3 %3,%2,%%r0\;{subb|sub,b} %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (gtu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "arith11_operand" "rI")))
+ (match_operand:DI 4 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "sub%I3 %3,%2,%%r0\;sub,db %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+; This need only accept registers for op3, since canonicalization
+; replaces ltu with leu when op3 is an integer.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (ltu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r"))))]
+ ""
+ "sub %2,%3,%%r0\;{subb|sub,b} %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (ltu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r"))))]
+ "TARGET_64BIT"
+ "sub %2,%3,%%r0\;sub,db %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (ltu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")))
+ (match_operand:SI 4 "register_operand" "r")))]
+ ""
+ "sub %2,%3,%%r0\;{subb|sub,b} %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (ltu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r")))
+ (match_operand:DI 4 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "sub %2,%3,%%r0\;sub,db %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+; Match only integers for op3 here. This is used as the canonical form of the
+; ltu pattern when op3 is an integer. Don't match registers since we can't
+; make better code than the general incscc pattern.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (leu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "int11_operand" "I"))))]
+ ""
+ "addi %k3,%2,%%r0\;{subb|sub,b} %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (leu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "int11_operand" "I"))))]
+ "TARGET_64BIT"
+ "addi %k3,%2,%%r0\;sub,db %1,%%r0,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (leu:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "int11_operand" "I")))
+ (match_operand:SI 4 "register_operand" "r")))]
+ ""
+ "addi %k3,%2,%%r0\;{subb|sub,b} %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (leu:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "int11_operand" "I")))
+ (match_operand:DI 4 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "addi %k3,%2,%%r0\;sub,db %1,%4,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn "decscc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,?r")
+ (match_operator:SI 4 "comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r,r")
+ (match_operand:SI 3 "arith11_operand" "rI,rI")])))]
+ ""
+ "@
+ {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi -1,%0,%0
+ {com%I3clr|cmp%I3clr},%B4 %3,%2,%%r0\;addi,tr -1,%1,%0\;copy %1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "8,12")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,?r")
+ (match_operator:DI 4 "comparison_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (match_operand:DI 3 "arith11_operand" "rI,rI")])))]
+ "TARGET_64BIT"
+ "@
+ cmp%I3clr,*%B4 %3,%2,%%r0\;addi -1,%0,%0
+ cmp%I3clr,*%B4 %3,%2,%%r0\;addi,tr -1,%1,%0\;copy %1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "8,12")])
+
+; Patterns for max and min. (There is no need for an earlyclobber in the
+; last alternative since the middle alternative will match if op0 == op1.)
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "arith11_operand" "r,I,M")))]
+ ""
+ "@
+ {comclr|cmpclr},> %2,%0,%%r0\;copy %2,%0
+ {comiclr|cmpiclr},> %2,%0,%%r0\;ldi %2,%0
+ {comclr|cmpclr},> %1,%r2,%0\;copy %1,%0"
+[(set_attr "type" "multi,multi,multi")
+ (set_attr "length" "8,8,8")])
+
+(define_insn "smindi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (smin:DI (match_operand:DI 1 "register_operand" "%0,0,r")
+ (match_operand:DI 2 "arith11_operand" "r,I,M")))]
+ "TARGET_64BIT"
+ "@
+ cmpclr,*> %2,%0,%%r0\;copy %2,%0
+ cmpiclr,*> %2,%0,%%r0\;ldi %2,%0
+ cmpclr,*> %1,%r2,%0\;copy %1,%0"
+[(set_attr "type" "multi,multi,multi")
+ (set_attr "length" "8,8,8")])
+
+(define_insn "uminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (umin:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "arith11_operand" "r,I")))]
+ ""
+ "@
+ {comclr|cmpclr},>> %2,%0,%%r0\;copy %2,%0
+ {comiclr|cmpiclr},>> %2,%0,%%r0\;ldi %2,%0"
+[(set_attr "type" "multi,multi")
+ (set_attr "length" "8,8")])
+
+(define_insn "umindi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (umin:DI (match_operand:DI 1 "register_operand" "%0,0")
+ (match_operand:DI 2 "arith11_operand" "r,I")))]
+ "TARGET_64BIT"
+ "@
+ cmpclr,*>> %2,%0,%%r0\;copy %2,%0
+ cmpiclr,*>> %2,%0,%%r0\;ldi %2,%0"
+[(set_attr "type" "multi,multi")
+ (set_attr "length" "8,8")])
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (smax:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "arith11_operand" "r,I,M")))]
+ ""
+ "@
+ {comclr|cmpclr},< %2,%0,%%r0\;copy %2,%0
+ {comiclr|cmpiclr},< %2,%0,%%r0\;ldi %2,%0
+ {comclr|cmpclr},< %1,%r2,%0\;copy %1,%0"
+[(set_attr "type" "multi,multi,multi")
+ (set_attr "length" "8,8,8")])
+
+(define_insn "smaxdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (smax:DI (match_operand:DI 1 "register_operand" "%0,0,r")
+ (match_operand:DI 2 "arith11_operand" "r,I,M")))]
+ "TARGET_64BIT"
+ "@
+ cmpclr,*< %2,%0,%%r0\;copy %2,%0
+ cmpiclr,*< %2,%0,%%r0\;ldi %2,%0
+ cmpclr,*< %1,%r2,%0\;copy %1,%0"
+[(set_attr "type" "multi,multi,multi")
+ (set_attr "length" "8,8,8")])
+
+(define_insn "umaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (umax:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "arith11_operand" "r,I")))]
+ ""
+ "@
+ {comclr|cmpclr},<< %2,%0,%%r0\;copy %2,%0
+ {comiclr|cmpiclr},<< %2,%0,%%r0\;ldi %2,%0"
+[(set_attr "type" "multi,multi")
+ (set_attr "length" "8,8")])
+
+(define_insn "umaxdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (umax:DI (match_operand:DI 1 "register_operand" "%0,0")
+ (match_operand:DI 2 "arith11_operand" "r,I")))]
+ "TARGET_64BIT"
+ "@
+ cmpclr,*<< %2,%0,%%r0\;copy %2,%0
+ cmpiclr,*<< %2,%0,%%r0\;ldi %2,%0"
+[(set_attr "type" "multi,multi")
+ (set_attr "length" "8,8")])
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (abs:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "or,>= %%r0,%1,%0\;subi 0,%0,%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "absdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (abs:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "or,*>= %%r0,%1,%0\;subi 0,%0,%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+;;; Experimental conditional move patterns
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "reg_or_cint_move_operand" "")
+ (match_operand:SI 3 "reg_or_cint_move_operand" "")))]
+ ""
+ "
+{
+ if (GET_MODE (XEXP (operands[1], 0)) != SImode
+ || GET_MODE (XEXP (operands[1], 0)) != GET_MODE (XEXP (operands[1], 1)))
+ FAIL;
+}")
+
+;; We used to accept any register for op1.
+;;
+;; However, it loses sometimes because the compiler will end up using
+;; different registers for op0 and op1 in some critical cases. local-alloc
+;; will not tie op0 and op1 because op0 is used in multiple basic blocks.
+;;
+;; If/when global register allocation supports tying we should allow any
+;; register for op1 again.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (if_then_else:SI
+ (match_operator 2 "comparison_operator"
+ [(match_operand:SI 3 "register_operand" "r,r,r,r")
+ (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI")])
+ (match_operand:SI 1 "reg_or_cint_move_operand" "0,J,N,K")
+ (const_int 0)))]
+ ""
+ "@
+ {com%I4clr|cmp%I4clr},%S2 %4,%3,%%r0\;ldi 0,%0
+ {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;ldi %1,%0
+ {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;ldil L'%1,%0
+ {com%I4clr|cmp%I4clr},%B2 %4,%3,%0\;{zdepi|depwi,z} %Z1,%0"
+ [(set_attr "type" "multi,multi,multi,nullshift")
+ (set_attr "length" "8,8,8,8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "comparison_operator"
+ [(match_operand:SI 3 "register_operand" "r,r,r,r,r,r,r,r")
+ (match_operand:SI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")])
+ (match_operand:SI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K")
+ (match_operand:SI 2 "reg_or_cint_move_operand" "r,J,N,K,0,0,0,0")))]
+ ""
+ "@
+ {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;copy %2,%0
+ {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;ldi %2,%0
+ {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;ldil L'%2,%0
+ {com%I4clr|cmp%I4clr},%S5 %4,%3,%%r0\;{zdepi|depwi,z} %Z2,%0
+ {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;copy %1,%0
+ {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;ldi %1,%0
+ {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;ldil L'%1,%0
+ {com%I4clr|cmp%I4clr},%B5 %4,%3,%%r0\;{zdepi|depwi,z} %Z1,%0"
+ [(set_attr "type" "multi,multi,multi,nullshift,multi,multi,multi,nullshift")
+ (set_attr "length" "8,8,8,8,8,8,8,8")])
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "reg_or_cint_move_operand" "")
+ (match_operand:DI 3 "reg_or_cint_move_operand" "")))]
+ "TARGET_64BIT"
+ "
+{
+ if (GET_MODE (XEXP (operands[1], 0)) != DImode
+ || GET_MODE (XEXP (operands[1], 0)) != GET_MODE (XEXP (operands[1], 1)))
+ FAIL;
+}")
+
+; We need the first constraint alternative in order to avoid
+; earlyclobbers on all other alternatives.
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r")
+ (if_then_else:DI
+ (match_operator 2 "comparison_operator"
+ [(match_operand:DI 3 "register_operand" "r,r,r,r,r")
+ (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI")])
+ (match_operand:DI 1 "reg_or_cint_move_operand" "0,r,J,N,K")
+ (const_int 0)))]
+ "TARGET_64BIT"
+ "@
+ cmp%I4clr,*%S2 %4,%3,%%r0\;ldi 0,%0
+ cmp%I4clr,*%B2 %4,%3,%0\;copy %1,%0
+ cmp%I4clr,*%B2 %4,%3,%0\;ldi %1,%0
+ cmp%I4clr,*%B2 %4,%3,%0\;ldil L'%1,%0
+ cmp%I4clr,*%B2 %4,%3,%0\;depdi,z %z1,%0"
+ [(set_attr "type" "multi,multi,multi,multi,nullshift")
+ (set_attr "length" "8,8,8,8,8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:DI
+ (match_operator 5 "comparison_operator"
+ [(match_operand:DI 3 "register_operand" "r,r,r,r,r,r,r,r")
+ (match_operand:DI 4 "arith11_operand" "rI,rI,rI,rI,rI,rI,rI,rI")])
+ (match_operand:DI 1 "reg_or_cint_move_operand" "0,0,0,0,r,J,N,K")
+ (match_operand:DI 2 "reg_or_cint_move_operand" "r,J,N,K,0,0,0,0")))]
+ "TARGET_64BIT"
+ "@
+ cmp%I4clr,*%S5 %4,%3,%%r0\;copy %2,%0
+ cmp%I4clr,*%S5 %4,%3,%%r0\;ldi %2,%0
+ cmp%I4clr,*%S5 %4,%3,%%r0\;ldil L'%2,%0
+ cmp%I4clr,*%S5 %4,%3,%%r0\;depdi,z %z2,%0
+ cmp%I4clr,*%B5 %4,%3,%%r0\;copy %1,%0
+ cmp%I4clr,*%B5 %4,%3,%%r0\;ldi %1,%0
+ cmp%I4clr,*%B5 %4,%3,%%r0\;ldil L'%1,%0
+ cmp%I4clr,*%B5 %4,%3,%%r0\;depdi,z %z1,%0"
+ [(set_attr "type" "multi,multi,multi,nullshift,multi,multi,multi,nullshift")
+ (set_attr "length" "8,8,8,8,8,8,8,8")])
+
+;; Conditional Branches
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "register_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "")
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:SI 1 "reg_or_0_operand" "")
+ (match_operand:SI 2 "arith5_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SF 1 "reg_or_0_operand" "")
+ (match_operand:SF 2 "reg_or_0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ emit_bcond_fp (operands);
+ DONE;
+}")
+
+
+(define_expand "cbranchdf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DF 1 "reg_or_0_operand" "")
+ (match_operand:DF 2 "reg_or_0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ emit_bcond_fp (operands);
+ DONE;
+}")
+
+;; Match the branch patterns.
+
+
+;; Note a long backward conditional branch with an annulled delay slot
+;; has a length of 12.
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "comparison_operator"
+ [(match_operand:SI 1 "reg_or_0_operand" "rM")
+ (match_operand:SI 2 "arith5_operand" "rL")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_cbranch (operands, 0, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
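+
+;; In the length computation above, "(plus (pc) (const_int 8))"
+;; reflects that PA branch displacements are measured from the address
+;; of the branch plus 8, i.e., past the delay slot.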
+
+;; Match the negated branch.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "comparison_operator"
+ [(match_operand:SI 1 "reg_or_0_operand" "rM")
+ (match_operand:SI 2 "arith5_operand" "rL")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ return output_cbranch (operands, 1, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "comparison_operator"
+ [(match_operand:DI 1 "reg_or_0_operand" "rM")
+ (match_operand:DI 2 "reg_or_0_operand" "rM")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_cbranch (operands, 0, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+;; Match the negated branch.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "comparison_operator"
+ [(match_operand:DI 1 "reg_or_0_operand" "rM")
+ (match_operand:DI 2 "reg_or_0_operand" "rM")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_cbranch (operands, 1, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "cmpib_comparison_operator"
+ [(match_operand:DI 1 "reg_or_0_operand" "rM")
+ (match_operand:DI 2 "arith5_operand" "rL")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_cbranch (operands, 0, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+;; Match the negated branch.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "cmpib_comparison_operator"
+ [(match_operand:DI 1 "reg_or_0_operand" "rM")
+ (match_operand:DI 2 "arith5_operand" "rL")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_cbranch (operands, 1, insn);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+;; Branch on Bit patterns.
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "uint5_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_bb (operands, 0, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "uint32_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bb (operands, 0, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "uint5_operand" ""))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ return output_bb (operands, 1, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "uint32_operand" ""))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bb (operands, 1, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "uint5_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_bb (operands, 0, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "uint32_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bb (operands, 0, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "uint5_operand" ""))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ return output_bb (operands, 1, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "uint32_operand" ""))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bb (operands, 1, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+;; Branch on Variable Bit patterns.
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "register_operand" "q"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_bvb (operands, 0, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "register_operand" "q"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bvb (operands, 0, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "register_operand" "q"))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ return output_bvb (operands, 1, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "register_operand" "q"))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bvb (operands, 1, insn, 0);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "register_operand" "q"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_bvb (operands, 0, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "register_operand" "q"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bvb (operands, 0, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "register_operand" "q"))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+ "*
+{
+ return output_bvb (operands, 1, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "register_operand" "q"))
+ (const_int 0))
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_bvb (operands, 1, insn, 1);
+}"
+[(set_attr "type" "cbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+;; Floating point branches
+
+;; ??? Nullification is handled differently from other branches.
+;; If nullification is specified, the delay slot is nullified on any
+;; taken branch regardless of branch direction.
+(define_insn ""
+ [(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "!TARGET_SOFT_FLOAT"
+ "*
+{
+ int length = get_attr_length (insn);
+ rtx xoperands[1];
+ int nullify, xdelay;
+
+ if (length < 16)
+ return \"ftest\;b%* %l0\";
+
+ if (dbr_sequence_length () == 0 || INSN_ANNULLED_BRANCH_P (insn))
+ {
+ nullify = 1;
+ xdelay = 0;
+ xoperands[0] = GEN_INT (length - 8);
+ }
+ else
+ {
+ nullify = 0;
+ xdelay = 1;
+ xoperands[0] = GEN_INT (length - 4);
+ }
+
+ if (nullify)
+ output_asm_insn (\"ftest\;add,tr %%r0,%%r0,%%r0\;b,n .+%0\", xoperands);
+ else
+ output_asm_insn (\"ftest\;add,tr %%r0,%%r0,%%r0\;b .+%0\", xoperands);
+ return output_lbranch (operands[0], insn, xdelay);
+}"
+[(set_attr "type" "fbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36)))])
+
+(define_insn ""
+ [(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "!TARGET_SOFT_FLOAT"
+ "*
+{
+ int length = get_attr_length (insn);
+ rtx xoperands[1];
+ int nullify, xdelay;
+
+ if (length < 16)
+ return \"ftest\;add,tr %%r0,%%r0,%%r0\;b%* %0\";
+
+ if (dbr_sequence_length () == 0 || INSN_ANNULLED_BRANCH_P (insn))
+ {
+ nullify = 1;
+ xdelay = 0;
+ xoperands[0] = GEN_INT (length - 4);
+ }
+ else
+ {
+ nullify = 0;
+ xdelay = 1;
+ xoperands[0] = GEN_INT (length);
+ }
+
+ if (nullify)
+ output_asm_insn (\"ftest\;b,n .+%0\", xoperands);
+ else
+ output_asm_insn (\"ftest\;b .+%0\", xoperands);
+ return output_lbranch (operands[0], insn, xdelay);
+}"
+[(set_attr "type" "fbranch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 12)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 28)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 24)]
+ (const_int 32)))])
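+
+;; A sketch of that behavior for the short form "ftest\;b%* %l0": the
+;; "%*" emits ",n" when the delay slot is annulled, and that ",n"
+;; nullifies the slot on any taken branch, forward or backward, unlike
+;; the integer comb/bb forms where nullification also depends on the
+;; branch direction.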
+
+;; Move instructions
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SImode, 0))
+ DONE;
+}")
+
+;; Handle SImode input reloads requiring %r1 as a scratch register.
+(define_expand "reload_insi_r1"
+ [(set (match_operand:SI 0 "register_operand" "=Z")
+ (match_operand:SI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle SImode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_insi"
+ [(set (match_operand:SI 0 "register_operand" "=Z")
+ (match_operand:SI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle SImode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outsi"
+ [(set (match_operand:SI 0 "non_hard_reg_operand" "")
+ (match_operand:SI 1 "register_operand" "Z"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "move_dest_operand"
+ "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T,?r,?*f")
+ (match_operand:SI 1 "move_src_operand"
+ "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f,*f,r"))]
+ "(register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))
+ && !TARGET_SOFT_FLOAT
+ && !TARGET_64BIT"
+ "@
+ ldw RT'%A1,%0
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ {zdepi|depwi,z} %Z1,%0
+ ldw%M1 %1,%0
+ stw%M0 %r1,%0
+ mtsar %r1
+ {mfctl|mfctl,w} %%sar,%0
+ fcpy,sgl %f1,%0
+ fldw%F1 %1,%0
+ fstw%F0 %1,%0
+ {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0
+ {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0"
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore,fpstore_load,store_fpload")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8,8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "move_dest_operand"
+ "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T")
+ (match_operand:SI 1 "move_src_operand"
+ "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))]
+ "(register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))
+ && !TARGET_SOFT_FLOAT
+ && TARGET_64BIT"
+ "@
+ ldw RT'%A1,%0
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ {zdepi|depwi,z} %Z1,%0
+ ldw%M1 %1,%0
+ stw%M0 %r1,%0
+ mtsar %r1
+ {mfctl|mfctl,w} %%sar,%0
+ fcpy,sgl %f1,%0
+ fldw%F1 %1,%0
+ fstw%F0 %1,%0"
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "indexed_memory_operand" "=R")
+ (match_operand:SI 1 "register_operand" "f"))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && reload_completed"
+ "fstw%F0 %1,%0"
+ [(set_attr "type" "fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4")])
+
+; Rewrite RTL using an indexed store. This will allow the insn that
+; computes the address to be deleted if the register it sets is dead.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 4))
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 4))))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 4))
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 2 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 4))))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:SI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:DI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SI (plus:DI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "move_dest_operand"
+ "=r,r,r,r,r,r,Q,!*q,!r")
+ (match_operand:SI 1 "move_src_operand"
+ "A,r,J,N,K,RQ,rM,!rM,!*q"))]
+ "(register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))
+ && TARGET_SOFT_FLOAT"
+ "@
+ ldw RT'%A1,%0
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ {zdepi|depwi,z} %Z1,%0
+ ldw%M1 %1,%0
+ stw%M0 %r1,%0
+ mtsar %r1
+ {mfctl|mfctl,w} %%sar,%0"
+ [(set_attr "type" "load,move,move,move,move,load,store,move,move")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4")])
+
+;; Load or store with base-register modification.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:DI (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L"))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldw,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+; And a zero-extended variant.
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:SI
+ (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldw,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_expand "pre_load"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mem (plus (match_operand 1 "register_operand" "")
+ (match_operand 2 "pre_cint_operand" ""))))
+ (set (match_dup 1)
+ (plus (match_dup 1) (match_dup 2)))])]
+ ""
+ "
+{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_pre_ldd (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ emit_insn (gen_pre_ldw (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "pre_ldw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "pre_cint_operand" ""))))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "*
+{
+ if (INTVAL (operands[2]) < 0)
+ return \"{ldwm|ldw,mb} %2(%1),%0\";
+ return \"{ldws|ldw},mb %2(%1),%0\";
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "pre_ldd"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mem:DI (plus:DI (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "pre_cint_operand" ""))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldd,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "pre_cint_operand" "")))
+ (match_operand:SI 2 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))]
+ ""
+ "*
+{
+ if (INTVAL (operands[1]) < 0)
+ return \"{stwm|stw,mb} %r2,%1(%0)\";
+ return \"{stws|stw},mb %r2,%1(%0)\";
+}"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (match_operand:SI 1 "register_operand" "+r")))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1)
+ (match_operand:SI 2 "post_cint_operand" "")))]
+ ""
+ "*
+{
+ if (INTVAL (operands[2]) > 0)
+ return \"{ldwm|ldw,ma} %2(%1),%0\";
+ return \"{ldws|ldw},ma %2(%1),%0\";
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_expand "post_store"
+ [(parallel [(set (mem (match_operand 0 "register_operand" ""))
+ (match_operand 1 "reg_or_0_operand" ""))
+ (set (match_dup 0)
+ (plus (match_dup 0)
+ (match_operand 2 "post_cint_operand" "")))])]
+ ""
+ "
+{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_post_std (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ emit_insn (gen_post_stw (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "post_stw"
+ [(set (mem:SI (match_operand:SI 0 "register_operand" "+r"))
+ (match_operand:SI 1 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_operand:SI 2 "post_cint_operand" "")))]
+ ""
+ "*
+{
+ if (INTVAL (operands[2]) > 0)
+ return \"{stwm|stw,ma} %r1,%2(%0)\";
+ return \"{stws|stw},ma %r1,%2(%0)\";
+}"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "post_std"
+ [(set (mem:DI (match_operand:DI 0 "register_operand" "+r"))
+ (match_operand:DI 1 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:DI (match_dup 0)
+ (match_operand:DI 2 "post_cint_operand" "")))]
+ "TARGET_64BIT"
+ "std,ma %r1,%2(%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+;; For loading the address of a label while generating PIC code.
+;; Note since this pattern can be created at reload time (via movsi), all
+;; the same rules for movsi apply here. (no new pseudos, no temporaries).
+(define_insn ""
+ [(set (match_operand 0 "pmode_register_operand" "=a")
+ (match_operand 1 "pic_label_operand" ""))]
+ "TARGET_PA_20"
+ "*
+{
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[1] = operands[1];
+ xoperands[2] = gen_label_rtx ();
+
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[2]));
+ output_asm_insn (\"mfia %0\", xoperands);
+
+ /* If we're trying to load the address of a label that happens to be
+ close, then we can use a shorter sequence. */
+ if (GET_CODE (operands[1]) == LABEL_REF
+ && !LABEL_REF_NONLOCAL_P (operands[1])
+ && INSN_ADDRESSES_SET_P ()
+ && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
+ - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
+ output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
+ else
+ {
+ output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
+ output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")]) ; 8 or 12
+
+(define_insn ""
+ [(set (match_operand 0 "pmode_register_operand" "=a")
+ (match_operand 1 "pic_label_operand" ""))]
+ "!TARGET_PA_20"
+ "*
+{
+ rtx xoperands[3];
+
+ xoperands[0] = operands[0];
+ xoperands[1] = operands[1];
+ xoperands[2] = gen_label_rtx ();
+
+ output_asm_insn (\"bl .+8,%0\", xoperands);
+ output_asm_insn (\"depi 0,31,2,%0\", xoperands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (xoperands[2]));
+
+ /* If we're trying to load the address of a label that happens to be
+ close, then we can use a shorter sequence. */
+ if (GET_CODE (operands[1]) == LABEL_REF
+ && !LABEL_REF_NONLOCAL_P (operands[1])
+ && INSN_ADDRESSES_SET_P ()
+ && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
+ - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
+ output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
+ else
+ {
+ output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
+ output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")]) ; 12 or 16
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (high:SI (match_operand 2 "" ""))))]
+ "symbolic_operand (operands[2], Pmode)
+ && ! function_label_operand (operands[2], Pmode)
+ && flag_pic"
+ "addil LT'%G2,%1"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (high:DI (match_operand 2 "" ""))))]
+ "symbolic_operand (operands[2], Pmode)
+ && ! function_label_operand (operands[2], Pmode)
+ && TARGET_64BIT
+ && flag_pic"
+ "addil LT'%G2,%1"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+;; Always use addil rather than ldil;add sequences. This allows the
+;; HP linker to eliminate the dp relocation if the symbolic operand
+;; lives in the TEXT space.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (high:SI (match_operand 1 "" "")))]
+ "symbolic_operand (operands[1], Pmode)
+ && ! function_label_operand (operands[1], Pmode)
+ && ! read_only_operand (operands[1], Pmode)
+ && ! flag_pic"
+ "*
+{
+ if (TARGET_LONG_LOAD_STORE)
+ return \"addil NLR'%H1,%%r27\;ldo N'%H1(%%r1),%%r1\";
+ else
+ return \"addil LR'%H1,%%r27\";
+}"
+ [(set_attr "type" "binary")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "TARGET_LONG_LOAD_STORE") (const_int 0))
+ (const_int 4)
+ (const_int 8)))])
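+
+;; A sketch of the contrast: "addil LR'sym,%r27" is one insn whose
+;; dp-relative relocation the HP linker can remove when sym lives in
+;; TEXT space, whereas "ldil LR'sym,%r1" followed by
+;; "{addl|add,l} %r1,%r27,%r1" costs two insns and gives the linker no
+;; such opportunity.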
+
+
+;; This is for use in the prologue/epilogue code. We need it
+;; to add large constants to a stack pointer or frame pointer.
+;; Because of the additional %r1 pressure, we probably do not
+;; want to use this in general code, so make it available
+;; only after reload.
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=!a,*r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r")
+ (high:SI (match_operand 2 "const_int_operand" ""))))]
+ "reload_completed"
+ "@
+ addil L'%G2,%1
+ ldil L'%G2,%0\;{addl|add,l} %0,%1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=!a,*r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r,r")
+ (high:DI (match_operand 2 "const_int_operand" ""))))]
+ "reload_completed && TARGET_64BIT"
+ "@
+ addil L'%G2,%1
+ ldil L'%G2,%0\;{addl|add,l} %0,%1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand 1 "" "")))]
+ "(!flag_pic || !symbolic_operand (operands[1], Pmode))
+ && !is_function_label_plus_const (operands[1])"
+ "*
+{
+ if (symbolic_operand (operands[1], Pmode))
+ return \"ldil LR'%H1,%0\";
+ else
+ return \"ldil L'%G1,%0\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand 1 "const_int_operand" "")))]
+ "TARGET_64BIT"
+ "ldil L'%G1,%0";
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "i")))]
+ "TARGET_64BIT"
+ "ldo R'%G2(%1),%0";
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "!is_function_label_plus_const (operands[2])"
+ "*
+{
+ gcc_assert (!flag_pic || !symbolic_operand (operands[2], Pmode));
+
+ if (symbolic_operand (operands[2], Pmode))
+ return \"ldo RR'%G2(%1),%0\";
+ else
+ return \"ldo R'%G2(%1),%0\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+;; Now that a symbolic_address plus a constant is broken up early
+;; in the compilation phase (for better CSE) we need a special
+;; combiner pattern to load the symbolic address plus the constant
+;; in only 2 instructions. (For cases where the symbolic address
+;; was not a common subexpression.)
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "symbolic_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "! (flag_pic && pic_label_operand (operands[1], SImode))"
+ [(set (match_dup 2) (high:SI (match_dup 1)))
+ (set (match_dup 0) (lo_sum:SI (match_dup 2) (match_dup 1)))]
+ "")
+
+;; hppa_legitimize_address goes to a great deal of trouble to
+;; create addresses which use indexing. In some cases, this
+;; is a loss because there are no store instructions which
+;; allow indexed addresses (with integer register source).
+;;
+;; These define_splits try to turn a 3 insn store into
+;; a 2 insn store with some creative RTL rewriting.
+(define_split
+ [(set (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "shadd_operand" ""))
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))))
+ (match_operand:SI 4 "register_operand" ""))
+ (clobber (match_operand:SI 5 "register_operand" ""))]
+ ""
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1))
+ (match_dup 2)))
+ (set (mem:SI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (mem:HI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "shadd_operand" ""))
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))))
+ (match_operand:HI 4 "register_operand" ""))
+ (clobber (match_operand:SI 5 "register_operand" ""))]
+ ""
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1))
+ (match_dup 2)))
+ (set (mem:HI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (mem:QI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "shadd_operand" ""))
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))))
+ (match_operand:QI 4 "register_operand" ""))
+ (clobber (match_operand:SI 5 "register_operand" ""))]
+ ""
+ [(set (match_dup 5) (plus:SI (mult:SI (match_dup 0) (match_dup 1))
+ (match_dup 2)))
+ (set (mem:QI (plus:SI (match_dup 5) (match_dup 3))) (match_dup 4))]
+ "")
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, HImode, 0))
+ DONE;
+}")
+
+;; Handle HImode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_inhi"
+ [(set (match_operand:HI 0 "register_operand" "=Z")
+ (match_operand:HI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:HI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, HImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle HImode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outhi"
+ [(set (match_operand:HI 0 "non_hard_reg_operand" "")
+ (match_operand:HI 1 "register_operand" "Z"))
+ (clobber (match_operand:HI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, HImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "move_dest_operand"
+ "=r,r,r,r,r,Q,!*q,!r")
+ (match_operand:HI 1 "move_src_operand"
+ "r,J,N,K,RQ,rM,!rM,!*q"))]
+ "(register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode))"
+ "@
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ {zdepi|depwi,z} %Z1,%0
+ ldh%M1 %1,%0
+ sth%M0 %r1,%0
+ mtsar %r1
+   {mfctl|mfctl,w} %%sar,%0"
+ [(set_attr "type" "move,move,move,shift,load,store,move,move")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "int5_operand" "L"))))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "{ldhs|ldh},mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (plus:DI (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L"))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldh,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+; And a zero-extended variant.
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:HI
+ (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldh,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:HI
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "int5_operand" "L")))))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "{ldhs|ldh},mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:HI
+ (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1)
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldh,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (mem:HI (plus:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "int5_operand" "L")))
+ (match_operand:HI 2 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))]
+ ""
+ "{sths|sth},mb %r2,%1(%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (mem:HI (plus:DI (match_operand:DI 0 "register_operand" "+r")
+ (match_operand:DI 1 "int5_operand" "L")))
+ (match_operand:HI 2 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:DI (match_dup 0) (match_dup 1)))]
+ "TARGET_64BIT"
+ "sth,mb %r2,%1(%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%r,r")
+ (match_operand:HI 2 "arith_operand" "r,J")))]
+ ""
+ "@
+ {addl|add,l} %1,%2,%0
+ ldo %2(%1),%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, QImode, 0))
+ DONE;
+}")
+
+;; Handle QImode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_inqi"
+ [(set (match_operand:QI 0 "register_operand" "=Z")
+ (match_operand:QI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:QI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, QImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle QImode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outqi"
+ [(set (match_operand:QI 0 "non_hard_reg_operand" "")
+ (match_operand:QI 1 "register_operand" "Z"))
+ (clobber (match_operand:QI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, QImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:QI 0 "move_dest_operand"
+ "=r,r,r,r,r,Q,!*q,!r")
+ (match_operand:QI 1 "move_src_operand"
+ "r,J,N,K,RQ,rM,!rM,!*q"))]
+ "(register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode))"
+ "@
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ {zdepi|depwi,z} %Z1,%0
+ ldb%M1 %1,%0
+ stb%M0 %r1,%0
+ mtsar %r1
+ {mfctl|mfctl,w} %%sar,%0"
+ [(set_attr "type" "move,move,move,shift,load,store,move,move")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "int5_operand" "L"))))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "{ldbs|ldb},mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L"))))
+ (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldb,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+; Now the same thing with zero extensions.
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:QI (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldb,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:QI (plus:SI
+ (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "int5_operand" "L")))))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "{ldbs|ldb},mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:QI (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldb,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (mem:QI (plus:SI
+ (match_operand:SI 1 "register_operand" "+r")
+ (match_operand:SI 2 "int5_operand" "L")))))
+ (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "{ldbs|ldb},mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (mem:QI (plus:DI
+ (match_operand:DI 1 "register_operand" "+r")
+ (match_operand:DI 2 "int5_operand" "L")))))
+ (set (match_dup 1) (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "ldb,mb %2(%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (mem:QI (plus:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "int5_operand" "L")))
+ (match_operand:QI 2 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))]
+ ""
+ "{stbs|stb},mb %r2,%1(%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (mem:QI (plus:DI (match_operand:DI 0 "register_operand" "+r")
+ (match_operand:DI 1 "int5_operand" "L")))
+ (match_operand:QI 2 "reg_or_0_operand" "rM"))
+ (set (match_dup 0)
+ (plus:DI (match_dup 0) (match_dup 1)))]
+ "TARGET_64BIT"
+ "stb,mb %r2,%1(%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+;; The definition of this insn does not really explain what it does,
+;; but it should suffice to say that anything generated as this insn
+;; will be recognized as a movmemsi operation, and that it will not
+;; successfully combine with anything.
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+ /* HP provides very fast block move library routine for the PA;
+ this routine includes:
+
+ 4x4 byte at a time block moves,
+ 1x4 byte at a time with alignment checked at runtime with
+ attempts to align the source and destination as needed
+ 1x1 byte loop
+
+ With that in mind, here's the heuristics to try and guess when
+ the inlined block move will be better than the library block
+ move:
+
+ If the size isn't constant, then always use the library routines.
+
+ If the size is large in respect to the known alignment, then use
+ the library routines.
+
+ If the size is small in respect to the known alignment, then open
+ code the copy (since that will lead to better scheduling).
+
+ Else use the block move pattern. */
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+ align = align > 4 ? 4 : (align ? align : 1);
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
+ FAIL;
+
+ /* Fall through means we're going to use our block move pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_mode_reg (SImode, XEXP (operands[1], 0)));
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] = gen_reg_rtx (SImode);
+ operands[8] = gen_reg_rtx (SImode);
+}")
+
+;; The operand constraints are written like this to support both compile-time
+;; and run-time determined byte counts. The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register with the byte count
+;; is clobbered by the copying code, and therefore it is forced to operand 2.
+;;
+;; We used to clobber operands 0 and 1. However, a change to regrename.c
+;; broke this semantic for pseudo registers. We can't use match_scratch
+;; as this requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively. We then split or peephole optimize after reload.
+(define_insn "movmemsi_prereload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:SI 1 "register_operand" "r,r")))
+ (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_operand:SI 7 "register_operand" "=&r,&r")) ;item tmp3
+ (clobber (match_operand:SI 8 "register_operand" "=&r,&r")) ;item tmp4
+ (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:SI 5 "const_int_operand" "n,n"))] ;alignment
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" ""))
+ (clobber (match_operand:SI 3 "register_operand" ""))
+ (clobber (match_operand:SI 6 "register_operand" ""))
+ (clobber (match_operand:SI 7 "register_operand" ""))
+ (clobber (match_operand:SI 8 "register_operand" ""))
+ (use (match_operand:SI 4 "arith_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))])]
+ "!TARGET_64BIT && reload_completed && !flag_peephole2
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), SImode)
+ && GET_CODE (operands[1]) == MEM
+ && register_operand (XEXP (operands[1], 0), SImode)"
+ [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 8) (match_dup 10))
+ (parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ operands[9] = XEXP (operands[0], 0);
+ operands[10] = XEXP (operands[1], 0);
+ operands[0] = replace_equiv_address (operands[0], operands[7]);
+ operands[1] = replace_equiv_address (operands[1], operands[8]);
+}")
+
+(define_peephole2
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" ""))
+ (clobber (match_operand:SI 3 "register_operand" ""))
+ (clobber (match_operand:SI 6 "register_operand" ""))
+ (clobber (match_operand:SI 7 "register_operand" ""))
+ (clobber (match_operand:SI 8 "register_operand" ""))
+ (use (match_operand:SI 4 "arith_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))])]
+ "!TARGET_64BIT
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), SImode)
+ && GET_CODE (operands[1]) == MEM
+ && register_operand (XEXP (operands[1], 0), SImode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[7] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[7], addr));
+ operands[0] = replace_equiv_address (operands[0], operands[7]);
+ }
+
+ addr = XEXP (operands[1], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[8] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[8], addr));
+ operands[1] = replace_equiv_address (operands[1], operands[8]);
+ }
+}")
+
+(define_insn "movmemsi_postreload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r"))
+ (mem:BLK (match_operand:SI 1 "register_operand" "+r,r")))
+ (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:SI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:SI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (use (match_operand:SI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:SI 5 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "!TARGET_64BIT && reload_completed"
+ "* return output_block_move (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+(define_expand "movmemdi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+  /* HP provides a very fast block move library routine for the PA;
+ this routine includes:
+
+ 4x4 byte at a time block moves,
+ 1x4 byte at a time with alignment checked at runtime with
+ attempts to align the source and destination as needed
+ 1x1 byte loop
+
+     With that in mind, here are the heuristics used to guess when
+ the inlined block move will be better than the library block
+ move:
+
+ If the size isn't constant, then always use the library routines.
+
+     If the size is large with respect to the known alignment, then use
+ the library routines.
+
+     If the size is small with respect to the known alignment, then open
+ code the copy (since that will lead to better scheduling).
+
+ Else use the block move pattern. */
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[2]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+ align = align > 8 ? 8 : (align ? align : 1);
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
+ FAIL;
+
+ /* Fall through means we're going to use our block move pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_mode_reg (DImode, XEXP (operands[1], 0)));
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+ operands[6] = gen_reg_rtx (DImode);
+ operands[7] = gen_reg_rtx (DImode);
+ operands[8] = gen_reg_rtx (DImode);
+}")
+
+;; The operand constraints are written like this to support both compile-time
+;; and run-time determined byte counts.  The expander and output_block_move
+;; only support compile-time determined counts at this time.
+;;
+;; If the count is run-time determined, the register holding the byte count
+;; is clobbered by the copying code, and therefore it must be tied to
+;; operand 2.
+;;
+;; We used to clobber operands 0 and 1.  However, a change to regrename.c
+;; broke these semantics for pseudo registers.  We can't use match_scratch
+;; as that requires two registers in the class R1_REGS when the MEMs for
+;; operands 0 and 1 are both equivalent to symbolic MEMs.  Thus, we are
+;; forced to internally copy operands 0 and 1 to operands 7 and 8,
+;; respectively, and then split or peephole optimize after reload.
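+;;
+;; A hypothetical view of that strategy (register numbers invented):
+;;
+;;   prereload:  [(set (mem:BLK (reg 96)) (mem:BLK (reg 97)))
+;;                ... (clobber (reg 101)) (clobber (reg 102)) ...]  ->  "#"
+;;   split:      (set (reg 101) (reg 96))
+;;               (set (reg 102) (reg 97))
+;;               [(set (mem:BLK (reg 101)) (mem:BLK (reg 102))) ...]
+;;
+;; The addresses are first copied into the clobbered scratch registers, so
+;; the postreload pattern may freely destroy them while copying.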
+(define_insn "movmemdi_prereload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:DI 1 "register_operand" "r,r")))
+ (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_operand:DI 7 "register_operand" "=&r,&r")) ;item tmp3
+ (clobber (match_operand:DI 8 "register_operand" "=&r,&r")) ;item tmp4
+ (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:DI 5 "const_int_operand" "n,n"))] ;alignment
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_operand:DI 3 "register_operand" ""))
+ (clobber (match_operand:DI 6 "register_operand" ""))
+ (clobber (match_operand:DI 7 "register_operand" ""))
+ (clobber (match_operand:DI 8 "register_operand" ""))
+ (use (match_operand:DI 4 "arith_operand" ""))
+ (use (match_operand:DI 5 "const_int_operand" ""))])]
+ "TARGET_64BIT && reload_completed && !flag_peephole2
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), DImode)
+ && GET_CODE (operands[1]) == MEM
+ && register_operand (XEXP (operands[1], 0), DImode)"
+ [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 8) (match_dup 10))
+ (parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ operands[9] = XEXP (operands[0], 0);
+ operands[10] = XEXP (operands[1], 0);
+ operands[0] = replace_equiv_address (operands[0], operands[7]);
+ operands[1] = replace_equiv_address (operands[1], operands[8]);
+}")
+
+(define_peephole2
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (clobber (match_operand:DI 2 "register_operand" ""))
+ (clobber (match_operand:DI 3 "register_operand" ""))
+ (clobber (match_operand:DI 6 "register_operand" ""))
+ (clobber (match_operand:DI 7 "register_operand" ""))
+ (clobber (match_operand:DI 8 "register_operand" ""))
+ (use (match_operand:DI 4 "arith_operand" ""))
+ (use (match_operand:DI 5 "const_int_operand" ""))])]
+ "TARGET_64BIT
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), DImode)
+ && GET_CODE (operands[1]) == MEM
+ && register_operand (XEXP (operands[1], 0), DImode)"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 6))
+ (clobber (match_dup 7))
+ (clobber (match_dup 8))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (const_int 0)])]
+ "
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[7] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[7], addr));
+ operands[0] = replace_equiv_address (operands[0], operands[7]);
+ }
+
+ addr = XEXP (operands[1], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[8] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[8], addr));
+ operands[1] = replace_equiv_address (operands[1], operands[8]);
+ }
+}")
+
+(define_insn "movmemdi_postreload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r"))
+ (mem:BLK (match_operand:DI 1 "register_operand" "+r,r")))
+ (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 3 "register_operand" "=&r,&r")) ;item tmp1
+ (clobber (match_operand:DI 6 "register_operand" "=&r,&r")) ;item tmp2
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (use (match_operand:DI 4 "arith_operand" "J,2")) ;byte count
+ (use (match_operand:DI 5 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "TARGET_64BIT && reload_completed"
+ "* return output_block_move (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+(define_expand "setmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand 2 "const_int_operand" ""))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (use (match_operand:SI 1 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+  /* If the value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[1]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[1]);
+ align = INTVAL (operands[3]);
+ align = align > 4 ? 4 : align;
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
+ FAIL;
+
+ /* Fall through means we're going to use our block clear pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (SImode, XEXP (operands[0], 0)));
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "clrmemsi_prereload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:SI 4 "register_operand" "=&r,&r")) ;tmp1
+ (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:SI 3 "const_int_operand" "n,n"))] ;alignment
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" ""))
+ (clobber (match_operand:SI 4 "register_operand" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT && reload_completed && !flag_peephole2
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), SImode)"
+ [(set (match_dup 4) (match_dup 5))
+ (parallel [(set (match_dup 0) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ operands[5] = XEXP (operands[0], 0);
+ operands[0] = replace_equiv_address (operands[0], operands[4]);
+}")
+
+(define_peephole2
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" ""))
+ (clobber (match_operand:SI 4 "register_operand" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ "!TARGET_64BIT
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), SImode)"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[4] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4], addr));
+ operands[0] = replace_equiv_address (operands[0], operands[4]);
+ }
+}")
+
+(define_insn "clrmemsi_postreload"
+ [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r"))
+ (const_int 0))
+ (clobber (match_operand:SI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_dup 0))
+ (use (match_operand:SI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:SI 3 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "!TARGET_64BIT && reload_completed"
+ "* return output_block_clear (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+(define_expand "setmemdi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand 2 "const_int_operand" ""))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (use (match_operand:DI 1 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT && optimize > 0"
+ "
+{
+ int size, align;
+
+  /* If the value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ /* Undetermined size, use the library routine. */
+ if (GET_CODE (operands[1]) != CONST_INT)
+ FAIL;
+
+ size = INTVAL (operands[1]);
+ align = INTVAL (operands[3]);
+ align = align > 8 ? 8 : align;
+
+ /* If size/alignment is large, then use the library routines. */
+ if (size / align > 16)
+ FAIL;
+
+ /* This does happen, but not often enough to worry much about. */
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
+ FAIL;
+
+ /* Fall through means we're going to use our block clear pattern. */
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (DImode, XEXP (operands[0], 0)));
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+}")
+
+(define_insn "clrmemdi_prereload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_operand:DI 4 "register_operand" "=&r,&r")) ;item tmp1
+ (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:DI 3 "const_int_operand" "n,n"))] ;alignment
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi,multi")])
+
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT && reload_completed && !flag_peephole2
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), DImode)"
+ [(set (match_dup 4) (match_dup 5))
+ (parallel [(set (match_dup 0) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ operands[5] = XEXP (operands[0], 0);
+ operands[0] = replace_equiv_address (operands[0], operands[4]);
+}")
+
+(define_peephole2
+ [(parallel [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 2 "arith_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))])]
+ "TARGET_64BIT
+ && GET_CODE (operands[0]) == MEM
+ && register_operand (XEXP (operands[0], 0), DImode)"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 4))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (const_int 0)])]
+ "
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (dead_or_set_p (curr_insn, addr))
+ operands[4] = addr;
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4], addr));
+ operands[0] = replace_equiv_address (operands[0], operands[4]);
+ }
+}")
+
+(define_insn "clrmemdi_postreload"
+ [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r"))
+ (const_int 0))
+ (clobber (match_operand:DI 1 "register_operand" "=&r,&r")) ;loop cnt/tmp
+ (clobber (match_dup 0))
+ (use (match_operand:DI 2 "arith_operand" "J,1")) ;byte count
+ (use (match_operand:DI 3 "const_int_operand" "n,n")) ;alignment
+ (const_int 0)]
+ "TARGET_64BIT && reload_completed"
+ "* return output_block_clear (operands, !which_alternative);"
+ [(set_attr "type" "multi,multi")])
+
+;; Floating point move insns
+
+;; This pattern forces (set (reg:DF ...) (const_double ...))
+;; to be reloaded by putting the constant into memory when
+;; reg is a floating point register.
+;;
+;; For integer registers we use ldil;ldo to set the appropriate
+;; value.
+;;
+;; This must come before the movdf pattern, and it must be present
+;; to handle obscure reloading cases.
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=?r,f")
+ (match_operand:DF 1 "" "?F,m"))]
+ "GET_CODE (operands[1]) == CONST_DOUBLE
+ && operands[1] != CONST0_RTX (DFmode)
+ && !TARGET_64BIT
+ && !TARGET_SOFT_FLOAT"
+ "* return (which_alternative == 0 ? output_move_double (operands)
+ : \"fldd%F1 %1,%0\");"
+ [(set_attr "type" "move,fpload")
+ (set_attr "length" "16,4")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && operands[1] != CONST0_RTX (DFmode))
+ {
+ /* Reject CONST_DOUBLE loads to all hard registers when
+ generating 64-bit code and to floating point registers
+ when generating 32-bit code. */
+ if (REG_P (operands[0])
+ && HARD_REGISTER_P (operands[0])
+ && (TARGET_64BIT || REGNO (operands[0]) >= 32))
+ FAIL;
+
+ if (TARGET_64BIT)
+ operands[1] = force_const_mem (DFmode, operands[1]);
+ }
+
+ if (emit_move_sequence (operands, DFmode, 0))
+ DONE;
+}")
+
+;; Handle DFmode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_indf"
+ [(set (match_operand:DF 0 "register_operand" "=Z")
+ (match_operand:DF 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:DF 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, DFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle DFmode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outdf"
+ [(set (match_operand:DF 0 "non_hard_reg_operand" "")
+ (match_operand:DF 1 "register_operand" "Z"))
+ (clobber (match_operand:DF 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, DFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:DF 0 "move_dest_operand"
+ "=f,*r,Q,?o,?Q,f,*r,*r,?*r,?f")
+ (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
+ "fG,*rG,f,*r,*r,RQ,o,RQ,f,*r"))]
+ "(register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))
+ && !(GET_CODE (operands[1]) == CONST_DOUBLE
+ && GET_CODE (operands[0]) == MEM)
+ && !TARGET_64BIT
+ && !TARGET_SOFT_FLOAT"
+ "*
+{
+ if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1])
+ || operands[1] == CONST0_RTX (DFmode))
+ && !(REG_P (operands[0]) && REG_P (operands[1])
+ && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])))
+ return output_fp_move_double (operands);
+ return output_move_double (operands);
+}"
+ [(set_attr "type" "fpalu,move,fpstore,store,store,fpload,load,load,fpstore_load,store_fpload")
+ (set_attr "length" "4,8,4,8,16,4,8,16,12,12")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "indexed_memory_operand" "=R")
+ (match_operand:DF 1 "reg_or_0_operand" "f"))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && reload_completed"
+ "fstd%F0 %1,%0"
+ [(set_attr "type" "fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 8))
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 8))))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8))
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 2 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8))))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:SI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:DI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DF (match_dup 0))
+ (match_operand:DF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DF (plus:DI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:DF 0 "move_dest_operand"
+ "=r,?o,?Q,r,r")
+ (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
+ "rG,r,r,o,RQ"))]
+ "(register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))
+ && !TARGET_64BIT
+ && TARGET_SOFT_FLOAT"
+ "*
+{
+ return output_move_double (operands);
+}"
+ [(set_attr "type" "move,store,store,load,load")
+ (set_attr "length" "8,8,16,8,16")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "move_dest_operand"
+ "=!*r,*r,*r,*r,*r,Q,f,f,T")
+ (match_operand:DF 1 "move_src_operand"
+ "!*r,J,N,K,RQ,*rG,fG,RT,f"))]
+ "(register_operand (operands[0], DFmode)
+ || reg_or_0_operand (operands[1], DFmode))
+ && !TARGET_SOFT_FLOAT && TARGET_64BIT"
+ "@
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ depdi,z %z1,%0
+ ldd%M1 %1,%0
+ std%M0 %r1,%0
+ fcpy,dbl %f1,%0
+ fldd%F1 %1,%0
+ fstd%F0 %1,%0"
+ [(set_attr "type" "move,move,move,shift,load,store,fpalu,fpload,fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4")])
+
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Except for zero, we don't support loading a CONST_INT directly
+ to a hard floating-point register since a scratch register is
+ needed for the operation. While the operation could be handled
+ before register allocation, the simplest solution is to fail. */
+ if (TARGET_64BIT
+ && GET_CODE (operands[1]) == CONST_INT
+ && operands[1] != CONST0_RTX (DImode)
+ && REG_P (operands[0])
+ && HARD_REGISTER_P (operands[0])
+ && REGNO (operands[0]) >= 32)
+ FAIL;
+
+ if (emit_move_sequence (operands, DImode, 0))
+ DONE;
+}")
+
+;; Handle DImode input reloads requiring %r1 as a scratch register.
+(define_expand "reload_indi_r1"
+ [(set (match_operand:DI 0 "register_operand" "=Z")
+ (match_operand:DI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, DImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle DImode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_indi"
+ [(set (match_operand:DI 0 "register_operand" "=Z")
+ (match_operand:DI 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, DImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle DImode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outdi"
+ [(set (match_operand:DI 0 "non_hard_reg_operand" "")
+ (match_operand:DI 1 "register_operand" "Z"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, DImode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand 1 "" "")))]
+ "!TARGET_64BIT"
+ "*
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+
+ switch (GET_CODE (op1))
+ {
+ case CONST_INT:
+#if HOST_BITS_PER_WIDE_INT <= 32
+ operands[0] = operand_subword (op0, 1, 0, DImode);
+ output_asm_insn (\"ldil L'%1,%0\", operands);
+
+ operands[0] = operand_subword (op0, 0, 0, DImode);
+ if (INTVAL (op1) < 0)
+ output_asm_insn (\"ldi -1,%0\", operands);
+ else
+ output_asm_insn (\"ldi 0,%0\", operands);
+#else
+ operands[0] = operand_subword (op0, 1, 0, DImode);
+ operands[1] = GEN_INT (INTVAL (op1) & 0xffffffff);
+ output_asm_insn (\"ldil L'%1,%0\", operands);
+
+ operands[0] = operand_subword (op0, 0, 0, DImode);
+ operands[1] = GEN_INT (INTVAL (op1) >> 32);
+ output_asm_insn (singlemove_string (operands), operands);
+#endif
+ break;
+
+ case CONST_DOUBLE:
+ operands[0] = operand_subword (op0, 1, 0, DImode);
+ operands[1] = GEN_INT (CONST_DOUBLE_LOW (op1));
+ output_asm_insn (\"ldil L'%1,%0\", operands);
+
+ operands[0] = operand_subword (op0, 0, 0, DImode);
+ operands[1] = GEN_INT (CONST_DOUBLE_HIGH (op1));
+ output_asm_insn (singlemove_string (operands), operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return \"\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "move_dest_operand"
+ "=r,o,Q,r,r,r,*f,*f,T,?r,?*f")
+ (match_operand:DI 1 "general_operand"
+ "rM,r,r,o*R,Q,i,*fM,RT,*f,*f,r"))]
+ "(register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))
+ && !TARGET_64BIT
+ && !TARGET_SOFT_FLOAT"
+ "*
+{
+ if ((FP_REG_P (operands[0]) || FP_REG_P (operands[1])
+ || operands[1] == CONST0_RTX (DFmode))
+ && !(REG_P (operands[0]) && REG_P (operands[1])
+ && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1])))
+ return output_fp_move_double (operands);
+ return output_move_double (operands);
+}"
+ [(set_attr "type"
+ "move,store,store,load,load,multi,fpalu,fpload,fpstore,fpstore_load,store_fpload")
+ (set_attr "length" "8,8,16,8,16,16,4,4,4,12,12")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "move_dest_operand"
+ "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T")
+ (match_operand:DI 1 "move_src_operand"
+ "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))]
+ "(register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))
+ && !TARGET_SOFT_FLOAT && TARGET_64BIT"
+ "@
+ ldd RT'%A1,%0
+ copy %1,%0
+ ldi %1,%0
+ ldil L'%1,%0
+ depdi,z %z1,%0
+ ldd%M1 %1,%0
+ std%M0 %r1,%0
+ mtsar %r1
+ {mfctl|mfctl,w} %%sar,%0
+ fcpy,dbl %f1,%0
+ fldd%F1 %1,%0
+ fstd%F0 %1,%0"
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "indexed_memory_operand" "=R")
+ (match_operand:DI 1 "register_operand" "f"))]
+ "!TARGET_SOFT_FLOAT
+ && TARGET_64BIT
+ && !TARGET_DISABLE_INDEXING
+ && reload_completed"
+ "fstd%F0 %1,%0"
+ [(set_attr "type" "fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4")])
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8))
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DI (match_dup 0))
+ (match_operand:DI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DI (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 2 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 8))))
+ (set (mem:DI (match_dup 0))
+ (match_operand:DI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DI (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 8))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DI (match_dup 0))
+ (match_operand:DI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DI (plus:DI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:DI (match_dup 0))
+ (match_operand:DI 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:DI (plus:DI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "move_dest_operand"
+ "=r,o,Q,r,r,r")
+ (match_operand:DI 1 "general_operand"
+ "rM,r,r,o,Q,i"))]
+ "(register_operand (operands[0], DImode)
+ || reg_or_0_operand (operands[1], DImode))
+ && !TARGET_64BIT
+ && TARGET_SOFT_FLOAT"
+ "*
+{
+ return output_move_double (operands);
+}"
+ [(set_attr "type" "move,store,store,load,load,multi")
+ (set_attr "length" "8,8,16,8,16,16")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,&r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand:DI 2 "immediate_operand" "i,i")))]
+ "!TARGET_64BIT"
+ "*
+{
+ /* Don't output a 64-bit constant, since we can't trust the assembler to
+ handle it correctly. */
+ if (GET_CODE (operands[2]) == CONST_DOUBLE)
+ operands[2] = GEN_INT (CONST_DOUBLE_LOW (operands[2]));
+ else if (HOST_BITS_PER_WIDE_INT > 32
+ && GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffffffff);
+ if (which_alternative == 1)
+ output_asm_insn (\"copy %1,%0\", operands);
+ return \"ldo R'%G2(%R1),%R0\";
+}"
+ [(set_attr "type" "move,move")
+ (set_attr "length" "4,8")])
+
+;; This pattern forces (set (reg:SF ...) (const_double ...))
+;; to be reloaded by putting the constant into memory when
+;; reg is a floating point register.
+;;
+;; For integer registers we use ldil;ldo to set the appropriate
+;; value.
+;;
+;; This must come before the movsf pattern, and it must be present
+;; to handle obscure reloading cases.
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=?r,f")
+ (match_operand:SF 1 "" "?F,m"))]
+ "GET_CODE (operands[1]) == CONST_DOUBLE
+ && operands[1] != CONST0_RTX (SFmode)
+ && ! TARGET_SOFT_FLOAT"
+ "* return (which_alternative == 0 ? singlemove_string (operands)
+ : \" fldw%F1 %1,%0\");"
+ [(set_attr "type" "move,fpload")
+ (set_attr "length" "8,4")])
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Reject CONST_DOUBLE loads to floating point registers. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && operands[1] != CONST0_RTX (SFmode)
+ && REG_P (operands[0])
+ && HARD_REGISTER_P (operands[0])
+ && REGNO (operands[0]) >= 32)
+ FAIL;
+
+ if (emit_move_sequence (operands, SFmode, 0))
+ DONE;
+}")
+
+;; Handle SFmode input reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_insf"
+ [(set (match_operand:SF 0 "register_operand" "=Z")
+ (match_operand:SF 1 "non_hard_reg_operand" ""))
+ (clobber (match_operand:SF 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+;; Handle SFmode output reloads requiring a general register as a
+;; scratch register.
+(define_expand "reload_outsf"
+ [(set (match_operand:SF 0 "non_hard_reg_operand" "")
+ (match_operand:SF 1 "register_operand" "Z"))
+ (clobber (match_operand:SF 2 "register_operand" "=&r"))]
+ ""
+ "
+{
+ if (emit_move_sequence (operands, SFmode, operands[2]))
+ DONE;
+
+ /* We don't want the clobber emitted, so handle this ourselves. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "move_dest_operand"
+ "=f,!*r,f,*r,Q,Q,?*r,?f")
+ (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ "fG,!*rG,RQ,RQ,f,*rG,f,*r"))]
+ "(register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))
+ && !TARGET_SOFT_FLOAT
+ && !TARGET_64BIT"
+ "@
+ fcpy,sgl %f1,%0
+ copy %r1,%0
+ fldw%F1 %1,%0
+ ldw%M1 %1,%0
+ fstw%F0 %1,%0
+ stw%M0 %r1,%0
+ {fstws|fstw} %1,-16(%%sp)\n\t{ldws|ldw} -16(%%sp),%0
+ {stws|stw} %1,-16(%%sp)\n\t{fldws|fldw} -16(%%sp),%0"
+ [(set_attr "type" "fpalu,move,fpload,load,fpstore,store,fpstore_load,store_fpload")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4,8,8")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "move_dest_operand"
+ "=f,!*r,f,*r,Q,Q")
+ (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ "fG,!*rG,RQ,RQ,f,*rG"))]
+ "(register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))
+ && !TARGET_SOFT_FLOAT
+ && TARGET_64BIT"
+ "@
+ fcpy,sgl %f1,%0
+ copy %r1,%0
+ fldw%F1 %1,%0
+ ldw%M1 %1,%0
+ fstw%F0 %1,%0
+ stw%M0 %r1,%0"
+ [(set_attr "type" "fpalu,move,fpload,load,fpstore,store")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4,4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "indexed_memory_operand" "=R")
+ (match_operand:SF 1 "register_operand" "f"))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && reload_completed"
+ "fstw%F0 %1,%0"
+ [(set_attr "type" "fpstore")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 4))
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 4))))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 4))
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 2 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (const_int 4))))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (const_int 4))
+ (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:SI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_INDEX_P (operands[1])
+ && REG_OK_FOR_BASE_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:DI (match_dup 1) (match_dup 2)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_SOFT_FLOAT
+ && !TARGET_DISABLE_INDEXING
+ && TARGET_64BIT
+ && TARGET_NO_SPACE_REGS
+ && REG_OK_FOR_BASE_P (operands[1])
+ && REG_OK_FOR_INDEX_P (operands[2])
+ && FP_REGNO_P (REGNO (operands[3]))"
+ [(set (mem:SF (plus:DI (match_dup 2) (match_dup 1)))
+ (match_dup 3))
+ (set (match_dup 0) (plus:DI (match_dup 2) (match_dup 1)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "move_dest_operand"
+ "=r,r,Q")
+ (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ "rG,RQ,rG"))]
+ "(register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode))
+ && TARGET_SOFT_FLOAT"
+ "@
+ copy %r1,%0
+ ldw%M1 %1,%0
+ stw%M0 %r1,%0"
+ [(set_attr "type" "move,load,store")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4,4")])
+
+
+
+;;- zero extension instructions
+;; We use define_expand for the zero-extension patterns to make sure the
+;; operands get loaded into registers.  The define_insns accept
+;; memory operands.  This gives us better overall code than a single
+;; pattern that either always or never accepts memory operands.
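+;;
+;; For example (hypothetical source), given
+;;   unsigned int f (unsigned char *p) { return *p; }
+;; the expander forces *p into a register, but combine can merge the load
+;; back into the extension, so the insn emits a single ldb; a register
+;; source instead uses the one-instruction extru/extrw,u form.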
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI
+ (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI
+ (match_operand:QI 1 "move_src_operand" "r,RQ")))]
+ "GET_CODE (operands[1]) != CONST_INT"
+ "@
+ {extru|extrw,u} %1,31,8,%0
+ ldb%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI
+ (match_operand:QI 1 "move_src_operand" "r,RQ")))]
+ "GET_CODE (operands[1]) != CONST_INT"
+ "@
+ {extru|extrw,u} %1,31,8,%0
+ ldb%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI
+ (match_operand:HI 1 "move_src_operand" "r,RQ")))]
+ "GET_CODE (operands[1]) != CONST_INT"
+ "@
+ {extru|extrw,u} %1,31,16,%0
+ ldh%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+(define_expand "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (match_operand:QI 1 "register_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (match_operand:QI 1 "move_src_operand" "r,RQ")))]
+ "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT"
+ "@
+ extrd,u %1,63,8,%0
+ ldb%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+(define_expand "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (match_operand:HI 1 "register_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (match_operand:HI 1 "move_src_operand" "r,RQ")))]
+ "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT"
+ "@
+ extrd,u %1,63,16,%0
+ ldh%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (match_operand:SI 1 "move_src_operand" "r,RQ")))]
+ "TARGET_64BIT && GET_CODE (operands[1]) != CONST_INT"
+ "@
+ extrd,u %1,63,32,%0
+ ldw%M1 %1,%0"
+ [(set_attr "type" "shift,load")
+ (set_attr "length" "4,4")])
+
+;;- sign extension instructions
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "{extrs|extrw,s} %1,31,16,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ ""
+ "{extrs|extrw,s} %1,31,8,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ ""
+ "{extrs|extrw,s} %1,31,8,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "extrd,s %1,63,8,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "extrd,s %1,63,16,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "extrd,s %1,63,32,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+
+;; Conversions between float and double.
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float_extend:DF
+ (match_operand:SF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvff|fcnv},sgl,dbl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF
+ (match_operand:DF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvff|fcnv},dbl,sgl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+;; Conversion between fixed point and floating point.
+;; Note that among the fix-to-float insns
+;; the ones that start with SImode come first.
+;; That is so that an operand that is a CONST_INT
+;; (and therefore lacks a specific machine mode)
+;; will be recognized as SImode (which is always valid)
+;; rather than as QImode or HImode.
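+;;
+;; Schematically, in (float:SF (const_int 5)) the constant carries no mode,
+;; so pattern order decides: listing the SImode insns first makes the
+;; operand be recognized as SImode rather than QImode or HImode.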
+
+;; This pattern forces (set (reg:SF ...) (float:SF (const_int ...)))
+;; to be reloaded by putting the constant into memory.
+;; It must come before the more general floatsisf2 pattern.
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "const_int_operand" "m")))]
+ "! TARGET_SOFT_FLOAT"
+ "fldw%F1 %1,%0\;{fcnvxf,sgl,sgl|fcnv,w,sgl} %0,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "8")])
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvxf,sgl,sgl|fcnv,w,sgl} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+;; This pattern forces (set (reg:DF ...) (float:DF (const_int ...)))
+;; to be reloaded by putting the constant into memory.
+;; It must come before the more general floatsidf2 pattern.
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:SI 1 "const_int_operand" "m")))]
+ "! TARGET_SOFT_FLOAT"
+ "fldw%F1 %1,%0\;{fcnvxf,sgl,dbl|fcnv,w,dbl} %0,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "8")])
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:SI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvxf,sgl,dbl|fcnv,w,dbl} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_expand "floatunssisf2"
+ [(set (subreg:SI (match_dup 2) 4)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (subreg:SI (match_dup 2) 0)
+ (const_int 0))
+ (set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_dup 2)))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "
+{
+ if (TARGET_PA_20)
+ {
+ emit_insn (gen_floatunssisf2_pa20 (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = gen_reg_rtx (DImode);
+}")
+
+(define_expand "floatunssidf2"
+ [(set (subreg:SI (match_dup 2) 4)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (subreg:SI (match_dup 2) 0)
+ (const_int 0))
+ (set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_dup 2)))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "
+{
+ if (TARGET_PA_20)
+ {
+ emit_insn (gen_floatunssidf2_pa20 (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = gen_reg_rtx (DImode);
+}")
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:DI 1 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "{fcnvxf,dbl,sgl|fcnv,dw,sgl} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (float:DF (match_operand:DI 1 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "{fcnvxf,dbl,dbl|fcnv,dw,dbl} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+;; Convert a float to an actual integer.
+;; Truncation is performed as part of the conversion.
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvfxt,sgl,sgl|fcnv,t,sgl,w} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT"
+ "{fcnvfxt,dbl,sgl|fcnv,t,dbl,w} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "{fcnvfxt,sgl,dbl|fcnv,t,sgl,dw} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "f"))))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT"
+ "{fcnvfxt,dbl,dbl|fcnv,t,dbl,dw} %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "floatunssidf2_pa20"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unsigned_float:DF (match_operand:SI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,uw,dbl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "floatunssisf2_pa20"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unsigned_float:SF (match_operand:SI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,uw,sgl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "floatunsdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unsigned_float:SF (match_operand:DI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,udw,sgl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "floatunsdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unsigned_float:DF (match_operand:DI 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,udw,dbl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (unsigned_fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,t,sgl,uw %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fixuns_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (unsigned_fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,t,dbl,uw %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fixuns_truncsfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unsigned_fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,t,sgl,udw %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "fixuns_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=f")
+ (unsigned_fix:DI (fix:DF (match_operand:DF 1 "register_operand" "f"))))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ "fcnv,t,dbl,udw %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+;;- arithmetic instructions
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "adddi3_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "arith11_operand" "rI")))]
+ "!TARGET_64BIT"
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) >= 0)
+ return \"addi %2,%R1,%R0\;{addc|add,c} %1,%%r0,%0\";
+ else
+ return \"addi %2,%R1,%R0\;{subb|sub,b} %1,%%r0,%0\";
+ }
+ else
+ return \"add %R2,%R1,%R0\;{addc|add,c} %2,%1,%0\";
+}"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%r,r")
+ (match_operand:DI 2 "arith_operand" "r,J")))]
+ "TARGET_64BIT"
+ "@
+ add,l %1,%2,%0
+ ldo %2(%1),%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "uaddcm %2,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (not:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "uaddcm %2,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_expand "addvdi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "arith11_operand" "")))
+ (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (plus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))])]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rM,rM")
+ (match_operand:DI 2 "arith11_operand" "r,I")))
+ (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (plus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ "TARGET_64BIT"
+ "@
+ add,tsv,* %2,%1,%0
+ addi,tsv,* %2,%1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rM")
+ (match_operand:DI 2 "arith11_operand" "rI")))
+ (trap_if (ne (plus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (plus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ "!TARGET_64BIT"
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) >= 0)
+ return \"addi %2,%R1,%R0\;{addco|add,c,tsv} %1,%%r0,%0\";
+ else
+ return \"addi %2,%R1,%R0\;{subbo|sub,b,tsv} %1,%%r0,%0\";
+ }
+ else
+ return \"add %R2,%R1,%R0\;{addco|add,c,tsv} %2,%1,%0\";
+}"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+;; define_splits to optimize cases of adding a constant integer
+;; to a register when the constant does not fit in 14 bits; a worked
+;; example follows each split.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (match_operand:SI 4 "register_operand" ""))]
+ "! cint_ok_for_move (INTVAL (operands[2]))
+ && VAL_14_BITS_P (INTVAL (operands[2]) >> 1)"
+ [(set (match_dup 4) (plus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 4) (match_dup 3)))]
+ "
+{
+ int val = INTVAL (operands[2]);
+ int low = (val < 0) ? -0x2000 : 0x1fff;
+ int rest = val - low;
+
+ operands[2] = GEN_INT (rest);
+ operands[3] = GEN_INT (low);
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (match_operand:SI 4 "register_operand" ""))]
+ "! cint_ok_for_move (INTVAL (operands[2]))"
+ [(set (match_dup 4) (match_dup 2))
+ (set (match_dup 0) (plus:SI (mult:SI (match_dup 4) (match_dup 3))
+ (match_dup 1)))]
+ "
+{
+ HOST_WIDE_INT intval = INTVAL (operands[2]);
+
+ /* Try dividing the constant by 2, then 4, and finally 8 to see
+ if we can get a constant which can be loaded into a register
+ in a single instruction (cint_ok_for_move).
+
+ If that fails, try to negate the constant and subtract it
+ from our input operand. */
+ if (intval % 2 == 0 && cint_ok_for_move (intval / 2))
+ {
+ operands[2] = GEN_INT (intval / 2);
+ operands[3] = const2_rtx;
+ }
+ else if (intval % 4 == 0 && cint_ok_for_move (intval / 4))
+ {
+ operands[2] = GEN_INT (intval / 4);
+ operands[3] = GEN_INT (4);
+ }
+ else if (intval % 8 == 0 && cint_ok_for_move (intval / 8))
+ {
+ operands[2] = GEN_INT (intval / 8);
+ operands[3] = GEN_INT (8);
+ }
+ else if (cint_ok_for_move (-intval))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4], GEN_INT (-intval)));
+ emit_insn (gen_subsi3 (operands[0], operands[1], operands[4]));
+ DONE;
+ }
+ else
+ FAIL;
+}")
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "arith_operand" "r,J")))]
+ ""
+ "@
+ {addl|add,l} %1,%2,%0
+ ldo %2(%1),%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "pa_combine_type" "addmove")
+ (set_attr "length" "4,4")])
+
+(define_insn "addvsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rM,rM")
+ (match_operand:SI 2 "arith11_operand" "r,I")))
+ (trap_if (ne (plus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (sign_extend:DI (plus:SI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "@
+ {addo|add,tsv} %2,%1,%0
+ {addio|addi,tsv} %2,%1,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,4")])
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r,!q")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "r,I,!U")
+ (match_operand:DI 2 "reg_or_0_operand" "rM,rM,!rM")))]
+ "TARGET_64BIT"
+ "@
+ sub %1,%2,%0
+ subi %1,%2,%0
+ mtsarcm %2"
+ [(set_attr "type" "binary,binary,move")
+ (set_attr "length" "4,4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,&r")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "r,I")
+ (match_operand:DI 2 "reg_or_0_operand" "rM,rM")))]
+ "!TARGET_64BIT"
+ "*
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) >= 0)
+ return \"subi %1,%R2,%R0\;{subb|sub,b} %%r0,%2,%0\";
+ else
+ return \"ldi -1,%0\;subi %1,%R2,%R0\;{subb|sub,b} %0,%2,%0\";
+ }
+ else
+ return \"sub %R1,%R2,%R0\;{subb|sub,b} %1,%2,%0\";
+}"
+ [(set_attr "type" "binary")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_int 8)
+ (if_then_else (ge (symbol_ref "INTVAL (operands[1])")
+ (const_int 0))
+ (const_int 8)
+ (const_int 12))))])
+
+(define_expand "subvdi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "")
+ (match_operand:DI 2 "reg_or_0_operand" "")))
+ (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (minus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))])]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "r,I")
+ (match_operand:DI 2 "reg_or_0_operand" "rM,rM")))
+ (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (minus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ "TARGET_64BIT"
+ "@
+ {subo|sub,tsv} %1,%2,%0
+ {subio|subi,tsv} %1,%2,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,&r")
+ (minus:DI (match_operand:DI 1 "arith11_operand" "r,I")
+ (match_operand:DI 2 "reg_or_0_operand" "rM,rM")))
+ (trap_if (ne (minus:TI (sign_extend:TI (match_dup 1))
+ (sign_extend:TI (match_dup 2)))
+ (sign_extend:TI (minus:DI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ "!TARGET_64BIT"
+ "*
+{
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ if (INTVAL (operands[1]) >= 0)
+ return \"subi %1,%R2,%R0\;{subbo|sub,b,tsv} %%r0,%2,%0\";
+ else
+ return \"ldi -1,%0\;subi %1,%R2,%R0\;{subbo|sub,b,tsv} %0,%2,%0\";
+ }
+ else
+ return \"sub %R1,%R2,%R0\;{subbo|sub,b,tsv} %1,%2,%0\";
+}"
+ [(set_attr "type" "binary,binary")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_int 8)
+ (if_then_else (ge (symbol_ref "INTVAL (operands[1])")
+ (const_int 0))
+ (const_int 8)
+ (const_int 12))))])
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "arith11_operand" "")
+ (match_operand:SI 2 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "arith11_operand" "r,I")
+ (match_operand:SI 2 "register_operand" "r,r")))]
+ "!TARGET_PA_20"
+ "@
+ sub %1,%2,%0
+ subi %1,%2,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r,!q")
+ (minus:SI (match_operand:SI 1 "arith11_operand" "r,I,!S")
+ (match_operand:SI 2 "register_operand" "r,r,!r")))]
+ "TARGET_PA_20"
+ "@
+ sub %1,%2,%0
+ subi %1,%2,%0
+ mtsarcm %2"
+ [(set_attr "type" "binary,binary,move")
+ (set_attr "length" "4,4,4")])
+
+(define_insn "subvsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "arith11_operand" "rM,I")
+ (match_operand:SI 2 "reg_or_0_operand" "rM,rM")))
+ (trap_if (ne (minus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (sign_extend:DI (minus:SI (match_dup 1)
+ (match_dup 2))))
+ (const_int 0))]
+ ""
+ "@
+ {subo|sub,tsv} %1,%2,%0
+ {subio|subi,tsv} %1,%2,%0"
+ [(set_attr "type" "binary,binary")
+ (set_attr "length" "4,4")])
+
+;; Clobbering a "register_operand" instead of a match_scratch
+;; in operand3 of millicode calls avoids spilling %r1 and
+;; produces better code.
+
+;; The mulsi3 insns set up registers for the millicode call.
+(define_expand "mulsi3"
+ [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" ""))
+ (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" ""))
+ (parallel [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_dup 3))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (match_dup 4))])
+ (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))]
+ ""
+ "
+{
+ operands[4] = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
+ if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
+ {
+ rtx scratch = gen_reg_rtx (DImode);
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_umulsidi3 (scratch, operands[1], operands[2]));
+ emit_insn (gen_movsi (operands[0],
+ gen_rtx_SUBREG (SImode, scratch,
+ GET_MODE_SIZE (SImode))));
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=f")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f"))
+ (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "f"))))]
+ "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT"
+ "xmpyu %1,%2,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=f")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f"))
+ (match_operand:DI 2 "uint32_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && !TARGET_64BIT"
+ "xmpyu %1,%R2,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=f")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "f"))
+ (match_operand:DI 2 "uint32_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT && TARGET_64BIT"
+ "xmpyu %1,%2R,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "* return output_mul_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
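+;; The never-true (and (const_int 0) (eq (const_int 0) (pc))) arm in
+;; the length attribute of these millicode patterns appears to be a
+;; trick: referencing (pc) makes the length depend on the insn
+;; address, so branch shortening keeps recomputing it through
+;; attr_length_millicode_call.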
+(define_insn ""
+ [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "* return output_mul_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_expand "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ "TARGET_64BIT && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT"
+ "
+{
+ rtx low_product = gen_reg_rtx (DImode);
+ rtx cross_product1 = gen_reg_rtx (DImode);
+ rtx cross_product2 = gen_reg_rtx (DImode);
+ rtx cross_scratch = gen_reg_rtx (DImode);
+ rtx cross_product = gen_reg_rtx (DImode);
+ rtx op1l, op1r, op2l, op2r;
+ rtx op1shifted, op2shifted;
+
+ op1shifted = gen_reg_rtx (DImode);
+ op2shifted = gen_reg_rtx (DImode);
+ op1l = gen_reg_rtx (SImode);
+ op1r = gen_reg_rtx (SImode);
+ op2l = gen_reg_rtx (SImode);
+ op2r = gen_reg_rtx (SImode);
+
+ emit_move_insn (op1shifted, gen_rtx_LSHIFTRT (DImode, operands[1],
+ GEN_INT (32)));
+ emit_move_insn (op2shifted, gen_rtx_LSHIFTRT (DImode, operands[2],
+ GEN_INT (32)));
+ op1r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[1], 4));
+ op2r = force_reg (SImode, gen_rtx_SUBREG (SImode, operands[2], 4));
+ op1l = force_reg (SImode, gen_rtx_SUBREG (SImode, op1shifted, 4));
+ op2l = force_reg (SImode, gen_rtx_SUBREG (SImode, op2shifted, 4));
+
+ /* Emit multiplies for the cross products. */
+ emit_insn (gen_umulsidi3 (cross_product1, op2r, op1l));
+ emit_insn (gen_umulsidi3 (cross_product2, op2l, op1r));
+
+ /* Emit a multiply for the low sub-word. */
+ emit_insn (gen_umulsidi3 (low_product, copy_rtx (op2r), copy_rtx (op1r)));
+
+ /* Sum the cross products and shift them into proper position. */
+ emit_insn (gen_adddi3 (cross_scratch, cross_product1, cross_product2));
+ emit_insn (gen_ashldi3 (cross_product, cross_scratch, GEN_INT (32)));
+
+  /* Add the cross product to the low product and store the result
+     into the output operand.  */
+ emit_insn (gen_adddi3 (operands[0], cross_product, low_product));
+ DONE;
+}")
+
+;;; Division and mod.
+(define_expand "divsi3"
+ [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" ""))
+ (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" ""))
+ (parallel [(set (reg:SI 29) (div:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (match_dup 5))])
+ (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))]
+ ""
+ "
+{
+ operands[3] = gen_reg_rtx (SImode);
+ if (TARGET_64BIT)
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
+ if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 0))
+ DONE;
+}")
+
+(define_insn ""
+ [(set (reg:SI 29)
+ (div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_insn ""
+ [(set (reg:SI 29)
+ (div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_expand "udivsi3"
+ [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" ""))
+ (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" ""))
+ (parallel [(set (reg:SI 29) (udiv:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (match_dup 5))])
+ (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))]
+ ""
+ "
+{
+ operands[3] = gen_reg_rtx (SImode);
+
+ if (TARGET_64BIT)
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
+ if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 1))
+ DONE;
+}")
+
+(define_insn ""
+ [(set (reg:SI 29)
+ (udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_insn ""
+ [(set (reg:SI 29)
+ (udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+ (clobber (match_operand:SI 1 "register_operand" "=a"))
+ (clobber (match_operand:SI 2 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_div_insn (operands, 1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_expand "modsi3"
+ [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" ""))
+ (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" ""))
+ (parallel [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (match_dup 5))])
+ (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))]
+ ""
+ "
+{
+ if (TARGET_64BIT)
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_insn ""
+ [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "*
+ return output_mod_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_insn ""
+ [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_mod_insn (0, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_expand "umodsi3"
+ [(set (reg:SI 26) (match_operand:SI 1 "move_src_operand" ""))
+ (set (reg:SI 25) (match_operand:SI 2 "move_src_operand" ""))
+ (parallel [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (match_dup 5))])
+ (set (match_operand:SI 0 "move_dest_operand" "") (reg:SI 29))]
+ ""
+ "
+{
+ if (TARGET_64BIT)
+ {
+ operands[5] = gen_rtx_REG (SImode, 2);
+ operands[4] = operands[5];
+ }
+ else
+ {
+ operands[5] = gen_rtx_REG (SImode, 31);
+ operands[4] = gen_reg_rtx (SImode);
+ }
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_insn ""
+ [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "*
+ return output_mod_insn (1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+(define_insn ""
+ [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (match_operand:SI 1 "register_operand" "=&r"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 25))
+ (clobber (reg:SI 2))]
+ "TARGET_64BIT"
+ "*
+ return output_mod_insn (1, insn);"
+ [(set_attr "type" "milli")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_millicode_call (insn)")))])
+
+;;- and instructions
+;; We define a DImode `and` pattern so that, combined with DImode
+;; `not`, combine can produce a DImode `andn`.  Other combinations
+;; are possible.
+
+(define_expand "anddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (and:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "and_operand" "")))]
+ ""
+ "
+{
+  /* On 32-bit targets, both operands must be register operands.  */
+ if (!TARGET_64BIT && !register_operand (operands[2], DImode))
+ FAIL;
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "and %1,%2,%0\;and %R1,%R2,%R0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (and:DI (match_operand:DI 1 "register_operand" "%?r,0")
+ (match_operand:DI 2 "and_operand" "rO,P")))]
+ "TARGET_64BIT"
+ "* return output_64bit_and (operands); "
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+; The ? for op1 makes reload prefer zdepi instead of loading a huge
+; constant with ldil;ldo.
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%?r,0")
+ (match_operand:SI 2 "and_operand" "rO,P")))]
+ ""
+ "* return output_and (operands); "
+ [(set_attr "type" "binary,shift")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:DI 2 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "andcm %2,%1,%0\;andcm %R2,%R1,%R0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "andcm %2,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "andcm %2,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_expand "iordi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ior:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "reg_or_cint_ior_operand" "")))]
+ ""
+ "
+{
+  /* On 32-bit targets, both operands must be register operands.  */
+ if (!TARGET_64BIT && !register_operand (operands[2], DImode))
+ FAIL;
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "or %1,%2,%0\;or %R1,%R2,%R0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (ior:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:DI 2 "cint_ior_operand" "M,i")))]
+ "TARGET_64BIT"
+ "* return output_64bit_ior (operands); "
+ [(set_attr "type" "binary,shift")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "or %1,%2,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+;; Need a define_expand because we've run out of CONST_OK... characters.
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_cint_ior_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "cint_ior_operand" "M,i")))]
+ ""
+ "* return output_ior (operands); "
+ [(set_attr "type" "binary,shift")
+ (set_attr "length" "4,4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "or %1,%2,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_expand "xordi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ ""
+ "
+{
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "xor %1,%2,%0\;xor %R1,%R2,%R0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "xor %1,%2,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "xor %1,%2,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (match_operand:DI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "sub %%r0,%R1,%R0\;{subb|sub,b} %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "sub %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+(define_expand "negvdi2"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (neg:DI (match_operand:DI 1 "register_operand" "")))
+ (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1)))
+ (sign_extend:TI (neg:DI (match_dup 1))))
+ (const_int 0))])]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1)))
+ (sign_extend:TI (neg:DI (match_dup 1))))
+ (const_int 0))]
+ "!TARGET_64BIT"
+ "sub %%r0,%R1,%R0\;{subbo|sub,b,tsv} %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (trap_if (ne (neg:TI (sign_extend:TI (match_dup 1)))
+ (sign_extend:TI (neg:DI (match_dup 1))))
+ (const_int 0))]
+ "TARGET_64BIT"
+ "sub,tsv %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "sub %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+(define_insn "negvsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))
+ (trap_if (ne (neg:DI (sign_extend:DI (match_dup 1)))
+ (sign_extend:DI (neg:SI (match_dup 1))))
+ (const_int 0))]
+ ""
+ "{subo|sub,tsv} %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (not:DI (match_operand:DI 1 "register_operand" "")))]
+ ""
+ "
+{
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (match_operand:DI 1 "register_operand" "r")))]
+ "!TARGET_64BIT"
+ "uaddcm %%r0,%1,%0\;uaddcm %%r0,%R1,%R0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "uaddcm %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "uaddcm %%r0,%1,%0"
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
+
+;; Floating point arithmetic instructions.
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (plus:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fadd,dbl %1,%2,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "pa_combine_type" "faddsub")
+ (set_attr "length" "4")])
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fadd,sgl %1,%2,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "pa_combine_type" "faddsub")
+ (set_attr "length" "4")])
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (minus:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fsub,dbl %1,%2,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "pa_combine_type" "faddsub")
+ (set_attr "length" "4")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fsub,sgl %1,%2,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "pa_combine_type" "faddsub")
+ (set_attr "length" "4")])
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (mult:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fmpy,dbl %1,%2,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "pa_combine_type" "fmpy")
+ (set_attr "length" "4")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fmpy,sgl %1,%2,%0"
+ [(set_attr "type" "fpmulsgl")
+ (set_attr "pa_combine_type" "fmpy")
+ (set_attr "length" "4")])
+
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (div:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fdiv,dbl %1,%2,%0"
+ [(set_attr "type" "fpdivdbl")
+ (set_attr "length" "4")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fdiv,sgl %1,%2,%0"
+ [(set_attr "type" "fpdivsgl")
+ (set_attr "length" "4")])
+
+;; Processors prior to PA 2.0 don't have a fneg instruction. Fast
+;; negation can be done by subtracting from plus zero. However, this
+;; violates the IEEE standard when negating plus and minus zero.
+;; The slow path toggles the sign bit in the general registers.
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "register_operand" "")))]
+ "!TARGET_SOFT_FLOAT"
+{
+ if (TARGET_PA_20 || !flag_signed_zeros)
+ emit_insn (gen_negdf2_fast (operands[0], operands[1]));
+ else
+ emit_insn (gen_negdf2_slow (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "negdf2_slow"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (neg:DF (match_operand:DF 1 "register_operand" "r")))]
+ "!TARGET_SOFT_FLOAT && !TARGET_PA_20"
+ "*
+{
+ if (rtx_equal_p (operands[0], operands[1]))
+ return \"and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0\";
+ else
+ return \"and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0\;copy %R1,%R0\";
+}"
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "rtx_equal_p (operands[0], operands[1])")
+ (const_int 0))
+ (const_int 12)
+ (const_int 16)))])
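+;; A sketch of the slow sequence above, assuming the usual PA
+;; nullification semantics: and,< copies %1 and nullifies the next
+;; insn if the result is negative; depi,tr sets the sign bit and
+;; always nullifies the following insn; depi 0 clears the sign bit.
+;; Exactly one of the two deposits executes, flipping the sign.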
+
+(define_insn "negdf2_fast"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (match_operand:DF 1 "register_operand" "f")))]
+ "!TARGET_SOFT_FLOAT"
+ "*
+{
+ if (TARGET_PA_20)
+ return \"fneg,dbl %1,%0\";
+ else
+ return \"fsub,dbl %%fr0,%1,%0\";
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (match_operand:SF 1 "register_operand" "")))]
+ "!TARGET_SOFT_FLOAT"
+{
+ if (TARGET_PA_20 || !flag_signed_zeros)
+ emit_insn (gen_negsf2_fast (operands[0], operands[1]));
+ else
+ emit_insn (gen_negsf2_slow (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "negsf2_slow"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1 "register_operand" "r")))]
+ "!TARGET_SOFT_FLOAT && !TARGET_PA_20"
+ "and,< %1,%1,%0\;depi,tr 1,0,1,%0\;depi 0,0,1,%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "negsf2_fast"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "register_operand" "f")))]
+ "!TARGET_SOFT_FLOAT"
+ "*
+{
+ if (TARGET_PA_20)
+ return \"fneg,sgl %1,%0\";
+ else
+ return \"fsub,sgl %%fr0,%1,%0\";
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (abs:DF (match_operand:DF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fabs,dbl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fabs,sgl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fsqrt,dbl %1,%0"
+ [(set_attr "type" "fpsqrtdbl")
+ (set_attr "length" "4")])
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+ "! TARGET_SOFT_FLOAT"
+ "fsqrt,sgl %1,%0"
+ [(set_attr "type" "fpsqrtsgl")
+ (set_attr "length" "4")])
+
+;; PA 2.0 floating point instructions
+
+; fmpyfadd patterns
+(define_insn "fmadf4"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (fma:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f")
+ (match_operand:DF 3 "register_operand" "f")))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fmpyfadd,dbl %1,%2,%3,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "f")))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fmpyfadd,sgl %1,%2,%3,%0"
+ [(set_attr "type" "fpmulsgl")
+ (set_attr "length" "4")])
+
+; fmpynfadd patterns
+(define_insn "fnmadf4"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (fma:DF (neg:DF (match_operand:DF 1 "register_operand" "f"))
+ (match_operand:DF 2 "register_operand" "f")
+ (match_operand:DF 3 "register_operand" "f")))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fmpynfadd,dbl %1,%2,%3,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f"))
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "f")))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fmpynfadd,sgl %1,%2,%3,%0"
+ [(set_attr "type" "fpmulsgl")
+ (set_attr "length" "4")])
+
+; fnegabs patterns
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fnegabs,dbl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "TARGET_PA_20 && ! TARGET_SOFT_FLOAT"
+ "fnegabs,sgl %1,%0"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (abs:DF (match_operand:DF 1 "register_operand" "f"))))
+ (set (match_operand:DF 2 "register_operand" "=&f") (abs:DF (match_dup 1)))]
+ "(! TARGET_SOFT_FLOAT && TARGET_PA_20
+ && ! reg_overlap_mentioned_p (operands[2], operands[1]))"
+ "#"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (abs:DF (match_operand:DF 1 "register_operand" ""))))
+ (set (match_operand:DF 2 "register_operand" "") (abs:DF (match_dup 1)))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ [(set (match_dup 2) (abs:DF (match_dup 1)))
+ (set (match_dup 0) (neg:DF (abs:DF (match_dup 1))))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (abs:SF (match_operand:SF 1 "register_operand" "f"))))
+ (set (match_operand:SF 2 "register_operand" "=&f") (abs:SF (match_dup 1)))]
+ "(! TARGET_SOFT_FLOAT && TARGET_PA_20
+ && ! reg_overlap_mentioned_p (operands[2], operands[1]))"
+ "#"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (abs:SF (match_operand:SF 1 "register_operand" ""))))
+ (set (match_operand:SF 2 "register_operand" "") (abs:SF (match_dup 1)))]
+ "! TARGET_SOFT_FLOAT && TARGET_PA_20"
+ [(set (match_dup 2) (abs:SF (match_dup 1)))
+ (set (match_dup 0) (neg:SF (abs:SF (match_dup 1))))]
+ "")
+
+;; Negating a multiply can be faked by adding zero in a fused multiply-add
+;; instruction if we can ignore the sign of zero.
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f"))))]
+ "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros"
+ "fmpynfadd,dbl %1,%2,%%fr0,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f"))))]
+ "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros"
+ "fmpynfadd,sgl %1,%2,%%fr0,%0"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "f")
+ (match_operand:DF 2 "register_operand" "f"))))
+ (set (match_operand:DF 3 "register_operand" "=&f")
+ (mult:DF (match_dup 1) (match_dup 2)))]
+ "(!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros
+ && ! (reg_overlap_mentioned_p (operands[3], operands[1])
+ || reg_overlap_mentioned_p (operands[3], operands[2])))"
+ "#"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (mult:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "register_operand" ""))))
+ (set (match_operand:DF 3 "register_operand" "")
+ (mult:DF (match_dup 1) (match_dup 2)))]
+ "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros"
+ [(set (match_dup 3) (mult:DF (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (neg:DF (mult:DF (match_dup 1) (match_dup 2))))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f"))))
+ (set (match_operand:SF 3 "register_operand" "=&f")
+ (mult:SF (match_dup 1) (match_dup 2)))]
+ "(!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros
+ && ! (reg_overlap_mentioned_p (operands[3], operands[1])
+ || reg_overlap_mentioned_p (operands[3], operands[2])))"
+ "#"
+ [(set_attr "type" "fpmuldbl")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (neg:SF (mult:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "register_operand" ""))))
+ (set (match_operand:SF 3 "register_operand" "")
+ (mult:SF (match_dup 1) (match_dup 2)))]
+  "!TARGET_SOFT_FLOAT && TARGET_PA_20 && !flag_signed_zeros"
+ [(set (match_dup 3) (mult:SF (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (neg:SF (mult:SF (match_dup 1) (match_dup 2))))]
+ "")
+
+;;- Shift instructions
+
+;; Optimized special case of shifting.
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
+ (const_int 24)))]
+ ""
+ "ldb%M1 %1,%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
+ (const_int 16)))]
+ ""
+ "ldh%M1 %1,%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "shadd_operand" ""))
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0} "
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (mult:DI (match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "shadd_operand" ""))
+ (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_64BIT"
+ "shladd,l %2,%O3,%1,%0"
+ [(set_attr "type" "binary")
+ (set_attr "length" "4")])
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "lhs_lshift_operand" "")
+ (match_operand:SI 2 "arith32_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (temp, GEN_INT (31), operands[2]));
+ if (GET_CODE (operands[1]) == CONST_INT)
+ emit_insn (gen_zvdep_imm32 (operands[0], operands[1], temp));
+ else
+ emit_insn (gen_zvdep32 (operands[0], operands[1], temp));
+ DONE;
+ }
+  /* Make sure both inputs are not constants;
+     there are no patterns for that case.  */
+ operands[1] = force_reg (SImode, operands[1]);
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ ""
+ "{zdep|depw,z} %1,%P2,%L2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+; Match cases here where op1 is a CONST_INT that zvdep_imm32 doesn't handle.
+; Doing it like this makes slightly better code since reload can
+; replace a register with a known value in range -16..15 with a
+; constant. Ideally, we would like to merge zvdep32 and zvdep_imm32,
+; but since we have no more CONST_OK... characters, that is not
+; possible.
+(define_insn "zvdep32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "arith5_operand" "r,L")
+ (minus:SI (const_int 31)
+ (match_operand:SI 2 "register_operand" "q,q"))))]
+ ""
+ "@
+ {zvdep %1,32,%0|depw,z %1,%%sar,32,%0}
+ {zvdepi %1,32,%0|depwi,z %1,%%sar,32,%0}"
+ [(set_attr "type" "shift,shift")
+ (set_attr "length" "4,4")])
+
+(define_insn "zvdep_imm32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "lhs_lshift_cint_operand" "")
+ (minus:SI (const_int 31)
+ (match_operand:SI 2 "register_operand" "q"))))]
+ ""
+ "*
+{
+ unsigned HOST_WIDE_INT x = UINTVAL (operands[1]);
+ operands[2] = GEN_INT (4 + exact_log2 ((x >> 4) + 1));
+ operands[1] = GEN_INT ((x & 0xf) - 0x10);
+ return \"{zvdepi %1,%2,%0|depwi,z %1,%%sar,%2,%0}\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
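+;; Worked example: for x = 0xff7, x >> 4 = 0xff is a run of eight
+;; ones, so the field width is 4 + exact_log2 (0x100) = 12 and the
+;; immediate is (7 - 0x10) = -9 = 0b10111; sign-extending that 5-bit
+;; value over 12 bits recreates 0xff7 before the variable shift.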
+
+(define_insn "vdepi_ior"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "")
+ (minus:SI (const_int 31)
+ (match_operand:SI 2 "register_operand" "q")))
+ (match_operand:SI 3 "register_operand" "0")))]
+  ; accept constants of the form ...0001...1; can this be generalized?
+ "exact_log2 (INTVAL (operands[1]) + 1) > 0"
+ "*
+{
+ HOST_WIDE_INT x = INTVAL (operands[1]);
+ operands[2] = GEN_INT (exact_log2 (x + 1));
+ return \"{vdepi -1,%2,%0|depwi -1,%%sar,%2,%0}\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
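+;; E.g. operand 1 == 7 (0b111) gives exact_log2 (8) == 3, so
+;; {vdepi -1|depwi -1} ORs a 3-bit run of ones into the register at
+;; the position selected by %sar.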
+
+(define_insn "vdepi_and"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (rotate:SI (match_operand:SI 1 "const_int_operand" "")
+ (minus:SI (const_int 31)
+ (match_operand:SI 2 "register_operand" "q")))
+ (match_operand:SI 3 "register_operand" "0")))]
+ ; this can be generalized...!
+ "INTVAL (operands[1]) == -2"
+ "*
+{
+ HOST_WIDE_INT x = INTVAL (operands[1]);
+ operands[2] = GEN_INT (exact_log2 ((~x) + 1));
+ return \"{vdepi 0,%2,%0|depwi 0,%%sar,%2,%0}\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
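+;; E.g. operand 1 == -2 (...1110): ~x + 1 == 2, so exact_log2 gives
+;; 1 and {vdepi 0|depwi 0} clears a single bit at the %sar-selected
+;; position.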
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "lhs_lshift_operand" "")
+ (match_operand:DI 2 "arith32_operand" "")))]
+ "TARGET_64BIT"
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_subdi3 (temp, GEN_INT (63), operands[2]));
+ if (GET_CODE (operands[1]) == CONST_INT)
+ emit_insn (gen_zvdep_imm64 (operands[0], operands[1], temp));
+ else
+ emit_insn (gen_zvdep64 (operands[0], operands[1], temp));
+ DONE;
+ }
+  /* Make sure both inputs are not constants;
+     there are no patterns for that case.  */
+ operands[1] = force_reg (DImode, operands[1]);
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_64BIT"
+ "depd,z %1,%p2,%Q2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+; Match cases here where op1 is a CONST_INT that zvdep_imm64 doesn't handle.
+; Doing it like this makes slightly better code since reload can
+; replace a register with a known value in range -16..15 with a
+; constant. Ideally, we would like to merge zvdep64 and zvdep_imm64,
+; but since we have no more CONST_OK... characters, that is not
+; possible.
+(define_insn "zvdep64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (ashift:DI (match_operand:DI 1 "arith5_operand" "r,L")
+ (minus:DI (const_int 63)
+ (match_operand:DI 2 "register_operand" "q,q"))))]
+ "TARGET_64BIT"
+ "@
+ depd,z %1,%%sar,64,%0
+ depdi,z %1,%%sar,64,%0"
+ [(set_attr "type" "shift,shift")
+ (set_attr "length" "4,4")])
+
+(define_insn "zvdep_imm64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "lhs_lshift_cint_operand" "")
+ (minus:DI (const_int 63)
+ (match_operand:DI 2 "register_operand" "q"))))]
+ "TARGET_64BIT"
+ "*
+{
+ unsigned HOST_WIDE_INT x = UINTVAL (operands[1]);
+ operands[2] = GEN_INT (4 + exact_log2 ((x >> 4) + 1));
+ operands[1] = GEN_INT ((x & 0x1f) - 0x20);
+ return \"depdi,z %1,%%sar,%2,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "const_int_operand" "")
+ (minus:DI (const_int 63)
+ (match_operand:DI 2 "register_operand" "q")))
+ (match_operand:DI 3 "register_operand" "0")))]
+  ; accept constants of the form ...0001...1; can this be generalized?
+ "TARGET_64BIT && exact_log2 (INTVAL (operands[1]) + 1) > 0"
+ "*
+{
+ HOST_WIDE_INT x = INTVAL (operands[1]);
+ operands[2] = GEN_INT (exact_log2 (x + 1));
+ return \"depdi -1,%%sar,%2,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (rotate:DI (match_operand:DI 1 "const_int_operand" "")
+ (minus:DI (const_int 63)
+ (match_operand:DI 2 "register_operand" "q")))
+ (match_operand:DI 3 "register_operand" "0")))]
+ ; this can be generalized...!
+ "TARGET_64BIT && INTVAL (operands[1]) == -2"
+ "*
+{
+ HOST_WIDE_INT x = INTVAL (operands[1]);
+ operands[2] = GEN_INT (exact_log2 ((~x) + 1));
+ return \"depdi 0,%%sar,%2,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arith32_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (temp, GEN_INT (31), operands[2]));
+ emit_insn (gen_vextrs32 (operands[0], operands[1], temp));
+ DONE;
+ }
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ ""
+ "{extrs|extrw,s} %1,%P2,%L2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "vextrs32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (const_int 31)
+ (match_operand:SI 2 "register_operand" "q"))))]
+ ""
+ "{vextrs %1,32,%0|extrw,s %1,%%sar,32,%0}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "arith32_operand" "")))]
+ "TARGET_64BIT"
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_subdi3 (temp, GEN_INT (63), operands[2]));
+ emit_insn (gen_vextrs64 (operands[0], operands[1], temp));
+ DONE;
+ }
+}")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_64BIT"
+ "extrd,s %1,%p2,%Q2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "vextrs64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:DI (const_int 63)
+ (match_operand:DI 2 "register_operand" "q"))))]
+ "TARGET_64BIT"
+ "extrd,s %1,%%sar,64,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith32_operand" "q,n")))]
+ ""
+ "@
+ {vshd %%r0,%1,%0|shrpw %%r0,%1,%%sar,%0}
+ {extru|extrw,u} %1,%P2,%L2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r")
+ (match_operand:DI 2 "arith32_operand" "q,n")))]
+ "TARGET_64BIT"
+ "@
+ shrpd %%r0,%1,%%sar,%0
+ extrd,u %1,%p2,%Q2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith32_operand" "q,n")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 31);
+ return \"{shd|shrpw} %1,%1,%2,%0\";
+ }
+ else
+ return \"{vshd %1,%1,%0|shrpw %1,%1,%%sar,%0}\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (rotate:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arith32_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (temp, GEN_INT (32), operands[2]));
+ emit_insn (gen_rotrsi3 (operands[0], operands[1], temp));
+ DONE;
+ }
+ /* Else expand normally. */
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")))]
+ ""
+ "*
+{
+ operands[2] = GEN_INT ((32 - INTVAL (operands[2])) & 31);
+ return \"{shd|shrpw} %1,%1,%2,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 5 "plus_xor_ior_operator"
+ [(ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n"))
+ (lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "n"))]))]
+ "INTVAL (operands[3]) + INTVAL (operands[4]) == 32"
+ "{shd|shrpw} %1,%2,%4,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 5 "plus_xor_ior_operator"
+ [(lshiftrt:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "n"))
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n"))]))]
+ "INTVAL (operands[3]) + INTVAL (operands[4]) == 32"
+ "{shd|shrpw} %1,%2,%4,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "exact_log2 (1 + (INTVAL (operands[3]) >> (INTVAL (operands[2]) & 31))) > 0"
+ "*
+{
+ int cnt = INTVAL (operands[2]) & 31;
+ operands[3] = GEN_INT (exact_log2 (1 + (INTVAL (operands[3]) >> cnt)));
+ operands[2] = GEN_INT (31 - cnt);
+ return \"{zdep|depw,z} %1,%2,%3,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
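+;; I.e. (x << cnt) & mask, where mask >> cnt == 2**m - 1, deposits the
+;; low m bits of x, shifted left by cnt, in one {zdep|depw,z}; e.g.
+;; cnt = 8, mask = 0xff00 gives m = 8.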
+
+;; Unconditional and other jump instructions.
+
+;; Trivial return used when no epilogue is needed.
+(define_insn "return"
+ [(return)
+ (use (reg:SI 2))]
+ "pa_can_use_return_insn ()"
+ "*
+{
+ if (TARGET_PA_20)
+ return \"bve%* (%%r2)\";
+ return \"bv%* %%r0(%%r2)\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;; This is used for most returns.
+(define_insn "return_internal"
+ [(return)
+ (use (reg:SI 2))]
+ ""
+ "*
+{
+ if (TARGET_PA_20)
+ return \"bve%* (%%r2)\";
+ return \"bv%* %%r0(%%r2)\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;; This is used for eh returns which bypass the return stub.
+(define_insn "return_external_pic"
+ [(return)
+ (clobber (reg:SI 1))
+ (use (reg:SI 2))]
+ "!TARGET_NO_SPACE_REGS
+ && !TARGET_PA_20
+ && flag_pic && crtl->calls_eh_return"
+ "ldsid (%%sr0,%%r2),%%r1\;mtsp %%r1,%%sr0\;be%* 0(%%sr0,%%r2)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "12")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "hppa_expand_prologue ();DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+ "
+{
+ hppa_expand_epilogue ();
+ DONE;
+}")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ rtx x;
+
+ /* Try to use the trivial return first. Else use the full epilogue. */
+ if (pa_can_use_return_insn ())
+ x = gen_return ();
+ else
+ {
+ hppa_expand_epilogue ();
+
+ /* EH returns bypass the normal return stub. Thus, we must do an
+ interspace branch to return from functions that call eh_return.
+ This is only a problem for returns from shared code on ports
+ using space registers. */
+ if (!TARGET_NO_SPACE_REGS
+ && !TARGET_PA_20
+ && flag_pic && crtl->calls_eh_return)
+ x = gen_return_external_pic ();
+ else
+ x = gen_return_internal ();
+ }
+ emit_jump_insn (x);
+ DONE;
+}")
+
+; Used by hppa_profile_hook to load the starting address of the
+; current function.  Operand 1 contains the address of the label in
+; operand 3.
+(define_insn "load_offset_label_address"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:SI (match_operand:SI 2 "" "")
+ (label_ref:SI (match_operand 3 "" "")))))]
+ ""
+ "ldo %2-%l3(%1),%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+; Output a code label and load its address.
+(define_insn "lcla1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (label_ref:SI (match_operand 1 "" "")))
+ (const_int 0)]
+ "!TARGET_PA_20"
+ "*
+{
+ output_asm_insn (\"bl .+8,%0\;depi 0,31,2,%0\", operands);
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (operands[1]));
+ return \"\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "lcla2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (label_ref:SI (match_operand 1 "" "")))
+ (const_int 0)]
+ "TARGET_PA_20"
+ "*
+{
+ (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+ CODE_LABEL_NUMBER (operands[1]));
+ return \"mfia %0\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 2)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ /* An unconditional branch which can reach its target. */
+ if (get_attr_length (insn) < 16)
+ return \"b%* %l0\";
+
+ return output_lbranch (operands[0], insn, 1);
+}"
+ [(set_attr "type" "uncond_branch")
+ (set_attr "pa_combine_type" "uncond_branch")
+ (set (attr "length")
+ (cond [(eq (symbol_ref "jump_in_call_delay (insn)") (const_int 1))
+ (if_then_else (lt (abs (minus (match_dup 0)
+ (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (const_int 8))
+ (lt (abs (minus (match_dup 0) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 4)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 20)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 16)]
+ (const_int 24)))])
+
+;;; Hope this is only within a function...
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand" "r"))]
+ "GET_MODE (operands[0]) == word_mode"
+ "bv%* %%r0(%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;;; An indirect jump can be optimized to a direct jump.  However, GAS
+;;; for the SOM target doesn't allow branching to a label inside a
+;;; function, and we don't correctly compute branch distances for
+;;; labels outside the current function.  Thus, the indirect jump
+;;; can't be optimized to a direct jump on all targets.  We assume
+;;; that the branch target is in the same space (i.e., a nested
+;;; function jumping to a label in an outer function in the same
+;;; translation unit).
+(define_expand "nonlocal_goto"
+ [(use (match_operand 0 "general_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))
+ (use (match_operand 3 "general_operand" ""))]
+ ""
+{
+ rtx lab = operands[1];
+ rtx stack = operands[2];
+ rtx fp = operands[3];
+
+ lab = copy_to_reg (lab);
+
+ emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
+ emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+
+ /* Restore the frame pointer. The virtual_stack_vars_rtx is saved
+ instead of the hard_frame_pointer_rtx in the save area. As a
+ result, an extra instruction is needed to adjust for the offset
+ of the virtual stack variables and the hard frame pointer. */
+ if (GET_CODE (fp) != REG)
+ fp = force_reg (Pmode, fp);
+ emit_move_insn (hard_frame_pointer_rtx, plus_constant (fp, -8));
+
+ emit_stack_restore (SAVE_NONLOCAL, stack);
+
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+
+ /* Nonlocal goto jumps are only used between functions in the same
+ translation unit. Thus, we can avoid the extra overhead of an
+ interspace jump. */
+ emit_jump_insn (gen_indirect_goto (lab));
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn "indirect_goto"
+ [(unspec [(match_operand 0 "register_operand" "=r")] UNSPEC_GOTO)]
+ "GET_MODE (operands[0]) == word_mode"
+ "bv%* %%r0(%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;;; This jump is used in branch tables where the insn length is fixed.
+;;; The length of this insn is adjusted if the delay slot is not filled.
+(define_insn "short_jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))
+ (const_int 0)]
+ ""
+ "b%* %l0%#"
+ [(set_attr "type" "btable_branch")
+ (set_attr "length" "4")])
+
+;; Subroutines of "casesi".
+;; operand 0 is index
+;; operand 1 is the minimum bound
+;; operand 2 is the maximum bound - minimum bound + 1
+;; operand 3 is CODE_LABEL for the table;
+;; operand 4 is the CODE_LABEL to go to if index out of range.
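+;;
+;; The expander below normalizes the index before dispatching: it subtracts
+;; the minimum bound, does a single unsigned comparison against the range
+;; (which also catches indices below the minimum, since they wrap),
+;; sign-extends the index on 64-bit targets so the upper bits are well
+;; defined, and then selects one of the casesi* patterns below based on
+;; TARGET_BIG_SWITCH, flag_pic and TARGET_64BIT.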
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")]
+ ""
+ "
+{
+ if (GET_CODE (operands[0]) != REG)
+ operands[0] = force_reg (SImode, operands[0]);
+
+ if (operands[1] != const0_rtx)
+ {
+ rtx index = gen_reg_rtx (SImode);
+
+ operands[1] = gen_int_mode (-INTVAL (operands[1]), SImode);
+ if (!INT_14_BITS (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_addsi3 (index, operands[0], operands[1]));
+ operands[0] = index;
+ }
+
+ if (!INT_5_BITS (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+
+  /* This branch prevents us from finding an insn for the delay slot of the
+ following vectored branch. It might be possible to use the delay
+ slot if an index value of -1 was used to transfer to the out-of-range
+ label. In order to do this, we would have to output the -1 vector
+ element after the delay insn. The casesi output code would have to
+ check if the casesi insn is in a delay branch sequence and output
+     the delay insn if one is found. If this were done, it might
+ then be worthwhile to split the casesi patterns to improve scheduling.
+ However, it's not clear that all this extra complexity is worth
+ the effort. */
+ {
+ rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4]));
+ }
+
+  /* In 64-bit mode we must make sure to wipe the upper bits of the register
+ just in case the addition overflowed or we had random bits in the
+ high part of the register. */
+ if (TARGET_64BIT)
+ {
+ rtx index = gen_reg_rtx (DImode);
+
+ emit_insn (gen_extendsidi2 (index, operands[0]));
+ operands[0] = index;
+ }
+
+ if (TARGET_BIG_SWITCH)
+ {
+ if (TARGET_64BIT)
+ emit_jump_insn (gen_casesi64p (operands[0], operands[3]));
+ else if (flag_pic)
+ emit_jump_insn (gen_casesi32p (operands[0], operands[3]));
+ else
+ emit_jump_insn (gen_casesi32 (operands[0], operands[3]));
+ }
+ else
+ emit_jump_insn (gen_casesi0 (operands[0], operands[3]));
+ DONE;
+}")
+
+;;; The rtl for this pattern doesn't accurately describe what the insn
+;;; actually does, particularly when case-vector elements are exploded
+;;; in pa_reorg. However, the initial SET in these patterns must show
+;;; the connection of the insn to the following jump table.
+(define_insn "casesi0"
+ [(set (pc) (mem:SI (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 4))
+ (label_ref (match_operand 1 "" "")))))]
+ ""
+ "blr,n %0,%%r0\;nop"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+;;; 32-bit code, absolute branch table.
+(define_insn "casesi32"
+ [(set (pc) (mem:SI (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 4))
+ (label_ref (match_operand 1 "" "")))))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "!flag_pic"
+ "ldil L'%l1,%2\;ldo R'%l1(%2),%2\;{ldwx|ldw},s %0(%2),%2\;bv,n %%r0(%2)"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+;;; 32-bit code, relative branch table.
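+;;; The {bl .+8,...|mfia ...} alternation in the template reads the current
+;;; instruction address: PA 2.0 provides mfia (one insn), while older
+;;; processors use bl .+8 plus a depi to clear the privilege-level bits
+;;; (two insns), which is why the length is 20 bytes on PA 2.0 and 24
+;;; otherwise.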
+(define_insn "casesi32p"
+ [(set (pc) (mem:SI (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 4))
+ (label_ref (match_operand 1 "" "")))))
+ (clobber (match_scratch:SI 2 "=&r"))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "flag_pic"
+ "{bl .+8,%2\;depi 0,31,2,%2|mfia %2}\;ldo {%l1-.|%l1+4-.}(%2),%2\;\
+{ldwx|ldw},s %0(%2),%3\;{addl|add,l} %2,%3,%3\;bv,n %%r0(%3)"
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_PA_20") (const_int 0))
+ (const_int 20)
+ (const_int 24)))])
+
+;;; 64-bit code, 32-bit relative branch table.
+(define_insn "casesi64p"
+ [(set (pc) (mem:DI (plus:DI
+ (mult:DI (match_operand:DI 0 "register_operand" "r")
+ (const_int 8))
+ (label_ref (match_operand 1 "" "")))))
+ (clobber (match_scratch:DI 2 "=&r"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ ""
+ "mfia %2\;ldo %l1+4-.(%2),%2\;ldw,s %0(%2),%3\;extrd,s %3,63,32,%3\;\
+add,l %2,%3,%3\;bv,n %%r0(%3)"
+ [(set_attr "type" "multi")
+ (set_attr "length" "24")])
+
+
+;; Call patterns.
+;;- jump to subroutine
+
+(define_expand "call"
+ [(parallel [(call (match_operand:SI 0 "" "")
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 2))])]
+ ""
+ "
+{
+ rtx op;
+ rtx nb = operands[1];
+
+ if (TARGET_PORTABLE_RUNTIME)
+ op = force_reg (SImode, XEXP (operands[0], 0));
+ else
+ op = XEXP (operands[0], 0);
+
+ if (TARGET_64BIT)
+ {
+ if (!virtuals_instantiated)
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+ GEN_INT (64)));
+ else
+ {
+ /* The loop pass can generate new libcalls after the virtual
+ registers are instantiated when fpregs are disabled because
+ the only method that we have for doing DImode multiplication
+ is with a libcall. This could be trouble if we haven't
+ allocated enough space for the outgoing arguments. */
+ gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size);
+
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET + 64)));
+ }
+ }
+
+ /* Use two different patterns for calls to explicitly named functions
+ and calls through function pointers. This is necessary as these two
+ types of calls use different calling conventions, and CSE might try
+ to change the named call into an indirect call in some cases (using
+ two patterns keeps CSE from performing this optimization).
+
+ We now use even more call patterns as there was a subtle bug in
+     attempting to restore the PIC register after a call using a simple
+     move insn. During reload, an instruction involving a pseudo register
+     with no explicit dependence on the PIC register can be converted
+     to an equivalent load from memory using the PIC register. If we
+     emit a simple move to restore the PIC register in the initial rtl
+     generation, then it can potentially be repositioned during scheduling,
+     and an instruction that eventually uses the PIC register may end up
+ between the call and the PIC register restore.
+
+ This only worked because there is a post call group of instructions
+ that are scheduled with the call. These instructions are included
+ in the same basic block as the call. However, calls can throw in
+ C++ code and a basic block has to terminate at the call if the call
+ can throw. This results in the PIC register restore being scheduled
+ independently from the call. So, we now hide the save and restore
+ of the PIC register in the call pattern until after reload. Then,
+ we split the moves out. A small side benefit is that we now don't
+ need to have a use of the PIC register in the return pattern and
+ the final save/restore operation is not needed.
+
+ I elected to just use register %r4 in the PIC patterns instead
+     of trying to force hppa_pic_save_rtx () to a callee-saved register.
+ This might have required a new register class and constraint. It
+ was also simpler to just handle the restore from a register than a
+ generic pseudo. */
+ if (TARGET_64BIT)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ if (GET_CODE (op) == SYMBOL_REF)
+ emit_call_insn (gen_call_symref_64bit (op, nb, r4));
+ else
+ {
+ op = force_reg (word_mode, op);
+ emit_call_insn (gen_call_reg_64bit (op, nb, r4));
+ }
+ }
+ else
+ {
+ if (GET_CODE (op) == SYMBOL_REF)
+ {
+ if (flag_pic)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ emit_call_insn (gen_call_symref_pic (op, nb, r4));
+ }
+ else
+ emit_call_insn (gen_call_symref (op, nb));
+ }
+ else
+ {
+ rtx tmpreg = gen_rtx_REG (word_mode, 22);
+ emit_move_insn (tmpreg, force_reg (word_mode, op));
+ if (flag_pic)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ emit_call_insn (gen_call_reg_pic (nb, r4));
+ }
+ else
+ emit_call_insn (gen_call_reg (nb));
+ }
+ }
+
+ DONE;
+}")
+
+;; We use function calls to set the length attribute of calls and millicode
+;; calls. This is necessary because of the large variety of call sequences.
+;; Implementing the calculation in rtl is difficult as well as ugly. As
+;; we need the same calculation in several places, maintenance becomes a
+;; nightmare.
+;;
+;; However, this has a subtle impact on branch shortening. When the
+;; expression used to set the length attribute of an instruction depends
+;; on a relative address (e.g., pc or a branch address), genattrtab
+;; notes that the insn's length is variable, and attempts to determine a
+;; worst-case default length and code to compute an insn's current length.
+
+;; The use of a function call hides the variable dependence of our calls
+;; and millicode calls. As a result, genattrtab doesn't treat the operation
+;; as variable and only generates code for the default case using our
+;; function call. Because of this, calls and millicode calls have a fixed
+;; length in the branch shortening pass, and some branches will use a longer
+;; code sequence than necessary. However, the length of any given call
+;; will still reflect its final code location and it may be shorter than
+;; the initial length estimate.
+
+;; It's possible to trick genattrtab by adding an expression involving `pc'
+;; in the set. However, when genattrtab hits a function call in its attempt
+;; to compute the default length, it marks the result as unknown and sets
+;; the default result to MAX_INT ;-( One possible fix that would allow
+;; calls to participate in branch shortening would be to make the call to
+;; insn_default_length a target option. Then, we could massage unknown
+;; results. Another fix might be to change genattrtab so that it just does
+;; the call in the variable case as it already does for the fixed case.
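+;;
+;; For illustration, the length expression used by the call patterns below
+;; has the form
+;;
+;;   (set (attr "length")
+;;        (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+;;              (symbol_ref "attr_length_call (insn, 0)")))
+;;
+;; The first cond arm can never be selected since its guard is
+;; (and (const_int 0) ...), so the length is always computed by the
+;; function call; as best we can tell, the dummy arm just gives genattrtab
+;; a computable constant (and a harmless reference to `pc') when it
+;; derives the default length, sidestepping the MAX_INT problem above.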
+
+(define_insn "call_symref"
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[0], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+(define_insn "call_symref_pic"
+ [(set (match_operand:SI 2 "register_operand" "=&r") (reg:SI 19))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
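+;;
+;; Two splits follow: the first matches only noreturn calls and omits the
+;; final restore of %r19, since control never returns; the second handles
+;; the normal case and restores %r19 from the save register after the call.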
+(define_split
+ [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 2) (reg:SI 19))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed"
+ [(set (match_dup 2) (reg:SI 19))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])
+ (set (reg:SI 19) (match_dup 2))]
+ "")
+
+(define_insn "*call_symref_pic_post_reload"
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[0], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_symref_64bit"
+ [(set (match_operand:DI 2 "register_operand" "=&r") (reg:DI 27))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 2) (reg:DI 27))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27))
+ (call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 2) (reg:DI 27))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])
+ (set (reg:DI 27) (match_dup 2))]
+ "")
+
+(define_insn "*call_symref_64bit_post_reload"
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[0], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+(define_insn "call_reg"
+ [(call (mem:SI (reg:SI 22))
+ (match_operand 0 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_reg_pic"
+ [(set (match_operand:SI 1 "register_operand" "=&r") (reg:SI 19))
+ (call (mem:SI (reg:SI 22))
+ (match_operand 0 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 1))
+ (use (reg:SI 19))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:SI 1 "register_operand" "") (reg:SI 19))
+ (call (mem:SI (reg:SI 22))
+ (match_operand 0 "" ""))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 1))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "!TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 1) (reg:SI 19))
+ (parallel [(call (mem:SI (reg:SI 22))
+ (match_dup 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:SI 1 "register_operand" "") (reg:SI 19))
+ (call (mem:SI (reg:SI 22))
+ (match_operand 0 "" ""))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 1))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "!TARGET_64BIT && reload_completed"
+ [(set (match_dup 1) (reg:SI 19))
+ (parallel [(call (mem:SI (reg:SI 22))
+ (match_dup 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])
+ (set (reg:SI 19) (match_dup 1))]
+ "")
+
+(define_insn "*call_reg_pic_post_reload"
+ [(call (mem:SI (reg:SI 22))
+ (match_operand 0 "" "i"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_reg_64bit"
+ [(set (match_operand:DI 2 "register_operand" "=&r") (reg:DI 27))
+ (call (mem:SI (match_operand:DI 0 "register_operand" "r"))
+ (match_operand 1 "" "i"))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))]
+ "TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27))
+ (call (mem:SI (match_operand 0 "register_operand" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 2) (reg:DI 27))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:DI 2 "register_operand" "") (reg:DI 27))
+ (call (mem:SI (match_operand 0 "register_operand" ""))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 2) (reg:DI 27))
+ (parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])
+ (set (reg:DI 27) (match_dup 2))]
+ "")
+
+(define_insn "*call_reg_64bit_post_reload"
+ [(call (mem:SI (match_operand:DI 0 "register_operand" "r"))
+ (match_operand 1 "" "i"))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, operands[0]);
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 2))])]
+ ""
+ "
+{
+ rtx op;
+ rtx dst = operands[0];
+ rtx nb = operands[2];
+
+ if (TARGET_PORTABLE_RUNTIME)
+ op = force_reg (SImode, XEXP (operands[1], 0));
+ else
+ op = XEXP (operands[1], 0);
+
+ if (TARGET_64BIT)
+ {
+ if (!virtuals_instantiated)
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+ GEN_INT (64)));
+ else
+ {
+ /* The loop pass can generate new libcalls after the virtual
+ registers are instantiated when fpregs are disabled because
+ the only method that we have for doing DImode multiplication
+ is with a libcall. This could be trouble if we haven't
+ allocated enough space for the outgoing arguments. */
+ gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size);
+
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET + 64)));
+ }
+ }
+
+ /* Use two different patterns for calls to explicitly named functions
+ and calls through function pointers. This is necessary as these two
+ types of calls use different calling conventions, and CSE might try
+ to change the named call into an indirect call in some cases (using
+ two patterns keeps CSE from performing this optimization).
+
+ We now use even more call patterns as there was a subtle bug in
+     attempting to restore the PIC register after a call using a simple
+     move insn. During reload, an instruction involving a pseudo register
+     with no explicit dependence on the PIC register can be converted
+     to an equivalent load from memory using the PIC register. If we
+     emit a simple move to restore the PIC register in the initial rtl
+     generation, then it can potentially be repositioned during scheduling,
+     and an instruction that eventually uses the PIC register may end up
+ between the call and the PIC register restore.
+
+ This only worked because there is a post call group of instructions
+ that are scheduled with the call. These instructions are included
+ in the same basic block as the call. However, calls can throw in
+ C++ code and a basic block has to terminate at the call if the call
+ can throw. This results in the PIC register restore being scheduled
+ independently from the call. So, we now hide the save and restore
+ of the PIC register in the call pattern until after reload. Then,
+ we split the moves out. A small side benefit is that we now don't
+ need to have a use of the PIC register in the return pattern and
+ the final save/restore operation is not needed.
+
+ I elected to just use register %r4 in the PIC patterns instead
+     of trying to force hppa_pic_save_rtx () to a callee-saved register.
+ This might have required a new register class and constraint. It
+ was also simpler to just handle the restore from a register than a
+ generic pseudo. */
+ if (TARGET_64BIT)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ if (GET_CODE (op) == SYMBOL_REF)
+ emit_call_insn (gen_call_val_symref_64bit (dst, op, nb, r4));
+ else
+ {
+ op = force_reg (word_mode, op);
+ emit_call_insn (gen_call_val_reg_64bit (dst, op, nb, r4));
+ }
+ }
+ else
+ {
+ if (GET_CODE (op) == SYMBOL_REF)
+ {
+ if (flag_pic)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ emit_call_insn (gen_call_val_symref_pic (dst, op, nb, r4));
+ }
+ else
+ emit_call_insn (gen_call_val_symref (dst, op, nb));
+ }
+ else
+ {
+ rtx tmpreg = gen_rtx_REG (word_mode, 22);
+ emit_move_insn (tmpreg, force_reg (word_mode, op));
+ if (flag_pic)
+ {
+ rtx r4 = gen_rtx_REG (word_mode, 4);
+ emit_call_insn (gen_call_val_reg_pic (dst, nb, r4));
+ }
+ else
+ emit_call_insn (gen_call_val_reg (dst, nb));
+ }
+ }
+
+ DONE;
+}")
+
+(define_insn "call_val_symref"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[1], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+(define_insn "call_val_symref_pic"
+ [(set (match_operand:SI 3 "register_operand" "=&r") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 3))
+ (use (reg:SI 19))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:SI 3 "register_operand" "") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 3))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 3) (reg:SI 19))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:SI 3 "register_operand" "") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 3))
+ (use (reg:SI 19))
+ (use (const_int 0))])]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed"
+ [(set (match_dup 3) (reg:SI 19))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))])
+ (set (reg:SI 19) (match_dup 3))]
+ "")
+
+(define_insn "*call_val_symref_pic_post_reload"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[1], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_symref_64bit"
+ [(set (match_operand:DI 3 "register_operand" "=&r") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 3) (reg:DI 27))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3) (reg:DI 27))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))])
+ (set (reg:DI 27) (match_dup 3))]
+ "")
+
+(define_insn "*call_val_symref_64bit_post_reload"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[1], 0);
+}"
+ [(set_attr "type" "call")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 0)")))])
+
+(define_insn "call_val_reg"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (reg:SI 22))
+ (match_operand 1 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_reg_pic"
+ [(set (match_operand:SI 2 "register_operand" "=&r") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (reg:SI 22))
+ (match_operand 1 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (reg:SI 22))
+ (match_operand 1 "" "")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "!TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 2) (reg:SI 19))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (reg:SI 22))
+ (match_dup 1)))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:SI 2 "register_operand" "") (reg:SI 19))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (reg:SI 22))
+ (match_operand 1 "" "")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (match_dup 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])]
+ "!TARGET_64BIT && reload_completed"
+ [(set (match_dup 2) (reg:SI 19))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (reg:SI 22))
+ (match_dup 1)))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))])
+ (set (reg:SI 19) (match_dup 2))]
+ "")
+
+(define_insn "*call_val_reg_pic_post_reload"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (reg:SI 22))
+ (match_operand 1 "" "i")))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (use (reg:SI 19))
+ (use (const_int 1))]
+ "!TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_reg_64bit"
+ [(set (match_operand:DI 3 "register_operand" "=&r") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand 2 "" "i")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))]
+ "TARGET_64BIT"
+ "#")
+
+;; Split out the PIC register save and restore after reload. As the
+;; split is done after reload, there are some situations in which we
+;; unnecessarily save and restore %r4. This happens when there is a
+;; single call and the PIC register is not used after the call.
+;;
+;; The split has to be done since call_from_call_insn () can't handle
+;; the pattern as is. Noreturn calls are special because they have to
+;; terminate the basic block. The split has to contain more than one
+;; insn.
+(define_split
+ [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "TARGET_64BIT && reload_completed
+ && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+ [(set (match_dup 3) (reg:DI 27))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(parallel [(set (match_operand:DI 3 "register_operand" "") (reg:DI 27))
+ (set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" ""))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (match_dup 3))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3) (reg:DI 27))
+ (parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))])
+ (set (reg:DI 27) (match_dup 3))]
+ "")
+
+(define_insn "*call_val_reg_64bit_post_reload"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand 2 "" "i")))
+ (clobber (reg:DI 1))
+ (clobber (reg:DI 2))
+ (use (reg:DI 27))
+ (use (reg:DI 29))
+ (use (const_int 1))]
+ "TARGET_64BIT"
+ "*
+{
+ return output_indirect_call (insn, operands[1]);
+}"
+ [(set_attr "type" "dyncall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 12)]
+ (symbol_ref "attr_length_indirect_call (insn)")))])
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+(define_expand "sibcall"
+ [(call (match_operand:SI 0 "" "")
+ (match_operand 1 "" ""))]
+ "!TARGET_PORTABLE_RUNTIME"
+ "
+{
+ rtx op, call_insn;
+ rtx nb = operands[1];
+
+ op = XEXP (operands[0], 0);
+
+ if (TARGET_64BIT)
+ {
+ if (!virtuals_instantiated)
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+ GEN_INT (64)));
+ else
+ {
+ /* The loop pass can generate new libcalls after the virtual
+ registers are instantiated when fpregs are disabled because
+ the only method that we have for doing DImode multiplication
+ is with a libcall. This could be trouble if we haven't
+ allocated enough space for the outgoing arguments. */
+ gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size);
+
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET + 64)));
+ }
+ }
+
+ /* Indirect sibling calls are not allowed. */
+ if (TARGET_64BIT)
+ call_insn = gen_sibcall_internal_symref_64bit (op, operands[1]);
+ else
+ call_insn = gen_sibcall_internal_symref (op, operands[1]);
+
+ call_insn = emit_call_insn (call_insn);
+
+ if (TARGET_64BIT)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
+
+ /* We don't have to restore the PIC register. */
+ if (flag_pic)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+
+ DONE;
+}")
+
+(define_insn "sibcall_internal_symref"
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:SI 1))
+ (use (reg:SI 2))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[0], 1);
+}"
+ [(set_attr "type" "sibcall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 1)")))])
+
+(define_insn "sibcall_internal_symref_64bit"
+ [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+ (match_operand 1 "" "i"))
+ (clobber (reg:DI 1))
+ (use (reg:DI 2))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[0], 1);
+}"
+ [(set_attr "type" "sibcall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 1)")))])
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:SI 1 "" "")
+ (match_operand 2 "" "")))]
+ "!TARGET_PORTABLE_RUNTIME"
+ "
+{
+ rtx op, call_insn;
+  rtx nb = operands[2];
+
+ op = XEXP (operands[1], 0);
+
+ if (TARGET_64BIT)
+ {
+ if (!virtuals_instantiated)
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+ GEN_INT (64)));
+ else
+ {
+ /* The loop pass can generate new libcalls after the virtual
+ registers are instantiated when fpregs are disabled because
+ the only method that we have for doing DImode multiplication
+ is with a libcall. This could be trouble if we haven't
+ allocated enough space for the outgoing arguments. */
+ gcc_assert (INTVAL (nb) <= crtl->outgoing_args_size);
+
+ emit_move_insn (arg_pointer_rtx,
+ gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (STACK_POINTER_OFFSET + 64)));
+ }
+ }
+
+ /* Indirect sibling calls are not allowed. */
+ if (TARGET_64BIT)
+ call_insn
+ = gen_sibcall_value_internal_symref_64bit (operands[0], op, operands[2]);
+ else
+ call_insn
+ = gen_sibcall_value_internal_symref (operands[0], op, operands[2]);
+
+ call_insn = emit_call_insn (call_insn);
+
+ if (TARGET_64BIT)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
+
+ /* We don't have to restore the PIC register. */
+ if (flag_pic)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+
+ DONE;
+}")
+
+(define_insn "sibcall_value_internal_symref"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:SI 1))
+ (use (reg:SI 2))
+ (use (const_int 0))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[1], 1);
+}"
+ [(set_attr "type" "sibcall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 1)")))])
+
+(define_insn "sibcall_value_internal_symref_64bit"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "call_operand_address" ""))
+ (match_operand 2 "" "i")))
+ (clobber (reg:DI 1))
+ (use (reg:DI 2))
+ (use (const_int 0))]
+ "TARGET_64BIT"
+ "*
+{
+ output_arg_descriptor (insn);
+ return output_call (insn, operands[1], 1);
+}"
+ [(set_attr "type" "sibcall")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 8)]
+ (symbol_ref "attr_length_call (insn, 1)")))])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "type" "move")
+ (set_attr "length" "4")])
+
+;; These are just placeholders so we know where branch tables
+;; begin and end.
+(define_insn "begin_brtab"
+ [(const_int 1)]
+ ""
+ "*
+{
+ /* Only GAS actually supports this pseudo-op. */
+ if (TARGET_GAS)
+ return \".begin_brtab\";
+ else
+ return \"\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "0")])
+
+(define_insn "end_brtab"
+ [(const_int 2)]
+ ""
+ "*
+{
+ /* Only GAS actually supports this pseudo-op. */
+ if (TARGET_GAS)
+ return \".end_brtab\";
+ else
+ return \"\";
+}"
+ [(set_attr "type" "move")
+ (set_attr "length" "0")])
+
+;;; EH does longjmp's from and within the data section. Thus,
+;;; an interspace branch is required for the longjmp implementation.
+;;; Registers r1 and r2 are used as scratch registers for the jump
+;;; when necessary.
+(define_expand "interspace_jump"
+ [(parallel
+ [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+ (clobber (match_dup 1))])]
+ ""
+ "
+{
+ operands[1] = gen_rtx_REG (word_mode, 2);
+}")
+
+(define_insn ""
+ [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+ (clobber (reg:SI 2))]
+ "TARGET_PA_20 && !TARGET_64BIT"
+ "bve%* (%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+ (clobber (reg:SI 2))]
+ "TARGET_NO_SPACE_REGS && !TARGET_64BIT"
+ "be%* 0(%%sr4,%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
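+;; Generic 32-bit case: load the space id of the target address into %r2,
+;; copy it into %sr0, and branch externally through %sr0.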
+(define_insn ""
+ [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+ (clobber (reg:SI 2))]
+ "!TARGET_64BIT"
+ "ldsid (%%sr0,%0),%%r2\;mtsp %%r2,%%sr0\;be%* 0(%%sr0,%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+ (clobber (reg:DI 2))]
+ "TARGET_64BIT"
+ "bve%* (%0)"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_expand "builtin_longjmp"
+ [(unspec_volatile [(match_operand 0 "register_operand" "r")] UNSPECV_LONGJMP)]
+ ""
+ "
+{
+  /* The elements of the buffer are, in order: the frame pointer, the
+     address of the receiving label, and the saved stack pointer. */
+ rtx fp = gen_rtx_MEM (Pmode, operands[0]);
+ rtx lab = gen_rtx_MEM (Pmode, plus_constant (operands[0],
+ POINTER_SIZE / BITS_PER_UNIT));
+ rtx stack = gen_rtx_MEM (Pmode, plus_constant (operands[0],
+ (POINTER_SIZE * 2) / BITS_PER_UNIT));
+ rtx pv = gen_rtx_REG (Pmode, 1);
+
+ emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
+ emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+
+ /* Restore the frame pointer. The virtual_stack_vars_rtx is saved
+ instead of the hard_frame_pointer_rtx in the save area. We need
+ to adjust for the offset between these two values. */
+ if (GET_CODE (fp) != REG)
+ fp = force_reg (Pmode, fp);
+ emit_move_insn (hard_frame_pointer_rtx, plus_constant (fp, -8));
+
+ /* This bit is the same as expand_builtin_longjmp. */
+ emit_stack_restore (SAVE_NONLOCAL, stack);
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+
+ /* Load the label we are jumping through into r1 so that we know
+ where to look for it when we get back to setjmp's function for
+ restoring the gp. */
+ emit_move_insn (pv, lab);
+
+ /* Prevent the insns above from being scheduled into the delay slot
+ of the interspace jump because the space register could change. */
+ emit_insn (gen_blockage ());
+
+ emit_jump_insn (gen_interspace_jump (pv));
+ emit_barrier ();
+ DONE;
+}")
+
+;;; Operands 2 and 3 are assumed to be CONST_INTs.
+(define_expand "extzv"
+ [(set (match_operand 0 "register_operand" "")
+ (zero_extract (match_operand 1 "register_operand" "")
+ (match_operand 2 "uint32_operand" "")
+ (match_operand 3 "uint32_operand" "")))]
+ ""
+ "
+{
+ HOST_WIDE_INT len = INTVAL (operands[2]);
+ HOST_WIDE_INT pos = INTVAL (operands[3]);
+
+  /* PA extraction insns don't support zero-length bit-fields or fields
+     extending beyond the leftmost or rightmost bits. Also, we reject lengths
+ equal to a word as they are better handled by the move patterns. */
+ if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (!register_operand (operands[1], VOIDmode))
+ FAIL;
+
+ if (TARGET_64BIT)
+ emit_insn (gen_extzv_64 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_extzv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}")
+
+(define_insn "extzv_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "uint5_operand" "")
+ (match_operand:SI 3 "uint5_operand" "")))]
+ ""
+ "{extru|extrw,u} %1,%3+%2-1,%2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 2 "register_operand" "q")))]
+ ""
+ "{vextru %1,1,%0|extrw,u %1,%%sar,1,%0}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extzv_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "uint32_operand" "")
+ (match_operand:DI 3 "uint32_operand" "")))]
+ "TARGET_64BIT"
+ "extrd,u %1,%3+%2-1,%2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "register_operand" "q")))]
+ "TARGET_64BIT"
+ "extrd,u %1,%%sar,1,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+;;; Operands 2 and 3 are assumed to be CONST_INTs.
+(define_expand "extv"
+ [(set (match_operand 0 "register_operand" "")
+ (sign_extract (match_operand 1 "register_operand" "")
+ (match_operand 2 "uint32_operand" "")
+ (match_operand 3 "uint32_operand" "")))]
+ ""
+ "
+{
+ HOST_WIDE_INT len = INTVAL (operands[2]);
+ HOST_WIDE_INT pos = INTVAL (operands[3]);
+
+  /* PA extraction insns don't support zero-length bit-fields or fields
+     extending beyond the leftmost or rightmost bits. Also, we reject lengths
+ equal to a word as they are better handled by the move patterns. */
+ if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (!register_operand (operands[1], VOIDmode))
+ FAIL;
+
+ if (TARGET_64BIT)
+ emit_insn (gen_extv_64 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_extv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}")
+
+(define_insn "extv_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "uint5_operand" "")
+ (match_operand:SI 3 "uint5_operand" "")))]
+ ""
+ "{extrs|extrw,s} %1,%3+%2-1,%2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 2 "register_operand" "q")))]
+ "!TARGET_64BIT"
+ "{vextrs %1,1,%0|extrw,s %1,%%sar,1,%0}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "extv_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "uint32_operand" "")
+ (match_operand:DI 3 "uint32_operand" "")))]
+ "TARGET_64BIT"
+ "extrd,s %1,%3+%2-1,%2,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 2 "register_operand" "q")))]
+ "TARGET_64BIT"
+ "extrd,s %1,%%sar,1,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+;;; Operands 1 and 2 are assumed to be CONST_INTs.
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "register_operand" "")
+ (match_operand 1 "uint32_operand" "")
+ (match_operand 2 "uint32_operand" ""))
+ (match_operand 3 "arith5_operand" ""))]
+ ""
+ "
+{
+ HOST_WIDE_INT len = INTVAL (operands[1]);
+ HOST_WIDE_INT pos = INTVAL (operands[2]);
+
+  /* PA insertion insns don't support zero-length bit-fields or fields
+     extending beyond the leftmost or rightmost bits. Also, we reject lengths
+ equal to a word as they are better handled by the move patterns. */
+ if (len <= 0 || len >= BITS_PER_WORD || pos < 0 || pos + len > BITS_PER_WORD)
+ FAIL;
+
+ /* From mips.md: insert_bit_field doesn't verify that our destination
+ matches the predicate, so check it again here. */
+ if (!register_operand (operands[0], VOIDmode))
+ FAIL;
+
+ if (TARGET_64BIT)
+ emit_insn (gen_insv_64 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_insv_32 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}")
+
+(define_insn "insv_32"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r,r")
+ (match_operand:SI 1 "uint5_operand" "")
+ (match_operand:SI 2 "uint5_operand" ""))
+ (match_operand:SI 3 "arith5_operand" "r,L"))]
+ ""
+ "@
+ {dep|depw} %3,%2+%1-1,%1,%0
+ {depi|depwi} %3,%2+%1-1,%1,%0"
+ [(set_attr "type" "shift,shift")
+ (set_attr "length" "4,4")])
+
+;; Optimize insertion of const_int values of type 1...1xxxx.
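+;; For example, depositing the 5-bit value 0b11101 (29) satisfies the
+;; condition below, and (29 & 0xf) - 0x10 = -3; since depi sign-extends
+;; its 5-bit immediate, -3 deposits the required bit pattern 11101.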
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "uint5_operand" "")
+ (match_operand:SI 2 "uint5_operand" ""))
+ (match_operand:SI 3 "const_int_operand" ""))]
+  "(INTVAL (operands[3]) & 0x10) != 0
+   && (~INTVAL (operands[3]) & ((1L << INTVAL (operands[1])) - 1) & ~0xf) == 0"
+ "*
+{
+ operands[3] = GEN_INT ((INTVAL (operands[3]) & 0xf) - 0x10);
+ return \"{depi|depwi} %3,%2+%1-1,%1,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn "insv_64"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r,r")
+ (match_operand:DI 1 "uint32_operand" "")
+ (match_operand:DI 2 "uint32_operand" ""))
+ (match_operand:DI 3 "arith32_operand" "r,L"))]
+ "TARGET_64BIT"
+ "@
+ depd %3,%2+%1-1,%1,%0
+ depdi %3,%2+%1-1,%1,%0"
+ [(set_attr "type" "shift,shift")
+ (set_attr "length" "4,4")])
+
+;; Optimize insertion of const_int values of type 1...1xxxx.
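+;; (The same transformation as the 32-bit case above; depdi likewise
+;; sign-extends its immediate.)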
+(define_insn ""
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (match_operand:DI 1 "uint32_operand" "")
+ (match_operand:DI 2 "uint32_operand" ""))
+ (match_operand:DI 3 "const_int_operand" ""))]
+ "(INTVAL (operands[3]) & 0x10) != 0
+ && TARGET_64BIT
+ && (~INTVAL (operands[3]) & ((1L << INTVAL (operands[1])) - 1) & ~0xf) == 0"
+ "*
+{
+ operands[3] = GEN_INT ((INTVAL (operands[3]) & 0xf) - 0x10);
+ return \"depdi %3,%2+%1-1,%1,%0\";
+}"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (const_int 32)))]
+ "TARGET_64BIT"
+ "depd,z %1,31,32,%0"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4")])
+
+;; This insn is used for some loop tests, typically loops reversed when
+;; strength reduction is used. It is actually created when the instruction
+;; combination phase combines the special loop test. Since this insn
+;; is both a jump insn and has an output, it must deal with its own
+;; reloads, hence the `m' constraints. The `!' constraints direct reload
+;; to not choose the register alternatives in the event a reload is needed.
+(define_insn "decrement_and_branch_until_zero"
+ [(set (pc)
+ (if_then_else
+ (match_operator 2 "comparison_operator"
+ [(plus:SI
+ (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m")
+ (match_operand:SI 1 "int5_operand" "L,L,L"))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))
+ (clobber (match_scratch:SI 4 "=X,r,r"))]
+ ""
+ "* return output_dbra (operands, insn, which_alternative); "
+;; Do not expect to understand this the first time through.
+[(set_attr "type" "cbranch,multi,multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+;; Loop counter in register case
+;; Short branch has length of 4
+;; Long branch has length of 8, 20, 24 or 28
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28))
+
+;; Loop counter in FP reg case.
+;; Extra goo to deal with additional reload insns.
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (lt (match_dup 3) (pc))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 24))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 24)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 24))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 28)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 44)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 40)]
+ (const_int 48))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 24)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 28)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 44)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 40)]
+ (const_int 48)))
+
+;; Loop counter in memory case.
+;; Extra goo to deal with additional reload insns.
+ (if_then_else (lt (match_dup 3) (pc))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36))))))])
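+
+;; Illustrative example (assumed, not taken from the port): a counted
+;; loop such as "do { ... } while (--n != 0)" can be combined into the
+;; pattern above; in the register alternative, output_dbra then emits a
+;; single add-and-branch instruction, e.g. "addib,<> -1,%r26,.L4", when
+;; the label is within short-branch range.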
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 2 "movb_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
+ (match_dup 1))]
+ ""
+"* return output_movb (operands, insn, which_alternative, 0); "
+;; Do not expect to understand this the first time through.
+[(set_attr "type" "cbranch,multi,multi,multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+;; Loop counter in register case
+;; Short branch has length of 4
+;; Long branch has length of 8, 20, 24 or 28
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28))
+
+;; Loop counter in FP reg case.
+;; Extra goo to deal with additional reload insns.
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (lt (match_dup 3) (pc))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36)))
+
+;; Loop counter in memory or sar case.
+;; Extra goo to deal with additional reload insns.
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 8)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 12)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 28)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 24)]
+ (const_int 32)))))])
+
+;; Handle negated branch.
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (match_operator 2 "movb_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
+ (match_dup 1))]
+ ""
+"* return output_movb (operands, insn, which_alternative, 1); "
+;; Do not expect to understand this the first time through.
+[(set_attr "type" "cbranch,multi,multi,multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+;; Loop counter in register case
+;; Short branch has length of 4
+;; Long branch has length of 8, 20, 24 or 28
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28))
+
+;; Loop counter in FP reg case.
+;; Extra goo to deal with additional reload insns.
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (lt (match_dup 3) (pc))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 12))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36))
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 12)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 16)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 32)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28)]
+ (const_int 36)))
+
+;; Loop counter in memory or SAR case.
+;; Extra goo to deal with additional reload insns.
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 8)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 12)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 28)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 24)]
+ (const_int 32)))))])
+
+(define_insn ""
+ [(set (pc) (label_ref (match_operand 3 "" "" )))
+ (set (match_operand:SI 0 "ireg_operand" "=r")
+ (plus:SI (match_operand:SI 1 "ireg_operand" "r")
+ (match_operand:SI 2 "ireg_or_int5_operand" "rL")))]
+ "(reload_completed && operands[0] == operands[1]) || operands[0] == operands[2]"
+ "*
+{
+ return output_parallel_addb (operands, insn);
+}"
+[(set_attr "type" "parallel_branch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 3) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc) (label_ref (match_operand 2 "" "" )))
+ (set (match_operand:SF 0 "ireg_operand" "=r")
+ (match_operand:SF 1 "ireg_or_int5_operand" "rL"))]
+ "reload_completed"
+ "*
+{
+ return output_parallel_movb (operands, insn);
+}"
+[(set_attr "type" "parallel_branch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc) (label_ref (match_operand 2 "" "" )))
+ (set (match_operand:SI 0 "ireg_operand" "=r")
+ (match_operand:SI 1 "ireg_or_int5_operand" "rL"))]
+ "reload_completed"
+ "*
+{
+ return output_parallel_movb (operands, insn);
+}"
+[(set_attr "type" "parallel_branch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc) (label_ref (match_operand 2 "" "" )))
+ (set (match_operand:HI 0 "ireg_operand" "=r")
+ (match_operand:HI 1 "ireg_or_int5_operand" "rL"))]
+ "reload_completed"
+ "*
+{
+ return output_parallel_movb (operands, insn);
+}"
+[(set_attr "type" "parallel_branch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (pc) (label_ref (match_operand 2 "" "" )))
+ (set (match_operand:QI 0 "ireg_operand" "=r")
+ (match_operand:QI 1 "ireg_or_int5_operand" "rL"))]
+ "reload_completed"
+ "*
+{
+ return output_parallel_movb (operands, insn);
+}"
+[(set_attr "type" "parallel_branch")
+ (set (attr "length")
+ (cond [(lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_12BIT_OFFSET))
+ (const_int 4)
+ (lt (abs (minus (match_dup 2) (plus (pc) (const_int 8))))
+ (const_int MAX_17BIT_OFFSET))
+ (const_int 8)
+ (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (const_int 24)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 20)]
+ (const_int 28)))])
+
+(define_insn ""
+ [(set (match_operand 0 "register_operand" "=f")
+ (mult (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))
+ (set (match_operand 3 "register_operand" "+f")
+ (plus (match_operand 4 "register_operand" "f")
+ (match_operand 5 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT
+ && reload_completed && fmpyaddoperands (operands)"
+ "*
+{
+ if (GET_MODE (operands[0]) == DFmode)
+ {
+ if (rtx_equal_p (operands[3], operands[5]))
+ return \"fmpyadd,dbl %1,%2,%0,%4,%3\";
+ else
+ return \"fmpyadd,dbl %1,%2,%0,%5,%3\";
+ }
+ else
+ {
+ if (rtx_equal_p (operands[3], operands[5]))
+ return \"fmpyadd,sgl %1,%2,%0,%4,%3\";
+ else
+ return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
+ }
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
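+
+;; Illustrative note (our reading, not a statement from the port): the
+;; PA 1.1 fmpyadd instruction performs an independent multiply and add
+;; in a single instruction, but the add half is two-operand -- its
+;; destination must also be one of its sources.  This is presumably why
+;; fmpyaddoperands requires operand 3 to match operand 4 or operand 5,
+;; and why the templates here select the remaining source as the
+;; explicit addend.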
+
+(define_insn ""
+ [(set (match_operand 3 "register_operand" "+f")
+ (plus (match_operand 4 "register_operand" "f")
+ (match_operand 5 "register_operand" "f")))
+ (set (match_operand 0 "register_operand" "=f")
+ (mult (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT
+ && reload_completed && fmpyaddoperands (operands)"
+ "*
+{
+ if (GET_MODE (operands[0]) == DFmode)
+ {
+ if (rtx_equal_p (operands[3], operands[5]))
+ return \"fmpyadd,dbl %1,%2,%0,%4,%3\";
+ else
+ return \"fmpyadd,dbl %1,%2,%0,%5,%3\";
+ }
+ else
+ {
+ if (rtx_equal_p (operands[3], operands[5]))
+ return \"fmpyadd,sgl %1,%2,%0,%4,%3\";
+ else
+ return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
+ }
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand 0 "register_operand" "=f")
+ (mult (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))
+ (set (match_operand 3 "register_operand" "+f")
+ (minus (match_operand 4 "register_operand" "f")
+ (match_operand 5 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT
+ && reload_completed && fmpysuboperands (operands)"
+ "*
+{
+ if (GET_MODE (operands[0]) == DFmode)
+ return \"fmpysub,dbl %1,%2,%0,%5,%3\";
+ else
+ return \"fmpysub,sgl %1,%2,%0,%5,%3\";
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (match_operand 3 "register_operand" "+f")
+ (minus (match_operand 4 "register_operand" "f")
+ (match_operand 5 "register_operand" "f")))
+ (set (match_operand 0 "register_operand" "=f")
+ (mult (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))]
+ "TARGET_PA_11 && ! TARGET_SOFT_FLOAT
+ && reload_completed && fmpysuboperands (operands)"
+ "*
+{
+ if (GET_MODE (operands[0]) == DFmode)
+ return \"fmpysub,dbl %1,%2,%0,%5,%3\";
+ else
+ return \"fmpysub,sgl %1,%2,%0,%5,%3\";
+}"
+ [(set_attr "type" "fpalu")
+ (set_attr "length" "4")])
+
+;; The following two patterns are used by the trampoline code for nested
+;; functions. They flush the I and D cache lines from the start address
+;; (operand0) to the end address (operand1). No lines are flushed if the
+;; end address is less than the start address (unsigned).
+;;
+;; Because the range of memory flushed is variable and the size of a MEM
+;; can only be a CONST_INT, the patterns specify that they perform an
+;; unspecified volatile operation on all memory.
+;;
+;; The address range for an icache flush must lie within a single
+;; space on targets with non-equivalent space registers.
+;;
+;; Operand 0 contains the start address.
+;; Operand 1 contains the end address.
+;; Operand 2 contains the line length to use.
+(define_insn "dcacheflush<P:mode>"
+ [(const_int 1)
+ (unspec_volatile [(mem:BLK (scratch))] UNSPECV_DCACHE)
+ (use (match_operand 0 "pmode_register_operand" "r"))
+ (use (match_operand 1 "pmode_register_operand" "r"))
+ (use (match_operand 2 "pmode_register_operand" "r"))
+ (clobber (match_scratch:P 3 "=&0"))]
+ ""
+ "cmpb,<dwc><<=,n %3,%1,.\;fdc,m %2(%3)\;sync"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "icacheflush<P:mode>"
+ [(const_int 2)
+ (unspec_volatile [(mem:BLK (scratch))] UNSPECV_ICACHE)
+ (use (match_operand 0 "pmode_register_operand" "r"))
+ (use (match_operand 1 "pmode_register_operand" "r"))
+ (use (match_operand 2 "pmode_register_operand" "r"))
+ (clobber (match_operand 3 "pmode_register_operand" "=&r"))
+ (clobber (match_operand 4 "pmode_register_operand" "=&r"))
+ (clobber (match_scratch:P 5 "=&0"))]
+ ""
+ "mfsp %%sr0,%4\;ldsid (%5),%3\;mtsp %3,%%sr0\;cmpb,<dwc><<=,n %5,%1,.\;fic,m %2(%%sr0,%5)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop"
+ [(set_attr "type" "multi")
+ (set_attr "length" "52")])
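+
+;; Rough sketch of the dcacheflush behaviour (illustrative only; the
+;; names below are not part of the port):
+;;
+;;   for (addr = start; addr <= end; addr += line_length)  /* unsigned */
+;;     flush_data_cache_line (addr);
+;;   sync ();
+;;
+;; The unsigned cmpb,<<= test is what ensures that nothing is flushed
+;; when the end address is below the start address.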
+
+;; An out-of-line prologue.
+(define_insn "outline_prologue_call"
+ [(unspec_volatile [(const_int 0)] UNSPECV_OPC)
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 22))
+ (clobber (reg:SI 21))
+ (clobber (reg:SI 20))
+ (clobber (reg:SI 19))
+ (clobber (reg:SI 1))]
+ ""
+ "*
+{
+
+ /* We need two different versions depending on whether or not we
+ need a frame pointer. Also note that we return to the instruction
+ immediately after the branch rather than two instructions after the
+     branch, as is normally the case.  */
+ if (frame_pointer_needed)
+ {
+ /* Must import the magic millicode routine(s). */
+ output_asm_insn (\".IMPORT __outline_prologue_fp,MILLICODE\", NULL);
+
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn (\"ldil L'__outline_prologue_fp,%%r31\", NULL);
+ output_asm_insn (\"ble,n R'__outline_prologue_fp(%%sr0,%%r31)\",
+ NULL);
+ }
+ else
+ output_asm_insn (\"{bl|b,l},n __outline_prologue_fp,%%r31\", NULL);
+ }
+ else
+ {
+ /* Must import the magic millicode routine(s). */
+ output_asm_insn (\".IMPORT __outline_prologue,MILLICODE\", NULL);
+
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn (\"ldil L'__outline_prologue,%%r31\", NULL);
+ output_asm_insn (\"ble,n R'__outline_prologue(%%sr0,%%r31)\", NULL);
+ }
+ else
+ output_asm_insn (\"{bl|b,l},n __outline_prologue,%%r31\", NULL);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+;; An out-of-line epilogue.
+(define_insn "outline_epilogue_call"
+ [(unspec_volatile [(const_int 1)] UNSPECV_OEC)
+ (use (reg:SI 29))
+ (use (reg:SI 28))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 22))
+ (clobber (reg:SI 21))
+ (clobber (reg:SI 20))
+ (clobber (reg:SI 19))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 1))]
+ ""
+ "*
+{
+
+ /* We need two different versions depending on whether or not we
+ need a frame pointer. Also note that we return to the instruction
+ immediately after the branch rather than two instructions after the
+     branch, as is normally the case.  */
+ if (frame_pointer_needed)
+ {
+ /* Must import the magic millicode routine. */
+ output_asm_insn (\".IMPORT __outline_epilogue_fp,MILLICODE\", NULL);
+
+ /* The out-of-line prologue will make sure we return to the right
+ instruction. */
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn (\"ldil L'__outline_epilogue_fp,%%r31\", NULL);
+ output_asm_insn (\"ble,n R'__outline_epilogue_fp(%%sr0,%%r31)\",
+ NULL);
+ }
+ else
+ output_asm_insn (\"{bl|b,l},n __outline_epilogue_fp,%%r31\", NULL);
+ }
+ else
+ {
+ /* Must import the magic millicode routine. */
+ output_asm_insn (\".IMPORT __outline_epilogue,MILLICODE\", NULL);
+
+ /* The out-of-line prologue will make sure we return to the right
+ instruction. */
+ if (TARGET_PORTABLE_RUNTIME)
+ {
+ output_asm_insn (\"ldil L'__outline_epilogue,%%r31\", NULL);
+ output_asm_insn (\"ble,n R'__outline_epilogue(%%sr0,%%r31)\", NULL);
+ }
+ else
+ output_asm_insn (\"{bl|b,l},n __outline_epilogue,%%r31\", NULL);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+;; Given a function pointer, canonicalize it so it can be
+;; reliably compared to another function pointer.
+(define_expand "canonicalize_funcptr_for_compare"
+ [(set (reg:SI 26) (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC))
+ (clobber (match_dup 2))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 22))
+ (clobber (reg:SI 31))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (reg:SI 29))]
+ "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+ "
+{
+ if (TARGET_ELF32)
+ {
+ rtx canonicalize_funcptr_for_compare_libfunc
+ = init_one_libfunc (CANONICALIZE_FUNCPTR_FOR_COMPARE_LIBCALL);
+
+ emit_library_call_value (canonicalize_funcptr_for_compare_libfunc,
+ operands[0], LCT_NORMAL, Pmode,
+ 1, operands[1], Pmode);
+ DONE;
+ }
+
+ operands[2] = gen_reg_rtx (SImode);
+ if (GET_CODE (operands[1]) != REG)
+ {
+ rtx tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
+}")
+
+(define_insn "*$$sh_func_adrs"
+ [(set (reg:SI 29) (unspec:SI [(reg:SI 26)] UNSPEC_CFFC))
+ (clobber (match_operand:SI 0 "register_operand" "=a"))
+ (clobber (reg:SI 26))
+ (clobber (reg:SI 22))
+ (clobber (reg:SI 31))]
+ "!TARGET_64BIT"
+ "*
+{
+ int length = get_attr_length (insn);
+ rtx xoperands[2];
+
+ xoperands[0] = GEN_INT (length - 8);
+ xoperands[1] = GEN_INT (length - 16);
+
+ /* Must import the magic millicode routine. */
+ output_asm_insn (\".IMPORT $$sh_func_adrs,MILLICODE\", NULL);
+
+ /* This is absolutely amazing.
+
+ First, copy our input parameter into %r29 just in case we don't
+ need to call $$sh_func_adrs. */
+ output_asm_insn (\"copy %%r26,%%r29\", NULL);
+ output_asm_insn (\"{extru|extrw,u} %%r26,31,2,%%r31\", NULL);
+
+ /* Next, examine the low two bits in %r26, if they aren't 0x2, then
+ we use %r26 unchanged. */
+ output_asm_insn (\"{comib|cmpib},<>,n 2,%%r31,.+%0\", xoperands);
+ output_asm_insn (\"ldi 4096,%%r31\", NULL);
+
+ /* Next, compare %r26 with 4096, if %r26 is less than or equal to
+ 4096, then again we use %r26 unchanged. */
+ output_asm_insn (\"{comb|cmpb},<<,n %%r26,%%r31,.+%1\", xoperands);
+
+  /* Finally, call $$sh_func_adrs to extract the function's real
+     address.  */
+ return output_millicode_call (insn,
+ gen_rtx_SYMBOL_REF (SImode,
+ \"$$sh_func_adrs\"));
+}"
+ [(set_attr "type" "sh_func_adrs")
+ (set (attr "length")
+ (cond [(and (const_int 0) (eq (const_int 0) (pc))) (const_int 28)]
+ (plus (symbol_ref "attr_length_millicode_call (insn)")
+ (const_int 20))))])
+
+;; On the PA, the PIC register is call clobbered, so it must
+;; be saved & restored around calls by the caller. If the call
+;; doesn't return normally (nonlocal goto, or an exception is
+;; thrown), then the code at the exception handler label must
+;; restore the PIC register.
+(define_expand "exception_receiver"
+ [(const_int 4)]
+ "flag_pic"
+ "
+{
+ /* On the 64-bit port, we need a blockage because there is
+ confusion regarding the dependence of the restore on the
+ frame pointer. As a result, the frame pointer and pic
+ register restores sometimes are interchanged erroneously. */
+ if (TARGET_64BIT)
+ emit_insn (gen_blockage ());
+ /* Restore the PIC register using hppa_pic_save_rtx (). The
+ PIC register is not saved in the frame in 64-bit ABI. */
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+ emit_insn (gen_blockage ());
+ DONE;
+}")
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "flag_pic"
+ "
+{
+ if (TARGET_64BIT)
+ emit_insn (gen_blockage ());
+ /* Restore the PIC register. Hopefully, this will always be from
+ a stack slot. The only registers that are valid after a
+ builtin_longjmp are the stack and frame pointers. */
+ emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+ emit_insn (gen_blockage ());
+ DONE;
+}")
+
+;; Allocate new stack space and update the saved stack pointer in the
+;; frame marker. The HP C compilers also copy additional words in the
+;; frame marker. The 64-bit compiler copies words at -48, -32 and -24.
+;; The 32-bit compiler copies the word at -16 (Static Link). We
+;; currently don't copy these values.
+;;
+;; Since the copy of the frame marker can't be done atomically, I
+;; suspect that using it for unwind purposes may be somewhat unreliable.
+;; The HP compilers appear to raise the stack and copy the frame
+;; marker in a strict instruction sequence. This suggests that the
+;; unwind library may check for an alloca sequence when ALLOCA_FRAME
+;; is set in the callinfo data.  We currently don't set ALLOCA_FRAME,
+;; as GAS doesn't support it, nor do we try to keep the instructions
+;; emitted here in strict sequence.
+(define_expand "allocate_stack"
+ [(match_operand 0 "" "")
+ (match_operand 1 "" "")]
+ ""
+ "
+{
+ rtx addr;
+
+ /* Since the stack grows upward, we need to store virtual_stack_dynamic_rtx
+ in operand 0 before adjusting the stack. */
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ anti_adjust_stack (operands[1]);
+ if (TARGET_HPUX_UNWIND_LIBRARY)
+ {
+ addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+ GEN_INT (TARGET_64BIT ? -8 : -4));
+ emit_move_insn (gen_rtx_MEM (word_mode, addr), hard_frame_pointer_rtx);
+ }
+ if (!TARGET_64BIT && flag_pic)
+ {
+ rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
+ emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
+ }
+ DONE;
+}")
+
+(define_expand "prefetch"
+ [(match_operand 0 "address_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_PA_20"
+{
+ operands[0] = copy_addr_to_reg (operands[0]);
+ emit_insn (gen_prefetch_20 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "prefetch_20"
+ [(prefetch (match_operand 0 "pmode_register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_PA_20"
+{
+ /* The SL cache-control completer indicates good spatial locality but
+ poor temporal locality. The ldw instruction with a target of general
+ register 0 prefetches a cache line for a read. The ldd instruction
+ prefetches a cache line for a write. */
+ static const char * const instr[2][2] = {
+ {
+ "ldw,sl 0(%0),%%r0",
+ "ldd,sl 0(%0),%%r0"
+ },
+ {
+ "ldw 0(%0),%%r0",
+ "ldd 0(%0),%%r0"
+ }
+ };
+ int read_or_write = INTVAL (operands[1]) == 0 ? 0 : 1;
+ int locality = INTVAL (operands[2]) == 0 ? 0 : 1;
+
+ return instr [locality][read_or_write];
+}
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
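+
+;; For example (illustrative): __builtin_prefetch (p, 0, 0) -- a read
+;; with no expected temporal locality -- selects "ldw,sl 0(%0),%%r0",
+;; while __builtin_prefetch (p, 1, 3) -- a write likely to be reused --
+;; selects "ldd 0(%0),%%r0".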
+
+;; TLS Support
+(define_insn "tgd_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD))
+ (clobber (reg:SI 1))
+ (use (reg:SI 27))]
+ ""
+ "*
+{
+ return \"addil LR'%1-$tls_gdidx$,%%r27\;ldo RR'%1-$tls_gdidx$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tgd_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD_PIC))
+ (clobber (reg:SI 1))
+ (use (reg:SI 19))]
+ ""
+ "*
+{
+ return \"addil LT'%1-$tls_gdidx$,%%r19\;ldo RT'%1-$tls_gdidx$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tld_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM))
+ (clobber (reg:SI 1))
+ (use (reg:SI 27))]
+ ""
+ "*
+{
+ return \"addil LR'%1-$tls_ldidx$,%%r27\;ldo RR'%1-$tls_ldidx$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tld_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM_PIC))
+ (clobber (reg:SI 1))
+ (use (reg:SI 19))]
+ ""
+ "*
+{
+ return \"addil LT'%1-$tls_ldidx$,%%r19\;ldo RT'%1-$tls_ldidx$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tld_offset_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI 1))]
+ ""
+ "*
+{
+ return \"addil LR'%1-$tls_dtpoff$,%2\;ldo RR'%1-$tls_dtpoff$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tp_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_TP))]
+ ""
+ "mfctl %%cr27,%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+(define_insn "tie_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] UNSPEC_TLSIE))
+ (clobber (reg:SI 1))
+ (use (reg:SI 27))]
+ ""
+ "*
+{
+ return \"addil LR'%1-$tls_ieoff$,%%r27\;ldw RR'%1-$tls_ieoff$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tie_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] UNSPEC_TLSIE_PIC))
+ (clobber (reg:SI 1))
+ (use (reg:SI 19))]
+ ""
+ "*
+{
+ return \"addil LT'%1-$tls_ieoff$,%%r19\;ldw RT'%1-$tls_ieoff$(%%r1),%0\";
+}"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_insn "tle_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (unspec:SI [(match_operand 1 "tle_symbolic_operand" "")]
+ UNSPEC_TLSLE)
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI 1))]
+ ""
+ "addil LR'%1-$tls_leoff$,%2\;ldo RR'%1-$tls_leoff$(%%r1),%0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
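+
+;; Putting the TLS pieces together (an illustrative sketch; the register
+;; numbers are arbitrary): a local-exec access to "__thread int t" uses
+;; tp_load followed by tle_load, roughly
+;;
+;;   mfctl %cr27,%r26                    ; thread pointer
+;;   addil LR't-$tls_leoff$,%r26         ; add high part of t's offset
+;;   ldo RR't-$tls_leoff$(%r1),%r28      ; %r28 = address of t
+;;
+;; after which an ordinary ldw fetches the value.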
diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt
new file mode 100644
index 000000000..6d10544b7
--- /dev/null
+++ b/gcc/config/pa/pa.opt
@@ -0,0 +1,118 @@
+; Options for the HP PA-RISC port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+march=1.0
+Target RejectNegative
+Generate PA1.0 code
+
+march=1.1
+Target RejectNegative
+Generate PA1.1 code
+
+march=2.0
+Target RejectNegative
+Generate PA2.0 code (requires binutils 2.10 or later)
+
+mbig-switch
+Target Report Mask(BIG_SWITCH)
+Generate code for huge switch statements
+
+mdisable-fpregs
+Target Report Mask(DISABLE_FPREGS)
+Disable FP regs
+
+mdisable-indexing
+Target Report Mask(DISABLE_INDEXING)
+Disable indexed addressing
+
+mfast-indirect-calls
+Target Report Mask(FAST_INDIRECT_CALLS)
+Generate fast indirect calls
+
+mfixed-range=
+Target RejectNegative Joined
+Specify range of registers to make fixed
+
+mgas
+Target Report Mask(GAS)
+Assume code will be assembled by GAS
+
+mjump-in-delay
+Target Report Mask(JUMP_IN_DELAY)
+Put jumps in call delay slots
+
+;; Not used by gcc
+mlinker-opt
+Target RejectNegative
+Enable linker optimizations
+
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Always generate long calls
+
+mlong-load-store
+Target Report Mask(LONG_LOAD_STORE)
+Emit long load/store sequences
+
+mnosnake
+Target RejectNegative
+Generate PA1.0 code
+
+mno-space-regs
+Target RejectNegative Report Mask(NO_SPACE_REGS)
+Disable space regs
+
+mpa-risc-1-0
+Target RejectNegative
+Generate PA1.0 code
+
+mpa-risc-1-1
+Target RejectNegative Mask(PA_11)
+Generate PA1.1 code
+
+mpa-risc-2-0
+Target RejectNegative Mask(PA_20)
+Generate PA2.0 code (requires binutils 2.10 or later)
+
+mportable-runtime
+Target Report Mask(PORTABLE_RUNTIME)
+Use portable calling conventions
+
+mschedule=
+Target RejectNegative Joined
+Specify CPU for scheduling purposes. Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000
+
+msoft-float
+Target Report Mask(SOFT_FLOAT)
+Use software floating point
+
+msnake
+Target RejectNegative
+Generate PA1.1 code
+
+mspace-regs
+Target RejectNegative Report InverseMask(NO_SPACE_REGS)
+Do not disable space regs
+
+Mask(SIO)
+;; Generate cpp defines for server I/O.
+
+Mask(GNU_LD)
+;; Assume GNU linker by default.
diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h
new file mode 100644
index 000000000..8d80a2100
--- /dev/null
+++ b/gcc/config/pa/pa32-linux.h
@@ -0,0 +1,67 @@
+/* Definitions for PA_RISC with ELF-32 format
+ Copyright (C) 2000, 2002, 2004, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Turn off various SOM crap we don't want. */
+#undef TARGET_ELF32
+#define TARGET_ELF32 1
+
+/* The libcall __canonicalize_funcptr_for_compare is referenced in
+ crtend.o and the reference isn't resolved in objects that don't
+ compare function pointers. Thus, we need to play games to provide
+ a reference in crtbegin.o. The rest of the define is the same
+ as that in crtstuff.c */
+#define CTOR_LIST_BEGIN \
+ asm (".type __canonicalize_funcptr_for_compare,@function\n" \
+" .text\n" \
+" .word __canonicalize_funcptr_for_compare-$PIC_pcrel$0"); \
+ STATIC func_ptr __CTOR_LIST__[1] \
+ __attribute__ ((__used__, section(".ctors"), \
+ aligned(sizeof(func_ptr)))) \
+ = { (func_ptr) (-1) }
+
+/* This is a PIC version of CRT_CALL_STATIC_FUNCTION. The PIC
+ register has to be saved before the call and restored after
+ the call. We assume that register %r4 is available for this
+ purpose. The hack prevents GCC from deleting the restore. */
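+
+/* Note (our reading, not from the original sources): the "bl" branch
+   has a delay slot on PA-RISC, so the first copy (%r19 into %r4)
+   executes before FUNC does, and the second copy restores the PIC
+   register once the call returns.  */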
+#ifdef CRTSTUFFS_O
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+static void __attribute__((__used__)) \
+call_ ## FUNC (void) \
+{ \
+ asm (SECTION_OP); \
+ asm volatile ("bl " #FUNC ",%%r2\n\t" \
+ "copy %%r19,%%r4\n\t" \
+ "copy %%r4,%%r19\n" \
+ : \
+ : \
+ : "r1", "r2", "r4", "r20", "r21", \
+ "r22", "r23", "r24", "r25", "r26", \
+ "r27", "r28", "r29", "r31"); \
+ asm (TEXT_SECTION_ASM_OP); \
+}
+#endif
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define MD_UNWIND_SUPPORT "config/pa/linux-unwind.h"
diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h
new file mode 100644
index 000000000..9a1c06726
--- /dev/null
+++ b/gcc/config/pa/pa32-regs.h
@@ -0,0 +1,373 @@
+/* Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+ 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ HP-PA 1.0 has 32 fullword registers and 16 floating point
+ registers. The floating point registers hold either word or double
+ word values.
+
+ 16 additional registers are reserved.
+
+ HP-PA 1.1 has 32 fullword registers and 32 floating point
+ registers. However, the floating point registers behave
+ differently: the left and right halves of registers are addressable
+ as 32-bit registers. So, we will set things up like the 68k which
+ has different fp units: define separate register sets for the 1.0
+ and 1.1 fp units. */
+
+#define FIRST_PSEUDO_REGISTER 90	/* 32 general regs + 56 fp regs
+					   + 1 shift reg + frame pointer */
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ On the HP-PA, these are:
+ Reg 0 = 0 (hardware). However, 0 is used for condition code,
+ so is not fixed.
+ Reg 1 = ADDIL target/Temporary (hardware).
+ Reg 2 = Return Pointer
+ Reg 3 = Frame Pointer
+ Reg 4 = Frame Pointer (>8k varying frame with HP compilers only)
+ Reg 4-18 = Preserved Registers
+ Reg 19 = Linkage Table Register in HPUX 8.0 shared library scheme.
+ Reg 20-22 = Temporary Registers
+ Reg 23-26 = Temporary/Parameter Registers
+ Reg 27 = Global Data Pointer (hp)
+ Reg 28 = Temporary/Return Value register
+ Reg 29 = Temporary/Static Chain/Return Value register #2
+ Reg 30 = stack pointer
+ Reg 31 = Temporary/Millicode Return Pointer (hp)
+
+ Freg 0-3 = Status Registers -- Not known to the compiler.
+ Freg 4-7 = Arguments/Return Value
+ Freg 8-11 = Temporary Registers
+ Freg 12-15 = Preserved Registers
+
+ Freg 16-31 = Reserved
+
+ On the Snake, fp regs are
+
+ Freg 0-3 = Status Registers -- Not known to the compiler.
+ Freg 4L-7R = Arguments/Return Value
+ Freg 8L-11R = Temporary Registers
+ Freg 12L-21R = Preserved Registers
+ Freg 22L-31R = Temporary Registers
+
+*/
+
+#define FIXED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 0, 0, 1, 0, \
+ /* fp registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* shift register and soft frame pointer */ \
+ 0, 1}
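+
+/* Reading the table above (for illustration): the only 1 entries are
+   register 27, the global data pointer, register 30, the stack pointer,
+   and register 89, the soft frame pointer.  */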
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* fp registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* shift register and soft frame pointer */ \
+ 1, 1}
+
+/* Allocate the call used registers first. This should minimize
+ the number of registers that need to be saved (as call used
+ registers will generally not be allocated across a call).
+
+ Experimentation has shown slightly better results by allocating
+ FP registers first. We allocate the caller-saved registers more
+ or less in reverse order to their allocation as arguments.
+
+ FP registers are ordered so that all L registers are selected before
+ R registers. This works around a false dependency interlock on the
+ PA8000 when accessing the high and low parts of an FP register
+ independently. */
+
+#define REG_ALLOC_ORDER \
+ { \
+ /* caller-saved fp regs. */ \
+ 68, 70, 72, 74, 76, 78, 80, 82, \
+ 84, 86, 40, 42, 44, 46, 38, 36, \
+ 34, 32, \
+ 69, 71, 73, 75, 77, 79, 81, 83, \
+ 85, 87, 41, 43, 45, 47, 39, 37, \
+ 35, 33, \
+ /* caller-saved general regs. */ \
+ 28, 19, 20, 21, 22, 31, 27, 29, \
+ 23, 24, 25, 26, 2, \
+ /* callee-saved fp regs. */ \
+ 48, 50, 52, 54, 56, 58, 60, 62, \
+ 64, 66, \
+ 49, 51, 53, 55, 57, 59, 61, 63, \
+ 65, 67, \
+ /* callee-saved general regs. */ \
+ 3, 4, 5, 6, 7, 8, 9, 10, \
+ 11, 12, 13, 14, 15, 16, 17, 18, \
+ /* special registers. */ \
+ 1, 30, 0, 88, 89}
+
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the HP-PA, general registers are 32 bits wide. The floating
+ point registers are 64 bits wide. Snake fp regs are treated as
+ 32 bits wide since the left and right parts are independently
+ accessible. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) \
+ ? (!TARGET_PA_11 \
+ ? COMPLEX_MODE_P (MODE) ? 2 : 1 \
+ : (GET_MODE_SIZE (MODE) + 4 - 1) / 4) \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
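+
+/* Worked example (illustrative): DFmode (8 bytes) occupies two 32-bit
+   general registers and two PA 1.1 fp registers (a left/right pair),
+   but only one PA 1.0 fp register, since 1.0 fp registers are treated
+   as 64 bits wide.  */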
+
+/* There are no instructions that use DImode in PA 1.0, so we only
+ allow it in PA 1.1 and later. */
+#define VALID_FP_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode \
+ || (MODE) == SCmode || (MODE) == DCmode \
+ || (MODE) == SImode || (TARGET_PA_11 && (MODE) == DImode))
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+
+ On the HP-PA, the cpu registers can hold any mode that fits in 32 bits.
+ For the 64-bit modes, we choose a set of non-overlapping general registers
+ that includes the incoming arguments and the return value. We specify a
+ set with no overlaps so that we don't have to specify that the destination
+ register is an early clobber in patterns using this mode. Except for the
+ return value, the starting registers are odd. For 128 and 256 bit modes,
+ we similarly specify non-overlapping sets of cpu registers. However,
+ there aren't any patterns defined for modes larger than 64 bits at the
+ moment.
+
+ We limit the modes allowed in the floating point registers to the
+ set of modes used in the machine definition. In addition, we allow
+ the complex modes SCmode and DCmode. The real and imaginary parts
+ of complex modes are allocated to separate registers. This might
+ allow patterns to be defined in the future to operate on these values.
+
+ The PA 2.0 architecture specifies that quad-precision floating-point
+ values should start on an even floating point register. Thus, we
+ choose non-overlapping sets of registers starting on even register
+ boundaries for large modes. However, there is currently no support
+ in the machine definition for modes larger than 64 bits. TFmode is
+ supported under HP-UX using libcalls. Since TFmode values are passed
+ by reference, they never need to be loaded into the floating-point
+ registers. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((REGNO) == 0 ? (MODE) == CCmode || (MODE) == CCFPmode \
+ : (REGNO) == 88 ? SCALAR_INT_MODE_P (MODE) \
+ : !TARGET_PA_11 && FP_REGNO_P (REGNO) \
+ ? (VALID_FP_MODE_P (MODE) \
+ && (GET_MODE_SIZE (MODE) <= 8 \
+ || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0))) \
+ : FP_REGNO_P (REGNO) \
+ ? (VALID_FP_MODE_P (MODE) \
+ && (GET_MODE_SIZE (MODE) <= 4 \
+ || (GET_MODE_SIZE (MODE) == 8 && ((REGNO) & 1) == 0) \
+ || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 3) == 0) \
+ || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 7) == 0))) \
+ : (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \
+ || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD \
+ && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28)) \
+ || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD \
+ && ((REGNO) & 3) == 3 && (REGNO) <= 23) \
+ || (GET_MODE_SIZE (MODE) == 8 * UNITS_PER_WORD \
+ && ((REGNO) & 7) == 3 && (REGNO) <= 19)))
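+
+/* Worked example (illustrative): a DImode value may start in any odd
+   general register up to %r25, or in %r28 (the return value), so the
+   pair %r5/%r6 is valid while %r6/%r7 is not.  */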
+
+/* How to renumber registers for dbx and gdb.
+
+ Registers 0 - 31 remain unchanged.
+
+ Registers 32 - 87 are mapped to 72 - 127
+
+ Register 88 is mapped to 32. */
+
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ((REGNO) <= 31 ? (REGNO) : \
+ ((REGNO) <= 87 ? (REGNO) + 40 : 32))
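+
+/* E.g. (illustrative): %r5 (REGNO 5) stays 5, %fr4 (REGNO 32) becomes
+   72, and the shift register SAR (REGNO 88) becomes 32.  */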
+
+/* We must not use the DBX register numbers for the DWARF 2 CFA column
+ numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER.
+ Instead use the identity mapping. */
+#define DWARF_FRAME_REGNUM(REG) REG
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+ /* The HP-PA has four kinds of registers: general regs, 1.0 fp regs,
+ 1.1 fp regs, and the high 1.1 fp regs, to which the operands of
+ fmpyadd and fmpysub are restricted. */
+
+enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
+ GENERAL_OR_FP_REGS, SHIFT_REGS, ALL_REGS, LIM_REG_CLASSES};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+ {"NO_REGS", "R1_REGS", "GENERAL_REGS", "FPUPPER_REGS", "FP_REGS", \
+ "GENERAL_OR_FP_REGS", "SHIFT_REGS", "ALL_REGS"}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. Register 0, the "condition code" register,
+ is in no class. */
+
+#define REG_CLASS_CONTENTS \
+ {{0x00000000, 0x00000000, 0x00000000}, /* NO_REGS */ \
+ {0x00000002, 0x00000000, 0x00000000}, /* R1_REGS */ \
+ {0xfffffffe, 0x00000000, 0x02000000}, /* GENERAL_REGS */ \
+ {0x00000000, 0xff000000, 0x00ffffff}, /* FPUPPER_REGS */ \
+ {0x00000000, 0xffffffff, 0x00ffffff}, /* FP_REGS */ \
+ {0xfffffffe, 0xffffffff, 0x02ffffff}, /* GENERAL_OR_FP_REGS */ \
+ {0x00000000, 0x00000000, 0x01000000}, /* SHIFT_REGS */ \
+ {0xfffffffe, 0xffffffff, 0x03ffffff}} /* ALL_REGS */
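+
+/* Decoding an entry (for illustration): in GENERAL_REGS, 0xfffffffe in
+   the first word selects registers 1-31, and 0x02000000 in the third
+   word is bit 25, i.e. register 64 + 25 = 89, the soft frame pointer.
+   Likewise, SHIFT_REGS sets only bit 24 of the third word, register 88
+   (SAR).  */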
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of the registers.  The macro value is an
+   array of register classes with LIM_REG_CLASSES used as the end
+   marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FP_REGS, SHIFT_REGS, LIM_REG_CLASSES \
+}
+
+/* Defines invalid mode changes. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ pa_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) == 0 ? NO_REGS \
+ : (REGNO) == 1 ? R1_REGS \
+ : (REGNO) < 32 || (REGNO) == 89 ? GENERAL_REGS \
+ : (REGNO) < 56 ? FP_REGS \
+ : (REGNO) < 88 ? FPUPPER_REGS \
+ : SHIFT_REGS)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS \
+ ? (!TARGET_PA_11 \
+ ? COMPLEX_MODE_P (MODE) ? 2 : 1 \
+ : (GET_MODE_SIZE (MODE) + 4 - 1) / 4) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* 1 if N is a possible register number for function argument passing. */
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ (((N) >= 23 && (N) <= 26) || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39))
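+
+/* That is (illustrative reading): the general argument registers
+   %r23-%r26, with the first word-sized argument arriving in %r26, and,
+   when hardware floating point is in use, registers 32-39, i.e.
+   %fr4-%fr7 including their right halves.  */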
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
+ "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", \
+ "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", \
+ "%fr4", "%fr4R", "%fr5", "%fr5R", "%fr6", "%fr6R", "%fr7", "%fr7R", \
+ "%fr8", "%fr8R", "%fr9", "%fr9R", "%fr10", "%fr10R", "%fr11", "%fr11R", \
+ "%fr12", "%fr12R", "%fr13", "%fr13R", "%fr14", "%fr14R", "%fr15", "%fr15R", \
+ "%fr16", "%fr16R", "%fr17", "%fr17R", "%fr18", "%fr18R", "%fr19", "%fr19R", \
+ "%fr20", "%fr20R", "%fr21", "%fr21R", "%fr22", "%fr22R", "%fr23", "%fr23R", \
+ "%fr24", "%fr24R", "%fr25", "%fr25R", "%fr26", "%fr26R", "%fr27", "%fr27R", \
+ "%fr28", "%fr28R", "%fr29", "%fr29R", "%fr30", "%fr30R", "%fr31", "%fr31R", \
+ "SAR", "sfp"}
+
+#define ADDITIONAL_REGISTER_NAMES \
+{{"%fr4L",32}, {"%fr5L",34}, {"%fr6L",36}, {"%fr7L",38}, \
+ {"%fr8L",40}, {"%fr9L",42}, {"%fr10L",44}, {"%fr11L",46}, \
+ {"%fr12L",48}, {"%fr13L",50}, {"%fr14L",52}, {"%fr15L",54}, \
+ {"%fr16L",56}, {"%fr17L",58}, {"%fr18L",60}, {"%fr19L",62}, \
+ {"%fr20L",64}, {"%fr21L",66}, {"%fr22L",68}, {"%fr23L",70}, \
+ {"%fr24L",72}, {"%fr25L",74}, {"%fr26L",76}, {"%fr27L",78}, \
+ {"%fr28L",80}, {"%fr29L",82}, {"%fr30L",84}, {"%fr31L",86}, \
+ {"%cr11",88}}
+
+#define FP_SAVED_REG_LAST 66
+#define FP_SAVED_REG_FIRST 48
+#define FP_REG_STEP 2
+#define FP_REG_FIRST 32
+#define FP_REG_LAST 87
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
new file mode 100644
index 000000000..cc9724a6e
--- /dev/null
+++ b/gcc/config/pa/pa64-hpux.h
@@ -0,0 +1,442 @@
+/* Definitions of target machine for GNU compiler, for HPs running
+ HPUX using the 64bit runtime model.
+ Copyright (C) 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* We can debug dynamically linked executables on hpux11; we also
+ want dereferencing of a NULL pointer to cause a SEGV. Do not move
+ the "+Accept TypeMismatch" switch. We check for it in collect2
+ to determine which init/fini is needed. */
+#undef LINK_SPEC
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD)
+#define LINK_SPEC \
+ "%{!shared:%{p:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{pg:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{!static:%{rdynamic:-E}}}\
+ %{mhp-ld:+Accept TypeMismatch -z} %{mlinker-opt:-O}\
+ %{!shared:-u main %{!nostdlib:%{!nodefaultlibs:-u __cxa_finalize}}}\
+ %{static:-a archive} %{shared:%{mhp-ld:-b}%{!mhp-ld:-shared}}"
+#else
+#define LINK_SPEC \
+ "%{!shared:%{p:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{pg:-L/lib/pa20_64/libp -L/usr/lib/pa20_64/libp %{!static:\
+ %nwarning: consider linking with '-static' as system libraries with\n\
+ %n profiling support are only provided in archive format}}}\
+ %{!shared:%{!static:%{rdynamic:-E}}}\
+ %{!mgnu-ld:+Accept TypeMismatch -z} %{mlinker-opt:-O}\
+ %{!shared:-u main %{!nostdlib:%{!nodefaultlibs:-u __cxa_finalize}}}\
+ %{static:-a archive} %{shared:%{mgnu-ld:-shared}%{!mgnu-ld:-b}}"
+#endif
+
+/* Profiling support is only provided in libc.a. However, libprof and
+ libgprof are only available in shared form on HP-UX 11.00. We use
+ the shared form if we are using the GNU linker or an archive form
+ isn't available. We also usually need to link with libdld and it's
+ only available in shared form. */
+#undef LIB_SPEC
+#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD)
+#define LIB_SPEC \
+ "%{!shared:\
+ %{!p:%{!pg:%{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
+ -lprof %{static:-a archive}\
+ %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
+ -lgprof %{static:-a archive}\
+ %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{shared:%{mt|pthread:-lpthread}}"
+#else
+#define LIB_SPEC \
+ "%{!shared:\
+ %{!p:%{!pg:%{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
+ -lprof %{static:-a archive}\
+ %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
+ -lgprof %{static:-a archive}\
+ %{fopenmp:%{static:-a shared} -lrt %{static:-a archive}}\
+ %{mt|pthread:-lpthread} -lc\
+ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}\
+ %{!mt:%{!pthread:-a shared -lc -a archive}}}}}\
+ %{shared:%{mt|pthread:-lpthread}}"
+#endif
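+
+/* Worked example (our reading of the spec above): linking with
+   "-pg -static" under the HP linker expands to
+   "-a archive_shared -lgprof -a archive ... -lc", so libgprof may still
+   be satisfied from a shared library while the remaining libraries stay
+   archive-only.  */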
+
+/* The libgcc_stub.a and milli.a libraries need to come last. */
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC "\
+ %G %L %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}\
+ milli.a%s}}"
+
+/* Under hpux11, the normal location of the `ld' and `as' programs is the
+ /usr/ccs/bin directory. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin"
+#endif
+
+/* Default prefixes. */
+
+#undef STANDARD_STARTFILE_PREFIX_1
+#define STANDARD_STARTFILE_PREFIX_1 "/lib/pa20_64/"
+
+#undef STANDARD_STARTFILE_PREFIX_2
+#define STANDARD_STARTFILE_PREFIX_2 "/usr/lib/pa20_64/"
+
+/* Under hpux11 the normal location of the various pa20_64 *crt*.o files
+ is the /usr/ccs/lib/pa20_64 directory. Some files may also be in the
+ /opt/langtools/lib/pa20_64 directory. */
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/pa20_64/"
+#endif
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_STARTFILE_PREFIX_1
+#define MD_STARTFILE_PREFIX_1 "/opt/langtools/lib/pa20_64/"
+#endif
+
+/* This macro specifies the biggest alignment supported by the object
+ file format of this machine.
+
+   The .align directive in the HP assembler allows alignments up to
+   4096 bytes (the macro value below is expressed in bits, hence 32768).
+   However, the maximum alignment of a global common symbol is 16 bytes
+   using HP ld.  Unfortunately, this macro doesn't provide a method to
+   check for common symbols. */
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT 32768
+
+/* Due to limitations in the target structure, it isn't currently possible
+ to dynamically switch between the GNU and HP assemblers. */
+#undef TARGET_GAS
+
+/* Configure selects the standard ELFOS defines for use with GAS. */
+#ifdef USING_ELFOS_H
+
+/* We are using GAS. */
+#define TARGET_GAS 1
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START pa_hpux64_gas_file_start
+
+/* This is how we output a null terminated string. */
+#undef STRING_ASM_OP
+#define STRING_ASM_OP "\t.stringz\t"
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+#define JCR_SECTION_NAME ".jcr"
+
+#define HP_INIT_ARRAY_SECTION_ASM_OP "\t.section\t.init"
+#define GNU_INIT_ARRAY_SECTION_ASM_OP "\t.section\t.init_array"
+#define HP_FINI_ARRAY_SECTION_ASM_OP "\t.section\t.fini"
+#define GNU_FINI_ARRAY_SECTION_ASM_OP "\t.section\t.fini_array"
+
+/* We need to override the following two macros defined in elfos.h since
+ the .comm directive has a different syntax and it can't be used for
+ local common symbols. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ pa_asm_output_aligned_common (FILE, NAME, SIZE, ALIGN)
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+ pa_asm_output_aligned_local (FILE, NAME, SIZE, ALIGN)
+
+/* The define in pa.h doesn't work with the alias attribute. The
+ default is ok with the following define for GLOBAL_ASM_OP. */
+#undef TARGET_ASM_GLOBALIZE_LABEL
+
+/* This is how we globalize a label. */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* Hacked version from defaults.h that uses assemble_name_raw
+ instead of assemble_name. A symbol in a type directive that
+ isn't otherwise referenced doesn't cause the symbol to be
+ placed in the symbol table of the assembled object. */
+#undef ASM_OUTPUT_TYPE_DIRECTIVE
+#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE) \
+do { \
+ fputs (TYPE_ASM_OP, STREAM); \
+ assemble_name_raw (STREAM, NAME); \
+ fputs (", ", STREAM); \
+ fprintf (STREAM, TYPE_OPERAND_FMT, TYPE); \
+ putc ('\n', STREAM); \
+} while (0)
+
+/* Hacked version from elfos.h that doesn't output a label. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+do { \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \
+} while (0)
+
+/* The type of external references must be set correctly for the
+ dynamic loader to work correctly. This is equivalent to the
+ HP assembler's .IMPORT directive but relates more directly to
+ ELF object file types. */
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ pa_hpux_asm_output_external ((FILE), (DECL), (NAME))
+#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \
+do { \
+ if (FUNCTION_NAME_P (NAME)) \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \
+ else \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ default_elf_asm_output_external (FILE, DECL, NAME); \
+} while (0)
+
+/* We need to set the type for external libcalls.  Also note that not all
+ libcall names are passed to targetm.encode_section_info (e.g., __main).
+ Thus, we also have to do the section encoding if it hasn't been done
+ already. */
+#undef ASM_OUTPUT_EXTERNAL_LIBCALL
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+do { \
+ if (!FUNCTION_NAME_P (XSTR (FUN, 0))) \
+ hppa_encode_label (FUN); \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, XSTR (FUN, 0), "function"); \
+} while (0)
+
+/* We need to use the HP style for internal labels. */
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ sprintf (LABEL, "*%c$%s%04ld", (PREFIX)[0], (PREFIX) + 1, (long)(NUM))
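+
+/* E.g. (illustrative): ASM_GENERATE_INTERNAL_LABEL (buf, "LC", 4) yields
+   "*L$C0004"; the leading '*' tells GCC that the name is already in its
+   final assembler form.  */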
+
+#else /* USING_ELFOS_H */
+
+/* We are not using GAS. */
+#define TARGET_GAS 0
+
+/* HPUX 11 has the "new" HP assembler. It's still lousy, but it's a whole
+ lot better than the assembler shipped with older versions of hpux.
+ However, it doesn't support weak symbols and is a bad fit with ELF. */
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1
+
+/* It looks like DWARF2 will be the easiest debug format to handle on this
+ platform. */
+#define DWARF2_DEBUGGING_INFO 1
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* This target uses the ELF object file format. */
+#define OBJECT_FORMAT_ELF
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START pa_hpux64_hpas_file_start
+
+#undef TEXT_SECTION_ASM_OP
+#define TEXT_SECTION_ASM_OP "\t.SUBSPA $CODE$\n"
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.SUBSPA $LIT$\n"
+#undef DATA_SECTION_ASM_OP
+#define DATA_SECTION_ASM_OP "\t.SUBSPA $DATA$\n"
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.SUBSPA $BSS$\n"
+
+/* We provide explicit defines for CTORS_SECTION_ASM_OP and
+ DTORS_SECTION_ASM_OP since we don't yet have support for
+ named sections with the HP assembler. */
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.SUBSPA \\.ctors,QUAD=1,ALIGN=8,ACCESS=31"
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.SUBSPA \\.dtors,QUAD=1,ALIGN=8,ACCESS=31"
+
+#define HP_INIT_ARRAY_SECTION_ASM_OP \
+ "\t.SUBSPA \\.init,QUAD=1,ALIGN=8,ACCESS=31"
+#define GNU_INIT_ARRAY_SECTION_ASM_OP \
+ "\t.SUBSPA \\.init_array,QUAD=1,ALIGN=8,ACCESS=31"
+#define HP_FINI_ARRAY_SECTION_ASM_OP \
+ "\t.SUBSPA \\.fini,QUAD=1,ALIGN=8,ACCESS=31"
+#define GNU_FINI_ARRAY_SECTION_ASM_OP \
+ "\t.SUBSPA \\.fini_array,QUAD=1,ALIGN=8,ACCESS=31"
+
+#endif /* USING_ELFOS_H */
+
+/* The following defines, used to run constructors and destructors with
+ the SOM linker under HP-UX 11, are not needed. */
+#undef HAS_INIT_SECTION
+#undef LD_INIT_SWITCH
+#undef LD_FINI_SWITCH
+
+/* The following STARTFILE_SPEC and ENDFILE_SPEC defines provide the
+ magic needed to run initializers and finalizers. */
+#undef STARTFILE_SPEC
+#if TARGET_HPUX_11_11
+#define STARTFILE_SPEC \
+ "%{!shared: %{!symbolic: crt0%O%s} %{munix=95:unix95.o%s} \
+ %{!munix=93:%{!munix=95:unix98%O%s}}} %{static:crtbeginT%O%s} \
+ %{!static:%{!shared:crtbegin%O%s} %{shared:crtbeginS%O%s}}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: %{!symbolic: crt0%O%s} %{munix=95:unix95%O%s}} \
+ %{static:crtbeginT%O%s} %{!static:%{!shared:crtbegin%O%s} \
+ %{shared:crtbeginS%O%s}}"
+#endif
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}"
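+
+/* Roughly: for a plain link (no -shared, -symbolic or -static), the
+   specs above add crt0%O, crtbegin%O and crtend%O to the link line;
+   -shared drops crt0%O and substitutes crtbeginS%O/crtendS%O, while
+   -static selects crtbeginT%O.  In the spec language, %O expands to
+   the object-file suffix and %s marks a file to be located via the
+   standard startfile search paths.  */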
+
+/* Since HP uses the .init and .fini sections for array initializers
+ and finalizers, we need different defines for INIT_SECTION_ASM_OP
+ and FINI_SECTION_ASM_OP. With the implementation adopted below,
+ the sections are not actually used. However, we still must provide
+ defines to select the proper code path. */
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP
+
+/* We are using array initializers and don't want calls in the INIT
+ and FINI sections. */
+#undef CRT_CALL_STATIC_FUNCTION
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)
+
+/* The init_priority attribute is not supported with HP ld. This could be
+ supported if collect2 was used with LD_INIT_SWITCH. Unfortunately, this
+ approach doesn't work with GNU ld since HP-UX doesn't support DT_INIT,
+ and therefore the -init and -fini GNU ld switches. */
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY (TARGET_GNU_LD ? 1 : 0)
+
+/* We use DTOR_LIST_BEGIN to carry a bunch of hacks to allow us to use
+ the init and fini array sections with both the HP and GNU linkers.
+ The linkers setup the required dynamic entries in the dynamic segment
+ and the dynamic linker does the calls. This approach avoids using
+ collect2.
+
+ The first hack is to implement __do_global_ctors_aux in crtbegin as
+ it needs to be the first entry in the init array so that it is called
+ last. HP got the order of the init array backwards. The DT_INIT_ARRAY
+ is supposed to be executed in the same order as the addresses appear in
+ the array. DT_FINI_ARRAY is supposed to be executed in the opposite
+ order.
+
+ The second hack is a set of plabels to implement the effect of
+   CRT_CALL_STATIC_FUNCTION.  HP-UX 11 only supports DT_INIT_ARRAY and
+ DT_FINI_ARRAY and they put the arrays in .init and .fini, rather than
+ in .init_array and .fini_array. The standard defines for .init and
+ .fini have the execute flag set. So, the assembler has to be hacked
+ to munge the standard flags for these sections to make them agree
+ with what the HP linker expects. With the GNU linker, we need to
+   use the .init_array and .fini_array sections.  So, we set up for
+ both just in case. Once we have built the table, the linker does
+ the rest of the work.
+
+ The order is significant. Placing __do_global_ctors_aux first in
+   the list results in it being called last.  User-specified initializers,
+ either using the linker +init command or a plabel, run before the
+ initializers specified here. */
+
+/* We need to add frame_dummy to the initializer list if EH_FRAME_SECTION_NAME
+ or JCR_SECTION_NAME is defined. */
+#if defined(EH_FRAME_SECTION_NAME) || defined(JCR_SECTION_NAME)
+#define PA_INIT_FRAME_DUMMY_ASM_OP ".dword P%frame_dummy"
+#else
+#define PA_INIT_FRAME_DUMMY_ASM_OP ""
+#endif
+
+/* The following hack sets up the .init, .init_array, .fini and
+ .fini_array sections. */
+#define PA_CRTBEGIN_HACK \
+asm (TEXT_SECTION_ASM_OP); \
+static void __attribute__((used)) \
+__do_global_ctors_aux (void) \
+{ \
+ func_ptr *p = __CTOR_LIST__; \
+ while (*(p + 1)) \
+ p++; \
+ for (; *p != (func_ptr) -1; p--) \
+ (*p) (); \
+} \
+ \
+asm (HP_INIT_ARRAY_SECTION_ASM_OP); \
+asm (".align 8"); \
+asm (".dword P%__do_global_ctors_aux"); \
+asm (PA_INIT_FRAME_DUMMY_ASM_OP); \
+asm (GNU_INIT_ARRAY_SECTION_ASM_OP); \
+asm (".align 8"); \
+asm (".dword P%__do_global_ctors_aux"); \
+asm (PA_INIT_FRAME_DUMMY_ASM_OP); \
+asm (HP_FINI_ARRAY_SECTION_ASM_OP); \
+asm (".align 8"); \
+asm (".dword P%__do_global_dtors_aux"); \
+asm (GNU_FINI_ARRAY_SECTION_ASM_OP); \
+asm (".align 8"); \
+asm (".dword P%__do_global_dtors_aux")
+
+/* The following two variants of DTOR_LIST_BEGIN are identical to those
+ in crtstuff.c except for the addition of the above crtbegin hack. */
+#ifdef DTORS_SECTION_ASM_OP
+#define DTOR_LIST_BEGIN \
+asm (DTORS_SECTION_ASM_OP); \
+STATIC func_ptr __DTOR_LIST__[1] \
+ __attribute__ ((aligned(sizeof(func_ptr)))) \
+ = { (func_ptr) (-1) }; \
+PA_CRTBEGIN_HACK
+#else
+#define DTOR_LIST_BEGIN \
+STATIC func_ptr __DTOR_LIST__[1] \
+ __attribute__ ((section(".dtors"), aligned(sizeof(func_ptr)))) \
+ = { (func_ptr) (-1) }; \
+PA_CRTBEGIN_HACK
+#endif
+
+/* If using HP ld, do not call pxdb.  Use size as a program that does nothing
+ and returns 0. /bin/true cannot be used because it is a script without
+ an interpreter. */
+#define INIT_ENVIRONMENT "LD_PXDB=/usr/ccs/bin/size"
+
+/* The HPUX dynamic linker objects to undefined weak symbols, so do
+ not use them in gthr-posix.h. */
+#define GTHREAD_USE_WEAK 0
+
+/* We don't want undefined weak references to __register_frame_info,
+ __deregister_frame_info, _Jv_RegisterClasses and __cxa_finalize
+ introduced by crtbegin.o. The GNU linker only resolves weak
+ references if they appear in a shared library. Thus, it would be
+ impossible to create a static executable if the symbols were weak.
+ So, the best solution seems to be to make the symbols strong and
+ provide an archive library of empty stub functions. */
+#define TARGET_ATTRIBUTE_WEAK
diff --git a/gcc/config/pa/pa64-hpux.opt b/gcc/config/pa/pa64-hpux.opt
new file mode 100644
index 000000000..36b1c61ea
--- /dev/null
+++ b/gcc/config/pa/pa64-hpux.opt
@@ -0,0 +1,27 @@
+; Options for the HP PA-RISC port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mgnu-ld
+Target RejectNegative Mask(GNU_LD) MaskExists
+Assume code will be linked by GNU ld
+
+mhp-ld
+Target RejectNegative InverseMask(GNU_LD)
+Assume code will be linked by HP ld
diff --git a/gcc/config/pa/pa64-linux.h b/gcc/config/pa/pa64-linux.h
new file mode 100644
index 000000000..174d7c54d
--- /dev/null
+++ b/gcc/config/pa/pa64-linux.h
@@ -0,0 +1,64 @@
+/* Definitions for PA_RISC with ELF format on 64-bit Linux
+ Copyright (C) 1999, 2000, 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#if 0 /* needs some work :-( */
+/* If defined, this macro specifies a table of register pairs used to
+ eliminate unneeded registers that point into the stack frame. */
+
+#define ELIMINABLE_REGS \
+{ \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It
+ specifies the initial difference between the specified pair of
+ registers. This macro must be defined if `ELIMINABLE_REGS' is
+ defined. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do \
+ { \
+ int fsize; \
+ \
+ fsize = compute_frame_size (get_frame_size (), 0); \
+ if ((TO) == FRAME_POINTER_REGNUM \
+ && (FROM) == ARG_POINTER_REGNUM) \
+ { \
+ (OFFSET) = -16; \
+ break; \
+ } \
+ \
+ gcc_assert ((TO) == STACK_POINTER_REGNUM); \
+ \
+ switch (FROM) \
+ { \
+ case FRAME_POINTER_REGNUM: \
+ (OFFSET) = - fsize; \
+ break; \
+ \
+ case ARG_POINTER_REGNUM: \
+ (OFFSET) = - fsize - 16; \
+ break; \
+ \
+ default: \
+ gcc_unreachable (); \
+ } \
+ } while (0)
+#endif
diff --git a/gcc/config/pa/pa64-regs.h b/gcc/config/pa/pa64-regs.h
new file mode 100644
index 000000000..313577b62
--- /dev/null
+++ b/gcc/config/pa/pa64-regs.h
@@ -0,0 +1,294 @@
+/* Configuration for GCC-compiler for PA-RISC.
+ Copyright (C) 1999, 2000, 2003, 2004, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Standard register usage.
+
+ It is safe to refer to actual register numbers in this file. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ HP-PA 2.0w has 32 fullword registers and 32 floating point
+ registers. However, the floating point registers behave
+ differently: the left and right halves of registers are addressable
+ as 32-bit registers.
+
+ Due to limitations within GCC itself, we do not expose the left/right
+ half addressability when in wide mode. This is not a major performance
+ issue as using the halves independently triggers false dependency stalls
+ anyway. */
+
+#define FIRST_PSEUDO_REGISTER 62  /* 32 general regs + 28 fp regs
+                                     + 1 shift reg + frame pointer */
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ On the HP-PA, these are:
+ Reg 0 = 0 (hardware). However, 0 is used for condition code,
+ so is not fixed.
+ Reg 1 = ADDIL target/Temporary (hardware).
+ Reg 2 = Return Pointer
+ Reg 3 = Frame Pointer
+ Reg 4 = Frame Pointer (>8k varying frame with HP compilers only)
+ Reg 4-18 = Preserved Registers
+ Reg 19 = Linkage Table Register in HPUX 8.0 shared library scheme.
+ Reg 20-22 = Temporary Registers
+ Reg 23-26 = Temporary/Parameter Registers
+ Reg 27 = Global Data Pointer (hp)
+ Reg 28 = Temporary/Return Value register
+ Reg 29 = Temporary/Static Chain/Return Value register #2
+ Reg 30 = stack pointer
+ Reg 31 = Temporary/Millicode Return Pointer (hp)
+
+ Freg 0-3 = Status Registers -- Not known to the compiler.
+ Freg 4-7 = Arguments/Return Value
+ Freg 8-11 = Temporary Registers
+ Freg 12-21 = Preserved Registers
+ Freg 22-31 = Temporary Registers
+
+*/
+
+#define FIXED_REGISTERS \
+ {0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 0, 0, 1, 0, \
+ /* fp registers */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ /* shift register and soft frame pointer */ \
+ 0, 1}
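+
+/* Decoded, the 1 entries above are register 27 (the global data
+   pointer), register 30 (the stack pointer) and register 61 (the soft
+   frame pointer); the shift register (60) remains allocatable.  */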
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* fp registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ /* shift register and soft frame pointer */ \
+ 1, 1}
+
+/* Allocate the call used registers first. This should minimize
+ the number of registers that need to be saved (as call used
+ registers will generally not be allocated across a call).
+
+ Experimentation has shown slightly better results by allocating
+ FP registers first. We allocate the caller-saved registers more
+ or less in reverse order to their allocation as arguments. */
+
+#define REG_ALLOC_ORDER \
+ { \
+ /* caller-saved fp regs. */ \
+ 50, 51, 52, 53, 54, 55, 56, 57, \
+ 58, 59, 39, 38, 37, 36, 35, 34, \
+ 33, 32, \
+ /* caller-saved general regs. */ \
+ 28, 31, 19, 20, 21, 22, 23, 24, \
+ 25, 26, 29, 2, \
+ /* callee-saved fp regs. */ \
+ 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, \
+ /* callee-saved general regs. */ \
+ 3, 4, 5, 6, 7, 8, 9, 10, \
+ 11, 12, 13, 14, 15, 16, 17, 18, \
+ /* special registers. */ \
+ 1, 27, 30, 0, 60, 61}
+
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ For PA64, GPRs and FPRs hold 64 bits worth. We ignore the 32-bit
+ addressability of the FPRs and pretend each register holds precisely
+ WORD_SIZE bits. Note that SCmode values are placed in a single FPR.
+ Thus, any patterns defined to operate on these values would have to
+ use the 32-bit addressability of the FPR registers. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
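+
+/* For example, with UNITS_PER_WORD of 8 in wide mode, SFmode, DFmode
+   and SCmode each occupy one register, while TFmode and DCmode (16
+   bytes) occupy two consecutive registers.  */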
+
+/* These are the valid FP modes. */
+#define VALID_FP_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode \
+ || (MODE) == SCmode || (MODE) == DCmode \
+ || (MODE) == SImode || (MODE) == DImode)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+   On the HP-PA, the cpu registers can hold any mode.  We force wide
+   values into aligned (even) registers when a single register cannot
+   hold the full mode.  */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((REGNO) == 0 \
+ ? (MODE) == CCmode || (MODE) == CCFPmode \
+ : (REGNO) == 60 ? SCALAR_INT_MODE_P (MODE) \
+ /* Make wide modes be in aligned registers. */ \
+ : FP_REGNO_P (REGNO) \
+ ? (VALID_FP_MODE_P (MODE) \
+ && (GET_MODE_SIZE (MODE) <= 8 \
+ || (GET_MODE_SIZE (MODE) == 16 && ((REGNO) & 1) == 0) \
+ || (GET_MODE_SIZE (MODE) == 32 && ((REGNO) & 3) == 0))) \
+ : (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \
+ || (GET_MODE_SIZE (MODE) == 2 * UNITS_PER_WORD \
+ && ((((REGNO) & 1) == 1 && (REGNO) <= 25) || (REGNO) == 28)) \
+ || (GET_MODE_SIZE (MODE) == 4 * UNITS_PER_WORD \
+ && ((REGNO) & 3) == 3 && (REGNO) <= 23)))
+
+/* How to renumber registers for dbx and gdb.
+
+ Registers 0 - 31 remain unchanged.
+
+ Registers 32 - 59 are mapped to 72, 74, 76 ...
+
+ Register 60 is mapped to 32. */
+#define DBX_REGISTER_NUMBER(REGNO) \
+  ((REGNO) <= 31 ? (REGNO) : ((REGNO) < 60 ? ((REGNO) - 32) * 2 + 72 : 32))
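+
+/* For example, general register 5 stays 5, floating register 33 maps
+   to (33 - 32) * 2 + 72 = 74, floating register 59 maps to 126, and
+   the shift register (60) maps to 32.  */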
+
+/* We must not use the DBX register numbers for the DWARF 2 CFA column
+ numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER.
+ Instead use the identity mapping. */
+#define DWARF_FRAME_REGNUM(REG) REG
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+ /* The HP-PA has four kinds of registers: general regs, 1.0 fp regs,
+ 1.1 fp regs, and the high 1.1 fp regs, to which the operands of
+ fmpyadd and fmpysub are restricted. */
+
+enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
+ GENERAL_OR_FP_REGS, SHIFT_REGS, ALL_REGS, LIM_REG_CLASSES};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+ {"NO_REGS", "R1_REGS", "GENERAL_REGS", "FPUPPER_REGS", "FP_REGS", \
+ "GENERAL_OR_FP_REGS", "SHIFT_REGS", "ALL_REGS"}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. Register 0, the "condition code" register,
+ is in no class. */
+
+#define REG_CLASS_CONTENTS \
+ {{0x00000000, 0x00000000}, /* NO_REGS */ \
+ {0x00000002, 0x00000000}, /* R1_REGS */ \
+ {0xfffffffe, 0x20000000}, /* GENERAL_REGS */ \
+ {0x00000000, 0x00000000}, /* FPUPPER_REGS */ \
+ {0x00000000, 0x0fffffff}, /* FP_REGS */ \
+ {0xfffffffe, 0x2fffffff}, /* GENERAL_OR_FP_REGS */ \
+ {0x00000000, 0x10000000}, /* SHIFT_REGS */ \
+ {0xfffffffe, 0x3fffffff}} /* ALL_REGS */
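+
+/* Reading the masks: bit N of the first word is register N and bit N
+   of the second word is register 32 + N.  So GENERAL_REGS
+   (0xfffffffe, 0x20000000) is registers 1-31 plus the soft frame
+   pointer (61), FP_REGS (0x0, 0x0fffffff) is registers 32-59, and
+   SHIFT_REGS (0x0, 0x10000000) is the shift register (60).  */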
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  The cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers in a cover class should be
+   cheaper than a load or store of those registers.  The macro value is
+   an array of register classes with LIM_REG_CLASSES used as the end
+   marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FP_REGS, SHIFT_REGS, LIM_REG_CLASSES \
+}
+
+/* Defines invalid mode changes. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ pa_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) \
+ ((REGNO) == 0 ? NO_REGS \
+ : (REGNO) == 1 ? R1_REGS \
+ : (REGNO) < 32 || (REGNO) == 61 ? GENERAL_REGS \
+ : (REGNO) < 60 ? FP_REGS \
+ : SHIFT_REGS)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* 1 if N is a possible register number for function argument passing. */
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((((N) >= 19) && (N) <= 26) \
+ || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39))
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{"%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
+ "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", \
+ "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31", \
+ "%fr4", "%fr5", "%fr6", "%fr7", "%fr8", "%fr9", "%fr10", "%fr11", \
+ "%fr12", "%fr13", "%fr14", "%fr15", "%fr16", "%fr17", "%fr18", "%fr19", \
+ "%fr20", "%fr21", "%fr22", "%fr23", "%fr24", "%fr25", "%fr26", "%fr27", \
+ "%fr28", "%fr29", "%fr30", "%fr31", "SAR", "sfp"}
+
+#define ADDITIONAL_REGISTER_NAMES \
+ {{"%cr11",60}}
+
+#define FP_SAVED_REG_LAST 49
+#define FP_SAVED_REG_FIRST 40
+#define FP_REG_STEP 1
+#define FP_REG_FIRST 32
+#define FP_REG_LAST 59
diff --git a/gcc/config/pa/pa64-start.h b/gcc/config/pa/pa64-start.h
new file mode 100644
index 000000000..9d7b19a37
--- /dev/null
+++ b/gcc/config/pa/pa64-start.h
@@ -0,0 +1,8 @@
+/* It is currently impossible to switch between PA32 and PA64 based on a
+   runtime compiler switch.  So we might as well lose the overhead of
+   checking for TARGET_64BIT.  */
+#define TARGET_64BIT 1
+#undef TARGET_PA_11
+#define TARGET_PA_11 1
+#undef TARGET_PA_20
+#define TARGET_PA_20 1
diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md
new file mode 100644
index 000000000..ff5dc1784
--- /dev/null
+++ b/gcc/config/pa/predicates.md
@@ -0,0 +1,524 @@
+;; Predicate definitions for HP PA-RISC.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return nonzero only if OP is a register of mode MODE, or
+;; CONST0_RTX.
+
+(define_predicate "reg_or_0_operand"
+ (match_code "subreg,reg,const_int,const_double")
+{
+ return (op == CONST0_RTX (mode) || register_operand (op, mode));
+})
+
+;; Return nonzero if OP is suitable for use in a call to a named
+;; function.
+;;
+;; For 2.5 try to eliminate either call_operand_address or
+;; function_label_operand; they perform very similar functions.
+
+(define_predicate "call_operand_address"
+ (match_code "label_ref,symbol_ref,const_int,const_double,const,high")
+{
+ return (GET_MODE (op) == word_mode
+ && CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
+})
+
+;; Return 1 iff OP is an indexed memory operand.
+
+(define_predicate "indexed_memory_operand"
+ (match_code "subreg,mem")
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+
+ /* Before reload, a (SUBREG (MEM...)) forces reloading into a register. */
+ if (reload_completed && GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
+ return 0;
+
+ op = XEXP (op, 0);
+
+ return (memory_address_p (mode, op) && IS_INDEX_ADDR_P (op));
+})
+
+;; Return 1 iff OP is a symbolic operand.
+;; Note: an inline copy of this code is present in pa_secondary_reload.
+
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ return !SYMBOL_REF_TLS_MODEL (op);
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ op = XEXP (op, 0);
+ return (GET_CODE (op) == PLUS
+ && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+ default:
+ return 0;
+ }
+})
+
+;; Return truth value of statement that OP is a symbolic memory
+;; operand of mode MODE.
+
+(define_predicate "symbolic_memory_operand"
+ (match_code "subreg,mem")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = XEXP (op, 0);
+ return ((GET_CODE (op) == SYMBOL_REF && !SYMBOL_REF_TLS_MODEL (op))
+ || GET_CODE (op) == CONST || GET_CODE (op) == HIGH
+ || GET_CODE (op) == LABEL_REF);
+})
+
+;; Return true if OP is a symbolic operand for the TLS Global Dynamic model.
+(define_predicate "tgd_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_GLOBAL_DYNAMIC")))
+
+;; Return true if OP is a symbolic operand for the TLS Local Dynamic model.
+(define_predicate "tld_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_DYNAMIC")))
+
+;; Return true if OP is a symbolic operand for the TLS Initial Exec model.
+(define_predicate "tie_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC")))
+
+;; Return true if OP is a symbolic operand for the TLS Local Exec model.
+(define_predicate "tle_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC")))
+
+
+;; Return 1 if the operand is a register operand or a non-symbolic
+;; memory operand after reload. This predicate is used for branch
+;; patterns that internally handle register reloading. We need to
+;; accept non-symbolic memory operands after reload to ensure that the
+;; pattern is still valid if reload didn't find a hard register for
+;; the operand.
+
+(define_predicate "reg_before_reload_operand"
+ (match_code "reg,mem")
+{
+ /* Don't accept a SUBREG since it will need a reload. */
+ if (GET_CODE (op) == SUBREG)
+ return 0;
+
+ if (register_operand (op, mode))
+ return 1;
+
+ if (reload_completed
+ && memory_operand (op, mode)
+ && !symbolic_memory_operand (op, mode))
+ return 1;
+
+ return 0;
+})
+
+;; Return 1 if the operand is either a register, zero, or a memory
+;; operand that is not symbolic.
+
+(define_predicate "reg_or_0_or_nonsymb_mem_operand"
+ (match_code "subreg,reg,mem,const_int,const_double")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (op == CONST0_RTX (mode))
+ return 1;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ /* Until problems with management of the REG_POINTER flag are resolved,
+ we need to delay creating move insns with unscaled indexed addresses
+ until CSE is not expected. */
+ if (!TARGET_NO_SPACE_REGS
+ && !cse_not_expected
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && REG_P (XEXP (XEXP (op, 0), 0))
+ && REG_P (XEXP (XEXP (op, 0), 1)))
+ return 0;
+
+ return (!symbolic_memory_operand (op, mode)
+ && memory_address_p (mode, XEXP (op, 0)));
+})
+
+;; Accept anything that can be used as a destination operand for a
+;; move instruction. We don't accept indexed memory operands since
+;; they are supported only for floating point stores.
+
+(define_predicate "move_dest_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (GET_MODE (op) != mode)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) != MEM || symbolic_memory_operand (op, mode))
+ return 0;
+
+ op = XEXP (op, 0);
+
+ return (memory_address_p (mode, op)
+ && !IS_INDEX_ADDR_P (op)
+ && !IS_LO_SUM_DLT_ADDR_P (op));
+})
+
+;; Accept anything that can be used as a source operand for a move
+;; instruction.
+
+(define_predicate "move_src_operand"
+ (match_code "subreg,reg,const_int,const_double,mem")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ if (op == CONST0_RTX (mode))
+ return 1;
+
+ if (GET_CODE (op) == CONST_INT)
+ return cint_ok_for_move (INTVAL (op));
+
+ if (GET_MODE (op) != mode)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ /* Until problems with management of the REG_POINTER flag are resolved,
+ we need to delay creating move insns with unscaled indexed addresses
+ until CSE is not expected. */
+ if (!TARGET_NO_SPACE_REGS
+ && !cse_not_expected
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && REG_P (XEXP (XEXP (op, 0), 0))
+ && REG_P (XEXP (XEXP (op, 0), 1)))
+ return 0;
+
+ return memory_address_p (mode, XEXP (op, 0));
+})
+
+;; Accept REG and any CONST_INT that can be moved in one instruction
+;; into a general register.
+
+(define_predicate "reg_or_cint_move_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (register_operand (op, mode))
+ return 1;
+
+ return (GET_CODE (op) == CONST_INT && cint_ok_for_move (INTVAL (op)));
+})
+
+;; Return 1 iff OP is a label, or a label plus a constant integer
+;; offset, and we are generating PIC code.
+
+(define_predicate "pic_label_operand"
+ (match_code "label_ref,const")
+{
+ if (!flag_pic)
+ return 0;
+
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ return 1;
+ case CONST:
+ op = XEXP (op, 0);
+ return (GET_CODE (XEXP (op, 0)) == LABEL_REF
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+ default:
+ return 0;
+ }
+})
+
+;; Return 1 iff OP is a floating point register.  The reg_renumber
+;; check restricts this predicate to use after register allocation
+;; has started.
+
+(define_predicate "fp_reg_operand"
+ (match_code "reg")
+{
+ return reg_renumber && FP_REG_P (op);
+})
+
+;; Return truth value of whether OP can be used as an operand in a
+;; three operand arithmetic insn that accepts registers of mode MODE
+;; or 14-bit signed integers.
+
+(define_predicate "arith_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
+})
+
+;; Return truth value of whether OP can be used as an operand in a
+;; three operand arithmetic insn that accepts registers of mode MODE
+;; or 11-bit signed integers.
+
+(define_predicate "arith11_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
+})
+
+;; A constant integer suitable for use in a PRE_MODIFY memory
+;; reference.
+
+(define_predicate "pre_cint_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
+})
+
+;; A constant integer suitable for use in a POST_MODIFY memory
+;; reference.
+
+(define_predicate "post_cint_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT
+ && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
+})
+
+;; Return 1 iff OP is a register of mode MODE, or a CONST_DOUBLE of
+;; mode MODE whose value fits in 14 bits as a sign-extended constant.
+
+(define_predicate "arith_double_operand"
+ (match_code "subreg,reg,const_double")
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_DOUBLE
+ && GET_MODE (op) == mode
+ && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
+ && ((CONST_DOUBLE_HIGH (op) >= 0)
+ == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns, or is an
+;; integer register.
+
+(define_predicate "ireg_or_int5_operand"
+ (match_code "const_int,reg")
+{
+ return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
+ || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns.
+
+(define_predicate "int5_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns.
+
+(define_predicate "uint5_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns.
+
+(define_predicate "int11_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns.
+
+(define_predicate "uint32_operand"
+ (match_code "const_int,const_double")
+{
+#if HOST_BITS_PER_WIDE_INT > 32
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
+#else
+ return (GET_CODE (op) == CONST_INT
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0));
+#endif
+})
+
+;; Return truth value of whether OP is an integer which fits the range
+;; constraining immediate operands in three-address insns.
+
+(define_predicate "arith5_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return register_operand (op, mode) || int5_operand (op, mode);
+})
+
+;; True iff depi or extru can be used to compute (reg & OP).
+
+(define_predicate "and_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return (register_operand (op, mode)
+ || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
+})
+
+;; True iff depi can be used to compute (reg | OP).
+
+(define_predicate "cint_ior_operand"
+ (and (match_code "const_int")
+ (match_test "ior_mask_p (INTVAL (op))")))
+
+;; True iff OP can be used to compute (reg | OP).
+
+(define_predicate "reg_or_cint_ior_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "cint_ior_operand")))
+
+;; True iff OP is a CONST_INT of the forms 0...0xxxx, 0...01...1xxxx,
+;; or 1...1xxxx. Such values can be the left hand side x in (x << r),
+;; using the zvdepi instruction.
+
+(define_predicate "lhs_lshift_cint_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT x;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ x = INTVAL (op) >> 4;
+ return (x & (x + 1)) == 0;
+})
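+
+;; To illustrate the test: after discarding the low four bits, X must
+;; be zero or a block of ones starting at bit 0, which is exactly when
+;; (X & (X + 1)) == 0.  For OP = 0xf0, X is 0xf and 0xf & 0x10 == 0, so
+;; the operand is accepted; for OP = 0x140, X is 0x14 and 0x14 & 0x15
+;; is nonzero, so it is rejected.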
+
+;; Return 1 iff OP is a register, or a constant accepted by
+;; lhs_lshift_cint_operand above.
+
+(define_predicate "lhs_lshift_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
+})
+
+;; Return 1 iff OP is a register of mode MODE or any CONST_INT.
+
+(define_predicate "arith32_operand"
+ (match_code "subreg,reg,const_int")
+{
+ return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
+})
+
+;; Return 1 iff OP is the pc rtx or a label_ref; this lets branch
+;; patterns match either arm of an if_then_else as the jump target.
+
+(define_predicate "pc_or_label_operand"
+ (match_code "pc,label_ref")
+{
+ return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
+})
+
+;; Return 1 iff OP is a PLUS, XOR or IOR operator.
+
+(define_predicate "plus_xor_ior_operator"
+ (match_code "plus,xor,ior")
+{
+ return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
+ || GET_CODE (op) == IOR);
+})
+
+;; Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
+;; the valid constants for shadd instructions.
+
+(define_predicate "shadd_operand"
+ (match_code "const_int")
+{
+ return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
+})
+
+;; Return 1 iff OP is a valid SImode divisor for a millicode division:
+;; either general register 25 or a positive constant less than 16 for
+;; which a magic millicode routine exists.
+
+(define_predicate "div_operand"
+ (match_code "reg,const_int")
+{
+ return (mode == SImode
+ && ((GET_CODE (op) == REG && REGNO (op) == 25)
+ || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
+ && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
+})
+
+;; Return nonzero if OP is an integer register, else return zero.
+
+(define_predicate "ireg_operand"
+ (match_code "reg")
+{
+ return (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32);
+})
+
+;; Return 1 if this is a comparison operator. This allows the use of
+;; MATCH_OPERATOR to recognize all the branch insns.
+
+(define_predicate "cmpib_comparison_operator"
+ (match_code "eq,ne,lt,le,leu,gt,gtu,ge")
+{
+ return ((mode == VOIDmode || GET_MODE (op) == mode)
+ && (GET_CODE (op) == EQ
+ || GET_CODE (op) == NE
+ || GET_CODE (op) == GT
+ || GET_CODE (op) == GTU
+ || GET_CODE (op) == GE
+ || GET_CODE (op) == LT
+ || GET_CODE (op) == LE
+ || GET_CODE (op) == LEU));
+})
+
+;; Return 1 if OP is an operator suitable for use in a movb
+;; instruction.
+
+(define_predicate "movb_comparison_operator"
+ (match_code "eq,ne,lt,ge")
+{
+ return (GET_CODE (op) == EQ || GET_CODE (op) == NE
+ || GET_CODE (op) == LT || GET_CODE (op) == GE);
+})
diff --git a/gcc/config/pa/quadlib.c b/gcc/config/pa/quadlib.c
new file mode 100644
index 000000000..2c1160015
--- /dev/null
+++ b/gcc/config/pa/quadlib.c
@@ -0,0 +1,245 @@
+/* Subroutines for long double support.
+ Copyright (C) 2000, 2002, 2004, 2005, 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* HPUX TFmode compare requires a library call to _U_Qfcmp. It takes
+ a magic number as its third argument which indicates what to do.
+ The return value is an integer to be compared against zero. The
+ comparison conditions are the same as those listed in Table 8-12
+ of the PA-RISC 2.0 Architecture book for the fcmp instruction. */
+
+/* Raise FP_INVALID on SNaN as a side effect. */
+#define QCMP_INV 1
+
+/* Comparison relations. */
+#define QCMP_UNORD 2
+#define QCMP_EQ 4
+#define QCMP_LT 8
+#define QCMP_GT 16
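+
+/* For example, a C-level "a >= b" on long double maps to _U_Qfge
+   below, i.e. _U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_GT) != 0:
+   the result is nonzero only when the operands compare equal or
+   greater, unordered operands yield false, and QCMP_INV requests the
+   invalid-operation exception for a signaling NaN.  */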
+
+int _U_Qfcmp (long double a, long double b, int);
+long _U_Qfcnvfxt_quad_to_sgl (long double);
+
+int _U_Qfeq (long double, long double);
+int _U_Qfne (long double, long double);
+int _U_Qfgt (long double, long double);
+int _U_Qfge (long double, long double);
+int _U_Qflt (long double, long double);
+int _U_Qfle (long double, long double);
+int _U_Qfltgt (long double, long double);
+int _U_Qfunle (long double, long double);
+int _U_Qfunlt (long double, long double);
+int _U_Qfunge (long double, long double);
+int _U_Qfungt (long double, long double);
+int _U_Qfuneq (long double, long double);
+int _U_Qfunord (long double, long double);
+int _U_Qford (long double, long double);
+
+int _U_Qfcomp (long double, long double);
+
+long double _U_Qfneg (long double);
+long double _U_Qfcopysign (long double, long double);
+
+#ifdef __LP64__
+int __U_Qfcnvfxt_quad_to_sgl (long double);
+#endif
+unsigned int _U_Qfcnvfxt_quad_to_usgl(long double);
+long double _U_Qfcnvxf_usgl_to_quad (unsigned int);
+unsigned long long _U_Qfcnvfxt_quad_to_udbl(long double);
+long double _U_Qfcnvxf_udbl_to_quad (unsigned long long);
+
+int
+_U_Qfeq (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_EQ) != 0);
+}
+
+int
+_U_Qfne (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_EQ) == 0);
+}
+
+int
+_U_Qfgt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_GT) != 0);
+}
+
+int
+_U_Qfge (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_GT) != 0);
+}
+
+int
+_U_Qflt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_LT) != 0);
+}
+
+int
+_U_Qfle (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_LT) != 0);
+}
+
+int
+_U_Qfltgt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_LT | QCMP_GT) != 0);
+}
+
+int
+_U_Qfunle (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ | QCMP_LT) != 0);
+}
+
+int
+_U_Qfunlt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_LT) != 0);
+}
+
+int
+_U_Qfunge (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ | QCMP_GT) != 0);
+}
+
+int
+_U_Qfungt (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_GT) != 0);
+}
+
+int
+_U_Qfuneq (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD | QCMP_EQ) != 0);
+}
+
+int
+_U_Qfunord (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_UNORD) != 0);
+}
+
+int
+_U_Qford (long double a, long double b)
+{
+ return (_U_Qfcmp (a, b, QCMP_INV | QCMP_EQ | QCMP_LT | QCMP_GT) != 0);
+}
+
+int
+_U_Qfcomp (long double a, long double b)
+{
+ if (_U_Qfcmp (a, b, QCMP_EQ) == 0)
+ return 0;
+
+ return (_U_Qfcmp (a, b, QCMP_UNORD | QCMP_EQ | QCMP_GT) != 0 ? 1 : -1);
+}
+
+/* Negate long double A. */
+long double
+_U_Qfneg (long double a)
+{
+ union
+ {
+ long double ld;
+ int i[4];
+ } u;
+
+ u.ld = a;
+ u.i[0] ^= 0x80000000;
+ return u.ld;
+}
+
+/* Return long double A with sign changed to sign of long double B. */
+long double
+_U_Qfcopysign (long double a, long double b)
+{
+ union
+ {
+ long double ld;
+ int i[4];
+ } ua, ub;
+
+ ua.ld = a;
+ ub.ld = b;
+ ua.i[0] &= 0x7fffffff;
+ ua.i[0] |= (0x80000000 & ub.i[0]);
+ return ua.ld;
+}
+
+#ifdef __LP64__
+/* This routine is only necessary for the PA64 port; for reasons unknown
+   _U_Qfcnvfxt_quad_to_sgl returns the integer in the high 32 bits of the
+ return value. Ugh. */
+int
+__U_Qfcnvfxt_quad_to_sgl (long double a)
+{
+ return _U_Qfcnvfxt_quad_to_sgl (a) >> 32;
+}
+#endif
+
+/* HP only has signed conversion in the C library, so we need to synthesize
+ unsigned versions. */
+unsigned int
+_U_Qfcnvfxt_quad_to_usgl (long double a)
+{
+ extern long long _U_Qfcnvfxt_quad_to_dbl (long double a);
+ return (unsigned int) _U_Qfcnvfxt_quad_to_dbl (a);
+}
+
+long double
+_U_Qfcnvxf_usgl_to_quad (unsigned int a)
+{
+ extern long double _U_Qfcnvxf_dbl_to_quad (long long);
+ return _U_Qfcnvxf_dbl_to_quad ((long long) a);
+}
+
+typedef union {
+ unsigned long long u[2];
+ long double d[1];
+} quad_type;
+
+unsigned long long
+_U_Qfcnvfxt_quad_to_udbl (long double a)
+{
+ extern quad_type _U_Qfcnvfxt_quad_to_quad (long double a);
+ quad_type u;
+ u = _U_Qfcnvfxt_quad_to_quad(a);
+ return u.u[1];
+}
+
+long double
+_U_Qfcnvxf_udbl_to_quad (unsigned long long a)
+{
+ extern long double _U_Qfcnvxf_quad_to_quad (quad_type a);
+ quad_type u;
+ u.u[0] = 0;
+ u.u[1] = a;
+ return _U_Qfcnvxf_quad_to_quad (u);
+}
diff --git a/gcc/config/pa/som.h b/gcc/config/pa/som.h
new file mode 100644
index 000000000..73095e5e0
--- /dev/null
+++ b/gcc/config/pa/som.h
@@ -0,0 +1,341 @@
+/* Definitions for SOM assembler support.
+ Copyright (C) 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* So we can conditionalize small amounts of code in pa.c or pa.md. */
+#undef TARGET_SOM
+#define TARGET_SOM 1
+
+/* We do not use BINCL stabs in SOM.
+   ??? If it does not hurt, we probably should, to avoid useless divergence
+ from other embedded stabs implementations. */
+#undef DBX_USE_BINCL
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+
+/* gdb needs a null N_SO at the end of each file for scattered loading. */
+
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+/* HPUX has a program 'chatr' to list the dependencies of dynamically
+ linked executables and shared libraries. */
+#define LDD_SUFFIX "chatr"
+/* Look for lines like "dynamic /usr/lib/X11R5/libX11.sl"
+ or "static /usr/lib/X11R5/libX11.sl".
+
+ HPUX 10.20 also has lines like "static branch prediction ..."
+ so we filter that out explicitly.
+
+ We also try to bound our search for libraries with marker
+ lines. What a pain. */
+#define PARSE_LDD_OUTPUT(PTR) \
+do { \
+ static int in_shlib_list = 0; \
+ while (*PTR == ' ') PTR++; \
+ if (strncmp (PTR, "shared library list:", \
+ sizeof ("shared library list:") - 1) == 0) \
+ { \
+ PTR = 0; \
+ in_shlib_list = 1; \
+ } \
+ else if (strncmp (PTR, "shared library binding:", \
+ sizeof ("shared library binding:") - 1) == 0)\
+ { \
+ PTR = 0; \
+ in_shlib_list = 0; \
+ } \
+ else if (strncmp (PTR, "static branch prediction disabled", \
+ sizeof ("static branch prediction disabled") - 1) == 0)\
+ { \
+ PTR = 0; \
+ in_shlib_list = 0; \
+ } \
+ else if (in_shlib_list \
+ && strncmp (PTR, "dynamic", sizeof ("dynamic") - 1) == 0) \
+ { \
+ PTR += sizeof ("dynamic") - 1; \
+      while (*PTR == ' ') PTR++; \
+ } \
+ else if (in_shlib_list \
+ && strncmp (PTR, "static", sizeof ("static") - 1) == 0) \
+ { \
+ PTR += sizeof ("static") - 1; \
+      while (*PTR == ' ') PTR++; \
+ } \
+ else \
+ PTR = 0; \
+} while (0)
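+
+/* A sketch of the intended parse: the marker line "shared library
+   list:" turns in_shlib_list on and yields no library; inside the
+   list, a line such as "    dynamic   /usr/lib/X11R5/libX11.sl"
+   leaves PTR pointing at the pathname; "shared library binding:" or
+   the "static branch prediction disabled" notice ends the list and
+   clears PTR.  */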
+
+/* Output the label for a function definition. */
+#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
+#define ASM_DOUBLE_ARG_DESCRIPTORS(FILE, ARG0, ARG1) \
+ do { fprintf (FILE, ",ARGW%d=FR", (ARG0)); \
+ fprintf (FILE, ",ARGW%d=FU", (ARG1));} while (0)
+#define DFMODE_RETURN_STRING ",RTNVAL=FU"
+#define SFMODE_RETURN_STRING ",RTNVAL=FR"
+#else
+#define ASM_DOUBLE_ARG_DESCRIPTORS(FILE, ARG0, ARG1) \
+ do { fprintf (FILE, ",ARGW%d=FU", (ARG0)); \
+ fprintf (FILE, ",ARGW%d=FR", (ARG1));} while (0)
+#define DFMODE_RETURN_STRING ",RTNVAL=FR"
+#define SFMODE_RETURN_STRING ",RTNVAL=FU"
+#endif
+
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do { tree fntype = TREE_TYPE (TREE_TYPE (DECL)); \
+ tree tree_type = TREE_TYPE (DECL); \
+ tree parm; \
+ int i; \
+ if (TREE_PUBLIC (DECL) || TARGET_GAS) \
+ { \
+ if (TREE_PUBLIC (DECL)) \
+ { \
+ fputs ("\t.EXPORT ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputs (",ENTRY,PRIV_LEV=3", FILE); \
+ } \
+ else \
+ { \
+ fputs ("\t.PARAM ", FILE); \
+ assemble_name (FILE, NAME); \
+ fputs (",PRIV_LEV=3", FILE); \
+ } \
+ for (parm = DECL_ARGUMENTS (DECL), i = 0; parm && i < 4; \
+ parm = DECL_CHAIN (parm)) \
+ { \
+ if (TYPE_MODE (DECL_ARG_TYPE (parm)) == SFmode \
+ && ! TARGET_SOFT_FLOAT) \
+ fprintf (FILE, ",ARGW%d=FR", i++); \
+ else if (TYPE_MODE (DECL_ARG_TYPE (parm)) == DFmode \
+ && ! TARGET_SOFT_FLOAT) \
+ { \
+ if (i <= 2) \
+ { \
+ if (i == 1) i++; \
+ ASM_DOUBLE_ARG_DESCRIPTORS (FILE, i++, i++); \
+ } \
+ else \
+ break; \
+ } \
+ else \
+ { \
+ int arg_size = \
+ FUNCTION_ARG_SIZE (TYPE_MODE (DECL_ARG_TYPE (parm)),\
+ DECL_ARG_TYPE (parm)); \
+ /* Passing structs by invisible reference uses \
+ one general register. */ \
+ if (arg_size > 2 \
+ || TREE_ADDRESSABLE (DECL_ARG_TYPE (parm))) \
+ arg_size = 1; \
+ if (arg_size == 2 && i <= 2) \
+ { \
+ if (i == 1) i++; \
+ fprintf (FILE, ",ARGW%d=GR", i++); \
+ fprintf (FILE, ",ARGW%d=GR", i++); \
+ } \
+ else if (arg_size == 1) \
+ fprintf (FILE, ",ARGW%d=GR", i++); \
+ else \
+ i += arg_size; \
+ } \
+ } \
+ /* anonymous args */ \
+ if (stdarg_p (tree_type)) \
+ { \
+ for (; i < 4; i++) \
+ fprintf (FILE, ",ARGW%d=GR", i); \
+ } \
+ if (TYPE_MODE (fntype) == DFmode && ! TARGET_SOFT_FLOAT) \
+ fputs (DFMODE_RETURN_STRING, FILE); \
+ else if (TYPE_MODE (fntype) == SFmode && ! TARGET_SOFT_FLOAT) \
+ fputs (SFMODE_RETURN_STRING, FILE); \
+ else if (fntype != void_type_node) \
+ fputs (",RTNVAL=GR", FILE); \
+ fputs ("\n", FILE); \
+ }} while (0)
+
+#define TARGET_ASM_FILE_START pa_som_file_start
+#define TARGET_ASM_INIT_SECTIONS pa_som_asm_init_sections
+
+/* String to output before writable data. */
+#define DATA_SECTION_ASM_OP "\t.SPACE $PRIVATE$\n\t.SUBSPA $DATA$\n"
+
+/* String to output before uninitialized data. */
+#define BSS_SECTION_ASM_OP "\t.SPACE $PRIVATE$\n\t.SUBSPA $BSS$\n"
+
+/* This is how to output a command to make the user-level label
+ named NAME defined for reference from other files. We use
+ assemble_name_raw instead of assemble_name since a symbol in
+ a .IMPORT directive that isn't otherwise referenced is not
+ placed in the symbol table of the assembled object.
+
+ Failure to import a function reference can cause the HP linker
+ to segmentation fault!
+
+ Note that the SOM based tools need the symbol imported as a
+ CODE symbol, while the ELF based tools require the symbol to
+ be imported as an ENTRY symbol. */
+
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ pa_hpux_asm_output_external ((FILE), (DECL), (NAME))
+#define ASM_OUTPUT_EXTERNAL_REAL(FILE, DECL, NAME) \
+ do { fputs ("\t.IMPORT ", FILE); \
+ assemble_name_raw (FILE, NAME); \
+ if (FUNCTION_NAME_P (NAME)) \
+ fputs (",CODE\n", FILE); \
+ else \
+ fputs (",DATA\n", FILE); \
+ } while (0)
+
+/* The bogus HP assembler requires ALL external references to be
+ "imported", even library calls. They look a bit different, so
+ here's this macro.
+
+ Also note not all libcall names are passed to pa_encode_section_info
+ (__main for example). To make sure all libcall names have section
+ info recorded in them, we do it here.
+
+ We must also ensure that a libcall that has been previously
+ exported is not subsequently imported since the HP assembler may
+ change the type from an ENTRY to a CODE symbol. This would make
+ the symbol local. We are forced to use the identifier node
+ associated with the real assembler name for this check as the
+ symbol_ref available in ASM_DECLARE_FUNCTION_NAME is not the
+ same as the one used here. As a result, we can't use flags
+ in the symbol_ref for this check. The identifier check assumes
+ assemble_external_libcall is called before the symbol is used. */
+
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, RTL) \
+ do { const char *name; \
+ tree id; \
+ \
+ if (!function_label_operand (RTL, VOIDmode)) \
+ hppa_encode_label (RTL); \
+ \
+ name = targetm.strip_name_encoding (XSTR ((RTL), 0)); \
+ id = maybe_get_identifier (name); \
+ if (!id || !TREE_SYMBOL_REFERENCED (id)) \
+ { \
+ fputs ("\t.IMPORT ", FILE); \
+ assemble_name_raw (FILE, XSTR ((RTL), 0)); \
+ fputs (",CODE\n", FILE); \
+ } \
+ } while (0)
+
+/* We want __gcc_plt_call to appear in every program built by
+ gcc, so we make a reference to it out of __main.
+ We use the asm statement to fool the optimizer into not
+ removing the dead (but important) initialization of
+ REFERENCE. */
+
+#define DO_GLOBAL_DTORS_BODY \
+do { \
+ extern void __gcc_plt_call (void); \
+ void (*reference)(void) = &__gcc_plt_call; \
+ func_ptr *p; \
+ __asm__ ("" : : "r" (reference)); \
+ for (p = __DTOR_LIST__ + 1; *p; ) \
+ (*p++) (); \
+} while (0)
+
+/* This macro specifies the biggest alignment supported by the object
+ file format of this machine.
+
+ The .align directive in the HP assembler allows alignments up to 4096
+ bytes. However, the maximum alignment of a global common symbol is 8
+ bytes for objects smaller than the page size (4096 bytes). For larger
+ objects, the linker provides an alignment of 32 bytes. Unfortunately,
+ this macro doesn't provide a mechanism to test for common symbols. */
+#define MAX_OFILE_ALIGNMENT 32768
+
+/* The SOM linker hardcodes paths into binaries. As a result, dotdots
+ must be removed from library prefixes to prevent binaries from depending
+ on the location of the GCC tool directory. The downside is GCC
+ cannot be moved after installation using a symlink. */
+#define ALWAYS_STRIP_DOTDOT 1
+
+/* If GAS supports weak, we can support weak when we have working linker
+ support for secondary definitions and are generating code for GAS.
+ This is primarily for one-only support as SOM doesn't allow undefined
+ weak symbols. */
+#ifdef HAVE_GAS_WEAK
+#define TARGET_SUPPORTS_WEAK (TARGET_SOM_SDEF && TARGET_GAS)
+#else
+#define TARGET_SUPPORTS_WEAK 0
+#endif
+
+/* CVS GAS as of 4/28/04 supports a comdat parameter for the .nsubspa
+ directive. This provides one-only linkage semantics even though we
+ don't have weak support. */
+#ifdef HAVE_GAS_NSUBSPA_COMDAT
+#define SUPPORTS_SOM_COMDAT (TARGET_GAS)
+#else
+#define SUPPORTS_SOM_COMDAT 0
+#endif
+
+/* We can support one-only semantics if we support weak or comdat.  */
+#define SUPPORTS_ONE_ONLY (TARGET_SUPPORTS_WEAK || SUPPORTS_SOM_COMDAT)
+
+/* We use DECL_COMMON for uninitialized one-only variables as we don't
+ have linkonce .bss. We use SOM secondary definitions or comdat for
+ initialized variables and functions. */
+#define MAKE_DECL_ONE_ONLY(DECL) \
+ do { \
+ if (TREE_CODE (DECL) == VAR_DECL \
+ && (DECL_INITIAL (DECL) == 0 \
+ || DECL_INITIAL (DECL) == error_mark_node)) \
+ DECL_COMMON (DECL) = 1; \
+ else if (TARGET_SUPPORTS_WEAK) \
+ DECL_WEAK (DECL) = 1; \
+ } while (0)
+
+/* This is how we tell the assembler that a symbol is weak. The SOM
+ weak implementation uses the secondary definition (sdef) flag.
+
+ The behavior of sdef symbols is similar to ELF weak symbols in that
+ multiple definitions can occur without incurring a link error.
+ However, they differ in the following ways:
+ 1) Undefined sdef symbols are not allowed.
+ 2) The linker searches for undefined sdef symbols and will load an
+ archive library member to resolve an undefined sdef symbol.
+ 3) The exported symbol from a shared library is a primary symbol
+ rather than a sdef symbol. Thus, more care is needed in the
+ ordering of libraries.
+
+ It appears that the linker discards extra copies of "weak" functions
+ when linking shared libraries, independent of whether or not they
+ are in their own section. In linking final executables, -Wl,-O can
+ be used to remove dead procedures. Thus, support for named sections
+ is not needed and in previous testing caused problems with various
+ HP tools. */
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weak\t", FILE); \
+ assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); \
+ targetm.asm_out.globalize_label (FILE, NAME); \
+ } while (0)
+
+/* We can't handle weak aliases, and therefore can't support pragma weak.
+ Suppress the use of pragma weak in gthr-dce.h and gthr-posix.h. */
+#define GTHREAD_USE_WEAK 0
+
+/* Shared library suffix. Collect2 strips the version string after
+ this suffix when generating constructor/destructor names. */
+#define SHLIB_SUFFIX ".sl"
diff --git a/gcc/config/pa/stublib.c b/gcc/config/pa/stublib.c
new file mode 100644
index 000000000..d3cf559c8
--- /dev/null
+++ b/gcc/config/pa/stublib.c
@@ -0,0 +1,97 @@
+/* Stub functions.
+ Copyright (C) 2006, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_register_frame_info
+struct object;
+void __register_frame_info (const void * __attribute__((unused)),
+ struct object * __attribute__((unused)));
+void
+__register_frame_info (const void *p, struct object *ob)
+{
+}
+#endif
+
+#ifdef L_deregister_frame_info
+void *__deregister_frame_info (const void * __attribute__((unused)));
+void *
+__deregister_frame_info (const void *p)
+{
+ return (void *)0;
+}
+#endif
+
+#ifdef L_cxa_finalize
+void __cxa_finalize (void * __attribute__((unused)));
+void
+__cxa_finalize (void *p)
+{
+}
+#endif
+
+#ifdef L_Jv_RegisterClasses
+void _Jv_RegisterClasses (void * __attribute__((unused)));
+void
+_Jv_RegisterClasses (void *p)
+{
+}
+#endif
+
+#ifdef L_pthread_default_stacksize_np
+int pthread_default_stacksize_np (unsigned long __attribute__((unused)),
+ unsigned long *);
+int
+pthread_default_stacksize_np (unsigned long new, unsigned long *old)
+{
+ if (old)
+ *old = 0;
+ return 0;
+}
+#endif
+
+#ifdef L_pthread_mutex_lock
+int pthread_mutex_lock (void);
+int
+pthread_mutex_lock (void)
+{
+ return 0;
+}
+#endif
+
+#ifdef L_pthread_mutex_unlock
+int pthread_mutex_unlock (void);
+int
+pthread_mutex_unlock (void)
+{
+ return 0;
+}
+#endif
+
+#ifdef L_pthread_once
+int pthread_once (void);
+int
+pthread_once (void)
+{
+ return 0;
+}
+#endif
diff --git a/gcc/config/pa/t-dce-thr b/gcc/config/pa/t-dce-thr
new file mode 100644
index 000000000..8d86a4181
--- /dev/null
+++ b/gcc/config/pa/t-dce-thr
@@ -0,0 +1,5 @@
+MULTILIB_OPTIONS = threads
+MULTILIB_DIRNAMES = threads
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/pa/t-hpux-shlib b/gcc/config/pa/t-hpux-shlib
new file mode 100644
index 000000000..d5a5b6c86
--- /dev/null
+++ b/gcc/config/pa/t-hpux-shlib
@@ -0,0 +1,46 @@
+# Copyright (C) 2001, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build a shared libgcc library.
+SHLIB_EXT = .sl
+SHLIB_NAME = @shlib_base_name@$(SHLIB_EXT)
+SHLIB_SOVERSION = 1
+SHLIB_SONAME = @shlib_base_name@.$(SHLIB_SOVERSION)
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,+h -Wl,$(SHLIB_SONAME) \
+ -o $(SHLIB_DIR)/$(SHLIB_NAME).tmp @multilib_flags@ $(SHLIB_OBJS) && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_NAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_NAME) $(SHLIB_DIR)/$(SHLIB_NAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_NAME).tmp $(SHLIB_DIR)/$(SHLIB_NAME) && \
+ $(LN_S) $(SHLIB_NAME) $(SHLIB_DIR)/$(SHLIB_SONAME)
+
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) -m 555 $(SHLIB_DIR)/$(SHLIB_NAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_NAME); \
+ $(LN_S) $(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_NAME)
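+
+# For example (a sketch, assuming shlib_base_name is libgcc_s): the link
+# step produces libgcc_s.sl whose internal name (+h) is libgcc_s.1, and
+# the install rule copies it into $(slibdir) as libgcc_s.1 and creates a
+# libgcc_s.sl symlink pointing at it.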
diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux
new file mode 100644
index 000000000..ba42ad03c
--- /dev/null
+++ b/gcc/config/pa/t-linux
@@ -0,0 +1,39 @@
+# Copyright (C) 1999, 2001, 2002, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.  We use the "64-bit" routines because the "32-bit" code
+# is broken for certain corner cases.
+
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
+LIB1ASMSRC = pa/milli64.S
+
+# Compile libgcc2.a as PIC.
+TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1
+
+LIB2FUNCS_EXTRA=fptr.c
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c
+
+fptr.c: $(srcdir)/config/pa/fptr.c
+ rm -f fptr.c
+ cp $(srcdir)/config/pa/fptr.c .
+
+# Compile crtbeginS.o and crtendS.o as PIC.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+MULTIARCH_DIRNAME = $(call if_multiarch,hppa-linux-gnu)
diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64
new file mode 100644
index 000000000..cfa73606f
--- /dev/null
+++ b/gcc/config/pa/t-linux64
@@ -0,0 +1,34 @@
+# Copyright (C) 2001, 2008, 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Plug millicode routines into libgcc.a.  We want these on both native and
+# cross compiles.
+
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI
+LIB1ASMSRC = pa/milli64.S
+
+# Compile crtbeginS.o and crtendS.o as PIC.
+# Actually, hppa64 is always PIC but adding -fPIC does no harm.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c
+
+# Compile libgcc2.a as PIC.
+TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1
+
+MULTIARCH_DIRNAME = $(call if_multiarch,hppa-linux-gnu)
diff --git a/gcc/config/pa/t-pa b/gcc/config/pa/t-pa
new file mode 100644
index 000000000..cad060da0
--- /dev/null
+++ b/gcc/config/pa/t-pa
@@ -0,0 +1,7 @@
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+LIB2FUNCS_EXTRA=lib2funcs.asm
+
+lib2funcs.asm: $(srcdir)/config/pa/lib2funcs.asm
+ rm -f lib2funcs.asm
+ cp $(srcdir)/config/pa/lib2funcs.asm .
diff --git a/gcc/config/pa/t-pa-hpux b/gcc/config/pa/t-pa-hpux
new file mode 100644
index 000000000..63eab6362
--- /dev/null
+++ b/gcc/config/pa/t-pa-hpux
@@ -0,0 +1,7 @@
+lib2funcs.asm: $(srcdir)/config/pa/lib2funcs.asm
+ rm -f lib2funcs.asm
+ cp $(srcdir)/config/pa/lib2funcs.asm .
+
+quadlib.c: $(srcdir)/config/pa/quadlib.c
+ rm -f quadlib.c
+ cp $(srcdir)/config/pa/quadlib.c .
diff --git a/gcc/config/pa/t-pa-hpux10 b/gcc/config/pa/t-pa-hpux10
new file mode 100644
index 000000000..fd7ff4842
--- /dev/null
+++ b/gcc/config/pa/t-pa-hpux10
@@ -0,0 +1,2 @@
+TARGET_LIBGCC2_CFLAGS = -fPIC -frandom-seed=fixed-seed -D_T_HPUX10
+LIB2FUNCS_EXTRA=lib2funcs.asm quadlib.c
diff --git a/gcc/config/pa/t-pa-hpux11 b/gcc/config/pa/t-pa-hpux11
new file mode 100644
index 000000000..4436b4ca6
--- /dev/null
+++ b/gcc/config/pa/t-pa-hpux11
@@ -0,0 +1,31 @@
+TARGET_LIBGCC2_CFLAGS = -fPIC -frandom-seed=fixed-seed
+LIB2FUNCS_EXTRA=lib2funcs.asm quadlib.c
+LIBGCCSTUB_OBJS = pthread_default_stacksize_np-stub.o \
+ pthread_mutex_lock-stub.o \
+ pthread_mutex_unlock-stub.o \
+ pthread_once-stub.o
+
+stublib.c: $(srcdir)/config/pa/stublib.c
+ rm -f stublib.c
+ cp $(srcdir)/config/pa/stublib.c .
+
+pthread_default_stacksize_np-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_default_stacksize_np stublib.c \
+ -o pthread_default_stacksize_np-stub.o
+
+pthread_mutex_lock-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_lock stublib.c \
+ -o pthread_mutex_lock-stub.o
+
+pthread_mutex_unlock-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_unlock stublib.c \
+ -o pthread_mutex_unlock-stub.o
+
+pthread_once-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_once stublib.c \
+ -o pthread_once-stub.o
+
+$(T)libgcc_stub.a: $(LIBGCCSTUB_OBJS)
+ -rm -rf $(T)libgcc_stub.a
+ $(AR) rc $(T)libgcc_stub.a $(LIBGCCSTUB_OBJS)
+ $(RANLIB) $(T)libgcc_stub.a
diff --git a/gcc/config/pa/t-pa64 b/gcc/config/pa/t-pa64
new file mode 100644
index 000000000..e6ac7a5bb
--- /dev/null
+++ b/gcc/config/pa/t-pa64
@@ -0,0 +1,67 @@
+# Copyright (C) 2000, 2001, 2002, 2004, 2006,
+# 2007, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1 -mlong-calls
+LIB2FUNCS_EXTRA = quadlib.c
+LIBGCCSTUB_OBJS = rfi-stub.o dfi-stub.o jvrc-stub.o cxaf-stub.o \
+ pthread_default_stacksize_np-stub.o \
+ pthread_mutex_lock-stub.o \
+ pthread_mutex_unlock-stub.o \
+ pthread_once-stub.o
+
+stublib.c: $(srcdir)/config/pa/stublib.c
+ rm -f stublib.c
+ cp $(srcdir)/config/pa/stublib.c .
+
+rfi-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_register_frame_info stublib.c \
+ -o rfi-stub.o
+
+dfi-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_deregister_frame_info stublib.c \
+ -o dfi-stub.o
+
+cxaf-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_cxa_finalize stublib.c \
+ -o cxaf-stub.o
+
+jvrc-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_Jv_RegisterClasses stublib.c \
+ -o jvrc-stub.o
+
+pthread_default_stacksize_np-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_default_stacksize_np stublib.c \
+ -o pthread_default_stacksize_np-stub.o
+
+pthread_mutex_lock-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_lock stublib.c \
+ -o pthread_mutex_lock-stub.o
+
+pthread_mutex_unlock-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_mutex_unlock stublib.c \
+ -o pthread_mutex_unlock-stub.o
+
+pthread_once-stub.o: stublib.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -c -O2 -DL_pthread_once stublib.c \
+ -o pthread_once-stub.o
+
+$(T)libgcc_stub.a: $(LIBGCCSTUB_OBJS)
+ -rm -rf $(T)libgcc_stub.a
+ $(AR) rc $(T)libgcc_stub.a $(LIBGCCSTUB_OBJS)
+ $(RANLIB) $(T)libgcc_stub.a
diff --git a/gcc/config/pa/t-slibgcc-dwarf-ver b/gcc/config/pa/t-slibgcc-dwarf-ver
new file mode 100644
index 000000000..fa4688d69
--- /dev/null
+++ b/gcc/config/pa/t-slibgcc-dwarf-ver
@@ -0,0 +1,3 @@
+# Set the version number of the shared libgcc library (DWARF2 EH).
+
+SHLIB_SOVERSION = 4
diff --git a/gcc/config/pa/t-slibgcc-sjlj-ver b/gcc/config/pa/t-slibgcc-sjlj-ver
new file mode 100644
index 000000000..00140cf20
--- /dev/null
+++ b/gcc/config/pa/t-slibgcc-sjlj-ver
@@ -0,0 +1,3 @@
+# Set the version number of the shared libgcc library (SJLJ EH).
+
+SHLIB_SOVERSION = 3
diff --git a/gcc/config/pdp11/constraints.md b/gcc/config/pdp11/constraints.md
new file mode 100644
index 000000000..6e5a5798e
--- /dev/null
+++ b/gcc/config/pdp11/constraints.md
@@ -0,0 +1,81 @@
+;;- Constraint definitions for the pdp11 for GNU C compiler
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "f" "FPU_REGS"
+ "Any FPU register")
+
+(define_register_constraint "a" "LOAD_FPU_REGS"
+ "FPU register that can be directly loaded from memory")
+
+(define_register_constraint "d" "MUL_REGS"
+ "General register that can be used for 16-bit multiply (odd numbered)")
+
+(define_constraint "I"
+ "Integer constant that fits in 16 bits unsigned"
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff0000) == 0")))
+
+(define_constraint "J"
+ "Integer constant whose low 16 bits are zero"
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) == 0")))
+
+(define_constraint "K"
+ "Integer constant whose lower and upper 16 bit half are both non-zero"
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) != 0 && (ival & 0xffff0000) != 0")))
+
+(define_constraint "L"
+ "Integer constant 1"
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "M"
+ "Integer constant -1"
+ (and (match_code "const_int")
+ (match_test "ival == -1")))
+
+(define_constraint "N"
+ "Integer constant 0"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "O"
+ "Integer constant for which several individual shifts are better than one big one"
+ (and (match_code "const_int")
+ (match_test "abs (ival) > 1 && abs (ival) <= 4")))
+
+(define_constraint "G"
+ "Defines a real zero constant."
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+(define_constraint "Q"
+ "Memory reference that requires an additional word after the opcode"
+ (and (match_code "mem")
+ (match_test "memory_address_p (GET_MODE (op), XEXP (op, 0))
+ && !simple_memory_operand (op, GET_MODE (op))")))
+
+(define_constraint "R"
+ "Memory reference that is encoded within the opcode"
+ (and (match_code "mem")
+ (match_test "memory_address_p (GET_MODE (op), XEXP (op, 0))
+ && simple_memory_operand (op, GET_MODE (op))")))
+
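+;; For example (illustrative values): 0xffff satisfies "I" (fits in
+;; 16 bits unsigned), 0x10000 satisfies "J" (low 16 bits zero), and
+;; 0x12345 satisfies "K" (both 16-bit halves non-zero).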
diff --git a/gcc/config/pdp11/pdp11-modes.def b/gcc/config/pdp11/pdp11-modes.def
new file mode 100644
index 000000000..b91cde7d1
--- /dev/null
+++ b/gcc/config/pdp11/pdp11-modes.def
@@ -0,0 +1,26 @@
+/* Definitions of target machine for GNU compiler, for the pdp-11
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Add any extra modes needed to represent the condition code.
+ CCFPmode is used for FPU, but should we use a separate reg? */
+
+CC_MODE (CCFP);
+RESET_FLOAT_FORMAT (SF, pdp11_f_format);
+RESET_FLOAT_FORMAT (DF, pdp11_d_format);
diff --git a/gcc/config/pdp11/pdp11-protos.h b/gcc/config/pdp11/pdp11-protos.h
new file mode 100644
index 000000000..56ad909e1
--- /dev/null
+++ b/gcc/config/pdp11/pdp11-protos.h
@@ -0,0 +1,47 @@
+/* Definitions of target machine for GNU compiler, for the pdp-11
+ Copyright (C) 2000, 2003, 2004, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* declarations */
+#ifdef RTX_CODE
+extern int simple_memory_operand (rtx, enum machine_mode);
+
+extern int legitimate_const_double_p (rtx);
+extern void notice_update_cc_on_set (rtx, rtx);
+extern void output_addr_const_pdp11 (FILE *, rtx);
+extern const char *output_move_multiple (rtx *);
+extern const char *output_block_move (rtx *);
+extern const char *output_jump (enum rtx_code, int, int);
+extern void print_operand_address (FILE *, rtx);
+extern bool pdp11_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode, enum reg_class);
+extern bool pdp11_secondary_memory_needed (reg_class_t, reg_class_t,
+ enum machine_mode);
+typedef enum { no_action, dec_before, inc_after } pdp11_action;
+typedef enum { little, either, big } pdp11_partorder;
+extern bool pdp11_expand_operands (rtx *, rtx [][2], int,
+ pdp11_action *, pdp11_partorder);
+extern int pdp11_initial_elimination_offset (int, int);
+extern enum reg_class pdp11_regno_reg_class (int);
+
+#endif /* RTX_CODE */
+
+extern void output_ascii (FILE *, const char *, int);
+extern void pdp11_asm_output_var (FILE *, const char *, int, int, bool);
diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c
new file mode 100644
index 000000000..b6ed97989
--- /dev/null
+++ b/gcc/config/pdp11/pdp11.c
@@ -0,0 +1,1923 @@
+/* Subroutines for gcc2 for pdp11.
+ Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "function.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "tree.h"
+#include "expr.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+/* This is the current value returned by the macro FIRST_PARM_OFFSET
+   defined in tm.h.  */
+int current_first_parm_offset;
+
+/* Routines to encode/decode pdp11 floats */
+static void encode_pdp11_f (const struct real_format *fmt,
+ long *, const REAL_VALUE_TYPE *);
+static void decode_pdp11_f (const struct real_format *,
+ REAL_VALUE_TYPE *, const long *);
+static void encode_pdp11_d (const struct real_format *fmt,
+ long *, const REAL_VALUE_TYPE *);
+static void decode_pdp11_d (const struct real_format *,
+ REAL_VALUE_TYPE *, const long *);
+
+/* These two are taken from the corresponding vax descriptors
+ in real.c, changing only the encode/decode routine pointers. */
+const struct real_format pdp11_f_format =
+ {
+ encode_pdp11_f,
+ decode_pdp11_f,
+ 2,
+ 24,
+ 24,
+ -127,
+ 127,
+ 15,
+ 15,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false
+ };
+
+const struct real_format pdp11_d_format =
+ {
+ encode_pdp11_d,
+ decode_pdp11_d,
+ 2,
+ 56,
+ 56,
+ -127,
+ 127,
+ 15,
+ 15,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false,
+ false
+ };
+
+static void
+encode_pdp11_f (const struct real_format *fmt ATTRIBUTE_UNUSED, long *buf,
+ const REAL_VALUE_TYPE *r)
+{
+ (*vax_f_format.encode) (fmt, buf, r);
+ buf[0] = ((buf[0] >> 16) & 0xffff) | ((buf[0] & 0xffff) << 16);
+}
+
+static void
+decode_pdp11_f (const struct real_format *fmt ATTRIBUTE_UNUSED,
+ REAL_VALUE_TYPE *r, const long *buf)
+{
+ long tbuf;
+ tbuf = ((buf[0] >> 16) & 0xffff) | ((buf[0] & 0xffff) << 16);
+ (*vax_f_format.decode) (fmt, r, &tbuf);
+}
+
+static void
+encode_pdp11_d (const struct real_format *fmt ATTRIBUTE_UNUSED, long *buf,
+ const REAL_VALUE_TYPE *r)
+{
+ (*vax_d_format.encode) (fmt, buf, r);
+ buf[0] = ((buf[0] >> 16) & 0xffff) | ((buf[0] & 0xffff) << 16);
+ buf[1] = ((buf[1] >> 16) & 0xffff) | ((buf[1] & 0xffff) << 16);
+}
+
+static void
+decode_pdp11_d (const struct real_format *fmt ATTRIBUTE_UNUSED,
+ REAL_VALUE_TYPE *r, const long *buf)
+{
+ long tbuf[2];
+ tbuf[0] = ((buf[0] >> 16) & 0xffff) | ((buf[0] & 0xffff) << 16);
+ tbuf[1] = ((buf[1] >> 16) & 0xffff) | ((buf[1] & 0xffff) << 16);
+ (*vax_d_format.decode) (fmt, r, tbuf);
+}
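+
+/* In short, the PDP-11 formats above are the VAX F and D formats with
+   the two 16-bit words of each 32-bit unit exchanged; e.g. a VAX
+   longword 0xAAAABBBB is stored as 0xBBBBAAAA (an illustration of the
+   swap coded above, assuming 32-bit long buffers).  */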
+
+/* This is where the condition code register lives. */
+/* rtx cc0_reg_rtx; - no longer needed? */
+
+static bool pdp11_handle_option (size_t, const char *, int);
+static void pdp11_option_init_struct (struct gcc_options *);
+static const char *singlemove_string (rtx *);
+static bool pdp11_assemble_integer (rtx, unsigned int, int);
+static void pdp11_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void pdp11_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static bool pdp11_rtx_costs (rtx, int, int, int *, bool);
+static bool pdp11_return_in_memory (const_tree, const_tree);
+static rtx pdp11_function_value (const_tree, const_tree, bool);
+static rtx pdp11_libcall_value (enum machine_mode, const_rtx);
+static bool pdp11_function_value_regno_p (const unsigned int);
+static void pdp11_trampoline_init (rtx, tree, rtx);
+static rtx pdp11_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void pdp11_function_arg_advance (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static void pdp11_conditional_register_usage (void);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options pdp11_option_optimization_table[] =
+ {
+ { OPT_LEVELS_3_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP NULL
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP NULL
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP NULL
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER pdp11_assemble_integer
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE pdp11_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE pdp11_output_function_epilogue
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN "["
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN "]"
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (MASK_FPU | MASK_45 | TARGET_UNIX_ASM_DEFAULT)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION pdp11_handle_option
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE pdp11_option_optimization_table
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT pdp11_option_init_struct
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS pdp11_rtx_costs
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG pdp11_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE pdp11_function_arg_advance
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY pdp11_return_in_memory
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE pdp11_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE pdp11_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P pdp11_function_value_regno_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT pdp11_trampoline_init
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD pdp11_secondary_reload
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST pdp11_register_move_cost
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS pdp11_preferred_reload_class
+
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS pdp11_preferred_output_reload_class
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P pdp11_legitimate_address_p
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE pdp11_conditional_register_usage
+
+#undef TARGET_ASM_FUNCTION_SECTION
+#define TARGET_ASM_FUNCTION_SECTION pdp11_function_section
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND pdp11_asm_print_operand
+
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pdp11_asm_print_operand_punct_valid_p
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+pdp11_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_m10:
+ target_flags &= ~(MASK_40 | MASK_45);
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_INIT_STRUCT. */
+
+static void
+pdp11_option_init_struct (struct gcc_options *opts)
+{
+ opts->x_flag_finite_math_only = 0;
+ opts->x_flag_trapping_math = 0;
+ opts->x_flag_signaling_nans = 0;
+}
+
+/*
+ stream is a stdio stream to output the code to.
+ size is an int: how many units of temporary storage to allocate.
+ Refer to the array `regs_ever_live' to determine which registers
+ to save; `regs_ever_live[I]' is nonzero if register number I
+ is ever used in the function. This macro is responsible for
+ knowing which registers should not be saved even if used.
+*/
+
+static void
+pdp11_output_function_prologue (FILE *stream, HOST_WIDE_INT size)
+{
+ HOST_WIDE_INT fsize = ((size) + 1) & ~1;
+ int regno;
+ int via_ac = -1;
+
+ fprintf (stream,
+ "\n\t; /* function prologue %s*/\n",
+ current_function_name ());
+
+  /* If we are outputting code for main, then switch the FPU to the
+     right mode if TARGET_FPU.  */
+ if (MAIN_NAME_P (DECL_NAME (current_function_decl)) && TARGET_FPU)
+ {
+ fprintf(stream,
+ "\t;/* switch cpu to double float, single integer */\n");
+ fprintf(stream, "\tsetd\n");
+ fprintf(stream, "\tseti\n\n");
+ }
+
+ if (frame_pointer_needed)
+ {
+ fprintf(stream, "\tmov r5, -(sp)\n");
+ fprintf(stream, "\tmov sp, r5\n");
+ }
+ else
+ {
+ /* DON'T SAVE FP */
+ }
+
+ /* make frame */
+ if (fsize)
+ asm_fprintf (stream, "\tsub $%#wo, sp\n", fsize);
+
+ /* save CPU registers */
+ for (regno = R0_REGNUM; regno <= PC_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ if (! ((regno == FRAME_POINTER_REGNUM)
+ && frame_pointer_needed))
+ fprintf (stream, "\tmov %s, -(sp)\n", reg_names[regno]);
+ /* fpu regs saving */
+
+ /* via_ac specifies the ac to use for saving ac4, ac5 */
+ via_ac = -1;
+
+ for (regno = AC0_REGNUM; regno <= AC5_REGNUM ; regno++)
+ {
+ /* ac0 - ac3 */
+ if (LOAD_FPU_REG_P(regno)
+ && df_regs_ever_live_p (regno)
+ && ! call_used_regs[regno])
+ {
+ fprintf (stream, "\tstd %s, -(sp)\n", reg_names[regno]);
+ via_ac = regno;
+ }
+
+ /* maybe make ac4, ac5 call used regs?? */
+ /* ac4 - ac5 */
+ if (NO_LOAD_FPU_REG_P(regno)
+ && df_regs_ever_live_p (regno)
+ && ! call_used_regs[regno])
+ {
+ gcc_assert (via_ac != -1);
+ fprintf (stream, "\tldd %s, %s\n",
+ reg_names[regno], reg_names[via_ac]);
+ fprintf (stream, "\tstd %s, -(sp)\n", reg_names[via_ac]);
+ }
+ }
+
+ fprintf (stream, "\t;/* end of prologue */\n\n");
+}
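+
+/* As an illustration (a sketch, not verified output): for a function
+   that needs a frame pointer, a 4-byte frame, and a save of r2, the
+   prologue above emits roughly
+
+	mov r5, -(sp)
+	mov sp, r5
+	sub $04, sp
+	mov r2, -(sp)
+*/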
+
+/*
+ The function epilogue should not depend on the current stack pointer!
+ It should use the frame pointer only. This is mandatory because
+ of alloca; we also take advantage of it to omit stack adjustments
+ before returning. */
+
+/* maybe we can make leaf functions faster by switching to the
+ second register file - this way we don't have to save regs!
+ leaf functions are ~ 50% of all functions (dynamically!)
+
+ set/clear bit 11 (dec. 2048) of status word for switching register files -
+ but how can we do this? the pdp11/45 manual says bit may only
+ be set (p.24), but not cleared!
+
+ switching to kernel is probably more expensive, so we'll leave it
+ like this and not use the second set of registers...
+
+ maybe as option if you want to generate code for kernel mode? */
+
+static void
+pdp11_output_function_epilogue (FILE *stream, HOST_WIDE_INT size)
+{
+ HOST_WIDE_INT fsize = ((size) + 1) & ~1;
+ int i, j, k;
+
+ int via_ac;
+
+ fprintf (stream, "\n\t; /*function epilogue */\n");
+
+ if (frame_pointer_needed)
+ {
+ /* hope this is safe - m68k does it also .... */
+ df_set_regs_ever_live (FRAME_POINTER_REGNUM, false);
+
+ for (i = PC_REGNUM, j = 0 ; i >= 0 ; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ j++;
+
+ /* remember # of pushed bytes for CPU regs */
+ k = 2*j;
+
+ /* Change fp -> r5 due to a compile error in libgcc2.c.  */
+ for (i = PC_REGNUM ; i >= R0_REGNUM ; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ fprintf(stream, "\tmov %#" HOST_WIDE_INT_PRINT "o(r5), %s\n",
+ (-fsize-2*j--)&0xffff, reg_names[i]);
+
+ /* get ACs */
+ via_ac = AC5_REGNUM;
+
+ for (i = AC5_REGNUM; i >= AC0_REGNUM; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ via_ac = i;
+ k += 8;
+ }
+
+ for (i = AC5_REGNUM; i >= AC0_REGNUM; i--)
+ {
+ if (LOAD_FPU_REG_P(i)
+ && df_regs_ever_live_p (i)
+ && ! call_used_regs[i])
+ {
+ fprintf(stream, "\tldd %#" HOST_WIDE_INT_PRINT "o(r5), %s\n",
+ (-fsize-k)&0xffff, reg_names[i]);
+ k -= 8;
+ }
+
+ if (NO_LOAD_FPU_REG_P(i)
+ && df_regs_ever_live_p (i)
+ && ! call_used_regs[i])
+ {
+ gcc_assert (LOAD_FPU_REG_P(via_ac));
+
+ fprintf(stream, "\tldd %#" HOST_WIDE_INT_PRINT "o(r5), %s\n",
+ (-fsize-k)&0xffff, reg_names[via_ac]);
+ fprintf(stream, "\tstd %s, %s\n", reg_names[via_ac], reg_names[i]);
+ k -= 8;
+ }
+ }
+
+ fprintf(stream, "\tmov r5, sp\n");
+ fprintf (stream, "\tmov (sp)+, r5\n");
+ }
+ else
+ {
+ via_ac = AC5_REGNUM;
+
+ /* get ACs */
+ for (i = AC5_REGNUM; i >= AC0_REGNUM; i--)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ via_ac = i;
+
+ for (i = AC5_REGNUM; i >= AC0_REGNUM; i--)
+ {
+ if (LOAD_FPU_REG_P(i)
+ && df_regs_ever_live_p (i)
+ && ! call_used_regs[i])
+ fprintf(stream, "\tldd (sp)+, %s\n", reg_names[i]);
+
+ if (NO_LOAD_FPU_REG_P(i)
+ && df_regs_ever_live_p (i)
+ && ! call_used_regs[i])
+ {
+ gcc_assert (LOAD_FPU_REG_P(via_ac));
+
+ fprintf(stream, "\tldd (sp)+, %s\n", reg_names[via_ac]);
+ fprintf(stream, "\tstd %s, %s\n", reg_names[via_ac], reg_names[i]);
+ }
+ }
+
+ for (i = PC_REGNUM; i >= 0; i--)
+ if (df_regs_ever_live_p (i) && !call_used_regs[i])
+ fprintf(stream, "\tmov (sp)+, %s\n", reg_names[i]);
+
+ if (fsize)
+ fprintf((stream), "\tadd $%#" HOST_WIDE_INT_PRINT "o, sp\n",
+ (fsize)&0xffff);
+ }
+
+ fprintf (stream, "\trts pc\n");
+ fprintf (stream, "\t;/* end of epilogue*/\n\n\n");
+}
+
+/* Return the best assembler insn template
+ for moving operands[1] into operands[0] as a fullword. */
+static const char *
+singlemove_string (rtx *operands)
+{
+ if (operands[1] != const0_rtx)
+ return "mov %1,%0";
+
+ return "clr %0";
+}
+
+
+/* Expand multi-word operands (SImode or DImode) into the 2 or 4
+ corresponding HImode operands. The number of operands is given
+ as the third argument, and the required order of the parts as
+ the fourth argument. */
+bool
+pdp11_expand_operands (rtx *operands, rtx exops[][2], int opcount,
+ pdp11_action *action, pdp11_partorder order)
+{
+ int words, op, w, i, sh;
+ pdp11_partorder useorder;
+ bool sameoff = false;
+ enum { REGOP, OFFSOP, MEMOP, PUSHOP, POPOP, CNSTOP, RNDOP } optype;
+ REAL_VALUE_TYPE r;
+ long sval[2];
+
+ words = GET_MODE_BITSIZE (GET_MODE (operands[0])) / 16;
+
+ /* If either piece order is accepted and one is pre-decrement
+ while the other is post-increment, set order to be high order
+ word first. That will force the pre-decrement to be turned
+ into a pointer adjust, then offset addressing.
+ Otherwise, if either operand uses pre-decrement, that means
+ the order is low order first.
+ Otherwise, if both operands are registers and destination is
+ higher than source and they overlap, do low order word (highest
+ register number) first. */
+ useorder = either;
+ if (opcount == 2)
+ {
+ if (!REG_P (operands[0]) && !REG_P (operands[1]) &&
+ !(CONSTANT_P (operands[1]) ||
+ GET_CODE (operands[1]) == CONST_DOUBLE) &&
+ ((GET_CODE (XEXP (operands[0], 0)) == POST_INC &&
+ GET_CODE (XEXP (operands[1], 0)) == PRE_DEC) ||
+ (GET_CODE (XEXP (operands[0], 0)) == PRE_DEC &&
+ GET_CODE (XEXP (operands[1], 0)) == POST_INC)))
+ useorder = big;
+ else if ((!REG_P (operands[0]) &&
+ GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) ||
+ (!REG_P (operands[1]) &&
+ !(CONSTANT_P (operands[1]) ||
+ GET_CODE (operands[1]) == CONST_DOUBLE) &&
+ GET_CODE (XEXP (operands[1], 0)) == PRE_DEC))
+ useorder = little;
+ else if (REG_P (operands[0]) && REG_P (operands[1]) &&
+ REGNO (operands[0]) > REGNO (operands[1]) &&
+ REGNO (operands[0]) < REGNO (operands[1]) + words)
+ useorder = little;
+
+ /* Check for source == offset from register and dest == push of
+ the same register. In that case, we have to use the same
+ offset (the one for the low order word) for all words, because
+ the push increases the offset to each source word.
+ In theory there are other cases like this, for example dest == pop,
+ but those don't occur in real life so ignore those. */
+ if (GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ && REGNO (XEXP (XEXP (operands[0], 0), 0)) == STACK_POINTER_REGNUM
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ sameoff = true;
+ }
+
+ /* If the caller didn't specify order, use the one we computed,
+ or high word first if we don't care either. If the caller did
+ specify, verify we don't have a problem with that order.
+ (If it matters to the caller, constraints need to be used to
+ ensure this case doesn't occur). */
+ if (order == either)
+ order = (useorder == either) ? big : useorder;
+ else
+ gcc_assert (useorder == either || useorder == order);
+
+
+ for (op = 0; op < opcount; op++)
+ {
+ /* First classify the operand. */
+ if (REG_P (operands[op]))
+ optype = REGOP;
+ else if (CONSTANT_P (operands[op])
+ || GET_CODE (operands[op]) == CONST_DOUBLE)
+ optype = CNSTOP;
+ else if (GET_CODE (XEXP (operands[op], 0)) == POST_INC)
+ optype = POPOP;
+ else if (GET_CODE (XEXP (operands[op], 0)) == PRE_DEC)
+ optype = PUSHOP;
+ else if (!reload_in_progress || offsettable_memref_p (operands[op]))
+ optype = OFFSOP;
+ else if (GET_CODE (operands[op]) == MEM)
+ optype = MEMOP;
+ else
+ optype = RNDOP;
+
+ /* Check for the cases that the operand constraints are not
+ supposed to allow to happen. Return failure for such cases. */
+ if (optype == RNDOP)
+ return false;
+
+ if (action != NULL)
+ action[op] = no_action;
+
+ /* If the operand uses pre-decrement addressing but we
+ want to get the parts high order first,
+ decrement the former register explicitly
+ and change the operand into ordinary indexing. */
+ if (optype == PUSHOP && order == big)
+ {
+ gcc_assert (action != NULL);
+ action[op] = dec_before;
+ operands[op] = gen_rtx_MEM (GET_MODE (operands[op]),
+ XEXP (XEXP (operands[op], 0), 0));
+ optype = OFFSOP;
+ }
+ /* If the operand uses post-increment mode but we want
+ to get the parts low order first, change the operand
+ into ordinary indexing and remember to increment
+ the register explicitly when we're done. */
+ else if (optype == POPOP && order == little)
+ {
+ gcc_assert (action != NULL);
+ action[op] = inc_after;
+ operands[op] = gen_rtx_MEM (GET_MODE (operands[op]),
+ XEXP (XEXP (operands[op], 0), 0));
+ optype = OFFSOP;
+ }
+
+ if (GET_CODE (operands[op]) == CONST_DOUBLE)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[op]);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, sval);
+ }
+
+ for (i = 0; i < words; i++)
+ {
+ if (order == big)
+ w = i;
+ else if (sameoff)
+ w = words - 1;
+ else
+ w = words - 1 - i;
+
+ /* Set the output operand to be word "w" of the input. */
+ if (optype == REGOP)
+ exops[i][op] = gen_rtx_REG (HImode, REGNO (operands[op]) + w);
+ else if (optype == OFFSOP)
+ exops[i][op] = adjust_address (operands[op], HImode, w * 2);
+ else if (optype == CNSTOP)
+ {
+ if (GET_CODE (operands[op]) == CONST_DOUBLE)
+ {
+ sh = 16 - (w & 1) * 16;
+ exops[i][op] = gen_rtx_CONST_INT (HImode, (sval[w / 2] >> sh) & 0xffff);
+ }
+ else
+ {
+ sh = ((words - 1 - w) * 16);
+ exops[i][op] = gen_rtx_CONST_INT (HImode, trunc_int_for_mode (INTVAL(operands[op]) >> sh, HImode));
+ }
+ }
+ else
+ exops[i][op] = operands[op];
+ }
+ }
+ return true;
+}
+
+/* Output assembler code to perform a multiple-word move insn
+ with operands OPERANDS. This moves 2 or 4 words depending
+ on the machine mode of the operands. */
+
+const char *
+output_move_multiple (rtx *operands)
+{
+ rtx exops[4][2];
+ pdp11_action action[2];
+ int i, words;
+
+ words = GET_MODE_BITSIZE (GET_MODE (operands[0])) / 16;
+
+ pdp11_expand_operands (operands, exops, 2, action, either);
+
+ /* Check for explicit decrement before. */
+ if (action[0] == dec_before)
+ {
+ operands[0] = XEXP (operands[0], 0);
+ output_asm_insn ("sub $4,%0", operands);
+ }
+ if (action[1] == dec_before)
+ {
+ operands[1] = XEXP (operands[1], 0);
+ output_asm_insn ("sub $4,%1", operands);
+ }
+
+ /* Do the words. */
+ for (i = 0; i < words; i++)
+ output_asm_insn (singlemove_string (exops[i]), exops[i]);
+
+ /* Check for increment after. */
+ if (action[0] == inc_after)
+ {
+ operands[0] = XEXP (operands[0], 0);
+ output_asm_insn ("add $4,%0", operands);
+ }
+ if (action[1] == inc_after)
+ {
+ operands[1] = XEXP (operands[1], 0);
+ output_asm_insn ("add $4,%1", operands);
+ }
+
+ return "";
+}
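+
+/* For example (a sketch): an SImode push of r0/r1 to -(sp) is ordered
+   low word first by pdp11_expand_operands, so the code above emits
+
+	mov r1,-(sp)
+	mov r0,-(sp)
+*/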
+
+/* Output an ascii string. */
+void
+output_ascii (FILE *file, const char *p, int size)
+{
+ int i;
+
+ /* This used to output .byte "string", which doesn't work with the UNIX
+ assembler and I think not with DEC ones either. */
+ fprintf (file, "\t.byte ");
+
+ for (i = 0; i < size; i++)
+ {
+ register int c = p[i];
+ if (c < 0)
+ c += 256;
+ fprintf (file, "%#o", c);
+ if (i < size - 1)
+ putc (',', file);
+ }
+ putc ('\n', file);
+}
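+
+/* For example, output_ascii (file, "AB", 2) prints
+
+	.byte 0101,0102
+*/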
+
+
+void
+pdp11_asm_output_var (FILE *file, const char *name, int size,
+ int align, bool global)
+{
+ if (align > 8)
+ fprintf (file, "\n\t.even\n");
+ if (global)
+ {
+ fprintf (file, ".globl ");
+ assemble_name (file, name);
+ }
+ fprintf (file, "\n");
+ assemble_name (file, name);
+ fprintf (file, ": .=.+ %#ho\n", (unsigned short)size);
+}
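+
+/* For example (a sketch): a global variable "x" of size 2 with
+   alignment 16 comes out as
+
+	.even
+   .globl x
+   x: .=.+ 02
+*/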
+
+static void
+pdp11_asm_print_operand (FILE *file, rtx x, int code)
+{
+ REAL_VALUE_TYPE r;
+ long sval[2];
+
+ if (code == '#')
+ fprintf (file, "#");
+ else if (code == '@')
+ {
+ if (TARGET_UNIX_ASM)
+ fprintf (file, "*");
+ else
+ fprintf (file, "@");
+ }
+ else if (GET_CODE (x) == REG)
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (GET_CODE (x) == MEM)
+ output_address (XEXP (x, 0));
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != SImode)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, sval);
+ fprintf (file, "$%#lo", sval[0] >> 16);
+ }
+ else
+ {
+ putc ('$', file);
+ output_addr_const_pdp11 (file, x);
+ }
+}
+
+static bool
+pdp11_asm_print_operand_punct_valid_p (unsigned char c)
+{
+ return (c == '#' || c == '@');
+}
+
+void
+print_operand_address (FILE *file, register rtx addr)
+{
+ register rtx breg;
+ rtx offset;
+ int again = 0;
+
+ retry:
+
+ switch (GET_CODE (addr))
+ {
+ case MEM:
+ if (TARGET_UNIX_ASM)
+ fprintf (file, "*");
+ else
+ fprintf (file, "@");
+ addr = XEXP (addr, 0);
+ again = 1;
+ goto retry;
+
+ case REG:
+ fprintf (file, "(%s)", reg_names[REGNO (addr)]);
+ break;
+
+ case PRE_MODIFY:
+ case PRE_DEC:
+ fprintf (file, "-(%s)", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case POST_MODIFY:
+ case POST_INC:
+ fprintf (file, "(%s)+", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case PLUS:
+ breg = 0;
+ offset = 0;
+ if (CONSTANT_ADDRESS_P (XEXP (addr, 0))
+ || GET_CODE (XEXP (addr, 0)) == MEM)
+ {
+ offset = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ }
+ else if (CONSTANT_ADDRESS_P (XEXP (addr, 1))
+ || GET_CODE (XEXP (addr, 1)) == MEM)
+ {
+ offset = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+ if (GET_CODE (addr) != PLUS)
+ ;
+ else if (GET_CODE (XEXP (addr, 0)) == REG)
+ {
+ breg = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ }
+ else if (GET_CODE (XEXP (addr, 1)) == REG)
+ {
+ breg = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+ if (GET_CODE (addr) == REG)
+ {
+ gcc_assert (breg == 0);
+ breg = addr;
+ addr = 0;
+ }
+ if (offset != 0)
+ {
+ gcc_assert (addr == 0);
+ addr = offset;
+ }
+ if (addr != 0)
+ output_addr_const_pdp11 (file, addr);
+ if (breg != 0)
+ {
+ gcc_assert (GET_CODE (breg) == REG);
+ fprintf (file, "(%s)", reg_names[REGNO (breg)]);
+ }
+ break;
+
+ default:
+ if (!again && GET_CODE (addr) == CONST_INT)
+ {
+ /* Absolute (integer number) address. */
+ if (!TARGET_UNIX_ASM)
+ fprintf (file, "@$");
+ }
+ output_addr_const_pdp11 (file, addr);
+ }
+}
+
+/* Target hook to assemble integer objects. We need to use the
+ pdp-specific version of output_addr_const. */
+
+static bool
+pdp11_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (aligned_p)
+ switch (size)
+ {
+ case 1:
+ fprintf (asm_out_file, "\t.byte\t");
+ output_addr_const_pdp11 (asm_out_file, GEN_INT (INTVAL (x) & 0xff));
+ fprintf (asm_out_file, " /* char */\n");
+ return true;
+
+ case 2:
+ fprintf (asm_out_file, TARGET_UNIX_ASM ? "\t" : "\t.word\t");
+ output_addr_const_pdp11 (asm_out_file, x);
+ fprintf (asm_out_file, " /* short */\n");
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
+
+
+/* register move costs, indexed by regs */
+
+static const int move_costs[N_REG_CLASSES][N_REG_CLASSES] =
+{
+ /* NO MUL GEN LFPU NLFPU FPU ALL */
+
+/* NO */ { 0, 0, 0, 0, 0, 0, 0},
+/* MUL */ { 0, 2, 2, 22, 22, 22, 22},
+/* GEN */ { 0, 2, 2, 22, 22, 22, 22},
+/* LFPU */ { 0, 22, 22, 2, 2, 2, 22},
+/* NLFPU */ { 0, 22, 22, 2, 10, 10, 22},
+/* FPU */ { 0, 22, 22, 2, 10, 10, 22},
+/* ALL */ { 0, 22, 22, 22, 22, 22, 22}
+} ;
+
+
+/* Note that some moves are tremendously expensive, because they
+   require lots of tricks!  Do we have to charge the costs incurred by
+   the secondary reload class -- as we do here with 10 -- or not?  */
+
+static int
+pdp11_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t c1, reg_class_t c2)
+{
+ return move_costs[(int)c1][(int)c2];
+}
+
+static bool
+pdp11_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ if (INTVAL (x) == 0 || INTVAL (x) == -1 || INTVAL (x) == 1)
+ {
+ *total = 0;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ /* Twice as expensive as REG. */
+ *total = 2;
+ return true;
+
+ case CONST_DOUBLE:
+ /* Twice (or 4 times) as expensive as 16 bit. */
+ *total = 4;
+ return true;
+
+ case MULT:
+ /* ??? There is something wrong in MULT because MULT is not
+ as cheap as total = 2 even if we can shift! */
+ /* If optimizing for size make mult etc cheap, but not 1, so when
+ in doubt the faster insn is chosen. */
+ if (optimize_size)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (11);
+ return false;
+
+ case DIV:
+ if (optimize_size)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (25);
+ return false;
+
+ case MOD:
+ if (optimize_size)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (26);
+ return false;
+
+ case ABS:
+ /* Equivalent to length, so same for optimize_size. */
+ *total = COSTS_N_INSNS (3);
+ return false;
+
+ case ZERO_EXTEND:
+ /* Only used for qi->hi. */
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case SIGN_EXTEND:
+ if (GET_MODE (x) == HImode)
+ *total = COSTS_N_INSNS (1);
+ else if (GET_MODE (x) == SImode)
+ *total = COSTS_N_INSNS (6);
+ else
+ *total = COSTS_N_INSNS (2);
+ return false;
+
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (optimize_size)
+ *total = COSTS_N_INSNS (1);
+ else if (GET_MODE (x) == QImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = COSTS_N_INSNS (8); /* worst case */
+ else
+ *total = COSTS_N_INSNS (INTVAL (XEXP (x, 1)));
+ }
+ else if (GET_MODE (x) == HImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (abs (INTVAL (XEXP (x, 1))) == 1)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2.5 + 0.5 * INTVAL (XEXP (x, 1)));
+ }
+ else
+ *total = COSTS_N_INSNS (10); /* worst case */
+ }
+ else if (GET_MODE (x) == SImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ *total = COSTS_N_INSNS (2.5 + 0.5 * INTVAL (XEXP (x, 1)));
+ else /* worst case */
+ *total = COSTS_N_INSNS (18);
+ }
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+const char *
+output_jump (enum rtx_code code, int inv, int length)
+{
+ static int x = 0;
+
+ static char buf[1000];
+ const char *pos, *neg;
+
+ if (cc_prev_status.flags & CC_NO_OVERFLOW)
+ {
+ switch (code)
+ {
+ case GTU: code = GT; break;
+ case LTU: code = LT; break;
+ case GEU: code = GE; break;
+ case LEU: code = LE; break;
+ default: ;
+ }
+ }
+ switch (code)
+ {
+ case EQ: pos = "beq", neg = "bne"; break;
+ case NE: pos = "bne", neg = "beq"; break;
+ case GT: pos = "bgt", neg = "ble"; break;
+ case GTU: pos = "bhi", neg = "blos"; break;
+ case LT: pos = "blt", neg = "bge"; break;
+ case LTU: pos = "blo", neg = "bhis"; break;
+ case GE: pos = "bge", neg = "blt"; break;
+ case GEU: pos = "bhis", neg = "blo"; break;
+ case LE: pos = "ble", neg = "bgt"; break;
+ case LEU: pos = "blos", neg = "bhi"; break;
+ default: gcc_unreachable ();
+ }
+
+#if 0
+/* currently we don't need this, because the tstdf and cmpdf
+ copy the condition code immediately, and other float operations are not
+ yet recognized as changing the FCC - if so, then the length-cost of all
+ jump insns increases by one, because we have to potentially copy the
+ FCC! */
+ if (cc_status.flags & CC_IN_FPU)
+ output_asm_insn("cfcc", NULL);
+#endif
+
+ switch (length)
+ {
+ case 2:
+
+ sprintf(buf, "%s %%l1", inv ? neg : pos);
+
+ return buf;
+
+ case 6:
+
+ sprintf(buf, "%s JMP_%d\n\tjmp %%l1\nJMP_%d:", inv ? pos : neg, x, x);
+
+ x++;
+
+ return buf;
+
+ default:
+
+ gcc_unreachable ();
+ }
+
+}
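+
+/* For example, output_jump (EQ, 0, 2) returns "beq %l1"; the 6-byte
+   form inverts the test and branches around a jmp:
+
+	bne JMP_0
+	jmp %l1
+   JMP_0:
+*/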
+
+void
+notice_update_cc_on_set(rtx exp, rtx insn ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (SET_DEST (exp)) == CC0)
+ {
+ cc_status.flags = 0;
+ cc_status.value1 = SET_DEST (exp);
+ cc_status.value2 = SET_SRC (exp);
+ }
+ else if (GET_CODE (SET_SRC (exp)) == CALL)
+ {
+ CC_STATUS_INIT;
+ }
+ else if (SET_DEST(exp) == pc_rtx)
+ {
+ /* jump */
+ }
+ else if (GET_MODE (SET_DEST(exp)) == HImode
+ || GET_MODE (SET_DEST(exp)) == QImode)
+ {
+ cc_status.flags = GET_CODE (SET_SRC(exp)) == MINUS ? 0 : CC_NO_OVERFLOW;
+ cc_status.value1 = SET_SRC (exp);
+ cc_status.value2 = SET_DEST (exp);
+
+ if (cc_status.value1 && GET_CODE (cc_status.value1) == REG
+ && cc_status.value2
+ && reg_overlap_mentioned_p (cc_status.value1, cc_status.value2))
+ cc_status.value2 = 0;
+ if (cc_status.value1 && GET_CODE (cc_status.value1) == MEM
+ && cc_status.value2
+ && GET_CODE (cc_status.value2) == MEM)
+ cc_status.value2 = 0;
+ }
+ else
+ {
+ CC_STATUS_INIT;
+ }
+}
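+
+/* For example (a sketch): after "mov r0, r1" (an HImode SET that is
+   not a MINUS) cc_status records the operands with CC_NO_OVERFLOW set,
+   which lets output_jump above rewrite unsigned branches (GTU -> GT
+   and so on).  */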
+
+
+int
+simple_memory_operand(rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx addr;
+
+ /* Eliminate non-memory operations */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+#if 0
+ /* dword operations really put out 2 instructions, so eliminate them. */
+ if (GET_MODE_SIZE (GET_MODE (op)) > (HAVE_64BIT_P () ? 8 : 4))
+ return FALSE;
+#endif
+
+ /* Decode the address now. */
+
+ indirection:
+
+ addr = XEXP (op, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* (R0) - no extra cost */
+ return 1;
+
+ case PRE_DEC:
+ case POST_INC:
+ /* -(R0), (R0)+ - cheap! */
+ return 0;
+
+ case MEM:
+ /* cheap - is encoded in addressing mode info!
+
+ -- except for @(R0), which has to be @0(R0) !!! */
+
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ return 0;
+
+ op=addr;
+ goto indirection;
+
+ case CONST_INT:
+ case LABEL_REF:
+ case CONST:
+ case SYMBOL_REF:
+ /* @#address - extra cost */
+ return 0;
+
+ case PLUS:
+ /* X(R0) - extra cost */
+ return 0;
+
+ default:
+ break;
+ }
+
+ return FALSE;
+}
+
+
+/*
+ * output a block move:
+ *
+ * operands[0] ... to
+ * operands[1] ... from
+ * operands[2] ... length
+ * operands[3] ... alignment
+ * operands[4] ... scratch register
+ */
+
+
+const char *
+output_block_move(rtx *operands)
+{
+ static int count = 0;
+ char buf[200];
+ int unroll;
+ int lastbyte = 0;
+
+ /* Move of zero bytes is a NOP. */
+ if (operands[2] == const0_rtx)
+ return "";
+
+ /* Look for moves by small constant byte counts, those we'll
+ expand to straight line code. */
+ if (CONSTANT_P (operands[2]))
+ {
+ if (INTVAL (operands[2]) < 16
+ && (!optimize_size || INTVAL (operands[2]) < 5)
+ && INTVAL (operands[3]) == 1)
+ {
+ register int i;
+
+ for (i = 1; i <= INTVAL (operands[2]); i++)
+ output_asm_insn("movb (%1)+, (%0)+", operands);
+
+ return "";
+ }
+ else if (INTVAL(operands[2]) < 32
+ && (!optimize_size || INTVAL (operands[2]) < 9)
+ && INTVAL (operands[3]) >= 2)
+ {
+ register int i;
+
+ for (i = 1; i <= INTVAL (operands[2]) / 2; i++)
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ if (INTVAL (operands[2]) & 1)
+ output_asm_insn ("movb (%1), (%0)", operands);
+
+ return "";
+ }
+ }
+
+ /* Ideally we'd look for moves that are multiples of 4 or 8
+ bytes and handle those by unrolling the move loop. That
+ makes for a lot of code if done at run time, but it's ok
+ for constant counts. Also, for variable counts we have
+ to worry about odd byte count with even aligned pointers.
+ On 11/40 and up we handle that case; on older machines
+ we don't and just use byte-wise moves all the time. */
+
+ if (CONSTANT_P (operands[2]) )
+ {
+ if (INTVAL (operands[3]) < 2)
+ unroll = 0;
+ else
+ {
+ lastbyte = INTVAL (operands[2]) & 1;
+
+ if (optimize_size || INTVAL (operands[2]) & 2)
+ unroll = 1;
+ else if (INTVAL (operands[2]) & 4)
+ unroll = 2;
+ else
+ unroll = 3;
+ }
+
+ /* Loop count is byte count scaled by unroll. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) >> unroll);
+ output_asm_insn ("mov %2, %4", operands);
+ }
+ else
+ {
+ /* Variable byte count; use the input register
+ as the scratch. */
+ operands[4] = operands[2];
+
+ /* Decide whether to move by words, and check
+ the byte count for zero. */
+ if (TARGET_40_PLUS && INTVAL (operands[3]) > 1)
+ {
+ unroll = 1;
+ output_asm_insn ("asr %4", operands);
+ }
+ else
+ {
+ unroll = 0;
+ output_asm_insn ("tst %4", operands);
+ }
+ sprintf (buf, "beq movestrhi%d", count + 1);
+ output_asm_insn (buf, NULL);
+ }
+
+ /* Output the loop label. */
+ sprintf (buf, "\nmovestrhi%d:", count);
+ output_asm_insn (buf, NULL);
+
+ /* Output the appropriate move instructions. */
+ switch (unroll)
+ {
+ case 0:
+ output_asm_insn ("movb (%1)+, (%0)+", operands);
+ break;
+
+ case 1:
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ break;
+
+ case 2:
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ break;
+
+ default:
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ output_asm_insn ("mov (%1)+, (%0)+", operands);
+ break;
+ }
+
+ /* Output the decrement and test. */
+ if (TARGET_40_PLUS)
+ {
+ sprintf (buf, "sob %%4, movestrhi%d", count);
+ output_asm_insn (buf, operands);
+ }
+ else
+ {
+ output_asm_insn ("dec %4", operands);
+ sprintf (buf, "bgt movestrhi%d", count);
+ output_asm_insn (buf, NULL);
+ }
+ count ++;
+
+ /* If constant odd byte count, move the last byte. */
+ if (lastbyte)
+ output_asm_insn ("movb (%1), (%0)", operands);
+ else if (!CONSTANT_P (operands[2]))
+ {
+ /* Output the destination label for the zero byte count check. */
+ sprintf (buf, "\nmovestrhi%d:", count);
+ output_asm_insn (buf, NULL);
+ count++;
+
+ /* If we did word moves, check for trailing last byte. */
+ if (unroll)
+ {
+ sprintf (buf, "bcc movestrhi%d", count);
+ output_asm_insn (buf, NULL);
+ output_asm_insn ("movb (%1), (%0)", operands);
+ sprintf (buf, "\nmovestrhi%d:", count);
+ output_asm_insn (buf, NULL);
+ count++;
+ }
+ }
+
+ return "";
+}
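+
+/* For example (a sketch): a constant, word-aligned 8-byte copy takes
+   the straight-line case above and emits four "mov (%1)+, (%0)+"
+   instructions; larger or variable counts fall into the movestrhiN
+   loop, using "sob" for the decrement-and-branch on 11/40 and up.  */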
+
+/* This function checks whether a real value can be encoded as a
+   literal, i.e., addressing mode 27.  In that mode, real values are
+   one-word values, so the remaining 48 bits have to be zero.  */
+int
+legitimate_const_double_p (rtx address)
+{
+ REAL_VALUE_TYPE r;
+ long sval[2];
+ REAL_VALUE_FROM_CONST_DOUBLE (r, address);
+ REAL_VALUE_TO_TARGET_DOUBLE (r, sval);
+ if ((sval[0] & 0xffff) == 0 && sval[1] == 0)
+ return 1;
+ return 0;
+}
+
+/* Implement CANNOT_CHANGE_MODE_CLASS. */
+bool
+pdp11_cannot_change_mode_class (enum machine_mode from,
+ enum machine_mode to,
+ enum reg_class rclass)
+{
+  /* FPU registers contain a whole float value, and the parts of it
+     are not separately accessible.
+
+     So we disallow all mode changes involving FPRs.  */
+ if (FLOAT_MODE_P (from) != FLOAT_MODE_P (to))
+ return true;
+
+ return reg_classes_intersect_p (FPU_REGS, rclass);
+}
+
+/* TARGET_PREFERRED_RELOAD_CLASS
+
+ Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+loading is easier into LOAD_FPU_REGS than FPU_REGS! */
+
+static reg_class_t
+pdp11_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (rclass == FPU_REGS)
+ return LOAD_FPU_REGS;
+ if (rclass == ALL_REGS)
+ {
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ return LOAD_FPU_REGS;
+ else
+ return GENERAL_REGS;
+ }
+ return rclass;
+}
+
+/* TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+
+ Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+loading is easier into LOAD_FPU_REGS than FPU_REGS! */
+
+static reg_class_t
+pdp11_preferred_output_reload_class (rtx x, reg_class_t rclass)
+{
+ if (rclass == FPU_REGS)
+ return LOAD_FPU_REGS;
+ if (rclass == ALL_REGS)
+ {
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ return LOAD_FPU_REGS;
+ else
+ return GENERAL_REGS;
+ }
+ return rclass;
+}
+
+
+/* TARGET_SECONDARY_RELOAD.
+
+ FPU registers AC4 and AC5 (class NO_LOAD_FPU_REGS) require an
+ intermediate register (AC0-AC3: LOAD_FPU_REGS). Everything else
+ can be loaded/stored directly. */
+static reg_class_t
+pdp11_secondary_reload (bool in_p ATTRIBUTE_UNUSED,
+ rtx x,
+ reg_class_t reload_class,
+ enum machine_mode reload_mode ATTRIBUTE_UNUSED,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ if (reload_class != NO_LOAD_FPU_REGS || GET_CODE (x) != REG ||
+ REGNO_REG_CLASS (REGNO (x)) == LOAD_FPU_REGS)
+ return NO_REGS;
+
+ return LOAD_FPU_REGS;
+}
+
+/* Target routine to check whether a register-to-register move requires
+   memory.
+
+   The answer is yes if we're going between general registers and FPU
+   registers.  The mode doesn't matter in making this check.  */
+bool
+pdp11_secondary_memory_needed (reg_class_t c1, reg_class_t c2,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ int fromfloat = (c1 == LOAD_FPU_REGS || c1 == NO_LOAD_FPU_REGS ||
+ c1 == FPU_REGS);
+ int tofloat = (c2 == LOAD_FPU_REGS || c2 == NO_LOAD_FPU_REGS ||
+ c2 == FPU_REGS);
+
+ return (fromfloat != tofloat);
+}
+
+/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+*/
+
+static bool
+pdp11_legitimate_address_p (enum machine_mode mode,
+ rtx operand, bool strict)
+{
+ rtx xfoob;
+
+ /* accept @#address */
+ if (CONSTANT_ADDRESS_P (operand))
+ return true;
+
+ switch (GET_CODE (operand))
+ {
+ case REG:
+ /* accept (R0) */
+ return !strict || REGNO_OK_FOR_BASE_P (REGNO (operand));
+
+ case PLUS:
+ /* accept X(R0) */
+ return GET_CODE (XEXP (operand, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (operand, 0))))
+ && CONSTANT_ADDRESS_P (XEXP (operand, 1));
+
+ case PRE_DEC:
+ /* accept -(R0) */
+ return GET_CODE (XEXP (operand, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (operand, 0))));
+
+ case POST_INC:
+ /* accept (R0)+ */
+ return GET_CODE (XEXP (operand, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (operand, 0))));
+
+ case PRE_MODIFY:
+ /* accept -(SP) -- which uses PRE_MODIFY for byte mode */
+ return GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == STACK_POINTER_REGNUM
+ && GET_CODE ((xfoob = XEXP (operand, 1))) == PLUS
+ && GET_CODE (XEXP (xfoob, 0)) == REG
+ && REGNO (XEXP (xfoob, 0)) == STACK_POINTER_REGNUM
+ && CONSTANT_P (XEXP (xfoob, 1))
+ && INTVAL (XEXP (xfoob,1)) == -2;
+
+ case POST_MODIFY:
+ /* accept (SP)+ -- which uses POST_MODIFY for byte mode */
+ return GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == STACK_POINTER_REGNUM
+ && GET_CODE ((xfoob = XEXP (operand, 1))) == PLUS
+ && GET_CODE (XEXP (xfoob, 0)) == REG
+ && REGNO (XEXP (xfoob, 0)) == STACK_POINTER_REGNUM
+ && CONSTANT_P (XEXP (xfoob, 1))
+ && INTVAL (XEXP (xfoob,1)) == 2;
+
+ case MEM:
+ /* handle another level of indirection ! */
+ xfoob = XEXP (operand, 0);
+
+      /* (MEM:xx (MEM:xx ())) is not valid for SI and DI, and is
+	 currently also forbidden for float, because we would have to
+	 handle it in output_move_double and/or output_move_quad - we
+	 could do it, but currently it's not worth it.  Now that DFmode
+	 cannot go into the CPU register file, we could perhaps allow
+	 float, but then movdf would have to handle memory-to-memory
+	 moves.  */
+ if (GET_MODE_BITSIZE(mode) > 16)
+ return false;
+
+ /* accept @address */
+ if (CONSTANT_ADDRESS_P (xfoob))
+ return true;
+
+ switch (GET_CODE (xfoob))
+ {
+ case REG:
+ /* accept @(R0) - which is @0(R0) */
+ return !strict || REGNO_OK_FOR_BASE_P(REGNO (xfoob));
+
+ case PLUS:
+ /* accept @X(R0) */
+ return GET_CODE (XEXP (xfoob, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (xfoob, 0))))
+ && CONSTANT_ADDRESS_P (XEXP (xfoob, 1));
+
+ case PRE_DEC:
+ /* accept @-(R0) */
+ return GET_CODE (XEXP (xfoob, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (xfoob, 0))));
+
+ case POST_INC:
+ /* accept @(R0)+ */
+ return GET_CODE (XEXP (xfoob, 0)) == REG
+ && (!strict || REGNO_OK_FOR_BASE_P (REGNO (XEXP (xfoob, 0))));
+
+ default:
+ /* anything else is invalid */
+ return false;
+ }
+
+ default:
+ /* anything else is invalid */
+ return false;
+ }
+}
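+
+/* Summary of the accepted forms, in assembler syntax (illustrative):
+
+     @#addr	absolute			(CONSTANT_ADDRESS_P)
+     (r0)	register deferred		(REG)
+     X(r0)	indexed				(PLUS)
+     -(r0)	autodecrement			(PRE_DEC)
+     (r0)+	autoincrement			(POST_INC)
+
+   plus, for modes of at most 16 bits, one extra level of
+   indirection: @addr, @(r0), @X(r0), @-(r0) and @(r0)+.  */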
+
+/* Return the class number of the smallest class containing
+ reg number REGNO. */
+enum reg_class
+pdp11_regno_reg_class (int regno)
+{
+ if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
+ return GENERAL_REGS;
+ else if (regno > AC3_REGNUM)
+ return NO_LOAD_FPU_REGS;
+ else if (regno >= AC0_REGNUM)
+ return LOAD_FPU_REGS;
+ else if (regno & 1)
+ return MUL_REGS;
+ else
+ return GENERAL_REGS;
+}
+
+
+static int
+pdp11_sp_frame_offset (void)
+{
+ int offset = 0, regno;
+ offset = get_frame_size();
+ for (regno = 0; regno <= PC_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ offset += 2;
+ for (regno = AC0_REGNUM; regno <= AC5_REGNUM; regno++)
+ if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
+ offset += 8;
+
+ return offset;
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+int
+pdp11_initial_elimination_offset (int from, int to)
+{
+ int spoff;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return 4;
+ else if (from == FRAME_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ return 0;
+ else
+ {
+ gcc_assert (to == STACK_POINTER_REGNUM);
+
+ /* Get the size of the register save area. */
+ spoff = pdp11_sp_frame_offset ();
+ if (from == FRAME_POINTER_REGNUM)
+ return spoff;
+
+ gcc_assert (from == ARG_POINTER_REGNUM);
+
+ /* If there is a frame pointer, that is saved too. */
+ if (frame_pointer_needed)
+ spoff += 2;
+
+ /* Account for the saved PC in the function call. */
+ return spoff + 2;
+ }
+}
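+
+/* Worked example (illustrative): with 6 bytes of locals and r2 the
+   only call-saved register used, pdp11_sp_frame_offset returns
+   6 + 2 = 8, so FRAME_POINTER_REGNUM eliminates to the stack pointer
+   at offset 8, and ARG_POINTER_REGNUM at 8, plus 2 if the frame
+   pointer is saved, plus 2 for the saved PC.  */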
+
+/* A copy of output_addr_const modified for pdp11 expression syntax.
+ output_addr_const also gets called for %cDIGIT and %nDIGIT, which we don't
+ use, and for debugging output, which we don't support with this port either.
+ So this copy should get called whenever needed.
+*/
+void
+output_addr_const_pdp11 (FILE *file, rtx x)
+{
+ char buf[256];
+ int i;
+
+ restart:
+ switch (GET_CODE (x))
+ {
+ case PC:
+ gcc_assert (flag_pic);
+ putc ('.', file);
+ break;
+
+ case SYMBOL_REF:
+ assemble_name (file, XSTR (x, 0));
+ break;
+
+ case LABEL_REF:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
+ assemble_name (file, buf);
+ break;
+
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ assemble_name (file, buf);
+ break;
+
+ case CONST_INT:
+ i = INTVAL (x);
+ if (i < 0)
+ {
+ i = -i;
+ fprintf (file, "-");
+ }
+ fprintf (file, "%#o", i & 0xffff);
+ break;
+
+ case CONST:
+ /* This used to output parentheses around the expression,
+ but that does not work on the 386 (either ATT or BSD assembler). */
+ output_addr_const_pdp11 (file, XEXP (x, 0));
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %o if the number is one word and positive. */
+ gcc_assert (!CONST_DOUBLE_HIGH (x));
+ fprintf (file, "%#ho", (unsigned short) CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* Some assemblers need integer constants to appear last (e.g. masm). */
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ {
+ output_addr_const_pdp11 (file, XEXP (x, 1));
+ if (INTVAL (XEXP (x, 0)) >= 0)
+ fprintf (file, "+");
+ output_addr_const_pdp11 (file, XEXP (x, 0));
+ }
+ else
+ {
+ output_addr_const_pdp11 (file, XEXP (x, 0));
+ if (INTVAL (XEXP (x, 1)) >= 0)
+ fprintf (file, "+");
+ output_addr_const_pdp11 (file, XEXP (x, 1));
+ }
+ break;
+
+ case MINUS:
+ /* Avoid outputting things like x-x or x+5-x,
+ since some assemblers can't handle that. */
+ x = simplify_subtraction (x);
+ if (GET_CODE (x) != MINUS)
+ goto restart;
+
+ output_addr_const_pdp11 (file, XEXP (x, 0));
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || INTVAL (XEXP (x, 1)) >= 0)
+ fprintf (file, "-");
+ output_addr_const_pdp11 (file, XEXP (x, 1));
+ break;
+
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ output_addr_const_pdp11 (file, XEXP (x, 0));
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+pdp11_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ /* Integers 32 bits and under, and scalar floats (if FPU), are returned
+ in registers. The rest go into memory. */
+ return (TYPE_MODE (type) == DImode
+ || (FLOAT_MODE_P (TYPE_MODE (type)) && ! TARGET_AC0)
+ || TREE_CODE (type) == VECTOR_TYPE
+ || COMPLEX_MODE_P (TYPE_MODE (type)));
+}
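+
+/* For example: int and long values come back in registers, while
+   long long (DImode), complex values, and - unless -mac0 is given -
+   float and double values are returned in memory.  */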
+
+/* Worker function for TARGET_FUNCTION_VALUE.
+
+   On the pdp11 the value is found in R0, or in AC0 for float values
+   when an FPU is present.  */
+
+static rtx
+pdp11_function_value (const_tree valtype,
+ const_tree fntype_or_decl ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype),
+ BASE_RETURN_VALUE_REG(TYPE_MODE(valtype)));
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+pdp11_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG(mode));
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P.
+
+   On the pdp11, RETVAL_REGNUM is the only register used this way;
+   with -mac0, AC0 is used for float return values as well.  */
+
+static bool
+pdp11_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == RETVAL_REGNUM) || (TARGET_AC0 && (regno == AC0_REGNUM));
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT.
+
+   How to do this with separate I/D spaces is an open question - it
+   would need some allocate_trampoline magic.
+
+   The following works for shared I/D:
+
+   MOV #STATIC, $4	01270Y	0x0000 <- STATIC; Y = STATIC_CHAIN_REGNUM
+   JMP @#FUNCTION	000137	0x0000 <- FUNCTION
+*/
+
+static void
+pdp11_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ gcc_assert (!TARGET_SPLIT);
+
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, GEN_INT (012700+STATIC_CHAIN_REGNUM));
+ mem = adjust_address (m_tramp, HImode, 2);
+ emit_move_insn (mem, chain_value);
+  mem = adjust_address (m_tramp, HImode, 4);
+  emit_move_insn (mem, GEN_INT (000137));
+  /* The function address goes in the word after the JMP opcode.  */
+  mem = adjust_address (m_tramp, HImode, 6);
+  emit_move_insn (mem, fnaddr);
+}
+
+/* Worker function for TARGET_FUNCTION_ARG.
+
+ Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+pdp11_function_arg (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+/* Worker function for TARGET_FUNCTION_ARG_ADVANCE.
+
+ Update the data in CUM to advance over an argument of mode MODE and
+ data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+pdp11_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += (mode != BLKmode
+ ? GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type));
+}
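+
+/* Since pdp11_function_arg always returns NULL_RTX, all arguments
+   are passed on the stack and CUM simply counts bytes: e.g. after a
+   16-bit int (2 bytes) and a long (4 bytes), *cum == 6.  */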
+
+/* Make sure everything's fine if we *don't* have an FPU.
+ This assumes that putting a register in fixed_regs will keep the
+ compiler's mitts completely off it. We don't bother to zero it out
+ of register classes. Also fix incompatible register naming with
+ the UNIX assembler. */
+
+static void
+pdp11_conditional_register_usage (void)
+{
+ int i;
+ HARD_REG_SET x;
+ if (!TARGET_FPU)
+ {
+ COPY_HARD_REG_SET (x, reg_class_contents[(int)FPU_REGS]);
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++ )
+ if (TEST_HARD_REG_BIT (x, i))
+ fixed_regs[i] = call_used_regs[i] = 1;
+ }
+
+ if (TARGET_AC0)
+ call_used_regs[AC0_REGNUM] = 1;
+ if (TARGET_UNIX_ASM)
+ {
+ /* Change names of FPU registers for the UNIX assembler. */
+ reg_names[8] = "fr0";
+ reg_names[9] = "fr1";
+ reg_names[10] = "fr2";
+ reg_names[11] = "fr3";
+ reg_names[12] = "fr4";
+ reg_names[13] = "fr5";
+ }
+}
+
+static section *
+pdp11_function_section (tree decl ATTRIBUTE_UNUSED,
+ enum node_frequency freq ATTRIBUTE_UNUSED,
+ bool startup ATTRIBUTE_UNUSED,
+ bool exit ATTRIBUTE_UNUSED)
+{
+ return NULL;
+}
+
+struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
new file mode 100644
index 000000000..fc36f5b67
--- /dev/null
+++ b/gcc/config/pdp11/pdp11.h
@@ -0,0 +1,685 @@
+/* Definitions of target machine for GNU compiler, for the pdp-11
+ Copyright (C) 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2004, 2005,
+ 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define CONSTANT_POOL_BEFORE_FUNCTION 0
+
+/* check whether load_fpu_reg or not */
+#define LOAD_FPU_REG_P(x) ((x) >= AC0_REGNUM && (x) <= AC3_REGNUM)
+#define NO_LOAD_FPU_REG_P(x) ((x) == AC4_REGNUM || (x) == AC5_REGNUM)
+#define FPU_REG_P(x) (LOAD_FPU_REG_P(x) || NO_LOAD_FPU_REG_P(x))
+#define CPU_REG_P(x) ((x) <= PC_REGNUM)
+
+/* Names to predefine in the preprocessor for this target machine. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("pdp11"); \
+ } \
+ while (0)
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION fprintf (stderr, " (pdp11)");
+
+
+/* Generate DBX debugging information. */
+
+#define DBX_DEBUGGING_INFO
+
+#define TARGET_40_PLUS (TARGET_40 || TARGET_45)
+#define TARGET_10 (! TARGET_40_PLUS)
+
+#define TARGET_UNIX_ASM_DEFAULT 0
+
+#define ASSEMBLER_DIALECT (TARGET_UNIX_ASM ? 1 : 0)
+
+
+
+/* TYPE SIZES */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE (TARGET_INT16 ? 16 : 32)
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+/* If we set FLOAT_TYPE_SIZE to 32, we get the benefit
+   of saving core for huge arrays - the definitions are
+   already in the md file - but floats can then never reside in
+   an FPU register, since we keep the FPU in double float mode
+   all the time.  */
+#define FLOAT_TYPE_SIZE (TARGET_FLOAT32 ? 32 : 64)
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Machine types required by ANSI C.  */
+#define SIZE_TYPE "unsigned int"	/* definition of size_t */
+#define WCHAR_TYPE "int"		/* could arguably be long int */
+#define WCHAR_TYPE_SIZE 16
+
+#define PTRDIFF_TYPE "int"
+
+/* target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is first. */
+#define WORDS_BIG_ENDIAN 1
+
+/* Define that floats are in VAX order, not high word first as for ints. */
+#define FLOAT_WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes).  */
+#define UNITS_PER_WORD 2
+
+/* This machine doesn't use IEEE floats. */
+/* Because the pdp11 (at least Unix) convention for 32-bit ints is
+ big endian, opposite for what you need for float, the vax float
+ conversion routines aren't actually used directly. But the underlying
+ format is indeed the vax/pdp11 float format. */
+extern const struct real_format pdp11_f_format;
+extern const struct real_format pdp11_d_format;
+
+/* Maximum size of a reasonable data type: DImode or DFmode.  */
+#define MAX_FIXED_MODE_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing pointers in memory. */
+#define POINTER_BOUNDARY 16
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 16
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 16
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 16
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 16
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 16
+
+/* Define this if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ we have 8 integer registers, plus 6 float
+ (don't use scratch float !) */
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ On the pdp, these are:
+ Reg 7 = pc;
+ reg 6 = sp;
+ reg 5 = fp; not necessarily!
+*/
+
+#define FIXED_REGISTERS \
+{0, 0, 0, 0, 0, 0, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 1, 1 }
+
+
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+/* don't know about fp */
+#define CALL_USED_REGISTERS \
+{1, 1, 0, 0, 0, 0, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 1, 1 }
+
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+*/
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+((REGNO <= PC_REGNUM)? \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) \
+ :1)
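+
+/* Examples: HImode takes one CPU register, SImode a two-register
+   pair such as r0/r1, DImode four registers; an FPU accumulator
+   always holds a whole value, so the answer there is 1.  */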
+
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ On the pdp, the cpu registers can hold any mode other than float
+ (because otherwise we may end up being asked to move from CPU to FPU
+ register, which isn't a valid operation on the PDP11).
+ For CPU registers, check alignment.
+
+ FPU accepts SF and DF but actually holds a DF - simplifies life!
+*/
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+(((REGNO) <= PC_REGNUM)? \
+ ((GET_MODE_BITSIZE(MODE) <= 16) \
+ || (GET_MODE_BITSIZE(MODE) >= 32 && \
+ !((REGNO) & 1) && !FLOAT_MODE_P (MODE))) \
+ :FLOAT_MODE_P (MODE))
+
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) 0
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Register in which static-chain is passed to a function. */
+/* ??? - i don't want to give up a reg for this! */
+#define STATIC_CHAIN_REGNUM 4
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The pdp has a couple of classes:
+
+MUL_REGS are used for odd numbered regs, to use in 16-bit multiplication
+ (even numbered do 32-bit multiply)
+LMUL_REGS long multiply registers (even numbered regs )
+ (don't need them, all 32-bit regs are even numbered!)
+GENERAL_REGS is all cpu
+LOAD_FPU_REGS is the first four fpu regs; they are easier to load
+NO_LOAD_FPU_REGS is ac4 and ac5, currently - difficult to load them
+FPU_REGS is all fpu regs
+*/
+
+enum reg_class { NO_REGS, MUL_REGS, GENERAL_REGS, LOAD_FPU_REGS, NO_LOAD_FPU_REGS, FPU_REGS, ALL_REGS, LIM_REG_CLASSES };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* We have to allow this until cmpsi/tstsi are fixed in a better way.  */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Since GENERAL_REGS is the same class as ALL_REGS,
+ don't give it a different class number; just make it an alias. */
+
+/* #define GENERAL_REGS ALL_REGS */
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES {"NO_REGS", "MUL_REGS", "GENERAL_REGS", "LOAD_FPU_REGS", "NO_LOAD_FPU_REGS", "FPU_REGS", "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS {{0}, {0x00aa}, {0xc0ff}, {0x0f00}, {0x3000}, {0x3f00}, {0xffff}}
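+
+/* Decoded, with bit N standing for register N: 0x00aa is the odd
+   CPU regs (r1, r3, r5 and the fixed pc), 0xc0ff is r0-pc plus the
+   soft frame and argument pointers, 0x0f00 is ac0-ac3, 0x3000 is
+   ac4-ac5, 0x3f00 is ac0-ac5, and 0xffff is all registers.  */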
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) pdp11_regno_reg_class (REGNO)
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* The following macro defines cover classes for the Integrated
+   Register Allocator.  Cover classes are a set of non-intersecting
+   register classes covering all hard registers used for register
+   allocation purposes.  Any move between two registers of a cover
+   class should be cheaper than a load or store of the registers.
+   The macro value is an array of register classes with
+   LIM_REG_CLASSES used as the end marker.  */
+
+#define IRA_COVER_CLASSES { GENERAL_REGS, FPU_REGS, LIM_REG_CLASSES }
+
+/* Hook for testing if memory is needed for moving between registers. */
+#define SECONDARY_MEMORY_NEEDED(class1, class2, m) \
+ pdp11_secondary_memory_needed (class1, class2, m)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+((CLASS == GENERAL_REGS || CLASS == MUL_REGS)? \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD): \
+ 1 \
+)
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ pdp11_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame.
+*/
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On the pdp11, the stack is on an even boundary */
+#define PUSH_ROUNDING(BYTES) ((BYTES + 1) & ~1)
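+
+/* E.g. PUSH_ROUNDING (1) == 2 and PUSH_ROUNDING (2) == 2: even a
+   byte push moves sp by a full word.  */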
+
+/* current_first_parm_offset stores the # of registers pushed on the
+ stack */
+extern int current_first_parm_offset;
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+#define BASE_RETURN_VALUE_REG(MODE) \
+ (FLOAT_MODE_P (MODE) ? AC0_REGNUM : RETVAL_REGNUM)
+
+/* 1 if N is a possible register number for function argument passing.
+ - not used on pdp */
+
+#define FUNCTION_ARG_REGNO_P(N) 0
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+*/
+
+#define CUMULATIVE_ARGS int
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+   The offset normally starts at 0, but starts at 1 word
+   when the function gets a structure-value-address as an
+   invisible first argument.  */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ gcc_unreachable ();
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+extern int may_call_alloca;
+
+#define EXIT_IGNORE_STACK 1
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ There are two registers that can always be eliminated on the pdp11.
+ The frame pointer and the arg pointer can be replaced by either the
+ hard frame pointer or to the stack pointer, depending upon the
+ circumstances. The hard frame pointer is not used before reload and
+ so it is not eligible for elimination. */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = pdp11_initial_elimination_offset ((FROM), (TO)))
+
+
+/* Addressing modes, and classification of registers for them. */
+
+#define HAVE_POST_INCREMENT 1
+
+#define HAVE_PRE_DECREMENT 1
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ ((REGNO) <= PC_REGNUM || (unsigned) reg_renumber[REGNO] <= PC_REGNUM || \
+ (REGNO) == ARG_POINTER_REGNUM || (REGNO) == FRAME_POINTER_REGNUM)
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P (REGNO)
+
+/* Now macros that check whether X is a register and also,
+ strictly, whether it is in a specified class.
+*/
+
+
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 1
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_CODE (X) != CONST_DOUBLE || legitimate_const_double_p (X))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) (1)
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) (1)
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE HImode
+
+/* Define this if a raw index is all that is needed for a
+ `tablejump' insn. */
+#define CASE_TAKES_INDEX_RAW
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction.
+*/
+
+#define MOVE_MAX 2
+
+/* Nonzero if access to memory by byte is slow and undesirable.  */
+#define SLOW_BYTE_ACCESS 0
+
+/* Do not break .stabs pseudos into continuations. */
+#define DBX_CONTIN_LENGTH 0
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Give a comparison code (EQ, NE etc) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. For floating-point, CCFPmode
+ should be used. */
+
+#define SELECT_CC_MODE(OP,X,Y) \
+(GET_MODE_CLASS(GET_MODE(X)) == MODE_FLOAT? CCFPmode : CCmode)
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode HImode
+
+/* A function address in a call instruction
+ is a word address (for indexing purposes)
+ so give the MEM rtx a word's mode. */
+#define FUNCTION_MODE HImode
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+/* #define NO_FUNCTION_CSE */
+
+
+/* Tell emit-rtl.c how to initialize special values on a per-function basis.  */
+extern struct rtx_def *cc0_reg_rtx;
+
+#define CC_STATUS_MDEP rtx
+
+#define CC_STATUS_MDEP_INIT (cc_status.mdep = 0)
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+/* Here we define machine-dependent flags and fields in cc_status
+ (see `conditions.h'). */
+
+#define CC_IN_FPU 04000
+
+/* Do UPDATE_CC if EXP is a set; used in NOTICE_UPDATE_CC.
+
+   For floats, only compares set the condition codes correctly;
+   everything else nullifies them.
+
+   We should get cc0 out soon.  */
+
+/* Store in cc_status the expressions
+ that the condition codes will describe
+ after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) \
+{ if (GET_CODE (EXP) == SET) \
+ { \
+ notice_update_cc_on_set(EXP, INSN); \
+ } \
+ else if (GET_CODE (EXP) == PARALLEL \
+ && GET_CODE (XVECEXP (EXP, 0, 0)) == SET) \
+ { \
+ notice_update_cc_on_set(XVECEXP (EXP, 0, 0), INSN); \
+ } \
+ else if (GET_CODE (EXP) == CALL) \
+ { /* all bets are off */ CC_STATUS_INIT; } \
+ if (cc_status.value1 && GET_CODE (cc_status.value1) == REG \
+ && cc_status.value2 \
+ && reg_overlap_mentioned_p (cc_status.value1, cc_status.value2)) \
+ { \
+ cc_status.value2 = 0; \
+ } \
+}
+
+/* Control the assembler format that we output. */
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF ""
+
+/* Output before read-only data. */
+
+#define TEXT_SECTION_ASM_OP "\t.text\n"
+
+/* Output before writable data. */
+
+#define DATA_SECTION_ASM_OP "\t.data\n"
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{"r0", "r1", "r2", "r3", "r4", "r5", "sp", "pc", \
+ "ac0", "ac1", "ac2", "ac3", "ac4", "ac5", "fp", "ap" }
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s_%lu", PREFIX, (unsigned long)(NUM))
+
+#define ASM_OUTPUT_ASCII(FILE, P, SIZE) \
+ output_ascii (FILE, P, SIZE)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t%sL_%d\n", TARGET_UNIX_ASM ? "" : ".word ", VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+ Don't define this if it is not supported. */
+
+/* #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) */
+
+/* This is how to output an assembler line
+   that says to advance the location counter
+   to a multiple of 2**LOG bytes.  */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ switch (LOG) \
+ { \
+ case 0: \
+ break; \
+ case 1: \
+ fprintf (FILE, "\t.even\n"); \
+ break; \
+ default: \
+ gcc_unreachable (); \
+ }
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.=.+ %#ho\n", (unsigned short)(SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ pdp11_asm_output_var (FILE, NAME, SIZE, ALIGN, true)
+
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+ pdp11_asm_output_var (FILE, NAME, SIZE, ALIGN, false)
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address (FILE, ADDR)
+
+#define ASM_OUTPUT_REG_PUSH(FILE,REGNO) \
+( \
+ fprintf (FILE, "\tmov %s, -(sp)\n", reg_names[REGNO]) \
+)
+
+#define ASM_OUTPUT_REG_POP(FILE,REGNO) \
+( \
+ fprintf (FILE, "\tmov (sp)+, %s\n", reg_names[REGNO]) \
+)
+
+#define TRAMPOLINE_SIZE 8
+#define TRAMPOLINE_ALIGNMENT 16
+
+/* There is no point in avoiding branches on a pdp11, since branches
+   are really cheap - the option exists mainly to measure how much
+   difference the BRANCH_COST macro makes in generated code.  */
+#define BRANCH_COST(speed_p, predictable_p) (TARGET_BRANCH_CHEAP ? 0 : 1)
+
+
+#define COMPARE_FLAG_MODE HImode
diff --git a/gcc/config/pdp11/pdp11.md b/gcc/config/pdp11/pdp11.md
new file mode 100644
index 000000000..1c6542685
--- /dev/null
+++ b/gcc/config/pdp11/pdp11.md
@@ -0,0 +1,1386 @@
+;;- Machine description for the pdp11 for GNU C compiler
+;; Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2004, 2005
+;; 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(include "predicates.md")
+(include "constraints.md")
+
+(define_constants
+ [
+ ;; Register numbers
+ (R0_REGNUM 0)
+ (RETVAL_REGNUM 0)
+ (HARD_FRAME_POINTER_REGNUM 5)
+ (STACK_POINTER_REGNUM 6)
+ (PC_REGNUM 7)
+ (AC0_REGNUM 8)
+ (AC3_REGNUM 11)
+ (AC4_REGNUM 12)
+ (AC5_REGNUM 13)
+ ;; The next two are not physical registers but are used for addressing
+ ;; arguments.
+ (FRAME_POINTER_REGNUM 14)
+ (ARG_POINTER_REGNUM 15)
+ (FIRST_PSEUDO_REGISTER 16)
+ ;; Branch offset limits, as byte offsets from instruction address
+ (MIN_BRANCH -254)
+ (MAX_BRANCH 256)
+ (MIN_SOB -126)
+ (MAX_SOB 0)])
+
+;; HI is 16 bit
+;; QI is 8 bit
+
+;; Integer modes supported on the PDP11, with a mapping from machine mode
+;; to mnemonic suffix.  SImode and DImode are always special cases.
+(define_mode_iterator PDPint [QI HI])
+(define_mode_attr isfx [(QI "b") (HI "")])
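+
+;; For illustration: instantiating "mov<mode>" below with this
+;; iterator produces a movqi and a movhi pattern, and <PDPint:isfx>
+;; becomes "b" for QImode and "" for HImode, so the templates emit
+;; movb/clrb versus mov/clr.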
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;;- cpp macro #define NOTICE_UPDATE_CC in file tm.h handles condition code
+;;- updates for most instructions.
+
+;;- Operand classes for the register allocator:
+
+;; Compare instructions.
+
+;; Currently we only support DF floats, which saves us quite some
+;; hassle switching the FP mode!
+;; We assume that the CPU is always in long float mode and
+;; 16-bit integer mode - currently the prologue for main does this,
+;; but maybe we should just set up a new crt0 properly
+;; (and what about signal handling code?).
+;; (We don't even let SF floats into the register file, so
+;; we should only have to worry about truncating and widening
+;; when going to memory.)
+
+;; abort() gets called by g++ - we must define libfuncs for cmp_optab
+;; and ucmp_optab for SImode, because we don't have SImode compares;
+;; as long as no libfunc is there, we abort ().
+
+;; The only thing that remains to be done then is output
+;; the floats in a way the assembler can handle it (and
+;; if you're really into it, use a PDP11 float emulation
+;; library to do floating point constant folding - but
+;; I guess you'll get reasonable results even when not
+;; doing this)
+;; the last thing to do is fix the UPDATE_CC macro to check
+;; for floating point condition codes, and set cc_status
+;; properly, also setting the CC_IN_FCCR flag.
+
+;; define attributes
+;; currently type is only fpu or arith or unknown, maybe branch later ?
+;; default is arith
+(define_attr "type" "unknown,arith,fp" (const_string "arith"))
+
+;; length default is 2 bytes each
+(define_attr "length" "" (const_int 2))
+
+;; a user's asm statement
+(define_asm_attributes
+ [(set_attr "type" "unknown")
+; length for asm is the max length per statement. That would be
+; 3 words, for a two-operand instruction with extra word addressing
+; modes for both operands.
+ (set_attr "length" "6")])
+
+;; define function units
+
+;; arithmetic - values here immediately when next insn issued
+;; or does it mean the number of cycles after this insn was issued?
+;; how do I say that fpu insns use cpu also? (pre-interaction phase)
+
+;(define_function_unit "cpu" 1 1 (eq_attr "type" "arith") 0 0)
+;(define_function_unit "fpu" 1 1 (eq_attr "type" "fp") 0 0)
+
+;; compare
+(define_insn "*cmpdf"
+ [(set (cc0)
+ (compare (match_operand:DF 0 "general_operand" "fR,fR,Q,QF")
+ (match_operand:DF 1 "register_or_const0_operand" "G,a,G,a")))]
+ "TARGET_FPU"
+ "*
+{
+ cc_status.flags = CC_IN_FPU;
+ if (which_alternative == 0 || which_alternative == 2)
+ return \"{tstd|tstf} %0\;cfcc\";
+ else
+ return \"{cmpd|cmpf} %0, %1\;cfcc\";
+}"
+ [(set_attr "length" "4,4,6,6")])
+
+(define_insn "*cmp<mode>"
+ [(set (cc0)
+ (compare (match_operand:PDPint 0 "general_operand" "rR,rR,rR,Q,Qi,Qi")
+ (match_operand:PDPint 1 "general_operand" "N,rR,Qi,N,rR,Qi")))]
+ ""
+ "@
+ tst<PDPint:isfx> %0
+ cmp<PDPint:isfx> %0,%1
+ cmp<PDPint:isfx> %0,%1
+ tst<PDPint:isfx> %0
+ cmp<PDPint:isfx> %0,%1
+ cmp<PDPint:isfx> %0,%1"
+ [(set_attr "length" "2,2,4,4,4,6")])
+
+;; sob instruction - we need an assembler which can make this instruction
+;; valid under _all_ circumstances!
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (plus:HI (match_operand:HI 0 "register_operand" "+r")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:HI (match_dup 0)
+ (const_int -1)))]
+ "TARGET_40_PLUS"
+ "*
+{
+ static int labelcount = 0;
+ static char buf[1000];
+
+ if (get_attr_length (insn) == 2)
+ return \"sob %0, %l1\";
+
+ /* emulate sob */
+ output_asm_insn (\"dec %0\", operands);
+
+  /* Skip the loop branch once the counter has reached zero.  */
+  sprintf (buf, \"beq LONG_SOB%d\", labelcount);
+ output_asm_insn (buf, NULL);
+
+ output_asm_insn (\"jmp %l1\", operands);
+
+ sprintf (buf, \"LONG_SOB%d:\", labelcount++);
+ output_asm_insn (buf, NULL);
+
+ return \"\";
+}"
+  [(set (attr "length") (if_then_else (ior (lt (minus (match_dup 1)
+						       (pc))
+						(const_int MIN_SOB))
+					    (gt (minus (match_dup 1)
+						       (pc))
+						(const_int MAX_SOB)))
+ (const_int 8)
+ (const_int 2)))])
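+
+;; When the backward branch is out of sob range, the code above
+;; expands to this sketch (label names illustrative):
+;;
+;;	dec	r2
+;;	beq	LONG_SOB0	; counter hit zero: exit the loop
+;;	jmp	looptop		; otherwise branch back as sob would
+;; LONG_SOB0: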
+
+;; These control RTL generation for conditional jump insns
+;; and match them for register allocation.
+
+(define_expand "cbranchdf4"
+ [(set (cc0)
+ (compare (match_operand:DF 1 "general_operand")
+ (match_operand:DF 2 "register_or_const0_operand")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_FPU"
+ "")
+
+(define_expand "cbranch<mode>4"
+ [(set (cc0)
+ (compare (match_operand:PDPint 1 "general_operand")
+ (match_operand:PDPint 2 "general_operand")))
+ (set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+;; Problem with too-short jump distances: we need an assembler which
+;; can make this valid for all jump distances, e.g. gas.
+
+;; these must be changed to check for CC_IN_FCCR if float is to be
+;; enabled
+
+(define_insn "*branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "* return output_jump(GET_CODE (operands[0]), 0, get_attr_length(insn));"
+ [(set (attr "length") (if_then_else (ior (lt (minus (match_dup 1)
+ (pc))
+ (const_int MIN_BRANCH))
+ (gt (minus (match_dup 1)
+ (pc))
+ (const_int MAX_BRANCH)))
+ (const_int 6)
+ (const_int 2)))])
+
+
+;; These match inverted jump insns for register allocation.
+
+(define_insn "*branch_inverted"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "* return output_jump(GET_CODE (operands[0]), 1, get_attr_length(insn));"
+ [(set (attr "length") (if_then_else (ior (lt (minus (match_dup 1)
+ (pc))
+ (const_int MIN_BRANCH))
+ (gt (minus (match_dup 1)
+ (pc))
+ (const_int MAX_BRANCH)))
+ (const_int 6)
+ (const_int 2)))])
+
+;; Move instructions
+
+(define_insn "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,g")
+ (match_operand:DI 1 "general_operand" "rN,g"))]
+ ""
+ "* return output_move_multiple (operands);"
+;; what's the most expensive code - say twice movsi = 16
+ [(set_attr "length" "16,32")])
+
+(define_insn "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,g,g")
+ (match_operand:SI 1 "general_operand" "rN,IJ,IJ,g"))]
+ ""
+ "* return output_move_multiple (operands);"
+;; what's the most expensive code ? - I think 8!
+;; we could split it up and make several sub-cases...
+ [(set_attr "length" "4,6,8,16")])
+
+(define_insn "mov<mode>"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "=rR,rR,Q,Q")
+ (match_operand:PDPint 1 "general_operand" "rRN,Qi,rRN,Qi"))]
+ ""
+ "*
+{
+ if (operands[1] == const0_rtx)
+ return \"clr<PDPint:isfx> %0\";
+
+ return \"mov<PDPint:isfx> %1, %0\";
+}"
+ [(set_attr "length" "2,4,4,6")])
+
+(define_insn "movdf"
+ [(set (match_operand:DF 0 "float_nonimm_operand" "=a,fR,a,Q,g")
+ (match_operand:DF 1 "float_operand" "fR,a,FQ,a,g"))]
+ "TARGET_FPU"
+ "* if (which_alternative ==0 || which_alternative == 2)
+ return \"ldd %1, %0\";
+ else if (which_alternative == 1 || which_alternative == 3)
+ return \"std %1, %0\";
+ else
+ return output_move_multiple (operands); "
+;; last one is worst-case
+ [(set_attr "length" "2,2,4,4,24")])
+
+(define_insn "movsf"
+ [(set (match_operand:SF 0 "float_nonimm_operand" "=a,fR,a,Q,g")
+ (match_operand:SF 1 "float_operand" "fR,a,FQ,a,g"))]
+ "TARGET_FPU"
+ "* if (which_alternative ==0 || which_alternative == 2)
+ return \"{ldcfd|movof} %1, %0\";
+ else if (which_alternative == 1 || which_alternative == 3)
+ return \"{stcdf|movfo} %1, %0\";
+ else
+ return output_move_multiple (operands); "
+;; last one is worst-case
+ [(set_attr "length" "2,2,4,4,12")])
+
+;; maybe fiddle a bit with move_ratio, then
+;; let constraints only accept a register ...
+
+(define_expand "movmemhi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand" "=g,g")
+ (match_operand:BLK 1 "general_operand" "g,g"))
+ (use (match_operand:HI 2 "general_operand" "n,mr"))
+ (use (match_operand:HI 3 "immediate_operand" "i,i"))
+ (clobber (match_scratch:HI 4 "=&r,X"))
+ (clobber (match_dup 5))
+ (clobber (match_dup 6))
+ (clobber (match_dup 2))])]
+ "(TARGET_BCOPY_BUILTIN)"
+ "
+{
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_mode_reg (Pmode, XEXP (operands[0], 0)));
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_to_mode_reg (Pmode, XEXP (operands[1], 0)));
+
+ operands[5] = XEXP (operands[0], 0);
+ operands[6] = XEXP (operands[1], 0);
+}")
+
+
+(define_insn "movmemhi1"
+ [(set (mem:BLK (match_operand:HI 0 "register_operand" "r,r"))
+ (mem:BLK (match_operand:HI 1 "register_operand" "r,r")))
+ (use (match_operand:HI 2 "general_operand" "n,r"))
+ (use (match_operand:HI 3 "immediate_operand" "i,i"))
+ (clobber (match_scratch:HI 4 "=&r,X"))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))]
+ "(TARGET_BCOPY_BUILTIN)"
+ "* return output_block_move (operands);"
+;;; just a guess
+ [(set_attr "length" "80")])
+
+
+
+;;- truncation instructions
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "float_nonimm_operand" "=f,R,Q")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "f,a,a")))]
+ "TARGET_FPU"
+ "* if (which_alternative ==0)
+ {
+ return \"\";
+ }
+ else if (which_alternative == 1)
+ return \"{stcdf|movfo} %1, %0\";
+ else
+ return \"{stcdf|movfo} %1, %0\";
+ "
+ [(set_attr "length" "0,2,4")])
+
+
+(define_expand "truncsihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=g")
+ (subreg:HI
+ (match_operand:SI 1 "general_operand" "or")
+ 0))]
+ ""
+ "")
+
+
+;;- zero extension instructions
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (zero_extend:HI (match_operand:QI 1 "general_operand" "0,0")))]
+ ""
+ "bic $0177400, %0"
+ [(set_attr "length" "4,6")])
+
+(define_expand "zero_extendhisi2"
+ [(set (subreg:HI
+ (match_dup 0)
+ 2)
+ (match_operand:HI 1 "register_operand" "r"))
+ (set (subreg:HI
+ (match_operand:SI 0 "register_operand" "=r")
+ 0)
+ (const_int 0))]
+ ""
+ "/* operands[1] = make_safe_from (operands[1], operands[0]); */")
+
+
+;;- sign extension instructions
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f,a,a")
+ (float_extend:DF (match_operand:SF 1 "float_operand" "f,R,Q")))]
+ "TARGET_FPU"
+ "@
+ /* nothing */
+ {ldcfd|movof} %1, %0
+ {ldcfd|movof} %1, %0"
+ [(set_attr "length" "0,2,4")])
+
+;; movb sign-extends when the destination is a register, which is
+;; what this pattern relies on.
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "general_operand" "rR,Q")))]
+ ""
+ "movb %1, %0"
+ [(set_attr "length" "2,4")])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_operand" "rR,Q")))]
+ "TARGET_40_PLUS"
+ "*
+{
+ rtx latehalf[2];
+
+ /* make register pair available */
+ latehalf[0] = operands[0];
+ operands[0] = gen_rtx_REG (HImode, REGNO (operands[0])+ 1);
+
+ output_asm_insn(\"movb %1, %0\", operands);
+ output_asm_insn(\"sxt %0\", latehalf);
+
+ return \"\";
+}"
+ [(set_attr "length" "4,6")])
+
+;; maybe we have to use define_expand to say that we have the instruction,
+;; unconditionally, and then match dependent on CPU type:
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extend:SI (match_operand:HI 1 "general_operand" "g")))]
+ ""
+ "")
+
+(define_insn "" ; "extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=o,<,r")
+ (sign_extend:SI (match_operand:HI 1 "general_operand" "g,g,g")))]
+ "TARGET_40_PLUS"
+ "*
+{
+ rtx latehalf[2];
+
+ /* we don't want to mess with auto increment */
+
+ switch (which_alternative)
+ {
+ case 0:
+
+ latehalf[0] = operands[0];
+ operands[0] = adjust_address(operands[0], HImode, 2);
+
+ output_asm_insn(\"mov %1, %0\", operands);
+ output_asm_insn(\"sxt %0\", latehalf);
+
+ return \"\";
+
+ case 1:
+
+ /* - auto-decrement - right direction ;-) */
+ output_asm_insn(\"mov %1, %0\", operands);
+ output_asm_insn(\"sxt %0\", operands);
+
+ return \"\";
+
+ case 2:
+
+ /* make register pair available */
+ latehalf[0] = operands[0];
+ operands[0] = gen_rtx_REG (HImode, REGNO (operands[0]) + 1);
+
+ output_asm_insn(\"mov %1, %0\", operands);
+ output_asm_insn(\"sxt %0\", latehalf);
+
+ return \"\";
+
+ default:
+
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "10,6,6")])
+
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "general_operand" "0")))]
+ "(! TARGET_40_PLUS)"
+ "*
+{
+ static int count = 0;
+ char buf[100];
+ rtx lateoperands[2];
+
+ lateoperands[0] = operands[0];
+ operands[0] = gen_rtx_REG (HImode, REGNO (operands[0]) + 1);
+
+ output_asm_insn(\"tst %0\", operands);
+ sprintf(buf, \"bge extendhisi%d\", count);
+ output_asm_insn(buf, NULL);
+  output_asm_insn(\"mov $-1, %0\", lateoperands);
+ sprintf(buf, \"bne extendhisi%d\", count+1);
+ output_asm_insn(buf, NULL);
+ sprintf(buf, \"\\nextendhisi%d:\", count);
+ output_asm_insn(buf, NULL);
+ output_asm_insn(\"clr %0\", lateoperands);
+ sprintf(buf, \"\\nextendhisi%d:\", count+1);
+ output_asm_insn(buf, NULL);
+
+ count += 2;
+
+ return \"\";
+}"
+ [(set_attr "length" "12")])
+
+;; Conversions between float and int and vice versa.
+;; Using the cc_status.flags field we could probably cut down
+;; on seti and setl instructions.
+;; We assume that we are normally in double and integer mode -
+;; but what do pdp11 library routines do to the FPU mode?
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "=a,a,a")
+ (float:DF (match_operand:SI 1 "general_operand" "r,R,Q")))]
+ "TARGET_FPU"
+ "* if (which_alternative ==0)
+ {
+ rtx latehalf[2];
+
+ latehalf[0] = NULL;
+ latehalf[1] = gen_rtx_REG (HImode, REGNO (operands[1]) + 1);
+ output_asm_insn(\"mov %1, -(sp)\", latehalf);
+ output_asm_insn(\"mov %1, -(sp)\", operands);
+
+ output_asm_insn(\"setl\", operands);
+ output_asm_insn(\"{ldcld|movif} (sp)+, %0\", operands);
+ output_asm_insn(\"seti\", operands);
+ return \"\";
+ }
+ else if (which_alternative == 1)
+ return \"setl\;{ldcld|movif} %1, %0\;seti\";
+ else
+ return \"setl\;{ldcld|movif} %1, %0\;seti\";
+ "
+ [(set_attr "length" "10,6,8")])
+
+(define_insn "floathidf2"
+ [(set (match_operand:DF 0 "register_operand" "=a,a")
+ (float:DF (match_operand:HI 1 "general_operand" "rR,Qi")))]
+ "TARGET_FPU"
+ "{ldcid|movif} %1, %0"
+ [(set_attr "length" "2,4")])
+
+;; cut float to int
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,R,Q")
+ (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "a,a,a"))))]
+ "TARGET_FPU"
+ "* if (which_alternative ==0)
+ {
+ output_asm_insn(\"setl\", operands);
+ output_asm_insn(\"{stcdl|movfi} %1, -(sp)\", operands);
+ output_asm_insn(\"seti\", operands);
+ output_asm_insn(\"mov (sp)+, %0\", operands);
+ operands[0] = gen_rtx_REG (HImode, REGNO (operands[0]) + 1);
+ output_asm_insn(\"mov (sp)+, %0\", operands);
+ return \"\";
+ }
+ else if (which_alternative == 1)
+ return \"setl\;{stcdl|movfi} %1, %0\;seti\";
+ else
+ return \"setl\;{stcdl|movfi} %1, %0\;seti\";
+ "
+ [(set_attr "length" "10,6,8")])
+
+(define_insn "fix_truncdfhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (fix:HI (fix:DF (match_operand:DF 1 "register_operand" "a,a"))))]
+ "TARGET_FPU"
+ "{stcdi|movfi} %1, %0"
+ [(set_attr "length" "2,4")])
+
+
+;;- arithmetic instructions
+;;- add instructions
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "=a,a")
+ (plus:DF (match_operand:DF 1 "register_operand" "%0,0")
+ (match_operand:DF 2 "general_operand" "fR,QF")))]
+ "TARGET_FPU"
+ "{addd|addf} %2, %0"
+ [(set_attr "length" "2,4")])
+
+(define_insn "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,o")
+ (plus:DI (match_operand:DI 1 "general_operand" "%0,0,0,0")
+ (match_operand:DI 2 "general_operand" "r,on,r,on")))]
+ ""
+ "*
+{
+ rtx inops[2];
+ rtx exops[4][2];
+
+ inops[0] = operands[0];
+ inops[1] = operands[2];
+ pdp11_expand_operands (inops, exops, 2, NULL, either);
+
+ if (!CONSTANT_P (exops[0][1]) || INTVAL (exops[0][1]) != 0)
+ output_asm_insn (\"add %1, %0\", exops[0]);
+ if (!CONSTANT_P (exops[1][1]) || INTVAL (exops[1][1]) != 0)
+ {
+ output_asm_insn (\"add %1, %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+ }
+ if (!CONSTANT_P (exops[2][1]) || INTVAL (exops[2][1]) != 0)
+ {
+ output_asm_insn (\"add %1, %0\", exops[2]);
+ output_asm_insn (\"adc %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+ }
+ if (!CONSTANT_P (exops[3][1]) || INTVAL (exops[3][1]) != 0)
+ {
+ output_asm_insn (\"add %1, %0\", exops[3]);
+ output_asm_insn (\"adc %0\", exops[2]);
+ output_asm_insn (\"adc %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+ }
+
+ return \"\";
+}"
+ [(set_attr "length" "20,28,40,48")])
+
+;; Note that the register operand is not marked earlyclobber.
+;; The reason is that SI values go in register pairs, so they
+;; can't partially overlap. They can be either disjoint, or
+;; source and destination can be equal. The latter case is
+;; handled properly because of the ordering of the individual
+;; instructions used. Specifically, carry from the low to the
+;; high word is added at the end, so the adding of the high parts
+;; will always use the original high part and not a high part
+;; modified by carry (which would amount to double carry).
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,o,o")
+ (plus:SI (match_operand:SI 1 "general_operand" "%0,0,0,0")
+ (match_operand:SI 2 "general_operand" "r,on,r,on")))]
+ ""
+ "*
+{
+ rtx inops[2];
+ rtx exops[2][2];
+
+ inops[0] = operands[0];
+ inops[1] = operands[2];
+ pdp11_expand_operands (inops, exops, 2, NULL, either);
+
+ if (!CONSTANT_P (exops[0][1]) || INTVAL (exops[0][1]) != 0)
+ output_asm_insn (\"add %1, %0\", exops[0]);
+ if (!CONSTANT_P (exops[1][1]) || INTVAL (exops[1][1]) != 0)
+ {
+ output_asm_insn (\"add %1, %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+ }
+
+ return \"\";
+}"
+ [(set_attr "length" "6,10,12,16")])
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,rR,Q,Q")
+ (plus:HI (match_operand:HI 1 "general_operand" "%0,0,0,0")
+ (match_operand:HI 2 "general_operand" "rRLM,Qi,rRLM,Qi")))]
+ ""
+ "*
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL(operands[2]) == 1)
+ return \"inc %0\";
+ else if (INTVAL(operands[2]) == -1)
+ return \"dec %0\";
+ }
+
+ return \"add %2, %0\";
+}"
+ [(set_attr "length" "2,4,4,6")])
+
+
+;;- subtract instructions
+;; We don't have to care about constant second
+;; args, since they are canonicalized to plus:xx now!
+;; Does that also hold for minus:DF?
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "=a,a")
+ (minus:DF (match_operand:DF 1 "register_operand" "0,0")
+ (match_operand:DF 2 "general_operand" "fR,Q")))]
+ "TARGET_FPU"
+ "{subd|subf} %2, %0"
+ [(set_attr "length" "2,4")])
+
+(define_insn "subdi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,o")
+ (minus:DI (match_operand:DI 1 "general_operand" "0,0,0,0")
+ (match_operand:DI 2 "general_operand" "r,on,r,on")))]
+ ""
+ "*
+{
+ rtx inops[2];
+ rtx exops[4][2];
+
+ inops[0] = operands[0];
+ inops[1] = operands[2];
+ pdp11_expand_operands (inops, exops, 2, NULL, either);
+
+ if (!CONSTANT_P (exops[0][1]) || INTVAL (exops[0][1]) != 0)
+ output_asm_insn (\"sub %1, %0\", exops[0]);
+ if (!CONSTANT_P (exops[1][1]) || INTVAL (exops[1][1]) != 0)
+ {
+ output_asm_insn (\"sub %1, %0\", exops[1]);
+ output_asm_insn (\"sbc %0\", exops[0]);
+ }
+ if (!CONSTANT_P (exops[2][1]) || INTVAL (exops[2][1]) != 0)
+ {
+ output_asm_insn (\"sub %1, %0\", exops[2]);
+ output_asm_insn (\"sbc %0\", exops[1]);
+ output_asm_insn (\"sbc %0\", exops[0]);
+ }
+ if (!CONSTANT_P (exops[3][1]) || INTVAL (exops[3][1]) != 0)
+ {
+ output_asm_insn (\"sub %1, %0\", exops[3]);
+ output_asm_insn (\"sbc %0\", exops[2]);
+ output_asm_insn (\"sbc %0\", exops[1]);
+ output_asm_insn (\"sbc %0\", exops[0]);
+ }
+
+ return \"\";
+}"
+ [(set_attr "length" "20,28,40,48")])
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,o,o")
+ (minus:SI (match_operand:SI 1 "general_operand" "0,0,0,0")
+ (match_operand:SI 2 "general_operand" "r,on,r,on")))]
+ ""
+ "*
+{
+ rtx inops[2];
+ rtx exops[2][2];
+
+ inops[0] = operands[0];
+ inops[1] = operands[2];
+ pdp11_expand_operands (inops, exops, 2, NULL, either);
+
+ if (!CONSTANT_P (exops[0][1]) || INTVAL (exops[0][1]) != 0)
+ output_asm_insn (\"sub %1, %0\", exops[0]);
+ if (!CONSTANT_P (exops[1][1]) || INTVAL (exops[1][1]) != 0)
+ {
+ output_asm_insn (\"sub %1, %0\", exops[1]);
+ output_asm_insn (\"sbc %0\", exops[0]);
+ }
+
+ return \"\";
+}"
+ [(set_attr "length" "6,10,12,16")])
+
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,rR,Q,Q")
+ (minus:HI (match_operand:HI 1 "general_operand" "0,0,0,0")
+ (match_operand:HI 2 "general_operand" "rR,Qi,rR,Qi")))]
+ ""
+ "*
+{
+ gcc_assert (GET_CODE (operands[2]) != CONST_INT);
+
+ return \"sub %2, %0\";
+}"
+ [(set_attr "length" "2,4,4,6")])
+
+;;;;- and instructions
+;; Bit-and on the pdp (like on the VAX) is done with a clear-bits insn.
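+;; For example (a sketch, constants illustrative): "x &= 0x00ff"
+;; becomes "bic $0177400, x" -- the expander below complements one
+;; operand so the bic insn can clear the unwanted bits.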
+
+(define_expand "and<mode>3"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "")
+ (and:PDPint (not:PDPint (match_operand:PDPint 1 "general_operand" ""))
+ (match_operand:PDPint 2 "general_operand" "")))]
+ ""
+ "
+{
+ rtx op1 = operands[1];
+
+ /* If there is a constant argument, complement that one.
+ Similarly, if one of the inputs is the same as the output,
+ complement the other input. */
+ if ((CONST_INT_P (operands[2]) && ! CONST_INT_P (op1)) ||
+ rtx_equal_p (operands[0], operands[1]))
+ {
+ operands[1] = operands[2];
+ operands[2] = op1;
+ op1 = operands[1];
+ }
+
+ if (CONST_INT_P (op1))
+ operands[1] = GEN_INT (~INTVAL (op1));
+ else
+ operands[1] = expand_unop (<MODE>mode, one_cmpl_optab, op1, 0, 1);
+}")
+
+(define_insn "*bic<mode>"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "=rR,rR,Q,Q")
+ (and:PDPint
+ (not: PDPint (match_operand:PDPint 1 "general_operand" "rR,Qi,rR,Qi"))
+ (match_operand:PDPint 2 "general_operand" "0,0,0,0")))]
+ ""
+ "bic<PDPint:isfx> %1, %0"
+ [(set_attr "length" "2,4,4,6")])
+
+;;- Bit set (inclusive or) instructions
+(define_insn "ior<mode>3"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "=rR,rR,Q,Q")
+ (ior:PDPint (match_operand:PDPint 1 "general_operand" "%0,0,0,0")
+ (match_operand:PDPint 2 "general_operand" "rR,Qi,rR,Qi")))]
+ ""
+ "bis<PDPint:isfx> %2, %0"
+ [(set_attr "length" "2,4,4,6")])
+
+;;- xor instructions
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (xor:HI (match_operand:HI 1 "general_operand" "%0,0")
+ (match_operand:HI 2 "register_operand" "r,r")))]
+ "TARGET_40_PLUS"
+ "xor %2, %0"
+ [(set_attr "length" "2,4")])
+
+;;- one complement instructions
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "=rR,Q")
+ (not:PDPint (match_operand:PDPint 1 "general_operand" "0,0")))]
+ ""
+ "com<PDPint:isfx> %0"
+ [(set_attr "length" "2,4")])
+
+;;- arithmetic shift instructions
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "rR,Qi")))]
+ "TARGET_40_PLUS"
+ "ashc %2,%0"
+ [(set_attr "length" "2,4")])
+
+;; Arithmetic right shift on the pdp works by negating the shift count.
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))]
+ ""
+ "
+{
+ operands[2] = negate_rtx (HImode, operands[2]);
+}")
+
+;; define asl aslb asr asrb - ashc missing!
+
+;; asl
+(define_insn ""
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (ashift:HI (match_operand:HI 1 "general_operand" "0,0")
+ (const_int 1)))]
+ ""
+ "asl %0"
+ [(set_attr "length" "2,4")])
+
+;; another possibility for asr is a shift by -1;
+;; that might cause problems, since -1 can also be encoded as 65535!
+;; not in gcc2 ???
+
+;; asr
+(define_insn ""
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (ashift:HI (match_operand:HI 1 "general_operand" "0,0")
+ (const_int -1)))]
+ ""
+ "asr %0"
+ [(set_attr "length" "2,4")])
+
+;; lsr
+(define_insn "lsrhi1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+ (lshiftrt:HI (match_operand:HI 1 "general_operand" "0,0")
+ (const_int 1)))]
+ ""
+ "clc\;ror %0"
+ [(set_attr "length" "2,4")])
+
+(define_insn "lsrsi1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "general_operand" "0")
+ (const_int 1)))]
+ ""
+  "*
+{
+
+ rtx lateoperands[2];
+
+ lateoperands[0] = operands[0];
+ operands[0] = gen_rtx_REG (HImode, REGNO (operands[0]) + 1);
+
+ lateoperands[1] = operands[1];
+ operands[1] = gen_rtx_REG (HImode, REGNO (operands[1]) + 1);
+
+ output_asm_insn (\"clc\", operands);
+ output_asm_insn (\"ror %0\", lateoperands);
+ output_asm_insn (\"ror %0\", operands);
+
+ return \"\";
+}"
+ [(set_attr "length" "10")])
+
+(define_expand "lshrsi3"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "")]
+ ""
+ "
+{
+ rtx r;
+
+ if (!TARGET_40_PLUS &&
+ (GET_CODE (operands[2]) != CONST_INT ||
+ (unsigned) INTVAL (operands[2]) > 3))
+ FAIL;
+ emit_insn (gen_lsrsi1 (operands[0], operands[1]));
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ r = gen_reg_rtx (HImode);
+ emit_insn (gen_addhi3 (r, operands [2], GEN_INT (-1)));
+ emit_insn (gen_ashrsi3 (operands[0], operands[0], r));
+ }
+ else if ((unsigned) INTVAL (operands[2]) != 1)
+ {
+ emit_insn (gen_ashlsi3 (operands[0], operands[0],
+ GEN_INT (1 - INTVAL (operands[2]))));
+ }
+ DONE;
+}
+"
+)
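+;; The expansion above works because after the single logical shift
+;; the sign bit is zero: a logical right shift by N is one lsr
+;; followed by an arithmetic right shift by N-1, which ashrsi3 in
+;; turn encodes as an ashift by 1-N.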
+
+;; shift by an arbitrary count is expensive,
+;; shift by one is cheap - so let's do that, if
+;; space doesn't matter
+(define_insn ""
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r")
+ (ashift:HI (match_operand:HI 1 "general_operand" "0")
+ (match_operand:HI 2 "expand_shift_operand" "O")))]
+ "! optimize_size"
+ "*
+{
+ register int i;
+
+ for (i = 1; i <= abs(INTVAL(operands[2])); i++)
+ if (INTVAL(operands[2]) < 0)
+ output_asm_insn(\"asr %0\", operands);
+ else
+ output_asm_insn(\"asl %0\", operands);
+
+ return \"\";
+}"
+;; the longest sequence is 4 single-bit shifts, i.e. 8 bytes
+ [(set (attr "length") (const_int 8))])
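+;; E.g. (sketch): with operands[2] == 3 the pattern above emits
+;;   asl %0
+;;   asl %0
+;;   asl %0
+;; and with operands[2] == -2 it emits two "asr %0" instructions.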
+
+;; aslb
+(define_insn ""
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,o")
+ (ashift:QI (match_operand:QI 1 "general_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "n,n")))]
+ ""
+ "*
+{ /* allowing predec or post_inc is possible, but hairy! */
+ int i, cnt;
+
+ cnt = INTVAL(operands[2]) & 0x0007;
+
+ for (i=0 ; i < cnt ; i++)
+ output_asm_insn(\"aslb %0\", operands);
+
+ return \"\";
+}"
+;; the exact length would be (INTVAL (operands[2]) & 7) * (2 or 4) bytes;
+;; just use the worst case per alternative instead.
+ [(set_attr_alternative "length"
+ [(const_int 14)
+ (const_int 28)])])
+
+;;; asr
+;(define_insn ""
+; [(set (match_operand:HI 0 "nonimmediate_operand" "=rR,Q")
+; (ashiftrt:HI (match_operand:HI 1 "general_operand" "0,0")
+; (const_int 1)))]
+; ""
+; "asr %0"
+; [(set_attr "length" "2,4")])
+
+;; asrb
+(define_insn ""
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,o")
+ (ashiftrt:QI (match_operand:QI 1 "general_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "n,n")))]
+ ""
+ "*
+{ /* allowing predec or post_inc is possible, but hairy! */
+ int i, cnt;
+
+ cnt = INTVAL(operands[2]) & 0x0007;
+
+ for (i=0 ; i < cnt ; i++)
+ output_asm_insn(\"asrb %0\", operands);
+
+ return \"\";
+}"
+ [(set_attr_alternative "length"
+ [(const_int 14)
+ (const_int 28)])])
+
+;; the following length expression is invalid - too complex - so just say 14:
+; [(set (attr "length") (plus (and (match_dup 2)
+; (const_int 14))
+; (and (match_dup 2)
+; (const_int 14))))])
+
+
+
+;; can a +-1 shift count ever reach the next pattern? it should
+;; have been caught by the previous patterns!
+
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "rR,Qi")))]
+ "TARGET_40_PLUS"
+ "*
+{
+ if (GET_CODE(operands[2]) == CONST_INT)
+ {
+ if (INTVAL(operands[2]) == 1)
+ return \"asl %0\";
+ else if (INTVAL(operands[2]) == -1)
+ return \"asr %0\";
+ }
+
+ return \"ash %2,%0\";
+}"
+ [(set_attr "length" "2,4")])
+
+;; Arithmetic right shift on the pdp works by negating the shift count.
+(define_expand "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))]
+ ""
+ "
+{
+ operands[2] = negate_rtx (HImode, operands[2]);
+}")
+
+(define_expand "lshrhi3"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")]
+ ""
+ "
+{
+ rtx r;
+
+ if (!TARGET_40_PLUS &&
+ (GET_CODE (operands[2]) != CONST_INT ||
+ (unsigned) INTVAL (operands[2]) > 3))
+ FAIL;
+ emit_insn (gen_lsrhi1 (operands[0], operands[1]));
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ r = gen_reg_rtx (HImode);
+ emit_insn (gen_addhi3 (r, operands [2], GEN_INT (-1)));
+ emit_insn (gen_ashrhi3 (operands[0], operands[0], r));
+ }
+ else if ((unsigned) INTVAL (operands[2]) != 1)
+ {
+ emit_insn (gen_ashlhi3 (operands[0], operands[0],
+ GEN_INT (1 - INTVAL (operands[2]))));
+ }
+ DONE;
+}
+"
+)
+
+;; absolute
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=fR,Q")
+ (abs:DF (match_operand:DF 1 "general_operand" "0,0")))]
+ "TARGET_FPU"
+ "{absd|absf} %0"
+ [(set_attr "length" "2,4")])
+
+
+;; negate insns
+
+(define_insn "negdf2"
+ [(set (match_operand:DF 0 "float_nonimm_operand" "=fR,Q")
+ (neg:DF (match_operand:DF 1 "register_operand" "0,0")))]
+ "TARGET_FPU"
+ "{negd|negf} %0"
+ [(set_attr "length" "2,4")])
+
+(define_insn "negdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+ (neg:DI (match_operand:DI 1 "general_operand" "0,0")))]
+ ""
+  "*
+{
+ rtx exops[4][2];
+
+ pdp11_expand_operands (operands, exops, 1, NULL, either);
+
+ output_asm_insn (\"com %0\", exops[3]);
+ output_asm_insn (\"com %0\", exops[2]);
+ output_asm_insn (\"com %0\", exops[1]);
+ output_asm_insn (\"com %0\", exops[0]);
+ output_asm_insn (\"add $1, %0\", exops[3]);
+ output_asm_insn (\"adc %0\", exops[2]);
+ output_asm_insn (\"adc %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+
+ return \"\";
+}"
+[(set_attr "length" "18,34")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,o")
+ (neg:SI (match_operand:SI 1 "general_operand" "0,0")))]
+ ""
+  "*
+{
+ rtx exops[2][2];
+
+ pdp11_expand_operands (operands, exops, 1, NULL, either);
+
+ output_asm_insn (\"com %0\", exops[1]);
+ output_asm_insn (\"com %0\", exops[0]);
+ output_asm_insn (\"add $1, %0\", exops[1]);
+ output_asm_insn (\"adc %0\", exops[0]);
+
+ return \"\";
+}"
+[(set_attr "length" "12,20")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:PDPint 0 "nonimmediate_operand" "=rR,Q")
+ (neg:PDPint (match_operand:PDPint 1 "general_operand" "0,0")))]
+ ""
+ "neg<isfx> %0"
+ [(set_attr "length" "2,4")])
+
+
+;; Unconditional and other jump instructions
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"br %l0\";
+ return \"jmp %l0\";
+}"
+ [(set (attr "length") (if_then_else (ior (lt (minus (match_dup 0)
+ (pc))
+ (const_int MIN_BRANCH))
+ (gt (minus (match_dup 0)
+ (pc))
+ (const_int MAX_BRANCH)))
+ (const_int 4)
+ (const_int 2)))])
+
+(define_insn ""
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))
+ (clobber (const_int 1))]
+ ""
+ "jmp %l0"
+ [(set_attr "length" "4")])
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:HI 0 "general_operand" "r,R,Q"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "@
+ jmp (%0)
+ jmp %@%0
+ jmp %@%0"
+ [(set_attr "length" "2,2,4")])
+
+;; indirect jump - let's be conservative!
+;; allow only register_operand, even though we could also
+;; allow labels etc.
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:HI 0 "register_operand" "r"))]
+ ""
+ "jmp (%0)")
+
+;;- jump to subroutine
+
+(define_insn "call"
+ [(call (match_operand:HI 0 "general_operand" "rR,Q")
+ (match_operand:HI 1 "general_operand" "g,g"))
+;; (use (reg:HI 0)) what was that ???
+ ]
+ ;;- Don't use operand 1 for most machines.
+ ""
+ "jsr pc, %0"
+ [(set_attr "length" "2,4")])
+
+;;- jump to subroutine
+(define_insn "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:HI 1 "general_operand" "rR,Q")
+ (match_operand:HI 2 "general_operand" "g,g")))
+;; (use (reg:HI 0)) - what was that ????
+ ]
+ ;;- Don't use operand 2 for most machines.
+ ""
+ "jsr pc, %1"
+ [(set_attr "length" "2,4")])
+
+;;- nop instruction
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+
+;;- multiply
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "=a,a")
+ (mult:DF (match_operand:DF 1 "register_operand" "%0,0")
+ (match_operand:DF 2 "float_operand" "fR,QF")))]
+ "TARGET_FPU"
+ "{muld|mulf} %2, %0"
+ [(set_attr "length" "2,4")])
+
+;; 16 bit result multiply:
+;; currently we multiply only into odd registers, so we don't use two
+;; registers - but this is a bit inefficient at times. If we defined
+;; a register class for each register, then we could specify properly
+;; which register needs which scratch register ....
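+;; (On the PDP-11, "mul src, rN" leaves a 32-bit product in rN/rN+1
+;; when N is even but only the low 16 bits when N is odd -- which is
+;; exactly what this 16-bit pattern wants.)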
+
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "=d,d") ; multiply regs
+ (mult:HI (match_operand:HI 1 "register_operand" "%0,0")
+		 (match_operand:HI 2 "general_operand" "rR,Qi")))]
+ "TARGET_40_PLUS"
+ "mul %2, %0"
+ [(set_attr "length" "2,4")])
+
+;; 32 bit result
+(define_expand "mulhisi3"
+ [(set (match_dup 3)
+ (match_operand:HI 1 "nonimmediate_operand" "g,g"))
+ (set (match_operand:SI 0 "register_operand" "=r,r") ; even numbered!
+ (mult:SI (truncate:HI
+ (match_dup 0))
+ (match_operand:HI 2 "general_operand" "rR,Qi")))]
+ "TARGET_40_PLUS"
+  "operands[3] = gen_lowpart (HImode, operands[0]);")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r") ; even numbered!
+ (mult:SI (truncate:HI
+ (match_operand:SI 1 "register_operand" "%0,0"))
+ (match_operand:HI 2 "general_operand" "rR,Qi")))]
+ "TARGET_40_PLUS"
+ "mul %2, %0"
+ [(set_attr "length" "2,4")])
+
+;(define_insn "mulhisi3"
+; [(set (match_operand:SI 0 "register_operand" "=r,r") ; even numbered!
+; (mult:SI (truncate:HI
+; (match_operand:SI 1 "register_operand" "%0,0"))
+; (match_operand:HI 2 "general_operand" "rR,Qi")))]
+; "TARGET_40_PLUS"
+; "mul %2, %0"
+; [(set_attr "length" "2,4")])
+
+;;- divide
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=a,a")
+ (div:DF (match_operand:DF 1 "register_operand" "0,0")
+ (match_operand:DF 2 "general_operand" "fR,QF")))]
+ "TARGET_FPU"
+ "{divd|divf} %2, %0"
+ [(set_attr "length" "2,4")])
+
+
+(define_expand "divhi3"
+ [(set (subreg:HI (match_dup 1) 0)
+ (div:HI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (subreg:HI (match_dup 1) 0))]
+ "TARGET_40_PLUS"
+ "")
+
+(define_insn ""
+ [(set (subreg:HI (match_operand:SI 0 "register_operand" "=r") 0)
+ (div:HI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))]
+ "TARGET_40_PLUS"
+ "div %2,%0"
+ [(set_attr "length" "4")])
+
+(define_expand "modhi3"
+ [(set (subreg:HI (match_dup 1) 2)
+ (mod:HI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (subreg:HI (match_dup 1) 2))]
+ "TARGET_40_PLUS"
+ "")
+
+(define_insn ""
+ [(set (subreg:HI (match_operand:SI 0 "register_operand" "=r") 2)
+ (mod:HI (match_operand:SI 1 "general_operand" "0")
+ (match_operand:HI 2 "general_operand" "g")))]
+ "TARGET_40_PLUS"
+ "div %2,%0"
+ [(set_attr "length" "4")])
+
+;(define_expand "divmodhi4"
+; [(parallel [(set (subreg:HI (match_dup 1) 0)
+; (div:HI (match_operand:SI 1 "register_operand" "0")
+; (match_operand:HI 2 "general_operand" "g")))
+; (set (subreg:HI (match_dup 1) 2)
+; (mod:HI (match_dup 1)
+; (match_dup 2)))])
+; (set (match_operand:HI 3 "register_operand" "=r")
+; (subreg:HI (match_dup 1) 2))
+; (set (match_operand:HI 0 "register_operand" "=r")
+; (subreg:HI (match_dup 1) 0))]
+; "TARGET_40_PLUS"
+; "")
+;
+;(define_insn ""
+; [(set (subreg:HI (match_operand:SI 0 "register_operand" "=r") 0)
+; (div:HI (match_operand:SI 1 "general_operand" "0")
+; (match_operand:HI 2 "general_operand" "g")))
+; (set (subreg:HI (match_dup 0) 2)
+; (mod:HI (match_dup 1)
+; (match_dup 2)))]
+; "TARGET_40_PLUS"
+; "div %2, %0")
+;
+
+;; would a rotate pattern do the right thing if it were included here ????
diff --git a/gcc/config/pdp11/pdp11.opt b/gcc/config/pdp11/pdp11.opt
new file mode 100644
index 000000000..9c427a3f8
--- /dev/null
+++ b/gcc/config/pdp11/pdp11.opt
@@ -0,0 +1,87 @@
+; Options for the PDP11 port of the compiler.
+
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m10
+Target RejectNegative
+Generate code for an 11/10
+
+m40
+Target Report Mask(40)
+Generate code for an 11/40
+
+m45
+Target Report Mask(45)
+Generate code for an 11/45
+
+mac0
+Target Report Mask(AC0)
+Return floating-point results in ac0 (fr0 in Unix assembler syntax)
+
+mbcopy
+Target RejectNegative Report Mask(BCOPY)
+Do not use inline patterns for copying memory
+
+mbcopy-builtin
+Target RejectNegative Report InverseMask(BCOPY, BCOPY_BUILTIN)
+Use inline patterns for copying memory
+
+mbranch-cheap
+Target RejectNegative Report InverseMask(BRANCH_EXPENSIVE, BRANCH_CHEAP)
+Do not pretend that branches are expensive
+
+mbranch-expensive
+Target RejectNegative Report Mask(BRANCH_EXPENSIVE)
+Pretend that branches are expensive
+
+mdec-asm
+Target RejectNegative Report InverseMask(UNIX_ASM)
+Use the DEC assembler syntax
+
+mfloat32
+Target Report Mask(FLOAT32)
+Use 32 bit float
+
+mfloat64
+Target Report InverseMask(FLOAT32, FLOAT64)
+Use 64 bit float
+
+mfpu
+Target RejectNegative Report Mask(FPU)
+Use hardware floating point
+
+mint16
+Target Report InverseMask(INT32, INT16)
+Use 16 bit int
+
+mint32
+Target Report Mask(INT32)
+Use 32 bit int
+
+msoft-float
+Target RejectNegative Report InverseMask(FPU, SOFT_FLOAT)
+Do not use hardware floating point
+
+msplit
+Target Report Mask(SPLIT)
+Target has split I&D
+
+munix-asm
+Target RejectNegative Report Mask(UNIX_ASM)
+Use UNIX assembler syntax
diff --git a/gcc/config/pdp11/predicates.md b/gcc/config/pdp11/predicates.md
new file mode 100644
index 000000000..8b24ba4fc
--- /dev/null
+++ b/gcc/config/pdp11/predicates.md
@@ -0,0 +1,55 @@
+;;- Predicate definitions for the pdp11 for GNU C compiler
+;; Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2004, 2005
+;; 2007, 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by Michael K. Gschwind (mike@vlsivie.tuwien.ac.at).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Match CONST_DOUBLE zero for tstd/tstf.
+(define_predicate "register_or_const0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+;; Accept integer arguments in the range -4..-2 and 2..4, which are the
+;; shift counts for which we unroll a shift. This matches the rule for
+;; the "O" constraint.
+(define_predicate "expand_shift_operand"
+ (match_code "const_int")
+{
+ int sh;
+
+ sh = INTVAL (op);
+ return (abs (sh) > 1 && abs (sh) <= 4);
+})
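+;; E.g. const_int 3 and const_int -4 are accepted, while 0, +-1 and
+;; +-5 are rejected (abs (sh) must lie in 2..4).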
+
+;; Accept anything general_operand accepts, except that registers must
+;; be FPU registers.
+(define_predicate "float_operand"
+ (if_then_else (match_code "reg")
+ (ior
+ (match_test "REGNO_REG_CLASS (REGNO (op)) == LOAD_FPU_REGS")
+ (match_test "REGNO_REG_CLASS (REGNO (op)) == NO_LOAD_FPU_REGS"))
+ (match_test "general_operand (op, mode)")))
+
+;; Accept anything nonimmediate_operand accepts, except that registers must
+;; be FPU registers.
+(define_predicate "float_nonimm_operand"
+ (if_then_else (match_code "reg")
+ (ior
+ (match_test "REGNO_REG_CLASS (REGNO (op)) == LOAD_FPU_REGS")
+ (match_test "REGNO_REG_CLASS (REGNO (op)) == NO_LOAD_FPU_REGS"))
+ (match_test "nonimmediate_operand (op, mode)")))
diff --git a/gcc/config/pdp11/t-pdp11 b/gcc/config/pdp11/t-pdp11
new file mode 100644
index 000000000..67441a0d2
--- /dev/null
+++ b/gcc/config/pdp11/t-pdp11
@@ -0,0 +1,44 @@
+# Copyright (C) 1995, 1997, 1998, 2001, 2002,
+# 2004, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+TARGET_LIBGCC2_CFLAGS = -O2 -mfloat32
+LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/udivmodsi4.c \
+ $(srcdir)/config/memcmp.c $(srcdir)/config/memcpy.c \
+ $(srcdir)/config/memmove.c $(srcdir)/config/memset.c
+# floating point emulation libraries
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+MULTILIB_OPTIONS = msoft-float
+
+# Because the pdp11 POINTER_SIZE is only 16, DWARF_ARANGES_PAD_SIZE in
+# dwarf2out.c is 0; thus a loop in output_aranges that checks
+# (i < (unsigned) DWARF_ARANGES_PAD_SIZE) elicits a warning that the
+# comparison is always false.
+# We could say "-Werror -Wno-error=type-limits", but alas, not all
+# supported gcc bootstrap compilers support the latter option.
+dwarf2out.o-warn = -Wno-error
diff --git a/gcc/config/picochip/constraints.md b/gcc/config/picochip/constraints.md
new file mode 100644
index 000000000..5c3fb3f50
--- /dev/null
+++ b/gcc/config/picochip/constraints.md
@@ -0,0 +1,64 @@
+;; GCC machine description for picochip
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Picochip Ltd (http://www.picochip.com)
+;; Maintained by Daniel Towner (dant@picochip.com) and Hariharan
+;; Sandanagobalane (hariharan@picochip.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not, see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constraint "I"
+  "4-bit signed value"
+  (and (match_code "const_int")
+       (match_test "ival >= -8 && ival < 8")))
+
+(define_constraint "J"
+  "4-bit unsigned value"
+  (and (match_code "const_int")
+       (match_test "ival >= 0 && ival < 16")))
+
+(define_constraint "K"
+  "8-bit signed value"
+  (and (match_code "const_int")
+       (match_test "ival >= -128 && ival < 128")))
+
+(define_constraint "M"
+  "4-bit magnitude"
+  (and (match_code "const_int")
+       (match_test "abs (ival) < 16")))
+
+(define_constraint "N"
+  "10-bit signed value"
+  (and (match_code "const_int")
+       (match_test "ival >= -512 && ival < 512")))
+
+(define_constraint "O"
+  "16-bit signed value"
+  (and (match_code "const_int")
+       (match_test "ival >= -32768 && ival < 32768")))
+
+(define_constraint "a"
+  "An absolute address in memory"
+  (and (match_code "mem")
+       (match_test "picochip_absolute_memory_operand (op, mode) == 1")))
+
+(define_register_constraint "k" "FRAME_REGS"
+ "Frame regs")
+(define_register_constraint "f" "PTR_REGS"
+ "Pointer regs")
+(define_register_constraint "t" "TWIN_REGS"
+ "Twin regs")
+
diff --git a/gcc/config/picochip/dfa_space.md b/gcc/config/picochip/dfa_space.md
new file mode 100644
index 000000000..cfd38148a
--- /dev/null
+++ b/gcc/config/picochip/dfa_space.md
@@ -0,0 +1,43 @@
+;; GCC machine description for picochip
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Picochip Ltd (http://www.picochip.com)
+;; Maintained by Daniel Towner (dant@picochip.com) and Hariharan
+;; Sandanagobalane (hariharan@picochip.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not, see
+;; <http://www.gnu.org/licenses/>.
+
+;; The following DFA description schedules instructions for space. The
+;; schedule seeks to avoid stall cycles (e.g., memory load), but the
+;; instructions are not VLIW packed (whenever instructions are packed
+;; together, an additional byte is used to denote this, which
+;; increases the code size).
+
+;; No special handling of the long constants is necessary (as in
+;; dfa_speed.md), since VLIW packing is not used.
+
+;; Memory instructions stall for one cycle. All other instructions
+;; complete ready for the next cycle.
+
+(define_insn_reservation "nonStallInsn" 1
+ (and (eq_attr "schedType" "space")
+ (eq_attr "type" "!mem"))
+ "slot0+slot1+slot2")
+
+(define_insn_reservation "stallInsn" 2
+ (and (eq_attr "schedType" "space")
+ (eq_attr "type" "mem"))
+ "slot0+slot1+slot2")
diff --git a/gcc/config/picochip/dfa_speed.md b/gcc/config/picochip/dfa_speed.md
new file mode 100644
index 000000000..6ee26a668
--- /dev/null
+++ b/gcc/config/picochip/dfa_speed.md
@@ -0,0 +1,123 @@
+;; GCC machine description for picochip
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Picochip Ltd (http://www.picochip.com)
+;; Maintained by Daniel Towner (dant@picochip.com) and Hariharan
+;; Sandanagobalane (hariharan@picochip.com).
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not, see
+;; <http://www.gnu.org/licenses/>.
+
+;; The following DFA description schedules instructions for speed. In
+;; addition to the scheduling of instructions to remove stall cycles
+;; (e.g., memory load), the scheduler will also pack multiple
+;; instructions into a single cycle, using VLIW.
+
+;; Each instruction comes in forms with and without long
+;; constants. The long constant is treated as though it were also an
+;; instruction. Thus, an instruction which uses slot0 will use slot0
+;; plus one of the other slots for the constant. This mechanism
+;; ensures that it is impossible for 3 instructions to be issued, if
+;; one of them has a long constant. This is necessary, because the
+;; encoding of 3 instructions, plus a constant, will overrun the
+;; 64-bit limit.
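+;; For instance, a basicAlu instruction carrying a long constant
+;; reserves two of the three slots below, so at most one further
+;; (constant-free) instruction can issue in the same cycle.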
+
+; Extended ALU - Slot 0
+(define_insn_reservation "picoAluInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "picoAlu") (eq_attr "longConstant" "false")))
+ "slot0")
+(define_insn_reservation "picoAluInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "picoAlu") (eq_attr "longConstant" "true")))
+ "(slot0+slot1)|(slot0+slot2)")
+
+; Basic ALU - Slot 0 or 1
+(define_insn_reservation "basicAluInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "basicAlu") (eq_attr "longConstant" "false")))
+ "(slot0|slot1)")
+(define_insn_reservation "basicAluInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "basicAlu") (eq_attr "longConstant" "true")))
+ "(slot0+slot1) | (slot1+slot2) | (slot0+slot2)")
+
+; ALU which must not set flags - Slot 1
+(define_insn_reservation "nonCcAluInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "nonCcAlu") (eq_attr "longConstant" "false")))
+ "slot1")
+(define_insn_reservation "nonCcAluInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "nonCcAlu") (eq_attr "longConstant" "true")))
+ "(slot1+slot0) | (slot1+slot2)")
+
+; Memory - Slot 1
+(define_insn_reservation "memInsn" 2
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mem") (eq_attr "longConstant" "false")))
+ "slot1,nothing")
+(define_insn_reservation "memInsnWithConst" 2
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mem") (eq_attr "longConstant" "true")))
+ "slot1+(slot0|slot2),nothing")
+
+; Multiply - Slot 2
+(define_insn_reservation "mulInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mul") (eq_attr "longConstant" "false")))
+ "slot2")
+(define_insn_reservation "mulInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mul") (eq_attr "longConstant" "true")))
+ "(slot2+slot0)|(slot2+slot1)")
+
+; MAC - Slot 2
+(define_insn_reservation "macInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mac") (eq_attr "longConstant" "false")))
+ "slot2")
+(define_insn_reservation "macInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "mac") (eq_attr "longConstant" "true")))
+ "(slot2+slot0)|(slot2+slot1)")
+
+; Branch - Real branches use slot2, while macro branches use unknown
+; resources.
+(define_insn_reservation "branchInsn" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "realBranch") (eq_attr "longConstant" "false")))
+ "slot2")
+(define_insn_reservation "branchInsnWithConst" 1
+ (and (eq_attr "schedType" "speed")
+ (and (eq_attr "type" "realBranch") (eq_attr "longConstant" "true")))
+ "(slot2+slot0)|(slot2+slot1)")
+(define_insn_reservation "branchInsnMacro" 1
+ (and (eq_attr "schedType" "speed")
+ (eq_attr "type" "realBranch"))
+ "(slot0+slot1+slot2)")
+
+; Call instructions use all slots to prevent inadvertent scheduling
+; alongside instructions which set R12.
+
+(define_insn_reservation "callInsn" 1
+ (and (eq_attr "schedType" "speed") (eq_attr "type" "call"))
+ "slot0+slot1+slot2")
+
+; Communications - Slot 1
+(define_insn_reservation "commsInsn" 1
+ (and (eq_attr "schedType" "speed") (eq_attr "type" "comms"))
+ "slot1")
+
diff --git a/gcc/config/picochip/libgccExtras/adddi3.asm b/gcc/config/picochip/libgccExtras/adddi3.asm
new file mode 100644
index 000000000..77373ed9f
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/adddi3.asm
@@ -0,0 +1,194 @@
+// picoChip ASM file
+//
+// Support for 64-bit addition.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global __adddi3
+__adddi3:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &__adddi3 = 12 bytes
+
+	// The first operand of the add is entirely in registers r[2-5].
+	// The second operand is on the stack at FP(0-3), and the result
+	// must be written to the memory pointed to by register r0.
+	// All we need to do is load the appropriate values, add them
+	// (with add or addc) and then store the values back.
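+	//
+	// In C terms, roughly (a sketch; FP(n) is the n-th stacked word):
+	//   r0[0] = r2 + FP(0);          // add   (sets carry)
+	//   r0[1] = r3 + FP(1) + carry;  // addc
+	//   r0[2] = r4 + FP(2) + carry;  // addc
+	//   r0[3] = r5 + FP(3) + carry;  // addc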
+
+ ldw (FP)0, r1
+ stl r[7:6], (FP)-1
+ add.0 r2, r1, r6
+ ldw (FP)1, r1
+ addc.0 r3, r1, r7
+ ldl (FP)1, r[3:2]
+ stl r[7:6], (r0)0
+ addc.0 r4, r2, r6
+ addc.0 r5, r3, r7
+ stl r[7:6], (r0)1
+ jr (r12)
+=-> ldl (FP)-1, r[7:6]
+
+_picoMark_FUNCTION_END=
+
+// picoChip Function Epilogue : __adddi3
+
+
+//============================================================================
+// All DWARF information between this marker and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0xe // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each hex
+// digit is specified using the format 16#XX#
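+// For example (sketch): `echo _adddi3 | od -t x1` prints
+//   0000000 5f 61 64 64 64 69 33 0a
+// -- drop the trailing newline byte (0a) and keep the NUL terminator,
+// written as 16#0# below.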
+.ascii 16#5f# 16#61# 16#64# 16#64# 16#64# 16#69# 16#33# 16#0# // Function name `_adddi3'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
diff --git a/gcc/config/picochip/libgccExtras/ashlsi3.asm b/gcc/config/picochip/libgccExtras/ashlsi3.asm
new file mode 100644
index 000000000..688cd8d96
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/ashlsi3.asm
@@ -0,0 +1,193 @@
+// picoChip ASM file
+//
+// Support for 32-bit arithmetic shift left.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global ___ashlsi3
+___ashlsi3:
+_picoMark_FUNCTION_BEGIN=
+// picoChip Function Prologue : &___ashlsi3 = 0 bytes
+
+ // if (R2 > 15) goto _L2
+ SUB.0 15,R2,r15
+ JMPLT _L2
+=->	SUB.0 16,R2,R5 		// R5 := 16 - R2
+
+	LSL.0 R1,R2,R1 		// R1 := R1 << R2
+	LSL.0 R0,R2,R4 		// R4 := R0 << R2
+
+	LSR.0 R0,R5,R5 		// R5 := R0 >> R5
+	OR.0 R5,R1,R5 		// R5 := R5 IOR R1
+	SUB.0 R2,0,r15		// test R2 against zero
+	COPYNE R5,R1		// if R2 != 0 use the merged high word
+	JR (R12)		// Return to caller
+=->	COPY.0 R4,R0
+
+_L2:
+	LSL.0 R0,R2,R1 		// R1 := R0 << R2
+	JR (R12)		// Return to caller
+=->	COPY.0 0,R0 		// R0 := 0 (short constant)
+
+_picoMark_FUNCTION_END=
+
+// picoChip Function Epilogue : __ashlsi3
+
+//============================================================================
+// All DWARF information between this marker and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each hex
+// digit is specified using the format 16#XX#
+.ascii 16#5f# 16#61# 16#73# 16#68# 16#6c# 16#73# 16#69# 16#33# 16#0# // Function name `_ashlsi3'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
diff --git a/gcc/config/picochip/libgccExtras/ashlsi3.c b/gcc/config/picochip/libgccExtras/ashlsi3.c
new file mode 100644
index 000000000..600461c0b
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/ashlsi3.c
@@ -0,0 +1,82 @@
+/*
+
+picoChip GCC support for 32-bit shift left.
+
+Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+Contributed by Picochip Ltd.
+Maintained by Daniel Towner (daniel.towner@picochip.com)
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef PICOCHIP
+#error "Intended for compilation for PICOCHIP only."
+#endif
+
+typedef int HItype __attribute__ ((mode (HI)));
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+
+typedef struct USIstruct {
+ UHItype low, high;
+} USIstruct;
+
+typedef union USIunion {
+ USItype l;
+ USIstruct s;
+} USIunion;
+
+USItype __ashlsi3(USIunion value, HItype count) {
+ USIunion result;
+ int temp;
+
+ /* Ignore a zero count until we get into the (count < 16)
+ clause. This is slightly slower when shifting by zero, but faster
+ and smaller in all other cases (due to the better scheduling
+ opportunities available by putting the test near computational
+     instructions). */
+ /* if (count == 0) return value.l; */
+
+ if (count < 16) {
+ /* Shift low and high words by the count. */
+ result.s.low = value.s.low << count;
+ result.s.high = value.s.high << count;
+
+ /* There is now a hole in the lower `count' bits of the high
+ word. Shift the upper `count' bits of the low word into the
+ high word. This is only required when the count is non-zero. */
+ if (count != 0) {
+ temp = 16 - count;
+ temp = value.s.low >> temp;
+ result.s.high |= temp;
+ }
+
+ } else {
+ /* Shift the lower word of the source into the upper word of the
+ result, and zero the result's lower word. */
+ count -= 16;
+ result.s.high = value.s.low << count;
+ result.s.low = 0;
+
+ }
+
+ return result.l;
+
+}
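+
+/* Worked example (a sketch, values illustrative): __ashlsi3 on the
+   SI value 0x00012345 with count 8 takes the (count < 16) path:
+   low = 0x2345 << 8 = 0x4500, high = (0x0001 << 8) | (0x2345 >> 8)
+   = 0x0123, giving 0x01234500 == 0x00012345 << 8. */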
+
diff --git a/gcc/config/picochip/libgccExtras/ashrsi3.asm b/gcc/config/picochip/libgccExtras/ashrsi3.asm
new file mode 100644
index 000000000..fddd70b68
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/ashrsi3.asm
@@ -0,0 +1,202 @@
+// picoChip ASM file
+//
+// Support for 32-bit arithmetic shift right.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global ___ashrsi3
+___ashrsi3:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &___ashrsi3 = 0 bytes
+
+ // if (R2 > 15) goto _L2
+ SUB.0 15,R2,r15
+ JMPLT _L2
+=-> COPY.0 R1,R3
+
+ LSR.0 R1,R2,R1 // R1 := R1 >> R2
+ // if (R2 == 0) goto _L4
+ SUB.0 R2,0,r15
+ JMPEQ _L4
+=->	LSR.0 R0,R2,R0 		// R0 := R0 >> R2
+
+	SUB.0 16,R2,R4 		// R4 := 16 - R2
+	ASR.0 R3,15,R5 		// R5 := R3 >>{arith} 15
+	LSL.0 R5,R4,R5 		// R5 := R5 << R4
+	LSL.0 R3,R4,R4 		// R4 := R3 << R4
+	OR.0 R5,R1,R1 		// R1 := R5 IOR R1
+	BRA _L4
+	=-> OR.0 R4,R0,R0 	// R0 := R4 IOR R0
+_L2:
+	ASR.0 R1,15,R1 		// R1 := R1 >>{arith} 15
+	SUB.0 16,R2,R5 		// R5 := 16 - R2
+	LSR.0 R3,R2,R0 		// R0 := R3 >> R2
+	LSL.0 R1,R5,R5 		// R5 := R1 << R5
+	OR.0 R5,R0,R5 		// R5 := R5 IOR R0
+	SUB.0 R2,16,r15		// test R2 against 16
+	COPYNE R5,R0		// if R2 != 16 use the merged low word
+_L4:
+ JR (R12) // Return to caller
+
+_picoMark_FUNCTION_END=
+
+// picoChip Function Epilogue : __ashrsi3
+//============================================================================
+// All DWARF information between this marker and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each hex
+// digit is specified using the format 16#XX#
+.ascii 16#5f# 16#61# 16#73# 16#68# 16#72# 16#73# 16#69# 16#33# 16#0# // Function name `_ashrsi3'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/ashrsi3.c b/gcc/config/picochip/libgccExtras/ashrsi3.c
new file mode 100644
index 000000000..4f1567b13
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/ashrsi3.c
@@ -0,0 +1,113 @@
+/*
+
+picoChip GCC support for 32-bit arithmetic shift right.
+
+Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+Contributed by Picochip Ltd.
+Maintained by Daniel Towner (daniel.towner@picochip.com)
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+typedef int HItype __attribute__ ((mode (HI)));
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+
+typedef struct USIstruct {
+ UHItype low, high;
+} USIstruct;
+
+typedef union USIunion {
+ USItype l;
+ USIstruct s;
+} USIunion;
+
+USItype __ashrsi3(USIunion value, HItype count) {
+ USIunion result;
+ int temp;
+ int wordOfSignBits;
+
+  /* Ignore a zero count until we get into the (count < 16)
+     clause. This is slightly slower when shifting by zero, but faster
+     and smaller in all other cases (due to the better scheduling
+     opportunities available by putting the test near computational
+     instructions). */
+ /* if (count == 0) return value.l; */
+
+ if (count < 16) {
+ /* Shift low and high words by the count. The high word must use
+ an arithmetic shift. There is no arithmetic shift-right by
+ variable, so synthesise it. */
+ int signWord;
+ int reverseCount;
+
+    /* Shift low and high parts by the count. The upper word now has
+       invalid sign bits. */
+ result.s.low = value.s.low >> count;
+ result.s.high = value.s.high >> count;
+
+ if (count != 0) {
+
+ reverseCount = 16 - count;
+
+ /* Given a word of sign bits, shift back left to create the
+ destination sign bits. */
+ wordOfSignBits = __builtin_asri(value.s.high, 15);
+ signWord = wordOfSignBits << reverseCount;
+ result.s.high |= signWord;
+
+ /* There is now a hole in the upper `count' bits of the low
+ word. Shift the lower `count' bits of the upper word into the
+ low word. */
+ temp = value.s.high << reverseCount;
+ result.s.low |= temp;
+ }
+
+  } else {
+
+    /* The shift is at least one whole word, so the top word is set
+       entirely to sign bits, and the bottom word is shifted from the
+       top word. */
+    result.s.low = value.s.high >> count;
+    result.s.high = __builtin_asri(value.s.high, 15);
+
+ if (count != 16) {
+
+ /* Shift the upper word of the source into the lower word of the
+ result. Arithmetically shift the upper word as well, to retain
+ the sign. This shift must be synthesised, as no such shift
+ exists in the instruction set. */
+ int signWord;
+
+
+      /* Given a complete word of sign-bits, shift this back left to
+         create the destination sign bits. */
+      signWord = result.s.high << (16 - count);
+
+ /* Insert the sign bits to the result's low word. */
+ result.s.low |= signWord;
+
+ }
+
+ }
+
+ return result.l;
+
+}
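
As a rough host-side illustration of the halves-based decomposition used by __ashrsi3 above: the sketch below is not the shipped routine. ref_ashrsi3 and main are hypothetical names, the picoChip-only __builtin_asri(x, 15) is replaced by a portable word-of-sign-bits expression, and the self-check assumes the host compiler shifts signed values arithmetically (as GCC does).

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical host-side reference for the routine above. */
    static uint32_t ref_ashrsi3(uint32_t v, int count)
    {
        uint16_t lo = (uint16_t)v, hi = (uint16_t)(v >> 16);
        uint16_t sign = (hi & 0x8000) ? 0xffff : 0x0000; /* word of sign bits */
        uint16_t rlo, rhi;

        if (count < 16) {
            rlo = (uint16_t)(lo >> count);
            rhi = (uint16_t)(hi >> count);
            if (count != 0) {
                rhi |= (uint16_t)(sign << (16 - count)); /* restore sign bits     */
                rlo |= (uint16_t)(hi << (16 - count));   /* fill hole in low word */
            }
        } else {
            rlo = (uint16_t)(hi >> (count - 16));
            rhi = sign;
            if (count != 16)
                rlo |= (uint16_t)(sign << (32 - count)); /* sign bits into low word */
        }
        return ((uint32_t)rhi << 16) | rlo;
    }

    int main(void)
    {
        for (int c = 0; c < 32; c++)
            assert(ref_ashrsi3(0x80001234u, c) ==
                   (uint32_t)((int32_t)0x80001234 >> c));
        return 0;
    }

The target code obtains the word of sign bits in a single instruction via the builtin, which is why it is worth synthesising the variable arithmetic shift around it.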
diff --git a/gcc/config/picochip/libgccExtras/clzsi2.asm b/gcc/config/picochip/libgccExtras/clzsi2.asm
new file mode 100644
index 000000000..835d46941
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/clzsi2.asm
@@ -0,0 +1,189 @@
+// Copyright (C) 2008 Free Software Foundation, Inc.
+//
+// This file is part of GCC.
+//
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+//
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+// picoChip ASM file
+//.file "clzsi2.asm"
+
+.section .text
+
+.global __clzsi2
+__clzsi2:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &__clzsi2 = 0 bytes
+
+ // What value should be operated on? If the top word is empty
+ // then count the bits in the bottom word, and add 16. If the
+ // top word is not empty, then count the bits in the top word.
+
+ // R4 stores the constant 0
+
+ sub.0 R1,0,r15 \ copy.1 16,r2
+ copyeq r0,r1
+ copyne 0,r2
+
+ // R1 now stores value to count, and R2 stores current bit offset.
+ sbc r1,r0
+ asr.0 r1,15,r15 \ add.1 r0,1,r0
+ jr (lr) \ copyne 0,r0
+=-> add.0 r0,r2,r0
+
+_picoMark_FUNCTION_END=
+
+// picoChip Function Epilogue : __clzsi2
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5F# 16#63# 16#6C# 16#7A# 16#73# 16#69# 16#32# 16#0# // Function name `_clzsi2'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
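
The word-selection idea described in the comments of __clzsi2 above (count in the bottom word and add 16 when the top word is empty, otherwise count in the top word) can be sketched in portable C. This is a hypothetical model, not the shipped routine: ref_clz16 stands in for the sbc-based count with a naive loop, and all names are illustrative.

    #include <assert.h>
    #include <stdint.h>

    /* Naive stand-in for the sbc-based leading-bit count. */
    static int ref_clz16(uint16_t w)
    {
        int n = 0;
        while (n < 16 && !(w & (0x8000u >> n)))
            n++;
        return n;
    }

    static int ref_clzsi2(uint32_t v)
    {
        uint16_t hi = (uint16_t)(v >> 16);
        if (hi == 0)                        /* top word empty: add 16 */
            return 16 + ref_clz16((uint16_t)v);
        return ref_clz16(hi);
    }

    int main(void)
    {
        assert(ref_clzsi2(1) == 31);
        assert(ref_clzsi2(0x00008000u) == 16);
        assert(ref_clzsi2(0x80000000u) == 0);
        return 0;
    }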
diff --git a/gcc/config/picochip/libgccExtras/cmpsi2.asm b/gcc/config/picochip/libgccExtras/cmpsi2.asm
new file mode 100644
index 000000000..95322f324
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/cmpsi2.asm
@@ -0,0 +1,212 @@
+// picoChip ASM file
+//.file "ucmpsi2.c"
+//
+// Support for 32-bit signed compare.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+// Compiled from the following, and then hand optimised.
+//
+// int __cmpsi2 (USItype x, USItype y)
+// {
+//
+// SIunion lx; lx.l = x;
+// SIunion ly; ly.l = y;
+//
+// if (lx.s.high < ly.s.high)
+// return 0;
+// else if (lx.s.high > ly.s.high)
+// return 2;
+// if (lx.s.low < ly.s.low)
+// return 0;
+// else if (lx.s.low > ly.s.low)
+// return 2;
+// return 1;
+// }
+
+.section .text
+
+.align 8
+.global ___cmpsi2
+___cmpsi2:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &___cmpsi2 = 0 bytes
+
+ SUB.0 R1,R3,r15
+
+ BLT _L1
+=-> SUB.0 R3,R1,r15 \ COPY.1 0,R5
+
+ BLT _L1
+=-> SUB.0 R0,R2,r15 \ COPY.1 2,R5
+
+ BLO _L1
+=-> SUB.0 R2,R0,r15 \ COPY.1 0,R5
+
+ BLO _L1
+=-> COPY.0 2,R5
+
+ COPY.0 1,R5
+_L1:
+ JR (R12)
+=-> COPY.0 R5,R0
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __cmpsi2
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#5f# 16#63# 16#6d# 16#70# 16#73# 16#69# 16#32# 16#0# // Function name `__cmpsi2'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
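
The 0/1/2 return convention implemented above (0 for less-than, 1 for equal, 2 for greater-than) mirrors the commented-out C at the top of the file. A minimal host-side sketch of that contract, with illustrative names:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical reference for the comparison contract above. */
    static int ref_cmpsi2(int32_t x, int32_t y)
    {
        return (x < y) ? 0 : (x > y) ? 2 : 1;
    }

    int main(void)
    {
        assert(ref_cmpsi2(-5, 3) == 0);  /* less than    */
        assert(ref_cmpsi2(7, 7) == 1);   /* equal        */
        assert(ref_cmpsi2(9, -1) == 2);  /* greater than */
        return 0;
    }

Splitting the comparison into a signed high-word compare (BLT) and an unsigned low-word compare (BLO), as the asm does, is equivalent to the single 32-bit signed compare shown here.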
diff --git a/gcc/config/picochip/libgccExtras/divmod15.asm b/gcc/config/picochip/libgccExtras/divmod15.asm
new file mode 100644
index 000000000..d314b3be5
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/divmod15.asm
@@ -0,0 +1,261 @@
+// picoChip ASM file
+//
+// Support for 16-bit unsigned division/modulus.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global __divmod15
+__divmod15:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &__divmod15 = 0 bytes
+
+ // The picoChip instruction set has a divstep instruction which
+ // is used to perform one iteration of a binary division algorithm.
+ // The instruction allows 16-bit signed division to be implemented.
+ // It does not directly allow 16-bit unsigned division to be
+ // implemented. Thus, this function pulls out the common division
+	// iteration for 15-bit unsigned values, and then special wrappers
+ // provide the logic to change this into a 16-bit signed or
+ // unsigned division, as appropriate. This allows the two
+ // versions of division to share a common implementation, reducing
+ // code size when the two are used together. It also reduces
+ // the maintenance overhead.
+
+ // Input:
+ // r0 - dividend
+ // r1 - divisor
+ // Output:
+ // r0 - quotient
+ // r1 - remainder
+ // R5 is unused
+
+ // Check for special cases. The emphasis is on detecting these as
+ // quickly as possible, so that the main division can be started. If
+	// the user requests division by one, division by self, and so on,
+ // then they will just have to accept that this won't be particularly
+ // quick (relatively), whereas a real division (e.g., dividing a
+ // large value by a small value) will run as fast as possible
+ // (i.e., special case detection should not slow down the common case)
+ //
+ // Special cases to consider:
+ //
+ // Division by zero.
+ // Division of zero.
+	// Inputs are equal.
+	// Divisor is bigger than dividend.
+	// Division by a power of two (can be shifted instead).
+	// Division by 1 (a special case of power-of-two division).
+ //
+ // Division/modulus by zero is undefined (ISO C:6.5.5), so
+ // don't bother handling this special case.
+ //
+ // The special cases of division by a power of 2 are ignored, since
+ // they cause the general case to slow down. Omitting these
+ // special cases also reduces code size considerably.
+
+ // Handle divisor >= dividend separately. Note that this also handles
+ // the case where the dividend is zero. Note that the flags must be
+ // preserved, since they are also used at the branch destination.
+ sub.0 r1,r0,r15
+ sbc r0,r2 \ bge divisorGeDividend
+=-> sbc r1,r4
+
+ // Compute the shift count. The amount by which the divisor
+ // must be shifted left to be aligned with the dividend.
+ sub.0 r4,r2,r3
+
+ // Align the divisor to the dividend. Execute a divstep (since at
+ // least one will always be executed). Skip the remaining loop
+ // if the shift count is zero.
+ lsl.0 r1,r3,r1 \ beq skipLoop
+=-> divstep r0,r1 \ add.1 r3,1,r2
+
+ // Execute the divstep loop until temp is 0. This assumes that the
+ // loop count is at least one.
+ sub.0 r3,1,r4
+divLoop:
+ divstep r0,r1 \ bne divLoop
+=-> sub.0 r4,1,r4
+
+skipLoop:
+
+ // The top bits of the result are the remainder. The bottom
+ // bits are the quotient.
+ lsr.0 r0,r2,r1 \ sub.1 16,r2,r4
+ jr (lr ) \ lsl.0 r0,r4,r0
+=-> lsr.0 r0,r4,r0
+
+// Special case.
+
+divisorGeDividend:
+ // The divisor is greater than or equal to the dividend. The flags
+ // indicate which of these alternatives it is. The COPYNE can be used
+ // to set the result appropriately, without introducing any more
+ // branches.
+ copy.0 r0,r1 \ copy.1 0,r0
+ jr (lr) \ copyeq r0,r1
+=-> copyeq 1,r0
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __divmod15
+
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#31# 16#35# 16#0# // Function name `_divmod15'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
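
The divstep loop above performs one restoring-division step per iteration. As a rough model, under the assumption that each divstep shifts one dividend bit into a running remainder and conditionally subtracts the divisor, the 15-bit core behaves like the C sketch below. ref_divmod15 is an illustrative name, and the shipped code additionally pre-aligns the divisor so that empty iterations are skipped.

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical model of the 15-bit restoring-division core. */
    static uint16_t ref_divmod15(uint16_t n, uint16_t d, uint16_t *rem)
    {
        uint16_t q = 0, r = 0;
        /* Caller guarantees d != 0; operands fit in 15 bits. */
        for (int i = 14; i >= 0; i--) {      /* one "divstep" per bit */
            r = (uint16_t)((r << 1) | ((n >> i) & 1));
            q <<= 1;
            if (r >= d) {
                r -= d;
                q |= 1;
            }
        }
        *rem = r;
        return q;
    }

    int main(void)
    {
        uint16_t r;
        assert(ref_divmod15(1000, 7, &r) == 142 && r == 6);
        assert(ref_divmod15(5, 9, &r) == 0 && r == 5);
        return 0;
    }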
diff --git a/gcc/config/picochip/libgccExtras/divmodhi4.asm b/gcc/config/picochip/libgccExtras/divmodhi4.asm
new file mode 100644
index 000000000..9dad674c7
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/divmodhi4.asm
@@ -0,0 +1,246 @@
+// picoChip ASM file
+//
+// Support for 16-bit signed division/modulus.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global __divmodhi4
+__divmodhi4:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &__divmodhi4 = 4 bytes
+
+	// 16-bit signed division. Most of the special cases are dealt
+	// with by the 15-bit unsigned division library (e.g., division by
+	// zero, division by 1, and so on). This wrapper simply inverts
+	// any negative inputs, calls the 15-bit library, and flips any
+	// results as necessary. The only special cases to be handled
+	// here are where either the divisor or the dividend is the
+	// maximum negative value.
+
+ // Encode r5 with a bit pattern which indicates whether the
+ // outputs of the division must be negated. The MSB will be set
+ // to the sign of the dividend (which controls the remainder's
+ // sign), while the LSB will store the XOR of the two signs,
+ // which indicates the quotient's sign. R5 is not modified by the
+ // 15-bit divmod routine.
+ sub.0 r1,16#8000#,r15 \ asr.1 r0,15,r4
+ beq divisorIsLargestNegative \ lsr.0 r1,15,r3
+=-> sub.0 r0,16#8000#,r15 \ xor.1 r3,r4,r5
+
+	// Handle the largest negative dividend (-32768) as a special case. Note that the
+ // absolute value of the divisor is also computed here.
+ add.0 [asr r1,15],r1,r3 \ beq dividendIsLargestNegative
+=-> xor.0 [asr r1,15],r3,r1 \ stw lr,(fp)-1
+
+ // Compute the absolute value of the dividend, and call the main
+ // divide routine.
+ add.0 r4,r0,r2 \ jl (&__divmod15) // fn_call &__divmod15
+=-> xor.0 r4,r2,r0
+
+handleNegatedResults:
+ // Speculatively store the negation of the results.
+ sub.0 0,r0,r2 \ sub.1 0,r1,r3
+
+ // Does the quotient need negating? The LSB indicates this.
+ and.0 r5,1,r15 \ ldw (fp)-1,lr
+ copyne r2,r0
+
+ asr.0 r5,15,r15 \ jr (lr)
+=-> copyne r3,r1
+
+dividendIsLargestNegative:
+
+	// Divide the constant -32768. Use the Hacker's Delight
+	// algorithm (i.e., ((dividend / 2) / divisor) * 2 gives an
+	// approximate answer). This code is a special case, so no
+	// great effort is made to make it fast, only to make it
+	// small.
+
+ lsr.0 r0,1,r0 \ jl (&__divmod15) // fn_call &__divmod15
+=-> stw r1,(fp)-2
+
+ // Load the original divisor, and compute the new quotient and
+ // remainder.
+ lsl.0 r0,1,r0 \ ldw (fp)-2,r3
+ lsl.0 r1,1,r1 // Fill stall slot
+
+	// The error in the quotient is 0 or 1. The error can be determined
+	// by comparing the remainder to the original divisor. If the
+	// remainder is not smaller than the divisor, then an error of 1
+	// has been introduced, which must be fixed.
+ sub.0 r1,r3,r15
+ blo noCompensationForError
+=-> nop
+ add.0 r0,1,r0 \ sub.1 r1,r3,r1
+noCompensationForError:
+ bra handleNegatedResults
+=-> nop
+
+divisorIsLargestNegative:
+	// The flags indicate whether the dividend is also the maximum negative value.
+ copy.0 r0,r1 \ copy.1 0,r0
+ copyeq r0,r1 \ jr (lr)
+=-> copyeq 1,r0
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __divmodhi4
+
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x4 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#68# 16#69# 16#34# 16#0# // Function name `_divmodhi4'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
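
The Hacker's Delight correction used above for the -32768 dividend (halve, divide, double, then repair a quotient error of 0 or 1) looks like this in C. This is a sketch with illustrative names, assuming a non-zero positive divisor, and plain / and % standing in for the 15-bit divmod call.

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical model of the most-negative-dividend path above. */
    static void ref_div_most_negative(uint16_t d, uint16_t *q, uint16_t *r)
    {
        uint16_t absdividend = 0x8000u;            /* |-32768| */
        uint16_t halfq = (uint16_t)((absdividend >> 1) / d);
        uint16_t halfr = (uint16_t)((absdividend >> 1) % d);

        *q = (uint16_t)(halfq << 1);               /* approximate quotient  */
        *r = (uint16_t)(halfr << 1);               /* approximate remainder */
        if (*r >= d) {                             /* repair error of 1     */
            *q += 1;
            *r -= d;
        }
    }

    int main(void)
    {
        uint16_t q, r;
        ref_div_most_negative(7, &q, &r);
        assert(q == 32768u / 7 && r == 32768u % 7);
        ref_div_most_negative(3, &q, &r);
        assert(q == 32768u / 3 && r == 32768u % 3);
        return 0;
    }

Doubling the halved remainder can make it reach the divisor, which is exactly the case the compare before noCompensationForError repairs.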
diff --git a/gcc/config/picochip/libgccExtras/divmodsi4.asm b/gcc/config/picochip/libgccExtras/divmodsi4.asm
new file mode 100644
index 000000000..4fc1acb1b
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/divmodsi4.asm
@@ -0,0 +1,233 @@
+// picoChip ASM file
+//
+// Support for 32-bit signed division/modulus.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global __divmodsi4
+__divmodsi4:
+_picoMark_FUNCTION_BEGIN=
+// picoChip Function Prologue : &__divmodsi4 = 8 bytes
+
+ // Note: optimising for size is preferred over optimising for speed.
+
+ // Note: the frame is setup throughout the following instructions,
+ // and is complete at the point the udivmodsi4 function is called.
+
+ // Note that R9 is encoded with a pattern which indicates
+ // whether the remainder and quotient should be negated on
+ // completion. The MSB is set to the sign of the dividend
+ // (i.e., the sign of the remainder), while the LSB encodes
+	// the XOR of the two inputs' signs (i.e., the sign of the
+	// quotient).
+
+ // If dividend is negative, invert the dividend and flag.
+ ASR.0 r1,15,r4
+ BEQ dividendNotNegative
+=-> STL R[9:8],(FP)-2
+
+ // Dividend is negative - negate dividend.
+ SUB.0 0,R0,R0
+ SUBB.0 0,R1,R1
+
+dividendNotNegative:
+
+ // If divisor is negative, invert the divisor.
+ AND.0 [lsr r3,15],1,r5
+ SUB.0 R3,0, r15
+ BGE divisorNotNegative
+=-> XOR.0 r4,r5,r9
+
+ // Divisor is negative - negate divisor.
+ SUB.0 0,R2,R2
+ SUBB.0 0,R3,R3
+
+divisorNotNegative:
+
+ STL R[13:12],(FP)-1 \ JL (&__udivmodsi4)
+=-> SUB.0 FP,8,FP // udivmodsi expects the frame to be valid still.
+
+ // The LSB of R9 indicates whether the quotient should be negated.
+ AND.0 r9,1,r15
+ BEQ skipQuotientNegation
+=-> LDL (FP)1,R[13:12] // Convenient point to restore link/fp
+
+ SUB.0 0,R4,R4
+ SUBB.0 0,R5,R5
+
+skipQuotientNegation:
+
+ // The MSB of R9 indicates whether the remainder should be negated.
+ ASR.0 R9,15,r15
+ BEQ epilogue
+
+ SUB.0 0,R6,R6
+ SUBB.0 0,R7,R7
+
+epilogue:
+
+ JR (R12)
+=-> LDL (FP)-2,R[9:8]
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __divmodsi4
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x8 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#73# 16#69# 16#34# 16#0# // Function name `_divmodsi4'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
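
The R9 sign bookkeeping described above can be written out in portable C. In this sketch the names are illustrative: one flag bit records that the remainder takes the dividend's sign, the other that the quotient's sign is the XOR of the input signs, and plain / and % stand in for the call to __udivmodsi4.

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical model of the sign handling around __udivmodsi4. */
    static int32_t ref_divmodsi4(int32_t n, int32_t d, int32_t *rem)
    {
        unsigned flags = 0;
        uint32_t un = (uint32_t)n, ud = (uint32_t)d;

        if (n < 0) { un = 0u - un; flags |= 2u; }  /* remainder takes dividend's sign    */
        if (d < 0) { ud = 0u - ud; }
        if ((n < 0) != (d < 0)) flags |= 1u;       /* quotient sign = XOR of input signs */

        uint32_t uq = un / ud, ur = un % ud;       /* stands in for __udivmodsi4 */

        *rem = (flags & 2u) ? (int32_t)(0u - ur) : (int32_t)ur;
        return (flags & 1u) ? (int32_t)(0u - uq) : (int32_t)uq;
    }

    int main(void)
    {
        int32_t r;
        assert(ref_divmodsi4(-7, 2, &r) == -3 && r == -1);  /* C truncation rules */
        assert(ref_divmodsi4(7, -2, &r) == -3 && r == 1);
        assert(ref_divmodsi4(-7, -2, &r) == 3 && r == -1);
        return 0;
    }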
diff --git a/gcc/config/picochip/libgccExtras/fake_libgcc.asm b/gcc/config/picochip/libgccExtras/fake_libgcc.asm
new file mode 100644
index 000000000..e4b78f1e1
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/fake_libgcc.asm
@@ -0,0 +1,6 @@
+// picoChip ASM file
+// Fake libgcc asm file. This contains nothing, but is used to prevent gcc
+// getting upset about the lack of a libgcc.S file when LIB1ASMFUNCS is defined
+// to switch off the compilation of parts of libgcc.
+
+
diff --git a/gcc/config/picochip/libgccExtras/longjmp.asm b/gcc/config/picochip/libgccExtras/longjmp.asm
new file mode 100644
index 000000000..d2a80aca7
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/longjmp.asm
@@ -0,0 +1,182 @@
+// picoChip ASM file
+//
+// Support for longjmp.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global _longjmp
+_longjmp:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &_longjmp = 0 bytes
+
+ LDL (R0)0, R[3:2]
+ LDL (R0)1, R[5:4]
+ LDL (R0)2, R[7:6]
+ LDL (R0)3, R[9:8]
+ LDL (R0)4, R[11:10]
+ LDL (R0)5, R[13:12]
+ LDW (R0)12, R14
+ LDW (R0)13, R1
+ JR (R12)
+=-> COPY.0 1,R0
+
+// picoChip Function Epilogue : longjmp
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE 0xb: DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#6c# 16#6f# 16#6e# 16#67# 16#6a# 16#6d# 16#70# 16#0# // Function name `longjmp'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
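
For reference, the caller-visible effect of the register-restoring sequence above is the usual setjmp/longjmp contract; this implementation appears to hand back 1 as the second return from setjmp regardless of the value passed (the final COPY.0 1,R0). A minimal usage sketch in standard C:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    static void fail(void)
    {
        longjmp(env, 1);          /* unwind to the matching setjmp */
    }

    int main(void)
    {
        if (setjmp(env) == 0) {   /* first return: context saved */
            fail();
            return 1;             /* never reached */
        }
        /* Second return, resumed with callee-saved registers reloaded. */
        puts("recovered via longjmp");
        return 0;
    }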
diff --git a/gcc/config/picochip/libgccExtras/lshrsi3.asm b/gcc/config/picochip/libgccExtras/lshrsi3.asm
new file mode 100644
index 000000000..4fc539029
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/lshrsi3.asm
@@ -0,0 +1,190 @@
+// picoChip ASM file
+//
+// Support for 32-bit logical shift right.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+.section .text
+
+.global ___lshrsi3
+___lshrsi3:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &___lshrsi3 = 4 bytes
+
+	// if (R2 > 15) goto _L2
+	SUB.0 15,R2,r15
+	JMPLT _L2
+=->	SUB.0 16,R2,R5	// R5 := 16 - R2
+
+	LSR.0 R0,R2,R0	// R0 := R0 >> R2
+	LSR.0 R1,R2,R3	// R3 := R1 >> R2
+	// The OR below only matters when R2 != 0 (the COPYNE guards it)
+	LSL.0 R1,R5,R5	// R5 := R1 << (16 - R2)
+	OR.0 R5,R0,R4	// R4 := R5 IOR R0
+	SUB.0 R2,0,r15
+	COPYNE R4,R0	// R0 := R4 (when R2 != 0)
+	JR (R12)	// Return to caller
+=->	COPY.0 R3,R1	// R1 := R3
+
+_L2:
+	LSR.0 R1,R2,R0	// R0 := R1 >> R2
+	JR (R12)	// Return to caller
+=->	COPY.0 0,R1	// R1 := 0 (short constant)
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __lshrsi3
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x4 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
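+// For example (illustrative):
+//   $ echo __lshrsi3 | od -t x1
+//   0000000 5f 5f 6c 73 68 72 73 69 33 0a
+// (the trailing 0a is echo's newline; write it as 16#0# instead).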
+.ascii 16#5f# 16#5f# 16#6c# 16#73# 16#68# 16#72# 16#73# 16#69# 16#33# 16#0# // Function name `__lshrsi3'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
diff --git a/gcc/config/picochip/libgccExtras/lshrsi3.c b/gcc/config/picochip/libgccExtras/lshrsi3.c
new file mode 100644
index 000000000..fa32dc726
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/lshrsi3.c
@@ -0,0 +1,76 @@
+/*
+
+picoChip GCC support for 32-bit logical shift right.
+
+Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+Contributed by Picochip Ltd.
+Maintained by Daniel Towner (daniel.towner@picochip.com)
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+typedef int HItype __attribute__ ((mode (HI)));
+typedef unsigned int UHItype __attribute__ ((mode (HI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+
+typedef struct USIstruct {
+ UHItype low, high;
+} USIstruct;
+
+typedef union USIunion {
+ USItype l;
+ USIstruct s;
+} USIunion;
+
+USItype __lshrsi3(USIunion value, HItype count) {
+ USIunion result;
+ int temp;
+
+ /* Ignore a zero count until we get into the (count < 16)
+ clause. This is slightly slower when shifting by zero, but faster
+ and smaller in all other cases (due to the better scheduling
+ opportunities available by putting the test near computational
+     instructions). */
+
+ if (count < 16) {
+ /* Shift low and high words by the count. */
+ result.s.low = value.s.low >> count;
+ result.s.high = value.s.high >> count;
+
+ /* There is now a hole in the upper `count' bits of the low
+ word. Shift the lower `count' bits of the upper word into the
+ low word. This only works when count isn't zero. */
+ if (count != 0) {
+ temp = value.s.high << (16 - count);
+ result.s.low |= temp;
+ }
+
+ } else {
+ /* Shift the upper word of the source into the lower word of the
+ result, and zero the result's upper word. Note that we actually
+     need to shift by (count - 16), but as we are only using the
+ bottom 4 bits, this is equivalent to shifting by count. */
+ result.s.low = value.s.high >> count;
+ result.s.high = 0;
+
+ }
+
+ return result.l;
+
+}
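+
+/* A worked example of the algorithm above (illustration only, not part
+   of the upstream source). Shifting value = 0x12345678 right by 4,
+   with 16-bit halves low = 0x5678 and high = 0x1234:
+
+     result.s.low  = 0x5678 >> 4        = 0x0567
+     result.s.high = 0x1234 >> 4        = 0x0123
+     temp          = 0x1234 << (16 - 4) = 0x4000
+     result.s.low |= temp               -> 0x4567
+
+   which reassembles to the expected 0x01234567.  */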
diff --git a/gcc/config/picochip/libgccExtras/parityhi2.asm b/gcc/config/picochip/libgccExtras/parityhi2.asm
new file mode 100644
index 000000000..b9d0cdc63
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/parityhi2.asm
@@ -0,0 +1,179 @@
+// picoChip ASM file
+//.file "parityhi2.c"
+//
+// Support for parity checks.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global ___parityhi2
+___parityhi2:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &___parityhi2 = 0 bytes
+ XOR.0 [LSR R0,8],R0,R0
+ XOR.0 [LSR R0,4],R0,R0
+ XOR.0 [LSR R0,2],R0,R0
+ JR (R12) \ XOR.0 [LSR R0,1],R0,R0
+=-> AND.0 R0,1,R0
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __parityhi2
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#5f# 16#70# 16#61# 16#72# 16#69# 16#74# 16#79# 16#68# 16#69# 16#32# 16#0# // Function name `__parityhi2'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/popcounthi2.asm b/gcc/config/picochip/libgccExtras/popcounthi2.asm
new file mode 100644
index 000000000..2da618c96
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/popcounthi2.asm
@@ -0,0 +1,201 @@
+// picoChip ASM file
+//.file "popcounthi2.S"
+//
+// Support for 16-bit population count.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+// The following code (taken from a newsgroup posting) was compiled, and then
+// hand assembled (a similar version is given in the Hacker's Delight
+// book, chapter 5).
+//
+// int
+// popcount (int value)
+// {
+// value = ((value & 0xAAAA) >> 1) + (value & 0x5555);
+// value = ((value & 0xCCCC) >> 2) + (value & 0x3333);
+// value = ((value & 0xF0F0) >> 4) + (value & 0x0F0F);
+// return ((value & 0xFF00) >> 8) + (value & 0x00FF);
+// }
+//
+// This assembly function is approx. 20x faster than a naive loop
+// implementation of the population count, but about 30% bigger
+// (45 bytes v. 34 bytes).
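+//
+// A worked example of the C above (illustration only, not part of the
+// upstream file): for value = 0x00B3 (binary 1011 0011, five bits set):
+//   after step 1 (pair sums)   : 0x0062
+//   after step 2 (nibble sums) : 0x0032
+//   after step 3 (byte sums)   : 0x0005
+//   after step 4 (final sum)   : 5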
+
+.align 8
+.global ___popcounthi2
+___popcounthi2:
+
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &___popcounthi2 = 0 bytes
+
+ AND.0 [LSR R0,1],21845,R0 \ AND.1 R0,21845,R5
+ ADD.0 R0,R5,R0
+ AND.0 [LSR R0,2],13107,R0 \ AND.1 R0,13107,R5
+ ADD.0 R0,R5,R0 \ COPY.1 1807,R2
+ AND.0 [LSR R0,4],R2,R0 \ AND.1 R0,3855,R5
+ ADD.0 R0,R5,R0
+ JR (R12) \ AND.0 R0, 255, R5
+=-> ADD.0 [LSR R0,8],R5,R0
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : ___popcounthi2
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#5f# 16#70# 16#6f# 16#70# 16#63# 16#6f# 16#75# 16#6e# 16#74# 16#68# 16#69# 16#32# 16#0# // Function name `__popcounthi2'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/setjmp.asm b/gcc/config/picochip/libgccExtras/setjmp.asm
new file mode 100644
index 000000000..247c715f6
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/setjmp.asm
@@ -0,0 +1,182 @@
+// picoChip ASM file
+//
+// Support for setjmp.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global _setjmp
+_setjmp:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &_setjmp = 0 bytes
+
+ STL R[3:2],(R0)0
+ STL R[5:4],(R0)1
+ STL R[7:6],(R0)2
+ STL R[9:8],(R0)3
+ STL R[11:10],(R0)4
+ STL R[13:12],(R0)5
+ STW R14,(R0)12
+ STW R1,(R0)13
+ JR (R12)
+=-> COPY.0 0,R0
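+
+// Implied jmp_buf layout, inferred from the stores above for
+// illustration (each STL fills one long slot, i.e. two 16-bit words):
+//   words 0-11 : R2..R13 (saved as six register pairs)
+//   word 12    : R14
+//   word 13    : R1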
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : setjmp
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#73# 16#65# 16#74# 16#6a# 16#6d# 16#70# 16#0# // Function name `setjmp'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/subdi3.asm b/gcc/config/picochip/libgccExtras/subdi3.asm
new file mode 100644
index 000000000..d1c833ea8
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/subdi3.asm
@@ -0,0 +1,191 @@
+// picoChip ASM file
+//
+// Support for 64-bit subtraction.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Hariharan Sandanagobalane (hariharan@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global __subdi3
+__subdi3:
+
+_picoMark_FUNCTION_BEGIN=
+// picoChip Function Prologue : &__subdi3 = 4 bytes
+
+ // The first operand of sub is completely in registers r[2-5]
+	// The second operand of sub is on the stack at FP(0-3), and the
+	// result needs to be written to the location pointed to by
+	// register r0. All we need to do is load the appropriate values,
+	// subtract them appropriately (with sub or subb) and then store the values back.
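+	// In C terms (a sketch only, not upstream code), with each operand
+	// split into 16-bit words a0..a3 and b0..b3, low word first:
+	//   r0 = a0 - b0;            // sub:  sets the borrow flag
+	//   r1 = a1 - b1 - borrow;   // subb: propagates the borrow
+	//   r2 = a2 - b2 - borrow;
+	//   r3 = a3 - b3 - borrow;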
+ ldw (FP)0, r1
+ stl r[7:6], (FP)-1
+ sub.0 r2, r1, r6
+ ldw (FP)1, r1
+ subb.0 r3, r1, r7
+ ldl (FP)1, r[3:2]
+ stl r[7:6], (r0)0
+ subb.0 r4, r2, r6
+ subb.0 r5, r3, r7
+ stl r[7:6], (r0)1
+ jr (r12)
+=-> ldl (FP)2, r[7:6]
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __subdi3
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x4 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#73# 16#75# 16#62# 16#64# 16#69# 16#33# 16#0# // Function name `_subdi3'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
+
diff --git a/gcc/config/picochip/libgccExtras/ucmpsi2.asm b/gcc/config/picochip/libgccExtras/ucmpsi2.asm
new file mode 100644
index 000000000..10c03cfcd
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/ucmpsi2.asm
@@ -0,0 +1,209 @@
+// picoChip ASM file
+//.file "ucmpsi2.c"
+//
+// Support for 32-bit unsigned compare.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+//
+// Compiled from the following, and then hand optimised.
+//
+// int __ucmpsi2 (USItype x, USItype y)
+// {
+//
+// USIunion lx; lx.l = x;
+// USIunion ly; ly.l = y;
+//
+// if (lx.s.high < ly.s.high)
+// return 0;
+// else if (lx.s.high > ly.s.high)
+// return 2;
+// if (lx.s.low < ly.s.low)
+// return 0;
+// else if (lx.s.low > ly.s.low)
+// return 2;
+// return 1;
+// }
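+//
+// So, for example (illustrative): __ucmpsi2 (5, 7) returns 0,
+// __ucmpsi2 (7, 7) returns 1, and __ucmpsi2 (9, 7) returns 2.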
+
+.section .text
+
+.align 8
+.global ___ucmpsi2
+___ucmpsi2:
+_picoMark_FUNCTION_BEGIN=
+// picoChip Function Prologue : &___ucmpsi2 = 0 bytes
+ SUB.0 R1,R3,r15
+
+ BLO _L1
+=-> SUB.0 R3,R1,r15 \ COPY.1 0,R5
+
+ BLO _L1
+=-> SUB.0 R0,R2,r15 \ COPY.1 2,R5
+
+ BLO _L1
+=-> SUB.0 R2,R0,r15 \ COPY.1 0,R5
+
+ BLO _L1
+=-> COPY.0 2,R5
+
+ COPY.0 1,R5
+_L1:
+ JR (R12)
+=-> COPY.0 R5,R0 // R0 := R5
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : __ucmpsi2
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#5f# 16#75# 16#63# 16#6d# 16#70# 16#73# 16#69# 16#32# 16#0# // Function name `__ucmpsi2'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/udivmodhi4.asm b/gcc/config/picochip/libgccExtras/udivmodhi4.asm
new file mode 100644
index 000000000..ac16fae39
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/udivmodhi4.asm
@@ -0,0 +1,238 @@
+// picoChip ASM file
+//
+// Support for 16-bit unsigned division/modulus.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.global __udivmodhi4
+__udivmodhi4:
+_picoMark_FUNCTION_BEGIN=
+
+// picoChip Function Prologue : &__udivmodhi4 = 6 bytes
+
+ // 16-bit unsigned division. The divstep function is only capable of
+ // handling 15-bit division (plus a sign to give 16-bits). It is not
+ // capable of handling unsigned division directly. Instead, take
+ // advantage of the special property that
+	// ((dividend / 2) / divisor) * 2 will be almost good enough. The
+ // error in the result is only 0 or 1, and this can be easily
+ // tested and corrected. A full description of the algorithm can
+ // be found in `Hacker's Delight', by Henry Warren, page 146.
+
+ // Input:
+ // r0 - dividend
+ // r1 - divisor
+ // Output:
+ // r0 - quotient
+ // r1 - remainder
+
+	// Note that the lr and original inputs are speculatively saved. They
+ // will only be restored if the 15-bit division function is called.
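+	//
+	// A C sketch of the correction step described above (illustration
+	// only, not upstream code):
+	//   q = ((n >> 1) / d) << 1;        // 15-bit-safe division
+	//   r = n - q * d;                  // remainder, possibly off by d
+	//   if (r >= d) { q += 1; r -= d; } // fix the error of at most 1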
+
+ sub.0 r1,0,r15 \ stl r[0:1],(fp)-1
+ bge divisorIs15bit
+=-> sub.0 r0,r1,r2 \ stw lr,(fp)-3
+
+ // The divisor is >= 2^15.
+ bhs quotientIs1
+
+ // The dividend < divisor. The quotient is thus 0, and the
+ // remainder is the dividend.
+ copy.0 r0,r1 \ jr (lr)
+=-> copy.0 0,r0
+
+quotientIs1:
+ // The dividend >= divisor. The quotient is thus 1, and the
+ // remainder can be computed directly by subtraction (i.e., the
+ // result of the comparison already performed to branch here).
+ jr (lr) \ copy.0 r2,r1
+=-> copy.0 1,r0
+
+divisorIs15bit:
+ // The divisor is < 2^15.
+
+ // Divide the original dividend by 2, and call the 15-bit division.
+ // Note that the original dividend is stored in r5, which is
+	// known to be unused by the called function; this avoids the
+	// memory stall that would otherwise be needed immediately after
+	// the function returns to reload this value from memory.
+
+ jl (&__divmod15) \ copy.0 r0,r5 // fn_call &__divmod15
+=-> lsr.0 r0,1,r0
+
+ // Compute the new quotient and remainder by multiplying them by 2.
+ // The remainder will be 1 out, if the original dividend was odd.
+ and.0 r5,1,r5 \ ldl (fp)-1,r[2:3]
+ add.0 [lsl r1,1],r5,r1 \ lsl.1 r0,1,r0
+
+ // The error in the quotient is 0 or 1. The error can be determined
+ // by comparing the remainder to the original divisor. If the
+ // remainder is bigger, then an error of 1 has been introduced.
+ sub.0 r1,r3,r15 \ ldw (fp)-3,lr
+ blo noCompensation
+=-> nop
+ add.0 r0,1,r0 \ sub.1 r1,r3,r1
+noCompensation:
+ jr (lr)
+
+_picoMark_FUNCTION_END=
+// picoChip Function Epilogue : udivmodhi4
+
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x6 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#75# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#68# 16#69# 16#34# 16#0# // Function name `_udivmodhi4'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/libgccExtras/udivmodsi4.asm b/gcc/config/picochip/libgccExtras/udivmodsi4.asm
new file mode 100644
index 000000000..92c2a4983
--- /dev/null
+++ b/gcc/config/picochip/libgccExtras/udivmodsi4.asm
@@ -0,0 +1,318 @@
+// picoChip ASM file
+//
+// Support for 32-bit unsigned division/modulus.
+//
+// Copyright (C) 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+// Contributed by Picochip Ltd.
+// Maintained by Daniel Towner (daniel.towner@picochip.com)
+//
+// This file is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option) any
+// later version.
+//
+// This file is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+.section .text
+
+.align 8
+.global __udivmodsi4
+__udivmodsi4:
+_picoMark_FUNCTION_BEGIN=
+// picoChip Function Prologue : &__udivmodsi4 = 24 bytes
+
+ // Schedule the register saves alongside the special cases, so that
+ // if the special cases fail, the registers will have already
+ // been stored onto the stack.
+ SUB.0 R3,R1,r15 \ STL R[13:12],(FP)-1
+ BHS skipCommonCase \ STL R[9:8],(FP)-4
+=-> SUB.0 R2,1,r15 \ STL R[11:10],(FP)-3
+
+_L2:
+ // Flags set above, and in _L2 caller.
+ BNE restOfCode
+=-> SUB.0 R3,0,r15
+ BNE restOfCode
+=-> COPY.0 R0,R4 \ COPY.1 R1,R5
+ JR (R12) // Return to caller
+=-> COPY.0 0,R6 \ COPY.1 0,R7
+ // Never reach here
+
+skipCommonCase:
+ SUB.0 R3,R1,r15
+ BNE _L3 // (Reversed branch)
+=-> SUB.0 R2,R0,r15 // Must be set in delay slot, so ready by _L9
+
+_L9:
+ BLO _L2 // (Reversed branch)
+=-> SUB.0 R2,1,r15
+
+_L3:
+ SUB.0 R2,R0,r15
+ BEQ _L10 // (Reversed branch)
+=-> SUB.0 R1,R3,r15 // Set flags for branch at _L10
+
+_L4:
+ // greater than
+ COPY.0 0,R4 \ COPY.1 0,R5 \ JR (R12) // Return to caller
+=-> COPY.0 R0,R6 \ COPY.1 R1,R7
+ // Doesn't reach here.
+
+_L10:
+ // Flags set in _L10 call delay slot.
+ BNE _L4
+=-> COPY.0 1,R4 \ COPY.1 0,R5
+ JR (R12) // Return to caller
+=-> COPY.0 0,R6 \ COPY.1 0,R7
+
+restOfCode:
+
+// Prologue
+
+ // Register saves scheduled alongside special cases above.
+ ADD.0 FP,-20,FP \ STW R14,(FP)-4
+
+ // The following can be scheduled together.
+ // dividend in R[9:8] (from R[1:0])
+ // divisor in R[7:6] (from R[3:2])
+ // R14 := clzsi2 (dividend)
+ // R0 := clzsi2 (divisor)
+ JL (&__clzsi2) \ COPY.0 R0,R8 \ COPY.1 R1,R9
+=-> COPY.0 R2,R6 \ COPY.1 R3,R7
+ COPY.0 R0,R14 \ JL (&__clzsi2)
+=-> COPY.0 R6,R0 \ COPY.1 R7,R1
+
+ // R14 := R0 - R14
+ SUB.0 R0,R14,R14
+
+ ADD.0 R14,1,R0 // R0 := R14 + 1 (HI)
+
+ // R[11:10] = R[7,6] << R14
+ SUB.0 15,R14,r15
+ LSL.0 R6,R14,R11 \ BLT setupDivstepLoop
+=-> SUB.0 0,R14,R4 \ COPY.1 0,R10
+
+ // Zero shift is a special case. Shifting by zero within a 16-bit
+ // source object is fine, but don't execute the OR of the right-shift
+ // into the final result.
+ LSL.0 R7,R14,R11 \ BEQ setupDivstepLoop
+=-> LSL.0 R6,R14,R10
+
+ LSR.0 R6,R4,R4
+ OR.0 R11,R4,R11
+
+setupDivstepLoop:
+
+ // R[5:4] := R[9:8] (SI)
+ COPY.0 R8,R4 \ COPY.1 R9,R5
+ COPY.0 0,R6 \ COPY.1 R0,R8
+
+ // Store original value of loopCount for use after the loop.
+ // The Subtraction is handled in the tail of the loop iteration
+ // after this point.
+ SUB.0 R4,R10,R0 \ COPY.1 R8,R14
+
+ // workingResult in R4,5,6
+ // temps in r0,1,2 and r7
+ // alignedDivisor in R10,11
+ // loopCount in r8
+ // r3, r9 scratch, used for renaming.
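+	//
+	// Rough C model of the divstep loop below (a sketch given the
+	// register roles above, not a line-for-line translation):
+	//   for (i = 0; i < loopCount; ++i) {
+	//     trial = workingResult - alignedDivisor;   // 48-bit subtract
+	//     if (no borrow)                            // divisor fitted
+	//       workingResult = (trial << 1) | 1;       // record a 1 bit
+	//     else
+	//       workingResult = workingResult << 1;     // record a 0 bit
+	//   }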
+
+loopStart:
+ // R0 := R4 - zeroExtend (R10) - only need 33-bits (i.e., 48-bits)
+ SUBB.0 R5,R11,R1 \ LSR.1 R0,15,R3
+ SUBB.0 R6,0,R2 \ LSR.1 R1,15,R6
+
+ // if (carry) goto shiftOnly
+ SUB.0 R8,1,R8 \ BNE shiftOnly
+=-> LSR.0 R4,15,R7 \ LSL.1 R1,1,R9
+
+ OR.0 [LSL R0,1],1,R4 \ BNE loopStart
+=-> SUB.0 R4,R10,R0 \ OR.1 R9,R3,R5
+
+ BRA loopEnd
+
+shiftOnly:
+
+ OR.0 [LSL R5,1],R7,R5 \ BNE loopStart \ LSR.1 R5,15,R6
+=-> SUB.0 [LSL R4,1],R10,R0 \LSL.1 R4,1,R4
+
+// End of loop
+loopEnd:
+
+ // Schedule the computation of the upper word after shifting
+ // alongside the decision over whether to branch, and the register
+ // restores.
+ // R10 is filled with a useful constant.
+ SUB.0 15,r14,r15 \ LDL (FP)4,R[13:12]
+ SUB.1 0,R14,R1 // Don't set flags!
+ LSL.0 R6,R1,R3 \ LDL (FP)-4,R[9:8]
+
+ BLT remainderHasMoreThan16Bits \ LSR.0 R5,R14,R7 \ COPY.1 -1,R10
+=-> LSL.0 R5,R1,R2 \ OR.1 R7,R3,R3
+
+ LSR.0 R4,R14,R3 \ COPY.1 R3,R7
+ BRA epilogue \ LSR.0 -1,R1,R0 \ COPY.1 0,R5
+=-> OR.0 R3,R2,R6 \ AND.1 R0,R4,R4
+
+remainderHasMoreThan16Bits:
+
+ LSL.0 R10,R14,R1 \ COPY.1 R3,R6
+ XOR.0 R10,R1,R1 \ COPY.1 0,R7
+ AND.0 R1,R5,R5
+
+epilogue:
+
+ JR (R12) \ LDW (FP)-4,R14
+=-> LDL (FP)-3,R[11:10]
+
+_picoMark_FUNCTION_END=
+
+// picoChip Function Epilogue : udivmodsi4
+
+//============================================================================
+// All DWARF information between this marker, and the END OF DWARF
+// marker should be included in the source file. Search for
+// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
+// provide the relevant information. Add markers called
+// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
+// function in question.
+//============================================================================
+
+//============================================================================
+// Frame information.
+//============================================================================
+
+.section .debug_frame
+_picoMark_DebugFrame=
+
+// Common CIE header.
+.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
+_picoMark_CieBegin=
+.unalignedInitLong 0xffffffff
+.initByte 0x1 // CIE Version
+.ascii 16#0# // CIE Augmentation
+.uleb128 0x1 // CIE Code Alignment Factor
+.sleb128 2 // CIE Data Alignment Factor
+.initByte 0xc // CIE RA Column
+.initByte 0xc // DW_CFA_def_cfa
+.uleb128 0xd
+.uleb128 0x0
+.align 2
+_picoMark_CieEnd=
+
+// FDE
+_picoMark_LSFDE0I900821033007563=
+.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin
+_picoMark_FdeBegin=
+.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // FDE initial location
+.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x18 // <-- FUNCTION_STACK_SIZE_GOES_HERE
+.initByte 0x4 // DW_CFA_advance_loc4
+.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN
+.initByte 0xe // DW_CFA_def_cfa_offset
+.uleb128 0x0
+.align 2
+_picoMark_FdeEnd=
+
+//============================================================================
+// Abbreviation information.
+//============================================================================
+
+.section .debug_abbrev
+_picoMark_ABBREVIATIONS=
+
+.section .debug_abbrev
+ .uleb128 0x1 // (abbrev code)
+ .uleb128 0x11 // (TAG: DW_TAG_compile_unit)
+ .initByte 0x1 // DW_children_yes
+ .uleb128 0x10 // (DW_AT_stmt_list)
+ .uleb128 0x6 // (DW_FORM_data4)
+ .uleb128 0x12 // (DW_AT_high_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x11 // (DW_AT_low_pc)
+ .uleb128 0x1 // (DW_FORM_addr)
+ .uleb128 0x25 // (DW_AT_producer)
+ .uleb128 0x8 // (DW_FORM_string)
+ .uleb128 0x13 // (DW_AT_language)
+ .uleb128 0x5 // (DW_FORM_data2)
+ .uleb128 0x3 // (DW_AT_name)
+ .uleb128 0x8 // (DW_FORM_string)
+.initByte 0x0
+.initByte 0x0
+
+ .uleb128 0x2 ;# (abbrev code)
+ .uleb128 0x2e ;# (TAG: DW_TAG_subprogram)
+.initByte 0x0 ;# DW_children_no
+ .uleb128 0x3 ;# (DW_AT_name)
+ .uleb128 0x8 ;# (DW_FORM_string)
+ .uleb128 0x11 ;# (DW_AT_low_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+ .uleb128 0x12 ;# (DW_AT_high_pc)
+ .uleb128 0x1 ;# (DW_FORM_addr)
+.initByte 0x0
+.initByte 0x0
+
+.initByte 0x0
+
+//============================================================================
+// Line information. DwarfLib requires this to be present, but it can
+// be empty.
+//============================================================================
+
+.section .debug_line
+_picoMark_LINES=
+
+//============================================================================
+// Debug Information
+//============================================================================
+.section .debug_info
+
+//Fixed header.
+.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
+_picoMark_DEBUG_INFO_BEGIN=
+.unalignedInitWord 0x2
+.unalignedInitLong _picoMark_ABBREVIATIONS
+.initByte 0x2
+
+// Compile unit information.
+.uleb128 0x1 // (DIE (0xb) DW_TAG_compile_unit)
+.unalignedInitLong _picoMark_LINES
+.unalignedInitWord _picoMark_FUNCTION_END
+.unalignedInitWord _picoMark_FUNCTION_BEGIN
+// Producer is `picoChip'
+.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
+.unalignedInitWord 0xcafe // ASM language
+.ascii 16#0# // Name. DwarfLib expects this to be present.
+
+.uleb128 0x2 ;# (DIE DW_TAG_subprogram)
+
+// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each
+// byte is specified using the format 16#XX#
+.ascii 16#5f# 16#75# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#73# 16#69# 16#34# 16#0# // Function name `_udivmodsi4'
+.unalignedInitWord _picoMark_FUNCTION_BEGIN // DW_AT_low_pc
+.unalignedInitWord _picoMark_FUNCTION_END // DW_AT_high_pc
+
+.initByte 0x0 // end of compile unit children.
+
+_picoMark_DEBUG_INFO_END=
+
+//============================================================================
+// END OF DWARF
+//============================================================================
+.section .endFile
+// End of picoChip ASM file
diff --git a/gcc/config/picochip/picochip-protos.h b/gcc/config/picochip/picochip-protos.h
new file mode 100644
index 000000000..965098eec
--- /dev/null
+++ b/gcc/config/picochip/picochip-protos.h
@@ -0,0 +1,128 @@
+/* Prototypes for exported functions defined in picochip.c
+
+ Copyright (C) 2000, 2001, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Picochip Ltd. (http://www.picochip.com)
+ Maintained by Daniel Towner (daniel.towner@picochip.com) and
+ Hariharan Sandanagobalane (hariharan@picochip.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern void picochip_function_prologue (FILE *, HOST_WIDE_INT);
+extern void picochip_function_epilogue (FILE *, HOST_WIDE_INT);
+
+extern enum reg_class picochip_reg_class_from_letter (unsigned);
+extern int picochip_const_ok_for_letter_p (unsigned HOST_WIDE_INT value, unsigned c);
+
+#ifdef RTX_CODE /* inside TREE_CODE */
+
+extern int picochip_reg_mode_ok_for_base_p (int mode, rtx x, unsigned strict);
+extern void picochip_print_operand (FILE * file, rtx op, int letter);
+extern void picochip_print_operand_address (FILE * file, rtx operand);
+
+extern const char *picochip_output_cbranch (rtx operands[]);
+extern const char *picochip_output_branch (rtx operands[], rtx insn);
+extern const char *picochip_output_compare (rtx operands[]);
+extern const char *picochip_output_jump (rtx insn);
+
+extern const char *picochip_output_put_array (int alternative,
+ rtx operands[]);
+extern const char *picochip_output_get_array (int alternative,
+ rtx operands[]);
+extern const char *picochip_output_testport_array (int alternative,
+ rtx operands[]);
+
+extern int picochip_expand_movmemhi (rtx *operands);
+
+extern rtx gen_SImode_mem(rtx opnd1,rtx opnd2);
+extern bool ok_to_peephole_stw(rtx opnd0, rtx opnd1, rtx opnd2, rtx opnd3);
+extern bool ok_to_peephole_ldw(rtx opnd0, rtx opnd1, rtx opnd2, rtx opnd3);
+
+extern rtx gen_min_reg(rtx opnd1,rtx opnd2);
+
+extern int picochip_regno_nregs (int regno, int mode);
+extern int picochip_class_max_nregs (int klass, int mode);
+
+extern void picochip_order_regs_for_local_alloc (void);
+
+extern int picochip_word_aligned_memory_reference (rtx operand);
+extern int picochip_alignable_memory_operand (rtx operand, enum machine_mode mode);
+extern int picochip_absolute_memory_operand (rtx op, enum machine_mode mode);
+
+extern rtx picochip_function_value (const_tree valtype, const_tree func, bool outgoing);
+extern int picochip_symbol_offset (rtx operand);
+
+extern void picochip_get_hi_aligned_mem (rtx ref, rtx * paligned_mem, rtx * pbitnum);
+
+extern rtx picochip_get_low_const (rtx value);
+extern rtx picochip_get_high_const (rtx value);
+
+extern void picochip_expand_prologue (void);
+extern void picochip_expand_epilogue (int is_sibling_call);
+
+extern void picochip_final_prescan_insn (rtx insn, rtx * operand, int num_operands);
+extern const char *picochip_asm_output_opcode (FILE * f, const char *ptr);
+
+extern int picochip_check_conditional_copy (rtx * operands);
+
+extern rtx picochip_return_addr_rtx(int count, rtx frameaddr);
+extern rtx picochip_struct_value_rtx(tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED);
+
+#endif /* RTX_CODE inside TREE_CODE */
+
+extern int picochip_legitimize_reload_address (rtx *x, enum machine_mode mode,
+ int opnum, int type, int ind_levels);
+
+
+void picochip_output_ascii (FILE * file, const char *str, int length);
+
+extern int picochip_hard_regno_mode_ok (int regno, enum machine_mode mode);
+extern void picochip_generate_internal_label (char *str, const char *prefix,
+ long num);
+
+extern bool picochip_return_in_memory(const_tree type,
+ const_tree fntype ATTRIBUTE_UNUSED);
+
+extern int initial_elimination_offset (int from, int to);
+
+extern void picochip_output_aligned_common (FILE * stream, const char *name,
+ unsigned size, unsigned align);
+
+extern void picochip_output_global (FILE * stream, const char *name);
+
+extern void picochip_output_aligned_local (FILE * stream, const char *name,
+ unsigned size, unsigned alignment);
+
+extern void picochip_output_label (FILE * stream, const char name[]);
+extern void picochip_output_labelref (FILE * stream, const char name[]);
+extern void picochip_weaken_label (FILE * stream, const char name[]);
+extern void picochip_output_internal_label (FILE * stream, const char *prefix,
+ unsigned long num);
+
+extern void warn_of_byte_access (void);
+
+/* True if VLIW scheduling is enabled (i.e., second scheduling pass). */
+extern int picochip_flag_schedule_insns2;
+
+extern void picochip_asm_output_anchor (rtx symbol);
+
+/* Instruction set capability flags. These are initialised to the
+ appropriate values by picochip_option_override, once the user has
+ selected a CPU type. */
+extern bool picochip_has_mul_unit;
+extern bool picochip_has_mac_unit;
+
diff --git a/gcc/config/picochip/picochip.c b/gcc/config/picochip/picochip.c
new file mode 100644
index 000000000..dd4e34e82
--- /dev/null
+++ b/gcc/config/picochip/picochip.c
@@ -0,0 +1,4703 @@
+/* Subroutines used for code generation on picoChip processors.
+ Copyright (C) 2001, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Picochip Ltd. (http://www.picochip.com)
+ Maintained by Daniel Towner (daniel.towner@picochip.com) and
+ Hariharan Sandanagobalane (hariharan@picochip.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "output.h"
+#include "basic-block.h"
+#include "integrate.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "hashtab.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "params.h"
+
+#include "picochip-protos.h"
+
+#include "insn-attr.h" /* For DFA state_t. */
+#include "insn-config.h" /* Required by recog.h */
+#include "insn-codes.h" /* For CODE_FOR_? */
+#include "optabs.h" /* For GEN_FCN */
+#include "basic-block.h" /* UPDATE_LIFE_GLOBAL* for picochip_reorg. */
+#include "timevar.h" /* For TV_SCHED2, in picochip_reorg. */
+#include "libfuncs.h" /* For memcpy_libfuncs, etc. */
+#include "df.h" /* For df_regs_ever_live_df_regs_ever_live_pp, etc. */
+
+
+/* Target AE ISA information. */
+enum picochip_dfa_type picochip_schedule_type;
+
+bool picochip_has_mul_unit = false;
+bool picochip_has_mac_unit = false;
+
+/* targetm hook function prototypes. */
+
+void picochip_asm_file_start (void);
+void picochip_asm_file_end (void);
+
+void picochip_init_libfuncs (void);
+void picochip_reorg (void);
+
+int picochip_arg_partial_bytes (CUMULATIVE_ARGS * p_cum,
+ enum machine_mode mode,
+ tree type, bool named);
+rtx picochip_function_arg (CUMULATIVE_ARGS * p_cum,
+ enum machine_mode mode,
+ const_tree type, bool named);
+rtx picochip_incoming_function_arg (CUMULATIVE_ARGS * p_cum,
+ enum machine_mode mode,
+ const_tree type, bool named);
+void picochip_arg_advance (CUMULATIVE_ARGS * p_cum, enum machine_mode mode,
+ const_tree type, bool named);
+unsigned int picochip_function_arg_boundary (enum machine_mode mode,
+ const_tree type);
+
+int picochip_sched_lookahead (void);
+int picochip_sched_issue_rate (void);
+int picochip_sched_adjust_cost (rtx insn, rtx link,
+ rtx dep_insn, int cost);
+int picochip_sched_reorder (FILE * file, int verbose, rtx * ready,
+ int *n_readyp, int clock);
+
+void picochip_init_builtins (void);
+rtx picochip_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+
+bool picochip_rtx_costs (rtx x, int code, int outer_code, int* total, bool speed);
+bool picochip_return_in_memory(const_tree type,
+ const_tree fntype ATTRIBUTE_UNUSED);
+bool picochip_legitimate_address_p (enum machine_mode, rtx, bool);
+rtx picochip_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode);
+int picochip_legitimize_reload_address (rtx *x, enum machine_mode mode,
+ int opnum, int type, int ind_levels);
+
+rtx picochip_struct_value_rtx(tree fntype ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED);
+rtx picochip_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED);
+static reg_class_t
+picochip_secondary_reload (bool in_p,
+ rtx x ATTRIBUTE_UNUSED,
+ reg_class_t cla ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ secondary_reload_info *sri);
+void
+picochip_asm_named_section (const char *name,
+ unsigned int flags ATTRIBUTE_UNUSED,
+ tree decl ATTRIBUTE_UNUSED);
+
+static rtx picochip_static_chain (const_tree, bool);
+
+static void picochip_option_override (void);
+
+/* Lookup table mapping a register number to the earliest containing
+ class. Used by REGNO_REG_CLASS. */
+const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+ TWIN_REGS, TWIN_REGS, TWIN_REGS, TWIN_REGS,
+ TWIN_REGS, TWIN_REGS, TWIN_REGS, TWIN_REGS,
+ TWIN_REGS, TWIN_REGS, TWIN_REGS, TWIN_REGS,
+ GR_REGS, FRAME_REGS, PTR_REGS, CONST_REGS,
+ ACC_REGS, CC_REGS, GR_REGS, GR_REGS
+};
+
+/* picoChip register names. */
+const char *picochip_regnames[] = REGISTER_NAMES;
+
+/* Define the maximum number of registers which may be used to pass
+ * parameters to functions. */
+#define MAX_CALL_PARAMETER_REGS 6
+
+
+/* Target scheduling information. */
+
+/* This flag indicates whether the next instruction to be output is a
+ VLIW continuation instruction. It is used to communicate between
+ final_prescan_insn and asm_output_opcode. */
+static int picochip_vliw_continuation = 0;
+
+/* This variable is used to communicate the current instruction
+ between final_prescan_insn and functions such as asm_output_opcode,
+   and picochip_get_vliw_alu_id (which are otherwise unable to determine
+   the current instruction). */
+static rtx picochip_current_prescan_insn;
+
+static bool picochip_is_delay_slot_pending = 0;
+
+/* When final_prescan_insn is called, it computes information about
+ the current VLIW packet, and stores it in this structure. When
+ instructions are output, this state is used to make sure that the
+ instructions are output in the correct way (e.g., which ALU to use,
+ whether a macro branch was ever previously a real branch, etc.). */
+struct vliw_state
+{
+ int contains_pico_alu_insn;
+ int contains_non_cc_alu_insn;
+ int num_alu_insns_so_far;
+
+ /* Record how many instructions are contained in the packet. */
+ int num_insns_in_packet;
+
+  /* In rare cases more than one CFI label must be deferred at once. */
+ int num_cfi_labels_deferred;
+ char cfi_label_name[2][256]; /* Used to record the name of a CFI label
+ emitted inside a VLIW packet. */
+ char lm_label_name[256]; /* Used to record the name of an LM label. */
+};
+
+struct vliw_state picochip_current_vliw_state;
+
+/* Save/restore recog_data. */
+static int picochip_saved_which_alternative;
+static struct recog_data picochip_saved_recog_data;
+
+/* Determine which ALU to use for the instruction in
+ picochip_current_prescan_insn. */
+static char picochip_get_vliw_alu_id (void);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options picochip_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE picochip_function_prologue
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE picochip_function_epilogue
+
+#undef TARGET_ASM_INTERNAL_LABEL
+#define TARGET_ASM_INTERNAL_LABEL picochip_output_internal_label
+
+#undef TARGET_ASM_GLOBALIZE_LABEL
+#define TARGET_ASM_GLOBALIZE_LABEL picochip_output_global
+
+#undef TARGET_ASM_BYTE_OP
+#define TARGET_ASM_BYTE_OP ".initByte "
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP ".initWord "
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP ".unalignedInitWord "
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP ".initLong "
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP ".unalignedInitLong "
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS picochip_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN picochip_expand_builtin
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS picochip_rtx_costs
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE picochip_sched_issue_rate
+
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER picochip_sched_reorder
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ picochip_sched_lookahead
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST picochip_sched_adjust_cost
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION picochip_asm_named_section
+
+#undef TARGET_HAVE_NAMED_SECTIONS
+#define TARGET_HAVE_NAMED_SECTIONS 1
+
+#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS
+#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS 1
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS picochip_init_libfuncs
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START picochip_asm_file_start
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END picochip_asm_file_end
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG picochip_reorg
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES picochip_arg_partial_bytes
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG picochip_function_arg
+
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG picochip_incoming_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE picochip_arg_advance
+
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY picochip_function_arg_boundary
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+/* Target support for Anchored Addresses optimization */
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET 0
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 7
+#undef TARGET_ASM_OUTPUT_ANCHOR
+#define TARGET_ASM_OUTPUT_ANCHOR picochip_asm_output_anchor
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE picochip_function_value
+/*
+#undef TARGET_LIBGCC_CMP_RETURN_MODE
+#define TARGET_LIBGCC_CMP_RETURN_MODE picochip_libgcc_cmp_return_mode
+*/
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P picochip_legitimate_address_p
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS picochip_legitimize_address
+
+/* Loading and storing QImode values to and from memory
+ usually requires a scratch register. */
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD picochip_secondary_reload
+#undef DONT_USE_BUILTIN_SETJMP
+#define DONT_USE_BUILTIN_SETJMP 1
+
+/* How Large Values are Returned */
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY picochip_return_in_memory
+
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN picochip_static_chain
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE picochip_option_override
+
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE picochip_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE picochip_option_optimization_table
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* The 2nd scheduling pass option is switched off, and a machine
+ dependent reorganisation ensures that it is run later on, after the
+ second jump optimisation. */
+#undef TARGET_DELAY_SCHED2
+#define TARGET_DELAY_SCHED2 true
+
+/* Variable tracking should be run after all optimizations which
+ change order of insns. It also needs a valid CFG. */
+#undef TARGET_DELAY_VARTRACK
+#define TARGET_DELAY_VARTRACK true
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Only return a value in memory if it is greater than 4 bytes.
+ int_size_in_bytes returns -1 for variable size objects, which go in
+   memory always. The cast to unsigned makes -1 > 4. */
+
+bool
+picochip_return_in_memory(const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 4);
+}
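+
+/* Worked example (illustrative, not part of the original port): a
+   4-byte struct gives int_size_in_bytes == 4, and 4 > 4 is false, so it
+   is returned in registers; a variable-sized object gives -1, which the
+   unsigned cast turns into the largest HOST_WIDE_INT value, so it is
+   always returned in memory.  */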
+
+/* Allow some options to be overridden. */
+
+static void
+picochip_option_override (void)
+{
+  /* If we are optimizing for stack, don't let the inliner inline
+     functions that could potentially increase stack size. */
+ if (flag_conserve_stack)
+ {
+ maybe_set_param_value (PARAM_LARGE_STACK_FRAME, 0,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_STACK_FRAME_GROWTH, 0,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ }
+
+ /* Turn off the elimination of unused types. The elaborator
+ generates various interesting types to represent constants,
+ generics, and so on, and it is useful to retain this information
+ in the debug output. The increased size of the debug information
+ is not really an issue for us. */
+ flag_eliminate_unused_debug_types = 0;
+
+  /* Even if the user specifies -fno-omit-frame-pointer on the command
+     line, we still want to omit frame pointer usage, since we don't
+     really have a frame pointer register. All accesses to FP must
+     therefore be converted to accesses off the stack pointer. */
+ flag_omit_frame_pointer = 1;
+
+  /* Turn on anchored addresses by default. This optimization can
+     decrease code size by placing anchors in data and accessing
+     file-local data variables as offsets from the anchor. */
+ if (optimize >= 1)
+ flag_section_anchors = 1;
+
+ /* The second scheduling pass runs within picochip_reorg, to avoid
+ having the second jump optimisation trash the instruction modes
+ (e.g., instructions are changed to TImode to mark the beginning
+ of cycles). Two types of DFA scheduling are possible: space and
+ speed. In both cases, instructions are reordered to avoid stalls
+ (e.g., memory loads stall for one cycle). Speed scheduling will
+ also enable VLIW instruction packing. VLIW instructions use more
+ code space, so VLIW scheduling is disabled when scheduling for
+ size. */
+ if (flag_schedule_insns_after_reload)
+ {
+ if (optimize_size)
+ picochip_schedule_type = DFA_TYPE_SPACE;
+ else
+ {
+ picochip_schedule_type = DFA_TYPE_SPEED;
+ flag_delayed_branch = 0;
+ }
+ }
+ else
+ picochip_schedule_type = DFA_TYPE_NONE;
+
+ /* Ensure that the debug level is always at least -g2. The flow
+ analyser works at its best if it always has debug
+ information. DWARF is non-intrusive, so it makes no difference to
+ code quality if debug is always enabled. */
+ if (debug_info_level < DINFO_LEVEL_NORMAL)
+ {
+ debug_info_level = DINFO_LEVEL_NORMAL;
+ write_symbols = DWARF2_DEBUG;
+ }
+
+ /* Options of the form -mae=mac, and so on will be substituted by
+ the compiler driver for the appropriate byte access and multiply
+ unit ISA options. Any unrecognised AE types will end up being
+ passed to the compiler, which should reject them as invalid. */
+ if (picochip_ae_type_string != NULL)
+ error ("invalid AE type specified (%s)", picochip_ae_type_string);
+
+ /* Override any specific capabilities of the instruction set. These
+ take precedence over any capabilities inferred from the AE type,
+ regardless of where the options appear on the command line. */
+ if (picochip_mul_type_string == NULL)
+ {
+ /* Default to MEM-type multiply, for historical compatibility. */
+ picochip_has_mac_unit = false;
+ picochip_has_mul_unit = true;
+ }
+ else
+ {
+ picochip_has_mac_unit = false;
+ picochip_has_mul_unit = false;
+
+ if (strcmp (picochip_mul_type_string, "mul") == 0)
+ picochip_has_mul_unit = true;
+ else if (strcmp (picochip_mul_type_string, "mac") == 0)
+ picochip_has_mac_unit = true;
+ else if (strcmp (picochip_mul_type_string, "none") == 0)
+ { /* Do nothing. Unit types already set to false. */ }
+ else
+ error ("invalid mul type specified (%s) - expected mac, mul or none",
+ picochip_mul_type_string);
+ }
+}
+
+
+/* Initialise the library functions to handle arithmetic on some of
+ the larger modes. */
+void
+picochip_init_libfuncs (void)
+{
+ /* 64-bit shifts */
+ set_optab_libfunc (ashr_optab, DImode, "__ashrdi3");
+ set_optab_libfunc (ashl_optab, DImode, "__ashldi3");
+ set_optab_libfunc (lshr_optab, DImode, "__lshrdi3");
+
+ /* 64-bit signed multiplication. */
+ set_optab_libfunc (smul_optab, DImode, "__muldi3");
+
+ /* Signed division */
+ set_optab_libfunc (sdiv_optab, HImode, "__divhi3");
+ set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
+
+ /* Signed modulus */
+ set_optab_libfunc (smod_optab, HImode, "__modhi3");
+ set_optab_libfunc (smod_optab, DImode, "__moddi3");
+
+  /* 32-bit count leading zeros. */
+ set_optab_libfunc (clz_optab, SImode, "_clzsi2");
+
+ /* 64-bit comparison */
+ set_optab_libfunc (ucmp_optab, DImode, "__ucmpdi2");
+ set_optab_libfunc (cmp_optab, DImode, "__cmpdi2");
+
+  /* 64-bit addition and subtraction. */
+ set_optab_libfunc (add_optab, DImode, "_adddi3");
+ set_optab_libfunc (sub_optab, DImode, "_subdi3");
+}
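+
+/* Illustrative sketch (not part of the original port): with the
+   registrations above, a 64-bit division such as
+
+     long long quot (long long a, long long b) { return a / b; }
+
+   is not expanded inline; it compiles to a call to __divdi3.  */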
+
+/* Memcpy function */
+int
+picochip_expand_movmemhi (rtx *operands)
+{
+ rtx src_addr_reg, dst_addr_reg, count_reg, src_mem, dst_mem, tmp_reg;
+ rtx start_label;
+ int align, size;
+ src_addr_reg = gen_reg_rtx(HImode);
+ dst_addr_reg = gen_reg_rtx(HImode);
+ count_reg = gen_reg_rtx(HImode);
+ emit_insn (gen_movhi (count_reg, operands[2]));
+  /* The address registers are HImode (Pmode), so move them with HImode moves. */
+  emit_insn (gen_movhi (src_addr_reg, XEXP(operands[1], 0)));
+  emit_insn (gen_movhi (dst_addr_reg, XEXP(operands[0], 0)));
+ gcc_assert (GET_CODE(count_reg) == REG);
+ start_label = gen_label_rtx ();
+ emit_label (start_label);
+
+  /* We can specialise the code for different alignments. */
+ align = INTVAL(operands[3]);
+ size = INTVAL(operands[2]);
+ gcc_assert(align >= 0 && size >= 0);
+ if (size != 0)
+ {
+ if (size % 4 == 0 && align % 4 == 0)
+ {
+ src_mem = gen_rtx_MEM(SImode, src_addr_reg);
+ dst_mem = gen_rtx_MEM(SImode, dst_addr_reg);
+ tmp_reg = gen_reg_rtx(SImode);
+ emit_insn (gen_movsi (tmp_reg, src_mem));
+ emit_insn (gen_movsi (dst_mem, tmp_reg));
+ emit_insn (gen_addhi3 (dst_addr_reg, dst_addr_reg, GEN_INT(4)));
+ emit_insn (gen_addhi3 (src_addr_reg, src_addr_reg, GEN_INT(4)));
+ emit_insn (gen_addhi3 (count_reg, count_reg, GEN_INT(-4)));
+	  /* The sub instruction above generates cc, but we cannot just emit the branch. */
+ emit_cmp_and_jump_insns (count_reg, const0_rtx, GT, 0, HImode, 0, start_label);
+ }
+ else if (size % 2 == 0 && align % 2 == 0)
+ {
+ src_mem = gen_rtx_MEM(HImode, src_addr_reg);
+ dst_mem = gen_rtx_MEM(HImode, dst_addr_reg);
+ tmp_reg = gen_reg_rtx(HImode);
+ emit_insn (gen_movhi (tmp_reg, src_mem));
+ emit_insn (gen_movhi (dst_mem, tmp_reg));
+ emit_insn (gen_addhi3 (dst_addr_reg, dst_addr_reg, const2_rtx));
+ emit_insn (gen_addhi3 (src_addr_reg, src_addr_reg, const2_rtx));
+ emit_insn (gen_addhi3 (count_reg, count_reg, GEN_INT(-2)));
+	  /* The sub instruction above generates cc, but we cannot just emit the branch. */
+ emit_cmp_and_jump_insns (count_reg, const0_rtx, GT, 0, HImode, 0, start_label);
+ }
+ else
+ {
+ src_mem = gen_rtx_MEM(QImode, src_addr_reg);
+ dst_mem = gen_rtx_MEM(QImode, dst_addr_reg);
+ tmp_reg = gen_reg_rtx(QImode);
+ emit_insn (gen_movqi (tmp_reg, src_mem));
+ emit_insn (gen_movqi (dst_mem, tmp_reg));
+ emit_insn (gen_addhi3 (dst_addr_reg, dst_addr_reg, const1_rtx));
+ emit_insn (gen_addhi3 (src_addr_reg, src_addr_reg, const1_rtx));
+ emit_insn (gen_addhi3 (count_reg, count_reg, GEN_INT(-1)));
+	  /* The sub instruction above generates cc, but we cannot just emit the branch. */
+ emit_cmp_and_jump_insns (count_reg, const0_rtx, GT, 0, HImode, 0, start_label);
+ }
+ }
+ return 1;
+}
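+
+/* Usage sketch (illustrative, not part of the original port): a block
+   copy such as
+
+     struct s { long x[4]; } a, b;
+     void copy (void) { a = b; }
+
+   reaches this expander with size 16 and alignment 4, so the SImode
+   branch above emits a loop moving four bytes per iteration.  */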
+
+
+/* Return the register class for letter C. */
+enum reg_class
+picochip_reg_class_from_letter (unsigned c)
+{
+ switch (c)
+ {
+ case 'k':
+ return FRAME_REGS;
+ case 'f':
+ return PTR_REGS;
+ case 't':
+ return TWIN_REGS;
+ case 'r':
+ return GR_REGS;
+ default:
+ return NO_REGS;
+ }
+}
+
+static const int
+pico_leaf_reg_alloc_order[] = LEAF_REG_ALLOC_ORDER;
+static const int
+pico_nonleaf_reg_alloc_order[] = REG_ALLOC_ORDER;
+
+void
+picochip_order_regs_for_local_alloc (void)
+{
+  /* We change the order for leaf functions alone. We put r12 at the
+     end, since using it would prevent us from combining stw/ldw into
+     stl/ldl and gives no benefit. In non-leaf functions, r12 is
+     saved and restored anyway, so it makes sense to use it. */
+
+ if (leaf_function_p())
+ {
+ memcpy ((char *)reg_alloc_order, (const char *) pico_leaf_reg_alloc_order,
+ FIRST_PSEUDO_REGISTER * sizeof (int));
+ }
+ else
+ {
+ memcpy ((char *)reg_alloc_order, (const char *) pico_nonleaf_reg_alloc_order,
+ FIRST_PSEUDO_REGISTER * sizeof (int));
+ }
+}
+
+/* Check that VALUE (an INT_CST) is ok as a constant for constraint letter C. */
+int
+picochip_const_ok_for_letter_p (unsigned HOST_WIDE_INT value, unsigned c)
+{
+
+ switch (c)
+ {
+ case 'I': /* 4 bits signed. */
+ return value + 8 < 16;
+ case 'J': /* 4 bits unsigned. */
+ return value < 16;
+ case 'K': /* 8 bits signed. */
+ return value + 128 < 256;
+ case 'M': /* 4-bit magnitude. */
+ return abs (value) < 16;
+ case 'N': /* 10 bits signed. */
+      return value + 512 < 1024;
+ case 'O': /* 16 bits signed. */
+ return value + 32768 < 65536;
+ default: /* Unknown letter. */
+ return 0;
+ }
+}
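+
+/* The checks above correspond to these constant ranges (an
+   illustrative summary, relying on unsigned wraparound for negative
+   values):
+     'I': -8..7      'J': 0..15      'K': -128..127
+     'M': -15..15    'N': -512..511  'O': -32768..32767  */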
+
+/* Stack utility functions. */
+rtx
+picochip_return_addr_rtx(int count, rtx frameaddr ATTRIBUTE_UNUSED)
+{
+ if (count==0)
+ return gen_rtx_REG (Pmode, LINK_REGNUM);
+ else
+ return NULL_RTX;
+}
+
+
+/* Emit a set of parallel register expressions used to store
+ blockmode values to pass to functions. */
+static rtx
+picochip_emit_register_parallel (int size_in_units, int offset)
+{
+ int num_regs = 0;
+ rtx result;
+ rtx vector[MAX_CALL_PARAMETER_REGS];
+ int base_reg = 0;
+ int i = 0;
+
+ /* Compute the base register, and number of required registers. */
+ base_reg = offset / 2;
+ num_regs = size_in_units / 2;
+ if (size_in_units % 2 == 1)
+ num_regs++;
+
+ /* Emit a register for each part of the block mode value to be
+ passed in a register. */
+ for (i = 0; i < num_regs; i++)
+ vector[i] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (HImode, base_reg + i),
+ GEN_INT (i * 2));
+ result = gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (num_regs, vector));
+
+ return result;
+
+}
+
+/* Emit an instruction to allocate a suitable amount of space on the
+ stack, by decrementing the stack pointer. */
+static void
+picochip_emit_stack_allocate (int adjustment)
+{
+ rtx insn;
+ rtx stack_pointer_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+
+ /* Use an addition of a negative value. */
+ insn = emit_insn (gen_addhi3 (stack_pointer_reg, stack_pointer_reg,
+ GEN_INT (-adjustment)));
+
+ /* Make the instruction frame related. Also add an expression note,
+ so that the correct Dwarf information is generated (see documention
+ for RTX_FRAME_RELATED_P for more details). */
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_reg,
+ gen_rtx_PLUS (Pmode, stack_pointer_reg,
+ GEN_INT (-adjustment))));
+
+}
+
+/* Emit an instruction to save a register of the given mode. The
+ offset at which to save the register is given relative to the stack
+ pointer. */
+static void
+picochip_emit_save_register (rtx reg, int offset)
+{
+ rtx stack_pointer, address, mem, insn;
+
+ stack_pointer = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+
+ address = gen_rtx_PLUS (Pmode, stack_pointer, GEN_INT (offset));
+
+ mem = gen_rtx_MEM (GET_MODE (reg), address);
+
+ insn = emit_move_insn (mem, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* For modes other than HImode, create a note explaining that
+ multiple registers have been saved. This allows the correct DWARF
+ call frame information to be generated. */
+ switch (GET_MODE (reg))
+ {
+ case HImode:
+ /* The RTL is sufficient to explain HImode register saves. */
+ break;
+
+ case SImode:
+ /* SImode must be broken down into parallel HImode register saves. */
+ {
+ rtvec p;
+ p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0) =
+ gen_rtx_SET (HImode,
+ gen_rtx_MEM (HImode,
+ gen_rtx_PLUS (Pmode, stack_pointer,
+ GEN_INT (offset))),
+ gen_rtx_REG (HImode, REGNO (reg)));
+ RTX_FRAME_RELATED_P (RTVEC_ELT (p, 0)) = 1;
+
+ RTVEC_ELT (p, 1) =
+ gen_rtx_SET (HImode, gen_rtx_MEM (HImode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer,
+ GEN_INT (offset +
+ 2))),
+ gen_rtx_REG (HImode, REGNO (reg) + 1));
+ RTX_FRAME_RELATED_P (RTVEC_ELT (p, 1)) = 1;
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_PARALLEL (VOIDmode, p));
+
+ }
+ break;
+
+ default:
+ internal_error
+ ("unexpected mode %s encountered in picochip_emit_save_register",
+ GET_MODE_NAME (GET_MODE (reg)));
+ }
+
+}
+
+/* Emit an instruction to restore a register of the given mode. The
+ offset from which to restore the register is given relative to the
+ stack pointer. */
+static void
+picochip_emit_restore_register (rtx reg, int offset)
+{
+ rtx stack_pointer, address, mem;
+
+ stack_pointer = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+
+ address = gen_rtx_PLUS (Pmode, stack_pointer, GEN_INT (offset));
+
+ mem = gen_rtx_MEM (GET_MODE (reg), address);
+
+ emit_move_insn (reg, mem);
+
+}
+
+/* Check that the given byte offset is aligned to the given number of
+ bits. */
+static int
+picochip_is_aligned (int byte_offset, int bit_alignment)
+{
+ int byte_alignment = bit_alignment / BITS_PER_UNIT;
+ return (byte_offset % byte_alignment) == 0;
+}
+
+/*****************************************************************************
+ * Stack layout.
+ *
+ * The following section contains code which controls how the stack is
+ * laid out.
+ *
+ * The stack is laid out as follows (high addresses first):
+ *
+ * Incoming arguments
+ * Pretend arguments (ARG PTR)
+ * Special registers
+ * General registers
+ * Frame (FP)
+ * Outgoing arguments (SP)
+ *
+ * The (constant) offsets of the different areas must be calculated
+ * relative to the stack area immediately below, and aligned
+ * appropriately. For example, the frame offset is computed by
+ * determining the offset of the special register area, adding the
+ * size of the special register area, and then aligning the resulting
+ * offset correctly. In turn, the special register offset is computed
+ * from the general register offset, and so on. This enables the
+ * different offsets to change size and alignment, without requiring
+ * the code for other offset calculations to be rewritten.
+ *
+ * The argument pointer, and the frame pointer are eliminated wherever
+ * possible, by replacing them with a constant offset from the stack
+ * pointer. In the rare cases where constant offsets from the stack
+ * pointer cannot be computed, another register will be allocated to
+ * serve as the argument pointer, or the frame pointer.
+ *
+ * The save registers are stored at small offsets from the caller, to
+ * enable the more efficient SP-based ISA instructions to be used.
+ *
+ ****************************************************************************/
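+
+/* Worked example (hypothetical numbers, not from the original port):
+   with 4 bytes of outgoing arguments and a 6-byte frame,
+   picochip_frame_byte_offset returns 4, the frame is rounded up to 8
+   bytes to meet the 32-bit STACK_BOUNDARY, and the register save area
+   therefore starts at byte offset 4 + 8 = 12.  */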
+
+/* Compute the size of an argument in units. */
+static int
+picochip_compute_arg_size (const_tree type, enum machine_mode mode)
+{
+ int type_size_in_units = 0;
+
+ if (type)
+ type_size_in_units = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ else
+ type_size_in_units = GET_MODE_SIZE (mode);
+
+ return type_size_in_units;
+
+}
+
+/* Determine where the next outgoing arg should be placed. */
+rtx
+picochip_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int reg = 0;
+ int type_align_in_units = 0;
+ int type_size_in_units;
+ int new_offset = 0;
+ int offset_overflow = 0;
+
+ /* VOIDmode is passed when computing the second argument to a `call'
+ pattern. This can be ignored. */
+ if (mode == VOIDmode)
+ return 0;
+
+ /* Compute the alignment and size of the parameter. */
+ type_align_in_units =
+ picochip_function_arg_boundary (mode, type) / BITS_PER_UNIT;
+ type_size_in_units = picochip_compute_arg_size (type, mode);
+
+ /* Compute the correct offset (i.e., ensure that the offset meets
+ the alignment requirements). */
+ offset_overflow = *cum % type_align_in_units;
+ if (offset_overflow == 0)
+ new_offset = *cum;
+ else
+ new_offset = (*cum - offset_overflow) + type_align_in_units;
+
+ if (TARGET_DEBUG)
+ {
+ printf ("Function arg:\n");
+ printf (" Type valid: %s\n", (type ? "yes" : "no"));
+ printf (" Cumulative Value: %d\n", *cum);
+ printf (" Mode: %s\n", GET_MODE_NAME (mode));
+ printf (" Type size: %i units\n", type_size_in_units);
+ printf (" Alignment: %i units\n", type_align_in_units);
+ printf (" New offset: %i\n", new_offset);
+ printf ("\n");
+ }
+
+ /* If the new offset is outside the register space, return. */
+ if (new_offset >= MAX_CALL_PARAMETER_REGS * 2)
+ return 0;
+
+ /* If the end of the argument is outside the register space, then
+ the argument must overlap the register space. Return the first
+ available register. */
+ if ((new_offset + type_size_in_units) > (MAX_CALL_PARAMETER_REGS * 2))
+ return gen_rtx_REG (HImode, new_offset / 2);
+
+ /* Create a register of the required mode to hold the parameter. */
+ reg = new_offset / 2;
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case SFmode:
+ case DImode:
+ case DFmode:
+ case SDmode:
+ case DDmode:
+ case CHImode:
+ case CSImode:
+ case SCmode:
+ case CQImode:
+ return gen_rtx_REG (mode, reg);
+
+ case BLKmode:
+ {
+ /* Empty blockmode values can be passed as arguments (e.g.,
+ * empty structs). These require no registers
+ * whatsoever. Non-empty blockmode values are passed in a set
+ * of parallel registers. */
+ if (type_size_in_units == 0)
+ return 0;
+ else
+ return picochip_emit_register_parallel (type_size_in_units, new_offset);
+ }
+
+ default:
+ warning
+ (0, "defaulting to stack for %s register creation",
+ GET_MODE_NAME (mode));
+ break;
+ }
+
+ return 0;
+
+}
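+
+/* Example (illustrative): with 16-bit registers, an SImode argument at
+   cumulative offset 0 is returned as (reg:SI 0), occupying the R0/R1
+   pair; a following HImode argument starts at offset 4 and becomes
+   (reg:HI 2).  */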
+
+/* Determine where the next incoming function argument will
+ appear. Normally, this works in exactly the same way as
+ picochip_function_arg, except when the function in question is a
+   variadic function. In this case, the incoming arguments all appear
+ to be passed on the stack (actually, some of the arguments are
+ passed in registers, which are then pushed onto the stack by the
+ function prologue). */
+rtx
+picochip_incoming_function_arg (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type, bool named)
+{
+
+ if (cfun->stdarg)
+ return 0;
+ else
+ return picochip_function_arg (cum, mode, type, named);
+
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode. */
+unsigned int
+picochip_function_arg_boundary (enum machine_mode mode,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ int align;
+
+ if (mode == BLKmode)
+ align = STACK_BOUNDARY;
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+
+ return align;
+
+}
+
+/* Compute partial registers. */
+int
+picochip_arg_partial_bytes (CUMULATIVE_ARGS * p_cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int type_align_in_units = 0;
+ int type_size_in_units;
+ int new_offset = 0;
+ int offset_overflow = 0;
+
+ unsigned cum = *((unsigned *) p_cum);
+
+ /* VOIDmode is passed when computing the second argument to a `call'
+ pattern. This can be ignored. */
+ if (mode == VOIDmode)
+ return 0;
+
+ /* Compute the alignment and size of the parameter. */
+ type_align_in_units =
+ picochip_function_arg_boundary (mode, type) / BITS_PER_UNIT;
+ type_size_in_units = picochip_compute_arg_size (type, mode);
+
+ /* Compute the correct offset (i.e., ensure that the offset meets
+ the alignment requirements). */
+ offset_overflow = cum % type_align_in_units;
+ if (offset_overflow == 0)
+ new_offset = cum;
+ else
+ new_offset = (cum - offset_overflow) + type_align_in_units;
+
+ if (TARGET_DEBUG)
+ {
+ printf ("Partial function arg nregs:\n");
+ printf (" Type valid: %s\n", (type ? "yes" : "no"));
+ printf (" Cumulative Value: %d\n", cum);
+ printf (" Mode: %s\n", GET_MODE_NAME (mode));
+ printf (" Type size: %i units\n", type_size_in_units);
+ printf (" Alignment: %i units\n", type_align_in_units);
+ printf (" New offset: %i\n", new_offset);
+ printf ("\n");
+ }
+
+ /* If the new offset is outside the register space, return. */
+ if (new_offset >= (MAX_CALL_PARAMETER_REGS * 2))
+ return 0;
+
+ /* If the end of the argument is outside the register space, then
+ the argument must overlap the register space. Return the number
+ of bytes which are passed in registers. */
+ if ((new_offset + type_size_in_units) > (MAX_CALL_PARAMETER_REGS * 2))
+ return ((MAX_CALL_PARAMETER_REGS * 2) - new_offset);
+
+ return 0;
+
+}
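+
+/* Example (illustrative): with MAX_CALL_PARAMETER_REGS == 6, the
+   register space holds 12 bytes. A DImode (8-byte) argument starting
+   at offset 8 overlaps the boundary, so 12 - 8 = 4 bytes are passed in
+   registers and the remaining 4 go on the stack.  */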
+
+/* Advance the cumulative args counter CUM. */
+void
+picochip_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int type_align_in_units = 0;
+ int type_size_in_units;
+ int new_offset = 0;
+ int offset_overflow = 0;
+
+ /* VOIDmode is passed when computing the second argument to a `call'
+ pattern. This can be ignored. */
+ if (mode == VOIDmode)
+ return;
+
+ /* Compute the alignment and size of the parameter. */
+ type_align_in_units =
+ picochip_function_arg_boundary (mode, type) / BITS_PER_UNIT;
+ type_size_in_units = picochip_compute_arg_size (type, mode);
+
+ /* Compute the correct offset (i.e., ensure that the offset meets
+ the alignment requirements). */
+ offset_overflow = *cum % type_align_in_units;
+ if (offset_overflow == 0)
+ new_offset = *cum;
+ else
+ new_offset = (*cum - offset_overflow) + type_align_in_units;
+
+ /* Advance past the last argument. */
+ new_offset += type_size_in_units;
+
+ *cum = new_offset;
+}
+
+/* Determine whether a register needs saving/restoring. It does if it
+ is live in a function, and isn't a call-used register. */
+static int
+picochip_reg_needs_saving (int reg_num)
+{
+ return df_regs_ever_live_p(reg_num) && !call_used_regs[reg_num];
+}
+
+/* Compute and return offset of the main frame. */
+static int
+picochip_frame_byte_offset (void)
+{
+ gcc_assert(picochip_is_aligned
+ (crtl->outgoing_args_size, BITS_PER_WORD));
+
+ return crtl->outgoing_args_size;
+}
+
+/* Return the size of the main frame. */
+static int
+picochip_frame_size_in_bytes (void)
+{
+ int frame_size = get_frame_size();
+ int stack_align = STACK_BOUNDARY/BITS_PER_UNIT;
+ if (!picochip_is_aligned (frame_size, STACK_BOUNDARY))
+ frame_size = frame_size + (stack_align - frame_size%stack_align);
+ gcc_assert(picochip_is_aligned (frame_size, STACK_BOUNDARY));
+ return frame_size;
+}
+
+/* Compute and return the size (in bytes) of the register save/restore
+ area for the current function. This only includes the general
+ purpose registers - the special purpose stack pointer and link
+ registers are not included in this area. */
+static int
+picochip_save_area_size_in_bytes (void)
+{
+ int num_regs_to_save = 0;
+ int i = 0;
+
+ /* Read through all the registers, determining which need to be saved. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (picochip_reg_needs_saving (i))
+ num_regs_to_save += 1;
+ }
+
+ return num_regs_to_save * UNITS_PER_WORD;
+
+}
+
+/* Compute and return offset of the save area base. */
+static int
+picochip_save_area_byte_offset (void)
+{
+ int base_offset = (picochip_frame_byte_offset () +
+ picochip_frame_size_in_bytes ());
+
+ gcc_assert(picochip_is_aligned (base_offset, BITS_PER_WORD));
+
+ return base_offset;
+
+}
+
+/* Compute and return offset of the special register save area. This
+ area can be found immediately above the normal save area. It must
+ be aligned, to allow the registers to be saved and restored as a
+ pair. */
+static int
+picochip_special_save_area_byte_offset (void)
+{
+ int byte_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+ int offset = (picochip_save_area_byte_offset () +
+ picochip_save_area_size_in_bytes ());
+
+ if ((offset % byte_alignment) != 0)
+ offset = ((offset / byte_alignment) + 1) * byte_alignment;
+
+ return offset;
+
+}
+
+/* Determine whether the LNK/SP register save/restores can be eliminated. */
+static int
+picochip_can_eliminate_link_sp_save (void)
+{
+ /* This deserves some reasoning. The df_regs_ever_live_p call keeps
+ changing during optimizations phases. So, this function returns different
+ values when called from initial_elimination_offset and then again when it
+ is called from prologue/epilogue generation. This means that argument
+   accesses become wrong. This can only be avoided if the function does
+   not use the stack at all; the following conditions ensure that. */
+
+ return (current_function_is_leaf &&
+ !df_regs_ever_live_p(LINK_REGNUM) &&
+ !df_regs_ever_live_p(STACK_POINTER_REGNUM) &&
+ (picochip_special_save_area_byte_offset() == 0) &&
+ (crtl->args.size == 0) &&
+ (crtl->args.pretend_args_size == 0));
+}
+
+/* Compute the size of the special reg save area (SP and LNK). If the
+ SP/LNK registers don't need to be saved, this area can shrink to
+ nothing. */
+static int
+picochip_special_save_area_size_in_bytes (void)
+{
+
+
+ if (picochip_can_eliminate_link_sp_save ())
+ return 0;
+ else
+ return 2 * UNITS_PER_WORD;
+}
+
+/* Return the number of pretend arguments. If this function is
+   variadic, all the incoming arguments are effectively passed on the
+ stack. If this function has real pretend arguments (caused by a
+ value being passed partially on the stack and partially in
+ registers), then return the number of registers used. */
+static int
+picochip_pretend_arg_area_size (void)
+{
+
+ if (crtl->args.pretend_args_size != 0)
+ {
+ gcc_assert(crtl->args.pretend_args_size % 4 == 0);
+
+ return crtl->args.pretend_args_size;
+ }
+ else if (cfun->stdarg)
+ return 12;
+ else
+ return 0;
+
+}
+
+/* Compute and return the offset of the pretend arguments. The pretend
+ arguments are contiguous with the incoming arguments, and must be
+ correctly aligned. */
+static int
+picochip_pretend_arg_area_byte_offset (void)
+{
+ int base_offset = 0;
+
+ base_offset = (picochip_special_save_area_byte_offset () +
+ picochip_special_save_area_size_in_bytes ());
+
+ gcc_assert(picochip_is_aligned (base_offset, STACK_BOUNDARY));
+ gcc_assert(picochip_is_aligned
+ (base_offset + picochip_pretend_arg_area_size (), STACK_BOUNDARY));
+
+ return base_offset;
+
+}
+
+/* Compute and return the offset of the incoming arguments. If a
+ static chain is in use, this will be passed just before the other
+ arguments. This means that the pretend argument mechanism, used in
+ variadic functions, doesn't work properly. Thus, static chains work
+ on their own, as do variadic functions, but not the combination of
+ the two. This isn't really a problem. */
+static int
+picochip_arg_area_byte_offset (void)
+{
+ int base_offset = (picochip_pretend_arg_area_byte_offset () +
+ picochip_pretend_arg_area_size ());
+
+  /* Add an extra 4 bytes - only an extra 16 bits are required, but
+ the alignment on a 32-bit boundary must be maintained. */
+ if (cfun->static_chain_decl != NULL)
+ {
+ gcc_assert (!cfun->stdarg);
+ base_offset += 4;
+ }
+
+ gcc_assert(picochip_is_aligned (base_offset, STACK_BOUNDARY));
+
+ return base_offset;
+
+}
+
+int
+picochip_regno_nregs (int regno ATTRIBUTE_UNUSED, int mode)
+{
+
+ /* Special case - only one register needed. */
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return 1;
+
+  /* We never actually allocate acc0. But it seems we need to make it
+     look like an allocatable register for the dataflow checks to work
+     properly. Note that hard_regno_mode_ok always returns 0 for acc0. */
+
+ if (regno == 16)
+ return 1;
+
+ /* General case - compute how much space in terms of units. */
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+
+}
+
+int
+picochip_class_max_nregs (int reg_class, int mode)
+{
+ int size = ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+
+ if (reg_class == ACC_REGS)
+ return 1;
+
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return 1;
+ else
+ return size;
+
+}
+
+/* Eliminate a register that addresses the stack (e.g., frame pointer,
+ argument pointer) by replacing it with a constant offset from the
+ main stack register. */
+int
+initial_elimination_offset (int from, int to)
+{
+ int offset_from_sp = 0;
+
+ if (FRAME_POINTER_REGNUM == from && STACK_POINTER_REGNUM == to)
+ offset_from_sp = picochip_frame_byte_offset ();
+ else if (ARG_POINTER_REGNUM == from && STACK_POINTER_REGNUM == to)
+ offset_from_sp = picochip_pretend_arg_area_byte_offset ();
+ else
+ gcc_unreachable();
+
+ return offset_from_sp;
+
+}
+
+/* Compute and return the size of the incoming argument area. */
+static int
+picochip_arg_area_size_in_bytes (void)
+{
+ return crtl->args.size;
+}
+
+/* Determine whether the given register is valid. When the strict mode
+ is used, only hard registers are valid, otherwise any register is
+ valid. */
+static int
+picochip_legitimate_address_register (rtx x, unsigned strict)
+{
+
+ /* Sanity check - non-registers shouldn't make it here, but... */
+ if (REG != GET_CODE (x))
+ return 0;
+
+ if (strict)
+ return REGNO (x) < FIRST_NONHARD_REGISTER;
+ else
+ return 1;
+
+}
+
+/* Determine whether the given constant is in the range required for
+ the given base register. */
+static int
+picochip_const_ok_for_base (enum machine_mode mode, int regno, int offset)
+{
+ HOST_WIDE_INT corrected_offset;
+
+ if (GET_MODE_SIZE (mode) != 0)
+ {
+ if (GET_MODE_SIZE(mode) <= 4)
+ {
+	  /* We used to allow incorrect offsets when strict is 0, but
+	     that relied on reload doing the right thing. We have had
+	     problems there before, and on compilers newer than 4.3
+	     there is no benefit. */
+ if (offset % GET_MODE_SIZE (mode) != 0)
+ return 0;
+ corrected_offset = offset / GET_MODE_SIZE (mode);
+ }
+ else
+ {
+ if (offset % 4 != 0)
+ return 0;
+ corrected_offset = offset / 4;
+ }
+ }
+ else
+ {
+ /* Default to the byte offset as supplied. */
+ corrected_offset = offset;
+ }
+
+ /* The offset from the base register can be different depending upon
+ the base register. The stack/frame/argument pointer offsets can
+ all be greater than a simple register-based offset. Note that the
+ frame/argument pointer registers are actually eliminations of the
+ stack pointer, so a value which is valid for an offset to, for
+ example, the frame pointer, might be invalid for the stack
+ pointer once the elimination has occurred. However, there is no
+ need to handle this special case here, as the stack offset is
+ always checked after elimination anyway, and the generated code
+ seems to have identical performance. */
+ if (regno == STACK_POINTER_REGNUM ||
+ regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
+ return picochip_const_ok_for_letter_p (corrected_offset, 'K');
+ else
+ return picochip_const_ok_for_letter_p (corrected_offset, 'J');
+
+}
+
+/* Determine whether a given rtx is a legitimate address for machine_mode
+ MODE. STRICT is non-zero if we're being strict - any pseudo that
+ is not a hard register must be a memory reference. */
+bool
+picochip_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ int valid = 0;
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ valid = picochip_legitimate_address_register (x, strict);
+ break;
+
+ case PLUS:
+ {
+ rtx base = XEXP (x, 0);
+ rtx offset = XEXP (x, 1);
+ if (strict && !REGNO_OK_FOR_BASE_P (REGNO(base)))
+ {
+ valid = 0;
+ break;
+ }
+
+ valid = (REG == GET_CODE (base) &&
+ picochip_legitimate_address_register (base, strict) &&
+ CONST_INT == GET_CODE (offset) &&
+ picochip_const_ok_for_base (mode, REGNO (base),
+ INTVAL (offset)));
+ break;
+ }
+
+ case SYMBOL_REF:
+ /* The user can select whether a symbol can be used as a memory
+ address. Typically, this will decrease execution time (no
+ register load is required first), but will increase code size
+ (because the symbol will be used several times, rather than
+	 loaded once into a register). */
+ valid = TARGET_SYMBOL_AS_ADDRESS;
+ break;
+
+ case CONST:
+ {
+ /* A constant memory address must be a (plus (symbol_ref)
+ (const_int)), and is only allowed when the symbols are
+ permitted addresses. */
+ rtx inner = XEXP (x, 0);
+
+ valid = (TARGET_SYMBOL_AS_ADDRESS &&
+ PLUS == GET_CODE (inner) &&
+ SYMBOL_REF == GET_CODE (XEXP (inner, 0)) &&
+ CONST_INT == GET_CODE (XEXP (inner, 1)));
+
+ break;
+
+ }
+
+ default:
+ valid = 0;
+ }
+
+ return valid;
+
+}
+
+/* For all memory operations, picochip allows a uconst4 offset value. It
+ is hence beneficial to turn an
+ addr = <reg + long_const>
+ ld/st addr
+
+ into
+
+ X = reg + long_const & FFF0
+ diff = long_const - (long_const & FFF0)
+ ld/st <X + diff>
+
+ X can be reused in subsequent memory operations.
+ */
+rtx
+picochip_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ unsigned mask_val;
+
+ if (!optimize)
+ return x;
+
+  /* Depending on the mode, the offsets allowed are either 16/32/64. */
+ switch (mode)
+ {
+ case QImode:
+ mask_val = 0xFFF0;
+ break;
+ case HImode:
+ mask_val = 0xFFE0;
+ break;
+ case SImode:
+ mask_val = 0xFFC0;
+ break;
+ default:
+ return x;
+ }
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ int high_val, low_val, offset;
+ offset = INTVAL (XEXP (x, 1));
+ /* Ignore cases with negative offsets. */
+ if (offset < 0)
+ return x;
+ high_val = offset & mask_val;
+ low_val = offset - high_val;
+ if (high_val != 0)
+ {
+ rtx temp_reg = force_reg (Pmode, gen_rtx_PLUS (Pmode, XEXP (x, 0), GEN_INT(high_val)));
+ x = gen_rtx_PLUS (Pmode, temp_reg, GEN_INT(low_val));
+ return x;
+ }
+ }
+ return x;
+}
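+
+/* Example (illustrative): a QImode access to <r2 + 0x123> is rewritten
+   by the code above as r_tmp = r2 + 0x120 followed by an access to
+   <r_tmp + 3>, leaving an offset small enough for a uconst4 field;
+   r_tmp can then be reused by neighbouring accesses (r_tmp is a
+   hypothetical register name).  */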
+
+/* For all memory operations, picochip allows a uconst4 offset value. It
+ is hence beneficial to turn an
+ addr = <reg + long_const>
+ ld/st addr
+
+ into
+
+ X = reg + long_const & FFF0
+ diff = long_const - (long_const & FFF0)
+ ld/st <X + diff>
+
+ X can be reused in subsequent memory operations.
+ */
+int
+picochip_legitimize_reload_address (rtx *x,
+ enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ unsigned mask_val;
+
+ if (picochip_symbol_offset(*x))
+ {
+ *x = gen_rtx_CONST(mode, *x);
+ return 0;
+ }
+ if (!optimize)
+ return 0;
+
+  /* We should recognise addresses that we created. */
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (*x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (*x, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT)
+ {
+ push_reload (XEXP (*x, 0), NULL_RTX, &XEXP (*x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (*x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ return 1;
+ }
+
+ /* Depending on mode, the offsets allowed are either 16/32/64. */
+ switch (mode)
+ {
+ case QImode:
+ mask_val = 0xFFF0;
+ break;
+ case HImode:
+ mask_val = 0xFFE0;
+ break;
+ case SImode:
+ mask_val = 0xFFC0;
+ break;
+ default:
+ return 0;
+ }
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 0)) == REG
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT)
+ {
+ int high_val, low_val, offset;
+ offset = INTVAL (XEXP (*x, 1));
+ /* Ignore cases with negative offsets. */
+ if (offset < 0)
+ return 0;
+ high_val = offset & mask_val;
+ low_val = offset - high_val;
+ if (high_val != 0)
+ {
+ rtx temp_reg = gen_rtx_PLUS (Pmode, XEXP (*x, 0), GEN_INT(high_val));
+ *x = gen_rtx_PLUS (Pmode, temp_reg, GEN_INT(low_val));
+ push_reload (XEXP (*x, 0), NULL_RTX, &XEXP (*x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (*x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Detect an rtx which matches (plus (symbol_ref) (const_int)). */
+int
+picochip_symbol_offset (rtx operand)
+{
+
+ return (PLUS == GET_CODE (operand) &&
+ SYMBOL_REF == GET_CODE (XEXP (operand, 0)) &&
+ CONST_INT == GET_CODE (XEXP (operand, 1)));
+
+}
+
+/* Assembly output. */
+
+/* The format here should match the format used in the output of
+ symbol_ref's elsewhere in this file. */
+void
+picochip_output_label (FILE * stream, const char name[])
+{
+ int is_cfi_label = (strncmp (name, "picoMark_LCFI", 13) == 0);
+
+ /* If VLIW scheduling is in use, any Call Frame Information labels
+ generated inside a packet must have their output deferred until
+ the end of the packet. */
+ if (picochip_schedule_type == DFA_TYPE_SPEED &&
+ is_cfi_label && picochip_vliw_continuation)
+ {
+ if (picochip_current_vliw_state.num_cfi_labels_deferred == 2)
+ {
+ internal_error ("LCFI labels have already been deferred");
+ }
+ strcpy (picochip_current_vliw_state.cfi_label_name[
+ picochip_current_vliw_state.num_cfi_labels_deferred], name);
+ picochip_current_vliw_state.num_cfi_labels_deferred++;
+ }
+ else
+ {
+ assemble_name (stream, name);
+
+ if (strncmp (name, "picoMark_", 9) == 0)
+ fprintf (stream, "=\n");
+ else
+ fprintf (stream, ":\n");
+
+ }
+
+}
+
+/* The format here should match the format used in the output of
+ symbol_ref's elsewhere in this file. */
+void
+picochip_output_labelref (FILE * stream, const char name[])
+{
+ fprintf (stream, "_%s", name);
+}
+
+void
+picochip_weaken_label (FILE * stream, const char name[])
+{
+ fprintf (stream, ".weak ");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+}
+
+/* Return true if the given label (or label prefix) denotes a marker
+ label which should be emitted in the form LABEL= */
+static int
+picochip_is_marker_prefix (const char *prefix)
+{
+ return (strcmp (prefix, "L") != 0 && strcmp (prefix, "LC") != 0
+ && strcmp (prefix, "LP") != 0);
+}
+
+void
+picochip_output_internal_label (FILE * stream, const char *prefix,
+ unsigned long num)
+{
+
+ /* Emit different types of label, based upon their prefix. They
+ are handled differently to allow the assembler to ensure that
+ branch target labels are properly aligned, while other labels
+ will only serve as code markers, not branch targets. Aligning
+ labels unnecessarily can result in much code wastage. */
+ if (picochip_is_marker_prefix (prefix))
+ {
+ /* Special label marker. If it appears in the middle of a VLIW
+ packet, defer it until the end of the packet. There has
+ never been a need to handle more than one lm label at a time. */
+ if (picochip_schedule_type == DFA_TYPE_SPEED &&
+ (strcmp (prefix, "LM")) == 0 && picochip_vliw_continuation)
+ {
+ if (strlen (picochip_current_vliw_state.lm_label_name) != 0)
+ internal_error ("LM label has already been deferred");
+
+ sprintf (picochip_current_vliw_state.lm_label_name,
+ "picoMark_%s%ld", prefix, num);
+ }
+ else if (picochip_schedule_type == DFA_TYPE_SPEED &&
+ (strcmp (prefix, "LCFI")) == 0 && picochip_vliw_continuation)
+ {
+ if (picochip_current_vliw_state.num_cfi_labels_deferred == 2)
+ {
+	      internal_error ("LCFI labels have already been deferred");
+ }
+ sprintf(picochip_current_vliw_state.cfi_label_name[
+ picochip_current_vliw_state.num_cfi_labels_deferred],
+ "picoMark_%s%ld", prefix, num);
+ picochip_current_vliw_state.num_cfi_labels_deferred++;
+ }
+ else
+ {
+ /* Marker label. */
+ fprintf (stream, "_picoMark_%s%ld=\n", prefix, num);
+ }
+
+ }
+ else
+ {
+ /* Normal label. */
+ fprintf (stream, "_%s%ld:\n", prefix, num);
+ }
+
+}
+
+void
+picochip_generate_internal_label (char *str, const char *prefix, long num)
+{
+ /* Two types of internal label can be generated: branch target
+ labels and code marker labels. Branch target labels must always
+ be aligned (since code will execute at these
+ points). Differentiate between the two by prepending markers with
+ a unique prefix, which can later be used in output_label to
+ figure out which label syntax to use. */
+ if (picochip_is_marker_prefix (prefix))
+ sprintf (str, "picoMark_%s%ld", prefix, num);
+ else
+ sprintf (str, "%s%ld", prefix, num);
+
+}
+
+void
+picochip_asm_output_anchor (rtx symbol)
+{
+ fprintf (asm_out_file, ".offsetData _%s, ",XSTR (symbol, 0));
+ fprintf (asm_out_file, "+ " HOST_WIDE_INT_PRINT_DEC"\n",SYMBOL_REF_BLOCK_OFFSET(symbol));
+}
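+/* For example (illustrative): a symbol "foo" placed 4 bytes into its
+   anchor block is emitted as ".offsetData _foo, + 4".  */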
+
+void
+picochip_output_aligned_common (FILE * stream, const char *name,
+ unsigned size, unsigned alignment)
+{
+
+ fprintf (stream, ".commonData ");
+ assemble_name (stream, name);
+ fprintf (stream, ", %u, %u\n", size, alignment / 8);
+ picochip_output_global (stream, name);
+
+}
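+/* For example (illustrative): a 2-byte common symbol "counter" with
+   16-bit alignment is emitted as ".commonData _counter, 2, 2",
+   followed by ".global _counter".  */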
+
+void
+picochip_output_aligned_local (FILE * stream, const char *name,
+ unsigned size, unsigned alignment)
+{
+
+ fprintf (stream, ".commonData ");
+ assemble_name (stream, name);
+ fprintf (stream, ", %u, %u\n", size, alignment / 8);
+
+}
+
+void
+picochip_output_global (FILE * stream, const char *name)
+{
+ fprintf (stream, ".global ");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+}
+
+/* Output an assembly language string. Output as a sequence of decimal
+ numbers, followed by the literal string to make it obvious what the
+ numbers represent. */
+void
+picochip_output_ascii (FILE * file, const char *str, int length)
+{
+ int i = 0;
+
+ fprintf (file, ".ascii ");
+
+ for (i = 0; i < length; ++i)
+ {
+ fprintf (file, "16#%x# ", (char) (str[i]));
+ }
+
+ fprintf (file, " ; ");
+
+ for (i = 0; i < length; ++i)
+ {
+ char c = str[i];
+
+ switch (c)
+ {
+ case '\n':
+ fprintf (file, "\\n");
+ break;
+ case '\t':
+ fprintf (file, "\\t");
+ break;
+ case '\0':
+ fprintf (file, "\\0");
+ break;
+ default:
+ fprintf (file, "%c", c);
+ }
+
+ }
+
+ fprintf (file, "\n");
+
+}
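+/* For example (illustrative): the string "Hi\n" is emitted as
+     .ascii 16#48# 16#69# 16#a#  ; Hi\n
+   where each 16#..# token holds the hex value of one character.  */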
+
+/* Output the beginning of an ASM file. */
+void
+picochip_asm_file_start (void)
+{
+ default_file_start ();
+
+ fprintf (asm_out_file, "// picoChip ASM file\n");
+ fprintf (asm_out_file, "//.file \"%s\"\n", main_input_filename);
+
+ fprintf (asm_out_file, "// Has byte access: %s\n",
+ (TARGET_HAS_BYTE_ACCESS ? "Yes" : "No"));
+
+ if (TARGET_HAS_MUL_UNIT)
+ fprintf (asm_out_file, "// Has multiply: Yes (Multiply unit)\n");
+ else if (TARGET_HAS_MAC_UNIT)
+ fprintf (asm_out_file, "// Has multiply: Yes (Mac unit)\n");
+ else
+ fprintf (asm_out_file, "// Has multiply: No\n");
+}
+
+/* Output the end of an ASM file. */
+void
+picochip_asm_file_end (void)
+{
+  /* A segment end used to be included here to make it easy for Perl
+     scripts to grab segments; this is now done by the assembler. */
+
+ fprintf (asm_out_file, "// End of picoChip ASM file\n");
+
+}
+
+/* Output frame debug information to the given stream. */
+static void
+picochip_output_frame_debug (FILE * file)
+{
+ int i = 0;
+
+ if (current_function_is_leaf)
+ fprintf (file, "\t\t// Leaf function\n");
+ else
+ fprintf (file, "\t\t// Non-leaf function\n");
+
+ if (picochip_can_eliminate_link_sp_save ())
+ fprintf (file, "\t\t// Link/fp save/restore can be eliminated\n");
+
+ if (cfun->static_chain_decl != NULL)
+ fprintf (file, "\t\t// Static chain in use\n");
+
+ fprintf (file, "\t\t// Incoming argument size: %d bytes\n",
+ picochip_arg_area_size_in_bytes ());
+ fprintf (file, "\t\t// Incoming arg offset: %d\n",
+ picochip_arg_area_byte_offset ());
+ fprintf (file, "\t\t// Pretend arg size: %d\n",
+ picochip_pretend_arg_area_size ());
+ fprintf (file, "\t\t// Pretend arg offset (ARGP): %d\n",
+ picochip_pretend_arg_area_byte_offset ());
+ fprintf (file, "\t\t// Special reg area size: %d bytes\n",
+ picochip_special_save_area_size_in_bytes ());
+ fprintf (file, "\t\t// Special reg area offset: %d\n",
+ picochip_special_save_area_byte_offset ());
+
+ /* Output which registers are saved. */
+ fprintf (file, "\t\t// Saved regs: ");
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (picochip_reg_needs_saving (i))
+ fprintf (file, "%s ", picochip_regnames[i]);
+ }
+ fprintf (file, "\t\t\n");
+
+ fprintf (file, "\t\t// Save area size: %d bytes\n",
+ picochip_save_area_size_in_bytes ());
+ fprintf (file, "\t\t// Save area offset: %d\n",
+ picochip_save_area_byte_offset ());
+
+ fprintf (file, "\t\t// Frame size: %ld bytes\n", get_frame_size ());
+ fprintf (file, "\t\t// Frame offset (FP): %d\n",
+ picochip_frame_byte_offset ());
+
+ fprintf (file, "\t\t// Outgoing argument area size: %d bytes\n",
+ crtl->outgoing_args_size);
+
+}
+
+/* Output picoChip function prologue. This contains human-readable
+ information about the function. */
+void
+picochip_function_prologue (FILE * file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* Get the function's name, as described by its RTL. This may be
+ different from the DECL_NAME name used in the source file. The
+ real declaration name must be used, to ensure that the prologue
+ emits the right information for the linker. */
+ rtx x;
+ const char *fnname;
+ x = DECL_RTL (current_function_decl);
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ fnname = XSTR (x, 0);
+
+ /* Note that the name of the function is given in the &_%s
+ form. This matches the name of the function as used in labels,
+ and function calls, and enables processCallGraph to match
+ function calls to the name of the function, as defined here. */
+ fprintf (file, "// picoChip Function Prologue : &_%s = %d bytes\n",
+ fnname, picochip_arg_area_byte_offset ());
+
+ picochip_output_frame_debug (file);
+ fprintf (file, "\n");
+
+}
+
+/* Output picoChip function epilogue. */
+void
+picochip_function_epilogue (FILE * file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+
+ rtx x;
+ const char *fnname;
+ x = DECL_RTL (current_function_decl);
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ fnname = XSTR (x, 0);
+ fprintf (file, "\n// picoChip Function Epilogue : %s\n\n",
+ fnname);
+}
+
+/* Manipulate the asm output. Some machines only execute the code when
+ there is actually a chance of needing it (e.g., FRV doesn't execute
+ it if the scheduling pass wasn't used). We always execute it,
+   simply to ensure that it is exercised more often, so that bugs are
+   more likely to be found.
+
+ This function's prime reason for existence is to insert the VLIW
+ separators where appropriate. The separators must be inserted
+ before any comments which appear at the end of the file.
+
+*/
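+/* Summary of the picoChip-specific template escapes handled below:
+     %|       - explicit VLIW separator (emits '\')
+     %#       - ALU id ('0' or '1') for the current instruction
+     %>       - branch delay slot marker
+     %<digit> - operand <digit>
+     %l<n>    - operand <n> printed as a label
+     %a<n>    - operand <n> printed as an address
+   Any other letter is passed to picochip_print_operand.  */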
+const char *
+picochip_asm_output_opcode (FILE * f, const char *ptr)
+{
+ int c;
+
+ /* Flag to specify when a VLIW continuation has been inserted onto
+ the line. Continuations are either inserted before any comments,
+ or before the end of the line is reached. The flag ensures that
+ we don't insert continuations twice (i.e., at the comment and the
+ end of line). */
+ int continuation_inserted = 0;
+
+ /* If the instruction uses multiple lines (i.e., a new line
+ character appears in the opcode), then ensure that no attempt is
+ made to pack it into a VLIW. */
+ if (strchr (ptr, '\n') != NULL && picochip_vliw_continuation)
+ internal_error
+ ("picochip_asm_output_opcode - Found multiple lines in VLIW packet %s",
+ ptr);
+
+
+ /* If a delay slot is pending, output the directive to the assembler
+ before the instruction. */
+ if (picochip_is_delay_slot_pending)
+ {
+ picochip_is_delay_slot_pending = 0;
+ fputs ("=->\t", f);
+ }
+
+ /* Keep going for entire opcode. All substitution performed ourselves. */
+ while (*ptr)
+ {
+ c = *ptr++;
+
+ /* Determine whether a VLIW continuation must be inserted before
+ any comments, or the end of the opcode. A flag is set to show
+ that we have inserted a continuation on this line, so that we
+ don't try to insert another continuation when the end of the
+ opcode is reached. The only other case for a continuation
+ might have been a newline, but these aren't allowed in
+ conjunction with VLIW continuations (see above code). */
+ if (picochip_vliw_continuation &&
+ !continuation_inserted &&
+ ((c == '/' && (*ptr == '/')) || *ptr == '\0'))
+ {
+ fprintf (f, "\\ ");
+ continuation_inserted = 1;
+ }
+
+ /* Detect an explicit VLIW separator. */
+ if (c == '%' && (*ptr == '|'))
+ {
+ fprintf (f, "\\");
+ ptr++;
+ }
+ /* Detect the need for an ALU id operand. */
+ else if (c == '%' && (*ptr == '#'))
+ {
+ fputc (picochip_get_vliw_alu_id (), f);
+
+ if (TARGET_DEBUG)
+ printf ("Generated ALU char at %s for insn %d\n", ptr,
+ INSN_UID (picochip_current_prescan_insn));
+
+ /* Skip past unwanted # */
+ ptr++;
+ }
+ /* Detect the need for branch delay slot. */
+ else if (c == '%' && (*ptr == '>'))
+ {
+ /* Only emit delay slots (NOP's, or otherwise) when delay
+ * slot scheduling has actually been enabled, otherwise VLIW
+ * scheduling and delay slot scheduling output combine to
+ * produce nasty effects. */
+ if (flag_delayed_branch)
+ {
+ if (dbr_sequence_length () == 0)
+ fputs ("\n=->\tNOP", f);
+ else
+ picochip_is_delay_slot_pending = 1;
+ }
+
+ /* Skip past unwanted > */
+ ptr++;
+ }
+ /* Detect any %digit specifiers. */
+ else if (c == '%' && (*ptr >= '0' && *ptr <= '9'))
+ {
+ c = atoi (ptr);
+ picochip_print_operand (f, recog_data.operand[c], 0);
+ while ((c = *ptr) >= '0' && c <= '9')
+ ptr++;
+ }
+ /* Detect any %letterdigit specifiers. */
+ else if (c == '%' && ((*ptr >= 'a' && *ptr <= 'z')
+ || (*ptr >= 'A' && *ptr <= 'Z')))
+ {
+ int letter = *ptr++;
+
+ c = atoi (ptr);
+
+ switch (letter)
+ {
+ case 'l':
+ output_asm_label (recog_data.operand[c]);
+ break;
+
+ case 'a':
+ output_address (recog_data.operand[c]);
+ break;
+
+ default:
+ picochip_print_operand (f, recog_data.operand[c], letter);
+ }
+
+ while ((c = *ptr) >= '0' && c <= '9')
+ ptr++;
+ }
+ else if (c == '%')
+ internal_error
+ ("picochip_asm_output_opcode - can%'t output unknown operator %c",
+ *ptr);
+ else
+ fputc (c, f);
+ }
+
+ /* Reached the end of the packet. If any labels were deferred
+ during output, emit them now. */
+ if (!picochip_vliw_continuation)
+ {
+ if (picochip_current_vliw_state.num_cfi_labels_deferred != 0)
+ {
+ fprintf (f, "\n");
+ assemble_name (f, picochip_current_vliw_state.cfi_label_name[0]);
+ fprintf (f, "=");
+ if (picochip_current_vliw_state.num_cfi_labels_deferred == 2)
+ {
+ fprintf (f, "\n");
+ assemble_name (f, picochip_current_vliw_state.cfi_label_name[1]);
+ fprintf (f, "=");
+ }
+ }
+
+ if (strlen (picochip_current_vliw_state.lm_label_name) != 0)
+ {
+ fprintf (f, "\n");
+ assemble_name (f, picochip_current_vliw_state.lm_label_name);
+ fprintf (f, "=");
+ }
+ }
+
+ /* Output an end-of-packet marker if requested. */
+ if (!picochip_vliw_continuation &&
+ TARGET_DEBUG && picochip_schedule_type == DFA_TYPE_SPEED)
+ fprintf (f, "\n\t//-------------- End of VLIW packet -----------------");
+
+ return ptr;
+}
+
+/* Function RTL expansion. */
+
+/* Expand the prologue into RTL. */
+void
+picochip_expand_prologue (void)
+{
+ int stack_adjustment = 0;
+ int special_save_offset = 0;
+ int general_save_offset = 0;
+ int reg_save_offset = 0;
+ int i = 0;
+
+ stack_adjustment = picochip_arg_area_byte_offset ();
+ general_save_offset =
+ -(stack_adjustment - picochip_save_area_byte_offset ());
+ special_save_offset =
+ -(stack_adjustment - picochip_special_save_area_byte_offset ());
+
+ /* Save the link registers. We could try to save just one register
+ here. This would reduce the amount of stack space required.
+     There hasn't been a good reason to do that so far. */
+ if (!picochip_can_eliminate_link_sp_save ())
+ picochip_emit_save_register (gen_rtx_REG (SImode, LINK_REGNUM),
+ special_save_offset);
+
+ /* Save callee-save registers. */
+ reg_save_offset = 0;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (picochip_reg_needs_saving (i))
+ {
+
+ /* If this register is an even numbered register, and the
+ next register also needs to be saved, use a SImode save,
+ which does both in one instruction. Note that a special
+ check is performed to ensure that the double word aligned
+ store is valid (e.g., it is possible that r6, r8, r9 need
+ to be saved, in which case once r6 has been saved, the
+ stored offset is no longer aligned, and an STL/LDL
+ instruction becomes invalid). Alternately, we could store all
+ aligned registers first and then save the single one(s). */
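+	  /* For example (illustrative): if r2, r3 and r5 need saving,
+	     r2/r3 are saved with a single SImode (STL) store, while r5
+	     falls back to an HImode (STW) store.  */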
+ if ((i % 2 == 0) &&
+ picochip_reg_needs_saving (i + 1) &&
+ picochip_is_aligned (reg_save_offset, LONG_TYPE_SIZE))
+ {
+ picochip_emit_save_register (gen_rtx_REG (SImode, i),
+ general_save_offset +
+ reg_save_offset);
+ reg_save_offset += 2 * UNITS_PER_WORD;
+ i++;
+ }
+ else
+ {
+ picochip_emit_save_register (gen_rtx_REG (HImode, i),
+ general_save_offset +
+ reg_save_offset);
+ reg_save_offset += UNITS_PER_WORD;
+ }
+ }
+
+ }
+
+ /* Emit a stack adjustment where required. */
+ if (stack_adjustment != 0)
+ picochip_emit_stack_allocate (stack_adjustment);
+
+  /* If this function uses variadic arguments, write any unnamed
+ registers to the stack. */
+ if (cfun->stdarg)
+ {
+ int stdarg_offset = picochip_pretend_arg_area_byte_offset ();
+
+ /* Sanity check. The pretend argument offset should be 32-bit aligned. */
+ gcc_assert(picochip_pretend_arg_area_byte_offset () % 4 == 0);
+
+ picochip_emit_save_register (gen_rtx_REG (SImode, 0), stdarg_offset);
+ picochip_emit_save_register (gen_rtx_REG (SImode, 2),
+ stdarg_offset + 4);
+ picochip_emit_save_register (gen_rtx_REG (SImode, 4),
+ stdarg_offset + 8);
+
+ }
+
+}
+
+/* Expand the epilogue into RTL. */
+void
+picochip_expand_epilogue (int is_sibling_call ATTRIBUTE_UNUSED)
+{
+ int stack_adjustment = 0;
+ int special_save_offset = 0;
+ int general_save_offset = 0;
+ int reg_save_offset = 0;
+ int i = 0;
+ int use_link_fp_restore_stack_adjust = 0; /* Default to using an explicit
+ stack restore. */
+
+ stack_adjustment = picochip_arg_area_byte_offset ();
+ general_save_offset =
+ -(stack_adjustment - picochip_save_area_byte_offset ());
+ special_save_offset =
+ -(stack_adjustment - picochip_special_save_area_byte_offset ());
+
+ /* Emit a stack adjustment where required. */
+ if (stack_adjustment != 0)
+ {
+ /* If the link/fp is already being restored, and the offset to
+ their save location is small enough, don't bother adjusting
+ the stack explicitly. */
+ if (picochip_special_save_area_byte_offset () < 512 &&
+ !picochip_can_eliminate_link_sp_save ())
+ use_link_fp_restore_stack_adjust = 1;
+ else
+ /* Explicitly restore the stack. */
+ picochip_emit_stack_allocate (-stack_adjustment);
+ }
+
+  /* Restore the link/fp registers. (It might suffice to save and
+     restore only the link register.) */
+ if (!picochip_can_eliminate_link_sp_save ())
+ {
+ if (use_link_fp_restore_stack_adjust)
+ picochip_emit_restore_register (gen_rtx_REG (SImode, LINK_REGNUM),
+ picochip_special_save_area_byte_offset
+ ());
+ else
+ picochip_emit_restore_register (gen_rtx_REG (SImode, LINK_REGNUM),
+ special_save_offset);
+ }
+
+ /* Restore callee-save registers. */
+ reg_save_offset = 0;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (picochip_reg_needs_saving (i))
+ {
+
+ /* If this register is an even numbered register, and the
+ next register also needs to be saved, use a SImode save,
+ which does both in one instruction. Note that a special
+ check is performed to ensure that the double word aligned
+ store is valid (e.g., it is possible that r6, r8, r9 need
+ to be saved, in which case once r6 has been saved, the
+ stored offset is no longer aligned, and an STL/LDL
+ instruction becomes invalid). We could store all aligned
+ registers first, and then save the single one(s). */
+ if ((i % 2 == 0) &&
+ picochip_reg_needs_saving (i + 1) &&
+ picochip_is_aligned (reg_save_offset, LONG_TYPE_SIZE))
+ {
+ picochip_emit_restore_register (gen_rtx_REG (SImode, i),
+ general_save_offset +
+ reg_save_offset);
+ reg_save_offset += 2 * UNITS_PER_WORD;
+ i++;
+ }
+ else
+ {
+ picochip_emit_restore_register (gen_rtx_REG (HImode, i),
+ general_save_offset +
+ reg_save_offset);
+ reg_save_offset += UNITS_PER_WORD;
+ }
+ }
+
+ }
+
+ /* Emit a return instruction, which matches a (parallel
+ [(return) (use r12)]) */
+ {
+ rtvec p;
+ p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode, LINK_REGNUM));
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ }
+
+}
+
+/* Assembly instruction output. */
+
+/* Test whether the given branch instruction is short, or long. Short
+ * branches are equivalent to real branches, and may be DFA
+ * scheduled. Long branches expand to a macro which is handled by the
+ * elaborator, and cannot be scheduled. Occasionally, the branch
+ * shortening pass, which is run after DFA scheduling, will change the
+ * code layout and cause the short branch to be reverted into a long
+ * branch. Instead of having to fix this up by emitting new assembly,
+ * the short branch is emitted anyway. There is plenty of slack in the
+ * calculation of long and short branches (10-bit offset, but only
+ * 9 bits used in the computation), so there is enough slack for this to
+ * be safe. */
+static int
+picochip_is_short_branch (rtx insn)
+{
+ int isRealShortBranch = (get_attr_length(insn) == SHORT_BRANCH_LENGTH);
+
+ return (isRealShortBranch ||
+ picochip_current_vliw_state.num_insns_in_packet > 1);
+}
+
+/* Output a compare-and-branch instruction (matching the cbranch
+ pattern). */
+const char *
+picochip_output_cbranch (rtx operands[])
+{
+
+ if (HImode != GET_MODE (operands[1]) ||
+ (HImode != GET_MODE (operands[2]) &&
+ GET_CODE (operands[2]) != CONST_INT))
+ {
+ internal_error ("%s: at least one operand can%'t be handled",
+ __FUNCTION__);
+ }
+
+ /* Use the type of comparison to output the appropriate condition
+ test. */
+ switch (GET_CODE (operands[0]))
+ {
+ case NE:
+ return ("// if (%1 != %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPNE %l3");
+
+ case EQ:
+ return ("// if (%1 == %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPEQ %l3");
+
+ case LE:
+ /* Reverse the operand order to be GE */
+ return ("// if (%1 <= %2) goto %l3\n\tSUB.%# %2,%1,r15\n\tJMPGE %l3");
+
+ case LEU:
+ /* Reverse operand order of GEU. */
+ return ("// if (%1 <= %2) goto %l3\n\tSUB.%# %2,%1,r15\n\tJMPHS %l3");
+
+ case GE:
+ return ("// if (%1 >= %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPGE %l3");
+
+ case GEU:
+ return ("// if (%1 >= %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPHS %l3");
+
+ case LT:
+ return ("// if (%1 < %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPLT %l3");
+
+ case LTU:
+ return ("// if (%1 <{U} %2) goto %l3\n\tSUB.%# %1,%2,r15\n\tJMPLO %l3");
+
+ case GT:
+ /* Reversed operand version of LT. */
+ return ("// if (%1 > %2) goto %l3\n\tSUB.%# %2,%1,r15\n\tJMPLT %l3");
+
+ case GTU:
+ /* Reverse an LTU. */
+ return ("// if (%1 >{U} %2) goto %l3\n\tSUB.%# %2,%1,r15\n\tJMPLO %l3");
+
+ default:
+ gcc_unreachable();
+ }
+}
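+/* For example (illustrative): after operand substitution with r2, r3
+   and label _L5, the NE case above emits roughly
+       // if (r2 != r3) goto _L5
+       SUB.0 r2,r3,r15
+       JMPNE _L5
+   where the subtraction into r15 is performed only for its effect on
+   the condition flags.  */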
+
+/* Output a compare-and-branch instruction (matching the cbranch
+   pattern). This function is currently unused, since the cbranch
+   split is disabled. It is kept around so that it can be used
+   once we understand how to perform the cbranch split safely. */
+const char *
+picochip_output_compare (rtx operands[])
+{
+ int code;
+
+ if (HImode != GET_MODE (operands[1]) ||
+ (HImode != GET_MODE (operands[2]) &&
+ GET_CODE (operands[2]) != CONST_INT))
+ {
+ internal_error ("%s: at least one operand can%'t be handled",
+ __FUNCTION__);
+ }
+
+ code = GET_CODE (operands[0]);
+ /* Use the type of comparison to output the appropriate condition
+ test. */
+ switch (code)
+ {
+ case NE:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case EQ:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case LE:
+ /* Reverse the operand order to be GE */
+ return ("SUB.%# %2,%1,r15\t// CC := (%0)");
+
+ case LEU:
+ /* Reverse operand order of GEU. */
+ return ("SUB.%# %2,%1,r15\t// CC := (%0)");
+
+ case GE:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case GEU:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case LT:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case LTU:
+ return ("SUB.%# %1,%2,r15\t// CC := (%0)");
+
+ case GT:
+ /* Reversed operand version of LT. */
+ return ("SUB.%# %2,%1,r15\t// CC := (%0)");
+
+ case GTU:
+ /* Reverse an LTU. */
+ return ("SUB.%# %2,%1,r15\t// CC := (%0)");
+
+ default:
+ gcc_unreachable();
+ }
+}
+
+/* Output the branch insn part of a compare-and-branch split. */
+const char *
+picochip_output_branch (rtx operands[], rtx insn)
+{
+
+ int code = GET_CODE(operands[2]);
+ if (picochip_is_short_branch (insn))
+ {
+ /* Short branches can be output directly using the
+ appropriate instruction. */
+ switch (code)
+ {
+ case NE:
+ return ("BNE %l0 %>");
+ case EQ:
+ return ("BEQ %l0 %>");
+ case LE:
+ return ("BGE %l0 %>");
+ case LEU:
+ return ("BHS %l0 %>");
+ case GE:
+ return ("BGE %l0 %>");
+ case GEU:
+ return ("BHS %l0 %>");
+ case LT:
+ return ("BLT %l0 %>");
+ case LTU:
+ return ("BLO %l0 %>");
+ case GT:
+ return ("BLT %l0 %>");
+ case GTU:
+ return ("BLO %l0 %>");
+ default:
+ internal_error ("unknown short branch in %s (type %d)",
+ __FUNCTION__, (int) INTVAL (operands[1]));
+ return "UNKNOWN_BRANCH";
+ }
+ }
+ else
+ {
+ /* Long branches result in the emission of a special
+ instruction, which the assembler expands into a suitable long
+ branch. */
+
+ /* Use the type of comparison to output the appropriate condition
+ test. */
+ switch (code)
+ {
+ case NE:
+ return ("JMPNE %l0 %>");
+ case EQ:
+ return ("JMPEQ %l0 %>");
+ case LE:
+ return ("JMPGE %l0 %>");
+ case LEU:
+ return ("JMPHS %l0 %>");
+ case GE:
+ return ("JMPGE %l0 %>");
+ case GEU:
+ return ("JMPHS %l0 %>");
+ case LT:
+ return ("JMPLT %l0 %>");
+ case LTU:
+ return ("JMPLO %l0 %>");
+ case GT:
+ return ("JMPLT %l0 %>");
+ case GTU:
+ return ("JMPLO %l0 %>");
+
+ default:
+ internal_error ("unknown long branch in %s (type %d)",
+ __FUNCTION__, (int) INTVAL (operands[1]));
+ return "UNKNOWN_BRANCH";
+ }
+
+ }
+}
+
+/* Output a jump instruction. */
+const char *
+picochip_output_jump (rtx insn)
+{
+ if (picochip_is_short_branch (insn))
+ return "BRA %l0%>";
+ else
+ return "JMPRA %l0%>";
+}
+
+const char *
+picochip_output_put_array (int alternative, rtx operands[])
+{
+ /* Local output buffer. */
+ char buf[256];
+
+ int portArraySize = INTVAL(operands[1]);
+ int portBaseIndex = INTVAL(operands[2]);
+
+ if (alternative == 0)
+ {
+ sprintf (buf, "// Array put\n\tadd.0 [lsl %%0,2],&__commTable_put_%d_%d,lr\n\tjl (lr)",
+ portArraySize, portBaseIndex);
+ output_asm_insn (buf, operands);
+ }
+ else if (alternative == 1)
+ {
+ /* Constant port id. Emit a real instruction. */
+ int portIndex = INTVAL(operands[0]) + portBaseIndex;
+ if (portIndex < portBaseIndex ||
+ portIndex >= (portBaseIndex + portArraySize))
+ {
+ error ("PUT uses port array index %d, which is out of range [%d..%d)",
+ portIndex, portBaseIndex, portBaseIndex + portArraySize);
+ }
+ sprintf(buf, "PUT R[0:1],%d", portIndex);
+ output_asm_insn (buf, operands);
+ }
+ else
+ gcc_unreachable();
+
+ /* Both alternatives output the insn directly. */
+ return "";
+}
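+/* For example (illustrative): with a port array of size 4 based at
+   index 8, alternative 0 above emits roughly
+       // Array put
+       add.0 [lsl r0,2],&__commTable_put_4_8,lr
+       jl (lr)
+   (assuming operand 0 is in r0), while a constant port id of 2 emits
+   "PUT R[0:1],10".  */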
+
+const char *
+picochip_output_get_array (int alternative, rtx operands[])
+{
+ /* Local output buffer. */
+ char buf[256];
+
+ int portArraySize = INTVAL(operands[1]);
+ int portBaseIndex = INTVAL(operands[2]);
+
+ if (alternative == 0)
+ {
+ sprintf (buf, "// Array get\n\tadd.0 [lsl %%0,2],&__commTable_get_%d_%d,lr\n\tjl (lr)",
+ portArraySize, portBaseIndex);
+ output_asm_insn (buf, operands);
+ }
+ else if (alternative == 1)
+ {
+ /* Constant port id. Emit a real instruction. */
+ int portIndex = INTVAL(operands[0]) + portBaseIndex;
+ if (portIndex < portBaseIndex ||
+ portIndex >= (portBaseIndex + portArraySize))
+ {
+ error ("GET uses port array index %d, which is out of range [%d..%d)",
+ portIndex, portBaseIndex, portBaseIndex + portArraySize);
+ }
+ sprintf(buf, "GET %d,R[0:1]", portIndex);
+ output_asm_insn (buf, operands);
+ }
+ else
+ gcc_unreachable();
+
+ /* Both alternatives output the insn directly. */
+ return "";
+}
+
+const char *
+picochip_output_testport_array (int alternative, rtx operands[])
+{
+ /* Local output buffer. */
+ char buf[256];
+
+ int portArraySize = INTVAL(operands[2]);
+ int portBaseIndex = INTVAL(operands[3]);
+
+ if (alternative == 0)
+ {
+ sprintf (buf, "// Array tstport\n\tadd.0 [lsl %%1,2],&__commTable_tstport_%d_%d,lr\n\tjl (lr)\n=->\tcopy.0 0,%%0\n\tcopyeq 1,%%0",
+ portArraySize, portBaseIndex);
+ output_asm_insn (buf, operands);
+ }
+ else if (alternative == 1)
+ {
+ /* Constant port id. Emit a real instruction. */
+ int portIndex = INTVAL(operands[1]) + portBaseIndex;
+ if (portIndex < portBaseIndex ||
+ portIndex >= (portBaseIndex + portArraySize))
+ {
+	  error ("TSTPORT uses port array index %d, which is out of range [%d..%d)",
+ portIndex, portBaseIndex, portBaseIndex + portArraySize);
+ }
+ sprintf(buf, "copy.1 0,%%0 %%| TSTPORT %d\n\tcopyeq 1,%%0", portIndex);
+ output_asm_insn (buf, operands);
+ }
+ else
+ gcc_unreachable();
+
+ /* Both alternatives output the insn directly. */
+ return "";
+}
+
+/* Output a comparison operand as a symbol (e.g., >). */
+static void
+picochip_print_comparison (FILE * file, rtx operand, int letter)
+{
+
+ if (letter == 'i')
+ {
+ /* Output just the comparison symbol. */
+ switch (GET_CODE (operand))
+ {
+ case NE:
+ fprintf (file, "!=");
+ break;
+ case EQ:
+ fprintf (file, "==");
+ break;
+ case GE:
+ fprintf (file, ">=");
+ break;
+ case GEU:
+ fprintf (file, ">={U}");
+ break;
+ case LT:
+ fprintf (file, "<");
+ break;
+ case LTU:
+ fprintf (file, "<{U}");
+ break;
+ case LE:
+ fprintf (file, "<=");
+ break;
+ case LEU:
+ fprintf (file, "<={U}");
+ break;
+ case GT:
+ fprintf (file, ">");
+ break;
+ case GTU:
+ fprintf (file, ">{U}");
+ break;
+ default:
+ gcc_unreachable();
+ }
+ }
+ else
+ {
+ /* Output the comparison formatted as operand,symbol,operand */
+ rtx op0 = XEXP (operand, 0);
+ rtx op1 = XEXP (operand, 1);
+
+ picochip_print_operand (file, op0, 0);
+ picochip_print_comparison (file, operand, 'i');
+ picochip_print_operand (file, op1, 0);
+ }
+}
+
+/* This function generates a memory address operand in the given
+ mode. That is, if the address contains a constant offset, then the
+ offset is divided by the required mode size to compute the
+ mode specific offset. By default, picochip_print_operand_address calls
+ this function using the natural mode of the operand, but special
+ operand codes can be used to invoke the computation using an
+ unnatural mode (e.g., compute the HI aligned address of an SI mode
+ address). */
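+/* For example (illustrative): given the address (reg 13) + 8 and
+   SImode, the constant offset is printed divided by 4, yielding
+   something like "(FP)2", assuming register 13 prints as FP.  */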
+static void
+picochip_print_memory_address (FILE * file, rtx operand,
+ enum machine_mode mode)
+{
+ rtx address = XEXP (operand, 0);
+
+ /* Sanity check. */
+ if (MEM != GET_CODE (operand))
+ fatal_insn ("picochip_print_memory_address - Operand isn't memory based",
+ operand);
+
+ if (TARGET_DEBUG)
+ {
+ printf ("picochip_print_memory_address: ");
+ print_rtl (stdout, operand);
+ printf ("\n");
+ }
+
+ switch (GET_CODE (address))
+ {
+ case PLUS:
+ {
+ /* Grab the address components. */
+ rtx base = XEXP (address, 0);
+ rtx offset = XEXP (address, 1);
+
+ /* Only handle reg+const addresses */
+ if (REG == GET_CODE (base) && CONST_INT == GET_CODE (offset))
+ {
+ /* Sanity check. If an FP+offset address is given, ensure
+ that the offset lies within the given frame, or a lower
+ frame. */
+ if (REGNO (base) == STACK_POINTER_REGNUM )
+ gcc_assert (INTVAL (offset) <= (picochip_arg_area_byte_offset () +
+ crtl->args.size));
+
+ /* Print the base register - identical for all modes. */
+ fprintf (file, "(");
+ picochip_print_operand (file, base, 'r');
+ fprintf (file, ")");
+
+ /* Print the constant offset with compensation for the mode. */
+ switch (mode)
+ {
+ case QImode:
+ picochip_print_operand (file, offset, 'Q');
+ break;
+
+ case HImode:
+ picochip_print_operand (file, offset, 'H');
+ break;
+
+ case SImode:
+ case SFmode:
+ picochip_print_operand (file, offset, 'S');
+ break;
+
+ case DImode:
+ picochip_print_operand (file, offset, 'D');
+ break;
+
+ default:
+ gcc_unreachable();
+ }
+
+ }
+
+ }
+
+ break;
+
+ case SYMBOL_REF:
+ picochip_print_operand (file, address, 's');
+ break;
+
+ case CONST:
+ {
+ rtx inner;
+ rtx base;
+ rtx offset;
+
+ inner = XEXP (address, 0);
+
+ /* Sanity check - the CONST memory address must be a base+offset. */
+ gcc_assert (PLUS == GET_CODE (inner));
+
+ base = XEXP (inner, 0);
+ offset = XEXP (inner, 1);
+
+ fprintf (file, "&_%s%+d", XSTR (base, 0), XINT (offset, 0));
+
+ break;
+ }
+
+ case REG:
+ /* Register operand. Provide a zero offset. */
+ fprintf (file, "(");
+ picochip_print_operand (file, address, 'r');
+ fprintf (file, ")0");
+ break;
+
+ default:
+ gcc_unreachable();
+ }
+
+}
+
+/* Output an operand. Formatting letters allow particular parts of
+ the operand to be output. */
+void
+picochip_print_operand (FILE * file, rtx operand, int letter)
+{
+
+ /* Handle special cases. */
+ switch (letter)
+ {
+ /* VLIW continuation, for explicit VLIW sequences. */
+ case '|':
+ fprintf (file, "\\");
+ return;
+
+ /* ALU selector. */
+ case '#':
+ fputc (picochip_get_vliw_alu_id (), file);
+ return;
+
+ /* Delay slot specifier. */
+ case '>':
+ /* This should be handled in asm_output_opcode. */
+ gcc_unreachable();
+
+ /* Instruction mnemonics (e.g., lshift becomes LSL). */
+ case 'I':
+ switch (GET_CODE (operand))
+ {
+ case AND:
+ fprintf (file, "AND");
+ break;
+ case IOR:
+ fprintf (file, "OR");
+ break;
+ case XOR:
+ fprintf (file, "XOR");
+ break;
+ case PLUS:
+ fprintf (file, "ADD");
+ break;
+ case MINUS:
+ fprintf (file, "SUB");
+ break;
+ default:
+ gcc_unreachable();
+ }
+ return;
+
+ /* Symbolic instructions (e.g., lshift becomes <<). */
+ case 'i':
+ switch (GET_CODE (operand))
+ {
+ case AND:
+ fprintf (file, "&");
+ break;
+ case IOR:
+ fprintf (file, "|");
+ break;
+ case XOR:
+ fprintf (file, "^");
+ break;
+ case PLUS:
+ fprintf (file, "+");
+ break;
+ case MINUS:
+ fprintf (file, "-");
+ break;
+ default:
+ fprintf (file, "UNKNOWN_INSN");
+ break;
+ }
+ return;
+
+ default: /* Not a punctuation character - process as normal. */
+ break;
+ }
+
+ switch (GET_CODE (operand))
+ {
+ case REG:
+ switch (letter)
+ {
+ case 'R':
+ /* Write a range of registers. */
+ fprintf (file, "R[%d:%d]", REGNO (operand) + 1, REGNO (operand));
+ break;
+
+ case 'U':
+ /* The upper register of a pair is requested. */
+ fprintf (file, "%s", picochip_regnames[REGNO (operand) + 1]);
+ break;
+
+ case 'L':
+ /* The lower register of a pair is requested. Equivalent to the
+ default, but included for completeness. */
+ fprintf (file, "%s", picochip_regnames[REGNO (operand)]);
+ break;
+
+ case 'X':
+ /* The 3rd register of a DI mode register. */
+ fprintf (file, "%s", picochip_regnames[REGNO (operand) + 2]);
+ break;
+
+ case 'Y':
+ /* The 4th register of a DI mode register. */
+ fprintf (file, "%s", picochip_regnames[REGNO (operand) + 3]);
+ break;
+
+ default:
+ fprintf (file, "%s", picochip_regnames[REGNO (operand)]);
+ }
+ break;
+
+ case CONST_INT:
+ /* A range of letters can be used to format integers. The
+ letters Q/H/S are used to divide the constant by the width of
+ QI/HI/SI mode integers in bytes. The U/L modifiers are used
+ to obtain the upper and lower 16-bits of a 32-bit
+ constant. Where possible, signed numbers are used, since
+ signed representations of numbers may be more compact (e.g.,
+       65535 can be represented as -1, which fits into a small signed
+       constant, whereas the unsigned value 65535 would require a
+       large constant). */
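+      /* For example (illustrative): a CONST_INT of 6 prints as "6"
+	 with 'Q', "3" with 'H' and "1" with 'S'; 0x12345678 prints as
+	 "4660" (0x1234) with 'U' and "22136" (0x5678) with 'L'.  */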
+ switch (letter)
+ {
+ case 'Q':
+ fprintf (file, "%ld", INTVAL (operand));
+ break;
+
+ case 'H':
+ fprintf (file, "%ld", INTVAL (operand) / 2);
+ break;
+
+ case 'S':
+ fprintf (file, "%ld", INTVAL (operand) / 4);
+ break;
+
+ case 'P':
+ fprintf (file, "%d", exact_log2 (INTVAL(operand)));
+ break;
+
+ case 'U':
+ fprintf (file, "%hi", (short) ((INTVAL (operand) >> 16) & 0xFFFF));
+ break;
+
+ case 'L':
+ fprintf (file, "%hi", (short) (INTVAL (operand) & 0xFFFF));
+ break;
+
+ default:
+ fprintf (file, "%ld", INTVAL (operand));
+ break;
+ }
+ break;
+
+ case CONST_DOUBLE:
+ {
+ long val;
+ REAL_VALUE_TYPE rv;
+
+ if (GET_MODE (operand) != SFmode)
+	fatal_insn ("Unknown mode in print_operand (CONST_DOUBLE):",
+ operand);
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operand);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+
+ switch (letter)
+ {
+ case 'U':
+ fprintf (file, "%hi", (short) ((val >> 16) & 0xFFFF));
+ break;
+
+ case 'L':
+ fprintf (file, "%hi", (short) (val & 0xFFFF));
+ break;
+ }
+
+ break;
+
+ }
+
+ /* Output a symbol. The output format must match that of
+ picochip_output_label. */
+ case SYMBOL_REF:
+ /* Ensure that the symbol is marked as referenced. Gcc can
+ occasionally omit the function bodies when it believes them
+ to be unreferenced. */
+ if (SYMBOL_REF_DECL (operand))
+ mark_decl_referenced (SYMBOL_REF_DECL (operand));
+ fprintf (file, "&");
+ assemble_name (file, XSTR (operand, 0));
+ break;
+
+ case LABEL_REF:
+ /* This format must match that of picochip_output_label. */
+ fprintf (file, "&");
+ output_asm_label (operand);
+ break;
+
+ case MEM:
+ {
+ rtx addr = XEXP (operand, 0);
+
+ switch (letter)
+ {
+ case 'o':
+ if (PLUS != GET_CODE (addr))
+ fatal_insn ("Bad address, not (reg+disp):", addr);
+ else
+ picochip_print_operand (file, XEXP (addr, 1), 0);
+ break;
+
+ case 'M':
+ /* Output a memory address in byte mode notation (i.e., the
+	       constant address (if any) is the actual byte address). */
+ picochip_print_memory_address (file, operand, QImode);
+ break;
+
+ /* Output a constant offset of the given mode (i.e., divide
+ the constant by the number of units in the mode to get the
+ constant). */
+ case 'Q':
+ picochip_print_memory_address (file, operand, QImode);
+ break;
+
+ case 'H':
+ picochip_print_memory_address (file, operand, HImode);
+ break;
+
+ case 'S':
+ picochip_print_memory_address (file, operand, SImode);
+ break;
+
+ case 'F':
+ picochip_print_memory_address (file, operand, SFmode);
+ break;
+
+ case 'b':
+ if (PLUS != GET_CODE (addr))
+ fatal_insn ("Bad address, not (reg+disp):", addr);
+ else
+ picochip_print_operand (file, XEXP (addr, 0), 0);
+ break;
+
+	    /* When the mem operand is (reg + big offset), which cannot
+	       be represented as an instruction operand, the compiler
+	       automatically generates an instruction to load (reg +
+	       big offset) into another register. In such cases, it
+ returns '0' as the character. This needs to be handled
+ as well. */
+ case 0:
+ case 'r':
+ if (REG != GET_CODE (addr))
+ fatal_insn ("Bad address, not register:", addr);
+ else
+ picochip_print_operand (file, addr, 0);
+ break;
+
+ default:
+ fprintf (file, "Unknown mem operand - letter %c ",
+ (char) (letter));
+ print_rtl (file, operand);
+ }
+
+ break;
+ }
+
+ case CONST:
+ {
+ rtx const_exp = XEXP (operand, 0);
+
+ /* Handle constant offsets to symbol references. */
+ if (PLUS == GET_CODE (const_exp) &&
+ SYMBOL_REF == GET_CODE (XEXP (const_exp, 0)) &&
+ CONST_INT == GET_CODE (XEXP (const_exp, 1)))
+ {
+
+ picochip_print_operand (file, XEXP (const_exp, 0), 0);
+ if (INTVAL (XEXP (const_exp, 1)) >= 0)
+ fprintf (file, "+");
+	    /* Else the '-' comes from the operand itself (i.e., AP-2). */
+
+ picochip_print_operand (file, XEXP (const_exp, 1), letter);
+
+ }
+ }
+ break;
+
+
+ case PLUS:
+ {
+ /* PLUS expressions are of the form (base + offset). Different
+	   options (analogous to those of memory PLUS expressions) are used
+ to extract the base and offset components. */
+
+ switch (letter)
+ {
+ case 'b':
+ picochip_print_operand (file, XEXP (operand, 0), 0);
+ break;
+
+ case 'o':
+ picochip_print_operand (file, XEXP (operand, 1), 0);
+ break;
+
+ default:
+
+ /* If the expression is composed entirely of constants,
+ evaluate the result. This should only occur with the
+ picoChip specific comms instructions, which are emitted as
+ base+offset expressions. */
+ if (CONST_INT == GET_CODE (XEXP (operand, 0)) &&
+ CONST_INT == GET_CODE (XEXP (operand, 1)))
+ {
+ HOST_WIDE_INT result = (XINT (XEXP (operand, 0), 0) +
+ XINT (XEXP (operand, 1), 0));
+	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, result);
+ }
+ else
+ {
+ fprintf (file, "(");
+ picochip_print_operand (file, XEXP (operand, 0), 0);
+ fprintf (file, "+");
+ picochip_print_operand (file, XEXP (operand, 1), 0);
+ fprintf (file, ")");
+ }
+ }
+
+ break;
+ }
+
+ /* Comparison operations. */
+ case NE:
+ case EQ:
+ case GE:
+ case GEU:
+ case LT:
+ case LTU:
+ case LE:
+ case LEU:
+ case GT:
+ case GTU:
+ picochip_print_comparison (file, operand, letter);
+ return;
+
+ default:
+ fprintf (stderr, "Unknown operand encountered in %s\n", __FUNCTION__);
+ print_rtl (file, operand);
+ break;
+
+ }
+
+}
+
+/* Output an operand address */
+void
+picochip_print_operand_address (FILE * file, rtx operand)
+{
+
+ switch (GET_CODE (operand))
+ {
+
+ case SYMBOL_REF:
+ /* This format must match that of picochip_output_label. */
+ assemble_name (file, XSTR (operand, 0));
+ break;
+
+ case CODE_LABEL:
+ /* Note this format must match that of picochip_output_label. */
+ fprintf (file, "_L%d", XINT (operand, 5));
+ break;
+
+ case MEM:
+ /* Pass on to a specialised memory address generator. */
+ picochip_print_memory_address (file, operand, GET_MODE (operand));
+ break;
+
+ default:
+ gcc_unreachable();
+
+ }
+
+}
+
+
+/* Scheduling functions. */
+
+/* Save some of the contents of recog_data. */
+static void
+picochip_save_recog_data (void)
+{
+ picochip_saved_which_alternative = which_alternative;
+ memcpy (&picochip_saved_recog_data, &recog_data,
+ sizeof (struct recog_data));
+}
+
+/* Restore some of the contents of global variable recog_data. */
+static void
+picochip_restore_recog_data (void)
+{
+ which_alternative = picochip_saved_which_alternative;
+ memcpy (&recog_data, &picochip_saved_recog_data,
+ sizeof (struct recog_data));
+}
+
+/* Ensure that no var tracking notes are emitted in the middle of a
+ three-instruction bundle. */
+static void
+reorder_var_tracking_notes (void)
+{
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next, last_insn = NULL_RTX;
+ rtx queue = NULL_RTX;
+
+ /* Iterate through the bb and find the last non-debug insn */
+ for (insn = BB_HEAD (bb); insn != NEXT_INSN(BB_END (bb)); insn = NEXT_INSN(insn))
+ {
+ if (NONDEBUG_INSN_P(insn))
+ last_insn = insn;
+ }
+
+ /* In all normal cases, queue up notes and emit them just before a TImode
+ instruction. For the last instruction, emit the queued notes just after
+ the last instruction. */
+ for (insn = BB_HEAD (bb); insn != NEXT_INSN(BB_END (bb)); insn = next)
+ {
+ next = NEXT_INSN (insn);
+
+ if (insn == last_insn)
+ {
+ while (queue)
+ {
+ rtx next_queue = PREV_INSN (queue);
+ PREV_INSN (NEXT_INSN(insn)) = queue;
+ NEXT_INSN(queue) = NEXT_INSN(insn);
+ PREV_INSN(queue) = insn;
+ NEXT_INSN(insn) = queue;
+ queue = next_queue;
+ }
+	      /* There is no more to do for this bb; break. */
+ break;
+ }
+ else if (NONDEBUG_INSN_P (insn))
+ {
+ /* Emit queued up notes before the first instruction of a bundle. */
+ if (GET_MODE (insn) == TImode)
+ {
+ while (queue)
+ {
+ rtx next_queue = PREV_INSN (queue);
+ NEXT_INSN (PREV_INSN(insn)) = queue;
+ PREV_INSN (queue) = PREV_INSN(insn);
+ PREV_INSN (insn) = queue;
+ NEXT_INSN (queue) = insn;
+ queue = next_queue;
+ }
+ }
+ }
+ else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
+ {
+ rtx prev = PREV_INSN (insn);
+ PREV_INSN (next) = prev;
+ NEXT_INSN (prev) = next;
+ PREV_INSN (insn) = queue;
+ queue = insn;
+ }
+ }
+      /* Make sure we are not dropping debug instructions. */
+ gcc_assert (queue == NULL_RTX);
+ }
+}
+
+/* Perform machine dependent operations on the rtl chain INSNS. */
+void
+picochip_reorg (void)
+{
+ rtx insn, insn1, vliw_start = NULL_RTX;
+ int vliw_insn_location = 0;
+
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (optimize == 0)
+ split_all_insns ();
+
+ if (picochip_schedule_type != DFA_TYPE_NONE)
+ {
+ timevar_push (TV_SCHED2);
+
+ /* Process the instruction list, computing the sizes of each
+ instruction, and consequently branch distances. This can
+ result in some branches becoming short enough to be treated
+ as a real branch instruction, rather than an assembly branch
+ macro which may expand into multiple instructions. The
+ benefit of shortening branches is that real branch
+ instructions can be properly DFA scheduled, whereas macro
+ branches cannot. */
+ shorten_branches (get_insns ());
+
+ /* Do control and data sched analysis again,
+ and write some more of the results to dump file. */
+
+ split_all_insns ();
+
+ schedule_ebbs ();
+
+ timevar_pop (TV_SCHED2);
+
+ ggc_collect ();
+
+ if (picochip_schedule_type == DFA_TYPE_SPEED)
+ {
+ /* Whenever a VLIW packet is generated, all instructions in
+ that packet must appear to come from the same source
+ location. The following code finds all the VLIW packets,
+ and tags their instructions with the location of the first
+ instruction from the packet. Clearly this will result in
+ strange behaviour when debugging the code, but since
+ debugging and optimisation are being used in conjunction,
+ strange behaviour is certain to occur anyway. */
+	  /* As a slight refinement: if the VLIW packet contains a branch
+	     or call instruction, we use that instruction's location. */
+ for (insn = get_insns (); insn; insn = next_real_insn (insn))
+ {
+
+ /* If this is the first instruction in the VLIW packet,
+ extract its location. */
+ if (GET_MODE (insn) == TImode)
+ {
+ vliw_start = insn;
+ vliw_insn_location = INSN_LOCATOR (insn);
+ }
+ if (JUMP_P (insn) || CALL_P(insn))
+ {
+ vliw_insn_location = INSN_LOCATOR (insn);
+ for (insn1 = vliw_start; insn1 != insn ; insn1 = next_real_insn (insn1))
+ INSN_LOCATOR (insn1) = vliw_insn_location;
+ }
+ /* Tag subsequent instructions with the same location. */
+ INSN_LOCATOR (insn) = vliw_insn_location;
+ }
+ }
+
+ }
+
+ /* Locate the note marking the end of the function's prologue. If
+ the note appears in the middle of a VLIW packet, move the note to
+ the end. This avoids unpleasant consequences such as trying to
+ emit prologue markers (e.g., .loc/.file directives) in the middle
+ of VLIW packets. */
+ if (picochip_schedule_type == DFA_TYPE_SPEED)
+ {
+ rtx prologue_end_note = NULL;
+ rtx last_insn_in_packet = NULL;
+
+ for (insn = get_insns (); insn; insn = next_insn (insn))
+ {
+ /* The prologue end must be moved to the end of the VLIW packet. */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+ {
+ prologue_end_note = insn;
+ break;
+ }
+ }
+
+ /* Find the last instruction in this packet. */
+ for (insn = prologue_end_note; insn; insn = next_real_insn (insn))
+ {
+ if (GET_MODE (insn) == TImode)
+ break;
+ else
+ last_insn_in_packet = insn;
+ }
+
+ if (last_insn_in_packet != NULL)
+ {
+ rtx tmp_note
+ = emit_note_after ((enum insn_note) NOTE_KIND (prologue_end_note),
+ last_insn_in_packet);
+ memcpy(&NOTE_DATA (tmp_note), &NOTE_DATA(prologue_end_note), sizeof(NOTE_DATA(prologue_end_note)));
+ delete_insn (prologue_end_note);
+ }
+ }
+
+ if (flag_var_tracking)
+ {
+ timevar_push (TV_VAR_TRACKING);
+ variable_tracking_main ();
+ /* We also have to deal with variable tracking notes in the
+ middle of VLIW packets. */
+ reorder_var_tracking_notes();
+ timevar_pop (TV_VAR_TRACKING);
+ }
+}
+
+/* Return the ALU character identifier for the current
+ instruction. This will be 0 or 1. */
+static char
+picochip_get_vliw_alu_id (void)
+{
+ int attr_type = 0;
+
+ /* Always use ALU 0 if VLIW scheduling is disabled. */
+ if (picochip_schedule_type != DFA_TYPE_SPEED)
+ return '0';
+
+ /* Get the attribute type of the instruction. Note that this can
+ ruin the contents of recog_data, so save/restore around the
+ call. */
+ picochip_save_recog_data ();
+ attr_type = get_attr_type (picochip_current_prescan_insn);
+ picochip_restore_recog_data ();
+
+ if (picochip_current_vliw_state.contains_pico_alu_insn)
+ {
+
+      /* Is this a picoAlu insn? If it is, then stuff it into ALU 0,
+ else it must be the other ALU (either basic or nonCc)
+ instruction which goes into 1. */
+ if (attr_type == TYPE_PICOALU)
+ return '0';
+ else
+ return '1';
+
+ }
+ else if (picochip_current_vliw_state.contains_non_cc_alu_insn)
+ {
+ /* Is this the non CC instruction? If it is, then stuff it into
+ ALU 1, else it must be a picoAlu or basicAlu, in which case
+ it goes into ALU 0. */
+ if (attr_type == TYPE_NONCCALU)
+ return '1';
+ else
+ return '0';
+ }
+ else
+ {
+ /* No picoAlu/nonCc instructions in use, so purely dependent upon
+ whether an ALU instruction has already been scheduled in this
+ cycle. */
+ switch (picochip_current_vliw_state.num_alu_insns_so_far)
+ {
+ case 0:
+ picochip_current_vliw_state.num_alu_insns_so_far++;
+ return '0';
+
+ case 1:
+ picochip_current_vliw_state.num_alu_insns_so_far++;
+ return '1';
+
+ default:
+ internal_error ("too many ALU instructions emitted (%d)",
+ picochip_current_vliw_state.num_alu_insns_so_far);
+ return 'X';
+ }
+ }
+
+}
+
+/* Reset any information about the current VLIW packing status. */
+static void
+picochip_reset_vliw (rtx insn)
+{
+ rtx local_insn = insn;
+
+ /* Nothing to do if VLIW scheduling isn't being used. */
+ if (picochip_schedule_type != DFA_TYPE_SPEED)
+ return;
+
+ if (TARGET_DEBUG)
+ printf ("%s on insn %d\n", __FUNCTION__, INSN_UID (insn));
+
+ /* Reset. */
+ picochip_current_vliw_state.contains_pico_alu_insn = 0;
+ picochip_current_vliw_state.contains_non_cc_alu_insn = 0;
+ picochip_current_vliw_state.num_alu_insns_so_far = 0;
+ picochip_current_vliw_state.num_cfi_labels_deferred = 0;
+ picochip_current_vliw_state.lm_label_name[0] = 0;
+ picochip_current_vliw_state.num_insns_in_packet = 0;
+
+ /* Read through the VLIW packet, classifying the instructions where
+ appropriate. */
+ local_insn = insn;
+ do
+ {
+ if (NOTE_P (local_insn) || DEBUG_INSN_P(local_insn))
+ {
+ local_insn = NEXT_INSN (local_insn);
+ continue;
+ }
+ else if (!INSN_P (local_insn))
+ break;
+ else
+ {
+ /* It is an instruction, but is it ours? */
+ if (INSN_CODE (local_insn) != -1)
+ {
+ int attr_type = 0;
+
+ picochip_current_vliw_state.num_insns_in_packet += 1;
+
+ /* Is it a picoAlu or nonCcAlu instruction? Note that the
+ get_attr_type function can overwrite the values in
+ the recog_data global, hence this is saved and
+ restored around the call. Not doing so results in
+ asm_output_opcode being called with a different
+ instruction to final_prescan_insn, which is fatal. */
+ picochip_save_recog_data ();
+ attr_type = get_attr_type (local_insn);
+ picochip_restore_recog_data ();
+
+ if (attr_type == TYPE_PICOALU)
+ picochip_current_vliw_state.contains_pico_alu_insn = 1;
+ if (attr_type == TYPE_NONCCALU)
+ picochip_current_vliw_state.contains_non_cc_alu_insn = 1;
+
+ }
+ }
+
+ /* Get the next instruction. */
+ local_insn = NEXT_INSN (local_insn);
+
+ /* Keep going while the next instruction is part of the same
+     VLIW packet (i.e., it's a valid instruction and doesn't mark
+     the start of a new VLIW packet). */
+ }
+ while (local_insn &&
+ (GET_MODE (local_insn) != TImode) && (INSN_CODE (local_insn) != -1));
+
+}
+
+int
+picochip_sched_reorder (FILE * file, int verbose,
+ rtx * ready ATTRIBUTE_UNUSED,
+ int *n_readyp ATTRIBUTE_UNUSED, int clock)
+{
+
+ if (verbose > 0)
+ fprintf (file, ";;\tClock %d\n", clock);
+
+ return picochip_sched_issue_rate ();
+
+}
+
+int
+picochip_sched_lookahead (void)
+{
+  /* It should always be enough to look ahead by 2 insns. Only slots 0/1 could
+ have a conflict. */
+ return 2;
+}
+
+int
+picochip_sched_issue_rate (void)
+{
+ return 3;
+}
+
+/* Adjust the scheduling cost between the two given instructions,
+ which have the given dependency. */
+int
+picochip_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+
+ if (TARGET_DEBUG)
+ {
+ printf ("Sched Adjust Cost: %d->%d is %d\n",
+ INSN_UID (insn), INSN_UID (dep_insn), cost);
+
+ printf (" Dependency type:");
+ switch (REG_NOTE_KIND (link))
+ {
+ case 0:
+ printf ("Data\n");
+ break;
+ case REG_DEP_ANTI:
+ printf ("ANTI\n");
+ break;
+ case REG_DEP_OUTPUT:
+ printf ("OUTPUT\n");
+ break;
+ default:
+ printf ("Unknown (%d)\n", REG_NOTE_KIND (link));
+ }
+ }
+
+ /* Anti-dependencies are used to enforce the ordering between a
+ * branch, and any subsequent instructions. For example:
+ *
+ * BNE someLabel
+ * ADD.0 r0,r1,r2
+ *
+ * The ADD instruction must execute after the branch, and this is
+ * enforced using an anti-dependency. Unfortunately, VLIW machines
+ * are happy to execute anti-dependent instructions in the same
+ * cycle, which then results in a schedule like the following being
+ * created:
+ *
+ * BNE someLabel \ ADD.0 r0,r1,r2
+ *
+ * The instruction which would normally be conditionally executed
+ * depending upon the outcome of the branch, is now unconditionally
+ * executed every time. To prevent this happening, any
+ * anti-dependencies between a branch and another instruction are
+ * promoted to become real dependencies.
+ */
+ if ((JUMP_P (dep_insn) || CALL_P(dep_insn)) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
+ {
+
+ if (TARGET_DEBUG)
+ printf ("Promoting anti-dependency %d->%d to a true-dependency\n",
+ INSN_UID (insn), INSN_UID (dep_insn));
+
+ return 1;
+ }
+
+ return cost;
+
+}
+
+/* Return the minimum of the two values */
+static int
+minimum (int a, int b)
+{
+ if (a < b)
+ return a;
+ if (b < a)
+ return b;
+  /* We don't expect to reach this function with a == b. */
+ gcc_unreachable();
+}
+
+
+/* This function checks whether the memory addresses of the two operands
+   differ by exactly 2 bytes. It returns the index (1 or 2) of the operand
+   with the lower address, or 0 if the addresses do not pair up. */
+
+static int
+memory_just_off (rtx opnd1, rtx opnd2)
+{
+ int offset1 = 0, offset2 = 0;
+ int reg1, reg2;
+
+ if (GET_CODE(XEXP(opnd1, 0)) == PLUS && GET_CODE(XEXP(XEXP(opnd1, 0),1)) == CONST_INT)
+ {
+ offset1 = INTVAL(XEXP(XEXP(opnd1, 0), 1));
+ reg1 = REGNO(XEXP(XEXP(opnd1, 0), 0));
+ }
+ else
+ {
+ reg1 = REGNO(XEXP(opnd1, 0));
+ }
+ if (GET_CODE(XEXP(opnd2, 0)) == PLUS && GET_CODE(XEXP(XEXP(opnd2, 0), 1)) == CONST_INT)
+ {
+ offset2 = INTVAL(XEXP(XEXP(opnd2, 0), 1));
+ reg2 = REGNO(XEXP(XEXP(opnd2, 0), 0));
+ }
+ else
+ {
+ reg2 = REGNO(XEXP(opnd2, 0));
+ }
+
+ /* Peepholing 2 STW/LDWs has the restriction that the resulting STL/LDL's address
+     should be 4-byte aligned. We can currently guarantee that only if the base
+ address is FP(R13) and the offset is aligned. */
+
+ if (reg1 == reg2 && reg1 == 13 && abs(offset1-offset2) == 2 && minimum(offset1, offset2) % 4 == 0)
+ return (minimum(offset1, offset2) == offset1) ? 1:2;
+
+ return 0;
+}
+
+static int
+registers_just_off (rtx opnd1, rtx opnd2)
+{
+ int reg1, reg2;
+ reg1 = REGNO(opnd1);
+ reg2 = REGNO(opnd2);
+ if (abs(reg1-reg2) == 1 && minimum(reg1, reg2) % 2 == 0)
+ return (minimum(reg1, reg2) == reg1)?1:2;
+ return 0;
+}
+
+/* Check to see if the two LDWs can be peepholed together into a LDL
+ They can be if the registers getting loaded into are contiguous
+ and the memory addresses are contiguous as well.
+   For example:
+ LDW r2,[r11]x
+ LDW r3,[r11]x+1
+ can be merged together into
+ LDL r[3:2],[r11]
+
+ NOTE:
+   1. The LDWs themselves only guarantee that r11 will be a 2-byte
+ aligned address. Only FP can be assumed to be 4 byte aligned.
+   2. The progression of the addresses and of the register numbers
+      must match. For example, if you swap r2 and r3 in the above
+      instructions, the resulting pair cannot be merged.
+
+*/
+bool
+ok_to_peephole_ldw(rtx opnd0, rtx opnd1, rtx opnd2, rtx opnd3)
+{
+  int memtest = 0, regtest = 0;
+
+  regtest = registers_just_off (opnd1, opnd3);
+  if (regtest == 0)
+    return false;
+
+  memtest = memory_just_off (opnd0, opnd2);
+  if (memtest == 0)
+    return false;
+
+  return (regtest == memtest);
+}
+
+/* Similar to LDW peephole */
+bool
+ok_to_peephole_stw(rtx opnd0, rtx opnd1, rtx opnd2, rtx opnd3)
+{
+  int memtest = 0, regtest = 0;
+
+  regtest = registers_just_off (opnd1, opnd3);
+  if (regtest == 0)
+    return false;
+
+  memtest = memory_just_off (opnd0, opnd2);
+  if (memtest == 0)
+    return false;
+
+  return (regtest == memtest);
+}
+
+
+/* Generate a SImode register with the register number that is the smaller of the two */
+rtx
+gen_min_reg(rtx opnd1,rtx opnd2)
+{
+ return gen_rtx_REG (SImode, minimum(REGNO(opnd1),REGNO(opnd2)));
+}
+
+/* Generate a SImode memory with the address that is the smaller of the two */
+rtx
+gen_SImode_mem(rtx opnd1,rtx opnd2)
+{
+ int offset1=0,offset2=0;
+ rtx reg;
+ rtx address;
+ if (GET_CODE(XEXP(opnd1,0)) == PLUS && GET_CODE(XEXP(XEXP(opnd1,0),1)) == CONST_INT)
+ {
+ offset1 = INTVAL(XEXP(XEXP(opnd1,0),1));
+ reg = XEXP(XEXP(opnd1,0),0);
+ }
+ else
+ {
+ reg = XEXP(opnd1,0);
+ }
+ if (GET_CODE(XEXP(opnd2,0)) == PLUS && GET_CODE(XEXP(XEXP(opnd2,0),1)) == CONST_INT)
+ {
+ offset2 = INTVAL(XEXP(XEXP(opnd2,0),1));
+ }
+ address = gen_rtx_PLUS (HImode, reg, GEN_INT(minimum(offset1,offset2)));
+ return gen_rtx_MEM(SImode,address);
+}
+
+bool
+picochip_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int* total, bool speed)
+{
+
+ int localTotal = 0;
+
+ if (!speed)
+ {
+      /* Penalize immediates that must be encoded as long constants. */
+ if (code == CONST_INT && !(INTVAL (x) >= 0 && INTVAL (x) < 16))
+ {
+ *total = COSTS_N_INSNS(1);
+ return true;
+ }
+ }
+ switch (code)
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ *total = COSTS_N_INSNS (outer_code != MEM);
+ return true;
+ break;
+
+ case IF_THEN_ELSE:
+      /* An if_then_else comes from a cbranch instruction. It will be split
+         into a condition-code-generating subtraction and a branch. */
+ *total = COSTS_N_INSNS (2);
+ return true;
+ break;
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (GET_MODE(x) == SImode)
+ *total = COSTS_N_INSNS (2);
+ if (GET_MODE(x) == DImode)
+ *total = COSTS_N_INSNS (4);
+ return false;
+
+ case MEM:
+ /* Byte Memory access on a NO_BYTE_ACCESS machine would be expensive */
+ if (GET_MODE(x) == QImode && !TARGET_HAS_BYTE_ACCESS)
+ *total = COSTS_N_INSNS (10);
+
+      /* 64-bit accesses have to be done through two 32-bit accesses. */
+ if (GET_MODE(x) == DImode)
+ *total = COSTS_N_INSNS (2);
+ return false;
+ break;
+
+ case ASHIFTRT:
+
+ /* SImode shifts are expensive */
+ if (GET_MODE(x) == SImode)
+ *total = COSTS_N_INSNS (10);
+
+ /* Register shift by constant is cheap. */
+ if ((GET_MODE(x) == QImode || GET_MODE(x) == HImode)
+ && GET_CODE(XEXP(x, 0)) == REG
+ && GET_CODE(XEXP(x, 1)) == CONST_INT)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ return false;
+ break;
+
+ case DIV:
+ case MOD:
+
+      /* Divisions are more expensive than the default cost of 7. */
+ if (GET_MODE(x) == SImode)
+ *total = COSTS_N_INSNS (20);
+ else
+ *total = COSTS_N_INSNS (12);
+ return false;
+ break;
+
+ case MULT:
+ /* Look for the simple cases of multiplying register*register or
+ register*constant. */
+ if ((GET_MODE(x) == QImode || GET_MODE(x) == HImode)
+ && ((GET_CODE(XEXP(x, 0)) == REG
+ && (GET_CODE(XEXP(x, 1)) == REG || GET_CODE(XEXP(x,1)) == CONST_INT))
+ || (GET_CODE(XEXP(x, 0)) == ZERO_EXTEND
+ && GET_CODE(XEXP(XEXP(x, 0),0)) == REG
+ && GET_CODE(XEXP(x, 1)) == ZERO_EXTEND
+ && GET_CODE(XEXP(XEXP(x, 1),0)) == REG)))
+ {
+
+ /* When optimising for size, multiplication by constant
+ should be discouraged slightly over multiplication by a
+ register. */
+ if (picochip_has_mac_unit)
+ {
+ /* Single cycle multiplication, but the result must be
+ loaded back into a general register afterwards. */
+ *total = COSTS_N_INSNS(2);
+ return true;
+ }
+ else if (picochip_has_mul_unit)
+ {
+ /* Single cycle multiplication. */
+ *total = COSTS_N_INSNS(1);
+ return true;
+ }
+ /* Else no multiply available. Use default cost. */
+
+ }
+ break;
+
+ default:
+ /* Do nothing. */
+ break;
+ }
+
+ if (localTotal != 0)
+ {
+ *total = localTotal;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+}
+
+void
+picochip_final_prescan_insn (rtx insn, rtx * opvec ATTRIBUTE_UNUSED,
+ int num_operands ATTRIBUTE_UNUSED)
+{
+ rtx local_insn;
+
+ picochip_current_prescan_insn = insn;
+
+ if (TARGET_DEBUG)
+ printf ("Final prescan on INSN %d with mode %s\n",
+ INSN_UID (insn), GET_MODE_NAME (GET_MODE (insn)));
+
+ /* If this is the start of a new instruction cycle, or no scheduling
+ is used, then reset the VLIW status. */
+  if (GET_MODE (insn) == TImode || picochip_schedule_type != DFA_TYPE_SPEED)
+ picochip_reset_vliw (insn);
+
+  /* No VLIW scheduling occurred, so don't go any further. */
+ if (picochip_schedule_type != DFA_TYPE_SPEED)
+ return;
+
+ /* Look for the next printable instruction. This loop terminates on
+ any recognisable instruction, and on any unrecognisable
+ instruction with TImode. */
+ local_insn = insn;
+ for (local_insn = NEXT_INSN (local_insn); local_insn;
+ local_insn = NEXT_INSN (local_insn))
+ {
+ if (NOTE_P (local_insn) || DEBUG_INSN_P(local_insn))
+ continue;
+ else if (!INSN_P (local_insn))
+ break;
+ else if (GET_MODE (local_insn) == TImode
+ || INSN_CODE (local_insn) != -1)
+ break;
+ }
+
+ /* Set the continuation flag if the next instruction can be packed
+ with the current instruction (i.e., the next instruction is
+ valid, and isn't the start of a new cycle). */
+ picochip_vliw_continuation = (local_insn && NONDEBUG_INSN_P (local_insn) &&
+ (GET_MODE (local_insn) != TImode));
+
+}
+
+/* Builtin functions. */
+/* Given a builtin function taking 2 operands (i.e., target + source),
+ emit the RTL for the underlying instruction. */
+static rtx
+picochip_expand_builtin_2op (enum insn_code icode, tree call, rtx target)
+{
+ tree arg0;
+ rtx op0, pat;
+ enum machine_mode tmode, mode0;
+
+ /* Grab the incoming argument and emit its RTL. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* Determine the modes of the instruction operands. */
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+
+ /* Ensure that the incoming argument RTL is in a register of the
+ correct mode. */
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ /* If there isn't a suitable target, emit a target register. */
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ /* Emit and return the new instruction. */
+ pat = GEN_FCN (icode) (target, op0);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+
+}
+
+/* Given a builtin function taking 3 operands (i.e., target + two
+ sources), emit the RTL for the underlying instruction. */
+static rtx
+picochip_expand_builtin_3op (enum insn_code icode, tree call, rtx target)
+{
+ tree arg0, arg1;
+ rtx op0, op1, pat;
+ enum machine_mode tmode, mode0, mode1;
+
+ /* Grab the function's arguments. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ arg1 = CALL_EXPR_ARG (call, 1);
+
+ /* Emit rtl sequences for the function arguments. */
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* Get the modes of each of the instruction operands. */
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ /* Ensure that each of the function argument rtl sequences is in a
+ register of the correct mode. */
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* If no target has been given, create a register to use as the target. */
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ /* Emit and return the new instruction. */
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+
+}
+
+/* Expand a builtin function which takes two arguments and returns void. */
+static rtx
+picochip_expand_builtin_2opvoid (enum insn_code icode, tree call)
+{
+ tree arg0, arg1;
+ rtx op0, op1, pat;
+ enum machine_mode mode0, mode1;
+
+ /* Grab the function's arguments. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ arg1 = CALL_EXPR_ARG (call, 1);
+
+ /* Emit rtl sequences for the function arguments. */
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* Get the modes of each of the instruction operands. */
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ /* Ensure that each of the function argument rtl sequences is in a
+ register of the correct mode. */
+ if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* Emit and return the new instruction. */
+ pat = GEN_FCN (icode) (op0, op1);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return NULL_RTX;
+
+}
+
+/* Expand an array get into the corresponding RTL. */
+static rtx
+picochip_expand_array_get (tree call, rtx target)
+{
+ tree arg0, arg1, arg2;
+ rtx op0, op1, op2, pat;
+
+ /* Grab the function's arguments. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ arg1 = CALL_EXPR_ARG (call, 1);
+ arg2 = CALL_EXPR_ARG (call, 2);
+
+ /* Emit rtl sequences for the function arguments. */
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* The second and third operands must be constant. Nothing else will
+ do. */
+ if (CONST_INT != GET_CODE (op1))
+ internal_error ("%s: Second source operand is not a constant",
+ __FUNCTION__);
+ if (CONST_INT != GET_CODE (op2))
+ internal_error ("%s: Third source operand is not a constant",
+ __FUNCTION__);
+
+ /* If no target has been given, create a register to use as the target. */
+ if (target == 0 || GET_MODE (target) != SImode)
+ target = gen_reg_rtx (SImode);
+
+ /* The first operand must be a HImode register or a constant. If it
+ isn't, force it into a HImode register. */
+ if (GET_MODE (op0) != HImode || REG != GET_CODE (op0))
+ op0 = copy_to_mode_reg (HImode, op0);
+
+
+ /* Emit and return the new instruction. */
+ pat = gen_commsArrayGet (target, op0, op1, op2);
+ emit_insn (pat);
+
+ return target;
+
+}
+
+/* Expand an array put into the corresponding RTL. */
+static rtx
+picochip_expand_array_put (tree call, rtx target)
+{
+ tree arg0, arg1, arg2, arg3;
+ rtx op0, op1, op2, op3, pat;
+
+ /* Grab the function's arguments. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ arg1 = CALL_EXPR_ARG (call, 1);
+ arg2 = CALL_EXPR_ARG (call, 2);
+ arg3 = CALL_EXPR_ARG (call, 3);
+
+ /* Emit rtl sequences for the function arguments. */
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op3 = expand_expr (arg3, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* The first operand must be an SImode register. */
+ if (GET_MODE (op0) != SImode || REG != GET_CODE (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+
+ /* The second (index) operand must be a HImode register, or a
+ constant. If it isn't, force it into a HImode register. */
+ if (GET_MODE (op1) != HImode || REG != GET_CODE (op1))
+ op1 = copy_to_mode_reg (HImode, op1);
+
+ /* The remaining operands must be constant. Nothing else will do. */
+ if (CONST_INT != GET_CODE (op2))
+ internal_error ("%s: Third source operand is not a constant",
+ __FUNCTION__);
+ if (CONST_INT != GET_CODE (op3))
+ internal_error ("%s: Fourth source operand is not a constant",
+ __FUNCTION__);
+
+ /* Emit and return the new instruction. */
+ pat = gen_commsArrayPut (op0, op1, op2, op3);
+ emit_insn (pat);
+
+ return target;
+
+}
+
+/* Expand an array testport into the corresponding RTL. */
+static rtx
+picochip_expand_array_testport (tree call, rtx target)
+{
+ tree arg0, arg1, arg2;
+ rtx op0, op1, op2, pat;
+
+ /* Grab the function's arguments. */
+ arg0 = CALL_EXPR_ARG (call, 0);
+ arg1 = CALL_EXPR_ARG (call, 1);
+ arg2 = CALL_EXPR_ARG (call, 2);
+
+ /* Emit rtl sequences for the function arguments. */
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+ /* The first operand must be a HImode register, or a constant. If it
+ isn't, force it into a HImode register. */
+ if (GET_MODE (op0) != HImode || REG != GET_CODE (op0))
+ op0 = copy_to_mode_reg (HImode, op0);
+
+ /* The second and third operands must be constant. Nothing else will
+ do. */
+ if (CONST_INT != GET_CODE (op1))
+ internal_error ("%s: Second source operand is not a constant",
+ __FUNCTION__);
+ if (CONST_INT != GET_CODE (op2))
+ internal_error ("%s: Third source operand is not a constant",
+ __FUNCTION__);
+
+ /* If no target has been given, create a HImode register to use as
+ the target. */
+ if (target == 0 || GET_MODE (target) != HImode)
+ target = gen_reg_rtx (HImode);
+
+ /* Emit and return the new instruction. */
+ pat = gen_commsArrayTestPort (target, op0, op1, op2);
+ emit_insn (pat);
+
+ return target;
+
+}
+
+/* Generate a unique HALT instruction by giving the instruction a
+ unique integer. This integer makes no difference to the assembly
+ output (other than a comment indicating the supplied id), but the
+ presence of the unique integer prevents the compiler from combining
+ several different halt instructions into one instruction. This
+ means that each use of the halt instruction is unique, which in
+ turn means that assertions work as expected. */
+static rtx
+picochip_generate_halt (void)
+{
+ static int currentId = 0;
+ rtx insns;
+ rtx id = GEN_INT (currentId);
+ currentId += 1;
+
+ start_sequence();
+ emit_insn (gen_halt (id));
+
+ /* A barrier is inserted to prevent the compiler from thinking that
+ it has to continue execution after the HALT. */
+ emit_barrier ();
+
+ insns = get_insns();
+ end_sequence();
+ emit_insn (insns);
+
+ return const0_rtx;
+}
+
+/* Initialise the builtin functions. Start by initialising
+ descriptions of different types of functions (e.g., void fn(int),
+ int fn(void)), and then use these to define the builtins. */
+void
+picochip_init_builtins (void)
+{
+ tree noreturn;
+ tree endlink = void_list_node;
+ tree int_endlink = tree_cons (NULL_TREE, integer_type_node, endlink);
+ tree unsigned_endlink = tree_cons (NULL_TREE, unsigned_type_node, endlink);
+ tree long_endlink = tree_cons (NULL_TREE, long_integer_type_node, endlink);
+ tree int_int_endlink =
+ tree_cons (NULL_TREE, integer_type_node, int_endlink);
+ tree int_int_int_endlink =
+ tree_cons (NULL_TREE, integer_type_node, int_int_endlink);
+ tree int_long_endlink =
+ tree_cons (NULL_TREE, integer_type_node, long_endlink);
+ tree long_int_int_int_endlink =
+ tree_cons (NULL_TREE, long_integer_type_node, int_int_int_endlink);
+
+ tree int_ftype_int, int_ftype_int_int;
+ tree long_ftype_int, long_ftype_int_int_int;
+ tree void_ftype_int_long, int_ftype_int_int_int,
+ void_ftype_long_int_int_int;
+ tree void_ftype_void, unsigned_ftype_unsigned;
+
+ /* void func (void) */
+ void_ftype_void = build_function_type (void_type_node, endlink);
+
+ /* int func (int) */
+ int_ftype_int = build_function_type (integer_type_node, int_endlink);
+
+ /* unsigned int func (unsigned int) */
+ unsigned_ftype_unsigned = build_function_type (unsigned_type_node, unsigned_endlink);
+
+ /* int func(int, int) */
+ int_ftype_int_int
+ = build_function_type (integer_type_node, int_int_endlink);
+
+ /* long func(int) */
+ long_ftype_int = build_function_type (long_integer_type_node, int_endlink);
+
+ /* long func(int, int, int) */
+ long_ftype_int_int_int
+ = build_function_type (long_integer_type_node, int_int_int_endlink);
+
+ /* int func(int, int, int) */
+ int_ftype_int_int_int
+ = build_function_type (integer_type_node, int_int_int_endlink);
+
+ /* void func(int, long) */
+ void_ftype_int_long
+ = build_function_type (void_type_node, int_long_endlink);
+
+ /* void func(long, int, int, int) */
+ void_ftype_long_int_int_int
+ = build_function_type (void_type_node, long_int_int_int_endlink);
+
+ /* Initialise the sign-bit-count function. */
+ add_builtin_function ("__builtin_sbc", int_ftype_int,
+ PICOCHIP_BUILTIN_SBC, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("picoSbc", int_ftype_int, PICOCHIP_BUILTIN_SBC,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* Initialise the bit reverse function. */
+ add_builtin_function ("__builtin_brev", unsigned_ftype_unsigned,
+ PICOCHIP_BUILTIN_BREV, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("picoBrev", unsigned_ftype_unsigned,
+ PICOCHIP_BUILTIN_BREV, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Initialise the byte swap function. */
+ add_builtin_function ("__builtin_byteswap", unsigned_ftype_unsigned,
+ PICOCHIP_BUILTIN_BYTESWAP, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("picoByteSwap", unsigned_ftype_unsigned,
+ PICOCHIP_BUILTIN_BYTESWAP, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Initialise the ASRI function (note that while this can be coded
+ using a signed shift in C, extra scratch registers are required,
+ which we avoid by having a direct builtin to map to the
+ instruction). */
+ add_builtin_function ("__builtin_asri", int_ftype_int_int,
+ PICOCHIP_BUILTIN_ASRI, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Initialise saturating addition. */
+ add_builtin_function ("__builtin_adds", int_ftype_int_int,
+ PICOCHIP_BUILTIN_ADDS, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("picoAdds", int_ftype_int_int,
+ PICOCHIP_BUILTIN_ADDS, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Initialise saturating subtraction. */
+ add_builtin_function ("__builtin_subs", int_ftype_int_int,
+ PICOCHIP_BUILTIN_SUBS, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("picoSubs", int_ftype_int_int,
+ PICOCHIP_BUILTIN_SUBS, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Scalar comms builtins. */
+ add_builtin_function ("__builtin_get", long_ftype_int,
+ PICOCHIP_BUILTIN_GET, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("__builtin_put", void_ftype_int_long,
+ PICOCHIP_BUILTIN_PUT, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("__builtin_testport", int_ftype_int,
+ PICOCHIP_BUILTIN_TESTPORT, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ /* Array comms builtins. */
+ add_builtin_function ("__builtin_put_array",
+ void_ftype_long_int_int_int,
+ PICOCHIP_BUILTIN_PUT_ARRAY, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("__builtin_get_array", long_ftype_int_int_int,
+ PICOCHIP_BUILTIN_GET_ARRAY, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ add_builtin_function ("__builtin_testport_array",
+ int_ftype_int_int_int,
+ PICOCHIP_BUILTIN_TESTPORT_ARRAY, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ /* Halt instruction. Note that the builtin function is marked as
+ having the attribute `noreturn' so that the compiler realises
+ that the halt stops the program dead. */
+ noreturn = tree_cons (get_identifier ("noreturn"), NULL, NULL);
+ add_builtin_function ("__builtin_halt", void_ftype_void,
+ PICOCHIP_BUILTIN_HALT, BUILT_IN_MD, NULL,
+ noreturn);
+ add_builtin_function ("picoHalt", void_ftype_void,
+ PICOCHIP_BUILTIN_HALT, BUILT_IN_MD, NULL,
+ noreturn);
+
+}
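+
+/* Illustrative usage sketch (not part of the port itself): with the
+ builtins registered above, user code could contain, for example,
+
+ int sum = __builtin_adds (a, b); -- saturating add
+ long v = __builtin_get (port); -- scalar port get
+ __builtin_put (port, v); -- scalar port put
+
+ where a, b and port are hypothetical int variables. */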
+
+/* Expand a call to a builtin function. */
+rtx
+picochip_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case PICOCHIP_BUILTIN_ASRI:
+ return picochip_expand_builtin_3op (CODE_FOR_builtin_asri, exp,
+ target);
+
+ case PICOCHIP_BUILTIN_ADDS:
+ return picochip_expand_builtin_3op (CODE_FOR_sataddhi3, exp,
+ target);
+
+ case PICOCHIP_BUILTIN_SUBS:
+ return picochip_expand_builtin_3op (CODE_FOR_satsubhi3, exp,
+ target);
+
+ case PICOCHIP_BUILTIN_SBC:
+ return picochip_expand_builtin_2op (CODE_FOR_sbc, exp, target);
+
+ case PICOCHIP_BUILTIN_BREV:
+ return picochip_expand_builtin_2op (CODE_FOR_brev, exp, target);
+
+ case PICOCHIP_BUILTIN_BYTESWAP:
+ return picochip_expand_builtin_2op (CODE_FOR_bswaphi2, exp, target);
+
+ case PICOCHIP_BUILTIN_GET:
+ return picochip_expand_builtin_2op (CODE_FOR_commsGet, exp, target);
+
+ case PICOCHIP_BUILTIN_PUT:
+ return picochip_expand_builtin_2opvoid (CODE_FOR_commsPut, exp);
+
+ case PICOCHIP_BUILTIN_TESTPORT:
+ return picochip_expand_builtin_2op (CODE_FOR_commsTestPort, exp,
+ target);
+
+ case PICOCHIP_BUILTIN_PUT_ARRAY:
+ return picochip_expand_array_put (exp, target);
+
+ case PICOCHIP_BUILTIN_GET_ARRAY:
+ return picochip_expand_array_get (exp, target);
+
+ case PICOCHIP_BUILTIN_TESTPORT_ARRAY:
+ return picochip_expand_array_testport (exp, target);
+
+ case PICOCHIP_BUILTIN_HALT:
+ return picochip_generate_halt ();
+
+ default:
+ gcc_unreachable();
+
+ }
+
+ /* Should really do something sensible here. */
+ return NULL_RTX;
+}
+
+/* Emit warnings. */
+static void
+picochip_warn_inefficient (const char *msg)
+{
+ if (TARGET_INEFFICIENT_WARNINGS)
+ warning (OPT_minefficient_warnings,
+ "%s (disable warning using -mno-inefficient-warnings)", msg);
+}
+
+void
+warn_of_byte_access (void)
+{
+ static int warned = 0;
+
+ if (!warned)
+ {
+ picochip_warn_inefficient
+ ("byte access is synthesised - consider using MUL AE");
+ warned = 1;
+ }
+
+}
+
+rtx
+picochip_function_value (const_tree valtype, const_tree func,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = TYPE_MODE (valtype);
+ int unsignedp = TYPE_UNSIGNED (valtype);
+
+ /* Since we define PROMOTE_FUNCTION_RETURN, we must promote the mode
+ just as PROMOTE_MODE does. */
+ mode = promote_function_mode (valtype, mode, &unsignedp, func, 1);
+
+ return gen_rtx_REG (mode, 0);
+
+}
+
+/* Check that a value of the given mode can be held in the given
+ register. */
+int
+picochip_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return regno == CC_REGNUM;
+
+ /* If the CC register is being used, then only CC mode values are
+ allowed (which have already been tested). */
+ if (regno == CC_REGNUM || regno == ACC_REGNUM)
+ return 0;
+
+ /* Must be a valid register. */
+ if (regno > 16)
+ return 0;
+
+ /* Modes QI and HI may be placed in any register except the CC. */
+ if (mode == QImode || mode == HImode)
+ return 1;
+
+ /* DI must be in a quad register. */
+ if (mode == DImode)
+ return (regno % 4 == 0);
+
+ /* All other modes must be placed in an even-numbered register. */
+ return !(regno & 1);
+
+}
+
+/* Extract the lower and upper components of a constant value. */
+
+rtx
+picochip_get_low_const (rtx value)
+{
+ return gen_int_mode (INTVAL (value) & 0xFFFF, HImode);
+}
+
+rtx
+picochip_get_high_const (rtx value)
+{
+ /*return GEN_INT ((((INTVAL (value) >> 16) & 0xFFFF) ^ 0x8000) - 0x8000); */
+ return gen_int_mode ((INTVAL (value) >> 16) & 0xFFFF, HImode);
+}
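+
+/* For example, given the SImode constant 0x12345678, the two functions
+ above return the HImode values 0x5678 (low) and 0x1234 (high). */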
+
+
+/* Loading and storing QImode values to and from memory in a machine
+ without byte access might require a scratch register. However, the
+ scratch register might correspond to the register in which the
+ value is being loaded. To ensure that a scratch register is
+ supplied which is definitely different to the output register,
+ request a register pair. This effectively gives a choice of two
+ registers, so that we are guaranteed to get at least one register
+ which is different to the output register. This trick is taken
+ from the alpha implementation. */
+static reg_class_t
+picochip_secondary_reload (bool in_p,
+ rtx x ATTRIBUTE_UNUSED,
+ reg_class_t cla ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ secondary_reload_info *sri)
+{
+ if (mode == QImode && !TARGET_HAS_BYTE_ACCESS)
+ {
+ if (in_p == 0)
+ sri->icode = CODE_FOR_reload_outqi;
+ else
+ sri->icode = CODE_FOR_reload_inqi;
+ }
+
+ /* We don't need to return a register class when only a scratch
+ register is needed; reload deduces the scratch register class from
+ the instruction definition for sri->icode. A register class only
+ needs to be returned when an intermediate register is required for
+ a copy. */
+ return NO_REGS;
+}
+
+/* Return true if the given memory operand can be aligned to a
+ word+offset memory reference (e.g., FP+3 can be converted into the
+ memory operand FP+2, with the offset 1). */
+int
+picochip_alignable_memory_operand (rtx mem_operand,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx address;
+
+ /* Not a mem operand. Refuse immediately. */
+ if (MEM != GET_CODE (mem_operand))
+ return 0;
+
+ address = XEXP (mem_operand, 0);
+
+ /* Return true if a PLUS of the SP and a (valid) constant, or SP itself. */
+ return ((PLUS == GET_CODE (address) &&
+ REGNO (XEXP (address, 0)) == STACK_POINTER_REGNUM &&
+ CONST_INT == GET_CODE (XEXP (address, 1)) &&
+ picochip_const_ok_for_letter_p (INTVAL (XEXP (address, 1)), 'K'))
+ || (REG == GET_CODE (address)
+ && REGNO (address) == STACK_POINTER_REGNUM));
+
+}
+
+/* Return true if the given memory reference is to a word aligned
+ address. Currently this means it must be either SP, or
+ SP+offset. Perhaps this function could be merged with the
+ alignable-memory check above. */
+int
+picochip_word_aligned_memory_reference (rtx operand)
+{
+
+
+ /* The address must be the SP register, or a constant, aligned
+ offset from SP which doesn't exceed the FP+offset
+ restrictions. */
+ return ((PLUS == GET_CODE (operand)
+ && REGNO (XEXP (operand, 0)) == STACK_POINTER_REGNUM
+ && picochip_is_aligned (INTVAL (XEXP (operand, 1)), 16)
+ && picochip_const_ok_for_letter_p (INTVAL (XEXP (operand, 1)),
+ 'K'))
+ || (REG == GET_CODE (operand)
+ && REGNO (operand) == STACK_POINTER_REGNUM));
+
+}
+
+/* Given an alignable memory location, convert the memory location
+ into a HI mode access, storing the new memory reference in
+ paligned_mem, and the number of bits by which to shift in pbitnum
+ (i.e., given a reference to FP+3, this creates an aligned reference
+ of FP+2, with an 8-bit shift). This code is a modification of that
+ found in the Alpha port. */
+void
+picochip_get_hi_aligned_mem (rtx ref, rtx * paligned_mem, rtx * pbitnum)
+{
+ rtx base;
+ HOST_WIDE_INT offset = 0;
+
+ gcc_assert (GET_CODE (ref) == MEM);
+
+ if (reload_in_progress && !memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
+ {
+ base = find_replacement (&XEXP (ref, 0));
+
+ gcc_assert(memory_address_p (GET_MODE (ref), base));
+ }
+ else
+ {
+ base = XEXP (ref, 0);
+ }
+
+ if (GET_CODE (base) == PLUS)
+ {
+ offset += INTVAL (XEXP (base, 1));
+ base = XEXP (base, 0);
+ }
+
+ *paligned_mem = widen_memory_access (ref, HImode, (offset & ~1) - offset);
+
+ if (offset > 0)
+ {
+ if (TARGET_DEBUG)
+ {
+ printf
+ ("Found non-zero offset in get_hi_aligned_mem - check that the correct value is being used (as this functionality hasn't been exploited yet).\n");
+ }
+ }
+
+ *pbitnum = GEN_INT ((offset & 1) * 8);
+
+}
+
+/* Return true if the given operand is an absolute address in memory
+ (i.e., a symbolic offset). */
+int
+picochip_absolute_memory_operand (rtx op,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+
+ if (MEM == GET_CODE (op))
+ {
+ rtx address = XEXP (op, 0);
+
+ /* Symbols are valid absolute addresses. */
+ if (SYMBOL_REF == GET_CODE (address))
+ return 1;
+
+ /* Constant offsets to symbols are valid absolute addresses. */
+ if (CONST == GET_CODE (address) &&
+ PLUS == GET_CODE (XEXP (address, 0)) &&
+ SYMBOL_REF == GET_CODE (XEXP (XEXP (address, 0), 0)) &&
+ CONST_INT == GET_CODE (XEXP (XEXP (address, 0), 1)))
+ return 1;
+
+ }
+ else
+ return 0;
+
+ return 0;
+
+}
+
+void
+picochip_asm_named_section (const char *name,
+ unsigned int flags ATTRIBUTE_UNUSED,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ fprintf (asm_out_file, ".section %s\n", name);
+}
+
+
+/* Check if we can make a conditional copy instruction. This is emitted as an
+ instruction to set the condition register, followed by an instruction which
+ uses the condition register to perform the conditional move. */
+int
+picochip_check_conditional_copy (rtx * operands)
+{
+
+ rtx branch_op_0 = XEXP (operands[1], 0);
+ rtx branch_op_1 = XEXP (operands[1], 1);
+
+ /* Only HI mode conditional moves are currently allowed. Can we add
+ SI mode moves? */
+ if (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE)
+ return 0;
+
+ /* Is the comparison valid? Only allow operands which are registers
+ if they are HImode. SI mode comparisons against 0 could be
+ handled using logical operations (e.g., SIreg != 0 when low ||
+ high). Need to find test cases to provoke this though (fixunssfdi
+ in libgcc does, but is complicated). */
+ if (register_operand(branch_op_0, GET_MODE(branch_op_0)) &&
+ GET_MODE(branch_op_0) != HImode)
+ return 0;
+ if (register_operand(branch_op_1, GET_MODE(branch_op_1)) &&
+ GET_MODE(branch_op_1) != HImode)
+ return 0;
+
+ return 1;
+
+}
+
+
+static rtx
+picochip_static_chain (const_tree ARG_UNUSED (fndecl), bool incoming_p)
+{
+ rtx addr;
+ if (incoming_p)
+ addr = arg_pointer_rtx;
+ else
+ addr = plus_constant (stack_pointer_rtx, -2 * UNITS_PER_WORD);
+ return gen_frame_mem (Pmode, addr);
+}
diff --git a/gcc/config/picochip/picochip.h b/gcc/config/picochip/picochip.h
new file mode 100644
index 000000000..5f6bc7b2c
--- /dev/null
+++ b/gcc/config/picochip/picochip.h
@@ -0,0 +1,678 @@
+/* Definitions of target machine for GNU compiler for picoChip
+ Copyright (C) 2001, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ Contributed by Picochip Ltd. (http://www.picochip.com)
+ Maintained by Daniel Towner (daniel.towner@picochip.com) and
+ Hariharan Sandanagobalane (hariharan@picochip.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Which type of DFA scheduling to use - schedule for speed (VLIW), or
+ schedule for space. When scheduling for space, attempt to schedule
+ into stall cycles, but don't pack instructions. */
+
+enum picochip_dfa_type
+{
+ DFA_TYPE_NONE,
+ DFA_TYPE_SPACE,
+ DFA_TYPE_SPEED
+};
+
+extern enum picochip_dfa_type picochip_schedule_type;
+
+/* Controlling the Compilation Driver */
+
+/* Pass through the save-temps command option. */
+#define LINK_SPEC " %{save-temps:--save-temps}"
+
+/* This is an embedded processor, and only supports a cut-down version of
+ * the standard C library. */
+#define LIB_SPEC "-lpicoC"
+
+/* The start file is automatically provided by the linker. */
+#define STARTFILE_SPEC ""
+
+/* Run-time Target Specification */
+
+/* Define some additional pre-processor macros. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("NO_TRAMPOLINES"); \
+ builtin_define ("PICOCHIP"); \
+ builtin_define ("__PICOCHIP__"); \
+ } \
+ while (0)
+
+/* Translate requests for particular AEs into their respective ISA
+ options. Note that byte access is enabled by default. */
+#define DRIVER_SELF_SPECS \
+ "%{mae=ANY:-mmul-type=none -mno-byte-access} %<mae=ANY", \
+ "%{mae=ANY2:-mmul-type=none -mno-byte-access} %<mae=ANY2", \
+ "%{mae=ANY3:-mmul-type=none} %<mae=ANY3", \
+ "%{mae=STAN:-mmul-type=none -mno-byte-access} %<mae=STAN", \
+ "%{mae=STAN2:-mmul-type=mac -mno-byte-access} %<mae=STAN2", \
+ "%{mae=STAN3:-mmul-type=mac} %<mae=STAN3", \
+ "%{mae=MAC:-mmul-type=mac -mno-byte-access} %<mae=MAC", \
+ "%{mae=MUL:-mmul-type=mul} %<mae=MUL", \
+ "%{mae=MEM:-mmul-type=mul} %<mae=MEM", \
+ "%{mae=MEM2:-mmul-type=mul} %<mae=MEM2", \
+ "%{mae=CTRL:-mmul-type=mul} %<mae=CTRL", \
+ "%{mae=CTRL2:-mmul-type=mul} %<mae=CTRL2"
+
+/* Specify the default options, so that the multilib build doesn't
+ need to provide special cases for the defaults. */
+#define MULTILIB_DEFAULTS \
+ { "mmul-type=mul", "mbyte-access"}
+
+#define TARGET_HAS_BYTE_ACCESS (picochip_has_byte_access)
+#define TARGET_HAS_MUL_UNIT (picochip_has_mul_unit)
+#define TARGET_HAS_MAC_UNIT (picochip_has_mac_unit)
+#define TARGET_HAS_MULTIPLY (picochip_has_mac_unit || picochip_has_mul_unit)
+
+#define TARGET_VERSION fprintf(stderr, "(picoChip)");
+
+/* Storage Layout */
+
+/* picoChip processors are 16-bit machines, little endian. */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 0
+#define WORDS_BIG_ENDIAN 0
+
+#define BITS_PER_UNIT 8
+
+#define BITS_PER_WORD 16
+#define UNITS_PER_WORD (BITS_PER_WORD / BITS_PER_UNIT)
+
+#define POINTER_SIZE BITS_PER_WORD
+
+/* Promote those modes that are smaller than an int, to int mode. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ ((GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ ? (MODE) = HImode : 0)
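+
+/* E.g., QImode (8-bit) integer values are promoted to HImode, the
+ natural 16-bit register width, before being operated on. */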
+
+/* All parameters are at least this aligned. Parameters are passed
+ one-per-register. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* The main stack pointer is guaranteed to be aligned to the most
+ strict data alignment. */
+#define STACK_BOUNDARY 32
+
+/* Function entry point is byte aligned. */
+#define FUNCTION_BOUNDARY 8
+
+/* This is the biggest alignment that can be allowed on this machine.
+ Since the STANs have only 256 bytes of memory, it doesn't make sense
+ to allow alignments greater than 32 bytes. Hence the value below,
+ which is expressed in bits. */
+#define MAX_OFILE_ALIGNMENT 32*8
+
+/* The strictest data object alignment, which represents a register pair. */
+#define BIGGEST_ALIGNMENT 32
+
+/* The hardware doesn't allow unaligned memory access. */
+#define STRICT_ALIGNMENT 1
+
+/* We want the 'unix' style bitfield packing algorithm. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Support up to 64-bit integers. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (DImode)
+
+/* We don't support floating point, but give it a sensible definition. */
+#define TARGET_FLOAT_FORMAT IEEE_FLOAT_FORMAT
+
+/* Layout of Source Language Data Types. */
+
+#define INT_TYPE_SIZE BITS_PER_WORD
+
+/* The normal sizes for C scalar data. */
+#define CHAR_TYPE_SIZE 8
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+/* We don't support the following data types, but still give them
+ sensible values. */
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 32
+#define LONG_DOUBLE_TYPE_SIZE 32
+
+/* Plain `char' is a signed type, since the hardware sign-extends
+ bytes when loading them from memory into a register. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Note that the names of the types used in the following macros must
+ be precisely the same as those defined internally in gcc. For
+ example, `unsigned short' wouldn't work as a type string, since gcc
+ doesn't define any type with this exact string. The correct string
+ to use is `short unsigned int'. */
+
+#define SIZE_TYPE "unsigned int"
+
+#define PTRDIFF_TYPE "int"
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+#define WINT_TYPE "unsigned int"
+
+/* Register Usage */
+
+/* Picochip has 16 16-bit registers, a condition code register and an
+ (inaccessible) instruction pointer. One of these registers (r15) is
+ special, and is either used to load a constant anywhere a register
+ can normally be used, or is used to specify a dummy destination
+ (e.g., when setting condition flags). We also define some pseudo
+ registers to represent condition codes, the frame pointer and the
+ argument pointer. The latter two are eliminated wherever possible.
+
+ Pairs of general registers may be combined to form 32-bit registers.
+
+ The picoChip registers are as follows:
+
+ 0..1 - function return value
+ 0..5 - first 6 function parameters
+ 6..11 - General purpose
+ 12 - link register
+ 13 - stack pointer
+ 14 - specialized pointer
+ 15 - long constant or /dev/null
+ (16) acc0
+ (17) pseudo condition code
+ (18) pseudo frame pointer
+ (19) pseudo arg pointer
+
+ Registers 0..6, 12, 13, 14, 15 are caller save
+ Registers 0..12, 14 are available to the register allocator.
+
+ In addition, the DSP variant of the ISA allows extra accumulator
+ registers to be accessed. These are special purpose registers,
+ which are not currently used by the compiler.
+
+ */
+
+/* Basic Characteristics of Registers */
+
+/* We have 16 hard registers plus 3 pseudo hard registers and an accumulator. */
+#define FIRST_PSEUDO_REGISTER 20
+
+/* The first non-hard register. Only used internally by the picoChip port. */
+#define FIRST_NONHARD_REGISTER 18
+
+/* Cannot use SP, CST, CC, FP, AP */
+#define FIXED_REGISTERS {0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,1, 1,1,1,1}
+
+/* Those that are clobbered by a function call (includes pseudo-regs) */
+#define CALL_USED_REGISTERS {1,1,1,1,1,1,0,0, 0,0,0,0,1,1,0,1, 1,1,1,1}
+#define CALL_REALLY_USED_REGISTERS {1,1,1,1,1,1,0,0, 0,0,0,0,1,1,0,0, 0,1,0,0}
+
+/* Define the numbers of the picoChip link and condition pseudo registers. */
+#define LINK_REGNUM 12
+#define CC_REGNUM 17
+#define ACC_REGNUM 16
+
+/* Order of Allocation of Registers */
+
+/* The registers are allocated starting with the caller-clobbered
+ registers, in reverse order. The registers are then listed in an
+ order which means that they are efficiently saved in pairs (i.e.,
+ one 32-bit store can be used instead of two 16-bit stores to save
+ the registers into the stack). The exception to this is the use of
+ r14 (AP) register, which also appears early on. This is because the
+ AP register can be used to encode memory operations more
+ efficiently than other registers. Some code can be made more
+ compact as a result. */
+ /* My current feeling is that r14 should go to the end and maybe even r12.
+ It seems like the overhead of store/load that will occur since we can't
+ pair anything up with r14 will be higher than the advantage of smaller
+ encoding.
+ Also r12 is put towards the end for leaf functions. Since leaf functions
+ do not make any calls, their prologue/epilogue wouldn't save and restore
+ its value, so it doesn't make sense for us to use it in the middle if we
+ can avoid it. */
+#define REG_ALLOC_ORDER {5,4,3,2,1,0,12,6,7,8,9,10,11,14,16,0,0,0,0,0}
+#define LEAF_REG_ALLOC_ORDER {5,4,3,2,1,0,6,7,8,9,10,11,14,12,16,0,0,0,0,0}
+
+/* We can dynamically change the REG_ALLOC_ORDER using the following hook.
+ It would be desirable to change it for leaf functions so we can put
+ r12 at the end of this list.*/
+#define ADJUST_REG_ALLOC_ORDER picochip_order_regs_for_local_alloc ()
+
+/* How Values Fit in Registers */
+
+/* Number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE. */
+#define HARD_REGNO_NREGS(REGNO, MODE) picochip_regno_nregs((REGNO), (MODE))
+
+/* Is it ok to place MODE in REGNO? Require that the register number
+ be aligned. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) picochip_hard_regno_mode_ok(REGNO, MODE)
+
+#define MODES_TIEABLE_P(MODE1,MODE2) 1
+
+/* Don't copy the cc register ('cos you can't put it back). */
+#define AVOID_CCMODE_COPIES 1
+
+/* Register Classes */
+
+enum reg_class
+{
+ NO_REGS, /* no registers in set */
+ FRAME_REGS, /* registers with a long offset */
+ PTR_REGS, /* registers without an offset */
+ CONST_REGS, /* registers for long constants */
+ NULL_REGS, /* registers which ignore writes */
+ CC_REGS, /* condition code registers */
+ ACC_REGS, /* Accumulator registers */
+ TWIN_REGS, /* registers which can be paired */
+ GR_REGS, /* general purpose registers */
+ ALL_REGS, /* all registers */
+ LIM_REG_CLASSES, /* max value + 1 */
+
+ /* Some aliases */
+ GENERAL_REGS = GR_REGS
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers in a cover class should be
+ cheaper than a load or store of those registers. The macro value is
+ an array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GR_REGS, LIM_REG_CLASSES \
+}
+
+
+/* The names of the register classes */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "FRAME_REGS", \
+ "PTR_REGS", \
+ "CONST_REGS", \
+ "NULL_REGS", \
+ "CC_REGS", \
+ "ACC_REGS", \
+ "TWIN_REGS", \
+ "GR_REGS", \
+ "ALL_REGS" \
+}
+
+/* Each reg class is an array of 32-bit integers. Each array must be
+ long enough to store one bit for every pseudo register. Thus in the
+ following code, each array only stores one 32-bit value. */
+#define REG_CLASS_CONTENTS \
+{ \
+ {0x00000000}, /* no registers */ \
+ {0x00002000}, /* frame */ \
+ {0x00004000}, /* pointer */ \
+ {0x00008000}, /* const */ \
+ {0x00008000}, /* null */ \
+ {0x00020000}, /* cc */ \
+ {0x00010000}, /* acc0 */ \
+ {0x00000FFF}, /* twin */ \
+ {0x000CFFFF}, /* general registers - includes pseudo-arg */ \
+ {0x000FFFFF} /* all registers - includes pseudo-arg */ \
+}
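+
+/* Decoding the masks above: FRAME_REGS is 0x00002000, i.e. bit 13
+ (register 13); CONST_REGS and NULL_REGS are both bit 15 (R15, the
+ long-constant/null register); TWIN_REGS (0x00000FFF) covers R0-R11. */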
+
+/* The earliest register class containing the given register. */
+extern const enum reg_class picochip_regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) picochip_regno_reg_class[REGNO]
+
+/* Any register can be a base pointer. */
+#define BASE_REG_CLASS GR_REGS
+
+/* Any register can be an index. */
+#define INDEX_REG_CLASS GR_REGS
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (REGNO_REG_CLASS (REGNO) != CC_REGS && REGNO_REG_CLASS (REGNO) != ACC_REGS)
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) 0
+
+#define CLASS_MAX_NREGS(CLASS, MODE) picochip_class_max_nregs(CLASS, MODE)
+
+
+/* Stack Layout and Calling Conventions */
+
+#define STACK_GROWS_DOWNWARD 1
+
+/* The frame pointer points to the outgoing argument area, so the
+ locals are above that. */
+#define STARTING_FRAME_OFFSET 0
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Specify where the return address lives before entry to the
+ prologue. This is required to enable DWARF debug information to be
+ generated. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LINK_REGNUM)
+
+#define RETURN_ADDR_RTX(count,frameaddr) picochip_return_addr_rtx(count,frameaddr)
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LINK_REGNUM)
+
+/* Registers that Address the Stack Frame */
+
+#define STACK_POINTER_REGNUM 13
+#define FRAME_POINTER_REGNUM 18
+#define ARG_POINTER_REGNUM 19
+
+/* Eliminating Frame Pointer and Arg Pointer. The frame and argument
+ pointers are eliminated wherever possible, by replacing them with
+ offsets from the stack pointer. */
+
+#define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM,TO,OFFSET) \
+ OFFSET = initial_elimination_offset(FROM, TO);
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define PUSH_ARGS 0
+
+/* Passing Arguments in Registers */
+
+/* Store the offset of the next argument. */
+#define CUMULATIVE_ARGS unsigned
+
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+/* The first 6 registers can hold parameters. */
+#define FUNCTION_ARG_REGNO_P(REGNO) ((REGNO) < 6)
+
+/* How Scalar Function Values are Returned
+ Do we need this?? */
+#define FUNCTION_VALUE(VALTYPE,FUNC) picochip_function_value(VALTYPE, FUNC, 0)
+
+#define LIBCALL_VALUE(MODE) (gen_rtx_REG (MODE, 0))
+
+/* Results are in register zero. If an SImode register is returned,
+ reg0 will suffice to mean R[0:1]. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) ((REGNO) == 0)
+
+/* Don't automatically pass struct's in memory - use the
+ * RETURN_IN_MEMORY macro to determine when structs are returned in
+ * memory, and when in registers. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Function Entry and Exit */
+
+/* The epilogue doesn't clobber anything. */
+#define EPILOGUE_USES(REGNO) 0
+
+/* Generating Code for Profiling. No profiling implemented */
+
+#define FUNCTION_PROFILER(FILE,LABELNO)
+
+/* Trampolines for Nested Functions */
+
+/* No trampolines. */
+#define TRAMPOLINE_SIZE 0
+
+/* Addressing Modes */
+
+#define MAX_REGS_PER_ADDRESS 1
+
+/* Legitimize reload address tries machine dependent means of
+ reloading addresses. There seems to be a strange error in gcc,
+ which necessitates this macro. Consider:
+
+ set (reg A) (symbol_ref)
+ set (reg B) (plus (reg A) (const_int))
+
+ A symbol_ref is a valid constant, so the symbol_ref is propagated
+ into the second instruction to generate the instruction:
+
+ set (reg B) (plus (symbol_ref) (const_int))
+
+ This is an invalid address, and find_reloads_address correctly
+ determines this. However, that function doesn't generate a valid
+ replacement for the now invalid address, and the invalid address is
+ output into the assembly language. To fix the problem without
+ changing gcc itself, the following macro tests when such an invalid
+ address has been computed, and wraps it up inside a constant rtx. A
+ constant rtx can be correctly reloaded by the function, and hence
+ correct code is generated. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+do { \
+ if (picochip_legitimize_reload_address(&X,MODE,OPNUM,TYPE,IND_LEVELS)) \
+ goto WIN; \
+ } while (0)
+
+/* Nonzero if the constant rtx X is a legitimate general operand. X
+ satisfies CONSTANT_P. */
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+
+/* Condition Code Status */
+
+#define CC_STATUS_MDEP unsigned
+#define CC_STATUS_MDEP_INIT (cc_status.mdep = 0)
+
+/* Describing Relative Costs of Operations */
+
+/* Bytes are no faster than words. */
+#define SLOW_BYTE_ACCESS 1
+
+/* The assembler is often able to optimise function call branches, so
+ don't try to CSE them in the compiler. This was the thinking before.
+ But now, we realise that the benefits from CSE would mostly outweigh
+ the disadvantages. */
+#define NO_FUNCTION_CSE
+
+
+/* Dividing the Output into Sections */
+
+#define TEXT_SECTION_ASM_OP ".section .text\n"
+#define DATA_SECTION_ASM_OP ".section .data\n"
+#define BSS_SECTION_ASM_OP ".section .bss\n"
+/* picoChip is Harvard (separate data/instruction memories), so
+ read-only data must go into the data section. */
+#define READONLY_DATA_SECTION_ASM_OP ".section .data\n"
+
+/* Defining the Output Assembler Language */
+
+/* The Overall Framework of an Assembler File */
+
+#define ASM_FILE_COMMENT "// "
+
+#define ASM_APP_ON "// High-level ASM start\n"
+#define ASM_APP_OFF "// High-level ASM end\n"
+
+#define ASM_OUTPUT_IDENT(STREAM,STRING) fprintf(STREAM, ".ident %s\n", STRING)
+
+/* Output of Data */
+
+#define ASM_OUTPUT_ASCII(FILE, PTR, LEN) picochip_output_ascii(FILE, PTR, LEN);
+
+/* Output of Uninitialized Variables */
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE,NAME,SIZE,ALIGN) \
+ picochip_output_aligned_common(FILE, NAME, SIZE, ALIGN)
+
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE,NAME,SIZE,ALIGN) \
+ picochip_output_aligned_local(FILE, NAME, SIZE, ALIGN)
+
+/* Output and Generation of Labels */
+
+#define ASM_OUTPUT_LABEL(STREAM,NAME) \
+ do { picochip_output_label(STREAM, NAME); } while (0);
+
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ { picochip_output_labelref(STREAM, NAME); }
+
+/* Format must match that of picochip_output_label. */
+#define ASM_GENERATE_INTERNAL_LABEL(STRING,PREFIX,NUM) \
+ picochip_generate_internal_label(STRING,PREFIX,(long)NUM)
+
+#define ASM_WEAKEN_LABEL(STREAM,NAME) picochip_weaken_label(STREAM,NAME);
+
+/* Store in OUTPUT a string (made with alloca) containing an
+ assembler-name for a local static variable named NAME. LABELNO is
+ an integer which is different for each call. The assembler can't
+ handle periods in names, so we use a ___ separator instead. */
+
+#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \
+( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 15), \
+ sprintf ((OUTPUT), "%s___%lu", (NAME), (unsigned long)(LABELNO)))
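+
+/* For example, a local static variable `foo' with LABELNO 5 is given
+ the assembler name `foo___5'. */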
+
+/* Macros Controlling Initialization Routines */
+
+/* By defining this, the main function won't try to call `__main'. */
+#define HAS_INIT_SECTION
+
+/* Output of Assembler Instructions */
+
+#define REGISTER_NAMES \
+{"R0", "R1", "R2", "R3", \
+ "R4", "R5", "R6", "R7", \
+ "R8", "R9", "R10", "R11", \
+ "R12", "FP", "R14", "R15", \
+ "acc0", "pseudoCC", "pseudoFP", "pseudoAP"}
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "R0", 0}, \
+ { "R1", 1}, \
+ { "R2", 2}, \
+ { "R3", 3}, \
+ { "R4", 4}, \
+ { "R5", 5}, \
+ { "R6", 6}, \
+ { "R7", 7}, \
+ { "R8", 8}, \
+ { "R9", 9}, \
+ { "R10", 10}, \
+ { "R11", 11}, \
+ { "R12", 12}, \
+ { "FP", 13}, \
+ { "R14", 14}, \
+ { "R15", 15}, \
+ { "acc0", 16}, \
+ { "sp", 12}, /* ABI stack pointer */ \
+ { "ln", 13}, /* arch link register */ \
+ { "ptr", 14}, /* arch constant pointer */ \
+ { "rc", 15}, /* arch constant register */ \
+ { "rz", 15}, /* arch zero */ \
+}
+
+/* Final prescan insn is called just before an instruction is
+ output. In our case, we use this to detect the VLIW slot to which
+ the instruction has been assigned, preparatory to generating the
+ VLIW output in ASM_OUTPUT_OPCODE. */
+#define FINAL_PRESCAN_INSN(insn, operand, nop) \
+ picochip_final_prescan_insn (insn, operand,nop)
+
+#define ASM_OUTPUT_OPCODE(FILE,PTR) \
+ { PTR = picochip_asm_output_opcode(FILE, PTR); }
+
+#define PRINT_OPERAND(STREAM,X,CODE) \
+ picochip_print_operand(STREAM, X, CODE)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(code) \
+ (((code) == '|') || ((code) == '#') || ((code) == '>'))
+
+#define PRINT_OPERAND_ADDRESS(STREAM,X) \
+ picochip_print_operand_address(STREAM,X)
+
+/* Output of Dispatch Tables */
+
+/* Initialise a data memory location to an absolute code label. Used
+ for building switch statement jump tables. Note - the format of the
+ label must match that of the function picochip_output_label. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(stream, value) \
+ fprintf (stream, ".initWord _L%d\n", value);
+
+/* Assembler Commands for Alignment */
+
+#define ASM_OUTPUT_SKIP(STREAM,BYTES) \
+ fprintf(STREAM, ".skip "HOST_WIDE_INT_PRINT_UNSIGNED"\n", BYTES);
+#define ASM_OUTPUT_ALIGN(STREAM,POWER) \
+ fprintf(STREAM, ".align %u\n", 1 << POWER);
+
+/* The elaborator doesn't output zero bytes in the text section. */
+#define ASM_NO_SKIP_IN_TEXT 1
+
+/* Controlling Debugging Information Format */
+
+/* Macros Affecting All Debugging Formats */
+
+#define DBX_REGISTER_NUMBER(REGNO) (REGNO)
+
+#define DWARF2_DEBUGGING_INFO
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#define DWARF2_FRAME_INFO 1
+
+/* Generate .file/.loc directives, so that the assembler generates the
+ line table. */
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+/* Miscellaneous Parameters */
+
+#define CASE_VECTOR_MODE HImode
+#define WORD_REGISTER_OPERATIONS
+#define LOAD_EXTEND_OP(MODE) ((MODE) == QImode ? SIGN_EXTEND : ZERO_EXTEND)
+#define MOVE_MAX 4
+#define SHIFT_COUNT_TRUNCATED 1
+#define Pmode HImode
+#define FUNCTION_MODE QImode
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) 1
+
+#define ASM_LONG ":TODO:.word\t"
+
+/* Define builtins for selected special-purpose instructions. */
+enum picochip_builtins
+{
+ PICOCHIP_BUILTIN_SBC,
+ PICOCHIP_BUILTIN_PUT,
+ PICOCHIP_BUILTIN_GET,
+ PICOCHIP_BUILTIN_TESTPORT,
+ PICOCHIP_BUILTIN_COPYSW,
+ PICOCHIP_BUILTIN_ADDS,
+ PICOCHIP_BUILTIN_SUBS,
+ PICOCHIP_BUILTIN_BREV,
+ PICOCHIP_BUILTIN_BYTESWAP,
+ PICOCHIP_BUILTIN_GET_ARRAY,
+ PICOCHIP_BUILTIN_PUT_ARRAY,
+ PICOCHIP_BUILTIN_TESTPORT_ARRAY,
+ PICOCHIP_BUILTIN_ASRI,
+ PICOCHIP_BUILTIN_HALT
+};
+
+#define NO_DOLLAR_IN_LABEL 1
+#define NO_DOT_IN_LABEL 1
+
+/* The assembler does support LEB128, despite the auto-configure test
+ not detecting this. */
+#define HAVE_AS_LEB128 1
+
+/* The End */
diff --git a/gcc/config/picochip/picochip.md b/gcc/config/picochip/picochip.md
new file mode 100644
index 000000000..b2f562ff7
--- /dev/null
+++ b/gcc/config/picochip/picochip.md
@@ -0,0 +1,2622 @@
+;; GCC machine description for picochip
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Picochip Ltd (http://www.picochip.com)
+;; Maintained by Daniel Towner (dant@picochip.com) and Hariharan
+;; Sandanagobalane (hariharan@picochip.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not, see
+;; <http://www.gnu.org/licenses/>.
+
+;; -------------------------------------------------------------------------
+
+;; In addition to the normal output operand formats, the following
+;; letter formats are also available:
+;;
+;; The following can be used for constants, or the constant part of a
+;; memory offset.
+;; Q - Output constant unaltered (byte mode).
+;; M - Alias for Q, which only works with memory operands.
+;; H - Divide constant by 2 (i.e., HImode is 2 bytes)
+;; S - Divide constant by 4 (i.e., SImode is 4 bytes)
+;;
+;; The following can be used for two part addresses (i.e., base +
+;; offset or base[offset]).
+;; o - Output offset only.
+;; b - Output base only.
+;;
+;; The following are used on SI registers and constants
+;; R - Output register pair (i.e., R[n:m])
+;; L - Output lower word/register
+;; U - Output upper word/register
+;;
+;; The following are used on DI mode registers.
+;; X - Output 3rd register
+;; Y - Output 4th register
+;;
+;; Miscellaneous
+;; | - Output VLIW separator
+;; r - Output register value of memory operand.
+;; I - Output an opcode (e.g., ADD for plus, LSL for lshift)
+;; i - Output an opcode in symbolic notation (e.g., + for plus)
+
+;; Define the length of an instruction. Used to allow different types
+;; of branches to be used for different branch offsets. Default to 6
+;; bytes, which is the longest possible single instruction.
+(define_attr "length" "" (const_int 6))
+
+;; Define some constants which are used in conjunction with branch
+;; scheduling. Branch offsets must be 10-bit signed, which equates to
+;; [-512,511]. However, to compensate for the lack of branch alignment,
+;; these offsets are halved.
+
+(define_constants
+ [
+ (MIN_BRANCH_OFFSET -256)
+ (MAX_BRANCH_OFFSET 255)
+ (SHORT_BRANCH_LENGTH 6) ; The size of a schedulable short branch.
+ (LONG_BRANCH_LENGTH 16) ; The size of an expanded JMP?? macro.
+ ]
+)
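+
+;; For example, a jump whose byte offset from the pc falls outside
+;; [MIN_BRANCH_OFFSET, MAX_BRANCH_OFFSET] is given LONG_BRANCH_LENGTH
+;; and must be emitted as the expanded JMP macro form (see the `jump'
+;; pattern below).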
+
+;; Define identifiers for various special instructions. These
+;; instructions may then be used in RTL expansions, or builtins.
+(define_constants
+ [
+ ; Special instruction builtins.
+ (UNSPEC_SBC 0) ; Sign-bit count
+ (UNSPEC_ADDS 1) ; Saturating addition
+ (UNSPEC_SUBS 2) ; Saturating subtraction
+ (UNSPEC_BREV 3) ; Bit reversal
+
+ ; Special internal instructions (only used by compiler)
+ (UNSPEC_COPYSW 5) ; Get status word
+ (UNSPEC_ADDC 6) ; Add with carry.
+
+ ; Scalar port communication builtins
+ (UNSPEC_PUT 7) ; Communication (put): port[op0] := op1
+ (UNSPEC_GET 8) ; Communication (get): op0 := get_port[op1]
+ (UNSPEC_TESTPORT 9) ; Communication (test): op0 := testport[op1]
+
+ ; Array port communication builtins. These all take extra
+ ; arguments giving information about the array access being used.
+ (UNSPEC_PUT_ARRAY 10) ; Array put
+ (UNSPEC_GET_ARRAY 11) ; Array get
+ (UNSPEC_TESTPORT_ARRAY 12) ; Array test port
+
+ ;; Array port expansions
+ (UNSPEC_CALL_GET_ARRAY 13) ;
+ (UNSPEC_CALL_PUT_ARRAY 14) ;
+ (UNSPEC_CALL_TESTPORT_ARRAY 15) ;
+
+ ; Array port low-level fn calls
+ (UNSPEC_CALL_GET_FN 16)
+ (UNSPEC_CALL_TESTPORT_FN 17)
+
+ ; Halt instruction.
+ (UNSPEC_HALT 18)
+
+ ; Internal TSTPORT instruction, used to generate a single TSTPORT
+ ; instruction for use in the testport branch split.
+ (UNSPEC_INTERNAL_TESTPORT 19)
+ ]
+)
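+
+;; Schematic example (illustrative only): the saturating-add builtin is
+;; carried through RTL as an unspec of roughly the form
+;; (set (reg:HI d) (unspec:HI [(reg:HI a) (reg:HI b)] UNSPEC_ADDS))
+;; which keeps the operation opaque to the generic optimisers.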
+
+;; Register ID's
+(define_constants
+ [
+ (LINK_REGNUM 12) ; Function link register.
+ (CC_REGNUM 17) ; Condition flags.
+ (ACC_REGNUM 16) ; Accumulator.
+ ]
+)
+
+;;============================================================================
+;; Predicates and constraints
+;;============================================================================
+
+(include "predicates.md")
+(include "constraints.md")
+
+;;============================================================================
+;; First operand shifting patterns. These allow certain instructions
+;; (e.g., add, and, or, xor, sub) to apply a shift-by-constant to
+;; their first operand.
+;;
+;; Note that only the first operand is matched by the shift, to ensure
+;; that non-commutative instructions (like subtract) work
+;; properly. When a commutative instruction, with a shift in the
+;; second operand is found, the compiler will reorder the operands to
+;; match.
+;;============================================================================
+
+(define_insn "*firstOpGenericAshift"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "picochip_first_op_shift_operator"
+ [(ashift:HI
+ (match_operand:HI 2 "register_operand" "r")
+ (match_operand:HI 3 "picochip_J_operand" "J"))
+ (match_operand:HI 4 "picochip_register_or_immediate_operand" "ri")]))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "%I1.0 [LSL %2,%3],%4,%0\t// %0 := (%2 << %3) %i1 %4"
+ [(set_attr "type" "picoAlu")
+ ;; A long constant must be used if the operator instruction doesn't
+ ;; accept immediates, or if the constant is too big to fit the
+ ;; immediate. Note that the following condition is written in the
+ ;; way which uses the least number of predicates.
+ (set (attr "longConstant")
+ (cond [(ior (match_operand 4 "register_operand")
+ (and (match_operand 1 "picochip_first_op_shift_operator_imm")
+ (match_operand 1 "picochip_J_operand")))
+ (const_string "false")]
+ (const_string "true")))])
+
+;; During combine, ashift gets converted into a multiply, necessitating the following pattern.
+;; Note that we do a log_2(imm) to get the actual LSL operand.
+
+(define_insn "*firstOpGenericAshift"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "picochip_first_op_shift_operator"
+ [(mult:HI
+ (match_operand:HI 2 "register_operand" "r")
+ (match_operand:HI 3 "power_of_2_imm_operand" "n"))
+ (match_operand:HI 4 "picochip_register_or_immediate_operand" "ri")]))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "%I1.0 [LSL %2,%P3],%4,%0\t// %0 := (%2 << %3) %i1 %4"
+ [(set_attr "type" "picoAlu")
+ ;; A long constant must be used if the operator instruction doesn't
+ ;; accept immediates, or if the constant is too big to fit the
+ ;; immediate. Note that the following condition is written in the
+ ;; way which uses the least number of predicates.
+ (set (attr "longConstant")
+ (cond [(ior (match_operand 4 "register_operand")
+ (and (match_operand 1 "picochip_first_op_shift_operator_imm")
+ (match_operand 1 "picochip_J_operand")))
+ (const_string "false")]
+ (const_string "true")))])
+
+(define_insn "*firstOpGenericAshiftrt"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "picochip_first_op_shift_operator"
+ [(ashiftrt:HI
+ (match_operand:HI 2 "register_operand" "r")
+ (match_operand:HI 3 "picochip_J_operand" "J"))
+ (match_operand:HI 4 "picochip_register_or_immediate_operand" "ri")]))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "%I1.0 [ASR %2,%3],%4,%0\t// %0 := (%2 >>{arith} %3) %i1 %4"
+ [(set_attr "type" "picoAlu")
+ ;; A long constant must be used if the operator instruction doesn't
+ ;; accept immediates, or if the constant is too big to fit the
+ ;; immediate. Note that the following condition is written in the
+ ;; way which uses the least number of predicates.
+ (set (attr "longConstant")
+ (cond [(ior (match_operand 4 "register_operand")
+ (and (match_operand 1 "picochip_first_op_shift_operator_imm")
+ (match_operand 1 "picochip_J_operand")))
+ (const_string "false")]
+ (const_string "true")))])
+
+(define_insn "*firstOpGenericLshiftrt"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (match_operator:HI 1 "picochip_first_op_shift_operator"
+ [(lshiftrt:HI
+ (match_operand:HI 2 "register_operand" "r")
+ (match_operand:HI 3 "picochip_J_operand" "J"))
+ (match_operand:HI 4 "picochip_register_or_immediate_operand" "ri")]))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "%I1.0 [LSR %2,%3],%4,%0\t// %0 := (%2 >> %3) %i1 %4"
+ [(set_attr "type" "picoAlu")
+ ;; A long constant must be used if the operator instruction doesn't
+ ;; accept immediates, or if the constant is too big to fit the
+ ;; immediate. Note that the following condition is written in the
+ ;; way which uses the least number of predicates.
+ (set (attr "longConstant")
+ (cond [(ior (match_operand 4 "register_operand")
+ (and (match_operand 1 "picochip_first_op_shift_operator_imm")
+ (match_operand 1 "picochip_J_operand")))
+ (const_string "false")]
+ (const_string "true")))])
+
+;;===========================================================================
+;; Jump instructions.
+;;===========================================================================
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:HI 0 "register_operand" "r"))]
+ ""
+ "JR (%0)\t// Indirect_jump to %0 %>"
+ [(set_attr "type" "realBranch")
+ (set_attr "length" "3")])
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "* return picochip_output_jump(insn);"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int MIN_BRANCH_OFFSET))
+ (le (minus (match_dup 0) (pc)) (const_int MAX_BRANCH_OFFSET)))
+ (const_int SHORT_BRANCH_LENGTH)
+ (const_int LONG_BRANCH_LENGTH)))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "6")
+ (const_string "realBranch")
+ (const_string "unknown")))])
+
+(define_insn "*fn_return"
+ [(return)
+ (use (reg:HI LINK_REGNUM))]
+ ""
+ "JR (R12)\t// Return to caller %>"
+ [(set_attr "length" "2")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "false")])
+
+;; Peephole either 2 LDWs or STWs into LDL/STL.
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (set (match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 3 "memory_operand" ""))]
+ "ok_to_peephole_ldw(operands[0],operands[1],operands[2],operands[3])"
+ [(set (match_dup 4) (match_dup 5))]
+ "{
+ operands[4] = gen_min_reg(operands[0],operands[2]);
+ operands[5] = gen_SImode_mem(operands[1],operands[3]);
+ }")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "register_operand" ""))
+ (set (match_operand:HI 2 "memory_operand" "")
+ (match_operand:HI 3 "register_operand" ""))]
+ "ok_to_peephole_stw(operands[0],operands[1],operands[2],operands[3])"
+ [(set (match_dup 4) (match_dup 5))]
+ "{
+ operands[4] = gen_SImode_mem(operands[0],operands[2]);
+ operands[5] = gen_min_reg(operands[1],operands[3]);
+ }")
+
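+;; Illustrative sketch only: two HI mode loads from adjacent, suitably
+;; aligned locations, e.g.
+;;
+;;   short a = p[0]; short b = p[1];
+;;
+;; can be merged into a single LDL of a register pair, with gen_min_reg
+;; choosing the lower-numbered register of the pair and gen_SImode_mem
+;; building the combined SI mode memory reference (legality is checked
+;; by ok_to_peephole_ldw/ok_to_peephole_stw).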
+
+;; We have instructions like add, subtract, ior and and that set the
+;; condition codes if they are executed in slot 0. If we have
+;; add a = b + c
+;; if (a!=0)
+;; {}
+;; We would have RTL sequence like
+;; add.# rb,rc,ra # the '#' is replaced by the slot number after scheduling
+;; sub.0 ra,0,r15
+;; bnz
+;; Instead, we can just do
+;; add.0 rb,rc,ra
+;; bnz
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 6 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (plus:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup:HI 3 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 6))
+ (pc)))
+ (use (match_dup 7))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (plus:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)]))
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator 4 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ (use (match_operand:HI 6 "const_int_operand" ""))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (plus:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else (match_op_dup:HI 4 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))
+ (use (match_dup 6))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+
+;; If peephole happens before the cbranch split
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 6 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (minus:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup:HI 3 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 6))
+ (pc)))
+ (use (match_dup 7))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+
+;; If peephole happens after the cbranch split
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)]))
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator 4 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ (use (match_operand:HI 6 "const_int_operand" ""))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (minus:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else (match_op_dup:HI 4 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))
+ (use (match_dup 6))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+;; If peephole happens before the cbranch split
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 6 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (and:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup:HI 3 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 6))
+ (pc)))
+ (use (match_dup 7))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)]))
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator 4 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ (use (match_operand:HI 6 "const_int_operand" ""))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (and:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else (match_op_dup:HI 4 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))
+ (use (match_dup 6))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+;; If peephole happens before the cbranch split
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (ior:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)])
+ (label_ref (match_operand 6 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (ior:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup:HI 3 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 6))
+ (pc)))
+ (use (match_dup 7))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (ior:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(match_dup 0) (const_int 0)]))
+ (parallel [(set (pc)
+ (if_then_else
+ (match_operator 4 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ (use (match_operand:HI 6 "const_int_operand" ""))])]
+ ""
+ [(parallel [(set (match_dup 0)
+ (ior:HI (match_dup 1) (match_dup 2)))
+ (set (reg:CC CC_REGNUM)
+ (match_op_dup 3 [(const_int 0) (const_int 0)]))])
+ (parallel [(set (pc)
+ (if_then_else (match_op_dup:HI 4 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 5))
+ (pc)))
+ (use (match_dup 6))])]
+ "{
+ operands[7] = GEN_INT(0);
+ }")
+
+;; Conditional branch (HI). This is split into separate compare and
+;; branch instructions if scheduling is enabled. The branch
+;; instruction is supplied with the type of comparison on which the
+;; branch should occur.
+
+(define_insn_and_split "cbranchhi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator:CC 0 "ordered_comparison_operator"
+ [(match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "picochip_comparison_operand" "ri")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "* return picochip_output_cbranch(operands);"
+ "reload_completed
+ && (picochip_schedule_type != DFA_TYPE_NONE || flag_delayed_branch)"
+ [(set (reg:CC CC_REGNUM) (match_dup 0))
+ (parallel [(set (pc)
+ (if_then_else (match_op_dup:HI 0 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))
+ (use (match_dup 4))])]
+ "{
+ operands[4] = GEN_INT(GET_CODE(operands[0]));
+ }")
+
+;; The only difference between this and the next pattern is that the
+;; next pattern might introduce subtracts whose first operand is a
+;; constant, which would have to be a longConstant. We know that no
+;; such situation can arise for the supported comparison operators, so
+;; this pattern assumes that the second constraint alternative still
+;; generates a normal (short) instruction.
+
+(define_insn "*supported_compare"
+ [(set (reg:CC CC_REGNUM)
+ (match_operator:CC 0 "picochip_supported_comparison_operator"
+ [(match_operand:HI 1 "register_operand" "r,r,r")
+ (match_operand:HI 2 "picochip_comparison_operand" "r,J,i")]))]
+ ""
+ "* return picochip_output_compare(operands);"
+ [; Must be picoAlu because it sets the condition flags.
+ (set_attr "type" "picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,false,true")
+ (set_attr "length" "2,2,4")
+ ])
+
+;; This pattern was added to complement the previous pattern. During
+;; if-conversion, the pattern generated via movhicc carries a plain eq
+;; rather than an eq:CC operator. Without this pattern, GCC decides not
+;; to use movhicc at all (GCC 4.4 appears to be cleverer about this).
+(define_insn "*supported_compare1"
+ [(set (reg:CC CC_REGNUM)
+ (match_operator 0 "picochip_supported_comparison_operator"
+ [(match_operand:HI 1 "register_operand" "r,r,r")
+ (match_operand:HI 2 "picochip_comparison_operand" "r,J,i")]))]
+ ""
+ "* return picochip_output_compare(operands);"
+ [; Must be picoAlu because it sets the condition flags.
+ (set_attr "type" "picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,false,true")
+ (set_attr "length" "2,2,4")
+ ])
+
+(define_insn "*compare"
+ [(set (reg:CC CC_REGNUM)
+ (match_operator:CC 0 "comparison_operator"
+ [(match_operand:HI 1 "register_operand" "r,r,r")
+ (match_operand:HI 2 "picochip_comparison_operand" "r,M,i")]))]
+ ""
+ "* return picochip_output_compare(operands);"
+ [; Must be picoAlu because it sets the condition flags.
+ (set_attr "type" "picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true,true")
+ (set_attr "length" "2,4,4")
+ ])
+
+; Match a branch instruction, created from a tstport/cbranch split.
+; We use a "use" clause so GCC doesnt try to use this pattern generally.
+(define_insn "branch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 2 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (use (match_operand:HI 1 "const_int_operand" ""))]
+ ""
+ "* return picochip_output_branch(operands, insn);"
+ [(set (attr "length")
+ (if_then_else
+ (and (ge (minus (match_dup 0) (pc)) (const_int MIN_BRANCH_OFFSET))
+ (le (minus (match_dup 0) (pc)) (const_int MAX_BRANCH_OFFSET)))
+ (const_int SHORT_BRANCH_LENGTH)
+ (const_int LONG_BRANCH_LENGTH)))
+ (set (attr "type")
+ (if_then_else
+ (eq_attr "length" "6")
+ (const_string "realBranch")
+ (const_string "unknown")))])
+
+;; If a movqi is used which accesses memory on a machine which doesn't
+;; have byte addressing, synthesise the instruction using word load/store
+;; operations. The movqi's that are required during the reload phase are
+;; handled using reload_inqi/reload_outqi.
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+
+ if (!reload_completed &&
+ !TARGET_HAS_BYTE_ACCESS &&
+ (MEM == GET_CODE(operands[0]) || MEM == GET_CODE(operands[1])))
+ {
+ rtx address;
+ rtx wordAddress;
+ rtx const1;
+ rtx shiftVal;
+ rtx loadedValue;
+ rtx addressMask;
+ rtx topByteValue;
+ rtx signExtendedValue;
+
+
+ warn_of_byte_access();
+
+ /* Load the constant 1 into a register. */
+ const1 = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, const1, GEN_INT(1)));
+
+ /* Load the address mask with the bitwise complement of 1. */
+ addressMask = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, addressMask, GEN_INT(-2)));
+
+ /* Handle loads first, in case we are dealing with a mem := mem
+ * instruction. */
+ if (MEM == GET_CODE(operands[1]))
+ {
+ /* Loads work as follows. The entire word containing the desired byte
+ * is loaded. The bottom bit of the address indicates which
+ * byte is required. The desired byte is moved into the most
+ * significant byte, and then an arithmetic shift right
+ * invoked to achieve sign extension. The desired byte is
+ * moved to the MSB by XOR'ing the bottom address bit by 1,
+ * multiplying the result by 8, and then shifting left by
+ * that amount. Note that shifts only operate on the bottom
+ * 4 bits of the shift amount, so although the XOR may
+ * produce a value with its upper bits set, only the
+ * inverted, shifted bottom address bit actually
+ * gets used.
+ */
+
+ /* Ensure the address is in a register. */
+ address = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, address, XEXP(operands[1], 0)));
+
+ /* Compute the word address by masking out the bottom bit. */
+ wordAddress = gen_reg_rtx(HImode);
+ emit_insn(gen_andhi3(wordAddress, address, addressMask));
+
+ /* Compute the shift value. This is the bottom address bit,
+ * inverted, and multiplied by 8. */
+ shiftVal = gen_reg_rtx(HImode);
+ emit_insn(gen_xorhi3(shiftVal, address, const1));
+ emit_insn(gen_ashlhi3(shiftVal, shiftVal, GEN_INT(3)));
+
+ /* Emit the memory load. */
+ loadedValue = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, loadedValue, gen_rtx_MEM(HImode, wordAddress)));
+
+ /* Shift the desired byte to the most significant byte. */
+ topByteValue = gen_reg_rtx (HImode);
+ emit_insn (gen_ashlhi3 (topByteValue, loadedValue, shiftVal));
+
+ /* Sign extend the top-byte back into the bottom byte. */
+ signExtendedValue = gen_reg_rtx(HImode);
+ emit_insn(gen_ashrhi3(signExtendedValue, topByteValue, GEN_INT(8)));
+
+ /* Final extraction of QI mode register. */
+ operands[1] = gen_rtx_SUBREG(QImode, signExtendedValue, 0);
+
+ }
+
+ if (MEM == GET_CODE(operands[0]) && GET_CODE(operands[1]) != MEM)
+ {
+ rtx zeroingByteMask;
+ rtx temp;
+ rtx tempHiMode;
+ rtx lsbByteMask;
+
+ /* Get the address. */
+ address = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, address, XEXP(operands[0], 0)));
+
+ /* Compute the word aligned address. */
+ wordAddress = gen_reg_rtx(HImode);
+ emit_insn(gen_andhi3(wordAddress, address, addressMask));
+
+ /* Compute the shift value. */
+ shiftVal = gen_reg_rtx(HImode);
+ emit_insn(gen_andhi3(shiftVal, address, const1));
+ emit_insn(gen_ashlhi3(shiftVal, shiftVal, GEN_INT(3)));
+
+ /* Emit the memory load. */
+ loadedValue = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, loadedValue, gen_rtx_MEM(HImode, wordAddress)));
+
+ /* Zero out the destination bits by AND'ing with 0xFF00
+ * shifted appropriately. */
+ zeroingByteMask = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, zeroingByteMask, GEN_INT(-256)));
+ emit_insn(gen_lshrhi3(zeroingByteMask, zeroingByteMask, shiftVal));
+ emit_insn(gen_andhi3(loadedValue, loadedValue, zeroingByteMask));
+
+ /* Grab the incoming QI register, and ensure that the top bits
+ * are zeroed out. This is because the register may be
+ * storing a signed value, in which case the top-bits will be
+ * sign bits. These must be removed to ensure that the
+ * read-modify-write (which uses an OR) doesn't pick up those
+ * bits, instead of the original memory value which is being
+ * modified.
+ */
+ tempHiMode = simplify_gen_subreg(HImode, operands[1], QImode, 0);
+ temp = gen_reg_rtx(HImode);
+ emit_insn(gen_rtx_SET(HImode, temp, tempHiMode));
+ lsbByteMask = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (HImode, lsbByteMask, GEN_INT (0xFF)));
+ emit_insn (gen_andhi3 (temp, temp, lsbByteMask));
+
+ /* Shift the incoming byte value by the appropriate amount,
+ * and OR into the load value. */
+ emit_insn(gen_ashlhi3(temp, temp, shiftVal));
+ emit_insn(gen_iorhi3(loadedValue, loadedValue, temp));
+
+ /* Rewrite the original assignment, to assign the new value
+ * to the word address. */
+ operands[0] = gen_rtx_MEM(HImode, wordAddress);
+ operands[1] = loadedValue;
+
+ }
+
+ }
+})
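+
+;; Illustrative sketch only (not part of the port): in C terms, and
+;; assuming a hypothetical word-granular helper load_word and a host
+;; with arithmetic right shifts, the synthesised byte load is roughly
+;;
+;;   short load_byte (unsigned short addr)
+;;   {
+;;     unsigned short word = load_word (addr & ~1); /* containing word */
+;;     unsigned short sh = ((addr ^ 1) & 1) * 8;    /* even byte -> 8 */
+;;     return (short) (unsigned short) (word << sh) >> 8; /* MSB, ASR */
+;;   }
+;;
+;; The store side similarly loads the containing word, masks out the
+;; target byte, ORs in the shifted value and stores the word back.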
+
+(define_insn "*movqi_sign_extend"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "memory_operand" "a,m")))]
+ "TARGET_HAS_BYTE_ACCESS"
+ "@
+ LDB (%a1),%0\t\t// %0 = Mem(%a1)
+ LDB %a1,%0\t\t// %0 = Mem(%M1{byte})"
+ [(set_attr "type" "mem,mem")
+ (set_attr "longConstant" "true,false")
+ (set_attr "length" "4,4")])
+
+;; movqi instructions for machines with and without byte access.
+(define_insn "*movqi_byte"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,r,r,a,m")
+ (match_operand:QI 1 "general_operand" "r,a,m,I,i,r,r"))]
+ "TARGET_HAS_BYTE_ACCESS"
+ "@
+ COPY.%# %1, %0\t// %0 := %1
+ LDB (%a1),%0\t\t// %0 = Mem(%a1)
+ LDB %a1,%0\t\t// %0 = Mem(%M1{byte})
+ COPY.%# %1,%0\t\t// %0 := #%1 (QI) (short constant)
+ COPY.%# %1,%0\t\t// %0 := #%1 (QI) (long constant)
+ STB %1,(%a0)\t\t// Mem(%a0) := %1
+ STB %1,%a0\t\t// Mem(%M0{byte}) := %1"
+ [(set_attr "type" "basicAlu,mem,mem,basicAlu,basicAlu,mem,mem")
+ (set_attr "longConstant" "false,true,false,false,true,true,false")
+ (set_attr "length" "2,4,4,2,4,4,4")])
+
+;; Machines which don't have byte access can copy registers, and load
+;; constants, but can't access memory. The define_expand for movqi
+;; should already have rewritten memory accesses using word
+;; operations. The exception is qi reloads, which are handled using
+;; the reload_? patterns.
+(define_insn "*movqi_nobyte"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (match_operand:QI 1 "picochip_register_or_immediate_operand" "r,i"))]
+ "!TARGET_HAS_BYTE_ACCESS"
+ "@
+ COPY.%# %1,%0\t// %0 := %1
+ COPY.%# %1,%0\t\t// %0 := #%1 (QI)")
+
+(define_insn "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,a,m,r,r")
+ (match_operand:HI 1 "general_operand" "r,a,m,r,r,I,i"))]
+ ""
+ "@
+ COPY.%# %1,%0\t\t// %0 := %1
+ LDW (%a1),%0\t\t// %0 := Mem(%a1)
+ LDW %a1,%0\t\t// %0 = Mem(%M1{byte})
+ STW %1,(%a0)\t\t// Mem(%a0) := %1
+ STW %1,%a0\t\t// Mem(%M0{byte}) := %1
+ COPY.%# %1,%0\t// %0 := %1 (short constant)
+ COPY.%# %1,%0\t// %0 := %1 (long constant)"
+ [(set_attr "type" "basicAlu,mem,mem,mem,mem,basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true,false,true,false,false,true")
+ (set_attr "length" "2,4,4,4,4,2,4")])
+
+(define_insn "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,a,m")
+ (match_operand:SI 1 "general_operand" "r,a,m,i,r,r"))]
+ ""
+ "@
+ // %R0 := %R1 (SI)\n\tCOPY.%# %L1,%L0 %| COPY.1 %U1,%U0
+ LDL (%a1),%R0\t\t// %R0 = Mem(%a1)
+ LDL %a1,%R0\t\t// %R0 = Mem(%M1{byte})
+ // %R0 := #%1 (SI)\n\tCOPY.%# %L1,%L0 %| COPY.%# %U1,%U0
+ STL %R1,(%a0)\t\t// Mem(%a0) := %R1
+ STL %R1,%a0\t\t// Mem(%M0{byte}) := %R1"
+ [(set_attr "type" "unknown,mem,mem,unknown,mem,mem")
+ (set_attr "longConstant" "false,true,false,true,false,false")
+ (set_attr "length" "4,4,4,6,4,4")])
+
+; Split an SI mode register copy into separate HI mode copies, which
+; can be VLIW'd with other instructions. Only split the instruction
+; when VLIW scheduling is enabled. Splitting the instruction saves
+; some code space.
+;
+; This is predicated on reload_completed. This ensures that the
+; instructions aren't broken up too early, which can result in the
+; SImode code being converted into inefficient HI mode code.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))]
+ "reload_completed && picochip_schedule_type == DFA_TYPE_SPEED"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "{
+ operands[2] = gen_lowpart (HImode, operands[0]);
+ operands[3] = gen_lowpart (HImode, operands[1]);
+ operands[4] = gen_highpart (HImode, operands[0]);
+ operands[5] = gen_highpart (HImode, operands[1]);
+ }")
+
+; SI Mode split for load constant.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "{
+ operands[2] = gen_lowpart (HImode, operands[0]);
+ operands[3] = picochip_get_low_const(operands[1]);
+ operands[4] = gen_highpart (HImode, operands[0]);
+ operands[5] = picochip_get_high_const(operands[1]);
+ }")
+
+(define_insn "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:SF 1 "general_operand" "r,m,i,r"))]
+ ""
+ "@
+ // %R0 := %R1 (SF)\n\tCOPY.%# %L1,%L0 %| COPY.1 %U1,%U0
+ LDL %a1,%R0\t\t// %R0 :={SF} Mem(%M1{byte})
+ // %R0 := #%1 (SF)\n\tCOPY.%# %L1,%L0\n\tCOPY.%# %U1,%U0
+ STL %R1,%a0\t\t// Mem(%M0{byte}) :={SF} %R1")
+
+;; memcpy pattern
+;; 0 = destination (mem:BLK ...)
+;; 1 = source (mem:BLK ...)
+;; 2 = count
+;; 3 = alignment
+(define_expand "movmemhi"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")
+ (match_operand 3 "" "")]
+ "picochip_schedule_type != DFA_TYPE_NONE"
+ "if (picochip_expand_movmemhi(operands)) DONE; FAIL;"
+)
+
+;;===========================================================================
+;; NOP
+;;===========================================================================
+
+;; No-operation (NOP)
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "NOP\t// nop"
+ [(set_attr "length" "1")])
+
+;;===========================================================================
+;; Function Calls. Define expands are used to ensure that the correct
+;; type of pattern is emitted, and then the define_insn's match the
+;; pattern using the correct types.
+;;
+;; Note: The comments output as part of these instructions are detected by
+;; the linker. Don't change the comments!
+;;===========================================================================
+
+(define_expand "call"
+ [(parallel [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "const_int_operand" ""))
+ (clobber (reg:HI LINK_REGNUM))])]
+ ""
+ "")
+
+(define_insn "call_for_divmod"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand 1 "const_int_operand" ""))]
+ ""
+ "JL (%M0)\t// fn_call %M0%>"
+ [(set_attr "length" "4")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "true")])
+
+(define_insn "*call_using_symbol"
+ [(call (mem:QI (match_operand:HI 0 "immediate_operand" "i"))
+ (match_operand 1 "const_int_operand" ""))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+ "JL (%M0)\t// fn_call %M0%>"
+ [(set_attr "length" "4")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "true")])
+
+(define_insn "*call_using_register"
+ [(call (mem:QI (match_operand:HI 0 "register_operand" "r"))
+ (match_operand 1 "const_int_operand" ""))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+ "JL (%r0)\t// fn_call_unknown %r0%>"
+ [(set_attr "length" "2")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "false")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand:HI 0 "" "")
+ (call:HI (match_operand:QI 1 "memory_operand" "g")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:HI LINK_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*call_value_using_symbol"
+ [(set (match_operand:HI 0 "" "")
+ (call:HI (mem:QI (match_operand:HI 1 "immediate_operand" "i"))
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+ "JL (%M1)\t// fn_call %M1 (value return)%>"
+ [(set_attr "length" "4")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "true")])
+
+(define_insn "*call_value_using_register"
+ [(set (match_operand:HI 0 "" "")
+ (call:HI (mem:QI (match_operand:HI 1 "register_operand" "r"))
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+ "JL (%r1)// fn_call_unknown %r1 (value return)%>"
+ [(set_attr "length" "2")
+ (set_attr "type" "realBranch")
+ (set_attr "longConstant" "false")])
+
+;;===========================================================================
+;; Addition
+;;===========================================================================
+
+;; Note that the addition of a negative value is transformed into the
+;; subtraction of a positive value, so that the add/sub immediate slot
+;; can make better use of the 4-bit range.
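+;; For example (illustrative only): x + (-5) is emitted as a SUB of the
+;; positive constant 5 rather than as an ADD of -5, since 5 fits the
+;; 4-bit immediate slot while -5 does not.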
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r,r,r,r")
+ (match_operand:HI 2 "general_operand" "r,M,n,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ { if (CONST_INT == GET_CODE(operands[2]) &&
+ INTVAL(operands[2]) > -16 &&
+ INTVAL(operands[2]) < 0)
+ return "SUB.%# %1,-(%2),%0\t// %0 := %1 + %2 (HI)";
+ else
+ return "ADD.%# %1,%2,%0\t// %0 := %1 + %2 (HI)";
+ }
+ [(set_attr "type" "basicAlu,basicAlu,basicAlu,basicAlu")
+ (set_attr "longConstant" "false,false,true,true")
+ (set_attr "length" "2,2,4,4")]
+ )
+
+
+;; If we peepholed the compare instruction out, we need to make sure the add
+;; goes in slot 0. This pattern is just to accomplish that.
+
+(define_insn "addhi3_with_use_clause"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r,r,r,r")
+ (match_operand:HI 2 "general_operand" "r,M,n,i")))
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(const_int 0)
+ (const_int 0)]))]
+ ""
+ { if (CONST_INT == GET_CODE(operands[2]) &&
+ INTVAL(operands[2]) > -16 &&
+ INTVAL(operands[2]) < 0)
+ return "SUB.0 %1,-(%2),%0\t// %0 := %1 + %2 (HI)";
+ else
+ return "ADD.0 %1,%2,%0\t// %0 := %1 + %2 (HI)";
+ }
+ [(set_attr "type" "picoAlu,picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,false,true,true")
+ (set_attr "length" "2,2,4,4")]
+ )
+
+;; Match an addition in which the first operand has been shifted
+;; (e.g., the comms array functions can emit such instructions).
+(define_insn "*addWith1stOpShift"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (ashift:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "const_int_operand" ""))
+ (match_operand:HI 3 "immediate_operand" "I,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "ADD.0 [LSL %1,%2],%3,%0\t// %0 := (%1 << %2) + %3"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")])
+
+(define_insn_and_split "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "general_operand" "r,i")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "// %0 := %1 + %2 (SI)\n\tADD.0 %L1,%L2,%L0\n\tADDC.0 %U1,%U2,%U0"
+ "reload_completed && picochip_schedule_type != DFA_TYPE_NONE"
+ [(match_dup 4)
+ (match_dup 5)]
+ "
+{
+ rtx op0_high = gen_highpart (HImode, operands[0]);
+ rtx op1_high = gen_highpart (HImode, operands[1]);
+ rtx op0_low = gen_lowpart (HImode, operands[0]);
+ rtx op1_low = gen_lowpart (HImode, operands[1]);
+ rtx op2_high, op2_low;
+
+ if (CONST_INT == GET_CODE(operands[2]))
+ {
+ op2_high = picochip_get_high_const(operands[2]);
+ op2_low = picochip_get_low_const(operands[2]);
+ } else {
+ op2_high = gen_highpart (HImode, operands[2]);
+ op2_low = gen_lowpart (HImode, operands[2]);
+ }
+
+ operands[4] = gen_add_multi_lower (op0_low, op1_low, op2_low);
+ operands[5] = gen_add_multi_upper (op0_high, op1_high, op2_high);
+
+}")
+
+;; Perform the lowest part of a multi-part addition (SI/DI). This sets
+;; the flags, so it is a picoAlu instruction (we could use a
+;; conventional addhi, but the addhi is better off being treated as
+;; a basicAlu instruction rather than a picoAlu instruction).
+(define_insn "add_multi_lower"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r,r,r")
+ (match_operand:HI 2 "general_operand" "r,M,i")))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (plus:HI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))]
+ ""
+ { if (CONST_INT == GET_CODE(operands[2]) &&
+ INTVAL(operands[2]) > -16 &&
+ INTVAL(operands[2]) < 0)
+ return "SUB.%# %1,-(%2),%0\t// %0+carry := %1 + %2 (low multi-part)";
+ else
+ return "ADD.%# %1,%2,%0\t// %0+carry := %1 + %2 (low multi-part)";
+ }
+ [(set_attr "type" "picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,false,true")
+ (set_attr "length" "2,2,4")])
+
+;; Perform the central part of a multi-part addition (DI). This uses
+;; the CC register, and also sets the CC register, so needs to be
+;; placed in the first ALU slot. Note that the ADDC must
+;; use the long constant to represent immediates.
+(define_insn "add_multi_mid"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r,r")
+ (plus:HI (match_operand:HI 2 "general_operand" "r,i")
+ (reg:CC CC_REGNUM))))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (plus:HI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))]
+ ""
+ "ADDC.%# %1,%2,%0\t// %0+carry := carry + %1 + %2 (mid multi-part)"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "2,4")])
+
+;; Perform the highest part of a multi-part addition (SI/DI). This
+;; uses the CC register, but doesn't require any registers to be set,
+;; so may be scheduled in either of the ALU's. Note that the ADDC must
+;; use the long constant to represent immediates.
+(define_insn "add_multi_upper"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r,r")
+ (plus:HI (match_operand:HI 2 "general_operand" "r,i")
+ (reg:CC CC_REGNUM))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "ADDC.%# %1,%2,%0\t// %0 := carry + %1 + %2 (high multi-part)"
+ [(set_attr "type" "basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "2,4")])
+
+;; The lea instruction is a special type of add operation, which looks
+;; like a movhi (reg := address). It expands into reg := fp +
+;; offset. Ideally there should be two variants, which take different
+;; sized offsets (i.e., using the long constant, or not, as
+;; appropriate). However, the address operand may have arbitrary
+;; values added to it later (i.e., the AP will be eliminated, possibly
+;; converting a small offset into a long offset), so a long offset is
+;; always assumed.
+
+;; Note that the lea can use an addition, and hence may modify the CC
+;; register. This upsets scheduling, so instead the lea is placed in
+;; ALU 1 where it cannot modify CC.
+
+(define_insn "*lea_add"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r")
+ (plus:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "immediate_operand" "i")))]
+ ""
+ "ADD.1 %1,%2,%0\t// lea (add)")
+
+;; Note that, though this instruction looks similar to the movhi pattern,
+;; "p" constraint cannot be specified for operands other than
+;; address_operand, hence the extra pattern below.
+(define_insn "*lea_move"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r")
+ (match_operand:HI 1 "address_operand" "p"))]
+ ""
+ {
+ if (REG == GET_CODE(operands[1]))
+ return "COPY.1 %1,%0\t// %0 := %1 (lea)";
+ else
+ return "ADD.1 %b1,%o1,%0\t\t// %0 := %b1 + %o1 (lea)";
+ }
+ [(set_attr "type" "nonCcAlu")
+ (set_attr "longConstant" "true")
+ (set_attr "length" "4")])
+
+
+;;===========================================================================
+;; Subtraction. Note that these patterns never take immediate second
+;; operands, since those cases are handled by canonicalising the
+;; instruction into the addition of a negative constant.
+;; But, if the first operand needs to be a negative constant, it
+;; is supported here.
+;;===========================================================================
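+
+;; For example (illustrative only): 5 - x matches the constant-first
+;; alternatives below (encoded with a long constant), whereas x - 5
+;; never reaches these patterns, having been canonicalised to
+;; x + (-5).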
+
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "r,I,i")
+ (match_operand:HI 2 "register_operand" "r,r,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "SUB.%# %1,%2,%0 // %0 := %1 - %2 (HI)"
+ [(set_attr "type" "basicAlu,basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true,true")
+ (set_attr "length" "2,4,4")])
+
+;; If we peepholed the compare instruction out, we need to make sure the
+;; sub goes in slot 0. This pattern is just to accomplish that.
+
+(define_insn "subhi3_with_use_clause"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "r,I,i")
+ (match_operand:HI 2 "register_operand" "r,r,r")))
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(const_int 0)
+ (const_int 0)]))]
+ ""
+ "SUB.0 %1,%2,%0 // %0 := %1 - %2 (HI)"
+ [(set_attr "type" "picoAlu,picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true,true")
+ (set_attr "length" "2,4,4")])
+
+(define_insn_and_split "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "general_operand" "r,i")
+ (match_operand:SI 2 "register_operand" "r,r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "// %0 := %1 - %2 (SI)\n\tSUB.%# %L1,%L2,%L0\n\tSUBB.%# %U1,%U2,%U0"
+ "reload_completed && picochip_schedule_type != DFA_TYPE_NONE"
+ [(match_dup 4)
+ (match_dup 5)]
+ "
+{
+ rtx op0_high = gen_highpart (HImode, operands[0]);
+ rtx op0_low = gen_lowpart (HImode, operands[0]);
+ rtx op2_high = gen_highpart (HImode, operands[2]);
+ rtx op2_low = gen_lowpart (HImode, operands[2]);
+ rtx op1_high,op1_low;
+
+ if (CONST_INT == GET_CODE(operands[1]))
+ {
+ op1_high = picochip_get_high_const(operands[1]);
+ op1_low = picochip_get_low_const(operands[1]);
+ } else {
+ op1_high = gen_highpart (HImode, operands[1]);
+ op1_low = gen_lowpart (HImode, operands[1]);
+ }
+
+
+ operands[4] = gen_sub_multi_lower (op0_low, op1_low, op2_low);
+ operands[5] = gen_sub_multi_upper (op0_high, op1_high, op2_high);
+
+}")
+
+;; Match the patterns emitted by the multi-part subtraction splitting.
+;; This sets the CC register, so it needs to go into slot 0.
+(define_insn "sub_multi_lower"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "r,i")
+ (match_operand:HI 2 "register_operand" "r,r")))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (minus:HI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ ""
+ "SUB.%# %1,%2,%0\t// %0+carry := %1 - %2 (lower SI)"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "2,4")])
+
+;; Perform the central part of a multi-part subtraction (DI). This uses
+;; the CC register, and also sets the CC register, so needs to be
+;; placed in the first ALU.
+(define_insn "sub_multi_mid"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "r,i")
+ (minus:HI (match_operand:HI 2 "register_operand" "r,r")
+ (reg:CC CC_REGNUM))))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (minus:HI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))]
+ ""
+ "SUBB.%# %1,%2,%0\t// %0+carry := carry - %1 - %2 (mid multi-part)"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "2,4")])
+
+(define_insn "sub_multi_upper"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (minus:HI (match_operand:HI 1 "general_operand" "r,i")
+ (minus:HI (match_operand:HI 2 "register_operand" "r,r")
+ (reg:CC CC_REGNUM))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "SUBB.%# %1,%2,%0\t// %0 := carry - %1 - %2 (upper SI)"
+ [(set_attr "type" "basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "2,4")])
+
+;;===========================================================================
+;; Multiplication (signed)
+;;===========================================================================
+
+(define_insn "multiply_machi"
+ [(set (reg:HI ACC_REGNUM)
+ (mult:HI (match_operand:HI 0 "register_operand" "r,r")
+ (match_operand:HI 1
+ "picochip_register_or_immediate_operand" "r,i")))]
+ "TARGET_HAS_MAC_UNIT"
+ "MUL %0,%1,acc0\t// acc0 := %0 * %1 (signed)"
+ [(set_attr "length" "3,5")
+ (set_attr "type" "mac,mac")
+ (set_attr "longConstant" "false,true")])
+
+(define_expand "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "picochip_register_or_immediate_operand" "")))]
+ "TARGET_HAS_MULTIPLY"
+ "")
+
+;; Different types of mulhi, depending on the AE type. If the AE has a
+;; MUL unit, use the following pattern.
+(define_insn "*mulhi3_mul"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (mult:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2
+ "picochip_register_or_immediate_operand" "r,i")))]
+ "TARGET_HAS_MUL_UNIT"
+ "MULL %1,%2,%0 // %0 := %1 * %2 (HI)"
+ [(set_attr "length" "3,5")
+ (set_attr "type" "mul,mul")
+ (set_attr "longConstant" "false,true")])
+
+;; If the AE has a MAC unit, use the following pattern instead.
+(define_insn_and_split "*mulhi3_mac"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (mult:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2
+ "picochip_register_or_immediate_operand" "r,i")))]
+ "TARGET_HAS_MAC_UNIT"
+ "// %0 := %1 * %2\n\tMUL %1,%2,acc0\n\tREADACC acc0,frac,%0"
+ "TARGET_HAS_MAC_UNIT && reload_completed"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx const_rtx = GEN_INT(0);
+ operands[3] = (gen_multiply_machi(operands[1], operands[2]));
+ operands[4] = (gen_movhi_mac(operands[0],const_rtx));
+} "
+)
+
+(define_insn "umultiply_machisi"
+ [(set (reg:SI ACC_REGNUM)
+ (mult:SI (zero_extend:SI (match_operand:HI 0 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))))]
+ "TARGET_HAS_MAC_UNIT"
+ "MULUU %0,%1,acc0\t// acc0 := %0 * %1 (unsigned)"
+ [(set_attr "length" "3")
+ (set_attr "type" "mac")
+ (set_attr "longConstant" "false")])
+
+(define_insn "multiply_machisi"
+ [(set (reg:SI ACC_REGNUM)
+ (mult:SI (sign_extend:SI (match_operand:HI 0 "register_operand" "r,r"))
+ (sign_extend:SI (match_operand:HI 1
+ "picochip_register_or_immediate_operand" "r,i"))))]
+ "TARGET_HAS_MAC_UNIT"
+ "MUL %0,%1,acc0\t// acc0 := %0 * %1 (signed)"
+ [(set_attr "length" "3,5")
+ (set_attr "type" "mac,mac")
+ (set_attr "longConstant" "false,true")])
+
+;; We want to prevent GCC from treating ACC as a normal register and
+;; using this pattern freely; it should be used only for MAC unit
+;; multiplication, so a "use" clause is added to enforce that.
+(define_insn "movsi_mac"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (reg:SI ACC_REGNUM))
+ (use (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_HAS_MAC_UNIT"
+ "READACC32 acc0,%R0 \t// %0 := acc0 "
+ [(set_attr "length" "3")
+ (set_attr "type" "mac")
+ (set_attr "longConstant" "false")])
+
+;; We want to prevent GCC from treating ACC as a normal register and
+;; using this pattern freely; it should be used only for MAC unit
+;; multiplication, so a "use" clause is added to enforce that.
+(define_insn "movhi_mac"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (reg:HI ACC_REGNUM) )
+ (use (match_operand:HI 1 "const_int_operand" ""))]
+ "TARGET_HAS_MAC_UNIT"
+ "READACC acc0,frac,%0 \t// %0 := acc0 "
+ [(set_attr "length" "3")
+ (set_attr "type" "mac")
+ (set_attr "longConstant" "false")])
+
+;; 16-bit to 32-bit widening signed multiplication.
+(define_expand "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MULTIPLY"
+ ""
+)
+
+(define_insn_and_split "*mulhisi3_mul"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MUL_UNIT"
+ "// %0 := %1 * %2 (HI->SI)\;MULL %1,%2,%L0\;MULH %1,%2,%U0";
+ "TARGET_HAS_MUL_UNIT && reload_completed && picochip_schedule_type != DFA_TYPE_NONE"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx op0_high = gen_highpart (HImode, operands[0]);
+ rtx op0_low = gen_lowpart (HImode, operands[0]);
+ operands[3] = gen_mulhisi3_mul_lower(op0_low,operands[1],operands[2]);
+ operands[4] = gen_mulhisi3_mul_higher(op0_high,operands[1],operands[2]);
+}
+ "
+)
+
+(define_insn "mulhisi3_mul_lower"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (subreg:HI
+ (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))) 0))]
+ "TARGET_HAS_MUL_UNIT"
+ "MULL %1,%2,%0"
+ [(set_attr "length" "3")
+ (set_attr "type" "mul")
+ (set_attr "longConstant" "false")])
+
+(define_insn "mulhisi3_mul_higher"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (subreg:HI
+ (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))) 2))]
+ "TARGET_HAS_MUL_UNIT"
+ "MULH %1,%2,%0"
+ [(set_attr "length" "3")
+ (set_attr "type" "mul")
+ (set_attr "longConstant" "false")])
+
+(define_insn_and_split "*mulhisi3_mac"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MAC_UNIT"
+ "// %0 := %1 * %2 (HI->SI) STAN2\;MUL %1,%2,acc0\;READACC32 acc0,%R0";
+ "TARGET_HAS_MAC_UNIT && reload_completed"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx const_rtx = gen_int_mode(0,SImode);
+ operands[3] = (gen_multiply_machisi(operands[1], operands[2]));
+ operands[4] = (gen_movsi_mac(operands[0],const_rtx));
+} "
+)
+
+;;===========================================================================
+;; Widening multiplication (unsigned)
+;;===========================================================================
+
+(define_expand "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MULTIPLY"
+ ""
+)
+
+(define_insn_and_split "*umulhisi3_mul"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MUL_UNIT"
+ "// %0 := %1 * %2 (uHI->uSI Type 1)\;MULUL %1,%2,%L0\n\tMULUH %1,%2,%U0";
+ "TARGET_HAS_MUL_UNIT && reload_completed && picochip_schedule_type != DFA_TYPE_NONE"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx op0_high = gen_highpart (HImode, operands[0]);
+ rtx op0_low = gen_lowpart (HImode, operands[0]);
+ operands[3] = gen_umulhisi3_mul_lower(op0_low,operands[1],operands[2]);
+ operands[4] = gen_umulhisi3_mul_higher(op0_high,operands[1],operands[2]);
+}
+ "
+ )
+
+(define_insn "umulhisi3_mul_lower"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (subreg:HI
+ (mult:SI
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))) 0))]
+ "TARGET_HAS_MUL_UNIT"
+ "MULUL %1,%2,%0"
+ [(set_attr "length" "3")
+ (set_attr "type" "mul")
+ (set_attr "longConstant" "false")])
+
+(define_insn "umulhisi3_mul_higher"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (subreg:HI
+ (mult:SI
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))) 2))]
+ "TARGET_HAS_MUL_UNIT"
+ "MULUH %1,%2,%0"
+ [(set_attr "length" "3")
+ (set_attr "type" "mul")
+ (set_attr "longConstant" "false")])
+
+(define_insn_and_split "*umulhisi3_mac"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ "TARGET_HAS_MAC_UNIT"
+ "// %0 := %1 * %2 (uHI->uSI Type 3)\;MULUU %1,%2,acc0\;READACC32 acc0,%R0";
+ "TARGET_HAS_MAC_UNIT && reload_completed"
+ [(match_dup 3)
+ (match_dup 4)]
+ "
+{
+ rtx const_rtx = gen_int_mode(0,SImode);
+ operands[3] = (gen_umultiply_machisi(operands[1], operands[2]));
+ operands[4] = (gen_movsi_mac(operands[0],const_rtx));
+} "
+)
+
+;;===========================================================================
+;; Division (signed)
+;;===========================================================================
+
+;; Perform a divmod operation as a function call. This results in some
+;; registers being clobbered (r0-r6 and r12; r13/r14 are known not to
+;; be affected).
+(define_expand "divmodhi4"
+ [
+ ; Copy the inputs to r0 and r1.
+ (set (reg:HI 0) (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 1) (match_operand:HI 2 "register_operand" ""))
+ ; Make the function call - note that r12 (link) is clobbered. Note also
+ ; that an explicit call is generated. This ensures that GCC notices that
+ ; any function containing a div/mod is not a leaf function.
+ (parallel [(match_dup 4)
+ (set (reg:HI 0) (div:HI (reg:HI 0) (reg:HI 1)))
+ (set (reg:HI 1) (mod:HI (reg:HI 0) (reg:HI 1)))
+ (clobber (reg:HI 2))
+ (clobber (reg:HI 3))
+ (clobber (reg:HI 4))
+ (clobber (reg:HI 5))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))
+ ])
+ ; Set the quotient (returned in register 0)
+ (set (match_operand:HI 0 "register_operand" "") (reg:HI 0))
+ ; Set the remainder (returned in register 1)
+ (set (match_operand:HI 3 "register_operand" "") (reg:HI 1))]
+ ""
+{
+ rtx fnName = gen_rtx_SYMBOL_REF (HImode, "_divmodhi4");
+ operands[4] = gen_call_for_divmod (gen_rtx_MEM (QImode, fnName), GEN_INT(0));
+})
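+
+;; For example (illustrative only): C code such as
+;;
+;;   short q = a / b; short r = a % b;
+;;
+;; expands to copies of a and b into r0 and r1, a JL to _divmodhi4,
+;; and copies of the quotient (r0) and remainder (r1) back into the
+;; user's registers.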
+
+; Match a call to divmodhi4. As this is a call, the link register
+; (r12) and registers r0-5 must be clobbered (ignore the clobbering of
+; r13/r14, as these aren't used by the divide function).
+(define_insn "*divmodhi4_call"
+ [(call (mem:QI (match_operand:HI 0 "immediate_operand" "i"))
+ (match_operand 1 "const_int_operand" ""))
+ (set (reg:HI 0) (div:HI (reg:HI 0) (reg:HI 1)))
+ (set (reg:HI 1) (mod:HI (reg:HI 0) (reg:HI 1)))
+ (clobber (reg:HI 2))
+ (clobber (reg:HI 3))
+ (clobber (reg:HI 4))
+ (clobber (reg:HI 5))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))
+]
+ ""
+ "JL (%0)\t// call %0%>"
+ [(set_attr "length" "4")
+ (set_attr "longConstant" "true")
+ (set_attr "type" "call")])
+
+;; Perform a udivmod operation as a function call. This results in some
+;; registers being clobbered (r0-r6 and r12; r13/r14 are known not to
+;; be affected).
+(define_expand "udivmodhi4"
+ [
+ ; Copy the inputs to r0 and r1.
+ (set (reg:HI 0) (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 1) (match_operand:HI 2 "register_operand" ""))
+ ; Make the function call - note that r12 (link) is clobbered. Note also
+ ; that an explicit call is generated. This ensures that GCC notices that
+ ; any function containing a div/mod is not a leaf function.
+ (parallel [(match_dup 4)
+ (set (reg:HI 0) (udiv:HI (reg:HI 0) (reg:HI 1)))
+ (set (reg:HI 1) (umod:HI (reg:HI 0) (reg:HI 1)))
+ (clobber (reg:HI 2))
+ (clobber (reg:HI 3))
+ (clobber (reg:HI 4))
+ (clobber (reg:HI 5))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))
+ ])
+ ; Set the quotient (returned in register 0)
+ (set (match_operand:HI 0 "register_operand" "") (reg:HI 0))
+ ; Set the remainder (returned in register 1)
+ (set (match_operand:HI 3 "register_operand" "") (reg:HI 1))]
+ ""
+{
+ rtx fnName = gen_rtx_SYMBOL_REF (HImode, "_udivmodhi4");
+ operands[4] = gen_call_for_divmod (gen_rtx_MEM (QImode, fnName), GEN_INT(0));
+})
+
+; Match a call to udivmodhi4. As this is a call, the link register
+; (r12) and registers r0-5 must be clobbered (ignore the clobbering of
+; r13/r14, as these aren't used by the divide function).
+(define_insn "*udivmodhi4_call"
+ [(call (mem:QI (match_operand:HI 0 "immediate_operand" "i"))
+ (match_operand 1 "const_int_operand" ""))
+ (set (reg:HI 0) (udiv:HI (reg:HI 0) (reg:HI 1)))
+ (set (reg:HI 1) (umod:HI (reg:HI 0) (reg:HI 1)))
+ (clobber (reg:HI 2))
+ (clobber (reg:HI 3))
+ (clobber (reg:HI 4))
+ (clobber (reg:HI 5))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "JL (%0)\t// call %0%>"
+ [(set_attr "length" "4")
+ (set_attr "longConstant" "true")
+ (set_attr "type" "call")])
+
+(define_expand "udivmodsi4"
+ [
+ ; Make the function call
+ (set (reg:SI 0) (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 2) (match_operand:SI 2 "register_operand" ""))
+ (parallel [
+ (match_dup 4)
+ (set (reg:SI 4) (udiv:SI (reg:SI 0) (reg:SI 2)))
+ (set (reg:SI 6) (umod:SI (reg:SI 0) (reg:SI 2)))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 2))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:SI 0 "register_operand" "") (reg:SI 4))
+ (set (match_operand:SI 3 "register_operand" "") (reg:SI 6))]
+ ""
+{
+ rtx fnName = gen_rtx_SYMBOL_REF (HImode, "_udivmodsi4");
+ operands[4] = gen_call_for_divmod (gen_rtx_MEM (QImode, fnName), GEN_INT(0));
+})
+
+(define_insn "*udivmodsi4_call"
+ [(call (mem:QI (match_operand:HI 0 "immediate_operand" "i"))
+ (match_operand 1 "const_int_operand" ""))
+ (set (reg:SI 4) (udiv:SI (reg:SI 0) (reg:SI 2)))
+ (set (reg:SI 6) (umod:SI (reg:SI 0) (reg:SI 2)))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 2))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "JL (%0)\t// call %0%>"
+ [(set_attr "length" "4")
+ (set_attr "longConstant" "true")
+ (set_attr "type" "call")])
+
+(define_expand "divmodsi4"
+ [
+ ; Make the function call
+ (set (reg:SI 0) (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 2) (match_operand:SI 2 "register_operand" ""))
+ (parallel [
+ (match_dup 4)
+ (set (reg:SI 4) (div:SI (reg:SI 0) (reg:SI 2)))
+ (set (reg:SI 6) (mod:SI (reg:SI 0) (reg:SI 2)))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 2))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:SI 0 "register_operand" "") (reg:SI 4))
+ (set (match_operand:SI 3 "register_operand" "") (reg:SI 6))]
+ ""
+{
+ rtx fnName = gen_rtx_SYMBOL_REF (HImode, "_divmodsi4");
+ operands[4] = gen_call_for_divmod (gen_rtx_MEM (QImode, fnName), GEN_INT(0));
+})
+
+(define_insn "*divmodsi4_call"
+ [(call (mem:QI (match_operand:HI 0 "immediate_operand" "i"))
+ (match_operand 1 "const_int_operand" ""))
+ (set (reg:SI 4) (div:SI (reg:SI 0) (reg:SI 2)))
+ (set (reg:SI 6) (mod:SI (reg:SI 0) (reg:SI 2)))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 2))
+ (clobber (reg:HI 12))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "JL (%0)\t// call %0%>"
+ [(set_attr "length" "4")
+ (set_attr "longConstant" "true")
+ (set_attr "type" "call")])
+
+;;===========================================================================
+;; Bitwise AND. The QI/SI mode instructions are automatically
+;; synthesised from the HI mode instruction.
+;;===========================================================================
+
+(define_insn "andhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (and:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "general_operand" "r,n")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "AND.%# %1,%2,%0 // %0 := %1 AND %2 (HI)"
+ [(set_attr "type" "basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "3,5")])
+
+;; If we peepholed the compare instruction out, we need to make sure the
+;; "and" goes in slot 0. This pattern is just to accomplish that.
+
+(define_insn "andhi3_with_use_clause"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (and:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "general_operand" "r,n")))
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(const_int 0)
+ (const_int 0)]))]
+ ""
+ "AND.0 %1,%2,%0 // %0 := %1 AND %2 (HI)"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "3,5")])
+
+;;===========================================================================
+;; Bitwise inclusive-OR. The QI mode instruction is automatically
+;; synthesised from the HI mode instruction.
+;;===========================================================================
+
+(define_insn "iorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ior:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "register_operand" "r,n")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "OR.%# %1,%2,%0 // %0 := %1 IOR %2 (HI)"
+ [(set_attr "type" "basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "3,5")])
+
+(define_insn "iorhi3_with_use_clause"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ior:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "general_operand" "r,n")))
+ (set (reg:CC CC_REGNUM)
+ (match_operator:CC 3 "picochip_peephole_comparison_operator"
+ [(const_int 0)
+ (const_int 0)]))]
+ ""
+ "OR.0 %1,%2,%0 // %0 := %1 IOR %2 (HI)"
+ [(set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "3,5")])
+
+;;===========================================================================
+;; Bitwise exclusive-OR. The QI/SI mode instructions are automatically
+;; synthesised from the HI mode instruction.
+;;===========================================================================
+
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (xor:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "picochip_register_or_immediate_operand" "r,n")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "XOR.%# %1,%2,%0 // %0 := %1 XOR %2 (HI)"
+ [(set_attr "type" "basicAlu,basicAlu")
+ (set_attr "longConstant" "false,true")
+ (set_attr "length" "3,5")])
+
+;;===========================================================================
+;; Arithmetic shift left.
+;;===========================================================================
+
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "general_operand" "r,J")))]
+ ""
+ "LSL.%# %1,%2,%0 // %0 := %1 << %2"
+ [(set_attr "type" "picoAlu,basicAlu")
+ (set_attr "length" "3,3")])
+
+;;===========================================================================
+;; Arithmetic shift right.
+;;===========================================================================
+
+(define_insn "builtin_asri"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "ASR.%# %1,%2,%0\t// %0 = %1 >>{arith} %2"
+ [(set_attr "type" "basicAlu")
+ (set_attr "length" "3")])
+
+;; The picoChip ISA doesn't have a variable arithmetic shift right, so
+;; synthesise it. Shifts by constants are directly supported.
+
+(define_expand "ashrhi3"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "picochip_register_or_immediate_operand" "")]
+ ""
+{
+ if (GET_CODE(operands[2]) == CONST_INT)
+ /* Shift by constant is easy. */
+ emit_insn (gen_builtin_asri (operands[0], operands[1], operands[2]));
+ else
+ {
+ /* Synthesise a variable shift. */
+
+ rtx tmp1;
+ rtx tmp2;
+ rtx tmp3;
+ rtx minus_one;
+ rtx tmp4;
+
+ /* Fill a temporary with the sign bits. */
+ tmp1 = gen_reg_rtx (HImode);
+ emit_insn (gen_builtin_asri (tmp1, operands[1], GEN_INT(15)));
+
+ /* Shift the unsigned value. */
+ tmp2 = gen_reg_rtx (HImode);
+ emit_insn (gen_lshrhi3 (tmp2, operands[1], operands[2]));
+
+ /* The word of sign bits must be shifted back to the left, to zero
+ * out the unwanted lower bits. The amount to shift left by is (15 -
+ * count). Since the shifts are computed modulo 16 (i.e., only the
+ * lower 4 bits of the count are used), the shift amount (15 - count)
+ * is equivalent to ~count (one's complement). */
+ tmp3 = gen_reg_rtx (HImode);
+ minus_one = GEN_INT (-1);
+ emit_insn (gen_xorhi3 (tmp3, operands[2], minus_one));
+ tmp4 = gen_reg_rtx (HImode);
+ emit_insn (gen_ashlhi3 (tmp4, tmp1, tmp3));
+
+ /* Combine the sign bits with the shifted value. */
+ emit_insn (gen_iorhi3 (operands[0], tmp2, tmp4));
+
+ }
+ DONE;
+})
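+
+;; As an illustrative sketch only (not part of the port), the expansion
+;; above computes, in C terms (assuming 16-bit HImode, an arithmetic
+;; signed >>, and hardware shift counts taken modulo 16, so that
+;; (~n & 15) == 15 - n):
+;;
+;;   int16_t ashr_hi (int16_t x, uint16_t n)
+;;   {
+;;     uint16_t sign = (uint16_t) (x >> 15); /* 0xFFFF if x < 0, else 0 */
+;;     uint16_t lo = (uint16_t) x >> n;      /* logical shift of value */
+;;     uint16_t hi = sign << (~n & 15);      /* sign bits shifted back */
+;;     return (int16_t) (lo | hi);
+;;   }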
+
+;;===========================================================================
+;; Logical shift right.
+;;===========================================================================
+
+(define_insn "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "r,r")
+ (match_operand:HI 2 "general_operand" "r,J")))]
+ ""
+ "LSR.%# %1,%2,%0 // %0 := %1 >> %2"
+ [(set_attr "type" "picoAlu,basicAlu")
+ (set_attr "length" "3,3")])
+
+;;===========================================================================
+;; Negate.
+;;===========================================================================
+
+;; Negations are performed by subtracting from the constant 0, which
+;; is loaded into a register. By using a register containing 0, the
+;; chances of being able to CSE with another 0 value are increased.
+
+(define_expand "neghi2"
+ [(set (match_dup 2) (match_dup 3))
+ (parallel [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (match_dup 2)
+ (match_operand:HI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "operands[2] = gen_reg_rtx(HImode);
+ operands[3] = GEN_INT(0x00000000);")
+
+(define_expand "negsi2"
+ [(set (match_dup 2) (match_dup 3))
+ (parallel [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_dup 2)
+ (match_operand:SI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "operands[2] = gen_reg_rtx(SImode);
+ operands[3] = GEN_INT(0x00000000);")
+
+;;===========================================================================
+;; Absolute value. Taken from Hacker's Delight, page 17. The second of the
+;; four options given there produces the smallest, fastest code.
+;;===========================================================================
+
+(define_insn_and_split "abshi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (abs:HI (match_operand:HI 1 "register_operand" "")))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 2)
+ (plus:HI (ashiftrt:HI (match_dup 1) (const_int 15))
+ (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (match_dup 0)
+ (xor:HI (ashiftrt:HI (match_dup 1) (const_int 15))
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[2] = gen_reg_rtx (HImode);
+})
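+
+;; Illustrative sketch only: in C terms the split above computes
+;;
+;;   int16_t abs_hi (int16_t x)
+;;   {
+;;     int16_t sign = x >> 15;     /* -1 if x < 0, else 0 (arithmetic >>) */
+;;     return (x + sign) ^ sign;   /* (x - 1) ^ -1 == -x in two's complement */
+;;   }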
+
+;;===========================================================================
+;; Bitwise complement. The SI mode variant is auto-synthesised. Although
+;; this pattern internally uses XOR, the compiler would not automatically
+;; synthesise the operation from XOR if this pattern were removed.
+;;===========================================================================
+
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (not:HI (match_operand:HI 1 "register_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "XOR.%# %1,-1,%0 // %0 := ~%1"
+ [(set_attr "type" "basicAlu")
+ (set_attr "longConstant" "true")
+ (set_attr "length" "5")])
+
+;;===========================================================================
+;; Count leading zeros. The special sign-bit-count (SBC) instruction
+;; can be used to help us here:
+;;   op0 := clz(op1)
+;; The code works by checking whether the top bit is set. If it is,
+;; there are no leading zeros. If the top bit is clear, the SBC
+;; instruction is used to determine how many more leading zeros are
+;; present, and one more is added for the initial zero.
+;;===========================================================================
+
+(define_insn "clzhi2"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (clz:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "// Count leading zeros\;SBC %1,%0\;ASR.0 %1,15,r15 %| ADD.1 %0,1,%0\;COPYNE 0,%0"
+ [(set_attr "length" "11")])
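+
+;; Illustrative sketch only, assuming SBC(x) returns the number of
+;; redundant sign bits in x (the hypothetical sbc() helper below stands
+;; in for the instruction):
+;;
+;;   uint16_t clz_hi (uint16_t x)
+;;   {
+;;     if (x & 0x8000)
+;;       return 0;          /* top bit set: no leading zeros */
+;;     return sbc (x) + 1;  /* further leading zeros, plus the top one */
+;;   }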
+
+;;===========================================================================
+;; Count trailing zeros. This can be achieved efficiently by reversing
+;; the bits with the BREV instruction, and then counting the leading
+;; zeros as described above.
+;;===========================================================================
+
+(define_insn "ctzhi2"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (ctz:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "// Count trailing zeros\;BREV %1,%0\;SBC %0,%0\;AND.0 %1,0x0001,r15 %| ADD.1 %0,1,%0\;COPYNE 0,%0"
+ [(set_attr "length" "15")])
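+
+;; Illustrative sketch only: bit reversal turns trailing zeros into
+;; leading zeros, so in C terms (with brev() standing in for BREV):
+;;
+;;   uint16_t ctz_hi (uint16_t x)
+;;   {
+;;     return clz_hi (brev (x));
+;;   }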
+
+;;===========================================================================
+;; Find the first set bit, starting from the least significant bit position.
+;; This is very similar to the ctz function, except that the bit index is one
+;; greater than the number of trailing zeros (i.e., SBC + 2), and the
+;; result of ffs on the zero value is defined.
+;;===========================================================================
+
+(define_insn "ffshi2"
+ [(set (match_operand:HI 0 "register_operand" "=&r")
+ (ffs:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "// First first bit\;BREV %1,%0\;SBC %0,%0\;AND.0 %1,0x0001,r15 %| ADD.1 %0,2,%0\;COPYNE 1,%0\;SUB.0 %1,0x0000,r15\;COPYEQ 0,%0"
+ [(set_attr "length" "20")])
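+
+;; Illustrative sketch only, in C terms:
+;;
+;;   uint16_t ffs_hi (uint16_t x)
+;;   {
+;;     return (x == 0) ? 0 : ctz_hi (x) + 1;  /* 1-based bit index */
+;;   }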
+
+;;===========================================================================
+;; Tablejump Instruction. Jump to an absolute address.
+;;===========================================================================
+
+(define_insn "tablejump"
+ [(set (pc) (unspec:HI [(match_operand:HI 0 "register_operand" "r")] 1))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_dup 0))]
+ ""
+ "JR (%0)\t // Table jump to %0 %>"
+ [(set_attr "length" "2")
+ (set_attr "type" "realBranch")])
+
+;; Given the memory address of a QImode value, and a scratch register,
+;; load the memory operand into the given output operand. The scratch
+;; operand will not conflict with either of the operands. The other
+;; two operands may conflict with each other.
+
+(define_insn "synthesised_loadqi_unaligned"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (match_operand:QI 1 "memory_operand" "m"))
+ (clobber (match_operand:HI 2 "register_operand" "=&r"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "// Synthesised loadqi %0 = Mem(%1) (Scratch %2)\n\tAND.0 %1,-2,%2\n\tLDW (%2)0,%0 %| AND.0 %1,1,%2\n\tLSL.0 %2,3,%2\n\tSUB.0 8,%2,%2\n\tLSL.0 %0,%2,%0\n\tASR.0 %0,8,%0"
+ ; Approximate length only. Probably a little shorter than this.
+ [(set_attr "length" "40")])
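+
+;; Illustrative sketch only: in C terms the sequence above performs the
+;; following (assuming an even address selects the low byte of the
+;; containing word; load_hi() is a hypothetical aligned HImode load):
+;;
+;;   int16_t load_qi_unaligned (uint16_t addr)
+;;   {
+;;     uint16_t word = load_hi (addr & ~1);  /* aligned word load */
+;;     uint16_t sh = 8 - ((addr & 1) << 3);  /* 8 for low byte, 0 for high */
+;;     return (int16_t) (word << sh) >> 8;   /* sign-extend the byte */
+;;   }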
+
+;; Given a memory operand whose alignment is known (the HImode aligned
+;; base is operand 0, and the number of bits by which to shift the
+;; source byte is operand 4), store the QImode source (operand 1) using
+;; a read-modify-write sequence.
+(define_expand "synthesised_storeqi_aligned"
+ [; s1 = mem_op
+ (set (match_operand:HI 2 "register_operand" "")
+ (match_operand:HI 0 "memory_operand" ""))
+ ; s1 = s1 and mask
+ (parallel [(set (match_dup 2) (and:HI (match_dup 2) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; s2 = source << bitShift
+ (set (match_dup 3)
+ (ashift:HI (subreg:HI (match_operand:QI 1 "register_operand" "") 0)
+ (match_operand:HI 4 "const_int_operand" "")))
+ ; s1 = s1 or s2
+ (parallel [(set (match_dup 2) (ior:HI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; mem_op = s1
+ (set (match_dup 0) (match_dup 2))]
+ "!TARGET_HAS_BYTE_ACCESS"
+{
+ /* Create the mask which preserves the byte not being written
+ (0xFF00 for a zero shift, 0x00FF for a shift of 8). */
+ operands[5] = gen_int_mode(~(0xFF << INTVAL (operands[4])), HImode);
+})
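+
+;; Illustrative sketch only: the read-modify-write sequence above, in C
+;; terms (shift is 0 or 8, selecting the byte of the word to write;
+;; load_hi()/store_hi() are hypothetical aligned HImode accessors):
+;;
+;;   void store_qi_aligned (uint16_t addr, uint8_t byte, int shift)
+;;   {
+;;     uint16_t word = load_hi (addr);     /* s1 = mem_op */
+;;     word &= ~(0xFF << shift);           /* s1 = s1 and mask */
+;;     word |= (uint16_t) byte << shift;   /* s1 = s1 or (src << shift) */
+;;     store_hi (addr, word);              /* mem_op = s1 */
+;;   }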
+
+;; Reload instructions. See picochip_secondary_reload for an
+;; explanation of why an SI mode register is used as a scratch. The
+;; memory operand must be stored in a register (i.e., it can't be an
+;; offset to another register - this would require another scratch
+;; register into which the address of the offset could be computed).
+
+(define_expand "reload_inqi"
+ [(parallel [(match_operand:QI 0 "register_operand" "=&r")
+ (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:SI 2 "register_operand" "=&r")])]
+ "!TARGET_HAS_BYTE_ACCESS"
+{
+ rtx scratch, seq;
+
+ /* Get the scratch register. Given an SI mode value, we have a
+ choice of two HI mode scratch registers, so we can be sure that at
+ least one of the scratch registers will be different to the output
+ register, operand[0]. */
+
+ if (REGNO (operands[0]) == REGNO (operands[2]))
+ scratch = gen_rtx_REG (HImode, REGNO (operands[2]) + 1);
+ else
+ scratch = gen_rtx_REG (HImode, REGNO (operands[2]));
+
+ /* Ensure that the scratch doesn't overlap either of the other
+ two operands - however, the other two may overlap each
+ other. */
+ gcc_assert (REGNO(scratch) != REGNO(operands[0]));
+ gcc_assert (REGNO(scratch) != REGNO(operands[1]));
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ if (picochip_word_aligned_memory_reference(XEXP(operands[1], 0)))
+ {
+ /* Aligned reloads are easy, since they can use word-loads. */
+ seq = gen_synthesised_loadqi_aligned(operands[0], operands[1], scratch);
+ }
+ else
+ {
+ /* Emit the instruction using a define_insn. */
+ seq = gen_synthesised_loadqi_unaligned(operands[0], operands[1], scratch);
+ }
+ emit_insn (seq);
+
+ DONE;
+
+})
+
+(define_expand "reload_outqi"
+ [(parallel [(match_operand 0 "memory_operand" "=m")
+ (match_operand:QI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "=&r")])]
+ "!TARGET_HAS_BYTE_ACCESS"
+{
+ rtx scratch1 = gen_rtx_REG(HImode, REGNO(operands[2]));
+ rtx scratch2 = gen_rtx_REG(HImode, REGNO(operands[2]) + 1);
+ rtx seq;
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+
+ if (picochip_word_aligned_memory_reference(XEXP(operands[0], 0)))
+ {
+ rtx alignedAddr, bitShift;
+
+ /* Convert the address of the known alignment into two operands
+ * representing the aligned base address, and the number of shift bits
+ * required to access the required value. */
+ picochip_get_hi_aligned_mem(operands[0], &alignedAddr, &bitShift);
+
+ /* Emit an aligned store of the source, with the given bit offset. */
+ seq = gen_synthesised_storeqi_aligned(alignedAddr, operands[1], scratch1, scratch2, bitShift);
+
+ }
+ else
+ {
+ /* This isn't exercised at all. Moreover, with new devices, byte access
+ is available in all variants. */
+ gcc_unreachable();
+ }
+
+ emit_insn (seq);
+ DONE;
+
+})
+
+;; Perform a byte load of an alignable memory operand.
+; op0 = register to load. op1 = memory operand from which to load.
+; op2 = HI scratch register, loaded with the aligned word. op3 = op1,
+; aligned to HI. op4 = 8 - (const bit shift required to extract the byte).
+(define_expand "synthesised_loadqi_aligned"
+ [; Load memory operand into register
+ (set (match_operand:HI 2 "register_operand" "=r")
+ (match_dup 3))
+ ; Shift required byte into top byte of word.
+ (set (match_dup 2)
+ (ashift:HI (match_dup 2)
+ (match_dup 4)))
+ ; Arithmetic shift right to sign-extend the byte into the low byte of the destination.
+ (parallel[(set (subreg:HI (match_dup 0) 0)
+ (ashiftrt:HI (match_dup 2)
+ (const_int 8)))
+ (clobber (reg:CC CC_REGNUM))])
+ (use (match_operand:QI 1 "picochip_alignable_memory_operand" "g"))]
+ "!TARGET_HAS_BYTE_ACCESS"
+{
+ rtx alignedAddr, bitShift;
+
+ /* Convert the address of the known alignment into two operands
+ * representing the aligned base address, and the number of shift bits
+ * required to access the required value. */
+ picochip_get_hi_aligned_mem(operands[1], &alignedAddr, &bitShift);
+
+ operands[3] = alignedAddr;
+ operands[4] = GEN_INT(8 - INTVAL(bitShift));
+})
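+
+;; Illustrative sketch only: with op3/op4 precomputed as above, the whole
+;; sequence is, in C terms (load_hi() again a hypothetical aligned load):
+;;
+;;   (int16_t) (load_hi (aligned_addr) << (8 - bit_shift)) >> 8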
+
+;;============================================================================
+;; Special instructions.
+;;============================================================================
+
+; Count sign-bits.
+(define_insn "sbc"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "r")]
+ UNSPEC_SBC))]
+ ""
+ "SBC %1,%0\t\t// %0 := SBC(%1)"
+ [(set_attr "type" "picoAlu")
+ (set_attr "length" "2")])
+
+; Bit reversal.
+(define_insn "brev"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "r")]
+ UNSPEC_BREV))]
+ ""
+ "BREV %1,%0\t\t// %0 := BREV(%1)"
+ [(set_attr "length" "2")
+ (set_attr "type" "picoAlu")])
+
+; Byte swap.
+(define_insn "bswaphi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "BYTESWAP %1,%0\t\t// %0 := ByteSwap(%1)"
+ [(set_attr "length" "2")
+ (set_attr "type" "picoAlu")])
+
+; Read status word.
+(define_insn "copysw"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec_volatile:HI [(reg:CC CC_REGNUM)] UNSPEC_COPYSW))]
+ ""
+ "COPYSW.%# %0\t// %0 := Flags"
+ [(set_attr "type" "basicAlu")
+ (set_attr "length" "2")])
+
+; Saturating addition.
+(define_insn "sataddhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "register_operand" "r")]
+ UNSPEC_ADDS))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "ADDS %1,%2,%0\t// %0 := sat(%1 + %2)"
+ [(set_attr "type" "picoAlu")
+ (set_attr "length" "3")])
+
+; Saturating subtraction.
+(define_insn "satsubhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec:HI [(match_operand:HI 1 "register_operand" "r")
+ (match_operand:HI 2 "register_operand" "r")]
+ UNSPEC_SUBS))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "SUBS %1,%2,%0\t// %0 := sat(%1 - %2)"
+ [(set_attr "type" "picoAlu")
+ (set_attr "length" "3")])
+
+(define_insn "halt"
+ [(unspec_volatile [(match_operand:HI 0 "const_int_operand" "i")]
+ UNSPEC_HALT)]
+ ""
+ "HALT\t// (id %0)"
+ [(set_attr "length" "1")
+ (set_attr "type" "unknown")])
+
+(define_insn "internal_testport"
+ [(set (reg:CC CC_REGNUM)
+ (unspec_volatile:CC [(match_operand:HI 0 "const_int_operand" "i")]
+ UNSPEC_INTERNAL_TESTPORT))]
+ ""
+ "TSTPORT %0"
+ [(set_attr "length" "2")
+ (set_attr "longConstant" "false")
+ (set_attr "type" "picoAlu")])
+
+;;============================================================================
+;; Communications builtins.
+;;
+;; Each builtin comes in two forms: a single port version, which maps
+;; to a single instruction, and an array port version. The array port
+;; version is treated as a special type of instruction, which is then
+;; split into a number of smaller instructions if the index of the
+;; port can't be converted into a constant. When the RTL split is
+;; performed, a function call is emitted, in which the index of the
+;; port to use is used to compute the address of the function to call
+;; (i.e., each array port is a function in its own right, and the
+;; functions are stored as an array which is then indexed to determine
+;; the correct function). The communication function port array is
+;; created by the linker if and only if it is required (in a
+;; collect2-like manner).
+;;============================================================================
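+
+; As an illustrative sketch only (the table is built by the linker and
+; the call is emitted by the backend; port_get_fns is a hypothetical
+; name), a variable-index array get conceptually does:
+;
+;   extern uint32_t (*port_get_fns[]) (void);  /* one function per port */
+;
+;   uint32_t array_get (uint16_t index, uint16_t base)
+;   {
+;     return port_get_fns[base + index] ();
+;   }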
+
+; Simple scalar get.
+(define_insn "commsGet"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:HI 1 "immediate_operand" "n")]
+ UNSPEC_GET))]
+ ""
+ "GET %1,%R0\t// %R0 := PORT(%1)"
+ [(set_attr "type" "comms")
+ (set_attr "length" "2")])
+
+; Entry point for array get (the actual port index is computed as the
+; sum of the index and the base).
+;
+; op0 - Destination
+; op1 - Requested port index
+; op2 - size of port array (constant)
+; op3 - base index of port array (constant)
+
+(define_expand "commsArrayGet"
+ [(parallel
+ [(set (reg:SI 0)
+ (unspec_volatile:SI [(match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")
+ (match_operand:HI 3 "immediate_operand" "")]
+ UNSPEC_CALL_GET_ARRAY))
+ (clobber (reg:HI LINK_REGNUM))])
+ (set (match_operand:SI 0 "register_operand" "") (reg:SI 0))]
+ ""
+ "")
+
+;; The actual array get instruction. When the array index is a constant,
+;; an exact instruction may be generated. When the index is variable,
+;; a call to a special function is generated. This code could be
+;; split into individual RTL instructions, but it is so rarely
+;; used that we won't bother.
+(define_insn "*commsArrayGetInstruction"
+ [(set (reg:SI 0)
+ (unspec_volatile:SI [(match_operand:HI 0 "general_operand" "r,i")
+ (match_operand:HI 1 "immediate_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")]
+ UNSPEC_CALL_GET_ARRAY))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+{
+ return picochip_output_get_array (which_alternative, operands);
+})
+
+; Scalar Put instruction.
+(define_insn "commsPut"
+ [(unspec_volatile [(match_operand:HI 0 "const_int_operand" "")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_PUT)]
+ ""
+ "PUT %R1,%0\t// PORT(%0) := %R1"
+ [(set_attr "type" "comms")
+ (set_attr "length" "2")])
+
+; Entry point for array put. The operands accepted are:
+; op0 - Value to put
+; op1 - Requested port index
+; op2 - size of port array
+; op3 - base index of port array
+; The arguments are marshalled into the fixed registers, so that
+; the actual put instruction can expand into a call if necessary
+; (e.g., if the index is variable at run-time).
+
+(define_expand "commsArrayPut"
+ [(set (reg:SI 0) (match_operand:SI 0 "general_operand" ""))
+ (parallel
+ [(unspec_volatile [(match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")
+ (match_operand:HI 3 "immediate_operand" "")]
+ UNSPEC_CALL_PUT_ARRAY)
+ (use (reg:SI 0))
+ (clobber (reg:HI LINK_REGNUM))])]
+ ""
+ "")
+
+;; The actual array put instruction. When the array index is a constant,
+;; an exact instruction may be generated. When the index is variable,
+;; a call to a special function is generated. This code could be
+;; split into individual RTL instructions, but it is so rarely
+;; used that we won't bother.
+(define_insn "*commsArrayPutInstruction"
+ [(unspec_volatile [(match_operand:HI 0 "general_operand" "r,i")
+ (match_operand:HI 1 "immediate_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")]
+ UNSPEC_CALL_PUT_ARRAY)
+ (use (reg:SI 0))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+{
+ return picochip_output_put_array (which_alternative, operands);
+})
+
+;; Scalar test port instruction.
+(define_insn "commsTestPort"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (unspec_volatile:HI [(match_operand:HI 1 "const_int_operand" "")]
+ UNSPEC_TESTPORT))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "// %0 := TestPort(%1)\;COPY.1 0,%0 %| TSTPORT %1\;COPYEQ 1,%0"
+ [(set_attr "length" "9")])
+
+; Entry point for array tstport (the actual port index is computed as the
+; sum of the index and the base).
+;
+; op0 - Test value.
+; op1 - Requested port index
+; op2 - size of port array (constant)
+; op3 - base index of port array (constant)
+
+(define_expand "commsArrayTestPort"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (unspec_volatile:HI [(match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")
+ (match_operand:HI 3 "immediate_operand" "")]
+ UNSPEC_CALL_TESTPORT_ARRAY))
+ (clobber (reg:HI LINK_REGNUM))])]
+ ""
+ "")
+
+;; The actual array testport instruction. When the array index is a constant,
+;; an exact instruction may be generated. When the index is variable,
+;; a call to a special function is generated. This code could be
+;; split into individual RTL instructions, but it is so rarely
+;; used that we won't bother.
+(define_insn "*commsArrayTestportInstruction"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (unspec_volatile:HI [(match_operand:HI 1 "general_operand" "r,i")
+ (match_operand:HI 2 "immediate_operand" "")
+ (match_operand:HI 3 "immediate_operand" "")]
+ UNSPEC_CALL_TESTPORT_ARRAY))
+ (clobber (reg:HI LINK_REGNUM))]
+ ""
+{
+ return picochip_output_testport_array (which_alternative, operands);
+})
+
+;; Merge a TSTPORT instruction with the branch to which it
+;; relates. Often the TSTPORT function (generated by a built-in) is
+;; used to control conditional execution. The normal sequence of
+;; instructions would be:
+;; TSTPORT p
+;; COPYSW temp
+;; AND temp, 0x0008, temp
+;; SUB temp,0,discard
+;; BEQ label
+;; This can be made more efficient by detecting the special case where
+;; the result of a TSTPORT is used to branch, to allow the following
+;; RTL sequence to be generated instead:
+;; TSTPORT p
+;; BEQ label
+;; A big saving in cycles and bytes!
+
+(define_insn_and_split "tstport_branch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "comparison_operator"
+ [(unspec_volatile:HI
+ [(match_operand:HI 1 "const_int_operand" "")]
+ UNSPEC_TESTPORT)
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC CC_REGNUM)
+ (unspec_volatile:CC [(match_dup 1)] UNSPEC_INTERNAL_TESTPORT))
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup:HI 4 [(reg:CC CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))
+ (use (match_dup 3))])]
+ "{
+ /* Note that the sense of the branch is reversed, since we are
+ * comparing flag != 0. */
+ gcc_assert (GET_CODE(operands[0]) == NE || GET_CODE(operands[0]) == EQ);
+ operands[4] = gen_rtx_fmt_ee(reverse_condition(GET_CODE(operands[0])),
+ GET_MODE(operands[0]), XEXP(operands[0], 0), XEXP(operands[0], 1));
+ operands[3] = GEN_INT (0);
+ }")
+
+;;============================================================================
+;; Prologue/Epilogue expansion.
+;;============================================================================
+
+(define_expand "prologue"
+ [(clobber (const_int 0))]
+ ""
+{
+ picochip_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(use (const_int 0))]
+ ""
+{
+ picochip_expand_epilogue (FALSE);
+ DONE;
+})
+
+;;============================================================================
+;; Trap instruction. This is used to indicate an error. For the
+;; picoChip processors this is handled by issuing a HALT instruction,
+;; which stops the processor.
+;;============================================================================
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 6))]
+ ""
+ "HALT\t// (Trap)"
+ [(set_attr "length" "2")])
+
+;;============================================================================
+;; Conditional copy instructions. Only equal/not-equal comparisons are
+;; supported. All other types of comparison remain as branch
+;; sequences.
+;;============================================================================
+
+;; A define_expand alone seems to consider the resulting two instructions
+;; to be independent; with a split guarded by reload_completed, it works
+;; correctly.
+(define_expand "movhicc"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI (match_operand:HI 1 "" "")
+ (match_operand:HI 2 "register_operand" "0,0")
+ (match_operand:HI 3 "picochip_register_or_immediate_operand" "r,i")))]
+ ""
+ {if (!picochip_check_conditional_copy (operands))
+ FAIL;
+ })
+
+(define_insn_and_split "*checked_movhicc"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI (match_operator 1 "picochip_peephole_comparison_operator"
+ [(match_operand:HI 4 "register_operand" "r,r")
+ (match_operand:HI 5 "picochip_comparison_operand" "r,i")])
+ (match_operand:HI 2 "register_operand" "0,0")
+ (match_operand:HI 3 "picochip_register_or_immediate_operand" "r,i")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (reg:CC CC_REGNUM) (match_dup 1))
+ (parallel [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI (match_op_dup:HI 1 [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:HI 2 "picochip_register_or_immediate_operand" "0,0")
+ (match_operand:HI 3 "picochip_register_or_immediate_operand" "r,i")))
+ (use (match_dup 6))])]
+ "{
+ operands[6] = GEN_INT(GET_CODE(operands[0]));
+ }")
+
+;; We don't do any checks here, but this pattern is used only when
+;; movhicc has already been checked. The "use" clause makes sure of that.
+(define_insn "*conditional_copy"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI
+ (match_operator:HI 4 "picochip_peephole_comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:HI 1 "register_operand" "0,0")
+ (match_operand:HI 2 "picochip_register_or_immediate_operand" "r,i")))
+ (use (match_operand:HI 3 "const_int_operand" ""))]
+ ""
+{
+
+ gcc_assert (GET_CODE(operands[4]) == EQ || GET_CODE(operands[4]) == NE);
+ /* Note that the comparison is reversed as the pattern matches
+ the *else* part of the if_then_else */
+ switch (GET_CODE(operands[4]))
+ {
+ case EQ: return "COPYNE %2,%0\t// if (NE) %0 := %2";
+ case NE: return "COPYEQ %2,%0\t// if (EQ) %0 := %2";
+ default:
+ gcc_unreachable();
+ }
+}
+ [(set_attr "length" "2")
+ (set_attr "type" "picoAlu,picoAlu")
+ (set_attr "longConstant" "false,true")])
+
+;;============================================================================
+;; Scheduling, including delay slot scheduling.
+;;============================================================================
+
+(automata_option "v")
+(automata_option "ndfa")
+
+;; Define each VLIW slot as a CPU resource. Note the three flavours of
+;; branch. `realBranch' is an actual branch instruction. `macroBranch'
+;; is a directive to the assembler, which may expand into multiple
+;; instructions. `call' is an actual branch instruction, but one which
+;; sets the link register, and hence can't be scheduled alongside
+;; other instructions which set the link register. When the DFA
+;; scheduler is fixed to prevent it from scheduling a JL alongside an
+;; instruction that sets R12, the call type branches can be replaced by
+;; realBranch types instead.
+
+(define_attr "type"
+ "picoAlu,basicAlu,nonCcAlu,mem,call,realBranch,macroBranch,mul,mac,app,comms,unknown"
+ (const_string "unknown"))
+
+(define_attr "schedType" "none,space,speed"
+ (const (symbol_ref "(enum attr_schedType) picochip_schedule_type")))
+
+;; Define whether an instruction uses a long constant.
+
+(define_attr "longConstant"
+ "true,false" (const_string "false"))
+
+;; Define three EU slots.
+(define_query_cpu_unit "slot0,slot1,slot2")
+
+;; Pull in the pipeline descriptions for speed or space scheduling.
+(include "dfa_speed.md")
+(include "dfa_space.md")
+
+; Unknown instructions are assumed to take a single cycle, and use all
+; slots. This enables them to actually output a sequence of
+; instructions without any limitation. For the purposes of
+; scheduling, unknown instructions are a pain, and should be removed
+; completely. This means that RTL patterns should always be used to
+; reduce complex sequences of instructions to individual instructions.
+(define_insn_reservation "unknownInsn" 1
+ (eq_attr "type" "unknown")
+ "(slot0+slot1+slot2)")
+
+; Allow any non-branch instructions to be placed in the branch
+; slot. Branch slots are always executed.
+(define_delay (eq_attr "type" "realBranch,call")
+ [(eq_attr "type" "!realBranch,macroBranch,call,unknown") (nil) (nil)])
diff --git a/gcc/config/picochip/picochip.opt b/gcc/config/picochip/picochip.opt
new file mode 100644
index 000000000..4726f4993
--- /dev/null
+++ b/gcc/config/picochip/picochip.opt
@@ -0,0 +1,46 @@
+; Options for the picoChip port of the compiler.
+
+; Copyright (C) 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not, see
+; <http://www.gnu.org/licenses/>.
+
+mae=
+Target RejectNegative Joined Var(picochip_ae_type_string)
+Specify which type of AE to target. This option sets the mul-type and byte-access.
+
+mmul-type=
+Target RejectNegative Undocumented Joined Var(picochip_mul_type_string)
+Specify which type of multiplication to use. Can be mul, mac or none.
+
+mbyte-access
+Target Undocumented Var(picochip_has_byte_access,1) Init(1)
+Specify whether the byte access instructions should be used. Enabled by default.
+
+mdebug
+Target RejectNegative Undocumented Mask(DEBUG)
+Enable debug output to be generated.
+
+msymbol-as-address
+Target Mask(SYMBOL_AS_ADDRESS)
+Allow a symbol value to be used as an immediate value in an instruction.
+
+minefficient-warnings
+Target Mask(INEFFICIENT_WARNINGS)
+Generate warnings when inefficient code is known to be generated.
+
+minefficient
+Target Mask(INEFFICIENT_WARNINGS) MaskExists Undocumented
diff --git a/gcc/config/picochip/predicates.md b/gcc/config/picochip/predicates.md
new file mode 100644
index 000000000..f9b8af5e0
--- /dev/null
+++ b/gcc/config/picochip/predicates.md
@@ -0,0 +1,72 @@
+;; GCC machine description for picochip
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Picochip Ltd (http://www.picochip.com)
+;; Maintained by Daniel Towner (dant@picochip.com) and Hariharan
+;; Sandanagobalane (hariharan@picochip.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not, see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "picochip_register_or_immediate_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "immediate_operand")))
+
+(define_predicate "power_of_2_imm_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ {
+ if (exact_log2 (INTVAL (op)) <= 16 && exact_log2 (INTVAL (op)) > 0)
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Limit the comparison operators to a selected subset.
+(define_predicate "picochip_supported_comparison_operator"
+ (and (match_operand 0 "comparison_operator")
+ (match_code "ne,eq,ge,geu,lt,ltu")))
+(define_predicate "picochip_peephole_comparison_operator"
+ (and (match_operand 0 "comparison_operator")
+ (match_code "ne,eq")))
+
+;; Allow selected arithmetic operators to apply a shift to their first
+;; operands
+
+(define_predicate "picochip_first_op_shift_operator"
+ (match_code "and,plus,minus,ior,xor"))
+
+;; The same as the previous predicate, but only allowing those
+;; operators which can accept an immediate.
+(define_predicate "picochip_first_op_shift_operator_imm"
+ (match_code "plus,minus"))
+
+;; Predicate on a J type integer.
+(define_predicate "picochip_J_operand"
+ (match_operand 0 "immediate_operand")
+ {
+ return (CONST_INT == GET_CODE(op) &&
+ picochip_const_ok_for_letter_p (INTVAL(op), 'J'));
+ })
+
+;; Is the operand suitable for use in a compare?
+
+(define_predicate "picochip_comparison_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "immediate_operand")
+ (match_test "picochip_const_ok_for_letter_p(INTVAL(op), 'O')"))))
+
diff --git a/gcc/config/picochip/t-picochip b/gcc/config/picochip/t-picochip
new file mode 100644
index 000000000..c74c12d30
--- /dev/null
+++ b/gcc/config/picochip/t-picochip
@@ -0,0 +1,80 @@
+# Copyright (C) 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compile the extra library functions.
+
+LIB2FUNCS_EXTRA = \
+ $(srcdir)/config/picochip/libgccExtras/ashrsi3.asm \
+ $(srcdir)/config/picochip/libgccExtras/ashlsi3.asm \
+ $(srcdir)/config/picochip/libgccExtras/divmodhi4.asm \
+ $(srcdir)/config/picochip/libgccExtras/udivmodhi4.asm \
+ $(srcdir)/config/picochip/libgccExtras/divmodsi4.asm \
+ $(srcdir)/config/picochip/libgccExtras/udivmodsi4.asm \
+ $(srcdir)/config/picochip/libgccExtras/divmod15.asm \
+ $(srcdir)/config/picochip/libgccExtras/ucmpsi2.asm \
+ $(srcdir)/config/picochip/libgccExtras/cmpsi2.asm \
+ $(srcdir)/config/picochip/libgccExtras/clzsi2.asm \
+ $(srcdir)/config/picochip/libgccExtras/adddi3.asm \
+ $(srcdir)/config/picochip/libgccExtras/subdi3.asm \
+ $(srcdir)/config/picochip/libgccExtras/lshrsi3.asm \
+ $(srcdir)/config/picochip/libgccExtras/parityhi2.asm \
+ $(srcdir)/config/picochip/libgccExtras/popcounthi2.asm
+
+# Prevent some of the more complicated libgcc functions from being
+# compiled. This is because they are generally too big to fit into an
+# AE anyway, so there is no point in having them. Also, some don't
+# compile properly so we'll ignore them for the moment.
+
+LIB1ASMFUNCS = _mulsc3 _divsc3
+LIB1ASMSRC = picochip/libgccExtras/fake_libgcc.asm
+
+# Turn off the building of exception handling libraries.
+LIB2ADDEH =
+LIB2ADDEHDEP =
+
+# Turn off ranlib on target libraries.
+RANLIB_FOR_TARGET = cat
+
+# Special libgcc setup. Make single/double floating point the same,
+# and use our own include files.
+TARGET_LIBGCC2_CFLAGS = -DDF=SF -I../../includes/
+
+# Switch off all debugging for the embedded libraries.
+# (embedded processors need small libraries by default).
+# NOTE: If the debug level is increased, turn off instruction scheduling.
+LIBGCC2_DEBUG_CFLAGS = -g0
+
+# Build all combinations of library for different multiply units, and
+# presence/absence of byte access.
+MULTILIB_OPTIONS = mmul-type=none/mmul-type=mac/mmul-type=mul mno-byte-access/mbyte-access
+
+# Using a mul unit (currently) implies that byte access is available.
+MULTILIB_EXCEPTIONS = mmul-type=mul/mno-byte-access
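+
+# For reference, the options above therefore yield five library variants:
+# {none,mac,mul} multiply types crossed with {no-byte-access,byte-access},
+# minus the excluded mul/no-byte-access pairing.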
+
+# We want fine-grained libraries, so use the new code
+# to build the floating point emulation libraries.
+FPBIT = fp-bit.c
+
+# Software floating point support. Floating point is not properly
+# supported, but its existence can be useful for some types of testing.
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#define FLOAT_ONLY' >> fp-bit.c
+ echo '#define SMALL_MACHINE' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
diff --git a/gcc/config/print-sysroot-suffix.sh b/gcc/config/print-sysroot-suffix.sh
new file mode 100644
index 000000000..55579ad86
--- /dev/null
+++ b/gcc/config/print-sysroot-suffix.sh
@@ -0,0 +1,145 @@
+#! /bin/sh
+# Script to generate SYSROOT_SUFFIX_SPEC equivalent to MULTILIB_OSDIRNAMES
+# Arguments are MULTILIB_OSDIRNAMES, MULTILIB_OPTIONS and MULTILIB_MATCHES.
+
+# Copyright (C) 2009 Free Software Foundation, Inc.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# This shell script produces a header file fragment that defines
+# SYSROOT_SUFFIX_SPEC. It assumes that the sysroots will have the same
+# structure and names used by the multilibs.
+
+# Invocation:
+# print-sysroot-suffix.sh \
+# MULTILIB_OSDIRNAMES \
+# MULTILIB_OPTIONS \
+# MULTILIB_MATCHES \
+# > t-sysroot-suffix.h
+
+# The three arguments exactly correspond to the variables of the same
+# names defined in the tmake_file fragments.
+
+# Example:
+# sh ./gcc/config/print-sysroot-suffix.sh "a=A" "a b/c/d" ""
+# =>
+# #undef SYSROOT_SUFFIX_SPEC
+# #define SYSROOT_SUFFIX_SPEC "" \
+# "%{a:" \
+# "%{b:A/b/;" \
+# "c:A/c/;" \
+# "d:A/d/;" \
+# ":A/};" \
+# ":}"
+
+# The script uses temporary helper scripts in order to permit a
+# recursive algorithm without the use of shell functions.
+
+set -e
+
+dirnames="$1"
+options="$2"
+matches="$3"
+
+cat > print-sysroot-suffix3.sh <<\EOF
+#! /bin/sh
+# Print all the multilib matches for this option
+result="$1"
+EOF
+for x in $matches; do
+ l=`echo $x | sed -e 's/=.*$//' -e 's/?/=/g'`
+ r=`echo $x | sed -e 's/^.*=//' -e 's/?/=/g'`
+ echo "[ \"\$1\" = \"$l\" ] && result=\"\$result|$r\"" >> print-sysroot-suffix3.sh
+done
+echo 'echo $result' >> print-sysroot-suffix3.sh
+chmod +x print-sysroot-suffix3.sh
+
+cat > print-sysroot-suffix2.sh <<\EOF
+#! /bin/sh
+# Recursive script to enumerate all multilib combinations, match against
+# multilib directories and output a spec string of the result.
+# Will fold identical trees.
+
+padding="$1"
+optstring="$2"
+shift 2
+n="\" \\
+$padding\""
+if [ $# = 0 ]; then
+EOF
+
+pat=
+for x in $dirnames; do
+ p=`echo $x | sed -e 's,=!,/$=/,'`
+ pat="$pat -e 's=^//$p='"
+done
+echo ' optstring=`echo "/$optstring" | sed '"$pat\`" >> print-sysroot-suffix2.sh
+cat >> print-sysroot-suffix2.sh <<\EOF
+ case $optstring in
+ //*)
+ ;;
+ *)
+ echo "$optstring"
+ ;;
+ esac
+else
+ thisopt="$1"
+ shift
+ bit=
+ lastcond=
+ result=
+ for x in `echo "$thisopt" | sed -e 's,/, ,g'`; do
+ case $x in
+EOF
+for x in `echo "$options" | sed -e 's,/, ,g'`; do
+ match=`./print-sysroot-suffix3.sh "$x"`
+ echo "$x) optmatch=\"$match\" ;;" >> print-sysroot-suffix2.sh
+done
+cat >> print-sysroot-suffix2.sh <<\EOF
+ esac
+ bit=`"$0" "$padding " "$optstring$x/" "$@"`
+ if [ -z "$lastopt" ]; then
+ lastopt="$optmatch"
+ else
+ if [ "$lastbit" = "$bit" ]; then
+ lastopt="$lastopt|$optmatch"
+ else
+ result="$result$lastopt:$lastbit;$n"
+ lastopt="$optmatch"
+ fi
+ fi
+ lastbit="$bit"
+ done
+ bit=`"$0" "$padding " "$optstring" "$@"`
+ if [ "$bit" = "$lastbit" ]; then
+ if [ -z "$result" ]; then
+ echo "$bit"
+ else
+ echo "$n%{$result:$bit}"
+ fi
+ else
+ echo "$n%{$result$lastopt:$lastbit;$n:$bit}"
+ fi
+fi
+EOF
+
+chmod +x ./print-sysroot-suffix2.sh
+result=`./print-sysroot-suffix2.sh "" "/" $options`
+echo "#undef SYSROOT_SUFFIX_SPEC"
+echo "#define SYSROOT_SUFFIX_SPEC \"$result\""
+rm print-sysroot-suffix2.sh
+rm print-sysroot-suffix3.sh
diff --git a/gcc/config/rpath.opt b/gcc/config/rpath.opt
new file mode 100644
index 000000000..c76e2fc59
--- /dev/null
+++ b/gcc/config/rpath.opt
@@ -0,0 +1,29 @@
+; -rpath option to the driver.
+
+; Copyright (C) 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+rpath
+Driver Separate
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/rs6000/40x.md b/gcc/config/rs6000/40x.md
new file mode 100644
index 000000000..eaf1222ec
--- /dev/null
+++ b/gcc/config/rs6000/40x.md
@@ -0,0 +1,120 @@
+;; Scheduling description for IBM PowerPC 403 and PowerPC 405 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc40x,ppc40xiu")
+(define_cpu_unit "bpu_40x,fpu_405" "ppc40x")
+(define_cpu_unit "iu_40x" "ppc40xiu")
+
+;; PPC401 / PPC403 / PPC405 32-bit integer only IU BPU
+;; Embedded PowerPC controller
+;; In-order execution
+;; Max issue two insns/cycle (includes one branch)
+(define_insn_reservation "ppc403-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x,iu_40x")
+
+(define_insn_reservation "ppc403-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x,iu_40x,iu_40x")
+
+(define_insn_reservation "ppc403-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x,nothing,bpu_40x")
+
+(define_insn_reservation "ppc403-imul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppc403"))
+ "iu_40x*4")
+
+(define_insn_reservation "ppc405-imul" 5
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc405"))
+ "iu_40x*4")
+
+(define_insn_reservation "ppc405-imul2" 3
+ (and (eq_attr "type" "imul2")
+ (eq_attr "cpu" "ppc405"))
+ "iu_40x*2")
+
+(define_insn_reservation "ppc405-imul3" 2
+ (and (eq_attr "type" "imul3")
+ (eq_attr "cpu" "ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-idiv" 33
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x*33")
+
+(define_insn_reservation "ppc403-mfcr" 2
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "iu_40x")
+
+(define_insn_reservation "ppc403-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "bpu_40x")
+
+(define_insn_reservation "ppc403-cr" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc403,ppc405"))
+ "bpu_40x")
+
+(define_insn_reservation "ppc405-float" 11
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,\
+ fpcompare,fp,dmul,sdiv,ddiv")
+ (eq_attr "cpu" "ppc405"))
+ "fpu_405*10")
diff --git a/gcc/config/rs6000/440.md b/gcc/config/rs6000/440.md
new file mode 100644
index 000000000..b329e7897
--- /dev/null
+++ b/gcc/config/rs6000/440.md
@@ -0,0 +1,133 @@
+;; Scheduling description for IBM PowerPC 440 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; PPC440 Embedded PowerPC controller
+;; dual issue
+;; i_pipe - complex integer / compare / branch
+;; j_pipe - simple integer arithmetic
+;; l_pipe - load-store
+;; f_pipe - floating point arithmetic
+
+(define_automaton "ppc440_core,ppc440_apu")
+(define_cpu_unit "ppc440_i_pipe,ppc440_j_pipe,ppc440_l_pipe" "ppc440_core")
+(define_cpu_unit "ppc440_f_pipe" "ppc440_apu")
+(define_cpu_unit "ppc440_issue_0,ppc440_issue_1" "ppc440_core")
+
+(define_reservation "ppc440_issue" "ppc440_issue_0|ppc440_issue_1")
+
+
+(define_insn_reservation "ppc440-load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_l_pipe")
+
+(define_insn_reservation "ppc440-store" 3
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_l_pipe")
+
+(define_insn_reservation "ppc440-fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_l_pipe")
+
+(define_insn_reservation "ppc440-fpstore" 3
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_l_pipe")
+
+(define_insn_reservation "ppc440-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
+ trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe|ppc440_j_pipe")
+
+(define_insn_reservation "ppc440-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue_0+ppc440_issue_1,\
+ ppc440_i_pipe|ppc440_j_pipe,ppc440_i_pipe|ppc440_j_pipe")
+
+(define_insn_reservation "ppc440-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue_0+ppc440_issue_1,ppc440_i_pipe|ppc440_j_pipe,\
+ ppc440_i_pipe|ppc440_j_pipe,ppc440_i_pipe|ppc440_j_pipe")
+
+(define_insn_reservation "ppc440-imul" 3
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-imul2" 2
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-idiv" 34
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe*33")
+
+(define_insn_reservation "ppc440-branch" 1
+ (and (eq_attr "type" "branch,jmpreg,isync")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-compare" 2
+ (and (eq_attr "type" "cmp,fast_compare,compare,cr_logical,delayed_cr,mfcr")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-fpcompare" 3 ; 2
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_f_pipe+ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-fp" 5
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_f_pipe")
+
+(define_insn_reservation "ppc440-sdiv" 19
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_f_pipe*15")
+
+(define_insn_reservation "ppc440-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_f_pipe*29")
+
+(define_insn_reservation "ppc440-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
+(define_insn_reservation "ppc440-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc440"))
+ "ppc440_issue,ppc440_i_pipe")
+
diff --git a/gcc/config/rs6000/476.md b/gcc/config/rs6000/476.md
new file mode 100644
index 000000000..3f50bafa0
--- /dev/null
+++ b/gcc/config/rs6000/476.md
@@ -0,0 +1,142 @@
+;; Scheduling description for IBM PowerPC 476 processor.
+;; Copyright (C) 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Peter Bergner (bergner@vnet.ibm.com).
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; PPC476 Embedded PowerPC controller
+;; 3 issue (476) / 4 issue (476fp)
+;;
+;; i_pipe - complex integer / compare
+;; lj_pipe - load-store / simple integer arithmetic
+;; b_pipe - branch pipe
+;; f_pipe - floating point arithmetic
+
+(define_automaton "ppc476_core,ppc476_apu")
+
+(define_cpu_unit "ppc476_i_pipe,ppc476_lj_pipe,ppc476_b_pipe" "ppc476_core")
+(define_cpu_unit "ppc476_issue_fp,ppc476_f_pipe" "ppc476_apu")
+(define_cpu_unit "ppc476_issue_0,ppc476_issue_1,ppc476_issue_2" "ppc476_core")
+
+(define_reservation "ppc476_issue" "ppc476_issue_0|ppc476_issue_1|ppc476_issue_2")
+(define_reservation "ppc476_issue2" "ppc476_issue_0+ppc476_issue_1\
+ |ppc476_issue_0+ppc476_issue_2\
+ |ppc476_issue_1+ppc476_issue_2")
+(define_reservation "ppc476_issue3" "ppc476_issue_0+ppc476_issue_1+ppc476_issue_2")
+
+(define_insn_reservation "ppc476-load" 4
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-store" 4
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpstore" 4
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-simple-integer" 1
+ (and (eq_attr "type" "integer,insert_word,var_shift_rotate,exts,shift")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-complex-integer" 1
+ (and (eq_attr "type" "cmp,cr_logical,delayed_cr,cntlz,isel,isync,sync,trap")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-compare" 4
+ (and (eq_attr "type" "compare,delayed_compare,fast_compare,mfcr,mfcrf,\
+ mtcr,mfjmpr,mtjmpr,var_delayed_compare")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-imul" 4
+ (and (eq_attr "type" "imul,imul_compare,imul2,imul3")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-idiv" 11
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_i_pipe*11")
+
+(define_insn_reservation "ppc476-branch" 1
+ (and (eq_attr "type" "branch,jmpreg")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue,\
+ ppc476_b_pipe")
+
+(define_insn_reservation "ppc476-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue2,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue3,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe,\
+ ppc476_i_pipe|ppc476_lj_pipe")
+
+(define_insn_reservation "ppc476-fpcompare" 6
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue+ppc476_issue_fp,\
+ ppc476_f_pipe+ppc476_i_pipe")
+
+(define_insn_reservation "ppc476-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,\
+ ppc476_f_pipe")
+
+(define_insn_reservation "ppc476-sdiv" 19
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,
+ ppc476_f_pipe*19")
+
+(define_insn_reservation "ppc476-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc476"))
+ "ppc476_issue_fp,\
+ ppc476_f_pipe*33")
+
diff --git a/gcc/config/rs6000/603.md b/gcc/config/rs6000/603.md
new file mode 100644
index 000000000..a042729a1
--- /dev/null
+++ b/gcc/config/rs6000/603.md
@@ -0,0 +1,143 @@
+;; Scheduling description for PowerPC 603 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc603,ppc603fp")
+(define_cpu_unit "iu_603" "ppc603")
+(define_cpu_unit "fpu_603" "ppc603fp")
+(define_cpu_unit "lsu_603,bpu_603,sru_603" "ppc603")
+
+;; PPC603/PPC603e 32-bit IU, LSU, FPU, BPU, SRU
+;; Max issue 3 insns/clock cycle (includes 1 branch)
+
+;; Branches go straight to the BPU. All other insns are handled
+;; by a dispatch unit which can issue a max of 2 insns per cycle.
+
+;; The PPC603e user's manual recommends that to reduce branch mispredictions,
+;; the insn that sets CR bits should be separated from the branch insn
+;; that evaluates them; separation by more than 9 insns ensures that the CR
+;; bits will be immediately available for execution.
+;; This could be artificially achieved by exaggerating the latency of
+;; compare insns but at the expense of a poorer schedule.
+
+;; CR insns get executed in the SRU. Not modelled.
+
+(define_insn_reservation "ppc603-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ux,load_u,load_l")
+ (eq_attr "cpu" "ppc603"))
+ "lsu_603")
+
+(define_insn_reservation "ppc603-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc603"))
+ "lsu_603*2")
+
+(define_insn_reservation "ppc603-fpload" 2
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc603"))
+ "lsu_603")
+
+(define_insn_reservation "ppc603-storec" 8
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "ppc603"))
+ "lsu_603")
+
+(define_insn_reservation "ppc603-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603")
+
+(define_insn_reservation "ppc603-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603,iu_603")
+
+(define_insn_reservation "ppc603-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603,iu_603,iu_603")
+
+; This takes 2 or 3 cycles
+(define_insn_reservation "ppc603-imul" 3
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603*2")
+
+(define_insn_reservation "ppc603-imul2" 2
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603*2")
+
+(define_insn_reservation "ppc603-idiv" 37
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603*37")
+
+(define_insn_reservation "ppc603-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc603"))
+ "iu_603,nothing,bpu_603")
+
+(define_insn_reservation "ppc603-fpcompare" 3
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc603"))
+ "(fpu_603+iu_603*2),bpu_603")
+
+(define_insn_reservation "ppc603-fp" 3
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppc603"))
+ "fpu_603")
+
+(define_insn_reservation "ppc603-dmul" 4
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppc603"))
+ "fpu_603*2")
+
+; Divides are not pipelined
+(define_insn_reservation "ppc603-sdiv" 18
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc603"))
+ "fpu_603*18")
+
+(define_insn_reservation "ppc603-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc603"))
+ "fpu_603*33")
+
+(define_insn_reservation "ppc603-crlogical" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr,mfcr,mtcr")
+ (eq_attr "cpu" "ppc603"))
+ "sru_603")
+
+(define_insn_reservation "ppc603-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc603"))
+ "sru_603")
+
+(define_insn_reservation "ppc603-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr,isync,sync")
+ (eq_attr "cpu" "ppc603"))
+ "sru_603")
+
+(define_insn_reservation "ppc603-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "ppc603"))
+ "bpu_603")
+
diff --git a/gcc/config/rs6000/6xx.md b/gcc/config/rs6000/6xx.md
new file mode 100644
index 000000000..b0de97315
--- /dev/null
+++ b/gcc/config/rs6000/6xx.md
@@ -0,0 +1,275 @@
+;; Scheduling description for PowerPC 604, PowerPC 604e, PowerPC 620,
+;; and PowerPC 630 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc6xx,ppc6xxfp,ppc6xxfp2")
+(define_cpu_unit "iu1_6xx,iu2_6xx,mciu_6xx" "ppc6xx")
+(define_cpu_unit "fpu_6xx" "ppc6xxfp")
+(define_cpu_unit "fpu1_6xx,fpu2_6xx" "ppc6xxfp2")
+(define_cpu_unit "lsu_6xx,bpu_6xx,cru_6xx" "ppc6xx")
+
+;; PPC604 32-bit 2xSCIU, MCIU, LSU, FPU, BPU
+;; PPC604e 32-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU
+;; MCIU used for imul/idiv and moves from/to spr
+;; LSU 2 stage pipelined
+;; FPU 3 stage pipelined
+;; Max issue 4 insns/clock cycle
+
+;; PPC604e is PPC604 with larger caches and a CRU. In the 604
+;; the CR logical operations are handled in the BPU.
+;; In the 604e, the CRU shares a dispatch bus with the BPU, so only one
+;; condition-register or branch insn can be issued per clock. Not modelled.
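+;; A hypothetical way to model it would be a shared unit, e.g.
+;; (define_cpu_unit "crbpu_bus_6xx" "ppc6xx"), reserved by both the
+;; CR-logical and branch reservations via "+crbpu_bus_6xx"; it is left
+;; out here to keep the automaton small.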
+
+;; PPC620 64-bit 2xSCIU, MCIU, LSU, FPU, BPU, CRU
+;; PPC630 64-bit 2xSCIU, MCIU, LSU, 2xFPU, BPU, CRU
+;; Max issue 4 insns/clock cycle
+;; Out-of-order execution, in-order completion
+
+;; No following instruction can dispatch in the same cycle as a branch
+;; instruction. Not modelled. This is not a problem if RCSP is not
+;; enabled, since the scheduler stops a schedule when it reaches a branch.
+
+;; Four insns can be dispatched per cycle.
+
+(define_insn_reservation "ppc604-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc604-fpload" 3
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc604-store" 3
+ (and (eq_attr "type" "store,fpstore,store_ux,store_u,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc604-llsc" 3
+ (and (eq_attr "type" "load_l,store_c")
+ (eq_attr "cpu" "ppc604,ppc604e"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc630-llsc" 4
+ (and (eq_attr "type" "load_l,store_c")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc604-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "iu1_6xx|iu2_6xx")
+
+(define_insn_reservation "ppc604-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx")
+
+(define_insn_reservation "ppc604-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx,iu1_6xx|iu2_6xx")
+
+(define_insn_reservation "ppc604-imul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppc604"))
+ "mciu_6xx*2")
+
+(define_insn_reservation "ppc604e-imul" 2
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppc604e"))
+ "mciu_6xx")
+
+(define_insn_reservation "ppc620-imul" 5
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "mciu_6xx*3")
+
+(define_insn_reservation "ppc620-imul2" 4
+ (and (eq_attr "type" "imul2")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "mciu_6xx*3")
+
+(define_insn_reservation "ppc620-imul3" 3
+ (and (eq_attr "type" "imul3")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "mciu_6xx*3")
+
+(define_insn_reservation "ppc620-lmul" 7
+ (and (eq_attr "type" "lmul,lmul_compare")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "mciu_6xx*5")
+
+(define_insn_reservation "ppc604-idiv" 20
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc604,ppc604e"))
+ "mciu_6xx*19")
+
+(define_insn_reservation "ppc620-idiv" 37
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc620"))
+ "mciu_6xx*36")
+
+(define_insn_reservation "ppc630-idiv" 21
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc630"))
+ "mciu_6xx*20")
+
+(define_insn_reservation "ppc620-ldiv" 37
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "mciu_6xx*36")
+
+(define_insn_reservation "ppc604-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "(iu1_6xx|iu2_6xx)")
+
+; FPU PPC604{,e},PPC620
+(define_insn_reservation "ppc604-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "fpu_6xx")
+
+(define_insn_reservation "ppc604-fp" 3
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "fpu_6xx")
+
+(define_insn_reservation "ppc604-dmul" 3
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "fpu_6xx")
+
+; Divides are not pipelined
+(define_insn_reservation "ppc604-sdiv" 18
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "fpu_6xx*18")
+
+(define_insn_reservation "ppc604-ddiv" 32
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "fpu_6xx*32")
+
+(define_insn_reservation "ppc620-ssqrt" 31
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppc620"))
+ "fpu_6xx*31")
+
+(define_insn_reservation "ppc620-dsqrt" 31
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppc620"))
+ "fpu_6xx*31")
+
+
+; 2xFPU PPC630
+(define_insn_reservation "ppc630-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx|fpu2_6xx")
+
+(define_insn_reservation "ppc630-fp" 3
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx|fpu2_6xx")
+
+(define_insn_reservation "ppc630-sdiv" 17
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx*17|fpu2_6xx*17")
+
+(define_insn_reservation "ppc630-ddiv" 21
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx*21|fpu2_6xx*21")
+
+(define_insn_reservation "ppc630-ssqrt" 18
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx*18|fpu2_6xx*18")
+
+(define_insn_reservation "ppc630-dsqrt" 25
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppc630"))
+ "fpu1_6xx*25|fpu2_6xx*25")
+
+(define_insn_reservation "ppc604-mfcr" 3
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "mciu_6xx")
+
+(define_insn_reservation "ppc604-mtcr" 2
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "iu1_6xx|iu2_6xx")
+
+(define_insn_reservation "ppc604-crlogical" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc604"))
+ "bpu_6xx")
+
+(define_insn_reservation "ppc604e-crlogical" 2
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc604e,ppc620,ppc630"))
+ "cru_6xx")
+
+(define_insn_reservation "ppc604-mtjmpr" 2
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "mciu_6xx")
+
+(define_insn_reservation "ppc604-mfjmpr" 3
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620"))
+ "mciu_6xx")
+
+(define_insn_reservation "ppc630-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc630"))
+ "mciu_6xx")
+
+(define_insn_reservation "ppc604-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "ppc604,ppc604e,ppc620,ppc630"))
+ "bpu_6xx")
+
+(define_insn_reservation "ppc604-isync" 0
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "ppc604,ppc604e"))
+ "bpu_6xx")
+
+(define_insn_reservation "ppc630-isync" 6
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "bpu_6xx")
+
+(define_insn_reservation "ppc604-sync" 35
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "ppc604,ppc604e"))
+ "lsu_6xx")
+
+(define_insn_reservation "ppc630-sync" 26
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "ppc620,ppc630"))
+ "lsu_6xx")
+
diff --git a/gcc/config/rs6000/7450.md b/gcc/config/rs6000/7450.md
new file mode 100644
index 000000000..ccaa3b20d
--- /dev/null
+++ b/gcc/config/rs6000/7450.md
@@ -0,0 +1,185 @@
+;; Scheduling description for Motorola PowerPC 7450 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc7450,ppc7450mciu,ppc7450fp,ppc7450vec")
+(define_cpu_unit "iu1_7450,iu2_7450,iu3_7450" "ppc7450")
+(define_cpu_unit "mciu_7450" "ppc7450mciu")
+(define_cpu_unit "fpu_7450" "ppc7450fp")
+(define_cpu_unit "lsu_7450,bpu_7450" "ppc7450")
+(define_cpu_unit "du1_7450,du2_7450,du3_7450" "ppc7450")
+(define_cpu_unit "vecsmpl_7450,veccmplx_7450,vecflt_7450,vecperm_7450" "ppc7450vec")
+(define_cpu_unit "vdu1_7450,vdu2_7450" "ppc7450vec")
+
+
+;; PPC7450 32-bit 3xIU, MCIU, LSU, SRU, FPU, BPU, 4xVEC
+;; IU1,IU2,IU3 can perform all integer operations
+;; MCIU performs imul and idiv, cr logical, SPR moves
+;; LSU 2 stage pipelined
+;; FPU 3 stage pipelined
+;; It also has 4 vector units, one for each type of vector instruction.
+;; However, only 2 vector instructions can be dispatched per cycle.
+;; Max issue 3 insns/clock cycle (includes 1 branch)
+;; In-order execution
+
+;; Branches go straight to the BPU. All other insns are handled
+;; by a dispatch unit which can issue a max of 3 insns per cycle.
+(define_reservation "ppc7450_du" "du1_7450|du2_7450|du3_7450")
+(define_reservation "ppc7450_vec_du" "vdu1_7450|vdu2_7450")
+
+(define_insn_reservation "ppc7450-load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\
+ load_ux,load_u,vecload")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450")
+
+(define_insn_reservation "ppc7450-store" 3
+ (and (eq_attr "type" "store,store_ux,store_u,vecstore")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450")
+
+(define_insn_reservation "ppc7450-fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450")
+
+(define_insn_reservation "ppc7450-fpstore" 3
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450*3")
+
+(define_insn_reservation "ppc7450-llsc" 3
+ (and (eq_attr "type" "load_l,store_c")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450")
+
+(define_insn_reservation "ppc7450-sync" 35
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,lsu_7450")
+
+(define_insn_reservation "ppc7450-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
+ trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,iu1_7450|iu2_7450|iu3_7450")
+
+(define_insn_reservation "ppc7450-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,iu1_7450|iu2_7450|iu3_7450,iu1_7450|iu2_7450|iu3_7450")
+
+(define_insn_reservation "ppc7450-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,iu1_7450|iu2_7450|iu3_7450,\
+ iu1_7450|iu2_7450|iu3_7450,iu1_7450|iu2_7450|iu3_7450")
+
+(define_insn_reservation "ppc7450-imul" 4
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,mciu_7450*2")
+
+(define_insn_reservation "ppc7450-imul2" 3
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,mciu_7450")
+
+(define_insn_reservation "ppc7450-idiv" 23
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,mciu_7450*23")
+
+(define_insn_reservation "ppc7450-compare" 2
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,(iu1_7450|iu2_7450|iu3_7450)")
+
+(define_insn_reservation "ppc7450-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,fpu_7450")
+
+(define_insn_reservation "ppc7450-fp" 5
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,fpu_7450")
+
+; Divides are not pipelined
+(define_insn_reservation "ppc7450-sdiv" 21
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,fpu_7450*21")
+
+(define_insn_reservation "ppc7450-ddiv" 35
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,fpu_7450*35")
+
+(define_insn_reservation "ppc7450-mfcr" 2
+ (and (eq_attr "type" "mfcr,mtcr")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,mciu_7450")
+
+(define_insn_reservation "ppc7450-crlogical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,mciu_7450")
+
+(define_insn_reservation "ppc7450-mtjmpr" 2
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc7450"))
+ "nothing,mciu_7450*2")
+
+(define_insn_reservation "ppc7450-mfjmpr" 3
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc7450"))
+ "nothing,mciu_7450*2")
+
+(define_insn_reservation "ppc7450-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppc7450"))
+ "nothing,bpu_7450")
+
+;; Altivec
+(define_insn_reservation "ppc7450-vecsimple" 1
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,ppc7450_vec_du,vecsmpl_7450")
+
+(define_insn_reservation "ppc7450-veccomplex" 4
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,ppc7450_vec_du,veccmplx_7450")
+
+(define_insn_reservation "ppc7450-veccmp" 2
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,ppc7450_vec_du,veccmplx_7450")
+
+(define_insn_reservation "ppc7450-vecfloat" 4
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,ppc7450_vec_du,vecflt_7450")
+
+(define_insn_reservation "ppc7450-vecperm" 2
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "ppc7450"))
+ "ppc7450_du,ppc7450_vec_du,vecperm_7450")
+
diff --git a/gcc/config/rs6000/750cl.h b/gcc/config/rs6000/750cl.h
new file mode 100644
index 000000000..0fa169845
--- /dev/null
+++ b/gcc/config/rs6000/750cl.h
@@ -0,0 +1,30 @@
+/* Enable 750cl paired single support.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Revital Eres (eres@il.ibm.com)
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_PAIRED_FLOAT
+#define TARGET_PAIRED_FLOAT rs6000_paired_float
+
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC "-m750cl"
+
diff --git a/gcc/config/rs6000/7xx.md b/gcc/config/rs6000/7xx.md
new file mode 100644
index 000000000..edbde75c2
--- /dev/null
+++ b/gcc/config/rs6000/7xx.md
@@ -0,0 +1,184 @@
+;; Scheduling description for Motorola PowerPC 750 and PowerPC 7400 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc7xx,ppc7xxfp")
+(define_cpu_unit "iu1_7xx,iu2_7xx" "ppc7xx")
+(define_cpu_unit "fpu_7xx" "ppc7xxfp")
+(define_cpu_unit "lsu_7xx,bpu_7xx,sru_7xx" "ppc7xx")
+(define_cpu_unit "du1_7xx,du2_7xx" "ppc7xx")
+(define_cpu_unit "veccmplx_7xx,vecperm_7xx,vdu_7xx" "ppc7xx")
+
+;; PPC740/PPC750/PPC7400 32-bit 2xIU, LSU, SRU, FPU, BPU
+;; IU1 can perform all integer operations
+;; IU2 can perform all integer operations except imul and idiv
+;; LSU 2 stage pipelined
+;; FPU 3 stage pipelined
+;; Max issue 3 insns/clock cycle (includes 1 branch)
+;; In-order execution
+
+
+;; The PPC750 user's manual recommends that to reduce branch mispredictions,
+;; the insn that sets CR bits should be separated from the branch insn
+;; that evaluates them. There is no advantage to having more than 10 cycles
+;; of separation.
+;; This could be artificially achieved by exaggerating the latency of
+;; compare insns but at the expense of a poorer schedule.
+
+;; Branches go straight to the BPU. All other insns are handled
+;; by a dispatch unit which can issue a max of 2 insns per cycle.
+(define_reservation "ppc750_du" "du1_7xx|du2_7xx")
+(define_reservation "ppc7400_vec_du" "vdu_7xx")
+
+(define_insn_reservation "ppc750-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\
+ load_ux,load_u,fpload,fpload_ux,fpload_u,\
+ vecload,load_l")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,lsu_7xx")
+
+(define_insn_reservation "ppc750-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u,\
+ fpstore,fpstore_ux,fpstore_u,vecstore")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,lsu_7xx")
+
+(define_insn_reservation "ppc750-storec" 8
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,lsu_7xx")
+
+(define_insn_reservation "ppc750-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
+ trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx|iu2_7xx")
+
+(define_insn_reservation "ppc750-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx")
+
+(define_insn_reservation "ppc750-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx,iu1_7xx|iu2_7xx")
+
+(define_insn_reservation "ppc750-imul" 4
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx*4")
+
+(define_insn_reservation "ppc750-imul2" 3
+ (and (eq_attr "type" "imul2")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx*2")
+
+(define_insn_reservation "ppc750-imul3" 2
+ (and (eq_attr "type" "imul3")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx")
+
+(define_insn_reservation "ppc750-idiv" 19
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx*19")
+
+(define_insn_reservation "ppc750-compare" 2
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,(iu1_7xx|iu2_7xx)")
+
+(define_insn_reservation "ppc750-fpcompare" 2
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,fpu_7xx")
+
+(define_insn_reservation "ppc750-fp" 3
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,fpu_7xx")
+
+(define_insn_reservation "ppc750-dmul" 4
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppc750"))
+ "ppc750_du,fpu_7xx*2")
+
+(define_insn_reservation "ppc7400-dmul" 3
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppc7400"))
+ "ppc750_du,fpu_7xx")
+
+; Divides are not pipelined
+(define_insn_reservation "ppc750-sdiv" 17
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,fpu_7xx*17")
+
+(define_insn_reservation "ppc750-ddiv" 31
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,fpu_7xx*31")
+
+(define_insn_reservation "ppc750-mfcr" 2
+ (and (eq_attr "type" "mfcr,mtcr")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "ppc750_du,iu1_7xx")
+
+(define_insn_reservation "ppc750-crlogical" 3
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "nothing,sru_7xx*2")
+
+(define_insn_reservation "ppc750-mtjmpr" 2
+ (and (eq_attr "type" "mtjmpr,isync,sync")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "nothing,sru_7xx*2")
+
+(define_insn_reservation "ppc750-mfjmpr" 3
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "nothing,sru_7xx*2")
+
+(define_insn_reservation "ppc750-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppc750,ppc7400"))
+ "nothing,bpu_7xx")
+
+;; Altivec
+(define_insn_reservation "ppc7400-vecsimple" 1
+ (and (eq_attr "type" "vecsimple,veccmp")
+ (eq_attr "cpu" "ppc7400"))
+ "ppc750_du,ppc7400_vec_du,veccmplx_7xx")
+
+(define_insn_reservation "ppc7400-veccomplex" 4
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "ppc7400"))
+ "ppc750_du,ppc7400_vec_du,veccmplx_7xx")
+
+(define_insn_reservation "ppc7400-vecfloat" 4
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "ppc7400"))
+ "ppc750_du,ppc7400_vec_du,veccmplx_7xx")
+
+(define_insn_reservation "ppc7400-vecperm" 2
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "ppc7400"))
+ "ppc750_du,ppc7400_vec_du,vecperm_7xx")
+
diff --git a/gcc/config/rs6000/8540.md b/gcc/config/rs6000/8540.md
new file mode 100644
index 000000000..4096dff43
--- /dev/null
+++ b/gcc/config/rs6000/8540.md
@@ -0,0 +1,250 @@
+;; Pipeline description for Motorola PowerPC 8540 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppc8540_most,ppc8540_long,ppc8540_retire")
+(define_cpu_unit "ppc8540_decode_0,ppc8540_decode_1" "ppc8540_most")
+
+;; We don't simulate the general issue queue (GIC). If we have an SU
+;; insn and then an SU1 insn, they cannot be issued in the same cycle
+;; (although an SU1 insn followed by an SU insn can be) because the SU
+;; insn will go to SU1 from the GIC0 entry. Fortunately, the first-cycle
+;; multipass insn scheduling will detect this situation and issue the
+;; SU1 insn and then the SU insn.
+(define_cpu_unit "ppc8540_issue_0,ppc8540_issue_1" "ppc8540_most")
+
+;; We could describe the completion buffer slots in combination with the
+;; retirement units and the order of completion, but the resulting
+;; automaton would behave in the same way because we cannot describe
+;; the real latency time while taking in-order completion into account.
+;; Actually we could define the real latency time by querying reserved
+;; automaton units, but the current scheduler uses latency times before
+;; issuing insns and making any reservations.
+;;
+;; So our description aims to achieve an insn schedule in which the
+;; insns would not wait in the completion buffer.
+(define_cpu_unit "ppc8540_retire_0,ppc8540_retire_1" "ppc8540_retire")
+
+;; Branch unit:
+(define_cpu_unit "ppc8540_bu" "ppc8540_most")
+
+;; SU:
+(define_cpu_unit "ppc8540_su0_stage0,ppc8540_su1_stage0" "ppc8540_most")
+
+;; We could describe MU subunits here for float multiply, float add,
+;; etc., but the resulting automaton would behave the same way as the
+;; single pipeline described below because the MU can start only one
+;; insn per cycle. Actually we could simplify the automaton further by
+;; not describing stages 1-3; the resulting automaton would be the same.
+(define_cpu_unit "ppc8540_mu_stage0,ppc8540_mu_stage1" "ppc8540_most")
+(define_cpu_unit "ppc8540_mu_stage2,ppc8540_mu_stage3" "ppc8540_most")
+
+;; The following unit is used to describe non-pipelined division.
+(define_cpu_unit "ppc8540_mu_div" "ppc8540_long")
+
+;; Here we simplify the LSU unit description by not describing the stages.
+(define_cpu_unit "ppc8540_lsu" "ppc8540_most")
+
+;; The following units are used to make automata deterministic
+(define_cpu_unit "present_ppc8540_decode_0" "ppc8540_most")
+(define_cpu_unit "present_ppc8540_issue_0" "ppc8540_most")
+(define_cpu_unit "present_ppc8540_retire_0" "ppc8540_retire")
+(define_cpu_unit "present_ppc8540_su0_stage0" "ppc8540_most")
+
+;; The following presence sets make the automata deterministic when the
+;; ndfa option is used.
+(presence_set "present_ppc8540_decode_0" "ppc8540_decode_0")
+(presence_set "present_ppc8540_issue_0" "ppc8540_issue_0")
+(presence_set "present_ppc8540_retire_0" "ppc8540_retire_0")
+(presence_set "present_ppc8540_su0_stage0" "ppc8540_su0_stage0")
+
+;; Some useful abbreviations.
+(define_reservation "ppc8540_decode"
+ "ppc8540_decode_0|ppc8540_decode_1+present_ppc8540_decode_0")
+(define_reservation "ppc8540_issue"
+ "ppc8540_issue_0|ppc8540_issue_1+present_ppc8540_issue_0")
+(define_reservation "ppc8540_retire"
+ "ppc8540_retire_0|ppc8540_retire_1+present_ppc8540_retire_0")
+(define_reservation "ppc8540_su_stage0"
+ "ppc8540_su0_stage0|ppc8540_su1_stage0+present_ppc8540_su0_stage0")
+
+;; Simple SU insns
+(define_insn_reservation "ppc8540_su" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,cmp,compare,\
+ delayed_compare,var_delayed_compare,fast_compare,\
+ shift,trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+(define_insn_reservation "ppc8540_two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
+ ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+(define_insn_reservation "ppc8540_three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
+ ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
+ ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+;; Branch. Actually this latency time is not used by the scheduler.
+(define_insn_reservation "ppc8540_branch" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_bu,ppc8540_retire")
+
+;; Multiply
+(define_insn_reservation "ppc8540_multiply" 4
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
+ ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")
+
+;; Divide. We use the average latency time here. We omit reserving a
+;; retire unit because the resulting automaton would be huge. We ignore
+;; reservation of mu_stage3 here because we use the average latency
+;; time.
+(define_insn_reservation "ppc8540_divide" 14
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
+ ppc8540_mu_div*13")
+
+;; CR logical
+(define_insn_reservation "ppc8540_cr_logical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_bu,ppc8540_retire")
+
+;; Mfcr
+(define_insn_reservation "ppc8540_mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")
+
+;; Mtcrf
+(define_insn_reservation "ppc8540_mtcrf" 1
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")
+
+;; Mtjmpr
+(define_insn_reservation "ppc8540_mtjmpr" 1
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+;; Loads
+(define_insn_reservation "ppc8540_load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,sync")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")
+
+;; Stores.
+(define_insn_reservation "ppc8540_store" 3
+ (and (eq_attr "type" "store,store_ux,store_u,store_c")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")
+
+;; Simple FP
+(define_insn_reservation "ppc8540_simple_float" 1
+ (and (eq_attr "type" "fpsimple")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+;; FP
+(define_insn_reservation "ppc8540_float" 4
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
+ ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")
+
+;; Float divides. We omit reserving a retire unit and mu_stage3
+;; because the resulting automaton would be huge.
+(define_insn_reservation "ppc8540_float_vector_divide" 29
+ (and (eq_attr "type" "vecfdiv")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
+ ppc8540_mu_div*28")
+
+;; Brinc
+(define_insn_reservation "ppc8540_brinc" 1
+ (and (eq_attr "type" "brinc")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+;; Simple vector
+(define_insn_reservation "ppc8540_simple_vector" 1
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")
+
+;; Simple vector compare
+(define_insn_reservation "ppc8540_simple_vector_compare" 1
+ (and (eq_attr "type" "veccmpsimple")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")
+
+;; Vector compare
+(define_insn_reservation "ppc8540_vector_compare" 1
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")
+
+;; evsplatfi evsplati
+(define_insn_reservation "ppc8540_vector_perm" 1
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")
+
+;; Vector float
+(define_insn_reservation "ppc8540_float_vector" 4
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
+ ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")
+
+;; Vector divides: use the average latency. We omit reserving a retire
+;; unit because the resulting automaton would be huge. We ignore
+;; reservation of mu_stage3 here because we use the average latency time.
+(define_insn_reservation "ppc8540_vector_divide" 14
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
+ ppc8540_mu_div*13")
+
+;; Complex vector.
+(define_insn_reservation "ppc8540_complex_vector" 4
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
+ ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")
+
+;; Vector load
+(define_insn_reservation "ppc8540_vector_load" 3
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")
+
+;; Vector store
+(define_insn_reservation "ppc8540_vector_store" 3
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "ppc8540"))
+ "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")
diff --git a/gcc/config/rs6000/a2.md b/gcc/config/rs6000/a2.md
new file mode 100644
index 000000000..851d8949f
--- /dev/null
+++ b/gcc/config/rs6000/a2.md
@@ -0,0 +1,134 @@
+;; Scheduling description for PowerPC A2 processors.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Ben Elliston (bje@au.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppca2")
+
+;; CPU units
+
+;; The multiplier pipeline.
+(define_cpu_unit "mult" "ppca2")
+
+;; The auxiliary processor unit (FP/vector unit).
+(define_cpu_unit "axu" "ppca2")
+
+;; D.4.6
+;; Some peculiarities for certain SPRs
+
+(define_insn_reservation "ppca2-mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+(define_insn_reservation "ppca2-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.4.8
+(define_insn_reservation "ppca2-imul" 1
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; FIXME: latency and multiplier reservation for 64-bit multiply?
+(define_insn_reservation "ppca2-lmul" 6
+ (and (eq_attr "type" "lmul,lmul_compare")
+ (eq_attr "cpu" "ppca2"))
+ "mult*3")
+
+;; D.4.9
+(define_insn_reservation "ppca2-idiv" 32
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*32")
+
+(define_insn_reservation "ppca2-ldiv" 65
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "ppca2"))
+ "mult*65")
+
+;; D.4.13
+(define_insn_reservation "ppca2-load" 5
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (eq_attr "cpu" "ppca2"))
+ "nothing")
+
+;; D.8.1
+(define_insn_reservation "ppca2-fp" 6
+ (and (eq_attr "type" "fp") ;; Ignore fpsimple insn types (SPE only).
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.4
+(define_insn_reservation "ppca2-fp-load" 6
+ (and (eq_attr "type" "fpload,fpload_u,fpload_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.5
+(define_insn_reservation "ppca2-fp-store" 2
+ (and (eq_attr "type" "fpstore,fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.6
+(define_insn_reservation "ppca2-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.7
+;;
+;; Instructions from the same thread succeeding the floating-point
+;; divide cannot be executed until the floating-point divide has
+;; completed. Since there is nothing else we can do, this thread will
+;; just have to stall.
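+;; (The reservations below free the AXU after one cycle; a sketch that
+;; instead held the unit for the whole operation would use, e.g.,
+;; "axu*59" for sdiv, but since the hardware stall cannot be avoided
+;; there is little to gain from the much bigger automaton.)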
+
+(define_insn_reservation "ppca2-ddiv" 72
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-sdiv" 59
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+;; D.8.8
+;;
+;; Instructions from the same thread succeeding the floating-point
+;; divide cannot be executed until the floating-point divide has
+;; completed. Since there is nothing else we can do, this thread will
+;; just have to stall.
+
+(define_insn_reservation "ppca2-dsqrt" 69
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
+
+(define_insn_reservation "ppca2-ssqrt" 65
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppca2"))
+ "axu")
diff --git a/gcc/config/rs6000/aix-stdint.h b/gcc/config/rs6000/aix-stdint.h
new file mode 100644
index 000000000..8b20c152a
--- /dev/null
+++ b/gcc/config/rs6000/aix-stdint.h
@@ -0,0 +1,51 @@
+/* Definitions for <stdint.h> types on systems using AIX.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define SIG_ATOMIC_TYPE "int"
+
+#define INT8_TYPE "signed char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_LEAST8_TYPE "signed char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_FAST8_TYPE "signed char"
+#define INT_FAST16_TYPE "short int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "short unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INTPTR_TYPE "long int"
+#define UINTPTR_TYPE "long unsigned int"
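+
+/* These macros expand to C expressions evaluated inside the compiler,
+   not in user code; e.g. INT64_TYPE selects "long int" when -maix64
+   makes LONG_TYPE_SIZE 64, and "long long int" otherwise.  */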
+
diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h
new file mode 100644
index 000000000..7f60d329b
--- /dev/null
+++ b/gcc/config/rs6000/aix.h
@@ -0,0 +1,260 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Yes! We are AIX! */
+#define DEFAULT_ABI ABI_AIX
+#undef TARGET_AIX
+#define TARGET_AIX 1
+
+/* Linux64.h wants to redefine TARGET_AIX based on -m64, but it can't be used
+ in the #if conditional in options-default.h, so provide another macro. */
+#undef TARGET_AIX_OS
+#define TARGET_AIX_OS 1
+
+/* AIX always has a TOC. */
+#define TARGET_NO_TOC 0
+#define TARGET_TOC 1
+#define FIXED_R2 1
+
+/* AIX allows r13 to be used in 32-bit mode. */
+#define FIXED_R13 0
+
+/* 32-bit and 64-bit AIX stack boundary is 128. */
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY 128
+
+#undef TARGET_IEEEQUAD
+#define TARGET_IEEEQUAD 0
+
+/* The AIX linker will discard static constructors in object files before
+ collect has a chance to see them, so scan the object files directly. */
+#define COLLECT_EXPORT_LIST
+
+#if HAVE_AS_REF
+/* Issue assembly directives that create a reference to the given DWARF table
+ identifier label from the current function section. This is defined to
+ ensure we drag the frame tables associated with needed function bodies in
+ a link with garbage collection activated. */
+#define ASM_OUTPUT_DWARF_TABLE_REF rs6000_aix_asm_output_dwarf_table_ref
+#endif
+
+/* This is the only version of nm that collect2 can work with. */
+#define REAL_NM_FILE_NAME "/usr/ucb/nm"
+
+#define USER_LABEL_PREFIX ""
+
+/* Don't turn -B into -L if the argument specifies a relative file name. */
+#define RELATIVE_PREFIX_NOT_LINKDIR
+
+/* Because of the above, we must have gcc search itself to find libgcc.a. */
+#define LINK_LIBGCC_SPECIAL_1
+
+#define MFWRAP_SPEC " %{static: %{fmudflap|fmudflapth: \
+ -brename:malloc,__wrap_malloc -brename:__real_malloc,malloc \
+ -brename:free,__wrap_free -brename:__real_free,free \
+ -brename:calloc,__wrap_calloc -brename:__real_calloc,calloc \
+ -brename:realloc,__wrap_realloc -brename:__real_realloc,realloc \
+ -brename:mmap,__wrap_mmap -brename:__real_mmap,mmap \
+ -brename:munmap,__wrap_munmap -brename:__real_munmap,munmap \
+ -brename:alloca,__wrap_alloca -brename:__real_alloca,alloca \
+} %{fmudflapth: \
+ -brename:pthread_create,__wrap_pthread_create \
+ -brename:__real_pthread_create,pthread_create \
+ -brename:pthread_join,__wrap_pthread_join \
+ -brename:__real_pthread_join,pthread_join \
+ -brename:pthread_exit,__wrap_pthread_exit \
+ -brename:__real_pthread_exit,pthread_exit \
+}} %{fmudflap|fmudflapth: \
+ -brename:main,__wrap_main -brename:__real_main,main \
+}"
+
+#define MFLIB_SPEC " %{fmudflap: -lmudflap \
+ %{static:%(link_gcc_c_sequence) -lmudflap}} \
+ %{fmudflapth: -lmudflapth -lpthread \
+ %{static:%(link_gcc_c_sequence) -lmudflapth}} "
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_OS_AIX_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_IBMR2"); \
+ builtin_define ("_POWER"); \
+ builtin_define ("_AIX"); \
+ builtin_define ("_AIX32"); \
+ builtin_define ("_AIX41"); \
+ builtin_define ("_LONG_LONG"); \
+ if (TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONGDOUBLE128"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=aix"); \
+ } \
+ while (0)
+
+/* Define appropriate architecture macros for preprocessor depending on
+ target switches. */
+
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE}\
+ %{ansi: -D_ANSI_C_SOURCE}"
+
+#define CC1_SPEC "%(cc1_cpu)"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC ""
+
+/* Tell the assembler to assume that all undefined names are external.
+
+ Don't do this until the fixed IBM assembler is more generally available.
+ When this becomes permanently defined, the ASM_OUTPUT_EXTERNAL,
+ ASM_OUTPUT_EXTERNAL_LIBCALL, and RS6000_OUTPUT_BASENAME macros will no
+ longer be needed. Also, the extern declaration of mcount in
+ rs6000_xcoff_file_start will no longer be needed. */
+
+/* #define ASM_SPEC "-u %(asm_cpu)" */
+
+/* Default location of syscalls.exp under AIX */
+#define LINK_SYSCALLS_SPEC "-bI:%R/lib/syscalls.exp"
+
+/* Default location of libg.exp under AIX */
+#define LINK_LIBG_SPEC "-bexport:%R/usr/lib/libg.exp"
+
+/* Define the options for the binder: Start text at 512, align all segments
+ to 512 bytes, and warn if there is text relocation.
+
+ The -bhalt:4 option supposedly changes the level at which ld will abort,
+ but it also suppresses warnings about multiply defined symbols and is
+ used by the AIX cc command. So we use it here.
+
+ -bnodelcsect undoes a poor choice of default relating to multiply-defined
+ csects. See AIX documentation for more information about this.
+
+ -bM:SRE tells the linker that the output file is Shared REusable. Note
+ that to actually build a shared library you will also need to specify an
+ export list with the -Wl,-bE option. */
+
+#define LINK_SPEC "-T512 -H512 %{!r:-btextro} -bhalt:4 -bnodelcsect\
+%{static:-bnso %(link_syscalls) } \
+%{!shared:%{g*: %(link_libg) }} %{shared:-bM:SRE}"
+
+/* Profiled library versions are used by linking with special directories. */
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+%{p:-L%R/lib/profiled -L%R/usr/lib/profiled} %{!shared:%{g*:-lg}} -lc"
+
+/* Static linking with shared libstdc++ requires libsupc++ as well. */
+#define LIBSTDCXX_STATIC "supc++"
+
+/* This now supports a natural alignment mode. */
+/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ ((TARGET_ALIGN_NATURAL == 0 \
+ && TYPE_MODE (strip_array_types (TREE_TYPE (FIELD))) == DFmode) \
+ ? MIN ((COMPUTED), 32) \
+ : (COMPUTED))
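+
+/* For example, with the default (power) alignment, struct { int i;
+   double d; } places d at offset 4, since the double field's alignment
+   is capped at 32 bits here unless -malign-natural is used.  */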
+
+/* AIX increases natural record alignment to doubleword if the first
+ field is an FP double while the FP fields remain word aligned. */
+#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \
+ ((TREE_CODE (STRUCT) == RECORD_TYPE \
+ || TREE_CODE (STRUCT) == UNION_TYPE \
+ || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \
+ && TARGET_ALIGN_NATURAL == 0 \
+ ? rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \
+ : MAX ((COMPUTED), (SPECIFIED)))
+
+/* The AIX ABI isn't explicit on whether aggregates smaller than a
+ word/doubleword should be padded upward or downward. One could
+ reasonably assume that they follow the normal rules for structure
+ layout treating the parameter area as any other block of memory,
+ then map the reg param area to registers, i.e., pad upward, which
+ is the way IBM Compilers for AIX behave.
+ Setting both of the following defines results in this behavior. */
+#define AGGREGATE_PADDING_FIXED 1
+#define AGGREGATES_PAD_UPWARD_ALWAYS 1
+
+/* Specify padding for the last element of a block move between
+ registers and memory. FIRST is nonzero if this is the only
+ element. */
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE))
+
+/* Indicate that jump tables go in the text section. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* Define any extra SPECS that the compiler needs to generate. */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_syscalls", LINK_SYSCALLS_SPEC }, \
+ { "link_libg", LINK_LIBG_SPEC }
+
+/* Define cutoff for using external functions to save floating point. */
+#define FP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) == 62 || (FIRST_REG) == 63)
+/* And similarly for general purpose registers. */
+#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 32)
+
+/* If the current unwind info (FS) does not contain explicit info
+ saving R2, then we have to do a minor amount of code reading to
+ figure out if it was saved. The big problem here is that the
+ code that does the save/restore is generated by the linker, so
+ we have no good way to determine at compile time what to do. */
+
+#define R_LR 65
+
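+/* The magic numbers tested below are the TOC-restore instructions the
+   linker places after a call: 0xE8410028 is "ld r2,40(r1)" and
+   0x80410014 is "lwz r2,20(r1)", reloading r2 from the 64- and 32-bit
+   TOC save slots respectively.  */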
+#ifdef __64BIT__
+#define MD_FROB_UPDATE_CONTEXT(CTX, FS) \
+ do { \
+ if ((FS)->regs.reg[2].how == REG_UNSAVED) \
+ { \
+ unsigned int *insn \
+ = (unsigned int *) \
+ _Unwind_GetGR ((CTX), R_LR); \
+ if (*insn == 0xE8410028) \
+ _Unwind_SetGRPtr ((CTX), 2, (CTX)->cfa + 40); \
+ } \
+ } while (0)
+#else
+#define MD_FROB_UPDATE_CONTEXT(CTX, FS) \
+ do { \
+ if ((FS)->regs.reg[2].how == REG_UNSAVED) \
+ { \
+ unsigned int *insn \
+ = (unsigned int *) \
+ _Unwind_GetGR ((CTX), R_LR); \
+ if (*insn == 0x80410014) \
+ _Unwind_SetGRPtr ((CTX), 2, (CTX)->cfa + 20); \
+ } \
+ } while (0)
+#endif
+
+#define PROFILE_HOOK(LABEL) output_profile_hook (LABEL)
+
+/* Print subsidiary information on the compiler version in use. */
+#define TARGET_VERSION ;
+
+/* No version of AIX fully supports AltiVec or 64-bit instructions in
+ 32-bit mode. */
+#define OS_MISSING_POWERPC64 1
+#define OS_MISSING_ALTIVEC 1
+
+/* WINT_TYPE */
+#define WINT_TYPE "int"
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
diff --git a/gcc/config/rs6000/aix43.h b/gcc/config/rs6000/aix43.h
new file mode 100644
index 000000000..8e285decb
--- /dev/null
+++ b/gcc/config/rs6000/aix43.h
@@ -0,0 +1,185 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX version 4.3.
+ Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006,
+ 2007, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by David Edelsohn (edelsohn@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define NON_POWERPC_MASKS (MASK_POWER | MASK_POWER2)
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && (target_flags & NON_POWERPC_MASKS)) \
+ { \
+ target_flags &= ~NON_POWERPC_MASKS; \
+ warning (0, "-maix64 and POWER architecture are incompatible"); \
+ } \
+ if (TARGET_64BIT && ! TARGET_POWERPC64) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \
+ } \
+ if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \
+ { \
+ rs6000_long_double_type_size = 64; \
+ if (rs6000_explicit_options.long_double) \
+ warning (0, "soft-float and long-double-128 are incompatible"); \
+ } \
+ if (TARGET_POWERPC64 && ! TARGET_64BIT) \
+ { \
+ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \
+ } \
+} while (0);
+
+#undef ASM_SPEC
+#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)"
+
+/* Common ASM definitions used by ASM_SPEC amongst the various targets
+ for handling -mcpu=xxx switches. */
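+/* Spec syntax reminder: %{mcpu=603e: -m603} passes -m603 to the
+   assembler when -mcpu=603e is given, and %(asm_default) expands the
+   asm_default spec defined elsewhere.  */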
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC \
+"%{!mcpu*: %{!maix64: \
+ %{mpower: %{!mpower2: -mpwr}} \
+ %{mpower2: -mpwr2} \
+ %{mpowerpc*: %{!mpowerpc64: -mppc}} \
+ %{mpowerpc64: -mppc64} \
+ %{!mpower*: %{!mpowerpc*: %(asm_default)}}}} \
+%{mcpu=common: -mcom} \
+%{mcpu=power: -mpwr} \
+%{mcpu=power2: -mpwr2} \
+%{mcpu=power3: -m620} \
+%{mcpu=power4: -m620} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rios: -mpwr} \
+%{mcpu=rios1: -mpwr} \
+%{mcpu=rios2: -mpwr2} \
+%{mcpu=rsc: -mpwr} \
+%{mcpu=rsc1: -mpwr} \
+%{mcpu=rs64a: -mppc} \
+%{mcpu=601: -m601} \
+%{mcpu=602: -mppc} \
+%{mcpu=603: -m603} \
+%{mcpu=603e: -m603} \
+%{mcpu=604: -m604} \
+%{mcpu=604e: -m604} \
+%{mcpu=620: -m620} \
+%{mcpu=630: -m620}"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mcom"
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_AIX43"); \
+ TARGET_OS_AIX_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE}\
+ %{ansi: -D_ANSI_C_SOURCE}\
+ %{maix64: -D__64BIT__}\
+ %{mpe: -I%R/usr/lpp/ppe.poe/include}\
+ %{pthread: -D_THREAD_SAFE}"
+
+/* The GNU C++ standard library requires that these macros be
+ defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC \
+ "-D_ALL_SOURCE \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT MASK_NEW_MNEMONICS
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC604e
+
+/* AIX does not support Altivec. */
+#undef TARGET_ALTIVEC
+#define TARGET_ALTIVEC 0
+#undef TARGET_ALTIVEC_ABI
+#define TARGET_ALTIVEC_ABI 0
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mcpu=common" }
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{!maix64:%{!shared:%{g*:-lg}}}\
+ %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\
+ %{pthread:-L%R/usr/lib/threads -lpthreads -lc_r %R/usr/lib/libc.a}\
+ %{!pthread:-lc}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\
+ %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\
+ %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\
+ %{mpe:-binitfini:poe_remote_main}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:\
+ %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\
+ %{!maix64:\
+ %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\
+ %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}"
+
+/* AIX 4.3 typedefs ptrdiff_t as "long" while earlier releases used "int". */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* AIX 4 uses the PowerPC nop (ori 0,0,0) instruction as call glue for
+ PowerPC and "cror 31,31,31" for the POWER architecture. */
+
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE "{cror 31,31,31|nop}"
+
+/* AIX 4.2 and above provide initialization and finalization function
+ support from the linker command line. */
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "-binitfini"
+
+/* The IBM AIX 4.x assembler doesn't support forward references in
+ .set directives. We handle this by deferring the output of .set
+ directives to the end of the compilation unit. */
+#define TARGET_DEFERRED_OUTPUT_DEFS(DECL,TARGET) true
+
+/* This target uses the aix64.opt file. */
+#define TARGET_USES_AIX64_OPT 1
+
+#define TARGET_AIX_VERSION 43
diff --git a/gcc/config/rs6000/aix51.h b/gcc/config/rs6000/aix51.h
new file mode 100644
index 000000000..90d504f61
--- /dev/null
+++ b/gcc/config/rs6000/aix51.h
@@ -0,0 +1,189 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX V5.
+ Copyright (C) 2001, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by David Edelsohn (edelsohn@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define NON_POWERPC_MASKS (MASK_POWER | MASK_POWER2)
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && (target_flags & NON_POWERPC_MASKS)) \
+ { \
+ target_flags &= ~NON_POWERPC_MASKS; \
+ warning (0, "-maix64 and POWER architecture are incompatible"); \
+ } \
+ if (TARGET_64BIT && ! TARGET_POWERPC64) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \
+ } \
+ if (TARGET_POWERPC64 && ! TARGET_64BIT) \
+ { \
+ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \
+ } \
+} while (0);
+
+#undef ASM_SPEC
+#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)"
+
+/* Common ASM definitions used by ASM_SPEC amongst the various targets
+ for handling -mcpu=xxx switches. */
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC \
+"%{!mcpu*: %{!maix64: \
+ %{mpower: %{!mpower2: -mpwr}} \
+ %{mpower2: -mpwr2} \
+ %{mpowerpc*: %{!mpowerpc64: -mppc}} \
+ %{mpowerpc64: -mppc64} \
+ %{!mpower*: %{!mpowerpc*: %(asm_default)}}}} \
+%{mcpu=common: -mcom} \
+%{mcpu=power: -mpwr} \
+%{mcpu=power2: -mpwr2} \
+%{mcpu=power3: -m620} \
+%{mcpu=power4: -m620} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rios: -mpwr} \
+%{mcpu=rios1: -mpwr} \
+%{mcpu=rios2: -mpwr2} \
+%{mcpu=rsc: -mpwr} \
+%{mcpu=rsc1: -mpwr} \
+%{mcpu=rs64a: -mppc} \
+%{mcpu=601: -m601} \
+%{mcpu=602: -mppc} \
+%{mcpu=603: -m603} \
+%{mcpu=603e: -m603} \
+%{mcpu=604: -m604} \
+%{mcpu=604e: -m604} \
+%{mcpu=620: -m620} \
+%{mcpu=630: -m620} \
+%{mcpu=970: -m620} \
+%{mcpu=G5: -m620}"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mcom"
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_AIX43"); \
+ builtin_define ("_AIX51"); \
+ TARGET_OS_AIX_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \
+ %{ansi: -D_ANSI_C_SOURCE} \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+/* The GNU C++ standard library requires that these macros be
+ defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC \
+ "-D_ALL_SOURCE \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT MASK_NEW_MNEMONICS
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC604e
+
+/* AIX does not support Altivec. */
+#undef TARGET_ALTIVEC
+#define TARGET_ALTIVEC 0
+#undef TARGET_ALTIVEC_ABI
+#define TARGET_ALTIVEC_ABI 0
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mcpu=common" }
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{!maix64:%{!shared:%{g*:-lg}}}\
+ %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\
+ %{pthread:-lpthreads} -lc"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\
+ %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\
+ %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\
+ %{mpe:-binitfini:poe_remote_main}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:\
+ %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\
+ %{!maix64:\
+ %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\
+ %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}"
+
+/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Type used for wchar_t, as a string used in a declaration. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int")
+
+/* Width of wchar_t in bits. */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32)
+
+/* AIX V5 uses PowerPC nop (ori 0,0,0) instruction as call glue for PowerPC
+ and "cror 31,31,31" for POWER architecture. */
+
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE "{cror 31,31,31|nop}"
+
+/* AIX 4.2 and above provides initialization and finalization function
+ support from the linker command line. */
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "-binitfini"
+
+/* This target uses the aix64.opt file. */
+#define TARGET_USES_AIX64_OPT 1
+
+/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION,
+ but does not have crtbegin/end. */
+
+#define TARGET_USE_JCR_SECTION 0
+
+#define TARGET_AIX_VERSION 51
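
[Editorial note: the WCHAR_TYPE/WCHAR_TYPE_SIZE pair above gives AIX a 16-bit wchar_t in 32-bit mode and a 32-bit one under -maix64. A quick check, again assuming a compiler built from these headers:]

/* sizeof (wchar_t) should follow WCHAR_TYPE_SIZE above: 2 under -maix32,
   4 under -maix64, on a toolchain built from these headers.  */
#include <stdio.h>
#include <stddef.h>

int main (void)
{
  printf ("sizeof (wchar_t) = %u bytes\n", (unsigned) sizeof (wchar_t));
  return 0;
}
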
diff --git a/gcc/config/rs6000/aix52.h b/gcc/config/rs6000/aix52.h
new file mode 100644
index 000000000..a0fa21886
--- /dev/null
+++ b/gcc/config/rs6000/aix52.h
@@ -0,0 +1,199 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX V5.2.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by David Edelsohn (edelsohn@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define NON_POWERPC_MASKS (MASK_POWER | MASK_POWER2)
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && (target_flags & NON_POWERPC_MASKS)) \
+ { \
+ target_flags &= ~NON_POWERPC_MASKS; \
+ warning (0, "-maix64 and POWER architecture are incompatible"); \
+ } \
+ if (TARGET_64BIT && ! TARGET_POWERPC64) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \
+ } \
+ if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \
+ { \
+ rs6000_long_double_type_size = 64; \
+ if (rs6000_explicit_options.long_double) \
+ warning (0, "soft-float and long-double-128 are incompatible"); \
+ } \
+ if (TARGET_POWERPC64 && ! TARGET_64BIT) \
+ { \
+ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \
+ } \
+} while (0);
+
+#undef ASM_SPEC
+#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)"
+
+/* Common ASM definitions used by ASM_SPEC amongst the various targets
+ for handling -mcpu=xxx switches. */
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC \
+"%{!mcpu*: %{!maix64: \
+ %{mpowerpc64: -mppc64} \
+ %{!mpower64: %(asm_default)}}} \
+%{mcpu=power3: -m620} \
+%{mcpu=power4: -m620} \
+%{mcpu=power5: -m620} \
+%{mcpu=power5+: -m620} \
+%{mcpu=power6: -m620} \
+%{mcpu=power6x: -m620} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rs64a: -mppc} \
+%{mcpu=603: -m603} \
+%{mcpu=603e: -m603} \
+%{mcpu=604: -m604} \
+%{mcpu=604e: -m604} \
+%{mcpu=620: -m620} \
+%{mcpu=630: -m620} \
+%{mcpu=970: -m620} \
+%{mcpu=G5: -m620}"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc"
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_AIX43"); \
+ builtin_define ("_AIX51"); \
+ builtin_define ("_AIX52"); \
+ TARGET_OS_AIX_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \
+ %{ansi: -D_ANSI_C_SOURCE} \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+/* The GNU C++ standard library requires that these macros be
+ defined. Synchronize with libstdc++ os_defines.h. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC \
+ "-D_ALL_SOURCE \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS)
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_POWER4
+#undef PROCESSOR_DEFAULT64
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4
+
+#undef TARGET_POWER
+#define TARGET_POWER 0
+
+/* AIX does not support Altivec. */
+#undef TARGET_ALTIVEC
+#define TARGET_ALTIVEC 0
+#undef TARGET_ALTIVEC_ABI
+#define TARGET_ALTIVEC_ABI 0
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#undef MULTILIB_DEFAULTS
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{!maix64:%{!shared:%{g*:-lg}}}\
+ %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\
+ %{pthread:-lpthreads} -lc"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\
+ %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\
+ %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\
+ %{mpe:-binitfini:poe_remote_main}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:\
+ %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\
+ %{!maix64:\
+ %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\
+ %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}"
+
+/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Type used for wchar_t, as a string used in a declaration. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int")
+
+/* Width of wchar_t in bits. */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32)
+
+/* AIX V5 uses PowerPC nop (ori 0,0,0) instruction as call glue for PowerPC
+ and "cror 31,31,31" for POWER architecture. */
+
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE "{cror 31,31,31|nop}"
+
+/* AIX 4.2 and above provides initialization and finalization function
+ support from the linker command line. */
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "-binitfini"
+
+/* AIX 5.2 has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+#ifndef _AIX52
+extern long long int atoll(const char *);
+#endif
+
+/* This target uses the aix64.opt file. */
+#define TARGET_USES_AIX64_OPT 1
+
+/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION,
+ but does not have crtbegin/end. */
+
+#define TARGET_USE_JCR_SECTION 0
+
+#define TARGET_AIX_VERSION 52
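
[Editorial note: SUBTARGET_OVERRIDE_OPTIONS above encodes three reconciliation rules: -maix64 silently forces PowerPC64 on, soft-float caps long double at 64 bits, and PowerPC64 without -maix64 is rejected. A standalone sketch of that logic follows; the struct fields are illustrative stand-ins for target_flags bits, not real GCC state:]

/* Standalone sketch of the option reconciliation performed by
   SUBTARGET_OVERRIDE_OPTIONS above; not GCC code.  */
#include <stdio.h>

struct opts { int aix64, powerpc64, soft_float, long_double_128; int ld_bits; };

static void reconcile (struct opts *o)
{
  if (o->aix64 && !o->powerpc64)
    {
      o->powerpc64 = 1;   /* -maix64 requires PowerPC64; enable it */
      puts ("warning: -maix64 requires PowerPC64 architecture remain enabled");
    }
  if (o->soft_float && o->long_double_128)
    {
      o->ld_bits = 64;    /* soft-float cannot do 128-bit long double */
      puts ("warning: soft-float and long-double-128 are incompatible");
    }
  if (o->powerpc64 && !o->aix64)
    puts ("error: -maix64 required: 64-bit computation with "
          "32-bit addressing not yet supported");
}

int main (void)
{
  struct opts o = { 1, 0, 0, 0, 128 };  /* as if only -maix64 were given */
  reconcile (&o);
  return 0;
}
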
diff --git a/gcc/config/rs6000/aix53.h b/gcc/config/rs6000/aix53.h
new file mode 100644
index 000000000..381e0d662
--- /dev/null
+++ b/gcc/config/rs6000/aix53.h
@@ -0,0 +1,199 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX V5.3.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by David Edelsohn (edelsohn@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define NON_POWERPC_MASKS (MASK_POWER | MASK_POWER2)
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && (target_flags & NON_POWERPC_MASKS)) \
+ { \
+ target_flags &= ~NON_POWERPC_MASKS; \
+ warning (0, "-maix64 and POWER architecture are incompatible"); \
+ } \
+ if (TARGET_64BIT && ! TARGET_POWERPC64) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \
+ } \
+ if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \
+ { \
+ rs6000_long_double_type_size = 64; \
+ if (rs6000_explicit_options.long_double) \
+ warning (0, "soft-float and long-double-128 are incompatible"); \
+ } \
+ if (TARGET_POWERPC64 && ! TARGET_64BIT) \
+ { \
+ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \
+ } \
+} while (0);
+
+#undef ASM_SPEC
+#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)"
+
+/* Common ASM definitions used by ASM_SPEC amongst the various targets for
+ handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to
+ provide the default assembler options if the user uses -mcpu=native, so if
+ you make changes here, make them there also. */
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC \
+"%{!mcpu*: %{!maix64: \
+ %{mpowerpc64: -mppc64} \
+ %{maltivec: -m970} \
+ %{!maltivec: %{!mpower64: %(asm_default)}}}} \
+%{mcpu=native: %(asm_cpu_native)} \
+%{mcpu=power3: -m620} \
+%{mcpu=power4: -mpwr4} \
+%{mcpu=power5: -mpwr5} \
+%{mcpu=power5+: -mpwr5x} \
+%{mcpu=power6: -mpwr6} \
+%{mcpu=power6x: -mpwr6} \
+%{mcpu=power7: -mpwr7} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rs64a: -mppc} \
+%{mcpu=603: -m603} \
+%{mcpu=603e: -m603} \
+%{mcpu=604: -m604} \
+%{mcpu=604e: -m604} \
+%{mcpu=620: -m620} \
+%{mcpu=630: -m620} \
+%{mcpu=970: -m970} \
+%{mcpu=G5: -m970}"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc"
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_AIX43"); \
+ builtin_define ("_AIX51"); \
+ builtin_define ("_AIX52"); \
+ builtin_define ("_AIX53"); \
+ TARGET_OS_AIX_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \
+ %{ansi: -D_ANSI_C_SOURCE} \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+/* The GNU C++ standard library requires that these macros be
+ defined. Synchronize with libstdc++ os_defines.h. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC \
+ "-D_ALL_SOURCE \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS)
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_POWER5
+#undef PROCESSOR_DEFAULT64
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER5
+
+#undef TARGET_POWER
+#define TARGET_POWER 0
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#undef MULTILIB_DEFAULTS
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{!maix64:%{!shared:%{g*:-lg}}}\
+ %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\
+ %{pthread:-lpthreads} -lc"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\
+ %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\
+ %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\
+ %{mpe:-binitfini:poe_remote_main}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:\
+ %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\
+ %{!maix64:\
+ %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\
+ %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}"
+
+/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Type used for wchar_t, as a string used in a declaration. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int")
+
+/* Width of wchar_t in bits. */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32)
+
+/* AIX V5 uses PowerPC nop (ori 0,0,0) instruction as call glue for PowerPC
+ and "cror 31,31,31" for POWER architecture. */
+
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE "{cror 31,31,31|nop}"
+
+/* AIX 4.2 and above provides initialization and finalization function
+ support from the linker command line. */
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "-binitfini"
+
+/* AIX 5.2 has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+#ifndef _AIX52
+extern long long int atoll(const char *);
+#endif
+
+/* This target uses the aix64.opt file. */
+#define TARGET_USES_AIX64_OPT 1
+
+/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION,
+ but does not have crtbegin/end. */
+
+#define TARGET_USE_JCR_SECTION 0
+
+#define TARGET_AIX_VERSION 53
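
[Editorial note: the AIX 5.3 ASM_CPU_SPEC above is in effect a lookup from -mcpu= values to AIX assembler flags, falling back to ASM_DEFAULT_SPEC (-mppc). An illustrative table-driven equivalent, with entries copied from that spec (this is not how GCC implements it; specs are interpreted by the driver):]

/* Illustrative mirror of the -mcpu= to assembler-flag mapping encoded in
   the AIX 5.3 ASM_CPU_SPEC above.  */
#include <stdio.h>
#include <string.h>

static const char *
aix53_asm_flag (const char *cpu)
{
  static const struct { const char *cpu; const char *flag; } map[] = {
    { "power3", "-m620" },  { "power4", "-mpwr4" },  { "power5", "-mpwr5" },
    { "power5+", "-mpwr5x" }, { "power6", "-mpwr6" }, { "power6x", "-mpwr6" },
    { "power7", "-mpwr7" },  { "powerpc", "-mppc" }, { "rs64a", "-mppc" },
    { "620", "-m620" }, { "630", "-m620" }, { "970", "-m970" }, { "G5", "-m970" },
  };
  unsigned i;
  for (i = 0; i < sizeof map / sizeof map[0]; i++)
    if (strcmp (cpu, map[i].cpu) == 0)
      return map[i].flag;
  return "-mppc";  /* ASM_DEFAULT_SPEC fallback */
}

int main (void)
{
  printf ("-mcpu=power5+ -> %s\n", aix53_asm_flag ("power5+"));
  return 0;
}
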
diff --git a/gcc/config/rs6000/aix61.h b/gcc/config/rs6000/aix61.h
new file mode 100644
index 000000000..2170eae55
--- /dev/null
+++ b/gcc/config/rs6000/aix61.h
@@ -0,0 +1,200 @@
+/* Definitions of target machine for GNU compiler,
+ for IBM RS/6000 POWER running AIX V6.1.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by David Edelsohn (edelsohn@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define NON_POWERPC_MASKS (MASK_POWER | MASK_POWER2)
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (TARGET_64BIT && (target_flags & NON_POWERPC_MASKS)) \
+ { \
+ target_flags &= ~NON_POWERPC_MASKS; \
+ warning (0, "-maix64 and POWER architecture are incompatible"); \
+ } \
+ if (TARGET_64BIT && ! TARGET_POWERPC64) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ warning (0, "-maix64 requires PowerPC64 architecture remain enabled"); \
+ } \
+ if (TARGET_SOFT_FLOAT && TARGET_LONG_DOUBLE_128) \
+ { \
+ rs6000_long_double_type_size = 64; \
+ if (rs6000_explicit_options.long_double) \
+ warning (0, "soft-float and long-double-128 are incompatible"); \
+ } \
+ if (TARGET_POWERPC64 && ! TARGET_64BIT) \
+ { \
+ error ("-maix64 required: 64-bit computation with 32-bit addressing not yet supported"); \
+ } \
+} while (0);
+
+#undef ASM_SPEC
+#define ASM_SPEC "-u %{maix64:-a64 %{!mcpu*:-mppc64}} %(asm_cpu)"
+
+/* Common ASM definitions used by ASM_SPEC amongst the various targets for
+ handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to
+ provide the default assembler options if the user uses -mcpu=native, so if
+ you make changes here, make them there also. */
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC \
+"%{!mcpu*: %{!maix64: \
+ %{mpowerpc64: -mppc64} \
+ %{maltivec: -m970} \
+ %{!maltivec: %{!mpower64: %(asm_default)}}}} \
+%{mcpu=native: %(asm_cpu_native)} \
+%{mcpu=power3: -m620} \
+%{mcpu=power4: -mpwr4} \
+%{mcpu=power5: -mpwr5} \
+%{mcpu=power5+: -mpwr5x} \
+%{mcpu=power6: -mpwr6} \
+%{mcpu=power6x: -mpwr6} \
+%{mcpu=power7: -mpwr7} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rs64a: -mppc} \
+%{mcpu=603: -m603} \
+%{mcpu=603e: -m603} \
+%{mcpu=604: -m604} \
+%{mcpu=604e: -m604} \
+%{mcpu=620: -m620} \
+%{mcpu=630: -m620} \
+%{mcpu=970: -m970} \
+%{mcpu=G5: -m970}"
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc"
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_AIX43"); \
+ builtin_define ("_AIX51"); \
+ builtin_define ("_AIX52"); \
+ builtin_define ("_AIX53"); \
+ builtin_define ("_AIX61"); \
+ TARGET_OS_AIX_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \
+ %{ansi: -D_ANSI_C_SOURCE} \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+/* The GNU C++ standard library requires that these macros be
+ defined. Synchronize with libstdc++ os_defines.h. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC \
+ "-D_ALL_SOURCE -D__COMPATMATH__ \
+ %{maix64: -D__64BIT__} \
+ %{mpe: -I%R/usr/lpp/ppe.poe/include} \
+ %{pthread: -D_THREAD_SAFE}"
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS)
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_POWER7
+#undef PROCESSOR_DEFAULT64
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
+
+#undef TARGET_POWER
+#define TARGET_POWER 0
+
+/* Define this macro as a C expression for the initializer of an
+ array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#undef MULTILIB_DEFAULTS
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{p:-L%R/lib/profiled -L%R/usr/lib/profiled}\
+ %{!maix64:%{!shared:%{g*:-lg}}}\
+ %{mpe:-L%R/usr/lpp/ppe.poe/lib -lmpi -lvtd}\
+ %{pthread:-lpthreads} -lc"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-bpT:0x10000000 -bpD:0x20000000 %{!r:-btextro} -bnodelcsect\
+ %{static:-bnso %(link_syscalls) } %{shared:-bM:SRE %{!e:-bnoentry}}\
+ %{!maix64:%{!shared:%{g*: %(link_libg) }}} %{maix64:-b64}\
+ %{mpe:-binitfini:poe_remote_main}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared:\
+ %{maix64:%{pg:gcrt0_64%O%s}%{!pg:%{p:mcrt0_64%O%s}%{!p:crt0_64%O%s}}}\
+ %{!maix64:\
+ %{pthread:%{pg:gcrt0_r%O%s}%{!pg:%{p:mcrt0_r%O%s}%{!p:crt0_r%O%s}}}\
+ %{!pthread:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}}}}}"
+
+/* AIX V5 typedefs ptrdiff_t as "long" while earlier releases used "int". */
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Type used for wchar_t, as a string used in a declaration. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (!TARGET_64BIT ? "short unsigned int" : "unsigned int")
+
+/* Width of wchar_t in bits. */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (!TARGET_64BIT ? 16 : 32)
+
+/* AIX V5 uses PowerPC nop (ori 0,0,0) instruction as call glue for PowerPC
+ and "cror 31,31,31" for POWER architecture. */
+
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE "{cror 31,31,31|nop}"
+
+/* AIX 4.2 and above provides initialization and finalization function
+ support from the linker command line. */
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION
+
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "-binitfini"
+
+/* AIX 5.2 has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+#ifndef _AIX52
+extern long long int atoll(const char *);
+#endif
+
+/* This target uses the aix64.opt file. */
+#define TARGET_USES_AIX64_OPT 1
+
+/* This target defines SUPPORTS_WEAK and TARGET_ASM_NAMED_SECTION,
+ but does not have crtbegin/end. */
+
+#define TARGET_USE_JCR_SECTION 0
+
+#define TARGET_AIX_VERSION 61
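
[Editorial note: the _AIXnn macros defined by TARGET_OS_CPP_BUILTINS across these headers are cumulative: an AIX 6.1 target defines _AIX43 through _AIX61, so a single #ifdef tests "at least this release". For instance, assuming a compiler built from these headers:]

/* The _AIXnn macros are cumulative, so one #ifdef checks a minimum
   release.  Assumes a gcc configured with these rs6000/aix*.h headers.  */
#include <stdio.h>

int main (void)
{
#if defined (_AIX61)
  puts ("target is AIX 6.1 or newer");
#elif defined (_AIX53)
  puts ("target is AIX 5.3");
#elif defined (_AIX43)
  puts ("target is AIX 4.3 through 5.2");
#else
  puts ("not an AIX target (or pre-4.3)");
#endif
  return 0;
}
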
diff --git a/gcc/config/rs6000/aix64.opt b/gcc/config/rs6000/aix64.opt
new file mode 100644
index 000000000..9a10b200e
--- /dev/null
+++ b/gcc/config/rs6000/aix64.opt
@@ -0,0 +1,38 @@
+; Options for the 64-bit flavor of AIX.
+;
+; Copyright (C) 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
+; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+maix64
+Target Report RejectNegative Negative(maix32) Mask(64BIT)
+Compile for 64-bit pointers
+
+maix32
+Target Report RejectNegative Negative(maix64) InverseMask(64BIT)
+Compile for 32-bit pointers
+
+mpe
+Target Report RejectNegative Var(internal_nothing_1) Save
+Support message passing with the Parallel Environment
+
+posix
+Driver
+
+pthread
+Driver
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
new file mode 100644
index 000000000..583731b96
--- /dev/null
+++ b/gcc/config/rs6000/altivec.h
@@ -0,0 +1,493 @@
+/* PowerPC AltiVec include file.
+ Copyright (C) 2002, 2003, 2004, 2005, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+ Rewritten by Paolo Bonzini (bonzini@gnu.org).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented to conform to the specification included in the AltiVec
+ Technology Programming Interface Manual (ALTIVECPIM/D 6/1999 Rev 0). */
+
+#ifndef _ALTIVEC_H
+#define _ALTIVEC_H 1
+
+#if !defined(__VEC__) || !defined(__ALTIVEC__)
+#error Use the "-maltivec" flag to enable PowerPC AltiVec support
+#endif
+
+/* If __APPLE_ALTIVEC__ is defined, the compiler supports 'vector',
+ 'pixel' and 'bool' as context-sensitive AltiVec keywords (in
+ non-AltiVec contexts, they revert to their original meanings,
+ if any), so we do not need to define them as macros. */
+
+#if !defined(__APPLE_ALTIVEC__)
+/* You are allowed to undef these for C++ compatibility. */
+#define vector __vector
+#define pixel __pixel
+#define bool __bool
+#endif
+
+/* Condition register codes for AltiVec predicates. */
+
+#define __CR6_EQ 0
+#define __CR6_EQ_REV 1
+#define __CR6_LT 2
+#define __CR6_LT_REV 3
+
+/* Synonyms. */
+#define vec_vaddcuw vec_addc
+#define vec_vand vec_and
+#define vec_vandc vec_andc
+#define vec_vrfip vec_ceil
+#define vec_vcmpbfp vec_cmpb
+#define vec_vcmpgefp vec_cmpge
+#define vec_vctsxs vec_cts
+#define vec_vctuxs vec_ctu
+#define vec_vexptefp vec_expte
+#define vec_vrfim vec_floor
+#define vec_lvx vec_ld
+#define vec_lvxl vec_ldl
+#define vec_vlogefp vec_loge
+#define vec_vmaddfp vec_madd
+#define vec_vmhaddshs vec_madds
+#define vec_vmladduhm vec_mladd
+#define vec_vmhraddshs vec_mradds
+#define vec_vnmsubfp vec_nmsub
+#define vec_vnor vec_nor
+#define vec_vor vec_or
+#define vec_vpkpx vec_packpx
+#define vec_vperm vec_perm
+#define vec_vrefp vec_re
+#define vec_vrfin vec_round
+#define vec_vrsqrtefp vec_rsqrte
+#define vec_vsel vec_sel
+#define vec_vsldoi vec_sld
+#define vec_vsl vec_sll
+#define vec_vslo vec_slo
+#define vec_vspltisb vec_splat_s8
+#define vec_vspltish vec_splat_s16
+#define vec_vspltisw vec_splat_s32
+#define vec_vsr vec_srl
+#define vec_vsro vec_sro
+#define vec_stvx vec_st
+#define vec_stvxl vec_stl
+#define vec_vsubcuw vec_subc
+#define vec_vsum2sws vec_sum2s
+#define vec_vsumsws vec_sums
+#define vec_vrfiz vec_trunc
+#define vec_vxor vec_xor
+
+/* Functions that are resolved by the backend to one of the
+ typed builtins. */
+#define vec_vaddfp __builtin_vec_vaddfp
+#define vec_addc __builtin_vec_addc
+#define vec_vaddsws __builtin_vec_vaddsws
+#define vec_vaddshs __builtin_vec_vaddshs
+#define vec_vaddsbs __builtin_vec_vaddsbs
+#define vec_vavgsw __builtin_vec_vavgsw
+#define vec_vavguw __builtin_vec_vavguw
+#define vec_vavgsh __builtin_vec_vavgsh
+#define vec_vavguh __builtin_vec_vavguh
+#define vec_vavgsb __builtin_vec_vavgsb
+#define vec_vavgub __builtin_vec_vavgub
+#define vec_ceil __builtin_vec_ceil
+#define vec_cmpb __builtin_vec_cmpb
+#define vec_vcmpeqfp __builtin_vec_vcmpeqfp
+#define vec_cmpge __builtin_vec_cmpge
+#define vec_vcmpgtfp __builtin_vec_vcmpgtfp
+#define vec_vcmpgtsw __builtin_vec_vcmpgtsw
+#define vec_vcmpgtuw __builtin_vec_vcmpgtuw
+#define vec_vcmpgtsh __builtin_vec_vcmpgtsh
+#define vec_vcmpgtuh __builtin_vec_vcmpgtuh
+#define vec_vcmpgtsb __builtin_vec_vcmpgtsb
+#define vec_vcmpgtub __builtin_vec_vcmpgtub
+#define vec_vcfsx __builtin_vec_vcfsx
+#define vec_vcfux __builtin_vec_vcfux
+#define vec_cts __builtin_vec_cts
+#define vec_ctu __builtin_vec_ctu
+#define vec_expte __builtin_vec_expte
+#define vec_floor __builtin_vec_floor
+#define vec_loge __builtin_vec_loge
+#define vec_madd __builtin_vec_madd
+#define vec_madds __builtin_vec_madds
+#define vec_mtvscr __builtin_vec_mtvscr
+#define vec_vmaxfp __builtin_vec_vmaxfp
+#define vec_vmaxsw __builtin_vec_vmaxsw
+#define vec_vmaxsh __builtin_vec_vmaxsh
+#define vec_vmaxsb __builtin_vec_vmaxsb
+#define vec_vminfp __builtin_vec_vminfp
+#define vec_vminsw __builtin_vec_vminsw
+#define vec_vminsh __builtin_vec_vminsh
+#define vec_vminsb __builtin_vec_vminsb
+#define vec_mradds __builtin_vec_mradds
+#define vec_vmsumshm __builtin_vec_vmsumshm
+#define vec_vmsumuhm __builtin_vec_vmsumuhm
+#define vec_vmsummbm __builtin_vec_vmsummbm
+#define vec_vmsumubm __builtin_vec_vmsumubm
+#define vec_vmsumshs __builtin_vec_vmsumshs
+#define vec_vmsumuhs __builtin_vec_vmsumuhs
+#define vec_vmulesb __builtin_vec_vmulesb
+#define vec_vmulesh __builtin_vec_vmulesh
+#define vec_vmuleuh __builtin_vec_vmuleuh
+#define vec_vmuleub __builtin_vec_vmuleub
+#define vec_vmulosh __builtin_vec_vmulosh
+#define vec_vmulouh __builtin_vec_vmulouh
+#define vec_vmulosb __builtin_vec_vmulosb
+#define vec_vmuloub __builtin_vec_vmuloub
+#define vec_nmsub __builtin_vec_nmsub
+#define vec_packpx __builtin_vec_packpx
+#define vec_vpkswss __builtin_vec_vpkswss
+#define vec_vpkuwus __builtin_vec_vpkuwus
+#define vec_vpkshss __builtin_vec_vpkshss
+#define vec_vpkuhus __builtin_vec_vpkuhus
+#define vec_vpkswus __builtin_vec_vpkswus
+#define vec_vpkshus __builtin_vec_vpkshus
+#define vec_re __builtin_vec_re
+#define vec_round __builtin_vec_round
+#define vec_recipdiv __builtin_vec_recipdiv
+#define vec_rsqrt __builtin_vec_rsqrt
+#define vec_rsqrte __builtin_vec_rsqrte
+#define vec_vsubfp __builtin_vec_vsubfp
+#define vec_subc __builtin_vec_subc
+#define vec_vsubsws __builtin_vec_vsubsws
+#define vec_vsubshs __builtin_vec_vsubshs
+#define vec_vsubsbs __builtin_vec_vsubsbs
+#define vec_sum4s __builtin_vec_sum4s
+#define vec_vsum4shs __builtin_vec_vsum4shs
+#define vec_vsum4sbs __builtin_vec_vsum4sbs
+#define vec_vsum4ubs __builtin_vec_vsum4ubs
+#define vec_sum2s __builtin_vec_sum2s
+#define vec_sums __builtin_vec_sums
+#define vec_trunc __builtin_vec_trunc
+#define vec_vupkhpx __builtin_vec_vupkhpx
+#define vec_vupkhsh __builtin_vec_vupkhsh
+#define vec_vupkhsb __builtin_vec_vupkhsb
+#define vec_vupklpx __builtin_vec_vupklpx
+#define vec_vupklsh __builtin_vec_vupklsh
+#define vec_vupklsb __builtin_vec_vupklsb
+#define vec_abs __builtin_vec_abs
+#define vec_abss __builtin_vec_abss
+#define vec_add __builtin_vec_add
+#define vec_adds __builtin_vec_adds
+#define vec_and __builtin_vec_and
+#define vec_andc __builtin_vec_andc
+#define vec_avg __builtin_vec_avg
+#define vec_cmpeq __builtin_vec_cmpeq
+#define vec_cmpgt __builtin_vec_cmpgt
+#define vec_ctf __builtin_vec_ctf
+#define vec_dst __builtin_vec_dst
+#define vec_dstst __builtin_vec_dstst
+#define vec_dststt __builtin_vec_dststt
+#define vec_dstt __builtin_vec_dstt
+#define vec_ld __builtin_vec_ld
+#define vec_lde __builtin_vec_lde
+#define vec_ldl __builtin_vec_ldl
+#define vec_lvebx __builtin_vec_lvebx
+#define vec_lvehx __builtin_vec_lvehx
+#define vec_lvewx __builtin_vec_lvewx
+/* Cell-only intrinsics. */
+#ifdef __PPU__
+#define vec_lvlx __builtin_vec_lvlx
+#define vec_lvlxl __builtin_vec_lvlxl
+#define vec_lvrx __builtin_vec_lvrx
+#define vec_lvrxl __builtin_vec_lvrxl
+#endif
+#define vec_lvsl __builtin_vec_lvsl
+#define vec_lvsr __builtin_vec_lvsr
+#define vec_max __builtin_vec_max
+#define vec_mergeh __builtin_vec_mergeh
+#define vec_mergel __builtin_vec_mergel
+#define vec_min __builtin_vec_min
+#define vec_mladd __builtin_vec_mladd
+#define vec_msum __builtin_vec_msum
+#define vec_msums __builtin_vec_msums
+#define vec_mule __builtin_vec_mule
+#define vec_mulo __builtin_vec_mulo
+#define vec_nor __builtin_vec_nor
+#define vec_or __builtin_vec_or
+#define vec_pack __builtin_vec_pack
+#define vec_packs __builtin_vec_packs
+#define vec_packsu __builtin_vec_packsu
+#define vec_perm __builtin_vec_perm
+#define vec_rl __builtin_vec_rl
+#define vec_sel __builtin_vec_sel
+#define vec_sl __builtin_vec_sl
+#define vec_sld __builtin_vec_sld
+#define vec_sll __builtin_vec_sll
+#define vec_slo __builtin_vec_slo
+#define vec_splat __builtin_vec_splat
+#define vec_sr __builtin_vec_sr
+#define vec_sra __builtin_vec_sra
+#define vec_srl __builtin_vec_srl
+#define vec_sro __builtin_vec_sro
+#define vec_st __builtin_vec_st
+#define vec_ste __builtin_vec_ste
+#define vec_stl __builtin_vec_stl
+#define vec_stvebx __builtin_vec_stvebx
+#define vec_stvehx __builtin_vec_stvehx
+#define vec_stvewx __builtin_vec_stvewx
+/* Cell-only intrinsics. */
+#ifdef __PPU__
+#define vec_stvlx __builtin_vec_stvlx
+#define vec_stvlxl __builtin_vec_stvlxl
+#define vec_stvrx __builtin_vec_stvrx
+#define vec_stvrxl __builtin_vec_stvrxl
+#endif
+#define vec_sub __builtin_vec_sub
+#define vec_subs __builtin_vec_subs
+#define vec_sum __builtin_vec_sum
+#define vec_unpackh __builtin_vec_unpackh
+#define vec_unpackl __builtin_vec_unpackl
+#define vec_vaddubm __builtin_vec_vaddubm
+#define vec_vaddubs __builtin_vec_vaddubs
+#define vec_vadduhm __builtin_vec_vadduhm
+#define vec_vadduhs __builtin_vec_vadduhs
+#define vec_vadduwm __builtin_vec_vadduwm
+#define vec_vadduws __builtin_vec_vadduws
+#define vec_vcmpequb __builtin_vec_vcmpequb
+#define vec_vcmpequh __builtin_vec_vcmpequh
+#define vec_vcmpequw __builtin_vec_vcmpequw
+#define vec_vmaxub __builtin_vec_vmaxub
+#define vec_vmaxuh __builtin_vec_vmaxuh
+#define vec_vmaxuw __builtin_vec_vmaxuw
+#define vec_vminub __builtin_vec_vminub
+#define vec_vminuh __builtin_vec_vminuh
+#define vec_vminuw __builtin_vec_vminuw
+#define vec_vmrghb __builtin_vec_vmrghb
+#define vec_vmrghh __builtin_vec_vmrghh
+#define vec_vmrghw __builtin_vec_vmrghw
+#define vec_vmrglb __builtin_vec_vmrglb
+#define vec_vmrglh __builtin_vec_vmrglh
+#define vec_vmrglw __builtin_vec_vmrglw
+#define vec_vpkuhum __builtin_vec_vpkuhum
+#define vec_vpkuwum __builtin_vec_vpkuwum
+#define vec_vrlb __builtin_vec_vrlb
+#define vec_vrlh __builtin_vec_vrlh
+#define vec_vrlw __builtin_vec_vrlw
+#define vec_vslb __builtin_vec_vslb
+#define vec_vslh __builtin_vec_vslh
+#define vec_vslw __builtin_vec_vslw
+#define vec_vspltb __builtin_vec_vspltb
+#define vec_vsplth __builtin_vec_vsplth
+#define vec_vspltw __builtin_vec_vspltw
+#define vec_vsrab __builtin_vec_vsrab
+#define vec_vsrah __builtin_vec_vsrah
+#define vec_vsraw __builtin_vec_vsraw
+#define vec_vsrb __builtin_vec_vsrb
+#define vec_vsrh __builtin_vec_vsrh
+#define vec_vsrw __builtin_vec_vsrw
+#define vec_vsububs __builtin_vec_vsububs
+#define vec_vsububm __builtin_vec_vsububm
+#define vec_vsubuhm __builtin_vec_vsubuhm
+#define vec_vsubuhs __builtin_vec_vsubuhs
+#define vec_vsubuwm __builtin_vec_vsubuwm
+#define vec_vsubuws __builtin_vec_vsubuws
+#define vec_xor __builtin_vec_xor
+
+#define vec_extract __builtin_vec_extract
+#define vec_insert __builtin_vec_insert
+#define vec_splats __builtin_vec_splats
+#define vec_promote __builtin_vec_promote
+
+#ifdef __VSX__
+/* VSX additions */
+#define vec_div __builtin_vec_div
+#define vec_mul __builtin_vec_mul
+#define vec_msub __builtin_vec_msub
+#define vec_nmadd __builtin_vec_nmadd
+#define vec_nearbyint __builtin_vec_nearbyint
+#define vec_rint __builtin_vec_rint
+#define vec_sqrt __builtin_vec_sqrt
+#define vec_vsx_ld __builtin_vec_vsx_ld
+#define vec_vsx_st __builtin_vec_vsx_st
+#endif
+
+/* Predicates.
+ For C++, we use templates in order to allow non-parenthesized arguments.
+ For C, instead, we use macros since non-parenthesized arguments were
+ not allowed even in older GCC implementations of AltiVec.
+
+ In the future, we may add more magic to the back-end, so that no
+ one- or two-argument macros are used. */
+
+#ifdef __cplusplus__
+#define __altivec_unary_pred(NAME, CALL) \
+template <class T> int NAME (T a1) { return CALL; }
+
+#define __altivec_scalar_pred(NAME, CALL) \
+template <class T, class U> int NAME (T a1, U a2) { return CALL; }
+
+/* Given the vec_step of a type, return the corresponding bool type. */
+template <int STEP> class __altivec_bool_ret { };
+template <> class __altivec_bool_ret <4> {
+ typedef __vector __bool int __ret;
+};
+template <> class __altivec_bool_ret <8> {
+ typedef __vector __bool short __ret;
+};
+template <> class __altivec_bool_ret <16> {
+ typedef __vector __bool char __ret;
+};
+
+/* Be very liberal in the pairs we accept. Mistakes such as passing
+ a `vector char' and `vector short' will be caught by the middle-end,
+ while any attempt to detect them here would produce hard to understand
+ error messages involving the implementation details of AltiVec. */
+#define __altivec_binary_pred(NAME, CALL) \
+template <class T, class U> \
+typename __altivec_bool_ret <vec_step (T)>::__ret \
+NAME (T a1, U a2) \
+{ \
+ return CALL; \
+}
+
+__altivec_binary_pred(vec_cmplt,
+ __builtin_vec_cmpgt (a2, a1))
+__altivec_binary_pred(vec_cmple,
+ __builtin_vec_cmpge (a2, a1))
+
+__altivec_scalar_pred(vec_all_in,
+ __builtin_altivec_vcmpbfp_p (__CR6_EQ, a1, a2))
+__altivec_scalar_pred(vec_any_out,
+ __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, a1, a2))
+
+__altivec_unary_pred(vec_all_nan,
+ __builtin_altivec_vcmpeq_p (__CR6_EQ, a1, a1))
+__altivec_unary_pred(vec_any_nan,
+ __builtin_altivec_vcmpeq_p (__CR6_LT_REV, a1, a1))
+
+__altivec_unary_pred(vec_all_numeric,
+ __builtin_altivec_vcmpeq_p (__CR6_LT, a1, a1))
+__altivec_unary_pred(vec_any_numeric,
+ __builtin_altivec_vcmpeq_p (__CR6_EQ_REV, a1, a1))
+
+__altivec_scalar_pred(vec_all_eq,
+ __builtin_vec_vcmpeq_p (__CR6_LT, a1, a2))
+__altivec_scalar_pred(vec_all_ne,
+ __builtin_vec_vcmpeq_p (__CR6_EQ, a1, a2))
+__altivec_scalar_pred(vec_any_eq,
+ __builtin_vec_vcmpeq_p (__CR6_EQ_REV, a1, a2))
+__altivec_scalar_pred(vec_any_ne,
+ __builtin_vec_vcmpeq_p (__CR6_LT_REV, a1, a2))
+
+__altivec_scalar_pred(vec_all_gt,
+ __builtin_vec_vcmpgt_p (__CR6_LT, a1, a2))
+__altivec_scalar_pred(vec_all_lt,
+ __builtin_vec_vcmpgt_p (__CR6_LT, a2, a1))
+__altivec_scalar_pred(vec_any_gt,
+ __builtin_vec_vcmpgt_p (__CR6_EQ_REV, a1, a2))
+__altivec_scalar_pred(vec_any_lt,
+ __builtin_vec_vcmpgt_p (__CR6_EQ_REV, a2, a1))
+
+__altivec_scalar_pred(vec_all_ngt,
+ __builtin_altivec_vcmpgt_p (__CR6_EQ, a1, a2))
+__altivec_scalar_pred(vec_all_nlt,
+ __builtin_altivec_vcmpgt_p (__CR6_EQ, a2, a1))
+__altivec_scalar_pred(vec_any_ngt,
+ __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a1, a2))
+__altivec_scalar_pred(vec_any_nlt,
+ __builtin_altivec_vcmpgt_p (__CR6_LT_REV, a2, a1))
+
+/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types,
+ while for integer types it is converted to __builtin_vec_vcmpgt_p,
+ with inverted args and condition code. */
+__altivec_scalar_pred(vec_all_le,
+ __builtin_vec_vcmpge_p (__CR6_LT, a2, a1))
+__altivec_scalar_pred(vec_all_ge,
+ __builtin_vec_vcmpge_p (__CR6_LT, a1, a2))
+__altivec_scalar_pred(vec_any_le,
+ __builtin_vec_vcmpge_p (__CR6_EQ_REV, a2, a1))
+__altivec_scalar_pred(vec_any_ge,
+ __builtin_vec_vcmpge_p (__CR6_EQ_REV, a1, a2))
+
+__altivec_scalar_pred(vec_all_nge,
+ __builtin_altivec_vcmpge_p (__CR6_EQ, a1, a2))
+__altivec_scalar_pred(vec_all_nle,
+ __builtin_altivec_vcmpge_p (__CR6_EQ, a2, a1))
+__altivec_scalar_pred(vec_any_nge,
+ __builtin_altivec_vcmpge_p (__CR6_LT_REV, a1, a2))
+__altivec_scalar_pred(vec_any_nle,
+ __builtin_altivec_vcmpge_p (__CR6_LT_REV, a2, a1))
+
+#undef __altivec_scalar_pred
+#undef __altivec_unary_pred
+#undef __altivec_binary_pred
+#else
+#define vec_cmplt(a1, a2) __builtin_vec_cmpgt ((a2), (a1))
+#define vec_cmple(a1, a2) __builtin_vec_cmpge ((a2), (a1))
+
+#define vec_all_in(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ, (a1), (a2))
+#define vec_any_out(a1, a2) __builtin_altivec_vcmpbfp_p (__CR6_EQ_REV, (a1), (a2))
+
+#define vec_all_nan(a1) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a1))
+#define vec_any_nan(a1) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a1))
+
+#define vec_all_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a1))
+#define vec_any_numeric(a1) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a1))
+
+#define vec_all_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT, (a1), (a2))
+#define vec_all_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ, (a1), (a2))
+#define vec_any_eq(a1, a2) __builtin_vec_vcmpeq_p (__CR6_EQ_REV, (a1), (a2))
+#define vec_any_ne(a1, a2) __builtin_vec_vcmpeq_p (__CR6_LT_REV, (a1), (a2))
+
+#define vec_all_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT, (a1), (a2))
+#define vec_all_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT, (a2), (a1))
+#define vec_any_gt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a1), (a2))
+#define vec_any_lt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ_REV, (a2), (a1))
+
+#define vec_all_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a1), (a2))
+#define vec_all_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_EQ, (a2), (a1))
+#define vec_any_ngt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a1), (a2))
+#define vec_any_nlt(a1, a2) __builtin_vec_vcmpgt_p (__CR6_LT_REV, (a2), (a1))
+
+/* __builtin_vec_vcmpge_p is vcmpgefp for floating-point vector types,
+ while for integer types it is converted to __builtin_vec_vcmpgt_p,
+ with inverted args and condition code. */
+#define vec_all_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT, (a2), (a1))
+#define vec_all_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT, (a1), (a2))
+#define vec_any_le(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a2), (a1))
+#define vec_any_ge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ_REV, (a1), (a2))
+
+#define vec_all_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a1), (a2))
+#define vec_all_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_EQ, (a2), (a1))
+#define vec_any_nge(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a1), (a2))
+#define vec_any_nle(a1, a2) __builtin_vec_vcmpge_p (__CR6_LT_REV, (a2), (a1))
+#endif
+
+/* These do not accept vectors, so they do not have a __builtin_vec_*
+ counterpart. */
+#define vec_dss(x) __builtin_altivec_dss((x))
+#define vec_dssall() __builtin_altivec_dssall ()
+#define vec_mfvscr() ((__vector unsigned short) __builtin_altivec_mfvscr ())
+#define vec_splat_s8(x) __builtin_altivec_vspltisb ((x))
+#define vec_splat_s16(x) __builtin_altivec_vspltish ((x))
+#define vec_splat_s32(x) __builtin_altivec_vspltisw ((x))
+#define vec_splat_u8(x) ((__vector unsigned char) vec_splat_s8 ((x)))
+#define vec_splat_u16(x) ((__vector unsigned short) vec_splat_s16 ((x)))
+#define vec_splat_u32(x) ((__vector unsigned int) vec_splat_s32 ((x)))
+
+/* This also accepts a type for its parameter, so it is not enough
+ to #define vec_step to __builtin_vec_step. */
+#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0)
+
+#endif /* _ALTIVEC_H */
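
[Editorial note: as a quick sanity check of the vec_* interfaces this header exposes, a minimal AltiVec program might look like the sketch below. It must be compiled with -maltivec on an AltiVec-capable PowerPC target; the 16-byte alignment is required for vec_st.]

/* Minimal use of the interfaces defined in altivec.h above; requires
   -maltivec and an AltiVec-capable PowerPC target.  */
#include <altivec.h>
#include <stdio.h>

int main (void)
{
  __vector signed int a = vec_splat_s32 (3);   /* {3,3,3,3} */
  __vector signed int b = vec_splat_s32 (4);   /* {4,4,4,4} */
  __vector signed int c = vec_add (a, b);      /* element-wise add */
  signed int out[4] __attribute__ ((aligned (16)));
  vec_st (c, 0, out);                          /* 16-byte aligned store */
  printf ("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}
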
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
new file mode 100644
index 000000000..9fbced173
--- /dev/null
+++ b/gcc/config/rs6000/altivec.md
@@ -0,0 +1,2749 @@
+;; AltiVec patterns.
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Contributed by Aldy Hernandez (aldy@quesejoda.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ ;; 51-62 deleted
+ [(UNSPEC_VCMPBFP 64)
+ (UNSPEC_VMSUMU 65)
+ (UNSPEC_VMSUMM 66)
+ (UNSPEC_VMSUMSHM 68)
+ (UNSPEC_VMSUMUHS 69)
+ (UNSPEC_VMSUMSHS 70)
+ (UNSPEC_VMHADDSHS 71)
+ (UNSPEC_VMHRADDSHS 72)
+ (UNSPEC_VMLADDUHM 73)
+ (UNSPEC_VADDCUW 75)
+ (UNSPEC_VADDU 76)
+ (UNSPEC_VADDS 77)
+ (UNSPEC_VAVGU 80)
+ (UNSPEC_VAVGS 81)
+ (UNSPEC_VMULEUB 83)
+ (UNSPEC_VMULESB 84)
+ (UNSPEC_VMULEUH 85)
+ (UNSPEC_VMULESH 86)
+ (UNSPEC_VMULOUB 87)
+ (UNSPEC_VMULOSB 88)
+ (UNSPEC_VMULOUH 89)
+ (UNSPEC_VMULOSH 90)
+ (UNSPEC_VPKUHUM 93)
+ (UNSPEC_VPKUWUM 94)
+ (UNSPEC_VPKPX 95)
+ (UNSPEC_VPKSHSS 97)
+ (UNSPEC_VPKSWSS 99)
+ (UNSPEC_VPKUHUS 100)
+ (UNSPEC_VPKSHUS 101)
+ (UNSPEC_VPKUWUS 102)
+ (UNSPEC_VPKSWUS 103)
+ ;; 104 deleted
+ (UNSPEC_VSLV4SI 110)
+ (UNSPEC_VSLO 111)
+ (UNSPEC_VSR 118)
+ (UNSPEC_VSRO 119)
+ (UNSPEC_VSUBCUW 124)
+ (UNSPEC_VSUBU 125)
+ (UNSPEC_VSUBS 126)
+ (UNSPEC_VSUM4UBS 131)
+ (UNSPEC_VSUM4S 132)
+ (UNSPEC_VSUM2SWS 134)
+ (UNSPEC_VSUMSWS 135)
+ (UNSPEC_VPERM 144)
+ (UNSPEC_VPERM_UNS 145)
+ ;; 148 deleted
+ (UNSPEC_VRFIN 149)
+ ;; 150 deleted
+ (UNSPEC_VCFUX 151)
+ (UNSPEC_VCFSX 152)
+ (UNSPEC_VCTUXS 153)
+ (UNSPEC_VCTSXS 154)
+ (UNSPEC_VLOGEFP 155)
+ (UNSPEC_VEXPTEFP 156)
+ ;; 157-162 deleted
+ (UNSPEC_VLSDOI 163)
+ (UNSPEC_VUPKHSB 167)
+ (UNSPEC_VUPKHPX 168)
+ (UNSPEC_VUPKHSH 169)
+ (UNSPEC_VUPKLSB 170)
+ (UNSPEC_VUPKLPX 171)
+ (UNSPEC_VUPKLSH 172)
+ ;; 173 deleted
+ (UNSPEC_DST 190)
+ (UNSPEC_DSTT 191)
+ (UNSPEC_DSTST 192)
+ (UNSPEC_DSTSTT 193)
+ (UNSPEC_LVSL 194)
+ (UNSPEC_LVSR 195)
+ (UNSPEC_LVE 196)
+ (UNSPEC_STVX 201)
+ (UNSPEC_STVXL 202)
+ (UNSPEC_STVE 203)
+ (UNSPEC_SET_VSCR 213)
+ (UNSPEC_GET_VRSAVE 214)
+ (UNSPEC_LVX 215)
+ (UNSPEC_REDUC_PLUS 217)
+ (UNSPEC_VECSH 219)
+ (UNSPEC_EXTEVEN_V4SI 220)
+ (UNSPEC_EXTEVEN_V8HI 221)
+ (UNSPEC_EXTEVEN_V16QI 222)
+ (UNSPEC_EXTEVEN_V4SF 223)
+ (UNSPEC_EXTODD_V4SI 224)
+ (UNSPEC_EXTODD_V8HI 225)
+ (UNSPEC_EXTODD_V16QI 226)
+ (UNSPEC_EXTODD_V4SF 227)
+ (UNSPEC_INTERHI_V4SI 228)
+ (UNSPEC_INTERHI_V8HI 229)
+ (UNSPEC_INTERHI_V16QI 230)
+ ;; delete 231
+ (UNSPEC_INTERLO_V4SI 232)
+ (UNSPEC_INTERLO_V8HI 233)
+ (UNSPEC_INTERLO_V16QI 234)
+ ;; delete 235
+ (UNSPEC_LVLX 236)
+ (UNSPEC_LVLXL 237)
+ (UNSPEC_LVRX 238)
+ (UNSPEC_LVRXL 239)
+ (UNSPEC_STVLX 240)
+ (UNSPEC_STVLXL 241)
+ (UNSPEC_STVRX 242)
+ (UNSPEC_STVRXL 243)
+ (UNSPEC_VMULWHUB 308)
+ (UNSPEC_VMULWLUB 309)
+ (UNSPEC_VMULWHSB 310)
+ (UNSPEC_VMULWLSB 311)
+ (UNSPEC_VMULWHUH 312)
+ (UNSPEC_VMULWLUH 313)
+ (UNSPEC_VMULWHSH 314)
+ (UNSPEC_VMULWLSH 315)
+ (UNSPEC_VUPKHUB 316)
+ (UNSPEC_VUPKHUH 317)
+ (UNSPEC_VUPKLUB 318)
+ (UNSPEC_VUPKLUH 319)
+ (UNSPEC_VPERMSI 320)
+ (UNSPEC_VPERMHI 321)
+ (UNSPEC_INTERHI 322)
+ (UNSPEC_INTERLO 323)
+ (UNSPEC_VUPKHS_V4SF 324)
+ (UNSPEC_VUPKLS_V4SF 325)
+ (UNSPEC_VUPKHU_V4SF 326)
+ (UNSPEC_VUPKLU_V4SF 327)
+])
+
+(define_constants
+ [(UNSPECV_SET_VRSAVE 30)
+ (UNSPECV_MTVSCR 186)
+ (UNSPECV_MFVSCR 187)
+ (UNSPECV_DSSALL 188)
+ (UNSPECV_DSS 189)
+ ])
+
+;; Vec int modes
+(define_mode_iterator VI [V4SI V8HI V16QI])
+;; Short vec int modes
+(define_mode_iterator VIshort [V8HI V16QI])
+;; Vec float modes
+(define_mode_iterator VF [V4SF])
+;; Vec modes, pity mode iterators are not composable
+(define_mode_iterator V [V4SI V8HI V16QI V4SF])
+;; Vec modes for move/logical/permute ops; include vector types for moves not
+;; otherwise handled by altivec (v2df, v2di, ti)
+(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI])
+
+;; Like VM, except don't do TImode
+(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
+
+(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
+(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")])
+
+;; Vector move instructions.
+(define_insn "*altivec_mov<mode>"
+ [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*o,*r,*r,v,v")
+ (match_operand:VM2 1 "input_operand" "v,Z,v,r,o,r,j,W"))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0: return "stvx %1,%y0";
+ case 1: return "lvx %0,%y1";
+ case 2: return "vor %0,%1,%1";
+ case 3: return "#";
+ case 4: return "#";
+ case 5: return "#";
+ case 6: return "vxor %0,%0,%0";
+ case 7: return output_vec_const_move (operands);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")])
+
+;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode
+;; is for unions. However, for plain data movement, slightly favor the vector
+;; loads.
+(define_insn "*altivec_movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,v,v,?o,?r,?r,v,v")
+ (match_operand:TI 1 "input_operand" "v,Z,v,r,o,r,j,W"))]
+ "VECTOR_MEM_ALTIVEC_P (TImode)
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+{
+ switch (which_alternative)
+ {
+ case 0: return "stvx %1,%y0";
+ case 1: return "lvx %0,%y1";
+ case 2: return "vor %0,%1,%1";
+ case 3: return "#";
+ case 4: return "#";
+ case 5: return "#";
+ case 6: return "vxor %0,%0,%0";
+ case 7: return output_vec_const_move (operands);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")])
+
+;; Load up a vector with the most significant bit set by loading up -1 and
+;; doing a shift left
+(define_split
+ [(set (match_operand:VM 0 "altivec_register_operand" "")
+ (match_operand:VM 1 "easy_vector_constant_msb" ""))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ enum machine_mode mode = GET_MODE (operands[0]);
+ rtvec v;
+ int i, num_elements;
+
+ if (mode == V4SFmode)
+ {
+ mode = V4SImode;
+ dest = gen_lowpart (V4SImode, dest);
+ }
+
+ num_elements = GET_MODE_NUNITS (mode);
+ v = rtvec_alloc (num_elements);
+ for (i = 0; i < num_elements; i++)
+ RTVEC_ELT (v, i) = constm1_rtx;
+
+ emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v)));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_ASHIFT (mode, dest, dest)));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:VM 0 "altivec_register_operand" "")
+ (match_operand:VM 1 "easy_vector_constant_add_self" ""))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && reload_completed"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))]
+{
+ rtx dup = gen_easy_altivec_constant (operands[1]);
+ rtx const_vec;
+ enum machine_mode op_mode = <MODE>mode;
+
+ /* Divide the operand of the resulting VEC_DUPLICATE, and use
+ simplify_rtx to make a CONST_VECTOR. */
+ XEXP (dup, 0) = simplify_const_binary_operation (ASHIFTRT, QImode,
+ XEXP (dup, 0), const1_rtx);
+ const_vec = simplify_rtx (dup);
+
+ if (op_mode == V4SFmode)
+ {
+ op_mode = V4SImode;
+ operands[0] = gen_lowpart (op_mode, operands[0]);
+ }
+ if (GET_MODE (const_vec) == op_mode)
+ operands[3] = const_vec;
+ else
+ operands[3] = gen_lowpart (op_mode, const_vec);
+ operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]);
+})
+
+(define_insn "get_vrsave_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))]
+ "TARGET_ALTIVEC"
+{
+ if (TARGET_MACHO)
+ return "mfspr %0,256";
+ else
+ return "mfvrsave %0";
+}
+ [(set_attr "type" "*")])
+
+(define_insn "*set_vrsave_internal"
+ [(match_parallel 0 "vrsave_operation"
+ [(set (reg:SI 109)
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")
+ (reg:SI 109)] UNSPECV_SET_VRSAVE))])]
+ "TARGET_ALTIVEC"
+{
+ if (TARGET_MACHO)
+ return "mtspr 256,%1";
+ else
+ return "mtvrsave %1";
+}
+ [(set_attr "type" "*")])
+
+(define_insn "*save_world"
+ [(match_parallel 0 "save_world_operation"
+ [(clobber (reg:SI 65))
+ (use (match_operand:SI 1 "call_operand" "s"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ "bl %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*restore_world"
+ [(match_parallel 0 "restore_world_operation"
+ [(return)
+ (use (reg:SI 65))
+ (use (match_operand:SI 1 "call_operand" "s"))
+ (clobber (match_operand:SI 2 "gpc_reg_operand" "=r"))])]
+ "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && TARGET_32BIT"
+ "b %z1")
+
+;; Simple binary operations.
+
+;; add
+(define_insn "add<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (plus:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vaddu<VI_char>m %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_addv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vaddfp %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vaddcuw"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VADDCUW))]
+ "TARGET_ALTIVEC"
+ "vaddcuw %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddu<VI_char>s"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VADDU))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vaddu<VI_char>s %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vadds<VI_char>s"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VADDS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vadds<VI_char>s %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+;; sub
+(define_insn "sub<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (minus:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vsubu<VI_char>m %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_subv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vsubfp %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vsubcuw"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUBCUW))]
+ "TARGET_ALTIVEC"
+ "vsubcuw %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubu<VI_char>s"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VSUBU))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsubu<VI_char>s %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubs<VI_char>s"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VSUBS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsubs<VI_char>s %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+;; average
+(define_insn "altivec_vavgu<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VAVGU))]
+ "TARGET_ALTIVEC"
+ "vavgu<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vavgs<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")]
+ UNSPEC_VAVGS))]
+ "TARGET_ALTIVEC"
+ "vavgs<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vcmpbfp"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")]
+ UNSPEC_VCMPBFP))]
+ "TARGET_ALTIVEC"
+ "vcmpbfp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_eq<mode>"
+ [(set (match_operand:VI 0 "altivec_register_operand" "=v")
+ (eq:VI (match_operand:VI 1 "altivec_register_operand" "v")
+ (match_operand:VI 2 "altivec_register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vcmpequ<VI_char> %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_gt<mode>"
+ [(set (match_operand:VI 0 "altivec_register_operand" "=v")
+ (gt:VI (match_operand:VI 1 "altivec_register_operand" "v")
+ (match_operand:VI 2 "altivec_register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vcmpgts<VI_char> %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_gtu<mode>"
+ [(set (match_operand:VI 0 "altivec_register_operand" "=v")
+ (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v")
+ (match_operand:VI 2 "altivec_register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vcmpgtu<VI_char> %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_eqv4sf"
+ [(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
+ (eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v")
+ (match_operand:V4SF 2 "altivec_register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpeqfp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_gtv4sf"
+ [(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
+ (gt:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v")
+ (match_operand:V4SF 2 "altivec_register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpgtfp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_gev4sf"
+ [(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
+ (ge:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v")
+ (match_operand:V4SF 2 "altivec_register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpgefp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vsel<mode>"
+ [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+ (if_then_else:VM
+ (ne:CC (match_operand:VM 1 "altivec_register_operand" "v")
+ (match_operand:VM 4 "zero_constant" ""))
+ (match_operand:VM 2 "altivec_register_operand" "v")
+ (match_operand:VM 3 "altivec_register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vsel %0,%3,%2,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vsel<mode>_uns"
+ [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+ (if_then_else:VM
+ (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v")
+ (match_operand:VM 4 "zero_constant" ""))
+ (match_operand:VM 2 "altivec_register_operand" "v")
+ (match_operand:VM 3 "altivec_register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vsel %0,%3,%2,%1"
+ [(set_attr "type" "vecperm")])
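+
+;; vsel is a bitwise select: each result bit comes from the second data
+;; source where the corresponding mask bit is 1 and from the first where
+;; it is 0, roughly dest = (c & b) | (~c & a).  That is why the templates
+;; above emit "vsel %0,%3,%2,%1": the else-arm (%3) is the first data
+;; source, matching the (if_then_else (ne ... 0)) form of the pattern.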
+
+;; Fused multiply add.
+
+(define_insn "*altivec_fmav4sf4"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (match_operand:V4SF 3 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vmaddfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
+
+;; We do multiply as a fused multiply-add with an add of a -0.0 vector.
+
+(define_expand "altivec_mulv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "register_operand" "")
+ (match_dup 3)))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+{
+ rtx neg0;
+
+ /* Generate [-0.0, -0.0, -0.0, -0.0]. */
+ neg0 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx));
+ emit_insn (gen_vashlv4si3 (neg0, neg0, neg0));
+
+ operands[3] = gen_lowpart (V4SFmode, neg0);
+})
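+
+;; Using -0.0 rather than +0.0 as the addend keeps the sign of zero
+;; results intact: x*y + -0.0 is x*y exactly, whereas x*y + +0.0 would
+;; turn a -0.0 product into +0.0.  A scalar sketch of the same identity:
+;;   float mul (float a, float b) { return fmaf (a, b, -0.0f); }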
+
+;; 32-bit integer multiplication
+;; A_high = (Operand_1 & 0xFFFF0000) >> 16
+;; A_low = Operand_1 & 0xFFFF
+;; B_high = (Operand_2 & 0xFFFF0000) >> 16
+;; B_low = Operand_2 & 0xFFFF
+;; result = A_low * B_low + ((A_high * B_low + B_high * A_low) << 16)
+
+;; (define_insn "mulv4si3"
+;; [(set (match_operand:V4SI 0 "register_operand" "=v")
+;; (mult:V4SI (match_operand:V4SI 1 "register_operand" "v")
+;; (match_operand:V4SI 2 "register_operand" "v")))]
+(define_expand "mulv4si3"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+ "
+ {
+ rtx zero;
+ rtx swap;
+ rtx small_swap;
+ rtx sixteen;
+ rtx one;
+ rtx two;
+ rtx low_product;
+ rtx high_product;
+
+ zero = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltisw (zero, const0_rtx));
+
+ sixteen = gen_reg_rtx (V4SImode);
+   emit_insn (gen_altivec_vspltisw (sixteen, GEN_INT (-16)));
+
+ swap = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vrotlv4si3 (swap, operands[2], sixteen));
+
+ one = gen_reg_rtx (V8HImode);
+ convert_move (one, operands[1], 0);
+
+ two = gen_reg_rtx (V8HImode);
+ convert_move (two, operands[2], 0);
+
+ small_swap = gen_reg_rtx (V8HImode);
+ convert_move (small_swap, swap, 0);
+
+ low_product = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vmulouh (low_product, one, two));
+
+ high_product = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
+
+ emit_insn (gen_vashlv4si3 (high_product, high_product, sixteen));
+
+ emit_insn (gen_addv4si3 (operands[0], high_product, low_product));
+
+ DONE;
+ }")
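+
+;; The expander above therefore emits roughly this sequence (the
+;; convert_move calls are subreg moves, not instructions):
+;;   vspltisw zero,0
+;;   vspltisw sixteen,-16       ; counts are mod 32, so -16 acts as 16
+;;   vrlw     swap,%2,sixteen   ; swap the halves of each word of B
+;;   vmulouh  low,%1,%2         ; A_low * B_low per word
+;;   vmsumuhm high,%1,swap,zero ; A_high*B_low + A_low*B_high per word
+;;   vslw     high,high,sixteen
+;;   vadduwm  %0,high,low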
+
+(define_expand "mulv8hi3"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx odd = gen_reg_rtx (V4SImode);
+ rtx even = gen_reg_rtx (V4SImode);
+ rtx high = gen_reg_rtx (V4SImode);
+ rtx low = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
+
+ emit_insn (gen_altivec_vmrghw (high, even, odd));
+ emit_insn (gen_altivec_vmrglw (low, even, odd));
+
+ emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
+
+ DONE;
+}")
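+
+;; vmulesh/vmulosh produce the full 32-bit products of the even and odd
+;; halfword lanes; merging them high and low restores the original lane
+;; order, and vpkuwum keeps the low 16 bits of each product, which is
+;; exactly the modular V8HI multiply.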
+
+;; Fused multiply subtract
+(define_insn "*altivec_vnmsubfp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (neg:V4SF
+ (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (neg:V4SF
+ (match_operand:V4SF 3 "register_operand" "v")))))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vnmsubfp %0,%1,%2,%3"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vmsumu<VI_char>m"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
+ (match_operand:VIshort 2 "register_operand" "v")
+ (match_operand:V4SI 3 "register_operand" "v")]
+ UNSPEC_VMSUMU))]
+ "TARGET_ALTIVEC"
+ "vmsumu<VI_char>m %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmsumm<VI_char>m"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
+ (match_operand:VIshort 2 "register_operand" "v")
+ (match_operand:V4SI 3 "register_operand" "v")]
+ UNSPEC_VMSUMM))]
+ "TARGET_ALTIVEC"
+ "vmsumm<VI_char>m %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmsumshm"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V4SI 3 "register_operand" "v")]
+ UNSPEC_VMSUMSHM))]
+ "TARGET_ALTIVEC"
+ "vmsumshm %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmsumuhs"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V4SI 3 "register_operand" "v")]
+ UNSPEC_VMSUMUHS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vmsumuhs %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmsumshs"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V4SI 3 "register_operand" "v")]
+ UNSPEC_VMSUMSHS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vmsumshs %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+;; max
+
+(define_insn "umax<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (umax:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vmaxu<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (smax:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vmaxs<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vmaxfp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "umin<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (umin:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vminu<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (smin:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vmins<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vminfp %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "altivec_vmhaddshs"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V8HI 3 "register_operand" "v")]
+ UNSPEC_VMHADDSHS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vmhaddshs %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmhraddshs"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V8HI 3 "register_operand" "v")]
+ UNSPEC_VMHRADDSHS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vmhraddshs %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmladduhm"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V8HI 3 "register_operand" "v")]
+ UNSPEC_VMLADDUHM))]
+ "TARGET_ALTIVEC"
+ "vmladduhm %0,%1,%2,%3"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmrghb"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 8)
+ (const_int 1)
+ (const_int 9)
+ (const_int 2)
+ (const_int 10)
+ (const_int 3)
+ (const_int 11)
+ (const_int 4)
+ (const_int 12)
+ (const_int 5)
+ (const_int 13)
+ (const_int 6)
+ (const_int 14)
+ (const_int 7)
+ (const_int 15)]))
+ (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v")
+ (parallel [(const_int 8)
+ (const_int 0)
+ (const_int 9)
+ (const_int 1)
+ (const_int 10)
+ (const_int 2)
+ (const_int 11)
+ (const_int 3)
+ (const_int 12)
+ (const_int 4)
+ (const_int 13)
+ (const_int 5)
+ (const_int 14)
+ (const_int 6)
+ (const_int 15)
+ (const_int 7)]))
+ (const_int 21845)))]
+ "TARGET_ALTIVEC"
+ "vmrghb %0,%1,%2"
+ [(set_attr "type" "vecperm")])
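+
+;; The merge patterns here model vmrgh/vmrgl without an unspec, keeping
+;; the semantics visible to the RTL optimizers: each input is first
+;; permuted so its merged elements sit in alternating lanes, then
+;; vec_merge with an alternating mask (21845 = 0x5555, 85 = 0x55,
+;; 5 = 0b0101) takes lanes from the two inputs in turn.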
+
+(define_insn "altivec_vmrghh"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)
+ (const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)]))
+ (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v")
+ (parallel [(const_int 4)
+ (const_int 0)
+ (const_int 5)
+ (const_int 1)
+ (const_int 6)
+ (const_int 2)
+ (const_int 7)
+ (const_int 3)]))
+ (const_int 85)))]
+ "TARGET_ALTIVEC"
+ "vmrghh %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghw"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+ "vmrghw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vmrghsf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
+ "vmrghw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglb"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v")
+ (parallel [(const_int 8)
+ (const_int 0)
+ (const_int 9)
+ (const_int 1)
+ (const_int 10)
+ (const_int 2)
+ (const_int 11)
+ (const_int 3)
+ (const_int 12)
+ (const_int 4)
+ (const_int 13)
+ (const_int 5)
+ (const_int 14)
+ (const_int 6)
+ (const_int 15)
+ (const_int 7)]))
+ (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 8)
+ (const_int 1)
+ (const_int 9)
+ (const_int 2)
+ (const_int 10)
+ (const_int 3)
+ (const_int 11)
+ (const_int 4)
+ (const_int 12)
+ (const_int 5)
+ (const_int 13)
+ (const_int 6)
+ (const_int 14)
+ (const_int 7)
+ (const_int 15)]))
+ (const_int 21845)))]
+ "TARGET_ALTIVEC"
+ "vmrglb %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglh"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 4)
+ (const_int 0)
+ (const_int 5)
+ (const_int 1)
+ (const_int 6)
+ (const_int 2)
+ (const_int 7)
+ (const_int 3)]))
+ (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)
+ (const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)]))
+ (const_int 85)))]
+ "TARGET_ALTIVEC"
+ "vmrglh %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglw"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_merge:V4SI
+ (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+ "vmrglw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vmrglsf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF
+ (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
+ "vmrglw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmuleub"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULEUB))]
+ "TARGET_ALTIVEC"
+ "vmuleub %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulesb"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULESB))]
+ "TARGET_ALTIVEC"
+ "vmulesb %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmuleuh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULEUH))]
+ "TARGET_ALTIVEC"
+ "vmuleuh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulesh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULESH))]
+ "TARGET_ALTIVEC"
+ "vmulesh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmuloub"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULOUB))]
+ "TARGET_ALTIVEC"
+ "vmuloub %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulosb"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULOSB))]
+ "TARGET_ALTIVEC"
+ "vmulosb %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULOUH))]
+ "TARGET_ALTIVEC"
+ "vmulouh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulosh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULOSH))]
+ "TARGET_ALTIVEC"
+ "vmulosh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+
+;; Logical ops.  Have the logical ops follow the memory ops in
+;; terms of whether to prefer VSX or Altivec.
+
+(define_insn "*altivec_and<mode>3"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (and:VM (match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vand %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_ior<mode>3"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (ior:VM (match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vor %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_xor<mode>3"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (xor:VM (match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vxor %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_one_cmpl<mode>2"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (not:VM (match_operand:VM 1 "register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vnor %0,%1,%1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_nor<mode>3"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v"))))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vnor %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_andc<mode>3"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (and:VM (not:VM (match_operand:VM 2 "register_operand" "v"))
+ (match_operand:VM 1 "register_operand" "v")))]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+ "vandc %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vpkuhum"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VPKUHUM))]
+ "TARGET_ALTIVEC"
+ "vpkuhum %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkuwum"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKUWUM))]
+ "TARGET_ALTIVEC"
+ "vpkuwum %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkpx"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKPX))]
+ "TARGET_ALTIVEC"
+ "vpkpx %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkshss"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VPKSHSS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkshss %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkswss"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKSWSS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkswss %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkuhus"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VPKUHUS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkuhus %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkshus"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VPKSHUS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkshus %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkuwus"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKUWUS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkuwus %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vpkswus"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKSWUS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vpkswus %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vrl<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (rotate:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vrl<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsl"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSLV4SI))]
+ "TARGET_ALTIVEC"
+ "vsl %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vslo"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSLO))]
+ "TARGET_ALTIVEC"
+ "vslo %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vsl<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashift:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vsl<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_vsr<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vsr<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*altivec_vsra<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+ "vsra<VI_char> %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsr"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSR))]
+ "TARGET_ALTIVEC"
+ "vsr %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vsro"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSRO))]
+ "TARGET_ALTIVEC"
+ "vsro %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vsum4ubs"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUM4UBS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsum4ubs %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vsum4s<VI_char>s"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUM4S))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsum4s<VI_char>s %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vsum2sws"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUM2SWS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsum2sws %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vsumsws"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUMSWS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsumsws %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vspltb"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_duplicate:V16QI
+ (vec_select:QI (match_operand:V16QI 1 "register_operand" "v")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
+ "TARGET_ALTIVEC"
+ "vspltb %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vsplth"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_duplicate:V8HI
+ (vec_select:HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
+ "TARGET_ALTIVEC"
+ "vsplth %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vspltw"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_duplicate:V4SI
+ (vec_select:SI (match_operand:V4SI 1 "register_operand" "v")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
+ "TARGET_ALTIVEC"
+ "vspltw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vspltsf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_duplicate:V4SF
+ (vec_select:SF (match_operand:V4SF 1 "register_operand" "v")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vspltw %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vspltis<VI_char>"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (vec_duplicate:VI
+ (match_operand:QI 1 "s5bit_cint_operand" "i")))]
+ "TARGET_ALTIVEC"
+ "vspltis<VI_char> %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*altivec_vrfiz"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vrfiz %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vperm_<mode>"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vperm_<mode>_uns"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM_UNS))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vrfip" ; ceil
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_FRIP))]
+ "TARGET_ALTIVEC"
+ "vrfip %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vrfin"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_VRFIN))]
+ "TARGET_ALTIVEC"
+ "vrfin %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*altivec_vrfim" ; floor
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_FRIM))]
+ "TARGET_ALTIVEC"
+ "vrfim %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vcfux"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_VCFUX))]
+ "TARGET_ALTIVEC"
+ "vcfux %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vcfsx"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_VCFSX))]
+ "TARGET_ALTIVEC"
+ "vcfsx %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vctuxs"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_VCTUXS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vctuxs %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vctsxs"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_VCTSXS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vctsxs %0,%1,%2"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vlogefp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_VLOGEFP))]
+ "TARGET_ALTIVEC"
+ "vlogefp %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vexptefp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_VEXPTEFP))]
+ "TARGET_ALTIVEC"
+ "vexptefp %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "*altivec_vrsqrtefp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_RSQRT))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vrsqrtefp %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vrefp"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
+ UNSPEC_FRES))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vrefp %0,%1"
+ [(set_attr "type" "vecfloat")])
+
+(define_expand "altivec_copysign_v4sf3"
+ [(use (match_operand:V4SF 0 "register_operand" ""))
+ (use (match_operand:V4SF 1 "register_operand" ""))
+ (use (match_operand:V4SF 2 "register_operand" ""))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "
+{
+ rtx mask = gen_reg_rtx (V4SImode);
+ rtvec v = rtvec_alloc (4);
+ unsigned HOST_WIDE_INT mask_val = ((unsigned HOST_WIDE_INT)1) << 31;
+
+ RTVEC_ELT (v, 0) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 1) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 2) = GEN_INT (mask_val);
+ RTVEC_ELT (v, 3) = GEN_INT (mask_val);
+
+ emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v)));
+ emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2],
+ gen_lowpart (V4SFmode, mask)));
+ DONE;
+}")
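+
+;; A sketch of the expansion above: the mask holds only the sign bit of
+;; each element (1 << 31), so the bitwise select takes the sign from
+;; operand 2 and the magnitude from operand 1, i.e. the usual
+;;   copysign (a, b) = (b & 0x80000000) | (a & 0x7FFFFFFF)
+;; applied across all four lanes at once.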
+
+(define_insn "altivec_vsldoi_<mode>"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:QI 3 "immediate_operand" "i")]
+ UNSPEC_VLSDOI))]
+ "TARGET_ALTIVEC"
+ "vsldoi %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkhsb"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKHSB))]
+ "TARGET_ALTIVEC"
+ "vupkhsb %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkhpx"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKHPX))]
+ "TARGET_ALTIVEC"
+ "vupkhpx %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkhsh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKHSH))]
+ "TARGET_ALTIVEC"
+ "vupkhsh %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupklsb"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKLSB))]
+ "TARGET_ALTIVEC"
+ "vupklsb %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupklpx"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKLPX))]
+ "TARGET_ALTIVEC"
+ "vupklpx %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupklsh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKLSH))]
+ "TARGET_ALTIVEC"
+ "vupklsh %0,%1"
+ [(set_attr "type" "vecperm")])
+
+;; Compare vectors producing a vector result and a predicate, setting CR6 to
+;; indicate a combined status
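+;; (all elements true vs. none true).  CR6 is (reg:CC 74) in the rs6000
+;; register numbering; the record forms set bit 0 when the comparison
+;; holds for every element and bit 2 when it holds for none, so "all"
+;; and "any" tests can branch on CR6 without reading the vector result.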
+(define_insn "*altivec_vcmpequ<VI_char>_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VI 0 "register_operand" "=v")
+ (eq:VI (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "vcmpequ<VI_char>. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgts<VI_char>_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VI 0 "register_operand" "=v")
+ (gt:VI (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "vcmpgts<VI_char>. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgtu<VI_char>_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VI 0 "register_operand" "=v")
+ (gtu:VI (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "vcmpgtu<VI_char>. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpeqfp_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:V4SF 0 "register_operand" "=v")
+ (eq:V4SF (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpeqfp. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgtfp_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(gt:CC (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:V4SF 0 "register_operand" "=v")
+ (gt:V4SF (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpgtfp. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgefp_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(ge:CC (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:V4SF 0 "register_operand" "=v")
+ (ge:V4SF (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+ "vcmpgefp. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "altivec_vcmpbfp_p"
+ [(set (reg:CC 74)
+ (unspec:CC [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")]
+ UNSPEC_VCMPBFP))
+ (set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_VCMPBFP))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+ "vcmpbfp. %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "altivec_mtvscr"
+ [(set (reg:SI 110)
+ (unspec_volatile:SI
+ [(match_operand:V4SI 0 "register_operand" "v")] UNSPECV_MTVSCR))]
+ "TARGET_ALTIVEC"
+ "mtvscr %0"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_mfvscr"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec_volatile:V8HI [(reg:SI 110)] UNSPECV_MFVSCR))]
+ "TARGET_ALTIVEC"
+ "mfvscr %0"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dssall"
+ [(unspec_volatile [(const_int 0)] UNSPECV_DSSALL)]
+ "TARGET_ALTIVEC"
+ "dssall"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dss"
+ [(unspec_volatile [(match_operand:QI 0 "immediate_operand" "i")]
+ UNSPECV_DSS)]
+ "TARGET_ALTIVEC"
+ "dss %0"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dst"
+ [(unspec [(match_operand 0 "register_operand" "b")
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DST)]
+ "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode"
+ "dst %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dstt"
+ [(unspec [(match_operand 0 "register_operand" "b")
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTT)]
+ "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode"
+ "dstt %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dstst"
+ [(unspec [(match_operand 0 "register_operand" "b")
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTST)]
+ "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode"
+ "dstst %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_dststt"
+ [(unspec [(match_operand 0 "register_operand" "b")
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] UNSPEC_DSTSTT)]
+ "TARGET_ALTIVEC && GET_MODE (operands[0]) == Pmode"
+ "dststt %0,%1,%2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "altivec_lvsl"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] UNSPEC_LVSL))]
+ "TARGET_ALTIVEC"
+ "lvsl %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvsr"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")] UNSPEC_LVSR))]
+ "TARGET_ALTIVEC"
+ "lvsr %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_expand "build_vector_mask_for_load"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_LVSR))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx addr;
+ rtx temp;
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+
+ addr = XEXP (operands[1], 0);
+ temp = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ emit_insn (gen_altivec_lvsr (operands[0],
+ replace_equiv_address (operands[1], temp)));
+ DONE;
+}")
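+
+;; The mask is built with lvsr on the *negated* address: lvsr derives its
+;; permute control from the low four address bits, and negating turns
+;; addr & 15 into (16 - (addr & 15)) & 15, which is the shift the
+;; realignment vperm needs when stitching two aligned loads into one
+;; misaligned vector.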
+
+;; Parallel some of the LVE* and STV*'s with unspecs because some have
+;; identical rtl but different instructions, and gcc gets confused.
+
+(define_insn "altivec_lve<VI_char>x"
+ [(parallel
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (match_operand:VI 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVE)])]
+ "TARGET_ALTIVEC"
+ "lve<VI_char>x %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*altivec_lvesfx"
+ [(parallel
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (match_operand:V4SF 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVE)])]
+ "TARGET_ALTIVEC"
+ "lvewx %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvxl"
+ [(parallel
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (match_operand:V4SI 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_SET_VSCR)])]
+ "TARGET_ALTIVEC"
+ "lvxl %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvx_<mode>"
+ [(parallel
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (match_operand:VM2 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVX)])]
+ "TARGET_ALTIVEC"
+ "lvx %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_stvx_<mode>"
+ [(parallel
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
+ (match_operand:VM2 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVX)])]
+ "TARGET_ALTIVEC"
+ "stvx %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvxl"
+ [(parallel
+ [(set (match_operand:V4SI 0 "memory_operand" "=Z")
+ (match_operand:V4SI 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVXL)])]
+ "TARGET_ALTIVEC"
+ "stvxl %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stve<VI_char>x"
+ [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z")
+ (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))]
+ "TARGET_ALTIVEC"
+ "stve<VI_char>x %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*altivec_stvesfx"
+ [(set (match_operand:SF 0 "memory_operand" "=Z")
+ (unspec:SF [(match_operand:V4SF 1 "register_operand" "v")] UNSPEC_STVE))]
+ "TARGET_ALTIVEC"
+ "stvewx %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+;; Generate
+;; vspltis? SCRATCH1,0
+;; vsubu?m SCRATCH2,SCRATCH1,%1
+;; vmaxs? %0,%1,SCRATCH2
+(define_expand "abs<mode>2"
+ [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
+ (set (match_dup 3)
+ (minus:VI (match_dup 2)
+ (match_operand:VI 1 "register_operand" "v")))
+ (set (match_operand:VI 0 "register_operand" "=v")
+ (smax:VI (match_dup 1) (match_dup 3)))]
+ "TARGET_ALTIVEC"
+{
+ operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
+ operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
+})
+
+;; Generate
+;; vspltisw SCRATCH1,-1
+;; vslw SCRATCH2,SCRATCH1,SCRATCH1
+;; vandc %0,%1,SCRATCH2
+(define_expand "altivec_absv4sf2"
+ [(set (match_dup 2)
+ (vec_duplicate:V4SI (const_int -1)))
+ (set (match_dup 3)
+ (ashift:V4SI (match_dup 2) (match_dup 2)))
+ (set (match_operand:V4SF 0 "register_operand" "=v")
+ (and:V4SF (not:V4SF (subreg:V4SF (match_dup 3) 0))
+ (match_operand:V4SF 1 "register_operand" "v")))]
+ "TARGET_ALTIVEC"
+{
+ operands[2] = gen_reg_rtx (V4SImode);
+ operands[3] = gen_reg_rtx (V4SImode);
+})
+
+;; Generate
+;; vspltis? SCRATCH1,0
+;; vsubs?s SCRATCH2,SCRATCH1,%1
+;; vmaxs? %0,%1,SCRATCH2
+(define_expand "altivec_abss_<mode>"
+ [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
+ (parallel [(set (match_dup 3)
+ (unspec:VI [(match_dup 2)
+ (match_operand:VI 1 "register_operand" "v")]
+ UNSPEC_VSUBS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))])
+ (set (match_operand:VI 0 "register_operand" "=v")
+ (smax:VI (match_dup 1) (match_dup 3)))]
+ "TARGET_ALTIVEC"
+{
+ operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
+ operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
+})
+
+(define_insn "altivec_vsumsws_nomode"
+ [(set (match_operand 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUMSWS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ "TARGET_ALTIVEC"
+ "vsumsws %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_expand "reduc_splus_<mode>"
+ [(set (match_operand:VIshort 0 "register_operand" "=v")
+ (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")]
+ UNSPEC_REDUC_PLUS))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V4SImode);
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
+ DONE;
+}")
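+
+;; The reduction runs in two steps: vsum4s?s folds each word-sized group
+;; of elements into a V4SI partial sum, and vsumsws then adds the four
+;; partial sums (plus the zero accumulator), leaving the total in the
+;; last word of the result.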
+
+(define_expand "reduc_uplus_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_REDUC_PLUS))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V4SImode);
+ rtx vtmp1 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
+ DONE;
+}")
+
+(define_expand "neg<mode>2"
+ [(use (match_operand:VI 0 "register_operand" ""))
+ (use (match_operand:VI 1 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero;
+
+ vzero = gen_reg_rtx (GET_MODE (operands[0]));
+ emit_insn (gen_altivec_vspltis<VI_char> (vzero, const0_rtx));
+ emit_insn (gen_sub<mode>3 (operands[0], vzero, operands[1]));
+
+ DONE;
+}")
+
+(define_expand "udot_prod<mode>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
+ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
+ (match_operand:VIshort 2 "register_operand" "v")]
+ UNSPEC_VMSUMU)))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vmsumu<VI_char>m (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_expand "sdot_prodv8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMSUMSHM)))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_expand "widen_usum<mode>3"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (plus:V4SI (match_operand:V4SI 2 "register_operand" "v")
+ (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")]
+ UNSPEC_VMSUMU)))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vones = gen_reg_rtx (GET_MODE (operands[1]));
+
+ emit_insn (gen_altivec_vspltis<VI_char> (vones, const1_rtx));
+ emit_insn (gen_altivec_vmsumu<VI_char>m (operands[0], operands[1], vones, operands[2]));
+ DONE;
+}")
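+
+;; Multiplying by a splat of 1 turns vmsumu?m into a plain widening sum:
+;; each group of narrow elements, times 1, is accumulated into the
+;; matching word of operand 2, so no separate unpack step is needed.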
+
+(define_expand "widen_ssumv16qi3"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (plus:V4SI (match_operand:V4SI 2 "register_operand" "v")
+ (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VMSUMM)))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vones = gen_reg_rtx (V16QImode);
+
+ emit_insn (gen_altivec_vspltisb (vones, const1_rtx));
+ emit_insn (gen_altivec_vmsummbm (operands[0], operands[1], vones, operands[2]));
+ DONE;
+}")
+
+(define_expand "widen_ssumv8hi3"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (plus:V4SI (match_operand:V4SI 2 "register_operand" "v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VMSUMSHM)))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vones = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_altivec_vspltish (vones, const1_rtx));
+ emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], vones, operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKHSB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKHSH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKLSB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKLSH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn "vperm_v8hiv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERMSI))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "vperm_v16qiv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERMHI))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+
+(define_expand "vec_unpacku_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKHUB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
+
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
+ DONE;
+}")
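+
+;; vperm indices 0-15 select bytes from the first source and 16-31 from
+;; the second, so interleaving index 16 (byte 0 of vzero, always zero)
+;; with the indices of the high eight source bytes zero-extends those
+;; bytes to halfwords.  The _lo and V8HI variants below differ only in
+;; which indices the mask uses.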
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKHUH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
+
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
+ DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VUPKLUB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
+
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
+ DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VUPKLUH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx vzero = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
+
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
+ DONE;
+}")
+
+(define_expand "vec_widen_umult_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULWHUB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V8HImode);
+ rtx vo = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_umult_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULWLUB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V8HImode);
+ rtx vo = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULWHSB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V8HImode);
+ rtx vo = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMULWLSB))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V8HImode);
+ rtx vo = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULWHUH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULWLUH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULWHSH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULWLSH))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+ DONE;
+}")
+
+(define_expand "vec_pack_trunc_v8hi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VPKUHUM))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_pack_trunc_v4si"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VPKUWUM))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "altivec_negv4sf2"
+ [(use (match_operand:V4SF 0 "register_operand" ""))
+ (use (match_operand:V4SF 1 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx neg0;
+
+ /* Generate [-0.0, -0.0, -0.0, -0.0]. */
+ neg0 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltisw (neg0, constm1_rtx));
+ emit_insn (gen_vashlv4si3 (neg0, neg0, neg0));
+
+ /* XOR */
+ emit_insn (gen_xorv4sf3 (operands[0],
+ gen_lowpart (V4SFmode, neg0), operands[1]));
+
+ DONE;
+}")
+
+;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
+;; STVLX, STVLXL, STVRX, and STVRXL; they are available only on Cell.
+(define_insn "altivec_lvlx"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")]
+ UNSPEC_LVLX))]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "lvlx %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvlxl"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")]
+ UNSPEC_LVLXL))]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "lvlxl %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvrx"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")]
+ UNSPEC_LVRX))]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "lvrx %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_lvrxl"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "Z")]
+ UNSPEC_LVRXL))]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "lvrxl %0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "altivec_stvlx"
+ [(parallel
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVLX)])]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "stvlx %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvlxl"
+ [(parallel
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVLXL)])]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "stvlxl %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvrx"
+ [(parallel
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVRX)])]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "stvrx %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "altivec_stvrxl"
+ [(parallel
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVRXL)])]
+ "TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL"
+ "stvrxl %1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_expand "vec_extract_evenv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ UNSPEC_EXTEVEN_V4SI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v4si (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "register_operand" "")]
+ UNSPEC_EXTEVEN_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v4sf (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ UNSPEC_EXTEVEN_V8HI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 5);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 13);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 20);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 21);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 28);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 29);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v8hi (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")]
+ UNSPEC_EXTEVEN_V16QI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 2);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 6);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 14);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 18);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 20);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 22);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 26);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 28);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 30);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v16qi (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ UNSPEC_EXTODD_V4SI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 5);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 6);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 7);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 13);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 14);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 15);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 20);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 21);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 22);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 23);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 28);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 29);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 30);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 31);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v4si (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "register_operand" "")]
+ UNSPEC_EXTODD_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v = rtvec_alloc (16);
+
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 4);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 5);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 6);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 7);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 12);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 13);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 14);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 15);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 20);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 21);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 22);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 23);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 28);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 29);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 30);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 31);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v4sf (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_insn "vpkuhum_nomode"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "register_operand" "v")
+ (match_operand 2 "register_operand" "v")]
+ UNSPEC_VPKUHUM))]
+ "TARGET_ALTIVEC"
+ "vpkuhum %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "vpkuwum_nomode"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand 1 "register_operand" "v")
+ (match_operand 2 "register_operand" "v")]
+ UNSPEC_VPKUWUM))]
+ "TARGET_ALTIVEC"
+ "vpkuwum %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_expand "vec_extract_oddv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ UNSPEC_EXTODD_V8HI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_vpkuwum_nomode (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")]
+ UNSPEC_EXTODD_V16QI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_vpkuhum_nomode (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_interleave_high<mode>"
+ [(set (match_operand:VI 0 "register_operand" "")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "")
+ (match_operand:VI 2 "register_operand" "")]
+ UNSPEC_INTERHI))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vmrgh<VI_char> (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_interleave_low<mode>"
+ [(set (match_operand:VI 0 "register_operand" "")
+ (unspec:VI [(match_operand:VI 1 "register_operand" "")
+ (match_operand:VI 2 "register_operand" "")]
+ UNSPEC_INTERLO))]
+ "TARGET_ALTIVEC"
+ "
+{
+ emit_insn (gen_altivec_vmrgl<VI_char> (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_float_hi_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")]
+ UNSPEC_VUPKHS_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_altivec_vcfsx (operands[0], tmp, const0_rtx));
+ DONE;
+}")
+
+(define_expand "vec_unpacks_float_lo_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")]
+ UNSPEC_VUPKLS_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_altivec_vcfsx (operands[0], tmp, const0_rtx));
+ DONE;
+}")
+
+(define_expand "vec_unpacku_float_hi_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")]
+ UNSPEC_VUPKHU_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
+ DONE;
+}")
+
+(define_expand "vec_unpacku_float_lo_v8hi"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")]
+ UNSPEC_VUPKLU_V4SF))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
+ DONE;
+}")
diff --git a/gcc/config/rs6000/biarch64.h b/gcc/config/rs6000/biarch64.h
new file mode 100644
index 000000000..29e5b029b
--- /dev/null
+++ b/gcc/config/rs6000/biarch64.h
@@ -0,0 +1,26 @@
+/* Definitions of target machine for GNU compiler, for 32/64 bit powerpc.
+ Copyright (C) 2003, 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify this in a cover file to provide bi-architecture (32/64) support. */
+#define RS6000_BI_ARCH 1
diff --git a/gcc/config/rs6000/cell.md b/gcc/config/rs6000/cell.md
new file mode 100644
index 000000000..dac9da943
--- /dev/null
+++ b/gcc/config/rs6000/cell.md
@@ -0,0 +1,400 @@
+;; Scheduling description for the Cell processor.
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Sony Computer Entertainment, Inc.
+
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Sources: BE BOOK4 (/sfs/enc/doc/PPU_BookIV_DD3.0_latest.pdf)
+
+;; BE Architecture *DD3.0 and DD3.1*
+;; This file simulates the backend pipeline of the PPU processor unit
+;; (manual P24; see manual P27 for the stall and flush points).
+;; IU, XU, VSU: the dispatcher decodes and dispatches 2 insns per cycle
+;; in program order; the grouped addresses are aligned by 8.
+;; This file only simulates the single-thread situation.
+;; XU executes all fixed-point insns (3 units: a simple ALU, a complex
+;; unit, and a load/store unit).
+;; VSU executes all scalar floating-point insns (a float unit) and all
+;; VMX insns (VMX unit, 4 sub-units: simple, permute, complex, floating point).
+
+;; Dual issue combination
+
+;; FXU LSU BR VMX VMX
+;; (sx,cx,vsu_fp,fp_arith) (perm,vsu_ls,fp_ls)
+;;FXU X
+;;LSU X X X
+;;BR X
+;;VMX(sx,cx,vsu_fp,fp_arith) X
+;;VMX(perm,vsu_ls, fp_ls) X
+;; An X marks an illegal combination.
+
+;; Dual issue exceptions:
+;; (1) non-pipelined FXU instr in slot 0
+;; (2) non-pipelined FPU instr in slot 0
+;; CSI instr (context-synchronizing insn)
+;; Microcoded insn
+
+;; BRU unit: bru (no register stall), bru_cr (cr register stall)
+;; VSU unit: vus (vmx simple), vup (vmx permute), vuc (vmx complex),
+;; vuf (vmx float), fpu (floats).  fpu_div is hypothetical; it is for
+;; non-pipelined simulation.
+;; Microcoded insns stall at least 7 cycles to get the first instr from ROM;
+;; micro instructions are not dual issued.
+
+;; slot0 is older than slot1;
+;; a non-pipelined insn needs to be in slot1 to avoid a 1-cycle stall
+
+;; There are different stall points:
+;; IB2: only one thread is stalled if we stall here, so try to stall here as
+;; much as we can
+;; condition (1): insert a nop (OR and ORI instruction forms)
+;; condition (2): a flush happens, in case of RAW, WAW, a D-ERAT miss, or
+;; a CR0 access during stdcx or stwcx
+;; IS2 stall (see Page 91 for details)
+;; VQ8 stall
+;; An IS2 stall can be activated by a VQ8 stall plus trying to issue a vsu
+;; instr to the vsu issue queue
+
+;;(define_automaton "cellxu")
+
+;;(define_cpu_unit "fxu_cell,lsu_cell,bru_cell,vsu1_cell,vsu2_cell" "cellxu")
+
+;; ndfa
+(define_automaton "cellxu,cellvsu,cellbru,cell_mis")
+
+(define_cpu_unit "fxu_cell,lsu_cell" "cellxu")
+(define_cpu_unit "bru_cell" "cellbru")
+(define_cpu_unit "vsu1_cell,vsu2_cell" "cellvsu")
+
+(define_cpu_unit "slot0,slot1" "cell_mis")
+
+(absence_set "slot0" "slot1")
+
+(define_reservation "nonpipeline" "fxu_cell+lsu_cell+vsu1_cell+vsu2_cell")
+(define_reservation "slot01" "slot0|slot1")
+
+
+;; Load/store
+;; lmw, lswi, lswx are only generated when optimizing for space (MC);
+;; these instrs are not simulated
+(define_insn_reservation "cell-load" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "cell"))
+ "slot01,lsu_cell")
+
+;; ldux, ldu, lbzux, lbzu: hardware breaks them down into two instrs;
+;; with 32-byte alignment, CMC
+(define_insn_reservation "cell-load-ux" 2
+ (and (eq_attr "type" "load_ux,load_u")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell+lsu_cell")
+
+;; lha, lhax, lhau, lhaux, lwa, lwax, lwaux, MC, latency unknown
+;; 11/7, 11/8, 11/12
+(define_insn_reservation "cell-load-ext" 2
+ (and (eq_attr "type" "load_ext,load_ext_u,load_ext_ux")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell+lsu_cell")
+
+;; lfs, lfsx, lfd, lfdx: 1 cycle
+(define_insn_reservation "cell-fpload" 1
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "cell"))
+ "vsu2_cell+lsu_cell+slot01")
+
+;; lfsu, lfsux, lfdu, lfdux: 1 cycle (fpr), 2 cycles (gpr)
+(define_insn_reservation "cell-fpload-update" 1
+ (and (eq_attr "type" "fpload,fpload_u,fpload_ux")
+ (eq_attr "cpu" "cell"))
+ "fxu_cell+vsu2_cell+lsu_cell+slot01")
+
+(define_insn_reservation "cell-vecload" 2
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu2_cell+lsu_cell")
+
+;;st? stw(MC)
+(define_insn_reservation "cell-store" 1
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "cell"))
+ "lsu_cell+slot01")
+
+;; stdux, stdu (hardware breaks them into store and add): 2 for the update reg
+(define_insn_reservation "cell-store-update" 1
+ (and (eq_attr "type" "store_ux,store_u")
+ (eq_attr "cpu" "cell"))
+ "fxu_cell+lsu_cell+slot01")
+
+(define_insn_reservation "cell-fpstore" 1
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "cell"))
+ "vsu2_cell+lsu_cell+slot01")
+
+(define_insn_reservation "cell-fpstore-update" 1
+ (and (eq_attr "type" "fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "cell"))
+ "vsu2_cell+fxu_cell+lsu_cell+slot01")
+
+(define_insn_reservation "cell-vecstore" 1
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "cell"))
+ "vsu2_cell+lsu_cell+slot01")
+
+;; Integer latency is 2 cycles
+(define_insn_reservation "cell-integer" 2
+ (and (eq_attr "type" "integer,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell")
+
+;; Type "two" (two integer ops): latency is 4 cycles
+(define_insn_reservation "cell-two" 4
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell,fxu_cell*2")
+
+;; Type "three" (three integer ops): latency is 6 cycles
+(define_insn_reservation "cell-three" 6
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell,fxu_cell*4")
+
+;; rlwimi, alter cr0
+(define_insn_reservation "cell-insert" 2
+ (and (eq_attr "type" "insert_word")
+ (eq_attr "cpu" "cell"))
+ "slot01,fxu_cell")
+
+;; cmpi, cmpli, cmpla, add, addo, sub, subo, alter cr0
+(define_insn_reservation "cell-cmp" 1
+ (and (eq_attr "type" "cmp")
+ (eq_attr "cpu" "cell"))
+ "fxu_cell+slot01")
+
+;; add, addo, sub, subo, alter cr0, rldicl, rlwinm
+(define_insn_reservation "cell-fast-cmp" 2
+ (and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "cell"))
+ (eq_attr "cell_micro" "not"))
+ "slot01,fxu_cell")
+
+(define_insn_reservation "cell-cmp-microcoded" 9
+ (and (and (eq_attr "type" "fast_compare,delayed_compare,compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "cell"))
+ (eq_attr "cell_micro" "always"))
+ "slot0+slot1,fxu_cell,fxu_cell*7")
+
+;; mulld
+(define_insn_reservation "cell-lmul" 15
+ (and (eq_attr "type" "lmul")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*13")
+
+;; mulld. is microcoded
+(define_insn_reservation "cell-lmul-cmp" 22
+ (and (eq_attr "type" "lmul_compare")
+ (eq_attr "cpu" "cell"))
+ "slot0+slot1,nonpipeline,nonpipeline*20")
+
+;; mulli, 6 cycles
+(define_insn_reservation "cell-imul23" 6
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*4")
+
+;; mullw, 9
+(define_insn_reservation "cell-imul" 9
+ (and (eq_attr "type" "imul")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*7")
+
+;; divide
+(define_insn_reservation "cell-idiv" 32
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*30")
+
+(define_insn_reservation "cell-ldiv" 64
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*62")
+
+;;mflr and mfctr are pipelined
+(define_insn_reservation "cell-mfjmpr" 1
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "cell"))
+ "slot01+bru_cell")
+
+;;mtlr and mtctr,
+;;mtspr fully pipelined
+(define_insn_reservation "cell-mtjmpr" 1
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "cell"))
+ "bru_cell+slot01")
+
+;; Branches
+;; b, ba, bl, bla: unconditional branches always predict correctly, latency n/a
+;; bcctr, bcctrl: latency 2, actually adjusted by the BE to 4
+(define_insn_reservation "cell-branch" 1
+ (and (eq_attr "type" "branch")
+ (eq_attr "cpu" "cell"))
+ "bru_cell+slot1")
+
+(define_insn_reservation "cell-branchreg" 1
+ (and (eq_attr "type" "jmpreg")
+ (eq_attr "cpu" "cell"))
+ "bru_cell+slot1")
+
+;; cr hazard
+;; Page 90, special cases for the CR hazard: only one instr can access the cr
+;; per cycle; if an insn reads the CR following a stwcx, the pipeline stalls
+;; till the stwcx finishes
+(define_insn_reservation "cell-crlogical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "cell"))
+ "bru_cell+slot01")
+
+;; mfcrf and mfcr take about 34 cycles and are non-pipelined
+(define_insn_reservation "cell-mfcr" 34
+ (and (eq_attr "type" "mfcrf,mfcr")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*32")
+
+;; mtcrf (1 field)
+(define_insn_reservation "cell-mtcrf" 1
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "cell"))
+ "fxu_cell+slot01")
+
+; Basic FP latency is 10 cycles, throughput is 1/cycle
+(define_insn_reservation "cell-fp" 10
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu1_cell,vsu1_cell*8")
+
+(define_insn_reservation "cell-fpcompare" 1
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "cell"))
+ "vsu1_cell+slot01")
+
+;; sdiv throughput 1/74; not pipelined, and only in the FPU
+(define_insn_reservation "cell-sdiv" 74
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*72")
+
+;; fsqrt throughput 1/84; not pipelined, and only in the FPU
+(define_insn_reservation "cell-sqrt" 84
+ (and (eq_attr "type" "ssqrt,dsqrt")
+ (eq_attr "cpu" "cell"))
+ "slot1,nonpipeline,nonpipeline*82")
+
+; VMX
+(define_insn_reservation "cell-vecsimple" 4
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu1_cell,vsu1_cell*2")
+
+;; mult, div, madd
+(define_insn_reservation "cell-veccomplex" 10
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu1_cell,vsu1_cell*8")
+
+;; TODO: add support for recording instructions
+(define_insn_reservation "cell-veccmp" 4
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu1_cell,vsu1_cell*2")
+
+(define_insn_reservation "cell-vecfloat" 12
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu1_cell,vsu1_cell*10")
+
+(define_insn_reservation "cell-vecperm" 4
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "cell"))
+ "slot01,vsu2_cell,vsu2_cell*2")
+
+;; New for 4.2, syncs
+
+(define_insn_reservation "cell-sync" 11
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "cell"))
+ "slot01,lsu_cell,lsu_cell*9")
+
+(define_insn_reservation "cell-isync" 11
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "cell"))
+ "slot01,lsu_cell,lsu_cell*9")
+
+(define_insn_reservation "cell-load_l" 11
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "cell"))
+ "slot01,lsu_cell,lsu_cell*9")
+
+(define_insn_reservation "cell-store_c" 11
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "cell"))
+ "slot01,lsu_cell,lsu_cell*9")
+
+;; RAW register dependency
+
+;; addi r3, r3, 1
+;; lwz r4,offset(r3)
+;; there is a 5-cycle delay for r3 bypassing
+;; there is a 5-cycle delay for a dependent load after a load
+(define_bypass 5 "cell-integer" "cell-load")
+(define_bypass 5 "cell-integer" "cell-load-ext")
+(define_bypass 5 "cell-load,cell-load-ext" "cell-load,cell-load-ext")
+
+;; there is a 6-cycle delay after an fp compare until you can use the cr.
+(define_bypass 6 "cell-fpcompare" "cell-branch,cell-branchreg,cell-mfcr,cell-crlogical")
+
+;; VXU float RAW
+(define_bypass 11 "cell-vecfloat" "cell-vecfloat")
+
+;; VXU and FPU
+(define_bypass 6 "cell-veccomplex" "cell-vecsimple")
+;;(define_bypass 6 "cell-veccompare" "cell-branch,cell-branchreg")
+(define_bypass 3 "cell-vecfloat" "cell-veccomplex")
+; this is not correct:
+;; this is a stall in general, not dependent on the result
+(define_bypass 13 "cell-vecstore" "cell-fpstore")
+; this is not correct: this can never be true; it is not dependent on the result
+(define_bypass 7 "cell-fp" "cell-fpload")
+;; a vsu1 insn should avoid writing to the same target register as a vsu2
+;; insn within 12 cycles.
+
+;; WAW hazard
+
+;; the target of VSU estimate should not be reused within 10 dispatch groups
+;; the target of VSU float should not be reused within 8 dispatch groups
+;; the target of VSU complex should not be reused within 5 dispatch groups
+;; FP LOAD should not reuse an FPU arithmetic target within 6 dispatch groups
+
+;; mtctr-bcctr/bcctrl: the branch-target ctr register shadow updates at
+;; the ex4 stage (10 cycles)
+(define_bypass 10 "cell-mtjmpr" "cell-branchreg")
+
+;; Things not simulated:
+;; update instructions: the update of the address gpr is not simulated
+;; vrefp, vrsqrtefp have a latency of 14, currently simulated as 12-cycle
+;; float insns
+
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
new file mode 100644
index 000000000..71b3b207e
--- /dev/null
+++ b/gcc/config/rs6000/constraints.md
@@ -0,0 +1,201 @@
+;; Constraint definitions for RS6000
+;; Copyright (C) 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Available constraint letters: "e", "k", "u", "A", "B", "C", "D"
+
+;; Register constraints
+
+(define_register_constraint "f" "rs6000_constraints[RS6000_CONSTRAINT_f]"
+ "@internal")
+
+(define_register_constraint "d" "rs6000_constraints[RS6000_CONSTRAINT_d]"
+ "@internal")
+
+(define_register_constraint "b" "BASE_REGS"
+ "@internal")
+
+(define_register_constraint "h" "SPECIAL_REGS"
+ "@internal")
+
+(define_register_constraint "q" "MQ_REGS"
+ "@internal")
+
+(define_register_constraint "c" "CTR_REGS"
+ "@internal")
+
+(define_register_constraint "l" "LINK_REGS"
+ "@internal")
+
+(define_register_constraint "v" "ALTIVEC_REGS"
+ "@internal")
+
+(define_register_constraint "x" "CR0_REGS"
+ "@internal")
+
+(define_register_constraint "y" "CR_REGS"
+ "@internal")
+
+(define_register_constraint "z" "CA_REGS"
+ "@internal")
+
+;; Use w as a prefix to add VSX modes
+;; vector double (V2DF)
+(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
+ "@internal")
+
+;; vector float (V4SF)
+(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
+ "@internal")
+
+;; scalar double (DF)
+(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
+ "@internal")
+
+;; any VSX register
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
+ "@internal")
+
+;; AltiVec-style load/store that ignores the bottom 4 bits of the address
+(define_memory_constraint "wZ"
+ "Indexed or indirect memory operand, ignoring the bottom 4 bits"
+ (match_operand 0 "altivec_indexed_or_indirect_operand"))
+
+;; Integer constraints
+
+(define_constraint "I"
+ "A signed 16-bit constant"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) (ival + 0x8000) < 0x10000")))
+
+(define_constraint "J"
+ "high-order 16 bits nonzero"
+ (and (match_code "const_int")
+ (match_test "(ival & (~ (unsigned HOST_WIDE_INT) 0xffff0000)) == 0")))
+
+(define_constraint "K"
+ "low-order 16 bits nonzero"
+ (and (match_code "const_int")
+ (match_test "(ival & (~ (HOST_WIDE_INT) 0xffff)) == 0")))
+
+(define_constraint "L"
+ "signed 16-bit constant shifted left 16 bits"
+ (and (match_code "const_int")
+ (match_test "((ival & 0xffff) == 0
+ && (ival >> 31 == -1 || ival >> 31 == 0))")))
+
+(define_constraint "M"
+ "constant greater than 31"
+ (and (match_code "const_int")
+ (match_test "ival > 31")))
+
+(define_constraint "N"
+ "positive constant that is an exact power of two"
+ (and (match_code "const_int")
+ (match_test "ival > 0 && exact_log2 (ival) >= 0")))
+
+(define_constraint "O"
+ "constant zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "P"
+ "constant whose negation is signed 16-bit constant"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ((- ival) + 0x8000) < 0x10000")))
+
+;; Floating-point constraints
+
+(define_constraint "G"
+ "Constant that can be copied into GPR with two insns for DF/DI
+ and one for SF."
+ (and (match_code "const_double")
+ (match_test "num_insns_constant (op, mode)
+ == (mode == SFmode ? 1 : 2)")))
+
+(define_constraint "H"
+ "DF/DI constant that takes three insns."
+ (and (match_code "const_double")
+ (match_test "num_insns_constant (op, mode) == 3")))
+
+;; Memory constraints
+
+(define_memory_constraint "es"
+ "A ``stable'' memory operand; that is, one which does not include any
+automodification of the base register. Unlike @samp{m}, this constraint
+can be used in @code{asm} statements that might access the operand
+several times, or that might not access it at all."
+ (and (match_code "mem")
+ (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != RTX_AUTOINC")))
+
+(define_memory_constraint "Q"
+ "Memory operand that is an offset from a register (it is usually better
+to use @samp{m} or @samp{es} in @code{asm} statements)"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == REG")))
+
+(define_memory_constraint "Y"
+ "Indexed or word-aligned displacement memory operand"
+ (match_operand 0 "word_offset_memref_operand"))
+
+(define_memory_constraint "Z"
+ "Memory operand that is an indexed or indirect from a register (it is
+usually better to use @samp{m} or @samp{es} in @code{asm} statements)"
+ (match_operand 0 "indexed_or_indirect_operand"))
+
+;; Address constraints
+
+(define_address_constraint "a"
+ "Indexed or indirect address operand"
+ (match_operand 0 "indexed_or_indirect_address"))
+
+(define_constraint "R"
+ "AIX TOC entry"
+ (match_test "legitimate_constant_pool_address_p (op, QImode, false)"))
+
+;; General constraints
+
+(define_constraint "S"
+ "Constant that can be placed into a 64-bit mask operand"
+ (match_operand 0 "mask64_operand"))
+
+(define_constraint "T"
+ "Constant that can be placed into a 32-bit mask operand"
+ (match_operand 0 "mask_operand"))
+
+(define_constraint "U"
+ "V.4 small data reference"
+ (and (match_test "DEFAULT_ABI == ABI_V4")
+ (match_operand 0 "small_data_operand")))
+
+(define_constraint "t"
+ "AND masks that can be performed by two rldic{l,r} insns
+ (but excluding those that could match other constraints of anddi3)"
+ (and (and (and (match_operand 0 "mask64_2_operand")
+ (match_test "(fixed_regs[CR0_REGNO]
+ || !logical_operand (op, DImode))"))
+ (not (match_operand 0 "mask_operand")))
+ (not (match_operand 0 "mask64_operand"))))
+
+(define_constraint "W"
+ "vector constant that does not require memory"
+ (match_operand 0 "easy_vector_constant"))
+
+(define_constraint "j"
+ "Zero vector constant"
+ (match_test "(op == const0_rtx || op == CONST0_RTX (GET_MODE (op)))"))
diff --git a/gcc/config/rs6000/crtresfpr.asm b/gcc/config/rs6000/crtresfpr.asm
new file mode 100644
index 000000000..9fb228cf4
--- /dev/null
+++ b/gcc/config/rs6000/crtresfpr.asm
@@ -0,0 +1,81 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for restoring floating point registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the floating point save area. */
+
+CFI_STARTPROC
+HIDDEN_FUNC(_restfpr_14) lfd 14,-144(11) /* restore fp registers */
+HIDDEN_FUNC(_restfpr_15) lfd 15,-136(11)
+HIDDEN_FUNC(_restfpr_16) lfd 16,-128(11)
+HIDDEN_FUNC(_restfpr_17) lfd 17,-120(11)
+HIDDEN_FUNC(_restfpr_18) lfd 18,-112(11)
+HIDDEN_FUNC(_restfpr_19) lfd 19,-104(11)
+HIDDEN_FUNC(_restfpr_20) lfd 20,-96(11)
+HIDDEN_FUNC(_restfpr_21) lfd 21,-88(11)
+HIDDEN_FUNC(_restfpr_22) lfd 22,-80(11)
+HIDDEN_FUNC(_restfpr_23) lfd 23,-72(11)
+HIDDEN_FUNC(_restfpr_24) lfd 24,-64(11)
+HIDDEN_FUNC(_restfpr_25) lfd 25,-56(11)
+HIDDEN_FUNC(_restfpr_26) lfd 26,-48(11)
+HIDDEN_FUNC(_restfpr_27) lfd 27,-40(11)
+HIDDEN_FUNC(_restfpr_28) lfd 28,-32(11)
+HIDDEN_FUNC(_restfpr_29) lfd 29,-24(11)
+HIDDEN_FUNC(_restfpr_30) lfd 30,-16(11)
+HIDDEN_FUNC(_restfpr_31) lfd 31,-8(11)
+ blr
+FUNC_END(_restfpr_31)
+FUNC_END(_restfpr_30)
+FUNC_END(_restfpr_29)
+FUNC_END(_restfpr_28)
+FUNC_END(_restfpr_27)
+FUNC_END(_restfpr_26)
+FUNC_END(_restfpr_25)
+FUNC_END(_restfpr_24)
+FUNC_END(_restfpr_23)
+FUNC_END(_restfpr_22)
+FUNC_END(_restfpr_21)
+FUNC_END(_restfpr_20)
+FUNC_END(_restfpr_19)
+FUNC_END(_restfpr_18)
+FUNC_END(_restfpr_17)
+FUNC_END(_restfpr_16)
+FUNC_END(_restfpr_15)
+FUNC_END(_restfpr_14)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/crtresgpr.asm b/gcc/config/rs6000/crtresgpr.asm
new file mode 100644
index 000000000..9f9cec9f9
--- /dev/null
+++ b/gcc/config/rs6000/crtresgpr.asm
@@ -0,0 +1,81 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area. */
+
+CFI_STARTPROC
+HIDDEN_FUNC(_restgpr_14) lwz 14,-72(11) /* restore gp registers */
+HIDDEN_FUNC(_restgpr_15) lwz 15,-68(11)
+HIDDEN_FUNC(_restgpr_16) lwz 16,-64(11)
+HIDDEN_FUNC(_restgpr_17) lwz 17,-60(11)
+HIDDEN_FUNC(_restgpr_18) lwz 18,-56(11)
+HIDDEN_FUNC(_restgpr_19) lwz 19,-52(11)
+HIDDEN_FUNC(_restgpr_20) lwz 20,-48(11)
+HIDDEN_FUNC(_restgpr_21) lwz 21,-44(11)
+HIDDEN_FUNC(_restgpr_22) lwz 22,-40(11)
+HIDDEN_FUNC(_restgpr_23) lwz 23,-36(11)
+HIDDEN_FUNC(_restgpr_24) lwz 24,-32(11)
+HIDDEN_FUNC(_restgpr_25) lwz 25,-28(11)
+HIDDEN_FUNC(_restgpr_26) lwz 26,-24(11)
+HIDDEN_FUNC(_restgpr_27) lwz 27,-20(11)
+HIDDEN_FUNC(_restgpr_28) lwz 28,-16(11)
+HIDDEN_FUNC(_restgpr_29) lwz 29,-12(11)
+HIDDEN_FUNC(_restgpr_30) lwz 30,-8(11)
+HIDDEN_FUNC(_restgpr_31) lwz 31,-4(11)
+ blr
+FUNC_END(_restgpr_31)
+FUNC_END(_restgpr_30)
+FUNC_END(_restgpr_29)
+FUNC_END(_restgpr_28)
+FUNC_END(_restgpr_27)
+FUNC_END(_restgpr_26)
+FUNC_END(_restgpr_25)
+FUNC_END(_restgpr_24)
+FUNC_END(_restgpr_23)
+FUNC_END(_restgpr_22)
+FUNC_END(_restgpr_21)
+FUNC_END(_restgpr_20)
+FUNC_END(_restgpr_19)
+FUNC_END(_restgpr_18)
+FUNC_END(_restgpr_17)
+FUNC_END(_restgpr_16)
+FUNC_END(_restgpr_15)
+FUNC_END(_restgpr_14)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/crtresxfpr.asm b/gcc/config/rs6000/crtresxfpr.asm
new file mode 100644
index 000000000..633f2db61
--- /dev/null
+++ b/gcc/config/rs6000/crtresxfpr.asm
@@ -0,0 +1,126 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for restoring floating point registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the floating point save area. */
+/* In addition to restoring the fp registers, it will return to the caller's */
+/* caller */
+
+CFI_STARTPROC
+CFI_DEF_CFA_REGISTER (11)
+CFI_OFFSET (65, 4)
+CFI_OFFSET (46, -144)
+CFI_OFFSET (47, -136)
+CFI_OFFSET (48, -128)
+CFI_OFFSET (49, -120)
+CFI_OFFSET (50, -112)
+CFI_OFFSET (51, -104)
+CFI_OFFSET (52, -96)
+CFI_OFFSET (53, -88)
+CFI_OFFSET (54, -80)
+CFI_OFFSET (55, -72)
+CFI_OFFSET (56, -64)
+CFI_OFFSET (57, -56)
+CFI_OFFSET (58, -48)
+CFI_OFFSET (59, -40)
+CFI_OFFSET (60, -32)
+CFI_OFFSET (61, -24)
+CFI_OFFSET (62, -16)
+CFI_OFFSET (63, -8)
+HIDDEN_FUNC(_restfpr_14_x) lfd 14,-144(11) /* restore fp registers */
+CFI_RESTORE (46)
+HIDDEN_FUNC(_restfpr_15_x) lfd 15,-136(11)
+CFI_RESTORE (47)
+HIDDEN_FUNC(_restfpr_16_x) lfd 16,-128(11)
+CFI_RESTORE (48)
+HIDDEN_FUNC(_restfpr_17_x) lfd 17,-120(11)
+CFI_RESTORE (49)
+HIDDEN_FUNC(_restfpr_18_x) lfd 18,-112(11)
+CFI_RESTORE (50)
+HIDDEN_FUNC(_restfpr_19_x) lfd 19,-104(11)
+CFI_RESTORE (51)
+HIDDEN_FUNC(_restfpr_20_x) lfd 20,-96(11)
+CFI_RESTORE (52)
+HIDDEN_FUNC(_restfpr_21_x) lfd 21,-88(11)
+CFI_RESTORE (53)
+HIDDEN_FUNC(_restfpr_22_x) lfd 22,-80(11)
+CFI_RESTORE (54)
+HIDDEN_FUNC(_restfpr_23_x) lfd 23,-72(11)
+CFI_RESTORE (55)
+HIDDEN_FUNC(_restfpr_24_x) lfd 24,-64(11)
+CFI_RESTORE (56)
+HIDDEN_FUNC(_restfpr_25_x) lfd 25,-56(11)
+CFI_RESTORE (57)
+HIDDEN_FUNC(_restfpr_26_x) lfd 26,-48(11)
+CFI_RESTORE (58)
+HIDDEN_FUNC(_restfpr_27_x) lfd 27,-40(11)
+CFI_RESTORE (59)
+HIDDEN_FUNC(_restfpr_28_x) lfd 28,-32(11)
+CFI_RESTORE (60)
+HIDDEN_FUNC(_restfpr_29_x) lfd 29,-24(11)
+CFI_RESTORE (61)
+HIDDEN_FUNC(_restfpr_30_x) lfd 30,-16(11)
+CFI_RESTORE (62)
+HIDDEN_FUNC(_restfpr_31_x) lwz 0,4(11)
+ lfd 31,-8(11)
+CFI_RESTORE (63)
+ mtlr 0
+CFI_RESTORE (65)
+ mr 1,11
+CFI_DEF_CFA_REGISTER (1)
+ blr
+FUNC_END(_restfpr_31_x)
+FUNC_END(_restfpr_30_x)
+FUNC_END(_restfpr_29_x)
+FUNC_END(_restfpr_28_x)
+FUNC_END(_restfpr_27_x)
+FUNC_END(_restfpr_26_x)
+FUNC_END(_restfpr_25_x)
+FUNC_END(_restfpr_24_x)
+FUNC_END(_restfpr_23_x)
+FUNC_END(_restfpr_22_x)
+FUNC_END(_restfpr_21_x)
+FUNC_END(_restfpr_20_x)
+FUNC_END(_restfpr_19_x)
+FUNC_END(_restfpr_18_x)
+FUNC_END(_restfpr_17_x)
+FUNC_END(_restfpr_16_x)
+FUNC_END(_restfpr_15_x)
+FUNC_END(_restfpr_14_x)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/crtresxgpr.asm b/gcc/config/rs6000/crtresxgpr.asm
new file mode 100644
index 000000000..451b2b69d
--- /dev/null
+++ b/gcc/config/rs6000/crtresxgpr.asm
@@ -0,0 +1,124 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area. */
+
+CFI_STARTPROC
+CFI_DEF_CFA_REGISTER (11)
+CFI_OFFSET (65, 4)
+CFI_OFFSET (14, -72)
+CFI_OFFSET (15, -68)
+CFI_OFFSET (16, -64)
+CFI_OFFSET (17, -60)
+CFI_OFFSET (18, -56)
+CFI_OFFSET (19, -52)
+CFI_OFFSET (20, -48)
+CFI_OFFSET (21, -44)
+CFI_OFFSET (22, -40)
+CFI_OFFSET (23, -36)
+CFI_OFFSET (24, -32)
+CFI_OFFSET (25, -28)
+CFI_OFFSET (26, -24)
+CFI_OFFSET (27, -20)
+CFI_OFFSET (28, -16)
+CFI_OFFSET (29, -12)
+CFI_OFFSET (30, -8)
+CFI_OFFSET (31, -4)
+HIDDEN_FUNC(_restgpr_14_x) lwz 14,-72(11) /* restore gp registers */
+CFI_RESTORE (14)
+HIDDEN_FUNC(_restgpr_15_x) lwz 15,-68(11)
+CFI_RESTORE (15)
+HIDDEN_FUNC(_restgpr_16_x) lwz 16,-64(11)
+CFI_RESTORE (16)
+HIDDEN_FUNC(_restgpr_17_x) lwz 17,-60(11)
+CFI_RESTORE (17)
+HIDDEN_FUNC(_restgpr_18_x) lwz 18,-56(11)
+CFI_RESTORE (18)
+HIDDEN_FUNC(_restgpr_19_x) lwz 19,-52(11)
+CFI_RESTORE (19)
+HIDDEN_FUNC(_restgpr_20_x) lwz 20,-48(11)
+CFI_RESTORE (20)
+HIDDEN_FUNC(_restgpr_21_x) lwz 21,-44(11)
+CFI_RESTORE (21)
+HIDDEN_FUNC(_restgpr_22_x) lwz 22,-40(11)
+CFI_RESTORE (22)
+HIDDEN_FUNC(_restgpr_23_x) lwz 23,-36(11)
+CFI_RESTORE (23)
+HIDDEN_FUNC(_restgpr_24_x) lwz 24,-32(11)
+CFI_RESTORE (24)
+HIDDEN_FUNC(_restgpr_25_x) lwz 25,-28(11)
+CFI_RESTORE (25)
+HIDDEN_FUNC(_restgpr_26_x) lwz 26,-24(11)
+CFI_RESTORE (26)
+HIDDEN_FUNC(_restgpr_27_x) lwz 27,-20(11)
+CFI_RESTORE (27)
+HIDDEN_FUNC(_restgpr_28_x) lwz 28,-16(11)
+CFI_RESTORE (28)
+HIDDEN_FUNC(_restgpr_29_x) lwz 29,-12(11)
+CFI_RESTORE (29)
+HIDDEN_FUNC(_restgpr_30_x) lwz 30,-8(11)
+CFI_RESTORE (30)
+HIDDEN_FUNC(_restgpr_31_x) lwz 0,4(11)
+ lwz 31,-4(11)
+CFI_RESTORE (31)
+ mtlr 0
+CFI_RESTORE (65)
+ mr 1,11
+CFI_DEF_CFA_REGISTER (1)
+ blr
+FUNC_END(_restgpr_31_x)
+FUNC_END(_restgpr_30_x)
+FUNC_END(_restgpr_29_x)
+FUNC_END(_restgpr_28_x)
+FUNC_END(_restgpr_27_x)
+FUNC_END(_restgpr_26_x)
+FUNC_END(_restgpr_25_x)
+FUNC_END(_restgpr_24_x)
+FUNC_END(_restgpr_23_x)
+FUNC_END(_restgpr_22_x)
+FUNC_END(_restgpr_21_x)
+FUNC_END(_restgpr_20_x)
+FUNC_END(_restgpr_19_x)
+FUNC_END(_restgpr_18_x)
+FUNC_END(_restgpr_17_x)
+FUNC_END(_restgpr_16_x)
+FUNC_END(_restgpr_15_x)
+FUNC_END(_restgpr_14_x)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/crtsavfpr.asm b/gcc/config/rs6000/crtsavfpr.asm
new file mode 100644
index 000000000..3cdb25033
--- /dev/null
+++ b/gcc/config/rs6000/crtsavfpr.asm
@@ -0,0 +1,81 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for saving floating point registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the floating point save area. */
+
+CFI_STARTPROC
+HIDDEN_FUNC(_savefpr_14) stfd 14,-144(11) /* save fp registers */
+HIDDEN_FUNC(_savefpr_15) stfd 15,-136(11)
+HIDDEN_FUNC(_savefpr_16) stfd 16,-128(11)
+HIDDEN_FUNC(_savefpr_17) stfd 17,-120(11)
+HIDDEN_FUNC(_savefpr_18) stfd 18,-112(11)
+HIDDEN_FUNC(_savefpr_19) stfd 19,-104(11)
+HIDDEN_FUNC(_savefpr_20) stfd 20,-96(11)
+HIDDEN_FUNC(_savefpr_21) stfd 21,-88(11)
+HIDDEN_FUNC(_savefpr_22) stfd 22,-80(11)
+HIDDEN_FUNC(_savefpr_23) stfd 23,-72(11)
+HIDDEN_FUNC(_savefpr_24) stfd 24,-64(11)
+HIDDEN_FUNC(_savefpr_25) stfd 25,-56(11)
+HIDDEN_FUNC(_savefpr_26) stfd 26,-48(11)
+HIDDEN_FUNC(_savefpr_27) stfd 27,-40(11)
+HIDDEN_FUNC(_savefpr_28) stfd 28,-32(11)
+HIDDEN_FUNC(_savefpr_29) stfd 29,-24(11)
+HIDDEN_FUNC(_savefpr_30) stfd 30,-16(11)
+HIDDEN_FUNC(_savefpr_31) stfd 31,-8(11)
+ blr
+FUNC_END(_savefpr_31)
+FUNC_END(_savefpr_30)
+FUNC_END(_savefpr_29)
+FUNC_END(_savefpr_28)
+FUNC_END(_savefpr_27)
+FUNC_END(_savefpr_26)
+FUNC_END(_savefpr_25)
+FUNC_END(_savefpr_24)
+FUNC_END(_savefpr_23)
+FUNC_END(_savefpr_22)
+FUNC_END(_savefpr_21)
+FUNC_END(_savefpr_20)
+FUNC_END(_savefpr_19)
+FUNC_END(_savefpr_18)
+FUNC_END(_savefpr_17)
+FUNC_END(_savefpr_16)
+FUNC_END(_savefpr_15)
+FUNC_END(_savefpr_14)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/crtsavgpr.asm b/gcc/config/rs6000/crtsavgpr.asm
new file mode 100644
index 000000000..6d473963b
--- /dev/null
+++ b/gcc/config/rs6000/crtsavgpr.asm
@@ -0,0 +1,81 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ * 64-bit support written by David Edelsohn
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initializations needed for the eabi environment */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+/* On PowerPC64 Linux, these functions are provided by the linker. */
+#ifndef __powerpc64__
+
+/* Routines for saving integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area. */
+
+CFI_STARTPROC
+HIDDEN_FUNC(_savegpr_14) stw 14,-72(11) /* save gp registers */
+HIDDEN_FUNC(_savegpr_15) stw 15,-68(11)
+HIDDEN_FUNC(_savegpr_16) stw 16,-64(11)
+HIDDEN_FUNC(_savegpr_17) stw 17,-60(11)
+HIDDEN_FUNC(_savegpr_18) stw 18,-56(11)
+HIDDEN_FUNC(_savegpr_19) stw 19,-52(11)
+HIDDEN_FUNC(_savegpr_20) stw 20,-48(11)
+HIDDEN_FUNC(_savegpr_21) stw 21,-44(11)
+HIDDEN_FUNC(_savegpr_22) stw 22,-40(11)
+HIDDEN_FUNC(_savegpr_23) stw 23,-36(11)
+HIDDEN_FUNC(_savegpr_24) stw 24,-32(11)
+HIDDEN_FUNC(_savegpr_25) stw 25,-28(11)
+HIDDEN_FUNC(_savegpr_26) stw 26,-24(11)
+HIDDEN_FUNC(_savegpr_27) stw 27,-20(11)
+HIDDEN_FUNC(_savegpr_28) stw 28,-16(11)
+HIDDEN_FUNC(_savegpr_29) stw 29,-12(11)
+HIDDEN_FUNC(_savegpr_30) stw 30,-8(11)
+HIDDEN_FUNC(_savegpr_31) stw 31,-4(11)
+ blr
+FUNC_END(_savegpr_31)
+FUNC_END(_savegpr_30)
+FUNC_END(_savegpr_29)
+FUNC_END(_savegpr_28)
+FUNC_END(_savegpr_27)
+FUNC_END(_savegpr_26)
+FUNC_END(_savegpr_25)
+FUNC_END(_savegpr_24)
+FUNC_END(_savegpr_23)
+FUNC_END(_savegpr_22)
+FUNC_END(_savegpr_21)
+FUNC_END(_savegpr_20)
+FUNC_END(_savegpr_19)
+FUNC_END(_savegpr_18)
+FUNC_END(_savegpr_17)
+FUNC_END(_savegpr_16)
+FUNC_END(_savegpr_15)
+FUNC_END(_savegpr_14)
+CFI_ENDPROC
+
+#endif
diff --git a/gcc/config/rs6000/darwin-asm.h b/gcc/config/rs6000/darwin-asm.h
new file mode 100644
index 000000000..837b7a33e
--- /dev/null
+++ b/gcc/config/rs6000/darwin-asm.h
@@ -0,0 +1,51 @@
+/* Macro definitions used to support 32/64-bit code in Darwin's
+ * assembly files.
+ *
+ * Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* These are donated from /usr/include/architecture/ppc. */
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define cmpg MODE_CHOICE(cmpw, cmpd)
+#define lg MODE_CHOICE(lwz, ld)
+#define stg MODE_CHOICE(stw, std)
+#define lgx MODE_CHOICE(lwzx, ldx)
+#define stgx MODE_CHOICE(stwx, stdx)
+#define lgu MODE_CHOICE(lwzu, ldu)
+#define stgu MODE_CHOICE(stwu, stdu)
+#define lgux MODE_CHOICE(lwzux, ldux)
+#define stgux MODE_CHOICE(stwux, stdux)
+#define lgwa MODE_CHOICE(lwz, lwa)
+
+#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */
+
+#define GPR_BYTES MODE_CHOICE(4,8) /* size of a GPR in bytes */
+#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */
+
+#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* position of saved
+ LR in frame */
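+
+/* Usage illustration (editor's note, describing typical use rather
+   than anything defined here): a line such as
+	lg r0,SAVED_LR_OFFSET(r1)
+   assembles as "lwz r0,8(r1)" for 32-bit ppc and as "ld r0,16(r1)"
+   for 64-bit ppc64, so one assembly source serves both widths.  */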
diff --git a/gcc/config/rs6000/darwin-fallback.c b/gcc/config/rs6000/darwin-fallback.c
new file mode 100644
index 000000000..4591071ea
--- /dev/null
+++ b/gcc/config/rs6000/darwin-fallback.c
@@ -0,0 +1,487 @@
+/* Fallback frame-state unwinder for Darwin.
+ Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __ppc__
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "dwarf2.h"
+#include "unwind.h"
+#include "unwind-dw2.h"
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#define R_LR 65
+#define R_CTR 66
+#define R_CR2 70
+#define R_XER 76
+#define R_VR0 77
+#define R_VRSAVE 109
+#define R_VSCR 110
+#define R_SPEFSCR 112
+
+typedef unsigned long reg_unit;
+
+/* Place in GPRS the parameters to the first 'sc' instruction that would
+ have been executed if we were returning from this CONTEXT, or
+ return false if an unexpected instruction is encountered. */
+
+static bool
+interpret_libc (reg_unit gprs[32], struct _Unwind_Context *context)
+{
+ uint32_t *pc = (uint32_t *)_Unwind_GetIP (context);
+ uint32_t cr;
+ reg_unit lr = (reg_unit) pc;
+ reg_unit ctr = 0;
+ uint32_t *invalid_address = NULL;
+
+ int i;
+
+ for (i = 0; i < 13; i++)
+ gprs[i] = 1;
+ gprs[1] = _Unwind_GetCFA (context);
+ for (; i < 32; i++)
+ gprs[i] = _Unwind_GetGR (context, i);
+ cr = _Unwind_GetGR (context, R_CR2);
+
+ /* For each supported Libc, we have to track the code flow
+ all the way back into the kernel.
+
+ This code is believed to support all released Libc/Libsystem builds since
+ Jaguar 6C115, including all the security updates. To be precise,
+
+ Libc Libsystem Build(s)
+ 262~1 60~37 6C115
+ 262~1 60.2~4 6D52
+ 262~1 61~3 6F21-6F22
+ 262~1 63~24 6G30-6G37
+ 262~1 63~32 6I34-6I35
+ 262~1 63~64 6L29-6L60
+ 262.4.1~1 63~84 6L123-6R172
+
+ 320~1 71~101 7B85-7D28
+ 320~1 71~266 7F54-7F56
+ 320~1 71~288 7F112
+ 320~1 71~289 7F113
+ 320.1.3~1 71.1.1~29 7H60-7H105
+ 320.1.3~1 71.1.1~30 7H110-7H113
+ 320.1.3~1 71.1.1~31 7H114
+
+ That's a big table! It would be insane to try to keep track of
+ every little detail, so we just read the code itself and do what
+ it would do.
+ */
+
+ for (;;)
+ {
+ uint32_t ins = *pc++;
+
+ if ((ins & 0xFC000003) == 0x48000000) /* b instruction */
+ {
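+	  /* The AA/LK bits are masked off, leaving the 26-bit byte
+	     displacement; the XOR/subtract pair sign-extends it, and the
+	     extra -4 compensates for pc having already been advanced.
+	     Division by 4 scales bytes to words for the uint32_t pc.  */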
+ pc += ((((int32_t) ins & 0x3FFFFFC) ^ 0x2000000) - 0x2000004) / 4;
+ continue;
+ }
+ if ((ins & 0xFC600000) == 0x2C000000) /* cmpwi */
+ {
+ int32_t val1 = (int16_t) ins;
+ int32_t val2 = gprs[ins >> 16 & 0x1F];
+ /* Only beq and bne instructions are supported, so we only
+ need to set the EQ bit. */
+ uint32_t mask = 0xF << ((ins >> 21 & 0x1C) ^ 0x1C);
+ if (val1 == val2)
+ cr |= mask;
+ else
+ cr &= ~mask;
+ continue;
+ }
+ if ((ins & 0xFEC38003) == 0x40820000) /* forwards beq/bne */
+ {
+ if ((cr >> ((ins >> 16 & 0x1F) ^ 0x1F) & 1) == (ins >> 24 & 1))
+ pc += (ins & 0x7FFC) / 4 - 1;
+ continue;
+ }
+ if ((ins & 0xFC0007FF) == 0x7C000378) /* or, including mr */
+ {
+ gprs [ins >> 16 & 0x1F] = (gprs [ins >> 11 & 0x1F]
+ | gprs [ins >> 21 & 0x1F]);
+ continue;
+ }
+ if (ins >> 26 == 0x0E) /* addi, including li */
+ {
+ reg_unit src = (ins >> 16 & 0x1F) == 0 ? 0 : gprs [ins >> 16 & 0x1F];
+ gprs [ins >> 21 & 0x1F] = src + (int16_t) ins;
+ continue;
+ }
+ if (ins >> 26 == 0x0F) /* addis, including lis */
+ {
+ reg_unit src = (ins >> 16 & 0x1F) == 0 ? 0 : gprs [ins >> 16 & 0x1F];
+ gprs [ins >> 21 & 0x1F] = src + ((int16_t) ins << 16);
+ continue;
+ }
+ if (ins >> 26 == 0x20) /* lwz */
+ {
+ reg_unit src = (ins >> 16 & 0x1F) == 0 ? 0 : gprs [ins >> 16 & 0x1F];
+ uint32_t *p = (uint32_t *)(src + (int16_t) ins);
+ if (p == invalid_address)
+ return false;
+ gprs [ins >> 21 & 0x1F] = *p;
+ continue;
+ }
+ if (ins >> 26 == 0x21) /* lwzu */
+ {
+ uint32_t *p = (uint32_t *)(gprs [ins >> 16 & 0x1F] += (int16_t) ins);
+ if (p == invalid_address)
+ return false;
+ gprs [ins >> 21 & 0x1F] = *p;
+ continue;
+ }
+ if (ins >> 26 == 0x24) /* stw */
+ /* What we hope this is doing is '--in_sigtramp'. We don't want
+ to actually store to memory, so just make a note of the
+ address and refuse to load from it. */
+ {
+ reg_unit src = (ins >> 16 & 0x1F) == 0 ? 0 : gprs [ins >> 16 & 0x1F];
+ uint32_t *p = (uint32_t *)(src + (int16_t) ins);
+ if (p == NULL || invalid_address != NULL)
+ return false;
+ invalid_address = p;
+ continue;
+ }
+ if (ins >> 26 == 0x2E) /* lmw */
+ {
+ reg_unit src = (ins >> 16 & 0x1F) == 0 ? 0 : gprs [ins >> 16 & 0x1F];
+ uint32_t *p = (uint32_t *)(src + (int16_t) ins);
+ int i;
+
+ for (i = (ins >> 21 & 0x1F); i < 32; i++)
+ {
+ if (p == invalid_address)
+ return false;
+ gprs[i] = *p++;
+ }
+ continue;
+ }
+ if ((ins & 0xFC1FFFFF) == 0x7c0803a6) /* mtlr */
+ {
+ lr = gprs [ins >> 21 & 0x1F];
+ continue;
+ }
+ if ((ins & 0xFC1FFFFF) == 0x7c0802a6) /* mflr */
+ {
+ gprs [ins >> 21 & 0x1F] = lr;
+ continue;
+ }
+ if ((ins & 0xFC1FFFFF) == 0x7c0903a6) /* mtctr */
+ {
+ ctr = gprs [ins >> 21 & 0x1F];
+ continue;
+ }
+ /* The PowerPC User's Manual says that bit 11 of the mtcrf
+ instruction is reserved and should be set to zero, but it
+ looks like the Darwin assembler doesn't do that... */
+ if ((ins & 0xFC000FFF) == 0x7c000120) /* mtcrf */
+ {
+ int i;
+ uint32_t mask = 0;
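+	  /* Expand the 8-bit FXM field (instruction bits 12..19) into a
+	     32-bit mask in which every selected CR field gets 0xF.  */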
+ for (i = 0; i < 8; i++)
+ mask |= ((-(ins >> (12 + i) & 1)) & 0xF) << 4 * i;
+ cr = (cr & ~mask) | (gprs [ins >> 21 & 0x1F] & mask);
+ continue;
+ }
+ if (ins == 0x429f0005) /* bcl- 20,4*cr7+so,.+4, loads pc into LR */
+ {
+ lr = (reg_unit) pc;
+ continue;
+ }
+ if (ins == 0x4e800420) /* bctr */
+ {
+ pc = (uint32_t *) ctr;
+ continue;
+ }
+ if (ins == 0x44000002) /* sc */
+ return true;
+
+ return false;
+ }
+}
+
+/* We used to include <ucontext.h> and <mach/thread_status.h>,
+ but they change so much between different Darwin system versions
+ that it's much easier to just write the structures involved here
+ directly. */
+
+/* These defines are from the kernel's bsd/dev/ppc/unix_signal.c. */
+#define UC_TRAD 1
+#define UC_TRAD_VEC 6
+#define UC_TRAD64 20
+#define UC_TRAD64_VEC 25
+#define UC_FLAVOR 30
+#define UC_FLAVOR_VEC 35
+#define UC_FLAVOR64 40
+#define UC_FLAVOR64_VEC 45
+#define UC_DUAL 50
+#define UC_DUAL_VEC 55
+
+struct gcc_ucontext
+{
+ int onstack;
+ sigset_t sigmask;
+ void * stack_sp;
+ size_t stack_sz;
+ int stack_flags;
+ struct gcc_ucontext *link;
+ size_t mcsize;
+ struct gcc_mcontext32 *mcontext;
+};
+
+struct gcc_float_vector_state
+{
+ double fpregs[32];
+ uint32_t fpscr_pad;
+ uint32_t fpscr;
+ uint32_t save_vr[32][4];
+ uint32_t save_vscr[4];
+};
+
+struct gcc_mcontext32 {
+ uint32_t dar;
+ uint32_t dsisr;
+ uint32_t exception;
+ uint32_t padding1[5];
+ uint32_t srr0;
+ uint32_t srr1;
+ uint32_t gpr[32];
+ uint32_t cr;
+ uint32_t xer;
+ uint32_t lr;
+ uint32_t ctr;
+ uint32_t mq;
+ uint32_t vrsave;
+ struct gcc_float_vector_state fvs;
+};
+
+/* These are based on /usr/include/ppc/ucontext.h and
+ /usr/include/mach/ppc/thread_status.h, but rewritten to be more
+ convenient, to compile on Jaguar, and to work around Radar 3712064
+ on Panther, which is that the 'es' field of 'struct mcontext64' has
+ the wrong type (doh!). */
+
+struct gcc_mcontext64 {
+ uint64_t dar;
+ uint32_t dsisr;
+ uint32_t exception;
+ uint32_t padding1[4];
+ uint64_t srr0;
+ uint64_t srr1;
+ uint32_t gpr[32][2];
+ uint32_t cr;
+ uint32_t xer[2]; /* These are arrays because the original structure has them misaligned. */
+ uint32_t lr[2];
+ uint32_t ctr[2];
+ uint32_t vrsave;
+ struct gcc_float_vector_state fvs;
+};
+
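+/* The sizes without _VEC omit the trailing vector state: 33*16 bytes
+   covers the 32 save_vr entries plus save_vscr in
+   struct gcc_float_vector_state.  */
+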
+#define UC_FLAVOR_SIZE \
+ (sizeof (struct gcc_mcontext32) - 33*16)
+
+#define UC_FLAVOR_VEC_SIZE (sizeof (struct gcc_mcontext32))
+
+#define UC_FLAVOR64_SIZE \
+ (sizeof (struct gcc_mcontext64) - 33*16)
+
+#define UC_FLAVOR64_VEC_SIZE (sizeof (struct gcc_mcontext64))
+
+/* Given GPRS as input to a 'sc' instruction, and OLD_CFA, update FS
+ to represent the execution of a signal return; or, if not a signal
+ return, return false. */
+
+static bool
+handle_syscall (_Unwind_FrameState *fs, const reg_unit gprs[32],
+ _Unwind_Ptr old_cfa)
+{
+ struct gcc_ucontext *uctx;
+ bool is_64, is_vector;
+ struct gcc_float_vector_state * float_vector_state;
+ _Unwind_Ptr new_cfa;
+ int i;
+ static _Unwind_Ptr return_addr;
+
+ /* Yay! We're in a Libc that we understand, and it's made a
+ system call. In Jaguar, this is a direct system call with value 103;
+ in Panther and Tiger it is a SYS_syscall call for system call number 184,
+ and in Leopard it is a direct syscall with number 184. */
+
+ if (gprs[0] == 0x67 /* SYS_SIGRETURN */)
+ {
+ uctx = (struct gcc_ucontext *) gprs[3];
+ is_vector = (uctx->mcsize == UC_FLAVOR64_VEC_SIZE
+ || uctx->mcsize == UC_FLAVOR_VEC_SIZE);
+ is_64 = (uctx->mcsize == UC_FLAVOR64_VEC_SIZE
+ || uctx->mcsize == UC_FLAVOR64_SIZE);
+ }
+ else if (gprs[0] == 0 /* SYS_syscall */ && gprs[3] == 184)
+ {
+ int ctxstyle = gprs[5];
+ uctx = (struct gcc_ucontext *) gprs[4];
+ is_vector = (ctxstyle == UC_FLAVOR_VEC || ctxstyle == UC_FLAVOR64_VEC
+ || ctxstyle == UC_TRAD_VEC || ctxstyle == UC_TRAD64_VEC);
+ is_64 = (ctxstyle == UC_FLAVOR64_VEC || ctxstyle == UC_TRAD64_VEC
+ || ctxstyle == UC_FLAVOR64 || ctxstyle == UC_TRAD64);
+ }
+ else if (gprs[0] == 184 /* SYS_sigreturn */)
+ {
+ int ctxstyle = gprs[4];
+ uctx = (struct gcc_ucontext *) gprs[3];
+ is_vector = (ctxstyle == UC_FLAVOR_VEC || ctxstyle == UC_FLAVOR64_VEC
+ || ctxstyle == UC_TRAD_VEC || ctxstyle == UC_TRAD64_VEC);
+ is_64 = (ctxstyle == UC_FLAVOR64_VEC || ctxstyle == UC_TRAD64_VEC
+ || ctxstyle == UC_FLAVOR64 || ctxstyle == UC_TRAD64);
+ }
+ else
+ return false;
+
+#define set_offset(r, addr) \
+ (fs->regs.reg[r].how = REG_SAVED_OFFSET, \
+ fs->regs.reg[r].loc.offset = (_Unwind_Ptr)(addr) - new_cfa)
+
+  /* Restore even the registers that are not call-saved, since they
+     might be in use in the prologue to save other registers; for
+     instance, GPR0 is sometimes used to save LR.  */
+
+ /* Handle the GPRs, and produce the information needed to do the rest. */
+ if (is_64)
+ {
+ /* The context is 64-bit, but it doesn't carry any extra information
+ for us because only the low 32 bits of the registers are
+ call-saved. */
+ struct gcc_mcontext64 *m64 = (struct gcc_mcontext64 *)uctx->mcontext;
+ int i;
+
+ float_vector_state = &m64->fvs;
+
+ new_cfa = m64->gpr[1][1];
+
+ set_offset (R_CR2, &m64->cr);
+ for (i = 0; i < 32; i++)
+ set_offset (i, m64->gpr[i] + 1);
+ set_offset (R_XER, m64->xer + 1);
+ set_offset (R_LR, m64->lr + 1);
+ set_offset (R_CTR, m64->ctr + 1);
+ if (is_vector)
+ set_offset (R_VRSAVE, &m64->vrsave);
+
+ /* Sometimes, srr0 points to the instruction that caused the exception,
+ and sometimes to the next instruction to be executed; we want
+ the latter. */
+ if (m64->exception == 3 || m64->exception == 4
+ || m64->exception == 6
+ || (m64->exception == 7 && !(m64->srr1 & 0x10000)))
+ return_addr = m64->srr0 + 4;
+ else
+ return_addr = m64->srr0;
+ }
+ else
+ {
+ struct gcc_mcontext32 *m = uctx->mcontext;
+ int i;
+
+ float_vector_state = &m->fvs;
+
+ new_cfa = m->gpr[1];
+
+ set_offset (R_CR2, &m->cr);
+ for (i = 0; i < 32; i++)
+ set_offset (i, m->gpr + i);
+ set_offset (R_XER, &m->xer);
+ set_offset (R_LR, &m->lr);
+ set_offset (R_CTR, &m->ctr);
+
+ if (is_vector)
+ set_offset (R_VRSAVE, &m->vrsave);
+
+ /* Sometimes, srr0 points to the instruction that caused the exception,
+ and sometimes to the next instruction to be executed; we want
+ the latter. */
+ if (m->exception == 3 || m->exception == 4
+ || m->exception == 6
+ || (m->exception == 7 && !(m->srr1 & 0x10000)))
+ return_addr = m->srr0 + 4;
+ else
+ return_addr = m->srr0;
+ }
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = STACK_POINTER_REGNUM;
+  fs->regs.cfa_offset = new_cfa - old_cfa;
+
+ /* The choice of column for the return address is somewhat tricky.
+ Fortunately, the actual choice is private to this file, and
+ the space it's reserved from is the GCC register space, not the
+ DWARF2 numbering. So any free element of the right size is an OK
+ choice. Thus: */
+ fs->retaddr_column = ARG_POINTER_REGNUM;
+ /* FIXME: this should really be done using a DWARF2 location expression,
+ not using a static variable. In fact, this entire file should
+ be implemented in DWARF2 expressions. */
+ set_offset (ARG_POINTER_REGNUM, &return_addr);
+
+ for (i = 0; i < 32; i++)
+ set_offset (32 + i, float_vector_state->fpregs + i);
+ set_offset (R_SPEFSCR, &float_vector_state->fpscr);
+
+ if (is_vector)
+ {
+ for (i = 0; i < 32; i++)
+ set_offset (R_VR0 + i, float_vector_state->save_vr + i);
+ set_offset (R_VSCR, float_vector_state->save_vscr);
+ }
+
+ return true;
+}
+
+/* This is also prototyped in rs6000/darwin.h, inside the
+ MD_FALLBACK_FRAME_STATE_FOR macro. */
+extern bool _Unwind_fallback_frame_state_for (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs);
+
+/* Implement the MD_FALLBACK_FRAME_STATE_FOR macro,
+ returning true iff the frame was a sigreturn() frame that we
+ can understand. */
+
+bool
+_Unwind_fallback_frame_state_for (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ reg_unit gprs[32];
+
+ if (!interpret_libc (gprs, context))
+ return false;
+ return handle_syscall (fs, gprs, _Unwind_GetCFA (context));
+}
+#endif
diff --git a/gcc/config/rs6000/darwin-fpsave.asm b/gcc/config/rs6000/darwin-fpsave.asm
new file mode 100644
index 000000000..47fdc92f8
--- /dev/null
+++ b/gcc/config/rs6000/darwin-fpsave.asm
@@ -0,0 +1,92 @@
+/* This file contains the floating-point save and restore routines.
+ *
+ * Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* THE SAVE AND RESTORE ROUTINES CAN HAVE ONLY ONE GLOBALLY VISIBLE
+ ENTRY POINT - callers have to jump to "saveFP+60" to save f29..f31,
+ for example. For FP reg saves/restores, it takes one instruction
+ (4 bytes) to do the operation; for Vector regs, 2 instructions are
+ required (8 bytes.)
+
+ MORAL: DO NOT MESS AROUND WITH THESE FUNCTIONS! */
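+
+/* (Editor's note: the entry offset is (first_reg - 14) * 4 bytes,
+   each save being one 4-byte instruction, so saving f29..f31 enters
+   at saveFP + (29 - 14) * 4 = saveFP + 60, as in the example above.)  */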
+
+#include "darwin-asm.h"
+
+.text
+ .align 2
+
+/* saveFP saves R0 -- assumed to be the caller's LR -- to 8/16(R1). */
+
+.private_extern saveFP
+saveFP:
+ stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ stg r0,SAVED_LR_OFFSET(r1)
+ blr
+
+/* restFP restores the caller's LR from 8/16(R1). Note that the code for
+ this starts at the offset of F30 restoration, so calling this
+ routine in an attempt to restore only F31 WILL NOT WORK (it would
+ be a stupid thing to do, anyway.) */
+
+.private_extern restFP
+restFP:
+ lfd f14,-144(r1)
+ lfd f15,-136(r1)
+ lfd f16,-128(r1)
+ lfd f17,-120(r1)
+ lfd f18,-112(r1)
+ lfd f19,-104(r1)
+ lfd f20,-96(r1)
+ lfd f21,-88(r1)
+ lfd f22,-80(r1)
+ lfd f23,-72(r1)
+ lfd f24,-64(r1)
+ lfd f25,-56(r1)
+ lfd f26,-48(r1)
+ lfd f27,-40(r1)
+ lfd f28,-32(r1)
+ lfd f29,-24(r1)
+	/* <OFFSET OF F30 RESTORE> restore caller's LR */
+ lg r0,SAVED_LR_OFFSET(r1)
+ lfd f30,-16(r1)
+ /* and prepare for return to caller */
+ mtlr r0
+ lfd f31,-8(r1)
+ blr
diff --git a/gcc/config/rs6000/darwin-ldouble-format b/gcc/config/rs6000/darwin-ldouble-format
new file mode 100644
index 000000000..3d1a06a14
--- /dev/null
+++ b/gcc/config/rs6000/darwin-ldouble-format
@@ -0,0 +1,91 @@
+Long double format
+==================
+
+ Each long double is made up of two IEEE doubles. The value of the
+long double is the sum of the values of the two parts (except for
+-0.0). The most significant part is required to be the value of the
+long double rounded to the nearest double, as specified by IEEE. For
+Inf values, the least significant part is required to be one of +0.0
+or -0.0. No other requirements are made; so, for example, 1.0 may be
+represented as (1.0, +0.0) or (1.0, -0.0), and the low part of a NaN
+is don't-care.
+
+Classification
+--------------
+
+A long double can represent any value of the form
+ s * 2^e * sum(k=0...105: f_k * 2^(-k))
+where 's' is +1 or -1, 'e' is between 1022 and -968 inclusive, f_0 is
+1, and f_k for k>0 is 0 or 1. These are the 'normal' long doubles.
+
+A long double can also represent any value of the form
+ s * 2^-968 * sum(k=0...105: f_k * 2^(-k))
+where 's' is +1 or -1, f_0 is 0, and f_k for k>0 is 0 or 1. These are
+the 'subnormal' long doubles.
+
+There are four long doubles that represent zero, two that represent
++0.0 and two that represent -0.0. The sign of the high part is the
+sign of the long double, and the sign of the low part is ignored.
+
+Likewise, there are four long doubles that represent infinities, two
+for +Inf and two for -Inf.
+
+Each NaN, quiet or signalling, that can be represented as a 'double'
+can be represented as a 'long double'. In fact, there are 2^64
+equivalent representations for each one.
+
+There are certain other valid long doubles where both parts are
+nonzero but the low part represents a value which has a bit set below
+2^(e-105). These, together with the subnormal long doubles, make up
+the denormal long doubles.
+
+Many possible long double bit patterns are not valid long doubles.
+These do not represent any value.
+
+Limits
+------
+
+The maximum representable long double is 2^1024-2^918. The smallest
+*normal* positive long double is 2^-968. The smallest denormalised
+positive long double is 2^-1074 (this is the same as for 'double').
+
+Conversions
+-----------
+
+A double can be converted to a long double by adding a zero low part.
+
+A long double can be converted to a double by removing the low part.
+
+Comparisons
+-----------
+
+Two long doubles can be compared by comparing the high parts, and if
+those compare equal, comparing the low parts.
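+
+As an illustration (an editor's sketch, not part of the original
+description; the function and parameter names are hypothetical), the
+rule corresponds to C code along these lines:
+
+    #include <stdbool.h>
+
+    /* Order two valid long doubles given as (high, low) pairs.  */
+    static bool
+    dd_lt (double hi1, double lo1, double hi2, double lo2)
+    {
+      if (hi1 != hi2)
+        return hi1 < hi2;   /* high parts differ and decide the order */
+      return lo1 < lo2;     /* high parts equal; low parts decide */
+    }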
+
+Arithmetic
+----------
+
+The unary negate operation operates by negating the low and high parts.
+
+An absolute or absolute-negate operation must be done by comparing
+against zero and negating if necessary.
+
+Addition and subtraction are performed using library routines. They
+are not at present performed perfectly accurately: the result produced
+will be within 1ulp of the range generated by adding or subtracting
+1ulp from the input values, where a 'ulp' is 2^(e-106) given the
+exponent 'e'. In the presence of cancellation, this may be
+arbitrarily inaccurate. Subtraction is done by negation and addition.
+
+Multiplication is also performed using a library routine. Its result
+will be within 2ulp of the correct result.
+
+Division is also performed using a library routine. Its result will
+be within 3ulp of the correct result.
+
+
+Copyright (C) 2004 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/gcc/config/rs6000/darwin-ldouble.c b/gcc/config/rs6000/darwin-ldouble.c
new file mode 100644
index 000000000..d76c1b184
--- /dev/null
+++ b/gcc/config/rs6000/darwin-ldouble.c
@@ -0,0 +1,438 @@
+/* 128-bit long double support routines for Darwin.
+ Copyright (C) 1993, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Implementations of floating-point long double basic arithmetic
+ functions called by the IBM C compiler when generating code for
+ PowerPC platforms. In particular, the following functions are
+ implemented: __gcc_qadd, __gcc_qsub, __gcc_qmul, and __gcc_qdiv.
+ Double-double algorithms are based on the paper "Doubled-Precision
+ IEEE Standard 754 Floating-Point Arithmetic" by W. Kahan, February 26,
+ 1987. An alternative published reference is "Software for
+ Doubled-Precision Floating-Point Computations", by Seppo Linnainmaa,
+ ACM TOMS vol 7 no 3, September 1981, pages 272-283. */
+
+/* Each long double is made up of two IEEE doubles. The value of the
+ long double is the sum of the values of the two parts. The most
+ significant part is required to be the value of the long double
+ rounded to the nearest double, as specified by IEEE. For Inf
+ values, the least significant part is required to be one of +0.0 or
+ -0.0. No other requirements are made; so, for example, 1.0 may be
+ represented as (1.0, +0.0) or (1.0, -0.0), and the low part of a
+ NaN is don't-care.
+
+ This code currently assumes big-endian. */
+
+#if (!defined (__LITTLE_ENDIAN__) \
+ && (defined (__MACH__) || defined (__powerpc__) || defined (_AIX)))
+
+#define fabs(x) __builtin_fabs(x)
+#define isless(x, y) __builtin_isless (x, y)
+#define inf() __builtin_inf()
+
+#define unlikely(x) __builtin_expect ((x), 0)
+
+#define nonfinite(a) unlikely (! isless (fabs (a), inf ()))
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
+/* All these routines actually take two long doubles as parameters,
+ but GCC currently generates poor code when a union is used to turn
+ a long double into a pair of doubles. */
+
+long double __gcc_qadd (double, double, double, double);
+long double __gcc_qsub (double, double, double, double);
+long double __gcc_qmul (double, double, double, double);
+long double __gcc_qdiv (double, double, double, double);
+
+#if defined __ELF__ && defined SHARED \
+ && (defined __powerpc64__ || !(defined __linux__ || defined __gnu_hurd__))
+/* Provide definitions of the old symbol names to satisfy apps and
+ shared libs built against an older libgcc. To access the _xlq
+ symbols an explicit version reference is needed, so these won't
+ satisfy an unadorned reference like _xlqadd. If dot symbols are
+ not needed, the assembler will remove the aliases from the symbol
+ table. */
+__asm__ (".symver __gcc_qadd,_xlqadd@GCC_3.4\n\t"
+ ".symver __gcc_qsub,_xlqsub@GCC_3.4\n\t"
+ ".symver __gcc_qmul,_xlqmul@GCC_3.4\n\t"
+ ".symver __gcc_qdiv,_xlqdiv@GCC_3.4\n\t"
+ ".symver .__gcc_qadd,._xlqadd@GCC_3.4\n\t"
+ ".symver .__gcc_qsub,._xlqsub@GCC_3.4\n\t"
+ ".symver .__gcc_qmul,._xlqmul@GCC_3.4\n\t"
+ ".symver .__gcc_qdiv,._xlqdiv@GCC_3.4");
+#endif
+
+typedef union
+{
+ long double ldval;
+ double dval[2];
+} longDblUnion;
+
+/* Add two 'long double' values and return the result. */
+long double
+__gcc_qadd (double a, double aa, double c, double cc)
+{
+ longDblUnion x;
+ double z, q, zz, xh;
+
+ z = a + c;
+
+ if (nonfinite (z))
+ {
+ z = cc + aa + c + a;
+ if (nonfinite (z))
+ return z;
+ x.dval[0] = z; /* Will always be DBL_MAX. */
+ zz = aa + cc;
+ if (fabs(a) > fabs(c))
+ x.dval[1] = a - z + c + zz;
+ else
+ x.dval[1] = c - z + a + zz;
+ }
+ else
+ {
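+      /* Two-sum step: q + c plus a - (q + z) recovers the exact
+	 rounding error of z = a + c before the low-order parts aa and
+	 cc are folded in.  */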
+ q = a - z;
+ zz = q + c + (a - (q + z)) + aa + cc;
+
+ /* Keep -0 result. */
+ if (zz == 0.0)
+ return z;
+
+ xh = z + zz;
+ if (nonfinite (xh))
+ return xh;
+
+ x.dval[0] = xh;
+ x.dval[1] = z - xh + zz;
+ }
+ return x.ldval;
+}
+
+long double
+__gcc_qsub (double a, double b, double c, double d)
+{
+ return __gcc_qadd (a, b, -c, -d);
+}
+
+#ifdef __NO_FPRS__
+static double fmsub (double, double, double);
+#endif
+
+long double
+__gcc_qmul (double a, double b, double c, double d)
+{
+ longDblUnion z;
+ double t, tau, u, v, w;
+
+ t = a * c; /* Highest order double term. */
+
+ if (unlikely (t == 0) /* Preserve -0. */
+ || nonfinite (t))
+ return t;
+
+ /* Sum terms of two highest orders. */
+
+ /* Use fused multiply-add to get low part of a * c. */
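+  /* fmsub computes a*c - t with a single rounding, so tau is exactly
+     the rounding error of the high-order product t.  */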
+#ifndef __NO_FPRS__
+ asm ("fmsub %0,%1,%2,%3" : "=f"(tau) : "f"(a), "f"(c), "f"(t));
+#else
+ tau = fmsub (a, c, t);
+#endif
+ v = a*d;
+ w = b*c;
+ tau += v + w; /* Add in other second-order terms. */
+ u = t + tau;
+
+ /* Construct long double result. */
+ if (nonfinite (u))
+ return u;
+ z.dval[0] = u;
+ z.dval[1] = (t - u) + tau;
+ return z.ldval;
+}
+
+long double
+__gcc_qdiv (double a, double b, double c, double d)
+{
+ longDblUnion z;
+ double s, sigma, t, tau, u, v, w;
+
+ t = a / c; /* highest order double term */
+
+ if (unlikely (t == 0) /* Preserve -0. */
+ || nonfinite (t))
+ return t;
+
+ /* Finite nonzero result requires corrections to the highest order term. */
+
+ s = c * t; /* (s,sigma) = c*t exactly. */
+ w = -(-b + d * t); /* Written to get fnmsub for speed, but not
+ numerically necessary. */
+
+ /* Use fused multiply-add to get low part of c * t. */
+#ifndef __NO_FPRS__
+ asm ("fmsub %0,%1,%2,%3" : "=f"(sigma) : "f"(c), "f"(t), "f"(s));
+#else
+ sigma = fmsub (c, t, s);
+#endif
+ v = a - s;
+
+ tau = ((v-sigma)+w)/c; /* Correction to t. */
+ u = t + tau;
+
+ /* Construct long double result. */
+ if (nonfinite (u))
+ return u;
+ z.dval[0] = u;
+ z.dval[1] = (t - u) + tau;
+ return z.ldval;
+}
+
+#if defined (_SOFT_DOUBLE) && defined (__LONG_DOUBLE_128__)
+
+long double __gcc_qneg (double, double);
+int __gcc_qeq (double, double, double, double);
+int __gcc_qne (double, double, double, double);
+int __gcc_qge (double, double, double, double);
+int __gcc_qle (double, double, double, double);
+long double __gcc_stoq (float);
+long double __gcc_dtoq (double);
+float __gcc_qtos (double, double);
+double __gcc_qtod (double, double);
+int __gcc_qtoi (double, double);
+unsigned int __gcc_qtou (double, double);
+long double __gcc_itoq (int);
+long double __gcc_utoq (unsigned int);
+
+extern int __eqdf2 (double, double);
+extern int __ledf2 (double, double);
+extern int __gedf2 (double, double);
+
+/* Negate 'long double' value and return the result. */
+long double
+__gcc_qneg (double a, double aa)
+{
+ longDblUnion x;
+
+ x.dval[0] = -a;
+ x.dval[1] = -aa;
+ return x.ldval;
+}
+
+/* Compare two 'long double' values for equality. */
+int
+__gcc_qeq (double a, double aa, double c, double cc)
+{
+ if (__eqdf2 (a, c) == 0)
+ return __eqdf2 (aa, cc);
+ return 1;
+}
+
+strong_alias (__gcc_qeq, __gcc_qne);
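+/* __gcc_qne can share __gcc_qeq's code: the return value follows the
+   __eqdf2 convention (zero iff equal), and the caller's test (== 0
+   versus != 0) supplies the distinction. The qle/qlt and qge/qgt
+   aliases below work the same way. */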
+
+/* Compare two 'long double' values for less than or equal. */
+int
+__gcc_qle (double a, double aa, double c, double cc)
+{
+ if (__eqdf2 (a, c) == 0)
+ return __ledf2 (aa, cc);
+ return __ledf2 (a, c);
+}
+
+strong_alias (__gcc_qle, __gcc_qlt);
+
+/* Compare two 'long double' values for greater than or equal. */
+int
+__gcc_qge (double a, double aa, double c, double cc)
+{
+ if (__eqdf2 (a, c) == 0)
+ return __gedf2 (aa, cc);
+ return __gedf2 (a, c);
+}
+
+strong_alias (__gcc_qge, __gcc_qgt);
+
+/* Convert single to long double. */
+long double
+__gcc_stoq (float a)
+{
+ longDblUnion x;
+
+ x.dval[0] = (double) a;
+ x.dval[1] = 0.0;
+
+ return x.ldval;
+}
+
+/* Convert double to long double. */
+long double
+__gcc_dtoq (double a)
+{
+ longDblUnion x;
+
+ x.dval[0] = a;
+ x.dval[1] = 0.0;
+
+ return x.ldval;
+}
+
+/* Convert long double to single. */
+float
+__gcc_qtos (double a, double aa __attribute__ ((__unused__)))
+{
+ return (float) a;
+}
+
+/* Convert long double to double. */
+double
+__gcc_qtod (double a, double aa __attribute__ ((__unused__)))
+{
+ return a;
+}
+
+/* Convert long double to int. */
+int
+__gcc_qtoi (double a, double aa)
+{
+ double z = a + aa;
+ return (int) z;
+}
+
+/* Convert long double to unsigned int. */
+unsigned int
+__gcc_qtou (double a, double aa)
+{
+ double z = a + aa;
+ return (unsigned int) z;
+}
+
+/* Convert int to long double. */
+long double
+__gcc_itoq (int a)
+{
+ return __gcc_dtoq ((double) a);
+}
+
+/* Convert unsigned int to long double. */
+long double
+__gcc_utoq (unsigned int a)
+{
+ return __gcc_dtoq ((double) a);
+}
+
+#endif
+
+#ifdef __NO_FPRS__
+
+int __gcc_qunord (double, double, double, double);
+
+extern int __eqdf2 (double, double);
+extern int __unorddf2 (double, double);
+
+/* Compare two 'long double' values for unordered. */
+int
+__gcc_qunord (double a, double aa, double c, double cc)
+{
+ if (__eqdf2 (a, c) == 0)
+ return __unorddf2 (aa, cc);
+ return __unorddf2 (a, c);
+}
+
+#include "config/soft-fp/soft-fp.h"
+#include "config/soft-fp/double.h"
+#include "config/soft-fp/quad.h"
+
+/* Compute floating point multiply-subtract with higher (quad) precision. */
+static double
+fmsub (double a, double b, double c)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ FP_DECL_D(B);
+ FP_DECL_D(C);
+ FP_DECL_Q(X);
+ FP_DECL_Q(Y);
+ FP_DECL_Q(Z);
+ FP_DECL_Q(U);
+ FP_DECL_Q(V);
+ FP_DECL_D(R);
+ double r;
+ long double u, x, y, z;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_RAW_D (A, a);
+ FP_UNPACK_RAW_D (B, b);
+ FP_UNPACK_RAW_D (C, c);
+
+ /* Extend double to quad. */
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_EXTEND(Q,D,4,2,X,A);
+ FP_EXTEND(Q,D,4,2,Y,B);
+ FP_EXTEND(Q,D,4,2,Z,C);
+#else
+ FP_EXTEND(Q,D,2,1,X,A);
+ FP_EXTEND(Q,D,2,1,Y,B);
+ FP_EXTEND(Q,D,2,1,Z,C);
+#endif
+ FP_PACK_RAW_Q(x,X);
+ FP_PACK_RAW_Q(y,Y);
+ FP_PACK_RAW_Q(z,Z);
+ FP_HANDLE_EXCEPTIONS;
+
+ /* Multiply. */
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_Q(X,x);
+ FP_UNPACK_Q(Y,y);
+ FP_MUL_Q(U,X,Y);
+ FP_PACK_Q(u,U);
+ FP_HANDLE_EXCEPTIONS;
+
+ /* Subtract. */
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(U,u);
+ FP_UNPACK_SEMIRAW_Q(Z,z);
+ FP_SUB_Q(V,U,Z);
+
+ /* Truncate quad to double. */
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ V_f[3] &= 0x0007ffff;
+ FP_TRUNC(D,Q,2,4,R,V);
+#else
+ V_f1 &= 0x0007ffffffffffffL;
+ FP_TRUNC(D,Q,1,2,R,V);
+#endif
+ FP_PACK_SEMIRAW_D(r,R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+#endif
+
+#endif
diff --git a/gcc/config/rs6000/darwin-libgcc.10.4.ver b/gcc/config/rs6000/darwin-libgcc.10.4.ver
new file mode 100644
index 000000000..0c6f7c231
--- /dev/null
+++ b/gcc/config/rs6000/darwin-libgcc.10.4.ver
@@ -0,0 +1,93 @@
+# Copyright (C) 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdi3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixtfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunstfdi
+___floatdidf
+___floatdisf
+___floatditf
+___gcc_personality_v0
+___gcc_qadd
+___gcc_qdiv
+___gcc_qmul
+___gcc_qsub
+___lshrdi3
+___moddi3
+___muldi3
+___mulvdi3
+___mulvsi3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___trampoline_setup
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc/config/rs6000/darwin-libgcc.10.5.ver b/gcc/config/rs6000/darwin-libgcc.10.5.ver
new file mode 100644
index 000000000..c2f08924f
--- /dev/null
+++ b/gcc/config/rs6000/darwin-libgcc.10.5.ver
@@ -0,0 +1,106 @@
+# Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetIPInfo
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divtc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixtfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunstfdi
+___floatdidf
+___floatdisf
+___floatditf
+___floatundidf
+___floatundisf
+___floatunditf
+___gcc_personality_v0
+___gcc_qadd
+___gcc_qdiv
+___gcc_qmul
+___gcc_qsub
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___multc3
+___mulvdi3
+___mulvsi3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powitf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___trampoline_setup
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc/config/rs6000/darwin-tramp.asm b/gcc/config/rs6000/darwin-tramp.asm
new file mode 100644
index 000000000..5188c98ef
--- /dev/null
+++ b/gcc/config/rs6000/darwin-tramp.asm
@@ -0,0 +1,125 @@
+/* Special support for trampolines
+ *
+ * Copyright (C) 1996, 1997, 2000, 2004, 2005, 2009 Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "darwin-asm.h"
+
+/* Set up trampolines. */
+
+.text
+ .align LOG2_GPR_BYTES
+Ltrampoline_initial:
+ mflr r0
+ bl 1f
+Lfunc = .-Ltrampoline_initial
+ .g_long 0 /* will be replaced with function address */
+Lchain = .-Ltrampoline_initial
+ .g_long 0 /* will be replaced with static chain */
+1: mflr r11
+ lg r12,0(r11) /* function address */
+ mtlr r0
+ mtctr r12
+ lg r11,GPR_BYTES(r11) /* static chain */
+ bctr
+
+trampoline_size = .-Ltrampoline_initial
+
+/* R3 = stack address to store trampoline */
+/* R4 = length of trampoline area */
+/* R5 = function address */
+/* R6 = static chain */
+
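+/* ___trampoline_setup copies the Ltrampoline_initial template into the
+   stack area at R3, patches in the function address and static chain,
+   and flushes the data and instruction caches so that the freshly
+   written instructions can be executed.  */
+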
+ .globl ___trampoline_setup
+___trampoline_setup:
+ mflr r0 /* save return address */
+ bcl 20,31,LCF0 /* load up __trampoline_initial into r7 */
+LCF0:
+ mflr r11
+ addis r7,r11,ha16(LTRAMP-LCF0)
+ lg r7,lo16(LTRAMP-LCF0)(r7)
+ subi r7,r7,4
+ li r8,trampoline_size /* verify trampoline big enough */
+ cmpg cr1,r8,r4
+ srwi r4,r4,2 /* # words to move (insns always 4-byte) */
+ addi r9,r3,-4 /* adjust pointer for lgu */
+ mtctr r4
+ blt cr1,Labort
+
+ mtlr r0
+
+ /* Copy the instructions to the stack */
+Lmove:
+ lwzu r10,4(r7)
+ stwu r10,4(r9)
+ bdnz Lmove
+
+ /* Store correct function and static chain */
+ stg r5,Lfunc(r3)
+ stg r6,Lchain(r3)
+
+ /* Now flush both caches */
+ mtctr r4
+Lcache:
+ icbi 0,r3
+ dcbf 0,r3
+ addi r3,r3,4
+ bdnz Lcache
+
+ /* Ensure cache-flushing has finished. */
+ sync
+ isync
+
+	/* Make the stack executable. */
+ b ___enable_execute_stack
+
+Labort:
+#ifdef __DYNAMIC__
+ bl L_abort$stub
+.data
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+ .align 2
+L_abort$stub:
+ .indirect_symbol _abort
+ mflr r0
+ bcl 20,31,L0$_abort
+L0$_abort:
+ mflr r11
+ addis r11,r11,ha16(L_abort$lazy_ptr-L0$_abort)
+ mtlr r0
+ lgu r12,lo16(L_abort$lazy_ptr-L0$_abort)(r11)
+ mtctr r12
+ bctr
+.data
+.lazy_symbol_pointer
+L_abort$lazy_ptr:
+ .indirect_symbol _abort
+ .g_long dyld_stub_binding_helper
+#else
+ bl _abort
+#endif
+.data
+ .align LOG2_GPR_BYTES
+LTRAMP:
+ .g_long Ltrampoline_initial
+
diff --git a/gcc/config/rs6000/darwin-unwind.h b/gcc/config/rs6000/darwin-unwind.h
new file mode 100644
index 000000000..9fdc115be
--- /dev/null
+++ b/gcc/config/rs6000/darwin-unwind.h
@@ -0,0 +1,30 @@
+/* DWARF2 EH unwinding support for Darwin.
+ Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+extern bool _Unwind_fallback_frame_state_for
+ (struct _Unwind_Context *context, _Unwind_FrameState *fs);
+
+#define MD_FALLBACK_FRAME_STATE_FOR(CONTEXT, FS) \
+ (_Unwind_fallback_frame_state_for (CONTEXT, FS) \
+ ? _URC_NO_REASON : _URC_END_OF_STACK)
diff --git a/gcc/config/rs6000/darwin-vecsave.asm b/gcc/config/rs6000/darwin-vecsave.asm
new file mode 100644
index 000000000..0a46be20c
--- /dev/null
+++ b/gcc/config/rs6000/darwin-vecsave.asm
@@ -0,0 +1,155 @@
+/* This file contains the vector save and restore routines.
+ *
+ * Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Vector save/restore routines for Darwin. Note that each vector
+ save/restore requires 2 instructions (8 bytes.)
+
+ THE SAVE AND RESTORE ROUTINES CAN HAVE ONLY ONE GLOBALLY VISIBLE
+ ENTRY POINT - callers have to jump to "saveFP+60" to save f29..f31,
+ for example. For FP reg saves/restores, it takes one instruction
+ (4 bytes) to do the operation; for Vector regs, 2 instructions are
+   required (8 bytes.) */
+
+ .machine ppc7400
+.text
+ .align 2
+
+.private_extern saveVEC
+saveVEC:
+ li r11,-192
+ stvx v20,r11,r0
+ li r11,-176
+ stvx v21,r11,r0
+ li r11,-160
+ stvx v22,r11,r0
+ li r11,-144
+ stvx v23,r11,r0
+ li r11,-128
+ stvx v24,r11,r0
+ li r11,-112
+ stvx v25,r11,r0
+ li r11,-96
+ stvx v26,r11,r0
+ li r11,-80
+ stvx v27,r11,r0
+ li r11,-64
+ stvx v28,r11,r0
+ li r11,-48
+ stvx v29,r11,r0
+ li r11,-32
+ stvx v30,r11,r0
+ li r11,-16
+ stvx v31,r11,r0
+ blr
+
+.private_extern restVEC
+restVEC:
+ li r11,-192
+ lvx v20,r11,r0
+ li r11,-176
+ lvx v21,r11,r0
+ li r11,-160
+ lvx v22,r11,r0
+ li r11,-144
+ lvx v23,r11,r0
+ li r11,-128
+ lvx v24,r11,r0
+ li r11,-112
+ lvx v25,r11,r0
+ li r11,-96
+ lvx v26,r11,r0
+ li r11,-80
+ lvx v27,r11,r0
+ li r11,-64
+ lvx v28,r11,r0
+ li r11,-48
+ lvx v29,r11,r0
+ li r11,-32
+ lvx v30,r11,r0
+ li r11,-16
+ lvx v31,r11,r0
+ blr
+
+/* saveVEC_vr11 -- as saveVEC but VRsave is returned in R11. */
+
+.private_extern saveVEC_vr11
+saveVEC_vr11:
+ li r11,-192
+ stvx v20,r11,r0
+ li r11,-176
+ stvx v21,r11,r0
+ li r11,-160
+ stvx v22,r11,r0
+ li r11,-144
+ stvx v23,r11,r0
+ li r11,-128
+ stvx v24,r11,r0
+ li r11,-112
+ stvx v25,r11,r0
+ li r11,-96
+ stvx v26,r11,r0
+ li r11,-80
+ stvx v27,r11,r0
+ li r11,-64
+ stvx v28,r11,r0
+ li r11,-48
+ stvx v29,r11,r0
+ li r11,-32
+ stvx v30,r11,r0
+ li r11,-16
+ stvx v31,r11,r0
+ mfspr r11,VRsave
+ blr
+
+/* As restVEC, but with the original VRsave value passed in R10. */
+
+.private_extern restVEC_vr10
+restVEC_vr10:
+ li r11,-192
+ lvx v20,r11,r0
+ li r11,-176
+ lvx v21,r11,r0
+ li r11,-160
+ lvx v22,r11,r0
+ li r11,-144
+ lvx v23,r11,r0
+ li r11,-128
+ lvx v24,r11,r0
+ li r11,-112
+ lvx v25,r11,r0
+ li r11,-96
+ lvx v26,r11,r0
+ li r11,-80
+ lvx v27,r11,r0
+ li r11,-64
+ lvx v28,r11,r0
+ li r11,-48
+ lvx v29,r11,r0
+ li r11,-32
+ lvx v30,r11,r0
+ li r11,-16
+ lvx v31,r11,r0
+ /* restore VRsave from R10. */
+ mtspr VRsave,r10
+ blr
diff --git a/gcc/config/rs6000/darwin-world.asm b/gcc/config/rs6000/darwin-world.asm
new file mode 100644
index 000000000..c0b1bf1a2
--- /dev/null
+++ b/gcc/config/rs6000/darwin-world.asm
@@ -0,0 +1,259 @@
+/* This file contains the exception-handling save_world and
+ * restore_world routines, which need to do a run-time check to see if
+ * they should save and restore the vector registers.
+ *
+ * Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .machine ppc7400
+.data
+ .align 2
+
+#ifdef __DYNAMIC__
+
+.non_lazy_symbol_pointer
+L_has_vec$non_lazy_ptr:
+ .indirect_symbol __cpu_has_altivec
+#ifdef __ppc64__
+ .quad 0
+#else
+ .long 0
+#endif
+
+#else
+
+/* For static, "pretend" we have a non-lazy-pointer. */
+
+L_has_vec$non_lazy_ptr:
+ .long __cpu_has_altivec
+
+#endif
+
+
+.text
+ .align 2
+
+/* save_world and rest_world save/restore F14-F31 and possibly V20-V31
+ (assuming you have a CPU with vector registers; we use a global var
+ provided by the System Framework to determine this.)
+
+   SAVE_WORLD takes R0 (the caller's caller's return address) and R11
+   (the stack frame size) as parameters. It returns VRsave in R0 if
+   we're on a CPU with vector regs.
+
+ With gcc3, we now need to save and restore CR as well, since gcc3's
+ scheduled prologs can cause comparisons to be moved before calls to
+ save_world!
+
+ USES: R0 R11 R12 */
+
+.private_extern save_world
+save_world:
+ stw r0,8(r1)
+ mflr r0
+ bcl 20,31,Ls$pb
+Ls$pb: mflr r12
+ addis r12,r12,ha16(L_has_vec$non_lazy_ptr-Ls$pb)
+ lwz r12,lo16(L_has_vec$non_lazy_ptr-Ls$pb)(r12)
+ mtlr r0
+ lwz r12,0(r12)
+ /* grab CR */
+ mfcr r0
+ /* test HAS_VEC */
+ cmpwi r12,0
+ stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ stmw r13,-220(r1)
+ /* stash CR */
+ stw r0,4(r1)
+ /* set R12 pointing at Vector Reg save area */
+ addi r12,r1,-224
+ /* allocate stack frame */
+ stwux r1,r1,r11
+ /* ...but return if HAS_VEC is zero */
+ bne+ L$saveVMX
+ /* Not forgetting to restore CR. */
+ mtcr r0
+ blr
+
+L$saveVMX:
+ /* We're saving Vector regs too. */
+ /* Restore CR from R0. No More Branches! */
+ mtcr r0
+
+ /* We should really use VRSAVE to figure out which vector regs
+ we actually need to save and restore. Some other time :-/ */
+
+ li r11,-192
+ stvx v20,r11,r12
+ li r11,-176
+ stvx v21,r11,r12
+ li r11,-160
+ stvx v22,r11,r12
+ li r11,-144
+ stvx v23,r11,r12
+ li r11,-128
+ stvx v24,r11,r12
+ li r11,-112
+ stvx v25,r11,r12
+ li r11,-96
+ stvx v26,r11,r12
+ li r11,-80
+ stvx v27,r11,r12
+ li r11,-64
+ stvx v28,r11,r12
+ li r11,-48
+ stvx v29,r11,r12
+ li r11,-32
+ stvx v30,r11,r12
+ mfspr r0,VRsave
+ li r11,-16
+ stvx v31,r11,r12
+ /* VRsave lives at -224(R1) */
+ stw r0,0(r12)
+ blr
+
+
+/* eh_rest_world_r10 is jumped to, not called, so no need to worry about LR.
+   R10 is the C++ EH stack adjust parameter; we return to the caller's caller.
+
+ USES: R0 R10 R11 R12 and R7 R8
+ RETURNS: C++ EH Data registers (R3 - R6.)
+
+ We now set up R7/R8 and jump to rest_world_eh_r7r8.
+
+ rest_world doesn't use the R10 stack adjust parameter, nor does it
+ pick up the R3-R6 exception handling stuff. */
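+
+/* For reference, the frame layout these routines rely on, relative to
+ the previous SP recovered into R11 (all offsets visible in the code
+ below): saved CR at +4, VRsave at -224, V20-V31 at -416..-240, and
+ the C++ EH data registers R3-R6 at -432..-420. */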
+
+.private_extern rest_world
+rest_world:
+ /* Pickup previous SP */
+ lwz r11, 0(r1)
+ li r7, 0
+ lwz r8, 8(r11)
+ li r10, 0
+ b rest_world_eh_r7r8
+
+.private_extern eh_rest_world_r10
+eh_rest_world_r10:
+ /* Pickup previous SP */
+ lwz r11, 0(r1)
+ mr r7,r10
+ lwz r8, 8(r11)
+ /* pickup the C++ EH data regs (R3 - R6.) */
+ lwz r6,-420(r11)
+ lwz r5,-424(r11)
+ lwz r4,-428(r11)
+ lwz r3,-432(r11)
+
+ b rest_world_eh_r7r8
+
+/* rest_world_eh_r7r8 is jumped to -- not called! -- when we're doing
+ the exception-handling epilog. R7 contains the offset to add to
+ the SP, and R8 contains the 'real' return address.
+
+ USES: R0 R11 R12 [R7/R8]
+ RETURNS: C++ EH Data registers (R3 - R6.) */
+
+rest_world_eh_r7r8:
+ bcl 20,31,Lr7r8$pb
+Lr7r8$pb: mflr r12
+ lwz r11,0(r1)
+ /* R11 := previous SP */
+ addis r12,r12,ha16(L_has_vec$non_lazy_ptr-Lr7r8$pb)
+ lwz r12,lo16(L_has_vec$non_lazy_ptr-Lr7r8$pb)(r12)
+ lwz r0,4(r11)
+ /* R0 := old CR */
+ lwz r12,0(r12)
+ /* R12 := HAS_VEC */
+ mtcr r0
+ cmpwi r12,0
+ lmw r13,-220(r11)
+ beq L.rest_world_fp_eh
+ /* restore VRsave and V20..V31 */
+ lwz r0,-224(r11)
+ li r12,-416
+ mtspr VRsave,r0
+ lvx v20,r11,r12
+ li r12,-400
+ lvx v21,r11,r12
+ li r12,-384
+ lvx v22,r11,r12
+ li r12,-368
+ lvx v23,r11,r12
+ li r12,-352
+ lvx v24,r11,r12
+ li r12,-336
+ lvx v25,r11,r12
+ li r12,-320
+ lvx v26,r11,r12
+ li r12,-304
+ lvx v27,r11,r12
+ li r12,-288
+ lvx v28,r11,r12
+ li r12,-272
+ lvx v29,r11,r12
+ li r12,-256
+ lvx v30,r11,r12
+ li r12,-240
+ lvx v31,r11,r12
+
+L.rest_world_fp_eh:
+ lfd f14,-144(r11)
+ lfd f15,-136(r11)
+ lfd f16,-128(r11)
+ lfd f17,-120(r11)
+ lfd f18,-112(r11)
+ lfd f19,-104(r11)
+ lfd f20,-96(r11)
+ lfd f21,-88(r11)
+ lfd f22,-80(r11)
+ lfd f23,-72(r11)
+ lfd f24,-64(r11)
+ lfd f25,-56(r11)
+ lfd f26,-48(r11)
+ lfd f27,-40(r11)
+ lfd f28,-32(r11)
+ lfd f29,-24(r11)
+ lfd f30,-16(r11)
+ /* R8 is the exception-handler's address */
+ mtctr r8
+ lfd f31,-8(r11)
+ /* set SP to original value + R7 offset */
+ add r1,r11,r7
+ bctr
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
new file mode 100644
index 000000000..244da0065
--- /dev/null
+++ b/gcc/config/rs6000/darwin.h
@@ -0,0 +1,438 @@
+/* Target definitions for PowerPC running Darwin (Mac OS X).
+ Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef DARWIN_PPC
+#define DARWIN_PPC 1
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Darwin/PowerPC)");
+
+/* The "Darwin ABI" is mostly like AIX, but with some key differences. */
+
+#define DEFAULT_ABI ABI_DARWIN
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __powerpc64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+/* The object file format is Mach-O. */
+
+#define TARGET_OBJECT_FORMAT OBJECT_MACHO
+
+/* Size of the Obj-C jump buffer. */
+#define OBJC_JBLEN ((TARGET_64BIT) ? (26*2 + 18*2 + 129 + 1) : (26 + 18*2 + 129 + 1))
+
+/* We're not ever going to do TOCs. */
+
+#define TARGET_TOC 0
+#define TARGET_NO_TOC 1
+
+/* Override the default rs6000 definition. */
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ if (!TARGET_64BIT) builtin_define ("__ppc__"); \
+ if (TARGET_64BIT) builtin_define ("__ppc64__"); \
+ builtin_define ("__POWERPC__"); \
+ builtin_define ("__NATURAL_ALIGNMENT__"); \
+ darwin_cpp_builtins (pfile); \
+ } \
+ while (0)
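+
+/* A sketch of the effect: a -m64 compile predefines __ppc64__,
+ __POWERPC__ and __NATURAL_ALIGNMENT__ (plus whatever
+ darwin_cpp_builtins adds), while a 32-bit compile gets __ppc__
+ instead of __ppc64__. */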
+
+/* Generate branch island stubs if this is true. */
+extern int darwin_emit_branch_islands;
+
+#define SUBTARGET_OVERRIDE_OPTIONS darwin_rs6000_override_options ()
+
+#define C_COMMON_OVERRIDE_OPTIONS do { \
+ /* On powerpc, __cxa_get_exception_ptr is available starting in the \
+ 10.4.6 libstdc++.dylib. */ \
+ if (strverscmp (darwin_macosx_version_min, "10.4.6") < 0 \
+ && flag_use_cxa_get_exception_ptr == 2) \
+ flag_use_cxa_get_exception_ptr = 0; \
+ if (flag_mkernel) \
+ flag_no_builtin = 1; \
+ SUBTARGET_C_COMMON_OVERRIDE_OPTIONS; \
+} while (0)
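+
+/* strverscmp compares version strings component-wise, so e.g.
+ strverscmp ("10.3.9", "10.4.6") < 0 while strverscmp ("10.10", "10.4")
+ > 0; the guard above therefore fires only for deployment targets
+ below 10.4.6 (and only while the flag is at its default of 2). */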
+
+/* Darwin has 128-bit long double support in libc in 10.4 and later.
+ Default to 128-bit long doubles even on earlier platforms for ABI
+ consistency; arithmetic will work even if libc and libm support is
+ not available. */
+
+#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128
+
+
+/* We want -fPIC by default, unless we're using -static to compile for
+ the kernel or some such. The "-faltivec" option should have been
+ called "-maltivec" all along. */
+
+#define CC1_SPEC "\
+ %(cc1_cpu) \
+ %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }} \
+ %{static: %{Zdynamic: %e conflicting code gen style switches are used}}\
+ %{!mmacosx-version-min=*:-mmacosx-version-min=%(darwin_minversion)} \
+ %{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \
+ %{faltivec:-maltivec -include altivec.h} %{fno-altivec:-mno-altivec} \
+ %<faltivec %<fno-altivec " \
+ DARWIN_CC1_SPEC
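+
+/* Reading the spec above, roughly: given "-faltivec", cc1 is invoked
+ with "-maltivec -include altivec.h" substituted in, "-fPIC" is added
+ unless -mkernel/-static/-mdynamic-no-pic is given, a default
+ -mmacosx-version-min is supplied, and %<faltivec then deletes the
+ original switch from the command line. */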
+
+#define DARWIN_ARCH_SPEC "%{m64:ppc64;:ppc}"
+
+#define DARWIN_SUBARCH_SPEC " \
+ %{m64: ppc64} \
+ %{!m64: \
+ %{mcpu=601:ppc601; \
+ mcpu=603:ppc603; \
+ mcpu=603e:ppc603; \
+ mcpu=604:ppc604; \
+ mcpu=604e:ppc604e; \
+ mcpu=740:ppc750; \
+ mcpu=750:ppc750; \
+ mcpu=G3:ppc750; \
+ mcpu=7400:ppc7400; \
+ mcpu=G4:ppc7400; \
+ mcpu=7450:ppc7450; \
+ mcpu=970:ppc970; \
+ mcpu=power4:ppc970; \
+ mcpu=G5:ppc970; \
+ :ppc}}"
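+
+/* So, for example, "-m64" selects the ppc64 subtarget, a 32-bit
+ "-mcpu=G4" compile maps to ppc7400, and anything unlisted falls
+ back to plain ppc. */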
+
+/* crt2.o is at least partially required for 10.3.x and earlier. */
+#define DARWIN_CRT2_SPEC \
+ "%{!m64:%:version-compare(!> 10.4 mmacosx-version-min= crt2.o%s)}"
+
+/* Determine a minimum version based on compiler options. */
+#define DARWIN_MINVERSION_SPEC \
+ "%{m64:%{fgnu-runtime:10.4; \
+ ,objective-c|,objc-cpp-output:10.5; \
+ ,objective-c-header:10.5; \
+ ,objective-c++|,objective-c++-cpp-output:10.5; \
+ ,objective-c++-header|,objc++-cpp-output:10.5; \
+ :10.4}; \
+ shared-libgcc:10.3; \
+ :10.1}"
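+
+/* E.g. 64-bit Objective-C with the NeXT runtime implies a 10.5
+ minimum (10.4 with -fgnu-runtime), other 64-bit code implies 10.4,
+ -shared-libgcc implies 10.3, and everything else defaults to 10.1. */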
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", DARWIN_CRT2_SPEC }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
+
+/* Output a .machine directive. */
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START rs6000_darwin_file_start
+
+/* Make both r2 and r13 available for allocation. */
+#define FIXED_R2 0
+#define FIXED_R13 0
+
+/* Base register for access to local variables of the function. */
+
+#undef HARD_FRAME_POINTER_REGNUM
+#define HARD_FRAME_POINTER_REGNUM 30
+
+#undef RS6000_PIC_OFFSET_TABLE_REGNUM
+#define RS6000_PIC_OFFSET_TABLE_REGNUM 31
+
+/* Pad the outgoing args area to 16 bytes instead of the usual 8. */
+
+#undef STARTING_FRAME_OFFSET
+#define STARTING_FRAME_OFFSET \
+ (FRAME_GROWS_DOWNWARD \
+ ? 0 \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ + RS6000_SAVE_AREA))
+
+#undef STACK_DYNAMIC_OFFSET
+#define STACK_DYNAMIC_OFFSET(FUNDECL) \
+ (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ + (STACK_POINTER_OFFSET))
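+
+/* Worked example with hypothetical numbers: 40 bytes of outgoing
+ args round up to RS6000_ALIGN (40, 16) == 48, so locals start 48
+ bytes plus the fixed save area beyond the frame base when the
+ frame does not grow downward. */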
+
+/* Define cutoff for using external functions to save floating point.
+ Currently on Darwin, always use inline stores. */
+
+#undef FP_SAVE_INLINE
+#define FP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 64)
+#undef GP_SAVE_INLINE
+#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 32)
+
+/* Darwin uses a function call if everything needs to be saved/restored. */
+#undef WORLD_SAVE_P
+#define WORLD_SAVE_P(INFO) ((INFO)->world_save_p)
+
+/* The assembler wants the alternate register names, but without a
+ leading percent sign. */
+#undef REGISTER_NAMES
+#define REGISTER_NAMES \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
+ "mq", "lr", "ctr", "ap", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "xer", \
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", \
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", \
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \
+ "vrsave", "vscr", \
+ "spe_acc", "spefscr", \
+ "sfp" \
+}
+
+/* This outputs NAME to FILE. */
+
+#undef RS6000_OUTPUT_BASENAME
+#define RS6000_OUTPUT_BASENAME(FILE, NAME) \
+ assemble_name (FILE, NAME)
+
+/* Globalizing directive for a label. */
+#undef GLOBAL_ASM_OP
+#define GLOBAL_ASM_OP "\t.globl "
+#undef TARGET_ASM_GLOBALIZE_LABEL
+
+/* This is how to output an internal label prefix. rs6000.c uses this
+ when generating traceback tables. */
+/* Not really used for Darwin? */
+
+#undef ASM_OUTPUT_INTERNAL_LABEL_PREFIX
+#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \
+ fprintf (FILE, "%s", PREFIX)
+
+/* Override the standard rs6000 definition. */
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START ";"
+
+/* FP save and restore routines. */
+#define SAVE_FP_PREFIX "._savef"
+#define SAVE_FP_SUFFIX ""
+#define RESTORE_FP_PREFIX "._restf"
+#define RESTORE_FP_SUFFIX ""
+
+/* This is how to output an assembler line that says to advance
+ the location counter to a multiple of 2**LOG bytes using the
+ "nop" instruction as padding. */
+
+#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \
+ do \
+ { \
+ if ((LOG) < 3) \
+ { \
+ ASM_OUTPUT_ALIGN (FILE,LOG); \
+ } \
+ else /* nop == ori r0,r0,0 */ \
+ fprintf (FILE, "\t.align32 %d,0x60000000\n", (LOG)); \
+ } while (0)
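+
+/* For instance, a request to align to 2**4 emits
+ "\t.align32 4,0x60000000", padding with PowerPC nops
+ (0x60000000 == ori r0,r0,0), while 2**2 falls back to plain
+ ASM_OUTPUT_ALIGN. */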
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* This is supported in cctools 465 and later. The macro test
+ above prevents using it in earlier build environments. */
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) \
+ { \
+ if ((MAX_SKIP) == 0) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* Generate insns to call the profiler. */
+
+#define PROFILE_HOOK(LABEL) output_profile_hook (LABEL)
+
+/* Function name to call to do profiling. */
+
+#define RS6000_MCOUNT "*mcount"
+
+/* Default processor: G4, and G5 for 64-bit. */
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC7400
+#undef PROCESSOR_DEFAULT64
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER4
+
+/* Default target flag settings. Despite the fact that STMW/LMW
+ serialize, they are still a big code-size win. Use FSEL by
+ default as well. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_MULTIPLE | MASK_NEW_MNEMONICS \
+ | MASK_PPC_GFXOPT)
+
+/* Darwin only runs on PowerPC, so short-circuit POWER patterns. */
+#undef TARGET_POWER
+#define TARGET_POWER 0
+#undef TARGET_IEEEQUAD
+#define TARGET_IEEEQUAD 0
+
+/* Since Darwin doesn't do TOCs, stub this out. */
+
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) ((void)X, (void)MODE, 0)
+
+/* Unlike most other PowerPC targets, chars are signed, for
+ consistency with other Darwin architectures. */
+
+#undef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR (1)
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+ On the RS/6000, we have to return NO_REGS when we want to reload a
+ floating-point CONST_DOUBLE to force it to be copied to memory.
+
+ Don't allow R0 when loading the address of, or otherwise operating on,
+ a SYMBOL_REF. */
+
+#undef PREFERRED_RELOAD_CLASS
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ ((CONSTANT_P (X) \
+ && reg_classes_intersect_p ((CLASS), FLOAT_REGS)) \
+ ? NO_REGS \
+ : ((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == HIGH) \
+ && reg_class_subset_p (BASE_REGS, (CLASS))) \
+ ? BASE_REGS \
+ : (GET_MODE_CLASS (GET_MODE (X)) == MODE_INT \
+ && (CLASS) == NON_SPECIAL_REGS) \
+ ? GENERAL_REGS \
+ : (CLASS))
+
+/* Compute field alignment. This is similar to the version of the
+ macro in the Apple version of GCC, except that version supports
+ 'mac68k' alignment, and that version always uses the computed
+ alignment for the first field of a structure. The first-field
+ behavior is dealt with by
+ darwin_rs6000_special_round_type_align. */
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ (TARGET_ALIGN_NATURAL ? (COMPUTED) \
+ : (COMPUTED) == 128 ? 128 \
+ : MIN ((COMPUTED), 32))
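+
+/* Worked example: with natural alignment off, a "double" field whose
+ computed alignment is 64 is capped at MIN (64, 32) == 32 bits,
+ while a 128-bit vector field keeps its full 128-bit alignment. */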
+
+/* Darwin increases natural record alignment to doubleword if the first
+ field is an FP double while the FP fields remain word aligned. */
+#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \
+ ((TREE_CODE (STRUCT) == RECORD_TYPE \
+ || TREE_CODE (STRUCT) == UNION_TYPE \
+ || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \
+ && TARGET_ALIGN_NATURAL == 0 \
+ ? darwin_rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \
+ : (TREE_CODE (STRUCT) == VECTOR_TYPE \
+ && ALTIVEC_VECTOR_MODE (TYPE_MODE (STRUCT))) \
+ ? MAX (MAX ((COMPUTED), (SPECIFIED)), 128) \
+ : MAX ((COMPUTED), (SPECIFIED)))
+
+/* Specify padding for the last element of a block move between
+ registers and memory. FIRST is nonzero if this is the only
+ element. */
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE))
+
+#define DOUBLE_INT_ASM_OP "\t.quad\t"
+
+/* For binary compatibility with 2.95; Darwin C APIs use bool from
+ stdbool.h, which was an int-sized enum in 2.95. Users can explicitly
+ choose to have sizeof(bool)==1 with the -mone-byte-bool switch. */
+#define BOOL_TYPE_SIZE (darwin_one_byte_bool ? CHAR_TYPE_SIZE : INT_TYPE_SIZE)
+
+#undef REGISTER_TARGET_PRAGMAS
+#define REGISTER_TARGET_PRAGMAS() \
+ do \
+ { \
+ DARWIN_REGISTER_TARGET_PRAGMAS(); \
+ targetm.resolve_overloaded_builtin = altivec_resolve_overloaded_builtin; \
+ } \
+ while (0)
+
+#ifdef IN_LIBGCC2
+#include <stdbool.h>
+#endif
+
+#if !defined(__LP64__) && !defined(DARWIN_LIBSYSTEM_HAS_UNWIND)
+#define MD_UNWIND_SUPPORT "config/rs6000/darwin-unwind.h"
+#endif
+
+/* True iff we're generating fast-turnaround debugging code. When
+ true, we arrange for function prologues to start with 5 nops so
+ that gdb may insert code to redirect them, and for data to be
+ accessed indirectly. The runtime uses this indirection to forward
+ references for data to the original instance of that data. */
+
+#define TARGET_FIX_AND_CONTINUE (darwin_fix_and_continue)
+
+/* This is the reserved direct dispatch address for Objective-C. */
+#define OFFS_MSGSEND_FAST 0xFFFEFF00
+
+/* This is the reserved ivar address for Objective-C. */
+#define OFFS_ASSIGNIVAR_FAST 0xFFFEFEC0
+
+/* Old versions of Mac OS/Darwin don't have C99 functions available. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS \
+ (TARGET_64BIT \
+ || strverscmp (darwin_macosx_version_min, "10.3") >= 0)
+
+/* When generating kernel code or kexts, we don't use Altivec by
+ default, as kernel code doesn't save/restore those registers. */
+#define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext)
+
+/* Darwin has support for section anchors on powerpc*.
+ It is disabled for any section containing a "zero-sized item" (because these
+ are re-written as size=1 to be compatible with the OSX ld64).
+ The re-writing would interfere with the computation of anchor offsets.
+ Therefore, we place zero-sized items in their own sections and make such
+ sections unavailable to section anchoring. */
+
+#undef TARGET_ASM_OUTPUT_ANCHOR
+#define TARGET_ASM_OUTPUT_ANCHOR darwin_asm_output_anchor
+
+#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
+#define TARGET_USE_ANCHORS_FOR_SYMBOL_P darwin_use_anchors_for_symbol_p
+
+#undef DARWIN_SECTION_ANCHORS
+#define DARWIN_SECTION_ANCHORS 1
+
+/* PPC Darwin has to rename some of the long double builtins. */
+#undef SUBTARGET_INIT_BUILTINS
+#define SUBTARGET_INIT_BUILTINS \
+do { \
+ darwin_patch_builtins (); \
+ rs6000_builtin_decls[(unsigned) (RS6000_BUILTIN_CFSTRING)] \
+ = darwin_init_cfstring_builtins ((unsigned) (RS6000_BUILTIN_CFSTRING)); \
+} while(0)
diff --git a/gcc/config/rs6000/darwin.md b/gcc/config/rs6000/darwin.md
new file mode 100644
index 000000000..6b1927779
--- /dev/null
+++ b/gcc/config/rs6000/darwin.md
@@ -0,0 +1,442 @@
+/* Machine description patterns for PowerPC running Darwin (Mac OS X).
+ Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+(define_insn "adddi3_high"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+ (plus:DI (match_operand:DI 1 "gpc_reg_operand" "b")
+ (high:DI (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_64BIT"
+ "{cau|addis} %0,%1,ha16(%2)"
+ [(set_attr "length" "4")])
+
+(define_insn "movdf_low_si"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f,!r")
+ (mem:DF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_64BIT"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return \"lfd %0,lo16(%2)(%1)\";
+ case 1:
+ {
+ if (TARGET_POWERPC64 && TARGET_32BIT)
+ /* Note, old assemblers didn't support relocation here. */
+ return \"ld %0,lo16(%2)(%1)\";
+ else
+ {
+ output_asm_insn (\"{cal|la} %0,lo16(%2)(%1)\", operands);
+ output_asm_insn (\"{l|lwz} %L0,4(%0)\", operands);
+ return (\"{l|lwz} %0,0(%0)\");
+ }
+ }
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4,12")])
+
+
+(define_insn "movdf_low_di"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f,!r")
+ (mem:DF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return \"lfd %0,lo16(%2)(%1)\";
+ case 1:
+ return \"ld %0,lo16(%2)(%1)\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "load")
+ (set_attr "length" "4,4")])
+
+(define_insn "movdf_low_st_si"
+ [(set (mem:DF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand 2 "" "")))
+ (match_operand:DF 0 "gpc_reg_operand" "f"))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! TARGET_64BIT"
+ "stfd %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "movdf_low_st_di"
+ [(set (mem:DF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b")
+ (match_operand 2 "" "")))
+ (match_operand:DF 0 "gpc_reg_operand" "f"))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT"
+ "stfd %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "movsf_low_si"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f,!r")
+ (mem:SF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! TARGET_64BIT"
+ "@
+ lfs %0,lo16(%2)(%1)
+ {l|lwz} %0,lo16(%2)(%1)"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "movsf_low_di"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f,!r")
+ (mem:SF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT"
+ "@
+ lfs %0,lo16(%2)(%1)
+ {l|lwz} %0,lo16(%2)(%1)"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "movsf_low_st_si"
+ [(set (mem:SF (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" "")))
+ (match_operand:SF 0 "gpc_reg_operand" "f,!r"))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && ! TARGET_64BIT"
+ "@
+ stfs %0,lo16(%2)(%1)
+ {st|stw} %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "movsf_low_st_di"
+ [(set (mem:SF (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" "")))
+ (match_operand:SF 0 "gpc_reg_operand" "f,!r"))]
+ "TARGET_MACHO && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_64BIT"
+ "@
+ stfs %0,lo16(%2)(%1)
+ {st|stw} %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+;; 64-bit MachO load/store support
+(define_insn "movdi_low"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,*!d")
+ (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && TARGET_64BIT"
+ "@
+ {l|ld} %0,lo16(%2)(%1)
+ lfd %0,lo16(%2)(%1)"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "movsi_low_st"
+ [(set (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand 2 "" "")))
+ (match_operand:SI 0 "gpc_reg_operand" "r"))]
+ "TARGET_MACHO && ! TARGET_64BIT"
+ "{st|stw} %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "movdi_low_st"
+ [(set (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b")
+ (match_operand 2 "" "")))
+ (match_operand:DI 0 "gpc_reg_operand" "r,*!d"))]
+ "TARGET_MACHO && TARGET_64BIT"
+ "@
+ {st|std} %0,lo16(%2)(%1)
+ stfd %0,lo16(%2)(%1)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+;; Mach-O PIC trickery.
+(define_expand "macho_high"
+ [(set (match_operand 0 "" "")
+ (high (match_operand 1 "" "")))]
+ "TARGET_MACHO"
+{
+ if (TARGET_64BIT)
+ emit_insn (gen_macho_high_di (operands[0], operands[1]));
+ else
+ emit_insn (gen_macho_high_si (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_insn "macho_high_si"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=b*r")
+ (high:SI (match_operand 1 "" "")))]
+ "TARGET_MACHO && ! TARGET_64BIT"
+ "{liu|lis} %0,ha16(%1)")
+
+
+(define_insn "macho_high_di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=b*r")
+ (high:DI (match_operand 1 "" "")))]
+ "TARGET_MACHO && TARGET_64BIT"
+ "{liu|lis} %0,ha16(%1)")
+
+(define_expand "macho_low"
+ [(set (match_operand 0 "" "")
+ (lo_sum (match_operand 1 "" "")
+ (match_operand 2 "" "")))]
+ "TARGET_MACHO"
+{
+ if (TARGET_64BIT)
+ emit_insn (gen_macho_low_di (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_macho_low_si (operands[0], operands[1], operands[2]));
+
+ DONE;
+})
+
+(define_insn "macho_low_si"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,!*r")
+ (match_operand 2 "" "")))]
+ "TARGET_MACHO && ! TARGET_64BIT"
+ "@
+ {cal %0,%a2@l(%1)|la %0,lo16(%2)(%1)}
+ {cal %0,%a2@l(%1)|addic %0,%1,lo16(%2)}")
+
+(define_insn "macho_low_di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,!*r")
+ (match_operand 2 "" "")))]
+ "TARGET_MACHO && TARGET_64BIT"
+ "@
+ {cal %0,%a2@l(%1)|la %0,lo16(%2)(%1)}
+ {cal %0,%a2@l(%1)|addic %0,%1,lo16(%2)}")
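+
+;; Taken together, macho_high and macho_low implement the usual Mach-O
+;; address idiom; a sketch for taking the address of a symbol _foo
+;; (register choices hypothetical):
+;;
+;; lis r2,ha16(_foo)
+;; la r3,lo16(_foo)(r2)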
+
+(define_split
+ [(set (mem:V4SI (plus:DI (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "short_cint_operand" "")))
+ (match_operand:V4SI 2 "register_operand" ""))
+ (clobber (match_operand:DI 3 "gpc_reg_operand" ""))]
+ "TARGET_MACHO && TARGET_64BIT"
+ [(set (match_dup 3) (plus:DI (match_dup 0) (match_dup 1)))
+ (set (mem:V4SI (match_dup 3))
+ (match_dup 2))]
+ "")
+
+(define_expand "load_macho_picbase"
+ [(set (reg:SI 65)
+ (unspec [(match_operand 0 "" "")]
+ UNSPEC_LD_MPIC))]
+ "(DEFAULT_ABI == ABI_DARWIN) && flag_pic"
+{
+ if (TARGET_32BIT)
+ emit_insn (gen_load_macho_picbase_si (operands[0]));
+ else
+ emit_insn (gen_load_macho_picbase_di (operands[0]));
+
+ DONE;
+})
+
+(define_insn "load_macho_picbase_si"
+ [(set (reg:SI 65)
+ (unspec:SI [(match_operand:SI 0 "immediate_operand" "s")
+ (pc)] UNSPEC_LD_MPIC))]
+ "(DEFAULT_ABI == ABI_DARWIN) && flag_pic"
+ "bcl 20,31,%0\\n%0:"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "load_macho_picbase_di"
+ [(set (reg:DI 65)
+ (unspec:DI [(match_operand:DI 0 "immediate_operand" "s")
+ (pc)] UNSPEC_LD_MPIC))]
+ "(DEFAULT_ABI == ABI_DARWIN) && flag_pic && TARGET_64BIT"
+ "bcl 20,31,%0\\n%0:"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_expand "macho_correct_pic"
+ [(set (match_operand 0 "" "")
+ (plus (match_operand 1 "" "")
+ (unspec [(match_operand 2 "" "")
+ (match_operand 3 "" "")]
+ UNSPEC_MPIC_CORRECT)))]
+ "DEFAULT_ABI == ABI_DARWIN"
+{
+ if (TARGET_32BIT)
+ emit_insn (gen_macho_correct_pic_si (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ emit_insn (gen_macho_correct_pic_di (operands[0], operands[1], operands[2],
+ operands[3]));
+
+ DONE;
+})
+
+(define_insn "macho_correct_pic_si"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "immediate_operand" "s")
+ (match_operand:SI 3 "immediate_operand" "s")]
+ UNSPEC_MPIC_CORRECT)))]
+ "DEFAULT_ABI == ABI_DARWIN"
+ "addis %0,%1,ha16(%2-%3)\n\taddi %0,%0,lo16(%2-%3)"
+ [(set_attr "length" "8")])
+
+(define_insn "macho_correct_pic_di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (plus:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "immediate_operand" "s")
+ (match_operand:DI 3 "immediate_operand" "s")]
+ 16)))]
+ "DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT"
+ "addis %0,%1,ha16(%2-%3)\n\taddi %0,%0,lo16(%2-%3)"
+ [(set_attr "length" "8")])
+
+(define_insn "*call_indirect_nonlocal_darwin64"
+ [(call (mem:SI (match_operand:DI 0 "register_operand" "c,*l,c,*l"))
+ (match_operand 1 "" "g,g,g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,O,n,n"))
+ (clobber (reg:SI 65))]
+ "DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT"
+{
+ return "b%T0l";
+}
+ [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*call_nonlocal_darwin64"
+ [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (clobber (reg:SI 65))]
+ "(DEFAULT_ABI == ABI_DARWIN)
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+{
+#if TARGET_MACHO
+ return output_call (insn, operands, 0, 2);
+#else
+ gcc_unreachable ();
+#endif
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_value_indirect_nonlocal_darwin64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" "c,*l,c,*l"))
+ (match_operand 2 "" "g,g,g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,O,n,n"))
+ (clobber (reg:SI 65))]
+ "DEFAULT_ABI == ABI_DARWIN"
+{
+ return "b%T1l";
+}
+ [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*call_value_nonlocal_darwin64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (clobber (reg:SI 65))]
+ "(DEFAULT_ABI == ABI_DARWIN)
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+{
+#if TARGET_MACHO
+ return output_call (insn, operands, 1, 3);
+#else
+ gcc_unreachable ();
+#endif
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sibcall_nonlocal_darwin64"
+ [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s,s"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "immediate_operand" "O,n"))
+ (use (reg:SI 65))
+ (return)]
+ "(DEFAULT_ABI == ABI_DARWIN)
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+{
+ return "b %z0";
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sibcall_value_nonlocal_darwin64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s,s"))
+ (match_operand 2 "" "")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (use (reg:SI 65))
+ (return)]
+ "(DEFAULT_ABI == ABI_DARWIN)
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+ return \"b %z1\";
+}"
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+
+(define_insn "*sibcall_symbolic_64"
+ [(call (mem:SI (match_operand:DI 0 "call_operand" "s,c")) ; 64
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (reg:SI 65))
+ (return)]
+ "TARGET_64BIT && DEFAULT_ABI == ABI_DARWIN"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"b %z0\";
+ case 1: return \"b%T0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_symbolic_64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "call_operand" "s,c"))
+ (match_operand 2 "" "")))
+ (use (match_operand:SI 3 "" ""))
+ (use (reg:SI 65))
+ (return)]
+ "TARGET_64BIT && DEFAULT_ABI == ABI_DARWIN"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"b %z1\";
+ case 1: return \"b%T1\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
diff --git a/gcc/config/rs6000/darwin.opt b/gcc/config/rs6000/darwin.opt
new file mode 100644
index 000000000..3787511b6
--- /dev/null
+++ b/gcc/config/rs6000/darwin.opt
@@ -0,0 +1,42 @@
+; Darwin options for PPC port.
+;
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Waltivec-long-deprecated
+Driver Alias(mwarn-altivec-long)
+
+faltivec
+Driver
+
+; -ffix-and-continue and -findirect-data are for compatibility with
+; old compilers.
+ffix-and-continue
+Driver RejectNegative Alias(mfix-and-continue)
+
+findirect-data
+Driver RejectNegative Alias(mfix-and-continue)
+
+m64
+Target RejectNegative Negative(m32) Mask(64BIT)
+Generate 64-bit code
+
+m32
+Target RejectNegative Negative(m64) InverseMask(64BIT)
+Generate 32-bit code
diff --git a/gcc/config/rs6000/darwin64.h b/gcc/config/rs6000/darwin64.h
new file mode 100644
index 000000000..a74ceb17f
--- /dev/null
+++ b/gcc/config/rs6000/darwin64.h
@@ -0,0 +1,35 @@
+/* Target definitions for PowerPC running Darwin (Mac OS X).
+ Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Darwin/PowerPC64)");
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_POWERPC64 | MASK_64BIT \
+ | MASK_MULTIPLE | MASK_NEW_MNEMONICS | MASK_PPC_GFXOPT)
+
+#undef DARWIN_ARCH_SPEC
+#define DARWIN_ARCH_SPEC "%{m32:ppc;:ppc64}"
+
+#undef DARWIN_SUBARCH_SPEC
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+#undef DARWIN_CRT2_SPEC
+#define DARWIN_CRT2_SPEC ""
diff --git a/gcc/config/rs6000/darwin7.h b/gcc/config/rs6000/darwin7.h
new file mode 100644
index 000000000..fdf371666
--- /dev/null
+++ b/gcc/config/rs6000/darwin7.h
@@ -0,0 +1,30 @@
+/* Target definitions for Darwin 7.x (Mac OS X) systems.
+ Copyright (C) 2004, 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Machine dependent libraries. Include libmx when compiling for
+ Darwin 7.0 and above, but before libSystem, since the functions are
+ actually in libSystem but for 7.x compatibility we want them to be
+ looked for in libmx first. Include libmx by default because otherwise
+ libstdc++ isn't usable. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!static:\
+ %:version-compare(!< 10.3 mmacosx-version-min= -lmx)\
+ -lSystem}"
diff --git a/gcc/config/rs6000/darwin8.h b/gcc/config/rs6000/darwin8.h
new file mode 100644
index 000000000..7cdd81db7
--- /dev/null
+++ b/gcc/config/rs6000/darwin8.h
@@ -0,0 +1,32 @@
+/* Target definitions for Darwin 8.0 and above (Mac OS X) systems.
+ Copyright (C) 2004, 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Machine dependent libraries. Include libmx when compiling on
+ Darwin 7.0 and above, but before libSystem, since the functions are
+ actually in libSystem but for 7.x compatibility we want them to be
+ looked for in libmx first---but only do this if 7.x compatibility
+ is a concern, which it's not in 64-bit mode. Include
+ libSystemStubs when compiling on (not necessarily for) 8.0 and
+ above, unless 64-bit long doubles are in use. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!static:\
+ %{!mlong-double-64:%{pg:-lSystemStubs_profile;:-lSystemStubs}} \
+ %{!m64:%:version-compare(>< 10.3 10.4 mmacosx-version-min= -lmx)} -lSystem}"
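+
+/* One possible expansion (assuming no -static, -pg, or
+ -mlong-double-64): a 32-bit link with a 10.3.x deployment target
+ pulls in "-lSystemStubs -lmx -lSystem", while an -m64 link gets
+ just "-lSystemStubs -lSystem". */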
diff --git a/gcc/config/rs6000/default64.h b/gcc/config/rs6000/default64.h
new file mode 100644
index 000000000..0ff49aab9
--- /dev/null
+++ b/gcc/config/rs6000/default64.h
@@ -0,0 +1,24 @@
+/* Definitions of target machine for GNU compiler,
+ for 64 bit powerpc linux defaulting to -m64.
+ Copyright (C) 2003, 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_POWERPC | MASK_PPC_GFXOPT | \
+ MASK_POWERPC64 | MASK_64BIT | MASK_NEW_MNEMONICS)
diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
new file mode 100644
index 000000000..5ffe7fcff
--- /dev/null
+++ b/gcc/config/rs6000/dfp.md
@@ -0,0 +1,594 @@
+;; Decimal Floating Point (DFP) patterns.
+;; Copyright (C) 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Ben Elliston (bje@au.ibm.com) and Peter Bergner
+;; (bergner@vnet.ibm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;
+;; UNSPEC usage
+;;
+
+(define_constants
+ [(UNSPEC_MOVSD_LOAD 400)
+ (UNSPEC_MOVSD_STORE 401)
+ ])
+
+
+(define_expand "movsd"
+ [(set (match_operand:SD 0 "nonimmediate_operand" "")
+ (match_operand:SD 1 "any_operand" ""))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
+
+(define_split
+ [(set (match_operand:SD 0 "gpc_reg_operand" "")
+ (match_operand:SD 1 "const_double_operand" ""))]
+ "reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ long l;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
+
+ if (! TARGET_POWERPC64)
+ operands[2] = operand_subword (operands[0], 0, 0, SDmode);
+ else
+ operands[2] = gen_lowpart (SImode, operands[0]);
+
+ operands[3] = gen_int_mode (l, SImode);
+}")
+
+(define_insn "movsd_hardfloat"
+ [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,*q,!r,*h,!r,!r")
+ (match_operand:SD 1 "input_operand" "r,m,r,f,r,r,h,0,G,Fn"))]
+ "(gpc_reg_operand (operands[0], SDmode)
+ || gpc_reg_operand (operands[1], SDmode))
+ && (TARGET_HARD_FLOAT && TARGET_FPRS)"
+ "@
+ mr %0,%1
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ fmr %0,%1
+ mt%0 %1
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #"
+ [(set_attr "type" "*,load,store,fp,mtjmpr,*,mfjmpr,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,8")])
+
+(define_insn "movsd_softfloat"
+ [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,q,r,r,m,r,r,r,r,r,*h")
+ (match_operand:SD 1 "input_operand" "r,r,r,h,m,r,I,L,R,G,Fn,0"))]
+ "(gpc_reg_operand (operands[0], SDmode)
+ || gpc_reg_operand (operands[1], SDmode))
+ && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
+ "@
+ mr %0,%1
+ mt%0 %1
+ mt%0 %1
+ mf%1 %0
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ {lil|li} %0,%1
+ {liu|lis} %0,%v1
+ {cal|la} %0,%a1
+ #
+ #
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "*,mtjmpr,*,mfjmpr,load,store,*,*,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,4")])
+
+(define_insn "movsd_store"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=m")
+ (unspec:DD [(match_operand:SD 1 "input_operand" "d")]
+ UNSPEC_MOVSD_STORE))]
+ "(gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], SDmode))
+ && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "stfd%U0%X0 %1,%0"
+ [(set_attr "type" "fpstore")
+ (set_attr "length" "4")])
+
+(define_insn "movsd_load"
+ [(set (match_operand:SD 0 "nonimmediate_operand" "=f")
+ (unspec:SD [(match_operand:DD 1 "input_operand" "m")]
+ UNSPEC_MOVSD_LOAD))]
+ "(gpc_reg_operand (operands[0], SDmode)
+ || gpc_reg_operand (operands[1], DDmode))
+ && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "lfd%U1%X1 %0,%1"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
+;; Hardware support for decimal floating point operations.
+
+(define_insn "extendsddd2"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (float_extend:DD (match_operand:SD 1 "gpc_reg_operand" "f")))]
+ "TARGET_DFP"
+ "dctdp %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "extendsdtd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (float_extend:TD (match_operand:SD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+{
+ rtx tmp = gen_reg_rtx (DDmode);
+ emit_insn (gen_extendsddd2 (tmp, operands[1]));
+ emit_insn (gen_extendddtd2 (operands[0], tmp));
+ DONE;
+})
+
+(define_insn "truncddsd2"
+ [(set (match_operand:SD 0 "gpc_reg_operand" "=f")
+ (float_truncate:SD (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "drsp %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "negdd2"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (neg:DD (match_operand:DD 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*negdd2_fpr"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (neg:DD (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fneg %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "absdd2"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (abs:DD (match_operand:DD 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*absdd2_fpr"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "*nabsdd2_fpr"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (neg:DD (abs:DD (match_operand:DD 1 "gpc_reg_operand" "d"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fnabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "movdd"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "")
+ (match_operand:DD 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_int_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 1))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DDmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
+#if HOST_BITS_PER_WIDE_INT == 32
+ operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
+#else
+ operands[4] = GEN_INT (value >> 32);
+ operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
+#endif
+}")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_double_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DDmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
+ operands[4] = gen_int_mode (l[endian], SImode);
+ operands[5] = gen_int_mode (l[1 - endian], SImode);
+}")
+
+(define_split
+ [(set (match_operand:DD 0 "gpc_reg_operand" "")
+ (match_operand:DD 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+#if HOST_BITS_PER_WIDE_INT >= 64
+ HOST_WIDE_INT val;
+#endif
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ /* The HIGHPART is at the lower memory address when WORDS_BIG_ENDIAN. */
+#if HOST_BITS_PER_WIDE_INT >= 64
+ val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
+ | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
+
+ operands[3] = gen_int_mode (val, DImode);
+#else
+ operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
+#endif
+}")
+
+;; Don't have reload use general registers to load a constant. First,
+;; it might not work if the output operand is the equivalent of
+;; a non-offsettable memref; second, it is less efficient than loading
+;; the constant into an FP register, since it will probably be used there.
+;; The "??" is a kludge until we can figure out a more reasonable way
+;; of handling these non-offsettable values.
+(define_insn "*movdd_hardfloat32"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r")
+ (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))]
+ "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ case 1:
+ case 2:
+ return \"#\";
+ case 3:
+ return \"fmr %0,%1\";
+ case 4:
+ return \"lfd%U1%X1 %0,%1\";
+ case 5:
+ return \"stfd%U0%X0 %1,%0\";
+ case 6:
+ case 7:
+ case 8:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
+ (set_attr "length" "8,16,16,4,4,4,8,12,16")])
+
+(define_insn "*movdd_softfloat32"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r")
+ (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))]
+ "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "#"
+ [(set_attr "type" "two,load,store,*,*,*")
+ (set_attr "length" "8,8,8,8,12,16")])
+
+; ld/std require word-aligned displacements -> 'Y' constraint.
+; List Y->r and r->Y before r->r for reload.
+(define_insn "*movdd_hardfloat64_mfpgpr"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d")
+ (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))]
+ "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "@
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #
+ #
+ mftgpr %0,%1
+ mffgpr %0,%1"
+ [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+
+; ld/std require word-aligned displacements -> 'Y' constraint.
+; List Y->r and r->Y before r->r for reload.
+(define_insn "*movdd_hardfloat64"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r")
+ (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))]
+ "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "@
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #
+ #"
+ [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
+
+(define_insn "*movdd_softfloat64"
+ [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
+ (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
+ "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], DDmode)
+ || gpc_reg_operand (operands[1], DDmode))"
+ "@
+ ld%U1%X1 %0,%1
+ std%U0%X0 %1,%0
+ mr %0,%1
+ mt%0 %1
+ mf%1 %0
+ #
+ #
+ #
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,8,12,16,4")])
+
+(define_expand "negtd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "")
+ (neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*negtd2_fpr"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (neg:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fneg %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "abstd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "")
+ (abs:TD (match_operand:TD 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*abstd2_fpr"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "*nabstd2_fpr"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "fnabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "movtd"
+ [(set (match_operand:TD 0 "general_operand" "")
+ (match_operand:TD 1 "any_operand" ""))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }")
+
+; It's important to list the o->f and f->o moves before f->f because
+; otherwise reload, given m->f, will try to pick f->f and reload it,
+; which doesn't make progress. Likewise r->Y must be before r->r.
+(define_insn_and_split "*movtd_internal"
+ [(set (match_operand:TD 0 "nonimmediate_operand" "=o,d,d,r,Y,r")
+ (match_operand:TD 1 "input_operand" "d,o,d,YGHF,r,r"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], TDmode)
+ || gpc_reg_operand (operands[1], TDmode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "8,8,8,20,20,16")])
+
+;; Hardware support for decimal floating point operations.
+
+(define_insn "extendddtd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (float_extend:TD (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dctqpq %0,%1"
+ [(set_attr "type" "fp")])
+
+;; The result of drdpq is an even/odd register pair with the converted
+;; value in the even register and zero in the odd register.
+;; FIXME: Avoid the register move by using a reload constraint to ensure
+;; that the result is the first of the pair receiving the result of drdpq.
+
+(define_insn "trunctddd2"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (float_truncate:DD (match_operand:TD 1 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:TD 2 "=d"))]
+ "TARGET_DFP"
+ "drdpq %2,%1\;fmr %0,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "adddd3"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (plus:DD (match_operand:DD 1 "gpc_reg_operand" "%d")
+ (match_operand:DD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dadd %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "addtd3"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (plus:TD (match_operand:TD 1 "gpc_reg_operand" "%d")
+ (match_operand:TD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "daddq %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "subdd3"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (minus:DD (match_operand:DD 1 "gpc_reg_operand" "d")
+ (match_operand:DD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dsub %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "subtd3"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (minus:TD (match_operand:TD 1 "gpc_reg_operand" "d")
+ (match_operand:TD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dsubq %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "muldd3"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (mult:DD (match_operand:DD 1 "gpc_reg_operand" "%d")
+ (match_operand:DD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dmul %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "multd3"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (mult:TD (match_operand:TD 1 "gpc_reg_operand" "%d")
+ (match_operand:TD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dmulq %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "divdd3"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (div:DD (match_operand:DD 1 "gpc_reg_operand" "d")
+ (match_operand:DD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "ddiv %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "divtd3"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (div:TD (match_operand:TD 1 "gpc_reg_operand" "d")
+ (match_operand:TD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "ddivq %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "*cmpdd_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:DD 1 "gpc_reg_operand" "d")
+ (match_operand:DD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dcmpu %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+(define_insn "*cmptd_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:TD 1 "gpc_reg_operand" "d")
+ (match_operand:TD 2 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dcmpuq %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+(define_insn "floatditd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (float:TD (match_operand:DI 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dcffixq %0,%1"
+ [(set_attr "type" "fp")])
+
+;; Convert a decimal64 to a decimal64 whose value is an integer.
+;; This is the first stage of converting it to an integer type.
+
+(define_insn "ftruncdd2"
+ [(set (match_operand:DD 0 "gpc_reg_operand" "=d")
+ (fix:DD (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "drintn. 0,%0,%1,1"
+ [(set_attr "type" "fp")])
+
+;; Convert a decimal64 whose value is an integer to an actual integer.
+;; This is the second stage of converting decimal float to integer type.
+
+(define_insn "fixdddi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (fix:DI (match_operand:DD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dctfix %0,%1"
+ [(set_attr "type" "fp")])
+
+;; Convert a decimal128 to a decimal128 whose value is an integer.
+;; This is the first stage of converting it to an integer type.
+
+(define_insn "ftrunctd2"
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+ (fix:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "drintnq. 0,%0,%1,1"
+ [(set_attr "type" "fp")])
+
+;; Convert a decimal128 whose value is an integer to an actual integer.
+;; This is the second stage of converting decimal float to integer type.
+
+(define_insn "fixtddi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (fix:DI (match_operand:TD 1 "gpc_reg_operand" "d")))]
+ "TARGET_DFP"
+ "dctfixq %0,%1"
+ [(set_attr "type" "fp")])
diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c
new file mode 100644
index 000000000..11e76ea96
--- /dev/null
+++ b/gcc/config/rs6000/driver-rs6000.c
@@ -0,0 +1,547 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include <stdlib.h>
+
+#ifdef _AIX
+# include <sys/systemcfg.h>
+#endif
+
+#ifdef __linux__
+# include <link.h>
+#endif
+
+#if defined (__APPLE__) || defined (__FreeBSD__)
+# include <sys/types.h>
+# include <sys/sysctl.h>
+#endif
+
+const char *host_detect_local_cpu (int argc, const char **argv);
+
+#if GCC_VERSION >= 0
+
+/* Returns --param switches that describe an L1_ASSOC associative
+   level 1 cache of size L1_SIZEKB with lines of size L1_LINE, and a
+   level 2 cache of size L2_SIZEKB.  */
+
+static char *
+describe_cache (unsigned l1_sizekb, unsigned l1_line,
+ unsigned l1_assoc ATTRIBUTE_UNUSED, unsigned l2_sizekb)
+{
+ char l1size[1000], line[1000], l2size[1000];
+
+  /* At the moment, the GCC middle end does not use the information
+     about the associativity of the cache.  */
+
+ sprintf (l1size, "--param l1-cache-size=%u", l1_sizekb);
+ sprintf (line, "--param l1-cache-line-size=%u", l1_line);
+ sprintf (l2size, "--param l2-cache-size=%u", l2_sizekb);
+
+ return concat (l1size, " ", line, " ", l2size, " ", NULL);
+}
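As a concrete illustration of the string describe_cache assembles: for a 32 KB L1 data cache with 128-byte lines and a 512 KB L2, the callers below end up handing the driver

  --param l1-cache-size=32 --param l1-cache-line-size=128 --param l2-cache-size=512

(the associativity argument is accepted but, as the comment notes, currently unused).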
+
+#ifdef __APPLE__
+
+/* Returns the description of caches on Darwin. */
+
+static char *
+detect_caches_darwin (void)
+{
+ unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb;
+ size_t len = 4;
+ static int l1_size_name[2] = { CTL_HW, HW_L1DCACHESIZE };
+ static int l1_line_name[2] = { CTL_HW, HW_CACHELINE };
+ static int l2_size_name[2] = { CTL_HW, HW_L2CACHESIZE };
+
+ sysctl (l1_size_name, 2, &l1_sizekb, &len, NULL, 0);
+ sysctl (l1_line_name, 2, &l1_line, &len, NULL, 0);
+ sysctl (l2_size_name, 2, &l2_sizekb, &len, NULL, 0);
+ l1_assoc = 0;
+
+ return describe_cache (l1_sizekb / 1024, l1_line, l1_assoc,
+ l2_sizekb / 1024);
+}
+
+static const char *
+detect_processor_darwin (void)
+{
+ unsigned int proc;
+ size_t len = 4;
+
+ sysctlbyname ("hw.cpusubtype", &proc, &len, NULL, 0);
+
+ if (len > 0)
+ switch (proc)
+ {
+ case 1:
+ return "601";
+ case 2:
+ return "602";
+ case 3:
+ return "603";
+ case 4:
+ case 5:
+ return "603e";
+ case 6:
+ return "604";
+ case 7:
+ return "604e";
+ case 8:
+ return "620";
+ case 9:
+ return "750";
+ case 10:
+ return "7400";
+ case 11:
+ return "7450";
+ case 100:
+ return "970";
+ default:
+ return "powerpc";
+ }
+
+ return "powerpc";
+}
+
+#endif /* __APPLE__ */
+
+#ifdef __FreeBSD__
+
+/* Returns the description of caches on FreeBSD PPC. */
+
+static char *
+detect_caches_freebsd (void)
+{
+ unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb;
+ size_t len = 4;
+
+  /* As of FreeBSD 7.0, only the cache line size is available via
+     sysctl.  */
+ sysctlbyname ("machdep.cacheline_size", &l1_line, &len, NULL, 0);
+
+ l1_sizekb = 32;
+ l1_assoc = 0;
+ l2_sizekb = 512;
+
+ return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb);
+}
+
+/* Currently returns default powerpc. */
+static const char *
+detect_processor_freebsd (void)
+{
+ return "powerpc";
+}
+
+#endif /* __FreeBSD__ */
+
+#ifdef __linux__
+
+/* Returns the AT_PLATFORM value from /proc/self/auxv if present,
+   otherwise NULL.  */
+
+static const char *
+elf_platform (void)
+{
+ int fd;
+
+ fd = open ("/proc/self/auxv", O_RDONLY);
+
+ if (fd != -1)
+ {
+ char buf[1024];
+ ElfW(auxv_t) *av;
+ ssize_t n;
+
+ n = read (fd, buf, sizeof (buf));
+ close (fd);
+
+ if (n > 0)
+ {
+ for (av = (ElfW(auxv_t) *) buf; av->a_type != AT_NULL; ++av)
+ switch (av->a_type)
+ {
+ case AT_PLATFORM:
+ return (const char *) av->a_un.a_val;
+
+ default:
+ break;
+ }
+ }
+ }
+ return NULL;
+}
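For reference, this open/read/scan dance exists because the code predates a library accessor for the auxiliary vector. On glibc 2.16 and later (well after this GCC release) the same lookup can be written with getauxval; a hedged sketch, not part of this file:

  /* Illustrative modern equivalent of elf_platform, assuming glibc's
     getauxval is available; returns NULL when AT_PLATFORM is absent.  */
  #include <sys/auxv.h>

  static const char *
  elf_platform_via_getauxval (void)
  {
    unsigned long val = getauxval (AT_PLATFORM);
    return val ? (const char *) val : NULL;   /* e.g. "power6", "ppc".  */
  }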
+
+/* Returns the AT_DCACHEBSIZE value from /proc/self/auxv if present,
+   otherwise the default of 32.  */
+
+static int
+elf_dcachebsize (void)
+{
+ int fd;
+
+ fd = open ("/proc/self/auxv", O_RDONLY);
+
+ if (fd != -1)
+ {
+ char buf[1024];
+ ElfW(auxv_t) *av;
+ ssize_t n;
+
+ n = read (fd, buf, sizeof (buf));
+ close (fd);
+
+ if (n > 0)
+ {
+ for (av = (ElfW(auxv_t) *) buf; av->a_type != AT_NULL; ++av)
+ switch (av->a_type)
+ {
+ case AT_DCACHEBSIZE:
+ return av->a_un.a_val;
+
+ default:
+ break;
+ }
+ }
+ }
+ return 32;
+}
+
+/* Returns the description of caches on Linux. */
+
+static char *
+detect_caches_linux (void)
+{
+ unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb;
+ const char *platform;
+
+ platform = elf_platform ();
+
+ if (platform != NULL)
+ {
+ l1_line = 128;
+
+ if (platform[5] == '6')
+ /* POWER6 and POWER6x */
+ l1_sizekb = 64;
+ else
+ l1_sizekb = 32;
+ }
+ else
+ {
+ l1_line = elf_dcachebsize ();
+ l1_sizekb = 32;
+ }
+
+ l1_assoc = 0;
+ l2_sizekb = 512;
+
+ return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb);
+}
+
+static const char *
+detect_processor_linux (void)
+{
+ const char *platform;
+
+ platform = elf_platform ();
+
+ if (platform != NULL)
+ return platform;
+ else
+ return "powerpc";
+}
+
+#endif /* __linux__ */
+
+#ifdef _AIX
+/* Returns the description of caches on AIX. */
+
+static char *
+detect_caches_aix (void)
+{
+ unsigned l1_sizekb, l1_line, l1_assoc, l2_sizekb;
+
+ l1_sizekb = _system_configuration.dcache_size / 1024;
+ l1_line = _system_configuration.dcache_line;
+ l1_assoc = _system_configuration.dcache_asc;
+ l2_sizekb = _system_configuration.L2_cache_size / 1024;
+
+ return describe_cache (l1_sizekb, l1_line, l1_assoc, l2_sizekb);
+}
+
+
+/* Returns the processor implementation on AIX. */
+
+static const char *
+detect_processor_aix (void)
+{
+ switch (_system_configuration.implementation)
+ {
+ case 0x0001:
+ return "rios1";
+
+ case 0x0002:
+ return "rsc";
+
+ case 0x0004:
+ return "rios2";
+
+ case 0x0008:
+ return "601";
+
+ case 0x0020:
+ return "603";
+
+ case 0x0010:
+ return "604";
+
+ case 0x0040:
+ return "620";
+
+ case 0x0080:
+ return "630";
+
+ case 0x0100:
+ case 0x0200:
+ case 0x0400:
+ return "rs64";
+
+ case 0x0800:
+ return "power4";
+
+ case 0x2000:
+ if (_system_configuration.version == 0x0F0000)
+ return "power5";
+ else
+ return "power5+";
+
+ case 0x4000:
+ return "power6";
+
+ default:
+ return "powerpc";
+ }
+}
+#endif /* _AIX */
+
+
+/*
+ * Array to map -mcpu=native names to the switches passed to the assembler.
+ * This list mirrors the specs in ASM_CPU_SPEC, and any changes made here
+ * should be made there as well.
+ */
+
+struct asm_name {
+ const char *cpu;
+ const char *asm_sw;
+};
+
+static const struct asm_name asm_names[] = {
+#if defined (_AIX)
+ { "power3", "-m620" },
+ { "power4", "-mpwr4" },
+ { "power5", "-mpwr5" },
+ { "power5+", "-mpwr5x" },
+ { "power6", "-mpwr6" },
+ { "power6x", "-mpwr6" },
+ { "power7", "-mpwr7" },
+ { "powerpc", "-mppc" },
+ { "rs64a", "-mppc" },
+ { "603", "-m603" },
+ { "603e", "-m603" },
+ { "604", "-m604" },
+ { "604e", "-m604" },
+ { "620", "-m620" },
+ { "630", "-m620" },
+ { "970", "-m970" },
+ { "G5", "-m970" },
+ { NULL, "\
+%{!maix64: \
+%{mpowerpc64: -mppc64} \
+%{maltivec: -m970} \
+%{!maltivec: %{!mpower64: %(asm_default)}}}" },
+
+#else
+ { "common", "-mcom" },
+ { "cell", "-mcell" },
+ { "power", "-mpwr" },
+ { "power2", "-mpwrx" },
+ { "power3", "-mppc64" },
+ { "power4", "-mpower4" },
+ { "power5", "%(asm_cpu_power5)" },
+ { "power5+", "%(asm_cpu_power5)" },
+ { "power6", "%(asm_cpu_power6) -maltivec" },
+ { "power6x", "%(asm_cpu_power6) -maltivec" },
+ { "power7", "%(asm_cpu_power7)" },
+ { "powerpc", "-mppc" },
+ { "rios", "-mpwr" },
+ { "rios1", "-mpwr" },
+ { "rios2", "-mpwrx" },
+ { "rsc", "-mpwr" },
+ { "rsc1", "-mpwr" },
+ { "rs64a", "-mppc64" },
+ { "401", "-mppc" },
+ { "403", "-m403" },
+ { "405", "-m405" },
+ { "405fp", "-m405" },
+ { "440", "-m440" },
+ { "440fp", "-m440" },
+ { "464", "-m440" },
+ { "464fp", "-m440" },
+ { "505", "-mppc" },
+ { "601", "-m601" },
+ { "602", "-mppc" },
+ { "603", "-mppc" },
+ { "603e", "-mppc" },
+ { "ec603e", "-mppc" },
+ { "604", "-mppc" },
+ { "604e", "-mppc" },
+ { "620", "-mppc64" },
+ { "630", "-mppc64" },
+ { "740", "-mppc" },
+ { "750", "-mppc" },
+ { "G3", "-mppc" },
+ { "7400", "-mppc -maltivec" },
+ { "7450", "-mppc -maltivec" },
+ { "G4", "-mppc -maltivec" },
+ { "801", "-mppc" },
+ { "821", "-mppc" },
+ { "823", "-mppc" },
+ { "860", "-mppc" },
+ { "970", "-mpower4 -maltivec" },
+ { "G5", "-mpower4 -maltivec" },
+ { "8540", "-me500" },
+ { "8548", "-me500" },
+ { "e300c2", "-me300" },
+ { "e300c3", "-me300" },
+ { "e500mc", "-me500mc" },
+ { NULL, "\
+%{mpower: %{!mpower2: -mpwr}} \
+%{mpower2: -mpwrx} \
+%{mpowerpc64*: -mppc64} \
+%{!mpowerpc64*: %{mpowerpc*: -mppc}} \
+%{mno-power: %{!mpowerpc*: -mcom}} \
+%{!mno-power: %{!mpower*: %(asm_default)}}" },
+#endif
+};
+
+/* This will be called by the spec parser in gcc.c when it sees
+   a %:local_cpu_detect(args) construct.  Currently it will be called
+   with either "cpu" or "tune" as argument depending on whether
+   -mcpu=native or -mtune=native is to be substituted.
+
+   Additionally it will be called with "asm" to select the appropriate
+   flags for the assembler.
+
+   It returns a string containing new command line parameters to be
+   put at the place of the above two options, depending on which CPU
+   this is executed on.
+
+   ARGC and ARGV are set depending on the actual arguments given
+   in the spec.  */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu = NULL;
+ const char *cache = "";
+ const char *options = "";
+ bool arch;
+ bool assembler;
+ size_t i;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = strcmp (argv[0], "cpu") == 0;
+ assembler = (!arch && strcmp (argv[0], "asm") == 0);
+ if (!arch && !assembler && strcmp (argv[0], "tune"))
+ return NULL;
+
+ if (! assembler)
+ {
+#if defined (_AIX)
+ cache = detect_caches_aix ();
+#elif defined (__APPLE__)
+ cache = detect_caches_darwin ();
+#elif defined (__FreeBSD__)
+ cache = detect_caches_freebsd ();
+ /* FreeBSD PPC does not provide any cache information yet. */
+ cache = "";
+#elif defined (__linux__)
+ cache = detect_caches_linux ();
+ /* PPC Linux does not provide any cache information yet. */
+ cache = "";
+#else
+ cache = "";
+#endif
+ }
+
+#if defined (_AIX)
+ cpu = detect_processor_aix ();
+#elif defined (__APPLE__)
+ cpu = detect_processor_darwin ();
+#elif defined (__FreeBSD__)
+ cpu = detect_processor_freebsd ();
+#elif defined (__linux__)
+ cpu = detect_processor_linux ();
+#else
+ cpu = "powerpc";
+#endif
+
+ if (assembler)
+ {
+ for (i = 0; i < sizeof (asm_names) / sizeof (asm_names[0]); i++)
+ {
+ if (!asm_names[i].cpu || !strcmp (asm_names[i].cpu, cpu))
+ return asm_names[i].asm_sw;
+ }
+
+ return NULL;
+ }
+
+ return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
+}
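For context, host_detect_local_cpu is not called directly: the driver reaches it through a %:local_cpu_detect(...) construct embedded in the specs. The registration lives in the rs6000 target headers and is roughly of the following shape — quoted from memory, so treat the exact macro names and spec text as an assumption rather than a citation:

  /* Assumed wiring (rs6000.h): register the spec function, then rewrite
     -mcpu=native and -mtune=native into the detected values.  */
  #define EXTRA_SPEC_FUNCTIONS \
    { "local_cpu_detect", host_detect_local_cpu },

  #define MCPU_MTUNE_NATIVE_SPECS \
   " %{mcpu=native: %<mcpu=native %:local_cpu_detect(cpu)}" \
   " %{mtune=native: %<mtune=native %:local_cpu_detect(tune)}"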
+
+#else /* GCC_VERSION */
+
+/* If we aren't compiling with GCC we just provide a minimal
+ default value. */
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+  const char *cpu;
+
+  if (argc < 1)
+    return NULL;
+
+  if (strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune") != 0)
+    return NULL;
+
+  /* Use the generic default for both "cpu" and "tune"; CPU was
+     previously left uninitialized when called with "tune".  */
+  cpu = "powerpc";
+
+  return concat ("-m", argv[0], "=", cpu, NULL);
+}
+
+#endif /* GCC_VERSION */
+
diff --git a/gcc/config/rs6000/e300c2c3.md b/gcc/config/rs6000/e300c2c3.md
new file mode 100644
index 000000000..3462a209f
--- /dev/null
+++ b/gcc/config/rs6000/e300c2c3.md
@@ -0,0 +1,189 @@
+;; Pipeline description for the Motorola PowerPC e300c2 and e300c3 cores.
+;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Edmar Wienskoski (edmar@freescale.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ppce300c3_most,ppce300c3_long,ppce300c3_retire")
+(define_cpu_unit "ppce300c3_decode_0,ppce300c3_decode_1" "ppce300c3_most")
+
+;; We don't simulate the general issue queue (GIC).  If we have an SU
+;; insn followed by an SU1 insn, they cannot be issued on the same
+;; cycle (although an SU1 insn followed by an SU insn can be) because
+;; the SU insn will go to SU1 from the GIC0 entry.  Fortunately, the
+;; first-cycle multipass insn scheduling will detect this situation
+;; and issue the SU1 insn and then the SU insn.
+(define_cpu_unit "ppce300c3_issue_0,ppce300c3_issue_1" "ppce300c3_most")
+
+;; We could describe the completion buffer slots in combination with
+;; the retirement units and the order of completion, but the resulting
+;; automaton would behave in the same way, because we cannot describe
+;; the real latency while taking in-order completion into account.
+;; We could in fact derive the real latency by querying the reserved
+;; automaton units, but the current scheduler uses latency times before
+;; issuing insns and before making any reservations.
+;;
+;; So our description aims to achieve an insn schedule in which the
+;; insns would not wait in the completion buffer.
+(define_cpu_unit "ppce300c3_retire_0,ppce300c3_retire_1" "ppce300c3_retire")
+
+;; Branch unit:
+(define_cpu_unit "ppce300c3_bu" "ppce300c3_most")
+
+;; IU:
+(define_cpu_unit "ppce300c3_iu0_stage0,ppce300c3_iu1_stage0" "ppce300c3_most")
+
+;; MU: This unit is used to describe non-pipelined division.
+(define_cpu_unit "ppce300c3_mu_div" "ppce300c3_long")
+
+;; SRU:
+(define_cpu_unit "ppce300c3_sru_stage0" "ppce300c3_most")
+
+;; Here we have simplified the LSU unit description by not describing
+;; the stages.
+(define_cpu_unit "ppce300c3_lsu" "ppce300c3_most")
+
+;; FPU:
+(define_cpu_unit "ppce300c3_fpu" "ppce300c3_most")
+
+;; The following units are used to make the automata deterministic.
+(define_cpu_unit "present_ppce300c3_decode_0" "ppce300c3_most")
+(define_cpu_unit "present_ppce300c3_issue_0" "ppce300c3_most")
+(define_cpu_unit "present_ppce300c3_retire_0" "ppce300c3_retire")
+(define_cpu_unit "present_ppce300c3_iu0_stage0" "ppce300c3_most")
+
+;; The following presence sets make the automata deterministic when
+;; the ndfa option is used.
+(presence_set "present_ppce300c3_decode_0" "ppce300c3_decode_0")
+(presence_set "present_ppce300c3_issue_0" "ppce300c3_issue_0")
+(presence_set "present_ppce300c3_retire_0" "ppce300c3_retire_0")
+(presence_set "present_ppce300c3_iu0_stage0" "ppce300c3_iu0_stage0")
+
+;; Some useful abbreviations.
+(define_reservation "ppce300c3_decode"
+ "ppce300c3_decode_0|ppce300c3_decode_1+present_ppce300c3_decode_0")
+(define_reservation "ppce300c3_issue"
+ "ppce300c3_issue_0|ppce300c3_issue_1+present_ppce300c3_issue_0")
+(define_reservation "ppce300c3_retire"
+ "ppce300c3_retire_0|ppce300c3_retire_1+present_ppce300c3_retire_0")
+(define_reservation "ppce300c3_iu_stage0"
+ "ppce300c3_iu0_stage0|ppce300c3_iu1_stage0+present_ppce300c3_iu0_stage0")
+
+;; Compares can be executed in either one of the IUs or the SRU.
+(define_insn_reservation "ppce300c3_cmp" 1
+ (and (eq_attr "type" "cmp,compare,delayed_compare,fast_compare")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+(ppce300c3_iu_stage0|ppce300c3_sru_stage0) \
+ +ppce300c3_retire")
+
+;; Other one cycle IU insns
+(define_insn_reservation "ppce300c3_iu" 1
+ (and (eq_attr "type" "integer,insert_word,isel")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0+ppce300c3_retire")
+
+;; Branch. Actually this latency time is not used by the scheduler.
+(define_insn_reservation "ppce300c3_branch" 1
+ (and (eq_attr "type" "jmpreg,branch")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_bu,ppce300c3_retire")
+
+;; Multiply is non-pipelined but can be executed in either IU.
+(define_insn_reservation "ppce300c3_multiply" 2
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0, \
+ ppce300c3_iu_stage0+ppce300c3_retire")
+
+;; Divide.  We use the average latency time here.  We omit reserving a
+;; retire unit because the resulting automata would be huge.
+(define_insn_reservation "ppce300c3_divide" 20
+ (and (eq_attr "type" "idiv")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_iu_stage0+ppce300c3_mu_div,\
+ ppce300c3_mu_div*19")
+
+;; CR logical
+(define_insn_reservation "ppce300c3_cr_logical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire")
+
+;; Mfcr
+(define_insn_reservation "ppce300c3_mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire")
+
+;; Mtcrf
+(define_insn_reservation "ppce300c3_mtcrf" 1
+ (and (eq_attr "type" "mtcr")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire")
+
+;; Mtjmpr
+(define_insn_reservation "ppce300c3_mtjmpr" 1
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_sru_stage0+ppce300c3_retire")
+
+;; Floating point instructions.
+(define_insn_reservation "ppce300c3_fpcompare" 3
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire")
+
+(define_insn_reservation "ppce300c3_fp" 3
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,nothing,ppce300c3_retire")
+
+(define_insn_reservation "ppce300c3_dmul" 4
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu,nothing,ppce300c3_retire")
+
+;; Divides are not pipelined.
+(define_insn_reservation "ppce300c3_sdiv" 18
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu*17")
+
+(define_insn_reservation "ppce300c3_ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_fpu,ppce300c3_fpu*32")
+
+;; Loads
+(define_insn_reservation "ppce300c3_load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire")
+
+(define_insn_reservation "ppce300c3_fpload" 2
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire")
+
+;; Stores.
+(define_insn_reservation "ppce300c3_store" 2
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (ior (eq_attr "cpu" "ppce300c2") (eq_attr "cpu" "ppce300c3")))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire")
+
+(define_insn_reservation "ppce300c3_fpstore" 2
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppce300c3"))
+ "ppce300c3_decode,ppce300c3_issue+ppce300c3_lsu,ppce300c3_retire")
diff --git a/gcc/config/rs6000/e500-double.h b/gcc/config/rs6000/e500-double.h
new file mode 100644
index 000000000..5545a8c93
--- /dev/null
+++ b/gcc/config/rs6000/e500-double.h
@@ -0,0 +1,24 @@
+/* Target definitions for E500 with double precision FP.
+ Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef SUB3TARGET_OVERRIDE_OPTIONS
+#define SUB3TARGET_OVERRIDE_OPTIONS \
+ if (!rs6000_explicit_options.float_gprs) \
+ rs6000_float_gprs = 2;
diff --git a/gcc/config/rs6000/e500.h b/gcc/config/rs6000/e500.h
new file mode 100644
index 000000000..807df0900
--- /dev/null
+++ b/gcc/config/rs6000/e500.h
@@ -0,0 +1,57 @@
+/* Enable E500 support.
+ Copyright (C) 2003, 2004, 2006, 2007, 2008, 2009, 2010 Free Software
+ Foundation, Inc.
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_SPE_ABI
+#undef TARGET_SPE
+#undef TARGET_E500
+#undef TARGET_FPRS
+#undef TARGET_E500_SINGLE
+#undef TARGET_E500_DOUBLE
+#undef CHECK_E500_OPTIONS
+
+#define TARGET_SPE_ABI rs6000_spe_abi
+#define TARGET_SPE rs6000_spe
+#define TARGET_E500 (rs6000_cpu == PROCESSOR_PPC8540)
+#define TARGET_FPRS (rs6000_float_gprs == 0)
+#define TARGET_E500_SINGLE (TARGET_HARD_FLOAT && rs6000_float_gprs == 1)
+#define TARGET_E500_DOUBLE (TARGET_HARD_FLOAT && rs6000_float_gprs == 2)
+#define CHECK_E500_OPTIONS \
+ do { \
+ if (TARGET_E500 || TARGET_SPE || TARGET_SPE_ABI \
+ || TARGET_E500_SINGLE || TARGET_E500_DOUBLE) \
+ { \
+ if (TARGET_ALTIVEC) \
+ error ("AltiVec and E500 instructions cannot coexist"); \
+ if (TARGET_VSX) \
+ error ("VSX and E500 instructions cannot coexist"); \
+ if (TARGET_64BIT) \
+ error ("64-bit E500 not supported"); \
+ if (TARGET_HARD_FLOAT && TARGET_FPRS) \
+ error ("E500 and FPRs not supported"); \
+ } \
+ } while (0)
+
+/* Override rs6000.h definition. */
+#undef HARD_REGNO_CALLER_SAVE_MODE
+/* When setting up caller-save slots (MODE == VOIDmode) ensure we
+ allocate space for DFmode. Save gprs in the correct mode too. */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (TARGET_E500_DOUBLE && ((MODE) == VOIDmode || (MODE) == DFmode) \
+ ? DFmode \
+ : choose_hard_reg_mode ((REGNO), (NREGS), false))
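As a worked example of the override: under -mfloat-gprs=double (so TARGET_E500_DOUBLE holds), HARD_REGNO_CALLER_SAVE_MODE (14, 1, VOIDmode) evaluates to DFmode, so the caller-save machinery allocates an 8-byte slot for r14. That matters because the e500 GPRs are 64 bits wide and a double lives entirely in one GPR; saving in a 32-bit mode would silently drop the upper half.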
diff --git a/gcc/config/rs6000/e500crtres32gpr.asm b/gcc/config/rs6000/e500crtres32gpr.asm
new file mode 100644
index 000000000..6fbff820b
--- /dev/null
+++ b/gcc/config/rs6000/e500crtres32gpr.asm
@@ -0,0 +1,73 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for restoring 32-bit integer registers, called by the compiler. */
+/* "Bare" versions that simply return to their caller. */
+
+HIDDEN_FUNC(_rest32gpr_14) lwz 14,-72(11)
+HIDDEN_FUNC(_rest32gpr_15) lwz 15,-68(11)
+HIDDEN_FUNC(_rest32gpr_16) lwz 16,-64(11)
+HIDDEN_FUNC(_rest32gpr_17) lwz 17,-60(11)
+HIDDEN_FUNC(_rest32gpr_18) lwz 18,-56(11)
+HIDDEN_FUNC(_rest32gpr_19) lwz 19,-52(11)
+HIDDEN_FUNC(_rest32gpr_20) lwz 20,-48(11)
+HIDDEN_FUNC(_rest32gpr_21) lwz 21,-44(11)
+HIDDEN_FUNC(_rest32gpr_22) lwz 22,-40(11)
+HIDDEN_FUNC(_rest32gpr_23) lwz 23,-36(11)
+HIDDEN_FUNC(_rest32gpr_24) lwz 24,-32(11)
+HIDDEN_FUNC(_rest32gpr_25) lwz 25,-28(11)
+HIDDEN_FUNC(_rest32gpr_26) lwz 26,-24(11)
+HIDDEN_FUNC(_rest32gpr_27) lwz 27,-20(11)
+HIDDEN_FUNC(_rest32gpr_28) lwz 28,-16(11)
+HIDDEN_FUNC(_rest32gpr_29) lwz 29,-12(11)
+HIDDEN_FUNC(_rest32gpr_30) lwz 30,-8(11)
+HIDDEN_FUNC(_rest32gpr_31) lwz 31,-4(11)
+ blr
+FUNC_END(_rest32gpr_31)
+FUNC_END(_rest32gpr_30)
+FUNC_END(_rest32gpr_29)
+FUNC_END(_rest32gpr_28)
+FUNC_END(_rest32gpr_27)
+FUNC_END(_rest32gpr_26)
+FUNC_END(_rest32gpr_25)
+FUNC_END(_rest32gpr_24)
+FUNC_END(_rest32gpr_23)
+FUNC_END(_rest32gpr_22)
+FUNC_END(_rest32gpr_21)
+FUNC_END(_rest32gpr_20)
+FUNC_END(_rest32gpr_19)
+FUNC_END(_rest32gpr_18)
+FUNC_END(_rest32gpr_17)
+FUNC_END(_rest32gpr_16)
+FUNC_END(_rest32gpr_15)
+FUNC_END(_rest32gpr_14)
+
+#endif
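Two things are worth noting about the listing above: the HIDDEN_FUNC entry points deliberately fall through one another, so a call to _rest32gpr_N restores registers N through 31 in a single stop, with r11 pointing just past the GPR save area; and the whole file compiles away unless __SPE__ is defined. The compiler emits calls to these routines when out-of-line prologue/epilogue code is a size win. A hedged illustration of the kind of source that can trigger them — the precise flags and register pressure required are assumptions, not a guarantee:

  /* Compiled at -Os for a powerpc eabispe target, a function that keeps
     many values live across a call may save and restore r14..r31 through
     the out-of-line routines instead of long inline stw/lwz sequences.  */
  extern int use8 (int, int, int, int, int, int, int, int);

  int
  many_live (int a, int b, int c, int d, int e, int f, int g, int h)
  {
    int t = use8 (a, b, c, d, e, f, g, h);  /* forces a..h into call-saved regs */
    return t + a + b + c + d + e + f + g + h;
  }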
diff --git a/gcc/config/rs6000/e500crtres64gpr.asm b/gcc/config/rs6000/e500crtres64gpr.asm
new file mode 100644
index 000000000..5182e5539
--- /dev/null
+++ b/gcc/config/rs6000/e500crtres64gpr.asm
@@ -0,0 +1,73 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for restoring 64-bit integer registers, called by the compiler. */
+/* "Bare" versions that return to their caller. */
+
+HIDDEN_FUNC(_rest64gpr_14) evldd 14,0(11)
+HIDDEN_FUNC(_rest64gpr_15) evldd 15,8(11)
+HIDDEN_FUNC(_rest64gpr_16) evldd 16,16(11)
+HIDDEN_FUNC(_rest64gpr_17) evldd 17,24(11)
+HIDDEN_FUNC(_rest64gpr_18) evldd 18,32(11)
+HIDDEN_FUNC(_rest64gpr_19) evldd 19,40(11)
+HIDDEN_FUNC(_rest64gpr_20) evldd 20,48(11)
+HIDDEN_FUNC(_rest64gpr_21) evldd 21,56(11)
+HIDDEN_FUNC(_rest64gpr_22) evldd 22,64(11)
+HIDDEN_FUNC(_rest64gpr_23) evldd 23,72(11)
+HIDDEN_FUNC(_rest64gpr_24) evldd 24,80(11)
+HIDDEN_FUNC(_rest64gpr_25) evldd 25,88(11)
+HIDDEN_FUNC(_rest64gpr_26) evldd 26,96(11)
+HIDDEN_FUNC(_rest64gpr_27) evldd 27,104(11)
+HIDDEN_FUNC(_rest64gpr_28) evldd 28,112(11)
+HIDDEN_FUNC(_rest64gpr_29) evldd 29,120(11)
+HIDDEN_FUNC(_rest64gpr_30) evldd 30,128(11)
+HIDDEN_FUNC(_rest64gpr_31) evldd 31,136(11)
+ blr
+FUNC_END(_rest64gpr_31)
+FUNC_END(_rest64gpr_30)
+FUNC_END(_rest64gpr_29)
+FUNC_END(_rest64gpr_28)
+FUNC_END(_rest64gpr_27)
+FUNC_END(_rest64gpr_26)
+FUNC_END(_rest64gpr_25)
+FUNC_END(_rest64gpr_24)
+FUNC_END(_rest64gpr_23)
+FUNC_END(_rest64gpr_22)
+FUNC_END(_rest64gpr_21)
+FUNC_END(_rest64gpr_20)
+FUNC_END(_rest64gpr_19)
+FUNC_END(_rest64gpr_18)
+FUNC_END(_rest64gpr_17)
+FUNC_END(_rest64gpr_16)
+FUNC_END(_rest64gpr_15)
+FUNC_END(_rest64gpr_14)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtres64gprctr.asm b/gcc/config/rs6000/e500crtres64gprctr.asm
new file mode 100644
index 000000000..74309d6be
--- /dev/null
+++ b/gcc/config/rs6000/e500crtres64gprctr.asm
@@ -0,0 +1,90 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for restoring 64-bit integer registers where the number of
+ registers to be restored is passed in CTR, called by the compiler. */
+
+HIDDEN_FUNC(_rest64gpr_ctr_14) evldd 14,0(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_15) evldd 15,8(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_16) evldd 16,16(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_17) evldd 17,24(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_18) evldd 18,32(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_19) evldd 19,40(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_20) evldd 20,48(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_21) evldd 21,56(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_22) evldd 22,64(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_23) evldd 23,72(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_24) evldd 24,80(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_25) evldd 25,88(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_26) evldd 26,96(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_27) evldd 27,104(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_28) evldd 28,112(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_29) evldd 29,120(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_30) evldd 30,128(11)
+ bdz _rest64gpr_ctr_done
+HIDDEN_FUNC(_rest64gpr_ctr_31) evldd 31,136(11)
+_rest64gpr_ctr_done: blr
+FUNC_END(_rest64gpr_ctr_31)
+FUNC_END(_rest64gpr_ctr_30)
+FUNC_END(_rest64gpr_ctr_29)
+FUNC_END(_rest64gpr_ctr_28)
+FUNC_END(_rest64gpr_ctr_27)
+FUNC_END(_rest64gpr_ctr_26)
+FUNC_END(_rest64gpr_ctr_25)
+FUNC_END(_rest64gpr_ctr_24)
+FUNC_END(_rest64gpr_ctr_23)
+FUNC_END(_rest64gpr_ctr_22)
+FUNC_END(_rest64gpr_ctr_21)
+FUNC_END(_rest64gpr_ctr_20)
+FUNC_END(_rest64gpr_ctr_19)
+FUNC_END(_rest64gpr_ctr_18)
+FUNC_END(_rest64gpr_ctr_17)
+FUNC_END(_rest64gpr_ctr_16)
+FUNC_END(_rest64gpr_ctr_15)
+FUNC_END(_rest64gpr_ctr_14)
+
+#endif
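The _ctr variants interleave a bdz (decrement CTR, branch if zero) after every load, so the caller chooses how many registers to restore at run time: to restore r20 through r26, for example, the compiler would load 7 into CTR with mtctr and branch to _rest64gpr_ctr_20 — the seventh evldd drops CTR to zero and the bdz exits through _rest64gpr_ctr_done. The final entry point, _rest64gpr_ctr_31, needs no bdz since it falls directly into the blr.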
diff --git a/gcc/config/rs6000/e500crtrest32gpr.asm b/gcc/config/rs6000/e500crtrest32gpr.asm
new file mode 100644
index 000000000..4e61010dc
--- /dev/null
+++ b/gcc/config/rs6000/e500crtrest32gpr.asm
@@ -0,0 +1,75 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for restoring 32-bit integer registers, called by the compiler. */
+/* "Tail" versions that perform a tail call. */
+
+HIDDEN_FUNC(_rest32gpr_14_t) lwz 14,-72(11)
+HIDDEN_FUNC(_rest32gpr_15_t) lwz 15,-68(11)
+HIDDEN_FUNC(_rest32gpr_16_t) lwz 16,-64(11)
+HIDDEN_FUNC(_rest32gpr_17_t) lwz 17,-60(11)
+HIDDEN_FUNC(_rest32gpr_18_t) lwz 18,-56(11)
+HIDDEN_FUNC(_rest32gpr_19_t) lwz 19,-52(11)
+HIDDEN_FUNC(_rest32gpr_20_t) lwz 20,-48(11)
+HIDDEN_FUNC(_rest32gpr_21_t) lwz 21,-44(11)
+HIDDEN_FUNC(_rest32gpr_22_t) lwz 22,-40(11)
+HIDDEN_FUNC(_rest32gpr_23_t) lwz 23,-36(11)
+HIDDEN_FUNC(_rest32gpr_24_t) lwz 24,-32(11)
+HIDDEN_FUNC(_rest32gpr_25_t) lwz 25,-28(11)
+HIDDEN_FUNC(_rest32gpr_26_t) lwz 26,-24(11)
+HIDDEN_FUNC(_rest32gpr_27_t) lwz 27,-20(11)
+HIDDEN_FUNC(_rest32gpr_28_t) lwz 28,-16(11)
+HIDDEN_FUNC(_rest32gpr_29_t) lwz 29,-12(11)
+HIDDEN_FUNC(_rest32gpr_30_t) lwz 30,-8(11)
+HIDDEN_FUNC(_rest32gpr_31_t) lwz 31,-4(11)
+ lwz 0,4(11)
+ mr 1,11
+ blr
+FUNC_END(_rest32gpr_31_t)
+FUNC_END(_rest32gpr_30_t)
+FUNC_END(_rest32gpr_29_t)
+FUNC_END(_rest32gpr_28_t)
+FUNC_END(_rest32gpr_27_t)
+FUNC_END(_rest32gpr_26_t)
+FUNC_END(_rest32gpr_25_t)
+FUNC_END(_rest32gpr_24_t)
+FUNC_END(_rest32gpr_23_t)
+FUNC_END(_rest32gpr_22_t)
+FUNC_END(_rest32gpr_21_t)
+FUNC_END(_rest32gpr_20_t)
+FUNC_END(_rest32gpr_19_t)
+FUNC_END(_rest32gpr_18_t)
+FUNC_END(_rest32gpr_17_t)
+FUNC_END(_rest32gpr_16_t)
+FUNC_END(_rest32gpr_15_t)
+FUNC_END(_rest32gpr_14_t)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtrest64gpr.asm b/gcc/config/rs6000/e500crtrest64gpr.asm
new file mode 100644
index 000000000..090786fdc
--- /dev/null
+++ b/gcc/config/rs6000/e500crtrest64gpr.asm
@@ -0,0 +1,74 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* "Tail" versions that perform a tail call. */
+
+HIDDEN_FUNC(_rest64gpr_14_t) evldd 14,0(11)
+HIDDEN_FUNC(_rest64gpr_15_t) evldd 15,8(11)
+HIDDEN_FUNC(_rest64gpr_16_t) evldd 16,16(11)
+HIDDEN_FUNC(_rest64gpr_17_t) evldd 17,24(11)
+HIDDEN_FUNC(_rest64gpr_18_t) evldd 18,32(11)
+HIDDEN_FUNC(_rest64gpr_19_t) evldd 19,40(11)
+HIDDEN_FUNC(_rest64gpr_20_t) evldd 20,48(11)
+HIDDEN_FUNC(_rest64gpr_21_t) evldd 21,56(11)
+HIDDEN_FUNC(_rest64gpr_22_t) evldd 22,64(11)
+HIDDEN_FUNC(_rest64gpr_23_t) evldd 23,72(11)
+HIDDEN_FUNC(_rest64gpr_24_t) evldd 24,80(11)
+HIDDEN_FUNC(_rest64gpr_25_t) evldd 25,88(11)
+HIDDEN_FUNC(_rest64gpr_26_t) evldd 26,96(11)
+HIDDEN_FUNC(_rest64gpr_27_t) evldd 27,104(11)
+HIDDEN_FUNC(_rest64gpr_28_t) evldd 28,112(11)
+HIDDEN_FUNC(_rest64gpr_29_t) evldd 29,120(11)
+HIDDEN_FUNC(_rest64gpr_30_t) evldd 30,128(11)
+HIDDEN_FUNC(_rest64gpr_31_t) lwz 0,148(11)
+ evldd 31,136(11)
+ addi 1,11,144
+ blr
+FUNC_END(_rest64gpr_31_t)
+FUNC_END(_rest64gpr_30_t)
+FUNC_END(_rest64gpr_29_t)
+FUNC_END(_rest64gpr_28_t)
+FUNC_END(_rest64gpr_27_t)
+FUNC_END(_rest64gpr_26_t)
+FUNC_END(_rest64gpr_25_t)
+FUNC_END(_rest64gpr_24_t)
+FUNC_END(_rest64gpr_23_t)
+FUNC_END(_rest64gpr_22_t)
+FUNC_END(_rest64gpr_21_t)
+FUNC_END(_rest64gpr_20_t)
+FUNC_END(_rest64gpr_19_t)
+FUNC_END(_rest64gpr_18_t)
+FUNC_END(_rest64gpr_17_t)
+FUNC_END(_rest64gpr_16_t)
+FUNC_END(_rest64gpr_15_t)
+FUNC_END(_rest64gpr_14_t)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtresx32gpr.asm b/gcc/config/rs6000/e500crtresx32gpr.asm
new file mode 100644
index 000000000..0b35245df
--- /dev/null
+++ b/gcc/config/rs6000/e500crtresx32gpr.asm
@@ -0,0 +1,75 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for restoring 32-bit integer registers, called by the compiler. */
+/* "Exit" versions that return to the caller's caller. */
+
+HIDDEN_FUNC(_rest32gpr_14_x) lwz 14,-72(11)
+HIDDEN_FUNC(_rest32gpr_15_x) lwz 15,-68(11)
+HIDDEN_FUNC(_rest32gpr_16_x) lwz 16,-64(11)
+HIDDEN_FUNC(_rest32gpr_17_x) lwz 17,-60(11)
+HIDDEN_FUNC(_rest32gpr_18_x) lwz 18,-56(11)
+HIDDEN_FUNC(_rest32gpr_19_x) lwz 19,-52(11)
+HIDDEN_FUNC(_rest32gpr_20_x) lwz 20,-48(11)
+HIDDEN_FUNC(_rest32gpr_21_x) lwz 21,-44(11)
+HIDDEN_FUNC(_rest32gpr_22_x) lwz 22,-40(11)
+HIDDEN_FUNC(_rest32gpr_23_x) lwz 23,-36(11)
+HIDDEN_FUNC(_rest32gpr_24_x) lwz 24,-32(11)
+HIDDEN_FUNC(_rest32gpr_25_x) lwz 25,-28(11)
+HIDDEN_FUNC(_rest32gpr_26_x) lwz 26,-24(11)
+HIDDEN_FUNC(_rest32gpr_27_x) lwz 27,-20(11)
+HIDDEN_FUNC(_rest32gpr_28_x) lwz 28,-16(11)
+HIDDEN_FUNC(_rest32gpr_29_x) lwz 29,-12(11)
+HIDDEN_FUNC(_rest32gpr_30_x) lwz 30,-8(11)
+HIDDEN_FUNC(_rest32gpr_31_x) lwz 0,4(11)
+ lwz 31,-4(11)
+ mr 1,11
+ mtlr 0
+ blr
+FUNC_END(_rest32gpr_31_x)
+FUNC_END(_rest32gpr_30_x)
+FUNC_END(_rest32gpr_29_x)
+FUNC_END(_rest32gpr_28_x)
+FUNC_END(_rest32gpr_27_x)
+FUNC_END(_rest32gpr_26_x)
+FUNC_END(_rest32gpr_25_x)
+FUNC_END(_rest32gpr_24_x)
+FUNC_END(_rest32gpr_23_x)
+FUNC_END(_rest32gpr_22_x)
+FUNC_END(_rest32gpr_21_x)
+FUNC_END(_rest32gpr_20_x)
+FUNC_END(_rest32gpr_19_x)
+FUNC_END(_rest32gpr_18_x)
+FUNC_END(_rest32gpr_17_x)
+FUNC_END(_rest32gpr_16_x)
+FUNC_END(_rest32gpr_15_x)
+FUNC_END(_rest32gpr_14_x)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtresx64gpr.asm b/gcc/config/rs6000/e500crtresx64gpr.asm
new file mode 100644
index 000000000..ce2a6cfa2
--- /dev/null
+++ b/gcc/config/rs6000/e500crtresx64gpr.asm
@@ -0,0 +1,75 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* "Exit" versions that return to their caller's caller. */
+
+HIDDEN_FUNC(_rest64gpr_14_x) evldd 14,0(11)
+HIDDEN_FUNC(_rest64gpr_15_x) evldd 15,8(11)
+HIDDEN_FUNC(_rest64gpr_16_x) evldd 16,16(11)
+HIDDEN_FUNC(_rest64gpr_17_x) evldd 17,24(11)
+HIDDEN_FUNC(_rest64gpr_18_x) evldd 18,32(11)
+HIDDEN_FUNC(_rest64gpr_19_x) evldd 19,40(11)
+HIDDEN_FUNC(_rest64gpr_20_x) evldd 20,48(11)
+HIDDEN_FUNC(_rest64gpr_21_x) evldd 21,56(11)
+HIDDEN_FUNC(_rest64gpr_22_x) evldd 22,64(11)
+HIDDEN_FUNC(_rest64gpr_23_x) evldd 23,72(11)
+HIDDEN_FUNC(_rest64gpr_24_x) evldd 24,80(11)
+HIDDEN_FUNC(_rest64gpr_25_x) evldd 25,88(11)
+HIDDEN_FUNC(_rest64gpr_26_x) evldd 26,96(11)
+HIDDEN_FUNC(_rest64gpr_27_x) evldd 27,104(11)
+HIDDEN_FUNC(_rest64gpr_28_x) evldd 28,112(11)
+HIDDEN_FUNC(_rest64gpr_29_x) evldd 29,120(11)
+HIDDEN_FUNC(_rest64gpr_30_x) evldd 30,128(11)
+HIDDEN_FUNC(_rest64gpr_31_x) lwz 0,148(11)
+ evldd 31,136(11)
+ addi 1,11,144
+ mtlr 0
+ blr
+FUNC_END(_rest64gpr_31_x)
+FUNC_END(_rest64gpr_30_x)
+FUNC_END(_rest64gpr_29_x)
+FUNC_END(_rest64gpr_28_x)
+FUNC_END(_rest64gpr_27_x)
+FUNC_END(_rest64gpr_26_x)
+FUNC_END(_rest64gpr_25_x)
+FUNC_END(_rest64gpr_24_x)
+FUNC_END(_rest64gpr_23_x)
+FUNC_END(_rest64gpr_22_x)
+FUNC_END(_rest64gpr_21_x)
+FUNC_END(_rest64gpr_20_x)
+FUNC_END(_rest64gpr_19_x)
+FUNC_END(_rest64gpr_18_x)
+FUNC_END(_rest64gpr_17_x)
+FUNC_END(_rest64gpr_16_x)
+FUNC_END(_rest64gpr_15_x)
+FUNC_END(_rest64gpr_14_x)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsav32gpr.asm b/gcc/config/rs6000/e500crtsav32gpr.asm
new file mode 100644
index 000000000..c89103050
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsav32gpr.asm
@@ -0,0 +1,73 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 32-bit integer registers, called by the compiler. */
+/* "Bare" versions that simply return to their caller. */
+
+HIDDEN_FUNC(_save32gpr_14) stw 14,-72(11)
+HIDDEN_FUNC(_save32gpr_15) stw 15,-68(11)
+HIDDEN_FUNC(_save32gpr_16) stw 16,-64(11)
+HIDDEN_FUNC(_save32gpr_17) stw 17,-60(11)
+HIDDEN_FUNC(_save32gpr_18) stw 18,-56(11)
+HIDDEN_FUNC(_save32gpr_19) stw 19,-52(11)
+HIDDEN_FUNC(_save32gpr_20) stw 20,-48(11)
+HIDDEN_FUNC(_save32gpr_21) stw 21,-44(11)
+HIDDEN_FUNC(_save32gpr_22) stw 22,-40(11)
+HIDDEN_FUNC(_save32gpr_23) stw 23,-36(11)
+HIDDEN_FUNC(_save32gpr_24) stw 24,-32(11)
+HIDDEN_FUNC(_save32gpr_25) stw 25,-28(11)
+HIDDEN_FUNC(_save32gpr_26) stw 26,-24(11)
+HIDDEN_FUNC(_save32gpr_27) stw 27,-20(11)
+HIDDEN_FUNC(_save32gpr_28) stw 28,-16(11)
+HIDDEN_FUNC(_save32gpr_29) stw 29,-12(11)
+HIDDEN_FUNC(_save32gpr_30) stw 30,-8(11)
+HIDDEN_FUNC(_save32gpr_31) stw 31,-4(11)
+ blr
+FUNC_END(_save32gpr_31)
+FUNC_END(_save32gpr_30)
+FUNC_END(_save32gpr_29)
+FUNC_END(_save32gpr_28)
+FUNC_END(_save32gpr_27)
+FUNC_END(_save32gpr_26)
+FUNC_END(_save32gpr_25)
+FUNC_END(_save32gpr_24)
+FUNC_END(_save32gpr_23)
+FUNC_END(_save32gpr_22)
+FUNC_END(_save32gpr_21)
+FUNC_END(_save32gpr_20)
+FUNC_END(_save32gpr_19)
+FUNC_END(_save32gpr_18)
+FUNC_END(_save32gpr_17)
+FUNC_END(_save32gpr_16)
+FUNC_END(_save32gpr_15)
+FUNC_END(_save32gpr_14)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsav64gpr.asm b/gcc/config/rs6000/e500crtsav64gpr.asm
new file mode 100644
index 000000000..2a5d3e475
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsav64gpr.asm
@@ -0,0 +1,72 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 64-bit integer registers, called by the compiler. */
+
+HIDDEN_FUNC(_save64gpr_14) evstdd 14,0(11)
+HIDDEN_FUNC(_save64gpr_15) evstdd 15,8(11)
+HIDDEN_FUNC(_save64gpr_16) evstdd 16,16(11)
+HIDDEN_FUNC(_save64gpr_17) evstdd 17,24(11)
+HIDDEN_FUNC(_save64gpr_18) evstdd 18,32(11)
+HIDDEN_FUNC(_save64gpr_19) evstdd 19,40(11)
+HIDDEN_FUNC(_save64gpr_20) evstdd 20,48(11)
+HIDDEN_FUNC(_save64gpr_21) evstdd 21,56(11)
+HIDDEN_FUNC(_save64gpr_22) evstdd 22,64(11)
+HIDDEN_FUNC(_save64gpr_23) evstdd 23,72(11)
+HIDDEN_FUNC(_save64gpr_24) evstdd 24,80(11)
+HIDDEN_FUNC(_save64gpr_25) evstdd 25,88(11)
+HIDDEN_FUNC(_save64gpr_26) evstdd 26,96(11)
+HIDDEN_FUNC(_save64gpr_27) evstdd 27,104(11)
+HIDDEN_FUNC(_save64gpr_28) evstdd 28,112(11)
+HIDDEN_FUNC(_save64gpr_29) evstdd 29,120(11)
+HIDDEN_FUNC(_save64gpr_30) evstdd 30,128(11)
+HIDDEN_FUNC(_save64gpr_31) evstdd 31,136(11)
+ blr
+FUNC_END(_save64gpr_31)
+FUNC_END(_save64gpr_30)
+FUNC_END(_save64gpr_29)
+FUNC_END(_save64gpr_28)
+FUNC_END(_save64gpr_27)
+FUNC_END(_save64gpr_26)
+FUNC_END(_save64gpr_25)
+FUNC_END(_save64gpr_24)
+FUNC_END(_save64gpr_23)
+FUNC_END(_save64gpr_22)
+FUNC_END(_save64gpr_21)
+FUNC_END(_save64gpr_20)
+FUNC_END(_save64gpr_19)
+FUNC_END(_save64gpr_18)
+FUNC_END(_save64gpr_17)
+FUNC_END(_save64gpr_16)
+FUNC_END(_save64gpr_15)
+FUNC_END(_save64gpr_14)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsav64gprctr.asm b/gcc/config/rs6000/e500crtsav64gprctr.asm
new file mode 100644
index 000000000..dd0bdf3c8
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsav64gprctr.asm
@@ -0,0 +1,91 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 64-bit integer registers where the number of
+ registers to be saved is passed in CTR, called by the compiler. */
+/* "Bare" versions that return to their caller. */
+
+HIDDEN_FUNC(_save64gpr_ctr_14) evstdd 14,0(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_15) evstdd 15,8(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_16) evstdd 16,16(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_17) evstdd 17,24(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_18) evstdd 18,32(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_19) evstdd 19,40(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_20) evstdd 20,48(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_21) evstdd 21,56(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_22) evstdd 22,64(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_23) evstdd 23,72(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_24) evstdd 24,80(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_25) evstdd 25,88(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_26) evstdd 26,96(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_27) evstdd 27,104(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_28) evstdd 28,112(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_29) evstdd 29,120(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_30) evstdd 30,128(11)
+ bdz _save64gpr_ctr_done
+HIDDEN_FUNC(_save64gpr_ctr_31) evstdd 31,136(11)
+_save64gpr_ctr_done: blr
+FUNC_END(_save64gpr_ctr_31)
+FUNC_END(_save64gpr_ctr_30)
+FUNC_END(_save64gpr_ctr_29)
+FUNC_END(_save64gpr_ctr_28)
+FUNC_END(_save64gpr_ctr_27)
+FUNC_END(_save64gpr_ctr_26)
+FUNC_END(_save64gpr_ctr_25)
+FUNC_END(_save64gpr_ctr_24)
+FUNC_END(_save64gpr_ctr_23)
+FUNC_END(_save64gpr_ctr_22)
+FUNC_END(_save64gpr_ctr_21)
+FUNC_END(_save64gpr_ctr_20)
+FUNC_END(_save64gpr_ctr_19)
+FUNC_END(_save64gpr_ctr_18)
+FUNC_END(_save64gpr_ctr_17)
+FUNC_END(_save64gpr_ctr_16)
+FUNC_END(_save64gpr_ctr_15)
+FUNC_END(_save64gpr_ctr_14)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsavg32gpr.asm b/gcc/config/rs6000/e500crtsavg32gpr.asm
new file mode 100644
index 000000000..d14088e0d
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsavg32gpr.asm
@@ -0,0 +1,73 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 32-bit integer registers, called by the compiler. */
+/* "GOT" versions that load the address of the GOT into lr before returning. */
+
+HIDDEN_FUNC(_save32gpr_14_g) stw 14,-72(11)
+HIDDEN_FUNC(_save32gpr_15_g) stw 15,-68(11)
+HIDDEN_FUNC(_save32gpr_16_g) stw 16,-64(11)
+HIDDEN_FUNC(_save32gpr_17_g) stw 17,-60(11)
+HIDDEN_FUNC(_save32gpr_18_g) stw 18,-56(11)
+HIDDEN_FUNC(_save32gpr_19_g) stw 19,-52(11)
+HIDDEN_FUNC(_save32gpr_20_g) stw 20,-48(11)
+HIDDEN_FUNC(_save32gpr_21_g) stw 21,-44(11)
+HIDDEN_FUNC(_save32gpr_22_g) stw 22,-40(11)
+HIDDEN_FUNC(_save32gpr_23_g) stw 23,-36(11)
+HIDDEN_FUNC(_save32gpr_24_g) stw 24,-32(11)
+HIDDEN_FUNC(_save32gpr_25_g) stw 25,-28(11)
+HIDDEN_FUNC(_save32gpr_26_g) stw 26,-24(11)
+HIDDEN_FUNC(_save32gpr_27_g) stw 27,-20(11)
+HIDDEN_FUNC(_save32gpr_28_g) stw 28,-16(11)
+HIDDEN_FUNC(_save32gpr_29_g) stw 29,-12(11)
+HIDDEN_FUNC(_save32gpr_30_g) stw 30,-8(11)
+HIDDEN_FUNC(_save32gpr_31_g) stw 31,-4(11)
+ b _GLOBAL_OFFSET_TABLE_-4
+FUNC_END(_save32gpr_31_g)
+FUNC_END(_save32gpr_30_g)
+FUNC_END(_save32gpr_29_g)
+FUNC_END(_save32gpr_28_g)
+FUNC_END(_save32gpr_27_g)
+FUNC_END(_save32gpr_26_g)
+FUNC_END(_save32gpr_25_g)
+FUNC_END(_save32gpr_24_g)
+FUNC_END(_save32gpr_23_g)
+FUNC_END(_save32gpr_22_g)
+FUNC_END(_save32gpr_21_g)
+FUNC_END(_save32gpr_20_g)
+FUNC_END(_save32gpr_19_g)
+FUNC_END(_save32gpr_18_g)
+FUNC_END(_save32gpr_17_g)
+FUNC_END(_save32gpr_16_g)
+FUNC_END(_save32gpr_15_g)
+FUNC_END(_save32gpr_14_g)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsavg64gpr.asm b/gcc/config/rs6000/e500crtsavg64gpr.asm
new file mode 100644
index 000000000..cbad75bc0
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsavg64gpr.asm
@@ -0,0 +1,73 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 64-bit integer registers, called by the compiler. */
+/* "GOT" versions that load the address of the GOT into lr before returning. */
+
+HIDDEN_FUNC(_save64gpr_14_g) evstdd 14,0(11)
+HIDDEN_FUNC(_save64gpr_15_g) evstdd 15,8(11)
+HIDDEN_FUNC(_save64gpr_16_g) evstdd 16,16(11)
+HIDDEN_FUNC(_save64gpr_17_g) evstdd 17,24(11)
+HIDDEN_FUNC(_save64gpr_18_g) evstdd 18,32(11)
+HIDDEN_FUNC(_save64gpr_19_g) evstdd 19,40(11)
+HIDDEN_FUNC(_save64gpr_20_g) evstdd 20,48(11)
+HIDDEN_FUNC(_save64gpr_21_g) evstdd 21,56(11)
+HIDDEN_FUNC(_save64gpr_22_g) evstdd 22,64(11)
+HIDDEN_FUNC(_save64gpr_23_g) evstdd 23,72(11)
+HIDDEN_FUNC(_save64gpr_24_g) evstdd 24,80(11)
+HIDDEN_FUNC(_save64gpr_25_g) evstdd 25,88(11)
+HIDDEN_FUNC(_save64gpr_26_g) evstdd 26,96(11)
+HIDDEN_FUNC(_save64gpr_27_g) evstdd 27,104(11)
+HIDDEN_FUNC(_save64gpr_28_g) evstdd 28,112(11)
+HIDDEN_FUNC(_save64gpr_29_g) evstdd 29,120(11)
+HIDDEN_FUNC(_save64gpr_30_g) evstdd 30,128(11)
+HIDDEN_FUNC(_save64gpr_31_g) evstdd 31,136(11)
+ b _GLOBAL_OFFSET_TABLE_-4
+FUNC_END(_save64gpr_31_g)
+FUNC_END(_save64gpr_30_g)
+FUNC_END(_save64gpr_29_g)
+FUNC_END(_save64gpr_28_g)
+FUNC_END(_save64gpr_27_g)
+FUNC_END(_save64gpr_26_g)
+FUNC_END(_save64gpr_25_g)
+FUNC_END(_save64gpr_24_g)
+FUNC_END(_save64gpr_23_g)
+FUNC_END(_save64gpr_22_g)
+FUNC_END(_save64gpr_21_g)
+FUNC_END(_save64gpr_20_g)
+FUNC_END(_save64gpr_19_g)
+FUNC_END(_save64gpr_18_g)
+FUNC_END(_save64gpr_17_g)
+FUNC_END(_save64gpr_16_g)
+FUNC_END(_save64gpr_15_g)
+FUNC_END(_save64gpr_14_g)
+
+#endif
diff --git a/gcc/config/rs6000/e500crtsavg64gprctr.asm b/gcc/config/rs6000/e500crtsavg64gprctr.asm
new file mode 100644
index 000000000..238df4e83
--- /dev/null
+++ b/gcc/config/rs6000/e500crtsavg64gprctr.asm
@@ -0,0 +1,90 @@
+/*
+ * Special support for e500 eabi and SVR4
+ *
+ * Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ * Written by Nathan Froyd
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifdef __SPE__
+
+/* Routines for saving 64-bit integer registers, called by the compiler. */
+/* "GOT" versions that load the address of the GOT into lr before returning. */
+
+HIDDEN_FUNC(_save64gpr_ctr_14_g) evstdd 14,0(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_15_g) evstdd 15,8(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_16_g) evstdd 16,16(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_17_g) evstdd 17,24(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_18_g) evstdd 18,32(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_19_g) evstdd 19,40(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_20_g) evstdd 20,48(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_21_g) evstdd 21,56(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_22_g) evstdd 22,64(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_23_g) evstdd 23,72(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_24_g) evstdd 24,80(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_25_g) evstdd 25,88(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_26_g) evstdd 26,96(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_27_g) evstdd 27,104(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_28_g) evstdd 28,112(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_29_g) evstdd 29,120(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_30_g) evstdd 30,128(11)
+ bdz _save64gpr_ctr_g_done
+HIDDEN_FUNC(_save64gpr_ctr_31_g) evstdd 31,136(11)
+_save64gpr_ctr_g_done: b _GLOBAL_OFFSET_TABLE_-4
+FUNC_END(_save64gpr_ctr_31_g)
+FUNC_END(_save64gpr_ctr_30_g)
+FUNC_END(_save64gpr_ctr_29_g)
+FUNC_END(_save64gpr_ctr_28_g)
+FUNC_END(_save64gpr_ctr_27_g)
+FUNC_END(_save64gpr_ctr_26_g)
+FUNC_END(_save64gpr_ctr_25_g)
+FUNC_END(_save64gpr_ctr_24_g)
+FUNC_END(_save64gpr_ctr_23_g)
+FUNC_END(_save64gpr_ctr_22_g)
+FUNC_END(_save64gpr_ctr_21_g)
+FUNC_END(_save64gpr_ctr_20_g)
+FUNC_END(_save64gpr_ctr_19_g)
+FUNC_END(_save64gpr_ctr_18_g)
+FUNC_END(_save64gpr_ctr_17_g)
+FUNC_END(_save64gpr_ctr_16_g)
+FUNC_END(_save64gpr_ctr_15_g)
+FUNC_END(_save64gpr_ctr_14_g)
+
+#endif
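
The _ctr chain just above differs from the plain save entry points in that the caller preloads CTR with the number of registers to store; each bdz drops out of the chain once the count is exhausted and lands on the shared tail that returns via the GOT. A rough C model of the behavior (hypothetical names; the array stands in for the register file):

/* Model of the _save64gpr_ctr_N_g chain: entering at register FIRST
   stores 64-bit registers FIRST, FIRST+1, ... until CTR reaches zero,
   at 8-byte offsets from the base address held in r11.  */
static void
save64gpr_ctr_model (unsigned long long *base /* r11 */,
                     const unsigned long long regs[32],
                     int first, unsigned int ctr)
{
  int r;
  for (r = first; r <= 31; r++)
    {
      base[r - 14] = regs[r];   /* evstdd r, 8*(r-14)(11) */
      if (--ctr == 0)           /* bdz _save64gpr_ctr_g_done */
        break;
    }
  /* fall through to: b _GLOBAL_OFFSET_TABLE_-4 */
}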
diff --git a/gcc/config/rs6000/e500mc.md b/gcc/config/rs6000/e500mc.md
new file mode 100644
index 000000000..99a4b80ec
--- /dev/null
+++ b/gcc/config/rs6000/e500mc.md
@@ -0,0 +1,200 @@
+;; Pipeline description for Freescale PowerPC e500mc core.
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Edmar Wienskoski (edmar@freescale.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; e500mc 32-bit SU(2), LSU, FPU, BPU
+;; Max issue 3 insns/clock cycle (includes 1 branch)
+;; FP is half clocked; other instruction timings are as on the e500v2.
+
+(define_automaton "e500mc_most,e500mc_long,e500mc_retire")
+(define_cpu_unit "e500mc_decode_0,e500mc_decode_1" "e500mc_most")
+(define_cpu_unit "e500mc_issue_0,e500mc_issue_1" "e500mc_most")
+(define_cpu_unit "e500mc_retire_0,e500mc_retire_1" "e500mc_retire")
+
+;; SU.
+(define_cpu_unit "e500mc_su0_stage0,e500mc_su1_stage0" "e500mc_most")
+
+;; MU.
+(define_cpu_unit "e500mc_mu_stage0,e500mc_mu_stage1" "e500mc_most")
+(define_cpu_unit "e500mc_mu_stage2,e500mc_mu_stage3" "e500mc_most")
+
+;; Non-pipelined division.
+(define_cpu_unit "e500mc_mu_div" "e500mc_long")
+
+;; LSU.
+(define_cpu_unit "e500mc_lsu" "e500mc_most")
+
+;; FPU.
+(define_cpu_unit "e500mc_fpu" "e500mc_most")
+
+;; Branch unit.
+(define_cpu_unit "e500mc_bu" "e500mc_most")
+
+;; The following units are used to make the automata deterministic.
+(define_cpu_unit "present_e500mc_decode_0" "e500mc_most")
+(define_cpu_unit "present_e500mc_issue_0" "e500mc_most")
+(define_cpu_unit "present_e500mc_retire_0" "e500mc_retire")
+(define_cpu_unit "present_e500mc_su0_stage0" "e500mc_most")
+
+;; The following presence sets make the automata deterministic when the ndfa option is used.
+(presence_set "present_e500mc_decode_0" "e500mc_decode_0")
+(presence_set "present_e500mc_issue_0" "e500mc_issue_0")
+(presence_set "present_e500mc_retire_0" "e500mc_retire_0")
+(presence_set "present_e500mc_su0_stage0" "e500mc_su0_stage0")
+
+;; Some useful abbreviations.
+(define_reservation "e500mc_decode"
+ "e500mc_decode_0|e500mc_decode_1+present_e500mc_decode_0")
+(define_reservation "e500mc_issue"
+ "e500mc_issue_0|e500mc_issue_1+present_e500mc_issue_0")
+(define_reservation "e500mc_retire"
+ "e500mc_retire_0|e500mc_retire_1+present_e500mc_retire_0")
+(define_reservation "e500mc_su_stage0"
+ "e500mc_su0_stage0|e500mc_su1_stage0+present_e500mc_su0_stage0")
+
+;; Simple SU insns.
+(define_insn_reservation "e500mc_su" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,cmp,compare,\
+ delayed_compare,var_delayed_compare,fast_compare,\
+ shift,trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire")
+
+(define_insn_reservation "e500mc_two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire,\
+ e500mc_issue+e500mc_su_stage0+e500mc_retire")
+
+(define_insn_reservation "e500mc_three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire,\
+ e500mc_issue+e500mc_su_stage0+e500mc_retire,\
+ e500mc_issue+e500mc_su_stage0+e500mc_retire")
+
+;; Multiply.
+(define_insn_reservation "e500mc_multiply" 4
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_mu_stage0,e500mc_mu_stage1,\
+ e500mc_mu_stage2,e500mc_mu_stage3+e500mc_retire")
+
+;; Divide. We use the average latency time here.
+(define_insn_reservation "e500mc_divide" 14
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_mu_stage0+e500mc_mu_div,\
+ e500mc_mu_div*13")
+
+;; Branch.
+(define_insn_reservation "e500mc_branch" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_bu,e500mc_retire")
+
+;; CR logical.
+(define_insn_reservation "e500mc_cr_logical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_bu,e500mc_retire")
+
+;; Mfcr.
+(define_insn_reservation "e500mc_mfcr" 1
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su1_stage0+e500mc_retire")
+
+;; Mtcrf.
+(define_insn_reservation "e500mc_mtcrf" 1
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su1_stage0+e500mc_retire")
+
+;; Mtjmpr.
+(define_insn_reservation "e500mc_mtjmpr" 1
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire")
+
+;; Brinc.
+(define_insn_reservation "e500mc_brinc" 1
+ (and (eq_attr "type" "brinc")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_su_stage0+e500mc_retire")
+
+;; Loads.
+(define_insn_reservation "e500mc_load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,sync")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire")
+
+(define_insn_reservation "e500mc_fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_lsu,nothing*2,e500mc_retire")
+
+;; Stores.
+(define_insn_reservation "e500mc_store" 3
+ (and (eq_attr "type" "store,store_ux,store_u,store_c")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire")
+
+(define_insn_reservation "e500mc_fpstore" 3
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_lsu,nothing,e500mc_retire")
+
+;; The following reservations ignore the retire unit to avoid a large automaton.
+
+;; Simple FP.
+(define_insn_reservation "e500mc_simple_float" 8
+ (and (eq_attr "type" "fpsimple")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu")
+; "e500mc_decode,e500mc_issue+e500mc_fpu,nothing*6,e500mc_retire")
+
+;; FP.
+(define_insn_reservation "e500mc_float" 8
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu")
+; "e500mc_decode,e500mc_issue+e500mc_fpu,nothing*6,e500mc_retire")
+
+(define_insn_reservation "e500mc_fpcompare" 8
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu")
+
+(define_insn_reservation "e500mc_dmul" 10
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu")
+
+;; FP divides are not pipelined.
+(define_insn_reservation "e500mc_sdiv" 36
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu,e500mc_fpu*35")
+
+(define_insn_reservation "e500mc_ddiv" 66
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppce500mc"))
+ "e500mc_decode,e500mc_issue+e500mc_fpu,e500mc_fpu*65")
diff --git a/gcc/config/rs6000/e500mc64.md b/gcc/config/rs6000/e500mc64.md
new file mode 100644
index 000000000..8507514f5
--- /dev/null
+++ b/gcc/config/rs6000/e500mc64.md
@@ -0,0 +1,191 @@
+;; Pipeline description for Freescale PowerPC e500mc64 core.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;; Contributed by Edmar Wienskoski (edmar@freescale.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; e500mc64 64-bit SU(2), LSU, FPU, BPU
+;; Max issue 3 insns/clock cycle (includes 1 branch)
+
+(define_automaton "e500mc64_most,e500mc64_long,e500mc64_retire")
+(define_cpu_unit "e500mc64_decode_0,e500mc64_decode_1" "e500mc64_most")
+(define_cpu_unit "e500mc64_issue_0,e500mc64_issue_1" "e500mc64_most")
+(define_cpu_unit "e500mc64_retire_0,e500mc64_retire_1" "e500mc64_retire")
+
+;; SU.
+(define_cpu_unit "e500mc64_su0_stage0,e500mc64_su1_stage0" "e500mc64_most")
+
+;; MU.
+(define_cpu_unit "e500mc64_mu_stage0,e500mc64_mu_stage1" "e500mc64_most")
+(define_cpu_unit "e500mc64_mu_stage2,e500mc64_mu_stage3" "e500mc64_most")
+
+;; Non-pipelined division.
+(define_cpu_unit "e500mc64_mu_div" "e500mc64_long")
+
+;; LSU.
+(define_cpu_unit "e500mc64_lsu" "e500mc64_most")
+
+;; FPU.
+(define_cpu_unit "e500mc64_fpu" "e500mc64_most")
+
+;; Branch unit.
+(define_cpu_unit "e500mc64_bu" "e500mc64_most")
+
+;; The following units are used to make the automata deterministic.
+(define_cpu_unit "present_e500mc64_decode_0" "e500mc64_most")
+(define_cpu_unit "present_e500mc64_issue_0" "e500mc64_most")
+(define_cpu_unit "present_e500mc64_retire_0" "e500mc64_retire")
+(define_cpu_unit "present_e500mc64_su0_stage0" "e500mc64_most")
+
+;; The following presence sets make the automata deterministic when the ndfa option is used.
+(presence_set "present_e500mc64_decode_0" "e500mc64_decode_0")
+(presence_set "present_e500mc64_issue_0" "e500mc64_issue_0")
+(presence_set "present_e500mc64_retire_0" "e500mc64_retire_0")
+(presence_set "present_e500mc64_su0_stage0" "e500mc64_su0_stage0")
+
+;; Some useful abbreviations.
+(define_reservation "e500mc64_decode"
+ "e500mc64_decode_0|e500mc64_decode_1+present_e500mc64_decode_0")
+(define_reservation "e500mc64_issue"
+ "e500mc64_issue_0|e500mc64_issue_1+present_e500mc64_issue_0")
+(define_reservation "e500mc64_retire"
+ "e500mc64_retire_0|e500mc64_retire_1+present_e500mc64_retire_0")
+(define_reservation "e500mc64_su_stage0"
+ "e500mc64_su0_stage0|e500mc64_su1_stage0+present_e500mc64_su0_stage0")
+
+;; Simple SU insns.
+(define_insn_reservation "e500mc64_su" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,delayed_compare,\
+ shift,cntlz,exts")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire")
+
+(define_insn_reservation "e500mc64_su2" 2
+ (and (eq_attr "type" "cmp,compare,delayed_compare,fast_compare,trap")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0,e500mc64_retire")
+
+(define_insn_reservation "e500mc64_delayed" 2
+ (and (eq_attr "type" "var_shift_rotate,var_delayed_compare")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0,e500mc64_retire")
+
+(define_insn_reservation "e500mc64_two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\
+ e500mc64_issue+e500mc64_su_stage0+e500mc64_retire")
+
+(define_insn_reservation "e500mc64_three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\
+ e500mc64_issue+e500mc64_su_stage0+e500mc64_retire,\
+ e500mc64_issue+e500mc64_su_stage0+e500mc64_retire")
+
+;; Multiply.
+(define_insn_reservation "e500mc64_multiply" 4
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_mu_stage0,e500mc64_mu_stage1,\
+ e500mc64_mu_stage2,e500mc64_mu_stage3+e500mc64_retire")
+
+;; Divide. We use the average latency time here.
+(define_insn_reservation "e500mc64_divide" 14
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_mu_stage0+e500mc64_mu_div,\
+ e500mc64_mu_div*13")
+
+;; Branch.
+(define_insn_reservation "e500mc64_branch" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_bu,e500mc64_retire")
+
+;; CR logical.
+(define_insn_reservation "e500mc64_cr_logical" 1
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_bu,e500mc64_retire")
+
+;; Mfcr.
+(define_insn_reservation "e500mc64_mfcr" 4
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su1_stage0,e500mc64_su1_stage0*3+e500mc64_retire")
+
+;; Mtcrf.
+(define_insn_reservation "e500mc64_mtcrf" 1
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su1_stage0+e500mc64_retire")
+
+;; Mtjmpr.
+(define_insn_reservation "e500mc64_mtjmpr" 1
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire")
+
+;; Brinc.
+(define_insn_reservation "e500mc64_brinc" 1
+ (and (eq_attr "type" "brinc")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_su_stage0+e500mc64_retire")
+
+;; Loads.
+(define_insn_reservation "e500mc64_load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,sync")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire")
+
+(define_insn_reservation "e500mc64_fpload" 4
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing*2,e500mc64_retire")
+
+;; Stores.
+(define_insn_reservation "e500mc64_store" 3
+ (and (eq_attr "type" "store,store_ux,store_u,store_c")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire")
+
+(define_insn_reservation "e500mc64_fpstore" 3
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_lsu,nothing,e500mc64_retire")
+
+;; The following reservations ignore the retire unit to avoid a large automaton.
+
+;; FP.
+(define_insn_reservation "e500mc64_float" 7
+ (and (eq_attr "type" "fpsimple,fp,fpcompare,dmul")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_fpu")
+; "e500mc64_decode,e500mc64_issue+e500mc64_fpu,nothing*5,e500mc64_retire")
+
+;; FP divides are not pipelined.
+(define_insn_reservation "e500mc64_sdiv" 20
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_fpu,e500mc64_fpu*19")
+
+(define_insn_reservation "e500mc64_ddiv" 35
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppce500mc64"))
+ "e500mc64_decode,e500mc64_issue+e500mc64_fpu,e500mc64_fpu*34")
diff --git a/gcc/config/rs6000/eabi-ci.asm b/gcc/config/rs6000/eabi-ci.asm
new file mode 100644
index 000000000..696f33d39
--- /dev/null
+++ b/gcc/config/rs6000/eabi-ci.asm
@@ -0,0 +1,113 @@
+/* crti.s for eabi
+ Copyright (C) 1996, 2000, 2008, 2009 Free Software Foundation, Inc.
+ Written By Michael Meissner
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file just supplies labeled starting points for the .got* and other
+ special sections. It is linked in first, before all other modules. */
+
+ .ident "GNU C crti.s"
+
+#include <ppc-asm.h>
+
+#ifndef __powerpc64__
+ .section ".got","aw"
+ .globl __GOT_START__
+ .type __GOT_START__,@object
+__GOT_START__:
+
+ .section ".got1","aw"
+ .globl __GOT1_START__
+ .type __GOT1_START__,@object
+__GOT1_START__:
+
+ .section ".got2","aw"
+ .globl __GOT2_START__
+ .type __GOT2_START__,@object
+__GOT2_START__:
+
+ .section ".fixup","aw"
+ .globl __FIXUP_START__
+ .type __FIXUP_START__,@object
+__FIXUP_START__:
+
+ .section ".ctors","aw"
+ .globl __CTOR_LIST__
+ .type __CTOR_LIST__,@object
+__CTOR_LIST__:
+
+ .section ".dtors","aw"
+ .globl __DTOR_LIST__
+ .type __DTOR_LIST__,@object
+__DTOR_LIST__:
+
+ .section ".sdata","aw"
+ .globl __SDATA_START__
+ .type __SDATA_START__,@object
+ .weak _SDA_BASE_
+ .type _SDA_BASE_,@object
+__SDATA_START__:
+_SDA_BASE_:
+
+ .section ".sbss","aw",@nobits
+ .globl __SBSS_START__
+ .type __SBSS_START__,@object
+__SBSS_START__:
+
+ .section ".sdata2","a"
+ .weak _SDA2_BASE_
+ .type _SDA2_BASE_,@object
+ .globl __SDATA2_START__
+ .type __SDATA2_START__,@object
+__SDATA2_START__:
+_SDA2_BASE_:
+
+ .section ".sbss2","a"
+ .globl __SBSS2_START__
+ .type __SBSS2_START__,@object
+__SBSS2_START__:
+
+ .section ".gcc_except_table","aw"
+ .globl __EXCEPT_START__
+ .type __EXCEPT_START__,@object
+__EXCEPT_START__:
+
+ .section ".eh_frame","aw"
+ .globl __EH_FRAME_BEGIN__
+ .type __EH_FRAME_BEGIN__,@object
+__EH_FRAME_BEGIN__:
+
+/* Head of __init function used for static constructors. */
+ .section ".init","ax"
+ .align 2
+FUNC_START(__init)
+ stwu 1,-16(1)
+ mflr 0
+ stw 0,20(1)
+
+/* Head of __fini function used for static destructors. */
+ .section ".fini","ax"
+ .align 2
+FUNC_START(__fini)
+ stwu 1,-16(1)
+ mflr 0
+ stw 0,20(1)
+#endif
diff --git a/gcc/config/rs6000/eabi-cn.asm b/gcc/config/rs6000/eabi-cn.asm
new file mode 100644
index 000000000..68774097c
--- /dev/null
+++ b/gcc/config/rs6000/eabi-cn.asm
@@ -0,0 +1,104 @@
+/* crtn.s for eabi
+ Copyright (C) 1996, 2000, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Written By Michael Meissner
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file just supplies labeled ending points for the .got* and other
+ special sections. It is linked in last, after all other modules. */
+
+ .ident "GNU C crtn.s"
+
+#ifndef __powerpc64__
+ .section ".got","aw"
+ .globl __GOT_END__
+ .type __GOT_END__,@object
+__GOT_END__:
+
+ .section ".got1","aw"
+ .globl __GOT1_END__
+ .type __GOT1_END__,@object
+__GOT1_END__:
+
+ .section ".got2","aw"
+ .globl __GOT2_END__
+ .type __GOT2_END__,@object
+__GOT2_END__:
+
+ .section ".fixup","aw"
+ .globl __FIXUP_END__
+ .type __FIXUP_END__,@object
+__FIXUP_END__:
+
+ .section ".ctors","aw"
+ .globl __CTOR_END__
+ .type __CTOR_END__,@object
+__CTOR_END__:
+
+ .section ".dtors","aw"
+ .weak __DTOR_END__
+ .type __DTOR_END__,@object
+__DTOR_END__:
+
+ .section ".sdata","aw"
+ .globl __SDATA_END__
+ .type __SDATA_END__,@object
+__SDATA_END__:
+
+ .section ".sbss","aw",@nobits
+ .globl __SBSS_END__
+ .type __SBSS_END__,@object
+__SBSS_END__:
+
+ .section ".sdata2","a"
+ .globl __SDATA2_END__
+ .type __SDATA2_END__,@object
+__SDATA2_END__:
+
+ .section ".sbss2","a"
+ .globl __SBSS2_END__
+ .type __SBSS2_END__,@object
+__SBSS2_END__:
+
+ .section ".gcc_except_table","aw"
+ .globl __EXCEPT_END__
+ .type __EXCEPT_END__,@object
+__EXCEPT_END__:
+
+ .section ".eh_frame","aw"
+ .globl __EH_FRAME_END__
+ .type __EH_FRAME_END__,@object
+__EH_FRAME_END__:
+ .long 0
+
+/* Tail of __init function used for static constructors. */
+ .section ".init","ax"
+ lwz 0,20(1)
+ mtlr 0
+ addi 1,1,16
+ blr
+
+/* Tail of __fini function used for static destructors. */
+ .section ".fini","ax"
+ lwz 0,20(1)
+ mtlr 0
+ addi 1,1,16
+ blr
+#endif
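
Taken together, eabi-ci.asm and eabi-cn.asm bracket the .init and .fini sections: crti contributes the section start labels and the prologues of __init/__fini, crtn contributes the end labels and the matching epilogues, and whatever the linker places between them becomes the function bodies. A small, hypothetical C illustration of what this plumbing ultimately services; the constructor below is recorded between __CTOR_LIST__ and __CTOR_END__ in .ctors and runs before main via the init machinery sketched above:

#include <stdio.h>

__attribute__ ((constructor))
static void
announce (void)
{
  puts ("constructor ran before main");  /* invoked via the init chain */
}

int
main (void)
{
  puts ("main");
  return 0;
}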
diff --git a/gcc/config/rs6000/eabi.asm b/gcc/config/rs6000/eabi.asm
new file mode 100644
index 000000000..292d88e50
--- /dev/null
+++ b/gcc/config/rs6000/eabi.asm
@@ -0,0 +1,289 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001, 2008, 2009
+ * Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Do any initialization needed for the eabi environment. */
+
+ .section ".text"
+ #include "ppc-asm.h"
+
+#ifndef __powerpc64__
+
+ .section ".got2","aw"
+ .align 2
+.LCTOC1 = . /* +32768 */
+
+/* Table of addresses */
+.Ltable = .-.LCTOC1
+ .long .LCTOC1 /* address we are really at */
+
+.Lsda = .-.LCTOC1
+ .long _SDA_BASE_ /* address of the first small data area */
+
+.Lsdas = .-.LCTOC1
+ .long __SDATA_START__ /* start of .sdata/.sbss section */
+
+.Lsdae = .-.LCTOC1
+ .long __SBSS_END__ /* end of .sdata/.sbss section */
+
+.Lsda2 = .-.LCTOC1
+ .long _SDA2_BASE_ /* address of the second small data area */
+
+.Lsda2s = .-.LCTOC1
+ .long __SDATA2_START__ /* start of .sdata2/.sbss2 section */
+
+.Lsda2e = .-.LCTOC1
+ .long __SBSS2_END__ /* end of .sdata2/.sbss2 section */
+
+#ifdef _RELOCATABLE
+.Lgots = .-.LCTOC1
+ .long __GOT_START__ /* Global offset table start */
+
+.Lgotm1 = .-.LCTOC1
+ .long _GLOBAL_OFFSET_TABLE_-4 /* end of GOT ptrs before BLCL + 3 reserved words */
+
+.Lgotm2 = .-.LCTOC1
+ .long _GLOBAL_OFFSET_TABLE_+12 /* start of GOT ptrs after BLCL + 3 reserved words */
+
+.Lgote = .-.LCTOC1
+ .long __GOT_END__ /* Global offset table end */
+
+.Lgot2s = .-.LCTOC1
+ .long __GOT2_START__ /* -mrelocatable GOT pointers start */
+
+.Lgot2e = .-.LCTOC1
+ .long __GOT2_END__ /* -mrelocatable GOT pointers end */
+
+.Lfixups = .-.LCTOC1
+ .long __FIXUP_START__ /* start of .fixup section */
+
+.Lfixupe = .-.LCTOC1
+ .long __FIXUP_END__ /* end of .fixup section */
+
+.Lctors = .-.LCTOC1
+ .long __CTOR_LIST__ /* start of .ctor section */
+
+.Lctore = .-.LCTOC1
+ .long __CTOR_END__ /* end of .ctor section */
+
+.Ldtors = .-.LCTOC1
+ .long __DTOR_LIST__ /* start of .dtor section */
+
+.Ldtore = .-.LCTOC1
+ .long __DTOR_END__ /* end of .dtor section */
+
+.Lexcepts = .-.LCTOC1
+ .long __EXCEPT_START__ /* start of .gcc_except_table section */
+
+.Lexcepte = .-.LCTOC1
+ .long __EXCEPT_END__ /* end of .gcc_except_table section */
+
+.Linit = .-.LCTOC1
+ .long .Linit_p /* address of variable to say we've been called */
+
+ .text
+ .align 2
+.Lptr:
+ .long .LCTOC1-.Laddr /* PC relative pointer to .got2 */
+#endif
+
+ .data
+ .align 2
+.Linit_p:
+ .long 0
+
+ .text
+
+FUNC_START(__eabi)
+
+/* Eliminate -mrelocatable code if not -mrelocatable, so that this file can
+ be assembled with assemblers other than GAS. */
+
+#ifndef _RELOCATABLE
+ addis 10,0,.Linit_p@ha /* init flag */
+ addis 11,0,.LCTOC1@ha /* load address of .LCTOC1 */
+ lwz 9,.Linit_p@l(10) /* init flag */
+ addi 11,11,.LCTOC1@l
+ cmplwi 2,9,0 /* init flag != 0? */
+ bnelr 2 /* return now, if we've been called already */
+ stw 1,.Linit_p@l(10) /* store a nonzero value in the done flag */
+
+#else /* -mrelocatable */
+ mflr 0
+ bl .Laddr /* get current address */
+.Laddr:
+ mflr 12 /* real address of .Laddr */
+ lwz 11,(.Lptr-.Laddr)(12) /* linker generated address of .LCTOC1 */
+ add 11,11,12 /* correct to real pointer */
+ lwz 12,.Ltable(11) /* get linker's idea of where .Laddr is */
+ lwz 10,.Linit(11) /* address of init flag */
+ subf. 12,12,11 /* calculate difference */
+ lwzx 9,10,12 /* done flag */
+ cmplwi 2,9,0 /* init flag != 0? */
+ mtlr 0 /* restore in case branch was taken */
+ bnelr 2 /* return now, if we've been called already */
+ stwx 1,10,12 /* store a nonzero value in the done flag */
+ beq+ 0,.Lsdata /* skip if we don't need to relocate */
+
+/* We need to relocate the .got2 pointers. */
+
+ lwz 3,.Lgot2s(11) /* GOT2 pointers start */
+ lwz 4,.Lgot2e(11) /* GOT2 pointers end */
+ add 3,12,3 /* adjust pointers */
+ add 4,12,4
+ bl FUNC_NAME(__eabi_convert) /* convert pointers in .got2 section */
+
+/* Fixup the .ctor section for static constructors */
+
+ lwz 3,.Lctors(11) /* constructors pointers start */
+ lwz 4,.Lctore(11) /* constructors pointers end */
+ bl FUNC_NAME(__eabi_convert) /* convert constructors */
+
+/* Fixup the .dtor section for static destructors */
+
+ lwz 3,.Ldtors(11) /* destructors pointers start */
+ lwz 4,.Ldtore(11) /* destructors pointers end */
+ bl FUNC_NAME(__eabi_convert) /* convert destructors */
+
+/* Fixup the .gcc_except_table section for G++ exceptions */
+
+ lwz 3,.Lexcepts(11) /* exception table pointers start */
+ lwz 4,.Lexcepte(11) /* exception table pointers end */
+ bl FUNC_NAME(__eabi_convert) /* convert exceptions */
+
+/* Fixup the addresses in the GOT below _GLOBAL_OFFSET_TABLE_-4 */
+
+ lwz 3,.Lgots(11) /* GOT table pointers start */
+ lwz 4,.Lgotm1(11) /* GOT table pointers below _GLOBAL_OFFSET_TABLE_-4 */
+ bl FUNC_NAME(__eabi_convert) /* convert lower GOT */
+
+/* Fixup the addresses in the GOT above _GLOBAL_OFFSET_TABLE_+12 */
+
+ lwz 3,.Lgotm2(11) /* GOT table pointers above _GLOBAL_OFFSET_TABLE_+12 */
+ lwz 4,.Lgote(11) /* GOT table pointers end */
+ bl FUNC_NAME(__eabi_convert) /* convert upper GOT */
+
+/* Fixup any user-initialized pointers now (the compiler records a pointer */
+/* to each such reloc in the .fixup section). */
+
+.Lfix:
+ lwz 3,.Lfixups(11) /* fixup pointers start */
+ lwz 4,.Lfixupe(11) /* fixup pointers end */
+ bl FUNC_NAME(__eabi_uconvert) /* convert user initialized pointers */
+
+.Lsdata:
+ mtlr 0 /* restore link register */
+#endif /* _RELOCATABLE */
+
+/* Only load up register 13 if there is a .sdata and/or .sbss section */
+ lwz 3,.Lsdas(11) /* start of .sdata/.sbss section */
+ lwz 4,.Lsdae(11) /* end of .sdata/.sbss section */
+ cmpw 1,3,4 /* .sdata/.sbss section non-empty? */
+ beq- 1,.Lsda2l /* skip loading r13 */
+
+ lwz 13,.Lsda(11) /* load r13 with _SDA_BASE_ address */
+
+/* Only load up register 2 if there is a .sdata2 and/or .sbss2 section */
+
+.Lsda2l:
+ lwz 3,.Lsda2s(11) /* start of .sdata2/.sbss2 section */
+ lwz 4,.Lsda2e(11) /* end of .sdata2/.sbss2 section */
+ cmpw 1,3,4 /* .sdata2/.sbss2 section non-empty? */
+ beq+ 1,.Ldone /* skip loading r2 */
+
+ lwz 2,.Lsda2(11) /* load r2 with _SDA2_BASE_ address */
+
+/* Done adjusting pointers; return by way of running the C++ global constructors. */
+
+.Ldone:
+ b FUNC_NAME(__init) /* do any C++ global constructors (which returns to caller) */
+FUNC_END(__eabi)
+
+/* Special subroutine to convert a bunch of pointers directly.
+ r0 has original link register
+ r3 has low pointer to convert
+ r4 has high pointer to convert
+ r5 .. r10 are scratch registers
+ r11 has the address of .LCTOC1 in it.
+ r12 has the value to add to each pointer
+ r13 .. r31 are unchanged */
+#ifdef _RELOCATABLE
+FUNC_START(__eabi_convert)
+ cmplw 1,3,4 /* any pointers to convert? */
+ subf 5,3,4 /* calculate number of words to convert */
+ bclr 4,4 /* return if no pointers */
+
+ srawi 5,5,2
+ addi 3,3,-4 /* start-4 for use with lwzu */
+ mtctr 5
+
+.Lcvt:
+ lwzu 6,4(3) /* pointer to convert */
+ cmpwi 0,6,0
+ beq- .Lcvt2 /* if pointer is null, don't convert */
+
+ add 6,6,12 /* convert pointer */
+ stw 6,0(3)
+.Lcvt2:
+ bdnz+ .Lcvt
+ blr
+
+FUNC_END(__eabi_convert)
+
+/* Special subroutine to convert the pointers the user has initialized. The
+ compiler has placed the address of the initialized pointer into the .fixup
+ section.
+
+ r0 has original link register
+ r3 has low pointer to convert
+ r4 has high pointer to convert
+ r5 .. r10 are scratch registers
+ r11 has the address of .LCTOC1 in it.
+ r12 has the value to add to each pointer
+ r13 .. r31 are unchanged */
+
+FUNC_START(__eabi_uconvert)
+ cmplw 1,3,4 /* any pointers to convert? */
+ subf 5,3,4 /* calculate number of words to convert */
+ bclr 4,4 /* return if no pointers */
+
+ srawi 5,5,2
+ addi 3,3,-4 /* start-4 for use with lwzu */
+ mtctr 5
+
+.Lucvt:
+ lwzu 6,4(3) /* next pointer to pointer to convert */
+ add 6,6,12 /* adjust pointer */
+ lwz 7,0(6) /* get the pointer it points to */
+ stw 6,0(3) /* store adjusted pointer */
+ add 7,7,12 /* adjust */
+ stw 7,0(6)
+ bdnz+ .Lucvt
+ blr
+
+FUNC_END(__eabi_uconvert)
+#endif
+#endif
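
The two convert helpers are easier to follow in C: both walk a [low, high) range of words left by the linker, adding the load-time displacement held in r12, and __eabi_uconvert adds one level of indirection because each .fixup entry points at a user-initialized pointer rather than being one. A minimal C model (hypothetical names; delta stands for r12):

/* Model of __eabi_convert: adjust every non-null word in [low, high).  */
static void
eabi_convert_model (unsigned long *low, unsigned long *high,
                    unsigned long delta /* r12 */)
{
  unsigned long *p;

  for (p = low; p < high; p++)
    if (*p != 0)
      *p += delta;
}

/* Model of __eabi_uconvert: relocate each fixup entry, then the
   user-initialized pointer that the entry designates.  */
static void
eabi_uconvert_model (unsigned long **low, unsigned long **high,
                     unsigned long delta)
{
  unsigned long **f;

  for (f = low; f < high; f++)
    {
      unsigned long *target
        = (unsigned long *) ((unsigned long) *f + delta);

      *f = target;        /* stw 6,0(3): store adjusted fixup entry */
      *target += delta;   /* adjust the pointer it points to */
    }
}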
diff --git a/gcc/config/rs6000/eabi.h b/gcc/config/rs6000/eabi.h
new file mode 100644
index 000000000..3024a7586
--- /dev/null
+++ b/gcc/config/rs6000/eabi.h
@@ -0,0 +1,44 @@
+/* Core target definitions for GNU compiler
+ for IBM RS/6000 PowerPC targeted to embedded ELF systems.
+ Copyright (C) 1995, 1996, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Cygnus Support.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Add -meabi to target flags. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI)
+
+/* Invoke an initializer function to set up the GOT. */
+#define NAME__MAIN "__eabi"
+#define INVOKE__main
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC Embedded)");
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define ("__embedded__"); \
+ builtin_assert ("system=embedded"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ while (0)
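
The TARGET_OS_CPP_BUILTINS hook above is what target-detection idioms in user and library code key off. A trivial, hypothetical consumer of the macros it predefines:

/* Hypothetical feature test; PPC and __embedded__ both come from the
   TARGET_OS_CPP_BUILTINS definition for this target.  */
#if defined (PPC) && defined (__embedded__)
# define USE_BOARD_CONSOLE 1   /* hypothetical configuration knob */
#else
# define USE_BOARD_CONSOLE 0
#endif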
diff --git a/gcc/config/rs6000/eabialtivec.h b/gcc/config/rs6000/eabialtivec.h
new file mode 100644
index 000000000..417be97a4
--- /dev/null
+++ b/gcc/config/rs6000/eabialtivec.h
@@ -0,0 +1,30 @@
+/* Core target definitions for GNU compiler
+ for PowerPC targeted systems with AltiVec support.
+ Copyright (C) 2001, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Add -meabi and -maltivec to target flags. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI | MASK_ALTIVEC)
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC Embedded with AltiVec)");
+
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS rs6000_altivec_abi = 1
diff --git a/gcc/config/rs6000/eabisim.h b/gcc/config/rs6000/eabisim.h
new file mode 100644
index 000000000..65bc14dff
--- /dev/null
+++ b/gcc/config/rs6000/eabisim.h
@@ -0,0 +1,54 @@
+/* Support for GCC on simulated PowerPC systems targeted to embedded ELF
+ systems.
+ Copyright (C) 1995, 1996, 2000, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Cygnus Support.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC Simulated)");
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define ("__embedded__"); \
+ builtin_define ("__simulator__"); \
+ builtin_assert ("system=embedded"); \
+ builtin_assert ("system=simulator"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+/* Make the simulator the default */
+#undef LIB_DEFAULT_SPEC
+#define LIB_DEFAULT_SPEC "%(lib_sim)"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "%(startfile_sim)"
+
+#undef ENDFILE_DEFAULT_SPEC
+#define ENDFILE_DEFAULT_SPEC "%(endfile_sim)"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "%(link_start_sim)"
+
+#undef LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC "%(link_os_sim)"
diff --git a/gcc/config/rs6000/eabispe.h b/gcc/config/rs6000/eabispe.h
new file mode 100644
index 000000000..d3fc8a6be
--- /dev/null
+++ b/gcc/config/rs6000/eabispe.h
@@ -0,0 +1,54 @@
+/* Core target definitions for GNU compiler
+ for PowerPC embedded targeted systems with SPE support.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI \
+ | MASK_STRICT_ALIGN)
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC Embedded SPE)");
+
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+ if (rs6000_select[1].string == NULL) \
+ rs6000_cpu = PROCESSOR_PPC8540; \
+ if (!rs6000_explicit_options.spe_abi) \
+ rs6000_spe_abi = 1; \
+ if (!rs6000_explicit_options.float_gprs) \
+ rs6000_float_gprs = 1; \
+ if (!rs6000_explicit_options.spe) \
+ rs6000_spe = 1; \
+ if (target_flags & MASK_64BIT) \
+ error ("-m64 not supported in this configuration")
+
+/* The e500 ABI says that either long doubles are 128 bits, or if
+ implemented in any other size, the compiler/linker should error out.
+ We have no emulation libraries for 128 bit long doubles, and I hate
+ the dozens of failures on the regression suite. So I'm breaking ABI
+ specifications, until I properly fix the emulation.
+
+ Enable these later.
+#define RS6000_DEFAULT_LONG_DOUBLE_SIZE (TARGET_SPE ? 128 : 64)
+*/
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc -mspe -me500"
diff --git a/gcc/config/rs6000/freebsd.h b/gcc/config/rs6000/freebsd.h
new file mode 100644
index 000000000..567263b2a
--- /dev/null
+++ b/gcc/config/rs6000/freebsd.h
@@ -0,0 +1,80 @@
+/* Definitions for PowerPC running FreeBSD using the ELF format
+ Copyright (C) 2001, 2003, 2007, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Override the defaults, which exist to force the proper definition. */
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_freebsd)"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "%(startfile_freebsd)"
+
+#undef ENDFILE_DEFAULT_SPEC
+#define ENDFILE_DEFAULT_SPEC "%(endfile_freebsd)"
+
+#undef LIB_DEFAULT_SPEC
+#define LIB_DEFAULT_SPEC "%(lib_freebsd)"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "%(link_start_freebsd)"
+
+#undef LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC "%(link_os_freebsd)"
+
+/* XXX: This is wrong for many platforms in sysv4.h.
+ We should work on getting that definition fixed. */
+#undef LINK_SHLIB_SPEC
+#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
+
+
+/************************[ Target stuff ]***********************************/
+
+/* Define the actual types of some ANSI-mandated types.
+ Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c,
+ c-common.c, and config/<arch>/<arch>.h. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+/* rs6000.h gets this wrong for FreeBSD. We use the GCC defaults instead. */
+#undef WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/PowerPC ELF)");
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* We don't need to generate entries in .fixup, except when
+ -mrelocatable or -mrelocatable-lib is given. */
+#undef RELOCATABLE_NEEDS_FIXUP
+#define RELOCATABLE_NEEDS_FIXUP \
+ (target_flags & target_flags_explicit & MASK_RELOCATABLE)
+
+#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
diff --git a/gcc/config/rs6000/gnu.h b/gcc/config/rs6000/gnu.h
new file mode 100644
index 000000000..0f329e53f
--- /dev/null
+++ b/gcc/config/rs6000/gnu.h
@@ -0,0 +1,37 @@
+/* Definitions of target machine for GNU compiler,
+ for PowerPC machines running GNU.
+ Copyright (C) 2001, 2003, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_gnu)"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "%(startfile_gnu)"
+
+#undef ENDFILE_DEFAULT_SPEC
+#define ENDFILE_DEFAULT_SPEC "%(endfile_gnu)"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "%(link_start_gnu)"
+
+#undef LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC "%(link_os_gnu)"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC GNU)");
diff --git a/gcc/config/rs6000/host-darwin.c b/gcc/config/rs6000/host-darwin.c
new file mode 100644
index 000000000..48afa46e1
--- /dev/null
+++ b/gcc/config/rs6000/host-darwin.c
@@ -0,0 +1,154 @@
+/* Darwin/powerpc host-specific hook definitions.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include <sys/ucontext.h>
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "diagnostic.h"
+#include "config/host-darwin.h"
+
+static void segv_crash_handler (int);
+static void segv_handler (int, siginfo_t *, void *);
+static void darwin_rs6000_extra_signals (void);
+
+#ifndef HAVE_DECL_SIGALTSTACK
+/* This doesn't have a prototype in signal.h in 10.2.x and earlier;
+ it was fixed in later releases. */
+extern int sigaltstack(const struct sigaltstack *, struct sigaltstack *);
+#endif
+
+/* The fields of the mcontext_t type have acquired underscores in later
+ OS versions. */
+#ifdef HAS_MCONTEXT_T_UNDERSCORES
+#define MC_FLD(x) __ ## x
+#else
+#define MC_FLD(x) x
+#endif
+
+#undef HOST_HOOKS_EXTRA_SIGNALS
+#define HOST_HOOKS_EXTRA_SIGNALS darwin_rs6000_extra_signals
+
+/* On Darwin/powerpc, hitting the stack limit turns into a SIGSEGV.
+ This code detects the difference between hitting the stack limit and
+ a true wild pointer dereference by looking at the instruction that
+ faulted; only a few kinds of instruction are used to access below
+ the previous bottom of the stack. */
+
+static void
+segv_crash_handler (int sig ATTRIBUTE_UNUSED)
+{
+ internal_error ("Segmentation Fault (code)");
+}
+
+static void
+segv_handler (int sig ATTRIBUTE_UNUSED,
+ siginfo_t *sip ATTRIBUTE_UNUSED,
+ void *scp)
+{
+ ucontext_t *uc = (ucontext_t *)scp;
+ sigset_t sigset;
+ unsigned faulting_insn;
+
+ /* The fault might have happened when trying to run some instruction, in
+ which case the next line will segfault _again_. Handle this case. */
+ signal (SIGSEGV, segv_crash_handler);
+ sigemptyset (&sigset);
+ sigaddset (&sigset, SIGSEGV);
+ sigprocmask (SIG_UNBLOCK, &sigset, NULL);
+
+ faulting_insn = *(unsigned *)uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0);
+
+ /* Note that this only has to work for GCC, so we don't have to deal
+ with all the possible cases (GCC has no AltiVec code, for
+ instance). It's complicated because Darwin allows stores to
+ below the stack pointer, and the prologue code takes advantage of
+ this. */
+
+ if ((faulting_insn & 0xFFFF8000) == 0x94218000 /* stwu %r1, -xxx(%r1) */
+ || (faulting_insn & 0xFC1F03FF) == 0x7C01016E /* stwux xxx, %r1, xxx */
+ || (faulting_insn & 0xFC1F8000) == 0x90018000 /* stw xxx, -yyy(%r1) */
+ || (faulting_insn & 0xFC1F8000) == 0xD8018000 /* stfd xxx, -yyy(%r1) */
+ || (faulting_insn & 0xFC1F8000) == 0xBC018000 /* stmw xxx, -yyy(%r1) */)
+ {
+ char *shell_name;
+
+ fnotice (stderr, "Out of stack space.\n");
+ shell_name = getenv ("SHELL");
+ if (shell_name != NULL)
+ shell_name = strrchr (shell_name, '/');
+ if (shell_name != NULL)
+ {
+ static const char * shell_commands[][2] = {
+ { "sh", "ulimit -S -s unlimited" },
+ { "bash", "ulimit -S -s unlimited" },
+ { "tcsh", "limit stacksize unlimited" },
+ { "csh", "limit stacksize unlimited" },
+ /* zsh doesn't have "unlimited"; this will work under the
+ default configuration. */
+ { "zsh", "limit stacksize 32m" }
+ };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE (shell_commands); i++)
+ if (strcmp (shell_commands[i][0], shell_name + 1) == 0)
+ {
+ fnotice (stderr,
+ "Try running '%s' in the shell to raise its limit.\n",
+ shell_commands[i][1]);
+ }
+ }
+
+ if (global_dc->abort_on_error)
+ fancy_abort (__FILE__, __LINE__, __FUNCTION__);
+
+ exit (FATAL_EXIT_CODE);
+ }
+
+ fprintf (stderr, "[address=%08lx pc=%08x]\n",
+ uc->uc_mcontext->MC_FLD(es).MC_FLD(dar),
+ uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0));
+ internal_error ("Segmentation Fault");
+ exit (FATAL_EXIT_CODE);
+}
+
+static void
+darwin_rs6000_extra_signals (void)
+{
+ struct sigaction sact;
+ stack_t sigstk;
+
+ sigstk.ss_sp = (char*)xmalloc (SIGSTKSZ);
+ sigstk.ss_size = SIGSTKSZ;
+ sigstk.ss_flags = 0;
+ if (sigaltstack (&sigstk, NULL) < 0)
+ fatal_error ("While setting up signal stack: %m");
+
+ sigemptyset(&sact.sa_mask);
+ sact.sa_flags = SA_ONSTACK | SA_SIGINFO;
+ sact.sa_sigaction = segv_handler;
+ if (sigaction (SIGSEGV, &sact, 0) < 0)
+ fatal_error ("While setting up signal handler: %m");
+}
+
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
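
The opcode tests in segv_handler rely on the fixed PowerPC encoding: the primary opcode and register fields sit in the high half-word and the 16-bit signed displacement in the low half-word, so masking with 0xFFFF8000 keeps the opcode, the RS/RA fields, and the displacement's sign bit. A small self-checking sketch of the first test (the sample word is hypothetical; the field values follow the architecture's D-form layout):

#include <assert.h>

int
main (void)
{
  /* stwu %r1,-64(%r1): primary opcode 37, RS = 1, RA = 1, D = -64,
     i.e. (37 << 26) | (1 << 21) | (1 << 16) | 0xFFC0.  */
  unsigned int insn = 0x9421FFC0;

  /* The segv_handler test for "stwu %r1, -xxx(%r1)": any negative
     displacement keeps the sign bit 0x8000 set.  */
  assert ((insn & 0xFFFF8000) == 0x94218000);
  return 0;
}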
diff --git a/gcc/config/rs6000/host-ppc64-darwin.c b/gcc/config/rs6000/host-ppc64-darwin.c
new file mode 100644
index 000000000..49a920475
--- /dev/null
+++ b/gcc/config/rs6000/host-ppc64-darwin.c
@@ -0,0 +1,30 @@
+/* ppc64-darwin host-specific hook definitions.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for ppc64 hosts; this file exists just
+ to include config/host-darwin.h. */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc/config/rs6000/libgcc-ppc-glibc.ver b/gcc/config/rs6000/libgcc-ppc-glibc.ver
new file mode 100644
index 000000000..8862c14cb
--- /dev/null
+++ b/gcc/config/rs6000/libgcc-ppc-glibc.ver
@@ -0,0 +1,73 @@
+# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+%ifndef _SOFT_FLOAT
+%ifndef __powerpc64__
+%exclude {
+ __multc3
+ __divtc3
+ __powitf2
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+}
+
+GCC_4.1.0 {
+ # long double support
+ __multc3
+ __divtc3
+ __powitf2
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+
+%else
+GCC_3.4.4 {
+%endif
+%else
+GCC_4.2.0 {
+%endif
+
+ # long double support
+ __gcc_qadd
+ __gcc_qsub
+ __gcc_qmul
+ __gcc_qdiv
+
+%ifdef _SOFT_DOUBLE
+ __gcc_qneg
+ __gcc_qeq
+ __gcc_qne
+ __gcc_qgt
+ __gcc_qge
+ __gcc_qlt
+ __gcc_qle
+ __gcc_stoq
+ __gcc_dtoq
+ __gcc_qtos
+ __gcc_qtod
+ __gcc_qtoi
+ __gcc_qtou
+ __gcc_itoq
+ __gcc_utoq
+%endif
+
+%ifdef __NO_FPRS__
+ __gcc_qunord
+%endif
+}
diff --git a/gcc/config/rs6000/libgcc-ppc64.ver b/gcc/config/rs6000/libgcc-ppc64.ver
new file mode 100644
index 000000000..b27b4b492
--- /dev/null
+++ b/gcc/config/rs6000/libgcc-ppc64.ver
@@ -0,0 +1,7 @@
+GCC_3.4.4 {
+ # long double support
+ __gcc_qadd
+ __gcc_qsub
+ __gcc_qmul
+ __gcc_qdiv
+}
diff --git a/gcc/config/rs6000/linux-unwind.h b/gcc/config/rs6000/linux-unwind.h
new file mode 100644
index 000000000..a16df97e9
--- /dev/null
+++ b/gcc/config/rs6000/linux-unwind.h
@@ -0,0 +1,355 @@
+/* DWARF2 EH unwinding support for PowerPC and PowerPC64 Linux.
+ Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define R_LR 65
+#define R_CR2 70
+#define R_VR0 77
+#define R_VRSAVE 109
+#define R_VSCR 110
+
+struct gcc_vregs
+{
+ __attribute__ ((vector_size (16))) int vr[32];
+#ifdef __powerpc64__
+ unsigned int pad1[3];
+ unsigned int vscr;
+ unsigned int vsave;
+ unsigned int pad2[3];
+#else
+ unsigned int vsave;
+ unsigned int pad[2];
+ unsigned int vscr;
+#endif
+};
+
+struct gcc_regs
+{
+ unsigned long gpr[32];
+ unsigned long nip;
+ unsigned long msr;
+ unsigned long orig_gpr3;
+ unsigned long ctr;
+ unsigned long link;
+ unsigned long xer;
+ unsigned long ccr;
+ unsigned long softe;
+ unsigned long trap;
+ unsigned long dar;
+ unsigned long dsisr;
+ unsigned long result;
+ unsigned long pad1[4];
+ double fpr[32];
+ unsigned int pad2;
+ unsigned int fpscr;
+#ifdef __powerpc64__
+ struct gcc_vregs *vp;
+#else
+ unsigned int pad3[2];
+#endif
+ struct gcc_vregs vregs;
+};
+
+struct gcc_ucontext
+{
+#ifdef __powerpc64__
+ unsigned long pad[28];
+#else
+ unsigned long pad[12];
+#endif
+ struct gcc_regs *regs;
+ struct gcc_regs rsave;
+};
+
+#ifdef __powerpc64__
+
+enum { SIGNAL_FRAMESIZE = 128 };
+
+/* If PC is at a sigreturn trampoline, return a pointer to the
+ regs. Otherwise return NULL. */
+
+static struct gcc_regs *
+get_regs (struct _Unwind_Context *context)
+{
+ const unsigned int *pc = context->ra;
+
+ /* addi r1, r1, 128; li r0, 0x0077; sc (sigreturn) */
+ /* addi r1, r1, 128; li r0, 0x00AC; sc (rt_sigreturn) */
+ if (pc[0] != 0x38210000 + SIGNAL_FRAMESIZE || pc[2] != 0x44000002)
+ return NULL;
+ if (pc[1] == 0x38000077)
+ {
+ struct sigframe {
+ char gap[SIGNAL_FRAMESIZE];
+ unsigned long pad[7];
+ struct gcc_regs *regs;
+ } *frame = (struct sigframe *) context->cfa;
+ return frame->regs;
+ }
+ else if (pc[1] == 0x380000AC)
+ {
+ /* This works for 2.4 kernels, but not for 2.6 kernels with vdso
+ because pc isn't pointing into the stack. Can be removed when
+ no one is running 2.4.19 or 2.4.20, the first two ppc64
+ kernels released. */
+ const struct rt_sigframe_24 {
+ int tramp[6];
+ void *pinfo;
+ struct gcc_ucontext *puc;
+ } *frame24 = (const struct rt_sigframe_24 *) context->ra;
+
+ /* Test for magic value in *puc of vdso. */
+ if ((long) frame24->puc != -21 * 8)
+ return frame24->puc->regs;
+ else
+ {
+ /* This works for 2.4.21 and later kernels. */
+ struct rt_sigframe {
+ char gap[SIGNAL_FRAMESIZE];
+ struct gcc_ucontext uc;
+ unsigned long pad[2];
+ int tramp[6];
+ void *pinfo;
+ struct gcc_ucontext *puc;
+ } *frame = (struct rt_sigframe *) context->cfa;
+ return frame->uc.regs;
+ }
+ }
+ return NULL;
+}
+
+#else /* !__powerpc64__ */
+
+enum { SIGNAL_FRAMESIZE = 64 };
+
+static struct gcc_regs *
+get_regs (struct _Unwind_Context *context)
+{
+ const unsigned int *pc = context->ra;
+
+ /* li r0, 0x7777; sc (sigreturn old) */
+ /* li r0, 0x0077; sc (sigreturn new) */
+ /* li r0, 0x6666; sc (rt_sigreturn old) */
+ /* li r0, 0x00AC; sc (rt_sigreturn new) */
+ if (pc[1] != 0x44000002)
+ return NULL;
+ if (pc[0] == 0x38007777 || pc[0] == 0x38000077)
+ {
+ struct sigframe {
+ char gap[SIGNAL_FRAMESIZE];
+ unsigned long pad[7];
+ struct gcc_regs *regs;
+ } *frame = (struct sigframe *) context->cfa;
+ return frame->regs;
+ }
+ else if (pc[0] == 0x38006666 || pc[0] == 0x380000AC)
+ {
+ struct rt_sigframe {
+ char gap[SIGNAL_FRAMESIZE + 16];
+ char siginfo[128];
+ struct gcc_ucontext uc;
+ } *frame = (struct rt_sigframe *) context->cfa;
+ return frame->uc.regs;
+ }
+ return NULL;
+}
+#endif
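
The magic constants matched above are whole PowerPC instruction words. A minimal self-contained sketch (not part of the patch) rebuilds them from the standard D-form encoding to show where they come from:

    /* D-form "addi rD,rA,SIMM" is opcode 14 in the top six bits, then
       rD and rA in five bits each, then a 16-bit immediate; "sc" is
       opcode 17 with bit 1 set.  */
    #include <assert.h>

    static unsigned int
    d_form (unsigned int opcd, unsigned int rd, unsigned int ra,
            unsigned int simm)
    {
      return (opcd << 26) | (rd << 21) | (ra << 16) | (simm & 0xffff);
    }

    int
    main (void)
    {
      assert (d_form (14, 1, 1, 128) == 0x38210000 + 128); /* addi r1,r1,128 */
      assert (d_form (14, 0, 0, 0x77) == 0x38000077);      /* li r0,0x77 */
      assert ((17u << 26) + 2 == 0x44000002);              /* sc */
      return 0;
    }
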
+
+/* Find an entry in the process auxiliary vector. The canonical way to
+ test for VMX is to look at AT_HWCAP. */
+
+static long
+ppc_linux_aux_vector (long which)
+{
+ /* __libc_stack_end holds the original stack passed to a process. */
+ extern long *__libc_stack_end;
+ long argc;
+ char **argv;
+ char **envp;
+ struct auxv
+ {
+ long a_type;
+ long a_val;
+ } *auxp;
+
+ /* The Linux kernel puts argc first on the stack. */
+ argc = __libc_stack_end[0];
+ /* Followed by argv, NULL terminated. */
+ argv = (char **) __libc_stack_end + 1;
+ /* Followed by environment string pointers, NULL terminated. */
+ envp = argv + argc + 1;
+ while (*envp++)
+ continue;
+ /* Followed by the aux vector, zero terminated. */
+ for (auxp = (struct auxv *) envp; auxp->a_type != 0; ++auxp)
+ if (auxp->a_type == which)
+ return auxp->a_val;
+ return 0;
+}
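
For reference, the fallback-state code below consumes this with constants it hard-codes directly (AT_HWCAP is 16 in the glibc/kernel ABI, and 0x10000000 is the AltiVec hwcap bit it tests); a minimal usage sketch, not part of the patch:

    /* Sketch only: query the AltiVec hwcap bit the same way
       ppc_fallback_frame_state does.  */
    static int
    have_altivec (void)
    {
      long hwcap = ppc_linux_aux_vector (16);    /* AT_HWCAP */
      return (hwcap & 0x10000000) != 0;          /* PPC_FEATURE_HAS_ALTIVEC */
    }
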
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#define MD_FALLBACK_FRAME_STATE_FOR ppc_fallback_frame_state
+
+static _Unwind_Reason_Code
+ppc_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ static long hwcap = 0;
+ struct gcc_regs *regs = get_regs (context);
+ long new_cfa;
+ int i;
+
+ if (regs == NULL)
+ return _URC_END_OF_STACK;
+
+ new_cfa = regs->gpr[STACK_POINTER_REGNUM];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = STACK_POINTER_REGNUM;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ for (i = 0; i < 32; i++)
+ if (i != STACK_POINTER_REGNUM)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = (long) &regs->gpr[i] - new_cfa;
+ }
+
+ fs->regs.reg[R_CR2].how = REG_SAVED_OFFSET;
+ /* CR? regs are always 32-bit and PPC is big-endian, so in 64-bit
+ libgcc loc.offset needs to point to the low 32 bits of regs->ccr. */
+ fs->regs.reg[R_CR2].loc.offset = (long) &regs->ccr - new_cfa
+ + sizeof (long) - 4;
+
+ fs->regs.reg[R_LR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[R_LR].loc.offset = (long) &regs->link - new_cfa;
+
+ fs->regs.reg[ARG_POINTER_REGNUM].how = REG_SAVED_OFFSET;
+ fs->regs.reg[ARG_POINTER_REGNUM].loc.offset = (long) &regs->nip - new_cfa;
+ fs->retaddr_column = ARG_POINTER_REGNUM;
+ fs->signal_frame = 1;
+
+ if (hwcap == 0)
+ {
+ hwcap = ppc_linux_aux_vector (16);
+ /* These will already be set if we found AT_HWCAP. A nonzero
+ value stops us looking again if for some reason we couldn't
+ find AT_HWCAP. */
+#ifdef __powerpc64__
+ hwcap |= 0xc0000000;
+#else
+ hwcap |= 0x80000000;
+#endif
+ }
+
+  /* If we have an FPU... */
+ if (hwcap & 0x08000000)
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset = (long) &regs->fpr[i] - new_cfa;
+ }
+
+ /* If we have a VMX unit... */
+ if (hwcap & 0x10000000)
+ {
+ struct gcc_vregs *vregs;
+#ifdef __powerpc64__
+ vregs = regs->vp;
+#else
+ vregs = &regs->vregs;
+#endif
+ if (regs->msr & (1 << 25))
+ {
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + R_VR0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + R_VR0].loc.offset
+ = (long) &vregs->vr[i] - new_cfa;
+ }
+
+ fs->regs.reg[R_VSCR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[R_VSCR].loc.offset = (long) &vregs->vscr - new_cfa;
+ }
+
+ fs->regs.reg[R_VRSAVE].how = REG_SAVED_OFFSET;
+ fs->regs.reg[R_VRSAVE].loc.offset = (long) &vregs->vsave - new_cfa;
+ }
+
+  /* If we have SPE register high parts... we check at compile time to
+     avoid expanding the code for all other PowerPC targets.  */
+#ifdef __SPE__
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + FIRST_PSEUDO_REGISTER - 1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + FIRST_PSEUDO_REGISTER - 1].loc.offset
+ = (long) &regs->vregs - new_cfa + 4 * i;
+ }
+#endif
+
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT frob_update_context
+
+static void
+frob_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs ATTRIBUTE_UNUSED)
+{
+ const unsigned int *pc = (const unsigned int *) context->ra;
+
+  /* Fix up for 2.6.12 - 2.6.16 Linux kernels that have a vDSO, but
+     don't have the S (signal frame) flag in its unwind info.  */
+#ifdef __powerpc64__
+ /* addi r1, r1, 128; li r0, 0x0077; sc (sigreturn) */
+ /* addi r1, r1, 128; li r0, 0x00AC; sc (rt_sigreturn) */
+ if (pc[0] == 0x38210000 + SIGNAL_FRAMESIZE
+ && (pc[1] == 0x38000077 || pc[1] == 0x380000AC)
+ && pc[2] == 0x44000002)
+ _Unwind_SetSignalFrame (context, 1);
+#else
+ /* li r0, 0x7777; sc (sigreturn old) */
+ /* li r0, 0x0077; sc (sigreturn new) */
+ /* li r0, 0x6666; sc (rt_sigreturn old) */
+ /* li r0, 0x00AC; sc (rt_sigreturn new) */
+ if ((pc[0] == 0x38007777 || pc[0] == 0x38000077
+ || pc[0] == 0x38006666 || pc[0] == 0x380000AC)
+ && pc[1] == 0x44000002)
+ _Unwind_SetSignalFrame (context, 1);
+#endif
+
+#ifdef __powerpc64__
+ if (fs->regs.reg[2].how == REG_UNSAVED)
+ {
+ /* If the current unwind info (FS) does not contain explicit info
+ saving R2, then we have to do a minor amount of code reading to
+ figure out if it was saved. The big problem here is that the
+ code that does the save/restore is generated by the linker, so
+ we have no good way to determine at compile time what to do. */
+ unsigned int *insn
+ = (unsigned int *) _Unwind_GetGR (context, R_LR);
+ if (insn && *insn == 0xE8410028)
+ _Unwind_SetGRPtr (context, 2, context->cfa + 40);
+ }
+#endif
+}
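
The 0xE8410028 test above matches another whole instruction word. A small sketch of the decoding (DS-form "ld" is opcode 58 in the top six bits, then rD and rA, then a displacement whose low two bits select the ld variant and must be zero):

    #include <assert.h>

    int
    main (void)
    {
      /* "ld r2,40(r1)": reload the TOC pointer from its ABI save slot
         at r1+40, which is what linker-generated call stubs emit and
         why the code above patches GR2 from context->cfa + 40.  */
      assert (((58u << 26) | (2u << 21) | (1u << 16) | 40u) == 0xE8410028);
      return 0;
    }
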
diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h
new file mode 100644
index 000000000..77c8f6103
--- /dev/null
+++ b/gcc/config/rs6000/linux.h
@@ -0,0 +1,134 @@
+/* Definitions of target machine for GNU compiler,
+ for PowerPC machines running Linux.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Michael Meissner (meissner@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Linux doesn't support saving and restoring 64-bit regs in a 32-bit
+ process. */
+#define OS_MISSING_POWERPC64 1
+
+/* We use glibc _mcount for profiling. */
+#define NO_PROFILE_COUNTERS 1
+
+#ifdef SINGLE_LIBC
+#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
+#else
+#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
+#endif
+
+/* glibc has float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS (OPTION_GLIBC)
+
+/* Whether we have sincos that follows the GNU extension. */
+#undef TARGET_HAS_SINCOS
+#define TARGET_HAS_SINCOS (OPTION_GLIBC)
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define_std ("powerpc"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_linux)"
+
+/* The GNU C++ standard library currently requires _GNU_SOURCE to be
+   defined on glibc-based systems.  This temporary hack accomplishes
+   that; it should go away as soon as libstdc++-v3 has a real fix.  */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+#undef LINK_SHLIB_SPEC
+#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
+
+#undef LIB_DEFAULT_SPEC
+#define LIB_DEFAULT_SPEC "%(lib_linux)"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "%(startfile_linux)"
+
+#undef ENDFILE_DEFAULT_SPEC
+#define ENDFILE_DEFAULT_SPEC "%(endfile_linux)"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "%(link_start_linux)"
+
+#undef LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
+
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC GNU/Linux)");
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* For backward compatibility, we must continue to use the AIX
+ structure return convention. */
+#undef DRAFT_V4_STRUCT_RET
+#define DRAFT_V4_STRUCT_RET 1
+
+/* We are 32-bit all the time, so optimize a little. */
+#undef TARGET_64BIT
+#define TARGET_64BIT 0
+
+/* We don't need to generate entries in .fixup, except when
+ -mrelocatable or -mrelocatable-lib is given. */
+#undef RELOCATABLE_NEEDS_FIXUP
+#define RELOCATABLE_NEEDS_FIXUP \
+ (target_flags & target_flags_explicit & MASK_RELOCATABLE)
+
+#define TARGET_POSIX_IO
+
+#define MD_UNWIND_SUPPORT "config/rs6000/linux-unwind.h"
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* ppc32 glibc provides __stack_chk_guard in -0x7008(2). */
+#define TARGET_THREAD_SSP_OFFSET -0x7008
+#endif
+
+#define POWERPC_LINUX
+
+/* ppc linux has 128-bit long double support in glibc 2.4 and later. */
+#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128
+#endif
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
new file mode 100644
index 000000000..e6840d63e
--- /dev/null
+++ b/gcc/config/rs6000/linux64.h
@@ -0,0 +1,569 @@
+/* Definitions of target machine for GNU compiler,
+ for 64 bit PowerPC linux.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef RS6000_BI_ARCH
+
+#undef DEFAULT_ABI
+#define DEFAULT_ABI ABI_AIX
+
+#undef TARGET_64BIT
+#define TARGET_64BIT 1
+
+#define DEFAULT_ARCH64_P 1
+#define RS6000_BI_ARCH_P 0
+
+#else
+
+#define DEFAULT_ARCH64_P (TARGET_DEFAULT & MASK_64BIT)
+#define RS6000_BI_ARCH_P 1
+
+#endif
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __powerpc64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+#undef TARGET_AIX
+#define TARGET_AIX TARGET_64BIT
+
+#ifdef HAVE_LD_NO_DOT_SYMS
+/* New ABI uses a local sym for the function entry point. */
+extern int dot_symbols;
+#undef DOT_SYMBOLS
+#define DOT_SYMBOLS dot_symbols
+#endif
+
+#define TARGET_PROFILE_KERNEL profile_kernel
+
+#define TARGET_USES_LINUX64_OPT 1
+#ifdef HAVE_LD_LARGE_TOC
+#undef TARGET_CMODEL
+#define TARGET_CMODEL rs6000_current_cmodel
+#define SET_CMODEL(opt) rs6000_current_cmodel = opt
+#else
+#define SET_CMODEL(opt) do {} while (0)
+#endif
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_POWER7
+#undef PROCESSOR_DEFAULT64
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
+
+/* We don't need to generate entries in .fixup, except when
+ -mrelocatable or -mrelocatable-lib is given. */
+#undef RELOCATABLE_NEEDS_FIXUP
+#define RELOCATABLE_NEEDS_FIXUP \
+ (target_flags & target_flags_explicit & MASK_RELOCATABLE)
+
+#undef RS6000_ABI_NAME
+#define RS6000_ABI_NAME "linux"
+
+#define INVALID_64BIT "-m%s not supported in this configuration"
+#define INVALID_32BIT INVALID_64BIT
+
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ if (!rs6000_explicit_options.alignment) \
+ rs6000_alignment_flags = MASK_ALIGN_NATURAL; \
+ if (TARGET_64BIT) \
+ { \
+ if (DEFAULT_ABI != ABI_AIX) \
+ { \
+ rs6000_current_abi = ABI_AIX; \
+ error (INVALID_64BIT, "call"); \
+ } \
+ dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
+ if (target_flags & MASK_RELOCATABLE) \
+ { \
+ target_flags &= ~MASK_RELOCATABLE; \
+ error (INVALID_64BIT, "relocatable"); \
+ } \
+ if (target_flags & MASK_EABI) \
+ { \
+ target_flags &= ~MASK_EABI; \
+ error (INVALID_64BIT, "eabi"); \
+ } \
+ if (TARGET_PROTOTYPE) \
+ { \
+ target_prototype = 0; \
+ error (INVALID_64BIT, "prototype"); \
+ } \
+ if ((target_flags & MASK_POWERPC64) == 0) \
+ { \
+ target_flags |= MASK_POWERPC64; \
+ error ("-m64 requires a PowerPC64 cpu"); \
+ } \
+ if ((target_flags_explicit & MASK_MINIMAL_TOC) != 0) \
+ { \
+ if (rs6000_explicit_options.cmodel \
+ && rs6000_current_cmodel != CMODEL_SMALL) \
+ error ("-mcmodel incompatible with other toc options"); \
+ SET_CMODEL (CMODEL_SMALL); \
+ } \
+ else \
+ { \
+ if (!rs6000_explicit_options.cmodel) \
+ SET_CMODEL (CMODEL_MEDIUM); \
+ if (rs6000_current_cmodel != CMODEL_SMALL) \
+ { \
+ TARGET_NO_FP_IN_TOC = 0; \
+ TARGET_NO_SUM_IN_TOC = 0; \
+ } \
+ } \
+ } \
+ else \
+ { \
+ if (!RS6000_BI_ARCH_P) \
+ error (INVALID_32BIT, "32"); \
+ if (TARGET_PROFILE_KERNEL) \
+ { \
+ TARGET_PROFILE_KERNEL = 0; \
+ error (INVALID_32BIT, "profile-kernel"); \
+ } \
+ if (rs6000_explicit_options.cmodel) \
+ { \
+ SET_CMODEL (CMODEL_SMALL); \
+ error (INVALID_32BIT, "cmodel"); \
+ } \
+ } \
+ } \
+ while (0)
+
+#ifdef RS6000_BI_ARCH
+
+#undef OPTION_TARGET_CPU_DEFAULT
+#define OPTION_TARGET_CPU_DEFAULT \
+ (((TARGET_DEFAULT ^ target_flags) & MASK_64BIT) \
+ ? (char *) 0 : TARGET_CPU_DEFAULT)
+
+#endif
+
+#undef ASM_DEFAULT_SPEC
+#undef ASM_SPEC
+#undef LINK_OS_LINUX_SPEC
+
+/* FIXME: This will quite possibly choose the wrong dynamic linker. */
+#undef LINK_OS_GNU_SPEC
+#define LINK_OS_GNU_SPEC LINK_OS_LINUX_SPEC
+
+#ifndef RS6000_BI_ARCH
+#define ASM_DEFAULT_SPEC "-mppc64"
+#define ASM_SPEC "%(asm_spec64) %(asm_spec_common)"
+#define LINK_OS_LINUX_SPEC "%(link_os_linux_spec64)"
+#else
+#if DEFAULT_ARCH64_P
+#define ASM_DEFAULT_SPEC "-mppc%{!m32:64}"
+#define ASM_SPEC "%{m32:%(asm_spec32)}%{!m32:%(asm_spec64)} %(asm_spec_common)"
+#define LINK_OS_LINUX_SPEC "%{m32:%(link_os_linux_spec32)}%{!m32:%(link_os_linux_spec64)}"
+#else
+#define ASM_DEFAULT_SPEC "-mppc%{m64:64}"
+#define ASM_SPEC "%{!m64:%(asm_spec32)}%{m64:%(asm_spec64)} %(asm_spec_common)"
+#define LINK_OS_LINUX_SPEC "%{!m64:%(link_os_linux_spec32)}%{m64:%(link_os_linux_spec64)}"
+#endif
+#endif
+
+#define ASM_SPEC32 "-a32 \
+%{mrelocatable} %{mrelocatable-lib} %{fpic:-K PIC} %{fPIC:-K PIC} \
+%{memb} %{!memb: %{msdata=eabi: -memb}} \
+%{!mlittle: %{!mlittle-endian: %{!mbig: %{!mbig-endian: \
+ %{mcall-freebsd: -mbig} \
+ %{mcall-i960-old: -mlittle} \
+ %{mcall-linux: -mbig} \
+ %{mcall-gnu: -mbig} \
+ %{mcall-netbsd: -mbig} \
+}}}}"
+
+#define ASM_SPEC64 "-a64"
+
+#define ASM_SPEC_COMMON "%(asm_cpu) \
+%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \
+%{mlittle} %{mlittle-endian} %{mbig} %{mbig-endian}"
+
+#undef SUBSUBTARGET_EXTRA_SPECS
+#define SUBSUBTARGET_EXTRA_SPECS \
+ { "asm_spec_common", ASM_SPEC_COMMON }, \
+ { "asm_spec32", ASM_SPEC32 }, \
+ { "asm_spec64", ASM_SPEC64 }, \
+ { "link_os_linux_spec32", LINK_OS_LINUX_SPEC32 }, \
+ { "link_os_linux_spec64", LINK_OS_LINUX_SPEC64 },
+
+#undef MULTILIB_DEFAULTS
+#if DEFAULT_ARCH64_P
+#define MULTILIB_DEFAULTS { "m64" }
+#else
+#define MULTILIB_DEFAULTS { "m32" }
+#endif
+
+#ifndef RS6000_BI_ARCH
+
+/* 64-bit PowerPC Linux is always big-endian. */
+#undef TARGET_LITTLE_ENDIAN
+#define TARGET_LITTLE_ENDIAN 0
+
+/* 64-bit PowerPC Linux always has a TOC. */
+#undef TARGET_TOC
+#define TARGET_TOC 1
+
+/* Some things from sysv4.h that we don't do when 64-bit.  */
+#undef TARGET_RELOCATABLE
+#define TARGET_RELOCATABLE 0
+#undef TARGET_EABI
+#define TARGET_EABI 0
+#undef TARGET_PROTOTYPE
+#define TARGET_PROTOTYPE 0
+#undef RELOCATABLE_NEEDS_FIXUP
+#define RELOCATABLE_NEEDS_FIXUP 0
+
+#endif
+
+/* We use glibc _mcount for profiling. */
+#define NO_PROFILE_COUNTERS 1
+#define PROFILE_HOOK(LABEL) \
+ do { if (TARGET_64BIT) output_profile_hook (LABEL); } while (0)
+
+/* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */
+#undef ADJUST_FIELD_ALIGN
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \
+ ? 128 \
+ : (TARGET_64BIT \
+ && TARGET_ALIGN_NATURAL == 0 \
+ && TYPE_MODE (strip_array_types (TREE_TYPE (FIELD))) == DFmode) \
+ ? MIN ((COMPUTED), 32) \
+ : (COMPUTED))
+
+/* PowerPC64 Linux increases the natural record alignment to doubleword
+   if the first field is an FP double, but only in power alignment
+   mode; see the illustration below.  */
+#undef ROUND_TYPE_ALIGN
+#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \
+ ((TARGET_64BIT \
+ && (TREE_CODE (STRUCT) == RECORD_TYPE \
+ || TREE_CODE (STRUCT) == UNION_TYPE \
+ || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \
+ && TARGET_ALIGN_NATURAL == 0) \
+ ? rs6000_special_round_type_align (STRUCT, COMPUTED, SPECIFIED) \
+ : MAX ((COMPUTED), (SPECIFIED)))
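
A worked illustration of the two macros above (layouts assume the power alignment mode they implement, i.e. TARGET_ALIGN_NATURAL == 0 via -malign-power, on 64-bit Linux):

    struct a { double d; };         /* first field is a double: record
                                       alignment is raised to 8 via
                                       rs6000_special_round_type_align */
    struct b { int i; double d; };  /* d is not the first field, so
                                       ADJUST_FIELD_ALIGN caps it at 32
                                       bits: offsetof (struct b, d) == 4
                                       and sizeof (struct b) == 12 */
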
+
+/* Use the default for compiling target libs. */
+#ifdef IN_TARGET_LIBS
+#undef TARGET_ALIGN_NATURAL
+#define TARGET_ALIGN_NATURAL 1
+#endif
+
+/* Indicate that jump tables go in the text section. */
+#undef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION TARGET_64BIT
+
+/* The linux ppc64 ABI isn't explicit on whether aggregates smaller
+   than a doubleword should be padded upward or downward.  You could
+   reasonably assume that they follow the normal rules for structure
+   layout, treating the parameter area as any other block of memory,
+   then map the reg param area to registers, i.e. pad upward.
+   Setting both of the following defines results in this behavior.
+   Setting just the first one will result in aggregates that fit in a
+   doubleword being padded downward, and others being padded upward.
+   Not a bad idea, as this results in struct { int x; } being passed
+   the same way as an int; see the sketch below.  */
+#define AGGREGATE_PADDING_FIXED TARGET_64BIT
+#define AGGREGATES_PAD_UPWARD_ALWAYS 0
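
A sketch of the struct { int x; } observation above (hypothetical declarations; register placement per the big-endian ELF ABI):

    struct s { int x; };
    extern void f (int x);       /* x arrives in the low 32 bits of r3 */
    extern void g (struct s s);  /* padded downward, so on this
                                    big-endian ABI s.x occupies the same
                                    low 32 bits of r3 as the plain int */
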
+
+/* Specify padding for the last element of a block move between
+ registers and memory. FIRST is nonzero if this is the only
+ element. */
+#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \
+ (!(FIRST) ? upward : FUNCTION_ARG_PADDING (MODE, TYPE))
+
+/* Linux doesn't support saving and restoring 64-bit regs in a 32-bit
+ process. */
+#define OS_MISSING_POWERPC64 !TARGET_64BIT
+
+#ifdef SINGLE_LIBC
+#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC)
+#else
+#define OPTION_GLIBC (linux_libc == LIBC_GLIBC)
+#endif
+
+/* glibc has float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS (OPTION_GLIBC)
+
+/* Whether we have sincos that follows the GNU extension. */
+#undef TARGET_HAS_SINCOS
+#define TARGET_HAS_SINCOS (OPTION_GLIBC)
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ if (TARGET_64BIT) \
+ { \
+ builtin_define ("__PPC__"); \
+ builtin_define ("__PPC64__"); \
+ builtin_define ("__powerpc__"); \
+ builtin_define ("__powerpc64__"); \
+ builtin_assert ("cpu=powerpc64"); \
+ builtin_assert ("machine=powerpc64"); \
+ } \
+ else \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define_std ("powerpc"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ } \
+ while (0)
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_linux)"
+
+/* The GNU C++ standard library currently requires _GNU_SOURCE to be
+   defined on glibc-based systems.  This temporary hack accomplishes
+   that; it should go away as soon as libstdc++-v3 has a real fix.  */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+#undef LINK_SHLIB_SPEC
+#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
+
+#undef LIB_DEFAULT_SPEC
+#define LIB_DEFAULT_SPEC "%(lib_linux)"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "%(startfile_linux)"
+
+#undef ENDFILE_DEFAULT_SPEC
+#define ENDFILE_DEFAULT_SPEC "%(endfile_linux)"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "%(link_start_linux)"
+
+#undef LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
+
+#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
+#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld64.so.1"
+#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
+#if DEFAULT_LIBC == LIBC_UCLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}"
+#elif DEFAULT_LIBC == LIBC_GLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}"
+#else
+#error "Unsupported DEFAULT_LIBC"
+#endif
+#define LINUX_DYNAMIC_LINKER32 \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER32, UCLIBC_DYNAMIC_LINKER32)
+#define LINUX_DYNAMIC_LINKER64 \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER64, UCLIBC_DYNAMIC_LINKER64)
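
Expanding the macros by hand for the DEFAULT_LIBC == LIBC_GLIBC case, LINUX_DYNAMIC_LINKER32 becomes the spec below: the uClibc loader is chosen only when -muclibc is given, otherwise the glibc one.

    %{muclibc:/lib/ld-uClibc.so.0;:/lib/ld.so.1}
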
+
+
+#define LINK_OS_LINUX_SPEC32 "-m elf32ppclinux %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER32 "}}"
+
+#define LINK_OS_LINUX_SPEC64 "-m elf64ppc %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER64 "}}"
+
+#undef TOC_SECTION_ASM_OP
+#define TOC_SECTION_ASM_OP \
+ (TARGET_64BIT \
+ ? "\t.section\t\".toc\",\"aw\"" \
+ : "\t.section\t\".got\",\"aw\"")
+
+#undef MINIMAL_TOC_SECTION_ASM_OP
+#define MINIMAL_TOC_SECTION_ASM_OP \
+ (TARGET_64BIT \
+ ? "\t.section\t\".toc1\",\"aw\"" \
+ : ((TARGET_RELOCATABLE || flag_pic) \
+ ? "\t.section\t\".got2\",\"aw\"" \
+ : "\t.section\t\".got1\",\"aw\""))
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC64 GNU/Linux)");
+
+/* Must be at least as big as our pointer type. */
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int")
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+/* Override rs6000.h definition. */
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* PowerPC no-op instruction. */
+#undef RS6000_CALL_GLUE
+#define RS6000_CALL_GLUE (TARGET_64BIT ? "nop" : "cror 31,31,31")
+
+#undef RS6000_MCOUNT
+#define RS6000_MCOUNT "_mcount"
+
+#ifdef __powerpc64__
+/* _init and _fini functions are built from bits spread across many
+ object files, each potentially with a different TOC pointer. For
+ that reason, place a nop after the call so that the linker can
+ restore the TOC pointer if a TOC adjusting call stub is needed. */
+#if DOT_SYMBOLS
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n" \
+" bl ." #FUNC "\n" \
+" nop\n" \
+" .previous");
+#else
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n" \
+" bl " #FUNC "\n" \
+" nop\n" \
+" .previous");
+#endif
+#endif
+
+/* FP save and restore routines. */
+#undef SAVE_FP_PREFIX
+#define SAVE_FP_PREFIX (TARGET_64BIT ? "._savef" : "_savefpr_")
+#undef SAVE_FP_SUFFIX
+#define SAVE_FP_SUFFIX ""
+#undef RESTORE_FP_PREFIX
+#define RESTORE_FP_PREFIX (TARGET_64BIT ? "._restf" : "_restfpr_")
+#undef RESTORE_FP_SUFFIX
+#define RESTORE_FP_SUFFIX ""
+
+/* Dwarf2 debugging. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* This is how to declare the size of a function. */
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do \
+ { \
+ if (!flag_inhibit_size_directive) \
+ { \
+ fputs ("\t.size\t", (FILE)); \
+ if (TARGET_64BIT && DOT_SYMBOLS) \
+ putc ('.', (FILE)); \
+ assemble_name ((FILE), (FNAME)); \
+ fputs (",.-", (FILE)); \
+ rs6000_output_function_entry (FILE, FNAME); \
+ putc ('\n', (FILE)); \
+ } \
+ } \
+ while (0)
+
+/* Return nonzero if this entry is to be written into the constant
+ pool in a special way. We do so if this is a SYMBOL_REF, LABEL_REF
+ or a CONST containing one of them. If -mfp-in-toc (the default),
+ we also do this for floating-point constants. We actually can only
+ do this if the FP formats of the target and host machines are the
+ same, but we can't check that since not every file that uses
+ the macros includes real.h. We also do this when we can write the
+ entry into the TOC and the entry is not larger than a TOC entry. */
+
+#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \
+ (TARGET_TOC \
+ && (GET_CODE (X) == SYMBOL_REF \
+ || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST_INT \
+ && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \
+ || (GET_CODE (X) == CONST_DOUBLE \
+ && ((TARGET_64BIT \
+ && (TARGET_MINIMAL_TOC \
+ || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
+ && ! TARGET_NO_FP_IN_TOC))) \
+ || (!TARGET_64BIT \
+ && !TARGET_NO_FP_IN_TOC \
+ && !TARGET_RELOCATABLE \
+ && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
+ && BITS_PER_WORD == HOST_BITS_PER_INT)))))
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ ((TARGET_64BIT || flag_pic || TARGET_RELOCATABLE) \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \
+ | (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \
+ : DW_EH_PE_absptr)
+
+/* For backward compatibility, we must continue to use the AIX
+ structure return convention. */
+#undef DRAFT_V4_STRUCT_RET
+#define DRAFT_V4_STRUCT_RET (!TARGET_64BIT)
+
+#define TARGET_POSIX_IO
+
+#define LINK_GCC_C_SEQUENCE_SPEC \
+ "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}"
+
+/* Use --as-needed -lgcc_s for eh support. */
+#ifdef HAVE_LD_AS_NEEDED
+#define USE_LD_AS_NEEDED 1
+#endif
+
+#define MD_UNWIND_SUPPORT "config/rs6000/linux-unwind.h"
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* ppc32 glibc provides __stack_chk_guard in -0x7008(2);
+   ppc64 glibc provides it at -0x7010(13).  */
+#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? -0x7010 : -0x7008)
+#endif
+
+#define POWERPC_LINUX
+
+/* ppc{32,64} linux has 128-bit long double support in glibc 2.4 and later. */
+#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128
+#endif
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+/* The default value isn't sufficient in 64-bit mode. */
+#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024)
diff --git a/gcc/config/rs6000/linux64.opt b/gcc/config/rs6000/linux64.opt
new file mode 100644
index 000000000..28177f1ed
--- /dev/null
+++ b/gcc/config/rs6000/linux64.opt
@@ -0,0 +1,28 @@
+; Options for 64-bit PowerPC Linux.
+;
+; Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mprofile-kernel
+Target Report Var(profile_kernel) Save
+Call mcount for profiling before a function prologue
+
+mcmodel=
+Target RejectNegative Joined
+Select code model
diff --git a/gcc/config/rs6000/linuxaltivec.h b/gcc/config/rs6000/linuxaltivec.h
new file mode 100644
index 000000000..a6e1523ea
--- /dev/null
+++ b/gcc/config/rs6000/linuxaltivec.h
@@ -0,0 +1,30 @@
+/* Definitions of target machine for GNU compiler,
+ for AltiVec enhanced PowerPC machines running GNU/Linux.
+ Copyright (C) 2001, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC AltiVec GNU/Linux)");
+
+/* Override rs6000.h and sysv4.h definition. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_ALTIVEC)
+
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS rs6000_altivec_abi = 1
diff --git a/gcc/config/rs6000/linuxspe.h b/gcc/config/rs6000/linuxspe.h
new file mode 100644
index 000000000..3cef9d9dc
--- /dev/null
+++ b/gcc/config/rs6000/linuxspe.h
@@ -0,0 +1,44 @@
+/* Definitions of target machine for GNU compiler,
+ for PowerPC e500 machines running GNU/Linux.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldy@quesejoda.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC E500 GNU/Linux)");
+
+/* Override rs6000.h and sysv4.h definition. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_STRICT_ALIGN)
+
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+ if (rs6000_select[1].string == NULL) \
+ rs6000_cpu = PROCESSOR_PPC8540; \
+ if (!rs6000_explicit_options.spe_abi) \
+ rs6000_spe_abi = 1; \
+ if (!rs6000_explicit_options.float_gprs) \
+ rs6000_float_gprs = 1; \
+ if (!rs6000_explicit_options.spe) \
+ rs6000_spe = 1; \
+ if (target_flags & MASK_64BIT) \
+ error ("-m64 not supported in this configuration")
+
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc -mspe -me500"
diff --git a/gcc/config/rs6000/lynx.h b/gcc/config/rs6000/lynx.h
new file mode 100644
index 000000000..1e923ea38
--- /dev/null
+++ b/gcc/config/rs6000/lynx.h
@@ -0,0 +1,125 @@
+/* Definitions for Rs6000 running LynxOS.
+ Copyright (C) 1995, 1996, 2000, 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by David Henkel-Wallace, Cygnus Support (gumby@cygnus.com)
+ Rewritten by Adam Nemet, LynuxWorks Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Override the definition in sysv4.h. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (PowerPC/LynxOS)", stderr);
+
+/* Undefine the definition to enable the LynxOS default from the
+ top-level lynx.h. */
+
+#undef SUBTARGET_EXTRA_SPECS
+
+/* Get rid of the spec definitions from rs6000/sysv4.h.  */
+
+#undef CPP_SPEC
+#define CPP_SPEC \
+"%{msoft-float: -D_SOFT_FLOAT} \
+ %(cpp_cpu) \
+ %(cpp_os_lynx)"
+
+/* LynxOS only supports big-endian on PPC, so we override the
+   definition from sysv4.h.  Since the LynxOS 4.0 compiler was set to
+   return every structure in memory regardless of its size, we have
+   to emulate the same behavior here by disabling SVR4 structure
+   returning.  */
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+"%{G*} %{mno-sdata:-msdata=none} \
+ %{maltivec:-mabi=altivec} \
+ -maix-struct-return"
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+"%(asm_cpu) \
+ %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}}"
+
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#undef LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC \
+"%{!msdata=none:%{G*}} %{msdata=none:-G0} \
+ %(link_os_lynx)"
+
+/* Override the definition from sysv4.h. */
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__BIG_ENDIAN__"); \
+ builtin_define ("__powerpc__"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ builtin_define ("__PPC__"); \
+ } \
+ while (0)
+
+/* Override the rs6000.h definition. */
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+/* Override the rs6000.h definition. */
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* LynxOS does not do anything with .fixup, and we also don't want to
+   create a writable section for linkonce.r and linkonce.t.  */
+
+#undef RELOCATABLE_NEEDS_FIXUP
+
+/* Override these from rs6000.h with the generic definition. */
+
+#undef SIZE_TYPE
+#undef ASM_OUTPUT_ALIGN
+#undef PREFERRED_DEBUGGING_TYPE
+
+/* The file rs6000.c defines TARGET_HAVE_TLS unconditionally to the
+   value of HAVE_AS_TLS, which is true whenever configure detects gas
+   support for TLS.  Override the definition to false.  */
+
+#undef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+
+#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
+
+#ifdef CRT_BEGIN
+/* This function is part of crtbegin*.o which is at the beginning of
+ the link and is called from .fini which is usually toward the end
+ of the executable. Make it longcall so that we don't limit the
+ text size of the executables to 32M. */
+
+static void __do_global_dtors_aux (void) __attribute__ ((longcall));
+#endif /* CRT_BEGIN */
+
+#ifdef CRT_END
+/* Similarly here.  This function resides in crtend*.o which is toward
+   the end of the link and is called from .init which is at the
+   beginning.  */
+
+static void __do_global_ctors_aux (void) __attribute__ ((longcall));
+#endif /* CRT_END */
diff --git a/gcc/config/rs6000/milli.exp b/gcc/config/rs6000/milli.exp
new file mode 100644
index 000000000..ea3a2b757
--- /dev/null
+++ b/gcc/config/rs6000/milli.exp
@@ -0,0 +1,7 @@
+#!
+__mulh 0x3100
+__mull 0x3180
+__divss 0x3200
+__divus 0x3280
+__quoss 0x3300
+__quous 0x3380
diff --git a/gcc/config/rs6000/mpc.md b/gcc/config/rs6000/mpc.md
new file mode 100644
index 000000000..415c68872
--- /dev/null
+++ b/gcc/config/rs6000/mpc.md
@@ -0,0 +1,111 @@
+;; Scheduling description for Motorola PowerPC processor cores.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "mpc,mpcfp")
+(define_cpu_unit "iu_mpc,mciu_mpc" "mpc")
+(define_cpu_unit "fpu_mpc" "mpcfp")
+(define_cpu_unit "lsu_mpc,bpu_mpc" "mpc")
+
+;; MPCCORE 32-bit SCIU, MCIU, LSU, FPU, BPU
+;; 505/801/821/823
+
+(define_insn_reservation "mpccore-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "mpccore"))
+ "lsu_mpc")
+
+(define_insn_reservation "mpccore-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "mpccore"))
+ "lsu_mpc")
+
+(define_insn_reservation "mpccore-fpload" 2
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "mpccore"))
+ "lsu_mpc")
+
+(define_insn_reservation "mpccore-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "mpccore"))
+ "iu_mpc")
+
+(define_insn_reservation "mpccore-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "mpccore"))
+ "iu_mpc,iu_mpc")
+
+(define_insn_reservation "mpccore-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "mpccore"))
+ "iu_mpc,iu_mpc,iu_mpc")
+
+(define_insn_reservation "mpccore-imul" 2
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "mpccore"))
+ "mciu_mpc")
+
+; Divide latency varies greatly, from 2 to 11 cycles; use 6 as the average
+(define_insn_reservation "mpccore-idiv" 6
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "mpccore"))
+ "mciu_mpc*6")
+
+(define_insn_reservation "mpccore-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "mpccore"))
+ "iu_mpc,nothing,bpu_mpc")
+
+(define_insn_reservation "mpccore-fpcompare" 2
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "mpccore"))
+ "fpu_mpc,bpu_mpc")
+
+(define_insn_reservation "mpccore-fp" 4
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "mpccore"))
+ "fpu_mpc*2")
+
+(define_insn_reservation "mpccore-dmul" 5
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "mpccore"))
+ "fpu_mpc*5")
+
+(define_insn_reservation "mpccore-sdiv" 10
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "mpccore"))
+ "fpu_mpc*10")
+
+(define_insn_reservation "mpccore-ddiv" 17
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "mpccore"))
+ "fpu_mpc*17")
+
+(define_insn_reservation "mpccore-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "mpccore"))
+ "bpu_mpc")
+
+(define_insn_reservation "mpccore-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr,mfcr,mtcr,isync")
+ (eq_attr "cpu" "mpccore"))
+ "bpu_mpc")
+
diff --git a/gcc/config/rs6000/netbsd.h b/gcc/config/rs6000/netbsd.h
new file mode 100644
index 000000000..de16b3722
--- /dev/null
+++ b/gcc/config/rs6000/netbsd.h
@@ -0,0 +1,93 @@
+/* Definitions of target machine for GNU compiler,
+ for PowerPC NetBSD systems.
+ Copyright 2002, 2003, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_OS_CPP_BUILTINS /* FIXME: sysv4.h should not define this! */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ builtin_define ("__powerpc__"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ } \
+ while (0)
+
+/* Override the default from rs6000.h to avoid conflicts with macros
+ defined in NetBSD header files. */
+
+#undef RS6000_CPU_CPP_ENDIAN_BUILTINS
+#define RS6000_CPU_CPP_ENDIAN_BUILTINS() \
+ do \
+ { \
+ if (BYTES_BIG_ENDIAN) \
+ { \
+ builtin_define ("__BIG_ENDIAN__"); \
+ builtin_assert ("machine=bigendian"); \
+ } \
+ else \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ builtin_assert ("machine=littleendian"); \
+ } \
+ } \
+ while (0)
+
+/* Make GCC agree with <machine/ansi.h>. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* Undo the spec mess from sysv4.h, and just define the specs
+ the way NetBSD systems actually expect. */
+
+#undef CPP_SPEC
+#define CPP_SPEC NETBSD_CPP_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{!msdata=none:%{G*}} %{msdata=none:-G0} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "_start"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC NETBSD_STARTFILE_SPEC
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%(netbsd_endfile_spec)"
+
+#undef LIB_SPEC
+#define LIB_SPEC NETBSD_LIB_SPEC
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \
+ { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC },
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/powerpc ELF)");
+
+#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
diff --git a/gcc/config/rs6000/option-defaults.h b/gcc/config/rs6000/option-defaults.h
new file mode 100644
index 000000000..0ecbe75c0
--- /dev/null
+++ b/gcc/config/rs6000/option-defaults.h
@@ -0,0 +1,64 @@
+/* Definitions of default options for config/rs6000 configurations.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This header needs to be included after any other headers affecting
+ TARGET_DEFAULT. */
+
+#if TARGET_AIX_OS
+#define OPT_64 "maix64"
+#define OPT_32 "maix32"
+#else
+#define OPT_64 "m64"
+#define OPT_32 "m32"
+#endif
+
+#ifndef MASK_64BIT
+#define MASK_64BIT 0
+#endif
+
+#if TARGET_DEFAULT & MASK_64BIT
+#define OPT_ARCH64 "!"OPT_32
+#define OPT_ARCH32 OPT_32
+#else
+#define OPT_ARCH64 OPT_64
+#define OPT_ARCH32 "!"OPT_64
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-cpu is ignored if -mcpu is specified; likewise --with-cpu-32
+ and --with-cpu-64.
+ --with-tune is ignored if -mtune or -mcpu is specified; likewise
+ --with-tune-32 and --with-tune-64.
+   --with-float is ignored if -mhard-float or -msoft-float is
+   specified.  */
+#define OPTION_DEFAULT_SPECS \
+ {"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
+ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \
+ {"cpu_32", "%{" OPT_ARCH32 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"cpu_64", "%{" OPT_ARCH64 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" }
diff --git a/gcc/config/rs6000/paired.h b/gcc/config/rs6000/paired.h
new file mode 100644
index 000000000..57c6ca4fd
--- /dev/null
+++ b/gcc/config/rs6000/paired.h
@@ -0,0 +1,75 @@
+/* PowerPC 750CL user include file.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Revital Eres (eres@il.ibm.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _PAIRED_H
+#define _PAIRED_H
+
+#define vector __attribute__((vector_size(8)))
+
+#define paired_msub __builtin_paired_msub
+#define paired_madd __builtin_paired_madd
+#define paired_nmsub __builtin_paired_nmsub
+#define paired_nmadd __builtin_paired_nmadd
+#define paired_sum0 __builtin_paired_sum0
+#define paired_sum1 __builtin_paired_sum1
+#define paired_div __builtin_paired_divv2sf3
+#define paired_add __builtin_paired_addv2sf3
+#define paired_sub __builtin_paired_subv2sf3
+#define paired_mul __builtin_paired_mulv2sf3
+#define paired_muls0 __builtin_paired_muls0
+#define paired_muls1 __builtin_paired_muls1
+#define paired_madds0 __builtin_paired_madds0
+#define paired_madds1 __builtin_paired_madds1
+#define paired_merge00 __builtin_paired_merge00
+#define paired_merge01 __builtin_paired_merge01
+#define paired_merge10 __builtin_paired_merge10
+#define paired_merge11 __builtin_paired_merge11
+#define paired_abs __builtin_paired_absv2sf2
+#define paired_nabs __builtin_paired_nabsv2sf2
+#define paired_neg __builtin_paired_negv2sf2
+#define paired_sqrt __builtin_paired_sqrtv2sf2
+#define paired_res __builtin_paired_resv2sf2
+#define paired_stx __builtin_paired_stx
+#define paired_lx __builtin_paired_lx
+#define paired_cmpu0 __builtin_paired_cmpu0
+#define paired_cmpu1 __builtin_paired_cmpu1
+#define paired_sel __builtin_paired_selv2sf4
+
+/* Condition register codes for Paired predicates. */
+#define LT 0
+#define GT 1
+#define EQ 2
+#define UN 3
+
+#define paired_cmpu0_un(a,b) __builtin_paired_cmpu0 (UN, (a), (b))
+#define paired_cmpu0_eq(a,b) __builtin_paired_cmpu0 (EQ, (a), (b))
+#define paired_cmpu0_lt(a,b) __builtin_paired_cmpu0 (LT, (a), (b))
+#define paired_cmpu0_gt(a,b) __builtin_paired_cmpu0 (GT, (a), (b))
+#define paired_cmpu1_un(a,b) __builtin_paired_cmpu1 (UN, (a), (b))
+#define paired_cmpu1_eq(a,b) __builtin_paired_cmpu1 (EQ, (a), (b))
+#define paired_cmpu1_lt(a,b) __builtin_paired_cmpu1 (LT, (a), (b))
+#define paired_cmpu1_gt(a,b) __builtin_paired_cmpu1 (GT, (a), (b))
+
+#endif /* _PAIRED_H */
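
A usage sketch for these wrappers (hypothetical function; assumes a 750CL-style target built with paired-single support enabled, so that TARGET_PAIRED_FLOAT holds and paired_madd maps to __builtin_paired_madd per the #defines above):

    #include <paired.h>

    /* v * s + b on both lanes with a single ps_madd.  */
    vector float
    scale_and_bias (vector float v, vector float s, vector float b)
    {
      return paired_madd (v, s, b);
    }
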
diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md
new file mode 100644
index 000000000..0533f0097
--- /dev/null
+++ b/gcc/config/rs6000/paired.md
@@ -0,0 +1,527 @@
+;; PowerPC paired single and double hummer description
+;; Copyright (C) 2007, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by David Edelsohn <edelsohn@gnu.org> and Revital Eres
+;; <eres@il.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with this program; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+[(UNSPEC_INTERHI_V2SF 330)
+ (UNSPEC_INTERLO_V2SF 331)
+ (UNSPEC_EXTEVEN_V2SF 332)
+ (UNSPEC_EXTODD_V2SF 333)
+])
+
+(define_insn "paired_negv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_neg %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "sqrtv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (sqrt:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_rsqrte %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_absv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_abs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "nabsv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (neg:V2SF (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f"))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_nabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_addv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_add %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_subv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_sub %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_mulv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "%f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_mul %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "resv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_PAIRED_FLOAT && flag_finite_math_only"
+ "ps_res %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_divv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_div %0,%1,%2"
+ [(set_attr "type" "sdiv")])
+
+(define_insn "paired_madds0"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (fma:SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))
+ (fma:SF
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 3)
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_madds0 %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_madds1"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (fma:SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))
+ (fma:SF
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 3)
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_madds1 %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*paired_madd"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (match_operand:V2SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_madd %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*paired_msub"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_msub %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*paired_nmadd"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (neg:V2SF
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (match_operand:V2SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_nmadd %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*paired_nmsub"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (neg:V2SF
+ (fma:V2SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (neg:V2SF (match_operand:V2SF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_nmsub %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
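+;; selv2sf4 is a per-lane select on the sign of operand 1.  In effect
+;; (a rough C sketch of the RTL, assuming no NaN complications):
+;;   for (i = 0; i < 2; i++)
+;;     r[i] = (x[i] >= 0.0f) ? a[i] : b[i];
+;; Operand 4 must be a zero FP constant.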
+(define_insn "selv2sf4"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (if_then_else:SF (ge (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (match_operand:SF 4 "zero_fp_constant" "F"))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))
+ (if_then_else:SF (ge (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)]))
+ (match_dup 4))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_dup 3)
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_sel %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*movv2sf_paired"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=Z,f,f,o,r,r,f")
+ (match_operand:V2SF 1 "input_operand" "f,Z,f,r,o,r,W"))]
+ "TARGET_PAIRED_FLOAT
+ && (register_operand (operands[0], V2SFmode)
+ || register_operand (operands[1], V2SFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0: return "psq_stx %1,%y0,0,0";
+ case 1: return "psq_lx %0,%y1,0,0";
+ case 2: return "ps_mr %0,%1";
+ case 3: return "#";
+ case 4: return "#";
+ case 5: return "#";
+ case 6: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fpstore,fpload,fp,*,*,*,*")])
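+;; Alternatives 3-6 above return "#": they are not real instructions
+;; and are split after reload by the V2SF define_split below, which
+;; hands GPR-based moves to rs6000_split_multireg_move.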
+
+(define_insn "paired_stx"
+ [(set (match_operand:V2SF 0 "memory_operand" "=Z")
+ (match_operand:V2SF 1 "gpc_reg_operand" "f"))]
+ "TARGET_PAIRED_FLOAT"
+ "psq_stx %1,%y0,0,0"
+ [(set_attr "type" "fpstore")])
+
+(define_insn "paired_lx"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2SF 1 "memory_operand" "Z"))]
+ "TARGET_PAIRED_FLOAT"
+ "psq_lx %0,%y1,0,0"
+ [(set_attr "type" "fpload")])
+
+
+(define_split
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "input_operand" ""))]
+ "TARGET_PAIRED_FLOAT && reload_completed
+ && gpr_or_gpr_p (operands[0], operands[1])"
+ [(pc)]
+ {
+ rs6000_split_multireg_move (operands[0], operands[1]); DONE;
+ })
+
+(define_insn "paired_cmpu0"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (vec_select:SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_cmpu0 %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+(define_insn "paired_cmpu1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (vec_select:SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_cmpu1 %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+(define_insn "paired_merge00"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_merge00 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_merge01"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_merge01 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_merge10"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_merge10 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_merge11"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_merge11 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_sum0"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF (plus:SF (vec_select:SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)])))
+ (vec_select:SF
+ (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_sum0 %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "paired_sum1"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF (vec_select:SF
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (plus:SF (vec_select:SF
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V2SF 3 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_sum1 %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
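+;; ps_muls0/ps_muls1 multiply both lanes of operand 2 by a single lane
+;; of operand 1 (lane 0 for muls0, lane 1 for muls1), i.e. a
+;; vector-by-splatted-scalar multiply.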
+(define_insn "paired_muls0"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (vec_duplicate:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 0)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_muls0 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+
+(define_insn "paired_muls1"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 2 "gpc_reg_operand" "f")
+ (vec_duplicate:V2SF
+ (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (parallel [(const_int 1)])))))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_muls1 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand 1 "" "")]
+ "TARGET_PAIRED_FLOAT"
+{
+ paired_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*vconcatsf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+ "ps_merge00 %0, %1, %2"
+ [(set_attr "type" "fp")])
+
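+;; There are no direct paired min/max instructions; smin/smax are
+;; open-coded as a subtract feeding ps_sel.  For smin, roughly:
+;;   t = a - b;  r[i] = (t[i] >= 0.0f) ? b[i] : a[i];
+;; which assumes the subtraction faithfully orders finite operands.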
+(define_expand "sminv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (smin:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+
+ emit_insn (gen_subv2sf3 (tmp, operands[1], operands[2]));
+ emit_insn (gen_selv2sf4 (operands[0], tmp, operands[2], operands[1], CONST0_RTX (SFmode)));
+ DONE;
+})
+
+(define_expand "smaxv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (smax:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT"
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+
+ emit_insn (gen_subv2sf3 (tmp, operands[1], operands[2]));
+ emit_insn (gen_selv2sf4 (operands[0], tmp, operands[1], operands[2], CONST0_RTX (SFmode)));
+ DONE;
+})
+
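+;; The reductions first swap the two lanes with ps_merge10, then reuse
+;; the subtract-and-select trick above, so both lanes of the result
+;; hold the max (or min) of the original pair.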
+(define_expand "reduc_smax_v2sf"
+ [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")]
+ "TARGET_PAIRED_FLOAT"
+{
+ rtx tmp_swap = gen_reg_rtx (V2SFmode);
+ rtx tmp = gen_reg_rtx (V2SFmode);
+
+ emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1]));
+ emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap));
+ emit_insn (gen_selv2sf4 (operands[0], tmp, operands[1], tmp_swap, CONST0_RTX (SFmode)));
+
+ DONE;
+})
+
+(define_expand "reduc_smin_v2sf"
+ [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")]
+ "TARGET_PAIRED_FLOAT"
+{
+ rtx tmp_swap = gen_reg_rtx (V2SFmode);
+ rtx tmp = gen_reg_rtx (V2SFmode);
+
+ emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1]));
+ emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap));
+ emit_insn (gen_selv2sf4 (operands[0], tmp, tmp_swap, operands[1], CONST0_RTX (SFmode)));
+
+ DONE;
+})
+
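+;; With only two lanes, interleave-high/extract-even both reduce to
+;; ps_merge00 (lane 0 of each input) and interleave-low/extract-odd to
+;; ps_merge11 (lane 1 of each), so the expanders below just emit merges.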
+(define_expand "vec_interleave_highv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")]
+ UNSPEC_INTERHI_V2SF))]
+ "TARGET_PAIRED_FLOAT"
+ "
+{
+ emit_insn (gen_paired_merge00 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_interleave_lowv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")]
+ UNSPEC_INTERLO_V2SF))]
+ "TARGET_PAIRED_FLOAT"
+ "
+{
+ emit_insn (gen_paired_merge11 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")]
+ UNSPEC_EXTEVEN_V2SF))]
+ "TARGET_PAIRED_FLOAT"
+ "
+{
+ emit_insn (gen_paired_merge00 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")]
+ UNSPEC_EXTODD_V2SF))]
+ "TARGET_PAIRED_FLOAT"
+ "
+{
+ emit_insn (gen_paired_merge11 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+
+(define_expand "reduc_splus_v2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2SF 1 "gpc_reg_operand" "f"))]
+ "TARGET_PAIRED_FLOAT"
+ "
+{
+ emit_insn (gen_paired_sum1 (operands[0], operands[1], operands[1], operands[1]));
+ DONE;
+}")
+
+(define_expand "movmisalignv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2SF 1 "gpc_reg_operand" "f"))]
+ "TARGET_PAIRED_FLOAT"
+{
+ paired_expand_vector_move (operands);
+ DONE;
+})
+
+(define_expand "vcondv2sf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+ (if_then_else:V2SF
+ (match_operator 3 "gpc_reg_operand"
+ [(match_operand:V2SF 4 "gpc_reg_operand" "f")
+ (match_operand:V2SF 5 "gpc_reg_operand" "f")])
+ (match_operand:V2SF 1 "gpc_reg_operand" "f")
+ (match_operand:V2SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_PAIRED_FLOAT && flag_unsafe_math_optimizations"
+ "
+{
+ if (paired_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+}")
+
diff --git a/gcc/config/rs6000/power4.md b/gcc/config/rs6000/power4.md
new file mode 100644
index 000000000..60dbffd58
--- /dev/null
+++ b/gcc/config/rs6000/power4.md
@@ -0,0 +1,410 @@
+;; Scheduling description for IBM Power4 and PowerPC 970 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Sources: IBM Red Book and White Paper on POWER4
+
+;; The POWER4 has 2 iu, 2 fpu, and 2 lsu per engine (2 engines per chip).
+;; Instructions that update more than one register get cracked (split)
+;; into two or more internal ops.  The chip can issue up to 5 internal
+;; ops per cycle.
+
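+;; Notation reminder for the reservation strings below (per the GCC
+;; internals docs on DFA scheduling): "," separates successive cycles,
+;; "|" gives alternatives, "+" reserves units simultaneously, "unit*n"
+;; holds a unit for n cycles, and "nothing" is an idle cycle.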
+(define_automaton "power4iu,power4fpu,power4vec,power4misc")
+
+(define_cpu_unit "iu1_power4,iu2_power4" "power4iu")
+(define_cpu_unit "lsu1_power4,lsu2_power4" "power4misc")
+(define_cpu_unit "fpu1_power4,fpu2_power4" "power4fpu")
+(define_cpu_unit "bpu_power4,cru_power4" "power4misc")
+(define_cpu_unit "vec_power4,vecperm_power4" "power4vec")
+(define_cpu_unit "du1_power4,du2_power4,du3_power4,du4_power4,du5_power4"
+ "power4misc")
+
+(define_reservation "lsq_power4"
+ "(du1_power4,lsu1_power4)\
+ |(du2_power4,lsu2_power4)\
+ |(du3_power4,lsu2_power4)\
+ |(du4_power4,lsu1_power4)")
+
+(define_reservation "lsuq_power4"
+ "((du1_power4+du2_power4,lsu1_power4)\
+ |(du2_power4+du3_power4,lsu2_power4)\
+ |(du3_power4+du4_power4,lsu2_power4))\
+ +(nothing,iu2_power4|nothing,iu1_power4)")
+
+(define_reservation "iq_power4"
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (iu1_power4|iu2_power4)")
+
+(define_reservation "fpq_power4"
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (fpu1_power4|fpu2_power4)")
+
+(define_reservation "vq_power4"
+ "(du1_power4,vec_power4)\
+ |(du2_power4,vec_power4)\
+ |(du3_power4,vec_power4)\
+ |(du4_power4,vec_power4)")
+
+(define_reservation "vpq_power4"
+ "(du1_power4,vecperm_power4)\
+ |(du2_power4,vecperm_power4)\
+ |(du3_power4,vecperm_power4)\
+ |(du4_power4,vecperm_power4)")
+
+
+; Dispatch slots are allocated in order, conforming to program order.
+(absence_set "du1_power4" "du2_power4,du3_power4,du4_power4,du5_power4")
+(absence_set "du2_power4" "du3_power4,du4_power4,du5_power4")
+(absence_set "du3_power4" "du4_power4,du5_power4")
+(absence_set "du4_power4" "du5_power4")
+
+
+; Load/store
+(define_insn_reservation "power4-load" 4 ; 3
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power4"))
+ "lsq_power4")
+
+(define_insn_reservation "power4-load-ext" 5
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4,lsu1_power4\
+ |du2_power4+du3_power4,lsu2_power4\
+ |du3_power4+du4_power4,lsu2_power4),\
+ nothing,nothing,\
+ (iu2_power4|iu1_power4)")
+
+(define_insn_reservation "power4-load-ext-update" 5
+ (and (eq_attr "type" "load_ext_u")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,\
+ lsu1_power4+iu2_power4,nothing,nothing,iu2_power4")
+
+(define_insn_reservation "power4-load-ext-update-indexed" 5
+ (and (eq_attr "type" "load_ext_ux")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,\
+ iu1_power4,lsu2_power4+iu1_power4,nothing,nothing,iu2_power4")
+
+(define_insn_reservation "power4-load-update-indexed" 3
+ (and (eq_attr "type" "load_ux")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,\
+ iu1_power4,lsu2_power4+iu2_power4")
+
+(define_insn_reservation "power4-load-update" 4 ; 3
+ (and (eq_attr "type" "load_u")
+ (eq_attr "cpu" "power4"))
+ "lsuq_power4")
+
+(define_insn_reservation "power4-fpload" 6 ; 5
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "power4"))
+ "lsq_power4")
+
+(define_insn_reservation "power4-fpload-update" 6 ; 5
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power4"))
+ "lsuq_power4")
+
+(define_insn_reservation "power4-vecload" 6 ; 5
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power4"))
+ "lsq_power4")
+
+(define_insn_reservation "power4-store" 12
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "power4"))
+ "((du1_power4,lsu1_power4)\
+ |(du2_power4,lsu2_power4)\
+ |(du3_power4,lsu2_power4)\
+ |(du4_power4,lsu1_power4)),\
+ (iu1_power4|iu2_power4)")
+
+(define_insn_reservation "power4-store-update" 12
+ (and (eq_attr "type" "store_u")
+ (eq_attr "cpu" "power4"))
+ "((du1_power4+du2_power4,lsu1_power4)\
+ |(du2_power4+du3_power4,lsu2_power4)\
+ |(du3_power4+du4_power4,lsu2_power4)\
+ |(du3_power4+du4_power4,lsu2_power4))+\
+ ((nothing,iu2_power4,iu1_power4)\
+ |(nothing,iu2_power4,iu2_power4)\
+ |(nothing,iu1_power4,iu2_power4)\
+ |(nothing,iu1_power4,iu2_power4))")
+
+(define_insn_reservation "power4-store-update-indexed" 12
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,\
+ iu1_power4,lsu2_power4+iu2_power4,iu2_power4")
+
+(define_insn_reservation "power4-fpstore" 12
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power4"))
+ "((du1_power4,lsu1_power4)\
+ |(du2_power4,lsu2_power4)\
+ |(du3_power4,lsu2_power4)\
+ |(du4_power4,lsu1_power4)),\
+ (fpu1_power4|fpu2_power4)")
+
+(define_insn_reservation "power4-fpstore-update" 12
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power4"))
+ "((du1_power4+du2_power4,lsu1_power4)\
+ |(du2_power4+du3_power4,lsu2_power4)\
+ |(du3_power4+du4_power4,lsu2_power4))\
+ +(nothing,(iu1_power4|iu2_power4),(fpu1_power4|fpu2_power4))")
+
+(define_insn_reservation "power4-vecstore" 12
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4,lsu1_power4,vec_power4)\
+ |(du2_power4,lsu2_power4,vec_power4)\
+ |(du3_power4,lsu2_power4,vec_power4)\
+ |(du4_power4,lsu1_power4,vec_power4)")
+
+(define_insn_reservation "power4-llsc" 11
+ (and (eq_attr "type" "load_l,store_c,sync")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,lsu1_power4")
+
+
+; Integer latency is 2 cycles
+(define_insn_reservation "power4-integer" 2
+ (and (eq_attr "type" "integer,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "power4"))
+ "iq_power4")
+
+(define_insn_reservation "power4-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power4"))
+ "((du1_power4+du2_power4)\
+ |(du2_power4+du3_power4)\
+ |(du3_power4+du4_power4)\
+ |(du4_power4+du1_power4)),\
+ ((iu1_power4,nothing,iu2_power4)\
+ |(iu2_power4,nothing,iu2_power4)\
+ |(iu2_power4,nothing,iu1_power4)\
+ |(iu1_power4,nothing,iu1_power4))")
+
+(define_insn_reservation "power4-three" 2
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4+du3_power4|du2_power4+du3_power4+du4_power4\
+ |du3_power4+du4_power4+du1_power4|du4_power4+du1_power4+du2_power4),\
+ ((iu1_power4,nothing,iu2_power4,nothing,iu2_power4)\
+ |(iu2_power4,nothing,iu2_power4,nothing,iu1_power4)\
+ |(iu2_power4,nothing,iu1_power4,nothing,iu1_power4)\
+ |(iu1_power4,nothing,iu2_power4,nothing,iu2_power4))")
+
+(define_insn_reservation "power4-insert" 4
+ (and (eq_attr "type" "insert_word")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\
+ ((iu1_power4,nothing,iu2_power4)\
+ |(iu2_power4,nothing,iu2_power4)\
+ |(iu2_power4,nothing,iu1_power4))")
+
+(define_insn_reservation "power4-cmp" 3
+ (and (eq_attr "type" "cmp,fast_compare")
+ (eq_attr "cpu" "power4"))
+ "iq_power4")
+
+(define_insn_reservation "power4-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\
+ ((iu1_power4,iu2_power4)\
+ |(iu2_power4,iu2_power4)\
+ |(iu2_power4,iu1_power4))")
+
+(define_bypass 4 "power4-compare" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
+
+(define_insn_reservation "power4-lmul-cmp" 7
+ (and (eq_attr "type" "lmul_compare")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\
+ ((iu1_power4*6,iu2_power4)\
+ |(iu2_power4*6,iu2_power4)\
+ |(iu2_power4*6,iu1_power4))")
+
+(define_bypass 10 "power4-lmul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
+
+(define_insn_reservation "power4-imul-cmp" 5
+ (and (eq_attr "type" "imul_compare")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4+du2_power4|du2_power4+du3_power4|du3_power4+du4_power4),\
+ ((iu1_power4*4,iu2_power4)\
+ |(iu2_power4*4,iu2_power4)\
+ |(iu2_power4*4,iu1_power4))")
+
+(define_bypass 8 "power4-imul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
+
+(define_insn_reservation "power4-lmul" 7
+ (and (eq_attr "type" "lmul")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (iu1_power4*6|iu2_power4*6)")
+
+(define_insn_reservation "power4-imul" 5
+ (and (eq_attr "type" "imul")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (iu1_power4*4|iu2_power4*4)")
+
+(define_insn_reservation "power4-imul3" 4
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (iu1_power4*3|iu2_power4*3)")
+
+
+; SPR move only executes in first IU.
+; Integer division only executes in second IU.
+(define_insn_reservation "power4-idiv" 36
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4,iu2_power4*35")
+
+(define_insn_reservation "power4-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4,iu2_power4*67")
+
+
+(define_insn_reservation "power4-mtjmpr" 3
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "power4"))
+ "du1_power4,bpu_power4")
+
+
+; Branches take dispatch Slot 4.  The absence_sets prevent other insns
+; from grabbing previous dispatch slots once this one is assigned.
+(define_insn_reservation "power4-branch" 2
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power4"))
+ "(du5_power4\
+ |du4_power4+du5_power4\
+ |du3_power4+du4_power4+du5_power4\
+ |du2_power4+du3_power4+du4_power4+du5_power4\
+ |du1_power4+du2_power4+du3_power4+du4_power4+du5_power4),bpu_power4")
+
+
+; Condition Register logical ops are split if non-destructive (RT != RB)
+(define_insn_reservation "power4-crlogical" 2
+ (and (eq_attr "type" "cr_logical")
+ (eq_attr "cpu" "power4"))
+ "du1_power4,cru_power4")
+
+(define_insn_reservation "power4-delayedcr" 4
+ (and (eq_attr "type" "delayed_cr")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4,cru_power4,cru_power4")
+
+; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu
+(define_insn_reservation "power4-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,\
+ du1_power4+du2_power4+du3_power4+du4_power4+cru_power4,\
+ cru_power4,cru_power4,cru_power4")
+
+; mfcrf (1 field)
+(define_insn_reservation "power4-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power4"))
+ "du1_power4,cru_power4")
+
+; mtcrf (1 field)
+(define_insn_reservation "power4-mtcr" 4
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power4"))
+ "du1_power4,iu1_power4")
+
+; Basic FP latency is 6 cycles
+(define_insn_reservation "power4-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power4"))
+ "fpq_power4")
+
+(define_insn_reservation "power4-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power4"))
+ "fpq_power4")
+
+(define_insn_reservation "power4-sdiv" 33
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (fpu1_power4*28|fpu2_power4*28)")
+
+(define_insn_reservation "power4-sqrt" 40
+ (and (eq_attr "type" "ssqrt,dsqrt")
+ (eq_attr "cpu" "power4"))
+ "(du1_power4|du2_power4|du3_power4|du4_power4),\
+ (fpu1_power4*35|fpu2_power4*35)")
+
+(define_insn_reservation "power4-isync" 2
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "power4"))
+ "du1_power4+du2_power4+du3_power4+du4_power4,lsu1_power4")
+
+
+; VMX
+(define_insn_reservation "power4-vecsimple" 2
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power4"))
+ "vq_power4")
+
+(define_insn_reservation "power4-veccomplex" 5
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power4"))
+ "vq_power4")
+
+; vecfp compare
+(define_insn_reservation "power4-veccmp" 8
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power4"))
+ "vq_power4")
+
+(define_insn_reservation "power4-vecfloat" 8
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "power4"))
+ "vq_power4")
+
+(define_insn_reservation "power4-vecperm" 2
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power4"))
+ "vpq_power4")
+
+(define_bypass 4 "power4-vecload" "power4-vecperm")
+
+(define_bypass 3 "power4-vecsimple" "power4-vecperm")
+(define_bypass 6 "power4-veccomplex" "power4-vecperm")
+(define_bypass 3 "power4-vecperm"
+ "power4-vecsimple,power4-veccomplex,power4-vecfloat")
+(define_bypass 9 "power4-vecfloat" "power4-vecperm")
+
+(define_bypass 5 "power4-vecsimple,power4-veccomplex"
+ "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
+
+(define_bypass 4 "power4-vecsimple,power4-vecperm" "power4-vecstore")
+(define_bypass 7 "power4-veccomplex" "power4-vecstore")
+(define_bypass 10 "power4-vecfloat" "power4-vecstore")
diff --git a/gcc/config/rs6000/power5.md b/gcc/config/rs6000/power5.md
new file mode 100644
index 000000000..b6db09312
--- /dev/null
+++ b/gcc/config/rs6000/power5.md
@@ -0,0 +1,308 @@
+;; Scheduling description for IBM POWER5 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Sources: IBM Red Book and White Paper on POWER5
+
+;; The POWER5 has 2 iu, 2 fpu, and 2 lsu per engine (2 engines per chip).
+;; Instructions that update more than one register get cracked (split)
+;; into two or more internal ops.  The chip can issue up to 5 internal
+;; ops per cycle.
+
+(define_automaton "power5iu,power5fpu,power5misc")
+
+(define_cpu_unit "iu1_power5,iu2_power5" "power5iu")
+(define_cpu_unit "lsu1_power5,lsu2_power5" "power5misc")
+(define_cpu_unit "fpu1_power5,fpu2_power5" "power5fpu")
+(define_cpu_unit "bpu_power5,cru_power5" "power5misc")
+(define_cpu_unit "du1_power5,du2_power5,du3_power5,du4_power5,du5_power5"
+ "power5misc")
+
+(define_reservation "lsq_power5"
+ "(du1_power5,lsu1_power5)\
+ |(du2_power5,lsu2_power5)\
+ |(du3_power5,lsu2_power5)\
+ |(du4_power5,lsu1_power5)")
+
+(define_reservation "iq_power5"
+ "(du1_power5|du2_power5|du3_power5|du4_power5),\
+ (iu1_power5|iu2_power5)")
+
+(define_reservation "fpq_power5"
+ "(du1_power5|du2_power5|du3_power5|du4_power5),\
+ (fpu1_power5|fpu2_power5)")
+
+; Dispatch slots are allocated in order, conforming to program order.
+(absence_set "du1_power5" "du2_power5,du3_power5,du4_power5,du5_power5")
+(absence_set "du2_power5" "du3_power5,du4_power5,du5_power5")
+(absence_set "du3_power5" "du4_power5,du5_power5")
+(absence_set "du4_power5" "du5_power5")
+
+
+; Load/store
+(define_insn_reservation "power5-load" 4 ; 3
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power5"))
+ "lsq_power5")
+
+(define_insn_reservation "power5-load-ext" 5
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,lsu1_power5,nothing,nothing,iu2_power5")
+
+(define_insn_reservation "power5-load-ext-update" 5
+ (and (eq_attr "type" "load_ext_u")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ lsu1_power5+iu2_power5,nothing,nothing,iu2_power5")
+
+(define_insn_reservation "power5-load-ext-update-indexed" 5
+ (and (eq_attr "type" "load_ext_ux")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ iu1_power5,lsu2_power5+iu1_power5,nothing,nothing,iu2_power5")
+
+(define_insn_reservation "power5-load-update-indexed" 3
+ (and (eq_attr "type" "load_ux")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ iu1_power5,lsu2_power5+iu2_power5")
+
+(define_insn_reservation "power5-load-update" 4 ; 3
+ (and (eq_attr "type" "load_u")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,lsu1_power5+iu2_power5")
+
+(define_insn_reservation "power5-fpload" 6 ; 5
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "power5"))
+ "lsq_power5")
+
+(define_insn_reservation "power5-fpload-update" 6 ; 5
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,lsu1_power5+iu2_power5")
+
+(define_insn_reservation "power5-store" 12
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "power5"))
+ "((du1_power5,lsu1_power5)\
+ |(du2_power5,lsu2_power5)\
+ |(du3_power5,lsu2_power5)\
+ |(du4_power5,lsu1_power5)),\
+ (iu1_power5|iu2_power5)")
+
+(define_insn_reservation "power5-store-update" 12
+ (and (eq_attr "type" "store_u")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,lsu1_power5+iu2_power5,iu1_power5")
+
+(define_insn_reservation "power5-store-update-indexed" 12
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ iu1_power5,lsu2_power5+iu2_power5,iu2_power5")
+
+(define_insn_reservation "power5-fpstore" 12
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power5"))
+ "((du1_power5,lsu1_power5)\
+ |(du2_power5,lsu2_power5)\
+ |(du3_power5,lsu2_power5)\
+ |(du4_power5,lsu1_power5)),\
+ (fpu1_power5|fpu2_power5)")
+
+(define_insn_reservation "power5-fpstore-update" 12
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,lsu1_power5+iu2_power5,fpu1_power5")
+
+(define_insn_reservation "power5-llsc" 11
+ (and (eq_attr "type" "load_l,store_c,sync")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ lsu1_power5")
+
+
+; Integer latency is 2 cycles
+(define_insn_reservation "power5-integer" 2
+ (and (eq_attr "type" "integer,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "power5"))
+ "iq_power5")
+
+(define_insn_reservation "power5-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power5"))
+ "((du1_power5+du2_power5)\
+ |(du2_power5+du3_power5)\
+ |(du3_power5+du4_power5)\
+ |(du4_power5+du1_power5)),\
+ ((iu1_power5,nothing,iu2_power5)\
+ |(iu2_power5,nothing,iu2_power5)\
+ |(iu2_power5,nothing,iu1_power5)\
+ |(iu1_power5,nothing,iu1_power5))")
+
+(define_insn_reservation "power5-three" 2
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5+du2_power5+du3_power5|du2_power5+du3_power5+du4_power5\
+ |du3_power5+du4_power5+du1_power5|du4_power5+du1_power5+du2_power5),\
+ ((iu1_power5,nothing,iu2_power5,nothing,iu2_power5)\
+ |(iu2_power5,nothing,iu2_power5,nothing,iu1_power5)\
+ |(iu2_power5,nothing,iu1_power5,nothing,iu1_power5)\
+ |(iu1_power5,nothing,iu2_power5,nothing,iu2_power5))")
+
+(define_insn_reservation "power5-insert" 4
+ (and (eq_attr "type" "insert_word")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu1_power5,nothing,iu2_power5")
+
+(define_insn_reservation "power5-cmp" 3
+ (and (eq_attr "type" "cmp,fast_compare")
+ (eq_attr "cpu" "power5"))
+ "iq_power5")
+
+(define_insn_reservation "power5-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu1_power5,iu2_power5")
+
+(define_bypass 4 "power5-compare" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf")
+
+(define_insn_reservation "power5-lmul-cmp" 7
+ (and (eq_attr "type" "lmul_compare")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu1_power5*6,iu2_power5")
+
+(define_bypass 10 "power5-lmul-cmp" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf")
+
+(define_insn_reservation "power5-imul-cmp" 5
+ (and (eq_attr "type" "imul_compare")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu1_power5*4,iu2_power5")
+
+(define_bypass 8 "power5-imul-cmp" "power5-branch,power5-crlogical,power5-delayedcr,power5-mfcr,power5-mfcrf")
+
+(define_insn_reservation "power5-lmul" 7
+ (and (eq_attr "type" "lmul")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*6|iu2_power5*6)")
+
+(define_insn_reservation "power5-imul" 5
+ (and (eq_attr "type" "imul")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*4|iu2_power5*4)")
+
+(define_insn_reservation "power5-imul3" 4
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5|du2_power5|du3_power5|du4_power5),(iu1_power5*3|iu2_power5*3)")
+
+
+; SPR move only executes in first IU.
+; Integer division only executes in second IU.
+(define_insn_reservation "power5-idiv" 36
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu2_power5*35")
+
+(define_insn_reservation "power5-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,iu2_power5*67")
+
+
+(define_insn_reservation "power5-mtjmpr" 3
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "power5"))
+ "du1_power5,bpu_power5")
+
+
+; Branches take dispatch Slot 4.  The absence_sets prevent other insns
+; from grabbing previous dispatch slots once this one is assigned.
+(define_insn_reservation "power5-branch" 2
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power5"))
+ "(du5_power5\
+ |du4_power5+du5_power5\
+ |du3_power5+du4_power5+du5_power5\
+ |du2_power5+du3_power5+du4_power5+du5_power5\
+ |du1_power5+du2_power5+du3_power5+du4_power5+du5_power5),bpu_power5")
+
+
+; Condition Register logical ops are split if non-destructive (RT != RB)
+(define_insn_reservation "power5-crlogical" 2
+ (and (eq_attr "type" "cr_logical")
+ (eq_attr "cpu" "power5"))
+ "du1_power5,cru_power5")
+
+(define_insn_reservation "power5-delayedcr" 4
+ (and (eq_attr "type" "delayed_cr")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5,cru_power5,cru_power5")
+
+; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu
+(define_insn_reservation "power5-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ du1_power5+du2_power5+du3_power5+du4_power5+cru_power5,\
+ cru_power5,cru_power5,cru_power5")
+
+; mfcrf (1 field)
+(define_insn_reservation "power5-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power5"))
+ "du1_power5,cru_power5")
+
+; mtcrf (1 field)
+(define_insn_reservation "power5-mtcr" 4
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power5"))
+ "du1_power5,iu1_power5")
+
+; Basic FP latency is 6 cycles
+(define_insn_reservation "power5-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power5"))
+ "fpq_power5")
+
+(define_insn_reservation "power5-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power5"))
+ "fpq_power5")
+
+(define_insn_reservation "power5-sdiv" 33
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5|du2_power5|du3_power5|du4_power5),\
+ (fpu1_power5*28|fpu2_power5*28)")
+
+(define_insn_reservation "power5-sqrt" 40
+ (and (eq_attr "type" "ssqrt,dsqrt")
+ (eq_attr "cpu" "power5"))
+ "(du1_power5|du2_power5|du3_power5|du4_power5),\
+ (fpu1_power5*35|fpu2_power5*35)")
+
+(define_insn_reservation "power5-isync" 2
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "power5"))
+ "du1_power5+du2_power5+du3_power5+du4_power5,\
+ lsu1_power5")
+
diff --git a/gcc/config/rs6000/power6.md b/gcc/config/rs6000/power6.md
new file mode 100644
index 000000000..8d54c8129
--- /dev/null
+++ b/gcc/config/rs6000/power6.md
@@ -0,0 +1,573 @@
+;; Scheduling description for IBM POWER6 processor.
+;; Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+;; Contributed by Peter Steinmetz (steinmtz@us.ibm.com)
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Sources:
+
+;; The POWER6 has 2 iu, 2 fpu, 2 lsu, and 1 bu/cru unit per engine
+;; (2 engines per chip). The chip can issue up to 5 internal ops
+;; per cycle.
+
+(define_automaton "power6iu,power6lsu,power6fpu,power6bu")
+
+(define_cpu_unit "iu1_power6,iu2_power6" "power6iu")
+(define_cpu_unit "lsu1_power6,lsu2_power6" "power6lsu")
+(define_cpu_unit "bpu_power6" "power6bu")
+(define_cpu_unit "fpu1_power6,fpu2_power6" "power6fpu")
+
+(define_reservation "LS2_power6"
+ "lsu1_power6+lsu2_power6")
+
+(define_reservation "FPU_power6"
+ "fpu1_power6|fpu2_power6")
+
+(define_reservation "BRU_power6"
+ "bpu_power6")
+
+(define_reservation "LSU_power6"
+ "lsu1_power6|lsu2_power6")
+
+(define_reservation "LSF_power6"
+ "(lsu1_power6+fpu1_power6)\
+ |(lsu1_power6+fpu2_power6)\
+ |(lsu2_power6+fpu1_power6)\
+ |(lsu2_power6+fpu2_power6)")
+
+(define_reservation "LX2_power6"
+ "(iu1_power6+iu2_power6+lsu1_power6)\
+ |(iu1_power6+iu2_power6+lsu2_power6)")
+
+(define_reservation "FX2_power6"
+ "iu1_power6+iu2_power6")
+
+(define_reservation "X2F_power6"
+ "(iu1_power6+iu2_power6+fpu1_power6)\
+ |(iu1_power6+iu2_power6+fpu2_power6)")
+
+(define_reservation "BX2_power6"
+ "iu1_power6+iu2_power6+bpu_power6")
+
+(define_reservation "LSX_power6"
+ "(iu1_power6+lsu1_power6)\
+ |(iu1_power6+lsu2_power6)\
+ |(iu2_power6+lsu1_power6)\
+ |(iu2_power6+lsu2_power6)")
+
+(define_reservation "FXU_power6"
+ "iu1_power6|iu2_power6")
+
+(define_reservation "XLF_power6"
+ "(iu1_power6+lsu1_power6+fpu1_power6)\
+ |(iu1_power6+lsu1_power6+fpu2_power6)\
+ |(iu1_power6+lsu2_power6+fpu1_power6)\
+ |(iu1_power6+lsu2_power6+fpu2_power6)\
+ |(iu2_power6+lsu1_power6+fpu1_power6)\
+ |(iu2_power6+lsu1_power6+fpu2_power6)\
+ |(iu2_power6+lsu2_power6+fpu1_power6)\
+ |(iu2_power6+lsu2_power6+fpu2_power6)")
+
+(define_reservation "BRX_power6"
+ "(bpu_power6+iu1_power6)\
+ |(bpu_power6+iu2_power6)")
+
+; Load/store
+
+; The default for a value written by a fixed point load
+; that is read/written by a subsequent fixed point op.
+(define_insn_reservation "power6-load" 2 ; fx
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+; define the bypass for the case where the value written
+; by a fixed point load is used as the source value on
+; a store.
+(define_bypass 1 "power6-load,\
+ power6-load-update,\
+ power6-load-update-indexed"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-load-ext" 4 ; fx
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+; define the bypass for the case where the value written
+; by a fixed point load ext is used as the source value on
+; a store.
+(define_bypass 1 "power6-load-ext,\
+ power6-load-ext-update,\
+ power6-load-ext-update-indexed"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-load-update" 2 ; fx
+ (and (eq_attr "type" "load_u")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-load-update-indexed" 2 ; fx
+ (and (eq_attr "type" "load_ux")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-load-ext-update" 4 ; fx
+ (and (eq_attr "type" "load_ext_u")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-load-ext-update-indexed" 4 ; fx
+ (and (eq_attr "type" "load_ext_ux")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-fpload" 1
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+(define_insn_reservation "power6-fpload-update" 1
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-store" 14
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+(define_insn_reservation "power6-store-update" 14
+ (and (eq_attr "type" "store_u")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-store-update-indexed" 14
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power6"))
+ "LX2_power6")
+
+(define_insn_reservation "power6-fpstore" 14
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power6"))
+ "LSF_power6")
+
+(define_insn_reservation "power6-fpstore-update" 14
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power6"))
+ "XLF_power6")
+
+(define_insn_reservation "power6-larx" 3
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power6"))
+ "LS2_power6")
+
+(define_insn_reservation "power6-stcx" 10 ; best case
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power6"))
+ "LSX_power6")
+
+(define_insn_reservation "power6-sync" 11 ; N/A
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+(define_insn_reservation "power6-integer" 1
+ (and (eq_attr "type" "integer")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-isel" 1
+ (and (eq_attr "type" "isel")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-exts" 1
+ (and (eq_attr "type" "exts")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-shift" 1
+ (and (eq_attr "type" "shift")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-insert" 1
+ (and (eq_attr "type" "insert_word")
+ (eq_attr "cpu" "power6"))
+ "FX2_power6")
+
+(define_insn_reservation "power6-insert-dword" 1
+ (and (eq_attr "type" "insert_dword")
+ (eq_attr "cpu" "power6"))
+ "FX2_power6")
+
+; define the bypass for the case where the value written
+; by a fixed point op is used as the source value on a
+; store.
+(define_bypass 1 "power6-integer,\
+ power6-exts,\
+ power6-shift,\
+ power6-insert,\
+ power6-insert-dword"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-cntlz" 2
+ (and (eq_attr "type" "cntlz")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_bypass 1 "power6-cntlz"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-var-rotate" 4
+ (and (eq_attr "type" "var_shift_rotate")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-trap" 1 ; N/A
+ (and (eq_attr "type" "trap")
+ (eq_attr "cpu" "power6"))
+ "BRX_power6")
+
+(define_insn_reservation "power6-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6,iu1_power6)\
+ |(iu1_power6+iu2_power6,nothing)\
+ |(iu1_power6,iu2_power6)\
+ |(iu2_power6,iu1_power6)\
+ |(iu2_power6,iu2_power6)")
+
+(define_insn_reservation "power6-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6,iu1_power6,iu1_power6)\
+ |(iu1_power6,iu1_power6,iu2_power6)\
+ |(iu1_power6,iu2_power6,iu1_power6)\
+ |(iu1_power6,iu2_power6,iu2_power6)\
+ |(iu2_power6,iu1_power6,iu1_power6)\
+ |(iu2_power6,iu1_power6,iu2_power6)\
+ |(iu2_power6,iu2_power6,iu1_power6)\
+ |(iu2_power6,iu2_power6,iu2_power6)\
+ |(iu1_power6+iu2_power6,iu1_power6)\
+ |(iu1_power6+iu2_power6,iu2_power6)\
+ |(iu1_power6,iu1_power6+iu2_power6)\
+ |(iu2_power6,iu1_power6+iu2_power6)")
+
+(define_insn_reservation "power6-cmp" 1
+ (and (eq_attr "type" "cmp")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-compare" 1
+ (and (eq_attr "type" "compare")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-fast-compare" 1
+ (and (eq_attr "type" "fast_compare")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+; define the bypass for the case where the value written
+; by a fixed point rec form op is used as the source value
+; on a store.
+(define_bypass 1 "power6-compare,\
+ power6-fast-compare"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-delayed-compare" 2 ; N/A
+ (and (eq_attr "type" "delayed_compare")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-var-delayed-compare" 4
+ (and (eq_attr "type" "var_delayed_compare")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
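+; The multiply reservations below tie up both IUs plus an FPU for the
+; full 16 cycles, i.e. multiplies are modeled as blocking rather than
+; pipelined in this description.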
+(define_insn_reservation "power6-lmul-cmp" 16
+ (and (eq_attr "type" "lmul_compare")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\
+ |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)");
+
+(define_insn_reservation "power6-imul-cmp" 16
+ (and (eq_attr "type" "imul_compare")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\
+ |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)");
+
+(define_insn_reservation "power6-lmul" 16
+ (and (eq_attr "type" "lmul")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\
+ |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)");
+
+(define_insn_reservation "power6-imul" 16
+ (and (eq_attr "type" "imul")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\
+ |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)");
+
+(define_insn_reservation "power6-imul3" 16
+ (and (eq_attr "type" "imul2,imul3")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*16+iu2_power6*16+fpu1_power6*16)\
+ |(iu1_power6*16+iu2_power6*16+fpu2_power6*16)");
+
+(define_bypass 9 "power6-imul,\
+ power6-lmul,\
+ power6-imul-cmp,\
+ power6-lmul-cmp,\
+ power6-imul3"
+ "power6-store,\
+ power6-store-update,\
+ power6-store-update-indexed,\
+ power6-fpstore,\
+ power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-idiv" 44
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*44+iu2_power6*44+fpu1_power6*44)\
+ |(iu1_power6*44+iu2_power6*44+fpu2_power6*44)");
+
+; The latency for this bypass is yet to be defined
+;(define_bypass ? "power6-idiv"
+; "power6-store,\
+; power6-store-update,\
+; power6-store-update-indexed,\
+; power6-fpstore,\
+; power6-fpstore-update"
+; "store_data_bypass_p")
+
+(define_insn_reservation "power6-ldiv" 56
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power6"))
+ "(iu1_power6*56+iu2_power6*56+fpu1_power6*56)\
+ |(iu1_power6*56+iu2_power6*56+fpu2_power6*56)");
+
+; The latency for this bypass is yet to be defined
+;(define_bypass ? "power6-ldiv"
+; "power6-store,\
+; power6-store-update,\
+; power6-store-update-indexed,\
+; power6-fpstore,\
+; power6-fpstore-update"
+; "store_data_bypass_p")
+
+(define_insn_reservation "power6-mtjmpr" 2
+ (and (eq_attr "type" "mtjmpr,mfjmpr")
+ (eq_attr "cpu" "power6"))
+ "BX2_power6")
+
+(define_bypass 5 "power6-mtjmpr" "power6-branch")
+
+(define_insn_reservation "power6-branch" 2
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power6"))
+ "BRU_power6")
+
+(define_bypass 5 "power6-branch" "power6-mtjmpr")
+
+(define_insn_reservation "power6-crlogical" 3
+ (and (eq_attr "type" "cr_logical")
+ (eq_attr "cpu" "power6"))
+ "BRU_power6")
+
+(define_bypass 3 "power6-crlogical" "power6-branch")
+
+(define_insn_reservation "power6-delayedcr" 3
+ (and (eq_attr "type" "delayed_cr")
+ (eq_attr "cpu" "power6"))
+ "BRU_power6")
+
+(define_insn_reservation "power6-mfcr" 6 ; N/A
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power6"))
+ "BX2_power6")
+
+; mfcrf (1 field)
+(define_insn_reservation "power6-mfcrf" 3 ; N/A
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power6"))
+ "BX2_power6") ;
+
+; mtcrf (1 field)
+(define_insn_reservation "power6-mtcr" 4 ; N/A
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power6"))
+ "BX2_power6")
+
+(define_bypass 9 "power6-mtcr" "power6-branch")
+
+(define_insn_reservation "power6-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+; Any fp instruction that updates a CR has a latency
+; of 6 to a dependent branch
+(define_bypass 6 "power6-fp" "power6-branch")
+
+(define_bypass 1 "power6-fp"
+ "power6-fpstore,power6-fpstore-update"
+ "store_data_bypass_p")
+
+(define_insn_reservation "power6-fpcompare" 8
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 12 "power6-fpcompare"
+ "power6-branch,power6-crlogical")
+
+(define_insn_reservation "power6-sdiv" 26
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_insn_reservation "power6-ddiv" 32
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_insn_reservation "power6-sqrt" 30
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_insn_reservation "power6-dsqrt" 42
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_insn_reservation "power6-isync" 2 ; N/A
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "power6"))
+ "FXU_power6")
+
+(define_insn_reservation "power6-vecload" 1
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power6"))
+ "LSU_power6")
+
+(define_insn_reservation "power6-vecstore" 1
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power6"))
+ "LSF_power6")
+
+(define_insn_reservation "power6-vecsimple" 3
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 6 "power6-vecsimple" "power6-veccomplex,\
+ power6-vecperm")
+
+(define_bypass 5 "power6-vecsimple" "power6-vecfloat")
+
+(define_bypass 4 "power6-vecsimple" "power6-vecstore" )
+
+(define_insn_reservation "power6-veccmp" 1
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 10 "power6-veccmp" "power6-branch")
+
+(define_insn_reservation "power6-vecfloat" 7
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 10 "power6-vecfloat" "power6-vecsimple")
+
+(define_bypass 11 "power6-vecfloat" "power6-veccomplex,\
+ power6-vecperm")
+
+(define_bypass 9 "power6-vecfloat" "power6-vecstore" )
+
+(define_insn_reservation "power6-veccomplex" 7
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 10 "power6-veccomplex" "power6-vecsimple,\
+ power6-vecfloat" )
+
+(define_bypass 9 "power6-veccomplex" "power6-vecperm" )
+
+(define_bypass 8 "power6-veccomplex" "power6-vecstore" )
+
+(define_insn_reservation "power6-vecperm" 4
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power6"))
+ "FPU_power6")
+
+(define_bypass 7 "power6-vecperm" "power6-vecsimple,\
+ power6-vecfloat" )
+
+(define_bypass 6 "power6-vecperm" "power6-veccomplex" )
+
+(define_bypass 5 "power6-vecperm" "power6-vecstore" )
+
+(define_insn_reservation "power6-mftgpr" 8
+ (and (eq_attr "type" "mftgpr")
+ (eq_attr "cpu" "power6"))
+ "X2F_power6")
+
+(define_insn_reservation "power6-mffgpr" 14
+ (and (eq_attr "type" "mffgpr")
+ (eq_attr "cpu" "power6"))
+ "LX2_power6")
+
+(define_bypass 4 "power6-mftgpr" "power6-imul,\
+ power6-lmul,\
+ power6-imul-cmp,\
+ power6-lmul-cmp,\
+ power6-imul3,\
+ power6-idiv,\
+ power6-ldiv" )
diff --git a/gcc/config/rs6000/power7.md b/gcc/config/rs6000/power7.md
new file mode 100644
index 000000000..148a7a52a
--- /dev/null
+++ b/gcc/config/rs6000/power7.md
@@ -0,0 +1,318 @@
+;; Scheduling description for IBM POWER7 processor.
+;; Copyright (C) 2009 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power7iu,power7lsu,power7vsu,power7misc")
+
+(define_cpu_unit "iu1_power7,iu2_power7" "power7iu")
+(define_cpu_unit "lsu1_power7,lsu2_power7" "power7lsu")
+(define_cpu_unit "vsu1_power7,vsu2_power7" "power7vsu")
+(define_cpu_unit "bpu_power7,cru_power7" "power7misc")
+(define_cpu_unit "du1_power7,du2_power7,du3_power7,du4_power7,du5_power7"
+ "power7misc")
+
+
+(define_reservation "DU_power7"
+ "du1_power7|du2_power7|du3_power7|du4_power7")
+
+(define_reservation "DU2F_power7"
+ "du1_power7+du2_power7")
+
+(define_reservation "DU4_power7"
+ "du1_power7+du2_power7+du3_power7+du4_power7")
+
+(define_reservation "FXU_power7"
+ "iu1_power7|iu2_power7")
+
+(define_reservation "VSU_power7"
+ "vsu1_power7|vsu2_power7")
+
+(define_reservation "LSU_power7"
+ "lsu1_power7|lsu2_power7")
+
+
+; Dispatch slots are allocated in order, conforming to program order.
+(absence_set "du1_power7" "du2_power7,du3_power7,du4_power7,du5_power7")
+(absence_set "du2_power7" "du3_power7,du4_power7,du5_power7")
+(absence_set "du3_power7" "du4_power7,du5_power7")
+(absence_set "du4_power7" "du5_power7")
+
+
+; LS Unit
+(define_insn_reservation "power7-load" 2
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-load-ext" 3
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-update" 2
+ (and (eq_attr "type" "load_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-update-indexed" 3
+ (and (eq_attr "type" "load_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update" 4
+ (and (eq_attr "type" "load_ext_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-load-ext-update-indexed" 4
+ (and (eq_attr "type" "load_ext_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpload" 3
+ (and (eq_attr "type" "fpload")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-fpload-update" 3
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store" 6 ; store-forwarding latency
+ (and (eq_attr "type" "store")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-store-update" 6
+ (and (eq_attr "type" "store_u")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-store-update-indexed" 6
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7+FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-fpstore" 6
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-fpstore-update" 6
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7+FXU_power7")
+
+(define_insn_reservation "power7-larx" 3
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-stcx" 10
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecload" 3
+ (and (eq_attr "type" "vecload")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7")
+
+(define_insn_reservation "power7-vecstore" 6
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,LSU_power7+VSU_power7")
+
+(define_insn_reservation "power7-sync" 11
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,LSU_power7")
+
+
+; FX Unit
+(define_insn_reservation "power7-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,exts,isel")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cntlz" 2
+ (and (eq_attr "type" "cntlz")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power7"))
+ "DU_power7+DU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power7"))
+ "DU_power7+DU_power7+DU_power7,FXU_power7,FXU_power7,FXU_power7")
+
+(define_insn_reservation "power7-cmp" 1
+ (and (eq_attr "type" "cmp,fast_compare")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,FXU_power7,FXU_power7")
+
+(define_bypass 3 "power7-cmp,power7-compare" "power7-crlogical,power7-delayedcr")
+
+(define_insn_reservation "power7-mul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,FXU_power7")
+
+(define_insn_reservation "power7-mul-compare" 5
+ (and (eq_attr "type" "imul_compare,lmul_compare")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,FXU_power7,nothing*3,FXU_power7")
+
+(define_insn_reservation "power7-idiv" 36
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,iu1_power7*36|iu2_power7*36")
+
+(define_insn_reservation "power7-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power7"))
+ "DU2F_power7,iu1_power7*68|iu2_power7*68")
+
+(define_insn_reservation "power7-isync" 1 ;
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,FXU_power7")
+
+
+; CR Unit
+(define_insn_reservation "power7-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,FXU_power7")
+
+(define_insn_reservation "power7-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7+FXU_power7")
+
+(define_insn_reservation "power7-crlogical" 3
+ (and (eq_attr "type" "cr_logical")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-delayedcr" 3
+ (and (eq_attr "type" "delayed_cr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcr" 6
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,cru_power7")
+
+(define_insn_reservation "power7-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power7"))
+ "DU4_power7,cru_power7+FXU_power7")
+
+
+; BR Unit
+; Branches take the last dispatch slot (du5).  The absence sets above keep
+; other insns from grabbing earlier dispatch slots once this one is assigned.
+(define_insn_reservation "power7-branch" 3
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power7"))
+ "(du5_power7\
+ |du4_power7+du5_power7\
+ |du3_power7+du4_power7+du5_power7\
+ |du2_power7+du3_power7+du4_power7+du5_power7\
+ |du1_power7+du2_power7+du3_power7+du4_power7+du5_power7),bpu_power7")
+
+
+; VS Unit (includes FP/VSX/VMX/DFP)
+(define_insn_reservation "power7-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_bypass 8 "power7-fp" "power7-branch")
+
+(define_insn_reservation "power7-fpcompare" 4
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sdiv" 26
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-ddiv" 32
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-sqrt" 31
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-dsqrt" 43
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power7"))
+ "DU_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecsimple" 2
+ (and (eq_attr "type" "vecsimple")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-veccmp" 7
+ (and (eq_attr "type" "veccmp")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecfloat" 7
+ (and (eq_attr "type" "vecfloat")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_bypass 6 "power7-vecfloat" "power7-vecfloat")
+
+(define_insn_reservation "power7-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power7"))
+ "du1_power7,VSU_power7")
+
+(define_insn_reservation "power7-vecperm" 3
+ (and (eq_attr "type" "vecperm")
+ (eq_attr "cpu" "power7"))
+ "du2_power7,VSU_power7")
diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h
new file mode 100644
index 000000000..c963eb98a
--- /dev/null
+++ b/gcc/config/rs6000/ppc-asm.h
@@ -0,0 +1,358 @@
+/* PowerPC asm definitions for GNU C.
+
+Copyright (C) 2002, 2003, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Under winnt, 1) gas supports the following as register names, and 2) in
+   particular, defining "toc" breaks the FUNC_START macro, as ".toc" becomes
+   ".2".  */
+
+#define r0 0
+#define sp 1
+#define toc 2
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define r16 16
+#define r17 17
+#define r18 18
+#define r19 19
+#define r20 20
+#define r21 21
+#define r22 22
+#define r23 23
+#define r24 24
+#define r25 25
+#define r26 26
+#define r27 27
+#define r28 28
+#define r29 29
+#define r30 30
+#define r31 31
+
+#define cr0 0
+#define cr1 1
+#define cr2 2
+#define cr3 3
+#define cr4 4
+#define cr5 5
+#define cr6 6
+#define cr7 7
+
+#define f0 0
+#define f1 1
+#define f2 2
+#define f3 3
+#define f4 4
+#define f5 5
+#define f6 6
+#define f7 7
+#define f8 8
+#define f9 9
+#define f10 10
+#define f11 11
+#define f12 12
+#define f13 13
+#define f14 14
+#define f15 15
+#define f16 16
+#define f17 17
+#define f18 18
+#define f19 19
+#define f20 20
+#define f21 21
+#define f22 22
+#define f23 23
+#define f24 24
+#define f25 25
+#define f26 26
+#define f27 27
+#define f28 28
+#define f29 29
+#define f30 30
+#define f31 31
+
+#ifdef __VSX__
+#define f32 32
+#define f33 33
+#define f34 34
+#define f35 35
+#define f36 36
+#define f37 37
+#define f38 38
+#define f39 39
+#define f40 40
+#define f41 41
+#define f42 42
+#define f43 43
+#define f44 44
+#define f45 45
+#define f46 46
+#define f47 47
+#define f48 48
+#define f49 49
+#define f50 50
+#define f51 51
+#define f52 52
+#define f53 53
+#define f54 54
+#define f55 55
+#define f56 56
+#define f57 57
+#define f58 58
+#define f59 59
+#define f60 60
+#define f61 61
+#define f62 62
+#define f63 63
+#endif
+
+#ifdef __ALTIVEC__
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
+#endif
+
+#ifdef __VSX__
+#define vs0 0
+#define vs1 1
+#define vs2 2
+#define vs3 3
+#define vs4 4
+#define vs5 5
+#define vs6 6
+#define vs7 7
+#define vs8 8
+#define vs9 9
+#define vs10 10
+#define vs11 11
+#define vs12 12
+#define vs13 13
+#define vs14 14
+#define vs15 15
+#define vs16 16
+#define vs17 17
+#define vs18 18
+#define vs19 19
+#define vs20 20
+#define vs21 21
+#define vs22 22
+#define vs23 23
+#define vs24 24
+#define vs25 25
+#define vs26 26
+#define vs27 27
+#define vs28 28
+#define vs29 29
+#define vs30 30
+#define vs31 31
+#define vs32 32
+#define vs33 33
+#define vs34 34
+#define vs35 35
+#define vs36 36
+#define vs37 37
+#define vs38 38
+#define vs39 39
+#define vs40 40
+#define vs41 41
+#define vs42 42
+#define vs43 43
+#define vs44 44
+#define vs45 45
+#define vs46 46
+#define vs47 47
+#define vs48 48
+#define vs49 49
+#define vs50 50
+#define vs51 51
+#define vs52 52
+#define vs53 53
+#define vs54 54
+#define vs55 55
+#define vs56 56
+#define vs57 57
+#define vs58 58
+#define vs59 59
+#define vs60 60
+#define vs61 61
+#define vs62 62
+#define vs63 63
+#endif
+
+/*
+ * Macros to glue together two tokens.
+ */
+
+#ifdef __STDC__
+#define XGLUE(a,b) a##b
+#else
+#define XGLUE(a,b) a/**/b
+#endif
+
+#define GLUE(a,b) XGLUE(a,b)
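+
+/* For example, GLUE(.,name) pastes its arguments into the single token
+   `.name', the label used below for the code body under the
+   function-descriptor ABIs.  */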
+
+/*
+ * Macros to begin and end a function written in assembler. If -mcall-aixdesc
+ * or -mcall-nt, create a function descriptor with the given name, and create
+ * the real function with one or two leading periods respectively.
+ */
+
+#if defined (__powerpc64__)
+#define FUNC_NAME(name) GLUE(.,name)
+#define JUMP_TARGET(name) FUNC_NAME(name)
+#define FUNC_START(name) \
+ .section ".opd","aw"; \
+name: \
+ .quad GLUE(.,name); \
+ .quad .TOC.@tocbase; \
+ .quad 0; \
+ .previous; \
+ .type GLUE(.,name),@function; \
+ .globl name; \
+ .globl GLUE(.,name); \
+GLUE(.,name):
+
+#define HIDDEN_FUNC(name) \
+ FUNC_START(name) \
+ .hidden name; \
+ .hidden GLUE(.,name);
+
+#define FUNC_END(name) \
+GLUE(.L,name): \
+ .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name)
+
+#elif defined(_CALL_AIXDESC)
+
+#ifdef _RELOCATABLE
+#define DESC_SECTION ".got2"
+#else
+#define DESC_SECTION ".got1"
+#endif
+
+#define FUNC_NAME(name) GLUE(.,name)
+#define JUMP_TARGET(name) FUNC_NAME(name)
+#define FUNC_START(name) \
+ .section DESC_SECTION,"aw"; \
+name: \
+ .long GLUE(.,name); \
+ .long _GLOBAL_OFFSET_TABLE_; \
+ .long 0; \
+ .previous; \
+ .type GLUE(.,name),@function; \
+ .globl name; \
+ .globl GLUE(.,name); \
+GLUE(.,name):
+
+#define HIDDEN_FUNC(name) \
+ FUNC_START(name) \
+ .hidden name; \
+ .hidden GLUE(.,name);
+
+#define FUNC_END(name) \
+GLUE(.L,name): \
+ .size GLUE(.,name),GLUE(.L,name)-GLUE(.,name)
+
+#else
+
+#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
+#if defined __PIC__ || defined __pic__
+#define JUMP_TARGET(name) FUNC_NAME(name@plt)
+#else
+#define JUMP_TARGET(name) FUNC_NAME(name)
+#endif
+#define FUNC_START(name) \
+ .type FUNC_NAME(name),@function; \
+ .globl FUNC_NAME(name); \
+FUNC_NAME(name):
+
+#define HIDDEN_FUNC(name) \
+ FUNC_START(name) \
+ .hidden FUNC_NAME(name);
+
+#define FUNC_END(name) \
+GLUE(.L,name): \
+ .size FUNC_NAME(name),GLUE(.L,name)-FUNC_NAME(name)
+#endif
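+
+/* Usage sketch (hypothetical `double_it' routine, not part of this
+   header): an assembly source built for any of the ABIs above would be
+   written as
+
+       #include "ppc-asm.h"
+
+       FUNC_START(double_it)
+               add r3,r3,r3
+               blr
+       FUNC_END(double_it)
+
+   with FUNC_START/FUNC_END expanding to the descriptor, label, and
+   .size bookkeeping appropriate for the selected ABI.  */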
+
+#ifdef IN_GCC
+/* For HAVE_GAS_CFI_DIRECTIVE. */
+#include "auto-host.h"
+
+#ifdef HAVE_GAS_CFI_DIRECTIVE
+# define CFI_STARTPROC .cfi_startproc
+# define CFI_ENDPROC .cfi_endproc
+# define CFI_OFFSET(reg, off) .cfi_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+# define CFI_RESTORE(reg) .cfi_restore reg
+#else
+# define CFI_STARTPROC
+# define CFI_ENDPROC
+# define CFI_OFFSET(reg, off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_RESTORE(reg)
+#endif
+#endif
+
+#if defined __linux__ && !defined __powerpc64__
+ .section .note.GNU-stack
+ .previous
+#endif
diff --git a/gcc/config/rs6000/ppc64-fp.c b/gcc/config/rs6000/ppc64-fp.c
new file mode 100644
index 000000000..62861ee16
--- /dev/null
+++ b/gcc/config/rs6000/ppc64-fp.c
@@ -0,0 +1,239 @@
+/* Functions needed for soft-float on powerpc64-linux, copied from
+ libgcc2.c with macros expanded to force the use of specific types.
+
+ Copyright (C) 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2006, 2009 Free Software Foundation,
+ Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#if defined(__powerpc64__) || defined (__64BIT__) || defined(__ppc64__)
+#define TMODES
+#include "config/fp-bit.h"
+
+extern DItype __fixtfdi (TFtype);
+extern DItype __fixdfdi (DFtype);
+extern DItype __fixsfdi (SFtype);
+extern USItype __fixunsdfsi (DFtype);
+extern USItype __fixunssfsi (SFtype);
+extern TFtype __floatditf (DItype);
+extern TFtype __floatunditf (UDItype);
+extern DFtype __floatdidf (DItype);
+extern DFtype __floatundidf (UDItype);
+extern SFtype __floatdisf (DItype);
+extern SFtype __floatundisf (UDItype);
+extern DItype __fixunstfdi (TFtype);
+
+static DItype local_fixunssfdi (SFtype);
+static DItype local_fixunsdfdi (DFtype);
+
+DItype
+__fixtfdi (TFtype a)
+{
+ if (a < 0)
+ return - __fixunstfdi (-a);
+ return __fixunstfdi (a);
+}
+
+DItype
+__fixdfdi (DFtype a)
+{
+ if (a < 0)
+ return - local_fixunsdfdi (-a);
+ return local_fixunsdfdi (a);
+}
+
+DItype
+__fixsfdi (SFtype a)
+{
+ if (a < 0)
+ return - local_fixunssfdi (-a);
+ return local_fixunssfdi (a);
+}
+
+USItype
+__fixunsdfsi (DFtype a)
+{
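+ /* 2^31 does not fit in a signed SItype, so values of 2^31 and up are
+    biased down by 2^31 before the signed conversion and re-biased
+    afterwards; the long expression below is just -(2^31 - 1) - 1 = -2^31.
+    (__fixunssfsi below uses the same trick.)  */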
+ if (a >= - (DFtype) (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+ return (SItype) (a + (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+ - (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1);
+ return (SItype) a;
+}
+
+USItype
+__fixunssfsi (SFtype a)
+{
+ if (a >= - (SFtype) (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+ return (SItype) (a + (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+ - (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1);
+ return (SItype) a;
+}
+
+TFtype
+__floatditf (DItype u)
+{
+ DFtype dh, dl;
+
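+ /* Split the 64-bit integer into a signed high word and an unsigned low
+    word, scale the high word by 2^32, and add the two exactly
+    representable halves in TFmode.  */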
+ dh = (SItype) (u >> (sizeof (SItype) * 8));
+ dh *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ dl = (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return (TFtype) dh + (TFtype) dl;
+}
+
+TFtype
+__floatunditf (UDItype u)
+{
+ DFtype dh, dl;
+
+ dh = (USItype) (u >> (sizeof (SItype) * 8));
+ dh *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ dl = (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return (TFtype) dh + (TFtype) dl;
+}
+
+DFtype
+__floatdidf (DItype u)
+{
+ DFtype d;
+
+ d = (SItype) (u >> (sizeof (SItype) * 8));
+ d *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ d += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return d;
+}
+
+DFtype
+__floatundidf (UDItype u)
+{
+ DFtype d;
+
+ d = (USItype) (u >> (sizeof (SItype) * 8));
+ d *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ d += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return d;
+}
+
+SFtype
+__floatdisf (DItype u)
+{
+ DFtype f;
+
+ if (53 < (sizeof (DItype) * 8)
+ && 53 > ((sizeof (DItype) * 8) - 53 + 24))
+ {
+ if (! (- ((DItype) 1 << 53) < u
+ && u < ((DItype) 1 << 53)))
+ {
+ if ((UDItype) u & (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1))
+ {
+ u &= ~ (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1);
+ u |= ((UDItype) 1 << ((sizeof (DItype) * 8) - 53));
+ }
+ }
+ }
+ f = (SItype) (u >> (sizeof (SItype) * 8));
+ f *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ f += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return (SFtype) f;
+}
+
+SFtype
+__floatundisf (UDItype u)
+{
+ DFtype f;
+
+ if (53 < (sizeof (DItype) * 8)
+ && 53 > ((sizeof (DItype) * 8) - 53 + 24))
+ {
+ if (u >= ((UDItype) 1 << 53))
+ {
+ if ((UDItype) u & (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1))
+ {
+ u &= ~ (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1);
+ u |= ((UDItype) 1 << ((sizeof (DItype) * 8) - 53));
+ }
+ }
+ }
+ f = (USItype) (u >> (sizeof (SItype) * 8));
+ f *= 2.0 * (((UDItype) 1) << ((sizeof (SItype) * 8) - 1));
+ f += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+ return (SFtype) f;
+}
+
+DItype
+__fixunstfdi (TFtype a)
+{
+ if (a < 0)
+ return 0;
+
+ /* Compute high word of result, as a flonum. */
+ const TFtype b = (a / (((UDItype) 1) << (sizeof (SItype) * 8)));
+ /* Convert that to fixed (but not to DItype!),
+ and shift it into the high word. */
+ UDItype v = (USItype) b;
+ v <<= (sizeof (SItype) * 8);
+ /* Remove high part from the TFtype, leaving the low part as flonum. */
+ a -= (TFtype) v;
+ /* Convert that to fixed (but not to DItype!) and add it in.
+ Sometimes A comes out negative. This is significant, since
+ A has more bits than a long int does. */
+ if (a < 0)
+ v -= (USItype) (-a);
+ else
+ v += (USItype) a;
+ return v;
+}
+
+/* This version is needed to prevent recursion; fixunsdfdi in libgcc
+   calls fixdfdi, which in turn calls fixunsdfdi.  */
+
+static DItype
+local_fixunsdfdi (DFtype a)
+{
+ USItype hi, lo;
+
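+ /* The truncated quotient is the high 32 bits; subtracting it back out
+    leaves exactly the low 32 bits, since hi * 2^32 is representable in a
+    DFtype without rounding.  */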
+ hi = a / (((UDItype) 1) << (sizeof (SItype) * 8));
+ lo = (a - ((DFtype) hi) * (((UDItype) 1) << (sizeof (SItype) * 8)));
+ return ((UDItype) hi << (sizeof (SItype) * 8)) | lo;
+}
+
+/* This version is needed to prevent recursion; fixunssfdi in libgcc
+   calls fixsfdi, which in turn calls fixunssfdi.  */
+
+static DItype
+local_fixunssfdi (SFtype original_a)
+{
+ DFtype a = original_a;
+ USItype hi, lo;
+
+ hi = a / (((UDItype) 1) << (sizeof (SItype) * 8));
+ lo = (a - ((DFtype) hi) * (((UDItype) 1) << (sizeof (SItype) * 8)));
+ return ((UDItype) hi << (sizeof (SItype) * 8)) | lo;
+}
+
+#endif /* __powerpc64__ */
diff --git a/gcc/config/rs6000/ppu_intrinsics.h b/gcc/config/rs6000/ppu_intrinsics.h
new file mode 100644
index 000000000..0950f33aa
--- /dev/null
+++ b/gcc/config/rs6000/ppu_intrinsics.h
@@ -0,0 +1,727 @@
+/* PPU intrinsics as defined by the C/C++ Language extension for Cell BEA.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* TODO:
+ misc ops (traps)
+ supervisor/hypervisor mode ops. */
+
+#ifndef _PPU_INTRINSICS_H
+#define _PPU_INTRINSICS_H
+
+#if !defined(__PPU__) && !defined(__ppc__) && !defined(__ppc64__) \
+ && !defined(__GNUC__)
+ #error ppu_intrinsics.h included on wrong platform/compiler
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * unsigned int __cntlzw(unsigned int)
+ * unsigned int __cntlzd(unsigned long long)
+ * int __mulhw(int, int)
+ * unsigned int __mulhwu(unsigned int, unsigned int)
+ * long long __mulhd(long long, long long)
+ * unsigned long long __mulhdu(unsigned long long, unsigned long long)
+ *
+ * void __sync(void)
+ * void __isync(void)
+ * void __lwsync(void)
+ * void __eieio(void)
+ *
+ * void __nop(void)
+ * void __cctpl(void)
+ * void __cctpm(void)
+ * void __cctph(void)
+ * void __db8cyc(void)
+ * void __db10cyc(void)
+ * void __db12cyc(void)
+ * void __db16cyc(void)
+ *
+ * void __mtspr(unsigned int spr, unsigned long long value)
+ * unsigned long long __mfspr(unsigned int spr)
+ * unsigned long long __mftb(void)
+ *
+ * void __icbi(void *base)
+ * void __dcbi(void *base)
+ *
+ * void __dcbf(void *base)
+ * void __dcbz(void *base)
+ * void __dcbst(void *base)
+ * void __dcbtst(void *base)
+ * void __dcbt(void *base)
+ * void __dcbt_TH1000(void *EATRUNC, bool D, bool UG, int ID)
+ * void __dcbt_TH1010(bool GO, int S, int UNITCNT, bool T, bool U, int ID)
+ *
+ * unsigned __lwarx(void *base)
+ * unsigned long long __ldarx(void *base)
+ * bool __stwcx(void *base, unsigned value)
+ * bool __stdcx(void *base, unsigned long long value)
+ *
+ * unsigned short __lhbrx(void *base)
+ * unsigned int __lwbrx(void *base)
+ * unsigned long long __ldbrx(void *base)
+ * void __sthbrx(void *base, unsigned short value)
+ * void __stwbrx(void *base, unsigned int value)
+ * void __stdbrx(void *base, unsigned long long value)
+ *
+ * double __fabs(double x)
+ * float __fabsf(float x)
+ * double __fnabs(double x)
+ * float __fnabsf(float x)
+ * double __fmadd(double x, double y, double z)
+ * double __fmsub(double x, double y, double z)
+ * double __fnmadd(double x, double y, double z)
+ * double __fnmsub(double x, double y, double z)
+ * float __fmadds(float x, float y, float z)
+ * float __fmsubs(float x, float y, float z)
+ * float __fnmadds(float x, float y, float z)
+ * float __fnmsubs(float x, float y, float z)
+ * double __fsel(double x, double y, double z)
+ * float __fsels(float x, float y, float z)
+ * double __frsqrte(double x)
+ * float __fres(float x)
+ * double __fsqrt(double x)
+ * float __fsqrts(float x)
+ * long long __fctid(double x)
+ * long long __fctiw(double x)
+ * double __fcfid(long long x)
+ * double __mffs(void)
+ * void __mtfsf(int mask, double value)
+ * void __mtfsfi(int bits, int field)
+ * void __mtfsb0(int)
+ * void __mtfsb1(int)
+ * double __setflm(double)
+ *
+ * dcbt intrinsics
+ * void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID)
+ * void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID)
+ * void __protected_stream_stop_all (void)
+ * void __protected_stream_stop (unsigned int ID)
+ * void __protected_stream_count (unsigned int unit_cnt, unsigned int ID)
+ * void __protected_stream_go (void)
+ */
+
+typedef int __V4SI __attribute__((vector_size(16)));
+
+#define __cntlzw(v) __builtin_clz(v)
+#define __cntlzd(v) __builtin_clzll(v)
+
+#define __mulhw(a,b) __extension__ \
+ ({int result; \
+ __asm__ ("mulhw %0,%1,%2" \
+ : "=r" (result) \
+ : "r" ((int) (a)), \
+ "r" ((int) (b))); \
+ result; })
+
+#define __mulhwu(a,b) __extension__ \
+ ({unsigned int result; \
+ __asm__ ("mulhwu %0,%1,%2" \
+ : "=r" (result) \
+ : "r" ((unsigned int) (a)), \
+ "r" ((unsigned int) (b))); \
+ result; })
+
+#ifdef __powerpc64__
+#define __mulhd(a,b) __extension__ \
+ ({ long long result; \
+ __asm__ ("mulhd %0,%1,%2" \
+ : "=r" (result) \
+ : "r" ((long long) (a)), \
+ "r" ((long long) (b))); \
+ result; })
+
+#define __mulhdu(a,b) __extension__ \
+ ({unsigned long long result; \
+ __asm__ ("mulhdu %0,%1,%2" \
+ : "=r" (result) \
+ : "r" ((unsigned long long) (a)), \
+ "r" ((unsigned long long) (b))); \
+ result; })
+#endif /* __powerpc64__ */
+
+#define __sync() __asm__ volatile ("sync" : : : "memory")
+#define __isync() __asm__ volatile ("isync" : : : "memory")
+#define __lwsync() __asm__ volatile ("lwsync" : : : "memory")
+#define __eieio() __asm__ volatile ("eieio" : : : "memory")
+
+#define __nop() __asm__ volatile ("ori 0,0,0" : : : "memory")
+#define __cctpl() __asm__ volatile ("or 1,1,1" : : : "memory")
+#define __cctpm() __asm__ volatile ("or 2,2,2" : : : "memory")
+#define __cctph() __asm__ volatile ("or 3,3,3" : : : "memory")
+#define __db8cyc() __asm__ volatile ("or 28,28,28" : : : "memory")
+#define __db10cyc() __asm__ volatile ("or 29,29,29" : : : "memory")
+#define __db12cyc() __asm__ volatile ("or 30,30,30" : : : "memory")
+#define __db16cyc() __asm__ volatile ("or 31,31,31" : : : "memory")
+
+#ifdef __powerpc64__
+#define __mtspr(spr, value) \
+ __asm__ volatile ("mtspr %0,%1" : : "n" (spr), "r" (value))
+
+#define __mfspr(spr) __extension__ \
+ ({ unsigned long long result; \
+ __asm__ volatile ("mfspr %0,%1" : "=r" (result) : "n" (spr)); \
+ result; })
+#endif /* __powerpc64__ */
+
+#ifdef __powerpc64__
+/* Work around the hardware bug in the current Cell implementation. */
+#define __mftb() __extension__ \
+ ({ unsigned long long result; \
+ __asm__ volatile ("1: mftb %[current_tb]\n" \
+ "\tcmpwi 7, %[current_tb], 0\n" \
+ "\tbeq- 7, 1b" \
+ : [current_tb] "=r" (result): \
+ :"cr7"); \
+ result; })
+#else
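+/* On 32-bit, read the upper timebase half on both sides of the lower
+   half and retry if a carry ticked the upper word between the reads.  */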
+#define __mftb() __extension__ \
+ ({ unsigned long long result; \
+ unsigned long t; \
+ __asm__ volatile ("1:\n" \
+ "\tmftbu %0\n" \
+ "\tmftb %L0\n" \
+ "\tmftbu %1\n" \
+ "\tcmpw %0,%1\n" \
+ "\tbne 1b" \
+ : "=r" (result), "=r" (t)); \
+ result; })
+#endif /* __powerpc64__ */
+
+#define __dcbf(base) \
+ __asm__ volatile ("dcbf %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __dcbz(base) \
+ __asm__ volatile ("dcbz %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __dcbst(base) \
+ __asm__ volatile ("dcbst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __dcbtst(base) \
+ __asm__ volatile ("dcbtst %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __dcbt(base) \
+ __asm__ volatile ("dcbt %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __icbi(base) \
+ __asm__ volatile ("icbi %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
+
+#define __dcbt_TH1000(EATRUNC, D, UG, ID) \
+ __asm__ volatile ("dcbt %y0,8" \
+ : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (EATRUNC)) & ~0x7F) \
+ | ((((D) & 1) << 6) \
+ | (((UG) & 1) << 5) \
+ | ((ID) & 0xF)))) : : "memory")
+
+#define __dcbt_TH1010(GO, S, UNITCNT, T, U, ID) \
+ __asm__ volatile ("dcbt %y0,10" \
+ : "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (GO) & 1) << 31) \
+ | (((S) & 0x3) << 29) \
+ | (((UNITCNT) & 0x3FF) << 7) \
+ | (((T) & 1) << 6) \
+ | (((U) & 1) << 5) \
+ | ((ID) & 0xF))) : : "memory")
+
+#define __protected_unlimited_stream_set(DIRECTION, ADDR, ID) \
+ __dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 1, (ID))
+
+#define __protected_stream_set(DIRECTION, ADDR, ID) \
+ __dcbt_TH1000 ((ADDR), (DIRECTION)>>1, 0, (ID))
+
+#define __protected_stream_stop_all() \
+ __dcbt_TH1010 (0, 3, 0, 0, 0, 0)
+
+#define __protected_stream_stop(ID) \
+ __dcbt_TH1010 (0, 2, 0, 0, 0, (ID))
+
+#define __protected_stream_count(COUNT, ID) \
+ __dcbt_TH1010 (0, 0, (COUNT), 0, 0, (ID))
+
+#define __protected_stream_go() \
+ __dcbt_TH1010 (1, 0, 0, 0, 0, 0)
+
+#define __lhbrx(base) __extension__ \
+ ({unsigned short result; \
+ typedef struct {char a[2];} halfwordsize; \
+ halfwordsize *ptrp = (halfwordsize*)(void*)(base); \
+ __asm__ ("lhbrx %0,%y1" \
+ : "=r" (result) \
+ : "Z" (*ptrp)); \
+ result; })
+
+#define __lwbrx(base) __extension__ \
+ ({unsigned int result; \
+ typedef struct {char a[4];} wordsize; \
+ wordsize *ptrp = (wordsize*)(void*)(base); \
+ __asm__ ("lwbrx %0,%y1" \
+ : "=r" (result) \
+ : "Z" (*ptrp)); \
+ result; })
+
+
+#ifdef __powerpc64__
+#define __ldbrx(base) __extension__ \
+ ({unsigned long long result; \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ ("ldbrx %0,%y1" \
+ : "=r" (result) \
+ : "Z" (*ptrp)); \
+ result; })
+#else
+#define __ldbrx(base) __extension__ \
+ ({unsigned long long result; \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ ("lwbrx %L0,%y1\n" \
+ "\tlwbrx %0,%y2" \
+ : "=&r" (result) \
+ : "Z" (*ptrp), "Z" (*((char *) ptrp + 4))); \
+ result; })
+#endif /* __powerpc64__ */
+
+
+#define __sthbrx(base, value) do { \
+ typedef struct {char a[2];} halfwordsize; \
+ halfwordsize *ptrp = (halfwordsize*)(void*)(base); \
+ __asm__ ("sthbrx %1,%y0" \
+ : "=Z" (*ptrp) \
+ : "r" (value)); \
+ } while (0)
+
+#define __stwbrx(base, value) do { \
+ typedef struct {char a[4];} wordsize; \
+ wordsize *ptrp = (wordsize*)(void*)(base); \
+ __asm__ ("stwbrx %1,%y0" \
+ : "=Z" (*ptrp) \
+ : "r" (value)); \
+ } while (0)
+
+#ifdef __powerpc64__
+#define __stdbrx(base, value) do { \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ ("stdbrx %1,%y0" \
+ : "=Z" (*ptrp) \
+ : "r" (value)); \
+ } while (0)
+#else
+#define __stdbrx(base, value) do { \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ ("stwbrx %L2,%y0\n" \
+ "\tstwbrx %2,%y1" \
+ : "=Z" (*ptrp), "=Z" (*((char *) ptrp + 4)) \
+ : "r" (value)); \
+ } while (0)
+#endif /* __powerpc64__ */
+
+
+#define __lwarx(base) __extension__ \
+ ({unsigned int result; \
+ typedef struct {char a[4];} wordsize; \
+ wordsize *ptrp = (wordsize*)(void*)(base); \
+ __asm__ volatile ("lwarx %0,%y1" \
+ : "=r" (result) \
+ : "Z" (*ptrp)); \
+ result; })
+
+#ifdef __powerpc64__
+#define __ldarx(base) __extension__ \
+ ({unsigned long long result; \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ volatile ("ldarx %0,%y1" \
+ : "=r" (result) \
+ : "Z" (*ptrp)); \
+ result; })
+#endif /* __powerpc64__ */
+
+#define __stwcx(base, value) __extension__ \
+ ({unsigned int result; \
+ typedef struct {char a[4];} wordsize; \
+ wordsize *ptrp = (wordsize*)(void*)(base); \
+ __asm__ volatile ("stwcx. %2,%y1\n" \
+ "\tmfocrf %0,0x80" \
+ : "=r" (result), \
+ "=Z" (*ptrp) \
+ : "r" (value) : "cr0"); \
+ ((result & 0x20000000) >> 29); })
+
+
+#ifdef __powerpc64__
+#define __stdcx(base, value) __extension__ \
+ ({unsigned long long result; \
+ typedef struct {char a[8];} doublewordsize; \
+ doublewordsize *ptrp = (doublewordsize*)(void*)(base); \
+ __asm__ volatile ("stdcx. %2,%y1\n" \
+ "\tmfocrf %0,0x80" \
+ : "=r" (result), \
+ "=Z" (*ptrp) \
+ : "r" (value) : "cr0"); \
+ ((result & 0x20000000) >> 29); })
+#endif /* __powerpc64__ */
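+
+/* Usage sketch (hypothetical, not part of this header): the
+   load-reserve/store-conditional intrinsics above compose into the
+   classic retry loop, e.g. an atomic 32-bit increment:
+
+       static inline unsigned int
+       atomic_inc (unsigned int *p)
+       {
+         unsigned int v;
+         do
+           v = __lwarx (p) + 1;
+         while (!__stwcx (p, v));
+         return v;
+       }
+
+   __stwcx evaluates to 1 when the store-conditional succeeded.  */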
+
+#define __mffs() __extension__ \
+ ({double result; \
+ __asm__ volatile ("mffs %0" : "=d" (result)); \
+ result; })
+
+#define __mtfsf(mask,value) \
+ __asm__ volatile ("mtfsf %0,%1" : : "n" (mask), "d" ((double) (value)))
+
+#define __mtfsfi(bits,field) \
+ __asm__ volatile ("mtfsfi %0,%1" : : "n" (bits), "n" (field))
+
+#define __mtfsb0(bit) __asm__ volatile ("mtfsb0 %0" : : "n" (bit))
+#define __mtfsb1(bit) __asm__ volatile ("mtfsb1 %0" : : "n" (bit))
+
+#define __setflm(v) __extension__ \
+ ({double result; \
+ __asm__ volatile ("mffs %0\n\tmtfsf 255,%1" \
+ : "=&d" (result) \
+ : "d" ((double) (v))); \
+ result; })
+
+/* __builtin_fabs may perform unnecessary rounding. */
+
+/* Rename __fabs and __fabsf to work around internal prototypes defined
+ in bits/mathcalls.h with some glibc versions. */
+#define __fabs __ppu_fabs
+#define __fabsf __ppu_fabsf
+
+static __inline__ double __fabs(double x) __attribute__((always_inline));
+static __inline__ double
+__fabs(double x)
+{
+ double r;
+ __asm__("fabs %0,%1" : "=d"(r) : "d"(x));
+ return r;
+}
+
+static __inline__ float __fabsf(float x) __attribute__((always_inline));
+static __inline__ float
+__fabsf(float x)
+{
+ float r;
+ __asm__("fabs %0,%1" : "=f"(r) : "f"(x));
+ return r;
+}
+
+static __inline__ double __fnabs(double x) __attribute__((always_inline));
+static __inline__ double
+__fnabs(double x)
+{
+ double r;
+ __asm__("fnabs %0,%1" : "=d"(r) : "d"(x));
+ return r;
+}
+
+static __inline__ float __fnabsf(float x) __attribute__((always_inline));
+static __inline__ float
+__fnabsf(float x)
+{
+ float r;
+ __asm__("fnabs %0,%1" : "=f"(r) : "f"(x));
+ return r;
+}
+
+static __inline__ double __fmadd(double x, double y, double z)
+ __attribute__((always_inline));
+static __inline__ double
+__fmadd(double x, double y, double z)
+{
+ double r;
+ __asm__("fmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
+ return r;
+}
+
+static __inline__ double __fmsub(double x, double y, double z)
+ __attribute__((always_inline));
+static __inline__ double
+__fmsub(double x, double y, double z)
+{
+ double r;
+ __asm__("fmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
+ return r;
+}
+
+static __inline__ double __fnmadd(double x, double y, double z)
+ __attribute__((always_inline));
+static __inline__ double
+__fnmadd(double x, double y, double z)
+{
+ double r;
+ __asm__("fnmadd %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
+ return r;
+}
+
+static __inline__ double __fnmsub(double x, double y, double z)
+ __attribute__((always_inline));
+static __inline__ double
+__fnmsub(double x, double y, double z)
+{
+ double r;
+ __asm__("fnmsub %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
+ return r;
+}
+
+static __inline__ float __fmadds(float x, float y, float z)
+ __attribute__((always_inline));
+static __inline__ float
+__fmadds(float x, float y, float z)
+{
+ float r;
+ __asm__("fmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
+ return r;
+}
+
+static __inline__ float __fmsubs(float x, float y, float z)
+ __attribute__((always_inline));
+static __inline__ float
+__fmsubs(float x, float y, float z)
+{
+ float r;
+ __asm__("fmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
+ return r;
+}
+
+static __inline__ float __fnmadds(float x, float y, float z)
+ __attribute__((always_inline));
+static __inline__ float
+__fnmadds(float x, float y, float z)
+{
+ float r;
+ __asm__("fnmadds %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
+ return r;
+}
+
+static __inline__ float __fnmsubs(float x, float y, float z)
+ __attribute__((always_inline));
+static __inline__ float
+__fnmsubs(float x, float y, float z)
+{
+ float r;
+ __asm__("fnmsubs %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
+ return r;
+}
+
+static __inline__ double __fsel(double x, double y, double z)
+ __attribute__((always_inline));
+static __inline__ double
+__fsel(double x, double y, double z)
+{
+ double r;
+ __asm__("fsel %0,%1,%2,%3" : "=d"(r) : "d"(x),"d"(y),"d"(z));
+ return r;
+}
+
+static __inline__ float __fsels(float x, float y, float z)
+ __attribute__((always_inline));
+static __inline__ float
+__fsels(float x, float y, float z)
+{
+ float r;
+ __asm__("fsel %0,%1,%2,%3" : "=f"(r) : "f"(x),"f"(y),"f"(z));
+ return r;
+}
+
+static __inline__ double __frsqrte(double x) __attribute__((always_inline));
+static __inline__ double
+__frsqrte(double x)
+{
+ double r;
+ __asm__("frsqrte %0,%1" : "=d" (r) : "d" (x));
+ return r;
+}
+
+static __inline__ float __fres(float x) __attribute__((always_inline));
+static __inline__ float
+__fres(float x)
+{
+ float r;
+ __asm__("fres %0,%1" : "=f"(r) : "f"(x));
+ return r;
+}
+
+static __inline__ double __fsqrt(double x) __attribute__((always_inline));
+static __inline__ double
+__fsqrt(double x)
+{
+ double r;
+ __asm__("fsqrt %0,%1" : "=d"(r) : "d"(x));
+ return r;
+}
+
+static __inline__ float __fsqrts(float x) __attribute__((always_inline));
+static __inline__ float
+__fsqrts(float x)
+{
+ float r;
+ __asm__("fsqrts %0,%1" : "=f"(r) : "f"(x));
+ return r;
+}
+
+static __inline__ double __fmul (double a, double b) __attribute__ ((always_inline));
+static __inline__ double
+__fmul(double a, double b)
+{
+ double d;
+ __asm__ ("fmul %0,%1,%2" : "=d" (d) : "d" (a), "d" (b));
+ return d;
+}
+
+static __inline__ float __fmuls (float a, float b) __attribute__ ((always_inline));
+static __inline__ float
+__fmuls (float a, float b)
+{
+ float d;
+ __asm__ ("fmuls %0,%1,%2" : "=d" (d) : "f" (a), "f" (b));
+ return d;
+}
+
+static __inline__ float __frsp (float a) __attribute__ ((always_inline));
+static __inline__ float
+__frsp (float a)
+{
+ float d;
+ __asm__ ("frsp %0,%1" : "=d" (d) : "f" (a));
+ return d;
+}
+
+static __inline__ double __fcfid (long long a) __attribute__((always_inline));
+static __inline__ double
+__fcfid (long long a)
+{
+ double d;
+ __asm__ ("fcfid %0,%1" : "=d" (d) : "d" (a));
+ return d;
+}
+
+static __inline__ long long __fctid (double a) __attribute__ ((always_inline));
+static __inline__ long long
+__fctid (double a)
+{
+ long long d;
+ __asm__ ("fctid %0,%1" : "=d" (d) : "d" (a));
+ return d;
+}
+
+static __inline__ long long __fctidz (double a) __attribute__ ((always_inline));
+static __inline__ long long
+__fctidz (double a)
+{
+ long long d;
+ __asm__ ("fctidz %0,%1" : "=d" (d) : "d" (a));
+ return d;
+}
+
+static __inline__ int __fctiw (double a) __attribute__ ((always_inline));
+static __inline__ int
+__fctiw (double a)
+{
+ unsigned long long d;
+ __asm__ ("fctiw %0,%1" : "=d" (d) : "d" (a));
+ return (int) d;
+}
+
+static __inline__ int __fctiwz (double a) __attribute__ ((always_inline));
+static __inline__ int
+__fctiwz (double a)
+{
+ long long d;
+ __asm__ ("fctiwz %0,%1" : "=d" (d) : "d" (a));
+ return (int) d;
+}
+
+#ifdef __powerpc64__
+#define __rldcl(a,b,mb) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldcl %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (mb)); \
+ d; \
+ })
+
+#define __rldcr(a,b,me) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldcr %0,%1,%2,%3" : "=r" (d) : "r" (a), "r" (b), "i" (me)); \
+ d; \
+ })
+
+#define __rldic(a,sh,mb) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldic %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
+ d; \
+ })
+
+#define __rldicl(a,sh,mb) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldicl %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (mb)); \
+ d; \
+ })
+
+#define __rldicr(a,sh,me) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldicr %0,%1,%2,%3" : "=r" (d) : "r" (a), "i" (sh), "i" (me)); \
+ d; \
+ })
+
+#define __rldimi(a,b,sh,mb) __extension__ \
+ ({ \
+ unsigned long long d; \
+ __asm__ ("rldimi %0,%1,%2,%3" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "0" (a)); \
+ d; \
+ })
+#endif /* __powerpc64__ */
+
+#define __rlwimi(a,b,sh,mb,me) __extension__ \
+ ({ \
+ unsigned int d; \
+ __asm__ ("rlwimi %0,%1,%2,%3,%4" : "=r" (d) : "r" (b), "i" (sh), "i" (mb), "i" (me), "0" (a)); \
+ d; \
+ })
+
+#define __rlwinm(a,sh,mb,me) __extension__ \
+ ({ \
+ unsigned int d; \
+ __asm__ ("rlwinm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "i" (sh), "i" (mb), "i" (me)); \
+ d; \
+ })
+
+#define __rlwnm(a,b,mb,me) __extension__ \
+ ({ \
+ unsigned int d; \
+ __asm__ ("rlwnm %0,%1,%2,%3,%4" : "=r" (d) : "r" (a), "r" (b), "i" (mb), "i" (me)); \
+ d; \
+ })
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PPU_INTRINSICS_H */
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
new file mode 100644
index 000000000..90947452b
--- /dev/null
+++ b/gcc/config/rs6000/predicates.md
@@ -0,0 +1,1423 @@
+;; Predicate definitions for POWER and PowerPC.
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 for anything except PARALLEL.
+(define_predicate "any_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,mem"))
+
+;; Return 1 for any PARALLEL.
+(define_predicate "any_parallel_operand"
+ (match_code "parallel"))
+
+;; Return 1 if op is the COUNT register.
+(define_predicate "count_register_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == CTR_REGNO
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is an Altivec register.
+(define_predicate "altivec_register_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || ALTIVEC_REGNO_P (REGNO (op))
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a VSX register.
+(define_predicate "vsx_register_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || VSX_REGNO_P (REGNO (op))
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on floating point vectors
+;; (either altivec or VSX).
+(define_predicate "vfloat_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || VFLOAT_REGNO_P (REGNO (op))
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on integer vectors
+;; (only altivec, VSX doesn't support integer vectors)
+(define_predicate "vint_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || VINT_REGNO_P (REGNO (op))
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register to do logical operations on (and, or,
+;; xor, etc.)
+(define_predicate "vlogical_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || VLOGICAL_REGNO_P (REGNO (op))
+ || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is the carry register.
+(define_predicate "ca_operand"
+ (and (match_code "reg")
+ (match_test "CA_REGNO_P (REGNO (op))")))
+
+;; Return 1 if op is a signed 5-bit constant integer.
+(define_predicate "s5bit_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15")))
+
+;; Return 1 if op is an unsigned 5-bit constant integer.
+(define_predicate "u5bit_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 31")))
+
+;; Return 1 if op is a signed 8-bit constant integer.
+;; Integer multiplication completes more quickly with such operands.
+(define_predicate "s8bit_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= -128 && INTVAL (op) <= 127")))
+
+;; Return 1 if op is a constant integer that can fit in a D field.
+(define_predicate "short_cint_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_I (op)")))
+
+;; Return 1 if op is a constant integer that can fit in an unsigned D field.
+(define_predicate "u_short_cint_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_K (op)")))
+
+;; Return 1 if op is a constant integer that cannot fit in a signed D field.
+(define_predicate "non_short_cint_operand"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT)
+ (INTVAL (op) + 0x8000) >= 0x10000")))
+
+;; Return 1 if op is a positive constant integer that is an exact power of 2.
+(define_predicate "exact_log2_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) > 0 && exact_log2 (INTVAL (op)) >= 0")))
+
+;; Return 1 if op is a register that is not special.
+(define_predicate "gpc_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "(GET_CODE (op) != REG
+ || (REGNO (op) >= ARG_POINTER_REGNUM
+ && !CA_REGNO_P (REGNO (op)))
+ || REGNO (op) < MQ_REGNO)
+ && !((TARGET_E500_DOUBLE || TARGET_SPE)
+ && invalid_e500_subreg (op, mode))")))
+
+;; Return 1 if op is a register that is a condition register field.
+(define_predicate "cc_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) > LAST_VIRTUAL_REGISTER
+ || CR_REGNO_P (REGNO (op))")))
+
+;; Return 1 if op is a register that is a condition register field not cr0.
+(define_predicate "cc_reg_not_cr0_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) > LAST_VIRTUAL_REGISTER
+ || CR_REGNO_NOT_CR0_P (REGNO (op))")))
+
+;; Return 1 if op is a register that is a condition register field and,
+;; if generating microcode, not cr0.
+(define_predicate "cc_reg_not_micro_cr0_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) > LAST_VIRTUAL_REGISTER
+ || (rs6000_gen_cell_microcode && CR_REGNO_NOT_CR0_P (REGNO (op)))
+ || (!rs6000_gen_cell_microcode && CR_REGNO_P (REGNO (op)))")))
+
+;; Return 1 if op is a constant integer valid for the D field,
+;; or a non-special register.
+(define_predicate "reg_or_short_operand"
+ (if_then_else (match_code "const_int")
+ (match_operand 0 "short_cint_operand")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is a constant integer whose negation is valid for the
+;; D field, or a non-special register.
+;; Do not allow a constant zero because all patterns that call this
+;; predicate use "addic r1,r2,-const" to set carry when r2 is greater than
+;; or equal to const, which does not work for zero.
+(define_predicate "reg_or_neg_short_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "satisfies_constraint_P (op)
+ && INTVAL (op) != 0")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is a constant integer valid for DS field
+;; or non-special register.
+(define_predicate "reg_or_aligned_short_operand"
+ (if_then_else (match_code "const_int")
+ (and (match_operand 0 "short_cint_operand")
+ (match_test "!(INTVAL (op) & 3)"))
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is a constant integer whose high-order 16 bits are zero
+;; or non-special register.
+(define_predicate "reg_or_u_short_operand"
+ (if_then_else (match_code "const_int")
+ (match_operand 0 "u_short_cint_operand")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is any constant integer
+;; or non-special register.
+(define_predicate "reg_or_cint_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is a constant integer valid for addition
+;; or non-special register.
+(define_predicate "reg_or_add_cint_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "(HOST_BITS_PER_WIDE_INT == 32
+ && (mode == SImode || INTVAL (op) < 0x7fff8000))
+ || ((unsigned HOST_WIDE_INT) (INTVAL (op) + 0x80008000)
+ < (unsigned HOST_WIDE_INT) 0x100000000ll)")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is a constant integer valid for subtraction
+;; or non-special register.
+(define_predicate "reg_or_sub_cint_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "(HOST_BITS_PER_WIDE_INT == 32
+ && (mode == SImode || - INTVAL (op) < 0x7fff8000))
+ || ((unsigned HOST_WIDE_INT) (- INTVAL (op)
+ + (mode == SImode
+ ? 0x80000000 : 0x80008000))
+ < (unsigned HOST_WIDE_INT) 0x100000000ll)")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if op is any 32-bit unsigned constant integer
+;; or non-special register.
+(define_predicate "reg_or_logical_cint_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "(GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
+ && INTVAL (op) >= 0)
+ || ((INTVAL (op) & GET_MODE_MASK (mode)
+ & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)")
+ (if_then_else (match_code "const_double")
+ (match_test "GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
+ && mode == DImode
+ && CONST_DOUBLE_HIGH (op) == 0")
+ (match_operand 0 "gpc_reg_operand"))))
+
+;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
+;; with no more than one instruction per word.
+(define_predicate "easy_fp_constant"
+ (match_code "const_double")
+{
+ long k[4];
+ REAL_VALUE_TYPE rv;
+
+ if (GET_MODE (op) != mode
+ || (!SCALAR_FLOAT_MODE_P (mode) && mode != DImode))
+ return 0;
+
+ /* Consider all constants with -msoft-float to be easy. */
+ if ((TARGET_SOFT_FLOAT || TARGET_E500_SINGLE
+ || (TARGET_HARD_FLOAT && (TARGET_SINGLE_FLOAT && ! TARGET_DOUBLE_FLOAT)))
+ && mode != DImode)
+ return 1;
+
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return 0;
+
+ /* If we are using V.4 style PIC, consider all constants to be hard. */
+ if (flag_pic && DEFAULT_ABI == ABI_V4)
+ return 0;
+
+#ifdef TARGET_RELOCATABLE
+ /* Similarly if we are using -mrelocatable, consider all constants
+ to be hard. */
+ if (TARGET_RELOCATABLE)
+ return 0;
+#endif
+
+ switch (mode)
+ {
+ case TFmode:
+ if (TARGET_E500_DOUBLE)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
+
+ return (num_insns_constant_wide ((HOST_WIDE_INT) k[0]) == 1
+ && num_insns_constant_wide ((HOST_WIDE_INT) k[1]) == 1
+ && num_insns_constant_wide ((HOST_WIDE_INT) k[2]) == 1
+ && num_insns_constant_wide ((HOST_WIDE_INT) k[3]) == 1);
+
+ case DFmode:
+ /* The constant 0.0 is easy under VSX. */
+ if (op == CONST0_RTX (DFmode) && VECTOR_UNIT_VSX_P (DFmode))
+ return 1;
+
+ /* Force constants to memory before reload to utilize
+ compress_float_constant.
+ Avoid this when flag_unsafe_math_optimizations is enabled
+ because RDIV division to reciprocal optimization is not able
+ to regenerate the division. */
+ if (TARGET_E500_DOUBLE
+ || (!reload_in_progress && !reload_completed
+ && !flag_unsafe_math_optimizations))
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
+
+ return (num_insns_constant_wide ((HOST_WIDE_INT) k[0]) == 1
+ && num_insns_constant_wide ((HOST_WIDE_INT) k[1]) == 1);
+
+ case SFmode:
+ /* The constant 0.f is easy. */
+ if (op == CONST0_RTX (SFmode))
+ return 1;
+
+ /* Force constants to memory before reload to utilize
+ compress_float_constant.
+ Avoid this when flag_unsafe_math_optimizations is enabled
+ because RDIV division to reciprocal optimization is not able
+ to regenerate the division. */
+ if (!reload_in_progress && !reload_completed
+ && !flag_unsafe_math_optimizations)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, k[0]);
+
+ return num_insns_constant_wide (k[0]) == 1;
+
+ case DImode:
+ return ((TARGET_POWERPC64
+ && GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_LOW (op) == 0)
+ || (num_insns_constant (op, DImode) <= 2));
+
+ case SImode:
+ return 1;
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
+;; vector register without using memory.
+(define_predicate "easy_vector_constant"
+ (match_code "const_vector")
+{
+ /* As the paired vectors are actually FPRs it seems that there is
+ no easy way to load a CONST_VECTOR without using memory. */
+ if (TARGET_PAIRED_FLOAT)
+ return false;
+
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
+ {
+ if (zero_constant (op, mode))
+ return true;
+
+ return easy_altivec_constant (op, mode);
+ }
+
+ if (SPE_VECTOR_MODE (mode))
+ {
+ int cst, cst2;
+ if (zero_constant (op, mode))
+ return true;
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ return false;
+
+ /* Limit SPE vectors to 15 bits signed. These we can generate with:
+ li r0, CONSTANT1
+ evmergelo r0, r0, r0
+ li r0, CONSTANT2
+
+ I don't know how efficient it would be to allow bigger constants,
+ considering we'll have an extra 'ori' for every 'li'. I doubt 5
+ instructions is better than a 64-bit memory load, but I don't
+ have the e500 timing specs. */
+ if (mode == V2SImode)
+ {
+ cst = INTVAL (CONST_VECTOR_ELT (op, 0));
+ cst2 = INTVAL (CONST_VECTOR_ELT (op, 1));
+ return cst >= -0x7fff && cst <= 0x7fff
+ && cst2 >= -0x7fff && cst2 <= 0x7fff;
+ }
+ }
+
+ return false;
+})
+
+;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
+(define_predicate "easy_vector_constant_add_self"
+ (and (match_code "const_vector")
+ (and (match_test "TARGET_ALTIVEC")
+ (match_test "easy_altivec_constant (op, mode)")))
+{
+ HOST_WIDE_INT val;
+ if (mode == V2DImode || mode == V2DFmode)
+ return 0;
+ val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1);
+ val = ((val & 0xff) ^ 0x80) - 0x80;
+ return EASY_VECTOR_15_ADD_SELF (val);
+})
+
+;; Same as easy_vector_constant but only for EASY_VECTOR_MSB.
+(define_predicate "easy_vector_constant_msb"
+ (and (match_code "const_vector")
+ (and (match_test "TARGET_ALTIVEC")
+ (match_test "easy_altivec_constant (op, mode)")))
+{
+ HOST_WIDE_INT val;
+ if (mode == V2DImode || mode == V2DFmode)
+ return 0;
+ val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1);
+ return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
+})
+
+;; Return 1 if operand is constant zero (scalars and vectors).
+(define_predicate "zero_constant"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Return 1 if operand is the floating-point constant 0.0.
+(define_predicate "zero_fp_constant"
+ (and (match_code "const_double")
+ (match_test "SCALAR_FLOAT_MODE_P (mode)
+ && op == CONST0_RTX (mode)")))
+
+;; Return 1 if the operand is in volatile memory. Note that during the
+;; RTL generation phase, memory_operand does not return TRUE for volatile
+;; memory references. So this function allows us to recognize volatile
+;; references where it's safe.
+(define_predicate "volatile_mem_operand"
+ (and (and (match_code "mem")
+ (match_test "MEM_VOLATILE_P (op)"))
+ (if_then_else (match_test "reload_completed")
+ (match_operand 0 "memory_operand")
+ (if_then_else (match_test "reload_in_progress")
+ (match_test "strict_memory_address_p (mode, XEXP (op, 0))")
+ (match_test "memory_address_p (mode, XEXP (op, 0))")))))
+
+;; Return 1 if the operand is an offsettable memory operand.
+(define_predicate "offsettable_mem_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "offsettable_nonstrict_memref_p (op)")))
+
+;; Return 1 if the operand is a memory operand with an address divisible by 4
+(define_predicate "word_offset_memref_operand"
+ (match_operand 0 "memory_operand")
+{
+ /* Address inside MEM. */
+ op = XEXP (op, 0);
+
+ /* Extract address from auto-inc/dec. */
+ if (GET_CODE (op) == PRE_INC
+ || GET_CODE (op) == PRE_DEC)
+ op = XEXP (op, 0);
+ else if (GET_CODE (op) == PRE_MODIFY)
+ op = XEXP (op, 1);
+
+ return (GET_CODE (op) != PLUS
+ || ! REG_P (XEXP (op, 0))
+ || GET_CODE (XEXP (op, 1)) != CONST_INT
+ || INTVAL (XEXP (op, 1)) % 4 == 0);
+})
+
+;; Return 1 if the operand is an indexed or indirect memory operand.
+(define_predicate "indexed_or_indirect_operand"
+ (match_code "mem")
+{
+ op = XEXP (op, 0);
+ if (VECTOR_MEM_ALTIVEC_P (mode)
+ && GET_CODE (op) == AND
+ && GET_CODE (XEXP (op, 1)) == CONST_INT
+ && INTVAL (XEXP (op, 1)) == -16)
+ op = XEXP (op, 0);
+
+ return indexed_or_indirect_address (op, mode);
+})
+
+;; Return 1 if the operand is an indexed or indirect memory operand with an
+;; AND -16 in it, used to recognize when we need to switch to Altivec loads
+;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
+;; while VSX uses the full address and traps)
+(define_predicate "altivec_indexed_or_indirect_operand"
+ (match_code "mem")
+{
+ op = XEXP (op, 0);
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ && GET_CODE (op) == AND
+ && GET_CODE (XEXP (op, 1)) == CONST_INT
+ && INTVAL (XEXP (op, 1)) == -16)
+ return indexed_or_indirect_address (XEXP (op, 0), mode);
+
+ return 0;
+})
+
+;; Return 1 if the operand is an indexed or indirect address.
+(define_special_predicate "indexed_or_indirect_address"
+ (and (match_test "REG_P (op)
+ || (GET_CODE (op) == PLUS
+ /* Omit testing REG_P (XEXP (op, 0)). */
+ && REG_P (XEXP (op, 1)))")
+ (match_operand 0 "address_operand")))
+
+;; Used for the destination of the fix_truncdfsi2 expander.
+;; If stfiwx will be used, the result goes to memory; otherwise,
+;; we're going to emit a store and a load of a subreg, so the dest is a
+;; register.
+(define_predicate "fix_trunc_dest_operand"
+ (if_then_else (match_test "! TARGET_E500_DOUBLE && TARGET_PPC_GFXOPT")
+ (match_operand 0 "memory_operand")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if the operand is either a non-special register or can be used
+;; as the operand of a `mode' add insn.
+(define_predicate "add_operand"
+ (if_then_else (match_code "const_int")
+ (match_test "satisfies_constraint_I (op)
+ || satisfies_constraint_L (op)")
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if OP is a constant but not a valid add_operand.
+(define_predicate "non_add_cint_operand"
+ (and (match_code "const_int")
+ (match_test "!satisfies_constraint_I (op)
+ && !satisfies_constraint_L (op)")))
+
+;; Return 1 if the operand is a constant that can be used as the operand
+;; of an OR or XOR.
+(define_predicate "logical_const_operand"
+ (match_code "const_int,const_double")
+{
+ HOST_WIDE_INT opl, oph;
+
+ if (GET_CODE (op) == CONST_INT)
+ {
+ opl = INTVAL (op) & GET_MODE_MASK (mode);
+
+ if (HOST_BITS_PER_WIDE_INT <= 32
+ && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT && opl < 0)
+ return 0;
+ }
+ else if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ gcc_assert (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT);
+
+ opl = CONST_DOUBLE_LOW (op);
+ oph = CONST_DOUBLE_HIGH (op);
+ if (oph != 0)
+ return 0;
+ }
+ else
+ return 0;
+
+ return ((opl & ~ (unsigned HOST_WIDE_INT) 0xffff) == 0
+ || (opl & ~ (unsigned HOST_WIDE_INT) 0xffff0000) == 0);
+})
+
+;; Return 1 if the operand is a non-special register or a constant that
+;; can be used as the operand of an OR or XOR.
+(define_predicate "logical_operand"
+ (ior (match_operand 0 "gpc_reg_operand")
+ (match_operand 0 "logical_const_operand")))
+
+;; Return 1 if op is a constant that is not a logical operand, but could
+;; be split into one.
+(define_predicate "non_logical_cint_operand"
+ (and (match_code "const_int,const_double")
+ (and (not (match_operand 0 "logical_operand"))
+ (match_operand 0 "reg_or_logical_cint_operand"))))
+
+;; Return 1 if op is a constant that can be encoded in a 32-bit mask,
+;; suitable for use with rlwinm (no more than two 1->0 or 0->1
+;; transitions). Reject all ones and all zeros, since these should have
+;; been optimized away and confuse the making of MB and ME.
+(define_predicate "mask_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT c, lsb;
+
+ c = INTVAL (op);
+
+ if (TARGET_POWERPC64)
+ {
+ /* Fail if the mask is not 32-bit. */
+ if (mode == DImode && (c & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0)
+ return 0;
+
+ /* Fail if the mask wraps around because the upper 32-bits of the
+ mask will all be 1s, contrary to GCC's internal view. */
+ if ((c & 0x80000001) == 0x80000001)
+ return 0;
+ }
+
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LS bit zero. */
+ if (c & 1)
+ c = ~c;
+
+ /* Reject all zeros or all ones. */
+ if (c == 0)
+ return 0;
+
+ /* Find the first transition. */
+ lsb = c & -c;
+
+ /* Invert to look for a second transition. */
+ c = ~c;
+
+ /* Erase first transition. */
+ c &= -lsb;
+
+ /* Find the second transition (if any). */
+ lsb = c & -c;
+
+ /* Match if all the bits above are 1's (or c is zero). */
+ return c == -lsb;
+})
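+;; Worked example of the transition test above (a sketch, assuming a
+;; 64-bit HOST_WIDE_INT): for c = 0x0ff0, c & 1 == 0 so no inversion;
+;; lsb = c & -c = 0x10 marks the first transition; after c = ~c and
+;; c &= -lsb, c = 0x...fffff000; the second transition gives
+;; lsb = 0x1000, and c == -lsb, so 0x0ff0 is accepted (one contiguous
+;; run of 1s, two transitions).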
+
+;; Return 1 for the PowerPC64 rlwinm corner case.
+(define_predicate "mask_operand_wrap"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT c, lsb;
+
+ c = INTVAL (op);
+
+ if ((c & 0x80000001) != 0x80000001)
+ return 0;
+
+ c = ~c;
+ if (c == 0)
+ return 0;
+
+ lsb = c & -c;
+ c = ~c;
+ c &= -lsb;
+ lsb = c & -c;
+ return c == -lsb;
+})
+
+;; Return 1 if the operand is a constant that is a PowerPC64 mask
+;; suitable for use with rldicl or rldicr (no more than one 1->0 or 0->1
+;; transition). Reject all zeros, since zero should have been
+;; optimized away and confuses the making of MB and ME.
+(define_predicate "mask64_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT c, lsb;
+
+ c = INTVAL (op);
+
+ /* Reject all zeros. */
+ if (c == 0)
+ return 0;
+
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LS bit zero. */
+ if (c & 1)
+ c = ~c;
+
+ /* Find the first transition. */
+ lsb = c & -c;
+
+ /* Match if all the bits above are 1's (or c is zero). */
+ return c == -lsb;
+})
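+;; For example (a sketch): c = 0xffffffffffff0000 has a single 0->1
+;; transition and is accepted, as is its complement 0xffff (handled by
+;; the c & 1 inversion); c = 0xffff0000 has two transitions and is
+;; rejected, since no single rldicl/rldicr mask can express it.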
+
+;; Like mask64_operand, but allow up to three transitions. This
+;; predicate is used by insn patterns that generate two rldicl or
+;; rldicr machine insns.
+(define_predicate "mask64_2_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT c, lsb;
+
+ c = INTVAL (op);
+
+ /* Disallow all zeros. */
+ if (c == 0)
+ return 0;
+
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LS bit zero. */
+ if (c & 1)
+ c = ~c;
+
+ /* Find the first transition. */
+ lsb = c & -c;
+
+ /* Invert to look for a second transition. */
+ c = ~c;
+
+ /* Erase first transition. */
+ c &= -lsb;
+
+ /* Find the second transition. */
+ lsb = c & -c;
+
+ /* Invert to look for a third transition. */
+ c = ~c;
+
+ /* Erase second transition. */
+ c &= -lsb;
+
+ /* Find the third transition (if any). */
+ lsb = c & -c;
+
+ /* Match if all the bits above are 1's (or c is zero). */
+ return c == -lsb;
+})
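+;; For example (a sketch): c = 0xffff0000ffffffff has two runs of 1s
+;; and is rejected by mask64_operand above, but is accepted here and
+;; can be handled as two single-transition masks by a pair of
+;; rldicl/rldicr insns.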
+
+;; Like and_operand, but also match constants that can be implemented
+;; with two rldicl or rldicr insns.
+(define_predicate "and64_2_operand"
+ (ior (match_operand 0 "mask64_2_operand")
+ (if_then_else (match_test "fixed_regs[CR0_REGNO]")
+ (match_operand 0 "gpc_reg_operand")
+ (match_operand 0 "logical_operand"))))
+
+;; Return 1 if the operand is either a non-special register or a
+;; constant that can be used as the operand of a logical AND.
+(define_predicate "and_operand"
+ (ior (match_operand 0 "mask_operand")
+ (ior (and (match_test "TARGET_POWERPC64 && mode == DImode")
+ (match_operand 0 "mask64_operand"))
+ (if_then_else (match_test "fixed_regs[CR0_REGNO]")
+ (match_operand 0 "gpc_reg_operand")
+ (match_operand 0 "logical_operand")))))
+
+;; Return 1 if the operand is either a logical operand or a short cint operand.
+(define_predicate "scc_eq_operand"
+ (ior (match_operand 0 "logical_operand")
+ (match_operand 0 "short_cint_operand")))
+
+;; Return 1 if the operand is a general non-special register or memory operand.
+(define_predicate "reg_or_mem_operand"
+ (ior (match_operand 0 "memory_operand")
+ (ior (and (match_code "mem")
+ (match_test "macho_lo_sum_memory_operand (op, mode)"))
+ (ior (match_operand 0 "volatile_mem_operand")
+ (match_operand 0 "gpc_reg_operand")))))
+
+;; Return 1 if the operand is either an easy FP constant or memory or reg.
+(define_predicate "reg_or_none500mem_operand"
+ (if_then_else (match_code "mem")
+ (and (match_test "!TARGET_E500_DOUBLE")
+ (ior (match_operand 0 "memory_operand")
+ (ior (match_test "macho_lo_sum_memory_operand (op, mode)")
+ (match_operand 0 "volatile_mem_operand"))))
+ (match_operand 0 "gpc_reg_operand")))
+
+;; Return 1 if the operand is CONST_DOUBLE 0, register or memory operand.
+(define_predicate "zero_reg_mem_operand"
+ (ior (match_operand 0 "zero_fp_constant")
+ (match_operand 0 "reg_or_mem_operand")))
+
+;; Return 1 if the operand is a general register or memory operand without
+;; pre_inc, pre_dec or pre_modify, which would produce an invalid form of
+;; the PowerPC lwa instruction.
+(define_predicate "lwa_operand"
+ (match_code "reg,subreg,mem")
+{
+ rtx inner, addr, offset;
+
+ inner = op;
+ if (reload_completed && GET_CODE (inner) == SUBREG)
+ inner = SUBREG_REG (inner);
+
+ if (gpc_reg_operand (inner, mode))
+ return true;
+ if (!memory_operand (inner, mode))
+ return false;
+ addr = XEXP (inner, 0);
+ if (GET_CODE (addr) == PRE_INC
+ || GET_CODE (addr) == PRE_DEC
+ || (GET_CODE (addr) == PRE_MODIFY
+ && !legitimate_indexed_address_p (XEXP (addr, 1), 0)))
+ return false;
+ if (GET_CODE (addr) == LO_SUM
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST)
+ addr = XEXP (XEXP (addr, 1), 0);
+ if (GET_CODE (addr) != PLUS)
+ return true;
+ offset = XEXP (addr, 1);
+ if (GET_CODE (offset) != CONST_INT)
+ return true;
+ return INTVAL (offset) % 4 == 0;
+})
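+;; For illustration (a sketch, not tied to a particular pattern): a
+;; GPR, an indexed address like (mem (plus (reg) (reg))), or an offset
+;; address whose displacement is a multiple of 4 all pass;
+;; (mem (plus (reg) (const_int 6))) fails, matching the DS-form
+;; encoding of lwa, whose displacement field has its low two bits
+;; implied zero.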
+
+;; Return 1 if the operand, used inside a MEM, is a SYMBOL_REF.
+(define_predicate "symbol_ref_operand"
+ (and (match_code "symbol_ref")
+ (match_test "(mode == VOIDmode || GET_MODE (op) == mode)
+ && (DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))")))
+
+;; Return 1 if op is an operand that can be loaded via the GOT:
+;; a SYMBOL_REF, CONST or LABEL_REF.
+(define_predicate "got_operand"
+ (match_code "symbol_ref,const,label_ref"))
+
+;; Return 1 if op is a simple reference that can be loaded via the GOT,
+;; excluding labels involving addition.
+(define_predicate "got_no_const_operand"
+ (match_code "symbol_ref,label_ref"))
+
+;; Return 1 if op is a SYMBOL_REF for a TLS symbol.
+(define_predicate "rs6000_tls_symbol_ref"
+ (and (match_code "symbol_ref")
+ (match_test "RS6000_SYMBOL_REF_TLS_P (op)")))
+
+;; Return 1 if the operand, used inside a MEM, is a valid first argument
+;; to CALL. This is a SYMBOL_REF, a pseudo-register, LR or CTR.
+(define_predicate "call_operand"
+ (if_then_else (match_code "reg")
+ (match_test "REGNO (op) == LR_REGNO
+ || REGNO (op) == CTR_REGNO
+ || REGNO (op) >= FIRST_PSEUDO_REGISTER")
+ (match_code "symbol_ref")))
+
+;; Return 1 if the operand is a SYMBOL_REF for a function known to be in
+;; this file.
+(define_predicate "current_file_function_operand"
+ (and (match_code "symbol_ref")
+ (match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
+ && ((SYMBOL_REF_LOCAL_P (op)
+ && (DEFAULT_ABI != ABI_AIX
+ || !SYMBOL_REF_EXTERNAL_P (op)))
+ || (op == XEXP (DECL_RTL (current_function_decl),
+ 0)))")))
+
+;; Return 1 if this operand is a valid input for a move insn.
+(define_predicate "input_operand"
+ (match_code "label_ref,symbol_ref,const,high,reg,subreg,mem,
+ const_double,const_vector,const_int,plus")
+{
+ /* Memory is always valid. */
+ if (memory_operand (op, mode))
+ return 1;
+
+ /* For floating-point, easy constants are valid. */
+ if (SCALAR_FLOAT_MODE_P (mode)
+ && CONSTANT_P (op)
+ && easy_fp_constant (op, mode))
+ return 1;
+
+ /* Allow any integer constant. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && (GET_CODE (op) == CONST_INT
+ || GET_CODE (op) == CONST_DOUBLE))
+ return 1;
+
+ /* Allow easy vector constants. */
+ if (GET_CODE (op) == CONST_VECTOR
+ && easy_vector_constant (op, mode))
+ return 1;
+
+ /* Do not allow invalid E500 subregs. */
+ if ((TARGET_E500_DOUBLE || TARGET_SPE)
+ && GET_CODE (op) == SUBREG
+ && invalid_e500_subreg (op, mode))
+ return 0;
+
+ /* For floating-point or multi-word mode, the only remaining valid type
+ is a register. */
+ if (SCALAR_FLOAT_MODE_P (mode)
+ || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return register_operand (op, mode);
+
+ /* The only cases left are integral modes one word or smaller (we
+ do not get called for MODE_CC values). These can be in any
+ register. */
+ if (register_operand (op, mode))
+ return 1;
+
+ /* A SYMBOL_REF referring to the TOC is valid. */
+ if (legitimate_constant_pool_address_p (op, mode, false))
+ return 1;
+
+ /* A constant pool expression (relative to the TOC) is valid.  */
+ if (toc_relative_expr_p (op))
+ return 1;
+
+ /* V.4 allows SYMBOL_REFs and CONSTs that are in the small data region
+ to be valid. */
+ if (DEFAULT_ABI == ABI_V4
+ && (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST)
+ && small_data_operand (op, Pmode))
+ return 1;
+
+ return 0;
+})
+
+;; Return true if OP is a nonimmediate operand, rejecting SUBREGs that
+;; are invalid on the e500.
+(define_predicate "rs6000_nonimmediate_operand"
+ (match_code "reg,subreg,mem")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE)
+ && GET_CODE (op) == SUBREG
+ && invalid_e500_subreg (op, mode))
+ return 0;
+
+ return nonimmediate_operand (op, mode);
+})
+
+;; Return true if operand is boolean operator.
+(define_predicate "boolean_operator"
+ (match_code "and,ior,xor"))
+
+;; Return true if operand is OR-form of boolean operator.
+(define_predicate "boolean_or_operator"
+ (match_code "ior,xor"))
+
+;; Return true if operand is an equality operator.
+(define_special_predicate "equality_operator"
+ (match_code "eq,ne"))
+
+;; Return true if operand is MIN or MAX operator.
+(define_predicate "min_max_operator"
+ (match_code "smin,smax,umin,umax"))
+
+;; Return 1 if OP is a comparison operation that is valid for a branch
+;; instruction. We check the opcode against the mode of the CC value.
+;; validate_condition_mode is an assertion.
+(define_predicate "branch_comparison_operator"
+ (and (match_operand 0 "comparison_operator")
+ (and (match_test "GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC")
+ (match_test "validate_condition_mode (GET_CODE (op),
+ GET_MODE (XEXP (op, 0))),
+ 1"))))
+
+(define_predicate "rs6000_cbranch_operator"
+ (if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS")
+ (match_operand 0 "ordered_comparison_operator")
+ (match_operand 0 "comparison_operator")))
+
+;; Return 1 if OP is a comparison operation that is valid for an SCC insn --
+;; it must be a positive comparison.
+(define_predicate "scc_comparison_operator"
+ (and (match_operand 0 "branch_comparison_operator")
+ (match_code "eq,lt,gt,ltu,gtu,unordered")))
+
+;; Return 1 if OP is a comparison operation whose inverse would be valid for
+;; an SCC insn.
+(define_predicate "scc_rev_comparison_operator"
+ (and (match_operand 0 "branch_comparison_operator")
+ (match_code "ne,le,ge,leu,geu,ordered")))
+
+;; Return 1 if OP is a comparison operation that is valid for a branch
+;; insn, which is true if the corresponding bit in the CC register is set.
+(define_predicate "branch_positive_comparison_operator"
+ (and (match_operand 0 "branch_comparison_operator")
+ (match_code "eq,lt,gt,ltu,gtu,unordered")))
+
+;; Return 1 if OP is a load multiple operation, known to be a PARALLEL.
+(define_predicate "load_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx src_addr;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM)
+ return 0;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0);
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != dest_regno + i
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != i * 4)
+ return 0;
+ }
+
+ return 1;
+})
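+;; The shape this accepts is (a sketch, register numbers arbitrary):
+;;   (parallel [(set (reg:SI 29) (mem:SI (reg:SI 1)))
+;;              (set (reg:SI 30) (mem:SI (plus (reg:SI 1) (const_int 4))))
+;;              (set (reg:SI 31) (mem:SI (plus (reg:SI 1) (const_int 8))))])
+;; i.e. consecutive SImode registers loaded from consecutive words.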
+
+;; Return 1 if OP is a store multiple operation, known to be a PARALLEL.
+;; The second vector element is a CLOBBER.
+(define_predicate "store_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0) - 1;
+ unsigned int src_regno;
+ rtx dest_addr;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG)
+ return 0;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0)));
+ dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0);
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i + 1);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != src_regno + i
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
+ || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != i * 4)
+ return 0;
+ }
+
+ return 1;
+})
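+;; The accepted shape mirrors load_multiple_operation, except that
+;; element 1 of the PARALLEL is the CLOBBER mentioned above (a sketch,
+;; register numbers arbitrary):
+;;   (parallel [(set (mem:SI (reg:SI 1)) (reg:SI 29))
+;;              (clobber (scratch:SI))
+;;              (set (mem:SI (plus (reg:SI 1) (const_int 4))) (reg:SI 30))
+;;              ...])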
+
+;; Return 1 if OP is valid for a save_world call in the prologue, known to
+;; be a PARALLEL.
+(define_predicate "save_world_operation"
+ (match_code "parallel")
+{
+ int index;
+ int i;
+ rtx elt;
+ int count = XVECLEN (op, 0);
+
+ if (count != 54)
+ return 0;
+
+ index = 0;
+ if (GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER
+ || GET_CODE (XVECEXP (op, 0, index++)) != USE)
+ return 0;
+
+ for (i = 1; i <= 18; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || ! memory_operand (SET_DEST (elt), DFmode)
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != DFmode)
+ return 0;
+ }
+
+ for (i = 1; i <= 12; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != V4SImode)
+ return 0;
+ }
+
+ for (i = 1; i <= 19; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || ! memory_operand (SET_DEST (elt), Pmode)
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != Pmode)
+ return 0;
+ }
+
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || ! memory_operand (SET_DEST (elt), Pmode)
+ || GET_CODE (SET_SRC (elt)) != REG
+ || REGNO (SET_SRC (elt)) != CR2_REGNO
+ || GET_MODE (SET_SRC (elt)) != Pmode)
+ return 0;
+
+ if (GET_CODE (XVECEXP (op, 0, index++)) != SET
+ || GET_CODE (XVECEXP (op, 0, index++)) != SET)
+ return 0;
+ return 1;
+})
+
+;; Return 1 if OP is valid for a restore_world call in the epilogue, known
+;; to be a PARALLEL.
+(define_predicate "restore_world_operation"
+ (match_code "parallel")
+{
+ int index;
+ int i;
+ rtx elt;
+ int count = XVECLEN (op, 0);
+
+ if (count != 59)
+ return 0;
+
+ index = 0;
+ if (GET_CODE (XVECEXP (op, 0, index++)) != RETURN
+ || GET_CODE (XVECEXP (op, 0, index++)) != USE
+ || GET_CODE (XVECEXP (op, 0, index++)) != USE
+ || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER)
+ return 0;
+
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || ! memory_operand (SET_SRC (elt), Pmode)
+ || GET_CODE (SET_DEST (elt)) != REG
+ || REGNO (SET_DEST (elt)) != CR2_REGNO
+ || GET_MODE (SET_DEST (elt)) != Pmode)
+ return 0;
+
+ for (i = 1; i <= 19; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || ! memory_operand (SET_SRC (elt), Pmode)
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != Pmode)
+ return 0;
+ }
+
+ for (i = 1; i <= 12; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != V4SImode)
+ return 0;
+ }
+
+ for (i = 1; i <= 18; i++)
+ {
+ elt = XVECEXP (op, 0, index++);
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || ! memory_operand (SET_SRC (elt), DFmode)
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != DFmode)
+ return 0;
+ }
+
+ if (GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER
+ || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER
+ || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER
+ || GET_CODE (XVECEXP (op, 0, index++)) != CLOBBER
+ || GET_CODE (XVECEXP (op, 0, index++)) != USE)
+ return 0;
+ return 1;
+})
+
+;; Return 1 if OP is valid for a vrsave call, known to be a PARALLEL.
+(define_predicate "vrsave_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno, src_regno;
+ int i;
+
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC_VOLATILE
+ || XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPECV_SET_VRSAVE)
+ return 0;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+ src_regno = REGNO (XVECEXP (SET_SRC (XVECEXP (op, 0, 0)), 0, 1));
+
+ if (dest_regno != VRSAVE_REGNO || src_regno != VRSAVE_REGNO)
+ return 0;
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != CLOBBER
+ && GET_CODE (elt) != SET)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Return 1 if OP is valid for mfcr insn, known to be a PARALLEL.
+(define_predicate "mfcr_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count < 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC
+ || XVECLEN (SET_SRC (XVECEXP (op, 0, 0)), 0) != 2)
+ return 0;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx exp = XVECEXP (op, 0, i);
+ rtx unspec;
+ int maskval;
+ rtx src_reg;
+
+ src_reg = XVECEXP (SET_SRC (exp), 0, 0);
+
+ if (GET_CODE (src_reg) != REG
+ || GET_MODE (src_reg) != CCmode
+ || ! CR_REGNO_P (REGNO (src_reg)))
+ return 0;
+
+ if (GET_CODE (exp) != SET
+ || GET_CODE (SET_DEST (exp)) != REG
+ || GET_MODE (SET_DEST (exp)) != SImode
+ || ! INT_REGNO_P (REGNO (SET_DEST (exp))))
+ return 0;
+ unspec = SET_SRC (exp);
+ maskval = 1 << (MAX_CR_REGNO - REGNO (src_reg));
+
+ if (GET_CODE (unspec) != UNSPEC
+ || XINT (unspec, 1) != UNSPEC_MOVESI_FROM_CR
+ || XVECLEN (unspec, 0) != 2
+ || XVECEXP (unspec, 0, 0) != src_reg
+ || GET_CODE (XVECEXP (unspec, 0, 1)) != CONST_INT
+ || INTVAL (XVECEXP (unspec, 0, 1)) != maskval)
+ return 0;
+ }
+ return 1;
+})
+
+;; Return 1 if OP is valid for mtcrf insn, known to be a PARALLEL.
+(define_predicate "mtcrf_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+ rtx src_reg;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count < 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC
+ || XVECLEN (SET_SRC (XVECEXP (op, 0, 0)), 0) != 2)
+ return 0;
+ src_reg = XVECEXP (SET_SRC (XVECEXP (op, 0, 0)), 0, 0);
+
+ if (GET_CODE (src_reg) != REG
+ || GET_MODE (src_reg) != SImode
+ || ! INT_REGNO_P (REGNO (src_reg)))
+ return 0;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx exp = XVECEXP (op, 0, i);
+ rtx unspec;
+ int maskval;
+
+ if (GET_CODE (exp) != SET
+ || GET_CODE (SET_DEST (exp)) != REG
+ || GET_MODE (SET_DEST (exp)) != CCmode
+ || ! CR_REGNO_P (REGNO (SET_DEST (exp))))
+ return 0;
+ unspec = SET_SRC (exp);
+ maskval = 1 << (MAX_CR_REGNO - REGNO (SET_DEST (exp)));
+
+ if (GET_CODE (unspec) != UNSPEC
+ || XINT (unspec, 1) != UNSPEC_MOVESI_TO_CR
+ || XVECLEN (unspec, 0) != 2
+ || XVECEXP (unspec, 0, 0) != src_reg
+ || GET_CODE (XVECEXP (unspec, 0, 1)) != CONST_INT
+ || INTVAL (XVECEXP (unspec, 0, 1)) != maskval)
+ return 0;
+ }
+ return 1;
+})
+
+;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL.
+(define_predicate "lmw_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx src_addr;
+ unsigned int base_regno;
+ HOST_WIDE_INT offset;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM)
+ return 0;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0);
+
+ if (dest_regno > 31
+ || count != 32 - (int) dest_regno)
+ return 0;
+
+ if (legitimate_indirect_address_p (src_addr, 0))
+ {
+ offset = 0;
+ base_regno = REGNO (src_addr);
+ if (base_regno == 0)
+ return 0;
+ }
+ else if (rs6000_legitimate_offset_address_p (SImode, src_addr, 0))
+ {
+ offset = INTVAL (XEXP (src_addr, 1));
+ base_regno = REGNO (XEXP (src_addr, 0));
+ }
+ else
+ return 0;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ rtx newaddr;
+ rtx addr_reg;
+ HOST_WIDE_INT newoffset;
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != dest_regno + i
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != SImode)
+ return 0;
+ newaddr = XEXP (SET_SRC (elt), 0);
+ if (legitimate_indirect_address_p (newaddr, 0))
+ {
+ newoffset = 0;
+ addr_reg = newaddr;
+ }
+ else if (rs6000_legitimate_offset_address_p (SImode, newaddr, 0))
+ {
+ addr_reg = XEXP (newaddr, 0);
+ newoffset = INTVAL (XEXP (newaddr, 1));
+ }
+ else
+ return 0;
+ if (REGNO (addr_reg) != base_regno
+ || newoffset != offset + 4 * i)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Return 1 if OP is valid for stmw insn, known to be a PARALLEL.
+(define_predicate "stmw_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int src_regno;
+ rtx dest_addr;
+ unsigned int base_regno;
+ HOST_WIDE_INT offset;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG)
+ return 0;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0)));
+ dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0);
+
+ if (src_regno > 31
+ || count != 32 - (int) src_regno)
+ return 0;
+
+ if (legitimate_indirect_address_p (dest_addr, 0))
+ {
+ offset = 0;
+ base_regno = REGNO (dest_addr);
+ if (base_regno == 0)
+ return 0;
+ }
+ else if (rs6000_legitimate_offset_address_p (SImode, dest_addr, 0))
+ {
+ offset = INTVAL (XEXP (dest_addr, 1));
+ base_regno = REGNO (XEXP (dest_addr, 0));
+ }
+ else
+ return 0;
+
+ for (i = 0; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ rtx newaddr;
+ rtx addr_reg;
+ HOST_WIDE_INT newoffset;
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != src_regno + i
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != SImode)
+ return 0;
+ newaddr = XEXP (SET_DEST (elt), 0);
+ if (legitimate_indirect_address_p (newaddr, 0))
+ {
+ newoffset = 0;
+ addr_reg = newaddr;
+ }
+ else if (rs6000_legitimate_offset_address_p (SImode, newaddr, 0))
+ {
+ addr_reg = XEXP (newaddr, 0);
+ newoffset = INTVAL (XEXP (newaddr, 1));
+ }
+ else
+ return 0;
+ if (REGNO (addr_reg) != base_regno
+ || newoffset != offset + 4 * i)
+ return 0;
+ }
+
+ return 1;
+})
diff --git a/gcc/config/rs6000/rios1.md b/gcc/config/rs6000/rios1.md
new file mode 100644
index 000000000..9ad9ce3e1
--- /dev/null
+++ b/gcc/config/rs6000/rios1.md
@@ -0,0 +1,191 @@
+;; Scheduling description for the IBM POWER (RIOS1) and PPC601 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "rios1,rios1fp")
+(define_cpu_unit "iu_rios1" "rios1")
+(define_cpu_unit "fpu_rios1" "rios1fp")
+(define_cpu_unit "bpu_rios1" "rios1")
+
+;; RIOS1 32-bit IU, FPU, BPU
+
+(define_insn_reservation "rios1-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1")
+
+(define_insn_reservation "rios1-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1")
+
+(define_insn_reservation "rios1-fpload" 2
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1")
+
+(define_insn_reservation "ppc601-fpload" 3
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "ppc601"))
+ "iu_rios1")
+
+(define_insn_reservation "rios1-fpstore" 3
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1+fpu_rios1")
+
+(define_insn_reservation "rios1-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,\
+ trap,var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1")
+
+(define_insn_reservation "rios1-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1,iu_rios1")
+
+(define_insn_reservation "rios1-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1,iu_rios1,iu_rios1")
+
+(define_insn_reservation "rios1-imul" 5
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1*5")
+
+(define_insn_reservation "rios1-imul2" 4
+ (and (eq_attr "type" "imul2")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1*4")
+
+(define_insn_reservation "rios1-imul3" 3
+ (and (eq_attr "type" "imul")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1*3")
+
+(define_insn_reservation "ppc601-imul" 5
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "ppc601"))
+ "iu_rios1*5")
+
+(define_insn_reservation "rios1-idiv" 19
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1*19")
+
+(define_insn_reservation "ppc601-idiv" 36
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "ppc601"))
+ "iu_rios1*36")
+
+; compare executes on integer unit, but feeds insns which
+; execute on the branch unit.
+(define_insn_reservation "rios1-compare" 4
+ (and (eq_attr "type" "cmp,fast_compare,compare")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1,nothing*2,bpu_rios1")
+
+(define_insn_reservation "rios1-delayed_compare" 5
+ (and (eq_attr "type" "delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1,nothing*3,bpu_rios1")
+
+(define_insn_reservation "ppc601-compare" 3
+ (and (eq_attr "type" "cmp,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "ppc601"))
+ "iu_rios1,nothing,bpu_rios1")
+
+(define_insn_reservation "rios1-fpcompare" 9
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "rios1"))
+ "fpu_rios1,nothing*3,bpu_rios1")
+
+(define_insn_reservation "ppc601-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "ppc601"))
+ "(fpu_rios1+iu_rios1*2),nothing*2,bpu_rios1")
+
+(define_insn_reservation "rios1-fp" 2
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "rios1"))
+ "fpu_rios1")
+
+(define_insn_reservation "ppc601-fp" 4
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "ppc601"))
+ "fpu_rios1")
+
+(define_insn_reservation "rios1-dmul" 5
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "ppc601"))
+ "fpu_rios1*2")
+
+(define_insn_reservation "rios1-sdiv" 19
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "rios1"))
+ "fpu_rios1*19")
+
+(define_insn_reservation "ppc601-sdiv" 17
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "ppc601"))
+ "fpu_rios1*17")
+
+(define_insn_reservation "ppc601-ddiv" 31
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc601"))
+ "fpu_rios1*31")
+
+(define_insn_reservation "rios1-mfcr" 2
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1,bpu_rios1")
+
+(define_insn_reservation "rios1-mtcr" 4
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1,bpu_rios1")
+
+(define_insn_reservation "rios1-crlogical" 4
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "bpu_rios1")
+
+(define_insn_reservation "rios1-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "rios1"))
+ "iu_rios1,bpu_rios1")
+
+(define_insn_reservation "ppc601-mtjmpr" 4
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "ppc601"))
+ "iu_rios1,bpu_rios1")
+
+(define_insn_reservation "rios1-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "iu_rios1,bpu_rios1")
+
+(define_insn_reservation "rios1-branch" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "rios1,ppc601"))
+ "bpu_rios1")
+
diff --git a/gcc/config/rs6000/rios2.md b/gcc/config/rs6000/rios2.md
new file mode 100644
index 000000000..96633af2f
--- /dev/null
+++ b/gcc/config/rs6000/rios2.md
@@ -0,0 +1,129 @@
+;; Scheduling description for the IBM POWER2 processor.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "rios2,rios2fp")
+(define_cpu_unit "iu1_rios2,iu2_rios2" "rios2")
+(define_cpu_unit "fpu1_rios2,fpu2_rios2" "rios2fp")
+(define_cpu_unit "bpu_rios2" "rios2")
+
+;; RIOS2 32-bit 2xIU, 2xFPU, BPU
+;; IU1 can perform all integer operations
+;; IU2 can perform all integer operations except imul and idiv
+
+(define_insn_reservation "rios2-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,\
+ load_ux,load_u,fpload,fpload_ux,fpload_u,\
+ load_l,store_c,sync")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2|iu2_rios2")
+
+(define_insn_reservation "rios2-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2|iu2_rios2")
+
+(define_insn_reservation "rios2-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2|iu2_rios2")
+
+(define_insn_reservation "rios2-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2|iu2_rios2,iu1_rios2|iu2_rios2")
+
+(define_insn_reservation "rios2-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2|iu2_rios2,iu1_rios2|iu2_rios2,iu1_rios2|iu2_rios2")
+
+(define_insn_reservation "rios2-imul" 2
+ (and (eq_attr "type" "imul,imul2,imul3,imul_compare")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2*2")
+
+(define_insn_reservation "rios2-idiv" 13
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2*13")
+
+; compare executes on integer unit, but feeds insns which
+; execute on the branch unit.
+(define_insn_reservation "rios2-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,delayed_compare,\
+ var_delayed_compare")
+ (eq_attr "cpu" "rios2"))
+ "(iu1_rios2|iu2_rios2),nothing,bpu_rios2")
+
+(define_insn_reservation "rios2-fp" 2
+ (and (eq_attr "type" "fp")
+ (eq_attr "cpu" "rios2"))
+ "fpu1_rios2|fpu2_rios2")
+
+(define_insn_reservation "rios2-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "rios2"))
+ "(fpu1_rios2|fpu2_rios2),nothing*3,bpu_rios2")
+
+(define_insn_reservation "rios2-dmul" 2
+ (and (eq_attr "type" "dmul")
+ (eq_attr "cpu" "rios2"))
+ "fpu1_rios2|fpu2_rios2")
+
+(define_insn_reservation "rios2-sdiv" 17
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "rios2"))
+ "(fpu1_rios2*17)|(fpu2_rios2*17)")
+
+(define_insn_reservation "rios2-ssqrt" 26
+ (and (eq_attr "type" "ssqrt,dsqrt")
+ (eq_attr "cpu" "rios2"))
+ "(fpu1_rios2*26)|(fpu2_rios2*26)")
+
+(define_insn_reservation "rios2-mfcr" 2
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2,bpu_rios2")
+
+(define_insn_reservation "rios2-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2,bpu_rios2")
+
+(define_insn_reservation "rios2-crlogical" 3
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "rios2"))
+ "bpu_rios2")
+
+(define_insn_reservation "rios2-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2,bpu_rios2")
+
+(define_insn_reservation "rios2-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "rios2"))
+ "iu1_rios2,bpu_rios2")
+
+(define_insn_reservation "rios2-branch" 1
+ (and (eq_attr "type" "jmpreg,branch,isync")
+ (eq_attr "cpu" "rios2"))
+ "bpu_rios2")
+
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
new file mode 100644
index 000000000..7bd6b20fa
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -0,0 +1,1020 @@
+/* Builtin functions for rs6000/powerpc.
+ Copyright (C) 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Before including this file, two macros must be defined:
+ RS6000_BUILTIN -- 2 arguments, the enum name, and classification
+ RS6000_BUILTIN_EQUATE -- 2 arguments, enum name and value */
+
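+/* A sketch of the X-macro idiom this implies (illustrative only; the
+   real enum is declared by the includer, e.g. in rs6000.h, and the
+   RS6000_BUILTIN_COUNT name below is an assumption):
+
+     #define RS6000_BUILTIN(ENUM, CLASS) ENUM,
+     #define RS6000_BUILTIN_EQUATE(ENUM, VALUE) ENUM = VALUE,
+     enum rs6000_builtins {
+     #include "rs6000-builtin.def"
+       RS6000_BUILTIN_COUNT
+     };
+     #undef RS6000_BUILTIN
+     #undef RS6000_BUILTIN_EQUATE  */
+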
+/* AltiVec builtins. */
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4si, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4si, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_8hi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_8hi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_16qi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_16qi, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4sf, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4sf, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2df, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2df, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2di, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2di, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDCUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDSWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAND, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VANDC, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VAVGSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCFUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCFSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCTSXS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCTUXS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEXPTEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VLOGEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMADDFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMAXFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMHRADDSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMLADDUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGHW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMRGLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMMBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMSHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMSUMSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINSW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMINFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUB_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULESB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULEUH_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULESH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUB_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOUH_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VMULOSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VNMSUBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VNOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSEL_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPERM_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHUM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWUM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSHSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSWSS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUHUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSHUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKUWUS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VREFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIN, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRSQRTFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRSQRTEFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSL, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLO, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSPLTISW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRAW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSRO, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUBM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUHM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUWM, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBFP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBCUW, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBUWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUBSWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4UBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4SBS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM4SHS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUM2SWS, RS6000_BTC_SAT)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSUMSWS, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VXOR, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLDOI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKHSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLSB, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLPX, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VUPKLSH, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MTVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MFVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSSALL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVSL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVSR, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DSTSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_DST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEBX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEHX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVEWX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVLX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVLXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVRX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LVRXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEBX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEHX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVEWX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVLX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVLXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVRX, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_STVRXL, RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPBFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQUW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGEFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUB_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUH_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGTUW_P, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABSS_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ABS_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MASK_FOR_LOAD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_MASK_FOR_STORE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INIT_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SET_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_COPYSIGN_V4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VRECIPFP, RS6000_BTC_FP_PURE)
+
+/* Altivec overloaded builtins. */
+/* For now, don't set the classification for overloaded functions.
+ The function should be converted to the type specific instruction
+ before we get to the point about classifying the builtin type. */
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPEQ_P, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(ALTIVEC_BUILTIN_OVERLOADED_FIRST,
+ ALTIVEC_BUILTIN_VCMPEQ_P)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGT_P, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VCMPGE_P, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ABSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_AND, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ANDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_AVG, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXTRACT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CEIL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPEQUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPGE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPLE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_COPYSIGN, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTF, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_CTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_DSTT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXPTE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_FLOOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LDE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LDL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LOGE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEBX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVEWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVLX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVLXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVRX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVRXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVSL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_LVSR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MAX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MERGEH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MERGEL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MIN, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MLADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MPERM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRADDS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MSUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MSUMS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MTVSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MULE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_MULO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NEARBYINT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_NOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_OR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACK, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKSU, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PERM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RECIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RINT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ROUND, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRT, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRTE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SEL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLD, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S16, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S32, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_S8, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U16, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U32, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLAT_U8, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SQRT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRA, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SRO, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ST, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEBX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVEWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVLX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVLXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVRX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STVRXL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUBC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUM2S, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUM4S, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SUMS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_TRUNC, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_UNPACKH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_UNPACKL, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDSWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUWM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VADDUWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VAVGUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCFSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCFUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPEQUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VCMPGTUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMAXUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMINUW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMMBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMSHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMSUMUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULESB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULESH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULEUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULEUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VMULOUH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSHSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSHUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSWSS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKSWUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUHUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUHUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUWUM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VPKUWUS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VRLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSRW, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBFP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBSWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUBM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUHM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUWM, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUBUWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4SBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4SHS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VSUM4UBS, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKHSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLPX, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_VUPKLSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_XOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_STEP, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PROMOTE, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_INSERT, RS6000_BTC_MISC)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SPLATS, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(ALTIVEC_BUILTIN_OVERLOADED_LAST,
+ ALTIVEC_BUILTIN_VEC_SPLATS)
+
+/* SPE builtins. */
+RS6000_BUILTIN(SPE_BUILTIN_EVADDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVAND, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVANDC, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVDIVWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVDIVWU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEQV, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSDIV, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSMUL, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDDX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHESPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOSSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOUSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOSX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOUX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWWSPLATX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGEHI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGEHILO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGELO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMERGELOHI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHESSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHEUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSFANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOSSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMHOUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLSSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUMIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWLUSIANW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNAND, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVORC, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDDX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDHX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDWX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHOX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWEX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWOX, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVXOR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCNTLSW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCNTLZW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEXTSB, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVEXTSH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFSI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFUF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCFUI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTSIZ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUF, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCTUIZ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSNABS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSNEG, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMRA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVNEG, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRNDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFSMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFSSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFUMIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBFUSIAAW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVADDIW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHESPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOSSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLHHOUSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHOU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWHSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVLWWSPLAT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVRLWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSLWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWIS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSRWIU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDD, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDH, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTDW, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWHO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWE, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSTWWO, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSUBIFW, RS6000_BTC_MISC)
+
+/* Compares. */
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPGTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPGTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPLTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVCMPLTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSCMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVFSTSTLT, RS6000_BTC_MISC)
+
+/* EVSEL compares. */
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPGTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPGTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPLTS, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_CMPLTU, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSCMPLT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTEQ, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTGT, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSEL_FSTSTLT, RS6000_BTC_MISC)
+
+RS6000_BUILTIN(SPE_BUILTIN_EVSPLATFI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVSPLATI, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSMAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUSIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSSIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUSIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSSFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMFAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGUMIAA, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSSFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMFAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGSMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_EVMWHGUMIAN, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_MTSPEFSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_MFSPEFSCR, RS6000_BTC_MISC)
+RS6000_BUILTIN(SPE_BUILTIN_BRINC, RS6000_BTC_MISC)
+
+/* PAIRED builtins. */
+RS6000_BUILTIN(PAIRED_BUILTIN_DIVV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_ABSV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NEGV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SQRTV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_ADDV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUBV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_RESV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULV2SF3, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NMADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_NABSV2SF2, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUM0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SUM1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULS0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MULS1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE00, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE01, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE10, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MERGE11, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADDS0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_MADDS1, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_STX, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_LX, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_SELV2SF4, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_CMPU0, RS6000_BTC_MISC)
+RS6000_BUILTIN(PAIRED_BUILTIN_CMPU1, RS6000_BTC_MISC)
+
+/* VSX builtins. */
+RS6000_BUILTIN(VSX_BUILTIN_LXSDX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVDSX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXSDX, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SF, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V8HI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V16QI, RS6000_BTC_MEM)
+RS6000_BUILTIN(VSX_BUILTIN_XSABSDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCMPODP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCMPUDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVDPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVSPDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVSXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSCVUXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSDIVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMADDADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMADDMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMAXDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMINDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMOVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMSUBADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMSUBMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSMULDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNABSDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNEGDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMADDADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMADDMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMSUBADP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSNMSUBMDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRDPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSREDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSRSQRTEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSSQRTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_CPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CPSGNSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XSTDIVDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTDIVDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTSQRTDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XSTSQRTDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVABSDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVABSSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPEQSP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGEDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGESP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTDP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCMPGTSP_P, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCPSGNDP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVCPSGNSP, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXDS_UNS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVDPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPSXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPSXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPUXDS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSPUXWS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXWDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVSXWSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDDP_UNS, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXWDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVCVUXWSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVDIVDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVDIVSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMAXDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMAXSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMINDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMINSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMULDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVMULSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNABSDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNABSSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNEGDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNEGSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMADDDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMADDSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVNMSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRDPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVREDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIC, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIM, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSPIZ, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSQRTEDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVRSQRTESP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSQRTDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSQRTSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSUBDP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVSUBSP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVSP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTDIVSP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTDP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTDP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTSP_FE, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XVTSQRTSP_FG, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSEL_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_2DI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_4SI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_8HI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VPERM_16QI_UNS, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXPERMDI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CONCAT_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_CONCAT_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SET_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SET_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_SPLAT_2DF, RS6000_BTC_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_SPLAT_2DI, RS6000_BTC_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGHW_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGHW_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGLW_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXMRGLW_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_16QI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_8HI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_4SI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_4SF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_XXSLDWI_2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_INIT_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_INIT_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_SET_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_SET_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_EXT_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_EXT_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEL_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEL_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEH_V2DF, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEH_V2DI, RS6000_BTC_CONST)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_RSQRT_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_RSQRT_V2DF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_RECIP_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VSX_BUILTIN_RECIP_V2DF, RS6000_BTC_FP_PURE)
+
+/* VSX overloaded builtins; these add the overloaded functions not
+   present in Altivec. */
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MUL, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_FIRST,
+ VSX_BUILTIN_VEC_MUL)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_MSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_NMADD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_NMSUB, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_DIV, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXMRGHW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXMRGLW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXPERMDI, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSLDWI, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTW, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_LD, RS6000_BTC_MISC)
+RS6000_BUILTIN(VSX_BUILTIN_VEC_ST, RS6000_BTC_MISC)
+RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_LAST,
+ VSX_BUILTIN_VEC_ST)
+
+/* Combined VSX/Altivec builtins. */
+RS6000_BUILTIN(VECTOR_BUILTIN_FLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_FIX_V4SF_V4SI, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(VECTOR_BUILTIN_FIXUNS_V4SF_V4SI, RS6000_BTC_FP_PURE)
+
+/* Power7 builtins that aren't VSX instructions. */
+RS6000_BUILTIN(POWER7_BUILTIN_BPERMD, RS6000_BTC_CONST)
+
+/* Miscellaneous builtins. */
+RS6000_BUILTIN(RS6000_BUILTIN_RECIP, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RECIPF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RSQRTF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RSQRT, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_BSWAP_HI, RS6000_BTC_CONST)
+
+/* Darwin CfString builtin. */
+RS6000_BUILTIN(RS6000_BUILTIN_CFSTRING, RS6000_BTC_MISC)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
new file mode 100644
index 000000000..3f4f90b23
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -0,0 +1,3772 @@
+/* Subroutines for the C front end on the POWER and PowerPC architectures.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+ Contributed by Zack Weinberg <zack@codesourcery.com>
+ and Paolo Bonzini <bonzini@gnu.org>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "c-family/c-common.h"
+#include "c-family/c-pragma.h"
+#include "diagnostic-core.h"
+#include "tm_p.h"
+#include "target.h"
+#include "langhooks.h"
+
+
+
+/* Handle the machine specific pragma longcall. Its syntax is
+
+ # pragma longcall ( TOGGLE )
+
+ where TOGGLE is either 0 or 1.
+
+ rs6000_default_long_calls is set to the value of TOGGLE, changing
+ whether or not new function declarations receive a longcall
+ attribute by default. */
+
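+/* For illustration only -- a hypothetical user source fragment (the
+   function name is made up):
+
+     #pragma longcall (1)
+     extern void remote_helper (void);   <- gets longcall by default
+     #pragma longcall (0)
+
+   Any other argument form triggers the diagnostics below.  */
+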
+#define SYNTAX_ERROR(gmsgid) do { \
+ warning (OPT_Wpragmas, gmsgid); \
+ warning (OPT_Wpragmas, "ignoring malformed #pragma longcall"); \
+ return; \
+} while (0)
+
+void
+rs6000_pragma_longcall (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ tree x, n;
+
+ /* If we get here, generic code has already scanned the directive
+ leader and the word "longcall". */
+
+ if (pragma_lex (&x) != CPP_OPEN_PAREN)
+ SYNTAX_ERROR ("missing open paren");
+ if (pragma_lex (&n) != CPP_NUMBER)
+ SYNTAX_ERROR ("missing number");
+ if (pragma_lex (&x) != CPP_CLOSE_PAREN)
+ SYNTAX_ERROR ("missing close paren");
+
+ if (n != integer_zero_node && n != integer_one_node)
+ SYNTAX_ERROR ("number must be 0 or 1");
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma longcall");
+
+ rs6000_default_long_calls = (n == integer_one_node);
+}
+
+/* Handle defining many CPP flags based on TARGET_xxx. As a general
+ policy, rather than trying to guess what flags a user might want a
+ #define for, it's better to define a flag for everything. */
+
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+#define builtin_assert(TXT) cpp_assert (pfile, TXT)
+
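+/* A minimal sketch of the effect (not part of the original sources):
+   builtin_define ("_ARCH_PPC") acts as if -D_ARCH_PPC had been passed
+   on the command line, so user code can test
+
+     #ifdef _ARCH_PPC
+       ...PowerPC-specific code...
+     #endif
+
+   builtin_assert feeds cpp_assert the same way for #assert tests.  */
+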
+/* Keep the AltiVec keywords handy for fast comparisons. */
+static GTY(()) tree __vector_keyword;
+static GTY(()) tree vector_keyword;
+static GTY(()) tree __pixel_keyword;
+static GTY(()) tree pixel_keyword;
+static GTY(()) tree __bool_keyword;
+static GTY(()) tree bool_keyword;
+static GTY(()) tree _Bool_keyword;
+
+/* Preserved across calls. */
+static tree expand_bool_pixel;
+
+static cpp_hashnode *
+altivec_categorize_keyword (const cpp_token *tok)
+{
+ if (tok->type == CPP_NAME)
+ {
+ cpp_hashnode *ident = tok->val.node.node;
+
+ if (ident == C_CPP_HASHNODE (vector_keyword))
+ return C_CPP_HASHNODE (__vector_keyword);
+
+ if (ident == C_CPP_HASHNODE (pixel_keyword))
+ return C_CPP_HASHNODE (__pixel_keyword);
+
+ if (ident == C_CPP_HASHNODE (bool_keyword))
+ return C_CPP_HASHNODE (__bool_keyword);
+
+ if (ident == C_CPP_HASHNODE (_Bool_keyword))
+ return C_CPP_HASHNODE (__bool_keyword);
+
+ return ident;
+ }
+
+ return 0;
+}
+
+static void
+init_vector_keywords (void)
+{
+ /* Keywords without two leading underscores are context-sensitive,
+ and hence implemented as conditional macros, controlled by the
+ rs6000_macro_to_expand() function below. */
+
+ __vector_keyword = get_identifier ("__vector");
+ C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL;
+
+ __pixel_keyword = get_identifier ("__pixel");
+ C_CPP_HASHNODE (__pixel_keyword)->flags |= NODE_CONDITIONAL;
+
+ __bool_keyword = get_identifier ("__bool");
+ C_CPP_HASHNODE (__bool_keyword)->flags |= NODE_CONDITIONAL;
+
+ vector_keyword = get_identifier ("vector");
+ C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL;
+
+ pixel_keyword = get_identifier ("pixel");
+ C_CPP_HASHNODE (pixel_keyword)->flags |= NODE_CONDITIONAL;
+
+ bool_keyword = get_identifier ("bool");
+ C_CPP_HASHNODE (bool_keyword)->flags |= NODE_CONDITIONAL;
+
+ _Bool_keyword = get_identifier ("_Bool");
+ C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL;
+}
+
+/* Called to decide whether a conditional macro should be expanded.
+ Since we have exactly one such macro (i.e., 'vector'), we do not
+ need to examine the 'tok' parameter. */
+
+static cpp_hashnode *
+rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
+{
+ cpp_hashnode *expand_this = tok->val.node.node;
+ cpp_hashnode *ident;
+
+ ident = altivec_categorize_keyword (tok);
+
+ if (ident != expand_this)
+ expand_this = NULL;
+
+ if (ident == C_CPP_HASHNODE (__vector_keyword))
+ {
+ int idx = 0;
+ do
+ tok = cpp_peek_token (pfile, idx++);
+ while (tok->type == CPP_PADDING);
+ ident = altivec_categorize_keyword (tok);
+
+ if (ident == C_CPP_HASHNODE (__pixel_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ expand_bool_pixel = __pixel_keyword;
+ }
+ else if (ident == C_CPP_HASHNODE (__bool_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ expand_bool_pixel = __bool_keyword;
+ }
+ else if (ident)
+ {
+ enum rid rid_code = (enum rid)(ident->rid_code);
+ if (ident->type == NT_MACRO)
+ {
+ do
+ (void) cpp_get_token (pfile);
+ while (--idx > 0);
+ do
+ tok = cpp_peek_token (pfile, idx++);
+ while (tok->type == CPP_PADDING);
+ ident = altivec_categorize_keyword (tok);
+ if (ident == C_CPP_HASHNODE (__pixel_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ expand_bool_pixel = __pixel_keyword;
+ rid_code = RID_MAX;
+ }
+ else if (ident == C_CPP_HASHNODE (__bool_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ expand_bool_pixel = __bool_keyword;
+ rid_code = RID_MAX;
+ }
+ else if (ident)
+ rid_code = (enum rid)(ident->rid_code);
+ }
+
+ if (rid_code == RID_UNSIGNED || rid_code == RID_LONG
+ || rid_code == RID_SHORT || rid_code == RID_SIGNED
+ || rid_code == RID_INT || rid_code == RID_CHAR
+ || rid_code == RID_FLOAT
+ || (rid_code == RID_DOUBLE && TARGET_VSX))
+ {
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ /* If the next keyword is bool or pixel, it
+ will need to be expanded as well. */
+ do
+ tok = cpp_peek_token (pfile, idx++);
+ while (tok->type == CPP_PADDING);
+ ident = altivec_categorize_keyword (tok);
+
+ if (ident == C_CPP_HASHNODE (__pixel_keyword))
+ expand_bool_pixel = __pixel_keyword;
+ else if (ident == C_CPP_HASHNODE (__bool_keyword))
+ expand_bool_pixel = __bool_keyword;
+ else
+ {
+ /* Try two tokens down, too. */
+ do
+ tok = cpp_peek_token (pfile, idx++);
+ while (tok->type == CPP_PADDING);
+ ident = altivec_categorize_keyword (tok);
+ if (ident == C_CPP_HASHNODE (__pixel_keyword))
+ expand_bool_pixel = __pixel_keyword;
+ else if (ident == C_CPP_HASHNODE (__bool_keyword))
+ expand_bool_pixel = __bool_keyword;
+ }
+ }
+ }
+ }
+ else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__pixel_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__pixel_keyword);
+ expand_bool_pixel = 0;
+ }
+ else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__bool_keyword))
+ {
+ expand_this = C_CPP_HASHNODE (__bool_keyword);
+ expand_bool_pixel = 0;
+ }
+
+ return expand_this;
+}
+
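+/* Sketch of the resulting behavior (illustrative, not from the original
+   sources): with the conditional macros registered above,
+
+     vector unsigned int v;    "vector" expands, yielding __vector
+     struct vector *p;         "vector" stays an ordinary identifier
+
+   because in the second case the token that follows is not a type
+   keyword, bool, or pixel.  */
+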
+void
+rs6000_cpu_cpp_builtins (cpp_reader *pfile)
+{
+ if (TARGET_POWER2)
+ builtin_define ("_ARCH_PWR2");
+ else if (TARGET_POWER)
+ builtin_define ("_ARCH_PWR");
+ if (TARGET_POWERPC)
+ builtin_define ("_ARCH_PPC");
+ if (TARGET_PPC_GPOPT)
+ builtin_define ("_ARCH_PPCSQ");
+ if (TARGET_PPC_GFXOPT)
+ builtin_define ("_ARCH_PPCGR");
+ if (TARGET_POWERPC64)
+ builtin_define ("_ARCH_PPC64");
+ if (TARGET_MFCRF)
+ builtin_define ("_ARCH_PWR4");
+ if (TARGET_POPCNTB)
+ builtin_define ("_ARCH_PWR5");
+ if (TARGET_FPRND)
+ builtin_define ("_ARCH_PWR5X");
+ if (TARGET_CMPB)
+ builtin_define ("_ARCH_PWR6");
+ if (TARGET_MFPGPR)
+ builtin_define ("_ARCH_PWR6X");
+ if (! TARGET_POWER && ! TARGET_POWER2 && ! TARGET_POWERPC)
+ builtin_define ("_ARCH_COM");
+ if (TARGET_POPCNTD)
+ builtin_define ("_ARCH_PWR7");
+ if (TARGET_ALTIVEC)
+ {
+ builtin_define ("__ALTIVEC__");
+ builtin_define ("__VEC__=10206");
+
+ /* Define the AltiVec syntactic elements. */
+ builtin_define ("__vector=__attribute__((altivec(vector__)))");
+ builtin_define ("__pixel=__attribute__((altivec(pixel__))) unsigned short");
+ builtin_define ("__bool=__attribute__((altivec(bool__))) unsigned");
+
+ if (!flag_iso)
+ {
+ /* Define this when supporting context-sensitive keywords. */
+ builtin_define ("__APPLE_ALTIVEC__");
+
+ builtin_define ("vector=vector");
+ builtin_define ("pixel=pixel");
+ builtin_define ("bool=bool");
+ builtin_define ("_Bool=_Bool");
+ init_vector_keywords ();
+
+ /* Enable context-sensitive macros. */
+ cpp_get_callbacks (pfile)->macro_to_expand = rs6000_macro_to_expand;
+ }
+ }
+ if (rs6000_cpu == PROCESSOR_CELL)
+ builtin_define ("__PPU__");
+ if (TARGET_SPE)
+ builtin_define ("__SPE__");
+ if (TARGET_PAIRED_FLOAT)
+ builtin_define ("__PAIRED__");
+ if (TARGET_SOFT_FLOAT)
+ builtin_define ("_SOFT_FLOAT");
+ if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE))
+     || (TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_DOUBLE_FLOAT))
+ builtin_define ("_SOFT_DOUBLE");
+ /* Used by the lwarx/stwcx. errata work-around. */
+ if (rs6000_cpu == PROCESSOR_PPC405)
+ builtin_define ("__PPC405__");
+ /* Used by libstdc++. */
+ if (TARGET_NO_LWSYNC)
+ builtin_define ("__NO_LWSYNC__");
+ if (TARGET_VSX)
+ {
+ builtin_define ("__VSX__");
+
+ /* For the VSX builtin functions identical to Altivec functions, just
+ map the VSX name onto the Altivec builtin (the Altivec functions
+ generate VSX code when -mvsx is in effect). */
+ builtin_define ("__builtin_vsx_xxland=__builtin_vec_and");
+ builtin_define ("__builtin_vsx_xxlandc=__builtin_vec_andc");
+ builtin_define ("__builtin_vsx_xxlnor=__builtin_vec_nor");
+ builtin_define ("__builtin_vsx_xxlor=__builtin_vec_or");
+ builtin_define ("__builtin_vsx_xxlxor=__builtin_vec_xor");
+ builtin_define ("__builtin_vsx_xxsel=__builtin_vec_sel");
+ builtin_define ("__builtin_vsx_vperm=__builtin_vec_perm");
+
+ /* Also map the 'a' and 'm' forms of the multiply/add instructions to
+ the same builtin, for users coding directly from the instruction
+ manual. */
+ builtin_define ("__builtin_vsx_xvmaddadp=__builtin_vsx_xvmadddp");
+ builtin_define ("__builtin_vsx_xvmaddmdp=__builtin_vsx_xvmadddp");
+ builtin_define ("__builtin_vsx_xvmaddasp=__builtin_vsx_xvmaddsp");
+ builtin_define ("__builtin_vsx_xvmaddmsp=__builtin_vsx_xvmaddsp");
+ builtin_define ("__builtin_vsx_xvmsubadp=__builtin_vsx_xvmsubdp");
+ builtin_define ("__builtin_vsx_xvmsubmdp=__builtin_vsx_xvmsubdp");
+ builtin_define ("__builtin_vsx_xvmsubasp=__builtin_vsx_xvmsubsp");
+ builtin_define ("__builtin_vsx_xvmsubmsp=__builtin_vsx_xvmsubsp");
+ builtin_define ("__builtin_vsx_xvnmaddadp=__builtin_vsx_xvnmadddp");
+ builtin_define ("__builtin_vsx_xvnmaddmdp=__builtin_vsx_xvnmadddp");
+ builtin_define ("__builtin_vsx_xvnmaddasp=__builtin_vsx_xvnmaddsp");
+ builtin_define ("__builtin_vsx_xvnmaddmsp=__builtin_vsx_xvnmaddsp");
+ builtin_define ("__builtin_vsx_xvnmsubadp=__builtin_vsx_xvnmsubdp");
+ builtin_define ("__builtin_vsx_xvnmsubmdp=__builtin_vsx_xvnmsubdp");
+ builtin_define ("__builtin_vsx_xvnmsubasp=__builtin_vsx_xvnmsubsp");
+ builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp");
+ }
+ if (RS6000_RECIP_HAVE_RE_P (DFmode))
+ builtin_define ("__RECIP__");
+ if (RS6000_RECIP_HAVE_RE_P (SFmode))
+ builtin_define ("__RECIPF__");
+ if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode))
+ builtin_define ("__RSQRTE__");
+ if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode))
+ builtin_define ("__RSQRTEF__");
+ if (TARGET_RECIP_PRECISION)
+ builtin_define ("__RECIP_PRECISION__");
+
+ /* Tell users they can use __builtin_bswap{16,64}. */
+ builtin_define ("__HAVE_BSWAP__");
+
+ /* May be overridden by target configuration. */
+ RS6000_CPU_CPP_ENDIAN_BUILTINS();
+
+ if (TARGET_LONG_DOUBLE_128)
+ {
+ builtin_define ("__LONG_DOUBLE_128__");
+ builtin_define ("__LONGDOUBLE128");
+ }
+
+ switch (TARGET_CMODEL)
+ {
+ /* Deliberately omit __CMODEL_SMALL__ since that was the default
+ before -mcmodel support was added. */
+ case CMODEL_MEDIUM:
+ builtin_define ("__CMODEL_MEDIUM__");
+ break;
+ case CMODEL_LARGE:
+ builtin_define ("__CMODEL_LARGE__");
+ break;
+ default:
+ break;
+ }
+
+ switch (rs6000_current_abi)
+ {
+ case ABI_V4:
+ builtin_define ("_CALL_SYSV");
+ break;
+ case ABI_AIX:
+ builtin_define ("_CALL_AIXDESC");
+ builtin_define ("_CALL_AIX");
+ break;
+ case ABI_DARWIN:
+ builtin_define ("_CALL_DARWIN");
+ break;
+ default:
+ break;
+ }
+
+ /* Let the compiled code know if 'f' class registers will not be available. */
+ if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ builtin_define ("__NO_FPRS__");
+
+ /* Generate defines for Xilinx FPU. */
+ if (rs6000_xilinx_fpu)
+ {
+ builtin_define ("_XFPU");
+ if (rs6000_single_float && ! rs6000_double_float)
+ {
+ if (rs6000_simple_fpu)
+ builtin_define ("_XFPU_SP_LITE");
+ else
+ builtin_define ("_XFPU_SP_FULL");
+ }
+ if (rs6000_double_float)
+ {
+ if (rs6000_simple_fpu)
+ builtin_define ("_XFPU_DP_LITE");
+ else
+ builtin_define ("_XFPU_DP_FULL");
+ }
+ }
+}
+
+
+struct altivec_builtin_types
+{
+ enum rs6000_builtins code;
+ enum rs6000_builtins overloaded_code;
+ signed char ret_type;
+ signed char op1;
+ signed char op2;
+ signed char op3;
+};
+
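+/* How to read a row (explanatory sketch, not in the original sources):
+   the first entry below,
+
+     { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V16QI,
+       RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+
+   says that the overloaded vec_abs, applied to one V16QI operand,
+   resolves to the V16QI-specific ABS builtin and returns V16QI; unused
+   operand slots are 0.  */
+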
+const struct altivec_builtin_types altivec_overloaded_builtins[] = {
+ /* Unary AltiVec/VSX builtins. */
+ { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABSS, ALTIVEC_BUILTIN_ABSS_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_CEIL, ALTIVEC_BUILTIN_VRFIP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_CEIL, VSX_BUILTIN_XVRDPIP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_EXPTE, ALTIVEC_BUILTIN_VEXPTEFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_FLOOR, VSX_BUILTIN_XVRDPIM,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_FLOOR, ALTIVEC_BUILTIN_VRFIM,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_LOGE, ALTIVEC_BUILTIN_VLOGEFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_MTVSCR, ALTIVEC_BUILTIN_MTVSCR,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RE, ALTIVEC_BUILTIN_VREFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RE, VSX_BUILTIN_XVREDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_RSQRT, ALTIVEC_BUILTIN_VRSQRTFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RSQRT, VSX_BUILTIN_VEC_RSQRT_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RSQRTE, ALTIVEC_BUILTIN_VRSQRTEFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RSQRTE, VSX_BUILTIN_XVRSQRTEDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_TRUNC, ALTIVEC_BUILTIN_VRFIZ,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_TRUNC, VSX_BUILTIN_XVRDPIZ,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSB,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSB, ALTIVEC_BUILTIN_VUPKHSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSB, ALTIVEC_BUILTIN_VUPKHSB,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSB,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLSH, ALTIVEC_BUILTIN_VUPKLSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLSH, ALTIVEC_BUILTIN_VUPKLSH,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKLSB, ALTIVEC_BUILTIN_VUPKLSB,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V16QI, 0, 0 },
+
+ /* Binary AltiVec/VSX builtins. */
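+ /* Each row pairs a generic overloaded builtin with the specific
+    instruction builtin it resolves to, in the form { overloaded code,
+    instance code, result type, operand 1 type, operand 2 type,
+    operand 3 type }, with unused operand slots set to 0.  For
+    example, the VADDUWM rows below make vec_add on two vector signed
+    int operands expand to the vadduwm instruction builtin.  */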
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHM, ALTIVEC_BUILTIN_VADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBM, ALTIVEC_BUILTIN_VADDUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDC, ALTIVEC_BUILTIN_VADDCUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADDS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSWS, ALTIVEC_BUILTIN_VADDSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUWS, ALTIVEC_BUILTIN_VADDUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSHS, ALTIVEC_BUILTIN_VADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUHS, ALTIVEC_BUILTIN_VADDUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDSBS, ALTIVEC_BUILTIN_VADDSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VADDUBS, ALTIVEC_BUILTIN_VADDUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AVG, ALTIVEC_BUILTIN_VAVGSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGSW, ALTIVEC_BUILTIN_VAVGSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGUW, ALTIVEC_BUILTIN_VAVGUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGSH, ALTIVEC_BUILTIN_VAVGSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGUH, ALTIVEC_BUILTIN_VAVGUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGSB, ALTIVEC_BUILTIN_VAVGSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VAVGUB, ALTIVEC_BUILTIN_VAVGUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPB, ALTIVEC_BUILTIN_VCMPBFP,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQFP, ALTIVEC_BUILTIN_VCMPEQFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUW, ALTIVEC_BUILTIN_VCMPEQUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUW, ALTIVEC_BUILTIN_VCMPEQUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUH, ALTIVEC_BUILTIN_VCMPEQUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUH, ALTIVEC_BUILTIN_VCMPEQUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUB, ALTIVEC_BUILTIN_VCMPEQUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQUB, ALTIVEC_BUILTIN_VCMPEQUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+
+ { ALTIVEC_BUILTIN_VEC_CMPGE, ALTIVEC_BUILTIN_VCMPGEFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTFP, ALTIVEC_BUILTIN_VCMPGTFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTSW, ALTIVEC_BUILTIN_VCMPGTSW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTUW, ALTIVEC_BUILTIN_VCMPGTUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTSH, ALTIVEC_BUILTIN_VCMPGTSH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTUH, ALTIVEC_BUILTIN_VCMPGTUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTSB, ALTIVEC_BUILTIN_VCMPGTSB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCMPGTUB, ALTIVEC_BUILTIN_VCMPGTUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
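+ /* AltiVec/VSX provide no less-than or less-than-or-equal compare
+    instructions, so the vec_cmple and vec_cmplt rows below reuse the
+    >= and > builtins; the operand order is swapped when the call is
+    expanded.  */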
+ { ALTIVEC_BUILTIN_VEC_CMPLE, ALTIVEC_BUILTIN_VCMPGEFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_COPYSIGN, VSX_BUILTIN_CPSGNDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_COPYSIGN, ALTIVEC_BUILTIN_COPYSIGN_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
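+ /* In the fixed-point/float conversions below, the trailing INTSI
+    operand is the literal scale factor of the underlying vcfux,
+    vcfsx, vctsxs and vctuxs instructions; it must be a compile-time
+    constant in the range 0..31.  */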
+ { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFUX,
+ RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX,
+ RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
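+ /* A complemented type index (~TYPE) in an operand slot denotes a
+    pointer to that type: the decoder builds a pointer type from ~ID,
+    using logical NOT rather than negation so that a pointer to the
+    type with index 0 remains representable.  */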
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEHX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEHX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEWX, ALTIVEC_BUILTIN_LVEWX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEHX, ALTIVEC_BUILTIN_LVEHX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEHX, ALTIVEC_BUILTIN_LVEHX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
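+ /* vec_lvsl and vec_lvsr accept a pointer to any scalar element type
+    and return the vector unsigned char permute control used to
+    realign unaligned loads and stores.  */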
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_long_long, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVLXL, ALTIVEC_BUILTIN_LVLXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRX, ALTIVEC_BUILTIN_LVRX,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_LVRXL, ALTIVEC_BUILTIN_LVRXL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXFP, ALTIVEC_BUILTIN_VMAXFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSW, ALTIVEC_BUILTIN_VMAXSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUW, ALTIVEC_BUILTIN_VMAXUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSH, ALTIVEC_BUILTIN_VMAXSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUH, ALTIVEC_BUILTIN_VMAXUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXSB, ALTIVEC_BUILTIN_VMAXSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMAXUB, ALTIVEC_BUILTIN_VMAXUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHH, ALTIVEC_BUILTIN_VMRGHH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGHB, ALTIVEC_BUILTIN_VMRGHB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLH, ALTIVEC_BUILTIN_VMRGLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMRGLB, ALTIVEC_BUILTIN_VMRGLB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
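+ /* Illustration of the row layout used throughout this table: each
+    entry pairs an overloaded builtin with the type-specific builtin
+    it resolves to, followed by the result type and up to three
+    operand types; a 0 marks an unused operand slot. */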
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
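+ /* For example, under the rows above a call such as
+      vec_min ((vector float) a, (vector float) b)
+    resolves to the vminfp instance, while vector double operands
+    select the VSX xvmindp form. */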
+ { ALTIVEC_BUILTIN_VEC_VMINFP, ALTIVEC_BUILTIN_VMINFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSW, ALTIVEC_BUILTIN_VMINSW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUW, ALTIVEC_BUILTIN_VMINUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSH, ALTIVEC_BUILTIN_VMINSH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINSB, ALTIVEC_BUILTIN_VMINSB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUH, ALTIVEC_BUILTIN_VMINUH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULEUH, ALTIVEC_BUILTIN_VMULEUH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULESH, ALTIVEC_BUILTIN_VMULESH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULOUH, ALTIVEC_BUILTIN_VMULOUH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULOSB, ALTIVEC_BUILTIN_VMULOSB,
+ RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VMULOUB, ALTIVEC_BUILTIN_VMULOUB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
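+ /* vec_mule and vec_mulo are widening multiplies: the rows above take
+    two V16QI (or V8HI) operands and return the double-width V8HI (or
+    V4SI) products of the even or odd elements respectively, so e.g.
+    vec_mule on vector signed char yields vector signed short. */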
+ { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRDPI,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_NEARBYINT, VSX_BUILTIN_XVRSPI,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUHUM, ALTIVEC_BUILTIN_VPKUHUM,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKPX, ALTIVEC_BUILTIN_VPKPX,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKUHUS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKSHSS,
+ RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKUWUS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, ALTIVEC_BUILTIN_VPKSWSS,
+ RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKSWSS, ALTIVEC_BUILTIN_VPKSWSS,
+ RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS,
+ RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKUHUS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSHUS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKUWUS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRDPIC,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RINT, VSX_BUILTIN_XVRSPIC,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLH, ALTIVEC_BUILTIN_VRLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLH, ALTIVEC_BUILTIN_VRLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLW, ALTIVEC_BUILTIN_VSLW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLH, ALTIVEC_BUILTIN_VSLH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLH, ALTIVEC_BUILTIN_VSLH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLB, ALTIVEC_BUILTIN_VSLB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSLB, ALTIVEC_BUILTIN_VSLB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLL, ALTIVEC_BUILTIN_VSL,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SLO, ALTIVEC_BUILTIN_VSLO,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
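+ /* vec_sll (vsl above) shifts the whole 128-bit vector left by a bit
+    count, which is why any unsigned V16QI/V8HI/V4SI vector is accepted
+    as the shift operand, while vec_slo (vslo) shifts left by whole
+    octets taken from the second operand. */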
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 },
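+ /* vec_splat's second argument is a literal element index, so every
+    variant above funnels it through RS6000_BTI_INTSI regardless of
+    the vector element type. */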
+ { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTH, ALTIVEC_BUILTIN_VSPLTH,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSPLTB, ALTIVEC_BUILTIN_VSPLTB,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRH, ALTIVEC_BUILTIN_VSRH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRH, ALTIVEC_BUILTIN_VSRH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRB, ALTIVEC_BUILTIN_VSRB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRB, ALTIVEC_BUILTIN_VSRB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAH, ALTIVEC_BUILTIN_VSRAH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAH, ALTIVEC_BUILTIN_VSRAH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAB, ALTIVEC_BUILTIN_VSRAB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSRAB, ALTIVEC_BUILTIN_VSRAB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRL, ALTIVEC_BUILTIN_VSR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRO, ALTIVEC_BUILTIN_VSRO,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHM, ALTIVEC_BUILTIN_VSUBUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBM, ALTIVEC_BUILTIN_VSUBUBM,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBC, ALTIVEC_BUILTIN_VSUBCUW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUBS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSWS, ALTIVEC_BUILTIN_VSUBSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUWS, ALTIVEC_BUILTIN_VSUBUWS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSHS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSHS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSHS, ALTIVEC_BUILTIN_VSUBSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUHS, ALTIVEC_BUILTIN_VSUBUHS,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBSBS, ALTIVEC_BUILTIN_VSUBSBS,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUBUBS, ALTIVEC_BUILTIN_VSUBUBS,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4UBS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4SBS,
+ RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUM4S, ALTIVEC_BUILTIN_VSUM4SHS,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUM4SHS, ALTIVEC_BUILTIN_VSUM4SHS,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUM4SBS, ALTIVEC_BUILTIN_VSUM4SBS,
+ RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_VSUM4UBS, ALTIVEC_BUILTIN_VSUM4UBS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUM2S, ALTIVEC_BUILTIN_VSUM2SWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUMS, ALTIVEC_BUILTIN_VSUMSWS,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+
+ /* Ternary AltiVec/VSX builtins. */
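+ /* In the rows below a '~' on a type code denotes a pointer to that
+    type (the resolver appears to build the pointer type for negated
+    codes), so vec_dst and friends take an address plus two int
+    literals. */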
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DST, ALTIVEC_BUILTIN_DST,
+ RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTST, ALTIVEC_BUILTIN_DSTST,
+ RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTSTT, ALTIVEC_BUILTIN_DSTSTT,
+ RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_V4SF, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTQI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTHI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_UINTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_unsigned_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_long, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_DSTT, ALTIVEC_BUILTIN_DSTT,
+ RS6000_BTI_void, ~RS6000_BTI_float, RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMADDFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
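+ /* Illustrative example (editorial addition, not upstream text): with
+ the two rows above,
+ vector float a, b, c;
+ vector float r = vec_madd (a, b, c);
+ resolves ALTIVEC_BUILTIN_VEC_MADD to the AltiVec instance VMADDFP,
+ while vector double operands select the VSX instance XVMADDDP. */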
+ { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MRADDS, ALTIVEC_BUILTIN_VMHRADDSHS,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_MSUB, VSX_BUILTIN_XVMSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUBM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMMBM,
+ RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMUHM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_MSUM, ALTIVEC_BUILTIN_VMSUMSHM,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMSHM, ALTIVEC_BUILTIN_VMSUMSHM,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMUHM, ALTIVEC_BUILTIN_VMSUMUHM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMMBM, ALTIVEC_BUILTIN_VMSUMMBM,
+ RS6000_BTI_V4SI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMUBM, ALTIVEC_BUILTIN_VMSUMUBM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_MSUMS, ALTIVEC_BUILTIN_VMSUMUHS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_MSUMS, ALTIVEC_BUILTIN_VMSUMSHS,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMSHS, ALTIVEC_BUILTIN_VMSUMSHS,
+ RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VMSUMUHS, ALTIVEC_BUILTIN_VMSUMUHS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI },
+ { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDSP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_NMADD, VSX_BUILTIN_XVNMADDDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VNMSUBFP,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_NMSUB, VSX_BUILTIN_XVNMSUBDP,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_8HI,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI },
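+ /* Illustrative example (editorial addition, not upstream text): the
+ vec_perm rows all take a byte-permute control vector as the last
+ operand, so
+ vector unsigned char sel;
+ vector float r = vec_perm (a, b, sel);   (a, b: vector float)
+ resolves to VPERM_4SF via the 4SF row above. */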
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
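+ /* Illustrative note (editorial addition, not upstream text): each
+ vec_sel row pairs a data type with a bool or unsigned mask of
+ matching width, e.g.
+ vector bool int m;
+ vector int r = vec_sel (a, b, m);   (a, b: vector int)
+ takes bits of b where m is set and bits of a elsewhere, resolving
+ to VSEL_4SI here. */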
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_8HI,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE },
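+ /* Illustrative note (editorial addition, not upstream text):
+ RS6000_BTI_NOT_OPAQUE in the last slot appears to mark an operand
+ that is not type-checked against this table -- for vec_sld it is
+ the 4-bit literal byte count, as in
+ vector int r = vec_sld (a, b, 3); */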
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
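+ /* Illustrative note (editorial addition, not upstream text): the
+ store rows above and below reverse the load pattern -- operand 1 is
+ the vector value, operand 2 the byte offset, and the '~' operand 3
+ the destination pointer, e.g.
+ vec_st (v, 0, p);   (v: vector signed int, p: int *) */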
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEWX, ALTIVEC_BUILTIN_STVEWX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEHX, ALTIVEC_BUILTIN_STVEHX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVLXL, ALTIVEC_BUILTIN_STVLXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRX, ALTIVEC_BUILTIN_STVRX,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_STVRXL, ALTIVEC_BUILTIN_STVRXL,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXSLDWI, VSX_BUILTIN_XXSLDWI_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_NOT_OPAQUE },
+ { VSX_BUILTIN_VEC_XXPERMDI, VSX_BUILTIN_XXPERMDI_16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_NOT_OPAQUE },
+
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V4SI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_long, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V8HI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V16QI, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
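+ /* Illustrative note (editorial addition, not upstream text): these
+ VSX_BUILTIN_VEC_LD rows presumably back the vec_vsx_ld interface,
+ selecting LXVD2X for doubleword and LXVW4X for word, halfword and
+ byte element types, e.g.
+ vector double d = vec_vsx_ld (0, vdp);   (vdp: vector double *) */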
+
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
+ RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V2DI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF,
+ RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V4SI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI,
+ RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_INTSI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V8HI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI,
+ RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_INTHI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_unsigned_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_bool_V16QI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_UINTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_INTQI },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI,
+ RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI,
+ ~RS6000_BTI_pixel_V8HI },
+
+ /* Predicates. */
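+ /* Illustrative note (editorial addition, not upstream text): the
+ predicate rows return int and take an extra leading int operand --
+ the CR6 selector that the altivec.h vec_all_gt/vec_any_gt macros
+ pass, roughly
+ vec_all_gt (a, b) -> __builtin_vec_vcmpgt_p (__CR6_LT, a, b)
+ with the vector operand types resolved by the rows below. */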
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_pixel_V8HI, RS6000_BTI_pixel_V8HI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+
+
+ /* cmpge is the same as cmpgt for all cases except floating point.
+ There is further code to deal with this special case in
+ altivec_build_resolved_builtin. */
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSB_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSH_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTUW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
+ { ALTIVEC_BUILTIN_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+
+ { (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
+};
+
+
+/* Convert a type stored into a struct altivec_builtin_types as ID,
+ into a tree. The types are in rs6000_builtin_types: negative values
+ create a pointer type for the type associated to ~ID. Note it is
+ a logical NOT, rather than a negation, otherwise you cannot represent
+ a pointer type for ID 0. */
+
+static inline tree
+rs6000_builtin_type (int id)
+{
+ tree t;
+ t = rs6000_builtin_types[id < 0 ? ~id : id];
+ return id < 0 ? build_pointer_type (t) : t;
+}
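+
+/* An illustrative sketch of the encoding (assuming RS6000_BTI_V4SI
+ indexes the "vector signed int" node in rs6000_builtin_types):
+
+ rs6000_builtin_type (RS6000_BTI_V4SI) -> vector signed int
+ rs6000_builtin_type (~RS6000_BTI_V4SI) -> vector signed int *
+
+ Because ~0 != 0, a pointer type can be requested even for entry 0. */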
+
+/* Check whether the type of an argument, T, is compatible with a
+ type ID stored into a struct altivec_builtin_types. Integer
+ types are considered compatible; otherwise, the language hook
+ lang_hooks.types_compatible_p makes the decision. */
+
+static inline bool
+rs6000_builtin_type_compatible (tree t, int id)
+{
+ tree builtin_type;
+ builtin_type = rs6000_builtin_type (id);
+ if (t == error_mark_node)
+ return false;
+ if (INTEGRAL_TYPE_P (t) && INTEGRAL_TYPE_P (builtin_type))
+ return true;
+ else
+ return lang_hooks.types_compatible_p (t, builtin_type);
+}
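+
+/* A consequence worth noting: since any two integral types are treated
+ as compatible, the integer condition selector passed as the first
+ argument of the AltiVec predicate builtins matches the
+ RS6000_BTI_INTSI slots in the table above regardless of its exact
+ integer type. */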
+
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+ The overloaded builtin that matched the types and args is described
+ by DESC. The N arguments are given in ARGS, respectively.
+
+ Actually the only thing it does is call fold_convert on ARGS, with
+ a small exception for the vec_{all,any}_{ge,le} predicates. */
+
+static tree
+altivec_build_resolved_builtin (tree *args, int n,
+ const struct altivec_builtin_types *desc)
+{
+ tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
+ tree ret_type = rs6000_builtin_type (desc->ret_type);
+ tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
+ tree arg_type[3];
+ tree call;
+
+ int i;
+ for (i = 0; i < n; i++)
+ arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+ /* The AltiVec overloading implementation is overall gross, but this
+ is particularly disgusting. The vec_{all,any}_{ge,le} builtins
+ are completely different for floating-point vs. integer vector
+ types, because the former has vcmpgefp, but the latter should use
+ vcmpgtXX.
+
+ In practice, the second and third arguments are swapped, and the
+ condition (LT vs. EQ, which is recognizable by bit 1 of the first
+ argument) is reversed. Patch the arguments here before building
+ the resolved CALL_EXPR. */
+ if (desc->code == ALTIVEC_BUILTIN_VCMPGE_P
+ && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P)
+ {
+ tree t;
+ t = args[2], args[2] = args[1], args[1] = t;
+ t = arg_type[2], arg_type[2] = arg_type[1], arg_type[1] = t;
+
+ args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+ build_int_cst (NULL_TREE, 2));
+ }
+
+ switch (n)
+ {
+ case 0:
+ call = build_call_expr (impl_fndecl, 0);
+ break;
+ case 1:
+ call = build_call_expr (impl_fndecl, 1,
+ fold_convert (arg_type[0], args[0]));
+ break;
+ case 2:
+ call = build_call_expr (impl_fndecl, 2,
+ fold_convert (arg_type[0], args[0]),
+ fold_convert (arg_type[1], args[1]));
+ break;
+ case 3:
+ call = build_call_expr (impl_fndecl, 3,
+ fold_convert (arg_type[0], args[0]),
+ fold_convert (arg_type[1], args[1]),
+ fold_convert (arg_type[2], args[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return fold_convert (ret_type, call);
+}
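+
+/* A rough example of the fixup above, for vector signed int operands:
+ vec_all_ge (a, b) arrives as ALTIVEC_BUILTIN_VCMPGE_P and, because
+ there is no integer "greater than or equal" compare instruction, is
+ resolved to ALTIVEC_BUILTIN_VCMPGTSW_P with the two vector operands
+ swapped and the condition bit flipped, so that "all (a >= b)" is
+ evaluated as "none (b > a)". */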
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+ support Altivec's overloaded builtins. */
+
+tree
+altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
+ void *passed_arglist)
+{
+ VEC(tree,gc) *arglist = (VEC(tree,gc) *) passed_arglist;
+ unsigned int nargs = VEC_length (tree, arglist);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+ tree types[3], args[3];
+ const struct altivec_builtin_types *desc;
+ unsigned int n;
+
+ if ((fcode < ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ || fcode > ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ && (fcode < VSX_BUILTIN_OVERLOADED_FIRST
+ || fcode > VSX_BUILTIN_OVERLOADED_LAST))
+ return NULL_TREE;
+
+ /* For now treat vec_splats and vec_promote as the same. */
+ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
+ || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
+ {
+ tree type, arg;
+ int size;
+ int i;
+ bool unsigned_p;
+ VEC(constructor_elt,gc) *vec;
+ const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats" : "vec_promote";
+
+ if (nargs == 0)
+ {
+ error ("%s only accepts %d arguments", name, (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)+1 );
+ return error_mark_node;
+ }
+ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1)
+ {
+ error ("%s only accepts 1 argument", name);
+ return error_mark_node;
+ }
+ if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2)
+ {
+ error ("%s only accepts 2 arguments", name);
+ return error_mark_node;
+ }
+ /* Promote's element argument must be an integer; it is otherwise ignored. */
+ if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE
+ && !INTEGRAL_TYPE_P (TREE_TYPE (VEC_index (tree, arglist, 1))))
+ goto bad;
+
+ arg = VEC_index (tree, arglist, 0);
+ type = TREE_TYPE (arg);
+ if (!SCALAR_FLOAT_TYPE_P (type)
+ && !INTEGRAL_TYPE_P (type))
+ goto bad;
+ unsigned_p = TYPE_UNSIGNED (type);
+ switch (TYPE_MODE (type))
+ {
+ case DImode:
+ type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+ size = 2;
+ break;
+ case SImode:
+ type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+ size = 4;
+ break;
+ case HImode:
+ type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+ size = 8;
+ break;
+ case QImode:
+ type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+ size = 16;
+ break;
+ case SFmode: type = V4SF_type_node; size = 4; break;
+ case DFmode: type = V2DF_type_node; size = 2; break;
+ default:
+ goto bad;
+ }
+ arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+ vec = VEC_alloc (constructor_elt, gc, size);
+ for (i = 0; i < size; i++)
+ {
+ constructor_elt *elt;
+
+ elt = VEC_quick_push (constructor_elt, vec, NULL);
+ elt->index = NULL_TREE;
+ elt->value = arg;
+ }
+ return build_constructor (type, vec);
+ }
+
+ /* For now use pointer tricks to do the extraction, unless we are on VSX
+ extracting a double from a constant offset. */
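+ /* A sketch of the generic path below, for vector signed int v:
+ vec_extract (v, n) becomes roughly
+ *(((int *) &(vector signed int){v}) + (n & 3))
+ where the mask is TYPE_VECTOR_SUBPARTS (arg1_type) - 1. */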
+ if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
+ {
+ tree arg1;
+ tree arg1_type;
+ tree arg2;
+ tree arg1_inner_type;
+ tree decl, stmt;
+ tree innerptrtype;
+ enum machine_mode mode;
+
+ /* Require exactly two arguments. */
+ if (nargs != 2)
+ {
+ error ("vec_extract only accepts 2 arguments");
+ return error_mark_node;
+ }
+
+ arg2 = VEC_index (tree, arglist, 1);
+ arg1 = VEC_index (tree, arglist, 0);
+ arg1_type = TREE_TYPE (arg1);
+
+ if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+ goto bad;
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+ goto bad;
+
+ /* If we can use the VSX xxpermdi instruction, use that for extract. */
+ mode = TYPE_MODE (arg1_type);
+ if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
+ {
+ tree call = NULL_TREE;
+
+ if (mode == V2DFmode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+ else if (mode == V2DImode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+
+ if (call)
+ return build_call_expr (call, 2, arg1, arg2);
+ }
+
+ /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+ arg1_inner_type = TREE_TYPE (arg1_type);
+ arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+ build_int_cst (TREE_TYPE (arg2),
+ TYPE_VECTOR_SUBPARTS (arg1_type)
+ - 1), 0);
+ decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+ DECL_EXTERNAL (decl) = 0;
+ TREE_PUBLIC (decl) = 0;
+ DECL_CONTEXT (decl) = current_function_decl;
+ TREE_USED (decl) = 1;
+ TREE_TYPE (decl) = arg1_type;
+ TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+
+ innerptrtype = build_pointer_type (arg1_inner_type);
+
+ stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+ stmt = convert (innerptrtype, stmt);
+ stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+ stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+ return stmt;
+ }
+
+ /* For now use pointer tricks to do the insertion, unless we are on VSX
+ inserting a double at a constant offset. */
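+ /* A sketch of the generic path below, for vector signed int v:
+ vec_insert (x, v, n) becomes roughly
+ ({ vector signed int tmp = v;
+ *(((int *) &tmp) + (n & 3)) = x;
+ tmp; })
+ i.e. a compound literal copy of v is modified and then returned. */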
+ if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
+ {
+ tree arg0;
+ tree arg1;
+ tree arg2;
+ tree arg1_type;
+ tree arg1_inner_type;
+ tree decl, stmt;
+ tree innerptrtype;
+ enum machine_mode mode;
+
+ /* Require exactly three arguments. */
+ if (nargs != 3)
+ {
+ error ("vec_insert only accepts 3 arguments");
+ return error_mark_node;
+ }
+
+ arg0 = VEC_index (tree, arglist, 0);
+ arg1 = VEC_index (tree, arglist, 1);
+ arg1_type = TREE_TYPE (arg1);
+ arg2 = VEC_index (tree, arglist, 2);
+
+ if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+ goto bad;
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+ goto bad;
+
+ /* If we can use the VSX xxpermdi instruction, use that for insert. */
+ mode = TYPE_MODE (arg1_type);
+ if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
+ {
+ tree call = NULL_TREE;
+
+ if (mode == V2DFmode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+ else if (mode == V2DImode)
+ call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+ /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+ reversed. */
+ if (call)
+ return build_call_expr (call, 3, arg1, arg0, arg2);
+ }
+
+ /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
+ arg1_inner_type = TREE_TYPE (arg1_type);
+ arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+ build_int_cst (TREE_TYPE (arg2),
+ TYPE_VECTOR_SUBPARTS (arg1_type)
+ - 1), 0);
+ decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+ DECL_EXTERNAL (decl) = 0;
+ TREE_PUBLIC (decl) = 0;
+ DECL_CONTEXT (decl) = current_function_decl;
+ TREE_USED (decl) = 1;
+ TREE_TYPE (decl) = arg1_type;
+ TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+
+ innerptrtype = build_pointer_type (arg1_inner_type);
+
+ stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+ stmt = convert (innerptrtype, stmt);
+ stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+ stmt = build_indirect_ref (loc, stmt, RO_NULL);
+ stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+ convert (TREE_TYPE (stmt), arg0));
+ stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+ return stmt;
+ }
+
+ for (n = 0;
+ !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+ fnargs = TREE_CHAIN (fnargs), n++)
+ {
+ tree decl_type = TREE_VALUE (fnargs);
+ tree arg = VEC_index (tree, arglist, n);
+ tree type;
+
+ if (arg == error_mark_node)
+ return error_mark_node;
+
+ if (n >= 3)
+ abort ();
+
+ arg = default_conversion (arg);
+
+ /* The C++ front-end converts float * to const void * using
+ NOP_EXPR<const void *> (NOP_EXPR<void *> (x)). */
+ type = TREE_TYPE (arg);
+ if (POINTER_TYPE_P (type)
+ && TREE_CODE (arg) == NOP_EXPR
+ && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+ const_ptr_type_node)
+ && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+ ptr_type_node))
+ {
+ arg = TREE_OPERAND (arg, 0);
+ type = TREE_TYPE (arg);
+ }
+
+ /* Remove the const from the pointers to simplify the overload
+ matching further down. */
+ if (POINTER_TYPE_P (decl_type)
+ && POINTER_TYPE_P (type)
+ && TYPE_QUALS (TREE_TYPE (type)) != 0)
+ {
+ if (TYPE_READONLY (TREE_TYPE (type))
+ && !TYPE_READONLY (TREE_TYPE (decl_type)))
+ warning (0, "passing arg %d of %qE discards qualifiers from "
+ "pointer target type", n + 1, fndecl);
+ type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+ 0));
+ arg = fold_convert (type, arg);
+ }
+
+ args[n] = arg;
+ types[n] = type;
+ }
+
+ /* If the number of arguments did not match the prototype, return NULL
+ and the generic code will issue the appropriate error message. */
+ if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+ return NULL;
+
+ if (n == 0)
+ abort ();
+
+ if (fcode == ALTIVEC_BUILTIN_VEC_STEP)
+ {
+ if (TREE_CODE (types[0]) != VECTOR_TYPE)
+ goto bad;
+
+ return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+ }
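+ /* (So vec_step simply returns the element count of its vector
+ argument: 4 for vector signed int, 16 for vector signed char.) */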
+
+ for (desc = altivec_overloaded_builtins;
+ desc->code && desc->code != fcode; desc++)
+ continue;
+
+ /* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in
+ the opX fields. */
+ for (; desc->code == fcode; desc++)
+ if ((desc->op1 == RS6000_BTI_NOT_OPAQUE
+ || rs6000_builtin_type_compatible (types[0], desc->op1))
+ && (desc->op2 == RS6000_BTI_NOT_OPAQUE
+ || rs6000_builtin_type_compatible (types[1], desc->op2))
+ && (desc->op3 == RS6000_BTI_NOT_OPAQUE
+ || rs6000_builtin_type_compatible (types[2], desc->op3)))
+ return altivec_build_resolved_builtin (args, n, desc);
+
+ bad:
+ error ("invalid parameter combination for AltiVec intrinsic");
+ return error_mark_node;
+}
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
new file mode 100644
index 000000000..724c947f1
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -0,0 +1,41 @@
+/* Definitions of target machine for GNU compiler, for IBM RS/6000.
+ Copyright (C) 2002, 2003, 2004, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* 128-bit floating point. ABI_V4 uses IEEE quad, AIX/Darwin
+ adjust this in rs6000_option_override_internal. */
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Add any extra modes needed to represent the condition code.
+
+ For the RS/6000, we need separate modes when unsigned (logical) comparisons
+ are being done and we need a separate mode for floating-point. We also
+ use a mode for the case when we are comparing the results of two
+ comparisons, as then only the EQ bit is valid in the register. */
+
+CC_MODE (CCUNS);
+CC_MODE (CCFP);
+CC_MODE (CCEQ);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODE (INT, DI, 1);
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
new file mode 100644
index 000000000..542fea790
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -0,0 +1,144 @@
+/* Definitions of target machine needed for option handling for GNU compiler,
+ for IBM RS/6000.
+ Copyright (C) 2010
+ Free Software Foundation, Inc.
+ Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef RS6000_OPTS_H
+#define RS6000_OPTS_H
+
+/* Processor type. Order must match cpu attribute in MD file. */
+enum processor_type
+ {
+ PROCESSOR_RIOS1,
+ PROCESSOR_RIOS2,
+ PROCESSOR_RS64A,
+ PROCESSOR_MPCCORE,
+ PROCESSOR_PPC403,
+ PROCESSOR_PPC405,
+ PROCESSOR_PPC440,
+ PROCESSOR_PPC476,
+ PROCESSOR_PPC601,
+ PROCESSOR_PPC603,
+ PROCESSOR_PPC604,
+ PROCESSOR_PPC604e,
+ PROCESSOR_PPC620,
+ PROCESSOR_PPC630,
+ PROCESSOR_PPC750,
+ PROCESSOR_PPC7400,
+ PROCESSOR_PPC7450,
+ PROCESSOR_PPC8540,
+ PROCESSOR_PPCE300C2,
+ PROCESSOR_PPCE300C3,
+ PROCESSOR_PPCE500MC,
+ PROCESSOR_PPCE500MC64,
+ PROCESSOR_POWER4,
+ PROCESSOR_POWER5,
+ PROCESSOR_POWER6,
+ PROCESSOR_POWER7,
+ PROCESSOR_CELL,
+ PROCESSOR_PPCA2,
+ PROCESSOR_TITAN
+};
+
+/* FP processor type. */
+enum fpu_type_t
+{
+ FPU_NONE, /* No FPU */
+ FPU_SF_LITE, /* Limited Single Precision FPU */
+ FPU_DF_LITE, /* Limited Double Precision FPU */
+ FPU_SF_FULL, /* Full Single Precision FPU */
+ FPU_DF_FULL /* Full Double Precision FPU */
+};
+
+/* Types of costly dependences. */
+enum rs6000_dependence_cost
+{
+ max_dep_latency = 1000,
+ no_dep_costly,
+ all_deps_costly,
+ true_store_to_load_dep_costly,
+ store_to_load_dep_costly
+};
+
+/* Types of nop insertion schemes in sched target hook sched_finish. */
+enum rs6000_nop_insertion
+{
+ sched_finish_regroup_exact = 1000,
+ sched_finish_pad_groups,
+ sched_finish_none
+};
+
+/* Dispatch group termination caused by an insn. */
+enum group_termination
+{
+ current_group,
+ previous_group
+};
+
+/* Enumeration to give which calling sequence to use. */
+enum rs6000_abi {
+ ABI_NONE,
+ ABI_AIX, /* IBM's AIX */
+ ABI_V4, /* System V.4/eabi */
+ ABI_DARWIN /* Apple's Darwin (OS X kernel) */
+};
+
+/* Small data support types. */
+enum rs6000_sdata_type {
+ SDATA_NONE, /* No small data support. */
+ SDATA_DATA, /* Just put data in .sbss/.sdata, don't use relocs. */
+ SDATA_SYSV, /* Use r13 to point to .sdata/.sbss. */
+ SDATA_EABI /* Use r13 like above, r2 points to .sdata2/.sbss2. */
+};
+
+/* Type of traceback to use. */
+enum rs6000_traceback_type {
+ traceback_default = 0,
+ traceback_none,
+ traceback_part,
+ traceback_full
+};
+
+/* Code model for 64-bit linux.
+ small: 16-bit toc offsets.
+ medium: 32-bit toc offsets, static data and code within 2G of TOC pointer.
+ large: 32-bit toc offsets, no limit on static data and code. */
+enum rs6000_cmodel {
+ CMODEL_SMALL,
+ CMODEL_MEDIUM,
+ CMODEL_LARGE
+};
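+
+/* These correspond to the -mcmodel={small,medium,large} command-line
+ option on 64-bit Linux; e.g. "gcc -mcmodel=medium" selects
+ CMODEL_MEDIUM. */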
+
+/* Describe which vector unit to use for a given machine mode. */
+enum rs6000_vector {
+ VECTOR_NONE, /* Type is not a vector or not supported */
+ VECTOR_ALTIVEC, /* Use altivec for vector processing */
+ VECTOR_VSX, /* Use VSX for vector processing */
+ VECTOR_PAIRED, /* Use paired floating point for vectors */
+ VECTOR_SPE, /* Use SPE for vector processing */
+ VECTOR_OTHER /* Some other vector unit */
+};
+
+#endif
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
new file mode 100644
index 000000000..d79af36ce
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -0,0 +1,198 @@
+/* Definitions of target machine for GNU compiler, for IBM RS/6000.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
+ 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_RS6000_PROTOS_H
+#define GCC_RS6000_PROTOS_H
+
+/* Declare functions in rs6000.c */
+
+#ifdef RTX_CODE
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
+ tree, enum machine_mode);
+#endif /* TREE_CODE */
+
+extern bool easy_altivec_constant (rtx, enum machine_mode);
+extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
+extern bool macho_lo_sum_memory_operand (rtx, enum machine_mode);
+extern int num_insns_constant (rtx, enum machine_mode);
+extern int num_insns_constant_wide (HOST_WIDE_INT);
+extern int small_data_operand (rtx, enum machine_mode);
+extern bool toc_relative_expr_p (rtx);
+extern bool invalid_e500_subreg (rtx, enum machine_mode);
+extern void validate_condition_mode (enum rtx_code, enum machine_mode);
+extern bool legitimate_constant_pool_address_p (const_rtx, enum machine_mode,
+ bool);
+extern bool legitimate_indirect_address_p (rtx, int);
+extern bool legitimate_indexed_address_p (rtx, int);
+extern bool avoiding_indexed_address_p (enum machine_mode);
+
+extern rtx rs6000_got_register (rtx);
+extern rtx find_addr_reg (rtx);
+extern rtx gen_easy_altivec_constant (rtx);
+extern const char *output_vec_const_move (rtx *);
+extern void rs6000_expand_vector_init (rtx, rtx);
+extern void paired_expand_vector_init (rtx, rtx);
+extern void rs6000_expand_vector_set (rtx, rtx, int);
+extern void rs6000_expand_vector_extract (rtx, rtx, int);
+extern void build_mask64_2_operands (rtx, rtx *);
+extern int expand_block_clear (rtx[]);
+extern int expand_block_move (rtx[]);
+extern const char * rs6000_output_load_multiple (rtx[]);
+extern int includes_lshift_p (rtx, rtx);
+extern int includes_rshift_p (rtx, rtx);
+extern int includes_rldic_lshift_p (rtx, rtx);
+extern int includes_rldicr_lshift_p (rtx, rtx);
+extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
+extern int registers_ok_for_quad_peep (rtx, rtx);
+extern int mems_ok_for_quad_peep (rtx, rtx);
+extern bool gpr_or_gpr_p (rtx, rtx);
+extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
+ enum reg_class);
+extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
+ enum machine_mode,
+ rtx);
+extern bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class,
+ enum reg_class,
+ enum machine_mode);
+extern bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+extern void rs6000_secondary_reload_inner (rtx, rtx, rtx, bool);
+extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
+ rtx, rtx, rtx);
+extern void paired_expand_vector_move (rtx operands[]);
+
+
+extern int ccr_bit (rtx, int);
+extern int extract_MB (rtx);
+extern int extract_ME (rtx);
+extern void rs6000_output_function_entry (FILE *, const char *);
+extern void print_operand (FILE *, rtx, int);
+extern void print_operand_address (FILE *, rtx);
+extern enum rtx_code rs6000_reverse_condition (enum machine_mode,
+ enum rtx_code);
+extern void rs6000_emit_sISEL (enum machine_mode, rtx[]);
+extern void rs6000_emit_sCOND (enum machine_mode, rtx[]);
+extern void rs6000_emit_cbranch (enum machine_mode, rtx[]);
+extern char * output_cbranch (rtx, const char *, int, rtx);
+extern char * output_e500_flip_gt_bit (rtx, rtx);
+extern const char * output_probe_stack_range (rtx, rtx);
+extern rtx rs6000_emit_set_const (rtx, enum machine_mode, rtx, int);
+extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
+extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
+extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
+extern void rs6000_emit_sync (enum rtx_code, enum machine_mode,
+ rtx, rtx, rtx, rtx, bool);
+extern void rs6000_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void rs6000_split_compare_and_swap (rtx, rtx, rtx, rtx, rtx);
+extern void rs6000_expand_compare_and_swapqhi (rtx, rtx, rtx, rtx);
+extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx);
+extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx);
+extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool);
+extern void rs6000_emit_swrsqrt (rtx, rtx);
+extern void output_toc (FILE *, rtx, int, enum machine_mode);
+extern rtx rs6000_longcall_ref (rtx);
+extern void rs6000_fatal_bad_address (rtx);
+extern rtx create_TOC_reference (rtx, rtx);
+extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
+extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
+extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
+ int, int, int, int *);
+extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx, int);
+extern rtx rs6000_find_base_term (rtx);
+extern bool rs6000_offsettable_memref_p (rtx);
+extern rtx rs6000_return_addr (int, rtx);
+extern void rs6000_output_symbol_ref (FILE*, rtx);
+extern HOST_WIDE_INT rs6000_initial_elimination_offset (int, int);
+extern void rs6000_emit_popcount (rtx, rtx);
+extern void rs6000_emit_parity (rtx, rtx);
+
+extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode,
+ rtx);
+extern rtx rs6000_address_for_fpconvert (rtx);
+extern rtx rs6000_address_for_altivec (rtx);
+extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
+extern int rs6000_loop_align (rtx);
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+extern unsigned int rs6000_special_round_type_align (tree, unsigned int,
+ unsigned int);
+extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int,
+ unsigned int);
+extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *);
+extern rtx rs6000_libcall_value (enum machine_mode);
+extern rtx rs6000_va_arg (tree, tree);
+extern int function_ok_for_sibcall (tree);
+extern void rs6000_elf_declare_function_name (FILE *, const char *, tree);
+extern bool rs6000_elf_in_small_data_p (const_tree);
+#ifdef ARGS_SIZE_RTX
+/* expr.h defines ARGS_SIZE_RTX and `enum direction' */
+extern enum direction function_arg_padding (enum machine_mode, const_tree);
+#endif /* ARGS_SIZE_RTX */
+
+#endif /* TREE_CODE */
+
+extern int direct_return (void);
+extern int first_reg_to_save (void);
+extern int first_fp_reg_to_save (void);
+extern void output_ascii (FILE *, const char *, int);
+extern void rs6000_gen_section_name (char **, const char *, const char *);
+extern void output_function_profiler (FILE *, int);
+extern void output_profile_hook (int);
+extern int rs6000_trampoline_size (void);
+extern alias_set_type get_TOC_alias_set (void);
+extern void rs6000_emit_prologue (void);
+extern void rs6000_emit_load_toc_table (int);
+extern unsigned int rs6000_dbx_register_number (unsigned int);
+extern void rs6000_emit_epilogue (int);
+extern void rs6000_emit_eh_reg_restore (rtx, rtx);
+extern const char * output_isel (rtx *);
+extern bool rs6000_tls_referenced_p (rtx);
+
+extern void rs6000_aix_asm_output_dwarf_table_ref (char *);
+
+/* Declare functions in rs6000-c.c */
+
+extern void rs6000_pragma_longcall (struct cpp_reader *);
+extern void rs6000_cpu_cpp_builtins (struct cpp_reader *);
+#ifdef TREE_CODE
+extern bool rs6000_pragma_target_parse (tree, tree);
+#endif
+
+#if TARGET_MACHO
+char *output_call (rtx, rtx *, int, int);
+#endif
+
+#ifdef NO_DOLLAR_IN_LABEL
+const char * rs6000_xcoff_strip_dollar (const char *);
+#endif
+
+void rs6000_final_prescan_insn (rtx, rtx *operand, int num_operands);
+
+extern bool rs6000_hard_regno_mode_ok_p[][FIRST_PSEUDO_REGISTER];
+extern unsigned char rs6000_class_max_nregs[][LIM_REG_CLASSES];
+extern unsigned char rs6000_hard_regno_nregs[][FIRST_PSEUDO_REGISTER];
+#endif /* rs6000-protos.h */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
new file mode 100644
index 000000000..730e23445
--- /dev/null
+++ b/gcc/config/rs6000/rs6000.c
@@ -0,0 +1,28250 @@
+/* Subroutines used for code generation on IBM RS/6000.
+ Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2013
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "output.h"
+#include "basic-block.h"
+#include "integrate.h"
+#include "diagnostic-core.h"
+#include "toplev.h"
+#include "ggc.h"
+#include "hashtab.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "cfglayout.h"
+#include "cfgloop.h"
+#include "sched-int.h"
+#include "gimple.h"
+#include "tree-flow.h"
+#include "intl.h"
+#include "params.h"
+#include "tm-constrs.h"
+#if TARGET_XCOFF
+#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
+#endif
+#if TARGET_MACHO
+#include "gstab.h" /* for N_SLINE */
+#endif
+
+#ifndef TARGET_NO_PROTOTYPE
+#define TARGET_NO_PROTOTYPE 0
+#endif
+
+#define min(A,B) ((A) < (B) ? (A) : (B))
+#define max(A,B) ((A) > (B) ? (A) : (B))
+
+/* Structure used to define the rs6000 stack */
+typedef struct rs6000_stack {
+ int reload_completed; /* stack info won't change from here on */
+ int first_gp_reg_save; /* first callee saved GP register used */
+ int first_fp_reg_save; /* first callee saved FP register used */
+ int first_altivec_reg_save; /* first callee saved AltiVec register used */
+ int lr_save_p; /* true if the link reg needs to be saved */
+ int cr_save_p; /* true if the CR reg needs to be saved */
+ unsigned int vrsave_mask; /* mask of vec registers to save */
+ int push_p; /* true if we need to allocate stack space */
+ int calls_p; /* true if the function makes any calls */
+ int world_save_p; /* true if we're saving *everything*:
+ r13-r31, cr, f14-f31, vrsave, v20-v31 */
+ enum rs6000_abi abi; /* which ABI to use */
+ int gp_save_offset; /* offset to save GP regs from initial SP */
+ int fp_save_offset; /* offset to save FP regs from initial SP */
+ int altivec_save_offset; /* offset to save AltiVec regs from initial SP */
+ int lr_save_offset; /* offset to save LR from initial SP */
+ int cr_save_offset; /* offset to save CR from initial SP */
+ int vrsave_save_offset; /* offset to save VRSAVE from initial SP */
+ int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
+ int varargs_save_offset; /* offset to save the varargs registers */
+ int ehrd_offset; /* offset to EH return data */
+ int reg_size; /* register size (4 or 8) */
+ HOST_WIDE_INT vars_size; /* variable save area size */
+ int parm_size; /* outgoing parameter size */
+ int save_size; /* save area size */
+ int fixed_size; /* fixed size of stack frame */
+ int gp_size; /* size of saved GP registers */
+ int fp_size; /* size of saved FP registers */
+ int altivec_size; /* size of saved AltiVec registers */
+ int cr_size; /* size to hold CR if not in save_size */
+ int vrsave_size; /* size to hold VRSAVE if not in save_size */
+ int altivec_padding_size; /* size of altivec alignment padding if
+ not in save_size */
+ int spe_gp_size; /* size of 64-bit GPR save size for SPE */
+ int spe_padding_size;
+ HOST_WIDE_INT total_size; /* total bytes allocated for stack */
+ int spe_64bit_regs_used;
+ int savres_strategy;
+} rs6000_stack_t;
+
+/* A C structure for machine-specific, per-function data.
+ This is added to the cfun structure. */
+typedef struct GTY(()) machine_function
+{
+ /* Some local-dynamic symbol. */
+ const char *some_ld_name;
+ /* Whether the instruction chain has been scanned already. */
+ int insn_chain_scanned_p;
+ /* Flags if __builtin_return_address (n) with n >= 1 was used. */
+ int ra_needs_full_frame;
+ /* Flags if __builtin_return_address (0) was used. */
+ int ra_need_lr;
+ /* Cache lr_save_p after expansion of builtin_eh_return. */
+ int lr_save_state;
+ /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
+ varargs save area. */
+ HOST_WIDE_INT varargs_save_offset;
+ /* Temporary stack slot to use for SDmode copies. This slot is
+ 64-bits wide and is allocated early enough so that the offset
+ does not overflow the 16-bit load/store offset field. */
+ rtx sdmode_stack_slot;
+} machine_function;
+
+/* Target cpu type */
+
+struct rs6000_cpu_select rs6000_select[3] =
+{
+ /* switch name, tune arch */
+ { (const char *)0, "--with-cpu=", 1, 1 },
+ { (const char *)0, "-mcpu=", 1, 1 },
+ { (const char *)0, "-mtune=", 1, 0 },
+};
+
+/* String variables to hold the various options. */
+static const char *rs6000_sched_insert_nops_str;
+static const char *rs6000_sched_costly_dep_str;
+static const char *rs6000_recip_name;
+
+#ifdef USING_ELFOS_H
+static const char *rs6000_abi_name;
+static const char *rs6000_sdata_name;
+#endif
+
+/* Support targetm.vectorize.builtin_mask_for_load. */
+static GTY(()) tree altivec_builtin_mask_for_load;
+
+/* Set to nonzero once AIX common-mode calls have been defined. */
+static GTY(()) int common_mode_defined;
+
+/* Label number of the label created for -mrelocatable; calling it lets
+ us get the address of the GOT section */
+static int rs6000_pic_labelno;
+
+#ifdef USING_ELFOS_H
+/* Counter for labels which are to be placed in .fixup. */
+int fixuplabelno = 0;
+#endif
+
+/* Whether to use variant of AIX ABI for PowerPC64 Linux. */
+int dot_symbols;
+
+/* Specify the machine mode that pointers have. After generation of rtl, the
+ compiler makes no further distinction between pointers and any other objects
+ of this machine mode. The type is unsigned since not all things that
+ include rs6000.h also include machmode.h. */
+unsigned rs6000_pmode;
+
+/* Width in bits of a pointer. */
+unsigned rs6000_pointer_size;
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+/* Flag whether floating point values have been passed/returned. */
+static bool rs6000_passes_float;
+/* Flag whether vector values have been passed/returned. */
+static bool rs6000_passes_vector;
+/* Flag whether small (<= 8 byte) structures have been returned. */
+static bool rs6000_returns_struct;
+#endif
+
+/* Value is TRUE if register/mode pair is acceptable. */
+bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
+
+/* Maximum number of registers needed for a given register class and mode. */
+unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
+
+/* How many registers are needed for a given register and mode. */
+unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
+
+/* Map register number to register class. */
+enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
+
+/* Reload functions based on the type and the vector unit. */
+static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
+
+/* Built in types. */
+tree rs6000_builtin_types[RS6000_BTI_MAX];
+tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
+
+/* Flag to say the TOC is initialized */
+int toc_initialized;
+char toc_label_name[10];
+
+/* Cached value of rs6000_variable_issue. This is cached in
+ rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
+static short cached_can_issue_more;
+
+static GTY(()) section *read_only_data_section;
+static GTY(()) section *private_data_section;
+static GTY(()) section *read_only_private_data_section;
+static GTY(()) section *sdata2_section;
+static GTY(()) section *toc_section;
+
+/* True for any options that were explicitly set. */
+static struct {
+ bool aix_struct_ret; /* True if -maix-struct-ret was used. */
+ bool alignment; /* True if -malign- was used. */
+ bool spe_abi; /* True if -mabi=spe/no-spe was used. */
+ bool altivec_abi; /* True if -mabi=altivec/no-altivec used. */
+ bool spe; /* True if -mspe= was used. */
+ bool float_gprs; /* True if -mfloat-gprs= was used. */
+ bool long_double; /* True if -mlong-double- was used. */
+ bool ieee; /* True if -mabi=ieee/ibmlongdouble used. */
+ bool vrsave; /* True if -mvrsave was used. */
+ bool cmodel; /* True if -mcmodel was used. */
+} rs6000_explicit_options;
+
+struct builtin_description
+{
+ /* mask is not const because we're going to alter it below. This
+ nonsense will go away when we rewrite the -march infrastructure
+ to give us more target flag bits. */
+ unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum rs6000_builtins code;
+};
+
+/* Describe the vector unit used for modes. */
+enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
+enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
+
+/* Register classes for various constraints that are based on the target
+ switches. */
+enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
+
+/* Describe the alignment of a vector. */
+int rs6000_vector_align[NUM_MACHINE_MODES];
+
+/* Map selected modes to types for builtins. */
+static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
+
+/* What modes to automatically generate reciprocal divide estimate (fre) and
+ reciprocal sqrt (frsqrte) for. */
+unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
+
+/* Masks to determine which reciprocal estimate instructions to generate
+ automatically. */
+enum rs6000_recip_mask {
+ RECIP_SF_DIV = 0x001, /* Use divide estimate */
+ RECIP_DF_DIV = 0x002,
+ RECIP_V4SF_DIV = 0x004,
+ RECIP_V2DF_DIV = 0x008,
+
+ RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
+ RECIP_DF_RSQRT = 0x020,
+ RECIP_V4SF_RSQRT = 0x040,
+ RECIP_V2DF_RSQRT = 0x080,
+
+ /* Various combination of flags for -mrecip=xxx. */
+ RECIP_NONE = 0,
+ RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
+ | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
+ | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
+
+ RECIP_HIGH_PRECISION = RECIP_ALL,
+
+ /* On low precision machines like the power5, don't enable double precision
+ reciprocal square root estimate, since it isn't accurate enough. */
+ RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
+};
+
+/* -mrecip options. */
+static struct
+{
+ const char *string; /* option name */
+ unsigned int mask; /* mask bits to set */
+} recip_options[] = {
+ { "all", RECIP_ALL },
+ { "none", RECIP_NONE },
+ { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
+ | RECIP_V2DF_DIV) },
+ { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
+ { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
+ { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
+ | RECIP_V2DF_RSQRT) },
+ { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
+ { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
+};
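+
+/* Illustration: -mrecip=rsqrtf sets RECIP_SF_RSQRT | RECIP_V4SF_RSQRT,
+ requesting reciprocal square root estimates for single precision
+ only, while -mrecip=all requests every estimate above. */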
+
+/* 2 argument gen function typedef. */
+typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx);
+
+
+/* Target cpu costs. */
+
+struct processor_costs {
+ const int mulsi; /* cost of SImode multiplication. */
+ const int mulsi_const; /* cost of SImode multiplication by constant. */
+ const int mulsi_const9; /* cost of SImode mult by short constant. */
+ const int muldi; /* cost of DImode multiplication. */
+ const int divsi; /* cost of SImode division. */
+ const int divdi; /* cost of DImode division. */
+ const int fp; /* cost of simple SFmode and DFmode insns. */
+ const int dmul; /* cost of DFmode multiplication (and fmadd). */
+ const int sdiv; /* cost of SFmode division (fdivs). */
+ const int ddiv; /* cost of DFmode division (fdiv). */
+ const int cache_line_size; /* cache line size in bytes. */
+ const int l1_cache_size; /* size of l1 cache, in kilobytes. */
+ const int l2_cache_size; /* size of l2 cache, in kilobytes. */
+ const int simultaneous_prefetches; /* number of parallel prefetch
+ operations. */
+};
+
+const struct processor_costs *rs6000_cost;
+
+/* Processor costs (relative to an add) */
+
+/* Instruction size costs on 32bit processors. */
+static const
+struct processor_costs size32_cost = {
+ COSTS_N_INSNS (1), /* mulsi */
+ COSTS_N_INSNS (1), /* mulsi_const */
+ COSTS_N_INSNS (1), /* mulsi_const9 */
+ COSTS_N_INSNS (1), /* muldi */
+ COSTS_N_INSNS (1), /* divsi */
+ COSTS_N_INSNS (1), /* divdi */
+ COSTS_N_INSNS (1), /* fp */
+ COSTS_N_INSNS (1), /* dmul */
+ COSTS_N_INSNS (1), /* sdiv */
+ COSTS_N_INSNS (1), /* ddiv */
+ 32, /* cache line size */
+ 0, /* l1 cache */
+ 0, /* l2 cache */
+ 0, /* streams */
+};
+
+/* Instruction size costs on 64bit processors. */
+static const
+struct processor_costs size64_cost = {
+ COSTS_N_INSNS (1), /* mulsi */
+ COSTS_N_INSNS (1), /* mulsi_const */
+ COSTS_N_INSNS (1), /* mulsi_const9 */
+ COSTS_N_INSNS (1), /* muldi */
+ COSTS_N_INSNS (1), /* divsi */
+ COSTS_N_INSNS (1), /* divdi */
+ COSTS_N_INSNS (1), /* fp */
+ COSTS_N_INSNS (1), /* dmul */
+ COSTS_N_INSNS (1), /* sdiv */
+ COSTS_N_INSNS (1), /* ddiv */
+ 128, /* cache line size */
+ 0, /* l1 cache */
+ 0, /* l2 cache */
+ 0, /* streams */
+};
+
+/* Instruction costs on RIOS1 processors. */
+static const
+struct processor_costs rios1_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (19), /* divdi */
+ COSTS_N_INSNS (2), /* fp */
+ COSTS_N_INSNS (2), /* dmul */
+ COSTS_N_INSNS (19), /* sdiv */
+ COSTS_N_INSNS (19), /* ddiv */
+ 128, /* cache line size */
+ 64, /* l1 cache */
+ 512, /* l2 cache */
+ 0, /* streams */
+};
+
+/* Instruction costs on RIOS2 processors. */
+static const
+struct processor_costs rios2_cost = {
+ COSTS_N_INSNS (2), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (2), /* muldi */
+ COSTS_N_INSNS (13), /* divsi */
+ COSTS_N_INSNS (13), /* divdi */
+ COSTS_N_INSNS (2), /* fp */
+ COSTS_N_INSNS (2), /* dmul */
+ COSTS_N_INSNS (17), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 256, /* cache line size */
+ 256, /* l1 cache */
+ 1024, /* l2 cache */
+ 0, /* streams */
+};
+
+/* Instruction costs on RS64A processors. */
+static const
+struct processor_costs rs64a_cost = {
+ COSTS_N_INSNS (20), /* mulsi */
+ COSTS_N_INSNS (12), /* mulsi_const */
+ COSTS_N_INSNS (8), /* mulsi_const9 */
+ COSTS_N_INSNS (34), /* muldi */
+ COSTS_N_INSNS (65), /* divsi */
+ COSTS_N_INSNS (67), /* divdi */
+ COSTS_N_INSNS (4), /* fp */
+ COSTS_N_INSNS (4), /* dmul */
+ COSTS_N_INSNS (31), /* sdiv */
+ COSTS_N_INSNS (31), /* ddiv */
+ 128, /* cache line size */
+ 128, /* l1 cache */
+ 2048, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on MPCCORE processors. */
+static const
+struct processor_costs mpccore_cost = {
+ COSTS_N_INSNS (2), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (2), /* muldi */
+ COSTS_N_INSNS (6), /* divsi */
+ COSTS_N_INSNS (6), /* divdi */
+ COSTS_N_INSNS (4), /* fp */
+ COSTS_N_INSNS (5), /* dmul */
+ COSTS_N_INSNS (10), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 32, /* cache line size */
+ 4, /* l1 cache */
+ 16, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC403 processors. */
+static const
+struct processor_costs ppc403_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (33), /* divsi */
+ COSTS_N_INSNS (33), /* divdi */
+ COSTS_N_INSNS (11), /* fp */
+ COSTS_N_INSNS (11), /* dmul */
+ COSTS_N_INSNS (11), /* sdiv */
+ COSTS_N_INSNS (11), /* ddiv */
+ 32, /* cache line size */
+ 4, /* l1 cache */
+ 16, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC405 processors. */
+static const
+struct processor_costs ppc405_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (35), /* divsi */
+ COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (11), /* fp */
+ COSTS_N_INSNS (11), /* dmul */
+ COSTS_N_INSNS (11), /* sdiv */
+ COSTS_N_INSNS (11), /* ddiv */
+ 32, /* cache line size */
+ 16, /* l1 cache */
+ 128, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC440 processors. */
+static const
+struct processor_costs ppc440_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (3), /* muldi */
+ COSTS_N_INSNS (34), /* divsi */
+ COSTS_N_INSNS (34), /* divdi */
+ COSTS_N_INSNS (5), /* fp */
+ COSTS_N_INSNS (5), /* dmul */
+ COSTS_N_INSNS (19), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC476 processors. */
+static const
+struct processor_costs ppc476_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (11), /* divsi */
+ COSTS_N_INSNS (11), /* divdi */
+ COSTS_N_INSNS (6), /* fp */
+ COSTS_N_INSNS (6), /* dmul */
+ COSTS_N_INSNS (19), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32, /* l1 cache line size */
+ 32, /* l1 cache */
+ 512, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC601 processors. */
+static const
+struct processor_costs ppc601_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (5), /* mulsi_const */
+ COSTS_N_INSNS (5), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (36), /* divsi */
+ COSTS_N_INSNS (36), /* divdi */
+ COSTS_N_INSNS (4), /* fp */
+ COSTS_N_INSNS (5), /* dmul */
+ COSTS_N_INSNS (17), /* sdiv */
+ COSTS_N_INSNS (31), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC603 processors. */
+static const
+struct processor_costs ppc603_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (37), /* divsi */
+ COSTS_N_INSNS (37), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (4), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32, /* cache line size */
+ 8, /* l1 cache */
+ 64, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC604 processors. */
+static const
+struct processor_costs ppc604_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (20), /* divsi */
+ COSTS_N_INSNS (20), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (32), /* ddiv */
+ 32, /* cache line size */
+ 16, /* l1 cache */
+ 512, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC604e processors. */
+static const
+struct processor_costs ppc604e_cost = {
+ COSTS_N_INSNS (2), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (2), /* muldi */
+ COSTS_N_INSNS (20), /* divsi */
+ COSTS_N_INSNS (20), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (32), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 1024, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC620 processors. */
+static const
+struct processor_costs ppc620_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (7), /* muldi */
+ COSTS_N_INSNS (21), /* divsi */
+ COSTS_N_INSNS (37), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (32), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 1024, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC630 processors. */
+static const
+struct processor_costs ppc630_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (7), /* muldi */
+ COSTS_N_INSNS (21), /* divsi */
+ COSTS_N_INSNS (37), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (17), /* sdiv */
+ COSTS_N_INSNS (21), /* ddiv */
+ 128, /* cache line size */
+ 64, /* l1 cache */
+ 1024, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on Cell processor. */
+/* COSTS_N_INSNS (1) ~ one add. */
+static const
+struct processor_costs ppccell_cost = {
+ COSTS_N_INSNS (9/2)+2, /* mulsi */
+ COSTS_N_INSNS (6/2), /* mulsi_const */
+ COSTS_N_INSNS (6/2), /* mulsi_const9 */
+ COSTS_N_INSNS (15/2)+2, /* muldi */
+ COSTS_N_INSNS (38/2), /* divsi */
+ COSTS_N_INSNS (70/2), /* divdi */
+ COSTS_N_INSNS (10/2), /* fp */
+ COSTS_N_INSNS (10/2), /* dmul */
+ COSTS_N_INSNS (74/2), /* sdiv */
+ COSTS_N_INSNS (74/2), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 512, /* l2 cache */
+ 6, /* streams */
+};
+
+/* Instruction costs on PPC750 and PPC7400 processors. */
+static const
+struct processor_costs ppc750_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (17), /* divsi */
+ COSTS_N_INSNS (17), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (17), /* sdiv */
+ COSTS_N_INSNS (31), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 512, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC7450 processors. */
+static const
+struct processor_costs ppc7450_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (23), /* divsi */
+ COSTS_N_INSNS (23), /* divdi */
+ COSTS_N_INSNS (5), /* fp */
+ COSTS_N_INSNS (5), /* dmul */
+ COSTS_N_INSNS (21), /* sdiv */
+ COSTS_N_INSNS (35), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 1024, /* l2 cache */
+ 1, /* streams */
+};
+
+/* Instruction costs on PPC8540 processors. */
+static const
+struct processor_costs ppc8540_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (19), /* divdi */
+ COSTS_N_INSNS (4), /* fp */
+ COSTS_N_INSNS (4), /* dmul */
+ COSTS_N_INSNS (29), /* sdiv */
+ COSTS_N_INSNS (29), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 1, /* prefetch streams */
+};
+
+/* Instruction costs on E300C2 and E300C3 cores. */
+static const
+struct processor_costs ppce300c2c3_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (19), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (4), /* dmul */
+ COSTS_N_INSNS (18), /* sdiv */
+ COSTS_N_INSNS (33), /* ddiv */
+ 32, /* cache line size */
+ 16, /* l1 cache */
+ 16, /* l2 cache */
+ 1, /* prefetch streams */
+};
+
+/* Instruction costs on PPCE500MC processors. */
+static const
+struct processor_costs ppce500mc_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (14), /* divsi */
+ COSTS_N_INSNS (14), /* divdi */
+ COSTS_N_INSNS (8), /* fp */
+ COSTS_N_INSNS (10), /* dmul */
+ COSTS_N_INSNS (36), /* sdiv */
+ COSTS_N_INSNS (66), /* ddiv */
+ 64, /* cache line size */
+ 32, /* l1 cache */
+ 128, /* l2 cache */
+ 1, /* prefetch streams */
+};
+
+/* Instruction costs on PPCE500MC64 processors. */
+static const
+struct processor_costs ppce500mc64_cost = {
+ COSTS_N_INSNS (4), /* mulsi */
+ COSTS_N_INSNS (4), /* mulsi_const */
+ COSTS_N_INSNS (4), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (14), /* divsi */
+ COSTS_N_INSNS (14), /* divdi */
+ COSTS_N_INSNS (4), /* fp */
+ COSTS_N_INSNS (10), /* dmul */
+ COSTS_N_INSNS (36), /* sdiv */
+ COSTS_N_INSNS (66), /* ddiv */
+ 64, /* cache line size */
+ 32, /* l1 cache */
+ 128, /* l2 cache */
+ 1, /* prefetch streams */
+};
+
+/* Instruction costs on AppliedMicro Titan processors. */
+static const
+struct processor_costs titan_cost = {
+ COSTS_N_INSNS (5), /* mulsi */
+ COSTS_N_INSNS (5), /* mulsi_const */
+ COSTS_N_INSNS (5), /* mulsi_const9 */
+ COSTS_N_INSNS (5), /* muldi */
+ COSTS_N_INSNS (18), /* divsi */
+ COSTS_N_INSNS (18), /* divdi */
+ COSTS_N_INSNS (10), /* fp */
+ COSTS_N_INSNS (10), /* dmul */
+ COSTS_N_INSNS (46), /* sdiv */
+ COSTS_N_INSNS (72), /* ddiv */
+ 32, /* cache line size */
+ 32, /* l1 cache */
+ 512, /* l2 cache */
+ 1, /* prefetch streams */
+};
+
+/* Instruction costs on POWER4 and POWER5 processors. */
+static const
+struct processor_costs power4_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (4), /* muldi */
+ COSTS_N_INSNS (18), /* divsi */
+ COSTS_N_INSNS (34), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (17), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 1024, /* l2 cache */
+ 8, /* prefetch streams */
+};
+
+/* Instruction costs on POWER6 processors. */
+static const
+struct processor_costs power6_cost = {
+ COSTS_N_INSNS (8), /* mulsi */
+ COSTS_N_INSNS (8), /* mulsi_const */
+ COSTS_N_INSNS (8), /* mulsi_const9 */
+ COSTS_N_INSNS (8), /* muldi */
+ COSTS_N_INSNS (22), /* divsi */
+ COSTS_N_INSNS (28), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (13), /* sdiv */
+ COSTS_N_INSNS (16), /* ddiv */
+ 128, /* cache line size */
+ 64, /* l1 cache */
+ 2048, /* l2 cache */
+ 16, /* prefetch streams */
+};
+
+/* Instruction costs on POWER7 processors. */
+static const
+struct processor_costs power7_cost = {
+ COSTS_N_INSNS (2), /* mulsi */
+ COSTS_N_INSNS (2), /* mulsi_const */
+ COSTS_N_INSNS (2), /* mulsi_const9 */
+ COSTS_N_INSNS (2), /* muldi */
+ COSTS_N_INSNS (18), /* divsi */
+ COSTS_N_INSNS (34), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (13), /* sdiv */
+ COSTS_N_INSNS (16), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 12, /* prefetch streams */
+};
+
+/* Instruction costs on POWER A2 processors. */
+static const
+struct processor_costs ppca2_cost = {
+ COSTS_N_INSNS (16), /* mulsi */
+ COSTS_N_INSNS (16), /* mulsi_const */
+ COSTS_N_INSNS (16), /* mulsi_const9 */
+ COSTS_N_INSNS (16), /* muldi */
+ COSTS_N_INSNS (22), /* divsi */
+ COSTS_N_INSNS (28), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (59), /* sdiv */
+ COSTS_N_INSNS (72), /* ddiv */
+ 64, /* cache line size */
+ 16, /* l1 cache */
+ 2048, /* l2 cache */
+ 16, /* prefetch streams */
+};
+
+
+/* Table that classifies rs6000 builtin functions (pure, const, etc.). */
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+#define RS6000_BUILTIN(NAME, TYPE) TYPE,
+#define RS6000_BUILTIN_EQUATE(NAME, VALUE)
+
+static const enum rs6000_btc builtin_classify[(int)RS6000_BUILTIN_COUNT] =
+{
+#include "rs6000-builtin.def"
+};
+
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
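+
+/* As an illustration, an entry of the form
+     RS6000_BUILTIN (ALTIVEC_BUILTIN_VADDUBM, RS6000_BTC_CONST)
+   in rs6000-builtin.def expands to just "RS6000_BTC_CONST," above, so the
+   table holds one classification per builtin, indexed by its enum value. */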
+
+/* Support for -mveclibabi=<xxx> to control which vector library to use. */
+static tree (*rs6000_veclib_handler) (tree, tree, tree);
+
+
+static bool rs6000_function_ok_for_sibcall (tree, tree);
+static const char *rs6000_invalid_within_doloop (const_rtx);
+static bool rs6000_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx rs6000_generate_compare (rtx, enum machine_mode);
+static void rs6000_emit_stack_tie (void);
+static void rs6000_frame_related (rtx, rtx, HOST_WIDE_INT, rtx, rtx);
+static bool spe_func_has_64bit_regs_p (void);
+static void emit_frame_save (rtx, rtx, enum machine_mode, unsigned int,
+ int, HOST_WIDE_INT);
+static rtx gen_frame_mem_offset (enum machine_mode, rtx, int);
+static unsigned rs6000_hash_constant (rtx);
+static unsigned toc_hash_function (const void *);
+static int toc_hash_eq (const void *, const void *);
+static bool reg_offset_addressing_ok_p (enum machine_mode);
+static bool virtual_stack_registers_memory_p (rtx);
+static bool constant_pool_expr_p (rtx);
+static bool legitimate_small_data_p (enum machine_mode, rtx);
+static bool legitimate_lo_sum_address_p (enum machine_mode, rtx, int);
+static struct machine_function * rs6000_init_machine_status (void);
+static bool rs6000_assemble_integer (rtx, unsigned int, int);
+static bool no_global_regs_above (int, bool);
+#ifdef HAVE_GAS_HIDDEN
+static void rs6000_assemble_visibility (tree, int);
+#endif
+static int rs6000_ra_ever_killed (void);
+static bool rs6000_attribute_takes_identifier_p (const_tree);
+static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
+static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
+static bool rs6000_ms_bitfield_layout_p (const_tree);
+static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
+static void rs6000_eliminate_indexed_memrefs (rtx operands[2]);
+static const char *rs6000_mangle_type (const_tree);
+static void rs6000_set_default_type_attributes (tree);
+static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool);
+static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
+static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
+ enum machine_mode, bool, bool, bool);
+static bool rs6000_reg_live_or_pic_offset_p (int);
+static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
+static tree rs6000_builtin_vectorized_function (tree, tree, tree);
+static void rs6000_restore_saved_cr (rtx, int);
+static bool rs6000_output_addr_const_extra (FILE *, rtx);
+static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void rs6000_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void rs6000_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
+ tree);
+static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+static bool rs6000_return_in_memory (const_tree, const_tree);
+static rtx rs6000_function_value (const_tree, const_tree, bool);
+static void rs6000_file_start (void);
+#if TARGET_ELF
+static int rs6000_elf_reloc_rw_mask (void);
+static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
+static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
+static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
+static void rs6000_elf_asm_init_sections (void);
+static section *rs6000_elf_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+static void rs6000_elf_encode_section_info (tree, rtx, int)
+ ATTRIBUTE_UNUSED;
+#endif
+static bool rs6000_use_blocks_for_constant_p (enum machine_mode, const_rtx);
+static void rs6000_alloc_sdmode_stack_slot (void);
+static void rs6000_instantiate_decls (void);
+#if TARGET_XCOFF
+static void rs6000_xcoff_asm_output_anchor (rtx);
+static void rs6000_xcoff_asm_globalize_label (FILE *, const char *);
+static void rs6000_xcoff_asm_init_sections (void);
+static int rs6000_xcoff_reloc_rw_mask (void);
+static void rs6000_xcoff_asm_named_section (const char *, unsigned int, tree);
+static section *rs6000_xcoff_select_section (tree, int,
+ unsigned HOST_WIDE_INT);
+static void rs6000_xcoff_unique_section (tree, int);
+static section *rs6000_xcoff_select_rtx_section
+ (enum machine_mode, rtx, unsigned HOST_WIDE_INT);
+static const char * rs6000_xcoff_strip_name_encoding (const char *);
+static unsigned int rs6000_xcoff_section_type_flags (tree, const char *, int);
+static void rs6000_xcoff_file_start (void);
+static void rs6000_xcoff_file_end (void);
+#endif
+static int rs6000_variable_issue (FILE *, int, rtx, int);
+static int rs6000_register_move_cost (enum machine_mode,
+ reg_class_t, reg_class_t);
+static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static bool rs6000_rtx_costs (rtx, int, int, int *, bool);
+static bool rs6000_debug_rtx_costs (rtx, int, int, int *, bool);
+static int rs6000_debug_address_cost (rtx, bool);
+static int rs6000_adjust_cost (rtx, rtx, rtx, int);
+static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
+static void rs6000_sched_init (FILE *, int, int);
+static bool is_microcoded_insn (rtx);
+static bool is_nonpipeline_insn (rtx);
+static bool is_cracked_insn (rtx);
+static bool is_branch_slot_insn (rtx);
+static bool is_load_insn (rtx);
+static rtx get_store_dest (rtx pat);
+static bool is_store_insn (rtx);
+static bool set_to_load_agen (rtx, rtx);
+static bool adjacent_mem_locations (rtx, rtx);
+static int rs6000_adjust_priority (rtx, int);
+static int rs6000_issue_rate (void);
+static bool rs6000_is_costly_dependence (dep_t, int, int);
+static rtx get_next_active_insn (rtx, rtx);
+static bool insn_terminates_group_p (rtx, enum group_termination);
+static bool insn_must_be_first_in_group (rtx);
+static bool insn_must_be_last_in_group (rtx);
+static bool is_costly_group (rtx *, rtx);
+static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *);
+static int redefine_groups (FILE *, int, rtx, rtx);
+static int pad_groups (FILE *, int, rtx, rtx);
+static void rs6000_sched_finish (FILE *, int);
+static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
+static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
+static int rs6000_use_sched_lookahead (void);
+static int rs6000_use_sched_lookahead_guard (rtx);
+static void * rs6000_alloc_sched_context (void);
+static void rs6000_init_sched_context (void *, bool);
+static void rs6000_set_sched_context (void *);
+static void rs6000_free_sched_context (void *);
+static tree rs6000_builtin_reciprocal (unsigned int, bool, bool);
+static tree rs6000_builtin_mask_for_load (void);
+static tree rs6000_builtin_mul_widen_even (tree);
+static tree rs6000_builtin_mul_widen_odd (tree);
+static tree rs6000_builtin_conversion (unsigned int, tree, tree);
+static tree rs6000_builtin_vec_perm (tree, tree *);
+static bool rs6000_builtin_support_vector_misalignment (enum
+ machine_mode,
+ const_tree,
+ int, bool);
+static int rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt,
+ tree, int);
+static enum machine_mode rs6000_preferred_simd_mode (enum machine_mode);
+
+static void def_builtin (int, const char *, tree, int);
+static bool rs6000_vector_alignment_reachable (const_tree, bool);
+static void rs6000_init_builtins (void);
+static tree rs6000_builtin_decl (unsigned, bool);
+
+static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
+static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx rs6000_expand_ternop_builtin (enum insn_code, tree, rtx);
+static rtx rs6000_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void altivec_init_builtins (void);
+static unsigned builtin_hash_function (const void *);
+static int builtin_hash_eq (const void *, const void *);
+static tree builtin_function_type (enum machine_mode, enum machine_mode,
+ enum machine_mode, enum machine_mode,
+ enum rs6000_builtins, const char *name);
+static void rs6000_common_init_builtins (void);
+static void rs6000_init_libfuncs (void);
+
+static void paired_init_builtins (void);
+static rtx paired_expand_builtin (tree, rtx, bool *);
+static rtx paired_expand_lv_builtin (enum insn_code, tree, rtx);
+static rtx paired_expand_stv_builtin (enum insn_code, tree);
+static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
+
+static void enable_mask_for_builtins (struct builtin_description *, int,
+ enum rs6000_builtins,
+ enum rs6000_builtins);
+static void spe_init_builtins (void);
+static rtx spe_expand_builtin (tree, rtx, bool *);
+static rtx spe_expand_stv_builtin (enum insn_code, tree);
+static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
+static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
+static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
+static rs6000_stack_t *rs6000_stack_info (void);
+static void debug_stack_info (rs6000_stack_t *);
+
+static rtx altivec_expand_builtin (tree, rtx, bool *);
+static rtx altivec_expand_ld_builtin (tree, rtx, bool *);
+static rtx altivec_expand_st_builtin (tree, rtx, bool *);
+static rtx altivec_expand_dst_builtin (tree, rtx, bool *);
+static rtx altivec_expand_abs_builtin (enum insn_code, tree, rtx);
+static rtx altivec_expand_predicate_builtin (enum insn_code, tree, rtx);
+static rtx altivec_expand_stv_builtin (enum insn_code, tree);
+static rtx altivec_expand_vec_init_builtin (tree, tree, rtx);
+static rtx altivec_expand_vec_set_builtin (tree);
+static rtx altivec_expand_vec_ext_builtin (tree, rtx);
+static int get_element_number (tree, tree);
+static void rs6000_option_override (void);
+static void rs6000_option_init_struct (struct gcc_options *);
+static void rs6000_option_default_params (void);
+static bool rs6000_handle_option (size_t, const char *, int);
+static int rs6000_loop_align_max_skip (rtx);
+static void rs6000_parse_yes_no_option (const char *, const char *, int *);
+static int first_altivec_reg_to_save (void);
+static unsigned int compute_vrsave_mask (void);
+static void compute_save_world_info (rs6000_stack_t *info_ptr);
+static void is_altivec_return_reg (rtx, void *);
+static rtx generate_set_vrsave (rtx, rs6000_stack_t *, int);
+int easy_vector_constant (rtx, enum machine_mode);
+static rtx rs6000_dwarf_register_span (rtx);
+static void rs6000_init_dwarf_reg_sizes_extra (tree);
+static rtx rs6000_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
+static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static rtx rs6000_delegitimize_address (rtx);
+static rtx rs6000_tls_get_addr (void);
+static rtx rs6000_got_sym (void);
+static int rs6000_tls_symbol_ref_1 (rtx *, void *);
+static const char *rs6000_get_some_local_dynamic_name (void);
+static int rs6000_get_some_local_dynamic_name_1 (rtx *, void *);
+static rtx rs6000_complex_function_value (enum machine_mode);
+static rtx rs6000_spe_function_arg (const CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree);
+static void rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *,
+ HOST_WIDE_INT, int);
+static void rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *,
+ const_tree,
+ HOST_WIDE_INT);
+static void rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *,
+ HOST_WIDE_INT,
+ rtx[], int *);
+static void rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *,
+ const_tree, HOST_WIDE_INT,
+ rtx[], int *);
+static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree, bool, bool);
+static rtx rs6000_mixed_function_arg (enum machine_mode, const_tree, int);
+static void rs6000_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx rs6000_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static unsigned int rs6000_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void rs6000_move_block_from_reg (int regno, rtx x, int nregs);
+static void setup_incoming_varargs (CUMULATIVE_ARGS *,
+ enum machine_mode, tree,
+ int *, int);
+static bool rs6000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int rs6000_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static const char *invalid_arg_for_unprototyped_fn (const_tree, const_tree, const_tree);
+#if TARGET_MACHO
+static void macho_branch_islands (void);
+static int no_previous_def (tree function_name);
+static tree get_prev_label (tree function_name);
+static void rs6000_darwin_file_start (void);
+#endif
+
+static tree rs6000_build_builtin_va_list (void);
+static void rs6000_va_start (tree, rtx);
+static tree rs6000_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
+static bool rs6000_must_pass_in_stack (enum machine_mode, const_tree);
+static bool rs6000_scalar_mode_supported_p (enum machine_mode);
+static bool rs6000_vector_mode_supported_p (enum machine_mode);
+static rtx rs6000_emit_vector_compare_inner (enum rtx_code, rtx, rtx);
+static rtx rs6000_emit_vector_compare (enum rtx_code, rtx, rtx,
+ enum machine_mode);
+static tree rs6000_stack_protect_fail (void);
+
+static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
+ int, int *);
+
+static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
+ int, int, int *);
+
+rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
+ int, int *)
+ = rs6000_legitimize_reload_address;
+
+static bool rs6000_mode_dependent_address_p (const_rtx);
+static bool rs6000_mode_dependent_address (const_rtx);
+static bool rs6000_debug_mode_dependent_address (const_rtx);
+static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
+ = rs6000_mode_dependent_address;
+
+static enum reg_class rs6000_secondary_reload_class (enum reg_class,
+ enum machine_mode, rtx);
+static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx);
+enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
+ enum machine_mode, rtx)
+ = rs6000_secondary_reload_class;
+
+static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
+static enum reg_class rs6000_debug_preferred_reload_class (rtx,
+ enum reg_class);
+enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
+ = rs6000_preferred_reload_class;
+
+static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode);
+
+static bool rs6000_debug_secondary_memory_needed (enum reg_class,
+ enum reg_class,
+ enum machine_mode);
+
+bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
+ enum machine_mode)
+ = rs6000_secondary_memory_needed;
+
+static bool rs6000_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode,
+ enum reg_class);
+
+bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
+ enum machine_mode,
+ enum reg_class)
+ = rs6000_cannot_change_mode_class;
+
+static reg_class_t rs6000_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ struct secondary_reload_info *);
+
+static const reg_class_t *rs6000_ira_cover_classes (void);
+
+const int INSN_NOT_AVAILABLE = -1;
+static enum machine_mode rs6000_eh_return_filter_mode (void);
+static bool rs6000_can_eliminate (const int, const int);
+static void rs6000_conditional_register_usage (void);
+static void rs6000_trampoline_init (rtx, tree, rtx);
+static bool rs6000_cannot_force_const_mem (rtx);
+
+/* Hash table stuff for keeping track of TOC entries. */
+
+struct GTY(()) toc_hash_struct
+{
+ /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
+ ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
+ rtx key;
+ enum machine_mode key_mode;
+ int labelno;
+};
+
+static GTY ((param_is (struct toc_hash_struct))) htab_t toc_hash_table;
+
+/* Hash table to keep track of the argument types for builtin functions. */
+
+struct GTY(()) builtin_hash_struct
+{
+ tree type;
+ enum machine_mode mode[4]; /* return value + 3 arguments. */
+ unsigned char uns_p[4]; /* and whether the types are unsigned. */
+};
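+
+/* An illustrative entry: a binary builtin taking two V4SI operands and
+   returning V4SI would be recorded as mode[] = { V4SImode, V4SImode,
+   V4SImode, VOIDmode } with uns_p[] = { 0, 0, 0, 0 }; slots for absent
+   arguments are presumed left as VOIDmode. */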
+
+static GTY ((param_is (struct builtin_hash_struct))) htab_t builtin_hash_table;
+
+static bool rs6000_valid_attribute_p (tree, tree, tree, int);
+static void rs6000_function_specific_save (struct cl_target_option *);
+static void rs6000_function_specific_restore (struct cl_target_option *);
+static void rs6000_function_specific_print (FILE *, int,
+ struct cl_target_option *);
+static bool rs6000_can_inline_p (tree, tree);
+static void rs6000_set_current_function (tree);
+
+
+/* Default register names. */
+char rs6000_reg_names[][8] =
+{
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "10", "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20", "21", "22", "23",
+ "24", "25", "26", "27", "28", "29", "30", "31",
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "10", "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20", "21", "22", "23",
+ "24", "25", "26", "27", "28", "29", "30", "31",
+ "mq", "lr", "ctr","ap",
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "ca",
+ /* AltiVec registers. */
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "10", "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20", "21", "22", "23",
+ "24", "25", "26", "27", "28", "29", "30", "31",
+ "vrsave", "vscr",
+ /* SPE registers. */
+ "spe_acc", "spefscr",
+ /* Soft frame pointer. */
+ "sfp"
+};
+
+#ifdef TARGET_REGNAMES
+static const char alt_reg_names[][8] =
+{
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
+ "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "mq", "lr", "ctr", "ap",
+ "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
+ "ca",
+ /* AltiVec registers. */
+ "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
+ "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
+ "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
+ "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
+ "vrsave", "vscr",
+ /* SPE registers. */
+ "spe_acc", "spefscr",
+ /* Soft frame pointer. */
+ "sfp"
+};
+#endif
+
+/* Table of valid machine attributes. */
+
+static const struct attribute_spec rs6000_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute },
+ { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute },
+ { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute },
+ { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+ SUBTARGET_ATTRIBUTE_TABLE,
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options rs6000_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#ifndef MASK_STRICT_ALIGN
+#define MASK_STRICT_ALIGN 0
+#endif
+#ifndef TARGET_PROFILE_KERNEL
+#define TARGET_PROFILE_KERNEL 0
+#endif
+
+/* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
+#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
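+/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0)
+   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31). */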
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
+#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
+#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
+
+/* Default unaligned ops are only provided for ELF. Find the ops needed
+ for non-ELF systems. */
+#ifndef OBJECT_FORMAT_ELF
+#if TARGET_XCOFF
+/* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
+ 64-bit targets. */
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
+#else
+/* For Darwin. */
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+#endif
+#endif
+
+/* This hook deals with fixups for relocatable code and DI-mode objects
+ in 64-bit code. */
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER rs6000_assemble_integer
+
+#ifdef HAVE_GAS_HIDDEN
+#undef TARGET_ASM_ASSEMBLE_VISIBILITY
+#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
+#endif
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
+#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
+#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT rs6000_sched_init
+#undef TARGET_SCHED_FINISH
+#define TARGET_SCHED_FINISH rs6000_sched_finish
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER rs6000_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
+
+#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
+#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
+#undef TARGET_SCHED_INIT_SCHED_CONTEXT
+#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
+#undef TARGET_SCHED_SET_SCHED_CONTEXT
+#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
+#undef TARGET_SCHED_FREE_SCHED_CONTEXT
+#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
+
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN rs6000_builtin_mul_widen_even
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD rs6000_builtin_mul_widen_odd
+#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
+#define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM rs6000_builtin_vec_perm
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+ rs6000_builtin_support_vector_misalignment
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ rs6000_builtin_vectorization_cost
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
+ rs6000_preferred_simd_mode
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS rs6000_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL rs6000_builtin_decl
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE rs6000_mangle_type
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
+
+#if TARGET_MACHO
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+#endif
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
+
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP rs6000_invalid_within_doloop
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS rs6000_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
+
+#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
+#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
+
+/* On rs6000, function arguments are promoted, as are function return
+ values. */
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
+
+/* Always strict argument naming on rs6000. */
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
+#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG rs6000_function_arg
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
+
+#undef TARGET_EH_RETURN_FILTER_MODE
+#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
+
+#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
+#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION rs6000_handle_option
+
+#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
+#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE rs6000_option_override
+
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT rs6000_option_init_struct
+
+#undef TARGET_OPTION_DEFAULT_PARAMS
+#define TARGET_OPTION_DEFAULT_PARAMS rs6000_option_default_params
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE rs6000_option_optimization_table
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ rs6000_builtin_vectorized_function
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT)
+
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
+
+/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
+ The PowerPC architecture requires only weak consistency among
+ processors--that is, memory accesses between processors need not be
+ sequentially consistent and memory accesses among processors can occur
+ in any order. The ability to order memory accesses weakly provides
+ opportunities for more efficient use of the system bus. Unless a
+ dependency exists, the 604e allows read operations to precede store
+ operations. */
+#undef TARGET_RELAXED_ORDERING
+#define TARGET_RELAXED_ORDERING true
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
+#endif
+
+/* Use a 32-bit anchor range. This leads to sequences like:
+
+ addis tmp,anchor,high
+ add dest,tmp,low
+
+ where tmp itself acts as an anchor, and can be shared between
+ accesses to the same 64k page. */
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
+
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
+
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
+
+#undef TARGET_INSTANTIATE_DECLS
+#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
+
+#undef TARGET_IRA_COVER_CLASSES
+#define TARGET_IRA_COVER_CLASSES rs6000_ira_cover_classes
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
+
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE rs6000_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE rs6000_function_value
+
+#undef TARGET_OPTION_VALID_ATTRIBUTE_P
+#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
+
+#undef TARGET_OPTION_SAVE
+#define TARGET_OPTION_SAVE rs6000_function_specific_save
+
+#undef TARGET_OPTION_RESTORE
+#define TARGET_OPTION_RESTORE rs6000_function_specific_restore
+
+#undef TARGET_OPTION_PRINT
+#define TARGET_OPTION_PRINT rs6000_function_specific_print
+
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P rs6000_can_inline_p
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Simplifications for entries below. */
+
+enum {
+ POWERPC_BASE_MASK = MASK_POWERPC | MASK_NEW_MNEMONICS,
+ POWERPC_7400_MASK = POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_ALTIVEC
+};
+
+/* Some OSs don't support saving the high part of 64-bit registers on context
+ switch. Other OSs don't support saving Altivec registers. On those OSs, we
+ don't touch the MASK_POWERPC64 or MASK_ALTIVEC settings; if the user wants
+ either, it must be specified explicitly, and we won't override that
+ choice. */
+
+enum {
+ POWER_MASKS = MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING,
+ POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT | MASK_STRICT_ALIGN
+ | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC
+ | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
+ | MASK_DLMZB | MASK_CMPB | MASK_MFPGPR | MASK_DFP
+ | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE
+ | MASK_RECIP_PRECISION)
+};
+
+/* Masks for instructions set at various powerpc ISAs. */
+enum {
+ ISA_2_1_MASKS = MASK_MFCRF,
+ ISA_2_2_MASKS = (ISA_2_1_MASKS | MASK_POPCNTB),
+ ISA_2_4_MASKS = (ISA_2_2_MASKS | MASK_FPRND),
+
+ /* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and don't add
+ ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel,
+ fre, fsqrt, etc. were no longer documented as optional. Group masks by
+ server and embedded. */
+ ISA_2_5_MASKS_EMBEDDED = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION
+ | MASK_PPC_GFXOPT | MASK_PPC_GPOPT),
+ ISA_2_5_MASKS_SERVER = (ISA_2_5_MASKS_EMBEDDED | MASK_DFP),
+
+ /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but
+ altivec is a win so enable it. */
+ ISA_2_6_MASKS_EMBEDDED = (ISA_2_5_MASKS_EMBEDDED | MASK_POPCNTD),
+ ISA_2_6_MASKS_SERVER = (ISA_2_5_MASKS_SERVER | MASK_POPCNTD | MASK_ALTIVEC
+ | MASK_VSX)
+};
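+
+/* Expanding the nested definitions, ISA_2_6_MASKS_SERVER is therefore
+   MASK_MFCRF | MASK_POPCNTB | MASK_CMPB | MASK_RECIP_PRECISION
+   | MASK_PPC_GFXOPT | MASK_PPC_GPOPT | MASK_DFP | MASK_POPCNTD
+   | MASK_ALTIVEC | MASK_VSX. */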
+
+/* This table occasionally claims that a processor does not support a
+ particular feature even though it does, but the feature is slower than the
+ alternative. Thus, it shouldn't be relied on as a complete description of
+ the processor's support.
+
+ Please keep this list in order, and don't forget to update the documentation
+ in invoke.texi when adding a new processor or flag. */
+
+struct rs6000_ptt
+{
+ const char *const name; /* Canonical processor name. */
+ const enum processor_type processor; /* Processor type enum value. */
+ const int target_enable; /* Target flags to enable. */
+};
+
+static struct rs6000_ptt const processor_target_table[] =
+{
+ {"401", PROCESSOR_PPC403, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"403", PROCESSOR_PPC403,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_STRICT_ALIGN},
+ {"405", PROCESSOR_PPC405,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
+ {"405fp", PROCESSOR_PPC405,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
+ {"440", PROCESSOR_PPC440,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
+ {"440fp", PROCESSOR_PPC440,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
+ {"464", PROCESSOR_PPC440,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
+ {"464fp", PROCESSOR_PPC440,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
+ {"476", PROCESSOR_PPC476,
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_PPC_GFXOPT | MASK_MFCRF
+ | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_MULHW | MASK_DLMZB},
+ {"476fp", PROCESSOR_PPC476,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB
+ | MASK_FPRND | MASK_CMPB | MASK_MULHW | MASK_DLMZB},
+ {"505", PROCESSOR_MPCCORE, POWERPC_BASE_MASK},
+ {"601", PROCESSOR_PPC601,
+ MASK_POWER | POWERPC_BASE_MASK | MASK_MULTIPLE | MASK_STRING},
+ {"602", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"603", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"603e", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"604", PROCESSOR_PPC604, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"604e", PROCESSOR_PPC604e, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"620", PROCESSOR_PPC620,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
+ {"630", PROCESSOR_PPC630,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
+ {"740", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"7400", PROCESSOR_PPC7400, POWERPC_7400_MASK},
+ {"7450", PROCESSOR_PPC7450, POWERPC_7400_MASK},
+ {"750", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"801", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"821", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"823", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN
+ | MASK_ISEL},
+ /* 8548 has a dummy entry for now. */
+ {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_STRICT_ALIGN
+ | MASK_ISEL},
+ {"a2", PROCESSOR_PPCA2,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_POPCNTB
+ | MASK_CMPB | MASK_NO_UPDATE },
+ {"e300c2", PROCESSOR_PPCE300C2, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"e300c3", PROCESSOR_PPCE300C3, POWERPC_BASE_MASK},
+ {"e500mc", PROCESSOR_PPCE500MC, POWERPC_BASE_MASK | MASK_PPC_GFXOPT
+ | MASK_ISEL},
+ {"e500mc64", PROCESSOR_PPCE500MC64, POWERPC_BASE_MASK | MASK_POWERPC64
+ | MASK_PPC_GFXOPT | MASK_ISEL},
+ {"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"970", PROCESSOR_POWER4,
+ POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
+ {"cell", PROCESSOR_CELL,
+ POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
+ {"common", PROCESSOR_COMMON, MASK_NEW_MNEMONICS},
+ {"ec603e", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
+ {"G3", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"G4", PROCESSOR_PPC7450, POWERPC_7400_MASK},
+ {"G5", PROCESSOR_POWER4,
+ POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
+ {"titan", PROCESSOR_TITAN,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
+ {"power", PROCESSOR_POWER, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
+ {"power2", PROCESSOR_POWER,
+ MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING},
+ {"power3", PROCESSOR_PPC630,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
+ {"power4", PROCESSOR_POWER4,
+ POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
+ | MASK_MFCRF},
+ {"power5", PROCESSOR_POWER5,
+ POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
+ | MASK_MFCRF | MASK_POPCNTB},
+ {"power5+", PROCESSOR_POWER5,
+ POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
+ | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND},
+ {"power6", PROCESSOR_POWER6,
+ POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
+ | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP
+ | MASK_RECIP_PRECISION},
+ {"power6x", PROCESSOR_POWER6,
+ POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT
+ | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP
+ | MASK_MFPGPR | MASK_RECIP_PRECISION},
+ {"power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
+ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
+ | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
+ | MASK_VSX | MASK_RECIP_PRECISION},
+ {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
+ {"powerpc64", PROCESSOR_POWERPC64,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
+ {"rios", PROCESSOR_RIOS1, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
+ {"rios1", PROCESSOR_RIOS1, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
+ {"rios2", PROCESSOR_RIOS2,
+ MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING},
+ {"rsc", PROCESSOR_PPC601, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
+ {"rsc1", PROCESSOR_PPC601, MASK_POWER | MASK_MULTIPLE | MASK_STRING},
+ {"rs64", PROCESSOR_RS64A,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}
+};
+
+/* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
+ name is invalid. */
+
+static int
+rs6000_cpu_name_lookup (const char *name)
+{
+ size_t i;
+
+ if (name != NULL)
+ {
+ for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
+ if (! strcmp (name, processor_target_table[i].name))
+ return (int)i;
+ }
+
+ return -1;
+}
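+
+/* For example, rs6000_cpu_name_lookup ("power7") returns the index of the
+   "power7" entry above, while both rs6000_cpu_name_lookup ("power99") and
+   rs6000_cpu_name_lookup (NULL) return -1. */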
+
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ For the SPE, GPRs are 64 bits but only 32 bits are visible in
+ scalar instructions. The upper 32 bits are only available to the
+ SIMD instructions.
+
+ POWER and PowerPC GPRs hold 32 bits worth;
+ PowerPC64 GPRs and FPRs hold 64 bits worth. */
+
+static int
+rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
+{
+ unsigned HOST_WIDE_INT reg_size;
+
+ if (FP_REGNO_P (regno))
+ reg_size = (VECTOR_MEM_VSX_P (mode)
+ ? UNITS_PER_VSX_WORD
+ : UNITS_PER_FP_WORD);
+
+ else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
+ reg_size = UNITS_PER_SPE_WORD;
+
+ else if (ALTIVEC_REGNO_P (regno))
+ reg_size = UNITS_PER_ALTIVEC_WORD;
+
+ /* The value returned for SCmode in the E500 double case is 2 for
+ ABI compatibility; storing an SCmode value in a single register
+ would require function_arg and rs6000_spe_function_arg to handle
+ SCmode so as to pass the value correctly in a pair of
+ registers. */
+ else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
+ && !DECIMAL_FLOAT_MODE_P (mode))
+ reg_size = UNITS_PER_FP_WORD;
+
+ else
+ reg_size = UNITS_PER_WORD;
+
+ return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
+}
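+
+/* The return expression is a ceiling division. For example, V4SImode is
+   16 bytes, so with 4-byte GPRs (32-bit) it needs (16 + 4 - 1) / 4 = 4
+   registers, while in 16-byte AltiVec registers it needs
+   (16 + 16 - 1) / 16 = 1. */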
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. */
+static int
+rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
+
+ /* VSX registers that overlap the FPR registers are larger than for non-VSX
+ implementations. Don't allow an item to be split between a FP register
+ and an Altivec register. */
+ if (VECTOR_MEM_VSX_P (mode))
+ {
+ if (FP_REGNO_P (regno))
+ return FP_REGNO_P (last_regno);
+
+ if (ALTIVEC_REGNO_P (regno))
+ return ALTIVEC_REGNO_P (last_regno);
+ }
+
+ /* The GPRs can hold any mode, but values bigger than one register
+ cannot go past R31. */
+ if (INT_REGNO_P (regno))
+ return INT_REGNO_P (last_regno);
+
+ /* The float registers (except for VSX vector modes) can only hold floating
+ modes and DImode. This excludes the 32-bit decimal float mode for
+ now. */
+ if (FP_REGNO_P (regno))
+ {
+ if (SCALAR_FLOAT_MODE_P (mode)
+ && (mode != TDmode || (regno % 2) == 0)
+ && FP_REGNO_P (last_regno))
+ return 1;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
+ return 1;
+
+ if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
+ && PAIRED_VECTOR_MODE (mode))
+ return 1;
+
+ return 0;
+ }
+
+ /* The CR register can only hold CC modes. */
+ if (CR_REGNO_P (regno))
+ return GET_MODE_CLASS (mode) == MODE_CC;
+
+ if (CA_REGNO_P (regno))
+ return mode == BImode;
+
+ /* AltiVec only in AltiVec registers. */
+ if (ALTIVEC_REGNO_P (regno))
+ return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
+
+ /* ...but GPRs can hold SIMD data on the SPE in one register. */
+ if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
+ return 1;
+
+ /* We cannot put TImode anywhere except the general registers, and it must
+ be able to fit within the register set. In the future, allow TImode in
+ the Altivec or VSX registers. */
+
+ return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+}
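+
+/* For example, on a 32-bit target TImode occupies four GPRs, so starting at
+   r28 the value ends in r31 and is allowed, while starting at r30 it would
+   run past R31 and the INT_REGNO_P check on the last register fails. */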
+
+/* Print interesting facts about registers. */
+static void
+rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
+{
+ int r, m;
+
+ for (r = first_regno; r <= last_regno; ++r)
+ {
+ const char *comma = "";
+ int len;
+
+ if (first_regno == last_regno)
+ fprintf (stderr, "%s:\t", reg_name);
+ else
+ fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
+
+ len = 8;
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
+ {
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ len = 8;
+ comma = "";
+ }
+
+ if (rs6000_hard_regno_nregs[m][r] > 1)
+ len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
+ rs6000_hard_regno_nregs[m][r]);
+ else
+ len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
+
+ comma = ", ";
+ }
+
+ if (call_used_regs[r])
+ {
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ len = 8;
+ comma = "";
+ }
+
+ len += fprintf (stderr, "%s%s", comma, "call-used");
+ comma = ", ";
+ }
+
+ if (fixed_regs[r])
+ {
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ len = 8;
+ comma = "";
+ }
+
+ len += fprintf (stderr, "%s%s", comma, "fixed");
+ comma = ", ";
+ }
+
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ comma = "";
+ }
+
+ fprintf (stderr, "%sregno = %d\n", comma, r);
+ }
+}
+
+#define DEBUG_FMT_D "%-32s= %d\n"
+#define DEBUG_FMT_S "%-32s= %s\n"
+
+/* Print various interesting information with -mdebug=reg. */
+static void
+rs6000_debug_reg_global (void)
+{
+ static const char *const tf[2] = { "false", "true" };
+ const char *nl = (const char *)0;
+ int m;
+ char costly_num[20];
+ char nop_num[20];
+ const char *costly_str;
+ const char *nop_str;
+ const char *trace_str;
+ const char *abi_str;
+ const char *cmodel_str;
+
+ /* Map enum rs6000_vector to string. */
+ static const char *rs6000_debug_vector_unit[] = {
+ "none",
+ "altivec",
+ "vsx",
+ "paired",
+ "spe",
+ "other"
+ };
+
+ fprintf (stderr, "Register information: (last virtual reg = %d)\n",
+ LAST_VIRTUAL_REGISTER);
+ rs6000_debug_reg_print (0, 31, "gr");
+ rs6000_debug_reg_print (32, 63, "fp");
+ rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
+ LAST_ALTIVEC_REGNO,
+ "vs");
+ rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
+ rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
+ rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
+ rs6000_debug_reg_print (MQ_REGNO, MQ_REGNO, "mq");
+ rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
+ rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
+ rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
+ rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
+ rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
+
+ fprintf (stderr,
+ "\n"
+ "d reg_class = %s\n"
+ "f reg_class = %s\n"
+ "v reg_class = %s\n"
+ "wa reg_class = %s\n"
+ "wd reg_class = %s\n"
+ "wf reg_class = %s\n"
+ "ws reg_class = %s\n\n",
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
+
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
+ {
+ nl = "\n";
+ fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n",
+ GET_MODE_NAME (m),
+ rs6000_debug_vector_unit[ rs6000_vector_unit[m] ],
+ rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]);
+ }
+
+ if (nl)
+ fputs (nl, stderr);
+
+ if (rs6000_recip_control)
+ {
+ fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
+
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ if (rs6000_recip_bits[m])
+ {
+ fprintf (stderr,
+ "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
+ GET_MODE_NAME (m),
+ (RS6000_RECIP_AUTO_RE_P (m)
+ ? "auto"
+ : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
+ (RS6000_RECIP_AUTO_RSQRTE_P (m)
+ ? "auto"
+ : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
+ }
+
+ fputs ("\n", stderr);
+ }
+
+ if (rs6000_cpu_index >= 0)
+ fprintf (stderr, DEBUG_FMT_S, "cpu",
+ processor_target_table[rs6000_cpu_index].name);
+
+ if (rs6000_tune_index >= 0)
+ fprintf (stderr, DEBUG_FMT_S, "tune",
+ processor_target_table[rs6000_tune_index].name);
+
+ switch (rs6000_sched_costly_dep)
+ {
+ case max_dep_latency:
+ costly_str = "max_dep_latency";
+ break;
+
+ case no_dep_costly:
+ costly_str = "no_dep_costly";
+ break;
+
+ case all_deps_costly:
+ costly_str = "all_deps_costly";
+ break;
+
+ case true_store_to_load_dep_costly:
+ costly_str = "true_store_to_load_dep_costly";
+ break;
+
+ case store_to_load_dep_costly:
+ costly_str = "store_to_load_dep_costly";
+ break;
+
+ default:
+ costly_str = costly_num;
+ sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
+ break;
+ }
+
+ fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
+
+ switch (rs6000_sched_insert_nops)
+ {
+ case sched_finish_regroup_exact:
+ nop_str = "sched_finish_regroup_exact";
+ break;
+
+ case sched_finish_pad_groups:
+ nop_str = "sched_finish_pad_groups";
+ break;
+
+ case sched_finish_none:
+ nop_str = "sched_finish_none";
+ break;
+
+ default:
+ nop_str = nop_num;
+ sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
+ break;
+ }
+
+ fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
+
+ switch (rs6000_sdata)
+ {
+ default:
+ case SDATA_NONE:
+ break;
+
+ case SDATA_DATA:
+ fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
+ break;
+
+ case SDATA_SYSV:
+ fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
+ break;
+
+ case SDATA_EABI:
+ fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
+ break;
+
+ }
+
+ switch (rs6000_traceback)
+ {
+ case traceback_default: trace_str = "default"; break;
+ case traceback_none: trace_str = "none"; break;
+ case traceback_part: trace_str = "part"; break;
+ case traceback_full: trace_str = "full"; break;
+ default: trace_str = "unknown"; break;
+ }
+
+ fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
+
+ switch (rs6000_current_cmodel)
+ {
+ case CMODEL_SMALL: cmodel_str = "small"; break;
+ case CMODEL_MEDIUM: cmodel_str = "medium"; break;
+ case CMODEL_LARGE: cmodel_str = "large"; break;
+ default: cmodel_str = "unknown"; break;
+ }
+
+ fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
+
+ switch (rs6000_current_abi)
+ {
+ case ABI_NONE: abi_str = "none"; break;
+ case ABI_AIX: abi_str = "aix"; break;
+ case ABI_V4: abi_str = "V4"; break;
+ case ABI_DARWIN: abi_str = "darwin"; break;
+ default: abi_str = "unknown"; break;
+ }
+
+ fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
+
+ if (rs6000_altivec_abi)
+ fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
+
+ if (rs6000_spe_abi)
+ fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
+
+ if (rs6000_darwin64_abi)
+ fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
+
+ if (rs6000_float_gprs)
+ fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
+
+ fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
+ fprintf (stderr, DEBUG_FMT_S, "align_branch",
+ tf[!!rs6000_align_branch_targets]);
+ fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
+ fprintf (stderr, DEBUG_FMT_D, "long_double_size",
+ rs6000_long_double_type_size);
+ fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
+ (int)rs6000_sched_restricted_insns_priority);
+}
+
+/* Initialize the various global tables that are based on register size. */
+static void
+rs6000_init_hard_regno_mode_ok (bool global_init_p)
+{
+ int r, m, c;
+ int align64;
+ int align32;
+
+ /* Precalculate REGNO_REG_CLASS. */
+ rs6000_regno_regclass[0] = GENERAL_REGS;
+ for (r = 1; r < 32; ++r)
+ rs6000_regno_regclass[r] = BASE_REGS;
+
+ for (r = 32; r < 64; ++r)
+ rs6000_regno_regclass[r] = FLOAT_REGS;
+
+ for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
+ rs6000_regno_regclass[r] = NO_REGS;
+
+ for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
+ rs6000_regno_regclass[r] = ALTIVEC_REGS;
+
+ rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
+ for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
+ rs6000_regno_regclass[r] = CR_REGS;
+
+ rs6000_regno_regclass[MQ_REGNO] = MQ_REGS;
+ rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
+ rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
+ rs6000_regno_regclass[CA_REGNO] = CA_REGS;
+ rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
+ rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
+ rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
+ rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
+ rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
+ rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
+
+ /* Precalculate vector information; this must be set up before
+ rs6000_hard_regno_nregs_internal below. */
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE;
+ rs6000_vector_reload[m][0] = CODE_FOR_nothing;
+ rs6000_vector_reload[m][1] = CODE_FOR_nothing;
+ }
+
+ for (c = 0; c < (int)RS6000_CONSTRAINT_MAX; c++)
+ rs6000_constraints[c] = NO_REGS;
+
+ /* The VSX hardware allows native alignment for vectors, but we control
+ whether the compiler believes it can use native alignment or must still
+ use 128-bit alignment. */
+ if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
+ {
+ align64 = 64;
+ align32 = 32;
+ }
+ else
+ {
+ align64 = 128;
+ align32 = 128;
+ }
+
+ /* V2DF mode, VSX only. */
+ if (TARGET_VSX)
+ {
+ rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
+ rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
+ rs6000_vector_align[V2DFmode] = align64;
+ }
+
+ /* V4SF mode, either VSX or Altivec. */
+ if (TARGET_VSX)
+ {
+ rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
+ rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
+ rs6000_vector_align[V4SFmode] = align32;
+ }
+ else if (TARGET_ALTIVEC)
+ {
+ rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
+ rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
+ rs6000_vector_align[V4SFmode] = align32;
+ }
+
+  /* V16QImode, V8HImode, V4SImode are Altivec only for arithmetic, but may
+     use VSX loads and stores.  */
+ if (TARGET_ALTIVEC)
+ {
+ rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
+ rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
+ rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
+ rs6000_vector_align[V4SImode] = align32;
+ rs6000_vector_align[V8HImode] = align32;
+ rs6000_vector_align[V16QImode] = align32;
+
+ if (TARGET_VSX)
+ {
+ rs6000_vector_mem[V4SImode] = VECTOR_VSX;
+ rs6000_vector_mem[V8HImode] = VECTOR_VSX;
+ rs6000_vector_mem[V16QImode] = VECTOR_VSX;
+ }
+ else
+ {
+ rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
+ rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
+ rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
+ }
+ }
+
+ /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
+ Altivec doesn't have 64-bit support. */
+ if (TARGET_VSX)
+ {
+ rs6000_vector_mem[V2DImode] = VECTOR_VSX;
+ rs6000_vector_unit[V2DImode] = VECTOR_NONE;
+ rs6000_vector_align[V2DImode] = align64;
+ }
+
+ /* DFmode, see if we want to use the VSX unit. */
+ if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
+ {
+ rs6000_vector_unit[DFmode] = VECTOR_VSX;
+ rs6000_vector_mem[DFmode]
+ = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE);
+ rs6000_vector_align[DFmode] = align64;
+ }
+
+ /* TODO add SPE and paired floating point vector support. */
+
+  /* Register class constraints for the constraints that depend on compile
+     switches.  */
+ if (TARGET_HARD_FLOAT && TARGET_FPRS)
+ rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
+
+ if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
+
+ if (TARGET_VSX)
+ {
+ /* At present, we just use VSX_REGS, but we have different constraints
+ based on the use, in case we want to fine tune the default register
+ class used. wa = any VSX register, wf = register class to use for
+	 V4SF, wd = register class to use for V2DF, and ws = register class to
+ use for DF scalars. */
+ rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY
+ ? VSX_REGS
+ : FLOAT_REGS);
+ }
+
+ if (TARGET_ALTIVEC)
+ rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
+
+ /* Set up the reload helper functions. */
+ if (TARGET_VSX || TARGET_ALTIVEC)
+ {
+ if (TARGET_64BIT)
+ {
+ rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store;
+ rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load;
+ rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store;
+ rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load;
+ rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store;
+ rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load;
+ rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store;
+ rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load;
+ rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store;
+ rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load;
+ rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store;
+ rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load;
+ }
+ else
+ {
+ rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store;
+ rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load;
+ rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store;
+ rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load;
+ rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store;
+ rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load;
+ rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store;
+ rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load;
+ rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store;
+ rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load;
+ rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store;
+ rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load;
+ }
+ }
+
+ /* Precalculate HARD_REGNO_NREGS. */
+ for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ rs6000_hard_regno_nregs[m][r]
+ = rs6000_hard_regno_nregs_internal (r, (enum machine_mode)m);
+
+ /* Precalculate HARD_REGNO_MODE_OK. */
+ for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ if (rs6000_hard_regno_mode_ok (r, (enum machine_mode)m))
+ rs6000_hard_regno_mode_ok_p[m][r] = true;
+
+ /* Precalculate CLASS_MAX_NREGS sizes. */
+ for (c = 0; c < LIM_REG_CLASSES; ++c)
+ {
+ int reg_size;
+
+ if (TARGET_VSX && VSX_REG_CLASS_P (c))
+ reg_size = UNITS_PER_VSX_WORD;
+
+ else if (c == ALTIVEC_REGS)
+ reg_size = UNITS_PER_ALTIVEC_WORD;
+
+ else if (c == FLOAT_REGS)
+ reg_size = UNITS_PER_FP_WORD;
+
+ else
+ reg_size = UNITS_PER_WORD;
+
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ rs6000_class_max_nregs[m][c]
+ = (GET_MODE_SIZE (m) + reg_size - 1) / reg_size;
+ }
+
+ if (TARGET_E500_DOUBLE)
+ rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
+
+  /* Calculate which modes should automatically generate code that uses the
+     reciprocal divide and square root estimate instructions.  In the future,
+     possibly automatically generate the instructions even if the user did not
+     specify -mrecip.  The older machines' double precision reciprocal sqrt
+     estimate is not accurate enough.  */
+ memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
+ if (TARGET_FRES)
+ rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
+ if (TARGET_FRE)
+ rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
+ rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
+ if (VECTOR_UNIT_VSX_P (V2DFmode))
+ rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
+
+ if (TARGET_FRSQRTES)
+ rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
+ if (TARGET_FRSQRTE)
+ rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
+ rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
+ if (VECTOR_UNIT_VSX_P (V2DFmode))
+ rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
+
+ if (rs6000_recip_control)
+ {
+ if (!flag_finite_math_only)
+ warning (0, "-mrecip requires -ffinite-math or -ffast-math");
+ if (flag_trapping_math)
+ warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
+ if (!flag_reciprocal_math)
+ warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
+ if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
+ {
+ if (RS6000_RECIP_HAVE_RE_P (SFmode)
+ && (rs6000_recip_control & RECIP_SF_DIV) != 0)
+ rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
+
+ if (RS6000_RECIP_HAVE_RE_P (DFmode)
+ && (rs6000_recip_control & RECIP_DF_DIV) != 0)
+ rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
+
+ if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
+ && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
+ rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
+
+ if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
+ && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
+ rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
+
+ if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
+ && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
+ rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
+
+ if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
+ && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
+ rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
+
+ if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
+ && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
+ rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
+
+ if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
+ && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
+ rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
+ }
+ }
+
+ if (global_init_p || TARGET_DEBUG_TARGET)
+ {
+ if (TARGET_DEBUG_REG)
+ rs6000_debug_reg_global ();
+
+ if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
+ fprintf (stderr,
+ "SImode variable mult cost = %d\n"
+ "SImode constant mult cost = %d\n"
+ "SImode short constant mult cost = %d\n"
+		 "DImode multiplication cost = %d\n"
+ "SImode division cost = %d\n"
+ "DImode division cost = %d\n"
+ "Simple fp operation cost = %d\n"
+ "DFmode multiplication cost = %d\n"
+ "SFmode division cost = %d\n"
+ "DFmode division cost = %d\n"
+ "cache line size = %d\n"
+ "l1 cache size = %d\n"
+ "l2 cache size = %d\n"
+ "simultaneous prefetches = %d\n"
+ "\n",
+ rs6000_cost->mulsi,
+ rs6000_cost->mulsi_const,
+ rs6000_cost->mulsi_const9,
+ rs6000_cost->muldi,
+ rs6000_cost->divsi,
+ rs6000_cost->divdi,
+ rs6000_cost->fp,
+ rs6000_cost->dmul,
+ rs6000_cost->sdiv,
+ rs6000_cost->ddiv,
+ rs6000_cost->cache_line_size,
+ rs6000_cost->l1_cache_size,
+ rs6000_cost->l2_cache_size,
+ rs6000_cost->simultaneous_prefetches);
+ }
+}
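+
+/* Illustrative sketch (editorial, not upstream code): once the tables above
+   are filled in, the backend's register queries reduce to table lookups,
+   along the lines of:
+
+     HARD_REGNO_NREGS (REGNO, MODE)  -> rs6000_hard_regno_nregs[MODE][REGNO]
+     HARD_REGNO_MODE_OK (REGNO, MODE)
+       -> rs6000_hard_regno_mode_ok_p[MODE][REGNO]
+     CLASS_MAX_NREGS (CLASS, MODE)   -> rs6000_class_max_nregs[MODE][CLASS]
+
+   so each query during register allocation costs an array load instead of a
+   recomputation.  */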
+
+#if TARGET_MACHO
+/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
+
+static void
+darwin_rs6000_override_options (void)
+{
+  /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
+     off.  */
+ rs6000_altivec_abi = 1;
+ TARGET_ALTIVEC_VRSAVE = 1;
+ rs6000_current_abi = ABI_DARWIN;
+
+ if (DEFAULT_ABI == ABI_DARWIN
+ && TARGET_64BIT)
+ darwin_one_byte_bool = 1;
+
+ if (TARGET_64BIT && ! TARGET_POWERPC64)
+ {
+ target_flags |= MASK_POWERPC64;
+ warning (0, "-m64 requires PowerPC64 architecture, enabling");
+ }
+ if (flag_mkernel)
+ {
+ rs6000_default_long_calls = 1;
+ target_flags |= MASK_SOFT_FLOAT;
+ }
+
+ /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
+ Altivec. */
+ if (!flag_mkernel && !flag_apple_kext
+ && TARGET_64BIT
+ && ! (target_flags_explicit & MASK_ALTIVEC))
+ target_flags |= MASK_ALTIVEC;
+
+  /* Unless the user (not the configurer) has explicitly overridden
+     it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4
+     unless targeting the kernel.  */
+ if (!flag_mkernel
+ && !flag_apple_kext
+ && strverscmp (darwin_macosx_version_min, "10.5") >= 0
+ && ! (target_flags_explicit & MASK_ALTIVEC)
+ && ! rs6000_select[1].string)
+ {
+ target_flags |= MASK_ALTIVEC;
+ }
+}
+#endif
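+
+/* Editorial note: strverscmp above compares version strings by numeric
+   component, so strverscmp ("10.10", "10.9") is positive while a plain
+   strcmp would order "10.10" before "10.9"; this is why the 10.5+ check
+   works for all later releases.  */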
+
+/* If not otherwise specified by a target, make 'long double' equivalent to
+ 'double'. */
+
+#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
+#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
+#endif
+
+/* Override command line options. Mostly we process the processor type and
+ sometimes adjust other TARGET_ options. */
+
+static bool
+rs6000_option_override_internal (bool global_init_p)
+{
+ bool ret = true;
+ const char *default_cpu = OPTION_TARGET_CPU_DEFAULT;
+ int set_masks;
+ int cpu_index;
+ int tune_index;
+ struct cl_target_option *main_target_opt
+ = ((global_init_p || target_option_default_node == NULL)
+ ? NULL : TREE_TARGET_OPTION (target_option_default_node));
+
+  /* Numerous experiments show that IRA-based loop pressure
+     calculation works better for RTL loop invariant motion on targets
+     with enough (>= 32) registers.  It is an expensive optimization,
+     so it is enabled only for peak performance.  */
+ if (optimize >= 3 && global_init_p)
+ flag_ira_loop_pressure = 1;
+
+ /* Set the pointer size. */
+ if (TARGET_64BIT)
+ {
+ rs6000_pmode = (int)DImode;
+ rs6000_pointer_size = 64;
+ }
+ else
+ {
+ rs6000_pmode = (int)SImode;
+ rs6000_pointer_size = 32;
+ }
+
+ set_masks = POWER_MASKS | POWERPC_MASKS | MASK_SOFT_FLOAT;
+#ifdef OS_MISSING_POWERPC64
+ if (OS_MISSING_POWERPC64)
+ set_masks &= ~MASK_POWERPC64;
+#endif
+#ifdef OS_MISSING_ALTIVEC
+ if (OS_MISSING_ALTIVEC)
+ set_masks &= ~(MASK_ALTIVEC | MASK_VSX);
+#endif
+
+ /* Don't override by the processor default if given explicitly. */
+ set_masks &= ~target_flags_explicit;
+
+ /* Identify the processor type. */
+ if (!default_cpu)
+ {
+ if (TARGET_POWERPC64)
+ default_cpu = "powerpc64";
+ else if (TARGET_POWERPC)
+ default_cpu = "powerpc";
+ }
+
+  /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
+ the cpu in a target attribute or pragma, but did not specify a tuning
+ option, use the cpu for the tuning option rather than the option specified
+ with -mtune on the command line. */
+ if (rs6000_cpu_index > 0)
+ cpu_index = rs6000_cpu_index;
+ else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index > 0)
+ rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
+ else
+ rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
+
+ if (rs6000_tune_index > 0)
+ tune_index = rs6000_tune_index;
+ else
+ rs6000_tune_index = tune_index = cpu_index;
+
+ if (cpu_index >= 0)
+ {
+ target_flags &= ~set_masks;
+ target_flags |= (processor_target_table[cpu_index].target_enable
+ & set_masks);
+ }
+
+ rs6000_cpu = ((tune_index >= 0)
+ ? processor_target_table[tune_index].processor
+ : (TARGET_POWERPC64
+ ? PROCESSOR_DEFAULT64
+ : PROCESSOR_DEFAULT));
+
+ if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
+ || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64)
+ {
+ if (TARGET_ALTIVEC)
+ error ("AltiVec not supported in this target");
+ if (TARGET_SPE)
+ error ("SPE not supported in this target");
+ }
+
+ /* Disable Cell microcode if we are optimizing for the Cell
+ and not optimizing for size. */
+ if (rs6000_gen_cell_microcode == -1)
+ rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
+ && !optimize_size);
+
+ /* If we are optimizing big endian systems for space and it's OK to
+ use instructions that would be microcoded on the Cell, use the
+ load/store multiple and string instructions. */
+ if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
+ target_flags |= ~target_flags_explicit & (MASK_MULTIPLE | MASK_STRING);
+
+  /* Don't allow -mmultiple or -mstring on little endian systems
+     unless the cpu is a 750, because the hardware doesn't support the
+     instructions used in little endian mode, and they cause an alignment
+     trap.  The 750 does not cause an alignment trap (except when the
+     target is unaligned).  */
+
+ if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
+ {
+ if (TARGET_MULTIPLE)
+ {
+ target_flags &= ~MASK_MULTIPLE;
+ if ((target_flags_explicit & MASK_MULTIPLE) != 0)
+ warning (0, "-mmultiple is not supported on little endian systems");
+ }
+
+ if (TARGET_STRING)
+ {
+ target_flags &= ~MASK_STRING;
+ if ((target_flags_explicit & MASK_STRING) != 0)
+ warning (0, "-mstring is not supported on little endian systems");
+ }
+ }
+
+ /* Add some warnings for VSX. */
+ if (TARGET_VSX)
+ {
+ const char *msg = NULL;
+ if (!TARGET_HARD_FLOAT || !TARGET_FPRS
+ || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
+ {
+ if (target_flags_explicit & MASK_VSX)
+ msg = N_("-mvsx requires hardware floating point");
+ else
+ target_flags &= ~ MASK_VSX;
+ }
+ else if (TARGET_PAIRED_FLOAT)
+ msg = N_("-mvsx and -mpaired are incompatible");
+      /* The hardware will allow VSX and little endian, but until we make
+	 sure things like vector select, etc. work, don't allow VSX on
+	 little endian systems at this point.  */
+ else if (!BYTES_BIG_ENDIAN)
+ msg = N_("-mvsx used with little endian code");
+ else if (TARGET_AVOID_XFORM > 0)
+ msg = N_("-mvsx needs indexed addressing");
+ else if (!TARGET_ALTIVEC && (target_flags_explicit & MASK_ALTIVEC))
+ {
+ if (target_flags_explicit & MASK_VSX)
+ msg = N_("-mvsx and -mno-altivec are incompatible");
+ else
+ msg = N_("-mno-altivec disables vsx");
+ }
+
+ if (msg)
+ {
+ warning (0, msg);
+ target_flags &= ~ MASK_VSX;
+ target_flags_explicit |= MASK_VSX;
+ }
+ }
+
+  /* For the newer switches (vsx, dfp, etc.) set some of the older options,
+     unless the user explicitly used -mno-<option> to disable the code.  */
+ if (TARGET_VSX)
+ target_flags |= (ISA_2_6_MASKS_SERVER & ~target_flags_explicit);
+ else if (TARGET_POPCNTD)
+ target_flags |= (ISA_2_6_MASKS_EMBEDDED & ~target_flags_explicit);
+ else if (TARGET_DFP)
+ target_flags |= (ISA_2_5_MASKS_SERVER & ~target_flags_explicit);
+ else if (TARGET_CMPB)
+ target_flags |= (ISA_2_5_MASKS_EMBEDDED & ~target_flags_explicit);
+ else if (TARGET_FPRND)
+ target_flags |= (ISA_2_4_MASKS & ~target_flags_explicit);
+ else if (TARGET_POPCNTB)
+ target_flags |= (ISA_2_2_MASKS & ~target_flags_explicit);
+ else if (TARGET_ALTIVEC)
+ target_flags |= (MASK_PPC_GFXOPT & ~target_flags_explicit);
+
+ /* E500mc does "better" if we inline more aggressively. Respect the
+ user's opinion, though. */
+ if (rs6000_block_move_inline_limit == 0
+ && (rs6000_cpu == PROCESSOR_PPCE500MC
+ || rs6000_cpu == PROCESSOR_PPCE500MC64))
+ rs6000_block_move_inline_limit = 128;
+
+ /* store_one_arg depends on expand_block_move to handle at least the
+ size of reg_parm_stack_space. */
+ if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
+ rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
+
+ if (global_init_p)
+ {
+      /* If the appropriate debug option is enabled, replace the target hooks
+	 with debug versions that call the real version and then print
+	 debugging information.  */
+ if (TARGET_DEBUG_COST)
+ {
+ targetm.rtx_costs = rs6000_debug_rtx_costs;
+ targetm.address_cost = rs6000_debug_address_cost;
+ targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
+ targetm.legitimize_address = rs6000_debug_legitimize_address;
+ rs6000_secondary_reload_class_ptr
+ = rs6000_debug_secondary_reload_class;
+ rs6000_secondary_memory_needed_ptr
+ = rs6000_debug_secondary_memory_needed;
+ rs6000_cannot_change_mode_class_ptr
+ = rs6000_debug_cannot_change_mode_class;
+ rs6000_preferred_reload_class_ptr
+ = rs6000_debug_preferred_reload_class;
+ rs6000_legitimize_reload_address_ptr
+ = rs6000_debug_legitimize_reload_address;
+ rs6000_mode_dependent_address_ptr
+ = rs6000_debug_mode_dependent_address;
+ }
+
+ if (rs6000_veclibabi_name)
+ {
+ if (strcmp (rs6000_veclibabi_name, "mass") == 0)
+ rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
+ else
+ {
+ error ("unknown vectorization library ABI type (%s) for "
+ "-mveclibabi= switch", rs6000_veclibabi_name);
+ ret = false;
+ }
+ }
+ }
+
+ if (!rs6000_explicit_options.long_double)
+ {
+ if (main_target_opt != NULL
+ && (main_target_opt->x_rs6000_long_double_type_size
+ != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
+ error ("target attribute or pragma changes long double size");
+ else
+ rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
+ }
+
+#ifndef POWERPC_LINUX
+ if (!rs6000_explicit_options.ieee)
+ rs6000_ieeequad = 1;
+#endif
+
+ /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
+ target attribute or pragma which automatically enables both options,
+ unless the altivec ABI was set. This is set by default for 64-bit, but
+ not for 32-bit. */
+ if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
+ target_flags &= ~((MASK_VSX | MASK_ALTIVEC) & ~target_flags_explicit);
+
+ /* Enable Altivec ABI for AIX -maltivec. */
+ if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
+ {
+ if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
+ error ("target attribute or pragma changes AltiVec ABI");
+ else
+ rs6000_altivec_abi = 1;
+ }
+
+ /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
+ PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
+ be explicitly overridden in either case. */
+ if (TARGET_ELF)
+ {
+ if (!rs6000_explicit_options.altivec_abi
+ && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
+ {
+	  if (main_target_opt != NULL
+	      && !main_target_opt->x_rs6000_altivec_abi)
+ error ("target attribute or pragma changes AltiVec ABI");
+ else
+ rs6000_altivec_abi = 1;
+ }
+
+ /* Enable VRSAVE for AltiVec ABI, unless explicitly overridden. */
+ if (!rs6000_explicit_options.vrsave)
+ TARGET_ALTIVEC_VRSAVE = rs6000_altivec_abi;
+ }
+
+ /* Set the Darwin64 ABI as default for 64-bit Darwin.
+ So far, the only darwin64 targets are also MACH-O. */
+ if (TARGET_MACHO
+ && DEFAULT_ABI == ABI_DARWIN
+ && TARGET_64BIT)
+ {
+ if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
+ error ("target attribute or pragma changes darwin64 ABI");
+ else
+ {
+ rs6000_darwin64_abi = 1;
+ /* Default to natural alignment, for better performance. */
+ rs6000_alignment_flags = MASK_ALIGN_NATURAL;
+ }
+ }
+
+ /* Place FP constants in the constant pool instead of TOC
+ if section anchors enabled. */
+ if (flag_section_anchors)
+ TARGET_NO_FP_IN_TOC = 1;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
+ SUBSUBTARGET_OVERRIDE_OPTIONS;
+#endif
+#ifdef SUB3TARGET_OVERRIDE_OPTIONS
+ SUB3TARGET_OVERRIDE_OPTIONS;
+#endif
+
+ if (TARGET_E500 || rs6000_cpu == PROCESSOR_PPCE500MC
+ || rs6000_cpu == PROCESSOR_PPCE500MC64)
+ {
+ /* The e500 and e500mc do not have string instructions, and we set
+ MASK_STRING above when optimizing for size. */
+ if ((target_flags & MASK_STRING) != 0)
+ target_flags = target_flags & ~MASK_STRING;
+ }
+ else if (rs6000_select[1].string != NULL)
+ {
+ /* For the powerpc-eabispe configuration, we set all these by
+ default, so let's unset them if we manually set another
+ CPU that is not the E500. */
+ if (main_target_opt != NULL
+ && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
+ || (main_target_opt->x_rs6000_spe != rs6000_spe)
+ || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
+ error ("target attribute or pragma changes SPE ABI");
+ else
+ {
+ if (!rs6000_explicit_options.spe_abi)
+ rs6000_spe_abi = 0;
+ if (!rs6000_explicit_options.spe)
+ rs6000_spe = 0;
+ if (!rs6000_explicit_options.float_gprs)
+ rs6000_float_gprs = 0;
+ }
+ if (!(target_flags_explicit & MASK_ISEL))
+ target_flags &= ~MASK_ISEL;
+ }
+
+ /* Detect invalid option combinations with E500. */
+ CHECK_E500_OPTIONS;
+
+ rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
+ && rs6000_cpu != PROCESSOR_POWER5
+ && rs6000_cpu != PROCESSOR_POWER6
+ && rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_PPCA2
+ && rs6000_cpu != PROCESSOR_CELL);
+ rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
+ || rs6000_cpu == PROCESSOR_POWER5
+ || rs6000_cpu == PROCESSOR_POWER7);
+ rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
+ || rs6000_cpu == PROCESSOR_POWER5
+ || rs6000_cpu == PROCESSOR_POWER6
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_PPCE500MC
+ || rs6000_cpu == PROCESSOR_PPCE500MC64);
+
+ /* Allow debug switches to override the above settings. These are set to -1
+ in rs6000.opt to indicate the user hasn't directly set the switch. */
+ if (TARGET_ALWAYS_HINT >= 0)
+ rs6000_always_hint = TARGET_ALWAYS_HINT;
+
+ if (TARGET_SCHED_GROUPS >= 0)
+ rs6000_sched_groups = TARGET_SCHED_GROUPS;
+
+ if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
+ rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
+
+ rs6000_sched_restricted_insns_priority
+ = (rs6000_sched_groups ? 1 : 0);
+
+ /* Handle -msched-costly-dep option. */
+ rs6000_sched_costly_dep
+ = (rs6000_sched_groups ? store_to_load_dep_costly : no_dep_costly);
+
+ if (rs6000_sched_costly_dep_str)
+ {
+ if (! strcmp (rs6000_sched_costly_dep_str, "no"))
+ rs6000_sched_costly_dep = no_dep_costly;
+ else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
+ rs6000_sched_costly_dep = all_deps_costly;
+ else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
+ rs6000_sched_costly_dep = true_store_to_load_dep_costly;
+ else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
+ rs6000_sched_costly_dep = store_to_load_dep_costly;
+ else
+ rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
+ atoi (rs6000_sched_costly_dep_str));
+ }
+
+ /* Handle -minsert-sched-nops option. */
+ rs6000_sched_insert_nops
+ = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
+
+ if (rs6000_sched_insert_nops_str)
+ {
+ if (! strcmp (rs6000_sched_insert_nops_str, "no"))
+ rs6000_sched_insert_nops = sched_finish_none;
+ else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
+ rs6000_sched_insert_nops = sched_finish_pad_groups;
+ else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
+ rs6000_sched_insert_nops = sched_finish_regroup_exact;
+ else
+ rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
+ atoi (rs6000_sched_insert_nops_str));
+ }
+
+ if (global_init_p)
+ {
+#ifdef TARGET_REGNAMES
+ /* If the user desires alternate register names, copy in the
+ alternate names now. */
+ if (TARGET_REGNAMES)
+ memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
+#endif
+
+ /* Set aix_struct_return last, after the ABI is determined.
+ If -maix-struct-return or -msvr4-struct-return was explicitly
+ used, don't override with the ABI default. */
+ if (!rs6000_explicit_options.aix_struct_ret)
+ aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
+
+#if 0
+ /* IBM XL compiler defaults to unsigned bitfields. */
+ if (TARGET_XL_COMPAT)
+ flag_signed_bitfields = 0;
+#endif
+
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
+
+ if (TARGET_TOC)
+ ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
+
+ /* We can only guarantee the availability of DI pseudo-ops when
+ assembling for 64-bit targets. */
+ if (!TARGET_64BIT)
+ {
+ targetm.asm_out.aligned_op.di = NULL;
+ targetm.asm_out.unaligned_op.di = NULL;
+ }
+
+ /* Set branch target alignment, if not optimizing for size. */
+ if (!optimize_size)
+ {
+	  /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to
+	     be 8-byte aligned to avoid misprediction by the branch
+	     predictor.  */
+ if (rs6000_cpu == PROCESSOR_TITAN
+ || rs6000_cpu == PROCESSOR_CELL)
+ {
+ if (align_functions <= 0)
+ align_functions = 8;
+ if (align_jumps <= 0)
+ align_jumps = 8;
+ if (align_loops <= 0)
+ align_loops = 8;
+ }
+ if (rs6000_align_branch_targets)
+ {
+ if (align_functions <= 0)
+ align_functions = 16;
+ if (align_jumps <= 0)
+ align_jumps = 16;
+ if (align_loops <= 0)
+ {
+ can_override_loop_align = 1;
+ align_loops = 16;
+ }
+ }
+ if (align_jumps_max_skip <= 0)
+ align_jumps_max_skip = 15;
+ if (align_loops_max_skip <= 0)
+ align_loops_max_skip = 15;
+ }
+
+ /* Arrange to save and restore machine status around nested functions. */
+ init_machine_status = rs6000_init_machine_status;
+
+ /* We should always be splitting complex arguments, but we can't break
+ Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
+ if (DEFAULT_ABI != ABI_AIX)
+ targetm.calls.split_complex_arg = NULL;
+ }
+
+ /* Initialize rs6000_cost with the appropriate target costs. */
+ if (optimize_size)
+ rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
+ else
+ switch (rs6000_cpu)
+ {
+ case PROCESSOR_RIOS1:
+ rs6000_cost = &rios1_cost;
+ break;
+
+ case PROCESSOR_RIOS2:
+ rs6000_cost = &rios2_cost;
+ break;
+
+ case PROCESSOR_RS64A:
+ rs6000_cost = &rs64a_cost;
+ break;
+
+ case PROCESSOR_MPCCORE:
+ rs6000_cost = &mpccore_cost;
+ break;
+
+ case PROCESSOR_PPC403:
+ rs6000_cost = &ppc403_cost;
+ break;
+
+ case PROCESSOR_PPC405:
+ rs6000_cost = &ppc405_cost;
+ break;
+
+ case PROCESSOR_PPC440:
+ rs6000_cost = &ppc440_cost;
+ break;
+
+ case PROCESSOR_PPC476:
+ rs6000_cost = &ppc476_cost;
+ break;
+
+ case PROCESSOR_PPC601:
+ rs6000_cost = &ppc601_cost;
+ break;
+
+ case PROCESSOR_PPC603:
+ rs6000_cost = &ppc603_cost;
+ break;
+
+ case PROCESSOR_PPC604:
+ rs6000_cost = &ppc604_cost;
+ break;
+
+ case PROCESSOR_PPC604e:
+ rs6000_cost = &ppc604e_cost;
+ break;
+
+ case PROCESSOR_PPC620:
+ rs6000_cost = &ppc620_cost;
+ break;
+
+ case PROCESSOR_PPC630:
+ rs6000_cost = &ppc630_cost;
+ break;
+
+ case PROCESSOR_CELL:
+ rs6000_cost = &ppccell_cost;
+ break;
+
+ case PROCESSOR_PPC750:
+ case PROCESSOR_PPC7400:
+ rs6000_cost = &ppc750_cost;
+ break;
+
+ case PROCESSOR_PPC7450:
+ rs6000_cost = &ppc7450_cost;
+ break;
+
+ case PROCESSOR_PPC8540:
+ rs6000_cost = &ppc8540_cost;
+ break;
+
+ case PROCESSOR_PPCE300C2:
+ case PROCESSOR_PPCE300C3:
+ rs6000_cost = &ppce300c2c3_cost;
+ break;
+
+ case PROCESSOR_PPCE500MC:
+ rs6000_cost = &ppce500mc_cost;
+ break;
+
+ case PROCESSOR_PPCE500MC64:
+ rs6000_cost = &ppce500mc64_cost;
+ break;
+
+ case PROCESSOR_TITAN:
+ rs6000_cost = &titan_cost;
+ break;
+
+ case PROCESSOR_POWER4:
+ case PROCESSOR_POWER5:
+ rs6000_cost = &power4_cost;
+ break;
+
+ case PROCESSOR_POWER6:
+ rs6000_cost = &power6_cost;
+ break;
+
+ case PROCESSOR_POWER7:
+ rs6000_cost = &power7_cost;
+ break;
+
+ case PROCESSOR_PPCA2:
+ rs6000_cost = &ppca2_cost;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (global_init_p)
+ {
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ rs6000_cost->simultaneous_prefetches,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+ rs6000_cost->cache_line_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* If using typedef char *va_list, signal that
+ __builtin_va_start (&ap, 0) can be optimized to
+ ap = __builtin_next_arg (0). */
+ if (DEFAULT_ABI != ABI_V4)
+ targetm.expand_builtin_va_start = NULL;
+ }
+
+  /* Set up single/double float flags.
+     If TARGET_HARD_FLOAT is set, but neither single nor double is set,
+     then set both flags.  */
+ if (TARGET_HARD_FLOAT && TARGET_FPRS
+ && rs6000_single_float == 0 && rs6000_double_float == 0)
+ rs6000_single_float = rs6000_double_float = 1;
+
+ /* Reset single and double FP flags if target is E500. */
+ if (TARGET_E500)
+ {
+ rs6000_single_float = rs6000_double_float = 0;
+ if (TARGET_E500_SINGLE)
+ rs6000_single_float = 1;
+ if (TARGET_E500_DOUBLE)
+ rs6000_single_float = rs6000_double_float = 1;
+ }
+
+ if (main_target_opt)
+ {
+ if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
+ error ("target attribute or pragma changes single precision floating "
+ "point");
+ if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
+ error ("target attribute or pragma changes double precision floating "
+ "point");
+ }
+
+ /* If not explicitly specified via option, decide whether to generate indexed
+ load/store instructions. */
+ if (TARGET_AVOID_XFORM == -1)
+    /* Avoid indexed addressing when targeting Power6 in order to avoid the
+       DERAT mispredict penalty.  However, the LVE and STVE altivec
+       instructions need indexed accesses, and the type used is the scalar
+       type of the element being loaded or stored.  */
+ TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
+ && !TARGET_ALTIVEC);
+
+ /* Set the -mrecip options. */
+ if (rs6000_recip_name)
+ {
+ char *p = ASTRDUP (rs6000_recip_name);
+ char *q;
+ unsigned int mask, i;
+ bool invert;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ p = NULL;
+ if (*q == '!')
+ {
+ invert = true;
+ q++;
+ }
+ else
+ invert = false;
+
+ if (!strcmp (q, "default"))
+ mask = ((TARGET_RECIP_PRECISION)
+ ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
+ else
+ {
+ for (i = 0; i < ARRAY_SIZE (recip_options); i++)
+ if (!strcmp (q, recip_options[i].string))
+ {
+ mask = recip_options[i].mask;
+ break;
+ }
+
+ if (i == ARRAY_SIZE (recip_options))
+ {
+ error ("unknown option for -mrecip=%s", q);
+ invert = false;
+ mask = 0;
+ ret = false;
+ }
+ }
+
+ if (invert)
+ rs6000_recip_control &= ~mask;
+ else
+ rs6000_recip_control |= mask;
+ }
+ }
+
+ rs6000_init_hard_regno_mode_ok (global_init_p);
+
+  /* Save the initial options in case the user uses function-specific
+     options.  */
+ if (global_init_p)
+ target_option_default_node = target_option_current_node
+ = build_target_option_node ();
+
+ return ret;
+}
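+
+/* Illustrative sketch (editorial): the -mrecip parsing above accepts a
+   comma-separated list with optional '!' negation, using the names in the
+   recip_options[] table defined earlier in this file.  Assuming that table
+   carries "all" and "divd" entries, for example:
+
+     -mrecip=all,!divd   set every RECIP_* mask bit, then clear RECIP_DF_DIV
+     -mrecip=default     pick high or low precision from -mrecip-precision
+
+   Unrecognized names report "unknown option for -mrecip=" and cause the
+   override to fail.  */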
+
+/* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
+ define the target cpu type. */
+
+static void
+rs6000_option_override (void)
+{
+ (void) rs6000_option_override_internal (true);
+}
+
+
+/* Implement targetm.vectorize.builtin_mask_for_load. */
+static tree
+rs6000_builtin_mask_for_load (void)
+{
+ if (TARGET_ALTIVEC || TARGET_VSX)
+ return altivec_builtin_mask_for_load;
+ else
+ return 0;
+}
+
+/* Implement LOOP_ALIGN. */
+int
+rs6000_loop_align (rtx label)
+{
+ basic_block bb;
+ int ninsns;
+
+ /* Don't override loop alignment if -falign-loops was specified. */
+ if (!can_override_loop_align)
+ return align_loops_log;
+
+ bb = BLOCK_FOR_INSN (label);
+  ninsns = num_loop_insns (bb->loop_father);
+
+  /* Align small loops to 32 bytes to fit in an icache sector; otherwise
+     return the default.  */
+ if (ninsns > 4 && ninsns <= 8
+ && (rs6000_cpu == PROCESSOR_POWER4
+ || rs6000_cpu == PROCESSOR_POWER5
+ || rs6000_cpu == PROCESSOR_POWER6
+ || rs6000_cpu == PROCESSOR_POWER7))
+ return 5;
+ else
+ return align_loops_log;
+}
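+
+/* Editorial note: the value returned above is a log2 alignment, so
+   returning 5 for small loops on POWER4..POWER7 requests 1 << 5 == 32-byte
+   alignment, and the max-skip hook below permits up to
+   (1 << align) - 1 == 31 bytes of padding to reach it.  */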
+
+/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
+static int
+rs6000_loop_align_max_skip (rtx label)
+{
+ return (1 << rs6000_loop_align (label)) - 1;
+}
+
+/* Implement targetm.vectorize.builtin_conversion.
+   Returns a decl of a function that implements conversion of an integer
+   vector into a floating-point vector, or vice versa.  TCODE is the tree
+   code of the conversion, DEST_TYPE the destination type and SRC_TYPE the
+   source type.  Return NULL_TREE if it is not available.  */
+static tree
+rs6000_builtin_conversion (unsigned int tcode, tree dest_type, tree src_type)
+{
+ enum tree_code code = (enum tree_code) tcode;
+
+ switch (code)
+ {
+ case FIX_TRUNC_EXPR:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (dest_type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVDPUXDS_UNS]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVDPSXDS];
+
+ case V4SImode:
+ if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (dest_type)
+ ? rs6000_builtin_decls[VECTOR_BUILTIN_FIXUNS_V4SF_V4SI]
+ : rs6000_builtin_decls[VECTOR_BUILTIN_FIX_V4SF_V4SI];
+
+ default:
+ return NULL_TREE;
+ }
+
+ case FLOAT_EXPR:
+ switch (TYPE_MODE (src_type))
+ {
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (src_type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVUXDDP]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVSXDDP];
+
+ case V4SImode:
+ if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (src_type)
+ ? rs6000_builtin_decls[VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF]
+ : rs6000_builtin_decls[VECTOR_BUILTIN_FLOAT_V4SI_V4SF];
+
+ default:
+ return NULL_TREE;
+ }
+
+ default:
+ return NULL_TREE;
+ }
+}
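+
+/* Editorial note: for example, a FIX_TRUNC_EXPR from V2DF to signed V2DI
+   resolves to the xvcvdpsxds builtin above and the unsigned variant to
+   xvcvdpuxds, while the V4SI cases use whichever VSX or AltiVec conversion
+   builtin the current target provides.  */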
+
+/* Implement targetm.vectorize.builtin_mul_widen_even. */
+static tree
+rs6000_builtin_mul_widen_even (tree type)
+{
+ if (!TARGET_ALTIVEC)
+ return NULL_TREE;
+
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH];
+
+ case V16QImode:
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB];
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Implement targetm.vectorize.builtin_mul_widen_odd. */
+static tree
+rs6000_builtin_mul_widen_odd (tree type)
+{
+ if (!TARGET_ALTIVEC)
+ return NULL_TREE;
+
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH];
+
+ case V16QImode:
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB];
+ default:
+ return NULL_TREE;
+ }
+}
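+
+/* Illustrative sketch (editorial) of the even/odd widening multiplies the
+   two hooks above expose, shown for V8HI inputs a and b producing V4SI
+   (lane numbering is illustrative):
+
+     even: result[i] = (int) a[2 * i]     * (int) b[2 * i]
+     odd:  result[i] = (int) a[2 * i + 1] * (int) b[2 * i + 1]
+
+   The vectorizer calls both hooks and interleaves the results to build a
+   full widening multiply.  */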
+
+
+/* Return true iff a data reference of TYPE can reach vector alignment (16)
+   after applying N iterations.  This routine does not determine how many
+   iterations are required to reach the desired alignment.  */
+
+static bool
+rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
+{
+ if (is_packed)
+ return false;
+
+ if (TARGET_32BIT)
+ {
+ if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
+ return true;
+
+ if (rs6000_alignment_flags == MASK_ALIGN_POWER)
+ return true;
+
+ return false;
+ }
+ else
+ {
+ if (TARGET_MACHO)
+ return false;
+
+      /* Assume that all other types are naturally aligned.  CHECKME!  */
+ return true;
+ }
+}
+
+/* Return true if the vector misalignment factor is supported by the
+ target. */
+bool
+rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
+ const_tree type,
+ int misalignment,
+ bool is_packed)
+{
+ if (TARGET_VSX)
+ {
+      /* Return false if the movmisalign pattern is not supported for this
+	 mode.  */
+ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
+ return false;
+
+ if (misalignment == -1)
+ {
+	  /* The misalignment factor is unknown at compile time, but we know
+	     it is word aligned.  */
+ if (rs6000_vector_alignment_reachable (type, is_packed))
+ {
+ int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
+
+ if (element_size == 64 || element_size == 32)
+ return true;
+ }
+
+ return false;
+ }
+
+      /* VSX supports word-aligned vector accesses.  */
+ if (misalignment % 4 == 0)
+ return true;
+ }
+ return false;
+}
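+
+/* Editorial note: a MISALIGNMENT of -1 above encodes "unknown at compile
+   time"; otherwise it is the byte offset from vector alignment, so the
+   "misalignment % 4 == 0" test accepts accesses sitting 4, 8 or 12 bytes
+   off a 16-byte boundary, matching the word-aligned accesses VSX
+   supports.  */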
+
+/* Implement targetm.vectorize.builtin_vec_perm. */
+tree
+rs6000_builtin_vec_perm (tree type, tree *mask_element_type)
+{
+ tree inner_type = TREE_TYPE (type);
+ bool uns_p = TYPE_UNSIGNED (inner_type);
+ tree d;
+
+ *mask_element_type = unsigned_char_type_node;
+
+ switch (TYPE_MODE (type))
+ {
+ case V16QImode:
+ d = (uns_p
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_16QI_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_16QI]);
+ break;
+
+ case V8HImode:
+ d = (uns_p
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_8HI_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_8HI]);
+ break;
+
+ case V4SImode:
+ d = (uns_p
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SI_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SI]);
+ break;
+
+ case V4SFmode:
+ d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SF];
+ break;
+
+ case V2DFmode:
+ if (!TARGET_ALLOW_DF_PERMUTE)
+ return NULL_TREE;
+
+ d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DF];
+ break;
+
+ case V2DImode:
+ if (!TARGET_ALLOW_DF_PERMUTE)
+ return NULL_TREE;
+
+ d = (uns_p
+ ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI_UNS]
+ : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_2DI]);
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ gcc_assert (d);
+ return d;
+}
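+
+/* Illustrative sketch (editorial) of the vperm operation the decls above
+   wrap: each byte of the selector picks one of the 32 bytes of the two
+   input vectors,
+
+     result[i] = (sel[i] & 0x10) ? b[sel[i] & 0xF] : a[sel[i] & 0xF]
+
+   which is why *mask_element_type is forced to unsigned char regardless of
+   the element type being permuted.  */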
+
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int misalign)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ case scalar_load:
+ case scalar_store:
+ case vector_stmt:
+ case vector_load:
+ case vector_store:
+ case vec_to_scalar:
+ case scalar_to_vec:
+ case cond_branch_not_taken:
+ return 1;
+
+ case cond_branch_taken:
+ return 3;
+
+ case vec_perm:
+ if (TARGET_VSX)
+ return 4;
+ else
+ return 1;
+
+ case vec_promote_demote:
+ if (TARGET_VSX)
+ return 5;
+ else
+ return 1;
+
+ case unaligned_load:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 22;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned loads are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ case unaligned_store:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 23;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned stores are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
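+
+/* Editorial note: the numbers above are relative cost units consumed by the
+   vectorizer's profitability model, not cycle counts; e.g. the 22/23 for
+   word-aligned VSX accesses make misaligned V4SF loads and stores look an
+   order of magnitude costlier than aligned ones, steering the vectorizer
+   away when alignment cannot be established.  */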
+
+/* Implement targetm.vectorize.preferred_simd_mode. */
+
+static enum machine_mode
+rs6000_preferred_simd_mode (enum machine_mode mode)
+{
+ if (TARGET_VSX)
+ switch (mode)
+ {
+ case DFmode:
+ return V2DFmode;
+ default:;
+ }
+ if (TARGET_ALTIVEC || TARGET_VSX)
+ switch (mode)
+ {
+ case SFmode:
+ return V4SFmode;
+ case DImode:
+ return V2DImode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+ default:;
+ }
+ if (TARGET_SPE)
+ switch (mode)
+ {
+ case SFmode:
+ return V2SFmode;
+ case SImode:
+ return V2SImode;
+ default:;
+ }
+ if (TARGET_PAIRED_FLOAT
+ && mode == SFmode)
+ return V2SFmode;
+ return word_mode;
+}
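+
+/* Illustrative sketch (editorial): a loop over doubles gets V2DFmode from
+   the hook above when VSX is enabled, a loop over floats gets V4SFmode with
+   either AltiVec or VSX, and with no vector unit the fallback of word_mode
+   effectively leaves the loop scalar.  */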
+
+/* Handle generic options of the form -mfoo=yes/no.
+ NAME is the option name.
+ VALUE is the option value.
+   FLAG is a pointer to the flag in which to store 1 or 0, depending on
+   whether the option value is 'yes' or 'no' respectively.  */
+static void
+rs6000_parse_yes_no_option (const char *name, const char *value, int *flag)
+{
+ if (value == 0)
+ return;
+ else if (!strcmp (value, "yes"))
+ *flag = 1;
+ else if (!strcmp (value, "no"))
+ *flag = 0;
+ else
+ error ("unknown -m%s= option specified: '%s'", name, value);
+}
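+
+/* Illustrative sketch (editorial) of the helper above, as the -mvrsave=
+   handler later in this file uses it:
+
+     rs6000_parse_yes_no_option ("vrsave", "yes", &flag);  flag becomes 1
+     rs6000_parse_yes_no_option ("vrsave", "no",  &flag);  flag becomes 0
+     rs6000_parse_yes_no_option ("vrsave", "on",  &flag);
+       error: unknown -mvrsave= option specified: 'on'
+
+   A null VALUE leaves *FLAG untouched.  */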
+
+/* Implement TARGET_OPTION_INIT_STRUCT. */
+
+static void
+rs6000_option_init_struct (struct gcc_options *opts)
+{
+ if (DEFAULT_ABI == ABI_DARWIN)
+ /* The Darwin libraries never set errno, so we might as well
+ avoid calling them when that's the only reason we would. */
+ opts->x_flag_errno_math = 0;
+
+ /* Enable section anchors by default. */
+ if (!TARGET_MACHO)
+ opts->x_flag_section_anchors = 1;
+}
+
+/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
+
+static void
+rs6000_option_default_params (void)
+{
+ /* Double growth factor to counter reduced min jump length. */
+ set_default_param_value (PARAM_MAX_GROW_COPY_BB_INSNS, 16);
+}
+
+static enum fpu_type_t
+rs6000_parse_fpu_option (const char *option)
+{
+ if (!strcmp("none", option)) return FPU_NONE;
+ if (!strcmp("sp_lite", option)) return FPU_SF_LITE;
+ if (!strcmp("dp_lite", option)) return FPU_DF_LITE;
+ if (!strcmp("sp_full", option)) return FPU_SF_FULL;
+ if (!strcmp("dp_full", option)) return FPU_DF_FULL;
+ error("unknown value %s for -mfpu", option);
+ return FPU_NONE;
+}
+
+
+/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
+ library with vectorized intrinsics. */
+
+static tree
+rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
+{
+ char name[32];
+ const char *suffix = NULL;
+ tree fntype, new_fndecl, bdecl = NULL_TREE;
+ int n_args = 1;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+  /* Libmass is suitable for unsafe math only, as it does not correctly
+     support parts of IEEE with the required precision, such as denormals.
+     Only support it if we have VSX to use the simd d2 or f4 functions.
+     XXX: Add variable length support.  */
+ if (!flag_unsafe_math_optimizations || !TARGET_VSX)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+ switch (fn)
+ {
+ case BUILT_IN_ATAN2:
+ case BUILT_IN_HYPOT:
+ case BUILT_IN_POW:
+ n_args = 2;
+ /* fall through */
+
+ case BUILT_IN_ACOS:
+ case BUILT_IN_ACOSH:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_ASINH:
+ case BUILT_IN_ATAN:
+ case BUILT_IN_ATANH:
+ case BUILT_IN_CBRT:
+ case BUILT_IN_COS:
+ case BUILT_IN_COSH:
+ case BUILT_IN_ERF:
+ case BUILT_IN_ERFC:
+ case BUILT_IN_EXP2:
+ case BUILT_IN_EXP:
+ case BUILT_IN_EXPM1:
+ case BUILT_IN_LGAMMA:
+ case BUILT_IN_LOG10:
+ case BUILT_IN_LOG1P:
+ case BUILT_IN_LOG2:
+ case BUILT_IN_LOG:
+ case BUILT_IN_SIN:
+ case BUILT_IN_SINH:
+ case BUILT_IN_SQRT:
+ case BUILT_IN_TAN:
+ case BUILT_IN_TANH:
+ bdecl = implicit_built_in_decls[fn];
+ suffix = "d2"; /* pow -> powd2 */
+ if (el_mode != DFmode
+ || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_ATAN2F:
+ case BUILT_IN_HYPOTF:
+ case BUILT_IN_POWF:
+ n_args = 2;
+ /* fall through */
+
+ case BUILT_IN_ACOSF:
+ case BUILT_IN_ACOSHF:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_ASINHF:
+ case BUILT_IN_ATANF:
+ case BUILT_IN_ATANHF:
+ case BUILT_IN_CBRTF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_COSHF:
+ case BUILT_IN_ERFF:
+ case BUILT_IN_ERFCF:
+ case BUILT_IN_EXP2F:
+ case BUILT_IN_EXPF:
+ case BUILT_IN_EXPM1F:
+ case BUILT_IN_LGAMMAF:
+ case BUILT_IN_LOG10F:
+ case BUILT_IN_LOG1PF:
+ case BUILT_IN_LOG2F:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_SINF:
+ case BUILT_IN_SINHF:
+ case BUILT_IN_SQRTF:
+ case BUILT_IN_TANF:
+ case BUILT_IN_TANHF:
+ bdecl = implicit_built_in_decls[fn];
+ suffix = "4"; /* powf -> powf4 */
+ if (el_mode != SFmode
+ || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+ }
+ else
+ return NULL_TREE;
+
+ gcc_assert (suffix != NULL);
+ bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
+ strcpy (name, bname + sizeof ("__builtin_") - 1);
+ strcat (name, suffix);
+
+ if (n_args == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else if (n_args == 2)
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+ else
+ gcc_unreachable ();
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (BUILTINS_LOCATION,
+ FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
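+
+/* Illustrative sketch (editorial) of the MASS name mangling above: the
+   "__builtin_" prefix is stripped and the vector suffix appended, e.g.
+
+     __builtin_pow  -> "pow" + "d2" -> powd2   (V2DF, two doubles)
+     __builtin_sinf -> "sinf" + "4" -> sinf4   (V4SF, four floats)
+
+   The new decl is external, NOVOPS and READONLY, so calls to it can be
+   hoisted and CSEd like the scalar math builtins they replace.  */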
+
+/* Returns a function decl for a vectorized version of the builtin function
+   FNDECL with result vector type TYPE_OUT and argument vector type TYPE_IN,
+   or NULL_TREE if it is not available.  */
+
+static tree
+rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
+ tree type_in)
+{
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || !TARGET_VECTORIZE_BUILTINS)
+ return NULL_TREE;
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+ switch (fn)
+ {
+ case BUILT_IN_COPYSIGN:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
+ break;
+ case BUILT_IN_COPYSIGNF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
+ break;
+ case BUILT_IN_SQRT:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
+ break;
+ case BUILT_IN_SQRTF:
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
+ break;
+ case BUILT_IN_CEIL:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
+ break;
+ case BUILT_IN_CEILF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
+ break;
+ case BUILT_IN_FLOOR:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
+ break;
+ case BUILT_IN_FLOORF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
+ break;
+ case BUILT_IN_FMA:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
+ break;
+ case BUILT_IN_FMAF:
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
+ else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
+ break;
+ case BUILT_IN_TRUNC:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
+ break;
+ case BUILT_IN_TRUNCF:
+ if (out_mode != SFmode || out_n != 4
+ || in_mode != SFmode || in_n != 4)
+ break;
+ if (VECTOR_UNIT_VSX_P (V4SFmode))
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
+ if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
+ break;
+ case BUILT_IN_NEARBYINT:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && flag_unsafe_math_optimizations
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
+ break;
+ case BUILT_IN_NEARBYINTF:
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && flag_unsafe_math_optimizations
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
+ break;
+ case BUILT_IN_RINT:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && !flag_trapping_math
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
+ break;
+ case BUILT_IN_RINTF:
+ if (VECTOR_UNIT_VSX_P (V4SFmode)
+ && !flag_trapping_math
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
+ break;
+ default:
+ break;
+ }
+ }
+
+ else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+ {
+ enum rs6000_builtins fn
+ = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
+ switch (fn)
+ {
+ case RS6000_BUILTIN_RSQRTF:
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
+ break;
+ case RS6000_BUILTIN_RSQRT:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_VEC_RSQRT_V2DF];
+ break;
+ case RS6000_BUILTIN_RECIPF:
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+ && out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
+ break;
+ case RS6000_BUILTIN_RECIP:
+ if (VECTOR_UNIT_VSX_P (V2DFmode)
+ && out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Generate calls to libmass if appropriate. */
+ if (rs6000_veclib_handler)
+ return rs6000_veclib_handler (fndecl, type_out, type_in);
+
+ return NULL_TREE;
+}
+
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+rs6000_handle_option (size_t code, const char *arg, int value)
+{
+ enum fpu_type_t fpu_type = FPU_NONE;
+ int isel;
+ char *p, *q;
+
+ switch (code)
+ {
+ case OPT_mno_power:
+ target_flags &= ~(MASK_POWER | MASK_POWER2
+ | MASK_MULTIPLE | MASK_STRING);
+ target_flags_explicit |= (MASK_POWER | MASK_POWER2
+ | MASK_MULTIPLE | MASK_STRING);
+ break;
+ case OPT_mno_powerpc:
+ target_flags &= ~(MASK_POWERPC | MASK_PPC_GPOPT
+ | MASK_PPC_GFXOPT | MASK_POWERPC64);
+ target_flags_explicit |= (MASK_POWERPC | MASK_PPC_GPOPT
+ | MASK_PPC_GFXOPT | MASK_POWERPC64);
+ break;
+ case OPT_mfull_toc:
+ target_flags &= ~MASK_MINIMAL_TOC;
+ TARGET_NO_FP_IN_TOC = 0;
+ TARGET_NO_SUM_IN_TOC = 0;
+ target_flags_explicit |= MASK_MINIMAL_TOC;
+#ifdef TARGET_USES_SYSV4_OPT
+      /* Note, V.4 no longer uses a normal TOC, so make -mfull-toc be
+	 just the same as -mminimal-toc.  */
+ target_flags |= MASK_MINIMAL_TOC;
+ target_flags_explicit |= MASK_MINIMAL_TOC;
+#endif
+ break;
+
+#ifdef TARGET_USES_SYSV4_OPT
+ case OPT_mtoc:
+ /* Make -mtoc behave like -mminimal-toc. */
+ target_flags |= MASK_MINIMAL_TOC;
+ target_flags_explicit |= MASK_MINIMAL_TOC;
+ break;
+#endif
+
+#if defined (HAVE_LD_LARGE_TOC) && defined (TARGET_USES_LINUX64_OPT)
+ case OPT_mcmodel_:
+ if (strcmp (arg, "small") == 0)
+ rs6000_current_cmodel = CMODEL_SMALL;
+ else if (strcmp (arg, "medium") == 0)
+ rs6000_current_cmodel = CMODEL_MEDIUM;
+ else if (strcmp (arg, "large") == 0)
+ rs6000_current_cmodel = CMODEL_LARGE;
+ else
+ {
+ error ("invalid option for -mcmodel: '%s'", arg);
+ return false;
+ }
+      rs6000_explicit_options.cmodel = true;
+      break;
+#endif
+
+#ifdef TARGET_USES_AIX64_OPT
+ case OPT_maix64:
+#else
+ case OPT_m64:
+#endif
+ target_flags |= MASK_POWERPC64 | MASK_POWERPC;
+ target_flags |= ~target_flags_explicit & MASK_PPC_GFXOPT;
+ target_flags_explicit |= MASK_POWERPC64 | MASK_POWERPC;
+ break;
+
+#ifdef TARGET_USES_AIX64_OPT
+ case OPT_maix32:
+#else
+ case OPT_m32:
+#endif
+ target_flags &= ~MASK_POWERPC64;
+ target_flags_explicit |= MASK_POWERPC64;
+ break;
+
+ case OPT_minsert_sched_nops_:
+ rs6000_sched_insert_nops_str = arg;
+ break;
+
+ case OPT_mminimal_toc:
+ if (value == 1)
+ {
+ TARGET_NO_FP_IN_TOC = 0;
+ TARGET_NO_SUM_IN_TOC = 0;
+ }
+ break;
+
+ case OPT_mpower:
+ if (value == 1)
+ {
+ target_flags |= (MASK_MULTIPLE | MASK_STRING);
+ target_flags_explicit |= (MASK_MULTIPLE | MASK_STRING);
+ }
+ break;
+
+ case OPT_mpower2:
+ if (value == 1)
+ {
+ target_flags |= (MASK_POWER | MASK_MULTIPLE | MASK_STRING);
+ target_flags_explicit |= (MASK_POWER | MASK_MULTIPLE | MASK_STRING);
+ }
+ break;
+
+ case OPT_mpowerpc_gpopt:
+ case OPT_mpowerpc_gfxopt:
+ if (value == 1)
+ {
+ target_flags |= MASK_POWERPC;
+ target_flags_explicit |= MASK_POWERPC;
+ }
+ break;
+
+ case OPT_maix_struct_return:
+ case OPT_msvr4_struct_return:
+ rs6000_explicit_options.aix_struct_ret = true;
+ break;
+
+ case OPT_mvrsave:
+ rs6000_explicit_options.vrsave = true;
+ TARGET_ALTIVEC_VRSAVE = value;
+ break;
+
+ case OPT_mvrsave_:
+ rs6000_explicit_options.vrsave = true;
+ rs6000_parse_yes_no_option ("vrsave", arg, &(TARGET_ALTIVEC_VRSAVE));
+ break;
+
+ case OPT_misel_:
+ target_flags_explicit |= MASK_ISEL;
+ isel = 0;
+ rs6000_parse_yes_no_option ("isel", arg, &isel);
+ if (isel)
+ target_flags |= MASK_ISEL;
+ else
+ target_flags &= ~MASK_ISEL;
+ break;
+
+ case OPT_mspe:
+ rs6000_explicit_options.spe = true;
+ rs6000_spe = value;
+ break;
+
+ case OPT_mspe_:
+ rs6000_explicit_options.spe = true;
+ rs6000_parse_yes_no_option ("spe", arg, &(rs6000_spe));
+ break;
+
+ case OPT_mdebug_:
+ p = ASTRDUP (arg);
+ rs6000_debug = 0;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ unsigned mask = 0;
+ bool invert;
+
+ p = NULL;
+ if (*q == '!')
+ {
+ invert = true;
+ q++;
+ }
+ else
+ invert = false;
+
+ if (! strcmp (q, "all"))
+ mask = MASK_DEBUG_ALL;
+ else if (! strcmp (q, "stack"))
+ mask = MASK_DEBUG_STACK;
+ else if (! strcmp (q, "arg"))
+ mask = MASK_DEBUG_ARG;
+ else if (! strcmp (q, "reg"))
+ mask = MASK_DEBUG_REG;
+ else if (! strcmp (q, "addr"))
+ mask = MASK_DEBUG_ADDR;
+ else if (! strcmp (q, "cost"))
+ mask = MASK_DEBUG_COST;
+ else if (! strcmp (q, "target"))
+ mask = MASK_DEBUG_TARGET;
+ else
+ error ("unknown -mdebug-%s switch", q);
+
+ if (invert)
+ rs6000_debug &= ~mask;
+ else
+ rs6000_debug |= mask;
+ }
+ break;
+
+#ifdef TARGET_USES_SYSV4_OPT
+ case OPT_mcall_:
+ rs6000_abi_name = arg;
+ break;
+
+ case OPT_msdata_:
+ rs6000_sdata_name = arg;
+ break;
+
+ case OPT_mtls_size_:
+ if (strcmp (arg, "16") == 0)
+ rs6000_tls_size = 16;
+ else if (strcmp (arg, "32") == 0)
+ rs6000_tls_size = 32;
+ else if (strcmp (arg, "64") == 0)
+ rs6000_tls_size = 64;
+ else
+ error ("bad value %qs for -mtls-size switch", arg);
+ break;
+
+ case OPT_mrelocatable:
+ if (value == 1)
+ {
+ target_flags |= MASK_MINIMAL_TOC;
+ target_flags_explicit |= MASK_MINIMAL_TOC;
+ TARGET_NO_FP_IN_TOC = 1;
+ }
+ break;
+
+ case OPT_mrelocatable_lib:
+ if (value == 1)
+ {
+ target_flags |= MASK_RELOCATABLE | MASK_MINIMAL_TOC;
+ target_flags_explicit |= MASK_RELOCATABLE | MASK_MINIMAL_TOC;
+ TARGET_NO_FP_IN_TOC = 1;
+ }
+ else
+ {
+ target_flags &= ~MASK_RELOCATABLE;
+ target_flags_explicit |= MASK_RELOCATABLE;
+ }
+ break;
+#endif
+
+ case OPT_mabi_:
+ if (!strcmp (arg, "altivec"))
+ {
+ rs6000_explicit_options.altivec_abi = true;
+ rs6000_altivec_abi = 1;
+
+ /* Enabling the AltiVec ABI turns off the SPE ABI. */
+ rs6000_spe_abi = 0;
+ }
+ else if (! strcmp (arg, "no-altivec"))
+ {
+ rs6000_explicit_options.altivec_abi = true;
+ rs6000_altivec_abi = 0;
+ }
+ else if (! strcmp (arg, "spe"))
+ {
+ rs6000_explicit_options.spe_abi = true;
+ rs6000_spe_abi = 1;
+ rs6000_altivec_abi = 0;
+ if (!TARGET_SPE_ABI)
+ error ("not configured for ABI: '%s'", arg);
+ }
+ else if (! strcmp (arg, "no-spe"))
+ {
+ rs6000_explicit_options.spe_abi = true;
+ rs6000_spe_abi = 0;
+ }
+
+ /* These are here for testing during development only; please do not
+ document them in the manual. */
+ else if (! strcmp (arg, "d64"))
+ {
+ rs6000_darwin64_abi = 1;
+ warning (0, "using darwin64 ABI");
+ }
+ else if (! strcmp (arg, "d32"))
+ {
+ rs6000_darwin64_abi = 0;
+ warning (0, "using old darwin ABI");
+ }
+
+ else if (! strcmp (arg, "ibmlongdouble"))
+ {
+ rs6000_explicit_options.ieee = true;
+ rs6000_ieeequad = 0;
+ warning (0, "using IBM extended precision long double");
+ }
+ else if (! strcmp (arg, "ieeelongdouble"))
+ {
+ rs6000_explicit_options.ieee = true;
+ rs6000_ieeequad = 1;
+ warning (0, "using IEEE extended precision long double");
+ }
+
+ else
+ {
+ error ("unknown ABI specified: '%s'", arg);
+ return false;
+ }
+ break;
+
+ case OPT_mcpu_:
+ rs6000_select[1].string = arg;
+ rs6000_cpu_index = rs6000_cpu_name_lookup (arg);
+ if (rs6000_cpu_index < 0)
+ error ("bad value (%s) for -mcpu", arg);
+ break;
+
+ case OPT_mtune_:
+ rs6000_select[2].string = arg;
+ rs6000_tune_index = rs6000_cpu_name_lookup (arg);
+ if (rs6000_tune_index < 0)
+ error ("bad value (%s) for -mtune", arg);
+ break;
+
+ case OPT_mtraceback_:
+ if (! strncmp (arg, "full", 4))
+ rs6000_traceback = traceback_full;
+ else if (! strncmp (arg, "part", 4))
+ rs6000_traceback = traceback_part;
+ else if (! strncmp (arg, "no", 2))
+ rs6000_traceback = traceback_none;
+ else
+ error ("unknown -mtraceback arg %qs; expecting %<full%>, "
+ "%<partial%> or %<none%>", arg);
+ break;
+
+ case OPT_mfloat_gprs_:
+ rs6000_explicit_options.float_gprs = true;
+ if (! strcmp (arg, "yes") || ! strcmp (arg, "single"))
+ rs6000_float_gprs = 1;
+ else if (! strcmp (arg, "double"))
+ rs6000_float_gprs = 2;
+ else if (! strcmp (arg, "no"))
+ rs6000_float_gprs = 0;
+ else
+ {
+ error ("invalid option for -mfloat-gprs: '%s'", arg);
+ return false;
+ }
+ break;
+
+ case OPT_mlong_double_:
+ rs6000_explicit_options.long_double = true;
+ if (value != 64 && value != 128)
+ {
+ error ("unknown switch -mlong-double-%s", arg);
+ rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
+ return false;
+ }
+ else
+ rs6000_long_double_type_size = value;
+ break;
+
+ case OPT_msched_costly_dep_:
+ rs6000_sched_costly_dep_str = arg;
+ break;
+
+ case OPT_malign_:
+ rs6000_explicit_options.alignment = true;
+ if (! strcmp (arg, "power"))
+ {
+ /* On 64-bit Darwin, power alignment is ABI-incompatible with
+ some C library functions, so warn about it. The flag may be
+ useful for performance studies from time to time though, so
+ don't disable it entirely. */
+ if (DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT)
+ warning (0, "-malign-power is not supported for 64-bit Darwin;"
+ " it is incompatible with the installed C and C++ libraries");
+ rs6000_alignment_flags = MASK_ALIGN_POWER;
+ }
+ else if (! strcmp (arg, "natural"))
+ rs6000_alignment_flags = MASK_ALIGN_NATURAL;
+ else
+ {
+ error ("unknown -malign-XXXXX option specified: '%s'", arg);
+ return false;
+ }
+ break;
+
+ case OPT_msingle_float:
+ if (!TARGET_SINGLE_FPU)
+ warning (0, "-msingle-float option equivalent to -mhard-float");
+ /* -msingle-float implies -mno-double-float and TARGET_HARD_FLOAT. */
+ rs6000_double_float = 0;
+ target_flags &= ~MASK_SOFT_FLOAT;
+ target_flags_explicit |= MASK_SOFT_FLOAT;
+ break;
+
+ case OPT_mdouble_float:
+ /* -mdouble-float implies -msingle-float and TARGET_HARD_FLOAT. */
+ rs6000_single_float = 1;
+ target_flags &= ~MASK_SOFT_FLOAT;
+ target_flags_explicit |= MASK_SOFT_FLOAT;
+ break;
+
+ case OPT_msimple_fpu:
+ if (!TARGET_SINGLE_FPU)
+ warning (0, "-msimple-fpu option ignored");
+ break;
+
+ case OPT_mhard_float:
+ /* -mhard-float implies -msingle-float and -mdouble-float. */
+ rs6000_single_float = rs6000_double_float = 1;
+ break;
+
+ case OPT_msoft_float:
+ /* -msoft-float implies -mno-single-float and -mno-double-float. */
+ rs6000_single_float = rs6000_double_float = 0;
+ break;
+
+ case OPT_mfpu_:
+ fpu_type = rs6000_parse_fpu_option (arg);
+ /* If -mfpu is not none, then turn off SOFT_FLOAT, turn on HARD_FLOAT. */
+ if (fpu_type != FPU_NONE)
+ {
+ target_flags &= ~MASK_SOFT_FLOAT;
+ target_flags_explicit |= MASK_SOFT_FLOAT;
+ rs6000_xilinx_fpu = 1;
+ if (fpu_type == FPU_SF_LITE || fpu_type == FPU_SF_FULL)
+ rs6000_single_float = 1;
+ if (fpu_type == FPU_DF_LITE || fpu_type == FPU_DF_FULL)
+ rs6000_single_float = rs6000_double_float = 1;
+ if (fpu_type == FPU_SF_LITE || fpu_type == FPU_DF_LITE)
+ rs6000_simple_fpu = 1;
+ }
+ else
+ {
+ /* -mfpu=none is equivalent to -msoft-float. */
+ target_flags |= MASK_SOFT_FLOAT;
+ target_flags_explicit |= MASK_SOFT_FLOAT;
+ rs6000_single_float = rs6000_double_float = 0;
+ }
+ break;
+
+ case OPT_mrecip:
+ rs6000_recip_name = (value) ? "default" : "none";
+ break;
+
+ case OPT_mrecip_:
+ rs6000_recip_name = arg;
+ break;
+ }
+ return true;
+}
+
+/* Do anything needed at the start of the asm file. */
+
+static void
+rs6000_file_start (void)
+{
+ size_t i;
+ char buffer[80];
+ const char *start = buffer;
+ struct rs6000_cpu_select *ptr;
+ const char *default_cpu = TARGET_CPU_DEFAULT;
+ FILE *file = asm_out_file;
+
+ default_file_start ();
+
+#ifdef TARGET_BI_ARCH
+ if ((TARGET_DEFAULT ^ target_flags) & MASK_64BIT)
+ default_cpu = 0;
+#endif
+
+ if (flag_verbose_asm)
+ {
+ sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
+ rs6000_select[0].string = default_cpu;
+
+ for (i = 0; i < ARRAY_SIZE (rs6000_select); i++)
+ {
+ ptr = &rs6000_select[i];
+ if (ptr->string != (char *)0 && ptr->string[0] != '\0')
+ {
+ fprintf (file, "%s %s%s", start, ptr->name, ptr->string);
+ start = "";
+ }
+ }
+
+ if (PPC405_ERRATUM77)
+ {
+ fprintf (file, "%s PPC405CR_ERRATUM77", start);
+ start = "";
+ }
+
+#ifdef USING_ELFOS_H
+ switch (rs6000_sdata)
+ {
+ case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
+ case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
+ case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
+ case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
+ }
+
+ if (rs6000_sdata && g_switch_value)
+ {
+ fprintf (file, "%s -G %d", start,
+ g_switch_value);
+ start = "";
+ }
+#endif
+
+ if (*start == '\0')
+ putc ('\n', file);
+ }
+
+ if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
+ {
+ switch_to_section (toc_section);
+ switch_to_section (text_section);
+ }
+}
+
+
+/* Return nonzero if this function is known to have a null epilogue. */
+
+int
+direct_return (void)
+{
+ if (reload_completed)
+ {
+ rs6000_stack_t *info = rs6000_stack_info ();
+
+ if (info->first_gp_reg_save == 32
+ && info->first_fp_reg_save == 64
+ && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
+ && ! info->lr_save_p
+ && ! info->cr_save_p
+ && info->vrsave_mask == 0
+ && ! info->push_p)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return the number of instructions it takes to form a constant in an
+ integer register. */
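+/* Illustrative costs: 0x7fff or -0x8000 take one addi; 0x12340000 takes
+ one addis; 0x12345678 takes two (addis then ori); and on 64-bit,
+ 0x1234567800000000 takes three (addis, ori, then a shift). */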
+
+int
+num_insns_constant_wide (HOST_WIDE_INT value)
+{
+ /* signed constant loadable with {cal|addi} */
+ if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
+ return 1;
+
+ /* constant loadable with {cau|addis} */
+ else if ((value & 0xffff) == 0
+ && (value >> 31 == -1 || value >> 31 == 0))
+ return 1;
+
+#if HOST_BITS_PER_WIDE_INT == 64
+ else if (TARGET_POWERPC64)
+ {
+ HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ HOST_WIDE_INT high = value >> 31;
+
+ if (high == 0 || high == -1)
+ return 2;
+
+ high >>= 1;
+
+ if (low == 0)
+ return num_insns_constant_wide (high) + 1;
+ else if (high == 0)
+ return num_insns_constant_wide (low) + 1;
+ else
+ return (num_insns_constant_wide (high)
+ + num_insns_constant_wide (low) + 1);
+ }
+#endif
+
+ else
+ return 2;
+}
+
+int
+num_insns_constant (rtx op, enum machine_mode mode)
+{
+ HOST_WIDE_INT low, high;
+
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+#if HOST_BITS_PER_WIDE_INT == 64
+ if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
+ && mask64_operand (op, mode))
+ return 2;
+ else
+#endif
+ return num_insns_constant_wide (INTVAL (op));
+
+ case CONST_DOUBLE:
+ if (mode == SFmode || mode == SDmode)
+ {
+ long l;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
+ else
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ return num_insns_constant_wide ((HOST_WIDE_INT) l);
+ }
+
+ if (mode == VOIDmode || mode == DImode)
+ {
+ high = CONST_DOUBLE_HIGH (op);
+ low = CONST_DOUBLE_LOW (op);
+ }
+ else
+ {
+ long l[2];
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
+ else
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+ high = l[WORDS_BIG_ENDIAN == 0];
+ low = l[WORDS_BIG_ENDIAN != 0];
+ }
+
+ if (TARGET_32BIT)
+ return (num_insns_constant_wide (low)
+ + num_insns_constant_wide (high));
+ else
+ {
+ if ((high == 0 && low >= 0)
+ || (high == -1 && low < 0))
+ return num_insns_constant_wide (low);
+
+ else if (mask64_operand (op, mode))
+ return 2;
+
+ else if (low == 0)
+ return num_insns_constant_wide (high) + 1;
+
+ else
+ return (num_insns_constant_wide (high)
+ + num_insns_constant_wide (low) + 1);
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
+ If the mode of OP is MODE_VECTOR_INT, this simply returns the
+ corresponding element of the vector, but for V4SFmode and V2SFmode,
+ the corresponding "float" is interpreted as an SImode integer. */
+
+HOST_WIDE_INT
+const_vector_elt_as_int (rtx op, unsigned int elt)
+{
+ rtx tmp;
+
+ /* We can't handle V2DImode and V2DFmode vector constants here yet. */
+ gcc_assert (GET_MODE (op) != V2DImode
+ && GET_MODE (op) != V2DFmode);
+
+ tmp = CONST_VECTOR_ELT (op, elt);
+ if (GET_MODE (op) == V4SFmode
+ || GET_MODE (op) == V2SFmode)
+ tmp = gen_lowpart (SImode, tmp);
+ return INTVAL (tmp);
+}
+
+/* Return true if OP can be synthesized with a particular vspltisb, vspltish
+ or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
+ depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
+ all items are set to the same value and contain COPIES replicas of the
+ vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
+ operand and the others are set to the value of the operand's msb. */
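+
+/* For instance (illustrative): a V4SImode constant with every element
+ equal to 0x00050005 is matched with STEP 1 and COPIES 2, since each
+ SImode element holds two copies of the HImode immediate 5, and it can
+ be generated with vspltish. */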
+
+static bool
+vspltis_constant (rtx op, unsigned step, unsigned copies)
+{
+ enum machine_mode mode = GET_MODE (op);
+ enum machine_mode inner = GET_MODE_INNER (mode);
+
+ unsigned i;
+ unsigned nunits;
+ unsigned bitsize;
+ unsigned mask;
+
+ HOST_WIDE_INT val;
+ HOST_WIDE_INT splat_val;
+ HOST_WIDE_INT msb_val;
+
+ if (mode == V2DImode || mode == V2DFmode)
+ return false;
+
+ nunits = GET_MODE_NUNITS (mode);
+ bitsize = GET_MODE_BITSIZE (inner);
+ mask = GET_MODE_MASK (inner);
+
+ val = const_vector_elt_as_int (op, nunits - 1);
+ splat_val = val;
+ msb_val = val > 0 ? 0 : -1;
+
+ /* Construct the value to be splatted, if possible. If not, return 0. */
+ for (i = 2; i <= copies; i *= 2)
+ {
+ HOST_WIDE_INT small_val;
+ bitsize /= 2;
+ small_val = splat_val >> bitsize;
+ mask >>= bitsize;
+ if (splat_val != ((small_val << bitsize) | (small_val & mask)))
+ return false;
+ splat_val = small_val;
+ }
+
+ /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
+ if (EASY_VECTOR_15 (splat_val))
+ ;
+
+ /* Also check if we can splat, and then add the result to itself. Do so if
+ the value is positive, or if the splat instruction is using OP's mode;
+ for splat_val < 0, the splat and the add should use the same mode. */
+ else if (EASY_VECTOR_15_ADD_SELF (splat_val)
+ && (splat_val >= 0 || (step == 1 && copies == 1)))
+ ;
+
+ /* Also check if we are loading up the most significant bit, which can be
+ done by loading up -1 and shifting the value left by -1; the shift count
+ is taken modulo the element size, so -1 acts as element size - 1. */
+ else if (EASY_VECTOR_MSB (splat_val, inner))
+ ;
+
+ else
+ return false;
+
+ /* Check if VAL is present in every STEP-th element, and the
+ other elements are filled with its most significant bit. */
+ for (i = 0; i < nunits - 1; ++i)
+ {
+ HOST_WIDE_INT desired_val;
+ if (((i + 1) & (step - 1)) == 0)
+ desired_val = val;
+ else
+ desired_val = msb_val;
+
+ if (desired_val != const_vector_elt_as_int (op, i))
+ return false;
+ }
+
+ return true;
+}
+
+
+/* Return true if OP is of the given MODE and can be synthesized
+ with a vspltisb, vspltish or vspltisw. */
+
+bool
+easy_altivec_constant (rtx op, enum machine_mode mode)
+{
+ unsigned step, copies;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+ else if (mode != GET_MODE (op))
+ return false;
+
+ /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
+ constants. */
+ if (mode == V2DFmode)
+ return zero_constant (op, mode);
+
+ if (mode == V2DImode)
+ {
+ /* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
+ easy. */
+ if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
+ || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
+ return false;
+
+ if (zero_constant (op, mode))
+ return true;
+
+ if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
+ && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
+ return true;
+
+ return false;
+ }
+
+ /* Start with a vspltisw. */
+ step = GET_MODE_NUNITS (mode) / 4;
+ copies = 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ /* Then try with a vspltish. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ /* And finally a vspltisb. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return true;
+
+ return false;
+}
+
+/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
+ result is OP. Abort if it is not possible. */
+
+rtx
+gen_easy_altivec_constant (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+ int nunits = GET_MODE_NUNITS (mode);
+ rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+ unsigned step = nunits / 4;
+ unsigned copies = 1;
+
+ /* Start with a vspltisw. */
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last));
+
+ /* Then try with a vspltish. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last));
+
+ /* And finally a vspltisb. */
+ if (step == 1)
+ copies <<= 1;
+ else
+ step >>= 1;
+
+ if (vspltis_constant (op, step, copies))
+ return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last));
+
+ gcc_unreachable ();
+}
+
+const char *
+output_vec_const_move (rtx *operands)
+{
+ int cst, cst2;
+ enum machine_mode mode;
+ rtx dest, vec;
+
+ dest = operands[0];
+ vec = operands[1];
+ mode = GET_MODE (dest);
+
+ if (TARGET_VSX)
+ {
+ if (zero_constant (vec, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ if (mode == V2DImode
+ && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
+ && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
+ return "vspltisw %0,-1";
+ }
+
+ if (TARGET_ALTIVEC)
+ {
+ rtx splat_vec;
+ if (zero_constant (vec, mode))
+ return "vxor %0,%0,%0";
+
+ splat_vec = gen_easy_altivec_constant (vec);
+ gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
+ operands[1] = XEXP (splat_vec, 0);
+ if (!EASY_VECTOR_15 (INTVAL (operands[1])))
+ return "#";
+
+ switch (GET_MODE (splat_vec))
+ {
+ case V4SImode:
+ return "vspltisw %0,%1";
+
+ case V8HImode:
+ return "vspltish %0,%1";
+
+ case V16QImode:
+ return "vspltisb %0,%1";
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ gcc_assert (TARGET_SPE);
+
+ /* Vector constant 0 is handled as a splitter of V2SI, and in the
+ pattern of V1DI, V4HI, and V2SF.
+
+ FIXME: We should probably return # and add post reload
+ splitters for these, but this way is so easy ;-). */
+ cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
+ cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
+ operands[1] = CONST_VECTOR_ELT (vec, 0);
+ operands[2] = CONST_VECTOR_ELT (vec, 1);
+ if (cst == cst2)
+ return "li %0,%1\n\tevmergelo %0,%0,%0";
+ else
+ return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
+}
+
+/* Initialize the PAIRED vector TARGET to VALS. */
+
+void
+paired_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0;
+ rtx x, new_rtx, tmp, constant_op, op1, op2;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var;
+ }
+ if (n_var == 0)
+ {
+ /* Load from constant pool. */
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ if (n_var == 2)
+ {
+ /* The vector is initialized only with non-constants. */
+ new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
+ XVECEXP (vals, 0, 1));
+
+ emit_move_insn (target, new_rtx);
+ return;
+ }
+
+ /* One field is non-constant and the other one is a constant. Load the
+ constant from the constant pool and use ps_merge instruction to
+ construct the whole vector. */
+ op1 = XVECEXP (vals, 0, 0);
+ op2 = XVECEXP (vals, 0, 1);
+
+ constant_op = (CONSTANT_P (op1)) ? op1 : op2;
+
+ tmp = gen_reg_rtx (GET_MODE (constant_op));
+ emit_move_insn (tmp, constant_op);
+
+ if (CONSTANT_P (op1))
+ new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
+ else
+ new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
+
+ emit_move_insn (target, new_rtx);
+}
+
+void
+paired_expand_vector_move (rtx operands[])
+{
+ rtx op0 = operands[0], op1 = operands[1];
+
+ emit_move_insn (op0, op1);
+}
+
+/* Emit vector compare for code RCODE. DEST is the destination, OP0 and
+ OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
+ two operands of the comparison RCODE. This is a recursive
+ function. */
+
+static void
+paired_emit_vector_compare (enum rtx_code rcode,
+ rtx dest, rtx op0, rtx op1,
+ rtx cc_op0, rtx cc_op1)
+{
+ rtx tmp = gen_reg_rtx (V2SFmode);
+ rtx tmp1, max, min;
+
+ gcc_assert (TARGET_PAIRED_FLOAT);
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+ switch (rcode)
+ {
+ case LT:
+ case LTU:
+ paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case GE:
+ case GEU:
+ emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
+ emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
+ return;
+ case LE:
+ case LEU:
+ paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
+ return;
+ case GT:
+ paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case EQ:
+ tmp1 = gen_reg_rtx (V2SFmode);
+ max = gen_reg_rtx (V2SFmode);
+ min = gen_reg_rtx (V2SFmode);
+
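+ /* Illustrative summary: min (a,b) - max (a,b) is zero iff a == b and
+ negative otherwise, so the final sel picks OP0 exactly on equality. */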
+ emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
+ emit_insn (gen_selv2sf4
+ (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
+ emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
+ emit_insn (gen_selv2sf4
+ (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
+ emit_insn (gen_subv2sf3 (tmp1, min, max));
+ emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
+ return;
+ case NE:
+ paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case UNLE:
+ paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case UNLT:
+ paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case UNGE:
+ paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ case UNGT:
+ paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
+ return;
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
+/* Emit vector conditional expression.
+ DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
+ CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
+
+int
+paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum rtx_code rcode = GET_CODE (cond);
+
+ if (!TARGET_PAIRED_FLOAT)
+ return 0;
+
+ paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
+
+ return 1;
+}
+
+/* Initialize vector TARGET to VALS. */
+
+void
+rs6000_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true, all_const_zero = true;
+ rtx x, mem;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+ else if (x != CONST0_RTX (inner_mode))
+ all_const_zero = false;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ if ((int_vector_p || TARGET_VSX) && all_const_zero)
+ {
+ /* Zero register. */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_XOR (mode, target, target)));
+ return;
+ }
+ else if (int_vector_p && easy_vector_constant (const_vec, mode))
+ {
+ /* Splat immediate. */
+ emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
+ return;
+ }
+ else
+ {
+ /* Load from constant pool. */
+ emit_move_insn (target, const_vec);
+ return;
+ }
+ }
+
+ /* Double word values on VSX can use xxpermdi or lxvdsx. */
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ rtx op0 = XVECEXP (vals, 0, 0);
+ rtx op1 = XVECEXP (vals, 0, 1);
+ if (all_same)
+ {
+ if (!MEM_P (op0) && !REG_P (op0))
+ op0 = force_reg (inner_mode, op0);
+ if (mode == V2DFmode)
+ emit_insn (gen_vsx_splat_v2df (target, op0));
+ else
+ emit_insn (gen_vsx_splat_v2di (target, op0));
+ }
+ else
+ {
+ op0 = force_reg (inner_mode, op0);
+ op1 = force_reg (inner_mode, op1);
+ if (mode == V2DFmode)
+ emit_insn (gen_vsx_concat_v2df (target, op0, op1));
+ else
+ emit_insn (gen_vsx_concat_v2di (target, op0, op1));
+ }
+ return;
+ }
+
+ /* With single precision floating point on VSX, we know that internally
+ single precision is actually represented as a double. Either make two
+ V2DF vectors and convert those to single precision, or do one
+ conversion and splat the result to the other elements. */
+ if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
+ {
+ if (all_same)
+ {
+ rtx freg = gen_reg_rtx (V4SFmode);
+ rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+
+ emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+ emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
+ }
+ else
+ {
+ rtx dbl_even = gen_reg_rtx (V2DFmode);
+ rtx dbl_odd = gen_reg_rtx (V2DFmode);
+ rtx flt_even = gen_reg_rtx (V4SFmode);
+ rtx flt_odd = gen_reg_rtx (V4SFmode);
+ rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
+ rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
+ rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
+
+ emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
+ emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
+ emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
+ emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
+ emit_insn (gen_vec_extract_evenv4sf (target, flt_even, flt_odd));
+ }
+ return;
+ }
+
+ /* Store value to stack temp. Load vector element. Splat. However, splat
+ of 64-bit items is not supported on Altivec. */
+ if (all_same && GET_MODE_SIZE (mode) <= 4)
+ {
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0);
+ emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
+ XVECEXP (vals, 0, 0));
+ x = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, const0_rtx), UNSPEC_LVE);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ target, mem),
+ x)));
+ x = gen_rtx_VEC_SELECT (inner_mode, target,
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (1, const0_rtx)));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_DUPLICATE (mode, x)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite
+ varying field. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ rs6000_expand_vector_init (target, copy);
+
+ /* Insert variable. */
+ rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
+ return;
+ }
+
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
+ emit_move_insn (target, mem);
+}
+
+/* Set field ELT of TARGET to VAL. */
+
+void
+rs6000_expand_vector_set (rtx target, rtx val, int elt)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ rtx reg = gen_reg_rtx (mode);
+ rtx mask, mem, x;
+ int width = GET_MODE_SIZE (inner_mode);
+ int i;
+
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ rtx (*set_func) (rtx, rtx, rtx, rtx)
+ = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
+ emit_insn (set_func (target, target, val, GEN_INT (elt)));
+ return;
+ }
+
+ /* Load single variable value. */
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode), 0);
+ emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
+ x = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, const0_rtx), UNSPEC_LVE);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ reg, mem),
+ x)));
+
+ /* Linear sequence. */
+ mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
+ for (i = 0; i < 16; ++i)
+ XVECEXP (mask, 0, i) = GEN_INT (i);
+
+ /* Set permute mask to insert element into target. */
+ for (i = 0; i < width; ++i)
+ XVECEXP (mask, 0, elt*width + i)
+ = GEN_INT (i + 0x10);
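+ /* E.g. for V4SImode and ELT 2 the mask bytes become
+ { 0,1,2,3, 4,5,6,7, 16,17,18,19, 12,13,14,15 }, so the vperm below
+ keeps TARGET except element 2, which it takes from the first word of
+ REG (illustrative; assumes the lve load placed VAL there). */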
+ x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
+ x = gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, target, reg,
+ force_reg (V16QImode, x)),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+}
+
+/* Extract field ELT from VEC into TARGET. */
+
+void
+rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ rtx mem;
+
+ if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ {
+ rtx (*extract_func) (rtx, rtx, rtx)
+ = ((mode == V2DFmode) ? gen_vsx_extract_v2df : gen_vsx_extract_v2di);
+ emit_insn (extract_func (target, vec, GEN_INT (elt)));
+ return;
+ }
+
+ /* Allocate mode-sized buffer. */
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+
+ emit_move_insn (mem, vec);
+
+ /* Add offset to field within buffer matching vector element. */
+ mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
+
+ emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
+}
+
+/* Generates shifts and masks for a pair of rldicl or rldicr insns to
+ implement ANDing by the mask IN. */
+void
+build_mask64_2_operands (rtx in, rtx *out)
+{
+#if HOST_BITS_PER_WIDE_INT >= 64
+ unsigned HOST_WIDE_INT c, lsb, m1, m2;
+ int shift;
+
+ gcc_assert (GET_CODE (in) == CONST_INT);
+
+ c = INTVAL (in);
+ if (c & 1)
+ {
+ /* Assume c initially something like 0x00fff000000fffff. The idea
+ is to rotate the word so that the middle ^^^^^^ group of zeros
+ is at the MS end and can be cleared with an rldicl mask. We then
+ rotate back and clear off the MS ^^ group of zeros with a
+ second rldicl. */
+ c = ~c; /* c == 0xff000ffffff00000 */
+ lsb = c & -c; /* lsb == 0x0000000000100000 */
+ m1 = -lsb; /* m1 == 0xfffffffffff00000 */
+ c = ~c; /* c == 0x00fff000000fffff */
+ c &= -lsb; /* c == 0x00fff00000000000 */
+ lsb = c & -c; /* lsb == 0x0000100000000000 */
+ c = ~c; /* c == 0xff000fffffffffff */
+ c &= -lsb; /* c == 0xff00000000000000 */
+ shift = 0;
+ while ((lsb >>= 1) != 0)
+ shift++; /* shift == 44 on exit from loop */
+ m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
+ m1 = ~m1; /* m1 == 0x000000ffffffffff */
+ m2 = ~c; /* m2 == 0x00ffffffffffffff */
+ }
+ else
+ {
+ /* Assume c initially something like 0xff000f0000000000. The idea
+ is to rotate the word so that the ^^^ middle group of zeros
+ is at the LS end and can be cleared with an rldicr mask. We then
+ rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
+ a second rldicr. */
+ lsb = c & -c; /* lsb == 0x0000010000000000 */
+ m2 = -lsb; /* m2 == 0xffffff0000000000 */
+ c = ~c; /* c == 0x00fff0ffffffffff */
+ c &= -lsb; /* c == 0x00fff00000000000 */
+ lsb = c & -c; /* lsb == 0x0000100000000000 */
+ c = ~c; /* c == 0xff000fffffffffff */
+ c &= -lsb; /* c == 0xff00000000000000 */
+ shift = 0;
+ while ((lsb >>= 1) != 0)
+ shift++; /* shift == 44 on exit from loop */
+ m1 = ~c; /* m1 == 0x00ffffffffffffff */
+ m1 >>= shift; /* m1 == 0x0000000000000fff */
+ m1 = ~m1; /* m1 == 0xfffffffffffff000 */
+ }
+
+ /* Note that when we only have two 0->1 and 1->0 transitions, one of the
+ masks will be all 1's. We are guaranteed more than one transition. */
+ out[0] = GEN_INT (64 - shift);
+ out[1] = GEN_INT (m1);
+ out[2] = GEN_INT (shift);
+ out[3] = GEN_INT (m2);
+#else
+ (void)in;
+ (void)out;
+ gcc_unreachable ();
+#endif
+}
+
+/* Return TRUE if OP is an invalid SUBREG operation on the e500. */
+
+bool
+invalid_e500_subreg (rtx op, enum machine_mode mode)
+{
+ if (TARGET_E500_DOUBLE)
+ {
+ /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
+ subreg:TI and reg:TF. Decimal float modes are like integer
+ modes (only low part of each register used) for this
+ purpose. */
+ if (GET_CODE (op) == SUBREG
+ && (mode == SImode || mode == DImode || mode == TImode
+ || mode == DDmode || mode == TDmode)
+ && REG_P (SUBREG_REG (op))
+ && (GET_MODE (SUBREG_REG (op)) == DFmode
+ || GET_MODE (SUBREG_REG (op)) == TFmode))
+ return true;
+
+ /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
+ reg:TI. */
+ if (GET_CODE (op) == SUBREG
+ && (mode == DFmode || mode == TFmode)
+ && REG_P (SUBREG_REG (op))
+ && (GET_MODE (SUBREG_REG (op)) == DImode
+ || GET_MODE (SUBREG_REG (op)) == TImode
+ || GET_MODE (SUBREG_REG (op)) == DDmode
+ || GET_MODE (SUBREG_REG (op)) == TDmode))
+ return true;
+ }
+
+ if (TARGET_SPE
+ && GET_CODE (op) == SUBREG
+ && mode == SImode
+ && REG_P (SUBREG_REG (op))
+ && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
+ return true;
+
+ return false;
+}
+
+/* AIX increases natural record alignment to doubleword if the first
+ field is an FP double while the FP fields remain word aligned. */
+
+unsigned int
+rs6000_special_round_type_align (tree type, unsigned int computed,
+ unsigned int specified)
+{
+ unsigned int align = MAX (computed, specified);
+ tree field = TYPE_FIELDS (type);
+
+ /* Skip all non-field decls. */
+ while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ field = DECL_CHAIN (field);
+
+ if (field != NULL && field != type)
+ {
+ type = TREE_TYPE (field);
+ while (TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+
+ if (type != error_mark_node && TYPE_MODE (type) == DFmode)
+ align = MAX (align, 64);
+ }
+
+ return align;
+}
+
+/* Darwin increases record alignment to the natural alignment of
+ the first field. */
+
+unsigned int
+darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
+ unsigned int specified)
+{
+ unsigned int align = MAX (computed, specified);
+
+ if (TYPE_PACKED (type))
+ return align;
+
+ /* Find the first field, looking down into aggregates. */
+ do {
+ tree field = TYPE_FIELDS (type);
+ /* Skip all non-field decls. */
+ while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ field = DECL_CHAIN (field);
+ if (! field)
+ break;
+ /* A packed field does not contribute any extra alignment. */
+ if (DECL_PACKED (field))
+ return align;
+ type = TREE_TYPE (field);
+ while (TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+ } while (AGGREGATE_TYPE_P (type));
+
+ if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
+ align = MAX (align, TYPE_ALIGN (type));
+
+ return align;
+}
+
+/* Return 1 for an operand in small memory on V.4/eabi. */
+
+int
+small_data_operand (rtx op ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+#if TARGET_ELF
+ rtx sym_ref;
+
+ if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
+ return 0;
+
+ if (DEFAULT_ABI != ABI_V4)
+ return 0;
+
+ /* Vector and float memory instructions have a limited offset on the
+ SPE, so using a vector or float variable directly as an operand is
+ not useful. */
+ if (TARGET_SPE
+ && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
+ return 0;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ sym_ref = op;
+
+ else if (GET_CODE (op) != CONST
+ || GET_CODE (XEXP (op, 0)) != PLUS
+ || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
+ || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
+ return 0;
+
+ else
+ {
+ rtx sum = XEXP (op, 0);
+ HOST_WIDE_INT summand;
+
+ /* We have to be careful here, because it is the referenced address
+ that must be 32k from _SDA_BASE_, not just the symbol. */
+ summand = INTVAL (XEXP (sum, 1));
+ if (summand < 0 || summand > g_switch_value)
+ return 0;
+
+ sym_ref = XEXP (sum, 0);
+ }
+
+ return SYMBOL_REF_SMALL_P (sym_ref);
+#else
+ return 0;
+#endif
+}
+
+/* Return true if either operand is a general purpose register. */
+
+bool
+gpr_or_gpr_p (rtx op0, rtx op1)
+{
+ return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
+ || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
+}
+
+
+/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
+
+static bool
+reg_offset_addressing_ok_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SFmode:
+ case V4SImode:
+ case V2DFmode:
+ case V2DImode:
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
+ return false;
+ break;
+
+ case V4HImode:
+ case V2SImode:
+ case V1DImode:
+ case V2SFmode:
+ /* Paired vector modes. Only reg+reg addressing is valid. */
+ if (TARGET_PAIRED_FLOAT)
+ return false;
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static bool
+virtual_stack_registers_memory_p (rtx op)
+{
+ int regnum;
+
+ if (GET_CODE (op) == REG)
+ regnum = REGNO (op);
+
+ else if (GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == REG
+ && GET_CODE (XEXP (op, 1)) == CONST_INT)
+ regnum = REGNO (XEXP (op, 0));
+
+ else
+ return false;
+
+ return (regnum >= FIRST_VIRTUAL_REGISTER
+ && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
+}
+
+/* Return true if memory accesses to OP are known to never straddle
+ a 32k boundary. */
+
+static bool
+offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
+ enum machine_mode mode)
+{
+ tree decl, type;
+ unsigned HOST_WIDE_INT dsize, dalign;
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return false;
+
+ decl = SYMBOL_REF_DECL (op);
+ if (!decl)
+ {
+ if (GET_MODE_SIZE (mode) == 0)
+ return false;
+
+ /* -fsection-anchors loses the original SYMBOL_REF_DECL when
+ replacing memory addresses with an anchor plus offset. We
+ could find the decl by rummaging around in the block->objects
+ VEC for the given offset but that seems like too much work. */
+ dalign = 1;
+ if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
+ && SYMBOL_REF_ANCHOR_P (op)
+ && SYMBOL_REF_BLOCK (op) != NULL)
+ {
+ struct object_block *block = SYMBOL_REF_BLOCK (op);
+ HOST_WIDE_INT lsb, mask;
+
+ /* Given the alignment of the block.. */
+ dalign = block->alignment;
+ mask = dalign / BITS_PER_UNIT - 1;
+
+ /* ..and the combined offset of the anchor and any offset
+ to this block object.. */
+ offset += SYMBOL_REF_BLOCK_OFFSET (op);
+ lsb = offset & -offset;
+
+ /* ..find how many bits of the alignment we know for the
+ object. */
+ mask &= lsb - 1;
+ dalign = mask + 1;
+ }
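+ /* E.g. a 16-byte aligned block with combined offset 40 yields
+ lsb == 8 and hence dalign == 8 (illustrative). */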
+ return dalign >= GET_MODE_SIZE (mode);
+ }
+
+ if (DECL_P (decl))
+ {
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ return true;
+
+ if (!DECL_SIZE_UNIT (decl))
+ return false;
+
+ if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
+ return false;
+
+ dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+ if (dsize > 32768)
+ return false;
+
+ dalign = DECL_ALIGN_UNIT (decl);
+ return dalign >= dsize;
+ }
+
+ type = TREE_TYPE (decl);
+
+ if (TREE_CODE (decl) == STRING_CST)
+ dsize = TREE_STRING_LENGTH (decl);
+ else if (TYPE_SIZE_UNIT (type)
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ else
+ return false;
+ if (dsize > 32768)
+ return false;
+
+ dalign = TYPE_ALIGN (type);
+ if (CONSTANT_CLASS_P (decl))
+ dalign = CONSTANT_ALIGNMENT (decl, dalign);
+ else
+ dalign = DATA_ALIGNMENT (decl, dalign);
+ dalign /= BITS_PER_UNIT;
+ return dalign >= dsize;
+}
+
+static bool
+constant_pool_expr_p (rtx op)
+{
+ rtx base, offset;
+
+ split_const (op, &base, &offset);
+ return (GET_CODE (base) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (base)
+ && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
+}
+
+static rtx tocrel_base, tocrel_offset;
+
+bool
+toc_relative_expr_p (rtx op)
+{
+ if (GET_CODE (op) != CONST)
+ return false;
+
+ split_const (op, &tocrel_base, &tocrel_offset);
+ return (GET_CODE (tocrel_base) == UNSPEC
+ && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
+}
+
+/* Return true if X is a constant pool address, and also for cmodel=medium
+ if X is a toc-relative address known to be offsettable within MODE. */
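+
+/* A typical accepted X (illustrative) looks like
+ (plus (reg 2) (const (unspec [(symbol_ref "s")] UNSPEC_TOCREL))),
+ reg 2 being the TOC pointer. */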
+
+bool
+legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
+ bool strict)
+{
+ return (TARGET_TOC
+ && (GET_CODE (x) == PLUS || GET_CODE (x) == LO_SUM)
+ && GET_CODE (XEXP (x, 0)) == REG
+ && (REGNO (XEXP (x, 0)) == TOC_REGISTER
+ || ((TARGET_MINIMAL_TOC
+ || TARGET_CMODEL != CMODEL_SMALL)
+ && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)))
+ && toc_relative_expr_p (XEXP (x, 1))
+ && (TARGET_CMODEL != CMODEL_MEDIUM
+ || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
+ || mode == QImode
+ || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
+ INTVAL (tocrel_offset), mode)));
+}
+
+static bool
+legitimate_small_data_p (enum machine_mode mode, rtx x)
+{
+ return (DEFAULT_ABI == ABI_V4
+ && !flag_pic && !TARGET_TOC
+ && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
+ && small_data_operand (x, mode));
+}
+
+/* SPE offset addressing is limited to 5-bits worth of double words. */
+#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
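+/* I.e. the offset must be a multiple of 8 in [0, 248]: only bits 3..7
+ (0xf8) may be set, so e.g. 240 is accepted while 4 or 256 are not. */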
+
+bool
+rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict)
+{
+ unsigned HOST_WIDE_INT offset, extra;
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+ if (GET_CODE (XEXP (x, 0)) != REG)
+ return false;
+ if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
+ return false;
+ if (!reg_offset_addressing_ok_p (mode))
+ return virtual_stack_registers_memory_p (x);
+ if (legitimate_constant_pool_address_p (x, mode, strict))
+ return true;
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return false;
+
+ offset = INTVAL (XEXP (x, 1));
+ extra = 0;
+ switch (mode)
+ {
+ case V4HImode:
+ case V2SImode:
+ case V1DImode:
+ case V2SFmode:
+ /* SPE vector modes. */
+ return SPE_CONST_OFFSET_OK (offset);
+
+ case DFmode:
+ if (TARGET_E500_DOUBLE)
+ return SPE_CONST_OFFSET_OK (offset);
+
+ /* If we are using VSX scalar loads, restrict ourselves to reg+reg
+ addressing. */
+ if (VECTOR_MEM_VSX_P (DFmode))
+ return false;
+
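+ /* FALLTHRU */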
+ case DDmode:
+ case DImode:
+ /* On e500v2, we may have:
+
+ (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
+
+ Which gets addressed with evldd instructions. */
+ if (TARGET_E500_DOUBLE)
+ return SPE_CONST_OFFSET_OK (offset);
+
+ if (mode == DFmode || mode == DDmode || !TARGET_POWERPC64)
+ extra = 4;
+ else if (offset & 3)
+ return false;
+ break;
+
+ case TFmode:
+ if (TARGET_E500_DOUBLE)
+ return (SPE_CONST_OFFSET_OK (offset)
+ && SPE_CONST_OFFSET_OK (offset + 8));
+
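+ /* FALLTHRU */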
+ case TDmode:
+ case TImode:
+ if (mode == TFmode || mode == TDmode || !TARGET_POWERPC64)
+ extra = 12;
+ else if (offset & 3)
+ return false;
+ else
+ extra = 8;
+ break;
+
+ default:
+ break;
+ }
+
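+ /* EXTRA covers the additional words of a multiword access; e.g. DImode
+ on 32-bit takes two word loads, so both OFFSET and OFFSET + 4 must lie
+ within the signed 16-bit displacement range checked here. */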
+ offset += 0x8000;
+ return (offset < 0x10000) && (offset + extra < 0x10000);
+}
+
+bool
+legitimate_indexed_address_p (rtx x, int strict)
+{
+ rtx op0, op1;
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Recognize the rtl generated by reload which we know will later be
+ replaced with proper base and index regs. */
+ if (!strict
+ && reload_in_progress
+ && (REG_P (op0) || GET_CODE (op0) == PLUS)
+ && REG_P (op1))
+ return true;
+
+ return (REG_P (op0) && REG_P (op1)
+ && ((INT_REG_OK_FOR_BASE_P (op0, strict)
+ && INT_REG_OK_FOR_INDEX_P (op1, strict))
+ || (INT_REG_OK_FOR_BASE_P (op1, strict)
+ && INT_REG_OK_FOR_INDEX_P (op0, strict))));
+}
+
+bool
+avoiding_indexed_address_p (enum machine_mode mode)
+{
+ /* Avoid indexed addressing for modes that have non-indexed
+ load/store instruction forms. */
+ return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
+}
+
+inline bool
+legitimate_indirect_address_p (rtx x, int strict)
+{
+ return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
+}
+
+bool
+macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
+{
+ if (!TARGET_MACHO || !flag_pic
+ || mode != SImode || GET_CODE (x) != MEM)
+ return false;
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) != LO_SUM)
+ return false;
+ if (GET_CODE (XEXP (x, 0)) != REG)
+ return false;
+ if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
+ return false;
+ x = XEXP (x, 1);
+
+ return CONSTANT_P (x);
+}
+
+static bool
+legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
+{
+ if (GET_CODE (x) != LO_SUM)
+ return false;
+ if (GET_CODE (XEXP (x, 0)) != REG)
+ return false;
+ if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
+ return false;
+ /* Restrict addressing for DI because of our SUBREG hackery. */
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DDmode || mode == TDmode
+ || mode == DImode))
+ return false;
+ x = XEXP (x, 1);
+
+ if (TARGET_ELF || TARGET_MACHO)
+ {
+ if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
+ return false;
+ if (TARGET_TOC)
+ return false;
+ if (GET_MODE_NUNITS (mode) != 1)
+ return false;
+ if (GET_MODE_BITSIZE (mode) > 64
+ || (GET_MODE_BITSIZE (mode) > 32 && !TARGET_POWERPC64
+ && !(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && (mode == DFmode || mode == DDmode))))
+ return false;
+
+ return CONSTANT_P (x);
+ }
+
+ return false;
+}
+
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This is used from only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was
+ called. In some cases it is useful to look at this to decide what
+ needs to be done.
+
+ It is always safe for this function to do nothing. It exists to
+ recognize opportunities to optimize the output.
+
+ On RS/6000, first check for the sum of a register with a constant
+ integer that is out of range. If so, generate code to add the
+ constant with the low-order 16 bits masked to the register and force
+ this result into another register (this can be done with `cau').
+ Then generate an address of REG+(CONST&0xffff), allowing for the
+ possibility of bit 16 being a one.
+
+ Then check for the sum of a register and something not constant, try to
+ load the other things into a register and return the sum. */
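+
+/* Worked example (illustrative): for reg + 0x18000 the low part is
+ ((0x18000 & 0xffff) ^ 0x8000) - 0x8000 = -0x8000, so the high part
+ becomes 0x18000 - (-0x8000) = 0x20000; we emit an addis of 2 and
+ address the result at offset -0x8000, keeping both halves within
+ signed 16-bit range. */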
+
+static rtx
+rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ unsigned int extra = 0;
+
+ if (!reg_offset_addressing_ok_p (mode))
+ {
+ if (virtual_stack_registers_memory_p (x))
+ return x;
+
+ /* In theory we should not be seeing addresses of the form reg+0,
+ but just in case it is generated, optimize it away. */
+ if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
+ return force_reg (Pmode, XEXP (x, 0));
+
+ /* Make sure both operands are registers. */
+ else if (GET_CODE (x) == PLUS)
+ return gen_rtx_PLUS (Pmode,
+ force_reg (Pmode, XEXP (x, 0)),
+ force_reg (Pmode, XEXP (x, 1)));
+ else
+ return force_reg (Pmode, x);
+ }
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
+ if (model != 0)
+ return rs6000_legitimize_tls_address (x, model);
+ }
+
+ switch (mode)
+ {
+ case DFmode:
+ case DDmode:
+ extra = 4;
+ break;
+ case DImode:
+ if (!TARGET_POWERPC64)
+ extra = 4;
+ break;
+ case TFmode:
+ case TDmode:
+ extra = 12;
+ break;
+ case TImode:
+ extra = TARGET_POWERPC64 ? 8 : 12;
+ break;
+ default:
+ break;
+ }
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
+ >= 0x10000 - extra)
+ && !((TARGET_POWERPC64
+ && (mode == DImode || mode == TImode)
+ && (INTVAL (XEXP (x, 1)) & 3) != 0)
+ || SPE_VECTOR_MODE (mode)
+ || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DImode || mode == DDmode
+ || mode == TDmode))))
+ {
+ HOST_WIDE_INT high_int, low_int;
+ rtx sum;
+ low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
+ if (low_int >= 0x8000 - extra)
+ low_int = 0;
+ high_int = INTVAL (XEXP (x, 1)) - low_int;
+ sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ GEN_INT (high_int)), 0);
+ return plus_constant (sum, low_int);
+ }
+ else if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && GET_CODE (XEXP (x, 1)) != CONST_INT
+ && GET_MODE_NUNITS (mode) == 1
+ && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || TARGET_POWERPC64
+ || ((mode != DImode && mode != DFmode && mode != DDmode)
+ || (TARGET_E500_DOUBLE && mode != DDmode)))
+ && (TARGET_POWERPC64 || mode != DImode)
+ && !avoiding_indexed_address_p (mode)
+ && mode != TImode
+ && mode != TFmode
+ && mode != TDmode)
+ {
+ return gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
+ }
+ else if (SPE_VECTOR_MODE (mode)
+ || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DDmode || mode == TDmode
+ || mode == DImode)))
+ {
+ if (mode == DImode)
+ return x;
+ /* We accept [reg + reg] and [reg + OFFSET]. */
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx op1 = XEXP (x, 0);
+ rtx op2 = XEXP (x, 1);
+ rtx y;
+
+ op1 = force_reg (Pmode, op1);
+
+ if (GET_CODE (op2) != REG
+ && (GET_CODE (op2) != CONST_INT
+ || !SPE_CONST_OFFSET_OK (INTVAL (op2))
+ || (GET_MODE_SIZE (mode) > 8
+ && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
+ op2 = force_reg (Pmode, op2);
+
+ /* We can't always do [reg + reg] for these, because [reg +
+ reg + offset] is not a legitimate addressing mode. */
+ y = gen_rtx_PLUS (Pmode, op1, op2);
+
+ if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
+ return force_reg (Pmode, y);
+ else
+ return y;
+ }
+
+ return force_reg (Pmode, x);
+ }
+ else if (TARGET_ELF
+ && TARGET_32BIT
+ && TARGET_NO_TOC
+ && ! flag_pic
+ && GET_CODE (x) != CONST_INT
+ && GET_CODE (x) != CONST_DOUBLE
+ && CONSTANT_P (x)
+ && GET_MODE_NUNITS (mode) == 1
+ && (GET_MODE_BITSIZE (mode) <= 32
+ || ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ && (mode == DFmode || mode == DDmode))))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_elf_high (reg, x));
+ return gen_rtx_LO_SUM (Pmode, reg, x);
+ }
+ else if (TARGET_MACHO && TARGET_32BIT && TARGET_NO_TOC
+ && ! flag_pic
+#if TARGET_MACHO
+ && ! MACHO_DYNAMIC_NO_PIC_P
+#endif
+ && GET_CODE (x) != CONST_INT
+ && GET_CODE (x) != CONST_DOUBLE
+ && CONSTANT_P (x)
+ && GET_MODE_NUNITS (mode) == 1
+ && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || (mode != DFmode && mode != DDmode))
+ && mode != DImode
+ && mode != TImode)
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_macho_high (reg, x));
+ return gen_rtx_LO_SUM (Pmode, reg, x);
+ }
+ else if (TARGET_TOC
+ && GET_CODE (x) == SYMBOL_REF
+ && constant_pool_expr_p (x)
+ && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
+ {
+ rtx reg = TARGET_CMODEL != CMODEL_SMALL ? gen_reg_rtx (Pmode) : NULL_RTX;
+ return create_TOC_reference (x, reg);
+ }
+ else
+ return x;
+}
+
+/* Debug version of rs6000_legitimize_address. */
+static rtx
+rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ rtx ret;
+ rtx insns;
+
+ start_sequence ();
+ ret = rs6000_legitimize_address (x, oldx, mode);
+ insns = get_insns ();
+ end_sequence ();
+
+ if (ret != x)
+ {
+ fprintf (stderr,
+ "\nrs6000_legitimize_address: mode %s, old code %s, "
+ "new code %s, modified\n",
+ GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
+ GET_RTX_NAME (GET_CODE (ret)));
+
+ fprintf (stderr, "Original address:\n");
+ debug_rtx (x);
+
+ fprintf (stderr, "oldx:\n");
+ debug_rtx (oldx);
+
+ fprintf (stderr, "New address:\n");
+ debug_rtx (ret);
+
+ if (insns)
+ {
+ fprintf (stderr, "Insns added:\n");
+ debug_rtx_list (insns, 20);
+ }
+ }
+ else
+ {
+ fprintf (stderr,
+ "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
+ GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
+
+ debug_rtx (x);
+ }
+
+ if (insns)
+ emit_insn (insns);
+
+ return ret;
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
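+/* For size 4 and a symbol foo (illustrative), the output is
+ ".long foo@dtprel+0x8000". */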
+
+static void
+rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ switch (size)
+ {
+ case 4:
+ fputs ("\t.long\t", file);
+ break;
+ case 8:
+ fputs (DOUBLE_INT_ASM_OP, file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_addr_const (file, x);
+ fputs ("@dtprel+0x8000", file);
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize various UNSPEC sequences
+ and turn them back into a direct symbol reference. */
+
+static rtx
+rs6000_delegitimize_address (rtx orig_x)
+{
+ rtx x, y;
+
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+ x = orig_x;
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == (TARGET_CMODEL != CMODEL_SMALL ? LO_SUM : PLUS)
+ && GET_CODE (XEXP (x, 1)) == CONST)
+ {
+ y = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (y) == UNSPEC
+ && XINT (y, 1) == UNSPEC_TOCREL
+ && ((GET_CODE (XEXP (x, 0)) == REG
+ && (REGNO (XEXP (x, 0)) == TOC_REGISTER
+ || TARGET_MINIMAL_TOC
+ || TARGET_CMODEL != CMODEL_SMALL))
+ || (TARGET_CMODEL != CMODEL_SMALL
+ && GET_CODE (XEXP (x, 0)) == CONST
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == REG
+ && REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0)) == TOC_REGISTER
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == HIGH
+ && rtx_equal_p (XEXP (x, 1),
+ XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)))))
+ {
+ y = XVECEXP (y, 0, 0);
+ if (!MEM_P (orig_x))
+ return y;
+ else
+ return replace_equiv_address_nv (orig_x, y);
+ }
+ }
+
+ if (TARGET_MACHO
+ && GET_CODE (orig_x) == LO_SUM
+ && GET_CODE (XEXP (x, 1)) == CONST)
+ {
+ y = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (y) == UNSPEC
+ && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
+ return XVECEXP (y, 0, 0);
+ }
+
+ return orig_x;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx rs6000_tls_symbol;
+static rtx
+rs6000_tls_get_addr (void)
+{
+ if (!rs6000_tls_symbol)
+ rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
+
+ return rs6000_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for TLS GOT references. */
+
+static GTY(()) rtx rs6000_got_symbol;
+static rtx
+rs6000_got_sym (void)
+{
+ if (!rs6000_got_symbol)
+ {
+ rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
+ SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
+ }
+
+ return rs6000_got_symbol;
+}
+
+/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
+ this (thread-local) address. */
+
+static rtx
+rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
+{
+ rtx dest, insn;
+
+ dest = gen_reg_rtx (Pmode);
+ if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
+ {
+ rtx tlsreg;
+
+ if (TARGET_64BIT)
+ {
+ tlsreg = gen_rtx_REG (Pmode, 13);
+ insn = gen_tls_tprel_64 (dest, tlsreg, addr);
+ }
+ else
+ {
+ tlsreg = gen_rtx_REG (Pmode, 2);
+ insn = gen_tls_tprel_32 (dest, tlsreg, addr);
+ }
+ emit_insn (insn);
+ }
+ else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
+ {
+ rtx tlsreg, tmp;
+
+ tmp = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ {
+ tlsreg = gen_rtx_REG (Pmode, 13);
+ insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
+ }
+ else
+ {
+ tlsreg = gen_rtx_REG (Pmode, 2);
+ insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
+ }
+ emit_insn (insn);
+ if (TARGET_64BIT)
+ insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
+ else
+ insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
+ emit_insn (insn);
+ }
+ else
+ {
+ rtx r3, got, tga, tmp1, tmp2, call_insn;
+
+ /* We currently use relocations like @got@tlsgd for tls, which
+ means the linker will handle allocation of tls entries, placing
+ them in the .got section. So use a pointer to the .got section,
+ not one to secondary TOC sections used by 64-bit -mminimal-toc,
+ or to secondary GOT sections used by 32-bit -fPIC. */
+ if (TARGET_64BIT)
+ got = gen_rtx_REG (Pmode, 2);
+ else
+ {
+ if (flag_pic == 1)
+ got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
+ else
+ {
+ rtx gsym = rs6000_got_sym ();
+ got = gen_reg_rtx (Pmode);
+ if (flag_pic == 0)
+ rs6000_emit_move (got, gsym, Pmode);
+ else
+ {
+ rtx mem, lab, last;
+
+ tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = gen_reg_rtx (Pmode);
+ mem = gen_const_mem (Pmode, tmp1);
+ lab = gen_label_rtx ();
+ emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
+ emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
+ emit_move_insn (tmp2, mem);
+ last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
+ set_unique_reg_note (last, REG_EQUAL, gsym);
+ }
+ }
+ }
+
+ if (model == TLS_MODEL_GLOBAL_DYNAMIC)
+ {
+ tga = rs6000_tls_get_addr ();
+ emit_library_call_value (tga, dest, LCT_CONST, Pmode,
+ 1, const0_rtx, Pmode);
+
+ r3 = gen_rtx_REG (Pmode, 3);
+ if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
+ else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
+ else if (DEFAULT_ABI == ABI_V4)
+ insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
+ else
+ gcc_unreachable ();
+ call_insn = last_call_insn ();
+ PATTERN (call_insn) = insn;
+ if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
+ pic_offset_table_rtx);
+ }
+ else if (model == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ tga = rs6000_tls_get_addr ();
+ tmp1 = gen_reg_rtx (Pmode);
+ emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
+ 1, const0_rtx, Pmode);
+
+ r3 = gen_rtx_REG (Pmode, 3);
+ if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
+ else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
+ else if (DEFAULT_ABI == ABI_V4)
+ insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
+ else
+ gcc_unreachable ();
+ call_insn = last_call_insn ();
+ PATTERN (call_insn) = insn;
+ if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
+ pic_offset_table_rtx);
+
+ if (rs6000_tls_size == 16)
+ {
+ if (TARGET_64BIT)
+ insn = gen_tls_dtprel_64 (dest, tmp1, addr);
+ else
+ insn = gen_tls_dtprel_32 (dest, tmp1, addr);
+ }
+ else if (rs6000_tls_size == 32)
+ {
+ tmp2 = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
+ else
+ insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
+ emit_insn (insn);
+ if (TARGET_64BIT)
+ insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
+ else
+ insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
+ }
+ else
+ {
+ tmp2 = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
+ else
+ insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
+ emit_insn (insn);
+ insn = gen_rtx_SET (Pmode, dest,
+ gen_rtx_PLUS (Pmode, tmp2, tmp1));
+ }
+ emit_insn (insn);
+ }
+ else
+ {
+	/* Initial-exec, or local-exec with a 64-bit offset (-mtls-size=64).  */
+ tmp2 = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ insn = gen_tls_got_tprel_64 (tmp2, got, addr);
+ else
+ insn = gen_tls_got_tprel_32 (tmp2, got, addr);
+ emit_insn (insn);
+ if (TARGET_64BIT)
+ insn = gen_tls_tls_64 (dest, tmp2, addr);
+ else
+ insn = gen_tls_tls_32 (dest, tmp2, addr);
+ emit_insn (insn);
+ }
+ }
+
+ return dest;
+}
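+
+/* For reference, a sketch of the local-exec sequences emitted above,
+   assuming the standard PowerPC TLS relocations:
+	addi  rD,r13,sym@tprel		(-mtls-size=16)
+   or
+	addis rT,r13,sym@tprel@ha	(-mtls-size=32)
+	addi  rD,rT,sym@tprel@l
+   with the thread pointer in r13 (r2 for 32-bit).  */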
+
+/* Return 1 if X contains a thread-local symbol. */
+
+bool
+rs6000_tls_referenced_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, &rs6000_tls_symbol_ref_1, 0);
+}
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
+
+static bool
+rs6000_cannot_force_const_mem (rtx x)
+{
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH)
+ return true;
+
+ return rs6000_tls_referenced_p (x);
+}
+
+/* Return 1 if *X is a thread-local symbol. This is the same as
+ rs6000_tls_symbol_ref except for the type of the unused argument. */
+
+static int
+rs6000_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ return RS6000_SYMBOL_REF_TLS_P (*x);
+}
+
+/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
+ replace the input X, or the original X if no replacement is called for.
+ The output parameter *WIN is 1 if the calling macro should goto WIN,
+ 0 if it should not.
+
+ For RS/6000, we wish to handle large displacements off a base
+   register by splitting the addend across an addis and the mem insn.
+   This cuts the number of extra insns needed from 3 to 1.
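+
+   For example, a word load at B + 0x12345678 can be reloaded as, roughly:
+	addis rX,rB,0x1234	(high part, sign-adjusted as needed)
+	lwz   rD,0x5678(rX)	(low part left in the mem insn)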
+
+ On Darwin, we use this to generate code for floating point constants.
+ A movsf_low is generated so we wind up with 2 instructions rather than 3.
+ The Darwin code is inside #if TARGET_MACHO because only then are the
+ machopic_* functions defined. */
+static rtx
+rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED, int *win)
+{
+ bool reg_offset_p = reg_offset_addressing_ok_p (mode);
+
+ /* We must recognize output that we have already generated ourselves. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+ /* Likewise for (lo_sum (high ...) ...) output we have generated. */
+ if (GET_CODE (x) == LO_SUM
+ && GET_CODE (XEXP (x, 0)) == HIGH)
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+#if TARGET_MACHO
+ if (DEFAULT_ABI == ABI_DARWIN && flag_pic
+ && GET_CODE (x) == LO_SUM
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
+ && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
+ && machopic_operand_p (XEXP (x, 1)))
+ {
+ /* Result of previous invocation of this function on Darwin
+ floating point constant. */
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+#endif
+
+ if (TARGET_CMODEL != CMODEL_SMALL
+ && GET_CODE (x) == LO_SUM
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 0), 0)) == TOC_REGISTER
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == HIGH
+ && GET_CODE (XEXP (x, 1)) == CONST
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (x, 1), 0), 1) == UNSPEC_TOCREL
+ && rtx_equal_p (XEXP (XEXP (XEXP (XEXP (x, 0), 1), 0), 0), XEXP (x, 1)))
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ *win = 1;
+ return x;
+ }
+
+ /* Force ld/std non-word aligned offset into base register by wrapping
+ in offset 0. */
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) < 32
+ && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && reg_offset_p
+ && (INTVAL (XEXP (x, 1)) & 3) != 0
+ && VECTOR_MEM_NONE_P (mode)
+ && GET_MODE_SIZE (mode) >= UNITS_PER_WORD
+ && TARGET_POWERPC64)
+ {
+ x = gen_rtx_PLUS (GET_MODE (x), x, GEN_INT (0));
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ *win = 1;
+ return x;
+ }
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
+ && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && reg_offset_p
+ && !SPE_VECTOR_MODE (mode)
+ && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DDmode || mode == TDmode
+ || mode == DImode))
+ && VECTOR_MEM_NONE_P (mode))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+ HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT high
+ = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
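+
+      /* LOW is VAL sign-extended from 16 bits, so it is usable as the
+	 displacement of the mem insn; HIGH is the sign-extended remainder,
+	 so that HIGH + LOW == VAL whenever VAL fits in 32 bits.  */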
+
+ /* Check for 32-bit overflow. */
+ if (high + low != val)
+ {
+ *win = 0;
+ return x;
+ }
+
+ /* Reload the high part into a base reg; leave the low part
+ in the mem directly. */
+
+ x = gen_rtx_PLUS (GET_MODE (x),
+ gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
+ GEN_INT (high)),
+ GEN_INT (low));
+
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && reg_offset_p
+ && VECTOR_MEM_NONE_P (mode)
+ && !SPE_VECTOR_MODE (mode)
+#if TARGET_MACHO
+ && DEFAULT_ABI == ABI_DARWIN
+ && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
+ && machopic_symbol_defined_p (x)
+#else
+ && DEFAULT_ABI == ABI_V4
+ && !flag_pic
+#endif
+ /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
+ The same goes for DImode without 64-bit gprs and DFmode and DDmode
+ without fprs. */
+ && mode != TFmode
+ && mode != TDmode
+ && (mode != DImode || TARGET_POWERPC64)
+ && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
+ || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
+ {
+#if TARGET_MACHO
+ if (flag_pic)
+ {
+ rtx offset = machopic_gen_offset (x);
+ x = gen_rtx_LO_SUM (GET_MODE (x),
+ gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+ gen_rtx_HIGH (Pmode, offset)), offset);
+ }
+ else
+#endif
+ x = gen_rtx_LO_SUM (GET_MODE (x),
+ gen_rtx_HIGH (Pmode, x), x);
+
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+ /* Reload an offset address wrapped by an AND that represents the
+ masking of the lower bits. Strip the outer AND and let reload
+ convert the offset address into an indirect address. For VSX,
+ force reload to create the address with an AND in a separate
+ register, because we can't guarantee an altivec register will
+ be used. */
+ if (VECTOR_MEM_ALTIVEC_P (mode)
+ && GET_CODE (x) == AND
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) == -16)
+ {
+ x = XEXP (x, 0);
+ *win = 1;
+ return x;
+ }
+
+ if (TARGET_TOC
+ && reg_offset_p
+ && GET_CODE (x) == SYMBOL_REF
+ && constant_pool_expr_p (x)
+ && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), mode))
+ {
+ x = create_TOC_reference (x, NULL_RTX);
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ *win = 1;
+ return x;
+ }
+ *win = 0;
+ return x;
+}
+
+/* Debug version of rs6000_legitimize_reload_address. */
+static rtx
+rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels, int *win)
+{
+ rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
+ ind_levels, win);
+ fprintf (stderr,
+ "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
+ "type = %d, ind_levels = %d, win = %d, original addr:\n",
+ GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
+ debug_rtx (x);
+
+ if (x == ret)
+ fprintf (stderr, "Same address returned\n");
+ else if (!ret)
+ fprintf (stderr, "NULL returned\n");
+ else
+ {
+ fprintf (stderr, "New address:\n");
+ debug_rtx (ret);
+ }
+
+ return ret;
+}
+
+/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+   On the RS/6000, there are four valid address forms: a SYMBOL_REF that
+ refers to a constant pool entry of an address (or the sum of it
+ plus a constant), a short (16-bit signed) constant plus a register,
+ the sum of two registers, or a register indirect, possibly with an
+ auto-increment. For DFmode, DDmode and DImode with a constant plus
+ register, we must ensure that both words are addressable or PowerPC64
+ with offset word aligned.
+
+ For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
+ 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
+ because adjacent memory cells are accessed by adding word-sized offsets
+ during assembly output. */
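+/* For illustration, each of the following is a legitimate address here,
+   roughly: (reg 9), (plus (reg 9) (const_int 16)), (plus (reg 9) (reg 10)),
+   and, with -mupdate, (pre_inc (reg 9)).  */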
+bool
+rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
+{
+ bool reg_offset_p = reg_offset_addressing_ok_p (mode);
+
+ /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
+ if (VECTOR_MEM_ALTIVEC_P (mode)
+ && GET_CODE (x) == AND
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) == -16)
+ x = XEXP (x, 0);
+
+ if (RS6000_SYMBOL_REF_TLS_P (x))
+ return 0;
+ if (legitimate_indirect_address_p (x, reg_ok_strict))
+ return 1;
+ if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
+ && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ && !SPE_VECTOR_MODE (mode)
+ && mode != TFmode
+ && mode != TDmode
+ /* Restrict addressing for DI because of our SUBREG hackery. */
+ && !(TARGET_E500_DOUBLE
+ && (mode == DFmode || mode == DDmode || mode == DImode))
+ && TARGET_UPDATE
+ && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
+ return 1;
+ if (virtual_stack_registers_memory_p (x))
+ return 1;
+ if (reg_offset_p && legitimate_small_data_p (mode, x))
+ return 1;
+ if (reg_offset_p
+ && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+ return 1;
+  /* If not REG_OK_STRICT (i.e. before reload), let any stack offset pass.  */
+ if (! reg_ok_strict
+ && reg_offset_p
+ && GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 0)) == REG
+ && (XEXP (x, 0) == virtual_stack_vars_rtx
+ || XEXP (x, 0) == arg_pointer_rtx)
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ return 1;
+ if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict))
+ return 1;
+ if (mode != TImode
+ && mode != TFmode
+ && mode != TDmode
+ && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || TARGET_POWERPC64
+ || (mode != DFmode && mode != DDmode)
+ || (TARGET_E500_DOUBLE && mode != DDmode))
+ && (TARGET_POWERPC64 || mode != DImode)
+ && !avoiding_indexed_address_p (mode)
+ && legitimate_indexed_address_p (x, reg_ok_strict))
+ return 1;
+ if (GET_CODE (x) == PRE_MODIFY
+ && mode != TImode
+ && mode != TFmode
+ && mode != TDmode
+ && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || TARGET_POWERPC64
+ || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
+ && (TARGET_POWERPC64 || mode != DImode)
+ && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ && !SPE_VECTOR_MODE (mode)
+ /* Restrict addressing for DI because of our SUBREG hackery. */
+ && !(TARGET_E500_DOUBLE
+ && (mode == DFmode || mode == DDmode || mode == DImode))
+ && TARGET_UPDATE
+ && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
+ && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), reg_ok_strict)
+ || (!avoiding_indexed_address_p (mode)
+ && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
+ && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
+ return 1;
+ if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
+ return 1;
+ return 0;
+}
+
+/* Debug version of rs6000_legitimate_address_p. */
+static bool
+rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool reg_ok_strict)
+{
+ bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
+ fprintf (stderr,
+ "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
+ "strict = %d, code = %s\n",
+ ret ? "true" : "false",
+ GET_MODE_NAME (mode),
+ reg_ok_strict,
+ GET_RTX_NAME (GET_CODE (x)));
+ debug_rtx (x);
+
+ return ret;
+}
+
+/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
+
+static bool
+rs6000_mode_dependent_address_p (const_rtx addr)
+{
+ return rs6000_mode_dependent_address_ptr (addr);
+}
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+
+   On the RS/6000 this is true of all integral offsets (since AltiVec
+   and VSX modes don't allow them) and of pre-increment or decrement
+   addresses.
+
+ ??? Except that due to conceptual problems in offsettable_address_p
+ we can't really report the problems of integral offsets. So leave
+ this assuming that the adjustable offset must be valid for the
+ sub-words of a TFmode operand, which is what we had before. */
+
+static bool
+rs6000_mode_dependent_address (const_rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case PLUS:
+ /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
+ is considered a legitimate address before reload, so there
+ are no offset restrictions in that case. Note that this
+ condition is safe in strict mode because any address involving
+ virtual_stack_vars_rtx or arg_pointer_rtx would already have
+ been rejected as illegitimate. */
+ if (XEXP (addr, 0) != virtual_stack_vars_rtx
+ && XEXP (addr, 0) != arg_pointer_rtx
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
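+	  /* The address is mode-dependent if VAL + 12, the offset of the
+	     last word of a 16-byte TFmode operand, no longer fits in a
+	     signed 16-bit displacement.  */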
+ return val + 12 + 0x8000 >= 0x10000;
+ }
+ break;
+
+ case LO_SUM:
+ /* Anything in the constant pool is sufficiently aligned that
+ all bytes have the same high part address. */
+ return !legitimate_constant_pool_address_p (addr, QImode, false);
+
+ /* Auto-increment cases are now treated generically in recog.c. */
+ case PRE_MODIFY:
+ return TARGET_UPDATE;
+
+ /* AND is only allowed in Altivec loads. */
+ case AND:
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* Debug version of rs6000_mode_dependent_address. */
+static bool
+rs6000_debug_mode_dependent_address (const_rtx addr)
+{
+ bool ret = rs6000_mode_dependent_address (addr);
+
+ fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
+ ret ? "true" : "false");
+ debug_rtx (addr);
+
+ return ret;
+}
+
+/* Implement FIND_BASE_TERM. */
+
+rtx
+rs6000_find_base_term (rtx op)
+{
+ rtx base, offset;
+
+ split_const (op, &base, &offset);
+ if (GET_CODE (base) == UNSPEC)
+ switch (XINT (base, 1))
+ {
+ case UNSPEC_TOCREL:
+ case UNSPEC_MACHOPIC_OFFSET:
+ /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
+ for aliasing purposes. */
+ return XVECEXP (base, 0, 0);
+ }
+
+ return op;
+}
+
+/* More elaborate version of recog's offsettable_memref_p predicate
+ that works around the ??? note of rs6000_mode_dependent_address.
+ In particular it accepts
+
+ (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
+
+   in 32-bit mode, which the recog predicate rejects.  */
+
+bool
+rs6000_offsettable_memref_p (rtx op)
+{
+ if (!MEM_P (op))
+ return false;
+
+ /* First mimic offsettable_memref_p. */
+ if (offsettable_address_p (1, GET_MODE (op), XEXP (op, 0)))
+ return true;
+
+ /* offsettable_address_p invokes rs6000_mode_dependent_address, but
+ the latter predicate knows nothing about the mode of the memory
+ reference and, therefore, assumes that it is the largest supported
+ mode (TFmode). As a consequence, legitimate offsettable memory
+ references are rejected. rs6000_legitimate_offset_address_p contains
+ the correct logic for the PLUS case of rs6000_mode_dependent_address. */
+ return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), 1);
+}
+
+/* Change register usage conditional on target flags. */
+static void
+rs6000_conditional_register_usage (void)
+{
+ int i;
+
+ if (TARGET_DEBUG_TARGET)
+ fprintf (stderr, "rs6000_conditional_register_usage called\n");
+
+ /* Set MQ register fixed (already call_used) if not POWER
+ architecture (RIOS1, RIOS2, RSC, and PPC601) so that it will not
+ be allocated. */
+ if (! TARGET_POWER)
+ fixed_regs[64] = 1;
+
+ /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
+ if (TARGET_64BIT)
+ fixed_regs[13] = call_used_regs[13]
+ = call_really_used_regs[13] = 1;
+
+ /* Conditionally disable FPRs. */
+ if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ for (i = 32; i < 64; i++)
+ fixed_regs[i] = call_used_regs[i]
+ = call_really_used_regs[i] = 1;
+
+ /* The TOC register is not killed across calls in a way that is
+ visible to the compiler. */
+ if (DEFAULT_ABI == ABI_AIX)
+ call_really_used_regs[2] = 0;
+
+ if (DEFAULT_ABI == ABI_V4
+ && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
+ && flag_pic == 2)
+ fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
+
+ if (DEFAULT_ABI == ABI_V4
+ && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
+ && flag_pic == 1)
+ fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
+ = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
+ = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
+
+ if (DEFAULT_ABI == ABI_DARWIN
+ && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
+ = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
+ = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
+
+ if (TARGET_TOC && TARGET_MINIMAL_TOC)
+ fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
+ = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
+
+ if (TARGET_SPE)
+ {
+ global_regs[SPEFSCR_REGNO] = 1;
+ /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
+ registers in prologues and epilogues. We no longer use r14
+ for FIXED_SCRATCH, but we're keeping r14 out of the allocation
+ pool for link-compatibility with older versions of GCC. Once
+ "old" code has died out, we can return r14 to the allocation
+ pool. */
+ fixed_regs[14]
+ = call_used_regs[14]
+ = call_really_used_regs[14] = 1;
+ }
+
+ if (!TARGET_ALTIVEC && !TARGET_VSX)
+ {
+ for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
+ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
+ call_really_used_regs[VRSAVE_REGNO] = 1;
+ }
+
+ if (TARGET_ALTIVEC || TARGET_VSX)
+ global_regs[VSCR_REGNO] = 1;
+
+ if (TARGET_ALTIVEC_ABI)
+ {
+ for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
+ call_used_regs[i] = call_really_used_regs[i] = 1;
+
+ /* AIX reserves VR20:31 in non-extended ABI mode. */
+ if (TARGET_XCOFF)
+ for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
+ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
+ }
+}
+
+/* Try to output insns to set DEST equal to the constant SOURCE if it
+   can be done in less than N insns.  Do all computations in MODE.
+   Returns the place where the output has been placed if it can be
+   done and the insns have been emitted.  If it would take more than N
+   insns, zero is returned and no insns are emitted.  */
+
+rtx
+rs6000_emit_set_const (rtx dest, enum machine_mode mode,
+ rtx source, int n ATTRIBUTE_UNUSED)
+{
+ rtx result, insn, set;
+ HOST_WIDE_INT c0, c1;
+
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ if (dest == NULL)
+ dest = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, source));
+ return dest;
+
+ case SImode:
+ result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
+ GEN_INT (INTVAL (source)
+ & (~ (HOST_WIDE_INT) 0xffff))));
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IOR (SImode, copy_rtx (result),
+ GEN_INT (INTVAL (source) & 0xffff))));
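+
+      /* E.g., for SOURCE == 0x12345678 the two sets above amount to,
+	 roughly:
+	    lis rD,0x1234
+	    ori rD,rD,0x5678  */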
+ result = dest;
+ break;
+
+ case DImode:
+ switch (GET_CODE (source))
+ {
+ case CONST_INT:
+ c0 = INTVAL (source);
+ c1 = -(c0 < 0);
+ break;
+
+ case CONST_DOUBLE:
+#if HOST_BITS_PER_WIDE_INT >= 64
+ c0 = CONST_DOUBLE_LOW (source);
+ c1 = -(c0 < 0);
+#else
+ c0 = CONST_DOUBLE_LOW (source);
+ c1 = CONST_DOUBLE_HIGH (source);
+#endif
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ result = rs6000_emit_set_long_const (dest, c0, c1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ insn = get_last_insn ();
+ set = single_set (insn);
+ if (! CONSTANT_P (SET_SRC (set)))
+ set_unique_reg_note (insn, REG_EQUAL, source);
+
+ return result;
+}
+
+/* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
+   fall back to a straightforward decomposition.  We do this to avoid
+ exponential run times encountered when looking for longer sequences
+ with rs6000_emit_set_const. */
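+/* For example, the worst-case constant 0x1234567876543210 is built by
+   the code below as, roughly:
+	lis  rD,0x1234
+	ori  rD,rD,0x5678
+	sldi rD,rD,32
+	oris rD,rD,0x7654
+	ori  rD,rD,0x3210  */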
+static rtx
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
+{
+ if (!TARGET_POWERPC64)
+ {
+ rtx operand1, operand2;
+
+ operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
+ DImode);
+ operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
+ DImode);
+ emit_move_insn (operand1, GEN_INT (c1));
+ emit_move_insn (operand2, GEN_INT (c2));
+ }
+ else
+ {
+ HOST_WIDE_INT ud1, ud2, ud3, ud4;
+
+ ud1 = c1 & 0xffff;
+ ud2 = (c1 & 0xffff0000) >> 16;
+#if HOST_BITS_PER_WIDE_INT >= 64
+ c2 = c1 >> 32;
+#endif
+ ud3 = c2 & 0xffff;
+ ud4 = (c2 & 0xffff0000) >> 16;
+
+ if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
+ || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
+ {
+ if (ud1 & 0x8000)
+ emit_move_insn (dest, GEN_INT (((ud1 ^ 0x8000) - 0x8000)));
+ else
+ emit_move_insn (dest, GEN_INT (ud1));
+ }
+
+ else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
+ || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
+ {
+ if (ud2 & 0x8000)
+ emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
+ - 0x80000000));
+ else
+ emit_move_insn (dest, GEN_INT (ud2 << 16));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
+ }
+ else if (ud3 == 0 && ud4 == 0)
+ {
+ gcc_assert (ud2 & 0x8000);
+ emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
+ - 0x80000000));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ZERO_EXTEND (DImode,
+ gen_lowpart (SImode,
+ copy_rtx (dest))));
+ }
+ else if ((ud4 == 0xffff && (ud3 & 0x8000))
+ || (ud4 == 0 && ! (ud3 & 0x8000)))
+ {
+ if (ud3 & 0x8000)
+ emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
+ - 0x80000000));
+ else
+ emit_move_insn (dest, GEN_INT (ud3 << 16));
+
+ if (ud2 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud2)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (16)));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
+ }
+ else
+ {
+ if (ud4 & 0x8000)
+ emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
+ - 0x80000000));
+ else
+ emit_move_insn (dest, GEN_INT (ud4 << 16));
+
+ if (ud3 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud3)));
+
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (32)));
+ if (ud2 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud2 << 16)));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest), GEN_INT (ud1)));
+ }
+ }
+ return dest;
+}
+
+/* Helper for the following. Get rid of [r+r] memory refs
+   in cases where they won't work (TImode, TFmode, TDmode).  */
+
+static void
+rs6000_eliminate_indexed_memrefs (rtx operands[2])
+{
+ if (reload_in_progress)
+ return;
+
+ if (GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) != REG
+ && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
+ GET_MODE (operands[0]), false))
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_addr_to_reg (XEXP (operands[0], 0)));
+
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) != REG
+ && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
+ GET_MODE (operands[1]), false))
+ operands[1]
+ = replace_equiv_address (operands[1],
+ copy_addr_to_reg (XEXP (operands[1], 0)));
+}
+
+/* Emit a move from SOURCE to DEST in mode MODE. */
+void
+rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
+{
+ rtx operands[2];
+ operands[0] = dest;
+ operands[1] = source;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
+ "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
+ GET_MODE_NAME (mode),
+ reload_in_progress,
+ reload_completed,
+ can_create_pseudo_p ());
+ debug_rtx (dest);
+ fprintf (stderr, "source:\n");
+ debug_rtx (source);
+ }
+
+ /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && ! FLOAT_MODE_P (mode)
+ && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+ {
+ /* FIXME. This should never happen. */
+ /* Since it seems that it does, do the safe thing and convert
+ to a CONST_INT. */
+ operands[1] = gen_int_mode (CONST_DOUBLE_LOW (operands[1]), mode);
+ }
+ gcc_assert (GET_CODE (operands[1]) != CONST_DOUBLE
+ || FLOAT_MODE_P (mode)
+ || ((CONST_DOUBLE_HIGH (operands[1]) != 0
+ || CONST_DOUBLE_LOW (operands[1]) < 0)
+ && (CONST_DOUBLE_HIGH (operands[1]) != -1
+ || CONST_DOUBLE_LOW (operands[1]) >= 0)));
+
+ /* Check if GCC is setting up a block move that will end up using FP
+ registers as temporaries. We must make sure this is acceptable. */
+ if (GET_CODE (operands[0]) == MEM
+ && GET_CODE (operands[1]) == MEM
+ && mode == DImode
+ && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
+ || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
+ && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
+ ? 32 : MEM_ALIGN (operands[0])))
+ || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
+ ? 32
+ : MEM_ALIGN (operands[1]))))
+ && ! MEM_VOLATILE_P (operands [0])
+ && ! MEM_VOLATILE_P (operands [1]))
+ {
+ emit_move_insn (adjust_address (operands[0], SImode, 0),
+ adjust_address (operands[1], SImode, 0));
+ emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
+ adjust_address (copy_rtx (operands[1]), SImode, 4));
+ return;
+ }
+
+ if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
+ && !gpc_reg_operand (operands[1], mode))
+ operands[1] = force_reg (mode, operands[1]);
+
+ if (mode == SFmode && ! TARGET_POWERPC
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && GET_CODE (operands[0]) == MEM)
+ {
+ int regnum;
+
+ if (reload_in_progress || reload_completed)
+ regnum = true_regnum (operands[1]);
+ else if (GET_CODE (operands[1]) == REG)
+ regnum = REGNO (operands[1]);
+ else
+ regnum = -1;
+
+ /* If operands[1] is a register, on POWER it may have
+ double-precision data in it, so truncate it to single
+ precision. */
+ if (FP_REGNO_P (regnum) || regnum >= FIRST_PSEUDO_REGISTER)
+ {
+ rtx newreg;
+ newreg = (!can_create_pseudo_p () ? copy_rtx (operands[1])
+ : gen_reg_rtx (mode));
+ emit_insn (gen_aux_truncdfsf2 (newreg, operands[1]));
+ operands[1] = newreg;
+ }
+ }
+
+  /* Recognize the case where operands[1] is a reference to thread-local
+ data and load its address to a register. */
+ if (rs6000_tls_referenced_p (operands[1]))
+ {
+ enum tls_model model;
+ rtx tmp = operands[1];
+ rtx addend = NULL;
+
+ if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (tmp, 0), 1);
+ tmp = XEXP (XEXP (tmp, 0), 0);
+ }
+
+ gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
+ model = SYMBOL_REF_TLS_MODEL (tmp);
+ gcc_assert (model != 0);
+
+ tmp = rs6000_legitimize_tls_address (tmp, model);
+ if (addend)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, addend);
+ tmp = force_operand (tmp, operands[0]);
+ }
+ operands[1] = tmp;
+ }
+
+ /* Handle the case where reload calls us with an invalid address. */
+ if (reload_in_progress && mode == Pmode
+ && (! general_operand (operands[1], mode)
+ || ! nonimmediate_operand (operands[0], mode)))
+ goto emit_set;
+
+ /* 128-bit constant floating-point values on Darwin should really be
+ loaded as two parts. */
+ if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
+ && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
+ simplify_gen_subreg (DFmode, operands[1], mode, 0),
+ DFmode);
+ rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
+ GET_MODE_SIZE (DFmode)),
+ simplify_gen_subreg (DFmode, operands[1], mode,
+ GET_MODE_SIZE (DFmode)),
+ DFmode);
+ return;
+ }
+
+ if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
+ cfun->machine->sdmode_stack_slot =
+ eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
+
+ if (reload_in_progress
+ && mode == SDmode
+ && MEM_P (operands[0])
+ && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
+ && REG_P (operands[1]))
+ {
+ if (FP_REGNO_P (REGNO (operands[1])))
+ {
+ rtx mem = adjust_address_nv (operands[0], DDmode, 0);
+ mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
+ emit_insn (gen_movsd_store (mem, operands[1]));
+ }
+ else if (INT_REGNO_P (REGNO (operands[1])))
+ {
+ rtx mem = adjust_address_nv (operands[0], mode, 4);
+ mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
+ emit_insn (gen_movsd_hardfloat (mem, operands[1]));
+ }
+ else
+	gcc_unreachable ();
+ return;
+ }
+ if (reload_in_progress
+ && mode == SDmode
+ && REG_P (operands[0])
+ && MEM_P (operands[1])
+ && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
+ {
+ if (FP_REGNO_P (REGNO (operands[0])))
+ {
+ rtx mem = adjust_address_nv (operands[1], DDmode, 0);
+ mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
+ emit_insn (gen_movsd_load (operands[0], mem));
+ }
+ else if (INT_REGNO_P (REGNO (operands[0])))
+ {
+ rtx mem = adjust_address_nv (operands[1], mode, 4);
+ mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
+ emit_insn (gen_movsd_hardfloat (operands[0], mem));
+ }
+ else
+	gcc_unreachable ();
+ return;
+ }
+
+ /* FIXME: In the long term, this switch statement should go away
+ and be replaced by a sequence of tests based on things like
+ mode == Pmode. */
+ switch (mode)
+ {
+ case HImode:
+ case QImode:
+ if (CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != CONST_INT)
+ operands[1] = force_const_mem (mode, operands[1]);
+ break;
+
+ case TFmode:
+ case TDmode:
+ rs6000_eliminate_indexed_memrefs (operands);
+ /* fall through */
+
+ case DFmode:
+ case DDmode:
+ case SFmode:
+ case SDmode:
+ if (CONSTANT_P (operands[1])
+ && ! easy_fp_constant (operands[1], mode))
+ operands[1] = force_const_mem (mode, operands[1]);
+ break;
+
+ case V16QImode:
+ case V8HImode:
+ case V4SFmode:
+ case V4SImode:
+ case V4HImode:
+ case V2SFmode:
+ case V2SImode:
+ case V1DImode:
+ case V2DFmode:
+ case V2DImode:
+ if (CONSTANT_P (operands[1])
+ && !easy_vector_constant (operands[1], mode))
+ operands[1] = force_const_mem (mode, operands[1]);
+ break;
+
+ case SImode:
+ case DImode:
+      /* Use the default pattern for the address of ELF small data.  */
+ if (TARGET_ELF
+ && mode == Pmode
+ && DEFAULT_ABI == ABI_V4
+ && (GET_CODE (operands[1]) == SYMBOL_REF
+ || GET_CODE (operands[1]) == CONST)
+ && small_data_operand (operands[1], mode))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ return;
+ }
+
+ if (DEFAULT_ABI == ABI_V4
+ && mode == Pmode && mode == SImode
+ && flag_pic == 1 && got_operand (operands[1], mode))
+ {
+ emit_insn (gen_movsi_got (operands[0], operands[1]));
+ return;
+ }
+
+ if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
+ && TARGET_NO_TOC
+ && ! flag_pic
+ && mode == Pmode
+ && CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && GET_CODE (operands[1]) != CONST_INT)
+ {
+ rtx target = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (mode));
+
+ /* If this is a function address on -mcall-aixdesc,
+ convert it to the address of the descriptor. */
+ if (DEFAULT_ABI == ABI_AIX
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && XSTR (operands[1], 0)[0] == '.')
+ {
+ const char *name = XSTR (operands[1], 0);
+ rtx new_ref;
+ while (*name == '.')
+ name++;
+ new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
+ CONSTANT_POOL_ADDRESS_P (new_ref)
+ = CONSTANT_POOL_ADDRESS_P (operands[1]);
+ SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
+ SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
+ SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
+ operands[1] = new_ref;
+ }
+
+ if (DEFAULT_ABI == ABI_DARWIN)
+ {
+#if TARGET_MACHO
+ if (MACHO_DYNAMIC_NO_PIC_P)
+ {
+ /* Take care of any required data indirection. */
+ operands[1] = rs6000_machopic_legitimize_pic_address (
+ operands[1], mode, operands[0]);
+ if (operands[0] != operands[1])
+ emit_insn (gen_rtx_SET (VOIDmode,
+ operands[0], operands[1]));
+ return;
+ }
+#endif
+ emit_insn (gen_macho_high (target, operands[1]));
+ emit_insn (gen_macho_low (operands[0], target, operands[1]));
+ return;
+ }
+
+ emit_insn (gen_elf_high (target, operands[1]));
+ emit_insn (gen_elf_low (operands[0], target, operands[1]));
+ return;
+ }
+
+ /* If this is a SYMBOL_REF that refers to a constant pool entry,
+ and we have put it in the TOC, we just need to make a TOC-relative
+ reference to it. */
+ if ((TARGET_TOC
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && constant_pool_expr_p (operands[1])
+ && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (operands[1]),
+ get_pool_mode (operands[1])))
+ || (TARGET_CMODEL == CMODEL_MEDIUM
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !CONSTANT_POOL_ADDRESS_P (operands[1])
+ && SYMBOL_REF_LOCAL_P (operands[1])))
+ {
+ rtx reg = NULL_RTX;
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ {
+ if (can_create_pseudo_p ())
+ reg = gen_reg_rtx (Pmode);
+ else
+ reg = operands[0];
+ }
+ operands[1] = create_TOC_reference (operands[1], reg);
+ }
+ else if (mode == Pmode
+ && CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && !(TARGET_CMODEL != CMODEL_SMALL
+ && GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == HIGH)
+ && ((GET_CODE (operands[1]) != CONST_INT
+ && ! easy_fp_constant (operands[1], mode))
+ || (GET_CODE (operands[1]) == CONST_INT
+ && (num_insns_constant (operands[1], mode)
+ > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
+ || (GET_CODE (operands[0]) == REG
+ && FP_REGNO_P (REGNO (operands[0]))))
+ && ! legitimate_constant_pool_address_p (operands[1], mode,
+ false)
+ && ! toc_relative_expr_p (operands[1])
+ && (TARGET_CMODEL == CMODEL_SMALL
+ || can_create_pseudo_p ()
+ || (REG_P (operands[0])
+ && INT_REG_OK_FOR_BASE_P (operands[0], true))))
+ {
+
+#if TARGET_MACHO
+ /* Darwin uses a special PIC legitimizer. */
+ if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
+ {
+ operands[1] =
+ rs6000_machopic_legitimize_pic_address (operands[1], mode,
+ operands[0]);
+ if (operands[0] != operands[1])
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+ return;
+ }
+#endif
+
+ /* If we are to limit the number of things we put in the TOC and
+ this is a symbol plus a constant we can add in one insn,
+ just put the symbol in the TOC and add the constant. Don't do
+ this if reload is in progress. */
+ if (GET_CODE (operands[1]) == CONST
+ && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
+ && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
+ || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
+ && ! side_effects_p (operands[0]))
+ {
+ rtx sym =
+ force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
+ rtx other = XEXP (XEXP (operands[1], 0), 1);
+
+ sym = force_reg (mode, sym);
+ emit_insn (gen_add3_insn (operands[0], sym, other));
+ return;
+ }
+
+ operands[1] = force_const_mem (mode, operands[1]);
+
+ if (TARGET_TOC
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && constant_pool_expr_p (XEXP (operands[1], 0))
+ && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
+ get_pool_constant (XEXP (operands[1], 0)),
+ get_pool_mode (XEXP (operands[1], 0))))
+ {
+ rtx tocref;
+ rtx reg = NULL_RTX;
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ {
+ if (can_create_pseudo_p ())
+ reg = gen_reg_rtx (Pmode);
+ else
+ reg = operands[0];
+ }
+ tocref = create_TOC_reference (XEXP (operands[1], 0), reg);
+ operands[1] = gen_const_mem (mode, tocref);
+ set_mem_alias_set (operands[1], get_TOC_alias_set ());
+ }
+ }
+ break;
+
+ case TImode:
+ rs6000_eliminate_indexed_memrefs (operands);
+
+ if (TARGET_POWER)
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode,
+ operands[0], operands[1]),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (SImode)))));
+ return;
+ }
+ break;
+
+ default:
+ fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
+ }
+
+ /* Above, we may have called force_const_mem which may have returned
+ an invalid address. If we can, fix this up; otherwise, reload will
+ have to deal with it. */
+ if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
+ operands[1] = validize_mem (operands[1]);
+
+ emit_set:
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+}
+
+/* Nonzero if we can use a floating-point register to pass this arg. */
+#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
+ (SCALAR_FLOAT_MODE_P (MODE) \
+ && (CUM)->fregno <= FP_ARG_MAX_REG \
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
+
+/* Nonzero if we can use an AltiVec register to pass this arg. */
+#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
+ (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
+ && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
+ && TARGET_ALTIVEC_ABI \
+ && (NAMED))
+
+/* Return a nonzero value to say to return the function value in
+ memory, just as large structures are always returned. TYPE will be
+ the data type of the value, and FNTYPE will be the type of the
+ function doing the returning, or @code{NULL} for libcalls.
+
+ The AIX ABI for the RS/6000 specifies that all structures are
+ returned in memory. The Darwin ABI does the same.
+
+ For the Darwin 64 Bit ABI, a function result can be returned in
+ registers or in memory, depending on the size of the return data
+ type. If it is returned in registers, the value occupies the same
+ registers as it would if it were the first and only function
+ argument. Otherwise, the function places its result in memory at
+ the location pointed to by GPR3.
+
+ The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
+ but a draft put them in memory, and GCC used to implement the draft
+ instead of the final standard. Therefore, aix_struct_return
+ controls this instead of DEFAULT_ABI; V.4 targets needing backward
+ compatibility can change DRAFT_V4_STRUCT_RET to override the
+ default, and -m switches get the final word. See
+ rs6000_option_override_internal for more details.
+
+ The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
+ long double support is enabled. These values are returned in memory.
+
+ int_size_in_bytes returns -1 for variable size objects, which go in
+ memory always. The cast to unsigned makes -1 > 8. */
+
+static bool
+rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ /* For the Darwin64 ABI, test if we can fit the return value in regs. */
+ if (TARGET_MACHO
+ && rs6000_darwin64_abi
+ && TREE_CODE (type) == RECORD_TYPE
+ && int_size_in_bytes (type) > 0)
+ {
+ CUMULATIVE_ARGS valcum;
+ rtx valret;
+
+ valcum.words = 0;
+ valcum.fregno = FP_ARG_MIN_REG;
+ valcum.vregno = ALTIVEC_ARG_MIN_REG;
+ /* Do a trial code generation as if this were going to be passed
+ as an argument; if any part goes in memory, we return NULL. */
+ valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
+ if (valret)
+ return false;
+ /* Otherwise fall through to more conventional ABI rules. */
+ }
+
+ if (AGGREGATE_TYPE_P (type)
+ && (aix_struct_return
+ || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
+ return true;
+
+ /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
+ modes only exist for GCC vector types if -maltivec. */
+ if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
+ && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
+ return false;
+
+ /* Return synthetic vectors in memory. */
+ if (TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
+ {
+ static bool warned_for_return_big_vectors = false;
+ if (!warned_for_return_big_vectors)
+ {
+ warning (0, "GCC vector returned by reference: "
+ "non-standard ABI extension with no compatibility guarantee");
+ warned_for_return_big_vectors = true;
+ }
+ return true;
+ }
+
+ if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
+ return true;
+
+ return false;
+}
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+/* Return TRUE if a call to function FNDECL may be one that
+ potentially affects the function calling ABI of the object file. */
+
+static bool
+call_ABI_of_interest (tree fndecl)
+{
+ if (cgraph_state == CGRAPH_STATE_EXPANSION)
+ {
+ struct cgraph_node *c_node;
+
+ /* Libcalls are always interesting. */
+ if (fndecl == NULL_TREE)
+ return true;
+
+ /* Any call to an external function is interesting. */
+ if (DECL_EXTERNAL (fndecl))
+ return true;
+
+ /* Interesting functions that we are emitting in this object file. */
+ c_node = cgraph_node (fndecl);
+ return !cgraph_only_called_directly_p (c_node);
+ }
+ return false;
+}
+#endif
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
+
+   For incoming args we set the number of prototype arguments high enough
+   that we never return a PARALLEL.  */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED, int incoming,
+ int libcall, int n_named_args,
+ tree fndecl ATTRIBUTE_UNUSED,
+ enum machine_mode return_mode ATTRIBUTE_UNUSED)
+{
+ static CUMULATIVE_ARGS zero_cumulative;
+
+ *cum = zero_cumulative;
+ cum->words = 0;
+ cum->fregno = FP_ARG_MIN_REG;
+ cum->vregno = ALTIVEC_ARG_MIN_REG;
+ cum->prototype = (fntype && prototype_p (fntype));
+ cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
+ ? CALL_LIBCALL : CALL_NORMAL);
+ cum->sysv_gregno = GP_ARG_MIN_REG;
+ cum->stdarg = stdarg_p (fntype);
+
+ cum->nargs_prototype = 0;
+ if (incoming || cum->prototype)
+ cum->nargs_prototype = n_named_args;
+
+ /* Check for a longcall attribute. */
+ if ((!fntype && rs6000_default_long_calls)
+ || (fntype
+ && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
+ && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
+ cum->call_cookie |= CALL_LONG;
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "\ninit_cumulative_args:");
+ if (fntype)
+ {
+ tree ret_type = TREE_TYPE (fntype);
+ fprintf (stderr, " ret code = %s,",
+ tree_code_name[ (int)TREE_CODE (ret_type) ]);
+ }
+
+ if (cum->call_cookie & CALL_LONG)
+ fprintf (stderr, " longcall,");
+
+ fprintf (stderr, " proto = %d, nargs = %d\n",
+ cum->prototype, cum->nargs_prototype);
+ }
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ if (DEFAULT_ABI == ABI_V4)
+ {
+ cum->escapes = call_ABI_of_interest (fndecl);
+ if (cum->escapes)
+ {
+ tree return_type;
+
+ if (fntype)
+ {
+ return_type = TREE_TYPE (fntype);
+ return_mode = TYPE_MODE (return_type);
+ }
+ else
+ return_type = lang_hooks.types.type_for_mode (return_mode, 0);
+
+ if (return_type != NULL)
+ {
+ if (TREE_CODE (return_type) == RECORD_TYPE
+ && TYPE_TRANSPARENT_AGGR (return_type))
+ {
+ return_type = TREE_TYPE (first_field (return_type));
+ return_mode = TYPE_MODE (return_type);
+ }
+ if (AGGREGATE_TYPE_P (return_type)
+ && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
+ <= 8))
+ rs6000_returns_struct = true;
+ }
+ if (SCALAR_FLOAT_MODE_P (return_mode))
+ rs6000_passes_float = true;
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
+ || SPE_VECTOR_MODE (return_mode))
+ rs6000_passes_vector = true;
+ }
+ }
+#endif
+
+ if (fntype
+ && !TARGET_ALTIVEC
+ && TARGET_ALTIVEC_ABI
+ && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
+ {
+ error ("cannot return value in vector register because"
+ " altivec instructions are disabled, use -maltivec"
+ " to enable them");
+ }
+}
+
+/* Return true if TYPE must be passed on the stack and not in registers. */
+
+static bool
+rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
+ return must_pass_in_stack_var_size (mode, type);
+ else
+ return must_pass_in_stack_var_size_or_pad (mode, type);
+}
+
+/* If defined, a C expression which determines whether, and in which
+ direction, to pad out an argument with extra space. The value
+ should be of type `enum direction': either `upward' to pad above
+ the argument, `downward' to pad below, or `none' to inhibit
+ padding.
+
+ For the AIX ABI structs are always stored left shifted in their
+ argument slot. */
+
+enum direction
+function_arg_padding (enum machine_mode mode, const_tree type)
+{
+#ifndef AGGREGATE_PADDING_FIXED
+#define AGGREGATE_PADDING_FIXED 0
+#endif
+#ifndef AGGREGATES_PAD_UPWARD_ALWAYS
+#define AGGREGATES_PAD_UPWARD_ALWAYS 0
+#endif
+
+ if (!AGGREGATE_PADDING_FIXED)
+ {
+ /* GCC used to pass structures of the same size as integer types as
+       if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
+       i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
+ passed padded downward, except that -mstrict-align further
+ muddied the water in that multi-component structures of 2 and 4
+ bytes in size were passed padded upward.
+
+ The following arranges for best compatibility with previous
+ versions of gcc, but removes the -mstrict-align dependency. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ HOST_WIDE_INT size = 0;
+
+ if (mode == BLKmode)
+ {
+ if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
+ size = int_size_in_bytes (type);
+ }
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (size == 1 || size == 2 || size == 4)
+ return downward;
+ }
+ return upward;
+ }
+
+ if (AGGREGATES_PAD_UPWARD_ALWAYS)
+ {
+ if (type != 0 && AGGREGATE_TYPE_P (type))
+ return upward;
+ }
+
+ /* Fall back to the default. */
+ return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
+}
+
+/* If defined, a C expression that gives the alignment boundary, in bits,
+ of an argument with the specified mode and type. If it is not defined,
+ PARM_BOUNDARY is used for all arguments.
+
+ V.4 wants long longs and doubles to be double word aligned. Just
+ testing the mode size is a boneheaded way to do this as it means
+ that other types such as complex int are also double word aligned.
+ However, we're stuck with this because changing the ABI might break
+ existing library interfaces.
+
+ Doubleword align SPE vectors.
+ Quadword align Altivec/VSX vectors.
+ Quadword align large synthetic vector types. */
+
+static unsigned int
+rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ if (DEFAULT_ABI == ABI_V4
+ && (GET_MODE_SIZE (mode) == 8
+ || (TARGET_HARD_FLOAT
+ && TARGET_FPRS
+ && (mode == TFmode || mode == TDmode))))
+ return 64;
+ else if (SPE_VECTOR_MODE (mode)
+ || (type && TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) >= 8
+ && int_size_in_bytes (type) < 16))
+ return 64;
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ || (type && TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) >= 16))
+ return 128;
+ else if (TARGET_MACHO
+ && rs6000_darwin64_abi
+ && mode == BLKmode
+ && type && TYPE_ALIGN (type) > 64)
+ return 128;
+ else
+ return PARM_BOUNDARY;
+}
+
+/* For a function parm of MODE and TYPE, return the starting word in
+ the parameter area. NWORDS of the parameter area are already used. */
+
+static unsigned int
+rs6000_parm_start (enum machine_mode mode, const_tree type,
+ unsigned int nwords)
+{
+ unsigned int align;
+ unsigned int parm_offset;
+
+ align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
+ parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
+ return nwords + (-(parm_offset + nwords) & align);
+}
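+
+/* E.g., under the 32-bit V.4 ABI (parm_offset of 2 words), a doubleword
+   aligned argument arriving with NWORDS == 1 starts at word 2, making
+   parm_offset + start (4 words) a multiple of the 2-word alignment
+   (a sketch assuming the usual one-word PARM_BOUNDARY).  */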
+
+/* Compute the size (in words) of a function argument. */
+
+static unsigned long
+rs6000_arg_size (enum machine_mode mode, const_tree type)
+{
+ unsigned long size;
+
+ if (mode != BLKmode)
+ size = GET_MODE_SIZE (mode);
+ else
+ size = int_size_in_bytes (type);
+
+ if (TARGET_32BIT)
+ return (size + 3) >> 2;
+ else
+ return (size + 7) >> 3;
+}
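+
+/* E.g., a 10-byte BLKmode argument occupies (10 + 3) >> 2 == 3 words
+   in 32-bit mode and (10 + 7) >> 3 == 2 doublewords in 64-bit mode.  */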
+
+/* Use this to flush pending int fields. */
+
+static void
+rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
+ HOST_WIDE_INT bitpos, int final)
+{
+ unsigned int startbit, endbit;
+ int intregs, intoffset;
+ enum machine_mode mode;
+
+ /* Handle the situations where a float is taking up the first half
+ of the GPR, and the other half is empty (typically due to
+     alignment restrictions).  We can detect this by an 8-byte-aligned
+ int field, or by seeing that this is the final flush for this
+ argument. Count the word and continue on. */
+ if (cum->floats_in_gpr == 1
+ && (cum->intoffset % 64 == 0
+ || (cum->intoffset == -1 && final)))
+ {
+ cum->words++;
+ cum->floats_in_gpr = 0;
+ }
+
+ if (cum->intoffset == -1)
+ return;
+
+ intoffset = cum->intoffset;
+ cum->intoffset = -1;
+ cum->floats_in_gpr = 0;
+
+ if (intoffset % BITS_PER_WORD != 0)
+ {
+ mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
+ MODE_INT, 0);
+ if (mode == BLKmode)
+ {
+ /* We couldn't find an appropriate mode, which happens,
+ e.g., in packed structs when there are 3 bytes to load.
+	     Move intoffset back to the beginning of the word in this
+	     case.  */
+ intoffset = intoffset & -BITS_PER_WORD;
+ }
+ }
+
+ startbit = intoffset & -BITS_PER_WORD;
+ endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
+ intregs = (endbit - startbit) / BITS_PER_WORD;
+ cum->words += intregs;
+ /* words should be unsigned. */
+ if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
+ {
+ int pad = (endbit/BITS_PER_WORD) - cum->words;
+ cum->words += pad;
+ }
+}
+
+/* The darwin64 ABI calls for us to recurse down through structs,
+ looking for elements passed in registers. Unfortunately, we have
+ to track int register count here also because of misalignments
+ in powerpc alignment mode. */
+
+static void
+rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
+ const_tree type,
+ HOST_WIDE_INT startbitpos)
+{
+ tree f;
+
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ {
+ HOST_WIDE_INT bitpos = startbitpos;
+ tree ftype = TREE_TYPE (f);
+ enum machine_mode mode;
+ if (ftype == error_mark_node)
+ continue;
+ mode = TYPE_MODE (ftype);
+
+ if (DECL_SIZE (f) != 0
+ && host_integerp (bit_position (f), 1))
+ bitpos += int_bit_position (f);
+
+ /* ??? FIXME: else assume zero offset. */
+
+ if (TREE_CODE (ftype) == RECORD_TYPE)
+ rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
+ else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
+ {
+ unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
+ rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
+ cum->fregno += n_fpregs;
+ /* Single-precision floats present a special problem for
+ us, because they are smaller than an 8-byte GPR, and so
+ the structure-packing rules combined with the standard
+ varargs behavior mean that we want to pack float/float
+ and float/int combinations into a single register's
+ space. This is complicated by the arg advance flushing,
+ which works on arbitrarily large groups of int-type
+ fields. */
+ if (mode == SFmode)
+ {
+ if (cum->floats_in_gpr == 1)
+ {
+ /* Two floats in a word; count the word and reset
+ the float count. */
+ cum->words++;
+ cum->floats_in_gpr = 0;
+ }
+ else if (bitpos % 64 == 0)
+ {
+ /* A float at the beginning of an 8-byte word;
+ count it and put off adjusting cum->words until
+		     we see if an arg advance flush is going to do it
+ for us. */
+ cum->floats_in_gpr++;
+ }
+ else
+ {
+ /* The float is at the end of a word, preceded
+ by integer fields, so the arg advance flush
+ just above has already set cum->words and
+ everything is taken care of. */
+ }
+ }
+ else
+ cum->words += n_fpregs;
+ }
+ else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
+ {
+ rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
+ cum->vregno++;
+ cum->words += 2;
+ }
+ else if (cum->intoffset == -1)
+ cum->intoffset = bitpos;
+ }
+}
+
+/* Check for an item that needs to be considered specially under the darwin 64
+ bit ABI. These are record types where the mode is BLK or the structure is
+ 8 bytes in size. */
+static int
+rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
+{
+ return rs6000_darwin64_abi
+ && ((mode == BLKmode
+ && TREE_CODE (type) == RECORD_TYPE
+ && int_size_in_bytes (type) > 0)
+ || (type && TREE_CODE (type) == RECORD_TYPE
+ && int_size_in_bytes (type) == 8)) ? 1 : 0;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.)
+
+ Note that for args passed by reference, function_arg will be called
+ with MODE and TYPE set to that of the pointer to the arg, not the arg
+ itself. */
+
+static void
+rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named, int depth)
+{
+ /* Only tick off an argument if we're not recursing. */
+ if (depth == 0)
+ cum->nargs_prototype--;
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ if (DEFAULT_ABI == ABI_V4
+ && cum->escapes)
+ {
+ if (SCALAR_FLOAT_MODE_P (mode))
+ rs6000_passes_float = true;
+ else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
+ rs6000_passes_vector = true;
+ else if (SPE_VECTOR_MODE (mode)
+ && !cum->stdarg
+ && cum->sysv_gregno <= GP_ARG_MAX_REG)
+ rs6000_passes_vector = true;
+ }
+#endif
+
+ if (TARGET_ALTIVEC_ABI
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ || (type && TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) == 16)))
+ {
+ bool stack = false;
+
+ if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
+ {
+ cum->vregno++;
+ if (!TARGET_ALTIVEC)
+ error ("cannot pass argument in vector register because"
+ " altivec instructions are disabled, use -maltivec"
+ " to enable them");
+
+ /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
+ even if it is going to be passed in a vector register.
+ Darwin does the same for variable-argument functions. */
+ if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ || (cum->stdarg && DEFAULT_ABI != ABI_V4))
+ stack = true;
+ }
+ else
+ stack = true;
+
+ if (stack)
+ {
+ int align;
+
+ /* Vector parameters must be 16-byte aligned. This places
+ them at 2 mod 4 in terms of words in 32-bit mode, since
+ the parameter save area starts at offset 24 from the
+ stack. In 64-bit mode, they just have to start on an
+ even word, since the parameter save area is 16-byte
+ aligned. Space for GPRs is reserved even if the argument
+ will be passed in memory. */
+ if (TARGET_32BIT)
+ align = (2 - cum->words) & 3;
+ else
+ align = cum->words & 1;
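+	  /* E.g., in 32-bit mode with cum->words == 1, ALIGN comes out
+	     to 1 and the vector starts at word 2: byte offset
+	     24 + 8 == 32, which is 16-byte aligned.  */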
+ cum->words += align + rs6000_arg_size (mode, type);
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "function_adv: words = %2d, align=%d, ",
+ cum->words, align);
+ fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
+ cum->nargs_prototype, cum->prototype,
+ GET_MODE_NAME (mode));
+ }
+ }
+ }
+ else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
+ && !cum->stdarg
+ && cum->sysv_gregno <= GP_ARG_MAX_REG)
+ cum->sysv_gregno++;
+
+ else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
+ {
+ int size = int_size_in_bytes (type);
+ /* Variable sized types have size == -1 and are
+ treated as if consisting entirely of ints.
+ Pad to 16 byte boundary if needed. */
+ if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
+ && (cum->words % 2) != 0)
+ cum->words++;
+ /* For varargs, we can just go up by the size of the struct. */
+ if (!named)
+ cum->words += (size + 7) / 8;
+ else
+ {
+ /* It is tempting to say int register count just goes up by
+ sizeof(type)/8, but this is wrong in a case such as
+ { int; double; int; } [powerpc alignment]. We have to
+ grovel through the fields for these too. */
+ cum->intoffset = 0;
+ cum->floats_in_gpr = 0;
+ rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
+ rs6000_darwin64_record_arg_advance_flush (cum,
+ size * BITS_PER_UNIT, 1);
+ }
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
+ cum->words, TYPE_ALIGN (type), size);
+ fprintf (stderr,
+ "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
+ cum->nargs_prototype, cum->prototype,
+ GET_MODE_NAME (mode));
+ }
+ }
+ else if (DEFAULT_ABI == ABI_V4)
+ {
+ if (TARGET_HARD_FLOAT && TARGET_FPRS
+ && ((TARGET_SINGLE_FLOAT && mode == SFmode)
+ || (TARGET_DOUBLE_FLOAT && mode == DFmode)
+ || (mode == TFmode && !TARGET_IEEEQUAD)
+ || mode == SDmode || mode == DDmode || mode == TDmode))
+ {
+ /* _Decimal128 must use an even/odd register pair. This assumes
+ that the register number is odd when fregno is odd. */
+ if (mode == TDmode && (cum->fregno % 2) == 1)
+ cum->fregno++;
+
+ if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
+ <= FP_ARG_V4_MAX_REG)
+ cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+ else
+ {
+ cum->fregno = FP_ARG_V4_MAX_REG + 1;
+ if (mode == DFmode || mode == TFmode
+ || mode == DDmode || mode == TDmode)
+ cum->words += cum->words & 1;
+ cum->words += rs6000_arg_size (mode, type);
+ }
+ }
+ else
+ {
+ int n_words = rs6000_arg_size (mode, type);
+ int gregno = cum->sysv_gregno;
+
+	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
+	     (r7,r8) or (r9,r10), as is any other 2-word item such
+	     as complex int, due to a historical mistake.  */
+ if (n_words == 2)
+ gregno += (1 - gregno) & 1;
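+	  /* E.g. (editorial note): gregno == 4 gives (1 - 4) & 1 == 1 and
+	     bumps to r5, while an odd gregno is left unchanged; pairs
+	     thus start at r3, r5, r7 or r9.  */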
+
+ /* Multi-reg args are not split between registers and stack. */
+ if (gregno + n_words - 1 > GP_ARG_MAX_REG)
+ {
+ /* Long long and SPE vectors are aligned on the stack.
+ So are other 2 word items such as complex int due to
+ a historical mistake. */
+ if (n_words == 2)
+ cum->words += cum->words & 1;
+ cum->words += n_words;
+ }
+
+	  /* Note: we keep accumulating gregno even after we have started
+	     spilling to the stack; this is how expand_builtin_saveregs
+	     can tell that spilling has begun.  */
+ cum->sysv_gregno = gregno + n_words;
+ }
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
+ cum->words, cum->fregno);
+ fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
+ cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
+ fprintf (stderr, "mode = %4s, named = %d\n",
+ GET_MODE_NAME (mode), named);
+ }
+ }
+ else
+ {
+ int n_words = rs6000_arg_size (mode, type);
+ int start_words = cum->words;
+ int align_words = rs6000_parm_start (mode, type, start_words);
+
+ cum->words = align_words + n_words;
+
+ if (SCALAR_FLOAT_MODE_P (mode)
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
+ {
+ /* _Decimal128 must be passed in an even/odd float register pair.
+ This assumes that the register number is odd when fregno is
+ odd. */
+ if (mode == TDmode && (cum->fregno % 2) == 1)
+ cum->fregno++;
+ cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+ }
+
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
+ cum->words, cum->fregno);
+ fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
+ cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
+ fprintf (stderr, "named = %d, align = %d, depth = %d\n",
+ named, align_words - start_words, depth);
+ }
+ }
+}
+
+static void
+rs6000_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ rs6000_function_arg_advance_1 (cum, mode, type, named, 0);
+}
+
+static rtx
+spe_build_register_parallel (enum machine_mode mode, int gregno)
+{
+ rtx r1, r3, r5, r7;
+
+ switch (mode)
+ {
+ case DFmode:
+ r1 = gen_rtx_REG (DImode, gregno);
+ r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
+
+ case DCmode:
+ case TFmode:
+ r1 = gen_rtx_REG (DImode, gregno);
+ r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
+ r3 = gen_rtx_REG (DImode, gregno + 2);
+ r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
+
+ case TCmode:
+ r1 = gen_rtx_REG (DImode, gregno);
+ r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
+ r3 = gen_rtx_REG (DImode, gregno + 2);
+ r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
+ r5 = gen_rtx_REG (DImode, gregno + 4);
+ r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
+ r7 = gen_rtx_REG (DImode, gregno + 6);
+ r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
+
+ default:
+ gcc_unreachable ();
+ }
+}
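+/* For example (editorial sketch), TFmode with gregno == 5 yields
+   (parallel [(expr_list (reg:DI r5) (const_int 0))
+              (expr_list (reg:DI r7) (const_int 8))]),
+   i.e. the two 8-byte halves of the value live in r5 and r7.  */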
+
+/* Determine where to put a SIMD argument on the SPE. */
+static rtx
+rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type)
+{
+ int gregno = cum->sysv_gregno;
+
+  /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
+     doubles are passed and returned in a pair of GPRs for ABI
+     compatibility.  */
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DCmode || mode == TCmode))
+ {
+ int n_words = rs6000_arg_size (mode, type);
+
+ /* Doubles go in an odd/even register pair (r5/r6, etc). */
+ if (mode == DFmode)
+ gregno += (1 - gregno) & 1;
+
+ /* Multi-reg args are not split between registers and stack. */
+ if (gregno + n_words - 1 > GP_ARG_MAX_REG)
+ return NULL_RTX;
+
+ return spe_build_register_parallel (mode, gregno);
+ }
+ if (cum->stdarg)
+ {
+ int n_words = rs6000_arg_size (mode, type);
+
+ /* SPE vectors are put in odd registers. */
+ if (n_words == 2 && (gregno & 1) == 0)
+ gregno += 1;
+
+ if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
+ {
+ rtx r1, r2;
+ enum machine_mode m = SImode;
+
+ r1 = gen_rtx_REG (m, gregno);
+ r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
+ r2 = gen_rtx_REG (m, gregno + 1);
+ r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
+ }
+ else
+ return NULL_RTX;
+ }
+ else
+ {
+ if (gregno <= GP_ARG_MAX_REG)
+ return gen_rtx_REG (mode, gregno);
+ else
+ return NULL_RTX;
+ }
+}
+
+/* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
+ structure between cum->intoffset and bitpos to integer registers. */
+
+static void
+rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
+ HOST_WIDE_INT bitpos, rtx rvec[], int *k)
+{
+ enum machine_mode mode;
+ unsigned int regno;
+ unsigned int startbit, endbit;
+ int this_regno, intregs, intoffset;
+ rtx reg;
+
+ if (cum->intoffset == -1)
+ return;
+
+ intoffset = cum->intoffset;
+ cum->intoffset = -1;
+
+  /* If this is the trailing part of a word, try to only load that
+     much into the register.  Otherwise load the whole register.  Note
+     that in the latter case we may pick up unwanted bits.  It's not a
+     problem at the moment, but we may wish to revisit this.  */
+
+ if (intoffset % BITS_PER_WORD != 0)
+ {
+ mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
+ MODE_INT, 0);
+ if (mode == BLKmode)
+ {
+	  /* We couldn't find an appropriate mode, which happens,
+	     e.g., in packed structs when there are 3 bytes to load.
+	     Move intoffset back to the beginning of the word in this
+	     case.  */
+ intoffset = intoffset & -BITS_PER_WORD;
+ mode = word_mode;
+ }
+ }
+ else
+ mode = word_mode;
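+  /* Example (editorial sketch, 64-bit word): intoffset == 48 leaves 16
+     trailing bits and mode_for_size picks HImode; a packed struct
+     leaving 24 bits has no matching integer mode, so we fall back to a
+     whole-word load from the word boundary.  */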
+
+ startbit = intoffset & -BITS_PER_WORD;
+ endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
+ intregs = (endbit - startbit) / BITS_PER_WORD;
+ this_regno = cum->words + intoffset / BITS_PER_WORD;
+
+ if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
+ cum->use_stack = 1;
+
+ intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
+ if (intregs <= 0)
+ return;
+
+ intoffset /= BITS_PER_UNIT;
+ do
+ {
+ regno = GP_ARG_MIN_REG + this_regno;
+ reg = gen_rtx_REG (mode, regno);
+ rvec[(*k)++] =
+ gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
+
+ this_regno += 1;
+ intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
+ mode = word_mode;
+ intregs -= 1;
+ }
+ while (intregs > 0);
+}
+
+/* Recursive workhorse for the following. */
+
+static void
+rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
+ HOST_WIDE_INT startbitpos, rtx rvec[],
+ int *k)
+{
+ tree f;
+
+ for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ {
+ HOST_WIDE_INT bitpos = startbitpos;
+ tree ftype = TREE_TYPE (f);
+ enum machine_mode mode;
+ if (ftype == error_mark_node)
+ continue;
+ mode = TYPE_MODE (ftype);
+
+ if (DECL_SIZE (f) != 0
+ && host_integerp (bit_position (f), 1))
+ bitpos += int_bit_position (f);
+
+ /* ??? FIXME: else assume zero offset. */
+
+ if (TREE_CODE (ftype) == RECORD_TYPE)
+ rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
+ else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
+ {
+ unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+#if 0
+ switch (mode)
+ {
+ case SCmode: mode = SFmode; break;
+ case DCmode: mode = DFmode; break;
+ case TCmode: mode = TFmode; break;
+ default: break;
+ }
+#endif
+ rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
+ if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
+ {
+ gcc_assert (cum->fregno == FP_ARG_MAX_REG
+ && (mode == TFmode || mode == TDmode));
+ /* Long double or _Decimal128 split over regs and memory. */
+ mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
+	      cum->use_stack = 1;
+ }
+ rvec[(*k)++]
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, cum->fregno++),
+ GEN_INT (bitpos / BITS_PER_UNIT));
+ if (mode == TFmode || mode == TDmode)
+ cum->fregno++;
+ }
+ else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
+ {
+ rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
+ rvec[(*k)++]
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, cum->vregno++),
+ GEN_INT (bitpos / BITS_PER_UNIT));
+ }
+ else if (cum->intoffset == -1)
+ cum->intoffset = bitpos;
+ }
+}
+
+/* For the darwin64 ABI, we want to construct a PARALLEL consisting of
+ the register(s) to be used for each field and subfield of a struct
+ being passed by value, along with the offset of where the
+   register's value may be found in the block.  FP fields go in FP
+   registers, vector fields go in vector registers, and everything
+ else goes in int registers, packed as in memory.
+
+ This code is also used for function return values. RETVAL indicates
+ whether this is the case.
+
+ Much of this is taken from the SPARC V9 port, which has a similar
+ calling convention. */
+
+static rtx
+rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
+ bool named, bool retval)
+{
+ rtx rvec[FIRST_PSEUDO_REGISTER];
+ int k = 1, kbase = 1;
+ HOST_WIDE_INT typesize = int_size_in_bytes (type);
+ /* This is a copy; modifications are not visible to our caller. */
+ CUMULATIVE_ARGS copy_cum = *orig_cum;
+ CUMULATIVE_ARGS *cum = &copy_cum;
+
+  /* Pad to a 16-byte boundary if needed.  */
+ if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
+ && (cum->words % 2) != 0)
+ cum->words++;
+
+ cum->intoffset = 0;
+ cum->use_stack = 0;
+ cum->named = named;
+
+ /* Put entries into rvec[] for individual FP and vector fields, and
+ for the chunks of memory that go in int regs. Note we start at
+ element 1; 0 is reserved for an indication of using memory, and
+ may or may not be filled in below. */
+ rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
+ rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
+
+ /* If any part of the struct went on the stack put all of it there.
+ This hack is because the generic code for
+ FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
+ parts of the struct are not at the beginning. */
+ if (cum->use_stack)
+ {
+ if (retval)
+ return NULL_RTX; /* doesn't go in registers at all */
+ kbase = 0;
+ rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+ if (k > 1 || cum->use_stack)
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
+ else
+ return NULL_RTX;
+}
+
+/* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
+
+static rtx
+rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
+ int align_words)
+{
+ int n_units;
+ int i, k;
+ rtx rvec[GP_ARG_NUM_REG + 1];
+
+ if (align_words >= GP_ARG_NUM_REG)
+ return NULL_RTX;
+
+ n_units = rs6000_arg_size (mode, type);
+
+ /* Optimize the simple case where the arg fits in one gpr, except in
+ the case of BLKmode due to assign_parms assuming that registers are
+ BITS_PER_WORD wide. */
+ if (n_units == 0
+ || (n_units == 1 && mode != BLKmode))
+ return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+
+ k = 0;
+ if (align_words + n_units > GP_ARG_NUM_REG)
+ /* Not all of the arg fits in gprs. Say that it goes in memory too,
+ using a magic NULL_RTX component.
+ This is not strictly correct. Only some of the arg belongs in
+ memory, not all of it. However, the normal scheme using
+     function_arg_partial_nregs can result in unusual subregs, e.g.
+ (subreg:SI (reg:DF) 4), which are not handled well. The code to
+ store the whole arg to memory is often more efficient than code
+ to store pieces, and we know that space is available in the right
+ place for the whole arg. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+
+ i = 0;
+ do
+ {
+ rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
+ rtx off = GEN_INT (i++ * 4);
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+ while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+}
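+/* Example (editorial sketch, assuming the usual r3..r10 argument GPRs):
+   a 3-word BLKmode arg with align_words == 6 fills r9 and r10 and
+   spills one word, so the result is
+   (parallel [(expr_list (nil) 0)
+              (expr_list (reg:SI r9) 0)
+              (expr_list (reg:SI r10) 4)]).  */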
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called. It is
+ not modified in this routine.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On RS/6000 the first eight words of non-FP are normally in registers
+ and the rest are pushed. Under AIX, the first 13 FP args are in registers.
+ Under V.4, the first 8 FP args are in registers.
+
+ If this is floating-point and no prototype is specified, we use
+ both an FP and integer register (or possibly FP reg and stack). Library
+ functions (when CALL_LIBCALL is set) always have the proper types for args,
+ so we can pass the FP value just in one register. emit_library_function
+ doesn't support PARALLEL anyway.
+
+ Note that for args passed by reference, function_arg will be called
+ with MODE and TYPE set to that of the pointer to the arg, not the arg
+ itself. */
+
+static rtx
+rs6000_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ enum rs6000_abi abi = DEFAULT_ABI;
+
+ /* Return a marker to indicate whether CR1 needs to set or clear the
+ bit that V.4 uses to say fp args were passed in registers.
+ Assume that we don't need the marker for software floating point,
+ or compiler generated library calls. */
+ if (mode == VOIDmode)
+ {
+ if (abi == ABI_V4
+ && (cum->call_cookie & CALL_LIBCALL) == 0
+ && (cum->stdarg
+ || (cum->nargs_prototype < 0
+ && (cum->prototype || TARGET_NO_PROTOTYPE))))
+ {
+ /* For the SPE, we need to crxor CR6 always. */
+ if (TARGET_SPE_ABI)
+ return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
+ else if (TARGET_HARD_FLOAT && TARGET_FPRS)
+ return GEN_INT (cum->call_cookie
+ | ((cum->fregno == FP_ARG_MIN_REG)
+ ? CALL_V4_SET_FP_ARGS
+ : CALL_V4_CLEAR_FP_ARGS));
+ }
+
+ return GEN_INT (cum->call_cookie);
+ }
+
+ if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
+ {
+ rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
+ if (rslt != NULL_RTX)
+ return rslt;
+ /* Else fall through to usual handling. */
+ }
+
+ if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
+ if (TARGET_64BIT && ! cum->prototype)
+ {
+ /* Vector parameters get passed in vector register
+ and also in GPRs or memory, in absence of prototype. */
+ int align_words;
+ rtx slot;
+ align_words = (cum->words + 1) & ~1;
+
+ if (align_words >= GP_ARG_NUM_REG)
+ {
+ slot = NULL_RTX;
+ }
+ else
+ {
+ slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ }
+ return gen_rtx_PARALLEL (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ slot, const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (mode, cum->vregno),
+ const0_rtx)));
+ }
+ else
+ return gen_rtx_REG (mode, cum->vregno);
+ else if (TARGET_ALTIVEC_ABI
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ || (type && TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) == 16)))
+ {
+ if (named || abi == ABI_V4)
+ return NULL_RTX;
+ else
+ {
+ /* Vector parameters to varargs functions under AIX or Darwin
+ get passed in memory and possibly also in GPRs. */
+ int align, align_words, n_words;
+ enum machine_mode part_mode;
+
+ /* Vector parameters must be 16-byte aligned. This places them at
+ 2 mod 4 in terms of words in 32-bit mode, since the parameter
+ save area starts at offset 24 from the stack. In 64-bit mode,
+ they just have to start on an even word, since the parameter
+ save area is 16-byte aligned. */
+ if (TARGET_32BIT)
+ align = (2 - cum->words) & 3;
+ else
+ align = cum->words & 1;
+ align_words = cum->words + align;
+
+ /* Out of registers? Memory, then. */
+ if (align_words >= GP_ARG_NUM_REG)
+ return NULL_RTX;
+
+ if (TARGET_32BIT && TARGET_POWERPC64)
+ return rs6000_mixed_function_arg (mode, type, align_words);
+
+ /* The vector value goes in GPRs. Only the part of the
+ value in GPRs is reported here. */
+ part_mode = mode;
+ n_words = rs6000_arg_size (mode, type);
+ if (align_words + n_words > GP_ARG_NUM_REG)
+	    /* Fortunately, there are only two possibilities: the value
+	       is either wholly in GPRs or half in GPRs and half not.  */
+ part_mode = DImode;
+
+ return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
+ }
+ }
+ else if (TARGET_SPE_ABI && TARGET_SPE
+ && (SPE_VECTOR_MODE (mode)
+ || (TARGET_E500_DOUBLE && (mode == DFmode
+ || mode == DCmode
+ || mode == TFmode
+ || mode == TCmode))))
+ return rs6000_spe_function_arg (cum, mode, type);
+
+ else if (abi == ABI_V4)
+ {
+ if (TARGET_HARD_FLOAT && TARGET_FPRS
+ && ((TARGET_SINGLE_FLOAT && mode == SFmode)
+ || (TARGET_DOUBLE_FLOAT && mode == DFmode)
+ || (mode == TFmode && !TARGET_IEEEQUAD)
+ || mode == SDmode || mode == DDmode || mode == TDmode))
+ {
+ /* _Decimal128 must use an even/odd register pair. This assumes
+ that the register number is odd when fregno is odd. */
+ if (mode == TDmode && (cum->fregno % 2) == 1)
+ cum->fregno++;
+
+ if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
+ <= FP_ARG_V4_MAX_REG)
+ return gen_rtx_REG (mode, cum->fregno);
+ else
+ return NULL_RTX;
+ }
+ else
+ {
+ int n_words = rs6000_arg_size (mode, type);
+ int gregno = cum->sysv_gregno;
+
+	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
+	     (r7,r8) or (r9,r10), as is any other 2-word item such
+	     as complex int, due to a historical mistake.  */
+ if (n_words == 2)
+ gregno += (1 - gregno) & 1;
+
+ /* Multi-reg args are not split between registers and stack. */
+ if (gregno + n_words - 1 > GP_ARG_MAX_REG)
+ return NULL_RTX;
+
+ if (TARGET_32BIT && TARGET_POWERPC64)
+ return rs6000_mixed_function_arg (mode, type,
+ gregno - GP_ARG_MIN_REG);
+ return gen_rtx_REG (mode, gregno);
+ }
+ }
+ else
+ {
+ int align_words = rs6000_parm_start (mode, type, cum->words);
+
+ /* _Decimal128 must be passed in an even/odd float register pair.
+ This assumes that the register number is odd when fregno is odd. */
+ if (mode == TDmode && (cum->fregno % 2) == 1)
+ cum->fregno++;
+
+ if (USE_FP_FOR_ARG_P (cum, mode, type))
+ {
+ rtx rvec[GP_ARG_NUM_REG + 1];
+ rtx r;
+ int k;
+ bool needs_psave;
+ enum machine_mode fmode = mode;
+ unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+
+ if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
+ {
+ /* Currently, we only ever need one reg here because complex
+ doubles are split. */
+ gcc_assert (cum->fregno == FP_ARG_MAX_REG
+ && (fmode == TFmode || fmode == TDmode));
+
+ /* Long double or _Decimal128 split over regs and memory. */
+ fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
+ }
+
+ /* Do we also need to pass this arg in the parameter save
+ area? */
+ needs_psave = (type
+ && (cum->nargs_prototype <= 0
+ || (DEFAULT_ABI == ABI_AIX
+ && TARGET_XL_COMPAT
+ && align_words >= GP_ARG_NUM_REG)));
+
+ if (!needs_psave && mode == fmode)
+ return gen_rtx_REG (fmode, cum->fregno);
+
+ k = 0;
+ if (needs_psave)
+ {
+ /* Describe the part that goes in gprs or the stack.
+ This piece must come first, before the fprs. */
+ if (align_words < GP_ARG_NUM_REG)
+ {
+ unsigned long n_words = rs6000_arg_size (mode, type);
+
+ if (align_words + n_words > GP_ARG_NUM_REG
+ || (TARGET_32BIT && TARGET_POWERPC64))
+ {
+ /* If this is partially on the stack, then we only
+ include the portion actually in registers here. */
+ enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
+ rtx off;
+ int i = 0;
+ if (align_words + n_words > GP_ARG_NUM_REG)
+ /* Not all of the arg fits in gprs. Say that it
+ goes in memory too, using a magic NULL_RTX
+ component. Also see comment in
+ rs6000_mixed_function_arg for why the normal
+ function_arg_partial_nregs scheme doesn't work
+ in this case. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
+ const0_rtx);
+ do
+ {
+ r = gen_rtx_REG (rmode,
+ GP_ARG_MIN_REG + align_words);
+ off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
+ }
+ else
+ {
+ /* The whole arg fits in gprs. */
+ r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+ }
+ }
+ else
+ /* It's entirely in memory. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+
+ /* Describe where this piece goes in the fprs. */
+ r = gen_rtx_REG (fmode, cum->fregno);
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+ }
+ else if (align_words < GP_ARG_NUM_REG)
+ {
+ if (TARGET_32BIT && TARGET_POWERPC64)
+ return rs6000_mixed_function_arg (mode, type, align_words);
+
+ if (mode == BLKmode)
+ mode = Pmode;
+
+ return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ }
+ else
+ return NULL_RTX;
+ }
+}
+
+/* For an arg passed partly in registers and partly in memory, this is
+ the number of bytes passed in registers. For args passed entirely in
+ registers or entirely in memory, zero. When an arg is described by a
+ PARALLEL, perhaps using more than one register type, this function
+ returns the number of bytes used by the first element of the PARALLEL. */
+
+static int
+rs6000_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int ret = 0;
+ int align_words;
+
+ if (DEFAULT_ABI == ABI_V4)
+ return 0;
+
+ if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
+ && cum->nargs_prototype >= 0)
+ return 0;
+
+ /* In this complicated case we just disable the partial_nregs code. */
+ if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
+ return 0;
+
+ align_words = rs6000_parm_start (mode, type, cum->words);
+
+ if (USE_FP_FOR_ARG_P (cum, mode, type))
+ {
+ /* If we are passing this arg in the fixed parameter save area
+ (gprs or memory) as well as fprs, then this function should
+ return the number of partial bytes passed in the parameter
+ save area rather than partial bytes passed in fprs. */
+ if (type
+ && (cum->nargs_prototype <= 0
+ || (DEFAULT_ABI == ABI_AIX
+ && TARGET_XL_COMPAT
+ && align_words >= GP_ARG_NUM_REG)))
+ return 0;
+ else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
+ > FP_ARG_MAX_REG + 1)
+ ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
+ else if (cum->nargs_prototype >= 0)
+ return 0;
+ }
+
+ if (align_words < GP_ARG_NUM_REG
+ && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
+ ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
+
+ if (ret != 0 && TARGET_DEBUG_ARG)
+ fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
+
+ return ret;
+}
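+/* Example (editorial sketch, 32-bit): an 8-byte argument starting at
+   align_words == 7 has one word in r10 and one in memory, so this
+   returns (8 - 7) * 4 == 4 partial bytes.  */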
+
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type.
+
+ Under V.4, aggregates and long double are passed by reference.
+
+ As an extension to all 32-bit ABIs, AltiVec vectors are passed by
+ reference unless the AltiVec vector extension ABI is in force.
+
+ As an extension to all ABIs, variable sized types are passed by
+ reference. */
+
+static bool
+rs6000_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
+ return 1;
+ }
+
+ if (!type)
+ return 0;
+
+ if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
+ return 1;
+ }
+
+ if (int_size_in_bytes (type) < 0)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
+ return 1;
+ }
+
+ /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
+ modes only exist for GCC vector types if -maltivec. */
+ if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
+ return 1;
+ }
+
+ /* Pass synthetic vectors in memory. */
+ if (TREE_CODE (type) == VECTOR_TYPE
+ && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
+ {
+ static bool warned_for_pass_big_vectors = false;
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
+ if (!warned_for_pass_big_vectors)
+ {
+ warning (0, "GCC vector passed by reference: "
+ "non-standard ABI extension with no compatibility guarantee");
+ warned_for_pass_big_vectors = true;
+ }
+ return 1;
+ }
+
+ return 0;
+}
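+/* Example (editorial note): under V.4 a call f (struct { int a[4]; } s)
+   passes a pointer to a caller-made copy of s, whereas the AIX ABI
+   passes the bytes of the struct directly in GPRs/memory.  */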
+
+static void
+rs6000_move_block_from_reg (int regno, rtx x, int nregs)
+{
+ int i;
+ enum machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
+
+ if (nregs == 0)
+ return;
+
+ for (i = 0; i < nregs; i++)
+ {
+ rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
+ if (reload_completed)
+ {
+ if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
+ tem = NULL_RTX;
+ else
+ tem = simplify_gen_subreg (reg_mode, x, BLKmode,
+ i * GET_MODE_SIZE (reg_mode));
+ }
+ else
+ tem = replace_equiv_address (tem, XEXP (tem, 0));
+
+ gcc_assert (tem);
+
+ emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
+ }
+}
+
+/* Perform any actions needed for a function that is receiving a
+   variable number of arguments.
+
+ CUM is as above.
+
+ MODE and TYPE are the mode and type of the current parameter.
+
+ PRETEND_SIZE is a variable that should be set to the amount of stack
+ that must be pushed by the prolog to pretend that our caller pushed
+ it.
+
+ Normally, this macro will push all remaining incoming registers on the
+ stack and set PRETEND_SIZE to the length of the registers pushed. */
+
+static void
+setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS next_cum;
+ int reg_size = TARGET_32BIT ? 4 : 8;
+ rtx save_area = NULL_RTX, mem;
+ int first_reg_offset;
+ alias_set_type set;
+
+ /* Skip the last named argument. */
+ next_cum = *cum;
+ rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
+
+ if (DEFAULT_ABI == ABI_V4)
+ {
+ first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
+
+ if (! no_rtl)
+ {
+ int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
+ HOST_WIDE_INT offset = 0;
+
+ /* Try to optimize the size of the varargs save area.
+ The ABI requires that ap.reg_save_area is doubleword
+ aligned, but we don't need to allocate space for all
+ the bytes, only those to which we actually will save
+ anything. */
+ if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
+ gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
+ if (TARGET_HARD_FLOAT && TARGET_FPRS
+ && next_cum.fregno <= FP_ARG_V4_MAX_REG
+ && cfun->va_list_fpr_size)
+ {
+ if (gpr_reg_num)
+ fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
+ * UNITS_PER_FP_WORD;
+ if (cfun->va_list_fpr_size
+ < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
+ fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
+ else
+ fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
+ * UNITS_PER_FP_WORD;
+ }
+ if (gpr_reg_num)
+ {
+ offset = -((first_reg_offset * reg_size) & ~7);
+ if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
+ {
+ gpr_reg_num = cfun->va_list_gpr_size;
+ if (reg_size == 4 && (first_reg_offset & 1))
+ gpr_reg_num++;
+ }
+ gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
+ }
+ else if (fpr_size)
+ offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
+ * UNITS_PER_FP_WORD
+ - (int) (GP_ARG_NUM_REG * reg_size);
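+	  /* Example (editorial sketch, 32-bit V.4): if the first unnamed
+	     arg lands in r6 (first_reg_offset == 3), then
+	     offset = -((3 * 4) & ~7) == -8 and gpr_size covers the five
+	     registers r6..r10 rounded up to a doubleword (24 bytes).  */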
+
+ if (gpr_size + fpr_size)
+ {
+ rtx reg_save_area
+ = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
+ gcc_assert (GET_CODE (reg_save_area) == MEM);
+ reg_save_area = XEXP (reg_save_area, 0);
+ if (GET_CODE (reg_save_area) == PLUS)
+ {
+ gcc_assert (XEXP (reg_save_area, 0)
+ == virtual_stack_vars_rtx);
+ gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
+ offset += INTVAL (XEXP (reg_save_area, 1));
+ }
+ else
+ gcc_assert (reg_save_area == virtual_stack_vars_rtx);
+ }
+
+ cfun->machine->varargs_save_offset = offset;
+ save_area = plus_constant (virtual_stack_vars_rtx, offset);
+ }
+ }
+ else
+ {
+ first_reg_offset = next_cum.words;
+ save_area = virtual_incoming_args_rtx;
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
+ }
+
+ set = get_varargs_alias_set ();
+ if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
+ && cfun->va_list_gpr_size)
+ {
+ int nregs = GP_ARG_NUM_REG - first_reg_offset;
+
+ if (va_list_gpr_counter_field)
+ {
+ /* V4 va_list_gpr_size counts number of registers needed. */
+ if (nregs > cfun->va_list_gpr_size)
+ nregs = cfun->va_list_gpr_size;
+ }
+ else
+ {
+ /* char * va_list instead counts number of bytes needed. */
+ if (nregs > cfun->va_list_gpr_size / reg_size)
+ nregs = cfun->va_list_gpr_size / reg_size;
+ }
+
+ mem = gen_rtx_MEM (BLKmode,
+ plus_constant (save_area,
+ first_reg_offset * reg_size));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, BITS_PER_WORD);
+
+ rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
+ nregs);
+ }
+
+ /* Save FP registers if needed. */
+ if (DEFAULT_ABI == ABI_V4
+ && TARGET_HARD_FLOAT && TARGET_FPRS
+ && ! no_rtl
+ && next_cum.fregno <= FP_ARG_V4_MAX_REG
+ && cfun->va_list_fpr_size)
+ {
+ int fregno = next_cum.fregno, nregs;
+ rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
+ rtx lab = gen_label_rtx ();
+ int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
+ * UNITS_PER_FP_WORD);
+
+ emit_jump_insn
+ (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_NE (VOIDmode, cr1,
+ const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, lab),
+ pc_rtx)));
+
+ for (nregs = 0;
+ fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
+ fregno++, off += UNITS_PER_FP_WORD, nregs++)
+ {
+ mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode,
+ plus_constant (save_area, off));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, GET_MODE_ALIGNMENT (
+ (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode));
+ emit_move_insn (mem, gen_rtx_REG (
+ (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode, fregno));
+ }
+
+ emit_label (lab);
+ }
+}
+
+/* Create the va_list data type. */
+
+static tree
+rs6000_build_builtin_va_list (void)
+{
+ tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
+
+ /* For AIX, prefer 'char *' because that's what the system
+ header files like. */
+ if (DEFAULT_ABI != ABI_V4)
+ return build_pointer_type (char_type_node);
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
+ unsigned_char_type_node);
+ f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
+ unsigned_char_type_node);
+ /* Give the two bytes of padding a name, so that -Wpadded won't warn on
+ every user file. */
+ f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("reserved"), short_unsigned_type_node);
+ f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_res) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ DECL_CHAIN (f_gpr) = f_fpr;
+ DECL_CHAIN (f_fpr) = f_res;
+ DECL_CHAIN (f_res) = f_ovf;
+ DECL_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
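+/* The record built above corresponds to the familiar V.4 va_list
+   (editorial sketch of the equivalent C declaration):
+
+     typedef struct __va_list_tag {
+       unsigned char gpr, fpr;        // next GPR/FPR to use, from 0
+       unsigned short reserved;       // named padding
+       void *overflow_arg_area;       // args that spilled to the stack
+       void *reg_save_area;           // where the prologue saved the regs
+     } va_list[1];  */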
+
+/* Implement va_start. */
+
+static void
+rs6000_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+
+ /* Only SVR4 needs something special. */
+ if (DEFAULT_ABI != ABI_V4)
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_res = DECL_CHAIN (f_fpr);
+ f_ovf = DECL_CHAIN (f_res);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_simple_mem_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
+ f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
+ f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
+ f_sav, NULL_TREE);
+
+ /* Count number of gp and fp argument registers used. */
+ words = crtl->args.info.words;
+ n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
+ GP_ARG_NUM_REG);
+ n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
+ FP_ARG_NUM_REG);
+
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
+ HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
+ words, n_gpr, n_fpr);
+
+ if (cfun->va_list_gpr_size)
+ {
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (NULL_TREE, n_gpr));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (cfun->va_list_fpr_size)
+ {
+ t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (NULL_TREE, n_fpr));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ if (call_ABI_of_interest (cfun->decl))
+ rs6000_passes_float = true;
+#endif
+ }
+
+ /* Find the overflow area. */
+ t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+ if (words != 0)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), t,
+ size_int (words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* If there were no va_arg invocations, don't set up the register
+ save area. */
+ if (!cfun->va_list_gpr_size
+ && !cfun->va_list_fpr_size
+ && n_gpr < GP_ARG_NUM_REG
+ && n_fpr < FP_ARG_V4_MAX_REG)
+ return;
+
+ /* Find the register save area. */
+ t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
+ if (cfun->machine->varargs_save_offset)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (sav), t,
+ size_int (cfun->machine->varargs_save_offset));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+/* Implement va_arg. */
+
+tree
+rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, reg, t, u;
+ int size, rsize, n_reg, sav_ofs, sav_scale;
+ tree lab_false, lab_over, addr;
+ int align;
+ tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
+ int regalign = 0;
+ gimple stmt;
+
+ if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
+ {
+ t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
+ return build_va_arg_indirect_ref (t);
+ }
+
+ /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
+ earlier version of gcc, with the property that it always applied alignment
+ adjustments to the va-args (even for zero-sized types). The cheapest way
+ to deal with this is to replicate the effect of the part of
+ std_gimplify_va_arg_expr that carries out the align adjust, for the case
+ of relevance.
+ We don't need to check for pass-by-reference because of the test above.
+     We can return a simplified answer, since we know there's no offset to add.  */
+
+ if (TARGET_MACHO
+ && rs6000_darwin64_abi
+ && integer_zerop (TYPE_SIZE (type)))
+ {
+ unsigned HOST_WIDE_INT align, boundary;
+ tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
+ align = PARM_BOUNDARY / BITS_PER_UNIT;
+ boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
+ if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
+ boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
+ boundary /= BITS_PER_UNIT;
+ if (boundary > align)
+ {
+	  tree t;
+ /* This updates arg ptr by the amount that would be necessary
+ to align the zero-sized (but not zero-alignment) item. */
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
+ fold_build2 (POINTER_PLUS_EXPR,
+ TREE_TYPE (valist),
+ valist_tmp, size_int (boundary - 1)));
+ gimplify_and_add (t, pre_p);
+
+ t = fold_convert (sizetype, valist_tmp);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
+ fold_convert (TREE_TYPE (valist),
+ fold_build2 (BIT_AND_EXPR, sizetype, t,
+ size_int (-boundary))));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
+ gimplify_and_add (t, pre_p);
+ }
+ /* Since it is zero-sized there's no increment for the item itself. */
+ valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
+ return build_va_arg_indirect_ref (valist_tmp);
+ }
+
+ if (DEFAULT_ABI != ABI_V4)
+ {
+ if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ tree elem_type = TREE_TYPE (type);
+ enum machine_mode elem_mode = TYPE_MODE (elem_type);
+ int elem_size = GET_MODE_SIZE (elem_mode);
+
+ if (elem_size < UNITS_PER_WORD)
+ {
+ tree real_part, imag_part;
+ gimple_seq post = NULL;
+
+ real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
+ &post);
+ /* Copy the value into a temporary, lest the formal temporary
+ be reused out from under us. */
+ real_part = get_initialized_tmp_var (real_part, pre_p, &post);
+ gimple_seq_add_seq (pre_p, post);
+
+ imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
+ post_p);
+
+ return build2 (COMPLEX_EXPR, type, real_part, imag_part);
+ }
+ }
+
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_res = DECL_CHAIN (f_fpr);
+ f_ovf = DECL_CHAIN (f_res);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_va_arg_indirect_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
+ f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
+ f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
+ f_sav, NULL_TREE);
+
+ size = int_size_in_bytes (type);
+ rsize = (size + 3) / 4;
+ align = 1;
+
+ if (TARGET_HARD_FLOAT && TARGET_FPRS
+ && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
+ || (TARGET_DOUBLE_FLOAT
+ && (TYPE_MODE (type) == DFmode
+ || TYPE_MODE (type) == TFmode
+ || TYPE_MODE (type) == SDmode
+ || TYPE_MODE (type) == DDmode
+ || TYPE_MODE (type) == TDmode))))
+ {
+ /* FP args go in FP registers, if present. */
+ reg = fpr;
+ n_reg = (size + 7) / 8;
+ sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
+ sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
+ if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
+ align = 8;
+ }
+ else
+ {
+ /* Otherwise into GP registers. */
+ reg = gpr;
+ n_reg = rsize;
+ sav_ofs = 0;
+ sav_scale = 4;
+ if (n_reg == 2)
+ align = 8;
+ }
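+  /* Example (editorial sketch): a 'double' with hard float has size 8,
+     so n_reg == 1, sav_ofs == 32 (past the 8 saved GPR words) and
+     sav_scale == 8; a 'long long' takes the GPR path with n_reg == 2
+     and align == 8.  */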
+
+ /* Pull the value out of the saved registers.... */
+
+ lab_over = NULL;
+ addr = create_tmp_var (ptr_type_node, "addr");
+
+ /* AltiVec vectors never go in registers when -mabi=altivec. */
+ if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
+ align = 16;
+ else
+ {
+ lab_false = create_artificial_label (input_location);
+ lab_over = create_artificial_label (input_location);
+
+ /* Long long and SPE vectors are aligned in the registers.
+ As are any other 2 gpr item such as complex int due to a
+ historical mistake. */
+ u = reg;
+ if (n_reg == 2 && reg == gpr)
+ {
+ regalign = 1;
+ u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
+ build_int_cst (TREE_TYPE (reg), n_reg - 1));
+ u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
+ unshare_expr (reg), u);
+ }
+ /* _Decimal128 is passed in even/odd fpr pairs; the stored
+ reg number is 0 for f1, so we want to make it odd. */
+ else if (reg == fpr && TYPE_MODE (type) == TDmode)
+ {
+ t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
+ build_int_cst (TREE_TYPE (reg), 1));
+ u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
+ }
+
+ t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
+ t = build2 (GE_EXPR, boolean_type_node, u, t);
+ u = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = sav;
+ if (sav_ofs)
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, size_int (sav_ofs));
+
+ u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
+ build_int_cst (TREE_TYPE (reg), n_reg));
+ u = fold_convert (sizetype, u);
+ u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, u);
+
+ /* _Decimal32 varargs are located in the second word of the 64-bit
+ FP register for 32-bit binaries. */
+ if (!TARGET_POWERPC64
+ && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TYPE_MODE (type) == SDmode)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, size_int (size));
+
+ gimplify_assign (addr, t, pre_p);
+
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+
+ stmt = gimple_build_label (lab_false);
+ gimple_seq_add_stmt (pre_p, stmt);
+
+ if ((n_reg == 2 && !regalign) || n_reg > 2)
+ {
+	  /* Ensure that we don't find any more args in regs.
+	     Alignment has been taken care of for the special cases.  */
+ gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
+ }
+ }
+
+ /* ... otherwise out of the overflow area. */
+
+ /* Care for on-stack alignment if needed. */
+ t = ovf;
+ if (align != 1)
+ {
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, size_int (align - 1));
+ t = fold_convert (sizetype, t);
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ size_int (-align));
+ t = fold_convert (TREE_TYPE (ovf), t);
+ }
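+  /* E.g. (editorial note): with align == 8, adding align - 1 == 7 and
+     masking with -8 rounds the overflow pointer up to the next 8-byte
+     boundary.  */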
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+ gimplify_assign (unshare_expr (addr), t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, size_int (size));
+ gimplify_assign (unshare_expr (ovf), t, pre_p);
+
+ if (lab_over)
+ {
+ stmt = gimple_build_label (lab_over);
+ gimple_seq_add_stmt (pre_p, stmt);
+ }
+
+ if (STRICT_ALIGNMENT
+ && (TYPE_ALIGN (type)
+ > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
+ {
+ /* The value (of type complex double, for example) may not be
+ aligned in memory in the saved registers, so copy via a
+ temporary. (This is the same code as used for SPARC.) */
+ tree tmp = create_tmp_var (type, "va_arg_tmp");
+ tree dest_addr = build_fold_addr_expr (tmp);
+
+ tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, addr, size_int (rsize * 4));
+
+ gimplify_and_add (copy, pre_p);
+ addr = dest_addr;
+ }
+
+ addr = fold_convert (ptrtype, addr);
+ return build_va_arg_indirect_ref (addr);
+}
+
+/* Builtins. */
+
+static void
+def_builtin (int mask, const char *name, tree type, int code)
+{
+ if ((mask & target_flags) || TARGET_PAIRED_FLOAT)
+ {
+ tree t;
+ if (rs6000_builtin_decls[code])
+	fatal_error ("internal error: builtin function %s already processed",
+ name);
+
+ rs6000_builtin_decls[code] = t =
+ add_builtin_function (name, type, code, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ gcc_assert (code >= 0 && code < (int)RS6000_BUILTIN_COUNT);
+ switch (builtin_classify[code])
+ {
+ default:
+ gcc_unreachable ();
+
+	  /* Assume the builtin can do anything.  */
+ case RS6000_BTC_MISC:
+ break;
+
+	  /* Const function: depends only on its inputs.  */
+ case RS6000_BTC_CONST:
+ TREE_READONLY (t) = 1;
+ TREE_NOTHROW (t) = 1;
+ break;
+
+	  /* Pure function: can read global memory.  */
+ case RS6000_BTC_PURE:
+ DECL_PURE_P (t) = 1;
+ TREE_NOTHROW (t) = 1;
+ break;
+
+ /* Function is a math function. If rounding mode is on, then treat
+ the function as not reading global memory, but it can have
+ arbitrary side effects. If it is off, then assume the function is
+ a const function. This mimics the ATTR_MATHFN_FPROUNDING
+ attribute in builtin-attribute.def that is used for the math
+ functions. */
+ case RS6000_BTC_FP_PURE:
+ TREE_NOTHROW (t) = 1;
+ if (flag_rounding_math)
+ {
+ DECL_PURE_P (t) = 1;
+ DECL_IS_NOVOPS (t) = 1;
+ }
+ else
+ TREE_READONLY (t) = 1;
+ break;
+ }
+ }
+}
+
+/* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
+
+static const struct builtin_description bdesc_3arg[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_fmav4sf4, "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmhaddshs, "__builtin_altivec_vmhaddshs", ALTIVEC_BUILTIN_VMHADDSHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmhraddshs, "__builtin_altivec_vmhraddshs", ALTIVEC_BUILTIN_VMHRADDSHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmladduhm, "__builtin_altivec_vmladduhm", ALTIVEC_BUILTIN_VMLADDUHM},
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsumubm, "__builtin_altivec_vmsumubm", ALTIVEC_BUILTIN_VMSUMUBM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsummbm, "__builtin_altivec_vmsummbm", ALTIVEC_BUILTIN_VMSUMMBM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhm, "__builtin_altivec_vmsumuhm", ALTIVEC_BUILTIN_VMSUMUHM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshm, "__builtin_altivec_vmsumshm", ALTIVEC_BUILTIN_VMSUMSHM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhs, "__builtin_altivec_vmsumuhs", ALTIVEC_BUILTIN_VMSUMUHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshs, "__builtin_altivec_vmsumshs", ALTIVEC_BUILTIN_VMSUMSHS },
+ { MASK_ALTIVEC, CODE_FOR_nfmsv4sf4, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2df, "__builtin_altivec_vperm_2df", ALTIVEC_BUILTIN_VPERM_2DF },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di, "__builtin_altivec_vperm_2di", ALTIVEC_BUILTIN_VPERM_2DI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4sf, "__builtin_altivec_vperm_4sf", ALTIVEC_BUILTIN_VPERM_4SF },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4si, "__builtin_altivec_vperm_4si", ALTIVEC_BUILTIN_VPERM_4SI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v8hi, "__builtin_altivec_vperm_8hi", ALTIVEC_BUILTIN_VPERM_8HI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_altivec_vperm_16qi", ALTIVEC_BUILTIN_VPERM_16QI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di_uns, "__builtin_altivec_vperm_2di_uns", ALTIVEC_BUILTIN_VPERM_2DI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4si_uns, "__builtin_altivec_vperm_4si_uns", ALTIVEC_BUILTIN_VPERM_4SI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v8hi_uns, "__builtin_altivec_vperm_8hi_uns", ALTIVEC_BUILTIN_VPERM_8HI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_altivec_vperm_16qi_uns", ALTIVEC_BUILTIN_VPERM_16QI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v4sf, "__builtin_altivec_vsel_4sf", ALTIVEC_BUILTIN_VSEL_4SF },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v4si, "__builtin_altivec_vsel_4si", ALTIVEC_BUILTIN_VSEL_4SI },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v8hi, "__builtin_altivec_vsel_8hi", ALTIVEC_BUILTIN_VSEL_8HI },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v16qi, "__builtin_altivec_vsel_16qi", ALTIVEC_BUILTIN_VSEL_16QI },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v2df, "__builtin_altivec_vsel_2df", ALTIVEC_BUILTIN_VSEL_2DF },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v2di, "__builtin_altivec_vsel_2di", ALTIVEC_BUILTIN_VSEL_2DI },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v4si_uns, "__builtin_altivec_vsel_4si_uns", ALTIVEC_BUILTIN_VSEL_4SI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v8hi_uns, "__builtin_altivec_vsel_8hi_uns", ALTIVEC_BUILTIN_VSEL_8HI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v16qi_uns, "__builtin_altivec_vsel_16qi_uns", ALTIVEC_BUILTIN_VSEL_16QI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_vector_select_v2di_uns, "__builtin_altivec_vsel_2di_uns", ALTIVEC_BUILTIN_VSEL_2DI_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v16qi, "__builtin_altivec_vsldoi_16qi", ALTIVEC_BUILTIN_VSLDOI_16QI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v8hi, "__builtin_altivec_vsldoi_8hi", ALTIVEC_BUILTIN_VSLDOI_8HI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v4si, "__builtin_altivec_vsldoi_4si", ALTIVEC_BUILTIN_VSLDOI_4SI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v4sf, "__builtin_altivec_vsldoi_4sf", ALTIVEC_BUILTIN_VSLDOI_4SF },
+
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_madd", ALTIVEC_BUILTIN_VEC_MADD },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_madds", ALTIVEC_BUILTIN_VEC_MADDS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mladd", ALTIVEC_BUILTIN_VEC_MLADD },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mradds", ALTIVEC_BUILTIN_VEC_MRADDS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msum", ALTIVEC_BUILTIN_VEC_MSUM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsumshm", ALTIVEC_BUILTIN_VEC_VMSUMSHM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsumuhm", ALTIVEC_BUILTIN_VEC_VMSUMUHM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsummbm", ALTIVEC_BUILTIN_VEC_VMSUMMBM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsumubm", ALTIVEC_BUILTIN_VEC_VMSUMUBM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msums", ALTIVEC_BUILTIN_VEC_MSUMS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsumshs", ALTIVEC_BUILTIN_VEC_VMSUMSHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmsumuhs", ALTIVEC_BUILTIN_VEC_VMSUMUHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nmsub", ALTIVEC_BUILTIN_VEC_NMSUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_perm", ALTIVEC_BUILTIN_VEC_PERM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sel", ALTIVEC_BUILTIN_VEC_SEL },
+
+ { MASK_VSX, CODE_FOR_fmav2df4, "__builtin_vsx_xvmadddp", VSX_BUILTIN_XVMADDDP },
+ { MASK_VSX, CODE_FOR_fmsv2df4, "__builtin_vsx_xvmsubdp", VSX_BUILTIN_XVMSUBDP },
+ { MASK_VSX, CODE_FOR_nfmav2df4, "__builtin_vsx_xvnmadddp", VSX_BUILTIN_XVNMADDDP },
+ { MASK_VSX, CODE_FOR_nfmsv2df4, "__builtin_vsx_xvnmsubdp", VSX_BUILTIN_XVNMSUBDP },
+
+ { MASK_VSX, CODE_FOR_fmav4sf4, "__builtin_vsx_xvmaddsp", VSX_BUILTIN_XVMADDSP },
+ { MASK_VSX, CODE_FOR_fmsv4sf4, "__builtin_vsx_xvmsubsp", VSX_BUILTIN_XVMSUBSP },
+ { MASK_VSX, CODE_FOR_nfmav4sf4, "__builtin_vsx_xvnmaddsp", VSX_BUILTIN_XVNMADDSP },
+ { MASK_VSX, CODE_FOR_nfmsv4sf4, "__builtin_vsx_xvnmsubsp", VSX_BUILTIN_XVNMSUBSP },
+
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_msub", VSX_BUILTIN_VEC_MSUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nmadd", VSX_BUILTIN_VEC_NMADD },
+
+ { MASK_VSX, CODE_FOR_vector_select_v2di, "__builtin_vsx_xxsel_2di", VSX_BUILTIN_XXSEL_2DI },
+ { MASK_VSX, CODE_FOR_vector_select_v2df, "__builtin_vsx_xxsel_2df", VSX_BUILTIN_XXSEL_2DF },
+ { MASK_VSX, CODE_FOR_vector_select_v4sf, "__builtin_vsx_xxsel_4sf", VSX_BUILTIN_XXSEL_4SF },
+ { MASK_VSX, CODE_FOR_vector_select_v4si, "__builtin_vsx_xxsel_4si", VSX_BUILTIN_XXSEL_4SI },
+ { MASK_VSX, CODE_FOR_vector_select_v8hi, "__builtin_vsx_xxsel_8hi", VSX_BUILTIN_XXSEL_8HI },
+ { MASK_VSX, CODE_FOR_vector_select_v16qi, "__builtin_vsx_xxsel_16qi", VSX_BUILTIN_XXSEL_16QI },
+ { MASK_VSX, CODE_FOR_vector_select_v2di_uns, "__builtin_vsx_xxsel_2di_uns", VSX_BUILTIN_XXSEL_2DI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v4si_uns, "__builtin_vsx_xxsel_4si_uns", VSX_BUILTIN_XXSEL_4SI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v8hi_uns, "__builtin_vsx_xxsel_8hi_uns", VSX_BUILTIN_XXSEL_8HI_UNS },
+ { MASK_VSX, CODE_FOR_vector_select_v16qi_uns, "__builtin_vsx_xxsel_16qi_uns", VSX_BUILTIN_XXSEL_16QI_UNS },
+
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2di, "__builtin_vsx_vperm_2di", VSX_BUILTIN_VPERM_2DI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2df, "__builtin_vsx_vperm_2df", VSX_BUILTIN_VPERM_2DF },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4sf, "__builtin_vsx_vperm_4sf", VSX_BUILTIN_VPERM_4SF },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4si, "__builtin_vsx_vperm_4si", VSX_BUILTIN_VPERM_4SI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v8hi, "__builtin_vsx_vperm_8hi", VSX_BUILTIN_VPERM_8HI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v16qi, "__builtin_vsx_vperm_16qi", VSX_BUILTIN_VPERM_16QI },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v2di_uns, "__builtin_vsx_vperm_2di_uns", VSX_BUILTIN_VPERM_2DI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v4si_uns, "__builtin_vsx_vperm_4si_uns", VSX_BUILTIN_VPERM_4SI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v8hi_uns, "__builtin_vsx_vperm_8hi_uns", VSX_BUILTIN_VPERM_8HI_UNS },
+ { MASK_VSX, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_vsx_vperm_16qi_uns", VSX_BUILTIN_VPERM_16QI_UNS },
+
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2df, "__builtin_vsx_xxpermdi_2df", VSX_BUILTIN_XXPERMDI_2DF },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v2di, "__builtin_vsx_xxpermdi_2di", VSX_BUILTIN_XXPERMDI_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4sf, "__builtin_vsx_xxpermdi_4sf", VSX_BUILTIN_XXPERMDI_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v4si, "__builtin_vsx_xxpermdi_4si", VSX_BUILTIN_XXPERMDI_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v8hi, "__builtin_vsx_xxpermdi_8hi", VSX_BUILTIN_XXPERMDI_8HI },
+ { MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, "__builtin_vsx_xxpermdi_16qi", VSX_BUILTIN_XXPERMDI_16QI },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxpermdi", VSX_BUILTIN_VEC_XXPERMDI },
+ { MASK_VSX, CODE_FOR_vsx_set_v2df, "__builtin_vsx_set_2df", VSX_BUILTIN_SET_2DF },
+ { MASK_VSX, CODE_FOR_vsx_set_v2di, "__builtin_vsx_set_2di", VSX_BUILTIN_SET_2DI },
+
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2di, "__builtin_vsx_xxsldwi_2di", VSX_BUILTIN_XXSLDWI_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v2df, "__builtin_vsx_xxsldwi_2df", VSX_BUILTIN_XXSLDWI_2DF },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4sf, "__builtin_vsx_xxsldwi_4sf", VSX_BUILTIN_XXSLDWI_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v4si, "__builtin_vsx_xxsldwi_4si", VSX_BUILTIN_XXSLDWI_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v8hi, "__builtin_vsx_xxsldwi_8hi", VSX_BUILTIN_XXSLDWI_8HI },
+ { MASK_VSX, CODE_FOR_vsx_xxsldwi_v16qi, "__builtin_vsx_xxsldwi_16qi", VSX_BUILTIN_XXSLDWI_16QI },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vsx_xxsldwi", VSX_BUILTIN_VEC_XXSLDWI },
+
+ { 0, CODE_FOR_fmsv2sf4, "__builtin_paired_msub", PAIRED_BUILTIN_MSUB },
+ { 0, CODE_FOR_fmav2sf4, "__builtin_paired_madd", PAIRED_BUILTIN_MADD },
+ { 0, CODE_FOR_paired_madds0, "__builtin_paired_madds0", PAIRED_BUILTIN_MADDS0 },
+ { 0, CODE_FOR_paired_madds1, "__builtin_paired_madds1", PAIRED_BUILTIN_MADDS1 },
+ { 0, CODE_FOR_nfmsv2sf4, "__builtin_paired_nmsub", PAIRED_BUILTIN_NMSUB },
+ { 0, CODE_FOR_nfmav2sf4, "__builtin_paired_nmadd", PAIRED_BUILTIN_NMADD },
+ { 0, CODE_FOR_paired_sum0, "__builtin_paired_sum0", PAIRED_BUILTIN_SUM0 },
+ { 0, CODE_FOR_paired_sum1, "__builtin_paired_sum1", PAIRED_BUILTIN_SUM1 },
+ { 0, CODE_FOR_selv2sf4, "__builtin_paired_selv2sf4", PAIRED_BUILTIN_SELV2SF4 },
+};
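+
+/* Illustrative sketch, not part of the dispatch tables themselves: the
+   entries above describe ternary operations.  A permute, for instance,
+   looks roughly like
+
+     vector unsigned char a, b, pick, r;
+     r = vec_perm (a, b, pick);
+
+   which resolves to one of the vperm entries above; the names and types
+   in this sketch are hypothetical. */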
+
+/* DST operations: void foo (void *, const int, const char). */
+
+static const struct builtin_description bdesc_dst[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_altivec_dst, "__builtin_altivec_dst", ALTIVEC_BUILTIN_DST },
+ { MASK_ALTIVEC, CODE_FOR_altivec_dstt, "__builtin_altivec_dstt", ALTIVEC_BUILTIN_DSTT },
+ { MASK_ALTIVEC, CODE_FOR_altivec_dstst, "__builtin_altivec_dstst", ALTIVEC_BUILTIN_DSTST },
+ { MASK_ALTIVEC, CODE_FOR_altivec_dststt, "__builtin_altivec_dststt", ALTIVEC_BUILTIN_DSTSTT },
+
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_dst", ALTIVEC_BUILTIN_VEC_DST },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_dstt", ALTIVEC_BUILTIN_VEC_DSTT },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_dstst", ALTIVEC_BUILTIN_VEC_DSTST },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_dststt", ALTIVEC_BUILTIN_VEC_DSTSTT }
+};
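+
+/* Usage sketch (illustrative only): a data-stream touch such as
+
+     vec_dst (buf, ctl, 0);
+
+   reaches __builtin_altivec_dst; the last operand selects one of the
+   four stream channels and must be a literal.  buf and ctl here are
+   hypothetical. */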
+
+/* Simple binary operations: VECc = foo (VECa, VECb). */
+
+static struct builtin_description bdesc_2arg[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_addv16qi3, "__builtin_altivec_vaddubm", ALTIVEC_BUILTIN_VADDUBM },
+ { MASK_ALTIVEC, CODE_FOR_addv8hi3, "__builtin_altivec_vadduhm", ALTIVEC_BUILTIN_VADDUHM },
+ { MASK_ALTIVEC, CODE_FOR_addv4si3, "__builtin_altivec_vadduwm", ALTIVEC_BUILTIN_VADDUWM },
+ { MASK_ALTIVEC, CODE_FOR_addv4sf3, "__builtin_altivec_vaddfp", ALTIVEC_BUILTIN_VADDFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vaddcuw, "__builtin_altivec_vaddcuw", ALTIVEC_BUILTIN_VADDCUW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vaddubs, "__builtin_altivec_vaddubs", ALTIVEC_BUILTIN_VADDUBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vaddsbs, "__builtin_altivec_vaddsbs", ALTIVEC_BUILTIN_VADDSBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vadduhs, "__builtin_altivec_vadduhs", ALTIVEC_BUILTIN_VADDUHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vaddshs, "__builtin_altivec_vaddshs", ALTIVEC_BUILTIN_VADDSHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vadduws, "__builtin_altivec_vadduws", ALTIVEC_BUILTIN_VADDUWS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vaddsws, "__builtin_altivec_vaddsws", ALTIVEC_BUILTIN_VADDSWS },
+ { MASK_ALTIVEC, CODE_FOR_andv4si3, "__builtin_altivec_vand", ALTIVEC_BUILTIN_VAND },
+ { MASK_ALTIVEC, CODE_FOR_andcv4si3, "__builtin_altivec_vandc", ALTIVEC_BUILTIN_VANDC },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavgub, "__builtin_altivec_vavgub", ALTIVEC_BUILTIN_VAVGUB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavgsb, "__builtin_altivec_vavgsb", ALTIVEC_BUILTIN_VAVGSB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavguh, "__builtin_altivec_vavguh", ALTIVEC_BUILTIN_VAVGUH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavgsh, "__builtin_altivec_vavgsh", ALTIVEC_BUILTIN_VAVGSH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavguw, "__builtin_altivec_vavguw", ALTIVEC_BUILTIN_VAVGUW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vavgsw, "__builtin_altivec_vavgsw", ALTIVEC_BUILTIN_VAVGSW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vcfux, "__builtin_altivec_vcfux", ALTIVEC_BUILTIN_VCFUX },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vcfsx, "__builtin_altivec_vcfsx", ALTIVEC_BUILTIN_VCFSX },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vcmpbfp, "__builtin_altivec_vcmpbfp", ALTIVEC_BUILTIN_VCMPBFP },
+ { MASK_ALTIVEC, CODE_FOR_vector_eqv16qi, "__builtin_altivec_vcmpequb", ALTIVEC_BUILTIN_VCMPEQUB },
+ { MASK_ALTIVEC, CODE_FOR_vector_eqv8hi, "__builtin_altivec_vcmpequh", ALTIVEC_BUILTIN_VCMPEQUH },
+ { MASK_ALTIVEC, CODE_FOR_vector_eqv4si, "__builtin_altivec_vcmpequw", ALTIVEC_BUILTIN_VCMPEQUW },
+ { MASK_ALTIVEC, CODE_FOR_vector_eqv4sf, "__builtin_altivec_vcmpeqfp", ALTIVEC_BUILTIN_VCMPEQFP },
+ { MASK_ALTIVEC, CODE_FOR_vector_gev4sf, "__builtin_altivec_vcmpgefp", ALTIVEC_BUILTIN_VCMPGEFP },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtuv16qi, "__builtin_altivec_vcmpgtub", ALTIVEC_BUILTIN_VCMPGTUB },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtv16qi, "__builtin_altivec_vcmpgtsb", ALTIVEC_BUILTIN_VCMPGTSB },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtuv8hi, "__builtin_altivec_vcmpgtuh", ALTIVEC_BUILTIN_VCMPGTUH },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtv8hi, "__builtin_altivec_vcmpgtsh", ALTIVEC_BUILTIN_VCMPGTSH },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtuv4si, "__builtin_altivec_vcmpgtuw", ALTIVEC_BUILTIN_VCMPGTUW },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtv4si, "__builtin_altivec_vcmpgtsw", ALTIVEC_BUILTIN_VCMPGTSW },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtv4sf, "__builtin_altivec_vcmpgtfp", ALTIVEC_BUILTIN_VCMPGTFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vctsxs, "__builtin_altivec_vctsxs", ALTIVEC_BUILTIN_VCTSXS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vctuxs, "__builtin_altivec_vctuxs", ALTIVEC_BUILTIN_VCTUXS },
+ { MASK_ALTIVEC, CODE_FOR_umaxv16qi3, "__builtin_altivec_vmaxub", ALTIVEC_BUILTIN_VMAXUB },
+ { MASK_ALTIVEC, CODE_FOR_smaxv16qi3, "__builtin_altivec_vmaxsb", ALTIVEC_BUILTIN_VMAXSB },
+ { MASK_ALTIVEC, CODE_FOR_umaxv8hi3, "__builtin_altivec_vmaxuh", ALTIVEC_BUILTIN_VMAXUH },
+ { MASK_ALTIVEC, CODE_FOR_smaxv8hi3, "__builtin_altivec_vmaxsh", ALTIVEC_BUILTIN_VMAXSH },
+ { MASK_ALTIVEC, CODE_FOR_umaxv4si3, "__builtin_altivec_vmaxuw", ALTIVEC_BUILTIN_VMAXUW },
+ { MASK_ALTIVEC, CODE_FOR_smaxv4si3, "__builtin_altivec_vmaxsw", ALTIVEC_BUILTIN_VMAXSW },
+ { MASK_ALTIVEC, CODE_FOR_smaxv4sf3, "__builtin_altivec_vmaxfp", ALTIVEC_BUILTIN_VMAXFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrghb, "__builtin_altivec_vmrghb", ALTIVEC_BUILTIN_VMRGHB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrghh, "__builtin_altivec_vmrghh", ALTIVEC_BUILTIN_VMRGHH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrghw, "__builtin_altivec_vmrghw", ALTIVEC_BUILTIN_VMRGHW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrglb, "__builtin_altivec_vmrglb", ALTIVEC_BUILTIN_VMRGLB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrglh, "__builtin_altivec_vmrglh", ALTIVEC_BUILTIN_VMRGLH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmrglw, "__builtin_altivec_vmrglw", ALTIVEC_BUILTIN_VMRGLW },
+ { MASK_ALTIVEC, CODE_FOR_uminv16qi3, "__builtin_altivec_vminub", ALTIVEC_BUILTIN_VMINUB },
+ { MASK_ALTIVEC, CODE_FOR_sminv16qi3, "__builtin_altivec_vminsb", ALTIVEC_BUILTIN_VMINSB },
+ { MASK_ALTIVEC, CODE_FOR_uminv8hi3, "__builtin_altivec_vminuh", ALTIVEC_BUILTIN_VMINUH },
+ { MASK_ALTIVEC, CODE_FOR_sminv8hi3, "__builtin_altivec_vminsh", ALTIVEC_BUILTIN_VMINSH },
+ { MASK_ALTIVEC, CODE_FOR_uminv4si3, "__builtin_altivec_vminuw", ALTIVEC_BUILTIN_VMINUW },
+ { MASK_ALTIVEC, CODE_FOR_sminv4si3, "__builtin_altivec_vminsw", ALTIVEC_BUILTIN_VMINSW },
+ { MASK_ALTIVEC, CODE_FOR_sminv4sf3, "__builtin_altivec_vminfp", ALTIVEC_BUILTIN_VMINFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuleub, "__builtin_altivec_vmuleub", ALTIVEC_BUILTIN_VMULEUB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuleub, "__builtin_altivec_vmuleub_uns", ALTIVEC_BUILTIN_VMULEUB_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulesb, "__builtin_altivec_vmulesb", ALTIVEC_BUILTIN_VMULESB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuleuh, "__builtin_altivec_vmuleuh", ALTIVEC_BUILTIN_VMULEUH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuleuh, "__builtin_altivec_vmuleuh_uns", ALTIVEC_BUILTIN_VMULEUH_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulesh, "__builtin_altivec_vmulesh", ALTIVEC_BUILTIN_VMULESH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuloub, "__builtin_altivec_vmuloub", ALTIVEC_BUILTIN_VMULOUB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmuloub, "__builtin_altivec_vmuloub_uns", ALTIVEC_BUILTIN_VMULOUB_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulosb, "__builtin_altivec_vmulosb", ALTIVEC_BUILTIN_VMULOSB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulouh, "__builtin_altivec_vmulouh", ALTIVEC_BUILTIN_VMULOUH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulouh, "__builtin_altivec_vmulouh_uns", ALTIVEC_BUILTIN_VMULOUH_UNS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vmulosh, "__builtin_altivec_vmulosh", ALTIVEC_BUILTIN_VMULOSH },
+ { MASK_ALTIVEC, CODE_FOR_norv4si3, "__builtin_altivec_vnor", ALTIVEC_BUILTIN_VNOR },
+ { MASK_ALTIVEC, CODE_FOR_iorv4si3, "__builtin_altivec_vor", ALTIVEC_BUILTIN_VOR },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum, "__builtin_altivec_vpkuhum", ALTIVEC_BUILTIN_VPKUHUM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum, "__builtin_altivec_vpkuwum", ALTIVEC_BUILTIN_VPKUWUM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkpx, "__builtin_altivec_vpkpx", ALTIVEC_BUILTIN_VPKPX },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkshss, "__builtin_altivec_vpkshss", ALTIVEC_BUILTIN_VPKSHSS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkswss, "__builtin_altivec_vpkswss", ALTIVEC_BUILTIN_VPKSWSS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkuhus, "__builtin_altivec_vpkuhus", ALTIVEC_BUILTIN_VPKUHUS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkshus, "__builtin_altivec_vpkshus", ALTIVEC_BUILTIN_VPKSHUS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkuwus, "__builtin_altivec_vpkuwus", ALTIVEC_BUILTIN_VPKUWUS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vpkswus, "__builtin_altivec_vpkswus", ALTIVEC_BUILTIN_VPKSWUS },
+ { MASK_ALTIVEC, CODE_FOR_recipv4sf3, "__builtin_altivec_vrecipdivfp", ALTIVEC_BUILTIN_VRECIPFP },
+ { MASK_ALTIVEC, CODE_FOR_vrotlv16qi3, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB },
+ { MASK_ALTIVEC, CODE_FOR_vrotlv8hi3, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH },
+ { MASK_ALTIVEC, CODE_FOR_vrotlv4si3, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW },
+ { MASK_ALTIVEC, CODE_FOR_vashlv16qi3, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB },
+ { MASK_ALTIVEC, CODE_FOR_vashlv8hi3, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH },
+ { MASK_ALTIVEC, CODE_FOR_vashlv4si3, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsl, "__builtin_altivec_vsl", ALTIVEC_BUILTIN_VSL },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vslo, "__builtin_altivec_vslo", ALTIVEC_BUILTIN_VSLO },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsplth, "__builtin_altivec_vsplth", ALTIVEC_BUILTIN_VSPLTH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vspltw, "__builtin_altivec_vspltw", ALTIVEC_BUILTIN_VSPLTW },
+ { MASK_ALTIVEC, CODE_FOR_vlshrv16qi3, "__builtin_altivec_vsrb", ALTIVEC_BUILTIN_VSRB },
+ { MASK_ALTIVEC, CODE_FOR_vlshrv8hi3, "__builtin_altivec_vsrh", ALTIVEC_BUILTIN_VSRH },
+ { MASK_ALTIVEC, CODE_FOR_vlshrv4si3, "__builtin_altivec_vsrw", ALTIVEC_BUILTIN_VSRW },
+ { MASK_ALTIVEC, CODE_FOR_vashrv16qi3, "__builtin_altivec_vsrab", ALTIVEC_BUILTIN_VSRAB },
+ { MASK_ALTIVEC, CODE_FOR_vashrv8hi3, "__builtin_altivec_vsrah", ALTIVEC_BUILTIN_VSRAH },
+ { MASK_ALTIVEC, CODE_FOR_vashrv4si3, "__builtin_altivec_vsraw", ALTIVEC_BUILTIN_VSRAW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsr, "__builtin_altivec_vsr", ALTIVEC_BUILTIN_VSR },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsro, "__builtin_altivec_vsro", ALTIVEC_BUILTIN_VSRO },
+ { MASK_ALTIVEC, CODE_FOR_subv16qi3, "__builtin_altivec_vsububm", ALTIVEC_BUILTIN_VSUBUBM },
+ { MASK_ALTIVEC, CODE_FOR_subv8hi3, "__builtin_altivec_vsubuhm", ALTIVEC_BUILTIN_VSUBUHM },
+ { MASK_ALTIVEC, CODE_FOR_subv4si3, "__builtin_altivec_vsubuwm", ALTIVEC_BUILTIN_VSUBUWM },
+ { MASK_ALTIVEC, CODE_FOR_subv4sf3, "__builtin_altivec_vsubfp", ALTIVEC_BUILTIN_VSUBFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubcuw, "__builtin_altivec_vsubcuw", ALTIVEC_BUILTIN_VSUBCUW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsububs, "__builtin_altivec_vsububs", ALTIVEC_BUILTIN_VSUBUBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubsbs, "__builtin_altivec_vsubsbs", ALTIVEC_BUILTIN_VSUBSBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubuhs, "__builtin_altivec_vsubuhs", ALTIVEC_BUILTIN_VSUBUHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubshs, "__builtin_altivec_vsubshs", ALTIVEC_BUILTIN_VSUBSHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubuws, "__builtin_altivec_vsubuws", ALTIVEC_BUILTIN_VSUBUWS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsubsws, "__builtin_altivec_vsubsws", ALTIVEC_BUILTIN_VSUBSWS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsum4ubs, "__builtin_altivec_vsum4ubs", ALTIVEC_BUILTIN_VSUM4UBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsum4sbs, "__builtin_altivec_vsum4sbs", ALTIVEC_BUILTIN_VSUM4SBS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsum4shs, "__builtin_altivec_vsum4shs", ALTIVEC_BUILTIN_VSUM4SHS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsum2sws, "__builtin_altivec_vsum2sws", ALTIVEC_BUILTIN_VSUM2SWS },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS },
+ { MASK_ALTIVEC, CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR },
+ { MASK_ALTIVEC, CODE_FOR_vector_copysignv4sf3, "__builtin_altivec_copysignfp", ALTIVEC_BUILTIN_COPYSIGN_V4SF },
+
+ { MASK_VSX, CODE_FOR_addv2df3, "__builtin_vsx_xvadddp", VSX_BUILTIN_XVADDDP },
+ { MASK_VSX, CODE_FOR_subv2df3, "__builtin_vsx_xvsubdp", VSX_BUILTIN_XVSUBDP },
+ { MASK_VSX, CODE_FOR_mulv2df3, "__builtin_vsx_xvmuldp", VSX_BUILTIN_XVMULDP },
+ { MASK_VSX, CODE_FOR_divv2df3, "__builtin_vsx_xvdivdp", VSX_BUILTIN_XVDIVDP },
+ { MASK_VSX, CODE_FOR_recipv2df3, "__builtin_vsx_xvrecipdivdp", VSX_BUILTIN_RECIP_V2DF },
+ { MASK_VSX, CODE_FOR_sminv2df3, "__builtin_vsx_xvmindp", VSX_BUILTIN_XVMINDP },
+ { MASK_VSX, CODE_FOR_smaxv2df3, "__builtin_vsx_xvmaxdp", VSX_BUILTIN_XVMAXDP },
+ { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fe, "__builtin_vsx_xvtdivdp_fe", VSX_BUILTIN_XVTDIVDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivv2df3_fg, "__builtin_vsx_xvtdivdp_fg", VSX_BUILTIN_XVTDIVDP_FG },
+ { MASK_VSX, CODE_FOR_vector_eqv2df, "__builtin_vsx_xvcmpeqdp", VSX_BUILTIN_XVCMPEQDP },
+ { MASK_VSX, CODE_FOR_vector_gtv2df, "__builtin_vsx_xvcmpgtdp", VSX_BUILTIN_XVCMPGTDP },
+ { MASK_VSX, CODE_FOR_vector_gev2df, "__builtin_vsx_xvcmpgedp", VSX_BUILTIN_XVCMPGEDP },
+
+ { MASK_VSX, CODE_FOR_addv4sf3, "__builtin_vsx_xvaddsp", VSX_BUILTIN_XVADDSP },
+ { MASK_VSX, CODE_FOR_subv4sf3, "__builtin_vsx_xvsubsp", VSX_BUILTIN_XVSUBSP },
+ { MASK_VSX, CODE_FOR_mulv4sf3, "__builtin_vsx_xvmulsp", VSX_BUILTIN_XVMULSP },
+ { MASK_VSX, CODE_FOR_divv4sf3, "__builtin_vsx_xvdivsp", VSX_BUILTIN_XVDIVSP },
+ { MASK_VSX, CODE_FOR_recipv4sf3, "__builtin_vsx_xvrecipdivsp", VSX_BUILTIN_RECIP_V4SF },
+ { MASK_VSX, CODE_FOR_sminv4sf3, "__builtin_vsx_xvminsp", VSX_BUILTIN_XVMINSP },
+ { MASK_VSX, CODE_FOR_smaxv4sf3, "__builtin_vsx_xvmaxsp", VSX_BUILTIN_XVMAXSP },
+ { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fe, "__builtin_vsx_xvtdivsp_fe", VSX_BUILTIN_XVTDIVSP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivv4sf3_fg, "__builtin_vsx_xvtdivsp_fg", VSX_BUILTIN_XVTDIVSP_FG },
+ { MASK_VSX, CODE_FOR_vector_eqv4sf, "__builtin_vsx_xvcmpeqsp", VSX_BUILTIN_XVCMPEQSP },
+ { MASK_VSX, CODE_FOR_vector_gtv4sf, "__builtin_vsx_xvcmpgtsp", VSX_BUILTIN_XVCMPGTSP },
+ { MASK_VSX, CODE_FOR_vector_gev4sf, "__builtin_vsx_xvcmpgesp", VSX_BUILTIN_XVCMPGESP },
+
+ { MASK_VSX, CODE_FOR_smindf3, "__builtin_vsx_xsmindp", VSX_BUILTIN_XSMINDP },
+ { MASK_VSX, CODE_FOR_smaxdf3, "__builtin_vsx_xsmaxdp", VSX_BUILTIN_XSMAXDP },
+ { MASK_VSX, CODE_FOR_vsx_tdivdf3_fe, "__builtin_vsx_xstdivdp_fe", VSX_BUILTIN_XSTDIVDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tdivdf3_fg, "__builtin_vsx_xstdivdp_fg", VSX_BUILTIN_XSTDIVDP_FG },
+ { MASK_VSX, CODE_FOR_vector_copysignv2df3, "__builtin_vsx_cpsgndp", VSX_BUILTIN_CPSGNDP },
+ { MASK_VSX, CODE_FOR_vector_copysignv4sf3, "__builtin_vsx_cpsgnsp", VSX_BUILTIN_CPSGNSP },
+
+ { MASK_VSX, CODE_FOR_vsx_concat_v2df, "__builtin_vsx_concat_2df", VSX_BUILTIN_CONCAT_2DF },
+ { MASK_VSX, CODE_FOR_vsx_concat_v2di, "__builtin_vsx_concat_2di", VSX_BUILTIN_CONCAT_2DI },
+ { MASK_VSX, CODE_FOR_vsx_splat_v2df, "__builtin_vsx_splat_2df", VSX_BUILTIN_SPLAT_2DF },
+ { MASK_VSX, CODE_FOR_vsx_splat_v2di, "__builtin_vsx_splat_2di", VSX_BUILTIN_SPLAT_2DI },
+ { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4sf, "__builtin_vsx_xxmrghw", VSX_BUILTIN_XXMRGHW_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxmrghw_v4si, "__builtin_vsx_xxmrghw_4si", VSX_BUILTIN_XXMRGHW_4SI },
+ { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4sf, "__builtin_vsx_xxmrglw", VSX_BUILTIN_XXMRGLW_4SF },
+ { MASK_VSX, CODE_FOR_vsx_xxmrglw_v4si, "__builtin_vsx_xxmrglw_4si", VSX_BUILTIN_XXMRGLW_4SI },
+ { MASK_VSX, CODE_FOR_vec_interleave_lowv2df, "__builtin_vsx_mergel_2df", VSX_BUILTIN_VEC_MERGEL_V2DF },
+ { MASK_VSX, CODE_FOR_vec_interleave_lowv2di, "__builtin_vsx_mergel_2di", VSX_BUILTIN_VEC_MERGEL_V2DI },
+ { MASK_VSX, CODE_FOR_vec_interleave_highv2df, "__builtin_vsx_mergeh_2df", VSX_BUILTIN_VEC_MERGEH_V2DF },
+ { MASK_VSX, CODE_FOR_vec_interleave_highv2di, "__builtin_vsx_mergeh_2di", VSX_BUILTIN_VEC_MERGEH_V2DI },
+
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduwm", ALTIVEC_BUILTIN_VEC_VADDUWM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduhm", ALTIVEC_BUILTIN_VEC_VADDUHM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddubm", ALTIVEC_BUILTIN_VEC_VADDUBM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_addc", ALTIVEC_BUILTIN_VEC_ADDC },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_adds", ALTIVEC_BUILTIN_VEC_ADDS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddsws", ALTIVEC_BUILTIN_VEC_VADDSWS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduws", ALTIVEC_BUILTIN_VEC_VADDUWS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddshs", ALTIVEC_BUILTIN_VEC_VADDSHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduhs", ALTIVEC_BUILTIN_VEC_VADDUHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddsbs", ALTIVEC_BUILTIN_VEC_VADDSBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddubs", ALTIVEC_BUILTIN_VEC_VADDUBS },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_and", ALTIVEC_BUILTIN_VEC_AND },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_andc", ALTIVEC_BUILTIN_VEC_ANDC },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_avg", ALTIVEC_BUILTIN_VEC_AVG },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavgsw", ALTIVEC_BUILTIN_VEC_VAVGSW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavguw", ALTIVEC_BUILTIN_VEC_VAVGUW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavgsh", ALTIVEC_BUILTIN_VEC_VAVGSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavguh", ALTIVEC_BUILTIN_VEC_VAVGUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavgsb", ALTIVEC_BUILTIN_VEC_VAVGSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavgub", ALTIVEC_BUILTIN_VEC_VAVGUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmpb", ALTIVEC_BUILTIN_VEC_CMPB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmpeq", ALTIVEC_BUILTIN_VEC_CMPEQ },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpeqfp", ALTIVEC_BUILTIN_VEC_VCMPEQFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpequw", ALTIVEC_BUILTIN_VEC_VCMPEQUW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpequh", ALTIVEC_BUILTIN_VEC_VCMPEQUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpequb", ALTIVEC_BUILTIN_VEC_VCMPEQUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmpge", ALTIVEC_BUILTIN_VEC_CMPGE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmpgt", ALTIVEC_BUILTIN_VEC_CMPGT },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtfp", ALTIVEC_BUILTIN_VEC_VCMPGTFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtsw", ALTIVEC_BUILTIN_VEC_VCMPGTSW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtuw", ALTIVEC_BUILTIN_VEC_VCMPGTUW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtsh", ALTIVEC_BUILTIN_VEC_VCMPGTSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtuh", ALTIVEC_BUILTIN_VEC_VCMPGTUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtsb", ALTIVEC_BUILTIN_VEC_VCMPGTSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtub", ALTIVEC_BUILTIN_VEC_VCMPGTUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmple", ALTIVEC_BUILTIN_VEC_CMPLE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmplt", ALTIVEC_BUILTIN_VEC_CMPLT },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_copysign", ALTIVEC_BUILTIN_VEC_COPYSIGN },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_max", ALTIVEC_BUILTIN_VEC_MAX },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vmaxfp", ALTIVEC_BUILTIN_VEC_VMAXFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsw", ALTIVEC_BUILTIN_VEC_VMAXSW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxuw", ALTIVEC_BUILTIN_VEC_VMAXUW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsh", ALTIVEC_BUILTIN_VEC_VMAXSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxuh", ALTIVEC_BUILTIN_VEC_VMAXUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsb", ALTIVEC_BUILTIN_VEC_VMAXSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxub", ALTIVEC_BUILTIN_VEC_VMAXUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mergeh", ALTIVEC_BUILTIN_VEC_MERGEH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrghw", ALTIVEC_BUILTIN_VEC_VMRGHW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrghh", ALTIVEC_BUILTIN_VEC_VMRGHH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrghb", ALTIVEC_BUILTIN_VEC_VMRGHB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mergel", ALTIVEC_BUILTIN_VEC_MERGEL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglw", ALTIVEC_BUILTIN_VEC_VMRGLW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglh", ALTIVEC_BUILTIN_VEC_VMRGLH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglb", ALTIVEC_BUILTIN_VEC_VMRGLB },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_min", ALTIVEC_BUILTIN_VEC_MIN },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vminfp", ALTIVEC_BUILTIN_VEC_VMINFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminsw", ALTIVEC_BUILTIN_VEC_VMINSW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminuw", ALTIVEC_BUILTIN_VEC_VMINUW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminsh", ALTIVEC_BUILTIN_VEC_VMINSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminuh", ALTIVEC_BUILTIN_VEC_VMINUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminsb", ALTIVEC_BUILTIN_VEC_VMINSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminub", ALTIVEC_BUILTIN_VEC_VMINUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mule", ALTIVEC_BUILTIN_VEC_MULE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmuleub", ALTIVEC_BUILTIN_VEC_VMULEUB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulesb", ALTIVEC_BUILTIN_VEC_VMULESB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmuleuh", ALTIVEC_BUILTIN_VEC_VMULEUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulesh", ALTIVEC_BUILTIN_VEC_VMULESH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mulo", ALTIVEC_BUILTIN_VEC_MULO },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulosh", ALTIVEC_BUILTIN_VEC_VMULOSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulouh", ALTIVEC_BUILTIN_VEC_VMULOUH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulosb", ALTIVEC_BUILTIN_VEC_VMULOSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmuloub", ALTIVEC_BUILTIN_VEC_VMULOUB },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nor", ALTIVEC_BUILTIN_VEC_NOR },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_or", ALTIVEC_BUILTIN_VEC_OR },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_pack", ALTIVEC_BUILTIN_VEC_PACK },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuwum", ALTIVEC_BUILTIN_VEC_VPKUWUM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuhum", ALTIVEC_BUILTIN_VEC_VPKUHUM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_packpx", ALTIVEC_BUILTIN_VEC_PACKPX },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_packs", ALTIVEC_BUILTIN_VEC_PACKS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkswss", ALTIVEC_BUILTIN_VEC_VPKSWSS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuwus", ALTIVEC_BUILTIN_VEC_VPKUWUS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkshss", ALTIVEC_BUILTIN_VEC_VPKSHSS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuhus", ALTIVEC_BUILTIN_VEC_VPKUHUS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_packsu", ALTIVEC_BUILTIN_VEC_PACKSU },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkswus", ALTIVEC_BUILTIN_VEC_VPKSWUS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkshus", ALTIVEC_BUILTIN_VEC_VPKSHUS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_recipdiv", ALTIVEC_BUILTIN_VEC_RECIP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_rl", ALTIVEC_BUILTIN_VEC_RL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vrlw", ALTIVEC_BUILTIN_VEC_VRLW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vrlh", ALTIVEC_BUILTIN_VEC_VRLH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vrlb", ALTIVEC_BUILTIN_VEC_VRLB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sl", ALTIVEC_BUILTIN_VEC_SL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vslw", ALTIVEC_BUILTIN_VEC_VSLW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vslh", ALTIVEC_BUILTIN_VEC_VSLH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vslb", ALTIVEC_BUILTIN_VEC_VSLB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sll", ALTIVEC_BUILTIN_VEC_SLL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_slo", ALTIVEC_BUILTIN_VEC_SLO },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sr", ALTIVEC_BUILTIN_VEC_SR },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrw", ALTIVEC_BUILTIN_VEC_VSRW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrh", ALTIVEC_BUILTIN_VEC_VSRH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrb", ALTIVEC_BUILTIN_VEC_VSRB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sra", ALTIVEC_BUILTIN_VEC_SRA },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsraw", ALTIVEC_BUILTIN_VEC_VSRAW },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrah", ALTIVEC_BUILTIN_VEC_VSRAH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrab", ALTIVEC_BUILTIN_VEC_VSRAB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_srl", ALTIVEC_BUILTIN_VEC_SRL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sro", ALTIVEC_BUILTIN_VEC_SRO },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_sub", ALTIVEC_BUILTIN_VEC_SUB },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vsubfp", ALTIVEC_BUILTIN_VEC_VSUBFP },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuwm", ALTIVEC_BUILTIN_VEC_VSUBUWM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuhm", ALTIVEC_BUILTIN_VEC_VSUBUHM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsububm", ALTIVEC_BUILTIN_VEC_VSUBUBM },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_subc", ALTIVEC_BUILTIN_VEC_SUBC },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_subs", ALTIVEC_BUILTIN_VEC_SUBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubsws", ALTIVEC_BUILTIN_VEC_VSUBSWS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuws", ALTIVEC_BUILTIN_VEC_VSUBUWS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubshs", ALTIVEC_BUILTIN_VEC_VSUBSHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuhs", ALTIVEC_BUILTIN_VEC_VSUBUHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubsbs", ALTIVEC_BUILTIN_VEC_VSUBSBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsububs", ALTIVEC_BUILTIN_VEC_VSUBUBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sum4s", ALTIVEC_BUILTIN_VEC_SUM4S },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsum4shs", ALTIVEC_BUILTIN_VEC_VSUM4SHS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsum4sbs", ALTIVEC_BUILTIN_VEC_VSUM4SBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsum4ubs", ALTIVEC_BUILTIN_VEC_VSUM4UBS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sum2s", ALTIVEC_BUILTIN_VEC_SUM2S },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sums", ALTIVEC_BUILTIN_VEC_SUMS },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR },
+
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_mul", VSX_BUILTIN_VEC_MUL },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_div", VSX_BUILTIN_VEC_DIV },
+
+ { 0, CODE_FOR_paired_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 },
+ { 0, CODE_FOR_paired_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 },
+ { 0, CODE_FOR_paired_subv2sf3, "__builtin_paired_subv2sf3", PAIRED_BUILTIN_SUBV2SF3 },
+ { 0, CODE_FOR_paired_mulv2sf3, "__builtin_paired_mulv2sf3", PAIRED_BUILTIN_MULV2SF3 },
+ { 0, CODE_FOR_paired_muls0, "__builtin_paired_muls0", PAIRED_BUILTIN_MULS0 },
+ { 0, CODE_FOR_paired_muls1, "__builtin_paired_muls1", PAIRED_BUILTIN_MULS1 },
+ { 0, CODE_FOR_paired_merge00, "__builtin_paired_merge00", PAIRED_BUILTIN_MERGE00 },
+ { 0, CODE_FOR_paired_merge01, "__builtin_paired_merge01", PAIRED_BUILTIN_MERGE01 },
+ { 0, CODE_FOR_paired_merge10, "__builtin_paired_merge10", PAIRED_BUILTIN_MERGE10 },
+ { 0, CODE_FOR_paired_merge11, "__builtin_paired_merge11", PAIRED_BUILTIN_MERGE11 },
+
+  /* Place-holder.  Leave as first SPE builtin.  */
+ { 0, CODE_FOR_addv2si3, "__builtin_spe_evaddw", SPE_BUILTIN_EVADDW },
+ { 0, CODE_FOR_andv2si3, "__builtin_spe_evand", SPE_BUILTIN_EVAND },
+ { 0, CODE_FOR_spe_evandc, "__builtin_spe_evandc", SPE_BUILTIN_EVANDC },
+ { 0, CODE_FOR_divv2si3, "__builtin_spe_evdivws", SPE_BUILTIN_EVDIVWS },
+ { 0, CODE_FOR_spe_evdivwu, "__builtin_spe_evdivwu", SPE_BUILTIN_EVDIVWU },
+ { 0, CODE_FOR_spe_eveqv, "__builtin_spe_eveqv", SPE_BUILTIN_EVEQV },
+ { 0, CODE_FOR_spe_evfsadd, "__builtin_spe_evfsadd", SPE_BUILTIN_EVFSADD },
+ { 0, CODE_FOR_spe_evfsdiv, "__builtin_spe_evfsdiv", SPE_BUILTIN_EVFSDIV },
+ { 0, CODE_FOR_spe_evfsmul, "__builtin_spe_evfsmul", SPE_BUILTIN_EVFSMUL },
+ { 0, CODE_FOR_spe_evfssub, "__builtin_spe_evfssub", SPE_BUILTIN_EVFSSUB },
+ { 0, CODE_FOR_spe_evmergehi, "__builtin_spe_evmergehi", SPE_BUILTIN_EVMERGEHI },
+ { 0, CODE_FOR_spe_evmergehilo, "__builtin_spe_evmergehilo", SPE_BUILTIN_EVMERGEHILO },
+ { 0, CODE_FOR_spe_evmergelo, "__builtin_spe_evmergelo", SPE_BUILTIN_EVMERGELO },
+ { 0, CODE_FOR_spe_evmergelohi, "__builtin_spe_evmergelohi", SPE_BUILTIN_EVMERGELOHI },
+ { 0, CODE_FOR_spe_evmhegsmfaa, "__builtin_spe_evmhegsmfaa", SPE_BUILTIN_EVMHEGSMFAA },
+ { 0, CODE_FOR_spe_evmhegsmfan, "__builtin_spe_evmhegsmfan", SPE_BUILTIN_EVMHEGSMFAN },
+ { 0, CODE_FOR_spe_evmhegsmiaa, "__builtin_spe_evmhegsmiaa", SPE_BUILTIN_EVMHEGSMIAA },
+ { 0, CODE_FOR_spe_evmhegsmian, "__builtin_spe_evmhegsmian", SPE_BUILTIN_EVMHEGSMIAN },
+ { 0, CODE_FOR_spe_evmhegumiaa, "__builtin_spe_evmhegumiaa", SPE_BUILTIN_EVMHEGUMIAA },
+ { 0, CODE_FOR_spe_evmhegumian, "__builtin_spe_evmhegumian", SPE_BUILTIN_EVMHEGUMIAN },
+ { 0, CODE_FOR_spe_evmhesmf, "__builtin_spe_evmhesmf", SPE_BUILTIN_EVMHESMF },
+ { 0, CODE_FOR_spe_evmhesmfa, "__builtin_spe_evmhesmfa", SPE_BUILTIN_EVMHESMFA },
+ { 0, CODE_FOR_spe_evmhesmfaaw, "__builtin_spe_evmhesmfaaw", SPE_BUILTIN_EVMHESMFAAW },
+ { 0, CODE_FOR_spe_evmhesmfanw, "__builtin_spe_evmhesmfanw", SPE_BUILTIN_EVMHESMFANW },
+ { 0, CODE_FOR_spe_evmhesmi, "__builtin_spe_evmhesmi", SPE_BUILTIN_EVMHESMI },
+ { 0, CODE_FOR_spe_evmhesmia, "__builtin_spe_evmhesmia", SPE_BUILTIN_EVMHESMIA },
+ { 0, CODE_FOR_spe_evmhesmiaaw, "__builtin_spe_evmhesmiaaw", SPE_BUILTIN_EVMHESMIAAW },
+ { 0, CODE_FOR_spe_evmhesmianw, "__builtin_spe_evmhesmianw", SPE_BUILTIN_EVMHESMIANW },
+ { 0, CODE_FOR_spe_evmhessf, "__builtin_spe_evmhessf", SPE_BUILTIN_EVMHESSF },
+ { 0, CODE_FOR_spe_evmhessfa, "__builtin_spe_evmhessfa", SPE_BUILTIN_EVMHESSFA },
+ { 0, CODE_FOR_spe_evmhessfaaw, "__builtin_spe_evmhessfaaw", SPE_BUILTIN_EVMHESSFAAW },
+ { 0, CODE_FOR_spe_evmhessfanw, "__builtin_spe_evmhessfanw", SPE_BUILTIN_EVMHESSFANW },
+ { 0, CODE_FOR_spe_evmhessiaaw, "__builtin_spe_evmhessiaaw", SPE_BUILTIN_EVMHESSIAAW },
+ { 0, CODE_FOR_spe_evmhessianw, "__builtin_spe_evmhessianw", SPE_BUILTIN_EVMHESSIANW },
+ { 0, CODE_FOR_spe_evmheumi, "__builtin_spe_evmheumi", SPE_BUILTIN_EVMHEUMI },
+ { 0, CODE_FOR_spe_evmheumia, "__builtin_spe_evmheumia", SPE_BUILTIN_EVMHEUMIA },
+ { 0, CODE_FOR_spe_evmheumiaaw, "__builtin_spe_evmheumiaaw", SPE_BUILTIN_EVMHEUMIAAW },
+ { 0, CODE_FOR_spe_evmheumianw, "__builtin_spe_evmheumianw", SPE_BUILTIN_EVMHEUMIANW },
+ { 0, CODE_FOR_spe_evmheusiaaw, "__builtin_spe_evmheusiaaw", SPE_BUILTIN_EVMHEUSIAAW },
+ { 0, CODE_FOR_spe_evmheusianw, "__builtin_spe_evmheusianw", SPE_BUILTIN_EVMHEUSIANW },
+ { 0, CODE_FOR_spe_evmhogsmfaa, "__builtin_spe_evmhogsmfaa", SPE_BUILTIN_EVMHOGSMFAA },
+ { 0, CODE_FOR_spe_evmhogsmfan, "__builtin_spe_evmhogsmfan", SPE_BUILTIN_EVMHOGSMFAN },
+ { 0, CODE_FOR_spe_evmhogsmiaa, "__builtin_spe_evmhogsmiaa", SPE_BUILTIN_EVMHOGSMIAA },
+ { 0, CODE_FOR_spe_evmhogsmian, "__builtin_spe_evmhogsmian", SPE_BUILTIN_EVMHOGSMIAN },
+ { 0, CODE_FOR_spe_evmhogumiaa, "__builtin_spe_evmhogumiaa", SPE_BUILTIN_EVMHOGUMIAA },
+ { 0, CODE_FOR_spe_evmhogumian, "__builtin_spe_evmhogumian", SPE_BUILTIN_EVMHOGUMIAN },
+ { 0, CODE_FOR_spe_evmhosmf, "__builtin_spe_evmhosmf", SPE_BUILTIN_EVMHOSMF },
+ { 0, CODE_FOR_spe_evmhosmfa, "__builtin_spe_evmhosmfa", SPE_BUILTIN_EVMHOSMFA },
+ { 0, CODE_FOR_spe_evmhosmfaaw, "__builtin_spe_evmhosmfaaw", SPE_BUILTIN_EVMHOSMFAAW },
+ { 0, CODE_FOR_spe_evmhosmfanw, "__builtin_spe_evmhosmfanw", SPE_BUILTIN_EVMHOSMFANW },
+ { 0, CODE_FOR_spe_evmhosmi, "__builtin_spe_evmhosmi", SPE_BUILTIN_EVMHOSMI },
+ { 0, CODE_FOR_spe_evmhosmia, "__builtin_spe_evmhosmia", SPE_BUILTIN_EVMHOSMIA },
+ { 0, CODE_FOR_spe_evmhosmiaaw, "__builtin_spe_evmhosmiaaw", SPE_BUILTIN_EVMHOSMIAAW },
+ { 0, CODE_FOR_spe_evmhosmianw, "__builtin_spe_evmhosmianw", SPE_BUILTIN_EVMHOSMIANW },
+ { 0, CODE_FOR_spe_evmhossf, "__builtin_spe_evmhossf", SPE_BUILTIN_EVMHOSSF },
+ { 0, CODE_FOR_spe_evmhossfa, "__builtin_spe_evmhossfa", SPE_BUILTIN_EVMHOSSFA },
+ { 0, CODE_FOR_spe_evmhossfaaw, "__builtin_spe_evmhossfaaw", SPE_BUILTIN_EVMHOSSFAAW },
+ { 0, CODE_FOR_spe_evmhossfanw, "__builtin_spe_evmhossfanw", SPE_BUILTIN_EVMHOSSFANW },
+ { 0, CODE_FOR_spe_evmhossiaaw, "__builtin_spe_evmhossiaaw", SPE_BUILTIN_EVMHOSSIAAW },
+ { 0, CODE_FOR_spe_evmhossianw, "__builtin_spe_evmhossianw", SPE_BUILTIN_EVMHOSSIANW },
+ { 0, CODE_FOR_spe_evmhoumi, "__builtin_spe_evmhoumi", SPE_BUILTIN_EVMHOUMI },
+ { 0, CODE_FOR_spe_evmhoumia, "__builtin_spe_evmhoumia", SPE_BUILTIN_EVMHOUMIA },
+ { 0, CODE_FOR_spe_evmhoumiaaw, "__builtin_spe_evmhoumiaaw", SPE_BUILTIN_EVMHOUMIAAW },
+ { 0, CODE_FOR_spe_evmhoumianw, "__builtin_spe_evmhoumianw", SPE_BUILTIN_EVMHOUMIANW },
+ { 0, CODE_FOR_spe_evmhousiaaw, "__builtin_spe_evmhousiaaw", SPE_BUILTIN_EVMHOUSIAAW },
+ { 0, CODE_FOR_spe_evmhousianw, "__builtin_spe_evmhousianw", SPE_BUILTIN_EVMHOUSIANW },
+ { 0, CODE_FOR_spe_evmwhsmf, "__builtin_spe_evmwhsmf", SPE_BUILTIN_EVMWHSMF },
+ { 0, CODE_FOR_spe_evmwhsmfa, "__builtin_spe_evmwhsmfa", SPE_BUILTIN_EVMWHSMFA },
+ { 0, CODE_FOR_spe_evmwhsmi, "__builtin_spe_evmwhsmi", SPE_BUILTIN_EVMWHSMI },
+ { 0, CODE_FOR_spe_evmwhsmia, "__builtin_spe_evmwhsmia", SPE_BUILTIN_EVMWHSMIA },
+ { 0, CODE_FOR_spe_evmwhssf, "__builtin_spe_evmwhssf", SPE_BUILTIN_EVMWHSSF },
+ { 0, CODE_FOR_spe_evmwhssfa, "__builtin_spe_evmwhssfa", SPE_BUILTIN_EVMWHSSFA },
+ { 0, CODE_FOR_spe_evmwhumi, "__builtin_spe_evmwhumi", SPE_BUILTIN_EVMWHUMI },
+ { 0, CODE_FOR_spe_evmwhumia, "__builtin_spe_evmwhumia", SPE_BUILTIN_EVMWHUMIA },
+ { 0, CODE_FOR_spe_evmwlsmiaaw, "__builtin_spe_evmwlsmiaaw", SPE_BUILTIN_EVMWLSMIAAW },
+ { 0, CODE_FOR_spe_evmwlsmianw, "__builtin_spe_evmwlsmianw", SPE_BUILTIN_EVMWLSMIANW },
+ { 0, CODE_FOR_spe_evmwlssiaaw, "__builtin_spe_evmwlssiaaw", SPE_BUILTIN_EVMWLSSIAAW },
+ { 0, CODE_FOR_spe_evmwlssianw, "__builtin_spe_evmwlssianw", SPE_BUILTIN_EVMWLSSIANW },
+ { 0, CODE_FOR_spe_evmwlumi, "__builtin_spe_evmwlumi", SPE_BUILTIN_EVMWLUMI },
+ { 0, CODE_FOR_spe_evmwlumia, "__builtin_spe_evmwlumia", SPE_BUILTIN_EVMWLUMIA },
+ { 0, CODE_FOR_spe_evmwlumiaaw, "__builtin_spe_evmwlumiaaw", SPE_BUILTIN_EVMWLUMIAAW },
+ { 0, CODE_FOR_spe_evmwlumianw, "__builtin_spe_evmwlumianw", SPE_BUILTIN_EVMWLUMIANW },
+ { 0, CODE_FOR_spe_evmwlusiaaw, "__builtin_spe_evmwlusiaaw", SPE_BUILTIN_EVMWLUSIAAW },
+ { 0, CODE_FOR_spe_evmwlusianw, "__builtin_spe_evmwlusianw", SPE_BUILTIN_EVMWLUSIANW },
+ { 0, CODE_FOR_spe_evmwsmf, "__builtin_spe_evmwsmf", SPE_BUILTIN_EVMWSMF },
+ { 0, CODE_FOR_spe_evmwsmfa, "__builtin_spe_evmwsmfa", SPE_BUILTIN_EVMWSMFA },
+ { 0, CODE_FOR_spe_evmwsmfaa, "__builtin_spe_evmwsmfaa", SPE_BUILTIN_EVMWSMFAA },
+ { 0, CODE_FOR_spe_evmwsmfan, "__builtin_spe_evmwsmfan", SPE_BUILTIN_EVMWSMFAN },
+ { 0, CODE_FOR_spe_evmwsmi, "__builtin_spe_evmwsmi", SPE_BUILTIN_EVMWSMI },
+ { 0, CODE_FOR_spe_evmwsmia, "__builtin_spe_evmwsmia", SPE_BUILTIN_EVMWSMIA },
+ { 0, CODE_FOR_spe_evmwsmiaa, "__builtin_spe_evmwsmiaa", SPE_BUILTIN_EVMWSMIAA },
+ { 0, CODE_FOR_spe_evmwsmian, "__builtin_spe_evmwsmian", SPE_BUILTIN_EVMWSMIAN },
+ { 0, CODE_FOR_spe_evmwssf, "__builtin_spe_evmwssf", SPE_BUILTIN_EVMWSSF },
+ { 0, CODE_FOR_spe_evmwssfa, "__builtin_spe_evmwssfa", SPE_BUILTIN_EVMWSSFA },
+ { 0, CODE_FOR_spe_evmwssfaa, "__builtin_spe_evmwssfaa", SPE_BUILTIN_EVMWSSFAA },
+ { 0, CODE_FOR_spe_evmwssfan, "__builtin_spe_evmwssfan", SPE_BUILTIN_EVMWSSFAN },
+ { 0, CODE_FOR_spe_evmwumi, "__builtin_spe_evmwumi", SPE_BUILTIN_EVMWUMI },
+ { 0, CODE_FOR_spe_evmwumia, "__builtin_spe_evmwumia", SPE_BUILTIN_EVMWUMIA },
+ { 0, CODE_FOR_spe_evmwumiaa, "__builtin_spe_evmwumiaa", SPE_BUILTIN_EVMWUMIAA },
+ { 0, CODE_FOR_spe_evmwumian, "__builtin_spe_evmwumian", SPE_BUILTIN_EVMWUMIAN },
+ { 0, CODE_FOR_spe_evnand, "__builtin_spe_evnand", SPE_BUILTIN_EVNAND },
+ { 0, CODE_FOR_spe_evnor, "__builtin_spe_evnor", SPE_BUILTIN_EVNOR },
+ { 0, CODE_FOR_spe_evor, "__builtin_spe_evor", SPE_BUILTIN_EVOR },
+ { 0, CODE_FOR_spe_evorc, "__builtin_spe_evorc", SPE_BUILTIN_EVORC },
+ { 0, CODE_FOR_spe_evrlw, "__builtin_spe_evrlw", SPE_BUILTIN_EVRLW },
+ { 0, CODE_FOR_spe_evslw, "__builtin_spe_evslw", SPE_BUILTIN_EVSLW },
+ { 0, CODE_FOR_spe_evsrws, "__builtin_spe_evsrws", SPE_BUILTIN_EVSRWS },
+ { 0, CODE_FOR_spe_evsrwu, "__builtin_spe_evsrwu", SPE_BUILTIN_EVSRWU },
+ { 0, CODE_FOR_subv2si3, "__builtin_spe_evsubfw", SPE_BUILTIN_EVSUBFW },
+
+ /* SPE binary operations expecting a 5-bit unsigned literal. */
+ { 0, CODE_FOR_spe_evaddiw, "__builtin_spe_evaddiw", SPE_BUILTIN_EVADDIW },
+
+ { 0, CODE_FOR_spe_evrlwi, "__builtin_spe_evrlwi", SPE_BUILTIN_EVRLWI },
+ { 0, CODE_FOR_spe_evslwi, "__builtin_spe_evslwi", SPE_BUILTIN_EVSLWI },
+ { 0, CODE_FOR_spe_evsrwis, "__builtin_spe_evsrwis", SPE_BUILTIN_EVSRWIS },
+ { 0, CODE_FOR_spe_evsrwiu, "__builtin_spe_evsrwiu", SPE_BUILTIN_EVSRWIU },
+ { 0, CODE_FOR_spe_evsubifw, "__builtin_spe_evsubifw", SPE_BUILTIN_EVSUBIFW },
+ { 0, CODE_FOR_spe_evmwhssfaa, "__builtin_spe_evmwhssfaa", SPE_BUILTIN_EVMWHSSFAA },
+ { 0, CODE_FOR_spe_evmwhssmaa, "__builtin_spe_evmwhssmaa", SPE_BUILTIN_EVMWHSSMAA },
+ { 0, CODE_FOR_spe_evmwhsmfaa, "__builtin_spe_evmwhsmfaa", SPE_BUILTIN_EVMWHSMFAA },
+ { 0, CODE_FOR_spe_evmwhsmiaa, "__builtin_spe_evmwhsmiaa", SPE_BUILTIN_EVMWHSMIAA },
+ { 0, CODE_FOR_spe_evmwhusiaa, "__builtin_spe_evmwhusiaa", SPE_BUILTIN_EVMWHUSIAA },
+ { 0, CODE_FOR_spe_evmwhumiaa, "__builtin_spe_evmwhumiaa", SPE_BUILTIN_EVMWHUMIAA },
+ { 0, CODE_FOR_spe_evmwhssfan, "__builtin_spe_evmwhssfan", SPE_BUILTIN_EVMWHSSFAN },
+ { 0, CODE_FOR_spe_evmwhssian, "__builtin_spe_evmwhssian", SPE_BUILTIN_EVMWHSSIAN },
+ { 0, CODE_FOR_spe_evmwhsmfan, "__builtin_spe_evmwhsmfan", SPE_BUILTIN_EVMWHSMFAN },
+ { 0, CODE_FOR_spe_evmwhsmian, "__builtin_spe_evmwhsmian", SPE_BUILTIN_EVMWHSMIAN },
+ { 0, CODE_FOR_spe_evmwhusian, "__builtin_spe_evmwhusian", SPE_BUILTIN_EVMWHUSIAN },
+ { 0, CODE_FOR_spe_evmwhumian, "__builtin_spe_evmwhumian", SPE_BUILTIN_EVMWHUMIAN },
+ { 0, CODE_FOR_spe_evmwhgssfaa, "__builtin_spe_evmwhgssfaa", SPE_BUILTIN_EVMWHGSSFAA },
+ { 0, CODE_FOR_spe_evmwhgsmfaa, "__builtin_spe_evmwhgsmfaa", SPE_BUILTIN_EVMWHGSMFAA },
+ { 0, CODE_FOR_spe_evmwhgsmiaa, "__builtin_spe_evmwhgsmiaa", SPE_BUILTIN_EVMWHGSMIAA },
+ { 0, CODE_FOR_spe_evmwhgumiaa, "__builtin_spe_evmwhgumiaa", SPE_BUILTIN_EVMWHGUMIAA },
+ { 0, CODE_FOR_spe_evmwhgssfan, "__builtin_spe_evmwhgssfan", SPE_BUILTIN_EVMWHGSSFAN },
+ { 0, CODE_FOR_spe_evmwhgsmfan, "__builtin_spe_evmwhgsmfan", SPE_BUILTIN_EVMWHGSMFAN },
+ { 0, CODE_FOR_spe_evmwhgsmian, "__builtin_spe_evmwhgsmian", SPE_BUILTIN_EVMWHGSMIAN },
+ { 0, CODE_FOR_spe_evmwhgumian, "__builtin_spe_evmwhgumian", SPE_BUILTIN_EVMWHGUMIAN },
+ { 0, CODE_FOR_spe_brinc, "__builtin_spe_brinc", SPE_BUILTIN_BRINC },
+
+ /* Place-holder. Leave as last binary SPE builtin. */
+ { 0, CODE_FOR_xorv2si3, "__builtin_spe_evxor", SPE_BUILTIN_EVXOR }
+};
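+
+/* Illustrative only: each bdesc_2arg entry ties one builtin name to one
+   insn pattern, e.g.
+
+     vector float a, b, c;
+     c = __builtin_altivec_vaddfp (a, b);
+
+   uses CODE_FOR_addv4sf3 above, while the CODE_FOR_nothing
+   "__builtin_vec_*" entries are overloaded front-end names that are
+   resolved to a specific pattern from the argument types. */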
+
+/* AltiVec predicates. */
+
+struct builtin_description_predicates
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum rs6000_builtins code;
+};
+
+static const struct builtin_description_predicates bdesc_altivec_preds[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_altivec_vcmpbfp_p, "__builtin_altivec_vcmpbfp_p",
+ ALTIVEC_BUILTIN_VCMPBFP_P },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_eq_v4sf_p,
+ "__builtin_altivec_vcmpeqfp_p", ALTIVEC_BUILTIN_VCMPEQFP_P },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_ge_v4sf_p,
+ "__builtin_altivec_vcmpgefp_p", ALTIVEC_BUILTIN_VCMPGEFP_P },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_gt_v4sf_p,
+ "__builtin_altivec_vcmpgtfp_p", ALTIVEC_BUILTIN_VCMPGTFP_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_eq_v4si_p, "__builtin_altivec_vcmpequw_p",
+ ALTIVEC_BUILTIN_VCMPEQUW_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gt_v4si_p, "__builtin_altivec_vcmpgtsw_p",
+ ALTIVEC_BUILTIN_VCMPGTSW_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtu_v4si_p, "__builtin_altivec_vcmpgtuw_p",
+ ALTIVEC_BUILTIN_VCMPGTUW_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_eq_v8hi_p, "__builtin_altivec_vcmpequh_p",
+ ALTIVEC_BUILTIN_VCMPEQUH_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gt_v8hi_p, "__builtin_altivec_vcmpgtsh_p",
+ ALTIVEC_BUILTIN_VCMPGTSH_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtu_v8hi_p, "__builtin_altivec_vcmpgtuh_p",
+ ALTIVEC_BUILTIN_VCMPGTUH_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_eq_v16qi_p, "__builtin_altivec_vcmpequb_p",
+ ALTIVEC_BUILTIN_VCMPEQUB_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gt_v16qi_p, "__builtin_altivec_vcmpgtsb_p",
+ ALTIVEC_BUILTIN_VCMPGTSB_P },
+ { MASK_ALTIVEC, CODE_FOR_vector_gtu_v16qi_p, "__builtin_altivec_vcmpgtub_p",
+ ALTIVEC_BUILTIN_VCMPGTUB_P },
+
+ { MASK_VSX, CODE_FOR_vector_eq_v4sf_p, "__builtin_vsx_xvcmpeqsp_p",
+ VSX_BUILTIN_XVCMPEQSP_P },
+ { MASK_VSX, CODE_FOR_vector_ge_v4sf_p, "__builtin_vsx_xvcmpgesp_p",
+ VSX_BUILTIN_XVCMPGESP_P },
+ { MASK_VSX, CODE_FOR_vector_gt_v4sf_p, "__builtin_vsx_xvcmpgtsp_p",
+ VSX_BUILTIN_XVCMPGTSP_P },
+ { MASK_VSX, CODE_FOR_vector_eq_v2df_p, "__builtin_vsx_xvcmpeqdp_p",
+ VSX_BUILTIN_XVCMPEQDP_P },
+ { MASK_VSX, CODE_FOR_vector_ge_v2df_p, "__builtin_vsx_xvcmpgedp_p",
+ VSX_BUILTIN_XVCMPGEDP_P },
+ { MASK_VSX, CODE_FOR_vector_gt_v2df_p, "__builtin_vsx_xvcmpgtdp_p",
+ VSX_BUILTIN_XVCMPGTDP_P },
+
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpeq_p",
+ ALTIVEC_BUILTIN_VCMPEQ_P },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpgt_p",
+ ALTIVEC_BUILTIN_VCMPGT_P },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpge_p",
+ ALTIVEC_BUILTIN_VCMPGE_P }
+};
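+
+/* Hedged sketch: predicate builtins return an int derived from the CR6
+   bits set by the dot form of the compare, along the lines of
+
+     if (__builtin_altivec_vcmpequw_p (2, a, b))
+       all_equal ();
+
+   which is roughly what vec_all_eq-style macros expand to; the CR6
+   selector literal shown here is illustrative. */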
+
+/* SPE predicates. */
+static struct builtin_description bdesc_spe_predicates[] =
+{
+ /* Place-holder. Leave as first. */
+ { 0, CODE_FOR_spe_evcmpeq, "__builtin_spe_evcmpeq", SPE_BUILTIN_EVCMPEQ },
+ { 0, CODE_FOR_spe_evcmpgts, "__builtin_spe_evcmpgts", SPE_BUILTIN_EVCMPGTS },
+ { 0, CODE_FOR_spe_evcmpgtu, "__builtin_spe_evcmpgtu", SPE_BUILTIN_EVCMPGTU },
+ { 0, CODE_FOR_spe_evcmplts, "__builtin_spe_evcmplts", SPE_BUILTIN_EVCMPLTS },
+ { 0, CODE_FOR_spe_evcmpltu, "__builtin_spe_evcmpltu", SPE_BUILTIN_EVCMPLTU },
+ { 0, CODE_FOR_spe_evfscmpeq, "__builtin_spe_evfscmpeq", SPE_BUILTIN_EVFSCMPEQ },
+ { 0, CODE_FOR_spe_evfscmpgt, "__builtin_spe_evfscmpgt", SPE_BUILTIN_EVFSCMPGT },
+ { 0, CODE_FOR_spe_evfscmplt, "__builtin_spe_evfscmplt", SPE_BUILTIN_EVFSCMPLT },
+ { 0, CODE_FOR_spe_evfststeq, "__builtin_spe_evfststeq", SPE_BUILTIN_EVFSTSTEQ },
+ { 0, CODE_FOR_spe_evfststgt, "__builtin_spe_evfststgt", SPE_BUILTIN_EVFSTSTGT },
+ /* Place-holder. Leave as last. */
+ { 0, CODE_FOR_spe_evfststlt, "__builtin_spe_evfststlt", SPE_BUILTIN_EVFSTSTLT },
+};
+
+/* SPE evsel predicates. */
+static struct builtin_description bdesc_spe_evsel[] =
+{
+ /* Place-holder. Leave as first. */
+ { 0, CODE_FOR_spe_evcmpgts, "__builtin_spe_evsel_gts", SPE_BUILTIN_EVSEL_CMPGTS },
+ { 0, CODE_FOR_spe_evcmpgtu, "__builtin_spe_evsel_gtu", SPE_BUILTIN_EVSEL_CMPGTU },
+ { 0, CODE_FOR_spe_evcmplts, "__builtin_spe_evsel_lts", SPE_BUILTIN_EVSEL_CMPLTS },
+ { 0, CODE_FOR_spe_evcmpltu, "__builtin_spe_evsel_ltu", SPE_BUILTIN_EVSEL_CMPLTU },
+ { 0, CODE_FOR_spe_evcmpeq, "__builtin_spe_evsel_eq", SPE_BUILTIN_EVSEL_CMPEQ },
+ { 0, CODE_FOR_spe_evfscmpgt, "__builtin_spe_evsel_fsgt", SPE_BUILTIN_EVSEL_FSCMPGT },
+ { 0, CODE_FOR_spe_evfscmplt, "__builtin_spe_evsel_fslt", SPE_BUILTIN_EVSEL_FSCMPLT },
+ { 0, CODE_FOR_spe_evfscmpeq, "__builtin_spe_evsel_fseq", SPE_BUILTIN_EVSEL_FSCMPEQ },
+ { 0, CODE_FOR_spe_evfststgt, "__builtin_spe_evsel_fststgt", SPE_BUILTIN_EVSEL_FSTSTGT },
+ { 0, CODE_FOR_spe_evfststlt, "__builtin_spe_evsel_fststlt", SPE_BUILTIN_EVSEL_FSTSTLT },
+ /* Place-holder. Leave as last. */
+ { 0, CODE_FOR_spe_evfststeq, "__builtin_spe_evsel_fststeq", SPE_BUILTIN_EVSEL_FSTSTEQ },
+};
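+
+/* Hedged sketch: each evsel entry pairs a compare pattern with a
+   select, so a call along the lines of
+
+     r = __builtin_spe_evsel_gts (a, b, c, d);
+
+   picks each element of c or d according to the a > b comparison; the
+   exact operand order in this sketch is an assumption. */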
+
+/* PAIRED predicates. */
+static const struct builtin_description bdesc_paired_preds[] =
+{
+ /* Place-holder. Leave as first. */
+ { 0, CODE_FOR_paired_cmpu0, "__builtin_paired_cmpu0", PAIRED_BUILTIN_CMPU0 },
+ /* Place-holder. Leave as last. */
+ { 0, CODE_FOR_paired_cmpu1, "__builtin_paired_cmpu1", PAIRED_BUILTIN_CMPU1 },
+};
+
+/* ABS* operations. */
+
+static const struct builtin_description bdesc_abs[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_absv4si2, "__builtin_altivec_abs_v4si", ALTIVEC_BUILTIN_ABS_V4SI },
+ { MASK_ALTIVEC, CODE_FOR_absv8hi2, "__builtin_altivec_abs_v8hi", ALTIVEC_BUILTIN_ABS_V8HI },
+ { MASK_ALTIVEC, CODE_FOR_absv4sf2, "__builtin_altivec_abs_v4sf", ALTIVEC_BUILTIN_ABS_V4SF },
+ { MASK_ALTIVEC, CODE_FOR_absv16qi2, "__builtin_altivec_abs_v16qi", ALTIVEC_BUILTIN_ABS_V16QI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v4si, "__builtin_altivec_abss_v4si", ALTIVEC_BUILTIN_ABSS_V4SI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v8hi, "__builtin_altivec_abss_v8hi", ALTIVEC_BUILTIN_ABSS_V8HI },
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI },
+ { MASK_VSX, CODE_FOR_absv2df2, "__builtin_vsx_xvabsdp", VSX_BUILTIN_XVABSDP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv2df2, "__builtin_vsx_xvnabsdp", VSX_BUILTIN_XVNABSDP },
+ { MASK_VSX, CODE_FOR_absv4sf2, "__builtin_vsx_xvabssp", VSX_BUILTIN_XVABSSP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv4sf2, "__builtin_vsx_xvnabssp", VSX_BUILTIN_XVNABSSP },
+};
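+
+/* Illustrative: vec_abs on a vector of ints resolves to
+   __builtin_altivec_abs_v4si above, and the saturating abss_* entries
+   back vec_abss; operand names in any such example are hypothetical. */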
+
+/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
+ foo (VECa). */
+
+static struct builtin_description bdesc_1arg[] =
+{
+ { MASK_ALTIVEC, CODE_FOR_altivec_vexptefp, "__builtin_altivec_vexptefp", ALTIVEC_BUILTIN_VEXPTEFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vlogefp, "__builtin_altivec_vlogefp", ALTIVEC_BUILTIN_VLOGEFP },
+ { MASK_ALTIVEC, CODE_FOR_rev4sf2, "__builtin_altivec_vrefp", ALTIVEC_BUILTIN_VREFP },
+ { MASK_ALTIVEC, CODE_FOR_vector_floorv4sf2, "__builtin_altivec_vrfim", ALTIVEC_BUILTIN_VRFIM },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vrfin, "__builtin_altivec_vrfin", ALTIVEC_BUILTIN_VRFIN },
+ { MASK_ALTIVEC, CODE_FOR_vector_ceilv4sf2, "__builtin_altivec_vrfip", ALTIVEC_BUILTIN_VRFIP },
+ { MASK_ALTIVEC, CODE_FOR_vector_btruncv4sf2, "__builtin_altivec_vrfiz", ALTIVEC_BUILTIN_VRFIZ },
+ { MASK_ALTIVEC, CODE_FOR_rsqrtv4sf2, "__builtin_altivec_vrsqrtfp", ALTIVEC_BUILTIN_VRSQRTFP },
+ { MASK_ALTIVEC, CODE_FOR_rsqrtev4sf2, "__builtin_altivec_vrsqrtefp", ALTIVEC_BUILTIN_VRSQRTEFP },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vspltisb, "__builtin_altivec_vspltisb", ALTIVEC_BUILTIN_VSPLTISB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vspltish, "__builtin_altivec_vspltish", ALTIVEC_BUILTIN_VSPLTISH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vspltisw, "__builtin_altivec_vspltisw", ALTIVEC_BUILTIN_VSPLTISW },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupkhsb, "__builtin_altivec_vupkhsb", ALTIVEC_BUILTIN_VUPKHSB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupkhpx, "__builtin_altivec_vupkhpx", ALTIVEC_BUILTIN_VUPKHPX },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupkhsh, "__builtin_altivec_vupkhsh", ALTIVEC_BUILTIN_VUPKHSH },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupklsb, "__builtin_altivec_vupklsb", ALTIVEC_BUILTIN_VUPKLSB },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupklpx, "__builtin_altivec_vupklpx", ALTIVEC_BUILTIN_VUPKLPX },
+ { MASK_ALTIVEC, CODE_FOR_altivec_vupklsh, "__builtin_altivec_vupklsh", ALTIVEC_BUILTIN_VUPKLSH },
+
+ { MASK_VSX, CODE_FOR_negv2df2, "__builtin_vsx_xvnegdp", VSX_BUILTIN_XVNEGDP },
+ { MASK_VSX, CODE_FOR_sqrtv2df2, "__builtin_vsx_xvsqrtdp", VSX_BUILTIN_XVSQRTDP },
+ { MASK_VSX, CODE_FOR_rsqrtv2df2, "__builtin_vsx_xvrsqrtdp", VSX_BUILTIN_VEC_RSQRT_V2DF },
+ { MASK_VSX, CODE_FOR_rsqrtev2df2, "__builtin_vsx_xvrsqrtedp", VSX_BUILTIN_XVRSQRTEDP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fe, "__builtin_vsx_xvtsqrtdp_fe", VSX_BUILTIN_XVTSQRTDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv2df2_fg, "__builtin_vsx_xvtsqrtdp_fg", VSX_BUILTIN_XVTSQRTDP_FG },
+ { MASK_VSX, CODE_FOR_vsx_frev2df2, "__builtin_vsx_xvredp", VSX_BUILTIN_XVREDP },
+
+ { MASK_VSX, CODE_FOR_negv4sf2, "__builtin_vsx_xvnegsp", VSX_BUILTIN_XVNEGSP },
+ { MASK_VSX, CODE_FOR_sqrtv4sf2, "__builtin_vsx_xvsqrtsp", VSX_BUILTIN_XVSQRTSP },
+ { MASK_VSX, CODE_FOR_rsqrtv4sf2, "__builtin_vsx_xvrsqrtsp", VSX_BUILTIN_VEC_RSQRT_V4SF },
+ { MASK_VSX, CODE_FOR_rsqrtev4sf2, "__builtin_vsx_xvrsqrtesp", VSX_BUILTIN_XVRSQRTESP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fe, "__builtin_vsx_xvtsqrtsp_fe", VSX_BUILTIN_XVTSQRTSP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtv4sf2_fg, "__builtin_vsx_xvtsqrtsp_fg", VSX_BUILTIN_XVTSQRTSP_FG },
+ { MASK_VSX, CODE_FOR_vsx_frev4sf2, "__builtin_vsx_xvresp", VSX_BUILTIN_XVRESP },
+
+ { MASK_VSX, CODE_FOR_vsx_xscvdpsp, "__builtin_vsx_xscvdpsp", VSX_BUILTIN_XSCVDPSP },
+ { MASK_VSX, CODE_FOR_vsx_xscvdpsp, "__builtin_vsx_xscvspdp", VSX_BUILTIN_XSCVSPDP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpsp, "__builtin_vsx_xvcvdpsp", VSX_BUILTIN_XVCVDPSP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvspdp, "__builtin_vsx_xvcvspdp", VSX_BUILTIN_XVCVSPDP },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fe, "__builtin_vsx_xstsqrtdp_fe", VSX_BUILTIN_XSTSQRTDP_FE },
+ { MASK_VSX, CODE_FOR_vsx_tsqrtdf2_fg, "__builtin_vsx_xstsqrtdp_fg", VSX_BUILTIN_XSTSQRTDP_FG },
+
+ { MASK_VSX, CODE_FOR_vsx_fix_truncv2dfv2di2, "__builtin_vsx_xvcvdpsxds", VSX_BUILTIN_XVCVDPSXDS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds", VSX_BUILTIN_XVCVDPUXDS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv2dfv2di2, "__builtin_vsx_xvcvdpuxds_uns", VSX_BUILTIN_XVCVDPUXDS_UNS },
+ { MASK_VSX, CODE_FOR_vsx_floatv2div2df2, "__builtin_vsx_xvcvsxddp", VSX_BUILTIN_XVCVSXDDP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp", VSX_BUILTIN_XVCVUXDDP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv2div2df2, "__builtin_vsx_xvcvuxddp_uns", VSX_BUILTIN_XVCVUXDDP_UNS },
+
+ { MASK_VSX, CODE_FOR_vsx_fix_truncv4sfv4si2, "__builtin_vsx_xvcvspsxws", VSX_BUILTIN_XVCVSPSXWS },
+ { MASK_VSX, CODE_FOR_vsx_fixuns_truncv4sfv4si2, "__builtin_vsx_xvcvspuxws", VSX_BUILTIN_XVCVSPUXWS },
+ { MASK_VSX, CODE_FOR_vsx_floatv4siv4sf2, "__builtin_vsx_xvcvsxwsp", VSX_BUILTIN_XVCVSXWSP },
+ { MASK_VSX, CODE_FOR_vsx_floatunsv4siv4sf2, "__builtin_vsx_xvcvuxwsp", VSX_BUILTIN_XVCVUXWSP },
+
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpsxws, "__builtin_vsx_xvcvdpsxws", VSX_BUILTIN_XVCVDPSXWS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvdpuxws, "__builtin_vsx_xvcvdpuxws", VSX_BUILTIN_XVCVDPUXWS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvsxwdp, "__builtin_vsx_xvcvsxwdp", VSX_BUILTIN_XVCVSXWDP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvuxwdp, "__builtin_vsx_xvcvuxwdp", VSX_BUILTIN_XVCVUXWDP },
+ { MASK_VSX, CODE_FOR_vsx_xvrdpi, "__builtin_vsx_xvrdpi", VSX_BUILTIN_XVRDPI },
+ { MASK_VSX, CODE_FOR_vsx_xvrdpic, "__builtin_vsx_xvrdpic", VSX_BUILTIN_XVRDPIC },
+ { MASK_VSX, CODE_FOR_vsx_floorv2df2, "__builtin_vsx_xvrdpim", VSX_BUILTIN_XVRDPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceilv2df2, "__builtin_vsx_xvrdpip", VSX_BUILTIN_XVRDPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncv2df2, "__builtin_vsx_xvrdpiz", VSX_BUILTIN_XVRDPIZ },
+
+ { MASK_VSX, CODE_FOR_vsx_xvcvspsxds, "__builtin_vsx_xvcvspsxds", VSX_BUILTIN_XVCVSPSXDS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvspuxds, "__builtin_vsx_xvcvspuxds", VSX_BUILTIN_XVCVSPUXDS },
+ { MASK_VSX, CODE_FOR_vsx_xvcvsxdsp, "__builtin_vsx_xvcvsxdsp", VSX_BUILTIN_XVCVSXDSP },
+ { MASK_VSX, CODE_FOR_vsx_xvcvuxdsp, "__builtin_vsx_xvcvuxdsp", VSX_BUILTIN_XVCVUXDSP },
+ { MASK_VSX, CODE_FOR_vsx_xvrspi, "__builtin_vsx_xvrspi", VSX_BUILTIN_XVRSPI },
+ { MASK_VSX, CODE_FOR_vsx_xvrspic, "__builtin_vsx_xvrspic", VSX_BUILTIN_XVRSPIC },
+ { MASK_VSX, CODE_FOR_vsx_floorv4sf2, "__builtin_vsx_xvrspim", VSX_BUILTIN_XVRSPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceilv4sf2, "__builtin_vsx_xvrspip", VSX_BUILTIN_XVRSPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncv4sf2, "__builtin_vsx_xvrspiz", VSX_BUILTIN_XVRSPIZ },
+
+ { MASK_VSX, CODE_FOR_vsx_xsrdpi, "__builtin_vsx_xsrdpi", VSX_BUILTIN_XSRDPI },
+ { MASK_VSX, CODE_FOR_vsx_xsrdpic, "__builtin_vsx_xsrdpic", VSX_BUILTIN_XSRDPIC },
+ { MASK_VSX, CODE_FOR_vsx_floordf2, "__builtin_vsx_xsrdpim", VSX_BUILTIN_XSRDPIM },
+ { MASK_VSX, CODE_FOR_vsx_ceildf2, "__builtin_vsx_xsrdpip", VSX_BUILTIN_XSRDPIP },
+ { MASK_VSX, CODE_FOR_vsx_btruncdf2, "__builtin_vsx_xsrdpiz", VSX_BUILTIN_XSRDPIZ },
+
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abs", ALTIVEC_BUILTIN_VEC_ABS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abss", ALTIVEC_BUILTIN_VEC_ABSS },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_ceil", ALTIVEC_BUILTIN_VEC_CEIL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_expte", ALTIVEC_BUILTIN_VEC_EXPTE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_floor", ALTIVEC_BUILTIN_VEC_FLOOR },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_loge", ALTIVEC_BUILTIN_VEC_LOGE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_mtvscr", ALTIVEC_BUILTIN_VEC_MTVSCR },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_re", ALTIVEC_BUILTIN_VEC_RE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_round", ALTIVEC_BUILTIN_VEC_ROUND },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_rsqrt", ALTIVEC_BUILTIN_VEC_RSQRT },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_rsqrte", ALTIVEC_BUILTIN_VEC_RSQRTE },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_trunc", ALTIVEC_BUILTIN_VEC_TRUNC },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_unpackh", ALTIVEC_BUILTIN_VEC_UNPACKH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupkhsh", ALTIVEC_BUILTIN_VEC_VUPKHSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupkhpx", ALTIVEC_BUILTIN_VEC_VUPKHPX },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupkhsb", ALTIVEC_BUILTIN_VEC_VUPKHSB },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_unpackl", ALTIVEC_BUILTIN_VEC_UNPACKL },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklpx", ALTIVEC_BUILTIN_VEC_VUPKLPX },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsh", ALTIVEC_BUILTIN_VEC_VUPKLSH },
+ { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsb", ALTIVEC_BUILTIN_VEC_VUPKLSB },
+
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nearbyint", ALTIVEC_BUILTIN_VEC_NEARBYINT },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_rint", ALTIVEC_BUILTIN_VEC_RINT },
+ { MASK_VSX, CODE_FOR_nothing, "__builtin_vec_sqrt", ALTIVEC_BUILTIN_VEC_SQRT },
+
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vec_float_sisf", VECTOR_BUILTIN_FLOAT_V4SI_V4SF },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vec_uns_float_sisf", VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI },
+ { MASK_ALTIVEC|MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vec_fixuns_sfsi", VECTOR_BUILTIN_FIXUNS_V4SF_V4SI },
+
+ /* The SPE unary builtins must start with SPE_BUILTIN_EVABS and
+ end with SPE_BUILTIN_EVSUBFUSIAAW. */
+ { 0, CODE_FOR_absv2si2, "__builtin_spe_evabs", SPE_BUILTIN_EVABS },
+ { 0, CODE_FOR_spe_evaddsmiaaw, "__builtin_spe_evaddsmiaaw", SPE_BUILTIN_EVADDSMIAAW },
+ { 0, CODE_FOR_spe_evaddssiaaw, "__builtin_spe_evaddssiaaw", SPE_BUILTIN_EVADDSSIAAW },
+ { 0, CODE_FOR_spe_evaddumiaaw, "__builtin_spe_evaddumiaaw", SPE_BUILTIN_EVADDUMIAAW },
+ { 0, CODE_FOR_spe_evaddusiaaw, "__builtin_spe_evaddusiaaw", SPE_BUILTIN_EVADDUSIAAW },
+ { 0, CODE_FOR_spe_evcntlsw, "__builtin_spe_evcntlsw", SPE_BUILTIN_EVCNTLSW },
+ { 0, CODE_FOR_spe_evcntlzw, "__builtin_spe_evcntlzw", SPE_BUILTIN_EVCNTLZW },
+ { 0, CODE_FOR_spe_evextsb, "__builtin_spe_evextsb", SPE_BUILTIN_EVEXTSB },
+ { 0, CODE_FOR_spe_evextsh, "__builtin_spe_evextsh", SPE_BUILTIN_EVEXTSH },
+ { 0, CODE_FOR_spe_evfsabs, "__builtin_spe_evfsabs", SPE_BUILTIN_EVFSABS },
+ { 0, CODE_FOR_spe_evfscfsf, "__builtin_spe_evfscfsf", SPE_BUILTIN_EVFSCFSF },
+ { 0, CODE_FOR_spe_evfscfsi, "__builtin_spe_evfscfsi", SPE_BUILTIN_EVFSCFSI },
+ { 0, CODE_FOR_spe_evfscfuf, "__builtin_spe_evfscfuf", SPE_BUILTIN_EVFSCFUF },
+ { 0, CODE_FOR_spe_evfscfui, "__builtin_spe_evfscfui", SPE_BUILTIN_EVFSCFUI },
+ { 0, CODE_FOR_spe_evfsctsf, "__builtin_spe_evfsctsf", SPE_BUILTIN_EVFSCTSF },
+ { 0, CODE_FOR_spe_evfsctsi, "__builtin_spe_evfsctsi", SPE_BUILTIN_EVFSCTSI },
+ { 0, CODE_FOR_spe_evfsctsiz, "__builtin_spe_evfsctsiz", SPE_BUILTIN_EVFSCTSIZ },
+ { 0, CODE_FOR_spe_evfsctuf, "__builtin_spe_evfsctuf", SPE_BUILTIN_EVFSCTUF },
+ { 0, CODE_FOR_spe_evfsctui, "__builtin_spe_evfsctui", SPE_BUILTIN_EVFSCTUI },
+ { 0, CODE_FOR_spe_evfsctuiz, "__builtin_spe_evfsctuiz", SPE_BUILTIN_EVFSCTUIZ },
+ { 0, CODE_FOR_spe_evfsnabs, "__builtin_spe_evfsnabs", SPE_BUILTIN_EVFSNABS },
+ { 0, CODE_FOR_spe_evfsneg, "__builtin_spe_evfsneg", SPE_BUILTIN_EVFSNEG },
+ { 0, CODE_FOR_spe_evmra, "__builtin_spe_evmra", SPE_BUILTIN_EVMRA },
+ { 0, CODE_FOR_negv2si2, "__builtin_spe_evneg", SPE_BUILTIN_EVNEG },
+ { 0, CODE_FOR_spe_evrndw, "__builtin_spe_evrndw", SPE_BUILTIN_EVRNDW },
+ { 0, CODE_FOR_spe_evsubfsmiaaw, "__builtin_spe_evsubfsmiaaw", SPE_BUILTIN_EVSUBFSMIAAW },
+ { 0, CODE_FOR_spe_evsubfssiaaw, "__builtin_spe_evsubfssiaaw", SPE_BUILTIN_EVSUBFSSIAAW },
+ { 0, CODE_FOR_spe_evsubfumiaaw, "__builtin_spe_evsubfumiaaw", SPE_BUILTIN_EVSUBFUMIAAW },
+
+  /* Placeholder; keep this as the last unary SPE builtin.  */
+ { 0, CODE_FOR_spe_evsubfusiaaw, "__builtin_spe_evsubfusiaaw", SPE_BUILTIN_EVSUBFUSIAAW },
+
+ { 0, CODE_FOR_paired_absv2sf2, "__builtin_paired_absv2sf2", PAIRED_BUILTIN_ABSV2SF2 },
+ { 0, CODE_FOR_nabsv2sf2, "__builtin_paired_nabsv2sf2", PAIRED_BUILTIN_NABSV2SF2 },
+ { 0, CODE_FOR_paired_negv2sf2, "__builtin_paired_negv2sf2", PAIRED_BUILTIN_NEGV2SF2 },
+ { 0, CODE_FOR_sqrtv2sf2, "__builtin_paired_sqrtv2sf2", PAIRED_BUILTIN_SQRTV2SF2 },
+ { 0, CODE_FOR_resv2sf2, "__builtin_paired_resv2sf2", PAIRED_BUILTIN_RESV2SF2 }
+};
+
+static rtx
+rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+  /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node)
+ return const0_rtx;
+
+ if (icode == CODE_FOR_altivec_vspltisb
+ || icode == CODE_FOR_altivec_vspltish
+ || icode == CODE_FOR_altivec_vspltisw
+ || icode == CODE_FOR_spe_evsplatfi
+ || icode == CODE_FOR_spe_evsplati)
+ {
+ /* Only allow 5-bit *signed* literals. */
+ if (GET_CODE (op0) != CONST_INT
+ || INTVAL (op0) > 15
+ || INTVAL (op0) < -16)
+ {
+ error ("argument 1 must be a 5-bit signed literal");
+ return const0_rtx;
+ }
+ }
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
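+
+/* Editorial note, not from the upstream sources: a unary builtin from
+   bdesc_1arg above, e.g.
+
+     __ev64_opaque__ r = __builtin_spe_evabs (a);
+
+   reaches this expander with ICODE == CODE_FOR_absv2si2.  The argument
+   is forced into a register of the pattern's input mode, a result
+   register is chosen (or freshly allocated), and the single insn is
+   emitted.  The __ev64_opaque__ spelling of the SPE vector type is
+   assumed here for illustration.  */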
+
+static rtx
+altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, scratch1, scratch2;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ /* If we have invalid arguments, bail out before generating bad rtl. */
+ if (arg0 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ scratch1 = gen_reg_rtx (mode0);
+ scratch2 = gen_reg_rtx (mode0);
+
+ pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
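+
+/* Editorial note, hedged: the abs* patterns expanded above take two
+   scratch operands because AltiVec has no absolute-value instruction;
+   the patterns are presumed to synthesize |x| from a zero register
+   plus a subtract/max (or mask) sequence, with the scratches holding
+   the intermediates.  */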
+
+static rtx
+rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+  /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (icode == CODE_FOR_altivec_vcfux
+ || icode == CODE_FOR_altivec_vcfsx
+ || icode == CODE_FOR_altivec_vctsxs
+ || icode == CODE_FOR_altivec_vctuxs
+ || icode == CODE_FOR_altivec_vspltb
+ || icode == CODE_FOR_altivec_vsplth
+ || icode == CODE_FOR_altivec_vspltw
+ || icode == CODE_FOR_spe_evaddiw
+ || icode == CODE_FOR_spe_evldd
+ || icode == CODE_FOR_spe_evldh
+ || icode == CODE_FOR_spe_evldw
+ || icode == CODE_FOR_spe_evlhhesplat
+ || icode == CODE_FOR_spe_evlhhossplat
+ || icode == CODE_FOR_spe_evlhhousplat
+ || icode == CODE_FOR_spe_evlwhe
+ || icode == CODE_FOR_spe_evlwhos
+ || icode == CODE_FOR_spe_evlwhou
+ || icode == CODE_FOR_spe_evlwhsplat
+ || icode == CODE_FOR_spe_evlwwsplat
+ || icode == CODE_FOR_spe_evrlwi
+ || icode == CODE_FOR_spe_evslwi
+ || icode == CODE_FOR_spe_evsrwis
+ || icode == CODE_FOR_spe_evsubifw
+ || icode == CODE_FOR_spe_evsrwiu)
+ {
+ /* Only allow 5-bit unsigned literals. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg1) & ~0x1f)
+ {
+ error ("argument 2 must be a 5-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
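+
+/* Editorial note, not from the upstream sources: for the
+   literal-taking binops screened above, a call such as
+
+     __vector signed int r = __builtin_altivec_vspltw (v, 3);
+
+   passes the 5-bit check (3 is within 0..31) and expands to a single
+   insn, while a variable or wider second argument is rejected with
+   the error above before any rtl is generated.  */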
+
+static rtx
+altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, scratch;
+ tree cr6_form = CALL_EXPR_ARG (exp, 0);
+ tree arg0 = CALL_EXPR_ARG (exp, 1);
+ tree arg1 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = SImode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ int cr6_form_int;
+
+ if (TREE_CODE (cr6_form) != INTEGER_CST)
+ {
+ error ("argument 1 of __builtin_altivec_predicate must be a constant");
+ return const0_rtx;
+ }
+ else
+ cr6_form_int = TREE_INT_CST_LOW (cr6_form);
+
+ gcc_assert (mode0 == mode1);
+
+ /* If we have invalid arguments, bail out before generating bad rtl. */
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ scratch = gen_reg_rtx (mode0);
+
+ pat = GEN_FCN (icode) (scratch, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+  /* The vec_any* and vec_all* predicates use the same opcodes for two
+     different operations, but the bits in CR6 differ depending on what
+     information we want, so we must extract the relevant CR6 bits
+     explicitly.  This follows directly from how the AltiVec
+     specification defines the predicates.  */
+
+ switch (cr6_form_int)
+ {
+ case 0:
+ emit_insn (gen_cr6_test_for_zero (target));
+ break;
+ case 1:
+ emit_insn (gen_cr6_test_for_zero_reverse (target));
+ break;
+ case 2:
+ emit_insn (gen_cr6_test_for_lt (target));
+ break;
+ case 3:
+ emit_insn (gen_cr6_test_for_lt_reverse (target));
+ break;
+ default:
+ error ("argument 1 of __builtin_altivec_predicate is out of range");
+ break;
+ }
+
+ return target;
+}
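+
+/* Editorial note: the vec_all_* and vec_any_* intrinsics supply
+   CR6_FORM as a literal 0..3, so every AltiVec predicate compiles to
+   one vector compare followed by exactly one of the four CR6
+   extraction patterns selected in the switch above.  */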
+
+static rtx
+paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, addr;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = Pmode;
+ enum machine_mode mode1 = Pmode;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+  /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (op0 == const0_rtx)
+ {
+ addr = gen_rtx_MEM (tmode, op1);
+ }
+ else
+ {
+ op0 = copy_to_mode_reg (mode0, op0);
+ addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
+ }
+
+ pat = GEN_FCN (icode) (target, addr);
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
+
+static rtx
+altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
+{
+ rtx pat, addr;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = Pmode;
+ enum machine_mode mode1 = Pmode;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+  /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (op0 == const0_rtx)
+ {
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+ }
+ else
+ {
+ op0 = copy_to_mode_reg (mode0, op0);
+ addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
+ }
+
+ pat = GEN_FCN (icode) (target, addr);
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
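+
+/* Editorial note, hedged: BLK distinguishes the lvlx/lvrx family (see
+   the callers below), which may access only part of a vector; using
+   BLKmode for the MEM is presumed to keep the reference conservative
+   for alias analysis instead of claiming a full TMODE-sized access.  */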
+
+static rtx
+spe_expand_stv_builtin (enum insn_code icode, tree exp)
+{
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx pat;
+ enum machine_mode mode0 = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+
+  /* Invalid arguments; bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node
+ || arg1 == error_mark_node
+ || arg2 == error_mark_node)
+ return const0_rtx;
+
+ if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
+ op0 = copy_to_mode_reg (mode2, op0);
+ if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
+ op1 = copy_to_mode_reg (mode0, op1);
+ if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
+ op2 = copy_to_mode_reg (mode1, op2);
+
+ pat = GEN_FCN (icode) (op1, op2, op0);
+ if (pat)
+ emit_insn (pat);
+ return NULL_RTX;
+}
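+
+/* Editorial note: the operand permutation above is deliberate.  The
+   builtin's first argument (the value being stored) is validated
+   against the store pattern's last operand and passed last, while the
+   pointer and offset arguments become the pattern's first two
+   operands.  */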
+
+static rtx
+paired_expand_stv_builtin (enum insn_code icode, tree exp)
+{
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx pat, addr;
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode1 = Pmode;
+ enum machine_mode mode2 = Pmode;
+
+  /* Invalid arguments; bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node
+ || arg1 == error_mark_node
+ || arg2 == error_mark_node)
+ return const0_rtx;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
+ op0 = copy_to_mode_reg (tmode, op0);
+
+ op2 = copy_to_mode_reg (mode2, op2);
+
+ if (op1 == const0_rtx)
+ {
+ addr = gen_rtx_MEM (tmode, op2);
+ }
+ else
+ {
+ op1 = copy_to_mode_reg (mode1, op1);
+ addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
+ }
+
+ pat = GEN_FCN (icode) (addr, op0);
+ if (pat)
+ emit_insn (pat);
+ return NULL_RTX;
+}
+
+static rtx
+altivec_expand_stv_builtin (enum insn_code icode, tree exp)
+{
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx pat, addr;
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode smode = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = Pmode;
+ enum machine_mode mode2 = Pmode;
+
+  /* Invalid arguments; bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node
+ || arg1 == error_mark_node
+ || arg2 == error_mark_node)
+ return const0_rtx;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
+ op0 = copy_to_mode_reg (smode, op0);
+
+ op2 = copy_to_mode_reg (mode2, op2);
+
+ if (op1 == const0_rtx)
+ {
+ addr = gen_rtx_MEM (tmode, op2);
+ }
+ else
+ {
+ op1 = copy_to_mode_reg (mode1, op1);
+ addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
+ }
+
+ pat = GEN_FCN (icode) (addr, op0);
+ if (pat)
+ emit_insn (pat);
+ return NULL_RTX;
+}
+
+static rtx
+rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ enum machine_mode mode2 = insn_data[icode].operand[3].mode;
+
+ if (icode == CODE_FOR_nothing)
+ /* Builtin not supported on this processor. */
+ return 0;
+
+  /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node
+ || arg1 == error_mark_node
+ || arg2 == error_mark_node)
+ return const0_rtx;
+
+  /* Check and prepare the literal argument, depending on the
+     instruction code.
+
+     Note that a switch statement instead of this chain of tests would
+     be invalid: many of the CODE_FOR values may be CODE_FOR_nothing,
+     which would yield multiple case labels with identical values.
+     (With ICODE == CODE_FOR_nothing we have already returned above,
+     so those cases can never be reached here at runtime.)  */
+ if (icode == CODE_FOR_altivec_vsldoi_v4sf
+ || icode == CODE_FOR_altivec_vsldoi_v4si
+ || icode == CODE_FOR_altivec_vsldoi_v8hi
+ || icode == CODE_FOR_altivec_vsldoi_v16qi)
+ {
+ /* Only allow 4-bit unsigned literals. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0xf)
+ {
+ error ("argument 3 must be a 4-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_vsx_xxpermdi_v2df
+ || icode == CODE_FOR_vsx_xxpermdi_v2di
+ || icode == CODE_FOR_vsx_xxsldwi_v16qi
+ || icode == CODE_FOR_vsx_xxsldwi_v8hi
+ || icode == CODE_FOR_vsx_xxsldwi_v4si
+ || icode == CODE_FOR_vsx_xxsldwi_v4sf
+ || icode == CODE_FOR_vsx_xxsldwi_v2di
+ || icode == CODE_FOR_vsx_xxsldwi_v2df)
+ {
+ /* Only allow 2-bit unsigned literals. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0x3)
+ {
+ error ("argument 3 must be a 2-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_vsx_set_v2df
+ || icode == CODE_FOR_vsx_set_v2di)
+ {
+ /* Only allow 1-bit unsigned literals. */
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0x1)
+ {
+ error ("argument 3 must be a 1-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+
+ if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
+ pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
+ else
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+}
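+
+/* Editorial note, not from the upstream sources: for the ternops
+   screened above, a shift such as
+
+     __vector signed int r = __builtin_altivec_vsldoi_4si (a, b, 3);
+
+   satisfies the 4-bit check, whereas a non-constant or too-wide third
+   argument is diagnosed before expansion.  The _4si spelling of the
+   builtin name is assumed here for illustration.  */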
+
+/* Expand the lvx builtins. */
+static rtx
+altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0;
+ enum machine_mode tmode, mode0;
+ rtx pat, op0;
+ enum insn_code icode;
+
+ switch (fcode)
+ {
+ case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
+ icode = CODE_FOR_vector_altivec_load_v16qi;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
+ icode = CODE_FOR_vector_altivec_load_v8hi;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
+ icode = CODE_FOR_vector_altivec_load_v4si;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
+ icode = CODE_FOR_vector_altivec_load_v4sf;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
+ icode = CODE_FOR_vector_altivec_load_v2df;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
+ icode = CODE_FOR_vector_altivec_load_v2di;
+ break;
+ default:
+ *expandedp = false;
+ return NULL_RTX;
+ }
+
+ *expandedp = true;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ tmode = insn_data[icode].operand[0].mode;
+ mode0 = insn_data[icode].operand[1].mode;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Expand the stvx builtins. */
+static rtx
+altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+ bool *expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0, arg1;
+ enum machine_mode mode0, mode1;
+ rtx pat, op0, op1;
+ enum insn_code icode;
+
+ switch (fcode)
+ {
+ case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
+ icode = CODE_FOR_vector_altivec_store_v16qi;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
+ icode = CODE_FOR_vector_altivec_store_v8hi;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
+ icode = CODE_FOR_vector_altivec_store_v4si;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
+ icode = CODE_FOR_vector_altivec_store_v4sf;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
+ icode = CODE_FOR_vector_altivec_store_v2df;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
+ icode = CODE_FOR_vector_altivec_store_v2di;
+ break;
+ default:
+ *expandedp = false;
+ return NULL_RTX;
+ }
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (pat)
+ emit_insn (pat);
+
+ *expandedp = true;
+ return NULL_RTX;
+}
+
+/* Expand the dst builtins. */
+static rtx
+altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+ bool *expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0, arg1, arg2;
+ enum machine_mode mode0, mode1;
+ rtx pat, op0, op1, op2;
+ const struct builtin_description *d;
+ size_t i;
+
+ *expandedp = false;
+
+ /* Handle DST variants. */
+ d = bdesc_dst;
+ for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
+ if (d->code == fcode)
+ {
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[d->icode].operand[0].mode;
+ mode1 = insn_data[d->icode].operand[1].mode;
+
+	/* Invalid arguments; bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node
+ || arg1 == error_mark_node
+ || arg2 == error_mark_node)
+ return const0_rtx;
+
+ *expandedp = true;
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg2) & ~0x3)
+ {
+ error ("argument to %qs must be a 2-bit unsigned literal", d->name);
+ return const0_rtx;
+ }
+
+ if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1, op2);
+ if (pat != 0)
+ emit_insn (pat);
+
+ return NULL_RTX;
+ }
+
+ return NULL_RTX;
+}
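+
+/* Editorial note, hedged: the 2-bit literal enforced above is the
+   data-stream tag of the dst family of instructions, which address
+   one of four hardware prefetch streams; a variable tag cannot be
+   encoded in the instruction, hence the INTEGER_CST requirement.  */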
+
+/* Expand vec_init builtin. */
+static rtx
+altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
+{
+ enum machine_mode tmode = TYPE_MODE (type);
+ enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ int i, n_elt = GET_MODE_NUNITS (tmode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ gcc_assert (VECTOR_MODE_P (tmode));
+ gcc_assert (n_elt == call_expr_nargs (exp));
+
+ for (i = 0; i < n_elt; ++i)
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
+ return target;
+}
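+
+/* Editorial note: a VEC_INIT call carries one scalar argument per
+   vector element (asserted above); each argument is expanded,
+   reinterpreted in the element mode via gen_lowpart, and the whole
+   PARALLEL is handed to rs6000_expand_vector_init to emit a suitable
+   construction sequence.  */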
+
+/* Return the integer constant in ARG. Constrain it to be in the range
+ of the subparts of VEC_TYPE; issue an error if not. */
+
+static int
+get_element_number (tree vec_type, tree arg)
+{
+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
+
+ if (!host_integerp (arg, 1)
+ || (elt = tree_low_cst (arg, 1), elt > max))
+ {
+ error ("selector must be an integer constant in the range 0..%wi", max);
+ return 0;
+ }
+
+ return elt;
+}
+
+/* Expand vec_set builtin. */
+static rtx
+altivec_expand_vec_set_builtin (tree exp)
+{
+ enum machine_mode tmode, mode1;
+ tree arg0, arg1, arg2;
+ int elt;
+ rtx op0, op1;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
+ elt = get_element_number (TREE_TYPE (arg0), arg2);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+
+ rs6000_expand_vector_set (op0, op1, elt);
+
+ return op0;
+}
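+
+/* Editorial note: vec_set is the read-modify-write counterpart of
+   vec_ext below; the vector ARG0 is loaded into a register, the
+   scalar ARG1 is converted to the element mode, and
+   rs6000_expand_vector_set rewrites the selected lane in place, so
+   the updated vector register OP0 is the result.  */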
+
+/* Expand vec_ext builtin. */
+static rtx
+altivec_expand_vec_ext_builtin (tree exp, rtx target)
+{
+ enum machine_mode tmode, mode0;
+ tree arg0, arg1;
+ int elt;
+ rtx op0;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ elt = get_element_number (TREE_TYPE (arg0), arg1);
+
+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode0 = TYPE_MODE (TREE_TYPE (arg0));
+ gcc_assert (VECTOR_MODE_P (mode0));
+
+ op0 = force_reg (mode0, op0);
+
+ if (optimize || !target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ rs6000_expand_vector_extract (target, op0, elt);
+
+ return target;
+}
+
+/* Expand the builtin in EXP and store the result in TARGET. Store
+ true in *EXPANDEDP if we found a builtin to expand. */
+static rtx
+altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
+{
+ const struct builtin_description *d;
+ const struct builtin_description_predicates *dp;
+ size_t i;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0;
+ rtx op0, pat;
+ enum machine_mode tmode, mode0;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ if ((fcode >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && fcode <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (fcode >= VSX_BUILTIN_OVERLOADED_FIRST
+ && fcode <= VSX_BUILTIN_OVERLOADED_LAST))
+ {
+ *expandedp = true;
+ error ("unresolved overload for Altivec builtin %qF", fndecl);
+ return const0_rtx;
+ }
+
+ target = altivec_expand_ld_builtin (exp, target, expandedp);
+ if (*expandedp)
+ return target;
+
+ target = altivec_expand_st_builtin (exp, target, expandedp);
+ if (*expandedp)
+ return target;
+
+ target = altivec_expand_dst_builtin (exp, target, expandedp);
+ if (*expandedp)
+ return target;
+
+ *expandedp = true;
+
+ switch (fcode)
+ {
+ case ALTIVEC_BUILTIN_STVX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
+ case ALTIVEC_BUILTIN_STVEBX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
+ case ALTIVEC_BUILTIN_STVEHX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
+ case ALTIVEC_BUILTIN_STVEWX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
+ case ALTIVEC_BUILTIN_STVXL:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
+
+ case ALTIVEC_BUILTIN_STVLX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
+ case ALTIVEC_BUILTIN_STVLXL:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
+ case ALTIVEC_BUILTIN_STVRX:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
+ case ALTIVEC_BUILTIN_STVRXL:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
+
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
+ case VSX_BUILTIN_STXVD2X_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
+
+ case ALTIVEC_BUILTIN_MFVSCR:
+ icode = CODE_FOR_altivec_mfvscr;
+ tmode = insn_data[icode].operand[0].mode;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode) (target);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+
+ case ALTIVEC_BUILTIN_MTVSCR:
+ icode = CODE_FOR_altivec_mtvscr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+      /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node)
+ return const0_rtx;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (op0);
+ if (pat)
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case ALTIVEC_BUILTIN_DSSALL:
+ emit_insn (gen_altivec_dssall ());
+ return NULL_RTX;
+
+ case ALTIVEC_BUILTIN_DSS:
+ icode = CODE_FOR_altivec_dss;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ STRIP_NOPS (arg0);
+ op0 = expand_normal (arg0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+      /* If we got invalid arguments, bail out before generating bad rtl.  */
+ if (arg0 == error_mark_node)
+ return const0_rtx;
+
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x3)
+ {
+ error ("argument to dss must be a 2-bit unsigned literal");
+ return const0_rtx;
+ }
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ emit_insn (gen_altivec_dss (op0));
+ return NULL_RTX;
+
+ case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
+ case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
+ case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
+ case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
+ case VSX_BUILTIN_VEC_INIT_V2DF:
+ case VSX_BUILTIN_VEC_INIT_V2DI:
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
+
+ case ALTIVEC_BUILTIN_VEC_SET_V4SI:
+ case ALTIVEC_BUILTIN_VEC_SET_V8HI:
+ case ALTIVEC_BUILTIN_VEC_SET_V16QI:
+ case ALTIVEC_BUILTIN_VEC_SET_V4SF:
+ case VSX_BUILTIN_VEC_SET_V2DF:
+ case VSX_BUILTIN_VEC_SET_V2DI:
+ return altivec_expand_vec_set_builtin (exp);
+
+ case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
+ case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
+ case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
+ case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
+ case VSX_BUILTIN_VEC_EXT_V2DF:
+ case VSX_BUILTIN_VEC_EXT_V2DI:
+ return altivec_expand_vec_ext_builtin (exp, target);
+
+ default:
+      /* Fall through to the table-driven handlers below.  */
+      break;
+ }
+
+ /* Expand abs* operations. */
+ d = bdesc_abs;
+ for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
+ if (d->code == fcode)
+ return altivec_expand_abs_builtin (d->icode, exp, target);
+
+ /* Expand the AltiVec predicates. */
+ dp = bdesc_altivec_preds;
+ for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, dp++)
+ if (dp->code == fcode)
+ return altivec_expand_predicate_builtin (dp->icode, exp, target);
+
+  /* The LV* builtins were initialized differently, so expand them
+     here rather than through the generic tables.  */
+ switch (fcode)
+ {
+ case ALTIVEC_BUILTIN_LVSL:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVSR:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVEBX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVEHX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVEWX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVLX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
+ exp, target, true);
+ case ALTIVEC_BUILTIN_LVLXL:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
+ exp, target, true);
+ case ALTIVEC_BUILTIN_LVRX:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
+ exp, target, true);
+ case ALTIVEC_BUILTIN_LVRXL:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
+ exp, target, true);
+ case VSX_BUILTIN_LXVD2X_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
+ exp, target, false);
+ case VSX_BUILTIN_LXVD2X_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V4SI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
+ exp, target, false);
+ case VSX_BUILTIN_LXVW4X_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
+ exp, target, false);
+    default:
+      /* Fall through to the unhandled-builtin return below.  */
+      break;
+ }
+
+ *expandedp = false;
+ return NULL_RTX;
+}
+
+/* Expand the builtin in EXP and store the result in TARGET. Store
+ true in *EXPANDEDP if we found a builtin to expand. */
+static rtx
+paired_expand_builtin (tree exp, rtx target, bool * expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d;
+ size_t i;
+
+ *expandedp = true;
+
+ switch (fcode)
+ {
+ case PAIRED_BUILTIN_STX:
+ return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
+ case PAIRED_BUILTIN_LX:
+ return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
+ default:
+      /* Fall through to the predicate table below.  */
+      break;
+ }
+
+ /* Expand the paired predicates. */
+ d = bdesc_paired_preds;
+ for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
+ if (d->code == fcode)
+ return paired_expand_predicate_builtin (d->icode, exp, target);
+
+ *expandedp = false;
+ return NULL_RTX;
+}
+
+/* Binops that must be initialized manually, but which can be expanded
+   automatically by rs6000_expand_binop_builtin.  */
+static struct builtin_description bdesc_2arg_spe[] =
+{
+ { 0, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
+ { 0, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
+ { 0, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
+ { 0, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
+ { 0, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
+ { 0, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
+ { 0, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
+ { 0, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
+ { 0, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
+ { 0, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
+ { 0, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
+ { 0, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
+ { 0, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
+ { 0, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
+ { 0, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
+ { 0, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
+ { 0, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
+ { 0, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
+ { 0, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
+ { 0, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
+ { 0, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
+ { 0, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
+};
+
+/* Expand the builtin in EXP and store the result in TARGET. Store
+ true in *EXPANDEDP if we found a builtin to expand.
+
+ This expands the SPE builtins that are not simple unary and binary
+ operations. */
+static rtx
+spe_expand_builtin (tree exp, rtx target, bool *expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg1, arg0;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ enum insn_code icode;
+ enum machine_mode tmode, mode0;
+ rtx pat, op0;
+ struct builtin_description *d;
+ size_t i;
+
+ *expandedp = true;
+
+ /* Syntax check for a 5-bit unsigned immediate. */
+ switch (fcode)
+ {
+ case SPE_BUILTIN_EVSTDD:
+ case SPE_BUILTIN_EVSTDH:
+ case SPE_BUILTIN_EVSTDW:
+ case SPE_BUILTIN_EVSTWHE:
+ case SPE_BUILTIN_EVSTWHO:
+ case SPE_BUILTIN_EVSTWWE:
+ case SPE_BUILTIN_EVSTWWO:
+ arg1 = CALL_EXPR_ARG (exp, 2);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg1) & ~0x1f)
+ {
+ error ("argument 2 must be a 5-bit unsigned literal");
+ return const0_rtx;
+ }
+ break;
+ default:
+ break;
+ }
+
+  /* The evsplat*i instructions take a 5-bit signed literal rather
+     than a register operand, so dispatch them explicitly.  */
+ switch (fcode)
+ {
+ case SPE_BUILTIN_EVSPLATFI:
+ return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
+ exp, target);
+ case SPE_BUILTIN_EVSPLATI:
+ return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
+ exp, target);
+ default:
+ break;
+ }
+
+ d = (struct builtin_description *) bdesc_2arg_spe;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (d->icode, exp, target);
+
+ d = (struct builtin_description *) bdesc_spe_predicates;
+ for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
+ if (d->code == fcode)
+ return spe_expand_predicate_builtin (d->icode, exp, target);
+
+ d = (struct builtin_description *) bdesc_spe_evsel;
+ for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
+ if (d->code == fcode)
+ return spe_expand_evsel_builtin (d->icode, exp, target);
+
+ switch (fcode)
+ {
+ case SPE_BUILTIN_EVSTDDX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
+ case SPE_BUILTIN_EVSTDHX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
+ case SPE_BUILTIN_EVSTDWX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
+ case SPE_BUILTIN_EVSTWHEX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
+ case SPE_BUILTIN_EVSTWHOX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
+ case SPE_BUILTIN_EVSTWWEX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
+ case SPE_BUILTIN_EVSTWWOX:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
+ case SPE_BUILTIN_EVSTDD:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
+ case SPE_BUILTIN_EVSTDH:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
+ case SPE_BUILTIN_EVSTDW:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
+ case SPE_BUILTIN_EVSTWHE:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
+ case SPE_BUILTIN_EVSTWHO:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
+ case SPE_BUILTIN_EVSTWWE:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
+ case SPE_BUILTIN_EVSTWWO:
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
+ case SPE_BUILTIN_MFSPEFSCR:
+ icode = CODE_FOR_spe_mfspefscr;
+ tmode = insn_data[icode].operand[0].mode;
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode) (target);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+ case SPE_BUILTIN_MTSPEFSCR:
+ icode = CODE_FOR_spe_mtspefscr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+ if (arg0 == error_mark_node)
+ return const0_rtx;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (op0);
+ if (pat)
+ emit_insn (pat);
+ return NULL_RTX;
+ default:
+ break;
+ }
+
+ *expandedp = false;
+ return NULL_RTX;
+}
+
+static rtx
+paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, scratch, tmp;
+ tree form = CALL_EXPR_ARG (exp, 0);
+ tree arg0 = CALL_EXPR_ARG (exp, 1);
+ tree arg1 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ int form_int;
+ enum rtx_code code;
+
+ if (TREE_CODE (form) != INTEGER_CST)
+ {
+ error ("argument 1 of __builtin_paired_predicate must be a constant");
+ return const0_rtx;
+ }
+ else
+ form_int = TREE_INT_CST_LOW (form);
+
+ gcc_assert (mode0 == mode1);
+
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != SImode
+ || !(*insn_data[icode].operand[0].predicate) (target, SImode))
+ target = gen_reg_rtx (SImode);
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ scratch = gen_reg_rtx (CCFPmode);
+
+ pat = GEN_FCN (icode) (scratch, op0, op1);
+ if (!pat)
+ return const0_rtx;
+
+ emit_insn (pat);
+
+ switch (form_int)
+ {
+ /* LT bit. */
+ case 0:
+ code = LT;
+ break;
+ /* GT bit. */
+ case 1:
+ code = GT;
+ break;
+ /* EQ bit. */
+ case 2:
+ code = EQ;
+ break;
+ /* UN bit. */
+ case 3:
+ emit_insn (gen_move_from_CR_ov_bit (target, scratch));
+ return target;
+ default:
+ error ("argument 1 of __builtin_paired_predicate is out of range");
+ return const0_rtx;
+ }
+
+ tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
+ emit_move_insn (target, tmp);
+ return target;
+}
+
+static rtx
+spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, scratch, tmp;
+ tree form = CALL_EXPR_ARG (exp, 0);
+ tree arg0 = CALL_EXPR_ARG (exp, 1);
+ tree arg1 = CALL_EXPR_ARG (exp, 2);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ int form_int;
+ enum rtx_code code;
+
+ if (TREE_CODE (form) != INTEGER_CST)
+ {
+ error ("argument 1 of __builtin_spe_predicate must be a constant");
+ return const0_rtx;
+ }
+ else
+ form_int = TREE_INT_CST_LOW (form);
+
+ gcc_assert (mode0 == mode1);
+
+ if (arg0 == error_mark_node || arg1 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != SImode
+ || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
+ target = gen_reg_rtx (SImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ scratch = gen_reg_rtx (CCmode);
+
+ pat = GEN_FCN (icode) (scratch, op0, op1);
+ if (! pat)
+ return const0_rtx;
+ emit_insn (pat);
+
+ /* There are 4 variants for each predicate: _any_, _all_, _upper_,
+ _lower_. We use one compare, but look in different bits of the
+ CR for each variant.
+
+ There are 2 elements in each SPE simd type (upper/lower). The CR
+ bits are set as follows:
+
+ BIT0 | BIT 1 | BIT 2 | BIT 3
+ U | L | (U | L) | (U & L)
+
+ So, for an "all" relationship, BIT 3 would be set.
+ For an "any" relationship, BIT 2 would be set. Etc.
+
+ Following traditional nomenclature, these bits map to:
+
+ BIT0 | BIT 1 | BIT 2 | BIT 3
+ LT | GT | EQ | OV
+
+     Below, we generate rtl to test the bit selected by FORM_INT:
+     OV for "all", EQ for "any", LT for "upper", GT for "lower".
+ */
+
+ switch (form_int)
+ {
+ /* All variant. OV bit. */
+ case 0:
+ /* We need to get to the OV bit, which is the ORDERED bit. We
+ could generate (ordered:SI (reg:CC xx) (const_int 0)), but
+ that's ugly and will make validate_condition_mode die.
+ So let's just use another pattern. */
+ emit_insn (gen_move_from_CR_ov_bit (target, scratch));
+ return target;
+ /* Any variant. EQ bit. */
+ case 1:
+ code = EQ;
+ break;
+ /* Upper variant. LT bit. */
+ case 2:
+ code = LT;
+ break;
+ /* Lower variant. GT bit. */
+ case 3:
+ code = GT;
+ break;
+ default:
+ error ("argument 1 of __builtin_spe_predicate is out of range");
+ return const0_rtx;
+ }
+
+ tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
+ emit_move_insn (target, tmp);
+
+ return target;
+}
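+
+/* Editorial note, not from the upstream sources: an SPE predicate
+   call such as
+
+     int any_gt = __builtin_spe_evcmpgts (1, a, b);
+
+   emits one compare and then extracts the EQ ("any") bit per the
+   switch above; FORM 0 would extract the OV ("all") bit instead.
+   The evcmpgts spelling is assumed here for illustration.  */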
+
+/* The evsel builtins look like this:
+
+ e = __builtin_spe_evsel_OP (a, b, c, d);
+
+ and work like this:
+
+ e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
+ e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
+*/
+
+static rtx
+spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat, scratch;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ gcc_assert (mode0 == mode1);
+
+ if (arg0 == error_mark_node || arg1 == error_mark_node
+ || arg2 == error_mark_node || arg3 == error_mark_node)
+ return const0_rtx;
+
+ if (target == 0
+ || GET_MODE (target) != mode0
+ || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
+ target = gen_reg_rtx (mode0);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode0, op1);
+ if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
+ op2 = copy_to_mode_reg (mode0, op2);
+ if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
+ op3 = copy_to_mode_reg (mode0, op3);
+
+ /* Generate the compare. */
+ scratch = gen_reg_rtx (CCmode);
+ pat = GEN_FCN (icode) (scratch, op0, op1);
+ if (! pat)
+ return const0_rtx;
+ emit_insn (pat);
+
+ if (mode0 == V2SImode)
+ emit_insn (gen_spe_evsel (target, op2, op3, scratch));
+ else
+ emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
+
+ return target;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d;
+ size_t i;
+ rtx ret;
+ bool success;
+
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
+
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
+
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
+
+ case RS6000_BUILTIN_BSWAP_HI:
+ return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target);
+
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si), exp, target);
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode = (int) CODE_FOR_altivec_lvsr;
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode = insn_data[icode].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+	  /* For the load case we need to negate the address.  */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, op,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_SPE)
+ {
+ ret = spe_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_PAIRED_FLOAT)
+ {
+ ret = paired_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
+
+ /* Handle simple unary operations. */
+ d = (struct builtin_description *) bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (d->icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = (struct builtin_description *) bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (d->icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (d->icode, exp, target);
+
+ gcc_unreachable ();
+}
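+
+/* Editorial note: dispatch order above matters.  The special cases
+   (recip/rsqrt, bswap16, bpermd, the mask_for_load/store pair, and
+   the one-argument vcfux family) are peeled off first; each enabled
+   vector extension then gets a chance to claim the builtin; only
+   builtins in the shared unop/binop/ternop tables fall through to
+   the generic expanders, and an unknown code is a compiler bug
+   (gcc_unreachable).  */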
+
+static void
+rs6000_init_builtins (void)
+{
+ tree tdecl;
+ tree ftype;
+
+ V2SI_type_node = build_vector_type (intSI_type_node, 2);
+ V2SF_type_node = build_vector_type (float_type_node, 2);
+ V2DI_type_node = build_vector_type (intDI_type_node, 2);
+ V2DF_type_node = build_vector_type (double_type_node, 2);
+ V4HI_type_node = build_vector_type (intHI_type_node, 4);
+ V4SI_type_node = build_vector_type (intSI_type_node, 4);
+ V4SF_type_node = build_vector_type (float_type_node, 4);
+ V8HI_type_node = build_vector_type (intHI_type_node, 8);
+ V16QI_type_node = build_vector_type (intQI_type_node, 16);
+
+ unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
+ unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
+ unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
+ unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
+
+ opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
+ opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
+ opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
+ opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
+
+  /* The 'vector bool ...' types must be kept distinct from
+     'vector unsigned ...' types, especially in C++.  Similarly,
+     'vector pixel' is distinct from 'vector unsigned short'.  */
+
+ bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
+ bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
+ bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
+ bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
+ pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
+
+ long_integer_type_internal_node = long_integer_type_node;
+ long_unsigned_type_internal_node = long_unsigned_type_node;
+ long_long_integer_type_internal_node = long_long_integer_type_node;
+ long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
+ intQI_type_internal_node = intQI_type_node;
+ uintQI_type_internal_node = unsigned_intQI_type_node;
+ intHI_type_internal_node = intHI_type_node;
+ uintHI_type_internal_node = unsigned_intHI_type_node;
+ intSI_type_internal_node = intSI_type_node;
+ uintSI_type_internal_node = unsigned_intSI_type_node;
+ intDI_type_internal_node = intDI_type_node;
+ uintDI_type_internal_node = unsigned_intDI_type_node;
+ float_type_internal_node = float_type_node;
+ double_type_internal_node = double_type_node;
+ void_type_internal_node = void_type_node;
+
+ /* Initialize the modes for builtin_function_type, mapping a machine mode to
+ tree type node. */
+ builtin_mode_to_type[QImode][0] = integer_type_node;
+ builtin_mode_to_type[HImode][0] = integer_type_node;
+ builtin_mode_to_type[SImode][0] = intSI_type_node;
+ builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
+ builtin_mode_to_type[DImode][0] = intDI_type_node;
+ builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
+ builtin_mode_to_type[SFmode][0] = float_type_node;
+ builtin_mode_to_type[DFmode][0] = double_type_node;
+ builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
+ builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
+ builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
+ builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
+ builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
+ builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
+ builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
+ builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
+ builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
+ builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
+ builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
+ builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
+ builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
+
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__bool char"),
+ bool_char_type_node);
+ TYPE_NAME (bool_char_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__bool short"),
+ bool_short_type_node);
+ TYPE_NAME (bool_short_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__bool int"),
+ bool_int_type_node);
+ TYPE_NAME (bool_int_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("__pixel"),
+ pixel_type_node);
+ TYPE_NAME (pixel_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
+ bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
+ bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
+ bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
+ pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
+
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__vector unsigned char"),
+ unsigned_V16QI_type_node);
+ TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector signed char"),
+ V16QI_type_node);
+ TYPE_NAME (V16QI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector __bool char"),
+ bool_V16QI_type_node);
+ TYPE_NAME ( bool_V16QI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector unsigned short"),
+ unsigned_V8HI_type_node);
+ TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector signed short"),
+ V8HI_type_node);
+ TYPE_NAME (V8HI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__vector __bool short"),
+ bool_V8HI_type_node);
+ TYPE_NAME (bool_V8HI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__vector unsigned int"),
+ unsigned_V4SI_type_node);
+ TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector signed int"),
+ V4SI_type_node);
+ TYPE_NAME (V4SI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector __bool int"),
+ bool_V4SI_type_node);
+ TYPE_NAME (bool_V4SI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector float"),
+ V4SF_type_node);
+ TYPE_NAME (V4SF_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector __pixel"),
+ pixel_V8HI_type_node);
+ TYPE_NAME (pixel_V8HI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ if (TARGET_VSX)
+ {
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector double"),
+ V2DF_type_node);
+ TYPE_NAME (V2DF_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector long"),
+ V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector unsigned long"),
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+
+ tdecl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__vector __bool long"),
+ bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ (*lang_hooks.decls.pushdecl) (tdecl);
+ }
+
+ if (TARGET_PAIRED_FLOAT)
+ paired_init_builtins ();
+ if (TARGET_SPE)
+ spe_init_builtins ();
+ if (TARGET_ALTIVEC)
+ altivec_init_builtins ();
+ if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT || TARGET_VSX)
+ rs6000_common_init_builtins ();
+ if (TARGET_FRE)
+ {
+ ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
+ RS6000_BUILTIN_RECIP,
+ "__builtin_recipdiv");
+ def_builtin (MASK_POPCNTB, "__builtin_recipdiv", ftype,
+ RS6000_BUILTIN_RECIP);
+ }
+ if (TARGET_FRES)
+ {
+ ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
+ RS6000_BUILTIN_RECIPF,
+ "__builtin_recipdivf");
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_recipdivf", ftype,
+ RS6000_BUILTIN_RECIPF);
+ }
+ if (TARGET_FRSQRTE)
+ {
+ ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
+ RS6000_BUILTIN_RSQRT,
+ "__builtin_rsqrt");
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrt", ftype,
+ RS6000_BUILTIN_RSQRT);
+ }
+ if (TARGET_FRSQRTES)
+ {
+ ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
+ RS6000_BUILTIN_RSQRTF,
+ "__builtin_rsqrtf");
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype,
+ RS6000_BUILTIN_RSQRTF);
+ }
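+ /* Illustrative use of the reciprocal builtins defined above (available
+ when the corresponding option masks are set): __builtin_recipdiv (x, y)
+ divides X by Y via the reciprocal-estimate path, and __builtin_rsqrt (x)
+ computes an optimized 1.0 / sqrt (x). */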
+ if (TARGET_POPCNTD)
+ {
+ enum machine_mode mode = (TARGET_64BIT) ? DImode : SImode;
+ tree ftype = builtin_function_type (mode, mode, mode, VOIDmode,
+ POWER7_BUILTIN_BPERMD,
+ "__builtin_bpermd");
+ def_builtin (MASK_POPCNTD, "__builtin_bpermd", ftype,
+ POWER7_BUILTIN_BPERMD);
+ }
+ if (TARGET_POWERPC)
+ {
+ /* Don't use builtin_function_type here, as it maps HI/QI to SI. */
+ tree ftype = build_function_type_list (unsigned_intHI_type_node,
+ unsigned_intHI_type_node,
+ NULL_TREE);
+ def_builtin (MASK_POWERPC, "__builtin_bswap16", ftype,
+ RS6000_BUILTIN_BSWAP_HI);
+ }
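+ /* Illustrative use: __builtin_bswap16 (0x1234) yields 0x3412, keeping
+ the HImode operand type that builtin_function_type would have widened. */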
+
+#if TARGET_XCOFF
+ /* AIX libm provides clog as __clog. */
+ if (built_in_decls [BUILT_IN_CLOG])
+ set_user_assembler_name (built_in_decls [BUILT_IN_CLOG], "__clog");
+#endif
+
+#ifdef SUBTARGET_INIT_BUILTINS
+ SUBTARGET_INIT_BUILTINS;
+#endif
+}
+
+/* Returns the rs6000 builtin decl for CODE. */
+
+static tree
+rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= RS6000_BUILTIN_COUNT)
+ return error_mark_node;
+
+ return rs6000_builtin_decls[code];
+}
+
+/* Search through a set of builtins and enable the mask bits.
+ DESC is an array of builtins.
+ SIZE is the total number of builtins.
+ START is the builtin enum at which to start.
+ END is the builtin enum at which to end. */
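+/* For example, the SPE code below uses this to turn on every bdesc_2arg
+ entry from SPE_BUILTIN_EVADDW through SPE_BUILTIN_EVXOR by storing the
+ current target_flags into each entry's mask field. */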
+static void
+enable_mask_for_builtins (struct builtin_description *desc, int size,
+ enum rs6000_builtins start,
+ enum rs6000_builtins end)
+{
+ int i;
+
+ for (i = 0; i < size; ++i)
+ if (desc[i].code == start)
+ break;
+
+ if (i == size)
+ return;
+
+ for (; i < size; ++i)
+ {
+ /* Flip all the bits on. */
+ desc[i].mask = target_flags;
+ if (desc[i].code == end)
+ break;
+ }
+}
+
+static void
+spe_init_builtins (void)
+{
+ tree endlink = void_list_node;
+ tree puint_type_node = build_pointer_type (unsigned_type_node);
+ tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
+ struct builtin_description *d;
+ size_t i;
+
+ tree v2si_ftype_4_v2si
+ = build_function_type
+ (opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ endlink)))));
+
+ tree v2sf_ftype_4_v2sf
+ = build_function_type
+ (opaque_V2SF_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ endlink)))));
+
+ tree int_ftype_int_v2si_v2si
+ = build_function_type
+ (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ endlink))));
+
+ tree int_ftype_int_v2sf_v2sf
+ = build_function_type
+ (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ tree_cons (NULL_TREE, opaque_V2SF_type_node,
+ endlink))));
+
+ tree void_ftype_v2si_puint_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, puint_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+
+ tree void_ftype_v2si_puint_char
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, puint_type_node,
+ tree_cons (NULL_TREE,
+ char_type_node,
+ endlink))));
+
+ tree void_ftype_v2si_pv2si_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_p_V2SI_type_node,
+ tree_cons (NULL_TREE,
+ integer_type_node,
+ endlink))));
+
+ tree void_ftype_v2si_pv2si_char
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_p_V2SI_type_node,
+ tree_cons (NULL_TREE,
+ char_type_node,
+ endlink))));
+
+ tree void_ftype_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+
+ tree int_ftype_void
+ = build_function_type (integer_type_node, endlink);
+
+ tree v2si_ftype_pv2si_int
+ = build_function_type (opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, opaque_p_V2SI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+
+ tree v2si_ftype_puint_int
+ = build_function_type (opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, puint_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+
+ tree v2si_ftype_pushort_int
+ = build_function_type (opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, pushort_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ endlink)));
+
+ tree v2si_ftype_signed_char
+ = build_function_type (opaque_V2SI_type_node,
+ tree_cons (NULL_TREE, signed_char_type_node,
+ endlink));
+
+ /* The initialization of the simple binary and unary builtins is
+ done in rs6000_common_init_builtins, but we have to enable the
+ mask bits here manually because we have run out of `target_flags'
+ bits. We really need to redesign this mask business. */
+
+ enable_mask_for_builtins ((struct builtin_description *) bdesc_2arg,
+ ARRAY_SIZE (bdesc_2arg),
+ SPE_BUILTIN_EVADDW,
+ SPE_BUILTIN_EVXOR);
+ enable_mask_for_builtins ((struct builtin_description *) bdesc_1arg,
+ ARRAY_SIZE (bdesc_1arg),
+ SPE_BUILTIN_EVABS,
+ SPE_BUILTIN_EVSUBFUSIAAW);
+ enable_mask_for_builtins ((struct builtin_description *) bdesc_spe_predicates,
+ ARRAY_SIZE (bdesc_spe_predicates),
+ SPE_BUILTIN_EVCMPEQ,
+ SPE_BUILTIN_EVFSTSTLT);
+ enable_mask_for_builtins ((struct builtin_description *) bdesc_spe_evsel,
+ ARRAY_SIZE (bdesc_spe_evsel),
+ SPE_BUILTIN_EVSEL_CMPGTS,
+ SPE_BUILTIN_EVSEL_FSTSTEQ);
+
+ (*lang_hooks.decls.pushdecl)
+ (build_decl (BUILTINS_LOCATION, TYPE_DECL,
+ get_identifier ("__ev64_opaque__"),
+ opaque_V2SI_type_node));
+
+ /* Initialize irregular SPE builtins. */
+
+ def_builtin (target_flags, "__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
+ def_builtin (target_flags, "__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
+ def_builtin (target_flags, "__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
+ def_builtin (target_flags, "__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
+ def_builtin (target_flags, "__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
+ def_builtin (target_flags, "__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
+ def_builtin (target_flags, "__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
+ def_builtin (target_flags, "__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
+ def_builtin (target_flags, "__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
+ def_builtin (target_flags, "__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
+ def_builtin (target_flags, "__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
+ def_builtin (target_flags, "__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
+ def_builtin (target_flags, "__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
+ def_builtin (target_flags, "__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
+ def_builtin (target_flags, "__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
+ def_builtin (target_flags, "__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
+ def_builtin (target_flags, "__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
+ def_builtin (target_flags, "__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
+
+ /* Loads. */
+ def_builtin (target_flags, "__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
+ def_builtin (target_flags, "__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
+ def_builtin (target_flags, "__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
+ def_builtin (target_flags, "__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
+ def_builtin (target_flags, "__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
+ def_builtin (target_flags, "__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
+ def_builtin (target_flags, "__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
+ def_builtin (target_flags, "__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
+ def_builtin (target_flags, "__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
+ def_builtin (target_flags, "__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
+ def_builtin (target_flags, "__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
+ def_builtin (target_flags, "__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
+ def_builtin (target_flags, "__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
+ def_builtin (target_flags, "__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
+ def_builtin (target_flags, "__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
+ def_builtin (target_flags, "__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
+ def_builtin (target_flags, "__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
+ def_builtin (target_flags, "__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
+ def_builtin (target_flags, "__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
+ def_builtin (target_flags, "__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
+ def_builtin (target_flags, "__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
+ def_builtin (target_flags, "__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
+
+ /* Predicates. */
+ d = (struct builtin_description *) bdesc_spe_predicates;
+ for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
+ {
+ tree type;
+
+ switch (insn_data[d->icode].operand[1].mode)
+ {
+ case V2SImode:
+ type = int_ftype_int_v2si_v2si;
+ break;
+ case V2SFmode:
+ type = int_ftype_int_v2sf_v2sf;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
+ /* Evsel predicates. */
+ d = (struct builtin_description *) bdesc_spe_evsel;
+ for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
+ {
+ tree type;
+
+ switch (insn_data[d->icode].operand[1].mode)
+ {
+ case V2SImode:
+ type = v2si_ftype_4_v2si;
+ break;
+ case V2SFmode:
+ type = v2sf_ftype_4_v2sf;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+}
+
+static void
+paired_init_builtins (void)
+{
+ const struct builtin_description *d;
+ size_t i;
+ tree endlink = void_list_node;
+
+ tree int_ftype_int_v2sf_v2sf
+ = build_function_type
+ (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ endlink))));
+ tree pcfloat_type_node =
+ build_pointer_type (build_qualified_type
+ (float_type_node, TYPE_QUAL_CONST));
+
+ tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
+ long_integer_type_node,
+ pcfloat_type_node,
+ NULL_TREE);
+ tree void_ftype_v2sf_long_pcfloat =
+ build_function_type_list (void_type_node,
+ V2SF_type_node,
+ long_integer_type_node,
+ pcfloat_type_node,
+ NULL_TREE);
+
+ def_builtin (0, "__builtin_paired_lx", v2sf_ftype_long_pcfloat,
+ PAIRED_BUILTIN_LX);
+
+ def_builtin (0, "__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
+ PAIRED_BUILTIN_STX);
+
+ /* Predicates. */
+ d = bdesc_paired_preds;
+ for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
+ {
+ tree type;
+
+ switch (insn_data[d->icode].operand[1].mode)
+ {
+ case V2SFmode:
+ type = int_ftype_int_v2sf_v2sf;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+}
+
+static void
+altivec_init_builtins (void)
+{
+ const struct builtin_description *d;
+ const struct builtin_description_predicates *dp;
+ size_t i;
+ tree ftype;
+
+ tree pvoid_type_node = build_pointer_type (void_type_node);
+
+ tree pcvoid_type_node
+ = build_pointer_type (build_qualified_type (void_type_node,
+ TYPE_QUAL_CONST));
+
+ tree int_ftype_opaque
+ = build_function_type_list (integer_type_node,
+ opaque_V4SI_type_node, NULL_TREE);
+ tree opaque_ftype_opaque
+ = build_function_type (integer_type_node,
+ NULL_TREE);
+ tree opaque_ftype_opaque_int
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node, integer_type_node, NULL_TREE);
+ tree opaque_ftype_opaque_opaque_int
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node, opaque_V4SI_type_node,
+ integer_type_node, NULL_TREE);
+ tree int_ftype_int_opaque_opaque
+ = build_function_type_list (integer_type_node,
+ integer_type_node, opaque_V4SI_type_node,
+ opaque_V4SI_type_node, NULL_TREE);
+ tree int_ftype_int_v4si_v4si
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V4SI_type_node,
+ V4SI_type_node, NULL_TREE);
+ tree void_ftype_v4si
+ = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
+ tree v8hi_ftype_void
+ = build_function_type (V8HI_type_node, void_list_node);
+ tree void_ftype_void
+ = build_function_type (void_type_node, void_list_node);
+ tree void_ftype_int
+ = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
+
+ tree opaque_ftype_long_pcvoid
+ = build_function_type_list (opaque_V4SI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_long_pcvoid
+ = build_function_type_list (V16QI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_long_pcvoid
+ = build_function_type_list (V8HI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v4si_ftype_long_pcvoid
+ = build_function_type_list (V4SI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_long_pcvoid
+ = build_function_type_list (V4SF_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v2df_ftype_long_pcvoid
+ = build_function_type_list (V2DF_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree v2di_ftype_long_pcvoid
+ = build_function_type_list (V2DI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+
+ tree void_ftype_opaque_long_pvoid
+ = build_function_type_list (void_type_node,
+ opaque_V4SI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v4si_long_pvoid
+ = build_function_type_list (void_type_node,
+ V4SI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v16qi_long_pvoid
+ = build_function_type_list (void_type_node,
+ V16QI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v8hi_long_pvoid
+ = build_function_type_list (void_type_node,
+ V8HI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v4sf_long_pvoid
+ = build_function_type_list (void_type_node,
+ V4SF_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v2df_long_pvoid
+ = build_function_type_list (void_type_node,
+ V2DF_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree void_ftype_v2di_long_pvoid
+ = build_function_type_list (void_type_node,
+ V2DI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ tree int_ftype_int_v8hi_v8hi
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V8HI_type_node,
+ V8HI_type_node, NULL_TREE);
+ tree int_ftype_int_v16qi_v16qi
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V16QI_type_node,
+ V16QI_type_node, NULL_TREE);
+ tree int_ftype_int_v4sf_v4sf
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V4SF_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int_ftype_int_v2df_v2df
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v4si_ftype_v4si
+ = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi
+ = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf
+ = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree void_ftype_pcvoid_int_int
+ = build_function_type_list (void_type_node,
+ pcvoid_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
+
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVD2X_V2DF);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVD2X_V2DI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVW4X_V4SF);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVW4X_V4SI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v8hi",
+ v8hi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V8HI);
+ def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v16qi",
+ v16qi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V16QI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2df",
+ void_ftype_v2df_long_pvoid, VSX_BUILTIN_STXVD2X_V2DF);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2di",
+ void_ftype_v2di_long_pvoid, VSX_BUILTIN_STXVD2X_V2DI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4sf",
+ void_ftype_v4sf_long_pvoid, VSX_BUILTIN_STXVW4X_V4SF);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4si",
+ void_ftype_v4si_long_pvoid, VSX_BUILTIN_STXVW4X_V4SI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v8hi",
+ void_ftype_v8hi_long_pvoid, VSX_BUILTIN_STXVW4X_V8HI);
+ def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v16qi",
+ void_ftype_v16qi_long_pvoid, VSX_BUILTIN_STXVW4X_V16QI);
+ def_builtin (MASK_VSX, "__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
+ VSX_BUILTIN_VEC_LD);
+ def_builtin (MASK_VSX, "__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
+ VSX_BUILTIN_VEC_ST);
+
+ if (rs6000_cpu == PROCESSOR_CELL)
+ {
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
+
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
+
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
+ def_builtin (MASK_ALTIVEC, "__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
+
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
+ }
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
+
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
+
+ /* Add the DST variants. */
+ d = bdesc_dst;
+ for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
+ def_builtin (d->mask, d->name, void_ftype_pcvoid_int_int, d->code);
+
+ /* Initialize the predicates. */
+ dp = bdesc_altivec_preds;
+ for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, dp++)
+ {
+ enum machine_mode mode1;
+ tree type;
+ bool is_overloaded = ((dp->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && dp->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (dp->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && dp->code <= VSX_BUILTIN_OVERLOADED_LAST));
+
+ if (is_overloaded)
+ mode1 = VOIDmode;
+ else
+ mode1 = insn_data[dp->icode].operand[1].mode;
+
+ switch (mode1)
+ {
+ case VOIDmode:
+ type = int_ftype_int_opaque_opaque;
+ break;
+ case V4SImode:
+ type = int_ftype_int_v4si_v4si;
+ break;
+ case V8HImode:
+ type = int_ftype_int_v8hi_v8hi;
+ break;
+ case V16QImode:
+ type = int_ftype_int_v16qi_v16qi;
+ break;
+ case V4SFmode:
+ type = int_ftype_int_v4sf_v4sf;
+ break;
+ case V2DFmode:
+ type = int_ftype_int_v2df_v2df;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (dp->mask, dp->name, type, dp->code);
+ }
+
+ /* Initialize the abs* operators. */
+ d = bdesc_abs;
+ for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
+ {
+ enum machine_mode mode0;
+ tree type;
+
+ mode0 = insn_data[d->icode].operand[0].mode;
+
+ switch (mode0)
+ {
+ case V4SImode:
+ type = v4si_ftype_v4si;
+ break;
+ case V8HImode:
+ type = v8hi_ftype_v8hi;
+ break;
+ case V16QImode:
+ type = v16qi_ftype_v16qi;
+ break;
+ case V4SFmode:
+ type = v4sf_ftype_v4sf;
+ break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
+ if (TARGET_ALTIVEC)
+ {
+ tree decl;
+
+ /* Initialize target builtin that implements
+ targetm.vectorize.builtin_mask_for_load. */
+
+ decl = add_builtin_function ("__builtin_altivec_mask_for_load",
+ v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_MASK_FOR_LOAD,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
+ altivec_builtin_mask_for_load = decl;
+ }
+
+ /* Access to the vec_init patterns. */
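+ /* Illustrative use: __builtin_vec_init_v4si (a, b, c, d) builds a V4SI
+ vector from four ints, per the signature constructed just below. */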
+ ftype = build_function_type_list (V4SI_type_node, integer_type_node,
+ integer_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v4si", ftype,
+ ALTIVEC_BUILTIN_VEC_INIT_V4SI);
+
+ ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v8hi", ftype,
+ ALTIVEC_BUILTIN_VEC_INIT_V8HI);
+
+ ftype = build_function_type_list (V16QI_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v16qi", ftype,
+ ALTIVEC_BUILTIN_VEC_INIT_V16QI);
+
+ ftype = build_function_type_list (V4SF_type_node, float_type_node,
+ float_type_node, float_type_node,
+ float_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_init_v4sf", ftype,
+ ALTIVEC_BUILTIN_VEC_INIT_V4SF);
+
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (V2DF_type_node, double_type_node,
+ double_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_init_v2df", ftype,
+ VSX_BUILTIN_VEC_INIT_V2DF);
+
+ ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
+ intDI_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_init_v2di", ftype,
+ VSX_BUILTIN_VEC_INIT_V2DI);
+ }
+
+ /* Access to the vec_set patterns. */
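+ /* Illustrative use: __builtin_vec_set_v4si (v, x, n) yields V with
+ element N replaced by X. */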
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ intSI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v4si", ftype,
+ ALTIVEC_BUILTIN_VEC_SET_V4SI);
+
+ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v8hi", ftype,
+ ALTIVEC_BUILTIN_VEC_SET_V8HI);
+
+ ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ intQI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v16qi", ftype,
+ ALTIVEC_BUILTIN_VEC_SET_V16QI);
+
+ ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+ float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_set_v4sf", ftype,
+ ALTIVEC_BUILTIN_VEC_SET_V4SF);
+
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
+ double_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_set_v2df", ftype,
+ VSX_BUILTIN_VEC_SET_V2DF);
+
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_set_v2di", ftype,
+ VSX_BUILTIN_VEC_SET_V2DI);
+ }
+
+ /* Access to the vec_extract patterns. */
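+ /* Illustrative use: __builtin_vec_ext_v4si (v, n) extracts element N of
+ V as an int. */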
+ ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ext_v4si", ftype,
+ ALTIVEC_BUILTIN_VEC_EXT_V4SI);
+
+ ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ext_v8hi", ftype,
+ ALTIVEC_BUILTIN_VEC_EXT_V8HI);
+
+ ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC, "__builtin_vec_ext_v16qi", ftype,
+ ALTIVEC_BUILTIN_VEC_EXT_V16QI);
+
+ ftype = build_function_type_list (float_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_ext_v4sf", ftype,
+ ALTIVEC_BUILTIN_VEC_EXT_V4SF);
+
+ if (TARGET_VSX)
+ {
+ ftype = build_function_type_list (double_type_node, V2DF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_ext_v2df", ftype,
+ VSX_BUILTIN_VEC_EXT_V2DF);
+
+ ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin (MASK_VSX, "__builtin_vec_ext_v2di", ftype,
+ VSX_BUILTIN_VEC_EXT_V2DI);
+ }
+}
+
+/* Hash function for builtin functions with up to 3 arguments and a return
+ type. */
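+/* The four (mode, unsignedness) pairs are folded together as the digits of
+ a mixed-radix number: each iteration multiplies the running hash by
+ MAX_MACHINE_MODE and adds the mode, then multiplies by 2 and adds the
+ uns_p flag. */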
+static unsigned
+builtin_hash_function (const void *hash_entry)
+{
+ unsigned ret = 0;
+ int i;
+ const struct builtin_hash_struct *bh =
+ (const struct builtin_hash_struct *) hash_entry;
+
+ for (i = 0; i < 4; i++)
+ {
+ ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
+ ret = (ret * 2) + bh->uns_p[i];
+ }
+
+ return ret;
+}
+
+/* Compare builtin hash entries H1 and H2 for equivalence. */
+static int
+builtin_hash_eq (const void *h1, const void *h2)
+{
+ const struct builtin_hash_struct *p1 = (const struct builtin_hash_struct *) h1;
+ const struct builtin_hash_struct *p2 = (const struct builtin_hash_struct *) h2;
+
+ return ((p1->mode[0] == p2->mode[0])
+ && (p1->mode[1] == p2->mode[1])
+ && (p1->mode[2] == p2->mode[2])
+ && (p1->mode[3] == p2->mode[3])
+ && (p1->uns_p[0] == p2->uns_p[0])
+ && (p1->uns_p[1] == p2->uns_p[1])
+ && (p1->uns_p[2] == p2->uns_p[2])
+ && (p1->uns_p[3] == p2->uns_p[3]));
+}
+
+/* Map modes to types for builtin functions with an explicit return type and
+ up to 3 arguments. Functions with fewer than 3 arguments pass VOIDmode as
+ the mode of each unused argument. */
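+/* For example, builtin_function_type (V4SImode, V4SImode, V4SImode,
+ VOIDmode, code, name) produces, and caches, the type for
+ "v4si f (v4si, v4si)"; the signedness overrides below adjust the result
+ for builtins with unsigned operands. */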
+static tree
+builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
+ enum machine_mode mode_arg1, enum machine_mode mode_arg2,
+ enum rs6000_builtins builtin, const char *name)
+{
+ struct builtin_hash_struct h;
+ struct builtin_hash_struct *h2;
+ void **found;
+ int num_args = 3;
+ int i;
+ tree ret_type = NULL_TREE;
+ tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
+ tree args;
+
+ /* Create builtin_hash_table. */
+ if (builtin_hash_table == NULL)
+ builtin_hash_table = htab_create_ggc (1500, builtin_hash_function,
+ builtin_hash_eq, NULL);
+
+ h.type = NULL_TREE;
+ h.mode[0] = mode_ret;
+ h.mode[1] = mode_arg0;
+ h.mode[2] = mode_arg1;
+ h.mode[3] = mode_arg2;
+ h.uns_p[0] = 0;
+ h.uns_p[1] = 0;
+ h.uns_p[2] = 0;
+ h.uns_p[3] = 0;
+
+ /* If the builtin produces unsigned results or takes unsigned arguments,
+ and its decl is handed to the vectorizer (such as the widening
+ multiplies and permutes), make sure the arguments and the return
+ value are correctly typed. */
+ switch (builtin)
+ {
+ /* unsigned 2 argument functions. */
+ case ALTIVEC_BUILTIN_VMULEUB_UNS:
+ case ALTIVEC_BUILTIN_VMULEUH_UNS:
+ case ALTIVEC_BUILTIN_VMULOUB_UNS:
+ case ALTIVEC_BUILTIN_VMULOUH_UNS:
+ h.uns_p[0] = 1;
+ h.uns_p[1] = 1;
+ h.uns_p[2] = 1;
+ break;
+
+ /* unsigned 3 argument functions. */
+ case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
+ case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
+ case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
+ case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
+ case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
+ case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
+ case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
+ case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
+ case VSX_BUILTIN_VPERM_16QI_UNS:
+ case VSX_BUILTIN_VPERM_8HI_UNS:
+ case VSX_BUILTIN_VPERM_4SI_UNS:
+ case VSX_BUILTIN_VPERM_2DI_UNS:
+ case VSX_BUILTIN_XXSEL_16QI_UNS:
+ case VSX_BUILTIN_XXSEL_8HI_UNS:
+ case VSX_BUILTIN_XXSEL_4SI_UNS:
+ case VSX_BUILTIN_XXSEL_2DI_UNS:
+ h.uns_p[0] = 1;
+ h.uns_p[1] = 1;
+ h.uns_p[2] = 1;
+ h.uns_p[3] = 1;
+ break;
+
+ /* signed permute functions with unsigned char mask. */
+ case ALTIVEC_BUILTIN_VPERM_16QI:
+ case ALTIVEC_BUILTIN_VPERM_8HI:
+ case ALTIVEC_BUILTIN_VPERM_4SI:
+ case ALTIVEC_BUILTIN_VPERM_4SF:
+ case ALTIVEC_BUILTIN_VPERM_2DI:
+ case ALTIVEC_BUILTIN_VPERM_2DF:
+ case VSX_BUILTIN_VPERM_16QI:
+ case VSX_BUILTIN_VPERM_8HI:
+ case VSX_BUILTIN_VPERM_4SI:
+ case VSX_BUILTIN_VPERM_4SF:
+ case VSX_BUILTIN_VPERM_2DI:
+ case VSX_BUILTIN_VPERM_2DF:
+ h.uns_p[3] = 1;
+ break;
+
+ /* unsigned args, signed return. */
+ case VSX_BUILTIN_XVCVUXDDP_UNS:
+ case VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF:
+ h.uns_p[1] = 1;
+ break;
+
+ /* signed args, unsigned return. */
+ case VSX_BUILTIN_XVCVDPUXDS_UNS:
+ case VECTOR_BUILTIN_FIXUNS_V4SF_V4SI:
+ h.uns_p[0] = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ /* Figure out how many args are present. */
+ while (num_args > 0 && h.mode[num_args] == VOIDmode)
+ num_args--;
+
+ if (num_args == 0)
+ fatal_error ("internal error: builtin function %s had no type", name);
+
+ ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
+ if (!ret_type && h.uns_p[0])
+ ret_type = builtin_mode_to_type[h.mode[0]][0];
+
+ if (!ret_type)
+ fatal_error ("internal error: builtin function %s had an unexpected "
+ "return type %s", name, GET_MODE_NAME (h.mode[0]));
+
+ for (i = 0; i < num_args; i++)
+ {
+ int m = (int) h.mode[i+1];
+ int uns_p = h.uns_p[i+1];
+
+ arg_type[i] = builtin_mode_to_type[m][uns_p];
+ if (!arg_type[i] && uns_p)
+ arg_type[i] = builtin_mode_to_type[m][0];
+
+ if (!arg_type[i])
+ fatal_error ("internal error: builtin function %s, argument %d "
+ "had unexpected argument type %s", name, i,
+ GET_MODE_NAME (m));
+ }
+
+ found = htab_find_slot (builtin_hash_table, &h, INSERT);
+ if (*found == NULL)
+ {
+ h2 = ggc_alloc_builtin_hash_struct ();
+ *h2 = h;
+ *found = (void *)h2;
+ args = void_list_node;
+
+ for (i = num_args - 1; i >= 0; i--)
+ args = tree_cons (NULL_TREE, arg_type[i], args);
+
+ h2->type = build_function_type (ret_type, args);
+ }
+
+ return ((struct builtin_hash_struct *)(*found))->type;
+}
+
+static void
+rs6000_common_init_builtins (void)
+{
+ const struct builtin_description *d;
+ size_t i;
+
+ tree opaque_ftype_opaque = NULL_TREE;
+ tree opaque_ftype_opaque_opaque = NULL_TREE;
+ tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
+ tree v2si_ftype_qi = NULL_TREE;
+ tree v2si_ftype_v2si_qi = NULL_TREE;
+ tree v2si_ftype_int_qi = NULL_TREE;
+
+ if (!TARGET_PAIRED_FLOAT)
+ {
+ builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
+ builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
+ }
+
+ /* Add the ternary operators. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ {
+ tree type;
+ int mask = d->mask;
+
+ if ((mask != 0 && (mask & target_flags) == 0)
+ || (mask == 0 && !TARGET_PAIRED_FLOAT))
+ continue;
+
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
+ {
+ if (! (type = opaque_ftype_opaque_opaque_opaque))
+ type = opaque_ftype_opaque_opaque_opaque
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ NULL_TREE);
+ }
+ else
+ {
+ enum insn_code icode = d->icode;
+ if (d->name == 0 || icode == CODE_FOR_nothing)
+ continue;
+
+ type = builtin_function_type (insn_data[icode].operand[0].mode,
+ insn_data[icode].operand[1].mode,
+ insn_data[icode].operand[2].mode,
+ insn_data[icode].operand[3].mode,
+ d->code, d->name);
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
+ /* Add the binary operators. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ {
+ enum machine_mode mode0, mode1, mode2;
+ tree type;
+ int mask = d->mask;
+
+ if ((mask != 0 && (mask & target_flags) == 0)
+ || (mask == 0 && !TARGET_PAIRED_FLOAT))
+ continue;
+
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
+ {
+ if (! (type = opaque_ftype_opaque_opaque))
+ type = opaque_ftype_opaque_opaque
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ NULL_TREE);
+ }
+ else
+ {
+ enum insn_code icode = d->icode;
+ if (d->name == 0 || icode == CODE_FOR_nothing)
+ continue;
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
+ {
+ if (! (type = v2si_ftype_v2si_qi))
+ type = v2si_ftype_v2si_qi
+ = build_function_type_list (opaque_V2SI_type_node,
+ opaque_V2SI_type_node,
+ char_type_node,
+ NULL_TREE);
+ }
+
+ else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
+ && mode2 == QImode)
+ {
+ if (! (type = v2si_ftype_int_qi))
+ type = v2si_ftype_int_qi
+ = build_function_type_list (opaque_V2SI_type_node,
+ integer_type_node,
+ char_type_node,
+ NULL_TREE);
+ }
+
+ else
+ type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
+ d->code, d->name);
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
+ /* Add the simple unary operators. */
+ d = (struct builtin_description *) bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ {
+ enum machine_mode mode0, mode1;
+ tree type;
+ int mask = d->mask;
+
+ if ((mask != 0 && (mask & target_flags) == 0)
+ || (mask == 0 && !TARGET_PAIRED_FLOAT))
+ continue;
+
+ if ((d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST
+ && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST)
+ || (d->code >= VSX_BUILTIN_OVERLOADED_FIRST
+ && d->code <= VSX_BUILTIN_OVERLOADED_LAST))
+ {
+ if (! (type = opaque_ftype_opaque))
+ type = opaque_ftype_opaque
+ = build_function_type_list (opaque_V4SI_type_node,
+ opaque_V4SI_type_node,
+ NULL_TREE);
+ }
+ else
+ {
+ enum insn_code icode = d->icode;
+ if (d->name == 0 || icode == CODE_FOR_nothing)
+ continue;
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (mode0 == V2SImode && mode1 == QImode)
+ {
+ if (! (type = v2si_ftype_qi))
+ type = v2si_ftype_qi
+ = build_function_type_list (opaque_V2SI_type_node,
+ char_type_node,
+ NULL_TREE);
+ }
+
+ else
+ type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
+ d->code, d->name);
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+}
+
+static void
+rs6000_init_libfuncs (void)
+{
+ if (DEFAULT_ABI != ABI_V4 && TARGET_XCOFF
+ && !TARGET_POWER2 && !TARGET_POWERPC)
+ {
+ /* AIX library routines for float->int conversion. */
+ set_conv_libfunc (sfix_optab, SImode, DFmode, "__itrunc");
+ set_conv_libfunc (ufix_optab, SImode, DFmode, "__uitrunc");
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "_qitrunc");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "_quitrunc");
+ }
+
+ if (!TARGET_IEEEQUAD)
+ /* AIX/Darwin/64-bit Linux quad floating point routines. */
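+ /* The __gcc_q* routines below implement IBM extended precision
+ (double-double), in which a 128-bit value is the sum of two
+ doubles. */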
+ if (!TARGET_XL_COMPAT)
+ {
+ set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
+ set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
+ set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
+ set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
+
+ if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
+ {
+ set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
+ set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
+ set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
+ set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
+ set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
+ set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
+ set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
+ set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
+ }
+
+ if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
+ set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
+ }
+ else
+ {
+ set_optab_libfunc (add_optab, TFmode, "_xlqadd");
+ set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
+ set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
+ set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
+ }
+ else
+ {
+ /* 32-bit SVR4 quad floating point routines. */
+
+ set_optab_libfunc (add_optab, TFmode, "_q_add");
+ set_optab_libfunc (sub_optab, TFmode, "_q_sub");
+ set_optab_libfunc (neg_optab, TFmode, "_q_neg");
+ set_optab_libfunc (smul_optab, TFmode, "_q_mul");
+ set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
+ if (TARGET_PPC_GPOPT || TARGET_POWER2)
+ set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
+
+ set_optab_libfunc (eq_optab, TFmode, "_q_feq");
+ set_optab_libfunc (ne_optab, TFmode, "_q_fne");
+ set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
+ set_optab_libfunc (ge_optab, TFmode, "_q_fge");
+ set_optab_libfunc (lt_optab, TFmode, "_q_flt");
+ set_optab_libfunc (le_optab, TFmode, "_q_fle");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
+ set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
+ }
+}
+
+
+/* Expand a block clear operation, and return 1 if successful. Return 0
+ if we should let the compiler generate normal code.
+
+ operands[0] is the destination
+ operands[1] is the length
+ operands[3] is the alignment */
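+/* For illustration: a 32-byte clear with 128-bit alignment on an AltiVec
+ target is emitted as two V4SImode stores of zero. operands[2] is not
+ listed above because the setmemsi expander is only expected to take
+ this path when the value being stored is zero. */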
+
+int
+expand_block_clear (rtx operands[])
+{
+ rtx orig_dest = operands[0];
+ rtx bytes_rtx = operands[1];
+ rtx align_rtx = operands[3];
+ bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ HOST_WIDE_INT align;
+ HOST_WIDE_INT bytes;
+ int offset;
+ int clear_bytes;
+ int clear_step;
+
+ /* If this is not a fixed size clear, just call memset. */
+ if (! constp)
+ return 0;
+
+ /* This must be a fixed size alignment */
+ gcc_assert (GET_CODE (align_rtx) == CONST_INT);
+ align = INTVAL (align_rtx) * BITS_PER_UNIT;
+
+ /* Anything to clear? */
+ bytes = INTVAL (bytes_rtx);
+ if (bytes <= 0)
+ return 1;
+
+ /* Use the builtin memset after a point, to avoid huge code bloat.
+ When optimize_size, avoid any significant code bloat; calling
+ memset is about 4 instructions, so allow for one instruction to
+ load zero and three to do clearing. */
+ if (TARGET_ALTIVEC && align >= 128)
+ clear_step = 16;
+ else if (TARGET_POWERPC64 && align >= 32)
+ clear_step = 8;
+ else if (TARGET_SPE && align >= 64)
+ clear_step = 8;
+ else
+ clear_step = 4;
+
+ if (optimize_size && bytes > 3 * clear_step)
+ return 0;
+ if (! optimize_size && bytes > 8 * clear_step)
+ return 0;
+
+ for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
+ {
+ enum machine_mode mode = BLKmode;
+ rtx dest;
+
+ if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+ {
+ clear_bytes = 16;
+ mode = V4SImode;
+ }
+ else if (bytes >= 8 && TARGET_SPE && align >= 64)
+ {
+ clear_bytes = 8;
+ mode = V2SImode;
+ }
+ else if (bytes >= 8 && TARGET_POWERPC64
+ /* 64-bit loads and stores require word-aligned
+ displacements. */
+ && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ {
+ clear_bytes = 8;
+ mode = DImode;
+ }
+ else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
+ { /* move 4 bytes */
+ clear_bytes = 4;
+ mode = SImode;
+ }
+ else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
+ { /* move 2 bytes */
+ clear_bytes = 2;
+ mode = HImode;
+ }
+ else /* move 1 byte at a time */
+ {
+ clear_bytes = 1;
+ mode = QImode;
+ }
+
+ dest = adjust_address (orig_dest, mode, offset);
+
+ emit_move_insn (dest, CONST0_RTX (mode));
+ }
+
+ return 1;
+}
+
+
+/* Expand a block move operation, and return 1 if successful. Return 0
+ if we should let the compiler generate normal code.
+
+ operands[0] is the destination
+ operands[1] is the source
+ operands[2] is the length
+ operands[3] is the alignment */
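+/* For illustration: with -mstring and registers r5 through r12 available,
+ a 32-byte move becomes a single 8-register load/store-string pair,
+ while with -mno-string a doubleword-aligned 16-byte move on a 64-bit
+ target becomes two DImode register moves. */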
+
+#define MAX_MOVE_REG 4
+
+int
+expand_block_move (rtx operands[])
+{
+ rtx orig_dest = operands[0];
+ rtx orig_src = operands[1];
+ rtx bytes_rtx = operands[2];
+ rtx align_rtx = operands[3];
+ int constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ int align;
+ int bytes;
+ int offset;
+ int move_bytes;
+ rtx stores[MAX_MOVE_REG];
+ int num_reg = 0;
+
+ /* If this is not a fixed size move, just call memcpy */
+ if (! constp)
+ return 0;
+
+ /* This must be a fixed size alignment */
+ gcc_assert (GET_CODE (align_rtx) == CONST_INT);
+ align = INTVAL (align_rtx) * BITS_PER_UNIT;
+
+ /* Anything to move? */
+ bytes = INTVAL (bytes_rtx);
+ if (bytes <= 0)
+ return 1;
+
+ if (bytes > rs6000_block_move_inline_limit)
+ return 0;
+
+ for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
+ {
+ union {
+ rtx (*movmemsi) (rtx, rtx, rtx, rtx);
+ rtx (*mov) (rtx, rtx);
+ } gen_func;
+ enum machine_mode mode = BLKmode;
+ rtx src, dest;
+
+ /* Altivec first, since it will be faster than a string move
+ when it applies, and usually not significantly larger. */
+ if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+ {
+ move_bytes = 16;
+ mode = V4SImode;
+ gen_func.mov = gen_movv4si;
+ }
+ else if (TARGET_SPE && bytes >= 8 && align >= 64)
+ {
+ move_bytes = 8;
+ mode = V2SImode;
+ gen_func.mov = gen_movv2si;
+ }
+ else if (TARGET_STRING
+ && bytes > 24 /* move up to 32 bytes at a time */
+ && ! fixed_regs[5]
+ && ! fixed_regs[6]
+ && ! fixed_regs[7]
+ && ! fixed_regs[8]
+ && ! fixed_regs[9]
+ && ! fixed_regs[10]
+ && ! fixed_regs[11]
+ && ! fixed_regs[12])
+ {
+ move_bytes = (bytes > 32) ? 32 : bytes;
+ gen_func.movmemsi = gen_movmemsi_8reg;
+ }
+ else if (TARGET_STRING
+ && bytes > 16 /* move up to 24 bytes at a time */
+ && ! fixed_regs[5]
+ && ! fixed_regs[6]
+ && ! fixed_regs[7]
+ && ! fixed_regs[8]
+ && ! fixed_regs[9]
+ && ! fixed_regs[10])
+ {
+ move_bytes = (bytes > 24) ? 24 : bytes;
+ gen_func.movmemsi = gen_movmemsi_6reg;
+ }
+ else if (TARGET_STRING
+ && bytes > 8 /* move up to 16 bytes at a time */
+ && ! fixed_regs[5]
+ && ! fixed_regs[6]
+ && ! fixed_regs[7]
+ && ! fixed_regs[8])
+ {
+ move_bytes = (bytes > 16) ? 16 : bytes;
+ gen_func.movmemsi = gen_movmemsi_4reg;
+ }
+ else if (bytes >= 8 && TARGET_POWERPC64
+ /* 64-bit loads and stores require word-aligned
+ displacements. */
+ && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ {
+ move_bytes = 8;
+ mode = DImode;
+ gen_func.mov = gen_movdi;
+ }
+ else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
+ { /* move up to 8 bytes at a time */
+ move_bytes = (bytes > 8) ? 8 : bytes;
+ gen_func.movmemsi = gen_movmemsi_2reg;
+ }
+ else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
+ { /* move 4 bytes */
+ move_bytes = 4;
+ mode = SImode;
+ gen_func.mov = gen_movsi;
+ }
+ else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
+ { /* move 2 bytes */
+ move_bytes = 2;
+ mode = HImode;
+ gen_func.mov = gen_movhi;
+ }
+ else if (TARGET_STRING && bytes > 1)
+ { /* move up to 4 bytes at a time */
+ move_bytes = (bytes > 4) ? 4 : bytes;
+ gen_func.movmemsi = gen_movmemsi_1reg;
+ }
+ else /* move 1 byte at a time */
+ {
+ move_bytes = 1;
+ mode = QImode;
+ gen_func.mov = gen_movqi;
+ }
+
+ src = adjust_address (orig_src, mode, offset);
+ dest = adjust_address (orig_dest, mode, offset);
+
+ if (mode != BLKmode)
+ {
+ rtx tmp_reg = gen_reg_rtx (mode);
+
+ emit_insn ((*gen_func.mov) (tmp_reg, src));
+ stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
+ }
+
+ if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
+ {
+ int i;
+ for (i = 0; i < num_reg; i++)
+ emit_insn (stores[i]);
+ num_reg = 0;
+ }
+
+ if (mode == BLKmode)
+ {
+ /* Move the addresses into scratch registers. The movmemsi
+ patterns require a zero offset. */
+ if (!REG_P (XEXP (src, 0)))
+ {
+ rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
+ src = replace_equiv_address (src, src_reg);
+ }
+ set_mem_size (src, GEN_INT (move_bytes));
+
+ if (!REG_P (XEXP (dest, 0)))
+ {
+ rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
+ dest = replace_equiv_address (dest, dest_reg);
+ }
+ set_mem_size (dest, GEN_INT (move_bytes));
+
+ emit_insn ((*gen_func.movmemsi) (dest, src,
+ GEN_INT (move_bytes & 31),
+ align_rtx));
+ }
+ }
+
+ return 1;
+}
+
+
+/* Return a string to perform a load_multiple operation.
+ operands[0] is the vector.
+ operands[1] is the source address.
+ operands[2] is the first destination register. */
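+/* The delicate case is when the address register in operands[1] is also
+ one of the destination registers; for instance, when it is the last
+ one, the lswi below loads one word fewer and the final word is then
+ loaded into the address register itself. */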
+
+const char *
+rs6000_output_load_multiple (rtx operands[3])
+{
+ /* We have to handle the case where the pseudo used to contain the address
+ is assigned to one of the output registers. */
+ int i, j;
+ int words = XVECLEN (operands[0], 0);
+ rtx xop[10];
+
+ if (XVECLEN (operands[0], 0) == 1)
+ return "{l|lwz} %2,0(%1)";
+
+ for (i = 0; i < words; i++)
+ if (refers_to_regno_p (REGNO (operands[2]) + i,
+ REGNO (operands[2]) + i + 1, operands[1], 0))
+ {
+ if (i == words-1)
+ {
+ xop[0] = GEN_INT (4 * (words-1));
+ xop[1] = operands[1];
+ xop[2] = operands[2];
+ output_asm_insn ("{lsi|lswi} %2,%1,%0\n\t{l|lwz} %1,%0(%1)", xop);
+ return "";
+ }
+ else if (i == 0)
+ {
+ xop[0] = GEN_INT (4 * (words-1));
+ xop[1] = operands[1];
+ xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+ output_asm_insn ("{cal %1,4(%1)|addi %1,%1,4}\n\t{lsi|lswi} %2,%1,%0\n\t{l|lwz} %1,-4(%1)", xop);
+ return "";
+ }
+ else
+ {
+ for (j = 0; j < words; j++)
+ if (j != i)
+ {
+ xop[0] = GEN_INT (j * 4);
+ xop[1] = operands[1];
+ xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
+ output_asm_insn ("{l|lwz} %2,%0(%1)", xop);
+ }
+ xop[0] = GEN_INT (i * 4);
+ xop[1] = operands[1];
+ output_asm_insn ("{l|lwz} %1,%0(%1)", xop);
+ return "";
+ }
+ }
+
+ return "{lsi|lswi} %2,%1,%N0";
+}
+
+
+/* A validation routine: say whether CODE, a condition code, and MODE
+ match. The other alternatives either don't make sense or should
+ never be generated. */
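+/* For example, the assertions below require the unsigned comparisons
+ (GTU, LTU, GEU, LEU) to use CCUNSmode, and allow only EQ and NE in
+ CCEQmode. */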
+
+void
+validate_condition_mode (enum rtx_code code, enum machine_mode mode)
+{
+ gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
+ || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
+ && GET_MODE_CLASS (mode) == MODE_CC);
+
+ /* These don't make sense. */
+ gcc_assert ((code != GT && code != LT && code != GE && code != LE)
+ || mode != CCUNSmode);
+
+ gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
+ || mode == CCUNSmode);
+
+ gcc_assert (mode == CCFPmode
+ || (code != ORDERED && code != UNORDERED
+ && code != UNEQ && code != LTGT
+ && code != UNGT && code != UNLT
+ && code != UNGE && code != UNLE));
+
+ /* These should never be generated except for
+ flag_finite_math_only. */
+ gcc_assert (mode != CCFPmode
+ || flag_finite_math_only
+ || (code != LE && code != GE
+ && code != UNEQ && code != LTGT
+ && code != UNGT && code != UNLT));
+
+ /* These are invalid; the information is not there. */
+ gcc_assert (mode != CCEQmode || code == EQ || code == NE);
+}
+
+
+/* Return 1 if ANDOP is a mask with no bits set outside the mask required
+ to convert the result of a rotate insn into a shift-left insn of
+ SHIFTOP bits. Both are known to be SImode CONST_INTs. */
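+/* For example, with SHIFTOP == 8 the usable mask is 0xffffff00, so
+ ANDOP == 0x0000ff00 qualifies while ANDOP == 0x000000ff does not,
+ because the latter has bits within the low eight positions. */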
+
+int
+includes_lshift_p (rtx shiftop, rtx andop)
+{
+ unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
+
+ shift_mask <<= INTVAL (shiftop);
+
+ return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
+}
+
+/* Similar, but for right shift. */
+
+int
+includes_rshift_p (rtx shiftop, rtx andop)
+{
+ unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
+
+ shift_mask >>= INTVAL (shiftop);
+
+ return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
+}
+
+/* Return 1 if ANDOP is a mask suitable for use with an rldic insn
+ to perform a left shift. It must have exactly SHIFTOP least
+ significant 0's, then one or more 1's, then zero or more 0's. */
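+/* For example, with SHIFTOP == 4, ANDOP == 0x0ff0 qualifies (four low
+ 0's, a block of 1's, then 0's), while ANDOP == 0x0f0f does not, since
+ it has 1's below the shift point. */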
+
+int
+includes_rldic_lshift_p (rtx shiftop, rtx andop)
+{
+ if (GET_CODE (andop) == CONST_INT)
+ {
+ HOST_WIDE_INT c, lsb, shift_mask;
+
+ c = INTVAL (andop);
+ if (c == 0 || c == ~0)
+ return 0;
+
+ shift_mask = ~0;
+ shift_mask <<= INTVAL (shiftop);
+
+ /* Find the least significant one bit. */
+ lsb = c & -c;
+
+ /* It must coincide with the LSB of the shift mask. */
+ if (-lsb != shift_mask)
+ return 0;
+
+ /* Invert to look for the next transition (if any). */
+ c = ~c;
+
+ /* Remove the low group of ones (originally low group of zeros). */
+ c &= -lsb;
+
+ /* Again find the lsb, and check we have all 1's above. */
+ lsb = c & -c;
+ return c == -lsb;
+ }
+ else if (GET_CODE (andop) == CONST_DOUBLE
+ && (GET_MODE (andop) == VOIDmode || GET_MODE (andop) == DImode))
+ {
+ HOST_WIDE_INT low, high, lsb;
+ HOST_WIDE_INT shift_mask_low, shift_mask_high;
+
+ low = CONST_DOUBLE_LOW (andop);
+ if (HOST_BITS_PER_WIDE_INT < 64)
+ high = CONST_DOUBLE_HIGH (andop);
+
+ if ((low == 0 && (HOST_BITS_PER_WIDE_INT >= 64 || high == 0))
+ || (low == ~0 && (HOST_BITS_PER_WIDE_INT >= 64 || high == ~0)))
+ return 0;
+
+ if (HOST_BITS_PER_WIDE_INT < 64 && low == 0)
+ {
+ shift_mask_high = ~0;
+ if (INTVAL (shiftop) > 32)
+ shift_mask_high <<= INTVAL (shiftop) - 32;
+
+ lsb = high & -high;
+
+ if (-lsb != shift_mask_high || INTVAL (shiftop) < 32)
+ return 0;
+
+ high = ~high;
+ high &= -lsb;
+
+ lsb = high & -high;
+ return high == -lsb;
+ }
+
+ shift_mask_low = ~0;
+ shift_mask_low <<= INTVAL (shiftop);
+
+ lsb = low & -low;
+
+ if (-lsb != shift_mask_low)
+ return 0;
+
+ if (HOST_BITS_PER_WIDE_INT < 64)
+ high = ~high;
+ low = ~low;
+ low &= -lsb;
+
+ if (HOST_BITS_PER_WIDE_INT < 64 && low == 0)
+ {
+ lsb = high & -high;
+ return high == -lsb;
+ }
+
+ lsb = low & -low;
+ return low == -lsb && (HOST_BITS_PER_WIDE_INT >= 64 || high == ~0);
+ }
+ else
+ return 0;
+}
+
+/* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
+ to perform a left shift. It must have SHIFTOP or more least
+ significant 0's, with the remainder of the word 1's. */
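+/* For example, with SHIFTOP == 4 both ~15 (1's from bit 4 up) and ~255
+ (1's from bit 8 up) are accepted, while 0xff0 is rejected because its
+ 1's do not extend to the most significant bit. */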
+
+int
+includes_rldicr_lshift_p (rtx shiftop, rtx andop)
+{
+ if (GET_CODE (andop) == CONST_INT)
+ {
+ HOST_WIDE_INT c, lsb, shift_mask;
+
+ shift_mask = ~0;
+ shift_mask <<= INTVAL (shiftop);
+ c = INTVAL (andop);
+
+ /* Find the least significant one bit. */
+ lsb = c & -c;
+
+ /* It must be covered by the shift mask.
+ This test also rejects c == 0. */
+ if ((lsb & shift_mask) == 0)
+ return 0;
+
+ /* Check we have all 1's above the transition, and reject all 1's. */
+ return c == -lsb && lsb != 1;
+ }
+ else if (GET_CODE (andop) == CONST_DOUBLE
+ && (GET_MODE (andop) == VOIDmode || GET_MODE (andop) == DImode))
+ {
+ HOST_WIDE_INT low, lsb, shift_mask_low;
+
+ low = CONST_DOUBLE_LOW (andop);
+
+ if (HOST_BITS_PER_WIDE_INT < 64)
+ {
+ HOST_WIDE_INT high, shift_mask_high;
+
+ high = CONST_DOUBLE_HIGH (andop);
+
+ if (low == 0)
+ {
+ shift_mask_high = ~0;
+ if (INTVAL (shiftop) > 32)
+ shift_mask_high <<= INTVAL (shiftop) - 32;
+
+ lsb = high & -high;
+
+ if ((lsb & shift_mask_high) == 0)
+ return 0;
+
+ return high == -lsb;
+ }
+ if (high != ~0)
+ return 0;
+ }
+
+ shift_mask_low = ~0;
+ shift_mask_low <<= INTVAL (shiftop);
+
+ lsb = low & -low;
+
+ if ((lsb & shift_mask_low) == 0)
+ return 0;
+
+ return low == -lsb && lsb != 1;
+ }
+ else
+ return 0;
+}
+
+/* Return 1 if the operands will generate valid arguments to an rlwimi
+instruction for an insert with right shift in 64-bit mode. The mask may
+not start on the first bit or stop on the last bit because the wrap-around
+effects of the instruction do not correspond to the semantics of the RTL insn. */
+
+int
+insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
+{
+ if (INTVAL (startop) > 32
+ && INTVAL (startop) < 64
+ && INTVAL (sizeop) > 1
+ && INTVAL (sizeop) + INTVAL (startop) < 64
+ && INTVAL (shiftop) > 0
+ && INTVAL (sizeop) + INTVAL (shiftop) < 32
+ && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
+ return 1;
+
+ return 0;
+}
+
+/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
+ for lfq and stfq insns iff the registers are hard registers. */
+
+int
+registers_ok_for_quad_peep (rtx reg1, rtx reg2)
+{
+ /* We might have been passed a SUBREG. */
+ if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
+ return 0;
+
+ /* We might have been passed non-floating-point registers. */
+ if (!FP_REGNO_P (REGNO (reg1))
+ || !FP_REGNO_P (REGNO (reg2)))
+ return 0;
+
+ return (REGNO (reg1) == REGNO (reg2) - 1);
+}
+
+/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
+ addr1 and addr2 must be in consecutive memory locations
+ (addr2 == addr1 + 8). */
+
+int
+mems_ok_for_quad_peep (rtx mem1, rtx mem2)
+{
+ rtx addr1, addr2;
+ unsigned int reg1, reg2;
+ int offset1, offset2;
+
+ /* The mems cannot be volatile. */
+ if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
+ return 0;
+
+ addr1 = XEXP (mem1, 0);
+ addr2 = XEXP (mem2, 0);
+
+ /* Extract an offset (if used) from the first addr. */
+ if (GET_CODE (addr1) == PLUS)
+ {
+ /* If not a REG, return zero. */
+ if (GET_CODE (XEXP (addr1, 0)) != REG)
+ return 0;
+ else
+ {
+ reg1 = REGNO (XEXP (addr1, 0));
+ /* The offset must be constant! */
+ if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
+ return 0;
+ offset1 = INTVAL (XEXP (addr1, 1));
+ }
+ }
+ else if (GET_CODE (addr1) != REG)
+ return 0;
+ else
+ {
+ reg1 = REGNO (addr1);
+ /* This was a simple (mem (reg)) expression. Offset is 0. */
+ offset1 = 0;
+ }
+
+ /* And now for the second addr. */
+ if (GET_CODE (addr2) == PLUS)
+ {
+ /* If not a REG, return zero. */
+ if (GET_CODE (XEXP (addr2, 0)) != REG)
+ return 0;
+ else
+ {
+ reg2 = REGNO (XEXP (addr2, 0));
+ /* The offset must be constant. */
+ if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
+ return 0;
+ offset2 = INTVAL (XEXP (addr2, 1));
+ }
+ }
+ else if (GET_CODE (addr2) != REG)
+ return 0;
+ else
+ {
+ reg2 = REGNO (addr2);
+ /* This was a simple (mem (reg)) expression. Offset is 0. */
+ offset2 = 0;
+ }
+
+ /* Both of these must have the same base register. */
+ if (reg1 != reg2)
+ return 0;
+
+ /* The offset for the second addr must be 8 more than the first addr. */
+ if (offset2 != offset1 + 8)
+ return 0;
+
+ /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
+ instructions. */
+ return 1;
+}
+
+
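+/* Return the memory location to use when a secondary reload of MODE must
+ go through memory. SDmode values share a single pre-allocated stack
+ slot; other modes get a fresh stack local. */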
+rtx
+rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
+{
+ static bool eliminated = false;
+ rtx ret;
+
+ if (mode != SDmode)
+ ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+ else
+ {
+ rtx mem = cfun->machine->sdmode_stack_slot;
+ gcc_assert (mem != NULL_RTX);
+
+ if (!eliminated)
+ {
+ mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
+ cfun->machine->sdmode_stack_slot = mem;
+ eliminated = true;
+ }
+ ret = mem;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
+ GET_MODE_NAME (mode));
+ if (!ret)
+ fprintf (stderr, "\tNULL_RTX\n");
+ else
+ debug_rtx (ret);
+ }
+
+ return ret;
+}
+
+static tree
+rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
+{
+ /* Don't walk into types. */
+ if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
+ {
+ *walk_subtrees = 0;
+ return NULL_TREE;
+ }
+
+ switch (TREE_CODE (*tp))
+ {
+ case VAR_DECL:
+ case PARM_DECL:
+ case FIELD_DECL:
+ case RESULT_DECL:
+ case SSA_NAME:
+ case REAL_CST:
+ case MEM_REF:
+ case VIEW_CONVERT_EXPR:
+ if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
+ return *tp;
+ break;
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
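+/* Coarse classification of the register classes, used below to decide
+ whether a reload copy can stay within one register file or must go
+ through memory. */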
+enum reload_reg_type {
+ GPR_REGISTER_TYPE,
+ VECTOR_REGISTER_TYPE,
+ OTHER_REGISTER_TYPE
+};
+
+static enum reload_reg_type
+rs6000_reload_register_type (enum reg_class rclass)
+{
+ switch (rclass)
+ {
+ case GENERAL_REGS:
+ case BASE_REGS:
+ return GPR_REGISTER_TYPE;
+
+ case FLOAT_REGS:
+ case ALTIVEC_REGS:
+ case VSX_REGS:
+ return VECTOR_REGISTER_TYPE;
+
+ default:
+ return OTHER_REGISTER_TYPE;
+ }
+}
+
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch or immediate register. Return the class
+ needed for the immediate register.
+
+ For VSX and Altivec, we may need a register to convert sp+offset into
+ reg+sp. */
+
+static reg_class_t
+rs6000_secondary_reload (bool in_p,
+ rtx x,
+ reg_class_t rclass_i,
+ enum machine_mode mode,
+ secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+ reg_class_t ret = ALL_REGS;
+ enum insn_code icode;
+ bool default_p = false;
+
+ sri->icode = CODE_FOR_nothing;
+
+ /* Vector loads and stores, including those to and from GPRs, may need
+ an additional base register; the reload functions below supply it. */
+ icode = rs6000_vector_reload[mode][in_p != false];
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = NO_REGS;
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+
+ if (GET_CODE (x) == MEM)
+ {
+ rtx addr = XEXP (x, 0);
+
+ /* Loads to and stores from GPRs can use reg+offset addressing and need
+ no extra register in that case, but they do need one if the
+ addressing is reg+reg or (reg+reg)&(-16). */
+ if (rclass == GENERAL_REGS || rclass == BASE_REGS)
+ {
+ if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (TImode, addr, false))
+ {
+ sri->icode = icode;
+ /* Account for splitting the loads and converting the
+ address from reg+reg to reg. */
+ sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
+ + ((GET_CODE (addr) == AND) ? 1 : 0));
+ }
+ }
+ /* Loads to and stores from vector registers can only do reg+reg
+ addressing. Altivec registers can also do (reg+reg)&(-16). */
+ else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
+ || rclass == FLOAT_REGS || rclass == NO_REGS)
+ {
+ if (!VECTOR_MEM_ALTIVEC_P (mode)
+ && GET_CODE (addr) == AND
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && INTVAL (XEXP (addr, 1)) == -16
+ && (legitimate_indirect_address_p (XEXP (addr, 0), false)
+ || legitimate_indexed_address_p (XEXP (addr, 0), false)))
+ {
+ sri->icode = icode;
+ sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
+ ? 2 : 1);
+ }
+ else if (!legitimate_indirect_address_p (addr, false)
+ && (rclass == NO_REGS
+ || !legitimate_indexed_address_p (addr, false)))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 1;
+ }
+ else
+ icode = CODE_FOR_nothing;
+ }
+ /* Any other loads, including those to pseudo registers which haven't
+ been assigned to a hard register yet, default to requiring a
+ scratch register. */
+ else
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+ }
+ else if (REG_P (x))
+ {
+ int regno = true_regnum (x);
+
+ icode = CODE_FOR_nothing;
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ default_p = true;
+ else
+ {
+ enum reg_class xclass = REGNO_REG_CLASS (regno);
+ enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
+ enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+
+ /* If memory is needed, use default_secondary_reload to create the
+ stack slot. */
+ if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ default_p = true;
+ else
+ ret = NO_REGS;
+ }
+ }
+ else
+ default_p = true;
+ }
+ else
+ default_p = true;
+
+ if (default_p)
+ ret = default_secondary_reload (in_p, x, rclass, mode, sri);
+
+ gcc_assert (ret != ALL_REGS);
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
+ "mode = %s",
+ reg_class_names[ret],
+ in_p ? "true" : "false",
+ reg_class_names[rclass],
+ GET_MODE_NAME (mode));
+
+ if (default_p)
+ fprintf (stderr, ", default secondary reload");
+
+ if (sri->icode != CODE_FOR_nothing)
+ fprintf (stderr, ", reload func = %s, extra cost = %d\n",
+ insn_data[sri->icode].name, sri->extra_cost);
+ else
+ fprintf (stderr, "\n");
+
+ debug_rtx (x);
+ }
+
+ return ret;
+}
+
+/* Fix up reload addresses for Altivec or VSX loads/stores to change
+ SP+offset addressing to SP+reg addressing. */
+
+void
+rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
+{
+ int regno = true_regnum (reg);
+ enum machine_mode mode = GET_MODE (reg);
+ enum reg_class rclass;
+ rtx addr;
+ rtx and_op2 = NULL_RTX;
+ rtx addr_op1;
+ rtx addr_op2;
+ rtx scratch_or_premodify = scratch;
+ rtx and_rtx;
+ rtx cc_clobber;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n",
+ store_p ? "store" : "load");
+ fprintf (stderr, "reg:\n");
+ debug_rtx (reg);
+ fprintf (stderr, "mem:\n");
+ debug_rtx (mem);
+ fprintf (stderr, "scratch:\n");
+ debug_rtx (scratch);
+ }
+
+ gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
+ gcc_assert (GET_CODE (mem) == MEM);
+ rclass = REGNO_REG_CLASS (regno);
+ addr = XEXP (mem, 0);
+
+ switch (rclass)
+ {
+ /* GPRs can handle reg + small constant; all other addresses need to use
+ the scratch register. */
+ case GENERAL_REGS:
+ case BASE_REGS:
+ if (GET_CODE (addr) == AND)
+ {
+ and_op2 = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+
+ if (GET_CODE (addr) == PRE_MODIFY)
+ {
+ scratch_or_premodify = XEXP (addr, 0);
+ gcc_assert (REG_P (scratch_or_premodify));
+ gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
+ addr = XEXP (addr, 1);
+ }
+
+ if (GET_CODE (addr) == PLUS
+ && (!rs6000_legitimate_offset_address_p (TImode, addr, false)
+ || and_op2 != NULL_RTX))
+ {
+ addr_op1 = XEXP (addr, 0);
+ addr_op2 = XEXP (addr, 1);
+ gcc_assert (legitimate_indirect_address_p (addr_op1, false));
+
+ if (!REG_P (addr_op2)
+ && (GET_CODE (addr_op2) != CONST_INT
+ || !satisfies_constraint_I (addr_op2)))
+ {
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "\nMove plus addr to register %s, mode = %s: ",
+ rs6000_reg_names[REGNO (scratch)],
+ GET_MODE_NAME (mode));
+ debug_rtx (addr_op2);
+ }
+ rs6000_emit_move (scratch, addr_op2, Pmode);
+ addr_op2 = scratch;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ scratch_or_premodify,
+ gen_rtx_PLUS (Pmode,
+ addr_op1,
+ addr_op2)));
+
+ addr = scratch_or_premodify;
+ scratch_or_premodify = scratch;
+ }
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (TImode, addr, false))
+ {
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
+ rs6000_reg_names[REGNO (scratch_or_premodify)],
+ GET_MODE_NAME (mode));
+ debug_rtx (addr);
+ }
+ rs6000_emit_move (scratch_or_premodify, addr, Pmode);
+ addr = scratch_or_premodify;
+ scratch_or_premodify = scratch;
+ }
+ break;
+
+ /* Float/Altivec registers can only handle reg+reg addressing. Move
+ other addresses into a scratch register. */
+ case FLOAT_REGS:
+ case VSX_REGS:
+ case ALTIVEC_REGS:
+
+ /* With float regs, we need to handle the AND ourselves, since we can't
+ use the Altivec instruction with an implicit AND -16. Allow scalar
+ loads to float registers to use reg+offset even if VSX. */
+ if (GET_CODE (addr) == AND
+ && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
+ || GET_CODE (XEXP (addr, 1)) != CONST_INT
+ || INTVAL (XEXP (addr, 1)) != -16
+ || !VECTOR_MEM_ALTIVEC_P (mode)))
+ {
+ and_op2 = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+
+ /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
+ as the address later. */
+ if (GET_CODE (addr) == PRE_MODIFY
+ && (!VECTOR_MEM_VSX_P (mode)
+ || and_op2 != NULL_RTX
+ || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
+ {
+ scratch_or_premodify = XEXP (addr, 0);
+ gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
+ false));
+ gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
+ addr = XEXP (addr, 1);
+ }
+
+ if (legitimate_indirect_address_p (addr, false) /* reg */
+ || legitimate_indexed_address_p (addr, false) /* reg+reg */
+ || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */
+ || (GET_CODE (addr) == AND /* Altivec memory */
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && INTVAL (XEXP (addr, 1)) == -16
+ && VECTOR_MEM_ALTIVEC_P (mode))
+ || (rclass == FLOAT_REGS /* legacy float mem */
+ && GET_MODE_SIZE (mode) == 8
+ && and_op2 == NULL_RTX
+ && scratch_or_premodify == scratch
+ && rs6000_legitimate_offset_address_p (mode, addr, false)))
+ ;
+
+ else if (GET_CODE (addr) == PLUS)
+ {
+ addr_op1 = XEXP (addr, 0);
+ addr_op2 = XEXP (addr, 1);
+ gcc_assert (REG_P (addr_op1));
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
+ rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
+ debug_rtx (addr_op2);
+ }
+ rs6000_emit_move (scratch, addr_op2, Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ scratch_or_premodify,
+ gen_rtx_PLUS (Pmode,
+ addr_op1,
+ scratch)));
+ addr = scratch_or_premodify;
+ scratch_or_premodify = scratch;
+ }
+
+ else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
+ || GET_CODE (addr) == CONST_INT || REG_P (addr))
+ {
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
+ rs6000_reg_names[REGNO (scratch_or_premodify)],
+ GET_MODE_NAME (mode));
+ debug_rtx (addr);
+ }
+
+ rs6000_emit_move (scratch_or_premodify, addr, Pmode);
+ addr = scratch_or_premodify;
+ scratch_or_premodify = scratch;
+ }
+
+ else
+ gcc_unreachable ();
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If the original address involved a pre-modify for which we couldn't use
+ the VSX memory instruction with update, and we haven't already taken
+ care of it, store the address in the pre-modify register and use that as
+ the address. */
+ if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
+ addr = scratch_or_premodify;
+ }
+
+ /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
+ memory instruction, recreate the AND now, including the clobber which is
+ generated by the general ANDSI3/ANDDI3 patterns for the
+ andi. instruction. */
+ if (and_op2 != NULL_RTX)
+ {
+ if (! legitimate_indirect_address_p (addr, false))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ addr = scratch;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
+ rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
+ debug_rtx (and_op2);
+ }
+
+ and_rtx = gen_rtx_SET (VOIDmode,
+ scratch,
+ gen_rtx_AND (Pmode,
+ addr,
+ and_op2));
+
+ cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, and_rtx, cc_clobber)));
+ addr = scratch;
+ }
+
+ /* Adjust the address if it changed. */
+ if (addr != XEXP (mem, 0))
+ {
+ mem = change_address (mem, mode, addr);
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
+ }
+
+ /* Now create the move. */
+ if (store_p)
+ emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
+
+ return;
+}
+
+/* Target hook to return the cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register classes
+ covering all hard registers used for register allocation purposes. Any
+ move between two registers of a cover class should be cheaper than a
+ load or store of the registers. The value is an array of register
+ classes with LIM_REG_CLASSES used as the end marker.
+
+ We need two IRA_COVER_CLASSES: one for pre-VSX machines, and the other
+ for VSX, to account for the Altivec and floating-point registers being
+ subsets of the VSX register set under VSX, but distinct register sets on
+ pre-VSX machines. */
+
+static const reg_class_t *
+rs6000_ira_cover_classes (void)
+{
+ static const reg_class_t cover_pre_vsx[] = IRA_COVER_CLASSES_PRE_VSX;
+ static const reg_class_t cover_vsx[] = IRA_COVER_CLASSES_VSX;
+
+ return (TARGET_VSX) ? cover_vsx : cover_pre_vsx;
+}
+
+/* Allocate a 64-bit stack slot to be used for copying SDmode
+ values through if this function has any SDmode references. */
+
+static void
+rs6000_alloc_sdmode_stack_slot (void)
+{
+ tree t;
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+
+ gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+
+ FOR_EACH_BB (bb)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
+ if (ret)
+ {
+ rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
+ cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
+ SDmode, 0);
+ return;
+ }
+ }
+
+ /* Check for any SDmode parameters of the function. */
+ for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
+ {
+ if (TREE_TYPE (t) == error_mark_node)
+ continue;
+
+ if (TYPE_MODE (TREE_TYPE (t)) == SDmode
+ || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
+ {
+ rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
+ cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
+ SDmode, 0);
+ return;
+ }
+ }
+}
+
+static void
+rs6000_instantiate_decls (void)
+{
+ if (cfun->machine->sdmode_stack_slot != NULL_RTX)
+ instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
+}
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+ On the RS/6000, we have to return NO_REGS when we want to reload a
+ floating-point CONST_DOUBLE to force it to be copied to memory.
+
+ We also don't want to reload integer values into floating-point
+ registers if we can at all help it. In fact, this can
+ cause reload to die, if it tries to generate a reload of CTR
+ into a FP register and discovers it doesn't have the memory location
+ required.
+
+ ??? Would it be a good idea to have reload do the converse, that is
+ try to reload floating modes into FP registers if possible?
+ */
+
+static enum reg_class
+rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (VECTOR_UNIT_VSX_P (mode)
+ && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+ return rclass;
+
+ if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
+ && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
+ && easy_vector_constant (x, mode))
+ return ALTIVEC_REGS;
+
+ if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ return NO_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+ return GENERAL_REGS;
+
+ /* For VSX, prefer the traditional registers for 64-bit values because we
+ can use the non-VSX loads. Prefer the Altivec registers if Altivec is
+ handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
+ prefer Altivec loads. */
+ if (rclass == VSX_REGS)
+ {
+ if (GET_MODE_SIZE (mode) <= 8)
+ return FLOAT_REGS;
+
+ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode))
+ return ALTIVEC_REGS;
+
+ return rclass;
+ }
+
+ return rclass;
+}
+
+/* Debug version of rs6000_preferred_reload_class. */
+static enum reg_class
+rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
+{
+ enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
+
+ fprintf (stderr,
+ "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
+ "mode = %s, x:\n",
+ reg_class_names[ret], reg_class_names[rclass],
+ GET_MODE_NAME (GET_MODE (x)));
+ debug_rtx (x);
+
+ return ret;
+}
+
+/* If we are copying between FP or AltiVec registers and anything else, we
+ need a memory location. The exception is when we are targeting ppc64 and
+ the instructions to move between FPRs and GPRs are available. Also,
+ under VSX, you can copy vector registers from the FP register set to the
+ Altivec register set and vice versa. */
+
+static bool
+rs6000_secondary_memory_needed (enum reg_class class1,
+ enum reg_class class2,
+ enum machine_mode mode)
+{
+ if (class1 == class2)
+ return false;
+
+ /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
+ ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
+ between these classes. But we need memory for other things that can go in
+ FLOAT_REGS like SFmode. */
+ if (TARGET_VSX
+ && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
+ && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
+ || class1 == FLOAT_REGS))
+ return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
+ && class2 != FLOAT_REGS);
+
+ if (class1 == VSX_REGS || class2 == VSX_REGS)
+ return true;
+
+ if (class1 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class2 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ return true;
+
+ if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ return true;
+
+ return false;
+}
+
+/* Debug version of rs6000_secondary_memory_needed. */
+static bool
+rs6000_debug_secondary_memory_needed (enum reg_class class1,
+ enum reg_class class2,
+ enum machine_mode mode)
+{
+ bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+
+ fprintf (stderr,
+ "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
+ "class2 = %s, mode = %s\n",
+ ret ? "true" : "false", reg_class_names[class1],
+ reg_class_names[class2], GET_MODE_NAME (mode));
+
+ return ret;
+}
+
+/* Return the register class of a scratch register needed to copy IN into
+ or out of a register in RCLASS in MODE. If it can be done directly,
+ NO_REGS is returned. */
+
+static enum reg_class
+rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
+ rtx in)
+{
+ int regno;
+
+ if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
+#if TARGET_MACHO
+ && MACHOPIC_INDIRECT
+#endif
+ ))
+ {
+ /* We cannot copy a symbolic operand directly into anything
+ other than BASE_REGS for TARGET_ELF. So indicate that a
+ register from BASE_REGS is needed as an intermediate
+ register.
+
+ On Darwin, pic addresses require a load from memory, which
+ needs a base register. */
+ if (rclass != BASE_REGS
+ && (GET_CODE (in) == SYMBOL_REF
+ || GET_CODE (in) == HIGH
+ || GET_CODE (in) == LABEL_REF
+ || GET_CODE (in) == CONST))
+ return BASE_REGS;
+ }
+
+ if (GET_CODE (in) == REG)
+ {
+ regno = REGNO (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ regno = true_regnum (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ regno = -1;
+ }
+ }
+ else if (GET_CODE (in) == SUBREG)
+ {
+ regno = true_regnum (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ regno = -1;
+ }
+ else
+ regno = -1;
+
+ /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
+ into anything. */
+ if (rclass == GENERAL_REGS || rclass == BASE_REGS
+ || (regno >= 0 && INT_REGNO_P (regno)))
+ return NO_REGS;
+
+ /* Constants, memory, and FP registers can go into FP registers. */
+ if ((regno == -1 || FP_REGNO_P (regno))
+ && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
+ return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+
+ /* Memory and FP/Altivec registers can go into FP/Altivec registers under
+ VSX. */
+ if (TARGET_VSX
+ && (regno == -1 || VSX_REGNO_P (regno))
+ && VSX_REG_CLASS_P (rclass))
+ return NO_REGS;
+
+ /* Memory and AltiVec registers can go into AltiVec registers. */
+ if ((regno == -1 || ALTIVEC_REGNO_P (regno))
+ && rclass == ALTIVEC_REGS)
+ return NO_REGS;
+
+ /* We can copy among the CR registers. */
+ if ((rclass == CR_REGS || rclass == CR0_REGS)
+ && regno >= 0 && CR_REGNO_P (regno))
+ return NO_REGS;
+
+ /* Otherwise, we need GENERAL_REGS. */
+ return GENERAL_REGS;
+}
+
+/* Debug version of rs6000_secondary_reload_class. */
+static enum reg_class
+rs6000_debug_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode, rtx in)
+{
+ enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
+ fprintf (stderr,
+ "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
+ "mode = %s, input rtx:\n",
+ reg_class_names[ret], reg_class_names[rclass],
+ GET_MODE_NAME (mode));
+ debug_rtx (in);
+
+ return ret;
+}
+
+/* Return true if for RCLASS a mode change from FROM to TO is invalid. */
+
+static bool
+rs6000_cannot_change_mode_class (enum machine_mode from,
+ enum machine_mode to,
+ enum reg_class rclass)
+{
+ unsigned from_size = GET_MODE_SIZE (from);
+ unsigned to_size = GET_MODE_SIZE (to);
+
+ if (from_size != to_size)
+ {
+ enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
+ return ((from_size < 8 || to_size < 8 || TARGET_IEEEQUAD)
+ && reg_classes_intersect_p (xclass, rclass));
+ }
+
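+ /* Each sum of comparisons below is 1 exactly when one of FROM and TO
+ (but not both) has the mode in question, i.e. when the mode changes
+ across the pair. */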
+ if (TARGET_E500_DOUBLE
+ && ((((to) == DFmode) + ((from) == DFmode)) == 1
+ || (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == DDmode) + ((from) == DDmode)) == 1
+ || (((to) == TDmode) + ((from) == TDmode)) == 1
+ || (((to) == DImode) + ((from) == DImode)) == 1))
+ return true;
+
+ /* Since the VSX register set includes the traditional floating-point
+ registers and the Altivec registers, just check for the sizes being
+ different instead of trying to check whether the modes are vector modes.
+ Otherwise it won't allow, say, DF and DI to change classes. */
+ if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
+ return (from_size != 8 && from_size != 16);
+
+ if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
+ && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
+ return true;
+
+ if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
+ && reg_classes_intersect_p (GENERAL_REGS, rclass))
+ return true;
+
+ return false;
+}
+
+/* Debug version of rs6000_cannot_change_mode_class. */
+static bool
+rs6000_debug_cannot_change_mode_class (enum machine_mode from,
+ enum machine_mode to,
+ enum reg_class rclass)
+{
+ bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
+
+ fprintf (stderr,
+ "rs6000_cannot_change_mode_class, return %s, from = %s, "
+ "to = %s, rclass = %s\n",
+ ret ? "true" : "false",
+ GET_MODE_NAME (from), GET_MODE_NAME (to),
+ reg_class_names[rclass]);
+
+ return ret;
+}
+
+/* Given a comparison operation, return the bit number in CCR to test. We
+ know this is a valid comparison.
+
+ SCC_P is 1 if this is for an scc. That means that %D will have been
+ used instead of %C, so the bits will be in different places.
+
+ Return -1 if OP isn't a valid comparison for some reason. */
+
+int
+ccr_bit (rtx op, int scc_p)
+{
+ enum rtx_code code = GET_CODE (op);
+ enum machine_mode cc_mode;
+ int cc_regnum;
+ int base_bit;
+ rtx reg;
+
+ if (!COMPARISON_P (op))
+ return -1;
+
+ reg = XEXP (op, 0);
+
+ gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
+
+ cc_mode = GET_MODE (reg);
+ cc_regnum = REGNO (reg);
+ base_bit = 4 * (cc_regnum - CR0_REGNO);
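+ /* Each 4-bit CR field is laid out LT, GT, EQ, SO/UN from bit 0, so
+ BASE_BIT indexes the LT bit of this field. */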
+
+ validate_condition_mode (code, cc_mode);
+
+ /* When generating a sCOND operation, only positive conditions are
+ allowed. */
+ gcc_assert (!scc_p
+ || code == EQ || code == GT || code == LT || code == UNORDERED
+ || code == GTU || code == LTU);
+
+ switch (code)
+ {
+ case NE:
+ return scc_p ? base_bit + 3 : base_bit + 2;
+ case EQ:
+ return base_bit + 2;
+ case GT: case GTU: case UNLE:
+ return base_bit + 1;
+ case LT: case LTU: case UNGE:
+ return base_bit;
+ case ORDERED: case UNORDERED:
+ return base_bit + 3;
+
+ case GE: case GEU:
+ /* If scc, we will have done a cror to put the bit in the
+ unordered position. So test that bit. For integer, this is ! LT
+ unless this is an scc insn. */
+ return scc_p ? base_bit + 3 : base_bit;
+
+ case LE: case LEU:
+ return scc_p ? base_bit + 3 : base_bit + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the GOT register. */
+
+rtx
+rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
+{
+ /* The second flow pass currently (June 1999) can't update
+ regs_ever_live without disturbing other parts of the compiler, so
+ update it here to make the prolog/epilogue code happy. */
+ if (!can_create_pseudo_p ()
+ && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
+ df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
+
+ crtl->uses_pic_offset_table = 1;
+
+ return pic_offset_table_rtx;
+}
+
+static rs6000_stack_t stack_info;
+
+/* Function to init struct machine_function.
+ This will be called, via a pointer variable,
+ from push_function_context. */
+
+static struct machine_function *
+rs6000_init_machine_status (void)
+{
+ stack_info.reload_completed = 0;
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* These macros test for integers and extract the low-order bits. */
+#define INT_P(X) \
+((GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST_DOUBLE) \
+ && GET_MODE (X) == VOIDmode)
+
+#define INT_LOWPART(X) \
+ (GET_CODE (X) == CONST_INT ? INTVAL (X) : CONST_DOUBLE_LOW (X))
+
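+/* extract_MB and extract_ME compute the MB and ME fields of an rlwinm
+ mask in big-endian bit numbering: for the mask 0x00ffff00, extract_MB
+ returns 8 and extract_ME returns 23 (bits 8..23 are set). */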
+int
+extract_MB (rtx op)
+{
+ int i;
+ unsigned long val = INT_LOWPART (op);
+
+ /* If the high bit is zero, the value is the first 1 bit we find
+ from the left. */
+ if ((val & 0x80000000) == 0)
+ {
+ gcc_assert (val & 0xffffffff);
+
+ i = 1;
+ while (((val <<= 1) & 0x80000000) == 0)
+ ++i;
+ return i;
+ }
+
+ /* If the high bit is set and the low bit is not, or the mask is all
+ 1's, the value is zero. */
+ if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
+ return 0;
+
+ /* Otherwise we have a wrap-around mask. Look for the first 0 bit
+ from the right. */
+ i = 31;
+ while (((val >>= 1) & 1) != 0)
+ --i;
+
+ return i;
+}
+
+int
+extract_ME (rtx op)
+{
+ int i;
+ unsigned long val = INT_LOWPART (op);
+
+ /* If the low bit is zero, the value is the first 1 bit we find from
+ the right. */
+ if ((val & 1) == 0)
+ {
+ gcc_assert (val & 0xffffffff);
+
+ i = 30;
+ while (((val >>= 1) & 1) == 0)
+ --i;
+
+ return i;
+ }
+
+ /* If the low bit is set and the high bit is not, or the mask is all
+ 1's, the value is 31. */
+ if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
+ return 31;
+
+ /* Otherwise we have a wrap-around mask. Look for the first 0 bit
+ from the left. */
+ i = 0;
+ while (((val <<= 1) & 0x80000000) != 0)
+ ++i;
+
+ return i;
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some tls_ld pattern. */
+
+static const char *
+rs6000_get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn),
+ rs6000_get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+/* Helper function for rs6000_get_some_local_dynamic_name. */
+
+static int
+rs6000_get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ const char *str = XSTR (x, 0);
+ if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = str;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Write out a function code label. */
+
+void
+rs6000_output_function_entry (FILE *file, const char *fname)
+{
+ if (fname[0] != '.')
+ {
+ switch (DEFAULT_ABI)
+ {
+ default:
+ gcc_unreachable ();
+
+ case ABI_AIX:
+ if (DOT_SYMBOLS)
+ putc ('.', file);
+ else
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
+ break;
+
+ case ABI_V4:
+ case ABI_DARWIN:
+ break;
+ }
+ }
+
+ RS6000_OUTPUT_BASENAME (file, fname);
+}
+
+/* Print an operand. Recognize special options, documented below. */
+
+#if TARGET_ELF
+#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
+#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
+#else
+#define SMALL_DATA_RELOC "sda21"
+#define SMALL_DATA_REG 0
+#endif
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ int i;
+ unsigned HOST_WIDE_INT uval;
+
+ switch (code)
+ {
+ case '.':
+ /* Write out an instruction after the call which may be replaced
+ with glue code by the loader. This depends on the AIX version. */
+ asm_fprintf (file, RS6000_CALL_GLUE);
+ return;
+
+ /* %a is output_address. */
+
+ case 'A':
+ /* If X is a constant integer whose low-order 5 bits are zero,
+ write 'l'. Otherwise, write 'r'. This is a kludge to fix a bug
+ in the AIX assembler where "sri" with a zero shift count
+ writes a trash instruction. */
+ if (GET_CODE (x) == CONST_INT && (INTVAL (x) & 31) == 0)
+ putc ('l', file);
+ else
+ putc ('r', file);
+ return;
+
+ case 'b':
+ /* If constant, low-order 16 bits of constant, unsigned.
+ Otherwise, write normally. */
+ if (INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INT_LOWPART (x) & 0xffff);
+ else
+ print_operand (file, x, 0);
+ return;
+
+ case 'B':
+ /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
+ for 64-bit mask direction. */
+ putc (((INT_LOWPART (x) & 1) == 0 ? 'r' : 'l'), file);
+ return;
+
+ /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
+ output_operand. */
+
+ case 'c':
+ /* X is a CR register. Print the number of the GT bit of the CR. */
+ if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%c value");
+ else
+ fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 1);
+ return;
+
+ case 'D':
+ /* Like 'J' but get to the GT bit only. */
+ gcc_assert (GET_CODE (x) == REG);
+
+ /* Bit 1 is GT bit. */
+ i = 4 * (REGNO (x) - CR0_REGNO) + 1;
+
+ /* Add one for shift count in rlinm for scc. */
+ fprintf (file, "%d", i + 1);
+ return;
+
+ case 'E':
+ /* X is a CR register. Print the number of the EQ bit of the CR. */
+ if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%E value");
+ else
+ fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
+ return;
+
+ case 'f':
+ /* X is a CR register. Print the shift count needed to move it
+ to the high-order four bits. */
+ if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%f value");
+ else
+ fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
+ return;
+
+ case 'F':
+ /* Similar, but print the count for the rotate in the opposite
+ direction. */
+ if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%F value");
+ else
+ fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
+ return;
+
+ case 'G':
+ /* X is a constant integer. If it is negative, print "m",
+ otherwise print "z". This is to make an aze or ame insn. */
+ if (GET_CODE (x) != CONST_INT)
+ output_operand_lossage ("invalid %%G value");
+ else if (INTVAL (x) >= 0)
+ putc ('z', file);
+ else
+ putc ('m', file);
+ return;
+
+ case 'h':
+ /* If constant, output low-order five bits. Otherwise, write
+ normally. */
+ if (INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INT_LOWPART (x) & 31);
+ else
+ print_operand (file, x, 0);
+ return;
+
+ case 'H':
+ /* If constant, output low-order six bits. Otherwise, write
+ normally. */
+ if (INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INT_LOWPART (x) & 63);
+ else
+ print_operand (file, x, 0);
+ return;
+
+ case 'I':
+ /* Print `i' if this is a constant, else nothing. */
+ if (INT_P (x))
+ putc ('i', file);
+ return;
+
+ case 'j':
+ /* Write the bit number in CCR for jump. */
+ i = ccr_bit (x, 0);
+ if (i == -1)
+ output_operand_lossage ("invalid %%j code");
+ else
+ fprintf (file, "%d", i);
+ return;
+
+ case 'J':
+ /* Similar, but add one for shift count in rlinm for scc and pass
+ scc flag to `ccr_bit'. */
+ i = ccr_bit (x, 1);
+ if (i == -1)
+ output_operand_lossage ("invalid %%J code");
+ else
+ /* If we want bit 31, write a shift count of zero, not 32. */
+ fprintf (file, "%d", i == 31 ? 0 : i + 1);
+ return;
+
+ case 'k':
+ /* X must be a constant. Write the 1's complement of the
+ constant. */
+ if (! INT_P (x))
+ output_operand_lossage ("invalid %%k value");
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INT_LOWPART (x));
+ return;
+
+ case 'K':
+ /* X must be a symbolic constant on ELF. Write an
+ expression suitable for an 'addi' that adds in the low 16
+ bits of the MEM. */
+ if (GET_CODE (x) == CONST)
+ {
+ if (GET_CODE (XEXP (x, 0)) != PLUS
+ || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ output_operand_lossage ("invalid %%K value");
+ }
+ print_operand_address (file, x);
+ fputs ("@l", file);
+ return;
+
+ /* %l is output_asm_label. */
+
+ case 'L':
+ /* Write second word of DImode or DFmode reference. Works on register
+ or non-indexed memory only. */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x) + 1], file);
+ else if (GET_CODE (x) == MEM)
+ {
+ /* Handle possible auto-increment. Since it is pre-increment and
+ we have already done it, we can just use an offset of one word. */
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0),
+ UNITS_PER_WORD));
+ else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0),
+ UNITS_PER_WORD));
+ else
+ output_address (XEXP (adjust_address_nv (x, SImode,
+ UNITS_PER_WORD),
+ 0));
+
+ if (small_data_operand (x, GET_MODE (x)))
+ fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+ reg_names[SMALL_DATA_REG]);
+ }
+ return;
+
+ case 'm':
+ /* MB value for a mask operand. */
+ if (! mask_operand (x, SImode))
+ output_operand_lossage ("invalid %%m value");
+
+ fprintf (file, "%d", extract_MB (x));
+ return;
+
+ case 'M':
+ /* ME value for a mask operand. */
+ if (! mask_operand (x, SImode))
+ output_operand_lossage ("invalid %%M value");
+
+ fprintf (file, "%d", extract_ME (x));
+ return;
+
+ /* %n outputs the negative of its operand. */
+
+ case 'N':
+ /* Write the number of elements in the vector times 4. */
+ if (GET_CODE (x) != PARALLEL)
+ output_operand_lossage ("invalid %%N value");
+ else
+ fprintf (file, "%d", XVECLEN (x, 0) * 4);
+ return;
+
+ case 'O':
+ /* Similar, but subtract 1 first. */
+ if (GET_CODE (x) != PARALLEL)
+ output_operand_lossage ("invalid %%O value");
+ else
+ fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
+ return;
+
+ case 'p':
+ /* X is a CONST_INT that is a power of two. Output the logarithm. */
+ if (! INT_P (x)
+ || INT_LOWPART (x) < 0
+ || (i = exact_log2 (INT_LOWPART (x))) < 0)
+ output_operand_lossage ("invalid %%p value");
+ else
+ fprintf (file, "%d", i);
+ return;
+
+ case 'P':
+ /* The operand must be an indirect memory reference. The result
+ is the register name. */
+ if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
+ || REGNO (XEXP (x, 0)) >= 32)
+ output_operand_lossage ("invalid %%P value");
+ else
+ fputs (reg_names[REGNO (XEXP (x, 0))], file);
+ return;
+
+ case 'q':
+ /* This outputs the logical code corresponding to a boolean
+ expression. The expression may have one or both operands
+ negated (if one, only the first one). For condition register
+ logical operations, it will also treat the negated
+ CR codes as NOTs, but not handle NOTs of them. */
+ {
+ const char *const *t = 0;
+ const char *s;
+ enum rtx_code code = GET_CODE (x);
+ static const char * const tbl[3][3] = {
+ { "and", "andc", "nor" },
+ { "or", "orc", "nand" },
+ { "xor", "eqv", "xor" } };
+
+ if (code == AND)
+ t = tbl[0];
+ else if (code == IOR)
+ t = tbl[1];
+ else if (code == XOR)
+ t = tbl[2];
+ else
+ output_operand_lossage ("invalid %%q value");
+
+ if (GET_CODE (XEXP (x, 0)) != NOT)
+ s = t[0];
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == NOT)
+ s = t[2];
+ else
+ s = t[1];
+ }
+
+ fputs (s, file);
+ }
+ return;
+
+ case 'Q':
+ if (! TARGET_MFCRF)
+ return;
+ fputc (',', file);
+ /* FALLTHRU */
+
+ case 'R':
+ /* X is a CR register. Print the mask for `mtcrf'. */
+ if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%R value");
+ else
+ fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
+ return;
+
+ case 's':
+ /* Low 5 bits of 32 - value */
+ if (! INT_P (x))
+ output_operand_lossage ("invalid %%s value");
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INT_LOWPART (x)) & 31);
+ return;
+
+ case 'S':
+ /* PowerPC64 mask position. All 0's is excluded.
+ CONST_INT 32-bit mask is considered sign-extended so any
+ transition must occur within the CONST_INT, not on the boundary. */
+ if (! mask64_operand (x, DImode))
+ output_operand_lossage ("invalid %%S value");
+
+ uval = INT_LOWPART (x);
+
+ if (uval & 1) /* Clear Left */
+ {
+#if HOST_BITS_PER_WIDE_INT > 64
+ uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
+#endif
+ i = 64;
+ }
+ else /* Clear Right */
+ {
+ uval = ~uval;
+#if HOST_BITS_PER_WIDE_INT > 64
+ uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
+#endif
+ i = 63;
+ }
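+ /* I counts down to the rldicl MB value (64 minus the number of
+ low-order 1's) or to the rldicr ME value (63 minus the number of
+ low-order 0's). */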
+ while (uval != 0)
+ --i, uval >>= 1;
+ gcc_assert (i >= 0);
+ fprintf (file, "%d", i);
+ return;
+
+ case 't':
+ /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
+ gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == CCmode);
+
+ /* Bit 3 is OV bit. */
+ i = 4 * (REGNO (x) - CR0_REGNO) + 3;
+
+ /* If we want bit 31, write a shift count of zero, not 32. */
+ fprintf (file, "%d", i == 31 ? 0 : i + 1);
+ return;
+
+ case 'T':
+ /* Print the symbolic name of a branch target register. */
+ if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
+ && REGNO (x) != CTR_REGNO))
+ output_operand_lossage ("invalid %%T value");
+ else if (REGNO (x) == LR_REGNO)
+ fputs (TARGET_NEW_MNEMONICS ? "lr" : "r", file);
+ else
+ fputs ("ctr", file);
+ return;
+
+ case 'u':
+ /* High-order 16 bits of constant for use in unsigned operand. */
+ if (! INT_P (x))
+ output_operand_lossage ("invalid %%u value");
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ (INT_LOWPART (x) >> 16) & 0xffff);
+ return;
+
+ case 'v':
+ /* High-order 16 bits of constant for use in signed operand. */
+ if (! INT_P (x))
+ output_operand_lossage ("invalid %%v value");
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ (INT_LOWPART (x) >> 16) & 0xffff);
+ return;
+
+ case 'U':
+ /* Print `u' if this has an auto-increment or auto-decrement. */
+ if (GET_CODE (x) == MEM
+ && (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC
+ || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
+ putc ('u', file);
+ return;
+
+ case 'V':
+ /* Print the trap code for this operand. */
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("eq", file); /* 4 */
+ break;
+ case NE:
+ fputs ("ne", file); /* 24 */
+ break;
+ case LT:
+ fputs ("lt", file); /* 16 */
+ break;
+ case LE:
+ fputs ("le", file); /* 20 */
+ break;
+ case GT:
+ fputs ("gt", file); /* 8 */
+ break;
+ case GE:
+ fputs ("ge", file); /* 12 */
+ break;
+ case LTU:
+ fputs ("llt", file); /* 2 */
+ break;
+ case LEU:
+ fputs ("lle", file); /* 6 */
+ break;
+ case GTU:
+ fputs ("lgt", file); /* 1 */
+ break;
+ case GEU:
+ fputs ("lge", file); /* 5 */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'w':
+ /* If constant, low-order 16 bits of constant, signed. Otherwise, write
+ normally. */
+ if (INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ ((INT_LOWPART (x) & 0xffff) ^ 0x8000) - 0x8000);
+ else
+ print_operand (file, x, 0);
+ return;
+
+ case 'W':
+ /* MB value for a PowerPC64 rldic operand. */
+ i = clz_hwi (GET_CODE (x) == CONST_INT
+ ? INTVAL (x) : CONST_DOUBLE_HIGH (x));
+
+#if HOST_BITS_PER_WIDE_INT == 32
+ if (GET_CODE (x) == CONST_INT && i > 0)
+ i += 32; /* the zero-extended high part was all 0's */
+ else if (GET_CODE (x) == CONST_DOUBLE && i == 32)
+ i = clz_hwi (CONST_DOUBLE_LOW (x)) + 32;
+#endif
+
+ fprintf (file, "%d", i);
+ return;
+
+ case 'x':
+ /* X is an FPR or Altivec register used in a VSX context. */
+ if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
+ output_operand_lossage ("invalid %%x value");
+ else
+ {
+ int reg = REGNO (x);
+ int vsx_reg = (FP_REGNO_P (reg)
+ ? reg - 32
+ : reg - FIRST_ALTIVEC_REGNO + 32);
+
+#ifdef TARGET_REGNAMES
+ if (TARGET_REGNAMES)
+ fprintf (file, "%%vs%d", vsx_reg);
+ else
+#endif
+ fprintf (file, "%d", vsx_reg);
+ }
+ return;
+
+ case 'X':
+ if (GET_CODE (x) == MEM
+ && (legitimate_indexed_address_p (XEXP (x, 0), 0)
+ || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
+ && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
+ putc ('x', file);
+ return;
+
+ case 'Y':
+ /* Like 'L', for third word of TImode */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x) + 2], file);
+ else if (GET_CODE (x) == MEM)
+ {
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 8));
+ else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 8));
+ else
+ output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
+ if (small_data_operand (x, GET_MODE (x)))
+ fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+ reg_names[SMALL_DATA_REG]);
+ }
+ return;
+
+ case 'z':
+ /* X is a SYMBOL_REF. Write out the name preceded by a
+ period and without any trailing data in brackets. Used for function
+ names. If we are configured for System V (or the embedded ABI) on
+ the PowerPC, do not emit the period, since those systems do not use
+ TOCs and the like. */
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ /* Mark the decl as referenced so that cgraph will output the
+ function. */
+ if (SYMBOL_REF_DECL (x))
+ mark_decl_referenced (SYMBOL_REF_DECL (x));
+
+ /* For macho, check to see if we need a stub. */
+ if (TARGET_MACHO)
+ {
+ const char *name = XSTR (x, 0);
+#if TARGET_MACHO
+ if (darwin_emit_branch_islands
+ && MACHOPIC_INDIRECT
+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
+ name = machopic_indirection_name (x, /*stub_p=*/true);
+#endif
+ assemble_name (file, name);
+ }
+ else if (!DOT_SYMBOLS)
+ assemble_name (file, XSTR (x, 0));
+ else
+ rs6000_output_function_entry (file, XSTR (x, 0));
+ return;
+
+ case 'Z':
+ /* Like 'L', for last word of TImode. */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x) + 3], file);
+ else if (GET_CODE (x) == MEM)
+ {
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 12));
+ else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+ output_address (plus_constant (XEXP (XEXP (x, 0), 0), 12));
+ else
+ output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
+ if (small_data_operand (x, GET_MODE (x)))
+ fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+ reg_names[SMALL_DATA_REG]);
+ }
+ return;
+
+ /* Print an AltiVec or SPE memory operand. */
+ case 'y':
+ {
+ rtx tmp;
+
+ gcc_assert (GET_CODE (x) == MEM);
+
+ tmp = XEXP (x, 0);
+
+ /* Ugly hack because %y is overloaded. */
+ if ((TARGET_SPE || TARGET_E500_DOUBLE)
+ && (GET_MODE_SIZE (GET_MODE (x)) == 8
+ || GET_MODE (x) == TFmode
+ || GET_MODE (x) == TImode))
+ {
+ /* Handle [reg]. */
+ if (GET_CODE (tmp) == REG)
+ {
+ fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
+ break;
+ }
+ /* Handle [reg+UIMM]. */
+ else if (GET_CODE (tmp) == PLUS
+ && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
+ {
+ int offset;
+
+ gcc_assert (GET_CODE (XEXP (tmp, 0)) == REG);
+
+ offset = INTVAL (XEXP (tmp, 1));
+ fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
+ break;
+ }
+
+ /* Fall through. Must be [reg+reg]. */
+ }
+ if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
+ && GET_CODE (tmp) == AND
+ && GET_CODE (XEXP (tmp, 1)) == CONST_INT
+ && INTVAL (XEXP (tmp, 1)) == -16)
+ tmp = XEXP (tmp, 0);
+ else if (VECTOR_MEM_VSX_P (GET_MODE (x))
+ && GET_CODE (tmp) == PRE_MODIFY)
+ tmp = XEXP (tmp, 1);
+ if (GET_CODE (tmp) == REG)
+ fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
+ else
+ {
+ if (GET_CODE (tmp) != PLUS
+ || !REG_P (XEXP (tmp, 0))
+ || !REG_P (XEXP (tmp, 1)))
+ {
+ output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
+ break;
+ }
+
+ if (REGNO (XEXP (tmp, 0)) == 0)
+ fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
+ reg_names[ REGNO (XEXP (tmp, 0)) ]);
+ else
+ fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
+ reg_names[ REGNO (XEXP (tmp, 1)) ]);
+ }
+ break;
+ }
+
+ case 0:
+ if (GET_CODE (x) == REG)
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (GET_CODE (x) == MEM)
+ {
+ /* We need to handle PRE_INC and PRE_DEC here, since we need to
+ know the width from the mode. */
+ if (GET_CODE (XEXP (x, 0)) == PRE_INC)
+ fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
+ reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
+ else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
+ reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
+ else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
+ output_address (XEXP (XEXP (x, 0), 1));
+ else
+ output_address (XEXP (x, 0));
+ }
+ else
+ {
+ if (toc_relative_expr_p (x))
+ /* This hack, along with a corresponding hack in
+ rs6000_output_addr_const_extra, arranges to output addends
+ where the assembler expects to find them. E.g.
+ (const (plus (unspec [symbol_ref ("x") tocrel]) 4))
+ would without this hack be output as "x@toc+4". We
+ want "x+4@toc". */
+ output_addr_const (file, tocrel_base);
+ else
+ output_addr_const (file, x);
+ }
+ return;
+
+ case '&':
+ assemble_name (file, rs6000_get_some_local_dynamic_name ());
+ return;
+
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ }
+}
+
+/* Print the address of an operand. */
+
+void
+print_operand_address (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == REG)
+ fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
+ else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
+ || GET_CODE (x) == LABEL_REF)
+ {
+ output_addr_const (file, x);
+ if (small_data_operand (x, GET_MODE (x)))
+ fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
+ reg_names[SMALL_DATA_REG]);
+ else
+ gcc_assert (!TARGET_TOC);
+ }
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == REG)
+ {
+ gcc_assert (REG_P (XEXP (x, 0)));
+ if (REGNO (XEXP (x, 0)) == 0)
+ fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
+ reg_names[ REGNO (XEXP (x, 0)) ]);
+ else
+ fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
+ reg_names[ REGNO (XEXP (x, 1)) ]);
+ }
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
+ INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
+#if TARGET_MACHO
+ else if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == REG
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ fprintf (file, "lo16(");
+ output_addr_const (file, XEXP (x, 1));
+ fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
+ }
+#endif
+ else if (legitimate_constant_pool_address_p (x, QImode, true))
+ {
+ /* This hack, along with a corresponding hack in
+ rs6000_output_addr_const_extra, arranges to output addends
+ where the assembler expects to find them. E.g.
+ (lo_sum (reg 9)
+ (const (plus (unspec [symbol_ref ("x") tocrel]) 8)))
+ would without this hack be output as "x@toc+8@l(9)". We
+ want "x+8@toc@l(9)". */
+ output_addr_const (file, tocrel_base);
+ if (GET_CODE (x) == LO_SUM)
+ fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
+ else
+ fprintf (file, "(%s)", reg_names[REGNO (XEXP (x, 0))]);
+ }
+#if TARGET_ELF
+ else if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == REG
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ output_addr_const (file, XEXP (x, 1));
+ fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
+ }
+#endif
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+rs6000_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_TOCREL:
+ gcc_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF);
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ if (x == tocrel_base && tocrel_offset != const0_rtx)
+ {
+ if (INTVAL (tocrel_offset) >= 0)
+ fprintf (file, "+");
+ output_addr_const (file, tocrel_offset);
+ }
+ if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
+ {
+ putc ('-', file);
+ assemble_name (file, toc_label_name);
+ }
+ else if (TARGET_ELF)
+ fputs ("@toc", file);
+ return true;
+
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ return true;
+#endif
+ }
+ return false;
+}
+
+/* Target hook for assembling integer objects. The PowerPC version has
+ to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
+ is defined. It also needs to handle DI-mode objects on 64-bit
+ targets. */
+
+static bool
+rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+#ifdef RELOCATABLE_NEEDS_FIXUP
+ /* Special handling for SI values. */
+ if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
+ {
+ static int recurse = 0;
+
+ /* For -mrelocatable, we mark all addresses that need to be fixed up
+ in the .fixup section. */
+ if (TARGET_RELOCATABLE
+ && in_section != toc_section
+ && in_section != text_section
+ && (in_section && (in_section->common.flags & SECTION_CODE)) == 0
+ && !recurse
+ && GET_CODE (x) != CONST_INT
+ && GET_CODE (x) != CONST_DOUBLE
+ && CONSTANT_P (x))
+ {
+ char buf[256];
+
+ recurse = 1;
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
+ fixuplabelno++;
+ ASM_OUTPUT_LABEL (asm_out_file, buf);
+ fprintf (asm_out_file, "\t.long\t(");
+ output_addr_const (asm_out_file, x);
+ fprintf (asm_out_file, ")@fixup\n");
+ fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
+ ASM_OUTPUT_ALIGN (asm_out_file, 2);
+ fprintf (asm_out_file, "\t.long\t");
+ assemble_name (asm_out_file, buf);
+ fprintf (asm_out_file, "\n\t.previous\n");
+ recurse = 0;
+ return true;
+ }
+ /* Remove initial .'s to turn a -mcall-aixdesc function
+ address into the address of the descriptor, not the function
+ itself. */
+ else if (GET_CODE (x) == SYMBOL_REF
+ && XSTR (x, 0)[0] == '.'
+ && DEFAULT_ABI == ABI_AIX)
+ {
+ const char *name = XSTR (x, 0);
+ while (*name == '.')
+ name++;
+
+ fprintf (asm_out_file, "\t.long\t%s\n", name);
+ return true;
+ }
+ }
+#endif /* RELOCATABLE_NEEDS_FIXUP */
+ return default_assemble_integer (x, size, aligned_p);
+}
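+
+ /* A sketch of the assembly produced by the -mrelocatable path above
+ (the label name is an assumption; it comes from
+ ASM_GENERATE_INTERNAL_LABEL):
+
+ .LCP0:
+ .long (sym)@fixup
+ .section ".fixup","aw"
+ .align 2
+ .long .LCP0
+ .previous
+
+ so startup code can find and fix up the address word at .LCP0. */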
+
+#ifdef HAVE_GAS_HIDDEN
+/* Emit an assembler directive to set symbol visibility for DECL to
+ VISIBILITY_TYPE. */
+
+static void
+rs6000_assemble_visibility (tree decl, int vis)
+{
+ /* Functions need to have their entry point symbol visibility set as
+ well as their descriptor symbol visibility. */
+ if (DEFAULT_ABI == ABI_AIX
+ && DOT_SYMBOLS
+ && TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ static const char * const visibility_types[] = {
+ NULL, "internal", "hidden", "protected"
+ };
+
+ const char *name, *type;
+
+ name = ((* targetm.strip_name_encoding)
+ (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
+ type = visibility_types[vis];
+
+ fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
+ fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
+ }
+ else
+ default_assemble_visibility (decl, vis);
+}
+#endif
+
+enum rtx_code
+rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
+{
+ /* Reversal of FP compares needs care -- an ordered compare
+ becomes an unordered compare and vice versa. */
+ if (mode == CCFPmode
+ && (!flag_finite_math_only
+ || code == UNLT || code == UNLE || code == UNGT || code == UNGE
+ || code == UNEQ || code == LTGT))
+ return reverse_condition_maybe_unordered (code);
+ else
+ return reverse_condition (code);
+}
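+
+ /* For example, with mode == CCFPmode and !flag_finite_math_only,
+ GT reverses to UNLE and LT to UNGE, preserving NaN behaviour;
+ in CCmode the plain reversal GT -> LE is used instead. */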
+
+/* Generate a compare for CODE. Return a brand-new rtx that
+ represents the result of the compare. */
+
+static rtx
+rs6000_generate_compare (rtx cmp, enum machine_mode mode)
+{
+ enum machine_mode comp_mode;
+ rtx compare_result;
+ enum rtx_code code = GET_CODE (cmp);
+ rtx op0 = XEXP (cmp, 0);
+ rtx op1 = XEXP (cmp, 1);
+
+ if (FLOAT_MODE_P (mode))
+ comp_mode = CCFPmode;
+ else if (code == GTU || code == LTU
+ || code == GEU || code == LEU)
+ comp_mode = CCUNSmode;
+ else if ((code == EQ || code == NE)
+ && GET_CODE (op0) == SUBREG
+ && GET_CODE (op1) == SUBREG
+ && SUBREG_PROMOTED_UNSIGNED_P (op0)
+ && SUBREG_PROMOTED_UNSIGNED_P (op1))
+ /* These are unsigned values; perhaps there will be a later
+ ordering compare that can be shared with this one.
+ Unfortunately we cannot detect the signedness of the operands
+ for non-subregs. */
+ comp_mode = CCUNSmode;
+ else
+ comp_mode = CCmode;
+
+ /* First, the compare. */
+ compare_result = gen_reg_rtx (comp_mode);
+
+ /* E500 FP compare instructions on the GPRs. Yuck! */
+ if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
+ && FLOAT_MODE_P (mode))
+ {
+ rtx cmp, or_result, compare_result2;
+ enum machine_mode op_mode = GET_MODE (op0);
+
+ if (op_mode == VOIDmode)
+ op_mode = GET_MODE (op1);
+
+ /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
+ This explains the following mess. */
+
+ switch (code)
+ {
+ case EQ: case UNEQ: case NE: case LTGT:
+ switch (op_mode)
+ {
+ case SFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstsfeq_gpr (compare_result, op0, op1)
+ : gen_cmpsfeq_gpr (compare_result, op0, op1);
+ break;
+
+ case DFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstdfeq_gpr (compare_result, op0, op1)
+ : gen_cmpdfeq_gpr (compare_result, op0, op1);
+ break;
+
+ case TFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tsttfeq_gpr (compare_result, op0, op1)
+ : gen_cmptfeq_gpr (compare_result, op0, op1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case GT: case GTU: case UNGT: case UNGE: case GE: case GEU:
+ switch (op_mode)
+ {
+ case SFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstsfgt_gpr (compare_result, op0, op1)
+ : gen_cmpsfgt_gpr (compare_result, op0, op1);
+ break;
+
+ case DFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstdfgt_gpr (compare_result, op0, op1)
+ : gen_cmpdfgt_gpr (compare_result, op0, op1);
+ break;
+
+ case TFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tsttfgt_gpr (compare_result, op0, op1)
+ : gen_cmptfgt_gpr (compare_result, op0, op1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case LT: case LTU: case UNLT: case UNLE: case LE: case LEU:
+ switch (op_mode)
+ {
+ case SFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstsflt_gpr (compare_result, op0, op1)
+ : gen_cmpsflt_gpr (compare_result, op0, op1);
+ break;
+
+ case DFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstdflt_gpr (compare_result, op0, op1)
+ : gen_cmpdflt_gpr (compare_result, op0, op1);
+ break;
+
+ case TFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tsttflt_gpr (compare_result, op0, op1)
+ : gen_cmptflt_gpr (compare_result, op0, op1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Synthesize LE and GE from LT/GT || EQ. */
+ if (code == LE || code == GE || code == LEU || code == GEU)
+ {
+ emit_insn (cmp);
+
+ switch (code)
+ {
+ case LE: code = LT; break;
+ case GE: code = GT; break;
+ case LEU: code = LT; break;
+ case GEU: code = GT; break;
+ default: gcc_unreachable ();
+ }
+
+ compare_result2 = gen_reg_rtx (CCFPmode);
+
+ /* Do the EQ. */
+ switch (op_mode)
+ {
+ case SFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstsfeq_gpr (compare_result2, op0, op1)
+ : gen_cmpsfeq_gpr (compare_result2, op0, op1);
+ break;
+
+ case DFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tstdfeq_gpr (compare_result2, op0, op1)
+ : gen_cmpdfeq_gpr (compare_result2, op0, op1);
+ break;
+
+ case TFmode:
+ cmp = (flag_finite_math_only && !flag_trapping_math)
+ ? gen_tsttfeq_gpr (compare_result2, op0, op1)
+ : gen_cmptfeq_gpr (compare_result2, op0, op1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn (cmp);
+
+ /* OR them together. */
+ or_result = gen_reg_rtx (CCFPmode);
+ cmp = gen_e500_cr_ior_compare (or_result, compare_result,
+ compare_result2);
+ compare_result = or_result;
+ code = EQ;
+ }
+ else
+ {
+ if (code == NE || code == LTGT)
+ code = NE;
+ else
+ code = EQ;
+ }
+
+ emit_insn (cmp);
+ }
+ else
+ {
+ /* Generate XLC-compatible TFmode compare as PARALLEL with extra
+ CLOBBERs to match cmptf_internal2 pattern. */
+ if (comp_mode == CCFPmode && TARGET_XL_COMPAT
+ && GET_MODE (op0) == TFmode
+ && !TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (10,
+ gen_rtx_SET (VOIDmode,
+ compare_result,
+ gen_rtx_COMPARE (comp_mode, op0, op1)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
+ else if (GET_CODE (op1) == UNSPEC
+ && XINT (op1, 1) == UNSPEC_SP_TEST)
+ {
+ rtx op1b = XVECEXP (op1, 0, 0);
+ comp_mode = CCEQmode;
+ compare_result = gen_reg_rtx (CCEQmode);
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
+ else
+ emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, compare_result,
+ gen_rtx_COMPARE (comp_mode, op0, op1)));
+ }
+
+ /* Some kinds of FP comparisons need an OR operation;
+ under flag_finite_math_only we don't bother. */
+ if (FLOAT_MODE_P (mode)
+ && !flag_finite_math_only
+ && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
+ && (code == LE || code == GE
+ || code == UNEQ || code == LTGT
+ || code == UNGT || code == UNLT))
+ {
+ enum rtx_code or1, or2;
+ rtx or1_rtx, or2_rtx, compare2_rtx;
+ rtx or_result = gen_reg_rtx (CCEQmode);
+
+ switch (code)
+ {
+ case LE: or1 = LT; or2 = EQ; break;
+ case GE: or1 = GT; or2 = EQ; break;
+ case UNEQ: or1 = UNORDERED; or2 = EQ; break;
+ case LTGT: or1 = LT; or2 = GT; break;
+ case UNGT: or1 = UNORDERED; or2 = GT; break;
+ case UNLT: or1 = UNORDERED; or2 = LT; break;
+ default: gcc_unreachable ();
+ }
+ validate_condition_mode (or1, comp_mode);
+ validate_condition_mode (or2, comp_mode);
+ or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
+ or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
+ compare2_rtx = gen_rtx_COMPARE (CCEQmode,
+ gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
+ const_true_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
+
+ compare_result = or_result;
+ code = EQ;
+ }
+
+ validate_condition_mode (code, GET_MODE (compare_result));
+
+ return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
+}
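+
+ /* As an example of the CCEQ path above: for a floating-point
+ "a <= b" the compare sets a CCFP register, the IOR of its LT and
+ EQ bits is compared against const_true_rtx in CCEQmode, and the
+ function returns (eq (reg:CCEQ) 0) for the branch or set pattern
+ to consume. */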
+
+
+/* Emit the RTL for an sISEL pattern. */
+
+void
+rs6000_emit_sISEL (enum machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
+{
+ rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
+}
+
+void
+rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
+{
+ rtx condition_rtx;
+ enum machine_mode op_mode;
+ enum rtx_code cond_code;
+ rtx result = operands[0];
+
+ if (TARGET_ISEL && (mode == SImode || mode == DImode))
+ {
+ rs6000_emit_sISEL (mode, operands);
+ return;
+ }
+
+ condition_rtx = rs6000_generate_compare (operands[1], mode);
+ cond_code = GET_CODE (condition_rtx);
+
+ if (FLOAT_MODE_P (mode)
+ && !TARGET_FPRS && TARGET_HARD_FLOAT)
+ {
+ rtx t;
+
+ PUT_MODE (condition_rtx, SImode);
+ t = XEXP (condition_rtx, 0);
+
+ gcc_assert (cond_code == NE || cond_code == EQ);
+
+ if (cond_code == NE)
+ emit_insn (gen_e500_flip_gt_bit (t, t));
+
+ emit_insn (gen_move_from_CR_gt_bit (result, t));
+ return;
+ }
+
+ if (cond_code == NE
+ || cond_code == GE || cond_code == LE
+ || cond_code == GEU || cond_code == LEU
+ || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
+ {
+ rtx not_result = gen_reg_rtx (CCEQmode);
+ rtx not_op, rev_cond_rtx;
+ enum machine_mode cc_mode;
+
+ cc_mode = GET_MODE (XEXP (condition_rtx, 0));
+
+ rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
+ SImode, XEXP (condition_rtx, 0), const0_rtx);
+ not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
+ condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
+ }
+
+ op_mode = GET_MODE (XEXP (operands[1], 0));
+ if (op_mode == VOIDmode)
+ op_mode = GET_MODE (XEXP (operands[1], 1));
+
+ if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
+ {
+ PUT_MODE (condition_rtx, DImode);
+ convert_move (result, condition_rtx, 0);
+ }
+ else
+ {
+ PUT_MODE (condition_rtx, SImode);
+ emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
+ }
+}
+
+ /* Emit a conditional branch: test the comparison in operands[0]
+ and branch to the label in operands[3]. */
+
+void
+rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
+{
+ rtx condition_rtx, loc_ref;
+
+ condition_rtx = rs6000_generate_compare (operands[0], mode);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
+ loc_ref, pc_rtx)));
+}
+
+ /* Return the string to output a conditional branch to LABEL, the
+ assembler text for the label, or NULL if the branch is really a
+ conditional return.
+
+ OP is the conditional expression. XEXP (OP, 0) is assumed to be a
+ condition code register and its mode specifies what kind of
+ comparison we made.
+
+ REVERSED is nonzero if we should reverse the sense of the comparison.
+
+ INSN is the insn. */
+
+char *
+output_cbranch (rtx op, const char *label, int reversed, rtx insn)
+{
+ static char string[64];
+ enum rtx_code code = GET_CODE (op);
+ rtx cc_reg = XEXP (op, 0);
+ enum machine_mode mode = GET_MODE (cc_reg);
+ int cc_regno = REGNO (cc_reg) - CR0_REGNO;
+ int need_longbranch = label != NULL && get_attr_length (insn) == 8;
+ int really_reversed = reversed ^ need_longbranch;
+ char *s = string;
+ const char *ccode;
+ const char *pred;
+ rtx note;
+
+ validate_condition_mode (code, mode);
+
+ /* Work out which way this really branches. We could use
+ reverse_condition_maybe_unordered here always, but using the
+ plain reversal where we can makes the resulting assembler clearer. */
+ if (really_reversed)
+ {
+ /* Reversal of FP compares needs care -- an ordered compare
+ becomes an unordered compare and vice versa. */
+ if (mode == CCFPmode)
+ code = reverse_condition_maybe_unordered (code);
+ else
+ code = reverse_condition (code);
+ }
+
+ if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
+ {
+ /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
+ to the GT bit. */
+ switch (code)
+ {
+ case EQ:
+ /* Opposite of GT. */
+ code = GT;
+ break;
+
+ case NE:
+ code = UNLE;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ switch (code)
+ {
+ /* Not all of these are actually distinct opcodes, but
+ we distinguish them for clarity of the resulting assembler. */
+ case NE: case LTGT:
+ ccode = "ne"; break;
+ case EQ: case UNEQ:
+ ccode = "eq"; break;
+ case GE: case GEU:
+ ccode = "ge"; break;
+ case GT: case GTU: case UNGT:
+ ccode = "gt"; break;
+ case LE: case LEU:
+ ccode = "le"; break;
+ case LT: case LTU: case UNLT:
+ ccode = "lt"; break;
+ case UNORDERED: ccode = "un"; break;
+ case ORDERED: ccode = "nu"; break;
+ case UNGE: ccode = "nl"; break;
+ case UNLE: ccode = "ng"; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Maybe we have a guess as to how likely the branch is.
+ The old mnemonics don't have a way to specify this information. */
+ pred = "";
+ note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
+ if (note != NULL_RTX)
+ {
+ /* PROB is the difference from 50%. */
+ int prob = INTVAL (XEXP (note, 0)) - REG_BR_PROB_BASE / 2;
+
+ /* Only hint for highly probable/improbable branches on newer
+ cpus as static prediction overrides processor dynamic
+ prediction. For older cpus we may as well always hint, but
+ assume not taken for branches that are very close to 50% as a
+ mispredicted taken branch is more expensive than a
+ mispredicted not-taken branch. */
+ if (rs6000_always_hint
+ || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
+ && br_prob_note_reliable_p (note)))
+ {
+ if (abs (prob) > REG_BR_PROB_BASE / 20
+ && ((prob > 0) ^ need_longbranch))
+ pred = "+";
+ else
+ pred = "-";
+ }
+ }
+
+ if (label == NULL)
+ s += sprintf (s, "{b%sr|b%slr%s} ", ccode, ccode, pred);
+ else
+ s += sprintf (s, "{b%s|b%s%s} ", ccode, ccode, pred);
+
+ /* We need to escape any '%' characters in the reg_names string.
+ Assume they'd only be the first character.... */
+ if (reg_names[cc_regno + CR0_REGNO][0] == '%')
+ *s++ = '%';
+ s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
+
+ if (label != NULL)
+ {
+ /* If the branch distance was too far, we may have to use an
+ unconditional branch to go the distance. */
+ if (need_longbranch)
+ s += sprintf (s, ",$+8\n\tb %s", label);
+ else
+ s += sprintf (s, ",%s", label);
+ }
+
+ return string;
+}
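+
+ /* Example outputs (register name assumes -mregnames style, label is
+ illustrative): for (eq cr0 0) with label ".L5", not reversed, the
+ string after assembler-dialect expansion of the {old|new} braces is
+ "beq cr0,.L5"; the same branch needing a long displacement is
+ emitted reversed as "bne cr0,$+8" followed by "b .L5". */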
+
+/* Return the string to flip the GT bit on a CR. */
+char *
+output_e500_flip_gt_bit (rtx dst, rtx src)
+{
+ static char string[64];
+ int a, b;
+
+ gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
+ && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
+
+ /* GT bit. */
+ a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
+ b = 4 * (REGNO (src) - CR0_REGNO) + 1;
+
+ sprintf (string, "crnot %d,%d", a, b);
+ return string;
+}
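+
+ /* E.g. flipping the GT bit of cr2 in place: a = b = 4 * 2 + 1 = 9,
+ giving "crnot 9,9". */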
+
+ /* Emit a VSX or Altivec comparison and return the mask register,
+ or NULL_RTX if the comparison is not directly supported. */
+
+static rtx
+rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx mask;
+ enum machine_mode mode = GET_MODE (op0);
+
+ switch (code)
+ {
+ default:
+ break;
+
+ case GE:
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ return NULL_RTX;
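+ /* Fall through -- vector float GE is handled directly below. */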
+
+ case EQ:
+ case GT:
+ case GTU:
+ case ORDERED:
+ case UNORDERED:
+ case UNEQ:
+ case LTGT:
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ mask,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+ return mask;
+ }
+
+ return NULL_RTX;
+}
+
+/* Emit vector compare for operands OP0 and OP1 using code RCODE.
+ DMODE is the expected destination mode. This is a recursive function. */
+
+static rtx
+rs6000_emit_vector_compare (enum rtx_code rcode,
+ rtx op0, rtx op1,
+ enum machine_mode dmode)
+{
+ rtx mask;
+ bool swap_operands = false;
+ bool try_again = false;
+
+ gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+ /* See if the comparison works as is. */
+ mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
+ if (mask)
+ return mask;
+
+ switch (rcode)
+ {
+ case LT:
+ rcode = GT;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case LTU:
+ rcode = GTU;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case NE:
+ case UNLE:
+ case UNLT:
+ case UNGE:
+ case UNGT:
+ /* Invert condition and try again.
+ e.g., A != B becomes ~(A==B). */
+ {
+ enum rtx_code rev_code;
+ enum insn_code nor_code;
+ rtx mask2;
+
+ rev_code = reverse_condition_maybe_unordered (rcode);
+ if (rev_code == UNKNOWN)
+ return NULL_RTX;
+
+ nor_code = optab_handler (one_cmpl_optab, dmode);
+ if (nor_code == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
+ if (!mask2)
+ return NULL_RTX;
+
+ mask = gen_reg_rtx (dmode);
+ emit_insn (GEN_FCN (nor_code) (mask, mask2));
+ return mask;
+ }
+ break;
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* Try GT/GTU/LT/LTU OR EQ */
+ {
+ rtx c_rtx, eq_rtx;
+ enum insn_code ior_code;
+ enum rtx_code new_code;
+
+ switch (rcode)
+ {
+ case GE:
+ new_code = GT;
+ break;
+
+ case GEU:
+ new_code = GTU;
+ break;
+
+ case LE:
+ new_code = LT;
+ break;
+
+ case LEU:
+ new_code = LTU;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ ior_code = optab_handler (ior_optab, dmode);
+ if (ior_code == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
+ if (!c_rtx)
+ return NULL_RTX;
+
+ eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
+ if (!eq_rtx)
+ return NULL_RTX;
+
+ mask = gen_reg_rtx (dmode);
+ emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
+ return mask;
+ }
+ break;
+ default:
+ return NULL_RTX;
+ }
+
+ if (try_again)
+ {
+ if (swap_operands)
+ {
+ rtx tmp;
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
+ if (mask)
+ return mask;
+ }
+
+ /* You only get two chances. */
+ return NULL_RTX;
+}
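+
+ /* Synthesis examples: V4SI "a <= b" has no direct insn, so the
+ GE/GEU/LE/LEU arm builds (b > a) | (a == b) -- LT is retried as GT
+ with the operands swapped; NE is built as ~(a == b) via
+ one_cmpl_optab. */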
+
+/* Emit vector conditional expression. DEST is destination. OP_TRUE and
+ OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
+ operands for the relation operation COND. */
+
+int
+rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum rtx_code rcode = GET_CODE (cond);
+ enum machine_mode cc_mode = CCmode;
+ rtx mask;
+ rtx cond2;
+ rtx tmp;
+ bool invert_move = false;
+
+ if (VECTOR_UNIT_NONE_P (dest_mode))
+ return 0;
+
+ switch (rcode)
+ {
+ /* Swap operands if we can; otherwise do the operation as
+ specified and use a NOR to invert the test. */
+ case NE:
+ case UNLE:
+ case UNLT:
+ case UNGE:
+ case UNGT:
+ /* Invert condition and try again.
+ e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
+ invert_move = true;
+ rcode = reverse_condition_maybe_unordered (rcode);
+ if (rcode == UNKNOWN)
+ return 0;
+ break;
+
+ /* Mark unsigned tests with CCUNSmode. */
+ case GTU:
+ case GEU:
+ case LTU:
+ case LEU:
+ cc_mode = CCUNSmode;
+ break;
+
+ default:
+ break;
+ }
+
+ /* Get the vector mask for the given relational operations. */
+ mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
+
+ if (!mask)
+ return 0;
+
+ if (invert_move)
+ {
+ tmp = op_true;
+ op_true = op_false;
+ op_false = tmp;
+ }
+
+ cond2 = gen_rtx_fmt_ee (NE, cc_mode, mask, CONST0_RTX (dest_mode));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ dest,
+ gen_rtx_IF_THEN_ELSE (dest_mode,
+ cond2,
+ op_true,
+ op_false)));
+ return 1;
+}
+
+/* Emit a conditional move: move TRUE_COND to DEST if OP of the
+ operands of the last comparison is nonzero/true, FALSE_COND if it
+ is zero/false. Return 0 if the hardware has no such operation. */
+
+int
+rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
+{
+ enum rtx_code code = GET_CODE (op);
+ rtx op0 = XEXP (op, 0);
+ rtx op1 = XEXP (op, 1);
+ REAL_VALUE_TYPE c1;
+ enum machine_mode compare_mode = GET_MODE (op0);
+ enum machine_mode result_mode = GET_MODE (dest);
+ rtx temp;
+ bool is_against_zero;
+
+ /* These modes should always match. */
+ if (GET_MODE (op1) != compare_mode
+ /* In the isel case however, we can use a compare immediate, so
+ op1 may be a small constant. */
+ && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
+ return 0;
+ if (GET_MODE (true_cond) != result_mode)
+ return 0;
+ if (GET_MODE (false_cond) != result_mode)
+ return 0;
+
+ /* First, work out if the hardware can do this at all, or
+ if it's too slow.... */
+ if (!FLOAT_MODE_P (compare_mode))
+ {
+ if (TARGET_ISEL)
+ return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
+ return 0;
+ }
+ else if (TARGET_HARD_FLOAT && !TARGET_FPRS
+ && SCALAR_FLOAT_MODE_P (compare_mode))
+ return 0;
+
+ is_against_zero = op1 == CONST0_RTX (compare_mode);
+
+ /* A floating-point subtract might overflow, underflow, or produce
+ an inexact result, thus changing the floating-point flags, so it
+ can't be generated if we care about that. It's safe if one side
+ of the construct is zero, since then no subtract will be
+ generated. */
+ if (SCALAR_FLOAT_MODE_P (compare_mode)
+ && flag_trapping_math && ! is_against_zero)
+ return 0;
+
+ /* Eliminate half of the comparisons by switching operands, this
+ makes the remaining code simpler. */
+ if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
+ || code == LTGT || code == LT || code == UNLE)
+ {
+ code = reverse_condition_maybe_unordered (code);
+ temp = true_cond;
+ true_cond = false_cond;
+ false_cond = temp;
+ }
+
+ /* UNEQ and LTGT take four instructions for a comparison with zero,
+ so it'll probably be faster to use a branch here too. */
+ if (code == UNEQ && HONOR_NANS (compare_mode))
+ return 0;
+
+ if (GET_CODE (op1) == CONST_DOUBLE)
+ REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
+
+ /* We're going to try to implement comparisons by performing
+ a subtract, then comparing against zero. Unfortunately,
+ Inf - Inf is NaN which is not zero, and so if we don't
+ know that the operand is finite and the comparison
+ would treat EQ differently from UNORDERED, we can't do it. */
+ if (HONOR_INFINITIES (compare_mode)
+ && code != GT && code != UNGE
+ && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
+ /* Constructs of the form (a OP b ? a : b) are safe. */
+ && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
+ || (! rtx_equal_p (op0, true_cond)
+ && ! rtx_equal_p (op1, true_cond))))
+ return 0;
+
+ /* At this point we know we can use fsel. */
+
+ /* Reduce the comparison to a comparison against zero. */
+ if (! is_against_zero)
+ {
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_MINUS (compare_mode, op0, op1)));
+ op0 = temp;
+ op1 = CONST0_RTX (compare_mode);
+ }
+
+ /* If we don't care about NaNs we can reduce some of the comparisons
+ down to faster ones. */
+ if (! HONOR_NANS (compare_mode))
+ switch (code)
+ {
+ case GT:
+ code = LE;
+ temp = true_cond;
+ true_cond = false_cond;
+ false_cond = temp;
+ break;
+ case UNGE:
+ code = GE;
+ break;
+ case UNEQ:
+ code = EQ;
+ break;
+ default:
+ break;
+ }
+
+ /* Now, reduce everything down to a GE. */
+ switch (code)
+ {
+ case GE:
+ break;
+
+ case LE:
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
+ op0 = temp;
+ break;
+
+ case ORDERED:
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
+ op0 = temp;
+ break;
+
+ case EQ:
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_NEG (compare_mode,
+ gen_rtx_ABS (compare_mode, op0))));
+ op0 = temp;
+ break;
+
+ case UNGE:
+ /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
+ temp = gen_reg_rtx (result_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_IF_THEN_ELSE (result_mode,
+ gen_rtx_GE (VOIDmode,
+ op0, op1),
+ true_cond, false_cond)));
+ false_cond = true_cond;
+ true_cond = temp;
+
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
+ op0 = temp;
+ break;
+
+ case GT:
+ /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
+ temp = gen_reg_rtx (result_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_IF_THEN_ELSE (result_mode,
+ gen_rtx_GE (VOIDmode,
+ op0, op1),
+ true_cond, false_cond)));
+ true_cond = false_cond;
+ false_cond = temp;
+
+ temp = gen_reg_rtx (compare_mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
+ op0 = temp;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IF_THEN_ELSE (result_mode,
+ gen_rtx_GE (VOIDmode,
+ op0, op1),
+ true_cond, false_cond)));
+ return 1;
+}
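+
+ /* The GE base case maps directly onto fsel, which selects on
+ op0 >= 0.0; e.g. "a >= 0.0 ? x : y" becomes (operands
+ illustrative) "fsel fD,fA,fX,fY". The cases above massage the
+ comparison into that shape, e.g. EQ tests -fabs(a) >= 0.0. */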
+
+/* Same as above, but for ints (isel). */
+
+static int
+rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
+{
+ rtx condition_rtx, cr;
+ enum machine_mode mode = GET_MODE (dest);
+ enum rtx_code cond_code;
+ rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
+ bool signedp;
+
+ if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
+ return 0;
+
+ /* We still have to do the compare, because isel doesn't do a
+ compare; it just looks at the CRx bits set by a previous compare
+ instruction. */
+ condition_rtx = rs6000_generate_compare (op, mode);
+ cond_code = GET_CODE (condition_rtx);
+ cr = XEXP (condition_rtx, 0);
+ signedp = GET_MODE (cr) == CCmode;
+
+ isel_func = (mode == SImode
+ ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
+ : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
+
+ switch (cond_code)
+ {
+ case LT: case GT: case LTU: case GTU: case EQ:
+ /* isel handles these directly. */
+ break;
+
+ default:
+ /* We need to swap the sense of the comparison. */
+ {
+ rtx t = true_cond;
+ true_cond = false_cond;
+ false_cond = t;
+ PUT_CODE (condition_rtx, reverse_condition (cond_code));
+ }
+ break;
+ }
+
+ false_cond = force_reg (mode, false_cond);
+ if (true_cond != const0_rtx)
+ true_cond = force_reg (mode, true_cond);
+
+ emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
+
+ return 1;
+}
+
+const char *
+output_isel (rtx *operands)
+{
+ enum rtx_code code;
+
+ code = GET_CODE (operands[1]);
+
+ if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
+ {
+ gcc_assert (GET_CODE (operands[2]) == REG
+ && GET_CODE (operands[3]) == REG);
+ PUT_CODE (operands[1], reverse_condition (code));
+ return "isel %0,%3,%2,%j1";
+ }
+
+ return "isel %0,%2,%3,%j1";
+}
+
+void
+rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ enum rtx_code c;
+ rtx target;
+
+ /* VSX/altivec have direct min/max insns. */
+ if ((code == SMAX || code == SMIN)
+ && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
+ || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ dest,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+ return;
+ }
+
+ if (code == SMAX || code == SMIN)
+ c = GE;
+ else
+ c = GEU;
+
+ if (code == SMAX || code == UMAX)
+ target = emit_conditional_move (dest, c, op0, op1, mode,
+ op0, op1, mode, 0);
+ else
+ target = emit_conditional_move (dest, c, op0, op1, mode,
+ op1, op0, mode, 0);
+ gcc_assert (target);
+ if (target != dest)
+ emit_move_insn (dest, target);
+}
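+
+ /* E.g. smax: c = GE and dest = (op0 >= op1 ? op0 : op1) via
+ emit_conditional_move; smin swaps the move arms instead of the
+ comparison. */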
+
+/* Emit instructions to perform a load-reserved/store-conditional operation.
+ The operation performed is an atomic
+ (set M (CODE:MODE M OP))
+ If not NULL, BEFORE is atomically set to M before the operation, and
+ AFTER is set to M after the operation (that is, (CODE:MODE M OP)).
+ If SYNC_P then a memory barrier is emitted before the operation.
+ Either OP or M may be wrapped in a NOT operation. */
+
+void
+rs6000_emit_sync (enum rtx_code code, enum machine_mode mode,
+ rtx m, rtx op, rtx before_param, rtx after_param,
+ bool sync_p)
+{
+ enum machine_mode used_mode;
+ rtx the_op, set_before, set_after, set_atomic, cc_scratch, before, after;
+ rtx used_m;
+ rtvec vec;
+ HOST_WIDE_INT imask = GET_MODE_MASK (mode);
+ rtx shift = NULL_RTX;
+
+ if (sync_p)
+ emit_insn (gen_lwsync ());
+
+ used_m = m;
+
+ /* If this is smaller than SImode, we'll have to use SImode with
+ adjustments. */
+ if (mode == QImode || mode == HImode)
+ {
+ rtx newop, oldop;
+
+ if (MEM_ALIGN (used_m) >= 32)
+ {
+ int ishift = 0;
+ if (BYTES_BIG_ENDIAN)
+ ishift = GET_MODE_BITSIZE (SImode) - GET_MODE_BITSIZE (mode);
+
+ shift = GEN_INT (ishift);
+ used_m = change_address (used_m, SImode, 0);
+ }
+ else
+ {
+ rtx addrSI, aligned_addr;
+ int shift_mask = mode == QImode ? 0x18 : 0x10;
+
+ addrSI = gen_lowpart_common (SImode,
+ force_reg (Pmode, XEXP (used_m, 0)));
+ addrSI = force_reg (SImode, addrSI);
+ shift = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rlwinm (shift, addrSI, GEN_INT (3),
+ GEN_INT (shift_mask)));
+ emit_insn (gen_xorsi3 (shift, shift, GEN_INT (shift_mask)));
+
+ aligned_addr = expand_binop (Pmode, and_optab,
+ XEXP (used_m, 0),
+ GEN_INT (-4), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ used_m = change_address (used_m, SImode, aligned_addr);
+ set_mem_align (used_m, 32);
+ }
+ /* It's safe to keep the old alias set of USED_M, because
+ the operation is atomic and only affects the original
+ USED_M. */
+ m = used_m;
+
+ if (GET_CODE (op) == NOT)
+ {
+ oldop = lowpart_subreg (SImode, XEXP (op, 0), mode);
+ oldop = gen_rtx_NOT (SImode, oldop);
+ }
+ else
+ oldop = lowpart_subreg (SImode, op, mode);
+
+ switch (code)
+ {
+ case IOR:
+ case XOR:
+ newop = expand_binop (SImode, and_optab,
+ oldop, GEN_INT (imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_ashlsi3 (newop, newop, shift));
+ break;
+
+ case NOT: /* NAND */
+ newop = expand_binop (SImode, ior_optab,
+ oldop, GEN_INT (~imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_rotlsi3 (newop, newop, shift));
+ break;
+
+ case AND:
+ newop = expand_binop (SImode, ior_optab,
+ oldop, GEN_INT (~imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_rotlsi3 (newop, newop, shift));
+ break;
+
+ case PLUS:
+ case MINUS:
+ {
+ rtx mask;
+
+ newop = expand_binop (SImode, and_optab,
+ oldop, GEN_INT (imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_ashlsi3 (newop, newop, shift));
+
+ mask = gen_reg_rtx (SImode);
+ emit_move_insn (mask, GEN_INT (imask));
+ emit_insn (gen_ashlsi3 (mask, mask, shift));
+
+ if (code == PLUS)
+ newop = gen_rtx_PLUS (SImode, m, newop);
+ else
+ newop = gen_rtx_MINUS (SImode, m, newop);
+ newop = gen_rtx_AND (SImode, newop, mask);
+ newop = gen_rtx_IOR (SImode, newop,
+ gen_rtx_AND (SImode,
+ gen_rtx_NOT (SImode, mask),
+ m));
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ op = newop;
+ used_mode = SImode;
+ before = gen_reg_rtx (used_mode);
+ after = gen_reg_rtx (used_mode);
+ }
+ else
+ {
+ used_mode = mode;
+ before = before_param;
+ after = after_param;
+
+ if (before == NULL_RTX)
+ before = gen_reg_rtx (used_mode);
+ if (after == NULL_RTX)
+ after = gen_reg_rtx (used_mode);
+ }
+
+ if ((code == PLUS || code == MINUS)
+ && used_mode != mode)
+ the_op = op; /* Computed above. */
+ else if (GET_CODE (op) == NOT && GET_CODE (m) != NOT)
+ the_op = gen_rtx_fmt_ee (code, used_mode, op, m);
+ else if (code == NOT)
+ the_op = gen_rtx_fmt_ee (IOR, used_mode,
+ gen_rtx_NOT (used_mode, m),
+ gen_rtx_NOT (used_mode, op));
+ else
+ the_op = gen_rtx_fmt_ee (code, used_mode, m, op);
+
+ set_after = gen_rtx_SET (VOIDmode, after, the_op);
+ set_before = gen_rtx_SET (VOIDmode, before, used_m);
+ set_atomic = gen_rtx_SET (VOIDmode, used_m,
+ gen_rtx_UNSPEC (used_mode,
+ gen_rtvec (1, the_op),
+ UNSPEC_SYNC_OP));
+ cc_scratch = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
+
+ if ((code == PLUS || code == MINUS) && used_mode != mode)
+ vec = gen_rtvec (5, set_after, set_before, set_atomic, cc_scratch,
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)));
+ else
+ vec = gen_rtvec (4, set_after, set_before, set_atomic, cc_scratch);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+
+ /* Shift and mask the return values properly. */
+ if (used_mode != mode && before_param)
+ {
+ emit_insn (gen_lshrsi3 (before, before, shift));
+ convert_move (before_param, before, 1);
+ }
+
+ if (used_mode != mode && after_param)
+ {
+ emit_insn (gen_lshrsi3 (after, after, shift));
+ convert_move (after_param, after, 1);
+ }
+
+ /* The previous sequence will end with a branch that's dependent on
+ the conditional store, so placing an isync will ensure that no
+ other instructions (especially, no load or store instructions)
+ can start before the atomic operation completes. */
+ if (sync_p)
+ emit_insn (gen_isync ());
+}
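+
+ /* Subword example (big-endian, assuming a byte at address A with
+ A % 4 == 1): the rlwinm/xor pair above computes
+ shift = ((A & 3) << 3) ^ 0x18 = 8 ^ 0x18 = 16,
+ so the byte's value and mask are shifted into bits 16..23 of the
+ aligned word before the SImode atomic sequence runs. */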
+
+/* A subroutine of the atomic operation splitters. Jump to LABEL if
+ COND is true. Mark the jump as unlikely to be taken. */
+
+static void
+emit_unlikely_jump (rtx cond, rtx label)
+{
+ rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+ rtx x;
+
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
+ x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
+ add_reg_note (x, REG_BR_PROB, very_unlikely);
+}
+
+/* A subroutine of the atomic operation splitters. Emit a load-locked
+ instruction in MODE. */
+
+static void
+emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
+{
+ rtx (*fn) (rtx, rtx) = NULL;
+ if (mode == SImode)
+ fn = gen_load_locked_si;
+ else if (mode == DImode)
+ fn = gen_load_locked_di;
+ emit_insn (fn (reg, mem));
+}
+
+/* A subroutine of the atomic operation splitters. Emit a store-conditional
+ instruction in MODE. */
+
+static void
+emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
+{
+ rtx (*fn) (rtx, rtx, rtx) = NULL;
+ if (mode == SImode)
+ fn = gen_store_conditional_si;
+ else if (mode == DImode)
+ fn = gen_store_conditional_di;
+
+ /* Emit sync before stwcx. to address PPC405 Erratum. */
+ if (PPC405_ERRATUM77)
+ emit_insn (gen_memory_barrier ());
+
+ emit_insn (fn (res, mem, val));
+}
+
+/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
+ to perform. MEM is the memory on which to operate. VAL is the second
+ operand of the binary operator. BEFORE and AFTER are optional locations to
+ return the value of MEM either before or after the operation. SCRATCH is
+ a scratch register. */
+
+void
+rs6000_split_atomic_op (enum rtx_code code, rtx mem, rtx val,
+ rtx before, rtx after, rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond = gen_rtx_REG (CCmode, CR0_REGNO);
+
+ emit_insn (gen_lwsync ());
+
+ label = gen_label_rtx ();
+ emit_label (label);
+ label = gen_rtx_LABEL_REF (VOIDmode, label);
+
+ if (before == NULL_RTX)
+ before = scratch;
+ emit_load_locked (mode, before, mem);
+
+ if (code == NOT)
+ x = gen_rtx_IOR (mode,
+ gen_rtx_NOT (mode, before),
+ gen_rtx_NOT (mode, val));
+ else if (code == AND)
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (2, before, val), UNSPEC_AND);
+ else
+ x = gen_rtx_fmt_ee (code, mode, before, val);
+
+ if (after != NULL_RTX)
+ emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label);
+
+ emit_insn (gen_isync ());
+}
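+
+ /* The splitter above expands to the canonical load-reserved /
+ store-conditional loop, roughly (register numbers illustrative):
+
+ lwsync
+ 1: lwarx r9,0,r3 # load and reserve
+ add r9,r9,r4 # <op> with VAL
+ stwcx. r9,0,r3 # store iff still reserved
+ bne- 1b # lost reservation, retry
+ isync
+ */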
+
+/* Expand an atomic compare and swap operation. MEM is the memory on which
+ to operate. OLDVAL is the old value to be compared. NEWVAL is the new
+ value to be stored. SCRATCH is a scratch GPR. */
+
+void
+rs6000_split_compare_and_swap (rtx retval, rtx mem, rtx oldval, rtx newval,
+ rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label1, label2, x, cond = gen_rtx_REG (CCmode, CR0_REGNO);
+
+ emit_insn (gen_lwsync ());
+
+ label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ emit_label (XEXP (label1, 0));
+
+ emit_load_locked (mode, retval, mem);
+
+ x = gen_rtx_COMPARE (CCmode, retval, oldval);
+ emit_insn (gen_rtx_SET (VOIDmode, cond, x));
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label2);
+
+ emit_move_insn (scratch, newval);
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label1);
+
+ emit_insn (gen_isync ());
+ emit_label (XEXP (label2, 0));
+}
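+
+ /* Roughly, the emitted sequence is (register numbers illustrative):
+
+ lwsync
+ 1: lwarx r9,0,r3 # load and reserve
+ cmpw r9,r4 # compare with OLDVAL
+ bne- 2f # mismatch: exit, RETVAL holds old value
+ stwcx. r5,0,r3 # try to store NEWVAL
+ bne- 1b # lost reservation, retry
+ isync
+ 2:
+ */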
+
+/* Expand an atomic test and set operation. MEM is the memory on which
+ to operate. VAL is the value set. SCRATCH is a scratch GPR. */
+
+void
+rs6000_split_lock_test_and_set (rtx retval, rtx mem, rtx val, rtx scratch)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx label, x, cond = gen_rtx_REG (CCmode, CR0_REGNO);
+
+ label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ emit_load_locked (mode, retval, mem);
+ emit_move_insn (scratch, val);
+ emit_store_conditional (mode, cond, mem, scratch);
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label);
+
+ emit_insn (gen_isync ());
+}
+
+void
+rs6000_expand_compare_and_swapqhi (rtx dst, rtx mem, rtx oldval, rtx newval)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx addrSI, align, wdst, shift, mask;
+ HOST_WIDE_INT shift_mask = mode == QImode ? 0x18 : 0x10;
+ HOST_WIDE_INT imask = GET_MODE_MASK (mode);
+
+ /* Shift amount for subword relative to aligned word. */
+ addrSI = force_reg (GET_MODE (XEXP (mem, 0)), XEXP (mem, 0));
+ addrSI = force_reg (SImode, gen_lowpart_common (SImode, addrSI));
+ shift = gen_reg_rtx (SImode);
+ emit_insn (gen_rlwinm (shift, addrSI, GEN_INT (3),
+ GEN_INT (shift_mask)));
+ emit_insn (gen_xorsi3 (shift, shift, GEN_INT (shift_mask)));
+
+ /* Shift and mask old value into position within word. */
+ oldval = convert_modes (SImode, mode, oldval, 1);
+ oldval = expand_binop (SImode, and_optab,
+ oldval, GEN_INT (imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_ashlsi3 (oldval, oldval, shift));
+
+ /* Shift and mask new value into position within word. */
+ newval = convert_modes (SImode, mode, newval, 1);
+ newval = expand_binop (SImode, and_optab,
+ newval, GEN_INT (imask), NULL_RTX,
+ 1, OPTAB_LIB_WIDEN);
+ emit_insn (gen_ashlsi3 (newval, newval, shift));
+
+ /* Mask for insertion. */
+ mask = gen_reg_rtx (SImode);
+ emit_move_insn (mask, GEN_INT (imask));
+ emit_insn (gen_ashlsi3 (mask, mask, shift));
+
+ /* Address of aligned word containing subword. */
+ align = expand_binop (Pmode, and_optab, XEXP (mem, 0), GEN_INT (-4),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ mem = change_address (mem, SImode, align);
+ set_mem_align (mem, 32);
+ MEM_VOLATILE_P (mem) = 1;
+
+ wdst = gen_reg_rtx (SImode);
+ emit_insn (gen_sync_compare_and_swapqhi_internal (wdst, mask,
+ oldval, newval, mem));
+
+ /* Shift the result back. */
+ emit_insn (gen_lshrsi3 (wdst, wdst, shift));
+
+ emit_move_insn (dst, gen_lowpart (mode, wdst));
+}
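+
+ /* Shift example (big-endian, HImode, halfword at address A with
+ A % 4 == 0): rlwinm gives (A << 3) & 0x10 = 0, and the xor with
+ 0x10 yields shift = 16, placing the halfword in the upper 16 bits
+ of the aligned word, as big-endian layout requires. */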
+
+void
+rs6000_split_compare_and_swapqhi (rtx dest, rtx mask,
+ rtx oldval, rtx newval, rtx mem,
+ rtx scratch)
+{
+ rtx label1, label2, x, cond = gen_rtx_REG (CCmode, CR0_REGNO);
+
+ emit_insn (gen_lwsync ());
+ label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ emit_label (XEXP (label1, 0));
+
+ emit_load_locked (SImode, scratch, mem);
+
+ /* Mask subword within loaded value for comparison with oldval.
+ Use UNSPEC_AND to avoid clobber. */
+ emit_insn (gen_rtx_SET (SImode, dest,
+ gen_rtx_UNSPEC (SImode,
+ gen_rtvec (2, scratch, mask),
+ UNSPEC_AND)));
+
+ x = gen_rtx_COMPARE (CCmode, dest, oldval);
+ emit_insn (gen_rtx_SET (VOIDmode, cond, x));
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label2);
+
+ /* Clear subword within loaded value for insertion of new value. */
+ emit_insn (gen_rtx_SET (SImode, scratch,
+ gen_rtx_AND (SImode,
+ gen_rtx_NOT (SImode, mask), scratch)));
+ emit_insn (gen_iorsi3 (scratch, scratch, newval));
+ emit_store_conditional (SImode, cond, mem, scratch);
+
+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+ emit_unlikely_jump (x, label1);
+
+ emit_insn (gen_isync ());
+ emit_label (XEXP (label2, 0));
+}
+
+
+ /* Emit instructions to move SRC to DST. Called by splitters for
+ multi-register moves. It will emit at most one instruction for
+ each register that is accessed; that is, it won't emit li/lis pairs
+ (or equivalent for 64-bit code). One of SRC or DST must be a hard
+ register. */
+
+void
+rs6000_split_multireg_move (rtx dst, rtx src)
+{
+ /* The register number of the first register being moved. */
+ int reg;
+ /* The mode that is to be moved. */
+ enum machine_mode mode;
+ /* The mode that the move is being done in, and its size. */
+ enum machine_mode reg_mode;
+ int reg_mode_size;
+ /* The number of registers that will be moved. */
+ int nregs;
+
+ reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
+ mode = GET_MODE (dst);
+ nregs = hard_regno_nregs[reg][mode];
+ if (FP_REGNO_P (reg))
+ reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
+ ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
+ else if (ALTIVEC_REGNO_P (reg))
+ reg_mode = V16QImode;
+ else if (TARGET_E500_DOUBLE && mode == TFmode)
+ reg_mode = DFmode;
+ else
+ reg_mode = word_mode;
+ reg_mode_size = GET_MODE_SIZE (reg_mode);
+
+ gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
+
+ if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
+ {
+ /* Move register range backwards, if we might have destructive
+ overlap. */
+ int i;
+ for (i = nregs - 1; i >= 0; i--)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ simplify_gen_subreg (reg_mode, dst, mode,
+ i * reg_mode_size),
+ simplify_gen_subreg (reg_mode, src, mode,
+ i * reg_mode_size)));
+ }
+ else
+ {
+ int i;
+ int j = -1;
+ bool used_update = false;
+ rtx restore_basereg = NULL_RTX;
+
+ if (MEM_P (src) && INT_REGNO_P (reg))
+ {
+ rtx breg;
+
+ if (GET_CODE (XEXP (src, 0)) == PRE_INC
+ || GET_CODE (XEXP (src, 0)) == PRE_DEC)
+ {
+ rtx delta_rtx;
+ breg = XEXP (XEXP (src, 0), 0);
+ delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
+ ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
+ : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
+ emit_insn (gen_add3_insn (breg, breg, delta_rtx));
+ src = replace_equiv_address (src, breg);
+ }
+ else if (! rs6000_offsettable_memref_p (src))
+ {
+ if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
+ {
+ rtx basereg = XEXP (XEXP (src, 0), 0);
+ if (TARGET_UPDATE)
+ {
+ rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, ndst,
+ gen_rtx_MEM (reg_mode, XEXP (src, 0))));
+ used_update = true;
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, basereg,
+ XEXP (XEXP (src, 0), 1)));
+ src = replace_equiv_address (src, basereg);
+ }
+ else
+ {
+ rtx basereg = gen_rtx_REG (Pmode, reg);
+ emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
+ src = replace_equiv_address (src, basereg);
+ }
+ }
+
+ breg = XEXP (src, 0);
+ if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
+ breg = XEXP (breg, 0);
+
+ /* If the base register we are using to address memory is
+ also a destination reg, then change that register last. */
+ if (REG_P (breg)
+ && REGNO (breg) >= REGNO (dst)
+ && REGNO (breg) < REGNO (dst) + nregs)
+ j = REGNO (breg) - REGNO (dst);
+ }
+ else if (MEM_P (dst) && INT_REGNO_P (reg))
+ {
+ rtx breg;
+
+ if (GET_CODE (XEXP (dst, 0)) == PRE_INC
+ || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
+ {
+ rtx delta_rtx;
+ breg = XEXP (XEXP (dst, 0), 0);
+ delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
+ ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
+ : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
+
+ /* We have to update the breg before doing the store.
+ Use store with update, if available. */
+
+ if (TARGET_UPDATE)
+ {
+ rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
+ emit_insn (TARGET_32BIT
+ ? (TARGET_POWERPC64
+ ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
+ : gen_movsi_update (breg, breg, delta_rtx, nsrc))
+ : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
+ used_update = true;
+ }
+ else
+ emit_insn (gen_add3_insn (breg, breg, delta_rtx));
+ dst = replace_equiv_address (dst, breg);
+ }
+ else if (!rs6000_offsettable_memref_p (dst)
+ && GET_CODE (XEXP (dst, 0)) != LO_SUM)
+ {
+ if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
+ {
+ rtx basereg = XEXP (XEXP (dst, 0), 0);
+ if (TARGET_UPDATE)
+ {
+ rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
+ used_update = true;
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, basereg,
+ XEXP (XEXP (dst, 0), 1)));
+ dst = replace_equiv_address (dst, basereg);
+ }
+ else
+ {
+ rtx basereg = XEXP (XEXP (dst, 0), 0);
+ rtx offsetreg = XEXP (XEXP (dst, 0), 1);
+ gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
+ && REG_P (basereg)
+ && REG_P (offsetreg)
+ && REGNO (basereg) != REGNO (offsetreg));
+ if (REGNO (basereg) == 0)
+ {
+ rtx tmp = offsetreg;
+ offsetreg = basereg;
+ basereg = tmp;
+ }
+ emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
+ restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
+ dst = replace_equiv_address (dst, basereg);
+ }
+ }
+ else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
+ gcc_assert (rs6000_offsettable_memref_p (dst));
+ }
+
+ for (i = 0; i < nregs; i++)
+ {
+ /* Calculate index to next subword. */
+ ++j;
+ if (j == nregs)
+ j = 0;
+
+ /* If compiler already emitted move of first word by
+ store with update, no need to do anything. */
+ if (j == 0 && used_update)
+ continue;
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ simplify_gen_subreg (reg_mode, dst, mode,
+ j * reg_mode_size),
+ simplify_gen_subreg (reg_mode, src, mode,
+ j * reg_mode_size)));
+ }
+ if (restore_basereg != NULL_RTX)
+ emit_insn (restore_basereg);
+ }
+}
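+
+ /* Overlap example: splitting (set (reg:DI 4) (reg:DI 3)) on 32-bit
+ copies backwards -- r5 <- r4 first, then r4 <- r3 -- because a
+ forward order would clobber r4 before it is read. */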
+
+
+/* This page contains routines that are used to determine what the
+ function prologue and epilogue code will do and write them out. */
+
+/* Return the first fixed-point register that is required to be
+ saved. 32 if none. */
+
+int
+first_reg_to_save (void)
+{
+ int first_reg;
+
+ /* Find lowest numbered live register. */
+ for (first_reg = 13; first_reg <= 31; first_reg++)
+ if (df_regs_ever_live_p (first_reg)
+ && (! call_used_regs[first_reg]
+ || (first_reg == RS6000_PIC_OFFSET_TABLE_REGNUM
+ && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
+ || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
+ || (TARGET_TOC && TARGET_MINIMAL_TOC)))))
+ break;
+
+#if TARGET_MACHO
+ if (flag_pic
+ && crtl->uses_pic_offset_table
+ && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
+ return RS6000_PIC_OFFSET_TABLE_REGNUM;
+#endif
+
+ return first_reg;
+}
+
+/* Similar, for FP regs. */
+
+int
+first_fp_reg_to_save (void)
+{
+ int first_reg;
+
+ /* Find lowest numbered live register. */
+ for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
+ if (df_regs_ever_live_p (first_reg))
+ break;
+
+ return first_reg;
+}
+
+/* Similar, for AltiVec regs. */
+
+static int
+first_altivec_reg_to_save (void)
+{
+ int i;
+
+ /* Stack frame remains as is unless we are in AltiVec ABI. */
+ if (! TARGET_ALTIVEC_ABI)
+ return LAST_ALTIVEC_REGNO + 1;
+
+ /* On Darwin, the unwind routines are compiled without
+ TARGET_ALTIVEC, and use save_world to save/restore the
+ altivec registers when necessary. */
+ if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
+ && ! TARGET_ALTIVEC)
+ return FIRST_ALTIVEC_REGNO + 20;
+
+ /* Find lowest numbered live register. */
+ for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (df_regs_ever_live_p (i))
+ break;
+
+ return i;
+}
+
+/* Return a 32-bit mask of the AltiVec registers we need to set in
+ VRSAVE. Bit n of the return value is 1 if Vn is live; bits are
+ numbered from the MSB of the 32-bit word, which corresponds to V0. */
+
+static unsigned int
+compute_vrsave_mask (void)
+{
+ unsigned int i, mask = 0;
+
+ /* On Darwin, the unwind routines are compiled without
+ TARGET_ALTIVEC, and use save_world to save/restore the
+ call-saved altivec registers when necessary. */
+ if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
+ && ! TARGET_ALTIVEC)
+ mask |= 0xFFF;
+
+ /* First, find out if we use _any_ altivec registers. */
+ for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (df_regs_ever_live_p (i))
+ mask |= ALTIVEC_REG_BIT (i);
+
+ if (mask == 0)
+ return mask;
+
+ /* Next, remove the argument registers from the set. These must
+ be in the VRSAVE mask set by the caller, so we don't need to add
+ them in again. More importantly, the mask we compute here is
+ used to generate CLOBBERs in the set_vrsave insn, and we do not
+ wish the argument registers to die. */
+ for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
+ mask &= ~ALTIVEC_REG_BIT (i);
+
+ /* Similarly, remove the return value from the set. */
+ {
+ bool yes = false;
+ diddle_return_value (is_altivec_return_reg, &yes);
+ if (yes)
+ mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
+ }
+
+ return mask;
+}
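+
+ /* ALTIVEC_REG_BIT puts V0 at the most significant bit, so e.g. a
+ function whose only live AltiVec register is V20 computes the mask
+ 0x80000000 >> 20 == 0x00000800. */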
+
+/* For a very restricted set of circumstances, we can cut down the
+ size of prologues/epilogues by calling our own save/restore-the-world
+ routines. */
+
+static void
+compute_save_world_info (rs6000_stack_t *info_ptr)
+{
+ info_ptr->world_save_p = 1;
+ info_ptr->world_save_p
+ = (WORLD_SAVE_P (info_ptr)
+ && DEFAULT_ABI == ABI_DARWIN
+ && !cfun->has_nonlocal_label
+ && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
+ && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
+ && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
+ && info_ptr->cr_save_p);
+
+ /* This will not work in conjunction with sibcalls. Make sure there
+ are none. (This check is expensive, but seldom executed.) */
+ if (WORLD_SAVE_P (info_ptr))
+ {
+ rtx insn;
+ for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == CALL_INSN
+ && SIBLING_CALL_P (insn))
+ {
+ info_ptr->world_save_p = 0;
+ break;
+ }
+ }
+
+ if (WORLD_SAVE_P (info_ptr))
+ {
+ /* Even if we're not touching VRsave, make sure there's room on the
+ stack for it, if it looks like we're calling SAVE_WORLD, which
+ will attempt to save it. */
+ info_ptr->vrsave_size = 4;
+
+ /* If we are going to save the world, we need to save the link register too. */
+ info_ptr->lr_save_p = 1;
+
+ /* "Save" the VRsave register too if we're saving the world. */
+ if (info_ptr->vrsave_mask == 0)
+ info_ptr->vrsave_mask = compute_vrsave_mask ();
+
+ /* Because the Darwin register save/restore routines only handle
+ F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
+ check. */
+ gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
+ && (info_ptr->first_altivec_reg_save
+ >= FIRST_SAVED_ALTIVEC_REGNO));
+ }
+ return;
+}
+
+
+static void
+is_altivec_return_reg (rtx reg, void *xyes)
+{
+ bool *yes = (bool *) xyes;
+ if (REGNO (reg) == ALTIVEC_ARG_RETURN)
+ *yes = true;
+}
+
+
+ /* Determine the strategy for saving/restoring registers. */
+
+enum {
+ SAVRES_MULTIPLE = 0x1,
+ SAVE_INLINE_FPRS = 0x2,
+ SAVE_INLINE_GPRS = 0x4,
+ REST_INLINE_FPRS = 0x8,
+ REST_INLINE_GPRS = 0x10,
+ SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
+ SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
+ REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80
+};
+
+static int
+rs6000_savres_strategy (rs6000_stack_t *info,
+ bool using_static_chain_p)
+{
+ int strategy = 0;
+
+ if (TARGET_MULTIPLE
+ && !TARGET_POWERPC64
+ && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
+ && info->first_gp_reg_save < 31
+ && no_global_regs_above (info->first_gp_reg_save, /*gpr=*/true))
+ strategy |= SAVRES_MULTIPLE;
+
+ if (crtl->calls_eh_return
+ || cfun->machine->ra_need_lr
+ || info->total_size > 32767)
+ strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
+ | SAVE_INLINE_GPRS | REST_INLINE_GPRS);
+
+ if (info->first_fp_reg_save == 64
+ || FP_SAVE_INLINE (info->first_fp_reg_save)
+ /* The out-of-line FP routines use double-precision stores;
+ we can't use those routines if we don't have such stores. */
+ || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
+ || !no_global_regs_above (info->first_fp_reg_save, /*gpr=*/false))
+ strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
+
+ if (info->first_gp_reg_save == 32
+ || GP_SAVE_INLINE (info->first_gp_reg_save)
+ || !((strategy & SAVRES_MULTIPLE)
+ || no_global_regs_above (info->first_gp_reg_save, /*gpr=*/true)))
+ strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
+
+ /* Don't bother to try to save things out-of-line if r11 is occupied
+ by the static chain. It would require too much fiddling and the
+ static chain is rarely used anyway. */
+ if (using_static_chain_p)
+ strategy |= SAVE_INLINE_FPRS | SAVE_INLINE_GPRS;
+
+ /* If we are going to use store multiple, then don't even bother
+ with the out-of-line routines, since the store-multiple
+ instruction will always be smaller. */
+ if ((strategy & SAVRES_MULTIPLE))
+ strategy |= SAVE_INLINE_GPRS;
+
+ /* The situation is more complicated with load multiple. We'd
+ prefer to use the out-of-line routines for restores, since the
+ "exit" out-of-line routines can handle the restore of LR and the
+ frame teardown. However, it doesn't make sense to use the
+ out-of-line routine if that is the only reason we'd need to save
+ LR, and we can't use the "exit" out-of-line gpr restore if we
+ have saved some fprs; in those cases it is advantageous to use
+ load multiple when available. */
+ if ((strategy & SAVRES_MULTIPLE)
+ && (!info->lr_save_p
+ || info->first_fp_reg_save != 64))
+ strategy |= REST_INLINE_GPRS;
+
+ /* We can only use load multiple or the out-of-line routines to
+ restore if we've used store multiple or out-of-line routines
+ in the prologue, i.e. if we've saved all the registers from
+ first_gp_reg_save. Otherwise, we risk loading garbage. */
+ if ((strategy & (SAVE_INLINE_GPRS | SAVRES_MULTIPLE)) == SAVE_INLINE_GPRS)
+ strategy |= REST_INLINE_GPRS;
+
+ /* Saving CR interferes with the exit routines used on the SPE, so
+ just punt here. */
+ if (TARGET_SPE_ABI
+ && info->spe_64bit_regs_used
+ && info->cr_save_p)
+ strategy |= REST_INLINE_GPRS;
+
+#ifdef POWERPC_LINUX
+ if (TARGET_64BIT)
+ {
+ if (!(strategy & SAVE_INLINE_FPRS))
+ strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
+ else if (!(strategy & SAVE_INLINE_GPRS)
+ && info->first_fp_reg_save == 64)
+ strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
+ }
+#else
+ if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
+ strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
+#endif
+ return strategy;
+}
+
+/* Calculate the stack information for the current function. This is
+ complicated by having two separate calling sequences, the AIX calling
+ sequence and the V.4 calling sequence.
+
+ AIX (and Darwin/Mac OS X) stack frames look like:
+ 32-bit 64-bit
+ SP----> +---------------------------------------+
+ | back chain to caller | 0 0
+ +---------------------------------------+
+ | saved CR | 4 8 (8-11)
+ +---------------------------------------+
+ | saved LR | 8 16
+ +---------------------------------------+
+ | reserved for compilers | 12 24
+ +---------------------------------------+
+ | reserved for binders | 16 32
+ +---------------------------------------+
+ | saved TOC pointer | 20 40
+ +---------------------------------------+
+ | Parameter save area (P) | 24 48
+ +---------------------------------------+
+ | Alloca space (A) | 24+P etc.
+ +---------------------------------------+
+ | Local variable space (L) | 24+P+A
+ +---------------------------------------+
+ | Float/int conversion temporary (X) | 24+P+A+L
+ +---------------------------------------+
+ | Save area for AltiVec registers (W) | 24+P+A+L+X
+ +---------------------------------------+
+ | AltiVec alignment padding (Y) | 24+P+A+L+X+W
+ +---------------------------------------+
+ | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
+ +---------------------------------------+
+ | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
+ +---------------------------------------+
+ | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
+ +---------------------------------------+
+ old SP->| back chain to caller's caller |
+ +---------------------------------------+
+
+ The required alignment for AIX configurations is two words (i.e., 8
+ or 16 bytes).
+
+
+ V.4 stack frames look like:
+
+ SP----> +---------------------------------------+
+ | back chain to caller | 0
+ +---------------------------------------+
+ | caller's saved LR | 4
+ +---------------------------------------+
+ | Parameter save area (P) | 8
+ +---------------------------------------+
+ | Alloca space (A) | 8+P
+ +---------------------------------------+
+ | Varargs save area (V) | 8+P+A
+ +---------------------------------------+
+ | Local variable space (L) | 8+P+A+V
+ +---------------------------------------+
+ | Float/int conversion temporary (X) | 8+P+A+V+L
+ +---------------------------------------+
+ | Save area for AltiVec registers (W) | 8+P+A+V+L+X
+ +---------------------------------------+
+ | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
+ +---------------------------------------+
+ | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
+ +---------------------------------------+
+ | SPE: area for 64-bit GP registers |
+ +---------------------------------------+
+ | SPE alignment padding |
+ +---------------------------------------+
+ | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
+ +---------------------------------------+
+ | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
+ +---------------------------------------+
+ | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
+ +---------------------------------------+
+ old SP->| back chain to caller's caller |
+ +---------------------------------------+
+
+ The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
+ given. (But note below and in sysv4.h that we require only 8 and
+ may round up the size of our stack frame anyway. The historical
+ reason is early versions of powerpc-linux which didn't properly
+ align the stack at program startup. A happy side-effect is that
+ -mno-eabi libraries can be used with -meabi programs.)
+
+ The EABI configuration defaults to the V.4 layout. However,
+ the stack alignment requirements may differ. If -mno-eabi is not
+ given, the required stack alignment is 8 bytes; if -mno-eabi is
+ given, the required alignment is 16 bytes. (But see V.4 comment
+ above.) */
+
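+/* An illustrative worked example (numbers invented for this comment,
+ not taken from any particular build): on 32-bit AIX with a 32-byte
+ parameter save area (P=32), no alloca (A=0), 16 bytes of locals
+ (L=16) and no conversion temporary, AltiVec or VRSAVE areas
+ (X=W=Y=Z=0), the GP save area G starts at offset 24+32+0+16 = 72
+ from the incoming SP, with the FP save area F directly above it. */
+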
+#ifndef ABI_STACK_BOUNDARY
+#define ABI_STACK_BOUNDARY STACK_BOUNDARY
+#endif
+
+static rs6000_stack_t *
+rs6000_stack_info (void)
+{
+#ifdef ENABLE_CHECKING
+ static rs6000_stack_t info_save;
+#endif
+ rs6000_stack_t *info_ptr = &stack_info;
+ int reg_size = TARGET_32BIT ? 4 : 8;
+ int ehrd_size;
+ int save_align;
+ int first_gp;
+ HOST_WIDE_INT non_fixed_size;
+ bool using_static_chain_p;
+
+#ifdef ENABLE_CHECKING
+ memcpy (&info_save, &stack_info, sizeof stack_info);
+#else
+ if (reload_completed && info_ptr->reload_completed)
+ return info_ptr;
+#endif
+
+ memset (&stack_info, 0, sizeof (stack_info));
+ info_ptr->reload_completed = reload_completed;
+
+ if (TARGET_SPE)
+ {
+ /* Cache value so we don't rescan instruction chain over and over. */
+ if (cfun->machine->insn_chain_scanned_p == 0)
+ cfun->machine->insn_chain_scanned_p
+ = spe_func_has_64bit_regs_p () + 1;
+ info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
+ }
+
+ /* Select which calling sequence. */
+ info_ptr->abi = DEFAULT_ABI;
+
+ /* Calculate which registers need to be saved & save area size. */
+ info_ptr->first_gp_reg_save = first_reg_to_save ();
+ /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
+ even if it currently looks like we won't. Reload may need it to
+ get at a constant; if so, it will have already created a constant
+ pool entry for it. */
+ if (((TARGET_TOC && TARGET_MINIMAL_TOC)
+ || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
+ || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
+ && crtl->uses_const_pool
+ && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
+ first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
+ else
+ first_gp = info_ptr->first_gp_reg_save;
+
+ info_ptr->gp_size = reg_size * (32 - first_gp);
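+ /* Illustrative example: with first_gp == 28 on a 32-bit target,
+ gp_size is 4 * (32 - 28) = 16 bytes, covering r28-r31. */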
+
+ /* For the SPE, we have an additional upper 32-bits on each GPR.
+ Ideally we should save the entire 64-bits only when the upper
+ half is used in SIMD instructions. Since we only record
+ registers live (not the size they are used in), this proves
+ difficult because we'd have to traverse the instruction chain at
+ the right time, taking reload into account. This is a real pain,
+ so we opt to always save the GPRs in 64-bits if even one register
+ gets used in 64-bits. Otherwise, all the registers in the frame
+ get saved in 32-bits.
+
+ So, when we save all GPRs (except the SP) in 64-bits, the
+ traditional GP save area will be empty. */
+ if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
+ info_ptr->gp_size = 0;
+
+ info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
+ info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
+
+ info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
+ info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
+ - info_ptr->first_altivec_reg_save);
+
+ /* Does this function call anything? */
+ info_ptr->calls_p = (! current_function_is_leaf
+ || cfun->machine->ra_needs_full_frame);
+
+ /* Determine if we need to save the condition code registers. */
+ if (df_regs_ever_live_p (CR2_REGNO)
+ || df_regs_ever_live_p (CR3_REGNO)
+ || df_regs_ever_live_p (CR4_REGNO))
+ {
+ info_ptr->cr_save_p = 1;
+ if (DEFAULT_ABI == ABI_V4)
+ info_ptr->cr_size = reg_size;
+ }
+
+ /* If the current function calls __builtin_eh_return, then we need
+ to allocate stack space for registers that will hold data for
+ the exception handler. */
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i;
+ for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
+ continue;
+
+ /* SPE saves EH registers in 64-bits. */
+ ehrd_size = i * (TARGET_SPE_ABI
+ && info_ptr->spe_64bit_regs_used != 0
+ ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
+ }
+ else
+ ehrd_size = 0;
+
+ /* Determine various sizes. */
+ info_ptr->reg_size = reg_size;
+ info_ptr->fixed_size = RS6000_SAVE_AREA;
+ info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
+ info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
+ TARGET_ALTIVEC ? 16 : 8);
+ if (FRAME_GROWS_DOWNWARD)
+ info_ptr->vars_size
+ += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
+ + info_ptr->parm_size,
+ ABI_STACK_BOUNDARY / BITS_PER_UNIT)
+ - (info_ptr->fixed_size + info_ptr->vars_size
+ + info_ptr->parm_size);
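+
+ /* For reference, RS6000_ALIGN (n, a) rounds N up to a multiple of
+ the power-of-two alignment A, e.g. RS6000_ALIGN (20, 16) == 32;
+ the adjustment above pads vars_size so that the combined
+ fixed+vars+parm region ends on an ABI_STACK_BOUNDARY boundary. */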
+
+ if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
+ info_ptr->spe_gp_size = 8 * (32 - first_gp);
+ else
+ info_ptr->spe_gp_size = 0;
+
+ if (TARGET_ALTIVEC_ABI)
+ info_ptr->vrsave_mask = compute_vrsave_mask ();
+ else
+ info_ptr->vrsave_mask = 0;
+
+ if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
+ info_ptr->vrsave_size = 4;
+ else
+ info_ptr->vrsave_size = 0;
+
+ compute_save_world_info (info_ptr);
+
+ /* Calculate the offsets. */
+ switch (DEFAULT_ABI)
+ {
+ case ABI_NONE:
+ default:
+ gcc_unreachable ();
+
+ case ABI_AIX:
+ case ABI_DARWIN:
+ info_ptr->fp_save_offset = - info_ptr->fp_size;
+ info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
+
+ if (TARGET_ALTIVEC_ABI)
+ {
+ info_ptr->vrsave_save_offset
+ = info_ptr->gp_save_offset - info_ptr->vrsave_size;
+
+ /* Align stack so vector save area is on a quadword boundary.
+ The padding goes above the vectors. */
+ if (info_ptr->altivec_size != 0)
+ info_ptr->altivec_padding_size
+ = info_ptr->vrsave_save_offset & 0xF;
+ else
+ info_ptr->altivec_padding_size = 0;
+
+ info_ptr->altivec_save_offset
+ = info_ptr->vrsave_save_offset
+ - info_ptr->altivec_padding_size
+ - info_ptr->altivec_size;
+ gcc_assert (info_ptr->altivec_size == 0
+ || info_ptr->altivec_save_offset % 16 == 0);
+
+ /* Adjust for AltiVec case. */
+ info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
+ }
+ else
+ info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
+ info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
+ info_ptr->lr_save_offset = 2*reg_size;
+ break;
+
+ case ABI_V4:
+ info_ptr->fp_save_offset = - info_ptr->fp_size;
+ info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
+ info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
+
+ if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
+ {
+ /* Align stack so the SPE GPR save area is on a double-word
+ boundary. */
+ if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
+ info_ptr->spe_padding_size
+ = 8 - (-info_ptr->cr_save_offset % 8);
+ else
+ info_ptr->spe_padding_size = 0;
+
+ info_ptr->spe_gp_save_offset
+ = info_ptr->cr_save_offset
+ - info_ptr->spe_padding_size
+ - info_ptr->spe_gp_size;
+
+ /* Adjust for SPE case. */
+ info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
+ }
+ else if (TARGET_ALTIVEC_ABI)
+ {
+ info_ptr->vrsave_save_offset
+ = info_ptr->cr_save_offset - info_ptr->vrsave_size;
+
+ /* Align stack so vector save area is on a quadword boundary. */
+ if (info_ptr->altivec_size != 0)
+ info_ptr->altivec_padding_size
+ = 16 - (-info_ptr->vrsave_save_offset % 16);
+ else
+ info_ptr->altivec_padding_size = 0;
+
+ info_ptr->altivec_save_offset
+ = info_ptr->vrsave_save_offset
+ - info_ptr->altivec_padding_size
+ - info_ptr->altivec_size;
+
+ /* Adjust for AltiVec case. */
+ info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
+ }
+ else
+ info_ptr->ehrd_offset = info_ptr->cr_save_offset;
+ info_ptr->ehrd_offset -= ehrd_size;
+ info_ptr->lr_save_offset = reg_size;
+ break;
+ }
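+
+ /* Note on sign conventions: the offsets computed above are relative
+ to the top of the frame (the old SP), so the register save areas
+ get negative offsets, while the lr/cr slots get small positive
+ offsets into the linkage area above the frame, matching the
+ layout diagrams above. */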
+
+ save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
+ info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
+ + info_ptr->gp_size
+ + info_ptr->altivec_size
+ + info_ptr->altivec_padding_size
+ + info_ptr->spe_gp_size
+ + info_ptr->spe_padding_size
+ + ehrd_size
+ + info_ptr->cr_size
+ + info_ptr->vrsave_size,
+ save_align);
+
+ non_fixed_size = (info_ptr->vars_size
+ + info_ptr->parm_size
+ + info_ptr->save_size);
+
+ info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
+ ABI_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ /* Determine if we need to save the link register. */
+ if (info_ptr->calls_p
+ || (DEFAULT_ABI == ABI_AIX
+ && crtl->profile
+ && !TARGET_PROFILE_KERNEL)
+ || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
+#ifdef TARGET_RELOCATABLE
+ || (TARGET_RELOCATABLE && (get_pool_size () != 0))
+#endif
+ || rs6000_ra_ever_killed ())
+ info_ptr->lr_save_p = 1;
+
+ using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
+ && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
+ && call_used_regs[STATIC_CHAIN_REGNUM]);
+ info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
+ using_static_chain_p);
+
+ if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
+ || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
+ || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
+ || !(info_ptr->savres_strategy & REST_INLINE_FPRS))
+ info_ptr->lr_save_p = 1;
+
+ if (info_ptr->lr_save_p)
+ df_set_regs_ever_live (LR_REGNO, true);
+
+ /* Determine if we need to allocate any stack frame:
+
+ For AIX we need to push the stack if a frame pointer is needed
+ (because the stack might be dynamically adjusted), if we are
+ debugging, if we make calls, or if the sum of fp_save, gp_save,
+ and local variables are more than the space needed to save all
+ non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
+ + 18*8 = 288 (GPR13 reserved).
+
+ For V.4 we don't have the stack cushion that AIX uses, but assume
+ that the debugger can handle stackless frames. */
+
+ if (info_ptr->calls_p)
+ info_ptr->push_p = 1;
+
+ else if (DEFAULT_ABI == ABI_V4)
+ info_ptr->push_p = non_fixed_size != 0;
+
+ else if (frame_pointer_needed)
+ info_ptr->push_p = 1;
+
+ else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
+ info_ptr->push_p = 1;
+
+ else
+ info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
+
+ /* Zero offsets if we're not saving those registers. */
+ if (info_ptr->fp_size == 0)
+ info_ptr->fp_save_offset = 0;
+
+ if (info_ptr->gp_size == 0)
+ info_ptr->gp_save_offset = 0;
+
+ if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
+ info_ptr->altivec_save_offset = 0;
+
+ if (! TARGET_ALTIVEC_ABI || info_ptr->vrsave_mask == 0)
+ info_ptr->vrsave_save_offset = 0;
+
+ if (! TARGET_SPE_ABI
+ || info_ptr->spe_64bit_regs_used == 0
+ || info_ptr->spe_gp_size == 0)
+ info_ptr->spe_gp_save_offset = 0;
+
+ if (! info_ptr->lr_save_p)
+ info_ptr->lr_save_offset = 0;
+
+ if (! info_ptr->cr_save_p)
+ info_ptr->cr_save_offset = 0;
+
+#ifdef ENABLE_CHECKING
+ gcc_assert (!(reload_completed && info_save.reload_completed)
+ || memcmp (&info_save, &stack_info, sizeof stack_info) == 0);
+#endif
+ return info_ptr;
+}
+
+/* Return true if the current function uses any GPRs in 64-bit SIMD
+ mode. */
+
+static bool
+spe_func_has_64bit_regs_p (void)
+{
+ rtx insns, insn;
+
+ /* Functions that save and restore all the call-saved registers will
+ need to save/restore the registers in 64-bits. */
+ if (crtl->calls_eh_return
+ || cfun->calls_setjmp
+ || crtl->has_nonlocal_goto)
+ return true;
+
+ insns = get_insns ();
+
+ for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ rtx i;
+
+ /* FIXME: This should be implemented with attributes...
+
+ (set_attr "spe64" "true")....then,
+ if (get_spe64(insn)) return true;
+
+ It's the only reliable way to do the stuff below. */
+
+ i = PATTERN (insn);
+ if (GET_CODE (i) == SET)
+ {
+ enum machine_mode mode = GET_MODE (SET_SRC (i));
+
+ if (SPE_VECTOR_MODE (mode))
+ return true;
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void
+debug_stack_info (rs6000_stack_t *info)
+{
+ const char *abi_string;
+
+ if (! info)
+ info = rs6000_stack_info ();
+
+ fprintf (stderr, "\nStack information for function %s:\n",
+ ((current_function_decl && DECL_NAME (current_function_decl))
+ ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
+ : "<unknown>"));
+
+ switch (info->abi)
+ {
+ default: abi_string = "Unknown"; break;
+ case ABI_NONE: abi_string = "NONE"; break;
+ case ABI_AIX: abi_string = "AIX"; break;
+ case ABI_DARWIN: abi_string = "Darwin"; break;
+ case ABI_V4: abi_string = "V.4"; break;
+ }
+
+ fprintf (stderr, "\tABI = %5s\n", abi_string);
+
+ if (TARGET_ALTIVEC_ABI)
+ fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
+
+ if (TARGET_SPE_ABI)
+ fprintf (stderr, "\tSPE ABI extensions enabled.\n");
+
+ if (info->first_gp_reg_save != 32)
+ fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
+
+ if (info->first_fp_reg_save != 64)
+ fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
+
+ if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
+ fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
+ info->first_altivec_reg_save);
+
+ if (info->lr_save_p)
+ fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
+
+ if (info->cr_save_p)
+ fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
+
+ if (info->vrsave_mask)
+ fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
+
+ if (info->push_p)
+ fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
+
+ if (info->calls_p)
+ fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
+
+ if (info->gp_save_offset)
+ fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
+
+ if (info->fp_save_offset)
+ fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
+
+ if (info->altivec_save_offset)
+ fprintf (stderr, "\taltivec_save_offset = %5d\n",
+ info->altivec_save_offset);
+
+ if (info->spe_gp_save_offset)
+ fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
+ info->spe_gp_save_offset);
+
+ if (info->vrsave_save_offset)
+ fprintf (stderr, "\tvrsave_save_offset = %5d\n",
+ info->vrsave_save_offset);
+
+ if (info->lr_save_offset)
+ fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
+
+ if (info->cr_save_offset)
+ fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
+
+ if (info->varargs_save_offset)
+ fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
+
+ if (info->total_size)
+ fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
+ info->total_size);
+
+ if (info->vars_size)
+ fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
+ info->vars_size);
+
+ if (info->parm_size)
+ fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
+
+ if (info->fixed_size)
+ fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
+
+ if (info->gp_size)
+ fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
+
+ if (info->spe_gp_size)
+ fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
+
+ if (info->fp_size)
+ fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
+
+ if (info->altivec_size)
+ fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
+
+ if (info->vrsave_size)
+ fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
+
+ if (info->altivec_padding_size)
+ fprintf (stderr, "\taltivec_padding_size= %5d\n",
+ info->altivec_padding_size);
+
+ if (info->spe_padding_size)
+ fprintf (stderr, "\tspe_padding_size = %5d\n",
+ info->spe_padding_size);
+
+ if (info->cr_size)
+ fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
+
+ if (info->save_size)
+ fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
+
+ if (info->reg_size != 4)
+ fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
+
+ fprintf (stderr, "\n");
+}
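+
+/* Illustrative (invented) output from debug_stack_info for a small
+ V.4 function; which fields are printed depends on the function
+ being compiled:
+
+ Stack information for function foo:
+ ABI = V.4
+ calls_p = 1
+ push_p = 1
+ lr_save_p = 1
+ lr_save_offset = 4
+ gp_size = 16
+ total_size = 48
+*/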
+
+rtx
+rs6000_return_addr (int count, rtx frame)
+{
+ /* Currently we don't optimize very well between prolog and body
+ code and for PIC code the code can actually be quite bad, so
+ don't try to be too clever here. */
+ if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
+ {
+ cfun->machine->ra_needs_full_frame = 1;
+
+ return
+ gen_rtx_MEM
+ (Pmode,
+ memory_address
+ (Pmode,
+ plus_constant (copy_to_reg
+ (gen_rtx_MEM (Pmode,
+ memory_address (Pmode, frame))),
+ RETURN_ADDRESS_OFFSET)));
+ }
+
+ cfun->machine->ra_need_lr = 1;
+ return get_hard_reg_initial_val (Pmode, LR_REGNO);
+}
+
+/* Say whether a function is a candidate for sibcall handling or not.
+ We do not allow indirect calls to be optimized into sibling calls.
+ Also, we can't do it if there are any vector parameters; there's
+ nowhere to put the VRsave code so that it works; note that functions with
+ vector parameters are required to have a prototype, so the argument
+ type info must be available here. (The tail recursion case can work
+ with vector parameters, but there's no way to distinguish here.) */
+static bool
+rs6000_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ tree type;
+ if (decl)
+ {
+ if (TARGET_ALTIVEC_VRSAVE)
+ {
+ for (type = TYPE_ARG_TYPES (TREE_TYPE (decl));
+ type; type = TREE_CHAIN (type))
+ {
+ if (TREE_CODE (TREE_VALUE (type)) == VECTOR_TYPE)
+ return false;
+ }
+ }
+ if (DEFAULT_ABI == ABI_DARWIN
+ || ((*targetm.binds_local_p) (decl)
+ && (DEFAULT_ABI != ABI_AIX || !DECL_EXTERNAL (decl))))
+ {
+ tree attr_list = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+
+ if (!lookup_attribute ("longcall", attr_list)
+ || lookup_attribute ("shortcall", attr_list))
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Return NULL if INSN is valid within a low-overhead loop.
+ Otherwise return a string explaining why doloop cannot be applied.
+ PowerPC uses the COUNT register for branch on table instructions. */
+
+static const char *
+rs6000_invalid_within_doloop (const_rtx insn)
+{
+ if (CALL_P (insn))
+ return "Function call in the loop.";
+
+ if (JUMP_P (insn)
+ && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_VEC))
+ return "Computed branch in the loop.";
+
+ return NULL;
+}
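+
+/* For example, a counted loop whose body contains a function call
+ cannot become a decrement-and-branch (bdnz) loop, since the callee
+ may itself use the COUNT register. */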
+
+static int
+rs6000_ra_ever_killed (void)
+{
+ rtx top;
+ rtx reg;
+ rtx insn;
+
+ if (cfun->is_thunk)
+ return 0;
+
+ if (cfun->machine->lr_save_state)
+ return cfun->machine->lr_save_state - 1;
+
+ /* regs_ever_live has LR marked as used if any sibcalls are present,
+ but this should not force saving and restoring in the
+ pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
+ clobbers LR, so that is inappropriate. */
+
+ /* Also, the prologue can generate a store into LR that
+ doesn't really count, like this:
+
+ move LR->R0
+ bcl to set PIC register
+ move LR->R31
+ move R0->LR
+
+ When we're called from the epilogue, we need to avoid counting
+ this as a store. */
+
+ push_topmost_sequence ();
+ top = get_insns ();
+ pop_topmost_sequence ();
+ reg = gen_rtx_REG (Pmode, LR_REGNO);
+
+ for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ if (CALL_P (insn))
+ {
+ if (!SIBLING_CALL_P (insn))
+ return 1;
+ }
+ else if (find_regno_note (insn, REG_INC, LR_REGNO))
+ return 1;
+ else if (set_of (reg, insn) != NULL_RTX
+ && !prologue_epilogue_contains (insn))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Emit instructions needed to load the TOC register.
+ This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
+ and there is a constant pool, or for SVR4 -fpic. */
+
+void
+rs6000_emit_load_toc_table (int fromprolog)
+{
+ rtx dest;
+ dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
+
+ if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
+ {
+ char buf[30];
+ rtx lab, tmp1, tmp2, got;
+
+ lab = gen_label_rtx ();
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
+ lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+ if (flag_pic == 2)
+ got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
+ else
+ got = rs6000_got_sym ();
+ tmp1 = tmp2 = dest;
+ if (!fromprolog)
+ {
+ tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = gen_reg_rtx (Pmode);
+ }
+ emit_insn (gen_load_toc_v4_PIC_1 (lab));
+ emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
+ emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
+ emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
+ }
+ else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
+ {
+ emit_insn (gen_load_toc_v4_pic_si ());
+ emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
+ }
+ else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
+ {
+ char buf[30];
+ rtx temp0 = (fromprolog
+ ? gen_rtx_REG (Pmode, 0)
+ : gen_reg_rtx (Pmode));
+
+ if (fromprolog)
+ {
+ rtx symF, symL;
+
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
+ symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
+ symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+
+ emit_insn (gen_load_toc_v4_PIC_1 (symF));
+ emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
+ emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
+ }
+ else
+ {
+ rtx tocsym, lab;
+
+ tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
+ lab = gen_label_rtx ();
+ emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
+ emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
+ emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
+ }
+ emit_insn (gen_addsi3 (dest, temp0, dest));
+ }
+ else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
+ {
+ /* This is for AIX code running in non-PIC ELF32. */
+ char buf[30];
+ rtx realsym;
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
+ realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+
+ emit_insn (gen_elf_high (dest, realsym));
+ emit_insn (gen_elf_low (dest, dest, realsym));
+ }
+ else
+ {
+ gcc_assert (DEFAULT_ABI == ABI_AIX);
+
+ if (TARGET_32BIT)
+ emit_insn (gen_load_toc_aix_si (dest));
+ else
+ emit_insn (gen_load_toc_aix_di (dest));
+ }
+}
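+
+/* As a rough illustration, the flag_pic == 1 V.4 case above
+ typically assembles to something like
+
+ bl _GLOBAL_OFFSET_TABLE_@local-4
+ mflr 30
+
+ leaving the GOT address in the PIC register (r30). */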
+
+/* Emit instructions to restore the link register after determining where
+ its value has been stored. */
+
+void
+rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
+{
+ rs6000_stack_t *info = rs6000_stack_info ();
+ rtx operands[2];
+
+ operands[0] = source;
+ operands[1] = scratch;
+
+ if (info->lr_save_p)
+ {
+ rtx frame_rtx = stack_pointer_rtx;
+ HOST_WIDE_INT sp_offset = 0;
+ rtx tmp;
+
+ if (frame_pointer_needed
+ || cfun->calls_alloca
+ || info->total_size > 32767)
+ {
+ tmp = gen_frame_mem (Pmode, frame_rtx);
+ emit_move_insn (operands[1], tmp);
+ frame_rtx = operands[1];
+ }
+ else if (info->push_p)
+ sp_offset = info->total_size;
+
+ tmp = plus_constant (frame_rtx, info->lr_save_offset + sp_offset);
+ tmp = gen_frame_mem (Pmode, tmp);
+ emit_move_insn (tmp, operands[0]);
+ }
+ else
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
+
+ /* Freeze lr_save_p. We've just emitted rtl that depends on the
+ state of lr_save_p so any change from here on would be a bug. In
+ particular, stop rs6000_ra_ever_killed from considering the SET
+ of lr we may have added just above. */
+ cfun->machine->lr_save_state = info->lr_save_p + 1;
+}
+
+static GTY(()) alias_set_type set = -1;
+
+alias_set_type
+get_TOC_alias_set (void)
+{
+ if (set == -1)
+ set = new_alias_set ();
+ return set;
+}
+
+/* This returns nonzero if the current function uses the TOC. This is
+ determined by the presence of (use (unspec ... UNSPEC_TOC)), which
+ is generated by the ABI_V4 load_toc_* patterns. */
+#if TARGET_ELF
+static int
+uses_TOC (void)
+{
+ rtx insn;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ int i;
+
+ if (GET_CODE (pat) == PARALLEL)
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx sub = XVECEXP (pat, 0, i);
+ if (GET_CODE (sub) == USE)
+ {
+ sub = XEXP (sub, 0);
+ if (GET_CODE (sub) == UNSPEC
+ && XINT (sub, 1) == UNSPEC_TOC)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+#endif
+
+rtx
+create_TOC_reference (rtx symbol, rtx largetoc_reg)
+{
+ rtx tocrel, tocreg;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ if (GET_CODE (symbol) == SYMBOL_REF)
+ fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
+ XSTR (symbol, 0));
+ else
+ {
+ fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
+ GET_RTX_NAME (GET_CODE (symbol)));
+ debug_rtx (symbol);
+ }
+ }
+
+ if (!can_create_pseudo_p ())
+ df_set_regs_ever_live (TOC_REGISTER, true);
+
+ tocrel = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol),
+ UNSPEC_TOCREL));
+ tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ {
+ rtx hi = gen_rtx_CONST (Pmode,
+ gen_rtx_PLUS (Pmode, tocreg,
+ gen_rtx_HIGH (Pmode, tocrel)));
+ if (largetoc_reg != NULL)
+ {
+ emit_move_insn (largetoc_reg, hi);
+ hi = largetoc_reg;
+ }
+ return gen_rtx_LO_SUM (Pmode, hi, copy_rtx (tocrel));
+ }
+ else
+ return gen_rtx_PLUS (Pmode, tocreg, tocrel);
+}
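+
+/* A sketch of the RTL built above: for the small code model the
+ reference is
+
+ (plus (reg TOC_REGISTER)
+ (const (unspec [(symbol_ref ...)] UNSPEC_TOCREL)))
+
+ while for the larger code models it becomes a LO_SUM whose HIGH
+ part may first be moved into LARGETOC_REG. */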
+
+/* Issue assembly directives that create a reference to the given DWARF
+ FRAME_TABLE_LABEL from the current function section. */
+void
+rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
+{
+ fprintf (asm_out_file, "\t.ref %s\n",
+ TARGET_STRIP_NAME_ENCODING (frame_table_label));
+}
+
+/* This ties together stack memory (MEM with an alias set of frame_alias_set)
+ and the change to the stack pointer. */
+
+static void
+rs6000_emit_stack_tie (void)
+{
+ rtx mem = gen_frame_mem (BLKmode,
+ gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
+
+ emit_insn (gen_stack_tie (mem));
+}
+
+/* Emit the correct code for allocating stack space, as insns.
+ If COPY_REG, make sure a copy of the old frame is left there.
+ The generated code may use hard register 0 as a temporary. */
+
+static void
+rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg)
+{
+ rtx insn;
+ rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx tmp_reg = gen_rtx_REG (Pmode, 0);
+ rtx todec = gen_int_mode (-size, Pmode);
+ rtx par, set, mem;
+
+ if (INTVAL (todec) != -size)
+ {
+ warning (0, "stack frame too large");
+ emit_insn (gen_trap ());
+ return;
+ }
+
+ if (crtl->limit_stack)
+ {
+ if (REG_P (stack_limit_rtx)
+ && REGNO (stack_limit_rtx) > 1
+ && REGNO (stack_limit_rtx) <= 31)
+ {
+ emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
+ emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
+ const0_rtx));
+ }
+ else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
+ && TARGET_32BIT
+ && DEFAULT_ABI == ABI_V4)
+ {
+ rtx toload = gen_rtx_CONST (VOIDmode,
+ gen_rtx_PLUS (Pmode,
+ stack_limit_rtx,
+ GEN_INT (size)));
+
+ emit_insn (gen_elf_high (tmp_reg, toload));
+ emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
+ emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
+ const0_rtx));
+ }
+ else
+ warning (0, "stack limit expression is not supported");
+ }
+
+ if (copy_reg)
+ emit_move_insn (copy_reg, stack_reg);
+
+ if (size > 32767)
+ {
+ /* Need a note here so that try_split doesn't get confused. */
+ if (get_last_insn () == NULL_RTX)
+ emit_note (NOTE_INSN_DELETED);
+ insn = emit_move_insn (tmp_reg, todec);
+ try_split (PATTERN (insn), insn, 0);
+ todec = tmp_reg;
+ }
+
+ insn = emit_insn (TARGET_32BIT
+ ? gen_movsi_update_stack (stack_reg, stack_reg,
+ todec, stack_reg)
+ : gen_movdi_di_update_stack (stack_reg, stack_reg,
+ todec, stack_reg));
+ /* Since we didn't use gen_frame_mem to generate the MEM, grab
+ it now and set the alias set/attributes. The above gen_*_update
+ calls will generate a PARALLEL with the MEM set being the first
+ operation. */
+ par = PATTERN (insn);
+ gcc_assert (GET_CODE (par) == PARALLEL);
+ set = XVECEXP (par, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = SET_DEST (set);
+ gcc_assert (MEM_P (mem));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, get_frame_alias_set ());
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_reg,
+ gen_rtx_PLUS (Pmode, stack_reg,
+ GEN_INT (-size))));
+}
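+
+/* The update-stack move above typically assembles to a single
+ store-with-update, e.g. on 32-bit (illustrative size)
+
+ stwu 1,-64(1)
+
+ which decrements the SP and stores the back chain in one
+ instruction. */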
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+#if PROBE_INTERVAL > 32768
+#error Cannot use indexed addressing mode for stack probing
+#endif
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer. */
+
+static void
+rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. */
+ if (first + size <= 32768)
+ {
+ HOST_WIDE_INT i;
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+ it exceeds SIZE. If only one probe is needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
+
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ rtx r12 = gen_rtx_REG (Pmode, 12);
+ rtx r0 = gen_rtx_REG (Pmode, 0);
+
+ /* Sanity check for the addressing mode we're going to use. */
+ gcc_assert (first <= 32768);
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_ADDR = SP + FIRST. */
+ emit_insn (gen_rtx_SET (VOIDmode, r12,
+ plus_constant (stack_pointer_rtx, -first)));
+
+ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
+ if (rounded_size > 32768)
+ {
+ emit_move_insn (r0, GEN_INT (-rounded_size));
+ emit_insn (gen_rtx_SET (VOIDmode, r0,
+ gen_rtx_PLUS (Pmode, r12, r0)));
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, r0,
+ plus_constant (r12, -rounded_size)));
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ if (TARGET_64BIT)
+ emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
+ else
+ emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (r12, rounded_size - size));
+ }
+}
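+
+/* Worked example, assuming the default 4K probe interval: with
+ first == 0 and size == 40000, rounded_size == (40000 & -4096)
+ == 36864, so the loop probes every 4096 bytes down to -36864 and
+ the final probe lands at -40000. */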
+
+/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
+ absolute addresses. */
+
+const char *
+output_probe_stack_range (rtx reg1, rtx reg2)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
+ xops[0] = reg1;
+ xops[1] = reg2;
+ if (TARGET_64BIT)
+ output_asm_insn ("{cmp|cmpd} 0,%0,%1", xops);
+ else
+ output_asm_insn ("{cmp|cmpw} 0,%0,%1", xops);
+
+ fputs ("\tbeq 0,", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (-PROBE_INTERVAL);
+ output_asm_insn ("{cal %0,%1(%0)|addi %0,%0,%1}", xops);
+
+ /* Probe at TEST_ADDR and branch. */
+ xops[1] = gen_rtx_REG (Pmode, 0);
+ output_asm_insn ("{st|stw} %1,0(%0)", xops);
+ fprintf (asm_out_file, "\tb ");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
+/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
+ with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
+ is not NULL. It would be nice if dwarf2out_frame_debug_expr could
+ deduce these equivalences by itself so it wasn't necessary to hold
+ its hand so much. */
+
+static void
+rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
+ rtx reg2, rtx rreg)
+{
+ rtx real, temp;
+
+ /* copy_rtx will not make unique copies of registers, so we need to
+ ensure we don't have unwanted sharing here. */
+ if (reg == reg2)
+ reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
+
+ if (reg == rreg)
+ reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
+
+ real = copy_rtx (PATTERN (insn));
+
+ if (reg2 != NULL_RTX)
+ real = replace_rtx (real, reg2, rreg);
+
+ real = replace_rtx (real, reg,
+ gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
+ STACK_POINTER_REGNUM),
+ GEN_INT (val)));
+
+ /* We expect that 'real' is either a SET or a PARALLEL containing
+ SETs (and possibly other stuff). In a PARALLEL, all the SETs
+ are important so they all have to be marked RTX_FRAME_RELATED_P. */
+
+ if (GET_CODE (real) == SET)
+ {
+ rtx set = real;
+
+ temp = simplify_rtx (SET_SRC (set));
+ if (temp)
+ SET_SRC (set) = temp;
+ temp = simplify_rtx (SET_DEST (set));
+ if (temp)
+ SET_DEST (set) = temp;
+ if (GET_CODE (SET_DEST (set)) == MEM)
+ {
+ temp = simplify_rtx (XEXP (SET_DEST (set), 0));
+ if (temp)
+ XEXP (SET_DEST (set), 0) = temp;
+ }
+ }
+ else
+ {
+ int i;
+
+ gcc_assert (GET_CODE (real) == PARALLEL);
+ for (i = 0; i < XVECLEN (real, 0); i++)
+ if (GET_CODE (XVECEXP (real, 0, i)) == SET)
+ {
+ rtx set = XVECEXP (real, 0, i);
+
+ temp = simplify_rtx (SET_SRC (set));
+ if (temp)
+ SET_SRC (set) = temp;
+ temp = simplify_rtx (SET_DEST (set));
+ if (temp)
+ SET_DEST (set) = temp;
+ if (GET_CODE (SET_DEST (set)) == MEM)
+ {
+ temp = simplify_rtx (XEXP (SET_DEST (set), 0));
+ if (temp)
+ XEXP (SET_DEST (set), 0) = temp;
+ }
+ RTX_FRAME_RELATED_P (set) = 1;
+ }
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
+}
+
+/* Returns an insn that has a vrsave set operation with the
+ appropriate CLOBBERs. */
+
+static rtx
+generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
+{
+ int nclobs, i;
+ rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
+ rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
+
+ clobs[0]
+ = gen_rtx_SET (VOIDmode,
+ vrsave,
+ gen_rtx_UNSPEC_VOLATILE (SImode,
+ gen_rtvec (2, reg, vrsave),
+ UNSPECV_SET_VRSAVE));
+
+ nclobs = 1;
+
+ /* We need to clobber the registers in the mask so the scheduler
+ does not move sets to VRSAVE before sets of AltiVec registers.
+
+ However, if the function receives nonlocal gotos, reload will set
+ all call saved registers live. We will end up with:
+
+ (set (reg 999) (mem))
+ (parallel [ (set (reg vrsave) (unspec blah))
+ (clobber (reg 999))])
+
+ The clobber will cause the store into reg 999 to be dead, and
+ flow will attempt to delete an epilogue insn. In this case, we
+ need an unspec use/set of the register. */
+
+ for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
+ {
+ if (!epiloguep || call_used_regs [i])
+ clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (V4SImode, i));
+ else
+ {
+ rtx reg = gen_rtx_REG (V4SImode, i);
+
+ clobs[nclobs++]
+ = gen_rtx_SET (VOIDmode,
+ reg,
+ gen_rtx_UNSPEC (V4SImode,
+ gen_rtvec (1, reg), 27));
+ }
+ }
+
+ insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
+
+ for (i = 0; i < nclobs; ++i)
+ XVECEXP (insn, 0, i) = clobs[i];
+
+ return insn;
+}
+
+/* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
+ Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
+
+static void
+emit_frame_save (rtx frame_reg, rtx frame_ptr, enum machine_mode mode,
+ unsigned int regno, int offset, HOST_WIDE_INT total_size)
+{
+ rtx reg, offset_rtx, insn, mem, addr, int_rtx;
+ rtx replacea, replaceb;
+
+ int_rtx = GEN_INT (offset);
+
+ /* Some cases need register-indexed addressing. */
+ if ((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
+ || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
+ || (TARGET_E500_DOUBLE && mode == DFmode)
+ || (TARGET_SPE_ABI
+ && SPE_VECTOR_MODE (mode)
+ && !SPE_CONST_OFFSET_OK (offset)))
+ {
+ /* Whoever calls us must make sure r11 is available in the
+ flow path of instructions in the prologue. */
+ offset_rtx = gen_rtx_REG (Pmode, 11);
+ emit_move_insn (offset_rtx, int_rtx);
+
+ replacea = offset_rtx;
+ replaceb = int_rtx;
+ }
+ else
+ {
+ offset_rtx = int_rtx;
+ replacea = NULL_RTX;
+ replaceb = NULL_RTX;
+ }
+
+ reg = gen_rtx_REG (mode, regno);
+ addr = gen_rtx_PLUS (Pmode, frame_reg, offset_rtx);
+ mem = gen_frame_mem (mode, addr);
+
+ insn = emit_move_insn (mem, reg);
+
+ rs6000_frame_related (insn, frame_ptr, total_size, replacea, replaceb);
+}
+
+/* Emit an offset memory reference suitable for a frame store, while
+ converting to a valid addressing mode. */
+
+static rtx
+gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset)
+{
+ rtx int_rtx, offset_rtx;
+
+ int_rtx = GEN_INT (offset);
+
+ if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode))
+ || (TARGET_E500_DOUBLE && mode == DFmode))
+ {
+ offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
+ emit_move_insn (offset_rtx, int_rtx);
+ }
+ else
+ offset_rtx = int_rtx;
+
+ return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
+}
+
+/* Look for user-defined global regs. We should not save and restore
+ these, and cannot use stmw/lmw if there are any within the range
+ they would cover. */
+
+static bool
+no_global_regs_above (int first, bool gpr)
+{
+ int i;
+ int last = gpr ? 32 : 64;
+ for (i = first; i < last; i++)
+ if (global_regs[i])
+ return false;
+ return true;
+}
+
+#ifndef TARGET_FIX_AND_CONTINUE
+#define TARGET_FIX_AND_CONTINUE 0
+#endif
+
+/* It's really GPR 13 and FPR 14, but we need the smaller of the two. */
+#define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
+#define LAST_SAVRES_REGISTER 31
+#define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
+
+static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][8];
+
+/* Temporary holding space for an out-of-line register save/restore
+ routine name. */
+static char savres_routine_name[30];
+
+/* Return the name for an out-of-line register save/restore routine.
+ We are saving/restoring GPRs if GPR is true. */
+
+static char *
+rs6000_savres_routine_name (rs6000_stack_t *info, int regno,
+ bool savep, bool gpr, bool lr)
+{
+ const char *prefix = "";
+ const char *suffix = "";
+
+ /* Different targets are supposed to define
+ {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
+ routine name could be defined with:
+
+ sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
+
+ This is a nice idea in theory, but in reality things are
+ complicated in several ways:
+
+ - ELF targets have save/restore routines for GPRs.
+
+ - SPE targets use different prefixes for 32/64-bit registers, and
+ neither of them fits neatly in the FOO_{PREFIX,SUFFIX} regimen.
+
+ - PPC64 ELF targets have routines for save/restore of GPRs that
+ differ in what they do with the link register, so having a set
+ prefix doesn't work. (We only use one of the save routines at
+ the moment, though.)
+
+ - PPC32 ELF targets have "exit" versions of the restore routines
+ that restore the link register and can save some extra space.
+ These require an extra suffix. (There are also "tail" versions
+ of the restore routines and "GOT" versions of the save routines,
+ but we don't generate those at present. Same problems apply,
+ though.)
+
+ We deal with all this by synthesizing our own prefix/suffix and
+ using that for the simple sprintf call shown above. */
+ if (TARGET_SPE)
+ {
+ /* No floating point saves on the SPE. */
+ gcc_assert (gpr);
+
+ if (savep)
+ prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
+ else
+ prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
+
+ if (lr)
+ suffix = "_x";
+ }
+ else if (DEFAULT_ABI == ABI_V4)
+ {
+ if (TARGET_64BIT)
+ goto aix_names;
+
+ if (gpr)
+ prefix = savep ? "_savegpr_" : "_restgpr_";
+ else
+ prefix = savep ? "_savefpr_" : "_restfpr_";
+
+ if (lr)
+ suffix = "_x";
+ }
+ else if (DEFAULT_ABI == ABI_AIX)
+ {
+#ifndef POWERPC_LINUX
+ /* No out-of-line save/restore routines for GPRs on AIX. */
+ gcc_assert (!TARGET_AIX || !gpr);
+#endif
+
+ aix_names:
+ if (gpr)
+ prefix = (savep
+ ? (lr ? "_savegpr0_" : "_savegpr1_")
+ : (lr ? "_restgpr0_" : "_restgpr1_"));
+#ifdef POWERPC_LINUX
+ else if (lr)
+ prefix = (savep ? "_savefpr_" : "_restfpr_");
+#endif
+ else
+ {
+ prefix = savep ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
+ suffix = savep ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
+ }
+ }
+ else if (DEFAULT_ABI == ABI_DARWIN)
+ sorry ("out-of-line save/restore routines not supported on Darwin");
+
+ sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
+
+ return savres_routine_name;
+}
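+
+/* Examples of the names synthesized above (illustrative): restoring
+ GPRs from r29 on 32-bit V.4 gives "_restgpr_29", or "_restgpr_29_x"
+ for the exit variant that also restores LR, while the AIX-style
+ GPR save that handles LR is "_savegpr0_29". */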
+
+/* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
+ We are saving/restoring GPRs if GPR is true. */
+
+static rtx
+rs6000_savres_routine_sym (rs6000_stack_t *info, bool savep,
+ bool gpr, bool lr)
+{
+ int regno = gpr ? info->first_gp_reg_save : (info->first_fp_reg_save - 32);
+ rtx sym;
+ int select = ((savep ? 1 : 0) << 2
+ | ((TARGET_SPE_ABI
+ /* On the SPE, we never have any FPRs, but we do have
+ 32/64-bit versions of the routines. */
+ ? (info->spe_64bit_regs_used ? 1 : 0)
+ : (gpr ? 1 : 0)) << 1)
+ | (lr ? 1: 0));
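+ /* The 3-bit SELECT built above indexes the 8 per-register variants
+ in savres_routine_syms: bit 2 is save vs. restore, bit 1 is gpr
+ (or, on the SPE, 64-bit regs), and bit 0 is the lr variant. */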
+
+ /* Don't generate bogus routine names. */
+ gcc_assert (FIRST_SAVRES_REGISTER <= regno
+ && regno <= LAST_SAVRES_REGISTER);
+
+ sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
+
+ if (sym == NULL)
+ {
+ char *name;
+
+ name = rs6000_savres_routine_name (info, regno, savep, gpr, lr);
+
+ sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
+ = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
+ }
+
+ return sym;
+}
+
+/* Emit a sequence of insns, including a stack tie if needed, for
+ resetting the stack pointer. If SAVRES is true, then don't reset the
+ stack pointer, but move the base of the frame into r11 for use by
+ out-of-line register restore routines. */
+
+static rtx
+rs6000_emit_stack_reset (rs6000_stack_t *info,
+ rtx sp_reg_rtx, rtx frame_reg_rtx,
+ int sp_offset, bool savres)
+{
+ /* This blockage is needed so that sched doesn't decide to move
+ the sp change before the register restores. */
+ if (DEFAULT_ABI == ABI_V4
+ || (TARGET_SPE_ABI
+ && info->spe_64bit_regs_used != 0
+ && info->first_gp_reg_save != 32))
+ rs6000_emit_stack_tie ();
+
+ if (frame_reg_rtx != sp_reg_rtx)
+ {
+ if (sp_offset != 0)
+ {
+ rtx dest_reg = savres ? gen_rtx_REG (Pmode, 11) : sp_reg_rtx;
+ return emit_insn (gen_add3_insn (dest_reg, frame_reg_rtx,
+ GEN_INT (sp_offset)));
+ }
+ else if (!savres)
+ return emit_move_insn (sp_reg_rtx, frame_reg_rtx);
+ }
+ else if (sp_offset != 0)
+ {
+ /* If we are restoring registers out-of-line, we will be using the
+ "exit" variants of the restore routines, which will reset the
+ stack for us. But we do need to point r11 into the right place
+ for those routines. */
+ rtx dest_reg = (savres
+ ? gen_rtx_REG (Pmode, 11)
+ : sp_reg_rtx);
+
+ rtx insn = emit_insn (gen_add3_insn (dest_reg, sp_reg_rtx,
+ GEN_INT (sp_offset)));
+ if (!savres)
+ return insn;
+ }
+ return NULL_RTX;
+}
+
+/* Construct a parallel rtx describing the effect of a call to an
+ out-of-line register save/restore routine. */
+
+static rtx
+rs6000_make_savres_rtx (rs6000_stack_t *info,
+ rtx frame_reg_rtx, int save_area_offset,
+ enum machine_mode reg_mode,
+ bool savep, bool gpr, bool lr)
+{
+ int i;
+ int offset, start_reg, end_reg, n_regs;
+ int reg_size = GET_MODE_SIZE (reg_mode);
+ rtx sym;
+ rtvec p;
+
+ offset = 0;
+ start_reg = (gpr
+ ? info->first_gp_reg_save
+ : info->first_fp_reg_save);
+ end_reg = gpr ? 32 : 64;
+ n_regs = end_reg - start_reg;
+ p = rtvec_alloc ((lr ? 4 : 3) + n_regs);
+
+ if (!savep && lr)
+ RTVEC_ELT (p, offset++) = gen_rtx_RETURN (VOIDmode);
+
+ RTVEC_ELT (p, offset++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 65));
+
+ sym = rs6000_savres_routine_sym (info, savep, gpr, lr);
+ RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
+ RTVEC_ELT (p, offset++)
+ = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode, DEFAULT_ABI != ABI_AIX ? 11
+ : gpr && !lr ? 12
+ : 1));
+
+ for (i = 0; i < end_reg - start_reg; i++)
+ {
+ rtx addr, reg, mem;
+ reg = gen_rtx_REG (reg_mode, start_reg + i);
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (save_area_offset + reg_size*i));
+ mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, i + offset) = gen_rtx_SET (VOIDmode,
+ savep ? mem : reg,
+ savep ? reg : mem);
+ }
+
+ if (savep && lr)
+ {
+ rtx addr, reg, mem;
+ reg = gen_rtx_REG (Pmode, 0);
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->lr_save_offset));
+ mem = gen_frame_mem (Pmode, addr);
+ RTVEC_ELT (p, i + offset) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+
+ return gen_rtx_PARALLEL (VOIDmode, p);
+}
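+
+/* The PARALLEL built above has roughly this shape for an "exit"
+ GPR restore (illustrative):
+
+ (parallel [(return)
+ (clobber (reg 65))
+ (use (symbol_ref "_restgpr_29_x"))
+ (use (reg 11))
+ (set (reg 29) (mem ...)) ...]) */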
+
+/* Determine whether GP register REG is really used. */
+
+static bool
+rs6000_reg_live_or_pic_offset_p (int reg)
+{
+ /* If the function calls eh_return, claim that all the registers
+ which would otherwise be checked for liveness are used. This is
+ required for the PIC offset register with -mminimal-toc on AIX, as
+ it is advertised as "fixed" for register allocation purposes in
+ this case. */
+
+ return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
+ && (!call_used_regs[reg]
+ || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
+ && !TARGET_SINGLE_PIC_BASE
+ && TARGET_TOC && TARGET_MINIMAL_TOC)))
+ || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
+ && !TARGET_SINGLE_PIC_BASE
+ && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
+ || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
+}
+
+/* Emit function prologue as insns. */
+
+void
+rs6000_emit_prologue (void)
+{
+ rs6000_stack_t *info = rs6000_stack_info ();
+ enum machine_mode reg_mode = Pmode;
+ int reg_size = TARGET_32BIT ? 4 : 8;
+ rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx frame_ptr_rtx = gen_rtx_REG (Pmode, 12);
+ rtx frame_reg_rtx = sp_reg_rtx;
+ rtx cr_save_rtx = NULL_RTX;
+ rtx insn;
+ int strategy;
+ int saving_FPRs_inline;
+ int saving_GPRs_inline;
+ int using_store_multiple;
+ int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
+ && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
+ && call_used_regs[STATIC_CHAIN_REGNUM]);
+ HOST_WIDE_INT sp_offset = 0;
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = info->total_size;
+
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && info->total_size)
+ rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, info->total_size);
+
+ if (TARGET_FIX_AND_CONTINUE)
+ {
+ /* gdb on darwin arranges to forward a function from the old
+ address by modifying the first 5 instructions of the function
+ to branch to the overriding function. This is necessary to
+ permit function pointers that point to the old function to
+ actually forward to the new function. */
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ emit_insn (gen_nop ());
+ }
+
+ if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
+ {
+ reg_mode = V2SImode;
+ reg_size = 8;
+ }
+
+ strategy = info->savres_strategy;
+ using_store_multiple = strategy & SAVRES_MULTIPLE;
+ saving_FPRs_inline = strategy & SAVE_INLINE_FPRS;
+ saving_GPRs_inline = strategy & SAVE_INLINE_GPRS;
+
+ /* For V.4, update stack before we do any saving and set back pointer. */
+ if (! WORLD_SAVE_P (info)
+ && info->push_p
+ && (DEFAULT_ABI == ABI_V4
+ || crtl->calls_eh_return))
+ {
+ bool need_r11 = (TARGET_SPE
+ ? (!saving_GPRs_inline
+ && info->spe_64bit_regs_used == 0)
+ : (!saving_FPRs_inline || !saving_GPRs_inline));
+ rtx copy_reg = need_r11 ? gen_rtx_REG (Pmode, 11) : NULL;
+
+ if (info->total_size < 32767)
+ sp_offset = info->total_size;
+ else if (need_r11)
+ frame_reg_rtx = copy_reg;
+ else if (info->cr_save_p
+ || info->lr_save_p
+ || info->first_fp_reg_save < 64
+ || info->first_gp_reg_save < 32
+ || info->altivec_size != 0
+ || info->vrsave_mask != 0
+ || crtl->calls_eh_return)
+ {
+ copy_reg = frame_ptr_rtx;
+ frame_reg_rtx = copy_reg;
+ }
+ else
+ {
+ /* The prologue won't be saving any regs so there is no need
+ to set up a frame register to access any frame save area.
+ We also won't be using sp_offset anywhere below, but set
+ the correct value anyway to protect against future
+ changes to this function. */
+ sp_offset = info->total_size;
+ }
+ rs6000_emit_allocate_stack (info->total_size, copy_reg);
+ if (frame_reg_rtx != sp_reg_rtx)
+ rs6000_emit_stack_tie ();
+ }
+
+ /* Handle world saves specially here. */
+ if (WORLD_SAVE_P (info))
+ {
+ int i, j, sz;
+ rtx treg;
+ rtvec p;
+ rtx reg0;
+
+ /* save_world expects lr in r0. */
+ reg0 = gen_rtx_REG (Pmode, 0);
+ if (info->lr_save_p)
+ {
+ insn = emit_move_insn (reg0,
+ gen_rtx_REG (Pmode, LR_REGNO));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
+ assumptions about the offsets of various bits of the stack
+ frame. */
+ gcc_assert (info->gp_save_offset == -220
+ && info->fp_save_offset == -144
+ && info->lr_save_offset == 8
+ && info->cr_save_offset == 4
+ && info->push_p
+ && info->lr_save_p
+ && (!crtl->calls_eh_return
+ || info->ehrd_offset == -432)
+ && info->vrsave_save_offset == -224
+ && info->altivec_save_offset == -416);
+
+ treg = gen_rtx_REG (SImode, 11);
+ emit_move_insn (treg, GEN_INT (-info->total_size));
+
+ /* SAVE_WORLD takes the caller's LR in R0 and the frame size
+ in R11. It also clobbers R12, so beware! */
+
+ /* Preserve CR2 for save_world prologues. */
+ sz = 5;
+ sz += 32 - info->first_gp_reg_save;
+ sz += 64 - info->first_fp_reg_save;
+ sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
+ p = rtvec_alloc (sz);
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (SImode,
+ LR_REGNO));
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ "*save_world"));
+ /* We do floats first so that the instruction pattern matches
+ properly. */
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode),
+ info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset + 8 * i));
+ rtx mem = gen_frame_mem (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode), addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + sp_offset + 16 * i));
+ rtx mem = gen_frame_mem (V4SImode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset + reg_size * i));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+
+ {
+ /* CR register traditionally saved as CR2. */
+ rtx reg = gen_rtx_REG (reg_mode, CR2_REGNO);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset
+ + sp_offset));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ /* Explain the use of R0. */
+ if (info->lr_save_p)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->lr_save_offset
+ + sp_offset));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg0);
+ }
+ /* Explain what happens to the stack pointer. */
+ {
+ rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
+ }
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ treg, GEN_INT (-info->total_size));
+ sp_offset = info->total_size;
+ }
+
+ /* If we use the link register, get it into r0. */
+ if (!WORLD_SAVE_P (info) && info->lr_save_p)
+ {
+ rtx addr, reg, mem;
+
+ insn = emit_move_insn (gen_rtx_REG (Pmode, 0),
+ gen_rtx_REG (Pmode, LR_REGNO));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
+ | SAVE_NOINLINE_FPRS_SAVES_LR)))
+ {
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->lr_save_offset + sp_offset));
+ reg = gen_rtx_REG (Pmode, 0);
+ mem = gen_rtx_MEM (Pmode, addr);
+ /* This should not use rs6000_sr_alias_set, because of
+ __builtin_return_address. */
+
+ insn = emit_move_insn (mem, reg);
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+ }
+
+ /* If we need to save CR, put it into r12 or r11. */
+ if (!WORLD_SAVE_P (info) && info->cr_save_p && frame_reg_rtx != frame_ptr_rtx)
+ {
+ rtx set;
+
+ cr_save_rtx
+ = gen_rtx_REG (SImode, DEFAULT_ABI == ABI_AIX && !saving_GPRs_inline
+ ? 11 : 12);
+ insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* Now, there's no way that dwarf2out_frame_debug_expr is going
+ to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
+ But that's OK. All we have to do is specify that _one_ condition
+ code register is saved in this stack slot. The thrower's epilogue
+ will then restore all the call-saved registers.
+ We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
+ set = gen_rtx_SET (VOIDmode, cr_save_rtx,
+ gen_rtx_REG (SImode, CR2_REGNO));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+
+ /* Do any required saving of fprs. If there are only one or two to
+ save, do it ourselves. Otherwise, call an out-of-line routine. */
+ if (!WORLD_SAVE_P (info) && saving_FPRs_inline)
+ {
+ int i;
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ if ((df_regs_ever_live_p (info->first_fp_reg_save+i)
+ && ! call_used_regs[info->first_fp_reg_save+i]))
+ emit_frame_save (frame_reg_rtx, frame_ptr_rtx,
+ (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode,
+ info->first_fp_reg_save + i,
+ info->fp_save_offset + sp_offset + 8 * i,
+ info->total_size);
+ }
+ else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
+ {
+ rtx par;
+
+ par = rs6000_make_savres_rtx (info, frame_reg_rtx,
+ info->fp_save_offset + sp_offset,
+ DFmode,
+ /*savep=*/true, /*gpr=*/false,
+ /*lr=*/(strategy
+ & SAVE_NOINLINE_FPRS_SAVES_LR)
+ != 0);
+ insn = emit_insn (par);
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+
+ /* Save GPRs. This is done as a PARALLEL if we are using
+ the store-multiple instructions. */
+ if (!WORLD_SAVE_P (info)
+ && TARGET_SPE_ABI
+ && info->spe_64bit_regs_used != 0
+ && info->first_gp_reg_save != 32)
+ {
+ int i;
+ rtx spe_save_area_ptr;
+ int save_ptr_to_sp;
+ int ool_adjust = 0;
+
+ /* Determine whether we can address all of the registers that need
+ to be saved with an offset from frame_reg_rtx that fits in
+ the small const field for SPE memory instructions. */
+ int spe_regs_addressable
+ = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + sp_offset
+ + reg_size * (32 - info->first_gp_reg_save - 1))
+ && saving_GPRs_inline);
+ int spe_offset;
+
+ if (spe_regs_addressable)
+ {
+ spe_save_area_ptr = frame_reg_rtx;
+ save_ptr_to_sp = info->total_size - sp_offset;
+ spe_offset = info->spe_gp_save_offset + sp_offset;
+ }
+ else
+ {
+ /* Make r11 point to the start of the SPE save area. We need
+ to be careful here if r11 is holding the static chain. If
+ it is, then temporarily save it in r0. */
+ int offset;
+
+ if (!saving_GPRs_inline)
+ ool_adjust = 8 * (info->first_gp_reg_save
+ - (FIRST_SAVRES_REGISTER + 1));
+ offset = info->spe_gp_save_offset + sp_offset - ool_adjust;
+ spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
+ save_ptr_to_sp = info->total_size - sp_offset + offset;
+ spe_offset = 0;
+
+ if (using_static_chain_p)
+ {
+ rtx r0 = gen_rtx_REG (Pmode, 0);
+ gcc_assert (info->first_gp_reg_save > 11);
+
+ emit_move_insn (r0, spe_save_area_ptr);
+ }
+ emit_insn (gen_addsi3 (spe_save_area_ptr,
+ frame_reg_rtx, GEN_INT (offset)));
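+ /* Keep the invariant that frame_reg_rtx + sp_offset points
+ at the top of the stack frame (this mirrors the comment on the
+ matching adjustment in the epilogue below).  */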
+ if (REGNO (frame_reg_rtx) == 11)
+ sp_offset = -info->spe_gp_save_offset + ool_adjust;
+ }
+
+ if (saving_GPRs_inline)
+ {
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ {
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+ rtx offset, addr, mem;
+
+ /* We're doing all this to ensure that the offset fits into
+ the immediate field of 'evstdd'.  */
+ gcc_assert (SPE_CONST_OFFSET_OK (reg_size * i + spe_offset));
+
+ offset = GEN_INT (reg_size * i + spe_offset);
+ addr = gen_rtx_PLUS (Pmode, spe_save_area_ptr, offset);
+ mem = gen_rtx_MEM (V2SImode, addr);
+
+ insn = emit_move_insn (mem, reg);
+
+ rs6000_frame_related (insn,
+ spe_save_area_ptr, save_ptr_to_sp,
+ NULL_RTX, NULL_RTX);
+ }
+ }
+ else
+ {
+ rtx par;
+
+ par = rs6000_make_savres_rtx (info, spe_save_area_ptr,
+ ool_adjust, reg_mode,
+ /*savep=*/true, /*gpr=*/true,
+ /*lr=*/false);
+ insn = emit_insn (par);
+ rs6000_frame_related (insn, spe_save_area_ptr, save_ptr_to_sp,
+ NULL_RTX, NULL_RTX);
+ }
+
+ /* Move the static chain pointer back. */
+ if (using_static_chain_p && !spe_regs_addressable)
+ emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
+ }
+ else if (!WORLD_SAVE_P (info) && !saving_GPRs_inline)
+ {
+ rtx par;
+
+ /* Need to adjust r11 (r12) if we saved any FPRs. */
+ if (info->first_fp_reg_save != 64)
+ {
+ rtx dest_reg = gen_rtx_REG (Pmode, DEFAULT_ABI == ABI_AIX ? 12 : 11);
+ int save_off = 8 * (64 - info->first_fp_reg_save);
+ rtx offset = GEN_INT (sp_offset - save_off);
+
+ if (REGNO (dest_reg) == REGNO (frame_reg_rtx))
+ sp_offset = save_off;
+ emit_insn (gen_add3_insn (dest_reg, frame_reg_rtx, offset));
+ }
+
+ par = rs6000_make_savres_rtx (info, frame_reg_rtx,
+ info->gp_save_offset + sp_offset,
+ reg_mode,
+ /*savep=*/true, /*gpr=*/true,
+ /*lr=*/(strategy
+ & SAVE_NOINLINE_GPRS_SAVES_LR)
+ != 0);
+ insn = emit_insn (par);
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+ else if (!WORLD_SAVE_P (info) && using_store_multiple)
+ {
+ rtvec p;
+ int i;
+ p = rtvec_alloc (32 - info->first_gp_reg_save);
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx addr, reg, mem;
+ reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset
+ + reg_size * i));
+ mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, reg);
+ }
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+ else if (!WORLD_SAVE_P (info))
+ {
+ int i;
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ {
+ rtx addr, reg, mem;
+ reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset
+ + reg_size * i));
+ mem = gen_frame_mem (reg_mode, addr);
+
+ insn = emit_move_insn (mem, reg);
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+ }
+
+ /* ??? There's no need to emit actual instructions here, but it's the
+ easiest way to get the frame unwind information emitted. */
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ for (i = 0; ; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ emit_frame_save (frame_reg_rtx, frame_ptr_rtx, reg_mode, regno,
+ info->ehrd_offset + sp_offset
+ + reg_size * (int) i,
+ info->total_size);
+ }
+ }
+
+ /* In the AIX ABI we need to make sure r2 is really saved.  */
+ if (TARGET_AIX && crtl->calls_eh_return)
+ {
+ rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
+ long toc_restore_insn;
+
+ gcc_assert (frame_reg_rtx == frame_ptr_rtx
+ || frame_reg_rtx == sp_reg_rtx);
+ tmp_reg = gen_rtx_REG (Pmode, 11);
+ tmp_reg_si = gen_rtx_REG (SImode, 11);
+ if (using_static_chain_p)
+ emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
+ gcc_assert (saving_GPRs_inline && saving_FPRs_inline);
+ emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
+ /* Peek at instruction to which this function returns. If it's
+ restoring r2, then we know we've already saved r2. We can't
+ unconditionally save r2 because the value we have will already
+ be updated if we arrived at this function via a plt call or
+ toc adjusting stub. */
+ emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
+ toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
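+ /* 0x80410014 is "lwz r2,20(r1)" and 0xE8410028 is "ld r2,40(r1)",
+ the insns that reload r2 from the reserved TOC slot.  The compare
+ is done in two steps: XOR away the high half, then compare the
+ result against the low half.  */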
+ hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
+ emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
+ compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
+ validate_condition_mode (EQ, CCUNSmode);
+ lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
+ emit_insn (gen_rtx_SET (VOIDmode, compare_result,
+ gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
+ toc_save_done = gen_label_rtx ();
+ jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_EQ (VOIDmode, compare_result,
+ const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
+ pc_rtx);
+ jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
+ JUMP_LABEL (jump) = toc_save_done;
+ LABEL_NUSES (toc_save_done) += 1;
+
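+ /* Save r2 in the reserved TOC slot, five words above the incoming
+ stack pointer (offset 20 on 32-bit, 40 on 64-bit).  */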
+ emit_frame_save (frame_reg_rtx, frame_ptr_rtx, reg_mode, 2,
+ sp_offset + 5 * reg_size, info->total_size);
+ emit_label (toc_save_done);
+ if (using_static_chain_p)
+ emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
+ }
+
+ /* Save CR if we use any that must be preserved. */
+ if (!WORLD_SAVE_P (info) && info->cr_save_p)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset + sp_offset));
+ rtx mem = gen_frame_mem (SImode, addr);
+ /* See the large comment above about why CR2_REGNO is used. */
+ rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
+
+ /* If r12 was used to hold the original sp, copy cr into r0 now
+ that it's free. */
+ if (REGNO (frame_reg_rtx) == 12)
+ {
+ rtx set;
+
+ cr_save_rtx = gen_rtx_REG (SImode, 0);
+ insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+ insn = emit_move_insn (mem, cr_save_rtx);
+
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ NULL_RTX, NULL_RTX);
+ }
+
+ /* Update the stack and set the back pointer unless this is V.4,
+ for which this was done previously.  */
+ if (!WORLD_SAVE_P (info) && info->push_p
+ && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
+ {
+ rtx copy_reg = NULL;
+
+ if (info->total_size < 32767)
+ sp_offset = info->total_size;
+ else if (info->altivec_size != 0
+ || info->vrsave_mask != 0)
+ {
+ copy_reg = frame_ptr_rtx;
+ frame_reg_rtx = copy_reg;
+ }
+ else
+ sp_offset = info->total_size;
+ rs6000_emit_allocate_stack (info->total_size, copy_reg);
+ if (frame_reg_rtx != sp_reg_rtx)
+ rs6000_emit_stack_tie ();
+ }
+
+ /* Set frame pointer, if needed. */
+ if (frame_pointer_needed)
+ {
+ insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
+ sp_reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Save AltiVec registers if needed. Save here because the red zone does
+ not include AltiVec registers. */
+ if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI && info->altivec_size != 0)
+ {
+ int i;
+
+ /* There should be a non-inline version of this, for when we
+ are saving lots of vector registers.  */
+ for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
+ {
+ rtx areg, savereg, mem;
+ int offset;
+
+ offset = info->altivec_save_offset + sp_offset
+ + 16 * (i - info->first_altivec_reg_save);
+
+ savereg = gen_rtx_REG (V4SImode, i);
+
+ areg = gen_rtx_REG (Pmode, 0);
+ emit_move_insn (areg, GEN_INT (offset));
+
+ /* AltiVec addressing mode is [reg+reg]. */
+ mem = gen_frame_mem (V4SImode,
+ gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
+
+ insn = emit_move_insn (mem, savereg);
+
+ rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
+ areg, GEN_INT (offset));
+ }
+ }
+
+ /* VRSAVE is a bit vector representing which AltiVec registers
+ are used. The OS uses this to determine which vector
+ registers to save on a context switch. We need to save
+ VRSAVE on the stack frame, add whatever AltiVec registers we
+ used in this function, and do the corresponding magic in the
+ epilogue. */
+
+ if (TARGET_ALTIVEC && TARGET_ALTIVEC_VRSAVE
+ && info->vrsave_mask != 0)
+ {
+ rtx reg, mem, vrsave;
+ int offset;
+
+ /* Get VRSAVE onto a GPR. Note that ABI_V4 might be using r12
+ as frame_reg_rtx and r11 as the static chain pointer for
+ nested functions. */
+ reg = gen_rtx_REG (SImode, 0);
+ vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
+ if (TARGET_MACHO)
+ emit_insn (gen_get_vrsave_internal (reg));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
+
+ if (!WORLD_SAVE_P (info))
+ {
+ /* Save VRSAVE. */
+ offset = info->vrsave_save_offset + sp_offset;
+ mem = gen_frame_mem (SImode,
+ gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (offset)));
+ insn = emit_move_insn (mem, reg);
+ }
+
+ /* Include the registers in the mask. */
+ emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
+
+ insn = emit_insn (generate_set_vrsave (reg, info, 0));
+ }
+
+ if (TARGET_SINGLE_PIC_BASE)
+ return; /* Do not set the PIC register.  */
+
+ /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
+ if ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
+ || (DEFAULT_ABI == ABI_V4
+ && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
+ && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)))
+ {
+ /* If emit_load_toc_table will use the link register, we need to save
+ it. We use R12 for this purpose because emit_load_toc_table
+ can use register 0. This allows us to use a plain 'blr' to return
+ from the procedure more often. */
+ int save_LR_around_toc_setup = (TARGET_ELF
+ && DEFAULT_ABI != ABI_AIX
+ && flag_pic
+ && ! info->lr_save_p
+ && EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
+ if (save_LR_around_toc_setup)
+ {
+ rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+
+ insn = emit_move_insn (frame_ptr_rtx, lr);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ rs6000_emit_load_toc_table (TRUE);
+
+ insn = emit_move_insn (lr, frame_ptr_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ rs6000_emit_load_toc_table (TRUE);
+ }
+
+#if TARGET_MACHO
+ if (DEFAULT_ABI == ABI_DARWIN
+ && flag_pic && crtl->uses_pic_offset_table)
+ {
+ rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+ rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
+
+ /* Save and restore LR locally around this call (in R0). */
+ if (!info->lr_save_p)
+ emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
+
+ emit_insn (gen_load_macho_picbase (src));
+
+ emit_move_insn (gen_rtx_REG (Pmode,
+ RS6000_PIC_OFFSET_TABLE_REGNUM),
+ lr);
+
+ if (!info->lr_save_p)
+ emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
+ }
+#endif
+}
+
+/* Write function prologue. */
+
+static void
+rs6000_output_function_prologue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ rs6000_stack_t *info = rs6000_stack_info ();
+
+ if (TARGET_DEBUG_STACK)
+ debug_stack_info (info);
+
+ /* Write .extern for any function we will call to save and restore
+ fp values. */
+ if (info->first_fp_reg_save < 64)
+ {
+ char *name;
+ int regno = info->first_fp_reg_save - 32;
+
+ if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
+ {
+ name = rs6000_savres_routine_name (info, regno, /*savep=*/true,
+ /*gpr=*/false, /*lr=*/false);
+ fprintf (file, "\t.extern %s\n", name);
+ }
+ if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
+ {
+ name = rs6000_savres_routine_name (info, regno, /*savep=*/false,
+ /*gpr=*/false, /*lr=*/true);
+ fprintf (file, "\t.extern %s\n", name);
+ }
+ }
+
+ /* Write .extern for AIX common mode routines, if needed. */
+ if (! TARGET_POWER && ! TARGET_POWERPC && ! common_mode_defined)
+ {
+ fputs ("\t.extern __mulh\n", file);
+ fputs ("\t.extern __mull\n", file);
+ fputs ("\t.extern __divss\n", file);
+ fputs ("\t.extern __divus\n", file);
+ fputs ("\t.extern __quoss\n", file);
+ fputs ("\t.extern __quous\n", file);
+ common_mode_defined = 1;
+ }
+
+ if (! HAVE_prologue)
+ {
+ rtx prologue;
+
+ start_sequence ();
+
+ /* A NOTE_INSN_DELETED is supposed to be at the start and end of
+ the "toplevel" insn chain. */
+ emit_note (NOTE_INSN_DELETED);
+ rs6000_emit_prologue ();
+ emit_note (NOTE_INSN_DELETED);
+
+ /* Expand INSN_ADDRESSES so final() doesn't crash. */
+ {
+ rtx insn;
+ unsigned addr = 0;
+ for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
+ {
+ INSN_ADDRESSES_NEW (insn, addr);
+ addr += 4;
+ }
+ }
+
+ prologue = get_insns ();
+ end_sequence ();
+
+ if (TARGET_DEBUG_STACK)
+ debug_rtx_list (prologue, 100);
+
+ emit_insn_before_noloc (prologue, BB_HEAD (ENTRY_BLOCK_PTR->next_bb),
+ ENTRY_BLOCK_PTR);
+ }
+
+ rs6000_pic_labelno++;
+}
+
+ /* Non-zero if VMX regs are restored before the frame pop, zero if
+ we restore after the pop when possible.  */
+#define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
+
+/* Reload CR from REG. */
+
+static void
+rs6000_restore_saved_cr (rtx reg, int using_mfcr_multiple)
+{
+ int count = 0;
+ int i;
+
+ if (using_mfcr_multiple)
+ {
+ for (i = 0; i < 8; i++)
+ if (df_regs_ever_live_p (CR0_REGNO+i) && ! call_used_regs[CR0_REGNO+i])
+ count++;
+ gcc_assert (count);
+ }
+
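+ /* In the PARALLEL built below, the mask 1 << (7-i) selects
+ condition register field CRi for mtcrf; CR0 is the most
+ significant field, hence the 7-i.  */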
+ if (using_mfcr_multiple && count > 1)
+ {
+ rtvec p;
+ int ndx;
+
+ p = rtvec_alloc (count);
+
+ ndx = 0;
+ for (i = 0; i < 8; i++)
+ if (df_regs_ever_live_p (CR0_REGNO+i) && ! call_used_regs[CR0_REGNO+i])
+ {
+ rtvec r = rtvec_alloc (2);
+ RTVEC_ELT (r, 0) = reg;
+ RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
+ RTVEC_ELT (p, ndx) =
+ gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO+i),
+ gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
+ ndx++;
+ }
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ gcc_assert (ndx == count);
+ }
+ else
+ for (i = 0; i < 8; i++)
+ if (df_regs_ever_live_p (CR0_REGNO+i) && ! call_used_regs[CR0_REGNO+i])
+ {
+ emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode,
+ CR0_REGNO+i),
+ reg));
+ }
+}
+
+ /* Return true if OFFSET from the stack pointer can be clobbered by
+ signals.  V.4 doesn't have any stack cushion; the AIX ABIs have 220
+ or 288 bytes below the stack pointer that are not clobbered by
+ signals.  */
+
+static inline bool
+offset_below_red_zone_p (HOST_WIDE_INT offset)
+{
+ return offset < (DEFAULT_ABI == ABI_V4
+ ? 0
+ : TARGET_32BIT ? -220 : -288);
+}
+
+/* Emit function epilogue as insns. */
+
+void
+rs6000_emit_epilogue (int sibcall)
+{
+ rs6000_stack_t *info;
+ int restoring_GPRs_inline;
+ int restoring_FPRs_inline;
+ int using_load_multiple;
+ int using_mtcr_multiple;
+ int use_backchain_to_restore_sp;
+ int restore_lr;
+ int strategy;
+ int sp_offset = 0;
+ rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
+ rtx frame_reg_rtx = sp_reg_rtx;
+ rtx cfa_restores = NULL_RTX;
+ rtx insn;
+ rtx cr_save_reg = NULL_RTX;
+ enum machine_mode reg_mode = Pmode;
+ int reg_size = TARGET_32BIT ? 4 : 8;
+ int i;
+
+ info = rs6000_stack_info ();
+
+ if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
+ {
+ reg_mode = V2SImode;
+ reg_size = 8;
+ }
+
+ strategy = info->savres_strategy;
+ using_load_multiple = strategy & SAVRES_MULTIPLE;
+ restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
+ restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
+ using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
+ || rs6000_cpu == PROCESSOR_PPC603
+ || rs6000_cpu == PROCESSOR_PPC750
+ || optimize_size);
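+ /* (On those processors a single mtcrf setting several fields is
+ presumably no slower than one mtcrf per field, and it is smaller,
+ which is also why it is preferred when optimizing for size.)  */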
+ /* Restore via the backchain when we have a large frame, since this
+ is more efficient than an addis, addi pair.  The second condition
+ here will not trigger at the moment; we don't actually need a
+ frame pointer for alloca, but the generic parts of the compiler
+ give us one anyway.  */
+ use_backchain_to_restore_sp = (info->total_size > 32767
+ || info->total_size
+ + (info->lr_save_p ? info->lr_save_offset : 0)
+ > 32767
+ || (cfun->calls_alloca
+ && !frame_pointer_needed));
+ restore_lr = (info->lr_save_p
+ && (restoring_FPRs_inline
+ || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
+ && (restoring_GPRs_inline
+ || info->first_fp_reg_save < 64));
+
+ if (WORLD_SAVE_P (info))
+ {
+ int i, j;
+ char rname[30];
+ const char *alloc_rname;
+ rtvec p;
+
+ /* eh_rest_world_r10 will return to the location saved in the LR
+ stack slot (which is not likely to be our caller).
+ Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
+ rest_world is similar, except any R10 parameter is ignored.
+ The exception-handling stuff that was here in 2.95 is no
+ longer necessary. */
+
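+ /* Nine fixed RETURN/USE/CLOBBER elements, one for the CR restore,
+ and one SET per GPR, AltiVec and FP register restored.  */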
+ p = rtvec_alloc (9
+ + 1
+ + 32 - info->first_gp_reg_save
+ + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
+ + 63 + 1 - info->first_fp_reg_save);
+
+ strcpy (rname, ((crtl->calls_eh_return) ?
+ "*eh_rest_world_r10" : "*rest_world"));
+ alloc_rname = ggc_strdup (rname);
+
+ j = 0;
+ RTVEC_ELT (p, j++) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode,
+ LR_REGNO));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
+ /* The instruction pattern requires a clobber here;
+ it is shared with the restVEC helper. */
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
+
+ {
+ /* CR register traditionally saved as CR2. */
+ rtx reg = gen_rtx_REG (reg_mode, CR2_REGNO);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + reg_size * i));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
+ {
+ rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->altivec_save_offset
+ + 16 * i));
+ rtx mem = gen_frame_mem (V4SImode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ for (i = 0; info->first_fp_reg_save + i <= 63; i++)
+ {
+ rtx reg = gen_rtx_REG (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode),
+ info->first_fp_reg_save + i);
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + 8 * i));
+ rtx mem = gen_frame_mem (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode), addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, reg, mem);
+ }
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
+ RTVEC_ELT (p, j++)
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ return;
+ }
+
+ /* frame_reg_rtx + sp_offset points to the top of this stack frame. */
+ if (info->push_p)
+ sp_offset = info->total_size;
+
+ /* Restore AltiVec registers if we must do so before adjusting the
+ stack. */
+ if (TARGET_ALTIVEC_ABI
+ && info->altivec_size != 0
+ && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
+ || (DEFAULT_ABI != ABI_V4
+ && offset_below_red_zone_p (info->altivec_save_offset))))
+ {
+ int i;
+
+ if (use_backchain_to_restore_sp)
+ {
+ frame_reg_rtx = gen_rtx_REG (Pmode, 11);
+ emit_move_insn (frame_reg_rtx,
+ gen_rtx_MEM (Pmode, sp_reg_rtx));
+ sp_offset = 0;
+ }
+ else if (frame_pointer_needed)
+ frame_reg_rtx = hard_frame_pointer_rtx;
+
+ for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
+ {
+ rtx addr, areg, mem, reg;
+
+ areg = gen_rtx_REG (Pmode, 0);
+ emit_move_insn
+ (areg, GEN_INT (info->altivec_save_offset
+ + sp_offset
+ + 16 * (i - info->first_altivec_reg_save)));
+
+ /* AltiVec addressing mode is [reg+reg]. */
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
+ mem = gen_frame_mem (V4SImode, addr);
+
+ reg = gen_rtx_REG (V4SImode, i);
+ emit_move_insn (reg, mem);
+ if (offset_below_red_zone_p (info->altivec_save_offset
+ + (i - info->first_altivec_reg_save)
+ * 16))
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ }
+
+ /* Restore VRSAVE if we must do so before adjusting the stack. */
+ if (TARGET_ALTIVEC
+ && TARGET_ALTIVEC_VRSAVE
+ && info->vrsave_mask != 0
+ && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
+ || (DEFAULT_ABI != ABI_V4
+ && offset_below_red_zone_p (info->vrsave_save_offset))))
+ {
+ rtx addr, mem, reg;
+
+ if (frame_reg_rtx == sp_reg_rtx)
+ {
+ if (use_backchain_to_restore_sp)
+ {
+ frame_reg_rtx = gen_rtx_REG (Pmode, 11);
+ emit_move_insn (frame_reg_rtx,
+ gen_rtx_MEM (Pmode, sp_reg_rtx));
+ sp_offset = 0;
+ }
+ else if (frame_pointer_needed)
+ frame_reg_rtx = hard_frame_pointer_rtx;
+ }
+
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->vrsave_save_offset + sp_offset));
+ mem = gen_frame_mem (SImode, addr);
+ reg = gen_rtx_REG (SImode, 12);
+ emit_move_insn (reg, mem);
+
+ emit_insn (generate_set_vrsave (reg, info, 1));
+ }
+
+ insn = NULL_RTX;
+ /* If we have a large stack frame, restore the old stack pointer
+ using the backchain. */
+ if (use_backchain_to_restore_sp)
+ {
+ if (frame_reg_rtx == sp_reg_rtx)
+ {
+ /* Under V.4, don't reset the stack pointer until after we're done
+ loading the saved registers. */
+ if (DEFAULT_ABI == ABI_V4)
+ frame_reg_rtx = gen_rtx_REG (Pmode, 11);
+
+ insn = emit_move_insn (frame_reg_rtx,
+ gen_rtx_MEM (Pmode, sp_reg_rtx));
+ sp_offset = 0;
+ }
+ else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
+ && DEFAULT_ABI == ABI_V4)
+ /* frame_reg_rtx has been set up by the altivec restore. */
+ ;
+ else
+ {
+ insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
+ frame_reg_rtx = sp_reg_rtx;
+ }
+ }
+ /* If we have a frame pointer, we can restore the old stack pointer
+ from it. */
+ else if (frame_pointer_needed)
+ {
+ frame_reg_rtx = sp_reg_rtx;
+ if (DEFAULT_ABI == ABI_V4)
+ frame_reg_rtx = gen_rtx_REG (Pmode, 11);
+ /* Prevent reordering memory accesses against stack pointer restore. */
+ else if (cfun->calls_alloca
+ || offset_below_red_zone_p (-info->total_size))
+ {
+ rtx mem1 = gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx);
+ rtx mem2 = gen_rtx_MEM (BLKmode, sp_reg_rtx);
+ MEM_NOTRAP_P (mem1) = 1;
+ MEM_NOTRAP_P (mem2) = 1;
+ emit_insn (gen_frame_tie (mem1, mem2));
+ }
+
+ insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
+ GEN_INT (info->total_size)));
+ sp_offset = 0;
+ }
+ else if (info->push_p
+ && DEFAULT_ABI != ABI_V4
+ && !crtl->calls_eh_return)
+ {
+ /* Prevent reordering memory accesses against stack pointer restore. */
+ if (cfun->calls_alloca
+ || offset_below_red_zone_p (-info->total_size))
+ {
+ rtx mem = gen_rtx_MEM (BLKmode, sp_reg_rtx);
+ MEM_NOTRAP_P (mem) = 1;
+ emit_insn (gen_stack_tie (mem));
+ }
+ insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
+ GEN_INT (info->total_size)));
+ sp_offset = 0;
+ }
+ if (insn && frame_reg_rtx == sp_reg_rtx)
+ {
+ if (cfa_restores)
+ {
+ REG_NOTES (insn) = cfa_restores;
+ cfa_restores = NULL_RTX;
+ }
+ add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Restore AltiVec registers if we have not done so already. */
+ if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
+ && TARGET_ALTIVEC_ABI
+ && info->altivec_size != 0
+ && (DEFAULT_ABI == ABI_V4
+ || !offset_below_red_zone_p (info->altivec_save_offset)))
+ {
+ int i;
+
+ for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
+ if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
+ {
+ rtx addr, areg, mem, reg;
+
+ areg = gen_rtx_REG (Pmode, 0);
+ emit_move_insn
+ (areg, GEN_INT (info->altivec_save_offset
+ + sp_offset
+ + 16 * (i - info->first_altivec_reg_save)));
+
+ /* AltiVec addressing mode is [reg+reg]. */
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
+ mem = gen_frame_mem (V4SImode, addr);
+
+ reg = gen_rtx_REG (V4SImode, i);
+ emit_move_insn (reg, mem);
+ if (DEFAULT_ABI == ABI_V4)
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ }
+
+ /* Restore VRSAVE if we have not done so already. */
+ if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
+ && TARGET_ALTIVEC
+ && TARGET_ALTIVEC_VRSAVE
+ && info->vrsave_mask != 0
+ && (DEFAULT_ABI == ABI_V4
+ || !offset_below_red_zone_p (info->vrsave_save_offset)))
+ {
+ rtx addr, mem, reg;
+
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->vrsave_save_offset + sp_offset));
+ mem = gen_frame_mem (SImode, addr);
+ reg = gen_rtx_REG (SImode, 12);
+ emit_move_insn (reg, mem);
+
+ emit_insn (generate_set_vrsave (reg, info, 1));
+ }
+
+ /* Get the old lr if we saved it. If we are restoring registers
+ out-of-line, then the out-of-line routines can do this for us. */
+ if (restore_lr && restoring_GPRs_inline)
+ {
+ rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx,
+ info->lr_save_offset + sp_offset);
+
+ emit_move_insn (gen_rtx_REG (Pmode, 0), mem);
+ }
+
+ /* Get the old cr if we saved it. */
+ if (info->cr_save_p)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->cr_save_offset + sp_offset));
+ rtx mem = gen_frame_mem (SImode, addr);
+
+ cr_save_reg = gen_rtx_REG (SImode,
+ DEFAULT_ABI == ABI_AIX
+ && !restoring_GPRs_inline
+ && info->first_fp_reg_save < 64
+ ? 11 : 12);
+ emit_move_insn (cr_save_reg, mem);
+ }
+
+ /* Set LR here to try to overlap restores below.  LR is always saved
+ above the incoming stack, so it never needs REG_CFA_RESTORE.  */
+ if (restore_lr && restoring_GPRs_inline)
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO),
+ gen_rtx_REG (Pmode, 0));
+
+ /* Load exception handler data registers, if needed. */
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i, regno;
+
+ if (TARGET_AIX)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (sp_offset + 5 * reg_size));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ emit_move_insn (gen_rtx_REG (reg_mode, 2), mem);
+ }
+
+ for (i = 0; ; ++i)
+ {
+ rtx mem;
+
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+
+ mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
+ info->ehrd_offset + sp_offset
+ + reg_size * (int) i);
+
+ emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
+ }
+ }
+
+ /* Restore GPRs. This is done as a PARALLEL if we are using
+ the load-multiple instructions. */
+ if (TARGET_SPE_ABI
+ && info->spe_64bit_regs_used != 0
+ && info->first_gp_reg_save != 32)
+ {
+ /* Determine whether we can address all of the registers that need
+ to be saved with an offset from frame_reg_rtx that fits in
+ the small const field for SPE memory instructions. */
+ int spe_regs_addressable
+ = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + sp_offset
+ + reg_size * (32 - info->first_gp_reg_save - 1))
+ && restoring_GPRs_inline);
+ int spe_offset;
+ int ool_adjust = 0;
+
+ if (spe_regs_addressable)
+ spe_offset = info->spe_gp_save_offset + sp_offset;
+ else
+ {
+ rtx old_frame_reg_rtx = frame_reg_rtx;
+ /* Make r11 point to the start of the SPE save area. We worried about
+ not clobbering it when we were saving registers in the prologue.
+ There's no need to worry here because the static chain is passed
+ anew to every function. */
+
+ if (!restoring_GPRs_inline)
+ ool_adjust = 8 * (info->first_gp_reg_save
+ - (FIRST_SAVRES_REGISTER + 1));
+ frame_reg_rtx = gen_rtx_REG (Pmode, 11);
+ emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
+ GEN_INT (info->spe_gp_save_offset
+ + sp_offset
+ - ool_adjust)));
+ /* Keep the invariant that frame_reg_rtx + sp_offset points
+ at the top of the stack frame. */
+ sp_offset = -info->spe_gp_save_offset + ool_adjust;
+
+ spe_offset = 0;
+ }
+
+ if (restoring_GPRs_inline)
+ {
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ {
+ rtx offset, addr, mem, reg;
+
+ /* We're doing all this to ensure that the offset fits into
+ the immediate field of 'evldd'.  */
+ gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
+
+ offset = GEN_INT (spe_offset + reg_size * i);
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
+ mem = gen_rtx_MEM (V2SImode, addr);
+ reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+
+ insn = emit_move_insn (reg, mem);
+ if (DEFAULT_ABI == ABI_V4)
+ {
+ if (frame_pointer_needed
+ && info->first_gp_reg_save + i
+ == HARD_FRAME_POINTER_REGNUM)
+ {
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (frame_reg_rtx,
+ sp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ }
+ }
+ else
+ {
+ rtx par;
+
+ par = rs6000_make_savres_rtx (info, frame_reg_rtx,
+ ool_adjust, reg_mode,
+ /*savep=*/false, /*gpr=*/true,
+ /*lr=*/true);
+ emit_jump_insn (par);
+ /* We don't want anybody else emitting things after we jumped
+ back. */
+ return;
+ }
+ }
+ else if (!restoring_GPRs_inline)
+ {
+ /* We are jumping to an out-of-line function. */
+ bool can_use_exit = info->first_fp_reg_save == 64;
+ rtx par;
+
+ /* Emit stack reset code if we need it. */
+ if (can_use_exit)
+ rs6000_emit_stack_reset (info, sp_reg_rtx, frame_reg_rtx,
+ sp_offset, can_use_exit);
+ else
+ {
+ rtx src_reg = gen_rtx_REG (Pmode, DEFAULT_ABI == ABI_AIX ? 12 : 11);
+
+ emit_insn (gen_add3_insn (src_reg, frame_reg_rtx,
+ GEN_INT (sp_offset - info->fp_size)));
+ if (REGNO (frame_reg_rtx) == REGNO (src_reg))
+ sp_offset = info->fp_size;
+ }
+
+ par = rs6000_make_savres_rtx (info, frame_reg_rtx,
+ info->gp_save_offset, reg_mode,
+ /*savep=*/false, /*gpr=*/true,
+ /*lr=*/can_use_exit);
+
+ if (can_use_exit)
+ {
+ if (info->cr_save_p)
+ {
+ rs6000_restore_saved_cr (cr_save_reg, using_mtcr_multiple);
+ if (DEFAULT_ABI == ABI_V4)
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR2_REGNO),
+ cfa_restores);
+ }
+
+ emit_jump_insn (par);
+
+ /* We don't want anybody else emitting things after we jumped
+ back. */
+ return;
+ }
+
+ insn = emit_insn (par);
+ if (DEFAULT_ABI == ABI_V4)
+ {
+ if (frame_pointer_needed)
+ {
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (frame_reg_rtx, sp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ for (i = info->first_gp_reg_save; i < 32; i++)
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (reg_mode, i), cfa_restores);
+ }
+ }
+ else if (using_load_multiple)
+ {
+ rtvec p;
+ p = rtvec_alloc (32 - info->first_gp_reg_save);
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset
+ + reg_size * i));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+
+ RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, reg, mem);
+ if (DEFAULT_ABI == ABI_V4)
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ if (DEFAULT_ABI == ABI_V4 && frame_pointer_needed)
+ {
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (frame_reg_rtx, sp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+ else
+ {
+ for (i = 0; i < 32 - info->first_gp_reg_save; i++)
+ if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->gp_save_offset
+ + sp_offset
+ + reg_size * i));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+ rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
+
+ insn = emit_move_insn (reg, mem);
+ if (DEFAULT_ABI == ABI_V4)
+ {
+ if (frame_pointer_needed
+ && info->first_gp_reg_save + i
+ == HARD_FRAME_POINTER_REGNUM)
+ {
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (frame_reg_rtx, sp_offset));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+ }
+ }
+
+ if (restore_lr && !restoring_GPRs_inline)
+ {
+ rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx,
+ info->lr_save_offset + sp_offset);
+
+ emit_move_insn (gen_rtx_REG (Pmode, 0), mem);
+ emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO),
+ gen_rtx_REG (Pmode, 0));
+ }
+
+ /* Restore fpr's if we need to do it without calling a function. */
+ if (restoring_FPRs_inline)
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ if ((df_regs_ever_live_p (info->first_fp_reg_save+i)
+ && ! call_used_regs[info->first_fp_reg_save+i]))
+ {
+ rtx addr, mem, reg;
+ addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->fp_save_offset
+ + sp_offset
+ + 8 * i));
+ mem = gen_frame_mem (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode), addr);
+ reg = gen_rtx_REG (((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
+ ? DFmode : SFmode),
+ info->first_fp_reg_save + i);
+
+ emit_move_insn (reg, mem);
+ if (DEFAULT_ABI == ABI_V4)
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
+ cfa_restores);
+ }
+
+ /* If we saved cr, restore it here. Just those that were used. */
+ if (info->cr_save_p)
+ {
+ rs6000_restore_saved_cr (cr_save_reg, using_mtcr_multiple);
+ if (DEFAULT_ABI == ABI_V4)
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, CR2_REGNO),
+ cfa_restores);
+ }
+
+ /* If this is V.4, unwind the stack pointer after all of the loads
+ have been done. */
+ insn = rs6000_emit_stack_reset (info, sp_reg_rtx, frame_reg_rtx,
+ sp_offset, !restoring_FPRs_inline);
+ if (insn)
+ {
+ if (cfa_restores)
+ {
+ REG_NOTES (insn) = cfa_restores;
+ cfa_restores = NULL_RTX;
+ }
+ add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (crtl->calls_eh_return)
+ {
+ rtx sa = EH_RETURN_STACKADJ_RTX;
+ emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
+ }
+
+ if (!sibcall)
+ {
+ rtvec p;
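+ /* Whether the out-of-line FP restore routine also restores LR.  */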
+ bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
+ if (! restoring_FPRs_inline)
+ p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
+ else
+ p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
+ ? gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 65))
+ : gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (Pmode, 65)));
+
+ /* If we have to restore more than two FP registers, branch to the
+ restore function. It will return to our caller. */
+ if (! restoring_FPRs_inline)
+ {
+ int i;
+ rtx sym;
+
+ sym = rs6000_savres_routine_sym (info,
+ /*savep=*/false,
+ /*gpr=*/false,
+ /*lr=*/lr);
+ RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
+ RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode,
+ DEFAULT_ABI == ABI_AIX
+ ? 1 : 11));
+ for (i = 0; i < 64 - info->first_fp_reg_save; i++)
+ {
+ rtx addr, mem;
+ addr = gen_rtx_PLUS (Pmode, sp_reg_rtx,
+ GEN_INT (info->fp_save_offset + 8*i));
+ mem = gen_frame_mem (DFmode, addr);
+
+ RTVEC_ELT (p, i+4) =
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (DFmode, info->first_fp_reg_save + i),
+ mem);
+ }
+ }
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ }
+}
+
+/* Write function epilogue. */
+
+static void
+rs6000_output_function_epilogue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (! HAVE_epilogue)
+ {
+ rtx insn = get_last_insn ();
+ /* If the last insn was a BARRIER, we don't have to write anything except
+ the trace table. */
+ if (GET_CODE (insn) == NOTE)
+ insn = prev_nonnote_insn (insn);
+ if (insn == 0 || GET_CODE (insn) != BARRIER)
+ {
+ /* This is slightly ugly, but at least we don't have two
+ copies of the epilogue-emitting code. */
+ start_sequence ();
+
+ /* A NOTE_INSN_DELETED is supposed to be at the start
+ and end of the "toplevel" insn chain. */
+ emit_note (NOTE_INSN_DELETED);
+ rs6000_emit_epilogue (FALSE);
+ emit_note (NOTE_INSN_DELETED);
+
+ /* Expand INSN_ADDRESSES so final() doesn't crash. */
+ {
+ rtx insn;
+ unsigned addr = 0;
+ for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
+ {
+ INSN_ADDRESSES_NEW (insn, addr);
+ addr += 4;
+ }
+ }
+
+ if (TARGET_DEBUG_STACK)
+ debug_rtx_list (get_insns (), 100);
+ final (get_insns (), file, FALSE);
+ end_sequence ();
+ }
+ }
+
+#if TARGET_MACHO
+ macho_branch_islands ();
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
+ /* Output a traceback table here. See /usr/include/sys/debug.h for info
+ on its format.
+
+ We don't output a traceback table if -finhibit-size-directive was
+ used. The documentation for -finhibit-size-directive reads
+ ``don't output a @code{.size} assembler directive, or anything
+ else that would cause trouble if the function is split in the
+ middle, and the two halves are placed at locations far apart in
+ memory.'' The traceback table has this property, since it
+ includes the offset from the start of the function to the
+ traceback table itself.
+
+ System V.4 PowerPC targets (and the embedded ABI derived from it)
+ use a different traceback table.  */
+ if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
+ && rs6000_traceback != traceback_none && !cfun->is_thunk)
+ {
+ const char *fname = NULL;
+ const char *language_string = lang_hooks.name;
+ int fixed_parms = 0, float_parms = 0, parm_info = 0;
+ int i;
+ int optional_tbtab;
+ rs6000_stack_t *info = rs6000_stack_info ();
+
+ if (rs6000_traceback == traceback_full)
+ optional_tbtab = 1;
+ else if (rs6000_traceback == traceback_part)
+ optional_tbtab = 0;
+ else
+ optional_tbtab = !optimize_size && !TARGET_ELF;
+
+ if (optional_tbtab)
+ {
+ fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ while (*fname == '.') /* V.4 encodes . in the name */
+ fname++;
+
+ /* Need label immediately before tbtab, so we can compute
+ its offset from the function start. */
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
+ ASM_OUTPUT_LABEL (file, fname);
+ }
+
+ /* The .tbtab pseudo-op can only be used for the first eight
+ expressions, since it can't handle the possibly variable
+ length fields that follow.  However, if you omit the optional
+ fields, the assembler outputs zeros for all optional fields
+ anyway, giving each variable-length field its minimum length
+ (as defined in sys/debug.h).  Thus we cannot use the .tbtab
+ pseudo-op at all.  */
+
+ /* An all-zero word flags the start of the tbtab, for debuggers
+ that have to find it by searching forward from the entry
+ point or from the current pc. */
+ fputs ("\t.long 0\n", file);
+
+ /* Tbtab format type. Use format type 0. */
+ fputs ("\t.byte 0,", file);
+
+ /* Language type. Unfortunately, there does not seem to be any
+ official way to discover the language being compiled, so we
+ use language_string.
+ C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
+ Java is 13. Objective-C is 14. Objective-C++ isn't assigned
+ a number, so for now use 9. LTO isn't assigned a number either,
+ so for now use 0. */
+ if (! strcmp (language_string, "GNU C")
+ || ! strcmp (language_string, "GNU GIMPLE"))
+ i = 0;
+ else if (! strcmp (language_string, "GNU F77")
+ || ! strcmp (language_string, "GNU Fortran"))
+ i = 1;
+ else if (! strcmp (language_string, "GNU Pascal"))
+ i = 2;
+ else if (! strcmp (language_string, "GNU Ada"))
+ i = 3;
+ else if (! strcmp (language_string, "GNU C++")
+ || ! strcmp (language_string, "GNU Objective-C++"))
+ i = 9;
+ else if (! strcmp (language_string, "GNU Java"))
+ i = 13;
+ else if (! strcmp (language_string, "GNU Objective-C"))
+ i = 14;
+ else
+ gcc_unreachable ();
+ fprintf (file, "%d,", i);
+
+ /* 8 single bit fields: global linkage (not set for C extern linkage,
+ apparently a PL/I convention?), out-of-line epilogue/prologue, offset
+ from start of procedure stored in tbtab, internal function, function
+ has controlled storage, function has no toc, function uses fp,
+ function logs/aborts fp operations. */
+ /* Assume that fp operations are used if any fp reg must be saved. */
+ fprintf (file, "%d,",
+ (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
+
+ /* 6 bitfields: function is interrupt handler, name present in
+ proc table, function calls alloca, on condition directives
+ (controls stack walks, 3 bits), saves condition reg, saves
+ link reg. */
+ /* The `function calls alloca' bit seems to be set whenever reg 31 is
+ set up as a frame pointer, even when there is no alloca call. */
+ fprintf (file, "%d,",
+ ((optional_tbtab << 6)
+ | ((optional_tbtab & frame_pointer_needed) << 5)
+ | (info->cr_save_p << 1)
+ | (info->lr_save_p)));
+
+ /* 3 bitfields: saves backchain, fixup code, number of fpr saved
+ (6 bits). */
+ fprintf (file, "%d,",
+ (info->push_p << 7) | (64 - info->first_fp_reg_save));
+
+ /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
+ fprintf (file, "%d,", (32 - first_reg_to_save ()));
+
+ if (optional_tbtab)
+ {
+ /* Compute the parameter info from the function decl argument
+ list. */
+ tree decl;
+ int next_parm_info_bit = 31;
+
+ for (decl = DECL_ARGUMENTS (current_function_decl);
+ decl; decl = DECL_CHAIN (decl))
+ {
+ rtx parameter = DECL_INCOMING_RTL (decl);
+ enum machine_mode mode = GET_MODE (parameter);
+
+ if (GET_CODE (parameter) == REG)
+ {
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ int bits;
+
+ float_parms++;
+
+ switch (mode)
+ {
+ case SFmode:
+ case SDmode:
+ bits = 0x2;
+ break;
+
+ case DFmode:
+ case DDmode:
+ case TFmode:
+ case TDmode:
+ bits = 0x3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If only one bit will fit, don't OR in this entry.  */
+ if (next_parm_info_bit > 0)
+ parm_info |= (bits << (next_parm_info_bit - 1));
+ next_parm_info_bit -= 2;
+ }
+ else
+ {
+ fixed_parms += ((GET_MODE_SIZE (mode)
+ + (UNITS_PER_WORD - 1))
+ / UNITS_PER_WORD);
+ next_parm_info_bit -= 1;
+ }
+ }
+ }
+ }
+
+ /* Number of fixed point parameters. */
+ /* This is actually the number of words of fixed point parameters;
+ thus an 8-byte struct counts as 2, and thus the maximum value is 8.  */
+ fprintf (file, "%d,", fixed_parms);
+
+ /* 2 bitfields: number of floating point parameters (7 bits), parameters
+ all on stack. */
+ /* This is actually the number of fp registers that hold parameters;
+ and thus the maximum value is 13. */
+ /* Set parameters on stack bit if parameters are not in their original
+ registers, regardless of whether they are on the stack? Xlc
+ seems to set the bit when not optimizing. */
+ fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
+
+ if (! optional_tbtab)
+ return;
+
+ /* Optional fields follow. Some are variable length. */
+
+ /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
+ 11 double float. */
+ /* There is an entry for each parameter in a register, in the order that
+ they occur in the parameter list. Any intervening arguments on the
+ stack are ignored. If the list overflows a long (max possible length
+ 34 bits) then completely leave off all elements that don't fit. */
+ /* Only emit this long if there was at least one parameter. */
+ if (fixed_parms || float_parms)
+ fprintf (file, "\t.long %d\n", parm_info);
+
+ /* Offset from start of code to tb table. */
+ fputs ("\t.long ", file);
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
+ RS6000_OUTPUT_BASENAME (file, fname);
+ putc ('-', file);
+ rs6000_output_function_entry (file, fname);
+ putc ('\n', file);
+
+ /* Interrupt handler mask. */
+ /* Omit this long, since we never set the interrupt handler bit
+ above. */
+
+ /* Number of CTL (controlled storage) anchors. */
+ /* Omit this long, since the has_ctl bit is never set above. */
+
+ /* Displacement into stack of each CTL anchor. */
+ /* Omit this list of longs, because there are no CTL anchors. */
+
+ /* Length of function name. */
+ if (*fname == '*')
+ ++fname;
+ fprintf (file, "\t.short %d\n", (int) strlen (fname));
+
+ /* Function name. */
+ assemble_string (fname, strlen (fname));
+
+ /* Register for alloca automatic storage; this is always reg 31.
+ Only emit this if the alloca bit was set above. */
+ if (frame_pointer_needed)
+ fputs ("\t.byte 31\n", file);
+
+ fputs ("\t.align 2\n", file);
+ }
+}
+
+/* A C compound statement that outputs the assembler code for a thunk
+ function, used to implement C++ virtual function calls with
+ multiple inheritance. The thunk acts as a wrapper around a virtual
+ function, adjusting the implicit object parameter before handing
+ control off to the real function.
+
+ First, emit code to add the integer DELTA to the location that
+ contains the incoming first argument. Assume that this argument
+ contains a pointer, and is the one used to pass the `this' pointer
+ in C++. This is the incoming argument *before* the function
+ prologue, e.g. `%o0' on a sparc. The addition must preserve the
+ values of all other incoming arguments.
+
+ After the addition, emit code to jump to FUNCTION, which is a
+ `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
+ not touch the return address. Hence returning from FUNCTION will
+ return to whoever called the current `thunk'.
+
+ The effect must be as if FUNCTION had been called directly with the
+ adjusted first argument. This macro is responsible for emitting
+ all of the code for a thunk function; output_function_prologue()
+ and output_function_epilogue() are not invoked.
+
+ The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
+ been extracted from it.) It might possibly be useful on some
+ targets, but probably not.
+
+ If you do not define this macro, the target-independent code in the
+ C++ frontend will generate a less efficient heavyweight thunk that
+ calls FUNCTION instead of jumping to it. The generic approach does
+ not support varargs. */
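+
+/* Illustration (not part of the original comment): given
+     struct A { virtual void f (); };
+     struct B { virtual void g (); };
+     struct C : A, B { void g (); };
+   a call to g through a B* that points at the B subobject of a C
+   reaches a thunk that subtracts the offset of B within C from
+   "this" and then jumps to C::g.  */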
+
+static void
+rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, insn, funexp;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+
+ /* Mark the end of the (empty) prologue. */
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Find the "this" pointer. If the function returns a structure,
+ the structure return pointer is in r3. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, 4);
+ else
+ this_rtx = gen_rtx_REG (Pmode, 3);
+
+ /* Apply the constant offset, if required. */
+ if (delta)
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
+
+ /* Apply the offset from the vtable, if required. */
+ if (vcall_offset)
+ {
+ rtx vcall_offset_rtx = GEN_INT (vcall_offset);
+ rtx tmp = gen_rtx_REG (Pmode, 12);
+
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
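+ /* If VCALL_OFFSET does not fit in a signed 16-bit displacement,
+ add it to the pointer first; otherwise fold it into the load.  */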
+ if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
+ {
+ emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
+ }
+ else
+ {
+ rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
+
+ emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
+ }
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (!TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
+
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ funexp = machopic_indirect_call_target (funexp);
+#endif
+
+ /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
+ generate sibcall RTL explicitly. */
+ insn = emit_call_insn (
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (4,
+ gen_rtx_CALL (VOIDmode,
+ funexp, const0_rtx),
+ gen_rtx_USE (VOIDmode, const0_rtx),
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (SImode,
+ LR_REGNO)),
+ gen_rtx_RETURN (VOIDmode))));
+ SIBLING_CALL_P (insn) = 1;
+ emit_barrier ();
+
+ /* Run just enough of rest_of_compilation to get the insns emitted.
+ There's not really enough bulk here to make other passes such as
+ instruction scheduling worthwhile.  Note that use_thunk calls
+ assemble_start_function and assemble_end_function.  */
+ insn = get_insns ();
+ insn_locators_alloc ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
+
+/* A quick summary of the various types of 'constant-pool tables'
+ under PowerPC:
+
+   Target      Flags            Name             One table per
+   AIX         (none)           AIX TOC          object file
+   AIX         -mfull-toc       AIX TOC          object file
+   AIX         -mminimal-toc    AIX minimal TOC  translation unit
+   SVR4/EABI   (none)           SVR4 SDATA       object file
+   SVR4/EABI   -fpic            SVR4 pic         object file
+   SVR4/EABI   -fPIC            SVR4 PIC         translation unit
+   SVR4/EABI   -mrelocatable    EABI TOC         function
+   SVR4/EABI   -maix            AIX TOC          object file
+   SVR4/EABI   -maix -mminimal-toc
+                                AIX minimal TOC  translation unit
+
+   Name             Reg.  Set by  entries  contains:
+                                  made by  addrs?  fp?      sum?
+
+   AIX TOC          2     crt0    as       Y       option   option
+   AIX minimal TOC  30    prolog  gcc      Y       Y        option
+   SVR4 SDATA       13    crt0    gcc      N       Y        N
+   SVR4 pic         30    prolog  ld       Y       not yet  N
+   SVR4 PIC         30    prolog  gcc      Y       option   option
+   EABI TOC         30    prolog  gcc      Y       option   option
+
+*/
+
+/* Hash functions for the hash table. */
+
+static unsigned
+rs6000_hash_constant (rtx k)
+{
+ enum rtx_code code = GET_CODE (k);
+ enum machine_mode mode = GET_MODE (k);
+ unsigned result = (code << 3) ^ mode;
+ const char *format;
+ int flen, fidx;
+
+ format = GET_RTX_FORMAT (code);
+ flen = strlen (format);
+ fidx = 0;
+
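+ /* The multipliers 613 and 1231 below are (presumably) just odd
+ primes chosen to mix the fields into the hash.  */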
+ switch (code)
+ {
+ case LABEL_REF:
+ return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
+
+ case CONST_DOUBLE:
+ if (mode != VOIDmode)
+ return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
+ flen = 2;
+ break;
+
+ case CODE_LABEL:
+ fidx = 3;
+ break;
+
+ default:
+ break;
+ }
+
+ for (; fidx < flen; fidx++)
+ switch (format[fidx])
+ {
+ case 's':
+ {
+ unsigned i, len;
+ const char *str = XSTR (k, fidx);
+ len = strlen (str);
+ result = result * 613 + len;
+ for (i = 0; i < len; i++)
+ result = result * 613 + (unsigned) str[i];
+ break;
+ }
+ case 'u':
+ case 'e':
+ result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
+ break;
+ case 'i':
+ case 'n':
+ result = result * 613 + (unsigned) XINT (k, fidx);
+ break;
+ case 'w':
+ if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
+ result = result * 613 + (unsigned) XWINT (k, fidx);
+ else
+ {
+ size_t i;
+ for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
+ result = result * 613 + (unsigned) (XWINT (k, fidx)
+ >> CHAR_BIT * i);
+ }
+ break;
+ case '0':
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return result;
+}
+
+static unsigned
+toc_hash_function (const void *hash_entry)
+{
+ const struct toc_hash_struct *thc =
+ (const struct toc_hash_struct *) hash_entry;
+ return rs6000_hash_constant (thc->key) ^ thc->key_mode;
+}
+
+/* Compare H1 and H2 for equivalence. */
+
+static int
+toc_hash_eq (const void *h1, const void *h2)
+{
+ rtx r1 = ((const struct toc_hash_struct *) h1)->key;
+ rtx r2 = ((const struct toc_hash_struct *) h2)->key;
+
+ if (((const struct toc_hash_struct *) h1)->key_mode
+ != ((const struct toc_hash_struct *) h2)->key_mode)
+ return 0;
+
+ return rtx_equal_p (r1, r2);
+}
+
+/* These are the names given by the C++ front-end to vtables, and
+ vtable-like objects. Ideally, this logic should not be here;
+ instead, there should be some programmatic way of inquiring as
+ to whether or not an object is a vtable. */
+
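+/* "_vt." is the old GNU C++ (pre-3.0) scheme; _ZTV (vtable),
+   _ZTT (VTT), _ZTI (typeinfo) and _ZTC (construction vtable) are
+   Itanium C++ ABI manglings.  */
+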
+#define VTABLE_NAME_P(NAME) \
+ (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
+ || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
+ || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
+ || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
+ || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
+
+#ifdef NO_DOLLAR_IN_LABEL
+/* Return a GGC-allocated character string translating dollar signs in
+ input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
+
+const char *
+rs6000_xcoff_strip_dollar (const char *name)
+{
+ char *strip, *p;
+ int len;
+
+ p = strchr (name, '$');
+
+ if (p == 0 || p == name)
+ return name;
+
+ len = strlen (name);
+ strip = (char *) alloca (len + 1);
+ strcpy (strip, name);
+ p = strchr (strip, '$');
+ while (p)
+ {
+ *p = '_';
+ p = strchr (p + 1, '$');
+ }
+
+ return ggc_alloc_string (strip, len);
+}
+#endif
+
+void
+rs6000_output_symbol_ref (FILE *file, rtx x)
+{
+ /* Currently C++ toc references to vtables can be emitted before it
+ is decided whether the vtable is public or private.  If this is
+ the case, then the linker will eventually complain that there is
+ a reference to an unknown section.  Thus, for vtables only, we
+ emit the TOC reference against the symbol and not the
+ section.  */
+ const char *name = XSTR (x, 0);
+
+ if (VTABLE_NAME_P (name))
+ {
+ RS6000_OUTPUT_BASENAME (file, name);
+ }
+ else
+ assemble_name (file, name);
+}
+
+/* Output a TOC entry. We derive the entry name from what is being
+ written. */
+
+void
+output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
+{
+ char buf[256];
+ const char *name = buf;
+ rtx base = x;
+ HOST_WIDE_INT offset = 0;
+
+ gcc_assert (!TARGET_NO_TOC);
+
+ /* When the linker won't eliminate them, don't output duplicate
+ TOC entries (this happens on AIX if there is any kind of TOC,
+ and on SVR4 under -fPIC or -mrelocatable). Don't do this for
+ CODE_LABELs. */
+ if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
+ {
+ struct toc_hash_struct *h;
+ void **found;
+
+ /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
+ time because GGC is not initialized at that point. */
+ if (toc_hash_table == NULL)
+ toc_hash_table = htab_create_ggc (1021, toc_hash_function,
+ toc_hash_eq, NULL);
+
+ h = ggc_alloc_toc_hash_struct ();
+ h->key = x;
+ h->key_mode = mode;
+ h->labelno = labelno;
+
+ found = htab_find_slot (toc_hash_table, h, INSERT);
+ if (*found == NULL)
+ *found = h;
+ else /* This is indeed a duplicate.
+ Set this label equal to that label. */
+ {
+ fputs ("\t.set ", file);
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
+ fprintf (file, "%d,", labelno);
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
+ fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
+ found)->labelno));
+ return;
+ }
+ }
+
+ /* If we're going to put a double constant in the TOC, make sure it's
+ aligned properly when strict alignment is on. */
+ if (GET_CODE (x) == CONST_DOUBLE
+ && STRICT_ALIGNMENT
+ && GET_MODE_BITSIZE (mode) >= 64
+ && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
+ ASM_OUTPUT_ALIGN (file, 3);
+
+ (*targetm.asm_out.internal_label) (file, "LC", labelno);
+
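+ /* Each entry is emitted either with the AIX ".tc name[TC],value"
+ pseudo-op or, under -mminimal-toc, as raw words in our private
+ table.  */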
+ /* Handle FP constants specially. Note that if we have a minimal
+ TOC, things we put here aren't actually in the TOC, so we can allow
+ FP constants. */
+ if (GET_CODE (x) == CONST_DOUBLE
+ && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
+ {
+ REAL_VALUE_TYPE rv;
+ long k[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
+ REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
+ else
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
+
+ if (TARGET_64BIT)
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs (DOUBLE_INT_ASM_OP, file);
+ else
+ fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
+ k[0] & 0xffffffff, k[1] & 0xffffffff,
+ k[2] & 0xffffffff, k[3] & 0xffffffff);
+ fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
+ k[0] & 0xffffffff, k[1] & 0xffffffff,
+ k[2] & 0xffffffff, k[3] & 0xffffffff);
+ return;
+ }
+ else
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs ("\t.long ", file);
+ else
+ fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
+ k[0] & 0xffffffff, k[1] & 0xffffffff,
+ k[2] & 0xffffffff, k[3] & 0xffffffff);
+ fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
+ k[0] & 0xffffffff, k[1] & 0xffffffff,
+ k[2] & 0xffffffff, k[3] & 0xffffffff);
+ return;
+ }
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
+ {
+ REAL_VALUE_TYPE rv;
+ long k[2];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+
+ if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
+ REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
+ else
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
+
+ if (TARGET_64BIT)
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs (DOUBLE_INT_ASM_OP, file);
+ else
+ fprintf (file, "\t.tc FD_%lx_%lx[TC],",
+ k[0] & 0xffffffff, k[1] & 0xffffffff);
+ fprintf (file, "0x%lx%08lx\n",
+ k[0] & 0xffffffff, k[1] & 0xffffffff);
+ return;
+ }
+ else
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs ("\t.long ", file);
+ else
+ fprintf (file, "\t.tc FD_%lx_%lx[TC],",
+ k[0] & 0xffffffff, k[1] & 0xffffffff);
+ fprintf (file, "0x%lx,0x%lx\n",
+ k[0] & 0xffffffff, k[1] & 0xffffffff);
+ return;
+ }
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
+ {
+ REAL_VALUE_TYPE rv;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
+ REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
+ else
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+
+ if (TARGET_64BIT)
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs (DOUBLE_INT_ASM_OP, file);
+ else
+ fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
+ fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+ return;
+ }
+ else
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs ("\t.long ", file);
+ else
+ fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
+ fprintf (file, "0x%lx\n", l & 0xffffffff);
+ return;
+ }
+ }
+ else if (GET_MODE (x) == VOIDmode
+ && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
+ {
+ unsigned HOST_WIDE_INT low;
+ HOST_WIDE_INT high;
+
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ low = CONST_DOUBLE_LOW (x);
+ high = CONST_DOUBLE_HIGH (x);
+ }
+ else
+#if HOST_BITS_PER_WIDE_INT == 32
+ {
+ low = INTVAL (x);
+ high = (low & 0x80000000) ? ~0 : 0;
+ }
+#else
+ {
+ low = INTVAL (x) & 0xffffffff;
+ high = (HOST_WIDE_INT) INTVAL (x) >> 32;
+ }
+#endif
+
+ /* TOC entries are always Pmode-sized, but since this
+ is a big-endian machine, if we're putting smaller
+ integer constants in the TOC we have to pad them.
+ (This is still a win over putting the constants in
+ a separate constant pool, because then we'd have
+ to have both a TOC entry _and_ the actual constant.)
+
+ For a 32-bit target, CONST_INT values are loaded and shifted
+ entirely within `low' and can be stored in one TOC entry. */
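+
+ /* For example (tracing the shift below), a HImode constant 0x1234
+ with 32-bit pointers is shifted left by 32 - 16 = 16 bits, so the
+ word emitted is 0x12340000. */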
+
+ /* It would be easy to make this work, but it doesn't now. */
+ gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
+
+ if (POINTER_SIZE > GET_MODE_BITSIZE (mode))
+ {
+#if HOST_BITS_PER_WIDE_INT == 32
+ lshift_double (low, high, POINTER_SIZE - GET_MODE_BITSIZE (mode),
+ POINTER_SIZE, &low, &high, 0);
+#else
+ low |= high << 32;
+ low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
+ high = (HOST_WIDE_INT) low >> 32;
+ low &= 0xffffffff;
+#endif
+ }
+
+ if (TARGET_64BIT)
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs (DOUBLE_INT_ASM_OP, file);
+ else
+ fprintf (file, "\t.tc ID_%lx_%lx[TC],",
+ (long) high & 0xffffffff, (long) low & 0xffffffff);
+ fprintf (file, "0x%lx%08lx\n",
+ (long) high & 0xffffffff, (long) low & 0xffffffff);
+ return;
+ }
+ else
+ {
+ if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs ("\t.long ", file);
+ else
+ fprintf (file, "\t.tc ID_%lx_%lx[TC],",
+ (long) high & 0xffffffff, (long) low & 0xffffffff);
+ fprintf (file, "0x%lx,0x%lx\n",
+ (long) high & 0xffffffff, (long) low & 0xffffffff);
+ }
+ else
+ {
+ if (TARGET_MINIMAL_TOC)
+ fputs ("\t.long ", file);
+ else
+ fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
+ fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
+ }
+ return;
+ }
+ }
+
+ if (GET_CODE (x) == CONST)
+ {
+ gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
+
+ base = XEXP (XEXP (x, 0), 0);
+ offset = INTVAL (XEXP (XEXP (x, 0), 1));
+ }
+
+ switch (GET_CODE (base))
+ {
+ case SYMBOL_REF:
+ name = XSTR (base, 0);
+ break;
+
+ case LABEL_REF:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L",
+ CODE_LABEL_NUMBER (XEXP (base, 0)));
+ break;
+
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TARGET_MINIMAL_TOC)
+ fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
+ else
+ {
+ fputs ("\t.tc ", file);
+ RS6000_OUTPUT_BASENAME (file, name);
+
+ if (offset < 0)
+ fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
+ else if (offset)
+ fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
+
+ fputs ("[TC],", file);
+ }
+
+ /* Currently C++ toc references to vtables can be emitted before it
+ is decided whether the vtable is public or private. If this is
+ the case, then the linker will eventually complain that there is
+ a TOC reference to an unknown section. Thus, for vtables only,
+ we emit the TOC reference against the symbol rather than the
+ section. */
+ if (VTABLE_NAME_P (name))
+ {
+ RS6000_OUTPUT_BASENAME (file, name);
+ if (offset < 0)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
+ else if (offset > 0)
+ fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
+ }
+ else
+ output_addr_const (file, x);
+ putc ('\n', file);
+}
+
+/* Output an assembler pseudo-op to write an ASCII string of N characters
+ starting at P to FILE.
+
+ On the RS/6000, we have to do this using the .byte operation and
+ write out special characters outside the quoted string.
+ Also, the assembler is broken; very long strings are truncated,
+ so we must artificially break them up early. */
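+
+/* For example, output_ascii (file, "Hi\n", 3) emits
+
+ .byte "Hi"
+ .byte 10
+
+ following the for_string/for_decimal logic below. */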
+
+void
+output_ascii (FILE *file, const char *p, int n)
+{
+ char c;
+ int i, count_string;
+ const char *for_string = "\t.byte \"";
+ const char *for_decimal = "\t.byte ";
+ const char *to_close = NULL;
+
+ count_string = 0;
+ for (i = 0; i < n; i++)
+ {
+ c = *p++;
+ if (c >= ' ' && c < 0177)
+ {
+ if (for_string)
+ fputs (for_string, file);
+ putc (c, file);
+
+ /* Write two quotes to get one. */
+ if (c == '"')
+ {
+ putc (c, file);
+ ++count_string;
+ }
+
+ for_string = NULL;
+ for_decimal = "\"\n\t.byte ";
+ to_close = "\"\n";
+ ++count_string;
+
+ if (count_string >= 512)
+ {
+ fputs (to_close, file);
+
+ for_string = "\t.byte \"";
+ for_decimal = "\t.byte ";
+ to_close = NULL;
+ count_string = 0;
+ }
+ }
+ else
+ {
+ if (for_decimal)
+ fputs (for_decimal, file);
+ fprintf (file, "%d", c);
+
+ for_string = "\n\t.byte \"";
+ for_decimal = ", ";
+ to_close = "\n";
+ count_string = 0;
+ }
+ }
+
+ /* Now close the string if we have written one. Then end the line. */
+ if (to_close)
+ fputs (to_close, file);
+}
+
+/* Generate a unique section name for FILENAME for a section type
+ represented by SECTION_DESC. Output goes into BUF.
+
+ SECTION_DESC can be any string, as long as it is different for each
+ possible section type.
+
+ We name the section in the same manner as xlc. The name begins with an
+ underscore followed by the filename (after stripping any leading directory
+ names) with the last period replaced by the string SECTION_DESC. If
+ FILENAME does not contain a period, SECTION_DESC is appended to the end of
+ the name. */
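+
+/* For example (with hypothetical arguments), FILENAME "src/foo.c" and
+ SECTION_DESC "_bss_" produce "_foo_bss_": the leading directory is
+ stripped and the final period is replaced by SECTION_DESC. */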
+
+void
+rs6000_gen_section_name (char **buf, const char *filename,
+ const char *section_desc)
+{
+ const char *q, *after_last_slash, *last_period = 0;
+ char *p;
+ int len;
+
+ after_last_slash = filename;
+ for (q = filename; *q; q++)
+ {
+ if (*q == '/')
+ after_last_slash = q + 1;
+ else if (*q == '.')
+ last_period = q;
+ }
+
+ len = strlen (after_last_slash) + strlen (section_desc) + 2;
+ *buf = (char *) xmalloc (len);
+
+ p = *buf;
+ *p++ = '_';
+
+ for (q = after_last_slash; *q; q++)
+ {
+ if (q == last_period)
+ {
+ strcpy (p, section_desc);
+ p += strlen (section_desc);
+ break;
+ }
+
+ else if (ISALNUM (*q))
+ *p++ = *q;
+ }
+
+ if (last_period == 0)
+ strcpy (p, section_desc);
+ else
+ *p = '\0';
+}
+
+/* Emit profile function. */
+
+void
+output_profile_hook (int labelno ATTRIBUTE_UNUSED)
+{
+ /* Non-standard profiling for kernels, which just saves LR then calls
+ _mcount without worrying about arg saves. The idea is to change
+ the function prologue as little as possible as it isn't easy to
+ account for arg save/restore code added just for _mcount. */
+ if (TARGET_PROFILE_KERNEL)
+ return;
+
+ if (DEFAULT_ABI == ABI_AIX)
+ {
+#ifndef NO_PROFILE_COUNTERS
+# define NO_PROFILE_COUNTERS 0
+#endif
+ if (NO_PROFILE_COUNTERS)
+ emit_library_call (init_one_libfunc (RS6000_MCOUNT),
+ LCT_NORMAL, VOIDmode, 0);
+ else
+ {
+ char buf[30];
+ const char *label_name;
+ rtx fun;
+
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+ label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
+ fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
+
+ emit_library_call (init_one_libfunc (RS6000_MCOUNT),
+ LCT_NORMAL, VOIDmode, 1, fun, Pmode);
+ }
+ }
+ else if (DEFAULT_ABI == ABI_DARWIN)
+ {
+ const char *mcount_name = RS6000_MCOUNT;
+ int caller_addr_regno = LR_REGNO;
+
+ /* Be conservative and always set this, at least for now. */
+ crtl->uses_pic_offset_table = 1;
+
+#if TARGET_MACHO
+ /* For PIC code, set up a stub and collect the caller's address
+ from r0, which is where the prologue puts it. */
+ if (MACHOPIC_INDIRECT
+ && crtl->uses_pic_offset_table)
+ caller_addr_regno = 0;
+#endif
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
+ LCT_NORMAL, VOIDmode, 1,
+ gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
+ }
+}
+
+/* Write function profiler code. */
+
+void
+output_function_profiler (FILE *file, int labelno)
+{
+ char buf[100];
+
+ switch (DEFAULT_ABI)
+ {
+ default:
+ gcc_unreachable ();
+
+ case ABI_V4:
+ if (!TARGET_32BIT)
+ {
+ warning (0, "no profiling of 64-bit code for this ABI");
+ return;
+ }
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+ fprintf (file, "\tmflr %s\n", reg_names[0]);
+ if (NO_PROFILE_COUNTERS)
+ {
+ asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
+ reg_names[0], reg_names[1]);
+ }
+ else if (TARGET_SECURE_PLT && flag_pic)
+ {
+ asm_fprintf (file, "\tbcl 20,31,1f\n1:\n\t{st|stw} %s,4(%s)\n",
+ reg_names[0], reg_names[1]);
+ asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
+ asm_fprintf (file, "\t{cau|addis} %s,%s,",
+ reg_names[12], reg_names[12]);
+ assemble_name (file, buf);
+ asm_fprintf (file, "-1b@ha\n\t{cal|la} %s,", reg_names[0]);
+ assemble_name (file, buf);
+ asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
+ }
+ else if (flag_pic == 1)
+ {
+ fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
+ asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
+ reg_names[0], reg_names[1]);
+ asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
+ asm_fprintf (file, "\t{l|lwz} %s,", reg_names[0]);
+ assemble_name (file, buf);
+ asm_fprintf (file, "@got(%s)\n", reg_names[12]);
+ }
+ else if (flag_pic > 1)
+ {
+ asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
+ reg_names[0], reg_names[1]);
+ /* Now, we need to get the address of the label. */
+ fputs ("\tbcl 20,31,1f\n\t.long ", file);
+ assemble_name (file, buf);
+ fputs ("-.\n1:", file);
+ asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
+ asm_fprintf (file, "\t{l|lwz} %s,0(%s)\n",
+ reg_names[0], reg_names[11]);
+ asm_fprintf (file, "\t{cax|add} %s,%s,%s\n",
+ reg_names[0], reg_names[0], reg_names[11]);
+ }
+ else
+ {
+ asm_fprintf (file, "\t{liu|lis} %s,", reg_names[12]);
+ assemble_name (file, buf);
+ fputs ("@ha\n", file);
+ asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
+ reg_names[0], reg_names[1]);
+ asm_fprintf (file, "\t{cal|la} %s,", reg_names[0]);
+ assemble_name (file, buf);
+ asm_fprintf (file, "@l(%s)\n", reg_names[12]);
+ }
+
+ /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
+ fprintf (file, "\tbl %s%s\n",
+ RS6000_MCOUNT, flag_pic ? "@plt" : "");
+ break;
+
+ case ABI_AIX:
+ case ABI_DARWIN:
+ if (!TARGET_PROFILE_KERNEL)
+ {
+ /* Don't do anything, done in output_profile_hook (). */
+ }
+ else
+ {
+ gcc_assert (!TARGET_32BIT);
+
+ asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
+ asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
+
+ if (cfun->static_chain_decl != NULL)
+ {
+ asm_fprintf (file, "\tstd %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ asm_fprintf (file, "\tld %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ }
+ else
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ }
+ break;
+ }
+}
+
+
+
+/* The following variable holds the last issued insn. */
+
+static rtx last_scheduled_insn;
+
+/* The following variable helps to balance issuing of load and
+ store instructions. */
+
+static int load_store_pendulum;
+
+/* Power4 load update and store update instructions are cracked into a
+ load or store and an integer insn which are executed in the same cycle.
+ Branches have their own dispatch slot which does not count against the
+ GCC issue rate, but it changes the program flow so there are no other
+ instructions to issue in this cycle. */
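+
+/* For example, when scheduling for dispatch groups (rs6000_sched_groups),
+ a cracked insn seen with MORE == 3 leaves one issue slot (more - 2),
+ while a microcoded insn leaves none. */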
+
+static int
+rs6000_variable_issue_1 (rtx insn, int more)
+{
+ last_scheduled_insn = insn;
+ if (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ {
+ cached_can_issue_more = more;
+ return cached_can_issue_more;
+ }
+
+ if (insn_terminates_group_p (insn, current_group))
+ {
+ cached_can_issue_more = 0;
+ return cached_can_issue_more;
+ }
+
+ /* If the insn has no reservation but we reach here anyway, leave the issue count unchanged. */
+ if (recog_memoized (insn) < 0)
+ return more;
+
+ if (rs6000_sched_groups)
+ {
+ if (is_microcoded_insn (insn))
+ cached_can_issue_more = 0;
+ else if (is_cracked_insn (insn))
+ cached_can_issue_more = more > 2 ? more - 2 : 0;
+ else
+ cached_can_issue_more = more - 1;
+
+ return cached_can_issue_more;
+ }
+
+ if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
+ return 0;
+
+ cached_can_issue_more = more - 1;
+ return cached_can_issue_more;
+}
+
+static int
+rs6000_variable_issue (FILE *stream, int verbose, rtx insn, int more)
+{
+ int r = rs6000_variable_issue_1 (insn, more);
+ if (verbose)
+ fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
+ return r;
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ INSN's dependency on DEP_INSN through LINK. COST is the current cost. */
+
+static int
+rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type attr_type;
+
+ if (! recog_memoized (insn))
+ return 0;
+
+ switch (REG_NOTE_KIND (link))
+ {
+ case REG_DEP_TRUE:
+ {
+ /* Data dependency; DEP_INSN writes a register that INSN reads
+ some cycles later. */
+
+ /* Separate a load from a narrower, dependent store. */
+ if (rs6000_sched_groups
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
+ && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
+ && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
+ > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
+ return cost + 14;
+
+ attr_type = get_attr_type (insn);
+
+ switch (attr_type)
+ {
+ case TYPE_JMPREG:
+ /* Tell the first scheduling pass about the latency between
+ a mtctr and bctr (and mtlr and br/blr). The first
+ scheduling pass will not know about this latency since
+ the mtctr instruction, which has the latency associated
+ with it, will be generated by reload. */
+ return TARGET_POWER ? 5 : 4;
+ case TYPE_BRANCH:
+ /* Leave some extra cycles between a compare and its
+ dependent branch, to inhibit expensive mispredicts. */
+ if ((rs6000_cpu_attr == CPU_PPC603
+ || rs6000_cpu_attr == CPU_PPC604
+ || rs6000_cpu_attr == CPU_PPC604E
+ || rs6000_cpu_attr == CPU_PPC620
+ || rs6000_cpu_attr == CPU_PPC630
+ || rs6000_cpu_attr == CPU_PPC750
+ || rs6000_cpu_attr == CPU_PPC7400
+ || rs6000_cpu_attr == CPU_PPC7450
+ || rs6000_cpu_attr == CPU_POWER4
+ || rs6000_cpu_attr == CPU_POWER5
+ || rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_CELL)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_CMP:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_CR_LOGICAL:
+ case TYPE_DELAYED_CR:
+ return cost + 2;
+ default:
+ break;
+ }
+ break;
+
+ case TYPE_STORE:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+ if (GET_CODE (PATTERN (insn)) != SET
+ /* If this happens, we have to extend this to schedule
+ optimally. Return default for now. */
+ return cost;
+
+ /* Adjust the cost for the case where the value written
+ by a fixed point operation is used as the address
+ gen value on a store. */
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_CNTLZ:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 4;
+ break;
+ }
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 6;
+ break;
+ }
+ case TYPE_INTEGER:
+ case TYPE_COMPARE:
+ case TYPE_FAST_COMPARE:
+ case TYPE_EXTS:
+ case TYPE_SHIFT:
+ case TYPE_INSERT_WORD:
+ case TYPE_INSERT_DWORD:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 3;
+ break;
+ }
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 17;
+ break;
+ }
+ case TYPE_IDIV:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 45;
+ break;
+ }
+ case TYPE_LDIV:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 57;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ break;
+
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+ /* Adjust the cost for the case where the value written
+ by a fixed point instruction is used within the address
+ gen portion of a subsequent load(u)(x). */
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_CNTLZ:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 4;
+ break;
+ }
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 6;
+ break;
+ }
+ case TYPE_INTEGER:
+ case TYPE_COMPARE:
+ case TYPE_FAST_COMPARE:
+ case TYPE_EXTS:
+ case TYPE_SHIFT:
+ case TYPE_INSERT_WORD:
+ case TYPE_INSERT_DWORD:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 3;
+ break;
+ }
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 17;
+ break;
+ }
+ case TYPE_IDIV:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 45;
+ break;
+ }
+ case TYPE_LDIV:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 57;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ break;
+
+ case TYPE_FPLOAD:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0)
+ && (get_attr_type (dep_insn) == TYPE_MFFGPR))
+ return 2;
+
+ default:
+ break;
+ }
+
+ /* Fall out to return default cost. */
+ }
+ break;
+
+ case REG_DEP_OUTPUT:
+ /* Output dependency; DEP_INSN writes a register that INSN writes some
+ cycles later. */
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+ attr_type = get_attr_type (insn);
+
+ switch (attr_type)
+ {
+ case TYPE_FP:
+ if (get_attr_type (dep_insn) == TYPE_FP)
+ return 1;
+ break;
+ case TYPE_FPLOAD:
+ if (get_attr_type (dep_insn) == TYPE_MFFGPR)
+ return 2;
+ break;
+ default:
+ break;
+ }
+ }
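+ /* Fall through: an output dependency not handled above costs
+ nothing, just like an anti dependency. */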
+ case REG_DEP_ANTI:
+ /* Anti dependency; DEP_INSN reads a register that INSN writes some
+ cycles later. */
+ return 0;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return cost;
+}
+
+/* Debug version of rs6000_adjust_cost. */
+
+static int
+rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
+
+ if (ret != cost)
+ {
+ const char *dep;
+
+ switch (REG_NOTE_KIND (link))
+ {
+ default: dep = "unknown dependency"; break;
+ case REG_DEP_TRUE: dep = "data dependency"; break;
+ case REG_DEP_OUTPUT: dep = "output dependency"; break;
+ case REG_DEP_ANTI: dep = "anti dependency"; break;
+ }
+
+ fprintf (stderr,
+ "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
+ "%s, insn:\n", ret, cost, dep);
+
+ debug_rtx (insn);
+ }
+
+ return ret;
+}
+
+/* Return true if INSN is microcoded, false otherwise. */
+
+static bool
+is_microcoded_insn (rtx insn)
+{
+ if (!insn || !NONDEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ if (rs6000_cpu_attr == CPU_CELL)
+ return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
+
+ if (rs6000_sched_groups)
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_LOAD_EXT_U
+ || type == TYPE_LOAD_EXT_UX
+ || type == TYPE_LOAD_UX
+ || type == TYPE_STORE_UX
+ || type == TYPE_MFCR)
+ return true;
+ }
+
+ return false;
+}
+
+/* The function returns true if INSN is cracked into 2 instructions
+ by the processor (and therefore occupies 2 issue slots). */
+
+static bool
+is_cracked_insn (rtx insn)
+{
+ if (!insn || !NONDEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ if (rs6000_sched_groups)
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_LOAD_U || type == TYPE_STORE_U
+ || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U
+ || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX
+ || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR
+ || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE
+ || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
+ || type == TYPE_IDIV || type == TYPE_LDIV
+ || type == TYPE_INSERT_WORD)
+ return true;
+ }
+
+ return false;
+}
+
+/* The function returns true if INSN can be issued only from
+ the branch slot. */
+
+static bool
+is_branch_slot_insn (rtx insn)
+{
+ if (!insn || !NONDEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ if (rs6000_sched_groups)
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_BRANCH || type == TYPE_JMPREG)
+ return true;
+ return false;
+ }
+
+ return false;
+}
+
+/* Return true if OUT_INSN sets a value that is used in the address
+ generation computation of IN_INSN. */
+static bool
+set_to_load_agen (rtx out_insn, rtx in_insn)
+{
+ rtx out_set, in_set;
+
+ /* For performance reasons, only handle the simple case where
+ both insns are a single_set. */
+ out_set = single_set (out_insn);
+ if (out_set)
+ {
+ in_set = single_set (in_insn);
+ if (in_set)
+ return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
+ }
+
+ return false;
+}
+
+/* Return true if the target storage location of INSN1 is adjacent to
+ the target storage location of INSN2. */
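+
+/* For example, assuming MEM_SIZE is recorded for both stores, a 4-byte
+ store to 0(r9) followed by a 4-byte store to 4(r9) uses the same base
+ register with val_diff == 4 == MEM_SIZE of the first store, so the
+ locations are adjacent. */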
+
+static bool
+adjacent_mem_locations (rtx insn1, rtx insn2)
+{
+ rtx a = get_store_dest (PATTERN (insn1));
+ rtx b = get_store_dest (PATTERN (insn2));
+
+ if ((GET_CODE (XEXP (a, 0)) == REG
+ || (GET_CODE (XEXP (a, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
+ && (GET_CODE (XEXP (b, 0)) == REG
+ || (GET_CODE (XEXP (b, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
+ {
+ HOST_WIDE_INT val0 = 0, val1 = 0, val_diff;
+ rtx reg0, reg1;
+
+ if (GET_CODE (XEXP (a, 0)) == PLUS)
+ {
+ reg0 = XEXP (XEXP (a, 0), 0);
+ val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+ }
+ else
+ reg0 = XEXP (a, 0);
+
+ if (GET_CODE (XEXP (b, 0)) == PLUS)
+ {
+ reg1 = XEXP (XEXP (b, 0), 0);
+ val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+ }
+ else
+ reg1 = XEXP (b, 0);
+
+ val_diff = val1 - val0;
+
+ return ((REGNO (reg0) == REGNO (reg1))
+ && ((MEM_SIZE (a) && val_diff == INTVAL (MEM_SIZE (a)))
+ || (MEM_SIZE (b) && val_diff == -INTVAL (MEM_SIZE (b)))));
+ }
+
+ return false;
+}
+
+/* Update the integer scheduling priority INSN_PRIORITY (INSN).
+ Increasing the priority executes INSN earlier; reducing it executes
+ INSN later. */
+
+static int
+rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
+{
+ /* On machines (like the 750) which have asymmetric integer units,
+ where one integer unit can do multiply and divides and the other
+ can't, reduce the priority of multiply/divide so it is scheduled
+ before other integer operations. */
+
+#if 0
+ if (! INSN_P (insn))
+ return priority;
+
+ if (GET_CODE (PATTERN (insn)) == USE)
+ return priority;
+
+ switch (rs6000_cpu_attr) {
+ case CPU_PPC750:
+ switch (get_attr_type (insn))
+ {
+ default:
+ break;
+
+ case TYPE_IMUL:
+ case TYPE_IDIV:
+ fprintf (stderr, "priority was %#x (%d) before adjustment\n",
+ priority, priority);
+ if (priority >= 0 && priority < 0x01000000)
+ priority >>= 3;
+ break;
+ }
+ }
+#endif
+
+ if (insn_must_be_first_in_group (insn)
+ && reload_completed
+ && current_sched_info->sched_max_insns_priority
+ && rs6000_sched_restricted_insns_priority)
+ {
+ /* Prioritize insns that can be dispatched only in the first
+ dispatch slot. */
+ if (rs6000_sched_restricted_insns_priority == 1)
+ /* Attach highest priority to insn. This means that in
+ haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
+ precede 'priority' (critical path) considerations. */
+ return current_sched_info->sched_max_insns_priority;
+ else if (rs6000_sched_restricted_insns_priority == 2)
+ /* Increase priority of insn by a minimal amount. This means that in
+ haifa-sched.c:ready_sort(), only 'priority' (critical path)
+ considerations precede dispatch-slot restriction considerations. */
+ return (priority + 1);
+ }
+
+ if (rs6000_cpu == PROCESSOR_POWER6
+ && ((load_store_pendulum == -2 && is_load_insn (insn))
+ || (load_store_pendulum == 2 && is_store_insn (insn))))
+ /* Attach highest priority to insn if the scheduler has just issued two
+ stores and this instruction is a load, or two loads and this instruction
+ is a store. Power6 wants loads and stores scheduled alternately
+ when possible. */
+ return current_sched_info->sched_max_insns_priority;
+
+ return priority;
+}
+
+/* Return true if the instruction is nonpipelined on the Cell. */
+static bool
+is_nonpipeline_insn (rtx insn)
+{
+ enum attr_type type;
+ if (!insn || !NONDEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ type = get_attr_type (insn);
+ if (type == TYPE_IMUL
+ || type == TYPE_IMUL2
+ || type == TYPE_IMUL3
+ || type == TYPE_LMUL
+ || type == TYPE_IDIV
+ || type == TYPE_LDIV
+ || type == TYPE_SDIV
+ || type == TYPE_DDIV
+ || type == TYPE_SSQRT
+ || type == TYPE_DSQRT
+ || type == TYPE_MFCR
+ || type == TYPE_MFCRF
+ || type == TYPE_MFJMPR)
+ {
+ return true;
+ }
+ return false;
+}
+
+
+/* Return how many instructions the machine can issue per cycle. */
+
+static int
+rs6000_issue_rate (void)
+{
+ /* Unless scheduling for register pressure, use issue rate of 1 for
+ first scheduling pass to decrease degradation. */
+ if (!reload_completed && !flag_sched_pressure)
+ return 1;
+
+ switch (rs6000_cpu_attr)
+ {
+ case CPU_RIOS1: /* ? */
+ case CPU_RS64A:
+ case CPU_PPC601: /* ? */
+ case CPU_PPC7450:
+ return 3;
+ case CPU_PPC440:
+ case CPU_PPC603:
+ case CPU_PPC750:
+ case CPU_PPC7400:
+ case CPU_PPC8540:
+ case CPU_CELL:
+ case CPU_PPCE300C2:
+ case CPU_PPCE300C3:
+ case CPU_PPCE500MC:
+ case CPU_PPCE500MC64:
+ case CPU_TITAN:
+ return 2;
+ case CPU_RIOS2:
+ case CPU_PPC476:
+ case CPU_PPC604:
+ case CPU_PPC604E:
+ case CPU_PPC620:
+ case CPU_PPC630:
+ return 4;
+ case CPU_POWER4:
+ case CPU_POWER5:
+ case CPU_POWER6:
+ case CPU_POWER7:
+ return 5;
+ default:
+ return 1;
+ }
+}
+
+/* Return how many instructions to look ahead for better insn
+ scheduling. */
+
+static int
+rs6000_use_sched_lookahead (void)
+{
+ if (rs6000_cpu_attr == CPU_PPC8540)
+ return 4;
+ if (rs6000_cpu_attr == CPU_CELL)
+ return (reload_completed ? 8 : 0);
+ return 0;
+}
+
+/* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
+static int
+rs6000_use_sched_lookahead_guard (rtx insn)
+{
+ if (rs6000_cpu_attr != CPU_CELL)
+ return 1;
+
+ if (insn == NULL_RTX || !INSN_P (insn))
+ abort ();
+
+ if (!reload_completed
+ || is_nonpipeline_insn (insn)
+ || is_microcoded_insn (insn))
+ return 0;
+
+ return 1;
+}
+
+/* Determine if PAT refers to memory. */
+
+static bool
+is_mem_ref (rtx pat)
+{
+ const char *fmt;
+ int i, j;
+ bool ret = false;
+
+ /* stack_tie does not produce any real memory traffic. */
+ if (GET_CODE (pat) == UNSPEC
+ && XINT (pat, 1) == UNSPEC_TIE)
+ return false;
+
+ if (GET_CODE (pat) == MEM)
+ return true;
+
+ /* Recursively process the pattern. */
+ fmt = GET_RTX_FORMAT (GET_CODE (pat));
+
+ for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0 && !ret; i--)
+ {
+ if (fmt[i] == 'e')
+ ret |= is_mem_ref (XEXP (pat, i));
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
+ ret |= is_mem_ref (XVECEXP (pat, i, j));
+ }
+
+ return ret;
+}
+
+/* Determine if PAT is a PATTERN of a load insn. */
+
+static bool
+is_load_insn1 (rtx pat)
+{
+ if (!pat)
+ return false;
+
+ if (GET_CODE (pat) == SET)
+ return is_mem_ref (SET_SRC (pat));
+
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (is_load_insn1 (XVECEXP (pat, 0, i)))
+ return true;
+ }
+
+ return false;
+}
+
+/* Determine if INSN loads from memory. */
+
+static bool
+is_load_insn (rtx insn)
+{
+ if (!insn || !INSN_P (insn))
+ return false;
+
+ if (GET_CODE (insn) == CALL_INSN)
+ return false;
+
+ return is_load_insn1 (PATTERN (insn));
+}
+
+/* Determine if PAT is a PATTERN of a store insn. */
+
+static bool
+is_store_insn1 (rtx pat)
+{
+ if (!pat)
+ return false;
+
+ if (GET_CODE (pat) == SET)
+ return is_mem_ref (SET_DEST (pat));
+
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (is_store_insn1 (XVECEXP (pat, 0, i)))
+ return true;
+ }
+
+ return false;
+}
+
+/* Determine if INSN stores to memory. */
+
+static bool
+is_store_insn (rtx insn)
+{
+ if (!insn || !INSN_P (insn))
+ return false;
+
+ return is_store_insn1 (PATTERN (insn));
+}
+
+/* Return the dest of a store insn. */
+
+static rtx
+get_store_dest (rtx pat)
+{
+ gcc_assert (is_store_insn1 (pat));
+
+ if (GET_CODE (pat) == SET)
+ return SET_DEST (pat);
+ else if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx inner_pat = XVECEXP (pat, 0, i);
+ if (GET_CODE (inner_pat) == SET
+ && is_mem_ref (SET_DEST (inner_pat)))
+ return SET_DEST (inner_pat);
+ }
+ }
+ /* We shouldn't get here, because we should have either a simple
+ store insn or a store with update, both of which are covered above. */
+ gcc_unreachable ();
+}
+
+/* Returns whether the dependence between INSN and NEXT is considered
+ costly by the given target. */
+
+static bool
+rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
+{
+ rtx insn;
+ rtx next;
+
+ /* If the flag is not enabled, no dependence is considered costly;
+ allow all dependent insns in the same group.
+ This is the most aggressive option. */
+ if (rs6000_sched_costly_dep == no_dep_costly)
+ return false;
+
+ /* If the flag is set to 1, a dependence is always considered costly;
+ do not allow dependent instructions in the same group.
+ This is the most conservative option. */
+ if (rs6000_sched_costly_dep == all_deps_costly)
+ return true;
+
+ insn = DEP_PRO (dep);
+ next = DEP_CON (dep);
+
+ if (rs6000_sched_costly_dep == store_to_load_dep_costly
+ && is_load_insn (next)
+ && is_store_insn (insn))
+ /* Prevent load after store in the same group. */
+ return true;
+
+ if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
+ && is_load_insn (next)
+ && is_store_insn (insn)
+ && DEP_TYPE (dep) == REG_DEP_TRUE)
+ /* Prevent load after store in the same group if it is a true
+ dependence. */
+ return true;
+
+ /* The flag is set to X; dependences with latency >= X are considered costly,
+ and will not be scheduled in the same group. */
+ if (rs6000_sched_costly_dep <= max_dep_latency
+ && ((cost - distance) >= (int)rs6000_sched_costly_dep))
+ return true;
+
+ return false;
+}
+
+/* Return the next insn after INSN that is found before TAIL is reached,
+ skipping any "non-active" insns - insns that will not actually occupy
+ an issue slot. Return NULL_RTX if such an insn is not found. */
+
+static rtx
+get_next_active_insn (rtx insn, rtx tail)
+{
+ if (insn == NULL_RTX || insn == tail)
+ return NULL_RTX;
+
+ while (1)
+ {
+ insn = NEXT_INSN (insn);
+ if (insn == NULL_RTX || insn == tail)
+ return NULL_RTX;
+
+ if (CALL_P (insn)
+ || JUMP_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER
+ && INSN_CODE (insn) != CODE_FOR_stack_tie))
+ break;
+ }
+ return insn;
+}
+
+/* We are about to begin issuing insns for this clock cycle. */
+
+static int
+rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
+ rtx *ready ATTRIBUTE_UNUSED,
+ int *pn_ready ATTRIBUTE_UNUSED,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ int n_ready = *pn_ready;
+
+ if (sched_verbose)
+ fprintf (dump, "// rs6000_sched_reorder :\n");
+
+ /* Reorder the ready list, if the second to last ready insn
+ is a nonpipelined insn. */
+ if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
+ {
+ if (is_nonpipeline_insn (ready[n_ready - 1])
+ && (recog_memoized (ready[n_ready - 2]) > 0))
+ /* Simply swap first two insns. */
+ {
+ rtx tmp = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = tmp;
+ }
+ }
+
+ if (rs6000_cpu == PROCESSOR_POWER6)
+ load_store_pendulum = 0;
+
+ return rs6000_issue_rate ();
+}
+
+/* Like rs6000_sched_reorder, but called after issuing each insn. */
+
+static int
+rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
+ int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
+{
+ if (sched_verbose)
+ fprintf (dump, "// rs6000_sched_reorder2 :\n");
+
+ /* For Power6, we need to handle some special cases to try and keep the
+ store queue from overflowing and triggering expensive flushes.
+
+ This code monitors how load and store instructions are being issued
+ and skews the ready list one way or the other to increase the likelihood
+ that a desired instruction is issued at the proper time.
+
+ A couple of things are done. First, we maintain a "load_store_pendulum"
+ to track the current state of load/store issue.
+
+ - If the pendulum is at zero, then no loads or stores have been
+ issued in the current cycle so we do nothing.
+
+ - If the pendulum is 1, then a single load has been issued in this
+ cycle and we attempt to locate another load in the ready list to
+ issue with it.
+
+ - If the pendulum is -2, then two stores have already been
+ issued in this cycle, so we increase the priority of the first load
+ in the ready list to increase its likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum is -1, then a single store has been issued in this
+ cycle and we attempt to locate another store in the ready list to
+ issue with it, preferring a store to an adjacent memory location to
+ facilitate store pairing in the store queue.
+
+ - If the pendulum is 2, then two loads have already been
+ issued in this cycle, so we increase the priority of the first store
+ in the ready list to increase its likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum < -2 or > 2, then do nothing.
+
+ Note: This code covers the most common scenarios. There exist
+ non-load/store instructions which make use of the LSU and which
+ would need to be accounted for to strictly model the behavior
+ of the machine. Those instructions are currently unaccounted
+ for to help minimize compile time overhead of this code.
+ */
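+ /* A worked instance of the rules above: if two loads were issued in
+ this cycle, the pendulum reaches 2 and the first store found on the
+ ready list gets an INSN_PRIORITY boost, so it is favored for issue
+ in the next cycle. */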
+ if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
+ {
+ int pos;
+ int i;
+ rtx tmp;
+
+ if (is_store_insn (last_scheduled_insn))
+ /* Issuing a store, swing the load_store_pendulum to the left. */
+ load_store_pendulum--;
+ else if (is_load_insn (last_scheduled_insn))
+ /* Issuing a load, swing the load_store_pendulum to the right. */
+ load_store_pendulum++;
+ else
+ return cached_can_issue_more;
+
+ /* If the pendulum is balanced, or there is only one instruction on
+ the ready list, then all is well, so return. */
+ if ((load_store_pendulum == 0) || (*pn_ready <= 1))
+ return cached_can_issue_more;
+
+ if (load_store_pendulum == 1)
+ {
+ /* A load has been issued in this cycle. Scan the ready list
+ for another load to issue with it. */
+ pos = *pn_ready - 1;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos]))
+ {
+ /* Found a load. Move it to the head of the ready list,
+ and adjust its priority so that it is more likely to
+ stay there. */
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready - 1] = tmp;
+
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
+ INSN_PRIORITY (tmp)++;
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -2)
+ {
+ /* Two stores have been issued in this cycle. Increase the
+ priority of the first load in the ready list to favor it for
+ issuing in the next cycle. */
+ pos = *pn_ready - 1;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos])
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a load
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple loads. */
+ load_store_pendulum--;
+
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -1)
+ {
+ /* A store has been issued in this cycle. Scan the ready list for
+ another store to issue with it, preferring a store to an adjacent
+ memory location. */
+ int first_store_pos = -1;
+
+ pos = *pn_ready - 1;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos]))
+ {
+ /* Maintain the index of the first store found on the
+ list. */
+ if (first_store_pos == -1)
+ first_store_pos = pos;
+
+ if (is_store_insn (last_scheduled_insn)
+ && adjacent_mem_locations (last_scheduled_insn,ready[pos]))
+ {
+ /* Found an adjacent store. Move it to the head of the
+ ready list, and adjust its priority so that it is
+ more likely to stay there. */
+ tmp = ready[pos];
+ for (i = pos; i < *pn_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready - 1] = tmp;
+
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
+ INSN_PRIORITY (tmp)++;
+
+ first_store_pos = -1;
+
+ break;
+ }
+ }
+ pos--;
+ }
+
+ if (first_store_pos >= 0)
+ {
+ /* An adjacent store wasn't found, but a non-adjacent store was,
+ so move the non-adjacent store to the front of the ready
+ list, and adjust its priority so that it is more likely to
+ stay there. */
+ tmp = ready[first_store_pos];
+ for (i = first_store_pos; i < *pn_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready - 1] = tmp;
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
+ INSN_PRIORITY (tmp)++;
+ }
+ }
+ else if (load_store_pendulum == 2)
+ {
+ /* Two loads have been issued in this cycle. Increase the priority
+ of the first store in the ready list to favor it for issuing in
+ the next cycle. */
+ pos = *pn_ready - 1;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos])
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a store
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple stores. */
+ load_store_pendulum++;
+
+ break;
+ }
+ pos--;
+ }
+ }
+ }
+
+ return cached_can_issue_more;
+}
+
+/* Return whether the presence of INSN causes a dispatch group termination
+ of group WHICH_GROUP.
+
+ If WHICH_GROUP == current_group, this function will return true if INSN
+ causes the termination of the current group (i.e., the dispatch group to
+ which INSN belongs). This means that INSN will be the last insn in the
+ group it belongs to.
+
+ If WHICH_GROUP == previous_group, this function will return true if INSN
+ causes the termination of the previous group (i.e., the dispatch group that
+ precedes the group to which INSN belongs). This means that INSN will be
+ the first insn in the group it belongs to. */
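+
+/* For example, on Power6 a TYPE_SYNC insn must be both the first and
+ the last insn in its group (see the two functions below), so this
+ function returns true for either value of WHICH_GROUP. */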
+
+static bool
+insn_terminates_group_p (rtx insn, enum group_termination which_group)
+{
+ bool first, last;
+
+ if (! insn)
+ return false;
+
+ first = insn_must_be_first_in_group (insn);
+ last = insn_must_be_last_in_group (insn);
+
+ if (first && last)
+ return true;
+
+ if (which_group == current_group)
+ return last;
+ else if (which_group == previous_group)
+ return first;
+
+ return false;
+}
+
+
+static bool
+insn_must_be_first_in_group (rtx insn)
+{
+ enum attr_type type;
+
+ if (!insn
+ || GET_CODE (insn) == NOTE
+ || DEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ switch (rs6000_cpu)
+ {
+ case PROCESSOR_POWER5:
+ if (is_cracked_insn (insn))
+ return true;
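+ /* Fall through: the POWER4 checks below apply to POWER5 too. */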
+ case PROCESSOR_POWER4:
+ if (is_microcoded_insn (insn))
+ return true;
+
+ if (!rs6000_sched_groups)
+ return false;
+
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_DELAYED_CR:
+ case TYPE_CR_LOGICAL:
+ case TYPE_MTJMPR:
+ case TYPE_MFJMPR:
+ case TYPE_IDIV:
+ case TYPE_LDIV:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case PROCESSOR_POWER6:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_INSERT_DWORD:
+ case TYPE_EXTS:
+ case TYPE_CNTLZ:
+ case TYPE_SHIFT:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_TRAP:
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IDIV:
+ case TYPE_INSERT_WORD:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case PROCESSOR_POWER7:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_CR_LOGICAL:
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_IDIV:
+ case TYPE_LDIV:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_ISYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ return true;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static bool
+insn_must_be_last_in_group (rtx insn)
+{
+ enum attr_type type;
+
+ if (!insn
+ || GET_CODE (insn) == NOTE
+ || DEBUG_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ switch (rs6000_cpu)
+ {
+ case PROCESSOR_POWER4:
+ case PROCESSOR_POWER5:
+ if (is_microcoded_insn (insn))
+ return true;
+
+ if (is_branch_slot_insn (insn))
+ return true;
+
+ break;
+ case PROCESSOR_POWER6:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_EXTS:
+ case TYPE_CNTLZ:
+ case TYPE_SHIFT:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_TRAP:
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IDIV:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case PROCESSOR_POWER7:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
+ dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
+
+static bool
+is_costly_group (rtx *group_insns, rtx next_insn)
+{
+ int i;
+ int issue_rate = rs6000_issue_rate ();
+
+ for (i = 0; i < issue_rate; i++)
+ {
+ sd_iterator_def sd_it;
+ dep_t dep;
+ rtx insn = group_insns[i];
+
+ if (!insn)
+ continue;
+
+ FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+ {
+ rtx next = DEP_CON (dep);
+
+ if (next == next_insn
+ && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Utility of the function redefine_groups.
+ Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
+ in the same dispatch group. If so, insert nops before NEXT_INSN, in order
+ to keep it "far" (in a separate group) from GROUP_INSNS, following
+ one of the following schemes, depending on the value of the flag
+ -minsert-sched-nops = X:
+ (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
+ in order to force NEXT_INSN into a separate group.
+ (2) X < sched_finish_regroup_exact: insert exactly X nops.
+ GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
+ insertion (whether a group just ended, how many vacant issue slots remain in the
+ last group, and how many dispatch groups were encountered so far). */
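+
+/* For example, with -minsert-sched-nops=2 (scheme (2) above), exactly
+ two nops are emitted before NEXT_INSN whenever the dependence is
+ costly; with sched_finish_regroup_exact (scheme (1)), nops are emitted
+ until NEXT_INSN is forced into a new group. */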
+
+static int
+force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
+ rtx next_insn, bool *group_end, int can_issue_more,
+ int *group_count)
+{
+ rtx nop;
+ bool force;
+ int issue_rate = rs6000_issue_rate ();
+ bool end = *group_end;
+ int i;
+
+ if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
+ return can_issue_more;
+
+ if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
+ return can_issue_more;
+
+ force = is_costly_group (group_insns, next_insn);
+ if (!force)
+ return can_issue_more;
+
+ if (sched_verbose > 6)
+ fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
+ *group_count ,can_issue_more);
+
+ if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
+ {
+ if (*group_end)
+ can_issue_more = 0;
+
+ /* Since only a branch can be issued in the last issue_slot, it is
+ sufficient to insert 'can_issue_more - 1' nops if next_insn is not
+ a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
+ in this case the last nop will start a new group and the branch
+ will be forced to the new group. */
+ if (can_issue_more && !is_branch_slot_insn (next_insn))
+ can_issue_more--;
+
+ while (can_issue_more > 0)
+ {
+ nop = gen_nop ();
+ emit_insn_before (nop, next_insn);
+ can_issue_more--;
+ }
+
+ *group_end = true;
+ return 0;
+ }
+
+ if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
+ {
+ int n_nops = rs6000_sched_insert_nops;
+
+ /* Nops can't be issued from the branch slot, so the effective
+ issue_rate for nops is 'issue_rate - 1'. */
+ if (can_issue_more == 0)
+ can_issue_more = issue_rate;
+ can_issue_more--;
+ if (can_issue_more == 0)
+ {
+ can_issue_more = issue_rate - 1;
+ (*group_count)++;
+ end = true;
+ for (i = 0; i < issue_rate; i++)
+ {
+ group_insns[i] = 0;
+ }
+ }
+
+ while (n_nops > 0)
+ {
+ nop = gen_nop ();
+ emit_insn_before (nop, next_insn);
+ if (can_issue_more == issue_rate - 1) /* new group begins */
+ end = false;
+ can_issue_more--;
+ if (can_issue_more == 0)
+ {
+ can_issue_more = issue_rate - 1;
+ (*group_count)++;
+ end = true;
+ for (i = 0; i < issue_rate; i++)
+ {
+ group_insns[i] = 0;
+ }
+ }
+ n_nops--;
+ }
+
+ /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
+ can_issue_more++;
+
+ /* Is next_insn going to start a new group? */
+ *group_end
+ = (end
+ || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
+ || (can_issue_more <= 2 && is_cracked_insn (next_insn))
+ || (can_issue_more < issue_rate
+ && insn_terminates_group_p (next_insn, previous_group)));
+ if (*group_end && end)
+ (*group_count)--;
+
+ if (sched_verbose > 6)
+ fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
+ *group_count, can_issue_more);
+ return can_issue_more;
+ }
+
+ return can_issue_more;
+}
+
+/* This function tries to synch the dispatch groups that the compiler "sees"
+ with the dispatch groups that the processor dispatcher is expected to
+ form in practice. It tries to achieve this synchronization by forcing the
+ estimated processor grouping on the compiler (as opposed to the function
+ 'pad_groups', which tries to force the scheduler's grouping on the processor).
+
+ The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
+ examines the (estimated) dispatch groups that will be formed by the processor
+ dispatcher. It marks these group boundaries to reflect the estimated
+ processor grouping, overriding the grouping that the scheduler had marked.
+ Depending on the value of the flag '-minsert-sched-nops' this function can
+ force certain insns into separate groups or force a certain distance between
+ them by inserting nops, for example, if there exists a "costly dependence"
+ between the insns.
+
+ The function estimates the group boundaries that the processor will form as
+ follows: It keeps track of how many vacant issue slots are available after
+ each insn. A subsequent insn will start a new group if one of the following
+ 4 cases applies:
+ - no more vacant issue slots remain in the current dispatch group.
+ - only the last issue slot, which is the branch slot, is vacant, but the next
+ insn is not a branch.
+ - only the last 2 or fewer issue slots, including the branch slot, are vacant,
+ which means that a cracked insn (which occupies two issue slots) can't be
+ issued in this group.
+ - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
+ start a new group. */
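+
+/* For example (the second case above), if only the branch slot remains
+ vacant and the next active insn is an integer add rather than a
+ branch, the add is estimated to start a new dispatch group. */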
+
+static int
+redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
+{
+ rtx insn, next_insn;
+ int issue_rate;
+ int can_issue_more;
+ int slot, i;
+ bool group_end;
+ int group_count = 0;
+ rtx *group_insns;
+
+ /* Initialize. */
+ issue_rate = rs6000_issue_rate ();
+ group_insns = XALLOCAVEC (rtx, issue_rate);
+ for (i = 0; i < issue_rate; i++)
+ {
+ group_insns[i] = 0;
+ }
+ can_issue_more = issue_rate;
+ slot = 0;
+ insn = get_next_active_insn (prev_head_insn, tail);
+ group_end = false;
+
+ while (insn != NULL_RTX)
+ {
+ slot = (issue_rate - can_issue_more);
+ group_insns[slot] = insn;
+ can_issue_more =
+ rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
+ if (insn_terminates_group_p (insn, current_group))
+ can_issue_more = 0;
+
+ next_insn = get_next_active_insn (insn, tail);
+ if (next_insn == NULL_RTX)
+ return group_count + 1;
+
+ /* Is next_insn going to start a new group? */
+ group_end
+ = (can_issue_more == 0
+ || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
+ || (can_issue_more <= 2 && is_cracked_insn (next_insn))
+ || (can_issue_more < issue_rate
+ && insn_terminates_group_p (next_insn, previous_group)));
+
+ can_issue_more = force_new_group (sched_verbose, dump, group_insns,
+ next_insn, &group_end, can_issue_more,
+ &group_count);
+
+ if (group_end)
+ {
+ group_count++;
+ can_issue_more = 0;
+ for (i = 0; i < issue_rate; i++)
+ {
+ group_insns[i] = 0;
+ }
+ }
+
+ if (GET_MODE (next_insn) == TImode && can_issue_more)
+ PUT_MODE (next_insn, VOIDmode);
+ else if (!can_issue_more && GET_MODE (next_insn) != TImode)
+ PUT_MODE (next_insn, TImode);
+
+ insn = next_insn;
+ if (can_issue_more == 0)
+ can_issue_more = issue_rate;
+ } /* while */
+
+ return group_count;
+}
+
+/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
+ dispatch group boundaries that the scheduler had marked. Pad with nops
+ any dispatch groups which have vacant issue slots, in order to force the
+ scheduler's grouping on the processor dispatcher. The function
+ returns the number of dispatch groups found. */
+
+static int
+pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
+{
+ rtx insn, next_insn;
+ rtx nop;
+ int issue_rate;
+ int can_issue_more;
+ int group_end;
+ int group_count = 0;
+
+ /* Initialize issue_rate. */
+ issue_rate = rs6000_issue_rate ();
+ can_issue_more = issue_rate;
+
+ insn = get_next_active_insn (prev_head_insn, tail);
+ next_insn = get_next_active_insn (insn, tail);
+
+ while (insn != NULL_RTX)
+ {
+ can_issue_more =
+ rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
+
+ group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
+
+ if (next_insn == NULL_RTX)
+ break;
+
+ if (group_end)
+ {
+ /* If the scheduler had marked group termination at this location
+ (between insn and next_insn), and neither insn nor next_insn will
+ force group termination, pad the group with nops to force group
+ termination. */
+ if (can_issue_more
+ && (rs6000_sched_insert_nops == sched_finish_pad_groups)
+ && !insn_terminates_group_p (insn, current_group)
+ && !insn_terminates_group_p (next_insn, previous_group))
+ {
+ if (!is_branch_slot_insn (next_insn))
+ can_issue_more--;
+
+ while (can_issue_more)
+ {
+ nop = gen_nop ();
+ emit_insn_before (nop, next_insn);
+ can_issue_more--;
+ }
+ }
+
+ can_issue_more = issue_rate;
+ group_count++;
+ }
+
+ insn = next_insn;
+ next_insn = get_next_active_insn (insn, tail);
+ }
+
+ return group_count;
+}
+
+/* We're beginning a new block. Initialize data structures as necessary. */
+
+static void
+rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ last_scheduled_insn = NULL_RTX;
+ load_store_pendulum = 0;
+}
+
+/* The following function is called at the end of scheduling BB.
+ After reload, it inserts nops to enforce insn group bundling. */
+
+static void
+rs6000_sched_finish (FILE *dump, int sched_verbose)
+{
+ int n_groups;
+
+ if (sched_verbose)
+ fprintf (dump, "=== Finishing schedule.\n");
+
+ if (reload_completed && rs6000_sched_groups)
+ {
+ /* Do not run sched_finish hook when selective scheduling is enabled. */
+ if (sel_sched_p ())
+ return;
+
+ if (rs6000_sched_insert_nops == sched_finish_none)
+ return;
+
+ if (rs6000_sched_insert_nops == sched_finish_pad_groups)
+ n_groups = pad_groups (dump, sched_verbose,
+ current_sched_info->prev_head,
+ current_sched_info->next_tail);
+ else
+ n_groups = redefine_groups (dump, sched_verbose,
+ current_sched_info->prev_head,
+ current_sched_info->next_tail);
+
+ if (sched_verbose >= 6)
+ {
+ fprintf (dump, "ngroups = %d\n", n_groups);
+ print_rtl (dump, current_sched_info->prev_head);
+ fprintf (dump, "Done finish_sched\n");
+ }
+ }
+}
+
+struct _rs6000_sched_context
+{
+ short cached_can_issue_more;
+ rtx last_scheduled_insn;
+ int load_store_pendulum;
+};
+
+typedef struct _rs6000_sched_context rs6000_sched_context_def;
+typedef rs6000_sched_context_def *rs6000_sched_context_t;
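+
+/* These hooks let the selective scheduler save and restore the backend
+ scheduling state (cached_can_issue_more, last_scheduled_insn and
+ load_store_pendulum) across its traversals. */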
+
+/* Allocate storage for a new scheduling context. */
+static void *
+rs6000_alloc_sched_context (void)
+{
+ return xmalloc (sizeof (rs6000_sched_context_def));
+}
+
+/* If CLEAN_P is true, initialize _SC with clean data;
+ otherwise initialize it from the global context. */
+static void
+rs6000_init_sched_context (void *_sc, bool clean_p)
+{
+ rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+ if (clean_p)
+ {
+ sc->cached_can_issue_more = 0;
+ sc->last_scheduled_insn = NULL_RTX;
+ sc->load_store_pendulum = 0;
+ }
+ else
+ {
+ sc->cached_can_issue_more = cached_can_issue_more;
+ sc->last_scheduled_insn = last_scheduled_insn;
+ sc->load_store_pendulum = load_store_pendulum;
+ }
+}
+
+/* Sets the global scheduling context to the one pointed to by _SC. */
+static void
+rs6000_set_sched_context (void *_sc)
+{
+ rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+ gcc_assert (sc != NULL);
+
+ cached_can_issue_more = sc->cached_can_issue_more;
+ last_scheduled_insn = sc->last_scheduled_insn;
+ load_store_pendulum = sc->load_store_pendulum;
+}
+
+/* Free _SC. */
+static void
+rs6000_free_sched_context (void *_sc)
+{
+ gcc_assert (_sc != NULL);
+
+ free (_sc);
+}
+
+
+/* Length in units of the trampoline for entering a nested function. */
+
+int
+rs6000_trampoline_size (void)
+{
+ int ret = 0;
+
+ switch (DEFAULT_ABI)
+ {
+ default:
+ gcc_unreachable ();
+
+ case ABI_AIX:
+ ret = (TARGET_32BIT) ? 12 : 24;
+ break;
+
+ case ABI_DARWIN:
+ case ABI_V4:
+ ret = (TARGET_32BIT) ? 40 : 48;
+ break;
+ }
+
+ return ret;
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+static void
+rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ int regsize = (TARGET_32BIT) ? 4 : 8;
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx ctx_reg = force_reg (Pmode, cxt);
+ rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
+
+ switch (DEFAULT_ABI)
+ {
+ default:
+ gcc_unreachable ();
+
+ /* Under AIX, just build the 3-word function descriptor. */
+ case ABI_AIX:
+ {
+ rtx fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
+ rtx fn_reg = gen_reg_rtx (Pmode);
+ rtx toc_reg = gen_reg_rtx (Pmode);
+
+ /* Macro to shorten the code expansions below. */
+# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
+
+ m_tramp = replace_equiv_address (m_tramp, addr);
+
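+ /* Load the target's entry address and TOC pointer out of FNADDR's
+ descriptor, then store them, followed by the static chain, into
+ the trampoline's own descriptor. */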
+ emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
+ emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
+ emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
+ emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
+ emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
+
+# undef MEM_PLUS
+ }
+ break;
+
+ /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
+ case ABI_DARWIN:
+ case ABI_V4:
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
+ LCT_NORMAL, VOIDmode, 4,
+ addr, Pmode,
+ GEN_INT (rs6000_trampoline_size ()), SImode,
+ fnaddr, Pmode,
+ ctx_reg, Pmode);
+ break;
+ }
+}
+
+
+/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
+ identifier as an argument, so the front end shouldn't look it up. */
+
+static bool
+rs6000_attribute_takes_identifier_p (const_tree attr_id)
+{
+ return is_attribute_p ("altivec", attr_id);
+}
+
+/* Handle the "altivec" attribute. The attribute may have
+ arguments as follows:
+
+ __attribute__((altivec(vector__)))
+ __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
+ __attribute__((altivec(bool__))) (always followed by 'unsigned')
+
+ and may appear more than once (e.g., 'vector bool char') in a
+ given declaration. */
+
+static tree
+rs6000_handle_altivec_attribute (tree *node,
+ tree name ATTRIBUTE_UNUSED,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree type = *node, result = NULL_TREE;
+ enum machine_mode mode;
+ int unsigned_p;
+ char altivec_type
+ = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
+ && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
+ ? *IDENTIFIER_POINTER (TREE_VALUE (args))
+ : '?');
+
+ while (POINTER_TYPE_P (type)
+ || TREE_CODE (type) == FUNCTION_TYPE
+ || TREE_CODE (type) == METHOD_TYPE
+ || TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+
+ mode = TYPE_MODE (type);
+
+ /* Check for invalid AltiVec type qualifiers. */
+ if (type == long_double_type_node)
+ error ("use of %<long double%> in AltiVec types is invalid");
+ else if (type == boolean_type_node)
+ error ("use of boolean types in AltiVec types is invalid");
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ error ("use of %<complex%> in AltiVec types is invalid");
+ else if (DECIMAL_FLOAT_MODE_P (mode))
+ error ("use of decimal floating point types in AltiVec types is invalid");
+ else if (!TARGET_VSX)
+ {
+ if (type == long_unsigned_type_node || type == long_integer_type_node)
+ {
+ if (TARGET_64BIT)
+ error ("use of %<long%> in AltiVec types is invalid for "
+ "64-bit code without -mvsx");
+ else if (rs6000_warn_altivec_long)
+ warning (0, "use of %<long%> in AltiVec types is deprecated; "
+ "use %<int%>");
+ }
+ else if (type == long_long_unsigned_type_node
+ || type == long_long_integer_type_node)
+ error ("use of %<long long%> in AltiVec types is invalid without "
+ "-mvsx");
+ else if (type == double_type_node)
+ error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+ }
+
+ switch (altivec_type)
+ {
+ case 'v':
+ unsigned_p = TYPE_UNSIGNED (type);
+ switch (mode)
+ {
+ case DImode:
+ result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+ break;
+ case SImode:
+ result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+ break;
+ case HImode:
+ result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+ break;
+ case QImode:
+ result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+ break;
+ case SFmode: result = V4SF_type_node; break;
+ case DFmode: result = V2DF_type_node; break;
+ /* If the user says 'vector int bool', we may be handed the 'bool'
+ attribute _before_ the 'vector' attribute, and so select the
+ proper type in the 'b' case below. */
+ case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
+ case V2DImode: case V2DFmode:
+ result = type;
+ default: break;
+ }
+ break;
+ case 'b':
+ switch (mode)
+ {
+ case DImode: case V2DImode: result = bool_V2DI_type_node; break;
+ case SImode: case V4SImode: result = bool_V4SI_type_node; break;
+ case HImode: case V8HImode: result = bool_V8HI_type_node; break;
+ case QImode: case V16QImode: result = bool_V16QI_type_node;
+ default: break;
+ }
+ break;
+ case 'p':
+ switch (mode)
+ {
+ case V8HImode: result = pixel_V8HI_type_node;
+ default: break;
+ }
+ default: break;
+ }
+
+ /* Propagate qualifiers attached to the element type
+ onto the vector type. */
+ if (result && result != type && TYPE_QUALS (type))
+ result = build_qualified_type (result, TYPE_QUALS (type));
+
+ *no_add_attrs = true; /* No need to hang on to the attribute. */
+
+ if (result)
+ *node = lang_hooks.types.reconstruct_complex_type (*node, result);
+
+ return NULL_TREE;
+}
+
+/* AltiVec defines four built-in scalar types that serve as vector
+ elements; we must teach the compiler how to mangle them. */
+
+static const char *
+rs6000_mangle_type (const_tree type)
+{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ if (type == bool_char_type_node) return "U6__boolc";
+ if (type == bool_short_type_node) return "U6__bools";
+ if (type == pixel_type_node) return "u7__pixel";
+ if (type == bool_int_type_node) return "U6__booli";
+ if (type == bool_long_type_node) return "U6__booll";
+
+ /* Mangle IBM extended float long double as `g' (__float128) on
+ powerpc*-linux where long-double-64 previously was the default. */
+ if (TYPE_MAIN_VARIANT (type) == long_double_type_node
+ && TARGET_ELF
+ && TARGET_LONG_DOUBLE_128
+ && !TARGET_IEEEQUAD)
+ return "g";
+
+ /* For all other types, use normal C++ mangling. */
+ return NULL;
+}
+
+/* Handle a "longcall" or "shortcall" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+static tree
+rs6000_handle_longcall_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Set longcall attributes on all functions declared when
+ rs6000_default_long_calls is true. */
+static void
+rs6000_set_default_type_attributes (tree type)
+{
+ if (rs6000_default_long_calls
+ && (TREE_CODE (type) == FUNCTION_TYPE
+ || TREE_CODE (type) == METHOD_TYPE))
+ TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
+ NULL_TREE,
+ TYPE_ATTRIBUTES (type));
+
+#if TARGET_MACHO
+ darwin_set_default_type_attributes (type);
+#endif
+}
+
+/* Return a reference suitable for calling a function with the
+ longcall attribute. */
+
+rtx
+rs6000_longcall_ref (rtx call_ref)
+{
+ const char *call_name;
+ tree node;
+
+ if (GET_CODE (call_ref) != SYMBOL_REF)
+ return call_ref;
+
+ /* System V adds '.' to the internal name, so skip any leading dots. */
+ call_name = XSTR (call_ref, 0);
+ if (*call_name == '.')
+ {
+ while (*call_name == '.')
+ call_name++;
+
+ node = get_identifier (call_name);
+ call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
+ }
+
+ return force_reg (Pmode, call_ref);
+}
+
+#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
+#define TARGET_USE_MS_BITFIELD_LAYOUT 0
+#endif
+
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+rs6000_handle_struct_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning (OPT_Wattributes, "%qE incompatible attribute ignored",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static bool
+rs6000_ms_bitfield_layout_p (const_tree record_type)
+{
+ return (TARGET_USE_MS_BITFIELD_LAYOUT
+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
+}
+
+#ifdef USING_ELFOS_H
+
+/* A get_unnamed_section callback, used for switching to toc_section. */
+
+static void
+rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
+{
+ if (DEFAULT_ABI == ABI_AIX
+ && TARGET_MINIMAL_TOC
+ && !TARGET_RELOCATABLE)
+ {
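+ /* LCTOC1 is biased 32768 bytes past the section start so that the
+ whole 64K minimal TOC is reachable with signed 16-bit offsets. */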
+ if (!toc_initialized)
+ {
+ toc_initialized = 1;
+ fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
+ (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
+ fprintf (asm_out_file, "\t.tc ");
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
+ fprintf (asm_out_file, "\n");
+
+ fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
+ fprintf (asm_out_file, " = .+32768\n");
+ }
+ else
+ fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
+ }
+ else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
+ fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
+ else
+ {
+ fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
+ if (!toc_initialized)
+ {
+ ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
+ fprintf (asm_out_file, " = .+32768\n");
+ toc_initialized = 1;
+ }
+ }
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+rs6000_elf_asm_init_sections (void)
+{
+ toc_section
+ = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
+
+ sdata2_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ SDATA2_SECTION_ASM_OP);
+}
+
+/* Implement TARGET_SELECT_RTX_SECTION. */
+
+static section *
+rs6000_elf_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
+ return toc_section;
+ else
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+/* For a SYMBOL_REF, set generic flags and then perform some
+ target-specific processing.
+
+ When the AIX ABI is requested on a non-AIX system, replace the
+ function name with the real name (with a leading .) rather than the
+ function descriptor name. This saves a lot of overriding code to
+ read the prefixes. */
+
+static void
+rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (first
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && !TARGET_AIX
+ && DEFAULT_ABI == ABI_AIX)
+ {
+ rtx sym_ref = XEXP (rtl, 0);
+ size_t len = strlen (XSTR (sym_ref, 0));
+ char *str = XALLOCAVEC (char, len + 2);
+ str[0] = '.';
+ memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
+ XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
+ }
+}
+
+static inline bool
+compare_section_name (const char *section, const char *templ)
+{
+ int len;
+
+ len = strlen (templ);
+ return (strncmp (section, templ, len) == 0
+ && (section[len] == 0 || section[len] == '.'));
+}
+
+bool
+rs6000_elf_in_small_data_p (const_tree decl)
+{
+ if (rs6000_sdata == SDATA_NONE)
+ return false;
+
+ /* We want to merge strings, so we never consider them small data. */
+ if (TREE_CODE (decl) == STRING_CST)
+ return false;
+
+ /* Functions are never in the small data area. */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (compare_section_name (section, ".sdata")
+ || compare_section_name (section, ".sdata2")
+ || compare_section_name (section, ".gnu.linkonce.s")
+ || compare_section_name (section, ".sbss")
+ || compare_section_name (section, ".sbss2")
+ || compare_section_name (section, ".gnu.linkonce.sb")
+ || strcmp (section, ".PPC.EMB.sdata0") == 0
+ || strcmp (section, ".PPC.EMB.sbss0") == 0)
+ return true;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
+
+ if (size > 0
+ && size <= g_switch_value
+ /* If it's not public, and we're not going to reference it there,
+ there's no need to put it in the small data section. */
+ && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
+ return true;
+ }
+
+ return false;
+}
+
+#endif /* USING_ELFOS_H */
+
+/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
+
+static bool
+rs6000_use_blocks_for_constant_p (enum machine_mode mode, const_rtx x)
+{
+ return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
+}
+
+/* Return a REG that occurs in ADDR with coefficient 1.
+ ADDR can be effectively incremented by incrementing REG.
+
+ r0 is special and we must not select it as an address
+ register by this routine since our caller will try to
+ increment the returned register via an "la" instruction. */
+
+rtx
+find_addr_reg (rtx addr)
+{
+ while (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == REG
+ && REGNO (XEXP (addr, 0)) != 0)
+ addr = XEXP (addr, 0);
+ else if (GET_CODE (XEXP (addr, 1)) == REG
+ && REGNO (XEXP (addr, 1)) != 0)
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 0)))
+ addr = XEXP (addr, 1);
+ else if (CONSTANT_P (XEXP (addr, 1)))
+ addr = XEXP (addr, 0);
+ else
+ gcc_unreachable ();
+ }
+ gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
+ return addr;
+}
+
+void
+rs6000_fatal_bad_address (rtx op)
+{
+ fatal_insn ("bad address", op);
+}
+
+#if TARGET_MACHO
+
+typedef struct branch_island_d {
+ tree function_name;
+ tree label_name;
+ int line_number;
+} branch_island;
+
+DEF_VEC_O(branch_island);
+DEF_VEC_ALLOC_O(branch_island,gc);
+
+static VEC(branch_island,gc) *branch_islands;
+
+/* Remember to generate a branch island for far calls to the given
+ function. */
+
+static void
+add_compiler_branch_island (tree label_name, tree function_name,
+ int line_number)
+{
+ branch_island *bi = VEC_safe_push (branch_island, gc, branch_islands, NULL);
+
+ bi->function_name = function_name;
+ bi->label_name = label_name;
+ bi->line_number = line_number;
+}
+
+/* Generate far-jump branch islands for everything recorded in
+ branch_islands. Invoked immediately after the last instruction of
+ the epilogue has been emitted; the branch islands must be appended
+ to, and contiguous with, the function body. Mach-O stubs are
+ generated in machopic_output_stub(). */
+
+static void
+macho_branch_islands (void)
+{
+ char tmp_buf[512];
+
+ while (!VEC_empty (branch_island, branch_islands))
+ {
+ branch_island *bi = VEC_last (branch_island, branch_islands);
+ const char *label = IDENTIFIER_POINTER (bi->label_name);
+ const char *name = IDENTIFIER_POINTER (bi->function_name);
+ char name_buf[512];
+ /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
+ if (name[0] == '*' || name[0] == '&')
+ strcpy (name_buf, name+1);
+ else
+ {
+ name_buf[0] = '_';
+ strcpy (name_buf+1, name);
+ }
+ strcpy (tmp_buf, "\n");
+ strcat (tmp_buf, label);
+#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
+ if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
+ dbxout_stabd (N_SLINE, bi->line_number);
+#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
+ if (flag_pic)
+ {
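+ /* Use bcl/mflr to materialize the island's own address in r11,
+ then form the target address PC-relatively with addis/addi
+ (restoring LR via mtlr r0) and branch through the count
+ register. */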
+ strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic\n");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic:\n\tmflr r11\n");
+
+ strcat (tmp_buf, "\taddis r11,r11,ha16(");
+ strcat (tmp_buf, name_buf);
+ strcat (tmp_buf, " - ");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic)\n");
+
+ strcat (tmp_buf, "\tmtlr r0\n");
+
+ strcat (tmp_buf, "\taddi r12,r11,lo16(");
+ strcat (tmp_buf, name_buf);
+ strcat (tmp_buf, " - ");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic)\n");
+
+ strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
+ }
+ else
+ {
+ strcat (tmp_buf, ":\nlis r12,hi16(");
+ strcat (tmp_buf, name_buf);
+ strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
+ strcat (tmp_buf, name_buf);
+ strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
+ }
+ output_asm_insn (tmp_buf, 0);
+#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
+ if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
+ dbxout_stabd (N_SLINE, bi->line_number);
+#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
+ VEC_pop (branch_island, branch_islands);
+ }
+}
+
+/* NO_PREVIOUS_DEF checks whether the function name already has a
+ branch island recorded for it. */
+
+static int
+no_previous_def (tree function_name)
+{
+ branch_island *bi;
+ unsigned ix;
+
+ FOR_EACH_VEC_ELT (branch_island, branch_islands, ix, bi)
+ if (function_name == bi->function_name)
+ return 0;
+ return 1;
+}
+
+/* GET_PREV_LABEL gets the label name from the previous definition of
+ the function. */
+
+static tree
+get_prev_label (tree function_name)
+{
+ branch_island *bi;
+ unsigned ix;
+
+ FOR_EACH_VEC_ELT (branch_island, branch_islands, ix, bi)
+ if (function_name == bi->function_name)
+ return bi->label_name;
+ return NULL_TREE;
+}
+
+/* INSN is either a function call or a millicode call. It may have an
+ unconditional jump in its delay slot.
+
+ OPERANDS[DEST_OPERAND_NUMBER] is the routine we are calling. */
+
+char *
+output_call (rtx insn, rtx *operands, int dest_operand_number,
+ int cookie_operand_number)
+{
+ static char buf[256];
+ if (darwin_emit_branch_islands
+ && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
+ && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
+ {
+ tree labelname;
+ tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
+
+ if (no_previous_def (funname))
+ {
+ rtx label_rtx = gen_label_rtx ();
+ char *label_buf, temp_buf[256];
+ ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
+ CODE_LABEL_NUMBER (label_rtx));
+ label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
+ labelname = get_identifier (label_buf);
+ add_compiler_branch_island (labelname, funname, insn_line (insn));
+ }
+ else
+ labelname = get_prev_label (funname);
+
+ /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
+ instruction will reach 'foo', otherwise link as 'bl L42'".
+ "L42" should be a 'branch island', that will do a far jump to
+ 'foo'. Branch islands are generated in
+ macho_branch_islands(). */
+ sprintf (buf, "jbsr %%z%d,%.246s",
+ dest_operand_number, IDENTIFIER_POINTER (labelname));
+ }
+ else
+ sprintf (buf, "bl %%z%d", dest_operand_number);
+ return buf;
+}
+
+/* Generate PIC and indirect symbol stubs. */
+
+void
+machopic_output_stub (FILE *file, const char *symb, const char *stub)
+{
+ unsigned int length;
+ char *symbol_name, *lazy_ptr_name;
+ char *local_label_0;
+ static int label = 0;
+
+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
+ symb = (*targetm.strip_name_encoding) (symb);
+
+
+ length = strlen (symb);
+ symbol_name = XALLOCAVEC (char, length + 32);
+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
+
+ lazy_ptr_name = XALLOCAVEC (char, length + 32);
+ GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
+
+ if (flag_pic == 2)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
+ else
+ switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
+
+ if (flag_pic == 2)
+ {
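+ /* PIC stub: obtain the stub's own address via bcl/mflr, load the
+ lazy pointer PC-relatively with ha16/lo16, and jump through CTR. */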
+ fprintf (file, "\t.align 5\n");
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ label++;
+ local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
+ sprintf (local_label_0, "\"L%011d$spb\"", label);
+
+ fprintf (file, "\tmflr r0\n");
+ fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
+ fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
+ fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
+ lazy_ptr_name, local_label_0);
+ fprintf (file, "\tmtlr r0\n");
+ fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
+ (TARGET_64BIT ? "ldu" : "lwzu"),
+ lazy_ptr_name, local_label_0);
+ fprintf (file, "\tmtctr r12\n");
+ fprintf (file, "\tbctr\n");
+ }
+ else
+ {
+ fprintf (file, "\t.align 4\n");
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
+ fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
+ (TARGET_64BIT ? "ldu" : "lwzu"),
+ lazy_ptr_name);
+ fprintf (file, "\tmtctr r12\n");
+ fprintf (file, "\tbctr\n");
+ }
+
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ fprintf (file, "%s:\n", lazy_ptr_name);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+ fprintf (file, "%sdyld_stub_binding_helper\n",
+ (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
+}
+
+/* Legitimize PIC addresses. If the address is already
+ position-independent, we return ORIG. Newly generated
+ position-independent addresses go into a reg. This is REG if
+ nonzero; otherwise we allocate register(s) as necessary. */
+
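+/* True iff X is a CONST_INT that fits in a signed 16-bit immediate. */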
+#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
+
+rtx
+rs6000_machopic_legitimize_pic_address (rtx orig, enum machine_mode mode,
+ rtx reg)
+{
+ rtx base, offset;
+
+ if (reg == NULL && ! reload_in_progress && ! reload_completed)
+ reg = gen_reg_rtx (Pmode);
+
+ if (GET_CODE (orig) == CONST)
+ {
+ rtx reg_temp;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
+ return orig;
+
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+ /* Use a different reg for the intermediate value, as
+ it will be marked UNCHANGING. */
+ reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
+ base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
+ Pmode, reg_temp);
+ offset =
+ rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
+ Pmode, reg);
+
+ if (GET_CODE (offset) == CONST_INT)
+ {
+ if (SMALL_INT (offset))
+ return plus_constant (base, INTVAL (offset));
+ else if (! reload_in_progress && ! reload_completed)
+ offset = force_reg (Pmode, offset);
+ else
+ {
+ rtx mem = force_const_mem (Pmode, orig);
+ return machopic_legitimize_pic_address (mem, Pmode, reg);
+ }
+ }
+ return gen_rtx_PLUS (Pmode, base, offset);
+ }
+
+ /* Fall back on generic machopic code. */
+ return machopic_legitimize_pic_address (orig, mode, reg);
+}
+
+/* Output a .machine directive for the Darwin assembler, and call
+ the generic start_file routine. */
+
+static void
+rs6000_darwin_file_start (void)
+{
+ static const struct
+ {
+ const char *arg;
+ const char *name;
+ int if_set;
+ } mapping[] = {
+ { "ppc64", "ppc64", MASK_64BIT },
+ { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
+ { "power4", "ppc970", 0 },
+ { "G5", "ppc970", 0 },
+ { "7450", "ppc7450", 0 },
+ { "7400", "ppc7400", MASK_ALTIVEC },
+ { "G4", "ppc7400", 0 },
+ { "750", "ppc750", 0 },
+ { "740", "ppc750", 0 },
+ { "G3", "ppc750", 0 },
+ { "604e", "ppc604e", 0 },
+ { "604", "ppc604", 0 },
+ { "603e", "ppc603", 0 },
+ { "603", "ppc603", 0 },
+ { "601", "ppc601", 0 },
+ { NULL, "ppc", 0 } };
+ const char *cpu_id = "";
+ size_t i;
+
+ rs6000_file_start ();
+ darwin_file_start ();
+
+ /* Determine the argument to -mcpu=. Default to G3 if not specified. */
+ for (i = 0; i < ARRAY_SIZE (rs6000_select); i++)
+ if (rs6000_select[i].set_arch_p && rs6000_select[i].string
+ && rs6000_select[i].string[0] != '\0')
+ cpu_id = rs6000_select[i].string;
+
+ /* Look through the mapping array. Pick the first name that either
+ matches the argument, has a bit set in IF_SET that is also set
+ in the target flags, or has a NULL name. */
+
+ i = 0;
+ while (mapping[i].arg != NULL
+ && strcmp (mapping[i].arg, cpu_id) != 0
+ && (mapping[i].if_set & target_flags) == 0)
+ i++;
+
+ fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
+}
+
+#endif /* TARGET_MACHO */
+
+#if TARGET_ELF
+static int
+rs6000_elf_reloc_rw_mask (void)
+{
+ if (flag_pic)
+ return 3;
+ else if (DEFAULT_ABI == ABI_AIX)
+ return 2;
+ else
+ return 0;
+}
+
+/* Record an element in the table of global constructors. SYMBOL is
+ a SYMBOL_REF of the function to be called; PRIORITY is a number
+ between 0 and MAX_INIT_PRIORITY.
+
+ This differs from default_named_section_asm_out_constructor in
+ that we have special handling for -mrelocatable. */
+
+static void
+rs6000_elf_asm_out_constructor (rtx symbol, int priority)
+{
+ const char *section = ".ctors";
+ char buf[16];
+
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ sprintf (buf, ".ctors.%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+ section = buf;
+ }
+
+ switch_to_section (get_section (section, SECTION_WRITE, NULL));
+ assemble_align (POINTER_SIZE);
+
+ if (TARGET_RELOCATABLE)
+ {
+ fputs ("\t.long (", asm_out_file);
+ output_addr_const (asm_out_file, symbol);
+ fputs (")@fixup\n", asm_out_file);
+ }
+ else
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+static void
+rs6000_elf_asm_out_destructor (rtx symbol, int priority)
+{
+ const char *section = ".dtors";
+ char buf[16];
+
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ sprintf (buf, ".dtors.%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+ section = buf;
+ }
+
+ switch_to_section (get_section (section, SECTION_WRITE, NULL));
+ assemble_align (POINTER_SIZE);
+
+ if (TARGET_RELOCATABLE)
+ {
+ fputs ("\t.long (", asm_out_file);
+ output_addr_const (asm_out_file, symbol);
+ fputs (")@fixup\n", asm_out_file);
+ }
+ else
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+void
+rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
+{
+ if (TARGET_64BIT)
+ {
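+ /* Emit the function descriptor into the .opd section: the entry
+ address, the TOC base, and a zero third word (environment
+ pointer). */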
+ fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
+ ASM_OUTPUT_LABEL (file, name);
+ fputs (DOUBLE_INT_ASM_OP, file);
+ rs6000_output_function_entry (file, name);
+ fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
+ if (DOT_SYMBOLS)
+ {
+ fputs ("\t.size\t", file);
+ assemble_name (file, name);
+ fputs (",24\n\t.type\t.", file);
+ assemble_name (file, name);
+ fputs (",@function\n", file);
+ if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
+ {
+ fputs ("\t.globl\t.", file);
+ assemble_name (file, name);
+ putc ('\n', file);
+ }
+ }
+ else
+ ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
+ ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
+ rs6000_output_function_entry (file, name);
+ fputs (":\n", file);
+ return;
+ }
+
+ if (TARGET_RELOCATABLE
+ && !TARGET_SECURE_PLT
+ && (get_pool_size () != 0 || crtl->profile)
+ && uses_TOC ())
+ {
+ char buf[256];
+
+ (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
+
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
+ fprintf (file, "\t.long ");
+ assemble_name (file, buf);
+ putc ('-', file);
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
+ assemble_name (file, buf);
+ putc ('\n', file);
+ }
+
+ ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
+ ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
+
+ if (DEFAULT_ABI == ABI_AIX)
+ {
+ const char *desc_name, *orig_name;
+
+ orig_name = (*targetm.strip_name_encoding) (name);
+ desc_name = orig_name;
+ while (*desc_name == '.')
+ desc_name++;
+
+ if (TREE_PUBLIC (decl))
+ fprintf (file, "\t.globl %s\n", desc_name);
+
+ fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
+ fprintf (file, "%s:\n", desc_name);
+ fprintf (file, "\t.long %s\n", orig_name);
+ fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
+ if (DEFAULT_ABI == ABI_AIX)
+ fputs ("\t.long 0\n", file);
+ fprintf (file, "\t.previous\n");
+ }
+ ASM_OUTPUT_LABEL (file, name);
+}
+
+static void
+rs6000_elf_file_end (void)
+{
+#ifdef HAVE_AS_GNU_ATTRIBUTE
+ if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
+ {
+ if (rs6000_passes_float)
+ fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
+ ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
+ : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
+ : 2));
+ if (rs6000_passes_vector)
+ fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
+ (TARGET_ALTIVEC_ABI ? 2
+ : TARGET_SPE_ABI ? 3
+ : 1));
+ if (rs6000_returns_struct)
+ fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
+ aix_struct_return ? 2 : 1);
+ }
+#endif
+#ifdef POWERPC_LINUX
+ if (TARGET_32BIT)
+ file_end_indicate_exec_stack ();
+#endif
+}
+#endif
+
+#if TARGET_XCOFF
+static void
+rs6000_xcoff_asm_output_anchor (rtx symbol)
+{
+ char buffer[100];
+
+ sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
+ SYMBOL_REF_BLOCK_OFFSET (symbol));
+ ASM_OUTPUT_DEF (asm_out_file, XSTR (symbol, 0), buffer);
+}
+
+static void
+rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
+{
+ fputs (GLOBAL_ASM_OP, stream);
+ RS6000_OUTPUT_BASENAME (stream, name);
+ putc ('\n', stream);
+}
+
+/* A get_unnamed_decl callback, used for read-only sections. PTR
+ points to the section string variable. */
+
+static void
+rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
+{
+ fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
+ *(const char *const *) directive,
+ XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
+}
+
+/* Likewise for read-write sections. */
+
+static void
+rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
+{
+ fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
+ *(const char *const *) directive,
+ XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
+}
+
+/* A get_unnamed_section callback, used for switching to toc_section. */
+
+static void
+rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
+{
+ if (TARGET_MINIMAL_TOC)
+ {
+ /* toc_section is always selected at least once from
+ rs6000_xcoff_file_start, so this is guaranteed to
+ always be defined once and only once in each file. */
+ if (!toc_initialized)
+ {
+ fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
+ fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
+ toc_initialized = 1;
+ }
+ fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
+ (TARGET_32BIT ? "" : ",3"));
+ }
+ else
+ fputs ("\t.toc\n", asm_out_file);
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+rs6000_xcoff_asm_init_sections (void)
+{
+ read_only_data_section
+ = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
+ &xcoff_read_only_section_name);
+
+ private_data_section
+ = get_unnamed_section (SECTION_WRITE,
+ rs6000_xcoff_output_readwrite_section_asm_op,
+ &xcoff_private_data_section_name);
+
+ read_only_private_data_section
+ = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
+ &xcoff_private_data_section_name);
+
+ toc_section
+ = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
+
+ readonly_data_section = read_only_data_section;
+ exception_section = data_section;
+}
+
+static int
+rs6000_xcoff_reloc_rw_mask (void)
+{
+ return 3;
+}
+
+static void
+rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ int smclass;
+ static const char * const suffix[3] = { "PR", "RO", "RW" };
+
+ if (flags & SECTION_CODE)
+ smclass = 0;
+ else if (flags & SECTION_WRITE)
+ smclass = 2;
+ else
+ smclass = 1;
+
+ fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
+ (flags & SECTION_CODE) ? "." : "",
+ name, suffix[smclass], flags & SECTION_ENTSIZE);
+}
+
+static section *
+rs6000_xcoff_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (decl_readonly_section (decl, reloc))
+ {
+ if (TREE_PUBLIC (decl))
+ return read_only_data_section;
+ else
+ return read_only_private_data_section;
+ }
+ else
+ {
+ if (TREE_PUBLIC (decl))
+ return data_section;
+ else
+ return private_data_section;
+ }
+}
+
+static void
+rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
+{
+ const char *name;
+
+ /* Use select_section for private and uninitialized data. */
+ if (!TREE_PUBLIC (decl)
+ || DECL_COMMON (decl)
+ || DECL_INITIAL (decl) == NULL_TREE
+ || DECL_INITIAL (decl) == error_mark_node
+ || (flag_zero_initialized_in_bss
+ && initializer_zerop (DECL_INITIAL (decl))))
+ return;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = (*targetm.strip_name_encoding) (name);
+ DECL_SECTION_NAME (decl) = build_string (strlen (name), name);
+}
+
+/* Select section for constant in constant pool.
+
+ On RS/6000, all constants are in the private read-only data area.
+ However, if this is being placed in the TOC it must be output as a
+ toc entry. */
+
+static section *
+rs6000_xcoff_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
+ return toc_section;
+ else
+ return read_only_private_data_section;
+}
+
+/* Remove any trailing [DS] or the like from the symbol name. */
+
+static const char *
+rs6000_xcoff_strip_name_encoding (const char *name)
+{
+ size_t len;
+ if (*name == '*')
+ name++;
+ len = strlen (name);
+ if (name[len - 1] == ']')
+ return ggc_alloc_string (name, len - 4);
+ else
+ return name;
+}
+
+/* Section attributes. AIX is always PIC. */
+
+static unsigned int
+rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int align;
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+
+ /* Align to at least UNIT size. */
+ if (flags & SECTION_CODE)
+ align = MIN_UNITS_PER_WORD;
+ else
+ /* Increase alignment of large objects if not already stricter. */
+ align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
+ int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
+ ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
+
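+ /* Fold the log2 of the alignment into the SECTION_ENTSIZE bits. */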
+ return flags | (exact_log2 (align) & SECTION_ENTSIZE);
+}
+
+/* Output at beginning of assembler file.
+
+ Initialize the section names for the RS/6000 at this point.
+
+ Specify filename, including full path, to assembler.
+
+ We want to go into the TOC section so at least one .toc will be emitted.
+ Also, in order to output proper .bs/.es pairs, we need at least one static
+ [RW] section emitted.
+
+ Finally, declare mcount when profiling to make the assembler happy. */
+
+static void
+rs6000_xcoff_file_start (void)
+{
+ rs6000_gen_section_name (&xcoff_bss_section_name,
+ main_input_filename, ".bss_");
+ rs6000_gen_section_name (&xcoff_private_data_section_name,
+ main_input_filename, ".rw_");
+ rs6000_gen_section_name (&xcoff_read_only_section_name,
+ main_input_filename, ".ro_");
+
+ fputs ("\t.file\t", asm_out_file);
+ output_quoted_string (asm_out_file, main_input_filename);
+ fputc ('\n', asm_out_file);
+ if (write_symbols != NO_DEBUG)
+ switch_to_section (private_data_section);
+ switch_to_section (text_section);
+ if (profile_flag)
+ fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
+ rs6000_file_start ();
+}
+
+/* Output at end of assembler file.
+ On the RS/6000, referencing data should automatically pull in text. */
+
+static void
+rs6000_xcoff_file_end (void)
+{
+ switch_to_section (text_section);
+ fputs ("_section_.text:\n", asm_out_file);
+ switch_to_section (data_section);
+ fputs (TARGET_32BIT
+ ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
+ asm_out_file);
+}
+#endif /* TARGET_XCOFF */
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+rs6000_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ /* On the RS/6000, if it is valid in the insn, it is free. */
+ case CONST_INT:
+ if (((outer_code == SET
+ || outer_code == PLUS
+ || outer_code == MINUS)
+ && (satisfies_constraint_I (x)
+ || satisfies_constraint_L (x)))
+ || (outer_code == AND
+ && (satisfies_constraint_K (x)
+ || (mode == SImode
+ ? satisfies_constraint_L (x)
+ : satisfies_constraint_J (x))
+ || mask_operand (x, mode)
+ || (mode == DImode
+ && mask64_operand (x, DImode))))
+ || ((outer_code == IOR || outer_code == XOR)
+ && (satisfies_constraint_K (x)
+ || (mode == SImode
+ ? satisfies_constraint_L (x)
+ : satisfies_constraint_J (x))))
+ || outer_code == ASHIFT
+ || outer_code == ASHIFTRT
+ || outer_code == LSHIFTRT
+ || outer_code == ROTATE
+ || outer_code == ROTATERT
+ || outer_code == ZERO_EXTRACT
+ || (outer_code == MULT
+ && satisfies_constraint_I (x))
+ || ((outer_code == DIV || outer_code == UDIV
+ || outer_code == MOD || outer_code == UMOD)
+ && exact_log2 (INTVAL (x)) >= 0)
+ || (outer_code == COMPARE
+ && (satisfies_constraint_I (x)
+ || satisfies_constraint_K (x)))
+ || ((outer_code == EQ || outer_code == NE)
+ && (satisfies_constraint_I (x)
+ || satisfies_constraint_K (x)
+ || (mode == SImode
+ ? satisfies_constraint_L (x)
+ : satisfies_constraint_J (x))))
+ || (outer_code == GTU
+ && satisfies_constraint_I (x))
+ || (outer_code == LTU
+ && satisfies_constraint_P (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ else if ((outer_code == PLUS
+ && reg_or_add_cint_operand (x, VOIDmode))
+ || (outer_code == MINUS
+ && reg_or_sub_cint_operand (x, VOIDmode))
+ || ((outer_code == SET
+ || outer_code == IOR
+ || outer_code == XOR)
+ && (INTVAL (x)
+ & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST_DOUBLE:
+ if (mode == DImode && code == CONST_DOUBLE)
+ {
+ if ((outer_code == IOR || outer_code == XOR)
+ && CONST_DOUBLE_HIGH (x) == 0
+ && (CONST_DOUBLE_LOW (x)
+ & ~ (unsigned HOST_WIDE_INT) 0xffff) == 0)
+ {
+ *total = 0;
+ return true;
+ }
+ else if ((outer_code == AND && and64_2_operand (x, DImode))
+ || ((outer_code == SET
+ || outer_code == IOR
+ || outer_code == XOR)
+ && CONST_DOUBLE_HIGH (x) == 0))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case CONST:
+ case HIGH:
+ case SYMBOL_REF:
+ case MEM:
+ /* When optimizing for size, MEM should be slightly more expensive
+ than generating address, e.g., (plus (reg) (const)).
+ L1 cache latency is about two instructions. */
+ *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
+ return true;
+
+ case LABEL_REF:
+ *total = 0;
+ return true;
+
+ case PLUS:
+ case MINUS:
+ if (FLOAT_MODE_P (mode))
+ *total = rs6000_cost->fp;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case MULT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && satisfies_constraint_I (XEXP (x, 1)))
+ {
+ if (INTVAL (XEXP (x, 1)) >= -256
+ && INTVAL (XEXP (x, 1)) <= 255)
+ *total = rs6000_cost->mulsi_const9;
+ else
+ *total = rs6000_cost->mulsi_const;
+ }
+ else if (mode == SFmode)
+ *total = rs6000_cost->fp;
+ else if (FLOAT_MODE_P (mode))
+ *total = rs6000_cost->dmul;
+ else if (mode == DImode)
+ *total = rs6000_cost->muldi;
+ else
+ *total = rs6000_cost->mulsi;
+ return false;
+
+ case FMA:
+ if (mode == SFmode)
+ *total = rs6000_cost->fp;
+ else
+ *total = rs6000_cost->dmul;
+ break;
+
+ case DIV:
+ case MOD:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = mode == DFmode ? rs6000_cost->ddiv
+ : rs6000_cost->sdiv;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case UDIV:
+ case UMOD:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
+ {
+ if (code == DIV || code == MOD)
+ /* Shift, addze */
+ *total = COSTS_N_INSNS (2);
+ else
+ /* Shift */
+ *total = COSTS_N_INSNS (1);
+ }
+ else
+ {
+ if (GET_MODE (XEXP (x, 1)) == DImode)
+ *total = rs6000_cost->divdi;
+ else
+ *total = rs6000_cost->divsi;
+ }
+ /* Add in shift and subtract for MOD. */
+ if (code == MOD || code == UMOD)
+ *total += COSTS_N_INSNS (2);
+ return false;
+
+ case CTZ:
+ case FFS:
+ *total = COSTS_N_INSNS (4);
+ return false;
+
+ case POPCOUNT:
+ *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
+ return false;
+
+ case PARITY:
+ *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
+ return false;
+
+ case NOT:
+ if (outer_code == AND || outer_code == IOR || outer_code == XOR)
+ {
+ *total = 0;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case CLZ:
+ case IOR:
+ case XOR:
+ case ZERO_EXTRACT:
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ /* Handle mul_highpart. */
+ if (outer_code == TRUNCATE
+ && GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ if (mode == DImode)
+ *total = rs6000_cost->muldi;
+ else
+ *total = rs6000_cost->mulsi;
+ return true;
+ }
+ else if (outer_code == AND)
+ *total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (GET_CODE (XEXP (x, 0)) == MEM)
+ *total = 0;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case COMPARE:
+ case NEG:
+ case ABS:
+ if (!FLOAT_MODE_P (mode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ case FIX:
+ case UNSIGNED_FIX:
+ case FLOAT_TRUNCATE:
+ *total = rs6000_cost->fp;
+ return false;
+
+ case FLOAT_EXTEND:
+ if (mode == DFmode)
+ *total = 0;
+ else
+ *total = rs6000_cost->fp;
+ return false;
+
+ case UNSPEC:
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_FRSP:
+ *total = rs6000_cost->fp;
+ return true;
+
+ default:
+ break;
+ }
+ break;
+
+ case CALL:
+ case IF_THEN_ELSE:
+ if (!speed)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ else if (FLOAT_MODE_P (mode)
+ && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
+ {
+ *total = rs6000_cost->fp;
+ return false;
+ }
+ break;
+
+ case EQ:
+ case GTU:
+ case LTU:
+ /* Carry bit requires mode == Pmode.
+ NEG or PLUS already counted so only add one. */
+ if (mode == Pmode
+ && (outer_code == NEG || outer_code == PLUS))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ if (outer_code == SET)
+ {
+ if (XEXP (x, 1) == const0_rtx)
+ {
+ if (TARGET_ISEL && !TARGET_MFCRF)
+ *total = COSTS_N_INSNS (8);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ else if (mode == Pmode)
+ {
+ *total = COSTS_N_INSNS (3);
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case GT:
+ case LT:
+ case UNORDERED:
+ if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
+ {
+ if (TARGET_ISEL && !TARGET_MFCRF)
+ *total = COSTS_N_INSNS (8);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ /* CC COMPARE. */
+ if (outer_code == COMPARE)
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
+
+static bool
+rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
+{
+ bool ret = rs6000_rtx_costs (x, code, outer_code, total, speed);
+
+ fprintf (stderr,
+ "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
+ "total = %d, speed = %s, x:\n",
+ ret ? "complete" : "scan inner",
+ GET_RTX_NAME (code),
+ GET_RTX_NAME (outer_code),
+ *total,
+ speed ? "true" : "false");
+
+ debug_rtx (x);
+
+ return ret;
+}
+
+/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
+
+static int
+rs6000_debug_address_cost (rtx x, bool speed)
+{
+ int ret = TARGET_ADDRESS_COST (x, speed);
+
+ fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
+ ret, speed ? "true" : "false");
+ debug_rtx (x);
+
+ return ret;
+}
+
+
+/* A C expression returning the cost of moving data from a register of
+ class FROM to one of class TO. */
+
+static int
+rs6000_register_move_cost (enum machine_mode mode,
+ reg_class_t from, reg_class_t to)
+{
+ int ret;
+
+ /* Moves from/to GENERAL_REGS. */
+ if (reg_classes_intersect_p (to, GENERAL_REGS)
+ || reg_classes_intersect_p (from, GENERAL_REGS))
+ {
+ if (! reg_classes_intersect_p (to, GENERAL_REGS))
+ from = to;
+
+ if (from == FLOAT_REGS || from == ALTIVEC_REGS || from == VSX_REGS)
+ ret = (rs6000_memory_move_cost (mode, from, false)
+ + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+
+ /* It's more expensive to move CR_REGS than CR0_REGS because of the
+ shift. */
+ else if (from == CR_REGS)
+ ret = 4;
+
+ /* For those processors that have slow LR/CTR moves, make them more
+ expensive than memory in order to bias spills to memory. */
+ else if ((rs6000_cpu == PROCESSOR_POWER6
+ || rs6000_cpu == PROCESSOR_POWER7)
+ && reg_classes_intersect_p (from, LINK_OR_CTR_REGS))
+ ret = 6 * hard_regno_nregs[0][mode];
+
+ else
+ /* A move will cost one instruction per GPR moved. */
+ ret = 2 * hard_regno_nregs[0][mode];
+ }
+
+ /* If we have VSX, we can easily move between FPR and Altivec registers. */
+ else if (VECTOR_UNIT_VSX_P (mode)
+ && reg_classes_intersect_p (to, VSX_REGS)
+ && reg_classes_intersect_p (from, VSX_REGS))
+ ret = 2 * hard_regno_nregs[32][mode];
+
+ /* Moving between two similar registers is just one instruction. */
+ else if (reg_classes_intersect_p (to, from))
+ ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
+
+ /* Everything else has to go through GENERAL_REGS. */
+ else
+ ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
+ + rs6000_register_move_cost (mode, from, GENERAL_REGS));
+
+ if (TARGET_DEBUG_COST)
+ fprintf (stderr,
+ "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
+ ret, GET_MODE_NAME (mode), reg_class_names[from],
+ reg_class_names[to]);
+
+ return ret;
+}
+
+/* A C expression returning the cost of moving data of MODE between a
+ register of class RCLASS and memory. */
+
+static int
+rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+ bool in ATTRIBUTE_UNUSED)
+{
+ int ret;
+
+ if (reg_classes_intersect_p (rclass, GENERAL_REGS))
+ ret = 4 * hard_regno_nregs[0][mode];
+ else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
+ ret = 4 * hard_regno_nregs[32][mode];
+ else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
+ ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
+ else
+ ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
+
+ if (TARGET_DEBUG_COST)
+ fprintf (stderr,
+ "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
+ ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
+
+ return ret;
+}
+
+/* Returns the decl of a target-specific builtin that implements the
+ reciprocal of the given function, or NULL_TREE if not available. */
+
+static tree
+rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
+ bool sqrt ATTRIBUTE_UNUSED)
+{
+ if (optimize_insn_for_size_p ())
+ return NULL_TREE;
+
+ if (md_fn)
+ switch (fn)
+ {
+ case VSX_BUILTIN_XVSQRTDP:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
+ return NULL_TREE;
+
+ return rs6000_builtin_decls[VSX_BUILTIN_VEC_RSQRT_V2DF];
+
+ case VSX_BUILTIN_XVSQRTSP:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
+ return NULL_TREE;
+
+ return rs6000_builtin_decls[VSX_BUILTIN_VEC_RSQRT_V4SF];
+
+ default:
+ return NULL_TREE;
+ }
+
+ else
+ switch (fn)
+ {
+ case BUILT_IN_SQRT:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
+ return NULL_TREE;
+
+ return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
+
+ case BUILT_IN_SQRTF:
+ if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
+ return NULL_TREE;
+
+ return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
+
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Load up a constant. If the mode is a vector mode, splat the value across
+ all of the vector elements. */
+
+static rtx
+rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
+{
+ rtx reg;
+
+ if (mode == SFmode || mode == DFmode)
+ {
+ rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
+ reg = force_reg (mode, d);
+ }
+ else if (mode == V4SFmode)
+ {
+ rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
+ rtvec v = gen_rtvec (4, d, d, d, d);
+ reg = gen_reg_rtx (mode);
+ rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
+ }
+ else if (mode == V2DFmode)
+ {
+ rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
+ rtvec v = gen_rtvec (2, d, d);
+ reg = gen_reg_rtx (mode);
+ rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
+ }
+ else
+ gcc_unreachable ();
+
+ return reg;
+}
+
+/* Generate an FMA instruction. */
+
+static void
+rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
+{
+ enum machine_mode mode = GET_MODE (target);
+ rtx dst;
+
+ dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
+ gcc_assert (dst != NULL);
+
+ if (dst != target)
+ emit_move_insn (target, dst);
+}
+
+/* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
+
+static void
+rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
+{
+ enum machine_mode mode = GET_MODE (target);
+ rtx dst;
+
+ /* Altivec does not support fms directly;
+ generate in terms of fma in that case. */
+ if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
+ dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
+ else
+ {
+ a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
+ dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
+ }
+ gcc_assert (dst != NULL);
+
+ if (dst != target)
+ emit_move_insn (target, dst);
+}
+
+/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
+
+static void
+rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx r;
+
+ /* This is a tad more complicated, since the fnma_optab is for
+ a different expression: fma(-m1, m2, a), which is the same
+ thing except in the case of signed zeros.
+
+ Fortunately we know that if FMA is supported that FNMSUB is
+ also supported in the ISA. Just expand it directly. */
+
+ gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
+
+ r = gen_rtx_NEG (mode, a);
+ r = gen_rtx_FMA (mode, m1, m2, r);
+ r = gen_rtx_NEG (mode, r);
+ emit_insn (gen_rtx_SET (VOIDmode, dst, r));
+}
+
+/* Newton-Raphson approximation of floating point divide with just 2 passes
+ (either single precision floating point, or newer machines with higher
+ accuracy estimates). Support both scalar and vector divide. Assumes no
+ trapping math and finite arguments. */
+
+static void
+rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx x0, e0, e1, y1, u0, v0;
+ enum insn_code code = optab_handler (smul_optab, mode);
+ gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code);
+ rtx one = rs6000_load_constant_and_splat (mode, dconst1);
+
+ gcc_assert (code != CODE_FOR_nothing);
+
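+ /* Two refinement passes: e0 = 1 - d*x0, e1 = e0 + e0*e0, and
+ y1 = x0 + e1*x0 approximate 1/d; the quotient is then corrected
+ once via u0 = n*y1, dst = u0 + (n - d*u0)*y1. */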
+ /* x0 = 1./d estimate */
+ x0 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
+ UNSPEC_FRES)));
+
+ e0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - (d * x0) */
+
+ e1 = gen_reg_rtx (mode);
+ rs6000_emit_madd (e1, e0, e0, e0); /* e1 = (e0 * e0) + e0 */
+
+ y1 = gen_reg_rtx (mode);
+ rs6000_emit_madd (y1, e1, x0, x0); /* y1 = (e1 * x0) + x0 */
+
+ u0 = gen_reg_rtx (mode);
+ emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
+
+ v0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - (d * u0) */
+
+ rs6000_emit_madd (dst, v0, y1, u0); /* dst = (v0 * y1) + u0 */
+}
+
+/* Newton-Raphson approximation of floating point divide that has a low
+ precision estimate. Assumes no trapping math and finite arguments. */
+
+static void
+rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
+ enum insn_code code = optab_handler (smul_optab, mode);
+ gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code);
+
+ gcc_assert (code != CODE_FOR_nothing);
+
+ one = rs6000_load_constant_and_splat (mode, dconst1);
+
+ /* x0 = 1./d estimate */
+ x0 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
+ UNSPEC_FRES)));
+
+ e0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - d * x0 */
+
+ y1 = gen_reg_rtx (mode);
+ rs6000_emit_madd (y1, e0, x0, x0); /* y1 = x0 + e0 * x0 */
+
+ e1 = gen_reg_rtx (mode);
+ emit_insn (gen_mul (e1, e0, e0)); /* e1 = e0 * e0 */
+
+ y2 = gen_reg_rtx (mode);
+ rs6000_emit_madd (y2, e1, y1, y1); /* y2 = y1 + e1 * y1 */
+
+ e2 = gen_reg_rtx (mode);
+ emit_insn (gen_mul (e2, e1, e1)); /* e2 = e1 * e1 */
+
+ y3 = gen_reg_rtx (mode);
+ rs6000_emit_madd (y3, e2, y2, y2); /* y3 = y2 + e2 * y2 */
+
+ u0 = gen_reg_rtx (mode);
+ emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
+
+ v0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - d * u0 */
+
+ rs6000_emit_madd (dst, v0, y3, u0); /* dst = u0 + v0 * y3 */
+}
+
+/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
+ add a reg_note saying that this was a division. Support both scalar and
+ vector divide. Assumes no trapping math and finite arguments. */
+
+void
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
+{
+ enum machine_mode mode = GET_MODE (dst);
+
+ if (RS6000_RECIP_HIGH_PRECISION_P (mode))
+ rs6000_emit_swdiv_high_precision (dst, n, d);
+ else
+ rs6000_emit_swdiv_low_precision (dst, n, d);
+
+ if (note_p)
+ add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
+}
+
+/* Newton-Raphson approximation of single/double-precision floating point
+ rsqrt. Assumes no trapping math and finite arguments. */
+
+void
+rs6000_emit_swrsqrt (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (src);
+ rtx x0 = gen_reg_rtx (mode);
+ rtx y = gen_reg_rtx (mode);
+ int passes = (TARGET_RECIP_PRECISION) ? 2 : 3;
+ REAL_VALUE_TYPE dconst3_2;
+ int i;
+ rtx halfthree;
+ enum insn_code code = optab_handler (smul_optab, mode);
+ gen_2arg_fn_t gen_mul = (gen_2arg_fn_t) GEN_FCN (code);
+
+ gcc_assert (code != CODE_FOR_nothing);
+
+ /* Load up the constant 1.5 either as a scalar, or as a vector. */
+ real_from_integer (&dconst3_2, VOIDmode, 3, 0, 0);
+ SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
+
+ halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
+
+ /* x0 = rsqrt estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
+ UNSPEC_RSQRT)));
+
+ /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
+ rs6000_emit_msub (y, src, halfthree, src);
+
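+ /* Each pass applies one Newton-Raphson step for rsqrt:
+ x1 = x0 * (1.5 - (0.5 * src) * x0 * x0), using y = 0.5 * src
+ computed above. */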
+ for (i = 0; i < passes; i++)
+ {
+ rtx x1 = gen_reg_rtx (mode);
+ rtx u = gen_reg_rtx (mode);
+ rtx v = gen_reg_rtx (mode);
+
+ /* x1 = x0 * (1.5 - y * (x0 * x0)) */
+ emit_insn (gen_mul (u, x0, x0));
+ rs6000_emit_nmsub (v, y, u, halfthree);
+ emit_insn (gen_mul (x1, x0, v));
+ x0 = x1;
+ }
+
+ emit_move_insn (dst, x0);
+ return;
+}
+
+/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
+ (Power7) targets. DST is the target, and SRC is the argument operand. */
+
+void
+rs6000_emit_popcount (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx tmp1, tmp2;
+
+ /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
+ if (TARGET_POPCNTD)
+ {
+ if (mode == SImode)
+ emit_insn (gen_popcntdsi2 (dst, src));
+ else
+ emit_insn (gen_popcntddi2 (dst, src));
+ return;
+ }
+
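+ /* No popcntw/popcntd: fall back on popcntb, which counts bits per
+ byte. Multiplying by 0x01010101 (or its 64-bit extension) sums
+ the byte counts into the most significant byte, which the final
+ shift extracts. */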
+ tmp1 = gen_reg_rtx (mode);
+
+ if (mode == SImode)
+ {
+ emit_insn (gen_popcntbsi2 (tmp1, src));
+ tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
+ NULL_RTX, 0);
+ tmp2 = force_reg (SImode, tmp2);
+ emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
+ }
+ else
+ {
+ emit_insn (gen_popcntbdi2 (tmp1, src));
+ tmp2 = expand_mult (DImode, tmp1,
+ GEN_INT ((HOST_WIDE_INT)
+ 0x01010101 << 32 | 0x01010101),
+ NULL_RTX, 0);
+ tmp2 = force_reg (DImode, tmp2);
+ emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
+ }
+}
+
+
+/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
+ target, and SRC is the argument operand. */
+
+void
+rs6000_emit_parity (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx tmp;
+
+ tmp = gen_reg_rtx (mode);
+
+ /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
+ if (TARGET_CMPB)
+ {
+ if (mode == SImode)
+ {
+ emit_insn (gen_popcntbsi2 (tmp, src));
+ emit_insn (gen_paritysi2_cmpb (dst, tmp));
+ }
+ else
+ {
+ emit_insn (gen_popcntbdi2 (tmp, src));
+ emit_insn (gen_paritydi2_cmpb (dst, tmp));
+ }
+ return;
+ }
+
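+ /* Otherwise derive parity from the per-byte popcounts: parity is the
+ low bit of the sum of the byte counts, and since bit 0 of (a ^ b)
+ equals bit 0 of (a + b), the counts can be folded with shift/xor
+ pairs when a multiply would be slower; otherwise reuse the full
+ popcount sequence and mask the result with 1. */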
+ if (mode == SImode)
+ {
+ /* Is mult+shift >= shift+xor+shift+xor? */
+ if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
+ {
+ rtx tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = gen_reg_rtx (SImode);
+ emit_insn (gen_popcntbsi2 (tmp1, src));
+
+ tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
+ tmp3 = gen_reg_rtx (SImode);
+ emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
+
+ tmp4 = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
+ emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
+ }
+ else
+ rs6000_emit_popcount (tmp, src);
+ emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
+ }
+ else
+ {
+ /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
+ if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
+ {
+ rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+
+ tmp1 = gen_reg_rtx (DImode);
+ emit_insn (gen_popcntbdi2 (tmp1, src));
+
+ tmp2 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
+ tmp3 = gen_reg_rtx (DImode);
+ emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
+
+ tmp4 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
+ tmp5 = gen_reg_rtx (DImode);
+ emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
+
+ tmp6 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
+ emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
+ }
+ else
+ rs6000_emit_popcount (tmp, src);
+ emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
+ }
+}
+
+/* Return an RTX representing where to find the function value of a
+ function returning MODE. */
+static rtx
+rs6000_complex_function_value (enum machine_mode mode)
+{
+ unsigned int regno;
+ rtx r1, r2;
+ enum machine_mode inner = GET_MODE_INNER (mode);
+ unsigned int inner_bytes = GET_MODE_SIZE (inner);
+
+ if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
+ regno = FP_ARG_RETURN;
+ else
+ {
+ regno = GP_ARG_RETURN;
+
+ /* 32-bit is OK since it'll go in r3/r4. */
+ if (TARGET_32BIT && inner_bytes >= 4)
+ return gen_rtx_REG (mode, regno);
+ }
+
+ if (inner_bytes >= 8)
+ return gen_rtx_REG (mode, regno);
+
+ r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
+ const0_rtx);
+ r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
+ GEN_INT (inner_bytes));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
+}
+
+/* Target hook for TARGET_FUNCTION_VALUE.
+
+ On the SPE, both FPs and vectors are returned in r3.
+
+ On RS/6000 an integer value is in r3 and a floating-point value is in
+ fp1, unless -msoft-float. */
+
+rtx
+rs6000_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+ unsigned int regno;
+
+ /* Special handling for structs in darwin64. */
+ if (TARGET_MACHO
+ && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
+ {
+ CUMULATIVE_ARGS valcum;
+ rtx valret;
+
+ valcum.words = 0;
+ valcum.fregno = FP_ARG_MIN_REG;
+ valcum.vregno = ALTIVEC_ARG_MIN_REG;
+ /* Do a trial code generation as if this were going to be passed as
+ an argument; if any part goes in memory, we return NULL. */
+ valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
+ if (valret)
+ return valret;
+ /* Otherwise fall through to standard ABI rules. */
+ }
+
+ if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
+ {
+ /* A long long return value needs to be split under the 32-bit ABI
+ with -mpowerpc64. */
+ return gen_rtx_PARALLEL (DImode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode, GP_ARG_RETURN),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode,
+ GP_ARG_RETURN + 1),
+ GEN_INT (4))));
+ }
+ if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
+ {
+ return gen_rtx_PARALLEL (DCmode,
+ gen_rtvec (4,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode, GP_ARG_RETURN),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode,
+ GP_ARG_RETURN + 1),
+ GEN_INT (4)),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode,
+ GP_ARG_RETURN + 2),
+ GEN_INT (8)),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode,
+ GP_ARG_RETURN + 3),
+ GEN_INT (12))));
+ }
+
+ mode = TYPE_MODE (valtype);
+ if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
+ || POINTER_TYPE_P (valtype))
+ mode = TARGET_32BIT ? SImode : DImode;
+
+ if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
+ /* _Decimal128 must use an even/odd register pair. */
+ regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
+ else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
+ && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
+ regno = FP_ARG_RETURN;
+ else if (TREE_CODE (valtype) == COMPLEX_TYPE
+ && targetm.calls.split_complex_arg)
+ return rs6000_complex_function_value (mode);
+ /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
+ return register is used in both cases, and we won't see V2DImode/V2DFmode
+ for pure altivec, combine the two cases. */
+ else if (TREE_CODE (valtype) == VECTOR_TYPE
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
+ && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
+ regno = ALTIVEC_ARG_RETURN;
+ else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
+ && (mode == DFmode || mode == DCmode
+ || mode == TFmode || mode == TCmode))
+ return spe_build_register_parallel (mode, GP_ARG_RETURN);
+ else
+ regno = GP_ARG_RETURN;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+rtx
+rs6000_libcall_value (enum machine_mode mode)
+{
+ unsigned int regno;
+
+ if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
+ {
+ /* A long long return value needs to be split under the 32-bit ABI
+ with -mpowerpc64. */
+ return gen_rtx_PARALLEL (DImode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode, GP_ARG_RETURN),
+ const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode,
+ GP_ARG_RETURN + 1),
+ GEN_INT (4))));
+ }
+
+ if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
+ /* _Decimal128 must use an even/odd register pair. */
+ regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
+ else if (SCALAR_FLOAT_MODE_P (mode)
+ && TARGET_HARD_FLOAT && TARGET_FPRS
+ && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
+ regno = FP_ARG_RETURN;
+ /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
+ return register is used in both cases, and we won't see V2DImode/V2DFmode
+ for pure altivec, combine the two cases. */
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
+ regno = ALTIVEC_ARG_RETURN;
+ else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
+ return rs6000_complex_function_value (mode);
+ else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
+ && (mode == DFmode || mode == DCmode
+ || mode == TFmode || mode == TCmode))
+ return spe_build_register_parallel (mode, GP_ARG_RETURN);
+ else
+ regno = GP_ARG_RETURN;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+
+/* Given FROM and TO register numbers, say whether this elimination is allowed.
+ Frame pointer elimination is automatically handled.
+
+ For the RS/6000, if frame pointer elimination is being done, we would like
+ to convert ap into fp, not sp.
+
+ We need r30 if -mminimal-toc was specified, and there are constant pool
+ references. */
+
+bool
+rs6000_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : from == RS6000_PIC_OFFSET_TABLE_REGNUM
+ ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
+ : true);
+}
+
+/* Define the offset between two registers, FROM to be eliminated and its
+ replacement TO, at the start of a routine. */
+HOST_WIDE_INT
+rs6000_initial_elimination_offset (int from, int to)
+{
+ rs6000_stack_t *info = rs6000_stack_info ();
+ HOST_WIDE_INT offset;
+
+ if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ offset = info->push_p ? 0 : -info->total_size;
+ else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ {
+ offset = info->push_p ? 0 : -info->total_size;
+ if (FRAME_GROWS_DOWNWARD)
+ offset += info->fixed_size + info->vars_size + info->parm_size;
+ }
+ else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ offset = FRAME_GROWS_DOWNWARD
+ ? info->fixed_size + info->vars_size + info->parm_size
+ : 0;
+ else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ offset = info->total_size;
+ else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ offset = info->push_p ? info->total_size : 0;
+ else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
+ offset = 0;
+ else
+ gcc_unreachable ();
+
+ return offset;
+}
+
+static rtx
+rs6000_dwarf_register_span (rtx reg)
+{
+ rtx parts[8];
+ int i, words;
+ unsigned regno = REGNO (reg);
+ enum machine_mode mode = GET_MODE (reg);
+
+ if (TARGET_SPE
+ && regno < 32
+ && (SPE_VECTOR_MODE (GET_MODE (reg))
+ || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
+ && mode != SFmode && mode != SDmode && mode != SCmode)))
+ ;
+ else
+ return NULL_RTX;
+
+ regno = REGNO (reg);
+
+ /* The duality of the SPE register size wreaks all kinds of havoc.
+ This is a way of distinguishing r0 in 32-bits from r0 in
+ 64-bits. */
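+ /* Each 64-bit SPE register is described as a pair of SImode pieces:
+ the synthetic high-part register REGNO + 1200 (the numbering used by
+ rs6000_dbx_register_number below) and the architected low part, with
+ the high part first on big-endian targets. */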
+ words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
+ gcc_assert (words <= 4);
+ for (i = 0; i < words; i++, regno++)
+ {
+ if (BYTES_BIG_ENDIAN)
+ {
+ parts[2 * i] = gen_rtx_REG (SImode, regno + 1200);
+ parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
+ }
+ else
+ {
+ parts[2 * i] = gen_rtx_REG (SImode, regno);
+ parts[2 * i + 1] = gen_rtx_REG (SImode, regno + 1200);
+ }
+ }
+
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
+}
+
+/* Fill in sizes for SPE register high parts in the table used by the
+ unwinder. */
+
+static void
+rs6000_init_dwarf_reg_sizes_extra (tree address)
+{
+ if (TARGET_SPE)
+ {
+ int i;
+ enum machine_mode mode = TYPE_MODE (char_type_node);
+ rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ rtx mem = gen_rtx_MEM (BLKmode, addr);
+ rtx value = gen_int_mode (4, mode);
+
+ for (i = 1201; i < 1232; i++)
+ {
+ int column = DWARF_REG_TO_UNWIND_COLUMN (i);
+ HOST_WIDE_INT offset
+ = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode);
+
+ emit_move_insn (adjust_address (mem, mode, offset), value);
+ }
+ }
+}
+
+/* Map internal gcc register numbers to DWARF2 register numbers. */
+
+unsigned int
+rs6000_dbx_register_number (unsigned int regno)
+{
+ if (regno <= 63 || write_symbols != DWARF2_DEBUG)
+ return regno;
+ if (regno == MQ_REGNO)
+ return 100;
+ if (regno == LR_REGNO)
+ return 108;
+ if (regno == CTR_REGNO)
+ return 109;
+ if (CR_REGNO_P (regno))
+ return regno - CR0_REGNO + 86;
+ if (regno == CA_REGNO)
+ return 101; /* XER */
+ if (ALTIVEC_REGNO_P (regno))
+ return regno - FIRST_ALTIVEC_REGNO + 1124;
+ if (regno == VRSAVE_REGNO)
+ return 356;
+ if (regno == VSCR_REGNO)
+ return 67;
+ if (regno == SPE_ACC_REGNO)
+ return 99;
+ if (regno == SPEFSCR_REGNO)
+ return 612;
+ /* SPE high reg number. We get these values of regno from
+ rs6000_dwarf_register_span. */
+ gcc_assert (regno >= 1200 && regno < 1232);
+ return regno;
+}
+
+/* Target hook for eh_return_filter_mode. */
+static enum machine_mode
+rs6000_eh_return_filter_mode (void)
+{
+ return TARGET_32BIT ? SImode : word_mode;
+}
+
+/* Target hook for scalar_mode_supported_p. */
+static bool
+rs6000_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return default_decimal_float_supported_p ();
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Target hook for vector_mode_supported_p. */
+static bool
+rs6000_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
+ return true;
+
+ if (TARGET_SPE && SPE_VECTOR_MODE (mode))
+ return true;
+
+ else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
+ return true;
+
+ else
+ return false;
+}
+
+/* Target hook for invalid_arg_for_unprototyped_fn. */
+static const char *
+invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
+{
+ return (!rs6000_darwin64_abi
+ && typelist == 0
+ && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
+ && (funcdecl == NULL_TREE
+ || (TREE_CODE (funcdecl) == FUNCTION_DECL
+ && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
+ ? N_("AltiVec argument passed to unprototyped function")
+ : NULL;
+}
+
+/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
+ setup by using __stack_chk_fail_local hidden function instead of
+ calling __stack_chk_fail directly. Otherwise it is better to call
+ __stack_chk_fail directly. */
+
+static tree
+rs6000_stack_protect_fail (void)
+{
+ return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
+ ? default_hidden_stack_protect_fail ()
+ : default_external_stack_protect_fail ();
+}
+
+void
+rs6000_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
+ int num_operands ATTRIBUTE_UNUSED)
+{
+ if (rs6000_warn_cell_microcode)
+ {
+ const char *temp;
+ int insn_code_number = recog_memoized (insn);
+ location_t location = locator_location (INSN_LOCATOR (insn));
+
+ /* Punt on insns we cannot recognize. */
+ if (insn_code_number < 0)
+ return;
+
+ temp = get_insn_template (insn_code_number, insn);
+
+ if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
+ warning_at (location, OPT_mwarn_cell_microcode,
+ "emitting microcode insn %s\t[%s] #%d",
+ temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
+ else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
+ warning_at (location, OPT_mwarn_cell_microcode,
+ "emitting conditional microcode insn %s\t[%s] #%d",
+ temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
+ }
+}
+
+
+/* Mask options that we want to support inside attribute((target)) and
+ #pragma GCC target operations. Note that we do not include things like
+ 64/32-bit, endianness, hard/soft floating point, etc. that would have
+ different calling sequences. */
+
+struct rs6000_opt_mask {
+ const char *name; /* option name */
+ int mask; /* mask to set */
+ bool invert; /* invert sense of mask */
+ bool valid_target; /* option is a target option */
+};
+
+static struct rs6000_opt_mask const rs6000_opt_masks[] =
+{
+ { "altivec", MASK_ALTIVEC, false, true },
+ { "cmpb", MASK_CMPB, false, true },
+ { "dlmzb", MASK_DLMZB, false, true },
+ { "fprnd", MASK_FPRND, false, true },
+ { "hard-dfp", MASK_DFP, false, true },
+ { "isel", MASK_ISEL, false, true },
+ { "mfcrf", MASK_MFCRF, false, true },
+ { "mfpgpr", MASK_MFPGPR, false, true },
+ { "mulhw", MASK_MULHW, false, true },
+ { "multiple", MASK_MULTIPLE, false, true },
+ { "update", MASK_NO_UPDATE, true , true },
+ { "popcntb", MASK_POPCNTB, false, true },
+ { "popcntd", MASK_POPCNTD, false, true },
+ { "powerpc-gfxopt", MASK_PPC_GFXOPT, false, true },
+ { "powerpc-gpopt", MASK_PPC_GPOPT, false, true },
+ { "recip-precision", MASK_RECIP_PRECISION, false, true },
+ { "string", MASK_STRING, false, true },
+ { "vsx", MASK_VSX, false, true },
+#ifdef MASK_64BIT
+#if TARGET_AIX_OS
+ { "aix64", MASK_64BIT, false, false },
+ { "aix32", MASK_64BIT, true, false },
+#else
+ { "64", MASK_64BIT, false, false },
+ { "32", MASK_64BIT, true, false },
+#endif
+#endif
+#ifdef MASK_EABI
+ { "eabi", MASK_EABI, false, false },
+#endif
+#ifdef MASK_LITTLE_ENDIAN
+ { "little", MASK_LITTLE_ENDIAN, false, false },
+ { "big", MASK_LITTLE_ENDIAN, true, false },
+#endif
+#ifdef MASK_RELOCATABLE
+ { "relocatable", MASK_RELOCATABLE, false, false },
+#endif
+#ifdef MASK_STRICT_ALIGN
+ { "strict-align", MASK_STRICT_ALIGN, false, false },
+#endif
+ { "power", MASK_POWER, false, false },
+ { "power2", MASK_POWER2, false, false },
+ { "powerpc", MASK_POWERPC, false, false },
+ { "soft-float", MASK_SOFT_FLOAT, false, false },
+ { "string", MASK_STRING, false, false },
+};
+
+/* Option variables that we want to support inside attribute((target)) and
+ #pragma GCC target operations. */
+
+struct rs6000_opt_var {
+ const char *name; /* option name */
+ size_t global_offset; /* offset of the option in global_options. */
+ size_t target_offset; /* offset of the option in target options. */
+};
+
+static struct rs6000_opt_var const rs6000_opt_vars[] =
+{
+ { "friz",
+ offsetof (struct gcc_options, x_TARGET_FRIZ),
+ offsetof (struct cl_target_option, x_TARGET_FRIZ), },
+ { "avoid-indexed-addresses",
+ offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
+ offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
+ { "paired",
+ offsetof (struct gcc_options, x_rs6000_paired_float),
+ offsetof (struct cl_target_option, x_rs6000_paired_float), },
+ { "longcall",
+ offsetof (struct gcc_options, x_rs6000_default_long_calls),
+ offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
+};
+
+/* Inner function to handle attribute((target("..."))) and #pragma GCC target
+ parsing. Return true if there were no errors. */
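+
+/* For example, attribute((target("cpu=power7,no-vsx"))) is split on
+ commas; "cpu=" and "tune=" entries select a processor, a "no-" prefix
+ inverts an option, and every other token is looked up first in
+ rs6000_opt_masks and then in rs6000_opt_vars. */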
+
+static bool
+rs6000_inner_target_options (tree args, bool attr_p)
+{
+ bool ret = true;
+
+ if (args == NULL_TREE)
+ ;
+
+ else if (TREE_CODE (args) == STRING_CST)
+ {
+ char *p = ASTRDUP (TREE_STRING_POINTER (args));
+ char *q;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ bool error_p = false;
+ bool not_valid_p = false;
+ const char *cpu_opt = NULL;
+
+ p = NULL;
+ if (strncmp (q, "cpu=", 4) == 0)
+ {
+ int cpu_index = rs6000_cpu_name_lookup (q+4);
+ if (cpu_index >= 0)
+ rs6000_cpu_index = cpu_index;
+ else
+ {
+ error_p = true;
+ cpu_opt = q+4;
+ }
+ }
+ else if (strncmp (q, "tune=", 5) == 0)
+ {
+ int tune_index = rs6000_cpu_name_lookup (q+5);
+ if (tune_index >= 0)
+ rs6000_tune_index = tune_index;
+ else
+ {
+ error_p = true;
+ cpu_opt = q+5;
+ }
+ }
+ else
+ {
+ size_t i;
+ bool invert = false;
+ char *r = q;
+
+ error_p = true;
+ if (strncmp (r, "no-", 3) == 0)
+ {
+ invert = true;
+ r += 3;
+ }
+
+ for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
+ if (strcmp (r, rs6000_opt_masks[i].name) == 0)
+ {
+ int mask = rs6000_opt_masks[i].mask;
+
+ if (!rs6000_opt_masks[i].valid_target)
+ not_valid_p = true;
+ else
+ {
+ error_p = false;
+ target_flags_explicit |= mask;
+
+ if (rs6000_opt_masks[i].invert)
+ invert = !invert;
+
+ if (invert)
+ target_flags &= ~mask;
+ else
+ target_flags |= mask;
+ }
+ break;
+ }
+
+ if (error_p && !not_valid_p)
+ {
+ for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
+ if (strcmp (r, rs6000_opt_vars[i].name) == 0)
+ {
+ size_t j = rs6000_opt_vars[i].global_offset;
+ *((int *) ((char *)&global_options + j)) = !invert;
+ error_p = false;
+ break;
+ }
+ }
+ }
+
+ if (error_p)
+ {
+ const char *eprefix, *esuffix;
+
+ ret = false;
+ if (attr_p)
+ {
+ eprefix = "__attribute__((__target__(";
+ esuffix = ")))";
+ }
+ else
+ {
+ eprefix = "#pragma GCC target ";
+ esuffix = "";
+ }
+
+ if (cpu_opt)
+ error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
+ q, esuffix);
+ else if (not_valid_p)
+ error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
+ else
+ error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
+ }
+ }
+ }
+
+ else if (TREE_CODE (args) == TREE_LIST)
+ {
+ do
+ {
+ tree value = TREE_VALUE (args);
+ if (value)
+ {
+ bool ret2 = rs6000_inner_target_options (value, attr_p);
+ if (!ret2)
+ ret = false;
+ }
+ args = TREE_CHAIN (args);
+ }
+ while (args != NULL_TREE);
+ }
+
+ else
+ gcc_unreachable ();
+
+ return ret;
+}
+
+/* Print out the target options as a list for -mdebug=target. */
+
+static void
+rs6000_debug_target_options (tree args, const char *prefix)
+{
+ if (args == NULL_TREE)
+ fprintf (stderr, "%s<NULL>", prefix);
+
+ else if (TREE_CODE (args) == STRING_CST)
+ {
+ char *p = ASTRDUP (TREE_STRING_POINTER (args));
+ char *q;
+
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ p = NULL;
+ fprintf (stderr, "%s\"%s\"", prefix, q);
+ prefix = ", ";
+ }
+ }
+
+ else if (TREE_CODE (args) == TREE_LIST)
+ {
+ do
+ {
+ tree value = TREE_VALUE (args);
+ if (value)
+ {
+ rs6000_debug_target_options (value, prefix);
+ prefix = ", ";
+ }
+ args = TREE_CHAIN (args);
+ }
+ while (args != NULL_TREE);
+ }
+
+ else
+ gcc_unreachable ();
+
+ return;
+}
+
+
+/* Hook to validate attribute((target("..."))). */
+
+static bool
+rs6000_valid_attribute_p (tree fndecl,
+ tree ARG_UNUSED (name),
+ tree args,
+ int flags)
+{
+ struct cl_target_option cur_target;
+ bool ret;
+ tree old_optimize = build_optimization_node ();
+ tree new_target, new_optimize;
+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
+
+ if (TARGET_DEBUG_TARGET)
+ {
+ tree tname = DECL_NAME (fndecl);
+ fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
+ if (tname)
+ fprintf (stderr, "function: %.*s\n",
+ (int) IDENTIFIER_LENGTH (tname),
+ IDENTIFIER_POINTER (tname));
+ else
+ fprintf (stderr, "function: unknown\n");
+
+ fprintf (stderr, "args:");
+ rs6000_debug_target_options (args, " ");
+ fprintf (stderr, "\n");
+
+ if (flags)
+ fprintf (stderr, "flags: 0x%x\n", flags);
+
+ fprintf (stderr, "--------------------\n");
+ }
+
+ old_optimize = build_optimization_node ();
+ func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ /* If the function changed the optimization levels as well as setting target
+ options, start with the optimizations specified. */
+ if (func_optimize && func_optimize != old_optimize)
+ cl_optimization_restore (&global_options,
+ TREE_OPTIMIZATION (func_optimize));
+
+ /* The target attributes may also change some optimization flags, so update
+ the optimization options if necessary. */
+ cl_target_option_save (&cur_target, &global_options);
+ rs6000_cpu_index = rs6000_tune_index = -1;
+ ret = rs6000_inner_target_options (args, true);
+
+ /* Set up any additional state. */
+ if (ret)
+ {
+ ret = rs6000_option_override_internal (false);
+ new_target = build_target_option_node ();
+ }
+ else
+ new_target = NULL;
+
+ new_optimize = build_optimization_node ();
+
+ if (!new_target)
+ ret = false;
+
+ else if (fndecl)
+ {
+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+
+ if (old_optimize != new_optimize)
+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+ }
+
+ cl_target_option_restore (&global_options, &cur_target);
+
+ if (old_optimize != new_optimize)
+ cl_optimization_restore (&global_options,
+ TREE_OPTIMIZATION (old_optimize));
+
+ return ret;
+}
+
+
+/* Hook to validate the current #pragma GCC target and set the state, and
+ update the macros based on what was changed. If ARGS is NULL, then
+ POP_TARGET is used to reset the options. */
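+
+/* E.g. #pragma GCC target ("vsx") funnels through
+ rs6000_inner_target_options above, while #pragma GCC pop_options
+ arrives here with ARGS == NULL and, in effect, restores POP_TARGET
+ (or the command-line default when there is nothing to pop). */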
+
+bool
+rs6000_pragma_target_parse (tree args, tree pop_target)
+{
+ tree cur_tree;
+ bool ret;
+
+ if (TARGET_DEBUG_TARGET)
+ {
+ fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
+ fprintf (stderr, "args:");
+ rs6000_debug_target_options (args, " ");
+ fprintf (stderr, "\n");
+
+ if (pop_target)
+ {
+ fprintf (stderr, "pop_target:\n");
+ debug_tree (pop_target);
+ }
+ else
+ fprintf (stderr, "pop_target: <NULL>\n");
+
+ fprintf (stderr, "--------------------\n");
+ }
+
+ if (! args)
+ {
+ ret = true;
+ cur_tree = ((pop_target)
+ ? pop_target
+ : target_option_default_node);
+ cl_target_option_restore (&global_options,
+ TREE_TARGET_OPTION (cur_tree));
+ }
+ else
+ {
+ rs6000_cpu_index = rs6000_tune_index = -1;
+ ret = rs6000_inner_target_options (args, false);
+ cur_tree = build_target_option_node ();
+
+ if (!cur_tree)
+ ret = false;
+ }
+
+ if (cur_tree)
+ target_option_current_node = cur_tree;
+
+ return ret;
+}
+
+
+/* Remember the last target of rs6000_set_current_function. */
+static GTY(()) tree rs6000_previous_fndecl;
+
+/* Establish appropriate back-end context for processing the function
+ FNDECL. The argument might be NULL to indicate processing at top
+ level, outside of any function scope. */
+static void
+rs6000_set_current_function (tree fndecl)
+{
+ tree old_tree = (rs6000_previous_fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
+ : NULL_TREE);
+
+ tree new_tree = (fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
+ : NULL_TREE);
+
+ if (TARGET_DEBUG_TARGET)
+ {
+ bool print_final = false;
+ fprintf (stderr, "\n==================== rs6000_set_current_function");
+
+ if (fndecl)
+ fprintf (stderr, ", fndecl %s (%p)",
+ (DECL_NAME (fndecl)
+ ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
+ : "<unknown>"), (void *)fndecl);
+
+ if (rs6000_previous_fndecl)
+ fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
+
+ fprintf (stderr, "\n");
+ if (new_tree)
+ {
+ fprintf (stderr, "\nnew fndecl target specific options:\n");
+ debug_tree (new_tree);
+ print_final = true;
+ }
+
+ if (old_tree)
+ {
+ fprintf (stderr, "\nold fndecl target specific options:\n");
+ debug_tree (old_tree);
+ print_final = true;
+ }
+
+ if (print_final)
+ fprintf (stderr, "--------------------\n");
+ }
+
+ /* Only change the context if the function changes. This hook is called
+ several times in the course of compiling a function, and we don't want to
+ slow things down too much or call target_reinit when it isn't safe. */
+ if (fndecl && fndecl != rs6000_previous_fndecl)
+ {
+ rs6000_previous_fndecl = fndecl;
+ if (old_tree == new_tree)
+ ;
+
+ else if (new_tree)
+ {
+ cl_target_option_restore (&global_options,
+ TREE_TARGET_OPTION (new_tree));
+ target_reinit ();
+ }
+
+ else if (old_tree)
+ {
+ struct cl_target_option *def
+ = TREE_TARGET_OPTION (target_option_current_node);
+
+ cl_target_option_restore (&global_options, def);
+ target_reinit ();
+ }
+ }
+}
+
+
+/* Save the current options */
+
+static void
+rs6000_function_specific_save (struct cl_target_option *ptr)
+{
+ ptr->rs6000_target_flags_explicit = target_flags_explicit;
+}
+
+/* Restore the current options */
+
+static void
+rs6000_function_specific_restore (struct cl_target_option *ptr)
+{
+ target_flags_explicit = ptr->rs6000_target_flags_explicit;
+ (void) rs6000_option_override_internal (false);
+}
+
+/* Print the current options */
+
+static void
+rs6000_function_specific_print (FILE *file, int indent,
+ struct cl_target_option *ptr)
+{
+ size_t i;
+ int flags = ptr->x_target_flags;
+
+ /* Print the various mask options. */
+ for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
+ if ((flags & rs6000_opt_masks[i].mask) != 0)
+ {
+ flags &= ~ rs6000_opt_masks[i].mask;
+ fprintf (file, "%*s-m%s%s\n", indent, "",
+ rs6000_opt_masks[i].invert ? "no-" : "",
+ rs6000_opt_masks[i].name);
+ }
+
+ /* Print the various options that are variables. */
+ for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
+ {
+ size_t j = rs6000_opt_vars[i].target_offset;
+ if (((signed char *) ptr)[j])
+ fprintf (file, "%*s-m%s\n", indent, "",
+ rs6000_opt_vars[i].name);
+ }
+}
+
+
+/* Hook to determine if one function can safely inline another. */
+
+static bool
+rs6000_can_inline_p (tree caller, tree callee)
+{
+ bool ret = false;
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ /* If callee has no option attributes, then it is ok to inline. */
+ if (!callee_tree)
+ ret = true;
+
+ /* If caller has no option attributes, but callee does then it is not ok to
+ inline. */
+ else if (!caller_tree)
+ ret = false;
+
+ else
+ {
+ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
+ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
+
+ /* Callee's options must be a subset of the caller's, i.e. a vsx function
+ can inline an altivec function but a non-vsx function can't inline a
+ vsx function. */
+ if ((caller_opts->x_target_flags & callee_opts->x_target_flags)
+ == callee_opts->x_target_flags)
+ ret = true;
+ }
+
+ if (TARGET_DEBUG_TARGET)
+ fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
+ (DECL_NAME (caller)
+ ? IDENTIFIER_POINTER (DECL_NAME (caller))
+ : "<unknown>"),
+ (DECL_NAME (callee)
+ ? IDENTIFIER_POINTER (DECL_NAME (callee))
+ : "<unknown>"),
+ (ret ? "can" : "cannot"));
+
+ return ret;
+}
+
+/* Allocate a stack temp and fix up the address so it meets the particular
+ memory requirements (either offsettable or REG+REG addressing). */
+
+rtx
+rs6000_allocate_stack_temp (enum machine_mode mode,
+ bool offsettable_p,
+ bool reg_reg_p)
+{
+ rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ rtx addr = XEXP (stack, 0);
+ int strict_p = (reload_in_progress || reload_completed);
+
+ if (!legitimate_indirect_address_p (addr, strict_p))
+ {
+ if (offsettable_p
+ && !rs6000_legitimate_offset_address_p (mode, addr, strict_p))
+ stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
+
+ else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
+ stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
+ }
+
+ return stack;
+}
+
+/* Given a memory reference, if it is not a reg or reg+reg addressing, convert
+ to such a form to deal with memory reference instructions like STFIWX that
+ only take reg+reg addressing. */
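+
+/* Pre-increment, pre-decrement and pre-modify addresses are flattened by
+ emitting the base-register update up front and then addressing through
+ the plain register, so the resulting MEM is usable by reg+reg-only
+ instructions. */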
+
+rtx
+rs6000_address_for_fpconvert (rtx x)
+{
+ int strict_p = (reload_in_progress || reload_completed);
+ rtx addr;
+
+ gcc_assert (MEM_P (x));
+ addr = XEXP (x, 0);
+ if (! legitimate_indirect_address_p (addr, strict_p)
+ && ! legitimate_indexed_address_p (addr, strict_p))
+ {
+ if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
+ {
+ rtx reg = XEXP (addr, 0);
+ HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
+ rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
+ gcc_assert (REG_P (reg));
+ emit_insn (gen_add3_insn (reg, reg, size_rtx));
+ addr = reg;
+ }
+ else if (GET_CODE (addr) == PRE_MODIFY)
+ {
+ rtx reg = XEXP (addr, 0);
+ rtx expr = XEXP (addr, 1);
+ gcc_assert (REG_P (reg));
+ gcc_assert (GET_CODE (expr) == PLUS);
+ emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
+ addr = reg;
+ }
+
+ x = replace_equiv_address (x, copy_addr_to_reg (addr));
+ }
+
+ return x;
+}
+
+/* Given a memory reference, if it is not in the form for altivec memory
+ reference instructions (i.e. reg or reg+reg addressing with AND of -16),
+ convert to the altivec format. */
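+
+/* The AND with -16 mirrors what the AltiVec lvx/stvx instructions do in
+ hardware: they ignore the low four bits of the effective address, so the
+ RTL shows the address explicitly masked to 16-byte alignment. */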
+
+rtx
+rs6000_address_for_altivec (rtx x)
+{
+ gcc_assert (MEM_P (x));
+ if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
+ {
+ rtx addr = XEXP (x, 0);
+ int strict_p = (reload_in_progress || reload_completed);
+
+ if (!legitimate_indexed_address_p (addr, strict_p)
+ && !legitimate_indirect_address_p (addr, strict_p))
+ addr = copy_to_mode_reg (Pmode, addr);
+
+ addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
+ x = change_address (x, GET_MODE (x), addr);
+ }
+
+ return x;
+}
+
+
+#include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
new file mode 100644
index 000000000..0eae3265a
--- /dev/null
+++ b/gcc/config/rs6000/rs6000.h
@@ -0,0 +1,2439 @@
+/* Definitions of target machine for GNU compiler, for IBM RS/6000.
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
+ 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Note that some other tm.h files include this one and then override
+ many of the definitions. */
+
+#ifndef RS6000_OPTS_H
+#include "config/rs6000/rs6000-opts.h"
+#endif
+
+/* Definitions for the object file format. These are set at
+ compile-time. */
+
+#define OBJECT_XCOFF 1
+#define OBJECT_ELF 2
+#define OBJECT_PEF 3
+#define OBJECT_MACHO 4
+
+#define TARGET_ELF (TARGET_OBJECT_FORMAT == OBJECT_ELF)
+#define TARGET_XCOFF (TARGET_OBJECT_FORMAT == OBJECT_XCOFF)
+#define TARGET_MACOS (TARGET_OBJECT_FORMAT == OBJECT_PEF)
+#define TARGET_MACHO (TARGET_OBJECT_FORMAT == OBJECT_MACHO)
+
+#ifndef TARGET_AIX
+#define TARGET_AIX 0
+#endif
+
+#ifndef TARGET_AIX_OS
+#define TARGET_AIX_OS 0
+#endif
+
+/* Control whether function entry points use a "dot" symbol when
+ ABI_AIX. */
+#define DOT_SYMBOLS 1
+
+/* Default string to use for cpu if not specified. */
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT ((char *)0)
+#endif
+
+/* If configured for PPC405, support PPC405CR Erratum77. */
+#ifdef CONFIG_PPC405CR
+#define PPC405_ERRATUM77 (rs6000_cpu == PROCESSOR_PPC405)
+#else
+#define PPC405_ERRATUM77 0
+#endif
+
+#ifndef TARGET_PAIRED_FLOAT
+#define TARGET_PAIRED_FLOAT 0
+#endif
+
+#ifdef HAVE_AS_POPCNTB
+#define ASM_CPU_POWER5_SPEC "-mpower5"
+#else
+#define ASM_CPU_POWER5_SPEC "-mpower4"
+#endif
+
+#ifdef HAVE_AS_DFP
+#define ASM_CPU_POWER6_SPEC "-mpower6 -maltivec"
+#else
+#define ASM_CPU_POWER6_SPEC "-mpower4 -maltivec"
+#endif
+
+#ifdef HAVE_AS_POPCNTD
+#define ASM_CPU_POWER7_SPEC "-mpower7"
+#else
+#define ASM_CPU_POWER7_SPEC "-mpower4 -maltivec"
+#endif
+
+#ifdef HAVE_AS_DCI
+#define ASM_CPU_476_SPEC "-m476"
+#else
+#define ASM_CPU_476_SPEC "-mpower4"
+#endif
+
+/* Common ASM definitions used by ASM_SPEC among the various targets for
+ handling -mcpu=xxx switches. There is a parallel list in driver-rs6000.c to
+ provide the default assembler options if the user uses -mcpu=native, so if
+ you make changes here, make them there as well. */
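+
+/* For example, "gcc -mcpu=power7" matches the %{mcpu=power7: ...} clause
+ below and substitutes %(asm_cpu_power7), i.e. "-mpower7" when the
+ assembler understands popcntd and "-mpower4 -maltivec" otherwise. */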
+#define ASM_CPU_SPEC \
+"%{!mcpu*: \
+ %{mpower: %{!mpower2: -mpwr}} \
+ %{mpower2: -mpwrx} \
+ %{mpowerpc64*: -mppc64} \
+ %{!mpowerpc64*: %{mpowerpc*: -mppc}} \
+ %{mno-power: %{!mpowerpc*: -mcom}} \
+ %{!mno-power: %{!mpower*: %(asm_default)}}} \
+%{mcpu=native: %(asm_cpu_native)} \
+%{mcpu=common: -mcom} \
+%{mcpu=cell: -mcell} \
+%{mcpu=power: -mpwr} \
+%{mcpu=power2: -mpwrx} \
+%{mcpu=power3: -mppc64} \
+%{mcpu=power4: -mpower4} \
+%{mcpu=power5: %(asm_cpu_power5)} \
+%{mcpu=power5+: %(asm_cpu_power5)} \
+%{mcpu=power6: %(asm_cpu_power6) -maltivec} \
+%{mcpu=power6x: %(asm_cpu_power6) -maltivec} \
+%{mcpu=power7: %(asm_cpu_power7)} \
+%{mcpu=a2: -ma2} \
+%{mcpu=powerpc: -mppc} \
+%{mcpu=rios: -mpwr} \
+%{mcpu=rios1: -mpwr} \
+%{mcpu=rios2: -mpwrx} \
+%{mcpu=rsc: -mpwr} \
+%{mcpu=rsc1: -mpwr} \
+%{mcpu=rs64a: -mppc64} \
+%{mcpu=401: -mppc} \
+%{mcpu=403: -m403} \
+%{mcpu=405: -m405} \
+%{mcpu=405fp: -m405} \
+%{mcpu=440: -m440} \
+%{mcpu=440fp: -m440} \
+%{mcpu=464: -m440} \
+%{mcpu=464fp: -m440} \
+%{mcpu=476: %(asm_cpu_476)} \
+%{mcpu=476fp: %(asm_cpu_476)} \
+%{mcpu=505: -mppc} \
+%{mcpu=601: -m601} \
+%{mcpu=602: -mppc} \
+%{mcpu=603: -mppc} \
+%{mcpu=603e: -mppc} \
+%{mcpu=ec603e: -mppc} \
+%{mcpu=604: -mppc} \
+%{mcpu=604e: -mppc} \
+%{mcpu=620: -mppc64} \
+%{mcpu=630: -mppc64} \
+%{mcpu=740: -mppc} \
+%{mcpu=750: -mppc} \
+%{mcpu=G3: -mppc} \
+%{mcpu=7400: -mppc -maltivec} \
+%{mcpu=7450: -mppc -maltivec} \
+%{mcpu=G4: -mppc -maltivec} \
+%{mcpu=801: -mppc} \
+%{mcpu=821: -mppc} \
+%{mcpu=823: -mppc} \
+%{mcpu=860: -mppc} \
+%{mcpu=970: -mpower4 -maltivec} \
+%{mcpu=G5: -mpower4 -maltivec} \
+%{mcpu=8540: -me500} \
+%{mcpu=8548: -me500} \
+%{mcpu=e300c2: -me300} \
+%{mcpu=e300c3: -me300} \
+%{mcpu=e500mc: -me500mc} \
+%{mcpu=e500mc64: -me500mc64} \
+%{maltivec: -maltivec} \
+%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
+-many"
+
+#define CPP_DEFAULT_SPEC ""
+
+#define ASM_DEFAULT_SPEC ""
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+ Each subgrouping contains a string constant that defines the
+ specification name and a string constant that is used by the GCC
+ driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#define SUBTARGET_EXTRA_SPECS
+
+#define EXTRA_SPECS \
+ { "cpp_default", CPP_DEFAULT_SPEC }, \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "asm_cpu_native", ASM_CPU_NATIVE_SPEC }, \
+ { "asm_default", ASM_DEFAULT_SPEC }, \
+ { "cc1_cpu", CC1_CPU_SPEC }, \
+ { "asm_cpu_power5", ASM_CPU_POWER5_SPEC }, \
+ { "asm_cpu_power6", ASM_CPU_POWER6_SPEC }, \
+ { "asm_cpu_power7", ASM_CPU_POWER7_SPEC }, \
+ { "asm_cpu_476", ASM_CPU_476_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+/* -mcpu=native handling only makes sense with the compiler running on
+ a PowerPC chip. If changing this condition, also change
+ the condition in driver-rs6000.c. */
+#if defined(__powerpc__) || defined(__POWERPC__) || defined(_AIX)
+/* In driver-rs6000.c. */
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+#define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+#define HAVE_LOCAL_CPU_DETECT
+#define ASM_CPU_NATIVE_SPEC "%:local_cpu_detect(asm)"
+
+#else
+#define ASM_CPU_NATIVE_SPEC "%(asm_default)"
+#endif
+
+#ifndef CC1_CPU_SPEC
+#ifdef HAVE_LOCAL_CPU_DETECT
+#define CC1_CPU_SPEC \
+"%{mcpu=native:%<mcpu=native %:local_cpu_detect(cpu)} \
+ %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#else
+#define CC1_CPU_SPEC ""
+#endif
+#endif
+
+/* Architecture type. */
+
+/* Force TARGET_MFCRF to 0 if the target assembler does not support the
+ optional field operand for mfcr. */
+
+#ifndef HAVE_AS_MFCRF
+#undef TARGET_MFCRF
+#define TARGET_MFCRF 0
+#endif
+
+/* Force TARGET_POPCNTB to 0 if the target assembler does not support the
+ popcount byte instruction. */
+
+#ifndef HAVE_AS_POPCNTB
+#undef TARGET_POPCNTB
+#define TARGET_POPCNTB 0
+#endif
+
+/* Force TARGET_FPRND to 0 if the target assembler does not support the
+ fp rounding instructions. */
+
+#ifndef HAVE_AS_FPRND
+#undef TARGET_FPRND
+#define TARGET_FPRND 0
+#endif
+
+/* Force TARGET_CMPB to 0 if the target assembler does not support the
+ cmpb instruction. */
+
+#ifndef HAVE_AS_CMPB
+#undef TARGET_CMPB
+#define TARGET_CMPB 0
+#endif
+
+/* Force TARGET_MFPGPR to 0 if the target assembler does not support the
+ mffpr and mftgpr instructions. */
+
+#ifndef HAVE_AS_MFPGPR
+#undef TARGET_MFPGPR
+#define TARGET_MFPGPR 0
+#endif
+
+/* Force TARGET_DFP to 0 if the target assembler does not support decimal
+ floating point instructions. */
+#ifndef HAVE_AS_DFP
+#undef TARGET_DFP
+#define TARGET_DFP 0
+#endif
+
+/* Force TARGET_POPCNTD to 0 if the target assembler does not support the
+ popcount word and double word instructions. */
+
+#ifndef HAVE_AS_POPCNTD
+#undef TARGET_POPCNTD
+#define TARGET_POPCNTD 0
+#endif
+
+/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If
+ not, generate the lwsync code as an integer constant. */
+#ifdef HAVE_AS_LWSYNC
+#define TARGET_LWSYNC_INSTRUCTION 1
+#else
+#define TARGET_LWSYNC_INSTRUCTION 0
+#endif
+
+/* Force TARGET_TLS_MARKERS to 0 if the target assembler does not support
+ arg markers for __tls_get_addr calls. */
+#ifndef HAVE_AS_TLS_MARKERS
+#undef TARGET_TLS_MARKERS
+#define TARGET_TLS_MARKERS 0
+#else
+#define TARGET_TLS_MARKERS tls_markers
+#endif
+
+#ifndef TARGET_SECURE_PLT
+#define TARGET_SECURE_PLT 0
+#endif
+
+#ifndef TARGET_CMODEL
+#define TARGET_CMODEL CMODEL_SMALL
+#endif
+
+#define TARGET_32BIT (! TARGET_64BIT)
+
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+/* Return 1 for a symbol ref for a thread-local storage symbol. */
+#define RS6000_SYMBOL_REF_TLS_P(RTX) \
+ (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0)
+
+#ifdef IN_LIBGCC2
+/* For libgcc2 we make sure this is a compile-time constant. */
+#if defined (__64BIT__) || defined (__powerpc64__) || defined (__ppc64__)
+#undef TARGET_POWERPC64
+#define TARGET_POWERPC64 1
+#else
+#undef TARGET_POWERPC64
+#define TARGET_POWERPC64 0
+#endif
+#else
+ /* The option machinery will define this. */
+#endif
+
+#define TARGET_DEFAULT (MASK_POWER | MASK_MULTIPLE | MASK_STRING)
+
+/* FPU operations supported.
+ Each use of TARGET_SINGLE_FLOAT or TARGET_DOUBLE_FLOAT must
+ also test TARGET_HARD_FLOAT. */
+#define TARGET_SINGLE_FLOAT 1
+#define TARGET_DOUBLE_FLOAT 1
+#define TARGET_SINGLE_FPU 0
+#define TARGET_SIMPLE_FPU 0
+#define TARGET_XILINX_FPU 0
+
+/* Recast the processor type to the cpu attribute. */
+#define rs6000_cpu_attr ((enum attr_cpu)rs6000_cpu)
+
+/* Define generic processor types based upon current deployment. */
+#define PROCESSOR_COMMON PROCESSOR_PPC601
+#define PROCESSOR_POWER PROCESSOR_RIOS1
+#define PROCESSOR_POWERPC PROCESSOR_PPC604
+#define PROCESSOR_POWERPC64 PROCESSOR_RS64A
+
+/* Define the default processor. This is overridden by other tm.h files. */
+#define PROCESSOR_DEFAULT PROCESSOR_RIOS1
+#define PROCESSOR_DEFAULT64 PROCESSOR_RS64A
+
+extern enum fpu_type_t fpu_type;
+
+/* Specify the dialect of assembler to use. The new mnemonics are dialect
+ one and the old mnemonics are dialect zero. */
+#define ASSEMBLER_DIALECT (TARGET_NEW_MNEMONICS ? 1 : 0)
+
+/* rs6000_select[0] is reserved for the default cpu defined via --with-cpu. */
+struct rs6000_cpu_select
+{
+ const char *string;
+ const char *name;
+ int set_tune_p;
+ int set_arch_p;
+};
+
+extern struct rs6000_cpu_select rs6000_select[];
+
+/* Debug support */
+#define MASK_DEBUG_STACK 0x01 /* debug stack applications */
+#define MASK_DEBUG_ARG 0x02 /* debug argument handling */
+#define MASK_DEBUG_REG 0x04 /* debug register handling */
+#define MASK_DEBUG_ADDR 0x08 /* debug memory addressing */
+#define MASK_DEBUG_COST 0x10 /* debug rtx costs */
+#define MASK_DEBUG_TARGET 0x20 /* debug target attribute/pragma */
+#define MASK_DEBUG_ALL (MASK_DEBUG_STACK \
+ | MASK_DEBUG_ARG \
+ | MASK_DEBUG_REG \
+ | MASK_DEBUG_ADDR \
+ | MASK_DEBUG_COST \
+ | MASK_DEBUG_TARGET)
+
+#define TARGET_DEBUG_STACK (rs6000_debug & MASK_DEBUG_STACK)
+#define TARGET_DEBUG_ARG (rs6000_debug & MASK_DEBUG_ARG)
+#define TARGET_DEBUG_REG (rs6000_debug & MASK_DEBUG_REG)
+#define TARGET_DEBUG_ADDR (rs6000_debug & MASK_DEBUG_ADDR)
+#define TARGET_DEBUG_COST (rs6000_debug & MASK_DEBUG_COST)
+#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET)
+
+extern enum rs6000_vector rs6000_vector_unit[];
+
+#define VECTOR_UNIT_NONE_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_NONE)
+
+#define VECTOR_UNIT_VSX_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+
+#define VECTOR_UNIT_ALTIVEC_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
+
+#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \
+ || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+
+/* Describe whether to use VSX loads or Altivec loads. For now, just use the
+ same unit as the vector unit we are using, but we may want to migrate to
+ using VSX style loads even for types handled by altivec. */
+extern enum rs6000_vector rs6000_vector_mem[];
+
+#define VECTOR_MEM_NONE_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_NONE)
+
+#define VECTOR_MEM_VSX_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+
+#define VECTOR_MEM_ALTIVEC_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
+
+#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \
+ || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+
+/* Return the alignment of a given vector type, which is set based on the
+ vector unit in use. VSX, for instance, can load 32- or 64-bit aligned
+ words without problems, while Altivec requires 128-bit aligned vectors. */
+extern int rs6000_vector_align[];
+
+#define VECTOR_ALIGN(MODE) \
+ ((rs6000_vector_align[(MODE)] != 0) \
+ ? rs6000_vector_align[(MODE)] \
+ : (int)GET_MODE_BITSIZE ((MODE)))
+
+/* Alignment options for fields in structures for sub-targets following
+ the AIX-like ABI.
+ ALIGN_POWER word-aligns FP doubles (default AIX ABI).
+ ALIGN_NATURAL doubleword-aligns FP doubles (align to object size).
+
+ Override the macro definitions when compiling libobjc to avoid undefined
+ reference to rs6000_alignment_flags due to the library's use of GCC
+ alignment macros which use the macros below. */
+
+#ifndef IN_TARGET_LIBS
+#define MASK_ALIGN_POWER 0x00000000
+#define MASK_ALIGN_NATURAL 0x00000001
+#define TARGET_ALIGN_NATURAL (rs6000_alignment_flags & MASK_ALIGN_NATURAL)
+#else
+#define TARGET_ALIGN_NATURAL 0
+#endif
+
+#define TARGET_LONG_DOUBLE_128 (rs6000_long_double_type_size == 128)
+#define TARGET_IEEEQUAD rs6000_ieeequad
+#define TARGET_ALTIVEC_ABI rs6000_altivec_abi
+#define TARGET_LDBRX (TARGET_POPCNTD || rs6000_cpu == PROCESSOR_CELL)
+
+#define TARGET_SPE_ABI 0
+#define TARGET_SPE 0
+#define TARGET_E500 0
+#define TARGET_ISEL64 (TARGET_ISEL && TARGET_POWERPC64)
+#define TARGET_FPRS 1
+#define TARGET_E500_SINGLE 0
+#define TARGET_E500_DOUBLE 0
+#define CHECK_E500_OPTIONS do { } while (0)
+
+/* ISA 2.01 allowed FCFID to be done in 32-bit mode; previously it was
+ 64-bit only. Enable 32-bit fcfid on any of the switches for newer ISA
+ machines or XILINX. */
+#define TARGET_FCFID (TARGET_POWERPC64 \
+ || TARGET_PPC_GPOPT /* 970/power4 */ \
+ || TARGET_POPCNTB /* ISA 2.02 */ \
+ || TARGET_CMPB /* ISA 2.05 */ \
+ || TARGET_POPCNTD /* ISA 2.06 */ \
+ || TARGET_XILINX_FPU)
+
+#define TARGET_FCTIDZ TARGET_FCFID
+#define TARGET_STFIWX TARGET_PPC_GFXOPT
+#define TARGET_LFIWAX TARGET_CMPB
+#define TARGET_LFIWZX TARGET_POPCNTD
+#define TARGET_FCFIDS TARGET_POPCNTD
+#define TARGET_FCFIDU TARGET_POPCNTD
+#define TARGET_FCFIDUS TARGET_POPCNTD
+#define TARGET_FCTIDUZ TARGET_POPCNTD
+#define TARGET_FCTIWUZ TARGET_POPCNTD
+
+/* E500 processors only support plain "sync", not lwsync. */
+#define TARGET_NO_LWSYNC TARGET_E500
+
+/* Which machine supports the various reciprocal estimate instructions. */
+#define TARGET_FRES (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
+ && TARGET_FPRS && TARGET_SINGLE_FLOAT)
+
+#define TARGET_FRE (TARGET_HARD_FLOAT && TARGET_FPRS \
+ && TARGET_DOUBLE_FLOAT \
+ && (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)))
+
+#define TARGET_FRSQRTES (TARGET_HARD_FLOAT && TARGET_POPCNTB \
+ && TARGET_FPRS && TARGET_SINGLE_FLOAT)
+
+#define TARGET_FRSQRTE (TARGET_HARD_FLOAT && TARGET_FPRS \
+ && TARGET_DOUBLE_FLOAT \
+ && (TARGET_PPC_GFXOPT || VECTOR_UNIT_VSX_P (DFmode)))
+
+/* Whether the various reciprocal divide/square root estimate instructions
+ exist, and whether we should automatically generate code for the instruction
+ by default. */
+#define RS6000_RECIP_MASK_HAVE_RE 0x1 /* have RE instruction. */
+#define RS6000_RECIP_MASK_AUTO_RE 0x2 /* generate RE by default. */
+#define RS6000_RECIP_MASK_HAVE_RSQRTE 0x4 /* have RSQRTE instruction. */
+#define RS6000_RECIP_MASK_AUTO_RSQRTE 0x8 /* gen. RSQRTE by default. */
+
+extern unsigned char rs6000_recip_bits[];
+
+#define RS6000_RECIP_HAVE_RE_P(MODE) \
+ (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_HAVE_RE)
+
+#define RS6000_RECIP_AUTO_RE_P(MODE) \
+ (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RE)
+
+#define RS6000_RECIP_HAVE_RSQRTE_P(MODE) \
+ (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_HAVE_RSQRTE)
+
+#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
+ (rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
+
+#define RS6000_RECIP_HIGH_PRECISION_P(MODE) \
+ ((MODE) == SFmode || (MODE) == V4SFmode || TARGET_RECIP_PRECISION)
+
+/* The default CPU for TARGET_OPTION_OVERRIDE. */
+#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
+
+/* Target pragma. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "longcall", rs6000_pragma_longcall); \
+ targetm.target_option.pragma_parse = rs6000_pragma_target_parse; \
+ targetm.resolve_overloaded_builtin = altivec_resolve_overloaded_builtin; \
+} while (0)
+
+/* Target #defines. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ rs6000_cpu_cpp_builtins (pfile)
+
+/* This is used by rs6000_cpu_cpp_builtins to indicate the byte order
+ we're compiling for. Some configurations may need to override it. */
+#define RS6000_CPU_CPP_ENDIAN_BUILTINS() \
+ do \
+ { \
+ if (BYTES_BIG_ENDIAN) \
+ { \
+ builtin_define ("__BIG_ENDIAN__"); \
+ builtin_define ("_BIG_ENDIAN"); \
+ builtin_assert ("machine=bigendian"); \
+ } \
+ else \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ builtin_define ("_LITTLE_ENDIAN"); \
+ builtin_assert ("machine=littleendian"); \
+ } \
+ } \
+ while (0)
+
+/* Target machine storage layout. */
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ (MODE) = TARGET_32BIT ? SImode : DImode;
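+
+/* E.g. a QImode or HImode scalar is widened to SImode (DImode for 64-bit
+ code) while it lives in a register; its value is kept within the bounds
+ of the declared type. */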
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+/* That is true on RS/6000. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is true on RS/6000. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered.
+
+ For RS/6000 we can decide arbitrarily since there are no machine
+ instructions for them. Might as well be consistent with bits and bytes. */
+#define WORDS_BIG_ENDIAN 1
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (! TARGET_POWERPC64 ? 4 : 8)
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+#else
+#define MIN_UNITS_PER_WORD 4
+#endif
+#define UNITS_PER_FP_WORD 8
+#define UNITS_PER_ALTIVEC_WORD 16
+#define UNITS_PER_VSX_WORD 16
+#define UNITS_PER_SPE_WORD 8
+#define UNITS_PER_PAIRED_WORD 8
+
+/* Type used for ptrdiff_t, as a string used in a declaration. */
+#define PTRDIFF_TYPE "int"
+
+/* Type used for size_t, as a string used in a declaration. */
+#define SIZE_TYPE "long unsigned int"
+
+/* Type used for wchar_t, as a string used in a declaration. */
+#define WCHAR_TYPE "short unsigned int"
+
+/* Width of wchar_t in bits. */
+#define WCHAR_TYPE_SIZE 16
+
+/* A C expression for the size in bits of the type `short' on the
+ target machine. If you don't define this, the default is half a
+ word. (If this would be less than one storage unit, it is
+ rounded up to one unit.) */
+#define SHORT_TYPE_SIZE 16
+
+/* A C expression for the size in bits of the type `int' on the
+ target machine. If you don't define this, the default is one
+ word. */
+#define INT_TYPE_SIZE 32
+
+/* A C expression for the size in bits of the type `long' on the
+ target machine. If you don't define this, the default is one
+ word. */
+#define LONG_TYPE_SIZE (TARGET_32BIT ? 32 : 64)
+
+/* A C expression for the size in bits of the type `long long' on the
+ target machine. If you don't define this, the default is two
+ words. */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* A C expression for the size in bits of the type `float' on the
+ target machine. If you don't define this, the default is one
+ word. */
+#define FLOAT_TYPE_SIZE 32
+
+/* A C expression for the size in bits of the type `double' on the
+ target machine. If you don't define this, the default is two
+ words. */
+#define DOUBLE_TYPE_SIZE 64
+
+/* A C expression for the size in bits of the type `long double' on
+ the target machine. If you don't define this, the default is two
+ words. */
+#define LONG_DOUBLE_TYPE_SIZE rs6000_long_double_type_size
+
+/* Define this to set the long double type size to use in libgcc2.c, which
+ cannot depend on target_flags. */
+#ifdef __LONG_DOUBLE_128__
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+/* Work around rs6000_long_double_type_size dependency in ada/targtyps.c. */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+extern unsigned rs6000_pointer_size;
+#define POINTER_SIZE rs6000_pointer_size
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_32BIT ? 32 : 64)
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY \
+ ((TARGET_32BIT && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI && !TARGET_VSX) \
+ ? 64 : 128)
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 128
+
+/* A C expression to compute the alignment for a variable in the
+ local store. TYPE is the data type, and ALIGN is the alignment
+ that the object would ordinarily have. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ DATA_ALIGNMENT (TYPE, ALIGN)
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* Return 1 if a structure or array containing FIELD should be
+ accessed using `BLKMODE'.
+
+ For the SPE, simd types are V2SI, and gcc can be tempted to put the
+ entire thing in a DI and use subregs to access the internals.
+ store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
+ back-end. Because a single GPR can hold a V2SI, but not a DI, the
+ best thing to do is set structs to BLKmode and avoid Severe Tire
+ Damage.
+
+ On e500 v2, DF and DI modes suffer from the same anomaly. DF can
+ fit into one GPR, whereas DI still needs two. */
+#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) \
+ ((TARGET_SPE && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \
+ || (TARGET_E500_DOUBLE && (MODE) == DFmode))
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Make strings word-aligned so strcpy from constants will be faster.
+ Make vector constants quadword aligned. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (STRICT_ALIGNMENT || !optimize_size) \
+ && (ALIGN) < BITS_PER_WORD \
+ ? BITS_PER_WORD \
+ : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons.
+ Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
+ 64 bits. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == VECTOR_TYPE \
+ ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \
+ || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \
+ ? 64 : 128) \
+ : ((TARGET_E500_DOUBLE \
+ && TREE_CODE (TYPE) == REAL_TYPE \
+ && TYPE_MODE (TYPE) == DFmode) \
+ ? 64 \
+ : (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN)))
+
+/* Nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 0
+
+/* Define this macro to be the value 1 if unaligned accesses have a cost
+ many times greater than aligned accesses, for example if they are
+ emulated in a trap handler. */
+/* Altivec vector memory instructions simply ignore the low bits; SPE vector
+ memory instructions trap on unaligned accesses; VSX memory instructions are
+ aligned to 4 or 8 bytes. */
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) \
+ (STRICT_ALIGNMENT \
+ || (((MODE) == SFmode || (MODE) == DFmode || (MODE) == TFmode \
+ || (MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode \
+ || (MODE) == DImode) \
+ && (ALIGN) < 32) \
+ || (VECTOR_MODE_P ((MODE)) && (((int)(ALIGN)) < VECTOR_ALIGN (MODE))))
+
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ RS/6000 has 32 fixed-point registers, 32 floating-point registers,
+ an MQ register, a count register, a link register, and 8 condition
+ register fields, which we view here as separate registers. AltiVec
+ adds 32 vector registers and a VRsave register.
+
+ In addition, the difference between the frame and argument pointers is
+ a function of the number of registers saved, so we need to have a
+ register for AP that will later be eliminated in favor of SP or FP.
+ This is a normal register, but it is fixed.
+
+ We also create a pseudo register for float/int conversions that will
+ really represent the memory location used. It is represented here as
+ a register, in order to work around problems in allocating stack storage
+ in inline functions.
+
+ Another pseudo (not included in DWARF_FRAME_REGISTERS) is the soft
+ frame pointer, which is eventually eliminated in favor of SP or FP. */
+
+#define FIRST_PSEUDO_REGISTER 114
+
+/* This must be included for pre-gcc-3.0 glibc compatibility. */
+#define PRE_GCC3_DWARF_FRAME_REGISTERS 77
+
+/* Add 32 dwarf columns for synthetic SPE registers. */
+#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 1) + 32)
+
+/* The SPE has an additional 32 synthetic registers, with DWARF debug
+ info numbering for these registers starting at 1200. While eh_frame
+ register numbering need not be the same as the debug info numbering,
+ we choose to number these regs for eh_frame at 1200 too. This allows
+ future versions of the rs6000 backend to add hard registers and
+ continue to use the gcc hard register numbering for eh_frame. If the
+ extra SPE registers in eh_frame were numbered starting from the
+ current value of FIRST_PSEUDO_REGISTER, then if FIRST_PSEUDO_REGISTER
+ changed we'd need to introduce a mapping in DWARF_FRAME_REGNUM to
+ avoid invalidating older SPE eh_frame info.
+
+ We must map them here to avoid huge unwinder tables mostly consisting
+ of unused space. */
+#define DWARF_REG_TO_UNWIND_COLUMN(r) \
+ ((r) > 1200 ? ((r) - 1200 + FIRST_PSEUDO_REGISTER - 1) : (r))
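+
+/* Example of the mapping above (illustrative): a hard register keeps
+ its own column, so DWARF_REG_TO_UNWIND_COLUMN (65) == 65, while a
+ synthetic SPE number such as 1212 is remapped to
+ 1212 - 1200 + 114 - 1 == 125, packing the SPE columns right after
+ the hard registers instead of leaving over a thousand unused
+ columns. */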
+
+/* Use standard DWARF numbering for DWARF debugging information. */
+#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
+
+/* Use gcc hard register numbering for eh_frame. */
+#define DWARF_FRAME_REGNUM(REGNO) (REGNO)
+
+/* Map register numbers held in the call frame info that gcc has
+ collected using DWARF_FRAME_REGNUM to those that should be output in
+ .debug_frame and .eh_frame. We continue to use gcc hard reg numbers
+ for .eh_frame, but use the numbers mandated by the various ABIs for
+ .debug_frame. rs6000_emit_prologue has translated any combination of
+ CR2, CR3, CR4 saves to a save of CR2. The actual code emitted saves
+ the whole of CR, so we map CR2_REGNO to the DWARF reg for CR. */
+#define DWARF2_FRAME_REG_OUT(REGNO, FOR_EH) \
+ ((FOR_EH) ? (REGNO) \
+ : (REGNO) == CR2_REGNO ? 64 \
+ : DBX_REGISTER_NUMBER (REGNO))
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ On RS/6000, r1 is used for the stack. On Darwin, r2 is available
+ as a local register; for all other OSes r2 is the TOC pointer.
+
+ cr5 is not supposed to be used.
+
+ On System V implementations, r13 is fixed and not available for use. */
+
+#define FIXED_REGISTERS \
+ {0, 1, FIXED_R2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FIXED_R13, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, \
+ /* AltiVec registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1 \
+ , 1, 1, 1 \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \
+ /* AltiVec registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1 \
+ , 1, 1, 1 \
+}
+
+/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
+ the entire set of `FIXED_REGISTERS' be included.
+ (`CALL_USED_REGISTERS' must be a superset of `FIXED_REGISTERS').
+ This macro is optional. If not specified, it defaults to the value
+ of `CALL_USED_REGISTERS'. */
+
+#define CALL_REALLY_USED_REGISTERS \
+ {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \
+ /* AltiVec registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0 \
+ , 0, 0, 0 \
+}
+
+#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
+
+#define FIRST_SAVED_ALTIVEC_REGNO (FIRST_ALTIVEC_REGNO+20)
+#define FIRST_SAVED_FP_REGNO (14+32)
+#define FIRST_SAVED_GP_REGNO 13
+
+/* List the order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS.
+
+ We allocate in the following order:
+ fp0 (not saved or used for anything)
+ fp13 - fp2 (not saved; incoming fp arg registers)
+ fp1 (not saved; return value)
+ fp31 - fp14 (saved; order given to save least number)
+ cr7, cr6 (not saved or special)
+ cr1 (not saved, but used for FP operations)
+ cr0 (not saved, but used for arithmetic operations)
+ cr4, cr3, cr2 (saved)
+ r0 (not saved; cannot be base reg)
+ r9 (not saved; best for TImode)
+ r11, r10, r8-r4 (not saved; highest used first to make less conflict)
+ r3 (not saved; return value register)
+ r31 - r13 (saved; order given to save least number)
+ r12 (not saved; if used for DImode or DFmode would use r13)
+ mq (not saved; best to use it if we can)
+ ctr (not saved; when we have the choice ctr is better)
+ lr (saved)
+ cr5, r1, r2, ap, ca (fixed)
+ v0 - v1 (not saved or used for anything)
+ v13 - v3 (not saved; incoming vector arg registers)
+ v2 (not saved; incoming vector arg reg; return value)
+ v19 - v14 (not saved or used for anything)
+ v31 - v20 (saved; order given to save least number)
+ vrsave, vscr (fixed)
+ spe_acc, spefscr (fixed)
+ sfp (fixed)
+*/
+
+#if FIXED_R2 == 1
+#define MAYBE_R2_AVAILABLE
+#define MAYBE_R2_FIXED 2,
+#else
+#define MAYBE_R2_AVAILABLE 2,
+#define MAYBE_R2_FIXED
+#endif
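+
+/* Illustrative effect of the conditional above on REG_ALLOC_ORDER
+ below: with FIXED_R2 == 0 the order reads "..., 0, 2, 9, 11, ..."
+ so r2 is allocated early, while with FIXED_R2 == 1 the "2" instead
+ appears in the fixed tail "73, 1, 2, 67, 76". */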
+
+#define REG_ALLOC_ORDER \
+ {32, \
+ 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
+ 33, \
+ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
+ 50, 49, 48, 47, 46, \
+ 75, 74, 69, 68, 72, 71, 70, \
+ 0, MAYBE_R2_AVAILABLE \
+ 9, 11, 10, 8, 7, 6, 5, 4, \
+ 3, \
+ 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, \
+ 18, 17, 16, 15, 14, 13, 12, \
+ 64, 66, 65, \
+ 73, 1, MAYBE_R2_FIXED 67, 76, \
+ /* AltiVec registers. */ \
+ 77, 78, \
+ 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, \
+ 79, \
+ 96, 95, 94, 93, 92, 91, \
+ 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \
+ 109, 110, \
+ 111, 112, 113 \
+}
+
+/* True if register is floating-point. */
+#define FP_REGNO_P(N) ((N) >= 32 && (N) <= 63)
+
+/* True if register is a condition register. */
+#define CR_REGNO_P(N) ((N) >= CR0_REGNO && (N) <= CR7_REGNO)
+
+/* True if register is a condition register, but not cr0. */
+#define CR_REGNO_NOT_CR0_P(N) ((N) >= CR1_REGNO && (N) <= CR7_REGNO)
+
+/* True if register is an integer register. */
+#define INT_REGNO_P(N) \
+ ((N) <= 31 || (N) == ARG_POINTER_REGNUM || (N) == FRAME_POINTER_REGNUM)
+
+/* SPE SIMD registers are just the GPRs. */
+#define SPE_SIMD_REGNO_P(N) ((N) <= 31)
+
+/* PAIRED SIMD registers are just the FPRs. */
+#define PAIRED_SIMD_REGNO_P(N) ((N) >= 32 && (N) <= 63)
+
+/* True if register is the CA register. */
+#define CA_REGNO_P(N) ((N) == CA_REGNO)
+
+/* True if register is an AltiVec register. */
+#define ALTIVEC_REGNO_P(N) ((N) >= FIRST_ALTIVEC_REGNO && (N) <= LAST_ALTIVEC_REGNO)
+
+/* True if register is a VSX register. */
+#define VSX_REGNO_P(N) (FP_REGNO_P (N) || ALTIVEC_REGNO_P (N))
+
+/* Alternate name for any vector register supporting floating point, no matter
+ which instruction set(s) are available. */
+#define VFLOAT_REGNO_P(N) \
+ (ALTIVEC_REGNO_P (N) || (TARGET_VSX && FP_REGNO_P (N)))
+
+/* Alternate name for any vector register supporting integer, no matter which
+ instruction set(s) are available. */
+#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
+
+/* Alternate name for any vector register supporting logical operations, no
+ matter which instruction set(s) are available. */
+#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) rs6000_hard_regno_nregs[(MODE)][(REGNO)]
+
+/* When setting up caller-save slots (MODE == VOIDmode) ensure we allocate
+ enough space to account for vectors in FP regs. */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (TARGET_VSX \
+ && ((MODE) == VOIDmode || ALTIVEC_OR_VSX_VECTOR_MODE (MODE)) \
+ && FP_REGNO_P (REGNO) \
+ ? V2DFmode \
+ : choose_hard_reg_mode ((REGNO), (NREGS), false))
+
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
+ (((TARGET_32BIT && TARGET_POWERPC64 \
+ && (GET_MODE_SIZE (MODE) > 4) \
+ && INT_REGNO_P (REGNO)) ? 1 : 0) \
+ || (TARGET_VSX && FP_REGNO_P (REGNO) \
+ && GET_MODE_SIZE (MODE) > 8))
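+
+/* Example (illustrative): with -m32 -mpowerpc64, a DImode value
+ (GET_MODE_SIZE == 8 > 4) held in a GPR is only partially preserved
+ across calls, because the 32-bit ABI saves just the low word of
+ each 64-bit register; likewise with VSX, a 16-byte vector in an FP
+ register exceeds the 8 callee-saved bytes. */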
+
+#define VSX_VECTOR_MODE(MODE) \
+ ((MODE) == V4SFmode \
+ || (MODE) == V2DFmode)
+
+#define ALTIVEC_VECTOR_MODE(MODE) \
+ ((MODE) == V16QImode \
+ || (MODE) == V8HImode \
+ || (MODE) == V4SFmode \
+ || (MODE) == V4SImode)
+
+#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \
+ (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
+ || (MODE) == V2DImode)
+
+#define SPE_VECTOR_MODE(MODE) \
+ ((MODE) == V4HImode \
+ || (MODE) == V2SFmode \
+ || (MODE) == V1DImode \
+ || (MODE) == V2SImode)
+
+#define PAIRED_VECTOR_MODE(MODE) \
+ ((MODE) == V2SFmode)
+
+/* Value is TRUE if hard register REGNO can hold a value of
+ machine-mode MODE. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ rs6000_hard_regno_mode_ok_p[(int)(MODE)][REGNO]
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (SCALAR_FLOAT_MODE_P (MODE1) \
+ ? SCALAR_FLOAT_MODE_P (MODE2) \
+ : SCALAR_FLOAT_MODE_P (MODE2) \
+ ? SCALAR_FLOAT_MODE_P (MODE1) \
+ : GET_MODE_CLASS (MODE1) == MODE_CC \
+ ? GET_MODE_CLASS (MODE2) == MODE_CC \
+ : GET_MODE_CLASS (MODE2) == MODE_CC \
+ ? GET_MODE_CLASS (MODE1) == MODE_CC \
+ : SPE_VECTOR_MODE (MODE1) \
+ ? SPE_VECTOR_MODE (MODE2) \
+ : SPE_VECTOR_MODE (MODE2) \
+ ? SPE_VECTOR_MODE (MODE1) \
+ : ALTIVEC_VECTOR_MODE (MODE1) \
+ ? ALTIVEC_VECTOR_MODE (MODE2) \
+ : ALTIVEC_VECTOR_MODE (MODE2) \
+ ? ALTIVEC_VECTOR_MODE (MODE1) \
+ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
+ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
+ : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
+ ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
+ : 1)
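+
+/* Worked cases for MODES_TIEABLE_P (illustrative):
+ (SFmode, DFmode) -> both scalar float -> 1;
+ (SFmode, SImode) -> scalar float vs. integer -> 0;
+ (CCmode, CCUNSmode) -> both MODE_CC -> 1;
+ (SImode, DImode) -> no special class applies -> 1. */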
+
+/* Post-reload, we can't use any new AltiVec registers, as we already
+ emitted the vrsave mask. */
+
+#define HARD_REGNO_RENAME_OK(SRC, DST) \
+ (! ALTIVEC_REGNO_P (DST) || df_regs_ever_live_p (DST))
+
+/* Specify the cost of a branch insn; roughly the number of extra insns that
+ should be added to avoid a branch.
+
+ Set this to 3 on the RS/6000 since that is roughly the average cost of an
+ unscheduled conditional branch. */
+
+#define BRANCH_COST(speed_p, predictable_p) 3
+
+/* Override the BRANCH_COST heuristic, which empirically produces worse
+ performance when short-circuiting is removed from the logical ops. */
+
+#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+
+/* A fixed register used at epilogue generation to address SPE registers
+ with negative offsets. The 64-bit load/store instructions on the SPE
+ only take positive offsets (and small ones at that), so we need to
+ reserve a register for consing up negative offsets. */
+
+#define FIXED_SCRATCH 0
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* RS/6000 pc isn't overloaded on a register that the compiler knows about. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 1
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 31
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 113
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 67
+
+/* Place to put static chain when calling a function that requires it. */
+#define STATIC_CHAIN_REGNUM 11
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The RS/6000 has three types of registers: fixed-point, floating-point, and
+ condition registers, plus three special registers: MQ, CTR, and the link
+ register. AltiVec adds a vector register class. VSX registers overlap the
+ FPR registers and the Altivec registers.
+
+ However, r0 is special in that it cannot be used as a base register.
+ So make a class for registers valid as base registers.
+
+ Also, cr0 is the only condition code register that can be used in
+ arithmetic insns, so make a separate class for it. */
+
+enum reg_class
+{
+ NO_REGS,
+ BASE_REGS,
+ GENERAL_REGS,
+ FLOAT_REGS,
+ ALTIVEC_REGS,
+ VSX_REGS,
+ VRSAVE_REGS,
+ VSCR_REGS,
+ SPE_ACC_REGS,
+ SPEFSCR_REGS,
+ NON_SPECIAL_REGS,
+ MQ_REGS,
+ LINK_REGS,
+ CTR_REGS,
+ LINK_OR_CTR_REGS,
+ SPECIAL_REGS,
+ SPEC_OR_GEN_REGS,
+ CR0_REGS,
+ CR_REGS,
+ NON_FLOAT_REGS,
+ CA_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "BASE_REGS", \
+ "GENERAL_REGS", \
+ "FLOAT_REGS", \
+ "ALTIVEC_REGS", \
+ "VSX_REGS", \
+ "VRSAVE_REGS", \
+ "VSCR_REGS", \
+ "SPE_ACC_REGS", \
+ "SPEFSCR_REGS", \
+ "NON_SPECIAL_REGS", \
+ "MQ_REGS", \
+ "LINK_REGS", \
+ "CTR_REGS", \
+ "LINK_OR_CTR_REGS", \
+ "SPECIAL_REGS", \
+ "SPEC_OR_GEN_REGS", \
+ "CR0_REGS", \
+ "CR_REGS", \
+ "NON_FLOAT_REGS", \
+ "CA_REGS", \
+ "ALL_REGS" \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0xfffffffe, 0x00000000, 0x00000008, 0x00020000 }, /* BASE_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000008, 0x00020000 }, /* GENERAL_REGS */ \
+ { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 }, /* FLOAT_REGS */ \
+ { 0x00000000, 0x00000000, 0xffffe000, 0x00001fff }, /* ALTIVEC_REGS */ \
+ { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff }, /* VSX_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, /* VRSAVE_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, /* SPEFSCR_REGS */ \
+ { 0xffffffff, 0xffffffff, 0x00000008, 0x00020000 }, /* NON_SPECIAL_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000001, 0x00000000 }, /* MQ_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, /* LINK_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000004, 0x00000000 }, /* CTR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000006, 0x00000000 }, /* LINK_OR_CTR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000007, 0x00002000 }, /* SPECIAL_REGS */ \
+ { 0xffffffff, 0x00000000, 0x0000000f, 0x00022000 }, /* SPEC_OR_GEN_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000010, 0x00000000 }, /* CR0_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \
+ { 0xffffffff, 0x00000000, 0x00000fff, 0x00020000 }, /* NON_FLOAT_REGS */ \
+ { 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff } /* ALL_REGS */ \
+}
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers of a cover class should be
+ cheaper than a load or store of the registers. The macro value is an
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker.
+
+ We need two IRA_COVER_CLASSES, one for pre-VSX, and the other for VSX to
+ account for the Altivec and Floating registers being subsets of the VSX
+ register set. */
+
+#define IRA_COVER_CLASSES_PRE_VSX \
+{ \
+ GENERAL_REGS, SPECIAL_REGS, FLOAT_REGS, ALTIVEC_REGS, /* VSX_REGS, */ \
+ /* VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \
+ /* MQ_REGS, LINK_REGS, CTR_REGS, */ \
+ CR_REGS, CA_REGS, LIM_REG_CLASSES \
+}
+
+#define IRA_COVER_CLASSES_VSX \
+{ \
+ GENERAL_REGS, SPECIAL_REGS, /* FLOAT_REGS, ALTIVEC_REGS, */ VSX_REGS, \
+ /* VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \
+ /* MQ_REGS, LINK_REGS, CTR_REGS, */ \
+ CR_REGS, CA_REGS, LIM_REG_CLASSES \
+}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+extern enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
+
+#if ENABLE_CHECKING
+#define REGNO_REG_CLASS(REGNO) \
+ (gcc_assert (IN_RANGE ((REGNO), 0, FIRST_PSEUDO_REGISTER-1)), \
+ rs6000_regno_regclass[(REGNO)])
+
+#else
+#define REGNO_REG_CLASS(REGNO) rs6000_regno_regclass[(REGNO)]
+#endif
+
+/* Register classes for various constraints that are based on the target
+ switches. */
+enum r6000_reg_class_enum {
+ RS6000_CONSTRAINT_d, /* fpr registers for double values */
+ RS6000_CONSTRAINT_f, /* fpr registers for single values */
+ RS6000_CONSTRAINT_v, /* Altivec registers */
+ RS6000_CONSTRAINT_wa, /* Any VSX register */
+ RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
+ RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
+ RS6000_CONSTRAINT_ws, /* VSX register for DF */
+ RS6000_CONSTRAINT_MAX
+};
+
+extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS BASE_REGS
+
+/* Return whether a given register class can hold VSX objects. */
+#define VSX_REG_CLASS_P(CLASS) \
+ ((CLASS) == VSX_REGS || (CLASS) == FLOAT_REGS || (CLASS) == ALTIVEC_REGS)
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+
+ On the RS/6000, we have to return NO_REGS when we want to reload a
+ floating-point CONST_DOUBLE to force it to be copied to memory.
+
+ We also don't want to reload integer values into floating-point
+ registers if we can at all help it. In fact, this can
+ cause reload to die, if it tries to generate a reload of CTR
+ into a FP register and discovers it doesn't have the memory location
+ required.
+
+ ??? Would it be a good idea to have reload do the converse, that is
+ try to reload floating modes into FP registers if possible?
+ */
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ rs6000_preferred_reload_class_ptr (X, CLASS)
+
+/* Return the register class of a scratch register needed to copy IN into
+ or out of a register in CLASS in MODE. If it can be done directly,
+ NO_REGS is returned. */
+
+#define SECONDARY_RELOAD_CLASS(CLASS,MODE,IN) \
+ rs6000_secondary_reload_class_ptr (CLASS, MODE, IN)
+
+/* If we are copying between FP or AltiVec registers and anything
+ else, we need a memory location. The exception is when we are
+ targeting ppc64 and the instructions to move between fprs and gprs
+ are available. */
+
+#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \
+ rs6000_secondary_memory_needed_ptr (CLASS1, CLASS2, MODE)
+
+/* For cpus that cannot load/store SDmode values from the 64-bit
+ FP registers without using a full 64-bit load/store, we need
+ to allocate a full 64-bit stack slot for them. */
+
+#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
+ rs6000_secondary_memory_needed_rtx (MODE)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+
+ On RS/6000, this is the size of MODE in words, except in the FP regs, where
+ a single reg is enough for two words, unless we have VSX, where the FP
+ registers can hold 128 bits. */
+#define CLASS_MAX_NREGS(CLASS, MODE) rs6000_class_max_nregs[(MODE)][(CLASS)]
+
+/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ rs6000_cannot_change_mode_class_ptr (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Offsets recorded in opcodes are a multiple of this alignment factor. */
+#define DWARF_CIE_DATA_ALIGNMENT (-((int) (TARGET_32BIT ? 4 : 8)))
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame.
+
+ On the RS/6000, we grow upwards, from the area after the outgoing
+ arguments. */
+#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0)
+
+/* Size of the outgoing register save area */
+#define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \
+ || DEFAULT_ABI == ABI_DARWIN) \
+ ? (TARGET_64BIT ? 64 : 32) \
+ : 0)
+
+/* Size of the fixed area on the stack */
+#define RS6000_SAVE_AREA \
+ (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_DARWIN) ? 24 : 8) \
+ << (TARGET_64BIT ? 1 : 0))
+
+/* MEM representing address to save the TOC register */
+#define RS6000_SAVE_TOC gen_rtx_MEM (Pmode, \
+ plus_constant (stack_pointer_rtx, \
+ (TARGET_32BIT ? 20 : 40)))
+
+/* Align an address */
+#define RS6000_ALIGN(n,a) (((n) + (a) - 1) & ~((a) - 1))
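+
+/* A minimal sketch of the rounding identity above (not part of the
+ upstream header); A must be a power of two. Guarded by #if 0 so it
+ never affects compilation. */
+#if 0
+static unsigned int
+rs6000_align_example (void)
+{
+ /* (20 + 16 - 1) & ~(16 - 1) == 35 & ~15 == 32. */
+ return RS6000_ALIGN (20, 16);
+}
+#endif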
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated.
+
+ On the RS/6000, the frame pointer is the same as the stack pointer,
+ except for dynamic allocations. So we start after the fixed area and
+ outgoing parameter area. */
+
+#define STARTING_FRAME_OFFSET \
+ (FRAME_GROWS_DOWNWARD \
+ ? 0 \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ + RS6000_SAVE_AREA))
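+
+/* Worked example (illustrative; assume 64-bit AIX, AltiVec, no stack
+ protector, crtl->outgoing_args_size == 40): RS6000_SAVE_AREA is
+ 24 << 1 == 48, the outgoing area rounds up to RS6000_ALIGN (40, 16)
+ == 48, so locals start at offset 48 + 48 == 96. */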
+
+/* Offset from the stack pointer register to an item dynamically
+ allocated on the stack, e.g., by `alloca'.
+
+ The default value for this macro is `STACK_POINTER_OFFSET' plus the
+ length of the outgoing arguments. The default is correct for most
+ machines. See `function.c' for details. */
+#define STACK_DYNAMIC_OFFSET(FUNDECL) \
+ (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ + (STACK_POINTER_OFFSET))
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On RS/6000, don't define this because there are no push insns. */
+/* #define PUSH_ROUNDING(BYTES) */
+
+/* Offset of first parameter from the argument pointer register value.
+ On the RS/6000, we define the argument pointer to the start of the fixed
+ area. */
+#define FIRST_PARM_OFFSET(FNDECL) RS6000_SAVE_AREA
+
+/* Offset from the argument pointer register value to the top of
+ stack. This is different from FIRST_PARM_OFFSET because of the
+ register save area. */
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 0
+
+/* Define this if stack space is still allocated for a parameter passed
+ in a register. The value is the number of bytes allocated to this
+ area. */
+#define REG_PARM_STACK_SPACE(FNDECL) RS6000_REG_SAVE
+
+/* Define this if the above stack space is to be considered part of the
+ space allocated by the caller. */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+/* This is the difference between the logical top of stack and the actual sp.
+
+ For the RS/6000, sp points past the fixed area. */
+#define STACK_POINTER_OFFSET RS6000_SAVE_AREA
+
+/* Define this if the maximum size of all the outgoing args is to be
+ accumulated and pushed during the prologue. The amount can be
+ found in the variable crtl->outgoing_args_size. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) rs6000_libcall_value ((MODE))
+
+/* DRAFT_V4_STRUCT_RET defaults off. */
+#define DRAFT_V4_STRUCT_RET 0
+
+/* Let TARGET_RETURN_IN_MEMORY control what happens. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Mode of stack savearea.
+ FUNCTION is VOIDmode because the calling convention maintains SP.
+ BLOCK needs Pmode for SP.
+ NONLOCAL needs twice Pmode to maintain both backchain and SP. */
+#define STACK_SAVEAREA_MODE(LEVEL) \
+ (LEVEL == SAVE_FUNCTION ? VOIDmode \
+ : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : TImode) : Pmode)
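+
+/* Illustration: in 32-bit mode Pmode is SImode, so SAVE_NONLOCAL gets
+ DImode (two 32-bit words, backchain plus SP); in 64-bit mode it
+ gets TImode (two 64-bit words). */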
+
+/* Minimum and maximum general purpose registers used to hold arguments. */
+#define GP_ARG_MIN_REG 3
+#define GP_ARG_MAX_REG 10
+#define GP_ARG_NUM_REG (GP_ARG_MAX_REG - GP_ARG_MIN_REG + 1)
+
+/* Minimum and maximum floating point registers used to hold arguments. */
+#define FP_ARG_MIN_REG 33
+#define FP_ARG_AIX_MAX_REG 45
+#define FP_ARG_V4_MAX_REG 40
+#define FP_ARG_MAX_REG ((DEFAULT_ABI == ABI_AIX \
+ || DEFAULT_ABI == ABI_DARWIN) \
+ ? FP_ARG_AIX_MAX_REG : FP_ARG_V4_MAX_REG)
+#define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1)
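+
+/* So FP_ARG_NUM_REG is 45 - 33 + 1 == 13 (fp1..fp13) for the AIX and
+ Darwin ABIs, and 40 - 33 + 1 == 8 (fp1..fp8) for V.4. */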
+
+/* Minimum and maximum AltiVec registers used to hold arguments. */
+#define ALTIVEC_ARG_MIN_REG (FIRST_ALTIVEC_REGNO + 2)
+#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11)
+#define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1)
+
+/* Return registers */
+#define GP_ARG_RETURN GP_ARG_MIN_REG
+#define FP_ARG_RETURN FP_ARG_MIN_REG
+#define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2)
+
+/* Flags for the call/call_value rtl operations set up by function_arg */
+#define CALL_NORMAL 0x00000000 /* no special processing */
+/* Bits in 0x00000001 are unused. */
+#define CALL_V4_CLEAR_FP_ARGS 0x00000002 /* V.4, no FP args passed */
+#define CALL_V4_SET_FP_ARGS 0x00000004 /* V.4, FP args were passed */
+#define CALL_LONG 0x00000008 /* always call indirect */
+#define CALL_LIBCALL 0x00000010 /* libcall */
+
+/* We don't have prologue and epilogue functions to save/restore
+ everything for most ABIs. */
+#define WORLD_SAVE_P(INFO) 0
+
+/* 1 if N is a possible register number for a function value
+ as seen by the caller.
+
+ On RS/6000, this is r3, fp1, and v2 (for AltiVec). */
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == GP_ARG_RETURN \
+ || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT && TARGET_FPRS) \
+ || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
+
+/* 1 if N is a possible register number for function argument passing.
+ On RS/6000, these are r3-r10 and fp1-fp13.
+ On AltiVec, v2 - v13 are used for passing vectors. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((unsigned) (N) - GP_ARG_MIN_REG < GP_ARG_NUM_REG \
+ || ((unsigned) (N) - ALTIVEC_ARG_MIN_REG < ALTIVEC_ARG_NUM_REG \
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) \
+ || ((unsigned) (N) - FP_ARG_MIN_REG < FP_ARG_NUM_REG \
+ && TARGET_HARD_FLOAT && TARGET_FPRS))
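+
+/* The unsigned subtractions above are single-compare range checks:
+ (unsigned) (N) - GP_ARG_MIN_REG < GP_ARG_NUM_REG holds exactly for
+ 3 <= N <= 10, since values below GP_ARG_MIN_REG wrap around to huge
+ unsigned numbers (e.g. N == 2 gives 0xffffffff, which fails). */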
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On the RS/6000, this is a structure. The first element is the number of
+ total argument words, the second is used to store the next
+ floating-point register number, and the third says how many more args we
+ have prototype types for.
+
+ For ABI_V4, we treat these slightly differently -- `sysv_gregno' is
+ the next available GP register, `fregno' is the next available FP
+ register, and `words' is the number of words used on the stack.
+
+ The varargs/stdarg support requires that this structure's size
+ be a multiple of sizeof(int). */
+
+typedef struct rs6000_args
+{
+ int words; /* # words used for passing GP registers */
+ int fregno; /* next available FP register */
+ int vregno; /* next available AltiVec register */
+ int nargs_prototype; /* # args left in the current prototype */
+ int prototype; /* Whether a prototype was defined */
+ int stdarg; /* Whether function is a stdarg function. */
+ int call_cookie; /* Do special things for this call */
+ int sysv_gregno; /* next available GP register */
+ int intoffset; /* running offset in struct (darwin64) */
+ int use_stack; /* any part of struct on stack (darwin64) */
+ int floats_in_gpr; /* count of SFmode floats taking up
+ GPR space (darwin64) */
+ int named; /* false for varargs params */
+ int escapes; /* whether the function is visible outside the TU */
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&CUM, FNTYPE, LIBNAME, FALSE, FALSE, \
+ N_NAMED_ARGS, FNDECL, VOIDmode)
+
+/* Similar, but when scanning the definition of a procedure. We always
+ set NARGS_PROTOTYPE large so we never return an EXPR_LIST. */
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \
+ init_cumulative_args (&CUM, FNTYPE, LIBNAME, TRUE, FALSE, \
+ 1000, current_function_decl, VOIDmode)
+
+/* Like `INIT_CUMULATIVE_ARGS', but only used for outgoing libcalls. */
+
+#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \
+ init_cumulative_args (&CUM, NULL_TREE, LIBNAME, FALSE, TRUE, \
+ 0, NULL_TREE, MODE)
+
+/* If defined, a C expression which determines whether, and in which
+ direction, to pad out an argument with extra space. The value
+ should be of type `enum direction': either `upward' to pad above
+ the argument, `downward' to pad below, or `none' to inhibit
+ padding. */
+
+#define FUNCTION_ARG_PADDING(MODE, TYPE) function_arg_padding (MODE, TYPE)
+
+#define PAD_VARARGS_DOWN \
+ (FUNCTION_ARG_PADDING (TYPE_MODE (type), type) == downward)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ output_function_profiler ((FILE), (LABELNO));
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. No definition is equivalent to
+ always zero.
+
+ On the RS/6000, this is nonzero because we can restore the stack from
+ its backpointer, which we maintain. */
+#define EXIT_IGNORE_STACK 1
+
+/* Define this macro as a C expression that is nonzero for registers
+ that are used by the epilogue or the `return' pattern. The stack
+ and frame pointer registers are already assumed to be used as
+ needed. */
+
+#define EPILOGUE_USES(REGNO) \
+ ((reload_completed && (REGNO) == LR_REGNO) \
+ || (TARGET_ALTIVEC && (REGNO) == VRSAVE_REGNO) \
+ || (crtl->calls_eh_return \
+ && TARGET_AIX \
+ && (REGNO) == 2))
+
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE rs6000_trampoline_size ()
+
+/* Definitions for __builtin_return_address and __builtin_frame_address.
+ To make __builtin_return_address (0) give the link register (65),
+ enable this. */
+/* This should be uncommented so that the link register is used, but
+ currently that would result in unmatched insns and in spilling fixed
+ registers, so we'll leave it for another day. When these problems are
+ taken care of, one additional fetch will be necessary in RETURN_ADDR_RTX.
+ (mrs) */
+/* #define RETURN_ADDR_IN_PREVIOUS_FRAME */
+
+/* Number of bytes into the frame at which return addresses can be found.
+ See rs6000_stack_info in rs6000.c for more information on how the
+ different ABIs store the return address. */
+#define RETURN_ADDRESS_OFFSET \
+ ((DEFAULT_ABI == ABI_AIX \
+ || DEFAULT_ABI == ABI_DARWIN) ? (TARGET_32BIT ? 8 : 16) : \
+ (DEFAULT_ABI == ABI_V4) ? 4 : \
+ (internal_error ("RETURN_ADDRESS_OFFSET not supported"), 0))
+
+/* The current return address is in link register (65). The return address
+ of anything farther back is accessed normally at an offset of 8 from the
+ frame pointer. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ (rs6000_return_addr (COUNT, FRAME))
+
+
+/* Definitions for register eliminations.
+
+ We have two registers that can be eliminated on the RS/6000. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer.
+
+ In addition, we use the elimination mechanism to see if r30 is needed.
+ Initially we assume that it isn't. If it is, we spill it. This is done
+ by making it an eliminable register. We replace it with itself so that
+ if it isn't needed, then existing uses won't be modified. */
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference. */
+#define ELIMINABLE_REGS \
+{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RS6000_PIC_OFFSET_TABLE_REGNUM, RS6000_PIC_OFFSET_TABLE_REGNUM } }
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = rs6000_initial_elimination_offset(FROM, TO))
+
+/* Addressing modes, and classification of registers for them. */
+
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_PRE_MODIFY_DISP 1
+#define HAVE_PRE_MODIFY_REG 1
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+((REGNO) < FIRST_PSEUDO_REGISTER \
+ ? (REGNO) <= 31 || (REGNO) == 67 \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ : (reg_renumber[REGNO] >= 0 \
+ && (reg_renumber[REGNO] <= 31 || reg_renumber[REGNO] == 67 \
+ || reg_renumber[REGNO] == FRAME_POINTER_REGNUM)))
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+((REGNO) < FIRST_PSEUDO_REGISTER \
+ ? ((REGNO) > 0 && (REGNO) <= 31) || (REGNO) == 67 \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ : (reg_renumber[REGNO] > 0 \
+ && (reg_renumber[REGNO] <= 31 || reg_renumber[REGNO] == 67 \
+ || reg_renumber[REGNO] == FRAME_POINTER_REGNUM)))
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg in the non-strict case. */
+#define INT_REG_OK_FOR_INDEX_P(X, STRICT) \
+ ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+ || REGNO_OK_FOR_INDEX_P (REGNO (X)))
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg in the non-strict case. */
+#define INT_REG_OK_FOR_BASE_P(X, STRICT) \
+ ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+ || REGNO_OK_FOR_BASE_P (REGNO (X)))
+
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address. */
+
+#define CONSTANT_ADDRESS_P(X) \
+ (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \
+ || GET_CODE (X) == HIGH)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.
+
+ On the RS/6000, all integer constants are acceptable, though most
+ won't be valid for particular insns. Only easy FP constants are
+ acceptable. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (((GET_CODE (X) != CONST_DOUBLE \
+ && GET_CODE (X) != CONST_VECTOR) \
+ || GET_MODE (X) == VOIDmode \
+ || (TARGET_POWERPC64 && GET_MODE (X) == DImode) \
+ || easy_fp_constant (X, GET_MODE (X)) \
+ || easy_vector_constant (X, GET_MODE (X))) \
+ && !rs6000_tls_referenced_p (X))
+
+#define EASY_VECTOR_15(n) ((n) >= -16 && (n) <= 15)
+#define EASY_VECTOR_15_ADD_SELF(n) (!EASY_VECTOR_15((n)) \
+ && EASY_VECTOR_15((n) >> 1) \
+ && ((n) & 1) == 0)
+
+#define EASY_VECTOR_MSB(n,mode) \
+ (((unsigned HOST_WIDE_INT)n) == \
+ ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1))
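+
+/* Worked examples (illustrative): EASY_VECTOR_15 (12) holds, so 12
+ can be splatted directly. EASY_VECTOR_15 (20) fails, but
+ EASY_VECTOR_15_ADD_SELF (20) holds, since 20 >> 1 == 10 is in range
+ and 20 is even: splat 10, then add the vector to itself. For an
+ SImode element, EASY_VECTOR_MSB matches
+ n == (0xffffffff + 1) >> 1 == 0x80000000, the sign-bit-only
+ constant. */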
+
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c.
+
+ Implemented on rs6000 by rs6000_legitimize_reload_address.
+ Note that (X) is evaluated twice; this is safe in current usage. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+do { \
+ int win; \
+ (X) = rs6000_legitimize_reload_address_ptr ((X), (MODE), (OPNUM), \
+ (int)(TYPE), (IND_LEVELS), &win); \
+ if (win) \
+ goto WIN; \
+} while (0)
+
+#define FIND_BASE_TERM rs6000_find_base_term
+
+/* The register number of the register used to address a table of
+ static data addresses in memory. In some cases this register is
+ defined by a processor's "application binary interface" (ABI).
+ When this macro is defined, RTL is generated for this register
+ once, as with the stack pointer and frame pointer registers. If
+ this macro is not defined, it is up to the machine-dependent files
+ to allocate such a register (if necessary). */
+
+#define RS6000_PIC_OFFSET_TABLE_REGNUM 30
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? RS6000_PIC_OFFSET_TABLE_REGNUM : INVALID_REGNUM)
+
+#define TOC_REGISTER (TARGET_MINIMAL_TOC ? RS6000_PIC_OFFSET_TABLE_REGNUM : 2)
+
+/* Define this macro if the register defined by
+ `PIC_OFFSET_TABLE_REGNUM' is clobbered by calls. Do not define
+ this macro if `PIC_OFFSET_TABLE_REGNUM' is not defined. */
+
+/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */
+
+/* A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent
+ code. You can assume that X satisfies `CONSTANT_P', so you need
+ not check this. You can also assume FLAG_PIC is true, so you need
+ not check it either. You need not define this macro if all
+ constants (including `SYMBOL_REF') can be immediate operands when
+ generating position independent code. */
+
+/* #define LEGITIMATE_PIC_OPERAND_P (X) */
+
+/* Define this if some processing needs to be done immediately before
+ emitting code for an insn. */
+
+#define FINAL_PRESCAN_INSN(INSN,OPERANDS,NOPERANDS) \
+ rs6000_final_prescan_insn (INSN, OPERANDS, NOPERANDS)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE SImode
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+
+/* #define FIXUNS_TRUNC_LIKE_FIX_TRUNC */
+
+/* An integer expression for the size in bits of the largest integer machine
+ mode that should actually be used. */
+
+/* Allow pairs of registers to be used, which is the intent of the default. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_POWERPC64 ? TImode : DImode)
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX (! TARGET_POWERPC64 ? 4 : 8)
+#define MAX_MOVE_MAX 8
+
+/* Nonzero if access to memory by bytes is no faster than for words.
+ Also nonzero if doing byte operations (specifically shifts) in registers
+ is undesirable. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
+
+/* The CTZ patterns return -1 for input of zero. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1)
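+
+/* Both macros above are comma expressions: they store the result into
+ VALUE and evaluate to 1, telling the middle end that the value at a
+ zero input is well defined (e.g. cntlzw of 0 yields 32, so (VALUE)
+ becomes 32 for SImode). */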
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+extern unsigned rs6000_pmode;
+#define Pmode ((enum machine_mode)rs6000_pmode)
+
+/* Supply definition of STACK_SIZE_MODE for allocate_dynamic_stack_space. */
+#define STACK_SIZE_MODE (TARGET_32BIT ? SImode : DImode)
+
+/* Mode of a function address in a call instruction (for indexing purposes).
+ Doesn't matter on RS/6000. */
+#define FUNCTION_MODE SImode
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+#define NO_FUNCTION_CSE
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits.
+
+ The sle and sre instructions, which allow SHIFT_COUNT_TRUNCATED,
+ have been dropped from the PowerPC architecture. */
+
+#define SHIFT_COUNT_TRUNCATED (TARGET_POWER ? 1 : 0)
+
+/* Adjust the length of an INSN. LENGTH is the currently-computed length and
+ should be adjusted to reflect any required changes. This macro is used when
+ there is some systematic length adjustment required that would be difficult
+ to express in the length attribute. */
+
+/* #define ADJUST_INSN_LENGTH(X,LENGTH) */
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a
+ COMPARE, return the mode to be used for the comparison. For
+ floating-point, CCFPmode should be used. CCUNSmode should be used
+ for unsigned comparisons. CCEQmode should be used when we are
+ doing an inequality comparison on the result of a
+ comparison. CCmode should be used in all other cases. */
+
+#define SELECT_CC_MODE(OP,X,Y) \
+ (SCALAR_FLOAT_MODE_P (GET_MODE (X)) ? CCFPmode \
+ : (OP) == GTU || (OP) == LTU || (OP) == GEU || (OP) == LEU ? CCUNSmode \
+ : (((OP) == EQ || (OP) == NE) && COMPARISON_P (X) \
+ ? CCEQmode : CCmode))
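+
+/* Worked cases for SELECT_CC_MODE (illustrative): a DFmode comparison
+ selects CCFPmode; (GTU, reg:SI, reg:SI) selects CCUNSmode; an EQ or
+ NE whose first operand is itself a comparison selects CCEQmode; a
+ plain signed (GT, reg:SI, reg:SI) falls through to CCmode. */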
+
+/* Can the condition code MODE be safely reversed? This is safe in
+ all cases on this port, because at present it doesn't use the
+ trapping FP comparisons (fcmpo). */
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* Given a condition code and a mode, return the inverse condition. */
+#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
+
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+#define ASM_COMMENT_START " #"
+
+/* Flag to say the TOC is initialized */
+extern int toc_initialized;
+
+/* Macro to output a special constant pool entry. Go to WIN if we output
+ it. Otherwise, it is written the usual way.
+
+ On the RS/6000, toc entries are handled this way. */
+
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, WIN) \
+{ if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (X, MODE)) \
+ { \
+ output_toc (FILE, X, LABELNO, MODE); \
+ goto WIN; \
+ } \
+}
+
+#ifdef HAVE_GAS_WEAK
+#define RS6000_WEAK 1
+#else
+#define RS6000_WEAK 0
+#endif
+
+#if RS6000_WEAK
+/* Used in lieu of ASM_WEAKEN_LABEL. */
+#define ASM_WEAKEN_DECL(FILE, DECL, NAME, VAL) \
+ do \
+ { \
+ fputs ("\t.weak\t", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ if ((DECL) && TREE_CODE (DECL) == FUNCTION_DECL \
+ && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \
+ { \
+ if (TARGET_XCOFF) \
+ fputs ("[DS]", (FILE)); \
+ fputs ("\n\t.weak\t.", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ } \
+ fputc ('\n', (FILE)); \
+ if (VAL) \
+ { \
+ ASM_OUTPUT_DEF ((FILE), (NAME), (VAL)); \
+ if ((DECL) && TREE_CODE (DECL) == FUNCTION_DECL \
+ && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \
+ { \
+ fputs ("\t.set\t.", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ fputs (",.", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (VAL)); \
+ fputc ('\n', (FILE)); \
+ } \
+ } \
+ } \
+ while (0)
+#endif
+
+#if HAVE_GAS_WEAKREF
+#define ASM_OUTPUT_WEAKREF(FILE, DECL, NAME, VALUE) \
+ do \
+ { \
+ fputs ("\t.weakref\t", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ fputs (", ", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (VALUE)); \
+ if ((DECL) && TREE_CODE (DECL) == FUNCTION_DECL \
+ && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \
+ { \
+ fputs ("\n\t.weakref\t.", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ fputs (", .", (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (VALUE)); \
+ } \
+ fputc ('\n', (FILE)); \
+ } while (0)
+#endif
+
+/* This implements the `alias' attribute. */
+#undef ASM_OUTPUT_DEF_FROM_DECLS
+#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
+ do \
+ { \
+ const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \
+ const char *name = IDENTIFIER_POINTER (TARGET); \
+ if (TREE_CODE (DECL) == FUNCTION_DECL \
+ && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) \
+ { \
+ if (TREE_PUBLIC (DECL)) \
+ { \
+ if (!RS6000_WEAK || !DECL_WEAK (DECL)) \
+ { \
+ fputs ("\t.globl\t.", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, alias); \
+ putc ('\n', FILE); \
+ } \
+ } \
+ else if (TARGET_XCOFF) \
+ { \
+ fputs ("\t.lglobl\t.", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, alias); \
+ putc ('\n', FILE); \
+ } \
+ fputs ("\t.set\t.", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, alias); \
+ fputs (",.", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, name); \
+ fputc ('\n', FILE); \
+ } \
+ ASM_OUTPUT_DEF (FILE, alias, name); \
+ } \
+ while (0)
+
+#define TARGET_ASM_FILE_START rs6000_file_start
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF ""
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
+
+#define REGISTER_NAMES \
+{ \
+ &rs6000_reg_names[ 0][0], /* r0 */ \
+ &rs6000_reg_names[ 1][0], /* r1 */ \
+ &rs6000_reg_names[ 2][0], /* r2 */ \
+ &rs6000_reg_names[ 3][0], /* r3 */ \
+ &rs6000_reg_names[ 4][0], /* r4 */ \
+ &rs6000_reg_names[ 5][0], /* r5 */ \
+ &rs6000_reg_names[ 6][0], /* r6 */ \
+ &rs6000_reg_names[ 7][0], /* r7 */ \
+ &rs6000_reg_names[ 8][0], /* r8 */ \
+ &rs6000_reg_names[ 9][0], /* r9 */ \
+ &rs6000_reg_names[10][0], /* r10 */ \
+ &rs6000_reg_names[11][0], /* r11 */ \
+ &rs6000_reg_names[12][0], /* r12 */ \
+ &rs6000_reg_names[13][0], /* r13 */ \
+ &rs6000_reg_names[14][0], /* r14 */ \
+ &rs6000_reg_names[15][0], /* r15 */ \
+ &rs6000_reg_names[16][0], /* r16 */ \
+ &rs6000_reg_names[17][0], /* r17 */ \
+ &rs6000_reg_names[18][0], /* r18 */ \
+ &rs6000_reg_names[19][0], /* r19 */ \
+ &rs6000_reg_names[20][0], /* r20 */ \
+ &rs6000_reg_names[21][0], /* r21 */ \
+ &rs6000_reg_names[22][0], /* r22 */ \
+ &rs6000_reg_names[23][0], /* r23 */ \
+ &rs6000_reg_names[24][0], /* r24 */ \
+ &rs6000_reg_names[25][0], /* r25 */ \
+ &rs6000_reg_names[26][0], /* r26 */ \
+ &rs6000_reg_names[27][0], /* r27 */ \
+ &rs6000_reg_names[28][0], /* r28 */ \
+ &rs6000_reg_names[29][0], /* r29 */ \
+ &rs6000_reg_names[30][0], /* r30 */ \
+ &rs6000_reg_names[31][0], /* r31 */ \
+ \
+ &rs6000_reg_names[32][0], /* fr0 */ \
+ &rs6000_reg_names[33][0], /* fr1 */ \
+ &rs6000_reg_names[34][0], /* fr2 */ \
+ &rs6000_reg_names[35][0], /* fr3 */ \
+ &rs6000_reg_names[36][0], /* fr4 */ \
+ &rs6000_reg_names[37][0], /* fr5 */ \
+ &rs6000_reg_names[38][0], /* fr6 */ \
+ &rs6000_reg_names[39][0], /* fr7 */ \
+ &rs6000_reg_names[40][0], /* fr8 */ \
+ &rs6000_reg_names[41][0], /* fr9 */ \
+ &rs6000_reg_names[42][0], /* fr10 */ \
+ &rs6000_reg_names[43][0], /* fr11 */ \
+ &rs6000_reg_names[44][0], /* fr12 */ \
+ &rs6000_reg_names[45][0], /* fr13 */ \
+ &rs6000_reg_names[46][0], /* fr14 */ \
+ &rs6000_reg_names[47][0], /* fr15 */ \
+ &rs6000_reg_names[48][0], /* fr16 */ \
+ &rs6000_reg_names[49][0], /* fr17 */ \
+ &rs6000_reg_names[50][0], /* fr18 */ \
+ &rs6000_reg_names[51][0], /* fr19 */ \
+ &rs6000_reg_names[52][0], /* fr20 */ \
+ &rs6000_reg_names[53][0], /* fr21 */ \
+ &rs6000_reg_names[54][0], /* fr22 */ \
+ &rs6000_reg_names[55][0], /* fr23 */ \
+ &rs6000_reg_names[56][0], /* fr24 */ \
+ &rs6000_reg_names[57][0], /* fr25 */ \
+ &rs6000_reg_names[58][0], /* fr26 */ \
+ &rs6000_reg_names[59][0], /* fr27 */ \
+ &rs6000_reg_names[60][0], /* fr28 */ \
+ &rs6000_reg_names[61][0], /* fr29 */ \
+ &rs6000_reg_names[62][0], /* fr30 */ \
+ &rs6000_reg_names[63][0], /* fr31 */ \
+ \
+ &rs6000_reg_names[64][0], /* mq */ \
+ &rs6000_reg_names[65][0], /* lr */ \
+ &rs6000_reg_names[66][0], /* ctr */ \
+ &rs6000_reg_names[67][0], /* ap */ \
+ \
+ &rs6000_reg_names[68][0], /* cr0 */ \
+ &rs6000_reg_names[69][0], /* cr1 */ \
+ &rs6000_reg_names[70][0], /* cr2 */ \
+ &rs6000_reg_names[71][0], /* cr3 */ \
+ &rs6000_reg_names[72][0], /* cr4 */ \
+ &rs6000_reg_names[73][0], /* cr5 */ \
+ &rs6000_reg_names[74][0], /* cr6 */ \
+ &rs6000_reg_names[75][0], /* cr7 */ \
+ \
+ &rs6000_reg_names[76][0], /* ca */ \
+ \
+ &rs6000_reg_names[77][0], /* v0 */ \
+ &rs6000_reg_names[78][0], /* v1 */ \
+ &rs6000_reg_names[79][0], /* v2 */ \
+ &rs6000_reg_names[80][0], /* v3 */ \
+ &rs6000_reg_names[81][0], /* v4 */ \
+ &rs6000_reg_names[82][0], /* v5 */ \
+ &rs6000_reg_names[83][0], /* v6 */ \
+ &rs6000_reg_names[84][0], /* v7 */ \
+ &rs6000_reg_names[85][0], /* v8 */ \
+ &rs6000_reg_names[86][0], /* v9 */ \
+ &rs6000_reg_names[87][0], /* v10 */ \
+ &rs6000_reg_names[88][0], /* v11 */ \
+ &rs6000_reg_names[89][0], /* v12 */ \
+ &rs6000_reg_names[90][0], /* v13 */ \
+ &rs6000_reg_names[91][0], /* v14 */ \
+ &rs6000_reg_names[92][0], /* v15 */ \
+ &rs6000_reg_names[93][0], /* v16 */ \
+ &rs6000_reg_names[94][0], /* v17 */ \
+ &rs6000_reg_names[95][0], /* v18 */ \
+ &rs6000_reg_names[96][0], /* v19 */ \
+ &rs6000_reg_names[97][0], /* v20 */ \
+ &rs6000_reg_names[98][0], /* v21 */ \
+ &rs6000_reg_names[99][0], /* v22 */ \
+ &rs6000_reg_names[100][0], /* v23 */ \
+ &rs6000_reg_names[101][0], /* v24 */ \
+ &rs6000_reg_names[102][0], /* v25 */ \
+ &rs6000_reg_names[103][0], /* v26 */ \
+ &rs6000_reg_names[104][0], /* v27 */ \
+ &rs6000_reg_names[105][0], /* v28 */ \
+ &rs6000_reg_names[106][0], /* v29 */ \
+ &rs6000_reg_names[107][0], /* v30 */ \
+ &rs6000_reg_names[108][0], /* v31 */ \
+ &rs6000_reg_names[109][0], /* vrsave */ \
+ &rs6000_reg_names[110][0], /* vscr */ \
+ &rs6000_reg_names[111][0], /* spe_acc */ \
+ &rs6000_reg_names[112][0], /* spefscr */ \
+ &rs6000_reg_names[113][0], /* sfp */ \
+}
+
+/* Table of additional register names to use in user input. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+ {{"r0", 0}, {"r1", 1}, {"r2", 2}, {"r3", 3}, \
+ {"r4", 4}, {"r5", 5}, {"r6", 6}, {"r7", 7}, \
+ {"r8", 8}, {"r9", 9}, {"r10", 10}, {"r11", 11}, \
+ {"r12", 12}, {"r13", 13}, {"r14", 14}, {"r15", 15}, \
+ {"r16", 16}, {"r17", 17}, {"r18", 18}, {"r19", 19}, \
+ {"r20", 20}, {"r21", 21}, {"r22", 22}, {"r23", 23}, \
+ {"r24", 24}, {"r25", 25}, {"r26", 26}, {"r27", 27}, \
+ {"r28", 28}, {"r29", 29}, {"r30", 30}, {"r31", 31}, \
+ {"fr0", 32}, {"fr1", 33}, {"fr2", 34}, {"fr3", 35}, \
+ {"fr4", 36}, {"fr5", 37}, {"fr6", 38}, {"fr7", 39}, \
+ {"fr8", 40}, {"fr9", 41}, {"fr10", 42}, {"fr11", 43}, \
+ {"fr12", 44}, {"fr13", 45}, {"fr14", 46}, {"fr15", 47}, \
+ {"fr16", 48}, {"fr17", 49}, {"fr18", 50}, {"fr19", 51}, \
+ {"fr20", 52}, {"fr21", 53}, {"fr22", 54}, {"fr23", 55}, \
+ {"fr24", 56}, {"fr25", 57}, {"fr26", 58}, {"fr27", 59}, \
+ {"fr28", 60}, {"fr29", 61}, {"fr30", 62}, {"fr31", 63}, \
+ {"v0", 77}, {"v1", 78}, {"v2", 79}, {"v3", 80}, \
+ {"v4", 81}, {"v5", 82}, {"v6", 83}, {"v7", 84}, \
+ {"v8", 85}, {"v9", 86}, {"v10", 87}, {"v11", 88}, \
+ {"v12", 89}, {"v13", 90}, {"v14", 91}, {"v15", 92}, \
+ {"v16", 93}, {"v17", 94}, {"v18", 95}, {"v19", 96}, \
+ {"v20", 97}, {"v21", 98}, {"v22", 99}, {"v23", 100}, \
+ {"v24", 101},{"v25", 102},{"v26", 103},{"v27", 104}, \
+ {"v28", 105},{"v29", 106},{"v30", 107},{"v31", 108}, \
+ {"vrsave", 109}, {"vscr", 110}, \
+ {"spe_acc", 111}, {"spefscr", 112}, \
+ /* no additional names for: mq, lr, ctr, ap */ \
+ {"cr0", 68}, {"cr1", 69}, {"cr2", 70}, {"cr3", 71}, \
+ {"cr4", 72}, {"cr5", 73}, {"cr6", 74}, {"cr7", 75}, \
+ {"cc", 68}, {"sp", 1}, {"toc", 2}, \
+ /* CA is only part of XER, but we do not model the other parts (yet). */ \
+ {"xer", 76}, \
+ /* VSX registers overlaid on top of FR, Altivec registers */ \
+ {"vs0", 32}, {"vs1", 33}, {"vs2", 34}, {"vs3", 35}, \
+ {"vs4", 36}, {"vs5", 37}, {"vs6", 38}, {"vs7", 39}, \
+ {"vs8", 40}, {"vs9", 41}, {"vs10", 42}, {"vs11", 43}, \
+ {"vs12", 44}, {"vs13", 45}, {"vs14", 46}, {"vs15", 47}, \
+ {"vs16", 48}, {"vs17", 49}, {"vs18", 50}, {"vs19", 51}, \
+ {"vs20", 52}, {"vs21", 53}, {"vs22", 54}, {"vs23", 55}, \
+ {"vs24", 56}, {"vs25", 57}, {"vs26", 58}, {"vs27", 59}, \
+ {"vs28", 60}, {"vs29", 61}, {"vs30", 62}, {"vs31", 63}, \
+ {"vs32", 77}, {"vs33", 78}, {"vs34", 79}, {"vs35", 80}, \
+ {"vs36", 81}, {"vs37", 82}, {"vs38", 83}, {"vs39", 84}, \
+ {"vs40", 85}, {"vs41", 86}, {"vs42", 87}, {"vs43", 88}, \
+ {"vs44", 89}, {"vs45", 90}, {"vs46", 91}, {"vs47", 92}, \
+ {"vs48", 93}, {"vs49", 94}, {"vs50", 95}, {"vs51", 96}, \
+ {"vs52", 97}, {"vs53", 98}, {"vs54", 99}, {"vs55", 100}, \
+ {"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \
+ {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108} }
+
+/* Text to write out after a CALL that may be replaced by glue code by
+ the loader. This depends on the AIX version. */
+#define RS6000_CALL_GLUE "cror 31,31,31"
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ do { char buf[100]; \
+ fputs ("\t.long ", FILE); \
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", VALUE); \
+ assemble_name (FILE, buf); \
+ putc ('-', FILE); \
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", REL); \
+ assemble_name (FILE, buf); \
+ putc ('\n', FILE); \
+ } while (0)
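+
+/* Sketch of the emitted element (label spelling is whatever
+   ASM_GENERATE_INTERNAL_LABEL produces; "L..42"/"L..7" below assume
+   AIX-style internal labels): for VALUE 42 and REL 7 this prints
+
+       .long L..42-L..7
+
+   i.e. the case-vector entry is the signed distance between labels.  */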
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
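+
+/* For example, LOG == 3 emits "\t.align 3": the argument is a power of
+   two, so this requests 8-byte (2**3) alignment.  */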
+
+/* How to align the given loop. */
+#define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL)
+
+/* Pick up the return address upon entry to a procedure. Used for
+ dwarf2 unwind information. This also enables the table driven
+ mechanism. */
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNO)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNO)
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 3 : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 10)
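+
+/* That is, the exception data registers are r3..r6 (EH_RETURN_DATA_REGNO
+   maps 0..3 to 3..6) and the stack adjustment is passed in r10.  */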
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+/* Define which CODE values are valid. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '.' || (CODE) == '&')
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* Uncomment to disable the corresponding default options.  */
+/* #define MACHINE_no_sched_interblock */
+/* #define MACHINE_no_sched_speculative */
+/* #define MACHINE_no_sched_speculative_load */
+
+/* General flags. */
+extern int frame_pointer_needed;
+
+/* Classification of the builtin functions to properly set the declaration tree
+ flags. */
+enum rs6000_btc
+{
+ RS6000_BTC_MISC, /* assume builtin can do anything */
+ RS6000_BTC_CONST, /* builtin is a 'const' function. */
+ RS6000_BTC_PURE, /* builtin is a 'pure' function. */
+ RS6000_BTC_FP_PURE /* builtin is 'pure' if rounding math. */
+};
+
+/* Convenience macros to document the instruction type. */
+#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches memory */
+#define RS6000_BTC_SAT RS6000_BTC_MISC /* VMX saturate sets VSCR register */
+
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+#define RS6000_BUILTIN(NAME, TYPE) NAME,
+#define RS6000_BUILTIN_EQUATE(NAME, VALUE) NAME = VALUE,
+
+enum rs6000_builtins
+{
+#include "rs6000-builtin.def"
+
+ RS6000_BUILTIN_COUNT
+};
+
+#undef RS6000_BUILTIN
+#undef RS6000_BUILTIN_EQUATE
+
+enum rs6000_builtin_type_index
+{
+ RS6000_BTI_NOT_OPAQUE,
+ RS6000_BTI_opaque_V2SI,
+ RS6000_BTI_opaque_V2SF,
+ RS6000_BTI_opaque_p_V2SI,
+ RS6000_BTI_opaque_V4SI,
+ RS6000_BTI_V16QI,
+ RS6000_BTI_V2SI,
+ RS6000_BTI_V2SF,
+ RS6000_BTI_V2DI,
+ RS6000_BTI_V2DF,
+ RS6000_BTI_V4HI,
+ RS6000_BTI_V4SI,
+ RS6000_BTI_V4SF,
+ RS6000_BTI_V8HI,
+ RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_char, /* __bool char */
+ RS6000_BTI_bool_short, /* __bool short */
+ RS6000_BTI_bool_int, /* __bool int */
+ RS6000_BTI_bool_long, /* __bool long */
+ RS6000_BTI_pixel, /* __pixel */
+ RS6000_BTI_bool_V16QI, /* __vector __bool char */
+ RS6000_BTI_bool_V8HI, /* __vector __bool short */
+ RS6000_BTI_bool_V4SI, /* __vector __bool int */
+ RS6000_BTI_bool_V2DI, /* __vector __bool long */
+ RS6000_BTI_pixel_V8HI, /* __vector __pixel */
+ RS6000_BTI_long, /* long_integer_type_node */
+ RS6000_BTI_unsigned_long, /* long_unsigned_type_node */
+ RS6000_BTI_long_long, /* long_long_integer_type_node */
+ RS6000_BTI_unsigned_long_long, /* long_long_unsigned_type_node */
+ RS6000_BTI_INTQI, /* intQI_type_node */
+ RS6000_BTI_UINTQI, /* unsigned_intQI_type_node */
+ RS6000_BTI_INTHI, /* intHI_type_node */
+ RS6000_BTI_UINTHI, /* unsigned_intHI_type_node */
+ RS6000_BTI_INTSI, /* intSI_type_node */
+ RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */
+ RS6000_BTI_INTDI, /* intDI_type_node */
+ RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */
+ RS6000_BTI_float, /* float_type_node */
+ RS6000_BTI_double, /* double_type_node */
+ RS6000_BTI_void, /* void_type_node */
+ RS6000_BTI_MAX
+};
+
+
+#define opaque_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V2SI])
+#define opaque_V2SF_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V2SF])
+#define opaque_p_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI])
+#define opaque_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V4SI])
+#define V16QI_type_node (rs6000_builtin_types[RS6000_BTI_V16QI])
+#define V2DI_type_node (rs6000_builtin_types[RS6000_BTI_V2DI])
+#define V2DF_type_node (rs6000_builtin_types[RS6000_BTI_V2DF])
+#define V2SI_type_node (rs6000_builtin_types[RS6000_BTI_V2SI])
+#define V2SF_type_node (rs6000_builtin_types[RS6000_BTI_V2SF])
+#define V4HI_type_node (rs6000_builtin_types[RS6000_BTI_V4HI])
+#define V4SI_type_node (rs6000_builtin_types[RS6000_BTI_V4SI])
+#define V4SF_type_node (rs6000_builtin_types[RS6000_BTI_V4SF])
+#define V8HI_type_node (rs6000_builtin_types[RS6000_BTI_V8HI])
+#define unsigned_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V16QI])
+#define unsigned_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V8HI])
+#define unsigned_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V4SI])
+#define unsigned_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V2DI])
+#define bool_char_type_node (rs6000_builtin_types[RS6000_BTI_bool_char])
+#define bool_short_type_node (rs6000_builtin_types[RS6000_BTI_bool_short])
+#define bool_int_type_node (rs6000_builtin_types[RS6000_BTI_bool_int])
+#define bool_long_type_node (rs6000_builtin_types[RS6000_BTI_bool_long])
+#define pixel_type_node (rs6000_builtin_types[RS6000_BTI_pixel])
+#define bool_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V16QI])
+#define bool_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V8HI])
+#define bool_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V4SI])
+#define bool_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_bool_V2DI])
+#define pixel_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
+
+#define long_long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_long])
+#define long_long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long_long])
+#define long_integer_type_internal_node (rs6000_builtin_types[RS6000_BTI_long])
+#define long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long])
+#define intQI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTQI])
+#define uintQI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTQI])
+#define intHI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTHI])
+#define uintHI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTHI])
+#define intSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTSI])
+#define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI])
+#define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI])
+#define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI])
+#define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float])
+#define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double])
+#define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void])
+
+extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX];
+extern GTY(()) tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
new file mode 100644
index 000000000..7befb56a3
--- /dev/null
+++ b/gcc/config/rs6000/rs6000.md
@@ -0,0 +1,16361 @@
+;; Machine description for IBM RISC System 6000 (POWER) for GNU C compiler
+;; Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;;
+;; REGNOS
+;;
+
+(define_constants
+ [(MQ_REGNO 64)
+ (LR_REGNO 65)
+ (CTR_REGNO 66)
+ (CR0_REGNO 68)
+ (CR1_REGNO 69)
+ (CR2_REGNO 70)
+ (CR3_REGNO 71)
+ (CR4_REGNO 72)
+ (CR5_REGNO 73)
+ (CR6_REGNO 74)
+ (CR7_REGNO 75)
+ (MAX_CR_REGNO 75)
+ (CA_REGNO 76)
+ (FIRST_ALTIVEC_REGNO 77)
+ (LAST_ALTIVEC_REGNO 108)
+ (VRSAVE_REGNO 109)
+ (VSCR_REGNO 110)
+ (SPE_ACC_REGNO 111)
+ (SPEFSCR_REGNO 112)
+ (SFP_REGNO 113)
+ ])
+
+;;
+;; UNSPEC usage
+;;
+
+(define_constants
+ [(UNSPEC_FRSP 0) ; frsp for POWER machines
+ (UNSPEC_PROBE_STACK 4) ; probe stack memory reference
+ (UNSPEC_TIE 5) ; tie stack contents and stack pointer
+ (UNSPEC_TOCPTR 6) ; address of a word pointing to the TOC
+ (UNSPEC_TOC 7) ; address of the TOC (more-or-less)
+ (UNSPEC_MOVSI_GOT 8)
+ (UNSPEC_MV_CR_OV 9) ; move_from_CR_ov_bit
+ (UNSPEC_FCTIWZ 10)
+ (UNSPEC_FRIM 11)
+ (UNSPEC_FRIN 12)
+ (UNSPEC_FRIP 13)
+ (UNSPEC_FRIZ 14)
+ (UNSPEC_LD_MPIC 15) ; load_macho_picbase
+ (UNSPEC_MPIC_CORRECT 16) ; macho_correct_pic
+ (UNSPEC_TLSGD 17)
+ (UNSPEC_TLSLD 18)
+ (UNSPEC_MOVESI_FROM_CR 19)
+ (UNSPEC_MOVESI_TO_CR 20)
+ (UNSPEC_TLSDTPREL 21)
+ (UNSPEC_TLSDTPRELHA 22)
+ (UNSPEC_TLSDTPRELLO 23)
+ (UNSPEC_TLSGOTDTPREL 24)
+ (UNSPEC_TLSTPREL 25)
+ (UNSPEC_TLSTPRELHA 26)
+ (UNSPEC_TLSTPRELLO 27)
+ (UNSPEC_TLSGOTTPREL 28)
+ (UNSPEC_TLSTLS 29)
+ (UNSPEC_FIX_TRUNC_TF 30) ; fadd, rounding towards zero
+ (UNSPEC_MV_CR_GT 31) ; move_from_CR_gt_bit
+ (UNSPEC_STFIWX 32)
+ (UNSPEC_POPCNTB 33)
+ (UNSPEC_FRES 34)
+ (UNSPEC_SP_SET 35)
+ (UNSPEC_SP_TEST 36)
+ (UNSPEC_SYNC 37)
+ (UNSPEC_LWSYNC 38)
+ (UNSPEC_ISYNC 39)
+ (UNSPEC_SYNC_OP 40)
+ (UNSPEC_ATOMIC 41)
+ (UNSPEC_CMPXCHG 42)
+ (UNSPEC_XCHG 43)
+ (UNSPEC_AND 44)
+ (UNSPEC_DLMZB 45)
+ (UNSPEC_DLMZB_CR 46)
+ (UNSPEC_DLMZB_STRLEN 47)
+ (UNSPEC_RSQRT 48)
+ (UNSPEC_TOCREL 49)
+ (UNSPEC_MACHOPIC_OFFSET 50)
+ (UNSPEC_BPERM 51)
+ (UNSPEC_COPYSIGN 52)
+ (UNSPEC_PARITY 53)
+ (UNSPEC_FCTIW 54)
+ (UNSPEC_FCTID 55)
+ (UNSPEC_LFIWAX 56)
+ (UNSPEC_LFIWZX 57)
+ (UNSPEC_FCTIWUZ 58)
+ ])
+
+;;
+;; UNSPEC_VOLATILE usage
+;;
+
+(define_constants
+ [(UNSPECV_BLOCK 0)
+ (UNSPECV_LL 1) ; load-locked
+ (UNSPECV_SC 2) ; store-conditional
+ (UNSPECV_PROBE_STACK_RANGE 3) ; probe range of stack addresses
+ (UNSPECV_EH_RR 9) ; eh_reg_restore
+ ])
+
+;; Define an insn type attribute. This is used in function unit delay
+;; computations.
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel"
+ (const_string "integer"))
+
+;; Define floating point instruction sub-types for use with Xfpu.md
+(define_attr "fp_type" "fp_default,fp_addsub_s,fp_addsub_d,fp_mul_s,fp_mul_d,fp_div_s,fp_div_d,fp_maddsub_s,fp_maddsub_d,fp_sqrt_s,fp_sqrt_d" (const_string "fp_default"))
+
+;; Length (in bytes).
+; '(pc)' in the following doesn't include the instruction itself; it is
+; calculated as if the instruction had zero size.
+(define_attr "length" ""
+ (if_then_else (eq_attr "type" "branch")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -32768))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 32764)))
+ (const_int 4)
+ (const_int 8))
+ (const_int 4)))
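+
+;; In other words: a conditional branch whose target lies within the
+;; signed 16-bit displacement range (here [-32768, 32764), leaving one
+;; word of slack at the top) is a single 4-byte instruction; anything
+;; farther away takes 8 bytes, i.e. an inverted short branch around an
+;; unconditional jump.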
+
+;; Processor type -- this attribute must exactly match the processor_type
+;; enumeration in rs6000.h.
+
+(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,power4,power5,power6,power7,cell,ppca2,titan"
+ (const (symbol_ref "rs6000_cpu_attr")))
+
+
+;; Whether this instruction is microcoded on the Cell processor.
+; By default, load-extended, record-form compare, and variable
+; rotate/shift instructions are always microcoded.
+(define_attr "cell_micro" "not,conditional,always"
+ (if_then_else (eq_attr "type" "compare,delayed_compare,imul_compare,lmul_compare,load_ext,load_ext_ux,var_shift_rotate,var_delayed_compare")
+ (const_string "always")
+ (const_string "not")))
+
+(automata_option "ndfa")
+
+(include "rios1.md")
+(include "rios2.md")
+(include "rs64.md")
+(include "mpc.md")
+(include "40x.md")
+(include "440.md")
+(include "476.md")
+(include "603.md")
+(include "6xx.md")
+(include "7xx.md")
+(include "7450.md")
+(include "8540.md")
+(include "e300c2c3.md")
+(include "e500mc.md")
+(include "e500mc64.md")
+(include "power4.md")
+(include "power5.md")
+(include "power6.md")
+(include "power7.md")
+(include "cell.md")
+(include "xfpu.md")
+(include "a2.md")
+(include "titan.md")
+
+(include "predicates.md")
+(include "constraints.md")
+
+(include "darwin.md")
+
+
+;; Mode iterators
+
+; This mode iterator allows :GPR to be used to indicate the allowable size
+; of whole values in GPRs.
+(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")])
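+; For instance, a (hypothetical) pattern "foo<mode>" written with :GPR
+; generates "foosi" unconditionally and "foodi" only when
+; TARGET_POWERPC64 holds.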
+
+; Any supported integer mode.
+(define_mode_iterator INT [QI HI SI DI TI])
+
+; Any supported integer mode that fits in one register.
+(define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")])
+
+; extend modes for DImode
+(define_mode_iterator QHSI [QI HI SI])
+
+; SImode or DImode, even if DImode doesn't fit in GPRs.
+(define_mode_iterator SDI [SI DI])
+
+; The size of a pointer. Also, the size of the value that a record-condition
+; (one with a '.') will compare; and the size used for arithmetic carries.
+(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+
+; Any hardware-supported floating-point mode
+(define_mode_iterator FP [
+ (SF "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) || TARGET_E500_SINGLE)")
+ (DF "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)")
+ (TF "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128")
+ (DD "TARGET_DFP")
+ (TD "TARGET_DFP")])
+
+; Any fma capable floating-point mode.
+(define_mode_iterator FMA_F [
+ (SF "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT")
+ (DF "(TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
+ || VECTOR_UNIT_VSX_P (DFmode)")
+ (V2SF "TARGET_PAIRED_FLOAT")
+ (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)")
+ (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
+ ])
+
+; These modes do not fit in integer registers in 32-bit mode,
+; but on e500v2 the GPRs are 64-bit registers.
+(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
+
+; Iterator for reciprocal estimate instructions
+(define_mode_iterator RECIPF [SF DF V4SF V2DF])
+
+; Iterator for just SF/DF
+(define_mode_iterator SFDF [SF DF])
+
+; Various instructions that come in SI and DI forms.
+; A generic w/d attribute, for things like cmpw/cmpd.
+(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
+
+; DImode bits
+(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")])
+
+;; ISEL/ISEL64 target selection
+(define_mode_attr sel [(SI "") (DI "64")])
+
+;; Suffix for reload patterns
+(define_mode_attr ptrsize [(SI "32bit")
+ (DI "64bit")])
+
+(define_mode_attr tptrsize [(SI "TARGET_32BIT")
+ (DI "TARGET_64BIT")])
+
+(define_mode_attr mptrsize [(SI "si")
+ (DI "di")])
+
+(define_mode_attr rreg [(SF "f")
+ (DF "ws")
+ (V4SF "wf")
+ (V2DF "wd")])
+
+(define_mode_attr rreg2 [(SF "f")
+ (DF "d")])
+
+(define_mode_attr SI_CONVERT_FP [(SF "TARGET_FCFIDS")
+ (DF "TARGET_FCFID")])
+
+(define_mode_attr E500_CONVERT [(SF "!TARGET_FPRS")
+ (DF "TARGET_E500_DOUBLE")])
+
+(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT")
+ (DF "TARGET_DOUBLE_FLOAT")])
+
+;; Start with fixed-point load and store insns. Here we put only the more
+;; complex forms. Basic data transfer is done later.
+
+(define_expand "zero_extend<mode>di2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (match_operand:QHSI 1 "gpc_reg_operand" "")))]
+ "TARGET_POWERPC64"
+ "")
+
+(define_insn "*zero_extend<mode>di2_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
+ "TARGET_POWERPC64"
+ "@
+ l<wd>z%U1%X1 %0,%1
+ rldicl %0,%1,0,<dbits>"
+ [(set_attr "type" "load,*")])
+
+(define_insn "*zero_extend<mode>di2_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:DI (match_operand:QHSI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldicl. %2,%1,0,<dbits>
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI (match_operand:QHSI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 2)
+ (zero_extend:DI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn "*zero_extend<mode>di2_internal3"
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:DI (match_operand:QHSI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "@
+ rldicl. %0,%1,0,<dbits>
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI (match_operand:QHSI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (match_dup 1)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC64"
+ "extsb %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ extsb. %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "@
+ extsb. %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendhidi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" "")))]
+ "TARGET_POWERPC64"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI (match_operand:HI 1 "reg_or_mem_operand" "m,r")))]
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "@
+ lha%U1%X1 %0,%1
+ extsh %0,%1"
+ [(set_attr "type" "load_ext,exts")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
+ "extsh %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ extsh. %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "@
+ extsh. %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "")))]
+ "TARGET_POWERPC64"
+ "")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "@
+ lwa%U1%X1 %0,%1
+ extsw %0,%1"
+ [(set_attr "type" "load_ext,exts")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
+ "extsw %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ extsw. %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 2 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "@
+ extsw. %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (sign_extend:DI (match_dup 1)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:DI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "gpc_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "reg_or_mem_operand" "m,r")))]
+ ""
+ "@
+ lbz%U1%X1 %0,%1
+ {rlinm|rlwinm} %0,%1,0,0xff"
+ [(set_attr "type" "load,*")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=r,r"))]
+ ""
+ "@
+ {andil.|andi.} %2,%1,0xff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (match_dup 1)))]
+ ""
+ "@
+ {andil.|andi.} %0,%1,0xff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendqisi2"
+ [(use (match_operand:SI 0 "gpc_reg_operand" ""))
+ (use (match_operand:QI 1 "gpc_reg_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_POWERPC)
+ emit_insn (gen_extendqisi2_ppc (operands[0], operands[1]));
+ else if (TARGET_POWER)
+ emit_insn (gen_extendqisi2_power (operands[0], operands[1]));
+ else
+ emit_insn (gen_extendqisi2_no_power (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn "extendqisi2_ppc"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC"
+ "extsb %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=r,r"))]
+ "TARGET_POWERPC"
+ "@
+ extsb. %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 ""))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:SI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:SI (match_dup 1)))]
+ "TARGET_POWERPC"
+ "@
+ extsb. %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:SI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (sign_extend:SI (match_dup 1)))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:SI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendqisi2_power"
+ [(parallel [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "gpc_reg_operand" "")
+ (const_int 24)))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))
+ (clobber (scratch:SI))])]
+ "TARGET_POWER"
+ "
+{ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode); }")
+
+(define_expand "extendqisi2_no_power"
+ [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "gpc_reg_operand" "")
+ (const_int 24)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "
+{ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode); }")
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "gpc_reg_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "gpc_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:HI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "reg_or_mem_operand" "m,r")))]
+ ""
+ "@
+ lbz%U1%X1 %0,%1
+ {rlinm|rlwinm} %0,%1,0,0xff"
+ [(set_attr "type" "load,*")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 2 "=r,r"))]
+ ""
+ "@
+ {andil.|andi.} %2,%1,0xff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:HI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (zero_extend:HI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:HI (match_dup 1)))]
+ ""
+ "@
+ {andil.|andi.} %0,%1,0xff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:HI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "gpc_reg_operand" "")
+ (zero_extend:HI (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:HI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendqihi2"
+ [(use (match_operand:HI 0 "gpc_reg_operand" ""))
+ (use (match_operand:QI 1 "gpc_reg_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_POWERPC)
+ emit_insn (gen_extendqihi2_ppc (operands[0], operands[1]));
+ else if (TARGET_POWER)
+ emit_insn (gen_extendqihi2_power (operands[0], operands[1]));
+ else
+ emit_insn (gen_extendqihi2_no_power (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn "extendqihi2_ppc"
+ [(set (match_operand:HI 0 "gpc_reg_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC"
+ "extsb %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 2 "=r,r"))]
+ "TARGET_POWERPC"
+ "@
+ extsb. %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 2 ""))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:HI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:HI (match_dup 1)))]
+ "TARGET_POWERPC"
+ "@
+ extsb. %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:HI (match_operand:QI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "gpc_reg_operand" "")
+ (sign_extend:HI (match_dup 1)))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:HI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendqihi2_power"
+ [(parallel [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "gpc_reg_operand" "")
+ (const_int 24)))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_operand:HI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))
+ (clobber (scratch:SI))])]
+ "TARGET_POWER"
+ "
+{ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode); }")
+
+(define_expand "extendqihi2_no_power"
+ [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "gpc_reg_operand" "")
+ (const_int 24)))
+ (set (match_operand:HI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "
+{ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode); }")
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "gpc_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "reg_or_mem_operand" "m,r")))]
+ ""
+ "@
+ lhz%U1%X1 %0,%1
+ {rlinm|rlwinm} %0,%1,0,0xffff"
+ [(set_attr "type" "load,*")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:SI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=r,r"))]
+ ""
+ "@
+ {andil.|andi.} %2,%1,0xffff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extend:SI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (match_dup 1)))]
+ ""
+ "@
+ {andil.|andi.} %0,%1,0xffff
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "reg_or_mem_operand" "m,r")))]
+ "rs6000_gen_cell_microcode"
+ "@
+ lha%U1%X1 %0,%1
+ {exts|extsh} %0,%1"
+ [(set_attr "type" "load_ext,exts")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" "r")))]
+ "!rs6000_gen_cell_microcode"
+ "{exts|extsh} %0,%1"
+ [(set_attr "type" "exts")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 "=r,r"))]
+ ""
+ "@
+ {exts.|extsh.} %2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (sign_extend:SI (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:SI (match_dup 1)))]
+ ""
+ "@
+ {exts.|extsh.} %0,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+;; IBM 405, 440, 464 and 476 half-word multiplication operations.
+
+(define_insn "*macchwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16))
+ (sign_extend:SI
+ (match_dup 1)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "macchw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*macchw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "macchw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*macchwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (lshiftrt:SI
+ (match_dup 2)
+ (const_int 16))
+ (zero_extend:SI
+ (match_dup 1)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "macchwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*macchwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "macchwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*machhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_dup 1)
+ (const_int 16))
+ (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "machhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*machhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "machhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*machhwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (lshiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (lshiftrt:SI
+ (match_dup 1)
+ (const_int 16))
+ (lshiftrt:SI
+ (match_dup 2)
+ (const_int 16)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "machhwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*machhwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (lshiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "machhwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*maclhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_dup 1))
+ (sign_extend:SI
+ (match_dup 2)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "maclhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*maclhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "maclhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*maclhwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (plus:SI (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (match_operand:SI 4 "gpc_reg_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (zero_extend:SI
+ (match_dup 1))
+ (zero_extend:SI
+ (match_dup 2)))
+ (match_dup 4)))]
+ "TARGET_MULHW"
+ "maclhwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*maclhwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (match_operand:SI 3 "gpc_reg_operand" "0")))]
+ "TARGET_MULHW"
+ "maclhwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmacchwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r"))))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_dup 4)
+ (mult:SI (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16))
+ (sign_extend:SI
+ (match_dup 1)))))]
+ "TARGET_MULHW"
+ "nmacchw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmacchw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))))]
+ "TARGET_MULHW"
+ "nmacchw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmachhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_dup 4)
+ (mult:SI (ashiftrt:SI
+ (match_dup 1)
+ (const_int 16))
+ (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16)))))]
+ "TARGET_MULHW"
+ "nmachhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmachhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))))]
+ "TARGET_MULHW"
+ "nmachhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmaclhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (minus:SI (match_operand:SI 4 "gpc_reg_operand" "0")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r"))))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_dup 4)
+ (mult:SI (sign_extend:SI
+ (match_dup 1))
+ (sign_extend:SI
+ (match_dup 2)))))]
+ "TARGET_MULHW"
+ "nmaclhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*nmaclhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_operand:SI 3 "gpc_reg_operand" "0")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))))]
+ "TARGET_MULHW"
+ "nmaclhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulchwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16))
+ (sign_extend:SI
+ (match_dup 1))))]
+ "TARGET_MULHW"
+ "mulchw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulchw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r"))))]
+ "TARGET_MULHW"
+ "mulchw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulchwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (lshiftrt:SI
+ (match_dup 2)
+ (const_int 16))
+ (zero_extend:SI
+ (match_dup 1))))]
+ "TARGET_MULHW"
+ "mulchwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulchwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))
+ (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "r"))))]
+ "TARGET_MULHW"
+ "mulchwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulhhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_dup 1)
+ (const_int 16))
+ (ashiftrt:SI
+ (match_dup 2)
+ (const_int 16))))]
+ "TARGET_MULHW"
+ "mulhhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulhhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (ashiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (ashiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))))]
+ "TARGET_MULHW"
+ "mulhhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulhhwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (lshiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (lshiftrt:SI
+ (match_dup 1)
+ (const_int 16))
+ (lshiftrt:SI
+ (match_dup 2)
+ (const_int 16))))]
+ "TARGET_MULHW"
+ "mulhhwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mulhhwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (lshiftrt:SI
+ (match_operand:SI 1 "gpc_reg_operand" "%r")
+ (const_int 16))
+ (lshiftrt:SI
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 16))))]
+ "TARGET_MULHW"
+ "mulhhwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mullhwc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (sign_extend:SI
+ (match_dup 1))
+ (sign_extend:SI
+ (match_dup 2))))]
+ "TARGET_MULHW"
+ "mullhw. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mullhw"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_MULHW"
+ "mullhw %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mullhwuc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (compare:CC (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (zero_extend:SI
+ (match_dup 1))
+ (zero_extend:SI
+ (match_dup 2))))]
+ "TARGET_MULHW"
+ "mullhwu. %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+(define_insn "*mullhwu"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_MULHW"
+ "mullhwu %0, %1, %2"
+ [(set_attr "type" "imul3")])
+
+;; IBM 405, 440, 464 and 476 string-search dlmzb instruction support.
+(define_insn "dlmzb"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
+ UNSPEC_DLMZB_CR))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_DLMZB))]
+ "TARGET_DLMZB"
+ "dlmzb. %0, %1, %2")
+
+(define_expand "strlensi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (unspec:SI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_DLMZB_STRLEN))
+ (clobber (match_scratch:CC 4 "=x"))]
+ "TARGET_DLMZB && WORDS_BIG_ENDIAN && !optimize_size"
+{
+ rtx result = operands[0];
+ rtx src = operands[1];
+ rtx search_char = operands[2];
+ rtx align = operands[3];
+ rtx addr, scratch_string, word1, word2, scratch_dlmzb;
+ rtx loop_label, end_label, mem, cr0, cond;
+ if (search_char != const0_rtx
+ || GET_CODE (align) != CONST_INT
+ || INTVAL (align) < 8)
+ FAIL;
+ word1 = gen_reg_rtx (SImode);
+ word2 = gen_reg_rtx (SImode);
+ scratch_dlmzb = gen_reg_rtx (SImode);
+ scratch_string = gen_reg_rtx (Pmode);
+ loop_label = gen_label_rtx ();
+ end_label = gen_label_rtx ();
+ addr = force_reg (Pmode, XEXP (src, 0));
+ emit_move_insn (scratch_string, addr);
+ emit_label (loop_label);
+ mem = change_address (src, SImode, scratch_string);
+ emit_move_insn (word1, mem);
+ emit_move_insn (word2, adjust_address (mem, SImode, 4));
+ cr0 = gen_rtx_REG (CCmode, CR0_REGNO);
+ emit_insn (gen_dlmzb (scratch_dlmzb, word1, word2, cr0));
+ cond = gen_rtx_NE (VOIDmode, cr0, const0_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ cond,
+ gen_rtx_LABEL_REF
+ (VOIDmode,
+ end_label),
+ pc_rtx)));
+ emit_insn (gen_addsi3 (scratch_string, scratch_string, GEN_INT (8)));
+ emit_jump_insn (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, loop_label)));
+ emit_barrier ();
+ emit_label (end_label);
+ emit_insn (gen_addsi3 (scratch_string, scratch_string, scratch_dlmzb));
+ emit_insn (gen_subsi3 (result, scratch_string, addr));
+ emit_insn (gen_subsi3 (result, result, const1_rtx));
+ DONE;
+})
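+
+;; In outline (a C-like sketch of the loop built above, not the literal
+;; RTL):
+;;
+;;     s = src;
+;;     for (;;) {
+;;       w1 = ((int *) s)[0];  w2 = ((int *) s)[1];
+;;       len = dlmzb. (w1, w2);     /* CR0 != 0 once a zero byte is seen */
+;;       if (zero byte found) break;
+;;       s += 8;
+;;     }
+;;     return (s + len) - src - 1;
+;;
+;; hence the requirements checked above: big-endian byte order and an
+;; alignment of at least 8 so that the two word loads are safe.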
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (sign_extend:SI (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (sign_extend:SI (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Fixed-point arithmetic insns.
+
+(define_expand "add<mode>3"
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
+ (plus:SDI (match_operand:SDI 1 "gpc_reg_operand" "")
+ (match_operand:SDI 2 "reg_or_add_cint_operand" "")))]
+ ""
+{
+ if (<MODE>mode == DImode && ! TARGET_POWERPC64)
+ {
+ if (non_short_cint_operand (operands[2], DImode))
+ FAIL;
+ }
+ else if (GET_CODE (operands[2]) == CONST_INT
+ && ! add_operand (operands[2], <MODE>mode))
+ {
+ rtx tmp = ((!can_create_pseudo_p ()
+ || rtx_equal_p (operands[0], operands[1]))
+ ? operands[0] : gen_reg_rtx (<MODE>mode));
+
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
+
+ if (<MODE>mode == DImode && !satisfies_constraint_L (GEN_INT (rest)))
+ FAIL;
+
+ /* The ordering here is important for the prolog expander.
+ When space is allocated from the stack, adding 'low' first may
+ produce a temporary deallocation (which would be bad). */
+ emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest)));
+ emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low)));
+ DONE;
+ }
+})
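+
+;; Worked example of the low/rest split above: for val = 0x1234abcd the
+;; low 16 bits sign-extend to low = -0x5433 and rest = 0x12350000, so
+;; the two generated adds (an addis of 0x1235 followed by an addi of
+;; -0x5433) reconstruct the original constant.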
+
+;; Discourage ai/addic because it clobbers the carry, but provide it in
+;; an alternative that allows register zero as a source.
+(define_insn "*add<mode>3_internal1"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,?r,r")
+ (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,r,b")
+ (match_operand:GPR 2 "add_operand" "r,I,I,L")))]
+ "!DECIMAL_FLOAT_MODE_P (GET_MODE (operands[0])) && !DECIMAL_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "@
+ {cax|add} %0,%1,%2
+ {cal %0,%2(%1)|addi %0,%1,%2}
+ {ai|addic} %0,%1,%2
+ {cau|addis} %0,%1,%v2"
+ [(set_attr "length" "4,4,4,4")])
+
+(define_insn "addsi3_high"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=b")
+ (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (high:SI (match_operand 2 "" ""))))]
+ "TARGET_MACHO && !TARGET_64BIT"
+ "{cau|addis} %0,%1,ha16(%2)"
+ [(set_attr "length" "4")])
+
+(define_insn "*add<mode>3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (plus:P (match_operand:P 1 "gpc_reg_operand" "%r,r,r,r")
+ (match_operand:P 2 "reg_or_short_operand" "r,I,r,I"))
+ (const_int 0)))
+ (clobber (match_scratch:P 3 "=r,r,r,r"))]
+ ""
+ "@
+ {cax.|add.} %3,%1,%2
+ {ai.|addic.} %3,%1,%2
+ #
+ #"
+ [(set_attr "type" "fast_compare,compare,compare,compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (plus:GPR (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*add<mode>3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (plus:P (match_operand:P 1 "gpc_reg_operand" "%r,r,r,r")
+ (match_operand:P 2 "reg_or_short_operand" "r,I,r,I"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r,r")
+ (plus:P (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "@
+ {cax.|add.} %0,%1,%2
+ {ai.|addic.} %0,%1,%2
+ #
+ #"
+ [(set_attr "type" "fast_compare,compare,compare,compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (plus:P (match_operand:P 1 "gpc_reg_operand" "")
+ (match_operand:P 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (plus:P (match_dup 1) (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (plus:P (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Split an add that we can't do in one insn into two insns, each of which
+;; does one 16-bit part. This is used by combine. Note that the low-order
+;; add should be last in case the result gets used in an address.
+
+(define_split
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "non_add_cint_operand" "")))]
+ ""
+ [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+{
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
+
+ operands[4] = GEN_INT (low);
+ if (<MODE>mode == SImode || satisfies_constraint_L (GEN_INT (rest)))
+ operands[3] = GEN_INT (rest);
+ else if (can_create_pseudo_p ())
+ {
+ operands[3] = gen_reg_rtx (DImode);
+ emit_move_insn (operands[3], operands[2]);
+ emit_insn (gen_adddi3 (operands[0], operands[1], operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
+ ""
+ "nor %0,%1,%1")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (not:P (match_operand:P 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:P 2 "=r,r"))]
+ ""
+ "@
+ nor. %2,%1,%1
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (not:P (match_operand:P 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:P 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (not:P (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (not:P (match_operand:P 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (not:P (match_dup 1)))]
+ ""
+ "@
+ nor. %0,%1,%1
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (not:P (match_operand:P 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (not:P (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (not:P (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (minus:SI (match_operand:SI 1 "reg_or_short_operand" "rI")
+ (match_operand:SI 2 "gpc_reg_operand" "r")))]
+ "! TARGET_POWERPC"
+ "{sf%I1|subf%I1c} %0,%2,%1")
+
+(define_insn ""
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
+ (minus:GPR (match_operand:GPR 1 "reg_or_short_operand" "r,I")
+ (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
+ "TARGET_POWERPC"
+ "@
+ subf %0,%2,%1
+ subfic %0,%2,%1")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (minus:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "! TARGET_POWERPC"
+ "@
+ {sf.|subfc.} %3,%2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (minus:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:P 3 "=r,r"))]
+ "TARGET_POWERPC"
+ "@
+ subf. %3,%2,%1
+ #"
+ [(set_attr "type" "fast_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (minus:P (match_operand:P 1 "gpc_reg_operand" "")
+ (match_operand:P 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:P 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (minus:P (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (minus:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWERPC"
+ "@
+ {sf.|subfc.} %0,%2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (minus:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (minus:P (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_POWERPC"
+ "@
+ subf. %0,%2,%1
+ #"
+ [(set_attr "type" "fast_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (minus:P (match_operand:P 1 "gpc_reg_operand" "")
+ (match_operand:P 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (minus:P (match_dup 1)
+ (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (minus:P (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
+ (minus:SDI (match_operand:SDI 1 "reg_or_short_operand" "")
+ (match_operand:SDI 2 "reg_or_sub_cint_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_add<mode>3 (operands[0], operands[1],
+ negate_rtx (<MODE>mode, operands[2])));
+ DONE;
+ }
+}")
+
+;; For SMIN, SMAX, UMIN, and UMAX, we use DEFINE_EXPANDs that involve a doz[i]
+;; instruction and some auxiliary computations.  Then we have a single
+;; DEFINE_INSN for doz[i] and define_splits to recognize such sequences
+;; when they are produced by combine.
+
+(define_expand "sminsi3"
+ [(set (match_dup 3)
+ (if_then_else:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (minus:SI (match_dup 2) (match_dup 3)))]
+ "TARGET_POWER || TARGET_ISEL"
+ "
+{
+ if (TARGET_ISEL)
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]);
+ DONE;
+ }
+
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (smin:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" "")))
+ (clobber (match_operand:SI 3 "gpc_reg_operand" ""))]
+ "TARGET_POWER"
+ [(set (match_dup 3)
+ (if_then_else:SI (gt:SI (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 3)))]
+ "")
+
+(define_expand "smaxsi3"
+ [(set (match_dup 3)
+ (if_then_else:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (match_dup 3) (match_dup 1)))]
+ "TARGET_POWER || TARGET_ISEL"
+ "
+{
+ if (TARGET_ISEL)
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]);
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (smax:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" "")))
+ (clobber (match_operand:SI 3 "gpc_reg_operand" ""))]
+ "TARGET_POWER"
+ [(set (match_dup 3)
+ (if_then_else:SI (gt:SI (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_dup 0) (plus:SI (match_dup 3) (match_dup 1)))]
+ "")
+
+(define_expand "uminsi3"
+ [(set (match_dup 3) (xor:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_dup 5)))
+ (set (match_dup 4) (xor:SI (match_operand:SI 2 "gpc_reg_operand" "")
+ (match_dup 5)))
+ (set (match_dup 3) (if_then_else:SI (gt (match_dup 3) (match_dup 4))
+ (const_int 0)
+ (minus:SI (match_dup 4) (match_dup 3))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (minus:SI (match_dup 2) (match_dup 3)))]
+ "TARGET_POWER || TARGET_ISEL"
+ "
+{
+ if (TARGET_ISEL)
+ {
+ rs6000_emit_minmax (operands[0], UMIN, operands[1], operands[2]);
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = GEN_INT (-2147483647 - 1);
+}")
+
+(define_expand "umaxsi3"
+ [(set (match_dup 3) (xor:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_dup 5)))
+ (set (match_dup 4) (xor:SI (match_operand:SI 2 "gpc_reg_operand" "")
+ (match_dup 5)))
+ (set (match_dup 3) (if_then_else:SI (gt (match_dup 3) (match_dup 4))
+ (const_int 0)
+ (minus:SI (match_dup 4) (match_dup 3))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (match_dup 3) (match_dup 1)))]
+ "TARGET_POWER || TARGET_ISEL"
+ "
+{
+ if (TARGET_ISEL)
+ {
+ rs6000_emit_minmax (operands[0], UMAX, operands[1], operands[2]);
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = GEN_INT (-2147483647 - 1);
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (if_then_else:SI (gt (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI"))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (if_then_else:SI (gt (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1)))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "TARGET_POWER"
+ "@
+ doz%I2. %3,%1,%2
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (if_then_else:SI (gt (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1)))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 3)
+ (if_then_else:SI (gt (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (if_then_else:SI (gt (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (if_then_else:SI (gt (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))]
+ "TARGET_POWER"
+ "@
+ doz%I2. %0,%1,%2
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (if_then_else:SI (gt (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (if_then_else:SI (gt (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI (gt (match_dup 1) (match_dup 2))
+ (const_int 0)
+ (minus:SI (match_dup 2) (match_dup 1))))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; We don't need a version of abs that sets the condition code, because
+;; such comparisons should never be generated.
+(define_expand "abssi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (abs:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_ISEL)
+ {
+ emit_insn (gen_abssi2_isel (operands[0], operands[1]));
+ DONE;
+ }
+ else if (! TARGET_POWER)
+ {
+ emit_insn (gen_abssi2_nopower (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_insn "*abssi2_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWER"
+ "abs %0,%1")
+
+(define_insn_and_split "abs<mode>2_isel"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (abs:GPR (match_operand:GPR 1 "gpc_reg_operand" "b")))
+ (clobber (match_scratch:GPR 2 "=&b"))
+ (clobber (match_scratch:CC 3 "=y"))]
+ "TARGET_ISEL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (neg:GPR (match_dup 1)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 1)
+ (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else:GPR (lt (match_dup 3)
+ (const_int 0))
+ (match_dup 2)
+ (match_dup 1)))]
+ "")
+
+(define_insn_and_split "nabs<mode>2_isel"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (neg:GPR (abs:GPR (match_operand:GPR 1 "gpc_reg_operand" "b"))))
+ (clobber (match_scratch:GPR 2 "=&b"))
+ (clobber (match_scratch:CC 3 "=y"))]
+ "TARGET_ISEL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (neg:GPR (match_dup 1)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 1)
+ (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else:GPR (lt (match_dup 3)
+ (const_int 0))
+ (match_dup 1)
+ (match_dup 2)))]
+ "")
+
+(define_insn_and_split "abssi2_nopower"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
+ (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0")))
+ (clobber (match_scratch:SI 2 "=&r,&r"))]
+ "! TARGET_POWER && ! TARGET_ISEL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 2)))]
+ "")
+
+(define_insn "*nabs_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r"))))]
+ "TARGET_POWER"
+ "nabs %0,%1")
+
+(define_insn_and_split "*nabs_nopower"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0"))))
+ (clobber (match_scratch:SI 2 "=&r,&r"))]
+ "! TARGET_POWER"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 0)))]
+ "")
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
+ (neg:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn "*neg<mode>2_internal"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
+ ""
+ "neg %0,%1")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (neg:P (match_operand:P 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:P 2 "=r,r"))]
+ ""
+ "@
+ neg. %2,%1
+ #"
+ [(set_attr "type" "fast_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (neg:P (match_operand:P 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:P 2 ""))]
+ "reload_completed"
+ [(set (match_dup 2)
+ (neg:P (match_dup 1)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 2)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
+ (compare:CC (neg:P (match_operand:P 1 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (neg:P (match_dup 1)))]
+ ""
+ "@
+ neg. %0,%1
+ #"
+ [(set_attr "type" "fast_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_cr0_operand" "")
+ (compare:CC (neg:P (match_operand:P 1 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (neg:P (match_dup 1)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (neg:P (match_dup 1)))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (clz:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
+ ""
+ "{cntlz|cntlz<wd>} %0,%1"
+ [(set_attr "type" "cntlz")])
+
+(define_expand "ctz<mode>2"
+ [(set (match_dup 2)
+ (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
+ (parallel [(set (match_dup 3) (and:GPR (match_dup 1)
+ (match_dup 2)))
+ (clobber (scratch:CC))])
+ (set (match_dup 4) (clz:GPR (match_dup 3)))
+ (set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (minus:GPR (match_dup 5) (match_dup 4)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+ })
+
+(define_expand "ffs<mode>2"
+ [(set (match_dup 2)
+ (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
+ (parallel [(set (match_dup 3) (and:GPR (match_dup 1)
+ (match_dup 2)))
+ (clobber (scratch:CC))])
+ (set (match_dup 4) (clz:GPR (match_dup 3)))
+ (set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (minus:GPR (match_dup 5) (match_dup 4)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+ })
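+
+;; Both expanders above reduce to a count-leading-zeros of x & -x, which
+;; isolates the lowest set bit.  A C sketch (cntlzw returns 32 for an
+;; input of 0, which is what makes ffs (0) come out as 0):
+;;
+;;   #include <stdint.h>
+;;
+;;   static int clz32 (uint32_t x) { return x ? __builtin_clz (x) : 32; }
+;;
+;;   int ctz32 (uint32_t x) { return 31 - clz32 (x & -x); }  /* x != 0 */
+;;   int ffs32 (uint32_t x) { return 32 - clz32 (x & -x); }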
+
+(define_insn "popcntb<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")]
+ UNSPEC_POPCNTB))]
+ "TARGET_POPCNTB"
+ "popcntb %0,%1")
+
+(define_insn "popcntd<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
+ "TARGET_POPCNTD"
+ "popcnt<wd> %0,%1")
+
+(define_expand "popcount<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))]
+ "TARGET_POPCNTB || TARGET_POPCNTD"
+ {
+ rs6000_emit_popcount (operands[0], operands[1]);
+ DONE;
+ })
+
+(define_insn "parity<mode>2_cmpb"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
+ "TARGET_CMPB && TARGET_POPCNTB"
+ "prty<wd> %0,%1")
+
+(define_expand "parity<mode>2"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (parity:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))]
+ "TARGET_POPCNTB"
+ {
+ rs6000_emit_parity (operands[0], operands[1]);
+ DONE;
+ })
+
+;; Since the hardware zeros the upper part of the register, we can avoid
+;; generating the AND immediate when converting to unsigned.
+(define_insn "*bswaphi2_extenddi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extend:DI
+ (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC64"
+ "lhbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+(define_insn "*bswaphi2_extendsi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC"
+ "lhbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+(define_expand "bswaphi2"
+ [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "")
+ (bswap:HI
+ (match_operand:HI 1 "reg_or_mem_operand" "")))
+ (clobber (match_scratch:SI 2 ""))])]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "bswaphi2_internal"
+ [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r")
+ (bswap:HI
+ (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:SI 2 "=X,X,&r"))]
+ "TARGET_POWERPC"
+ "@
+ lhbrx %0,%y1
+ sthbrx %1,%y0
+ #"
+ [(set_attr "length" "4,4,12")
+ (set_attr "type" "load,store,*")])
+
+(define_split
+ [(set (match_operand:HI 0 "gpc_reg_operand" "")
+ (bswap:HI (match_operand:HI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+ "TARGET_POWERPC && reload_completed"
+ [(set (match_dup 3)
+ (zero_extract:SI (match_dup 4)
+ (const_int 8)
+ (const_int 16)))
+ (set (match_dup 2)
+ (and:SI (ashift:SI (match_dup 4)
+ (const_int 8))
+ (const_int 65280))) ;; 0xff00
+ (set (match_dup 3)
+ (ior:SI (match_dup 3)
+ (match_dup 2)))]
+ "
+{
+ operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
+ operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+}")
+
+(define_insn "*bswapsi2_extenddi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extend:DI
+ (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))]
+ "TARGET_POWERPC64"
+ "lwbrx %0,%y1"
+ [(set_attr "length" "4")
+ (set_attr "type" "load")])
+
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "reg_or_mem_operand" "")
+ (bswap:SI
+ (match_operand:SI 1 "reg_or_mem_operand" "")))]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "*bswapsi2_internal"
+ [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r")
+ (bswap:SI
+ (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
+ ""
+ "@
+ {lbrx|lwbrx} %0,%y1
+ {stbrx|stwbrx} %1,%y0
+ #"
+ [(set_attr "length" "4,4,12")
+ (set_attr "type" "load,store,*")])
+
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (rotate:SI (match_dup 1) (const_int 8)))
+ (set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 0))
+ (match_dup 1))
+ (set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 16))
+ (rotate:SI (match_dup 1)
+ (const_int 16)))]
+ "")
+
+(define_expand "bswapdi2"
+ [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "")
+ (bswap:DI
+ (match_operand:DI 1 "reg_or_mem_operand" "")))
+ (clobber (match_scratch:DI 2 ""))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:DI 4 ""))])]
+ ""
+{
+ if (!REG_P (operands[0]) && !REG_P (operands[1]))
+ operands[1] = force_reg (DImode, operands[1]);
+
+ if (!TARGET_POWERPC64)
+ {
+      /* 32-bit mode needs fewer scratch registers, but a 32-bit addressing
+         mode that uses 64-bit registers needs the same scratch registers as
+         64-bit mode.  */
+ emit_insn (gen_bswapdi2_32bit (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; Power7/cell have ldbrx/stdbrx, so use them directly.
+(define_insn "*bswapdi2_ldbrx"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:DI 2 "=X,X,&r"))
+ (clobber (match_scratch:DI 3 "=X,X,&r"))
+ (clobber (match_scratch:DI 4 "=X,X,&r"))]
+ "TARGET_POWERPC64 && TARGET_LDBRX
+ && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "@
+ ldbrx %0,%y1
+ stdbrx %1,%y0
+ #"
+ [(set_attr "length" "4,4,36")
+ (set_attr "type" "load,store,*")])
+
+;; Non-Power7/cell: fall back to lwbrx/stwbrx.
+(define_insn "*bswapdi2_64bit"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:DI 2 "=&b,&b,&r"))
+ (clobber (match_scratch:DI 3 "=&r,&r,&r"))
+ (clobber (match_scratch:DI 4 "=&r,X,&r"))]
+ "TARGET_POWERPC64 && !TARGET_LDBRX
+ && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "#"
+ [(set_attr "length" "16,12,36")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" "")))
+ (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 4 "gpc_reg_operand" ""))]
+ "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx op4 = operands[4];
+ rtx op3_32 = simplify_gen_subreg (SImode, op3, DImode, 4);
+ rtx op4_32 = simplify_gen_subreg (SImode, op4, DImode, 4);
+ rtx addr1;
+ rtx addr2;
+ rtx word_high;
+ rtx word_low;
+
+ addr1 = XEXP (src, 0);
+ if (GET_CODE (addr1) == PLUS)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4)));
+ if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2));
+ addr2 = op2;
+ }
+ else
+ addr2 = gen_rtx_PLUS (Pmode, op2, XEXP (addr1, 1));
+ }
+ else if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4)));
+ addr2 = op2;
+ }
+ else
+ {
+ emit_move_insn (op2, GEN_INT (4));
+ addr2 = gen_rtx_PLUS (Pmode, op2, addr1);
+ }
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ word_high = change_address (src, SImode, addr1);
+ word_low = change_address (src, SImode, addr2);
+ }
+ else
+ {
+ word_high = change_address (src, SImode, addr2);
+ word_low = change_address (src, SImode, addr1);
+ }
+
+ emit_insn (gen_bswapsi2 (op3_32, word_low));
+ emit_insn (gen_bswapsi2 (op4_32, word_high));
+ emit_insn (gen_ashldi3 (dest, op3, GEN_INT (32)));
+ emit_insn (gen_iordi3 (dest, dest, op4));
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "indexed_or_indirect_operand" "")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 4 "" ""))]
+ "TARGET_POWERPC64 && !TARGET_LDBRX && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx src_si = simplify_gen_subreg (SImode, src, DImode, 4);
+ rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, 4);
+ rtx addr1;
+ rtx addr2;
+ rtx word_high;
+ rtx word_low;
+
+ addr1 = XEXP (dest, 0);
+ if (GET_CODE (addr1) == PLUS)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4)));
+ if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2));
+ addr2 = op2;
+ }
+ else
+ addr2 = gen_rtx_PLUS (Pmode, op2, XEXP (addr1, 1));
+ }
+ else if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4)));
+ addr2 = op2;
+ }
+ else
+ {
+ emit_move_insn (op2, GEN_INT (4));
+ addr2 = gen_rtx_PLUS (Pmode, op2, addr1);
+ }
+
+ emit_insn (gen_lshrdi3 (op3, src, GEN_INT (32)));
+ if (BYTES_BIG_ENDIAN)
+ {
+ word_high = change_address (dest, SImode, addr1);
+ word_low = change_address (dest, SImode, addr2);
+ emit_insn (gen_bswapsi2 (word_high, src_si));
+ emit_insn (gen_bswapsi2 (word_low, op3_si));
+ }
+ else
+ {
+ word_high = change_address (dest, SImode, addr2);
+ word_low = change_address (dest, SImode, addr1);
+ emit_insn (gen_bswapsi2 (word_low, src_si));
+ emit_insn (gen_bswapsi2 (word_high, op3_si));
+ }
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:DI 2 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 3 "gpc_reg_operand" ""))
+ (clobber (match_operand:DI 4 "" ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx dest_si = simplify_gen_subreg (SImode, dest, DImode, 4);
+ rtx src_si = simplify_gen_subreg (SImode, src, DImode, 4);
+ rtx op2_si = simplify_gen_subreg (SImode, op2, DImode, 4);
+ rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, 4);
+
+ emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
+ emit_insn (gen_bswapsi2 (dest_si, src_si));
+ emit_insn (gen_bswapsi2 (op3_si, op2_si));
+ emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
+ emit_insn (gen_iordi3 (dest, dest, op3));
+}")
+
+(define_insn "bswapdi2_32bit"
+ [(set (match_operand:DI 0 "reg_or_mem_operand" "=&r,Z,??&r")
+ (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
+ (clobber (match_scratch:SI 2 "=&b,&b,X"))]
+ "!TARGET_POWERPC64 && (REG_P (operands[0]) || REG_P (operands[1]))"
+ "#"
+ [(set_attr "length" "16,12,36")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (bswap:DI (match_operand:DI 1 "indexed_or_indirect_operand" "")))
+ (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+ "!TARGET_POWERPC64 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx op2 = operands[2];
+ rtx dest_hi = simplify_gen_subreg (SImode, dest, DImode, 0);
+ rtx dest_lo = simplify_gen_subreg (SImode, dest, DImode, 4);
+ rtx addr1;
+ rtx addr2;
+ rtx word_high;
+ rtx word_low;
+
+ addr1 = XEXP (src, 0);
+ if (GET_CODE (addr1) == PLUS)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4)));
+ if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2));
+ addr2 = op2;
+ }
+ else
+ addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1));
+ }
+ else if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4)));
+ addr2 = op2;
+ }
+ else
+ {
+ emit_move_insn (op2, GEN_INT (4));
+ addr2 = gen_rtx_PLUS (SImode, op2, addr1);
+ }
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ word_high = change_address (src, SImode, addr1);
+ word_low = change_address (src, SImode, addr2);
+ }
+ else
+ {
+ word_high = change_address (src, SImode, addr2);
+ word_low = change_address (src, SImode, addr1);
+ }
+
+ emit_insn (gen_bswapsi2 (dest_hi, word_low));
+ emit_insn (gen_bswapsi2 (dest_lo, word_high));
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "indexed_or_indirect_operand" "")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+ "!TARGET_POWERPC64 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx op2 = operands[2];
+ rtx src_high = simplify_gen_subreg (SImode, src, DImode, 0);
+ rtx src_low = simplify_gen_subreg (SImode, src, DImode, 4);
+ rtx addr1;
+ rtx addr2;
+ rtx word_high;
+ rtx word_low;
+
+ addr1 = XEXP (dest, 0);
+ if (GET_CODE (addr1) == PLUS)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 0), GEN_INT (4)));
+ if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, XEXP (addr1, 1), op2));
+ addr2 = op2;
+ }
+ else
+ addr2 = gen_rtx_PLUS (SImode, op2, XEXP (addr1, 1));
+ }
+ else if (TARGET_AVOID_XFORM)
+ {
+ emit_insn (gen_add3_insn (op2, addr1, GEN_INT (4)));
+ addr2 = op2;
+ }
+ else
+ {
+ emit_move_insn (op2, GEN_INT (4));
+ addr2 = gen_rtx_PLUS (SImode, op2, addr1);
+ }
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ word_high = change_address (dest, SImode, addr1);
+ word_low = change_address (dest, SImode, addr2);
+ }
+ else
+ {
+ word_high = change_address (dest, SImode, addr2);
+ word_low = change_address (dest, SImode, addr1);
+ }
+
+ emit_insn (gen_bswapsi2 (word_high, src_low));
+ emit_insn (gen_bswapsi2 (word_low, src_high));
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (clobber (match_operand:SI 2 "" ""))]
+ "!TARGET_POWERPC64 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx src_high = simplify_gen_subreg (SImode, src, DImode, 0);
+ rtx src_low = simplify_gen_subreg (SImode, src, DImode, 4);
+ rtx dest_high = simplify_gen_subreg (SImode, dest, DImode, 0);
+ rtx dest_low = simplify_gen_subreg (SImode, dest, DImode, 4);
+
+ emit_insn (gen_bswapsi2 (dest_high, src_low));
+ emit_insn (gen_bswapsi2 (dest_low, src_high));
+}")
+
+(define_expand "mulsi3"
+ [(use (match_operand:SI 0 "gpc_reg_operand" ""))
+ (use (match_operand:SI 1 "gpc_reg_operand" ""))
+ (use (match_operand:SI 2 "reg_or_short_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_POWER)
+ emit_insn (gen_mulsi3_mq (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_mulsi3_no_mq (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mulsi3_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (clobber (match_scratch:SI 3 "=q,q"))]
+ "TARGET_POWER"
+ "@
+ {muls|mullw} %0,%1,%2
+ {muli|mulli} %0,%1,%2"
+ [(set (attr "type")
+ (cond [(match_operand:SI 2 "s8bit_cint_operand" "")
+ (const_string "imul3")
+ (match_operand:SI 2 "short_cint_operand" "")
+ (const_string "imul2")]
+ (const_string "imul")))])
+
+(define_insn "mulsi3_no_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))]
+ "! TARGET_POWER"
+ "@
+ {muls|mullw} %0,%1,%2
+ {muli|mulli} %0,%1,%2"
+ [(set (attr "type")
+ (cond [(match_operand:SI 2 "s8bit_cint_operand" "")
+ (const_string "imul3")
+ (match_operand:SI 2 "short_cint_operand" "")
+ (const_string "imul2")]
+ (const_string "imul")))])
+
+(define_insn "*mulsi3_mq_internal1"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))
+ (clobber (match_scratch:SI 4 "=q,q"))]
+ "TARGET_POWER"
+ "@
+ {muls.|mullw.} %3,%1,%2
+ #"
+ [(set_attr "type" "imul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*mulsi3_no_mq_internal1"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "! TARGET_POWER"
+ "@
+ {muls.|mullw.} %3,%1,%2
+ #"
+ [(set_attr "type" "imul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "! TARGET_POWER && reload_completed"
+ [(set (match_dup 3)
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*mulsi3_mq_internal2"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=q,q"))]
+ "TARGET_POWER"
+ "@
+ {muls.|mullw.} %0,%1,%2
+ #"
+ [(set_attr "type" "imul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*mulsi3_no_mq_internal2"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (mult:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER"
+ "@
+ {muls.|mullw.} %0,%1,%2
+ #"
+ [(set_attr "type" "imul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (mult:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (mult:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (mult:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Operand 1 is divided by operand 2; the quotient goes to operand 0
+;; and the remainder to operand 3.
+;; ??? At some point, see what, if anything, we can do about if (x % y == 0).
+
+(define_expand "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (div:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))])]
+ "TARGET_POWER || (! TARGET_POWER && ! TARGET_POWERPC)"
+ "
+{
+ if (! TARGET_POWER && ! TARGET_POWERPC)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_divss_call ());
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 3));
+ emit_move_insn (operands[3], gen_rtx_REG (SImode, 4));
+ DONE;
+ }
+}")
+
+(define_insn "*divmodsi4_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (div:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (set (match_operand:SI 3 "register_operand" "=q")
+ (mod:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWER"
+ "divs %0,%1,%2"
+ [(set_attr "type" "idiv")])
+
+(define_expand "udiv<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "gpc_reg_operand" "")))]
+ "TARGET_POWERPC || (! TARGET_POWER && ! TARGET_POWERPC)"
+ "
+{
+ if (! TARGET_POWER && ! TARGET_POWERPC)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_quous_call ());
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 3));
+ DONE;
+ }
+ else if (TARGET_POWER)
+ {
+ emit_insn (gen_udivsi3_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "udivsi3_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWERPC && TARGET_POWER"
+ "divwu %0,%1,%2"
+ [(set_attr "type" "idiv")])
+
+(define_insn "*udivsi3_no_mq"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC && ! TARGET_POWER"
+ "div<wd>u %0,%1,%2"
+ [(set (attr "type")
+ (cond [(match_operand:SI 0 "" "")
+ (const_string "idiv")]
+ (const_string "ldiv")))])
+
+
+;; For powers of two we can do srai/aze for divide and then adjust for
+;; modulus.  If it isn't a power of two, FAIL on POWER so divmodsi4 will be
+;; used; for PowerPC, force operands into registers and do a normal divide;
+;; for AIX common-mode, use a quoss call on register operands.
+(define_expand "div<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "reg_or_cint_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) > 0
+ && exact_log2 (INTVAL (operands[2])) >= 0)
+ ;
+ else if (TARGET_POWERPC)
+ {
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ if (TARGET_POWER)
+ {
+ emit_insn (gen_divsi3_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ }
+ else if (TARGET_POWER)
+ FAIL;
+ else
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_quoss_call ());
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 3));
+ DONE;
+ }
+}")
+
+(define_insn "divsi3_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (div:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWERPC && TARGET_POWER"
+ "divw %0,%1,%2"
+ [(set_attr "type" "idiv")])
+
+(define_insn "*div<mode>3_no_mq"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC && ! TARGET_POWER"
+ "div<wd> %0,%1,%2"
+ [(set (attr "type")
+ (cond [(match_operand:SI 0 "" "")
+ (const_string "idiv")]
+ (const_string "ldiv")))])
+
+(define_expand "mod<mode>3"
+ [(use (match_operand:GPR 0 "gpc_reg_operand" ""))
+ (use (match_operand:GPR 1 "gpc_reg_operand" ""))
+ (use (match_operand:GPR 2 "reg_or_cint_operand" ""))]
+ ""
+ "
+{
+ int i;
+ rtx temp1;
+ rtx temp2;
+
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) <= 0
+ || (i = exact_log2 (INTVAL (operands[2]))) < 0)
+ FAIL;
+
+ temp1 = gen_reg_rtx (<MODE>mode);
+ temp2 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+ emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+ DONE;
+}")
+
+(define_insn ""
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "exact_log2_cint_operand" "N")))]
+ ""
+ "{srai|sra<wd>i} %0,%1,%p2\;{aze|addze} %0,%0"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
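+
+;; A C sketch of why srai/aze divides by a power of two with truncation:
+;; the arithmetic shift floors, and the carry out of srawi (set only when
+;; the value is negative and nonzero bits were shifted out) rounds the
+;; result back toward zero.
+;;
+;;   #include <stdint.h>
+;;
+;;   int32_t div_pow2 (int32_t x, int k)
+;;   {
+;;     int32_t q = x >> k;                                 /* srawi */
+;;     int ca = x < 0 && (x & (((uint32_t) 1 << k) - 1));  /* CA bit */
+;;     return q + ca;                                      /* addze */
+;;   }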
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (div:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "exact_log2_cint_operand" "N,N"))
+ (const_int 0)))
+ (clobber (match_scratch:P 3 "=r,r"))]
+ ""
+ "@
+ {srai|sra<wd>i} %3,%1,%p2\;{aze.|addze.} %3,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")
+ (set_attr "cell_micro" "not")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "exact_log2_cint_operand"
+ ""))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (div:<MODE> (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (div:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "exact_log2_cint_operand" "N,N"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (div:P (match_dup 1) (match_dup 2)))]
+ ""
+ "@
+ {srai|sra<wd>i} %0,%1,%p2\;{aze.|addze.} %0,%0
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")
+ (set_attr "cell_micro" "not")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "exact_log2_cint_operand"
+ ""))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (div:GPR (match_dup 1) (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (div:<MODE> (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (udiv:SI
+ (plus:DI (ashift:DI
+ (zero_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (const_int 32))
+ (zero_extend:DI (match_operand:SI 4 "register_operand" "2")))
+ (match_operand:SI 3 "gpc_reg_operand" "r")))
+ (set (match_operand:SI 2 "register_operand" "=*q")
+ (umod:SI
+ (plus:DI (ashift:DI
+ (zero_extend:DI (match_dup 1)) (const_int 32))
+ (zero_extend:DI (match_dup 4)))
+ (match_dup 3)))]
+ "TARGET_POWER"
+ "div %0,%1,%3"
+ [(set_attr "type" "idiv")])
+
+;; To do an unsigned divide we must handle the case of the divisor looking
+;; like a negative number, i.e. having its high bit set.  If the divisor is
+;; a constant less than 2**31, we don't have to worry about the branches,
+;; so a few subroutines are defined here.
+;;
+;; First comes the normal case.
+(define_expand "udivmodsi4_normal"
+ [(set (match_dup 4) (const_int 0))
+ (parallel [(set (match_operand:SI 0 "" "")
+ (udiv:SI (plus:DI (ashift:DI (zero_extend:DI (match_dup 4))
+ (const_int 32))
+ (zero_extend:DI (match_operand:SI 1 "" "")))
+ (match_operand:SI 2 "" "")))
+ (set (match_operand:SI 3 "" "")
+ (umod:SI (plus:DI (ashift:DI (zero_extend:DI (match_dup 4))
+ (const_int 32))
+ (zero_extend:DI (match_dup 1)))
+ (match_dup 2)))])]
+ "TARGET_POWER"
+ "
+{ operands[4] = gen_reg_rtx (SImode); }")
+
+;; This handles the branches.
+(define_expand "udivmodsi4_tests"
+ [(set (match_operand:SI 0 "" "") (const_int 0))
+ (set (match_operand:SI 3 "" "") (match_operand:SI 1 "" ""))
+ (set (match_dup 5) (compare:CCUNS (match_dup 1) (match_operand:SI 2 "" "")))
+ (set (pc) (if_then_else (ltu (match_dup 5) (const_int 0))
+ (label_ref (match_operand:SI 4 "" "")) (pc)))
+ (set (match_dup 0) (const_int 1))
+ (set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 6) (compare:CC (match_dup 2) (const_int 0)))
+ (set (pc) (if_then_else (lt (match_dup 6) (const_int 0))
+ (label_ref (match_dup 4)) (pc)))]
+ "TARGET_POWER"
+ "
+{ operands[5] = gen_reg_rtx (CCUNSmode);
+ operands[6] = gen_reg_rtx (CCmode);
+}")
+
+(define_expand "udivmodsi4"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (udiv:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (umod:SI (match_dup 1) (match_dup 2)))])]
+ ""
+ "
+{
+ rtx label = 0;
+
+ if (! TARGET_POWER)
+ {
+ if (! TARGET_POWERPC)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_divus_call ());
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 3));
+ emit_move_insn (operands[3], gen_rtx_REG (SImode, 4));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+
+ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) < 0)
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ label = gen_label_rtx ();
+ emit (gen_udivmodsi4_tests (operands[0], operands[1], operands[2],
+ operands[3], label));
+ }
+ else
+ operands[2] = force_reg (SImode, operands[2]);
+
+ emit (gen_udivmodsi4_normal (operands[0], operands[1], operands[2],
+ operands[3]));
+ if (label)
+ emit_label (label);
+
+ DONE;
+}")
+
+;; AIX architecture-independent common-mode multiply (DImode),
+;; divide/modulus, and quotient subroutine calls.  Input operands are in R3
+;; and R4; results are in R3 and sometimes R4; the link register is always
+;; clobbered by the bla instruction; R0 is sometimes clobbered; and MQ is
+;; sometimes clobbered but assumed unused when generating common-mode code,
+;; so it is ignored.
+(define_insn "mulh_call"
+ [(set (reg:SI 3)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (reg:SI 3))
+ (sign_extend:DI (reg:SI 4)))
+ (const_int 32))))
+ (clobber (reg:SI LR_REGNO))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __mulh"
+ [(set_attr "type" "imul")])
+
+(define_insn "mull_call"
+ [(set (reg:DI 3)
+ (mult:DI (sign_extend:DI (reg:SI 3))
+ (sign_extend:DI (reg:SI 4))))
+ (clobber (reg:SI LR_REGNO))
+ (clobber (reg:SI 0))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __mull"
+ [(set_attr "type" "imul")])
+
+(define_insn "divss_call"
+ [(set (reg:SI 3)
+ (div:SI (reg:SI 3) (reg:SI 4)))
+ (set (reg:SI 4)
+ (mod:SI (reg:SI 3) (reg:SI 4)))
+ (clobber (reg:SI LR_REGNO))
+ (clobber (reg:SI 0))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __divss"
+ [(set_attr "type" "idiv")])
+
+(define_insn "divus_call"
+ [(set (reg:SI 3)
+ (udiv:SI (reg:SI 3) (reg:SI 4)))
+ (set (reg:SI 4)
+ (umod:SI (reg:SI 3) (reg:SI 4)))
+ (clobber (reg:SI LR_REGNO))
+ (clobber (reg:SI 0))
+ (clobber (match_scratch:CC 0 "=x"))
+ (clobber (reg:CC CR1_REGNO))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __divus"
+ [(set_attr "type" "idiv")])
+
+(define_insn "quoss_call"
+ [(set (reg:SI 3)
+ (div:SI (reg:SI 3) (reg:SI 4)))
+ (clobber (reg:SI LR_REGNO))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __quoss"
+ [(set_attr "type" "idiv")])
+
+(define_insn "quous_call"
+ [(set (reg:SI 3)
+ (udiv:SI (reg:SI 3) (reg:SI 4)))
+ (clobber (reg:SI LR_REGNO))
+ (clobber (reg:SI 0))
+ (clobber (match_scratch:CC 0 "=x"))
+ (clobber (reg:CC CR1_REGNO))]
+ "! TARGET_POWER && ! TARGET_POWERPC"
+ "bla __quous"
+ [(set_attr "type" "idiv")])
+
+;; Logical instructions
+;; The logical instructions are mostly combined by using match_operator,
+;; but the plain AND insns are somewhat different because there is no
+;; plain 'andi' (only 'andi.'), no plain 'andis', and there are all
+;; those rotate-and-mask operations. Thus, the AND insns come first.
+
+(define_expand "andsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "and_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
+ ""
+ "")
+
+(define_insn "andsi3_mc"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r")
+ (match_operand:SI 2 "and_operand" "?r,T,K,L")))
+ (clobber (match_scratch:CC 3 "=X,X,x,x"))]
+ "rs6000_gen_cell_microcode"
+ "@
+ and %0,%1,%2
+ {rlinm|rlwinm} %0,%1,0,%m2,%M2
+ {andil.|andi.} %0,%1,%b2
+ {andiu.|andis.} %0,%1,%u2"
+ [(set_attr "type" "*,*,fast_compare,fast_compare")])
+
+(define_insn "andsi3_nomc"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "and_operand" "?r,T")))
+ (clobber (match_scratch:CC 3 "=X,X"))]
+ "!rs6000_gen_cell_microcode"
+ "@
+ and %0,%1,%2
+ {rlinm|rlwinm} %0,%1,0,%m2,%M2")
+
+(define_insn "andsi3_internal0_nomc"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "and_operand" "?r,T")))]
+ "!rs6000_gen_cell_microcode"
+ "@
+ and %0,%1,%2
+ {rlinm|rlwinm} %0,%1,0,%m2,%M2")
+
+
+;; Note: to set CRs other than CR0 we do the AND immediate and then
+;; test again -- this avoids an mfcr, which causes an execution
+;; serialization on the higher-end machines.
+
+(define_insn "*andsi3_internal2_mc"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,x,?y,??y,??y,?y")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "and_operand" "r,K,L,T,r,K,L,T"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r,r,r,r,r"))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,x,x,X"))]
+ "TARGET_32BIT && rs6000_gen_cell_microcode"
+ "@
+ and. %3,%1,%2
+ {andil.|andi.} %3,%1,%b2
+ {andiu.|andis.} %3,%1,%u2
+ {rlinm.|rlwinm.} %3,%1,0,%m2,%M2
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fast_compare,fast_compare,fast_compare,delayed_compare,\
+ compare,compare,compare,compare")
+ (set_attr "length" "4,4,4,4,8,8,8,8")])
+
+(define_insn "*andsi3_internal3_mc"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,x,?y,??y,??y,?y")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "and_operand" "r,K,L,T,r,K,L,T"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r,r,r,r,r"))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,x,x,X"))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "@
+ #
+ {andil.|andi.} %3,%1,%b2
+ {andiu.|andis.} %3,%1,%u2
+ {rlinm.|rlwinm.} %3,%1,0,%m2,%M2
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "compare,fast_compare,fast_compare,delayed_compare,compare,\
+ compare,compare,compare")
+ (set_attr "length" "8,4,4,4,8,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "and_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 3 ""))
+ (clobber (match_scratch:CC 4 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 3)
+ (and:<MODE> (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+;; We don't have a 32-bit "and. rt,ra,rb" for ppc64: CR is set from the
+;; whole 64-bit register, and we don't know what is in the high 32 bits.
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_operand" "")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:CC 4 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*andsi3_internal4"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,x,?y,??y,??y,?y")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "and_operand" "r,K,L,T,r,K,L,T"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r,r,r,r")
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,x,x,X"))]
+ "TARGET_32BIT && rs6000_gen_cell_microcode"
+ "@
+ and. %0,%1,%2
+ {andil.|andi.} %0,%1,%b2
+ {andiu.|andis.} %0,%1,%u2
+ {rlinm.|rlwinm.} %0,%1,0,%m2,%M2
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fast_compare,fast_compare,fast_compare,delayed_compare,\
+ compare,compare,compare,compare")
+ (set_attr "length" "4,4,4,4,8,8,8,8")])
+
+(define_insn "*andsi3_internal5_mc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,x,?y,??y,??y,?y")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "and_operand" "r,K,L,T,r,K,L,T"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r,r,r,r")
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,x,x,X"))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "@
+ #
+ {andil.|andi.} %0,%1,%b2
+ {andiu.|andis.} %0,%1,%u2
+ {rlinm.|rlwinm.} %0,%1,0,%m2,%M2
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "compare,fast_compare,fast_compare,delayed_compare,compare,\
+ compare,compare,compare")
+ (set_attr "length" "8,4,4,4,8,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "and_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:CC 4 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0)
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_operand" "")
+ (compare:CC (and:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:CC 4 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (and:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Handle the PowerPC64 rlwinm corner case
+
+(define_insn_and_split "*andsi3_internal6"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (and:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "mask_operand_wrap" "i")))]
+ "TARGET_POWERPC64"
+ "#"
+ "TARGET_POWERPC64"
+ [(set (match_dup 0)
+ (and:SI (rotate:SI (match_dup 1) (match_dup 3))
+ (match_dup 4)))
+ (set (match_dup 0)
+ (rotate:SI (match_dup 0) (match_dup 5)))]
+ "
+{
+ int mb = extract_MB (operands[2]);
+ int me = extract_ME (operands[2]);
+ operands[3] = GEN_INT (me + 1);
+ operands[5] = GEN_INT (32 - (me + 1));
+ operands[4] = GEN_INT (~((HOST_WIDE_INT) -1 << (33 + me - mb)));
+}"
+ [(set_attr "length" "8")])
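+
+;; A C sketch of the identity behind the split above: rotating operand,
+;; mask, and result by the same amount preserves the AND, and rotating a
+;; wrap-around mask left by ME+1 makes it contiguous, so a single rlwinm
+;; can apply it.
+;;
+;;   #include <stdint.h>
+;;
+;;   static uint32_t rotl (uint32_t x, int n)   /* 0 < n < 32 here */
+;;   { return (x << n) | (x >> (32 - n)); }
+;;
+;;   uint32_t and_wrap (uint32_t x, uint32_t m, int r)  /* r is ME+1 */
+;;   { return rotl (rotl (x, r) & rotl (m, r), 32 - r); }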
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ior:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_logical_cint_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ! logical_operand (operands[2], SImode))
+ {
+ HOST_WIDE_INT value = INTVAL (operands[2]);
+ rtx tmp = ((!can_create_pseudo_p ()
+ || rtx_equal_p (operands[0], operands[1]))
+ ? operands[0] : gen_reg_rtx (SImode));
+
+ emit_insn (gen_iorsi3 (tmp, operands[1],
+ GEN_INT (value & (~ (HOST_WIDE_INT) 0xffff))));
+ emit_insn (gen_iorsi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
+ DONE;
+ }
+}")
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (xor:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_logical_cint_operand" "")))]
+ ""
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && ! logical_operand (operands[2], SImode))
+ {
+ HOST_WIDE_INT value = INTVAL (operands[2]);
+ rtx tmp = ((!can_create_pseudo_p ()
+ || rtx_equal_p (operands[0], operands[1]))
+ ? operands[0] : gen_reg_rtx (SImode));
+
+ emit_insn (gen_xorsi3 (tmp, operands[1],
+ GEN_INT (value & (~ (HOST_WIDE_INT) 0xffff))));
+ emit_insn (gen_xorsi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
+ DONE;
+ }
+}")
+
+(define_insn "*boolsi3_internal1"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
+ (match_operator:SI 3 "boolean_or_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "%r,r,r")
+ (match_operand:SI 2 "logical_operand" "r,K,L")]))]
+ ""
+ "@
+ %q3 %0,%1,%2
+ {%q3il|%q3i} %0,%1,%b2
+ {%q3iu|%q3is} %0,%1,%u2")
+
+(define_insn "*boolsi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_or_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "TARGET_32BIT"
+ "@
+ %q4. %3,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolsi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_32BIT"
+ "@
+ %q4. %0,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Split a logical operation that we can't do in one insn into two insns,
+;; each of which does one 16-bit part. This is used by combine.
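+;; Example: %0 = %1 | 0x12345678 becomes (ior %1 0x12340000) followed by
+;; (ior %0 0x5678), assembling as {oriu|oris} %0,%1,0x1234 and then
+;; {oril|ori} %0,%0,0x5678.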
+
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_operator:SI 3 "boolean_or_operator"
+ [(match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "non_logical_cint_operand" "")]))]
+ ""
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 0) (match_dup 5))]
+"
+{
+ rtx i;
+ i = GEN_INT (INTVAL (operands[2]) & (~ (HOST_WIDE_INT) 0xffff));
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+ operands[1], i);
+ i = GEN_INT (INTVAL (operands[2]) & 0xffff);
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+ operands[0], i);
+}")
+
+(define_insn "*boolcsi3_internal1"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (match_operator:SI 3 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (match_operand:SI 2 "gpc_reg_operand" "r")]))]
+ ""
+ "%q3 %0,%2,%1")
+
+(define_insn "*boolcsi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "r,r"))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "TARGET_32BIT"
+ "@
+ %q4. %3,%2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (match_operand:SI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolcsi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r"))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_32BIT"
+ "@
+ %q4. %0,%2,%1
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (match_operand:SI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolccsi3_internal1"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (match_operator:SI 3 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (not:SI (match_operand:SI 2 "gpc_reg_operand" "r"))]))]
+ ""
+ "%q3 %0,%1,%2")
+
+(define_insn "*boolccsi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "r,r"))
+ (not:SI (match_operand:SI 2 "gpc_reg_operand" "r,r"))])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "TARGET_32BIT"
+ "@
+ %q4. %3,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (not:SI (match_operand:SI 2 "gpc_reg_operand" ""))])
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolccsi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r"))
+ (not:SI (match_operand:SI 2 "gpc_reg_operand" "r,r"))])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_32BIT"
+ "@
+ %q4. %0,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:SI 4 "boolean_operator"
+ [(not:SI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (not:SI (match_operand:SI 2 "gpc_reg_operand" ""))])
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; maskir insn.  We need four forms because the commutative IOR and AND
+;; operands may appear in either order.  Don't define forms that only set
+;; CR fields: the destination is tied to operand 1, so such a form would
+;; modify an input register.
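+;; In each form below, maskir %0,%3,%2 computes
+;; %0 = (%1 & ~%2) | (%3 & %2) with %1 tied to %0: the bits of %3
+;; selected by the mask %2 are merged into %0, and the rest of %0 is
+;; kept.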
+
+(define_insn "*maskir_internal1"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (match_operand:SI 1 "gpc_reg_operand" "0"))
+ (and:SI (match_dup 2)
+ (match_operand:SI 3 "gpc_reg_operand" "r"))))]
+ "TARGET_POWER"
+ "maskir %0,%3,%2")
+
+(define_insn "*maskir_internal2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (match_operand:SI 1 "gpc_reg_operand" "0"))
+ (and:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_dup 2))))]
+ "TARGET_POWER"
+ "maskir %0,%3,%2")
+
+(define_insn "*maskir_internal3"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand:SI 3 "gpc_reg_operand" "r"))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "0"))))]
+ "TARGET_POWER"
+ "maskir %0,%3,%2")
+
+(define_insn "*maskir_internal4"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "0"))))]
+ "TARGET_POWER"
+ "maskir %0,%3,%2")
+
+(define_insn "*maskir_internal5"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (match_operand:SI 1 "gpc_reg_operand" "0,0"))
+ (and:SI (match_dup 2)
+ (match_operand:SI 3 "gpc_reg_operand" "r,r")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 2) (match_dup 3))))]
+ "TARGET_POWER"
+ "@
+ maskir. %0,%3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" ""))
+ (match_operand:SI 1 "gpc_reg_operand" ""))
+ (and:SI (match_dup 2)
+ (match_operand:SI 3 "gpc_reg_operand" "")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 2) (match_dup 3))))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 2) (match_dup 3))))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*maskir_internal6"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (match_operand:SI 1 "gpc_reg_operand" "0,0"))
+ (and:SI (match_operand:SI 3 "gpc_reg_operand" "r,r")
+ (match_dup 2)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 3) (match_dup 2))))]
+ "TARGET_POWER"
+ "@
+ maskir. %0,%3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (ior:SI (and:SI (not:SI (match_operand:SI 2 "gpc_reg_operand" ""))
+ (match_operand:SI 1 "gpc_reg_operand" ""))
+ (and:SI (match_operand:SI 3 "gpc_reg_operand" "")
+ (match_dup 2)))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 3) (match_dup 2))))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (ior:SI (and:SI (not:SI (match_dup 2)) (match_dup 1))
+ (and:SI (match_dup 3) (match_dup 2))))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*maskir_internal7"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ior:SI (and:SI (match_operand:SI 2 "gpc_reg_operand" "r,r")
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "0,0")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ior:SI (and:SI (match_dup 2) (match_dup 3))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))]
+ "TARGET_POWER"
+ "@
+ maskir. %0,%3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (ior:SI (and:SI (match_operand:SI 2 "gpc_reg_operand" "")
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ior:SI (and:SI (match_dup 2) (match_dup 3))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (ior:SI (and:SI (match_dup 2) (match_dup 3))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*maskir_internal8"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "0,0")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ior:SI (and:SI (match_dup 3) (match_dup 2))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))]
+ "TARGET_POWER"
+ "@
+ maskir. %0,%3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (and:SI (not:SI (match_dup 2))
+ (match_operand:SI 1 "gpc_reg_operand" "")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ior:SI (and:SI (match_dup 3) (match_dup 2))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (ior:SI (and:SI (match_dup 3) (match_dup 2))
+ (and:SI (not:SI (match_dup 2)) (match_dup 1))))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Rotate and shift insns, in all their variants. These support shifts,
+;; field inserts and extracts, and various combinations thereof.
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "gpc_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand 3 "gpc_reg_operand" ""))]
+ ""
+ "
+{
+ /* Do not handle 16/8 bit structures that fit in HI/QI modes directly, since
+ the (SUBREG:SI (REG:HI xxx)) that is otherwise generated can confuse the
+ compiler if the address of the structure is taken later. Likewise, do
+ not handle invalid E500 subregs. */
+ if (GET_CODE (operands[0]) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) < UNITS_PER_WORD
+ || ((TARGET_E500_DOUBLE || TARGET_SPE)
+ && invalid_e500_subreg (operands[0], GET_MODE (operands[0])))))
+ FAIL;
+
+ if (TARGET_POWERPC64 && GET_MODE (operands[0]) == DImode)
+ emit_insn (gen_insvdi (operands[0], operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_insvsi (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_insn "insvsi"
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 3 "gpc_reg_operand" "r"))]
+ ""
+ "*
+{
+ int start = INTVAL (operands[2]) & 31;
+ int size = INTVAL (operands[1]) & 31;
+
+ operands[4] = GEN_INT (32 - start - size);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "*insvsi_internal1"
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (rotate:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")))]
+ "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
+ "*
+{
+ int shift = INTVAL (operands[4]) & 31;
+ int start = INTVAL (operands[2]) & 31;
+ int size = INTVAL (operands[1]) & 31;
+
+ operands[4] = GEN_INT (shift - start - size);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "*insvsi_internal2"
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (ashiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")))]
+ "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
+ "*
+{
+ int shift = INTVAL (operands[4]) & 31;
+ int start = INTVAL (operands[2]) & 31;
+ int size = INTVAL (operands[1]) & 31;
+
+ operands[4] = GEN_INT (32 - shift - start - size);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "*insvsi_internal3"
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")))]
+ "(32 - (INTVAL (operands[4]) & 31)) >= INTVAL (operands[1])"
+ "*
+{
+ int shift = INTVAL (operands[4]) & 31;
+ int start = INTVAL (operands[2]) & 31;
+ int size = INTVAL (operands[1]) & 31;
+
+ operands[4] = GEN_INT (32 - shift - start - size);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "*insvsi_internal4"
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (zero_extract:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")
+ (match_operand:SI 5 "const_int_operand" "i")))]
+ "INTVAL (operands[4]) >= INTVAL (operands[1])"
+ "*
+{
+ int extract_start = INTVAL (operands[5]) & 31;
+ int extract_size = INTVAL (operands[4]) & 31;
+ int insert_start = INTVAL (operands[2]) & 31;
+ int insert_size = INTVAL (operands[1]) & 31;
+
+  /* Align the extract field with the insert field.  */
+ operands[5] = GEN_INT (extract_start + extract_size - insert_start - insert_size);
+ operands[1] = GEN_INT (insert_start + insert_size - 1);
+ return \"{rlimi|rlwimi} %0,%3,%h5,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+;; combine patterns for rlwimi
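+;; Example: %0 = (%0 & 0xffff0000) | ((%3 >> 16) & 0xffff) matches here
+;; with mb = 16, me = 31 and a rotate of 32 - 16 = 16, giving
+;; {rlimi|rlwimi} %0,%3,16,16,31.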
+(define_insn "*insvsi_internal5"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (match_operand:SI 4 "gpc_reg_operand" "0")
+ (match_operand:SI 1 "mask_operand" "i"))
+ (and:SI (lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 5 "mask_operand" "i"))))]
+ "TARGET_POWERPC && INTVAL(operands[1]) == ~INTVAL(operands[5])"
+ "*
+{
+ int me = extract_ME(operands[5]);
+ int mb = extract_MB(operands[5]);
+ operands[4] = GEN_INT(32 - INTVAL(operands[2]));
+ operands[2] = GEN_INT(mb);
+ operands[1] = GEN_INT(me);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "*insvsi_internal6"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 5 "mask_operand" "i"))
+ (and:SI (match_operand:SI 4 "gpc_reg_operand" "0")
+ (match_operand:SI 1 "mask_operand" "i"))))]
+ "TARGET_POWERPC && INTVAL(operands[1]) == ~INTVAL(operands[5])"
+ "*
+{
+ int me = extract_ME(operands[5]);
+ int mb = extract_MB(operands[5]);
+ operands[4] = GEN_INT(32 - INTVAL(operands[2]));
+ operands[2] = GEN_INT(mb);
+ operands[1] = GEN_INT(me);
+ return \"{rlimi|rlwimi} %0,%3,%h4,%h2,%h1\";
+}"
+ [(set_attr "type" "insert_word")])
+
+(define_insn "insvdi"
+ [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:DI 3 "gpc_reg_operand" "r"))]
+ "TARGET_POWERPC64"
+ "*
+{
+ int start = INTVAL (operands[2]) & 63;
+ int size = INTVAL (operands[1]) & 63;
+
+ operands[1] = GEN_INT (64 - start - size);
+ return \"rldimi %0,%3,%H1,%H2\";
+}"
+ [(set_attr "type" "insert_dword")])
+
+(define_insn "*insvdi_internal2"
+ [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (ashiftrt:DI (match_operand:DI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")))]
+ "TARGET_POWERPC64
+ && insvdi_rshift_rlwimi_p (operands[1], operands[2], operands[4])"
+ "*
+{
+ int shift = INTVAL (operands[4]) & 63;
+ int start = (INTVAL (operands[2]) & 63) - 32;
+ int size = INTVAL (operands[1]) & 63;
+
+ operands[4] = GEN_INT (64 - shift - start - size);
+ operands[2] = GEN_INT (start);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"rlwimi %0,%3,%h4,%h2,%h1\";
+}")
+
+(define_insn "*insvdi_internal3"
+ [(set (zero_extract:DI (match_operand:DI 0 "gpc_reg_operand" "+r")
+ (match_operand:SI 1 "const_int_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (lshiftrt:DI (match_operand:DI 3 "gpc_reg_operand" "r")
+ (match_operand:SI 4 "const_int_operand" "i")))]
+ "TARGET_POWERPC64
+ && insvdi_rshift_rlwimi_p (operands[1], operands[2], operands[4])"
+ "*
+{
+ int shift = INTVAL (operands[4]) & 63;
+ int start = (INTVAL (operands[2]) & 63) - 32;
+ int size = INTVAL (operands[1]) & 63;
+
+ operands[4] = GEN_INT (64 - shift - start - size);
+ operands[2] = GEN_INT (start);
+ operands[1] = GEN_INT (start + size - 1);
+ return \"rlwimi %0,%3,%h4,%h2,%h1\";
+}")
+
+(define_expand "extzv"
+ [(set (match_operand 0 "gpc_reg_operand" "")
+ (zero_extract (match_operand 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ""
+ "
+{
+ /* Do not handle 16/8 bit structures that fit in HI/QI modes directly, since
+ the (SUBREG:SI (REG:HI xxx)) that is otherwise generated can confuse the
+ compiler if the address of the structure is taken later. */
+ if (GET_CODE (operands[0]) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) < UNITS_PER_WORD))
+ FAIL;
+
+ if (TARGET_POWERPC64 && GET_MODE (operands[1]) == DImode)
+ emit_insn (gen_extzvdi (operands[0], operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_extzvsi (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_insn "extzvsi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extract:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i")))]
+ ""
+ "*
+{
+ int start = INTVAL (operands[3]) & 31;
+ int size = INTVAL (operands[2]) & 31;
+
+ if (start + size >= 32)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ return \"{rlinm|rlwinm} %0,%1,%3,%s2,31\";
+}")
+
+(define_insn "*extzvsi_internal1"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extract:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")
+ (match_operand:SI 3 "const_int_operand" "i,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=r,r"))]
+ ""
+ "*
+{
+ int start = INTVAL (operands[3]) & 31;
+ int size = INTVAL (operands[2]) & 31;
+
+  /* Force a split when the compare does not target CR0.  */
+ if (which_alternative == 1)
+ return \"#\";
+
+ /* If the bit-field being tested fits in the upper or lower half of a
+ word, it is possible to use andiu. or andil. to test it. This is
+ useful because the condition register set-use delay is smaller for
+ andi[ul]. than for rlinm. This doesn't work when the starting bit
+ position is 0 because the LT and GT bits may be set wrong. */
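+  /* For example, start = 20 and size = 4 give
+     operands[3] = (1 << 12) - (1 << 8) = 0x0f00, and since start >= 16
+     this emits {andil.|andi.} %4,%1,0x0f00.  */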
+
+ if ((start > 0 && start + size <= 16) || start >= 16)
+ {
+ operands[3] = GEN_INT (((1 << (16 - (start & 15)))
+ - (1 << (16 - (start & 15) - size))));
+ if (start < 16)
+ return \"{andiu.|andis.} %4,%1,%3\";
+ else
+ return \"{andil.|andi.} %4,%1,%3\";
+ }
+
+ if (start + size >= 32)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ return \"{rlinm.|rlwinm.} %4,%1,%3,%s2,31\";
+}"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extract:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "reload_completed"
+ [(set (match_dup 4)
+ (zero_extract:SI (match_dup 1) (match_dup 2)
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*extzvsi_internal2"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC (zero_extract:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")
+ (match_operand:SI 3 "const_int_operand" "i,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extract:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ ""
+ "*
+{
+ int start = INTVAL (operands[3]) & 31;
+ int size = INTVAL (operands[2]) & 31;
+
+  /* Force a split when the compare does not target CR0.  */
+ if (which_alternative == 1)
+ return \"#\";
+
+ /* Since we are using the output value, we can't ignore any need for
+ a shift. The bit-field must end at the LSB. */
+ if (start >= 16 && start + size == 32)
+ {
+ operands[3] = GEN_INT ((1 << size) - 1);
+ return \"{andil.|andi.} %0,%1,%3\";
+ }
+
+ if (start + size >= 32)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ return \"{rlinm.|rlwinm.} %0,%1,%3,%s2,31\";
+}"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extract:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extract:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extract:SI (match_dup 1) (match_dup 2) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "extzvdi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i")))]
+ "TARGET_POWERPC64"
+ "*
+{
+ int start = INTVAL (operands[3]) & 63;
+ int size = INTVAL (operands[2]) & 63;
+
+ if (start + size >= 64)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ operands[2] = GEN_INT (64 - size);
+ return \"rldicl %0,%1,%3,%2\";
+}")
+
+(define_insn "*extzvdi_internal1"
+ [(set (match_operand:CC 0 "gpc_reg_operand" "=x")
+ (compare:CC (zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 "=r"))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "*
+{
+ int start = INTVAL (operands[3]) & 63;
+ int size = INTVAL (operands[2]) & 63;
+
+ if (start + size >= 64)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ operands[2] = GEN_INT (64 - size);
+ return \"rldicl. %4,%1,%3,%2\";
+}"
+ [(set_attr "type" "compare")])
+
+(define_insn "*extzvdi_internal2"
+ [(set (match_operand:CC 4 "gpc_reg_operand" "=x")
+ (compare:CC (zero_extract:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (zero_extract:DI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "*
+{
+ int start = INTVAL (operands[3]) & 63;
+ int size = INTVAL (operands[2]) & 63;
+
+ if (start + size >= 64)
+ operands[3] = const0_rtx;
+ else
+ operands[3] = GEN_INT (start + size);
+ operands[2] = GEN_INT (64 - size);
+ return \"rldicl. %0,%1,%3,%2\";
+}"
+ [(set_attr "type" "compare")])
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ ""
+ "@
+ {rlnm|rlwnm} %0,%1,%2,0xffffffff
+ {rlinm|rlwinm} %0,%1,%h2,0xffffffff"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotlsi3_64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i"))))]
+ "TARGET_64BIT"
+ "@
+ {rlnm|rlwnm} %0,%1,%2,0xffffffff
+ {rlinm|rlwinm} %0,%1,%h2,0xffffffff"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotlsi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %3,%1,%2,0xffffffff
+ {rlinm.|rlwinm.} %3,%1,%h2,0xffffffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (rotate:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (rotate:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %0,%1,%2,0xffffffff
+ {rlinm.|rlwinm.} %0,%1,%h2,0xffffffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (rotate:SI (match_dup 1) (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (rotate:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal4"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (and:SI (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i"))
+ (match_operand:SI 3 "mask_operand" "n,n")))]
+ ""
+ "@
+ {rlnm|rlwnm} %0,%1,%2,%m3,%M3
+ {rlinm|rlwinm} %0,%1,%h2,%m3,%M3"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotlsi3_internal5"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (and:SI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (match_operand:SI 3 "mask_operand" "n,n,n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=r,r,r,r"))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %4,%1,%2,%m3,%M3
+ {rlinm.|rlwinm.} %4,%1,%h2,%m3,%M3
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:SI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "reload_completed"
+ [(set (match_dup 4)
+ (and:SI (rotate:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal6"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (and:SI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (match_operand:SI 3 "mask_operand" "n,n,n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (and:SI (rotate:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %0,%1,%2,%m3,%M3
+ {rlinm.|rlwinm.} %0,%1,%h2,%m3,%M3
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:SI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (rotate:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (and:SI (rotate:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal7"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_cint_operand" "ri")) 0)))]
+ ""
+ "{rl%I2nm|rlw%I2nm} %0,%1,%h2,0xff"
+ [(set (attr "cell_micro")
+ (if_then_else (match_operand:SI 2 "const_int_operand" "")
+ (const_string "not")
+ (const_string "always")))])
+
+(define_insn "*rotlsi3_internal8"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %3,%1,%2,0xff
+ {rlinm.|rlwinm.} %3,%1,%h2,0xff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:QI
+ (rotate:SI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal9"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %0,%1,%2,0xff
+ {rlinm.|rlwinm.} %0,%1,%h2,0xff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal10"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")) 0)))]
+ ""
+ "@
+ {rlnm|rlwnm} %0,%1,%2,0xffff
+ {rlinm|rlwinm} %0,%1,%h2,0xffff"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+
+(define_insn "*rotlsi3_internal11"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %3,%1,%2,0xffff
+ {rlinm.|rlwinm.} %3,%1,%h2,0xffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:HI
+ (rotate:SI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal12"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
+ ""
+ "@
+ {rlnm.|rlwnm.} %0,%1,%2,0xffff
+ {rlinm.|rlwinm.} %0,%1,%h2,0xffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Note that we use "sle." instead of "sl." so that we can set
+;; SHIFT_COUNT_TRUNCATED.
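+;; (sle takes its shift count modulo 32, whereas sl yields zero for
+;; counts of 32..63, so presumably only the former is consistent with
+;; SHIFT_COUNT_TRUNCATED.)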
+
+(define_expand "ashlsi3"
+ [(use (match_operand:SI 0 "gpc_reg_operand" ""))
+ (use (match_operand:SI 1 "gpc_reg_operand" ""))
+ (use (match_operand:SI 2 "reg_or_cint_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_POWER)
+ emit_insn (gen_ashlsi3_power (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_ashlsi3_no_power (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "ashlsi3_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))
+ (clobber (match_scratch:SI 3 "=q,X"))]
+ "TARGET_POWER"
+ "@
+ sle %0,%1,%2
+ {sli|slwi} %0,%1,%h2")
+
+(define_insn "ashlsi3_no_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ "! TARGET_POWER"
+ "@
+ {sl|slw} %0,%1,%2
+ {sli|slwi} %0,%1,%h2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn "*ashlsi3_64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i"))))]
+ "TARGET_POWERPC64"
+ "@
+ {sl|slw} %0,%1,%2
+ {sli|slwi} %0,%1,%h2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))
+ (clobber (match_scratch:SI 4 "=q,X,q,X"))]
+ "TARGET_POWER"
+ "@
+ sle. %3,%1,%2
+ {sli.|slwi.} %3,%1,%h2
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ "! TARGET_POWER && TARGET_32BIT"
+ "@
+ {sl.|slw.} %3,%1,%2
+ {sli.|slwi.} %3,%1,%h2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "! TARGET_POWER && TARGET_32BIT && reload_completed"
+ [(set (match_dup 3)
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=q,X,q,X"))]
+ "TARGET_POWER"
+ "@
+ sle. %0,%1,%2
+ {sli.|slwi.} %0,%1,%h2
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashift:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && TARGET_32BIT"
+ "@
+ {sl.|slw.} %0,%1,%2
+ {sli.|slwi.} %0,%1,%h2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashift:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (ashift:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "rlwinm"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 3 "mask_operand" "n")))]
+ "includes_lshift_p (operands[2], operands[3])"
+ "{rlinm|rlwinm} %0,%1,%h2,%m3,%M3")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:SI 3 "mask_operand" "n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=r,r"))]
+ "includes_lshift_p (operands[2], operands[3])"
+ "@
+ {rlinm.|rlwinm.} %4,%1,%h2,%m3,%M3
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "includes_lshift_p (operands[2], operands[3]) && reload_completed"
+ [(set (match_dup 4)
+ (and:SI (ashift:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:SI 3 "mask_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (and:SI (ashift:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "includes_lshift_p (operands[2], operands[3])"
+ "@
+ {rlinm.|rlwinm.} %0,%1,%h2,%m3,%M3
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:SI (ashift:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (ashift:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "includes_lshift_p (operands[2], operands[3]) && reload_completed"
+ [(set (match_dup 0)
+ (and:SI (ashift:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; The AIX assembler mis-handles "sri x,x,0", so write that case as
+;; "sli x,x,0".
+(define_expand "lshrsi3"
+ [(use (match_operand:SI 0 "gpc_reg_operand" ""))
+ (use (match_operand:SI 1 "gpc_reg_operand" ""))
+ (use (match_operand:SI 2 "reg_or_cint_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_POWER)
+ emit_insn (gen_lshrsi3_power (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_lshrsi3_no_power (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "lshrsi3_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,O,i")))
+ (clobber (match_scratch:SI 3 "=q,X,X"))]
+ "TARGET_POWER"
+ "@
+ sre %0,%1,%2
+ mr %0,%1
+ {s%A2i|s%A2wi} %0,%1,%h2")
+
+(define_insn "lshrsi3_no_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "O,r,i")))]
+ "! TARGET_POWER"
+ "@
+ mr %0,%1
+ {sr|srw} %0,%1,%2
+ {sri|srwi} %0,%1,%h2"
+ [(set_attr "type" "integer,var_shift_rotate,shift")])
+
+(define_insn "*lshrsi3_64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i"))))]
+ "TARGET_POWERPC64"
+ "@
+ {sr|srw} %0,%1,%2
+ {sri|srwi} %0,%1,%h2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,?y,?y,?y")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,O,i,r,O,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,X,r,r,X,r"))
+ (clobber (match_scratch:SI 4 "=q,X,X,q,X,X"))]
+ "TARGET_POWER"
+ "@
+ sre. %3,%1,%2
+ mr. %1,%1
+ {s%A2i.|s%A2wi.} %3,%1,%h2
+ #
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,4,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,?y,?y,?y")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "O,r,i,O,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=X,r,r,X,r,r"))]
+ "! TARGET_POWER && TARGET_32BIT"
+ "@
+ mr. %1,%1
+ {sr.|srw.} %3,%1,%2
+ {sri.|srwi.} %3,%1,%h2
+ #
+ #
+ #"
+ [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,4,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "! TARGET_POWER && TARGET_32BIT && reload_completed"
+ [(set (match_dup 3)
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,?y,?y,?y")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,O,i,r,O,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=q,X,X,q,X,X"))]
+ "TARGET_POWER"
+ "@
+ sre. %0,%1,%2
+ mr. %0,%1
+ {s%A2i.|s%A2wi.} %0,%1,%h2
+ #
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,4,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,?y,?y,?y")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "O,r,i,O,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && TARGET_32BIT"
+ "@
+ mr. %0,%1
+ {sr.|srw.} %0,%1,%2
+ {sri.|srwi.} %0,%1,%h2
+ #
+ #
+ #"
+ [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,4,8,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 3 "mask_operand" "n")))]
+ "includes_rshift_p (operands[2], operands[3])"
+ "{rlinm|rlwinm} %0,%1,%s2,%m3,%M3")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:SI 3 "mask_operand" "n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=r,r"))]
+ "includes_rshift_p (operands[2], operands[3])"
+ "@
+ {rlinm.|rlwinm.} %4,%1,%s2,%m3,%M3
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "includes_rshift_p (operands[2], operands[3]) && reload_completed"
+ [(set (match_dup 4)
+ (and:SI (lshiftrt:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:SI 3 "mask_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (and:SI (lshiftrt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "includes_rshift_p (operands[2], operands[3])"
+ "@
+ {rlinm.|rlwinm.} %0,%1,%s2,%m3,%M3
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:SI (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "mask_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (lshiftrt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "includes_rshift_p (operands[2], operands[3]) && reload_completed"
+ [(set (match_dup 0)
+ (and:SI (lshiftrt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (255))"
+ "{rlinm|rlwinm} %0,%1,%s2,0xff")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "includes_rshift_p (operands[2], GEN_INT (255))"
+ "@
+ {rlinm.|rlwinm.} %3,%1,%s2,0xff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:QI
+ (lshiftrt:SI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (255))"
+ "@
+ {rlinm.|rlwinm.} %0,%1,%s2,0xff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "{rlinm|rlwinm} %0,%1,%s2,0xffff")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "@
+ {rlinm.|rlwinm.} %3,%1,%s2,0xffff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:HI
+ (lshiftrt:SI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "@
+ {rlinm.|rlwinm.} %0,%1,%s2,0xffff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
+ "includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (ashiftrt:SI (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 31)))]
+ "TARGET_POWER"
+ "rrib %0,%1,%2")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (lshiftrt:SI (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 31)))]
+ "TARGET_POWER"
+ "rrib %0,%1,%2")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "gpc_reg_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "gpc_reg_operand" "r"))
+ (zero_extract:SI (match_operand:SI 2 "gpc_reg_operand" "r")
+ (const_int 1)
+ (const_int 0)))]
+ "TARGET_POWER"
+ "rrib %0,%1,%2")
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_POWER)
+ emit_insn (gen_ashrsi3_power (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_ashrsi3_no_power (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "ashrsi3_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))
+ (clobber (match_scratch:SI 3 "=q,X"))]
+ "TARGET_POWER"
+ "@
+ srea %0,%1,%2
+ {srai|srawi} %0,%1,%h2"
+ [(set_attr "type" "shift")])
+
+(define_insn "ashrsi3_no_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ "! TARGET_POWER"
+ "@
+ {sra|sraw} %0,%1,%2
+ {srai|srawi} %0,%1,%h2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn "*ashrsi3_64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (sign_extend:DI
+ (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i"))))]
+ "TARGET_POWERPC64"
+ "@
+ {sra|sraw} %0,%1,%2
+ {srai|srawi} %0,%1,%h2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))
+ (clobber (match_scratch:SI 4 "=q,X,q,X"))]
+ "TARGET_POWER"
+ "@
+ srea. %3,%1,%2
+ {srai.|srawi.} %3,%1,%h2
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ "! TARGET_POWER"
+ "@
+ {sra.|sraw.} %3,%1,%2
+ {srai.|srawi.} %3,%1,%h2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "! TARGET_POWER && reload_completed"
+ [(set (match_dup 3)
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=q,X,q,X"))]
+ "TARGET_POWER"
+ "@
+ srea. %0,%1,%2
+ {srai.|srawi.} %0,%1,%h2
+ #
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER"
+ "@
+ {sra.|sraw.} %0,%1,%2
+ {srai.|srawi.} %0,%1,%h2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+;; Built-in expanders that replace a division with FRE reciprocal-estimate
+;; instructions and the necessary fixup instructions.
+(define_expand "recip<mode>3"
+ [(match_operand:RECIPF 0 "gpc_reg_operand" "")
+ (match_operand:RECIPF 1 "gpc_reg_operand" "")
+ (match_operand:RECIPF 2 "gpc_reg_operand" "")]
+ "RS6000_RECIP_HAVE_RE_P (<MODE>mode)"
+{
+ rs6000_emit_swdiv (operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+;; Split to implement the division via FRE/FRES/etc. plus a fixup instead of
+;; the normal hardware division.  This is only done before register
+;; allocation and with -ffast-math.  It must appear before the divsf3/divdf3
+;; insns.
+(define_split
+ [(set (match_operand:RECIPF 0 "gpc_reg_operand" "")
+ (div:RECIPF (match_operand 1 "gpc_reg_operand" "")
+ (match_operand 2 "gpc_reg_operand" "")))]
+ "RS6000_RECIP_AUTO_RE_P (<MODE>mode)
+ && can_create_pseudo_p () && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math"
+ [(const_int 0)]
+{
+ rs6000_emit_swdiv (operands[0], operands[1], operands[2], true);
+ DONE;
+})
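+
+;; As an illustration only (the function name is ours, not part of the
+;; port), a plain C division such as
+;;
+;;   double scale (double x, double d)
+;;   {
+;;     return x / d;   /* may become fre plus a Newton-Raphson fixup */
+;;   }
+;;
+;; may be split this way when compiled with -ffast-math (which implies
+;; finite-math-only, no trapping math, and reciprocal math) for a CPU
+;; whose reciprocal estimate is considered accurate enough.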
+
+;; Builtins to replace 1/sqrt(x) with instructions using RSQRTE and the
+;; appropriate fixup.
+(define_expand "rsqrt<mode>2"
+ [(match_operand:RECIPF 0 "gpc_reg_operand" "")
+ (match_operand:RECIPF 1 "gpc_reg_operand" "")]
+ "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)"
+{
+ rs6000_emit_swrsqrt (operands[0], operands[1]);
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "! TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Floating-point insns, excluding normal data motion.
+;;
+;; PowerPC has a full set of single-precision floating point instructions.
+;;
+;; For the POWER architecture, we pretend that we have both SFmode and
+;; DFmode insns, while in fact all fp insns are done in double precision.
+;; The only conversions we do are when storing to memory; in that case, we
+;; use the "frsp" instruction before storing.
+;;
+;; Note that when we store into a single-precision memory location, we need
+;; to use the frsp insn first.  If the register being stored isn't dead, we
+;; need a scratch register for the frsp.  But this is difficult when the
+;; store is done by reload.  It is not incorrect to do the frsp on the
+;; register in this case; we merely lose extra precision that we would have
+;; gotten anyway but was never guaranteed.  Perhaps this should be tightened
+;; up at some point.
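+;;
+;; A sketch of the frsp-before-store behavior (the function is ours,
+;; purely illustrative):
+;;
+;;   void store_float (float *p, double d)
+;;   {
+;;     *p = d;   /* round with frsp, then store single precision */
+;;   }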
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn_and_split "*extendsfdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d")
+ (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "@
+ #
+ fmr %0,%1
+ lfs%U1%X1 %0,%1"
+ "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+}
+ [(set_attr "type" "fp,fp,fpload")])
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*truncdfsf2_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "frsp %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "aux_truncdfsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRSP))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "frsp %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (neg:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn "*negsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fneg %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn "*abssf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fnabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "addsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (plus:SF (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fadds %0,%1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_s")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{fa|fadd} %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_expand "subsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (minus:SF (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fsubs %0,%1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_s")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{fs|fsub} %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_expand "mulsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fmuls %0,%1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_mul_s")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "{fm|fmul} %0,%1,%2"
+ [(set_attr "type" "dmul")])
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (div:SF (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "fdivs %0,%1,%2"
+ [(set_attr "type" "sdiv")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "{fd|fdiv} %0,%1,%2"
+ [(set_attr "type" "ddiv")])
+
+(define_insn "fres"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_FRES"
+ "fres %0,%1"
+ [(set_attr "type" "fp")])
+
+; builtin fmaf support
+(define_insn "*fmasf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+{
+ return (TARGET_POWERPC
+ ? "fmadds %0,%1,%2,%3"
+ : "{fma|fmadd} %0,%1,%2,%3");
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmssf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+{
+ return (TARGET_POWERPC
+ ? "fmsubs %0,%1,%2,%3"
+ : "{fms|fmsub} %0,%1,%2,%3");
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*nfmasf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+{
+ return (TARGET_POWERPC
+ ? "fnmadds %0,%1,%2,%3"
+ : "{fnma|fnmadd} %0,%1,%2,%3");
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*nfmssf4_fpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+{
+ return (TARGET_POWERPC
+ ? "fnmsubs %0,%1,%2,%3"
+ : "{fnms|fnmsub} %0,%1,%2,%3");
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
+ "(TARGET_PPC_GPOPT || TARGET_POWER2 || TARGET_XILINX_FPU)
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && !TARGET_SIMPLE_FPU"
+ "")
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
+ "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT
+ && TARGET_FPRS && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "fsqrts %0,%1"
+ [(set_attr "type" "ssqrt")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
+ "TARGET_POWER2 && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+ "fsqrt %0,%1"
+ [(set_attr "type" "dsqrt")])
+
+(define_insn "*rsqrtsf_internal1"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
+ UNSPEC_RSQRT))]
+ "TARGET_FRSQRTES"
+ "frsqrtes %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "copysign<mode>3"
+ [(set (match_dup 3)
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
+ (set (match_dup 4)
+ (neg:SFDF (abs:SFDF (match_dup 1))))
+ (set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (if_then_else:SFDF (ge (match_operand:SFDF 2 "gpc_reg_operand" "")
+ (match_dup 5))
+ (match_dup 3)
+ (match_dup 4)))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
+ && ((TARGET_PPC_GFXOPT
+ && !HONOR_NANS (<MODE>mode)
+ && !HONOR_SIGNED_ZEROS (<MODE>mode))
+ || TARGET_CMPB
+ || VECTOR_UNIT_VSX_P (<MODE>mode))"
+{
+ if (TARGET_CMPB || VECTOR_UNIT_VSX_P (<MODE>mode))
+ {
+ emit_insn (gen_copysign<mode>3_fcpsgn (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = CONST0_RTX (<MODE>mode);
+ })
+
+;; Use an unspec rather than providing an if-then-else in RTL, to prevent the
+;; compiler from optimizing away -0.0.
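+;;
+;; Illustration (ours, not from the port): an RTL (ge ... 0.0) test cannot
+;; tell -0.0 from +0.0, so a sign copy such as
+;;
+;;   double set_sign (double x, double y)
+;;   {
+;;     return __builtin_copysign (x, y);  /* fcpsgn where TARGET_CMPB */
+;;   }
+;;
+;; must stay opaque to that kind of folding.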
+(define_insn "copysign<mode>3_fcpsgn"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_CMPB && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "fcpsgn %0,%2,%1"
+ [(set_attr "type" "fp")])
+
+;; For MIN, MAX, and conditional move, we use DEFINE_EXPANDs that involve an
+;; fsel instruction and some auxiliary computations.  Then we just have a
+;; single DEFINE_INSN for fsel and the define_splits to recreate them if they
+;; are produced by combine.
+(define_expand "smaxsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !flag_trapping_math"
+ "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
+
+(define_expand "sminsf3"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" ""))
+ (match_dup 2)
+ (match_dup 1)))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !flag_trapping_math"
+ "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
+
+(define_split
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (match_operator:SF 3 "min_max_operator"
+ [(match_operand:SF 1 "gpc_reg_operand" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")]))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_SINGLE_FLOAT && !flag_trapping_math"
+ [(const_int 0)]
+ "
+{ rs6000_emit_minmax (operands[0], GET_CODE (operands[3]),
+ operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+ (if_then_else:GPR (match_operand 1 "comparison_operator" "")
+ (match_operand:GPR 2 "gpc_reg_operand" "")
+ (match_operand:GPR 3 "gpc_reg_operand" "")))]
+ "TARGET_ISEL<sel>"
+ "
+{
+ if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; We use the BASE_REGS for the isel input operands because, if rA is 0,
+;; the value 0 (not the register contents) is placed in rD when the
+;; condition is true.  Similarly for rB, because we may switch the operands
+;; and rB may end up being rA.
+;;
+;; We need two patterns: an unsigned and a signed pattern.  We could leave
+;; out the mode in operand 4 and use one pattern, but reload can change the
+;; mode underneath our feet and then get confused trying to reload the
+;; value.
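+;;
+;; Illustrative only (name ours): a conditional move that isel implements
+;; without a branch:
+;;
+;;   int pick (int c, int a, int b)
+;;   {
+;;     return c ? a : b;   /* e.g. cmpwi on c, then isel */
+;;   }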
+(define_insn "isel_signed_<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
+ (if_then_else:GPR
+ (match_operator 1 "scc_comparison_operator"
+ [(match_operand:CC 4 "cc_reg_operand" "y,y")
+ (const_int 0)])
+ (match_operand:GPR 2 "reg_or_cint_operand" "O,b")
+ (match_operand:GPR 3 "gpc_reg_operand" "r,r")))]
+ "TARGET_ISEL<sel>"
+ "*
+{ return output_isel (operands); }"
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
+
+(define_insn "isel_unsigned_<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
+ (if_then_else:GPR
+ (match_operator 1 "scc_comparison_operator"
+ [(match_operand:CCUNS 4 "cc_reg_operand" "y,y")
+ (const_int 0)])
+ (match_operand:GPR 2 "reg_or_cint_operand" "O,b")
+ (match_operand:GPR 3 "gpc_reg_operand" "r,r")))]
+ "TARGET_ISEL<sel>"
+ "*
+{ return output_isel (operands); }"
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
+
+;; These patterns can be useful for combine; they let combine know that
+;; isel can handle reversed comparisons so long as the operands are
+;; registers.
+
+(define_insn "*isel_reversed_signed_<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (if_then_else:GPR
+ (match_operator 1 "scc_rev_comparison_operator"
+ [(match_operand:CC 4 "cc_reg_operand" "y")
+ (const_int 0)])
+ (match_operand:GPR 2 "gpc_reg_operand" "b")
+ (match_operand:GPR 3 "gpc_reg_operand" "b")))]
+ "TARGET_ISEL<sel>"
+ "*
+{ return output_isel (operands); }"
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
+
+(define_insn "*isel_reversed_unsigned_<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (if_then_else:GPR
+ (match_operator 1 "scc_rev_comparison_operator"
+ [(match_operand:CCUNS 4 "cc_reg_operand" "y")
+ (const_int 0)])
+ (match_operand:GPR 2 "gpc_reg_operand" "b")
+ (match_operand:GPR 3 "gpc_reg_operand" "b")))]
+ "TARGET_ISEL<sel>"
+ "*
+{ return output_isel (operands); }"
+ [(set_attr "type" "isel")
+ (set_attr "length" "4")])
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "gpc_reg_operand" "")
+ (match_operand:SF 3 "gpc_reg_operand" "")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "
+{
+ if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_insn "*fselsfsf4"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 4 "zero_fp_constant" "F"))
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fsel %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*fseldfsf4"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (if_then_else:SF (ge (match_operand:DF 1 "gpc_reg_operand" "d")
+ (match_operand:DF 4 "zero_fp_constant" "F"))
+ (match_operand:SF 2 "gpc_reg_operand" "f")
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_SINGLE_FLOAT"
+ "fsel %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (neg:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*negdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fneg %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (abs:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*absdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "*nabsdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fnabs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (plus:DF (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*adddf3_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
+ (match_operand:DF 2 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "{fa|fadd} %0,%1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_d")])
+
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (minus:DF (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*subdf3_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (minus:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+ (match_operand:DF 2 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "{fs|fsub} %0,%1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_d")])
+
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
+
+(define_insn "*muldf3_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
+ (match_operand:DF 2 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "{fm|fmul} %0,%1,%2"
+ [(set_attr "type" "dmul")
+ (set_attr "fp_type" "fp_mul_d")])
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (div:DF (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)
+ && !TARGET_SIMPLE_FPU"
+ "")
+
+(define_insn "*divdf3_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (div:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+ (match_operand:DF 2 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "{fd|fdiv} %0,%1,%2"
+ [(set_attr "type" "ddiv")])
+
+(define_insn "*fred_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+ "TARGET_FRE && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fre %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "*rsqrtdf_internal1"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
+ UNSPEC_RSQRT))]
+ "TARGET_FRSQRTE && !VECTOR_UNIT_VSX_P (DFmode)"
+ "frsqrte %0,%1"
+ [(set_attr "type" "fp")])
+
+; builtin fma support
+(define_insn "*fmadf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (match_operand:DF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fma|fmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmsdf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*nfmadf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*nfmsdf4_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_NONE_P (DFmode)"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "(TARGET_PPC_GPOPT || TARGET_POWER2) && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT"
+ "")
+
+(define_insn "*sqrtdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ "(TARGET_PPC_GPOPT || TARGET_POWER2) && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fsqrt %0,%1"
+ [(set_attr "type" "dsqrt")])
+
+;; The conditional move instructions allow us to perform max and min
+;; operations even when there are no dedicated min/max instructions, so long
+;; as trapping math is disabled.
+
+(define_expand "smaxdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" ""))
+ (match_dup 1)
+ (match_dup 2)))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !flag_trapping_math"
+ "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
+
+(define_expand "smindf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" ""))
+ (match_dup 2)
+ (match_dup 1)))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !flag_trapping_math"
+ "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
+
+(define_split
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operator:DF 3 "min_max_operator"
+ [(match_operand:DF 1 "gpc_reg_operand" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")]))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !flag_trapping_math"
+ [(const_int 0)]
+ "
+{ rs6000_emit_minmax (operands[0], GET_CODE (operands[3]),
+ operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "movdfcc"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (if_then_else:DF (match_operand 1 "comparison_operator" "")
+ (match_operand:DF 2 "gpc_reg_operand" "")
+ (match_operand:DF 3 "gpc_reg_operand" "")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "
+{
+ if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_insn "*fseldfdf4"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "d")
+ (match_operand:DF 4 "zero_fp_constant" "F"))
+ (match_operand:DF 2 "gpc_reg_operand" "d")
+ (match_operand:DF 3 "gpc_reg_operand" "d")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "fsel %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn "*fselsfdf4"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (if_then_else:DF (ge (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 4 "zero_fp_constant" "F"))
+ (match_operand:DF 2 "gpc_reg_operand" "d")
+ (match_operand:DF 3 "gpc_reg_operand" "d")))]
+ "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_SINGLE_FLOAT"
+ "fsel %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+;; Conversions to and from floating-point.
+
+; We don't define lfiwax/lfiwzx with the normal definition, because we
+; don't want to support putting SImode in FPR registers.
+(define_insn "lfiwax"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ UNSPEC_LFIWAX))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
+ "lfiwax %0,%y1"
+ [(set_attr "type" "fpload")])
+
+; This split must be run before register allocation because it allocates the
+; memory slot that is needed to move values to/from the FPR.  We don't
+; allocate it earlier, both to let the combiner merge insns together (where
+; the slot might then not be needed) and in case the insns are deleted as
+; dead code.
+
+(define_insn_and_split "floatsi<mode>2_lfiwax"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+ (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX
+ && <SI_CONVERT_FP> && can_create_pseudo_p ()"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp;
+
+ if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ tmp = convert_to_mode (DImode, src, false);
+ else
+ {
+ tmp = operands[2];
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (DImode);
+ if (MEM_P (src))
+ {
+ src = rs6000_address_for_fpconvert (src);
+ emit_insn (gen_lfiwax (tmp, src));
+ }
+ else
+ {
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+ emit_move_insn (stack, src);
+ emit_insn (gen_lfiwax (tmp, stack));
+ }
+ }
+ emit_insn (gen_floatdi<mode>2 (dest, tmp));
+ DONE;
+}"
+ [(set_attr "length" "12")
+ (set_attr "type" "fpload")])
+
+(define_insn_and_split "floatsi<mode>2_lfiwax_mem"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>")
+ (float:SFDF
+ (sign_extend:DI
+ (match_operand:SI 1 "memory_operand" "Z,Z"))))
+ (clobber (match_scratch:DI 2 "=0,d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX
+ && <SI_CONVERT_FP>"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ operands[1] = rs6000_address_for_fpconvert (operands[1]);
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (DImode);
+ emit_insn (gen_lfiwax (operands[2], operands[1]));
+ emit_insn (gen_floatdi<mode>2 (operands[0], operands[2]));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "fpload")])
+
+(define_insn "lfiwzx"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ UNSPEC_LFIWZX))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
+ "lfiwzx %0,%y1"
+ [(set_attr "type" "fpload")])
+
+(define_insn_and_split "floatunssi<mode>2_lfiwzx"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+ (unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX
+ && <SI_CONVERT_FP>"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp;
+
+ if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ tmp = convert_to_mode (DImode, src, true);
+ else
+ {
+ tmp = operands[2];
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (DImode);
+ if (MEM_P (src))
+ {
+ src = rs6000_address_for_fpconvert (src);
+ emit_insn (gen_lfiwzx (tmp, src));
+ }
+ else
+ {
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+ emit_move_insn (stack, src);
+ emit_insn (gen_lfiwzx (tmp, stack));
+ }
+ }
+ emit_insn (gen_floatdi<mode>2 (dest, tmp));
+ DONE;
+}"
+ [(set_attr "length" "12")
+ (set_attr "type" "fpload")])
+
+(define_insn_and_split "floatunssi<mode>2_lfiwzx_mem"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>")
+ (unsigned_float:SFDF
+ (zero_extend:DI
+ (match_operand:SI 1 "memory_operand" "Z,Z"))))
+ (clobber (match_scratch:DI 2 "=0,d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX
+ && <SI_CONVERT_FP>"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ operands[1] = rs6000_address_for_fpconvert (operands[1]);
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (DImode);
+ emit_insn (gen_lfiwzx (operands[2], operands[1]));
+ emit_insn (gen_floatdi<mode>2 (operands[0], operands[2]));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "fpload")])
+
+; For each of these conversions, there is a define_expand, a define_insn
+; with a '#' template, and a define_split (with C code). The idea is
+; to allow constant folding with the template of the define_insn,
+; then to have the insns split later (between sched1 and final).
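+;
+; For instance (an illustration, not part of the port), the expander below
+; covers the plain C conversion
+;
+;   double widen (int i)
+;   {
+;     return i;   /* fcfid path, lfiwax path, or the magic-number
+;                    sequence split out further below */
+;   }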
+
+(define_expand "floatsidf2"
+ [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (match_dup 6))])]
+ "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "
+{
+ if (TARGET_E500_DOUBLE)
+ {
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_spe_floatsidf2 (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_LFIWAX && TARGET_FCFID)
+ {
+ emit_insn (gen_floatsidf2_lfiwax (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_FCFID)
+ {
+ rtx dreg = operands[1];
+ if (!REG_P (dreg))
+ dreg = force_reg (SImode, dreg);
+ dreg = convert_to_mode (DImode, dreg, false);
+ emit_insn (gen_floatdidf2 (operands[0], dreg));
+ DONE;
+ }
+
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, GEN_INT (0x43300000));
+ operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503601774854144\", DFmode));
+ operands[4] = rs6000_allocate_stack_temp (DFmode, true, false);
+ operands[5] = gen_reg_rtx (DFmode);
+ operands[6] = gen_reg_rtx (SImode);
+}")
+
+(define_insn_and_split "*floatsidf2_internal"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=&d")
+ (float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (use (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (use (match_operand:DF 3 "gpc_reg_operand" "d"))
+ (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o"))
+ (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d"))
+ (clobber (match_operand:SI 6 "gpc_reg_operand" "=&r"))]
+ "! TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ rtx lowword, highword;
+ gcc_assert (MEM_P (operands[4]));
+ highword = adjust_address (operands[4], SImode, 0);
+ lowword = adjust_address (operands[4], SImode, 4);
+ if (! WORDS_BIG_ENDIAN)
+ {
+ rtx tmp;
+ tmp = highword; highword = lowword; lowword = tmp;
+ }
+
+ emit_insn (gen_xorsi3 (operands[6], operands[1],
+ GEN_INT (~ (HOST_WIDE_INT) 0x7fffffff)));
+ emit_move_insn (lowword, operands[6]);
+ emit_move_insn (highword, operands[2]);
+ emit_move_insn (operands[5], operands[4]);
+ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3]));
+ DONE;
+}"
+ [(set_attr "length" "24")
+ (set_attr "type" "fp")])
+
+;; If we don't have a direct conversion to single precision, don't enable this
+;; conversion for 32-bit without fast math, because we don't have the insn to
+;; generate the fixup swizzle to avoid double rounding problems.
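+;;
+;; The conversion in question is simply (illustrative name ours):
+;;
+;;   float uwiden (unsigned int u)
+;;   {
+;;     return u;   /* fcfidus + lfiwzx when available */
+;;   }
+;;
+;; and is only enabled here in configurations where double rounding
+;; cannot occur.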
+(define_expand "floatunssisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (unsigned_float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT
+ && (!TARGET_FPRS
+ || (TARGET_FPRS
+ && ((TARGET_FCFIDUS && TARGET_LFIWZX)
+ || (TARGET_DOUBLE_FLOAT && TARGET_FCFID
+ && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))"
+ "
+{
+ if (!TARGET_FPRS)
+ {
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ else if (TARGET_LFIWZX && TARGET_FCFIDUS)
+ {
+ emit_insn (gen_floatunssisf2_lfiwzx (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ {
+ rtx dreg = operands[1];
+ if (!REG_P (dreg))
+ dreg = force_reg (SImode, dreg);
+ dreg = convert_to_mode (DImode, dreg, true);
+ emit_insn (gen_floatdisf2 (operands[0], dreg));
+ DONE;
+ }
+}")
+
+(define_expand "floatunssidf2"
+ [(parallel [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (unsigned_float:DF (match_operand:SI 1 "nonimmediate_operand" "")))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))])]
+ "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "
+{
+ if (TARGET_E500_DOUBLE)
+ {
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_insn (gen_spe_floatunssidf2 (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_LFIWZX && TARGET_FCFID)
+ {
+ emit_insn (gen_floatunssidf2_lfiwzx (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_FCFID)
+ {
+ rtx dreg = operands[1];
+ if (!REG_P (dreg))
+ dreg = force_reg (SImode, dreg);
+ dreg = convert_to_mode (DImode, dreg, true);
+ emit_insn (gen_floatdidf2 (operands[0], dreg));
+ DONE;
+ }
+
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, GEN_INT (0x43300000));
+ operands[3] = force_reg (DFmode, CONST_DOUBLE_ATOF (\"4503599627370496\", DFmode));
+ operands[4] = rs6000_allocate_stack_temp (DFmode, true, false);
+ operands[5] = gen_reg_rtx (DFmode);
+}")
+
+(define_insn_and_split "*floatunssidf2_internal"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=&d")
+ (unsigned_float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (use (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (use (match_operand:DF 3 "gpc_reg_operand" "d"))
+ (clobber (match_operand:DF 4 "offsettable_mem_operand" "=o"))
+ (clobber (match_operand:DF 5 "gpc_reg_operand" "=&d"))]
+ "! TARGET_FCFIDU && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !(TARGET_FCFID && TARGET_POWERPC64)"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ rtx lowword, highword;
+ gcc_assert (MEM_P (operands[4]));
+ highword = adjust_address (operands[4], SImode, 0);
+ lowword = adjust_address (operands[4], SImode, 4);
+ if (! WORDS_BIG_ENDIAN)
+ {
+ rtx tmp;
+ tmp = highword; highword = lowword; lowword = tmp;
+ }
+
+ emit_move_insn (lowword, operands[1]);
+ emit_move_insn (highword, operands[2]);
+ emit_move_insn (operands[5], operands[4]);
+ emit_insn (gen_subdf3 (operands[0], operands[5], operands[3]));
+ DONE;
+}"
+ [(set_attr "length" "20")
+ (set_attr "type" "fp")])
+
+(define_expand "fix_trunc<mode>si2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+ "(TARGET_POWER2 || TARGET_POWERPC) && TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && <TARGET_FLOAT>) || <E500_CONVERT>)"
+ "
+{
+ if (!<E500_CONVERT>)
+ {
+ rtx tmp, stack;
+
+ if (TARGET_STFIWX)
+ emit_insn (gen_fix_trunc<mode>si2_stfiwx (operands[0], operands[1]));
+ else
+ {
+ tmp = gen_reg_rtx (DImode);
+ stack = rs6000_allocate_stack_temp (DImode, true, false);
+ emit_insn (gen_fix_trunc<mode>si2_internal (operands[0], operands[1],
+ tmp, stack));
+ }
+ DONE;
+ }
+}")
+
+; Like the convert-to-float patterns, this insn must be split before
+; register allocation so that it can allocate the memory slot if it is
+; needed.
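+;
+; For example (illustrative name), a plain truncation
+;
+;   int trunc_to_int (double d)
+;   {
+;     return (int)d;   /* fctiwz, then stfiwx to move the low word
+;                         out of the FPR */
+;   }
+;
+; is what this pattern implements when TARGET_STFIWX holds.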
+(define_insn_and_split "fix_trunc<mode>si2_stfiwx"
+ [(set (match_operand:SI 0 "general_operand" "=rm")
+ (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT)
+ && TARGET_STFIWX && can_create_pseudo_p ()"
+ "#"
+ ""
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_fctiwz_<mode> (tmp, src));
+ if (MEM_P (dest))
+ {
+ dest = rs6000_address_for_fpconvert (dest);
+ emit_insn (gen_stfiwx (dest, tmp));
+ DONE;
+ }
+ else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ {
+ dest = gen_lowpart (DImode, dest);
+ emit_move_insn (dest, tmp);
+ DONE;
+ }
+ else
+ {
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+ emit_insn (gen_stfiwx (stack, tmp));
+ emit_move_insn (dest, stack);
+ DONE;
+ }
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "fp")])
+
+(define_insn_and_split "fix_trunc<mode>si2_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,?r")
+ (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>")))
+ (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d"))
+ (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))]
+ "(TARGET_POWER2 || TARGET_POWERPC) && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT"
+ "#"
+ ""
+ [(pc)]
+ "
+{
+ rtx lowword;
+ gcc_assert (MEM_P (operands[3]));
+ lowword = adjust_address (operands[3], SImode, WORDS_BIG_ENDIAN ? 4 : 0);
+
+ emit_insn (gen_fctiwz_<mode> (operands[2], operands[1]));
+ emit_move_insn (operands[3], operands[2]);
+ emit_move_insn (operands[0], lowword);
+ DONE;
+}"
+ [(set_attr "length" "16")
+ (set_attr "type" "fp")])
+
+(define_expand "fix_trunc<mode>di2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
+ && TARGET_FCFID"
+ "")
+
+(define_insn "*fix_trunc<mode>di2_fctidz"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
+ && TARGET_FCFID && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "fctidz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "fixuns_trunc<mode>si2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ && TARGET_STFIWX)
+ || <E500_CONVERT>)"
+ "
+{
+ if (!<E500_CONVERT>)
+ {
+ emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_insn_and_split "fixuns_trunc<mode>si2_stfiwx"
+ [(set (match_operand:SI 0 "general_operand" "=rm")
+ (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ
+ && TARGET_STFIWX && can_create_pseudo_p ()"
+ "#"
+ ""
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (DImode);
+
+ emit_insn (gen_fctiwuz_<mode> (tmp, src));
+ if (MEM_P (dest))
+ {
+ dest = rs6000_address_for_fpconvert (dest);
+ emit_insn (gen_stfiwx (dest, tmp));
+ DONE;
+ }
+ else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ {
+ dest = gen_lowpart (DImode, dest);
+ emit_move_insn (dest, tmp);
+ DONE;
+ }
+ else
+ {
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+ emit_insn (gen_stfiwx (stack, tmp));
+ emit_move_insn (dest, stack);
+ DONE;
+ }
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "fp")])
+
+(define_expand "fixuns_trunc<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unsigned_fix:DI (match_operand:SFDF 1 "register_operand" "")))]
+ "TARGET_HARD_FLOAT && (TARGET_FCTIDUZ || VECTOR_UNIT_VSX_P (<MODE>mode))"
+ "")
+
+(define_insn "*fixuns_trunc<mode>di2_fctiduz"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
+ && TARGET_FCTIDUZ && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "fctiduz %0,%1"
+ [(set_attr "type" "fp")])
+
+; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
+; rather than (set (subreg:SI (reg)) (fix:SI ...))
+; because the first makes it clear that operand 0 is not live
+; before the instruction.
+(define_insn "fctiwz_<mode>"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))]
+ UNSPEC_FCTIWZ))]
+ "(TARGET_POWER2 || TARGET_POWERPC) && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT"
+ "{fcirz|fctiwz} %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "fctiwuz_<mode>"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unspec:DI [(unsigned_fix:SI
+ (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))]
+ UNSPEC_FCTIWUZ))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ"
+ "fctiwuz %0,%1"
+ [(set_attr "type" "fp")])
+
+;; Only optimize (float (fix x)) -> friz if we are in fast-math mode, since
+;; the friz instruction does not truncate the value if the floating point
+;; value is < LONG_MIN or > LONG_MAX.
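+;;
+;; Illustrative source (ours) for this fold:
+;;
+;;   double trunc_fast (double x)
+;;   {
+;;     return (double)(long long)x;   /* a single friz under -ffast-math;
+;;                                       not exact if x is out of DImode
+;;                                       range */
+;;   }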
+(define_insn "*friz"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_FPRND
+ && !VECTOR_UNIT_VSX_P (DFmode) && flag_unsafe_math_optimizations
+ && !flag_trapping_math && TARGET_FRIZ"
+ "friz %0,%1"
+ [(set_attr "type" "fp")])
+
+;; Since FCTIWZ doesn't sign extend the upper bits, we have to do a store
+;; and a load to properly sign extend the value.  But with 32-bit memory ops
+;; we at least avoid the longer sequence of a store, a load into a GPR to
+;; sign extend, a store from the GPR, and a load back into the FPR.
+(define_insn_and_split "*round32<mode>2_fprs"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+ (float:SFDF
+ (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))))
+ (clobber (match_scratch:DI 2 "=d"))
+ (clobber (match_scratch:DI 3 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && <SI_CONVERT_FP> && TARGET_LFIWAX && TARGET_STFIWX && TARGET_FCFID
+ && can_create_pseudo_p ()"
+ "#"
+ ""
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+
+ if (GET_CODE (tmp1) == SCRATCH)
+ tmp1 = gen_reg_rtx (DImode);
+ if (GET_CODE (tmp2) == SCRATCH)
+ tmp2 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_fctiwz_<mode> (tmp1, src));
+ emit_insn (gen_stfiwx (stack, tmp1));
+ emit_insn (gen_lfiwax (tmp2, stack));
+ emit_insn (gen_floatdi<mode>2 (dest, tmp2));
+ DONE;
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "16")])
+
+(define_insn_and_split "*roundu32<mode>2_fprs"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+ (unsigned_float:SFDF
+ (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))))
+ (clobber (match_scratch:DI 2 "=d"))
+ (clobber (match_scratch:DI 3 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && TARGET_LFIWZX && TARGET_STFIWX && TARGET_FCFIDU
+ && can_create_pseudo_p ()"
+ "#"
+ ""
+ [(pc)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+ rtx stack = rs6000_allocate_stack_temp (SImode, false, true);
+
+ if (GET_CODE (tmp1) == SCRATCH)
+ tmp1 = gen_reg_rtx (DImode);
+ if (GET_CODE (tmp2) == SCRATCH)
+ tmp2 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_fctiwuz_<mode> (tmp1, src));
+ emit_insn (gen_stfiwx (stack, tmp1));
+ emit_insn (gen_lfiwzx (tmp2, stack));
+ emit_insn (gen_floatdi<mode>2 (dest, tmp2));
+ DONE;
+}
+ [(set_attr "type" "fpload")
+ (set_attr "length" "16")])
+
+;; No VSX equivalent to fctid
+(define_insn "lrint<mode>di2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+ (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_FCTID))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "fctid %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "btrunc<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+ UNSPEC_FRIZ))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "")
+
+(define_insn "*btrunc<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_FRIZ))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
+ && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "friz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "ceil<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+ UNSPEC_FRIP))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "")
+
+(define_insn "*ceil<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_FRIP))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
+ && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "frip %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_expand "floor<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+ UNSPEC_FRIM))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "")
+
+(define_insn "*floor<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_FRIM))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
+ && !VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "frim %0,%1"
+ [(set_attr "type" "fp")])
+
+;; No VSX equivalent to frin
+(define_insn "round<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+ UNSPEC_FRIN))]
+ "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "frin %0,%1"
+ [(set_attr "type" "fp")])
+
+; An UNSPEC is used so we don't have to support SImode in FP registers.
+(define_insn "stfiwx"
+ [(set (match_operand:SI 0 "memory_operand" "=Z")
+ (unspec:SI [(match_operand:DI 1 "gpc_reg_operand" "d")]
+ UNSPEC_STFIWX))]
+ "TARGET_PPC_GFXOPT"
+ "stfiwx %1,%y0"
+ [(set_attr "type" "fpstore")])
+
+;; If we don't have a direct conversion to single precision, don't enable this
+;; conversion for 32-bit without fast math, because we don't have the insn to
+;; generate the fixup swizzle to avoid double rounding problems.
+(define_expand "floatsisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT
+ && (!TARGET_FPRS
+ || (TARGET_FPRS
+ && ((TARGET_FCFIDS && TARGET_LFIWAX)
+ || (TARGET_DOUBLE_FLOAT && TARGET_FCFID
+ && (TARGET_POWERPC64 || flag_unsafe_math_optimizations)))))"
+ "
+{
+ if (!TARGET_FPRS)
+ {
+ if (!REG_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+ else if (TARGET_FCFIDS && TARGET_LFIWAX)
+ {
+ emit_insn (gen_floatsisf2_lfiwax (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_FCFID && TARGET_LFIWAX)
+ {
+ rtx dfreg = gen_reg_rtx (DFmode);
+ emit_insn (gen_floatsidf2_lfiwax (dfreg, operands[1]));
+ emit_insn (gen_truncdfsf2 (operands[0], dfreg));
+ DONE;
+ }
+ else
+ {
+ rtx dreg = operands[1];
+ if (!REG_P (dreg))
+ dreg = force_reg (SImode, dreg);
+ dreg = convert_to_mode (DImode, dreg, false);
+ emit_insn (gen_floatdisf2 (operands[0], dreg));
+ DONE;
+ }
+}")
+
+(define_expand "floatdidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float:DF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+ "")
+
+(define_insn "*floatdidf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+ "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fcfid %0,%1"
+ [(set_attr "type" "fp")])
+
+; Allow the combiner to merge source memory operands into the conversion so
+; that the optimizer/register allocator doesn't load the value too early into
+; a GPR and then use a store/load pair to move it to an FPR, suffering a
+; store-load hazard.  We split after reload to avoid the trip through the
+; GPRs (see the sketch after the pattern below).
+
+(define_insn_and_split "*floatdidf2_mem"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (float:DF (match_operand:DI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCFID"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:DF (match_dup 2)))]
+ ""
+ [(set_attr "length" "8")
+ (set_attr "type" "fpload")])
+
+(define_expand "floatunsdidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (unsigned_float:DF
+ (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))"
+ "")
+
+(define_insn "*floatunsdidf2_fcfidu"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FCFIDU && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fcfidu %0,%1"
+ [(set_attr "type" "fp")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*floatunsdidf2_mem"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (unsigned_float:DF (match_operand:DI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=d"))]
+ "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (unsigned_float:DF (match_dup 2)))]
+ ""
+ [(set_attr "length" "8")
+ (set_attr "type" "fpload")])
+
+(define_expand "floatdisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (float:SF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && (TARGET_FCFIDS || TARGET_POWERPC64 || flag_unsafe_math_optimizations)"
+ "
+{
+ if (!TARGET_FCFIDS)
+ {
+ rtx val = operands[1];
+ if (!flag_unsafe_math_optimizations)
+ {
+ rtx label = gen_label_rtx ();
+ val = gen_reg_rtx (DImode);
+ emit_insn (gen_floatdisf2_internal2 (val, operands[1], label));
+ emit_label (label);
+ }
+ emit_insn (gen_floatdisf2_internal1 (operands[0], val));
+ DONE;
+ }
+}")
+
+(define_insn "floatdisf2_fcfids"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS"
+ "fcfids %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn_and_split "*floatdisf2_mem"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (float:SF (match_operand:DI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=f"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+ "
+{
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_floatdisf2_fcfids (operands[0], operands[2]));
+ DONE;
+}"
+ [(set_attr "length" "8")])
+
+;; This is not IEEE compliant if the rounding mode is "round to nearest".
+;; If the DI->DF conversion is inexact, then it's possible to suffer
+;; from double rounding (a worked example follows the pattern below).
+;; Instead of creating a new cpu type for two FP operations, just use "fp".
+(define_insn_and_split "floatdisf2_internal1"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:DF 2 "=d"))]
+ "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (float:DF (match_dup 1)))
+ (set (match_dup 0)
+ (float_truncate:SF (match_dup 2)))]
+ ""
+ [(set_attr "length" "8")
+ (set_attr "type" "fp")])
+
+;; Twiddles bits to avoid double rounding.
+;; Bits that might be truncated when converting to DFmode are replaced
+;; by a bit that won't be lost at that stage, but is below the SFmode
+;; rounding position.
+(define_expand "floatdisf2_internal2"
+ [(set (match_dup 3) (ashiftrt:DI (match_operand:DI 1 "" "")
+ (const_int 53)))
+ (parallel [(set (match_operand:DI 0 "" "") (and:DI (match_dup 1)
+ (const_int 2047)))
+ (clobber (scratch:CC))])
+ (set (match_dup 3) (plus:DI (match_dup 3)
+ (const_int 1)))
+ (set (match_dup 0) (plus:DI (match_dup 0)
+ (const_int 2047)))
+ (set (match_dup 4) (compare:CCUNS (match_dup 3)
+ (const_int 2)))
+ (set (match_dup 0) (ior:DI (match_dup 0)
+ (match_dup 1)))
+ (parallel [(set (match_dup 0) (and:DI (match_dup 0)
+ (const_int -2048)))
+ (clobber (scratch:CC))])
+ (set (pc) (if_then_else (geu (match_dup 4) (const_int 0))
+ (label_ref (match_operand:DI 2 "" ""))
+ (pc)))
+ (set (match_dup 0) (match_dup 1))]
+ "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (CCUNSmode);
+}")
+
+(define_expand "floatunsdisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS"
+ "")
+
+(define_insn "floatunsdisf2_fcfidus"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS"
+ "fcfidus %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn_and_split "*floatunsdisf2_mem"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (unsigned_float:SF (match_operand:DI 1 "memory_operand" "m")))
+ (clobber (match_scratch:DI 2 "=f"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+ "
+{
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_floatunsdisf2_fcfidus (operands[0], operands[2]));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "fpload")])
+
+;; Define the DImode operations that can be done in a small number
+;; of instructions. The & constraints are to prevent the register
+;; allocator from allocating registers that overlap with the inputs
+;; (for example, having an input in 7,8 and an output in 6,7). We
+;; also allow for the output being the same as one of the inputs.
+
+(define_insn "*adddi3_noppc64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r,r,r")
+ (plus:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r,0,0")
+ (match_operand:DI 2 "reg_or_short_operand" "r,I,r,I")))]
+ "! TARGET_POWERPC64"
+ "*
+{
+ if (WORDS_BIG_ENDIAN)
+ return (GET_CODE (operands[2])) != CONST_INT
+ ? \"{a|addc} %L0,%L1,%L2\;{ae|adde} %0,%1,%2\"
+ : \"{ai|addic} %L0,%L1,%2\;{a%G2e|add%G2e} %0,%1\";
+ else
+ return (GET_CODE (operands[2])) != CONST_INT
+ ? \"{a|addc} %0,%1,%2\;{ae|adde} %L0,%L1,%L2\"
+ : \"{ai|addic} %0,%1,%2\;{a%G2e|add%G2e} %L0,%L1\";
+}"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "*subdi3_noppc64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r,r,r,r")
+ (minus:DI (match_operand:DI 1 "reg_or_short_operand" "r,I,0,r,I")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r,r,0,0")))]
+ "! TARGET_POWERPC64"
+ "*
+{
+ if (WORDS_BIG_ENDIAN)
+ return (GET_CODE (operands[1]) != CONST_INT)
+ ? \"{sf|subfc} %L0,%L2,%L1\;{sfe|subfe} %0,%2,%1\"
+ : \"{sfi|subfic} %L0,%L2,%1\;{sf%G1e|subf%G1e} %0,%2\";
+ else
+ return (GET_CODE (operands[1]) != CONST_INT)
+ ? \"{sf|subfc} %0,%2,%1\;{sfe|subfe} %L0,%L2,%L1\"
+ : \"{sfi|subfic} %0,%2,%1\;{sf%G1e|subf%G1e} %L0,%L2\";
+}"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "*negdi2_noppc64"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
+ (neg:DI (match_operand:DI 1 "gpc_reg_operand" "r,0")))]
+ "! TARGET_POWERPC64"
+ "*
+{
+ return (WORDS_BIG_ENDIAN)
+ ? \"{sfi|subfic} %L0,%L1,0\;{sfze|subfze} %0,%1\"
+ : \"{sfi|subfic} %0,%1,0\;{sfze|subfze} %L0,%L1\";
+}"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "gpc_reg_operand" ""))))]
+ "! TARGET_POWERPC64"
+ "
+{
+ if (! TARGET_POWER && ! TARGET_POWERPC)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_mull_call ());
+ if (WORDS_BIG_ENDIAN)
+ emit_move_insn (operands[0], gen_rtx_REG (DImode, 3));
+ else
+ {
+ emit_move_insn (operand_subword (operands[0], 0, 0, DImode),
+ gen_rtx_REG (SImode, 3));
+ emit_move_insn (operand_subword (operands[0], 1, 0, DImode),
+ gen_rtx_REG (SImode, 4));
+ }
+ DONE;
+ }
+ else if (TARGET_POWER)
+ {
+ emit_insn (gen_mulsidi3_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "mulsidi3_mq"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWER"
+ "mul %0,%1,%2\;mfmq %L0"
+ [(set_attr "type" "imul")
+ (set_attr "length" "8")])
+
+(define_insn "*mulsidi3_no_mq"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_POWERPC && ! TARGET_POWER && ! TARGET_POWERPC64"
+ "*
+{
+ return (WORDS_BIG_ENDIAN)
+ ? \"mulhw %0,%1,%2\;mullw %L0,%1,%2\"
+ : \"mulhw %L0,%1,%2\;mullw %0,%1,%2\";
+}"
+ [(set_attr "type" "imul")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "gpc_reg_operand" ""))))]
+ "TARGET_POWERPC && ! TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))
+ (const_int 32))))
+ (set (match_dup 4)
+ (mult:SI (match_dup 1)
+ (match_dup 2)))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ operands[3] = operand_subword (operands[0], endian, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1 - endian, 0, DImode);
+}")
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "gpc_reg_operand" ""))))]
+ "TARGET_POWERPC && ! TARGET_POWERPC64"
+ "
+{
+ if (TARGET_POWER)
+ {
+ emit_insn (gen_umulsidi3_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "umulsidi3_mq"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWERPC && TARGET_POWER"
+ "*
+{
+ return (WORDS_BIG_ENDIAN)
+ ? \"mulhwu %0,%1,%2\;mullw %L0,%1,%2\"
+ : \"mulhwu %L0,%1,%2\;mullw %0,%1,%2\";
+}"
+ [(set_attr "type" "imul")
+ (set_attr "length" "8")])
+
+(define_insn "*umulsidi3_no_mq"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_POWERPC && ! TARGET_POWER && ! TARGET_POWERPC64"
+ "*
+{
+ return (WORDS_BIG_ENDIAN)
+ ? \"mulhwu %0,%1,%2\;mullw %L0,%1,%2\"
+ : \"mulhwu %L0,%1,%2\;mullw %0,%1,%2\";
+}"
+ [(set_attr "type" "imul")
+ (set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "gpc_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "gpc_reg_operand" ""))))]
+ "TARGET_POWERPC && ! TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_dup 1))
+ (zero_extend:DI (match_dup 2)))
+ (const_int 32))))
+ (set (match_dup 4)
+ (mult:SI (match_dup 1)
+ (match_dup 2)))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ operands[3] = operand_subword (operands[0], endian, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1 - endian, 0, DImode);
+}")
+
+(define_expand "smulsi3_highpart"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "")))
+ (const_int 32))))]
+ ""
+ "
+{
+ if (! TARGET_POWER && ! TARGET_POWERPC)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, 3), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[2]);
+ emit_insn (gen_mulh_call ());
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 3));
+ DONE;
+ }
+ else if (TARGET_POWER)
+ {
+ emit_insn (gen_smulsi3_highpart_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "smulsi3_highpart_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWER"
+ "mul %0,%1,%2"
+ [(set_attr "type" "imul")])
+
+(define_insn "*smulsi3_highpart_no_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (const_int 32))))]
+ "TARGET_POWERPC && ! TARGET_POWER"
+ "mulhw %0,%1,%2"
+ [(set_attr "type" "imul")])
+
+(define_expand "umulsi3_highpart"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "")))
+ (const_int 32))))]
+ "TARGET_POWERPC"
+ "
+{
+ if (TARGET_POWER)
+ {
+ emit_insn (gen_umulsi3_highpart_mq (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "umulsi3_highpart_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=q"))]
+ "TARGET_POWERPC && TARGET_POWER"
+ "mulhwu %0,%1,%2"
+ [(set_attr "type" "imul")])
+
+(define_insn "*umulsi3_highpart_no_mq"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (const_int 32))))]
+ "TARGET_POWERPC && ! TARGET_POWER"
+ "mulhwu %0,%1,%2"
+ [(set_attr "type" "imul")])
+
+;; If operands 0 and 2 are in the same register, we have a problem. But
+;; operands 0 and 1 (the usual case) can be in the same register. That's
+;; why we have the strange constraints below.
+(define_insn "ashldi3_power"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,&r")
+ (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,0,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "M,i,r,r")))
+ (clobber (match_scratch:SI 3 "=X,q,q,q"))]
+ "TARGET_POWER"
+ "@
+ {sli|slwi} %0,%L1,%h2\;{cal %L0,0(0)|li %L0,0}
+ sl%I2q %L0,%L1,%h2\;sll%I2q %0,%1,%h2
+ sl%I2q %L0,%L1,%h2\;sll%I2q %0,%1,%h2
+ sl%I2q %L0,%L1,%h2\;sll%I2q %0,%1,%h2"
+ [(set_attr "length" "8")])
+
+(define_insn "lshrdi3_power"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,&r")
+ (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,0,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "M,i,r,r")))
+ (clobber (match_scratch:SI 3 "=X,q,q,q"))]
+ "TARGET_POWER"
+ "@
+ {s%A2i|s%A2wi} %L0,%1,%h2\;{cal %0,0(0)|li %0,0}
+ sr%I2q %0,%1,%h2\;srl%I2q %L0,%L1,%h2
+ sr%I2q %0,%1,%h2\;srl%I2q %L0,%L1,%h2
+ sr%I2q %0,%1,%h2\;srl%I2q %L0,%L1,%h2"
+ [(set_attr "length" "8")])
+
+;; Shift by a variable amount is too complex to be worth open-coding. We
+;; just handle shifts by constants.
+(define_insn "ashrdi3_power"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
+ (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "M,i")))
+ (clobber (match_scratch:SI 3 "=X,q"))]
+ "TARGET_POWER"
+ "@
+ {srai|srawi} %0,%1,31\;{srai|srawi} %L0,%1,%h2
+ sraiq %0,%1,%h2\;srliq %L0,%L1,%h2"
+ [(set_attr "type" "shift")
+ (set_attr "length" "8")])
+
+(define_insn "ashrdi3_no_power"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r")
+ (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "M,i")))]
+ "TARGET_32BIT && !TARGET_POWERPC64 && !TARGET_POWER && WORDS_BIG_ENDIAN"
+ "@
+ {srai|srawi} %0,%1,31\;{srai|srawi} %L0,%1,%h2
+ {sri|srwi} %L0,%L1,%h2\;insrwi %L0,%1,%h2,0\;{srai|srawi} %0,%1,%h2"
+ [(set_attr "type" "two,three")
+ (set_attr "length" "8,12")])
+
+(define_insn "*ashrdisi3_noppc64"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (subreg:SI (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (const_int 32)) 4))]
+ "TARGET_32BIT && !TARGET_POWERPC64"
+ "*
+{
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ return \"\";
+ else
+ return \"mr %0,%1\";
+}"
+ [(set_attr "length" "4")])
+
+
+;; PowerPC64 DImode operations.
+
+(define_expand "absdi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (abs:DI (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_POWERPC64"
+ "
+{
+ if (TARGET_ISEL)
+ emit_insn (gen_absdi2_isel (operands[0], operands[1]));
+ else
+ emit_insn (gen_absdi2_internal (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn_and_split "absdi2_internal"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
+ (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0")))
+ (clobber (match_scratch:DI 2 "=&r,&r"))]
+ "TARGET_POWERPC64 && !TARGET_ISEL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
+ (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (minus:DI (match_dup 0) (match_dup 2)))]
+ "")
+
+(define_insn_and_split "*nabsdi2"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0"))))
+ (clobber (match_scratch:DI 2 "=&r,&r"))]
+ "TARGET_POWERPC64 && !TARGET_ISEL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
+ (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (minus:DI (match_dup 2) (match_dup 0)))]
+ "")
+
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (mult:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:DI 2 "reg_or_short_operand" "r,I")))]
+ "TARGET_POWERPC64"
+ "@
+ mulld %0,%1,%2
+ mulli %0,%1,%2"
+ [(set (attr "type")
+ (cond [(match_operand:SI 2 "s8bit_cint_operand" "")
+ (const_string "imul3")
+ (match_operand:SI 2 "short_cint_operand" "")
+ (const_string "imul2")]
+ (const_string "lmul")))])
+
+(define_insn "*muldi3_internal1"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r"))]
+ "TARGET_POWERPC64"
+ "@
+ mulld. %3,%1,%2
+ #"
+ [(set_attr "type" "lmul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (mult:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (mult:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*muldi3_internal2"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (mult:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (mult:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64"
+ "@
+ mulld. %0,%1,%2
+ #"
+ [(set_attr "type" "lmul_compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (mult:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (mult:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (mult:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "smuldi3_highpart"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "gpc_reg_operand" "%r"))
+ (sign_extend:TI
+ (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_POWERPC64"
+ "mulhd %0,%1,%2"
+ [(set_attr "type" "lmul")])
+
+(define_insn "umuldi3_highpart"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (truncate:DI
+ (lshiftrt:TI (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "gpc_reg_operand" "%r"))
+ (zero_extend:TI
+ (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (const_int 64))))]
+ "TARGET_POWERPC64"
+ "mulhdu %0,%1,%2"
+ [(set_attr "type" "lmul")])
+
+(define_insn "rotldi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")))]
+ "TARGET_POWERPC64"
+ "@
+ rldcl %0,%1,%2,0
+ rldicl %0,%1,%H2,0"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %3,%1,%2,0
+ rldicl. %3,%1,%H2,0
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (rotate:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (rotate:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %0,%1,%2,0
+ rldicl. %0,%1,%H2,0
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (rotate:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (rotate:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal4"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (and:DI (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i"))
+ (match_operand:DI 3 "mask64_operand" "n,n")))]
+ "TARGET_POWERPC64"
+ "@
+ rldc%B3 %0,%1,%2,%S3
+ rldic%B3 %0,%1,%H2,%S3"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal5"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (and:DI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (match_operand:DI 3 "mask64_operand" "n,n,n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldc%B3. %4,%1,%2,%S3
+ rldic%B3. %4,%1,%H2,%S3
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:DI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" ""))
+ (match_operand:DI 3 "mask64_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 4)
+ (and:DI (rotate:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal6"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (and:DI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (match_operand:DI 3 "mask64_operand" "n,n,n,n"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_64BIT"
+ "@
+ rldc%B3. %0,%1,%2,%S3
+ rldic%B3. %0,%1,%H2,%S3
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:DI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" ""))
+ (match_operand:DI 3 "mask64_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal7"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
+ "TARGET_POWERPC64"
+ "@
+ rldcl %0,%1,%2,56
+ rldicl %0,%1,%H2,56"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal8"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %3,%1,%2,56
+ rldicl. %3,%1,%H2,56
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:QI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal9"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %0,%1,%2,56
+ rldicl. %0,%1,%H2,56
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal10"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
+ "TARGET_POWERPC64"
+ "@
+ rldcl %0,%1,%2,48
+ rldicl %0,%1,%H2,48"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal11"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %3,%1,%2,48
+ rldicl. %3,%1,%H2,48
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:HI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal12"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %0,%1,%2,48
+ rldicl. %0,%1,%H2,48
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal13"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
+ "TARGET_POWERPC64"
+ "@
+ rldcl %0,%1,%2,32
+ rldicl %0,%1,%H2,32"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal14"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %3,%1,%2,32
+ rldicl. %3,%1,%H2,32
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:SI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 0)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal15"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_64BIT"
+ "@
+ rldcl. %0,%1,%2,32
+ rldicl. %0,%1,%H2,32
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 0))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")))]
+ "TARGET_POWERPC64 || TARGET_POWER"
+ "
+{
+ if (TARGET_POWERPC64)
+ ;
+ else if (TARGET_POWER)
+ {
+ emit_insn (gen_ashldi3_power (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ FAIL;
+}")
+
+(define_insn "*ashldi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ "TARGET_POWERPC64"
+ "@
+ sld %0,%1,%2
+ sldi %0,%1,%H2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn "*ashldi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ sld. %3,%1,%2
+ sldi. %3,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (ashift:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashldi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashift:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "@
+ sld. %0,%1,%2
+ sldi. %0,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (ashift:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (ashift:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashldi3_internal4"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "TARGET_POWERPC64 && includes_rldic_lshift_p (operands[2], operands[3])"
+ "rldic %0,%1,%H2,%W3")
+
+(define_insn "ashldi3_internal5"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:DI 3 "const_int_operand" "n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 "=r,r"))]
+ "TARGET_64BIT && includes_rldic_lshift_p (operands[2], operands[3])"
+ "@
+ rldic. %4,%1,%H2,%W3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:DI 3 "const_int_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 ""))]
+ "TARGET_POWERPC64 && reload_completed
+ && includes_rldic_lshift_p (operands[2], operands[3])"
+ [(set (match_dup 4)
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashldi3_internal6"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:DI 3 "const_int_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_64BIT && includes_rldic_lshift_p (operands[2], operands[3])"
+ "@
+ rldic. %0,%1,%H2,%W3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:DI 3 "const_int_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWERPC64 && reload_completed
+ && includes_rldic_lshift_p (operands[2], operands[3])"
+ [(set (match_dup 0)
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashldi3_internal7"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:DI 3 "mask64_operand" "n")))]
+ "TARGET_POWERPC64 && includes_rldicr_lshift_p (operands[2], operands[3])"
+ "rldicr %0,%1,%H2,%S3")
+
+(define_insn "ashldi3_internal8"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:DI 3 "mask64_operand" "n,n"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 "=r,r"))]
+ "TARGET_64BIT && includes_rldicr_lshift_p (operands[2], operands[3])"
+ "@
+ rldicr. %4,%1,%H2,%S3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:DI 3 "mask64_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 4 ""))]
+ "TARGET_POWERPC64 && reload_completed
+ && includes_rldicr_lshift_p (operands[2], operands[3])"
+ [(set (match_dup 4)
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashldi3_internal9"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i"))
+ (match_operand:DI 3 "mask64_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_64BIT && includes_rldicr_lshift_p (operands[2], operands[3])"
+ "@
+ rldicr. %0,%1,%H2,%S3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (and:DI (ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:DI 3 "mask64_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWERPC64 && reload_completed
+ && includes_rldicr_lshift_p (operands[2], operands[3])"
+ [(set (match_dup 0)
+ (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")))]
+ "TARGET_POWERPC64 || TARGET_POWER"
+ "
+{
+ if (TARGET_POWERPC64)
+ ;
+ else if (TARGET_POWER)
+ {
+ emit_insn (gen_lshrdi3_power (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ FAIL;
+}")
+
+(define_insn "*lshrdi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ "TARGET_POWERPC64"
+ "@
+ srd %0,%1,%2
+ srdi %0,%1,%H2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn "*lshrdi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT "
+ "@
+ srd. %3,%1,%2
+ srdi. %3,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*lshrdi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "@
+ srd. %0,%1,%2
+ srdi. %0,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (lshiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")))]
+ "WORDS_BIG_ENDIAN"
+ "
+{
+ if (TARGET_POWERPC64)
+ ;
+ else if (TARGET_POWER && GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_ashrdi3_power (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_32BIT && GET_CODE (operands[2]) == CONST_INT
+ && WORDS_BIG_ENDIAN)
+ {
+ emit_insn (gen_ashrdi3_no_power (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ FAIL;
+}")
+
+(define_insn "*ashrdi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")))]
+ "TARGET_POWERPC64"
+ "@
+ srad %0,%1,%2
+ sradi %0,%1,%H2"
+ [(set_attr "type" "var_shift_rotate,shift")])
+
+(define_insn "*ashrdi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT"
+ "@
+ srad. %3,%1,%2
+ sradi. %3,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3)
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*ashrdi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT"
+ "@
+ srad. %0,%1,%2
+ sradi. %0,%1,%H2
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0)
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "anddi3"
+ [(parallel
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "and64_2_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
+ "TARGET_POWERPC64"
+ "")
+
+(define_insn "anddi3_mc"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
+ (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r,r,r,r,r")
+ (match_operand:DI 2 "and64_2_operand" "?r,S,T,K,J,t")))
+ (clobber (match_scratch:CC 3 "=X,X,X,x,x,X"))]
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "@
+ and %0,%1,%2
+ rldic%B2 %0,%1,0,%S2
+ rlwinm %0,%1,0,%m2,%M2
+ andi. %0,%1,%b2
+ andis. %0,%1,%u2
+ #"
+ [(set_attr "type" "*,*,*,fast_compare,fast_compare,*")
+ (set_attr "length" "4,4,4,4,4,8")])
+
+(define_insn "anddi3_nomc"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r,r,r")
+ (match_operand:DI 2 "and64_2_operand" "?r,S,T,t")))
+ (clobber (match_scratch:CC 3 "=X,X,X,X"))]
+ "TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
+ "@
+ and %0,%1,%2
+ rldic%B2 %0,%1,0,%S2
+ rlwinm %0,%1,0,%m2,%M2
+ #"
+ [(set_attr "length" "4,4,4,8")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "mask64_2_operand" "")))
+ (clobber (match_scratch:CC 3 ""))]
+ "TARGET_POWERPC64
+ && (fixed_regs[CR0_REGNO] || !logical_operand (operands[2], DImode))
+ && !mask_operand (operands[2], DImode)
+ && !mask64_operand (operands[2], DImode)"
+ [(set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 1)
+ (match_dup 4))
+ (match_dup 5)))
+ (set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 0)
+ (match_dup 6))
+ (match_dup 7)))]
+{
+ build_mask64_2_operands (operands[2], &operands[4]);
+})
+
+(define_insn "*anddi3_internal2_mc"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,x,x,x,?y,?y,?y,??y,??y,?y")
+ (compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r,r,r,r,r")
+ (match_operand:DI 2 "and64_2_operand" "r,S,T,K,J,t,r,S,T,K,J,t"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r,r,r,r,r,r,r,r,r"))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,X,X,X,X,x,x,X"))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "@
+ and. %3,%1,%2
+ rldic%B2. %3,%1,0,%S2
+ rlwinm. %3,%1,0,%m2,%M2
+ andi. %3,%1,%b2
+ andis. %3,%1,%u2
+ #
+ #
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fast_compare,compare,delayed_compare,fast_compare,\
+ fast_compare,compare,compare,compare,compare,compare,\
+ compare,compare")
+ (set_attr "length" "4,4,4,4,4,8,8,8,8,8,8,12")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_operand" "")
+ (compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "mask64_2_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (match_scratch:CC 4 ""))]
+ "TARGET_64BIT && reload_completed
+ && (fixed_regs[CR0_REGNO] || !logical_operand (operands[2], DImode))
+ && !mask_operand (operands[2], DImode)
+ && !mask64_operand (operands[2], DImode)"
+ [(set (match_dup 3)
+ (and:DI (rotate:DI (match_dup 1)
+ (match_dup 5))
+ (match_dup 6)))
+ (parallel [(set (match_dup 0)
+ (compare:CC (and:DI (rotate:DI (match_dup 3)
+ (match_dup 7))
+ (match_dup 8))
+ (const_int 0)))
+ (clobber (match_dup 3))])]
+ "
+{
+ build_mask64_2_operands (operands[2], &operands[5]);
+}")
+
+(define_insn "*anddi3_internal3_mc"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,x,x,x,x,?y,?y,?y,??y,??y,?y")
+ (compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r,r,r,r,r")
+ (match_operand:DI 2 "and64_2_operand" "r,S,T,K,J,t,r,S,T,K,J,t"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r,r,r,r,r,r,r,r")
+ (and:DI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:CC 4 "=X,X,X,X,X,X,X,X,X,x,x,X"))]
+ "TARGET_64BIT && rs6000_gen_cell_microcode"
+ "@
+ and. %0,%1,%2
+ rldic%B2. %0,%1,0,%S2
+ rlwinm. %0,%1,0,%m2,%M2
+ andi. %0,%1,%b2
+ andis. %0,%1,%u2
+ #
+ #
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fast_compare,compare,delayed_compare,fast_compare,\
+ fast_compare,compare,compare,compare,compare,compare,\
+ compare,compare")
+ (set_attr "length" "4,4,4,4,4,8,8,8,8,8,8,12")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "and64_2_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:CC 4 ""))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (and:DI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_operand" "")
+ (compare:CC (and:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "mask64_2_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (and:DI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:CC 4 ""))]
+ "TARGET_64BIT && reload_completed
+ && (fixed_regs[CR0_REGNO] || !logical_operand (operands[2], DImode))
+ && !mask_operand (operands[2], DImode)
+ && !mask64_operand (operands[2], DImode)"
+ [(set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 1)
+ (match_dup 5))
+ (match_dup 6)))
+ (parallel [(set (match_dup 3)
+ (compare:CC (and:DI (rotate:DI (match_dup 0)
+ (match_dup 7))
+ (match_dup 8))
+ (const_int 0)))
+ (set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 0)
+ (match_dup 7))
+ (match_dup 8)))])]
+ "
+{
+ build_mask64_2_operands (operands[2], &operands[5]);
+}")
+
+(define_expand "iordi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (ior:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
+ "TARGET_POWERPC64"
+ "
+{
+ if (non_logical_cint_operand (operands[2], DImode))
+ {
+ HOST_WIDE_INT value;
+ rtx tmp = ((!can_create_pseudo_p ()
+ || rtx_equal_p (operands[0], operands[1]))
+ ? operands[0] : gen_reg_rtx (DImode));
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ value = INTVAL (operands[2]);
+ emit_insn (gen_iordi3 (tmp, operands[1],
+ GEN_INT (value & (~ (HOST_WIDE_INT) 0xffff))));
+ }
+ else
+ {
+ value = CONST_DOUBLE_LOW (operands[2]);
+ emit_insn (gen_iordi3 (tmp, operands[1],
+ immed_double_const (value
+ & (~ (HOST_WIDE_INT) 0xffff),
+ 0, DImode)));
+ }
+
+ emit_insn (gen_iordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
+ DONE;
+ }
+}")
+
+(define_expand "xordi3"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (xor:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
+ "TARGET_POWERPC64"
+ "
+{
+ if (non_logical_cint_operand (operands[2], DImode))
+ {
+ HOST_WIDE_INT value;
+ rtx tmp = ((!can_create_pseudo_p ()
+ || rtx_equal_p (operands[0], operands[1]))
+ ? operands[0] : gen_reg_rtx (DImode));
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ value = INTVAL (operands[2]);
+ emit_insn (gen_xordi3 (tmp, operands[1],
+ GEN_INT (value & (~ (HOST_WIDE_INT) 0xffff))));
+ }
+ else
+ {
+ value = CONST_DOUBLE_LOW (operands[2]);
+ emit_insn (gen_xordi3 (tmp, operands[1],
+ immed_double_const (value
+ & (~ (HOST_WIDE_INT) 0xffff),
+ 0, DImode)));
+ }
+
+ emit_insn (gen_xordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
+ DONE;
+ }
+}")
+
+(define_insn "*booldi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
+ (match_operator:DI 3 "boolean_or_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "%r,r,r")
+ (match_operand:DI 2 "logical_operand" "r,K,JF")]))]
+ "TARGET_POWERPC64"
+ "@
+ %q3 %0,%1,%2
+ %q3i %0,%1,%b2
+ %q3is %0,%1,%u2")
+
+(define_insn "*booldi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_or_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ %q4. %3,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*booldi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_or_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "%r,r")
+ (match_operand:DI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_64BIT"
+ "@
+ %q4. %0,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+;; Split a logical operation that we can't do in one insn into two insns,
+;; each of which does one 16-bit part. This is used by combine.
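+;; For example, x | 0x12345678 does not fit a single ori or oris, so it
+;; becomes
+;;   oris rD,rS,0x1234
+;;   ori  rD,rD,0x5678
+;; one 16-bit immediate per instruction.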
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operator:DI 3 "boolean_or_operator"
+ [(match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "non_logical_cint_operand" "")]))]
+ "TARGET_POWERPC64"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 0) (match_dup 5))]
+"
+{
+ rtx i3,i4;
+
+ if (GET_CODE (operands[2]) == CONST_DOUBLE)
+ {
+ HOST_WIDE_INT value = CONST_DOUBLE_LOW (operands[2]);
+ i3 = immed_double_const (value & (~ (HOST_WIDE_INT) 0xffff),
+ 0, DImode);
+ i4 = GEN_INT (value & 0xffff);
+ }
+ else
+ {
+ i3 = GEN_INT (INTVAL (operands[2])
+ & (~ (HOST_WIDE_INT) 0xffff));
+ i4 = GEN_INT (INTVAL (operands[2]) & 0xffff);
+ }
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode,
+ operands[1], i3);
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode,
+ operands[0], i4);
+}")
+
+(define_insn "*boolcdi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (match_operator:DI 3 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (match_operand:DI 2 "gpc_reg_operand" "r")]))]
+ "TARGET_POWERPC64"
+ "%q3 %0,%2,%1")
+
+(define_insn "*boolcdi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "r,r"))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ %q4. %3,%2,%1
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" ""))
+ (match_operand:DI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolcdi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r"))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_64BIT"
+ "@
+ %q4. %0,%2,%1
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" ""))
+ (match_operand:DI 2 "gpc_reg_operand" "")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolccdi3_internal1"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (match_operator:DI 3 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "r"))
+ (not:DI (match_operand:DI 2 "gpc_reg_operand" "r"))]))]
+ "TARGET_POWERPC64"
+ "%q3 %0,%1,%2")
+
+(define_insn "*boolccdi3_internal2"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "r,r"))
+ (not:DI (match_operand:DI 2 "gpc_reg_operand" "r,r"))])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r"))]
+ "TARGET_64BIT"
+ "@
+ %q4. %3,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" ""))
+ (not:DI (match_operand:DI 2 "gpc_reg_operand" ""))])
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*boolccdi3_internal3"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r"))
+ (not:DI (match_operand:DI 2 "gpc_reg_operand" "r,r"))])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (match_dup 4))]
+ "TARGET_64BIT"
+ "@
+ %q4. %0,%1,%2
+ #"
+ [(set_attr "type" "fast_compare,compare")
+ (set_attr "length" "4,8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operator:DI 4 "boolean_operator"
+ [(not:DI (match_operand:DI 1 "gpc_reg_operand" ""))
+ (not:DI (match_operand:DI 2 "gpc_reg_operand" ""))])
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_dup 4))]
+ "TARGET_POWERPC64 && reload_completed"
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_expand "smindi3"
+ [(match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")]
+ "TARGET_ISEL64"
+ "
+{
+ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "smaxdi3"
+ [(match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")]
+ "TARGET_ISEL64"
+ "
+{
+ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "umindi3"
+ [(match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")]
+ "TARGET_ISEL64"
+ "
+{
+ rs6000_emit_minmax (operands[0], UMIN, operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "umaxdi3"
+ [(match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "gpc_reg_operand" "")]
+ "TARGET_ISEL64"
+ "
+{
+ rs6000_emit_minmax (operands[0], UMAX, operands[1], operands[2]);
+ DONE;
+}")
+
+
+;; Now define ways of moving data around.
+
+;; Set up a register with a value from the GOT table
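+;; For the small-model SVR4 PIC case (flag_pic == 1) this is a single
+;; word load through the GOT pointer (r30 under the V4 ABI), e.g.
+;;   lwz rD,sym@got(r30)
+;; The expander also handles "symbol + offset" by adding the offset in
+;; a separate instruction.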
+
+(define_expand "movsi_got"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (unspec:SI [(match_operand:SI 1 "got_operand" "")
+ (match_dup 2)] UNSPEC_MOVSI_GOT))]
+ "DEFAULT_ABI == ABI_V4 && flag_pic == 1"
+ "
+{
+ if (GET_CODE (operands[1]) == CONST)
+ {
+ rtx offset = const0_rtx;
+ HOST_WIDE_INT value;
+
+ operands[1] = eliminate_constant_term (XEXP (operands[1], 0), &offset);
+ value = INTVAL (offset);
+ if (value != 0)
+ {
+ rtx tmp = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (Pmode));
+ emit_insn (gen_movsi_got (tmp, operands[1]));
+ emit_insn (gen_addsi3 (operands[0], tmp, offset));
+ DONE;
+ }
+ }
+
+ operands[2] = rs6000_got_register (operands[1]);
+}")
+
+(define_insn "*movsi_got_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "got_no_const_operand" "")
+ (match_operand:SI 2 "gpc_reg_operand" "b")]
+ UNSPEC_MOVSI_GOT))]
+ "DEFAULT_ABI == ABI_V4 && flag_pic == 1"
+ "{l|lwz} %0,%a1@got(%2)"
+ [(set_attr "type" "load")])
+
+;; Used by sched, shorten_branches and final when the GOT pseudo reg
+;; didn't get allocated to a hard register.
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (unspec:SI [(match_operand:SI 1 "got_no_const_operand" "")
+ (match_operand:SI 2 "memory_operand" "")]
+ UNSPEC_MOVSI_GOT))]
+ "DEFAULT_ABI == ABI_V4
+ && flag_pic == 1
+ && (reload_in_progress || reload_completed)"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (unspec:SI [(match_dup 1)(match_dup 0)]
+ UNSPEC_MOVSI_GOT))]
+ "")
+
+;; For SI, we special-case integers that can't be loaded in one insn. We
+;; do the load 16 bits at a time. We could do this by loading from memory,
+;; and this is even supposed to be faster, but it is simpler not to get
+;; integers in the TOC.
+(define_insn "movsi_low"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand 2 "" ""))))]
+ "TARGET_MACHO && ! TARGET_64BIT"
+ "{l|lwz} %0,lo16(%2)(%1)"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+(define_insn "*movsi_internal1"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,r,r,m,r,r,r,r,r,*q,*c*l,*h,*h")
+ (match_operand:SI 1 "input_operand" "r,U,m,r,I,L,n,R,*h,r,r,r,0"))]
+ "!TARGET_SINGLE_FPU &&
+ (gpc_reg_operand (operands[0], SImode) || gpc_reg_operand (operands[1], SImode))"
+ "@
+ mr %0,%1
+ {cal|la} %0,%a1
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ {lil|li} %0,%1
+ {liu|lis} %0,%v1
+ #
+ {cal|la} %0,%a1
+ mf%1 %0
+ mt%0 %1
+ mt%0 %1
+ mt%0 %1
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "*,*,load,store,*,*,*,*,mfjmpr,*,mtjmpr,*,*")
+ (set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")])
+
+(define_insn "*movsi_internal1_single"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "=r,r,r,m,r,r,r,r,r,*q,*c*l,*h,*h,m,*f")
+ (match_operand:SI 1 "input_operand" "r,U,m,r,I,L,n,R,*h,r,r,r,0,f,m"))]
+ "TARGET_SINGLE_FPU &&
+ (gpc_reg_operand (operands[0], SImode) || gpc_reg_operand (operands[1], SImode))"
+ "@
+ mr %0,%1
+ {cal|la} %0,%a1
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ {lil|li} %0,%1
+ {liu|lis} %0,%v1
+ #
+ {cal|la} %0,%a1
+ mf%1 %0
+ mt%0 %1
+ mt%0 %1
+ mt%0 %1
+ {cror 0,0,0|nop}
+ stfs%U0%X0 %1, %0
+ lfs%U1%X1 %0, %1"
+ [(set_attr "type" "*,*,load,store,*,*,*,*,mfjmpr,*,mtjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4,4,4")])
+
+;; Split a load of a large constant into the appropriate two-insn
+;; sequence.
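+;; For example, (set r3 0x12345678) becomes
+;;   lis r3,0x1234
+;;   ori r3,r3,0x5678
+;; with rs6000_emit_set_const supplying the two partial constants.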
+
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "(unsigned HOST_WIDE_INT) (INTVAL (operands[1]) + 0x8000) >= 0x10000
+ && (INTVAL (operands[1]) & 0xffff) != 0"
+ [(set (match_dup 0)
+ (match_dup 2))
+ (set (match_dup 0)
+ (ior:SI (match_dup 0)
+ (match_dup 3)))]
+ "
+{ rtx tem = rs6000_emit_set_const (operands[0], SImode, operands[1], 2);
+
+ if (tem == operands[0])
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_insn "*mov<mode>_internal2"
+ [(set (match_operand:CC 2 "cc_reg_operand" "=y,x,?y")
+ (compare:CC (match_operand:P 1 "gpc_reg_operand" "0,r,r")
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r") (match_dup 1))]
+ ""
+ "@
+ {cmpi|cmp<wd>i} %2,%0,0
+ mr. %0,%1
+ #"
+ [(set_attr "type" "cmp,compare,cmp")
+ (set_attr "length" "4,4,8")])
+
+(define_split
+ [(set (match_operand:CC 2 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (match_operand:P 1 "gpc_reg_operand" "")
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "") (match_dup 1))]
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r,r,*q,*c*l,*h")
+ (match_operand:HI 1 "input_operand" "r,m,r,i,*h,r,r,0"))]
+ "gpc_reg_operand (operands[0], HImode)
+ || gpc_reg_operand (operands[1], HImode)"
+ "@
+ mr %0,%1
+ lhz%U1%X1 %0,%1
+ sth%U0%X0 %1,%0
+ {lil|li} %0,%w1
+ mf%1 %0
+ mt%0 %1
+ mt%0 %1
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "*,load,store,*,mfjmpr,*,mtjmpr,*")])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:INT 0 "general_operand" "")
+ (match_operand:INT 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m,r,r,*q,*c*l,*h")
+ (match_operand:QI 1 "input_operand" "r,m,r,i,*h,r,r,0"))]
+ "gpc_reg_operand (operands[0], QImode)
+ || gpc_reg_operand (operands[1], QImode)"
+ "@
+ mr %0,%1
+ lbz%U1%X1 %0,%1
+ stb%U0%X0 %1,%0
+ {lil|li} %0,%1
+ mf%1 %0
+ mt%0 %1
+ mt%0 %1
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "*,load,store,*,mfjmpr,*,mtjmpr,*")])
+
+;; Here is how to move condition codes around. When we store CC data in
+;; an integer register or memory, we store just the high-order 4 bits.
+;; This lets us avoid a shift in the most common case, CR0.
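+;; (The CR is 32 bits with CR0 in bits 0-3, the most-significant nibble
+;; in PowerPC bit numbering, so after mfcr the CR0 field already sits in
+;; the high-order four bits of the GPR; other CR fields need the rlwinm
+;; in the mfcr alternative below.)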
+(define_expand "movcc"
+ [(set (match_operand:CC 0 "nonimmediate_operand" "")
+ (match_operand:CC 1 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_insn "*movcc_internal1"
+ [(set (match_operand:CC 0 "nonimmediate_operand" "=y,x,?y,y,r,r,r,r,r,q,cl,r,m")
+ (match_operand:CC 1 "general_operand" "y,r,r,O,x,y,r,I,h,r,r,m,r"))]
+ "register_operand (operands[0], CCmode)
+ || register_operand (operands[1], CCmode)"
+ "@
+ mcrf %0,%1
+ mtcrf 128,%1
+ {rlinm|rlwinm} %1,%1,%F0,0xffffffff\;mtcrf %R0,%1\;{rlinm|rlwinm} %1,%1,%f0,0xffffffff
+ crxor %0,%0,%0
+ mfcr %0%Q1
+ mfcr %0%Q1\;{rlinm|rlwinm} %0,%0,%f1,0xf0000000
+ mr %0,%1
+ {lil|li} %0,%1
+ mf%1 %0
+ mt%0 %1
+ mt%0 %1
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%U1|stw%U0%U1} %1,%0"
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "0,3")
+ (const_string "cr_logical")
+ (eq_attr "alternative" "1,2")
+ (const_string "mtcr")
+ (eq_attr "alternative" "6,7,9")
+ (const_string "integer")
+ (eq_attr "alternative" "8")
+ (const_string "mfjmpr")
+ (eq_attr "alternative" "10")
+ (const_string "mtjmpr")
+ (eq_attr "alternative" "11")
+ (const_string "load")
+ (eq_attr "alternative" "12")
+ (const_string "store")
+ (ne (symbol_ref "TARGET_MFCRF") (const_int 0))
+ (const_string "mfcrf")
+ ]
+ (const_string "mfcr")))
+ (set_attr "length" "4,4,12,4,4,8,4,4,4,4,4,4,4")])
+
+;; For floating-point, we normally deal with the floating-point registers
+;; unless -msoft-float is used. The sole exception is that parameter passing
+;; can produce floating-point values in fixed-point registers. Unless the
+;; value is a simple constant or already in memory, we deal with this by
+;; allocating memory and copying the value explicitly via that memory location.
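+;; A minimal sketch of that last case on a 32-bit target, assuming a
+;; free stack slot at 8(r1): a DFmode value arriving in r3:r4 reaches an
+;; FPR only through memory, e.g.
+;;   stw r3,8(r1)
+;;   stw r4,12(r1)
+;;   lfd f1,8(r1)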
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
+
+(define_split
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (match_operand:SF 1 "const_double_operand" ""))]
+ "reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ long l;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+
+ if (! TARGET_POWERPC64)
+ operands[2] = operand_subword (operands[0], 0, 0, SFmode);
+ else
+ operands[2] = gen_lowpart (SImode, operands[0]);
+
+ operands[3] = gen_int_mode (l, SImode);
+}")
+
+(define_insn "*movsf_hardfloat"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,*q,!r,*h,!r,!r")
+ (match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,r,h,0,G,Fn"))]
+ "(gpc_reg_operand (operands[0], SFmode)
+ || gpc_reg_operand (operands[1], SFmode))
+ && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
+ "@
+ mr %0,%1
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ fmr %0,%1
+ lfs%U1%X1 %0,%1
+ stfs%U0%X0 %1,%0
+ mt%0 %1
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #"
+ [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,*,mfjmpr,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,8")])
+
+(define_insn "*movsf_softfloat"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,q,r,r,m,r,r,r,r,r,*h")
+ (match_operand:SF 1 "input_operand" "r,r,r,h,m,r,I,L,R,G,Fn,0"))]
+ "(gpc_reg_operand (operands[0], SFmode)
+ || gpc_reg_operand (operands[1], SFmode))
+ && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
+ "@
+ mr %0,%1
+ mt%0 %1
+ mt%0 %1
+ mf%1 %0
+ {l%U1%X1|lwz%U1%X1} %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0
+ {lil|li} %0,%1
+ {liu|lis} %0,%v1
+ {cal|la} %0,%a1
+ #
+ #
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "*,mtjmpr,*,mfjmpr,load,store,*,*,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,4")])
+
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], DFmode); DONE; }")
+
+(define_split
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operand:DF 1 "const_int_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 1))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DFmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
+#if HOST_BITS_PER_WIDE_INT == 32
+ operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
+#else
+ operands[4] = GEN_INT (value >> 32);
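+  /* Sign-extend the low 32 bits: XORing with 0x80000000 and then
+     subtracting it propagates bit 31 through the upper bits.  */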
+ operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
+#endif
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operand:DF 1 "const_double_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+
+ operands[2] = operand_subword (operands[0], endian, 0, DFmode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
+ operands[4] = gen_int_mode (l[endian], SImode);
+ operands[5] = gen_int_mode (l[1 - endian], SImode);
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operand:DF 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64 && reload_completed
+ && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) <= 31))"
+ [(set (match_dup 2) (match_dup 3))]
+ "
+{
+ int endian = (WORDS_BIG_ENDIAN == 0);
+ long l[2];
+ REAL_VALUE_TYPE rv;
+#if HOST_BITS_PER_WIDE_INT >= 64
+ HOST_WIDE_INT val;
+#endif
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+
+ operands[2] = gen_lowpart (DImode, operands[0]);
+ /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
+#if HOST_BITS_PER_WIDE_INT >= 64
+ val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
+ | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
+
+ operands[3] = gen_int_mode (val, DImode);
+#else
+ operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
+#endif
+}")
+
+;; Don't have reload use general registers to load a constant. First,
+;; it might not work if the output operand is the equivalent of
+;; a non-offsettable memref, but also it is less efficient than loading
+;; the constant into an FP register, since it will probably be used there.
+;; The "??" is a kludge until we can figure out a more reasonable way
+;; of handling these non-offsettable values.
+(define_insn "*movdf_hardfloat32"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=!r,??r,m,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,!r,!r,!r")
+ (match_operand:DF 1 "input_operand" "r,m,r,ws,wa,Z,Z,ws,wa,d,m,d,j,G,H,F"))]
+ "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ case 1:
+ case 2:
+ return \"#\";
+ case 3:
+ case 4:
+ return \"xxlor %x0,%x1,%x1\";
+ case 5:
+ case 6:
+ return \"lxsd%U1x %x0,%y1\";
+ case 7:
+ case 8:
+ return \"stxsd%U0x %x1,%y0\";
+ case 9:
+ return \"fmr %0,%1\";
+ case 10:
+ return \"lfd%U1%X1 %0,%1\";
+ case 11:
+ return \"stfd%U0%X0 %1,%0\";
+ case 12:
+ return \"xxlxor %x0,%x0,%x0\";
+ case 13:
+ case 14:
+ case 15:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*")
+ (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")])
+
+(define_insn "*movdf_softfloat32"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r")
+ (match_operand:DF 1 "input_operand" "r,m,r,G,H,F"))]
+ "! TARGET_POWERPC64
+ && ((TARGET_FPRS && TARGET_SINGLE_FLOAT)
+ || TARGET_SOFT_FLOAT || TARGET_E500_SINGLE)
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "#"
+ [(set_attr "type" "two,load,store,*,*,*")
+ (set_attr "length" "8,8,8,8,12,16")])
+
+; ld/std require word-aligned displacements -> 'Y' constraint.
+; List Y->r and r->Y before r->r for reload.
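+; (ld/std are DS-form: the displacement must be a multiple of 4, so e.g.
+; "ld r3,8(r4)" is encodable but "ld r3,6(r4)" is not; 'Y' only accepts
+; suitably aligned memory operands.)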
+(define_insn "*movdf_hardfloat64_mfpgpr"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r,r,d")
+ (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
+ "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "@
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
+ xxlor %x0,%x1,%x1
+ xxlor %x0,%x1,%x1
+ lxsd%U1x %x0,%y1
+ lxsd%U1x %x0,%y1
+ stxsd%U0x %x1,%y0
+ stxsd%U0x %x1,%y0
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ xxlxor %x0,%x0,%x0
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #
+ #
+ mftgpr %0,%1
+ mffgpr %0,%1"
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+
+; ld/std require word-aligned displacements -> 'Y' constraint.
+; List Y->r and r->Y before r->r for reload.
+(define_insn "*movdf_hardfloat64"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,d,d,m,wa,*c*l,!r,*h,!r,!r,!r")
+ (match_operand:DF 1 "input_operand" "r,Y,r,ws,wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F"))]
+ "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
+ && TARGET_DOUBLE_FLOAT
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "@
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
+ xxlor %x0,%x1,%x1
+ xxlor %x0,%x1,%x1
+ lxsd%U1x %x0,%y1
+ lxsd%U1x %x0,%y1
+ stxsd%U0x %x1,%y0
+ stxsd%U0x %x1,%y0
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ xxlxor %x0,%x0,%x0
+ mt%0 %1
+ mf%1 %0
+ {cror 0,0,0|nop}
+ #
+ #
+ #"
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
+
+(define_insn "*movdf_softfloat64"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
+ (match_operand:DF 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
+ "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "@
+ ld%U1%X1 %0,%1
+ std%U0%X0 %1,%0
+ mr %0,%1
+ mt%0 %1
+ mf%1 %0
+ #
+ #
+ #
+ {cror 0,0,0|nop}"
+ [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,8,12,16,4")])
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "general_operand" "")
+ (match_operand:TF 1 "any_operand" ""))]
+ "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+ "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
+
+; It's important to list the o->f and f->o moves before f->f because
+; otherwise reload, given m->f, will try to pick f->f and reload it,
+; which doesn't make progress. Likewise r->Y must be before r->r.
+(define_insn_and_split "*movtf_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=o,d,d,r,Y,r")
+ (match_operand:TF 1 "input_operand" "d,o,d,YGHF,r,r"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
+ && (gpc_reg_operand (operands[0], TFmode)
+ || gpc_reg_operand (operands[1], TFmode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "8,8,8,20,20,16")])
+
+(define_insn_and_split "*movtf_softfloat"
+ [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=r,Y,r")
+ (match_operand:TF 1 "input_operand" "YGHF,r,r"))]
+ "!TARGET_IEEEQUAD
+ && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
+ && (gpc_reg_operand (operands[0], TFmode)
+ || gpc_reg_operand (operands[1], TFmode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "20,20,16")])
+
+(define_expand "extenddftf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float_extend:TF (match_operand:DF 1 "input_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ if (TARGET_E500_DOUBLE)
+ emit_insn (gen_spe_extenddftf2 (operands[0], operands[1]));
+ else
+ emit_insn (gen_extenddftf2_fprs (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "extenddftf2_fprs"
+ [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float_extend:TF (match_operand:DF 1 "input_operand" "")))
+ (use (match_dup 2))])]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && TARGET_LONG_DOUBLE_128"
+{
+ operands[2] = CONST0_RTX (DFmode);
+ /* Generate GOT reference early for SVR4 PIC. */
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
+ operands[2] = validize_mem (force_const_mem (DFmode, operands[2]));
+})
+
+(define_insn_and_split "*extenddftf2_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=o,d,&d,r")
+ (float_extend:TF (match_operand:DF 1 "input_operand" "dr,md,md,rmGHF")))
+ (use (match_operand:DF 2 "zero_reg_mem_operand" "rd,m,d,n"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && TARGET_LONG_DOUBLE_128"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word),
+ operands[1]);
+ emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word),
+ operands[2]);
+ DONE;
+})
+
+(define_expand "extendsftf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ rtx tmp = gen_reg_rtx (DFmode);
+ emit_insn (gen_extendsfdf2 (tmp, operands[1]));
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "trunctfdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+ "")
+
+(define_insn_and_split "trunctfdf2_internal1"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "0,d")))]
+ "!TARGET_IEEEQUAD && !TARGET_XL_COMPAT
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "@
+ #
+ fmr %0,%1"
+ "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+}
+ [(set_attr "type" "fp")])
+
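+;; With IBM extended (double-double) format a TFmode value is the sum of
+;; two doubles, %1 holding the high part and %L1 the low part, so
+;; rounding it to DFmode is simply the fadd of the two halves in the
+;; insn below.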
+(define_insn "trunctfdf2_internal2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "d")))]
+ "!TARGET_IEEEQUAD && TARGET_XL_COMPAT
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && TARGET_LONG_DOUBLE_128"
+ "fadd %0,%1,%L1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_d")])
+
+(define_expand "trunctfsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ if (TARGET_E500_DOUBLE)
+ emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1]));
+ else
+ emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn_and_split "trunctfsf2_fprs"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:DF 2 "=d"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_LONG_DOUBLE_128"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (float_truncate:DF (match_dup 1)))
+ (set (match_dup 0)
+ (float_truncate:SF (match_dup 2)))]
+ "")
+
+(define_expand "floatsitf2"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "")
+ (float:TF (match_operand:SI 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ rtx tmp = gen_reg_rtx (DFmode);
+ expand_float (tmp, operands[1], false);
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ DONE;
+})
+
+; fadd, but rounding towards zero.
+; This is probably not the optimal code sequence.
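+; (FPSCR bits 30-31 are the rounding mode RN; "mtfsb1 31" followed by
+; "mtfsb0 30" sets RN = 0b01, round toward zero.  mffs first saves the
+; old FPSCR in %2 and the trailing mtfsf restores it.)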
+(define_insn "fix_trunc_helper"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+ (unspec:DF [(match_operand:TF 1 "gpc_reg_operand" "d")]
+ UNSPEC_FIX_TRUNC_TF))
+ (clobber (match_operand:DF 2 "gpc_reg_operand" "=&d"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "mffs %2\n\tmtfsb1 31\n\tmtfsb0 30\n\tfadd %0,%1,%L1\n\tmtfsf 1,%2"
+ [(set_attr "type" "fp")
+ (set_attr "length" "20")])
+
+(define_expand "fix_trunctfsi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && (TARGET_POWER2 || TARGET_POWERPC)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ if (TARGET_E500_DOUBLE)
+ emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1]));
+ else
+ emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "fix_trunctfsi2_fprs"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))])]
+ "!TARGET_IEEEQUAD
+ && (TARGET_POWER2 || TARGET_POWERPC)
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = assign_stack_temp (DImode, GET_MODE_SIZE (DImode), 0);
+})
+
+(define_insn_and_split "*fix_trunctfsi2_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (fix:SI (match_operand:TF 1 "gpc_reg_operand" "d")))
+ (clobber (match_operand:DF 2 "gpc_reg_operand" "=d"))
+ (clobber (match_operand:DF 3 "gpc_reg_operand" "=&d"))
+ (clobber (match_operand:DI 4 "gpc_reg_operand" "=d"))
+ (clobber (match_operand:DI 5 "offsettable_mem_operand" "=o"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "#"
+ ""
+ [(pc)]
+{
+ rtx lowword;
+ emit_insn (gen_fix_trunc_helper (operands[2], operands[1], operands[3]));
+
+ gcc_assert (MEM_P (operands[5]));
+ lowword = adjust_address (operands[5], SImode, WORDS_BIG_ENDIAN ? 4 : 0);
+
+ emit_insn (gen_fctiwz_df (operands[4], operands[2]));
+ emit_move_insn (operands[5], operands[4]);
+ emit_move_insn (operands[0], lowword);
+ DONE;
+})
+
+(define_expand "negtf2"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "")
+ (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+ "")
+
+(define_insn "negtf2_internal"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "=d")
+ (neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "*
+{
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"fneg %L0,%L1\;fneg %0,%1\";
+ else
+ return \"fneg %0,%1\;fneg %L0,%L1\";
+}"
+ [(set_attr "type" "fp")
+ (set_attr "length" "8")])
+
+(define_expand "abstf2"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "")
+ (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+ "
+{
+ rtx label = gen_label_rtx ();
+ if (TARGET_E500_DOUBLE)
+ {
+ if (flag_finite_math_only && !flag_trapping_math)
+ emit_insn (gen_spe_abstf2_tst (operands[0], operands[1], label));
+ else
+ emit_insn (gen_spe_abstf2_cmp (operands[0], operands[1], label));
+ }
+ else
+ emit_insn (gen_abstf2_internal (operands[0], operands[1], label));
+ emit_label (label);
+ DONE;
+}")
+
+(define_expand "abstf2_internal"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "")
+ (match_operand:TF 1 "gpc_reg_operand" ""))
+ (set (match_dup 3) (match_dup 5))
+ (set (match_dup 5) (abs:DF (match_dup 5)))
+ (set (match_dup 4) (compare:CCFP (match_dup 3) (match_dup 5)))
+ (set (pc) (if_then_else (eq (match_dup 4) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 6) (neg:DF (match_dup 6)))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && TARGET_LONG_DOUBLE_128"
+ "
+{
+ const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (CCFPmode);
+ operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
+ operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
+}")
+
+;; Next come the multi-word integer load and store and the load and store
+;; multiple insns.
+
+; List r->r after r->"o<>", otherwise reload will try to reload a
+; non-offsettable address by using r->r which won't make progress.
+(define_insn "*movdi_internal32"
+ [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=o<>,r,r,*d,*d,m,r,?wa")
+ (match_operand:DI 1 "input_operand" "r,r,m,d,m,d,IJKnGHF,O"))]
+ "! TARGET_POWERPC64
+ && (gpc_reg_operand (operands[0], DImode)
+ || gpc_reg_operand (operands[1], DImode))"
+ "@
+ #
+ #
+ #
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ #
+ xxlxor %x0,%x0,%x0"
+ [(set_attr "type" "load,*,store,fp,fpload,fpstore,*,vecsimple")])
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "const_int_operand" ""))]
+ "! TARGET_POWERPC64 && reload_completed
+ && gpr_or_gpr_p (operands[0], operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 1))]
+ "
+{
+ HOST_WIDE_INT value = INTVAL (operands[1]);
+ operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
+ DImode);
+ operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
+ DImode);
+#if HOST_BITS_PER_WIDE_INT == 32
+ operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
+#else
+ operands[4] = GEN_INT (value >> 32);
+ operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
+#endif
+}")
+
+(define_split
+ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
+ (match_operand:DIFD 1 "input_operand" ""))]
+ "reload_completed && !TARGET_POWERPC64
+ && gpr_or_gpr_p (operands[0], operands[1])"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
+
+(define_insn "*movdi_mfpgpr"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,r,*d,*d,m,r,*h,*h,r,*d")
+ (match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,R,d,m,d,*h,r,0,*d,r"))]
+ "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
+ && (gpc_reg_operand (operands[0], DImode)
+ || gpc_reg_operand (operands[1], DImode))"
+ "@
+ mr %0,%1
+ ld%U1%X1 %0,%1
+ std%U0%X0 %1,%0
+ li %0,%1
+ lis %0,%v1
+ #
+ {cal|la} %0,%a1
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ mf%1 %0
+ mt%0 %1
+ {cror 0,0,0|nop}
+ mftgpr %0,%1
+ mffgpr %0,%1"
+ [(set_attr "type" "*,load,store,*,*,*,*,fp,fpload,fpstore,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4")])
+
+(define_insn "*movdi_internal64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,r,*d,*d,m,r,*h,*h,?wa")
+ (match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,R,d,m,d,*h,r,0,O"))]
+ "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], DImode)
+ || gpc_reg_operand (operands[1], DImode))"
+ "@
+ mr %0,%1
+ ld%U1%X1 %0,%1
+ std%U0%X0 %1,%0
+ li %0,%1
+ lis %0,%v1
+ #
+ {cal|la} %0,%a1
+ fmr %0,%1
+ lfd%U1%X1 %0,%1
+ stfd%U0%X0 %1,%0
+ mf%1 %0
+ mt%0 %1
+ {cror 0,0,0|nop}
+ xxlxor %x0,%x0,%x0"
+ [(set_attr "type" "*,load,store,*,*,*,*,fp,fpload,fpstore,mfjmpr,mtjmpr,*,vecsimple")
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
+
+;; Immediate value valid for a single instruction hiding in a const_double.
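+;; (li takes a 16-bit signed immediate, lis a 16-bit immediate shifted
+;; left by 16 and sign-extended, so e.g. -5 matches "li %0,-5" and
+;; 0x7fff0000 matches "lis %0,0x7fff".)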
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (match_operand:DI 1 "const_double_operand" "F"))]
+ "HOST_BITS_PER_WIDE_INT == 32 && TARGET_POWERPC64
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && num_insns_constant (operands[1], DImode) == 1"
+ "*
+{
+ return ((unsigned HOST_WIDE_INT)
+ (CONST_DOUBLE_LOW (operands[1]) + 0x8000) < 0x10000)
+ ? \"li %0,%1\" : \"lis %0,%v1\";
+}")
+
+;; Generate all one-bits and clear left or right.
+;; Use (and:DI (rotate:DI ...)) to avoid anddi3's unnecessary clobber.
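+;; E.g. the constant 0x00000000ffffffff is loaded as
+;;   li rD,-1
+;;   rldicl rD,rD,0,32
+;; (all ones, then clear the left 32 bits) instead of a longer
+;; constant-building sequence.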
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "mask64_operand" ""))]
+ "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1"
+ [(set (match_dup 0) (const_int -1))
+ (set (match_dup 0)
+ (and:DI (rotate:DI (match_dup 0)
+ (const_int 0))
+ (match_dup 1)))]
+ "")
+
+;; Split a load of a large constant into the appropriate five-instruction
+;; sequence. Handle anything in a constant number of insns.
+;; When non-easy constants can go in the TOC, this should use
+;; easy_fp_constant predicate.
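+;; E.g. 0x1234567890abcdef is built as
+;;   lis  rD,0x1234
+;;   ori  rD,rD,0x5678
+;;   sldi rD,rD,32
+;;   oris rD,rD,0x90ab
+;;   ori  rD,rD,0xcdef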
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "const_int_operand" ""))]
+ "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
+ "
+{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
+
+ if (tem == operands[0])
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "gpc_reg_operand" "")
+ (match_operand:DI 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64 && num_insns_constant (operands[1], DImode) > 1"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 3)))]
+ "
+{ rtx tem = rs6000_emit_set_const (operands[0], DImode, operands[1], 5);
+
+ if (tem == operands[0])
+ DONE;
+ else
+ FAIL;
+}")
+
+;; TImode is similar, except that we usually want to compute the address into
+;; a register and use lsi/stsi (the exception is during reload). MQ is also
+;; clobbered in stsi for POWER, so we need a SCRATCH for it.
+
+;; We say that MQ is clobbered in the last alternative because the first
+;; alternative would never get used otherwise since it would need a reload
+;; while the 2nd alternative would not. We put memory cases first so they
+;; are preferred. Otherwise, we'd try to reload the output instead of
+;; giving the SCRATCH mq.
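+;; (lswi/stswi move a byte count through consecutive GPRs, four bytes
+;; per register: e.g. "lswi r5,r4,16" loads 16 bytes starting at (r4)
+;; into r5, r6, r7 and r8.)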
+
+(define_insn "*movti_power"
+ [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,m,????r,????r,????r,r")
+ (match_operand:TI 1 "input_operand" "r,r,r,Q,m,n"))
+ (clobber (match_scratch:SI 2 "=q,q#X,X,X,X,X"))]
+ "TARGET_POWER && ! TARGET_POWERPC64
+ && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+
+ case 0:
+ if (TARGET_STRING)
+ return \"{stsi|stswi} %1,%P0,16\";
+ case 1:
+ case 2:
+ return \"#\";
+ case 3:
+ /* If the address is not used in the output, we can use lsi. Otherwise,
+ fall through to generating four loads. */
+ if (TARGET_STRING
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
+ return \"{lsi|lswi} %0,%P1,16\";
+ /* ... fall through ... */
+ case 4:
+ case 5:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "store,store,*,load,load,*")])
+
+(define_insn "*movti_string"
+ [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,o<>,????r,????r,????r,r")
+ (match_operand:TI 1 "input_operand" "r,r,r,Q,m,n"))]
+ "! TARGET_POWER && ! TARGET_POWERPC64
+ && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ if (TARGET_STRING)
+ return \"{stsi|stswi} %1,%P0,16\";
+ case 1:
+ case 2:
+ return \"#\";
+ case 3:
+ /* If the address is not used in the output, we can use lsi. Otherwise,
+ fall through to generating four loads. */
+ if (TARGET_STRING
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
+ return \"{lsi|lswi} %0,%P1,16\";
+ /* ... fall through ... */
+ case 4:
+ case 5:
+ return \"#\";
+ }
+}"
+ [(set_attr "type" "store_ux,store_ux,*,load_ux,load_ux,*")
+ (set (attr "cell_micro") (if_then_else (eq (symbol_ref "TARGET_STRING") (const_int 1))
+ (const_string "always")
+ (const_string "conditional")))])
+
+(define_insn "*movti_ppc64"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o<>,r")
+ (match_operand:TI 1 "input_operand" "r,r,m"))]
+ "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
+ || gpc_reg_operand (operands[1], TImode)))
+ && VECTOR_MEM_NONE_P (TImode)"
+ "#"
+ [(set_attr "type" "*,store,load")])
+
+(define_split
+ [(set (match_operand:TI 0 "gpc_reg_operand" "")
+ (match_operand:TI 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "
+{
+ operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
+ TImode);
+ operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
+ TImode);
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ operands[4] = GEN_INT (CONST_DOUBLE_HIGH (operands[1]));
+ operands[5] = GEN_INT (CONST_DOUBLE_LOW (operands[1]));
+ }
+ else if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ operands[4] = GEN_INT (- (INTVAL (operands[1]) < 0));
+ operands[5] = operands[1];
+ }
+ else
+ FAIL;
+}")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "input_operand" ""))]
+ "reload_completed && VECTOR_MEM_NONE_P (TImode)
+ && gpr_or_gpr_p (operands[0], operands[1])"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
+
+(define_expand "load_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 2 "" ""))])]
+ "TARGET_STRING && !TARGET_POWERPC64"
+ "
+{
+ int regno;
+ int count;
+ rtx op1;
+ int i;
+
+ /* Support only loading a constant number of fixed-point registers from
+ memory and only bother with this if more than two; the machine
+ doesn't support more than eight. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) <= 2
+ || INTVAL (operands[2]) > 8
+ || GET_CODE (operands[1]) != MEM
+ || GET_CODE (operands[0]) != REG
+ || REGNO (operands[0]) >= 32)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[0]);
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+ op1 = replace_equiv_address (operands[1],
+ force_reg (SImode, XEXP (operands[1], 0)));
+
+ for (i = 0; i < count; i++)
+ XVECEXP (operands[3], 0, i)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno + i),
+ adjust_address_nv (op1, SImode, i * 4));
+}")
+
+(define_insn "*ldmsi8"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+ (set (match_operand:SI 5 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 12))))
+ (set (match_operand:SI 6 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 16))))
+ (set (match_operand:SI 7 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 20))))
+ (set (match_operand:SI 8 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 24))))
+ (set (match_operand:SI 9 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 28))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 8"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_insn "*ldmsi7"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+ (set (match_operand:SI 5 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 12))))
+ (set (match_operand:SI 6 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 16))))
+ (set (match_operand:SI 7 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 20))))
+ (set (match_operand:SI 8 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 24))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 7"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_insn "*ldmsi6"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+ (set (match_operand:SI 5 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 12))))
+ (set (match_operand:SI 6 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 16))))
+ (set (match_operand:SI 7 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 20))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 6"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_insn "*ldmsi5"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+ (set (match_operand:SI 5 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 12))))
+ (set (match_operand:SI 6 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 16))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 5"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_insn "*ldmsi4"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))
+ (set (match_operand:SI 5 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 4"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_insn "*ldmsi3"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 4))))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
+ "TARGET_STRING && XVECLEN (operands[0], 0) == 3"
+ "*
+{ return rs6000_output_load_multiple (operands); }"
+ [(set_attr "type" "load_ux")
+ (set_attr "length" "32")])
+
+(define_expand "store_multiple"
+ [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (clobber (scratch:SI))
+ (use (match_operand:SI 2 "" ""))])]
+ "TARGET_STRING && !TARGET_POWERPC64"
+ "
+{
+ int regno;
+ int count;
+ rtx to;
+ rtx op0;
+ int i;
+
+ /* Support only storing a constant number of fixed-point registers to
+ memory and only bother with this if more than two; the machine
+ doesn't support more than eight. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) <= 2
+ || INTVAL (operands[2]) > 8
+ || GET_CODE (operands[0]) != MEM
+ || GET_CODE (operands[1]) != REG
+ || REGNO (operands[1]) >= 32)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[1]);
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
+ to = force_reg (SImode, XEXP (operands[0], 0));
+ op0 = replace_equiv_address (operands[0], to);
+
+ XVECEXP (operands[3], 0, 0)
+ = gen_rtx_SET (VOIDmode, adjust_address_nv (op0, SImode, 0), operands[1]);
+ XVECEXP (operands[3], 0, 1) = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_SCRATCH (SImode));
+
+ for (i = 1; i < count; i++)
+ XVECEXP (operands[3], 0, i + 1)
+ = gen_rtx_SET (VOIDmode,
+ adjust_address_nv (op0, SImode, i * 4),
+ gen_rtx_REG (SImode, regno + i));
+}")
+
+(define_insn "*stmsi8"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
+ (match_operand:SI 9 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
+ (match_operand:SI 10 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 9"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi7"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
+ (match_operand:SI 9 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 8"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi6"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 7"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi5"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 6"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi4"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 5"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi3"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=X"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && !TARGET_POWER && XVECLEN (operands[0], 0) == 4"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
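+;; In the templates above and below, "{stsi|stswi}" selects the old POWER
+;; or new PowerPC mnemonic according to TARGET_NEW_MNEMONICS
+;; (-mold-mnemonics/-mnew-mnemonics).  The "_power" variants that follow
+;; differ from the patterns above only in taking a "q" (MQ register)
+;; scratch, apparently because the string instructions can alter MQ on
+;; POWER.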
+(define_insn "*stmsi8_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
+ (match_operand:SI 9 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 28)))
+ (match_operand:SI 10 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 9"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi7_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 24)))
+ (match_operand:SI 9 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 8"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi6_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 20)))
+ (match_operand:SI 8 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 7"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi5_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 16)))
+ (match_operand:SI 7 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 6"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi4_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
+ (match_operand:SI 6 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 5"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*stmsi3_power"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 1 "gpc_reg_operand" "b"))
+ (match_operand:SI 2 "gpc_reg_operand" "r"))
+ (clobber (match_scratch:SI 3 "=q"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
+ (match_operand:SI 5 "gpc_reg_operand" "r"))])]
+ "TARGET_STRING && TARGET_POWER && XVECLEN (operands[0], 0) == 4"
+ "{stsi|stswi} %2,%1,%O0"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_expand "setmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand 2 "const_int_operand" ""))
+ (use (match_operand:SI 1 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ "
+{
+ /* If value to set is not zero, use the library routine. */
+ if (operands[2] != const0_rtx)
+ FAIL;
+
+ if (expand_block_clear (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; String/block move insn.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ "
+{
+ if (expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; Move up to 32 bytes at a time. The fixed registers are needed because the
+;; register allocator doesn't have a clue about allocating 8 word registers.
+;; rD/rS = r5 is preferred, efficient form.
+(define_expand "movmemsi_8reg"
+ [(parallel [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (reg:SI 11))
+ (clobber (reg:SI 12))
+ (clobber (match_scratch:SI 4 ""))])]
+ "TARGET_STRING"
+ "")
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (reg:SI 11))
+ (clobber (reg:SI 12))
+ (clobber (match_scratch:SI 5 "=q"))]
+ "TARGET_STRING && TARGET_POWER
+ && ((INTVAL (operands[2]) > 24 && INTVAL (operands[2]) < 32)
+ || INTVAL (operands[2]) == 0)
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 12)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 12)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (reg:SI 11))
+ (clobber (reg:SI 12))
+ (clobber (match_scratch:SI 5 "=X"))]
+ "TARGET_STRING && ! TARGET_POWER
+ && ((INTVAL (operands[2]) > 24 && INTVAL (operands[2]) < 32)
+ || INTVAL (operands[2]) == 0)
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 12)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 12)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+;; Move up to 24 bytes at a time. The fixed registers are needed because the
+;; register allocator doesn't have a clue about allocating 6 word registers.
+;; rD/rS = r5 is preferred, efficient form.
+(define_expand "movmemsi_6reg"
+ [(parallel [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (match_scratch:SI 4 ""))])]
+ "TARGET_STRING"
+ "")
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (match_scratch:SI 5 "=q"))]
+ "TARGET_STRING && TARGET_POWER
+ && INTVAL (operands[2]) > 16 && INTVAL (operands[2]) <= 24
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 10)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 10)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (reg:SI 9))
+ (clobber (reg:SI 10))
+ (clobber (match_scratch:SI 5 "=X"))]
+ "TARGET_STRING && ! TARGET_POWER
+ && INTVAL (operands[2]) > 16 && INTVAL (operands[2]) <= 24
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 10)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 10)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+;; Move up to 16 bytes at a time, using 4 fixed registers to avoid spill
+;; problems with TImode.
+;; rD/rS = r5 is preferred, efficient form.
+(define_expand "movmemsi_4reg"
+ [(parallel [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI 5))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (match_scratch:SI 4 ""))])]
+ "TARGET_STRING"
+ "")
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (match_scratch:SI 5 "=q"))]
+ "TARGET_STRING && TARGET_POWER
+ && INTVAL (operands[2]) > 8 && INTVAL (operands[2]) <= 16
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 8)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 8)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))
+ (clobber (reg:SI 6))
+ (clobber (reg:SI 7))
+ (clobber (reg:SI 8))
+ (clobber (match_scratch:SI 5 "=X"))]
+ "TARGET_STRING && ! TARGET_POWER
+ && INTVAL (operands[2]) > 8 && INTVAL (operands[2]) <= 16
+ && (REGNO (operands[0]) < 5 || REGNO (operands[0]) > 8)
+ && (REGNO (operands[1]) < 5 || REGNO (operands[1]) > 8)
+ && REGNO (operands[4]) == 5"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+;; Move up to 8 bytes at a time.
+(define_expand "movmemsi_2reg"
+ [(parallel [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (match_scratch:DI 4 ""))
+ (clobber (match_scratch:SI 5 ""))])]
+ "TARGET_STRING && ! TARGET_POWERPC64"
+ "")
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_scratch:DI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=q"))]
+ "TARGET_STRING && TARGET_POWER && ! TARGET_POWERPC64
+ && INTVAL (operands[2]) > 4 && INTVAL (operands[2]) <= 8"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_scratch:DI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=X"))]
+ "TARGET_STRING && ! TARGET_POWER && ! TARGET_POWERPC64
+ && INTVAL (operands[2]) > 4 && INTVAL (operands[2]) <= 8"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+;; Move up to 4 bytes at a time.
+(define_expand "movmemsi_1reg"
+ [(parallel [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))])]
+ "TARGET_STRING"
+ "")
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:SI 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:SI 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=q"))]
+ "TARGET_STRING && TARGET_POWER
+ && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 4"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (mem:BLK (match_operand:P 0 "gpc_reg_operand" "b"))
+ (mem:BLK (match_operand:P 1 "gpc_reg_operand" "b")))
+ (use (match_operand:SI 2 "immediate_operand" "i"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=X"))]
+ "TARGET_STRING && ! TARGET_POWER
+ && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 4"
+ "{lsi|lswi} %4,%1,%2\;{stsi|stswi} %4,%0,%2"
+ [(set_attr "type" "store_ux")
+ (set_attr "cell_micro" "always")
+ (set_attr "length" "8")])
+
+;; Define insns that do load or store with update. Some of these we can
+;; get by using pre-decrement or pre-increment, but the hardware can also
+;; do cases where the increment is not the size of the object.
+;;
+;; In all these cases, we use operands 0 and 1 for the register being
+;; incremented because those are the operands that local-alloc will
+;; tie and these are the pair most likely to be tieable (and the ones
+;; that will benefit the most).
+
+(define_insn "*movdi_update1"
+ [(set (match_operand:DI 3 "gpc_reg_operand" "=r,r")
+ (mem:DI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0,0")
+ (match_operand:DI 2 "reg_or_aligned_short_operand" "r,I"))))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=b,b")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (DImode)
+ || !gpc_reg_operand (operands[2], DImode))"
+ "@
+ ldux %3,%0,%2
+ ldu %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "movdi_<mode>_update"
+ [(set (mem:DI (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0")
+ (match_operand:P 2 "reg_or_aligned_short_operand" "r,I")))
+ (match_operand:DI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:P 0 "gpc_reg_operand" "=b,b")
+ (plus:P (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (Pmode)
+ || !gpc_reg_operand (operands[2], Pmode)
+ || (REG_P (operands[0])
+ && REGNO (operands[0]) == STACK_POINTER_REGNUM))"
+ "@
+ stdux %3,%0,%2
+ stdu %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+;; This pattern is only conditional on TARGET_POWERPC64, as it is
+;; needed for stack allocation, even if the user passes -mno-update.
+(define_insn "movdi_<mode>_update_stack"
+ [(set (mem:DI (plus:P (match_operand:P 1 "gpc_reg_operand" "0,0")
+ (match_operand:P 2 "reg_or_aligned_short_operand" "r,I")))
+ (match_operand:DI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:P 0 "gpc_reg_operand" "=b,b")
+ (plus:P (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64"
+ "@
+ stdux %3,%0,%2
+ stdu %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+(define_insn "*movsi_update1"
+ [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r")
+ (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ {lux|lwzux} %3,%0,%2
+ {lu|lwzu} %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movsi_update2"
+ [(set (match_operand:DI 3 "gpc_reg_operand" "=r")
+ (sign_extend:DI
+ (mem:SI (plus:DI (match_operand:DI 1 "gpc_reg_operand" "0")
+ (match_operand:DI 2 "gpc_reg_operand" "r")))))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=b")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode
+ && !avoiding_indexed_address_p (DImode)"
+ "lwaux %3,%0,%2"
+ [(set_attr "type" "load_ext_ux")])
+
+(define_insn "movsi_update"
+ [(set (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode)
+ || (REG_P (operands[0])
+ && REGNO (operands[0]) == STACK_POINTER_REGNUM))"
+ "@
+ {stux|stwux} %3,%0,%2
+ {stu|stwu} %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+;; This is an unconditional pattern; needed for stack allocation, even
+;; if the user passes -mno-update.
+(define_insn "movsi_update_stack"
+ [(set (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "@
+ {stux|stwux} %3,%0,%2
+ {stu|stwu} %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+(define_insn "*movhi_update1"
+ [(set (match_operand:HI 3 "gpc_reg_operand" "=r,r")
+ (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lhzux %3,%0,%2
+ lhzu %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movhi_update2"
+ [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI
+ (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lhzux %3,%0,%2
+ lhzu %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movhi_update3"
+ [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r")
+ (sign_extend:SI
+ (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE && rs6000_gen_cell_microcode
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lhaux %3,%0,%2
+ lhau %3,%2(%0)"
+ [(set_attr "type" "load_ext_ux,load_ext_u")])
+
+(define_insn "*movhi_update4"
+ [(set (mem:HI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:HI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ sthux %3,%0,%2
+ sthu %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+(define_insn "*movqi_update1"
+ [(set (match_operand:QI 3 "gpc_reg_operand" "=r,r")
+ (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lbzux %3,%0,%2
+ lbzu %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movqi_update2"
+ [(set (match_operand:SI 3 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lbzux %3,%0,%2
+ lbzu %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movqi_update3"
+ [(set (mem:QI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:QI 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ stbux %3,%0,%2
+ stbu %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+(define_insn "*movsf_update1"
+ [(set (match_operand:SF 3 "gpc_reg_operand" "=f,f")
+ (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lfsux %3,%0,%2
+ lfsu %3,%2(%0)"
+ [(set_attr "type" "fpload_ux,fpload_u")])
+
+(define_insn "*movsf_update2"
+ [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:SF 3 "gpc_reg_operand" "f,f"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ stfsux %3,%0,%2
+ stfsu %3,%2(%0)"
+ [(set_attr "type" "fpstore_ux,fpstore_u")])
+
+(define_insn "*movsf_update3"
+ [(set (match_operand:SF 3 "gpc_reg_operand" "=r,r")
+ (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ {lux|lwzux} %3,%0,%2
+ {lu|lwzu} %3,%2(%0)"
+ [(set_attr "type" "load_ux,load_u")])
+
+(define_insn "*movsf_update4"
+ [(set (mem:SF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:SF 3 "gpc_reg_operand" "r,r"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ {stux|stwux} %3,%0,%2
+ {stu|stwu} %3,%2(%0)"
+ [(set_attr "type" "store_ux,store_u")])
+
+(define_insn "*movdf_update1"
+ [(set (match_operand:DF 3 "gpc_reg_operand" "=d,d")
+ (mem:DF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I"))))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ lfdux %3,%0,%2
+ lfdu %3,%2(%0)"
+ [(set_attr "type" "fpload_ux,fpload_u")])
+
+(define_insn "*movdf_update2"
+ [(set (mem:DF (plus:SI (match_operand:SI 1 "gpc_reg_operand" "0,0")
+ (match_operand:SI 2 "reg_or_short_operand" "r,I")))
+ (match_operand:DF 3 "gpc_reg_operand" "d,d"))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=b,b")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_UPDATE
+ && (!avoiding_indexed_address_p (SImode)
+ || !gpc_reg_operand (operands[2], SImode))"
+ "@
+ stfdux %3,%0,%2
+ stfdu %3,%2(%0)"
+ [(set_attr "type" "fpstore_ux,fpstore_u")])
+
+;; Peephole to convert two consecutive FP loads or stores into lfq/stfq.
+
+(define_insn "*lfq_power2"
+ [(set (match_operand:V2DF 0 "gpc_reg_operand" "=f")
+ (match_operand:V2DF 1 "memory_operand" ""))]
+ "TARGET_POWER2
+ && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "lfq%U1%X1 %0,%1")
+
+(define_peephole2
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (set (match_operand:DF 2 "gpc_reg_operand" "")
+ (match_operand:DF 3 "memory_operand" ""))]
+ "TARGET_POWER2
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && registers_ok_for_quad_peep (operands[0], operands[2])
+ && mems_ok_for_quad_peep (operands[1], operands[3])"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[1] = widen_memory_access (operands[1], V2DFmode, 0);
+ operands[0] = gen_rtx_REG (V2DFmode, REGNO (operands[0]));")
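+
+;; For illustration, the peephole above rewrites, roughly,
+;;   lfd 6,0(9)
+;;   lfd 7,8(9)
+;; into the single POWER2 quad load
+;;   lfq 6,0(9)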
+
+(define_insn "*stfq_power2"
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (match_operand:V2DF 1 "gpc_reg_operand" "f"))]
+ "TARGET_POWER2
+ && TARGET_HARD_FLOAT && TARGET_FPRS"
+ "stfq%U0%X0 %1,%0")
+
+
+(define_peephole2
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "gpc_reg_operand" ""))
+ (set (match_operand:DF 2 "memory_operand" "")
+ (match_operand:DF 3 "gpc_reg_operand" ""))]
+ "TARGET_POWER2
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && registers_ok_for_quad_peep (operands[1], operands[3])
+ && mems_ok_for_quad_peep (operands[0], operands[2])"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[0] = widen_memory_access (operands[0], V2DFmode, 0);
+ operands[1] = gen_rtx_REG (V2DFmode, REGNO (operands[1]));")
+
+;; After inserting conditional returns we can sometimes have
+;; unnecessary register moves. Unfortunately we cannot have a
+;; modeless peephole here, because some single SImode sets have early
+;; clobber outputs. Although those sets expand to multi-ppc-insn
+;; sequences, using get_attr_length here will smash the operands
+;; array. Neither is there an early_clobber_p predicate.
+;; Disallow subregs for E500 so we don't munge frob_di_df_2.
+(define_peephole2
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (match_operand:DF 1 "any_operand" ""))
+ (set (match_operand:DF 2 "gpc_reg_operand" "")
+ (match_dup 0))]
+ "!(TARGET_E500_DOUBLE && GET_CODE (operands[2]) == SUBREG)
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (match_dup 1))])
+
+(define_peephole2
+ [(set (match_operand:SF 0 "gpc_reg_operand" "")
+ (match_operand:SF 1 "any_operand" ""))
+ (set (match_operand:SF 2 "gpc_reg_operand" "")
+ (match_dup 0))]
+ "peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (match_dup 1))])
+
+
+;; TLS support.
+
+;; Mode attributes for different ABIs.
+(define_mode_iterator TLSmode [(SI "! TARGET_64BIT") (DI "TARGET_64BIT")])
+(define_mode_attr tls_abi_suffix [(SI "32") (DI "64")])
+(define_mode_attr tls_sysv_suffix [(SI "si") (DI "di")])
+(define_mode_attr tls_insn_suffix [(SI "wz") (DI "d")])
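+
+;; For illustration (32-bit SysV with -fPIC, a sketch; register numbers
+;; are only examples): a general-dynamic access to thread-local "x"
+;; expands to roughly
+;;   addi 3,31,x@got@tlsgd
+;;   bl __tls_get_addr@plt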
+
+(define_insn_and_split "tls_gd_aix<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 3 "symbol_ref_operand" "s"))
+ (match_operand 4 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
+{
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;bl %z3\;%.";
+ else
+ return "addi %0,%1,%2@got@tlsgd\;bl %z3\;%.";
+}
+ "&& TARGET_TLS_MARKERS"
+ [(set (match_dup 0)
+ (unspec:TLSmode [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_TLSGD))
+ (parallel [(set (match_dup 0)
+ (call (mem:TLSmode (match_dup 3))
+ (match_dup 4)))
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ [(set_attr "type" "two")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 16)
+ (const_int 12)))])
+
+(define_insn_and_split "tls_gd_sysv<TLSmode:tls_sysv_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 3 "symbol_ref_operand" "s"))
+ (match_operand 4 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4"
+{
+ if (flag_pic)
+ {
+ if (TARGET_SECURE_PLT && flag_pic == 2)
+ return "addi %0,%1,%2@got@tlsgd\;bl %z3+32768@plt";
+ else
+ return "addi %0,%1,%2@got@tlsgd\;bl %z3@plt";
+ }
+ else
+ return "addi %0,%1,%2@got@tlsgd\;bl %z3";
+}
+ "&& TARGET_TLS_MARKERS"
+ [(set (match_dup 0)
+ (unspec:TLSmode [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_TLSGD))
+ (parallel [(set (match_dup 0)
+ (call (mem:TLSmode (match_dup 3))
+ (match_dup 4)))
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "*tls_gd<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS"
+ "addi %0,%1,%2@got@tlsgd"
+ "&& TARGET_CMODEL != CMODEL_SMALL"
+ [(set (match_dup 3)
+ (const:TLSmode
+ (plus:TLSmode (match_dup 1)
+ (high:TLSmode
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD)))))
+ (set (match_dup 0)
+ (lo_sum:TLSmode (match_dup 3)
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD)))]
+ "
+{
+ operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*tls_gd_high<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (const:TLSmode
+ (plus:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (high:TLSmode
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)))))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
+ "addis %0,%1,%2@got@tlsgd@ha"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_gd_low<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
+ "addi %0,%1,%2@got@tlsgd@l"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_gd_call_aix<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
+ "bl %z1(%3@tlsgd)\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*tls_gd_call_sysv<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4 && TARGET_TLS_MARKERS"
+{
+ if (flag_pic)
+ {
+ if (TARGET_SECURE_PLT && flag_pic == 2)
+ return "bl %z1+32768(%3@tlsgd)@plt";
+ return "bl %z1(%3@tlsgd)@plt";
+ }
+ return "bl %z1(%3@tlsgd)";
+}
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "tls_ld_aix<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 2 "symbol_ref_operand" "s"))
+ (match_operand 3 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
+ UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
+{
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;bl %z2\;%.";
+ else
+ return "addi %0,%1,%&@got@tlsld\;bl %z2\;%.";
+}
+ "&& TARGET_TLS_MARKERS"
+ [(set (match_dup 0)
+ (unspec:TLSmode [(match_dup 1)]
+ UNSPEC_TLSLD))
+ (parallel [(set (match_dup 0)
+ (call (mem:TLSmode (match_dup 2))
+ (match_dup 3)))
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ [(set_attr "type" "two")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 16)
+ (const_int 12)))])
+
+(define_insn_and_split "tls_ld_sysv<TLSmode:tls_sysv_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 2 "symbol_ref_operand" "s"))
+ (match_operand 3 "" "g")))
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
+ UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4"
+{
+ if (flag_pic)
+ {
+ if (TARGET_SECURE_PLT && flag_pic == 2)
+ return "addi %0,%1,%&@got@tlsld\;bl %z2+32768@plt";
+ else
+ return "addi %0,%1,%&@got@tlsld\;bl %z2@plt";
+ }
+ else
+ return "addi %0,%1,%&@got@tlsld\;bl %z2";
+}
+ "&& TARGET_TLS_MARKERS"
+ [(set (match_dup 0)
+ (unspec:TLSmode [(match_dup 1)]
+ UNSPEC_TLSLD))
+ (parallel [(set (match_dup 0)
+ (call (mem:TLSmode (match_dup 2))
+ (match_dup 3)))
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "*tls_ld<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
+ UNSPEC_TLSLD))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS"
+ "addi %0,%1,%&@got@tlsld"
+ "&& TARGET_CMODEL != CMODEL_SMALL"
+ [(set (match_dup 2)
+ (const:TLSmode
+ (plus:TLSmode (match_dup 1)
+ (high:TLSmode
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))))
+ (set (match_dup 0)
+ (lo_sum:TLSmode (match_dup 2)
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))]
+ "
+{
+ operands[2] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*tls_ld_high<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (const:TLSmode
+ (plus:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (high:TLSmode
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
+ "addis %0,%1,%&@got@tlsld@ha"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_ld_low<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))]
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
+ "addi %0,%1,%&@got@tlsld@l"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_ld_call_aix<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
+ "bl %z1(%&@tlsld)\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*tls_ld_call_sysv<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (call (mem:TLSmode (match_operand:TLSmode 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
+ (clobber (reg:SI LR_REGNO))]
+ "HAVE_AS_TLS && DEFAULT_ABI == ABI_V4 && TARGET_TLS_MARKERS"
+{
+ if (flag_pic)
+ {
+ if (TARGET_SECURE_PLT && flag_pic == 2)
+ return "bl %z1+32768(%&@tlsld)@plt";
+ return "bl %z1(%&@tlsld)@plt";
+ }
+ return "bl %z1(%&@tlsld)";
+}
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "tls_dtprel_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSDTPREL))]
+ "HAVE_AS_TLS"
+ "addi %0,%1,%2@dtprel")
+
+(define_insn "tls_dtprel_ha_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSDTPRELHA))]
+ "HAVE_AS_TLS"
+ "addis %0,%1,%2@dtprel@ha")
+
+(define_insn "tls_dtprel_lo_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSDTPRELLO))]
+ "HAVE_AS_TLS"
+ "addi %0,%1,%2@dtprel@l")
+
+(define_insn_and_split "tls_got_dtprel_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTDTPREL))]
+ "HAVE_AS_TLS"
+ "l<TLSmode:tls_insn_suffix> %0,%2@got@dtprel(%1)"
+ "&& TARGET_CMODEL != CMODEL_SMALL"
+ [(set (match_dup 3)
+ (const:TLSmode
+ (plus:TLSmode (match_dup 1)
+ (high:TLSmode
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTDTPREL)))))
+ (set (match_dup 0)
+ (lo_sum:TLSmode (match_dup 3)
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTDTPREL)))]
+ "
+{
+ operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*tls_got_dtprel_high<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (const:TLSmode
+ (plus:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (high:TLSmode
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTDTPREL)))))]
+ "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
+ "addis %0,%1,%2@got@dtprel@ha"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_got_dtprel_low<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTDTPREL)))]
+ "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
+ "l<TLSmode:tls_insn_suffix> %0,%2@got@dtprel@l(%1)"
+ [(set_attr "length" "4")])
+
+(define_insn "tls_tprel_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSTPREL))]
+ "HAVE_AS_TLS"
+ "addi %0,%1,%2@tprel")
+
+(define_insn "tls_tprel_ha_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSTPRELHA))]
+ "HAVE_AS_TLS"
+ "addis %0,%1,%2@tprel@ha")
+
+(define_insn "tls_tprel_lo_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSTPRELLO))]
+ "HAVE_AS_TLS"
+ "addi %0,%1,%2@tprel@l")
+
+;; "b" output constraint here and on tls_tls input to support linker tls
+;; optimization. The linker may edit the instructions emitted by a
+;; tls_got_tprel/tls_tls pair to addis,addi.
+(define_insn_and_split "tls_got_tprel_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTTPREL))]
+ "HAVE_AS_TLS"
+ "l<TLSmode:tls_insn_suffix> %0,%2@got@tprel(%1)"
+ "&& TARGET_CMODEL != CMODEL_SMALL"
+ [(set (match_dup 3)
+ (const:TLSmode
+ (plus:TLSmode (match_dup 1)
+ (high:TLSmode
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTTPREL)))))
+ (set (match_dup 0)
+ (lo_sum:TLSmode (match_dup 3)
+ (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTTPREL)))]
+ "
+{
+ operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_CMODEL") (symbol_ref "CMODEL_SMALL"))
+ (const_int 8)
+ (const_int 4)))])
+
+(define_insn "*tls_got_tprel_high<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
+ (const:TLSmode
+ (plus:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (high:TLSmode
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTTPREL)))))]
+ "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
+ "addis %0,%1,%2@got@tprel@ha"
+ [(set_attr "length" "4")])
+
+(define_insn "*tls_got_tprel_low<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSGOTTPREL)))]
+ "HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
+ "l<TLSmode:tls_insn_suffix> %0,%2@got@tprel@l(%1)"
+ [(set_attr "length" "4")])
+
+(define_insn "tls_tls_<TLSmode:tls_abi_suffix>"
+ [(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
+ (unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ UNSPEC_TLSTLS))]
+ "HAVE_AS_TLS"
+ "add %0,%1,%2@tls")
+
+;; Next come insns related to the calling sequence.
+;;
+;; First, an insn to allocate new stack space for dynamic use (e.g., alloca).
+;; We move the back-chain and decrement the stack pointer.
+
+(define_expand "allocate_stack"
+ [(set (match_operand 0 "gpc_reg_operand" "")
+ (minus (reg 1) (match_operand 1 "reg_or_short_operand" "")))
+ (set (reg 1)
+ (minus (reg 1) (match_dup 1)))]
+ ""
+ "
+{ rtx chain = gen_reg_rtx (Pmode);
+ rtx stack_bot = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ rtx neg_op0;
+ rtx insn, par, set, mem;
+
+ emit_move_insn (chain, stack_bot);
+
+ /* Check stack bounds if necessary. */
+ if (crtl->limit_stack)
+ {
+ rtx available;
+ available = expand_binop (Pmode, sub_optab,
+ stack_pointer_rtx, stack_limit_rtx,
+ NULL_RTX, 1, OPTAB_WIDEN);
+ emit_insn (gen_cond_trap (LTU, available, operands[1], const0_rtx));
+ }
+
+ if (GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) < -32767
+ || INTVAL (operands[1]) > 32768)
+ {
+ neg_op0 = gen_reg_rtx (Pmode);
+ if (TARGET_32BIT)
+ emit_insn (gen_negsi2 (neg_op0, operands[1]));
+ else
+ emit_insn (gen_negdi2 (neg_op0, operands[1]));
+ }
+ else
+ neg_op0 = GEN_INT (- INTVAL (operands[1]));
+
+ insn = emit_insn ((* ((TARGET_32BIT) ? gen_movsi_update_stack
+ : gen_movdi_di_update_stack))
+ (stack_pointer_rtx, stack_pointer_rtx, neg_op0,
+ chain));
+ /* Since we didn't use gen_frame_mem to generate the MEM, grab
+ it now and set the alias set/attributes. The above gen_*_update
+ calls will generate a PARALLEL with the MEM set being the first
+ operation. */
+ par = PATTERN (insn);
+ gcc_assert (GET_CODE (par) == PARALLEL);
+ set = XVECEXP (par, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = SET_DEST (set);
+ gcc_assert (MEM_P (mem));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, get_frame_alias_set ());
+
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+}")
+
+;; These patterns say how to save and restore the stack pointer. We need not
+;; save the stack pointer at function level since we are careful to
+;; preserve the backchain. At block level, we have to restore the backchain
+;; when we restore the stack pointer.
+;;
+;; For nonlocal gotos, we must save both the stack pointer and its
+;; backchain and restore both. Note that in the nonlocal case, the
+;; save area is a memory location.
+
+(define_expand "save_stack_function"
+ [(match_operand 0 "any_operand" "")
+ (match_operand 1 "any_operand" "")]
+ ""
+ "DONE;")
+
+(define_expand "restore_stack_function"
+ [(match_operand 0 "any_operand" "")
+ (match_operand 1 "any_operand" "")]
+ ""
+ "DONE;")
+
+;; Adjust stack pointer (op0) to a new value (op1).
+;; First copy old stack backchain to new location, and ensure that the
+;; scheduler won't reorder the sp assignment before the backchain write.
+(define_expand "restore_stack_block"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 2))
+ (set (match_dup 5) (unspec:BLK [(match_dup 5)] UNSPEC_TIE))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ ""
+ "
+{
+ operands[1] = force_reg (Pmode, operands[1]);
+ operands[2] = gen_reg_rtx (Pmode);
+ operands[3] = gen_frame_mem (Pmode, operands[0]);
+ operands[4] = gen_frame_mem (Pmode, operands[1]);
+ operands[5] = gen_frame_mem (BLKmode, operands[0]);
+}")
+
+(define_expand "save_stack_nonlocal"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_operand 0 "memory_operand" "") (match_dup 3))
+ (set (match_dup 2) (match_operand 1 "register_operand" ""))]
+ ""
+ "
+{
+ int units_per_word = (TARGET_32BIT) ? 4 : 8;
+
+ /* Copy the backchain to the first word, sp to the second. */
+ operands[0] = adjust_address_nv (operands[0], Pmode, 0);
+ operands[2] = adjust_address_nv (operands[0], Pmode, units_per_word);
+ operands[3] = gen_reg_rtx (Pmode);
+ operands[4] = gen_frame_mem (Pmode, operands[1]);
+}")
+
+(define_expand "restore_stack_nonlocal"
+ [(set (match_dup 2) (match_operand 1 "memory_operand" ""))
+ (set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 2))
+ (set (match_dup 6) (unspec:BLK [(match_dup 6)] UNSPEC_TIE))
+ (set (match_operand 0 "register_operand" "") (match_dup 3))]
+ ""
+ "
+{
+ int units_per_word = (TARGET_32BIT) ? 4 : 8;
+
+ /* Restore the backchain from the first word, sp from the second. */
+ operands[2] = gen_reg_rtx (Pmode);
+ operands[3] = gen_reg_rtx (Pmode);
+ operands[1] = adjust_address_nv (operands[1], Pmode, 0);
+ operands[4] = adjust_address_nv (operands[1], Pmode, units_per_word);
+ operands[5] = gen_frame_mem (Pmode, operands[3]);
+ operands[6] = gen_frame_mem (BLKmode, operands[0]);
+}")
+
+;; TOC register handling.
+
+;; Code to initialize the TOC register...
+
+(define_insn "load_toc_aix_si"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_TOC))
+ (use (reg:SI 2))])]
+ "DEFAULT_ABI == ABI_AIX && TARGET_32BIT"
+ "*
+{
+ char buf[30];
+ ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\", 1);
+ operands[1] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+ operands[2] = gen_rtx_REG (Pmode, 2);
+ return \"{l|lwz} %0,%1(%2)\";
+}"
+ [(set_attr "type" "load")])
+
+(define_insn "load_toc_aix_di"
+ [(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_TOC))
+ (use (reg:DI 2))])]
+ "DEFAULT_ABI == ABI_AIX && TARGET_64BIT"
+ "*
+{
+ char buf[30];
+#ifdef TARGET_RELOCATABLE
+ ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\",
+ !TARGET_MINIMAL_TOC || TARGET_RELOCATABLE);
+#else
+ ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\", 1);
+#endif
+ if (TARGET_ELF)
+ strcat (buf, \"@toc\");
+ operands[1] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+ operands[2] = gen_rtx_REG (Pmode, 2);
+ return \"ld %0,%1(%2)\";
+}"
+ [(set_attr "type" "load")])
+
+(define_insn "load_toc_v4_pic_si"
+ [(set (reg:SI LR_REGNO)
+ (unspec:SI [(const_int 0)] UNSPEC_TOC))]
+ "DEFAULT_ABI == ABI_V4 && flag_pic == 1 && TARGET_32BIT"
+ "bl _GLOBAL_OFFSET_TABLE_@local-4"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "load_toc_v4_PIC_1"
+ [(set (reg:SI LR_REGNO)
+ (match_operand:SI 0 "immediate_operand" "s"))
+ (use (unspec [(match_dup 0)] UNSPEC_TOC))]
+ "TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
+ "bcl 20,31,%0\\n%0:"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "load_toc_v4_PIC_1b"
+ [(set (reg:SI LR_REGNO)
+ (unspec:SI [(match_operand:SI 0 "immediate_operand" "s")
+ (label_ref (match_operand 1 "" ""))]
+ UNSPEC_TOCPTR))
+ (match_dup 1)]
+ "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "bcl 20,31,$+8\;.long %0-$"
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "load_toc_v4_PIC_2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (minus:SI (match_operand:SI 2 "immediate_operand" "s")
+ (match_operand:SI 3 "immediate_operand" "s")))))]
+ "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "{l|lwz} %0,%2-%3(%1)"
+ [(set_attr "type" "load")])
+
+(define_insn "load_toc_v4_PIC_3b"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (high:SI
+ (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
+ (match_operand:SI 3 "symbol_ref_operand" "s")))))]
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
+ "{cau|addis} %0,%1,%2-%3@ha")
+
+(define_insn "load_toc_v4_PIC_3c"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
+ (match_operand:SI 3 "symbol_ref_operand" "s"))))]
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
+ "{cal %0,%2-%3@l(%1)|addi %0,%1,%2-%3@l}")
+
+;; If the TOC is shared over a translation unit, as happens with all
+;; the kinds of PIC that we support, we need to restore the TOC
+;; pointer only when jumping over units of translation.
+;; On Darwin, we need to reload the picbase.
+
+(define_expand "builtin_setjmp_receiver"
+ [(use (label_ref (match_operand 0 "" "")))]
+ "(DEFAULT_ABI == ABI_V4 && flag_pic == 1)
+ || (TARGET_TOC && TARGET_MINIMAL_TOC)
+ || (DEFAULT_ABI == ABI_DARWIN && flag_pic)"
+ "
+{
+#if TARGET_MACHO
+ if (DEFAULT_ABI == ABI_DARWIN)
+ {
+ rtx picrtx = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
+ rtx picreg = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
+ rtx tmplabrtx;
+ char tmplab[20];
+
+ crtl->uses_pic_offset_table = 1;
+ ASM_GENERATE_INTERNAL_LABEL(tmplab, \"LSJR\",
+ CODE_LABEL_NUMBER (operands[0]));
+ tmplabrtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tmplab));
+
+ emit_insn (gen_load_macho_picbase (tmplabrtx));
+ emit_move_insn (picreg, gen_rtx_REG (Pmode, LR_REGNO));
+ emit_insn (gen_macho_correct_pic (picreg, picreg, picrtx, tmplabrtx));
+ }
+ else
+#endif
+ rs6000_emit_load_toc_table (FALSE);
+ DONE;
+}")
+
+;; ELF-specific ways of loading addresses for non-PIC code.
+;; The output of this could be r0, but we make a very strong
+;; preference for a base register because it will usually
+;; be needed there.
+(define_insn "elf_high"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=b*r")
+ (high:SI (match_operand 1 "" "")))]
+ "TARGET_ELF && ! TARGET_64BIT"
+ "{liu|lis} %0,%1@ha")
+
+(define_insn "elf_low"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b,!*r")
+ (match_operand 2 "" "")))]
+ "TARGET_ELF && ! TARGET_64BIT"
+ "@
+ {cal|la} %0,%2@l(%1)
+ {ai|addic} %0,%1,%K2")
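+
+;; For illustration (a sketch): taking the address of a static "var"
+;; without PIC is roughly
+;;   lis 9,var@ha          (elf_high)
+;;   la 9,var@l(9)         (elf_low)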
+
+;; Largetoc support
+(define_insn "largetoc_high"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+ (const:DI
+ (plus:DI (match_operand:DI 1 "gpc_reg_operand" "b")
+ (high:DI (match_operand:DI 2 "" "")))))]
+ "TARGET_ELF && TARGET_CMODEL != CMODEL_SMALL"
+ "{cau|addis} %0,%1,%2@ha")
+
+(define_insn "largetoc_low"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b")
+ (match_operand:DI 2 "" "")))]
+ "TARGET_ELF && TARGET_CMODEL != CMODEL_SMALL"
+ "{cal %0,%2@l(%1)|addi %0,%1,%2@l}")
+
+;; A function pointer under AIX is a pointer to a data area whose first word
+;; contains the actual address of the function, whose second word contains a
+;; pointer to its TOC, and whose third word contains a value to place in the
+;; static chain register (r11). Note that if we load the static chain, our
+;; "trampoline" need not have any executable code.
+
+(define_expand "call_indirect_aix32"
+ [(set (match_dup 2)
+ (mem:SI (match_operand:SI 0 "gpc_reg_operand" "")))
+ (set (match_dup 3)
+ (reg:SI 2))
+ (set (reg:SI 11)
+ (mem:SI (plus:SI (match_dup 0)
+ (const_int 8))))
+ (parallel [(call (mem:SI (match_dup 2))
+ (match_operand 1 "" ""))
+ (use (match_dup 4))
+ (set (reg:SI 2) (match_dup 3))
+ (use (reg:SI 11))
+ (clobber (reg:SI LR_REGNO))])]
+ "TARGET_32BIT"
+ "
+{
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, stack_pointer_rtx,
+ GEN_INT (20)));
+
+ operands[4] = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, operands[0],
+ GEN_INT (4)));
+
+ /* Make sure the compiler does not optimize away the store of the TOC. */
+ MEM_VOLATILE_P (operands[3]) = 1;
+}")
+
+(define_expand "call_indirect_aix64"
+ [(set (match_dup 2)
+ (mem:DI (match_operand:DI 0 "gpc_reg_operand" "")))
+ (set (match_dup 3)
+ (reg:DI 2))
+ (set (reg:DI 11)
+ (mem:DI (plus:DI (match_dup 0)
+ (const_int 16))))
+ (parallel [(call (mem:SI (match_dup 2))
+ (match_operand 1 "" ""))
+ (use (match_dup 4))
+ (set (reg:DI 2) (match_dup 3))
+ (use (reg:DI 11))
+ (clobber (reg:DI LR_REGNO))])]
+ "TARGET_64BIT"
+ "
+{
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (40)));
+
+ operands[4] = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, operands[0],
+ GEN_INT (8)));
+
+ /* Make sure the compiler does not optimize away the store of the TOC. */
+ MEM_VOLATILE_P (operands[3]) = 1;
+}")
+
+(define_expand "call_value_indirect_aix32"
+ [(set (match_dup 3)
+ (mem:SI (match_operand:SI 1 "gpc_reg_operand" "")))
+ (set (match_dup 4)
+ (reg:SI 2))
+ (set (reg:SI 11)
+ (mem:SI (plus:SI (match_dup 1)
+ (const_int 8))))
+ (parallel [(set (match_operand 0 "" "")
+ (call (mem:SI (match_dup 3))
+ (match_operand 2 "" "")))
+ (use (match_dup 5))
+ (set (reg:SI 2) (match_dup 4))
+ (use (reg:SI 11))
+ (clobber (reg:SI LR_REGNO))])]
+ "TARGET_32BIT"
+ "
+{
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, stack_pointer_rtx,
+ GEN_INT (20)));
+
+ operands[5] = gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, operands[1],
+ GEN_INT (4)));
+
+ /* Make sure the compiler does not optimize away the store of the TOC. */
+ MEM_VOLATILE_P (operands[4]) = 1;
+}")
+
+(define_expand "call_value_indirect_aix64"
+ [(set (match_dup 3)
+ (mem:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (set (match_dup 4)
+ (reg:DI 2))
+ (set (reg:DI 11)
+ (mem:DI (plus:DI (match_dup 1)
+ (const_int 16))))
+ (parallel [(set (match_operand 0 "" "")
+ (call (mem:SI (match_dup 3))
+ (match_operand 2 "" "")))
+ (use (match_dup 5))
+ (set (reg:DI 2) (match_dup 4))
+ (use (reg:DI 11))
+ (clobber (reg:DI LR_REGNO))])]
+ "TARGET_64BIT"
+ "
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (40)));
+
+ operands[5] = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, operands[1],
+ GEN_INT (8)));
+
+ /* Make sure the compiler does not optimize away the store of the TOC. */
+ MEM_VOLATILE_P (operands[4]) = 1;
+}")
+
+;; Now the definitions for the call and call_value insns
+(define_expand "call"
+ [(parallel [(call (mem:SI (match_operand 0 "address_operand" ""))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ "
+{
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ operands[0] = machopic_indirect_call_target (operands[0]);
+#endif
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+ gcc_assert (GET_CODE (operands[1]) == CONST_INT);
+
+ operands[0] = XEXP (operands[0], 0);
+
+ if (GET_CODE (operands[0]) != SYMBOL_REF
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[0]))
+ || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0))
+ {
+ if (INTVAL (operands[2]) & CALL_LONG)
+ operands[0] = rs6000_longcall_ref (operands[0]);
+
+ switch (DEFAULT_ABI)
+ {
+ case ABI_V4:
+ case ABI_DARWIN:
+ operands[0] = force_reg (Pmode, operands[0]);
+ break;
+
+ case ABI_AIX:
+ /* AIX function pointers are really pointers to a three word
+ area. */
+ emit_call_insn (TARGET_32BIT
+ ? gen_call_indirect_aix32 (force_reg (SImode,
+ operands[0]),
+ operands[1])
+ : gen_call_indirect_aix64 (force_reg (DImode,
+ operands[0]),
+ operands[1]));
+ DONE;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+}")
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand 1 "address_operand" ""))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI LR_REGNO))])]
+ ""
+ "
+{
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ operands[1] = machopic_indirect_call_target (operands[1]);
+#endif
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ operands[1] = XEXP (operands[1], 0);
+
+ if (GET_CODE (operands[1]) != SYMBOL_REF
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[1]))
+ || (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0))
+ {
+ if (INTVAL (operands[3]) & CALL_LONG)
+ operands[1] = rs6000_longcall_ref (operands[1]);
+
+ switch (DEFAULT_ABI)
+ {
+ case ABI_V4:
+ case ABI_DARWIN:
+ operands[1] = force_reg (Pmode, operands[1]);
+ break;
+
+ case ABI_AIX:
+ /* AIX function pointers are really pointers to a three word
+ area. */
+ emit_call_insn (TARGET_32BIT
+ ? gen_call_value_indirect_aix32 (operands[0],
+ force_reg (SImode,
+ operands[1]),
+ operands[2])
+ : gen_call_value_indirect_aix64 (operands[0],
+ force_reg (DImode,
+ operands[1]),
+ operands[2]));
+ DONE;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+}")
+
+;; Call to function in current module. No TOC pointer reload needed.
+;; Operand2 is nonzero if we are using the V.4 calling sequence and
+;; either the function was not prototyped, or it was prototyped as a
+;; variable argument function. It is > 0 if FP registers were passed
+;; and < 0 if they were not.
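+;;
+;; The templates below communicate this through CR bit 6: "crxor 6,6,6"
+;; always clears a CR bit (x XOR x = 0) while "creqv 6,6,6" always sets
+;; one (x EQV x = 1), giving a varargs callee a flag it can test.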
+
+(define_insn "*call_local32"
+ [(call (mem:SI (match_operand:SI 0 "current_file_function_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "(INTVAL (operands[2]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z0@local\" : \"bl %z0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_local64"
+ [(call (mem:SI (match_operand:DI 0 "current_file_function_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_64BIT && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z0@local\" : \"bl %z0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_value_local32"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "current_file_function_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "(INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z1@local\" : \"bl %z1\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+
+(define_insn "*call_value_local64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_64BIT && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"bl %z1@local\" : \"bl %z1\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+;; Call to function which may be in another module. Restore the TOC
+;; pointer (r2) after the call unless this is System V.
+;; Operand1 is nonzero if we are using the V.4 calling sequence and
+;; either the function was not prototyped, or it was prototyped as a
+;; variable argument function. It is > 0 if FP registers were passed
+;; and < 0 if they were not.
+;; Operand2 is a memory reference at offset 4 into the 3-word function
+;; pointer, holding the value to be loaded into the TOC register. Do not
+;; split the load from the call, as the split may move the load of the TOC
+;; before addresses that use the TOC.
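+;;
+;; As a sketch (hypothetical registers), the load/call/restore group below
+;; is therefore emitted as one indivisible 12-byte sequence:
+;;   lwz 2,4(9)     ; callee's TOC, from the function pointer
+;;   bctrl
+;;   lwz 2,20(1)    ; caller's TOC, saved earlier at sp+20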
+
+(define_insn "*call_indirect_nonlocal_aix32"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "c,*l"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "memory_operand" "m,m"))
+ (set (reg:SI 2) (match_operand:SI 3 "memory_operand" "m,m"))
+ (use (reg:SI 11))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_32BIT && DEFAULT_ABI == ABI_AIX"
+ "{l|lwz} 2,%2\;b%T0l\;{l|lwz} 2,%3"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
+
+(define_insn "*call_nonlocal_aix32"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s"))
+ (match_operand 1 "" "g"))
+ (use (match_operand:SI 2 "immediate_operand" "O"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_32BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "bl %z0\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*call_indirect_nonlocal_aix64"
+ [(call (mem:SI (match_operand:DI 0 "register_operand" "c,*l"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:DI 2 "memory_operand" "m,m"))
+ (set (reg:DI 2) (match_operand:DI 3 "memory_operand" "m,m"))
+ (use (reg:DI 11))
+ (clobber (reg:DI LR_REGNO))]
+ "TARGET_64BIT && DEFAULT_ABI == ABI_AIX"
+ "ld 2,%2\;b%T0l\;ld 2,%3"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
+
+(define_insn "*call_nonlocal_aix64"
+ [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s"))
+ (match_operand 1 "" "g"))
+ (use (match_operand:SI 2 "immediate_operand" "O"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_64BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "bl %z0\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*call_value_indirect_nonlocal_aix32"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "c,*l"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "memory_operand" "m,m"))
+ (set (reg:SI 2) (match_operand:SI 4 "memory_operand" "m,m"))
+ (use (reg:SI 11))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_32BIT && DEFAULT_ABI == ABI_AIX"
+ "{l|lwz} 2,%3\;b%T1l\;{l|lwz} 2,%4"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
+
+(define_insn "*call_value_nonlocal_aix32"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (use (match_operand:SI 3 "immediate_operand" "O"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_32BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "bl %z1\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*call_value_indirect_nonlocal_aix64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "register_operand" "c,*l"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:DI 3 "memory_operand" "m,m"))
+ (set (reg:DI 2) (match_operand:DI 4 "memory_operand" "m,m"))
+ (use (reg:DI 11))
+ (clobber (reg:DI LR_REGNO))]
+ "TARGET_64BIT && DEFAULT_ABI == ABI_AIX"
+ "ld 2,%3\;b%T1l\;ld 2,%4"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
+
+(define_insn "*call_value_nonlocal_aix64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (use (match_operand:SI 3 "immediate_operand" "O"))
+ (clobber (reg:SI LR_REGNO))]
+ "TARGET_64BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "bl %z1\;%."
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+;; A function pointer under System V is just a normal pointer:
+;; operands[0] is the function pointer;
+;; operands[1] is the stack size to clean up;
+;; operands[2] is the value FUNCTION_ARG returns for the VOID argument,
+;; which indicates how to set cr1.
+
+(define_insn "*call_indirect_nonlocal_sysv<mode>"
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l,c,*l"))
+ (match_operand 1 "" "g,g,g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,O,n,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "DEFAULT_ABI == ABI_V4
+ || DEFAULT_ABI == ABI_DARWIN"
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+ return "b%T0l";
+}
+ [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn_and_split "*call_nonlocal_sysv<mode>"
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "(DEFAULT_ABI == ABI_DARWIN
+ || (DEFAULT_ABI == ABI_V4
+ && (INTVAL (operands[2]) & CALL_LONG) == 0))"
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+#if TARGET_MACHO
+  return output_call (insn, operands, 0, 2);
+#else
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
+ {
+ gcc_assert (!TARGET_SECURE_PLT);
+ return "bl %z0@plt";
+ }
+ else
+ return "bl %z0";
+#endif
+}
+ "DEFAULT_ABI == ABI_V4
+ && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[0])
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ [(parallel [(call (mem:SI (match_dup 0))
+ (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (reg:SI LR_REGNO))])]
+{
+ operands[3] = pic_offset_table_rtx;
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_nonlocal_sysv_secure<mode>"
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (use (match_operand:SI 3 "register_operand" "r,r"))
+ (clobber (reg:SI LR_REGNO))]
+ "(DEFAULT_ABI == ABI_V4
+ && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[0])
+ && (INTVAL (operands[2]) & CALL_LONG) == 0)"
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+ if (flag_pic == 2)
+ /* The magic 32768 offset here and in the other sysv call insns
+ corresponds to the offset of r30 in .got2, as given by LCTOC1.
+ See sysv4.h:toc_section. */
+ return "bl %z0+32768@plt";
+ else
+ return "bl %z0@plt";
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_value_indirect_nonlocal_sysv<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l,c,*l"))
+ (match_operand 2 "" "g,g,g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,O,n,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "DEFAULT_ABI == ABI_V4
+ || DEFAULT_ABI == ABI_DARWIN"
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+ return "b%T1l";
+}
+ [(set_attr "type" "jmpreg,jmpreg,jmpreg,jmpreg")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn_and_split "*call_value_nonlocal_sysv<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (clobber (reg:SI LR_REGNO))]
+ "(DEFAULT_ABI == ABI_DARWIN
+ || (DEFAULT_ABI == ABI_V4
+ && (INTVAL (operands[3]) & CALL_LONG) == 0))"
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+#if TARGET_MACHO
+  return output_call (insn, operands, 1, 3);
+#else
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
+ {
+ gcc_assert (!TARGET_SECURE_PLT);
+ return "bl %z1@plt";
+ }
+ else
+ return "bl %z1";
+#endif
+}
+ "DEFAULT_ABI == ABI_V4
+ && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[1])
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ [(parallel [(set (match_dup 0)
+ (call (mem:SI (match_dup 1))
+ (match_dup 2)))
+ (use (match_dup 3))
+ (use (match_dup 4))
+ (clobber (reg:SI LR_REGNO))])]
+{
+ operands[4] = pic_offset_table_rtx;
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*call_value_nonlocal_sysv_secure<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (use (match_operand:SI 4 "register_operand" "r,r"))
+ (clobber (reg:SI LR_REGNO))]
+ "(DEFAULT_ABI == ABI_V4
+ && TARGET_SECURE_PLT && flag_pic && !SYMBOL_REF_LOCAL_P (operands[1])
+ && (INTVAL (operands[3]) & CALL_LONG) == 0)"
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn ("crxor 6,6,6", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn ("creqv 6,6,6", operands);
+
+ if (flag_pic == 2)
+ return "bl %z1+32768@plt";
+ else
+ return "bl %z1@plt";
+}
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+;; Call subroutine returning any type.
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ int i;
+
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+;; sibling call patterns
+(define_expand "sibcall"
+ [(parallel [(call (mem:SI (match_operand 0 "address_operand" ""))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (reg:SI LR_REGNO))
+ (return)])]
+ ""
+ "
+{
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ operands[0] = machopic_indirect_call_target (operands[0]);
+#endif
+
+ gcc_assert (GET_CODE (operands[0]) == MEM);
+ gcc_assert (GET_CODE (operands[1]) == CONST_INT);
+
+ operands[0] = XEXP (operands[0], 0);
+}")
+
+;; This and similar patterns must be marked as using LR; otherwise
+;; dataflow will try to delete the store into it. This is true
+;; even when the actual reg to jump to is in CTR and LR was
+;; saved and restored around the PIC-setting BCL.
+(define_insn "*sibcall_local32"
+ [(call (mem:SI (match_operand:SI 0 "current_file_function_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "(INTVAL (operands[2]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z0@local\" : \"b %z0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sibcall_local64"
+ [(call (mem:SI (match_operand:DI 0 "current_file_function_operand" "s,s"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:SI 2 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_64BIT && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z0@local\" : \"b %z0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sibcall_value_local32"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "current_file_function_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "(INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z1@local\" : \"b %z1\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+
+(define_insn "*sibcall_value_local64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_64BIT && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ return (DEFAULT_ABI == ABI_V4 && flag_pic) ? \"b %z1@local\" : \"b %z1\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sibcall_nonlocal_aix32"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s"))
+ (match_operand 1 "" "g"))
+ (use (match_operand:SI 2 "immediate_operand" "O"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_32BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "b %z0"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_nonlocal_aix64"
+ [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s"))
+ (match_operand 1 "" "g"))
+ (use (match_operand:SI 2 "immediate_operand" "O"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_64BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "b %z0"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_nonlocal_aix32"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (use (match_operand:SI 3 "immediate_operand" "O"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_32BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "b %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_nonlocal_aix64"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (use (match_operand:SI 3 "immediate_operand" "O"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "TARGET_64BIT
+ && DEFAULT_ABI == ABI_AIX
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "b %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_nonlocal_sysv<mode>"
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s,s"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "(DEFAULT_ABI == ABI_DARWIN
+ || DEFAULT_ABI == ABI_V4)
+ && (INTVAL (operands[2]) & CALL_LONG) == 0"
+ "*
+{
+ if (INTVAL (operands[2]) & CALL_V4_SET_FP_ARGS)
+ output_asm_insn (\"crxor 6,6,6\", operands);
+
+ else if (INTVAL (operands[2]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
+ {
+ gcc_assert (!TARGET_SECURE_PLT);
+ return \"b %z0@plt\";
+ }
+ else
+ return \"b %z0\";
+}"
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "address_operand" ""))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (use (reg:SI LR_REGNO))
+ (return)])]
+ ""
+ "
+{
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ operands[1] = machopic_indirect_call_target (operands[1]);
+#endif
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ operands[1] = XEXP (operands[1], 0);
+}")
+
+(define_insn "*sibcall_value_nonlocal_sysv<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s,s"))
+ (match_operand 2 "" "")))
+ (use (match_operand:SI 3 "immediate_operand" "O,n"))
+ (use (reg:SI LR_REGNO))
+ (return)]
+ "(DEFAULT_ABI == ABI_DARWIN
+ || DEFAULT_ABI == ABI_V4)
+ && (INTVAL (operands[3]) & CALL_LONG) == 0"
+ "*
+{
+  if (INTVAL (operands[3]) & CALL_V4_SET_FP_ARGS)
+    output_asm_insn (\"crxor 6,6,6\", operands);
+
+  else if (INTVAL (operands[3]) & CALL_V4_CLEAR_FP_ARGS)
+ output_asm_insn (\"creqv 6,6,6\", operands);
+
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
+ {
+ gcc_assert (!TARGET_SECURE_PLT);
+ return \"b %z1@plt\";
+ }
+ else
+ return \"b %z1\";
+}"
+ [(set_attr "type" "branch,branch")
+ (set_attr "length" "4,8")])
+
+(define_expand "sibcall_epilogue"
+ [(use (const_int 0))]
+ "TARGET_SCHED_PROLOG"
+ "
+{
+ rs6000_emit_epilogue (TRUE);
+ DONE;
+}")
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCK)]
+ ""
+ "")
+
+(define_insn "probe_stack"
+ [(set (match_operand 0 "memory_operand" "=m")
+ (unspec [(const_int 0)] UNSPEC_PROBE_STACK))]
+ ""
+ "*
+{
+ operands[1] = gen_rtx_REG (Pmode, 0);
+ return \"{st%U0%X0|stw%U0%X0} %1,%0\";
+}"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "probe_stack_range<P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "register_operand" "r")]
+ UNSPECV_PROBE_STACK_RANGE))]
+ ""
+ "* return output_probe_stack_range (operands[0], operands[2]);"
+ [(set_attr "type" "three")])
+
+;; Compare insns are next. Note that the RS/6000 has two types of compares,
+;; signed & unsigned, and one type of branch.
+;;
+;; Start with the DEFINE_EXPANDs to generate the rtl for compares, scc
+;; insns, and branches.
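+;;
+;; For instance (a sketch): a signed word compare is "cmpw cr0,3,4" and an
+;; unsigned one is "cmplw cr0,3,4"; both feed the same kind of conditional
+;; branch, e.g. "blt cr0,label".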
+
+(define_expand "cbranch<mode>4"
+ [(use (match_operator 0 "rs6000_cbranch_operator"
+ [(match_operand:GPR 1 "gpc_reg_operand" "")
+ (match_operand:GPR 2 "reg_or_short_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ "
+{
+  /* Take care of the case where operands[2] is negative while the
+     comparison is an unsigned (logical) one; no compare insn handles
+     that combination.  */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]),
+ GET_MODE (operands[0]),
+ operands[1], operands[2]);
+ }
+
+ rs6000_emit_cbranch (<MODE>mode, operands);
+ DONE;
+}")
+
+(define_expand "cbranch<mode>4"
+ [(use (match_operator 0 "rs6000_cbranch_operator"
+ [(match_operand:FP 1 "gpc_reg_operand" "")
+ (match_operand:FP 2 "gpc_reg_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ "
+{
+ rs6000_emit_cbranch (<MODE>mode, operands);
+ DONE;
+}")
+
+(define_expand "cstore<mode>4"
+ [(use (match_operator 1 "rs6000_cbranch_operator"
+ [(match_operand:GPR 2 "gpc_reg_operand" "")
+ (match_operand:GPR 3 "reg_or_short_operand" "")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ ""
+ "
+{
+  /* Take care of the case where operands[3] is negative while the
+     comparison is an unsigned (logical) one; no compare insn handles
+     that combination.  */
+ if (GET_CODE (operands[3]) == CONST_INT
+ && INTVAL (operands[3]) < 0)
+ {
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]),
+ GET_MODE (operands[1]),
+ operands[2], operands[3]);
+ }
+
+  /* For SNE, we would prefer that the xor/abs sequence be used for integers.
+     For SEQ, likewise, except that comparisons with zero should be done
+     with an scc insn.  However, due to the order in which combine sees the
+     resulting insns, we must, in fact, allow SEQ for integers.  Fail in
+     the cases we don't want to handle or that are best handled by portable
+     code.  */
+ if (GET_CODE (operands[1]) == NE)
+ FAIL;
+ if ((GET_CODE (operands[1]) == LT || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == GT || GET_CODE (operands[1]) == GE)
+ && operands[3] == const0_rtx)
+ FAIL;
+ rs6000_emit_sCOND (<MODE>mode, operands);
+ DONE;
+}")
+
+(define_expand "cstore<mode>4"
+ [(use (match_operator 1 "rs6000_cbranch_operator"
+ [(match_operand:FP 2 "gpc_reg_operand" "")
+ (match_operand:FP 3 "gpc_reg_operand" "")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ ""
+ "
+{
+ rs6000_emit_sCOND (<MODE>mode, operands);
+ DONE;
+}")
+
+
+(define_expand "stack_protect_set"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tlsreg = gen_rtx_REG (Pmode, TARGET_64BIT ? 13 : 2);
+ rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET));
+ operands[1] = gen_rtx_MEM (Pmode, addr);
+#endif
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_setsi (operands[0], operands[1]));
+ DONE;
+})
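+
+;; For illustration (addresses shown symbolically), the 32-bit insn below
+;; emits a sequence along the lines of:
+;;   lwz 9,<canary>  ; load the guard value
+;;   stw 9,<slot>    ; store it into the protected stack slot
+;;   li 9,0          ; clear the register so the guard value cannot leak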
+
+(define_insn "stack_protect_setsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "TARGET_32BIT"
+ "{l%U1%X1|lwz%U1%X1} %2,%1\;{st%U0%X0|stw%U0%X0} %2,%0\;{lil|li} %2,0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn "stack_protect_setdi"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))]
+ "TARGET_64BIT"
+ "ld%U1%X1 %2,%1\;std%U0%X0 %2,%0\;{lil|li} %2,0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx test, op0, op1;
+#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tlsreg = gen_rtx_REG (Pmode, TARGET_64BIT ? 13 : 2);
+ rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET));
+ operands[1] = gen_rtx_MEM (Pmode, addr);
+#endif
+ op0 = operands[0];
+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]), UNSPEC_SP_TEST);
+ test = gen_rtx_EQ (VOIDmode, op0, op1);
+ emit_jump_insn (gen_cbranchsi4 (test, op0, op1, operands[2]));
+ DONE;
+})
+
+(define_insn "stack_protect_testsi"
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y")
+ (unspec:CCEQ [(match_operand:SI 1 "memory_operand" "m,m")
+ (match_operand:SI 2 "memory_operand" "m,m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 4 "=r,r") (const_int 0))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {l%U1%X1|lwz%U1%X1} %3,%1\;{l%U2%X2|lwz%U2%X2} %4,%2\;xor. %3,%3,%4\;{lil|li} %4,0
+ {l%U1%X1|lwz%U1%X1} %3,%1\;{l%U2%X2|lwz%U2%X2} %4,%2\;{cmpl|cmplw} %0,%3,%4\;{lil|li} %3,0\;{lil|li} %4,0"
+ [(set_attr "length" "16,20")])
+
+(define_insn "stack_protect_testdi"
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=x,?y")
+ (unspec:CCEQ [(match_operand:DI 1 "memory_operand" "m,m")
+ (match_operand:DI 2 "memory_operand" "m,m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:DI 4 "=r,r") (const_int 0))
+ (clobber (match_scratch:DI 3 "=&r,&r"))]
+ "TARGET_64BIT"
+ "@
+ ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;xor. %3,%3,%4\;{lil|li} %4,0
+ ld%U1%X1 %3,%1\;ld%U2%X2 %4,%2\;cmpld %0,%3,%4\;{lil|li} %3,0\;{lil|li} %4,0"
+ [(set_attr "length" "16,20")])
+
+
+;; Here are the actual compare insns.
+(define_insn "*cmp<mode>_internal1"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (compare:CC (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "reg_or_short_operand" "rI")))]
+ ""
+ "{cmp%I2|cmp<wd>%I2} %0,%1,%2"
+ [(set_attr "type" "cmp")])
+
+;; If we are comparing a register for equality with a large constant,
+;; we can do this with an XOR followed by a compare. But this is profitable
+;; only if the large constant is only used for the comparison (and in this
+;; case we already have a register to reuse as scratch).
+;;
+;; For 64-bit registers, we could only do so if the constant's bit 15 is clear:
+;; otherwise we'd need to XOR with FFFFFFFF????0000 which is not available.
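+;;
+;; Worked example (values chosen for illustration): for C = 0x12345678,
+;; SEXTC (the low 16 bits sign-extended) is 0x5678 and XORV = C ^ SEXTC
+;; is 0x12340000, so the peephole turns the load-and-compare into:
+;;   xoris 9,3,0x1234
+;;   cmpwi 7,9,0x5678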
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "logical_const_operand" ""))
+ (set (match_dup 0) (match_operator:SI 3 "boolean_or_operator"
+ [(match_dup 0)
+ (match_operand:SI 2 "logical_const_operand" "")]))
+ (set (match_operand:CC 4 "cc_reg_operand" "")
+ (compare:CC (match_operand:SI 5 "gpc_reg_operand" "")
+ (match_dup 0)))
+ (set (pc)
+ (if_then_else (match_operator 6 "equality_operator"
+ [(match_dup 4) (const_int 0)])
+ (match_operand 7 "" "")
+ (match_operand 8 "" "")))]
+ "peep2_reg_dead_p (3, operands[0])
+ && peep2_reg_dead_p (4, operands[4])"
+ [(set (match_dup 0) (xor:SI (match_dup 5) (match_dup 9)))
+ (set (match_dup 4) (compare:CC (match_dup 0) (match_dup 10)))
+ (set (pc) (if_then_else (match_dup 6) (match_dup 7) (match_dup 8)))]
+
+{
+  /* Get the constant we are comparing against, and see what it looks like
+     when sign-extended from 16 to 32 bits.  Then figure out the constant
+     that must be XORed with C to produce that sign-extended value.  */
+ rtx cnst = simplify_const_binary_operation (GET_CODE (operands[3]),
+ SImode,
+ operands[1], operands[2]);
+ HOST_WIDE_INT c = INTVAL (cnst);
+ HOST_WIDE_INT sextc = ((c & 0xffff) ^ 0x8000) - 0x8000;
+ HOST_WIDE_INT xorv = c ^ sextc;
+
+ operands[9] = GEN_INT (xorv);
+ operands[10] = GEN_INT (sextc);
+})
+
+(define_insn "*cmpsi_internal2"
+ [(set (match_operand:CCUNS 0 "cc_reg_operand" "=y")
+ (compare:CCUNS (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_u_short_operand" "rK")))]
+ ""
+ "{cmpl%I2|cmplw%I2} %0,%1,%b2"
+ [(set_attr "type" "cmp")])
+
+(define_insn "*cmpdi_internal2"
+ [(set (match_operand:CCUNS 0 "cc_reg_operand" "=y")
+ (compare:CCUNS (match_operand:DI 1 "gpc_reg_operand" "r")
+ (match_operand:DI 2 "reg_or_u_short_operand" "rK")))]
+ ""
+ "cmpld%I2 %0,%1,%b2"
+ [(set_attr "type" "cmp")])
+
+;; The following two insns don't exist as single insns, but if we provide
+;; them, we can swap an add and compare, which will enable us to overlap more
+;; of the required delay between a compare and branch. We generate code for
+;; them by splitting.
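+;;
+;; Sketch (hypothetical values): a combined compare-and-add such as
+;;   (set cr7 (compare r3 100))  (set r4 (plus r3 16))
+;; splits back into "cmpwi 7,3,100" followed by "addi 4,3,16", which the
+;; scheduler can then place so the add overlaps the compare-branch delay.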
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=y")
+ (compare:CC (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "short_cint_operand" "i")))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "i")))]
+ ""
+ "#"
+ [(set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:CCUNS 3 "cc_reg_operand" "=y")
+ (compare:CCUNS (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "u_short_cint_operand" "i")))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "i")))]
+ ""
+ "#"
+ [(set_attr "length" "8")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_operand" "")
+ (compare:CC (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "short_cint_operand" "")))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "")))]
+ ""
+ [(set (match_dup 3) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))])
+
+(define_split
+ [(set (match_operand:CCUNS 3 "cc_reg_operand" "")
+ (compare:CCUNS (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "u_short_cint_operand" "")))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (match_dup 1) (match_operand:SI 4 "short_cint_operand" "")))]
+ ""
+ [(set (match_dup 3) (compare:CCUNS (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))])
+
+(define_insn "*cmpsf_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+ "fcmpu %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+(define_insn "*cmpdf_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d")
+ (match_operand:DF 2 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && !VECTOR_UNIT_VSX_P (DFmode)"
+ "fcmpu %0,%1,%2"
+ [(set_attr "type" "fpcompare")])
+
+;; Only need to compare second words if first words equal
+(define_insn "*cmptf_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "d")
+ (match_operand:TF 2 "gpc_reg_operand" "d")))]
+ "!TARGET_IEEEQUAD && !TARGET_XL_COMPAT
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128"
+ "fcmpu %0,%1,%2\;bne %0,$+8\;fcmpu %0,%L1,%L2"
+ [(set_attr "type" "fpcompare")
+ (set_attr "length" "12")])
+
+(define_insn_and_split "*cmptf_internal2"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "d")
+ (match_operand:TF 2 "gpc_reg_operand" "d")))
+ (clobber (match_scratch:DF 3 "=d"))
+ (clobber (match_scratch:DF 4 "=d"))
+ (clobber (match_scratch:DF 5 "=d"))
+ (clobber (match_scratch:DF 6 "=d"))
+ (clobber (match_scratch:DF 7 "=d"))
+ (clobber (match_scratch:DF 8 "=d"))
+ (clobber (match_scratch:DF 9 "=d"))
+ (clobber (match_scratch:DF 10 "=d"))
+ (clobber (match_scratch:GPR 11 "=b"))]
+ "!TARGET_IEEEQUAD && TARGET_XL_COMPAT
+ && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LONG_DOUBLE_128"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 14))
+ (set (match_dup 4) (match_dup 15))
+ (set (match_dup 9) (abs:DF (match_dup 5)))
+ (set (match_dup 0) (compare:CCFP (match_dup 9) (match_dup 3)))
+ (set (pc) (if_then_else (ne (match_dup 0) (const_int 0))
+ (label_ref (match_dup 12))
+ (pc)))
+ (set (match_dup 0) (compare:CCFP (match_dup 5) (match_dup 7)))
+ (set (pc) (label_ref (match_dup 13)))
+ (match_dup 12)
+ (set (match_dup 10) (minus:DF (match_dup 5) (match_dup 7)))
+ (set (match_dup 9) (minus:DF (match_dup 6) (match_dup 8)))
+ (set (match_dup 9) (plus:DF (match_dup 10) (match_dup 9)))
+ (set (match_dup 0) (compare:CCFP (match_dup 9) (match_dup 4)))
+ (match_dup 13)]
+{
+ REAL_VALUE_TYPE rv;
+ const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+
+ operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word);
+ operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word);
+ operands[7] = simplify_gen_subreg (DFmode, operands[2], TFmode, hi_word);
+ operands[8] = simplify_gen_subreg (DFmode, operands[2], TFmode, lo_word);
+ operands[12] = gen_label_rtx ();
+ operands[13] = gen_label_rtx ();
+ real_inf (&rv);
+ operands[14] = force_const_mem (DFmode,
+ CONST_DOUBLE_FROM_REAL_VALUE (rv, DFmode));
+ operands[15] = force_const_mem (DFmode,
+ CONST_DOUBLE_FROM_REAL_VALUE (dconst0,
+ DFmode));
+ if (TARGET_TOC)
+ {
+ rtx tocref;
+ tocref = create_TOC_reference (XEXP (operands[14], 0), operands[11]);
+ operands[14] = gen_const_mem (DFmode, tocref);
+ tocref = create_TOC_reference (XEXP (operands[15], 0), operands[11]);
+ operands[15] = gen_const_mem (DFmode, tocref);
+ set_mem_alias_set (operands[14], get_TOC_alias_set ());
+ set_mem_alias_set (operands[15], get_TOC_alias_set ());
+ }
+})
+
+;; Now we have the scc insns. We can do some combinations because of the
+;; way the machine works.
+;;
+;; Note that this is probably faster if we can put an insn between the
+;; mfcr and rlinm, but this is tricky. Let's leave it for now. In most
+;; cases the insns below which don't use an intermediate CR field will
+;; be used instead.
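+;;
+;; For example (hypothetical registers), materializing cr0's EQ bit as a
+;; 0/1 value takes the form:
+;;   mfcr 3
+;;   rlwinm 3,3,3,31,31   ; rotate CR bit 2 (EQ of cr0) into the LSB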
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)]))]
+ ""
+ "mfcr %0%Q2\;{rlinm|rlwinm} %0,%0,%J1,1"
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "TARGET_MFCRF") (const_int 0))
+ (const_string "mfcrf")
+ ]
+ (const_string "mfcr")))
+ (set_attr "length" "8")])
+
+;; Same as above, but get the GT bit.
+(define_insn "move_from_CR_gt_bit"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_operand 1 "cc_reg_operand" "y")] UNSPEC_MV_CR_GT))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "mfcr %0\;{rlinm|rlwinm} %0,%0,%D1,31,31"
+ [(set_attr "type" "mfcr")
+ (set_attr "length" "8")])
+
+;; Same as above, but get the OV/ORDERED bit.
+(define_insn "move_from_CR_ov_bit"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_operand 1 "cc_reg_operand" "y")] UNSPEC_MV_CR_OV))]
+ "TARGET_ISEL"
+ "mfcr %0\;{rlinm|rlwinm} %0,%0,%t1,1"
+ [(set_attr "type" "mfcr")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (match_operator:DI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)]))]
+ "TARGET_POWERPC64"
+ "mfcr %0%Q2\;{rlinm|rlwinm} %0,%0,%J1,1"
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "TARGET_MFCRF") (const_int 0))
+ (const_string "mfcrf")
+ ]
+ (const_string "mfcr")))
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y,y")
+ (const_int 0)])
+ (const_int 0)))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=r,r")
+ (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+ "TARGET_32BIT"
+ "@
+ mfcr %3%Q2\;{rlinm.|rlwinm.} %3,%3,%J1,1
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "8,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "")
+ (const_int 0)])
+ (const_int 0)))
+ (set (match_operand:SI 3 "gpc_reg_operand" "")
+ (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 3)
+ (match_op_dup 1 [(match_dup 2) (const_int 0)]))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ashift:SI (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)])
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ ""
+ "*
+{
+ int is_bit = ccr_bit (operands[1], 1);
+ int put_bit = 31 - (INTVAL (operands[3]) & 31);
+ int count;
+
+ if (is_bit >= put_bit)
+ count = is_bit - put_bit;
+ else
+ count = 32 - (put_bit - is_bit);
+
+ operands[4] = GEN_INT (count);
+ operands[5] = GEN_INT (put_bit);
+
+ return \"mfcr %0%Q2\;{rlinm|rlwinm} %0,%0,%4,%5,%5\";
+}"
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "TARGET_MFCRF") (const_int 0))
+ (const_string "mfcrf")
+ ]
+ (const_string "mfcr")))
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ashift:SI (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y,y")
+ (const_int 0)])
+ (match_operand:SI 3 "const_int_operand" "n,n"))
+ (const_int 0)))
+ (set (match_operand:SI 4 "gpc_reg_operand" "=r,r")
+ (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)])
+ (match_dup 3)))]
+ ""
+ "*
+{
+ int is_bit = ccr_bit (operands[1], 1);
+ int put_bit = 31 - (INTVAL (operands[3]) & 31);
+ int count;
+
+ /* Force split for non-cc0 compare. */
+ if (which_alternative == 1)
+ return \"#\";
+
+ if (is_bit >= put_bit)
+ count = is_bit - put_bit;
+ else
+ count = 32 - (put_bit - is_bit);
+
+ operands[5] = GEN_INT (count);
+ operands[6] = GEN_INT (put_bit);
+
+ return \"mfcr %4%Q2\;{rlinm.|rlwinm.} %4,%4,%5,%6,%6\";
+}"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "8,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (ashift:SI (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "")
+ (const_int 0)])
+ (match_operand:SI 3 "const_int_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 4 "gpc_reg_operand" "")
+ (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)])
+ (match_dup 3)))]
+ "reload_completed"
+ [(set (match_dup 4)
+ (ashift:SI (match_op_dup 1 [(match_dup 2) (const_int 0)])
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+;; There is a 3 cycle delay between consecutive mfcr instructions
+;; so it is useful to combine 2 scc instructions to use only one mfcr.
+
+(define_peephole
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (match_operator:SI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)]))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=r")
+ (match_operator:SI 4 "scc_comparison_operator"
+ [(match_operand 5 "cc_reg_operand" "y")
+ (const_int 0)]))]
+ "REGNO (operands[2]) != REGNO (operands[5])"
+ "mfcr %3\;{rlinm|rlwinm} %0,%3,%J1,1\;{rlinm|rlwinm} %3,%3,%J4,1"
+ [(set_attr "type" "mfcr")
+ (set_attr "length" "12")])
+
+(define_peephole
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (match_operator:DI 1 "scc_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)]))
+ (set (match_operand:DI 3 "gpc_reg_operand" "=r")
+ (match_operator:DI 4 "scc_comparison_operator"
+ [(match_operand 5 "cc_reg_operand" "y")
+ (const_int 0)]))]
+ "TARGET_POWERPC64 && REGNO (operands[2]) != REGNO (operands[5])"
+ "mfcr %3\;{rlinm|rlwinm} %0,%3,%J1,1\;{rlinm|rlwinm} %3,%3,%J4,1"
+ [(set_attr "type" "mfcr")
+ (set_attr "length" "12")])
+
+;; There are some scc insns that can be done directly, without a compare.
+;; These are faster because they don't involve the communications between
+;; the FXU and branch units. In fact, we will be replacing all of the
+;; integer scc insns here or in the portable methods in emit_store_flag.
+;;
+;; Also support (neg (scc ..)) since that construct is used to replace
+;; branches, (plus (scc ..) ..) since that construct is common and
+;; takes no more insns than scc, and (and (neg (scc ..)) ..) in the
+;; cases where it is no more expensive than (neg (scc ..)).
+
+;; Have reload force a constant into a register for the simple insns that
+;; otherwise won't accept constants. We do this because it is faster than
+;; the cmp/mfcr sequence we would otherwise generate.
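+;;
+;; A sketch of the direct sequence the eq splitter below produces for
+;; SImode (x == y as a 0/1 value, hypothetical registers):
+;;   xor 0,3,4      ; zero iff x == y
+;;   cntlzw 0,0     ; 32 iff the xor was zero, else 0..31
+;;   srwi 0,0,5     ; 32 >> 5 = 1, anything smaller gives 0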
+
+(define_mode_attr scc_eq_op2 [(SI "rKLI")
+ (DI "rKJI")])
+
+(define_insn_and_split "*eq<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (eq:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "scc_eq_operand" "<scc_eq_op2>")))]
+ "!TARGET_POWER"
+ "#"
+ "!TARGET_POWER"
+ [(set (match_dup 0)
+ (clz:GPR (match_dup 3)))
+ (set (match_dup 0)
+ (lshiftrt:GPR (match_dup 0) (match_dup 4)))]
+ {
+ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 0)
+ {
+ /* Use output operand as intermediate. */
+ operands[3] = operands[0];
+
+ if (logical_operand (operands[2], <MODE>mode))
+ emit_insn (gen_rtx_SET (VOIDmode, operands[3],
+ gen_rtx_XOR (<MODE>mode,
+ operands[1], operands[2])));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[3],
+ gen_rtx_PLUS (<MODE>mode, operands[1],
+ negate_rtx (<MODE>mode,
+ operands[2]))));
+ }
+ else
+ operands[3] = operands[1];
+
+ operands[4] = GEN_INT (exact_log2 (GET_MODE_BITSIZE (<MODE>mode)));
+ })
+
+(define_insn_and_split "*eq<mode>_compare"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=y")
+ (compare:CC
+	 (eq:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (eq:P (match_dup 1) (match_dup 2)))]
+ "!TARGET_POWER && optimize_size"
+ "#"
+ "!TARGET_POWER && optimize_size"
+ [(set (match_dup 0)
+ (clz:P (match_dup 4)))
+ (parallel [(set (match_dup 3)
+ (compare:CC (lshiftrt:P (match_dup 0) (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 0)
+ (lshiftrt:P (match_dup 0) (match_dup 5)))])]
+ {
+ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 0)
+ {
+ /* Use output operand as intermediate. */
+ operands[4] = operands[0];
+
+ if (logical_operand (operands[2], <MODE>mode))
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_XOR (<MODE>mode,
+ operands[1], operands[2])));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_PLUS (<MODE>mode, operands[1],
+ negate_rtx (<MODE>mode,
+ operands[2]))));
+ }
+ else
+ operands[4] = operands[1];
+
+ operands[5] = GEN_INT (exact_log2 (GET_MODE_BITSIZE (<MODE>mode)));
+ })
+
+(define_insn "*eqsi_power"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r,r")
+ (eq:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,O,K,L,I")))
+ (clobber (match_scratch:SI 3 "=r,&r,r,r,r"))]
+ "TARGET_POWER"
+ "@
+ xor %0,%1,%2\;{sfi|subfic} %3,%0,0\;{ae|adde} %0,%3,%0
+ {sfi|subfic} %3,%1,0\;{ae|adde} %0,%3,%1
+ {xoril|xori} %0,%1,%b2\;{sfi|subfic} %3,%0,0\;{ae|adde} %0,%3,%0
+ {xoriu|xoris} %0,%1,%u2\;{sfi|subfic} %3,%0,0\;{ae|adde} %0,%3,%0
+ {sfi|subfic} %0,%1,%2\;{sfi|subfic} %3,%0,0\;{ae|adde} %0,%3,%0"
+ [(set_attr "type" "three,two,three,three,three")
+ (set_attr "length" "12,8,12,12,12")])
+
+;; We have insns of the form shown by the first define_insn below. If
+;; there is something inside the comparison operation, we must split it.
+(define_split
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (match_operator 1 "comparison_operator"
+ [(match_operand:SI 2 "" "")
+ (match_operand:SI 3
+ "reg_or_cint_operand" "")])
+ (match_operand:SI 4 "gpc_reg_operand" "")))
+ (clobber (match_operand:SI 5 "register_operand" ""))]
+ "! gpc_reg_operand (operands[2], SImode)"
+ [(set (match_dup 5) (match_dup 2))
+ (set (match_dup 2) (plus:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)])
+ (match_dup 4)))])
+
+(define_insn "*plus_eqsi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r,&r,&r,&r")
+ (plus:SI (eq:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r")
+ (match_operand:SI 2 "scc_eq_operand" "r,O,K,L,I"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r,r")))]
+ "TARGET_32BIT"
+ "@
+ xor %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3
+ {sfi|subfic} %0,%1,0\;{aze|addze} %0,%3
+ {xoril|xori} %0,%1,%b2\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3
+ {xoriu|xoris} %0,%1,%u2\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3
+ {sfi|subfic} %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3"
+ [(set_attr "type" "three,two,three,three,three")
+ (set_attr "length" "12,8,12,12,12")])
+
+(define_insn "*compare_plus_eqsi"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,x,x,x,?y,?y,?y,?y,?y")
+ (compare:CC
+ (plus:SI
+ (eq:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "scc_eq_operand" "r,O,K,L,I,r,O,K,L,I"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r,r,r,r,r,r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r,&r,&r,&r,&r,&r,&r,&r,&r"))]
+ "TARGET_32BIT && optimize_size"
+ "@
+ xor %4,%1,%2\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ {sfi|subfic} %4,%1,0\;{aze.|addze.} %4,%3
+ {xoril|xori} %4,%1,%b2\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ {xoriu|xoris} %4,%1,%u2\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ {sfi|subfic} %4,%1,%2\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,8,12,12,12,16,12,16,16,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI
+ (eq:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "scc_eq_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && optimize_size && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (eq:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn "*plus_eqsi_compare"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,x,x,x,?y,?y,?y,?y,?y")
+ (compare:CC
+ (plus:SI
+ (eq:SI (match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r,r,r,r,r,r,r")
+ (match_operand:SI 2 "scc_eq_operand" "r,O,K,L,I,r,O,K,L,I"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r,r,r,r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r,&r,&r,&r,&r,&r,&r,&r,&r")
+ (plus:SI (eq:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT && optimize_size"
+ "@
+ xor %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ {sfi|subfic} %0,%1,0\;{aze.|addze.} %0,%3
+ {xoril|xori} %0,%1,%b2\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ {xoriu|xoris} %0,%1,%u2\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ {sfi|subfic} %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,8,12,12,12,16,12,16,16,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI
+ (eq:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "scc_eq_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (eq:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT && optimize_size && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (eq:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*neg_eq0<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (neg:P (eq:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (const_int 0))))]
+ ""
+ "{ai|addic} %0,%1,-1\;{sfe|subfe} %0,%0,%0"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "*neg_eq<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (neg:P (eq:P (match_operand:P 1 "gpc_reg_operand" "%r")
+ (match_operand:P 2 "scc_eq_operand" "<scc_eq_op2>"))))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (neg:P (eq:P (match_dup 3) (const_int 0))))]
+ {
+ if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 0)
+ {
+ /* Use output operand as intermediate. */
+ operands[3] = operands[0];
+
+ if (logical_operand (operands[2], <MODE>mode))
+ emit_insn (gen_rtx_SET (VOIDmode, operands[3],
+ gen_rtx_XOR (<MODE>mode,
+ operands[1], operands[2])));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, operands[3],
+ gen_rtx_PLUS (<MODE>mode, operands[1],
+ negate_rtx (<MODE>mode,
+ operands[2]))));
+ }
+ else
+ operands[3] = operands[1];
+ })
+
+;; Simplify (ne X (const_int 0)) on the PowerPC.  No need to on the Power,
+;; since nabs/sr is just as fast there.
+(define_insn "*ne0si"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (lshiftrt:SI (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (const_int 31)))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "! TARGET_POWER && TARGET_32BIT && !TARGET_ISEL"
+ "{ai|addic} %2,%1,-1\;{sfe|subfe} %0,%2,%1"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "*ne0di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (lshiftrt:DI (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r")))
+ (const_int 63)))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ "TARGET_64BIT"
+ "addic %2,%1,-1\;subfe %0,%2,%1"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+;; This is what (plus (ne X (const_int 0)) Y) looks like.
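+;; {ai|addic} %3,%1,-1 leaves CA = (X != 0) in the carry bit, and
+;; {aze|addze} %0,%2 adds that carry to Y, so no explicit 0/1 value is
+;; ever materialized in a register.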
+(define_insn "*plus_ne0si"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (plus:SI (lshiftrt:SI
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (const_int 31))
+ (match_operand:SI 2 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_32BIT"
+ "{ai|addic} %3,%1,-1\;{aze|addze} %0,%2"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "*plus_ne0di"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (plus:DI (lshiftrt:DI
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r")))
+ (const_int 63))
+ (match_operand:DI 2 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT"
+ "addic %3,%1,-1\;addze %0,%2"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "*compare_plus_ne0si"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (lshiftrt:SI
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")))
+ (const_int 31))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=&r,&r"))
+ (clobber (match_scratch:SI 4 "=X,&r"))]
+ "TARGET_32BIT"
+ "@
+ {ai|addic} %3,%1,-1\;{aze.|addze.} %3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (lshiftrt:SI
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "")))
+ (const_int 31))
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (plus:SI (lshiftrt:SI (neg:SI (abs:SI (match_dup 1)))
+ (const_int 31))
+ (match_dup 2)))
+ (clobber (match_dup 4))])
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*compare_plus_ne0di"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:DI (lshiftrt:DI
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")))
+ (const_int 63))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=&r,&r"))]
+ "TARGET_64BIT"
+ "@
+ addic %3,%1,-1\;addze. %3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (plus:DI (lshiftrt:DI
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (const_int 63))
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3)
+ (plus:DI (lshiftrt:DI (neg:DI (abs:DI (match_dup 1)))
+ (const_int 63))
+ (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*plus_ne0si_compare"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (lshiftrt:SI
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")))
+ (const_int 31))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (plus:SI (lshiftrt:SI (neg:SI (abs:SI (match_dup 1))) (const_int 31))
+ (match_dup 2)))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {ai|addic} %3,%1,-1\;{aze.|addze.} %0,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (lshiftrt:SI
+ (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "")))
+ (const_int 31))
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (lshiftrt:SI (neg:SI (abs:SI (match_dup 1))) (const_int 31))
+ (match_dup 2)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (lshiftrt:SI (neg:SI (abs:SI (match_dup 1))) (const_int 31))
+ (match_dup 2)))
+ (clobber (match_dup 3))])
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*plus_ne0di_compare"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:DI (lshiftrt:DI
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")))
+ (const_int 63))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (plus:DI (lshiftrt:DI (neg:DI (abs:DI (match_dup 1))) (const_int 63))
+ (match_dup 2)))
+ (clobber (match_scratch:DI 3 "=&r,&r"))]
+ "TARGET_64BIT"
+ "@
+ addic %3,%1,-1\;addze. %0,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (plus:DI (lshiftrt:DI
+ (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "")))
+ (const_int 63))
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (plus:DI (lshiftrt:DI (neg:DI (abs:DI (match_dup 1))) (const_int 63))
+ (match_dup 2)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (plus:DI (lshiftrt:DI (neg:DI (abs:DI (match_dup 1))) (const_int 63))
+ (match_dup 2)))
+ (clobber (match_dup 3))])
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O")))
+ (clobber (match_scratch:SI 3 "=r,X"))]
+ "TARGET_POWER"
+ "@
+ doz %3,%2,%1\;{sfi|subfic} %0,%3,0\;{ae|adde} %0,%0,%3
+ {ai|addic} %0,%1,-1\;{aze|addze} %0,%0\;{sri|srwi} %0,%0,31"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O,r,O"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (le:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 3 "=r,X,r,X"))]
+ "TARGET_POWER"
+ "@
+ doz %3,%2,%1\;{sfi|subfic} %0,%3,0\;{ae.|adde.} %0,%0,%3
+ {ai|addic} %0,%1,-1\;{aze|addze} %0,%0\;{sri.|srwi.} %0,%0,31
+ #
+ #"
+ [(set_attr "type" "compare,delayed_compare,compare,delayed_compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (le:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (le:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (le:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r")))]
+ "TARGET_POWER"
+ "@
+ doz %0,%2,%1\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3
+ {srai|srawi} %0,%1,31\;{sf|subfc} %0,%1,%0\;{aze|addze} %0,%3"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O,r,O"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r,&r,&r"))]
+ "TARGET_POWER"
+ "@
+ doz %4,%2,%1\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ {srai|srawi} %4,%1,31\;{sf|subfc} %4,%1,%4\;{aze.|addze.} %4,%3
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (le:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O,r,O"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (plus:SI (le:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER"
+ "@
+ doz %0,%2,%1\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ {srai|srawi} %0,%1,31\;{sf|subfc} %0,%1,%0\;{aze.|addze.} %0,%3
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (le:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (le:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (neg:SI (le:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,O"))))]
+ "TARGET_POWER"
+ "@
+ doz %0,%2,%1\;{ai|addic} %0,%0,-1\;{sfe|subfe} %0,%0,%0
+ {ai|addic} %0,%1,-1\;{aze|addze} %0,%0\;{srai|srawi} %0,%0,31"
+ [(set_attr "length" "12")])
+
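+;; Unsigned comparisons can likewise be read out of the carry bit:
+;; {sf%I2|subf%I2c} %0,%1,%2 sets CA exactly when %1 <= %2 (unsigned,
+;; no borrow), and the following li/adde pair turns that carry into a
+;; 0/1 value.  The gtu/ltu/geu patterns below use the same trick with
+;; the operand order or the borrow sense swapped.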
+(define_insn "*leu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (leu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI")))]
+ ""
+ "{sf%I2|subf%I2c} %0,%1,%2\;{cal %0,0(0)|li %0,0}\;{ae|adde} %0,%0,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn "*leu<mode>_compare"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (leu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (leu:P (match_dup 1) (match_dup 2)))]
+ ""
+ "@
+ {sf%I2|subf%I2c} %0,%1,%2\;{cal %0,0(0)|li %0,0}\;{ae.|adde.} %0,%0,%0
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (leu:P (match_operand:P 1 "gpc_reg_operand" "")
+ (match_operand:P 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (leu:P (match_dup 1) (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (leu:P (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*plus_leu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r")
+ (plus:P (leu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI"))
+ (match_operand:P 3 "gpc_reg_operand" "r")))]
+ ""
+ "{sf%I2|subf%I2c} %0,%1,%2\;{aze|addze} %0,%3"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (leu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {sf%I2|subf%I2c} %4,%1,%2\;{aze.|addze.} %4,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (leu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (leu:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (leu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (leu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT"
+ "@
+ {sf%I2|subf%I2c} %0,%1,%2\;{aze.|addze.} %0,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,12")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (leu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (leu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (leu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*neg_leu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (neg:P (leu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI"))))]
+ ""
+ "{sf%I2|subf%I2c} %0,%1,%2\;{sfe|subfe} %0,%0,%0\;nand %0,%0,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn "*and_neg_leu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r")
+ (and:P (neg:P
+ (leu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI")))
+ (match_operand:P 3 "gpc_reg_operand" "r")))]
+ ""
+ "{sf%I2|subf%I2c} %0,%1,%2\;{sfe|subfe} %0,%0,%0\;andc %0,%3,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (neg:SI
+ (leu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {sf%I2|subf%I2c} %4,%1,%2\;{sfe|subfe} %4,%4,%4\;andc. %4,%3,%4
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (and:SI (neg:SI
+ (leu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" "")))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 4)
+ (and:SI (neg:SI (leu:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (and:SI (neg:SI
+ (leu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (and:SI (neg:SI (leu:SI (match_dup 1) (match_dup 2))) (match_dup 3)))]
+ "TARGET_32BIT"
+ "@
+ {sf%I2|subf%I2c} %0,%1,%2\;{sfe|subfe} %0,%0,%0\;andc. %0,%3,%0
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (and:SI (neg:SI
+ (leu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" "")))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (neg:SI (leu:SI (match_dup 1) (match_dup 2))) (match_dup 3)))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (and:SI (neg:SI (leu:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI")))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2\;nabs %0,%0\;{sri|srwi} %0,%0,31"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (lt:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %0,%1,%2\;nabs %0,%0\;{sri.|srwi.} %0,%0,31
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (lt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (lt:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (lt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (plus:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r")))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2\;{ai|addic} %0,%0,-1\;{aze|addze} %0,%3"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r"))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %4,%1,%2\;{ai|addic} %4,%4,-1\;{aze.|addze.} %4,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (lt:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (lt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %0,%1,%2\;{ai|addic} %0,%0,-1\;{aze.|addze.} %0,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (lt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (lt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (neg:SI (lt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI"))))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2\;nabs %0,%0\;{srai|srawi} %0,%0,31"
+ [(set_attr "length" "12")])
+
+(define_insn_and_split "*ltu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (ltu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (neg:P (ltu:P (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (neg:P (match_dup 0)))]
+ "")
+
+(define_insn_and_split "*ltu<mode>_compare"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (ltu:P (match_operand:P 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P,r,P"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r,r")
+ (ltu:P (match_dup 1) (match_dup 2)))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (neg:P (ltu:P (match_dup 1) (match_dup 2))))
+ (parallel [(set (match_dup 3)
+ (compare:CC (neg:P (match_dup 0)) (const_int 0)))
+ (set (match_dup 0) (neg:P (match_dup 0)))])]
+ "")
+
+(define_insn_and_split "*plus_ltu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r,r")
+ (plus:P (ltu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P"))
+ (match_operand:P 3 "reg_or_short_operand" "rI,rI")))]
+ ""
+ "#"
+ "&& !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(set (match_dup 0) (neg:P (ltu:P (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (minus:P (match_dup 3) (match_dup 0)))]
+ "")
+
+(define_insn_and_split "*plus_ltu<mode>_compare"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:P (ltu:P (match_operand:P 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P,r,P"))
+ (match_operand:P 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (plus:P (ltu:P (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ ""
+ "#"
+ "&& !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(set (match_dup 0) (neg:P (ltu:P (match_dup 1) (match_dup 2))))
+ (parallel [(set (match_dup 4)
+ (compare:CC (minus:P (match_dup 3) (match_dup 0))
+ (const_int 0)))
+ (set (match_dup 0) (minus:P (match_dup 3) (match_dup 0)))])]
+ "")
+
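+;; -(X <u Y) falls out of the borrow: subfc (or addic with the negated
+;; constant) clears CA exactly when %1 < %2 (unsigned), and
+;; {sfe|subfe} %0,%0,%0 then yields CA - 1, i.e. -1 when the comparison
+;; holds and 0 when it does not.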
+(define_insn "*neg_ltu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (neg:P (ltu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P"))))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{sfe|subfe} %0,%0,%0
+ {ai|addic} %0,%1,%n2\;{sfe|subfe} %0,%0,%0"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI")))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_POWER"
+ "doz%I2 %3,%1,%2\;{sfi|subfic} %0,%3,0\;{ae|adde} %0,%0,%3"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (ge:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %3,%1,%2\;{sfi|subfic} %0,%3,0\;{ae.|adde.} %0,%0,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (ge:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (ge:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_POWER && reload_completed"
+ [(parallel [(set (match_dup 0)
+ (ge:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_dup 3))])
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (plus:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r")))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze|addze} %0,%3"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r"))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %4,%1,%2\;{sfi|subfic} %4,%4,0\;{aze.|addze.} %4,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (ge:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI,rI"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (ge:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER"
+ "@
+ doz%I2 %0,%1,%2\;{sfi|subfic} %0,%0,0\;{aze.|addze.} %0,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (ge:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (ge:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (neg:SI (ge:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "rI"))))]
+ "TARGET_POWER"
+ "doz%I2 %0,%1,%2\;{ai|addic} %0,%0,-1\;{sfe|subfe} %0,%0,%0"
+ [(set_attr "length" "12")])
+
+(define_insn "*geu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (geu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P")))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{cal %0,0(0)|li %0,0}\;{ae|adde} %0,%0,%0
+ {ai|addic} %0,%1,%n2\;{cal %0,0(0)|li %0,0}\;{ae|adde} %0,%0,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn "*geu<mode>_compare"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (geu:P (match_operand:P 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P,r,P"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r,r,r")
+ (geu:P (match_dup 1) (match_dup 2)))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{cal %0,0(0)|li %0,0}\;{ae.|adde.} %0,%0,%0
+ {ai|addic} %0,%1,%n2\;{cal %0,0(0)|li %0,0}\;{ae.|adde.} %0,%0,%0
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (geu:P (match_operand:P 1 "gpc_reg_operand" "")
+ (match_operand:P 2 "reg_or_neg_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (geu:P (match_dup 1) (match_dup 2)))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (geu:P (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*plus_geu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r,&r")
+ (plus:P (geu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P"))
+ (match_operand:P 3 "gpc_reg_operand" "r,r")))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{aze|addze} %0,%3
+ {ai|addic} %0,%1,%n2\;{aze|addze} %0,%3"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:SI (geu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "r,P,r,P"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r,&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {sf|subfc} %4,%2,%1\;{aze.|addze.} %4,%3
+ {ai|addic} %4,%1,%n2\;{aze.|addze.} %4,%3
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,8,12,12")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (geu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_neg_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (geu:SI (match_dup 1) (match_dup 2))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:SI (geu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "r,P,r,P"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (plus:SI (geu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT"
+ "@
+ {sf|subfc} %0,%2,%1\;{aze.|addze.} %0,%3
+ {ai|addic} %0,%1,%n2\;{aze.|addze.} %0,%3
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "8,8,12,12")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (geu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_neg_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (geu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (geu:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*neg_geu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (neg:P (geu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_short_operand" "r,I"))))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{sfe|subfe} %0,%0,%0\;nand %0,%0,%0
+ {sfi|subfic} %0,%1,-1\;{a%I2|add%I2c} %0,%0,%2\;{sfe|subfe} %0,%0,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn "*and_neg_geu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r,&r")
+ (and:P (neg:P
+ (geu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_neg_short_operand" "r,P")))
+ (match_operand:P 3 "gpc_reg_operand" "r,r")))]
+ ""
+ "@
+ {sf|subfc} %0,%2,%1\;{sfe|subfe} %0,%0,%0\;andc %0,%3,%0
+ {ai|addic} %0,%1,%n2\;{sfe|subfe} %0,%0,%0\;andc %0,%3,%0"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (and:SI (neg:SI
+ (geu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "r,P,r,P")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r,&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {sf|subfc} %4,%2,%1\;{sfe|subfe} %4,%4,%4\;andc. %4,%3,%4
+ {ai|addic} %4,%1,%n2\;{sfe|subfe} %4,%4,%4\;andc. %4,%3,%4
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (and:SI (neg:SI
+ (geu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "")))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 4)
+ (and:SI (neg:SI (geu:SI (match_dup 1) (match_dup 2)))
+ (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (and:SI (neg:SI
+ (geu:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "r,P,r,P")))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (and:SI (neg:SI (geu:SI (match_dup 1) (match_dup 2))) (match_dup 3)))]
+ "TARGET_32BIT"
+ "@
+ {sf|subfc} %0,%2,%1\;{sfe|subfe} %0,%0,%0\;andc. %0,%3,%0
+ {ai|addic} %0,%1,%n2\;{sfe|subfe} %0,%0,%0\;andc. %0,%3,%0
+ #
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,12,16,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (and:SI (neg:SI
+ (geu:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_neg_short_operand" "")))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (and:SI (neg:SI (geu:SI (match_dup 1) (match_dup 2))) (match_dup 3)))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (and:SI (neg:SI (geu:SI (match_dup 1) (match_dup 2))) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "r")))]
+ "TARGET_POWER"
+ "doz %0,%2,%1\;nabs %0,%0\;{sri|srwi} %0,%0,31"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (gt:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWER"
+ "@
+ doz %0,%2,%1\;nabs %0,%0\;{sri.|srwi.} %0,%0,31
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (gt:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (gt:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
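+;; (X > 0) + Y, again via the carry: {a|addc} %0,%1,%1 puts X's sign bit
+;; in CA, {sfe|subfe} %0,%1,%0 leaves CA set exactly when X is positive,
+;; and {aze|addze} %0,%2 adds that carry to Y.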
+(define_insn "*plus_gt0<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r")
+ (plus:P (gt:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (const_int 0))
+ (match_operand:P 2 "gpc_reg_operand" "r")))]
+ ""
+ "{a|addc} %0,%1,%1\;{sfe|subfe} %0,%1,%0\;{aze|addze} %0,%2"
+ [(set_attr "type" "three")
+ (set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (const_int 0))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
+ "TARGET_32BIT"
+ "@
+ {a|addc} %3,%1,%1\;{sfe|subfe} %3,%1,%3\;{aze.|addze.} %3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (const_int 0))
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 3)
+ (plus:SI (gt:SI (match_dup 1) (const_int 0))
+ (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:DI (gt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (const_int 0))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=&r,&r"))]
+ "TARGET_64BIT"
+ "@
+ addc %3,%1,%1\;subfe %3,%1,%3\;addze. %3,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (plus:DI (gt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (const_int 0))
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3)
+ (plus:DI (gt:DI (match_dup 1) (const_int 0))
+ (match_dup 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (const_int 0))
+ (match_operand:SI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (gt:SI (match_dup 1) (const_int 0)) (match_dup 2)))]
+ "TARGET_32BIT"
+ "@
+ {a|addc} %0,%1,%1\;{sfe|subfe} %0,%1,%0\;{aze.|addze.} %0,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (const_int 0))
+ (match_operand:SI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (gt:SI (match_dup 1) (const_int 0)) (match_dup 2)))]
+ "TARGET_32BIT && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (gt:SI (match_dup 1) (const_int 0)) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:DI (gt:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (const_int 0))
+ (match_operand:DI 2 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:DI (gt:DI (match_dup 1) (const_int 0)) (match_dup 2)))]
+ "TARGET_64BIT"
+ "@
+ addc %0,%1,%1\;subfe %0,%1,%0\;addze. %0,%2
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (plus:DI (gt:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (const_int 0))
+ (match_operand:DI 2 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (plus:DI (gt:DI (match_dup 1) (const_int 0)) (match_dup 2)))]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 0)
+ (plus:DI (gt:DI (match_dup 1) (const_int 0)) (match_dup 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "r"))
+ (match_operand:SI 3 "gpc_reg_operand" "r")))]
+ "TARGET_POWER"
+ "doz %0,%2,%1\;{ai|addic} %0,%0,-1\;{aze|addze} %0,%3"
+ [(set_attr "length" "12")])
+
+(define_insn ""
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,r"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 "=&r,&r"))]
+ "TARGET_POWER"
+ "@
+ doz %4,%2,%1\;{ai|addic} %4,%4,-1\;{aze.|addze.} %4,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 4)
+ (plus:SI (gt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 4)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_short_operand" "r,r"))
+ (match_operand:SI 3 "gpc_reg_operand" "r,r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=&r,&r")
+ (plus:SI (gt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER"
+ "@
+ doz %0,%2,%1\;{ai|addic} %0,%0,-1\;{aze.|addze.} %0,%3
+ #"
+ [(set_attr "type" "compare")
+ (set_attr "length" "12,16")])
+
+(define_split
+ [(set (match_operand:CC 4 "cc_reg_not_cr0_operand" "")
+ (compare:CC
+ (plus:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_short_operand" ""))
+ (match_operand:SI 3 "gpc_reg_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (plus:SI (gt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "TARGET_POWER && reload_completed"
+ [(set (match_dup 0)
+ (plus:SI (gt:SI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 4)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (neg:SI (gt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_short_operand" "r"))))]
+ "TARGET_POWER"
+ "doz %0,%2,%1\;nabs %0,%0\;{srai|srawi} %0,%0,31"
+ [(set_attr "length" "12")])
+
+(define_insn_and_split "*gtu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (gtu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (neg:P (gtu:P (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (neg:P (match_dup 0)))]
+ "")
+
+(define_insn_and_split "*gtu<mode>_compare"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (gtu:P (match_operand:P 1 "gpc_reg_operand" "r,r")
+ (match_operand:P 2 "reg_or_short_operand" "rI,rI"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=r,r")
+ (gtu:P (match_dup 1) (match_dup 2)))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (neg:P (gtu:P (match_dup 1) (match_dup 2))))
+ (parallel [(set (match_dup 3)
+ (compare:CC (neg:P (match_dup 0)) (const_int 0)))
+ (set (match_dup 0) (neg:P (match_dup 0)))])]
+ "")
+
+(define_insn_and_split "*plus_gtu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=&r")
+ (plus:P (gtu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI"))
+ (match_operand:P 3 "reg_or_short_operand" "rI")))]
+ ""
+ "#"
+ "&& !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(set (match_dup 0) (neg:P (gtu:P (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (minus:P (match_dup 3) (match_dup 0)))]
+ "")
+
+(define_insn_and_split "*plus_gtu<mode>_compare"
+ [(set (match_operand:CC 4 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC
+ (plus:P (gtu:P (match_operand:P 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:P 2 "reg_or_short_operand" "I,r,I,r"))
+ (match_operand:P 3 "gpc_reg_operand" "r,r,r,r"))
+ (const_int 0)))
+ (set (match_operand:P 0 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (plus:P (gtu:P (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ ""
+ "#"
+ "&& !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(set (match_dup 0) (neg:P (gtu:P (match_dup 1) (match_dup 2))))
+ (parallel [(set (match_dup 4)
+ (compare:CC (minus:P (match_dup 3) (match_dup 0))
+ (const_int 0)))
+ (set (match_dup 0) (minus:P (match_dup 3) (match_dup 0)))])]
+ "")
+
+(define_insn "*neg_gtu<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (neg:P (gtu:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_short_operand" "rI"))))]
+ ""
+ "{sf%I2|subf%I2c} %0,%1,%2\;{sfe|subfe} %0,%0,%0"
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+
+;; Define both directions of branch and return. If we need a reload
+;; register, we'd rather use CR0 since it is much easier to copy a
+;; register CC value there.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2
+ "cc_reg_operand" "y")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ return output_cbranch (operands[1], \"%l0\", 0, insn);
+}"
+ [(set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 0 "branch_comparison_operator"
+ [(match_operand 1
+ "cc_reg_operand" "y")
+ (const_int 0)])
+ (return)
+ (pc)))]
+ "direct_return ()"
+ "*
+{
+ return output_cbranch (operands[0], NULL, 0, insn);
+}"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "4")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2
+ "cc_reg_operand" "y")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ return output_cbranch (operands[1], \"%l0\", 1, insn);
+}"
+ [(set_attr "type" "branch")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 0 "branch_comparison_operator"
+ [(match_operand 1
+ "cc_reg_operand" "y")
+ (const_int 0)])
+ (pc)
+ (return)))]
+ "direct_return ()"
+ "*
+{
+ return output_cbranch (operands[0], NULL, 1, insn);
+}"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "4")])
+
+;; Logic on condition register values.
+
+; This pattern matches things like
+; (set (reg:CCEQ 68) (compare:CCEQ (ior:SI (gt:SI (reg:CCFP 68) (const_int 0))
+; (eq:SI (reg:CCFP 68) (const_int 0)))
+; (const_int 1)))
+; which are generated by the branch logic.
+; Prefer destructive operations where BT = BB (for crXX BT,BA,BB)
+
+(define_insn "*cceq_ior_compare"
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y")
+ (compare:CCEQ (match_operator:SI 1 "boolean_operator"
+ [(match_operator:SI 2
+ "branch_positive_comparison_operator"
+ [(match_operand 3
+ "cc_reg_operand" "y,y")
+ (const_int 0)])
+ (match_operator:SI 4
+ "branch_positive_comparison_operator"
+ [(match_operand 5
+ "cc_reg_operand" "0,y")
+ (const_int 0)])])
+ (const_int 1)))]
+ ""
+ "cr%q1 %E0,%j2,%j4"
+ [(set_attr "type" "cr_logical,delayed_cr")])
+
+; Why is the constant -1 here, but 1 in the previous pattern?
+; Because ~1 has all but the low bit set.
+(define_insn ""
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y")
+ (compare:CCEQ (match_operator:SI 1 "boolean_or_operator"
+ [(not:SI (match_operator:SI 2
+ "branch_positive_comparison_operator"
+ [(match_operand 3
+ "cc_reg_operand" "y,y")
+ (const_int 0)]))
+ (match_operator:SI 4
+ "branch_positive_comparison_operator"
+ [(match_operand 5
+ "cc_reg_operand" "0,y")
+ (const_int 0)])])
+ (const_int -1)))]
+ ""
+ "cr%q1 %E0,%j2,%j4"
+ [(set_attr "type" "cr_logical,delayed_cr")])
+
+(define_insn "*cceq_rev_compare"
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y,?y")
+ (compare:CCEQ (match_operator:SI 1
+ "branch_positive_comparison_operator"
+ [(match_operand 2
+ "cc_reg_operand" "0,y")
+ (const_int 0)])
+ (const_int 0)))]
+ ""
+ "{crnor %E0,%j1,%j1|crnot %E0,%j1}"
+ [(set_attr "type" "cr_logical,delayed_cr")])
+
+;; If we are comparing the result of two comparisons, this can be done
+;; using creqv or crxor.
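+;; (crxor sets the target CR bit to the XOR of its two source bits and
+;; creqv to its complement, so "both comparisons agree" is a single
+;; CR-logical operation once each comparison is put into its positive
+;; form, which is what the C code below arranges.)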
+
+(define_insn_and_split ""
+ [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y")
+ (compare:CCEQ (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2 "cc_reg_operand" "y")
+ (const_int 0)])
+ (match_operator 3 "branch_comparison_operator"
+ [(match_operand 4 "cc_reg_operand" "y")
+ (const_int 0)])))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 0) (compare:CCEQ (xor:SI (match_dup 1) (match_dup 3))
+ (match_dup 5)))]
+ "
+{
+ int positive_1, positive_2;
+
+ positive_1 = branch_positive_comparison_operator (operands[1],
+ GET_MODE (operands[1]));
+ positive_2 = branch_positive_comparison_operator (operands[3],
+ GET_MODE (operands[3]));
+
+ if (! positive_1)
+ operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[2]),
+ GET_CODE (operands[1])),
+ SImode,
+ operands[2], const0_rtx);
+ else if (GET_MODE (operands[1]) != SImode)
+ operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode,
+ operands[2], const0_rtx);
+
+ if (! positive_2)
+ operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[4]),
+ GET_CODE (operands[3])),
+ SImode,
+ operands[4], const0_rtx);
+ else if (GET_MODE (operands[3]) != SImode)
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+ operands[4], const0_rtx);
+
+ if (positive_1 == positive_2)
+ {
+ operands[1] = gen_rtx_NOT (SImode, operands[1]);
+ operands[5] = constm1_rtx;
+ }
+ else
+ {
+ operands[5] = const1_rtx;
+ }
+}")
+
+;; Unconditional branch and return.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "b %l0"
+ [(set_attr "type" "branch")])
+
+(define_insn "return"
+ [(return)]
+ "direct_return ()"
+ "{br|blr}"
+ [(set_attr "type" "jmpreg")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "register_operand" ""))])
+
+(define_insn "*indirect_jump<mode>"
+ [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))]
+ ""
+ "@
+ bctr
+ {br|blr}"
+ [(set_attr "type" "jmpreg")])
+
+;; Table jump for switch statements:
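+;; The expanders below add the address of the dispatch table's label
+;; (forced into a register) to the offset fetched from the table, then
+;; jump through CTR or LR; the DImode variant first sign-extends the
+;; 32-bit table entry.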
+(define_expand "tablejump"
+ [(use (match_operand 0 "" ""))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "
+{
+ if (TARGET_32BIT)
+ emit_jump_insn (gen_tablejumpsi (operands[0], operands[1]));
+ else
+ emit_jump_insn (gen_tablejumpdi (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_expand "tablejumpsi"
+ [(set (match_dup 3)
+ (plus:SI (match_operand:SI 0 "" "")
+ (match_dup 2)))
+ (parallel [(set (pc) (match_dup 3))
+ (use (label_ref (match_operand 1 "" "")))])]
+ "TARGET_32BIT"
+ "
+{ operands[0] = force_reg (SImode, operands[0]);
+ operands[2] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1]));
+ operands[3] = gen_reg_rtx (SImode);
+}")
+
+(define_expand "tablejumpdi"
+ [(set (match_dup 4)
+ (sign_extend:DI (match_operand:SI 0 "lwa_operand" "")))
+ (set (match_dup 3)
+ (plus:DI (match_dup 4)
+ (match_dup 2)))
+ (parallel [(set (pc) (match_dup 3))
+ (use (label_ref (match_operand 1 "" "")))])]
+ "TARGET_64BIT"
+ "
+{ operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1]));
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+}")
+
+(define_insn "*tablejump<mode>_internal1"
+ [(set (pc)
+ (match_operand:P 0 "register_operand" "c,*l"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "@
+ bctr
+ {br|blr}"
+ [(set_attr "type" "jmpreg")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "{cror 0,0,0|nop}")
+
+;; Define the subtract-one-and-jump insns, starting with the template
+;; so loop.c knows what to generate.
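+;; On this port the loop counter normally lives in the CTR register, so
+;; the decrement-and-branch collapses to a single {bdn|bdnz} (or bdz)
+;; instruction; the remaining alternatives exist only for the case where
+;; CTR could not be allocated.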
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+ "
+{
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (TARGET_64BIT)
+ {
+ if (GET_MODE (operands[0]) != DImode)
+ FAIL;
+ emit_jump_insn (gen_ctrdi (operands[0], operands[4]));
+ }
+ else
+ {
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ emit_jump_insn (gen_ctrsi (operands[0], operands[4]));
+ }
+ DONE;
+}")
+
+(define_expand "ctr<mode>"
+ [(parallel [(set (pc)
+ (if_then_else (ne (match_operand:P 0 "register_operand" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:P (match_dup 0)
+ (const_int -1)))
+ (clobber (match_scratch:CC 2 ""))
+ (clobber (match_scratch:P 3 ""))])]
+ ""
+ "")
+
+;; We need to be able to do this for any operand, including MEM, or we
+;; will cause reload to blow up since we don't allow output reloads on
+;; JUMP_INSNs.
+;; For the length attribute to be calculated correctly, the
+;; label MUST be operand 0.
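+;; When the target is within reach of the conditional branch's 16-bit
+;; displacement, the pattern emits a single {bdn|bdnz}/bdz; otherwise it
+;; inverts the condition and branches around an unconditional b, which
+;; has the longer 26-bit reach.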
+
+(define_insn "*ctr<mode>_internal1"
+ [(set (pc)
+ (if_then_else (ne (match_operand:P 1 "register_operand" "c,*r,*r,*r")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*q*c*l")
+ (plus:P (match_dup 1)
+ (const_int -1)))
+ (clobber (match_scratch:CC 3 "=X,&x,&x,&x"))
+ (clobber (match_scratch:P 4 "=X,X,&r,r"))]
+ ""
+ "*
+{
+ if (which_alternative != 0)
+ return \"#\";
+ else if (get_attr_length (insn) == 4)
+ return \"{bdn|bdnz} %l0\";
+ else
+ return \"bdz $+8\;b %l0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "*,12,16,16")])
+
+(define_insn "*ctr<mode>_internal2"
+ [(set (pc)
+ (if_then_else (ne (match_operand:P 1 "register_operand" "c,*r,*r,*r")
+ (const_int 1))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))
+ (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*q*c*l")
+ (plus:P (match_dup 1)
+ (const_int -1)))
+ (clobber (match_scratch:CC 3 "=X,&x,&x,&x"))
+ (clobber (match_scratch:P 4 "=X,X,&r,r"))]
+ ""
+ "*
+{
+ if (which_alternative != 0)
+ return \"#\";
+ else if (get_attr_length (insn) == 4)
+ return \"bdz %l0\";
+ else
+ return \"{bdn|bdnz} $+8\;b %l0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "*,12,16,16")])
+
+;; Similar but use EQ
+
+(define_insn "*ctr<mode>_internal5"
+ [(set (pc)
+ (if_then_else (eq (match_operand:P 1 "register_operand" "c,*r,*r,*r")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*q*c*l")
+ (plus:P (match_dup 1)
+ (const_int -1)))
+ (clobber (match_scratch:CC 3 "=X,&x,&x,&x"))
+ (clobber (match_scratch:P 4 "=X,X,&r,r"))]
+ ""
+ "*
+{
+ if (which_alternative != 0)
+ return \"#\";
+ else if (get_attr_length (insn) == 4)
+ return \"bdz %l0\";
+ else
+ return \"{bdn|bdnz} $+8\;b %l0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "*,12,16,16")])
+
+(define_insn "*ctr<mode>_internal6"
+ [(set (pc)
+ (if_then_else (eq (match_operand:P 1 "register_operand" "c,*r,*r,*r")
+ (const_int 1))
+ (pc)
+ (label_ref (match_operand 0 "" ""))))
+ (set (match_operand:P 2 "nonimmediate_operand" "=1,*r,m,*q*c*l")
+ (plus:P (match_dup 1)
+ (const_int -1)))
+ (clobber (match_scratch:CC 3 "=X,&x,&x,&x"))
+ (clobber (match_scratch:P 4 "=X,X,&r,r"))]
+ ""
+ "*
+{
+ if (which_alternative != 0)
+ return \"#\";
+ else if (get_attr_length (insn) == 4)
+ return \"{bdn|bdnz} %l0\";
+ else
+ return \"bdz $+8\;b %l0\";
+}"
+ [(set_attr "type" "branch")
+ (set_attr "length" "*,12,16,16")])
+
+;; Now the splitters for the case where we could not allocate the CTR register
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 2 "comparison_operator"
+ [(match_operand:P 1 "gpc_reg_operand" "")
+ (const_int 1)])
+ (match_operand 5 "" "")
+ (match_operand 6 "" "")))
+ (set (match_operand:P 0 "gpc_reg_operand" "")
+ (plus:P (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:CC 3 ""))
+ (clobber (match_scratch:P 4 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 3)
+ (compare:CC (plus:P (match_dup 1)
+ (const_int -1))
+ (const_int 0)))
+ (set (match_dup 0)
+ (plus:P (match_dup 1)
+ (const_int -1)))])
+ (set (pc) (if_then_else (match_dup 7)
+ (match_dup 5)
+ (match_dup 6)))]
+ "
+{ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[2]), VOIDmode,
+ operands[3], const0_rtx); }")
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 2 "comparison_operator"
+ [(match_operand:P 1 "gpc_reg_operand" "")
+ (const_int 1)])
+ (match_operand 5 "" "")
+ (match_operand 6 "" "")))
+ (set (match_operand:P 0 "nonimmediate_operand" "")
+ (plus:P (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:CC 3 ""))
+ (clobber (match_scratch:P 4 ""))]
+ "reload_completed && ! gpc_reg_operand (operands[0], SImode)"
+ [(parallel [(set (match_dup 3)
+ (compare:CC (plus:P (match_dup 1)
+ (const_int -1))
+ (const_int 0)))
+ (set (match_dup 4)
+ (plus:P (match_dup 1)
+ (const_int -1)))])
+ (set (match_dup 0)
+ (match_dup 4))
+ (set (pc) (if_then_else (match_dup 7)
+ (match_dup 5)
+ (match_dup 6)))]
+ "
+{ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[2]), VOIDmode,
+ operands[3], const0_rtx); }")
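+;; Roughly: each split rewrites the CTR-style loop insn as an explicit
+;; decrement-and-compare (setting the CC scratch) followed by an ordinary
+;; conditional branch; the second variant additionally decrements into
+;; scratch 4 and stores it when operand 0 did not end up in a GPR.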
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "{t 31,0,0|trap}"
+ [(set_attr "type" "trap")])
+
+(define_expand "ctrap<mode>4"
+ [(trap_if (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "reg_or_short_operand")])
+ (match_operand 3 "zero_constant" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(trap_if (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:GPR 1 "register_operand" "r")
+ (match_operand:GPR 2 "reg_or_short_operand" "rI")])
+ (const_int 0))]
+ ""
+ "{t|t<wd>}%V0%I2 %1,%2"
+ [(set_attr "type" "trap")])
+
+;; Insns related to generating the function prologue and epilogue.
+
+(define_expand "prologue"
+ [(use (const_int 0))]
+ "TARGET_SCHED_PROLOG"
+ "
+{
+ rs6000_emit_prologue ();
+ DONE;
+}")
+
+(define_insn "*movesi_from_cr_one"
+ [(match_parallel 0 "mfcr_operation"
+ [(set (match_operand:SI 1 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_operand:CC 2 "cc_reg_operand" "y")
+ (match_operand 3 "immediate_operand" "n")]
+ UNSPEC_MOVESI_FROM_CR))])]
+ "TARGET_MFCRF"
+ "*
+{
+ int mask = 0;
+ int i;
+ for (i = 0; i < XVECLEN (operands[0], 0); i++)
+ {
+ mask = INTVAL (XVECEXP (SET_SRC (XVECEXP (operands[0], 0, i)), 0, 1));
+ operands[4] = GEN_INT (mask);
+ output_asm_insn (\"mfcr %1,%4\", operands);
+ }
+ return \"\";
+}"
+ [(set_attr "type" "mfcrf")])
+
+(define_insn "movesi_from_cr"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(reg:CC CR0_REGNO) (reg:CC CR1_REGNO)
+ (reg:CC CR2_REGNO) (reg:CC CR3_REGNO)
+ (reg:CC CR4_REGNO) (reg:CC CR5_REGNO)
+ (reg:CC CR6_REGNO) (reg:CC CR7_REGNO)]
+ UNSPEC_MOVESI_FROM_CR))]
+ ""
+ "mfcr %0"
+ [(set_attr "type" "mfcr")])
+
+(define_insn "*stmw"
+ [(match_parallel 0 "stmw_operation"
+ [(set (match_operand:SI 1 "memory_operand" "=m")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))])]
+ "TARGET_MULTIPLE"
+ "{stm|stmw} %2,%1"
+ [(set_attr "type" "store_ux")])
+
+; The following comment applies to:
+; save_gpregs_*
+; save_fpregs_*
+; restore_gpregs*
+; return_and_restore_gpregs*
+; return_and_restore_fpregs*
+; return_and_restore_fpregs_aix*
+;
+; The out-of-line save / restore functions expect one input argument.
+; Since those are not standard call_insn's, we must avoid using
+; MATCH_OPERAND for that argument. That way the register rename
+; optimization will not try to rename this register.
+; Each pattern is repeated for each possible register number used in
+; various ABIs (r11, r1, and for some functions r12).
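+; Register 65 is the link register in this port's numbering, so the
+; (clobber (reg:P 65)) below records that the bl destroys the return
+; address, while the hard-coded (use (reg:P 11)) and friends pin the
+; ABI pointer register without exposing it to register renaming.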
+
+(define_insn "*save_gpregs_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:P 2 "memory_operand" "=m")
+ (match_operand:P 3 "gpc_reg_operand" "r"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*save_gpregs_<mode>_r12"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 12))
+ (set (match_operand:P 2 "memory_operand" "=m")
+ (match_operand:P 3 "gpc_reg_operand" "r"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*save_gpregs_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:P 2 "memory_operand" "=m")
+ (match_operand:P 3 "gpc_reg_operand" "r"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*save_fpregs_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:DF 2 "memory_operand" "=m")
+ (match_operand:DF 3 "gpc_reg_operand" "d"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*save_fpregs_<mode>_r12"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 12))
+ (set (match_operand:DF 2 "memory_operand" "=m")
+ (match_operand:DF 3 "gpc_reg_operand" "d"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*save_fpregs_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:DF 2 "memory_operand" "=m")
+ (match_operand:DF 3 "gpc_reg_operand" "d"))])]
+ ""
+ "bl %1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+; These patterns exist to tell the scheduler that changes to the stack
+; pointer must not be moved past stores to stack memory.
+(define_insn "stack_tie"
+ [(set (match_operand:BLK 0 "memory_operand" "+m")
+ (unspec:BLK [(match_dup 0)] UNSPEC_TIE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
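+; stack_tie emits no code; the (set (mem) (unspec [(mem)])) merely
+; creates a dependence that keeps stack pointer updates and frame
+; stores ordered.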
+
+; Like stack_tie, but depends on both fp-based and sp-based memory.
+(define_insn "frame_tie"
+ [(set (match_operand:BLK 0 "memory_operand" "+m")
+ (unspec:BLK [(match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "m")] UNSPEC_TIE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+
+(define_expand "epilogue"
+ [(use (const_int 0))]
+ "TARGET_SCHED_PROLOG"
+ "
+{
+ rs6000_emit_epilogue (FALSE);
+ DONE;
+}")
+
+; On some processors, doing the mtcrf one CC register at a time is
+; faster (like on the 604e). On others, doing them all at once is
+; faster; for instance, on the 601 and 750.
+
+(define_expand "movsi_to_cr_one"
+ [(set (match_operand:CC 0 "cc_reg_operand" "")
+ (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "")
+ (match_dup 2)] UNSPEC_MOVESI_TO_CR))]
+ ""
+ "operands[2] = GEN_INT (1 << (75 - REGNO (operands[0])));")
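+; CR fields occupy hard registers 68 (CR0) through 75 (CR7) here, so
+; 1 << (75 - REGNO) is the single FXM mask bit mtcrf expects for that
+; field: CR0 maps to the most significant bit, CR7 to the least.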
+
+(define_insn "*movsi_to_cr"
+ [(match_parallel 0 "mtcrf_operation"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand 3 "immediate_operand" "n")]
+ UNSPEC_MOVESI_TO_CR))])]
+ ""
+ "*
+{
+ int mask = 0;
+ int i;
+ for (i = 0; i < XVECLEN (operands[0], 0); i++)
+ mask |= INTVAL (XVECEXP (SET_SRC (XVECEXP (operands[0], 0, i)), 0, 1));
+ operands[4] = GEN_INT (mask);
+ return \"mtcrf %4,%2\";
+}"
+ [(set_attr "type" "mtcr")])
+
+(define_insn "*mtcrfsi"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand 2 "immediate_operand" "n")]
+ UNSPEC_MOVESI_TO_CR))]
+ "GET_CODE (operands[0]) == REG
+ && CR_REGNO_P (REGNO (operands[0]))
+ && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 1 << (75 - REGNO (operands[0]))"
+ "mtcrf %R0,%1"
+ [(set_attr "type" "mtcr")])
+
+; The load-multiple instructions have similar properties.
+; Note that "load_multiple" is a name known to the machine-independent
+; code that actually corresponds to the PowerPC load-string.
+
+(define_insn "*lmw"
+ [(match_parallel 0 "lmw_operation"
+ [(set (match_operand:SI 1 "gpc_reg_operand" "=r")
+ (match_operand:SI 2 "memory_operand" "m"))])]
+ "TARGET_MULTIPLE"
+ "{lm|lmw} %1,%2"
+ [(set_attr "type" "load_ux")
+ (set_attr "cell_micro" "always")])
+
+(define_insn "*return_internal_<mode>"
+ [(return)
+ (use (match_operand:P 0 "register_operand" "lc"))]
+ ""
+ "b%T0"
+ [(set_attr "type" "jmpreg")])
+
+; FIXME: This would probably be somewhat simpler if the Cygnus sibcall
+; stuff were in GCC. Oh, and "any_parallel_operand" is a bit flexible...
+
+; The following comment applies to:
+; save_gpregs_*
+; save_fpregs_*
+; restore_gpregs*
+; return_and_restore_gpregs*
+; return_and_restore_fpregs*
+; return_and_restore_fpregs_aix*
+;
+; The out-of-line save / restore functions expect one input argument.
+; Since those are not standard call_insn's, we must avoid using
+; MATCH_OPERAND for that argument. That way the register rename
+; optimization will not try to rename this register.
+; Each pattern is repeated for each possible register number used in
+; various ABIs (r11, r1, and for some functions r12).
+
+(define_insn "*restore_gpregs_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "bl %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*restore_gpregs_<mode>_r12"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 12))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "bl %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*restore_gpregs_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "bl %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_gpregs_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_gpregs_<mode>_r12"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 12))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_gpregs_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:P 3 "gpc_reg_operand" "=r")
+ (match_operand:P 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_fpregs_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:DF 3 "gpc_reg_operand" "=d")
+ (match_operand:DF 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_fpregs_<mode>_r12"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 12))
+ (set (match_operand:DF 3 "gpc_reg_operand" "=d")
+ (match_operand:DF 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_fpregs_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (match_operand:P 1 "register_operand" "=l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:DF 3 "gpc_reg_operand" "=d")
+ (match_operand:DF 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_fpregs_aix_<mode>_r11"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (use (match_operand:P 1 "register_operand" "l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:DF 3 "gpc_reg_operand" "=d")
+ (match_operand:DF 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_fpregs_aix_<mode>_r1"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (use (match_operand:P 1 "register_operand" "l"))
+ (use (match_operand:P 2 "symbol_ref_operand" "s"))
+ (use (reg:P 1))
+ (set (match_operand:DF 3 "gpc_reg_operand" "=d")
+ (match_operand:DF 4 "memory_operand" "m"))])]
+ ""
+ "b %2"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+; This is used in compiling the unwind routines.
+(define_expand "eh_return"
+ [(use (match_operand 0 "general_operand" ""))]
+ ""
+ "
+{
+ if (TARGET_32BIT)
+ emit_insn (gen_eh_set_lr_si (operands[0]));
+ else
+ emit_insn (gen_eh_set_lr_di (operands[0]));
+ DONE;
+}")
+
+; We can't expand this before we know where the link register is stored.
+(define_insn "eh_set_lr_<mode>"
+ [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+ UNSPECV_EH_RR)
+ (clobber (match_scratch:P 1 "=&b"))]
+ ""
+ "#")
+
+(define_split
+ [(unspec_volatile [(match_operand 0 "register_operand" "")] UNSPECV_EH_RR)
+ (clobber (match_scratch 1 ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rs6000_emit_eh_reg_restore (operands[0], operands[1]);
+ DONE;
+}")
+
+(define_insn "prefetch"
+ [(prefetch (match_operand 0 "indexed_or_indirect_address" "a")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_POWERPC"
+ "*
+{
+ if (GET_CODE (operands[0]) == REG)
+ return INTVAL (operands[1]) ? \"dcbtst 0,%0\" : \"dcbt 0,%0\";
+ return INTVAL (operands[1]) ? \"dcbtst %a0\" : \"dcbt %a0\";
+}"
+ [(set_attr "type" "load")])
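+; dcbt is the data-cache-block touch for an expected load and dcbtst
+; the store variant; operand 1 is the generic prefetch write flag, so
+; a nonzero value selects dcbtst, and the locality hint in operand 2
+; is ignored.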
+
+(define_insn "bpermd_<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (unspec:P [(match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
+ "TARGET_POPCNTD"
+ "bpermd %0,%1,%2"
+ [(set_attr "type" "integer")])
+
+
+;; Builtin fma support.  Note that the conditions for expansion are in
+;; the FMA_F iterator.
+
+(define_expand "fma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" "")))]
+ ""
+ "")
+
+; Altivec only has fma and nfms.
+(define_expand "fms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
+(define_expand "fnma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+ "")
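+;; The signed-zero caveat is real: with a*b = +0 and c = +0, the
+;; -(a*b - c) form yields -0 while the optab's -a*b + c yields +0.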
+
+;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
+(define_expand "fnms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode) && !VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+; Not an official optab name, but used from builtins.
+(define_expand "nfma<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (match_operand:FMA_F 3 "register_operand" ""))))]
+ "!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+; Not an official optab name, but used from builtins.
+(define_expand "nfms<mode>4"
+ [(set (match_operand:FMA_F 0 "register_operand" "")
+ (neg:FMA_F
+ (fma:FMA_F
+ (match_operand:FMA_F 1 "register_operand" "")
+ (match_operand:FMA_F 2 "register_operand" "")
+ (neg:FMA_F (match_operand:FMA_F 3 "register_operand" "")))))]
+ ""
+ "")
+
+
+
+(include "sync.md")
+(include "vector.md")
+(include "vsx.md")
+(include "altivec.md")
+(include "spe.md")
+(include "dfp.md")
+(include "paired.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
new file mode 100644
index 000000000..02a60f8f3
--- /dev/null
+++ b/gcc/config/rs6000/rs6000.opt
@@ -0,0 +1,464 @@
+; Options for the rs6000 port of the compiler
+;
+; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+; Foundation, Inc.
+; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+HeaderInclude
+config/rs6000/rs6000-opts.h
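+
+; Each record below gives an option name, a line of properties
+; (Target, Mask(), Var(), ...), and its --help text; blank lines
+; separate records.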
+
+;; Current processor
+TargetVariable
+enum processor_type rs6000_cpu = PROCESSOR_RIOS1
+
+;; Always emit branch hint bits.
+TargetVariable
+unsigned char rs6000_always_hint
+
+;; Schedule instructions for group formation.
+TargetVariable
+unsigned char rs6000_sched_groups
+
+;; Align branch targets.
+TargetVariable
+unsigned char rs6000_align_branch_targets
+
+;; Support for -msched-costly-dep option.
+TargetVariable
+enum rs6000_dependence_cost rs6000_sched_costly_dep = no_dep_costly
+
+;; Support for -minsert-sched-nops option.
+TargetVariable
+enum rs6000_nop_insertion rs6000_sched_insert_nops = sched_finish_none
+
+;; Size of long double.
+TargetVariable
+unsigned char rs6000_long_double_type_size
+
+;; IEEE quad extended precision long double.
+TargetVariable
+unsigned char rs6000_ieeequad
+
+;; Nonzero to use AltiVec ABI.
+TargetVariable
+unsigned char rs6000_altivec_abi
+
+;; Nonzero if we want SPE SIMD instructions.
+TargetVariable
+int rs6000_spe
+
+;; Nonzero if we want SPE ABI extensions.
+TargetVariable
+unsigned char rs6000_spe_abi
+
+;; Nonzero if floating point operations are done in the GPRs.
+TargetVariable
+unsigned char rs6000_float_gprs
+
+;; Nonzero if we want Darwin's struct-by-value-in-regs ABI.
+TargetVariable
+unsigned char rs6000_darwin64_abi
+
+;; Nonzero to allow overriding loop alignment.
+TargetVariable
+unsigned char can_override_loop_align
+
+;; Which small data model to use (for System V targets only)
+TargetVariable
+enum rs6000_sdata_type rs6000_sdata = SDATA_DATA
+
+;; Bit size of immediate TLS offsets and the string from which it is decoded.
+TargetVariable
+int rs6000_tls_size = 32
+
+;; ABI enumeration available for subtarget to use.
+TargetVariable
+enum rs6000_abi rs6000_current_abi = ABI_NONE
+
+;; Type of traceback to use.
+TargetVariable
+enum rs6000_traceback_type rs6000_traceback = traceback_default
+
+;; Control alignment for fields within structures.
+TargetVariable
+unsigned char rs6000_alignment_flags
+
+;; Code model for 64-bit linux.
+TargetVariable
+enum rs6000_cmodel rs6000_current_cmodel = CMODEL_SMALL
+
+;; What type of reciprocal estimation instructions to generate
+TargetVariable
+unsigned int rs6000_recip_control
+
+;; -mcpu=<xxx> as an index into the processor_target_table or -1
+TargetVariable
+int rs6000_cpu_index = -1
+
+;; -mtune=<xxx> as an index into the processor_target_table or -1
+TargetVariable
+int rs6000_tune_index = -1
+
+;; Debug flags
+TargetVariable
+unsigned int rs6000_debug
+
+;; Save for target_flags_explicit
+TargetSave
+int rs6000_target_flags_explicit
+
+mpower
+Target Report RejectNegative Mask(POWER)
+Use POWER instruction set
+
+mno-power
+Target Report RejectNegative
+Do not use POWER instruction set
+
+mpower2
+Target Report Mask(POWER2)
+Use POWER2 instruction set
+
+mpowerpc
+Target Report RejectNegative Mask(POWERPC)
+Use PowerPC instruction set
+
+mno-powerpc
+Target Report RejectNegative
+Do not use PowerPC instruction set
+
+mpowerpc64
+Target Report Mask(POWERPC64)
+Use PowerPC-64 instruction set
+
+mpowerpc-gpopt
+Target Report Mask(PPC_GPOPT) Save
+Use PowerPC General Purpose group optional instructions
+
+mpowerpc-gfxopt
+Target Report Mask(PPC_GFXOPT) Save
+Use PowerPC Graphics group optional instructions
+
+mmfcrf
+Target Report Mask(MFCRF) Save
+Use PowerPC V2.01 single field mfcr instruction
+
+mpopcntb
+Target Report Mask(POPCNTB) Save
+Use PowerPC V2.02 popcntb instruction
+
+mfprnd
+Target Report Mask(FPRND) Save
+Use PowerPC V2.02 floating point rounding instructions
+
+mcmpb
+Target Report Mask(CMPB) Save
+Use PowerPC V2.05 compare bytes instruction
+
+mmfpgpr
+Target Report Mask(MFPGPR) Save
+Use extended PowerPC V2.05 move floating point to/from GPR instructions
+
+maltivec
+Target Report Mask(ALTIVEC) Save
+Use AltiVec instructions
+
+mhard-dfp
+Target Report Mask(DFP) Save
+Use decimal floating point instructions
+
+mmulhw
+Target Report Mask(MULHW) Save
+Use 4xx half-word multiply instructions
+
+mdlmzb
+Target Report Mask(DLMZB) Save
+Use 4xx string-search dlmzb instruction
+
+mmultiple
+Target Report Mask(MULTIPLE) Save
+Generate load/store multiple instructions
+
+mstring
+Target Report Mask(STRING) Save
+Generate string instructions for block moves
+
+mnew-mnemonics
+Target Report RejectNegative Mask(NEW_MNEMONICS)
+Use new mnemonics for PowerPC architecture
+
+mold-mnemonics
+Target Report RejectNegative InverseMask(NEW_MNEMONICS)
+Use old mnemonics for PowerPC architecture
+
+msoft-float
+Target Report RejectNegative Mask(SOFT_FLOAT)
+Do not use hardware floating point
+
+mhard-float
+Target Report RejectNegative InverseMask(SOFT_FLOAT, HARD_FLOAT)
+Use hardware floating point
+
+mpopcntd
+Target Report Mask(POPCNTD) Save
+Use PowerPC V2.06 popcntd instruction
+
+mfriz
+Target Report Var(TARGET_FRIZ) Init(-1) Save
+Under -ffast-math, generate a FRIZ instruction for (double)(long long) conversions
+
+mveclibabi=
+Target RejectNegative Joined Var(rs6000_veclibabi_name)
+Vector library ABI to use
+
+mvsx
+Target Report Mask(VSX) Save
+Use vector/scalar (VSX) instructions
+
+mvsx-scalar-double
+Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(-1)
+; If -mvsx, use VSX arithmetic instructions for scalar double (on by default)
+
+mvsx-scalar-memory
+Target Undocumented Report Var(TARGET_VSX_SCALAR_MEMORY)
+; If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
+
+mvsx-align-128
+Target Undocumented Report Var(TARGET_VSX_ALIGN_128)
+; If -mvsx, set alignment to 128 bits instead of 32/64
+
+mallow-movmisalign
+Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1)
+; Allow/disallow the movmisalign in DF/DI vectors
+
+mallow-df-permute
+Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE)
+; Allow/disallow permutation of DF/DI vectors
+
+msched-groups
+Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1)
+; Explicitly set/unset whether rs6000_sched_groups is set
+
+malways-hint
+Target Undocumented Report Var(TARGET_ALWAYS_HINT) Init(-1)
+; Explicitly set/unset whether rs6000_always_hint is set
+
+malign-branch-targets
+Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1)
+; Explicitly set/unset whether rs6000_align_branch_targets is set
+
+mvectorize-builtins
+Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1)
+; Explicitly control whether we vectorize the builtins or not.
+
+mno-update
+Target Report RejectNegative Mask(NO_UPDATE) Save
+Do not generate load/store with update instructions
+
+mupdate
+Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE)
+Generate load/store with update instructions
+
+msingle-pic-base
+Target Report Var(TARGET_SINGLE_PIC_BASE) Init(0)
+Do not load the PIC register in function prologues
+
+mavoid-indexed-addresses
+Target Report Var(TARGET_AVOID_XFORM) Init(-1) Save
+Avoid generation of indexed load/store instructions when possible
+
+mtls-markers
+Target Report Var(tls_markers) Init(1) Save
+Mark __tls_get_addr calls with argument info
+
+msched-epilog
+Target Undocumented Var(TARGET_SCHED_PROLOG) Init(1) Save
+
+msched-prolog
+Target Report Var(TARGET_SCHED_PROLOG) Save
+Schedule the start and end of the procedure
+
+maix-struct-return
+Target Report RejectNegative Var(aix_struct_return) Save
+Return all structures in memory (AIX default)
+
+msvr4-struct-return
+Target Report RejectNegative Var(aix_struct_return,0) Save
+Return small structures in registers (SVR4 default)
+
+mxl-compat
+Target Report Var(TARGET_XL_COMPAT) Save
+Conform more closely to IBM XLC semantics
+
+mrecip
+Target Report
+Generate software reciprocal divide and square root for better throughput.
+
+mrecip=
+Target Report RejectNegative Joined
+Generate software reciprocal divide and square root for better throughput.
+
+mrecip-precision
+Target Report Mask(RECIP_PRECISION) Save
+Assume that the reciprocal estimate instructions provide more accuracy.
+
+mno-fp-in-toc
+Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC) Save
+Do not place floating point constants in TOC
+
+mfp-in-toc
+Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC,0) Save
+Place floating point constants in TOC
+
+mno-sum-in-toc
+Target RejectNegative Var(TARGET_NO_SUM_IN_TOC) Save
+Do not place symbol+offset constants in TOC
+
+msum-in-toc
+Target RejectNegative Var(TARGET_NO_SUM_IN_TOC,0) Save
+Place symbol+offset constants in TOC
+
+; Output only one TOC entry per module. Normally linking fails if
+; there are more than 16K unique variables/constants in an executable. With
+; this option, linking fails only if there are more than 16K modules, or
+; if there are more than 16K unique variables/constants in a single module.
+;
+; This is at the cost of having 2 extra loads and one extra store per
+; function, and one less allocable register.
+mminimal-toc
+Target Report Mask(MINIMAL_TOC)
+Use only one TOC entry per procedure
+
+mfull-toc
+Target Report
+Put everything in the regular TOC
+
+mvrsave
+Target Report Var(TARGET_ALTIVEC_VRSAVE) Save
+Generate VRSAVE instructions when generating AltiVec code
+
+mvrsave=
+Target RejectNegative Joined
+-mvrsave=yes/no Deprecated option. Use -mvrsave/-mno-vrsave instead
+
+mblock-move-inline-limit=
+Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined UInteger Save
+Specify how many bytes should be moved inline before calling out to memcpy/memmove
+
+misel
+Target Report Mask(ISEL) Save
+Generate isel instructions
+
+misel=
+Target RejectNegative Joined
+-misel=yes/no Deprecated option. Use -misel/-mno-isel instead
+
+mspe
+Target
+Generate SPE SIMD instructions on E500
+
+mpaired
+Target Var(rs6000_paired_float) Save
+Generate PPC750CL paired-single instructions
+
+mspe=
+Target RejectNegative Joined
+-mspe=yes/no Deprecated option. Use -mspe/-mno-spe instead
+
+mdebug=
+Target RejectNegative Joined
+-mdebug= Enable debug output
+
+mabi=
+Target RejectNegative Joined
+-mabi= Specify ABI to use
+
+mcpu=
+Target RejectNegative Joined
+-mcpu= Use features of and schedule code for given CPU
+
+mtune=
+Target RejectNegative Joined
+-mtune= Schedule code for given CPU
+
+mtraceback=
+Target RejectNegative Joined
+-mtraceback= Select full, part, or no traceback table
+
+mlongcall
+Target Report Var(rs6000_default_long_calls) Save
+Avoid all range limits on call instructions
+
+mgen-cell-microcode
+Target Report Var(rs6000_gen_cell_microcode) Init(-1) Save
+Generate Cell microcode
+
+mwarn-cell-microcode
+Target Var(rs6000_warn_cell_microcode) Init(0) Warning Save
+Warn when a Cell microcoded instruction is emitted
+
+mwarn-altivec-long
+Target Var(rs6000_warn_altivec_long) Init(1) Save
+Warn about deprecated 'vector long ...' AltiVec type usage
+
+mfloat-gprs=
+Target RejectNegative Joined
+-mfloat-gprs= Select GPR floating point method
+
+mlong-double-
+Target RejectNegative Joined UInteger
+-mlong-double-<n> Specify size of long double (64 or 128 bits)
+
+msched-costly-dep=
+Target RejectNegative Joined
+Determine which dependences between insns are considered costly
+
+minsert-sched-nops=
+Target RejectNegative Joined
+Specify which post scheduling nop insertion scheme to apply
+
+malign-
+Target RejectNegative Joined
+Specify alignment of structure fields default/natural
+
+mprioritize-restricted-insns=
+Target RejectNegative Joined UInteger Var(rs6000_sched_restricted_insns_priority) Save
+Specify scheduling priority for dispatch slot restricted insns
+
+msingle-float
+Target RejectNegative Var(rs6000_single_float) Save
+Single-precision floating point unit
+
+mdouble-float
+Target RejectNegative Var(rs6000_double_float) Save
+Double-precision floating point unit
+
+msimple-fpu
+Target RejectNegative Var(rs6000_simple_fpu) Save
+Floating point unit does not support divide & sqrt
+
+mfpu=
+Target RejectNegative Joined
+-mfpu= Specify FP (sp, dp, sp-lite, dp-lite) (implies -mxilinx-fpu)
+
+mxilinx-fpu
+Target Var(rs6000_xilinx_fpu) Save
+Specify Xilinx FPU.
+
+
diff --git a/gcc/config/rs6000/rs64.md b/gcc/config/rs6000/rs64.md
new file mode 100644
index 000000000..e221b52a3
--- /dev/null
+++ b/gcc/config/rs6000/rs64.md
@@ -0,0 +1,154 @@
+;; Scheduling description for IBM RS64 processors.
+;; Copyright (C) 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "rs64,rs64fp")
+(define_cpu_unit "iu_rs64" "rs64")
+(define_cpu_unit "mciu_rs64" "rs64")
+(define_cpu_unit "fpu_rs64" "rs64fp")
+(define_cpu_unit "lsu_rs64,bpu_rs64" "rs64")
+
+;; RS64a 64-bit IU, LSU, FPU, BPU
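+
+;; Each define_insn_reservation names an insn class, gives its latency
+;; in cycles, a predicate over insn attributes, and the units the insn
+;; occupies; "unit*n" keeps that unit busy for n cycles.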
+
+(define_insn_reservation "rs64a-load" 2
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-store" 2
+ (and (eq_attr "type" "store,store_ux,store_u,fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-fpload" 3
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-llsc" 2
+ (and (eq_attr "type" "load_l,store_c")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-integer" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,cntlz,exts,isel")
+ (eq_attr "cpu" "rs64a"))
+ "iu_rs64")
+
+(define_insn_reservation "rs64a-two" 1
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "rs64a"))
+ "iu_rs64,iu_rs64")
+
+(define_insn_reservation "rs64a-three" 1
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "rs64a"))
+ "iu_rs64,iu_rs64,iu_rs64")
+
+(define_insn_reservation "rs64a-imul" 20
+ (and (eq_attr "type" "imul,imul_compare")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*13")
+
+(define_insn_reservation "rs64a-imul2" 12
+ (and (eq_attr "type" "imul2")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*5")
+
+(define_insn_reservation "rs64a-imul3" 8
+ (and (eq_attr "type" "imul3")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*2")
+
+(define_insn_reservation "rs64a-lmul" 34
+ (and (eq_attr "type" "lmul,lmul_compare")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*34")
+
+(define_insn_reservation "rs64a-idiv" 66
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*66")
+
+(define_insn_reservation "rs64a-ldiv" 66
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64*66")
+
+(define_insn_reservation "rs64a-compare" 3
+ (and (eq_attr "type" "cmp,fast_compare,compare,\
+ delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "rs64a"))
+ "iu_rs64,nothing,bpu_rs64")
+
+(define_insn_reservation "rs64a-fpcompare" 5
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64,fpu_rs64,bpu_rs64")
+
+(define_insn_reservation "rs64a-fp" 4
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64,fpu_rs64")
+
+(define_insn_reservation "rs64a-sdiv" 31
+ (and (eq_attr "type" "sdiv,ddiv")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64,fpu_rs64*31")
+
+(define_insn_reservation "rs64a-sqrt" 49
+ (and (eq_attr "type" "ssqrt,dsqrt")
+ (eq_attr "cpu" "rs64a"))
+ "mciu_rs64,fpu_rs64*49")
+
+(define_insn_reservation "rs64a-mfcr" 2
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-mtjmpr" 3
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-mfjmpr" 2
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
+(define_insn_reservation "rs64a-jmpreg" 1
+ (and (eq_attr "type" "jmpreg,branch,cr_logical,delayed_cr")
+ (eq_attr "cpu" "rs64a"))
+ "bpu_rs64")
+
+(define_insn_reservation "rs64a-isync" 6
+ (and (eq_attr "type" "isync")
+ (eq_attr "cpu" "rs64a"))
+ "bpu_rs64")
+
+(define_insn_reservation "rs64a-sync" 1
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "rs64a"))
+ "lsu_rs64")
+
diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h
new file mode 100644
index 000000000..a8bd0e75f
--- /dev/null
+++ b/gcc/config/rs6000/rtems.h
@@ -0,0 +1,56 @@
+/* Definitions for rtems targeting a PowerPC using elf.
+ Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2007
+ Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)"
+
+#define CPP_OS_RTEMS_SPEC "\
+%{!mcpu*: %{!Dppc*: %{!Dmpc*: -Dmpc750} } }\
+%{mcpu=403: %{!Dppc*: %{!Dmpc*: -Dppc403} } } \
+%{mcpu=505: %{!Dppc*: %{!Dmpc*: -Dmpc505} } } \
+%{mcpu=601: %{!Dppc*: %{!Dmpc*: -Dppc601} } } \
+%{mcpu=602: %{!Dppc*: %{!Dmpc*: -Dppc602} } } \
+%{mcpu=603: %{!Dppc*: %{!Dmpc*: -Dppc603} } } \
+%{mcpu=603e: %{!Dppc*: %{!Dmpc*: -Dppc603e} } } \
+%{mcpu=604: %{!Dppc*: %{!Dmpc*: -Dmpc604} } } \
+%{mcpu=750: %{!Dppc*: %{!Dmpc*: -Dmpc750} } } \
+%{mcpu=821: %{!Dppc*: %{!Dmpc*: -Dmpc821} } } \
+%{mcpu=860: %{!Dppc*: %{!Dmpc*: -Dmpc860} } }"
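+
+/* In spec syntax, %{mcpu=601: ...} expands only when -mcpu=601 was
+   given and %{!Dppc*: ...} only when no -Dppc... switch appears, so
+   each CPU selection defines a matching -Dppc.../-Dmpc... macro
+   unless the user already supplied one.  */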
+
+#undef SUBSUBTARGET_EXTRA_SPECS
+#define SUBSUBTARGET_EXTRA_SPECS \
+ { "cpp_os_rtems", CPP_OS_RTEMS_SPEC }
diff --git a/gcc/config/rs6000/secureplt.h b/gcc/config/rs6000/secureplt.h
new file mode 100644
index 000000000..f41078df3
--- /dev/null
+++ b/gcc/config/rs6000/secureplt.h
@@ -0,0 +1,20 @@
+/* Default to -msecure-plt.
+ Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define CC1_SECURE_PLT_DEFAULT_SPEC "-msecure-plt"
diff --git a/gcc/config/rs6000/sfp-machine.h b/gcc/config/rs6000/sfp-machine.h
new file mode 100644
index 000000000..a0d1631bb
--- /dev/null
+++ b/gcc/config/rs6000/sfp-machine.h
@@ -0,0 +1,68 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
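+
+/* Setting every fraction bit yields the default quiet NaN; the -1
+   entries fill the extra fraction words of the wider formats.  */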
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
+# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# error "Both BIG_ENDIAN and LITTLE_ENDIAN defined!"
+# endif
+# define __BYTE_ORDER __BIG_ENDIAN
+#else
+# if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define __BYTE_ORDER __LITTLE_ENDIAN
+# else
+# error "Cannot determine current byte order"
+# endif
+#endif
+
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
diff --git a/gcc/config/rs6000/si2vmx.h b/gcc/config/rs6000/si2vmx.h
new file mode 100644
index 000000000..e925a25a3
--- /dev/null
+++ b/gcc/config/rs6000/si2vmx.h
@@ -0,0 +1,2048 @@
+/* Cell BEA specific SPU intrinsics mapped to PPU/VMX intrinsics
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SI2VMX_H_
+#define _SI2VMX_H_ 1
+
+#ifndef __SPU__
+
+#include <stdlib.h>
+#include <vec_types.h>
+
+
+/* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics.
+ * Users can override the action by defining it prior to including this
+ * header file.
+ */
+#ifndef SPU_HALT_ACTION
+#define SPU_HALT_ACTION abort()
+#endif
+
+/* Specify a default stop action for the spu_stop intrinsic.
+ * Users can override the action by defining it prior to including this
+ * header file.
+ */
+#ifndef SPU_STOP_ACTION
+#define SPU_STOP_ACTION abort()
+#endif
+
+
+/* Specify a default action for unsupported intrinsics.
+ * Users can override the action by defining it prior to including this
+ * header file.
+ */
+#ifndef SPU_UNSUPPORTED_ACTION
+#define SPU_UNSUPPORTED_ACTION abort()
+#endif
+
+
+/* Casting intrinsics - from scalar to quadword
+ */
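+
+/* Each scalar sits in the quadword's SPU "preferred slot": byte 3 for
+ * chars, halfword 1 for shorts, word 0 for ints and floats, and
+ * doubleword 0 for long longs and doubles; the unions below encode
+ * this placement.
+ */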
+
+static __inline qword si_from_uchar(unsigned char c) {
+ union {
+ qword q;
+ unsigned char c[16];
+ } x;
+ x.c[3] = c;
+ return (x.q);
+}
+
+static __inline qword si_from_char(signed char c) {
+ union {
+ qword q;
+ signed char c[16];
+ } x;
+ x.c[3] = c;
+ return (x.q);
+}
+
+static __inline qword si_from_ushort(unsigned short s) {
+ union {
+ qword q;
+ unsigned short s[8];
+ } x;
+ x.s[1] = s;
+ return (x.q);
+}
+
+static __inline qword si_from_short(short s) {
+ union {
+ qword q;
+ short s[8];
+ } x;
+ x.s[1] = s;
+ return (x.q);
+}
+
+
+static __inline qword si_from_uint(unsigned int i) {
+ union {
+ qword q;
+ unsigned int i[4];
+ } x;
+ x.i[0] = i;
+ return (x.q);
+}
+
+static __inline qword si_from_int(int i) {
+ union {
+ qword q;
+ int i[4];
+ } x;
+ x.i[0] = i;
+ return (x.q);
+}
+
+static __inline qword si_from_ullong(unsigned long long l) {
+ union {
+ qword q;
+ unsigned long long l[2];
+ } x;
+ x.l[0] = l;
+ return (x.q);
+}
+
+static __inline qword si_from_llong(long long l) {
+ union {
+ qword q;
+ long long l[2];
+ } x;
+ x.l[0] = l;
+ return (x.q);
+}
+
+static __inline qword si_from_float(float f) {
+ union {
+ qword q;
+ float f[4];
+ } x;
+ x.f[0] = f;
+ return (x.q);
+}
+
+static __inline qword si_from_double(double d) {
+ union {
+ qword q;
+ double d[2];
+ } x;
+ x.d[0] = d;
+ return (x.q);
+}
+
+static __inline qword si_from_ptr(void *ptr) {
+ union {
+ qword q;
+ void *p;
+ } x;
+ x.p = ptr;
+ return (x.q);
+}
+
+
+/* Casting intrinsics - from quadword to scalar
+ */
+static __inline unsigned char si_to_uchar(qword q) {
+ union {
+ qword q;
+ unsigned char c[16];
+ } x;
+ x.q = q;
+ return (x.c[3]);
+}
+
+static __inline signed char si_to_char(qword q) {
+ union {
+ qword q;
+ signed char c[16];
+ } x;
+ x.q = q;
+ return (x.c[3]);
+}
+
+static __inline unsigned short si_to_ushort(qword q) {
+ union {
+ qword q;
+ unsigned short s[8];
+ } x;
+ x.q = q;
+ return (x.s[1]);
+}
+
+static __inline short si_to_short(qword q) {
+ union {
+ qword q;
+ short s[8];
+ } x;
+ x.q = q;
+ return (x.s[1]);
+}
+
+static __inline unsigned int si_to_uint(qword q) {
+ union {
+ qword q;
+ unsigned int i[4];
+ } x;
+ x.q = q;
+ return (x.i[0]);
+}
+
+static __inline int si_to_int(qword q) {
+ union {
+ qword q;
+ int i[4];
+ } x;
+ x.q = q;
+ return (x.i[0]);
+}
+
+static __inline unsigned long long si_to_ullong(qword q) {
+ union {
+ qword q;
+ unsigned long long l[2];
+ } x;
+ x.q = q;
+ return (x.l[0]);
+}
+
+static __inline long long si_to_llong(qword q) {
+ union {
+ qword q;
+ long long l[2];
+ } x;
+ x.q = q;
+ return (x.l[0]);
+}
+
+static __inline float si_to_float(qword q) {
+ union {
+ qword q;
+ float f[4];
+ } x;
+ x.q = q;
+ return (x.f[0]);
+}
+
+static __inline double si_to_double(qword q) {
+ union {
+ qword q;
+ double d[2];
+ } x;
+ x.q = q;
+ return (x.d[0]);
+}
+
+static __inline void * si_to_ptr(qword q) {
+ union {
+ qword q;
+ void *p;
+ } x;
+ x.q = q;
+ return (x.p);
+}
+
+
+/* Absolute difference
+ */
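+/* Per byte: dc = (ac > bc) ? ac - bc : bc - ac; the unselected
+   difference wraps modulo 256 but vec_sel discards it.  */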
+static __inline qword si_absdb(qword a, qword b)
+{
+ vec_uchar16 ac, bc, dc;
+
+ ac = (vec_uchar16)(a);
+ bc = (vec_uchar16)(b);
+ dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc));
+
+ return ((qword)(dc));
+}
+
+/* Add intrinsics
+ */
+#define si_a(_a, _b) ((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b))))
+
+#define si_ah(_a, _b) ((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b))))
+
+static __inline qword si_ai(qword a, int b)
+{
+ return ((qword)(vec_add((vec_int4)(a),
+ vec_splat((vec_int4)(si_from_int(b)), 0))));
+}
+
+
+static __inline qword si_ahi(qword a, short b)
+{
+ return ((qword)(vec_add((vec_short8)(a),
+ vec_splat((vec_short8)(si_from_short(b)), 1))));
+}
+
+
+#define si_fa(_a, _b) ((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b))))
+
+
+static __inline qword si_dfa(qword a, qword b)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } ad, bd, dd;
+
+ ad.v = (vec_double2)(a);
+ bd.v = (vec_double2)(b);
+ dd.d[0] = ad.d[0] + bd.d[0];
+ dd.d[1] = ad.d[1] + bd.d[1];
+
+ return ((qword)(dd.v));
+}
+
+/* Add word extended
+ */
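+/* The carry-in is taken from the least significant bit of each word
+   of _c, matching the SPU convention of keeping carries in a word's
+   low bit.  */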
+#define si_addx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \
+ vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
+
+
+/* Bit-wise AND
+ */
+#define si_and(_a, _b) ((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b))))
+
+
+static __inline qword si_andbi(qword a, signed char b)
+{
+ return ((qword)(vec_and((vec_char16)(a),
+ vec_splat((vec_char16)(si_from_char(b)), 3))));
+}
+
+static __inline qword si_andhi(qword a, signed short b)
+{
+ return ((qword)(vec_and((vec_short8)(a),
+ vec_splat((vec_short8)(si_from_short(b)), 1))));
+}
+
+
+static __inline qword si_andi(qword a, signed int b)
+{
+ return ((qword)(vec_and((vec_int4)(a),
+ vec_splat((vec_int4)(si_from_int(b)), 0))));
+}
+
+
+/* Bit-wise AND with complement
+ */
+#define si_andc(_a, _b) ((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b))))
+
+
+/* Average byte vectors
+ */
+#define si_avgb(_a, _b) ((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b))))
+
+
+/* Branch indirect and set link on external data
+ */
+#define si_bisled(_func) /* not mappable */
+#define si_bisledd(_func) /* not mappable */
+#define si_bislede(_func) /* not mappable */
+
+
+/* Borrow generate
+ */
+#define si_bg(_a, _b) ((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a))))
+
+#define si_bgx(_a, _b, _c) ((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)), \
+ vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)), \
+ (vec_uint4)(_c))), vec_splat_u32(1))))
+
+/* Compare absolute equal
+ */
+static __inline qword si_fcmeq(qword a, qword b)
+{
+ vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
+
+ return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
+ vec_andc((vec_float4)(b), msb))));
+}
+
+static __inline qword si_dfcmeq(qword a, qword b)
+{
+ vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
+ vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
+ vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27};
+
+ vec_uint4 biteq;
+ vec_uint4 aabs;
+ vec_uint4 babs;
+ vec_uint4 a_gt;
+ vec_uint4 ahi_inf;
+ vec_uint4 anan;
+ vec_uint4 result;
+
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+
+ /* Shift 4 bytes */
+ x.i[3] = 4 << 3;
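+  /* vec_slo takes its shift count from bits 121:124 of the shift
+     vector, i.e. the low-order byte divided by 8, so storing
+     4 << 3 = 32 there encodes a 4-byte shift; the same idiom recurs
+     in the other double-precision helpers below.  */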
+
+ /* Mask out sign bits */
+ aabs = vec_and((vec_uint4)a,sign_mask);
+ babs = vec_and((vec_uint4)b,sign_mask);
+
+ /* A) Check for bit equality, store in high word */
+ biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs);
+ biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));
+
+ /*
+ B) Check if a is NaN, store in high word
+
+ B1) If the high word is greater than max_exp (indicates a NaN)
+ B2) If the low word is greater than 0
+ */
+ a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
+
+ /* B3) Check if the high word is equal to the inf exponent */
+ ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);
+
+ /* anan = B1[hi] or (B2[lo] and B3[hi]) */
+ anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));
+
+ /* result = A and not B */
+ result = vec_andc(biteq, anan);
+
+ /* Promote high words to 64 bits and return */
+ return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
+}
+
+
+/* Compare absolute greater than
+ */
+static __inline qword si_fcmgt(qword a, qword b)
+{
+ vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
+
+ return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb),
+ vec_andc((vec_float4)(b), msb))));
+}
+
+static __inline qword si_dfcmgt(qword a, qword b)
+{
+ vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+ vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
+ vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
+
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+
+ /* Shift 4 bytes */
+ x.i[3] = 4 << 3;
+
+ // absolute value of a,b
+ vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
+ vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
+
+ // check if a is nan
+ vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
+ vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
+ a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
+ a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
+
+ // check if b is nan
+ vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
+ vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
+ b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
+ b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);
+
+ // A) Check if the exponents are different
+ vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs);
+
+ // B) Check if high word equal, and low word greater
+ vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs);
+ vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs);
+ vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
+
+ // If either A or B is true, return true (unless NaNs detected)
+ vec_uint4 r = vec_or(gt_hi, eqgt);
+
+ // splat the high words of the comparison step
+ r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);
+
+ // correct for NaNs in input
+ return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
+}
+
+
+/* Compare equal
+ */
+static __inline qword si_ceqb(qword a, qword b)
+{
+ return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static __inline qword si_ceqh(qword a, qword b)
+{
+ return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b))));
+}
+
+static __inline qword si_ceq(qword a, qword b)
+{
+ return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b))));
+}
+
+static __inline qword si_fceq(qword a, qword b)
+{
+ return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b))));
+}
+
+static __inline qword si_ceqbi(qword a, signed char b)
+{
+ return ((qword)(vec_cmpeq((vec_char16)(a),
+ vec_splat((vec_char16)(si_from_char(b)), 3))));
+}
+
+static __inline qword si_ceqhi(qword a, signed short b)
+{
+ return ((qword)(vec_cmpeq((vec_short8)(a),
+ vec_splat((vec_short8)(si_from_short(b)), 1))));
+}
+
+static __inline qword si_ceqi(qword a, signed int b)
+{
+ return ((qword)(vec_cmpeq((vec_int4)(a),
+ vec_splat((vec_int4)(si_from_int(b)), 0))));
+}
+
+static __inline qword si_dfceq(qword a, qword b)
+{
+ vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
+ vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
+ vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27};
+
+ vec_uint4 biteq;
+ vec_uint4 aabs;
+ vec_uint4 babs;
+ vec_uint4 a_gt;
+ vec_uint4 ahi_inf;
+ vec_uint4 anan;
+ vec_uint4 iszero;
+ vec_uint4 result;
+
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+
+ /* Shift 4 bytes */
+ x.i[3] = 4 << 3;
+
+ /* A) Check for bit equality, store in high word */
+ biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b);
+ biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));
+
+ /* Mask out sign bits */
+ aabs = vec_and((vec_uint4)a,sign_mask);
+ babs = vec_and((vec_uint4)b,sign_mask);
+
+ /*
+ B) Check if a is NaN, store in high word
+
+ B1) If the high word is greater than max_exp (indicates a NaN)
+ B2) If the low word is greater than 0
+ */
+ a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
+
+ /* B3) Check if the high word is equal to the inf exponent */
+ ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);
+
+ /* anan = B1[hi] or (B2[lo] and B3[hi]) */
+ anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));
+
+ /* C) Check for 0 = -0 special case */
+ iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0));
+ iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
+
+ /* result = (A or C) and not B */
+ result = vec_or(biteq,iszero);
+ result = vec_andc(result, anan);
+
+ /* Promote high words to 64 bits and return */
+ return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
+}
+
+
+/* Compare greater than
+ */
+static __inline qword si_cgtb(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b))));
+}
+
+static __inline qword si_cgth(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b))));
+}
+
+static __inline qword si_cgt(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b))));
+}
+
+static __inline qword si_clgtb(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static __inline qword si_clgth(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b))));
+}
+
+static __inline qword si_clgt(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b))));
+}
+
+static __inline qword si_fcgt(qword a, qword b)
+{
+ return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b))));
+}
+
+static __inline qword si_dfcgt(qword a, qword b)
+{
+ vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+ vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 };
+ vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
+ vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
+
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+
+ /* Shift 4 bytes */
+ x.i[3] = 4 << 3;
+
+ // absolute value of a,b
+ vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
+ vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
+
+ // check if a is nan
+ vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
+ vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
+ a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
+ a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
+
+ // check if b is nan
+ vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
+ vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
+ b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
+ b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);
+
+ // sign of a
+ vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
+ asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi);
+
+ // sign of b
+ vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
+ bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi);
+
+ // negative a
+ vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs);
+ vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7)));
+ abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
+ vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1)));
+
+ /* select the negated form when a is negative */
+ vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel);
+
+ /* negative b */
+ vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs);
+ bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
+ vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1)));
+
+ /* select the negated form when b is negative */
+ vec_int4 bval = (vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel);
+
+ /* A) signed compare of the high words */
+ vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval);
+
+ /* B) high words equal and low word of a greater (unsigned) */
+ vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval);
+ vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval);
+ vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
+
+ /* If either A or B is true, return true (unless NaNs detected) */
+ vec_uint4 r = vec_or(gt_hi, eqgt);
+
+ /* splat the high words of the comparison step */
+ r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);
+
+ /* correct for NaNs in input */
+ return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
+}
+
+static __inline qword si_cgtbi(qword a, signed char b)
+{
+ return ((qword)(vec_cmpgt((vec_char16)(a),
+ vec_splat((vec_char16)(si_from_char(b)), 3))));
+}
+
+static __inline qword si_cgthi(qword a, signed short b)
+{
+ return ((qword)(vec_cmpgt((vec_short8)(a),
+ vec_splat((vec_short8)(si_from_short(b)), 1))));
+}
+
+static __inline qword si_cgti(qword a, signed int b)
+{
+ return ((qword)(vec_cmpgt((vec_int4)(a),
+ vec_splat((vec_int4)(si_from_int(b)), 0))));
+}
+
+static __inline qword si_clgtbi(qword a, unsigned char b)
+{
+ return ((qword)(vec_cmpgt((vec_uchar16)(a),
+ vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
+}
+
+static __inline qword si_clgthi(qword a, unsigned short b)
+{
+ return ((qword)(vec_cmpgt((vec_ushort8)(a),
+ vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
+}
+
+static __inline qword si_clgti(qword a, unsigned int b)
+{
+ return ((qword)(vec_cmpgt((vec_uint4)(a),
+ vec_splat((vec_uint4)(si_from_uint(b)), 0))));
+}
+
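+/* Test special values of a double.  The bits of b select the IEEE
+   classes to detect: 0x40 NaN, 0x20 +inf, 0x10 -inf, 0x08 +0, 0x04 -0,
+   0x02 +denorm, 0x01 -denorm.  */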
+static __inline qword si_dftsv(qword a, char b)
+{
+ vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+ vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
+ vec_uint4 result = (vec_uint4){0};
+ vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
+ sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi);
+ vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask);
+
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+
+ /* Shift count of 4 bytes, expressed in bits for vec_slo/vec_sro */
+ x.i[3] = 4 << 3;
+
+ /* Nan or +inf or -inf */
+ if (b & 0x70)
+ {
+ vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
+ vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
+ /* NaN */
+ if (b & 0x40)
+ {
+ vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
+ a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
+ a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
+ result = vec_or(result, a_nan);
+ }
+ /* inf */
+ if (b & 0x30)
+ {
+ a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf);
+ a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
+ /* +inf */
+ if (b & 0x20)
+ result = vec_or(vec_andc(a_inf, sign), result);
+ /* -inf */
+ if (b & 0x10)
+ result = vec_or(vec_and(a_inf, sign), result);
+ }
+ }
+ /* 0 or denorm */
+ if (b & 0xF)
+ {
+ vec_uint4 iszero = (vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0));
+ iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
+ /* denorm */
+ if (b & 0x3)
+ {
+ vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF};
+ vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero);
+ isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi);
+ /* +denorm */
+ if (b & 0x2)
+ result = vec_or(vec_andc(isdenorm, sign), result);
+ /* -denorm */
+ if (b & 0x1)
+ result = vec_or(vec_and(isdenorm, sign), result);
+ }
+ /* 0 */
+ if (b & 0xC)
+ {
+ iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi);
+ /* +0 */
+ if (b & 0x8)
+ result = vec_or(vec_andc(iszero, sign), result);
+ /* -0 */
+ if (b & 0x4)
+ result = vec_or(vec_and(iszero, sign), result);
+ }
+ }
+ return ((qword)result);
+}
+
+
+/* Carry generate
+ */
+#define si_cg(_a, _b) ((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b))))
+
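+/* Extended carry: the carry out of _a + _b + (_c & 1), computed as
+   carry (_a, _b) OR carry (_a + _b, _c & 1).  */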
+#define si_cgx(_a, _b, _c) ((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)), \
+ vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), \
+ vec_and((vec_uint4)(_c), vec_splat_u32(1))))))
+
+
+/* Count ones for bytes
+ */
+static __inline qword si_cntb(qword a)
+{
+ vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
+ vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 };
+ vec_uchar16 av;
+
+ av = (vec_uchar16)(a);
+
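+  /* nib_cnt is a 16-entry bit-population table.  vec_perm indexes it
+     with the low and with the high nibble of each byte, and the two
+     lookups are summed to give the per-byte count.  */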
+ return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av),
+ vec_perm(nib_cnt, nib_cnt, vec_sr (av, four)))));
+}
+
+/* Count leading zeros
+ */
+static __inline qword si_clz(qword a)
+{
+ vec_uchar16 av;
+ vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3;
+ vec_uchar16 four = vec_splat_u8(4);
+ vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
+ vec_uchar16 eight = vec_splat_u8(8);
+ vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
+ vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24};
+
+ av = (vec_uchar16)(a);
+
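+  /* nib_cnt holds leading-zero counts per nibble.  A byte's count is
+     the high-nibble count plus the low-nibble count when the high
+     nibble is zero; all-zero bytes then cascade, pulling in the count
+     of the next byte, so each word's total accumulates in its top
+     byte, which the final vec_sr extracts.  */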
+ cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four));
+ cnt_lo = vec_perm(nib_cnt, nib_cnt, av);
+
+ cnt = vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four)));
+
+ tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight));
+ tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen));
+ tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour));
+
+ cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight)));
+ cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen)));
+ cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour)));
+
+ return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour))));
+}
+
+/* Convert to float
+ */
+#define si_cuflt(_a, _b) ((qword)(vec_ctf((vec_uint4)(_a), _b)))
+#define si_csflt(_a, _b) ((qword)(vec_ctf((vec_int4)(_a), _b)))
+
+/* Convert to signed int
+ */
+#define si_cflts(_a, _b) ((qword)(vec_cts((vec_float4)(_a), _b)))
+
+/* Convert to unsigned int
+ */
+#define si_cfltu(_a, _b) ((qword)(vec_ctu((vec_float4)(_a), _b)))
+
+/* Synchronize
+ */
+#define si_dsync() /* do nothing */
+#define si_sync() /* do nothing */
+#define si_syncc() /* do nothing */
+
+
+/* Equivalence
+ */
+static __inline qword si_eqv(qword a, qword b)
+{
+ vec_uchar16 d;
+
+ d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b));
+ return ((qword)(vec_nor(d, d)));
+}
+
+/* Extend
+ */
+static __inline qword si_xsbh(qword a)
+{
+ vec_char16 av;
+
+ av = (vec_char16)(a);
+ return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
+ 0, 0, 0, 0, 0, 0, 0, 0})))));
+}
+
+static __inline qword si_xshw(qword a)
+{
+ vec_short8 av;
+
+ av = (vec_short8)(a);
+ return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
+ 10,11,14,15,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0})))));
+}
+
+static __inline qword si_xswd(qword a)
+{
+ vec_int4 av;
+
+ av = (vec_int4)(a);
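+  /* Sign-extend words 1 and 3 to doublewords: vec_sra by 31 forms the
+     sign words and the permute interleaves each one with its source
+     word.  */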
+ return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
+ ((vec_uchar16){20, 21, 22, 23,
+ 4, 5, 6, 7,
+ 28, 29, 30, 31,
+ 12, 13, 14, 15}))));
+}
+
+static __inline qword si_fesd(qword a)
+{
+ union {
+ double d[2];
+ vec_double2 vd;
+ } out;
+ union {
+ float f[4];
+ vec_float4 vf;
+ } in;
+
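+  /* VMX has no double-precision arithmetic, so extend float elements
+     0 and 2 through scalar conversions.  */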
+ in.vf = (vec_float4)(a);
+ out.d[0] = (double)(in.f[0]);
+ out.d[1] = (double)(in.f[2]);
+ return ((qword)(out.vd));
+}
+
+/* Gather
+ */
+static __inline qword si_gbb(qword a)
+{
+ vec_uchar16 bits;
+ vec_uint4 bytes;
+
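+  /* Shift bit 0 of byte i to bit position 7 - (i % 8), then sum across
+     each 8-byte half so the 16 gathered bits end up as a 16-bit value
+     in the preferred scalar slot.  */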
+ bits = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0,
+ 7, 6, 5, 4, 3, 2, 1, 0}));
+ bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0}));
+
+ return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0}))));
+}
+
+
+static __inline qword si_gbh(qword a)
+{
+ vec_ushort8 bits;
+ vec_uint4 bytes;
+
+ bits = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0}));
+
+ bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0});
+
+ return ((qword)(vec_sld(bytes, bytes, 12)));
+}
+
+static __inline qword si_gb(qword a)
+{
+ vec_uint4 bits;
+ vec_uint4 bytes;
+
+ bits = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0}));
+ bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0}));
+ return ((qword)(vec_sld(bytes, bytes, 12)));
+}
+
+
+/* Compare and halt
+ */
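+/* Only the preferred scalar slot (word element 0) takes part in the
+   comparison; SPU_HALT_ACTION supplies the halt behavior.  */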
+static __inline void si_heq(qword a, qword b)
+{
+ union {
+ vector unsigned int v;
+ unsigned int i[4];
+ } aa, bb;
+
+ aa.v = (vector unsigned int)(a);
+ bb.v = (vector unsigned int)(b);
+
+ if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; };
+}
+
+static __inline void si_heqi(qword a, unsigned int b)
+{
+ union {
+ vector unsigned int v;
+ unsigned int i[4];
+ } aa;
+
+ aa.v = (vector unsigned int)(a);
+
+ if (aa.i[0] == b) { SPU_HALT_ACTION; };
+}
+
+static __inline void si_hgt(qword a, qword b)
+{
+ union {
+ vector signed int v;
+ signed int i[4];
+ } aa, bb;
+
+ aa.v = (vector signed int)(a);
+ bb.v = (vector signed int)(b);
+
+ if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
+}
+
+static __inline void si_hgti(qword a, signed int b)
+{
+ union {
+ vector signed int v;
+ signed int i[4];
+ } aa;
+
+ aa.v = (vector signed int)(a);
+
+ if (aa.i[0] > b) { SPU_HALT_ACTION; };
+}
+
+static __inline void si_hlgt(qword a, qword b)
+{
+ union {
+ vector unsigned int v;
+ unsigned int i[4];
+ } aa, bb;
+
+ aa.v = (vector unsigned int)(a);
+ bb.v = (vector unsigned int)(b);
+
+ if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
+}
+
+static __inline void si_hlgti(qword a, unsigned int b)
+{
+ union {
+ vector unsigned int v;
+ unsigned int i[4];
+ } aa;
+
+ aa.v = (vector unsigned int)(a);
+
+ if (aa.i[0] > b) { SPU_HALT_ACTION; };
+}
+
+
+/* Multiply and Add
+ */
+static __inline qword si_mpya(qword a, qword b, qword c)
+{
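+  /* Zeroing the even halfwords of a makes vec_msum accumulate only the
+     odd (low) halfword product of each word into c.  */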
+ return ((qword)(vec_msum(vec_and((vec_short8)(a),
+ ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
+ (vec_short8)(b), (vec_int4)(c))));
+}
+
+static __inline qword si_fma(qword a, qword b, qword c)
+{
+ return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
+}
+
+static __inline qword si_dfma(qword a, qword b, qword c)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, cc, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ cc.v = (vec_double2)(c);
+ dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0];
+ dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1];
+ return ((qword)(dd.v));
+}
+
+/* Form Mask
+ */
+#define si_fsmbi(_a) si_fsmb(si_from_int(_a))
+
+static __inline qword si_fsmb(qword a)
+{
+ vec_char16 mask;
+ vec_ushort8 in;
+
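+  /* The 16 mask bits sit in bytes 2 and 3 (the preferred halfword).
+     Every byte lane gets a copy of its mask byte, a per-lane left
+     shift moves the lane's selector bit into the sign position, and an
+     arithmetic shift right by 7 smears it into 0x00 or 0xFF.  */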
+ in = (vec_ushort8)(a);
+ mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3})));
+ return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7,
+ 0, 1, 2, 3, 4, 5, 6, 7})),
+ vec_splat_u8(7))));
+}
+
+
+static __inline qword si_fsmh(qword a)
+{
+ vec_uchar16 in;
+ vec_short8 mask;
+
+ in = (vec_uchar16)(a);
+ mask = (vec_short8)(vec_splat(in, 3));
+ return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
+ vec_splat_u16(15))));
+}
+
+static __inline qword si_fsm(qword a)
+{
+ vec_uchar16 in;
+ vec_int4 mask;
+
+ in = (vec_uchar16)(a);
+ mask = (vec_int4)(vec_splat(in, 3));
+ return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})),
+ ((vec_uint4){31,31,31,31}))));
+}
+
+/* Move from/to registers
+ */
+#define si_fscrrd() ((qword)((vec_uint4){0}))
+#define si_fscrwr(_a)
+
+#define si_mfspr(_reg) ((qword)((vec_uint4){0}))
+#define si_mtspr(_reg, _a)
+
+/* Multiply High High Add
+ */
+static __inline qword si_mpyhha(qword a, qword b, qword c)
+{
+ return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c))));
+}
+
+static __inline qword si_mpyhhau(qword a, qword b, qword c)
+{
+ return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c))));
+}
+
+/* Multiply Subtract
+ */
+static __inline qword si_fms(qword a, qword b, qword c)
+{
+ return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
+ vec_sub(((vec_float4){0.0f}), (vec_float4)(c)))));
+}
+
+static __inline qword si_dfms(qword a, qword b, qword c)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, cc, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ cc.v = (vec_double2)(c);
+ dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0];
+ dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1];
+ return ((qword)(dd.v));
+}
+
+/* Multiply
+ */
+static __inline qword si_fm(qword a, qword b)
+{
+ return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f}))));
+}
+
+static __inline qword si_dfm(qword a, qword b)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ dd.d[0] = aa.d[0] * bb.d[0];
+ dd.d[1] = aa.d[1] * bb.d[1];
+ return ((qword)(dd.v));
+}
+
+/* Multiply High
+ */
+static __inline qword si_mpyh(qword a, qword b)
+{
+ vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16};
+
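+  /* High halfword of a times low halfword of b: shifting b left 16
+     bits lines its low halfword up with vec_mule's even operand, and
+     the product is shifted left 16 to match mpyh's placement.  */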
+ return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen)));
+}
+
+
+/* Multiply High High
+ */
+static __inline qword si_mpyhh(qword a, qword b)
+{
+ return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b))));
+}
+
+static __inline qword si_mpyhhu(qword a, qword b)
+{
+ return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b))));
+}
+
+/* Multiply Odd
+ */
+static __inline qword si_mpy(qword a, qword b)
+{
+ return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b))));
+}
+
+static __inline qword si_mpyu(qword a, qword b)
+{
+ return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b))));
+}
+
+static __inline qword si_mpyi(qword a, short b)
+{
+ return ((qword)(vec_mulo((vec_short8)(a),
+ vec_splat((vec_short8)(si_from_short(b)), 1))));
+}
+
+static __inline qword si_mpyui(qword a, unsigned short b)
+{
+ return ((qword)(vec_mulo((vec_ushort8)(a),
+ vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
+}
+
+/* Multiply and Shift Right
+ */
+static __inline qword si_mpys(qword a, qword b)
+{
+ return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16}))));
+}
+
+/* Nand
+ */
+static __inline qword si_nand(qword a, qword b)
+{
+ vec_uchar16 d;
+
+ d = vec_and((vec_uchar16)(a), (vec_uchar16)(b));
+ return ((qword)(vec_nor(d, d)));
+}
+
+/* Negative Multiply Add
+ */
+static __inline qword si_dfnma(qword a, qword b, qword c)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, cc, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ cc.v = (vec_double2)(c);
+ dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0];
+ dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1];
+ return ((qword)(dd.v));
+}
+
+/* Negative Multiply and Subtract
+ */
+static __inline qword si_fnms(qword a, qword b, qword c)
+{
+ return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
+}
+
+static __inline qword si_dfnms(qword a, qword b, qword c)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, cc, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ cc.v = (vec_double2)(c);
+ dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0];
+ dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1];
+ return ((qword)(dd.v));
+}
+
+/* Nor
+ */
+static __inline qword si_nor(qword a, qword b)
+{
+ return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+/* Or
+ */
+static __inline qword si_or(qword a, qword b)
+{
+ return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static __inline qword si_orbi(qword a, unsigned char b)
+{
+ return ((qword)(vec_or((vec_uchar16)(a),
+ vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
+}
+
+static __inline qword si_orhi(qword a, unsigned short b)
+{
+ return ((qword)(vec_or((vec_ushort8)(a),
+ vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
+}
+
+static __inline qword si_ori(qword a, unsigned int b)
+{
+ return ((qword)(vec_or((vec_uint4)(a),
+ vec_splat((vec_uint4)(si_from_uint(b)), 0))));
+}
+
+/* Or Complement
+ */
+static __inline qword si_orc(qword a, qword b)
+{
+ return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b)))));
+}
+
+
+/* Or Across
+ */
+static __inline qword si_orx(qword a)
+{
+ vec_uchar16 tmp;
+ tmp = (vec_uchar16)(a);
+ tmp = vec_or(tmp, vec_sld(tmp, tmp, 8));
+ tmp = vec_or(tmp, vec_sld(tmp, tmp, 4));
+ return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}))));
+}
+
+
+/* Estimates
+ */
+static __inline qword si_frest(qword a)
+{
+ return ((qword)(vec_re((vec_float4)(a))));
+}
+
+static __inline qword si_frsqest(qword a)
+{
+ return ((qword)(vec_rsqrte((vec_float4)(a))));
+}
+
+#define si_fi(_a, _d) (_d)
+
+/* Channel Read and Write
+ */
+#define si_rdch(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */
+#define si_rchcnt(_channel) ((qword)(vec_splat_u8(0))) /* not mappable */
+#define si_wrch(_channel, _a) /* not mappable */
+
+/* Rotate Left
+ */
+static __inline qword si_roth(qword a, qword b)
+{
+ return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b))));
+}
+
+static __inline qword si_rot(qword a, qword b)
+{
+ return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b))));
+}
+
+static __inline qword si_rothi(qword a, int b)
+{
+ return ((qword)(vec_rl((vec_ushort8)(a),
+ vec_splat((vec_ushort8)(si_from_int(b)), 1))));
+}
+
+static __inline qword si_roti(qword a, int b)
+{
+ return ((qword)(vec_rl((vec_uint4)(a),
+ vec_splat((vec_uint4)(si_from_int(b)), 0))));
+}
+
+/* Rotate Left with Mask
+ */
+static __inline qword si_rothm(qword a, qword b)
+{
+ vec_ushort8 neg_b;
+ vec_ushort8 mask;
+
+ neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
+ mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
+}
+
+static __inline qword si_rotm(qword a, qword b)
+{
+ vec_uint4 neg_b;
+ vec_uint4 mask;
+
+ neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
+ mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
+}
+
+static __inline qword si_rothmi(qword a, int b)
+{
+ vec_ushort8 neg_b;
+ vec_ushort8 mask;
+
+ neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
+ mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
+}
+
+static __inline qword si_rotmi(qword a, int b)
+{
+ vec_uint4 neg_b;
+ vec_uint4 mask;
+
+ neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
+ mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
+}
+
+
+/* Rotate Left Algebraic with Mask
+ */
+static __inline qword si_rotmah(qword a, qword b)
+{
+ vec_ushort8 neg_b;
+ vec_ushort8 mask;
+
+ neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
+ mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
+}
+
+static __inline qword si_rotma(qword a, qword b)
+{
+ vec_uint4 neg_b;
+ vec_uint4 mask;
+
+ neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
+ mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
+}
+
+
+static __inline qword si_rotmahi(qword a, int b)
+{
+ vec_ushort8 neg_b;
+ vec_ushort8 mask;
+
+ neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
+ mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
+}
+
+static __inline qword si_rotmai(qword a, int b)
+{
+ vec_uint4 neg_b;
+ vec_uint4 mask;
+
+ neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
+ mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
+}
+
+
+/* Rotate Left Quadword by Bytes with Mask
+ */
+static __inline qword si_rotqmbyi(qword a, int count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+ vec_uchar16 mask;
+
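+  /* rotqmby shifts right by the negated byte count: negate it, express
+     it in bits for vec_sro, and force the result to zero once the
+     shift reaches 16 bytes.  */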
+ count = 0 - count;
+ x.i[3] = count << 3;
+ mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
+
+ return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
+}
+
+
+static __inline qword si_rotqmby(qword a, qword count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+ int cnt;
+ vec_uchar16 mask;
+
+ x.v = (vec_uchar16)(count);
+ x.i[0] = cnt = (0 - x.i[0]) << 3;
+
+ x.v = vec_splat(x.v, 3);
+ mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
+
+ return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
+}
+
+
+/* Rotate Left Quadword by Bytes
+ */
+static __inline qword si_rotqbyi(qword a, int count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } left, right;
+
+ count <<= 3;
+ left.i[3] = count;
+ right.i[3] = 0 - count;
+ return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v))));
+}
+
+static __inline qword si_rotqby(qword a, qword count)
+{
+ vec_uchar16 left, right;
+
+ left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
+ right = vec_sub(vec_splat_u8(0), left);
+ return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
+}
+
+/* Rotate Left Quadword by Bytes Bit Count
+ */
+static __inline qword si_rotqbybi(qword a, qword count)
+{
+ vec_uchar16 left, right;
+
+ left = vec_splat((vec_uchar16)(count), 3);
+ right = vec_sub(vec_splat_u8(7), left);
+ return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
+}
+
+
+/* Rotate Left Quadword by Bits
+ */
+static __inline qword si_rotqbii(qword a, int count)
+{
+ vec_uchar16 x, y;
+ vec_uchar16 result;
+
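+  /* Quadword bit rotate: shift the whole vector left by the bit count
+     and OR in the wrap-around bits, obtained by moving byte 0 to the
+     tail with vec_sro and shifting it right by 8 - count.  */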
+ x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3);
+ y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
+ (vec_uint4)vec_sub(vec_splat_u8(8), x)));
+ result = vec_or(vec_sll((qword)(a), x), y);
+ return ((qword)(result));
+}
+
+static __inline qword si_rotqbi(qword a, qword count)
+{
+ vec_uchar16 x, y;
+ vec_uchar16 result;
+
+ x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7));
+ y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
+ (vec_uint4)vec_sub(vec_splat_u8(8), x)));
+
+ result = vec_or(vec_sll((qword)(a), x), y);
+ return ((qword)(result));
+}
+
+
+/* Rotate Left Quadword and Mask by Bits
+ */
+static __inline qword si_rotqmbii(qword a, int count)
+{
+ return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3))));
+}
+
+static __inline qword si_rotqmbi(qword a, qword count)
+{
+ return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), vec_splat((vec_uchar16)(count), 3)))));
+}
+
+
+/* Rotate Left Quadword and Mask by Bytes with Bit Count
+ */
+static __inline qword si_rotqmbybi(qword a, qword count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+ int cnt;
+ vec_uchar16 mask;
+
+ x.v = (vec_uchar16)(count);
+ x.i[0] = cnt = 0 - (x.i[0] & ~7);
+ x.v = vec_splat(x.v, 3);
+ mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
+
+ return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
+}
+
+
+/* Round Double to Float
+ */
+static __inline qword si_frds(qword a)
+{
+ union {
+ vec_float4 v;
+ float f[4];
+ } d;
+ union {
+ vec_double2 v;
+ double d[2];
+ } in;
+
+ in.v = (vec_double2)(a);
+ d.v = (vec_float4){0.0f};
+ d.f[0] = (float)in.d[0];
+ d.f[2] = (float)in.d[1];
+
+ return ((qword)(d.v));
+}
+
+/* Select Bits
+ */
+static __inline qword si_selb(qword a, qword b, qword c)
+{
+ return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c))));
+}
+
+
+/* Shuffle Bytes
+ */
+static __inline qword si_shufb(qword a, qword b, qword pattern)
+{
+ vec_uchar16 pat;
+
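+  /* shufb control bytes with the top bit set select constants instead
+     of source bytes (0b10xxxxxx -> 0x00, 0b110xxxxx -> 0xFF,
+     0b111xxxxx -> 0x80).  pat redirects those positions into the
+     constant vector below, which a second permute then applies.  */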
+ pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
+ vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)),
+ vec_sra((vec_uchar16)(pattern), vec_splat_u8(7)));
+ return ((qword)(vec_perm(vec_perm(a, b, pattern),
+ ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),
+ pat)));
+}
+
+
+/* Shift Left
+ */
+static __inline qword si_shlh(qword a, qword b)
+{
+ vec_ushort8 mask;
+
+ mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask)));
+}
+
+static __inline qword si_shl(qword a, qword b)
+{
+ vec_uint4 mask;
+
+ mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask)));
+}
+
+
+static __inline qword si_shlhi(qword a, unsigned int b)
+{
+ vec_ushort8 mask;
+ vec_ushort8 bv;
+
+ bv = vec_splat((vec_ushort8)(si_from_int(b)), 1);
+ mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15));
+ return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask)));
+}
+
+static __inline qword si_shli(qword a, unsigned int b)
+{
+ vec_uint4 bv;
+ vec_uint4 mask;
+
+ bv = vec_splat((vec_uint4)(si_from_uint(b)), 0);
+ mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
+ return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask)));
+}
+
+
+/* Shift Left Quadword
+ */
+static __inline qword si_shlqbii(qword a, unsigned int count)
+{
+ vec_uchar16 x;
+
+ x = vec_splat((vec_uchar16)(si_from_uint(count)), 3);
+ return ((qword)(vec_sll((vec_uchar16)(a), x)));
+}
+
+static __inline qword si_shlqbi(qword a, qword count)
+{
+ vec_uchar16 x;
+
+ x = vec_splat((vec_uchar16)(count), 3);
+ return ((qword)(vec_sll((vec_uchar16)(a), x)));
+}
+
+
+/* Shift Left Quadword by Bytes
+ */
+static __inline qword si_shlqbyi(qword a, unsigned int count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+ vec_uchar16 mask;
+
+ x.i[3] = count << 3;
+ mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
+ return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
+}
+
+static __inline qword si_shlqby(qword a, qword count)
+{
+ union {
+ vec_uchar16 v;
+ unsigned int i[4];
+ } x;
+ unsigned int cnt;
+ vec_uchar16 mask;
+
+ x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
+ cnt = x.i[0];
+ mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
+ return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
+}
+
+/* Shift Left Quadword by Bytes with Bit Count
+ */
+static __inline qword si_shlqbybi(qword a, qword count)
+{
+ union {
+ vec_uchar16 v;
+ int i[4];
+ } x;
+ unsigned int cnt;
+ vec_uchar16 mask;
+
+ x.v = vec_splat((vec_uchar16)(count), 3);
+ cnt = x.i[0];
+ mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
+ return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
+}
+
+
+/* Stop and Signal
+ */
+#define si_stop(_type) SPU_STOP_ACTION
+#define si_stopd(a, b, c) SPU_STOP_ACTION
+
+
+/* Subtract
+ */
+static __inline qword si_sfh(qword a, qword b)
+{
+ return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a))));
+}
+
+static __inline qword si_sf(qword a, qword b)
+{
+ return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a))));
+}
+
+static __inline qword si_fs(qword a, qword b)
+{
+ return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b))));
+}
+
+static __inline qword si_dfs(qword a, qword b)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } aa, bb, dd;
+
+ aa.v = (vec_double2)(a);
+ bb.v = (vec_double2)(b);
+ dd.d[0] = aa.d[0] - bb.d[0];
+ dd.d[1] = aa.d[1] - bb.d[1];
+ return ((qword)(dd.v));
+}
+
+static __inline qword si_sfhi(qword a, short b)
+{
+ return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1),
+ (vec_short8)(a))));
+}
+
+static __inline qword si_sfi(qword a, int b)
+{
+ return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0),
+ (vec_int4)(a))));
+}
+
+/* Subtract word extended
+ */
+#define si_sfx(_a, _b, _c) ((qword)(vec_add(vec_add((vec_uint4)(_b), \
+ vec_nor((vec_uint4)(_a), (vec_uint4)(_a))), \
+ vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
+
+
+/* Sum Bytes into Shorts
+ */
+static __inline qword si_sumb(qword a, qword b)
+{
+ vec_uint4 zero = (vec_uint4){0};
+ vec_ushort8 sum_a, sum_b;
+
+ sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero);
+ sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero);
+
+ return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19, 2, 3, 22, 23, 6, 7,
+ 26, 27, 10, 11, 30, 31, 14, 15}))));
+}
+
+/* Exclusive OR
+ */
+static __inline qword si_xor(qword a, qword b)
+{
+ return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static __inline qword si_xorbi(qword a, unsigned char b)
+{
+ return ((qword)(vec_xor((vec_uchar16)(a),
+ vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
+}
+
+static __inline qword si_xorhi(qword a, unsigned short b)
+{
+ return ((qword)(vec_xor((vec_ushort8)(a),
+ vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
+}
+
+static __inline qword si_xori(qword a, unsigned int b)
+{
+ return ((qword)(vec_xor((vec_uint4)(a),
+ vec_splat((vec_uint4)(si_from_uint(b)), 0))));
+}
+
+
+/* Generate Controls for Sub-Quadword Insertion
+ */
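+/* Each generator builds a shuffle-control quadword for si_shufb: the
+   identity pattern 0x10..0x1F preserves the target quadword, and the
+   element at the insertion address is overwritten with indexes that
+   select the scalar from its preferred slot (0x03 for a byte, 0x0203
+   for a halfword, and so on).  */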
+static __inline qword si_cbd(qword a, int imm)
+{
+ union {
+ vec_uint4 v;
+ unsigned char c[16];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_cdd(qword a, int imm)
+{
+ union {
+ vec_uint4 v;
+ unsigned long long ll[2];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_chd(qword a, int imm)
+{
+ union {
+ vec_uint4 v;
+ unsigned short s[8];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_cwd(qword a, int imm)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_cbx(qword a, qword b)
+{
+ union {
+ vec_uint4 v;
+ unsigned char c[16];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03;
+ return ((qword)(shmask.v));
+}
+
+
+static __inline qword si_cdx(qword a, qword b)
+{
+ union {
+ vec_uint4 v;
+ unsigned long long ll[2];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_chx(qword a, qword b)
+{
+ union {
+ vec_uint4 v;
+ unsigned short s[8];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203;
+ return ((qword)(shmask.v));
+}
+
+static __inline qword si_cwx(qword a, qword b)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } shmask;
+
+ shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
+ shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203;
+ return ((qword)(shmask.v));
+}
+
+
+/* Constant Formation
+ */
+static __inline qword si_il(signed short imm)
+{
+ return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0)));
+}
+
+
+static __inline qword si_ila(unsigned int imm)
+{
+ return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0)));
+}
+
+static __inline qword si_ilh(signed short imm)
+{
+ return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1)));
+}
+
+static __inline qword si_ilhu(signed short imm)
+{
+ return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0)));
+}
+
+static __inline qword si_iohl(qword a, unsigned short imm)
+{
+ return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0))));
+}
+
+/* No Operation
+ */
+#define si_lnop() /* do nothing */
+#define si_nop() /* do nothing */
+
+
+/* Memory Load and Store
+ */
+static __inline qword si_lqa(unsigned int imm)
+{
+ return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
+}
+
+static __inline qword si_lqd(qword a, unsigned int imm)
+{
+ return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm))));
+}
+
+static __inline qword si_lqr(unsigned int imm)
+{
+ return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
+}
+
+static __inline qword si_lqx(qword a, qword b)
+{
+ return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0))));
+}
+
+static __inline void si_stqa(qword a, unsigned int imm)
+{
+ vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
+}
+
+static __inline void si_stqd(qword a, qword b, unsigned int imm)
+{
+ vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm));
+}
+
+static __inline void si_stqr(qword a, unsigned int imm)
+{
+ vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
+}
+
+static __inline void si_stqx(qword a, qword b, qword c)
+{
+ vec_st((vec_uchar16)(a),
+ si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))),
+ (vector unsigned char *)(0));
+}
+
+#endif /* !__SPU__ */
+#endif /* !_SI2VMX_H_ */
+
diff --git a/gcc/config/rs6000/singlefp.h b/gcc/config/rs6000/singlefp.h
new file mode 100644
index 000000000..36e093c1a
--- /dev/null
+++ b/gcc/config/rs6000/singlefp.h
@@ -0,0 +1,40 @@
+/* Definitions for PowerPC single-precision floating point unit
+ such as Xilinx PowerPC 405/440 APU.
+
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Michael Eager (eager@eagercon.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* Undefine definitions from rs6000.h. */
+#undef TARGET_SINGLE_FLOAT
+#undef TARGET_DOUBLE_FLOAT
+#undef TARGET_SINGLE_FPU
+#undef TARGET_SIMPLE_FPU
+#undef UNITS_PER_FP_WORD
+
+/* FPU operations supported.
+   If TARGET_SINGLE_FPU is set, the processor supports single-precision
+   FP operations.  */
+#define TARGET_SINGLE_FLOAT (rs6000_single_float)
+#define TARGET_DOUBLE_FLOAT (rs6000_double_float)
+#define TARGET_SINGLE_FPU 1
+#define TARGET_SIMPLE_FPU (rs6000_simple_fpu)
+
+/* FP word width depends on single/double fp support. */
+#define UNITS_PER_FP_WORD ((TARGET_SOFT_FLOAT || TARGET_DOUBLE_FLOAT) ? 8 : 4)
+
diff --git a/gcc/config/rs6000/sol-ci.asm b/gcc/config/rs6000/sol-ci.asm
new file mode 100644
index 000000000..7c2fbae97
--- /dev/null
+++ b/gcc/config/rs6000/sol-ci.asm
@@ -0,0 +1,94 @@
+# crti.s for sysv4
+
+# Copyright (C) 1996, 2008, 2009 Free Software Foundation, Inc.
+# Written By Michael Meissner
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just supplies labeled starting points for the .got* and other
+# special sections. It is linked in first before other modules.
+
+ .ident "GNU C scrti.s"
+
+#ifndef __powerpc64__
+# Start of .text
+ .section ".text"
+ .globl _ex_text0
+_ex_text0:
+
+# Exception range
+ .section ".exception_ranges","aw"
+ .globl _ex_range0
+_ex_range0:
+
+# List of C++ constructors
+ .section ".ctors","aw"
+ .globl __CTOR_LIST__
+ .type __CTOR_LIST__,@object
+__CTOR_LIST__:
+
+# List of C++ destructors
+ .section ".dtors","aw"
+ .globl __DTOR_LIST__
+ .type __DTOR_LIST__,@object
+__DTOR_LIST__:
+
+# Head of _init function used for static constructors
+ .section ".init","ax"
+ .align 2
+ .globl _init
+ .type _init,@function
+_init: stwu %r1,-16(%r1)
+ mflr %r0
+ stw %r31,12(%r1)
+ stw %r0,16(%r1)
+
+ bl _GLOBAL_OFFSET_TABLE_-4 # get the GOT address
+ mflr %r31
+
+# lwz %r3,_ex_shared0@got(%r31)
+# lwz %r4,-8(%r3) # _ex_register or 0
+# cmpi %cr0,%r4,0
+# beq .Lno_reg
+# mtlr %r4
+# blrl
+#.Lno_reg:
+
+# Head of _fini function used for static destructors
+ .section ".fini","ax"
+ .align 2
+ .globl _fini
+ .type _fini,@function
+_fini: stwu %r1,-16(%r1)
+ mflr %r0
+ stw %r31,12(%r1)
+ stw %r0,16(%r1)
+
+ bl _GLOBAL_OFFSET_TABLE_-4 # get the GOT address
+ mflr %r31
+
+# _environ and its evil twin environ, pointing to the environment
+ .section ".sdata","aw"
+ .align 2
+ .globl _environ
+ .space 4
+ .weak environ
+ .set environ,_environ
+#endif
diff --git a/gcc/config/rs6000/sol-cn.asm b/gcc/config/rs6000/sol-cn.asm
new file mode 100644
index 000000000..4aeacaf2c
--- /dev/null
+++ b/gcc/config/rs6000/sol-cn.asm
@@ -0,0 +1,72 @@
+# crtn.s for sysv4
+
+# Copyright (C) 1996, 2007, 2008, 2009 Free Software Foundation, Inc.
+# Written By Michael Meissner
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just supplies labeled ending points for the .got* and other
+# special sections. It is linked in last after other modules.
+
+ .ident "GNU C scrtn.s"
+
+#ifndef __powerpc64__
+# Default versions of exception handling register/deregister
+ .weak _ex_register
+ .weak _ex_deregister
+ .set _ex_register,0
+ .set _ex_deregister,0
+
+# End list of C++ constructors
+ .section ".ctors","aw"
+ .globl __CTOR_END__
+ .type __CTOR_END__,@object
+__CTOR_END__:
+
+# End list of C++ destructors
+ .section ".dtors","aw"
+ .weak __DTOR_END__
+ .type __DTOR_END__,@object
+__DTOR_END__:
+
+ .section ".text"
+ .globl _ex_text1
+_ex_text1:
+
+ .section ".exception_ranges","aw"
+ .globl _ex_range1
+_ex_range1:
+
+# Tail of _init used for static constructors
+ .section ".init","ax"
+ lwz %r0,16(%r1)
+ lwz %r31,12(%r1)
+ mtlr %r0
+ addi %r1,%r1,16
+ blr
+
+# Tail of _fini used for static destructors
+ .section ".fini","ax"
+ lwz %r0,16(%r1)
+ lwz %r31,12(%r1)
+ mtlr %r0
+ addi %r1,%r1,16
+ blr
+#endif
diff --git a/gcc/config/rs6000/spe.h b/gcc/config/rs6000/spe.h
new file mode 100644
index 000000000..a79318099
--- /dev/null
+++ b/gcc/config/rs6000/spe.h
@@ -0,0 +1,1107 @@
+/* PowerPC E500 user include file.
+ Copyright (C) 2002, 2003, 2004, 2009 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez (aldyh@redhat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPE_H
+#define _SPE_H
+
+#define __vector __attribute__((vector_size(8)))
+
+typedef int int32_t;
+typedef unsigned uint32_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+typedef short __vector __ev64_s16__;
+typedef unsigned short __vector __ev64_u16__;
+typedef int __vector __ev64_s32__;
+typedef unsigned __vector __ev64_u32__;
+typedef long long __vector __ev64_s64__;
+typedef unsigned long long __vector __ev64_u64__;
+typedef float __vector __ev64_fs__;
+
+#define __v2si __ev64_opaque__
+#define __v2sf __ev64_fs__
+
+#define __ev_addw __builtin_spe_evaddw
+#define __ev_addiw __builtin_spe_evaddiw
+#define __ev_subfw(a,b) __builtin_spe_evsubfw ((b), (a))
+#define __ev_subw __builtin_spe_evsubfw
+#define __ev_subifw(a,b) __builtin_spe_evsubifw ((b), (a))
+#define __ev_subiw __builtin_spe_evsubifw
+#define __ev_abs __builtin_spe_evabs
+#define __ev_neg __builtin_spe_evneg
+#define __ev_extsb __builtin_spe_evextsb
+#define __ev_extsh __builtin_spe_evextsh
+#define __ev_and __builtin_spe_evand
+#define __ev_or __builtin_spe_evor
+#define __ev_xor __builtin_spe_evxor
+#define __ev_nand __builtin_spe_evnand
+#define __ev_nor __builtin_spe_evnor
+#define __ev_eqv __builtin_spe_eveqv
+#define __ev_andc __builtin_spe_evandc
+#define __ev_orc __builtin_spe_evorc
+#define __ev_rlw __builtin_spe_evrlw
+#define __ev_rlwi __builtin_spe_evrlwi
+#define __ev_slw __builtin_spe_evslw
+#define __ev_slwi __builtin_spe_evslwi
+#define __ev_srws __builtin_spe_evsrws
+#define __ev_srwu __builtin_spe_evsrwu
+#define __ev_srwis __builtin_spe_evsrwis
+#define __ev_srwiu __builtin_spe_evsrwiu
+#define __ev_cntlzw __builtin_spe_evcntlzw
+#define __ev_cntlsw __builtin_spe_evcntlsw
+#define __ev_rndw __builtin_spe_evrndw
+#define __ev_mergehi __builtin_spe_evmergehi
+#define __ev_mergelo __builtin_spe_evmergelo
+#define __ev_mergelohi __builtin_spe_evmergelohi
+#define __ev_mergehilo __builtin_spe_evmergehilo
+#define __ev_splati __builtin_spe_evsplati
+#define __ev_splatfi __builtin_spe_evsplatfi
+#define __ev_divws __builtin_spe_evdivws
+#define __ev_divwu __builtin_spe_evdivwu
+#define __ev_mra __builtin_spe_evmra
+
+#define __brinc __builtin_spe_brinc
+
+/* Loads. */
+
+#define __ev_lddx __builtin_spe_evlddx
+#define __ev_ldwx __builtin_spe_evldwx
+#define __ev_ldhx __builtin_spe_evldhx
+#define __ev_lwhex __builtin_spe_evlwhex
+#define __ev_lwhoux __builtin_spe_evlwhoux
+#define __ev_lwhosx __builtin_spe_evlwhosx
+#define __ev_lwwsplatx __builtin_spe_evlwwsplatx
+#define __ev_lwhsplatx __builtin_spe_evlwhsplatx
+#define __ev_lhhesplatx __builtin_spe_evlhhesplatx
+#define __ev_lhhousplatx __builtin_spe_evlhhousplatx
+#define __ev_lhhossplatx __builtin_spe_evlhhossplatx
+#define __ev_ldd __builtin_spe_evldd
+#define __ev_ldw __builtin_spe_evldw
+#define __ev_ldh __builtin_spe_evldh
+#define __ev_lwhe __builtin_spe_evlwhe
+#define __ev_lwhou __builtin_spe_evlwhou
+#define __ev_lwhos __builtin_spe_evlwhos
+#define __ev_lwwsplat __builtin_spe_evlwwsplat
+#define __ev_lwhsplat __builtin_spe_evlwhsplat
+#define __ev_lhhesplat __builtin_spe_evlhhesplat
+#define __ev_lhhousplat __builtin_spe_evlhhousplat
+#define __ev_lhhossplat __builtin_spe_evlhhossplat
+
+/* Stores. */
+
+#define __ev_stddx __builtin_spe_evstddx
+#define __ev_stdwx __builtin_spe_evstdwx
+#define __ev_stdhx __builtin_spe_evstdhx
+#define __ev_stwwex __builtin_spe_evstwwex
+#define __ev_stwwox __builtin_spe_evstwwox
+#define __ev_stwhex __builtin_spe_evstwhex
+#define __ev_stwhox __builtin_spe_evstwhox
+#define __ev_stdd __builtin_spe_evstdd
+#define __ev_stdw __builtin_spe_evstdw
+#define __ev_stdh __builtin_spe_evstdh
+#define __ev_stwwe __builtin_spe_evstwwe
+#define __ev_stwwo __builtin_spe_evstwwo
+#define __ev_stwhe __builtin_spe_evstwhe
+#define __ev_stwho __builtin_spe_evstwho
+
+/* Fixed point complex. */
+
+#define __ev_mhossf __builtin_spe_evmhossf
+#define __ev_mhosmf __builtin_spe_evmhosmf
+#define __ev_mhosmi __builtin_spe_evmhosmi
+#define __ev_mhoumi __builtin_spe_evmhoumi
+#define __ev_mhessf __builtin_spe_evmhessf
+#define __ev_mhesmf __builtin_spe_evmhesmf
+#define __ev_mhesmi __builtin_spe_evmhesmi
+#define __ev_mheumi __builtin_spe_evmheumi
+#define __ev_mhossfa __builtin_spe_evmhossfa
+#define __ev_mhosmfa __builtin_spe_evmhosmfa
+#define __ev_mhosmia __builtin_spe_evmhosmia
+#define __ev_mhoumia __builtin_spe_evmhoumia
+#define __ev_mhessfa __builtin_spe_evmhessfa
+#define __ev_mhesmfa __builtin_spe_evmhesmfa
+#define __ev_mhesmia __builtin_spe_evmhesmia
+#define __ev_mheumia __builtin_spe_evmheumia
+
+#define __ev_mhoumf __ev_mhoumi
+#define __ev_mheumf __ev_mheumi
+#define __ev_mhoumfa __ev_mhoumia
+#define __ev_mheumfa __ev_mheumia
+
+#define __ev_mhossfaaw __builtin_spe_evmhossfaaw
+#define __ev_mhossiaaw __builtin_spe_evmhossiaaw
+#define __ev_mhosmfaaw __builtin_spe_evmhosmfaaw
+#define __ev_mhosmiaaw __builtin_spe_evmhosmiaaw
+#define __ev_mhousiaaw __builtin_spe_evmhousiaaw
+#define __ev_mhoumiaaw __builtin_spe_evmhoumiaaw
+#define __ev_mhessfaaw __builtin_spe_evmhessfaaw
+#define __ev_mhessiaaw __builtin_spe_evmhessiaaw
+#define __ev_mhesmfaaw __builtin_spe_evmhesmfaaw
+#define __ev_mhesmiaaw __builtin_spe_evmhesmiaaw
+#define __ev_mheusiaaw __builtin_spe_evmheusiaaw
+#define __ev_mheumiaaw __builtin_spe_evmheumiaaw
+
+#define __ev_mhousfaaw __ev_mhousiaaw
+#define __ev_mhoumfaaw __ev_mhoumiaaw
+#define __ev_mheusfaaw __ev_mheusiaaw
+#define __ev_mheumfaaw __ev_mheumiaaw
+
+#define __ev_mhossfanw __builtin_spe_evmhossfanw
+#define __ev_mhossianw __builtin_spe_evmhossianw
+#define __ev_mhosmfanw __builtin_spe_evmhosmfanw
+#define __ev_mhosmianw __builtin_spe_evmhosmianw
+#define __ev_mhousianw __builtin_spe_evmhousianw
+#define __ev_mhoumianw __builtin_spe_evmhoumianw
+#define __ev_mhessfanw __builtin_spe_evmhessfanw
+#define __ev_mhessianw __builtin_spe_evmhessianw
+#define __ev_mhesmfanw __builtin_spe_evmhesmfanw
+#define __ev_mhesmianw __builtin_spe_evmhesmianw
+#define __ev_mheusianw __builtin_spe_evmheusianw
+#define __ev_mheumianw __builtin_spe_evmheumianw
+
+#define __ev_mhousfanw __ev_mhousianw
+#define __ev_mhoumfanw __ev_mhoumianw
+#define __ev_mheusfanw __ev_mheusianw
+#define __ev_mheumfanw __ev_mheumianw
+
+#define __ev_mhogsmfaa __builtin_spe_evmhogsmfaa
+#define __ev_mhogsmiaa __builtin_spe_evmhogsmiaa
+#define __ev_mhogumiaa __builtin_spe_evmhogumiaa
+#define __ev_mhegsmfaa __builtin_spe_evmhegsmfaa
+#define __ev_mhegsmiaa __builtin_spe_evmhegsmiaa
+#define __ev_mhegumiaa __builtin_spe_evmhegumiaa
+
+#define __ev_mhogumfaa __ev_mhogumiaa
+#define __ev_mhegumfaa __ev_mhegumiaa
+
+#define __ev_mhogsmfan __builtin_spe_evmhogsmfan
+#define __ev_mhogsmian __builtin_spe_evmhogsmian
+#define __ev_mhogumian __builtin_spe_evmhogumian
+#define __ev_mhegsmfan __builtin_spe_evmhegsmfan
+#define __ev_mhegsmian __builtin_spe_evmhegsmian
+#define __ev_mhegumian __builtin_spe_evmhegumian
+
+#define __ev_mhogumfan __ev_mhogumian
+#define __ev_mhegumfan __ev_mhegumian
+
+#define __ev_mwhssf __builtin_spe_evmwhssf
+#define __ev_mwhsmf __builtin_spe_evmwhsmf
+#define __ev_mwhsmi __builtin_spe_evmwhsmi
+#define __ev_mwhumi __builtin_spe_evmwhumi
+#define __ev_mwhssfa __builtin_spe_evmwhssfa
+#define __ev_mwhsmfa __builtin_spe_evmwhsmfa
+#define __ev_mwhsmia __builtin_spe_evmwhsmia
+#define __ev_mwhumia __builtin_spe_evmwhumia
+
+#define __ev_mwhumf __ev_mwhumi
+#define __ev_mwhumfa __ev_mwhumia
+
+#define __ev_mwlumi __builtin_spe_evmwlumi
+#define __ev_mwlumia __builtin_spe_evmwlumia
+#define __ev_mwlumiaaw __builtin_spe_evmwlumiaaw
+
+#define __ev_mwlssiaaw __builtin_spe_evmwlssiaaw
+#define __ev_mwlsmiaaw __builtin_spe_evmwlsmiaaw
+#define __ev_mwlusiaaw __builtin_spe_evmwlusiaaw
+
+#define __ev_mwlssianw __builtin_spe_evmwlssianw
+#define __ev_mwlsmianw __builtin_spe_evmwlsmianw
+#define __ev_mwlusianw __builtin_spe_evmwlusianw
+#define __ev_mwlumianw __builtin_spe_evmwlumianw
+
+#define __ev_mwssf __builtin_spe_evmwssf
+#define __ev_mwsmf __builtin_spe_evmwsmf
+#define __ev_mwsmi __builtin_spe_evmwsmi
+#define __ev_mwumi __builtin_spe_evmwumi
+#define __ev_mwssfa __builtin_spe_evmwssfa
+#define __ev_mwsmfa __builtin_spe_evmwsmfa
+#define __ev_mwsmia __builtin_spe_evmwsmia
+#define __ev_mwumia __builtin_spe_evmwumia
+
+#define __ev_mwumf __ev_mwumi
+#define __ev_mwumfa __ev_mwumia
+
+#define __ev_mwssfaa __builtin_spe_evmwssfaa
+#define __ev_mwsmfaa __builtin_spe_evmwsmfaa
+#define __ev_mwsmiaa __builtin_spe_evmwsmiaa
+#define __ev_mwumiaa __builtin_spe_evmwumiaa
+
+#define __ev_mwumfaa __ev_mwumiaa
+
+#define __ev_mwssfan __builtin_spe_evmwssfan
+#define __ev_mwsmfan __builtin_spe_evmwsmfan
+#define __ev_mwsmian __builtin_spe_evmwsmian
+#define __ev_mwumian __builtin_spe_evmwumian
+
+#define __ev_mwumfan __ev_mwumian
+
+#define __ev_addssiaaw __builtin_spe_evaddssiaaw
+#define __ev_addsmiaaw __builtin_spe_evaddsmiaaw
+#define __ev_addusiaaw __builtin_spe_evaddusiaaw
+#define __ev_addumiaaw __builtin_spe_evaddumiaaw
+
+#define __ev_addusfaaw __ev_addusiaaw
+#define __ev_addumfaaw __ev_addumiaaw
+#define __ev_addsmfaaw __ev_addsmiaaw
+#define __ev_addssfaaw __ev_addssiaaw
+
+#define __ev_subfssiaaw __builtin_spe_evsubfssiaaw
+#define __ev_subfsmiaaw __builtin_spe_evsubfsmiaaw
+#define __ev_subfusiaaw __builtin_spe_evsubfusiaaw
+#define __ev_subfumiaaw __builtin_spe_evsubfumiaaw
+
+#define __ev_subfusfaaw __ev_subfusiaaw
+#define __ev_subfumfaaw __ev_subfumiaaw
+#define __ev_subfsmfaaw __ev_subfsmiaaw
+#define __ev_subfssfaaw __ev_subfssiaaw
+
+/* Floating Point SIMD Instructions */
+
+#define __ev_fsabs __builtin_spe_evfsabs
+#define __ev_fsnabs __builtin_spe_evfsnabs
+#define __ev_fsneg __builtin_spe_evfsneg
+#define __ev_fsadd __builtin_spe_evfsadd
+#define __ev_fssub __builtin_spe_evfssub
+#define __ev_fsmul __builtin_spe_evfsmul
+#define __ev_fsdiv __builtin_spe_evfsdiv
+#define __ev_fscfui __builtin_spe_evfscfui
+#define __ev_fscfsi __builtin_spe_evfscfsi
+#define __ev_fscfuf __builtin_spe_evfscfuf
+#define __ev_fscfsf __builtin_spe_evfscfsf
+#define __ev_fsctui __builtin_spe_evfsctui
+#define __ev_fsctsi __builtin_spe_evfsctsi
+#define __ev_fsctuf __builtin_spe_evfsctuf
+#define __ev_fsctsf __builtin_spe_evfsctsf
+#define __ev_fsctuiz __builtin_spe_evfsctuiz
+#define __ev_fsctsiz __builtin_spe_evfsctsiz
+
+/* NOT SUPPORTED IN FIRST e500, support via two instructions: */
+
+#define __ev_mwhusfaaw __ev_mwhusiaaw
+#define __ev_mwhumfaaw __ev_mwhumiaaw
+#define __ev_mwhusfanw __ev_mwhusianw
+#define __ev_mwhumfanw __ev_mwhumianw
+#define __ev_mwhgumfaa __ev_mwhgumiaa
+#define __ev_mwhgumfan __ev_mwhgumian
+
+#define __ev_mwhgssfaa __internal_ev_mwhgssfaa
+#define __ev_mwhgsmfaa __internal_ev_mwhgsmfaa
+#define __ev_mwhgsmiaa __internal_ev_mwhgsmiaa
+#define __ev_mwhgumiaa __internal_ev_mwhgumiaa
+#define __ev_mwhgssfan __internal_ev_mwhgssfan
+#define __ev_mwhgsmfan __internal_ev_mwhgsmfan
+#define __ev_mwhgsmian __internal_ev_mwhgsmian
+#define __ev_mwhgumian __internal_ev_mwhgumian
+#define __ev_mwhssiaaw __internal_ev_mwhssiaaw
+#define __ev_mwhssfaaw __internal_ev_mwhssfaaw
+#define __ev_mwhsmfaaw __internal_ev_mwhsmfaaw
+#define __ev_mwhsmiaaw __internal_ev_mwhsmiaaw
+#define __ev_mwhusiaaw __internal_ev_mwhusiaaw
+#define __ev_mwhumiaaw __internal_ev_mwhumiaaw
+#define __ev_mwhssfanw __internal_ev_mwhssfanw
+#define __ev_mwhssianw __internal_ev_mwhssianw
+#define __ev_mwhsmfanw __internal_ev_mwhsmfanw
+#define __ev_mwhsmianw __internal_ev_mwhsmianw
+#define __ev_mwhusianw __internal_ev_mwhusianw
+#define __ev_mwhumianw __internal_ev_mwhumianw
+
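+/* Each helper below synthesizes the missing operation from a supported
+   multiply followed by an accumulate (for the __ev_mwhg* forms, a
+   multiply-accumulate against {1, 1}).  */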
+static inline __ev64_opaque__
+__internal_ev_mwhssfaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhssf (a, b);
+ return __ev_addssiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhssiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_addssiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhsmfaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmf (a, b);
+ return __ev_addsmiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhsmiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_addsmiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhusiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_addusiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhumiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_addumiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhssfanw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhssf (a, b);
+ return __ev_subfssiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhssianw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_subfssiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhsmfanw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmf (a, b);
+ return __ev_subfsmiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhsmianw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_subfsmiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhusianw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_subfusiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhumianw (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_subfumiaaw (t);
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgssfaa (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhssf (a, b);
+ return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgsmfaa (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmf (a, b);
+ return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgsmiaa (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_mwsmiaa (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgumiaa (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_mwumiaa (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgssfan (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhssf (a, b);
+ return __ev_mwsmian (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgsmfan (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmf (a, b);
+ return __ev_mwsmian (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgsmian (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhsmi (a, b);
+ return __ev_mwsmian (t, ((__ev64_s32__){1, 1}));
+}
+
+static inline __ev64_opaque__
+__internal_ev_mwhgumian (__ev64_opaque__ a, __ev64_opaque__ b)
+{
+ __ev64_opaque__ t;
+
+ t = __ev_mwhumi (a, b);
+ return __ev_mwumian (t, ((__ev64_s32__){1, 1}));
+}
+
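+/* All of the __internal_ev_mwh* helpers above follow one pattern: a
+   half-word multiply builtin produces the products, and a second
+   builtin folds them into the accumulator.  The *gs* variants have no
+   matching accumulate builtin, so they multiply the products by the
+   vector {1, 1} with __ev_mwsmiaa or __ev_mwsmian purely to route the
+   result through the accumulate-add or accumulate-negate path.  */
+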
+/* END OF NOT SUPPORTED */
+
+/* __ev_create* functions. */
+
+#define __ev_create_ufix32_u32 __ev_create_u32
+#define __ev_create_sfix32_s32 __ev_create_s32
+
+static inline __ev64_opaque__
+__ev_create_s16 (int16_t a, int16_t b, int16_t c, int16_t d)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int16_t i[4];
+ } u;
+
+ u.i[0] = a;
+ u.i[1] = b;
+ u.i[2] = c;
+ u.i[3] = d;
+
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_u16 (uint16_t a, uint16_t b, uint16_t c, uint16_t d)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint16_t i[4];
+ } u;
+
+ u.i[0] = a;
+ u.i[1] = b;
+ u.i[2] = c;
+ u.i[3] = d;
+
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_s32 (int32_t a, int32_t b)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int32_t i[2];
+ } u;
+
+ u.i[0] = a;
+ u.i[1] = b;
+
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_u32 (uint32_t a, uint32_t b)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint32_t i[2];
+ } u;
+
+ u.i[0] = a;
+ u.i[1] = b;
+
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_fs (float a, float b)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ float f[2];
+ } u;
+
+ u.f[0] = a;
+ u.f[1] = b;
+
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_sfix32_fs (float a, float b)
+{
+ __ev64_opaque__ ev;
+
+ ev = (__ev64_opaque__) __ev_create_fs (a, b);
+ return (__ev64_opaque__) __builtin_spe_evfsctsf ((__v2sf) ev);
+}
+
+static inline __ev64_opaque__
+__ev_create_ufix32_fs (float a, float b)
+{
+ __ev64_opaque__ ev;
+
+ ev = (__ev64_opaque__) __ev_create_fs (a, b);
+ return (__ev64_opaque__) __builtin_spe_evfsctuf ((__v2sf) ev);
+}
+
+static inline __ev64_opaque__
+__ev_create_s64 (int64_t a)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int64_t i;
+ } u;
+
+ u.i = a;
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_create_u64 (uint64_t a)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint64_t i;
+ } u;
+
+ u.i = a;
+ return u.v;
+}
+
+static inline uint64_t
+__ev_convert_u64 (__ev64_opaque__ a)
+{
+ return (uint64_t) a;
+}
+
+static inline int64_t
+__ev_convert_s64 (__ev64_opaque__ a)
+{
+ return (int64_t) a;
+}
+
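+/* Usage sketch (illustrative only; the variable names are
+   hypothetical):
+
+     __ev64_opaque__ v    = __ev_create_s32 (-5, 7);
+     uint64_t        bits = __ev_convert_u64 (v);
+
+   builds a vector whose upper word is -5 and lower word is 7, then
+   reinterprets the 64-bit register image as an integer.  The unions
+   above perform the reinterpretation without violating C aliasing
+   rules.  */
+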
+/* __ev_get_* functions. */
+
+#define __ev_get_upper_u32(a) __ev_get_u32_internal ((a), 0)
+#define __ev_get_lower_u32(a) __ev_get_u32_internal ((a), 1)
+#define __ev_get_upper_s32(a) __ev_get_s32_internal ((a), 0)
+#define __ev_get_lower_s32(a) __ev_get_s32_internal ((a), 1)
+#define __ev_get_upper_fs(a) __ev_get_fs_internal ((a), 0)
+#define __ev_get_lower_fs(a) __ev_get_fs_internal ((a), 1)
+#define __ev_get_upper_ufix32_u32 __ev_get_upper_u32
+#define __ev_get_lower_ufix32_u32 __ev_get_lower_u32
+#define __ev_get_upper_sfix32_s32 __ev_get_upper_s32
+#define __ev_get_lower_sfix32_s32 __ev_get_lower_s32
+#define __ev_get_upper_sfix32_fs(a) __ev_get_sfix32_fs ((a), 0)
+#define __ev_get_lower_sfix32_fs(a) __ev_get_sfix32_fs ((a), 1)
+#define __ev_get_upper_ufix32_fs(a) __ev_get_ufix32_fs ((a), 0)
+#define __ev_get_lower_ufix32_fs(a) __ev_get_ufix32_fs ((a), 1)
+
+#define __ev_get_u32 __ev_get_u32_internal
+#define __ev_get_s32 __ev_get_s32_internal
+#define __ev_get_fs __ev_get_fs_internal
+#define __ev_get_u16 __ev_get_u16_internal
+#define __ev_get_s16 __ev_get_s16_internal
+
+#define __ev_get_ufix32_u32 __ev_get_u32
+#define __ev_get_sfix32_s32 __ev_get_s32
+#define __ev_get_ufix32_fs __ev_get_ufix32_fs_internal
+#define __ev_get_sfix32_fs __ev_get_sfix32_fs_internal
+
+static inline uint32_t
+__ev_get_u32_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint32_t i[2];
+ } u;
+
+ u.v = a;
+ return u.i[pos];
+}
+
+static inline int32_t
+__ev_get_s32_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int32_t i[2];
+ } u;
+
+ u.v = a;
+ return u.i[pos];
+}
+
+static inline float
+__ev_get_fs_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ float f[2];
+ } u;
+
+ u.v = a;
+ return u.f[pos];
+}
+
+static inline float
+__ev_get_sfix32_fs_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ __ev64_fs__ v;
+
+ v = __builtin_spe_evfscfsf ((__v2sf) a);
+ return __ev_get_fs_internal ((__ev64_opaque__) v, pos);
+}
+
+static inline float
+__ev_get_ufix32_fs_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ __ev64_fs__ v;
+
+ v = __builtin_spe_evfscfuf ((__v2sf) a);
+ return __ev_get_fs_internal ((__ev64_opaque__) v, pos);
+}
+
+static inline uint16_t
+__ev_get_u16_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint16_t i[4];
+ } u;
+
+ u.v = a;
+ return u.i[pos];
+}
+
+static inline int16_t
+__ev_get_s16_internal (__ev64_opaque__ a, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int16_t i[4];
+ } u;
+
+ u.v = a;
+ return u.i[pos];
+}
+
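+/* Usage sketch (illustrative; hypothetical variable names).  Position
+   0 selects the upper half and position 1 the lower half, matching
+   the upper/lower macros defined above:
+
+     uint32_t hi = __ev_get_upper_u32 (v);   equivalent to __ev_get_u32 (v, 0)
+     uint32_t lo = __ev_get_lower_u32 (v);   equivalent to __ev_get_u32 (v, 1)
+*/
+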
+/* __ev_set_* functions. */
+
+#define __ev_set_u32 __ev_set_u32_internal
+#define __ev_set_s32 __ev_set_s32_internal
+#define __ev_set_fs __ev_set_fs_internal
+#define __ev_set_u16 __ev_set_u16_internal
+#define __ev_set_s16 __ev_set_s16_internal
+
+#define __ev_set_ufix32_u32 __ev_set_u32
+#define __ev_set_sfix32_s32 __ev_set_s32
+
+#define __ev_set_sfix32_fs __ev_set_sfix32_fs_internal
+#define __ev_set_ufix32_fs __ev_set_ufix32_fs_internal
+
+#define __ev_set_upper_u32(a, b) __ev_set_u32 (a, b, 0)
+#define __ev_set_lower_u32(a, b) __ev_set_u32 (a, b, 1)
+#define __ev_set_upper_s32(a, b) __ev_set_s32 (a, b, 0)
+#define __ev_set_lower_s32(a, b) __ev_set_s32 (a, b, 1)
+#define __ev_set_upper_fs(a, b) __ev_set_fs (a, b, 0)
+#define __ev_set_lower_fs(a, b) __ev_set_fs (a, b, 1)
+#define __ev_set_upper_ufix32_u32 __ev_set_upper_u32
+#define __ev_set_lower_ufix32_u32 __ev_set_lower_u32
+#define __ev_set_upper_sfix32_s32 __ev_set_upper_s32
+#define __ev_set_lower_sfix32_s32 __ev_set_lower_s32
+#define __ev_set_upper_sfix32_fs(a, b) __ev_set_sfix32_fs (a, b, 0)
+#define __ev_set_lower_sfix32_fs(a, b) __ev_set_sfix32_fs (a, b, 1)
+#define __ev_set_upper_ufix32_fs(a, b) __ev_set_ufix32_fs (a, b, 0)
+#define __ev_set_lower_ufix32_fs(a, b) __ev_set_ufix32_fs (a, b, 1)
+
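+/* Accumulator setters.  The 64-bit accumulator is written with evmra
+   (via __ev_mra), so the u64/s64 helpers below first build the
+   register image with __ev_create_u64 or __ev_create_s64 and then
+   copy it into the accumulator, returning the register value.  */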
+#define __ev_set_acc_vec64 __builtin_spe_evmra
+
+static inline __ev64_opaque__
+__ev_set_acc_u64 (uint64_t a)
+{
+ __ev64_opaque__ ev32;
+ ev32 = __ev_create_u64 (a);
+ __ev_mra (ev32);
+ return ev32;
+}
+
+static inline __ev64_opaque__
+__ev_set_acc_s64 (int64_t a)
+{
+ __ev64_opaque__ ev32;
+ ev32 = __ev_create_s64 (a);
+ __ev_mra (ev32);
+ return ev32;
+}
+
+static inline __ev64_opaque__
+__ev_set_u32_internal (__ev64_opaque__ a, uint32_t b, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint32_t i[2];
+ } u;
+
+ u.v = a;
+ u.i[pos] = b;
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_set_s32_internal (__ev64_opaque__ a, int32_t b, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int32_t i[2];
+ } u;
+
+ u.v = a;
+ u.i[pos] = b;
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_set_fs_internal (__ev64_opaque__ a, float b, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ float f[2];
+ } u;
+
+ u.v = a;
+ u.f[pos] = b;
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_set_sfix32_fs_internal (__ev64_opaque__ a, float b, uint32_t pos)
+{
+ __ev64_opaque__ v;
+ float other;
+
+ /* Get other half. */
+ other = __ev_get_fs_internal (a, pos ^ 1);
+
+ /* Make an sfix32 with 'b'. */
+ v = __ev_create_sfix32_fs (b, b);
+
+ /* Set other half to what it used to be. */
+ return __ev_set_fs_internal (v, other, pos ^ 1);
+}
+
+static inline __ev64_opaque__
+__ev_set_ufix32_fs_internal (__ev64_opaque__ a, float b, uint32_t pos)
+{
+ __ev64_opaque__ v;
+ float other;
+
+ /* Get other half. */
+ other = __ev_get_fs_internal (a, pos ^ 1);
+
+ /* Make an ufix32 with 'b'. */
+ v = __ev_create_ufix32_fs (b, b);
+
+ /* Set other half to what it used to be. */
+ return __ev_set_fs_internal (v, other, pos ^ 1);
+}
+
+static inline __ev64_opaque__
+__ev_set_u16_internal (__ev64_opaque__ a, uint16_t b, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ uint16_t i[4];
+ } u;
+
+ u.v = a;
+ u.i[pos] = b;
+ return u.v;
+}
+
+static inline __ev64_opaque__
+__ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos)
+{
+ union
+ {
+ __ev64_opaque__ v;
+ int16_t i[4];
+ } u;
+
+ u.v = a;
+ u.i[pos] = b;
+ return u.v;
+}
+
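+/* Usage sketch (illustrative; hypothetical names).  Each setter
+   returns a new vector rather than modifying its argument:
+
+     v = __ev_set_lower_s32 (v, 42);        replace the lower word
+     v = __ev_set_upper_fs (v, 1.0f);       replace the upper float
+
+   The sfix32/ufix32 variants convert the incoming float to fixed
+   point first, then splice it into the requested half.  */
+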
+/* Predicates. */
+
+#define __pred_all 0
+#define __pred_any 1
+#define __pred_upper 2
+#define __pred_lower 3
+
+#define __ev_any_gts(a, b) __builtin_spe_evcmpgts (__pred_any, (a), (b))
+#define __ev_all_gts(a, b) __builtin_spe_evcmpgts (__pred_all, (a), (b))
+#define __ev_upper_gts(a, b) __builtin_spe_evcmpgts (__pred_upper, (a), (b))
+#define __ev_lower_gts(a, b) __builtin_spe_evcmpgts (__pred_lower, (a), (b))
+#define __ev_select_gts __builtin_spe_evsel_gts
+
+#define __ev_any_gtu(a, b) __builtin_spe_evcmpgtu (__pred_any, (a), (b))
+#define __ev_all_gtu(a, b) __builtin_spe_evcmpgtu (__pred_all, (a), (b))
+#define __ev_upper_gtu(a, b) __builtin_spe_evcmpgtu (__pred_upper, (a), (b))
+#define __ev_lower_gtu(a, b) __builtin_spe_evcmpgtu (__pred_lower, (a), (b))
+#define __ev_select_gtu __builtin_spe_evsel_gtu
+
+#define __ev_any_lts(a, b) __builtin_spe_evcmplts (__pred_any, (a), (b))
+#define __ev_all_lts(a, b) __builtin_spe_evcmplts (__pred_all, (a), (b))
+#define __ev_upper_lts(a, b) __builtin_spe_evcmplts (__pred_upper, (a), (b))
+#define __ev_lower_lts(a, b) __builtin_spe_evcmplts (__pred_lower, (a), (b))
+#define __ev_select_lts(a, b, c, d) ((__v2si) __builtin_spe_evsel_lts ((a), (b), (c), (d)))
+
+#define __ev_any_ltu(a, b) __builtin_spe_evcmpltu (__pred_any, (a), (b))
+#define __ev_all_ltu(a, b) __builtin_spe_evcmpltu (__pred_all, (a), (b))
+#define __ev_upper_ltu(a, b) __builtin_spe_evcmpltu (__pred_upper, (a), (b))
+#define __ev_lower_ltu(a, b) __builtin_spe_evcmpltu (__pred_lower, (a), (b))
+#define __ev_select_ltu __builtin_spe_evsel_ltu
+
+#define __ev_any_eq(a, b) __builtin_spe_evcmpeq (__pred_any, (a), (b))
+#define __ev_all_eq(a, b) __builtin_spe_evcmpeq (__pred_all, (a), (b))
+#define __ev_upper_eq(a, b) __builtin_spe_evcmpeq (__pred_upper, (a), (b))
+#define __ev_lower_eq(a, b) __builtin_spe_evcmpeq (__pred_lower, (a), (b))
+#define __ev_select_eq __builtin_spe_evsel_eq
+
+#define __ev_any_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_any, (a), (b))
+#define __ev_all_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_all, (a), (b))
+#define __ev_upper_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_upper, (a), (b))
+#define __ev_lower_fs_gt(a, b) __builtin_spe_evfscmpgt (__pred_lower, (a), (b))
+#define __ev_select_fs_gt __builtin_spe_evsel_fsgt
+
+#define __ev_any_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_any, (a), (b))
+#define __ev_all_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_all, (a), (b))
+#define __ev_upper_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_upper, (a), (b))
+#define __ev_lower_fs_lt(a, b) __builtin_spe_evfscmplt (__pred_lower, (a), (b))
+#define __ev_select_fs_lt __builtin_spe_evsel_fslt
+
+#define __ev_any_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_any, (a), (b))
+#define __ev_all_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_all, (a), (b))
+#define __ev_upper_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_upper, (a), (b))
+#define __ev_lower_fs_eq(a, b) __builtin_spe_evfscmpeq (__pred_lower, (a), (b))
+#define __ev_select_fs_eq __builtin_spe_evsel_fseq
+
+#define __ev_any_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_any, (a), (b))
+#define __ev_all_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_all, (a), (b))
+#define __ev_upper_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_upper, (a), (b))
+#define __ev_lower_fs_tst_gt(a, b) __builtin_spe_evfststgt (__pred_lower, (a), (b))
+#define __ev_select_fs_tst_gt __builtin_spe_evsel_fststgt
+
+#define __ev_any_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_any, (a), (b))
+#define __ev_all_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_all, (a), (b))
+#define __ev_upper_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_upper, (a), (b))
+#define __ev_lower_fs_tst_lt(a, b) __builtin_spe_evfststlt (__pred_lower, (a), (b))
+#define __ev_select_fs_tst_lt __builtin_spe_evsel_fststlt
+
+#define __ev_any_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_any, (a), (b))
+#define __ev_all_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_all, (a), (b))
+#define __ev_upper_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_upper, (a), (b))
+#define __ev_lower_fs_tst_eq(a, b) __builtin_spe_evfststeq (__pred_lower, (a), (b))
+#define __ev_select_fs_tst_eq __builtin_spe_evsel_fststeq
+
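+/* Usage sketch (illustrative; hypothetical names).  The any/all and
+   upper/lower forms yield a truth value, while the select forms pick
+   each element from the third or fourth operand according to the
+   comparison of the first two:
+
+     if (__ev_any_gts (a, b))
+       c = __ev_select_gts (a, b, a, b);    per-element signed max
+*/
+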
+/* SPEFSCR accessor functions. */
+
+#define __SPEFSCR_SOVH 0x80000000
+#define __SPEFSCR_OVH 0x40000000
+#define __SPEFSCR_FGH 0x20000000
+#define __SPEFSCR_FXH 0x10000000
+#define __SPEFSCR_FINVH 0x08000000
+#define __SPEFSCR_FDBZH 0x04000000
+#define __SPEFSCR_FUNFH 0x02000000
+#define __SPEFSCR_FOVFH 0x01000000
+/* 2 unused bits. */
+#define __SPEFSCR_FINXS 0x00200000
+#define __SPEFSCR_FINVS 0x00100000
+#define __SPEFSCR_FDBZS 0x00080000
+#define __SPEFSCR_FUNFS 0x00040000
+#define __SPEFSCR_FOVFS 0x00020000
+#define __SPEFSCR_MODE 0x00010000
+#define __SPEFSCR_SOV 0x00008000
+#define __SPEFSCR_OV 0x00004000
+#define __SPEFSCR_FG 0x00002000
+#define __SPEFSCR_FX 0x00001000
+#define __SPEFSCR_FINV 0x00000800
+#define __SPEFSCR_FDBZ 0x00000400
+#define __SPEFSCR_FUNF 0x00000200
+#define __SPEFSCR_FOVF 0x00000100
+/* 1 unused bit. */
+#define __SPEFSCR_FINXE 0x00000040
+#define __SPEFSCR_FINVE 0x00000020
+#define __SPEFSCR_FDBZE 0x00000010
+#define __SPEFSCR_FUNFE 0x00000008
+#define __SPEFSCR_FOVFE 0x00000004
+#define __SPEFSCR_FRMC 0x00000003
+
+#define __ev_get_spefscr_sovh() (__builtin_spe_mfspefscr () & __SPEFSCR_SOVH)
+#define __ev_get_spefscr_ovh() (__builtin_spe_mfspefscr () & __SPEFSCR_OVH)
+#define __ev_get_spefscr_fgh() (__builtin_spe_mfspefscr () & __SPEFSCR_FGH)
+#define __ev_get_spefscr_fxh() (__builtin_spe_mfspefscr () & __SPEFSCR_FXH)
+#define __ev_get_spefscr_finvh() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVH)
+#define __ev_get_spefscr_fdbzh() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZH)
+#define __ev_get_spefscr_funfh() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFH)
+#define __ev_get_spefscr_fovfh() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFH)
+#define __ev_get_spefscr_finxs() (__builtin_spe_mfspefscr () & __SPEFSCR_FINXS)
+#define __ev_get_spefscr_finvs() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVS)
+#define __ev_get_spefscr_fdbzs() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZS)
+#define __ev_get_spefscr_funfs() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFS)
+#define __ev_get_spefscr_fovfs() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFS)
+#define __ev_get_spefscr_mode() (__builtin_spe_mfspefscr () & __SPEFSCR_MODE)
+#define __ev_get_spefscr_sov() (__builtin_spe_mfspefscr () & __SPEFSCR_SOV)
+#define __ev_get_spefscr_ov() (__builtin_spe_mfspefscr () & __SPEFSCR_OV)
+#define __ev_get_spefscr_fg() (__builtin_spe_mfspefscr () & __SPEFSCR_FG)
+#define __ev_get_spefscr_fx() (__builtin_spe_mfspefscr () & __SPEFSCR_FX)
+#define __ev_get_spefscr_finv() (__builtin_spe_mfspefscr () & __SPEFSCR_FINV)
+#define __ev_get_spefscr_fdbz() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZ)
+#define __ev_get_spefscr_funf() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNF)
+#define __ev_get_spefscr_fovf() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVF)
+#define __ev_get_spefscr_finxe() (__builtin_spe_mfspefscr () & __SPEFSCR_FINXE)
+#define __ev_get_spefscr_finve() (__builtin_spe_mfspefscr () & __SPEFSCR_FINVE)
+#define __ev_get_spefscr_fdbze() (__builtin_spe_mfspefscr () & __SPEFSCR_FDBZE)
+#define __ev_get_spefscr_funfe() (__builtin_spe_mfspefscr () & __SPEFSCR_FUNFE)
+#define __ev_get_spefscr_fovfe() (__builtin_spe_mfspefscr () & __SPEFSCR_FOVFE)
+#define __ev_get_spefscr_frmc() (__builtin_spe_mfspefscr () & __SPEFSCR_FRMC)
+
+static inline void
+__ev_clr_spefscr_field (int mask)
+{
+ int i;
+
+ i = __builtin_spe_mfspefscr ();
+ i &= ~mask;
+ __builtin_spe_mtspefscr (i);
+}
+
+#define __ev_clr_spefscr_sovh() __ev_clr_spefscr_field (__SPEFSCR_SOVH)
+#define __ev_clr_spefscr_sov() __ev_clr_spefscr_field (__SPEFSCR_SOV)
+#define __ev_clr_spefscr_finxs() __ev_clr_spefscr_field (__SPEFSCR_FINXS)
+#define __ev_clr_spefscr_finvs() __ev_clr_spefscr_field (__SPEFSCR_FINVS)
+#define __ev_clr_spefscr_fdbzs() __ev_clr_spefscr_field (__SPEFSCR_FDBZS)
+#define __ev_clr_spefscr_funfs() __ev_clr_spefscr_field (__SPEFSCR_FUNFS)
+#define __ev_clr_spefscr_fovfs() __ev_clr_spefscr_field (__SPEFSCR_FOVFS)
+
+/* Set rounding mode:
+ rnd = 0 (nearest)
+ rnd = 1 (zero)
+ rnd = 2 (+inf)
+ rnd = 3 (-inf). */
+
+static inline void
+__ev_set_spefscr_frmc (int rnd)
+{
+ int i;
+
+ i = __builtin_spe_mfspefscr ();
+ i &= ~__SPEFSCR_FRMC;
+ i |= rnd;
+ __builtin_spe_mtspefscr (i);
+}
+
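+/* Usage sketch (illustrative).  Typical flag handling: test a sticky
+   status bit, clear it, then select round-to-zero:
+
+     if (__ev_get_spefscr_fovfs ())    floating-point overflow seen?
+       __ev_clr_spefscr_fovfs ();      clear the sticky bit
+     __ev_set_spefscr_frmc (1);        round toward zero
+*/
+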
+/* The SPE PIM says these are declared in <spe.h>, although they are
+ not provided by GCC: they must be taken from a separate
+ library. */
+extern short int atosfix16 (const char *);
+extern int atosfix32 (const char *);
+extern long long atosfix64 (const char *);
+
+extern unsigned short atoufix16 (const char *);
+extern unsigned int atoufix32 (const char *);
+extern unsigned long long atoufix64 (const char *);
+
+extern short int strtosfix16 (const char *, char **);
+extern int strtosfix32 (const char *, char **);
+extern long long strtosfix64 (const char *, char **);
+
+extern unsigned short int strtoufix16 (const char *, char **);
+extern unsigned int strtoufix32 (const char *, char **);
+extern unsigned long long strtoufix64 (const char *, char **);
+
+#endif /* _SPE_H */
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md
new file mode 100644
index 000000000..d50ad1aad
--- /dev/null
+++ b/gcc/config/rs6000/spe.md
@@ -0,0 +1,3190 @@
+;; e500 SPE description
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Aldy Hernandez (aldy@quesejoda.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [(CMPDFEQ_GPR 1006)
+ (TSTDFEQ_GPR 1007)
+ (CMPDFGT_GPR 1008)
+ (TSTDFGT_GPR 1009)
+ (CMPDFLT_GPR 1010)
+ (TSTDFLT_GPR 1011)
+ (CMPTFEQ_GPR 1012)
+ (TSTTFEQ_GPR 1013)
+ (CMPTFGT_GPR 1014)
+ (TSTTFGT_GPR 1015)
+ (CMPTFLT_GPR 1016)
+ (TSTTFLT_GPR 1017)
+ (E500_CR_IOR_COMPARE 1018)
+ ])
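+
+;; The constants above are symbolic unspec numbers; naming them keeps
+;; the e500 floating-point compare patterns that use them readable.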
+
+;; Modes using a 64-bit register.
+(define_mode_iterator SPE64 [DF V4HI V2SF V1DI V2SI])
+
+;; Likewise, but allow TFmode (two registers) as well.
+(define_mode_iterator SPE64TF [DF V4HI V2SF V1DI V2SI TF])
+
+;; DImode and TImode.
+(define_mode_iterator DITI [DI TI])
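+
+;; Illustrative note: a mode iterator expands one template into a
+;; concrete pattern per listed mode.  A hypothetical pattern such as
+;;
+;;   (define_insn "*sketch_mov<mode>"
+;;     [(set (match_operand:SPE64 0 "gpc_reg_operand" "=r")
+;;           (match_operand:SPE64 1 "gpc_reg_operand" "r"))]
+;;     "TARGET_SPE"
+;;     "evor %0,%1,%1")
+;;
+;; would yield separate DF, V4HI, V2SF, V1DI and V2SI variants.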
+
+(define_insn "*negsf2_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (neg:SF (match_operand:SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsneg %0,%1"
+ [(set_attr "type" "fpsimple")])
+
+(define_insn "*abssf2_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (abs:SF (match_operand:SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsabs %0,%1"
+ [(set_attr "type" "fpsimple")])
+
+(define_insn "*nabssf2_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "r"))))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsnabs %0,%1"
+ [(set_attr "type" "fpsimple")])
+
+(define_insn "*addsf3_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%r")
+ (match_operand:SF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsadd %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "*subsf3_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (minus:SF (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efssub %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "*mulsf3_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%r")
+ (match_operand:SF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsmul %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "*divsf3_gpr"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (div:SF (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsdiv %0,%1,%2"
+ [(set_attr "type" "vecfdiv")])
+
+;; Floating point conversion instructions.
+
+(define_insn "spe_fixuns_truncdfsi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unsigned_fix:SI (match_operand:DF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdctuiz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_extendsfdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (float_extend:DF (match_operand:SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdcfs %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unsigned_fix:SI (match_operand:SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsctuiz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_fix_truncsfsi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (fix:SI (match_operand:SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efsctsiz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_fix_truncdfsi2"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (fix:SI (match_operand:DF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdctsiz %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_floatunssisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (unsigned_float:SF (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efscfui %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_floatunssidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (unsigned_float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdcfui %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_floatsisf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (float:SF (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "efscfsi %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "spe_floatsidf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (float:DF (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdcfsi %0,%1"
+ [(set_attr "type" "fp")])
+
+;; SPE SIMD instructions
+
+(define_insn "absv2si2"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (abs:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evabs %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evandc"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (not:V2SI (match_operand:V2SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_SPE"
+ "evandc %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "andv2si3"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evand %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+;; Vector compare instructions
+
+(define_insn "spe_evcmpeq"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 500))]
+ "TARGET_SPE"
+ "evcmpeq %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evcmpgts"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 501))]
+ "TARGET_SPE"
+ "evcmpgts %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evcmpgtu"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 502))]
+ "TARGET_SPE"
+ "evcmpgtu %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evcmplts"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 503))]
+ "TARGET_SPE"
+ "evcmplts %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evcmpltu"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 504))]
+ "TARGET_SPE"
+ "evcmpltu %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+;; Floating point vector compare instructions
+
+(define_insn "spe_evfscmpeq"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 538))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfscmpeq %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscmpgt"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 539))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfscmpgt %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscmplt"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 540))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfscmplt %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfststeq"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 541))]
+ "TARGET_SPE"
+ "evfststeq %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfststgt"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 542))]
+ "TARGET_SPE"
+ "evfststgt %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfststlt"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+ (unspec:CC [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")] 543))]
+ "TARGET_SPE"
+ "evfststlt %0,%1,%2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+;; End of vector compare instructions
+
+(define_insn "spe_evcntlsw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 505))]
+ "TARGET_SPE"
+ "evcntlsw %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evcntlzw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 506))]
+ "TARGET_SPE"
+ "evcntlzw %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_eveqv"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (not:V2SI (xor:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_SPE"
+ "eveqv %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evextsb"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 507))]
+ "TARGET_SPE"
+ "evextsb %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evextsh"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 508))]
+ "TARGET_SPE"
+ "evextsh %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhesplat"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 509)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlhhesplat %0,%2*2(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhesplatx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 510)]
+ "TARGET_SPE"
+ "evlhhesplatx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhossplat"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 511)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlhhossplat %0,%2*2(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhossplatx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 512)]
+ "TARGET_SPE"
+ "evlhhossplatx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhousplat"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 513)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlhhousplat %0,%2*2(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlhhousplatx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 514)]
+ "TARGET_SPE"
+ "evlhhousplatx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhsplat"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 515)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlwhsplat %0,%2*4(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhsplatx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 516)]
+ "TARGET_SPE"
+ "evlwhsplatx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwwsplat"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 517)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlwwsplat %0,%2*4(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwwsplatx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 518)]
+ "TARGET_SPE"
+ "evlwwsplatx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmergehi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (vec_merge:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (vec_select:V2SI
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 2)))]
+ "TARGET_SPE"
+ "evmergehi %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmergehilo"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (vec_merge:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (const_int 2)))]
+ "TARGET_SPE"
+ "evmergehilo %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmergelo"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (vec_merge:V2SI (vec_select:V2SI
+ (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (const_int 2)))]
+ "TARGET_SPE"
+ "evmergelo %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmergelohi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (vec_merge:V2SI (vec_select:V2SI
+ (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (vec_select:V2SI
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 2)))]
+ "TARGET_SPE"
+ "evmergelohi %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evnand"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (not:V2SI (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_SPE"
+ "evnand %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "negv2si2"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (neg:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evneg %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evnor"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (not:V2SI (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_SPE"
+ "evnor %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evorc"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (not:V2SI (match_operand:V2SI 2 "gpc_reg_operand" "r"))))]
+ "TARGET_SPE"
+ "evorc %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evor"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (ior:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evor %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evrlwi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] 519))]
+ "TARGET_SPE"
+ "evrlwi %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evrlw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 520))]
+ "TARGET_SPE"
+ "evrlw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evrndw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 521))]
+ "TARGET_SPE"
+ "evrndw %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsel"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (match_operand:CC 3 "cc_reg_operand" "y")] 522))]
+ "TARGET_SPE"
+ "evsel %0,%1,%2,%3"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsel_fs"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")
+ (match_operand:CC 3 "cc_reg_operand" "y")] 725))]
+ "TARGET_SPE"
+ "evsel %0,%1,%2,%3"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evslwi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ 523))]
+ "TARGET_SPE"
+ "evslwi %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evslw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 524))]
+ "TARGET_SPE"
+ "evslw %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsrwis"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ 525))]
+ "TARGET_SPE"
+ "evsrwis %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsrwiu"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ 526))]
+ "TARGET_SPE"
+ "evsrwiu %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsrws"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 527))]
+ "TARGET_SPE"
+ "evsrws %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsrwu"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 528))]
+ "TARGET_SPE"
+ "evsrwu %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+;; vector xors
+
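+;; evxor operates on the raw 64-bit register image, so the single
+;; instruction serves every 64-bit vector mode; the three patterns
+;; below differ only in the RTL mode they advertise.
+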
+(define_insn "xorv2si3"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (xor:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evxor %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "xorv4hi3"
+ [(set (match_operand:V4HI 0 "gpc_reg_operand" "=r")
+ (xor:V4HI (match_operand:V4HI 1 "gpc_reg_operand" "r")
+ (match_operand:V4HI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evxor %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "xorv1di3"
+ [(set (match_operand:V1DI 0 "gpc_reg_operand" "=r")
+ (xor:V1DI (match_operand:V1DI 1 "gpc_reg_operand" "r")
+ (match_operand:V1DI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evxor %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+;; end of vector xors
+
+(define_insn "spe_evfsabs"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evfsabs %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsadd"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfsadd %0,%1,%2"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscfsf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 529))]
+ "TARGET_SPE"
+ "evfscfsf %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscfsi"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (float:V2SF (match_operand:V2SI 1 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evfscfsi %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscfuf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 530))]
+ "TARGET_SPE"
+ "evfscfuf %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfscfui"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SI 1 "gpc_reg_operand" "r")] 701))]
+ "TARGET_SPE"
+ "evfscfui %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctsf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 531))]
+ "TARGET_SPE"
+ "evfsctsf %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctsi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 532))]
+ "TARGET_SPE"
+ "evfsctsi %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctsiz"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 533))]
+ "TARGET_SPE"
+ "evfsctsiz %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctuf"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 534))]
+ "TARGET_SPE"
+ "evfsctuf %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctui"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 535))]
+ "TARGET_SPE"
+ "evfsctui %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsctuiz"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 536))]
+ "TARGET_SPE"
+ "evfsctuiz %0,%1"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsdiv"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfsdiv %0,%1,%2"
+ [(set_attr "type" "vecfdiv")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsmul"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfsmul %0,%1,%2"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsnabs"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "r")] 537))]
+ "TARGET_SPE"
+ "evfsnabs %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfsneg"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evfsneg %0,%1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evfssub"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "=r")
+ (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "r")
+ (match_operand:V2SF 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evfssub %0,%1,%2"
+ [(set_attr "type" "vecfloat")
+ (set_attr "length" "4")])
+
+;; SPE SIMD load instructions.
+
+;; Only the hardware engineer who designed the SPE understands the
+;; plethora of load and store instructions ;-).  There is no way to
+;; differentiate between them in RTL, so each pattern carries an
+;; unspec over const_int 0 to keep otherwise-identical RTL distinct.
+
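+;; For example, evldd and evldw below would otherwise share the exact
+;; same (mem:V2SI (plus ...)) RTL shape; only the unspec number (544
+;; versus 548) tells the patterns apart.
+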
+(define_insn "spe_evldd"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 544)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evldd %0,%2*8(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlddx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 545)]
+ "TARGET_SPE"
+ "evlddx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evldh"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 546)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evldh %0,%2*8(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evldhx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 547)]
+ "TARGET_SPE"
+ "evldhx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evldw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 548)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evldw %0,%2*8(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evldwx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 549)]
+ "TARGET_SPE"
+ "evldwx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhe"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 550)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlwhe %0,%2*4(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhex"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 551)]
+ "TARGET_SPE"
+ "evlwhex %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhos"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 552)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlwhos %0,%2*4(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhosx"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 553)]
+ "TARGET_SPE"
+ "evlwhosx %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhou"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:QI 2 "immediate_operand" "i"))))
+ (unspec [(const_int 0)] 554)]
+ "TARGET_SPE && INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 31"
+ "evlwhou %0,%2*4(%1)"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evlwhoux"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (mem:V2SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))))
+ (unspec [(const_int 0)] 555)]
+ "TARGET_SPE"
+ "evlwhoux %0,%1,%2"
+ [(set_attr "type" "vecload")
+ (set_attr "length" "4")])
+
+(define_insn "spe_brinc"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")] 556))]
+ "TARGET_SPE"
+ "brinc %0,%1,%2"
+ [(set_attr "type" "brinc")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegsmfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 557))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegsmfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegsmfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 558))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegsmfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegsmiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 559))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegsmiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegsmian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 560))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegsmian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegumiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 561))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegumiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhegumian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 562))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhegumian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmfaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 563))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmfaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmfanw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 564))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmfanw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 565))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 566))]
+ "TARGET_SPE"
+ "evmhesmf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 567))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 568))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 569))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhesmia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhesmi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 570))]
+ "TARGET_SPE"
+ "evmhesmi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessfaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 571))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhessfaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessfanw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 572))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhessfanw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 573))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhessfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 574))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evmhessf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 575))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhessiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhessianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 576))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhessianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheumiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 577))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmheumiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheumianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 578))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmheumianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheumia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 579))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmheumia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheumi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 580))]
+ "TARGET_SPE"
+ "evmheumi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheusiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 581))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmheusiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmheusianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 582))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmheusianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogsmfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 583))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogsmfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogsmfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 584))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogsmfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogsmiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 585))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogsmiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogsmian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 586))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogsmian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogumiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 587))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogumiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhogumian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 588))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhogumian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmfaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 589))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmfaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmfanw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 590))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmfanw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 591))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 592))]
+ "TARGET_SPE"
+ "evmhosmf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 593))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 594))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 595))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhosmia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhosmi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 596))]
+ "TARGET_SPE"
+ "evmhosmi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossfaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 597))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhossfaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossfanw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 598))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhossfanw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 599))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhossfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 600))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evmhossf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 601))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhossiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhossianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 602))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhossianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhoumiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 603))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhoumiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhoumianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 604))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhoumianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhoumia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 605))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhoumia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhoumi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 606))]
+ "TARGET_SPE"
+ "evmhoumi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhousiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 607))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhousiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmhousianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 608))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmhousianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmmlssfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 609))]
+ "TARGET_SPE"
+ "evmmlssfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmmlssf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 610))]
+ "TARGET_SPE"
+ "evmmlssf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 611))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 612))]
+ "TARGET_SPE"
+ "evmwhsmf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 613))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 614))]
+ "TARGET_SPE"
+ "evmwhsmi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhssfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 615))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhssfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhusian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 626))]
+ "TARGET_SPE"
+ "evmwhusian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhssf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 628))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evmwhssf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhumia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 629))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhumia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhumi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 630))]
+ "TARGET_SPE"
+ "evmwhumi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlsmiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 635))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlsmiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlsmianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 636))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlsmianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlssiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 641))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlssiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlssianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 642))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlssianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlumiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 643))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlumiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlumianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 644))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlumianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlumia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 645))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlumia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlumi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 646))]
+ "TARGET_SPE"
+ "evmwlumi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlusiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 647))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlusiaaw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwlusianw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 648))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwlusianw %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 649))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 650))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 651))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 652))]
+ "TARGET_SPE"
+ "evmwsmf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 653))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 654))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 655))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwsmia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwsmi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 656))]
+ "TARGET_SPE"
+ "evmwsmi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwssfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 657))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwssfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwssfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 658))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwssfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwssfa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 659))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwssfa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwssf"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 660))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evmwssf %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwumiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 661))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwumiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwumian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 662))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwumian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwumia"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 663))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwumia %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwumi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 664))]
+ "TARGET_SPE"
+ "evmwumi %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "addv2si3"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (plus:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evaddw %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evaddusiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 673))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evaddusiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evaddumiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 674))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evaddumiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evaddssiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 675))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evaddssiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evaddsmiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 676))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evaddsmiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evaddiw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] 677))]
+ "TARGET_SPE"
+ "evaddiw %0,%1,%2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsubifw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:QI 2 "immediate_operand" "i")] 678))]
+ "TARGET_SPE"
+ "evsubifw %0,%2,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "subv2si3"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (minus:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))]
+ "TARGET_SPE"
+ "evsubfw %0,%2,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsubfusiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 679))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evsubfusiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsubfumiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 680))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evsubfumiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsubfssiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 681))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evsubfssiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsubfsmiaaw"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (reg:V2SI SPE_ACC_REGNO)] 682))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evsubfsmiaaw %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmra"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (match_operand:V2SI 1 "gpc_reg_operand" "r"))
+ (set (reg:V2SI SPE_ACC_REGNO)
+ (unspec:V2SI [(match_dup 1)] 726))]
+ "TARGET_SPE"
+ "evmra %0,%1"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "divv2si3"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (div:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evdivws %0,%1,%2"
+ [(set_attr "type" "vecdiv")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evdivwu"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (udiv:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")))
+ (clobber (reg:SI SPEFSCR_REGNO))]
+ "TARGET_SPE"
+ "evdivwu %0,%1,%2"
+ [(set_attr "type" "vecdiv")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsplatfi"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:QI 1 "immediate_operand" "i")] 684))]
+ "TARGET_SPE"
+ "evsplatfi %0,%1"
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evsplati"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:QI 1 "immediate_operand" "i")] 685))]
+ "TARGET_SPE"
+ "evsplati %0,%1"
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "4")])
+
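+;; The SPE store instructions encode their displacement as a 5-bit
+;; unsigned element count, so the patterns below require operand 1 to
+;; be in the range 0..31 and print the byte offset ("%1*8" for
+;; doubleword stores, "%1*4" for word stores) for the assembler to
+;; scale back into the encoded field.
+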
+(define_insn "spe_evstdd"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 686)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstdd %2,%1*8(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstddx"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 687)]
+ "TARGET_SPE"
+ "evstddx %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstdh"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 688)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstdh %2,%1*8(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstdhx"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 689)]
+ "TARGET_SPE"
+ "evstdhx %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstdw"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 690)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstdw %2,%1*8(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstdwx"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 691)]
+ "TARGET_SPE"
+ "evstdwx %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwhe"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 692)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstwhe %2,%1*4(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwhex"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 693)]
+ "TARGET_SPE"
+ "evstwhex %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwho"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 694)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstwho %2,%1*4(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwhox"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 695)]
+ "TARGET_SPE"
+ "evstwhox %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwwe"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 696)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstwwe %2,%1*4(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwwex"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 697)]
+ "TARGET_SPE"
+ "evstwwex %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwwo"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:QI 1 "immediate_operand" "i")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 698)]
+ "TARGET_SPE && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 31"
+ "evstwwo %2,%1*4(%0)"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evstwwox"
+ [(set (mem:V2SI (plus:SI (match_operand:SI 0 "gpc_reg_operand" "b")
+ (match_operand:SI 1 "gpc_reg_operand" "r")))
+ (match_operand:V2SI 2 "gpc_reg_operand" "r"))
+ (unspec [(const_int 0)] 699)]
+ "TARGET_SPE"
+ "evstwwox %2,%0,%1"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "4")])
+
+;; Double-precision floating point instructions.
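+;;
+;; On E500_DOUBLE targets, DFmode values (and the DFmode halves of
+;; TFmode) live in the 64-bit-wide GPRs, so the "frob" patterns below
+;; implement subreg moves between the floating modes and same-sized
+;; integer modes with evmergelo/evmergehi shuffles rather than through
+;; memory.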
+
+;; FIXME: Add o=r option.
+(define_insn "*frob_<SPE64:mode>_<DITI:mode>"
+ [(set (match_operand:SPE64 0 "nonimmediate_operand" "=r,r")
+ (subreg:SPE64 (match_operand:DITI 1 "input_operand" "r,m") 0))]
+ "(TARGET_E500_DOUBLE && <SPE64:MODE>mode == DFmode)
+ || (TARGET_SPE && <SPE64:MODE>mode != DFmode)"
+ "@
+ evmergelo %0,%1,%L1
+ evldd%X1 %0,%y1")
+
+(define_insn "*frob_tf_ti"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "=r")
+ (subreg:TF (match_operand:TI 1 "gpc_reg_operand" "r") 0))]
+ "TARGET_E500_DOUBLE"
+ "evmergelo %0,%1,%L1\;evmergelo %L0,%Y1,%Z1"
+ [(set_attr "length" "8")])
+
+(define_insn "*frob_<mode>_di_2"
+ [(set (subreg:DI (match_operand:SPE64TF 0 "nonimmediate_operand" "+&r,r") 0)
+ (match_operand:DI 1 "input_operand" "r,m"))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "@
+ evmergelo %0,%1,%L1
+ evldd%X1 %0,%y1")
+
+(define_insn "*frob_tf_di_8_2"
+ [(set (subreg:DI (match_operand:TF 0 "nonimmediate_operand" "+&r,r") 8)
+ (match_operand:DI 1 "input_operand" "r,m"))]
+ "TARGET_E500_DOUBLE"
+ "@
+ evmergelo %L0,%1,%L1
+ evldd%X1 %L0,%y1")
+
+(define_insn "*frob_di_<mode>"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&r")
+ (subreg:DI (match_operand:SPE64TF 1 "input_operand" "r") 0))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "evmergehi %0,%1,%1\;mr %L0,%1"
+ [(set_attr "length" "8")])
+
+(define_insn "*frob_ti_tf"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=&r")
+ (subreg:TI (match_operand:TF 1 "input_operand" "r") 0))]
+ "TARGET_E500_DOUBLE"
+ "evmergehi %0,%1,%1\;mr %L0,%1\;evmergehi %Y0,%L1,%L1\;mr %Z0,%L1"
+ [(set_attr "length" "16")])
+
+(define_insn "*frob_<DITI:mode>_<SPE64:mode>_2"
+ [(set (subreg:SPE64 (match_operand:DITI 0 "register_operand" "+&r,r") 0)
+ (match_operand:SPE64 1 "input_operand" "r,m"))]
+ "(TARGET_E500_DOUBLE && <SPE64:MODE>mode == DFmode)
+ || (TARGET_SPE && <SPE64:MODE>mode != DFmode)"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ return \"evmergehi %0,%1,%1\;mr %L0,%1\";
+ case 1:
+ /* If the address is not offsettable we need to load the whole
+ doubleword into a 64-bit register and then copy the high word
+ to form the correct output layout. */
+ if (!offsettable_nonstrict_memref_p (operands[1]))
+ return \"evldd%X1 %L0,%y1\;evmergehi %0,%L0,%L0\";
+ /* If the low-address word is used in the address, we must load
+ it last. Otherwise, load it first. Note that we cannot have
+ auto-increment in that case since the address register is
+ known to be dead. */
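+ /* An illustrative example (with a hypothetical register
+ assignment): if operands[0] is the pair r3:r4 and operands[1]
+ is the memory 8(r3), the address uses the high-word register
+ r3, so we emit "lwz r4,12(r3); lwz r3,8(r3)"; loading r3
+ first would clobber the address. */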
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ return \"{l|lwz} %L0,%L1\;{l|lwz} %0,%1\";
+ else
+ return \"{l%U1%X1|lwz%U1%X1} %0,%1\;{l|lwz} %L0,%L1\";
+ }
+}"
+ [(set_attr "length" "8,8")])
+
+; As the pattern above, but for a TImode destination at byte offset 8.
+(define_insn "*frob_ti_<mode>_8_2"
+ [(set (subreg:SPE64 (match_operand:TI 0 "register_operand" "+&r,r") 8)
+ (match_operand:SPE64 1 "input_operand" "r,m"))]
+ "(TARGET_E500_DOUBLE && <MODE>mode == DFmode)
+ || (TARGET_SPE && <MODE>mode != DFmode)"
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ gcc_unreachable ();
+ case 0:
+ return \"evmergehi %Y0,%1,%1\;mr %Z0,%1\";
+ case 1:
+ if (!offsettable_nonstrict_memref_p (operands[1]))
+ return \"evldd%X1 %Z0,%y1\;evmergehi %Y0,%Z0,%Z0\";
+ if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
+ operands[1], 0))
+ return \"{l|lwz} %Z0,%L1\;{l|lwz} %Y0,%1\";
+ else
+ return \"{l%U1%X1|lwz%U1%X1} %Y0,%1\;{l|lwz} %Z0,%L1\";
+ }
+}"
+ [(set_attr "length" "8,8")])
+
+(define_insn "*frob_ti_tf_2"
+ [(set (subreg:TF (match_operand:TI 0 "gpc_reg_operand" "=&r") 0)
+ (match_operand:TF 1 "input_operand" "r"))]
+ "TARGET_E500_DOUBLE"
+ "evmergehi %0,%1,%1\;mr %L0,%1\;evmergehi %Y0,%L1,%L1\;mr %Z0,%L1"
+ [(set_attr "length" "16")])
+
+(define_insn "*mov_si<mode>_e500_subreg0"
+ [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,&r") 0)
+ (match_operand:SI 1 "input_operand" "r,m"))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "@
+ evmergelo %0,%1,%0
+ evmergelohi %0,%0,%0\;{l%U1%X1|lwz%U1%X1} %0,%1\;evmergelohi %0,%0,%0"
+ [(set_attr "length" "4,12")])
+
+;; ??? Could use evstwwe for memory stores in some cases, depending on
+;; the offset.
+(define_insn "*mov_si<mode>_e500_subreg0_2"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "+r,m")
+ (subreg:SI (match_operand:SPE64TF 1 "register_operand" "+r,&r") 0))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "@
+ evmergehi %0,%0,%1
+ evmergelohi %1,%1,%1\;{st%U0%X0|stw%U0%X0} %1,%0"
+ [(set_attr "length" "4,8")])
+
+(define_insn "*mov_si<mode>_e500_subreg4"
+ [(set (subreg:SI (match_operand:SPE64TF 0 "register_operand" "+r,r") 4)
+ (match_operand:SI 1 "input_operand" "r,m"))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "@
+ mr %0,%1
+ {l%U1%X1|lwz%U1%X1} %0,%1")
+
+(define_insn "*mov_si<mode>_e500_subreg4_2"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "+r,m")
+ (subreg:SI (match_operand:SPE64TF 1 "register_operand" "r,r") 4))]
+ "(TARGET_E500_DOUBLE && (<MODE>mode == DFmode || <MODE>mode == TFmode))
+ || (TARGET_SPE && <MODE>mode != DFmode && <MODE>mode != TFmode)"
+ "@
+ mr %0,%1
+ {st%U0%X0|stw%U0%X0} %1,%0")
+
+(define_insn "*mov_sitf_e500_subreg8"
+ [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,&r") 8)
+ (match_operand:SI 1 "input_operand" "r,m"))]
+ "TARGET_E500_DOUBLE"
+ "@
+ evmergelo %L0,%1,%L0
+ evmergelohi %L0,%L0,%L0\;{l%U1%X1|lwz%U1%X1} %L0,%1\;evmergelohi %L0,%L0,%L0"
+ [(set_attr "length" "4,12")])
+
+(define_insn "*mov_sitf_e500_subreg8_2"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "+r,m")
+ (subreg:SI (match_operand:TF 1 "register_operand" "+r,&r") 8))]
+ "TARGET_E500_DOUBLE"
+ "@
+ evmergehi %0,%0,%L1
+ evmergelohi %L1,%L1,%L1\;{st%U0%X0|stw%U0%X0} %L1,%0"
+ [(set_attr "length" "4,8")])
+
+(define_insn "*mov_sitf_e500_subreg12"
+ [(set (subreg:SI (match_operand:TF 0 "register_operand" "+r,r") 12)
+ (match_operand:SI 1 "input_operand" "r,m"))]
+ "TARGET_E500_DOUBLE"
+ "@
+ mr %L0,%1
+ {l%U1%X1|lwz%U1%X1} %L0,%1")
+
+(define_insn "*mov_sitf_e500_subreg12_2"
+ [(set (match_operand:SI 0 "rs6000_nonimmediate_operand" "+r,m")
+ (subreg:SI (match_operand:TF 1 "register_operand" "r,r") 12))]
+ "TARGET_E500_DOUBLE"
+ "@
+ mr %0,%L1
+ {st%U0%X0|stw%U0%X0} %L1,%0")
+
+;; FIXME: Allow r=CONST0.
+(define_insn "*movdf_e500_double"
+ [(set (match_operand:DF 0 "rs6000_nonimmediate_operand" "=r,r,m")
+ (match_operand:DF 1 "input_operand" "r,m,r"))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && (gpc_reg_operand (operands[0], DFmode)
+ || gpc_reg_operand (operands[1], DFmode))"
+ "*
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ return \"evor %0,%1,%1\";
+ case 1:
+ return \"evldd%X1 %0,%y1\";
+ case 2:
+ return \"evstdd%X0 %1,%y0\";
+ default:
+ gcc_unreachable ();
+ }
+ }"
+ [(set_attr "type" "*,vecload,vecstore")
+ (set_attr "length" "*,*,*")])
+
+(define_insn "spe_truncdfsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efscfd %0,%1")
+
+(define_insn "spe_absdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (abs:DF (match_operand:DF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdabs %0,%1")
+
+(define_insn "spe_nabsdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "r"))))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdnabs %0,%1")
+
+(define_insn "spe_negdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (neg:DF (match_operand:DF 1 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdneg %0,%1")
+
+(define_insn "spe_adddf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (plus:DF (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdadd %0,%1,%2")
+
+(define_insn "spe_subdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (minus:DF (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdsub %0,%1,%2")
+
+(define_insn "spe_muldf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efdmul %0,%1,%2")
+
+(define_insn "spe_divdf3"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r")
+ (div:DF (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r")))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE"
+ "efddiv %0,%1,%2")
+
+;; Double-precision floating point instructions for IBM long double.
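+;;
+;; In the IBM format a 128-bit long double is a pair of doubles
+;; (double-double), the value being the sum of the two halves; in the
+;; patterns below %0/%1 name the high-order double of the pair and
+;; %L0/%L1 the low-order double.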
+
+(define_insn_and_split "spe_trunctfdf2_internal1"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=r,?r")
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "0,r")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "@
+ #
+ evor %0,%1,%1"
+ "&& reload_completed && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+})
+
+(define_insn_and_split "spe_trunctfsf2"
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=r")
+ (float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:DF 2 "=r"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (float_truncate:DF (match_dup 1)))
+ (set (match_dup 0)
+ (float_truncate:SF (match_dup 2)))]
+ "")
+
+(define_insn "spe_extenddftf2"
+ [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=r,?r,r,o")
+ (float_extend:TF (match_operand:DF 1 "input_operand" "0,r,m,r")))
+ (clobber (match_scratch:DF 2 "=X,X,X,&r"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "@
+ evxor %L0,%L0,%L0
+ evor %0,%1,%1\;evxor %L0,%L0,%L0
+ evldd%X1 %0,%y1\;evxor %L0,%L0,%L0
+ evstdd%X0 %1,%y0\;evxor %2,%2,%2\;evstdd %2,%Y0"
+ [(set_attr "length" "4,8,8,12")])
+
+(define_expand "spe_fix_trunctfsi2"
+ [(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (clobber (match_dup 4))])]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+})
+
+; Truncate a TFmode value to SImode: add the two halves of the long
+; double with rounding forced towards zero, then convert the sum to a
+; signed word.
+(define_insn "spe_fix_trunctfsi2_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (fix:SI (match_operand:TF 1 "gpc_reg_operand" "r")))
+ (clobber (match_operand:DF 2 "gpc_reg_operand" "=r"))
+ (clobber (match_operand:SI 3 "gpc_reg_operand" "=&r"))
+ (clobber (match_operand:SI 4 "gpc_reg_operand" "=&r"))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "mfspefscr %3\;rlwinm %4,%3,0,0,29\;ori %4,%4,1\;efdadd %2,%1,%L1\;mtspefscr %3\;efdctsiz %0, %2"
+ [(set_attr "length" "24")])
+
+(define_insn "spe_negtf2_internal"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "=r")
+ (neg:TF (match_operand:TF 1 "gpc_reg_operand" "r")))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "*
+{
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"efdneg %L0,%L1\;efdneg %0,%1\";
+ else
+ return \"efdneg %0,%1\;efdneg %L0,%L1\";
+}"
+ [(set_attr "length" "8")])
+
+(define_expand "spe_abstf2_cmp"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "=f")
+ (match_operand:TF 1 "gpc_reg_operand" "f"))
+ (set (match_dup 3) (match_dup 5))
+ (set (match_dup 5) (abs:DF (match_dup 5)))
+ (set (match_dup 4) (unspec:CCFP [(compare:CCFP (match_dup 3)
+ (match_dup 5))] CMPDFEQ_GPR))
+ (set (pc) (if_then_else (eq (match_dup 4) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 6) (neg:DF (match_dup 6)))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "
+{
+ const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (CCFPmode);
+ operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
+ operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
+}")
+
+(define_expand "spe_abstf2_tst"
+ [(set (match_operand:TF 0 "gpc_reg_operand" "=f")
+ (match_operand:TF 1 "gpc_reg_operand" "f"))
+ (set (match_dup 3) (match_dup 5))
+ (set (match_dup 5) (abs:DF (match_dup 5)))
+ (set (match_dup 4) (unspec:CCFP [(compare:CCFP (match_dup 3)
+ (match_dup 5))] TSTDFEQ_GPR))
+ (set (pc) (if_then_else (eq (match_dup 4) (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 6) (neg:DF (match_dup 6)))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
+ "
+{
+ const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (CCFPmode);
+ operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
+ operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
+}")
+
+;; Vector move instructions.
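+;;
+;; The "W" constraint in the patterns below accepts a vector constant
+;; that does not require a memory load; output_vec_const_move emits
+;; the code for such constants, and the all-zero V2SI case is split
+;; after reload into a single vector xor of the register with itself.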
+
+(define_expand "movv2si"
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+ (match_operand:V2SI 1 "any_operand" ""))]
+ "TARGET_SPE"
+ "{ rs6000_emit_move (operands[0], operands[1], V2SImode); DONE; }")
+
+(define_insn "*movv2si_internal"
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=m,r,r,r")
+ (match_operand:V2SI 1 "input_operand" "r,m,r,W"))]
+ "TARGET_SPE
+ && (gpc_reg_operand (operands[0], V2SImode)
+ || gpc_reg_operand (operands[1], V2SImode))"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: return \"evstdd%X0 %1,%y0\";
+ case 1: return \"evldd%X1 %0,%y1\";
+ case 2: return \"evor %0,%1,%1\";
+ case 3: return output_vec_const_move (operands);
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "vecload,vecstore,*,*")
+ (set_attr "length" "*,*,*,12")])
+
+(define_split
+ [(set (match_operand:V2SI 0 "register_operand" "")
+ (match_operand:V2SI 1 "zero_constant" ""))]
+ "TARGET_SPE && reload_completed"
+ [(set (match_dup 0)
+ (xor:V2SI (match_dup 0) (match_dup 0)))]
+ "")
+
+(define_expand "movv1di"
+ [(set (match_operand:V1DI 0 "nonimmediate_operand" "")
+ (match_operand:V1DI 1 "any_operand" ""))]
+ "TARGET_SPE"
+ "{ rs6000_emit_move (operands[0], operands[1], V1DImode); DONE; }")
+
+(define_insn "*movv1di_internal"
+ [(set (match_operand:V1DI 0 "nonimmediate_operand" "=m,r,r,r")
+ (match_operand:V1DI 1 "input_operand" "r,m,r,W"))]
+ "TARGET_SPE
+ && (gpc_reg_operand (operands[0], V1DImode)
+ || gpc_reg_operand (operands[1], V1DImode))"
+ "@
+ evstdd%X0 %1,%y0
+ evldd%X1 %0,%y1
+ evor %0,%1,%1
+ evxor %0,%0,%0"
+ [(set_attr "type" "vecload,vecstore,*,*")
+ (set_attr "length" "*,*,*,*")])
+
+(define_expand "movv4hi"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+ (match_operand:V4HI 1 "any_operand" ""))]
+ "TARGET_SPE"
+ "{ rs6000_emit_move (operands[0], operands[1], V4HImode); DONE; }")
+
+(define_insn "*movv4hi_internal"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=m,r,r,r")
+ (match_operand:V4HI 1 "input_operand" "r,m,r,W"))]
+ "TARGET_SPE
+ && (gpc_reg_operand (operands[0], V4HImode)
+ || gpc_reg_operand (operands[1], V4HImode))"
+ "@
+ evstdd%X0 %1,%y0
+ evldd%X1 %0,%y1
+ evor %0,%1,%1
+ evxor %0,%0,%0"
+ [(set_attr "type" "vecload")])
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "any_operand" ""))]
+ "TARGET_SPE || TARGET_PAIRED_FLOAT"
+ "{ rs6000_emit_move (operands[0], operands[1], V2SFmode); DONE; }")
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,r,r,r")
+ (match_operand:V2SF 1 "input_operand" "r,m,r,W"))]
+ "TARGET_SPE
+ && (gpc_reg_operand (operands[0], V2SFmode)
+ || gpc_reg_operand (operands[1], V2SFmode))"
+ "@
+ evstdd%X0 %1,%y0
+ evldd%X1 %0,%y1
+ evor %0,%1,%1
+ evxor %0,%0,%0"
+ [(set_attr "type" "vecload,vecstore,*,*")
+ (set_attr "length" "*,*,*,*")])
+
+;; End of vector move instructions.
+
+(define_insn "spe_evmwhssfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 702))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhssfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhssmaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 703))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhssmaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 704))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 705))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhusiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 706))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhusiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhumiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 707))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhumiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhssfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 708))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhssfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhssian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 709))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhssian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 710))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhsmian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 711))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhsmian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhumian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 713))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhumian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgssfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 714))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgssfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgsmfaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 715))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgsmfaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgsmiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 716))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgsmiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgumiaa"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 717))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgumiaa %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgssfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 718))
+ (clobber (reg:SI SPEFSCR_REGNO))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgssfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgsmfan"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 719))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgsmfan %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgsmian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 720))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgsmian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_evmwhgumian"
+ [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r")
+ (unspec:V2SI [(match_operand:V2SI 1 "gpc_reg_operand" "r")
+ (match_operand:V2SI 2 "gpc_reg_operand" "r")] 721))
+ (set (reg:V2SI SPE_ACC_REGNO) (unspec:V2SI [(const_int 0)] 0))]
+ "TARGET_SPE"
+ "evmwhgumian %0,%1,%2"
+ [(set_attr "type" "veccomplex")
+ (set_attr "length" "4")])
+
+(define_insn "spe_mtspefscr"
+ [(set (reg:SI SPEFSCR_REGNO)
+ (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ 722))]
+ "TARGET_SPE"
+ "mtspefscr %0"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "spe_mfspefscr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(reg:SI SPEFSCR_REGNO)] 723))]
+ "TARGET_SPE"
+ "mfspefscr %0"
+ [(set_attr "type" "vecsimple")])
+
+;; FP comparison patterns.
+
+;; Flip the GT bit.  e500 FP comparisons report their result in the CR
+;; GT bit, so reversing the sense of a comparison amounts to inverting
+;; that bit.
+(define_insn "e500_flip_gt_bit"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(match_operand:CCFP 1 "cc_reg_operand" "y")] 999))]
+ "!TARGET_FPRS && TARGET_HARD_FLOAT"
+ "*
+{
+ return output_e500_flip_gt_bit (operands[0], operands[1]);
+}"
+ [(set_attr "type" "cr_logical")])
+
+;; MPC8540 single-precision FP instructions on GPRs.
+;; We have two variants of each: one for IEEE-compliant math and one
+;; for non-IEEE-compliant math.
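+;; The cmp* patterns (efscmp*) are used unless both -ffinite-math-only
+;; and -fno-trapping-math are in effect, in which case the non-trapping
+;; tst* patterns (efstst*) are used instead.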
+
+(define_insn "cmpsfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1000))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efscmpeq %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstsfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1001))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && flag_finite_math_only && !flag_trapping_math"
+ "efststeq %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+(define_insn "cmpsfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1002))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efscmpgt %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstsfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1003))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && flag_finite_math_only && !flag_trapping_math"
+ "efststgt %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+(define_insn "cmpsflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1004))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efscmplt %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstsflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "r")
+ (match_operand:SF 2 "gpc_reg_operand" "r"))]
+ 1005))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS
+ && flag_finite_math_only && !flag_trapping_math"
+ "efststlt %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+;; Same thing, but for double-precision.
+
+(define_insn "cmpdfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ CMPDFEQ_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmpeq %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstdfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ TSTDFEQ_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtsteq %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+(define_insn "cmpdfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ CMPDFGT_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmpgt %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstdfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ TSTDFGT_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtstgt %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+(define_insn "cmpdflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ CMPDFLT_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmplt %0,%1,%2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "tstdflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "r")
+ (match_operand:DF 2 "gpc_reg_operand" "r"))]
+ TSTDFLT_GPR))]
+ "TARGET_HARD_FLOAT && TARGET_E500_DOUBLE
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtstlt %0,%1,%2"
+ [(set_attr "type" "veccmpsimple")])
+
+;; Same thing, but for IBM long double.  An IBM long double is a pair
+;; of doubles, so each comparison tests the high halves first and falls
+;; through to the low halves only when the high-half result is
+;; inconclusive (i.e. the high halves compare equal).
+
+(define_insn "cmptfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ CMPTFEQ_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmpeq %0,%L1,%L2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "12")])
+
+(define_insn "tsttfeq_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ TSTTFEQ_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtsteq %0,%1,%2\;bng %0,$+8\;efdtsteq %0,%L1,%L2"
+ [(set_attr "type" "veccmpsimple")
+ (set_attr "length" "12")])
+
+(define_insn "cmptfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ CMPTFGT_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmpgt %0,%1,%2\;bgt %0,$+16\;efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmpgt %0,%L1,%L2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "20")])
+
+(define_insn "tsttfgt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ TSTTFGT_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtstgt %0,%1,%2\;bgt %0,$+16\;efdtsteq %0,%1,%2\;bng %0,$+8\;efdtstgt %0,%L1,%L2"
+ [(set_attr "type" "veccmpsimple")
+ (set_attr "length" "20")])
+
+(define_insn "cmptflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ CMPTFLT_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && !(flag_finite_math_only && !flag_trapping_math)"
+ "efdcmplt %0,%1,%2\;bgt %0,$+16\;efdcmpeq %0,%1,%2\;bng %0,$+8\;efdcmplt %0,%L1,%L2"
+ [(set_attr "type" "veccmp")
+ (set_attr "length" "20")])
+
+(define_insn "tsttflt_gpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP
+ [(compare:CCFP (match_operand:TF 1 "gpc_reg_operand" "r")
+ (match_operand:TF 2 "gpc_reg_operand" "r"))]
+ TSTTFLT_GPR))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128
+ && flag_finite_math_only && !flag_trapping_math"
+ "efdtstlt %0,%1,%2\;bgt %0,$+16\;efdtsteq %0,%1,%2\;bng %0,$+8\;efdtstlt %0,%L1,%L2"
+ [(set_attr "type" "veccmpsimple")
+ (set_attr "length" "20")])
+
+;; Like cceq_ior_compare, but compare the GT bits.
+(define_insn "e500_cr_ior_compare"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+ (unspec:CCFP [(match_operand 1 "cc_reg_operand" "y")
+ (match_operand 2 "cc_reg_operand" "y")]
+ E500_CR_IOR_COMPARE))]
+ "TARGET_HARD_FLOAT && !TARGET_FPRS"
+ "cror 4*%0+gt,4*%1+gt,4*%2+gt"
+ [(set_attr "type" "cr_logical")])
+
+;; Out-of-line prologues and epilogues.  These patterns call (or, for
+;; the return variant, tail-call) the shared register save/restore
+;; routine named by operand 1.
+(define_insn "*save_gpregs_spe"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:V2SI 2 "memory_operand" "=m")
+ (match_operand:V2SI 3 "gpc_reg_operand" "r"))])]
+ "TARGET_SPE_ABI"
+ "bl %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*restore_gpregs_spe"
+ [(match_parallel 0 "any_parallel_operand"
+ [(clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:V2SI 2 "gpc_reg_operand" "=r")
+ (match_operand:V2SI 3 "memory_operand" "m"))])]
+ "TARGET_SPE_ABI"
+ "bl %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*return_and_restore_gpregs_spe"
+ [(match_parallel 0 "any_parallel_operand"
+ [(return)
+ (clobber (reg:P 65))
+ (use (match_operand:P 1 "symbol_ref_operand" "s"))
+ (use (reg:P 11))
+ (set (match_operand:V2SI 2 "gpc_reg_operand" "=r")
+ (match_operand:V2SI 3 "memory_operand" "m"))])]
+ "TARGET_SPE_ABI"
+ "b %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
diff --git a/gcc/config/rs6000/spu2vmx.h b/gcc/config/rs6000/spu2vmx.h
new file mode 100644
index 000000000..1ccb71c70
--- /dev/null
+++ b/gcc/config/rs6000/spu2vmx.h
@@ -0,0 +1,2415 @@
+/* Cell SPU 2 VMX intrinsics header
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU2VMX_H_
+#define _SPU2VMX_H_ 1
+
+#ifdef __cplusplus
+
+#ifndef __SPU__
+
+#include <si2vmx.h>
+
+/* spu_absd (absolute difference)
+ * ========
+ */
+static __inline vec_uchar16 spu_absd(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_absdb((qword)(a), (qword)(b))));
+}
+
+
+/* spu_add
+ * =======
+ */
+static __inline vec_uint4 spu_add(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_a((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_add(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_a((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_add(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_ah((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_add(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_ah((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_add(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_ai((qword)(a), (int)(b))));
+}
+
+static __inline vec_int4 spu_add(vec_int4 a, int b)
+{
+ return ((vec_int4)(si_ai((qword)(a), b)));
+}
+
+static __inline vec_ushort8 spu_add(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_ahi((qword)(a), (short)(b))));
+}
+
+static __inline vec_short8 spu_add(vec_short8 a, short b)
+{
+ return ((vec_short8)(si_ahi((qword)(a), b)));
+}
+
+static __inline vec_float4 spu_add(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_fa((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_add(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_dfa((qword)(a), (qword)(b))));
+}
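+
+/* Usage sketch (illustrative only): overload resolution picks the SPU
+   instruction matching the operand types, e.g.
+
+     vec_int4 v = spu_promote(1, 0);
+     vec_int4 w = spu_add(v, 41);     -> si_ai (add word immediate)
+     vec_int4 x = spu_add(v, v);      -> si_a  (add word)
+*/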
+
+
+/* spu_addx
+ * ========
+ */
+static __inline vec_uint4 spu_addx(vec_uint4 a, vec_uint4 b, vec_uint4 c)
+{
+ return ((vec_uint4)(si_addx((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_int4 spu_addx(vec_int4 a, vec_int4 b, vec_int4 c)
+{
+ return ((vec_int4)(si_addx((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_and
+ * =======
+ */
+static __inline vec_uchar16 spu_and(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_char16 spu_and(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_and(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_and(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_and(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_and(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_float4 spu_and(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_and(vec_ullong2 a, vec_ullong2 b)
+{
+ return ((vec_ullong2)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_llong2 spu_and(vec_llong2 a, vec_llong2 b)
+{
+ return ((vec_llong2)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_and(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_and((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_and(vec_uchar16 a, unsigned char b)
+{
+ return ((vec_uchar16)(si_andbi((qword)(a), (signed char)(b))));
+}
+
+
+static __inline vec_char16 spu_and(vec_char16 a, signed char b)
+{
+ return ((vec_char16)(si_andbi((qword)(a), b)));
+}
+
+static __inline vec_ushort8 spu_and(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_andhi((qword)(a), (signed short)(b))));
+}
+
+static __inline vec_short8 spu_and(vec_short8 a, signed short b)
+{
+ return ((vec_short8)(si_andhi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_and(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_andi((qword)(a), (signed int)(b))));
+}
+
+static __inline vec_int4 spu_and(vec_int4 a, signed int b)
+{
+ return ((vec_int4)(si_andi((qword)(a), b)));
+}
+
+
+/* spu_andc
+ * ========
+ */
+#define spu_andc(_a, _b) vec_andc(_a, _b)
+
+
+/* spu_avg
+ * =======
+ */
+#define spu_avg(_a, _b) vec_avg(_a, _b)
+
+
+/* spu_bisled
+ * spu_bisled_d
+ * spu_bisled_e
+ * ============
+ */
+#define spu_bisled(_func) /* not mappable */
+#define spu_bisled_d(_func) /* not mappable */
+#define spu_bisled_e(_func) /* not mappable */
+
+/* spu_cmpabseq
+ * ============
+ */
+static __inline vec_uint4 spu_cmpabseq(vec_float4 a, vec_float4 b)
+{
+ return ((vec_uint4)(si_fcmeq((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_cmpabseq(vec_double2 a, vec_double2 b)
+{
+ return ((vec_ullong2)(si_dfcmeq((qword)(a), (qword)(b))));
+}
+
+
+/* spu_cmpabsgt
+ * ============
+ */
+static __inline vec_uint4 spu_cmpabsgt(vec_float4 a, vec_float4 b)
+{
+ return ((vec_uint4)(si_fcmgt((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_cmpabsgt(vec_double2 a, vec_double2 b)
+{
+ return ((vec_ullong2)(si_dfcmgt((qword)(a), (qword)(b))));
+}
+
+
+/* spu_cmpeq
+ * ========
+ */
+static __inline vec_uchar16 spu_cmpeq(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_ceqb((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_cmpeq(vec_char16 a, vec_char16 b)
+{
+ return ((vec_uchar16)(si_ceqb((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_cmpeq(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_ceqh((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_cmpeq(vec_short8 a, vec_short8 b)
+{
+ return ((vec_ushort8)(si_ceqh((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpeq(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_ceq((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpeq(vec_int4 a, vec_int4 b)
+{
+ return ((vec_uint4)(si_ceq((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpeq(vec_float4 a, vec_float4 b)
+{
+ return ((vec_uint4)(si_fceq((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_cmpeq(vec_uchar16 a, unsigned char b)
+{
+ return ((vec_uchar16)(si_ceqbi((qword)(a), (signed char)(b))));
+}
+
+static __inline vec_uchar16 spu_cmpeq(vec_char16 a, signed char b)
+{
+ return ((vec_uchar16)(si_ceqbi((qword)(a), b)));
+}
+
+static __inline vec_ushort8 spu_cmpeq(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_ceqhi((qword)(a), (signed short)(b))));
+}
+
+static __inline vec_ushort8 spu_cmpeq(vec_short8 a, signed short b)
+{
+ return ((vec_ushort8)(si_ceqhi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_cmpeq(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_ceqi((qword)(a), (signed int)(b))));
+}
+
+static __inline vec_uint4 spu_cmpeq(vec_int4 a, signed int b)
+{
+ return ((vec_uint4)(si_ceqi((qword)(a), b)));
+}
+
+static __inline vec_ullong2 spu_cmpeq(vec_double2 a, vec_double2 b)
+{
+ return ((vec_ullong2)(si_dfceq((qword)(a), (qword)(b))));
+}
+
+
+/* spu_cmpgt
+ * ========
+ */
+static __inline vec_uchar16 spu_cmpgt(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_clgtb((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_cmpgt(vec_char16 a, vec_char16 b)
+{
+ return ((vec_uchar16)(si_cgtb((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_cmpgt(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_clgth((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_cmpgt(vec_short8 a, vec_short8 b)
+{
+ return ((vec_ushort8)(si_cgth((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpgt(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_clgt((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpgt(vec_int4 a, vec_int4 b)
+{
+ return ((vec_uint4)(si_cgt((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_cmpgt(vec_float4 a, vec_float4 b)
+{
+ return ((vec_uint4)(si_fcgt((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_cmpgt(vec_uchar16 a, unsigned char b)
+{
+ return ((vec_uchar16)(si_clgtbi((qword)(a), b)));
+}
+
+static __inline vec_uchar16 spu_cmpgt(vec_char16 a, signed char b)
+{
+ return ((vec_uchar16)(si_cgtbi((qword)(a), b)));
+}
+
+static __inline vec_ushort8 spu_cmpgt(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_clgthi((qword)(a), b)));
+}
+
+static __inline vec_ushort8 spu_cmpgt(vec_short8 a, signed short b)
+{
+ return ((vec_ushort8)(si_cgthi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_cmpgt(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_clgti((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_cmpgt(vec_int4 a, signed int b)
+{
+ return ((vec_uint4)(si_cgti((qword)(a), b)));
+}
+
+static __inline vec_ullong2 spu_cmpgt(vec_double2 a, vec_double2 b)
+{
+ return ((vec_ullong2)(si_dfcgt((qword)(a), (qword)(b))));
+}
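+
+/* Note: the unsigned element types map to the logical (unsigned)
+   compares si_clgt/si_clgth/si_clgtb, while the signed types map to
+   the arithmetic compares si_cgt/si_cgth/si_cgtb.  */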
+
+
+/* spu_cntb
+ * ========
+ */
+static __inline vec_uchar16 spu_cntb(vec_uchar16 a)
+{
+ return ((vec_uchar16)(si_cntb((qword)(a))));
+}
+
+
+static __inline vec_uchar16 spu_cntb(vec_char16 a)
+{
+ return ((vec_uchar16)(si_cntb((qword)(a))));
+}
+
+/* spu_cntlz
+ * =========
+ */
+static __inline vec_uint4 spu_cntlz(vec_uint4 a)
+{
+ return ((vec_uint4)(si_clz((qword)(a))));
+}
+
+static __inline vec_uint4 spu_cntlz(vec_int4 a)
+{
+ return ((vec_uint4)(si_clz((qword)(a))));
+}
+
+static __inline vec_uint4 spu_cntlz(vec_float4 a)
+{
+ return ((vec_uint4)(si_clz((qword)(a))));
+}
+
+/* spu_testsv
+ * ==========
+ */
+static __inline vec_ullong2 spu_testsv(vec_double2 a, char b)
+{
+ return ((vec_ullong2)(si_dftsv((qword)(a), b)));
+}
+
+/* spu_convtf
+ * ==========
+ */
+#define spu_convtf(_a, _b) (vec_ctf(_a, _b))
+
+/* spu_convts
+ * ==========
+ */
+#define spu_convts(_a, _b) (vec_cts(_a, _b))
+
+/* spu_convtu
+ * ==========
+ */
+#define spu_convtu(_a, _b) (vec_ctu(_a, _b))
+
+
+/* spu_dsync
+ * ========
+ */
+#define spu_dsync()
+
+/* spu_eqv
+ * =======
+ */
+static __inline vec_uchar16 spu_eqv(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_char16 spu_eqv(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_eqv(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_eqv(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_eqv(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_eqv(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_float4 spu_eqv(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_eqv(vec_ullong2 a, vec_ullong2 b)
+{
+ return ((vec_ullong2)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_llong2 spu_eqv(vec_llong2 a, vec_llong2 b)
+{
+ return ((vec_llong2)(si_eqv((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_eqv(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_eqv((qword)(a), (qword)(b))));
+}
+
+/* spu_extend
+ * ========
+ */
+static __inline vec_short8 spu_extend(vec_char16 a)
+{
+ return ((vec_short8)(si_xsbh((qword)(a))));
+}
+
+
+static __inline vec_int4 spu_extend(vec_short8 a)
+{
+ return ((vec_int4)(si_xshw((qword)(a))));
+}
+
+static __inline vec_llong2 spu_extend(vec_int4 a)
+{
+ return ((vec_llong2)(si_xswd((qword)(a))));
+}
+
+
+static __inline vec_double2 spu_extend(vec_float4 a)
+{
+ return ((vec_double2)(si_fesd((qword)(a))));
+}
+
+
+/* spu_extract
+ * ========
+ */
+static __inline unsigned char spu_extract(vec_uchar16 a, int element)
+{
+ union {
+ vec_uchar16 v;
+ unsigned char c[16];
+ } in;
+
+ in.v = a;
+ return (in.c[element & 15]);
+}
+
+static __inline signed char spu_extract(vec_char16 a, int element)
+{
+ union {
+ vec_char16 v;
+ signed char c[16];
+ } in;
+
+ in.v = a;
+ return (in.c[element & 15]);
+}
+
+static __inline unsigned short spu_extract(vec_ushort8 a, int element)
+{
+ union {
+ vec_ushort8 v;
+ unsigned short s[8];
+ } in;
+
+ in.v = a;
+ return (in.s[element & 7]);
+}
+
+static __inline signed short spu_extract(vec_short8 a, int element)
+{
+ union {
+ vec_short8 v;
+ signed short s[8];
+ } in;
+
+ in.v = a;
+ return (in.s[element & 7]);
+}
+
+static __inline unsigned int spu_extract(vec_uint4 a, int element)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } in;
+
+ in.v = a;
+ return (in.i[element & 3]);
+}
+
+static __inline signed int spu_extract(vec_int4 a, int element)
+{
+ union {
+ vec_int4 v;
+ signed int i[4];
+ } in;
+
+ in.v = a;
+ return (in.i[element & 3]);
+}
+
+static __inline float spu_extract(vec_float4 a, int element)
+{
+ union {
+ vec_float4 v;
+ float f[4];
+ } in;
+
+ in.v = a;
+ return (in.f[element & 3]);
+}
+
+static __inline unsigned long long spu_extract(vec_ullong2 a, int element)
+{
+ union {
+ vec_ullong2 v;
+ unsigned long long l[2];
+ } in;
+
+ in.v = a;
+ return (in.l[element & 1]);
+}
+
+static __inline signed long long spu_extract(vec_llong2 a, int element)
+{
+ union {
+ vec_llong2 v;
+ signed long long l[2];
+ } in;
+
+ in.v = a;
+ return (in.l[element & 1]);
+}
+
+static __inline double spu_extract(vec_double2 a, int element)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } in;
+
+ in.v = a;
+ return (in.d[element & 1]);
+}
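+
+/* Note: the element index is masked to the lane count (element & 15,
+   & 7, & 3 or & 1 above), so out-of-range indices wrap around rather
+   than read outside the union.  */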
+
+/* spu_gather
+ * ========
+ */
+static __inline vec_uint4 spu_gather(vec_uchar16 a)
+{
+ return ((vec_uint4)(si_gbb((qword)(a))));
+}
+
+
+static __inline vec_uint4 spu_gather(vec_char16 a)
+{
+ return ((vec_uint4)(si_gbb((qword)(a))));
+}
+
+static __inline vec_uint4 spu_gather(vec_ushort8 a)
+{
+ return ((vec_uint4)(si_gbh((qword)(a))));
+}
+
+static __inline vec_uint4 spu_gather(vec_short8 a)
+{
+ return ((vec_uint4)(si_gbh((qword)(a))));
+}
+
+
+static __inline vec_uint4 spu_gather(vec_uint4 a)
+{
+ return ((vec_uint4)(si_gb((qword)(a))));
+}
+
+static __inline vec_uint4 spu_gather(vec_int4 a)
+{
+ return ((vec_uint4)(si_gb((qword)(a))));
+}
+
+static __inline vec_uint4 spu_gather(vec_float4 a)
+{
+ return ((vec_uint4)(si_gb((qword)(a))));
+}
+
+/* spu_genb
+ * ========
+ */
+static __inline vec_uint4 spu_genb(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_bg((qword)(b), (qword)(a))));
+}
+
+static __inline vec_int4 spu_genb(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_bg((qword)(b), (qword)(a))));
+}
+
+/* spu_genbx
+ * =========
+ */
+static __inline vec_uint4 spu_genbx(vec_uint4 a, vec_uint4 b, vec_uint4 c)
+{
+ return ((vec_uint4)(si_bgx((qword)(b), (qword)(a), (qword)(c))));
+}
+
+static __inline vec_int4 spu_genbx(vec_int4 a, vec_int4 b, vec_int4 c)
+{
+ return ((vec_int4)(si_bgx((qword)(b), (qword)(a), (qword)(c))));
+}
+
+
+/* spu_genc
+ * ========
+ */
+static __inline vec_uint4 spu_genc(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_cg((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_genc(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_cg((qword)(a), (qword)(b))));
+}
+
+/* spu_gencx
+ * =========
+ */
+static __inline vec_uint4 spu_gencx(vec_uint4 a, vec_uint4 b, vec_uint4 c)
+{
+ return ((vec_uint4)(si_cgx((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_int4 spu_gencx(vec_int4 a, vec_int4 b, vec_int4 c)
+{
+ return ((vec_int4)(si_cgx((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_hcmpeq
+ * ========
+ */
+#define spu_hcmpeq(_a, _b) if (_a == _b) { SPU_HALT_ACTION; };
+
+
+/* spu_hcmpgt
+ * ========
+ */
+#define spu_hcmpgt(_a, _b) if (_a > _b) { SPU_HALT_ACTION; };
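+
+/* SPU_HALT_ACTION (and SPU_UNSUPPORTED_ACTION below) are presumably
+   provided by <si2vmx.h>; the PPU has no direct equivalent of the SPU
+   halt-on-condition instructions, so these expand to a generic
+   action.  */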
+
+
+/* spu_idisable
+ * ============
+ */
+#define spu_idisable() SPU_UNSUPPORTED_ACTION
+
+
+/* spu_ienable
+ * ===========
+ */
+#define spu_ienable() SPU_UNSUPPORTED_ACTION
+
+
+/* spu_insert
+ * ========
+ */
+static __inline vec_uchar16 spu_insert(unsigned char a, vec_uchar16 b, int element)
+{
+ union {
+ vec_uchar16 v;
+ unsigned char c[16];
+ } in;
+
+ in.v = b;
+ in.c[element & 15] = a;
+ return (in.v);
+}
+
+static __inline vec_char16 spu_insert(signed char a, vec_char16 b, int element)
+{
+ return ((vec_char16)spu_insert((unsigned char)(a), (vec_uchar16)(b), element));
+}
+
+static __inline vec_ushort8 spu_insert(unsigned short a, vec_ushort8 b, int element)
+{
+ union {
+ vec_ushort8 v;
+ unsigned short s[8];
+ } in;
+
+ in.v = b;
+ in.s[element & 7] = a;
+ return (in.v);
+}
+
+static __inline vec_short8 spu_insert(signed short a, vec_short8 b, int element)
+{
+ return ((vec_short8)spu_insert((unsigned short)(a), (vec_ushort8)(b), element));
+}
+
+static __inline vec_uint4 spu_insert(unsigned int a, vec_uint4 b, int element)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } in;
+
+ in.v = b;
+ in.i[element & 3] = a;
+ return (in.v);
+}
+
+static __inline vec_int4 spu_insert(signed int a, vec_int4 b, int element)
+{
+ return ((vec_int4)spu_insert((unsigned int)(a), (vec_uint4)(b), element));
+}
+
+static __inline vec_float4 spu_insert(float a, vec_float4 b, int element)
+{
+ union {
+ vec_float4 v;
+ float f[4];
+ } in;
+
+ in.v = b;
+ in.f[element & 3] = a;
+ return (in.v);
+}
+
+static __inline vec_ullong2 spu_insert(unsigned long long a, vec_ullong2 b, int element)
+{
+ union {
+ vec_ullong2 v;
+ unsigned long long l[2];
+ } in;
+
+ in.v = b;
+ in.l[element & 1] = a;
+ return (in.v);
+}
+
+static __inline vec_llong2 spu_insert(signed long long a, vec_llong2 b, int element)
+{
+ return ((vec_llong2)spu_insert((unsigned long long)(a), (vec_ullong2)(b), element));
+}
+
+static __inline vec_double2 spu_insert(double a, vec_double2 b, int element)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } in;
+
+ in.v = b;
+ in.d[element & 1] = a;
+ return (in.v);
+}
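+
+/* Note: spu_insert returns a copy of b with the selected lane
+   replaced; the argument vector itself is not modified.  */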
+
+
+/* spu_madd
+ * ========
+ */
+static __inline vec_int4 spu_madd(vec_short8 a, vec_short8 b, vec_int4 c)
+{
+ return ((vec_int4)(si_mpya((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_float4 spu_madd(vec_float4 a, vec_float4 b, vec_float4 c)
+{
+ return ((vec_float4)(si_fma((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_double2 spu_madd(vec_double2 a, vec_double2 b, vec_double2 c)
+{
+ return ((vec_double2)(si_dfma((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_maskb
+ * ========
+ */
+#define spu_maskb(_a) (vec_uchar16)(si_fsmb(si_from_int((int)(_a))))
+
+/* spu_maskh
+ * ========
+ */
+#define spu_maskh(_a) (vec_ushort8)(si_fsmh(si_from_int((int)(_a))))
+
+
+/* spu_maskw
+ * ========
+ */
+#define spu_maskw(_a) (vec_uint4)(si_fsm(si_from_int((int)(_a))))
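+
+/* Note: the form-select-mask operations expand each low-order bit of
+   _a into a full byte (fsmb), halfword (fsmh) or word (fsm) of all
+   ones or all zeros.  */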
+
+
+/* spu_mfcdma32
+ * ========
+ */
+#define spu_mfcdma32(_ls, _ea, _size, _tagid, _cmd)
+
+
+/* spu_mfcdma64
+ * ========
+ */
+#define spu_mfcdma64(_ls, _eahi, _ealow, _size, _tagid, _cmd)
+
+/* spu_mfcstat
+ * ========
+ */
+#define spu_mfcstat(_type) 0xFFFFFFFF
+
+
+
+/* spu_mffpscr
+ * ===========
+ */
+#define spu_mffpscr() (vec_uint4)(si_fscrrd())
+
+
+/* spu_mfspr
+ * ========
+ */
+
+#define spu_mfspr(_reg) si_to_uint(si_mfspr(_reg))
+
+
+
+/* spu_mhhadd
+ * ==========
+ */
+static __inline vec_int4 spu_mhhadd(vec_short8 a, vec_short8 b, vec_int4 c)
+{
+ return ((vec_int4)(si_mpyhha((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+static __inline vec_uint4 spu_mhhadd(vec_ushort8 a, vec_ushort8 b, vec_uint4 c)
+{
+ return ((vec_uint4)(si_mpyhhau((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_msub
+ * ========
+ */
+static __inline vec_float4 spu_msub(vec_float4 a, vec_float4 b, vec_float4 c)
+{
+ return ((vec_float4)(si_fms((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_double2 spu_msub(vec_double2 a, vec_double2 b, vec_double2 c)
+{
+ return ((vec_double2)(si_dfms((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_mtfpscr
+ * ===========
+ */
+#define spu_mtfpscr(_a)
+
+
+/* spu_mtspr
+ * ========
+ */
+#define spu_mtspr(_reg, _a)
+
+
+/* spu_mul
+ * ========
+ */
+static __inline vec_float4 spu_mul(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_fm((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_mul(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_dfm((qword)(a), (qword)(b))));
+}
+
+
+/* spu_mulh
+ * ========
+ */
+static __inline vec_int4 spu_mulh(vec_short8 a, vec_short8 b)
+{
+ return ((vec_int4)(si_mpyh((qword)(a), (qword)(b))));
+}
+
+/* spu_mule
+ * =========
+ */
+#define spu_mule(_a, _b) vec_mule(_a, _b)
+
+
+
+/* spu_mulo
+ * ========
+ */
+static __inline vec_int4 spu_mulo(vec_short8 a, vec_short8 b)
+{
+ return ((vec_int4)(si_mpy((qword)(a), (qword)(b))));
+}
+
+
+static __inline vec_uint4 spu_mulo(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_uint4)(si_mpyu((qword)(a), (qword)(b))));
+}
+
+
+static __inline vec_int4 spu_mulo(vec_short8 a, short b)
+{
+ return ((vec_int4)(si_mpyi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_mulo(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_uint4)(si_mpyui((qword)(a), b)));
+}
+
+
+/* spu_mulsr
+ * =========
+ */
+static __inline vec_int4 spu_mulsr(vec_short8 a, vec_short8 b)
+{
+ return ((vec_int4)(si_mpys((qword)(a), (qword)(b))));
+}
+
+
+/* spu_nand
+ * ========
+ */
+static __inline vec_uchar16 spu_nand(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_char16 spu_nand(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_nand(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_nand(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_nand(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_nand(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_float4 spu_nand(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_nand(vec_ullong2 a, vec_ullong2 b)
+{
+ return ((vec_ullong2)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_llong2 spu_nand(vec_llong2 a, vec_llong2 b)
+{
+ return ((vec_llong2)(si_nand((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_nand(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_nand((qword)(a), (qword)(b))));
+}
+
+
+/* spu_nmadd
+ * =========
+ */
+static __inline vec_double2 spu_nmadd(vec_double2 a, vec_double2 b, vec_double2 c)
+{
+ return ((vec_double2)(si_dfnma((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_nmsub
+ * =========
+ */
+static __inline vec_float4 spu_nmsub(vec_float4 a, vec_float4 b, vec_float4 c)
+{
+ return ((vec_float4)(si_fnms((qword)(a), (qword)(b), (qword)(c))));
+}
+
+static __inline vec_double2 spu_nmsub(vec_double2 a, vec_double2 b, vec_double2 c)
+{
+ return ((vec_double2)(si_dfnms((qword)(a), (qword)(b), (qword)(c))));
+}
+
+
+/* spu_nor
+ * =======
+ */
+#define spu_nor(_a, _b) vec_nor(_a, _b)
+
+
+/* spu_or
+ * ======
+ */
+static __inline vec_uchar16 spu_or(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_char16 spu_or(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_or(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_or(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_or(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_or(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_float4 spu_or(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_or(vec_ullong2 a, vec_ullong2 b)
+{
+ return ((vec_ullong2)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_llong2 spu_or(vec_llong2 a, vec_llong2 b)
+{
+ return ((vec_llong2)(si_or((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_or(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_or((qword)(a), (qword)(b))));
+}
+
+
+static __inline vec_uchar16 spu_or(vec_uchar16 a, unsigned char b)
+{
+ return ((vec_uchar16)(si_orbi((qword)(a), b)));
+}
+
+static __inline vec_char16 spu_or(vec_char16 a, signed char b)
+{
+ return ((vec_char16)(si_orbi((qword)(a), (unsigned char)(b))));
+}
+
+static __inline vec_ushort8 spu_or(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_orhi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_or(vec_short8 a, signed short b)
+{
+ return ((vec_short8)(si_orhi((qword)(a), (unsigned short)(b))));
+}
+
+static __inline vec_uint4 spu_or(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_ori((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_or(vec_int4 a, signed int b)
+{
+ return ((vec_int4)(si_ori((qword)(a), (unsigned int)(b))));
+}
+
+
+/* spu_orc
+ * =======
+ */
+#define spu_orc(_a, _b) vec_or(_a, vec_nor(_b, _b))
+
+
+/* spu_orx
+ * =======
+ */
+static __inline vec_uint4 spu_orx(vec_uint4 a)
+{
+ return ((vec_uint4)(si_orx((qword)(a))));
+}
+
+static __inline vec_int4 spu_orx(vec_int4 a)
+{
+ return ((vec_int4)(si_orx((qword)(a))));
+}
+
+
+/* spu_promote
+ * ===========
+ */
+static __inline vec_uchar16 spu_promote(unsigned char a, int element)
+{
+ union {
+ vec_uchar16 v;
+ unsigned char c[16];
+ } in;
+
+ in.c[element & 15] = a;
+ return (in.v);
+}
+
+static __inline vec_char16 spu_promote(signed char a, int element)
+{
+ union {
+ vec_char16 v;
+ signed char c[16];
+ } in;
+
+ in.c[element & 15] = a;
+ return (in.v);
+}
+
+static __inline vec_ushort8 spu_promote(unsigned short a, int element)
+{
+ union {
+ vec_ushort8 v;
+ unsigned short s[8];
+ } in;
+
+ in.s[element & 7] = a;
+ return (in.v);
+}
+
+static __inline vec_short8 spu_promote(signed short a, int element)
+{
+ union {
+ vec_short8 v;
+ signed short s[8];
+ } in;
+
+ in.s[element & 7] = a;
+ return (in.v);
+}
+
+static __inline vec_uint4 spu_promote(unsigned int a, int element)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } in;
+
+ in.i[element & 3] = a;
+ return (in.v);
+}
+
+static __inline vec_int4 spu_promote(signed int a, int element)
+{
+ union {
+ vec_int4 v;
+ signed int i[4];
+ } in;
+
+ in.i[element & 3] = a;
+ return (in.v);
+}
+
+static __inline vec_float4 spu_promote(float a, int element)
+{
+ union {
+ vec_float4 v;
+ float f[4];
+ } in;
+
+ in.f[element & 3] = a;
+ return (in.v);
+}
+
+static __inline vec_ullong2 spu_promote(unsigned long long a, int element)
+{
+ union {
+ vec_ullong2 v;
+ unsigned long long l[2];
+ } in;
+
+ in.l[element & 1] = a;
+ return (in.v);
+}
+
+static __inline vec_llong2 spu_promote(signed long long a, int element)
+{
+ union {
+ vec_llong2 v;
+ signed long long l[2];
+ } in;
+
+ in.l[element & 1] = a;
+ return (in.v);
+}
+
+static __inline vec_double2 spu_promote(double a, int element)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } in;
+
+ in.d[element & 1] = a;
+ return (in.v);
+}
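+
+/* Note: only the selected lane of the result is written; the remaining
+   lanes are indeterminate, matching the SPU intrinsic's contract that
+   unspecified elements are undefined.  */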
+
+/* spu_re
+ * ======
+ */
+#define spu_re(_a) vec_re(_a)
+
+
+/* spu_readch
+ * ==========
+ */
+#define spu_readch(_channel) 0 /* not mappable */
+
+
+/* spu_readchcnt
+ * =============
+ */
+#define spu_readchcnt(_channel) 0 /* not mappable */
+
+
+/* spu_readchqw
+ * ============
+ */
+#define spu_readchqw(_channel) __extension__ ({ vec_uint4 result = { 0, 0, 0, 0 }; result; })
+
+/* spu_rl
+ * ======
+ */
+static __inline vec_ushort8 spu_rl(vec_ushort8 a, vec_short8 b)
+{
+ return ((vec_ushort8)(si_roth((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_rl(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_roth((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_rl(vec_uint4 a, vec_int4 b)
+{
+ return ((vec_uint4)(si_rot((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_rl(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_rot((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_rl(vec_ushort8 a, int b)
+{
+ return ((vec_ushort8)(si_rothi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_rl(vec_short8 a, int b)
+{
+ return ((vec_short8)(si_rothi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_rl(vec_uint4 a, int b)
+{
+ return ((vec_uint4)(si_roti((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_rl(vec_int4 a, int b)
+{
+ return ((vec_int4)(si_roti((qword)(a), b)));
+}
+
+
+/* spu_rlmask
+ * ==========
+ */
+static __inline vec_ushort8 spu_rlmask(vec_ushort8 a, vec_short8 b)
+{
+ return ((vec_ushort8)(si_rothm((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_rlmask(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_rothm((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_rlmask(vec_uint4 a, vec_int4 b)
+{
+ return ((vec_uint4)(si_rotm((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_rlmask(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_rotm((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_rlmask(vec_ushort8 a, int b)
+{
+ return ((vec_ushort8)(si_rothmi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_rlmask(vec_short8 a, int b)
+{
+ return ((vec_short8)(si_rothmi((qword)(a), b)));
+}
+
+
+static __inline vec_uint4 spu_rlmask(vec_uint4 a, int b)
+{
+ return ((vec_uint4)(si_rotmi((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_rlmask(vec_int4 a, int b)
+{
+ return ((vec_int4)(si_rotmi((qword)(a), b)));
+}
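+
+/* Note: "rotate left and mask" implements a logical right shift with
+   the count given as a negative value, so spu_rlmask(a, -3) shifts
+   each element right by 3 bits and fills with zeros.  */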
+
+/* spu_rlmaska
+ * ===========
+ */
+static __inline vec_short8 spu_rlmaska(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_rotmah((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_rlmaska(vec_ushort8 a, vec_short8 b)
+{
+ return ((vec_ushort8)(si_rotmah((qword)(a), (qword)(b))));
+}
+
+
+static __inline vec_int4 spu_rlmaska(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_rotma((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_rlmaska(vec_uint4 a, vec_int4 b)
+{
+ return ((vec_uint4)(si_rotma((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_rlmaska(vec_ushort8 a, int b)
+{
+ return ((vec_ushort8)(si_rotmahi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_rlmaska(vec_short8 a, int b)
+{
+ return ((vec_short8)(si_rotmahi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_rlmaska(vec_uint4 a, int b)
+{
+ return ((vec_uint4)(si_rotmai((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_rlmaska(vec_int4 a, int b)
+{
+ return ((vec_int4)(si_rotmai((qword)(a), b)));
+}
+
+
+/* spu_rlmaskqw
+ * ============
+ */
+static __inline vec_uchar16 spu_rlmaskqw(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlmaskqw(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlmaskqw(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlmaskqw(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlmaskqw(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlmaskqw(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlmaskqw(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlmaskqw(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlmaskqw(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlmaskqw(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqmbi((qword)(a), si_from_int(count))));
+}
+
+/* spu_rlmaskqwbyte
+ * ================
+ */
+static __inline vec_uchar16 spu_rlmaskqwbyte(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlmaskqwbyte(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlmaskqwbyte(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlmaskqwbyte(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlmaskqwbyte(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlmaskqwbyte(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlmaskqwbyte(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlmaskqwbyte(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlmaskqwbyte(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlmaskqwbyte(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqmby((qword)(a), si_from_int(count))));
+}
+
+/* spu_rlmaskqwbytebc
+ * ==================
+ */
+static __inline vec_uchar16 spu_rlmaskqwbytebc(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlmaskqwbytebc(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlmaskqwbytebc(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlmaskqwbytebc(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlmaskqwbytebc(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlmaskqwbytebc(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlmaskqwbytebc(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlmaskqwbytebc(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlmaskqwbytebc(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlmaskqwbytebc(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqmbybi((qword)(a), si_from_int(count))));
+}
+
+
+/* spu_rlqwbyte
+ * ============
+ */
+static __inline vec_uchar16 spu_rlqwbyte(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlqwbyte(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlqwbyte(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlqwbyte(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlqwbyte(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlqwbyte(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlqwbyte(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlqwbyte(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlqwbyte(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlqwbyte(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqby((qword)(a), si_from_int(count))));
+}
+
+
+/* spu_rlqwbytebc
+ * ==============
+ */
+static __inline vec_uchar16 spu_rlqwbytebc(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlqwbytebc(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlqwbytebc(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlqwbytebc(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlqwbytebc(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlqwbytebc(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlqwbytebc(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlqwbytebc(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlqwbytebc(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlqwbytebc(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqbybi((qword)(a), si_from_int(count))));
+}
+
+/* spu_rlqw
+ * ========
+ */
+static __inline vec_uchar16 spu_rlqw(vec_uchar16 a, int count)
+{
+ return ((vec_uchar16)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_char16 spu_rlqw(vec_char16 a, int count)
+{
+ return ((vec_char16)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ushort8 spu_rlqw(vec_ushort8 a, int count)
+{
+ return ((vec_ushort8)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_short8 spu_rlqw(vec_short8 a, int count)
+{
+ return ((vec_short8)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_uint4 spu_rlqw(vec_uint4 a, int count)
+{
+ return ((vec_uint4)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_int4 spu_rlqw(vec_int4 a, int count)
+{
+ return ((vec_int4)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_float4 spu_rlqw(vec_float4 a, int count)
+{
+ return ((vec_float4)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_ullong2 spu_rlqw(vec_ullong2 a, int count)
+{
+ return ((vec_ullong2)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_llong2 spu_rlqw(vec_llong2 a, int count)
+{
+ return ((vec_llong2)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+static __inline vec_double2 spu_rlqw(vec_double2 a, int count)
+{
+ return ((vec_double2)(si_rotqbi((qword)(a), si_from_int(count))));
+}
+
+/* spu_roundtf
+ * ===========
+ */
+static __inline vec_float4 spu_roundtf(vec_double2 a)
+{
+ return ((vec_float4)(si_frds((qword)(a))));
+}
+
+
+/* spu_rsqrte
+ * ==========
+ */
+#define spu_rsqrte(_a) vec_rsqrte(_a)
+
+
+/* spu_sel
+ * =======
+ */
+static __inline vec_uchar16 spu_sel(vec_uchar16 a, vec_uchar16 b, vec_uchar16 pattern)
+{
+ return ((vec_uchar16)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_char16 spu_sel(vec_char16 a, vec_char16 b, vec_uchar16 pattern)
+{
+ return ((vec_char16)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_ushort8 spu_sel(vec_ushort8 a, vec_ushort8 b, vec_ushort8 pattern)
+{
+ return ((vec_ushort8)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_short8 spu_sel(vec_short8 a, vec_short8 b, vec_ushort8 pattern)
+{
+ return ((vec_short8)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_uint4 spu_sel(vec_uint4 a, vec_uint4 b, vec_uint4 pattern)
+{
+ return ((vec_uint4)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_int4 spu_sel(vec_int4 a, vec_int4 b, vec_uint4 pattern)
+{
+ return ((vec_int4)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_float4 spu_sel(vec_float4 a, vec_float4 b, vec_uint4 pattern)
+{
+ return ((vec_float4)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_ullong2 spu_sel(vec_ullong2 a, vec_ullong2 b, vec_ullong2 pattern)
+{
+ return ((vec_ullong2)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_llong2 spu_sel(vec_llong2 a, vec_llong2 b, vec_ullong2 pattern)
+{
+ return ((vec_llong2)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_double2 spu_sel(vec_double2 a, vec_double2 b, vec_ullong2 pattern)
+{
+ return ((vec_double2)(si_selb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+
+
+/* spu_shuffle
+ * ===========
+ */
+static __inline vec_uchar16 spu_shuffle(vec_uchar16 a, vec_uchar16 b, vec_uchar16 pattern)
+{
+ return ((vec_uchar16)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_char16 spu_shuffle(vec_char16 a, vec_char16 b, vec_uchar16 pattern)
+{
+ return ((vec_char16)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_ushort8 spu_shuffle(vec_ushort8 a, vec_ushort8 b, vec_uchar16 pattern)
+{
+ return ((vec_ushort8)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_short8 spu_shuffle(vec_short8 a, vec_short8 b, vec_uchar16 pattern)
+{
+ return ((vec_short8)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_uint4 spu_shuffle(vec_uint4 a, vec_uint4 b, vec_uchar16 pattern)
+{
+ return ((vec_uint4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_int4 spu_shuffle(vec_int4 a, vec_int4 b, vec_uchar16 pattern)
+{
+ return ((vec_int4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_float4 spu_shuffle(vec_float4 a, vec_float4 b, vec_uchar16 pattern)
+{
+ return ((vec_float4)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_ullong2 spu_shuffle(vec_ullong2 a, vec_ullong2 b, vec_uchar16 pattern)
+{
+ return ((vec_ullong2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_llong2 spu_shuffle(vec_llong2 a, vec_llong2 b, vec_uchar16 pattern)
+{
+ return ((vec_llong2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+static __inline vec_double2 spu_shuffle(vec_double2 a, vec_double2 b, vec_uchar16 pattern)
+{
+ return ((vec_double2)(si_shufb((qword)(a), (qword)(b), (qword)(pattern))));
+}
+
+
+/* spu_sl
+ * ======
+ */
+static __inline vec_ushort8 spu_sl(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_shlh((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_sl(vec_short8 a, vec_ushort8 b)
+{
+ return ((vec_short8)(si_shlh((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_sl(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_shl((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_sl(vec_int4 a, vec_uint4 b)
+{
+ return ((vec_int4)(si_shl((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_sl(vec_ushort8 a, unsigned int b)
+{
+ return ((vec_ushort8)(si_shlhi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_sl(vec_short8 a, unsigned int b)
+{
+ return ((vec_short8)(si_shlhi((qword)(a), b)));
+}
+
+static __inline vec_uint4 spu_sl(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_shli((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_sl(vec_int4 a, unsigned int b)
+{
+ return ((vec_int4)(si_shli((qword)(a), b)));
+}
+
+
+/* spu_slqw
+ * ========
+ */
+static __inline vec_uchar16 spu_slqw(vec_uchar16 a, unsigned int count)
+{
+ return ((vec_uchar16)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_char16 spu_slqw(vec_char16 a, unsigned int count)
+{
+ return ((vec_char16)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ushort8 spu_slqw(vec_ushort8 a, unsigned int count)
+{
+ return ((vec_ushort8)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_short8 spu_slqw(vec_short8 a, unsigned int count)
+{
+ return ((vec_short8)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_uint4 spu_slqw(vec_uint4 a, unsigned int count)
+{
+ return ((vec_uint4)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_int4 spu_slqw(vec_int4 a, unsigned int count)
+{
+ return ((vec_int4)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_float4 spu_slqw(vec_float4 a, unsigned int count)
+{
+ return ((vec_float4)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ullong2 spu_slqw(vec_ullong2 a, unsigned int count)
+{
+ return ((vec_ullong2)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_llong2 spu_slqw(vec_llong2 a, unsigned int count)
+{
+ return ((vec_llong2)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_double2 spu_slqw(vec_double2 a, unsigned int count)
+{
+ return ((vec_double2)(si_shlqbi((qword)(a), si_from_uint(count))));
+}
+
+/* spu_slqwbyte
+ * ============
+ */
+static __inline vec_uchar16 spu_slqwbyte(vec_uchar16 a, unsigned int count)
+{
+ return ((vec_uchar16)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_char16 spu_slqwbyte(vec_char16 a, unsigned int count)
+{
+ return ((vec_char16)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ushort8 spu_slqwbyte(vec_ushort8 a, unsigned int count)
+{
+ return ((vec_ushort8)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_short8 spu_slqwbyte(vec_short8 a, unsigned int count)
+{
+ return ((vec_short8)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_uint4 spu_slqwbyte(vec_uint4 a, unsigned int count)
+{
+ return ((vec_uint4)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_int4 spu_slqwbyte(vec_int4 a, unsigned int count)
+{
+ return ((vec_int4)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_float4 spu_slqwbyte(vec_float4 a, unsigned int count)
+{
+ return ((vec_float4)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ullong2 spu_slqwbyte(vec_ullong2 a, unsigned int count)
+{
+ return ((vec_ullong2)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_llong2 spu_slqwbyte(vec_llong2 a, unsigned int count)
+{
+ return ((vec_llong2)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_double2 spu_slqwbyte(vec_double2 a, unsigned int count)
+{
+ return ((vec_double2)(si_shlqby((qword)(a), si_from_uint(count))));
+}
+
+/* spu_slqwbytebc
+ * ==============
+ */
+static __inline vec_uchar16 spu_slqwbytebc(vec_uchar16 a, unsigned int count)
+{
+ return ((vec_uchar16)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_char16 spu_slqwbytebc(vec_char16 a, unsigned int count)
+{
+ return ((vec_char16)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ushort8 spu_slqwbytebc(vec_ushort8 a, unsigned int count)
+{
+ return ((vec_ushort8)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_short8 spu_slqwbytebc(vec_short8 a, unsigned int count)
+{
+ return ((vec_short8)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_uint4 spu_slqwbytebc(vec_uint4 a, unsigned int count)
+{
+ return ((vec_uint4)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_int4 spu_slqwbytebc(vec_int4 a, unsigned int count)
+{
+ return ((vec_int4)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_float4 spu_slqwbytebc(vec_float4 a, unsigned int count)
+{
+ return ((vec_float4)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_ullong2 spu_slqwbytebc(vec_ullong2 a, unsigned int count)
+{
+ return ((vec_ullong2)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_llong2 spu_slqwbytebc(vec_llong2 a, unsigned int count)
+{
+ return ((vec_llong2)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+static __inline vec_double2 spu_slqwbytebc(vec_double2 a, unsigned int count)
+{
+ return ((vec_double2)(si_shlqbybi((qword)(a), si_from_uint(count))));
+}
+
+/* spu_splats
+ * ==========
+ */
+static __inline vec_uchar16 spu_splats(unsigned char a)
+{
+ union {
+ vec_uchar16 v;
+ unsigned char c[16];
+ } in;
+
+ in.c[0] = a;
+ return (vec_splat(in.v, 0));
+}
+
+static __inline vec_char16 spu_splats(signed char a)
+{
+ return ((vec_char16)spu_splats((unsigned char)(a)));
+}
+
+static __inline vec_ushort8 spu_splats(unsigned short a)
+{
+ union {
+ vec_ushort8 v;
+ unsigned short s[8];
+ } in;
+
+ in.s[0] = a;
+ return (vec_splat(in.v, 0));
+}
+
+static __inline vec_short8 spu_splats(signed short a)
+{
+ return ((vec_short8)spu_splats((unsigned short)(a)));
+}
+
+static __inline vec_uint4 spu_splats(unsigned int a)
+{
+ union {
+ vec_uint4 v;
+ unsigned int i[4];
+ } in;
+
+ in.i[0] = a;
+ return (vec_splat(in.v, 0));
+}
+
+static __inline vec_int4 spu_splats(signed int a)
+{
+ return ((vec_int4)spu_splats((unsigned int)(a)));
+}
+
+static __inline vec_float4 spu_splats(float a)
+{
+ union {
+ vec_float4 v;
+ float f[4];
+ } in;
+
+ in.f[0] = a;
+ return (vec_splat(in.v, 0));
+}
+
+static __inline vec_ullong2 spu_splats(unsigned long long a)
+{
+ union {
+ vec_ullong2 v;
+ unsigned long long l[2];
+ } in;
+
+ in.l[0] = a;
+ in.l[1] = a;
+ return (in.v);
+}
+
+static __inline vec_llong2 spu_splats(signed long long a)
+{
+ return ((vec_llong2)spu_splats((unsigned long long)(a)));
+}
+
+static __inline vec_double2 spu_splats(double a)
+{
+ union {
+ vec_double2 v;
+ double d[2];
+ } in;
+
+ in.d[0] = a;
+ in.d[1] = a;
+ return (in.v);
+}
+
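+/* Usage sketch (illustrative values only):
+
+ vec_uint4 v = spu_splats(7u); // v is {7, 7, 7, 7}
+ vec_float4 f = spu_splats(1.0f); // f is {1.0f, 1.0f, 1.0f, 1.0f}
+
+ The 1-, 2- and 4-byte variants fill element 0 and broadcast it with
+ vec_splat; the 8-byte variants, which VMX vec_splat cannot broadcast
+ directly, store the value into both halves of the union instead. */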
+
+/* spu_stop
+ * ========
+ */
+#define spu_stop(_type) si_stop(_type)
+
+
+/* spu_sub
+ * =======
+ */
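+/* Note: the SPU "sf"/"sfh" instructions compute "subtract from"
+ (result = rb - ra), which is why the vector forms below pass (b, a)
+ to si_sf/si_sfh in order to obtain a - b; likewise si_sfi/si_sfhi
+ compute immediate - register for the scalar-minus-vector overloads. */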
+static __inline vec_ushort8 spu_sub(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_sfh((qword)(b), (qword)(a))));
+}
+
+static __inline vec_short8 spu_sub(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_sfh((qword)(b), (qword)(a))));
+}
+
+static __inline vec_uint4 spu_sub(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_sf((qword)(b), (qword)(a))));
+}
+
+static __inline vec_int4 spu_sub(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_sf((qword)(b), (qword)(a))));
+}
+
+static __inline vec_float4 spu_sub(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_fs((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_sub(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_dfs((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_sub(unsigned int a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_sfi((qword)b, (int)a)));
+}
+
+static __inline vec_int4 spu_sub(signed int a, vec_int4 b)
+{
+ return ((vec_int4)(si_sfi((qword)b, (int)a)));
+}
+
+static __inline vec_ushort8 spu_sub(unsigned short a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_sfhi((qword)b, (short)a)));
+}
+
+static __inline vec_short8 spu_sub(signed short a, vec_short8 b)
+{
+ return ((vec_short8)(si_sfhi((qword)b, (short)a)));
+}
+
+/* spu_subx
+ * ========
+ */
+static __inline vec_uint4 spu_subx(vec_uint4 a, vec_uint4 b, vec_uint4 c)
+{
+ return ((vec_uint4)(si_sfx((qword)(b), (qword)(a), (qword)(c))));
+}
+
+static __inline vec_int4 spu_subx(vec_int4 a, vec_int4 b, vec_int4 c)
+{
+ return ((vec_int4)(si_sfx((qword)(b), (qword)(a), (qword)(c))));
+}
+
+/* spu_sumb
+ * ========
+ */
+static __inline vec_ushort8 spu_sumb(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_ushort8)(si_sumb((qword)(a), (qword)(b))));
+}
+
+
+/* spu_sync
+ * spu_sync_c
+ * ==========
+ */
+#define spu_sync() /* do nothing */
+
+#define spu_sync_c() /* do nothing */
+
+
+/* spu_writech
+ * ===========
+ */
+#define spu_writech(_channel, _a) /* not mappable */
+
+/* spu_writechqw
+ * =============
+ */
+#define spu_writechqw(_channel, _a) /* not mappable */
+
+
+/* spu_xor
+ * =======
+ */
+static __inline vec_uchar16 spu_xor(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_char16 spu_xor(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ushort8 spu_xor(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_short8 spu_xor(vec_short8 a, vec_short8 b)
+{
+ return ((vec_short8)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uint4 spu_xor(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_int4 spu_xor(vec_int4 a, vec_int4 b)
+{
+ return ((vec_int4)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_float4 spu_xor(vec_float4 a, vec_float4 b)
+{
+ return ((vec_float4)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_ullong2 spu_xor(vec_ullong2 a, vec_ullong2 b)
+{
+ return ((vec_ullong2)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_llong2 spu_xor(vec_llong2 a, vec_llong2 b)
+{
+ return ((vec_llong2)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_double2 spu_xor(vec_double2 a, vec_double2 b)
+{
+ return ((vec_double2)(si_xor((qword)(a), (qword)(b))));
+}
+
+static __inline vec_uchar16 spu_xor(vec_uchar16 a, unsigned char b)
+{
+ return ((vec_uchar16)(si_xorbi((qword)(a), b)));
+}
+
+static __inline vec_char16 spu_xor(vec_char16 a, signed char b)
+{
+ return ((vec_char16)(si_xorbi((qword)(a), (unsigned char)(b))));
+}
+
+static __inline vec_ushort8 spu_xor(vec_ushort8 a, unsigned short b)
+{
+ return ((vec_ushort8)(si_xorhi((qword)(a), b)));
+}
+
+static __inline vec_short8 spu_xor(vec_short8 a, signed short b)
+{
+ return ((vec_short8)(si_xorhi((qword)(a), (unsigned short)(b))));
+}
+
+static __inline vec_uint4 spu_xor(vec_uint4 a, unsigned int b)
+{
+ return ((vec_uint4)(si_xori((qword)(a), b)));
+}
+
+static __inline vec_int4 spu_xor(vec_int4 a, signed int b)
+{
+ return ((vec_int4)(si_xori((qword)(a), (unsigned int)(b))));
+}
+
+#endif /* !__SPU__ */
+#endif /* __cplusplus */
+#endif /* !_SPU2VMX_H_ */
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
new file mode 100644
index 000000000..f6cc91d3d
--- /dev/null
+++ b/gcc/config/rs6000/sync.md
@@ -0,0 +1,622 @@
+;; Machine description for PowerPC synchronization instructions.
+;; Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+;; Contributed by Geoffrey Keating.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
+(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+
+(define_code_iterator FETCHOP [plus minus ior xor and])
+(define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")])
+(define_code_attr fetchop_pred
+ [(plus "add_operand") (minus "gpc_reg_operand")
+ (ior "logical_operand") (xor "logical_operand") (and "and_operand")])
+(define_code_attr fetchopsi_constr
+ [(plus "rIL") (minus "r") (ior "rKL") (xor "rKL") (and "rTKL")])
+(define_code_attr fetchopdi_constr
+ [(plus "rIL") (minus "r") (ior "rKJF") (xor "rKJF") (and "rSTKJ")])
+
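+;; For example, instantiating "sync_<fetchop_name><mode>" below with
+;; FETCHOP = plus and MODE = SI produces a "sync_addsi" expander whose
+;; operand 1 uses the "add_operand" predicate; the matching SImode insn
+;; then takes the "rIL" constraint from fetchopsi_constr.
+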
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_SYNC))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sync_internal"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_SYNC))]
+ ""
+ "{dcs|sync}"
+ [(set_attr "type" "sync")])
+
+(define_insn "load_locked_<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (unspec_volatile:GPR
+ [(match_operand:GPR 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_POWERPC"
+ "<larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+(define_insn "store_conditional_<mode>"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:GPR 1 "memory_operand" "=Z")
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))]
+ "TARGET_POWERPC"
+ "<stcx> %2,%y1"
+ [(set_attr "type" "store_c")])
+
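+;; A sketch (register names are placeholders) of the loop that the
+;; splitters below assemble from these two insns for
+;; sync_compare_and_swapsi; the rs6000.c helpers emit the actual rtl:
+;;	loop:	lwarx   rRES,0,rMEM
+;;		cmpw    rRES,rOLD
+;;		bne-    exit
+;;		stwcx.  rNEW,0,rMEM
+;;		bne-    loop
+;;	exit:
+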
+(define_insn_and_split "sync_compare_and_swap<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+Z"))
+ (set (match_dup 1)
+ (unspec:GPR
+ [(match_operand:GPR 2 "reg_or_short_operand" "rI")
+ (match_operand:GPR 3 "gpc_reg_operand" "r")]
+ UNSPEC_CMPXCHG))
+ (clobber (match_scratch:GPR 4 "=&r"))
+ (clobber (match_scratch:CC 5 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_compare_and_swap (operands[0], operands[1], operands[2],
+ operands[3], operands[4]);
+ DONE;
+})
+
+(define_expand "sync_compare_and_swaphi"
+ [(match_operand:HI 0 "gpc_reg_operand" "")
+ (match_operand:HI 1 "memory_operand" "")
+ (match_operand:HI 2 "gpc_reg_operand" "")
+ (match_operand:HI 3 "gpc_reg_operand" "")]
+ "TARGET_POWERPC"
+{
+ rs6000_expand_compare_and_swapqhi (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "sync_compare_and_swapqi"
+ [(match_operand:QI 0 "gpc_reg_operand" "")
+ (match_operand:QI 1 "memory_operand" "")
+ (match_operand:QI 2 "gpc_reg_operand" "")
+ (match_operand:QI 3 "gpc_reg_operand" "")]
+ "TARGET_POWERPC"
+{
+ rs6000_expand_compare_and_swapqhi (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_insn_and_split "sync_compare_and_swapqhi_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (match_operand:SI 4 "memory_operand" "+Z"))
+ (set (match_dup 4)
+ (unspec:SI
+ [(match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
+ UNSPEC_CMPXCHG))
+ (clobber (match_scratch:SI 5 "=&r"))
+ (clobber (match_scratch:CC 6 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_compare_and_swapqhi (operands[0], operands[1],
+ operands[2], operands[3], operands[4],
+ operands[5]);
+ DONE;
+})
+
+(define_insn_and_split "sync_lock_test_and_set<mode>"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+Z"))
+ (set (match_dup 1)
+ (unspec:GPR
+ [(match_operand:GPR 2 "reg_or_short_operand" "rL")]
+ UNSPEC_XCHG))
+ (clobber (match_scratch:GPR 3 "=&r"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_lock_test_and_set (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+})
+
+(define_expand "sync_<fetchop_name><mode>"
+ [(parallel [(set (match_operand:INT1 0 "memory_operand" "")
+ (unspec:INT1
+ [(FETCHOP:INT1 (match_dup 0)
+ (match_operand:INT1 1 "<fetchop_pred>" ""))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (<CODE>, <MODE>mode, operands[0], operands[1],
+ NULL_RTX, NULL_RTX, true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_<fetchop_name>si_internal"
+ [(set (match_operand:SI 0 "memory_operand" "+Z")
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 0)
+ (match_operand:SI 1 "<fetchop_pred>" "<fetchopsi_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 2 "=&b"))
+ (clobber (match_scratch:CC 3 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[0], operands[1],
+ NULL_RTX, NULL_RTX, operands[2]);
+ DONE;
+})
+
+(define_insn_and_split "*sync_<fetchop_name>di_internal"
+ [(set (match_operand:DI 0 "memory_operand" "+Z")
+ (unspec:DI
+ [(FETCHOP:DI (match_dup 0)
+ (match_operand:DI 1 "<fetchop_pred>" "<fetchopdi_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:DI 2 "=&b"))
+ (clobber (match_scratch:CC 3 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[0], operands[1],
+ NULL_RTX, NULL_RTX, operands[2]);
+ DONE;
+})
+
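+;; Note: in the nand expanders below, the subword (QImode/HImode) path is
+;; deliberately disabled; the leading FAIL makes the statements after it
+;; unreachable.  The same structure recurs in sync_old_nand<mode> and
+;; sync_new_nand<mode>.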
+(define_expand "sync_nand<mode>"
+ [(parallel [(set (match_operand:INT1 0 "memory_operand" "")
+ (unspec:INT1
+ [(ior:INT1 (not:INT1 (match_dup 0))
+ (not:INT1 (match_operand:INT1 1 "gpc_reg_operand" "")))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ FAIL;
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (NOT, <MODE>mode, operands[0], operands[1],
+ NULL_RTX, NULL_RTX, true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_nand<mode>_internal"
+ [(set (match_operand:GPR 0 "memory_operand" "+Z")
+ (unspec:GPR
+ [(ior:GPR (not:GPR (match_dup 0))
+ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:GPR 2 "=&r"))
+ (clobber (match_scratch:CC 3 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (NOT, operands[0], operands[1],
+ NULL_RTX, NULL_RTX, operands[2]);
+ DONE;
+})
+
+(define_expand "sync_old_<fetchop_name><mode>"
+ [(parallel [(set (match_operand:INT1 0 "gpc_reg_operand" "")
+ (match_operand:INT1 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:INT1
+ [(FETCHOP:INT1 (match_dup 1)
+ (match_operand:INT1 2 "<fetchop_pred>" ""))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (<CODE>, <MODE>mode, operands[1], operands[2],
+ operands[0], NULL_RTX, true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_old_<fetchop_name>si_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (match_operand:SI 1 "memory_operand" "+Z"))
+ (set (match_dup 1)
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 1)
+ (match_operand:SI 2 "<fetchop_pred>" "<fetchopsi_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=&b"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[1], operands[2],
+ operands[0], NULL_RTX, operands[3]);
+ DONE;
+})
+
+(define_insn_and_split "*sync_old_<fetchop_name>di_internal"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (match_operand:DI 1 "memory_operand" "+Z"))
+ (set (match_dup 1)
+ (unspec:DI
+ [(FETCHOP:DI (match_dup 1)
+ (match_operand:DI 2 "<fetchop_pred>" "<fetchopdi_constr>"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:DI 3 "=&b"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[1], operands[2],
+ operands[0], NULL_RTX, operands[3]);
+ DONE;
+})
+
+(define_expand "sync_old_nand<mode>"
+ [(parallel [(set (match_operand:INT1 0 "gpc_reg_operand" "")
+ (match_operand:INT1 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:INT1
+ [(ior:INT1 (not:INT1 (match_dup 1))
+ (not:INT1 (match_operand:INT1 2 "gpc_reg_operand" "")))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ FAIL;
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (NOT, <MODE>mode, operands[1], operands[2],
+ operands[0], NULL_RTX, true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_old_nand<mode>_internal"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+Z"))
+ (set (match_dup 1)
+ (unspec:GPR
+ [(ior:GPR (not:GPR (match_dup 1))
+ (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:GPR 3 "=&r"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (NOT, operands[1], operands[2],
+ operands[0], NULL_RTX, operands[3]);
+ DONE;
+})
+
+(define_expand "sync_new_<fetchop_name><mode>"
+ [(parallel [(set (match_operand:INT1 0 "gpc_reg_operand" "")
+ (FETCHOP:INT1
+ (match_operand:INT1 1 "memory_operand" "")
+ (match_operand:INT1 2 "<fetchop_pred>" "")))
+ (set (match_dup 1)
+ (unspec:INT1
+ [(FETCHOP:INT1 (match_dup 1) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (<CODE>, <MODE>mode, operands[1], operands[2],
+ NULL_RTX, operands[0], true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_new_<fetchop_name>si_internal"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (FETCHOP:SI
+ (match_operand:SI 1 "memory_operand" "+Z")
+ (match_operand:SI 2 "<fetchop_pred>" "<fetchopsi_constr>")))
+ (set (match_dup 1)
+ (unspec:SI
+ [(FETCHOP:SI (match_dup 1) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=&b"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[1], operands[2],
+ NULL_RTX, operands[0], operands[3]);
+ DONE;
+})
+
+(define_insn_and_split "*sync_new_<fetchop_name>di_internal"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (FETCHOP:DI
+ (match_operand:DI 1 "memory_operand" "+Z")
+ (match_operand:DI 2 "<fetchop_pred>" "<fetchopdi_constr>")))
+ (set (match_dup 1)
+ (unspec:DI
+ [(FETCHOP:DI (match_dup 1) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:DI 3 "=&b"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (<CODE>, operands[1], operands[2],
+ NULL_RTX, operands[0], operands[3]);
+ DONE;
+})
+
+(define_expand "sync_new_nand<mode>"
+ [(parallel [(set (match_operand:INT1 0 "gpc_reg_operand" "")
+ (ior:INT1
+ (not:INT1 (match_operand:INT1 1 "memory_operand" ""))
+ (not:INT1 (match_operand:INT1 2 "gpc_reg_operand" ""))))
+ (set (match_dup 1)
+ (unspec:INT1
+ [(ior:INT1 (not:INT1 (match_dup 1))
+ (not:INT1 (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (scratch:INT1))
+ (clobber (scratch:CC))])]
+ "TARGET_POWERPC"
+ "
+{
+ if (<MODE>mode != SImode && <MODE>mode != DImode)
+ {
+ FAIL;
+ if (PPC405_ERRATUM77)
+ FAIL;
+ rs6000_emit_sync (NOT, <MODE>mode, operands[1], operands[2],
+ NULL_RTX, operands[0], true);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*sync_new_nand<mode>_internal"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+ (ior:GPR
+ (not:GPR (match_operand:GPR 1 "memory_operand" "+Z"))
+ (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r"))))
+ (set (match_dup 1)
+ (unspec:GPR
+ [(ior:GPR (not:GPR (match_dup 1)) (not:GPR (match_dup 2)))]
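+/* Note: spu_sel is a bit-wise select: each result bit comes from b where
+ the corresponding pattern bit is 1 and from a where it is 0, which is
+ exactly the semantics of the underlying selb instruction. */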
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:GPR 3 "=&r"))
+ (clobber (match_scratch:CC 4 "=&x"))]
+ "TARGET_POWERPC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rs6000_split_atomic_op (NOT, operands[1], operands[2],
+ NULL_RTX, operands[0], operands[3]);
+ DONE;
+})
+
+; and<mode> variants without a cr0 clobber, so that the atomic splitters
+; do not generate an additional clobber and cause an internal consistency
+; failure; cr0 is already clobbered by the larx/stcx. sequence.
+(define_insn "*atomic_andsi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (unspec:SI [(match_operand:SI 1 "gpc_reg_operand" "%r,r,r,r")
+ (match_operand:SI 2 "and_operand" "?r,T,K,L")]
+ UNSPEC_AND))]
+ ""
+ "@
+ and %0,%1,%2
+ {rlinm|rlwinm} %0,%1,0,%m2,%M2
+ {andil.|andi.} %0,%1,%b2
+ {andiu.|andis.} %0,%1,%u2"
+ [(set_attr "type" "*,*,compare,compare")])
+
+(define_insn "*atomic_anddi"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r")
+ (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "%r,r,r,r,r")
+ (match_operand:DI 2 "and_operand" "?r,S,T,K,J")]
+ UNSPEC_AND))]
+ "TARGET_POWERPC64"
+ "@
+ and %0,%1,%2
+ rldic%B2 %0,%1,0,%S2
+ rlwinm %0,%1,0,%m2,%M2
+ andi. %0,%1,%b2
+ andis. %0,%1,%u2"
+ [(set_attr "type" "*,*,*,compare,compare")
+ (set_attr "length" "4,4,4,4,4")])
+
+; the sync_*_internal patterns all have these operands:
+; 0 - memory location
+; 1 - operand
+; 2 - value in memory after operation
+; 3 - value in memory immediately before operation
+
+(define_insn "*sync_addshort_internal"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "=&r")
+ (ior:SI (and:SI (plus:SI (match_operand:SI 0 "memory_operand" "+Z")
+ (match_operand:SI 1 "add_operand" "rI"))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (and:SI (not:SI (match_dup 4)) (match_dup 0))))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=&b") (match_dup 0))
+ (set (match_dup 0)
+ (unspec:SI [(ior:SI (and:SI (plus:SI (match_dup 0) (match_dup 1))
+ (match_dup 4))
+ (and:SI (not:SI (match_dup 4)) (match_dup 0)))]
+ UNSPEC_SYNC_OP))
+ (clobber (match_scratch:CC 5 "=&x"))
+ (clobber (match_scratch:SI 6 "=&r"))]
+ "TARGET_POWERPC && !PPC405_ERRATUM77"
+ "lwarx %3,%y0\n\tadd%I1 %2,%3,%1\n\tandc %6,%3,%4\n\tand %2,%2,%4\n\tor %2,%2,%6\n\tstwcx. %2,%y0\n\tbne- $-24"
+ [(set_attr "length" "28")])
+
+(define_insn "*sync_subshort_internal"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "=&r")
+ (ior:SI (and:SI (minus:SI (match_operand:SI 0 "memory_operand" "+Z")
+ (match_operand:SI 1 "add_operand" "rI"))
+ (match_operand:SI 4 "gpc_reg_operand" "r"))
+ (and:SI (not:SI (match_dup 4)) (match_dup 0))))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=&b") (match_dup 0))
+ (set (match_dup 0)
+ (unspec:SI [(ior:SI (and:SI (minus:SI (match_dup 0) (match_dup 1))
+ (match_dup 4))
+ (and:SI (not:SI (match_dup 4)) (match_dup 0)))]
+ UNSPEC_SYNC_OP))
+ (clobber (match_scratch:CC 5 "=&x"))
+ (clobber (match_scratch:SI 6 "=&r"))]
+ "TARGET_POWERPC && !PPC405_ERRATUM77"
+ "lwarx %3,%y0\n\tsubf %2,%1,%3\n\tandc %6,%3,%4\n\tand %2,%2,%4\n\tor %2,%2,%6\n\tstwcx. %2,%y0\n\tbne- $-24"
+ [(set_attr "length" "28")])
+
+(define_insn "*sync_andsi_internal"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "=&r,&r,&r,&r")
+ (and:SI (match_operand:SI 0 "memory_operand" "+Z,Z,Z,Z")
+ (match_operand:SI 1 "and_operand" "r,T,K,L")))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=&b,&b,&b,&b") (match_dup 0))
+ (set (match_dup 0)
+ (unspec:SI [(and:SI (match_dup 0) (match_dup 1))]
+ UNSPEC_SYNC_OP))
+ (clobber (match_scratch:CC 4 "=&x,&x,&x,&x"))]
+ "TARGET_POWERPC && !PPC405_ERRATUM77"
+ "@
+ lwarx %3,%y0\n\tand %2,%3,%1\n\tstwcx. %2,%y0\n\tbne- $-12
+ lwarx %3,%y0\n\trlwinm %2,%3,0,%m1,%M1\n\tstwcx. %2,%y0\n\tbne- $-12
+ lwarx %3,%y0\n\tandi. %2,%3,%b1\n\tstwcx. %2,%y0\n\tbne- $-12
+ lwarx %3,%y0\n\tandis. %2,%3,%u1\n\tstwcx. %2,%y0\n\tbne- $-12"
+ [(set_attr "length" "16,16,16,16")])
+
+(define_insn "*sync_boolsi_internal"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "=&r,&r,&r")
+ (match_operator:SI 4 "boolean_or_operator"
+ [(match_operand:SI 0 "memory_operand" "+Z,Z,Z")
+ (match_operand:SI 1 "logical_operand" "r,K,L")]))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=&b,&b,&b") (match_dup 0))
+ (set (match_dup 0) (unspec:SI [(match_dup 4)] UNSPEC_SYNC_OP))
+ (clobber (match_scratch:CC 5 "=&x,&x,&x"))]
+ "TARGET_POWERPC && !PPC405_ERRATUM77"
+ "@
+ lwarx %3,%y0\n\t%q4 %2,%3,%1\n\tstwcx. %2,%y0\n\tbne- $-12
+ lwarx %3,%y0\n\t%q4i %2,%3,%b1\n\tstwcx. %2,%y0\n\tbne- $-12
+ lwarx %3,%y0\n\t%q4is %2,%3,%u1\n\tstwcx. %2,%y0\n\tbne- $-12"
+ [(set_attr "length" "16,16,16")])
+
+; This pattern could also take immediate values of operand 1,
+; since the non-NOT version of the operator is used; but this is not
+; very useful, since in practice operand 1 is a full 32-bit value.
+; Likewise, operand 5 is in practice either <= 2^16 or it is a register.
+(define_insn "*sync_boolcshort_internal"
+ [(set (match_operand:SI 2 "gpc_reg_operand" "=&r")
+ (match_operator:SI 4 "boolean_or_operator"
+ [(xor:SI (not:SI (match_operand:SI 0 "memory_operand" "+Z"))
+ (not:SI (match_operand:SI 5 "logical_operand" "rK")))
+ (match_operand:SI 1 "gpc_reg_operand" "r")]))
+ (set (match_operand:SI 3 "gpc_reg_operand" "=&b") (match_dup 0))
+ (set (match_dup 0) (unspec:SI [(match_dup 4)] UNSPEC_SYNC_OP))
+ (clobber (match_scratch:CC 6 "=&x"))]
+ "TARGET_POWERPC && !PPC405_ERRATUM77"
+ "lwarx %3,%y0\n\txor%I2 %2,%3,%5\n\t%q4 %2,%2,%1\n\tstwcx. %2,%y0\n\tbne- $-16"
+ [(set_attr "length" "20")])
+
+(define_insn "isync"
+ [(set (mem:BLK (match_scratch 0 "X"))
+ (unspec_volatile:BLK [(mem:BLK (match_scratch 1 "X"))] UNSPEC_ISYNC))]
+ ""
+ "{ics|isync}"
+ [(set_attr "type" "isync")])
+
+(define_expand "sync_lock_release<mode>"
+ [(set (match_operand:INT 0 "memory_operand")
+ (match_operand:INT 1 "any_operand"))]
+ ""
+ "
+{
+ emit_insn (gen_lwsync ());
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+}")
+
+; Some AIX assemblers don't accept lwsync, so we use a .long.
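+; (0x7c2004ac is the encoding of lwsync, so the .long form assembles to
+; the identical instruction.)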
+(define_insn "lwsync"
+ [(set (mem:BLK (match_scratch 0 "X"))
+ (unspec_volatile:BLK [(mem:BLK (match_scratch 1 "X"))] UNSPEC_LWSYNC))]
+ ""
+{
+ if (TARGET_NO_LWSYNC)
+ return "sync";
+ else
+ return (TARGET_LWSYNC_INSTRUCTION) ? "lwsync" : ".long 0x7c2004ac";
+}
+ [(set_attr "type" "sync")])
+
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
new file mode 100644
index 000000000..aeb934f6e
--- /dev/null
+++ b/gcc/config/rs6000/sysv4.h
@@ -0,0 +1,1039 @@
+/* Target definitions for GNU compiler for PowerPC running System V.4
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Cygnus Support.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Header files should be C++ aware in general. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C
+
+/* Yes! We are ELF. */
+#define TARGET_OBJECT_FORMAT OBJECT_ELF
+
+/* Default ABI to compile code for. */
+#define DEFAULT_ABI rs6000_current_abi
+
+/* Default ABI to use. */
+#define RS6000_ABI_NAME "sysv"
+
+/* Override rs6000.h definition. */
+#undef ASM_DEFAULT_SPEC
+#define ASM_DEFAULT_SPEC "-mppc"
+
+#define TARGET_TOC ((target_flags & MASK_64BIT) \
+ || ((target_flags & (MASK_RELOCATABLE \
+ | MASK_MINIMAL_TOC)) \
+ && flag_pic > 1) \
+ || DEFAULT_ABI == ABI_AIX)
+
+#define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE)
+#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
+#define TARGET_PROTOTYPE target_prototype
+#define TARGET_NO_PROTOTYPE (! TARGET_PROTOTYPE)
+#define TARGET_NO_TOC (! TARGET_TOC)
+#define TARGET_NO_EABI (! TARGET_EABI)
+#define TARGET_REGNAMES rs6000_regnames
+
+#ifdef HAVE_AS_REL16
+#undef TARGET_SECURE_PLT
+#define TARGET_SECURE_PLT secure_plt
+#endif
+
+#define SDATA_DEFAULT_SIZE 8
+
+/* The macro SUBTARGET_OVERRIDE_OPTIONS is provided for subtargets, to
+ get control in TARGET_OPTION_OVERRIDE. */
+
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (!global_options_set.x_g_switch_value) \
+ g_switch_value = SDATA_DEFAULT_SIZE; \
+ \
+ if (rs6000_abi_name == NULL) \
+ rs6000_abi_name = RS6000_ABI_NAME; \
+ \
+ if (!strcmp (rs6000_abi_name, "sysv")) \
+ rs6000_current_abi = ABI_V4; \
+ else if (!strcmp (rs6000_abi_name, "sysv-noeabi")) \
+ { \
+ rs6000_current_abi = ABI_V4; \
+ target_flags &= ~ MASK_EABI; \
+ } \
+ else if (!strcmp (rs6000_abi_name, "sysv-eabi") \
+ || !strcmp (rs6000_abi_name, "eabi")) \
+ { \
+ rs6000_current_abi = ABI_V4; \
+ target_flags |= MASK_EABI; \
+ } \
+ else if (!strcmp (rs6000_abi_name, "aixdesc")) \
+ rs6000_current_abi = ABI_AIX; \
+ else if (!strcmp (rs6000_abi_name, "freebsd")) \
+ rs6000_current_abi = ABI_V4; \
+ else if (!strcmp (rs6000_abi_name, "linux")) \
+ { \
+ if (TARGET_64BIT) \
+ rs6000_current_abi = ABI_AIX; \
+ else \
+ rs6000_current_abi = ABI_V4; \
+ } \
+ else if (!strcmp (rs6000_abi_name, "gnu")) \
+ rs6000_current_abi = ABI_V4; \
+ else if (!strcmp (rs6000_abi_name, "netbsd")) \
+ rs6000_current_abi = ABI_V4; \
+ else if (!strcmp (rs6000_abi_name, "openbsd")) \
+ rs6000_current_abi = ABI_V4; \
+ else if (!strcmp (rs6000_abi_name, "i960-old")) \
+ { \
+ rs6000_current_abi = ABI_V4; \
+ target_flags |= (MASK_LITTLE_ENDIAN | MASK_EABI); \
+ target_flags &= ~MASK_STRICT_ALIGN; \
+ TARGET_NO_BITFIELD_WORD = 1; \
+ } \
+ else \
+ { \
+ rs6000_current_abi = ABI_V4; \
+ error ("bad value for -mcall-%s", rs6000_abi_name); \
+ } \
+ \
+ if (rs6000_sdata_name) \
+ { \
+ if (!strcmp (rs6000_sdata_name, "none")) \
+ rs6000_sdata = SDATA_NONE; \
+ else if (!strcmp (rs6000_sdata_name, "data")) \
+ rs6000_sdata = SDATA_DATA; \
+ else if (!strcmp (rs6000_sdata_name, "default")) \
+ rs6000_sdata = (TARGET_EABI) ? SDATA_EABI : SDATA_SYSV; \
+ else if (!strcmp (rs6000_sdata_name, "sysv")) \
+ rs6000_sdata = SDATA_SYSV; \
+ else if (!strcmp (rs6000_sdata_name, "eabi")) \
+ rs6000_sdata = SDATA_EABI; \
+ else \
+ error ("bad value for -msdata=%s", rs6000_sdata_name); \
+ } \
+ else if (DEFAULT_ABI == ABI_V4) \
+ { \
+ rs6000_sdata = SDATA_DATA; \
+ rs6000_sdata_name = "data"; \
+ } \
+ else \
+ { \
+ rs6000_sdata = SDATA_NONE; \
+ rs6000_sdata_name = "none"; \
+ } \
+ \
+ if (TARGET_RELOCATABLE && \
+ (rs6000_sdata == SDATA_EABI || rs6000_sdata == SDATA_SYSV)) \
+ { \
+ rs6000_sdata = SDATA_DATA; \
+ error ("-mrelocatable and -msdata=%s are incompatible", \
+ rs6000_sdata_name); \
+ } \
+ \
+ else if (flag_pic && DEFAULT_ABI != ABI_AIX \
+ && (rs6000_sdata == SDATA_EABI \
+ || rs6000_sdata == SDATA_SYSV)) \
+ { \
+ rs6000_sdata = SDATA_DATA; \
+ error ("-f%s and -msdata=%s are incompatible", \
+ (flag_pic > 1) ? "PIC" : "pic", \
+ rs6000_sdata_name); \
+ } \
+ \
+ if ((rs6000_sdata != SDATA_NONE && DEFAULT_ABI != ABI_V4) \
+ || (rs6000_sdata == SDATA_EABI && !TARGET_EABI)) \
+ { \
+ rs6000_sdata = SDATA_NONE; \
+ error ("-msdata=%s and -mcall-%s are incompatible", \
+ rs6000_sdata_name, rs6000_abi_name); \
+ } \
+ \
+ targetm.have_srodata_section = rs6000_sdata == SDATA_EABI; \
+ \
+ if (TARGET_RELOCATABLE && !TARGET_MINIMAL_TOC) \
+ { \
+ target_flags |= MASK_MINIMAL_TOC; \
+ error ("-mrelocatable and -mno-minimal-toc are incompatible"); \
+ } \
+ \
+ if (TARGET_RELOCATABLE && rs6000_current_abi == ABI_AIX) \
+ { \
+ target_flags &= ~MASK_RELOCATABLE; \
+ error ("-mrelocatable and -mcall-%s are incompatible", \
+ rs6000_abi_name); \
+ } \
+ \
+ if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi == ABI_AIX) \
+ { \
+ flag_pic = 0; \
+ error ("-fPIC and -mcall-%s are incompatible", \
+ rs6000_abi_name); \
+ } \
+ \
+ if (rs6000_current_abi == ABI_AIX && TARGET_LITTLE_ENDIAN) \
+ { \
+ target_flags &= ~MASK_LITTLE_ENDIAN; \
+ error ("-mcall-aixdesc must be big endian"); \
+ } \
+ \
+ if (TARGET_SECURE_PLT != secure_plt) \
+ { \
+ error ("-msecure-plt not supported by your assembler"); \
+ } \
+ \
+ /* Treat -fPIC the same as -mrelocatable. */ \
+ if (flag_pic > 1 && DEFAULT_ABI != ABI_AIX) \
+ { \
+ target_flags |= MASK_RELOCATABLE | MASK_MINIMAL_TOC; \
+ TARGET_NO_FP_IN_TOC = 1; \
+ } \
+ \
+ else if (TARGET_RELOCATABLE) \
+ if (!flag_pic) \
+ flag_pic = 2; \
+} while (0)
+
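+/* For example (tracing the logic above, not additional configuration):
+ -mcall-linux selects ABI_V4 on a 32-bit target but ABI_AIX with -m64;
+ -mrelocatable is rejected in combination with -msdata=eabi or
+ -msdata=sysv; and -mrelocatable with no explicit PIC level behaves
+ like -fPIC (flag_pic is forced to 2). */
+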
+#ifndef RS6000_BI_ARCH
+# define SUBSUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if ((TARGET_DEFAULT ^ target_flags) & MASK_64BIT) \
+ error ("-m%s not supported in this configuration", \
+ (target_flags & MASK_64BIT) ? "64" : "32"); \
+} while (0)
+#endif
+
+/* Override rs6000.h definition. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS)
+
+/* Override rs6000.h definition. */
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC750
+
+/* SVR4 only defined for PowerPC, so short-circuit POWER patterns. */
+#undef TARGET_POWER
+#define TARGET_POWER 0
+
+#define FIXED_R2 1
+/* System V.4 uses register 13 as a pointer to the small data area,
+ so it is not available to the normal user. */
+#define FIXED_R13 1
+
+/* Override default big endianism definitions in rs6000.h. */
+#undef BYTES_BIG_ENDIAN
+#undef WORDS_BIG_ENDIAN
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN)
+
+/* Define the cutoff for using external functions to save floating point.
+ When optimizing for size, saving is done inline only for the last two
+ registers (f62 and f63); everything larger goes through the external
+ routines. */
+#define FP_SAVE_INLINE(FIRST_REG) (optimize_size \
+ ? ((FIRST_REG) == 62 \
+ || (FIRST_REG) == 63) \
+ : (FIRST_REG) < 64)
+/* And similarly for general purpose registers. */
+#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 32 \
+ && !optimize_size)
+
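+/* A sketch of the effect: with -Os, a prologue that must save f58..f63
+ is expected to call an out-of-line routine (named from SAVE_FP_PREFIX
+ below, e.g. "_savefpr_58"), while saving only f62 or f63 stays inline;
+ without -Os all FP saves are inline. */
+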
+/* Put jump tables in read-only memory, rather than in .text. */
+#define JUMP_TABLES_IN_TEXT_SECTION 0
+
+/* Prefix and suffix to use when saving floating point. */
+#define SAVE_FP_PREFIX "_savefpr_"
+#define SAVE_FP_SUFFIX ""
+
+/* Prefix and suffix to use when restoring floating point. */
+#define RESTORE_FP_PREFIX "_restfpr_"
+#define RESTORE_FP_SUFFIX ""
+
+/* Type used for size_t, as a string used in a declaration. */
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+/* Type used for ptrdiff_t, as a string used in a declaration. */
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Make int foo : 8 not cause structures to be aligned to an int boundary. */
+/* Override elfos.h definition. */
+#undef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS (TARGET_BITFIELD_TYPE)
+
+#undef BITFIELD_NBYTES_LIMITED
+#define BITFIELD_NBYTES_LIMITED (TARGET_NO_BITFIELD_WORD)
+
+/* Define this macro to be the value 1 if instructions will fail to
+ work if given data not on the nominal alignment. If instructions
+ will merely go slower in that case, define this macro as 0. */
+#undef STRICT_ALIGNMENT
+#define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN)
+
+/* Define this macro if you wish to preserve a certain alignment for
+ the stack pointer, greater than what the hardware enforces. The
+ definition is a C expression for the desired alignment (measured
+ in bits). This macro must evaluate to a value equal to or larger
+ than STACK_BOUNDARY.
+ For the SYSV ABI and variants the alignment of the stack pointer
+ is usually controlled manually in rs6000.c. However, to maintain
+ alignment across alloca () in all circumstances,
+ PREFERRED_STACK_BOUNDARY needs to be set as well.
+ This has the additional advantage of allowing a bigger maximum
+ alignment of user objects on the stack. */
+
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY 128
+
+/* Real stack boundary as mandated by the appropriate ABI. */
+#define ABI_STACK_BOUNDARY \
+ ((TARGET_EABI && !TARGET_ALTIVEC && !TARGET_ALTIVEC_ABI) ? 64 : 128)
+
+/* An expression for the alignment of a structure field FIELD if the
+ alignment computed in the usual way is COMPUTED. */
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \
+ ? 128 : COMPUTED)
+
+#undef BIGGEST_FIELD_ALIGNMENT
+
+/* Use ELF style section commands. */
+
+#define TEXT_SECTION_ASM_OP "\t.section\t\".text\""
+
+#define DATA_SECTION_ASM_OP "\t.section\t\".data\""
+
+#define BSS_SECTION_ASM_OP "\t.section\t\".bss\""
+
+/* Override elfos.h definition. */
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t\".init\",\"ax\""
+
+/* Override elfos.h definition. */
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t\".fini\",\"ax\""
+
+#define TOC_SECTION_ASM_OP "\t.section\t\".got\",\"aw\""
+
+/* Put PC relative got entries in .got2. */
+#define MINIMAL_TOC_SECTION_ASM_OP \
+ (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI != ABI_AIX) \
+ ? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
+
+#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\""
+#define SDATA2_SECTION_ASM_OP "\t.section\t\".sdata2\",\"a\""
+#define SBSS_SECTION_ASM_OP "\t.section\t\".sbss\",\"aw\",@nobits"
+
+/* Override default elf definitions. */
+#define TARGET_ASM_INIT_SECTIONS rs6000_elf_asm_init_sections
+#undef TARGET_ASM_RELOC_RW_MASK
+#define TARGET_ASM_RELOC_RW_MASK rs6000_elf_reloc_rw_mask
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION rs6000_elf_select_rtx_section
+
+/* Return nonzero if this entry is to be written into the constant pool
+ in a special way. We do so if this is a SYMBOL_REF, LABEL_REF or a CONST
+ containing one of them. If -mfp-in-toc (the default), we also do
+ this for floating-point constants. We actually can only do this
+ if the FP formats of the target and host machines are the same, but
+ we can't check that since not every file that uses these target macros
+ includes real.h.
+
+ Unlike AIX, we don't key off of -mminimal-toc, but instead do not
+ allow floating point constants in the TOC if -mrelocatable. */
+
+#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \
+ (TARGET_TOC \
+ && (GET_CODE (X) == SYMBOL_REF \
+ || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST_INT \
+ && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \
+ || (!TARGET_NO_FP_IN_TOC \
+ && !TARGET_RELOCATABLE \
+ && GET_CODE (X) == CONST_DOUBLE \
+ && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
+ && BITS_PER_WORD == HOST_BITS_PER_INT)))
+
+/* These macros generate the special .type and .size directives which
+ are used to set the corresponding fields of the linker symbol table
+ entries in an ELF object file under SVR4. These macros also output
+ the starting labels for the relevant functions/objects. */
+
+/* Write the extra assembler code needed to declare a function properly.
+ Some SVR4 assemblers also need something extra said about the
+ function's return value. We allow for that here. */
+
+/* Override elfos.h definition. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ rs6000_elf_declare_function_name ((FILE), (NAME), (DECL))
+
+/* The USER_LABEL_PREFIX stuff is affected by the -fleading-underscore
+ flag. The LOCAL_LABEL_PREFIX variable is used by dbxelf.h. */
+
+#define LOCAL_LABEL_PREFIX "."
+#define USER_LABEL_PREFIX ""
+
+#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \
+ asm_fprintf (FILE, "%L%s", PREFIX)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+/* This says how to output assembler code to declare an
+ uninitialized internal linkage data object. Under SVR4,
+ the linker seems to want the alignment of data objects
+ to depend on their types. We do exactly that here. */
+
+#define LOCAL_ASM_OP "\t.local\t"
+
+#define LCOMM_ASM_OP "\t.lcomm\t"
+
+/* Describe how to emit uninitialized local items. */
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+do { \
+ if ((DECL) && rs6000_elf_in_small_data_p (DECL)) \
+ { \
+ switch_to_section (sbss_section); \
+ ASM_OUTPUT_ALIGN (FILE, exact_log2 (ALIGN / BITS_PER_UNIT)); \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ ASM_OUTPUT_SKIP (FILE, SIZE); \
+ if (!flag_inhibit_size_directive && (SIZE) > 0) \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \
+ } \
+ else \
+ { \
+ fprintf (FILE, "%s", LCOMM_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+} while (0)
+
+/* Describe how to emit uninitialized external linkage items. */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+do { \
+ ASM_OUTPUT_ALIGNED_DECL_LOCAL (FILE, DECL, NAME, SIZE, ALIGN); \
+} while (0)
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* To support -falign-* switches we need to use .p2align so
+ that alignment directives in code sections will be padded
+ with no-op instructions, rather than zeroes. */
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) \
+ { \
+ if ((MAX_SKIP) == 0) \
+ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* This is how to output code to push a register on the stack.
+ It need not be very fast code.
+
+ On the rs6000, we must keep the backchain up to date. In order
+ to simplify things, always allocate 16 bytes for a push (System V
+ wants to keep the stack aligned to a 16-byte boundary). */
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+do { \
+ if (DEFAULT_ABI == ABI_V4) \
+ asm_fprintf (FILE, \
+ "\t{stu|stwu} %s,-16(%s)\n\t{st|stw} %s,12(%s)\n", \
+ reg_names[1], reg_names[1], reg_names[REGNO], \
+ reg_names[1]); \
+} while (0)
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+do { \
+ if (DEFAULT_ABI == ABI_V4) \
+ asm_fprintf (FILE, \
+ "\t{l|lwz} %s,12(%s)\n\t{ai|addic} %s,%s,16\n", \
+ reg_names[REGNO], reg_names[1], reg_names[1], \
+ reg_names[1]); \
+} while (0)
+
+extern int fixuplabelno;
+
+/* Handle constructors specially for -mrelocatable. */
+#define TARGET_ASM_CONSTRUCTOR rs6000_elf_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR rs6000_elf_asm_out_destructor
+
+/* This is the end of what might become sysv4.h. */
+
+/* Use DWARF 2 debugging information by default. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Historically we have also supported stabs debugging. */
+#define DBX_DEBUGGING_INFO 1
+
+#define TARGET_ENCODE_SECTION_INFO rs6000_elf_encode_section_info
+#define TARGET_IN_SMALL_DATA_P rs6000_elf_in_small_data_p
+
+/* The ELF version doesn't encode [DS] or whatever at the end of symbols. */
+
+#define RS6000_OUTPUT_BASENAME(FILE, NAME) \
+ assemble_name (FILE, NAME)
+
+/* We have to output the stabs for the function name *first*, before
+ outputting its label. */
+
+#define DBX_FUNCTION_FIRST
+
+/* This is the end of what might become sysv4dbx.h. */
+
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC System V.4)");
+#endif
+
+#define TARGET_OS_SYSV_CPP_BUILTINS() \
+ do \
+ { \
+ if (target_flags_explicit \
+ & MASK_RELOCATABLE) \
+ builtin_define ("_RELOCATABLE"); \
+ } \
+ while (0)
+
+#ifndef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("PPC"); \
+ builtin_define_std ("unix"); \
+ builtin_define ("__svr4__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=svr4"); \
+ builtin_assert ("cpu=powerpc"); \
+ builtin_assert ("machine=powerpc"); \
+ TARGET_OS_SYSV_CPP_BUILTINS (); \
+ } \
+ while (0)
+#endif
+
+#undef ASM_SPEC
+#define ASM_SPEC "%(asm_cpu) \
+%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \
+%{mrelocatable} %{mrelocatable-lib} %{fpic|fpie|fPIC|fPIE:-K PIC} \
+%{memb|msdata=eabi: -memb} \
+%{mlittle|mlittle-endian:-mlittle; \
+ mbig|mbig-endian :-mbig; \
+ mcall-aixdesc | \
+ mcall-freebsd | \
+ mcall-netbsd | \
+ mcall-openbsd | \
+ mcall-linux | \
+ mcall-gnu :-mbig; \
+ mcall-i960-old :-mlittle}"
+
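+/* In spec syntax, %{a|b:X;c:Y;:Z} substitutes X if -a or -b was given,
+ Y if -c was given, and Z otherwise; the block above therefore forces
+ -mbig for the aixdesc and BSD/Linux/GNU call conventions and -mlittle
+ for -mcall-i960-old. */
+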
+#define CC1_ENDIAN_BIG_SPEC ""
+
+#define CC1_ENDIAN_LITTLE_SPEC "\
+%{!mstrict-align: %{!mno-strict-align: \
+ %{!mcall-i960-old: \
+ -mstrict-align \
+ } \
+}}"
+
+#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_big)"
+
+#ifndef CC1_SECURE_PLT_DEFAULT_SPEC
+#define CC1_SECURE_PLT_DEFAULT_SPEC ""
+#endif
+
+/* Pass -G xxx to the compiler and set correct endian mode. */
+#define CC1_SPEC "%{G*} %(cc1_cpu) \
+%{mlittle|mlittle-endian: %(cc1_endian_little); \
+ mbig |mbig-endian : %(cc1_endian_big); \
+ mcall-aixdesc | \
+ mcall-freebsd | \
+ mcall-netbsd | \
+ mcall-openbsd | \
+ mcall-linux | \
+ mcall-gnu : -mbig %(cc1_endian_big); \
+ mcall-i960-old : -mlittle %(cc1_endian_little); \
+ : %(cc1_endian_default)} \
+%{meabi: %{!mcall-*: -mcall-sysv }} \
+%{!meabi: %{!mno-eabi: \
+ %{mrelocatable: -meabi } \
+ %{mcall-freebsd: -mno-eabi } \
+ %{mcall-i960-old: -meabi } \
+ %{mcall-linux: -mno-eabi } \
+ %{mcall-gnu: -mno-eabi } \
+ %{mcall-netbsd: -mno-eabi } \
+ %{mcall-openbsd: -mno-eabi }}} \
+%{msdata: -msdata=default} \
+%{mno-sdata: -msdata=none} \
+%{!mbss-plt: %{!msecure-plt: %(cc1_secure_plt_default)}} \
+%{profile: -p}"
+
+/* Default starting address, chosen by the board or OS switch, if any. */
+#define LINK_START_SPEC "\
+%{mads : %(link_start_ads) ; \
+ myellowknife : %(link_start_yellowknife) ; \
+ mmvme : %(link_start_mvme) ; \
+ msim : %(link_start_sim) ; \
+ mcall-freebsd: %(link_start_freebsd) ; \
+ mcall-linux : %(link_start_linux) ; \
+ mcall-gnu : %(link_start_gnu) ; \
+ mcall-netbsd : %(link_start_netbsd) ; \
+ mcall-openbsd: %(link_start_openbsd) ; \
+ : %(link_start_default) }"
+
+#define LINK_START_DEFAULT_SPEC ""
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%{h*} %{v:-V} %{!msdata=none:%{G*}} %{msdata=none:-G0} \
+%{R*} \
+%(link_shlib) \
+%{!T*: %(link_start) } \
+%(link_target) \
+%(link_os)"
+
+/* Shared libraries are not default. */
+#define LINK_SHLIB_SPEC "\
+%{!mshlib: %{!shared: %{!symbolic: -dn -Bstatic}}} \
+%{static: } \
+%{shared:-G -dy -z text } \
+%{symbolic:-Bsymbolic -G -dy -z text }"
+
+/* Override the default target of the linker. */
+#define LINK_TARGET_SPEC "\
+%{mlittle: --oformat elf32-powerpcle } %{mlittle-endian: --oformat elf32-powerpcle } \
+%{!mlittle: %{!mlittle-endian: %{!mbig: %{!mbig-endian: \
+ %{mcall-i960-old: --oformat elf32-powerpcle} \
+ }}}}"
+
+/* Any specific OS flags. */
+#define LINK_OS_SPEC "\
+%{mads : %(link_os_ads) ; \
+ myellowknife : %(link_os_yellowknife) ; \
+ mmvme : %(link_os_mvme) ; \
+ msim : %(link_os_sim) ; \
+ mcall-freebsd: %(link_os_freebsd) ; \
+ mcall-linux : %(link_os_linux) ; \
+ mcall-gnu : %(link_os_gnu) ; \
+ mcall-netbsd : %(link_os_netbsd) ; \
+ mcall-openbsd: %(link_os_openbsd) ; \
+ : %(link_os_default) }"
+
+#define LINK_OS_DEFAULT_SPEC ""
+
+#define DRIVER_SELF_SPECS "%{mfpu=none: %<mfpu=* \
+ %<msingle-float %<mdouble-float}"
+
+/* Override rs6000.h definition. */
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix: -D_POSIX_SOURCE} \
+%{mads : %(cpp_os_ads) ; \
+ myellowknife : %(cpp_os_yellowknife) ; \
+ mmvme : %(cpp_os_mvme) ; \
+ msim : %(cpp_os_sim) ; \
+ mcall-freebsd: %(cpp_os_freebsd) ; \
+ mcall-linux : %(cpp_os_linux) ; \
+ mcall-gnu : %(cpp_os_gnu) ; \
+ mcall-netbsd : %(cpp_os_netbsd) ; \
+ mcall-openbsd: %(cpp_os_openbsd) ; \
+ : %(cpp_os_default) }"
+
+#define CPP_OS_DEFAULT_SPEC ""
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+%{mads : %(startfile_ads) ; \
+ myellowknife : %(startfile_yellowknife) ; \
+ mmvme : %(startfile_mvme) ; \
+ msim : %(startfile_sim) ; \
+ mcall-freebsd: %(startfile_freebsd) ; \
+ mcall-linux : %(startfile_linux) ; \
+ mcall-gnu : %(startfile_gnu) ; \
+ mcall-netbsd : %(startfile_netbsd) ; \
+ mcall-openbsd: %(startfile_openbsd) ; \
+ : %(startfile_default) }"
+
+#define STARTFILE_DEFAULT_SPEC "ecrti.o%s crtbegin.o%s"
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+%{mads : %(lib_ads) ; \
+ myellowknife : %(lib_yellowknife) ; \
+ mmvme : %(lib_mvme) ; \
+ msim : %(lib_sim) ; \
+ mcall-freebsd: %(lib_freebsd) ; \
+ mcall-linux : %(lib_linux) ; \
+ mcall-gnu : %(lib_gnu) ; \
+ mcall-netbsd : %(lib_netbsd) ; \
+ mcall-openbsd: %(lib_openbsd) ; \
+ : %(lib_default) }"
+
+#define LIB_DEFAULT_SPEC "-lc"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "\
+%{mads : %(endfile_ads) ; \
+ myellowknife : %(endfile_yellowknife) ; \
+ mmvme : %(endfile_mvme) ; \
+ msim : %(endfile_sim) ; \
+ mcall-freebsd: %(endfile_freebsd) ; \
+ mcall-linux : %(endfile_linux) ; \
+ mcall-gnu : %(endfile_gnu) ; \
+ mcall-netbsd : %(endfile_netbsd) ; \
+ mcall-openbsd: %(endfile_openbsd) ; \
+ : %(crtsavres_default) %(endfile_default) }"
+
+#define CRTSAVRES_DEFAULT_SPEC ""
+
+#define ENDFILE_DEFAULT_SPEC "crtend.o%s ecrtn.o%s"
+
+/* Motorola ADS support. */
+#define LIB_ADS_SPEC "--start-group -lads -lc --end-group"
+
+#define STARTFILE_ADS_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s"
+
+#define ENDFILE_ADS_SPEC "crtend.o%s ecrtn.o%s"
+
+#define LINK_START_ADS_SPEC "-T ads.ld%s"
+
+#define LINK_OS_ADS_SPEC ""
+
+#define CPP_OS_ADS_SPEC ""
+
+/* Motorola Yellowknife support. */
+#define LIB_YELLOWKNIFE_SPEC "--start-group -lyk -lc --end-group"
+
+#define STARTFILE_YELLOWKNIFE_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s"
+
+#define ENDFILE_YELLOWKNIFE_SPEC "crtend.o%s ecrtn.o%s"
+
+#define LINK_START_YELLOWKNIFE_SPEC "-T yellowknife.ld%s"
+
+#define LINK_OS_YELLOWKNIFE_SPEC ""
+
+#define CPP_OS_YELLOWKNIFE_SPEC ""
+
+/* Motorola MVME support. */
+#define LIB_MVME_SPEC "--start-group -lmvme -lc --end-group"
+
+#define STARTFILE_MVME_SPEC "ecrti.o%s crt0.o%s crtbegin.o%s"
+
+#define ENDFILE_MVME_SPEC "crtend.o%s ecrtn.o%s"
+
+#define LINK_START_MVME_SPEC "-Ttext 0x40000"
+
+#define LINK_OS_MVME_SPEC ""
+
+#define CPP_OS_MVME_SPEC ""
+
+/* Support for the PowerPC simulator, which is based on NetBSD system calls. */
+#define LIB_SIM_SPEC "--start-group -lsim -lc --end-group"
+
+#define STARTFILE_SIM_SPEC "ecrti.o%s sim-crt0.o%s crtbegin.o%s"
+
+#define ENDFILE_SIM_SPEC "crtend.o%s ecrtn.o%s"
+
+#define LINK_START_SIM_SPEC ""
+
+#define LINK_OS_SIM_SPEC "-m elf32ppcsim"
+
+#define CPP_OS_SIM_SPEC ""
+
+/* FreeBSD support. */
+
+#define CPP_OS_FREEBSD_SPEC "\
+ -D__PPC__ -D__ppc__ -D__PowerPC__ -D__powerpc__ \
+ -Acpu=powerpc -Amachine=powerpc"
+
+#define STARTFILE_FREEBSD_SPEC FBSD_STARTFILE_SPEC
+#define ENDFILE_FREEBSD_SPEC FBSD_ENDFILE_SPEC
+#define LIB_FREEBSD_SPEC FBSD_LIB_SPEC
+#define LINK_START_FREEBSD_SPEC ""
+
+#define LINK_OS_FREEBSD_SPEC "\
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic: -export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
+
+/* GNU/Linux support. */
+#define LIB_LINUX_SPEC "%{mnewlib: --start-group -llinux -lc --end-group } \
+%{!mnewlib: %{pthread:-lpthread} %{shared:-lc} \
+%{!shared: %{profile:-lc_p} %{!profile:-lc}}}"
+
+#ifdef HAVE_LD_PIE
+#define STARTFILE_LINUX_SPEC "\
+%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
+%{mnewlib:ecrti.o%s;:crti.o%s} \
+%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_LINUX_SPEC "\
+%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \
+%{mnewlib:ecrti.o%s;:crti.o%s} \
+%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
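+
+/* For example, with HAVE_LD_PIE a "gcc -pie" link starts with Scrt1.o,
+   crti.o and crtbeginS.o, whereas a plain link uses crt1.o, crti.o and
+   crtbegin.o (with -mnewlib substituting ecrti.o for crti.o).  */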
+
+#define ENDFILE_LINUX_SPEC "\
+%{shared|pie:crtendS.o%s;:crtend.o%s} \
+%{mnewlib:ecrtn.o%s;:crtn.o%s}"
+
+#define LINK_START_LINUX_SPEC ""
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0"
+#if DEFAULT_LIBC == LIBC_UCLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}"
+#elif !defined (DEFAULT_LIBC) || DEFAULT_LIBC == LIBC_GLIBC
+#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}"
+#else
+#error "Unsupported DEFAULT_LIBC"
+#endif
+#define LINUX_DYNAMIC_LINKER \
+ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER)
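+
+/* That is, a glibc-default toolchain links against /lib/ld.so.1 unless
+   -muclibc is given, and a uClibc-default toolchain uses
+   /lib/ld-uClibc.so.0 unless -mglibc is given.  */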
+
+#define LINK_OS_LINUX_SPEC "-m elf32ppclinux %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "}}"
+
+#if defined(HAVE_LD_EH_FRAME_HDR)
+# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif
+
+#define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \
+%{!undef: \
+ %{!ansi: \
+ %{!std=*:-Dunix -D__unix -Dlinux -D__linux} \
+ %{std=gnu*:-Dunix -D__unix -Dlinux -D__linux}}} \
+-Asystem=linux -Asystem=unix -Asystem=posix %{pthread:-D_REENTRANT}"
+
+/* GNU/Hurd support. */
+#define LIB_GNU_SPEC "%{mnewlib: --start-group -lgnu -lc --end-group } \
+%{!mnewlib: %{shared:-lc} %{!shared: %{pthread:-lpthread } \
+%{profile:-lc_p} %{!profile:-lc}}}"
+
+#define STARTFILE_GNU_SPEC "\
+%{!shared: %{!static: %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} %{!p:crt1.o%s}}}} \
+%{static: %{pg:gcrt0.o%s} %{!pg:%{p:gcrt0.o%s} %{!p:crt0.o%s}}} \
+%{mnewlib: ecrti.o%s} %{!mnewlib: crti.o%s} \
+%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+#define ENDFILE_GNU_SPEC "%{!shared:crtend.o%s} %{shared:crtendS.o%s} \
+%{mnewlib: ecrtn.o%s} %{!mnewlib: crtn.o%s}"
+
+#define LINK_START_GNU_SPEC ""
+
+#define LINK_OS_GNU_SPEC "-m elf32ppclinux %{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /lib/ld.so.1}}"
+
+#define CPP_OS_GNU_SPEC "-D__unix__ -D__gnu_hurd__ -D__GNU__ \
+%{!undef: \
+ %{!ansi: -Dunix -D__unix}} \
+-Asystem=gnu -Asystem=unix -Asystem=posix %{pthread:-D_REENTRANT}"
+
+/* NetBSD support. */
+#define LIB_NETBSD_SPEC "\
+-lc"
+
+#define STARTFILE_NETBSD_SPEC "\
+ncrti.o%s crt0.o%s \
+%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+#define ENDFILE_NETBSD_SPEC "\
+%{!shared:crtend.o%s} %{shared:crtendS.o%s} \
+ncrtn.o%s"
+
+#define LINK_START_NETBSD_SPEC "\
+"
+
+#define LINK_OS_NETBSD_SPEC "\
+%{!shared: %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /usr/libexec/ld.elf_so}}"
+
+#define CPP_OS_NETBSD_SPEC "\
+-D__powerpc__ -D__NetBSD__ -D__KPRINTF_ATTRIBUTE__"
+
+/* OpenBSD support. */
+#ifndef LIB_OPENBSD_SPEC
+#define LIB_OPENBSD_SPEC "%{!shared:%{pthread:-lpthread%{p:_p}%{!p:%{pg:_p}}}} %{!shared:-lc%{p:_p}%{!p:%{pg:_p}}}"
+#endif
+
+#ifndef STARTFILE_OPENBSD_SPEC
+#define STARTFILE_OPENBSD_SPEC "\
+%{!shared: %{pg:gcrt0.o%s} %{!pg:%{p:gcrt0.o%s} %{!p:crt0.o%s}}} \
+%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+#endif
+
+#ifndef ENDFILE_OPENBSD_SPEC
+#define ENDFILE_OPENBSD_SPEC "\
+%{!shared:crtend.o%s} %{shared:crtendS.o%s}"
+#endif
+
+#ifndef LINK_START_OPENBSD_SPEC
+#define LINK_START_OPENBSD_SPEC "-Ttext 0x400074"
+#endif
+
+#ifndef LINK_OS_OPENBSD_SPEC
+#define LINK_OS_OPENBSD_SPEC ""
+#endif
+
+#ifndef CPP_OS_OPENBSD_SPEC
+#define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
+#endif
+
+/* Define any extra SPECS that the compiler needs to generate. */
+/* Override rs6000.h definition. */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "crtsavres_default", CRTSAVRES_DEFAULT_SPEC }, \
+ { "lib_ads", LIB_ADS_SPEC }, \
+ { "lib_yellowknife", LIB_YELLOWKNIFE_SPEC }, \
+ { "lib_mvme", LIB_MVME_SPEC }, \
+ { "lib_sim", LIB_SIM_SPEC }, \
+ { "lib_freebsd", LIB_FREEBSD_SPEC }, \
+ { "lib_gnu", LIB_GNU_SPEC }, \
+ { "lib_linux", LIB_LINUX_SPEC }, \
+ { "lib_netbsd", LIB_NETBSD_SPEC }, \
+ { "lib_openbsd", LIB_OPENBSD_SPEC }, \
+ { "lib_default", LIB_DEFAULT_SPEC }, \
+ { "startfile_ads", STARTFILE_ADS_SPEC }, \
+ { "startfile_yellowknife", STARTFILE_YELLOWKNIFE_SPEC }, \
+ { "startfile_mvme", STARTFILE_MVME_SPEC }, \
+ { "startfile_sim", STARTFILE_SIM_SPEC }, \
+ { "startfile_freebsd", STARTFILE_FREEBSD_SPEC }, \
+ { "startfile_gnu", STARTFILE_GNU_SPEC }, \
+ { "startfile_linux", STARTFILE_LINUX_SPEC }, \
+ { "startfile_netbsd", STARTFILE_NETBSD_SPEC }, \
+ { "startfile_openbsd", STARTFILE_OPENBSD_SPEC }, \
+ { "startfile_default", STARTFILE_DEFAULT_SPEC }, \
+ { "endfile_ads", ENDFILE_ADS_SPEC }, \
+ { "endfile_yellowknife", ENDFILE_YELLOWKNIFE_SPEC }, \
+ { "endfile_mvme", ENDFILE_MVME_SPEC }, \
+ { "endfile_sim", ENDFILE_SIM_SPEC }, \
+ { "endfile_freebsd", ENDFILE_FREEBSD_SPEC }, \
+ { "endfile_gnu", ENDFILE_GNU_SPEC }, \
+ { "endfile_linux", ENDFILE_LINUX_SPEC }, \
+ { "endfile_netbsd", ENDFILE_NETBSD_SPEC }, \
+ { "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \
+ { "endfile_default", ENDFILE_DEFAULT_SPEC }, \
+ { "link_shlib", LINK_SHLIB_SPEC }, \
+ { "link_target", LINK_TARGET_SPEC }, \
+ { "link_start", LINK_START_SPEC }, \
+ { "link_start_ads", LINK_START_ADS_SPEC }, \
+ { "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \
+ { "link_start_mvme", LINK_START_MVME_SPEC }, \
+ { "link_start_sim", LINK_START_SIM_SPEC }, \
+ { "link_start_freebsd", LINK_START_FREEBSD_SPEC }, \
+ { "link_start_gnu", LINK_START_GNU_SPEC }, \
+ { "link_start_linux", LINK_START_LINUX_SPEC }, \
+ { "link_start_netbsd", LINK_START_NETBSD_SPEC }, \
+ { "link_start_openbsd", LINK_START_OPENBSD_SPEC }, \
+ { "link_start_default", LINK_START_DEFAULT_SPEC }, \
+ { "link_os", LINK_OS_SPEC }, \
+ { "link_os_ads", LINK_OS_ADS_SPEC }, \
+ { "link_os_yellowknife", LINK_OS_YELLOWKNIFE_SPEC }, \
+ { "link_os_mvme", LINK_OS_MVME_SPEC }, \
+ { "link_os_sim", LINK_OS_SIM_SPEC }, \
+ { "link_os_freebsd", LINK_OS_FREEBSD_SPEC }, \
+ { "link_os_linux", LINK_OS_LINUX_SPEC }, \
+ { "link_os_gnu", LINK_OS_GNU_SPEC }, \
+ { "link_os_netbsd", LINK_OS_NETBSD_SPEC }, \
+ { "link_os_openbsd", LINK_OS_OPENBSD_SPEC }, \
+ { "link_os_default", LINK_OS_DEFAULT_SPEC }, \
+ { "cc1_endian_big", CC1_ENDIAN_BIG_SPEC }, \
+ { "cc1_endian_little", CC1_ENDIAN_LITTLE_SPEC }, \
+ { "cc1_endian_default", CC1_ENDIAN_DEFAULT_SPEC }, \
+ { "cc1_secure_plt_default", CC1_SECURE_PLT_DEFAULT_SPEC }, \
+ { "cpp_os_ads", CPP_OS_ADS_SPEC }, \
+ { "cpp_os_yellowknife", CPP_OS_YELLOWKNIFE_SPEC }, \
+ { "cpp_os_mvme", CPP_OS_MVME_SPEC }, \
+ { "cpp_os_sim", CPP_OS_SIM_SPEC }, \
+ { "cpp_os_freebsd", CPP_OS_FREEBSD_SPEC }, \
+ { "cpp_os_gnu", CPP_OS_GNU_SPEC }, \
+ { "cpp_os_linux", CPP_OS_LINUX_SPEC }, \
+ { "cpp_os_netbsd", CPP_OS_NETBSD_SPEC }, \
+ { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \
+ { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \
+ SUBSUBTARGET_EXTRA_SPECS
+
+#define SUBSUBTARGET_EXTRA_SPECS
+
+/* Define this macro as a C expression for the initializer of an
+   array of strings to tell the driver program which options are
+ defaults for this target and thus do not need to be handled
+ specially when using `MULTILIB_OPTIONS'.
+
+ Do not define this macro if `MULTILIB_OPTIONS' is not defined in
+ the target makefile fragment or if none of the options listed in
+ `MULTILIB_OPTIONS' are set by default. *Note Target Fragment::. */
+
+#define MULTILIB_DEFAULTS { "mbig", "mcall-sysv" }
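+
+/* That is, a driver invocation with neither an endianness nor an ABI
+   switch selects the same multilib as -mbig -mcall-sysv would.  */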
+
+/* Define this macro if the code for function profiling should come
+ before the function prologue. Normally, the profiling code comes
+ after. */
+#define PROFILE_BEFORE_PROLOGUE 1
+
+/* Function name to call to do profiling. */
+#define RS6000_MCOUNT "_mcount"
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ ((flag_pic || TARGET_RELOCATABLE) \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_absptr)
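+
+/* For instance, PIC or relocatable code encodes a global symbol as an
+   indirect, pc-relative, 4-byte datum (DW_EH_PE_indirect | DW_EH_PE_pcrel
+   | DW_EH_PE_sdata4); everything else uses plain absolute pointers.  */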
+
+#define DOUBLE_INT_ASM_OP "\t.quad\t"
+
+/* Generate entries in .fixup for relocatable addresses. */
+#define RELOCATABLE_NEEDS_FIXUP 1
+
+#define TARGET_ASM_FILE_END rs6000_elf_file_end
+
+/* This target uses the sysv4.opt file. */
+#define TARGET_USES_SYSV4_OPT 1
+
+#undef DBX_REGISTER_NUMBER
diff --git a/gcc/config/rs6000/sysv4.opt b/gcc/config/rs6000/sysv4.opt
new file mode 100644
index 000000000..214420543
--- /dev/null
+++ b/gcc/config/rs6000/sysv4.opt
@@ -0,0 +1,145 @@
+; SYSV4 options for PPC port.
+;
+; Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mcall-
+Target RejectNegative Joined
+Select ABI calling convention
+
+msdata=
+Target RejectNegative Joined
+Select method for sdata handling
+
+mtls-size=
+Target RejectNegative Joined
+Specify bit size of immediate TLS offsets
+
+mbit-align
+Target Report Var(TARGET_NO_BITFIELD_TYPE) Save
+Align to the base type of the bit-field
+
+mstrict-align
+Target Report Mask(STRICT_ALIGN)
+Don't assume that unaligned accesses are handled by the system
+
+mrelocatable
+Target Report Mask(RELOCATABLE)
+Produce code relocatable at runtime
+
+mrelocatable-lib
+Target
+Produce code relocatable at runtime
+
+mlittle-endian
+Target Report RejectNegative Mask(LITTLE_ENDIAN)
+Produce little endian code
+
+mlittle
+Target Report RejectNegative Mask(LITTLE_ENDIAN) MaskExists
+Produce little endian code
+
+mbig-endian
+Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
+Produce big endian code
+
+mbig
+Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
+Produce big endian code
+
+;; FIXME: This does nothing. What should be done?
+mno-toc
+Target RejectNegative
+no description yet
+
+mtoc
+Target RejectNegative
+no description yet
+
+mprototype
+Target Var(target_prototype) Save
+Assume all variable arg functions are prototyped
+
+;; FIXME: Does nothing.
+mno-traceback
+Target RejectNegative
+no description yet
+
+meabi
+Target Report Mask(EABI)
+Use EABI
+
+mbit-word
+Target Report Var(TARGET_NO_BITFIELD_WORD) Save
+Allow bit-fields to cross word boundaries
+
+mregnames
+Target Var(rs6000_regnames) Save
+Use alternate register names
+
+;; This option does nothing and only exists because the compiler
+;; driver passes all -m* options through.
+msdata
+Target
+Use default method for sdata handling
+
+msim
+Target RejectNegative
+Link with libsim.a, libc.a and sim-crt0.o
+
+mads
+Target RejectNegative
+Link with libads.a, libc.a and crt0.o
+
+myellowknife
+Target RejectNegative
+Link with libyk.a, libc.a and crt0.o
+
+mmvme
+Target RejectNegative
+Link with libmvme.a, libc.a and crt0.o
+
+memb
+Target RejectNegative
+Set the PPC_EMB bit in the ELF flags header
+
+mshlib
+Target RejectNegative
+no description yet
+
+m64
+Target Report RejectNegative Negative(m32) Mask(64BIT)
+Generate 64-bit code
+
+m32
+Target Report RejectNegative Negative(m64) InverseMask(64BIT)
+Generate 32-bit code
+
+mnewlib
+Target RejectNegative
+no description yet
+
+msecure-plt
+Target Report RejectNegative Var(secure_plt, 1) Save
+Generate code to use a non-exec PLT and GOT
+
+mbss-plt
+Target Report RejectNegative Var(secure_plt, 0) Save
+Generate code for old exec BSS PLT
diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h
new file mode 100644
index 000000000..155977766
--- /dev/null
+++ b/gcc/config/rs6000/sysv4le.h
@@ -0,0 +1,36 @@
+/* Target definitions for GCC for a little endian PowerPC
+ running System V.4
+ Copyright (C) 1995, 2000, 2003, 2007 Free Software Foundation, Inc.
+ Contributed by Cygnus Support.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_LITTLE_ENDIAN)
+
+#undef CC1_ENDIAN_DEFAULT_SPEC
+#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_little)"
+
+#undef LINK_TARGET_SPEC
+#define LINK_TARGET_SPEC "\
+%{mbig: --oformat elf32-powerpc } %{mbig-endian: --oformat elf32-powerpc } \
+%{!mlittle: %{!mlittle-endian: %{!mbig: %{!mbig-endian: \
+ %{mcall-linux: --oformat elf32-powerpc} \
+ }}}}"
+
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
diff --git a/gcc/config/rs6000/t-aix43 b/gcc/config/rs6000/t-aix43
new file mode 100644
index 000000000..3cac73bd9
--- /dev/null
+++ b/gcc/config/rs6000/t-aix43
@@ -0,0 +1,95 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+# 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Build the libraries for pthread and all of the
+# different processor models
+
+MULTILIB_OPTIONS = pthread \
+ mcpu=common/mcpu=power/mcpu=powerpc/maix64
+
+MULTILIB_DIRNAMES = pthread \
+ common power powerpc ppc64
+
+MULTILIB_MATCHES = mcpu?power=mcpu?power \
+ mcpu?power=mcpu?power2 \
+ mcpu?powerpc=mcpu?power3 \
+ mcpu?powerpc=mcpu?power4 \
+ mcpu?powerpc=mcpu?powerpc \
+ mcpu?power=mcpu?rios1 \
+ mcpu?power=mcpu?rios2 \
+ mcpu?power=mcpu?rsc \
+ mcpu?power=mcpu?rsc1 \
+ mcpu?powerpc=mcpu?rs64a \
+ mcpu?powerpc=mcpu?601 \
+ mcpu?powerpc=mcpu?602 \
+ mcpu?powerpc=mcpu?603 \
+ mcpu?powerpc=mcpu?603e \
+ mcpu?powerpc=mcpu?604 \
+ mcpu?powerpc=mcpu?604e \
+ mcpu?powerpc=mcpu?620 \
+ mcpu?powerpc=mcpu?630
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# Build a shared libgcc library.
+SHLIB_EXT = .a
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-bE:@shlib_map_file@ -o @multilib_dir@/shr.o \
+ @multilib_flags@ @shlib_objs@ -lc \
+ `case @multilib_dir@ in \
+ *pthread*) echo -L/usr/lib/threads -lpthreads -lc_r /usr/lib/libc.a ;; \
+ *) echo -lc ;; esac` ; \
+ rm -f @multilib_dir@/tmp-@shlib_base_name@.a ; \
+ $(AR_CREATE_FOR_TARGET) @multilib_dir@/tmp-@shlib_base_name@.a \
+ @multilib_dir@/shr.o ; \
+ mv @multilib_dir@/tmp-@shlib_base_name@.a \
+ @multilib_dir@/@shlib_base_name@.a ; \
+ rm -f @multilib_dir@/shr.o
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@; \
+ $(INSTALL_DATA) @multilib_dir@/@shlib_base_name@.a \
+ $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/
+SHLIB_LIBS = -lc `case @multilib_dir@ in *pthread*) echo -lpthread ;; esac`
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver $(srcdir)/config/rs6000/libgcc-ppc64.ver
+SHLIB_NM_FLAGS = -Bpg -X32_64
+
+# GCC 128-bit long double support routines.
+LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/ppc64-fp.c \
+ $(srcdir)/config/rs6000/darwin-ldouble.c
+TARGET_LIBGCC2_CFLAGS = -mlong-double-128
+
+# Allow both 32-bit and 64-bit objects in archives.
+AR_FLAGS_FOR_TARGET = -X32_64
+
diff --git a/gcc/config/rs6000/t-aix52 b/gcc/config/rs6000/t-aix52
new file mode 100644
index 000000000..0b4540278
--- /dev/null
+++ b/gcc/config/rs6000/t-aix52
@@ -0,0 +1,75 @@
+# Copyright (C) 2002, 2003, 2004, 2005, 2006,
+# 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Build the libraries for pthread and all of the
+# different processor models
+
+MULTILIB_OPTIONS = pthread maix64
+
+MULTILIB_DIRNAMES = pthread ppc64
+
+MULTILIB_MATCHES =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# Build a shared libgcc library.
+SHLIB_EXT = .a
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-bE:@shlib_map_file@ -o @multilib_dir@/shr.o \
+ @multilib_flags@ @shlib_objs@ -lc \
+ `case @multilib_dir@ in \
+ *pthread*) echo -L$(TARGET_SYSTEM_ROOT)/usr/lib/threads -lpthreads -lc_r $(TARGET_SYSTEM_ROOT)/usr/lib/libc.a ;; \
+ *) echo -lc ;; esac` ; \
+ rm -f @multilib_dir@/tmp-@shlib_base_name@.a ; \
+ $(AR_CREATE_FOR_TARGET) @multilib_dir@/tmp-@shlib_base_name@.a \
+ @multilib_dir@/shr.o ; \
+ mv @multilib_dir@/tmp-@shlib_base_name@.a \
+ @multilib_dir@/@shlib_base_name@.a ; \
+ rm -f @multilib_dir@/shr.o
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@; \
+ $(INSTALL_DATA) @multilib_dir@/@shlib_base_name@.a \
+ $$(DESTDIR)$$(slibdir)@shlib_slibdir_qual@/
+SHLIB_LIBS = -lc `case @multilib_dir@ in *pthread*) echo -lpthread ;; esac`
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver $(srcdir)/config/rs6000/libgcc-ppc64.ver
+SHLIB_NM_FLAGS = -Bpg -X32_64
+
+# GCC 128-bit long double support routines.
+LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/ppc64-fp.c \
+ $(srcdir)/config/rs6000/darwin-ldouble.c
+TARGET_LIBGCC2_CFLAGS = -mlong-double-128
+
+# Allow both 32-bit and 64-bit objects in archives.
+AR_FLAGS_FOR_TARGET = -X32_64
diff --git a/gcc/config/rs6000/t-darwin b/gcc/config/rs6000/t-darwin
new file mode 100644
index 000000000..11cdcb3dc
--- /dev/null
+++ b/gcc/config/rs6000/t-darwin
@@ -0,0 +1,55 @@
+# Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006,
+# 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/darwin-tramp.asm \
+ $(srcdir)/config/rs6000/ppc64-fp.c \
+ $(srcdir)/config/darwin-64.c \
+ $(srcdir)/config/rs6000/darwin-ldouble.c \
+ $(srcdir)/config/rs6000/darwin-world.asm
+
+LIB2FUNCS_STATIC_EXTRA = \
+ $(srcdir)/config/rs6000/darwin-fpsave.asm \
+ $(srcdir)/config/rs6000/darwin-vecsave.asm
+
+DARWIN_EXTRA_CRT_BUILD_CFLAGS = -mlongcall -mmacosx-version-min=10.4
+
+# The .asm files above are designed to run on all processors,
+# even though they use AltiVec instructions. -Wa is used because
+# -force_cpusubtype_ALL doesn't work with -dynamiclib.
+#
+# -pipe is used because there's an assembler bug, 4077127, which causes
+# it not to process the first # directive properly, making temporary
+# file names appear in stabs and the bootstrap fail.  Using -pipe
+# works around this by avoiding temporary file names altogether.
+TARGET_LIBGCC2_CFLAGS = -Wa,-force_cpusubtype_ALL -pipe -mmacosx-version-min=10.4
+
+# Export the _xlq* symbols from darwin-ldouble.c.
+SHLIB_MAPFILES += $(srcdir)/config/rs6000/libgcc-ppc64.ver
+
+LIB2ADDEH += $(srcdir)/config/rs6000/darwin-fallback.c
+
+darwin-fpsave.o: $(srcdir)/config/rs6000/darwin-asm.h
+darwin-tramp.o: $(srcdir)/config/rs6000/darwin-asm.h
+
+# Explain how to build crt2.o
+$(T)crt2$(objext): $(srcdir)/config/darwin-crt2.c $(GCC_PASSES) \
+ $(TCONFIG_H) stmp-int-hdrs tsystem.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \
+ $(DARWIN_EXTRA_CRT_BUILD_CFLAGS) \
+ -c $(srcdir)/config/darwin-crt2.c -o $(T)crt2$(objext)
diff --git a/gcc/config/rs6000/t-darwin64 b/gcc/config/rs6000/t-darwin64
new file mode 100644
index 000000000..2a24d7f28
--- /dev/null
+++ b/gcc/config/rs6000/t-darwin64
@@ -0,0 +1,12 @@
+LIB2_SIDITI_CONV_FUNCS=yes
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/darwin-tramp.asm \
+ $(srcdir)/config/darwin-64.c \
+ $(srcdir)/config/rs6000/darwin-ldouble.c \
+ $(srcdir)/config/rs6000/darwin-world.asm
+
+MULTILIB_OPTIONS = m32
+MULTILIB_DIRNAMES = ppc
+
+#LIBGCC = stmp-multilib
+#INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/rs6000/t-darwin8 b/gcc/config/rs6000/t-darwin8
new file mode 100644
index 000000000..2f3bb32f8
--- /dev/null
+++ b/gcc/config/rs6000/t-darwin8
@@ -0,0 +1,3 @@
+# 64-bit libraries can only be built in Darwin 8.x or later.
+MULTILIB_OPTIONS = m64
+MULTILIB_DIRNAMES = ppc64
diff --git a/gcc/config/rs6000/t-fprules b/gcc/config/rs6000/t-fprules
new file mode 100644
index 000000000..51ca4cd3d
--- /dev/null
+++ b/gcc/config/rs6000/t-fprules
@@ -0,0 +1,29 @@
+# Copyright (C) 2002, 2005, 2006, 2008, 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+SOFT_FLOAT_CPUS = e300c2 401 403 405 440 464 476 ec603e 801 821 823 860
+MULTILIB_MATCHES_FLOAT = $(foreach cpu, $(SOFT_FLOAT_CPUS), msoft-float=mcpu?$(cpu))
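+# The $(foreach ...) above expands to one mapping per listed CPU, e.g.
+# "msoft-float=mcpu?e300c2 msoft-float=mcpu?401 ...", so selecting any
+# of those CPUs picks the soft-float multilib.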
+
+# Build the libraries for both hard and soft floating point by default
+
+MULTILIB_OPTIONS = msoft-float
+MULTILIB_DIRNAMES = soft-float
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT}
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/rs6000/t-fprules-fpbit b/gcc/config/rs6000/t-fprules-fpbit
new file mode 100644
index 000000000..a6c7246fa
--- /dev/null
+++ b/gcc/config/rs6000/t-fprules-fpbit
@@ -0,0 +1,29 @@
+# Copyright (C) 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
diff --git a/gcc/config/rs6000/t-fprules-softfp b/gcc/config/rs6000/t-fprules-softfp
new file mode 100644
index 000000000..10b271f03
--- /dev/null
+++ b/gcc/config/rs6000/t-fprules-softfp
@@ -0,0 +1,6 @@
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := rs6000/sfp-machine.h
+softfp_exclude_libgcc2 := y
diff --git a/gcc/config/rs6000/t-freebsd b/gcc/config/rs6000/t-freebsd
new file mode 100644
index 000000000..038e60833
--- /dev/null
+++ b/gcc/config/rs6000/t-freebsd
@@ -0,0 +1,24 @@
+# Overrides for FreeBSD PowerPC
+#
+# Copyright (C) 2011 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We do not want to build darwin-ldouble.c, so set the LIB2FUNCS_EXTRA again.
+# Invoke this file after rs6000/t-ppccomm.
+
+LIB2FUNCS_EXTRA = tramp.S
diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux
new file mode 100644
index 000000000..017a293cd
--- /dev/null
+++ b/gcc/config/rs6000/t-linux
@@ -0,0 +1,9 @@
+# Do not define the multiarch name if configured for a soft-float CPU
+# or with soft-float.
+ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float)))
+ifneq (,$(findstring spe,$(target)))
+MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1)
+else
+MULTIARCH_DIRNAME = powerpc-linux-gnu
+endif
+endif
diff --git a/gcc/config/rs6000/t-linux64 b/gcc/config/rs6000/t-linux64
new file mode 100644
index 000000000..1596c0f74
--- /dev/null
+++ b/gcc/config/rs6000/t-linux64
@@ -0,0 +1,45 @@
+#rs6000/t-linux64
+
+# Copyright (C) 2002, 2003, 2004, 2006, 2007,
+# 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA += $(srcdir)/config/rs6000/ppc64-fp.c
+LIB2FUNCS_EXTRA := $(sort $(LIB2FUNCS_EXTRA))
+
+TARGET_LIBGCC2_CFLAGS += -mno-minimal-toc
+
+# On Debian, Ubuntu and other derivative distributions, the 32-bit
+# libraries are found in /lib32 and /usr/lib32, and /lib64 and /usr/lib64
+# are symlinks to /lib and /usr/lib, while other distributions install
+# libraries into /lib64 and /usr/lib64.  The LSB does not enforce the use
+# of /lib64 and /usr/lib64, and it says nothing about the 32-bit
+# libraries on those systems.  Set MULTILIB_OSDIRNAMES according to what
+# is found on the target.
+
+MULTILIB_OPTIONS = m64/m32 msoft-float
+MULTILIB_DIRNAMES = 64 32 nof
+MULTILIB_EXTRA_OPTS = fPIC mstrict-align
+MULTILIB_EXCEPTIONS = m64/msoft-float
+MULTILIB_EXCLUSIONS = m64/!m32/msoft-float
+MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu)
+MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu)
+MULTILIB_OSDIRNAMES += nof
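+
+# On a Debian-style sysroot (where ../usr/lib32 exists relative to the
+# system header dir) the wildcard test above yields ../lib32, otherwise
+# ../lib; the :powerpc-linux-gnu suffix is only appended when multiarch
+# is enabled.
+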
+MULTILIB_MATCHES = $(MULTILIB_MATCHES_FLOAT)
+
+softfp_wrap_start := '\#ifndef __powerpc64__'
+softfp_wrap_end := '\#endif'
diff --git a/gcc/config/rs6000/t-lynx b/gcc/config/rs6000/t-lynx
new file mode 100644
index 000000000..4befd5e7d
--- /dev/null
+++ b/gcc/config/rs6000/t-lynx
@@ -0,0 +1,56 @@
+# Copyright (C) 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA = tramp.S
+
+tramp.S: $(srcdir)/config/rs6000/tramp.asm
+ cat $(srcdir)/config/rs6000/tramp.asm > tramp.S
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+MULTILIB_OPTIONS += msoft-float
+MULTILIB_DIRNAMES += soft-float
+
+MULTILIB_OPTIONS += maltivec
+MULTILIB_DIRNAMES += altivec
+
+MULTILIB_EXCEPTIONS = *msoft-float/*maltivec*
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+
+# If .sdata is enabled __CTOR_{LIST,END}__ go into .sdata instead of
+# .ctors.
+CRTSTUFF_T_CFLAGS = -mno-sdata
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = -fPIC -mno-sdata
+
+# Local Variables:
+# mode: makefile
+# End:
diff --git a/gcc/config/rs6000/t-netbsd b/gcc/config/rs6000/t-netbsd
new file mode 100644
index 000000000..bad21beaa
--- /dev/null
+++ b/gcc/config/rs6000/t-netbsd
@@ -0,0 +1,90 @@
+# Support for NetBSD PowerPC ELF targets (SVR4 ABI).
+#
+# Copyright (C) 2002, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA = tramp.S
+
+LIB2FUNCS_STATIC_EXTRA = crtsavfpr.S crtresfpr.S \
+ crtsavgpr.S crtresgpr.S \
+ crtresxfpr.S crtresxgpr.S
+
+tramp.S: $(srcdir)/config/rs6000/tramp.asm
+ cat $(srcdir)/config/rs6000/tramp.asm > tramp.S
+
+crtsavfpr.S: $(srcdir)/config/rs6000/crtsavfpr.asm
+ cat $(srcdir)/config/rs6000/crtsavfpr.asm >crtsavfpr.S
+
+crtresfpr.S: $(srcdir)/config/rs6000/crtresfpr.asm
+ cat $(srcdir)/config/rs6000/crtresfpr.asm >crtresfpr.S
+
+crtsavgpr.S: $(srcdir)/config/rs6000/crtsavgpr.asm
+ cat $(srcdir)/config/rs6000/crtsavgpr.asm >crtsavgpr.S
+
+crtresgpr.S: $(srcdir)/config/rs6000/crtresgpr.asm
+ cat $(srcdir)/config/rs6000/crtresgpr.asm >crtresgpr.S
+
+crtresxfpr.S: $(srcdir)/config/rs6000/crtresxfpr.asm
+ cat $(srcdir)/config/rs6000/crtresxfpr.asm >crtresxfpr.S
+
+crtresxgpr.S: $(srcdir)/config/rs6000/crtresxgpr.asm
+ cat $(srcdir)/config/rs6000/crtresxgpr.asm >crtresxgpr.S
+
+# It is important that crtbegin.o, etc., aren't surprised by stuff in .sdata.
+CRTSTUFF_T_CFLAGS += -msdata=none
+CRTSTUFF_T_CFLAGS_S += -msdata=none
+
+# Switch synonyms
+MULTILIB_MATCHES_FLOAT = msoft-float=mcpu?401 \
+ msoft-float=mcpu?403 \
+ msoft-float=mcpu?405 \
+ msoft-float=mcpu?ec603e \
+ msoft-float=mcpu?801 \
+ msoft-float=mcpu?821 \
+ msoft-float=mcpu?823 \
+ msoft-float=mcpu?860
+
+MULTILIB_OPTIONS = msoft-float
+MULTILIB_DIRNAMES = soft-float
+MULTILIB_EXTRA_OPTS = fPIC mstrict-align
+MULTILIB_EXCEPTIONS =
+
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT}
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+EXTRA_MULTILIB_PARTS = crtbegin$(objext) crtend$(objext) \
+ crtbeginS$(objext) crtendS$(objext) crtbeginT$(objext)
+
+$(T)crtsavfpr$(objext): crtsavfpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtsavfpr.S -o $(T)crtsavfpr$(objext)
+
+$(T)crtresfpr$(objext): crtresfpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresfpr.S -o $(T)crtresfpr$(objext)
+
+$(T)crtsavgpr$(objext): crtsavgpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtsavgpr.S -o $(T)crtsavgpr$(objext)
+
+$(T)crtresgpr$(objext): crtresgpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresgpr.S -o $(T)crtresgpr$(objext)
+
+$(T)crtresxfpr$(objext): crtresxfpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresxfpr.S -o $(T)crtresxfpr$(objext)
+
+$(T)crtresxgpr$(objext): crtresxgpr.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c crtresxgpr.S -o $(T)crtresxgpr$(objext)
diff --git a/gcc/config/rs6000/t-ppccomm b/gcc/config/rs6000/t-ppccomm
new file mode 100644
index 000000000..d91801eaa
--- /dev/null
+++ b/gcc/config/rs6000/t-ppccomm
@@ -0,0 +1,75 @@
+# Common support for PowerPC ELF targets (both EABI and SVR4).
+#
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2006, 2007,
+# 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB2FUNCS_EXTRA += tramp.S $(srcdir)/config/rs6000/darwin-ldouble.c
+
+# These can't end up in shared libgcc
+LIB2FUNCS_STATIC_EXTRA = eabi.S
+
+eabi.S: $(srcdir)/config/rs6000/eabi.asm
+ cat $(srcdir)/config/rs6000/eabi.asm > eabi.S
+
+tramp.S: $(srcdir)/config/rs6000/tramp.asm
+ cat $(srcdir)/config/rs6000/tramp.asm > tramp.S
+
+# Switch synonyms
+MULTILIB_MATCHES_ENDIAN = mlittle=mlittle-endian mbig=mbig-endian
+MULTILIB_MATCHES_SYSV = mcall-sysv=mcall-sysv-eabi mcall-sysv=mcall-sysv-noeabi mcall-sysv=mcall-linux mcall-sysv=mcall-netbsd
+
+EXTRA_MULTILIB_PARTS = crtbegin$(objext) crtend$(objext) \
+ crtbeginS$(objext) crtendS$(objext) crtbeginT$(objext) \
+ ecrti$(objext) ecrtn$(objext) \
+ ncrti$(objext) ncrtn$(objext)
+
+# We build {e,n}crti.o and {e,n}crtn.o, which serve to add begin and
+# end labels to all of the special sections used when we link using gcc.
+
+# Assemble startup files.
+ecrti.S: $(srcdir)/config/rs6000/eabi-ci.asm
+ cat $(srcdir)/config/rs6000/eabi-ci.asm >ecrti.S
+
+ecrtn.S: $(srcdir)/config/rs6000/eabi-cn.asm
+ cat $(srcdir)/config/rs6000/eabi-cn.asm >ecrtn.S
+
+ncrti.S: $(srcdir)/config/rs6000/sol-ci.asm
+ cat $(srcdir)/config/rs6000/sol-ci.asm >ncrti.S
+
+ncrtn.S: $(srcdir)/config/rs6000/sol-cn.asm
+ cat $(srcdir)/config/rs6000/sol-cn.asm >ncrtn.S
+
+# Build multiple copies of ?crt{i,n}.o, one for each target switch.
+$(T)ecrti$(objext): ecrti.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ecrti.S -o $(T)ecrti$(objext)
+
+$(T)ecrtn$(objext): ecrtn.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ecrtn.S -o $(T)ecrtn$(objext)
+
+$(T)ncrti$(objext): ncrti.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ncrti.S -o $(T)ncrti$(objext)
+
+$(T)ncrtn$(objext): ncrtn.S
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -c ncrtn.S -o $(T)ncrtn$(objext)
+
+# It is important that crtbegin.o, etc., aren't surprised by stuff in .sdata.
+CRTSTUFF_T_CFLAGS = -msdata=none
+# Make sure crt*.o are built with -fPIC even if configured with
+# --enable-shared --disable-multilib
+CRTSTUFF_T_CFLAGS_S = -fPIC -msdata=none
diff --git a/gcc/config/rs6000/t-ppcendian b/gcc/config/rs6000/t-ppcendian
new file mode 100644
index 000000000..093ee411a
--- /dev/null
+++ b/gcc/config/rs6000/t-ppcendian
@@ -0,0 +1,30 @@
+# Multilibs for powerpc embedded ELF targets with endianness options.
+#
+# Copyright (C) 2002 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = msoft-float \
+ mlittle/mbig
+
+MULTILIB_DIRNAMES = nof \
+ le be
+
+
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} \
+ ${MULTILIB_MATCHES_ENDIAN} \
+ ${MULTILIB_MATCHES_SYSV}
diff --git a/gcc/config/rs6000/t-ppcgas b/gcc/config/rs6000/t-ppcgas
new file mode 100644
index 000000000..264c11218
--- /dev/null
+++ b/gcc/config/rs6000/t-ppcgas
@@ -0,0 +1,33 @@
+# Multilibs for powerpc embedded ELF targets.
+#
+# Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000,
+# 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = msoft-float \
+ mlittle/mbig \
+ fleading-underscore
+
+MULTILIB_DIRNAMES = nof \
+ le be \
+ und
+
+MULTILIB_EXTRA_OPTS = mrelocatable-lib mno-eabi mstrict-align
+
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} \
+ ${MULTILIB_MATCHES_ENDIAN}
diff --git a/gcc/config/rs6000/t-ppcos b/gcc/config/rs6000/t-ppcos
new file mode 100644
index 000000000..819863bea
--- /dev/null
+++ b/gcc/config/rs6000/t-ppcos
@@ -0,0 +1,8 @@
+# Multilibs for a powerpc hosted ELF target (linux, SVR4)
+
+MULTILIB_OPTIONS = msoft-float
+MULTILIB_DIRNAMES = nof
+MULTILIB_EXTRA_OPTS = fPIC mstrict-align
+MULTILIB_EXCEPTIONS =
+
+MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT}
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
new file mode 100644
index 000000000..925870ee1
--- /dev/null
+++ b/gcc/config/rs6000/t-rs6000
@@ -0,0 +1,71 @@
+# General rules that all rs6000/ targets must have.
+#
+# Copyright (C) 1995, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2008, 2009,
+# 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+TM_H += $(srcdir)/config/rs6000/rs6000-builtin.def
+
+rs6000.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(REGS_H) hard-reg-set.h \
+ real.h insn-config.h conditions.h insn-attr.h flags.h $(RECOG_H) \
+ $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \
+ output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \
+ $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h gt-rs6000.h \
+ cfglayout.h cfgloop.h
+
+rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c \
+ $(srcdir)/config/rs6000/rs6000-protos.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(CPPLIB_H) \
+ $(TM_P_H) $(C_PRAGMA_H) errors.h coretypes.h $(TM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/rs6000/rs6000-c.c
+
+# The rs6000 backend doesn't cause warnings in these files.
+insn-conditions.o-warn =
+
+MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \
+ $(srcdir)/config/rs6000/rios2.md \
+ $(srcdir)/config/rs6000/rs64.md \
+ $(srcdir)/config/rs6000/mpc.md \
+ $(srcdir)/config/rs6000/40x.md \
+ $(srcdir)/config/rs6000/440.md \
+ $(srcdir)/config/rs6000/603.md \
+ $(srcdir)/config/rs6000/6xx.md \
+ $(srcdir)/config/rs6000/7xx.md \
+ $(srcdir)/config/rs6000/7450.md \
+ $(srcdir)/config/rs6000/8540.md \
+ $(srcdir)/config/rs6000/e300c2c3.md \
+ $(srcdir)/config/rs6000/e500mc.md \
+ $(srcdir)/config/rs6000/power4.md \
+ $(srcdir)/config/rs6000/power5.md \
+ $(srcdir)/config/rs6000/power6.md \
+ $(srcdir)/config/rs6000/power7.md \
+ $(srcdir)/config/rs6000/cell.md \
+ $(srcdir)/config/rs6000/xfpu.md \
+ $(srcdir)/config/rs6000/a2.md \
+ $(srcdir)/config/rs6000/predicates.md \
+ $(srcdir)/config/rs6000/constraints.md \
+ $(srcdir)/config/rs6000/darwin.md \
+ $(srcdir)/config/rs6000/sync.md \
+ $(srcdir)/config/rs6000/vector.md \
+ $(srcdir)/config/rs6000/vsx.md \
+ $(srcdir)/config/rs6000/altivec.md \
+ $(srcdir)/config/rs6000/spe.md \
+ $(srcdir)/config/rs6000/dfp.md \
+ $(srcdir)/config/rs6000/paired.md
diff --git a/gcc/config/rs6000/t-rtems b/gcc/config/rs6000/t-rtems
new file mode 100644
index 000000000..cad98c51c
--- /dev/null
+++ b/gcc/config/rs6000/t-rtems
@@ -0,0 +1,82 @@
+# Multilibs for powerpc RTEMS targets.
+#
+# Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS = \
+mcpu=403/mcpu=505/mcpu=601/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400 \
+Dmpc8260 \
+msoft-float
+
+MULTILIB_DIRNAMES = \
+m403 m505 m601 m603e m604 m860 m7400 \
+mpc8260 \
+nof
+
+# MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT}
+MULTILIB_MATCHES =
+MULTILIB_MATCHES += ${MULTILIB_MATCHES_ENDIAN}
+MULTILIB_MATCHES += ${MULTILIB_MATCHES_SYSV}
+# Map 405 to 403
+MULTILIB_MATCHES += mcpu?403=mcpu?405
+# Map 602, 603e, 603 to 603e
+MULTILIB_MATCHES += mcpu?603e=mcpu?602
+MULTILIB_MATCHES += mcpu?603e=mcpu?603
+# Map 801, 821, 823 to 860
+MULTILIB_MATCHES += mcpu?860=mcpu?801
+MULTILIB_MATCHES += mcpu?860=mcpu?821
+MULTILIB_MATCHES += mcpu?860=mcpu?823
+# Map 7450 to 7400
+MULTILIB_MATCHES += mcpu?7400=mcpu?7450
+
+# Map 750 to the default (base) multilib.
+MULTILIB_MATCHES += mcpu?750=
+
+# Soft-float only, default implies msoft-float
+# NOTE: Must match with MULTILIB_MATCHES_FLOAT and MULTILIB_MATCHES
+MULTILIB_SOFTFLOAT_ONLY = \
+*mcpu=401/*msoft-float* \
+*mcpu=403/*msoft-float* \
+*mcpu=405/*msoft-float* \
+*mcpu=801/*msoft-float* \
+*mcpu=821/*msoft-float* \
+*mcpu=823/*msoft-float* \
+*mcpu=860/*msoft-float*
+
+# Hard-float only, take out msoft-float
+MULTILIB_HARDFLOAT_ONLY = \
+*mcpu=505/*msoft-float*
+
+MULTILIB_EXCEPTIONS =
+
+# Disallow -Dppc and -Dmpc without other options
+MULTILIB_EXCEPTIONS += Dppc* Dmpc*
+
+MULTILIB_EXCEPTIONS += \
+${MULTILIB_SOFTFLOAT_ONLY} \
+${MULTILIB_HARDFLOAT_ONLY}
+
+# Special rules
+# Take out all variants we don't want
+MULTILIB_EXCEPTIONS += *mcpu=403/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=505/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=601/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=604/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=750/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=860/Dmpc*
+MULTILIB_EXCEPTIONS += *mcpu=7400/Dmpc*
diff --git a/gcc/config/rs6000/t-spe b/gcc/config/rs6000/t-spe
new file mode 100644
index 000000000..7c0c31506
--- /dev/null
+++ b/gcc/config/rs6000/t-spe
@@ -0,0 +1,86 @@
+# Multilibs for e500
+#
+# Copyright (C) 2003 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# What we really want are these variants:
+# -mcpu=7400
+# -mcpu=7400 -maltivec -mabi=altivec
+# -mcpu=7400 -msoft-float
+# -msoft-float
+# -mspe=no -mabi=no-spe -misel=no
+# so we'll need to create exceptions below.
+
+MULTILIB_OPTIONS = mcpu=7400 \
+ maltivec \
+ mabi=altivec \
+ msoft-float \
+ mspe=no \
+ mabi=no-spe \
+ misel=no \
+ mlittle
+
+MULTILIB_DIRNAMES = mpc7400 altivec abi-altivec \
+ nof no-spe no-abi-spe no-isel le
+
+MULTILIB_EXCEPTIONS = maltivec mabi=altivec mspe=no mabi=no-spe misel=no \
+ maltivec/mabi=altivec \
+ mcpu=7400/maltivec \
+ mcpu=7400/mabi=altivec \
+ *mcpu=7400/*mspe=no* \
+ *mcpu=7400/*mabi=no-spe* \
+ *mcpu=7400/*misel=no* \
+ *maltivec/*msoft-float* \
+ *maltivec/*mspe=no* \
+ *maltivec/*mabi=no-spe* \
+ *maltivec/*misel=no* \
+ *mabi=altivec/*msoft-float* \
+ *mabi=altivec/*mspe=no* \
+ *mabi=altivec/*mabi=no-spe* \
+ *mabi=altivec/*misel=no* \
+ *msoft-float/*mspe=no* \
+ *msoft-float/*mabi=no-spe* \
+ *msoft-float/*misel=no* \
+ mspe=no/mabi=no-spe \
+ mspe=no/misel=no \
+ mabi=no-spe/misel=no \
+ misel=no/mlittle \
+ mabi=no-spe/misel=no/mlittle \
+ mspe=no/mlittle \
+ mabi=spe/mlittle \
+ mcpu=7400/mabi=altivec/mlittle \
+ mcpu=7400/maltivec/mlittle \
+ mabi=no-spe/mlittle \
+ mspe=no/misel=no/mlittle \
+ mspe=no/mabi=no-spe/mlittle \
+ mabi=altivec/mlittle \
+ maltivec/mlittle \
+ maltivec/mabi=altivec/mlittle
diff --git a/gcc/config/rs6000/t-vxworks b/gcc/config/rs6000/t-vxworks
new file mode 100644
index 000000000..8a3d394ed
--- /dev/null
+++ b/gcc/config/rs6000/t-vxworks
@@ -0,0 +1,34 @@
+# Multilibs for VxWorks.
+#
+# Copyright (C) 2002, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# The base multilib is -mhard-float.
+MULTILIB_OPTIONS = mrtp fPIC msoft-float
+MULTILIB_DIRNAMES =
+MULTILIB_MATCHES = fPIC=fpic
+MULTILIB_EXCEPTIONS = fPIC*
+
+# This is set from the common config/t-vxworks but clobbered by t-ppccomm
+# on this target.
+EXTRA_MULTILIB_PARTS =
+
+# Similarly, LIB2FUNCS_EXTRA is set from config/t-vxworks and
+# t-ppccomm *adds* to it, but the common contents are useful to us.
+# In particular the base trampoline_setup bits are expected to be
+# provided there.
diff --git a/gcc/config/rs6000/t-vxworksae b/gcc/config/rs6000/t-vxworksae
new file mode 100644
index 000000000..5f682627e
--- /dev/null
+++ b/gcc/config/rs6000/t-vxworksae
@@ -0,0 +1,5 @@
+# Multilibs for VxWorks AE.
+
+MULTILIB_OPTIONS = mvthreads msoft-float
+MULTILIB_MATCHES =
+MULTILIB_EXCEPTIONS =
diff --git a/gcc/config/rs6000/t-xilinx b/gcc/config/rs6000/t-xilinx
new file mode 100644
index 000000000..11102f4d0
--- /dev/null
+++ b/gcc/config/rs6000/t-xilinx
@@ -0,0 +1,56 @@
+# Multilibs for Xilinx powerpc embedded ELF targets.
+#
+# Copyright (C) 2009 Free Software Foundation, Inc.
+# Contributed by Michael Eager, eager@eagercon.com
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Switch synonyms
+MULTILIB_MATCHES = mfpu?sp_lite=msingle-float mfpu?dp_lite=mdouble-float mfpu?dp_lite=mhard-float mfpu?sp_lite=mfpu?sp_full mfpu?dp_lite=mfpu?dp_full
+
+MULTILIB_OPTIONS = mfpu=sp_lite/mfpu=dp_lite
+
+MULTILIB_DIRNAMES = single double
+
diff --git a/gcc/config/rs6000/titan.md b/gcc/config/rs6000/titan.md
new file mode 100644
index 000000000..744d7770f
--- /dev/null
+++ b/gcc/config/rs6000/titan.md
@@ -0,0 +1,171 @@
+;; Pipeline description for the AppliedMicro Titan core.
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by Theobroma Systems Design und Consulting GmbH
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; AppliedMicro Titan core complex
+
+(automata_option "progress")
+
+(define_automaton "titan_core,titan_fpu,titan_fxu,titan_bpu,titan_lsu")
+(define_cpu_unit "titan_issue_0,titan_issue_1" "titan_core")
+
+;; Some useful abbreviations.
+(define_reservation "titan_issue" "titan_issue_0|titan_issue_1")
+
+;; === FXU scheduling ===
+
+(define_cpu_unit "titan_fxu_sh,titan_fxu_wb" "titan_fxu")
+
+;; The 1-cycle adder executes add, addi, subf, neg, compare and trap
+;; instructions. It provides its own, dedicated result-bus, so we
+;; don't need the titan_fxu_wb reservation to complete.
+(define_insn_reservation "titan_fxu_adder" 1
+ (and (eq_attr "type" "cmp,fast_compare,trap")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh")
+
+;; Keep the titan_imul and titan_mulhw (half-word) rules in this
+;; order, to ensure the proper match: the half-word instructions are
+;; tagged as imul3 only, whereas regular multiplies always carry an
+;; imul tag.
+
+(define_insn_reservation "titan_imul" 5
+ (and (eq_attr "type" "imul,imul2,imul_compare")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh,nothing*5,titan_fxu_wb")
+
+(define_insn_reservation "titan_mulhw" 4
+ (and (eq_attr "type" "imul3")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh,nothing*4,titan_fxu_wb")
+
+(define_bypass 2 "titan_mulhw" "titan_mulhw")
+
+(define_insn_reservation "titan_fxu_shift_and_rotate" 2
+ (and (eq_attr "type" "insert_word,shift,var_shift_rotate,cntlz")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh,nothing*2,titan_fxu_wb")
+
+;; We model the divider for the worst-case (i.e. a full 32-bit
+;; divide). To model the bypass for byte-wise completion, a
+;; define_bypass with a guard-function could be used... however, this
+;; would be an optimization of doubtful value, as a large number of
+;; divides will operate on 32-bit variables.
+
+;; To avoid an unmanageably large automaton (generating the automaton
+;; would require well over 2GB of memory), we don't model the shared
+;; result bus on this one.  The divider pipeline is thus modeled
+;; through its latency and initial dispatch bottlenecks (i.e. issue
+;; slots and fxu scheduler availability).
+(define_insn_reservation "titan_fxu_div" 34
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh")
+
+(define_insn_reservation "titan_fxu_alu" 1
+ (and (eq_attr "type" "integer,exts")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fxu_sh,nothing,titan_fxu_wb")
+
+;; === BPU scheduling ===
+
+(define_cpu_unit "titan_bpu_sh" "titan_bpu")
+
+(define_insn_reservation "titan_bpu" 2
+ (and (eq_attr "type" "branch,jmpreg,cr_logical,delayed_cr")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_bpu_sh")
+
+;; === LSU scheduling ===
+
+(define_cpu_unit "titan_lsu_sh" "titan_lsu")
+
+;; Loads.
+(define_insn_reservation "titan_lsu_load" 3
+ (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\
+ load_l,sync")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_lsu_sh")
+
+(define_insn_reservation "titan_lsu_fpload" 12
+ (and (eq_attr "type" "fpload,fpload_ux,fpload_u")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_lsu_sh")
+
+;; Note that the isync is not clearly placed within any execution
+;; unit. We've made the assumption that it will be running out of the
+;; LSU, as msync is also executed within the LSU.
+(define_insn_reservation "titan_lsu_sync" 20
+ (and (eq_attr "type" "sync")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_lsu_sh*20")
+
+;; Stores.
+(define_insn_reservation "titan_lsu_store" 12
+ (and (eq_attr "type" "store,store_ux,store_u,store_c")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_lsu_sh")
+
+(define_insn_reservation "titan_lsu_fpstore" 12
+ (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_lsu_sh")
+
+;; === FPU scheduling ===
+
+;; In order to keep the automaton for the Titan FPU efficient and
+;; maintainable, we've kept it as concise as possible and created a
+;; mapping for the main "choke points" only, instead of modelling the
+;; overall flow of instructions through the FP-pipeline(s).
+
+;; The key elements modelled are:
+;; * each FP-instruction takes up one of the two issue slots
+;; * the FPU runs at half the core frequency
+;; * divides are not pipelined (but execute in a separate unit)
+;; * the FPU has a shared result bus for all its units
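+;;
+;; (For example, "titan_fp0*2" in the reservations below occupies the
+;; FP issue stage for two core cycles, i.e. one cycle of the
+;; half-frequency FPU clock.)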
+
+(define_cpu_unit "titan_fp0,titan_fpdiv,titan_fpwb" "titan_fpu")
+
+(define_insn_reservation "titan_fp_div_double" 72
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fpdiv*72,titan_fpwb")
+
+(define_insn_reservation "titan_fp_div_single" 46
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fpdiv*46,titan_fpwb")
+
+(define_insn_reservation "titan_fp_single" 12
+ (and (eq_attr "fp_type" "fp_addsub_s,fp_mul_s,fp_maddsub_s")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fp0*2,nothing*10,titan_fpwb")
+
+;; Make sure the "titan_fp" rule stays last, as it's a catch all for
+;; double-precision and unclassified (e.g. fsel) FP-instructions
+(define_insn_reservation "titan_fp" 10
+ (and (eq_attr "type" "fpcompare,fp,dmul")
+ (eq_attr "cpu" "titan"))
+ "titan_issue,titan_fp0*2,nothing*8,titan_fpwb")
+
+;; Please note that, as the non-pipelined FP instructions "mcrfs",
+;; "mtfsb0[.]", "mtfsb1[.]", "mtfsf[.]" and "mtfsfi[.]" are not
+;; accessible from regular language constructs (i.e. they are not used
+;; by the code generator, except for special-purpose sequences defined
+;; in rs6000.md), no special provisions are made for them.
+
diff --git a/gcc/config/rs6000/tramp.asm b/gcc/config/rs6000/tramp.asm
new file mode 100644
index 000000000..133b98840
--- /dev/null
+++ b/gcc/config/rs6000/tramp.asm
@@ -0,0 +1,107 @@
+/* Special support for trampolines
+ *
+ * Copyright (C) 1996, 1997, 2000, 2007, 2008, 2009 Free Software Foundation, Inc.
+ * Written By Michael Meissner
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Set up trampolines. */
+
+ .section ".text"
+#include "ppc-asm.h"
+#include "config.h"
+
+#ifndef __powerpc64__
+ .type trampoline_initial,@object
+ .align 2
+trampoline_initial:
+ mflr r0
+ bcl 20,31,1f
+.Lfunc = .-trampoline_initial
+ .long 0 /* will be replaced with function address */
+.Lchain = .-trampoline_initial
+ .long 0 /* will be replaced with static chain */
+1: mflr r11
+ mtlr r0
+ lwz r0,0(r11) /* function address */
+ lwz r11,4(r11) /* static chain */
+ mtctr r0
+ bctr
+
+trampoline_size = .-trampoline_initial
+ .size trampoline_initial,trampoline_size
+
+
+/* R3 = stack address to store trampoline */
+/* R4 = length of trampoline area */
+/* R5 = function address */
+/* R6 = static chain */
+
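+/* Illustrative call shape (a sketch only; the call is emitted by the
+   rs6000 backend, and the argument names here are hypothetical):
+
+	__trampoline_setup (trampoline_addr, trampoline_len,
+			    function_addr, static_chain);  */
+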
+FUNC_START(__trampoline_setup)
+ mflr r0 /* save return address */
+ bcl 20,31,.LCF0 /* load up __trampoline_initial into r7 */
+.LCF0:
+ mflr r11
+ addi r7,r11,trampoline_initial-4-.LCF0 /* trampoline address -4 */
+
+ li r8,trampoline_size /* verify that the trampoline is big enough */
+ cmpw cr1,r8,r4
+ srwi r4,r4,2 /* # words to move */
+ addi r9,r3,-4 /* adjust pointer for lwzu */
+ mtctr r4
+ blt cr1,.Labort
+
+ mtlr r0
+
+ /* Copy the instructions to the stack */
+.Lmove:
+ lwzu r10,4(r7)
+ stwu r10,4(r9)
+ bdnz .Lmove
+
+ /* Store correct function and static chain */
+ stw r5,.Lfunc(r3)
+ stw r6,.Lchain(r3)
+
+ /* Now flush both caches */
+ mtctr r4
+.Lcache:
+ icbi 0,r3
+ dcbf 0,r3
+ addi r3,r3,4
+ bdnz .Lcache
+
+ /* Finally synchronize things & return */
+ sync
+ isync
+ blr
+
+.Labort:
+#if (defined __PIC__ || defined __pic__) && defined HAVE_AS_REL16
+ bcl 20,31,1f
+1: mflr r30
+ addis r30,r30,_GLOBAL_OFFSET_TABLE_-1b@ha
+ addi r30,r30,_GLOBAL_OFFSET_TABLE_-1b@l
+#endif
+ bl JUMP_TARGET(abort)
+FUNC_END(__trampoline_setup)
+
+#endif
diff --git a/gcc/config/rs6000/vec_types.h b/gcc/config/rs6000/vec_types.h
new file mode 100644
index 000000000..dca637d7c
--- /dev/null
+++ b/gcc/config/rs6000/vec_types.h
@@ -0,0 +1,52 @@
+/* Cell single token vector types
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Single token vector data types for the PowerPC SIMD/Vector Multi-media
+ eXtension */
+
+#ifndef _VEC_TYPES_H_
+#define _VEC_TYPES_H_ 1
+
+#define qword __vector unsigned char
+
+#define vec_uchar16 __vector unsigned char
+#define vec_char16 __vector signed char
+#define vec_bchar16 __vector bool char
+
+#define vec_ushort8 __vector unsigned short
+#define vec_short8 __vector signed short
+#define vec_bshort8 __vector bool short
+
+#define vec_pixel8 __vector pixel
+
+#define vec_uint4 __vector unsigned int
+#define vec_int4 __vector signed int
+#define vec_bint4 __vector bool int
+
+#define vec_float4 __vector float
+
+#define vec_ullong2 __vector bool char
+#define vec_llong2 __vector bool short
+
+#define vec_double2 __vector bool int
+
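+/* Usage sketch (illustrative only, not part of the upstream header;
+   assumes the AltiVec intrinsics from <altivec.h> are also available):
+
+     vec_int4 v = vec_splat_s32 (3);
+     vec_uint4 u = (vec_uint4) v;  */
+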
+#endif /* _VEC_TYPES_H_ */
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
new file mode 100644
index 000000000..6474cb6eb
--- /dev/null
+++ b/gcc/config/rs6000/vector.md
@@ -0,0 +1,1175 @@
+;; Expander definitions for vector support between altivec & vsx.  No
+;; instructions are in this file; it provides the generic vector
+;; expanders, while the actual vector instructions live in altivec.md
+;; and vsx.md.
+
+;; Copyright (C) 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Vector int modes
+(define_mode_iterator VEC_I [V16QI V8HI V4SI])
+
+;; Vector float modes
+(define_mode_iterator VEC_F [V4SF V2DF])
+
+;; Vector arithmetic modes
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
+
+;; Vector modes that need alignment via permutes
+(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
+
+;; Vector logical modes
+(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+
+;; Vector modes for moves. Don't do TImode here.
+(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Vector modes for types that don't need a realignment under VSX
+(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
+
+;; Vector comparison modes
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
+
+;; Vector init/extract modes
+(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Vector modes for 64-bit base types
+(define_mode_iterator VEC_64 [V2DI V2DF])
+
+;; Vector reload iterator
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
+
+;; Base type from vector mode
+(define_mode_attr VEC_base [(V16QI "QI")
+ (V8HI "HI")
+ (V4SI "SI")
+ (V2DI "DI")
+ (V4SF "SF")
+ (V2DF "DF")
+ (TI "TI")])
+
+;; Same size integer type for floating point data
+(define_mode_attr VEC_int [(V4SF "v4si")
+ (V2DF "v2di")])
+
+(define_mode_attr VEC_INT [(V4SF "V4SI")
+ (V2DF "V2DI")])
+
+;; constants for unspec
+(define_constants
+ [(UNSPEC_PREDICATE 400)])
+
+
+;; Vector move instructions.
+(define_expand "mov<mode>"
+ [(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
+ (match_operand:VEC_M 1 "any_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+ if (can_create_pseudo_p ())
+ {
+ if (CONSTANT_P (operands[1])
+ && !easy_vector_constant (operands[1], <MODE>mode))
+ operands[1] = force_const_mem (<MODE>mode, operands[1]);
+
+ else if (!vlogical_operand (operands[0], <MODE>mode)
+ && !vlogical_operand (operands[1], <MODE>mode))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
+})
+
+;; Generic vector floating point load/store instructions. These will match
+;; insns defined in vsx.md or altivec.md depending on the switches.
+(define_expand "vector_load_<mode>"
+ [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+ (match_operand:VEC_M 1 "memory_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_store_<mode>"
+ [(set (match_operand:VEC_M 0 "memory_operand" "")
+ (match_operand:VEC_M 1 "vfloat_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+;; Splits if a GPR register was chosen for the move
+(define_split
+ [(set (match_operand:VEC_L 0 "nonimmediate_operand" "")
+ (match_operand:VEC_L 1 "input_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
+ && reload_completed
+ && gpr_or_gpr_p (operands[0], operands[1])"
+ [(pc)]
+{
+ rs6000_split_multireg_move (operands[0], operands[1]);
+ DONE;
+})
+
+;; Vector floating point load/store instructions that use the Altivec
+;; instructions even if we are compiling for VSX, since the Altivec
+;; instructions silently ignore the bottom 4 bits of the address, and VSX does
+;; not.
+(define_expand "vector_altivec_load_<mode>"
+ [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+ (match_operand:VEC_M 1 "memory_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+ if (VECTOR_MEM_VSX_P (<MODE>mode))
+ {
+ operands[1] = rs6000_address_for_altivec (operands[1]);
+ emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_expand "vector_altivec_store_<mode>"
+ [(set (match_operand:VEC_M 0 "memory_operand" "")
+ (match_operand:VEC_M 1 "vfloat_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+ if (VECTOR_MEM_VSX_P (<MODE>mode))
+ {
+ operands[0] = rs6000_address_for_altivec (operands[0]);
+ emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+
+
+;; Reload patterns for vector operations.  We may need an additional base
+;; register to convert the reg+offset addressing to reg+reg for vector
+;; registers and reg+reg or (reg+reg)&(-16) addressing to just an index
+;; register for gpr registers.
+(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_store"
+ [(parallel [(match_operand:VEC_R 0 "memory_operand" "m")
+ (match_operand:VEC_R 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "<P:tptrsize>"
+{
+ rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true);
+ DONE;
+})
+
+(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_load"
+ [(parallel [(match_operand:VEC_R 0 "gpc_reg_operand" "=&r")
+ (match_operand:VEC_R 1 "memory_operand" "m")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "<P:tptrsize>"
+{
+ rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+;; Reload sometimes tries to move the address to a GPR, and can generate
+;; invalid RTL for addresses involving AND -16. Allow addresses involving
+;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16.
+
+(define_insn_and_split "*vec_reload_and_plus_<mptrsize>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=b")
+ (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (match_operand:P 2 "reg_or_cint_operand" "rI"))
+ (const_int -16)))]
+ "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (plus:P (match_dup 1)
+ (match_dup 2)))
+ (parallel [(set (match_dup 0)
+ (and:P (match_dup 0)
+ (const_int -16)))
+ (clobber:CC (scratch:CC))])])
+
+;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16
+;; address to a register because there is no clobber of a (scratch), so we add
+;; it here.
+(define_insn_and_split "*vec_reload_and_reg_<mptrsize>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=b")
+ (and:P (match_operand:P 1 "gpc_reg_operand" "r")
+ (const_int -16)))]
+ "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0)
+ (and:P (match_dup 1)
+ (const_int -16)))
+ (clobber:CC (scratch:CC))])])
+
+;; Generic floating point vector arithmetic support
+(define_expand "add<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (plus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_expand "div<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (div:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_expand "abs<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1]));
+ DONE;
+ }
+}")
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:VEC_F 0 "register_operand" "")
+ (smin:VEC_F (match_operand:VEC_F 1 "register_operand" "")
+ (match_operand:VEC_F 2 "register_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:VEC_F 0 "register_operand" "")
+ (smax:VEC_F (match_operand:VEC_F 1 "register_operand" "")
+ (match_operand:VEC_F 2 "register_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (sqrt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "rsqrte<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")]
+ UNSPEC_RSQRT))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "re<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "f")]
+ UNSPEC_FRES))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "ftrunc<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_ceil<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")]
+ UNSPEC_FRIP))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_floor<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")]
+ UNSPEC_FRIM))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_btrunc<mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_copysign<mode>3"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unspec:VEC_F [(match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")] UNSPEC_COPYSIGN))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_copysign_v4sf3 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+}")
+
+
+;; Vector comparisons
+(define_expand "vcond<mode>"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (if_then_else:VEC_F
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_F 4 "vfloat_operand" "")
+ (match_operand:VEC_F 5 "vfloat_operand" "")])
+ (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (if_then_else:VEC_I
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_I 4 "vint_operand" "")
+ (match_operand:VEC_I 5 "vint_operand" "")])
+ (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "
+{
+ if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (if_then_else:VEC_I
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_I 4 "vint_operand" "")
+ (match_operand:VEC_I 5 "vint_operand" "")])
+ (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "
+{
+ if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+}")
+
+(define_expand "vector_eq<mode>"
+ [(set (match_operand:VEC_C 0 "vlogical_operand" "")
+ (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
+ (match_operand:VEC_C 2 "vlogical_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_gt<mode>"
+ [(set (match_operand:VEC_C 0 "vlogical_operand" "")
+ (gt:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
+ (match_operand:VEC_C 2 "vlogical_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_ge<mode>"
+ [(set (match_operand:VEC_C 0 "vlogical_operand" "")
+ (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "")
+ (match_operand:VEC_C 2 "vlogical_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_gtu<mode>"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_geu<mode>"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "")
+
+(define_insn_and_split "*vector_uneq<mode>"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (gt:VEC_F (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4)
+ (gt:VEC_F (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (not:VEC_F (ior:VEC_F (match_dup 3)
+ (match_dup 4))))]
+ "
+{
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+}")
+
+(define_insn_and_split "*vector_ltgt<mode>"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (ltgt:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (gt:VEC_F (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4)
+ (gt:VEC_F (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ior:VEC_F (match_dup 3)
+ (match_dup 4)))]
+ "
+{
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+}")
+
+(define_insn_and_split "*vector_ordered<mode>"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (ordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (ge:VEC_F (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4)
+ (ge:VEC_F (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (ior:VEC_F (match_dup 3)
+ (match_dup 4)))]
+ "
+{
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+}")
+
+(define_insn_and_split "*vector_unordered<mode>"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unordered:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (ge:VEC_F (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4)
+ (ge:VEC_F (match_dup 2)
+ (match_dup 1)))
+ (set (match_dup 0)
+ (not:VEC_F (ior:VEC_F (match_dup 3)
+ (match_dup 4))))]
+ "
+{
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+}")
+
+;; Note the arguments for __builtin_altivec_vsel are op2, op1, mask,
+;; which is the reverse of the order we want
+(define_expand "vector_select_<mode>"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (if_then_else:VEC_L
+ (ne:CC (match_operand:VEC_L 3 "vlogical_operand" "")
+ (match_dup 4))
+ (match_operand:VEC_L 2 "vlogical_operand" "")
+ (match_operand:VEC_L 1 "vlogical_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "operands[4] = CONST0_RTX (<MODE>mode);")
+
+(define_expand "vector_select_<mode>_uns"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (if_then_else:VEC_L
+ (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand" "")
+ (match_dup 4))
+ (match_operand:VEC_L 2 "vlogical_operand" "")
+ (match_operand:VEC_L 1 "vlogical_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "operands[4] = CONST0_RTX (<MODE>mode);")
+
+;; Expansions that compare vectors producing a vector result and a predicate,
+;; setting CR6 to indicate a combined status
+(define_expand "vector_eq_<mode>_p"
+ [(parallel
+ [(set (reg:CC 74)
+ (unspec:CC [(eq:CC (match_operand:VEC_A 1 "vlogical_operand" "")
+ (match_operand:VEC_A 2 "vlogical_operand" ""))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VEC_A 0 "vlogical_operand" "")
+ (eq:VEC_A (match_dup 1)
+ (match_dup 2)))])]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_gt_<mode>_p"
+ [(parallel
+ [(set (reg:CC 74)
+ (unspec:CC [(gt:CC (match_operand:VEC_A 1 "vlogical_operand" "")
+ (match_operand:VEC_A 2 "vlogical_operand" ""))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VEC_A 0 "vlogical_operand" "")
+ (gt:VEC_A (match_dup 1)
+ (match_dup 2)))])]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_ge_<mode>_p"
+ [(parallel
+ [(set (reg:CC 74)
+ (unspec:CC [(ge:CC (match_operand:VEC_F 1 "vfloat_operand" "")
+ (match_operand:VEC_F 2 "vfloat_operand" ""))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (ge:VEC_F (match_dup 1)
+ (match_dup 2)))])]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vector_gtu_<mode>_p"
+ [(parallel
+ [(set (reg:CC 74)
+ (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" ""))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VEC_I 0 "vlogical_operand" "")
+ (gtu:VEC_I (match_dup 1)
+ (match_dup 2)))])]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+;; AltiVec/VSX predicates.
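+;; (Hard register 74 in the patterns above and below is condition
+;; register field CR6, which the AltiVec/VSX vector-compare "dot
+;; forms" set.)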
+
+(define_expand "cr6_test_for_zero"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (reg:CC 74)
+ (const_int 0)))]
+ "TARGET_ALTIVEC || TARGET_VSX"
+ "")
+
+(define_expand "cr6_test_for_zero_reverse"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (reg:CC 74)
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))]
+ "TARGET_ALTIVEC || TARGET_VSX"
+ "")
+
+(define_expand "cr6_test_for_lt"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lt:SI (reg:CC 74)
+ (const_int 0)))]
+ "TARGET_ALTIVEC || TARGET_VSX"
+ "")
+
+(define_expand "cr6_test_for_lt_reverse"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lt:SI (reg:CC 74)
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))]
+ "TARGET_ALTIVEC || TARGET_VSX"
+ "")
+
+
+;; Vector logical instructions
+(define_expand "xor<mode>3"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "and<mode>3"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "nor<mode>3"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" ""))))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "andc<mode>3"
+ [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))
+ (match_operand:VEC_L 1 "vlogical_operand" "")))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "")
+
+;; Same size conversions
+(define_expand "float<VEC_int><mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
+}")
+
+(define_expand "unsigned_float<VEC_int><mode>2"
+ [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+ (unsigned_float:VEC_F (match_operand:<VEC_INT> 1 "vint_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
+}")
+
+(define_expand "fix_trunc<mode><VEC_int>2"
+ [(set (match_operand:<VEC_INT> 0 "vint_operand" "")
+ (fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
+}")
+
+(define_expand "fixuns_trunc<mode><VEC_int>2"
+ [(set (match_operand:<VEC_INT> 0 "vint_operand" "")
+ (unsigned_fix:<VEC_INT> (match_operand:VEC_F 1 "vfloat_operand" "")))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "
+{
+ if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
+ {
+ emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx));
+ DONE;
+ }
+}")
+
+
+;; Vector initialization, set, extract
+(define_expand "vec_init<mode>"
+ [(match_operand:VEC_E 0 "vlogical_operand" "")
+ (match_operand:VEC_E 1 "" "")]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+ rs6000_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:VEC_E 0 "vlogical_operand" "")
+ (match_operand:<VEC_base> 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+ rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extract<mode>"
+ [(match_operand:<VEC_base> 0 "register_operand" "")
+ (match_operand:VEC_E 1 "vlogical_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+ rs6000_expand_vector_extract (operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Interleave patterns
+(define_expand "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "vfloat_operand" "")
+ (vec_merge:V4SF
+ (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+ "")
+
+(define_expand "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "vfloat_operand" "")
+ (vec_merge:V4SF
+ (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+ "")
+
+(define_expand "vec_interleave_high<mode>"
+ [(set (match_operand:VEC_64 0 "vfloat_operand" "")
+ (vec_concat:VEC_64
+ (vec_select:<VEC_base> (match_operand:VEC_64 1 "vfloat_operand" "")
+ (parallel [(const_int 0)]))
+ (vec_select:<VEC_base> (match_operand:VEC_64 2 "vfloat_operand" "")
+ (parallel [(const_int 0)]))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vec_interleave_low<mode>"
+ [(set (match_operand:VEC_64 0 "vfloat_operand" "")
+ (vec_concat:VEC_64
+ (vec_select:<VEC_base> (match_operand:VEC_64 1 "vfloat_operand" "")
+ (parallel [(const_int 1)]))
+ (vec_select:<VEC_base> (match_operand:VEC_64 2 "vfloat_operand" "")
+ (parallel [(const_int 1)]))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "")
+
+
+;; Convert double word types to single word types
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "vfloat_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SFmode);
+ rtx r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsp (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "vint_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_ufix_trunc_v2df"
+ [(match_operand:V4SI 0 "vint_operand" "")
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (match_operand:V2DF 2 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+;; Convert single word types to double word
+(define_expand "vec_unpacks_hi_v4sf"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SF 1 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SF 1 "vfloat_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v4si"
+ [(match_operand:V2DF 0 "vfloat_operand" "")
+ (match_operand:V4SI 1 "vint_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
+
+
+;; Align vector loads with a permute.
+(define_expand "vec_realign_load_<mode>"
+ [(match_operand:VEC_K 0 "vlogical_operand" "")
+ (match_operand:VEC_K 1 "vlogical_operand" "")
+ (match_operand:VEC_K 2 "vlogical_operand" "")
+ (match_operand:V16QI 3 "vlogical_operand" "")]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+{
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
+;; Under VSX, vectors with 4- or 8-byte alignment do not need to be
+;; realigned, since the load itself handles the misalignment.
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VEC_N 0 "nonimmediate_operand" "")
+ (match_operand:VEC_N 1 "any_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN"
+ "")
+
+
+;; Vector shift left in bits.  Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vslo + vsl. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shl_<mode>"
+ [(match_operand:VEC_L 0 "vlogical_operand" "")
+ (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:QI 2 "reg_or_short_operand" "")]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx bitshift = operands[2];
+ rtx shift;
+ rtx insn;
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = bitshift_val >> 3;
+ if (TARGET_VSX && (byteshift_val & 0x3) == 0)
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+ else
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+
+ emit_insn (insn);
+ DONE;
+}")
+
+;; Vector shift right in bits.  Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vsro + vsr. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shr_<mode>"
+ [(match_operand:VEC_L 0 "vlogical_operand" "")
+ (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:QI 2 "reg_or_short_operand" "")]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx bitshift = operands[2];
+ rtx shift;
+ rtx insn;
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = 16 - (bitshift_val >> 3);
+ if (TARGET_VSX && (byteshift_val & 0x3) == 0)
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+ else
+ {
+ shift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ shift);
+ }
+
+ emit_insn (insn);
+ DONE;
+}")
+
+;; Expanders for rotate each element in a vector
+(define_expand "vrotl<mode>3"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "TARGET_ALTIVEC"
+ "")
+
+;; Expanders for arithmetic shift left on each vector element
+(define_expand "vashl<mode>3"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "TARGET_ALTIVEC"
+ "")
+
+;; Expanders for logical shift right on each vector element
+(define_expand "vlshr<mode>3"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "TARGET_ALTIVEC"
+ "")
+
+;; Expanders for arithmetic shift right on each vector element
+(define_expand "vashr<mode>3"
+ [(set (match_operand:VEC_I 0 "vint_operand" "")
+ (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
+ (match_operand:VEC_I 2 "vint_operand" "")))]
+ "TARGET_ALTIVEC"
+ "")
+
+;;; Expanders for vector insn patterns shared between the SPE and
+;;; paired-single (TARGET_PAIRED_FLOAT) targets.
+
+(define_expand "absv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (abs:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "")
+
+(define_expand "negv2sf2"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (neg:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "")
+
+(define_expand "addv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (plus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "
+{
+ if (TARGET_SPE)
+ {
+ /* We need to make a note that we clobber SPEFSCR. */
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_PLUS (V2SFmode, operands[1], operands[2]));
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
+ emit_insn (par);
+ DONE;
+ }
+}")
+
+(define_expand "subv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (minus:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "
+{
+ if (TARGET_SPE)
+ {
+ /* We need to make a note that we clobber SPEFSCR. */
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_MINUS (V2SFmode, operands[1], operands[2]));
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
+ emit_insn (par);
+ DONE;
+ }
+}")
+
+(define_expand "mulv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (mult:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "
+{
+ if (TARGET_SPE)
+ {
+ /* We need to make a note that we clobber SPEFSCR. */
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_MULT (V2SFmode, operands[1], operands[2]));
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
+ emit_insn (par);
+ DONE;
+ }
+}")
+
+(define_expand "divv2sf3"
+ [(set (match_operand:V2SF 0 "gpc_reg_operand" "")
+ (div:V2SF (match_operand:V2SF 1 "gpc_reg_operand" "")
+ (match_operand:V2SF 2 "gpc_reg_operand" "")))]
+ "TARGET_PAIRED_FLOAT || TARGET_SPE"
+ "
+{
+ if (TARGET_SPE)
+ {
+ /* We need to make a note that we clobber SPEFSCR. */
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (par, 0, 0) = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_DIV (V2SFmode, operands[1], operands[2]));
+ XVECEXP (par, 0, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, SPEFSCR_REGNO));
+ emit_insn (par);
+ DONE;
+ }
+}")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
new file mode 100644
index 000000000..05c794e1d
--- /dev/null
+++ b/gcc/config/rs6000/vsx.md
@@ -0,0 +1,1152 @@
+;; VSX patterns.
+;; Copyright (C) 2009, 2010, 2011
+;; Free Software Foundation, Inc.
+;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Iterator for both scalar and vector floating point types supported by VSX
+(define_mode_iterator VSX_B [DF V4SF V2DF])
+
+;; Iterator for the 2 64-bit vector types
+(define_mode_iterator VSX_D [V2DF V2DI])
+
+;; Iterator for the 2 32-bit vector types
+(define_mode_iterator VSX_W [V4SF V4SI])
+
+;; Iterator for the DF types
+(define_mode_iterator VSX_DF [V2DF DF])
+
+;; Iterator for vector floating point types supported by VSX
+(define_mode_iterator VSX_F [V4SF V2DF])
+
+;; Iterator for logical types supported by VSX
+(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+
+;; Iterator for memory moves.  TImode is handled separately (see
+;; *vsx_movti below) so that it can use gprs as well as vsx registers.
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Map into the appropriate load/store name based on the type
+(define_mode_attr VSm [(V16QI "vw4")
+ (V8HI "vw4")
+ (V4SI "vw4")
+ (V4SF "vw4")
+ (V2DF "vd2")
+ (V2DI "vd2")
+ (DF "d")
+ (TI "vw4")])
+
+;; Map into the appropriate suffix based on the type
+(define_mode_attr VSs [(V16QI "sp")
+ (V8HI "sp")
+ (V4SI "sp")
+ (V4SF "sp")
+ (V2DF "dp")
+ (V2DI "dp")
+ (DF "dp")
+ (SF "sp")
+ (TI "sp")])
+
+;; Map the register class used
+(define_mode_attr VSr [(V16QI "v")
+ (V8HI "v")
+ (V4SI "v")
+ (V4SF "wf")
+ (V2DI "wd")
+ (V2DF "wd")
+ (DF "ws")
+ (SF "d")
+ (TI "wd")])
+
+;; Map the register class used for float<->int conversions
+(define_mode_attr VSr2 [(V2DF "wd")
+ (V4SF "wf")
+ (DF "ws")])
+
+(define_mode_attr VSr3 [(V2DF "wa")
+ (V4SF "wa")
+ (DF "ws")])
+
+;; Map the register class for sp<->dp float conversions, destination
+(define_mode_attr VSr4 [(SF "ws")
+ (DF "f")
+ (V2DF "wd")
+ (V4SF "v")])
+
+;; Map the register class for sp<->dp float conversions, destination
+(define_mode_attr VSr5 [(SF "ws")
+ (DF "f")
+ (V2DF "v")
+ (V4SF "wd")])
+
+;; Same size integer type for floating point data
+(define_mode_attr VSi [(V4SF "v4si")
+ (V2DF "v2di")
+ (DF "di")])
+
+(define_mode_attr VSI [(V4SF "V4SI")
+ (V2DF "V2DI")
+ (DF "DI")])
+
+;; Word size for same size conversion
+(define_mode_attr VSc [(V4SF "w")
+ (V2DF "d")
+ (DF "d")])
+
+;; Map into either s or v, depending on whether this is a scalar or vector
+;; operation
+(define_mode_attr VSv [(V16QI "v")
+ (V8HI "v")
+ (V4SI "v")
+ (V4SF "v")
+ (V2DI "v")
+ (V2DF "v")
+ (TI "v")
+ (DF "s")])
+
+;; Appropriate type for add ops (and other simple FP ops)
+(define_mode_attr VStype_simple [(V2DF "vecfloat")
+ (V4SF "vecfloat")
+ (DF "fp")])
+
+(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
+ (V4SF "fp_addsub_s")
+ (DF "fp_addsub_d")])
+
+;; Appropriate type for multiply ops
+(define_mode_attr VStype_mul [(V2DF "vecfloat")
+ (V4SF "vecfloat")
+ (DF "dmul")])
+
+(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
+ (V4SF "fp_mul_s")
+ (DF "fp_mul_d")])
+
+;; Appropriate type for divide ops. For now, just lump the vector divide with
+;; the scalar divides
+(define_mode_attr VStype_div [(V2DF "ddiv")
+ (V4SF "sdiv")
+ (DF "ddiv")])
+
+(define_mode_attr VSfptype_div [(V2DF "fp_div_d")
+ (V4SF "fp_div_s")
+ (DF "fp_div_d")])
+
+;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
+;; the scalar sqrt
+(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
+ (V4SF "sdiv")
+ (DF "ddiv")])
+
+(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
+ (V4SF "fp_sqrt_s")
+ (DF "fp_sqrt_d")])
+
+;; Iterator and modes for sp<->dp conversions
+;; Because scalar SF values are represented internally as double, use the
+;; V4SF type to represent this rather than SF.
+(define_mode_iterator VSX_SPDP [DF V4SF V2DF])
+
+(define_mode_attr VS_spdp_res [(DF "V4SF")
+ (V4SF "V2DF")
+ (V2DF "V4SF")])
+
+(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
+ (V4SF "xvcvspdp")
+ (V2DF "xvcvdpsp")])
+
+(define_mode_attr VS_spdp_type [(DF "fp")
+ (V4SF "vecfloat")
+ (V2DF "vecfloat")])
+
+;; Map the scalar mode for a vector type
+(define_mode_attr VS_scalar [(V2DF "DF")
+ (V2DI "DI")
+ (V4SF "SF")
+ (V4SI "SI")
+ (V8HI "HI")
+ (V16QI "QI")])
+
+;; Constants for creating unspecs
+(define_constants
+ [(UNSPEC_VSX_CONCAT 500)
+ (UNSPEC_VSX_CVDPSXWS 501)
+ (UNSPEC_VSX_CVDPUXWS 502)
+ (UNSPEC_VSX_CVSPDP 503)
+ (UNSPEC_VSX_CVSXWDP 504)
+ (UNSPEC_VSX_CVUXWDP 505)
+ (UNSPEC_VSX_CVSXDSP 506)
+ (UNSPEC_VSX_CVUXDSP 507)
+ (UNSPEC_VSX_CVSPSXDS 508)
+ (UNSPEC_VSX_CVSPUXDS 509)
+ ;; 510-514 deleted
+ (UNSPEC_VSX_TDIV 515)
+ (UNSPEC_VSX_TSQRT 516)
+ (UNSPEC_VSX_XXPERMDI 517)
+ (UNSPEC_VSX_SET 518)
+ (UNSPEC_VSX_ROUND_I 519)
+ (UNSPEC_VSX_ROUND_IC 520)
+ (UNSPEC_VSX_SLDWI 521)])
+
+;; VSX moves
+(define_insn "*vsx_mov<mode>"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*o,*r,*r,<VSr>,?wa,v,wZ,v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,o,r,j,j,W,v,wZ"))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 3:
+ gcc_assert (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
+ return "stx<VSm>x %x1,%y0";
+
+ case 1:
+ case 4:
+ gcc_assert (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
+ && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
+ && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
+ return "lx<VSm>x %x0,%y1";
+
+ case 2:
+ case 5:
+ return "xxlor %x0,%x1,%x1";
+
+ case 6:
+ case 7:
+ case 8:
+ return "#";
+
+ case 9:
+ case 10:
+ return "xxlxor %x0,%x0,%x0";
+
+ case 11:
+ return output_vec_const_move (operands);
+
+ case 12:
+ gcc_assert (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
+ return "stvx %1,%y0";
+
+ case 13:
+ gcc_assert (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
+ && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
+ return "lvx %0,%y1";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,vecstore,vecload")])
+
+;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
+;; unions.  However, for plain data movement, slightly favor the vector loads.
+(define_insn "*vsx_movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?o,?r,?r,wa,v,v,wZ")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,r,o,r,j,W,wZ,v"))]
+ "VECTOR_MEM_VSX_P (TImode)
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "stxvd2x %x1,%y0";
+
+ case 1:
+ return "lxvd2x %x0,%y1";
+
+ case 2:
+ return "xxlor %x0,%x1,%x1";
+
+ case 3:
+ case 4:
+ case 5:
+ return "#";
+
+ case 6:
+ return "xxlxor %x0,%x0,%x0";
+
+ case 7:
+ return output_vec_const_move (operands);
+
+ case 8:
+ return "stvx %1,%y0";
+
+ case 9:
+ return "lvx %0,%y1";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
+
+;; Explicit load/store expanders for the builtin functions
+(define_expand "vsx_load_<mode>"
+ [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
+ (match_operand:VSX_M 1 "memory_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "")
+
+(define_expand "vsx_store_<mode>"
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "")
+
+
+;; VSX scalar and vector floating point arithmetic instructions
+(define_insn "*vsx_add<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>add<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_sub<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>sub<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_mul<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>mul<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+(define_insn "*vsx_div<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>div<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_div>")
+ (set_attr "fp_type" "<VSfptype_div>")])
+
+;; *tdiv* instruction returning the FG flag
+(define_expand "vsx_tdiv<mode>3_fg"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")]
+ UNSPEC_VSX_TDIV))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (gt:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+;; *tdiv* instruction returning the FE flag
+(define_expand "vsx_tdiv<mode>3_fe"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
+ (match_operand:VSX_B 2 "vsx_register_operand" "")]
+ UNSPEC_VSX_TDIV))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (eq:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+(define_insn "*vsx_tdiv<mode>3_internal"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_TDIV))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>tdiv<VSs> %0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_fre<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRES))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>re<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_neg<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>neg<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_abs<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>abs<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_nabs<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_B
+ (abs:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>nabs<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_smax<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>max<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_smin<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>min<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Special VSX version of smin/smax for single precision floating point. Since
+;; both numbers are rounded to single precision, we can just use the DP version
+;; of the instruction.
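+;; (Worked sketch: for a = 1.5f and b = 2.5f, xsmaxdp yields 2.5; since
+;; min/max return one of their operands, the result is always still an
+;; exact single precision value.)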
+
+(define_insn "*vsx_smaxsf3"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f")
+ (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
+ (match_operand:SF 2 "vsx_register_operand" "f")))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xsmaxdp %x0,%x1,%x2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_d")])
+
+(define_insn "*vsx_sminsf3"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f")
+ (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
+ (match_operand:SF 2 "vsx_register_operand" "f")))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xsmindp %x0,%x1,%x2"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_d")])
+
+(define_insn "*vsx_sqrt<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>sqrt<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_sqrt>")
+ (set_attr "fp_type" "<VSfptype_sqrt>")])
+
+(define_insn "*vsx_rsqrte<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_RSQRT))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>rsqrte<VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; *tsqrt* returning the FG flag
+(define_expand "vsx_tsqrt<mode>2_fg"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
+ UNSPEC_VSX_TSQRT))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (gt:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+;; *tsqrt* returning the FE flag
+(define_expand "vsx_tsqrt<mode>2_fe"
+ [(set (match_dup 3)
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
+ UNSPEC_VSX_TSQRT))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (eq:SI (match_dup 3)
+ (const_int 0)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+{
+ operands[3] = gen_reg_rtx (CCFPmode);
+})
+
+(define_insn "*vsx_tsqrt<mode>2_internal"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
+ (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_TSQRT))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>tsqrt<VSs> %0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Fused vector multiply/add instructions
+
+(define_insn "*vsx_fma<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (fma:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3
+ x<VSv>madda<VSs> %x0,%x1,%x2
+ x<VSv>maddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+(define_insn "*vsx_fms<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (fma:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3
+ x<VSv>msuba<VSs> %x0,%x1,%x2
+ x<VSv>msubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+(define_insn "*vsx_nfma<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_B
+ (fma:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3
+ x<VSv>nmadda<VSs> %x0,%x1,%x2
+ x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+(define_insn "*vsx_nfms<mode>4"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+ (neg:VSX_B
+ (fma:VSX_B
+ (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+ (neg:VSX_B
+ (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "@
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3
+ x<VSv>nmsuba<VSs> %x0,%x1,%x2
+ x<VSv>nmsubm<VSs> %x0,%x1,%x3"
+ [(set_attr "type" "<VStype_mul>")
+ (set_attr "fp_type" "<VSfptype_mul>")])
+
+;; Vector conditional expressions (no scalar version for these instructions)
+(define_insn "vsx_eq<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpeq<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_gt<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpgt<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_ge<mode>"
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpge<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Floating point scalar compare
+(define_insn "*vsx_cmpdf_internal1"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
+ (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
+ (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_VSX_P (DFmode)"
+ "xscmpudp %0,%x1,%x2"
+ [(set_attr "type" "fpcompare")])
+
+;; Compare vectors producing a vector result and a predicate, setting CR6 to
+;; indicate a combined status
+(define_insn "*vsx_eq_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (eq:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpeq<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*vsx_gt_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (gt:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpgt<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+(define_insn "*vsx_ge_<mode>_p"
+ [(set (reg:CC 74)
+ (unspec:CC
+ [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))]
+ UNSPEC_PREDICATE))
+ (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ge:VSX_F (match_dup 1)
+ (match_dup 2)))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "xvcmpge<VSs>. %x0,%x1,%x2"
+ [(set_attr "type" "veccmp")])
+
+;; Vector select
+(define_insn "*vsx_xxsel<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (if_then_else:VSX_L
+ (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 4 "zero_constant" ""))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x3,%x2,%x1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_xxsel<mode>_uns"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (if_then_else:VSX_L
+ (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 4 "zero_constant" ""))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x3,%x2,%x1"
+ [(set_attr "type" "vecperm")])
+
+;; Copy sign
+(define_insn "vsx_copysign<mode>3"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B
+ [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_COPYSIGN))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; For the conversions, limit the register class for the integer value to the
+;; FPRs, because we don't want to add the Altivec registers to movdi/movsi.
+;; For the unsigned tests, there isn't a generic double -> unsigned conversion
+;; in rs6000.md, so don't test VECTOR_UNIT_VSX_P; just test against VSX.
+;; Don't use vsx_register_operand here; use gpc_reg_operand to match rs6000.md.
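+;; (Sketch of the iterator expansion, assuming the usual VSv/VSc/VSs
+;; attribute values: the scalar DF case of the template below becomes
+;; "xscvsxddp %x0,%x1", and the V4SF case becomes "xvcvsxwsp %x0,%x1".)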
+(define_insn "vsx_float<VSi><mode>2"
+ [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
+ (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cvsx<VSc><VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_floatuns<VSi><mode>2"
+ [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa")
+ (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cvux<VSc><VSs> %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_fix_trunc<mode><VSi>2"
+ [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
+ (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_fixuns_trunc<mode><VSi>2"
+ [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
+ (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+;; Math rounding functions
+(define_insn "vsx_x<VSv>r<VSs>i"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_ROUND_I))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>i %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_x<VSv>r<VSs>ic"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_VSX_ROUND_IC))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>ic %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_btrunc<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>iz %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "*vsx_b2trunc<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIZ))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>iz %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_floor<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIM))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>im %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+(define_insn "vsx_ceil<mode>2"
+ [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ UNSPEC_FRIP))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "x<VSv>r<VSs>ip %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+
+;; VSX convert to/from double vector
+
+;; Convert between single and double precision
+;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
+;; scalar single precision instructions internally use the double format.
+;; Prefer the Altivec registers, since we will likely need to do a vperm.
+(define_insn "vsx_<VS_spdp_insn>"
+ [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa")
+ (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (<MODE>mode)"
+ "<VS_spdp_insn> %x0,%x1"
+ [(set_attr "type" "<VS_spdp_type>")])
+
+;; xscvspdp: represent the scalar SF type as V4SF.
+(define_insn "vsx_xscvspdp"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvspdp %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; xscvdpsp is used for splatting a scalar to V4SF, knowing that the internal
+;; SF format of scalars is actually DF.
+(define_insn "vsx_xscvdpsp_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvdpsp %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Convert from 64-bit to 32-bit types.
+;; Note: favor the Altivec registers, since the usual use of these instructions
+;; is in vector converts, and we need to use the Altivec vperm instruction.
+
+(define_insn "vsx_xvcvdpsxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpsxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvdpuxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPUXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpuxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvsxdsp"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVSXDSP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxdsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxdsp"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVUXDSP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Convert from 32-bit to 64-bit types
+(define_insn "vsx_xvcvsxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVSXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_CVUXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvspsxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVSPSXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvspsxds %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvspuxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVSPUXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvspuxds %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
+;; the xsrdpiz instruction does not truncate the value if the floating
+;; point value is < LONG_MIN or > LONG_MAX.
+(define_insn "*vsx_float_fix_<mode>2"
+ [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?wa")
+ (float:VSX_DF
+ (fix:<VSI>
+ (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?wa"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
+ && !flag_trapping_math && TARGET_FRIZ"
+ "x<VSv>r<VSs>iz %x0,%x1"
+ [(set_attr "type" "<VStype_simple>")
+ (set_attr "fp_type" "<VSfptype_simple>")])
+
+
+;; Logical and permute operations
+(define_insn "*vsx_and<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxland %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_ior<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_xor<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (xor:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlxor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_one_cmpl<mode>2"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (not:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnor %x0,%x1,%x1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_nor<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (not:VSX_L
+ (ior:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_andc<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (and:VSX_L
+ (not:VSX_L
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+
+;; Permute operations
+
+;; Build a V2DF/V2DI vector from two scalars
+(define_insn "vsx_concat_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_D
+ [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
+ UNSPEC_VSX_CONCAT))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x2,0"
+ [(set_attr "type" "vecperm")])
+
+;; Special purpose concat using xxpermdi to glue two single precision values
+;; together, relying on the fact that internally scalar floats are represented
+;; as doubles. This is used to initialize a V4SF vector with 4 floats.
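+;; (Usage sketch: the generator derived from the pattern name below can be
+;; called as emit_insn (gen_vsx_concat_v2sf (target, f0, f1)), with target
+;; a V2DF register and f0/f1 SF registers; the operand names are
+;; hypothetical.)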
+(define_insn "vsx_concat_v2sf"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF
+ [(match_operand:SF 1 "vsx_register_operand" "f,f")
+ (match_operand:SF 2 "vsx_register_operand" "f,f")]
+ UNSPEC_VSX_CONCAT))]
+ "VECTOR_MEM_VSX_P (V2DFmode)"
+ "xxpermdi %x0,%x1,%x2,0"
+ [(set_attr "type" "vecperm")])
+
+;; Set the element of a V2DI/V2DF mode.
+(define_insn "vsx_set_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_SET))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ if (INTVAL (operands[3]) == 0)
+ return \"xxpermdi %x0,%x2,%x1,1\";
+ else if (INTVAL (operands[3]) == 1)
+ return \"xxpermdi %x0,%x1,%x2,0\";
+ else
+ gcc_unreachable ();
+}
+ [(set_attr "type" "vecperm")])
+
+;; Extract a DF/DI element from V2DF/V2DI
+(define_insn "vsx_extract_<mode>"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ gcc_assert (UINTVAL (operands[2]) <= 1);
+ operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
+ return \"xxpermdi %x0,%x1,%x1,%3\";
+}
+ [(set_attr "type" "vecperm")])
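+;; (Example for the pattern above: element 1 gives operands[3] = 2, so the
+;; insn emitted is "xxpermdi %x0,%x1,%x1,2"; element 0 uses immediate 0.)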
+
+;; Optimize extracting element 0 from memory
+(define_insn "*vsx_extract_<mode>_zero"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+ (parallel [(const_int 0)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+ "lxsd%U1x %x0,%y1"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
+;; General double-word-oriented permute. Allow the other vector types so
+;; that uses of the permute instruction can be optimized.
+(define_insn "vsx_xxpermdi_<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wd,wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "wd,wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_XXPERMDI))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x2,%3"
+ [(set_attr "type" "vecperm")])
+
+;; Variant of xxpermdi that is emitted by the vec_interleave functions.
+(define_insn "*vsx_xxpermdi2_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "vsx_register_operand" "wd")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i")]))
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 3 "vsx_register_operand" "wd")
+ (parallel
+ [(match_operand:QI 4 "u5bit_cint_operand" "i")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1));
+ operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1)
+ | (INTVAL (operands[4]) & 1));
+ return \"xxpermdi %x0,%x1,%x3,%5\";
+}
+ [(set_attr "type" "vecperm")])
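+;; (Example for the pattern above: selecting element 1 of operand 1 and
+;; element 0 of operand 3 gives operands[5] = (1 << 1) | 0 = 2, i.e.
+;; "xxpermdi %x0,%x1,%x3,2".)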
+
+;; V2DF/V2DI splat
+(define_insn "vsx_splat_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
+ (vec_duplicate:VSX_D
+ (match_operand:<VS_scalar> 1 "input_operand" "ws,f,Z,wa,wa,Z")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxpermdi %x0,%x1,%x1,0
+ xxpermdi %x0,%x1,%x1,0
+ lxvdsx %x0,%y1
+ xxpermdi %x0,%x1,%x1,0
+ xxpermdi %x0,%x1,%x1,0
+ lxvdsx %x0,%y1"
+ [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
+
+;; V4SF/V4SI splat
+(define_insn "vsx_xxspltw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_duplicate:VSX_W
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxspltw %x0,%x1,%2"
+ [(set_attr "type" "vecperm")])
+
+;; V4SF/V4SI interleave
+(define_insn "vsx_xxmrghw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_merge:VSX_W
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (vec_select:VSX_W
+ (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (const_int 5)))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxmrghw %x0,%x1,%x2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "vsx_xxmrglw_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (vec_merge:VSX_W
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (parallel [(const_int 2)
+ (const_int 0)
+ (const_int 3)
+ (const_int 1)]))
+ (vec_select:VSX_W
+ (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 1)
+ (const_int 3)]))
+ (const_int 5)))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxmrglw %x0,%x1,%x2"
+ [(set_attr "type" "vecperm")])
+
+;; Shift left double by word immediate
+(define_insn "vsx_xxsldwi_<mode>"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
+ (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
+ (match_operand:VSX_L 2 "vsx_register_operand" "wa")
+ (match_operand:QI 3 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_SLDWI))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsldwi %x0,%x1,%x2,%3"
+ [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
new file mode 100644
index 000000000..95aedf706
--- /dev/null
+++ b/gcc/config/rs6000/vxworks.h
@@ -0,0 +1,146 @@
+/* Definitions of target machine for GNU compiler.  VxWorks PowerPC version.
+ Copyright (C) 1996, 2000, 2002, 2003, 2004, 2005, 2007, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Note to future editors: VxWorks is mostly an EABI target. We do
+ not use rs6000/eabi.h because we would have to override most of
+ it anyway. However, if you change that file, consider making
+ analogous changes here too. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC VxWorks)");
+
+/* CPP predefined macros. */
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__ppc"); \
+ builtin_define ("__PPC__"); \
+ builtin_define ("__EABI__"); \
+ builtin_define ("__ELF__"); \
+ if (!TARGET_SOFT_FLOAT) \
+ builtin_define ("__hardfp"); \
+ \
+ /* C89 namespace violation! */ \
+ builtin_define ("CPU_FAMILY=PPC"); \
+ \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+/* Only big-endian PPC is supported by VxWorks. */
+#undef BYTES_BIG_ENDIAN
+#define BYTES_BIG_ENDIAN 1
+
+/* We have to kill off the entire specs set created by rs6000/sysv4.h
+ and substitute our own set. The top level vxworks.h has done some
+ of this for us. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#undef CPP_SPEC
+#undef CC1_SPEC
+#undef ASM_SPEC
+
+#define SUBTARGET_EXTRA_SPECS /* none needed */
+
+/* FIXME: The only reason we allow no -mcpu switch at all is that
+   config-ml.in insists on a "." multilib. */
+#define CPP_SPEC \
+"%{!DCPU=*: \
+ %{mcpu=403 : -DCPU=PPC403 ; \
+ mcpu=405 : -DCPU=PPC405 ; \
+ mcpu=440 : -DCPU=PPC440 ; \
+ mcpu=464 : -DCPU=PPC464 ; \
+ mcpu=476 : -DCPU=PPC476 ; \
+ mcpu=603 : -DCPU=PPC603 ; \
+ mcpu=604 : -DCPU=PPC604 ; \
+ mcpu=860 : -DCPU=PPC860 ; \
+ mcpu=8540: -DCPU=PPC85XX ; \
+ : -DCPU=PPC604 }}" \
+VXWORKS_ADDITIONAL_CPP_SPEC
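+
+/* For example, "-mcpu=440" adds "-DCPU=PPC440" to the preprocessor
+   command line; with neither -mcpu nor an explicit -DCPU=..., the
+   fallback is -DCPU=PPC604.  */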
+
+#define CC1_SPEC \
+"%{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \
+ %{mlittle|mlittle-endian:-mstrict-align}"
+
+#define ASM_SPEC \
+"%(asm_cpu) \
+ %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \
+ %{mrelocatable} %{mrelocatable-lib} %{fpic:-K PIC} %{fPIC:-K PIC} -mbig"
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+/* There is no default multilib. */
+#undef MULTILIB_DEFAULTS
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_POWERPC | MASK_NEW_MNEMONICS | MASK_EABI | MASK_STRICT_ALIGN)
+
+#undef PROCESSOR_DEFAULT
+#define PROCESSOR_DEFAULT PROCESSOR_PPC604
+
+/* No sdata either, for kernel mode.  We use this in
+   SUBSUBTARGET_OVERRIDE_OPTIONS, after rs6000_rtp has been initialized. */
+#undef SDATA_DEFAULT_SIZE
+#define SDATA_DEFAULT_SIZE (TARGET_VXWORKS_RTP ? 8 : 0)
+
+/* Enforce 16-byte alignment for the stack pointer, to permit general
+   compliance with, e.g., Altivec instruction requirements.  Make sure
+ this isn't overruled by the EABI constraints. */
+
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY (16*BITS_PER_UNIT)
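+/* (16 * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte alignment.)  */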
+
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY
+
+#undef ABI_STACK_BOUNDARY
+
+/* Make -mcpu=8540 imply SPE.  ISEL is automatically enabled; the
+   others must be done by hand.  Handle -mrtp.  Disable -fPIC
+   for -mrtp, since the VxWorks PIC model is not compatible with it. */
+#undef SUBSUBTARGET_OVERRIDE_OPTIONS
+#define SUBSUBTARGET_OVERRIDE_OPTIONS \
+ do { \
+ if (TARGET_E500) \
+ { \
+ rs6000_spe = 1; \
+ rs6000_spe_abi = 1; \
+ rs6000_float_gprs = 1; \
+ } \
+ \
+ if (!global_options_set.x_g_switch_value) \
+ g_switch_value = SDATA_DEFAULT_SIZE; \
+ VXWORKS_OVERRIDE_OPTIONS; \
+ } while (0)
+
+/* No _mcount profiling on VxWorks. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO)
diff --git a/gcc/config/rs6000/vxworksae.h b/gcc/config/rs6000/vxworksae.h
new file mode 100644
index 000000000..dd95bb1e4
--- /dev/null
+++ b/gcc/config/rs6000/vxworksae.h
@@ -0,0 +1,23 @@
+/* PowerPC VxWorks AE target definitions for GNU compiler.
+ Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (PowerPC VxWorks AE)");
+
diff --git a/gcc/config/rs6000/x-aix b/gcc/config/rs6000/x-aix
new file mode 100644
index 000000000..d40690f2d
--- /dev/null
+++ b/gcc/config/rs6000/x-aix
@@ -0,0 +1,6 @@
+# genautomata requires more than 256MB of data
+build/genautomata : override LDFLAGS += -Wl,-bmaxdata:0x20000000
+
+# jc1 requires more than 256MB of data
+$(COMPILERS) : override LDFLAGS += -Wl,-bmaxdata:0x40000000
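+# (For reference: 0x20000000 bytes = 512MB and 0x40000000 bytes = 1GB.)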
+
diff --git a/gcc/config/rs6000/x-darwin b/gcc/config/rs6000/x-darwin
new file mode 100644
index 000000000..5672c698b
--- /dev/null
+++ b/gcc/config/rs6000/x-darwin
@@ -0,0 +1,5 @@
+host-ppc-darwin.o : $(srcdir)/config/rs6000/host-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) toplev.h \
+ config/host-darwin.h $(DIAGNOSTIC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \
+ $(INCLUDES) $< -o $@
diff --git a/gcc/config/rs6000/x-darwin64 b/gcc/config/rs6000/x-darwin64
new file mode 100644
index 000000000..921d555ba
--- /dev/null
+++ b/gcc/config/rs6000/x-darwin64
@@ -0,0 +1,5 @@
+host-ppc64-darwin.o : $(srcdir)/config/rs6000/host-ppc64-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) toplev.h \
+ config/host-darwin.h $(DIAGNOSTIC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \
+ $(INCLUDES) $< -o $@
diff --git a/gcc/config/rs6000/x-linux-relax b/gcc/config/rs6000/x-linux-relax
new file mode 100644
index 000000000..2743a94e4
--- /dev/null
+++ b/gcc/config/rs6000/x-linux-relax
@@ -0,0 +1,2 @@
+# At -O0 cc1 etc. are too large and -Wl,--relax is needed
+$(COMPILERS) : override LDFLAGS += -Wl,--relax
diff --git a/gcc/config/rs6000/x-rs6000 b/gcc/config/rs6000/x-rs6000
new file mode 100644
index 000000000..9e31f24cd
--- /dev/null
+++ b/gcc/config/rs6000/x-rs6000
@@ -0,0 +1,3 @@
+driver-rs6000.o : $(srcdir)/config/rs6000/driver-rs6000.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/rs6000/xcoff.h b/gcc/config/rs6000/xcoff.h
new file mode 100644
index 000000000..e5c478223
--- /dev/null
+++ b/gcc/config/rs6000/xcoff.h
@@ -0,0 +1,333 @@
+/* Definitions of target machine for GNU compiler,
+ for some generic XCOFF file format
+ Copyright (C) 2001, 2002, 2003, 2004, 2007, 2008
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define TARGET_OBJECT_FORMAT OBJECT_XCOFF
+
+/* The RS/6000 uses the XCOFF format. */
+#define XCOFF_DEBUGGING_INFO 1
+
+/* Define if the object format being used is COFF or a superset. */
+#define OBJECT_FORMAT_COFF
+
+/* Define the magic numbers that we recognize as COFF.
+
+ AIX 4.3 adds U803XTOCMAGIC (0757) for 64-bit objects and AIX V5 adds
+ U64_TOCMAGIC (0767), but collect2.c does not include files in the
+ correct order to conditionally define the symbolic name in this macro.
+
+ The AIX linker accepts import/export files as object files,
+ so accept "#!" (0x2321) magic number. */
+#define MY_ISCOFF(magic) \
+ ((magic) == U802WRMAGIC || (magic) == U802ROMAGIC \
+ || (magic) == U802TOCMAGIC || (magic) == 0757 || (magic) == 0767 \
+ || (magic) == 0x2321)
+
+/* We don't have GAS for the RS/6000 yet, so don't write out special
+ .stabs in cc1plus. */
+
+#define FASCIST_ASSEMBLER
+
+/* We define this to prevent the name mangler from putting dollar signs into
+ function names. */
+
+#define NO_DOLLAR_IN_LABEL
+
+/* We define this to 0 so that gcc will never accept a dollar sign in a
+ variable name. This is needed because the AIX assembler will not accept
+ dollar signs. */
+
+#define DOLLARS_IN_IDENTIFIERS 0
+
+/* The AIX .align pseudo-op accepts values from 0 to 12, corresponding to
+ log base 2 of the alignment in bytes; 12 = 4096 bytes = 32768 bits. */
+
+#define MAX_OFILE_ALIGNMENT 32768
+
+/* Default alignment factor for csect directives, chosen to honor
+ BIGGEST_ALIGNMENT. */
+#define XCOFF_CSECT_DEFAULT_ALIGNMENT_STR "4"
+
+/* Return nonzero if this entry is to be written into the constant
+ pool in a special way. We do so if this is a SYMBOL_REF, LABEL_REF
+ or a CONST containing one of them. If -mfp-in-toc (the default),
+ we also do this for floating-point constants. We actually can only
+ do this if the FP formats of the target and host machines are the
+ same, but we can't check that since not every file that uses these
+ target macros includes real.h. We also do this when we can write the
+ entry into the TOC and the entry is not larger than a TOC entry. */
+
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \
+ (TARGET_TOC \
+ && (GET_CODE (X) == SYMBOL_REF \
+ || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST_INT \
+ && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \
+ || (GET_CODE (X) == CONST_DOUBLE \
+ && (TARGET_MINIMAL_TOC \
+ || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
+ && ! TARGET_NO_FP_IN_TOC)))))
+
+#define TARGET_ASM_OUTPUT_ANCHOR rs6000_xcoff_asm_output_anchor
+#define TARGET_ASM_GLOBALIZE_LABEL rs6000_xcoff_asm_globalize_label
+#define TARGET_ASM_INIT_SECTIONS rs6000_xcoff_asm_init_sections
+#define TARGET_ASM_RELOC_RW_MASK rs6000_xcoff_reloc_rw_mask
+#define TARGET_ASM_NAMED_SECTION rs6000_xcoff_asm_named_section
+#define TARGET_ASM_SELECT_SECTION rs6000_xcoff_select_section
+#define TARGET_ASM_SELECT_RTX_SECTION rs6000_xcoff_select_rtx_section
+#define TARGET_ASM_UNIQUE_SECTION rs6000_xcoff_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+#define TARGET_STRIP_NAME_ENCODING rs6000_xcoff_strip_name_encoding
+#define TARGET_SECTION_TYPE_FLAGS rs6000_xcoff_section_type_flags
+
+/* FP save and restore routines. */
+#define SAVE_FP_PREFIX "._savef"
+#define SAVE_FP_SUFFIX ""
+#define RESTORE_FP_PREFIX "._restf"
+#define RESTORE_FP_SUFFIX ""
+
+/* Function name to call to do profiling. */
+#undef RS6000_MCOUNT
+#define RS6000_MCOUNT ".__mcount"
+
+/* This outputs NAME to FILE up to the first null or '['. */
+
+#define RS6000_OUTPUT_BASENAME(FILE, NAME) \
+ assemble_name ((FILE), (*targetm.strip_name_encoding) (NAME))
+
+/* This is how to output the definition of a user-level label named NAME,
+ such as the label on a static function or variable NAME. */
+
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+ do { RS6000_OUTPUT_BASENAME (FILE, NAME); fputs (":\n", FILE); } while (0)
+
+/* This is how to output a command to make the user-level label named NAME
+ defined for reference from other files. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START rs6000_xcoff_file_start
+#define TARGET_ASM_FILE_END rs6000_xcoff_file_end
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false
+
+/* This macro produces the initial definition of a function name.
+ On the RS/6000, we need to place an extra '.' in the function name and
+ output the function descriptor.
+ Dollar signs are converted to underscores.
+
+ The csect for the function will have already been created when
+ text_section was selected. We do have to go back to that csect, however.
+
+ The third and fourth parameters to the .function pseudo-op (16 and 044)
+ are placeholders which no longer have any use. */
+
+#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \
+{ char *buffer = (char *) alloca (strlen (NAME) + 1); \
+ char *p; \
+ int dollar_inside = 0; \
+ strcpy (buffer, NAME); \
+ p = strchr (buffer, '$'); \
+ while (p) { \
+ *p = '_'; \
+ dollar_inside++; \
+ p = strchr (p + 1, '$'); \
+ } \
+ if (TREE_PUBLIC (DECL)) \
+ { \
+    if (!RS6000_WEAK || !DECL_WEAK (DECL))			\
+ { \
+ if (dollar_inside) { \
+ fprintf(FILE, "\t.rename .%s,\".%s\"\n", buffer, NAME); \
+ fprintf(FILE, "\t.rename %s,\"%s\"\n", buffer, NAME); \
+ } \
+ fputs ("\t.globl .", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ putc ('\n', FILE); \
+ } \
+ } \
+ else \
+ { \
+ if (dollar_inside) { \
+ fprintf(FILE, "\t.rename .%s,\".%s\"\n", buffer, NAME); \
+ fprintf(FILE, "\t.rename %s,\"%s\"\n", buffer, NAME); \
+ } \
+ fputs ("\t.lglobl .", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ putc ('\n', FILE); \
+ } \
+ fputs ("\t.csect ", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ fputs (":\n", FILE); \
+ fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ fputs (", TOC[tc0], 0\n", FILE); \
+ in_section = NULL; \
+ switch_to_section (function_section (DECL)); \
+ putc ('.', FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ fputs (":\n", FILE); \
+ if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (DECL)) \
+ xcoffout_declare_function (FILE, DECL, buffer); \
+}
+
+/* Output a reference to SYM on FILE. */
+
+#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \
+ rs6000_output_symbol_ref (FILE, SYM)
+
+/* This says how to output an external.
+ Dollar signs are converted to underscores. */
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+{ char *buffer = (char *) alloca (strlen (NAME) + 1); \
+ char *p; \
+ rtx _symref = XEXP (DECL_RTL (DECL), 0); \
+ int dollar_inside = 0; \
+ strcpy (buffer, NAME); \
+ p = strchr (buffer, '$'); \
+ while (p) { \
+ *p = '_'; \
+ dollar_inside++; \
+ p = strchr (p + 1, '$'); \
+ } \
+ if (dollar_inside) { \
+ fputs ("\t.extern .", FILE); \
+ RS6000_OUTPUT_BASENAME (FILE, buffer); \
+ putc ('\n', FILE); \
+ fprintf(FILE, "\t.rename .%s,\".%s\"\n", buffer, NAME); \
+ } \
+ if ((TREE_CODE (DECL) == VAR_DECL \
+ || TREE_CODE (DECL) == FUNCTION_DECL) \
+ && (NAME)[strlen (NAME) - 1] != ']') \
+ { \
+ XSTR (_symref, 0) = concat (XSTR (_symref, 0), \
+ (TREE_CODE (DECL) == FUNCTION_DECL \
+ ? "[DS]" : "[RW]"), \
+ NULL); \
+ } \
+}
+
+/* This is how to output a reference to a user-level label named NAME.
+ `assemble_name' uses this. */
+
+#define ASM_OUTPUT_LABELREF(FILE,NAME) \
+ asm_fprintf ((FILE), "%U%s", rs6000_xcoff_strip_dollar (NAME));
+
+/* This is how to output an internal label prefix. rs6000.c uses this
+ when generating traceback tables. */
+
+#define ASM_OUTPUT_INTERNAL_LABEL_PREFIX(FILE,PREFIX) \
+ fprintf (FILE, "%s..", PREFIX)
+
+/* This is how to output a label for a jump table. Arguments are the same as
+ for (*targetm.asm_out.internal_label), except the insn for the jump table is
+ passed. */
+
+#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \
+{ ASM_OUTPUT_ALIGN (FILE, 2); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); }
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s..%u", rs6000_xcoff_strip_dollar (PREFIX), (unsigned) (NUM))
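+
+/* (Example: PREFIX "LC" and NUM 5 produce "*LC..5"; the leading '*'
+   marks the name as not needing further assembler encoding.)  */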
+
+/* This is how to output an assembler line to define N characters starting
+ at P to FILE. */
+
+#define ASM_OUTPUT_ASCII(FILE, P, N) output_ascii ((FILE), (P), (N))
+
+/* This is how to advance the location counter by SIZE bytes. */
+
+#define SKIP_ASM_OP "\t.space "
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n", SKIP_ASM_OP, (SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define COMMON_ASM_OP "\t.comm "
+
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do { fputs (COMMON_ASM_OP, (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ if ((ALIGN) > 32) \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", (SIZE), \
+ exact_log2 ((ALIGN) / BITS_PER_UNIT)); \
+ else if ((SIZE) > 4) \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",3\n", (SIZE)); \
+ else \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)); \
+ } while (0)
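+
+/* (Example: a 24-byte symbol "foo" with 128-bit alignment emits
+   "\t.comm foo,24,4", since exact_log2 (128 / 8) == 4.)  */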
+
+/* This says how to output an assembler line
+ to define a local common symbol.
+ Alignment cannot be specified, but we can try to maintain
+ alignment after preceding TOC section if it was aligned
+ for 64-bit mode. */
+
+#define LOCAL_COMMON_ASM_OP "\t.lcomm "
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+ do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \
+ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s\n", \
+ (TARGET_32BIT ? (SIZE) : (ROUNDED)), \
+ xcoff_bss_section_name); \
+ } while (0)
+
+/* This is how we tell the assembler that two symbols have the same value. */
+#define SET_ASM_OP "\t.set "
+
+/* This is how we tell the assembler to equate two values. */
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { fprintf ((FILE), "%s", SET_ASM_OP); \
+ RS6000_OUTPUT_BASENAME (FILE, LABEL1); \
+ fprintf (FILE, ","); \
+ RS6000_OUTPUT_BASENAME (FILE, LABEL2); \
+ fprintf (FILE, "\n"); \
+ } while (0)
+
+/* Used by rs6000_assemble_integer, among others. */
+#define DOUBLE_INT_ASM_OP "\t.llong\t"
+
+/* Output before instructions. */
+#define TEXT_SECTION_ASM_OP "\t.csect .text[PR]"
+
+/* Output before writable data. */
+#define DATA_SECTION_ASM_OP \
+ "\t.csect .data[RW]," XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
+
+
+/* Define to put DWARF2 unwind info in the data section rather
+ than in the .eh_frame section. We do this because the AIX linker
+ would otherwise garbage collect these sections. */
+#define EH_FRAME_IN_DATA_SECTION 1
diff --git a/gcc/config/rs6000/xfpu.h b/gcc/config/rs6000/xfpu.h
new file mode 100644
index 000000000..af6311636
--- /dev/null
+++ b/gcc/config/rs6000/xfpu.h
@@ -0,0 +1,26 @@
+/* Definitions for Xilinx PowerPC 405/440 APU.
+
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Michael Eager (eager@eagercon.com)
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* Undefine definitions from rs6000.h. */
+#undef TARGET_XILINX_FPU
+
+#define TARGET_XILINX_FPU (rs6000_xilinx_fpu)
diff --git a/gcc/config/rs6000/xfpu.md b/gcc/config/rs6000/xfpu.md
new file mode 100644
index 000000000..25c449a51
--- /dev/null
+++ b/gcc/config/rs6000/xfpu.md
@@ -0,0 +1,140 @@
+;; Scheduling description for the Xilinx PowerPC 405 APU Floating Point Unit.
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Contributed by Michael Eager (eager@eagercon.com).
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;----------------------------------------------------
+;; Xilinx APU FPU Pipeline Description
+;;
+;; - attr 'type' and 'fp_type' should definitely
+;; be cleaned up at some point in the future.
+;; ddiv,sdiv,dmul,smul etc are quite confusing.
+;; Should use consistent fp* attrs. 'fp_type'
+;; should also go away, leaving us only with 'fp'
+;;
+;;----------------------------------------------------
+
+;; -------------------------------------------------------------------------
+;; Latencies
+;; Latest latency figures (all in FCB cycles). PowerPC to FPU frequency ratio
+;; assumed to be 1/2. (most common deployment)
+;; Add 2 PPC cycles for (register file access + wb) and 2 PPC cycles
+;; for issue (from PPC)
+;; SP DP
+;; Loads: 4 6
+;; Stores: 1 2 (from availability of data)
+;; Move/Abs/Neg: 1 1
+;; Add/Subtract: 5 7
+;; Multiply: 4 11
+;; Multiply-add: 10 19
+;; Convert (any): 4 6
+;; Divide/Sqrt: 27 56
+;; Compares: 1 2
+;;
+;; Bypasses are needed to model the forwarding capability of the FPU.
+;; Add this at some future time.
+;; -------------------------------------------------------------------------
+(define_automaton "Xfpu")
+(define_cpu_unit "Xfpu_issue,Xfpu_addsub,Xfpu_mul,Xfpu_div,Xfpu_sqrt" "Xfpu")
+
+
+(define_insn_reservation "fp-default" 2
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_default"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2")
+
+(define_insn_reservation "fp-compare" 6
+ (and (eq_attr "type" "fpcompare") ;; Inconsistent naming
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_addsub")
+
+(define_insn_reservation "fp-addsub-s" 14
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_addsub_s"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_addsub")
+
+(define_insn_reservation "fp-addsub-d" 18
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_addsub_d"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_addsub")
+
+(define_insn_reservation "fp-mul-s" 12
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_mul_s"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_mul")
+
+(define_insn_reservation "fp-mul-d" 16 ;; Actually 28. Long latencies are killing the automaton formation. Need to figure out why.
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_mul_d"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_mul")
+
+(define_insn_reservation "fp-div-s" 24 ;; Actually 34
+ (and (eq_attr "type" "sdiv") ;; Inconsistent attr naming
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_div*10") ;; Unpipelined
+
+(define_insn_reservation "fp-div-d" 34 ;; Actually 116
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "ppc405")) ;; Inconsistent attr naming
+ "Xfpu_issue*2,Xfpu_div*10") ;; Unpipelined
+
+(define_insn_reservation "fp-maddsub-s" 24
+ (and (and
+ (eq_attr "type" "fp")
+ (eq_attr "fp_type" "fp_maddsub_s"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_mul,nothing*7,Xfpu_addsub")
+
+(define_insn_reservation "fp-maddsub-d" 34 ;; Actually 42
+ (and (and
+ (eq_attr "type" "dmul") ;; Inconsistent attr naming
+ (eq_attr "fp_type" "fp_maddsub_d"))
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_mul,nothing*7,Xfpu_addsub")
+
+(define_insn_reservation "fp-load" 10 ;; FIXME. Is double/single precision the same ?
+ (and (eq_attr "type" "fpload, fpload_ux, fpload_u")
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*10")
+
+(define_insn_reservation "fp-store" 4
+ (and (eq_attr "type" "fpstore, fpstore_ux, fpstore_u")
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*4")
+
+(define_insn_reservation "fp-sqrt-s" 24 ;; Actually 56
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_sqrt*10") ;; Unpipelined
+
+
+(define_insn_reservation "fp-sqrt-d" 34 ;; Actually 116
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "ppc405"))
+ "Xfpu_issue*2,Xfpu_sqrt*10") ;; Unpipelined
+
diff --git a/gcc/config/rs6000/xilinx.h b/gcc/config/rs6000/xilinx.h
new file mode 100644
index 000000000..3b59141a2
--- /dev/null
+++ b/gcc/config/rs6000/xilinx.h
@@ -0,0 +1,47 @@
+/* Support for GCC on Xilinx embedded PowerPC systems
+ Copyright (C) 2008 Free Software Foundation, Inc.
+ Contributed by Michael Eager, eager@eagercon.com
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Set defaults for Xilinx embedded target boards. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "\
+-mxilinx-fpu \
+%{mfpu=sp_lite: -DHAVE_XFPU_SP_LITE} \
+%{mfpu=sp_full: -DHAVE_XFPU_SP_FULL} \
+%{mfpu=dp_lite: -DHAVE_XFPU_DP_LITE} \
+%{mfpu=dp_full: -DHAVE_XFPU_DP_FULL} \
+%{mfpu=*: -DHAVE_XFPU}"
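+
+/* For example, "-mfpu=sp_full" defines both HAVE_XFPU_SP_FULL and
+   HAVE_XFPU; -mxilinx-fpu is always passed.  */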
+
+#undef LIB_DEFAULT_SPEC
+#define LIB_DEFAULT_SPEC "\
+%{!nostdlib: --start-group -lxil -lc -lm --end-group \
+%{mppcperflib: %{mfpu=*: -lppcstr405 -lgcc} \
+%{!mfpu=*: -lppcstr405 -lppcfp -lgcc}} \
+%{!mppcperflib: -lgcc}}"
+
+#undef STARTFILE_DEFAULT_SPEC
+#define STARTFILE_DEFAULT_SPEC "\
+ecrti.o%s %{pg: %{!mno-clearbss: xil-pgcrt0.o%s} \
+%{mno-clearbss: xil-sim-pgcrt0.o%s}} \
+%{!pg: %{!mno-clearbss: xil-crt0.o%s} \
+%{mno-clearbss: xil-sim-crt0.o%s}} crtbegin.o%s"
+
+#undef LINK_START_DEFAULT_SPEC
+#define LINK_START_DEFAULT_SPEC "-T xilinx.ld%s"
diff --git a/gcc/config/rs6000/xilinx.opt b/gcc/config/rs6000/xilinx.opt
new file mode 100644
index 000000000..da6c0c066
--- /dev/null
+++ b/gcc/config/rs6000/xilinx.opt
@@ -0,0 +1,33 @@
+; Xilinx embedded PowerPC options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+mno-clearbss
+Target RejectNegative
+
+mppcperflib
+Target RejectNegative
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/rtems.h b/gcc/config/rtems.h
new file mode 100644
index 000000000..d9264f689
--- /dev/null
+++ b/gcc/config/rtems.h
@@ -0,0 +1,45 @@
+/* Configuration common to all targets running RTEMS.
+ Copyright (C) 2000, 2002, 2004, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The system headers under RTEMS are C++-aware. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C
+
+/*
+ * Dummy start/end specification to let linker work as
+ * needed by autoconf scripts using this compiler.
+ */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+/*
+ * Some targets do not set up LIB_SPEC, so override it here.
+ */
+#define STD_LIB_SPEC "%{!shared:%{g*:-lg} %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}}"
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!qrtems: " STD_LIB_SPEC "} " \
+"%{!nostdlib: %{qrtems: --start-group \
+ -lrtemsbsp -lrtemscpu \
+ -lc -lgcc --end-group %{!qnolinkcmds: -T linkcmds%s}}}"
+
+#define TARGET_POSIX_IO
diff --git a/gcc/config/rtems.opt b/gcc/config/rtems.opt
new file mode 100644
index 000000000..053ab8f09
--- /dev/null
+++ b/gcc/config/rtems.opt
@@ -0,0 +1,33 @@
+; RTEMS options.
+
+; Copyright (C) 2010, 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+qnolinkcmds
+Driver
+
+qrtems
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/rx/constraints.md b/gcc/config/rx/constraints.md
new file mode 100644
index 000000000..9f7dc9ff8
--- /dev/null
+++ b/gcc/config/rx/constraints.md
@@ -0,0 +1,88 @@
+;; Constraint definitions for Renesas RX.
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_constraint "Symbol"
+ "@internal Constraint on the type of rtx allowed in call insns"
+ (match_test "GET_CODE (op) == SYMBOL_REF")
+)
+
+
+(define_constraint "Int08"
+ "@internal A signed or unsigned 8-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 8), (1 << 8) - 1)")
+ )
+)
+
+(define_constraint "Sint08"
+ "@internal A signed 8-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 7), (1 << 7) - 1)")
+ )
+)
+
+(define_constraint "Sint16"
+ "@internal A signed 16-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 15), (1 << 15) - 1)")
+ )
+)
+
+(define_constraint "Sint24"
+ "@internal A signed 24-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, (-1 << 23), (1 << 23) - 1)")
+ )
+)
+
+;; This constraint is used by the SUBSI3 pattern because the
+;; RX SUB instruction can only take a 4-bit unsigned integer
+;; value. Also used by the MVTIPL instruction.
+(define_constraint "Uint04"
+ "@internal An unsigned 4-bit immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 15)")
+ )
+)
+
+(define_constraint "NEGint4"
+ "@internal An signed 4-bit negative immediate value"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -15, -1)")
+ )
+)
+
+;; This is used in arithmetic and logic instructions for
+;; a source operand that lies in memory and which satisfies
+;; rx_restricted_memory_address().
+
+(define_memory_constraint "Q"
+ "A MEM which only uses REG or REG+INT addressing."
+ (and (match_code "mem")
+ (ior (match_code "reg" "0")
+ (and (match_code "plus" "0")
+ (and (match_code "reg,subreg" "00")
+ (match_code "const_int" "01")
+ )
+ )
+ )
+ )
+)
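
For each constraint defined above, genpreds generates a satisfies_constraint_<name> helper, so backend C code can test a candidate operand directly. A minimal sketch, assuming the usual GCC-internal headers are in scope:

/* Sketch: accept only MEMs that satisfy the Q constraint above.  */
static bool
q_mem_ok (rtx op)
{
  return MEM_P (op) && satisfies_constraint_Q (op);
}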
diff --git a/gcc/config/rx/predicates.md b/gcc/config/rx/predicates.md
new file mode 100644
index 000000000..82cac42da
--- /dev/null
+++ b/gcc/config/rx/predicates.md
@@ -0,0 +1,297 @@
+;; Predicate definitions for Renesas RX.
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+
+;; Check that the operand is suitable for a call insn.
+;; Only registers and symbol refs are allowed.
+
+(define_predicate "rx_call_operand"
+ (match_code "symbol_ref,reg")
+)
+
+;; For sibcall operations we can only use a symbolic address.
+
+(define_predicate "rx_symbolic_call_operand"
+ (match_code "symbol_ref")
+)
+
+;; Check that the operand is suitable for a shift insn
+;; Only small integers or a value in a register are permitted.
+
+(define_predicate "rx_shift_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
+)
+
+(define_predicate "rx_constshift_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)"))
+)
+
+(define_predicate "rx_restricted_mem_operand"
+ (and (match_code "mem")
+ (match_test "rx_is_restricted_memory_address (XEXP (op, 0), mode)"))
+)
+
+;; Check that the operand is suitable as the source operand
+;; for a logic or arithmetic instruction. Registers, integers
+;; and a restricted subset of memory addresses are allowed.
+
+(define_predicate "rx_source_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "immediate_operand")
+ (match_operand 0 "rx_restricted_mem_operand"))
+)
+
+;; Check that the operand is suitable as the source operand
+;; for a comparison instruction. This is the same as
+;; rx_source_operand except that SUBREGs are allowed but
+;; CONST_INTs are not.
+
+(define_predicate "rx_compare_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "rx_restricted_mem_operand"))
+)
+
+;; Return true if OP is a store multiple operation. This looks like:
+;;
+;; [(set (SP) (MINUS (SP) (INT)))
+;; (set (MEM (SP)) (REG))
+;; (set (MEM (MINUS (SP) (INT))) (REG)) {optionally repeated}
+;; ]
+
+(define_special_predicate "rx_store_multiple_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int src_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != MINUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+ /* Check that the next element is the first push. */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || ! MEM_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
+ || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
+ != GET_MODE_SIZE (SImode))
+ return false;
+
+ src_regno = REGNO (SET_SRC (element));
+
+ /* Check that the remaining elements use SP-<disp>
+ addressing and decreasing register numbers. */
+ for (i = 2; i < count; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || REGNO (SET_SRC (element)) != src_regno - (i - 1)
+ || ! MEM_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || GET_CODE (XEXP (SET_DEST (element), 0)) != MINUS
+ || ! REG_P (XEXP (XEXP (SET_DEST (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_DEST (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_DEST (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_DEST (element), 0), 1))
+ != i * GET_MODE_SIZE (SImode))
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a load multiple operation.
+;; This looks like:
+;; [(set (SP) (PLUS (SP) (INT)))
+;; (set (REG) (MEM (SP)))
+;; (set (REG) (MEM (PLUS (SP) (INT)))) {optionally repeated}
+;; ]
+
+(define_special_predicate "rx_load_multiple_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != PLUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+ /* Check that the next element is the first load. */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || ! MEM_P (SET_SRC (element))
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (element));
+
+ /* Check that the remaining elements use SP+<disp>
+ addressing and incremental register numbers. */
+ for (i = 2; i < count; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || REGNO (SET_DEST (element)) != dest_regno + (i - 1)
+ || ! MEM_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || GET_CODE (XEXP (SET_SRC (element), 0)) != PLUS
+ || ! REG_P (XEXP (XEXP (SET_SRC (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_SRC (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_SRC (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (element), 0), 1))
+ != (i - 1) * GET_MODE_SIZE (SImode))
+ return false;
+ }
+ return true;
+})
+
+;; Return true if OP is a pop-and-return load multiple operation.
+;; This looks like:
+;; [(set (SP) (PLUS (SP) (INT)))
+;; (set (REG) (MEM (SP)))
+;; (set (REG) (MEM (PLUS (SP) (INT)))) {optional and possibly repeated}
+;; (return)
+;; ]
+
+(define_special_predicate "rx_rtsd_vector"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx element;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 2)
+ return false;
+
+ /* Check that the first element of the vector is the stack adjust. */
+ element = XVECEXP (op, 0, 0);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || REGNO (SET_DEST (element)) != SP_REG
+ || GET_CODE (SET_SRC (element)) != PLUS
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (SET_SRC (element), 1)))
+ return false;
+
+ /* Check that the next element is the first pop. */
+ element = XVECEXP (op, 0, 1);
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || ! MEM_P (SET_SRC (element))
+ || ! REG_P (XEXP (SET_SRC (element), 0))
+ || REGNO (XEXP (SET_SRC (element), 0)) != SP_REG)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (element));
+
+ /* Check that the remaining elements, if any, and except
+ for the last one, use SP+<disp> addressing and incremental
+ register numbers. */
+ for (i = 2; i < count - 1; i++)
+ {
+ element = XVECEXP (op, 0, i);
+
+ if ( ! SET_P (element)
+ || ! REG_P (SET_DEST (element))
+ || GET_MODE (SET_DEST (element)) != SImode
+ || REGNO (SET_DEST (element)) != dest_regno + (i - 1)
+ || ! MEM_P (SET_SRC (element))
+ || GET_MODE (SET_SRC (element)) != SImode
+ || GET_CODE (XEXP (SET_SRC (element), 0)) != PLUS
+ || ! REG_P (XEXP (XEXP (SET_SRC (element), 0), 0))
+ || REGNO (XEXP (XEXP (SET_SRC (element), 0), 0)) != SP_REG
+ || ! CONST_INT_P (XEXP (XEXP (SET_SRC (element), 0), 1))
+ || INTVAL (XEXP (XEXP (SET_SRC (element), 0), 1))
+ != (i - 1) * GET_MODE_SIZE (SImode))
+ return false;
+ }
+
+ /* The last element must be a RETURN. */
+ element = XVECEXP (op, 0, count - 1);
+ return GET_CODE (element) == RETURN;
+})
+
+(define_predicate "label_ref_operand"
+ (match_code "label_ref")
+)
+
+(define_predicate "rx_z_comparison_operator"
+ (match_code "eq,ne")
+)
+
+(define_predicate "rx_zs_comparison_operator"
+ (match_code "eq,ne")
+)
+
+;; GT and LE omitted due to operand swap required.
+(define_predicate "rx_fp_comparison_operator"
+ (match_code "eq,ne,lt,ge,ordered,unordered")
+)
+
+(define_predicate "rshift_operator"
+ (match_code "ashiftrt,lshiftrt")
+)
diff --git a/gcc/config/rx/rx-modes.def b/gcc/config/rx/rx-modes.def
new file mode 100644
index 000000000..31e3225c6
--- /dev/null
+++ b/gcc/config/rx/rx-modes.def
@@ -0,0 +1,25 @@
+/* Definitions of target specific machine modes for the RX.
+ Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+CC_MODE (CC_ZS);
+CC_MODE (CC_ZSO);
+CC_MODE (CC_ZSC);
+
+CC_MODE (CC_F); /* fcmp */
diff --git a/gcc/config/rx/rx-protos.h b/gcc/config/rx/rx-protos.h
new file mode 100644
index 000000000..8c8862ef2
--- /dev/null
+++ b/gcc/config/rx/rx-protos.h
@@ -0,0 +1,46 @@
+/* Exported function prototypes from the Renesas RX backend.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_RX_PROTOS_H
+#define GCC_RX_PROTOS_H
+
+/* A few abbreviations to make the prototypes shorter. */
+#define Mmode enum machine_mode
+#define Fargs CUMULATIVE_ARGS
+#define Rcode enum rtx_code
+
+extern void rx_expand_prologue (void);
+extern int rx_initial_elimination_offset (int, int);
+
+#ifdef RTX_CODE
+extern int rx_align_for_label (rtx, int);
+extern void rx_emit_stack_popm (rtx *, bool);
+extern void rx_emit_stack_pushm (rtx *);
+extern void rx_expand_epilogue (bool);
+extern char * rx_gen_move_template (rtx *, bool);
+extern bool rx_is_legitimate_constant (rtx);
+extern bool rx_is_restricted_memory_address (rtx, Mmode);
+extern bool rx_match_ccmode (rtx, Mmode);
+extern void rx_notice_update_cc (rtx, rtx);
+extern void rx_split_cbranch (Mmode, Rcode, rtx, rtx, rtx);
+extern Mmode rx_select_cc_mode (Rcode, rtx, rtx);
+#endif
+
+#endif /* GCC_RX_PROTOS_H */
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
new file mode 100644
index 000000000..392e2ff80
--- /dev/null
+++ b/gcc/config/rx/rx.c
@@ -0,0 +1,2935 @@
+/* Subroutines used for code generation on Renesas RX processors.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* To Do:
+
+ * Re-enable memory-to-memory copies and fix up reload. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "toplev.h"
+#include "reload.h"
+#include "df.h"
+#include "ggc.h"
+#include "tm_p.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+
+static void rx_print_operand (FILE *, rtx, int);
+
+#define CC_FLAG_S (1 << 0)
+#define CC_FLAG_Z (1 << 1)
+#define CC_FLAG_O (1 << 2)
+#define CC_FLAG_C (1 << 3)
+#define CC_FLAG_FP (1 << 4) /* Fake, to differentiate CC_Fmode. */
+
+static unsigned int flags_from_mode (enum machine_mode mode);
+static unsigned int flags_from_code (enum rtx_code code);
+
+enum rx_cpu_types rx_cpu_type = RX600;
+
+/* Return true if OP is a reference to an object in a small data area. */
+
+static bool
+rx_small_data_operand (rtx op)
+{
+ if (rx_small_data_limit == 0)
+ return false;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return SYMBOL_REF_SMALL_P (op);
+
+ return false;
+}
+
+static bool
+rx_is_legitimate_address (Mmode mode, rtx x, bool strict ATTRIBUTE_UNUSED)
+{
+ if (RTX_OK_FOR_BASE (x, strict))
+ /* Register Indirect. */
+ return true;
+
+ if ((GET_MODE_SIZE (mode) == 4
+ || GET_MODE_SIZE (mode) == 2
+ || GET_MODE_SIZE (mode) == 1)
+ && (GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC))
+ /* Pre-decrement Register Indirect or
+ Post-increment Register Indirect. */
+ return RTX_OK_FOR_BASE (XEXP (x, 0), strict);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx arg1 = XEXP (x, 0);
+ rtx arg2 = XEXP (x, 1);
+ rtx index = NULL_RTX;
+
+ if (REG_P (arg1) && RTX_OK_FOR_BASE (arg1, strict))
+ index = arg2;
+ else if (REG_P (arg2) && RTX_OK_FOR_BASE (arg2, strict))
+ index = arg1;
+ else
+ return false;
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ {
+ /* Register Relative: REG + INT.
+ Only positive, mode-aligned, mode-sized
+ displacements are allowed. */
+ HOST_WIDE_INT val = INTVAL (index);
+ int factor;
+
+ if (val < 0)
+ return false;
+
+ switch (GET_MODE_SIZE (mode))
+ {
+ default:
+ case 4: factor = 4; break;
+ case 2: factor = 2; break;
+ case 1: factor = 1; break;
+ }
+
+ if (val > (65535 * factor))
+ return false;
+ return (val % factor) == 0;
+ }
+
+ case REG:
+ /* Unscaled Indexed Register Indirect: REG + REG
+ Size has to be "QI", REG has to be valid. */
+ return GET_MODE_SIZE (mode) == 1 && RTX_OK_FOR_BASE (index, strict);
+
+ case MULT:
+ {
+ /* Scaled Indexed Register Indirect: REG + (REG * FACTOR)
+ Factor has to equal the mode size, REG has to be valid. */
+ rtx factor;
+
+ factor = XEXP (index, 1);
+ index = XEXP (index, 0);
+
+ return REG_P (index)
+ && RTX_OK_FOR_BASE (index, strict)
+ && CONST_INT_P (factor)
+ && GET_MODE_SIZE (mode) == INTVAL (factor);
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ /* Small data area accesses turn into register relative offsets. */
+ return rx_small_data_operand (x);
+}
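
A standalone sketch of the register-relative displacement rule encoded in the CONST_INT case above (the helper name is ours, not the port's): the offset must be non-negative, aligned to the mode size, and at most 65535 times the mode size.

/* Sketch: valid REG+INT displacement for a mode of FACTOR bytes.  */
static int
disp_ok (long val, int factor)
{
  return val >= 0 && val <= 65535L * factor && val % factor == 0;
}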
+
+/* Returns TRUE for simple memory addresses, i.e. ones
+ that do not involve indexed register addressing
+ or pre/post increment/decrement. */
+
+bool
+rx_is_restricted_memory_address (rtx mem, enum machine_mode mode)
+{
+ rtx base, index;
+
+ if (! rx_is_legitimate_address
+ (mode, mem, reload_in_progress || reload_completed))
+ return false;
+
+ switch (GET_CODE (mem))
+ {
+ case REG:
+ /* Simple memory addresses are OK. */
+ return true;
+
+ case PRE_DEC:
+ case POST_INC:
+ return false;
+
+ case PLUS:
+ /* Only allow REG+INT addressing. */
+ base = XEXP (mem, 0);
+ index = XEXP (mem, 1);
+
+ if (! RX_REG_P (base) || ! CONST_INT_P (index))
+ return false;
+
+ return IN_RANGE (INTVAL (index), 0, (0x10000 * GET_MODE_SIZE (mode)) - 1);
+
+ case SYMBOL_REF:
+ /* Can happen when small data is being supported.
+ Assume that it will be resolved into GP+INT. */
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
+
+static bool
+rx_mode_dependent_address_p (const_rtx addr)
+{
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ /* --REG and REG++ only work in SImode. */
+ case PRE_DEC:
+ case POST_INC:
+ return true;
+
+ case MINUS:
+ case PLUS:
+ if (! REG_P (XEXP (addr, 0)))
+ return true;
+
+ addr = XEXP (addr, 1);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* REG+REG only works in SImode. */
+ return true;
+
+ case CONST_INT:
+ /* REG+INT is only mode independent if INT is
+ positive, a multiple of 4, and fits into 8 bits. */
+ if (((INTVAL (addr) & 3) == 0)
+ && IN_RANGE (INTVAL (addr), 4, 252))
+ return false;
+ return true;
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return true;
+
+ case MULT:
+ gcc_assert (REG_P (XEXP (addr, 0)));
+ gcc_assert (CONST_INT_P (XEXP (addr, 1)));
+ /* REG+REG*SCALE is always mode dependent. */
+ return true;
+
+ default:
+ /* Not recognized, so treat as mode dependent. */
+ return true;
+ }
+
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case REG:
+ /* These are all mode independent. */
+ return false;
+
+ default:
+ /* Everything else is unrecognized,
+ so treat as mode dependent. */
+ return true;
+ }
+}
+
+/* A C compound statement to output to stdio stream FILE the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. */
+
+static void
+rx_print_operand_address (FILE * file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "[");
+ rx_print_operand (file, addr, 0);
+ fprintf (file, "]");
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "[-");
+ rx_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "]");
+ break;
+
+ case POST_INC:
+ fprintf (file, "[");
+ rx_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "+]");
+ break;
+
+ case PLUS:
+ {
+ rtx arg1 = XEXP (addr, 0);
+ rtx arg2 = XEXP (addr, 1);
+ rtx base, index;
+
+ if (REG_P (arg1) && RTX_OK_FOR_BASE (arg1, true))
+ base = arg1, index = arg2;
+ else if (REG_P (arg2) && RTX_OK_FOR_BASE (arg2, true))
+ base = arg2, index = arg1;
+ else
+ {
+ rx_print_operand (file, arg1, 0);
+ fprintf (file, " + ");
+ rx_print_operand (file, arg2, 0);
+ break;
+ }
+
+ if (REG_P (index) || GET_CODE (index) == MULT)
+ {
+ fprintf (file, "[");
+ rx_print_operand (file, index, 'A');
+ fprintf (file, ",");
+ }
+ else /* GET_CODE (index) == CONST_INT */
+ {
+ rx_print_operand (file, index, 'A');
+ fprintf (file, "[");
+ }
+ rx_print_operand (file, base, 0);
+ fprintf (file, "]");
+ break;
+ }
+
+ case CONST:
+ if (GET_CODE (XEXP (addr, 0)) == UNSPEC)
+ {
+ addr = XEXP (addr, 0);
+ gcc_assert (XINT (addr, 1) == UNSPEC_CONST);
+
+ addr = XVECEXP (addr, 0, 0);
+ gcc_assert (CONST_INT_P (addr));
+ }
+ /* Fall through. */
+ case LABEL_REF:
+ case SYMBOL_REF:
+ fprintf (file, "#");
+
+ default:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+static void
+rx_print_integer (FILE * file, HOST_WIDE_INT val)
+{
+ if (IN_RANGE (val, -64, 64))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
+ else
+ fprintf (file,
+ TARGET_AS100_SYNTAX
+ ? "0%" HOST_WIDE_INT_PRINT "xH" : HOST_WIDE_INT_PRINT_HEX,
+ val);
+}
+
+static bool
+rx_assemble_integer (rtx x, unsigned int size, int is_aligned)
+{
+ const char * op = integer_asm_op (size, is_aligned);
+
+ if (! CONST_INT_P (x))
+ return default_assemble_integer (x, size, is_aligned);
+
+ if (op == NULL)
+ return false;
+ fputs (op, asm_out_file);
+
+ rx_print_integer (asm_out_file, INTVAL (x));
+ fputc ('\n', asm_out_file);
+ return true;
+}
+
+
+/* Handles the insertion of a single operand into the assembler output.
+ The %<letter> directives supported are:
+
+ %A Print an operand without a leading # character.
+ %B Print an integer comparison name.
+ %C Print a control register name.
+ %F Print a condition code flag name.
+ %H Print high part of a DImode register, integer or address.
+ %L Print low part of a DImode register, integer or address.
+ %N Print the negation of the immediate value.
+ %Q If the operand is a MEM, then correctly generate
+ register indirect or register relative addressing.
+ %R Like %Q but for zero-extending loads. */
+
+static void
+rx_print_operand (FILE * file, rtx op, int letter)
+{
+ bool unsigned_load = false;
+
+ switch (letter)
+ {
+ case 'A':
+ /* Print an operand without a leading #. */
+ if (MEM_P (op))
+ op = XEXP (op, 0);
+
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ output_addr_const (file, op);
+ break;
+ case CONST_INT:
+ fprintf (file, "%ld", (long) INTVAL (op));
+ break;
+ default:
+ rx_print_operand (file, op, 0);
+ break;
+ }
+ break;
+
+ case 'B':
+ {
+ enum rtx_code code = GET_CODE (op);
+ enum machine_mode mode = GET_MODE (XEXP (op, 0));
+ const char *ret;
+
+ if (mode == CC_Fmode)
+ {
+ /* C flag is undefined, and O flag carries unordered. None of the
+ branch combinations that include O use it helpfully. */
+ switch (code)
+ {
+ case ORDERED:
+ ret = "no";
+ break;
+ case UNORDERED:
+ ret = "o";
+ break;
+ case LT:
+ ret = "n";
+ break;
+ case GE:
+ ret = "pz";
+ break;
+ case EQ:
+ ret = "eq";
+ break;
+ case NE:
+ ret = "ne";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ unsigned int flags = flags_from_mode (mode);
+
+ switch (code)
+ {
+ case LT:
+ ret = (flags & CC_FLAG_O ? "lt" : "n");
+ break;
+ case GE:
+ ret = (flags & CC_FLAG_O ? "ge" : "pz");
+ break;
+ case GT:
+ ret = "gt";
+ break;
+ case LE:
+ ret = "le";
+ break;
+ case GEU:
+ ret = "geu";
+ break;
+ case LTU:
+ ret = "ltu";
+ break;
+ case GTU:
+ ret = "gtu";
+ break;
+ case LEU:
+ ret = "leu";
+ break;
+ case EQ:
+ ret = "eq";
+ break;
+ case NE:
+ ret = "ne";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_checking_assert ((flags_from_code (code) & ~flags) == 0);
+ }
+ fputs (ret, file);
+ break;
+ }
+
+ case 'C':
+ gcc_assert (CONST_INT_P (op));
+ switch (INTVAL (op))
+ {
+ case 0: fprintf (file, "psw"); break;
+ case 2: fprintf (file, "usp"); break;
+ case 3: fprintf (file, "fpsw"); break;
+ case 4: fprintf (file, "cpen"); break;
+ case 8: fprintf (file, "bpsw"); break;
+ case 9: fprintf (file, "bpc"); break;
+ case 0xa: fprintf (file, "isp"); break;
+ case 0xb: fprintf (file, "fintv"); break;
+ case 0xc: fprintf (file, "intb"); break;
+ default:
+ warning (0, "unreocgnized control register number: %d - using 'psw'",
+ (int) INTVAL (op));
+ fprintf (file, "psw");
+ break;
+ }
+ break;
+
+ case 'F':
+ gcc_assert (CONST_INT_P (op));
+ switch (INTVAL (op))
+ {
+ case 0: case 'c': case 'C': fprintf (file, "C"); break;
+ case 1: case 'z': case 'Z': fprintf (file, "Z"); break;
+ case 2: case 's': case 'S': fprintf (file, "S"); break;
+ case 3: case 'o': case 'O': fprintf (file, "O"); break;
+ case 8: case 'i': case 'I': fprintf (file, "I"); break;
+ case 9: case 'u': case 'U': fprintf (file, "U"); break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'H':
+ switch (GET_CODE (op))
+ {
+ case REG:
+ fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
+ break;
+ case CONST_INT:
+ {
+ HOST_WIDE_INT v = INTVAL (op);
+
+ fprintf (file, "#");
+ /* Trickery to avoid problems with shifting 32 bits at a time. */
+ v = v >> 16;
+ v = v >> 16;
+ rx_print_integer (file, v);
+ break;
+ }
+ case CONST_DOUBLE:
+ fprintf (file, "#");
+ rx_print_integer (file, CONST_DOUBLE_HIGH (op));
+ break;
+ case MEM:
+ if (! WORDS_BIG_ENDIAN)
+ op = adjust_address (op, SImode, 4);
+ output_address (XEXP (op, 0));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'L':
+ switch (GET_CODE (op))
+ {
+ case REG:
+ fprintf (file, "%s", reg_names [REGNO (op) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
+ break;
+ case CONST_INT:
+ fprintf (file, "#");
+ rx_print_integer (file, INTVAL (op) & 0xffffffff);
+ break;
+ case CONST_DOUBLE:
+ fprintf (file, "#");
+ rx_print_integer (file, CONST_DOUBLE_LOW (op));
+ break;
+ case MEM:
+ if (WORDS_BIG_ENDIAN)
+ op = adjust_address (op, SImode, 4);
+ output_address (XEXP (op, 0));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'N':
+ gcc_assert (CONST_INT_P (op));
+ fprintf (file, "#");
+ rx_print_integer (file, - INTVAL (op));
+ break;
+
+ case 'R':
+ gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4);
+ unsigned_load = true;
+ /* Fall through. */
+ case 'Q':
+ if (MEM_P (op))
+ {
+ HOST_WIDE_INT offset;
+ rtx mem = op;
+
+ op = XEXP (op, 0);
+
+ if (REG_P (op))
+ offset = 0;
+ else if (GET_CODE (op) == PLUS)
+ {
+ rtx displacement;
+
+ if (REG_P (XEXP (op, 0)))
+ {
+ displacement = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ else
+ {
+ displacement = XEXP (op, 0);
+ op = XEXP (op, 1);
+ gcc_assert (REG_P (op));
+ }
+
+ gcc_assert (CONST_INT_P (displacement));
+ offset = INTVAL (displacement);
+ gcc_assert (offset >= 0);
+
+ fprintf (file, "%ld", offset);
+ }
+ else
+ gcc_unreachable ();
+
+ fprintf (file, "[");
+ rx_print_operand (file, op, 0);
+ fprintf (file, "].");
+
+ switch (GET_MODE_SIZE (GET_MODE (mem)))
+ {
+ case 1:
+ gcc_assert (offset <= 65535 * 1);
+ fprintf (file, unsigned_load ? "UB" : "B");
+ break;
+ case 2:
+ gcc_assert (offset % 2 == 0);
+ gcc_assert (offset <= 65535 * 2);
+ fprintf (file, unsigned_load ? "UW" : "W");
+ break;
+ case 4:
+ gcc_assert (offset % 4 == 0);
+ gcc_assert (offset <= 65535 * 4);
+ fprintf (file, "L");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+
+ /* Fall through. */
+
+ default:
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ /* Should be the scaled part of an
+ indexed register indirect address. */
+ {
+ rtx base = XEXP (op, 0);
+ rtx index = XEXP (op, 1);
+
+ /* Check for a swapped index register and scaling factor.
+ Not sure if this can happen, but be prepared to handle it. */
+ if (CONST_INT_P (base) && REG_P (index))
+ {
+ rtx tmp = base;
+ base = index;
+ index = tmp;
+ }
+
+ gcc_assert (REG_P (base));
+ gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
+ gcc_assert (CONST_INT_P (index));
+ /* Do not try to verify the value of the scaling factor, as it is
+ based on the mode of the MEM, not the mode of the MULT (which
+ will always be SImode). */
+ fprintf (file, "%s", reg_names [REGNO (base)]);
+ break;
+ }
+
+ case MEM:
+ output_address (XEXP (op, 0));
+ break;
+
+ case PLUS:
+ output_address (op);
+ break;
+
+ case REG:
+ gcc_assert (REGNO (op) < FIRST_PSEUDO_REGISTER);
+ fprintf (file, "%s", reg_names [REGNO (op)]);
+ break;
+
+ case SUBREG:
+ gcc_assert (subreg_regno (op) < FIRST_PSEUDO_REGISTER);
+ fprintf (file, "%s", reg_names [subreg_regno (op)]);
+ break;
+
+ /* This will only be single precision. */
+ case CONST_DOUBLE:
+ {
+ unsigned long val;
+ REAL_VALUE_TYPE rv;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ fprintf (file, TARGET_AS100_SYNTAX ? "#0%lxH" : "#0x%lx", val);
+ break;
+ }
+
+ case CONST_INT:
+ fprintf (file, "#");
+ rx_print_integer (file, INTVAL (op));
+ break;
+
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ case CODE_LABEL:
+ case UNSPEC:
+ rx_print_operand_address (file, op);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ }
+}
+
+/* Returns an assembler template for a move instruction. */
+
+char *
+rx_gen_move_template (rtx * operands, bool is_movu)
+{
+ static char out_template [64];
+ const char * extension = TARGET_AS100_SYNTAX ? ".L" : "";
+ const char * src_template;
+ const char * dst_template;
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ /* Decide which extension, if any, should be given to the move instruction. */
+ switch (CONST_INT_P (src) ? GET_MODE (dest) : GET_MODE (src))
+ {
+ case QImode:
+ /* The .B extension is not valid when
+ loading an immediate into a register. */
+ if (! REG_P (dest) || ! CONST_INT_P (src))
+ extension = ".B";
+ break;
+ case HImode:
+ if (! REG_P (dest) || ! CONST_INT_P (src))
+ /* The .W extension is not valid when
+ loading an immediate into a register. */
+ extension = ".W";
+ break;
+ case SFmode:
+ case SImode:
+ extension = ".L";
+ break;
+ case VOIDmode:
+ /* This mode is used by constants. */
+ break;
+ default:
+ debug_rtx (src);
+ gcc_unreachable ();
+ }
+
+ if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0)))
+ src_template = "%%gp(%A1)[r13]";
+ else
+ src_template = "%1";
+
+ if (MEM_P (dest) && rx_small_data_operand (XEXP (dest, 0)))
+ dst_template = "%%gp(%A0)[r13]";
+ else
+ dst_template = "%0";
+
+ sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov",
+ extension, src_template, dst_template);
+ return out_template;
+}
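
A simplified sketch of the extension-selection rule implemented above (helper name ours; GAS syntax assumed, since the AS100 default differs):

/* Immediate loads into registers must not carry a .B/.W suffix;
   32-bit moves always take .L.  */
static const char *
move_extension (int mode_size, int reg_dest_imm_src)
{
  switch (mode_size)
    {
    case 1: return reg_dest_imm_src ? "" : ".B";
    case 2: return reg_dest_imm_src ? "" : ".W";
    case 4: return ".L";   /* SImode and SFmode */
    default: return "";    /* VOIDmode constants */
    }
}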
+
+/* Return VALUE rounded up to the next ALIGNMENT boundary. */
+
+static inline unsigned int
+rx_round_up (unsigned int value, unsigned int alignment)
+{
+ alignment -= 1;
+ return (value + alignment) & (~ alignment);
+}
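
The masking in rx_round_up only works when ALIGNMENT is a power of two. A self-contained check of the intended behaviour (names hypothetical):

#include <assert.h>

static unsigned int
round_up (unsigned int value, unsigned int alignment)
{
  alignment -= 1;                     /* e.g. 4 becomes the mask 3 */
  return (value + alignment) & ~alignment;
}

int
main (void)
{
  assert (round_up (13, 4) == 16);    /* rounded up to the next boundary */
  assert (round_up (16, 4) == 16);    /* already aligned: unchanged */
  return 0;
}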
+
+/* Return the number of bytes in the argument registers
+ occupied by an argument of type TYPE and mode MODE. */
+
+static unsigned int
+rx_function_arg_size (Mmode mode, const_tree type)
+{
+ unsigned int num_bytes;
+
+ num_bytes = (mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ return rx_round_up (num_bytes, UNITS_PER_WORD);
+}
+
+#define NUM_ARG_REGS 4
+#define MAX_NUM_ARG_BYTES (NUM_ARG_REGS * UNITS_PER_WORD)
+
+/* Return an RTL expression describing the register holding a function
+ parameter of mode MODE and type TYPE or NULL_RTX if the parameter should
+ be passed on the stack. CUM describes the previous parameters to the
+ function and NAMED is false if the parameter is part of a variable
+ parameter list, or the last named parameter before the start of a
+ variable parameter list. */
+
+static rtx
+rx_function_arg (Fargs * cum, Mmode mode, const_tree type, bool named)
+{
+ unsigned int next_reg;
+ unsigned int bytes_so_far = *cum;
+ unsigned int size;
+ unsigned int rounded_size;
+
+ /* An exploded version of rx_function_arg_size. */
+ size = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+ /* If the size is not known it cannot be passed in registers. */
+ if (size < 1)
+ return NULL_RTX;
+
+ rounded_size = rx_round_up (size, UNITS_PER_WORD);
+
+ /* Don't pass this arg via registers if there
+ are insufficient registers to hold all of it. */
+ if (rounded_size + bytes_so_far > MAX_NUM_ARG_BYTES)
+ return NULL_RTX;
+
+ /* Unnamed arguments and the last named argument in a
+ variadic function are always passed on the stack. */
+ if (!named)
+ return NULL_RTX;
+
+ /* Structures must occupy an exact number of registers,
+ otherwise they are passed on the stack. */
+ if ((type == NULL || AGGREGATE_TYPE_P (type))
+ && (size % UNITS_PER_WORD) != 0)
+ return NULL_RTX;
+
+ next_reg = (bytes_so_far / UNITS_PER_WORD) + 1;
+
+ return gen_rtx_REG (mode, next_reg);
+}
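
Condensed, the placement rule above: four word-sized argument registers, sizes rounded up to whole words, named arguments only, and aggregates must fill an exact number of words. A hedged standalone sketch, assuming UNITS_PER_WORD is 4 and ignoring the aggregate check:

/* Returns the 1-based argument register (r1..r4), or 0 for the stack.  */
static int
arg_reg (unsigned int bytes_so_far, unsigned int size, int named)
{
  unsigned int rounded = (size + 3) & ~3u;   /* round up to a whole word */

  if (size < 1 || !named || rounded + bytes_so_far > 16)
    return 0;                                /* passed on the stack */
  return bytes_so_far / 4 + 1;
}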
+
+static void
+rx_function_arg_advance (Fargs * cum, Mmode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ *cum += rx_function_arg_size (mode, type);
+}
+
+static unsigned int
+rx_function_arg_boundary (Mmode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED)
+{
+ return 32;
+}
+
+/* Return an RTL describing where a function return value of type RET_TYPE
+ is held. */
+
+static rtx
+rx_function_value (const_tree ret_type,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = TYPE_MODE (ret_type);
+
+ /* RX ABI specifies that small integer types are
+ promoted to int when returned by a function. */
+ if (GET_MODE_SIZE (mode) > 0
+ && GET_MODE_SIZE (mode) < 4
+ && ! COMPLEX_MODE_P (mode)
+ )
+ return gen_rtx_REG (SImode, FUNC_RETURN_REGNUM);
+
+ return gen_rtx_REG (mode, FUNC_RETURN_REGNUM);
+}
+
+/* TARGET_PROMOTE_FUNCTION_MODE must behave in the same way with
+ regard to function returns as does TARGET_FUNCTION_VALUE. */
+
+static enum machine_mode
+rx_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int * punsignedp ATTRIBUTE_UNUSED,
+ const_tree funtype ATTRIBUTE_UNUSED,
+ int for_return)
+{
+ if (for_return != 1
+ || GET_MODE_SIZE (mode) >= 4
+ || COMPLEX_MODE_P (mode)
+ || GET_MODE_SIZE (mode) < 1)
+ return mode;
+
+ return SImode;
+}
+
+static bool
+rx_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size;
+
+ if (TYPE_MODE (type) != BLKmode
+ && ! AGGREGATE_TYPE_P (type))
+ return false;
+
+ size = int_size_in_bytes (type);
+ /* Large structs and those whose size is not an
+ exact multiple of 4 are returned in memory. */
+ return size < 1
+ || size > 16
+ || (size % UNITS_PER_WORD) != 0;
+}
+
+static rtx
+rx_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, STRUCT_VAL_REGNUM);
+}
+
+static bool
+rx_return_in_msb (const_tree valtype)
+{
+ return TARGET_BIG_ENDIAN_DATA
+ && (AGGREGATE_TYPE_P (valtype) || TREE_CODE (valtype) == COMPLEX_TYPE);
+}
+
+/* Returns true if the provided function has the specified attribute. */
+
+static inline bool
+has_func_attr (const_tree decl, const char * func_attr)
+{
+ if (decl == NULL_TREE)
+ decl = current_function_decl;
+
+ return lookup_attribute (func_attr, DECL_ATTRIBUTES (decl)) != NULL_TREE;
+}
+
+/* Returns true if the provided function has the "fast_interrupt" attribute. */
+
+static inline bool
+is_fast_interrupt_func (const_tree decl)
+{
+ return has_func_attr (decl, "fast_interrupt");
+}
+
+/* Returns true if the provided function has the "interrupt" attribute. */
+
+static inline bool
+is_interrupt_func (const_tree decl)
+{
+ return has_func_attr (decl, "interrupt");
+}
+
+/* Returns true if the provided function has the "naked" attribute. */
+
+static inline bool
+is_naked_func (const_tree decl)
+{
+ return has_func_attr (decl, "naked");
+}
+
+static bool use_fixed_regs = false;
+
+static void
+rx_conditional_register_usage (void)
+{
+ static bool using_fixed_regs = false;
+
+ if (rx_small_data_limit > 0)
+ fixed_regs[GP_BASE_REGNUM] = call_used_regs [GP_BASE_REGNUM] = 1;
+
+ if (use_fixed_regs != using_fixed_regs)
+ {
+ static char saved_fixed_regs[FIRST_PSEUDO_REGISTER];
+ static char saved_call_used_regs[FIRST_PSEUDO_REGISTER];
+
+ if (use_fixed_regs)
+ {
+ unsigned int r;
+
+ memcpy (saved_fixed_regs, fixed_regs, sizeof fixed_regs);
+ memcpy (saved_call_used_regs, call_used_regs, sizeof call_used_regs);
+
+ /* This is for fast interrupt handlers. Any register in
+ the range r10 to r13 (inclusive) that is currently
+ marked as fixed is now a viable, call-used register. */
+ for (r = 10; r <= 13; r++)
+ if (fixed_regs[r])
+ {
+ fixed_regs[r] = 0;
+ call_used_regs[r] = 1;
+ }
+
+ /* Mark r7 as fixed. This is just a hack to avoid
+ altering the reg_alloc_order array so that the newly
+ freed r10-r13 registers are the preferred registers. */
+ fixed_regs[7] = call_used_regs[7] = 1;
+ }
+ else
+ {
+ /* Restore the normal register masks. */
+ memcpy (fixed_regs, saved_fixed_regs, sizeof fixed_regs);
+ memcpy (call_used_regs, saved_call_used_regs, sizeof call_used_regs);
+ }
+
+ using_fixed_regs = use_fixed_regs;
+ }
+}
+
+/* Perform any actions necessary before starting to compile FNDECL.
+ For the RX we use this to make sure that we have the correct
+ set of register masks selected. If FNDECL is NULL then we are
+ compiling top level things. */
+
+static void
+rx_set_current_function (tree fndecl)
+{
+ /* Remember the last target of rx_set_current_function. */
+ static tree rx_previous_fndecl;
+ bool prev_was_fast_interrupt;
+ bool current_is_fast_interrupt;
+
+ /* Only change the context if the function changes. This hook is called
+ several times in the course of compiling a function, and we don't want
+ to slow things down too much or call target_reinit when it isn't safe. */
+ if (fndecl == rx_previous_fndecl)
+ return;
+
+ prev_was_fast_interrupt
+ = rx_previous_fndecl
+ ? is_fast_interrupt_func (rx_previous_fndecl) : false;
+
+ current_is_fast_interrupt
+ = fndecl ? is_fast_interrupt_func (fndecl) : false;
+
+ if (prev_was_fast_interrupt != current_is_fast_interrupt)
+ {
+ use_fixed_regs = current_is_fast_interrupt;
+ target_reinit ();
+ }
+
+ rx_previous_fndecl = fndecl;
+}
+
+/* A typical stack layout looks like this after the function's prologue:
+
+ | |
+ -- ^
+ | | \ |
+ | | arguments saved | Increasing
+ | | on the stack | addresses
+ PARENT arg pointer -> | | /
+ -------------------------- ---- -------------------
+ CHILD |ret | return address
+ --
+ | | \
+ | | call saved
+ | | registers
+ | | /
+ --
+ | | \
+ | | local
+ | | variables
+ frame pointer -> | | /
+ --
+ | | \
+ | | outgoing | Decreasing
+ | | arguments | addresses
+ current stack pointer -> | | / |
+ -------------------------- ---- ------------------ V
+ | | */
+
+static unsigned int
+bit_count (unsigned int x)
+{
+ const unsigned int m1 = 0x55555555;
+ const unsigned int m2 = 0x33333333;
+ const unsigned int m4 = 0x0f0f0f0f;
+
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += x >> 8;
+
+ return (x + (x >> 16)) & 0x3f;
+}
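
bit_count is a classic SWAR population count: bit pairs, then nibbles, then bytes are summed in parallel, and the final mask extracts a count of at most 32. A standalone cross-check of the same steps:

#include <assert.h>

static unsigned int
swar_count (unsigned int x)   /* same steps as bit_count above */
{
  x -= (x >> 1) & 0x55555555;                       /* 2-bit sums */
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);   /* 4-bit sums */
  x = (x + (x >> 4)) & 0x0f0f0f0f;                  /* 8-bit sums */
  x += x >> 8;
  return (x + (x >> 16)) & 0x3f;
}

int
main (void)
{
  assert (swar_count (0) == 0);
  assert (swar_count (0x80000001u) == 2);
  assert (swar_count (0xffffffffu) == 32);
  return 0;
}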
+
+#define MUST_SAVE_ACC_REGISTER \
+ (TARGET_SAVE_ACC_REGISTER \
+ && (is_interrupt_func (NULL_TREE) \
+ || is_fast_interrupt_func (NULL_TREE)))
+
+/* Returns either the lowest numbered and highest numbered registers that
+ occupy the call-saved area of the stack frame, if the registers are
+ stored as a contiguous block, or else a bitmask of the individual
+ registers if they are stored piecemeal.
+
+ Also computes the size of the frame and the size of the outgoing
+ arguments block (in bytes). */
+
+static void
+rx_get_stack_layout (unsigned int * lowest,
+ unsigned int * highest,
+ unsigned int * register_mask,
+ unsigned int * frame_size,
+ unsigned int * stack_size)
+{
+ unsigned int reg;
+ unsigned int low;
+ unsigned int high;
+ unsigned int fixed_reg = 0;
+ unsigned int save_mask;
+ unsigned int pushed_mask;
+ unsigned int unneeded_pushes;
+
+ if (is_naked_func (NULL_TREE))
+ {
+ /* Naked functions do not create their own stack frame.
+ Instead the programmer must do that for us. */
+ * lowest = 0;
+ * highest = 0;
+ * register_mask = 0;
+ * frame_size = 0;
+ * stack_size = 0;
+ return;
+ }
+
+ for (save_mask = high = low = 0, reg = 1; reg < CC_REGNUM; reg++)
+ {
+ if ((df_regs_ever_live_p (reg)
+ /* Always save all call clobbered registers inside non-leaf
+ interrupt handlers, even if they are not live - they may
+ be used in (non-interrupt aware) routines called from this one. */
+ || (call_used_regs[reg]
+ && is_interrupt_func (NULL_TREE)
+ && ! current_function_is_leaf))
+ && (! call_used_regs[reg]
+ /* Even call clobbered registers must
+ be pushed inside interrupt handlers. */
+ || is_interrupt_func (NULL_TREE)
+ /* Likewise for fast interrupt handlers, except registers r10 -
+ r13. These are normally call-saved, but may have been set
+ to call-used by rx_conditional_register_usage. If so then
+ they can be used in the fast interrupt handler without
+ saving them on the stack. */
+ || (is_fast_interrupt_func (NULL_TREE)
+ && ! IN_RANGE (reg, 10, 13))))
+ {
+ if (low == 0)
+ low = reg;
+ high = reg;
+
+ save_mask |= 1 << reg;
+ }
+
+ /* Remember if we see a fixed register
+ after having found the low register. */
+ if (low != 0 && fixed_reg == 0 && fixed_regs [reg])
+ fixed_reg = reg;
+ }
+
+ /* If we have to save the accumulator register, make sure
+ that at least two registers are pushed into the frame. */
+ if (MUST_SAVE_ACC_REGISTER
+ && bit_count (save_mask) < 2)
+ {
+ save_mask |= (1 << 13) | (1 << 14);
+ if (low == 0)
+ low = 13;
+ if (high == 0 || low == high)
+ high = low + 1;
+ }
+
+ /* Decide if it would be faster to fill in the call-saved area of the stack
+ frame using multiple PUSH instructions instead of a single PUSHM
+ instruction.
+
+ SAVE_MASK is a bitmask of the registers that must be stored in the
+ call-save area. PUSHED_MASK is a bitmask of the registers that would
+ be pushed into the area if we used a PUSHM instruction. UNNEEDED_PUSHES
+ is a bitmask of those registers in pushed_mask that are not in
+ save_mask.
+
+ We use a simple heuristic that says that it is better to use
+ multiple PUSH instructions if the number of unnecessary pushes is
+ greater than the number of necessary pushes.
+
+ We also use multiple PUSH instructions if there are any fixed registers
+ between LOW and HIGH. The only way that this can happen is if the user
+ has specified -ffixed-<reg-name> on the command line and in such
+ circumstances we do not want to touch the fixed registers at all.
+
+ FIXME: Is it worth improving this heuristic ? */
+ pushed_mask = (-1 << low) & ~(-1 << (high + 1));
+ unneeded_pushes = (pushed_mask & (~ save_mask)) & pushed_mask;
+
+ if ((fixed_reg && fixed_reg <= high)
+ || (optimize_function_for_speed_p (cfun)
+ && bit_count (save_mask) < bit_count (unneeded_pushes)))
+ {
+ /* Use multiple pushes. */
+ * lowest = 0;
+ * highest = 0;
+ * register_mask = save_mask;
+ }
+ else
+ {
+ /* Use one push multiple instruction. */
+ * lowest = low;
+ * highest = high;
+ * register_mask = 0;
+ }
+
+ * frame_size = rx_round_up
+ (get_frame_size (), STACK_BOUNDARY / BITS_PER_UNIT);
+
+ if (crtl->args.size > 0)
+ * frame_size += rx_round_up
+ (crtl->args.size, STACK_BOUNDARY / BITS_PER_UNIT);
+
+ * stack_size = rx_round_up
+ (crtl->outgoing_args_size, STACK_BOUNDARY / BITS_PER_UNIT);
+}
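
The heuristic above builds a contiguous bitmask for registers LOW..HIGH with (-1 << low) & ~(-1 << (high + 1)). A standalone sketch of the same arithmetic, using unsigned operands to sidestep the implementation-defined left shift of -1:

/* Bitmask with bits LOW..HIGH (inclusive) set;
   e.g. range_mask (2, 4) == 0x1c.  Assumes low <= high < 31.  */
static unsigned int
range_mask (unsigned int low, unsigned int high)
{
  return (~0u << low) & ~(~0u << (high + 1));
}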
+
+/* Generate a PUSHM instruction that matches the given operands. */
+
+void
+rx_emit_stack_pushm (rtx * operands)
+{
+ HOST_WIDE_INT last_reg;
+ rtx first_push;
+
+ gcc_assert (CONST_INT_P (operands[0]));
+ last_reg = (INTVAL (operands[0]) / UNITS_PER_WORD) - 1;
+
+ gcc_assert (GET_CODE (operands[1]) == PARALLEL);
+ first_push = XVECEXP (operands[1], 0, 1);
+ gcc_assert (SET_P (first_push));
+ first_push = SET_SRC (first_push);
+ gcc_assert (REG_P (first_push));
+
+ asm_fprintf (asm_out_file, "\tpushm\t%s-%s\n",
+ reg_names [REGNO (first_push) - last_reg],
+ reg_names [REGNO (first_push)]);
+}
+
+/* Generate a PARALLEL that will pass the rx_store_multiple_vector predicate. */
+
+static rtx
+gen_rx_store_vector (unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int count = (high - low) + 2;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_MINUS (SImode, stack_pointer_rtx,
+ GEN_INT ((count - 1) * UNITS_PER_WORD)));
+
+ for (i = 0; i < count - 1; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_MINUS (SImode, stack_pointer_rtx,
+ GEN_INT ((i + 1) * UNITS_PER_WORD))),
+ gen_rtx_REG (SImode, high - i));
+ return vector;
+}
+
+/* Mark INSN as being frame related. If it is a PARALLEL
+ then mark each element as being frame related as well. */
+
+static void
+mark_frame_related (rtx insn)
+{
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = PATTERN (insn);
+
+ if (GET_CODE (insn) == PARALLEL)
+ {
+ unsigned int i;
+
+ for (i = 0; i < (unsigned) XVECLEN (insn, 0); i++)
+ RTX_FRAME_RELATED_P (XVECEXP (insn, 0, i)) = 1;
+ }
+}
+
+static bool
+ok_for_max_constant (HOST_WIDE_INT val)
+{
+ if (rx_max_constant_size == 0 || rx_max_constant_size == 4)
+ /* If there is no constraint on the size of constants
+ used as operands, then any value is legitimate. */
+ return true;
+
+ /* rx_max_constant_size specifies the maximum number
+ of bytes that can be used to hold a signed value. */
+ return IN_RANGE (val, (-1 << (rx_max_constant_size * 8)),
+ ( 1 << (rx_max_constant_size * 8)));
+}
+
+/* Generate an ADD of SRC plus VAL into DEST.
+ Handles the case where VAL is too big for max_constant_value.
+ Sets FRAME_RELATED_P on the insn if IS_FRAME_RELATED is true. */
+
+static void
+gen_safe_add (rtx dest, rtx src, rtx val, bool is_frame_related)
+{
+ rtx insn;
+
+ if (val == NULL_RTX || INTVAL (val) == 0)
+ {
+ gcc_assert (dest != src);
+
+ insn = emit_move_insn (dest, src);
+ }
+ else if (ok_for_max_constant (INTVAL (val)))
+ insn = emit_insn (gen_addsi3 (dest, src, val));
+ else
+ {
+ /* Wrap VAL in an UNSPEC so that rx_is_legitimate_constant
+ will not reject it. */
+ val = gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_CONST));
+ insn = emit_insn (gen_addsi3 (dest, src, val));
+
+ if (is_frame_related)
+ /* We have to provide our own frame related note here
+ as the dwarf2out code cannot be expected to grok
+ our unspec. */
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (SImode, dest,
+ gen_rtx_PLUS (SImode, src, val)));
+ return;
+ }
+
+ if (is_frame_related)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return;
+}
+
+void
+rx_expand_prologue (void)
+{
+ unsigned int stack_size;
+ unsigned int frame_size;
+ unsigned int mask;
+ unsigned int low;
+ unsigned int high;
+ unsigned int reg;
+ rtx insn;
+
+ /* Naked functions use their own, programmer provided prologues. */
+ if (is_naked_func (NULL_TREE))
+ return;
+
+ rx_get_stack_layout (& low, & high, & mask, & frame_size, & stack_size);
+
+ /* If we use any of the callee-saved registers, save them now. */
+ if (mask)
+ {
+ /* Push registers in reverse order. */
+ for (reg = CC_REGNUM; reg --;)
+ if (mask & (1 << reg))
+ {
+ insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg)));
+ mark_frame_related (insn);
+ }
+ }
+ else if (low)
+ {
+ if (high == low)
+ insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
+ else
+ insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1)
+ * UNITS_PER_WORD),
+ gen_rx_store_vector (low, high)));
+ mark_frame_related (insn);
+ }
+
+ if (MUST_SAVE_ACC_REGISTER)
+ {
+ unsigned int acc_high, acc_low;
+
+ /* Interrupt handlers have to preserve the accumulator
+ register if so requested by the user. Use the first
+ two pushed registers as intermediaries. */
+ if (mask)
+ {
+ acc_low = acc_high = 0;
+
+ for (reg = 1; reg < CC_REGNUM; reg ++)
+ if (mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high != 0);
+
+ /* Note - the bottom 16 bits of the accumulator are inaccessible.
+ We just assume that they are zero. */
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_stack_push (gen_rtx_REG (SImode, acc_high)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+
+ /* We have assumed that there are at least two registers pushed... */
+ gcc_assert (acc_high <= high);
+
+ emit_insn (gen_mvfacmi (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvfachi (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pushm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_store_vector (acc_low, acc_high)));
+ }
+ }
+
+ /* If needed, set up the frame pointer. */
+ if (frame_pointer_needed)
+ gen_safe_add (frame_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) frame_size), true);
+
+ /* Allocate space for the outgoing args.
+ If the stack frame has not already been set up then handle this as well. */
+ if (stack_size)
+ {
+ if (frame_size)
+ {
+ if (frame_pointer_needed)
+ gen_safe_add (stack_pointer_rtx, frame_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) stack_size), true);
+ else
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) (frame_size + stack_size)),
+ true);
+ }
+ else
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) stack_size), true);
+ }
+ else if (frame_size)
+ {
+ if (! frame_pointer_needed)
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- (HOST_WIDE_INT) frame_size), true);
+ else
+ gen_safe_add (stack_pointer_rtx, frame_pointer_rtx, NULL_RTX,
+ true);
+ }
+}
+
+static void
+rx_output_function_prologue (FILE * file,
+ HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
+{
+ if (is_fast_interrupt_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Fast Interrupt Handler\n");
+
+ if (is_interrupt_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Interrupt Handler\n");
+
+ if (is_naked_func (NULL_TREE))
+ asm_fprintf (file, "\t; Note: Naked Function\n");
+
+ if (cfun->static_chain_decl != NULL)
+ asm_fprintf (file, "\t; Note: Nested function declared "
+ "inside another function.\n");
+
+ if (crtl->calls_eh_return)
+ asm_fprintf (file, "\t; Note: Calls __builtin_eh_return.\n");
+}
+
+/* Generate a POPM or RTSD instruction that matches the given operands. */
+
+void
+rx_emit_stack_popm (rtx * operands, bool is_popm)
+{
+ HOST_WIDE_INT stack_adjust;
+ HOST_WIDE_INT last_reg;
+ rtx first_push;
+
+ gcc_assert (CONST_INT_P (operands[0]));
+ stack_adjust = INTVAL (operands[0]);
+
+ gcc_assert (GET_CODE (operands[1]) == PARALLEL);
+ last_reg = XVECLEN (operands[1], 0) - (is_popm ? 2 : 3);
+
+ first_push = XVECEXP (operands[1], 0, 1);
+ gcc_assert (SET_P (first_push));
+ first_push = SET_DEST (first_push);
+ gcc_assert (REG_P (first_push));
+
+ if (is_popm)
+ asm_fprintf (asm_out_file, "\tpopm\t%s-%s\n",
+ reg_names [REGNO (first_push)],
+ reg_names [REGNO (first_push) + last_reg]);
+ else
+ asm_fprintf (asm_out_file, "\trtsd\t#%d, %s-%s\n",
+ (int) stack_adjust,
+ reg_names [REGNO (first_push)],
+ reg_names [REGNO (first_push) + last_reg]);
+}
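+
+/* For example, given a PARALLEL whose first load targets r6 and which
+ covers three registers, the code above prints "popm r6-r8"; the RTSD
+ form additionally encodes the stack adjustment, e.g.
+ "rtsd #20, r6-r8". (The register range and adjustment value are
+ illustrative.) */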
+
+/* Generate a PARALLEL which will satisfy the rx_rtsd_vector predicate. */
+
+static rtx
+gen_rx_rtsd_vector (unsigned int adjust, unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int bias = 3;
+ unsigned int count = (high - low) + bias;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, adjust));
+
+ for (i = 0; i < count - 2; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, low + i),
+ gen_rtx_MEM (SImode,
+ i == 0 ? stack_pointer_rtx
+ : plus_constant (stack_pointer_rtx,
+ i * UNITS_PER_WORD)));
+
+ XVECEXP (vector, 0, count - 1) = gen_rtx_RETURN (VOIDmode);
+
+ return vector;
+}
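+
+/* Illustration of the vector built above, with LOW == 6, HIGH == 7 and
+ ADJUST == 20 (values assumed):
+
+ (parallel [(set sp (plus sp 20))
+ (set r6 (mem sp))
+ (set r7 (mem (plus sp 4)))
+ (return)]) */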
+
+/* Generate a PARALLEL which will satisfy the rx_load_multiple_vector predicate. */
+
+static rtx
+gen_rx_popm_vector (unsigned int low, unsigned int high)
+{
+ unsigned int i;
+ unsigned int count = (high - low) + 2;
+ rtx vector;
+
+ vector = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ XVECEXP (vector, 0, 0) =
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ (count - 1) * UNITS_PER_WORD));
+
+ for (i = 0; i < count - 1; i++)
+ XVECEXP (vector, 0, i + 1) =
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, low + i),
+ gen_rtx_MEM (SImode,
+ i == 0 ? stack_pointer_rtx
+ : plus_constant (stack_pointer_rtx,
+ i * UNITS_PER_WORD)));
+
+ return vector;
+}
+
+void
+rx_expand_epilogue (bool is_sibcall)
+{
+ unsigned int low;
+ unsigned int high;
+ unsigned int frame_size;
+ unsigned int stack_size;
+ unsigned int register_mask;
+ unsigned int regs_size;
+ unsigned int reg;
+ unsigned HOST_WIDE_INT total_size;
+
+ /* FIXME: We do not support indirect sibcalls at the moment because we
+ cannot guarantee that the register holding the function address is a
+ call-used register. If it is a call-saved register then the stack
+ pop instructions generated in the epilogue will corrupt the address
+ before it is used.
+
+ Creating a new call-used-only register class works but then the
+ reload pass gets stuck because it cannot always find a call-used
+ register for spilling sibcalls.
+
+ The other possible solution is for this pass to scan forward for the
+ sibcall instruction (if it has been generated) and work out if it
+ is an indirect sibcall using a call-saved register. If it is then
+ the address can be copied into a call-used register in this epilogue
+ code and the sibcall instruction modified to use that register. */
+
+ if (is_naked_func (NULL_TREE))
+ {
+ gcc_assert (! is_sibcall);
+
+ /* Naked functions use their own, programmer-provided epilogues.
+ But, in order to keep gcc happy we have to generate some kind of
+ epilogue RTL. */
+ emit_jump_insn (gen_naked_return ());
+ return;
+ }
+
+ rx_get_stack_layout (& low, & high, & register_mask,
+ & frame_size, & stack_size);
+
+ total_size = frame_size + stack_size;
+ regs_size = ((high - low) + 1) * UNITS_PER_WORD;
+
+ /* See if we are unable to use the special stack frame deconstruct and
+ return instructions. In most cases we can use them, but the exceptions
+ are:
+
+ - Sibling calling functions deconstruct the frame but do not return to
+ their caller. Instead they branch to their sibling and allow their
+ return instruction to return to this function's parent.
+
+ - Fast and normal interrupt handling functions have to use special
+ return instructions.
+
+ - Functions where we have pushed a fragmented set of registers into the
+ call-save area must have the same set of registers popped. */
+ if (is_sibcall
+ || is_fast_interrupt_func (NULL_TREE)
+ || is_interrupt_func (NULL_TREE)
+ || register_mask)
+ {
+ /* Cannot use the special instructions - deconstruct by hand. */
+ if (total_size)
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (total_size), false);
+
+ if (MUST_SAVE_ACC_REGISTER)
+ {
+ unsigned int acc_low, acc_high;
+
+ /* Reverse the saving of the accumulator register onto the stack.
+ Note we must adjust the saved "low" accumulator value as it
+ is really the middle 32 bits of the accumulator. */
+ if (register_mask)
+ {
+ acc_low = acc_high = 0;
+
+ for (reg = 1; reg < CC_REGNUM; reg ++)
+ if (register_mask & (1 << reg))
+ {
+ if (acc_low == 0)
+ acc_low = reg;
+ else
+ {
+ acc_high = reg;
+ break;
+ }
+ }
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_high)));
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, acc_low)));
+ }
+ else
+ {
+ acc_low = low;
+ acc_high = low + 1;
+ emit_insn (gen_stack_popm (GEN_INT (2 * UNITS_PER_WORD),
+ gen_rx_popm_vector (acc_low, acc_high)));
+ }
+
+ emit_insn (gen_ashlsi3 (gen_rtx_REG (SImode, acc_low),
+ gen_rtx_REG (SImode, acc_low),
+ GEN_INT (16)));
+ emit_insn (gen_mvtaclo (gen_rtx_REG (SImode, acc_low)));
+ emit_insn (gen_mvtachi (gen_rtx_REG (SImode, acc_high)));
+ }
+
+ if (register_mask)
+ {
+ for (reg = 0; reg < CC_REGNUM; reg ++)
+ if (register_mask & (1 << reg))
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg)));
+ }
+ else if (low)
+ {
+ if (high == low)
+ emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
+ else
+ emit_insn (gen_stack_popm (GEN_INT (regs_size),
+ gen_rx_popm_vector (low, high)));
+ }
+
+ if (is_fast_interrupt_func (NULL_TREE))
+ {
+ gcc_assert (! is_sibcall);
+ emit_jump_insn (gen_fast_interrupt_return ());
+ }
+ else if (is_interrupt_func (NULL_TREE))
+ {
+ gcc_assert (! is_sibcall);
+ emit_jump_insn (gen_exception_return ());
+ }
+ else if (! is_sibcall)
+ emit_jump_insn (gen_simple_return ());
+
+ return;
+ }
+
+ /* If we allocated space on the stack, free it now. */
+ if (total_size)
+ {
+ unsigned HOST_WIDE_INT rtsd_size;
+
+ /* See if we can use the RTSD instruction. */
+ rtsd_size = total_size + regs_size;
+ if (rtsd_size < 1024 && (rtsd_size % 4) == 0)
+ {
+ if (low)
+ emit_jump_insn (gen_pop_and_return
+ (GEN_INT (rtsd_size),
+ gen_rx_rtsd_vector (rtsd_size, low, high)));
+ else
+ emit_jump_insn (gen_deallocate_and_return (GEN_INT (total_size)));
+
+ return;
+ }
+
+ gen_safe_add (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (total_size), false);
+ }
+
+ if (low)
+ emit_jump_insn (gen_pop_and_return (GEN_INT (regs_size),
+ gen_rx_rtsd_vector (regs_size,
+ low, high)));
+ else
+ emit_jump_insn (gen_simple_return ());
+}
+
+
+/* Compute the offset (in words) between FROM (arg pointer
+ or frame pointer) and TO (frame pointer or stack pointer).
+ See ASCII art comment at the start of rx_expand_prologue
+ for more information. */
+
+int
+rx_initial_elimination_offset (int from, int to)
+{
+ unsigned int low;
+ unsigned int high;
+ unsigned int frame_size;
+ unsigned int stack_size;
+ unsigned int mask;
+
+ rx_get_stack_layout (& low, & high, & mask, & frame_size, & stack_size);
+
+ if (from == ARG_POINTER_REGNUM)
+ {
+ /* Extend the computed size of the stack frame to
+ include the registers pushed in the prologue. */
+ if (low)
+ frame_size += ((high - low) + 1) * UNITS_PER_WORD;
+ else
+ frame_size += bit_count (mask) * UNITS_PER_WORD;
+
+ /* Remember to include the return address. */
+ frame_size += 1 * UNITS_PER_WORD;
+
+ if (to == FRAME_POINTER_REGNUM)
+ return frame_size;
+
+ gcc_assert (to == STACK_POINTER_REGNUM);
+ return frame_size + stack_size;
+ }
+
+ gcc_assert (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM);
+ return stack_size;
+}
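+
+/* Worked example (values assumed): with two call-saved registers
+ pushed via MASK, FRAME_SIZE == 8 and STACK_SIZE == 4, the offset
+ from the arg pointer to the frame pointer is 8 + 2 * 4 + 4 == 20
+ bytes (frame, saved registers, return address), and from the arg
+ pointer to the stack pointer it is 20 + 4 == 24 bytes. */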
+
+/* Decide if a variable should go into one of the small data sections. */
+
+static bool
+rx_in_small_data (const_tree decl)
+{
+ int size;
+ const_tree section;
+
+ if (rx_small_data_limit == 0)
+ return false;
+
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+
+ /* We do not put read-only variables into a small data area because
+ they would be placed with the other read-only sections, far away
+ from the read-write data sections, and we only have one small
+ data area pointer.
+ Similarly, commons are placed in the .bss section, which might be
+ far away from (and out of alignment with) the .data section. */
+ if (TREE_READONLY (decl) || DECL_COMMON (decl))
+ return false;
+
+ section = DECL_SECTION_NAME (decl);
+ if (section)
+ {
+ const char * const name = TREE_STRING_POINTER (section);
+
+ return (strcmp (name, "D_2") == 0) || (strcmp (name, "B_2") == 0);
+ }
+
+ size = int_size_in_bytes (TREE_TYPE (decl));
+
+ return (size > 0) && (size <= rx_small_data_limit);
+}
+
+/* Return a section for X.
+ The only special thing we do here is to honor small data. */
+
+static section *
+rx_select_rtx_section (enum machine_mode mode,
+ rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (rx_small_data_limit > 0
+ && GET_MODE_SIZE (mode) <= rx_small_data_limit
+ && align <= (unsigned HOST_WIDE_INT) rx_small_data_limit * BITS_PER_UNIT)
+ return sdata_section;
+
+ return default_elf_select_rtx_section (mode, x, align);
+}
+
+static section *
+rx_select_section (tree decl,
+ int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if (rx_small_data_limit > 0)
+ {
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_SDATA: return sdata_section;
+ case SECCAT_SBSS: return sbss_section;
+ case SECCAT_SRODATA:
+ /* Fall through. We do not put small, read-only
+ data into the C_2 section because we are not
+ using the C_2 section. We do not use the C_2
+ section because it is located with the other
+ read-only data sections, far away from the read-write
+ data sections and we only have one small data
+ pointer (r13). */
+ default:
+ break;
+ }
+ }
+
+ /* If we are supporting the Renesas assembler
+ we cannot use mergeable sections. */
+ if (TARGET_AS100_SYNTAX)
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_RODATA_MERGE_CONST:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_STR:
+ return readonly_data_section;
+
+ default:
+ break;
+ }
+
+ return default_elf_select_section (decl, reloc, align);
+}
+
+enum rx_builtin
+{
+ RX_BUILTIN_BRK,
+ RX_BUILTIN_CLRPSW,
+ RX_BUILTIN_INT,
+ RX_BUILTIN_MACHI,
+ RX_BUILTIN_MACLO,
+ RX_BUILTIN_MULHI,
+ RX_BUILTIN_MULLO,
+ RX_BUILTIN_MVFACHI,
+ RX_BUILTIN_MVFACMI,
+ RX_BUILTIN_MVFC,
+ RX_BUILTIN_MVTACHI,
+ RX_BUILTIN_MVTACLO,
+ RX_BUILTIN_MVTC,
+ RX_BUILTIN_MVTIPL,
+ RX_BUILTIN_RACW,
+ RX_BUILTIN_REVW,
+ RX_BUILTIN_RMPA,
+ RX_BUILTIN_ROUND,
+ RX_BUILTIN_SETPSW,
+ RX_BUILTIN_WAIT,
+ RX_BUILTIN_max
+};
+
+static void
+rx_init_builtins (void)
+{
+#define ADD_RX_BUILTIN1(UC_NAME, LC_NAME, RET_TYPE, ARG_TYPE) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE##_type_node, \
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_RX_BUILTIN2(UC_NAME, LC_NAME, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE1##_type_node,\
+ ARG_TYPE2##_type_node,\
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
+#define ADD_RX_BUILTIN3(UC_NAME,LC_NAME,RET_TYPE,ARG_TYPE1,ARG_TYPE2,ARG_TYPE3) \
+ add_builtin_function ("__builtin_rx_" LC_NAME, \
+ build_function_type_list (RET_TYPE##_type_node, \
+ ARG_TYPE1##_type_node,\
+ ARG_TYPE2##_type_node,\
+ ARG_TYPE3##_type_node,\
+ NULL_TREE), \
+ RX_BUILTIN_##UC_NAME, \
+ BUILT_IN_MD, NULL, NULL_TREE)
+
+ ADD_RX_BUILTIN1 (BRK, "brk", void, void);
+ ADD_RX_BUILTIN1 (CLRPSW, "clrpsw", void, integer);
+ ADD_RX_BUILTIN1 (SETPSW, "setpsw", void, integer);
+ ADD_RX_BUILTIN1 (INT, "int", void, integer);
+ ADD_RX_BUILTIN2 (MACHI, "machi", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MACLO, "maclo", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MULHI, "mulhi", void, intSI, intSI);
+ ADD_RX_BUILTIN2 (MULLO, "mullo", void, intSI, intSI);
+ ADD_RX_BUILTIN1 (MVFACHI, "mvfachi", intSI, void);
+ ADD_RX_BUILTIN1 (MVFACMI, "mvfacmi", intSI, void);
+ ADD_RX_BUILTIN1 (MVTACHI, "mvtachi", void, intSI);
+ ADD_RX_BUILTIN1 (MVTACLO, "mvtaclo", void, intSI);
+ ADD_RX_BUILTIN1 (RMPA, "rmpa", void, void);
+ ADD_RX_BUILTIN1 (MVFC, "mvfc", intSI, integer);
+ ADD_RX_BUILTIN2 (MVTC, "mvtc", void, integer, integer);
+ ADD_RX_BUILTIN1 (MVTIPL, "mvtipl", void, integer);
+ ADD_RX_BUILTIN1 (RACW, "racw", void, integer);
+ ADD_RX_BUILTIN1 (ROUND, "round", intSI, float);
+ ADD_RX_BUILTIN1 (REVW, "revw", intSI, intSI);
+ ADD_RX_BUILTIN1 (WAIT, "wait", void, void);
+}
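+
+/* A minimal usage sketch for the builtins registered above: calling
+ "int hi = __builtin_rx_mvfachi ();" reads the high 32 bits of the
+ accumulator, "__builtin_rx_mvtachi (hi);" writes them back, and
+ "__builtin_rx_brk ();" emits a BRK instruction. The prototypes
+ follow the ADD_RX_BUILTIN* invocations above. */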
+
+static rtx
+rx_expand_void_builtin_1_arg (rtx arg, rtx (* gen_func)(rtx), bool reg)
+{
+ if (reg && ! REG_P (arg))
+ arg = force_reg (SImode, arg);
+
+ emit_insn (gen_func (arg));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mvtc (tree exp)
+{
+ rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ if (! CONST_INT_P (arg1))
+ return NULL_RTX;
+
+ if (! REG_P (arg2))
+ arg2 = force_reg (SImode, arg2);
+
+ emit_insn (gen_mvtc (arg1, arg2));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mvfc (tree t_arg, rtx target)
+{
+ rtx arg = expand_normal (t_arg);
+
+ if (! CONST_INT_P (arg))
+ return NULL_RTX;
+
+ if (target == NULL_RTX)
+ return NULL_RTX;
+
+ if (! REG_P (target))
+ target = force_reg (SImode, target);
+
+ emit_insn (gen_mvfc (target, arg));
+
+ return target;
+}
+
+static rtx
+rx_expand_builtin_mvtipl (rtx arg)
+{
+ /* The RX610 does not support the MVTIPL instruction. */
+ if (rx_cpu_type == RX610)
+ return NULL_RTX;
+
+ if (! CONST_INT_P (arg) || ! IN_RANGE (INTVAL (arg), 0, (1 << 4) - 1))
+ return NULL_RTX;
+
+ emit_insn (gen_mvtipl (arg));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_builtin_mac (tree exp, rtx (* gen_func)(rtx, rtx))
+{
+ rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+
+ if (! REG_P (arg1))
+ arg1 = force_reg (SImode, arg1);
+
+ if (! REG_P (arg2))
+ arg2 = force_reg (SImode, arg2);
+
+ emit_insn (gen_func (arg1, arg2));
+
+ return NULL_RTX;
+}
+
+static rtx
+rx_expand_int_builtin_1_arg (rtx arg,
+ rtx target,
+ rtx (* gen_func)(rtx, rtx),
+ bool mem_ok)
+{
+ if (! REG_P (arg))
+ if (!mem_ok || ! MEM_P (arg))
+ arg = force_reg (SImode, arg);
+
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_func (target, arg));
+
+ return target;
+}
+
+static rtx
+rx_expand_int_builtin_0_arg (rtx target, rtx (* gen_func)(rtx))
+{
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_func (target));
+
+ return target;
+}
+
+static rtx
+rx_expand_builtin_round (rtx arg, rtx target)
+{
+ if ((! REG_P (arg) && ! MEM_P (arg))
+ || GET_MODE (arg) != SFmode)
+ arg = force_reg (SFmode, arg);
+
+ if (target == NULL_RTX || ! REG_P (target))
+ target = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lrintsf2 (target, arg));
+
+ return target;
+}
+
+static int
+valid_psw_flag (rtx op, const char *which)
+{
+ static int mvtc_inform_done = 0;
+
+ if (GET_CODE (op) == CONST_INT)
+ switch (INTVAL (op))
+ {
+ case 0: case 'c': case 'C':
+ case 1: case 'z': case 'Z':
+ case 2: case 's': case 'S':
+ case 3: case 'o': case 'O':
+ case 8: case 'i': case 'I':
+ case 9: case 'u': case 'U':
+ return 1;
+ }
+
+ error ("__builtin_rx_%s takes 'C', 'Z', 'S', 'O', 'I', or 'U'", which);
+ if (!mvtc_inform_done)
+ error ("use __builtin_rx_mvtc (0, ... ) to write arbitrary values to PSW");
+ mvtc_inform_done = 1;
+
+ return 0;
+}
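+
+/* The switch above treats the integer and character forms as
+ equivalent spellings, so, e.g., __builtin_rx_setpsw (8) and
+ __builtin_rx_setpsw ('I') both name the interrupt flag and pass
+ this check. */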
+
+static rtx
+rx_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg = call_expr_nargs (exp) >= 1 ? CALL_EXPR_ARG (exp, 0) : NULL_TREE;
+ rtx op = arg ? expand_normal (arg) : NULL_RTX;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ switch (fcode)
+ {
+ case RX_BUILTIN_BRK: emit_insn (gen_brk ()); return NULL_RTX;
+ case RX_BUILTIN_CLRPSW:
+ if (!valid_psw_flag (op, "clrpsw"))
+ return NULL_RTX;
+ return rx_expand_void_builtin_1_arg (op, gen_clrpsw, false);
+ case RX_BUILTIN_SETPSW:
+ if (!valid_psw_flag (op, "setpsw"))
+ return NULL_RTX;
+ return rx_expand_void_builtin_1_arg (op, gen_setpsw, false);
+ case RX_BUILTIN_INT: return rx_expand_void_builtin_1_arg
+ (op, gen_int, false);
+ case RX_BUILTIN_MACHI: return rx_expand_builtin_mac (exp, gen_machi);
+ case RX_BUILTIN_MACLO: return rx_expand_builtin_mac (exp, gen_maclo);
+ case RX_BUILTIN_MULHI: return rx_expand_builtin_mac (exp, gen_mulhi);
+ case RX_BUILTIN_MULLO: return rx_expand_builtin_mac (exp, gen_mullo);
+ case RX_BUILTIN_MVFACHI: return rx_expand_int_builtin_0_arg
+ (target, gen_mvfachi);
+ case RX_BUILTIN_MVFACMI: return rx_expand_int_builtin_0_arg
+ (target, gen_mvfacmi);
+ case RX_BUILTIN_MVTACHI: return rx_expand_void_builtin_1_arg
+ (op, gen_mvtachi, true);
+ case RX_BUILTIN_MVTACLO: return rx_expand_void_builtin_1_arg
+ (op, gen_mvtaclo, true);
+ case RX_BUILTIN_RMPA: emit_insn (gen_rmpa ()); return NULL_RTX;
+ case RX_BUILTIN_MVFC: return rx_expand_builtin_mvfc (arg, target);
+ case RX_BUILTIN_MVTC: return rx_expand_builtin_mvtc (exp);
+ case RX_BUILTIN_MVTIPL: return rx_expand_builtin_mvtipl (op);
+ case RX_BUILTIN_RACW: return rx_expand_void_builtin_1_arg
+ (op, gen_racw, false);
+ case RX_BUILTIN_ROUND: return rx_expand_builtin_round (op, target);
+ case RX_BUILTIN_REVW: return rx_expand_int_builtin_1_arg
+ (op, target, gen_revw, false);
+ case RX_BUILTIN_WAIT: emit_insn (gen_wait ()); return NULL_RTX;
+
+ default:
+ internal_error ("bad builtin code");
+ break;
+ }
+
+ return NULL_RTX;
+}
+
+/* Place an element into a constructor or destructor section.
+ Like default_ctor_section_asm_out_constructor in varasm.c
+ except that it uses .init_array (or .fini_array) and it
+ handles constructor priorities. */
+
+static void
+rx_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
+{
+ section * s;
+
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ char buf[18];
+
+ sprintf (buf, "%s.%.5u",
+ is_ctor ? ".init_array" : ".fini_array",
+ priority);
+ s = get_section (buf, SECTION_WRITE, NULL_TREE);
+ }
+ else if (is_ctor)
+ s = ctors_section;
+ else
+ s = dtors_section;
+
+ switch_to_section (s);
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+static void
+rx_elf_asm_constructor (rtx symbol, int priority)
+{
+ rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */true);
+}
+
+static void
+rx_elf_asm_destructor (rtx symbol, int priority)
+{
+ rx_elf_asm_cdtor (symbol, priority, /* is_ctor= */false);
+}
+
+/* Check "fast_interrupt", "interrupt" and "naked" attributes. */
+
+static tree
+rx_handle_func_attribute (tree * node,
+ tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ gcc_assert (DECL_P (* node));
+ gcc_assert (args == NULL_TREE);
+
+ if (TREE_CODE (* node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ * no_add_attrs = true;
+ }
+
+ /* FIXME: We ought to check for conflicting attributes. */
+
+ /* FIXME: We ought to check that the interrupt and exception
+ handler attributes have been applied to void functions. */
+ return NULL_TREE;
+}
+
+/* Table of RX specific attributes. */
+const struct attribute_spec rx_attribute_table[] =
+{
+ /* Name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */
+ { "fast_interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
+ { "interrupt", 0, 0, true, false, false, rx_handle_func_attribute },
+ { "naked", 0, 0, true, false, false, rx_handle_func_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
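+
+/* Source-level usage of the attributes in the table above is, e.g.:
+
+ void handler (void) __attribute__ ((interrupt));
+
+ Each entry routes through rx_handle_func_attribute, which insists
+ on a FUNCTION_DECL and no arguments. */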
+
+/* Extra processing for target specific command line options. */
+
+static bool
+rx_handle_option (size_t code, const char * arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ case OPT_mint_register_:
+ switch (value)
+ {
+ case 4:
+ fixed_regs[10] = call_used_regs [10] = 1;
+ /* Fall through. */
+ case 3:
+ fixed_regs[11] = call_used_regs [11] = 1;
+ /* Fall through. */
+ case 2:
+ fixed_regs[12] = call_used_regs [12] = 1;
+ /* Fall through. */
+ case 1:
+ fixed_regs[13] = call_used_regs [13] = 1;
+ /* Fall through. */
+ case 0:
+ return true;
+ default:
+ return false;
+ }
+ break;
+
+ case OPT_mmax_constant_size_:
+ /* Make sure that the -mmax-constant-size option is in range. */
+ return value >= 0 && value <= 4;
+
+ case OPT_mcpu_:
+ if (strcasecmp (arg, "RX610") == 0)
+ rx_cpu_type = RX610;
+ else if (strcasecmp (arg, "RX200") == 0)
+ {
+ target_flags |= MASK_NO_USE_FPU;
+ rx_cpu_type = RX200;
+ }
+ else if (strcasecmp (arg, "RX600") != 0)
+ warning (0, "unrecognized argument '%s' to -mcpu= option", arg);
+ break;
+
+ case OPT_fpu:
+ if (rx_cpu_type == RX200)
+ error ("the RX200 cpu does not have FPU hardware");
+ break;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
+
+static void
+rx_override_options_after_change (void)
+{
+ static bool first_time = TRUE;
+
+ if (first_time)
+ {
+ /* If this is the first time through and the user has not disabled
+ the use of RX FPU hardware then enable -ffinite-math-only,
+ since the FPU instructions do not support NaNs and infinities. */
+ if (TARGET_USE_FPU)
+ flag_finite_math_only = 1;
+
+ first_time = FALSE;
+ }
+ else
+ {
+ /* Alert the user if they are changing the optimization options
+ to use IEEE compliant floating point arithmetic with RX FPU insns. */
+ if (TARGET_USE_FPU
+ && !flag_finite_math_only)
+ warning (0, "RX FPU instructions do not support NaNs and infinities");
+ }
+}
+
+static void
+rx_option_override (void)
+{
+ /* This target defaults to strict volatile bitfields. */
+ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
+ flag_strict_volatile_bitfields = 1;
+
+ rx_override_options_after_change ();
+
+ if (align_jumps == 0 && ! optimize_size)
+ align_jumps = 3;
+ if (align_loops == 0 && ! optimize_size)
+ align_loops = 3;
+ if (align_labels == 0 && ! optimize_size)
+ align_labels = 3;
+}
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options rx_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+
+static bool
+rx_allocate_stack_slots_for_args (void)
+{
+ /* Naked functions should not allocate stack slots for arguments. */
+ return ! is_naked_func (NULL_TREE);
+}
+
+static bool
+rx_func_attr_inlinable (const_tree decl)
+{
+ return ! is_fast_interrupt_func (decl)
+ && ! is_interrupt_func (decl)
+ && ! is_naked_func (decl);
+}
+
+/* Return nonzero if it is OK to make a tail-call to DECL (a
+ FUNCTION_DECL, or NULL if this is an indirect call), using EXP. */
+
+static bool
+rx_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ /* Do not allow indirect tailcalls. The
+ sibcall patterns do not support them. */
+ if (decl == NULL)
+ return false;
+
+ /* Never tailcall from inside interrupt handlers or naked functions. */
+ if (is_fast_interrupt_func (NULL_TREE)
+ || is_interrupt_func (NULL_TREE)
+ || is_naked_func (NULL_TREE))
+ return false;
+
+ return true;
+}
+
+static void
+rx_file_start (void)
+{
+ if (! TARGET_AS100_SYNTAX)
+ default_file_start ();
+}
+
+static bool
+rx_is_ms_bitfield_layout (const_tree record_type ATTRIBUTE_UNUSED)
+{
+ /* The packed attribute overrides the MS behaviour. */
+ return ! TYPE_PACKED (record_type);
+}
+
+/* Returns true if X is a legitimate constant for an immediate
+ operand on the RX. X is already known to satisfy CONSTANT_P. */
+
+bool
+rx_is_legitimate_constant (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (! CONST_INT_P (XEXP (x, 1)))
+ return false;
+
+ /* GCC would not pass us CONST_INT + CONST_INT so we
+ know that we have {SYMBOL|LABEL} + CONST_INT. */
+ x = XEXP (x, 0);
+ gcc_assert (! CONST_INT_P (x));
+ }
+
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return true;
+
+ case UNSPEC:
+ return XINT (x, 1) == UNSPEC_CONST;
+
+ default:
+ /* FIXME: Can this ever happen? */
+ gcc_unreachable ();
+ }
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return true;
+ case CONST_DOUBLE:
+ return (rx_max_constant_size == 0 || rx_max_constant_size == 4);
+ case CONST_VECTOR:
+ return false;
+ default:
+ gcc_assert (CONST_INT_P (x));
+ break;
+ }
+
+ return ok_for_max_constant (INTVAL (x));
+}
+
+static int
+rx_address_cost (rtx addr, bool speed)
+{
+ rtx a, b;
+
+ if (GET_CODE (addr) != PLUS)
+ return COSTS_N_INSNS (1);
+
+ a = XEXP (addr, 0);
+ b = XEXP (addr, 1);
+
+ if (REG_P (a) && REG_P (b))
+ /* Try to discourage REG+REG addressing as it keeps two registers live. */
+ return COSTS_N_INSNS (4);
+
+ if (speed)
+ /* [REG+OFF] is just as fast as [REG]. */
+ return COSTS_N_INSNS (1);
+
+ if (CONST_INT_P (b)
+ && ((INTVAL (b) > 128) || INTVAL (b) < -127))
+ /* Try to discourage REG + <large OFF> when optimizing for size. */
+ return COSTS_N_INSNS (2);
+
+ return COSTS_N_INSNS (1);
+}
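+
+/* So a REG + REG address is charged four instruction units to
+ discourage keeping two registers live, a REG + <large offset>
+ address costs two when optimizing for size, and all other
+ addresses cost one. */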
+
+static bool
+rx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ /* We can always eliminate to the frame pointer.
+ We can eliminate to the stack pointer unless a frame
+ pointer is needed. */
+
+ return to == FRAME_POINTER_REGNUM
+ || ( to == STACK_POINTER_REGNUM && ! frame_pointer_needed);
+}
+
+
+static void
+rx_trampoline_template (FILE * file)
+{
+ /* Output assembler code for a block containing the constant
+ part of a trampoline, leaving space for the variable parts.
+
+ On the RX, (where r8 is the static chain regnum) the trampoline
+ looks like:
+
+ mov #<static chain value>, r8
+ mov #<function's address>, r9
+ jmp r9
+
+ In big-endian-data-mode however instructions are read into the CPU
+ 4 bytes at a time. These bytes are then swapped around before being
+ passed to the decoder. So we must partition our trampoline into
+ 4-byte packets and swap these packets around so that the instruction
+ reader will reverse the process. But, in order to avoid splitting
+ the 32-bit constants across these packet boundaries (which would make
+ inserting them into the constructed trampoline very difficult), we
+ have to pad the instruction sequence with NOP insns, i.e.:
+
+ nop
+ nop
+ mov.l #<...>, r8
+ nop
+ nop
+ mov.l #<...>, r9
+ jmp r9
+ nop
+ nop */
+
+ if (! TARGET_BIG_ENDIAN_DATA)
+ {
+ asm_fprintf (file, "\tmov.L\t#0deadbeefH, r%d\n", STATIC_CHAIN_REGNUM);
+ asm_fprintf (file, "\tmov.L\t#0deadbeefH, r%d\n", TRAMPOLINE_TEMP_REGNUM);
+ asm_fprintf (file, "\tjmp\tr%d\n", TRAMPOLINE_TEMP_REGNUM);
+ }
+ else
+ {
+ char r8 = '0' + STATIC_CHAIN_REGNUM;
+ char r9 = '0' + TRAMPOLINE_TEMP_REGNUM;
+
+ if (TARGET_AS100_SYNTAX)
+ {
+ asm_fprintf (file, "\t.BYTE 0%c2H, 0fbH, 003H, 003H\n", r8);
+ asm_fprintf (file, "\t.BYTE 0deH, 0adH, 0beH, 0efH\n");
+ asm_fprintf (file, "\t.BYTE 0%c2H, 0fbH, 003H, 003H\n", r9);
+ asm_fprintf (file, "\t.BYTE 0deH, 0adH, 0beH, 0efH\n");
+ asm_fprintf (file, "\t.BYTE 003H, 003H, 00%cH, 07fH\n", r9);
+ }
+ else
+ {
+ asm_fprintf (file, "\t.byte 0x%c2, 0xfb, 0x03, 0x03\n", r8);
+ asm_fprintf (file, "\t.byte 0xde, 0xad, 0xbe, 0xef\n");
+ asm_fprintf (file, "\t.byte 0x%c2, 0xfb, 0x03, 0x03\n", r9);
+ asm_fprintf (file, "\t.byte 0xde, 0xad, 0xbe, 0xef\n");
+ asm_fprintf (file, "\t.byte 0x03, 0x03, 0x0%c, 0x7f\n", r9);
+ }
+ }
+}
+
+static void
+rx_trampoline_init (rtx tramp, tree fndecl, rtx chain)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ emit_block_move (tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ if (TARGET_BIG_ENDIAN_DATA)
+ {
+ emit_move_insn (adjust_address (tramp, SImode, 4), chain);
+ emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr);
+ }
+ else
+ {
+ emit_move_insn (adjust_address (tramp, SImode, 2), chain);
+ emit_move_insn (adjust_address (tramp, SImode, 6 + 2), fnaddr);
+ }
+}
+
+static int
+rx_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t regclass ATTRIBUTE_UNUSED,
+ bool in)
+{
+ return (in ? 2 : 0) + REGISTER_MOVE_COST (mode, regclass, regclass);
+}
+
+/* Convert a CC_MODE to the set of flags that it represents. */
+
+static unsigned int
+flags_from_mode (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case CC_ZSmode:
+ return CC_FLAG_S | CC_FLAG_Z;
+ case CC_ZSOmode:
+ return CC_FLAG_S | CC_FLAG_Z | CC_FLAG_O;
+ case CC_ZSCmode:
+ return CC_FLAG_S | CC_FLAG_Z | CC_FLAG_C;
+ case CCmode:
+ return CC_FLAG_S | CC_FLAG_Z | CC_FLAG_O | CC_FLAG_C;
+ case CC_Fmode:
+ return CC_FLAG_FP;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Convert a set of flags to a CC_MODE that can implement it. */
+
+static enum machine_mode
+mode_from_flags (unsigned int f)
+{
+ if (f & CC_FLAG_FP)
+ return CC_Fmode;
+ if (f & CC_FLAG_O)
+ {
+ if (f & CC_FLAG_C)
+ return CCmode;
+ else
+ return CC_ZSOmode;
+ }
+ else if (f & CC_FLAG_C)
+ return CC_ZSCmode;
+ else
+ return CC_ZSmode;
+}
+
+/* Convert an RTX_CODE to the set of flags needed to implement it.
+ This assumes an integer comparison. */
+
+static unsigned int
+flags_from_code (enum rtx_code code)
+{
+ switch (code)
+ {
+ case LT:
+ case GE:
+ return CC_FLAG_S;
+ case GT:
+ case LE:
+ return CC_FLAG_S | CC_FLAG_O | CC_FLAG_Z;
+ case GEU:
+ case LTU:
+ return CC_FLAG_C;
+ case GTU:
+ case LEU:
+ return CC_FLAG_C | CC_FLAG_Z;
+ case EQ:
+ case NE:
+ return CC_FLAG_Z;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return a CC_MODE of which both M1 and M2 are subsets. */
+
+static enum machine_mode
+rx_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ unsigned f;
+
+ /* Early out for identical modes. */
+ if (m1 == m2)
+ return m1;
+
+ /* There's no valid combination for FP vs non-FP. */
+ f = flags_from_mode (m1) | flags_from_mode (m2);
+ if (f & CC_FLAG_FP)
+ return VOIDmode;
+
+ /* Otherwise, see what mode can implement all the flags. */
+ return mode_from_flags (f);
+}
+
+/* Return the minimal CC mode needed to implement (CMP_CODE X Y). */
+
+enum machine_mode
+rx_select_cc_mode (enum rtx_code cmp_code, rtx x, rtx y)
+{
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ return CC_Fmode;
+
+ if (y != const0_rtx)
+ return CCmode;
+
+ return mode_from_flags (flags_from_code (cmp_code));
+}
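+
+/* For example, following flags_from_code above, a comparison such as
+ (LT x 0) only needs the S flag and so is given CC_ZSmode, while a
+ comparison against a nonzero value always gets the full CCmode. */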
+
+/* Split the conditional branch. Emit (COMPARE C1 C2) into CC_REG with
+ CC_MODE, and use that in branches based on that compare. */
+
+void
+rx_split_cbranch (enum machine_mode cc_mode, enum rtx_code cmp1,
+ rtx c1, rtx c2, rtx label)
+{
+ rtx flags, x;
+
+ flags = gen_rtx_REG (cc_mode, CC_REG);
+ x = gen_rtx_COMPARE (cc_mode, c1, c2);
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ x = gen_rtx_fmt_ee (cmp1, VOIDmode, flags, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label, pc_rtx);
+ x = gen_rtx_SET (VOIDmode, pc_rtx, x);
+ emit_jump_insn (x);
+}
+
+/* A helper function for matching parallels that set the flags. */
+
+bool
+rx_match_ccmode (rtx insn, enum machine_mode cc_mode)
+{
+ rtx op1, flags;
+ enum machine_mode flags_mode;
+
+ gcc_checking_assert (XVECLEN (PATTERN (insn), 0) == 2);
+
+ op1 = XVECEXP (PATTERN (insn), 0, 1);
+ gcc_checking_assert (GET_CODE (SET_SRC (op1)) == COMPARE);
+
+ flags = SET_DEST (op1);
+ flags_mode = GET_MODE (flags);
+
+ if (GET_MODE (SET_SRC (op1)) != flags_mode)
+ return false;
+ if (GET_MODE_CLASS (flags_mode) != MODE_CC)
+ return false;
+
+ /* Ensure that the mode of FLAGS is compatible with CC_MODE. */
+ if (flags_from_mode (flags_mode) & ~flags_from_mode (cc_mode))
+ return false;
+
+ return true;
+}
+
+
+int
+rx_align_for_label (rtx lab, int uses_threshold)
+{
+ /* This is a simple heuristic to guess when an alignment would not be useful
+ because the delay due to the inserted NOPs would be greater than the delay
+ due to the misaligned branch. If uses_threshold is zero then the alignment
+ is always useful. */
+ if (LABEL_P (lab) && LABEL_NUSES (lab) < uses_threshold)
+ return 0;
+
+ return optimize_size ? 1 : 3;
+}
+
+static int
+rx_max_skip_for_label (rtx lab)
+{
+ int opsize;
+ rtx op;
+
+ if (lab == NULL_RTX)
+ return 0;
+ op = lab;
+ do
+ {
+ op = next_nonnote_nondebug_insn (op);
+ }
+ while (op && (LABEL_P (op)
+ || (INSN_P (op) && GET_CODE (PATTERN (op)) == USE)));
+ if (!op)
+ return 0;
+
+ opsize = get_attr_length (op);
+ if (opsize >= 0 && opsize < 8)
+ return opsize - 1;
+ return 0;
+}
+
+#undef TARGET_ASM_JUMP_ALIGN_MAX_SKIP
+#define TARGET_ASM_JUMP_ALIGN_MAX_SKIP rx_max_skip_for_label
+#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
+#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rx_max_skip_for_label
+#undef TARGET_LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
+#define TARGET_LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP rx_max_skip_for_label
+#undef TARGET_ASM_LABEL_ALIGN_MAX_SKIP
+#define TARGET_ASM_LABEL_ALIGN_MAX_SKIP rx_max_skip_for_label
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE rx_function_value
+
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB rx_return_in_msb
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P rx_in_small_data
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY rx_return_in_memory
+
+#undef TARGET_HAVE_SRODATA_SECTION
+#define TARGET_HAVE_SRODATA_SECTION true
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION rx_select_rtx_section
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION rx_select_section
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS rx_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN rx_expand_builtin
+
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR rx_elf_asm_constructor
+
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR rx_elf_asm_destructor
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX rx_struct_value_rtx
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE rx_attribute_table
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START rx_file_start
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P rx_is_ms_bitfield_layout
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P rx_is_legitimate_address
+
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P rx_mode_dependent_address_p
+
+#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS rx_allocate_stack_slots_for_args
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE rx_output_function_prologue
+
+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P rx_func_attr_inlinable
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL rx_function_ok_for_sibcall
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG rx_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE rx_function_arg_advance
+
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY rx_function_arg_boundary
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION rx_set_current_function
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION rx_handle_option
+
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER rx_assemble_integer
+
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
+
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET 32
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST rx_address_cost
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE rx_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE rx_conditional_register_usage
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE rx_trampoline_template
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT rx_trampoline_init
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND rx_print_operand
+
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS rx_print_operand_address
+
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE rx_cc_modes_compatible
+
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST rx_memory_move_cost
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE rx_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE rx_option_optimization_table
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE rx_promote_function_mode
+
+#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rx_override_options_after_change
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM CC_REG
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* #include "gt-rx.h" */
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
new file mode 100644
index 000000000..89f2e2c7e
--- /dev/null
+++ b/gcc/config/rx/rx.h
@@ -0,0 +1,643 @@
+/* GCC backend definitions for the Renesas RX processor.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__RX__"); \
+ builtin_assert ("cpu=RX"); \
+ if (rx_cpu_type == RX610) \
+ { \
+ builtin_define ("__RX610__"); \
+ builtin_assert ("machine=RX610"); \
+ } \
+ else \
+ builtin_assert ("machine=RX600"); \
+ \
+ if (TARGET_BIG_ENDIAN_DATA) \
+ builtin_define ("__RX_BIG_ENDIAN__"); \
+ else \
+ builtin_define ("__RX_LITTLE_ENDIAN__");\
+ \
+ if (TARGET_64BIT_DOUBLES) \
+ builtin_define ("__RX_64BIT_DOUBLES__");\
+ else \
+ builtin_define ("__RX_32BIT_DOUBLES__");\
+ \
+ if (ALLOW_RX_FPU_INSNS) \
+ builtin_define ("__RX_FPU_INSNS__"); \
+ \
+ if (TARGET_AS100_SYNTAX) \
+ builtin_define ("__RX_AS100_SYNTAX__"); \
+ else \
+ builtin_define ("__RX_GAS_SYNTAX__"); \
+ } \
+ while (0)
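+
+/* User code can key off the macros defined above, for example:
+
+ #ifdef __RX_64BIT_DOUBLES__
+ ... use full-precision doubles ...
+ #else
+ ... doubles are only 32 bits wide ...
+ #endif
+*/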
+
+enum rx_cpu_types
+{
+ RX600,
+ RX610,
+ RX200
+};
+
+extern enum rx_cpu_types rx_cpu_type;
+
+#undef CC1_SPEC
+#define CC1_SPEC "\
+ %{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}} \
+ %{mcpu=rx200:%{fpu:%erx200 cpu does not have FPU hardware}}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s} crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{mbig-endian-data:-mbig-endian-data} \
+%{m64bit-doubles:-m64bit-doubles} \
+%{!m64bit-doubles:-m32bit-doubles} \
+%{msmall-data-limit*:-msmall-data-limit} \
+%{mrelax:-relax} \
+"
+
+#undef LIB_SPEC
+#define LIB_SPEC " \
+--start-group \
+-lc \
+%{msim:-lsim}%{!msim:-lnosys} \
+%{fprofile-arcs|fprofile-generate|coverage:-lgcov} \
+--end-group \
+%{!T*: %{msim:%Trx-sim.ld}%{!msim:%Trx.ld}} \
+"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{mbig-endian-data:--oformat elf32-rx-be} %{mrelax:-relax}"
+
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN TARGET_BIG_ENDIAN_DATA
+#define WORDS_BIG_ENDIAN TARGET_BIG_ENDIAN_DATA
+
+#define UNITS_PER_WORD 4
+
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE (TARGET_64BIT_DOUBLES ? 64 : 32)
+#define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE
+
+#ifdef __RX_32BIT_DOUBLES__
+#define LIBGCC2_HAS_DF_MODE 0
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 32
+#else
+#define LIBGCC2_HAS_DF_MODE 1
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#define STRICT_ALIGNMENT 1
+#define FUNCTION_BOUNDARY 8
+#define BIGGEST_ALIGNMENT 32
+#define STACK_BOUNDARY 32
+#define PARM_BOUNDARY 8
+
+#define STACK_GROWS_DOWNWARD 1
+#define FRAME_GROWS_DOWNWARD 0
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define Pmode SImode
+#define POINTER_SIZE 32
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+#define POINTERS_EXTEND_UNSIGNED 1
+#define FUNCTION_MODE QImode
+#define CASE_VECTOR_MODE Pmode
+#define WORD_REGISTER_OPERATIONS 1
+#define HAS_LONG_COND_BRANCH 0
+#define HAS_LONG_UNCOND_BRANCH 0
+
+#define MOVE_MAX 4
+#define STARTING_FRAME_OFFSET 0
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define LEGITIMATE_CONSTANT_P(X) rx_is_legitimate_constant (X)
+
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+
+#define MOVE_RATIO(SPEED) ((SPEED) ? 4 : 2)
+#define SLOW_BYTE_ACCESS 1
+
+#define STORE_FLAG_VALUE 1
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+#define SHORT_IMMEDIATES_SIGN_EXTEND 1
+
+enum reg_class
+{
+ NO_REGS, /* No registers in set. */
+ GR_REGS, /* Integer registers. */
+ ALL_REGS, /* All registers. */
+ LIM_REG_CLASSES /* Max value + 1. */
+};
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "GR_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000 }, /* No registers. */ \
+ { 0x0000ffff }, /* Integer registers. */ \
+ { 0x0000ffff } /* All registers. */ \
+}
+
+#define IRA_COVER_CLASSES \
+ { \
+ GR_REGS, LIM_REG_CLASSES \
+ }
+
+#define SMALL_REGISTER_CLASSES 0
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+#define CLASS_MAX_NREGS(CLASS, MODE) ((GET_MODE_SIZE (MODE) \
+ + UNITS_PER_WORD - 1) \
+ / UNITS_PER_WORD)
+
+#define GENERAL_REGS GR_REGS
+#define BASE_REG_CLASS GR_REGS
+#define INDEX_REG_CLASS GR_REGS
+
+#define FIRST_PSEUDO_REGISTER 17
+
+#define REGNO_REG_CLASS(REGNO) ((REGNO) < FIRST_PSEUDO_REGISTER \
+ ? GR_REGS : NO_REGS)
+
+#define STACK_POINTER_REGNUM 0
+#define FUNC_RETURN_REGNUM 1
+#define FRAME_POINTER_REGNUM 6
+#define ARG_POINTER_REGNUM 7
+#define STATIC_CHAIN_REGNUM 8
+#define TRAMPOLINE_TEMP_REGNUM 9
+#define STRUCT_VAL_REGNUM 15
+#define CC_REGNUM 16
+
+/* This is the register which is used to hold the address of the start
+ of the small data area, if that feature is being used. Note - this
+ register must not be call_used because otherwise library functions
+ that are compiled without small data support might clobber it.
+
+ FIXME: The function gcc/config/rx/rx.c:rx_gen_move_template() has a
+ built in copy of this register's name, rather than constructing the
+ name from this #define. */
+#define GP_BASE_REGNUM 13
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = rx_initial_elimination_offset ((FROM), (TO))
+
+
+#define FUNCTION_ARG_REGNO_P(N) (((N) >= 1) && ((N) <= 4))
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == FUNC_RETURN_REGNUM)
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define FIXED_REGISTERS \
+{ \
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 \
+}
+
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1 \
+}
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (((GET_MODE_CLASS (MODE) != MODE_INT \
+ || COMPLEX_MODE_P (MODE) \
+ || GET_MODE_SIZE (MODE) >= 4) \
+ ? (MODE) \
+ : SImode), \
+ FUNC_RETURN_REGNUM)
+
+/* Order of allocation of registers. */
+
+#define REG_ALLOC_ORDER \
+{ 7, 10, 11, 12, 13, 14, 4, 3, 2, 1, 9, 8, 6, 5, 15 \
+}
+
+#define REGNO_IN_RANGE(REGNO, MIN, MAX) \
+ (IN_RANGE ((REGNO), (MIN), (MAX)) \
+ || (reg_renumber != NULL \
+ && reg_renumber[(REGNO)] >= (MIN) \
+ && reg_renumber[(REGNO)] <= (MAX)))
+
+#ifdef REG_OK_STRICT
+#define REGNO_OK_FOR_BASE_P(regno) REGNO_IN_RANGE (regno, 0, 15)
+#else
+#define REGNO_OK_FOR_BASE_P(regno) 1
+#endif
+
+#define REGNO_OK_FOR_INDEX_P(regno) REGNO_OK_FOR_BASE_P (regno)
+
+#define RTX_OK_FOR_BASE(X, STRICT) \
+ ((STRICT) ? \
+ ( (REG_P (X) \
+ && REGNO_IN_RANGE (REGNO (X), 0, 15)) \
+ || (GET_CODE (X) == SUBREG \
+ && REG_P (SUBREG_REG (X)) \
+ && REGNO_IN_RANGE (REGNO (SUBREG_REG (X)), 0, 15))) \
+ : \
+ ( (REG_P (X) \
+ || (GET_CODE (X) == SUBREG \
+ && REG_P (SUBREG_REG (X))))))
+
+
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, arg_pointer_rtx, GEN_INT (-4))) \
+ : NULL_RTX)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_MEM (Pmode, stack_pointer_rtx)
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+typedef unsigned int CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM) = 0
+
+
+#define TRAMPOLINE_SIZE (! TARGET_BIG_ENDIAN_DATA ? 14 : 20)
+#define TRAMPOLINE_ALIGNMENT 32
+
+#define NO_PROFILE_COUNTERS 1
+#define PROFILE_BEFORE_PROLOGUE 1
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fprintf (FILE, "\tbsr\t__mcount\n");
+
+
+#define HARD_REGNO_NREGS(REGNO, MODE) CLASS_MAX_NREGS (0, MODE)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ REGNO_REG_CLASS (REGNO) == GR_REGS
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ( ( GET_MODE_CLASS (MODE1) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == ( GET_MODE_CLASS (MODE2) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+
+#define REGISTER_NAMES \
+ { \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc" \
+ }
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "sp", STACK_POINTER_REGNUM } \
+ , { "fp", FRAME_POINTER_REGNUM } \
+ , { "arg", ARG_POINTER_REGNUM } \
+ , { "chain", STATIC_CHAIN_REGNUM } \
+}
+
+#define DATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION D,DATA" \
+ : "\t.section D,\"aw\",@progbits\n\t.p2align 2")
+
+#define SDATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION D_2,DATA,ALIGN=2" \
+ : "\t.section D_2,\"aw\",@progbits\n\t.p2align 1")
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION C,ROMDATA,ALIGN=4" \
+ : "\t.section C,\"a\",@progbits\n\t.p2align 2")
+
+#define BSS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION B,DATA,ALIGN=4" \
+ : "\t.section B,\"w\",@nobits\n\t.p2align 2")
+
+#define SBSS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION B_2,DATA,ALIGN=2" \
+ : "\t.section B_2,\"w\",@nobits\n\t.p2align 1")
+
+/* The following definitions are conditional, depending upon whether
+ the compiler itself is being built or whether crtstuff.c is being
+ compiled by the already-built compiler. */
+#if defined CRT_BEGIN || defined CRT_END
+# ifdef __RX_AS100_SYNTAX
+# define TEXT_SECTION_ASM_OP "\t.SECTION P,CODE"
+# define CTORS_SECTION_ASM_OP "\t.SECTION init_array,CODE"
+# define DTORS_SECTION_ASM_OP "\t.SECTION fini_array,CODE"
+# define INIT_ARRAY_SECTION_ASM_OP "\t.SECTION init_array,CODE"
+# define FINI_ARRAY_SECTION_ASM_OP "\t.SECTION fini_array,CODE"
+# else
+# define TEXT_SECTION_ASM_OP "\t.section P,\"ax\""
+# define CTORS_SECTION_ASM_OP \
+ "\t.section\t.init_array,\"aw\",@init_array"
+# define DTORS_SECTION_ASM_OP \
+ "\t.section\t.fini_array,\"aw\",@fini_array"
+# define INIT_ARRAY_SECTION_ASM_OP \
+ "\t.section\t.init_array,\"aw\",@init_array"
+# define FINI_ARRAY_SECTION_ASM_OP \
+ "\t.section\t.fini_array,\"aw\",@fini_array"
+# endif
+#else
+# define TEXT_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION P,CODE" : "\t.section P,\"ax\"")
+
+# define CTORS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION init_array,CODE" \
+ : "\t.section\t.init_array,\"aw\",@init_array")
+
+# define DTORS_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION fini_array,CODE" \
+ : "\t.section\t.fini_array,\"aw\",@fini_array")
+
+# define INIT_ARRAY_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION init_array,CODE" \
+ : "\t.section\t.init_array,\"aw\",@init_array")
+
+# define FINI_ARRAY_SECTION_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.SECTION fini_array,CODE" \
+ : "\t.section\t.fini_array,\"aw\",@fini_array")
+#endif
+
+#define GLOBAL_ASM_OP \
+ (TARGET_AS100_SYNTAX ? "\t.GLB\t" : "\t.global\t")
+#define ASM_COMMENT_START " ;"
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+#define LOCAL_LABEL_PREFIX "L"
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) \
+ do \
+ { \
+ if ((LOG) == 0) \
+ break; \
+ if (TARGET_AS100_SYNTAX) \
+ { \
+ if ((LOG) >= 2) \
+ fprintf (STREAM, "\t.ALIGN 4\t; %d alignment actually requested\n", 1 << (LOG)); \
+ else \
+ fprintf (STREAM, "\t.ALIGN 2\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\t.balign %d\n", 1 << (LOG)); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, TARGET_AS100_SYNTAX ? "\t.LWORD L%d\n" : "\t.long .L%d\n", \
+ VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+ Note: The local label referenced by the "1b" below is emitted by
+ the tablejump insn. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, TARGET_AS100_SYNTAX \
+ ? "\t.LWORD L%d - ?-\n" : "\t.long .L%d - 1b\n", VALUE)
+
+#define ASM_OUTPUT_SIZE_DIRECTIVE(STREAM, NAME, SIZE) \
+ do \
+ { \
+ HOST_WIDE_INT size_ = (SIZE); \
+ \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+ .size pseudo-op. */ \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ \
+ fputs (SIZE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fprintf (STREAM, ", " HOST_WIDE_INT_PRINT_DEC "\n", size_); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_MEASURED_SIZE(STREAM, NAME) \
+ do \
+ { \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+ .size pseudo-op. */ \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ fputs (SIZE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fputs (", .-", STREAM); \
+ assemble_name (STREAM, NAME); \
+ putc ('\n', STREAM); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE) \
+ do \
+ { \
+ /* The as100 assembler does not have an equivalent of the SVR4 \
+ .type pseudo-op. */ \
+ if (TARGET_AS100_SYNTAX) \
+ break; \
+ fputs (TYPE_ASM_OP, STREAM); \
+ assemble_name (STREAM, NAME); \
+ fputs (", ", STREAM); \
+ fprintf (STREAM, TYPE_OPERAND_FMT, TYPE); \
+ putc ('\n', STREAM); \
+ } \
+ while (0)
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ do \
+ { \
+ sprintf (LABEL, TARGET_AS100_SYNTAX ? "*%s%u" : "*.%s%u", \
+ PREFIX, (unsigned) (NUM)); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ do \
+ { \
+ if (TARGET_AS100_SYNTAX) \
+ targetm.asm_out.globalize_label (FILE, NAME); \
+ default_elf_asm_output_external (FILE, DECL, NAME); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_AS100_SYNTAX) \
+ { \
+ fprintf ((FILE), "\t.GLB\t"); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), "\n"); \
+ assemble_name ((FILE), (NAME)); \
+ switch ((ALIGN) / BITS_PER_UNIT) \
+ { \
+ case 4: \
+ fprintf ((FILE), ":\t.BLKL\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE) / 4); \
+ break; \
+ case 2: \
+ fprintf ((FILE), ":\t.BLKW\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE) / 2); \
+ break; \
+ default: \
+ fprintf ((FILE), ":\t.BLKB\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\
+ (SIZE)); \
+ break; \
+ } \
+ } \
+ else \
+ { \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ } \
+ while (0)
+
+#undef SKIP_ASM_OP
+#define SKIP_ASM_OP (TARGET_AS100_SYNTAX ? "\t.BLKB\t" : "\t.zero\t")
+
+#undef ASM_OUTPUT_LIMITED_STRING
+#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \
+ do \
+ { \
+ const unsigned char *_limited_str = \
+ (const unsigned char *) (STR); \
+ unsigned ch; \
+ \
+ fprintf ((FILE), TARGET_AS100_SYNTAX \
+ ? "\t.BYTE\t\"" : "\t.string\t\""); \
+ \
+ for (; (ch = *_limited_str); _limited_str++) \
+ { \
+ int escape; \
+ \
+ switch (escape = ESCAPES[ch]) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ break; \
+ } \
+ } \
+ \
+ fprintf ((FILE), TARGET_AS100_SYNTAX ? "\"\n\t.BYTE\t0\n" : "\"\n");\
+ } \
+ while (0)
+
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP (TARGET_AS100_SYNTAX \
+ ? "\t.END\t; Built by: ": "\t.ident\t")
+
+/* For PIC put jump tables into the text section so that the offsets that
+ they contain are always computed between two same-section symbols. */
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+/* This is a version of REG_P that also returns TRUE for SUBREGs. */
+#define RX_REG_P(rtl) (REG_P (rtl) || GET_CODE (rtl) == SUBREG)
+
+/* Like REG_P, except that this macro tests for a SET expression. */
+#define SET_P(rtl) (GET_CODE (rtl) == SET)
+
+/* The AS100 assembler does not support .leb128 and .uleb128, but
+ the compiler-build-time configure tests will have enabled their
+ use because GAS supports them. So default to generating STABS
+ debug information instead of DWARF2 when generating AS100
+ compatible output. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE (TARGET_AS100_SYNTAX \
+ ? DBX_DEBUG : DWARF2_DEBUG)
+
+#define INCOMING_FRAME_SP_OFFSET 4
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 4
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 4
+
+#define TARGET_USE_FPU (! TARGET_NO_USE_FPU)
+
+/* This macro is used to decide when RX FPU instructions can be used. */
+#define ALLOW_RX_FPU_INSNS (TARGET_USE_FPU)
+
+#define BRANCH_COST(SPEED,PREDICT) 1
+#define REGISTER_MOVE_COST(MODE,FROM,TO) 2
+
+#define SELECT_CC_MODE(OP,X,Y) rx_select_cc_mode((OP), (X), (Y))
+
+/* Compute the alignment needed for label X in various situations.
+ If the user has specified an alignment then honour that, otherwise
+ use rx_align_for_label. */
+#define JUMP_ALIGN(x) (align_jumps ? align_jumps : rx_align_for_label (x, 0))
+#define LABEL_ALIGN(x) (align_labels ? align_labels : rx_align_for_label (x, 3))
+#define LOOP_ALIGN(x) (align_loops ? align_loops : rx_align_for_label (x, 2))
+#define LABEL_ALIGN_AFTER_BARRIER(x) rx_align_for_label (x, 0)
+
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP) \
+ do \
+ { \
+ if ((LOG) == 0 || (MAX_SKIP) == 0) \
+ break; \
+ if (TARGET_AS100_SYNTAX) \
+ { \
+ if ((LOG) >= 2) \
+ fprintf (STREAM, "\t.ALIGN 4\t; %d alignment actually requested\n", 1 << (LOG)); \
+ else \
+ fprintf (STREAM, "\t.ALIGN 2\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\t.balign %d,3,%d\n", 1 << (LOG), (MAX_SKIP)); \
+ } \
+ while (0)
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
new file mode 100644
index 000000000..48705b74b
--- /dev/null
+++ b/gcc/config/rx/rx.md
@@ -0,0 +1,2501 @@
+;; Machine Description for Renesas RX processors
+;; Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This mode iterator is used for sign- and zero-extensions.
+(define_mode_iterator small_int_modes [(HI "") (QI "")])
+
+;; We do not handle DFmode here because it is either
+;; the same as SFmode, or if -m64bit-doubles is active
+;; then all operations on doubles have to be handled by
+;; library functions.
+(define_mode_iterator register_modes
+ [(SF "ALLOW_RX_FPU_INSNS") (SI "") (HI "") (QI "")])
+
+(define_constants
+ [
+ (SP_REG 0)
+ (CC_REG 16)
+
+ (UNSPEC_LOW_REG 0)
+ (UNSPEC_HIGH_REG 1)
+
+ (UNSPEC_RTE 10)
+ (UNSPEC_RTFI 11)
+ (UNSPEC_NAKED 12)
+ (UNSPEC_CONST 13)
+
+ (UNSPEC_MOVSTR 20)
+ (UNSPEC_MOVMEM 21)
+ (UNSPEC_SETMEM 22)
+ (UNSPEC_STRLEN 23)
+ (UNSPEC_CMPSTRN 24)
+
+ (UNSPEC_BUILTIN_BRK 30)
+ (UNSPEC_BUILTIN_CLRPSW 31)
+ (UNSPEC_BUILTIN_INT 32)
+ (UNSPEC_BUILTIN_MACHI 33)
+ (UNSPEC_BUILTIN_MACLO 34)
+ (UNSPEC_BUILTIN_MULHI 35)
+ (UNSPEC_BUILTIN_MULLO 36)
+ (UNSPEC_BUILTIN_MVFACHI 37)
+ (UNSPEC_BUILTIN_MVFACMI 38)
+ (UNSPEC_BUILTIN_MVFC 39)
+ (UNSPEC_BUILTIN_MVFCP 40)
+ (UNSPEC_BUILTIN_MVTACHI 41)
+ (UNSPEC_BUILTIN_MVTACLO 42)
+ (UNSPEC_BUILTIN_MVTC 43)
+ (UNSPEC_BUILTIN_MVTIPL 44)
+ (UNSPEC_BUILTIN_RACW 45)
+ (UNSPEC_BUILTIN_REVW 46)
+ (UNSPEC_BUILTIN_RMPA 47)
+ (UNSPEC_BUILTIN_ROUND 48)
+ (UNSPEC_BUILTIN_SAT 49)
+ (UNSPEC_BUILTIN_SETPSW 50)
+ (UNSPEC_BUILTIN_WAIT 51)
+ ]
+)
+
+(define_attr "length" "" (const_int 8))
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Pipeline description.
+
+;; The RX only has a single pipeline. It has five stages (fetch,
+;; decode, execute, memory access, writeback), each of which normally
+;; takes a single CPU clock cycle.
+
+;; The timings attribute consists of two numbers: the first is the
+;; throughput, which is the number of cycles the instruction takes
+;; to execute and generate a result.  The second is the latency,
+;; which is the effective number of cycles the instruction takes to
+;; execute if its result is used by the following instruction.  The
+;; latency is always greater than or equal to the throughput.
+;; These values were taken from tables 2.13 and 2.14 in section 2.8
+;; of the RX610 Group Hardware Manual v0.11.
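+;; For example, (set_attr "timings" "12") describes an instruction
+;; with a throughput of one cycle and a latency of two cycles; it is
+;; matched by the "throughput__1_latency__2" reservation below.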
+
+;; Note - it would be nice to use strings rather than integers for
+;; the possible values of this attribute, so that we can have the
+;; gcc build mechanism check for values that are not supported by
+;; the reservations below. But this will not work because the code
+;; in rx_adjust_sched_cost() needs integers, not strings.
+
+(define_attr "timings" "" (const_int 11))
+
+(define_automaton "pipelining")
+(define_cpu_unit "throughput" "pipelining")
+
+(define_insn_reservation "throughput__1_latency__1" 1
+ (eq_attr "timings" "11") "throughput")
+(define_insn_reservation "throughput__1_latency__2" 2
+ (eq_attr "timings" "12") "throughput,nothing")
+(define_insn_reservation "throughput__2_latency__2" 1
+ (eq_attr "timings" "22") "throughput*2")
+(define_insn_reservation "throughput__3_latency__3" 1
+ (eq_attr "timings" "33") "throughput*3")
+(define_insn_reservation "throughput__3_latency__4" 2
+ (eq_attr "timings" "34") "throughput*3,nothing")
+(define_insn_reservation "throughput__4_latency__4" 1
+ (eq_attr "timings" "44") "throughput*4")
+(define_insn_reservation "throughput__4_latency__5" 2
+ (eq_attr "timings" "45") "throughput*4,nothing")
+(define_insn_reservation "throughput__5_latency__5" 1
+ (eq_attr "timings" "55") "throughput*5")
+(define_insn_reservation "throughput__5_latency__6" 2
+ (eq_attr "timings" "56") "throughput*5,nothing")
+(define_insn_reservation "throughput__6_latency__6" 1
+ (eq_attr "timings" "66") "throughput*6")
+(define_insn_reservation "throughput_10_latency_10" 1
+ (eq_attr "timings" "1010") "throughput*10")
+(define_insn_reservation "throughput_11_latency_11" 1
+ (eq_attr "timings" "1111") "throughput*11")
+(define_insn_reservation "throughput_16_latency_16" 1
+ (eq_attr "timings" "1616") "throughput*16")
+(define_insn_reservation "throughput_18_latency_18" 1
+ (eq_attr "timings" "1818") "throughput*18")
+
+;; ----------------------------------------------------------------------------
+
+;; Comparisons
+
+;; Note - we do not specify the two instructions necessary to perform
+;; a compare-and-branch in the cbranchsi4 pattern because that would
+;; allow the comparison to be moved away from the jump before the reload
+;; pass has completed.  That would be problematic because reload can
+;; generate ADDSI3 instructions, which would corrupt the PSW flags.
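+;; Instead the expander below leaves the combined form in place and
+;; the *cbranchsi4 insn is only split (via rx_split_cbranch) into a
+;; separate compare and conditional branch once reload has completed.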
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "rx_source_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
+)
+
+(define_insn_and_split "*cbranchsi4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "comparison_operator"
+ [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "rx_source_operand" "riQ")])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rx_split_cbranch (CCmode, GET_CODE (operands[3]),
+ operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "*cmpsi"
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 0 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:SI 1 "rx_source_operand" "r,Uint04,Int08,Sint16,Sint24,i,Q")))]
+ "reload_completed"
+ "cmp\t%Q1, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,5")]
+)
+
+;; Canonical method for representing TST.
+(define_insn_and_split "*cbranchsi4_tst"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "rx_zs_comparison_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "rx_source_operand" "riQ"))
+ (const_int 0)])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rx_split_cbranch (CC_ZSmode, GET_CODE (operands[3]),
+ XEXP (operands[3], 0), XEXP (operands[3], 1),
+ operands[2]);
+ DONE;
+})
+
+;; Various other ways that GCC codes "var & const"
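+;; The split below reconstructs the mask tested by the zero_extract:
+;; for example a width (operand 1) of 3 and an offset (operand 2) of
+;; 4 yield the mask ((1 << 3) - 1) << 4 == 0x70.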
+(define_insn_and_split "*cbranchsi4_tst_ext"
+ [(set (pc)
+ (if_then_else
+ (match_operator 4 "rx_z_comparison_operator"
+ [(zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "rx_constshift_operand" "")
+ (match_operand:SI 2 "rx_constshift_operand" ""))
+ (const_int 0)])
+ (match_operand 3 "label_ref_operand" "")
+ (pc)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT mask;
+ rtx x;
+
+ mask = 1;
+ mask <<= INTVAL (operands[1]);
+ mask -= 1;
+ mask <<= INTVAL (operands[2]);
+ x = gen_rtx_AND (SImode, operands[0], gen_int_mode (mask, SImode));
+
+ rx_split_cbranch (CC_ZSmode, GET_CODE (operands[4]),
+ x, const0_rtx, operands[3]);
+ DONE;
+})
+
+(define_insn "*tstsi"
+ [(set (reg:CC_ZS CC_REG)
+ (compare:CC_ZS
+ (and:SI (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "rx_source_operand" "r,i,Q"))
+ (const_int 0)))]
+ "reload_completed"
+ "tst\t%Q1, %0"
+ [(set_attr "timings" "11,11,33")
+ (set_attr "length" "3,7,6")]
+)
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "rx_fp_comparison_operator"
+ [(match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "rx_source_operand")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ "ALLOW_RX_FPU_INSNS"
+)
+
+(define_insn_and_split "*cbranchsf4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "rx_fp_comparison_operator"
+ [(match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "rx_source_operand" "rFQ")])
+ (match_operand 2 "label_ref_operand" "")
+ (pc)))]
+ "ALLOW_RX_FPU_INSNS"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rx_split_cbranch (CC_Fmode, GET_CODE (operands[3]),
+ operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "*cmpsf"
+ [(set (reg:CC_F CC_REG)
+ (compare:CC_F
+ (match_operand:SF 0 "register_operand" "r,r,r")
+ (match_operand:SF 1 "rx_source_operand" "r,F,Q")))]
+ "ALLOW_RX_FPU_INSNS && reload_completed"
+ "fcmp\t%1, %0"
+ [(set_attr "timings" "11,11,33")
+ (set_attr "length" "3,7,5")]
+)
+
+;; Flow Control Instructions:
+
+(define_insn "*conditional_branch"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "comparison_operator"
+ [(reg CC_REG) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "reload_completed"
+ "b%B1\t%0"
+ [(set_attr "length" "8") ;; This length is wrong, but it is
+ ;; too hard to compute statically.
+ (set_attr "timings" "33")] ;; The timing assumes that the branch is taken.
+)
+
+;; ----------------------------------------------------------------------------
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "bra\t%0"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jmp\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "tablejump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ { return flag_pic ? (TARGET_AS100_SYNTAX ? "\n?:\tbra\t%0"
+ : "\n1:\tbra\t%0")
+ : "jmp\t%0";
+ }
+ [(set_attr "timings" "33")
+ (set_attr "length" "2")]
+)
+
+(define_insn "simple_return"
+ [(return)]
+ ""
+ "rts"
+ [(set_attr "length" "1")
+ (set_attr "timings" "55")]
+)
+
+;; Unspec used so that the constant will not be invalid
+;; if -mmax-constant-size has been specified.
+(define_insn "deallocate_and_return"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const:SI (unspec:SI [(match_operand 0 "const_int_operand" "n")] UNSPEC_CONST))))
+ (return)]
+ ""
+ "rtsd\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "55")]
+)
+
+(define_insn "pop_and_return"
+ [(match_parallel 1 "rx_rtsd_vector"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "const_int_operand" "n")))])
+ (return)]
+ "reload_completed"
+ {
+ rx_emit_stack_popm (operands, false);
+ return "";
+ }
+ [(set_attr "length" "3")
+ (set_attr "timings" "56")]
+)
+
+(define_insn "fast_interrupt_return"
+ [(unspec_volatile [(return)] UNSPEC_RTFI) ]
+ ""
+ "rtfi"
+ [(set_attr "length" "2")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "exception_return"
+ [(unspec_volatile [(return)] UNSPEC_RTE) ]
+ ""
+ "rte"
+ [(set_attr "length" "2")
+ (set_attr "timings" "66")]
+)
+
+(define_insn "naked_return"
+ [(unspec_volatile [(return)] UNSPEC_NAKED) ]
+ ""
+ "; Naked function: epilogue provided by programmer."
+)
+
+
+;; Note - the following patterns do not use the "memory_operand"
+;; predicate or an "m" constraint because we do not allow symbol_refs
+;; or label_refs as legitimate memory addresses.  This matches the
+;; behaviour of most of the RX instructions.  Only the call/branch
+;; instructions are allowed to refer to symbols/labels directly.
+;; The call operands are in QImode because that is the value of
+;; FUNCTION_MODE.
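+;; For example, a call through a function pointer held in memory is
+;; expanded by first forcing the address into a register (see the
+;; force_reg calls below), since the call patterns only accept
+;; register or symbolic operands.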
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "general_operand")
+ (match_operand:SI 1 "general_operand"))]
+ ""
+ {
+ rtx dest = XEXP (operands[0], 0);
+
+ if (! rx_call_operand (dest, Pmode))
+ dest = force_reg (Pmode, dest);
+ emit_call_insn (gen_call_internal (dest));
+ DONE;
+ }
+)
+
+(define_insn "call_internal"
+ [(call (mem:QI (match_operand:SI 0 "rx_call_operand" "r,Symbol"))
+ (const_int 0))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ jsr\t%0
+ bsr\t%A0"
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "33")]
+)
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand")
+ (call (match_operand:QI 1 "general_operand")
+ (match_operand:SI 2 "general_operand")))]
+ ""
+ {
+ rtx dest = XEXP (operands[1], 0);
+
+ if (! rx_call_operand (dest, Pmode))
+ dest = force_reg (Pmode, dest);
+ emit_call_insn (gen_call_value_internal (operands[0], dest));
+ DONE;
+ }
+)
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=r,r")
+ (call (mem:QI (match_operand:SI 1 "rx_call_operand" "r,Symbol"))
+ (const_int 0)))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ jsr\t%1
+ bsr\t%A1"
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "33")]
+)
+
+;; Note - we do not allow indirect sibcalls (with the address
+;; held in a register) because we cannot guarantee that the register
+;; chosen will be a call-used one. If it is a call-saved register,
+;; then the epilogue code will corrupt it by popping the saved value
+;; off of the stack.
+(define_expand "sibcall"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "rx_symbolic_call_operand"))
+ (match_operand:SI 1 "general_operand"))
+ (return)])]
+ ""
+ {
+ if (MEM_P (operands[0]))
+ operands[0] = XEXP (operands[0], 0);
+ emit_call_insn (gen_sibcall_internal (operands[0]));
+ DONE;
+ }
+)
+
+(define_insn "sibcall_internal"
+ [(call (mem:QI (match_operand:SI 0 "rx_symbolic_call_operand" "Symbol"))
+ (const_int 0))
+ (return)]
+ ""
+ "bra\t%A0"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")]
+)
+
+(define_expand "sibcall_value"
+ [(parallel
+ [(set (match_operand 0 "register_operand")
+ (call (mem:QI (match_operand:SI 1 "rx_symbolic_call_operand"))
+ (match_operand:SI 2 "general_operand")))
+ (return)])]
+ ""
+ {
+ if (MEM_P (operands[1]))
+ operands[1] = XEXP (operands[1], 0);
+ emit_call_insn (gen_sibcall_value_internal (operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "sibcall_value_internal"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:QI (match_operand:SI 1 "rx_symbolic_call_operand" "Symbol"))
+ (const_int 0)))
+ (return)]
+ ""
+ "bra\t%A1"
+ [(set_attr "length" "4")
+ (set_attr "timings" "33")]
+)
+
+;; Function Prologue/Epilogue Instructions
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "rx_expand_prologue (); DONE;"
+)
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "rx_expand_epilogue (false); DONE;"
+)
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+ "rx_expand_epilogue (true); DONE;"
+)
+
+;; Move Instructions
+
+;; Note - we do not allow memory to memory moves, even though the ISA
+;; supports them. The reason is that the conditions on such moves are
+;; too restrictive, specifically the source addressing mode is limited
+;; by the destination addressing mode and vice versa.  (For example, it
+;; is not possible to use indexed register indirect addressing for one
+;; of the operands if the other operand is anything other than a register,
+;; but it is possible to use register relative addressing when the other
+;; operand also uses register relative or register indirect addressing).
+;;
+;; GCC does not support computing legitimate addresses based on the
+;; nature of other operands involved in the instruction, and reload is
+;; not smart enough to cope with a whole variety of different memory
+;; addressing constraints, so it is simpler and safer to just refuse
+;; to support memory to memory moves.
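+;; When the mov<mode> expander below is given two memory operands it
+;; copies the source into a fresh register first (via
+;; copy_to_mode_reg), turning the move into a load followed by a store.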
+
+(define_expand "mov<register_modes:mode>"
+ [(set (match_operand:register_modes 0 "general_operand")
+ (match_operand:register_modes 1 "general_operand"))]
+ ""
+ {
+ if (MEM_P (operand0) && MEM_P (operand1))
+ operands[1] = copy_to_mode_reg (<register_modes:MODE>mode, operand1);
+ if (CONST_INT_P (operand1)
+ && ! rx_is_legitimate_constant (operand1))
+ FAIL;
+ }
+)
+
+(define_insn "*mov<register_modes:mode>_internal"
+ [(set (match_operand:register_modes
+ 0 "nonimmediate_operand" "=r,r,r,r,r,r,m,Q,Q,Q,Q")
+ (match_operand:register_modes
+ 1 "general_operand" "Int08,Sint16,Sint24,i,r,m,r,Int08,Sint16,Sint24,i"))]
+ ""
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "3,4,5,6,2,4,6,5,6,7,8")
+ (set_attr "timings" "11,11,11,11,11,12,11,11,11,11,11")]
+)
+
+(define_insn "extend<small_int_modes:mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:small_int_modes
+ 1 "nonimmediate_operand" "r,m")))]
+ ""
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "2,6")
+ (set_attr "timings" "11,12")]
+)
+
+(define_insn "zero_extend<small_int_modes:mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:small_int_modes
+ 1 "nonimmediate_operand" "r,m")))]
+ ""
+ { return rx_gen_move_template (operands, true); }
+ [(set_attr "length" "2,4")
+ (set_attr "timings" "11,12")]
+)
+
+(define_insn "stack_push"
+ [(set (reg:SI SP_REG)
+ (minus:SI (reg:SI SP_REG)
+ (const_int 4)))
+ (set (mem:SI (reg:SI SP_REG))
+ (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "push.l\t%0"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "stack_pushm"
+ [(match_parallel 1 "rx_store_multiple_vector"
+ [(set (reg:SI SP_REG)
+ (minus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "const_int_operand" "n")))])]
+ "reload_completed"
+ {
+ rx_emit_stack_pushm (operands);
+ return "";
+ }
+ [(set_attr "length" "2")
+   (set_attr "timings" "44")]	;; The timing is a guesstimated average.
+)
+
+(define_insn "stack_pop"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (const_int 4)))]
+ ""
+ "pop\t%0"
+ [(set_attr "length" "2")
+ (set_attr "timings" "12")]
+)
+
+(define_insn "stack_popm"
+ [(match_parallel 1 "rx_load_multiple_vector"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "const_int_operand" "n")))])]
+ "reload_completed"
+ {
+ rx_emit_stack_popm (operands, true);
+ return "";
+ }
+ [(set_attr "length" "2")
+   (set_attr "timings" "45")]	;; The timing is a guesstimated average.
+)
+
+(define_insn_and_split "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "rx_source_operand" "riQ")]))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx flags, x;
+
+ flags = gen_rtx_REG (CCmode, CC_REG);
+ x = gen_rtx_COMPARE (CCmode, operands[2], operands[3]);
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ x = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, flags, const0_rtx);
+ x = gen_rtx_SET (VOIDmode, operands[0], x);
+ emit_insn (x);
+ DONE;
+})
+
+(define_insn "*sccc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(reg CC_REG) (const_int 0)]))]
+ "reload_completed"
+ "sc%B1.L\t%0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn_and_split "cstoresf4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "rx_fp_comparison_operator"
+ [(match_operand:SF 2 "register_operand" "r")
+ (match_operand:SF 3 "rx_source_operand" "rFQ")]))]
+ "ALLOW_RX_FPU_INSNS"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx flags, x;
+
+ flags = gen_rtx_REG (CC_Fmode, CC_REG);
+ x = gen_rtx_COMPARE (CC_Fmode, operands[2], operands[3]);
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ x = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, flags, const0_rtx);
+ x = gen_rtx_SET (VOIDmode, operands[0], x);
+ emit_insn (x);
+ DONE;
+})
+
+(define_expand "movsicc"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (if_then_else:SI (match_operand:SI 1 "comparison_operator")
+ (match_operand:SI 2 "nonmemory_operand")
+ (match_operand:SI 3 "nonmemory_operand")))
+ (clobber (reg:CC CC_REG))])]
+ ""
+{
+  /* One operand must be a constant or a register; the other must be a register.  */
+ if ( ! CONSTANT_P (operands[2])
+ && ! CONSTANT_P (operands[3])
+ && ! (REG_P (operands[2]) && REG_P (operands[3])))
+ FAIL;
+})
+
+(define_insn_and_split "*movsicc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (if_then_else:SI
+ (match_operator 5 "comparison_operator"
+ [(match_operand:SI 3 "register_operand" "r,r,r")
+ (match_operand:SI 4 "rx_source_operand" "riQ,riQ,riQ")])
+ (match_operand:SI 1 "nonmemory_operand" "i,ri,r")
+ (match_operand:SI 2 "nonmemory_operand" "ri,i,r")))
+ (clobber (reg:CC CC_REG))]
+ "(CONSTANT_P (operands[1]) || CONSTANT_P (operands[2]))
+ || (REG_P (operands[1]) && REG_P (operands[2]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx x, flags, op0, op1, op2;
+ enum rtx_code cmp_code;
+
+ flags = gen_rtx_REG (CCmode, CC_REG);
+ x = gen_rtx_COMPARE (CCmode, operands[3], operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, flags, x));
+
+ cmp_code = GET_CODE (operands[5]);
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+
+ /* If OP2 is the constant, reverse the sense of the move.
+ Likewise if both operands are registers but OP1 == OP0. */
+ if ((! CONSTANT_P (operands[1]) && CONSTANT_P (operands[2]))
+ || (REG_P (operands[1]) && REG_P (operands[2])
+ && rtx_equal_p (op0, op1)))
+ {
+ x = op1, op1 = op2, op2 = x;
+ cmp_code = reverse_condition (cmp_code);
+ }
+
+ /* If OP2 does not match the output, copy it into place. We have allowed
+ these alternatives so that the destination can legitimately be one of
+ the comparison operands without increasing register pressure. */
+ if (! rtx_equal_p (op0, op2))
+ emit_move_insn (op0, op2);
+
+ x = gen_rtx_fmt_ee (cmp_code, VOIDmode, flags, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (SImode, x, op1, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, op0, x));
+ DONE;
+})
+
+(define_insn "*stcc"
+ [(set (match_operand:SI 0 "register_operand" "+r,r,r,r")
+ (if_then_else:SI
+ (match_operator 2 "rx_z_comparison_operator"
+ [(reg CC_REG) (const_int 0)])
+ (match_operand:SI 1 "immediate_operand" "Sint08,Sint16,Sint24,i")
+ (match_dup 0)))]
+ "reload_completed
+ && ((GET_CODE (operands[2]) == EQ) || (GET_CODE (operands[2]) == NE))"
+ {
+ if (GET_CODE (operands[2]) == EQ)
+ return "stz\t%1, %0";
+ else
+ return "stnz\t%1, %0";
+ }
+ [(set_attr "length" "4,5,6,7")]
+)
+
+(define_insn "*stcc_reg"
+ [(set (match_operand:SI 0 "register_operand" "+r,r,r,r,r,r")
+ (if_then_else:SI
+ (match_operator 2 "comparison_operator"
+ [(reg CC_REG) (const_int 0)])
+ (match_operand:SI 1 "nonmemory_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i")
+ (match_dup 0)))]
+ "reload_completed"
+ {
+ PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
+ return "b%B2 1f\n\tmov %1, %0\n1:";
+ }
+ [(set_attr "length" "3,3,4,5,6,7")]
+)
+
+;; Arithmetic Instructions
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (abs:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ abs\t%0
+ abs\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "*abssi2_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (abs:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (set (reg CC_REG)
+ (compare (abs:SI (match_dup 1))
+ (const_int 0)))]
+ ;; Note - although the ABS instruction does set the O bit in the processor
+ ;; status word, it does not do so in a way that is comparable with the CMP
+ ;; instruction. Hence we use CC_ZSmode rather than CC_ZSOmode.
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ abs\t%0
+ abs\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r,r,r,r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,0,r,r,r,r,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,NEGint4,Sint08,Sint16,Sint24,i,0,r,Sint08,Sint16,Sint24,i,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ add\t%2, %0
+ add\t%2, %0
+ sub\t%N2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,2,3,4,5,6,2,3,3,4,5,6,5")]
+)
+
+(define_insn "*addsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r,r,r,r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,0,r,r,r,r,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,NEGint4,Sint08,Sint16,Sint24,i,0,r,Sint08,Sint16,Sint24,i,Q")))
+ (set (reg CC_REG)
+ (compare (plus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
+ "@
+ add\t%2, %0
+ add\t%2, %0
+ sub\t%N2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%2, %0
+ add\t%1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%2, %1, %0
+ add\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,2,3,4,5,6,2,3,3,4,5,6,5")]
+)
+
+;; A helper to expand the above with the CC_MODE filled in.
+(define_expand "addsi3_flags"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "rx_source_operand")))
+ (set (reg:CC_ZSC CC_REG)
+ (compare:CC_ZSC (plus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])]
+)
+
+(define_insn "adc_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC CC_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0"))
+ (match_operand:SI 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))
+ (clobber (reg:CC CC_REG))]
+ "reload_completed"
+  "adc\t%2, %0"
+ [(set_attr "timings" "11,11,11,11,11,33")
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+(define_insn "*adc_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC CC_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0"))
+ (match_operand:SI 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))
+ (set (reg CC_REG)
+ (compare
+ (plus:SI
+ (plus:SI
+ (ltu:SI (reg:CC CC_REG) (const_int 0))
+ (match_dup 1))
+ (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
+  "adc\t%2, %0"
+ [(set_attr "timings" "11,11,11,11,11,33")
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+;; Peepholes to match:
+;; (set (reg A) (reg B))
+;; (set (CC) (compare:CC (reg A/reg B) (const_int 0)))
+;; and replace them with the addsi3_flags pattern, using an add
+;; of zero to copy the register and set the condition code bits.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand"))
+ (set (reg:CC CC_REG)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ ""
+ [(parallel [(set (match_dup 0)
+ (plus:SI (match_dup 1) (const_int 0)))
+ (set (reg:CC_ZSC CC_REG)
+ (compare:CC_ZSC (plus:SI (match_dup 1) (const_int 0))
+ (const_int 0)))])]
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand"))
+ (set (reg:CC CC_REG)
+ (compare:CC (match_dup 1)
+ (const_int 0)))]
+ ""
+ [(parallel [(set (match_dup 0)
+ (plus:SI (match_dup 1) (const_int 0)))
+ (set (reg:CC_ZSC CC_REG)
+ (compare:CC_ZSC (plus:SI (match_dup 1) (const_int 0))
+ (const_int 0)))])]
+)
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "rx_source_operand")))]
+ ""
+{
+ rtx op0l, op0h, op1l, op1h, op2l, op2h;
+
+ op0l = gen_lowpart (SImode, operands[0]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ op0h = gen_highpart (SImode, operands[0]);
+ op1h = gen_highpart (SImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+
+ emit_insn (gen_adddi3_internal (op0l, op0h, op1l, op2l, op1h, op2h));
+ DONE;
+})
+
+(define_insn_and_split "adddi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "rx_source_operand" "riQ")))
+ (set (match_operand:SI 1 "register_operand" "=r")
+ (plus:SI
+ (plus:SI
+ (ltu:SI (plus:SI (match_dup 2) (match_dup 3)) (match_dup 2))
+ (match_operand:SI 4 "register_operand" "%1"))
+ (match_operand:SI 5 "rx_source_operand" "riQ")))
+ (clobber (match_scratch:SI 6 "=&r"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx op0l = operands[0];
+ rtx op0h = operands[1];
+ rtx op1l = operands[2];
+ rtx op2l = operands[3];
+ rtx op1h = operands[4];
+ rtx op2h = operands[5];
+ rtx scratch = operands[6];
+ rtx x;
+
+ if (reg_overlap_mentioned_p (op0l, op1h))
+ {
+ emit_move_insn (scratch, op0l);
+ op1h = scratch;
+ if (reg_overlap_mentioned_p (op0l, op2h))
+ op2h = scratch;
+ }
+ else if (reg_overlap_mentioned_p (op0l, op2h))
+ {
+ emit_move_insn (scratch, op0l);
+ op2h = scratch;
+ }
+
+ if (rtx_equal_p (op0l, op1l))
+ ;
+ /* It is preferable that op0l == op1l... */
+ else if (rtx_equal_p (op0l, op2l))
+ x = op1l, op1l = op2l, op2l = x;
+ /* ... but it is only a requirement if op2l == MEM. */
+ else if (MEM_P (op2l))
+ {
+ /* Let's hope that we still have a scratch register free. */
+ gcc_assert (op1h != scratch);
+ emit_move_insn (scratch, op2l);
+ op2l = scratch;
+ }
+
+ emit_insn (gen_addsi3_flags (op0l, op1l, op2l));
+
+ if (rtx_equal_p (op0h, op1h))
+ ;
+ else if (rtx_equal_p (op0h, op2h))
+ x = op1h, op1h = op2h, op2h = x;
+ else
+ {
+ emit_move_insn (op0h, op1h);
+ op1h = op0h;
+ }
+ emit_insn (gen_adc_internal (op0h, op1h, op2h));
+ DONE;
+})
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%1, %0
+ and\t%2, %1, %0
+ and\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,2,5,5")]
+)
+
+(define_insn "*andsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
+ (set (reg CC_REG)
+ (compare (and:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%2, %0
+ and\t%1, %0
+ and\t%2, %1, %0
+ and\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,2,5,5")]
+)
+
+;; Byte swap (single 32-bit value).
+(define_insn "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "revl\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Byte swap (single 16-bit value). Note - we ignore the swapping of the high 16-bits.
+(define_insn "bswaphi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+ ""
+ "revw\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (div:SI (match_operand:SI 1 "register_operand" "0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "div\t%Q2, %0"
+ [(set_attr "timings" "1111") ;; Strictly speaking the timing should be
+                              ;; 2222, but that is a worst-case scenario.
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "divu\t%Q2, %0"
+ [(set_attr "timings" "1010") ;; Strictly speaking the timing should be
+                              ;; 2020, but that is a worst-case scenario.
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+;; Note - these patterns are suppressed in big-endian mode because they
+;; generate a little-endian result, i.e. the most significant word of the
+;; result is placed in the higher-numbered register of the destination
+;; register pair.
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
+ (mult:DI (sign_extend:DI (match_operand:SI
+ 1 "register_operand" "%0,0,0,0,0,0"))
+ (sign_extend:DI (match_operand:SI
+ 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q"))))]
+ "! TARGET_BIG_ENDIAN_DATA"
+ "emul\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6")
+ (set_attr "timings" "22,22,22,22,22,44")]
+)
+
+;; See comment for mulsidi3.
+;; Note - the zero_extends are to distinguish this pattern from the
+;; mulsidi3 pattern. Immediate mode addressing is not supported
+;; because gcc cannot handle the expression: (zero_extend (const_int)).
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,0"))
+ (zero_extend:DI (match_operand:SI 2 "rx_compare_operand" "r,Q"))))]
+ "! TARGET_BIG_ENDIAN_DATA"
+ "emulu\t%Q2, %0"
+ [(set_attr "length" "3,6")
+ (set_attr "timings" "22,44")]
+)
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (smax:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "max\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6")
+ (set_attr "timings" "11,11,11,11,11,33")]
+)
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))]
+ ""
+ "min\t%Q2, %0"
+ [(set_attr "length" "3,4,5,6,7,6")
+ (set_attr "timings" "11,11,11,11,11,33")]
+)
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,0,r,r")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Uint04,Sint08,Sint16,Sint24,i,Q,0,r")))]
+ ""
+ "@
+ mul\t%2, %0
+ mul\t%2, %0
+ mul\t%2, %0
+ mul\t%2, %0
+ mul\t%2, %0
+ mul\t%Q2, %0
+ mul\t%Q2, %0
+ mul\t%1, %0
+ mul\t%2, %1, %0"
+ [(set_attr "length" "2,2,3,4,5,6,5,2,3")
+ (set_attr "timings" "11,11,11,11,11,11,33,11,11")]
+)
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ neg\t%0
+ neg\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+;; Note that the O and C flags are not set as per a normal compare,
+;; and thus are unusable in that context.
+(define_insn "*negsi2_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (set (reg CC_REG)
+ (compare (neg:SI (match_dup 1))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ neg\t%0
+ neg\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (not:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ not\t%0
+ not\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "*one_cmplsi2_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (not:SI (match_operand:SI 1 "register_operand" "0,r")))
+ (set (reg CC_REG)
+ (compare (not:SI (match_dup 1))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ not\t%0
+ not\t%1, %0"
+ [(set_attr "length" "2,3")]
+)
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%Q2, %0
+ or\t%1, %0
+ or\t%2, %1, %0
+ or\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,2,3,5")]
+)
+
+(define_insn "*iorsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
+ (set (reg CC_REG)
+ (compare (ior:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%2, %0
+ or\t%Q2, %0
+ or\t%1, %0
+ or\t%2, %1, %0
+ or\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
+ (set_attr "length" "2,2,3,4,5,6,2,3,5")]
+)
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "rotl\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "*rotlsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))
+ (set (reg CC_REG)
+ (compare (rotate:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "rotl\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "rotr\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "*rotrsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "rx_shift_operand" "rn")))
+ (set (reg CC_REG)
+ (compare (rotatert:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "rotr\t%2, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ shar\t%2, %0
+ shar\t%2, %0
+ shar\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+(define_insn "*ashrsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (set (reg CC_REG)
+ (compare (ashiftrt:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ shar\t%2, %0
+ shar\t%2, %0
+ shar\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ shlr\t%2, %0
+ shlr\t%2, %0
+ shlr\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+(define_insn "*lshrsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (set (reg CC_REG)
+ (compare (lshiftrt:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ shlr\t%2, %0
+ shlr\t%2, %0
+ shlr\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ shll\t%2, %0
+ shll\t%2, %0
+ shll\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+(define_insn "*ashlsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "0,0,r")
+ (match_operand:SI 2 "rx_shift_operand" "r,n,n")))
+ (set (reg CC_REG)
+ (compare (ashift:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "@
+ shll\t%2, %0
+ shll\t%2, %0
+ shll\t%2, %1, %0"
+ [(set_attr "length" "3,2,3")]
+)
+
+;; Saturate to 32-bits
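+;; The split below performs a normal add that sets the flags and then
+;; issues the SAT instruction, which clamps the result to the signed
+;; 32-bit range according to those flags.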
+(define_insn_and_split "ssaddsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ss_plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "rx_source_operand" "riQ")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (match_dup 1) (match_dup 2)))
+ (set (reg:CC_ZSC CC_REG)
+ (compare:CC_ZSC
+ (plus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])
+ (set (match_dup 0)
+ (unspec:SI [(match_dup 0) (reg:CC CC_REG)]
+ UNSPEC_BUILTIN_SAT))]
+ ""
+)
+
+(define_insn "*sat"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0")
+ (reg:CC CC_REG)]
+ UNSPEC_BUILTIN_SAT))]
+ "reload_completed"
+ "sat\t%0"
+ [(set_attr "length" "2")]
+)
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,n,r,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "@
+ sub\t%2, %0
+ sub\t%2, %0
+ add\t%N2, %0
+ sub\t%2, %1, %0
+ sub\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,33")
+ (set_attr "length" "2,2,6,3,5")]
+)
+
+;; Note that the O flag is set as if for (compare op1 op2), not for
+;; what is described here, (compare op0 0).
+(define_insn "*subsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0,0,r,0")
+ (match_operand:SI 2 "rx_source_operand" "r,Uint04,n,r,Q")))
+ (set (reg CC_REG)
+ (compare (minus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
+ "@
+ sub\t%2, %0
+ sub\t%2, %0
+ add\t%N2, %0
+ sub\t%2, %1, %0
+ sub\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,33")
+ (set_attr "length" "2,2,6,3,5")]
+)
+
+;; A helper to expand the above with the CC_MODE filled in.
+(define_expand "subsi3_flags"
+ [(parallel [(set (match_operand:SI 0 "register_operand")
+ (minus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "rx_source_operand")))
+ (set (reg:CC_ZSC CC_REG)
+ (compare:CC_ZSC (minus:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))])]
+)
+
+(define_insn "sbb_internal"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 1 "register_operand" " 0,0")
+ (match_operand:SI 2 "rx_compare_operand" " r,Q"))
+ (geu:SI (reg:CC CC_REG) (const_int 0))))
+ (clobber (reg:CC CC_REG))]
+ "reload_completed"
+ "sbb\t%2, %0"
+ [(set_attr "timings" "11,33")
+ (set_attr "length" "3,6")]
+)
+
+(define_insn "*sbb_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 1 "register_operand" " 0,0")
+ (match_operand:SI 2 "rx_compare_operand" " r,Q"))
+ (geu:SI (reg:CC CC_REG) (const_int 0))))
+ (set (reg CC_REG)
+ (compare
+ (minus:SI
+ (minus:SI (match_dup 1) (match_dup 2))
+ (geu:SI (reg:CC CC_REG) (const_int 0)))
+ (const_int 0)))]
+ "reload_completed"
+ "sbb\t%2, %0"
+ [(set_attr "timings" "11,33")
+ (set_attr "length" "3,6")]
+)
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (minus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "rx_compare_operand")))]
+ ""
+{
+ rtx op0l, op0h, op1l, op1h, op2l, op2h;
+
+ op0l = gen_lowpart (SImode, operands[0]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ op0h = gen_highpart (SImode, operands[0]);
+ op1h = gen_highpart (SImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+
+ emit_insn (gen_subdi3_internal (op0l, op0h, op1l, op2l, op1h, op2h));
+ DONE;
+})
+
+(define_insn_and_split "subdi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=&r,&r")
+ (minus:SI (match_operand:SI 2 "register_operand" " 0, r")
+ (match_operand:SI 3 "rx_compare_operand" "rQ, r")))
+ (set (match_operand:SI 1 "register_operand" "= r, r")
+ (minus:SI
+ (minus:SI
+ (match_operand:SI 4 "register_operand" " 1, 1")
+ (match_operand:SI 5 "rx_compare_operand" " rQ,rQ"))
+ (geu:SI (match_dup 2) (match_dup 3))))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ emit_insn (gen_subsi3_flags (operands[0], operands[2], operands[3]));
+ emit_insn (gen_sbb_internal (operands[1], operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "xor\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,33")
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+(define_insn "*xorsi3_flags"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+ (match_operand:SI 2 "rx_source_operand"
+ "r,Sint08,Sint16,Sint24,i,Q")))
+ (set (reg CC_REG)
+ (compare (xor:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))]
+ "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
+ "xor\t%Q2, %0"
+ [(set_attr "timings" "11,11,11,11,11,33")
+ (set_attr "length" "3,4,5,6,7,6")]
+)
+
+;; A set of peepholes to catch extending loads followed by arithmetic operations.
+;; We use iterators where possible to reduce the amount of typing and hence the
+;; possibilities for typos.
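+;; For example (schematically):
+;;   (set (reg:SI tmp) (sign_extend:SI (mem:HI addr)))
+;;   (set (reg:SI acc) (plus:SI (reg:SI acc) (reg:SI tmp)))
+;; becomes, provided tmp is dead after the addition:
+;;   (set (reg:SI acc) (plus:SI (reg:SI acc) (sign_extend:SI (mem:HI addr))))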
+
+(define_code_iterator extend_types [(zero_extend "") (sign_extend "")])
+(define_code_attr letter [(zero_extend "R") (sign_extend "Q")])
+
+(define_code_iterator memex_commutative [(plus "") (and "") (ior "") (xor "")])
+(define_code_iterator memex_noncomm [(div "") (udiv "") (minus "")])
+(define_code_iterator memex_nocc [(smax "") (smin "") (mult "")])
+
+(define_code_attr op [(plus "add") (and "and") (div "div") (udiv "divu") (smax "max") (smin "min") (mult "mul") (ior "or") (minus "sub") (xor "xor")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (parallel [(set (match_operand:SI 2 "register_operand")
+ (memex_commutative:SI (match_dup 0)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REG))])]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(parallel [(set:SI (match_dup 2)
+ (memex_commutative:SI (match_dup 2)
+ (extend_types:SI (match_dup 1))))
+ (clobber (reg:CC CC_REG))])]
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (parallel [(set (match_operand:SI 2 "register_operand")
+ (memex_commutative:SI (match_dup 2)
+ (match_dup 0)))
+ (clobber (reg:CC CC_REG))])]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(parallel [(set:SI (match_dup 2)
+ (memex_commutative:SI (match_dup 2)
+ (extend_types:SI (match_dup 1))))
+ (clobber (reg:CC CC_REG))])]
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (parallel [(set (match_operand:SI 2 "register_operand")
+ (memex_noncomm:SI (match_dup 2)
+ (match_dup 0)))
+ (clobber (reg:CC CC_REG))])]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(parallel [(set:SI (match_dup 2)
+ (memex_noncomm:SI (match_dup 2)
+ (extend_types:SI (match_dup 1))))
+ (clobber (reg:CC CC_REG))])]
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (set (match_operand:SI 2 "register_operand")
+ (memex_nocc:SI (match_dup 0)
+ (match_dup 2)))]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(set:SI (match_dup 2)
+ (memex_nocc:SI (match_dup 2)
+ (extend_types:SI (match_dup 1))))]
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (set (match_operand:SI 2 "register_operand")
+ (memex_nocc:SI (match_dup 2)
+ (match_dup 0)))]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(set:SI (match_dup 2)
+ (memex_nocc:SI (match_dup 2)
+ (extend_types:SI (match_dup 1))))]
+)
+
+(define_insn "*<memex_commutative:code>si3_<extend_types:code><small_int_modes:mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (memex_commutative:SI (match_operand:SI 1 "register_operand" "%0")
+ (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))
+ (clobber (reg:CC CC_REG))]
+ "(optimize < 3 || optimize_size)"
+ "<memex_commutative:op>\t%<extend_types:letter>2, %0"
+ [(set_attr "timings" "33")
+   (set_attr "length" "5")]	;; Worst-case scenario.  FIXME: If we defined separate patterns
+) ;; rather than using iterators we could specify exact sizes.
+
+(define_insn "*<memex_noncomm:code>si3_<extend_types:code><small_int_modes:mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (memex_noncomm:SI (match_operand:SI 1 "register_operand" "0")
+ (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))
+ (clobber (reg:CC CC_REG))]
+ "(optimize < 3 || optimize_size)"
+ "<memex_noncomm:op>\t%<extend_types:letter>2, %0"
+ [(set_attr "timings" "33")
+   (set_attr "length" "5")]	;; Worst-case scenario.  FIXME: If we defined separate patterns
+) ;; rather than using iterators we could specify exact sizes.
+
+(define_insn "*<memex_nocc:code>si3_<extend_types:code><small_int_modes:mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (memex_nocc:SI (match_operand:SI 1 "register_operand" "%0")
+ (extend_types:SI (match_operand:small_int_modes 2 "rx_restricted_mem_operand" "Q"))))]
+ "(optimize < 3 || optimize_size)"
+ "<memex_nocc:op>\t%<extend_types:letter>2, %0"
+ [(set_attr "timings" "33")
+   (set_attr "length" "5")]	;; Worst-case scenario.  FIXME: If we defined separate patterns
+) ;; rather than using iterators we could specify exact sizes.
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand")))
+ (set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 2 "register_operand")
+ (match_dup 0)))]
+ "peep2_regno_dead_p (2, REGNO (operands[0])) && (optimize < 3 || optimize_size)"
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_dup 2)
+ (extend_types:SI (match_dup 1))))]
+)
+
+;; Convert:
+;; (set (reg1) (sign_extend (mem)))
+;; (set (reg2) (zero_extend (reg1)))
+;; into
+;; (set (reg2) (zero_extend (mem)))
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:small_int_modes 1 "memory_operand")))
+ (set (match_operand:SI 2 "register_operand")
+ (zero_extend:SI (match_operand:small_int_modes 3 "register_operand")))]
+ "REGNO (operands[0]) == REGNO (operands[3])
+ && (REGNO (operands[0]) == REGNO (operands[2])
+ || peep2_regno_dead_p (2, REGNO (operands[0])))"
+ [(set (match_dup 2)
+ (zero_extend:SI (match_dup 1)))]
+)
+
+;; Remove the redundant sign extension from:
+;; (set (reg) (extend (mem)))
+;; (set (reg) (extend (reg)))
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (extend_types:SI (match_operand:small_int_modes 1 "memory_operand")))
+ (set (match_dup 0)
+ (extend_types:SI (match_operand:small_int_modes 2 "register_operand")))]
+ "REGNO (operands[0]) == REGNO (operands[2])"
+ [(set (match_dup 0) (extend_types:SI (match_dup 1)))]
+)
+
+(define_insn "*comparesi3_<extend_types:code><small_int_modes:mode>"
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 0 "register_operand" "=r")
+ (extend_types:SI (match_operand:small_int_modes 1 "rx_restricted_mem_operand" "Q"))))]
+ "(optimize < 3 || optimize_size)"
+ "cmp\t%<extend_types:letter>1, %0"
+ [(set_attr "timings" "33")
+   (set_attr "length" "5")]	;; Worst-case scenario.  FIXME: If we defined separate patterns
+) ;; rather than using iterators we could specify exact sizes.
+
+;; Floating Point Instructions
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (plus:SF (match_operand:SF 1 "register_operand" "%0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "fadd\t%2, %0"
+ [(set_attr "timings" "44,44,66")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (div:SF (match_operand:SF 1 "register_operand" "0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "fdiv\t%2, %0"
+ [(set_attr "timings" "1616,1616,1818")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (mult:SF (match_operand:SF 1 "register_operand" "%0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "fmul\t%2, %0"
+ [(set_attr "timings" "33,33,55")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r")
+ (minus:SF (match_operand:SF 1 "register_operand" "0,0,0")
+ (match_operand:SF 2 "rx_source_operand" "r,F,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "fsub\t%Q2, %0"
+ [(set_attr "timings" "44,44,66")
+ (set_attr "length" "3,7,5")]
+)
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:SF 1 "rx_compare_operand" "r,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "ftoi\t%Q1, %0"
+ [(set_attr "timings" "22,44")
+ (set_attr "length" "3,5")]
+)
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=r,r")
+ (float:SF (match_operand:SI 1 "rx_compare_operand" "r,Q")))
+ (clobber (reg:CC CC_REG))]
+ "ALLOW_RX_FPU_INSNS"
+ "itof\t%Q1, %0"
+ [(set_attr "timings" "22,44")
+ (set_attr "length" "3,6")]
+)
+
+;; Bit manipulation instructions.
+
+;; ??? The *_in_memory patterns will not be matched without further help.
+;; At one time we had the insv expander generate them, but I suspect that
+;; in general we get better performance by exposing the register load to
+;; the optimizers.
+;;
+;; An alternate solution would be to re-organize these patterns so
+;; that they allow both register and memory operands.  This would allow the
+;; register allocator to spill and not load the register operand. This
+;; would be possible only for operations for which we have a constant
+;; bit offset, so that we can adjust the address by ofs/8 and replace
+;; the offset in the insn by ofs%8.
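+;; For example, with a constant bit offset of 11 the address could be
+;; advanced by 11 / 8 == 1 byte and the insn could then operate on
+;; bit 11 % 8 == 3 of the byte at that address.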
+
+(define_insn "*bitset"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "rx_shift_operand" "ri"))
+ (match_operand:SI 2 "register_operand" "0")))]
+ ""
+ "bset\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "*bitset_in_memory"
+ [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
+ (ior:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "nonmemory_operand" "ri"))
+ (match_dup 0)))]
+ ""
+ "bset\t%1, %0.B"
+ [(set_attr "length" "3")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "*bitinvert"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (xor:SI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "rx_shift_operand" "ri"))
+ (match_operand:SI 2 "register_operand" "0")))]
+ ""
+ "bnot\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "*bitinvert_in_memory"
+ [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
+ (xor:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "nonmemory_operand" "ri"))
+ (match_dup 0)))]
+ ""
+ "bnot\t%1, %0.B"
+ [(set_attr "length" "5")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "*bitclr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (not:SI
+ (ashift:SI
+ (const_int 1)
+ (match_operand:SI 1 "rx_shift_operand" "ri")))
+ (match_operand:SI 2 "register_operand" "0")))]
+ ""
+ "bclr\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+(define_insn "*bitclr_in_memory"
+ [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
+ (and:QI (not:QI
+ (ashift:QI
+ (const_int 1)
+ (match_operand:QI 1 "nonmemory_operand" "ri")))
+ (match_dup 0)))]
+ ""
+ "bclr\t%1, %0.B"
+ [(set_attr "length" "3")
+ (set_attr "timings" "33")]
+)
+
+(define_insn "*insv_imm"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "rx_shift_operand" "ri"))
+ (match_operand:SI 2 "const_int_operand" ""))]
+ ""
+{
+ if (INTVAL (operands[2]) & 1)
+ return "bset\t%1, %0";
+ else
+ return "bclr\t%1, %0";
+}
+ [(set_attr "length" "3")]
+)
+
+(define_insn_and_split "rx_insv_reg"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operand:SI 2 "register_operand" "r"))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1))
+ (match_dup 3))]
+{
+ rtx flags, x;
+
+ /* Emit tst #1, op2. */
+ flags = gen_rtx_REG (CC_ZSmode, CC_REG);
+ x = gen_rtx_AND (SImode, operands[2], const1_rtx);
+ x = gen_rtx_COMPARE (CC_ZSmode, x, const0_rtx);
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ /* Emit bmne. */
+ operands[3] = gen_rtx_NE (SImode, flags, const0_rtx);
+})
+
+(define_insn_and_split "*insv_cond"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operator:SI 4 "comparison_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "rx_source_operand" "riQ")]))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1))
+ (match_dup 4))]
+{
+ rtx flags, x;
+
+ flags = gen_rtx_REG (CCmode, CC_REG);
+ x = gen_rtx_COMPARE (CCmode, operands[2], operands[3]);
+ x = gen_rtx_SET (VOIDmode, flags, x);
+ emit_insn (x);
+
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[4]), SImode,
+ flags, const0_rtx);
+})
+
+(define_insn "*bmcc"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operator:SI 2 "comparison_operator"
+ [(reg CC_REG) (const_int 0)]))]
+ "reload_completed"
+ "bm%B2\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Work around the fact that X=Y<0 is preferentially expanded as a shift.
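+;; (That is, for the C statement X = Y < 0 the compiler prefers a right
+;; shift of Y by 31 bits; the split below rewrites that shift form back
+;; into a single-bit insert of the LT condition.)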
+(define_insn_and_split "*insv_cond_lt"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:SI 1 "const_int_operand" ""))
+ (match_operator:SI 3 "rshift_operator"
+ [(match_operand:SI 2 "register_operand" "r")
+ (const_int 31)]))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 1) (match_dup 1))
+ (lt:SI (match_dup 2) (const_int 0)))
+ (clobber (reg:CC CC_REG))])]
+ ""
+)
+
+(define_expand "insv"
+ [(set (zero_extract:SI
+ (match_operand:SI 0 "register_operand") ;; Destination
+ (match_operand:SI 1 "const_int_operand") ;; # of bits to set
+ (match_operand:SI 2 "nonmemory_operand")) ;; Starting bit
+ (match_operand:SI 3 "nonmemory_operand"))] ;; Bits to insert
+ ""
+{
+ /* We only handle single-bit inserts. */
+ if (!CONST_INT_P (operands[1]) || INTVAL (operands[1]) != 1)
+ FAIL;
+
+ /* Either the bit to insert or the position must be constant. */
+ if (CONST_INT_P (operands[3]))
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 1);
+ else if (CONST_INT_P (operands[2]))
+ {
+ emit_insn (gen_rx_insv_reg (operands[0], operands[2], operands[3]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+;; Atomic exchange operation.
+
+(define_insn "sync_lock_test_and_setsi"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 1 "rx_compare_operand" "=r,Q"))
+ (set (match_dup 1)
+ (match_operand:SI 2 "register_operand" "0,0"))]
+ ""
+ "xchg\t%1, %0"
+ [(set_attr "length" "3,6")
+ (set_attr "timings" "22")]
+)
+
+;; Block move functions.
+
+(define_expand "movstr"
+ [(set (match_operand:BLK 1 "memory_operand") ;; Dest
+ (match_operand:BLK 2 "memory_operand")) ;; Source
+ (use (match_operand:SI 0 "register_operand")) ;; Updated Dest
+ ]
+ ""
+ {
+ rtx addr1 = gen_rtx_REG (SImode, 1);
+ rtx addr2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+ rtx dest_copy = gen_reg_rtx (SImode);
+
+ emit_move_insn (len, GEN_INT (-1));
+ emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+ emit_move_insn (dest_copy, addr1);
+ emit_insn (gen_rx_movstr ());
+ emit_move_insn (len, GEN_INT (-1));
+ emit_insn (gen_rx_strend (operands[0], dest_copy));
+ DONE;
+ }
+)
+
+(define_insn "rx_movstr"
+ [(set (mem:BLK (reg:SI 1))
+ (mem:BLK (reg:SI 2)))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVSTR)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "smovu"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_insn "rx_strend"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r")
+ (reg:SI 3)] UNSPEC_STRLEN))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ (clobber (reg:CC CC_REG))
+ ]
+ ""
+ "mov\t%1, r1\n\tmov\t#0, r2\n\tsuntil.b\n\tmov\tr1, %0\n\tsub\t#1, %0"
+ [(set_attr "length" "10")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "movmemsi"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand") ;; Dest
+ (match_operand:BLK 1 "memory_operand")) ;; Source
+ (use (match_operand:SI 2 "register_operand")) ;; Length in bytes
+ (match_operand 3 "immediate_operand") ;; Align
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)]
+ )]
+ ""
+ {
+ rtx addr1 = gen_rtx_REG (SImode, 1);
+ rtx addr2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+  /* Do not use this when the source or destination is volatile - the
+     SMOVF instruction will read and write in word-sized blocks, which
+     may be outside the valid address range.  */
+ if (MEM_P (operands[0]) && MEM_VOLATILE_P (operands[0]))
+ FAIL;
+ if (MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))
+ FAIL;
+
+ if (REG_P (operands[0]) && (REGNO (operands[0]) == 2
+ || REGNO (operands[0]) == 3))
+ FAIL;
+ if (REG_P (operands[1]) && (REGNO (operands[1]) == 1
+ || REGNO (operands[1]) == 3))
+ FAIL;
+ if (REG_P (operands[2]) && (REGNO (operands[2]) == 1
+ || REGNO (operands[2]) == 2))
+ FAIL;
+
+ emit_move_insn (addr1, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[2], NULL_RTX));
+ operands[0] = replace_equiv_address_nv (operands[0], addr1);
+ operands[1] = replace_equiv_address_nv (operands[1], addr2);
+ emit_insn (gen_rx_movmem ());
+ DONE;
+ }
+)
+
+(define_insn "rx_movmem"
+ [(set (mem:BLK (reg:SI 1))
+ (mem:BLK (reg:SI 2)))
+ (use (reg:SI 3))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "smovf"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "setmemsi"
+ [(set (match_operand:BLK 0 "memory_operand") ;; Dest
+ (match_operand:QI 2 "nonmemory_operand")) ;; Value
+ (use (match_operand:SI 1 "nonmemory_operand")) ;; Length
+ (match_operand 3 "immediate_operand") ;; Align
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_SETMEM)]
+ ""
+ {
+ rtx addr = gen_rtx_REG (SImode, 1);
+ rtx val = gen_rtx_REG (QImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (addr, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[1], NULL_RTX));
+ emit_move_insn (val, operands[2]);
+ emit_insn (gen_rx_setmem ());
+ DONE;
+ }
+)
+
+(define_insn "rx_setmem"
+ [(set (mem:BLK (reg:SI 1))
+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_SETMEM))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 3))]
+ ""
+ "sstr.b"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI 0 "register_operand") ;; Result
+ (unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1
+ (match_operand:BLK 2 "memory_operand")] ;; String2
+ UNSPEC_CMPSTRN))
+ (use (match_operand:SI 3 "register_operand")) ;; Max Length
+ (match_operand:SI 4 "immediate_operand")] ;; Known Align
+ ""
+ {
+ rtx str1 = gen_rtx_REG (SImode, 1);
+ rtx str2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (str1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (str2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+ emit_move_insn (len, force_operand (operands[3], NULL_RTX));
+
+ emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+)
+
+(define_expand "cmpstrsi"
+ [(set (match_operand:SI 0 "register_operand") ;; Result
+ (unspec_volatile:SI [(match_operand:BLK 1 "memory_operand") ;; String1
+ (match_operand:BLK 2 "memory_operand")] ;; String2
+ UNSPEC_CMPSTRN))
+ (match_operand:SI 3 "immediate_operand")] ;; Known Align
+ ""
+ {
+ rtx str1 = gen_rtx_REG (SImode, 1);
+ rtx str2 = gen_rtx_REG (SImode, 2);
+ rtx len = gen_rtx_REG (SImode, 3);
+
+ emit_move_insn (str1, force_reg (SImode, XEXP (operands[1], 0)));
+ emit_move_insn (str2, force_reg (SImode, XEXP (operands[2], 0)));
+ emit_move_insn (len, GEN_INT (-1));
+
+ emit_insn (gen_rx_cmpstrn (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "rx_cmpstrn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(reg:SI 1) (reg:SI 2) (reg:SI 3)]
+ UNSPEC_CMPSTRN))
+ (use (match_operand:BLK 1 "memory_operand" "m"))
+ (use (match_operand:BLK 2 "memory_operand" "m"))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "scmpu ; Perform the string comparison
+ mov #-1, %0 ; Set up -1 result (which cannot be created
+ ; by the SC insn)
+ bnc ?+ ; If Carry is not set, skip over
+ scne.L %0 ; Set result based on Z flag
+?:
+"
+ [(set_attr "length" "9")
+ (set_attr "timings" "1111")] ;; The timing is a guesstimate.
+)
+
+;; Builtin Functions
+;;
+;; GCC does not have the ability to generate the following instructions
+;; on its own, so they are provided as builtins instead.  To use them
+;; from a program, invoke them as __builtin_rx_<insn_name>; for
+;; example:
+;;
+;; int short_byte_swap (int arg) { return __builtin_rx_revw (arg); }
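+;;
+;; As a slightly larger sketch (illustrative only; see rx.c for the
+;; exact semantics of each builtin), the accumulator builtins below
+;; could be combined into a 16-bit multiply-accumulate loop:
+;;
+;; int dot16 (short *a, short *b, int n)
+;; {
+;;   __builtin_rx_mvtachi (0);           /* Clear the accumulator.  */
+;;   __builtin_rx_mvtaclo (0);
+;;   while (n--)
+;;     __builtin_rx_maclo (*a++, *b++);  /* Accumulate low-half product.  */
+;;   return __builtin_rx_mvfachi ();     /* High word of the accumulator.  */
+;; }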
+
+;;---------- Accumulator Support ------------------------
+
+;; Multiply & Accumulate (high)
+(define_insn "machi"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MACHI)]
+ ""
+ "machi\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply & Accumulate (low)
+(define_insn "maclo"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MACLO)]
+ ""
+ "maclo\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply (high)
+(define_insn "mulhi"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MULHI)]
+ ""
+ "mulhi\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Multiply (low)
+(define_insn "mullo"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_MULLO)]
+ ""
+ "mullo\t%0, %1"
+ [(set_attr "length" "3")]
+)
+
+;; Move from Accumulator (high)
+(define_insn "mvfachi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)]
+ UNSPEC_BUILTIN_MVFACHI))]
+ ""
+ "mvfachi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move from Accumulator (middle)
+(define_insn "mvfacmi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)]
+ UNSPEC_BUILTIN_MVFACMI))]
+ ""
+ "mvfacmi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to Accumulator (high)
+(define_insn "mvtachi"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACHI)]
+ ""
+ "mvtachi\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to Accumulator (low)
+(define_insn "mvtaclo"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_BUILTIN_MVTACLO)]
+ ""
+ "mvtaclo\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Round Accumulator
+(define_insn "racw"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_RACW)]
+ ""
+ "racw\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Repeat multiply and accumulate
+(define_insn "rmpa"
+ [(unspec:SI [(const_int 0) (reg:SI 1) (reg:SI 2) (reg:SI 3)
+ (reg:SI 4) (reg:SI 5) (reg:SI 6)]
+ UNSPEC_BUILTIN_RMPA)
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))]
+ ""
+ "rmpa"
+ [(set_attr "length" "2")
+ (set_attr "timings" "1010")]
+)
+
+;;---------- Arithmetic ------------------------
+
+;; Byte swap (two 16-bit values).
+(define_insn "revw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_BUILTIN_REVW))]
+ ""
+ "revw\t%1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Round to integer.
+(define_insn "lrintsf2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "rx_compare_operand" "r,Q")]
+ UNSPEC_BUILTIN_ROUND))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "round\t%1, %0"
+ [(set_attr "timings" "22,44")
+ (set_attr "length" "3,5")]
+)
+
+;;---------- Control Registers ------------------------
+
+;; Clear Processor Status Word
+(define_insn "clrpsw"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_CLRPSW)
+ (clobber (reg:CC CC_REG))]
+ ""
+ "clrpsw\t%F0"
+ [(set_attr "length" "2")]
+)
+
+;; Set Processor Status Word
+(define_insn "setpsw"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_SETPSW)
+ (clobber (reg:CC CC_REG))]
+ ""
+ "setpsw\t%F0"
+ [(set_attr "length" "2")]
+)
+
+;; Move from control register
+(define_insn "mvfc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_MVFC))]
+ ""
+ "mvfc\t%C1, %0"
+ [(set_attr "length" "3")]
+)
+
+;; Move to control register
+(define_insn "mvtc"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i,i")
+ (match_operand:SI 1 "nonmemory_operand" "r,i")]
+ UNSPEC_BUILTIN_MVTC)]
+ ""
+ "mvtc\t%1, %C0"
+ [(set_attr "length" "3,7")]
+  ;; Ignore possible clobbering of the comparison flags in the
+  ;; PSW register.  This is a cc0 target, so any cc0-setting
+  ;; instruction will always be paired with a cc0 user, with no
+  ;; possibility of this instruction being placed between them.
+)
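+
+;; (From C this is reachable as, for example,
+;;    __builtin_rx_mvtc (0, value);
+;; where the first, constant argument selects the control register;
+;; the register numbering used here is illustrative.)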
+
+;; Move to interrupt priority level
+(define_insn "mvtipl"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "Uint04")]
+ UNSPEC_BUILTIN_MVTIPL)]
+ ""
+ "mvtipl\t%0"
+ [(set_attr "length" "3")]
+)
+
+;;---------- Interrupts ------------------------
+
+;; Break
+(define_insn "brk"
+ [(unspec_volatile [(const_int 0)]
+ UNSPEC_BUILTIN_BRK)]
+ ""
+ "brk"
+ [(set_attr "length" "1")
+ (set_attr "timings" "66")]
+)
+
+;; Interrupt
+(define_insn "int"
+ [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_INT)]
+ ""
+ "int\t%0"
+ [(set_attr "length" "3")]
+)
+
+;; Wait
+(define_insn "wait"
+ [(unspec_volatile [(const_int 0)]
+ UNSPEC_BUILTIN_WAIT)]
+ ""
+ "wait"
+ [(set_attr "length" "2")]
+)
+
+;;---------- CoProcessor Support ------------------------
+
+;; FIXME: The instructions are currently commented out because
+;; the bit patterns have not been finalized, so the assembler
+;; does not support them. Once they are decided and the assembler
+;; supports them, enable the instructions here.
+
+;; Move from co-processor register
+(define_insn "mvfcp"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "immediate_operand" "i")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_BUILTIN_MVFCP))]
+ ""
+ "; mvfcp\t%1, %0, %2"
+ [(set_attr "length" "5")]
+)
+
+;;---------- Misc ------------------------
+
+;; Required by cfglayout.c...
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "1")]
+)
diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt
new file mode 100644
index 000000000..35143dd0f
--- /dev/null
+++ b/gcc/config/rx/rx.opt
@@ -0,0 +1,99 @@
+; Command line options for the Renesas RX port of GCC.
+; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+; Contributed by Red Hat.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+;---------------------------------------------------
+
+; The default is -fpu -m32bit-doubles.
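+
+; For example (an illustrative command line), a big-endian-data build
+; with 64-bit doubles and no FPU instructions might be compiled with:
+;   gcc -m64bit-doubles -mbig-endian-data -nofpu foo.c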
+
+m64bit-doubles
+Target RejectNegative Mask(64BIT_DOUBLES) Report
+Store doubles in 64 bits.
+
+m32bit-doubles
+Target RejectNegative InverseMask(64BIT_DOUBLES) Report
+Store doubles in 32 bits. This is the default.
+
+nofpu
+Target RejectNegative Alias(mnofpu)
+Disable the use of RX FPU instructions.
+
+mnofpu
+Target RejectNegative Mask(NO_USE_FPU) Report Undocumented
+
+fpu
+Target RejectNegative InverseMask(NO_USE_FPU) Report
+Enable the use of RX FPU instructions. This is the default.
+
+;---------------------------------------------------
+
+mcpu=
+Target RejectNegative Joined Var(rx_cpu_name) Report
+Specify the target RX cpu type.
+
+;---------------------------------------------------
+
+mbig-endian-data
+Target RejectNegative Mask(BIG_ENDIAN_DATA) Report
+Data is stored in big-endian format.
+
+mlittle-endian-data
+Target RejectNegative InverseMask(BIG_ENDIAN_DATA) Report
+Data is stored in little-endian format. This is the default.
+
+;---------------------------------------------------
+
+msmall-data-limit=
+Target RejectNegative Joined UInteger Var(rx_small_data_limit) Init(0)
+Maximum size of global and static variables which can be placed into the small data area.
+
+;---------------------------------------------------
+
+msim
+Target
+Use the simulator runtime.
+
+;---------------------------------------------------
+
+mas100-syntax
+Target Mask(AS100_SYNTAX) Report
+Generate assembler output that is compatible with the Renesas AS100 assembler. This may restrict some of the compiler's capabilities. The default is to generate GAS-compatible syntax.
+
+;---------------------------------------------------
+
+mrelax
+Target
+Enable linker relaxation.
+
+;---------------------------------------------------
+
+mmax-constant-size=
+Target RejectNegative Joined UInteger Var(rx_max_constant_size) Init(0)
+Maximum size in bytes of constant values allowed as operands.
+
+;---------------------------------------------------
+
+mint-register=
+Target RejectNegative Joined UInteger Var(rx_interrupt_registers) Init(0)
+Specify the number of registers to reserve for interrupt handlers.
+
+;---------------------------------------------------
+
+msave-acc-in-interrupts
+Target Mask(SAVE_ACC_REGISTER)
+Specify whether interrupt functions should save and restore the accumulator register.
diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx
new file mode 100644
index 000000000..7990bcfaa
--- /dev/null
+++ b/gcc/config/rx/t-rx
@@ -0,0 +1,34 @@
+# Makefile fragment for building GCC for the Renesas RX target.
+# Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+# Contributed by Red Hat.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3, or (at your
+# option) any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+# the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Enable multilibs:
+
+MULTILIB_OPTIONS = m64bit-doubles nofpu mbig-endian-data
+MULTILIB_DIRNAMES = 64-bit-double no-fpu-libs big-endian-data
+
+MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 nofpu=mcpu?RX200
+
+MULTILIB_EXCEPTIONS =
+MULTILIB_EXTRA_OPTS =
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
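+
+# As configured above, the three independent options give eight library
+# variants; for instance (illustrative output), "gcc --print-multi-lib"
+# would list entries such as:
+#   .;
+#   64-bit-double;@m64bit-doubles
+#   64-bit-double/no-fpu-libs/big-endian-data;@m64bit-doubles@nofpu@mbig-endian-data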
diff --git a/gcc/config/s390/2064.md b/gcc/config/s390/2064.md
new file mode 100644
index 000000000..143978334
--- /dev/null
+++ b/gcc/config/s390/2064.md
@@ -0,0 +1,135 @@
+;; Scheduling description for z900 (cpu 2064).
+;; Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+;; Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+;; Ulrich Weigand (uweigand@de.ibm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;
+;; References:
+;; The microarchitecture of the IBM eServer z900 processor.
+;; E.M. Schwarz et al.
+;; IBM Journal of Research and Development Vol. 46 No 4/5, 2002.
+;;
+;; z900 (cpu 2064) pipeline
+;;
+;; dec
+;; --> | <---
+;; LA bypass | agen |
+;; | | |
+;; --- c1 | Load bypass
+;; | |
+;; c2----
+;; |
+;; e1
+;; |
+;; wr
+
+;; This scheduler description is also used for the g5 and g6.
+
+(define_automaton "z_ipu")
+(define_cpu_unit "z_e1" "z_ipu")
+(define_cpu_unit "z_wr" "z_ipu")
+
+
+(define_insn_reservation "z_la" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "la"))
+ "z_e1,z_wr")
+
+(define_insn_reservation "z_larl" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "larl"))
+ "z_e1,z_wr")
+
+(define_insn_reservation "z_load" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "load"))
+ "z_e1,z_wr")
+
+(define_insn_reservation "z_store" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "store"))
+ "z_e1,z_wr")
+
+(define_insn_reservation "z_sem" 2
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "sem"))
+ "z_e1*2,z_wr")
+
+(define_insn_reservation "z_call" 5
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (eq_attr "type" "jsr"))
+ "z_e1*5,z_wr")
+
+(define_insn_reservation "z_mul" 5
+ (and (eq_attr "cpu" "g5,g6,z900")
+ (eq_attr "type" "imulsi,imulhi"))
+ "z_e1*5,z_wr")
+
+(define_insn_reservation "z_inf" 10
+ (and (eq_attr "cpu" "g5,g6,z900")
+ (eq_attr "type" "idiv,imuldi"))
+ "z_e1*10,z_wr")
+
+;; For everything else we check the atype flag.
+
+(define_insn_reservation "z_int" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (and (not (eq_attr "type" "la,larl,load,store,jsr"))
+ (eq_attr "atype" "reg")))
+ "z_e1,z_wr")
+
+(define_insn_reservation "z_agen" 1
+ (and (eq_attr "cpu" "z900,g5,g6")
+ (and (not (eq_attr "type" "la,larl,load,store,jsr"))
+ (eq_attr "atype" "agen")))
+ "z_e1,z_wr")
+
+;;
+;; s390_agen_dep_p returns 1 if a register is set in the
+;; first insn and used in the dependent insn to form an address.
+;;
+
+;;
+;; If an instruction uses a register to address memory, it needs
+;; to be set 5 cycles in advance.
+;;
+
+(define_bypass 5 "z_int,z_agen"
+ "z_agen,z_la,z_call,z_load,z_store" "s390_agen_dep_p")
+
+;;
+;; A load type instruction uses a bypass to feed the result back
+;; to the address generation pipeline stage.
+;;
+
+(define_bypass 3 "z_load"
+ "z_agen,z_la,z_call,z_load,z_store" "s390_agen_dep_p")
+
+;;
+;; A load address type instruction uses a bypass to feed the
+;; result back to the address generation pipeline stage.
+;;
+
+(define_bypass 2 "z_larl,z_la"
+ "z_agen,z_la,z_call,z_load,z_store" "s390_agen_dep_p")
+
+
+
+
+
diff --git a/gcc/config/s390/2084.md b/gcc/config/s390/2084.md
new file mode 100644
index 000000000..9ce5530b6
--- /dev/null
+++ b/gcc/config/s390/2084.md
@@ -0,0 +1,310 @@
+;; Scheduling description for z990 (cpu 2084).
+;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+;; Ulrich Weigand (uweigand@de.ibm.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "x_ipu")
+
+(define_cpu_unit "x_e1_r,x_e1_s,x_e1_t" "x_ipu")
+(define_cpu_unit "x_wr_r,x_wr_s,x_wr_t,x_wr_fp" "x_ipu")
+(define_cpu_unit "x_s1,x_s2,x_s3,x_s4" "x_ipu")
+(define_cpu_unit "x_t1,x_t2,x_t3,x_t4" "x_ipu")
+(define_cpu_unit "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6" "x_ipu")
+(define_cpu_unit "x_store_tok" "x_ipu")
+(define_cpu_unit "x_ms,x_mt" "x_ipu")
+
+(define_reservation "x-e1-st" "(x_e1_s | x_e1_t)")
+
+(define_reservation "x-e1-np" "(x_e1_r + x_e1_s + x_e1_t)")
+
+(absence_set "x_e1_r" "x_e1_s,x_e1_t")
+(absence_set "x_e1_s" "x_e1_t")
+
+;; Try to avoid int <-> fp transitions.
+
+(define_reservation "x-x" "x_s1|x_t1,x_s2|x_t2,x_s3|x_t3,x_s4|x_t4")
+(define_reservation "x-f" "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6")
+(define_reservation "x-wr-st" "((x_wr_s | x_wr_t),x-x)")
+(define_reservation "x-wr-np" "((x_wr_r + x_wr_s + x_wr_t),x-x)")
+(define_reservation "x-wr-fp" "x_wr_fp,x-f")
+(define_reservation "x-mem" "x_ms|x_mt")
+
+(absence_set "x_wr_fp"
+ "x_s1,x_s2,x_s3,x_s4,x_t1,x_t2,x_t3,x_t4,x_wr_s,x_wr_t")
+
+(absence_set "x_e1_r,x_wr_r,x_wr_s,x_wr_t"
+ "x_f1,x_f2,x_f3,x_f4,x_f5,x_f6,x_wr_fp")
+
+;; Don't have any load type insn in same group as store
+
+(absence_set "x_ms,x_mt" "x_store_tok")
+
+
+;;
+;; Simple insns
+;;
+
+(define_insn_reservation "x_int" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (and (eq_attr "type" "integer")
+ (eq_attr "atype" "reg")))
+ "x-e1-st,x-wr-st")
+
+(define_insn_reservation "x_agen" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (and (eq_attr "type" "integer")
+ (eq_attr "atype" "agen")))
+ "x-e1-st,x-wr-st")
+
+(define_insn_reservation "x_lr" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "lr"))
+ "x-e1-st,x-wr-st")
+
+(define_insn_reservation "x_la" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "la"))
+ "x-e1-st,x-wr-st")
+
+(define_insn_reservation "x_larl" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "larl"))
+ "x-e1-st,x-wr-st")
+
+(define_insn_reservation "x_load" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "load"))
+ "x-e1-st+x-mem,x-wr-st")
+
+(define_insn_reservation "x_store" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "store"))
+ "x-e1-st+x_store_tok,x-wr-st")
+
+(define_insn_reservation "x_branch" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "branch"))
+ "x_e1_r,x_wr_r")
+
+(define_insn_reservation "x_call" 5
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "jsr"))
+ "x-e1-np*5,x-wr-np")
+
+(define_insn_reservation "x_mul_hi" 2
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "imulhi"))
+ "x-e1-np*2,x-wr-np")
+
+(define_insn_reservation "x_mul_sidi" 4
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "imulsi,imuldi"))
+ "x-e1-np*4,x-wr-np")
+
+(define_insn_reservation "x_div" 10
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "idiv"))
+ "x-e1-np*10,x-wr-np")
+
+(define_insn_reservation "x_sem" 17
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "sem"))
+ "x-e1-np+x-mem,x-e1-np*16,x-wr-st")
+
+;;
+;; Multicycle insns
+;;
+
+(define_insn_reservation "x_cs" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "cs"))
+ "x-e1-np,x-wr-np")
+
+(define_insn_reservation "x_vs" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "vs"))
+ "x-e1-np*10,x-wr-np")
+
+(define_insn_reservation "x_stm" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "stm"))
+ "(x-e1-np+x_store_tok)*10,x-wr-np")
+
+(define_insn_reservation "x_lm" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "lm"))
+ "x-e1-np*10,x-wr-np")
+
+(define_insn_reservation "x_other" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "other"))
+ "x-e1-np,x-wr-np")
+
+;;
+;; Floating point insns
+;;
+
+(define_insn_reservation "x_fsimptf" 7
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fsimptf,fhex"))
+ "x_e1_t*2,x-wr-fp")
+
+(define_insn_reservation "x_fsimpdf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fsimpdf,fmuldf,fmadddf,fhex"))
+ "x_e1_t,x-wr-fp")
+
+(define_insn_reservation "x_fsimpsf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fsimpsf,fmulsf,fmaddsf,fhex"))
+ "x_e1_t,x-wr-fp")
+
+
+(define_insn_reservation "x_fmultf" 33
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fmultf"))
+ "x_e1_t*27,x-wr-fp")
+
+
+(define_insn_reservation "x_fdivtf" 82
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fdivtf,fsqrttf"))
+ "x_e1_t*76,x-wr-fp")
+
+(define_insn_reservation "x_fdivdf" 36
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fdivdf,fsqrtdf"))
+ "x_e1_t*30,x-wr-fp")
+
+(define_insn_reservation "x_fdivsf" 36
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fdivsf,fsqrtsf"))
+ "x_e1_t*30,x-wr-fp")
+
+
+(define_insn_reservation "x_floadtf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "floadtf"))
+ "x_e1_t,x-wr-fp")
+
+(define_insn_reservation "x_floaddf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "floaddf"))
+ "x_e1_t,x-wr-fp")
+
+(define_insn_reservation "x_floadsf" 6
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "floadsf"))
+ "x_e1_t,x-wr-fp")
+
+
+(define_insn_reservation "x_fstoredf" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fstoredf"))
+ "x_e1_t,x-wr-fp")
+
+(define_insn_reservation "x_fstoresf" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "fstoresf"))
+ "x_e1_t,x-wr-fp")
+
+
+(define_insn_reservation "x_ftrunctf" 16
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "ftrunctf"))
+ "x_e1_t*10,x-wr-fp")
+
+(define_insn_reservation "x_ftruncdf" 11
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "ftruncdf"))
+ "x_e1_t*5,x-wr-fp")
+
+
+(define_insn_reservation "x_ftoi" 1
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "ftoi"))
+ "x_e1_t*3,x-wr-fp")
+
+(define_insn_reservation "x_itof" 7
+ (and (eq_attr "cpu" "z990,z9_109")
+ (eq_attr "type" "itoftf,itofdf,itofsf"))
+ "x_e1_t*3,x-wr-fp")
+
+(define_bypass 1 "x_fsimpdf" "x_fstoredf")
+
+(define_bypass 1 "x_fsimpsf" "x_fstoresf")
+
+(define_bypass 1 "x_floaddf" "x_fsimpdf,x_fstoredf,x_floaddf")
+
+(define_bypass 1 "x_floadsf" "x_fsimpsf,x_fstoresf,x_floadsf")
+
+;;
+;; s390_agen_dep_p returns 1 if a register is set in the
+;; first insn and used in the dependent insn to form an address.
+;;
+
+;;
+;; If an instruction uses a register to address memory, it needs
+;; to be set 5 cycles in advance.
+;;
+
+(define_bypass 5 "x_int,x_agen,x_lr"
+ "x_agen,x_la,x_branch,x_call,x_load,x_store,x_cs,x_stm,x_lm,x_other"
+ "s390_agen_dep_p")
+
+(define_bypass 9 "x_int,x_agen,x_lr"
+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
+;;
+;; A load type instruction uses a bypass to feed the result back
+;; to the address generation pipeline stage.
+;;
+
+(define_bypass 4 "x_load"
+ "x_agen,x_la,x_branch,x_call,x_load,x_store,x_cs,x_stm,x_lm,x_other"
+ "s390_agen_dep_p")
+
+(define_bypass 5 "x_load"
+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
+
+;;
+;; A load address type instruction uses a bypass to feed the
+;; result back to the address generation pipeline stage.
+;;
+
+(define_bypass 3 "x_larl,x_la"
+ "x_agen,x_la,x_branch,x_call,x_load,x_store,x_cs,x_stm,x_lm,x_other"
+ "s390_agen_dep_p")
+
+(define_bypass 5 "x_larl, x_la"
+ "x_floadtf, x_floaddf, x_floadsf, x_fstoredf, x_fstoresf,\
+ x_fsimpdf, x_fsimpsf, x_fdivdf, x_fdivsf"
+ "s390_agen_dep_p")
+
+;;
+;; Operand forwarding
+;;
+
+(define_bypass 0 "x_lr,x_la,x_load" "x_int,x_lr")
+
+
diff --git a/gcc/config/s390/2097.md b/gcc/config/s390/2097.md
new file mode 100644
index 000000000..77c206ecd
--- /dev/null
+++ b/gcc/config/s390/2097.md
@@ -0,0 +1,764 @@
+;; Scheduling description for z10 (cpu 2097).
+;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;; Contributed by Wolfgang Gellerich (gellerich@de.ibm.com).
+
+
+; General naming conventions used in this file:
+; - The two pipelines are called S and T, respectively.
+; - A name ending "_S" or "_T" indicates that something happens in
+; (or belongs to) this pipeline.
+; - A name ending "_ANY" indicates that something happens in (or belongs
+; to) either of the two pipelines.
+; - A name ending "_BOTH" indicates that something happens in (or belongs
+; to) both pipelines.
+
+
+;; Automaton and components.
+
+(define_automaton "z10_cpu")
+
+(define_cpu_unit "z10_e1_S, z10_e1_T" "z10_cpu")
+(define_reservation "z10_e1_ANY" "(z10_e1_S | z10_e1_T)")
+(define_reservation "z10_e1_BOTH" "(z10_e1_S + z10_e1_T)")
+
+
+; Both pipelines can execute a branch instruction, and branch
+; instructions can be grouped with all other groupable instructions
+; but not with a second branch instruction.
+
+(define_cpu_unit "z10_branch_ANY" "z10_cpu")
+
+(define_insn_reservation "z10_branch" 4
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "branch"))
+ "z10_branch_ANY + z10_e1_ANY, z10_Gate_ANY")
+
+
+; Z10 operand and result forwarding.
+
+; Instructions marked with the attribute z10_fwd or z10_fr can
+; forward a value they load from one of their operands into a register
+; if the instruction in the second pipeline reads the same register.
+; The second operation must be superscalar.  Instructions marked as
+; z10_rec or z10_fr can receive a value they read from a register if
+; this register gets updated by an instruction in the first pipeline.
+; The first instruction must be superscalar.
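+
+; For example (illustrative): if a z10_fwd-type load writes %r1 in the
+; first pipeline, a superscalar z10_super-type instruction issued into
+; the second pipeline in the same cycle can read %r1 without the usual
+; multi-cycle latency; the bypasses below model this as latency 0.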
+
+
+; Forwarding from z10_fwd and z10_fr to z10_super.
+
+(define_bypass 0 "z10_la_fwd, z10_la_fwd_A1, z10_larl_fwd, z10_larl_fwd_A3, \
+ z10_load_fwd, z10_load_fwd_A3, \
+ z10_other_fwd, z10_other_fwd_A1, z10_other_fwd_A3, \
+ z10_other_fr, z10_other_fr_A3, z10_other_fr_E1, \
+ z10_other_fwd_E1, z10_lr_fr, z10_lr_fr_E1, \
+ z10_int_fwd, z10_int_fwd_A1, z10_int_fwd_A3, \
+ z10_int_fwd_E1, z10_int_fr, z10_int_fr_E1, \
+ z10_int_fr_A3"
+ "z10_other_super, z10_other_super_c_E1, z10_other_super_E1, \
+ z10_int_super, z10_int_super_E1, \
+ z10_lr, z10_store_super"
+ " ! s390_agen_dep_p")
+
+
+; Forwarding from z10_super to frz10_ and z10_rec.
+
+(define_bypass 0 "z10_other_super, z10_other_super_E1, z10_other_super_c_E1, \
+ z10_int_super, z10_int_super_E1, \
+ z10_larl_super_E1, z10_larl_super, \
+ z10_store_super"
+ "z10_int_fr, z10_int_fr_E1, z10_int_fr_A3, \
+ z10_other_fr, z10_other_fr_A3, z10_lr_fr, z10_lr_fr_E1, \
+ z10_other_fr_E1, z10_store_rec"
+ " ! s390_agen_dep_p")
+
+
+; Forwarding from z10_fwd and z10_fr to z10_rec and z10_fr.
+
+(define_bypass 0 "z10_la_fwd, z10_la_fwd_A1, z10_larl_fwd, z10_larl_fwd_A3, \
+ z10_load_fwd, z10_load_fwd_A3, \
+ z10_other_fwd, z10_other_fwd_A1, z10_other_fwd_A3, \
+ z10_other_fr, z10_other_fr_A3, z10_other_fr_E1, \
+ z10_other_fwd_E1, \
+ z10_lr_fr, z10_lr_fr_E1, \
+ z10_int_fwd, z10_int_fwd_A1, z10_int_fwd_A3, \
+ z10_int_fwd_E1, z10_int_fr, z10_int_fr_E1, \
+ z10_int_fr_A3"
+ "z10_int_fr, z10_int_fr_E1, z10_int_fr_A3, \
+ z10_other_fr, z10_other_fr_A3, z10_lr_fr, z10_lr_fr_E1, \
+ z10_other_fr_E1, z10_store_rec"
+ " ! s390_agen_dep_p")
+
+
+;
+; Simple insns
+;
+
+; Here is the cycle diagram for FXU-executed instructions:
+; ... A1 A2 A3 E1 P1 P2 P3 R0 ...
+; ^ ^ ^
+; | | updated GPR is available
+; | write to GPR
+; instruction reads GPR during this cycle
+
+
+; Variants of z10_int follow.
+
+(define_insn_reservation "z10_int" 6
+ (and (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "integer"))
+ (and (eq_attr "atype" "reg")
+ (and (and (eq_attr "z10prop" "!z10_super")
+ (eq_attr "z10prop" "!z10_super_c"))
+ (and (and (and (and (eq_attr "z10prop" "!z10_super_E1")
+ (eq_attr "z10prop" "!z10_super_c_E1"))
+ (eq_attr "z10prop" "!z10_fwd"))
+ (and (eq_attr "z10prop" "!z10_fwd_A1")
+ (eq_attr "z10prop" "!z10_fwd_A3")))
+ (and (and (eq_attr "z10prop" "!z10_fwd_E1")
+ (eq_attr "z10prop" "!z10_fr"))
+ (and (eq_attr "z10prop" "!z10_fr_E1")
+ (eq_attr "z10prop" "!z10_fr_A3")))))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_super" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (ior (eq_attr "z10prop" "z10_super")
+ (eq_attr "z10prop" "z10_super_c")))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_super_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (ior (eq_attr "z10prop" "z10_super_E1")
+ (eq_attr "z10prop" "z10_super_c_E1")))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fwd" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fwd"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fwd_A1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fwd_A1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fwd_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fwd_A3"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fwd_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fwd_E1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fr" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fr"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fr_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fr_E1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_int_fr_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (and (eq_attr "atype" "reg")
+ (eq_attr "z10prop" "z10_fr_A3"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+; END of z10_int variants
+
+
+(define_insn_reservation "z10_agen" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "integer")
+ (eq_attr "atype" "agen")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+
+(define_insn_reservation "z10_lr" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "lr")
+ (and (eq_attr "z10prop" "!z10_fr")
+ (eq_attr "z10prop" "!z10_fr_E1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_lr_fr" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "lr")
+ (eq_attr "z10prop" "z10_fr")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_lr_fr_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "lr")
+ (eq_attr "z10prop" "z10_fr_E1")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_la" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "la")
+ (and (eq_attr "z10prop" "!z10_fwd")
+ (eq_attr "z10prop" "!z10_fwd_A1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_la_fwd" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "la")
+ (eq_attr "z10prop" "z10_fwd")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_la_fwd_A1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "la")
+ (eq_attr "z10prop" "z10_fwd_A1")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+
+; larl-type instructions
+
+(define_insn_reservation "z10_larl" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (and (eq_attr "z10prop" "!z10_super_A1")
+ (and (eq_attr "z10prop" "!z10_fwd")
+ (and (eq_attr "z10prop" "!z10_fwd_A3")
+ (and (eq_attr "z10prop" "!z10_super")
+ (eq_attr "z10prop" "!z10_super_c"))
+ (and (eq_attr "z10prop" "!z10_super_E1")
+ (eq_attr "z10prop" "!z10_super_c_E1")))))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_larl_super" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (and (eq_attr "z10prop" "z10_super")
+ (eq_attr "z10prop" "z10_super_c"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_larl_fwd" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (eq_attr "z10prop" "z10_fwd")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_larl_fwd_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (eq_attr "z10prop" "z10_fwd_A3")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+
+(define_insn_reservation "z10_larl_A1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (eq_attr "z10prop" "z10_super_A1")))
+ "z10_e1_ANY, z10_Gate_ANY")
+; "z10_e1_ANY")
+
+(define_insn_reservation "z10_larl_super_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "larl")
+ (ior (eq_attr "z10prop" "z10_super_E1")
+ (eq_attr "z10prop" "z10_super_c_E1"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+; "z10_e1_ANY")
+
+
+(define_insn_reservation "z10_load" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "load")
+ (and (eq_attr "z10prop" "!z10_fwd")
+ (eq_attr "z10prop" "!z10_fwd_A3"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_load_fwd" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "load")
+ (eq_attr "z10prop" "z10_fwd")))
+ "z10_e1_ANY, z10_Gate_ANY")
+; "z10_e1_ANY")
+
+(define_insn_reservation "z10_load_fwd_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "load")
+ (eq_attr "z10prop" "z10_fwd_A3")))
+ "z10_e1_ANY, z10_Gate_ANY")
+; "z10_e1_ANY")
+
+(define_insn_reservation "z10_store" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "store")
+ (and (eq_attr "z10prop" "!z10_rec")
+ (and (eq_attr "z10prop" "!z10_super")
+ (eq_attr "z10prop" "!z10_super_c")))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_store_super" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "store")
+ (ior (eq_attr "z10prop" "z10_super")
+ (eq_attr "z10prop" "z10_super_c"))))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+(define_insn_reservation "z10_store_rec" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "store")
+ (eq_attr "z10prop" "z10_rec")))
+ "z10_e1_ANY, z10_Gate_ANY")
+
+; The default latency is chosen to drain the pipeline.
+(define_insn_reservation "z10_call" 14
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "jsr"))
+ "z10_e1_BOTH*4, z10_Gate_BOTH")
+
+; The default latency is for the worst case.  CS and CSG take only one
+; cycle (i.e. the latency would be 6).
+(define_insn_reservation "z10_sem" 9
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "sem"))
+ "z10_e1_BOTH*5, z10_Gate_ANY")
+
+(define_insn_reservation "z10_cs" 6
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "cs"))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_vs" 6
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "vs"))
+ "z10_e1_BOTH*4, z10_Gate_BOTH")
+
+; Load and store multiple.  The actual number of cycles
+; is unknown at compile time.
+(define_insn_reservation "z10_stm" 10
+ (and (eq_attr "cpu" "z10")
+ (ior (eq_attr "type" "stm")
+ (eq_attr "type" "lm")))
+ "z10_e1_BOTH*4, z10_Gate_BOTH")
+
+
+; Subsets of z10_other follow.
+
+(define_insn_reservation "z10_other" 6
+ (and (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "other"))
+ (and (and (eq_attr "z10prop" "!z10_fwd")
+ (eq_attr "z10prop" "!z10_fwd_A1"))
+ (and (and (and (eq_attr "z10prop" "!z10_fr_A3")
+ (eq_attr "z10prop" "!z10_fwd_A3"))
+ (and (eq_attr "z10prop" "!z10_fr")
+ (eq_attr "z10prop" "!z10_fr_E1")))
+ (and (and (and (eq_attr "z10prop" "!z10_super")
+ (eq_attr "z10prop" "!z10_super_c"))
+ (eq_attr "z10prop" "!z10_super_c_E1"))
+ (and (eq_attr "z10prop" "!z10_super_E1")
+ (eq_attr "z10prop" "!z10_fwd_E1"))))))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fr_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fr_E1")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_super_c_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_super_c_E1")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_super_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_super_E1")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fwd_E1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fwd_E1")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fwd" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fwd")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fwd_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fwd_A3")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fwd_A1" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fwd_A1")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fr" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fr")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_fr_A3" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (eq_attr "z10prop" "z10_fr_A3")))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+(define_insn_reservation "z10_other_super" 6
+ (and (eq_attr "cpu" "z10")
+ (and (eq_attr "type" "other")
+ (ior (eq_attr "z10prop" "z10_super")
+ (eq_attr "z10prop" "z10_super_c"))))
+ "z10_e1_BOTH, z10_Gate_BOTH")
+
+; END of z10_other subsets.
+
+
+;
+; Floating point insns
+;
+
+; Z10 executes the following integer operations in the BFU pipeline.
+
+(define_insn_reservation "z10_mul_sidi" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "imulsi,imuldi,imulhi"))
+ "z10_e1_BOTH, z10_Gate_FP")
+
+; Some variants take fewer cycles, but that is not relevant here.
+(define_insn_reservation "z10_div" 162
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "idiv"))
+ "z10_e1_BOTH*4, z10_Gate_FP")
+
+
+; BFP multiplication and general instructions
+
+(define_insn_reservation "z10_fsimpdf" 6
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimpdf,fmuldf,fmadddf"))
+ "z10_e1_BOTH, z10_Gate_FP")
+
+(define_insn_reservation "z10_fsimpsf" 6
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimpsf,fmulsf,fmaddsf"))
+ "z10_e1_BOTH, z10_Gate_FP")
+
+(define_insn_reservation "z10_fmultf" 52
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fmultf"))
+ "z10_e1_BOTH*4, z10_Gate_FP")
+
+(define_insn_reservation "z10_fsimptf" 14
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimptf"))
+ "z10_e1_BOTH*2, z10_Gate_FP")
+
+
+; BFP division
+
+(define_insn_reservation "z10_fdivtf" 113
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fdivtf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+(define_insn_reservation "z10_fdivdf" 41
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fdivdf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+(define_insn_reservation "z10_fdivsf" 34
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fdivsf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+
+; BFP sqrt
+
+(define_insn_reservation "z10_fsqrtsf" 41
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsqrtsf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+(define_insn_reservation "z10_fsqrtdf" 54
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsqrtdf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+(define_insn_reservation "z10_fsqrtf" 122
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsqrttf"))
+ "z10_e1_T*4, z10_Gate_FP")
+
+
+; BFP load and store
+
+(define_insn_reservation "z10_floadtf" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "floadtf"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_floaddf" 1
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "floaddf"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_floadsf" 1
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "floadsf"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_fstoredf" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fstoredf,fstoredd"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_fstoresf" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fstoresf,fstoresd"))
+ "z10_e1_T, z10_Gate_FP")
+
+
+; BFP truncate
+(define_insn_reservation "z10_ftrunctf" 16
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftrunctf"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_ftruncdf" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftruncdf"))
+ "z10_e1_T, z10_Gate_FP")
+
+
+; Conversion between BFP and int.
+(define_insn_reservation "z10_ftoi" 13
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftoi"))
+ "z10_e1_T, z10_Gate_FP")
+
+(define_insn_reservation "z10_itoftf" 14
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "itoftf"))
+ "z10_e1_T*2, z10_Gate_FP")
+
+(define_insn_reservation "z10_itofsfdf" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "itofdf,itofsf"))
+ "z10_e1_T, z10_Gate_FP")
+
+
+
+; BFP-related bypasses. There is no bypass for extended mode.
+(define_bypass 1 "z10_fsimpdf" "z10_fstoredf")
+(define_bypass 1 "z10_fsimpsf" "z10_fstoresf")
+(define_bypass 1 "z10_floaddf" "z10_fsimpdf, z10_fstoredf")
+(define_bypass 1 "z10_floadsf" "z10_fsimpsf, z10_fstoresf")
+
+
+;
+; insn_reservations for DFP instructions.
+;
+
+; Exact number of cycles is not known at compile-time.
+(define_insn_reservation "z10_fdivddtd" 40
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fdivdd,fdivtd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_ftruncsd" 38
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftruncsd"))
+ "z10_e1_BOTH*4,z10_Gate_DFU")
+
+(define_insn_reservation "z10_ftruncdd" 340
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftruncdd"))
+ "z10_e1_BOTH*4,z10_Gate_DFU")
+
+(define_insn_reservation "z10_floaddd" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "floaddd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_floadsd" 12
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "floadsd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+; Exact number of cycles is not known at compile-time.
+(define_insn_reservation "z10_fmulddtd" 35
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fmuldd,fmultd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_fsimpdd" 17
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimpdd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_fsimpsd" 17
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimpsd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_fsimptd" 18
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "fsimptd"))
+ "z10_e1_BOTH,z10_Gate_DFU")
+
+(define_insn_reservation "z10_itofdd" 36
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "itofdd"))
+ "z10_e1_BOTH*3,z10_Gate_DFU")
+
+(define_insn_reservation "z10_itoftd" 49
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "itoftd"))
+ "z10_e1_BOTH*3,z10_Gate_DFU")
+
+; Exact number of cycles is not known at compile-time.
+(define_insn_reservation "z10_ftoidfp" 30
+ (and (eq_attr "cpu" "z10")
+ (eq_attr "type" "ftoidfp"))
+ "z10_e1_BOTH*3,z10_Gate_DFU")
+
+
+;
+; Address-related bypasses
+;
+
+; Here is the cycle diagram for address-related bypasses:
+; ... G1 G2 G3 A0 A1 A2 A3 E1 P1 P2 P3 R0 ...
+; ^ ^ ^ ^ ^ ^
+; | | | | | without bypass, the new value is available AFTER this cycle
+; | | | | E1-type bypasses provide the new value AFTER this cycle
+; | | | A3-type bypasses provide the new value AFTER this cycle
+; | | A1-type bypasses provide the new value AFTER this cycle
+; | AGI resolution, actual USE of new value is DURING this cycle
+; AGI detection
+
+(define_bypass 3 "z10_larl_A1, z10_la_fwd_A1, z10_other_fwd_A1, \
+ z10_int_fwd_A1"
+ "z10_agen, z10_la, z10_branch, z10_call, z10_load, \
+ z10_store, \
+ z10_cs, z10_stm, z10_other"
+ "s390_agen_dep_p")
+
+(define_bypass 5 "z10_larl_fwd_A3, z10_load_fwd_A3, z10_other_fwd_A3, \
+ z10_other_fr_A3, z10_int_fwd_A3, z10_int_fr_A3"
+ "z10_agen, z10_la, z10_branch, z10_call, z10_load, \
+ z10_store, \
+ z10_cs, z10_stm, z10_other"
+ "s390_agen_dep_p")
+
+(define_bypass 6 "z10_other_fr_E1, z10_other_super_c_E1, z10_other_super_E1, \
+ z10_other_fwd_E1, \
+ z10_lr_fr_E1, z10_larl_super_E1, \
+ z10_int_super_E1, z10_int_fwd_E1, z10_int_fr_E1"
+ "z10_agen, z10_la, z10_branch, z10_call, z10_load, \
+ z10_store, \
+ z10_cs, z10_stm, z10_other"
+ "s390_agen_dep_p")
+
+(define_bypass 9 "z10_int_super, z10_int_fwd, z10_int_fr"
+ "z10_agen, z10_la, z10_branch, z10_call, z10_load, \
+ z10_store, \
+ z10_cs, z10_stm, z10_other"
+ "s390_agen_dep_p")
+
+
+
+;
+; Try to avoid transitions between DFU-, BFU- and FXU-executed
+; instructions, as they require a dispatch delay.
+;
+
+
+; Declaration for some pseudo-pipeline stages that reflect the
+; dispatch gap when issuing an INT/FXU/BFU-executed instruction after
+; an instruction executed by a different unit has been executed.  The
+; approach is that we pretend a pipelined execution of BFU operations
+; with as many stages as the gap is long and request that none of
+; these stages is busy when issuing an FXU- or DFU-executed
+; instruction.  The same applies to FXU- and DFU-executed instructions.
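+
+; For example (an illustrative reading of the declarations below): a
+; BFU-executed instruction also walks through the thirteen pseudo-stages
+; z10_f0 ... z10_f12, and z10_S_avail / z10_T_avail are declared absent
+; while any of those stages is busy, so an FXU-executed instruction
+; cannot be dispatched until that gap has passed.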
+
+; Declaration for FPU stages.
+(define_cpu_unit "z10_f0, z10_f1, z10_f2, z10_f3, z10_f4, z10_f5, z10_f6, \
+ z10_f7, z10_f8, z10_f9, z10_f10, z10_f11, z10_f12" "z10_cpu")
+(define_reservation "z10_FP_PP" "z10_f0, z10_f1, z10_f2, z10_f3, z10_f4, \
+ z10_f5, z10_f6, z10_f7, z10_f8, z10_f9, z10_f10, z10_f11, \
+ z10_f12")
+
+; Declaration for FXU stages.
+(define_cpu_unit "z10_S1, z10_S2, z10_S3, z10_S4, z10_S5, z10_S6" "z10_cpu")
+(define_cpu_unit "z10_T1, z10_T2, z10_T3, z10_T4, z10_T5, z10_T6" "z10_cpu")
+(define_reservation "z10_INT_PP" "z10_S1 | z10_T1, z10_S2 | z10_T2, z10_S3 \
+ | z10_T3, z10_S4 | z10_T4, z10_S5 | \
+ z10_T5, z10_S6 | z10_T6")
+
+; Declaration for DFU stages.
+(define_cpu_unit "z10_d0, z10_d1, z10_d2, z10_d3, z10_d4, z10_d5, z10_d6"
+ "z10_cpu")
+(define_reservation "z10_DFU_PP" "z10_d0, z10_d1, z10_d2, z10_d3, z10_d4, \
+ z10_d5, z10_d6")
+
+
+; Pseudo-units representing whether the respective unit is available
+; in the sense that using it does not cause a dispatch delay.
+
+(define_cpu_unit "z10_S_avail, z10_T_avail, z10_FP_avail, z10_DFU_avail"
+ "z10_cpu")
+
+(absence_set "z10_FP_avail"
+ "z10_S1, z10_S2, z10_S3, z10_S4, z10_S5, z10_S6, z10_T1, z10_T2, z10_T3, z10_T4, \
+ z10_T5, z10_T6, \
+ z10_d0, z10_d1, z10_d2, z10_d3, z10_d4, z10_d5, z10_d6")
+
+(absence_set "z10_S_avail,z10_T_avail"
+ "z10_f0, z10_f1, z10_f2, z10_f3, z10_f4, z10_f5, z10_f6, z10_f7, \
+ z10_f8, z10_f9, z10_f10, z10_f11, z10_f12, \
+ z10_d0, z10_d1, z10_d2, z10_d3, z10_d4, z10_d5, z10_d6")
+
+(absence_set "z10_DFU_avail"
+ "z10_S1, z10_S2, z10_S3, z10_S4, z10_S5, z10_S6, z10_T1, z10_T2, z10_T3, z10_T4, \
+ z10_T5, z10_T6, \
+ z10_f0, z10_f1, z10_f2, z10_f3, z10_f4, z10_f5, z10_f6, z10_f7, \
+ z10_f8, z10_f9, z10_f10, z10_f11, z10_f12")
+
+
+; Pseudo-units to be used in insn_reservations.
+
+(define_reservation "z10_Gate_ANY" "((z10_S_avail | z10_T_avail), z10_INT_PP)")
+(define_reservation "z10_Gate_BOTH" "((z10_S_avail + z10_T_avail), z10_INT_PP)")
+
+(define_reservation "z10_Gate_FP" "z10_FP_avail, z10_FP_PP")
+
+(define_reservation "z10_Gate_DFU" "z10_DFU_avail, z10_DFU_PP")
diff --git a/gcc/config/s390/2817.md b/gcc/config/s390/2817.md
new file mode 100644
index 000000000..ea181b01f
--- /dev/null
+++ b/gcc/config/s390/2817.md
@@ -0,0 +1,315 @@
+;; Scheduling description for z196 (cpu 2817).
+;; Copyright (C) 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Christian Borntraeger (Christian.Borntraeger@de.ibm.com)
+;; Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "z196_ipu")
+
+;; Fetch + Decoder
+(define_cpu_unit "z196_g1" "z196_ipu")
+(define_cpu_unit "z196_g2" "z196_ipu")
+(define_cpu_unit "z196_g3" "z196_ipu")
+(define_cpu_unit "z196_cr1" "z196_ipu")
+(define_cpu_unit "z196_cr2" "z196_ipu")
+(define_cpu_unit "z196_cr3" "z196_ipu")
+
+(final_presence_set "z196_g2" "z196_g1")
+(final_presence_set "z196_g3" "z196_g2")
+(final_presence_set "z196_cr2" "z196_cr1")
+(final_presence_set "z196_cr3" "z196_cr2")
+(exclusion_set "z196_g1" "z196_cr1")
+
+;; Instructions can be groupable, end a group, or be alone in a group.
+(define_reservation "z196_simple" "( z196_g1 | z196_g2 | z196_g3 )")
+(define_reservation "z196_ends" "( z196_g3 | ( z196_g2 + z196_g3 ) | ( z196_g1 + z196_g2 + z196_g3 ) )")
+
+;; Try to keep cracked and alone insns together in a clump.  This will also
+;; improve the clumping of "normal" insns.  We also allow cracked insns
+;; to go last in a group together with normal ones.
+(define_reservation "z196_crack" "( z196_cr1 | z196_cr2 | z196_cr3 | z196_g3)")
+(define_reservation "z196_alone" "( z196_cr1 | z196_cr2 | z196_cr3 )")
+
+;; Most simple instructions are fast enough to be handled by the OOO core
+;; even with latency == 0.  This reduces live ranges and spilling.  We do
+;; want longer live ranges for long-running ops, though; that's why we do
+;; not use -fno-schedule-insns.
+(define_insn_reservation "z196_simple_LSU" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "load,store,lr")
+ (eq_attr "z196prop" "none")))
+ "z196_simple")
+
+(define_insn_reservation "z196_simple_FXU" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "integer,la,larl,other")
+ (and (eq_attr "z196prop" "none")
+ (eq_attr "op_type" "RR"))))
+ "z196_simple")
+
+(define_insn_reservation "z196_simple_DUAL" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "integer,la,larl,other")
+ (and (eq_attr "z196prop" "none")
+ (eq_attr "op_type" "!RR"))))
+ "z196_simple")
+
+(define_insn_reservation "z196_cracked" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "integer,la,larl,load,lr,store,other")
+ (eq_attr "z196prop" "z196_cracked")))
+ "z196_crack")
+
+(define_insn_reservation "z196_alone" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "integer,la,larl,load,lr,store,other")
+ (eq_attr "z196prop" "z196_alone")))
+ "z196_alone")
+
+(define_insn_reservation "z196_ends" 0
+ (and (eq_attr "cpu" "z196")
+ (and (eq_attr "type" "integer,la,larl,load,lr,store,other")
+ (eq_attr "z196prop" "z196_ends")))
+ "z196_ends")
+
+(define_insn_reservation "z196_branch" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "branch"))
+ "z196_ends")
+
+(define_insn_reservation "z196_call" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "jsr"))
+ "z196_ends")
+
+(define_insn_reservation "z196_mul_hi" 10
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "imulhi"))
+ "z196_simple")
+
+(define_insn_reservation "z196_mul_si" 12
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "imulsi"))
+ "z196_simple")
+
+(define_insn_reservation "z196_mul_di" 14
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "imuldi"))
+ "z196_simple")
+
+(define_insn_reservation "z196_div" 73
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "idiv"))
+ "z196_alone")
+
+(define_insn_reservation "z196_sem" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "sem"))
+ "z196_crack")
+
+(define_insn_reservation "z196_cs" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "cs"))
+ "z196_crack")
+
+(define_insn_reservation "z196_vs" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "vs"))
+ "z196_alone")
+
+(define_insn_reservation "z196_lm_stm" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "stm,lm"))
+ "z196_crack")
+
+
+;;
+;; Binary Floating Point
+;;
+
+(define_insn_reservation "z196_fsimptf" 18
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimptf,fhex"))
+ "z196_alone")
+
+(define_insn_reservation "z196_fmultf" 47
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fmultf"))
+ "z196_alone")
+
+(define_insn_reservation "z196_fsimpdf" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimpdf,fmuldf,fhex"))
+ "z196_simple")
+
+(define_insn_reservation "z196_fmadddf" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fmadddf"))
+ "z196_alone")
+
+(define_insn_reservation "z196_fsimpsf" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimpsf,fmulsf,fhex"))
+ "z196_simple")
+
+(define_insn_reservation "z196_fmaddsf" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fmaddsf"))
+ "z196_alone")
+
+(define_insn_reservation "z196_fdivtf" 108
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fdivtf,fsqrttf"))
+ "z196_alone")
+
+(define_insn_reservation "z196_fdivdf" 36
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fdivdf,fsqrtdf"))
+ "z196_simple")
+
+(define_insn_reservation "z196_fdivsf" 29
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fdivsf,fsqrtsf"))
+ "z196_simple")
+
+
+;; Loads and stores are cheap as well.
+(define_insn_reservation "z196_floaddf" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "floaddf"))
+ "z196_simple")
+
+(define_insn_reservation "z196_floadsf" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "floadsf"))
+ "z196_simple")
+
+(define_insn_reservation "z196_fstoredf" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fstoredf"))
+ "z196_simple")
+
+(define_insn_reservation "z196_fstoresf" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fstoresf"))
+ "z196_simple")
+
+
+(define_insn_reservation "z196_ftrunctf" 9
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftrunctf"))
+ "z196_simple")
+
+(define_insn_reservation "z196_ftruncdf" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftruncdf"))
+ "z196_simple")
+
+
+(define_insn_reservation "z196_ftoi" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftoi"))
+ "z196_crack")
+
+(define_insn_reservation "z196_itof" 7
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "itoftf,itofdf,itofsf"))
+ "z196_crack")
+
+;;
+;; Decimal Floating Point
+;;
+
+;; DDTR
+(define_insn_reservation "z196_fdivdd" 33
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fdivdd"))
+ "z196_simple")
+
+;; DXTR
+(define_insn_reservation "z196_fdivtd" 35
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fdivtd"))
+ "z196_alone")
+
+;; LEDTR
+(define_insn_reservation "z196_ftruncsd" 34
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftruncsd"))
+ "z196_simple")
+
+;; LDXTR
+(define_insn_reservation "z196_ftruncdd" 36
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftruncdd"))
+ "z196_simple")
+
+;; These are normal fp loads/stores - which are cheap.
+(define_insn_reservation "z196_floadsddd" 0
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "floadsd,floaddd,fstoredd,fstoresd"))
+ "z196_simple")
+
+;; MDTR
+(define_insn_reservation "z196_fmuldd" 23
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fmuldd"))
+ "z196_simple")
+
+;; MXTR
+(define_insn_reservation "z196_fmultd" 25
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fmultd"))
+ "z196_alone")
+
+;; Multiple different insns like add, sub, etc.
+;; Just use the same defaults as z10.
+(define_insn_reservation "z196_fsimpsd" 17
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimpsd"))
+ "z196_simple")
+(define_insn_reservation "z196_fsimpdd" 17
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimpdd"))
+ "z196_simple")
+(define_insn_reservation "z196_fsimptd" 18
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "fsimptd"))
+ "z196_alone")
+
+;; CDGTR
+(define_insn_reservation "z196_itofdd" 45
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "itofdd"))
+ "z196_crack")
+
+;; CXGTR
+(define_insn_reservation "z196_itoftd" 33
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "itoftd"))
+ "z196_crack")
+
+;; CGXTR, CGDTR
+(define_insn_reservation "z196_ftoidfp" 33
+ (and (eq_attr "cpu" "z196")
+ (eq_attr "type" "ftoidfp"))
+ "z196_crack")
+
+
+
diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md
new file mode 100644
index 000000000..8564b6619
--- /dev/null
+++ b/gcc/config/s390/constraints.md
@@ -0,0 +1,492 @@
+;; Constraint definitions belonging to the GCC backend for IBM S/390.
+;; Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+;; Written by Wolfgang Gellerich, using code and information found in
+;; files s390.md, s390.h, and s390.c.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;;
+;; Special constraints for s/390 machine description:
+;;
+;; a -- Any address register from 1 to 15.
+;; b -- Memory operand whose address is a symbol reference or a symbol
+;; reference + constant which can be proven to be naturally aligned.
+;; c -- Condition code register 33.
+;; d -- Any register from 0 to 15.
+;; f -- Floating point registers.
+;; t -- Access registers 36 and 37.
+;; C -- A signed 8-bit constant (-128..127)
+;; D -- An unsigned 16-bit constant (0..65535)
+;; G -- Const double zero operand
+;; I -- An 8-bit constant (0..255).
+;; J -- A 12-bit constant (0..4095).
+;; K -- A 16-bit constant (-32768..32767).
+;; L -- Value appropriate as displacement.
+;; (0..4095) for short displacement
+;; (-524288..524287) for long displacement
+;; M -- Constant integer with a value of 0x7fffffff.
+;; N -- Multiple letter constraint followed by 4 parameter letters.
+;; 0..9,x: number of the part counting from most to least significant
+;; H,Q: mode of the part
+;; D,S,H: mode of the containing operand
+;; 0,F: value of the other parts (F = all bits set)
+;;
+;; The constraint matches if the specified part of a constant
+;; has a value different from its other parts. If the letter x
+;; is specified instead of a part number, the constraint matches
+;; if there is any single part with non-default value.
+;; O -- Multiple letter constraint followed by 1 parameter.
+;; s: Signed extended immediate value (-2G .. 2G-1).
+;; p: Positive extended immediate value (0 .. 4G-1).
+;; n: Negative extended immediate value (-4G+1 .. -1).
+;; These constraints do not accept any operand if the machine does
+;; not provide the extended-immediate facility.
+;; P -- Any integer constant that can be loaded without literal pool.
+;; Q -- Memory reference without index register and with short displacement.
+;; R -- Memory reference with index register and short displacement.
+;; S -- Memory reference without index register but with long displacement.
+;; T -- Memory reference with index register and long displacement.
+;; A -- Multiple letter constraint followed by Q, R, S, or T:
+;; Offsettable memory reference of type specified by second letter.
+;; B -- Multiple letter constraint followed by Q, R, S, or T:
+;; Memory reference of the type specified by second letter that
+;; does *not* refer to a literal pool entry.
+;; U -- Pointer with short displacement. (deprecated - use ZQ/ZR)
+;; W -- Pointer with long displacement. (deprecated - use ZS/ZT)
+;; Y -- Shift count operand.
+;; ZQ -- Pointer without index register and with short displacement.
+;; ZR -- Pointer with index register and short displacement.
+;; ZS -- Pointer without index register but with long displacement.
+;; ZT -- Pointer with index register and long displacement.
+;;
+;;
+
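+;; As an informal usage sketch (not from the original file): these
+;; letters are also what user inline assembly refers to.  With a
+;; hypothetical int variable x,
+;;
+;;   asm ("ahi %0,%1" : "+d" (x) : "K" (42));
+;;
+;; requests a general register for x and a signed 16-bit immediate
+;; for the constant.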
+
+;;
+;; Register constraints.
+;;
+
+(define_register_constraint "a"
+ "ADDR_REGS"
+ "Any address register from 1 to 15.")
+
+
+(define_register_constraint "c"
+ "CC_REGS"
+ "Condition code register 33")
+
+
+(define_register_constraint "d"
+ "GENERAL_REGS"
+ "Any register from 0 to 15")
+
+
+(define_register_constraint "f"
+ "FP_REGS"
+ "Floating point registers")
+
+
+(define_register_constraint "t"
+ "ACCESS_REGS"
+ "@internal
+ Access registers 36 and 37")
+
+
+;;
+;; General constraints for constants.
+;;
+
+(define_constraint "C"
+ "@internal
+ An 8-bit signed immediate constant (-128..127)"
+ (and (match_code "const_int")
+ (match_test "ival >= -128 && ival <= 127")))
+
+
+(define_constraint "D"
+ "An unsigned 16-bit constant (0..65535)"
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+
+(define_constraint "G"
+ "@internal
+ Const double zero operand"
+ (and (match_code "const_double")
+ (match_test "s390_float_const_zero_p (op)")))
+
+
+(define_constraint "I"
+ "An 8-bit constant (0..255)"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 255")))
+
+
+(define_constraint "J"
+ "A 12-bit constant (0..4095)"
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 4095")))
+
+
+(define_constraint "K"
+ "A 16-bit constant (-32768..32767)"
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+
+(define_constraint "L"
+ "Value appropriate as displacement.
+ (0..4095) for short displacement
+ (-524288..524287) for long displacement"
+ (and (match_code "const_int")
+ (match_test "TARGET_LONG_DISPLACEMENT ?
+ (ival >= -524288 && ival <= 524287)
+ : (ival >= 0 && ival <= 4095)")))
+
+
+(define_constraint "M"
+ "Constant integer with a value of 0x7fffffff"
+ (and (match_code "const_int")
+ (match_test "ival == 2147483647")))
+
+
+(define_constraint "P"
+ "@internal
+ Any integer constant that can be loaded without literal pool"
+ (and (match_code "const_int")
+ (match_test "legitimate_reload_constant_p (GEN_INT (ival))")))
+
+
+(define_address_constraint "Y"
+ "Shift count operand"
+
+;; Simply check for the basic form of a shift count. Reload will
+;; take care of making sure we have a proper base register.
+
+ (match_test "s390_decompose_shift_count (op, NULL, NULL)" ))
+
+
+;; N -- Multiple letter constraint followed by 4 parameter letters.
+;; 0..9,x: number of the part counting from most to least significant
+;; H,Q: mode of the part
+;; D,S,H: mode of the containing operand
+;; 0,F: value of the other parts (F = all bits set)
+;;
+;; The constraint matches if the specified part of a constant
+;; has a value different from its other parts. If the letter x
+;; is specified instead of a part number, the constraint matches
+;; if there is any single part with non-default value.
+;;
+;; The following patterns define only those constraints that are actually
+;; used in s390.md. If you need an additional one, simply add it in the
+;; obvious way. Function s390_N_constraint_str is ready to handle all
+;; combinations.
+;;
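+;; For example (informal reading): "N3HD0" matches a DImode constant
+;; whose halfword part 3 (the least significant 16 bits) is the only
+;; nonzero part, e.g. 0x1234.
+;;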
+
+
+(define_constraint "NxQS0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQS0\", ival)")))
+
+
+(define_constraint "NxQD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQD0\", ival)")))
+
+
+(define_constraint "N3HD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"3HD0\", ival)")))
+
+
+(define_constraint "N2HD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"2HD0\", ival)")))
+
+
+(define_constraint "N1SD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1SD0\", ival)")))
+
+
+(define_constraint "N1HS0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1HS0\", ival)")))
+
+
+(define_constraint "N1HD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1HD0\", ival)")))
+
+
+(define_constraint "N0SD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0SD0\", ival)")))
+
+
+(define_constraint "N0HS0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0HS0\", ival)")))
+
+
+(define_constraint "N0HD0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0HD0\", ival)")))
+
+
+(define_constraint "NxQDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQDF\", ival)")))
+
+
+(define_constraint "N1SDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1SDF\", ival)")))
+
+
+(define_constraint "N0SDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0SDF\", ival)")))
+
+
+(define_constraint "N3HDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"3HDF\", ival)")))
+
+
+(define_constraint "N2HDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"2HDF\", ival)")))
+
+
+(define_constraint "N1HDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1HDF\", ival)")))
+
+
+(define_constraint "N0HDF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0HDF\", ival)")))
+
+
+(define_constraint "N0HSF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"0HSF\", ival)")))
+
+
+(define_constraint "N1HSF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"1HSF\", ival)")))
+
+
+(define_constraint "NxQSF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQSF\", ival)")))
+
+
+(define_constraint "NxQHF"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQHF\", ival)")))
+
+
+(define_constraint "NxQH0"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_N_constraint_str (\"xQH0\", ival)")))
+
+
+
+
+;;
+;; Double-letter constraints starting with O follow.
+;;
+
+
+(define_constraint "Os"
+ "@internal
+ Signed extended immediate value (-2G .. 2G-1).
+ This constraint will only match if the machine provides
+ the extended-immediate facility."
+ (and (match_code "const_int")
+ (match_test "s390_O_constraint_str ('s', ival)")))
+
+
+(define_constraint "Op"
+ "@internal
+ Positive extended immediate value (0 .. 4G-1).
+ This constraint will only match if the machine provides
+ the extended-immediate facility."
+ (and (match_code "const_int")
+ (match_test "s390_O_constraint_str ('p', ival)")))
+
+
+(define_constraint "On"
+ "@internal
+ Negative extended immediate value (-4G+1 .. -1).
+ This constraint will only match if the machine provides
+ the extended-immediate facility."
+ (and (match_code "const_int")
+ (match_test "s390_O_constraint_str ('n', ival)")))
+
+
+
+
+;;
+;; Memory constraints follow.
+;;
+
+(define_memory_constraint "Q"
+ "Memory reference without index register and with short displacement"
+ (match_test "s390_mem_constraint (\"Q\", op)"))
+
+
+(define_memory_constraint "R"
+ "Memory reference with index register and short displacement"
+ (match_test "s390_mem_constraint (\"R\", op)"))
+
+
+(define_memory_constraint "S"
+ "Memory reference without index register but with long displacement"
+ (match_test "s390_mem_constraint (\"S\", op)"))
+
+
+(define_memory_constraint "T"
+ "Memory reference with index register and long displacement"
+ (match_test "s390_mem_constraint (\"T\", op)"))
+
+
+(define_memory_constraint "b"
+ "Memory reference whose address is a naturally aligned symbol reference."
+ (match_test "MEM_P (op)
+ && s390_check_symref_alignment (XEXP (op, 0),
+ GET_MODE_SIZE (GET_MODE (op)))"))
+
+(define_memory_constraint "e"
+ "Matches all memory references available on the current architecture
+level. This constraint will never be used and using it in an inline
+assembly is *always* a bug since there is no instruction accepting all
+those addresses. It just serves as a placeholder for a generic memory
+constraint."
+ (match_test "strict_memory_address_p (GET_MODE (op), op)"))
+
+; This defines 'm' as the normal memory constraint.  This is only possible
+; because the standard memory constraint is redefined in s390.h using
+; the TARGET_MEM_CONSTRAINT macro.
+(define_memory_constraint "m"
+ "Matches the most general memory address for pre-z10 machines."
+ (match_test "s390_mem_constraint (\"R\", op)
+ || s390_mem_constraint (\"T\", op)"))
+
+(define_memory_constraint "AQ"
+ "@internal
+ Offsettable memory reference without index register and with short displacement"
+ (match_test "s390_mem_constraint (\"AQ\", op)"))
+
+
+(define_memory_constraint "AR"
+ "@internal
+ Offsettable memory reference with index register and short displacement"
+ (match_test "s390_mem_constraint (\"AR\", op)"))
+
+
+(define_memory_constraint "AS"
+ "@internal
+ Offsettable memory reference without index register but with long displacement"
+ (match_test "s390_mem_constraint (\"AS\", op)"))
+
+
+(define_memory_constraint "AT"
+ "@internal
+ Offsettable memory reference with index register and long displacement"
+ (match_test "s390_mem_constraint (\"AT\", op)"))
+
+
+
+(define_constraint "BQ"
+ "@internal
+ Memory reference without index register and with short
+ displacement that does *not* refer to a literal pool entry."
+ (match_test "s390_mem_constraint (\"BQ\", op)"))
+
+
+(define_constraint "BR"
+ "@internal
+ Memory reference with index register and short displacement that
+ does *not* refer to a literal pool entry. "
+ (match_test "s390_mem_constraint (\"BR\", op)"))
+
+
+(define_constraint "BS"
+ "@internal
+ Memory reference without index register but with long displacement
+ that does *not* refer to a literal pool entry. "
+ (match_test "s390_mem_constraint (\"BS\", op)"))
+
+
+(define_constraint "BT"
+ "@internal
+ Memory reference with index register and long displacement that
+ does *not* refer to a literal pool entry. "
+ (match_test "s390_mem_constraint (\"BT\", op)"))
+
+
+(define_address_constraint "U"
+ "Pointer with short displacement. (deprecated - use ZQZR)"
+ (match_test "s390_mem_constraint (\"U\", op)"))
+
+(define_address_constraint "W"
+ "Pointer with long displacement. (deprecated - use ZSZT)"
+ (match_test "s390_mem_constraint (\"W\", op)"))
+
+
+(define_address_constraint "ZQ"
+ "Pointer without index register and with short displacement."
+ (match_test "s390_mem_constraint (\"ZQ\", op)"))
+
+(define_address_constraint "ZR"
+ "Pointer with index register and short displacement."
+ (match_test "s390_mem_constraint (\"ZR\", op)"))
+
+(define_address_constraint "ZS"
+ "Pointer without index register but with long displacement."
+ (match_test "s390_mem_constraint (\"ZS\", op)"))
+
+(define_address_constraint "ZT"
+ "Pointer with index register and long displacement."
+ (match_test "s390_mem_constraint (\"ZT\", op)"))
diff --git a/gcc/config/s390/linux-unwind.h b/gcc/config/s390/linux-unwind.h
new file mode 100644
index 000000000..558087fad
--- /dev/null
+++ b/gcc/config/s390/linux-unwind.h
@@ -0,0 +1,130 @@
+/* DWARF2 EH unwinding support for S/390 Linux.
+ Copyright (C) 2004, 2005, 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#define MD_FALLBACK_FRAME_STATE_FOR s390_fallback_frame_state
+
+static _Unwind_Reason_Code
+s390_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ long new_cfa;
+ int i;
+
+ typedef struct
+ {
+ unsigned long psw_mask;
+ unsigned long psw_addr;
+ unsigned long gprs[16];
+ unsigned int acrs[16];
+ unsigned int fpc;
+ unsigned int __pad;
+ double fprs[16];
+ } __attribute__ ((__aligned__ (8))) sigregs_;
+
+ sigregs_ *regs;
+ int *signo;
+
+ /* svc $__NR_sigreturn or svc $__NR_rt_sigreturn */
+ if (pc[0] != 0x0a || (pc[1] != 119 && pc[1] != 173))
+ return _URC_END_OF_STACK;
+
+ /* Legacy frames:
+ old signal mask (8 bytes)
+     pointer to sigregs (8 bytes) - always points to the next location
+ sigregs
+ retcode
+ This frame layout was used on kernels < 2.6.9 for non-RT frames,
+ and on kernels < 2.4.13 for RT frames as well. Note that we need
+ to look at RA to detect this layout -- this means that if you use
+ sa_restorer to install a different signal restorer on a legacy
+ kernel, unwinding from signal frames will not work. */
+ if (context->ra == context->cfa + 16 + sizeof (sigregs_))
+ {
+ regs = (sigregs_ *)(context->cfa + 16);
+ signo = NULL;
+ }
+
+ /* New-style RT frame:
+ retcode + alignment (8 bytes)
+ siginfo (128 bytes)
+ ucontext (contains sigregs) */
+ else if (pc[1] == 173 /* __NR_rt_sigreturn */)
+ {
+ struct ucontext_
+ {
+ unsigned long uc_flags;
+ struct ucontext_ *uc_link;
+ unsigned long uc_stack[3];
+ sigregs_ uc_mcontext;
+ } *uc = context->cfa + 8 + 128;
+
+ regs = &uc->uc_mcontext;
+ signo = context->cfa + sizeof(long);
+ }
+
+ /* New-style non-RT frame:
+ old signal mask (8 bytes)
+ pointer to sigregs (followed by signal number) */
+ else
+ {
+ regs = *(sigregs_ **)(context->cfa + 8);
+ signo = (int *)(regs + 1);
+ }
+
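+  /* The saved r15 is the interrupted stack pointer; the CFA sits
+     16*sizeof(long) + 32 bytes above it (96 bytes on 31-bit, 160 on
+     64-bit).  */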
+ new_cfa = regs->gprs[15] + 16*sizeof(long) + 32;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset =
+ new_cfa - (long) context->cfa + 16*sizeof(long) + 32;
+
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset =
+ (long)&regs->gprs[i] - new_cfa;
+ }
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[16+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16+i].loc.offset =
+ (long)&regs->fprs[i] - new_cfa;
+ }
+
+ /* Load return addr from PSW into dummy register 32. */
+
+ fs->regs.reg[32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[32].loc.offset = (long)&regs->psw_addr - new_cfa;
+ fs->retaddr_column = 32;
+ /* SIGILL, SIGFPE and SIGTRAP are delivered with psw_addr
+ after the faulting instruction rather than before it.
+ Don't set FS->signal_frame in that case. */
+ if (!signo || (*signo != 4 && *signo != 5 && *signo != 8))
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
diff --git a/gcc/config/s390/linux.h b/gcc/config/s390/linux.h
new file mode 100644
index 000000000..95cead119
--- /dev/null
+++ b/gcc/config/s390/linux.h
@@ -0,0 +1,104 @@
+/* Definitions for Linux for S/390.
+ Copyright (C) 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ Ulrich Weigand (uweigand@de.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _LINUX_H
+#define _LINUX_H
+
+/* Target specific version string. */
+
+#ifdef DEFAULT_TARGET_64BIT
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Linux for zSeries)");
+#else
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (Linux for S/390)");
+#endif
+
+
+/* Target specific type definitions. */
+
+/* ??? Do we really want long as size_t on 31-bit? */
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "long unsigned int")
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* Target specific preprocessor settings. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+
+/* Target specific assembler settings. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{m31&m64}%{mesa&mzarch}%{march=*}"
+
+
+/* Target specific linker settings. */
+
+#ifdef DEFAULT_TARGET_64BIT
+#define MULTILIB_DEFAULTS { "m64" }
+#else
+#define MULTILIB_DEFAULTS { "m31" }
+#endif
+
+#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
+#define GLIBC_DYNAMIC_LINKER64 "/lib/ld64.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{m31:-m elf_s390}%{m64:-m elf64_s390} \
+ %{shared:-shared} \
+ %{!shared: \
+ %{static:-static} \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{m31:-dynamic-linker " LINUX_DYNAMIC_LINKER32 "} \
+ %{m64:-dynamic-linker " LINUX_DYNAMIC_LINKER64 "}}}"
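+
+/* For example (informal): with "-m64 -shared" the spec above expands
+   to "-m elf64_s390 -shared".  */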
+
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#define MD_UNWIND_SUPPORT "config/s390/linux-unwind.h"
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* s390 glibc provides __stack_chk_guard in 0x14(tp),
+ s390x glibc provides it at 0x28(tp). */
+#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? 0x28 : 0x14)
+#endif
+
+/* Define if long doubles should be mangled as 'g'. */
+#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+
+#endif
diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
new file mode 100644
index 000000000..9d619fbc0
--- /dev/null
+++ b/gcc/config/s390/predicates.md
@@ -0,0 +1,406 @@
+;; Predicate definitions for S/390 and zSeries.
+;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;; Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+;; Ulrich Weigand (uweigand@de.ibm.com).
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; OP is the current operation.
+;; MODE is the current operation mode.
+
+;; operands --------------------------------------------------------------
+
+;; Return true if OP is a (const_int 0) operand.
+
+(define_predicate "const0_operand"
+ (and (match_code "const_int, const_double")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Return true if OP is constant.
+
+(define_special_predicate "consttable_operand"
+ (and (match_code "symbol_ref, label_ref, const, const_int, const_double")
+ (match_test "CONSTANT_P (op)")))
+
+;; Return true if OP is a valid S-type operand.
+
+(define_predicate "s_operand"
+ (and (match_code "subreg, mem")
+ (match_operand 0 "general_operand"))
+{
+ /* Just like memory_operand, allow (subreg (mem ...))
+ after reload. */
+ if (reload_completed
+ && GET_CODE (op) == SUBREG
+ && GET_CODE (SUBREG_REG (op)) == MEM)
+ op = SUBREG_REG (op);
+
+ if (GET_CODE (op) != MEM)
+ return false;
+ if (!s390_legitimate_address_without_index_p (op))
+ return false;
+
+ return true;
+})
+
+;; Return true if OP is a valid operand for the BRAS instruction.
+;; Allow SYMBOL_REFs and @PLT stubs.
+
+(define_special_predicate "bras_sym_operand"
+ (ior (and (match_code "symbol_ref")
+ (match_test "!flag_pic || SYMBOL_REF_LOCAL_P (op)"))
+ (and (match_code "const")
+ (and (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_PLT")))))
+
+;; Return true if OP is a PLUS that is not a legitimate
+;; operand for the LA instruction.
+
+(define_predicate "s390_plus_operand"
+ (and (match_code "plus")
+ (and (match_test "mode == Pmode")
+ (match_test "!legitimate_la_operand_p (op)"))))
+
+;; Return true if OP is a valid shift count or setmem operand.
+
+(define_predicate "shift_count_or_setmem_operand"
+ (match_code "reg, subreg, plus, const_int")
+{
+ HOST_WIDE_INT offset;
+ rtx base;
+
+ /* Extract base register and offset. */
+ if (!s390_decompose_shift_count (op, &base, &offset))
+ return false;
+
+ /* Don't allow any non-base hard registers. Doing so without
+ confusing reload and/or regrename would be tricky, and doesn't
+ buy us much anyway. */
+ if (base && REGNO (base) < FIRST_PSEUDO_REGISTER && !ADDR_REG_P (base))
+ return false;
+
+ /* Unfortunately we have to reject constants that are invalid
+ for an address, or else reload will get confused. */
+ if (!DISP_IN_RANGE (offset))
+ return false;
+
+ return true;
+})
+
+;; Return true if OP is a valid operand for the LARL instruction.
+
+(define_predicate "larl_operand"
+ (match_code "label_ref, symbol_ref, const, const_int, const_double")
+{
+ /* Allow labels and local symbols. */
+ if (GET_CODE (op) == LABEL_REF)
+ return true;
+ if (GET_CODE (op) == SYMBOL_REF)
+ return (!SYMBOL_REF_ALIGN1_P (op)
+ && SYMBOL_REF_TLS_MODEL (op) == 0
+ && (!flag_pic || SYMBOL_REF_LOCAL_P (op)));
+
+ /* Everything else must have a CONST, so strip it. */
+ if (GET_CODE (op) != CONST)
+ return false;
+ op = XEXP (op, 0);
+
+  /* Allow adding only even, in-range constants; LARL offsets are
+     measured in halfwords and therefore must be even.  */
+ if (GET_CODE (op) == PLUS)
+ {
+ if (GET_CODE (XEXP (op, 1)) != CONST_INT
+ || (INTVAL (XEXP (op, 1)) & 1) != 0)
+ return false;
+ if (INTVAL (XEXP (op, 1)) >= (HOST_WIDE_INT)1 << 31
+ || INTVAL (XEXP (op, 1)) < -((HOST_WIDE_INT)1 << 31))
+ return false;
+ op = XEXP (op, 0);
+ }
+
+ /* Labels and local symbols allowed here as well. */
+ if (GET_CODE (op) == LABEL_REF)
+ return true;
+ if (GET_CODE (op) == SYMBOL_REF)
+ return ((SYMBOL_REF_FLAGS (op) & SYMBOL_FLAG_ALIGN1) == 0
+ && SYMBOL_REF_TLS_MODEL (op) == 0
+ && (!flag_pic || SYMBOL_REF_LOCAL_P (op)));
+
+ /* Now we must have a @GOTENT offset or @PLT stub
+ or an @INDNTPOFF TLS offset. */
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_GOTENT)
+ return true;
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_PLT)
+ return true;
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_INDNTPOFF)
+ return true;
+
+ return false;
+})
+
+;; operators --------------------------------------------------------------
+
+;; Return nonzero if OP is a valid comparison operator
+;; for a branch condition.
+
+(define_predicate "s390_comparison"
+ (match_code "eq, ne, lt, gt, le, ge, ltu, gtu, leu, geu,
+ uneq, unlt, ungt, unle, unge, ltgt,
+ unordered, ordered")
+{
+ if (GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != CC_REGNUM
+ || XEXP (op, 1) != const0_rtx)
+ return false;
+
+ return (s390_branch_condition_mask (op) >= 0);
+})
+
+;; Return true if op is the cc register.
+(define_predicate "cc_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == CC_REGNUM")))
+
+(define_predicate "s390_signed_integer_comparison"
+ (match_code "eq, ne, lt, gt, le, ge")
+{
+ return (s390_compare_and_branch_condition_mask (op) >= 0);
+})
+
+(define_predicate "s390_unsigned_integer_comparison"
+ (match_code "eq, ne, ltu, gtu, leu, geu")
+{
+ return (s390_compare_and_branch_condition_mask (op) >= 0);
+})
+
+;; Return nonzero if OP is a valid comparison operator for the
+;; cstore expanders -- respectively cstorecc4 and integer cstore.
+(define_predicate "s390_eqne_operator"
+ (match_code "eq, ne"))
+
+(define_predicate "s390_scond_operator"
+ (match_code "ltu, gtu, leu, geu"))
+
+(define_predicate "s390_brx_operator"
+ (match_code "le, gt"))
+
+;; Return nonzero if OP is a valid comparison operator
+;; for an ALC condition.
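+;; (Informally: in CCL1mode the carry bit is represented by LTU, so an
+;; "a + b < a" style overflow check can feed an add-with-carry.)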
+
+(define_predicate "s390_alc_comparison"
+ (match_code "zero_extend, sign_extend, ltu, gtu, leu, geu")
+{
+ while (GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND)
+ op = XEXP (op, 0);
+
+ if (!COMPARISON_P (op))
+ return false;
+
+ if (GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != CC_REGNUM
+ || XEXP (op, 1) != const0_rtx)
+ return false;
+
+ switch (GET_MODE (XEXP (op, 0)))
+ {
+ case CCL1mode:
+ return GET_CODE (op) == LTU;
+
+ case CCL2mode:
+ return GET_CODE (op) == LEU;
+
+ case CCL3mode:
+ return GET_CODE (op) == GEU;
+
+ case CCUmode:
+ return GET_CODE (op) == GTU;
+
+ case CCURmode:
+ return GET_CODE (op) == LTU;
+
+ case CCSmode:
+ return GET_CODE (op) == UNGT;
+
+ case CCSRmode:
+ return GET_CODE (op) == UNLT;
+
+ default:
+ return false;
+ }
+})
+
+;; Return nonzero if OP is a valid comparison operator
+;; for an SLB condition.
+
+(define_predicate "s390_slb_comparison"
+ (match_code "zero_extend, sign_extend, ltu, gtu, leu, geu")
+{
+ while (GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND)
+ op = XEXP (op, 0);
+
+ if (!COMPARISON_P (op))
+ return false;
+
+ if (GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != CC_REGNUM
+ || XEXP (op, 1) != const0_rtx)
+ return false;
+
+ switch (GET_MODE (XEXP (op, 0)))
+ {
+ case CCL1mode:
+ return GET_CODE (op) == GEU;
+
+ case CCL2mode:
+ return GET_CODE (op) == GTU;
+
+ case CCL3mode:
+ return GET_CODE (op) == LTU;
+
+ case CCUmode:
+ return GET_CODE (op) == LEU;
+
+ case CCURmode:
+ return GET_CODE (op) == GEU;
+
+ case CCSmode:
+ return GET_CODE (op) == LE;
+
+ case CCSRmode:
+ return GET_CODE (op) == GE;
+
+ default:
+ return false;
+ }
+})
+
+;; Return true if OP is a load multiple operation.  It is known to be a
+;; PARALLEL and the first element will be tested.
+
+(define_special_predicate "load_multiple_operation"
+ (match_code "parallel")
+{
+ enum machine_mode elt_mode;
+ int count = XVECLEN (op, 0);
+ unsigned int dest_regno;
+ rtx src_addr;
+ int i, off;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM)
+ return false;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+ src_addr = XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0);
+ elt_mode = GET_MODE (SET_DEST (XVECEXP (op, 0, 0)));
+
+  /* Check whether the address is a base register or base + displacement.  */
+
+ if (GET_CODE (src_addr) == REG)
+ off = 0;
+ else if (GET_CODE (src_addr) == PLUS
+ && GET_CODE (XEXP (src_addr, 0)) == REG
+ && GET_CODE (XEXP (src_addr, 1)) == CONST_INT)
+ {
+ off = INTVAL (XEXP (src_addr, 1));
+ src_addr = XEXP (src_addr, 0);
+ }
+ else
+ return false;
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != elt_mode
+ || REGNO (SET_DEST (elt)) != dest_regno + i
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != elt_mode
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1))
+ != off + i * GET_MODE_SIZE (elt_mode))
+ return false;
+ }
+
+ return true;
+})
+
+;; Return true if OP is a store multiple operation.  It is known to be a
+;; PARALLEL and the first element will be tested.
+
+(define_special_predicate "store_multiple_operation"
+ (match_code "parallel")
+{
+ enum machine_mode elt_mode;
+ int count = XVECLEN (op, 0);
+ unsigned int src_regno;
+ rtx dest_addr;
+ int i, off;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG)
+ return false;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0)));
+ dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, 0)), 0);
+ elt_mode = GET_MODE (SET_SRC (XVECEXP (op, 0, 0)));
+
+  /* Check whether the address is a base register or base + displacement.  */
+
+ if (GET_CODE (dest_addr) == REG)
+ off = 0;
+ else if (GET_CODE (dest_addr) == PLUS
+ && GET_CODE (XEXP (dest_addr, 0)) == REG
+ && GET_CODE (XEXP (dest_addr, 1)) == CONST_INT)
+ {
+ off = INTVAL (XEXP (dest_addr, 1));
+ dest_addr = XEXP (dest_addr, 0);
+ }
+ else
+ return false;
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != elt_mode
+ || REGNO (SET_SRC (elt)) != src_regno + i
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != elt_mode
+ || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
+ || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1))
+ != off + i * GET_MODE_SIZE (elt_mode))
+ return false;
+ }
+ return true;
+})
diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
new file mode 100644
index 000000000..be2bf6ea7
--- /dev/null
+++ b/gcc/config/s390/s390-modes.def
@@ -0,0 +1,174 @@
+/* Definitions of target machine for GNU compiler, for IBM S/390
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ Ulrich Weigand (uweigand@de.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* 256-bit integer mode is needed for STACK_SAVEAREA_MODE. */
+INT_MODE (OI, 32);
+
+/* Define TFmode to work around reload problem PR 20927. */
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Add any extra modes needed to represent the condition code. */
+
+/*
+
+Condition Codes
+
+Check for zero
+
+CCZ: EQ NE NE NE
+CCZ1: EQ NE (CS)
+
+Unsigned compares
+
+CCU: EQ LTU GTU NE (CLG/R, CL/R/Y, CLM/Y, CLI/Y)
+CCUR: EQ GTU LTU NE (CLGF/R)
+
+Signed compares
+
+CCS: EQ LT GT UNORDERED (LTGFR, LTGR, LTR, ICM/Y,
+ LTDBR, LTDR, LTEBR, LTER,
+ CG/R, C/R/Y, CGHI, CHI,
+ CDB/R, CD/R, CEB/R, CE/R,
+ ADB/R, AEB/R, SDB/R, SEB/R,
+ SRAG, SRA, SRDA)
+CCSR: EQ GT LT UNORDERED (CGF/R, CH/Y)
+
+Condition codes resulting from add with overflow
+
+CCA: EQ LT GT Overflow
+CCAP: EQ LT GT LT (AGHI, AHI)
+CCAN: EQ LT GT GT (AGHI, AHI)
+
+Condition codes of unsigned adds and subs
+
+CCL: EQ NE EQ NE (ALGF/R, ALG/R, AL/R/Y,
+ ALCG/R, ALC/R,
+ SLGF/R, SLG/R, SL/R/Y,
+ SLBG/R, SLB/R)
+CCL1: GEU GEU LTU LTU (ALG/R, AL/R/Y)
+CCL2: GTU GTU LEU LEU (SLG/R, SL/R/Y)
+CCL3: EQ LTU EQ GTU (SLG/R, SL/R/Y)
+
+Test under mask checks
+
+CCT: EQ NE NE NE (ICM/Y, TML, CG/R, CGHI,
+ C/R/Y, CHI, NG/R, N/R/Y,
+ OG/R, O/R/Y, XG/R, X/R/Y)
+CCT1: NE EQ NE NE (TMH, TML)
+CCT2: NE NE EQ NE (TMH, TML)
+CCT3: NE NE NE EQ (TMH, TML)
+
+CCA and CCT modes are request-only modes.  These modes are never returned by
+s390_select_cc_mode.  They are only intended to match other modes.
+
+Requested mode -> Destination CC register mode
+
+CCS, CCU, CCT, CCSR, CCUR -> CCZ
+CCA -> CCAP, CCAN
+
+
+*** Comments ***
+
+CCAP, CCAN
+
+The CC obtained from an add instruction usually can't be used for comparisons
+because of its coupling with the overflow flag.  In case of an overflow the
+less-than/greater-than information is lost.  Nevertheless a comparison can be
+done whenever immediate values are involved, because they are known at compile
+time.  If you know whether the constant used is positive or negative, you can
+predict the sign of the result even in case of an overflow.
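+
+(Informal example: after adding a positive immediate with AHI, an
+overflow can only wrap the result to a negative value, which is why
+CCAP maps condition code 3 to LT.)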
+
+
+CCT, CCT1, CCT2, CCT3
+
+If bits of an integer masked with an AND instruction are checked, the
+test-under-mask instructions turn out to be very handy for a set of special
+cases.  The simplest cases are checks whether all masked bits are zero or all
+are one:
+
+ int a;
+ if ((a & (16 + 128)) == 0) -> CCT/CCZ
+ if ((a & (16 + 128)) == 16 + 128) -> CCT3
+
+Using two extra modes makes it possible to do complete checks on two bits of
+an integer (register operands only; TM does not provide the information
+necessary for the CCT1 and CCT2 modes):
+
+ int a;
+ if ((a & (16 + 128)) == 16) -> CCT1
+ if ((a & (16 + 128)) == 128) -> CCT2
+
+
+CCSR, CCUR
+
+There are several instructions comparing 32-bit with 64-bit unsigned/signed
+values.  Such instructions can be considered to have a built-in zero/sign
+extension.  The problem is that, to be canonical, the RTL requires the
+zero/sign-extended operand to come first, while the machine instructions
+want it the other way around.  Both of the following modes can be considered
+CCS and CCU modes with exchanged operands.
+
+
+CCL1, CCL2
+
+These modes represent the result of overflow checks.
+
+if (a + b < a) -> CCL1 state of the carry bit (CC2 | CC3)
+if (a - b > a) -> CCL2 state of the borrow bit (CC0 | CC1)
+
+They are used when multi-word numbers are computed one SImode part after
+another, or whenever manual overflow checks like the examples above are
+compiled.
+
+
+CCL3
+
+A logical subtract instruction sets the borrow bit in case of an overflow.
+The resulting condition code of those instructions is represented by the
+CCL3 mode. Together with the CCU mode this mode is used for jumpless
+implementations of several if-constructs - see s390_expand_addcc for more
+details.
+
+CCZ1
+
+The compare-and-swap instructions set the condition code to 0/1 if the
+operands were equal/unequal.  The CCZ1 mode ensures that the result can
+be placed into a register efficiently.
+
+*/
+
+
+CC_MODE (CCZ);
+CC_MODE (CCZ1);
+CC_MODE (CCA);
+CC_MODE (CCAP);
+CC_MODE (CCAN);
+CC_MODE (CCL);
+CC_MODE (CCL1);
+CC_MODE (CCL2);
+CC_MODE (CCL3);
+CC_MODE (CCU);
+CC_MODE (CCUR);
+CC_MODE (CCS);
+CC_MODE (CCSR);
+CC_MODE (CCT);
+CC_MODE (CCT1);
+CC_MODE (CCT2);
+CC_MODE (CCT3);
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
new file mode 100644
index 000000000..399d75715
--- /dev/null
+++ b/gcc/config/s390/s390-protos.h
@@ -0,0 +1,114 @@
+/* Definitions of target machine for GNU compiler, for IBM S/390.
+ Copyright (C) 2000, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+
+ Contributed by Hartmut Penner (hpenner@de.ibm.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+
+/* Prototypes of functions used for constraint evaluation in
+   constraints.md.  */
+
+extern int s390_mem_constraint (const char *str, rtx op);
+extern int s390_O_constraint_str (const char c, HOST_WIDE_INT value);
+extern int s390_N_constraint_str (const char *str, HOST_WIDE_INT value);
+extern int s390_float_const_zero_p (rtx value);
+extern bool s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment);
+
+
+/* Declare functions in s390.c. */
+
+extern HOST_WIDE_INT s390_initial_elimination_offset (int, int);
+extern void s390_emit_prologue (void);
+extern void s390_emit_epilogue (bool);
+extern void s390_function_profiler (FILE *, int);
+extern void s390_set_has_landing_pad_p (bool);
+extern bool s390_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
+extern int s390_class_max_nregs (enum reg_class, enum machine_mode);
+
+#ifdef RTX_CODE
+extern int s390_extra_constraint_str (rtx, int, const char *);
+extern int s390_const_ok_for_constraint_p (HOST_WIDE_INT, int, const char *);
+extern int s390_const_double_ok_for_constraint_p (rtx, int, const char *);
+extern int s390_single_part (rtx, enum machine_mode, enum machine_mode, int);
+extern unsigned HOST_WIDE_INT s390_extract_part (rtx, enum machine_mode, int);
+extern bool s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT, int, int *, int *);
+extern bool s390_split_ok_p (rtx, rtx, enum machine_mode, int);
+extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT);
+extern bool s390_offset_p (rtx, rtx, rtx);
+extern int tls_symbolic_operand (rtx);
+
+extern bool s390_match_ccmode (rtx, enum machine_mode);
+extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool);
+extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx);
+extern void s390_canonicalize_comparison (enum rtx_code *, rtx *, rtx *);
+extern rtx s390_emit_compare (enum rtx_code, rtx, rtx);
+extern void s390_emit_jump (rtx, rtx);
+extern bool symbolic_reference_mentioned_p (rtx);
+extern bool tls_symbolic_reference_mentioned_p (rtx);
+extern bool legitimate_la_operand_p (rtx);
+extern bool preferred_la_operand_p (rtx, rtx);
+extern int legitimate_pic_operand_p (rtx);
+extern int legitimate_constant_p (rtx);
+extern bool legitimate_reload_constant_p (rtx);
+extern rtx legitimize_pic_address (rtx, rtx);
+extern rtx legitimize_reload_address (rtx, enum machine_mode, int, int);
+extern enum reg_class s390_secondary_input_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx);
+extern enum reg_class s390_secondary_output_reload_class (enum reg_class,
+ enum machine_mode,
+ rtx);
+extern void s390_reload_larl_operand (rtx , rtx , rtx);
+extern void s390_reload_symref_address (rtx , rtx , rtx , bool);
+extern void s390_expand_plus_operand (rtx, rtx, rtx);
+extern void emit_symbolic_move (rtx *);
+extern void s390_load_address (rtx, rtx);
+extern void s390_expand_movmem (rtx, rtx, rtx);
+extern void s390_expand_setmem (rtx, rtx, rtx);
+extern void s390_expand_cmpmem (rtx, rtx, rtx, rtx);
+extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
+extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx);
+extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
+ rtx, rtx, rtx, bool);
+extern rtx s390_return_addr_rtx (int, rtx);
+extern rtx s390_back_chain_rtx (void);
+extern rtx s390_emit_call (rtx, rtx, rtx, rtx);
+extern void s390_expand_logical_operator (enum rtx_code,
+ enum machine_mode, rtx *);
+extern bool s390_logical_operator_ok_p (rtx *);
+extern void s390_narrow_logical_operator (enum rtx_code, rtx *, rtx *);
+extern void s390_split_access_reg (rtx, rtx *, rtx *);
+
+extern void print_operand_address (FILE *, rtx);
+extern void print_operand (FILE *, rtx, int);
+extern void s390_output_pool_entry (rtx, enum machine_mode, unsigned int);
+extern int s390_label_align (rtx);
+extern int s390_agen_dep_p (rtx, rtx);
+extern rtx s390_load_got (void);
+extern rtx s390_get_thread_pointer (void);
+extern void s390_emit_tpf_eh_return (rtx);
+extern bool s390_legitimate_address_without_index_p (rtx);
+extern bool s390_decompose_shift_count (rtx, rtx *, HOST_WIDE_INT *);
+extern int s390_branch_condition_mask (rtx);
+extern int s390_compare_and_branch_condition_mask (rtx);
+
+#endif /* RTX_CODE */
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
new file mode 100644
index 000000000..9b275b0ae
--- /dev/null
+++ b/gcc/config/s390/s390.c
@@ -0,0 +1,10845 @@
+/* Subroutines used for code generation on IBM S/390 and zSeries
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ Ulrich Weigand (uweigand@de.ibm.com) and
+ Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "reload.h"
+#include "diagnostic-core.h"
+#include "basic-block.h"
+#include "integrate.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "debug.h"
+#include "langhooks.h"
+#include "optabs.h"
+#include "gimple.h"
+#include "df.h"
+#include "params.h"
+#include "cfgloop.h"
+
+
+/* Define the specific costs for a given cpu. */
+
+struct processor_costs
+{
+ /* multiplication */
+ const int m; /* cost of an M instruction. */
+ const int mghi; /* cost of an MGHI instruction. */
+ const int mh; /* cost of an MH instruction. */
+ const int mhi; /* cost of an MHI instruction. */
+ const int ml; /* cost of an ML instruction. */
+ const int mr; /* cost of an MR instruction. */
+ const int ms; /* cost of an MS instruction. */
+ const int msg; /* cost of an MSG instruction. */
+ const int msgf; /* cost of an MSGF instruction. */
+ const int msgfr; /* cost of an MSGFR instruction. */
+ const int msgr; /* cost of an MSGR instruction. */
+ const int msr; /* cost of an MSR instruction. */
+ const int mult_df; /* cost of multiplication in DFmode. */
+ const int mxbr;
+ /* square root */
+ const int sqxbr; /* cost of square root in TFmode. */
+ const int sqdbr; /* cost of square root in DFmode. */
+ const int sqebr; /* cost of square root in SFmode. */
+ /* multiply and add */
+ const int madbr; /* cost of multiply and add in DFmode. */
+ const int maebr; /* cost of multiply and add in SFmode. */
+ /* division */
+ const int dxbr;
+ const int ddbr;
+ const int debr;
+ const int dlgr;
+ const int dlr;
+ const int dr;
+ const int dsgfr;
+ const int dsgr;
+};
+
+const struct processor_costs *s390_cost;
+
+static const
+struct processor_costs z900_cost =
+{
+ COSTS_N_INSNS (5), /* M */
+ COSTS_N_INSNS (10), /* MGHI */
+ COSTS_N_INSNS (5), /* MH */
+ COSTS_N_INSNS (4), /* MHI */
+ COSTS_N_INSNS (5), /* ML */
+ COSTS_N_INSNS (5), /* MR */
+ COSTS_N_INSNS (4), /* MS */
+ COSTS_N_INSNS (15), /* MSG */
+ COSTS_N_INSNS (7), /* MSGF */
+ COSTS_N_INSNS (7), /* MSGFR */
+ COSTS_N_INSNS (10), /* MSGR */
+ COSTS_N_INSNS (4), /* MSR */
+ COSTS_N_INSNS (7), /* multiplication in DFmode */
+ COSTS_N_INSNS (13), /* MXBR */
+ COSTS_N_INSNS (136), /* SQXBR */
+ COSTS_N_INSNS (44), /* SQDBR */
+ COSTS_N_INSNS (35), /* SQEBR */
+ COSTS_N_INSNS (18), /* MADBR */
+ COSTS_N_INSNS (13), /* MAEBR */
+ COSTS_N_INSNS (134), /* DXBR */
+ COSTS_N_INSNS (30), /* DDBR */
+ COSTS_N_INSNS (27), /* DEBR */
+ COSTS_N_INSNS (220), /* DLGR */
+ COSTS_N_INSNS (34), /* DLR */
+ COSTS_N_INSNS (34), /* DR */
+ COSTS_N_INSNS (32), /* DSGFR */
+ COSTS_N_INSNS (32), /* DSGR */
+};
+
+static const
+struct processor_costs z990_cost =
+{
+ COSTS_N_INSNS (4), /* M */
+ COSTS_N_INSNS (2), /* MGHI */
+ COSTS_N_INSNS (2), /* MH */
+ COSTS_N_INSNS (2), /* MHI */
+ COSTS_N_INSNS (4), /* ML */
+ COSTS_N_INSNS (4), /* MR */
+ COSTS_N_INSNS (5), /* MS */
+ COSTS_N_INSNS (6), /* MSG */
+ COSTS_N_INSNS (4), /* MSGF */
+ COSTS_N_INSNS (4), /* MSGFR */
+ COSTS_N_INSNS (4), /* MSGR */
+ COSTS_N_INSNS (4), /* MSR */
+ COSTS_N_INSNS (1), /* multiplication in DFmode */
+ COSTS_N_INSNS (28), /* MXBR */
+ COSTS_N_INSNS (130), /* SQXBR */
+ COSTS_N_INSNS (66), /* SQDBR */
+ COSTS_N_INSNS (38), /* SQEBR */
+ COSTS_N_INSNS (1), /* MADBR */
+ COSTS_N_INSNS (1), /* MAEBR */
+ COSTS_N_INSNS (60), /* DXBR */
+ COSTS_N_INSNS (40), /* DDBR */
+ COSTS_N_INSNS (26), /* DEBR */
+ COSTS_N_INSNS (176), /* DLGR */
+ COSTS_N_INSNS (31), /* DLR */
+ COSTS_N_INSNS (31), /* DR */
+ COSTS_N_INSNS (31), /* DSGFR */
+ COSTS_N_INSNS (31), /* DSGR */
+};
+
+static const
+struct processor_costs z9_109_cost =
+{
+ COSTS_N_INSNS (4), /* M */
+ COSTS_N_INSNS (2), /* MGHI */
+ COSTS_N_INSNS (2), /* MH */
+ COSTS_N_INSNS (2), /* MHI */
+ COSTS_N_INSNS (4), /* ML */
+ COSTS_N_INSNS (4), /* MR */
+ COSTS_N_INSNS (5), /* MS */
+ COSTS_N_INSNS (6), /* MSG */
+ COSTS_N_INSNS (4), /* MSGF */
+ COSTS_N_INSNS (4), /* MSGFR */
+ COSTS_N_INSNS (4), /* MSGR */
+ COSTS_N_INSNS (4), /* MSR */
+ COSTS_N_INSNS (1), /* multiplication in DFmode */
+ COSTS_N_INSNS (28), /* MXBR */
+ COSTS_N_INSNS (130), /* SQXBR */
+ COSTS_N_INSNS (66), /* SQDBR */
+ COSTS_N_INSNS (38), /* SQEBR */
+ COSTS_N_INSNS (1), /* MADBR */
+ COSTS_N_INSNS (1), /* MAEBR */
+ COSTS_N_INSNS (60), /* DXBR */
+ COSTS_N_INSNS (40), /* DDBR */
+ COSTS_N_INSNS (26), /* DEBR */
+ COSTS_N_INSNS (30), /* DLGR */
+ COSTS_N_INSNS (23), /* DLR */
+ COSTS_N_INSNS (23), /* DR */
+ COSTS_N_INSNS (24), /* DSGFR */
+ COSTS_N_INSNS (24), /* DSGR */
+};
+
+static const
+struct processor_costs z10_cost =
+{
+ COSTS_N_INSNS (10), /* M */
+ COSTS_N_INSNS (10), /* MGHI */
+ COSTS_N_INSNS (10), /* MH */
+ COSTS_N_INSNS (10), /* MHI */
+ COSTS_N_INSNS (10), /* ML */
+ COSTS_N_INSNS (10), /* MR */
+ COSTS_N_INSNS (10), /* MS */
+ COSTS_N_INSNS (10), /* MSG */
+ COSTS_N_INSNS (10), /* MSGF */
+ COSTS_N_INSNS (10), /* MSGFR */
+ COSTS_N_INSNS (10), /* MSGR */
+ COSTS_N_INSNS (10), /* MSR */
+  COSTS_N_INSNS (1),  /* multiplication in DFmode */
+ COSTS_N_INSNS (50), /* MXBR */
+ COSTS_N_INSNS (120), /* SQXBR */
+ COSTS_N_INSNS (52), /* SQDBR */
+ COSTS_N_INSNS (38), /* SQEBR */
+ COSTS_N_INSNS (1), /* MADBR */
+ COSTS_N_INSNS (1), /* MAEBR */
+ COSTS_N_INSNS (111), /* DXBR */
+ COSTS_N_INSNS (39), /* DDBR */
+ COSTS_N_INSNS (32), /* DEBR */
+ COSTS_N_INSNS (160), /* DLGR */
+ COSTS_N_INSNS (71), /* DLR */
+ COSTS_N_INSNS (71), /* DR */
+ COSTS_N_INSNS (71), /* DSGFR */
+ COSTS_N_INSNS (71), /* DSGR */
+};
+
+static const
+struct processor_costs z196_cost =
+{
+ COSTS_N_INSNS (7), /* M */
+ COSTS_N_INSNS (5), /* MGHI */
+ COSTS_N_INSNS (5), /* MH */
+ COSTS_N_INSNS (5), /* MHI */
+ COSTS_N_INSNS (7), /* ML */
+ COSTS_N_INSNS (7), /* MR */
+ COSTS_N_INSNS (6), /* MS */
+ COSTS_N_INSNS (8), /* MSG */
+ COSTS_N_INSNS (6), /* MSGF */
+ COSTS_N_INSNS (6), /* MSGFR */
+ COSTS_N_INSNS (8), /* MSGR */
+ COSTS_N_INSNS (6), /* MSR */
+  COSTS_N_INSNS (1),  /* multiplication in DFmode */
+ COSTS_N_INSNS (40), /* MXBR B+40 */
+ COSTS_N_INSNS (100), /* SQXBR B+100 */
+ COSTS_N_INSNS (42), /* SQDBR B+42 */
+ COSTS_N_INSNS (28), /* SQEBR B+28 */
+ COSTS_N_INSNS (1), /* MADBR B */
+ COSTS_N_INSNS (1), /* MAEBR B */
+ COSTS_N_INSNS (101), /* DXBR B+101 */
+ COSTS_N_INSNS (29), /* DDBR */
+ COSTS_N_INSNS (22), /* DEBR */
+ COSTS_N_INSNS (160), /* DLGR cracked */
+ COSTS_N_INSNS (160), /* DLR cracked */
+ COSTS_N_INSNS (160), /* DR expanded */
+ COSTS_N_INSNS (160), /* DSGFR cracked */
+ COSTS_N_INSNS (160), /* DSGR cracked */
+};
+
+extern int reload_completed;
+
+/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
+static rtx last_scheduled_insn;
+
+/* Structure used to hold the components of a S/390 memory
+ address. A legitimate address on S/390 is of the general
+ form
+ base + index + displacement
+ where any of the components is optional.
+
+ base and index are registers of the class ADDR_REGS,
+ displacement is an unsigned 12-bit immediate constant. */
+
+struct s390_address
+{
+ rtx base;
+ rtx indx;
+ rtx disp;
+ bool pointer;
+ bool literal_pool;
+};
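+
+/* For illustration: the operand 100(%r3,%r15) of an RX-format
+   instruction decomposes into base %r15, index %r3 and
+   displacement 100.  */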
+
+/* Which CPU we are tuning for.  */
+enum processor_type s390_tune = PROCESSOR_max;
+int s390_tune_flags;
+/* Which instruction set architecture to use. */
+enum processor_type s390_arch;
+int s390_arch_flags;
+
+HOST_WIDE_INT s390_warn_framesize = 0;
+HOST_WIDE_INT s390_stack_size = 0;
+HOST_WIDE_INT s390_stack_guard = 0;
+
+/* The following structure is embedded in the machine
+ specific part of struct function. */
+
+struct GTY (()) s390_frame_layout
+{
+ /* Offset within stack frame. */
+ HOST_WIDE_INT gprs_offset;
+ HOST_WIDE_INT f0_offset;
+ HOST_WIDE_INT f4_offset;
+ HOST_WIDE_INT f8_offset;
+ HOST_WIDE_INT backchain_offset;
+
+  /* Numbers of the first and last GPRs for which slots in the
+     register save area are reserved.  */
+ int first_save_gpr_slot;
+ int last_save_gpr_slot;
+
+ /* Number of first and last gpr to be saved, restored. */
+ int first_save_gpr;
+ int first_restore_gpr;
+ int last_save_gpr;
+ int last_restore_gpr;
+
+ /* Bits standing for floating point registers. Set, if the
+ respective register has to be saved. Starting with reg 16 (f0)
+ at the rightmost bit.
+ Bit 15 - 8 7 6 5 4 3 2 1 0
+ fpr 15 - 8 7 5 3 1 6 4 2 0
+ reg 31 - 24 23 22 21 20 19 18 17 16 */
+ unsigned int fpr_bitmap;
+
+ /* Number of floating point registers f8-f15 which must be saved. */
+ int high_fprs;
+
+ /* Set if return address needs to be saved.
+ This flag is set by s390_return_addr_rtx if it could not use
+     the initial value of r14 and therefore depends on r14 being
+     saved to the stack.  */
+ bool save_return_addr_p;
+
+ /* Size of stack frame. */
+ HOST_WIDE_INT frame_size;
+};
+
+/* Define the structure for the machine field in struct function. */
+
+struct GTY(()) machine_function
+{
+ struct s390_frame_layout frame_layout;
+
+ /* Literal pool base register. */
+ rtx base_reg;
+
+ /* True if we may need to perform branch splitting. */
+ bool split_branches_pending_p;
+
+ /* Some local-dynamic TLS symbol name. */
+ const char *some_ld_name;
+
+ bool has_landing_pad_p;
+};
+
+/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
+
+#define cfun_frame_layout (cfun->machine->frame_layout)
+#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
+#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
+ cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
+#define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
+ (1 << (BITNUM)))
+#define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
+ (1 << (BITNUM))))
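+
+/* For example, cfun_set_fpr_bit (0) records that f0 (hard reg 16)
+   needs a save slot, and cfun_fpr_bit_p (0) queries that bit,
+   following the fpr_bitmap layout documented above.  */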
+
+/* Number of GPRs and FPRs used for argument passing. */
+#define GP_ARG_NUM_REG 5
+#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
+
+/* A couple of shortcuts. */
+#define CONST_OK_FOR_J(x) \
+ CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
+#define CONST_OK_FOR_K(x) \
+ CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
+#define CONST_OK_FOR_Os(x) \
+ CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
+#define CONST_OK_FOR_Op(x) \
+ CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
+#define CONST_OK_FOR_On(x) \
+ CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
+
+#define REGNO_PAIR_OK(REGNO, MODE) \
+ (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
+
+/* That's the read ahead of the dynamic branch prediction unit in
+ bytes on a z10 (or higher) CPU. */
+#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
+
+/* Return the alignment for LABEL. We default to the -falign-labels
+ value except for the literal pool base label. */
+int
+s390_label_align (rtx label)
+{
+ rtx prev_insn = prev_active_insn (label);
+
+ if (prev_insn == NULL_RTX)
+ goto old;
+
+ prev_insn = single_set (prev_insn);
+
+ if (prev_insn == NULL_RTX)
+ goto old;
+
+ prev_insn = SET_SRC (prev_insn);
+
+ /* Don't align literal pool base labels. */
+ if (GET_CODE (prev_insn) == UNSPEC
+ && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
+ return 0;
+
+ old:
+ return align_labels_log;
+}
+
+static enum machine_mode
+s390_libgcc_cmp_return_mode (void)
+{
+ return TARGET_64BIT ? DImode : SImode;
+}
+
+static enum machine_mode
+s390_libgcc_shift_count_mode (void)
+{
+ return TARGET_64BIT ? DImode : SImode;
+}
+
+static enum machine_mode
+s390_unwind_word_mode (void)
+{
+ return TARGET_64BIT ? DImode : SImode;
+}
+
+/* Return true if the back end supports mode MODE. */
+static bool
+s390_scalar_mode_supported_p (enum machine_mode mode)
+{
+  /* In contrast to the default implementation, reject TImode constants
+     on 31-bit TARGET_ZARCH for ABI compliance.  */
+ if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
+ return false;
+
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return default_decimal_float_supported_p ();
+
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
+
+void
+s390_set_has_landing_pad_p (bool value)
+{
+ cfun->machine->has_landing_pad_p = value;
+}
+
+/* If two condition code modes are compatible, return a condition code
+ mode which is compatible with both. Otherwise, return
+ VOIDmode. */
+
+static enum machine_mode
+s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ if (m1 == m2)
+ return m1;
+
+ switch (m1)
+ {
+ case CCZmode:
+ if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
+ || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
+ return m2;
+ return VOIDmode;
+
+ case CCSmode:
+ case CCUmode:
+ case CCTmode:
+ case CCSRmode:
+ case CCURmode:
+ case CCZ1mode:
+ if (m2 == CCZmode)
+ return m1;
+
+ return VOIDmode;
+
+ default:
+ return VOIDmode;
+ }
+ return VOIDmode;
+}
+
+/* Return true if SET either doesn't set the CC register, or else
+ the source and destination have matching CC modes and that
+ CC mode is at least as constrained as REQ_MODE. */
+
+static bool
+s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
+{
+ enum machine_mode set_mode;
+
+ gcc_assert (GET_CODE (set) == SET);
+
+ if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
+    return true;
+
+ set_mode = GET_MODE (SET_DEST (set));
+ switch (set_mode)
+ {
+ case CCSmode:
+ case CCSRmode:
+ case CCUmode:
+ case CCURmode:
+ case CCLmode:
+ case CCL1mode:
+ case CCL2mode:
+ case CCL3mode:
+ case CCT1mode:
+ case CCT2mode:
+ case CCT3mode:
+ if (req_mode != set_mode)
+	return false;
+ break;
+
+ case CCZmode:
+ if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
+ && req_mode != CCSRmode && req_mode != CCURmode)
+	return false;
+ break;
+
+ case CCAPmode:
+ case CCANmode:
+ if (req_mode != CCAmode)
+	return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return (GET_MODE (SET_SRC (set)) == set_mode);
+}
+
+/* Return true if every SET in INSN that sets the CC register
+ has source and destination with matching CC modes and that
+ CC mode is at least as constrained as REQ_MODE.
+ If REQ_MODE is VOIDmode, always return false. */
+
+bool
+s390_match_ccmode (rtx insn, enum machine_mode req_mode)
+{
+ int i;
+
+ /* s390_tm_ccmode returns VOIDmode to indicate failure. */
+ if (req_mode == VOIDmode)
+ return false;
+
+ if (GET_CODE (PATTERN (insn)) == SET)
+ return s390_match_ccmode_set (PATTERN (insn), req_mode);
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx set = XVECEXP (PATTERN (insn), 0, i);
+ if (GET_CODE (set) == SET)
+ if (!s390_match_ccmode_set (set, req_mode))
+ return false;
+ }
+
+ return true;
+}
+
+/* If a test-under-mask instruction can be used to implement
+ (compare (and ... OP1) OP2), return the CC mode required
+ to do that. Otherwise, return VOIDmode.
+ MIXED is true if the instruction can distinguish between
+   CC1 and CC2 for mixed selected bits (TMxx); it is false
+ if the instruction cannot (TM). */
+
+enum machine_mode
+s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
+{
+ int bit0, bit1;
+
+ /* ??? Fixme: should work on CONST_DOUBLE as well. */
+ if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
+ return VOIDmode;
+
+ /* Selected bits all zero: CC0.
+ e.g.: int a; if ((a & (16 + 128)) == 0) */
+ if (INTVAL (op2) == 0)
+ return CCTmode;
+
+ /* Selected bits all one: CC3.
+ e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
+ if (INTVAL (op2) == INTVAL (op1))
+ return CCT3mode;
+
+ /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
+ int a;
+ if ((a & (16 + 128)) == 16) -> CCT1
+ if ((a & (16 + 128)) == 128) -> CCT2 */
+ if (mixed)
+ {
+ bit1 = exact_log2 (INTVAL (op2));
+ bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
+ if (bit0 != -1 && bit1 != -1)
+ return bit0 > bit1 ? CCT1mode : CCT2mode;
+ }
+
+ return VOIDmode;
+}
+
+/* Given a comparison code OP (EQ, NE, etc.) and the operands
+ OP0 and OP1 of a COMPARE, return the mode to be used for the
+ comparison. */
+
+enum machine_mode
+s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
+{
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ return CCAPmode;
+ if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
+ && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
+ return CCAPmode;
+ if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
+ || GET_CODE (op1) == NEG)
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ return CCLmode;
+
+ if (GET_CODE (op0) == AND)
+ {
+ /* Check whether we can potentially do it via TM. */
+ enum machine_mode ccmode;
+ ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
+ if (ccmode != VOIDmode)
+ {
+ /* Relax CCTmode to CCZmode to allow fall-back to AND
+ if that turns out to be beneficial. */
+ return ccmode == CCTmode ? CCZmode : ccmode;
+ }
+ }
+
+ if (register_operand (op0, HImode)
+ && GET_CODE (op1) == CONST_INT
+ && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
+ return CCT3mode;
+ if (register_operand (op0, QImode)
+ && GET_CODE (op1) == CONST_INT
+ && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
+ return CCT3mode;
+
+ return CCZmode;
+
+ case LE:
+ case LT:
+ case GE:
+ case GT:
+      /* The only overflow condition of NEG and ABS happens when
+         INT_MIN is used as the operand; the result cannot be
+         represented and stays negative.  So we have an overflow
+         from a positive value to a negative.  Using CCAP mode the
+         resulting cc can be used for comparisons.  */
+ if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ return CCAPmode;
+
+      /* If a constant is involved in an add instruction, it is
+         possible to use the resulting cc for comparisons with zero.
+         Knowing the sign of the constant makes the overflow behavior
+         predictable.  e.g.:
+           int a, b; if ((b = a + c) > 0)
+         with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP.  */
+ if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
+ && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
+ {
+ if (INTVAL (XEXP((op0), 1)) < 0)
+ return CCANmode;
+ else
+ return CCAPmode;
+ }
+ /* Fall through. */
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNLE:
+ case UNLT:
+ case UNGE:
+ case UNGT:
+ case LTGT:
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_CODE (op1) != CONST_INT)
+ return CCSRmode;
+ return CCSmode;
+
+ case LTU:
+ case GEU:
+ if (GET_CODE (op0) == PLUS
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ return CCL1mode;
+
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_CODE (op1) != CONST_INT)
+ return CCURmode;
+ return CCUmode;
+
+ case LEU:
+ case GTU:
+ if (GET_CODE (op0) == MINUS
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ return CCL2mode;
+
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_CODE (op1) != CONST_INT)
+ return CCURmode;
+ return CCUmode;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
+ that we can implement more efficiently. */
+
+void
+s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1)
+{
+ /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
+ if ((*code == EQ || *code == NE)
+ && *op1 == const0_rtx
+ && GET_CODE (*op0) == ZERO_EXTRACT
+ && GET_CODE (XEXP (*op0, 1)) == CONST_INT
+ && GET_CODE (XEXP (*op0, 2)) == CONST_INT
+ && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
+ {
+ rtx inner = XEXP (*op0, 0);
+ HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
+ HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
+ HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
+
+ if (len > 0 && len < modesize
+ && pos >= 0 && pos + len <= modesize
+ && modesize <= HOST_BITS_PER_WIDE_INT)
+ {
+ unsigned HOST_WIDE_INT block;
+ block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
+ block <<= modesize - pos - len;
+
+ *op0 = gen_rtx_AND (GET_MODE (inner), inner,
+ gen_int_mode (block, GET_MODE (inner)));
+ }
+ }
+
+ /* Narrow AND of memory against immediate to enable TM. */
+ if ((*code == EQ || *code == NE)
+ && *op1 == const0_rtx
+ && GET_CODE (*op0) == AND
+ && GET_CODE (XEXP (*op0, 1)) == CONST_INT
+ && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
+ {
+ rtx inner = XEXP (*op0, 0);
+ rtx mask = XEXP (*op0, 1);
+
+ /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
+ if (GET_CODE (inner) == SUBREG
+ && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
+ && (GET_MODE_SIZE (GET_MODE (inner))
+ >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
+ && ((INTVAL (mask)
+ & GET_MODE_MASK (GET_MODE (inner))
+ & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
+ == 0))
+ inner = SUBREG_REG (inner);
+
+ /* Do not change volatile MEMs. */
+ if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
+ {
+ int part = s390_single_part (XEXP (*op0, 1),
+ GET_MODE (inner), QImode, 0);
+ if (part >= 0)
+ {
+ mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
+ inner = adjust_address_nv (inner, QImode, part);
+ *op0 = gen_rtx_AND (QImode, inner, mask);
+ }
+ }
+ }
+
+ /* Narrow comparisons against 0xffff to HImode if possible. */
+ if ((*code == EQ || *code == NE)
+ && GET_CODE (*op1) == CONST_INT
+ && INTVAL (*op1) == 0xffff
+ && SCALAR_INT_MODE_P (GET_MODE (*op0))
+ && (nonzero_bits (*op0, GET_MODE (*op0))
+ & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
+ {
+ *op0 = gen_lowpart (HImode, *op0);
+ *op1 = constm1_rtx;
+ }
+
+ /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
+ if (GET_CODE (*op0) == UNSPEC
+ && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
+ && XVECLEN (*op0, 0) == 1
+ && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
+ && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
+ && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
+ && *op1 == const0_rtx)
+ {
+ enum rtx_code new_code = UNKNOWN;
+ switch (*code)
+ {
+ case EQ: new_code = EQ; break;
+ case NE: new_code = NE; break;
+ case LT: new_code = GTU; break;
+ case GT: new_code = LTU; break;
+ case LE: new_code = GEU; break;
+ case GE: new_code = LEU; break;
+ default: break;
+ }
+
+ if (new_code != UNKNOWN)
+ {
+ *op0 = XVECEXP (*op0, 0, 0);
+ *code = new_code;
+ }
+ }
+
+ /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
+ if (GET_CODE (*op0) == UNSPEC
+ && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
+ && XVECLEN (*op0, 0) == 1
+ && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
+ && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
+ && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
+ && *op1 == const0_rtx)
+ {
+ enum rtx_code new_code = UNKNOWN;
+ switch (*code)
+ {
+ case EQ: new_code = EQ; break;
+ case NE: new_code = NE; break;
+ default: break;
+ }
+
+ if (new_code != UNKNOWN)
+ {
+ *op0 = XVECEXP (*op0, 0, 0);
+ *code = new_code;
+ }
+ }
+
+ /* Simplify cascaded EQ, NE with const0_rtx. */
+ if ((*code == NE || *code == EQ)
+ && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
+ && GET_MODE (*op0) == SImode
+ && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
+ && REG_P (XEXP (*op0, 0))
+ && XEXP (*op0, 1) == const0_rtx
+ && *op1 == const0_rtx)
+ {
+ if ((*code == EQ && GET_CODE (*op0) == NE)
+ || (*code == NE && GET_CODE (*op0) == EQ))
+ *code = EQ;
+ else
+ *code = NE;
+ *op0 = XEXP (*op0, 0);
+ }
+
+ /* Prefer register over memory as first operand. */
+ if (MEM_P (*op0) && REG_P (*op1))
+ {
+ rtx tem = *op0; *op0 = *op1; *op1 = tem;
+ *code = swap_condition (*code);
+ }
+}
+
+/* Emit a compare instruction suitable to implement the comparison
+ OP0 CODE OP1. Return the correct condition RTL to be placed in
+ the IF_THEN_ELSE of the conditional branch testing the result. */
+
+rtx
+s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode mode = s390_select_ccmode (code, op0, op1);
+ rtx cc;
+
+ /* Do not output a redundant compare instruction if a compare_and_swap
+ pattern already computed the result and the machine modes are compatible. */
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+ {
+ gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
+ == GET_MODE (op0));
+ cc = op0;
+ }
+ else
+ {
+ cc = gen_rtx_REG (mode, CC_REGNUM);
+ emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
+ }
+
+ return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
+}
+
+/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
+ matches CMP.
+ Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
+ conditional branch testing the result. */
+
+static rtx
+s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx)
+{
+ emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx));
+ return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx);
+}
+
+/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
+ unconditional jump, else a conditional jump under condition COND. */
+
+void
+s390_emit_jump (rtx target, rtx cond)
+{
+ rtx insn;
+
+ target = gen_rtx_LABEL_REF (VOIDmode, target);
+ if (cond)
+ target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
+
+ insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
+ emit_jump_insn (insn);
+}
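+
+/* The two emit helpers above are typically used together; a sketched
+   (illustrative) use is:
+
+     rtx cond = s390_emit_compare (GT, op0, op1);
+     s390_emit_jump (label, cond);
+
+   which emits a compare setting the CC register followed by a
+   conditional branch to LABEL.  */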
+
+/* Return branch condition mask to implement a branch
+ specified by CODE. Return -1 for invalid comparisons. */
+
+int
+s390_branch_condition_mask (rtx code)
+{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC2 = 1 << 1;
+ const int CC3 = 1 << 0;
+
+ gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
+ gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
+ gcc_assert (XEXP (code, 1) == const0_rtx);
+
+ switch (GET_MODE (XEXP (code, 0)))
+ {
+ case CCZmode:
+ case CCZ1mode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC1 | CC2 | CC3;
+ default: return -1;
+ }
+ break;
+
+ case CCT1mode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC1;
+ case NE: return CC0 | CC2 | CC3;
+ default: return -1;
+ }
+ break;
+
+ case CCT2mode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC2;
+ case NE: return CC0 | CC1 | CC3;
+ default: return -1;
+ }
+ break;
+
+ case CCT3mode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC3;
+ case NE: return CC0 | CC1 | CC2;
+ default: return -1;
+ }
+ break;
+
+ case CCLmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0 | CC2;
+ case NE: return CC1 | CC3;
+ default: return -1;
+ }
+ break;
+
+ case CCL1mode:
+ switch (GET_CODE (code))
+ {
+ case LTU: return CC2 | CC3; /* carry */
+ case GEU: return CC0 | CC1; /* no carry */
+ default: return -1;
+ }
+ break;
+
+ case CCL2mode:
+ switch (GET_CODE (code))
+ {
+ case GTU: return CC0 | CC1; /* borrow */
+ case LEU: return CC2 | CC3; /* no borrow */
+ default: return -1;
+ }
+ break;
+
+ case CCL3mode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0 | CC2;
+ case NE: return CC1 | CC3;
+ case LTU: return CC1;
+ case GTU: return CC3;
+ case LEU: return CC1 | CC2;
+ case GEU: return CC2 | CC3;
+ default: return -1;
+ }
+
+ case CCUmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC1 | CC2 | CC3;
+ case LTU: return CC1;
+ case GTU: return CC2;
+ case LEU: return CC0 | CC1;
+ case GEU: return CC0 | CC2;
+ default: return -1;
+ }
+ break;
+
+ case CCURmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC2 | CC1 | CC3;
+ case LTU: return CC2;
+ case GTU: return CC1;
+ case LEU: return CC0 | CC2;
+ case GEU: return CC0 | CC1;
+ default: return -1;
+ }
+ break;
+
+ case CCAPmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC1 | CC2 | CC3;
+ case LT: return CC1 | CC3;
+ case GT: return CC2;
+ case LE: return CC0 | CC1 | CC3;
+ case GE: return CC0 | CC2;
+ default: return -1;
+ }
+ break;
+
+ case CCANmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC1 | CC2 | CC3;
+ case LT: return CC1;
+ case GT: return CC2 | CC3;
+ case LE: return CC0 | CC1;
+ case GE: return CC0 | CC2 | CC3;
+ default: return -1;
+ }
+ break;
+
+ case CCSmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC1 | CC2 | CC3;
+ case LT: return CC1;
+ case GT: return CC2;
+ case LE: return CC0 | CC1;
+ case GE: return CC0 | CC2;
+ case UNORDERED: return CC3;
+ case ORDERED: return CC0 | CC1 | CC2;
+ case UNEQ: return CC0 | CC3;
+ case UNLT: return CC1 | CC3;
+ case UNGT: return CC2 | CC3;
+ case UNLE: return CC0 | CC1 | CC3;
+ case UNGE: return CC0 | CC2 | CC3;
+ case LTGT: return CC1 | CC2;
+ default: return -1;
+ }
+ break;
+
+ case CCSRmode:
+ switch (GET_CODE (code))
+ {
+ case EQ: return CC0;
+ case NE: return CC2 | CC1 | CC3;
+ case LT: return CC2;
+ case GT: return CC1;
+ case LE: return CC0 | CC2;
+ case GE: return CC0 | CC1;
+ case UNORDERED: return CC3;
+ case ORDERED: return CC0 | CC2 | CC1;
+ case UNEQ: return CC0 | CC3;
+ case UNLT: return CC2 | CC3;
+ case UNGT: return CC1 | CC3;
+ case UNLE: return CC0 | CC2 | CC3;
+ case UNGE: return CC0 | CC1 | CC3;
+ case LTGT: return CC2 | CC1;
+ default: return -1;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+}
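+
+/* Example: an EQ test against a CCZmode CC register yields the mask
+   CC0 == 8 (binary 1000), i.e. branch only on condition code 0;
+   s390_branch_condition_mnemonic below maps that mask to "e".  */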
+
+
+/* Return branch condition mask to implement a compare and branch
+ specified by CODE. Return -1 for invalid comparisons. */
+
+int
+s390_compare_and_branch_condition_mask (rtx code)
+{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC2 = 1 << 1;
+
+ switch (GET_CODE (code))
+ {
+ case EQ:
+ return CC0;
+ case NE:
+ return CC1 | CC2;
+ case LT:
+ case LTU:
+ return CC1;
+ case GT:
+ case GTU:
+ return CC2;
+ case LE:
+ case LEU:
+ return CC0 | CC1;
+ case GE:
+ case GEU:
+ return CC0 | CC2;
+ default:
+ gcc_unreachable ();
+ }
+ return -1;
+}
+
+/* If INV is false, return assembler mnemonic string to implement
+ a branch specified by CODE. If INV is true, return mnemonic
+ for the corresponding inverted branch. */
+
+static const char *
+s390_branch_condition_mnemonic (rtx code, int inv)
+{
+ int mask;
+
+ static const char *const mnemonic[16] =
+ {
+ NULL, "o", "h", "nle",
+ "l", "nhe", "lh", "ne",
+ "e", "nlh", "he", "nl",
+ "le", "nh", "no", NULL
+ };
+
+ if (GET_CODE (XEXP (code, 0)) == REG
+ && REGNO (XEXP (code, 0)) == CC_REGNUM
+ && XEXP (code, 1) == const0_rtx)
+ mask = s390_branch_condition_mask (code);
+ else
+ mask = s390_compare_and_branch_condition_mask (code);
+
+ gcc_assert (mask >= 0);
+
+ if (inv)
+ mask ^= 15;
+
+ gcc_assert (mask >= 1 && mask <= 14);
+
+ return mnemonic[mask];
+}
+
+/* Return the part of OP which has a value different from DEF.
+   The size of the part is determined by MODE.
+   Use this function only if you already know that OP really
+   contains such a part.  */
+
+unsigned HOST_WIDE_INT
+s390_extract_part (rtx op, enum machine_mode mode, int def)
+{
+ unsigned HOST_WIDE_INT value = 0;
+ int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
+ int part_bits = GET_MODE_BITSIZE (mode);
+ unsigned HOST_WIDE_INT part_mask
+ = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
+ int i;
+
+ for (i = 0; i < max_parts; i++)
+ {
+ if (i == 0)
+ value = (unsigned HOST_WIDE_INT) INTVAL (op);
+ else
+ value >>= part_bits;
+
+ if ((value & part_mask) != (def & part_mask))
+ return value & part_mask;
+ }
+
+ gcc_unreachable ();
+}
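+
+/* For illustration, assuming a 64-bit HOST_WIDE_INT:
+   s390_extract_part (GEN_INT (0x12340000), HImode, 0) scans the four
+   16-bit parts starting at the least significant end and returns
+   0x1234, the first part that differs from DEF.  */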
+
+/* If OP is an integer constant of mode MODE with exactly one
+ part of mode PART_MODE unequal to DEF, return the number of that
+ part. Otherwise, return -1. */
+
+int
+s390_single_part (rtx op,
+ enum machine_mode mode,
+ enum machine_mode part_mode,
+ int def)
+{
+ unsigned HOST_WIDE_INT value = 0;
+ int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
+ unsigned HOST_WIDE_INT part_mask
+ = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
+ int i, part = -1;
+
+ if (GET_CODE (op) != CONST_INT)
+ return -1;
+
+ for (i = 0; i < n_parts; i++)
+ {
+ if (i == 0)
+ value = (unsigned HOST_WIDE_INT) INTVAL (op);
+ else
+ value >>= GET_MODE_BITSIZE (part_mode);
+
+ if ((value & part_mask) != (def & part_mask))
+ {
+ if (part != -1)
+ return -1;
+ else
+ part = i;
+ }
+ }
+ return part == -1 ? -1 : n_parts - 1 - part;
+}
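+
+/* For illustration: s390_single_part (GEN_INT (0x12340000), DImode,
+   HImode, 0) finds exactly one non-zero 16-bit part and returns 2,
+   the part number counted from the most significant part.  */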
+
+/* Return true if IN contains a contiguous bitfield in the lower SIZE
+ bits and no other bits are set in IN. POS and LENGTH can be used
+ to obtain the start position and the length of the bitfield.
+
+ POS gives the position of the first bit of the bitfield counting
+ from the lowest order bit starting with zero. In order to use this
+ value for S/390 instructions this has to be converted to "bits big
+ endian" style. */
+
+bool
+s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
+ int *pos, int *length)
+{
+ int tmp_pos = 0;
+ int tmp_length = 0;
+ int i;
+ unsigned HOST_WIDE_INT mask = 1ULL;
+ bool contiguous = false;
+
+ for (i = 0; i < size; mask <<= 1, i++)
+ {
+ if (contiguous)
+ {
+ if (mask & in)
+ tmp_length++;
+ else
+ break;
+ }
+ else
+ {
+ if (mask & in)
+ {
+ contiguous = true;
+ tmp_length++;
+ }
+ else
+ tmp_pos++;
+ }
+ }
+
+ if (!tmp_length)
+ return false;
+
+ /* Calculate a mask for all bits beyond the contiguous bits. */
+ mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
+
+ if (mask & in)
+ return false;
+
+ if (tmp_length + tmp_pos - 1 > size)
+ return false;
+
+ if (length)
+ *length = tmp_length;
+
+ if (pos)
+ *pos = tmp_pos;
+
+ return true;
+}
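+
+/* For illustration: s390_contiguous_bitmask_p (0xff0, 16, &pos, &len)
+   returns true with pos == 4 and len == 8, while 0xff1 is rejected
+   because its set bits are not contiguous.  */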
+
+/* Check whether we can (and want to) split a double-word
+ move in mode MODE from SRC to DST into two single-word
+ moves, moving the subword FIRST_SUBWORD first. */
+
+bool
+s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
+{
+ /* Floating point registers cannot be split. */
+ if (FP_REG_P (src) || FP_REG_P (dst))
+ return false;
+
+ /* We don't need to split if operands are directly accessible. */
+ if (s_operand (src, mode) || s_operand (dst, mode))
+ return false;
+
+ /* Non-offsettable memory references cannot be split. */
+ if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
+ || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
+ return false;
+
+ /* Moving the first subword must not clobber a register
+ needed to move the second subword. */
+ if (register_operand (dst, mode))
+ {
+ rtx subreg = operand_subword (dst, first_subword, 0, mode);
+ if (reg_overlap_mentioned_p (subreg, src))
+ return false;
+ }
+
+ return true;
+}
+
+/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
+   and [MEM2, MEM2 + SIZE] overlap, and false otherwise.  */
+
+bool
+s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
+{
+ rtx addr1, addr2, addr_delta;
+ HOST_WIDE_INT delta;
+
+ if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
+ return true;
+
+ if (size == 0)
+ return false;
+
+ addr1 = XEXP (mem1, 0);
+ addr2 = XEXP (mem2, 0);
+
+ addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
+
+ /* This overlapping check is used by peepholes merging memory block operations.
+ Overlapping operations would otherwise be recognized by the S/390 hardware
+ and would fall back to a slower implementation. Allowing overlapping
+ operations would lead to slow code but not to wrong code. Therefore we are
+ somewhat optimistic if we cannot prove that the memory blocks are
+ overlapping.
+ That's why we return false here although this may accept operations on
+ overlapping memory areas. */
+ if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
+ return false;
+
+ delta = INTVAL (addr_delta);
+
+ if (delta == 0
+ || (delta > 0 && delta < size)
+ || (delta < 0 && -delta < size))
+ return true;
+
+ return false;
+}
+
+/* Check whether the address of memory reference MEM2 equals exactly
+ the address of memory reference MEM1 plus DELTA. Return true if
+ we can prove this to be the case, false otherwise. */
+
+bool
+s390_offset_p (rtx mem1, rtx mem2, rtx delta)
+{
+ rtx addr1, addr2, addr_delta;
+
+ if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
+ return false;
+
+ addr1 = XEXP (mem1, 0);
+ addr2 = XEXP (mem2, 0);
+
+ addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
+ if (!addr_delta || !rtx_equal_p (addr_delta, delta))
+ return false;
+
+ return true;
+}
+
+/* Expand logical operator CODE in mode MODE with operands OPERANDS. */
+
+void
+s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
+ rtx *operands)
+{
+ enum machine_mode wmode = mode;
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+ rtx op, clob, tem;
+
+ /* If we cannot handle the operation directly, use a temp register. */
+ if (!s390_logical_operator_ok_p (operands))
+ dst = gen_reg_rtx (mode);
+
+ /* QImode and HImode patterns make sense only if we have a destination
+ in memory. Otherwise perform the operation in SImode. */
+ if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
+ wmode = SImode;
+
+ /* Widen operands if required. */
+ if (mode != wmode)
+ {
+ if (GET_CODE (dst) == SUBREG
+ && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
+ dst = tem;
+ else if (REG_P (dst))
+ dst = gen_rtx_SUBREG (wmode, dst, 0);
+ else
+ dst = gen_reg_rtx (wmode);
+
+ if (GET_CODE (src1) == SUBREG
+ && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
+ src1 = tem;
+ else if (GET_MODE (src1) != VOIDmode)
+ src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
+
+ if (GET_CODE (src2) == SUBREG
+ && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
+ src2 = tem;
+ else if (GET_MODE (src2) != VOIDmode)
+ src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
+ }
+
+ /* Emit the instruction. */
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], gen_lowpart (mode, dst));
+}
+
+/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
+
+bool
+s390_logical_operator_ok_p (rtx *operands)
+{
+ /* If the destination operand is in memory, it needs to coincide
+ with one of the source operands. After reload, it has to be
+ the first source operand. */
+ if (GET_CODE (operands[0]) == MEM)
+ return rtx_equal_p (operands[0], operands[1])
+ || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
+
+ return true;
+}
+
+/* Narrow logical operation CODE of memory operand MEMOP with immediate
+ operand IMMOP to switch from SS to SI type instructions. */
+
+void
+s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
+{
+ int def = code == AND ? -1 : 0;
+ HOST_WIDE_INT mask;
+ int part;
+
+ gcc_assert (GET_CODE (*memop) == MEM);
+ gcc_assert (!MEM_VOLATILE_P (*memop));
+
+ mask = s390_extract_part (*immop, QImode, def);
+ part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
+ gcc_assert (part >= 0);
+
+ *memop = adjust_address (*memop, QImode, part);
+ *immop = gen_int_mode (mask, QImode);
+}
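+
+/* For illustration: an SImode AND of a non-volatile memory operand
+   with 0xffff00ff changes only one byte, so the operation is narrowed
+   to a QImode AND of byte 2 (counted from the most significant byte)
+   with mask 0x00, which maps to a single SI-type instruction (NI).  */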
+
+
+/* How to allocate a 'struct machine_function'. */
+
+static struct machine_function *
+s390_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Change optimizations to be performed, depending on the
+ optimization level. */
+
+static const struct default_options s390_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+
+ /* ??? There are apparently still problems with -fcaller-saves. */
+ { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
+
+ /* Use MVCLE instructions to decrease code size if requested. */
+ { OPT_LEVELS_SIZE, OPT_mmvcle, NULL, 1 },
+
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Implement TARGET_OPTION_INIT_STRUCT. */
+
+static void
+s390_option_init_struct (struct gcc_options *opts)
+{
+ /* By default, always emit DWARF-2 unwind info. This allows debugging
+ without maintaining a stack frame back-chain. */
+ opts->x_flag_asynchronous_unwind_tables = 1;
+}
+
+/* Return true if ARG is the name of a processor. Set *TYPE and *FLAGS
+ to the associated processor_type and processor_flags if so. */
+
+static bool
+s390_handle_arch_option (const char *arg,
+ enum processor_type *type,
+ int *flags)
+{
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ const int flags; /* From enum processor_flags. */
+ }
+ const processor_alias_table[] =
+ {
+ {"g5", PROCESSOR_9672_G5, PF_IEEE_FLOAT},
+ {"g6", PROCESSOR_9672_G6, PF_IEEE_FLOAT},
+ {"z900", PROCESSOR_2064_Z900, PF_IEEE_FLOAT | PF_ZARCH},
+ {"z990", PROCESSOR_2084_Z990, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT},
+ {"z9-109", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM},
+ {"z9-ec", PROCESSOR_2094_Z9_109, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP },
+ {"z10", PROCESSOR_2097_Z10, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10},
+ {"z196", PROCESSOR_2817_Z196, PF_IEEE_FLOAT | PF_ZARCH
+ | PF_LONG_DISPLACEMENT | PF_EXTIMM | PF_DFP | PF_Z10 | PF_Z196 },
+ };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE (processor_alias_table); i++)
+ if (strcmp (arg, processor_alias_table[i].name) == 0)
+ {
+ *type = processor_alias_table[i].processor;
+ *flags = processor_alias_table[i].flags;
+ return true;
+ }
+
+ *type = PROCESSOR_max;
+ *flags = 0;
+ return false;
+}
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+s390_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_march_:
+ return s390_handle_arch_option (arg, &s390_arch, &s390_arch_flags);
+
+ case OPT_mstack_guard_:
+ if (sscanf (arg, HOST_WIDE_INT_PRINT_DEC, &s390_stack_guard) != 1)
+ return false;
+ if (exact_log2 (s390_stack_guard) == -1)
+ error ("stack guard value must be an exact power of 2");
+ return true;
+
+ case OPT_mstack_size_:
+ if (sscanf (arg, HOST_WIDE_INT_PRINT_DEC, &s390_stack_size) != 1)
+ return false;
+ if (exact_log2 (s390_stack_size) == -1)
+ error ("stack size must be an exact power of 2");
+ return true;
+
+ case OPT_mtune_:
+ return s390_handle_arch_option (arg, &s390_tune, &s390_tune_flags);
+
+ case OPT_mwarn_framesize_:
+ return sscanf (arg, HOST_WIDE_INT_PRINT_DEC, &s390_warn_framesize) == 1;
+
+ default:
+ return true;
+ }
+}
+
+static void
+s390_option_override (void)
+{
+ /* Set up function hooks. */
+ init_machine_status = s390_init_machine_status;
+
+ /* Architecture mode defaults according to ABI. */
+ if (!(target_flags_explicit & MASK_ZARCH))
+ {
+ if (TARGET_64BIT)
+ target_flags |= MASK_ZARCH;
+ else
+ target_flags &= ~MASK_ZARCH;
+ }
+
+ /* Determine processor architectural level. */
+ if (!s390_arch_string)
+ {
+      s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
+ s390_handle_arch_option (s390_arch_string, &s390_arch, &s390_arch_flags);
+ }
+
+ /* This check is triggered when the user specified a wrong -march=
+ string and prevents subsequent error messages from being
+ issued. */
+ if (s390_arch == PROCESSOR_max)
+ return;
+
+ /* Determine processor to tune for. */
+ if (s390_tune == PROCESSOR_max)
+ {
+ s390_tune = s390_arch;
+ s390_tune_flags = s390_arch_flags;
+ }
+
+ /* Sanity checks. */
+ if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
+ error ("z/Architecture mode not supported on %s", s390_arch_string);
+ if (TARGET_64BIT && !TARGET_ZARCH)
+ error ("64-bit ABI not supported in ESA/390 mode");
+
+ if (TARGET_HARD_DFP && !TARGET_DFP)
+ {
+ if (target_flags_explicit & MASK_HARD_DFP)
+ {
+ if (!TARGET_CPU_DFP)
+ error ("hardware decimal floating point instructions"
+ " not available on %s", s390_arch_string);
+ if (!TARGET_ZARCH)
+ error ("hardware decimal floating point instructions"
+ " not available in ESA/390 mode");
+ }
+ else
+ target_flags &= ~MASK_HARD_DFP;
+ }
+
+ if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
+ {
+ if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
+ error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
+
+ target_flags &= ~MASK_HARD_DFP;
+ }
+
+ /* Set processor cost function. */
+ switch (s390_tune)
+ {
+ case PROCESSOR_2084_Z990:
+ s390_cost = &z990_cost;
+ break;
+ case PROCESSOR_2094_Z9_109:
+ s390_cost = &z9_109_cost;
+ break;
+    case PROCESSOR_2097_Z10:
+      s390_cost = &z10_cost;
+      break;
+ case PROCESSOR_2817_Z196:
+ s390_cost = &z196_cost;
+ break;
+ default:
+ s390_cost = &z900_cost;
+ }
+
+ if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
+ error ("-mbackchain -mpacked-stack -mhard-float are not supported "
+ "in combination");
+
+ if (s390_stack_size)
+ {
+ if (s390_stack_guard >= s390_stack_size)
+ error ("stack size must be greater than the stack guard value");
+ else if (s390_stack_size > 1 << 16)
+ error ("stack size must not be greater than 64k");
+ }
+ else if (s390_stack_guard)
+ error ("-mstack-guard implies use of -mstack-size");
+
+#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+ if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
+ target_flags |= MASK_LONG_DOUBLE_128;
+#endif
+
+ if (s390_tune == PROCESSOR_2097_Z10
+ || s390_tune == PROCESSOR_2817_Z196)
+ {
+ maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ }
+
+ maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+  /* Values for loop prefetching.  */
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+  /* s390 has more than 2 cache levels and the sizes are much larger.
+     Since we are always running virtualized, assume that we only get
+     a small part of the caches above L1.  */
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
+ requires the arch flags to be evaluated already. Since prefetching
+ is beneficial on s390, we enable it if available. */
+ if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
+ flag_prefetch_loop_arrays = 1;
+}
+
+/* Map for smallest class containing reg regno. */
+
+const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
+{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
+ ACCESS_REGS, ACCESS_REGS
+};
+
+/* Return attribute type of insn. */
+
+static enum attr_type
+s390_safe_attr_type (rtx insn)
+{
+ if (recog_memoized (insn) >= 0)
+ return get_attr_type (insn);
+ else
+ return TYPE_NONE;
+}
+
+/* Return true if DISP is a valid short displacement. */
+
+static bool
+s390_short_displacement (rtx disp)
+{
+ /* No displacement is OK. */
+ if (!disp)
+ return true;
+
+  /* Without the long displacement facility we don't need to
+     distinguish between long and short displacements.  */
+ if (!TARGET_LONG_DISPLACEMENT)
+ return true;
+
+ /* Integer displacement in range. */
+ if (GET_CODE (disp) == CONST_INT)
+ return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
+
+ /* GOT offset is not OK, the GOT can be large. */
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
+ || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
+ return false;
+
+ /* All other symbolic constants are literal pool references,
+ which are OK as the literal pool must be small. */
+ if (GET_CODE (disp) == CONST)
+ return true;
+
+ return false;
+}
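+
+/* Note: short displacements are the classic unsigned 12-bit D field
+   (0..4095); the long displacement facility adds a signed 20-bit
+   field, which is why the distinction above only matters when
+   TARGET_LONG_DISPLACEMENT is set.  */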
+
+/* Decompose an RTL expression ADDR for a memory address into
+ its components, returned in OUT.
+
+ Returns false if ADDR is not a valid memory address, true
+ otherwise. If OUT is NULL, don't return the components,
+ but check for validity only.
+
+ Note: Only addresses in canonical form are recognized.
+ LEGITIMIZE_ADDRESS should convert non-canonical forms to the
+ canonical form so that they will be recognized. */
+
+static int
+s390_decompose_address (rtx addr, struct s390_address *out)
+{
+ HOST_WIDE_INT offset = 0;
+ rtx base = NULL_RTX;
+ rtx indx = NULL_RTX;
+ rtx disp = NULL_RTX;
+ rtx orig_disp;
+ bool pointer = false;
+ bool base_ptr = false;
+ bool indx_ptr = false;
+ bool literal_pool = false;
+
+ /* We may need to substitute the literal pool base register into the address
+ below. However, at this point we do not know which register is going to
+ be used as base, so we substitute the arg pointer register. This is going
+ to be treated as holding a pointer below -- it shouldn't be used for any
+ other purpose. */
+ rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
+
+ /* Decompose address into base + index + displacement. */
+
+ if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
+ base = addr;
+
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0);
+ rtx op1 = XEXP (addr, 1);
+ enum rtx_code code0 = GET_CODE (op0);
+ enum rtx_code code1 = GET_CODE (op1);
+
+ if (code0 == REG || code0 == UNSPEC)
+ {
+ if (code1 == REG || code1 == UNSPEC)
+ {
+ indx = op0; /* index + base */
+ base = op1;
+ }
+
+ else
+ {
+ base = op0; /* base + displacement */
+ disp = op1;
+ }
+ }
+
+ else if (code0 == PLUS)
+ {
+ indx = XEXP (op0, 0); /* index + base + disp */
+ base = XEXP (op0, 1);
+ disp = op1;
+ }
+
+ else
+ {
+ return false;
+ }
+ }
+
+ else
+ disp = addr; /* displacement */
+
+ /* Extract integer part of displacement. */
+ orig_disp = disp;
+ if (disp)
+ {
+ if (GET_CODE (disp) == CONST_INT)
+ {
+ offset = INTVAL (disp);
+ disp = NULL_RTX;
+ }
+ else if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
+ {
+ offset = INTVAL (XEXP (XEXP (disp, 0), 1));
+ disp = XEXP (XEXP (disp, 0), 0);
+ }
+ }
+
+ /* Strip off CONST here to avoid special case tests later. */
+ if (disp && GET_CODE (disp) == CONST)
+ disp = XEXP (disp, 0);
+
+ /* We can convert literal pool addresses to
+ displacements by basing them off the base register. */
+ if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
+ {
+ /* Either base or index must be free to hold the base register. */
+ if (!base)
+ base = fake_pool_base, literal_pool = true;
+ else if (!indx)
+ indx = fake_pool_base, literal_pool = true;
+ else
+ return false;
+
+ /* Mark up the displacement. */
+ disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
+ UNSPEC_LTREL_OFFSET);
+ }
+
+ /* Validate base register. */
+ if (base)
+ {
+ if (GET_CODE (base) == UNSPEC)
+ switch (XINT (base, 1))
+ {
+ case UNSPEC_LTREF:
+ if (!disp)
+ disp = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, XVECEXP (base, 0, 0)),
+ UNSPEC_LTREL_OFFSET);
+ else
+ return false;
+
+ base = XVECEXP (base, 0, 1);
+ break;
+
+ case UNSPEC_LTREL_BASE:
+ if (XVECLEN (base, 0) == 1)
+ base = fake_pool_base, literal_pool = true;
+ else
+ base = XVECEXP (base, 0, 1);
+ break;
+
+ default:
+ return false;
+ }
+
+ if (!REG_P (base)
+ || (GET_MODE (base) != SImode
+ && GET_MODE (base) != Pmode))
+ return false;
+
+ if (REGNO (base) == STACK_POINTER_REGNUM
+ || REGNO (base) == FRAME_POINTER_REGNUM
+ || ((reload_completed || reload_in_progress)
+ && frame_pointer_needed
+ && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
+ || REGNO (base) == ARG_POINTER_REGNUM
+ || (flag_pic
+ && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
+ pointer = base_ptr = true;
+
+ if ((reload_completed || reload_in_progress)
+ && base == cfun->machine->base_reg)
+ pointer = base_ptr = literal_pool = true;
+ }
+
+ /* Validate index register. */
+ if (indx)
+ {
+ if (GET_CODE (indx) == UNSPEC)
+ switch (XINT (indx, 1))
+ {
+ case UNSPEC_LTREF:
+ if (!disp)
+ disp = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, XVECEXP (indx, 0, 0)),
+ UNSPEC_LTREL_OFFSET);
+ else
+ return false;
+
+ indx = XVECEXP (indx, 0, 1);
+ break;
+
+ case UNSPEC_LTREL_BASE:
+ if (XVECLEN (indx, 0) == 1)
+ indx = fake_pool_base, literal_pool = true;
+ else
+ indx = XVECEXP (indx, 0, 1);
+ break;
+
+ default:
+ return false;
+ }
+
+ if (!REG_P (indx)
+ || (GET_MODE (indx) != SImode
+ && GET_MODE (indx) != Pmode))
+ return false;
+
+ if (REGNO (indx) == STACK_POINTER_REGNUM
+ || REGNO (indx) == FRAME_POINTER_REGNUM
+ || ((reload_completed || reload_in_progress)
+ && frame_pointer_needed
+ && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
+ || REGNO (indx) == ARG_POINTER_REGNUM
+ || (flag_pic
+ && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
+ pointer = indx_ptr = true;
+
+ if ((reload_completed || reload_in_progress)
+ && indx == cfun->machine->base_reg)
+ pointer = indx_ptr = literal_pool = true;
+ }
+
+ /* Prefer to use pointer as base, not index. */
+ if (base && indx && !base_ptr
+ && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
+ {
+ rtx tmp = base;
+ base = indx;
+ indx = tmp;
+ }
+
+ /* Validate displacement. */
+ if (!disp)
+ {
+ /* If virtual registers are involved, the displacement will change later
+ anyway as the virtual registers get eliminated. This could make a
+ valid displacement invalid, but it is more likely to make an invalid
+ displacement valid, because we sometimes access the register save area
+ via negative offsets to one of those registers.
+ Thus we don't check the displacement for validity here. If after
+ elimination the displacement turns out to be invalid after all,
+ this is fixed up by reload in any case. */
+ if (base != arg_pointer_rtx
+ && indx != arg_pointer_rtx
+ && base != return_address_pointer_rtx
+ && indx != return_address_pointer_rtx
+ && base != frame_pointer_rtx
+ && indx != frame_pointer_rtx
+ && base != virtual_stack_vars_rtx
+ && indx != virtual_stack_vars_rtx)
+ if (!DISP_IN_RANGE (offset))
+ return false;
+ }
+ else
+ {
+ /* All the special cases are pointers. */
+ pointer = true;
+
+ /* In the small-PIC case, the linker converts @GOT
+ and @GOTNTPOFF offsets to possible displacements. */
+ if (GET_CODE (disp) == UNSPEC
+ && (XINT (disp, 1) == UNSPEC_GOT
+ || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
+ && flag_pic == 1)
+ {
+ ;
+ }
+
+ /* Accept pool label offsets. */
+ else if (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
+ ;
+
+ /* Accept literal pool references. */
+ else if (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
+ {
+	  /* In case CSE pulled a non-literal-pool reference out of
+	     the pool we have to reject the address.  This is
+	     especially important when loading the GOT pointer on
+	     non-zarch CPUs.  In this case the literal pool contains
+	     an LT-relative offset to the _GLOBAL_OFFSET_TABLE_ label
+	     which will most likely exceed the displacement.  */
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
+ return false;
+
+ orig_disp = gen_rtx_CONST (Pmode, disp);
+ if (offset)
+ {
+ /* If we have an offset, make sure it does not
+ exceed the size of the constant pool entry. */
+ rtx sym = XVECEXP (disp, 0, 0);
+ if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
+ return false;
+
+ orig_disp = plus_constant (orig_disp, offset);
+ }
+ }
+
+ else
+ return false;
+ }
+
+ if (!base && !indx)
+ pointer = true;
+
+ if (out)
+ {
+ out->base = base;
+ out->indx = indx;
+ out->disp = orig_disp;
+ out->pointer = pointer;
+ out->literal_pool = literal_pool;
+ }
+
+ return true;
+}
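+
+/* For illustration: the canonical address
+     (plus (plus (reg X) (reg B)) (const_int 100))
+   decomposes into indx X, base B and displacement 100, matching the
+   D(X,B) operand format of the hardware.  */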
+
+/* Decompose an RTL expression OP for a shift count into its components,
+ and return the base register in BASE and the offset in OFFSET.
+
+ Return true if OP is a valid shift count, false if not. */
+
+bool
+s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
+{
+ HOST_WIDE_INT off = 0;
+
+ /* We can have an integer constant, an address register,
+ or a sum of the two. */
+ if (GET_CODE (op) == CONST_INT)
+ {
+ off = INTVAL (op);
+ op = NULL_RTX;
+ }
+ if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
+ {
+ off = INTVAL (XEXP (op, 1));
+ op = XEXP (op, 0);
+ }
+ while (op && GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (op && GET_CODE (op) != REG)
+ return false;
+
+ if (offset)
+ *offset = off;
+ if (base)
+ *base = op;
+
+ return true;
+}
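+
+/* For illustration: (plus (reg R) (const_int 7)) decomposes into
+   base R and offset 7, while a plain (const_int 13) yields a null
+   base and offset 13.  */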
+
+
+/* Return true if OP is a valid address without index.  */
+
+bool
+s390_legitimate_address_without_index_p (rtx op)
+{
+ struct s390_address addr;
+
+ if (!s390_decompose_address (XEXP (op, 0), &addr))
+ return false;
+ if (addr.indx)
+ return false;
+
+ return true;
+}
+
+
+/* Return true if ADDR is of kind symbol_ref or symbol_ref + const_int
+ and return these parts in SYMREF and ADDEND. You can pass NULL in
+ SYMREF and/or ADDEND if you are not interested in these values.
+ Literal pool references are *not* considered symbol references. */
+
+static bool
+s390_symref_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
+{
+ HOST_WIDE_INT tmpaddend = 0;
+
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
+ && !CONSTANT_POOL_ADDRESS_P (XEXP (addr, 0))
+ && CONST_INT_P (XEXP (addr, 1)))
+ {
+ tmpaddend = INTVAL (XEXP (addr, 1));
+ addr = XEXP (addr, 0);
+ }
+ else
+ return false;
+ }
+ else
+ if (GET_CODE (addr) != SYMBOL_REF || CONSTANT_POOL_ADDRESS_P (addr))
+ return false;
+
+ if (symref)
+ *symref = addr;
+ if (addend)
+ *addend = tmpaddend;
+
+ return true;
+}
+
+
+/* Return true if the address in OP is valid for constraint letter C
+   if wrapped in a MEM rtx.  LIT_POOL_OK indicates whether literal
+   pool MEMs should be accepted.  Only the Q, R, S, T constraint
+   letters are allowed for C.  */
+
+static int
+s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
+{
+ struct s390_address addr;
+ bool decomposed = false;
+
+  /* This check makes sure that no symbolic addresses (except literal
+     pool references) are accepted by the R or T constraints.  */
+ if (s390_symref_operand_p (op, NULL, NULL))
+ return 0;
+
+ /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
+ if (!lit_pool_ok)
+ {
+ if (!s390_decompose_address (op, &addr))
+ return 0;
+ if (addr.literal_pool)
+ return 0;
+ decomposed = true;
+ }
+
+ switch (c)
+ {
+ case 'Q': /* no index short displacement */
+ if (!decomposed && !s390_decompose_address (op, &addr))
+ return 0;
+ if (addr.indx)
+ return 0;
+ if (!s390_short_displacement (addr.disp))
+ return 0;
+ break;
+
+ case 'R': /* with index short displacement */
+ if (TARGET_LONG_DISPLACEMENT)
+ {
+ if (!decomposed && !s390_decompose_address (op, &addr))
+ return 0;
+ if (!s390_short_displacement (addr.disp))
+ return 0;
+ }
+ /* Any invalid address here will be fixed up by reload,
+ so accept it for the most generic constraint. */
+ break;
+
+ case 'S': /* no index long displacement */
+ if (!TARGET_LONG_DISPLACEMENT)
+ return 0;
+ if (!decomposed && !s390_decompose_address (op, &addr))
+ return 0;
+ if (addr.indx)
+ return 0;
+ if (s390_short_displacement (addr.disp))
+ return 0;
+ break;
+
+ case 'T': /* with index long displacement */
+ if (!TARGET_LONG_DISPLACEMENT)
+ return 0;
+ /* Any invalid address here will be fixed up by reload,
+ so accept it for the most generic constraint. */
+ if ((decomposed || s390_decompose_address (op, &addr))
+ && s390_short_displacement (addr.disp))
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Evaluates constraint strings described by the regular expression
+ ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
+   the constraint given in STR, and 0 otherwise.  */
+
+int
+s390_mem_constraint (const char *str, rtx op)
+{
+ char c = str[0];
+
+ switch (c)
+ {
+ case 'A':
+ /* Check for offsettable variants of memory constraints. */
+ if (!MEM_P (op) || MEM_VOLATILE_P (op))
+ return 0;
+ if ((reload_completed || reload_in_progress)
+ ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
+ return 0;
+ return s390_check_qrst_address (str[1], XEXP (op, 0), true);
+ case 'B':
+ /* Check for non-literal-pool variants of memory constraints. */
+ if (!MEM_P (op))
+ return 0;
+ return s390_check_qrst_address (str[1], XEXP (op, 0), false);
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ if (GET_CODE (op) != MEM)
+ return 0;
+ return s390_check_qrst_address (c, XEXP (op, 0), true);
+ case 'U':
+ return (s390_check_qrst_address ('Q', op, true)
+ || s390_check_qrst_address ('R', op, true));
+ case 'W':
+ return (s390_check_qrst_address ('S', op, true)
+ || s390_check_qrst_address ('T', op, true));
+ case 'Y':
+ /* Simply check for the basic form of a shift count. Reload will
+ take care of making sure we have a proper base register. */
+ if (!s390_decompose_shift_count (op, NULL, NULL))
+ return 0;
+ break;
+ case 'Z':
+ return s390_check_qrst_address (str[1], op, true);
+ default:
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Evaluates constraint strings starting with letter O. Input
+ parameter C is the letter immediately following the "O" in the
+ constraint string. Returns 1 if VALUE meets the respective
+ constraint and 0 otherwise. */
+
+int
+s390_O_constraint_str (const char c, HOST_WIDE_INT value)
+{
+ if (!TARGET_EXTIMM)
+ return 0;
+
+ switch (c)
+ {
+ case 's':
+ return trunc_int_for_mode (value, SImode) == value;
+
+ case 'p':
+ return value == 0
+ || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
+
+ case 'n':
+ return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Evaluates constraint strings starting with letter N. Parameter STR
+ contains the letters following letter "N" in the constraint string.
+ Returns true if VALUE matches the constraint. */
+
+int
+s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
+{
+ enum machine_mode mode, part_mode;
+ int def;
+ int part, part_goal;
+
+ if (str[0] == 'x')
+ part_goal = -1;
+ else
+ part_goal = str[0] - '0';
+
+ switch (str[1])
+ {
+ case 'Q':
+ part_mode = QImode;
+ break;
+ case 'H':
+ part_mode = HImode;
+ break;
+ case 'S':
+ part_mode = SImode;
+ break;
+ default:
+ return 0;
+ }
+
+ switch (str[2])
+ {
+ case 'H':
+ mode = HImode;
+ break;
+ case 'S':
+ mode = SImode;
+ break;
+ case 'D':
+ mode = DImode;
+ break;
+ default:
+ return 0;
+ }
+
+ switch (str[3])
+ {
+ case '0':
+ def = 0;
+ break;
+ case 'F':
+ def = -1;
+ break;
+ default:
+ return 0;
+ }
+
+ if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
+ return 0;
+
+ part = s390_single_part (GEN_INT (value), mode, part_mode, def);
+ if (part < 0)
+ return 0;
+ if (part_goal != -1 && part_goal != part)
+ return 0;
+
+ return 1;
+}
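+
+/* Worked example of the N constraint format decoded above
+ (<part><part_mode><mode><def>): the string "xHD0" asks whether
+ VALUE, viewed in DImode, has exactly one HImode part differing
+ from all-zero bits, at any part index. A value such as
+ 0x00000000ffff0000 matches; 0xffff00000000ffff does not, since
+ two 16-bit parts are nonzero. (Illustrative values only.) */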
+
+
+/* Returns true if the input parameter VALUE is a float zero. */
+
+int
+s390_float_const_zero_p (rtx value)
+{
+ return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
+ && value == CONST0_RTX (GET_MODE (value)));
+}
+
+/* Implement TARGET_REGISTER_MOVE_COST. */
+
+static int
+s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ /* On s390, a copy between fprs and gprs is expensive. */
+ if ((reg_classes_intersect_p (from, GENERAL_REGS)
+ && reg_classes_intersect_p (to, FP_REGS))
+ || (reg_classes_intersect_p (from, FP_REGS)
+ && reg_classes_intersect_p (to, GENERAL_REGS)))
+ return 10;
+
+ return 1;
+}
+
+/* Implement TARGET_MEMORY_MOVE_COST. */
+
+static int
+s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ return 1;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result.
+ CODE contains GET_CODE (x), OUTER_CODE contains the code
+ of the superexpression of x. */
+
+static bool
+s390_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST:
+ case CONST_INT:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_DOUBLE:
+ case MEM:
+ *total = 0;
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+ case AND:
+ case IOR:
+ case XOR:
+ case NEG:
+ case NOT:
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case PLUS:
+ case MINUS:
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case MULT:
+ switch (GET_MODE (x))
+ {
+ case SImode:
+ {
+ rtx left = XEXP (x, 0);
+ rtx right = XEXP (x, 1);
+ if (GET_CODE (right) == CONST_INT
+ && CONST_OK_FOR_K (INTVAL (right)))
+ *total = s390_cost->mhi;
+ else if (GET_CODE (left) == SIGN_EXTEND)
+ *total = s390_cost->mh;
+ else
+ *total = s390_cost->ms; /* msr, ms, msy */
+ break;
+ }
+ case DImode:
+ {
+ rtx left = XEXP (x, 0);
+ rtx right = XEXP (x, 1);
+ if (TARGET_ZARCH)
+ {
+ if (GET_CODE (right) == CONST_INT
+ && CONST_OK_FOR_K (INTVAL (right)))
+ *total = s390_cost->mghi;
+ else if (GET_CODE (left) == SIGN_EXTEND)
+ *total = s390_cost->msgf;
+ else
+ *total = s390_cost->msg; /* msgr, msg */
+ }
+ else /* TARGET_31BIT */
+ {
+ if (GET_CODE (left) == SIGN_EXTEND
+ && GET_CODE (right) == SIGN_EXTEND)
+ /* mulsidi case: mr, m */
+ *total = s390_cost->m;
+ else if (GET_CODE (left) == ZERO_EXTEND
+ && GET_CODE (right) == ZERO_EXTEND
+ && TARGET_CPU_ZARCH)
+ /* umulsidi case: ml, mlr */
+ *total = s390_cost->ml;
+ else
+ /* Complex calculation is required. */
+ *total = COSTS_N_INSNS (40);
+ }
+ break;
+ }
+ case SFmode:
+ case DFmode:
+ *total = s390_cost->mult_df;
+ break;
+ case TFmode:
+ *total = s390_cost->mxbr;
+ break;
+ default:
+ return false;
+ }
+ return false;
+
+ case FMA:
+ switch (GET_MODE (x))
+ {
+ case DFmode:
+ *total = s390_cost->madbr;
+ break;
+ case SFmode:
+ *total = s390_cost->maebr;
+ break;
+ default:
+ return false;
+ }
+ /* A negation in the third argument is free: FMSUB. */
+ if (GET_CODE (XEXP (x, 2)) == NEG)
+ {
+ *total += (rtx_cost (XEXP (x, 0), FMA, speed)
+ + rtx_cost (XEXP (x, 1), FMA, speed)
+ + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, speed));
+ return true;
+ }
+ return false;
+
+ case UDIV:
+ case UMOD:
+ if (GET_MODE (x) == TImode) /* 128 bit division */
+ *total = s390_cost->dlgr;
+ else if (GET_MODE (x) == DImode)
+ {
+ rtx right = XEXP (x, 1);
+ if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
+ *total = s390_cost->dlr;
+ else /* 64 by 64 bit division */
+ *total = s390_cost->dlgr;
+ }
+ else if (GET_MODE (x) == SImode) /* 32 bit division */
+ *total = s390_cost->dlr;
+ return false;
+
+ case DIV:
+ case MOD:
+ if (GET_MODE (x) == DImode)
+ {
+ rtx right = XEXP (x, 1);
+ if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
+ if (TARGET_ZARCH)
+ *total = s390_cost->dsgfr;
+ else
+ *total = s390_cost->dr;
+ else /* 64 by 64 bit division */
+ *total = s390_cost->dsgr;
+ }
+ else if (GET_MODE (x) == SImode) /* 32 bit division */
+ *total = s390_cost->dlr;
+ else if (GET_MODE (x) == SFmode)
+ {
+ *total = s390_cost->debr;
+ }
+ else if (GET_MODE (x) == DFmode)
+ {
+ *total = s390_cost->ddbr;
+ }
+ else if (GET_MODE (x) == TFmode)
+ {
+ *total = s390_cost->dxbr;
+ }
+ return false;
+
+ case SQRT:
+ if (GET_MODE (x) == SFmode)
+ *total = s390_cost->sqebr;
+ else if (GET_MODE (x) == DFmode)
+ *total = s390_cost->sqdbr;
+ else /* TFmode */
+ *total = s390_cost->sqxbr;
+ return false;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (outer_code == MULT || outer_code == DIV || outer_code == MOD
+ || outer_code == PLUS || outer_code == MINUS
+ || outer_code == COMPARE)
+ *total = 0;
+ return false;
+
+ case COMPARE:
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == AND
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ {
+ rtx op0 = XEXP (XEXP (x, 0), 0);
+ rtx op1 = XEXP (XEXP (x, 0), 1);
+ rtx op2 = XEXP (x, 1);
+
+ if (memory_operand (op0, GET_MODE (op0))
+ && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
+ return true;
+ if (register_operand (op0, GET_MODE (op0))
+ && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
+ return true;
+ }
+ return false;
+
+ default:
+ return false;
+ }
+}
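+
+/* Illustrative COMPARE case from above: for something like
+ (compare (and (reg) (const_int 0xff00)) (const_int 0xff00))
+ s390_tm_ccmode may recognize a TEST UNDER MASK opportunity, in
+ which case the whole comparison is costed as a single insn with
+ no separate AND. */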
+
+/* Return the cost of an address rtx ADDR. */
+
+static int
+s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+{
+ struct s390_address ad;
+ if (!s390_decompose_address (addr, &ad))
+ return 1000;
+
+ return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
+}
+
+/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
+ otherwise return 0. */
+
+int
+tls_symbolic_operand (rtx op)
+{
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+ return SYMBOL_REF_TLS_MODEL (op);
+}
+
+/* Split DImode access register reference REG (on 64-bit) into its constituent
+ low and high parts, and store them into LO and HI. Note that gen_lowpart/
+ gen_highpart cannot be used as they assume all registers are word-sized,
+ while our access registers have only half that size. */
+
+void
+s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
+{
+ gcc_assert (TARGET_64BIT);
+ gcc_assert (ACCESS_REG_P (reg));
+ gcc_assert (GET_MODE (reg) == DImode);
+ gcc_assert (!(REGNO (reg) & 1));
+
+ *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
+ *hi = gen_rtx_REG (SImode, REGNO (reg));
+}
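+
+/* For illustration: given the DImode access register pair starting
+ at an even REGNO, the code above returns the lower-numbered
+ register as *HI and the following one as *LO, i.e. the more
+ significant half lives in the lower-numbered 32-bit register. */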
+
+/* Return true if OP contains a symbol reference. */
+
+bool
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return true if OP contains a reference to a thread-local symbol. */
+
+bool
+tls_symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return tls_symbolic_operand (op);
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return true;
+ }
+
+ else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Return true if OP is a legitimate general operand when
+ generating PIC code. It is given that flag_pic is on
+ and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+int
+legitimate_pic_operand_p (rtx op)
+{
+ /* Accept all non-symbolic constants. */
+ if (!SYMBOLIC_CONST (op))
+ return 1;
+
+ /* Reject everything else; must be handled
+ via emit_symbolic_move. */
+ return 0;
+}
+
+/* Returns true if the constant value OP is a legitimate general operand.
+ It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+int
+legitimate_constant_p (rtx op)
+{
+ /* Accept all non-symbolic constants. */
+ if (!SYMBOLIC_CONST (op))
+ return 1;
+
+ /* Accept immediate LARL operands. */
+ if (TARGET_CPU_ZARCH && larl_operand (op, VOIDmode))
+ return 1;
+
+ /* Thread-local symbols are never legal constants. This is
+ so that emit_call knows that computing such addresses
+ might require a function call. */
+ if (TLS_SYMBOLIC_CONST (op))
+ return 0;
+
+ /* In the PIC case, symbolic constants must *not* be
+ forced into the literal pool. We accept them here,
+ so that they will be handled by emit_symbolic_move. */
+ if (flag_pic)
+ return 1;
+
+ /* All remaining non-PIC symbolic constants are
+ forced into the literal pool. */
+ return 0;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible if X contains the address of a symbol that is
+ not constant (TLS) or not known at final link time (PIC). */
+
+static bool
+s390_cannot_force_const_mem (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ /* Accept all non-symbolic constants. */
+ return false;
+
+ case LABEL_REF:
+ /* Labels are OK iff we are non-PIC. */
+ return flag_pic != 0;
+
+ case SYMBOL_REF:
+ /* 'Naked' TLS symbol references are never OK,
+ non-TLS symbols are OK iff we are non-PIC. */
+ if (tls_symbolic_operand (x))
+ return true;
+ else
+ return flag_pic != 0;
+
+ case CONST:
+ return s390_cannot_force_const_mem (XEXP (x, 0));
+ case PLUS:
+ case MINUS:
+ return s390_cannot_force_const_mem (XEXP (x, 0))
+ || s390_cannot_force_const_mem (XEXP (x, 1));
+
+ case UNSPEC:
+ switch (XINT (x, 1))
+ {
+ /* Only lt-relative or GOT-relative UNSPECs are OK. */
+ case UNSPEC_LTREL_OFFSET:
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ case UNSPEC_TLSGD:
+ case UNSPEC_TLSLDM:
+ case UNSPEC_NTPOFF:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ return false;
+
+ /* If the literal pool shares the code section, execute
+ template placeholders may be put into the pool as well. */
+ case UNSPEC_INSN:
+ return TARGET_CPU_ZARCH;
+
+ default:
+ return true;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Returns true if the constant value OP is a legitimate general
+ operand during and after reload. The difference to
+ legitimate_constant_p is that this function will not accept
+ a constant that would need to be forced to the literal pool
+ before it can be used as operand.
+ This function accepts all constants which can be loaded directly
+ into a GPR. */
+
+bool
+legitimate_reload_constant_p (rtx op)
+{
+ /* Accept la(y) operands. */
+ if (GET_CODE (op) == CONST_INT
+ && DISP_IN_RANGE (INTVAL (op)))
+ return true;
+
+ /* Accept l(g)hi/l(g)fi operands. */
+ if (GET_CODE (op) == CONST_INT
+ && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
+ return true;
+
+ /* Accept lliXX operands. */
+ if (TARGET_ZARCH
+ && GET_CODE (op) == CONST_INT
+ && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
+ && s390_single_part (op, word_mode, HImode, 0) >= 0)
+ return true;
+
+ if (TARGET_EXTIMM
+ && GET_CODE (op) == CONST_INT
+ && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
+ && s390_single_part (op, word_mode, SImode, 0) >= 0)
+ return true;
+
+ /* Accept larl operands. */
+ if (TARGET_CPU_ZARCH
+ && larl_operand (op, VOIDmode))
+ return true;
+
+ /* Accept floating-point zero operands that fit into a single GPR. */
+ if (GET_CODE (op) == CONST_DOUBLE
+ && s390_float_const_zero_p (op)
+ && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
+ return true;
+
+ /* Accept double-word operands that can be split. */
+ if (GET_CODE (op) == CONST_INT
+ && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
+ {
+ enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
+ rtx hi = operand_subword (op, 0, 0, dword_mode);
+ rtx lo = operand_subword (op, 1, 0, dword_mode);
+ return legitimate_reload_constant_p (hi)
+ && legitimate_reload_constant_p (lo);
+ }
+
+ /* Everything else cannot be handled without reload. */
+ return false;
+}
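+
+/* A few illustrative values for the checks above, assuming 64-bit
+ word_mode: 4095 passes the la(y) displacement test, -30000 the
+ l(g)hi test, 0x00000000ffff0000 the lliXX single-part test, and
+ DFmode 0.0 the float-zero test. A value like 0x0001000100010001
+ fails them all and must go through the literal pool. */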
+
+/* Returns true if the constant value OP is a legitimate fp operand
+ during and after reload.
+ This function accepts all constants which can be loaded directly
+ into an FPR. */
+
+static bool
+legitimate_reload_fp_constant_p (rtx op)
+{
+ /* Accept floating-point zero operands if the load zero instruction
+ can be used. */
+ if (TARGET_Z196
+ && GET_CODE (op) == CONST_DOUBLE
+ && s390_float_const_zero_p (op))
+ return true;
+
+ return false;
+}
+
+/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
+ return the class of reg to actually use. */
+
+static reg_class_t
+s390_preferred_reload_class (rtx op, reg_class_t rclass)
+{
+ switch (GET_CODE (op))
+ {
+ /* Constants we cannot reload into general registers
+ must be forced into the literal pool. */
+ case CONST_DOUBLE:
+ case CONST_INT:
+ if (reg_class_subset_p (GENERAL_REGS, rclass)
+ && legitimate_reload_constant_p (op))
+ return GENERAL_REGS;
+ else if (reg_class_subset_p (ADDR_REGS, rclass)
+ && legitimate_reload_constant_p (op))
+ return ADDR_REGS;
+ else if (reg_class_subset_p (FP_REGS, rclass)
+ && legitimate_reload_fp_constant_p (op))
+ return FP_REGS;
+ return NO_REGS;
+
+ /* If a symbolic constant or a PLUS is reloaded,
+ it is most likely being used as an address, so
+ prefer ADDR_REGS. If RCLASS is not a superset
+ of ADDR_REGS, e.g. FP_REGS, reject this reload. */
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST:
+ if (reg_class_subset_p (ADDR_REGS, rclass)
+ && legitimate_reload_constant_p (op))
+ return ADDR_REGS;
+ else
+ return NO_REGS;
+ case PLUS:
+ /* load address will be used for this reload. */
+ if (reg_class_subset_p (ADDR_REGS, rclass))
+ return ADDR_REGS;
+ else
+ return NO_REGS;
+
+ default:
+ break;
+ }
+
+ return rclass;
+}
+
+/* Return true if ADDR is SYMBOL_REF + addend with addend being a
+ multiple of ALIGNMENT and the SYMBOL_REF being naturally
+ aligned. */
+
+bool
+s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
+{
+ HOST_WIDE_INT addend;
+ rtx symref;
+
+ if (!s390_symref_operand_p (addr, &symref, &addend))
+ return false;
+
+ return (!SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)
+ && !(addend & (alignment - 1)));
+}
+
+/* ADDR is moved into REG using larl. If ADDR isn't a valid larl
+ operand, SCRATCH is used to reload the even part of the address,
+ and one is added afterwards. */
+
+void
+s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
+{
+ HOST_WIDE_INT addend;
+ rtx symref;
+
+ if (!s390_symref_operand_p (addr, &symref, &addend))
+ gcc_unreachable ();
+
+ if (!(addend & 1))
+ /* Easy case. The addend is even so larl will do fine. */
+ emit_move_insn (reg, addr);
+ else
+ {
+ /* We can leave the scratch register untouched if the target
+ register is a valid base register. */
+ if (REGNO (reg) < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
+ scratch = reg;
+
+ gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
+ gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
+
+ if (addend != 1)
+ emit_move_insn (scratch,
+ gen_rtx_CONST (Pmode,
+ gen_rtx_PLUS (Pmode, symref,
+ GEN_INT (addend - 1))));
+ else
+ emit_move_insn (scratch, symref);
+
+ /* Increment the address using la in order to avoid clobbering cc. */
+ emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
+ }
+}
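+
+/* Illustrative expansion of the odd-addend case above, for
+ ADDR = sym + 5:
+
+ larl %scratch, sym+4 (even part of the address)
+ la %reg, 1(%scratch) (add one without clobbering cc)
+
+ where the scratch register is elided if REG itself is a valid
+ base register. */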
+
+/* Generate what is necessary to move between REG and MEM using
+ SCRATCH. The direction is given by TOMEM. */
+
+void
+s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
+{
+ /* Reload might have pulled a constant out of the literal pool.
+ Force it back in. */
+ if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
+ || GET_CODE (mem) == CONST)
+ mem = force_const_mem (GET_MODE (reg), mem);
+
+ gcc_assert (MEM_P (mem));
+
+ /* For a load from memory we can leave the scratch register
+ untouched if the target register is a valid base register. */
+ if (!tomem
+ && REGNO (reg) < FIRST_PSEUDO_REGISTER
+ && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
+ && GET_MODE (reg) == GET_MODE (scratch))
+ scratch = reg;
+
+ /* Load address into scratch register. Since we can't have a
+ secondary reload for a secondary reload we have to cover the case
+ where larl would need a secondary reload here as well. */
+ s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
+
+ /* Now we can use a standard load/store to do the move. */
+ if (tomem)
+ emit_move_insn (replace_equiv_address (mem, scratch), reg);
+ else
+ emit_move_insn (reg, replace_equiv_address (mem, scratch));
+}
+
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch or immediate register. Return the class
+ needed for the immediate register. */
+
+static reg_class_t
+s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ /* Intermediate register needed. */
+ if (reg_classes_intersect_p (CC_REGS, rclass))
+ return GENERAL_REGS;
+
+ if (TARGET_Z10)
+ {
+ HOST_WIDE_INT offset;
+ rtx symref;
+
+ /* On z10 several optimizer steps may generate larl operands with
+ an odd addend. */
+ if (in_p
+ && s390_symref_operand_p (x, &symref, &offset)
+ && mode == Pmode
+ && !SYMBOL_REF_ALIGN1_P (symref)
+ && (offset & 1) == 1)
+ sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
+ : CODE_FOR_reloadsi_larl_odd_addend_z10);
+
+ /* On z10 we need a scratch register when moving QI, TI or floating
+ point mode values from or to a memory location with a SYMBOL_REF
+ or if the symref addend of a SI or DI move is not aligned to the
+ width of the access. */
+ if (MEM_P (x)
+ && s390_symref_operand_p (XEXP (x, 0), NULL, NULL)
+ && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
+ || (!TARGET_ZARCH && mode == DImode)
+ || ((mode == HImode || mode == SImode || mode == DImode)
+ && (!s390_check_symref_alignment (XEXP (x, 0),
+ GET_MODE_SIZE (mode))))))
+ {
+#define __SECONDARY_RELOAD_CASE(M,m) \
+ case M##mode: \
+ if (TARGET_64BIT) \
+ sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
+ CODE_FOR_reload##m##di_tomem_z10; \
+ else \
+ sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
+ CODE_FOR_reload##m##si_tomem_z10; \
+ break;
+
+ switch (GET_MODE (x))
+ {
+ __SECONDARY_RELOAD_CASE (QI, qi);
+ __SECONDARY_RELOAD_CASE (HI, hi);
+ __SECONDARY_RELOAD_CASE (SI, si);
+ __SECONDARY_RELOAD_CASE (DI, di);
+ __SECONDARY_RELOAD_CASE (TI, ti);
+ __SECONDARY_RELOAD_CASE (SF, sf);
+ __SECONDARY_RELOAD_CASE (DF, df);
+ __SECONDARY_RELOAD_CASE (TF, tf);
+ __SECONDARY_RELOAD_CASE (SD, sd);
+ __SECONDARY_RELOAD_CASE (DD, dd);
+ __SECONDARY_RELOAD_CASE (TD, td);
+
+ default:
+ gcc_unreachable ();
+ }
+#undef __SECONDARY_RELOAD_CASE
+ }
+ }
+
+ /* We need a scratch register when loading a PLUS expression which
+ is not a legitimate operand of the LOAD ADDRESS instruction. */
+ if (in_p && s390_plus_operand (x, mode))
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
+
+ /* Performing a multiword move from or to memory we have to make sure the
+ second chunk in memory is addressable without causing a displacement
+ overflow. If that would be the case we calculate the address in
+ a scratch register. */
+ if (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
+ + GET_MODE_SIZE (mode) - 1))
+ {
+ /* For GENERAL_REGS a displacement overflow is no problem if occurring
+ in an s_operand address since we may fall back to lm/stm. So we only
+ have to care about overflows in the b+i+d case. */
+ if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
+ && s390_class_max_nregs (GENERAL_REGS, mode) > 1
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
+ /* For FP_REGS no lm/stm is available so this check is triggered
+ for displacement overflows in b+i+d and b+d like addresses. */
+ || (reg_classes_intersect_p (FP_REGS, rclass)
+ && s390_class_max_nregs (FP_REGS, mode) > 1))
+ {
+ if (in_p)
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_nonoffmem_in :
+ CODE_FOR_reloadsi_nonoffmem_in);
+ else
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_nonoffmem_out :
+ CODE_FOR_reloadsi_nonoffmem_out);
+ }
+ }
+
+ /* A scratch address register is needed when a symbolic constant is
+ copied to r0 when compiling with -fPIC. In other cases the target
+ register might be used as a temporary (see legitimize_pic_address). */
+ if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_PIC_addr :
+ CODE_FOR_reloadsi_PIC_addr);
+
+ /* Either scratch or no register needed. */
+ return NO_REGS;
+}
+
+/* Generate code to load SRC, which is PLUS that is not a
+ legitimate operand for the LA instruction, into TARGET.
+ SCRATCH may be used as scratch register. */
+
+void
+s390_expand_plus_operand (rtx target, rtx src,
+ rtx scratch)
+{
+ rtx sum1, sum2;
+ struct s390_address ad;
+
+ /* src must be a PLUS; get its two operands. */
+ gcc_assert (GET_CODE (src) == PLUS);
+ gcc_assert (GET_MODE (src) == Pmode);
+
+ /* Check if any of the two operands is already scheduled
+ for replacement by reload. This can happen e.g. when
+ float registers occur in an address. */
+ sum1 = find_replacement (&XEXP (src, 0));
+ sum2 = find_replacement (&XEXP (src, 1));
+ src = gen_rtx_PLUS (Pmode, sum1, sum2);
+
+ /* If the address is already strictly valid, there's nothing to do. */
+ if (!s390_decompose_address (src, &ad)
+ || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
+ {
+ /* Otherwise, one of the operands cannot be an address register;
+ we reload its value into the scratch register. */
+ if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
+ {
+ emit_move_insn (scratch, sum1);
+ sum1 = scratch;
+ }
+ if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
+ {
+ emit_move_insn (scratch, sum2);
+ sum2 = scratch;
+ }
+
+ /* According to the way these invalid addresses are generated
+ in reload.c, it should never happen (at least on s390) that
+ *neither* of the PLUS components, after find_replacements
+ was applied, is an address register. */
+ if (sum1 == scratch && sum2 == scratch)
+ {
+ debug_rtx (src);
+ gcc_unreachable ();
+ }
+
+ src = gen_rtx_PLUS (Pmode, sum1, sum2);
+ }
+
+ /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
+ is only ever performed on addresses, so we can mark the
+ sum as legitimate for LA in any case. */
+ s390_load_address (target, src);
+}
+
+
+/* Return true if ADDR is a valid memory address.
+ STRICT specifies whether strict register checking applies. */
+
+static bool
+s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
+{
+ struct s390_address ad;
+
+ if (TARGET_Z10
+ && larl_operand (addr, VOIDmode)
+ && (mode == VOIDmode
+ || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
+ return true;
+
+ if (!s390_decompose_address (addr, &ad))
+ return false;
+
+ if (strict)
+ {
+ if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ return false;
+
+ if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
+ return false;
+ }
+ else
+ {
+ if (ad.base
+ && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
+ return false;
+
+ if (ad.indx
+ && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
+ return false;
+ }
+ return true;
+}
+
+/* Return true if OP is a valid operand for the LA instruction.
+ In 31-bit, we need to prove that the result is used as an
+ address, as LA performs only a 31-bit addition. */
+
+bool
+legitimate_la_operand_p (rtx op)
+{
+ struct s390_address addr;
+ if (!s390_decompose_address (op, &addr))
+ return false;
+
+ return (TARGET_64BIT || addr.pointer);
+}
+
+/* Return true if it is valid *and* preferable to use LA to
+ compute the sum of OP1 and OP2. */
+
+bool
+preferred_la_operand_p (rtx op1, rtx op2)
+{
+ struct s390_address addr;
+
+ if (op2 != const0_rtx)
+ op1 = gen_rtx_PLUS (Pmode, op1, op2);
+
+ if (!s390_decompose_address (op1, &addr))
+ return false;
+ if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
+ return false;
+ if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
+ return false;
+
+ /* Avoid LA instructions with index register on z196; it is
+ preferable to use regular add instructions when possible. */
+ if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
+ return false;
+
+ if (!TARGET_64BIT && !addr.pointer)
+ return false;
+
+ if (addr.pointer)
+ return true;
+
+ if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
+ || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
+ return true;
+
+ return false;
+}
+
+/* Emit a forced load-address operation to load SRC into DST.
+ This will use the LOAD ADDRESS instruction even in situations
+ where legitimate_la_operand_p (SRC) returns false. */
+
+void
+s390_load_address (rtx dst, rtx src)
+{
+ if (TARGET_64BIT)
+ emit_move_insn (dst, src);
+ else
+ emit_insn (gen_force_la_31 (dst, src));
+}
+
+/* Return a legitimate reference for ORIG (an address) using the
+ register REG. If REG is 0, a new pseudo is generated.
+
+ There are two types of references that must be handled:
+
+ 1. Global data references must load the address from the GOT, via
+ the PIC reg. An insn is emitted to do this load, and the reg is
+ returned.
+
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
+
+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
+ reg also appears in the address. */
+
+rtx
+legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx addr = orig;
+ rtx new_rtx = orig;
+ rtx base;
+
+ gcc_assert (!TLS_SYMBOLIC_CONST (addr));
+
+ if (GET_CODE (addr) == LABEL_REF
+ || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)))
+ {
+ /* This is a local symbol. */
+ if (TARGET_CPU_ZARCH && larl_operand (addr, VOIDmode))
+ {
+ /* Access local symbols PC-relative via LARL.
+ This is the same as in the non-PIC case, so it is
+ handled automatically ... */
+ }
+ else
+ {
+ /* Access local symbols relative to the GOT. */
+
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
+ emit_move_insn (temp, addr);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+
+ if (flag_pic == 1)
+ {
+ /* Assume GOT offset < 4k. This is handled the same way
+ in both 31- and 64-bit code (@GOT). */
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ else if (TARGET_CPU_ZARCH)
+ {
+ /* If the GOT offset might be >= 4k, we determine the position
+ of the GOT entry via a PC-relative LARL (@GOTENT). */
+
+ rtx temp = reg ? reg : gen_reg_rtx (Pmode);
+
+ gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = gen_const_mem (Pmode, temp);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ else
+ {
+ /* If the GOT offset might be >= 4k, we have to load it
+ from the literal pool (@GOT). */
+
+ rtx temp = reg ? reg : gen_reg_rtx (Pmode);
+
+ gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
+ emit_move_insn (temp, addr);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == UNSPEC)
+ {
+ gcc_assert (XVECLEN (addr, 0) == 1);
+ switch (XINT (addr, 1))
+ {
+ /* If someone moved a GOT-relative UNSPEC
+ out of the literal pool, force them back in. */
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ new_rtx = force_const_mem (Pmode, orig);
+ break;
+
+ /* @GOT is OK as is if small. */
+ case UNSPEC_GOT:
+ if (flag_pic == 2)
+ new_rtx = force_const_mem (Pmode, orig);
+ break;
+
+ /* @GOTENT is OK as is. */
+ case UNSPEC_GOTENT:
+ break;
+
+ /* @PLT is OK as is on 64-bit, must be converted to
+ GOT-relative @PLTOFF on 31-bit. */
+ case UNSPEC_PLT:
+ if (!TARGET_CPU_ZARCH)
+ {
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = XVECEXP (addr, 0, 0);
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+ UNSPEC_PLTOFF);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
+ emit_move_insn (temp, addr);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ break;
+
+ /* Everything else cannot happen. */
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ gcc_assert (!TLS_SYMBOLIC_CONST (op0));
+ gcc_assert (!TLS_SYMBOLIC_CONST (op1));
+
+ /* Check first to see if this is a constant offset
+ from a local symbol reference. */
+ if ((GET_CODE (op0) == LABEL_REF
+ || (GET_CODE (op0) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (op0)))
+ && GET_CODE (op1) == CONST_INT)
+ {
+ if (TARGET_CPU_ZARCH
+ && larl_operand (op0, VOIDmode)
+ && INTVAL (op1) < (HOST_WIDE_INT)1 << 31
+ && INTVAL (op1) >= -((HOST_WIDE_INT)1 << 31))
+ {
+ if (INTVAL (op1) & 1)
+ {
+ /* LARL can't handle odd offsets, so emit a
+ pair of LARL and LA. */
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (!DISP_IN_RANGE (INTVAL (op1)))
+ {
+ HOST_WIDE_INT even = INTVAL (op1) - 1;
+ op0 = gen_rtx_PLUS (Pmode, op0, GEN_INT (even));
+ op0 = gen_rtx_CONST (Pmode, op0);
+ op1 = const1_rtx;
+ }
+
+ emit_move_insn (temp, op0);
+ new_rtx = gen_rtx_PLUS (Pmode, temp, op1);
+
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ /* If the offset is even, we can just use LARL.
+ This will happen automatically. */
+ }
+ }
+ else
+ {
+ /* Access local symbols relative to the GOT. */
+
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
+ UNSPEC_GOTOFF);
+ addr = gen_rtx_PLUS (Pmode, addr, op1);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
+ emit_move_insn (temp, addr);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ }
+
+ /* Now, check whether it is a GOT relative symbol plus offset
+ that was pulled out of the literal pool. Force it back in. */
+
+ else if (GET_CODE (op0) == UNSPEC
+ && GET_CODE (op1) == CONST_INT
+ && XINT (op0, 1) == UNSPEC_GOTOFF)
+ {
+ gcc_assert (XVECLEN (op0, 0) == 1);
+
+ new_rtx = force_const_mem (Pmode, orig);
+ }
+
+ /* Otherwise, compute the sum. */
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new_rtx = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
+ if (GET_CODE (new_rtx) == CONST_INT)
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
+ else
+ {
+ if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ new_rtx = XEXP (new_rtx, 1);
+ }
+ new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
+ }
+
+ if (GET_CODE (new_rtx) == CONST)
+ new_rtx = XEXP (new_rtx, 0);
+ new_rtx = force_operand (new_rtx, 0);
+ }
+ }
+ }
+ return new_rtx;
+}
+
+/* Load the thread pointer into a register. */
+
+rtx
+s390_get_thread_pointer (void)
+{
+ rtx tp = gen_reg_rtx (Pmode);
+
+ emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
+ mark_reg_pointer (tp, BITS_PER_WORD);
+
+ return tp;
+}
+
+/* Emit a tls call insn. The call target is the SYMBOL_REF stored
+ in s390_tls_symbol which always refers to __tls_get_offset.
+ The returned offset is written to RESULT_REG and a USE rtx is
+ generated for TLS_CALL. */
+
+static GTY(()) rtx s390_tls_symbol;
+
+static void
+s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
+{
+ rtx insn;
+
+ gcc_assert (flag_pic);
+
+ if (!s390_tls_symbol)
+ s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
+
+ insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
+ gen_rtx_REG (Pmode, RETURN_REGNUM));
+
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
+ RTL_CONST_CALL_P (insn) = 1;
+}
+
+/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
+ this (thread-local) address. REG may be used as temporary. */
+
+static rtx
+legitimize_tls_address (rtx addr, rtx reg)
+{
+ rtx new_rtx, tls_call, temp, base, r2, insn;
+
+ if (GET_CODE (addr) == SYMBOL_REF)
+ switch (tls_symbolic_operand (addr))
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ start_sequence ();
+ r2 = gen_rtx_REG (Pmode, 2);
+ tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
+ new_rtx = gen_rtx_CONST (Pmode, tls_call);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ emit_move_insn (r2, new_rtx);
+ s390_emit_tls_call_insn (r2, tls_call);
+ insn = get_insns ();
+ end_sequence ();
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+ temp = gen_reg_rtx (Pmode);
+ emit_libcall_block (insn, temp, r2, new_rtx);
+
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ start_sequence ();
+ r2 = gen_rtx_REG (Pmode, 2);
+ tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
+ new_rtx = gen_rtx_CONST (Pmode, tls_call);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ emit_move_insn (r2, new_rtx);
+ s390_emit_tls_call_insn (r2, tls_call);
+ insn = get_insns ();
+ end_sequence ();
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
+ temp = gen_reg_rtx (Pmode);
+ emit_libcall_block (insn, temp, r2, new_rtx);
+
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ base = gen_reg_rtx (Pmode);
+ s390_load_address (base, new_rtx);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = gen_rtx_PLUS (Pmode, base, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (flag_pic == 1)
+ {
+ /* Assume GOT offset < 4k. This is handled the same way
+ in both 31- and 64-bit code. */
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+ }
+ else if (TARGET_CPU_ZARCH)
+ {
+ /* If the GOT offset might be >= 4k, we determine the position
+ of the GOT entry via a PC-relative LARL. */
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = gen_const_mem (Pmode, temp);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+ }
+ else if (flag_pic)
+ {
+ /* If the GOT offset might be >= 4k, we have to load it
+ from the literal pool. */
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
+ temp = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+ }
+ else
+ {
+ /* In position-dependent code, load the absolute address of
+ the GOT entry from the literal pool. */
+
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = temp;
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
+ temp = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
+ }
+
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = force_const_mem (Pmode, new_rtx);
+ temp = gen_reg_rtx (Pmode);
+ emit_move_insn (temp, new_rtx);
+
+ new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
+ {
+ switch (XINT (XEXP (addr, 0), 1))
+ {
+ case UNSPEC_INDNTPOFF:
+ gcc_assert (TARGET_CPU_ZARCH);
+ new_rtx = addr;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+ {
+ new_rtx = XEXP (XEXP (addr, 0), 0);
+ if (GET_CODE (new_rtx) != SYMBOL_REF)
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+
+ new_rtx = legitimize_tls_address (new_rtx, reg);
+ new_rtx = plus_constant (new_rtx, INTVAL (XEXP (XEXP (addr, 0), 1)));
+ new_rtx = force_operand (new_rtx, 0);
+ }
+
+ else
+ gcc_unreachable (); /* for now ... */
+
+ return new_rtx;
+}
+
+/* Emit insns making the address in operands[1] valid for a standard
+ move to operands[0]. operands[1] is replaced by an address which
+ should be used instead of the former RTX to emit the move
+ pattern. */
+
+void
+emit_symbolic_move (rtx *operands)
+{
+ rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (Pmode, operands[1]);
+ else if (TLS_SYMBOLIC_CONST (operands[1]))
+ operands[1] = legitimize_tls_address (operands[1], temp);
+ else if (flag_pic)
+ operands[1] = legitimize_pic_address (operands[1], temp);
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address X
+ to be legitimate. If we find one, return the new, valid address.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE is the mode of the operand pointed to by X.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address for details. */
+
+static rtx
+s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx constant_term = const0_rtx;
+
+ if (TLS_SYMBOLIC_CONST (x))
+ {
+ x = legitimize_tls_address (x, 0);
+
+ if (s390_legitimate_address_p (mode, x, FALSE))
+ return x;
+ }
+ else if (GET_CODE (x) == PLUS
+ && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
+ || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
+ {
+ return x;
+ }
+ else if (flag_pic)
+ {
+ if (SYMBOLIC_CONST (x)
+ || (GET_CODE (x) == PLUS
+ && (SYMBOLIC_CONST (XEXP (x, 0))
+ || SYMBOLIC_CONST (XEXP (x, 1)))))
+ x = legitimize_pic_address (x, 0);
+
+ if (s390_legitimate_address_p (mode, x, FALSE))
+ return x;
+ }
+
+ x = eliminate_constant_term (x, &constant_term);
+
+ /* Optimize loading of large displacements by splitting them
+ into the multiple of 4K and the rest; this allows the
+ former to be CSE'd if possible.
+
+ Don't do this if the displacement is added to a register
+ pointing into the stack frame, as the offsets will
+ change later anyway. */
+
+ if (GET_CODE (constant_term) == CONST_INT
+ && !TARGET_LONG_DISPLACEMENT
+ && !DISP_IN_RANGE (INTVAL (constant_term))
+ && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
+ {
+ HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
+ HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
+
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (GEN_INT (upper), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ x = gen_rtx_PLUS (Pmode, x, temp);
+ constant_term = GEN_INT (lower);
+ }
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 0)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
+ }
+
+ else if (GET_CODE (XEXP (x, 1)) == REG)
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
+ }
+ }
+
+ if (constant_term != const0_rtx)
+ x = gen_rtx_PLUS (Pmode, x, constant_term);
+
+ return x;
+}
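+
+/* Worked example of the displacement split above: without long
+ displacements, r4 + 0x12345 is rewritten with lower = 0x345 and
+ upper = 0x12000, giving (r4 + tmp) + 0x345 where tmp holds
+ 0x12000, so the 4K-aligned part can be CSE'd across references. */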
+
+/* Try a machine-dependent way of reloading an illegitimate address AD
+ operand. If we find one, push the reload and return the new address.
+
+ MODE is the mode of the enclosing MEM. OPNUM is the operand number
+ and TYPE is the reload type of the current reload. */
+
+rtx
+legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type)
+{
+ if (!optimize || TARGET_LONG_DISPLACEMENT)
+ return NULL_RTX;
+
+ if (GET_CODE (ad) == PLUS)
+ {
+ rtx tem = simplify_binary_operation (PLUS, Pmode,
+ XEXP (ad, 0), XEXP (ad, 1));
+ if (tem)
+ ad = tem;
+ }
+
+ if (GET_CODE (ad) == PLUS
+ && GET_CODE (XEXP (ad, 0)) == REG
+ && GET_CODE (XEXP (ad, 1)) == CONST_INT
+ && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
+ {
+ HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
+ HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
+ rtx cst, tem, new_rtx;
+
+ cst = GEN_INT (upper);
+ if (!legitimate_reload_constant_p (cst))
+ cst = force_const_mem (Pmode, cst);
+
+ tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
+ new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
+
+ push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
+ opnum, (enum reload_type) type);
+ return new_rtx;
+ }
+
+ return NULL_RTX;
+}
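+
+/* Analogous reload-time example: for r3 + 0x11234 the code above
+ pushes a reload of the 4K-aligned constant 0x11000 into an
+ address register and returns (r3 + that register) + 0x234,
+ keeping the remaining displacement in range. */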
+
+/* Emit code to move LEN bytes from SRC to DST. */
+
+void
+s390_expand_movmem (rtx dst, rtx src, rtx len)
+{
+ if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+ {
+ if (INTVAL (len) > 0)
+ emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
+ }
+
+ else if (TARGET_MVCLE)
+ {
+ emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
+ }
+
+ else
+ {
+ rtx dst_addr, src_addr, count, blocks, temp;
+ rtx loop_start_label = gen_label_rtx ();
+ rtx loop_end_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+ enum machine_mode mode;
+
+ mode = GET_MODE (len);
+ if (mode == VOIDmode)
+ mode = Pmode;
+
+ dst_addr = gen_reg_rtx (Pmode);
+ src_addr = gen_reg_rtx (Pmode);
+ count = gen_reg_rtx (mode);
+ blocks = gen_reg_rtx (mode);
+
+ convert_move (count, len, 1);
+ emit_cmp_and_jump_insns (count, const0_rtx,
+ EQ, NULL_RTX, mode, 1, end_label);
+
+ emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
+ emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
+ dst = change_address (dst, VOIDmode, dst_addr);
+ src = change_address (src, VOIDmode, src_addr);
+
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
+ if (temp != count)
+ emit_move_insn (count, temp);
+
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_label (loop_start_label);
+
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
+ {
+ rtx prefetch;
+
+ /* Issue a read prefetch for the +3 cache line. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
+ const0_rtx, const0_rtx);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ emit_insn (prefetch);
+
+ /* Issue a write prefetch for the +3 cache line. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
+ const1_rtx, const0_rtx);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ emit_insn (prefetch);
+ }
+
+ emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
+ s390_load_address (dst_addr,
+ gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
+ s390_load_address (src_addr,
+ gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
+
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_jump (loop_start_label);
+ emit_label (loop_end_label);
+
+ emit_insn (gen_movmem_short (dst, src,
+ convert_to_mode (Pmode, count, 1)));
+ emit_label (end_label);
+ }
+}
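+
+/* Illustrative trace of the loop above for a variable LEN of 600:
+ count = 599 and blocks = count >> 8 = 2, so the loop issues two
+ 256-byte MVCs; the final EXecuted MVC then moves the remaining
+ (count & 255) + 1 = 88 bytes. */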
+
+/* Emit code to set LEN bytes at DST to VAL.
+ Make use of clrmem if VAL is zero. */
+
+void
+s390_expand_setmem (rtx dst, rtx len, rtx val)
+{
+ if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
+ return;
+
+ gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
+
+ if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
+ {
+ if (val == const0_rtx && INTVAL (len) <= 256)
+ emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
+ else
+ {
+ /* Initialize memory by storing the first byte. */
+ emit_move_insn (adjust_address (dst, QImode, 0), val);
+
+ if (INTVAL (len) > 1)
+ {
+ /* Initiate 1 byte overlap move.
+ The first byte of DST is propagated through DSTP1.
+ Prepare a movmem for: DST+1 = DST (length = LEN - 1).
+ DST is set to size 1 so the rest of the memory location
+ does not count as a source operand. */
+ rtx dstp1 = adjust_address (dst, VOIDmode, 1);
+ set_mem_size (dst, const1_rtx);
+
+ emit_insn (gen_movmem_short (dstp1, dst,
+ GEN_INT (INTVAL (len) - 2)));
+ }
+ }
+ }
+
+ else if (TARGET_MVCLE)
+ {
+ val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
+ emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
+ }
+
+ else
+ {
+ rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
+ rtx loop_start_label = gen_label_rtx ();
+ rtx loop_end_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+ enum machine_mode mode;
+
+ mode = GET_MODE (len);
+ if (mode == VOIDmode)
+ mode = Pmode;
+
+ dst_addr = gen_reg_rtx (Pmode);
+ count = gen_reg_rtx (mode);
+ blocks = gen_reg_rtx (mode);
+
+ convert_move (count, len, 1);
+ emit_cmp_and_jump_insns (count, const0_rtx,
+ EQ, NULL_RTX, mode, 1, end_label);
+
+ emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
+ dst = change_address (dst, VOIDmode, dst_addr);
+
+ if (val == const0_rtx)
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
+ else
+ {
+ dstp1 = adjust_address (dst, VOIDmode, 1);
+ set_mem_size (dst, const1_rtx);
+
+ /* Initialize memory by storing the first byte. */
+ emit_move_insn (adjust_address (dst, QImode, 0), val);
+
+ /* If count is 1 we are done. */
+ emit_cmp_and_jump_insns (count, const1_rtx,
+ EQ, NULL_RTX, mode, 1, end_label);
+
+ temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
+ OPTAB_DIRECT);
+ }
+ if (temp != count)
+ emit_move_insn (count, temp);
+
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_label (loop_start_label);
+
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
+ {
+ /* Issue a write prefetch for the +4 cache line. */
+ rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
+ GEN_INT (1024)),
+ const1_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ }
+
+ if (val == const0_rtx)
+ emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
+ else
+ emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
+ s390_load_address (dst_addr,
+ gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
+
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_jump (loop_start_label);
+ emit_label (loop_end_label);
+
+ if (val == const0_rtx)
+ emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
+ else
+ emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
+ emit_label (end_label);
+ }
+}
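+
+/* The overlapping-move trick used above in a nutshell: after VAL is
+ stored to DST[0], an MVC from DST to DST + 1 copies byte by byte
+ from left to right, so DST[0] is propagated through the whole
+ destination -- a compact memset for arbitrary fill values. */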
+
+/* Emit code to compare LEN bytes at OP0 with those at OP1,
+ and return the result in TARGET. */
+
+void
+s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
+{
+ rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
+ rtx tmp;
+
+ /* As the result of CMPINT is inverted compared to what we need,
+ we have to swap the operands. */
+ tmp = op0; op0 = op1; op1 = tmp;
+
+ if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
+ {
+ if (INTVAL (len) > 0)
+ {
+ emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
+ emit_insn (gen_cmpint (target, ccreg));
+ }
+ else
+ emit_move_insn (target, const0_rtx);
+ }
+ else if (TARGET_MVCLE)
+ {
+ emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
+ emit_insn (gen_cmpint (target, ccreg));
+ }
+ else
+ {
+ rtx addr0, addr1, count, blocks, temp;
+ rtx loop_start_label = gen_label_rtx ();
+ rtx loop_end_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+ enum machine_mode mode;
+
+ mode = GET_MODE (len);
+ if (mode == VOIDmode)
+ mode = Pmode;
+
+ addr0 = gen_reg_rtx (Pmode);
+ addr1 = gen_reg_rtx (Pmode);
+ count = gen_reg_rtx (mode);
+ blocks = gen_reg_rtx (mode);
+
+ convert_move (count, len, 1);
+ emit_cmp_and_jump_insns (count, const0_rtx,
+ EQ, NULL_RTX, mode, 1, end_label);
+
+ emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
+ emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
+ op0 = change_address (op0, VOIDmode, addr0);
+ op1 = change_address (op1, VOIDmode, addr1);
+
+ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
+ OPTAB_DIRECT);
+ if (temp != count)
+ emit_move_insn (count, temp);
+
+ temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_label (loop_start_label);
+
+ if (TARGET_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
+ {
+ rtx prefetch;
+
+ /* Issue a read prefetch for the +2 cache line of operand 1. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
+ const0_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+
+ /* Issue a read prefetch for the +2 cache line of operand 2. */
+ prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
+ const0_rtx, const0_rtx);
+ emit_insn (prefetch);
+ PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
+ }
+
+ emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
+ temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+ emit_jump_insn (temp);
+
+ s390_load_address (addr0,
+ gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
+ s390_load_address (addr1,
+ gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
+
+ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
+ OPTAB_DIRECT);
+ if (temp != blocks)
+ emit_move_insn (blocks, temp);
+
+ emit_cmp_and_jump_insns (blocks, const0_rtx,
+ EQ, NULL_RTX, mode, 1, loop_end_label);
+
+ emit_jump (loop_start_label);
+ emit_label (loop_end_label);
+
+ emit_insn (gen_cmpmem_short (op0, op1,
+ convert_to_mode (Pmode, count, 1)));
+ emit_label (end_label);
+
+ emit_insn (gen_cmpint (target, ccreg));
+ }
+}
+
+
+/* Expand conditional increment or decrement using alc/slb instructions.
+ Should generate code setting DST to either SRC or SRC + INCREMENT,
+ depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
+ Returns true if successful, false otherwise.
+
+ That makes it possible to implement some if-constructs without jumps, e.g.:
+ (borrow = CC0 | CC1 and carry = CC2 | CC3)
+ unsigned int a, b, c;
+ if (a < b) c++; -> CCU b > a -> CC2; c += carry;
+ if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
+ if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
+ if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
+
+ Checks for EQ and NE with a nonzero value need an additional xor, e.g.:
+ if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
+ if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
+ if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
+ if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
+
+bool
+s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
+ rtx dst, rtx src, rtx increment)
+{
+ enum machine_mode cmp_mode;
+ enum machine_mode cc_mode;
+ rtx op_res;
+ rtx insn;
+ rtvec p;
+ int ret;
+
+ if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
+ && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
+ cmp_mode = SImode;
+ else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
+ && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
+ cmp_mode = DImode;
+ else
+ return false;
+
+ /* Try ADD LOGICAL WITH CARRY. */
+ if (increment == const1_rtx)
+ {
+ /* Determine CC mode to use. */
+ if (cmp_code == EQ || cmp_code == NE)
+ {
+ if (cmp_op1 != const0_rtx)
+ {
+ cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ cmp_op1 = const0_rtx;
+ }
+
+ cmp_code = cmp_code == EQ ? LEU : GTU;
+ }
+
+ if (cmp_code == LTU || cmp_code == LEU)
+ {
+ rtx tem = cmp_op0;
+ cmp_op0 = cmp_op1;
+ cmp_op1 = tem;
+ cmp_code = swap_condition (cmp_code);
+ }
+
+ switch (cmp_code)
+ {
+ case GTU:
+ cc_mode = CCUmode;
+ break;
+
+ case GEU:
+ cc_mode = CCL3mode;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Emit comparison instruction pattern. */
+ if (!register_operand (cmp_op0, cmp_mode))
+ cmp_op0 = force_reg (cmp_mode, cmp_op0);
+
+ insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
+ /* We use insn_invalid_p here to add clobbers if required. */
+ ret = insn_invalid_p (emit_insn (insn));
+ gcc_assert (!ret);
+
+ /* Emit ALC instruction pattern. */
+ op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ const0_rtx);
+
+ if (src != const0_rtx)
+ {
+ if (!register_operand (src, GET_MODE (dst)))
+ src = force_reg (GET_MODE (dst), src);
+
+ op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
+ op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
+ }
+
+ p = rtvec_alloc (2);
+ RTVEC_ELT (p, 0) =
+ gen_rtx_SET (VOIDmode, dst, op_res);
+ RTVEC_ELT (p, 1) =
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ return true;
+ }
+
+ /* Try SUBTRACT LOGICAL WITH BORROW. */
+ if (increment == constm1_rtx)
+ {
+ /* Determine CC mode to use. */
+ if (cmp_code == EQ || cmp_code == NE)
+ {
+ if (cmp_op1 != const0_rtx)
+ {
+ cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ cmp_op1 = const0_rtx;
+ }
+
+ cmp_code = cmp_code == EQ ? LEU : GTU;
+ }
+
+ if (cmp_code == GTU || cmp_code == GEU)
+ {
+ rtx tem = cmp_op0;
+ cmp_op0 = cmp_op1;
+ cmp_op1 = tem;
+ cmp_code = swap_condition (cmp_code);
+ }
+
+ switch (cmp_code)
+ {
+ case LEU:
+ cc_mode = CCUmode;
+ break;
+
+ case LTU:
+ cc_mode = CCL3mode;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Emit comparison instruction pattern. */
+ if (!register_operand (cmp_op0, cmp_mode))
+ cmp_op0 = force_reg (cmp_mode, cmp_op0);
+
+ insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
+ /* We use insn_invalid_p here to add clobbers if required. */
+ ret = insn_invalid_p (emit_insn (insn));
+ gcc_assert (!ret);
+
+ /* Emit SLB instruction pattern. */
+ if (!register_operand (src, GET_MODE (dst)))
+ src = force_reg (GET_MODE (dst), src);
+
+ op_res = gen_rtx_MINUS (GET_MODE (dst),
+ gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
+ gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ const0_rtx));
+ p = rtvec_alloc (2);
+ RTVEC_ELT (p, 0) =
+ gen_rtx_SET (VOIDmode, dst, op_res);
+ RTVEC_ELT (p, 1) =
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ return true;
+ }
+
+ return false;
+}
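+
+/* Schematically (illustrative, derived from the code above), for
+ "if (a < b) c++;" the routine emits
+ (set (reg CC) (compare:CCU b a))
+ (parallel [(set c (plus (plus (gtu (reg CC) 0) c) 0))
+ (clobber (reg CC))])
+ i.e. a compare followed by an add logical with carry, with no
+ branch. */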
+
+/* Expand code for the insv template. Return true if successful. */
+
+bool
+s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
+{
+ int bitsize = INTVAL (op1);
+ int bitpos = INTVAL (op2);
+
+ /* On z10 we can use the risbg instruction to implement insv. */
+ if (TARGET_Z10
+ && ((GET_MODE (dest) == DImode && GET_MODE (src) == DImode)
+ || (GET_MODE (dest) == SImode && GET_MODE (src) == SImode)))
+ {
+ rtx op;
+ rtx clobber;
+
+ op = gen_rtx_SET (GET_MODE (src),
+ gen_rtx_ZERO_EXTRACT (GET_MODE (dest), dest, op1, op2),
+ src);
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+
+ return true;
+ }
+
+ /* We need byte alignment. */
+ if (bitsize % BITS_PER_UNIT)
+ return false;
+
+ if (bitpos == 0
+ && memory_operand (dest, VOIDmode)
+ && (register_operand (src, word_mode)
+ || const_int_operand (src, VOIDmode)))
+ {
+ /* Emit standard pattern if possible. */
+ enum machine_mode mode = smallest_mode_for_size (bitsize, MODE_INT);
+ if (GET_MODE_BITSIZE (mode) == bitsize)
+ emit_move_insn (adjust_address (dest, mode, 0), gen_lowpart (mode, src));
+
+ /* (set (ze (mem)) (const_int)). */
+ else if (const_int_operand (src, VOIDmode))
+ {
+ int size = bitsize / BITS_PER_UNIT;
+ rtx src_mem = adjust_address (force_const_mem (word_mode, src), BLKmode,
+ GET_MODE_SIZE (word_mode) - size);
+
+ dest = adjust_address (dest, BLKmode, 0);
+ set_mem_size (dest, GEN_INT (size));
+ s390_expand_movmem (dest, src_mem, GEN_INT (size));
+ }
+
+ /* (set (ze (mem)) (reg)). */
+ else if (register_operand (src, word_mode))
+ {
+ if (bitsize <= GET_MODE_BITSIZE (SImode))
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
+ const0_rtx), src);
+ else
+ {
+ /* Emit st,stcmh sequence. */
+ int stcmh_width = bitsize - GET_MODE_BITSIZE (SImode);
+ int size = stcmh_width / BITS_PER_UNIT;
+
+ emit_move_insn (adjust_address (dest, SImode, size),
+ gen_lowpart (SImode, src));
+ set_mem_size (dest, GEN_INT (size));
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, GEN_INT
+ (stcmh_width), const0_rtx),
+ gen_rtx_LSHIFTRT (word_mode, src, GEN_INT
+ (GET_MODE_BITSIZE (SImode))));
+ }
+ }
+ else
+ return false;
+
+ return true;
+ }
+
+ /* (set (ze (reg)) (const_int)). */
+ if (TARGET_ZARCH
+ && register_operand (dest, word_mode)
+ && (bitpos % 16) == 0
+ && (bitsize % 16) == 0
+ && const_int_operand (src, VOIDmode))
+ {
+ HOST_WIDE_INT val = INTVAL (src);
+ int regpos = bitpos + bitsize;
+
+ while (regpos > bitpos)
+ {
+ enum machine_mode putmode;
+ int putsize;
+
+ if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
+ putmode = SImode;
+ else
+ putmode = HImode;
+
+ putsize = GET_MODE_BITSIZE (putmode);
+ regpos -= putsize;
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
+ GEN_INT (putsize),
+ GEN_INT (regpos)),
+ gen_int_mode (val, putmode));
+ val >>= putsize;
+ }
+ gcc_assert (regpos == bitpos);
+ return true;
+ }
+
+ return false;
+}
+
+/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
+ register that holds VAL of mode MODE shifted by COUNT bits. */
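+
+/* For illustration: with MODE == QImode and COUNT == (const_int 16),
+ this computes (VAL & 0xff) << 16, i.e. the QImode value placed at
+ bits 16..23 of the containing SImode word. */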
+
+static inline rtx
+s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
+{
+ val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ return expand_simple_binop (SImode, ASHIFT, val, count,
+ NULL_RTX, 1, OPTAB_DIRECT);
+}
+
+/* Structure to hold the initial parameters for a compare_and_swap operation
+ in HImode and QImode. */
+
+struct alignment_context
+{
+ rtx memsi; /* SI aligned memory location. */
+ rtx shift; /* Bit offset with regard to lsb. */
+ rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
+ rtx modemaski; /* ~modemask */
+ bool aligned; /* True if memory is aligned, false otherwise. */
+};
+
+/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
+ the structure AC so that the operation can be carried out transparently
+ on an SImode word; the setup simplifies if the memory alignment is known
+ to be at least 32 bits.  MEM is the memory location for the actual
+ operation and MODE its mode.  */
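+
+/* Worked example (illustrative): for a QImode access one byte past an
+ SImode-aligned address, the code below starts with shift = 4 - 1 = 3,
+ subtracts the byte offset (3 - 1 = 2) and scales by BITS_PER_UNIT,
+ giving shift = 16; on this big-endian target byte 1 of the word
+ indeed occupies bits 16..23 counting from the lsb. */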
+
+static void
+init_alignment_context (struct alignment_context *ac, rtx mem,
+ enum machine_mode mode)
+{
+ ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
+ ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
+
+ if (ac->aligned)
+ ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
+ else
+ {
+ /* Alignment is unknown. */
+ rtx byteoffset, addr, align;
+
+ /* Force the address into a register. */
+ addr = force_reg (Pmode, XEXP (mem, 0));
+
+ /* Align it to SImode. */
+ align = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (-GET_MODE_SIZE (SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* Generate MEM. */
+ ac->memsi = gen_rtx_MEM (SImode, align);
+ MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
+ set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
+ set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
+
+ /* Calculate the shift count. */
+ byteoffset = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (GET_MODE_SIZE (SImode) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* As we already have some offset, evaluate the remaining distance. */
+ ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ }
+ /* Shift is the byte count, but we need the bit count. */
+ ac->shift = expand_simple_binop (SImode, MULT, ac->shift, GEN_INT (BITS_PER_UNIT),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* Calculate masks. */
+ ac->modemask = expand_simple_binop (SImode, ASHIFT,
+ GEN_INT (GET_MODE_MASK (mode)), ac->shift,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
+}
+
+/* Expand an atomic compare and swap operation for HImode and QImode. MEM is
+ the memory location, CMP the old value to compare MEM with, and NEW_RTX the value
+ to set if CMP == MEM.
+ CMP is never in memory for compare_and_swap_cc because
+ expand_bool_compare_and_swap puts it into a register for later compare. */
+
+void
+s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx)
+{
+ struct alignment_context ac;
+ rtx cmpv, newv, val, resv, cc;
+ rtx res = gen_reg_rtx (SImode);
+ rtx csloop = gen_label_rtx ();
+ rtx csend = gen_label_rtx ();
+
+ gcc_assert (register_operand (target, VOIDmode));
+ gcc_assert (MEM_P (mem));
+
+ init_alignment_context (&ac, mem, mode);
+
+ /* Shift the values to the correct bit positions. */
+ if (!(ac.aligned && MEM_P (cmp)))
+ cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
+ if (!(ac.aligned && MEM_P (new_rtx)))
+ new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
+
+ /* Load full word. Subsequent loads are performed by CS. */
+ val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ /* Start CS loop. */
+ emit_label (csloop);
+ /* val = "<mem>00..0<mem>"
+ * cmp = "00..0<cmp>00..0"
+ * new = "00..0<new>00..0"
+ */
+
+ /* Patch cmp and new with val at correct position. */
+ if (ac.aligned && MEM_P (cmp))
+ {
+ cmpv = force_reg (SImode, val);
+ store_bit_field (cmpv, GET_MODE_BITSIZE (mode), 0, SImode, cmp);
+ }
+ else
+ cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+ if (ac.aligned && MEM_P (new_rtx))
+ {
+ newv = force_reg (SImode, val);
+ store_bit_field (newv, GET_MODE_BITSIZE (mode), 0, SImode, new_rtx);
+ }
+ else
+ newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ /* Jump to end if we're done (likely?). */
+ s390_emit_jump (csend, s390_emit_compare_and_swap (EQ, res, ac.memsi,
+ cmpv, newv));
+
+ /* Check for changes outside mode. */
+ resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ cc = s390_emit_compare (NE, resv, val);
+ emit_move_insn (val, resv);
+ /* If so, loop back internally. */
+ s390_emit_jump (csloop, cc);
+
+ emit_label (csend);
+
+ /* Return the correct part of the bitfield. */
+ convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT), 1);
+}
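+
+/* Note (illustrative): the CS loop above only retries when bits
+ outside MODE have changed; if the CS failed because the MODE part
+ itself differed, the compare-and-swap has properly failed and control
+ falls through to CSEND.  This routine is reached from the HImode and
+ QImode compare-and-swap expanders, presumably in s390.md. */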
+
+/* Expand an atomic operation CODE of mode MODE. MEM is the memory location
+ and VAL the value to play with. If AFTER is true then store the value
+ MEM holds after the operation, if AFTER is false then store the value MEM
+ holds before the operation. If TARGET is zero then discard that value, else
+ store it to TARGET. */
+
+void
+s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
+ rtx target, rtx mem, rtx val, bool after)
+{
+ struct alignment_context ac;
+ rtx cmp;
+ rtx new_rtx = gen_reg_rtx (SImode);
+ rtx orig = gen_reg_rtx (SImode);
+ rtx csloop = gen_label_rtx ();
+
+ gcc_assert (!target || register_operand (target, VOIDmode));
+ gcc_assert (MEM_P (mem));
+
+ init_alignment_context (&ac, mem, mode);
+
+ /* Shift val to the correct bit positions.
+ Preserve "icm", but prevent "ex icm". */
+ if (!(ac.aligned && code == SET && MEM_P (val)))
+ val = s390_expand_mask_and_shift (val, mode, ac.shift);
+
+ /* Further preparation insns. */
+ if (code == PLUS || code == MINUS)
+ emit_move_insn (orig, val);
+ else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
+ val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ /* Load full word. Subsequent loads are performed by CS. */
+ cmp = force_reg (SImode, ac.memsi);
+
+ /* Start CS loop. */
+ emit_label (csloop);
+ emit_move_insn (new_rtx, cmp);
+
+ /* Patch new with val at correct position. */
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ val = expand_simple_binop (SImode, code, new_rtx, orig,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ val = expand_simple_binop (SImode, AND, val, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* FALLTHRU */
+ case SET:
+ if (ac.aligned && MEM_P (val))
+ store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0, SImode, val);
+ else
+ {
+ new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ break;
+ case AND:
+ case IOR:
+ case XOR:
+ new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ break;
+ case MULT: /* NAND */
+ new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
+ ac.memsi, cmp, new_rtx));
+
+ /* Return the correct part of the bitfield. */
+ if (target)
+ convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
+ after ? new_rtx : cmp, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT), 1);
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+
+static void
+s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ switch (size)
+ {
+ case 4:
+ fputs ("\t.long\t", file);
+ break;
+ case 8:
+ fputs ("\t.quad\t", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_addr_const (file, x);
+ fputs ("@DTPOFF", file);
+}
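+
+/* For example, with SIZE == 8 and X a SYMBOL_REF for "foo", the
+ routine above emits "\t.quad\tfoo@DTPOFF". */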
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+/* Implement TARGET_MANGLE_TYPE. */
+
+static const char *
+s390_mangle_type (const_tree type)
+{
+ if (TYPE_MAIN_VARIANT (type) == long_double_type_node
+ && TARGET_LONG_DOUBLE_128)
+ return "g";
+
+ /* For all other types, use normal C++ mangling. */
+ return NULL;
+}
+#endif
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize various UNSPEC sequences
+ and turn them back into a direct symbol reference. */
+
+static rtx
+s390_delegitimize_address (rtx orig_x)
+{
+ rtx x, y;
+
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+ x = orig_x;
+ if (GET_CODE (x) != MEM)
+ return orig_x;
+
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST
+ && GET_CODE (XEXP (x, 0)) == REG
+ && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ {
+ y = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (y) == UNSPEC
+ && XINT (y, 1) == UNSPEC_GOT)
+ y = XVECEXP (y, 0, 0);
+ else
+ return orig_x;
+ }
+ else if (GET_CODE (x) == CONST)
+ {
+ y = XEXP (x, 0);
+ if (GET_CODE (y) == UNSPEC
+ && XINT (y, 1) == UNSPEC_GOTENT)
+ y = XVECEXP (y, 0, 0);
+ else
+ return orig_x;
+ }
+ else
+ return orig_x;
+
+ if (GET_MODE (orig_x) != Pmode)
+ {
+ if (GET_MODE (orig_x) == BLKmode)
+ return orig_x;
+ y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
+ if (y == NULL_RTX)
+ return orig_x;
+ }
+ return y;
+}
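+
+/* Schematically, the routine above turns
+ (mem (plus (reg %r12) (const (unspec [foo] UNSPEC_GOT))))
+ (with %r12 being the PIC register) back into the plain
+ SYMBOL_REF foo. */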
+
+/* Output operand OP to stdio stream FILE.
+ OP is an address (register + offset) which is not used to address data;
+ instead the rightmost bits are interpreted as the value. */
+
+static void
+print_shift_count_operand (FILE *file, rtx op)
+{
+ HOST_WIDE_INT offset;
+ rtx base;
+
+ /* Extract base register and offset. */
+ if (!s390_decompose_shift_count (op, &base, &offset))
+ gcc_unreachable ();
+
+ /* Sanity check. */
+ if (base)
+ {
+ gcc_assert (GET_CODE (base) == REG);
+ gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
+ gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
+ }
+
+ /* Offsets are restricted to twelve bits. */
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
+ if (base)
+ fprintf (file, "(%s)", reg_names[REGNO (base)]);
+}
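+
+/* E.g., assuming s390_decompose_shift_count splits OP into base %r3
+ and offset 4097, the routine above prints "1(%r3)": only the low
+ twelve bits of the offset (4097 & 4095 == 1) are significant for a
+ shift count. */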
+
+/* See 'get_some_local_dynamic_name'. */
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ {
+ x = get_pool_constant (x);
+ return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
+ }
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in local-dynamic base patterns. */
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+/* Output machine-dependent UNSPECs occurring in address constant X
+ in assembler syntax to stdio stream FILE. Returns true if the
+ constant X could be recognized, false otherwise. */
+
+static bool
+s390_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTENT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOTENT");
+ return true;
+ case UNSPEC_GOT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOT");
+ return true;
+ case UNSPEC_GOTOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOTOFF");
+ return true;
+ case UNSPEC_PLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@PLT");
+ return true;
+ case UNSPEC_PLTOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@PLTOFF");
+ return true;
+ case UNSPEC_TLSGD:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@TLSGD");
+ return true;
+ case UNSPEC_TLSLDM:
+ assemble_name (file, get_some_local_dynamic_name ());
+ fprintf (file, "@TLSLDM");
+ return true;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@DTPOFF");
+ return true;
+ case UNSPEC_NTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@NTPOFF");
+ return true;
+ case UNSPEC_GOTNTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@GOTNTPOFF");
+ return true;
+ case UNSPEC_INDNTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fprintf (file, "@INDNTPOFF");
+ return true;
+ }
+
+ if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_POOL_OFFSET:
+ x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
+ output_addr_const (file, x);
+ return true;
+ }
+ return false;
+}
+
+/* Output address operand ADDR in assembler syntax to
+ stdio stream FILE. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ struct s390_address ad;
+
+ if (s390_symref_operand_p (addr, NULL, NULL))
+ {
+ if (!TARGET_Z10)
+ {
+ output_operand_lossage ("symbolic memory references are "
+ "only supported on z10 or later");
+ return;
+ }
+ output_addr_const (file, addr);
+ return;
+ }
+
+ if (!s390_decompose_address (addr, &ad)
+ || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
+ output_operand_lossage ("cannot decompose address");
+
+ if (ad.disp)
+ output_addr_const (file, ad.disp);
+ else
+ fprintf (file, "0");
+
+ if (ad.base && ad.indx)
+ fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
+ reg_names[REGNO (ad.base)]);
+ else if (ad.base)
+ fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
+}
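+
+/* For example, an address that decomposes into displacement 8, index
+ register %r1 and base register %r2 is printed by the routine above
+ as "8(%r1,%r2)". */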
+
+/* Output operand X in assembler syntax to stdio stream FILE.
+ CODE specifies the format flag. The following format flags
+ are recognized:
+
+ 'C': print opcode suffix for branch condition.
+ 'D': print opcode suffix for inverse branch condition.
+ 'E': print opcode suffix for branch on index instruction.
+ 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
+ 'G': print the size of the operand in bytes.
+ 'O': print only the displacement of a memory reference.
+ 'R': print only the base register of a memory reference.
+ 'S': print S-type memory reference (base+displacement).
+ 'N': print the second word of a DImode operand.
+ 'M': print the second word of a TImode operand.
+ 'Y': print shift count operand.
+
+ 'b': print integer X as if it's an unsigned byte.
+ 'c': print integer X as if it's a signed byte.
+ 'x': print integer X as if it's an unsigned halfword.
+ 'h': print integer X as if it's a signed halfword.
+ 'i': print the first nonzero HImode part of X.
+ 'j': print the first HImode part of X unequal to -1.
+ 'k': print the first nonzero SImode part of X.
+ 'm': print the first SImode part of X unequal to -1.
+ 'o': print integer X as if it's an unsigned 32-bit word. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'C':
+ fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
+ return;
+
+ case 'D':
+ fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
+ return;
+
+ case 'E':
+ if (GET_CODE (x) == LE)
+ fprintf (file, "l");
+ else if (GET_CODE (x) == GT)
+ fprintf (file, "h");
+ else
+ output_operand_lossage ("invalid comparison operator "
+ "for 'E' output modifier");
+ return;
+
+ case 'J':
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ fprintf (file, "%s", ":tls_load:");
+ output_addr_const (file, x);
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
+ {
+ fprintf (file, "%s", ":tls_gdcall:");
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ }
+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
+ {
+ fprintf (file, "%s", ":tls_ldcall:");
+ assemble_name (file, get_some_local_dynamic_name ());
+ }
+ else
+ output_operand_lossage ("invalid reference for 'J' output modifier");
+ return;
+
+ case 'G':
+ fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
+ return;
+
+ case 'O':
+ {
+ struct s390_address ad;
+ int ret;
+
+ if (!MEM_P (x))
+ {
+ output_operand_lossage ("memory reference expected for "
+ "'O' output modifier");
+ return;
+ }
+
+ ret = s390_decompose_address (XEXP (x, 0), &ad);
+
+ if (!ret
+ || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ || ad.indx)
+ {
+ output_operand_lossage ("invalid address for 'O' output modifier");
+ return;
+ }
+
+ if (ad.disp)
+ output_addr_const (file, ad.disp);
+ else
+ fprintf (file, "0");
+ }
+ return;
+
+ case 'R':
+ {
+ struct s390_address ad;
+ int ret;
+
+ if (!MEM_P (x))
+ {
+ output_operand_lossage ("memory reference expected for "
+ "'R' output modifier");
+ return;
+ }
+
+ ret = s390_decompose_address (XEXP (x, 0), &ad);
+
+ if (!ret
+ || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ || ad.indx)
+ {
+ output_operand_lossage ("invalid address for 'R' output modifier");
+ return;
+ }
+
+ if (ad.base)
+ fprintf (file, "%s", reg_names[REGNO (ad.base)]);
+ else
+ fprintf (file, "0");
+ }
+ return;
+
+ case 'S':
+ {
+ struct s390_address ad;
+ int ret;
+
+ if (!MEM_P (x))
+ {
+ output_operand_lossage ("memory reference expected for "
+ "'S' output modifier");
+ return;
+ }
+ ret = s390_decompose_address (XEXP (x, 0), &ad);
+
+ if (!ret
+ || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
+ || ad.indx)
+ {
+ output_operand_lossage ("invalid address for 'S' output modifier");
+ return;
+ }
+
+ if (ad.disp)
+ output_addr_const (file, ad.disp);
+ else
+ fprintf (file, "0");
+
+ if (ad.base)
+ fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
+ }
+ return;
+
+ case 'N':
+ if (GET_CODE (x) == REG)
+ x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
+ else if (GET_CODE (x) == MEM)
+ x = change_address (x, VOIDmode, plus_constant (XEXP (x, 0), 4));
+ else
+ output_operand_lossage ("register or memory expression expected "
+ "for 'N' output modifier");
+ break;
+
+ case 'M':
+ if (GET_CODE (x) == REG)
+ x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
+ else if (GET_CODE (x) == MEM)
+ x = change_address (x, VOIDmode, plus_constant (XEXP (x, 0), 8));
+ else
+ output_operand_lossage ("register or memory expression expected "
+ "for 'M' output modifier");
+ break;
+
+ case 'Y':
+ print_shift_count_operand (file, x);
+ return;
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ break;
+
+ case MEM:
+ output_address (XEXP (x, 0));
+ break;
+
+ case CONST:
+ case CODE_LABEL:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ output_addr_const (file, x);
+ break;
+
+ case CONST_INT:
+ if (code == 'b')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xff);
+ else if (code == 'c')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xff) ^ 0x80) - 0x80);
+ else if (code == 'x')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
+ else if (code == 'h')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
+ else if (code == 'i')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ s390_extract_part (x, HImode, 0));
+ else if (code == 'j')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ s390_extract_part (x, HImode, -1));
+ else if (code == 'k')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ s390_extract_part (x, SImode, 0));
+ else if (code == 'm')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ s390_extract_part (x, SImode, -1));
+ else if (code == 'o')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffffffff);
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST_DOUBLE:
+ gcc_assert (GET_MODE (x) == VOIDmode);
+ if (code == 'b')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
+ else if (code == 'x')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
+ else if (code == 'h')
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
+ else
+ {
+ if (code == 0)
+ output_operand_lossage ("invalid constant - try using "
+ "an output modifier");
+ else
+ output_operand_lossage ("invalid constant for output modifier '%c'",
+ code);
+ }
+ break;
+
+ default:
+ if (code == 0)
+ output_operand_lossage ("invalid expression - try using "
+ "an output modifier");
+ else
+ output_operand_lossage ("invalid expression for output "
+ "modifier '%c'", code);
+ break;
+ }
+}
+
+/* Target hook for assembling integer objects.  We need to define it
+ here to work around a bug in some versions of GAS, which couldn't
+ handle values smaller than INT_MIN when printed in decimal. */
+
+static bool
+s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ if (size == 8 && aligned_p
+ && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
+ {
+ fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
+ INTVAL (x));
+ return true;
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
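+
+/* For example, the DImode constant -2147483649 (just below INT_MIN) is
+ emitted by the routine above as "\t.quad\t0xffffffff7fffffff",
+ sidestepping the decimal form that triggers the assembler bug. */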
+
+/* Returns true if register REGNO is used for forming
+ a memory address in expression X. */
+
+static bool
+reg_used_in_mem_p (int regno, rtx x)
+{
+ enum rtx_code code = GET_CODE (x);
+ int i, j;
+ const char *fmt;
+
+ if (code == MEM)
+ {
+ if (refers_to_regno_p (regno, regno+1,
+ XEXP (x, 0), 0))
+ return true;
+ }
+ else if (code == SET
+ && GET_CODE (SET_DEST (x)) == PC)
+ {
+ if (refers_to_regno_p (regno, regno+1,
+ SET_SRC (x), 0))
+ return true;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e'
+ && reg_used_in_mem_p (regno, XEXP (x, i)))
+ return true;
+
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (x, i); j++)
+ if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
+ return true;
+ }
+ return false;
+}
+
+/* Returns true if expression DEP_RTX sets an address register
+ used by instruction INSN to address memory. */
+
+static bool
+addr_generation_dependency_p (rtx dep_rtx, rtx insn)
+{
+ rtx target, pat;
+
+ if (GET_CODE (dep_rtx) == INSN)
+ dep_rtx = PATTERN (dep_rtx);
+
+ if (GET_CODE (dep_rtx) == SET)
+ {
+ target = SET_DEST (dep_rtx);
+ if (GET_CODE (target) == STRICT_LOW_PART)
+ target = XEXP (target, 0);
+ while (GET_CODE (target) == SUBREG)
+ target = SUBREG_REG (target);
+
+ if (GET_CODE (target) == REG)
+ {
+ int regno = REGNO (target);
+
+ if (s390_safe_attr_type (insn) == TYPE_LA)
+ {
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ gcc_assert (XVECLEN (pat, 0) == 2);
+ pat = XVECEXP (pat, 0, 0);
+ }
+ gcc_assert (GET_CODE (pat) == SET);
+ return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
+ }
+ else if (get_attr_atype (insn) == ATYPE_AGEN)
+ return reg_used_in_mem_p (regno, PATTERN (insn));
+ }
+ }
+ return false;
+}
+
+/* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
+
+int
+s390_agen_dep_p (rtx dep_insn, rtx insn)
+{
+ rtx dep_rtx = PATTERN (dep_insn);
+ int i;
+
+ if (GET_CODE (dep_rtx) == SET
+ && addr_generation_dependency_p (dep_rtx, insn))
+ return 1;
+ else if (GET_CODE (dep_rtx) == PARALLEL)
+ {
+ for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
+ {
+ if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/* A C statement (sans semicolon) to update the integer scheduling priority
+ INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
+ reduce the priority to execute INSN later. Do not define this macro if
+ you do not need to adjust the scheduling priorities of insns.
+
+ A STD instruction should be scheduled earlier,
+ in order to use the bypass. */
+static int
+s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
+{
+ if (! INSN_P (insn))
+ return priority;
+
+ if (s390_tune != PROCESSOR_2084_Z990
+ && s390_tune != PROCESSOR_2094_Z9_109
+ && s390_tune != PROCESSOR_2097_Z10
+ && s390_tune != PROCESSOR_2817_Z196)
+ return priority;
+
+ switch (s390_safe_attr_type (insn))
+ {
+ case TYPE_FSTOREDF:
+ case TYPE_FSTORESF:
+ priority = priority << 3;
+ break;
+ case TYPE_STORE:
+ case TYPE_STM:
+ priority = priority << 1;
+ break;
+ default:
+ break;
+ }
+ return priority;
+}
+
+
+/* The number of instructions that can be issued per cycle. */
+
+static int
+s390_issue_rate (void)
+{
+ switch (s390_tune)
+ {
+ case PROCESSOR_2084_Z990:
+ case PROCESSOR_2094_Z9_109:
+ case PROCESSOR_2817_Z196:
+ return 3;
+ case PROCESSOR_2097_Z10:
+ return 2;
+ default:
+ return 1;
+ }
+}
+
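+/* Number of insn alternatives the scheduler may examine when picking
+ the next insn to issue; this implements the
+ TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook. */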
+static int
+s390_first_cycle_multipass_dfa_lookahead (void)
+{
+ return 4;
+}
+
+/* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
+ Fix up MEMs as required. */
+
+static void
+annotate_constant_pool_refs (rtx *x)
+{
+ int i, j;
+ const char *fmt;
+
+ gcc_assert (GET_CODE (*x) != SYMBOL_REF
+ || !CONSTANT_POOL_ADDRESS_P (*x));
+
+ /* Literal pool references can only occur inside a MEM ... */
+ if (GET_CODE (*x) == MEM)
+ {
+ rtx memref = XEXP (*x, 0);
+
+ if (GET_CODE (memref) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (memref))
+ {
+ rtx base = cfun->machine->base_reg;
+ rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
+ UNSPEC_LTREF);
+
+ *x = replace_equiv_address (*x, addr);
+ return;
+ }
+
+ if (GET_CODE (memref) == CONST
+ && GET_CODE (XEXP (memref, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
+ {
+ HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
+ rtx sym = XEXP (XEXP (memref, 0), 0);
+ rtx base = cfun->machine->base_reg;
+ rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
+ UNSPEC_LTREF);
+
+ *x = replace_equiv_address (*x, plus_constant (addr, off));
+ return;
+ }
+ }
+
+ /* ... or a load-address type pattern. */
+ if (GET_CODE (*x) == SET)
+ {
+ rtx addrref = SET_SRC (*x);
+
+ if (GET_CODE (addrref) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (addrref))
+ {
+ rtx base = cfun->machine->base_reg;
+ rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
+ UNSPEC_LTREF);
+
+ SET_SRC (*x) = addr;
+ return;
+ }
+
+ if (GET_CODE (addrref) == CONST
+ && GET_CODE (XEXP (addrref, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
+ && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
+ {
+ HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
+ rtx sym = XEXP (XEXP (addrref, 0), 0);
+ rtx base = cfun->machine->base_reg;
+ rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
+ UNSPEC_LTREF);
+
+ SET_SRC (*x) = plus_constant (addr, off);
+ return;
+ }
+ }
+
+ /* Annotate LTREL_BASE as well. */
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_LTREL_BASE)
+ {
+ rtx base = cfun->machine->base_reg;
+ *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
+ UNSPEC_LTREL_BASE);
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (*x));
+ for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ annotate_constant_pool_refs (&XEXP (*x, i));
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = 0; j < XVECLEN (*x, i); j++)
+ annotate_constant_pool_refs (&XVECEXP (*x, i, j));
+ }
+ }
+}
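+
+/* Schematically, the annotation above rewrites
+ (mem (symbol_ref --literal pool entry--))
+ into
+ (mem (unspec [(symbol_ref ...) (reg base)] UNSPEC_LTREF))
+ making the dependency on the pool base register explicit. */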
+
+/* Split all branches that exceed the maximum distance.
+ Returns true if this created a new literal pool entry. */
+
+static int
+s390_split_branches (void)
+{
+ rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+ int new_literal = 0, ret;
+ rtx insn, pat, tmp, target;
+ rtx *label;
+
+ /* We need correct insn addresses. */
+
+ shorten_branches (get_insns ());
+
+ /* Find all branches that exceed 64KB, and split them. */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) != JUMP_INSN)
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
+ continue;
+
+ if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
+ {
+ label = &SET_SRC (pat);
+ }
+ else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
+ {
+ if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
+ label = &XEXP (SET_SRC (pat), 1);
+ else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
+ label = &XEXP (SET_SRC (pat), 2);
+ else
+ continue;
+ }
+ else
+ continue;
+
+ if (get_attr_length (insn) <= 4)
+ continue;
+
+ /* We are going to use the return register as a scratch register;
+ make sure it will be saved/restored by the prologue/epilogue. */
+ cfun_frame_layout.save_return_addr_p = 1;
+
+ if (!flag_pic)
+ {
+ new_literal = 1;
+ tmp = force_const_mem (Pmode, *label);
+ tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
+ INSN_ADDRESSES_NEW (tmp, -1);
+ annotate_constant_pool_refs (&PATTERN (tmp));
+
+ target = temp_reg;
+ }
+ else
+ {
+ new_literal = 1;
+ target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
+ UNSPEC_LTREL_OFFSET);
+ target = gen_rtx_CONST (Pmode, target);
+ target = force_const_mem (Pmode, target);
+ tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
+ INSN_ADDRESSES_NEW (tmp, -1);
+ annotate_constant_pool_refs (&PATTERN (tmp));
+
+ target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
+ cfun->machine->base_reg),
+ UNSPEC_LTREL_BASE);
+ target = gen_rtx_PLUS (Pmode, temp_reg, target);
+ }
+
+ ret = validate_change (insn, label, target, 0);
+ gcc_assert (ret);
+ }
+
+ return new_literal;
+}
+
+
+/* Find an annotated literal pool symbol referenced in RTX X,
+ and store it at REF. Will abort if X contains references to
+ more than one such pool symbol; multiple references to the same
+ symbol are allowed, however.
+
+ The rtx pointed to by REF must be initialized to NULL_RTX
+ by the caller before calling this routine. */
+
+static void
+find_constant_pool_ref (rtx x, rtx *ref)
+{
+ int i, j;
+ const char *fmt;
+
+ /* Ignore LTREL_BASE references. */
+ if (GET_CODE (x) == UNSPEC
+ && XINT (x, 1) == UNSPEC_LTREL_BASE)
+ return;
+ /* Likewise POOL_ENTRY insns. */
+ if (GET_CODE (x) == UNSPEC_VOLATILE
+ && XINT (x, 1) == UNSPECV_POOL_ENTRY)
+ return;
+
+ gcc_assert (GET_CODE (x) != SYMBOL_REF
+ || !CONSTANT_POOL_ADDRESS_P (x));
+
+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
+ {
+ rtx sym = XVECEXP (x, 0, 0);
+ gcc_assert (GET_CODE (sym) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (sym));
+
+ if (*ref == NULL_RTX)
+ *ref = sym;
+ else
+ gcc_assert (*ref == sym);
+
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ find_constant_pool_ref (XEXP (x, i), ref);
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = 0; j < XVECLEN (x, i); j++)
+ find_constant_pool_ref (XVECEXP (x, i, j), ref);
+ }
+ }
+}
+
+/* Replace every reference to the annotated literal pool
+ symbol REF in X by its base plus OFFSET. */
+
+static void
+replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
+{
+ int i, j;
+ const char *fmt;
+
+ gcc_assert (*x != ref);
+
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_LTREF
+ && XVECEXP (*x, 0, 0) == ref)
+ {
+ *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
+ return;
+ }
+
+ if (GET_CODE (*x) == PLUS
+ && GET_CODE (XEXP (*x, 1)) == CONST_INT
+ && GET_CODE (XEXP (*x, 0)) == UNSPEC
+ && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
+ && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
+ *x = plus_constant (addr, INTVAL (XEXP (*x, 1)));
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (*x));
+ for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = 0; j < XVECLEN (*x, i); j++)
+ replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
+ }
+ }
+}
+
+/* Check whether X contains an UNSPEC_LTREL_BASE.
+ Return its constant pool symbol if found, NULL_RTX otherwise. */
+
+static rtx
+find_ltrel_base (rtx x)
+{
+ int i, j;
+ const char *fmt;
+
+ if (GET_CODE (x) == UNSPEC
+ && XINT (x, 1) == UNSPEC_LTREL_BASE)
+ return XVECEXP (x, 0, 0);
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ rtx fnd = find_ltrel_base (XEXP (x, i));
+ if (fnd)
+ return fnd;
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = 0; j < XVECLEN (x, i); j++)
+ {
+ rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
+ if (fnd)
+ return fnd;
+ }
+ }
+ }
+
+ return NULL_RTX;
+}
+
+/* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
+
+static void
+replace_ltrel_base (rtx *x)
+{
+ int i, j;
+ const char *fmt;
+
+ if (GET_CODE (*x) == UNSPEC
+ && XINT (*x, 1) == UNSPEC_LTREL_BASE)
+ {
+ *x = XVECEXP (*x, 0, 1);
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (*x));
+ for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ replace_ltrel_base (&XEXP (*x, i));
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = 0; j < XVECLEN (*x, i); j++)
+ replace_ltrel_base (&XVECEXP (*x, i, j));
+ }
+ }
+}
+
+
+/* We keep a list of constants which we have to add to internal
+ constant tables in the middle of large functions. */
+
+#define NR_C_MODES 11
+enum machine_mode constant_modes[NR_C_MODES] =
+{
+ TFmode, TImode, TDmode,
+ DFmode, DImode, DDmode,
+ SFmode, SImode, SDmode,
+ HImode,
+ QImode
+};
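+
+/* The modes above are ordered by decreasing size; s390_dump_pool
+ relies on this ordering to emit the constants of each pool with
+ proper natural alignment. */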
+
+struct constant
+{
+ struct constant *next;
+ rtx value;
+ rtx label;
+};
+
+struct constant_pool
+{
+ struct constant_pool *next;
+ rtx first_insn;
+ rtx pool_insn;
+ bitmap insns;
+ rtx emit_pool_after;
+
+ struct constant *constants[NR_C_MODES];
+ struct constant *execute;
+ rtx label;
+ int size;
+};
+
+/* Allocate new constant_pool structure. */
+
+static struct constant_pool *
+s390_alloc_pool (void)
+{
+ struct constant_pool *pool;
+ int i;
+
+ pool = (struct constant_pool *) xmalloc (sizeof *pool);
+ pool->next = NULL;
+ for (i = 0; i < NR_C_MODES; i++)
+ pool->constants[i] = NULL;
+
+ pool->execute = NULL;
+ pool->label = gen_label_rtx ();
+ pool->first_insn = NULL_RTX;
+ pool->pool_insn = NULL_RTX;
+ pool->insns = BITMAP_ALLOC (NULL);
+ pool->size = 0;
+ pool->emit_pool_after = NULL_RTX;
+
+ return pool;
+}
+
+/* Create new constant pool covering instructions starting at INSN
+ and chain it to the end of POOL_LIST. */
+
+static struct constant_pool *
+s390_start_pool (struct constant_pool **pool_list, rtx insn)
+{
+ struct constant_pool *pool, **prev;
+
+ pool = s390_alloc_pool ();
+ pool->first_insn = insn;
+
+ for (prev = pool_list; *prev; prev = &(*prev)->next)
+ ;
+ *prev = pool;
+
+ return pool;
+}
+
+/* End range of instructions covered by POOL at INSN and emit
+ placeholder insn representing the pool. */
+
+static void
+s390_end_pool (struct constant_pool *pool, rtx insn)
+{
+ rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
+
+ if (!insn)
+ insn = get_last_insn ();
+
+ pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
+ INSN_ADDRESSES_NEW (pool->pool_insn, -1);
+}
+
+/* Add INSN to the list of insns covered by POOL. */
+
+static void
+s390_add_pool_insn (struct constant_pool *pool, rtx insn)
+{
+ bitmap_set_bit (pool->insns, INSN_UID (insn));
+}
+
+/* Return pool out of POOL_LIST that covers INSN. */
+
+static struct constant_pool *
+s390_find_pool (struct constant_pool *pool_list, rtx insn)
+{
+ struct constant_pool *pool;
+
+ for (pool = pool_list; pool; pool = pool->next)
+ if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
+ break;
+
+ return pool;
+}
+
+/* Add constant VAL of mode MODE to the constant pool POOL. */
+
+static void
+s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
+{
+ struct constant *c;
+ int i;
+
+ for (i = 0; i < NR_C_MODES; i++)
+ if (constant_modes[i] == mode)
+ break;
+ gcc_assert (i != NR_C_MODES);
+
+ for (c = pool->constants[i]; c != NULL; c = c->next)
+ if (rtx_equal_p (val, c->value))
+ break;
+
+ if (c == NULL)
+ {
+ c = (struct constant *) xmalloc (sizeof *c);
+ c->value = val;
+ c->label = gen_label_rtx ();
+ c->next = pool->constants[i];
+ pool->constants[i] = c;
+ pool->size += GET_MODE_SIZE (mode);
+ }
+}
+
+/* Return an rtx that represents the offset of X from the start of
+ pool POOL. */
+
+static rtx
+s390_pool_offset (struct constant_pool *pool, rtx x)
+{
+ rtx label;
+
+ label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
+ x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
+ UNSPEC_POOL_OFFSET);
+ return gen_rtx_CONST (GET_MODE (x), x);
+}
+
+/* Find constant VAL of mode MODE in the constant pool POOL.
+ Return an RTX describing the distance from the start of
+ the pool to the location of the new constant. */
+
+static rtx
+s390_find_constant (struct constant_pool *pool, rtx val,
+ enum machine_mode mode)
+{
+ struct constant *c;
+ int i;
+
+ for (i = 0; i < NR_C_MODES; i++)
+ if (constant_modes[i] == mode)
+ break;
+ gcc_assert (i != NR_C_MODES);
+
+ for (c = pool->constants[i]; c != NULL; c = c->next)
+ if (rtx_equal_p (val, c->value))
+ break;
+
+ gcc_assert (c);
+
+ return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
+}
+
+/* Check whether INSN is an execute. Return the label_ref to its
+ execute target template if so, NULL_RTX otherwise. */
+
+static rtx
+s390_execute_label (rtx insn)
+{
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == PARALLEL
+ && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
+ && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
+ return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
+
+ return NULL_RTX;
+}
+
+/* Add execute target for INSN to the constant pool POOL. */
+
+static void
+s390_add_execute (struct constant_pool *pool, rtx insn)
+{
+ struct constant *c;
+
+ for (c = pool->execute; c != NULL; c = c->next)
+ if (INSN_UID (insn) == INSN_UID (c->value))
+ break;
+
+ if (c == NULL)
+ {
+ c = (struct constant *) xmalloc (sizeof *c);
+ c->value = insn;
+ c->label = gen_label_rtx ();
+ c->next = pool->execute;
+ pool->execute = c;
+ pool->size += 6;
+ }
+}
+
+/* Find execute target for INSN in the constant pool POOL.
+ Return an RTX describing the distance from the start of
+ the pool to the location of the execute target. */
+
+static rtx
+s390_find_execute (struct constant_pool *pool, rtx insn)
+{
+ struct constant *c;
+
+ for (c = pool->execute; c != NULL; c = c->next)
+ if (INSN_UID (insn) == INSN_UID (c->value))
+ break;
+
+ gcc_assert (c);
+
+ return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
+}
+
+/* For an execute INSN, extract the execute target template. */
+
+static rtx
+s390_execute_target (rtx insn)
+{
+ rtx pattern = PATTERN (insn);
+ gcc_assert (s390_execute_label (insn));
+
+ if (XVECLEN (pattern, 0) == 2)
+ {
+ pattern = copy_rtx (XVECEXP (pattern, 0, 1));
+ }
+ else
+ {
+ rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
+ int i;
+
+ for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
+ RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
+
+ pattern = gen_rtx_PARALLEL (VOIDmode, vec);
+ }
+
+ return pattern;
+}
+
+/* Indicate that INSN cannot be duplicated. This is the case for
+ execute insns that carry a unique label. */
+
+static bool
+s390_cannot_copy_insn_p (rtx insn)
+{
+ rtx label = s390_execute_label (insn);
+ return label && label != const0_rtx;
+}
+
+/* Dump out the constants in POOL. If REMOTE_LABEL is true,
+ do not emit the pool base label. */
+
+static void
+s390_dump_pool (struct constant_pool *pool, bool remote_label)
+{
+ struct constant *c;
+ rtx insn = pool->pool_insn;
+ int i;
+
+ /* Switch to rodata section. */
+ if (TARGET_CPU_ZARCH)
+ {
+ insn = emit_insn_after (gen_pool_section_start (), insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ }
+
+ /* Ensure minimum pool alignment. */
+ if (TARGET_CPU_ZARCH)
+ insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
+ else
+ insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ /* Emit pool base label. */
+ if (!remote_label)
+ {
+ insn = emit_label_after (pool->label, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ }
+
+ /* Dump constants in descending alignment requirement order,
+ ensuring proper alignment for every constant. */
+ for (i = 0; i < NR_C_MODES; i++)
+ for (c = pool->constants[i]; c; c = c->next)
+ {
+ /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
+ rtx value = copy_rtx (c->value);
+ if (GET_CODE (value) == CONST
+ && GET_CODE (XEXP (value, 0)) == UNSPEC
+ && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
+ && XVECLEN (XEXP (value, 0), 0) == 1)
+ value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
+
+ insn = emit_label_after (c->label, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
+ gen_rtvec (1, value),
+ UNSPECV_POOL_ENTRY);
+ insn = emit_insn_after (value, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ }
+
+ /* Ensure minimum alignment for instructions. */
+ insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ /* Output in-pool execute template insns. */
+ for (c = pool->execute; c; c = c->next)
+ {
+ insn = emit_label_after (c->label, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ insn = emit_insn_after (s390_execute_target (c->value), insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ }
+
+ /* Switch back to previous section. */
+ if (TARGET_CPU_ZARCH)
+ {
+ insn = emit_insn_after (gen_pool_section_end (), insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ }
+
+ insn = emit_barrier_after (insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ /* Remove placeholder insn. */
+ remove_insn (pool->pool_insn);
+}
+
+/* Free all memory used by POOL. */
+
+static void
+s390_free_pool (struct constant_pool *pool)
+{
+ struct constant *c, *next;
+ int i;
+
+ for (i = 0; i < NR_C_MODES; i++)
+ for (c = pool->constants[i]; c; c = next)
+ {
+ next = c->next;
+ free (c);
+ }
+
+ for (c = pool->execute; c; c = next)
+ {
+ next = c->next;
+ free (c);
+ }
+
+ BITMAP_FREE (pool->insns);
+ free (pool);
+}
+
+
+/* Collect main literal pool. Return NULL on overflow. */
+
+static struct constant_pool *
+s390_mainpool_start (void)
+{
+ struct constant_pool *pool;
+ rtx insn;
+
+ pool = s390_alloc_pool ();
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
+ && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
+ {
+ gcc_assert (!pool->pool_insn);
+ pool->pool_insn = insn;
+ }
+
+ if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
+ {
+ s390_add_execute (pool, insn);
+ }
+ else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ {
+ rtx pool_ref = NULL_RTX;
+ find_constant_pool_ref (PATTERN (insn), &pool_ref);
+ if (pool_ref)
+ {
+ rtx constant = get_pool_constant (pool_ref);
+ enum machine_mode mode = get_pool_mode (pool_ref);
+ s390_add_constant (pool, constant, mode);
+ }
+ }
+
+ /* If hot/cold partitioning is enabled we have to make sure that
+ the literal pool is emitted in the same section where the
+ initialization of the literal pool base pointer takes place.
+ emit_pool_after is only used in the non-overflow case on non-Z
+ CPUs where we can emit the literal pool at the end of the
+ function body within the text section. */
+ if (NOTE_P (insn)
+ && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
+ && !pool->emit_pool_after)
+ pool->emit_pool_after = PREV_INSN (insn);
+ }
+
+ gcc_assert (pool->pool_insn || pool->size == 0);
+
+ if (pool->size >= 4096)
+ {
+ /* We're going to chunkify the pool, so remove the main
+ pool placeholder insn. */
+ remove_insn (pool->pool_insn);
+
+ s390_free_pool (pool);
+ pool = NULL;
+ }
+
+ /* If the function ends with the section where the literal pool
+ should be emitted, set the marker to its end. */
+ if (pool && !pool->emit_pool_after)
+ pool->emit_pool_after = get_last_insn ();
+
+ return pool;
+}
+
+/* POOL holds the main literal pool as collected by s390_mainpool_start.
+ Modify the current function to output the pool constants as well as
+ the pool register setup instruction. */
+
+static void
+s390_mainpool_finish (struct constant_pool *pool)
+{
+ rtx base_reg = cfun->machine->base_reg;
+ rtx insn;
+
+ /* If the pool is empty, we're done. */
+ if (pool->size == 0)
+ {
+ /* We don't actually need a base register after all. */
+ cfun->machine->base_reg = NULL_RTX;
+
+ if (pool->pool_insn)
+ remove_insn (pool->pool_insn);
+ s390_free_pool (pool);
+ return;
+ }
+
+ /* We need correct insn addresses. */
+ shorten_branches (get_insns ());
+
+ /* On zSeries, we use a LARL to load the pool register. The pool is
+ located in the .rodata section, so we emit it after the function. */
+ if (TARGET_CPU_ZARCH)
+ {
+ insn = gen_main_base_64 (base_reg, pool->label);
+ insn = emit_insn_after (insn, pool->pool_insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ remove_insn (pool->pool_insn);
+
+ insn = get_last_insn ();
+ pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
+ INSN_ADDRESSES_NEW (pool->pool_insn, -1);
+
+ s390_dump_pool (pool, 0);
+ }
+
+ /* On S/390, if the total size of the function's code plus literal pool
+ does not exceed 4096 bytes, we use BASR to set up a function base
+ pointer, and emit the literal pool at the end of the function. */
+ else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
+ + pool->size + 8 /* alignment slop */ < 4096)
+ {
+ insn = gen_main_base_31_small (base_reg, pool->label);
+ insn = emit_insn_after (insn, pool->pool_insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ remove_insn (pool->pool_insn);
+
+ insn = emit_label_after (pool->label, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ /* emit_pool_after will be set by s390_mainpool_start to the
+ last insn of the section where the literal pool should be
+ emitted. */
+ insn = pool->emit_pool_after;
+
+ pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
+ INSN_ADDRESSES_NEW (pool->pool_insn, -1);
+
+ s390_dump_pool (pool, 1);
+ }
+
+ /* Otherwise, we emit an inline literal pool and use BASR to branch
+ over it, setting up the pool register at the same time. */
+ else
+ {
+ rtx pool_end = gen_label_rtx ();
+
+ insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
+ insn = emit_insn_after (insn, pool->pool_insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+ remove_insn (pool->pool_insn);
+
+ insn = emit_label_after (pool->label, insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
+ INSN_ADDRESSES_NEW (pool->pool_insn, -1);
+
+ insn = emit_label_after (pool_end, pool->pool_insn);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ s390_dump_pool (pool, 1);
+ }
+
+
+ /* Replace all literal pool references. */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ replace_ltrel_base (&PATTERN (insn));
+
+ if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ {
+ rtx addr, pool_ref = NULL_RTX;
+ find_constant_pool_ref (PATTERN (insn), &pool_ref);
+ if (pool_ref)
+ {
+ if (s390_execute_label (insn))
+ addr = s390_find_execute (pool, insn);
+ else
+ addr = s390_find_constant (pool, get_pool_constant (pool_ref),
+ get_pool_mode (pool_ref));
+
+ replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+
+
+ /* Free the pool. */
+ s390_free_pool (pool);
+}
+
+/* POOL holds the main literal pool as collected by s390_mainpool_start.
+ We have decided we cannot use this pool, so revert all changes
+ to the current function that were done by s390_mainpool_start. */
+static void
+s390_mainpool_cancel (struct constant_pool *pool)
+{
+ /* We didn't actually change the instruction stream, so simply
+ free the pool memory. */
+ s390_free_pool (pool);
+}
+
+
+/* Chunkify the literal pool. */
+
+#define S390_POOL_CHUNK_MIN 0xc00
+#define S390_POOL_CHUNK_MAX 0xe00
+
+static struct constant_pool *
+s390_chunkify_start (void)
+{
+ struct constant_pool *curr_pool = NULL, *pool_list = NULL;
+ int extra_size = 0;
+ bitmap far_labels;
+ rtx pending_ltrel = NULL_RTX;
+ rtx insn;
+
+ rtx (*gen_reload_base) (rtx, rtx) =
+ TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;
+
+
+ /* We need correct insn addresses. */
+
+ shorten_branches (get_insns ());
+
+ /* Scan all insns and move literals to pool chunks. */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ bool section_switch_p = false;
+
+ /* Check for pending LTREL_BASE. */
+ if (INSN_P (insn))
+ {
+ rtx ltrel_base = find_ltrel_base (PATTERN (insn));
+ if (ltrel_base)
+ {
+ gcc_assert (ltrel_base == pending_ltrel);
+ pending_ltrel = NULL_RTX;
+ }
+ }
+
+ if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
+ {
+ if (!curr_pool)
+ curr_pool = s390_start_pool (&pool_list, insn);
+
+ s390_add_execute (curr_pool, insn);
+ s390_add_pool_insn (curr_pool, insn);
+ }
+ else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ {
+ rtx pool_ref = NULL_RTX;
+ find_constant_pool_ref (PATTERN (insn), &pool_ref);
+ if (pool_ref)
+ {
+ rtx constant = get_pool_constant (pool_ref);
+ enum machine_mode mode = get_pool_mode (pool_ref);
+
+ if (!curr_pool)
+ curr_pool = s390_start_pool (&pool_list, insn);
+
+ s390_add_constant (curr_pool, constant, mode);
+ s390_add_pool_insn (curr_pool, insn);
+
+ /* Don't split the pool chunk between a LTREL_OFFSET load
+ and the corresponding LTREL_BASE. */
+ if (GET_CODE (constant) == CONST
+ && GET_CODE (XEXP (constant, 0)) == UNSPEC
+ && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
+ {
+ gcc_assert (!pending_ltrel);
+ pending_ltrel = pool_ref;
+ }
+ }
+ }
+
+ if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL)
+ {
+ if (curr_pool)
+ s390_add_pool_insn (curr_pool, insn);
+ /* An LTREL_BASE must follow within the same basic block. */
+ gcc_assert (!pending_ltrel);
+ }
+
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+ section_switch_p = true;
+
+ if (!curr_pool
+ || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
+ || INSN_ADDRESSES (INSN_UID (insn)) == -1)
+ continue;
+
+ if (TARGET_CPU_ZARCH)
+ {
+ if (curr_pool->size < S390_POOL_CHUNK_MAX)
+ continue;
+
+ s390_end_pool (curr_pool, NULL_RTX);
+ curr_pool = NULL;
+ }
+ else
+ {
+ int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
+ - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
+ + extra_size;
+
+ /* We will later have to insert base register reload insns.
+ Those will have an effect on code size, which we need to
+ consider here. This calculation makes rather pessimistic
+ worst-case assumptions. */
+ if (GET_CODE (insn) == CODE_LABEL)
+ extra_size += 6;
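+	    /* The 6 bytes are presumably a pessimistic worst-case
+	       estimate for one base-register reload sequence that may
+	       later be inserted at this label (see the reload
+	       insertion loops below).  */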
+
+ if (chunk_size < S390_POOL_CHUNK_MIN
+ && curr_pool->size < S390_POOL_CHUNK_MIN
+ && !section_switch_p)
+ continue;
+
+ /* Pool chunks can only be inserted after BARRIERs ... */
+ if (GET_CODE (insn) == BARRIER)
+ {
+ s390_end_pool (curr_pool, insn);
+ curr_pool = NULL;
+ extra_size = 0;
+ }
+
+ /* ... so if we don't find one in time, create one. */
+ else if (chunk_size > S390_POOL_CHUNK_MAX
+ || curr_pool->size > S390_POOL_CHUNK_MAX
+ || section_switch_p)
+ {
+ rtx label, jump, barrier;
+
+ if (!section_switch_p)
+ {
+ /* We can insert the barrier only after a 'real' insn. */
+ if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN)
+ continue;
+ if (get_attr_length (insn) == 0)
+ continue;
+ /* Don't separate LTREL_BASE from the corresponding
+ LTREL_OFFSET load. */
+ if (pending_ltrel)
+ continue;
+ }
+ else
+ {
+ gcc_assert (!pending_ltrel);
+
+ /* The old pool has to end before the section switch
+ note in order to make it part of the current
+ section. */
+ insn = PREV_INSN (insn);
+ }
+
+ label = gen_label_rtx ();
+ jump = emit_jump_insn_after (gen_jump (label), insn);
+ barrier = emit_barrier_after (jump);
+ insn = emit_label_after (label, barrier);
+ JUMP_LABEL (jump) = label;
+ LABEL_NUSES (label) = 1;
+
+ INSN_ADDRESSES_NEW (jump, -1);
+ INSN_ADDRESSES_NEW (barrier, -1);
+ INSN_ADDRESSES_NEW (insn, -1);
+
+ s390_end_pool (curr_pool, barrier);
+ curr_pool = NULL;
+ extra_size = 0;
+ }
+ }
+ }
+
+ if (curr_pool)
+ s390_end_pool (curr_pool, NULL_RTX);
+ gcc_assert (!pending_ltrel);
+
+ /* Find all labels that are branched into
+ from an insn belonging to a different chunk. */
+
+ far_labels = BITMAP_ALLOC (NULL);
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+      /* Labels marked with LABEL_PRESERVE_P can be targets
+ of non-local jumps, so we have to mark them.
+ The same holds for named labels.
+
+ Don't do that, however, if it is the label before
+ a jump table. */
+
+ if (GET_CODE (insn) == CODE_LABEL
+ && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
+ {
+ rtx vec_insn = next_real_insn (insn);
+ rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
+ PATTERN (vec_insn) : NULL_RTX;
+ if (!vec_pat
+ || !(GET_CODE (vec_pat) == ADDR_VEC
+ || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
+ bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
+ }
+
+ /* If we have a direct jump (conditional or unconditional)
+ or a casesi jump, check all potential targets. */
+ else if (GET_CODE (insn) == JUMP_INSN)
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
+ pat = XVECEXP (pat, 0, 0);
+
+ if (GET_CODE (pat) == SET)
+ {
+ rtx label = JUMP_LABEL (insn);
+ if (label)
+ {
+ if (s390_find_pool (pool_list, label)
+ != s390_find_pool (pool_list, insn))
+ bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
+ }
+ }
+ else if (GET_CODE (pat) == PARALLEL
+ && XVECLEN (pat, 0) == 2
+ && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XVECEXP (pat, 0, 1)) == USE
+ && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
+ {
+ /* Find the jump table used by this casesi jump. */
+ rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
+ rtx vec_insn = next_real_insn (vec_label);
+ rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
+ PATTERN (vec_insn) : NULL_RTX;
+ if (vec_pat
+ && (GET_CODE (vec_pat) == ADDR_VEC
+ || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
+ {
+ int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
+
+ for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
+ {
+ rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
+
+ if (s390_find_pool (pool_list, label)
+ != s390_find_pool (pool_list, insn))
+ bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
+ }
+ }
+ }
+ }
+ }
+
+ /* Insert base register reload insns before every pool. */
+
+ for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
+ {
+ rtx new_insn = gen_reload_base (cfun->machine->base_reg,
+ curr_pool->label);
+ rtx insn = curr_pool->first_insn;
+ INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
+ }
+
+ /* Insert base register reload insns at every far label. */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (GET_CODE (insn) == CODE_LABEL
+ && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
+ {
+ struct constant_pool *pool = s390_find_pool (pool_list, insn);
+ if (pool)
+ {
+ rtx new_insn = gen_reload_base (cfun->machine->base_reg,
+ pool->label);
+ INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
+ }
+ }
+
+
+ BITMAP_FREE (far_labels);
+
+
+ /* Recompute insn addresses. */
+
+ init_insn_lengths ();
+ shorten_branches (get_insns ());
+
+ return pool_list;
+}
+
+/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
+ After we have decided to use this list, finish implementing
+ all changes to the current function as required. */
+
+static void
+s390_chunkify_finish (struct constant_pool *pool_list)
+{
+ struct constant_pool *curr_pool = NULL;
+ rtx insn;
+
+
+ /* Replace all literal pool references. */
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ replace_ltrel_base (&PATTERN (insn));
+
+ curr_pool = s390_find_pool (pool_list, insn);
+ if (!curr_pool)
+ continue;
+
+ if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ {
+ rtx addr, pool_ref = NULL_RTX;
+ find_constant_pool_ref (PATTERN (insn), &pool_ref);
+ if (pool_ref)
+ {
+ if (s390_execute_label (insn))
+ addr = s390_find_execute (curr_pool, insn);
+ else
+ addr = s390_find_constant (curr_pool,
+ get_pool_constant (pool_ref),
+ get_pool_mode (pool_ref));
+
+ replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+
+ /* Dump out all literal pools. */
+
+ for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
+ s390_dump_pool (curr_pool, 0);
+
+ /* Free pool list. */
+
+ while (pool_list)
+ {
+ struct constant_pool *next = pool_list->next;
+ s390_free_pool (pool_list);
+ pool_list = next;
+ }
+}
+
+/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
+ We have decided we cannot use this list, so revert all changes
+ to the current function that were done by s390_chunkify_start. */
+
+static void
+s390_chunkify_cancel (struct constant_pool *pool_list)
+{
+ struct constant_pool *curr_pool = NULL;
+ rtx insn;
+
+ /* Remove all pool placeholder insns. */
+
+ for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
+ {
+ /* Did we insert an extra barrier? Remove it. */
+ rtx barrier = PREV_INSN (curr_pool->pool_insn);
+      rtx jump = barrier ? PREV_INSN (barrier) : NULL_RTX;
+ rtx label = NEXT_INSN (curr_pool->pool_insn);
+
+ if (jump && GET_CODE (jump) == JUMP_INSN
+ && barrier && GET_CODE (barrier) == BARRIER
+ && label && GET_CODE (label) == CODE_LABEL
+ && GET_CODE (PATTERN (jump)) == SET
+ && SET_DEST (PATTERN (jump)) == pc_rtx
+ && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
+ && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
+ {
+ remove_insn (jump);
+ remove_insn (barrier);
+ remove_insn (label);
+ }
+
+ remove_insn (curr_pool->pool_insn);
+ }
+
+ /* Remove all base register reload insns. */
+
+ for (insn = get_insns (); insn; )
+ {
+ rtx next_insn = NEXT_INSN (insn);
+
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
+ remove_insn (insn);
+
+ insn = next_insn;
+ }
+
+ /* Free pool list. */
+
+ while (pool_list)
+ {
+ struct constant_pool *next = pool_list->next;
+ s390_free_pool (pool_list);
+ pool_list = next;
+ }
+}
+
+/* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
+
+void
+s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
+{
+ REAL_VALUE_TYPE r;
+
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_FLOAT:
+ case MODE_DECIMAL_FLOAT:
+ gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
+ assemble_real (r, mode, align);
+ break;
+
+ case MODE_INT:
+ assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
+ mark_symbol_refs_as_used (exp);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Return an RTL expression representing the value of the return address
+ for the frame COUNT steps up from the current frame. FRAME is the
+ frame pointer of that frame. */
+
+rtx
+s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ int offset;
+ rtx addr;
+
+ /* Without backchain, we fail for all but the current frame. */
+
+ if (!TARGET_BACKCHAIN && count > 0)
+ return NULL_RTX;
+
+ /* For the current frame, we need to make sure the initial
+ value of RETURN_REGNUM is actually saved. */
+
+ if (count == 0)
+ {
+ /* On non-z architectures branch splitting could overwrite r14. */
+ if (TARGET_CPU_ZARCH)
+ return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
+ else
+ {
+ cfun_frame_layout.save_return_addr_p = true;
+ return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
+ }
+ }
+
+ if (TARGET_PACKED_STACK)
+ offset = -2 * UNITS_PER_LONG;
+ else
+ offset = RETURN_REGNUM * UNITS_PER_LONG;
+
+ addr = plus_constant (frame, offset);
+ addr = memory_address (Pmode, addr);
+ return gen_rtx_MEM (Pmode, addr);
+}
+
+/* Return an RTL expression representing the back chain stored in
+ the current stack frame. */
+
+rtx
+s390_back_chain_rtx (void)
+{
+ rtx chain;
+
+ gcc_assert (TARGET_BACKCHAIN);
+
+ if (TARGET_PACKED_STACK)
+ chain = plus_constant (stack_pointer_rtx,
+ STACK_POINTER_OFFSET - UNITS_PER_LONG);
+ else
+ chain = stack_pointer_rtx;
+
+ chain = gen_rtx_MEM (Pmode, chain);
+ return chain;
+}
+
+/* Find the first call-clobbered register unused in the current function.
+   It could be used as a base register in a leaf function
+   or to hold the return address before the epilogue.  */
+
+static int
+find_unused_clobbered_reg (void)
+{
+ int i;
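+
+  /* GPRs 0 through 5 are the call-clobbered GPRs in the S/390 ABI,
+     so scanning them suffices.  */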
+ for (i = 0; i < 6; i++)
+ if (!df_regs_ever_live_p (i))
+ return i;
+ return 0;
+}
+
+
+/* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
+ clobbered hard regs in SETREG. */
+
+static void
+s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
+{
+ int *regs_ever_clobbered = (int *)data;
+ unsigned int i, regno;
+ enum machine_mode mode = GET_MODE (setreg);
+
+ if (GET_CODE (setreg) == SUBREG)
+ {
+ rtx inner = SUBREG_REG (setreg);
+ if (!GENERAL_REG_P (inner))
+ return;
+ regno = subreg_regno (setreg);
+ }
+ else if (GENERAL_REG_P (setreg))
+ regno = REGNO (setreg);
+ else
+ return;
+
+ for (i = regno;
+ i < regno + HARD_REGNO_NREGS (regno, mode);
+ i++)
+ regs_ever_clobbered[i] = 1;
+}
+
+/* Walks through all basic blocks of the current function looking
+ for clobbered hard regs using s390_reg_clobbered_rtx. The fields
+ of the passed integer array REGS_EVER_CLOBBERED are set to one for
+ each of those regs. */
+
+static void
+s390_regs_ever_clobbered (int *regs_ever_clobbered)
+{
+ basic_block cur_bb;
+ rtx cur_insn;
+ unsigned int i;
+
+ memset (regs_ever_clobbered, 0, 16 * sizeof (int));
+
+ /* For non-leaf functions we have to consider all call clobbered regs to be
+ clobbered. */
+ if (!current_function_is_leaf)
+ {
+ for (i = 0; i < 16; i++)
+ regs_ever_clobbered[i] = call_really_used_regs[i];
+ }
+
+ /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
+ this work is done by liveness analysis (mark_regs_live_at_end).
+ Special care is needed for functions containing landing pads. Landing pads
+ may use the eh registers, but the code which sets these registers is not
+ contained in that function. Hence s390_regs_ever_clobbered is not able to
+ deal with this automatically. */
+ if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
+ for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
+ if (crtl->calls_eh_return
+ || (cfun->machine->has_landing_pad_p
+ && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
+ regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
+
+ /* For nonlocal gotos all call-saved registers have to be saved.
+ This flag is also set for the unwinding code in libgcc.
+ See expand_builtin_unwind_init. For regs_ever_live this is done by
+ reload. */
+ if (cfun->has_nonlocal_label)
+ for (i = 0; i < 16; i++)
+ if (!call_really_used_regs[i])
+ regs_ever_clobbered[i] = 1;
+
+ FOR_EACH_BB (cur_bb)
+ {
+ FOR_BB_INSNS (cur_bb, cur_insn)
+ {
+ if (INSN_P (cur_insn))
+ note_stores (PATTERN (cur_insn),
+ s390_reg_clobbered_rtx,
+ regs_ever_clobbered);
+ }
+ }
+}
+
+/* Determine the frame area which actually has to be accessed
+   in the function epilogue.  The values are stored at the
+   given pointers AREA_BOTTOM (the lowest used stack address)
+   and AREA_TOP (the first stack address which no longer
+   belongs to the frame area).  */
+
+static void
+s390_frame_area (int *area_bottom, int *area_top)
+{
+ int b, t;
+ int i;
+
+ b = INT_MAX;
+ t = INT_MIN;
+
+ if (cfun_frame_layout.first_restore_gpr != -1)
+ {
+ b = (cfun_frame_layout.gprs_offset
+ + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
+ t = b + (cfun_frame_layout.last_restore_gpr
+ - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
+ }
+
+ if (TARGET_64BIT && cfun_save_high_fprs_p)
+ {
+ b = MIN (b, cfun_frame_layout.f8_offset);
+ t = MAX (t, (cfun_frame_layout.f8_offset
+ + cfun_frame_layout.high_fprs * 8));
+ }
+
+ if (!TARGET_64BIT)
+ for (i = 2; i < 4; i++)
+ if (cfun_fpr_bit_p (i))
+ {
+ b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
+ t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
+ }
+
+ *area_bottom = b;
+ *area_top = t;
+}
+
+/* Fill cfun->machine with info about register usage of current function.
+ Return in CLOBBERED_REGS which GPRs are currently considered set. */
+
+static void
+s390_register_info (int clobbered_regs[])
+{
+ int i, j;
+
+  /* FPRs 8-15 are call-saved in the 64-bit ABI.  */
+ cfun_frame_layout.fpr_bitmap = 0;
+ cfun_frame_layout.high_fprs = 0;
+ if (TARGET_64BIT)
+ for (i = 24; i < 32; i++)
+ if (df_regs_ever_live_p (i) && !global_regs[i])
+ {
+ cfun_set_fpr_bit (i - 16);
+ cfun_frame_layout.high_fprs++;
+ }
+
+ /* Find first and last gpr to be saved. We trust regs_ever_live
+ data, except that we don't save and restore global registers.
+
+     Also, all registers with special meaning to the compiler need
+     extra handling.  */
+
+ s390_regs_ever_clobbered (clobbered_regs);
+
+ for (i = 0; i < 16; i++)
+ clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
+
+ if (frame_pointer_needed)
+ clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
+
+ if (flag_pic)
+ clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
+ |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+
+ clobbered_regs[BASE_REGNUM]
+ |= (cfun->machine->base_reg
+ && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
+
+ clobbered_regs[RETURN_REGNUM]
+ |= (!current_function_is_leaf
+ || TARGET_TPF_PROFILING
+ || cfun->machine->split_branches_pending_p
+ || cfun_frame_layout.save_return_addr_p
+ || crtl->calls_eh_return
+ || cfun->stdarg);
+
+ clobbered_regs[STACK_POINTER_REGNUM]
+ |= (!current_function_is_leaf
+ || TARGET_TPF_PROFILING
+ || cfun_save_high_fprs_p
+ || get_frame_size () > 0
+ || cfun->calls_alloca
+ || cfun->stdarg);
+
+ for (i = 6; i < 16; i++)
+ if (df_regs_ever_live_p (i) || clobbered_regs[i])
+ break;
+ for (j = 15; j > i; j--)
+ if (df_regs_ever_live_p (j) || clobbered_regs[j])
+ break;
+
+ if (i == 16)
+ {
+ /* Nothing to save/restore. */
+ cfun_frame_layout.first_save_gpr_slot = -1;
+ cfun_frame_layout.last_save_gpr_slot = -1;
+ cfun_frame_layout.first_save_gpr = -1;
+ cfun_frame_layout.first_restore_gpr = -1;
+ cfun_frame_layout.last_save_gpr = -1;
+ cfun_frame_layout.last_restore_gpr = -1;
+ }
+ else
+ {
+ /* Save slots for gprs from i to j. */
+ cfun_frame_layout.first_save_gpr_slot = i;
+ cfun_frame_layout.last_save_gpr_slot = j;
+
+ for (i = cfun_frame_layout.first_save_gpr_slot;
+ i < cfun_frame_layout.last_save_gpr_slot + 1;
+ i++)
+ if (clobbered_regs[i])
+ break;
+
+ for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
+ if (clobbered_regs[j])
+ break;
+
+ if (i == cfun_frame_layout.last_save_gpr_slot + 1)
+ {
+ /* Nothing to save/restore. */
+ cfun_frame_layout.first_save_gpr = -1;
+ cfun_frame_layout.first_restore_gpr = -1;
+ cfun_frame_layout.last_save_gpr = -1;
+ cfun_frame_layout.last_restore_gpr = -1;
+ }
+ else
+ {
+ /* Save / Restore from gpr i to j. */
+ cfun_frame_layout.first_save_gpr = i;
+ cfun_frame_layout.first_restore_gpr = i;
+ cfun_frame_layout.last_save_gpr = j;
+ cfun_frame_layout.last_restore_gpr = j;
+ }
+ }
+
+ if (cfun->stdarg)
+ {
+ /* Varargs functions need to save gprs 2 to 6. */
+ if (cfun->va_list_gpr_size
+ && crtl->args.info.gprs < GP_ARG_NUM_REG)
+ {
+ int min_gpr = crtl->args.info.gprs;
+ int max_gpr = min_gpr + cfun->va_list_gpr_size;
+ if (max_gpr > GP_ARG_NUM_REG)
+ max_gpr = GP_ARG_NUM_REG;
+
+ if (cfun_frame_layout.first_save_gpr == -1
+ || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
+ {
+ cfun_frame_layout.first_save_gpr = 2 + min_gpr;
+ cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
+ }
+
+ if (cfun_frame_layout.last_save_gpr == -1
+ || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
+ {
+ cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
+ cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
+ }
+ }
+
+ /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved. */
+ if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
+ && crtl->args.info.fprs < FP_ARG_NUM_REG)
+ {
+ int min_fpr = crtl->args.info.fprs;
+ int max_fpr = min_fpr + cfun->va_list_fpr_size;
+ if (max_fpr > FP_ARG_NUM_REG)
+ max_fpr = FP_ARG_NUM_REG;
+
+ /* ??? This is currently required to ensure proper location
+ of the fpr save slots within the va_list save area. */
+ if (TARGET_PACKED_STACK)
+ min_fpr = 0;
+
+ for (i = min_fpr; i < max_fpr; i++)
+ cfun_set_fpr_bit (i);
+ }
+ }
+
+ if (!TARGET_64BIT)
+ for (i = 2; i < 4; i++)
+ if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
+ cfun_set_fpr_bit (i);
+}
+
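+/* An illustrative sketch of the standard (!TARGET_PACKED_STACK)
+   register save area computed below, using the 64-bit ABI
+   (UNITS_PER_LONG == 8); offsets are relative to the stack pointer
+   as it stands on function entry:
+
+	  0		back chain
+	  N * 8		save slot for GPR N	  (gprs_offset)
+	128 .. 143	save slots for f0 and f2  (f0_offset)
+	144 .. 159	save slots for f4 and f6  (f4_offset)
+
+   f8-f15, if saved, go just below the entry stack pointer into the
+   newly allocated frame (f8_offset is negative), followed further
+   down by locals and the outgoing argument area.  The packed-stack
+   layouts arrange the same data more compactly.  */
+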
+/* Fill cfun->machine with info about frame of current function. */
+
+static void
+s390_frame_info (void)
+{
+ int i;
+
+ cfun_frame_layout.frame_size = get_frame_size ();
+ if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
+ fatal_error ("total size of local variables exceeds architecture limit");
+
+ if (!TARGET_PACKED_STACK)
+ {
+ cfun_frame_layout.backchain_offset = 0;
+ cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
+ cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
+ cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
+ cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
+ * UNITS_PER_LONG);
+ }
+ else if (TARGET_BACKCHAIN) /* kernel stack layout */
+ {
+ cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
+ - UNITS_PER_LONG);
+ cfun_frame_layout.gprs_offset
+ = (cfun_frame_layout.backchain_offset
+ - (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
+ * UNITS_PER_LONG);
+
+ if (TARGET_64BIT)
+ {
+ cfun_frame_layout.f4_offset
+ = (cfun_frame_layout.gprs_offset
+ - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+
+ cfun_frame_layout.f0_offset
+ = (cfun_frame_layout.f4_offset
+ - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+ }
+ else
+ {
+	  /* On 31 bit we have to take care of the alignment of the
+	     floating-point regs to provide the fastest access.  */
+ cfun_frame_layout.f0_offset
+ = ((cfun_frame_layout.gprs_offset
+ & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
+ - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+
+ cfun_frame_layout.f4_offset
+ = (cfun_frame_layout.f0_offset
+ - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+ }
+ }
+ else /* no backchain */
+ {
+ cfun_frame_layout.f4_offset
+ = (STACK_POINTER_OFFSET
+ - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+
+ cfun_frame_layout.f0_offset
+ = (cfun_frame_layout.f4_offset
+ - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+
+ cfun_frame_layout.gprs_offset
+ = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
+ }
+
+ if (current_function_is_leaf
+ && !TARGET_TPF_PROFILING
+ && cfun_frame_layout.frame_size == 0
+ && !cfun_save_high_fprs_p
+ && !cfun->calls_alloca
+ && !cfun->stdarg)
+ return;
+
+ if (!TARGET_PACKED_STACK)
+ cfun_frame_layout.frame_size += (STACK_POINTER_OFFSET
+ + crtl->outgoing_args_size
+ + cfun_frame_layout.high_fprs * 8);
+ else
+ {
+ if (TARGET_BACKCHAIN)
+ cfun_frame_layout.frame_size += UNITS_PER_LONG;
+
+ /* No alignment trouble here because f8-f15 are only saved under
+ 64 bit. */
+ cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
+ cfun_frame_layout.f4_offset),
+ cfun_frame_layout.gprs_offset)
+ - cfun_frame_layout.high_fprs * 8);
+
+ cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
+
+ for (i = 0; i < 8; i++)
+ if (cfun_fpr_bit_p (i))
+ cfun_frame_layout.frame_size += 8;
+
+ cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
+
+      /* If, on 31 bit, an odd number of GPRs has to be saved, we have
+	 to adjust the frame size to maintain 8-byte alignment of
+	 stack frames.  */
+ cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
+ STACK_BOUNDARY / BITS_PER_UNIT - 1)
+ & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
+
+ cfun_frame_layout.frame_size += crtl->outgoing_args_size;
+ }
+}
+
+/* Generate frame layout. Fills in register and frame data for the current
+ function in cfun->machine. This routine can be called multiple times;
+ it will re-do the complete frame layout every time. */
+
+static void
+s390_init_frame_layout (void)
+{
+ HOST_WIDE_INT frame_size;
+ int base_used;
+ int clobbered_regs[16];
+
+ /* On S/390 machines, we may need to perform branch splitting, which
+ will require both base and return address register. We have no
+ choice but to assume we're going to need them until right at the
+ end of the machine dependent reorg phase. */
+ if (!TARGET_CPU_ZARCH)
+ cfun->machine->split_branches_pending_p = true;
+
+ do
+ {
+ frame_size = cfun_frame_layout.frame_size;
+
+ /* Try to predict whether we'll need the base register. */
+ base_used = cfun->machine->split_branches_pending_p
+ || crtl->uses_const_pool
+ || (!DISP_IN_RANGE (frame_size)
+ && !CONST_OK_FOR_K (frame_size));
+
+ /* Decide which register to use as literal pool base. In small
+ leaf functions, try to use an unused call-clobbered register
+ as base register to avoid save/restore overhead. */
+ if (!base_used)
+ cfun->machine->base_reg = NULL_RTX;
+ else if (current_function_is_leaf && !df_regs_ever_live_p (5))
+ cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
+ else
+ cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
+
+ s390_register_info (clobbered_regs);
+ s390_frame_info ();
+ }
+ while (frame_size != cfun_frame_layout.frame_size);
+}
+
+/* Update frame layout. Recompute actual register save data based on
+ current info and update regs_ever_live for the special registers.
+ May be called multiple times, but may never cause *more* registers
+ to be saved than s390_init_frame_layout allocated room for. */
+
+static void
+s390_update_frame_layout (void)
+{
+ int clobbered_regs[16];
+
+ s390_register_info (clobbered_regs);
+
+ df_set_regs_ever_live (BASE_REGNUM,
+ clobbered_regs[BASE_REGNUM] ? true : false);
+ df_set_regs_ever_live (RETURN_REGNUM,
+ clobbered_regs[RETURN_REGNUM] ? true : false);
+ df_set_regs_ever_live (STACK_POINTER_REGNUM,
+ clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
+
+ if (cfun->machine->base_reg)
+ df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
+}
+
+/* Return true if it is legal to put a value with MODE into REGNO. */
+
+bool
+s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ switch (REGNO_REG_CLASS (regno))
+ {
+ case FP_REGS:
+ if (REGNO_PAIR_OK (regno, mode))
+ {
+ if (mode == SImode || mode == DImode)
+ return true;
+
+ if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+ return true;
+ }
+ break;
+ case ADDR_REGS:
+ if (FRAME_REGNO_P (regno) && mode == Pmode)
+ return true;
+
+ /* fallthrough */
+ case GENERAL_REGS:
+ if (REGNO_PAIR_OK (regno, mode))
+ {
+ if (TARGET_ZARCH
+ || (mode != TFmode && mode != TCmode && mode != TDmode))
+ return true;
+ }
+ break;
+ case CC_REGS:
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ return true;
+ break;
+ case ACCESS_REGS:
+ if (REGNO_PAIR_OK (regno, mode))
+ {
+ if (mode == SImode || mode == Pmode)
+ return true;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
+
+bool
+s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
+{
+ /* Once we've decided upon a register to use as base register, it must
+ no longer be used for any other purpose. */
+ if (cfun->machine->base_reg)
+ if (REGNO (cfun->machine->base_reg) == old_reg
+ || REGNO (cfun->machine->base_reg) == new_reg)
+ return false;
+
+ return true;
+}
+
+/* Maximum number of registers to represent a value of mode MODE
+ in a register of class RCLASS. */
+
+int
+s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
+{
+ switch (rclass)
+ {
+ case FP_REGS:
+ if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
+ else
+ return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
+ case ACCESS_REGS:
+ return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
+ default:
+ break;
+ }
+ return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
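+
+/* For example: a TFmode value (16 bytes) occupies two FP_REGS, and on
+   31 bit a DImode value occupies two GENERAL_REGS, matching the
+   formulas above.  */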
+
+/* Return true if register FROM can be eliminated via register TO. */
+
+static bool
+s390_can_eliminate (const int from, const int to)
+{
+ /* On zSeries machines, we have not marked the base register as fixed.
+ Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
+ If a function requires the base register, we say here that this
+ elimination cannot be performed. This will cause reload to free
+ up the base register (as if it were fixed). On the other hand,
+ if the current function does *not* require the base register, we
+ say here the elimination succeeds, which in turn allows reload
+ to allocate the base register for any other purpose. */
+ if (from == BASE_REGNUM && to == BASE_REGNUM)
+ {
+ if (TARGET_CPU_ZARCH)
+ {
+ s390_init_frame_layout ();
+ return cfun->machine->base_reg == NULL_RTX;
+ }
+
+ return false;
+ }
+
+ /* Everything else must point into the stack frame. */
+ gcc_assert (to == STACK_POINTER_REGNUM
+ || to == HARD_FRAME_POINTER_REGNUM);
+
+ gcc_assert (from == FRAME_POINTER_REGNUM
+ || from == ARG_POINTER_REGNUM
+ || from == RETURN_ADDRESS_POINTER_REGNUM);
+
+ /* Make sure we actually saved the return address. */
+ if (from == RETURN_ADDRESS_POINTER_REGNUM)
+ if (!crtl->calls_eh_return
+ && !cfun->stdarg
+ && !cfun_frame_layout.save_return_addr_p)
+ return false;
+
+ return true;
+}
+
+/* Return the offset between registers FROM and TO as initially
+   set up after the prologue.  */
+
+HOST_WIDE_INT
+s390_initial_elimination_offset (int from, int to)
+{
+ HOST_WIDE_INT offset;
+ int index;
+
+ /* ??? Why are we called for non-eliminable pairs? */
+ if (!s390_can_eliminate (from, to))
+ return 0;
+
+ switch (from)
+ {
+ case FRAME_POINTER_REGNUM:
+ offset = (get_frame_size()
+ + STACK_POINTER_OFFSET
+ + crtl->outgoing_args_size);
+ break;
+
+ case ARG_POINTER_REGNUM:
+ s390_init_frame_layout ();
+ offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
+ break;
+
+ case RETURN_ADDRESS_POINTER_REGNUM:
+ s390_init_frame_layout ();
+ index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
+ gcc_assert (index >= 0);
+ offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
+ offset += index * UNITS_PER_LONG;
+ break;
+
+ case BASE_REGNUM:
+ offset = 0;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return offset;
+}
+
+/* Emit insn to save fpr REGNUM at offset OFFSET relative
+ to register BASE. Return generated insn. */
+
+static rtx
+save_fpr (rtx base, int offset, int regnum)
+{
+ rtx addr;
+ addr = gen_rtx_MEM (DFmode, plus_constant (base, offset));
+
+ if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
+ set_mem_alias_set (addr, get_varargs_alias_set ());
+ else
+ set_mem_alias_set (addr, get_frame_alias_set ());
+
+ return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
+}
+
+/* Emit insn to restore fpr REGNUM from offset OFFSET relative
+ to register BASE. Return generated insn. */
+
+static rtx
+restore_fpr (rtx base, int offset, int regnum)
+{
+ rtx addr;
+ addr = gen_rtx_MEM (DFmode, plus_constant (base, offset));
+ set_mem_alias_set (addr, get_frame_alias_set ());
+
+ return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
+}
+
+/* Return true if REGNO is a global register, but not one
+   of the special ones that need to be saved/restored anyway.  */
+
+static inline bool
+global_not_special_regno_p (int regno)
+{
+ return (global_regs[regno]
+ /* These registers are special and need to be
+ restored in any case. */
+ && !(regno == STACK_POINTER_REGNUM
+ || regno == RETURN_REGNUM
+ || regno == BASE_REGNUM
+ || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
+}
+
+/* Generate insn to save registers FIRST to LAST into
+ the register save area located at offset OFFSET
+ relative to register BASE. */
+
+static rtx
+save_gprs (rtx base, int offset, int first, int last)
+{
+ rtx addr, insn, note;
+ int i;
+
+ addr = plus_constant (base, offset);
+ addr = gen_rtx_MEM (Pmode, addr);
+
+ set_mem_alias_set (addr, get_frame_alias_set ());
+
+ /* Special-case single register. */
+ if (first == last)
+ {
+ if (TARGET_64BIT)
+ insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
+ else
+ insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
+
+ if (!global_not_special_regno_p (first))
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return insn;
+ }
+
+
+ insn = gen_store_multiple (addr,
+ gen_rtx_REG (Pmode, first),
+ GEN_INT (last - first + 1));
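+  /* On 64 bit this typically assembles to a store-multiple insn such
+     as "stmg %r6,%r15,48(%r15)" (registers and offset illustrative).  */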
+
+ if (first <= 6 && cfun->stdarg)
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
+
+ if (first + i <= 6)
+ set_mem_alias_set (mem, get_varargs_alias_set ());
+ }
+
+ /* We need to set the FRAME_RELATED flag on all SETs
+ inside the store-multiple pattern.
+
+ However, we must not emit DWARF records for registers 2..5
+ if they are stored for use by variable arguments ...
+
+     ??? Unfortunately, it is not enough to simply not set the
+     FRAME_RELATED flags for those SETs, because the first SET
+     of the PARALLEL is always treated as if it had the flag
+     set, even if it does not.  Therefore we emit a new pattern
+     without those registers as a REG_FRAME_RELATED_EXPR note.  */
+
+ if (first >= 6 && !global_not_special_regno_p (first))
+ {
+ rtx pat = PATTERN (insn);
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+ && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
+ 0, i)))))
+ RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (last >= 6)
+ {
+ int start;
+
+ for (start = first >= 6 ? first : 6; start <= last; start++)
+ if (!global_not_special_regno_p (start))
+ break;
+
+ if (start > last)
+ return insn;
+
+ addr = plus_constant (base, offset + (start - first) * UNITS_PER_LONG);
+ note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
+ gen_rtx_REG (Pmode, start),
+ GEN_INT (last - start + 1));
+ note = PATTERN (note);
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
+
+ for (i = 0; i < XVECLEN (note, 0); i++)
+ if (GET_CODE (XVECEXP (note, 0, i)) == SET
+ && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
+ 0, i)))))
+ RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ return insn;
+}
+
+/* Generate insn to restore registers FIRST to LAST from
+ the register save area located at offset OFFSET
+ relative to register BASE. */
+
+static rtx
+restore_gprs (rtx base, int offset, int first, int last)
+{
+ rtx addr, insn;
+
+ addr = plus_constant (base, offset);
+ addr = gen_rtx_MEM (Pmode, addr);
+ set_mem_alias_set (addr, get_frame_alias_set ());
+
+ /* Special-case single register. */
+ if (first == last)
+ {
+ if (TARGET_64BIT)
+ insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
+ else
+ insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
+
+ return insn;
+ }
+
+ insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
+ addr,
+ GEN_INT (last - first + 1));
+ return insn;
+}
+
+/* Return insn sequence to load the GOT register. */
+
+static GTY(()) rtx got_symbol;
+rtx
+s390_load_got (void)
+{
+ rtx insns;
+
+ if (!got_symbol)
+ {
+ got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
+ }
+
+ start_sequence ();
+
+ if (TARGET_CPU_ZARCH)
+ {
+ emit_move_insn (pic_offset_table_rtx, got_symbol);
+ }
+ else
+ {
+ rtx offset;
+
+ offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
+ UNSPEC_LTREL_OFFSET);
+ offset = gen_rtx_CONST (Pmode, offset);
+ offset = force_const_mem (Pmode, offset);
+
+ emit_move_insn (pic_offset_table_rtx, offset);
+
+ offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
+ UNSPEC_LTREL_BASE);
+ offset = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, offset);
+
+ emit_move_insn (pic_offset_table_rtx, offset);
+ }
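+  /* The 31-bit sequence above amounts to something like (registers
+     illustrative):
+
+	 l   %r12,<GOT-offset literal>(%r13)  # UNSPEC_LTREL_OFFSET
+	 ar  %r12,%r13			      # add literal pool base
+
+     once the LTREL_BASE unspec has been resolved against the literal
+     pool base during machine-dependent reorg.  */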
+
+ insns = get_insns ();
+ end_sequence ();
+ return insns;
+}
+
+/* This ties together stack memory (MEM with an alias set of frame_alias_set)
+ and the change to the stack pointer. */
+
+static void
+s390_emit_stack_tie (void)
+{
+ rtx mem = gen_frame_mem (BLKmode,
+ gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
+
+ emit_insn (gen_stack_tie (mem));
+}
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+s390_emit_prologue (void)
+{
+ rtx insn, addr;
+ rtx temp_reg;
+ int i;
+ int offset;
+ int next_fpr = 0;
+
+ /* Complete frame layout. */
+
+ s390_update_frame_layout ();
+
+ /* Annotate all constant pool references to let the scheduler know
+ they implicitly use the base register. */
+
+ push_topmost_sequence ();
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ annotate_constant_pool_refs (&PATTERN (insn));
+ df_insn_rescan (insn);
+ }
+
+ pop_topmost_sequence ();
+
+ /* Choose best register to use for temp use within prologue.
+ See below for why TPF must use the register 1. */
+
+ if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
+ && !current_function_is_leaf
+ && !TARGET_TPF_PROFILING)
+ temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+ else
+ temp_reg = gen_rtx_REG (Pmode, 1);
+
+ /* Save call saved gprs. */
+ if (cfun_frame_layout.first_save_gpr != -1)
+ {
+ insn = save_gprs (stack_pointer_rtx,
+ cfun_frame_layout.gprs_offset +
+ UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
+ - cfun_frame_layout.first_save_gpr_slot),
+ cfun_frame_layout.first_save_gpr,
+ cfun_frame_layout.last_save_gpr);
+ emit_insn (insn);
+ }
+
+ /* Dummy insn to mark literal pool slot. */
+
+ if (cfun->machine->base_reg)
+ emit_insn (gen_main_pool (cfun->machine->base_reg));
+
+ offset = cfun_frame_layout.f0_offset;
+
+ /* Save f0 and f2. */
+ for (i = 0; i < 2; i++)
+ {
+ if (cfun_fpr_bit_p (i))
+ {
+ save_fpr (stack_pointer_rtx, offset, i + 16);
+ offset += 8;
+ }
+ else if (!TARGET_PACKED_STACK)
+ offset += 8;
+ }
+
+ /* Save f4 and f6. */
+ offset = cfun_frame_layout.f4_offset;
+ for (i = 2; i < 4; i++)
+ {
+ if (cfun_fpr_bit_p (i))
+ {
+ insn = save_fpr (stack_pointer_rtx, offset, i + 16);
+ offset += 8;
+
+ /* If f4 and f6 are call clobbered they are saved due to stdargs and
+ therefore are not frame related. */
+ if (!call_really_used_regs[i + 16])
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (!TARGET_PACKED_STACK)
+ offset += 8;
+ }
+
+ if (TARGET_PACKED_STACK
+ && cfun_save_high_fprs_p
+ && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
+ {
+ offset = (cfun_frame_layout.f8_offset
+ + (cfun_frame_layout.high_fprs - 1) * 8);
+
+ for (i = 15; i > 7 && offset >= 0; i--)
+ if (cfun_fpr_bit_p (i))
+ {
+ insn = save_fpr (stack_pointer_rtx, offset, i + 16);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset -= 8;
+ }
+ if (offset >= cfun_frame_layout.f8_offset)
+ next_fpr = i + 16;
+ }
+
+ if (!TARGET_PACKED_STACK)
+ next_fpr = cfun_save_high_fprs_p ? 31 : 0;
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = cfun_frame_layout.frame_size;
+
+ /* Decrement stack pointer. */
+
+ if (cfun_frame_layout.frame_size > 0)
+ {
+ rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+ rtx real_frame_off;
+
+ if (s390_stack_size)
+ {
+ HOST_WIDE_INT stack_guard;
+
+ if (s390_stack_guard)
+ stack_guard = s390_stack_guard;
+ else
+ {
+	      /* If no value for the stack guard is provided, the smallest
+		 power of 2 larger than the current frame size is chosen.  */
+ stack_guard = 1;
+ while (stack_guard < cfun_frame_layout.frame_size)
+ stack_guard <<= 1;
+ }
+
+ if (cfun_frame_layout.frame_size >= s390_stack_size)
+ {
+ warning (0, "frame size of function %qs is "
+ HOST_WIDE_INT_PRINT_DEC
+		       " bytes, exceeding the user-provided stack limit of "
+		       HOST_WIDE_INT_PRINT_DEC " bytes.  "
+		       "An unconditional trap is added.",
+		       current_function_name (), cfun_frame_layout.frame_size,
+ s390_stack_size);
+ emit_insn (gen_trap ());
+ }
+ else
+ {
+ /* stack_guard has to be smaller than s390_stack_size.
+ Otherwise we would emit an AND with zero which would
+ not match the test under mask pattern. */
+ if (stack_guard >= s390_stack_size)
+ {
+ warning (0, "frame size of function %qs is "
+ HOST_WIDE_INT_PRINT_DEC
+			       " bytes, which is more than half the stack size.  "
+			       "The dynamic check would not be reliable.  "
+			       "No check is emitted for this function.",
+			       current_function_name (),
+ cfun_frame_layout.frame_size);
+ }
+ else
+ {
+ HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
+ & ~(stack_guard - 1));
+
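+		  /* Example (sketch): with -mstack-size=65536 and
+		     -mstack-guard=4096 the mask is 0xf000; the
+		     conditional trap below then fires when bits
+		     12..15 of the new stack pointer are all zero,
+		     i.e. when the remaining stack has dropped into
+		     the guard area.  */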
+ rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
+ GEN_INT (stack_check_mask));
+ if (TARGET_64BIT)
+ emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
+ t, const0_rtx),
+ t, const0_rtx, const0_rtx));
+ else
+ emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
+ t, const0_rtx),
+ t, const0_rtx, const0_rtx));
+ }
+ }
+ }
+
+ if (s390_warn_framesize > 0
+ && cfun_frame_layout.frame_size >= s390_warn_framesize)
+ warning (0, "frame size of %qs is " HOST_WIDE_INT_PRINT_DEC " bytes",
+ current_function_name (), cfun_frame_layout.frame_size);
+
+ if (s390_warn_dynamicstack_p && cfun->calls_alloca)
+ warning (0, "%qs uses dynamic stack allocation", current_function_name ());
+
+ /* Save incoming stack pointer into temp reg. */
+ if (TARGET_BACKCHAIN || next_fpr)
+ insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
+
+ /* Subtract frame size from stack pointer. */
+
+ if (DISP_IN_RANGE (INTVAL (frame_off)))
+ {
+ insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ frame_off));
+ insn = emit_insn (insn);
+ }
+ else
+ {
+ if (!CONST_OK_FOR_K (INTVAL (frame_off)))
+ frame_off = force_const_mem (Pmode, frame_off);
+
+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
+ annotate_constant_pool_refs (&PATTERN (insn));
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ real_frame_off)));
+
+ /* Set backchain. */
+
+ if (TARGET_BACKCHAIN)
+ {
+ if (cfun_frame_layout.backchain_offset)
+ addr = gen_rtx_MEM (Pmode,
+ plus_constant (stack_pointer_rtx,
+ cfun_frame_layout.backchain_offset));
+ else
+ addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ set_mem_alias_set (addr, get_frame_alias_set ());
+ insn = emit_insn (gen_move_insn (addr, temp_reg));
+ }
+
+ /* If we support non-call exceptions (e.g. for Java),
+ we need to make sure the backchain pointer is set up
+ before any possibly trapping memory access. */
+ if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
+ {
+ addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+ emit_clobber (addr);
+ }
+ }
+
+  /* Save FPRs 8-15 (64-bit ABI).  */
+
+ if (cfun_save_high_fprs_p && next_fpr)
+ {
+ /* If the stack might be accessed through a different register
+ we have to make sure that the stack pointer decrement is not
+ moved below the use of the stack slots. */
+ s390_emit_stack_tie ();
+
+ insn = emit_insn (gen_add2_insn (temp_reg,
+ GEN_INT (cfun_frame_layout.f8_offset)));
+
+ offset = 0;
+
+ for (i = 24; i <= next_fpr; i++)
+ if (cfun_fpr_bit_p (i - 16))
+ {
+ rtx addr = plus_constant (stack_pointer_rtx,
+ cfun_frame_layout.frame_size
+ + cfun_frame_layout.f8_offset
+ + offset);
+
+ insn = save_fpr (temp_reg, offset, i);
+ offset += 8;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DFmode, addr),
+ gen_rtx_REG (DFmode, i)));
+ }
+ }
+
+ /* Set frame pointer, if needed. */
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Set up got pointer, if needed. */
+
+ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ {
+ rtx insns = s390_load_got ();
+
+ for (insn = insns; insn; insn = NEXT_INSN (insn))
+ annotate_constant_pool_refs (&PATTERN (insn));
+
+ emit_insn (insns);
+ }
+
+ if (TARGET_TPF_PROFILING)
+ {
+ /* Generate a BAS instruction to serve as a function
+ entry intercept to facilitate the use of tracing
+ algorithms located at the branch target. */
+ emit_insn (gen_prologue_tpf ());
+
+ /* Emit a blockage here so that all code
+ lies between the profiling mechanisms. */
+ emit_insn (gen_blockage ());
+ }
+}
+
+/* Expand the epilogue into a bunch of separate insns. */
+
+void
+s390_emit_epilogue (bool sibcall)
+{
+ rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
+ int area_bottom, area_top, offset = 0;
+ int next_offset;
+ rtvec p;
+ int i;
+
+ if (TARGET_TPF_PROFILING)
+ {
+
+ /* Generate a BAS instruction to serve as a function
+ entry intercept to facilitate the use of tracing
+ algorithms located at the branch target. */
+
+ /* Emit a blockage here so that all code
+ lies between the profiling mechanisms. */
+ emit_insn (gen_blockage ());
+
+ emit_insn (gen_epilogue_tpf ());
+ }
+
+ /* Check whether to use frame or stack pointer for restore. */
+
+ frame_pointer = (frame_pointer_needed
+ ? hard_frame_pointer_rtx : stack_pointer_rtx);
+
+ s390_frame_area (&area_bottom, &area_top);
+
+ /* Check whether we can access the register save area.
+ If not, increment the frame pointer as required. */
+
+ if (area_top <= area_bottom)
+ {
+ /* Nothing to restore. */
+ }
+ else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
+ && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
+ {
+ /* Area is in range. */
+ offset = cfun_frame_layout.frame_size;
+ }
+ else
+ {
+ rtx insn, frame_off, cfa;
+
+ offset = area_bottom < 0 ? -area_bottom : 0;
+ frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
+
+ cfa = gen_rtx_SET (VOIDmode, frame_pointer,
+ gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
+ if (DISP_IN_RANGE (INTVAL (frame_off)))
+ {
+ insn = gen_rtx_SET (VOIDmode, frame_pointer,
+ gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
+ insn = emit_insn (insn);
+ }
+ else
+ {
+ if (!CONST_OK_FOR_K (INTVAL (frame_off)))
+ frame_off = force_const_mem (Pmode, frame_off);
+
+ insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
+ annotate_constant_pool_refs (&PATTERN (insn));
+ }
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Restore call saved fprs. */
+
+ if (TARGET_64BIT)
+ {
+ if (cfun_save_high_fprs_p)
+ {
+ next_offset = cfun_frame_layout.f8_offset;
+ for (i = 24; i < 32; i++)
+ {
+ if (cfun_fpr_bit_p (i - 16))
+ {
+ restore_fpr (frame_pointer,
+ offset + next_offset, i);
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (DFmode, i), cfa_restores);
+ next_offset += 8;
+ }
+ }
+ }
+
+ }
+ else
+ {
+ next_offset = cfun_frame_layout.f4_offset;
+ for (i = 18; i < 20; i++)
+ {
+ if (cfun_fpr_bit_p (i - 16))
+ {
+ restore_fpr (frame_pointer,
+ offset + next_offset, i);
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (DFmode, i), cfa_restores);
+ next_offset += 8;
+ }
+ else if (!TARGET_PACKED_STACK)
+ next_offset += 8;
+ }
+
+ }
+
+ /* Return register. */
+
+ return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+
+ /* Restore call saved gprs. */
+
+ if (cfun_frame_layout.first_restore_gpr != -1)
+ {
+ rtx insn, addr;
+ int i;
+
+      /* Check for global registers and save them
+	 to the stack locations from which they would be restored.  */
+
+ for (i = cfun_frame_layout.first_restore_gpr;
+ i <= cfun_frame_layout.last_restore_gpr;
+ i++)
+ {
+ if (global_not_special_regno_p (i))
+ {
+ addr = plus_constant (frame_pointer,
+ offset + cfun_frame_layout.gprs_offset
+ + (i - cfun_frame_layout.first_save_gpr_slot)
+ * UNITS_PER_LONG);
+ addr = gen_rtx_MEM (Pmode, addr);
+ set_mem_alias_set (addr, get_frame_alias_set ());
+ emit_move_insn (addr, gen_rtx_REG (Pmode, i));
+ }
+ else
+ cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE,
+ gen_rtx_REG (Pmode, i), cfa_restores);
+ }
+
+ if (! sibcall)
+ {
+	  /* Fetch the return address from the stack before the load
+	     multiple; this helps scheduling.  */
+
+ if (cfun_frame_layout.save_return_addr_p
+ || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
+ && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
+ {
+ int return_regnum = find_unused_clobbered_reg();
+ if (!return_regnum)
+ return_regnum = 4;
+ return_reg = gen_rtx_REG (Pmode, return_regnum);
+
+ addr = plus_constant (frame_pointer,
+ offset + cfun_frame_layout.gprs_offset
+ + (RETURN_REGNUM
+ - cfun_frame_layout.first_save_gpr_slot)
+ * UNITS_PER_LONG);
+ addr = gen_rtx_MEM (Pmode, addr);
+ set_mem_alias_set (addr, get_frame_alias_set ());
+ emit_move_insn (return_reg, addr);
+ }
+ }
+
+ insn = restore_gprs (frame_pointer,
+ offset + cfun_frame_layout.gprs_offset
+ + (cfun_frame_layout.first_restore_gpr
+ - cfun_frame_layout.first_save_gpr_slot)
+ * UNITS_PER_LONG,
+ cfun_frame_layout.first_restore_gpr,
+ cfun_frame_layout.last_restore_gpr);
+ insn = emit_insn (insn);
+ REG_NOTES (insn) = cfa_restores;
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx, STACK_POINTER_OFFSET));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (! sibcall)
+ {
+
+ /* Return to caller. */
+
+ p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
+ RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ }
+}
+
+
+/* Return the size in bytes of a function argument of
+ type TYPE and/or mode MODE. At least one of TYPE or
+ MODE must be specified. */
+
+static int
+s390_function_arg_size (enum machine_mode mode, const_tree type)
+{
+ if (type)
+ return int_size_in_bytes (type);
+
+ /* No type info available for some library calls ... */
+ if (mode != BLKmode)
+ return GET_MODE_SIZE (mode);
+
+  /* If we have neither type nor mode, abort.  */
+ gcc_unreachable ();
+}
+
+/* Return true if a function argument of type TYPE and mode MODE
+ is to be passed in a floating-point register, if available. */
+
+static bool
+s390_function_arg_float (enum machine_mode mode, const_tree type)
+{
+ int size = s390_function_arg_size (mode, type);
+ if (size > 8)
+ return false;
+
+ /* Soft-float changes the ABI: no floating-point registers are used. */
+ if (TARGET_SOFT_FLOAT)
+ return false;
+
+ /* No type info available for some library calls ... */
+ if (!type)
+ return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
+
+ /* The ABI says that record types with a single member are treated
+ just like that member would be. */
+ while (TREE_CODE (type) == RECORD_TYPE)
+ {
+ tree field, single = NULL_TREE;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (single == NULL_TREE)
+ single = TREE_TYPE (field);
+ else
+ return false;
+ }
+
+ if (single == NULL_TREE)
+ return false;
+ else
+ type = single;
+ }
+
+ return TREE_CODE (type) == REAL_TYPE;
+}
+
+/* Return true if a function argument of type TYPE and mode MODE
+ is to be passed in an integer register, or a pair of integer
+ registers, if available. */
+
+static bool
+s390_function_arg_integer (enum machine_mode mode, const_tree type)
+{
+ int size = s390_function_arg_size (mode, type);
+ if (size > 8)
+ return false;
+
+ /* No type info available for some library calls ... */
+ if (!type)
+ return GET_MODE_CLASS (mode) == MODE_INT
+ || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
+
+ /* We accept small integral (and similar) types. */
+ if (INTEGRAL_TYPE_P (type)
+ || POINTER_TYPE_P (type)
+ || TREE_CODE (type) == NULLPTR_TYPE
+ || TREE_CODE (type) == OFFSET_TYPE
+ || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
+ return true;
+
+ /* We also accept structs of size 1, 2, 4, 8 that are not
+ passed in floating-point registers. */
+ if (AGGREGATE_TYPE_P (type)
+ && exact_log2 (size) >= 0
+ && !s390_function_arg_float (mode, type))
+ return true;
+
+ return false;
+}
+
+/* Return true if a function argument of type TYPE and mode MODE
+ is to be passed by reference. The ABI specifies that only
+ structures of size 1, 2, 4, or 8 bytes are passed by value,
+ all other structures (and complex numbers) are passed by
+ reference. */
+
+static bool
+s390_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ int size = s390_function_arg_size (mode, type);
+ if (size > 8)
+ return true;
+
+  if (type)
+    {
+      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
+	return true;
+
+      if (TREE_CODE (type) == COMPLEX_TYPE
+	  || TREE_CODE (type) == VECTOR_TYPE)
+	return true;
+    }
+
+  return false;
+}
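+
+/* For example, under the rules above a 12-byte struct (size not a
+   power of two) and any complex or vector type are passed by
+   reference, while an 8-byte struct is passed by value.  */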
+
+/* Update the data in CUM to advance over an argument of mode MODE and
+ data type TYPE. (TYPE is null for libcalls where that information
+   may not be available.)  The boolean NAMED specifies whether the
+ argument is a named argument (as opposed to an unnamed argument
+ matching an ellipsis). */
+
+static void
+s390_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (s390_function_arg_float (mode, type))
+ {
+ cum->fprs += 1;
+ }
+ else if (s390_function_arg_integer (mode, type))
+ {
+ int size = s390_function_arg_size (mode, type);
+ cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On S/390, we use general purpose registers 2 through 6 to
+ pass integer, pointer, and certain structure arguments, and
+ floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
+ to pass floating point arguments. All remaining arguments
+ are pushed to the stack. */
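+
+/* For instance, on 31 bit a DImode argument that is still passed in
+   registers occupies a pair of consecutive GPRs and is described by
+   the two-element PARALLEL built below; on 64 bit the same argument
+   fits in a single GPR.  */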
+
+static rtx
+s390_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (s390_function_arg_float (mode, type))
+ {
+ if (cum->fprs + 1 > FP_ARG_NUM_REG)
+ return 0;
+ else
+ return gen_rtx_REG (mode, cum->fprs + 16);
+ }
+ else if (s390_function_arg_integer (mode, type))
+ {
+ int size = s390_function_arg_size (mode, type);
+ int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
+
+ if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
+ return 0;
+ else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
+ return gen_rtx_REG (mode, cum->gprs + 2);
+ else if (n_gprs == 2)
+ {
+ rtvec p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0)
+ = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
+ const0_rtx);
+ RTVEC_ELT (p, 1)
+ = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
+ GEN_INT (4));
+
+ return gen_rtx_PARALLEL (mode, p);
+ }
+ }
+
+ /* After the real arguments, expand_call calls us once again
+ with a void_type_node type. Whatever we return here is
+ passed as operand 2 to the call expanders.
+
+ We don't need this feature ... */
+ else if (type == void_type_node)
+ return const0_rtx;
+
+ gcc_unreachable ();
+}
+
+/* Return true if return values of type TYPE should be returned
+ in a memory buffer whose address is passed by the caller as
+ hidden first argument. */
+
+static bool
+s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
+{
+ /* We accept small integral (and similar) types. */
+ if (INTEGRAL_TYPE_P (type)
+ || POINTER_TYPE_P (type)
+ || TREE_CODE (type) == OFFSET_TYPE
+ || TREE_CODE (type) == REAL_TYPE)
+ return int_size_in_bytes (type) > 8;
+
+ /* Aggregates and similar constructs are always returned
+ in memory. */
+ if (AGGREGATE_TYPE_P (type)
+ || TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)
+ return true;
+
+ /* ??? We get called on all sorts of random stuff from
+ aggregate_value_p. We can't abort, but it's not clear
+ what's safe to return. Pretend it's a struct I guess. */
+ return true;
+}
+
+/* Function arguments and return values are promoted to word size. */
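+
+/* For example, a 'short' argument is widened to Pmode (SImode on
+   31 bit, DImode on 64 bit), and on 64 bit an 'int' is widened as
+   well; pointer types are extended according to
+   POINTERS_EXTEND_UNSIGNED.  */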
+
+static enum machine_mode
+s390_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (INTEGRAL_MODE_P (mode)
+ && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
+ {
+ if (POINTER_TYPE_P (type))
+ *punsignedp = POINTERS_EXTEND_UNSIGNED;
+ return Pmode;
+ }
+
+ return mode;
+}
+
+/* Define where to return a (scalar) value of type RET_TYPE.
+ If RET_TYPE is null, define where to return a (scalar)
+ value of mode MODE from a libcall. */
+
+static rtx
+s390_function_and_libcall_value (enum machine_mode mode,
+ const_tree ret_type,
+ const_tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ /* For normal functions perform the promotion as
+ promote_function_mode would do. */
+ if (ret_type)
+ {
+ int unsignedp = TYPE_UNSIGNED (ret_type);
+ mode = promote_function_mode (ret_type, mode, &unsignedp,
+ fntype_or_decl, 1);
+ }
+
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
+ gcc_assert (GET_MODE_SIZE (mode) <= 8);
+
+ if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
+ return gen_rtx_REG (mode, 16);
+ else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
+ || UNITS_PER_LONG == UNITS_PER_WORD)
+ return gen_rtx_REG (mode, 2);
+ else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
+ {
+ /* This case is triggered when returning a 64 bit value with
+ -m31 -mzarch. Although the value would fit into a single
+ register it has to be forced into a 32 bit register pair in
+ order to match the ABI. */
+ rtvec p = rtvec_alloc (2);
+
+ RTVEC_ELT (p, 0)
+ = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
+ RTVEC_ELT (p, 1)
+ = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
+
+ return gen_rtx_PARALLEL (mode, p);
+ }
+
+ gcc_unreachable ();
+}
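+
+/* Sketch of the resulting conventions: an "int" or "long" result comes
+ back in %r2, a "double" in hard reg 16 (%f0) when hard float is
+ available, and a 64 bit value with -m31 -mzarch in the %r2/%r3 pair
+ built above.  */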
+
+/* Define where to return a scalar return value of type RET_TYPE. */
+
+static rtx
+s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool outgoing)
+{
+ return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
+ fn_decl_or_type, outgoing);
+}
+
+/* Define where to return a scalar libcall return value of mode
+ MODE. */
+
+static rtx
+s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return s390_function_and_libcall_value (mode, NULL_TREE,
+ NULL_TREE, true);
+}
+
+
+/* Create and return the va_list datatype.
+
+ On S/390, va_list is an array type equivalent to
+
+ typedef struct __va_list_tag
+ {
+ long __gpr;
+ long __fpr;
+ void *__overflow_arg_area;
+ void *__reg_save_area;
+ } va_list[1];
+
+ where __gpr and __fpr hold the number of general purpose
+ or floating point arguments used up to now, respectively,
+ __overflow_arg_area points to the stack location of the
+ next argument passed on the stack, and __reg_save_area
+ always points to the start of the register area in the
+ call frame of the current function. The function prologue
+ saves all registers used for argument passing into this
+ area if the function uses variable arguments. */
+
+static tree
+s390_build_builtin_va_list (void)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
+
+ record = lang_hooks.types.make_type (RECORD_TYPE);
+
+ type_decl =
+ build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__gpr"),
+ long_integer_type_node);
+ f_fpr = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__fpr"),
+ long_integer_type_node);
+ f_ovf = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ DECL_CHAIN (f_gpr) = f_fpr;
+ DECL_CHAIN (f_fpr) = f_ovf;
+ DECL_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
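+
+/* Making va_list an array of one structure element (rather than the
+ structure itself) matches the typedef shown above: a va_list decays
+ to a pointer when passed to another function, so callees such as
+ vprintf operate on the caller's __va_list_tag instead of a copy.  */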
+
+/* Implement va_start by filling the va_list structure VALIST.
+ STDARG_P is always true, and ignored.
+ NEXTARG points to the first anonymous stack argument.
+
+ The following global variables are used to initialize
+ the va_list structure:
+
+ crtl->args.info:
+ holds number of gprs and fprs used for named arguments.
+ crtl->args.arg_offset_rtx:
+ holds the offset of the first anonymous stack argument
+ (relative to the virtual arg pointer). */
+
+static void
+s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT n_gpr, n_fpr;
+ int off;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_simple_mem_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* Count number of gp and fp argument registers used. */
+
+ n_gpr = crtl->args.info.gprs;
+ n_fpr = crtl->args.info.fprs;
+
+ if (cfun->va_list_gpr_size)
+ {
+ t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (NULL_TREE, n_gpr));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (cfun->va_list_fpr_size)
+ {
+ t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (NULL_TREE, n_fpr));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
+ || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
+ {
+ t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+
+ off = INTVAL (crtl->args.arg_offset_rtx);
+ off = off < 0 ? 0 : off;
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
+ (int)n_gpr, (int)n_fpr, off);
+
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), t, size_int (off));
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the register save area. */
+ if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
+ || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
+ {
+ t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (sav), t,
+ size_int (-RETURN_REGNUM * UNITS_PER_LONG));
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
+
+/* Implement va_arg by updating the va_list structure
+ VALIST as required to retrieve an argument of type
+ TYPE, and returning that argument.
+
+ Generates code equivalent to:
+
+ if (integral value) {
+ if (size <= 4 && args.gpr < 5 ||
+ size > 4 && args.gpr < 4 )
+ ret = args.reg_save_area[args.gpr+8]
+ else
+ ret = *args.overflow_arg_area++;
+ } else if (float value) {
+ if (args.fpr < 2)
+ ret = args.reg_save_area[args.fpr+64]
+ else
+ ret = *args.overflow_arg_area++;
+ } else if (aggregate value) {
+ if (args.gpr < 5)
+ ret = *args.reg_save_area[args.gpr]
+ else
+ ret = **args.overflow_arg_area++;
+ } */
+
+static tree
+s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, reg, t, u;
+ int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
+ tree lab_false, lab_over, addr;
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_fpr = DECL_CHAIN (f_gpr);
+ f_ovf = DECL_CHAIN (f_fpr);
+ f_sav = DECL_CHAIN (f_ovf);
+
+ valist = build_va_arg_indirect_ref (valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* The tree for args* cannot be shared between gpr/fpr and ovf since
+ both appear on a lhs. */
+ valist = unshare_expr (valist);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+
+ size = int_size_in_bytes (type);
+
+ if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
+ {
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_arg: aggregate type");
+ debug_tree (type);
+ }
+
+ /* Aggregates are passed by reference. */
+ indirect_p = 1;
+ reg = gpr;
+ n_reg = 1;
+
+ /* Kernel stack layout on 31 bit: it is assumed here that no padding
+ will be added by s390_frame_info, because for va_args an even
+ number of gprs always has to be saved (r15-r2 = 14 regs).  */
+ sav_ofs = 2 * UNITS_PER_LONG;
+ sav_scale = UNITS_PER_LONG;
+ size = UNITS_PER_LONG;
+ max_reg = GP_ARG_NUM_REG - n_reg;
+ }
+ else if (s390_function_arg_float (TYPE_MODE (type), type))
+ {
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_arg: float type");
+ debug_tree (type);
+ }
+
+ /* FP args go in FP registers, if present. */
+ indirect_p = 0;
+ reg = fpr;
+ n_reg = 1;
+ sav_ofs = 16 * UNITS_PER_LONG;
+ sav_scale = 8;
+ max_reg = FP_ARG_NUM_REG - n_reg;
+ }
+ else
+ {
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_arg: other type");
+ debug_tree (type);
+ }
+
+ /* Otherwise into GP registers. */
+ indirect_p = 0;
+ reg = gpr;
+ n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
+
+ /* Kernel stack layout on 31 bit: it is assumed here that no padding
+ will be added by s390_frame_info, because for va_args an even
+ number of gprs always has to be saved (r15-r2 = 14 regs).  */
+ sav_ofs = 2 * UNITS_PER_LONG;
+
+ if (size < UNITS_PER_LONG)
+ sav_ofs += UNITS_PER_LONG - size;
+
+ sav_scale = UNITS_PER_LONG;
+ max_reg = GP_ARG_NUM_REG - n_reg;
+ }
+
+ /* Pull the value out of the saved registers ... */
+
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+ addr = create_tmp_var (ptr_type_node, "addr");
+
+ t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
+ t = build2 (GT_EXPR, boolean_type_node, reg, t);
+ u = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav,
+ size_int (sav_ofs));
+ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, fold_convert (sizetype, u));
+
+ gimplify_assign (addr, t, pre_p);
+
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+
+
+ /* ... Otherwise out of the overflow area. */
+
+ t = ovf;
+ if (size < UNITS_PER_LONG)
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ size_int (UNITS_PER_LONG - size));
+
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+ gimplify_assign (addr, t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t,
+ size_int (size));
+ gimplify_assign (ovf, t, pre_p);
+
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+
+
+ /* Increment register save count. */
+
+ u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (n_reg)));
+ gimplify_and_add (u, pre_p);
+
+ if (indirect_p)
+ {
+ t = build_pointer_type_for_mode (build_pointer_type (type),
+ ptr_mode, true);
+ addr = fold_convert (t, addr);
+ addr = build_va_arg_indirect_ref (addr);
+ }
+ else
+ {
+ t = build_pointer_type_for_mode (type, ptr_mode, true);
+ addr = fold_convert (t, addr);
+ }
+
+ return build_va_arg_indirect_ref (addr);
+}
+
+
+/* Builtins. */
+
+enum s390_builtin
+{
+ S390_BUILTIN_THREAD_POINTER,
+ S390_BUILTIN_SET_THREAD_POINTER,
+
+ S390_BUILTIN_max
+};
+
+static enum insn_code const code_for_builtin_64[S390_BUILTIN_max] = {
+ CODE_FOR_get_tp_64,
+ CODE_FOR_set_tp_64
+};
+
+static enum insn_code const code_for_builtin_31[S390_BUILTIN_max] = {
+ CODE_FOR_get_tp_31,
+ CODE_FOR_set_tp_31
+};
+
+static void
+s390_init_builtins (void)
+{
+ tree ftype;
+
+ ftype = build_function_type (ptr_type_node, void_list_node);
+ add_builtin_function ("__builtin_thread_pointer", ftype,
+ S390_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_set_thread_pointer", ftype,
+ S390_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
+ NULL, NULL_TREE);
+}
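+
+/* Usage sketch for the two builtins registered above:
+
+     void *tp = __builtin_thread_pointer ();
+     __builtin_set_thread_pointer (tp);
+
+   Both are expanded via the get_tp/set_tp insn patterns selected in
+   s390_expand_builtin below.  */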
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+#define MAX_ARGS 2
+
+ enum insn_code const *code_for_builtin =
+ TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31;
+
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ enum insn_code icode;
+ rtx op[MAX_ARGS], pat;
+ int arity;
+ bool nonvoid;
+ tree arg;
+ call_expr_arg_iterator iter;
+
+ if (fcode >= S390_BUILTIN_max)
+ internal_error ("bad builtin fcode");
+ icode = code_for_builtin[fcode];
+ if (icode == 0)
+ internal_error ("bad builtin fcode");
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+ arity = 0;
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+
+ if (arg == error_mark_node)
+ return NULL_RTX;
+ if (arity > MAX_ARGS)
+ return NULL_RTX;
+
+ insn_op = &insn_data[icode].operand[arity + nonvoid];
+
+ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+
+ if (!(*insn_op->predicate) (op[arity], insn_op->mode))
+ op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+ arity++;
+ }
+
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ }
+
+ switch (arity)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0]);
+ else
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+
+/* Output assembly code for the trampoline template to
+ stdio stream FILE.
+
+ On S/390, we use gpr 1 internally in the trampoline code;
+ gpr 0 is used to hold the static chain. */
+
+static void
+s390_asm_trampoline_template (FILE *file)
+{
+ rtx op[2];
+ op[0] = gen_rtx_REG (Pmode, 0);
+ op[1] = gen_rtx_REG (Pmode, 1);
+
+ if (TARGET_64BIT)
+ {
+ output_asm_insn ("basr\t%1,0", op); /* 2 byte */
+ output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
+ output_asm_insn ("br\t%1", op); /* 2 byte */
+ ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
+ }
+ else
+ {
+ output_asm_insn ("basr\t%1,0", op); /* 2 byte */
+ output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
+ output_asm_insn ("br\t%1", op); /* 2 byte */
+ ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
+ }
+}
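+
+/* Resulting 64-bit trampoline layout (the 31-bit variant is analogous,
+   with 4 byte data slots at offsets 8 and 12):
+
+     0:  basr %r1,0            # %r1 := address of the next insn (2)
+     2:  lmg  %r0,%r1,14(%r1)  # %r0 := chain (offset 16), %r1 := target
+     8:  br   %r1              # jump to target, static chain in %r0
+     16: <static chain>        # stored by s390_trampoline_init below
+     24: <function address>  */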
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+static void
+s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
+ emit_move_insn (mem, fnaddr);
+}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+void
+s390_function_profiler (FILE *file, int labelno)
+{
+ rtx op[7];
+
+ char label[128];
+ ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
+
+ fprintf (file, "# function profiler \n");
+
+ op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
+ op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ op[1] = gen_rtx_MEM (Pmode, plus_constant (op[1], UNITS_PER_LONG));
+
+ op[2] = gen_rtx_REG (Pmode, 1);
+ op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
+ SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
+
+ op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
+ if (flag_pic)
+ {
+ op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
+ op[4] = gen_rtx_CONST (Pmode, op[4]);
+ }
+
+ if (TARGET_64BIT)
+ {
+ output_asm_insn ("stg\t%0,%1", op);
+ output_asm_insn ("larl\t%2,%3", op);
+ output_asm_insn ("brasl\t%0,%4", op);
+ output_asm_insn ("lg\t%0,%1", op);
+ }
+ else if (!flag_pic)
+ {
+ op[6] = gen_label_rtx ();
+
+ output_asm_insn ("st\t%0,%1", op);
+ output_asm_insn ("bras\t%2,%l6", op);
+ output_asm_insn (".long\t%4", op);
+ output_asm_insn (".long\t%3", op);
+ targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
+ output_asm_insn ("l\t%0,0(%2)", op);
+ output_asm_insn ("l\t%2,4(%2)", op);
+ output_asm_insn ("basr\t%0,%0", op);
+ output_asm_insn ("l\t%0,%1", op);
+ }
+ else
+ {
+ op[5] = gen_label_rtx ();
+ op[6] = gen_label_rtx ();
+
+ output_asm_insn ("st\t%0,%1", op);
+ output_asm_insn ("bras\t%2,%l6", op);
+ targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
+ output_asm_insn (".long\t%4-%l5", op);
+ output_asm_insn (".long\t%3-%l5", op);
+ targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
+ output_asm_insn ("lr\t%0,%2", op);
+ output_asm_insn ("a\t%0,0(%2)", op);
+ output_asm_insn ("a\t%2,4(%2)", op);
+ output_asm_insn ("basr\t%0,%0", op);
+ output_asm_insn ("l\t%0,%1", op);
+ }
+}
+
+/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
+ into its SYMBOL_REF_FLAGS. */
+
+static void
+s390_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL)
+ {
+ /* If a variable has a forced alignment to < 2 bytes, mark it
+ with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
+ operand. */
+ if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
+ if (!DECL_SIZE (decl)
+ || !DECL_ALIGN (decl)
+ || !host_integerp (DECL_SIZE (decl), 0)
+ || (DECL_ALIGN (decl) <= 64
+ && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
+ }
+
+ /* Literal pool references don't have a decl so they are handled
+ differently here. We rely on the information in the MEM_ALIGN
+ entry to decide upon natural alignment. */
+ if (MEM_P (rtl)
+ && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
+ && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
+ && (MEM_ALIGN (rtl) == 0
+ || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
+ || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
+}
+
+/* Output thunk to FILE that implements a C++ virtual function call (with
+ multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
+ by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
+ stored at VCALL_OFFSET in the vtable whose address is located at offset 0
+ relative to the resulting this pointer. */
+
+static void
+s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx op[10];
+ int nonlocal = 0;
+
+ /* Make sure unwind info is emitted for the thunk if needed. */
+ final_start_function (emit_barrier (), file, 1);
+
+ /* Operand 0 is the target function. */
+ op[0] = XEXP (DECL_RTL (function), 0);
+ if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
+ {
+ nonlocal = 1;
+ op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
+ TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
+ op[0] = gen_rtx_CONST (Pmode, op[0]);
+ }
+
+ /* Operand 1 is the 'this' pointer. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ op[1] = gen_rtx_REG (Pmode, 3);
+ else
+ op[1] = gen_rtx_REG (Pmode, 2);
+
+ /* Operand 2 is the delta. */
+ op[2] = GEN_INT (delta);
+
+ /* Operand 3 is the vcall_offset. */
+ op[3] = GEN_INT (vcall_offset);
+
+ /* Operand 4 is the temporary register. */
+ op[4] = gen_rtx_REG (Pmode, 1);
+
+ /* Operands 5 to 8 can be used as labels. */
+ op[5] = NULL_RTX;
+ op[6] = NULL_RTX;
+ op[7] = NULL_RTX;
+ op[8] = NULL_RTX;
+
+ /* Operand 9 can be used for temporary register. */
+ op[9] = NULL_RTX;
+
+ /* Generate code. */
+ if (TARGET_64BIT)
+ {
+ /* Setup literal pool pointer if required. */
+ if ((!DISP_IN_RANGE (delta)
+ && !CONST_OK_FOR_K (delta)
+ && !CONST_OK_FOR_Os (delta))
+ || (!DISP_IN_RANGE (vcall_offset)
+ && !CONST_OK_FOR_K (vcall_offset)
+ && !CONST_OK_FOR_Os (vcall_offset)))
+ {
+ op[5] = gen_label_rtx ();
+ output_asm_insn ("larl\t%4,%5", op);
+ }
+
+ /* Add DELTA to this pointer. */
+ if (delta)
+ {
+ if (CONST_OK_FOR_J (delta))
+ output_asm_insn ("la\t%1,%2(%1)", op);
+ else if (DISP_IN_RANGE (delta))
+ output_asm_insn ("lay\t%1,%2(%1)", op);
+ else if (CONST_OK_FOR_K (delta))
+ output_asm_insn ("aghi\t%1,%2", op);
+ else if (CONST_OK_FOR_Os (delta))
+ output_asm_insn ("agfi\t%1,%2", op);
+ else
+ {
+ op[6] = gen_label_rtx ();
+ output_asm_insn ("agf\t%1,%6-%5(%4)", op);
+ }
+ }
+
+ /* Perform vcall adjustment. */
+ if (vcall_offset)
+ {
+ if (DISP_IN_RANGE (vcall_offset))
+ {
+ output_asm_insn ("lg\t%4,0(%1)", op);
+ output_asm_insn ("ag\t%1,%3(%4)", op);
+ }
+ else if (CONST_OK_FOR_K (vcall_offset))
+ {
+ output_asm_insn ("lghi\t%4,%3", op);
+ output_asm_insn ("ag\t%4,0(%1)", op);
+ output_asm_insn ("ag\t%1,0(%4)", op);
+ }
+ else if (CONST_OK_FOR_Os (vcall_offset))
+ {
+ output_asm_insn ("lgfi\t%4,%3", op);
+ output_asm_insn ("ag\t%4,0(%1)", op);
+ output_asm_insn ("ag\t%1,0(%4)", op);
+ }
+ else
+ {
+ op[7] = gen_label_rtx ();
+ output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
+ output_asm_insn ("ag\t%4,0(%1)", op);
+ output_asm_insn ("ag\t%1,0(%4)", op);
+ }
+ }
+
+ /* Jump to target. */
+ output_asm_insn ("jg\t%0", op);
+
+ /* Output literal pool if required. */
+ if (op[5])
+ {
+ output_asm_insn (".align\t4", op);
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[5]));
+ }
+ if (op[6])
+ {
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[6]));
+ output_asm_insn (".long\t%2", op);
+ }
+ if (op[7])
+ {
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[7]));
+ output_asm_insn (".long\t%3", op);
+ }
+ }
+ else
+ {
+ /* Setup base pointer if required. */
+ if (!vcall_offset
+ || (!DISP_IN_RANGE (delta)
+ && !CONST_OK_FOR_K (delta)
+ && !CONST_OK_FOR_Os (delta))
+ || (!DISP_IN_RANGE (delta)
+ && !CONST_OK_FOR_K (vcall_offset)
+ && !CONST_OK_FOR_Os (vcall_offset)))
+ {
+ op[5] = gen_label_rtx ();
+ output_asm_insn ("basr\t%4,0", op);
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[5]));
+ }
+
+ /* Add DELTA to this pointer. */
+ if (delta)
+ {
+ if (CONST_OK_FOR_J (delta))
+ output_asm_insn ("la\t%1,%2(%1)", op);
+ else if (DISP_IN_RANGE (delta))
+ output_asm_insn ("lay\t%1,%2(%1)", op);
+ else if (CONST_OK_FOR_K (delta))
+ output_asm_insn ("ahi\t%1,%2", op);
+ else if (CONST_OK_FOR_Os (delta))
+ output_asm_insn ("afi\t%1,%2", op);
+ else
+ {
+ op[6] = gen_label_rtx ();
+ output_asm_insn ("a\t%1,%6-%5(%4)", op);
+ }
+ }
+
+ /* Perform vcall adjustment. */
+ if (vcall_offset)
+ {
+ if (CONST_OK_FOR_J (vcall_offset))
+ {
+ output_asm_insn ("l\t%4,0(%1)", op);
+ output_asm_insn ("a\t%1,%3(%4)", op);
+ }
+ else if (DISP_IN_RANGE (vcall_offset))
+ {
+ output_asm_insn ("l\t%4,0(%1)", op);
+ output_asm_insn ("ay\t%1,%3(%4)", op);
+ }
+ else if (CONST_OK_FOR_K (vcall_offset))
+ {
+ output_asm_insn ("lhi\t%4,%3", op);
+ output_asm_insn ("a\t%4,0(%1)", op);
+ output_asm_insn ("a\t%1,0(%4)", op);
+ }
+ else if (CONST_OK_FOR_Os (vcall_offset))
+ {
+ output_asm_insn ("iilf\t%4,%3", op);
+ output_asm_insn ("a\t%4,0(%1)", op);
+ output_asm_insn ("a\t%1,0(%4)", op);
+ }
+ else
+ {
+ op[7] = gen_label_rtx ();
+ output_asm_insn ("l\t%4,%7-%5(%4)", op);
+ output_asm_insn ("a\t%4,0(%1)", op);
+ output_asm_insn ("a\t%1,0(%4)", op);
+ }
+
+ /* We had to clobber the base pointer register.
+ Re-setup the base pointer (with a different base). */
+ op[5] = gen_label_rtx ();
+ output_asm_insn ("basr\t%4,0", op);
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[5]));
+ }
+
+ /* Jump to target. */
+ op[8] = gen_label_rtx ();
+
+ if (!flag_pic)
+ output_asm_insn ("l\t%4,%8-%5(%4)", op);
+ else if (!nonlocal)
+ output_asm_insn ("a\t%4,%8-%5(%4)", op);
+ /* We cannot call through .plt, since .plt requires %r12 loaded. */
+ else if (flag_pic == 1)
+ {
+ output_asm_insn ("a\t%4,%8-%5(%4)", op);
+ output_asm_insn ("l\t%4,%0(%4)", op);
+ }
+ else if (flag_pic == 2)
+ {
+ op[9] = gen_rtx_REG (Pmode, 0);
+ output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
+ output_asm_insn ("a\t%4,%8-%5(%4)", op);
+ output_asm_insn ("ar\t%4,%9", op);
+ output_asm_insn ("l\t%4,0(%4)", op);
+ }
+
+ output_asm_insn ("br\t%4", op);
+
+ /* Output literal pool. */
+ output_asm_insn (".align\t4", op);
+
+ if (nonlocal && flag_pic == 2)
+ output_asm_insn (".long\t%0", op);
+ if (nonlocal)
+ {
+ op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
+ }
+
+ targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
+ if (!flag_pic)
+ output_asm_insn (".long\t%0", op);
+ else
+ output_asm_insn (".long\t%0-%5", op);
+
+ if (op[6])
+ {
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[6]));
+ output_asm_insn (".long\t%2", op);
+ }
+ if (op[7])
+ {
+ targetm.asm_out.internal_label (file, "L",
+ CODE_LABEL_NUMBER (op[7]));
+ output_asm_insn (".long\t%3", op);
+ }
+ }
+ final_end_function ();
+}
+
+static bool
+s390_valid_pointer_mode (enum machine_mode mode)
+{
+ return (mode == SImode || (TARGET_64BIT && mode == DImode));
+}
+
+/* Checks whether the given CALL_EXPR would use a call-saved
+ register. This is used to decide whether sibling call
+ optimization could be performed on the respective function
+ call. */
+
+static bool
+s390_call_saved_register_used (tree call_expr)
+{
+ CUMULATIVE_ARGS cum;
+ tree parameter;
+ enum machine_mode mode;
+ tree type;
+ rtx parm_rtx;
+ int reg, i;
+
+ INIT_CUMULATIVE_ARGS (cum, NULL, NULL, 0, 0);
+
+ for (i = 0; i < call_expr_nargs (call_expr); i++)
+ {
+ parameter = CALL_EXPR_ARG (call_expr, i);
+ gcc_assert (parameter);
+
+ /* For an undeclared variable passed as parameter we will get
+ an ERROR_MARK node here. */
+ if (TREE_CODE (parameter) == ERROR_MARK)
+ return true;
+
+ type = TREE_TYPE (parameter);
+ gcc_assert (type);
+
+ mode = TYPE_MODE (type);
+ gcc_assert (mode);
+
+ if (pass_by_reference (&cum, mode, type, true))
+ {
+ mode = Pmode;
+ type = build_pointer_type (type);
+ }
+
+ parm_rtx = s390_function_arg (&cum, mode, type, 0);
+
+ s390_function_arg_advance (&cum, mode, type, 0);
+
+ if (!parm_rtx)
+ continue;
+
+ if (REG_P (parm_rtx))
+ {
+ for (reg = 0;
+ reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
+ reg++)
+ if (!call_used_regs[reg + REGNO (parm_rtx)])
+ return true;
+ }
+
+ if (GET_CODE (parm_rtx) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
+ {
+ rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
+
+ gcc_assert (REG_P (r));
+
+ for (reg = 0;
+ reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
+ reg++)
+ if (!call_used_regs[reg + REGNO (r)])
+ return true;
+ }
+ }
+
+ }
+ return false;
+}
+
+/* Return true if the given call expression can be
+ turned into a sibling call.
+ DECL holds the declaration of the function to be called whereas
+ EXP is the call expression itself. */
+
+static bool
+s390_function_ok_for_sibcall (tree decl, tree exp)
+{
+ /* The TPF epilogue uses register 1. */
+ if (TARGET_TPF_PROFILING)
+ return false;
+
+ /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
+ which would have to be restored before the sibcall. */
+ if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
+ return false;
+
+ /* Register 6 on s390 is available as an argument register but unfortunately
+ "caller saved". This makes functions needing this register for arguments
+ not suitable for sibcalls. */
+ return !s390_call_saved_register_used (exp);
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = CC_REGNUM;
+ *p2 = INVALID_REGNUM;
+
+ return true;
+}
+
+/* This function is used by the call expanders of the machine description.
+ It emits the call insn itself together with the necessary operations
+ to adjust the target address and returns the emitted insn.
+ ADDR_LOCATION is the target address rtx
+ TLS_CALL the location of the thread-local symbol
+ RESULT_REG the register where the result of the call should be stored
+ RETADDR_REG the register where the return address should be stored
+ If this parameter is NULL_RTX the call is considered
+ to be a sibling call. */
+
+rtx
+s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
+ rtx retaddr_reg)
+{
+ bool plt_call = false;
+ rtx insn;
+ rtx call;
+ rtx clobber;
+ rtvec vec;
+
+ /* Direct function calls need special treatment. */
+ if (GET_CODE (addr_location) == SYMBOL_REF)
+ {
+ /* When calling a global routine in PIC mode, we must
+ replace the symbol itself with the PLT stub. */
+ if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
+ {
+ if (retaddr_reg != NULL_RTX)
+ {
+ addr_location = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, addr_location),
+ UNSPEC_PLT);
+ addr_location = gen_rtx_CONST (Pmode, addr_location);
+ plt_call = true;
+ }
+ else
+ /* For -fpic code the PLT entries might use r12 which is
+ call-saved. Therefore we cannot do a sibcall when
+ calling directly using a symbol ref. When reaching
+ this point we decided (in s390_function_ok_for_sibcall)
+ to do a sibcall for a function pointer but one of the
+ optimizers was able to get rid of the function pointer
+ by propagating the symbol ref into the call. This
+ optimization is illegal for S/390 so we turn the direct
+ call into an indirect call again.  */
+ addr_location = force_reg (Pmode, addr_location);
+ }
+
+ /* Unless we can use the bras(l) insn, force the
+ routine address into a register. */
+ if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
+ {
+ if (flag_pic)
+ addr_location = legitimize_pic_address (addr_location, 0);
+ else
+ addr_location = force_reg (Pmode, addr_location);
+ }
+ }
+
+ /* If it is already an indirect call or the code above moved the
+ SYMBOL_REF to somewhere else make sure the address can be found in
+ register 1. */
+ if (retaddr_reg == NULL_RTX
+ && GET_CODE (addr_location) != SYMBOL_REF
+ && !plt_call)
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
+ addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
+ }
+
+ addr_location = gen_rtx_MEM (QImode, addr_location);
+ call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
+
+ if (result_reg != NULL_RTX)
+ call = gen_rtx_SET (VOIDmode, result_reg, call);
+
+ if (retaddr_reg != NULL_RTX)
+ {
+ clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
+
+ if (tls_call != NULL_RTX)
+ vec = gen_rtvec (3, call, clobber,
+ gen_rtx_USE (VOIDmode, tls_call));
+ else
+ vec = gen_rtvec (2, call, clobber);
+
+ call = gen_rtx_PARALLEL (VOIDmode, vec);
+ }
+
+ insn = emit_call_insn (call);
+
+ /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
+ if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
+ {
+ /* s390_function_ok_for_sibcall should
+ have denied sibcalls in this case. */
+ gcc_assert (retaddr_reg != NULL_RTX);
+
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+ }
+ return insn;
+}
+
+/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
+
+static void
+s390_conditional_register_usage (void)
+{
+ int i;
+
+ if (flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+ if (TARGET_CPU_ZARCH)
+ {
+ fixed_regs[BASE_REGNUM] = 0;
+ call_used_regs[BASE_REGNUM] = 0;
+ fixed_regs[RETURN_REGNUM] = 0;
+ call_used_regs[RETURN_REGNUM] = 0;
+ }
+ if (TARGET_64BIT)
+ {
+ for (i = 24; i < 32; i++)
+ call_used_regs[i] = call_really_used_regs[i] = 0;
+ }
+ else
+ {
+ for (i = 18; i < 20; i++)
+ call_used_regs[i] = call_really_used_regs[i] = 0;
+ }
+
+ if (TARGET_SOFT_FLOAT)
+ {
+ for (i = 16; i < 32; i++)
+ call_used_regs[i] = fixed_regs[i] = 1;
+ }
+}
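+
+/* E.g. compiling with -fPIC removes the GOT pointer %r12
+ (PIC_OFFSET_TABLE_REGNUM) from the allocatable registers, and
+ -msoft-float marks all floating point registers (hard regs 16-31)
+ as fixed.  */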
+
+/* Corresponding function to eh_return expander. */
+
+static GTY(()) rtx s390_tpf_eh_return_symbol;
+void
+s390_emit_tpf_eh_return (rtx target)
+{
+ rtx insn, reg;
+
+ if (!s390_tpf_eh_return_symbol)
+ s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
+
+ reg = gen_rtx_REG (Pmode, 2);
+
+ emit_move_insn (reg, target);
+ insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
+ gen_rtx_REG (Pmode, RETURN_REGNUM));
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
+
+ emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
+}
+
+/* Rework the prologue/epilogue to avoid saving/restoring
+ registers unnecessarily. */
+
+static void
+s390_optimize_prologue (void)
+{
+ rtx insn, new_insn, next_insn;
+
+ /* Do a final recompute of the frame-related data. */
+
+ s390_update_frame_layout ();
+
+ /* If all special registers are in fact used, there's nothing we
+ can do, so no point in walking the insn list. */
+
+ if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
+ && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
+ && (TARGET_CPU_ZARCH
+ || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
+ && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
+ return;
+
+ /* Search for prologue/epilogue insns and replace them. */
+
+ for (insn = get_insns (); insn; insn = next_insn)
+ {
+ int first, last, off;
+ rtx set, base, offset;
+
+ next_insn = NEXT_INSN (insn);
+
+ if (GET_CODE (insn) != INSN)
+ continue;
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL
+ && store_multiple_operation (PATTERN (insn), VOIDmode))
+ {
+ set = XVECEXP (PATTERN (insn), 0, 0);
+ first = REGNO (SET_SRC (set));
+ last = first + XVECLEN (PATTERN (insn), 0) - 1;
+ offset = const0_rtx;
+ base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
+ off = INTVAL (offset);
+
+ if (GET_CODE (base) != REG || off < 0)
+ continue;
+ if (cfun_frame_layout.first_save_gpr != -1
+ && (cfun_frame_layout.first_save_gpr < first
+ || cfun_frame_layout.last_save_gpr > last))
+ continue;
+ if (REGNO (base) != STACK_POINTER_REGNUM
+ && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
+ continue;
+ if (first > BASE_REGNUM || last < BASE_REGNUM)
+ continue;
+
+ if (cfun_frame_layout.first_save_gpr != -1)
+ {
+ new_insn = save_gprs (base,
+ off + (cfun_frame_layout.first_save_gpr
+ - first) * UNITS_PER_LONG,
+ cfun_frame_layout.first_save_gpr,
+ cfun_frame_layout.last_save_gpr);
+ new_insn = emit_insn_before (new_insn, insn);
+ INSN_ADDRESSES_NEW (new_insn, -1);
+ }
+
+ remove_insn (insn);
+ continue;
+ }
+
+ if (cfun_frame_layout.first_save_gpr == -1
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == REG
+ && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
+ || (!TARGET_CPU_ZARCH
+ && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
+ && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
+ {
+ set = PATTERN (insn);
+ first = REGNO (SET_SRC (set));
+ offset = const0_rtx;
+ base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
+ off = INTVAL (offset);
+
+ if (GET_CODE (base) != REG || off < 0)
+ continue;
+ if (REGNO (base) != STACK_POINTER_REGNUM
+ && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
+ continue;
+
+ remove_insn (insn);
+ continue;
+ }
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL
+ && load_multiple_operation (PATTERN (insn), VOIDmode))
+ {
+ set = XVECEXP (PATTERN (insn), 0, 0);
+ first = REGNO (SET_DEST (set));
+ last = first + XVECLEN (PATTERN (insn), 0) - 1;
+ offset = const0_rtx;
+ base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
+ off = INTVAL (offset);
+
+ if (GET_CODE (base) != REG || off < 0)
+ continue;
+ if (cfun_frame_layout.first_restore_gpr != -1
+ && (cfun_frame_layout.first_restore_gpr < first
+ || cfun_frame_layout.last_restore_gpr > last))
+ continue;
+ if (REGNO (base) != STACK_POINTER_REGNUM
+ && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
+ continue;
+ if (first > BASE_REGNUM || last < BASE_REGNUM)
+ continue;
+
+ if (cfun_frame_layout.first_restore_gpr != -1)
+ {
+ new_insn = restore_gprs (base,
+ off + (cfun_frame_layout.first_restore_gpr
+ - first) * UNITS_PER_LONG,
+ cfun_frame_layout.first_restore_gpr,
+ cfun_frame_layout.last_restore_gpr);
+ new_insn = emit_insn_before (new_insn, insn);
+ INSN_ADDRESSES_NEW (new_insn, -1);
+ }
+
+ remove_insn (insn);
+ continue;
+ }
+
+ if (cfun_frame_layout.first_restore_gpr == -1
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (insn))) == REG
+ && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
+ || (!TARGET_CPU_ZARCH
+ && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
+ && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
+ {
+ set = PATTERN (insn);
+ first = REGNO (SET_DEST (set));
+ offset = const0_rtx;
+ base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
+ off = INTVAL (offset);
+
+ if (GET_CODE (base) != REG || off < 0)
+ continue;
+ if (REGNO (base) != STACK_POINTER_REGNUM
+ && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
+ continue;
+
+ remove_insn (insn);
+ continue;
+ }
+ }
+}
+
+/* On z10 and later the dynamic branch prediction must see the
+ backward jump within a certain window. If not, it falls back to
+ the static prediction. This function rearranges the loop backward
+ branch in a way which makes the static prediction always correct.
+ The function returns true if it added an instruction. */
+static bool
+s390_fix_long_loop_prediction (rtx insn)
+{
+ rtx set = single_set (insn);
+ rtx code_label, label_ref, new_label;
+ rtx uncond_jump;
+ rtx cur_insn;
+ rtx tmp;
+ int distance;
+
+ /* This will exclude branch on count and branch on index patterns
+ since these are correctly statically predicted. */
+ if (!set
+ || SET_DEST (set) != pc_rtx
+ || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
+ return false;
+
+ label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
+ XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
+
+ gcc_assert (GET_CODE (label_ref) == LABEL_REF);
+
+ code_label = XEXP (label_ref, 0);
+
+ if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
+ || INSN_ADDRESSES (INSN_UID (insn)) == -1
+ || (INSN_ADDRESSES (INSN_UID (insn))
+ - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
+ return false;
+
+ for (distance = 0, cur_insn = PREV_INSN (insn);
+ distance < PREDICT_DISTANCE - 6;
+ distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
+ if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
+ return false;
+
+ new_label = gen_label_rtx ();
+ uncond_jump = emit_jump_insn_after (
+ gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, code_label)),
+ insn);
+ emit_label_after (new_label, uncond_jump);
+
+ tmp = XEXP (SET_SRC (set), 1);
+ XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
+ XEXP (SET_SRC (set), 2) = tmp;
+ INSN_CODE (insn) = -1;
+
+ XEXP (label_ref, 0) = new_label;
+ JUMP_LABEL (insn) = new_label;
+ JUMP_LABEL (uncond_jump) = code_label;
+
+ return true;
+}
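+
+/* The transformation above turns
+
+     insn:  if (cond) goto code_label   ; too far for the prediction window
+
+   into
+
+     insn:         if (!cond) goto new_label
+     uncond_jump:  goto code_label
+     new_label:
+
+   so that the taken backward branch is unconditional and therefore
+   statically predicted correctly.  */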
+
+/* Returns 1 if INSN reads the value of REG for purposes not related
+ to addressing of memory, and 0 otherwise. */
+static int
+s390_non_addr_reg_read_p (rtx reg, rtx insn)
+{
+ return reg_referenced_p (reg, PATTERN (insn))
+ && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
+}
+
+/* Starting from INSN find_cond_jump looks downwards in the insn
+ stream for a single jump insn which is the last user of the
+ condition code set in INSN. */
+static rtx
+find_cond_jump (rtx insn)
+{
+ for (; insn; insn = NEXT_INSN (insn))
+ {
+ rtx ite, cc;
+
+ if (LABEL_P (insn))
+ break;
+
+ if (!JUMP_P (insn))
+ {
+ if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
+ break;
+ continue;
+ }
+
+ /* This will be triggered by a return. */
+ if (GET_CODE (PATTERN (insn)) != SET)
+ break;
+
+ gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
+ ite = SET_SRC (PATTERN (insn));
+
+ if (GET_CODE (ite) != IF_THEN_ELSE)
+ break;
+
+ cc = XEXP (XEXP (ite, 0), 0);
+ if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
+ break;
+
+ if (find_reg_note (insn, REG_DEAD, cc))
+ return insn;
+ break;
+ }
+
+ return NULL_RTX;
+}
+
+/* Swap the condition in COND and the operands in OP0 and OP1 so that
+ the semantics does not change. If NULL_RTX is passed as COND the
+ function tries to find the conditional jump starting with INSN. */
+static void
+s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
+{
+ rtx tmp = *op0;
+
+ if (cond == NULL_RTX)
+ {
+ rtx jump = find_cond_jump (NEXT_INSN (insn));
+ jump = jump ? single_set (jump) : NULL_RTX;
+
+ if (jump == NULL_RTX)
+ return;
+
+ cond = XEXP (XEXP (jump, 1), 0);
+ }
+
+ *op0 = *op1;
+ *op1 = tmp;
+ PUT_CODE (cond, swap_condition (GET_CODE (cond)));
+}
+
+/* On z10, instructions of the compare-and-branch family have the
+ property that the register occurring as the second operand is
+ accessed with its bits complemented. If such a compare is grouped
+ with a second instruction that accesses the same register
+ non-complemented, and if that register's value is delivered via a
+ bypass, then the pipeline recycles, thereby causing significant
+ performance decline. This function locates such situations and
+ exchanges the two operands of the compare. The function returns
+ true whenever it added an insn. */
+static bool
+s390_z10_optimize_cmp (rtx insn)
+{
+ rtx prev_insn, next_insn;
+ bool insn_added_p = false;
+ rtx cond, *op0, *op1;
+
+ if (GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ /* Handle compare and branch and branch on count
+ instructions. */
+ rtx pattern = single_set (insn);
+
+ if (!pattern
+ || SET_DEST (pattern) != pc_rtx
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
+ return false;
+
+ cond = XEXP (SET_SRC (pattern), 0);
+ op0 = &XEXP (cond, 0);
+ op1 = &XEXP (cond, 1);
+ }
+ else if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx src, dest;
+
+ /* Handle normal compare instructions. */
+ src = SET_SRC (PATTERN (insn));
+ dest = SET_DEST (PATTERN (insn));
+
+ if (!REG_P (dest)
+ || !CC_REGNO_P (REGNO (dest))
+ || GET_CODE (src) != COMPARE)
+ return false;
+
+ /* s390_swap_cmp will try to find the conditional
+ jump when passing NULL_RTX as condition. */
+ cond = NULL_RTX;
+ op0 = &XEXP (src, 0);
+ op1 = &XEXP (src, 1);
+ }
+ else
+ return false;
+
+ if (!REG_P (*op0) || !REG_P (*op1))
+ return false;
+
+ if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
+ return false;
+
+ /* Swap the COMPARE arguments and its mask if there is a
+ conflicting access in the previous insn. */
+ prev_insn = prev_active_insn (insn);
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && reg_referenced_p (*op1, PATTERN (prev_insn)))
+ s390_swap_cmp (cond, op0, op1, insn);
+
+ /* Check if there is a conflict with the next insn. If there
+ was no conflict with the previous insn, then swap the
+ COMPARE arguments and its mask. If we already swapped
+ the operands, or if swapping them would cause a conflict
+ with the previous insn, issue a NOP after the COMPARE in
+ order to separate the two instructions.  */
+ next_insn = next_active_insn (insn);
+ if (next_insn != NULL_RTX && INSN_P (next_insn)
+ && s390_non_addr_reg_read_p (*op1, next_insn))
+ {
+ if (prev_insn != NULL_RTX && INSN_P (prev_insn)
+ && s390_non_addr_reg_read_p (*op0, prev_insn))
+ {
+ if (REGNO (*op1) == 0)
+ emit_insn_after (gen_nop1 (), insn);
+ else
+ emit_insn_after (gen_nop (), insn);
+ insn_added_p = true;
+ }
+ else
+ s390_swap_cmp (cond, op0, op1, insn);
+ }
+ return insn_added_p;
+}
+
+/* Perform machine-dependent processing. */
+
+static void
+s390_reorg (void)
+{
+ bool pool_overflow = false;
+
+ /* Make sure all splits have been performed; splits after
+ machine_dependent_reorg might confuse insn length counts. */
+ split_all_insns_noflow ();
+
+ /* Install the main literal pool and the associated base
+ register load insns.
+
+ In addition, there are two problematic situations we need
+ to correct:
+
+ - the literal pool might be > 4096 bytes in size, so that
+ some of its elements cannot be directly accessed
+
+ - a branch target might be > 64K away from the branch, so that
+ it is not possible to use a PC-relative instruction.
+
+ To fix those, we split the single literal pool into multiple
+ pool chunks, reloading the pool base register at various
+ points throughout the function to ensure it always points to
+ the pool chunk the following code expects, and / or replace
+ PC-relative branches by absolute branches.
+
+ However, the two problems are interdependent: splitting the
+ literal pool can move a branch further away from its target,
+ causing the 64K limit to overflow, and on the other hand,
+ replacing a PC-relative branch by an absolute branch means
+ we need to put the branch target address into the literal
+ pool, possibly causing it to overflow.
+
+ So, we loop trying to fix up both problems until we manage
+ to satisfy both conditions at the same time. Note that the
+ loop is guaranteed to terminate as every pass of the loop
+ strictly decreases the total number of PC-relative branches
+ in the function. (This is not completely true as there
+ might be branch-over-pool insns introduced by chunkify_start.
+ Those never need to be split however.) */
+
+ for (;;)
+ {
+ struct constant_pool *pool = NULL;
+
+ /* Collect the literal pool. */
+ if (!pool_overflow)
+ {
+ pool = s390_mainpool_start ();
+ if (!pool)
+ pool_overflow = true;
+ }
+
+ /* If literal pool overflowed, start to chunkify it. */
+ if (pool_overflow)
+ pool = s390_chunkify_start ();
+
+ /* Split out-of-range branches. If this has created new
+ literal pool entries, cancel current chunk list and
+ recompute it. zSeries machines have large branch
+ instructions, so we never need to split a branch. */
+ if (!TARGET_CPU_ZARCH && s390_split_branches ())
+ {
+ if (pool_overflow)
+ s390_chunkify_cancel (pool);
+ else
+ s390_mainpool_cancel (pool);
+
+ continue;
+ }
+
+ /* If we made it up to here, both conditions are satisfied.
+ Finish up literal pool related changes. */
+ if (pool_overflow)
+ s390_chunkify_finish (pool);
+ else
+ s390_mainpool_finish (pool);
+
+ /* We're done splitting branches. */
+ cfun->machine->split_branches_pending_p = false;
+ break;
+ }
+
+ /* Generate out-of-pool execute target insns. */
+ if (TARGET_CPU_ZARCH)
+ {
+ rtx insn, label, target;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ label = s390_execute_label (insn);
+ if (!label)
+ continue;
+
+ gcc_assert (label != const0_rtx);
+
+ target = emit_label (XEXP (label, 0));
+ INSN_ADDRESSES_NEW (target, -1);
+
+ target = emit_insn (s390_execute_target (insn));
+ INSN_ADDRESSES_NEW (target, -1);
+ }
+ }
+
+ /* Try to optimize prologue and epilogue further. */
+ s390_optimize_prologue ();
+
+ /* Walk over the insns and do some >=z10 specific changes. */
+ if (s390_tune == PROCESSOR_2097_Z10
+ || s390_tune == PROCESSOR_2817_Z196)
+ {
+ rtx insn;
+ bool insn_added_p = false;
+
+ /* The insn lengths and addresses have to be up to date for the
+ following manipulations. */
+ shorten_branches (get_insns ());
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ insn_added_p |= s390_fix_long_loop_prediction (insn);
+
+ if ((GET_CODE (PATTERN (insn)) == PARALLEL
+ || GET_CODE (PATTERN (insn)) == SET)
+ && s390_tune == PROCESSOR_2097_Z10)
+ insn_added_p |= s390_z10_optimize_cmp (insn);
+ }
+
+ /* Adjust branches if we added new instructions. */
+ if (insn_added_p)
+ shorten_branches (get_insns ());
+ }
+}
+
+/* Return true if INSN is a fp load insn writing register REGNO. */
+static inline bool
+s390_fpload_toreg (rtx insn, unsigned int regno)
+{
+ rtx set;
+ enum attr_type flag = s390_safe_attr_type (insn);
+
+ if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
+ return false;
+
+ set = single_set (insn);
+
+ if (set == NULL_RTX)
+ return false;
+
+ if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
+ return false;
+
+ if (REGNO (SET_DEST (set)) != regno)
+ return false;
+
+ return true;
+}
+
+/* This value describes the distance to be avoided between an
+ arithmetic fp instruction and an fp load writing the same register.
+ Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
+ fine but the exact value has to be avoided. Otherwise the FP
+ pipeline will throw an exception causing a major penalty. */
+#define Z10_EARLYLOAD_DISTANCE 7
+
+/* Rearrange the ready list in order to avoid the situation described
+ for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
+ moved to the very end of the ready list. */
+static void
+s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
+{
+ unsigned int regno;
+ int nready = *nready_p;
+ rtx tmp;
+ int i;
+ rtx insn;
+ rtx set;
+ enum attr_type flag;
+ int distance;
+
+ /* Skip DISTANCE - 1 active insns. */
+ for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
+ distance > 0 && insn != NULL_RTX;
+ distance--, insn = prev_active_insn (insn))
+ if (CALL_P (insn) || JUMP_P (insn))
+ return;
+
+ if (insn == NULL_RTX)
+ return;
+
+ set = single_set (insn);
+
+ if (set == NULL_RTX || !REG_P (SET_DEST (set))
+ || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
+ return;
+
+ flag = s390_safe_attr_type (insn);
+
+ if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
+ return;
+
+ regno = REGNO (SET_DEST (set));
+ i = nready - 1;
+
+ while (!s390_fpload_toreg (ready[i], regno) && i > 0)
+ i--;
+
+ if (!i)
+ return;
+
+ tmp = ready[i];
+ memmove (&ready[1], &ready[0], sizeof (rtx) * i);
+ ready[0] = tmp;
+}
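+
+/* Sketch: with Z10_EARLYLOAD_DISTANCE == 7, if the fp arithmetic insn
+ scheduled Z10_EARLYLOAD_DISTANCE - 1 active insns back writes, say,
+ %f4, then an fp load of %f4 found in READY is moved to ready[0],
+ the very end of the list, so that it is not issued at the exact
+ distance to be avoided.  */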
+
+/* This function is called via hook TARGET_SCHED_REORDER before
+ issuing one insn from list READY which contains *NREADYP entries.
+ For target z10 it reorders load instructions to avoid early load
+ conflicts in the floating point pipeline.  */
+static int
+s390_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
+{
+ if (s390_tune == PROCESSOR_2097_Z10)
+ if (reload_completed && *nreadyp > 1)
+ s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
+
+ return s390_issue_rate ();
+}
+
+/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
+ the scheduler has issued INSN. It stores the last issued insn into
+ last_scheduled_insn in order to make it available for
+ s390_sched_reorder. */
+static int
+s390_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ rtx insn, int more)
+{
+ last_scheduled_insn = insn;
+
+ if (GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ return more - 1;
+ else
+ return more;
+}
+
+static void
+s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ last_scheduled_insn = NULL_RTX;
+}
+
+/* This function checks the whole of insn X for memory references. The
+ function always returns zero because the framework it is called
+ from would stop recursively analyzing the insn upon a return value
+ other than zero. The real result of this function is updating
+ counter variable MEM_COUNT. */
+static int
+check_dpu (rtx *x, unsigned *mem_count)
+{
+ if (*x != NULL_RTX && MEM_P (*x))
+ (*mem_count)++;
+ return 0;
+}
+
+/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
+ the number of times struct loop *LOOP should be unrolled when tuning
+ for cpus with a built-in stride prefetcher.
+ The loop is analyzed for memory accesses by calling check_dpu for
+ each rtx of the loop. Depending on the loop_depth and the number of
+ memory accesses, an adjusted unroll factor <= nunroll is returned to
+ improve the behaviour of the hardware prefetch unit.  */
+static unsigned
+s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+ basic_block *bbs;
+ rtx insn;
+ unsigned i;
+ unsigned mem_count = 0;
+
+ if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196)
+ return nunroll;
+
+ /* Count the number of memory references within the loop body. */
+ bbs = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
+ if (INSN_P (insn) && INSN_CODE (insn) != -1)
+ for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
+ }
+ free (bbs);
+
+ /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
+ if (mem_count == 0)
+ return nunroll;
+
+ switch (loop_depth(loop))
+ {
+ case 1:
+ return MIN (nunroll, 28 / mem_count);
+ case 2:
+ return MIN (nunroll, 22 / mem_count);
+ default:
+ return MIN (nunroll, 16 / mem_count);
+ }
+}
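+
+/* Worked example: for a depth-1 loop containing four memory references
+ the unroll factor is capped at MIN (nunroll, 28 / 4), i.e. at most 7;
+ deeper nests are capped more aggressively.  */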
+
+/* Initialize GCC target structure. */
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER s390_assemble_integer
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN ""
+
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN ""
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION s390_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE s390_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE s390_option_optimization_table
+
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT s390_option_init_struct
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY s390_return_in_memory
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS s390_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN s390_expand_builtin
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER s390_sched_reorder
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT s390_sched_init
+
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS s390_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST s390_address_cost
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
+
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG s390_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE s390_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE s390_libcall_value
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
+
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
+
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
+#endif
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE s390_mangle_type
+#endif
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD s390_secondary_reload
+
+#undef TARGET_LIBGCC_CMP_RETURN_MODE
+#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
+
+#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
+#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE s390_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
+
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT s390_trampoline_init
+
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-s390.h"
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
new file mode 100644
index 000000000..ec395e295
--- /dev/null
+++ b/gcc/config/s390/s390.h
@@ -0,0 +1,954 @@
+/* Definitions of target machine for GNU compiler, for IBM S/390
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ Ulrich Weigand (uweigand@de.ibm.com).
+ Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _S390_H
+#define _S390_H
+
+/* Which processor to generate code or schedule for. The cpu attribute
+ defines a list that mirrors this list, so changes to s390.md must be
+ made at the same time. */
+
+enum processor_type
+{
+ PROCESSOR_9672_G5,
+ PROCESSOR_9672_G6,
+ PROCESSOR_2064_Z900,
+ PROCESSOR_2084_Z990,
+ PROCESSOR_2094_Z9_109,
+ PROCESSOR_2097_Z10,
+ PROCESSOR_2817_Z196,
+ PROCESSOR_max
+};
+
+/* Optional architectural facilities supported by the processor. */
+
+enum processor_flags
+{
+ PF_IEEE_FLOAT = 1,
+ PF_ZARCH = 2,
+ PF_LONG_DISPLACEMENT = 4,
+ PF_EXTIMM = 8,
+ PF_DFP = 16,
+ PF_Z10 = 32,
+ PF_Z196 = 64
+};
+
+extern enum processor_type s390_tune;
+extern int s390_tune_flags;
+
+/* This is necessary to avoid a warning about comparing different enum
+ types. */
+#define s390_tune_attr ((enum attr_cpu)s390_tune)
+
+extern enum processor_type s390_arch;
+extern int s390_arch_flags;
+
+/* These flags indicate that the generated code should run on a cpu
+ providing the respective hardware facility regardless of the
+ current cpu mode (ESA or z/Architecture). */
+
+#define TARGET_CPU_IEEE_FLOAT \
+ (s390_arch_flags & PF_IEEE_FLOAT)
+#define TARGET_CPU_ZARCH \
+ (s390_arch_flags & PF_ZARCH)
+#define TARGET_CPU_LONG_DISPLACEMENT \
+ (s390_arch_flags & PF_LONG_DISPLACEMENT)
+#define TARGET_CPU_EXTIMM \
+ (s390_arch_flags & PF_EXTIMM)
+#define TARGET_CPU_DFP \
+ (s390_arch_flags & PF_DFP)
+#define TARGET_CPU_Z10 \
+ (s390_arch_flags & PF_Z10)
+#define TARGET_CPU_Z196 \
+ (s390_arch_flags & PF_Z196)
+
+/* These flags indicate that the generated code should run on a cpu
+ providing the respective hardware facility when run in
+ z/Architecture mode. */
+
+#define TARGET_LONG_DISPLACEMENT \
+ (TARGET_ZARCH && TARGET_CPU_LONG_DISPLACEMENT)
+#define TARGET_EXTIMM \
+ (TARGET_ZARCH && TARGET_CPU_EXTIMM)
+#define TARGET_DFP \
+ (TARGET_ZARCH && TARGET_CPU_DFP && TARGET_HARD_FLOAT)
+#define TARGET_Z10 \
+ (TARGET_ZARCH && TARGET_CPU_Z10)
+#define TARGET_Z196 \
+ (TARGET_ZARCH && TARGET_CPU_Z196)
+
+
+#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
+
+/* Run-time target specification. */
+
+/* Defaults for option flags defined only on some subtargets. */
+#ifndef TARGET_TPF_PROFILING
+#define TARGET_TPF_PROFILING 0
+#endif
+
+/* This will be overridden by OS headers. */
+#define TARGET_TPF 0
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("cpu=s390"); \
+ builtin_assert ("machine=s390"); \
+ builtin_define ("__s390__"); \
+ if (TARGET_ZARCH) \
+ builtin_define ("__zarch__"); \
+ if (TARGET_64BIT) \
+ builtin_define ("__s390x__"); \
+ if (TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONG_DOUBLE_128__"); \
+ } \
+ while (0)
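+
+/* Illustration only (not part of the upstream sources): when compiling
+   with -m64 -mzarch, all of __s390__, __s390x__ and __zarch__ end up
+   predefined, so user code can select the GPR width like this:
+
+     #ifdef __s390x__
+     typedef unsigned long s390_uword;   (64-bit GPRs)
+     #else
+     typedef unsigned int s390_uword;    (32-bit GPRs)
+     #endif
+
+   s390_uword is a hypothetical name used only for this sketch.  */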
+
+#ifdef DEFAULT_TARGET_64BIT
+#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP)
+#else
+#define TARGET_DEFAULT 0
+#endif
+
+/* Support for configure-time defaults. */
+#define OPTION_DEFAULT_SPECS \
+ { "mode", "%{!mesa:%{!mzarch:-m%(VALUE)}}" }, \
+ { "arch", "%{!march=*:-march=%(VALUE)}" }, \
+ { "tune", "%{!mtune=*:-mtune=%(VALUE)}" }
+
+/* Defaulting rules. */
+#ifdef DEFAULT_TARGET_64BIT
+#define DRIVER_SELF_SPECS \
+ "%{!m31:%{!m64:-m64}}", \
+ "%{!mesa:%{!mzarch:%{m31:-mesa}%{m64:-mzarch}}}", \
+ "%{!march=*:%{mesa:-march=g5}%{mzarch:-march=z900}}"
+#else
+#define DRIVER_SELF_SPECS \
+ "%{!m31:%{!m64:-m31}}", \
+ "%{!mesa:%{!mzarch:%{m31:-mesa}%{m64:-mzarch}}}", \
+ "%{!march=*:%{mesa:-march=g5}%{mzarch:-march=z900}}"
+#endif
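+
+/* A worked example of the specs above (our illustration, assuming the
+   specs are applied in order, each one seeing the switches added by the
+   previous one): on a DEFAULT_TARGET_64BIT compiler an empty command
+   line first gains -m64, then -mzarch, then -march=z900; passing -m31
+   instead yields -mesa and -march=g5.  */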
+
+/* Target version string. Overridden by the OS header. */
+#ifdef DEFAULT_TARGET_64BIT
+#define TARGET_VERSION fprintf (stderr, " (zSeries)");
+#else
+#define TARGET_VERSION fprintf (stderr, " (S/390)");
+#endif
+
+/* Constants needed to control the TEST DATA CLASS (TDC) instruction. */
+#define S390_TDC_POSITIVE_ZERO (1 << 11)
+#define S390_TDC_NEGATIVE_ZERO (1 << 10)
+#define S390_TDC_POSITIVE_NORMALIZED_BFP_NUMBER (1 << 9)
+#define S390_TDC_NEGATIVE_NORMALIZED_BFP_NUMBER (1 << 8)
+#define S390_TDC_POSITIVE_DENORMALIZED_BFP_NUMBER (1 << 7)
+#define S390_TDC_NEGATIVE_DENORMALIZED_BFP_NUMBER (1 << 6)
+#define S390_TDC_POSITIVE_INFINITY (1 << 5)
+#define S390_TDC_NEGATIVE_INFINITY (1 << 4)
+#define S390_TDC_POSITIVE_QUIET_NAN (1 << 3)
+#define S390_TDC_NEGATIVE_QUIET_NAN (1 << 2)
+#define S390_TDC_POSITIVE_SIGNALING_NAN (1 << 1)
+#define S390_TDC_NEGATIVE_SIGNALING_NAN (1 << 0)
+
+/* The following values are different for DFP. */
+#define S390_TDC_POSITIVE_DENORMALIZED_DFP_NUMBER (1 << 9)
+#define S390_TDC_NEGATIVE_DENORMALIZED_DFP_NUMBER (1 << 8)
+#define S390_TDC_POSITIVE_NORMALIZED_DFP_NUMBER (1 << 7)
+#define S390_TDC_NEGATIVE_NORMALIZED_DFP_NUMBER (1 << 6)
+
+/* For signbit, the BFP/DFP distinction makes no difference.  */
+#define S390_TDC_SIGNBIT_SET (S390_TDC_NEGATIVE_ZERO \
+ | S390_TDC_NEGATIVE_NORMALIZED_BFP_NUMBER \
+ | S390_TDC_NEGATIVE_DENORMALIZED_BFP_NUMBER\
+ | S390_TDC_NEGATIVE_INFINITY \
+ | S390_TDC_NEGATIVE_QUIET_NAN \
+ | S390_TDC_NEGATIVE_SIGNALING_NAN )
+
+#define S390_TDC_INFINITY (S390_TDC_POSITIVE_INFINITY \
+ | S390_TDC_NEGATIVE_INFINITY )
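+
+/* A minimal sketch (ours, not upstream) of how these bits combine: a
+   TDC mask matching any NaN, analogous to S390_TDC_INFINITY above,
+   could be written as
+
+     #define S390_TDC_NAN (S390_TDC_POSITIVE_QUIET_NAN \
+                           | S390_TDC_NEGATIVE_QUIET_NAN \
+                           | S390_TDC_POSITIVE_SIGNALING_NAN \
+                           | S390_TDC_NEGATIVE_SIGNALING_NAN)
+
+   S390_TDC_NAN is a hypothetical name used only for this example.  */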
+
+/* Target machine storage layout. */
+
+/* Everything is big-endian. */
+#define BITS_BIG_ENDIAN 1
+#define BYTES_BIG_ENDIAN 1
+#define WORDS_BIG_ENDIAN 1
+
+#define STACK_SIZE_MODE (Pmode)
+
+#ifndef IN_LIBGCC2
+
+/* Width of a word, in units (bytes). */
+ #define UNITS_PER_WORD (TARGET_ZARCH ? 8 : 4)
+
+/* Width of a pointer. To be used instead of UNITS_PER_WORD in
+ ABI-relevant contexts. This always matches
+ GET_MODE_SIZE (Pmode). */
+ #define UNITS_PER_LONG (TARGET_64BIT ? 8 : 4)
+ #define MIN_UNITS_PER_WORD 4
+ #define MAX_BITS_PER_WORD 64
+#else
+
+ /* In libgcc, UNITS_PER_WORD has ABI-relevant effects, e.g. whether
+ the library should export TImode functions or not. Thus, we have
+ to redefine UNITS_PER_WORD depending on __s390x__ for libgcc. */
+ #ifdef __s390x__
+ #define UNITS_PER_WORD 8
+ #else
+ #define UNITS_PER_WORD 4
+ #endif
+#endif
+
+/* Width of a pointer, in bits. */
+#define POINTER_SIZE (TARGET_64BIT ? 64 : 32)
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_64BIT ? 64 : 32)
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY 64
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* There is no point aligning anything to a rounder boundary than this. */
+#define BIGGEST_ALIGNMENT 64
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Alignment on even addresses for LARL instruction. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
+#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
+
+/* Alignment is not required by the hardware. */
+#define STRICT_ALIGNMENT 0
+
+/* Mode of stack savearea.
+ FUNCTION is VOIDmode because calling convention maintains SP.
+ BLOCK needs Pmode for SP.
+ NONLOCAL needs twice Pmode to maintain both backchain and SP. */
+#define STACK_SAVEAREA_MODE(LEVEL) \
+ (LEVEL == SAVE_FUNCTION ? VOIDmode \
+ : LEVEL == SAVE_NONLOCAL ? (TARGET_64BIT ? OImode : TImode) : Pmode)
+
+
+/* Type layout. */
+
+/* Sizes in bits of the source language data types. */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE (TARGET_64BIT ? 64 : 32)
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+/* Define this to set long double type size to use in libgcc2.c, which can
+ not depend on target_flags. */
+#ifdef __LONG_DOUBLE_128__
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+/* Work around target_flags dependency in ada/targtyps.c. */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+/* We use "unsigned char" as default. */
+#define DEFAULT_SIGNED_CHAR 0
+
+
+/* Register usage. */
+
+/* We have 16 general purpose registers (registers 0-15),
+ and 16 floating point registers (registers 16-31).
+ (On non-IEEE machines, we have only 4 fp registers.)
+
+ Amongst the general purpose registers, some are used
+ for specific purposes:
+ GPR 11: Hard frame pointer (if needed)
+ GPR 12: Global offset table pointer (if needed)
+ GPR 13: Literal pool base register
+ GPR 14: Return address register
+ GPR 15: Stack pointer
+
+ Registers 32-35 are 'fake' hard registers that do not
+ correspond to actual hardware:
+ Reg 32: Argument pointer
+ Reg 33: Condition code
+ Reg 34: Frame pointer
+ Reg 35: Return address pointer
+
+ Registers 36 and 37 are mapped to access registers
+ 0 and 1, used to implement thread-local storage. */
+
+#define FIRST_PSEUDO_REGISTER 38
+
+/* Standard register usage. */
+#define GENERAL_REGNO_P(N) ((int)(N) >= 0 && (N) < 16)
+#define ADDR_REGNO_P(N) ((N) >= 1 && (N) < 16)
+#define FP_REGNO_P(N) ((N) >= 16 && (N) < 32)
+#define CC_REGNO_P(N) ((N) == 33)
+#define FRAME_REGNO_P(N) ((N) == 32 || (N) == 34 || (N) == 35)
+#define ACCESS_REGNO_P(N) ((N) == 36 || (N) == 37)
+
+#define GENERAL_REG_P(X) (REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
+#define ADDR_REG_P(X) (REG_P (X) && ADDR_REGNO_P (REGNO (X)))
+#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
+#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define FRAME_REG_P(X) (REG_P (X) && FRAME_REGNO_P (REGNO (X)))
+#define ACCESS_REG_P(X) (REG_P (X) && ACCESS_REGNO_P (REGNO (X)))
+
+/* Set up fixed registers and calling convention:
+
+ GPRs 0-5 are always call-clobbered,
+ GPRs 6-15 are always call-saved.
+ GPR 12 is fixed if used as GOT pointer.
+ GPR 13 is always fixed (as literal pool pointer).
+ GPR 14 is always fixed on S/390 machines (as return address).
+ GPR 15 is always fixed (as stack pointer).
+ The 'fake' hard registers are call-clobbered and fixed.
+ The access registers are call-saved and fixed.
+
+ On 31-bit, FPRs 18-19 are call-clobbered;
+ on 64-bit, FPRs 24-31 are call-clobbered.
+ The remaining FPRs are call-saved. */
+
+#define FIXED_REGISTERS \
+{ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 1, 1, 1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 1, 1, 1, 1, \
+ 1, 1 }
+
+#define CALL_USED_REGISTERS \
+{ 1, 1, 1, 1, \
+ 1, 1, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1 }
+
+#define CALL_REALLY_USED_REGISTERS \
+{ 1, 1, 1, 1, \
+ 1, 1, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 0, 0 }
+
+/* Preferred register allocation order. */
+#define REG_ALLOC_ORDER \
+{ 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 24, 25, 26, 27, 28, 29, 30, 31, \
+ 15, 32, 33, 34, 35, 36, 37 }
+
+
+/* Fitting values into registers. */
+
+/* Integer modes <= word size fit into any GPR.
+ Integer modes > word size fit into successive GPRs, starting with
+ an even-numbered register.
+ SImode and DImode fit into FPRs as well.
+
+ Floating point modes <= word size fit into any FPR or GPR.
+ Floating point modes > word size (i.e. DFmode on 32-bit) fit
+ into any FPR, or an even-odd GPR pair.
+ TFmode fits only into an even-odd FPR pair.
+
+ Complex floating point modes fit either into two FPRs, or into
+ successive GPRs (again starting with an even number).
+ TCmode fits only into two successive even-odd FPR pairs.
+
+ Condition code modes fit only into the CC register. */
+
+/* Because all registers in a class have the same size HARD_REGNO_NREGS
+ is equivalent to CLASS_MAX_NREGS. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ s390_class_max_nregs (REGNO_REG_CLASS (REGNO), (MODE))
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ s390_hard_regno_mode_ok ((REGNO), (MODE))
+
+#define HARD_REGNO_RENAME_OK(FROM, TO) \
+ s390_hard_regno_rename_ok (FROM, TO)
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (((MODE1) == SFmode || (MODE1) == DFmode) \
+ == ((MODE2) == SFmode || (MODE2) == DFmode))
+
+/* When generating code that runs in z/Architecture mode,
+ but conforms to the 31-bit ABI, GPRs can hold 8 bytes;
+ the ABI guarantees only that the lower 4 bytes are
+ saved across calls, however. */
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
+ (!TARGET_64BIT && TARGET_ZARCH \
+ && GET_MODE_SIZE (MODE) > 4 \
+ && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32))
+
+/* Maximum number of registers to represent a value of mode MODE
+ in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ s390_class_max_nregs ((CLASS), (MODE))
+
+/* If a 4-byte value is loaded into a FPR, it is placed into the
+ *upper* half of the register, not the lower. Therefore, we
+ cannot use SUBREGs to switch between modes in FP registers.
+ Likewise for access registers, since they have only half the
+ word size on 64-bit. */
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? ((reg_classes_intersect_p (FP_REGS, CLASS) \
+ && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \
+ || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
+
+/* Register classes. */
+
+/* We use the following register classes:
+ GENERAL_REGS All general purpose registers
+ ADDR_REGS All general purpose registers except %r0
+ (These registers can be used in address generation)
+ FP_REGS All floating point registers
+ CC_REGS The condition code register
+ ACCESS_REGS The access registers
+
+ GENERAL_FP_REGS Union of GENERAL_REGS and FP_REGS
+ ADDR_FP_REGS Union of ADDR_REGS and FP_REGS
+ GENERAL_CC_REGS Union of GENERAL_REGS and CC_REGS
+ ADDR_CC_REGS Union of ADDR_REGS and CC_REGS
+
+ NO_REGS No registers
+ ALL_REGS All registers
+
+ Note that the 'fake' frame pointer and argument pointer registers
+ are included amongst the address registers here. */
+
+enum reg_class
+{
+ NO_REGS, CC_REGS, ADDR_REGS, GENERAL_REGS, ACCESS_REGS,
+ ADDR_CC_REGS, GENERAL_CC_REGS,
+ FP_REGS, ADDR_FP_REGS, GENERAL_FP_REGS,
+ ALL_REGS, LIM_REG_CLASSES
+};
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "CC_REGS", "ADDR_REGS", "GENERAL_REGS", "ACCESS_REGS", \
+ "ADDR_CC_REGS", "GENERAL_CC_REGS", \
+ "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", "ALL_REGS" }
+
+/* Class -> register mapping. */
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00000000, 0x00000002 }, /* CC_REGS */ \
+ { 0x0000fffe, 0x0000000d }, /* ADDR_REGS */ \
+ { 0x0000ffff, 0x0000000d }, /* GENERAL_REGS */ \
+ { 0x00000000, 0x00000030 }, /* ACCESS_REGS */ \
+ { 0x0000fffe, 0x0000000f }, /* ADDR_CC_REGS */ \
+ { 0x0000ffff, 0x0000000f }, /* GENERAL_CC_REGS */ \
+ { 0xffff0000, 0x00000000 }, /* FP_REGS */ \
+ { 0xfffffffe, 0x0000000d }, /* ADDR_FP_REGS */ \
+ { 0xffffffff, 0x0000000d }, /* GENERAL_FP_REGS */ \
+ { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \
+}
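+
+/* Reading the table above (our illustration): each entry is a bit mask
+   over the 38 hard registers, 32 bits per word.  For ADDR_REGS,
+   0x0000fffe selects GPRs 1-15 (bit 0, i.e. %r0, is excluded) and
+   0x0000000d in the second word selects registers 32, 34 and 35 - the
+   fake argument, frame and return address pointers - but not register
+   33, the condition code.  */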
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of the registers.  The macro value is an
+   array of register classes with LIM_REG_CLASSES used as the end
+   marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FP_REGS, CC_REGS, ACCESS_REGS, LIM_REG_CLASSES \
+}
+
+/* In some cases the register allocation order alone is not enough for
+   IRA to generate good code.  The following macro (if defined) increases
+   the cost of REGNO for a pseudo approximately by the pseudo's usage
+   frequency multiplied by the macro value.
+
+   We discourage the use of BASE_REGNUM with a nonzero macro value
+   because reload can decide not to use the hard register after some
+   constant was forced into memory.  */
+#define IRA_HARD_REGNO_ADD_COST_MULTIPLIER(regno) \
+ (regno == BASE_REGNUM ? 0.0 : 0.5)
+
+/* Register -> class mapping. */
+extern const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])
+
+/* ADDR_REGS can be used as base or index register. */
+#define INDEX_REG_CLASS ADDR_REGS
+#define BASE_REG_CLASS ADDR_REGS
+
+/* Check whether REGNO is a hard register of the suitable class
+ or a pseudo register currently allocated to one such. */
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ (((REGNO) < FIRST_PSEUDO_REGISTER \
+ && REGNO_REG_CLASS ((REGNO)) == ADDR_REGS) \
+ || ADDR_REGNO_P (reg_renumber[REGNO]))
+#define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO)
+
+
+/* We need secondary memory to move data between GPRs and FPRs.  With
+   DFP the ldgr and lgdr instructions are available.  But these
+   instructions do not handle GPR pairs, so they cannot be used in
+   31-bit mode.  */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ ((CLASS1) != (CLASS2) \
+ && ((CLASS1) == FP_REGS || (CLASS2) == FP_REGS) \
+ && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8))
+
+/* get_secondary_mem widens its argument to BITS_PER_WORD, which does not
+   work on 64-bit because the movsi and movsf patterns don't handle r/f
+   moves.  */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (GET_MODE_BITSIZE (MODE) < 32 \
+ ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ : MODE)
+
+
+/* Stack layout and calling conventions. */
+
+/* Our stack grows from higher to lower addresses. However, local variables
+ are accessed by positive offsets, and function arguments are stored at
+ increasing addresses. */
+#define STACK_GROWS_DOWNWARD
+#define FRAME_GROWS_DOWNWARD 1
+/* #undef ARGS_GROW_DOWNWARD */
+
+/* The basic stack layout looks like this: the stack pointer points
+ to the register save area for called functions. Above that area
+ is the location to place outgoing arguments. Above those follow
+ dynamic allocations (alloca), and finally the local variables. */
+
+/* Offset from stack-pointer to first location of outgoing args. */
+#define STACK_POINTER_OFFSET (TARGET_64BIT ? 160 : 96)
+
+/* Offset within stack frame to start allocating local variables at. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset from the stack pointer register to an item dynamically
+ allocated on the stack, e.g., by `alloca'. */
+#define STACK_DYNAMIC_OFFSET(FUNDECL) \
+ (STACK_POINTER_OFFSET + crtl->outgoing_args_size)
+
+/* Offset of first parameter from the argument pointer register value.
+ We have a fake argument pointer register that points directly to
+ the argument area. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Defining this macro makes __builtin_frame_address(0) and
+ __builtin_return_address(0) work with -fomit-frame-pointer. */
+#define INITIAL_FRAME_ADDRESS_RTX \
+ (plus_constant (arg_pointer_rtx, -STACK_POINTER_OFFSET))
+
+/* The return address of the current frame is retrieved
+ from the initial value of register RETURN_REGNUM.
+ For frames farther back, we use the stack slot where
+ the corresponding RETURN_REGNUM register was saved. */
+#define DYNAMIC_CHAIN_ADDRESS(FRAME) \
+ (TARGET_PACKED_STACK ? \
+ plus_constant ((FRAME), STACK_POINTER_OFFSET - UNITS_PER_LONG) : (FRAME))
+
+/* For -mpacked-stack this adds 160 - 8 (or 96 - 4) to the output of
+   __builtin_frame_address.  Otherwise arg pointer -
+   STACK_POINTER_OFFSET would be returned for
+   __builtin_frame_address (0), which might result in an address pointing
+   somewhere into the middle of the local variables, since the packed
+   stack layout generally does not need all the bytes in the register
+   save area.  */
+#define FRAME_ADDR_RTX(FRAME) \
+ DYNAMIC_CHAIN_ADDRESS ((FRAME))
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ s390_return_addr_rtx ((COUNT), DYNAMIC_CHAIN_ADDRESS ((FRAME)))
+
+/* In 31-bit mode, we need to mask off the high bit of return addresses. */
+#define MASK_RETURN_ADDR (TARGET_64BIT ? constm1_rtx : GEN_INT (0x7fffffff))
+
+
+/* Exception handling. */
+
+/* Describe calling conventions for DWARF-2 exception handling. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_REGNUM)
+#define INCOMING_FRAME_SP_OFFSET STACK_POINTER_OFFSET
+#define DWARF_FRAME_RETURN_COLUMN 14
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 6 : INVALID_REGNUM)
+#define EH_RETURN_HANDLER_RTX gen_rtx_MEM (Pmode, return_address_pointer_rtx)
+
+/* Select a format to encode pointers in exception handling data. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (flag_pic \
+ ? ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 \
+ : DW_EH_PE_absptr)
+
+/* Register save slot alignment. */
+#define DWARF_CIE_DATA_ALIGNMENT (-UNITS_PER_LONG)
+
+
+/* Frame registers. */
+
+#define STACK_POINTER_REGNUM 15
+#define FRAME_POINTER_REGNUM 34
+#define HARD_FRAME_POINTER_REGNUM 11
+#define ARG_POINTER_REGNUM 32
+#define RETURN_ADDRESS_POINTER_REGNUM 35
+
+/* The static chain must be call-clobbered, but not used for
+ function argument passing. As register 1 is clobbered by
+ the trampoline code, we only have one option. */
+#define STATIC_CHAIN_REGNUM 0
+
+/* Number of hardware registers that go into the DWARF-2 unwind info.
+ To avoid ABI incompatibility, this number must not change even as
+ 'fake' hard registers are added or removed. */
+#define DWARF_FRAME_REGISTERS 34
+
+
+/* Frame pointer and argument pointer elimination. */
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { BASE_REGNUM, BASE_REGNUM }}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = s390_initial_elimination_offset ((FROM), (TO))
+
+
+/* Stack arguments. */
+
+/* We need crtl->outgoing_args_size to be valid.  */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+
+/* Register arguments. */
+
+typedef struct s390_arg_structure
+{
+ int gprs; /* gpr so far */
+ int fprs; /* fpr so far */
+}
+CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, NN, N_NAMED_ARGS) \
+ ((CUM).gprs=0, (CUM).fprs=0)
+
+/* Arguments can be placed in general registers 2 to 6, in floating
+   point registers 0 and 2 on 31 bit, and in FPRs 0, 2, 4 and 6 on 64
+   bit.  */
+#define FUNCTION_ARG_REGNO_P(N) (((N) >=2 && (N) <7) || \
+ (N) == 16 || (N) == 17 || (TARGET_64BIT && ((N) == 18 || (N) == 19)))
+
+
+/* Only gpr 2 and fpr 0 are ever used as return registers. */
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == 2 || (N) == 16)
+
+
+/* Function entry and exit. */
+
+/* When returning from a function, the stack pointer does not matter. */
+#define EXIT_IGNORE_STACK 1
+
+
+/* Profiling. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ s390_function_profiler ((FILE), ((LABELNO)))
+
+#define PROFILE_BEFORE_PROLOGUE 1
+
+
+/* Trampolines for nested functions. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 32 : 16)
+#define TRAMPOLINE_ALIGNMENT BITS_PER_WORD
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Recognize any constant value that is a valid address. */
+#define CONSTANT_ADDRESS_P(X) 0
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* This definition replaces the formerly used 'm' constraint with a
+ different constraint letter in order to avoid changing semantics of
+ the 'm' constraint when accepting new address formats in
+ TARGET_LEGITIMATE_ADDRESS_P. The constraint letter defined here
+ must not be used in insn definitions or inline assemblies. */
+#define TARGET_MEM_CONSTRAINT 'e'
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c. */
+#define LEGITIMIZE_RELOAD_ADDRESS(AD, MODE, OPNUM, TYPE, IND, WIN) \
+do { \
+ rtx new_rtx = legitimize_reload_address (AD, MODE, OPNUM, (int)(TYPE)); \
+ if (new_rtx) \
+ { \
+ (AD) = new_rtx; \
+ goto WIN; \
+ } \
+} while (0)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+#define LEGITIMATE_CONSTANT_P(X) \
+ legitimate_constant_p (X)
+
+/* Helper macro for s390.c and s390.md to check for symbolic constants. */
+#define SYMBOLIC_CONST(X) \
+(GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+#define TLS_SYMBOLIC_CONST(X) \
+((GET_CODE (X) == SYMBOL_REF && tls_symbolic_operand (X)) \
+ || (GET_CODE (X) == CONST && tls_symbolic_reference_mentioned_p (X)))
+
+
+/* Condition codes. */
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. */
+#define SELECT_CC_MODE(OP, X, Y) s390_select_ccmode ((OP), (X), (Y))
+
+/* Canonicalize a comparison from one we don't have to one we do have. */
+#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
+ s390_canonicalize_comparison (&(CODE), &(OP0), &(OP1))
+
+/* Relative costs of operations. */
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+#define BRANCH_COST(speed_p, predictable_p) 1
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 1
+
+/* An integer expression for the size in bits of the largest integer machine
+ mode that should actually be used. We allow pairs of registers. */
+#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TARGET_64BIT ? TImode : DImode)
+
+/* The maximum number of bytes that a single instruction can move quickly
+ between memory and registers or between two memory locations. */
+#define MOVE_MAX (TARGET_ZARCH ? 16 : 8)
+#define MOVE_MAX_PIECES (TARGET_ZARCH ? 8 : 4)
+#define MAX_MOVE_MAX 16
+
+/* Determine whether to use move_by_pieces or block move insn. */
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+ ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \
+ || (TARGET_ZARCH && (SIZE) == 8) )
+
+/* Determine whether to use clear_by_pieces or block clear insn. */
+#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
+ ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \
+ || (TARGET_ZARCH && (SIZE) == 8) )
+
+/* This macro is used to determine whether store_by_pieces should be
+ called to "memcpy" storage when the source is a constant string. */
+#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
+
+/* Likewise to decide whether to "memset" storage with byte values
+ other than zero. */
+#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN)
+
+/* Don't perform CSE on function addresses. */
+#define NO_FUNCTION_CSE
+
+/* This value is used in tree-sra to decide whether it might be beneficial
+   to split a struct move into several word-size moves.  For S/390 only
+   small values make sense here, since struct moves are relatively cheap
+   thanks to mvc, so the small default value chosen for archs with memmove
+   patterns should be fine.  But this value is multiplied in tree-sra by
+   UNITS_PER_WORD to make the decision, so we adjust it here to compensate
+   for that factor, since mvc costs exactly the same on 31 and 64 bit.  */
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 2 : 4)
+
+
+/* Sections. */
+
+/* Output before read-only data. */
+#define TEXT_SECTION_ASM_OP ".text"
+
+/* Output before writable (initialized) data. */
+#define DATA_SECTION_ASM_OP ".data"
+
+/* Output before writable (uninitialized) data. */
+#define BSS_SECTION_ASM_OP ".bss"
+
+/* The S/390 constant pool breaks the devices in crtstuff.c that control
+   the section in which code resides.  We have to write it as asm code.  */
+#ifndef __s390x__
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ bras\t%r2,1f\n\
+0: .long\t" USER_LABEL_PREFIX #FUNC " - 0b\n\
+1: l\t%r3,0(%r2)\n\
+ bas\t%r14,0(%r3,%r2)\n\
+ .previous");
+#endif
+
+
+/* Position independent code. */
+
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 12 : INVALID_REGNUM)
+
+#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+
+
+/* Assembler file format. */
+
+/* Character to start a comment. */
+#define ASM_COMMENT_START "#"
+
+/* Declare an uninitialized external linkage data object. */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP ".globl "
+
+/* Advance the location counter to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(FILE, LOG) \
+ if ((LOG)) fprintf ((FILE), "\t.align\t%d\n", 1 << (LOG))
+
+/* Advance the location counter by SIZE bytes. */
+#define ASM_OUTPUT_SKIP(FILE, SIZE) \
+ fprintf ((FILE), "\t.set\t.,.+"HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* The LOCAL_LABEL_PREFIX variable is used by dbxelf.h. */
+#define LOCAL_LABEL_PREFIX "."
+
+#define LABEL_ALIGN(LABEL) \
+ s390_label_align (LABEL)
+
+/* How to refer to registers in assembler output. This sequence is
+ indexed by compiler's hard-register-number (see above). */
+#define REGISTER_NAMES \
+{ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
+ "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \
+ "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \
+ "%ap", "%cc", "%fp", "%rp", "%a0", "%a1" \
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE. */
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* Output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+do { \
+ char buf[32]; \
+ fputs (integer_asm_op (UNITS_PER_LONG, TRUE), (FILE)); \
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", (VALUE)); \
+ assemble_name ((FILE), buf); \
+ fputc ('\n', (FILE)); \
+} while (0)
+
+/* Output an element of a case-vector that is relative. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+do { \
+ char buf[32]; \
+ fputs (integer_asm_op (UNITS_PER_LONG, TRUE), (FILE)); \
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", (VALUE)); \
+ assemble_name ((FILE), buf); \
+ fputc ('-', (FILE)); \
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", (REL)); \
+ assemble_name ((FILE), buf); \
+ fputc ('\n', (FILE)); \
+} while (0)
+
+
+/* Miscellaneous parameters. */
+
+/* Specify the machine mode that this machine uses for the index in the
+ tablejump instruction. */
+#define CASE_VECTOR_MODE (TARGET_64BIT ? DImode : SImode)
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode ((enum machine_mode) (TARGET_64BIT ? DImode : SImode))
+
+/* This is -1 for "pointer mode" extend. See ptr_extend in s390.md. */
+#define POINTERS_EXTEND_UNSIGNED -1
+
+/* A function address in a call instruction is a byte address (for
+ indexing purposes) so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* Specify the value used when the clz operand is zero.  */
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_ALIGN1 (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_ALIGN1_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_ALIGN1))
+#define SYMBOL_FLAG_NOT_NATURALLY_ALIGNED (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_REF_NOT_NATURALLY_ALIGNED_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_NOT_NATURALLY_ALIGNED))
+
+/* Check whether integer displacement is in range. */
+#define DISP_IN_RANGE(d) \
+ (TARGET_LONG_DISPLACEMENT? ((d) >= -524288 && (d) <= 524287) \
+ : ((d) >= 0 && (d) <= 4095))
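+
+/* Illustration: the classic base+displacement instruction formats encode
+   a 12-bit unsigned displacement, hence 0..4095; the long-displacement
+   formats use a 20-bit signed field, hence -524288..524287.  So, e.g.,
+   DISP_IN_RANGE (-4) holds only when TARGET_LONG_DISPLACEMENT is set.  */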
+
+/* Reads can reuse write prefetches, used by tree-ssa-prefetch-loops.c. */
+#define READ_CAN_USE_WRITE_PREFETCH 1
+#endif
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
new file mode 100644
index 000000000..ac5b7a014
--- /dev/null
+++ b/gcc/config/s390/s390.md
@@ -0,0 +1,9410 @@
+;;- Machine description for GNU compiler -- S/390 / zSeries version.
+;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+;; 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+;; Ulrich Weigand (uweigand@de.ibm.com) and
+;; Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;
+;; See constraints.md for a description of constraints specific to s390.
+;;
+
+;; Special formats used for outputting 390 instructions.
+;;
+;; %C: print opcode suffix for branch condition.
+;; %D: print opcode suffix for inverse branch condition.
+;; %J: print tls_load/tls_gdcall/tls_ldcall suffix.
+;; %G: print the size of the operand in bytes.
+;; %O: print only the displacement of a memory reference.
+;; %R: print only the base register of a memory reference.
+;; %S: print S-type memory reference (base+displacement).
+;; %N: print the second word of a DImode operand.
+;; %M: print the second word of a TImode operand.
+;; %Y: print shift count operand.
+;;
+;; %b: print integer X as if it's an unsigned byte.
+;; %c: print integer X as if it's a signed byte.
+;; %x: print integer X as if it's an unsigned halfword.
+;; %h: print integer X as if it's a signed halfword.
+;; %i: print the first nonzero HImode part of X.
+;; %j: print the first HImode part unequal to -1 of X.
+;; %k: print the first nonzero SImode part of X.
+;; %m: print the first SImode part unequal to -1 of X.
+;; %o: print integer X as if it's an unsigned 32-bit word.
+;;
+;; We have a special constraint for pattern matching.
+;;
+;; s_operand -- Matches a valid S operand in a RS, SI or SS type instruction.
+;;
+
+;;
+;; UNSPEC usage
+;;
+
+(define_constants
+ [; Miscellaneous
+ (UNSPEC_ROUND 1)
+ (UNSPEC_CCU_TO_INT 2)
+ (UNSPEC_CCZ_TO_INT 3)
+ (UNSPEC_ICM 10)
+ (UNSPEC_TIE 11)
+
+ ; GOT/PLT and lt-relative accesses
+ (UNSPEC_LTREL_OFFSET 100)
+ (UNSPEC_LTREL_BASE 101)
+ (UNSPEC_POOL_OFFSET 102)
+ (UNSPEC_GOTENT 110)
+ (UNSPEC_GOT 111)
+ (UNSPEC_GOTOFF 112)
+ (UNSPEC_PLT 113)
+ (UNSPEC_PLTOFF 114)
+
+ ; Literal pool
+ (UNSPEC_RELOAD_BASE 210)
+ (UNSPEC_MAIN_BASE 211)
+ (UNSPEC_LTREF 212)
+ (UNSPEC_INSN 213)
+ (UNSPEC_EXECUTE 214)
+
+ ; Atomic Support
+ (UNSPEC_MB 400)
+
+ ; TLS relocation specifiers
+ (UNSPEC_TLSGD 500)
+ (UNSPEC_TLSLDM 501)
+ (UNSPEC_NTPOFF 502)
+ (UNSPEC_DTPOFF 503)
+ (UNSPEC_GOTNTPOFF 504)
+ (UNSPEC_INDNTPOFF 505)
+
+ ; TLS support
+ (UNSPEC_TLSLDM_NTPOFF 511)
+ (UNSPEC_TLS_LOAD 512)
+
+ ; String Functions
+ (UNSPEC_SRST 600)
+ (UNSPEC_MVST 601)
+
+ ; Stack Smashing Protector
+ (UNSPEC_SP_SET 700)
+ (UNSPEC_SP_TEST 701)
+
+ ; Test Data Class (TDC)
+ (UNSPEC_TDC_INSN 800)
+
+ ; Population Count
+ (UNSPEC_POPCNT 900)
+ (UNSPEC_COPYSIGN 901)
+ ])
+
+;;
+;; UNSPEC_VOLATILE usage
+;;
+
+(define_constants
+ [; Blockage
+ (UNSPECV_BLOCKAGE 0)
+
+ ; TPF Support
+ (UNSPECV_TPF_PROLOGUE 20)
+ (UNSPECV_TPF_EPILOGUE 21)
+
+ ; Literal pool
+ (UNSPECV_POOL 200)
+ (UNSPECV_POOL_SECTION 201)
+ (UNSPECV_POOL_ALIGN 202)
+ (UNSPECV_POOL_ENTRY 203)
+ (UNSPECV_MAIN_POOL 300)
+
+ ; TLS support
+ (UNSPECV_SET_TP 500)
+
+ ; Atomic Support
+ (UNSPECV_CAS 700)
+ (UNSPECV_ATOMIC_OP 701)
+ ])
+
+;;
+;; Registers
+;;
+
+; Registers with special meaning
+
+(define_constants
+ [
+ ; Sibling call register.
+ (SIBCALL_REGNUM 1)
+ ; Literal pool base register.
+ (BASE_REGNUM 13)
+ ; Return address register.
+ (RETURN_REGNUM 14)
+ ; Condition code register.
+ (CC_REGNUM 33)
+ ; Thread local storage pointer register.
+ (TP_REGNUM 36)
+ ])
+
+; Hardware register names
+
+(define_constants
+ [
+ ; General purpose registers
+ (GPR0_REGNUM 0)
+ ; Floating point registers.
+ (FPR0_REGNUM 16)
+ (FPR2_REGNUM 18)
+ ])
+
+;;
+;; PFPO GPR0 argument format
+;;
+
+(define_constants
+ [
+ ; PFPO operation type
+ (PFPO_CONVERT 0x1000000)
+ ; PFPO operand types
+ (PFPO_OP_TYPE_SF 0x5)
+ (PFPO_OP_TYPE_DF 0x6)
+ (PFPO_OP_TYPE_TF 0x7)
+ (PFPO_OP_TYPE_SD 0x8)
+ (PFPO_OP_TYPE_DD 0x9)
+ (PFPO_OP_TYPE_TD 0xa)
+ ; Bitposition of operand types
+ (PFPO_OP0_TYPE_SHIFT 16)
+ (PFPO_OP1_TYPE_SHIFT 8)
+ ])
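+
+;; A sketch (ours, not from the original sources) of how these constants
+;; are meant to be combined: a conversion loads GPR0 with the operation
+;; type plus the result and source operand types, e.g. converting binary
+;; TFmode to decimal TDmode would use something like
+;;   PFPO_CONVERT | PFPO_OP_TYPE_TD << PFPO_OP0_TYPE_SHIFT
+;;                | PFPO_OP_TYPE_TF << PFPO_OP1_TYPE_SHIFT
+;; where operand 0 describes the result and operand 1 the source.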
+
+
+;; Instruction operand type as used in the Principles of Operation.
+;; Used to determine defaults for length and other attribute values.
+
+(define_attr "op_type"
+ "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS"
+ (const_string "NN"))
+
+;; Instruction type attribute used for scheduling.
+
+(define_attr "type" "none,integer,load,lr,la,larl,lm,stm,
+ cs,vs,store,sem,idiv,
+ imulhi,imulsi,imuldi,
+ branch,jsr,fsimptf,fsimpdf,fsimpsf,fhex,
+ floadtf,floaddf,floadsf,fstoredf,fstoresf,
+ fmultf,fmuldf,fmulsf,fdivtf,fdivdf,fdivsf,
+ ftoi,fsqrttf,fsqrtdf,fsqrtsf,
+ fmadddf,fmaddsf,
+ ftrunctf,ftruncdf, ftruncsd, ftruncdd,
+ itoftf, itofdf, itofsf, itofdd, itoftd,
+ fdivdd, fdivtd, floaddd, floadsd, fmuldd, fmultd,
+ fsimpdd, fsimpsd, fsimptd, fstoredd, fstoresd,
+ ftoidfp, other"
+ (cond [(eq_attr "op_type" "NN") (const_string "other")
+ (eq_attr "op_type" "SS") (const_string "cs")]
+ (const_string "integer")))
+
+;; Another attribute used for scheduling purposes:
+;; agen: Instruction uses the address generation unit
+;; reg: Instruction does not use the agen unit
+
+(define_attr "atype" "agen,reg"
+ (if_then_else (eq_attr "op_type" "E,RR,RI,RRE,RSI,RIL,RIE,RRF,RRR")
+ (const_string "reg")
+ (const_string "agen")))
+
+;; Properties concerning Z10 execution grouping and value forwarding.
+;; z10_super: instruction is superscalar.
+;; z10_super_c: instruction is superscalar and meets the condition of z10_c.
+;; z10_fwd: The instruction reads the value of an operand and stores it into a
+;; target register. It can forward this value to a second instruction that reads
+;; the same register if that second instruction is issued in the same group.
+;; z10_rec: The instruction is in the T pipeline and reads a register. If the
+;; instruction in the S pipe writes to the register, then the T instruction
+;; can immediately read the new value.
+;; z10_fr: union of z10_fwd and z10_rec.
+;; z10_c: second operand of instruction is a register and read with complemented bits.
+;;
+;; An additional suffix A1, A3, or E1 indicates the respective AGI bypass.
+
+
+(define_attr "z10prop" "none,
+ z10_super, z10_super_E1, z10_super_A1, z10_super_c, z10_super_c_E1,
+ z10_fwd, z10_fwd_A1, z10_fwd_A3, z10_fwd_E1,
+ z10_rec,
+ z10_fr, z10_fr_A3, z10_fr_E1,
+ z10_c"
+ (const_string "none"))
+
+;; Properties concerning Z196 decoding
+;; z196_alone: must group alone
+;; z196_ends: ends a group
+;; z196_cracked: instruction is cracked or expanded
+(define_attr "z196prop" "none,
+ z196_alone, z196_ends,
+ z196_cracked"
+ (const_string "none"))
+
+;; Length in bytes.
+
+(define_attr "length" ""
+ (cond [(eq_attr "op_type" "E,RR") (const_int 2)
+ (eq_attr "op_type" "RX,RI,RRE,RS,RSI,S,SI,RRF,RRR") (const_int 4)]
+ (const_int 6)))
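+
+;; For example (illustration only): "ltr" below has op_type RR and is
+;; thus 2 bytes long, "icm" (RS) is 4 bytes, and "icmy" (RSY), like all
+;; formats not listed above, falls through to the 6-byte default.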
+
+
+;; Processor type. This attribute must exactly match the processor_type
+;; enumeration in s390.h. The current machine description does not
+;; distinguish between g5 and g6, but the differences between the two
+;; CPUs could in theory be modeled.
+
+(define_attr "cpu" "g5,g6,z900,z990,z9_109,z10,z196"
+ (const (symbol_ref "s390_tune_attr")))
+
+(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196"
+ (const_string "standard"))
+
+(define_attr "enabled" ""
+ (cond [(eq_attr "cpu_facility" "standard")
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "ieee")
+ (ne (symbol_ref "TARGET_CPU_IEEE_FLOAT") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "zarch")
+ (ne (symbol_ref "TARGET_ZARCH") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "longdisp")
+ (ne (symbol_ref "TARGET_LONG_DISPLACEMENT") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "extimm")
+ (ne (symbol_ref "TARGET_EXTIMM") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "dfp")
+ (ne (symbol_ref "TARGET_DFP") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "z10")
+ (ne (symbol_ref "TARGET_Z10") (const_int 0)))
+ (const_int 1)
+
+ (and (eq_attr "cpu_facility" "z196")
+ (ne (symbol_ref "TARGET_Z196") (const_int 0)))
+ (const_int 1)]
+ (const_int 0)))
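+
+;; Example (ours): in *tstdi_sign further down, the two alternatives carry
+;; cpu_facility "*,z10", so the "ltgf" memory alternative is only enabled
+;; when TARGET_Z10 is set; on older CPUs only the "ltgfr" register
+;; alternative remains available.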
+
+;; Pipeline description for z900. For lack of anything better,
+;; this description is also used for the g5 and g6.
+(include "2064.md")
+
+;; Pipeline description for z990, z9-109 and z9-ec.
+(include "2084.md")
+
+;; Pipeline description for z10
+(include "2097.md")
+
+;; Pipeline description for z196
+(include "2817.md")
+
+;; Predicates
+(include "predicates.md")
+
+;; Constraint definitions
+(include "constraints.md")
+
+;; Other includes
+(include "tpf.md")
+
+;; Iterators
+
+;; These mode iterators allow floating point patterns to be generated from the
+;; same template.
+(define_mode_iterator FP_ALL [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")
+ (SD "TARGET_HARD_DFP")])
+(define_mode_iterator FP [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")])
+(define_mode_iterator FPALL [TF DF SF TD DD SD])
+(define_mode_iterator BFP [TF DF SF])
+(define_mode_iterator DFP [TD DD])
+(define_mode_iterator DFP_ALL [TD DD SD])
+(define_mode_iterator DSF [DF SF])
+(define_mode_iterator SD_SF [SF SD])
+(define_mode_iterator DD_DF [DF DD])
+(define_mode_iterator TD_TF [TF TD])
+
+;; This mode iterator allows 31-bit and 64-bit TDSI patterns to be generated
+;; from the same template.
+(define_mode_iterator TDSI [(TI "TARGET_64BIT") DI SI])
+
+;; These mode iterators allow 31-bit and 64-bit GPR patterns to be generated
+;; from the same template.
+(define_mode_iterator GPR [(DI "TARGET_ZARCH") SI])
+(define_mode_iterator DSI [DI SI])
+
+;; These mode iterators allow :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(DI "TARGET_64BIT") (SI "!TARGET_64BIT")])
+
+;; These iterators refer to the actual word_mode of the configuration.
+;; This is equal to Pmode except on 31-bit machines in zarch mode.
+(define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
+(define_mode_iterator W [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
+
+;; This mode iterator allows the QI and HI patterns to be defined from
+;; the same template.
+(define_mode_iterator HQI [HI QI])
+
+;; This mode iterator allows the integer patterns to be defined from the
+;; same template.
+(define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
+(define_mode_iterator INTALL [TI DI SI HI QI])
+
+;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
+;; the same template.
+(define_code_iterator SHIFT [ashift lshiftrt])
+
+;; These iterators and attributes make it possible to combine most atomic operations.
+(define_code_iterator ATOMIC [and ior xor plus minus mult])
+(define_code_iterator ATOMIC_Z196 [and ior xor plus])
+(define_code_attr atomic [(and "and") (ior "ior") (xor "xor")
+ (plus "add") (minus "sub") (mult "nand")])
+(define_code_attr noxa [(and "n") (ior "o") (xor "x") (plus "a")])
+
+;; In FP templates, a string like "lt<de>br" will expand to "ltxbr" in
+;; TF/TDmode, "ltdbr" in DF/DDmode, and "ltebr" in SF/SDmode.
+(define_mode_attr xde [(TF "x") (DF "d") (SF "e") (TD "x") (DD "d") (SD "e")])
+
+;; In FP templates, a <dee> in "m<dee><bt>r" will expand to "mx<bt>r" in
+;; TF/TDmode, "md<bt>r" in DF/DDmode, "mee<bt>r" in SFmode and "me<bt>r" in
+;; SDmode.
+(define_mode_attr xdee [(TF "x") (DF "d") (SF "ee") (TD "x") (DD "d") (SD "e")])
+
+;; In FP templates, "<RRe>" will expand to "RRE" in TFmode and "RR" otherwise.
+;; Likewise for "<RXe>".
+(define_mode_attr RRe [(TF "RRE") (DF "RR") (SF "RR")])
+(define_mode_attr RXe [(TF "RXE") (DF "RX") (SF "RX")])
+
+;; The decimal floating point variants of add, sub, div and mul support 3
+;; fp register operands.  The following attributes make it possible to merge
+;; the bfp and dfp variants into a single insn definition.
+
+;; This attribute is used to set op_type accordingly.
+(define_mode_attr RRer [(TF "RRE") (DF "RRE") (SF "RRE") (TD "RRR")
+ (DD "RRR") (SD "RRR")])
+
+;; This attribute is used in the operand constraint list in order to have the
+;; first and the second operand match for bfp modes.
+(define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (SD "f")])
+
+;; This attribute is used in the operand list of the instruction to have an
+;; additional operand for the dfp instructions.
+(define_mode_attr op1 [(TF "") (DF "") (SF "")
+ (TD "%1,") (DD "%1,") (SD "%1,")])
+
+
+;; This attribute is used in the operand constraint list
+;; for instructions dealing with the sign bit of 32 or 64bit fp values.
+;; TFmode values are represented by a fp register pair. Since the
+;; sign bit instructions only handle single source and target fp registers
+;; these instructions can only be used for TFmode values if the source and
+;; target operand uses the same fp register.
+(define_mode_attr fT0 [(TF "0") (DF "f") (SF "f")])
+
+;; In FP templates, "<Rf>" will expand to "f" in TFmode and "R" otherwise.
+;; This is used to disable the memory alternative in TFmode patterns.
+(define_mode_attr Rf [(TF "f") (DF "R") (SF "R") (TD "f") (DD "f") (SD "f")])
+
+;; This attribute adds b for bfp instructions and t for dfp instructions and is used
+;; within instruction mnemonics.
+(define_mode_attr bt [(TF "b") (DF "b") (SF "b") (TD "t") (DD "t") (SD "t")])
+
+;; This attribute is used within instruction mnemonics. It evaluates to d for dfp
+;; modes and to an empty string for bfp modes.
+(define_mode_attr _d [(TF "") (DF "") (SF "") (TD "d") (DD "d") (SD "d")])
+
+;; In GPR and P templates, a constraint like "<d0>" will expand to "d" in DImode
+;; and "0" in SImode.  This makes it possible to combine instructions whose
+;; 31-bit version operates on only one register.
+(define_mode_attr d0 [(DI "d") (SI "0")])
+
+;; In combination with d0 this makes it possible to combine instructions whose
+;; 31-bit version operates on only one register.  The DImode version needs an
+;; additional register for the assembler output.
+(define_mode_attr 1 [(DI "%1,") (SI "")])
+
+;; In SHIFT templates, a string like "s<lr>dl" will expand to "sldl" in
+;; 'ashift' and "srdl" in 'lshiftrt'.
+(define_code_attr lr [(ashift "l") (lshiftrt "r")])
+
+;; In SHIFT templates, this attribute holds the correct standard name for the
+;; pattern itself and the corresponding function calls.
+(define_code_attr shift [(ashift "ashl") (lshiftrt "lshr")])
+
+;; This attribute handles differences in the instruction 'type' and will result
+;; in "RRE" for DImode and "RR" for SImode.
+(define_mode_attr E [(DI "E") (SI "")])
+
+;; This attribute handles differences in the instruction 'type' and makes RX<Y>
+;; to result in "RXY" for DImode and "RX" for SImode.
+(define_mode_attr Y [(DI "Y") (SI "")])
+
+;; This attribute handles differences in the instruction 'type' and will result
+;; in "RSE" for TImode and "RS" for DImode.
+(define_mode_attr TE [(TI "E") (DI "")])
+
+;; In GPR templates, a string like "lc<g>r" will expand to "lcgr" in DImode
+;; and "lcr" in SImode.
+(define_mode_attr g [(DI "g") (SI "")])
+
+;; In GPR templates, a string like "sl<y>" will expand to "slg" in DImode
+;; and "sly" in SImode. This is useful because on 64bit the ..g instructions
+;; were enhanced with long displacements whereas 31bit instructions got a ..y
+;; variant for long displacements.
+(define_mode_attr y [(DI "g") (SI "y")])
+
+;; In DW templates, a string like "cds<g>" will expand to "cdsg" in TImode
+;; and "cds" in DImode.
+(define_mode_attr tg [(TI "g") (DI "")])
+
+;; In GPR templates, a string like "c<gf>dbr" will expand to "cgdbr" in DImode
+;; and "cfdbr" in SImode.
+(define_mode_attr gf [(DI "g") (SI "f")])
+
+;; In GPR templates, a string like sll<gk> will expand to sllg for DI
+;; and sllk for SI. This way it is possible to merge the new z196 SI
+;; 3 operands shift instructions into the existing patterns.
+(define_mode_attr gk [(DI "g") (SI "k")])
+
+;; ICM mask required to load MODE value into the lowest subreg
+;; of a SImode register.
+(define_mode_attr icm_lo [(HI "3") (QI "1")])
+
+;; In HQI templates, a string like "llg<hc>" will expand to "llgh" in
+;; HImode and "llgc" in QImode.
+(define_mode_attr hc [(HI "h") (QI "c")])
+
+;; In P templates, the mode <DBL> will expand to "TI" in DImode and "DI"
+;; in SImode.
+(define_mode_attr DBL [(DI "TI") (SI "DI")])
+
+;; This attribute expands to DF for TFmode and to DD for TDmode.  It is
+;; used for Txmode splitters splitting a Txmode copy into 2 Dxmode copies.
+(define_mode_attr HALF_TMODE [(TF "DF") (TD "DD")])
+
+;; Maximum unsigned integer that fits in MODE.
+(define_mode_attr max_uint [(HI "65535") (QI "255")])
+
+;;
+;;- Compare instructions.
+;;
+
+; Test-under-Mask instructions
+
+(define_insn "*tmqi_mem"
+ [(set (reg CC_REGNUM)
+ (compare (and:QI (match_operand:QI 0 "memory_operand" "Q,S")
+ (match_operand:QI 1 "immediate_operand" "n,n"))
+ (match_operand:QI 2 "immediate_operand" "n,n")))]
+ "s390_match_ccmode (insn, s390_tm_ccmode (operands[1], operands[2], false))"
+ "@
+ tm\t%S0,%b1
+ tmy\t%S0,%b1"
+ [(set_attr "op_type" "SI,SIY")
+ (set_attr "z10prop" "z10_super,z10_super")])
+
+(define_insn "*tmdi_reg"
+ [(set (reg CC_REGNUM)
+ (compare (and:DI (match_operand:DI 0 "nonimmediate_operand" "d,d,d,d")
+ (match_operand:DI 1 "immediate_operand"
+ "N0HD0,N1HD0,N2HD0,N3HD0"))
+ (match_operand:DI 2 "immediate_operand" "n,n,n,n")))]
+ "TARGET_ZARCH
+ && s390_match_ccmode (insn, s390_tm_ccmode (operands[1], operands[2], true))
+ && s390_single_part (operands[1], DImode, HImode, 0) >= 0"
+ "@
+ tmhh\t%0,%i1
+ tmhl\t%0,%i1
+ tmlh\t%0,%i1
+ tmll\t%0,%i1"
+ [(set_attr "op_type" "RI")
+ (set_attr "z10prop" "z10_super,z10_super,z10_super,z10_super")])
+
+(define_insn "*tmsi_reg"
+ [(set (reg CC_REGNUM)
+ (compare (and:SI (match_operand:SI 0 "nonimmediate_operand" "d,d")
+ (match_operand:SI 1 "immediate_operand" "N0HS0,N1HS0"))
+ (match_operand:SI 2 "immediate_operand" "n,n")))]
+ "s390_match_ccmode (insn, s390_tm_ccmode (operands[1], operands[2], true))
+ && s390_single_part (operands[1], SImode, HImode, 0) >= 0"
+ "@
+ tmh\t%0,%i1
+ tml\t%0,%i1"
+ [(set_attr "op_type" "RI")
+ (set_attr "z10prop" "z10_super,z10_super")])
+
+(define_insn "*tm<mode>_full"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HQI 0 "register_operand" "d")
+ (match_operand:HQI 1 "immediate_operand" "n")))]
+ "s390_match_ccmode (insn, s390_tm_ccmode (constm1_rtx, operands[1], true))"
+ "tml\t%0,<max_uint>"
+ [(set_attr "op_type" "RI")
+ (set_attr "z10prop" "z10_super")])
+
+
+;
+; Load-and-Test instructions
+;
+
+; tst(di|si) instruction pattern(s).
+
+(define_insn "*tstdi_sign"
+ [(set (reg CC_REGNUM)
+ (compare
+ (ashiftrt:DI
+ (ashift:DI
+ (subreg:DI (match_operand:SI 0 "nonimmediate_operand" "d,RT") 0)
+ (const_int 32)) (const_int 32))
+ (match_operand:DI 1 "const0_operand" "")))
+ (set (match_operand:DI 2 "register_operand" "=d,d")
+ (sign_extend:DI (match_dup 0)))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_ZARCH"
+  "@
+   ltgfr\t%2,%0
+   ltgf\t%2,%0"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "cpu_facility" "*,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1") ])
+
+; ltr, lt, ltgr, ltg
+(define_insn "*tst<mode>_extimm"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 0 "nonimmediate_operand" "d,RT")
+ (match_operand:GPR 1 "const0_operand" "")))
+ (set (match_operand:GPR 2 "register_operand" "=d,d")
+ (match_dup 0))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_EXTIMM"
+ "@
+ lt<g>r\t%2,%0
+ lt<g>\t%2,%0"
+ [(set_attr "op_type" "RR<E>,RXY")
+ (set_attr "z10prop" "z10_fr_E1,z10_fwd_A3") ])
+
+; ltr, lt, ltgr, ltg
+(define_insn "*tst<mode>_cconly_extimm"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 0 "nonimmediate_operand" "d,RT")
+ (match_operand:GPR 1 "const0_operand" "")))
+ (clobber (match_scratch:GPR 2 "=X,d"))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_EXTIMM"
+ "@
+ lt<g>r\t%0,%0
+ lt<g>\t%2,%0"
+ [(set_attr "op_type" "RR<E>,RXY")
+ (set_attr "z10prop" "z10_fr_E1,z10_fwd_A3")])
+
+(define_insn "*tstdi"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:DI 0 "register_operand" "d")
+ (match_operand:DI 1 "const0_operand" "")))
+ (set (match_operand:DI 2 "register_operand" "=d")
+ (match_dup 0))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_ZARCH && !TARGET_EXTIMM"
+ "ltgr\t%2,%0"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_fr_E1")])
+
+(define_insn "*tstsi"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "d,Q,S")
+ (match_operand:SI 1 "const0_operand" "")))
+ (set (match_operand:SI 2 "register_operand" "=d,d,d")
+ (match_dup 0))]
+ "s390_match_ccmode(insn, CCSmode) && !TARGET_EXTIMM"
+ "@
+ ltr\t%2,%0
+ icm\t%2,15,%S0
+ icmy\t%2,15,%S0"
+ [(set_attr "op_type" "RR,RS,RSY")
+ (set_attr "z10prop" "z10_fr_E1,z10_super_E1,z10_super_E1")])
+
+(define_insn "*tstsi_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "d,Q,S")
+ (match_operand:SI 1 "const0_operand" "")))
+ (clobber (match_scratch:SI 2 "=X,d,d"))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ ltr\t%0,%0
+ icm\t%2,15,%S0
+ icmy\t%2,15,%S0"
+ [(set_attr "op_type" "RR,RS,RSY")
+ (set_attr "z10prop" "z10_fr_E1,z10_super_E1,z10_super_E1")])
+
+(define_insn "*tstdi_cconly_31"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:DI 0 "register_operand" "d")
+ (match_operand:DI 1 "const0_operand" "")))]
+ "s390_match_ccmode(insn, CCSmode) && !TARGET_ZARCH"
+ "srda\t%0,0"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+; ltr, ltgr
+(define_insn "*tst<mode>_cconly2"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 0 "register_operand" "d")
+ (match_operand:GPR 1 "const0_operand" "")))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "lt<g>r\t%0,%0"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_fr_E1")])
+
+; tst(hi|qi) instruction pattern(s).
+
+(define_insn "*tst<mode>CCT"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HQI 0 "nonimmediate_operand" "?Q,?S,d")
+ (match_operand:HQI 1 "const0_operand" "")))
+ (set (match_operand:HQI 2 "register_operand" "=d,d,0")
+ (match_dup 0))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ icm\t%2,<icm_lo>,%S0
+ icmy\t%2,<icm_lo>,%S0
+ tml\t%0,<max_uint>"
+ [(set_attr "op_type" "RS,RSY,RI")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super")])
+
+(define_insn "*tsthiCCT_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "Q,S,d")
+ (match_operand:HI 1 "const0_operand" "")))
+ (clobber (match_scratch:HI 2 "=d,d,X"))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ icm\t%2,3,%S0
+ icmy\t%2,3,%S0
+ tml\t%0,65535"
+ [(set_attr "op_type" "RS,RSY,RI")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super")])
+
+(define_insn "*tstqiCCT_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "?Q,?S,d")
+ (match_operand:QI 1 "const0_operand" "")))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ cli\t%S0,0
+ cliy\t%S0,0
+ tml\t%0,255"
+ [(set_attr "op_type" "SI,SIY,RI")
+ (set_attr "z10prop" "z10_super,z10_super,z10_super")])
+
+(define_insn "*tst<mode>"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HQI 0 "s_operand" "Q,S")
+ (match_operand:HQI 1 "const0_operand" "")))
+ (set (match_operand:HQI 2 "register_operand" "=d,d")
+ (match_dup 0))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ icm\t%2,<icm_lo>,%S0
+ icmy\t%2,<icm_lo>,%S0"
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn "*tst<mode>_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HQI 0 "s_operand" "Q,S")
+ (match_operand:HQI 1 "const0_operand" "")))
+ (clobber (match_scratch:HQI 2 "=d,d"))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ icm\t%2,<icm_lo>,%S0
+ icmy\t%2,<icm_lo>,%S0"
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+
+; Compare (equality) instructions
+
+(define_insn "*cmpdi_cct"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "%d,d,d,d,Q")
+ (match_operand:DI 1 "general_operand" "d,K,Os,RT,BQ")))]
+ "s390_match_ccmode (insn, CCTmode) && TARGET_ZARCH"
+ "@
+ cgr\t%0,%1
+ cghi\t%0,%h1
+ cgfi\t%0,%1
+ cg\t%0,%1
+ #"
+ [(set_attr "op_type" "RRE,RI,RIL,RXY,SS")
+ (set_attr "z10prop" "z10_super_c,z10_super,z10_super,z10_super,*")])
+
+(define_insn "*cmpsi_cct"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "%d,d,d,d,d,Q")
+ (match_operand:SI 1 "general_operand" "d,K,Os,R,T,BQ")))]
+ "s390_match_ccmode (insn, CCTmode)"
+ "@
+ cr\t%0,%1
+ chi\t%0,%h1
+ cfi\t%0,%1
+ c\t%0,%1
+ cy\t%0,%1
+ #"
+ [(set_attr "op_type" "RR,RI,RIL,RX,RXY,SS")
+ (set_attr "z10prop" "z10_super_c,z10_super,z10_super,z10_super,z10_super,*")])
+
+; Compare (signed) instructions
+
+(define_insn "*cmpdi_ccs_sign"
+ [(set (reg CC_REGNUM)
+ (compare (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"
+ "d,RT,b"))
+ (match_operand:DI 0 "register_operand" "d, d,d")))]
+ "s390_match_ccmode(insn, CCSRmode) && TARGET_ZARCH"
+ "@
+ cgfr\t%0,%1
+ cgf\t%0,%1
+ cgfrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY,RIL")
+ (set_attr "z10prop" "z10_c,*,*")
+ (set_attr "type" "*,*,larl")])
+
+(define_insn "*cmpsi_ccs_sign"
+ [(set (reg CC_REGNUM)
+ (compare (sign_extend:SI (match_operand:HI 1 "memory_operand" "R,T,b"))
+ (match_operand:SI 0 "register_operand" "d,d,d")))]
+ "s390_match_ccmode(insn, CCSRmode)"
+ "@
+ ch\t%0,%1
+ chy\t%0,%1
+ chrl\t%0,%1"
+ [(set_attr "op_type" "RX,RXY,RIL")
+ (set_attr "cpu_facility" "*,*,z10")
+ (set_attr "type" "*,*,larl")
+ (set_attr "z196prop" "z196_cracked,z196_cracked,z196_cracked")])
+
+(define_insn "*cmphi_ccs_z10"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HI 0 "s_operand" "Q")
+ (match_operand:HI 1 "immediate_operand" "K")))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_Z10"
+ "chhsi\t%0,%1"
+ [(set_attr "op_type" "SIL")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*cmpdi_ccs_signhi_rl"
+ [(set (reg CC_REGNUM)
+ (compare (sign_extend:DI (match_operand:HI 1 "memory_operand" "RT,b"))
+ (match_operand:GPR 0 "register_operand" "d,d")))]
+ "s390_match_ccmode(insn, CCSRmode) && TARGET_Z10"
+ "@
+ cgh\t%0,%1
+ cghrl\t%0,%1"
+ [(set_attr "op_type" "RXY,RIL")
+ (set_attr "type" "*,larl")])
+
+; cr, chi, cfi, c, cy, cgr, cghi, cgfi, cg, chsi, cghsi, crl, cgrl
+(define_insn "*cmp<mode>_ccs"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 0 "nonimmediate_operand"
+ "d,d,Q, d,d,d,d")
+ (match_operand:GPR 1 "general_operand"
+ "d,K,K,Os,R,T,b")))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ c<g>r\t%0,%1
+ c<g>hi\t%0,%h1
+ c<g>hsi\t%0,%h1
+ c<g>fi\t%0,%1
+ c<g>\t%0,%1
+ c<y>\t%0,%1
+ c<g>rl\t%0,%1"
+ [(set_attr "op_type" "RR<E>,RI,SIL,RIL,RX<Y>,RXY,RIL")
+ (set_attr "cpu_facility" "*,*,z10,extimm,*,*,z10")
+ (set_attr "type" "*,*,*,*,*,*,larl")
+ (set_attr "z10prop" "z10_super_c,z10_super,z10_super,z10_super,z10_super,z10_super,z10_super")])
+
+
+; Compare (unsigned) instructions
+
+(define_insn "*cmpsi_ccu_zerohi_rlsi"
+ [(set (reg CC_REGNUM)
+ (compare (zero_extend:SI (mem:HI (match_operand:SI 1
+ "larl_operand" "X")))
+ (match_operand:SI 0 "register_operand" "d")))]
+ "s390_match_ccmode(insn, CCURmode) && TARGET_Z10"
+ "clhrl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_super")])
+
+; clhrl, clghrl
+(define_insn "*cmp<GPR:mode>_ccu_zerohi_rldi"
+ [(set (reg CC_REGNUM)
+ (compare (zero_extend:GPR (mem:HI (match_operand:DI 1
+ "larl_operand" "X")))
+ (match_operand:GPR 0 "register_operand" "d")))]
+ "s390_match_ccmode(insn, CCURmode) && TARGET_Z10"
+ "cl<g>hrl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_super")])
+
+(define_insn "*cmpdi_ccu_zero"
+ [(set (reg CC_REGNUM)
+ (compare (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand"
+ "d,RT,b"))
+ (match_operand:DI 0 "register_operand" "d, d,d")))]
+ "s390_match_ccmode (insn, CCURmode) && TARGET_ZARCH"
+ "@
+ clgfr\t%0,%1
+ clgf\t%0,%1
+ clgfrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY,RIL")
+ (set_attr "cpu_facility" "*,*,z10")
+ (set_attr "type" "*,*,larl")
+ (set_attr "z10prop" "z10_super_c,z10_super_E1,z10_super")])
+
+(define_insn "*cmpdi_ccu"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:DI 0 "nonimmediate_operand"
+ "d, d,d,Q, d, Q,BQ")
+ (match_operand:DI 1 "general_operand"
+ "d,Op,b,D,RT,BQ,Q")))]
+ "s390_match_ccmode (insn, CCUmode) && TARGET_ZARCH"
+ "@
+ clgr\t%0,%1
+ clgfi\t%0,%1
+ clgrl\t%0,%1
+ clghsi\t%0,%x1
+ clg\t%0,%1
+ #
+ #"
+ [(set_attr "op_type" "RRE,RIL,RIL,SIL,RXY,SS,SS")
+ (set_attr "cpu_facility" "*,extimm,z10,z10,*,*,*")
+ (set_attr "type" "*,*,larl,*,*,*,*")
+ (set_attr "z10prop" "z10_super_c,z10_super,z10_super,z10_super,z10_super,*,*")])
+
+(define_insn "*cmpsi_ccu"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "d, d,d,Q,d,d, Q,BQ")
+ (match_operand:SI 1 "general_operand" "d,Os,b,D,R,T,BQ, Q")))]
+ "s390_match_ccmode (insn, CCUmode)"
+ "@
+ clr\t%0,%1
+ clfi\t%0,%o1
+ clrl\t%0,%1
+ clfhsi\t%0,%x1
+ cl\t%0,%1
+ cly\t%0,%1
+ #
+ #"
+ [(set_attr "op_type" "RR,RIL,RIL,SIL,RX,RXY,SS,SS")
+ (set_attr "cpu_facility" "*,extimm,z10,z10,*,*,*,*")
+ (set_attr "type" "*,*,larl,*,*,*,*,*")
+ (set_attr "z10prop" "z10_super_c,z10_super,z10_super,z10_super,z10_super,z10_super,*,*")])
+
+(define_insn "*cmphi_ccu"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "d,d,Q,Q,BQ")
+ (match_operand:HI 1 "general_operand" "Q,S,D,BQ,Q")))]
+ "s390_match_ccmode (insn, CCUmode)
+ && !register_operand (operands[1], HImode)"
+ "@
+ clm\t%0,3,%S1
+ clmy\t%0,3,%S1
+ clhhsi\t%0,%1
+ #
+ #"
+ [(set_attr "op_type" "RS,RSY,SIL,SS,SS")
+ (set_attr "cpu_facility" "*,*,z10,*,*")
+ (set_attr "z10prop" "*,*,z10_super,*,*")])
+
+(define_insn "*cmpqi_ccu"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "d,d,Q,S,Q,BQ")
+ (match_operand:QI 1 "general_operand" "Q,S,n,n,BQ,Q")))]
+ "s390_match_ccmode (insn, CCUmode)
+ && !register_operand (operands[1], QImode)"
+ "@
+ clm\t%0,1,%S1
+ clmy\t%0,1,%S1
+ cli\t%S0,%b1
+ cliy\t%S0,%b1
+ #
+ #"
+ [(set_attr "op_type" "RS,RSY,SI,SIY,SS,SS")
+ (set_attr "z10prop" "*,*,z10_super,z10_super,*,*")])
+
+
+; Block compare (CLC) instruction patterns.
+
+(define_insn "*clc"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:BLK 0 "memory_operand" "Q")
+ (match_operand:BLK 1 "memory_operand" "Q")))
+ (use (match_operand 2 "const_int_operand" "n"))]
+ "s390_match_ccmode (insn, CCUmode)
+ && INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 256"
+ "clc\t%O0(%2,%R0),%S1"
+ [(set_attr "op_type" "SS")])
+
+(define_split
+ [(set (reg CC_REGNUM)
+ (compare (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")))]
+ "reload_completed
+ && s390_match_ccmode (insn, CCUmode)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) > 0"
+ [(parallel
+ [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))])]
+{
+ operands[2] = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[0])));
+ operands[0] = adjust_address (operands[0], BLKmode, 0);
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+
+ operands[1] = gen_rtx_COMPARE (GET_MODE (SET_DEST (PATTERN (curr_insn))),
+ operands[0], operands[1]);
+ operands[0] = SET_DEST (PATTERN (curr_insn));
+})
+
+
+; Compare (TF|DF|SF|TD|DD|SD) instructions
+
+; ltxbr, ltdbr, ltebr, ltxtr, ltdtr
+(define_insn "*cmp<mode>_ccs_0"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:FP 0 "register_operand" "f")
+ (match_operand:FP 1 "const0_operand" "")))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
+ "lt<xde><bt>r\t%0,%0"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; cxtr, cxbr, cdbr, cebr, cdb, ceb, cxbtr, cdbtr
+(define_insn "*cmp<mode>_ccs"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:FP 0 "register_operand" "f,f")
+ (match_operand:FP 1 "general_operand" "f,<Rf>")))]
+ "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT"
+ "@
+ c<xde><bt>r\t%0,%1
+ c<xde>b\t%0,%1"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+
+; Compare and Branch instructions
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): crj, cgrj, cr, cgr
+(define_insn "*cmp_and_br_signed_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && !TARGET_AVOID_CMP_AND_BRANCH"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "c<g>ij%C0\t%1,%c2,%l3" : "c<g>rj%C0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "c<g>fi\t%1,%c2\;jg%C0\t%l3" : "c<g>r\t%1,%2\;jg%C0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+ ; 10 byte for cgr/jg
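+; A sketch of the two forms (assuming standard extended mnemonics;
+; registers and label arbitrary): a near target uses the fused 6-byte
+; compare-and-branch
+;     cgrjh   %r2,%r3,.Ltarget
+; while an out-of-range target falls back to a compare plus long branch
+;     cgr     %r2,%r3
+;     jgh     .Ltarget
+; which is what the "length" attribute above distinguishes.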
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*cmp_and_br_unsigned_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && !TARGET_AVOID_CMP_AND_BRANCH"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "cl<g>ij%C0\t%1,%b2,%l3" : "cl<g>rj%C0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "cl<g>fi\t%1,%b2\;jg%C0\t%l3" : "cl<g>r\t%1,%2\;jg%C0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+ ; 10 byte for clgr/jg
+
+; And now the same two patterns as above but with a negated CC mask.
+
+; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): crj, cgrj, cr, cgr
+(define_insn "*icmp_and_br_signed_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_signed_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,C")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && !TARGET_AVOID_CMP_AND_BRANCH"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
+ ; 10 byte for cgr/jg
+
+; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
+; The following instructions do a complementary access of their second
+; operand (z10 only): clrj, clgrj, clr, clgr
+(define_insn "*icmp_and_br_unsigned_<mode>"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,I")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && !TARGET_AVOID_CMP_AND_BRANCH"
+{
+ if (get_attr_length (insn) == 6)
+ return which_alternative ?
+ "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
+ else
+ return which_alternative ?
+ "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
+ (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
+ ; 10 byte for clgr/jg
+
+;;
+;;- Move instructions.
+;;
+
+;
+; movti instruction pattern(s).
+;
+
+(define_insn "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,d,o")
+ (match_operand:TI 1 "general_operand" "QS,d,dPRT,d"))]
+ "TARGET_ZARCH"
+ "@
+ lmg\t%0,%N0,%S1
+ stmg\t%1,%N1,%S0
+ #
+ #"
+ [(set_attr "op_type" "RSY,RSY,*,*")
+ (set_attr "type" "lm,stm,*,*")])
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], TImode, 0)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, TImode);
+ operands[3] = operand_subword (operands[0], 1, 0, TImode);
+ operands[4] = operand_subword (operands[1], 0, 0, TImode);
+ operands[5] = operand_subword (operands[1], 1, 0, TImode);
+})
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], TImode, 1)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, TImode);
+ operands[3] = operand_subword (operands[0], 0, 0, TImode);
+ operands[4] = operand_subword (operands[1], 1, 0, TImode);
+ operands[5] = operand_subword (operands[1], 0, 0, TImode);
+})
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && !s_operand (operands[1], VOIDmode)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx addr = operand_subword (operands[0], 1, 0, TImode);
+ addr = gen_lowpart (Pmode, addr);
+ s390_load_address (addr, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+})
+
+
+;
+; Patterns used for secondary reloads
+;
+
+; z10 provides move instructions accepting larl memory operands.
+; Unfortunately there is no such variant for QI, TI and FP mode moves.
+; These patterns are also used for unaligned SI and DI accesses.
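+;
+; A sketch of such a reload (hypothetical symbol; IC has no larl-style
+; format, so a QImode load from a symbolic address goes through a
+; scratch address register):
+;     larl    %r1,some_sym
+;     ic      %r2,0(%r1)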
+
+(define_expand "reload<INTALL:mode><P:mode>_tomem_z10"
+ [(parallel [(match_operand:INTALL 0 "memory_operand" "")
+ (match_operand:INTALL 1 "register_operand" "=d")
+ (match_operand:P 2 "register_operand" "=&a")])]
+ "TARGET_Z10"
+{
+ s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
+ DONE;
+})
+
+(define_expand "reload<INTALL:mode><P:mode>_toreg_z10"
+ [(parallel [(match_operand:INTALL 0 "register_operand" "=d")
+ (match_operand:INTALL 1 "memory_operand" "")
+ (match_operand:P 2 "register_operand" "=a")])]
+ "TARGET_Z10"
+{
+ s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
+ DONE;
+})
+
+(define_expand "reload<FPALL:mode><P:mode>_tomem_z10"
+ [(parallel [(match_operand:FPALL 0 "memory_operand" "")
+ (match_operand:FPALL 1 "register_operand" "=d")
+ (match_operand:P 2 "register_operand" "=&a")])]
+ "TARGET_Z10"
+{
+ s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
+ DONE;
+})
+
+(define_expand "reload<FPALL:mode><P:mode>_toreg_z10"
+ [(parallel [(match_operand:FPALL 0 "register_operand" "=d")
+ (match_operand:FPALL 1 "memory_operand" "")
+ (match_operand:P 2 "register_operand" "=a")])]
+ "TARGET_Z10"
+{
+ s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
+ DONE;
+})
+
+(define_expand "reload<P:mode>_larl_odd_addend_z10"
+ [(parallel [(match_operand:P 0 "register_operand" "=d")
+ (match_operand:P 1 "larl_operand" "")
+ (match_operand:P 2 "register_operand" "=a")])]
+ "TARGET_Z10"
+{
+ s390_reload_larl_operand (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+; Handles loading a PLUS (load address) expression
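+; (A sketch, assuming a simple two-register sum: an address such as
+; (plus (reg) (reg)) that must end up in one register can be
+; materialized with a load address, e.g. "la %r1,0(%r2,%r3)";
+; s390_expand_plus_operand emits the appropriate sequence.)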
+
+(define_expand "reload<mode>_plus"
+ [(parallel [(match_operand:P 0 "register_operand" "=a")
+ (match_operand:P 1 "s390_plus_operand" "")
+ (match_operand:P 2 "register_operand" "=&a")])]
+ ""
+{
+ s390_expand_plus_operand (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+; Handles accessing a non-offsettable memory address
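+; (A sketch: a multiword access such as an lm/stm pair needs an address
+; that stays valid at offsets 0 and 4, so a reg+reg or out-of-range
+; address is first loaded into the scratch base register via
+; s390_load_address.)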
+
+(define_expand "reload<mode>_nonoffmem_in"
+ [(parallel [(match_operand 0 "register_operand" "")
+ (match_operand 1 "" "")
+ (match_operand:P 2 "register_operand" "=&a")])]
+ ""
+{
+ gcc_assert (MEM_P (operands[1]));
+ s390_load_address (operands[2], find_replacement (&XEXP (operands[1], 0)));
+ operands[1] = replace_equiv_address (operands[1], operands[2]);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reload<mode>_nonoffmem_out"
+ [(parallel [(match_operand 0 "" "")
+ (match_operand 1 "register_operand" "")
+ (match_operand:P 2 "register_operand" "=&a")])]
+ ""
+{
+ gcc_assert (MEM_P (operands[0]));
+ s390_load_address (operands[2], find_replacement (&XEXP (operands[0], 0)));
+ operands[0] = replace_equiv_address (operands[0], operands[2]);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reload<mode>_PIC_addr"
+ [(parallel [(match_operand 0 "register_operand" "=d")
+ (match_operand 1 "larl_operand" "")
+ (match_operand:P 2 "register_operand" "=a")])]
+ ""
+{
+ rtx new_rtx = legitimize_pic_address (operands[1], operands[2]);
+ emit_move_insn (operands[0], new_rtx);
+})
+
+;
+; movdi instruction pattern(s).
+;
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ /* Handle symbolic constants. */
+ if (TARGET_64BIT
+ && (SYMBOLIC_CONST (operands[1])
+ || (GET_CODE (operands[1]) == PLUS
+ && XEXP (operands[1], 0) == pic_offset_table_rtx
+ && SYMBOLIC_CONST (XEXP (operands[1], 1)))))
+ emit_symbolic_move (operands);
+})
+
+(define_insn "*movdi_larl"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (match_operand:DI 1 "larl_operand" "X"))]
+ "TARGET_64BIT
+ && !FP_REG_P (operands[0])"
+ "larl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_super_A1")])
+
+(define_insn "*movdi_64"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=d,d,d,d,d,d,d,d,f,d,d,d,d,d,
+ RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t")
+ (match_operand:DI 1 "general_operand"
+ "K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT,
+ d,*f,R,T,*f,*f,d,K,t,d,t,Q"))]
+ "TARGET_ZARCH"
+ "@
+ lghi\t%0,%h1
+ llihh\t%0,%i1
+ llihl\t%0,%i1
+ llilh\t%0,%i1
+ llill\t%0,%i1
+ lgfi\t%0,%1
+ llihf\t%0,%k1
+ llilf\t%0,%k1
+ ldgr\t%0,%1
+ lgdr\t%0,%1
+ lay\t%0,%a1
+ lgrl\t%0,%1
+ lgr\t%0,%1
+ lg\t%0,%1
+ stg\t%1,%0
+ ldr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ stgrl\t%1,%0
+ mvghi\t%0,%1
+ #
+ #
+ stam\t%1,%N1,%S0
+ lam\t%0,%N0,%S1"
+ [(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RXY,RIL,RRE,RXY,
+ RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS")
+ (set_attr "type" "*,*,*,*,*,*,*,*,floaddf,floaddf,la,larl,lr,load,store,
+ floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,
+ *,*")
+ (set_attr "cpu_facility" "*,*,*,*,*,extimm,extimm,extimm,dfp,dfp,longdisp,
+ z10,*,*,*,*,*,longdisp,*,longdisp,
+ z10,z10,*,*,*,*")
+ (set_attr "z10prop" "z10_fwd_A1,
+ z10_fwd_E1,
+ z10_fwd_E1,
+ z10_fwd_E1,
+ z10_fwd_E1,
+ z10_fwd_A1,
+ z10_fwd_E1,
+ z10_fwd_E1,
+ *,
+ *,
+ z10_fwd_A1,
+ z10_fwd_A3,
+ z10_fr_E1,
+ z10_fwd_A3,
+ z10_rec,
+ *,
+ *,
+ *,
+ *,
+ *,
+ z10_rec,
+ z10_super,
+ *,
+ *,
+ *,
+ *")
+])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_ZARCH && ACCESS_REG_P (operands[1])"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
+ (set (strict_low_part (match_dup 2)) (match_dup 4))]
+ "operands[2] = gen_lowpart (SImode, operands[0]);
+ s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_ZARCH && ACCESS_REG_P (operands[0])
+ && dead_or_set_p (insn, operands[1])"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 1) (lshiftrt:DI (match_dup 1) (const_int 32)))
+ (set (match_dup 4) (match_dup 2))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);
+ s390_split_access_reg (operands[0], &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "TARGET_ZARCH && ACCESS_REG_P (operands[0])
+ && !dead_or_set_p (insn, operands[1])"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 1) (rotate:DI (match_dup 1) (const_int 32)))
+ (set (match_dup 4) (match_dup 2))
+ (set (match_dup 1) (rotate:DI (match_dup 1) (const_int 32)))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);
+ s390_split_access_reg (operands[0], &operands[3], &operands[4]);")
+
+(define_insn "*movdi_31"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=d,d,Q,S,d ,o,!*f,!*f,!*f,!R,!T,d")
+ (match_operand:DI 1 "general_operand"
+ " Q,S,d,d,dPRT,d, *f, R, T,*f,*f,b"))]
+ "!TARGET_ZARCH"
+ "@
+ lm\t%0,%N0,%S1
+ lmy\t%0,%N0,%S1
+ stm\t%1,%N1,%S0
+ stmy\t%1,%N1,%S0
+ #
+ #
+ ldr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ #"
+ [(set_attr "op_type" "RS,RSY,RS,RSY,*,*,RR,RX,RXY,RX,RXY,*")
+ (set_attr "type" "lm,lm,stm,stm,*,*,floaddf,floaddf,floaddf,fstoredf,fstoredf,*")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,*,*,*,*,*,z10")])
+
+; For a load from a symbol ref we can use one of the target registers
+; together with larl to load the address.
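+; A sketch of the result (destination pair %r2/%r3, hypothetical
+; symbol):
+;     larl    %r3,some_sym
+;     lm      %r2,%r3,0(%r3)
+; The second register of the pair doubles as the temporary address
+; register and is overwritten last by the lm.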
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))]
+ "!TARGET_ZARCH && reload_completed && TARGET_Z10
+ && larl_operand (XEXP (operands[1], 0), SImode)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (match_dup 1))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, DImode);
+ operands[3] = XEXP (operands[1], 0);
+ operands[1] = replace_equiv_address (operands[1], operands[2]);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], DImode, 0)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, DImode);
+ operands[3] = operand_subword (operands[0], 1, 0, DImode);
+ operands[4] = operand_subword (operands[1], 0, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1, 0, DImode);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], DImode, 1)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, DImode);
+ operands[3] = operand_subword (operands[0], 0, 0, DImode);
+ operands[4] = operand_subword (operands[1], 1, 0, DImode);
+ operands[5] = operand_subword (operands[1], 0, 0, DImode);
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && !FP_REG_P (operands[0])
+ && !s_operand (operands[1], VOIDmode)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx addr = operand_subword (operands[0], 1, 0, DImode);
+ s390_load_address (addr, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+})
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mem:DI (match_operand 1 "address_operand" "")))]
+ "TARGET_ZARCH
+ && !FP_REG_P (operands[0])
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (operands[1])
+ && get_pool_mode (operands[1]) == DImode
+ && legitimate_reload_constant_p (get_pool_constant (operands[1]))"
+ [(set (match_dup 0) (match_dup 2))]
+ "operands[2] = get_pool_constant (operands[1]);")
+
+(define_insn "*la_64"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (match_operand:QI 1 "address_operand" "ZQZR,ZSZT"))]
+ "TARGET_64BIT"
+ "@
+ la\t%0,%a1
+ lay\t%0,%a1"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "type" "la")
+ (set_attr "z10prop" "z10_fwd_A1,z10_fwd_A1")])
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:QI 1 "address_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_64BIT
+ && preferred_la_operand_p (operands[1], const0_rtx)"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))
+ (parallel
+ [(set (match_dup 0)
+ (plus:DI (match_dup 0)
+ (match_operand:DI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_64BIT
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && preferred_la_operand_p (operands[1], operands[2])"
+ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+ "")
+
+;
+; movsi instruction pattern(s).
+;
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ /* Handle symbolic constants. */
+ if (!TARGET_64BIT
+ && (SYMBOLIC_CONST (operands[1])
+ || (GET_CODE (operands[1]) == PLUS
+ && XEXP (operands[1], 0) == pic_offset_table_rtx
+ && SYMBOLIC_CONST (XEXP(operands[1], 1)))))
+ emit_symbolic_move (operands);
+})
+
+(define_insn "*movsi_larl"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (match_operand:SI 1 "larl_operand" "X"))]
+ "!TARGET_64BIT && TARGET_CPU_ZARCH
+ && !FP_REG_P (operands[0])"
+ "larl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_fwd_A1")])
+
+(define_insn "*movsi_zarch"
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=d,d,d,d,d,d,d,d,d,R,T,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t")
+ (match_operand:SI 1 "general_operand"
+ "K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d,*f,R,T,*f,*f,t,d,t,d,K,Q"))]
+ "TARGET_ZARCH"
+ "@
+ lhi\t%0,%h1
+ llilh\t%0,%i1
+ llill\t%0,%i1
+ iilf\t%0,%o1
+ lay\t%0,%a1
+ lrl\t%0,%1
+ lr\t%0,%1
+ l\t%0,%1
+ ly\t%0,%1
+ st\t%1,%0
+ sty\t%1,%0
+ ler\t%0,%1
+ le\t%0,%1
+ ley\t%0,%1
+ ste\t%1,%0
+ stey\t%1,%0
+ ear\t%0,%1
+ sar\t%0,%1
+ stam\t%1,%1,%S0
+ strl\t%1,%0
+ mvhi\t%0,%1
+ lam\t%0,%0,%S1"
+ [(set_attr "op_type" "RI,RI,RI,RIL,RXY,RIL,RR,RX,RXY,RX,RXY,
+ RR,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS")
+ (set_attr "type" "*,
+ *,
+ *,
+ *,
+ la,
+ larl,
+ lr,
+ load,
+ load,
+ store,
+ store,
+ floadsf,
+ floadsf,
+ floadsf,
+ fstoresf,
+ fstoresf,
+ *,
+ *,
+ *,
+ larl,
+ *,
+ *")
+ (set_attr "cpu_facility" "*,*,*,extimm,longdisp,z10,*,*,longdisp,*,longdisp,
+ *,*,longdisp,*,longdisp,*,*,*,z10,z10,*")
+ (set_attr "z10prop" "z10_fwd_A1,
+ z10_fwd_E1,
+ z10_fwd_E1,
+ z10_fwd_A1,
+ z10_fwd_A1,
+ z10_fwd_A3,
+ z10_fr_E1,
+ z10_fwd_A3,
+ z10_fwd_A3,
+ z10_rec,
+ z10_rec,
+ *,
+ *,
+ *,
+ *,
+ *,
+ z10_super_E1,
+ z10_super,
+ *,
+ z10_rec,
+ z10_super,
+ *")])
+
+(define_insn "*movsi_esa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!R,d,t,Q,t")
+ (match_operand:SI 1 "general_operand" "K,d,R,d,*f,R,*f,t,d,t,Q"))]
+ "!TARGET_ZARCH"
+ "@
+ lhi\t%0,%h1
+ lr\t%0,%1
+ l\t%0,%1
+ st\t%1,%0
+ ler\t%0,%1
+ le\t%0,%1
+ ste\t%1,%0
+ ear\t%0,%1
+ sar\t%0,%1
+ stam\t%1,%1,%S0
+ lam\t%0,%0,%S1"
+ [(set_attr "op_type" "RI,RR,RX,RX,RR,RX,RX,RRE,RRE,RS,RS")
+ (set_attr "type" "*,lr,load,store,floadsf,floadsf,fstoresf,*,*,*,*")
+ (set_attr "z10prop" "z10_fwd_A1,
+ z10_fr_E1,
+ z10_fwd_A3,
+ z10_rec,
+ *,
+ *,
+ *,
+ z10_super_E1,
+ z10_super,
+ *,
+ *")
+])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mem:SI (match_operand 1 "address_operand" "")))]
+ "!FP_REG_P (operands[0])
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (operands[1])
+ && get_pool_mode (operands[1]) == SImode
+ && legitimate_reload_constant_p (get_pool_constant (operands[1]))"
+ [(set (match_dup 0) (match_dup 2))]
+ "operands[2] = get_pool_constant (operands[1]);")
+
+(define_insn "*la_31"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (match_operand:QI 1 "address_operand" "ZQZR,ZSZT"))]
+ "!TARGET_64BIT && legitimate_la_operand_p (operands[1])"
+ "@
+ la\t%0,%a1
+ lay\t%0,%a1"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "type" "la")
+ (set_attr "z10prop" "z10_fwd_A1,z10_fwd_A1")])
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:QI 1 "address_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "!TARGET_64BIT
+ && preferred_la_operand_p (operands[1], const0_rtx)"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel
+ [(set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "!TARGET_64BIT
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && preferred_la_operand_p (operands[1], operands[2])"
+ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_insn "*la_31_and"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (and:SI (match_operand:QI 1 "address_operand" "ZQZR,ZSZT")
+ (const_int 2147483647)))]
+ "!TARGET_64BIT"
+ "@
+ la\t%0,%a1
+ lay\t%0,%a1"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "type" "la")
+ (set_attr "z10prop" "z10_fwd_A1,z10_fwd_A1")])
+
+(define_insn_and_split "*la_31_and_cc"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (and:SI (match_operand:QI 1 "address_operand" "p")
+ (const_int 2147483647)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (and:SI (match_dup 1) (const_int 2147483647)))]
+ ""
+ [(set_attr "op_type" "RX")
+ (set_attr "type" "la")])
+
+(define_insn "force_la_31"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (match_operand:QI 1 "address_operand" "ZQZR,ZSZT"))
+ (use (const_int 0))]
+ "!TARGET_64BIT"
+ "@
+ la\t%0,%a1
+ lay\t%0,%a1"
+ [(set_attr "op_type" "RX")
+ (set_attr "type" "la")
+ (set_attr "z10prop" "z10_fwd_A1,z10_fwd_A1")])
+
+;
+; movhi instruction pattern(s).
+;
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ /* Make it explicit that loading a register from memory
+ always sign-extends (at least) to SImode. */
+ if (optimize && can_create_pseudo_p ()
+ && register_operand (operands[0], VOIDmode)
+ && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx ext = gen_rtx_SIGN_EXTEND (SImode, operands[1]);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, ext));
+ operands[1] = gen_lowpart (HImode, tmp);
+ }
+})
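+; (A sketch: with the expander above, a short loaded from memory goes
+; through an SImode pseudo, e.g. "lh %r2,0(%r1)", so later SImode uses
+; of the value need no second sign extension.)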
+
+(define_insn "*movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q")
+ (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K"))]
+ ""
+ "@
+ lr\t%0,%1
+ lhi\t%0,%h1
+ lh\t%0,%1
+ lhy\t%0,%1
+ lhrl\t%0,%1
+ sth\t%1,%0
+ sthy\t%1,%0
+ sthrl\t%1,%0
+ mvhhi\t%0,%1"
+ [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL")
+ (set_attr "type" "lr,*,*,*,larl,store,store,store,*")
+ (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10")
+ (set_attr "z10prop" "z10_fr_E1,
+ z10_fwd_A1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_rec,
+ z10_rec,
+ z10_rec,
+ z10_super")])
+
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mem:HI (match_operand 1 "address_operand" "")))]
+ "GET_CODE (operands[1]) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (operands[1])
+ && get_pool_mode (operands[1]) == HImode
+ && GET_CODE (get_pool_constant (operands[1])) == CONST_INT"
+ [(set (match_dup 0) (match_dup 2))]
+ "operands[2] = get_pool_constant (operands[1]);")
+
+;
+; movqi instruction pattern(s).
+;
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ /* On z/Architecture, zero-extending from memory to register
+ is just as fast as a QImode load. */
+ if (TARGET_ZARCH && optimize && can_create_pseudo_p ()
+ && register_operand (operands[0], VOIDmode)
+ && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ rtx ext = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, ext));
+ operands[1] = gen_lowpart (QImode, tmp);
+ }
+})
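+; (A sketch: on z/Architecture the expander above widens a QImode load
+; to a DImode zero extension, e.g. "llgc %r2,0(%r1)", leaving the whole
+; register in a defined state at no extra cost.)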
+
+(define_insn "*movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q")
+ (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q"))]
+ ""
+ "@
+ lr\t%0,%1
+ lhi\t%0,%b1
+ ic\t%0,%1
+ icy\t%0,%1
+ stc\t%1,%0
+ stcy\t%1,%0
+ mvi\t%S0,%b1
+ mviy\t%S0,%b1
+ #"
+ [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS")
+ (set_attr "type" "lr,*,*,*,store,store,store,store,*")
+ (set_attr "z10prop" "z10_fr_E1,
+ z10_fwd_A1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_rec,
+ z10_rec,
+ z10_super,
+ z10_super,
+ *")])
+
+(define_peephole2
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (mem:QI (match_operand 1 "address_operand" "")))]
+ "GET_CODE (operands[1]) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (operands[1])
+ && get_pool_mode (operands[1]) == QImode
+ && GET_CODE (get_pool_constant (operands[1])) == CONST_INT"
+ [(set (match_dup 0) (match_dup 2))]
+ "operands[2] = get_pool_constant (operands[1]);")
+
+;
+; movstrictqi instruction pattern(s).
+;
+
+(define_insn "*movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+d,d"))
+ (match_operand:QI 1 "memory_operand" "R,T"))]
+ ""
+ "@
+ ic\t%0,%1
+ icy\t%0,%1"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+;
+; movstricthi instruction pattern(s).
+;
+
+(define_insn "*movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+d,d"))
+ (match_operand:HI 1 "memory_operand" "Q,S"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ icm\t%0,3,%S1
+ icmy\t%0,3,%S1"
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+;
+; movstrictsi instruction pattern(s).
+;
+
+(define_insn "movstrictsi"
+ [(set (strict_low_part (match_operand:SI 0 "register_operand" "+d,d,d,d"))
+ (match_operand:SI 1 "general_operand" "d,R,T,t"))]
+ "TARGET_ZARCH"
+ "@
+ lr\t%0,%1
+ l\t%0,%1
+ ly\t%0,%1
+ ear\t%0,%1"
+ [(set_attr "op_type" "RR,RX,RXY,RRE")
+ (set_attr "type" "lr,load,load,*")
+ (set_attr "z10prop" "z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_super_E1")])
+
+;
+; mov(tf|td) instruction pattern(s).
+;
+
+(define_expand "mov<mode>"
+ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
+ (match_operand:TD_TF 1 "general_operand" ""))]
+ ""
+ "")
+
+(define_insn "*mov<mode>_64"
+ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "=f,f,f,o, d,QS, d,o")
+ (match_operand:TD_TF 1 "general_operand" " G,f,o,f,QS, d,dRT,d"))]
+ "TARGET_ZARCH"
+ "@
+ lzxr\t%0
+ lxr\t%0,%1
+ #
+ #
+ lmg\t%0,%N0,%S1
+ stmg\t%1,%N1,%S0
+ #
+ #"
+ [(set_attr "op_type" "RRE,RRE,*,*,RSY,RSY,*,*")
+ (set_attr "type" "fsimptf,fsimptf,*,*,lm,stm,*,*")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*")])
+
+(define_insn "*mov<mode>_31"
+ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "=f,f,f,o")
+ (match_operand:TD_TF 1 "general_operand" " G,f,o,f"))]
+ "!TARGET_ZARCH"
+ "@
+ lzxr\t%0
+ lxr\t%0,%1
+ #
+ #"
+ [(set_attr "op_type" "RRE,RRE,*,*")
+ (set_attr "type" "fsimptf,fsimptf,*,*")
+ (set_attr "cpu_facility" "z196,*,*,*")])
+
+; TFmode in GPRs splitters
+
+(define_split
+ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
+ (match_operand:TD_TF 1 "general_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 0, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 1, 0, <MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
+ (match_operand:TD_TF 1 "general_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 1, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 0, 0, <MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:TD_TF 0 "register_operand" "")
+ (match_operand:TD_TF 1 "memory_operand" ""))]
+ "TARGET_ZARCH && reload_completed
+ && !FP_REG_P (operands[0])
+ && !s_operand (operands[1], VOIDmode)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx addr = operand_subword (operands[0], 1, 0, <MODE>mode);
+ addr = gen_lowpart (Pmode, addr);
+ s390_load_address (addr, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+})
+
+; TFmode in BFPs splitters
+
+(define_split
+ [(set (match_operand:TD_TF 0 "register_operand" "")
+ (match_operand:TD_TF 1 "memory_operand" ""))]
+ "reload_completed && offsettable_memref_p (operands[1])
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = simplify_gen_subreg (<HALF_TMODE>mode, operands[0],
+ <MODE>mode, 0);
+ operands[3] = simplify_gen_subreg (<HALF_TMODE>mode, operands[0],
+ <MODE>mode, 8);
+ operands[4] = adjust_address_nv (operands[1], <HALF_TMODE>mode, 0);
+ operands[5] = adjust_address_nv (operands[1], <HALF_TMODE>mode, 8);
+})
+
+(define_split
+ [(set (match_operand:TD_TF 0 "memory_operand" "")
+ (match_operand:TD_TF 1 "register_operand" ""))]
+ "reload_completed && offsettable_memref_p (operands[0])
+ && FP_REG_P (operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = adjust_address_nv (operands[0], <HALF_TMODE>mode, 0);
+ operands[3] = adjust_address_nv (operands[0], <HALF_TMODE>mode, 8);
+ operands[4] = simplify_gen_subreg (<HALF_TMODE>mode, operands[1],
+ <MODE>mode, 0);
+ operands[5] = simplify_gen_subreg (<HALF_TMODE>mode, operands[1],
+ <MODE>mode, 8);
+})
+
+;
+; mov(df|dd) instruction pattern(s).
+;
+
+(define_expand "mov<mode>"
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
+ (match_operand:DD_DF 1 "general_operand" ""))]
+ ""
+ "")
+
+(define_insn "*mov<mode>_64dfp"
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand"
+ "=f,f,f,d,f,f,R,T,d,d, d,RT")
+ (match_operand:DD_DF 1 "general_operand"
+ " G,f,d,f,R,T,f,f,G,d,RT, d"))]
+ "TARGET_DFP"
+ "@
+ lzdr\t%0
+ ldr\t%0,%1
+ ldgr\t%0,%1
+ lgdr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ lghi\t%0,0
+ lgr\t%0,%1
+ lg\t%0,%1
+ stg\t%1,%0"
+ [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
+ (set_attr "type" "fsimpdf,floaddf,floaddf,floaddf,floaddf,floaddf,
+ fstoredf,fstoredf,*,lr,load,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
+
+(define_insn "*mov<mode>_64"
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d, d,RT")
+ (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,RT, d"))]
+ "TARGET_ZARCH"
+ "@
+ lzdr\t%0
+ ldr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ lghi\t%0,0
+ lgr\t%0,%1
+ lg\t%0,%1
+ stg\t%1,%0"
+ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
+ (set_attr "type" "fsimpdf,fload<mode>,fload<mode>,fload<mode>,
+ fstore<mode>,fstore<mode>,*,lr,load,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*")])
+
+(define_insn "*mov<mode>_31"
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand"
+ "=f,f,f,f,R,T,d,d,Q,S, d,o")
+ (match_operand:DD_DF 1 "general_operand"
+ " G,f,R,T,f,f,Q,S,d,d,dPRT,d"))]
+ "!TARGET_ZARCH"
+ "@
+ lzdr\t%0
+ ldr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ lm\t%0,%N0,%S1
+ lmy\t%0,%N0,%S1
+ stm\t%1,%N1,%S0
+ stmy\t%1,%N1,%S0
+ #
+ #"
+ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RS,RSY,RS,RSY,*,*")
+ (set_attr "type" "fsimpdf,fload<mode>,fload<mode>,fload<mode>,
+ fstore<mode>,fstore<mode>,lm,lm,stm,stm,*,*")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
+
+(define_split
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
+ (match_operand:DD_DF 1 "general_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 0, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 1, 0, <MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
+ (match_operand:DD_DF 1 "general_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 1, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 0, 0, <MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:DD_DF 0 "register_operand" "")
+ (match_operand:DD_DF 1 "memory_operand" ""))]
+ "!TARGET_ZARCH && reload_completed
+ && !FP_REG_P (operands[0])
+ && !s_operand (operands[1], VOIDmode)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx addr = operand_subword (operands[0], 1, 0, <MODE>mode);
+ s390_load_address (addr, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+})
+
+;
+; mov(sf|sd) instruction pattern(s).
+;
+
+(define_insn "mov<mode>"
+ [(set (match_operand:SD_SF 0 "nonimmediate_operand"
+ "=f,f,f,f,R,T,d,d,d,d,R,T")
+ (match_operand:SD_SF 1 "general_operand"
+ " G,f,R,T,f,f,G,d,R,T,d,d"))]
+ ""
+ "@
+ lzer\t%0
+ ler\t%0,%1
+ le\t%0,%1
+ ley\t%0,%1
+ ste\t%1,%0
+ stey\t%1,%0
+ lhi\t%0,0
+ lr\t%0,%1
+ l\t%0,%1
+ ly\t%0,%1
+ st\t%1,%0
+ sty\t%1,%0"
+ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RR,RX,RXY,RX,RXY")
+ (set_attr "type" "fsimpsf,fload<mode>,fload<mode>,fload<mode>,
+ fstore<mode>,fstore<mode>,*,lr,load,load,store,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
+
+;
+; movcc instruction pattern
+;
+
+(define_insn "movcc"
+ [(set (match_operand:CC 0 "nonimmediate_operand" "=d,c,d,d,d,R,T")
+ (match_operand:CC 1 "nonimmediate_operand" "d,d,c,R,T,d,d"))]
+ ""
+ "@
+ lr\t%0,%1
+ tmh\t%1,12288
+ ipm\t%0
+ st\t%0,%1
+ sty\t%0,%1
+ l\t%1,%0
+ ly\t%1,%0"
+ [(set_attr "op_type" "RR,RI,RRE,RX,RXY,RX,RXY")
+ (set_attr "type" "lr,*,*,store,store,load,load")
+ (set_attr "z10prop" "z10_fr_E1,z10_super,*,z10_rec,z10_rec,z10_fwd_A3,z10_fwd_A3")
+ (set_attr "z196prop" "*,*,z196_ends,*,*,*,*")])
+
+;
+; Block move (MVC) patterns.
+;
+
+(define_insn "*mvc"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q")
+ (match_operand:BLK 1 "memory_operand" "Q"))
+ (use (match_operand 2 "const_int_operand" "n"))]
+ "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 256"
+ "mvc\t%O0(%2,%R0),%S1"
+ [(set_attr "op_type" "SS")])
+
+; This splitter converts a QImode to QImode memory copy into a BLK mode
+; copy in order to have it implemented with mvc.
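+; (A sketch: after the split, a one-byte copy such as "*dst = *src"
+; becomes "mvc 0(1,%r1),0(%r2)" instead of an insert/store pair.)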
+
+(define_split
+ [(set (match_operand:QI 0 "memory_operand" "")
+ (match_operand:QI 1 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (match_dup 1))
+ (use (const_int 1))])]
+{
+ operands[0] = adjust_address (operands[0], BLKmode, 0);
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+})
+
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))])
+ (parallel
+ [(set (match_operand:BLK 3 "memory_operand" "")
+ (match_operand:BLK 4 "memory_operand" ""))
+ (use (match_operand 5 "const_int_operand" ""))])]
+ "s390_offset_p (operands[0], operands[3], operands[2])
+ && s390_offset_p (operands[1], operands[4], operands[2])
+ && !s390_overlap_p (operands[0], operands[1],
+ INTVAL (operands[2]) + INTVAL (operands[5]))
+ && INTVAL (operands[2]) + INTVAL (operands[5]) <= 256"
+ [(parallel
+ [(set (match_dup 6) (match_dup 7))
+ (use (match_dup 8))])]
+ "operands[6] = gen_rtx_MEM (BLKmode, XEXP (operands[0], 0));
+ operands[7] = gen_rtx_MEM (BLKmode, XEXP (operands[1], 0));
+ operands[8] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[5]));")
+
+
+;
+; load_multiple pattern(s).
+;
+; ??? Due to reload problems with replacing registers inside match_parallel
+; we currently support load_multiple/store_multiple only after reload.
+;
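+; For example (an RTL sketch; register numbers arbitrary), loading
+; %r6..%r8 from 0(%r15) is represented as
+;   (parallel [(set (reg:SI 6) (mem:SI (reg:SI 15)))
+;              (set (reg:SI 7) (mem:SI (plus:SI (reg:SI 15) (const_int 4))))
+;              (set (reg:SI 8) (mem:SI (plus:SI (reg:SI 15) (const_int 8))))])
+; and matched below as a single "lm %r6,%r8,0(%r15)".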
+
+(define_expand "load_multiple"
+ [(match_par_dup 3 [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))])]
+ "reload_completed"
+{
+ enum machine_mode mode;
+ int regno;
+ int count;
+ rtx from;
+ int i, off;
+
+ /* Support only loading a constant number of fixed-point registers from
+ memory and only bother with this if more than two. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) < 2
+ || INTVAL (operands[2]) > 16
+ || GET_CODE (operands[1]) != MEM
+ || GET_CODE (operands[0]) != REG
+ || REGNO (operands[0]) >= 16)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[0]);
+ mode = GET_MODE (operands[0]);
+ if (mode != SImode && (!TARGET_ZARCH || mode != DImode))
+ FAIL;
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+ if (!can_create_pseudo_p ())
+ {
+ if (GET_CODE (XEXP (operands[1], 0)) == REG)
+ {
+ from = XEXP (operands[1], 0);
+ off = 0;
+ }
+ else if (GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT)
+ {
+ from = XEXP (XEXP (operands[1], 0), 0);
+ off = INTVAL (XEXP (XEXP (operands[1], 0), 1));
+ }
+ else
+ FAIL;
+ }
+ else
+ {
+ from = force_reg (Pmode, XEXP (operands[1], 0));
+ off = 0;
+ }
+
+ for (i = 0; i < count; i++)
+ XVECEXP (operands[3], 0, i)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, regno + i),
+ change_address (operands[1], mode,
+ plus_constant (from, off + i * GET_MODE_SIZE (mode))));
+})
+
+(define_insn "*load_multiple_di"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:DI 1 "register_operand" "=r")
+ (match_operand:DI 2 "s_operand" "QS"))])]
+ "reload_completed && TARGET_ZARCH"
+{
+ int words = XVECLEN (operands[0], 0);
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[1]) + words - 1);
+ return "lmg\t%1,%0,%S2";
+}
+ [(set_attr "op_type" "RSY")
+ (set_attr "type" "lm")])
+
+(define_insn "*load_multiple_si"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "register_operand" "=r,r")
+ (match_operand:SI 2 "s_operand" "Q,S"))])]
+ "reload_completed"
+{
+ int words = XVECLEN (operands[0], 0);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[1]) + words - 1);
+ return which_alternative == 0 ? "lm\t%1,%0,%S2" : "lmy\t%1,%0,%S2";
+}
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "type" "lm")])
+
+;
+; store multiple pattern(s).
+;
+
+(define_expand "store_multiple"
+ [(match_par_dup 3 [(set (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))])]
+ "reload_completed"
+{
+ enum machine_mode mode;
+ int regno;
+ int count;
+ rtx to;
+ int i, off;
+
+ /* Support only storing a constant number of fixed-point registers to
+ memory and only bother with this if more than two. */
+ if (GET_CODE (operands[2]) != CONST_INT
+ || INTVAL (operands[2]) < 2
+ || INTVAL (operands[2]) > 16
+ || GET_CODE (operands[0]) != MEM
+ || GET_CODE (operands[1]) != REG
+ || REGNO (operands[1]) >= 16)
+ FAIL;
+
+ count = INTVAL (operands[2]);
+ regno = REGNO (operands[1]);
+ mode = GET_MODE (operands[1]);
+ if (mode != SImode && (!TARGET_ZARCH || mode != DImode))
+ FAIL;
+
+ operands[3] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
+
+ if (!can_create_pseudo_p ())
+ {
+ if (GET_CODE (XEXP (operands[0], 0)) == REG)
+ {
+ to = XEXP (operands[0], 0);
+ off = 0;
+ }
+ else if (GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == CONST_INT)
+ {
+ to = XEXP (XEXP (operands[0], 0), 0);
+ off = INTVAL (XEXP (XEXP (operands[0], 0), 1));
+ }
+ else
+ FAIL;
+ }
+ else
+ {
+ to = force_reg (Pmode, XEXP (operands[0], 0));
+ off = 0;
+ }
+
+ for (i = 0; i < count; i++)
+ XVECEXP (operands[3], 0, i)
+ = gen_rtx_SET (VOIDmode,
+ change_address (operands[0], mode,
+ plus_constant (to, off + i * GET_MODE_SIZE (mode))),
+ gen_rtx_REG (mode, regno + i));
+})
+
+(define_insn "*store_multiple_di"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:DI 1 "s_operand" "=QS")
+ (match_operand:DI 2 "register_operand" "r"))])]
+ "reload_completed && TARGET_ZARCH"
+{
+ int words = XVECLEN (operands[0], 0);
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[2]) + words - 1);
+ return "stmg\t%2,%0,%S1";
+}
+ [(set_attr "op_type" "RSY")
+ (set_attr "type" "stm")])
+
+
+(define_insn "*store_multiple_si"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 1 "s_operand" "=Q,S")
+ (match_operand:SI 2 "register_operand" "r,r"))])]
+ "reload_completed"
+{
+ int words = XVECLEN (operands[0], 0);
+ operands[0] = gen_rtx_REG (SImode, REGNO (operands[2]) + words - 1);
+ return which_alternative == 0 ? "stm\t%2,%0,%S1" : "stmy\t%2,%0,%S1";
+}
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "type" "stm")])
+
+;;
+;; String instructions.
+;;
+
+(define_insn "*execute_rl"
+ [(match_parallel 0 ""
+ [(unspec [(match_operand 1 "register_operand" "a")
+ (match_operand 2 "" "")
+ (match_operand:SI 3 "larl_operand" "X")] UNSPEC_EXECUTE)])]
+ "TARGET_Z10 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[1])) <= UNITS_PER_WORD"
+ "exrl\t%1,%3"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "cs")])
+
+(define_insn "*execute"
+ [(match_parallel 0 ""
+ [(unspec [(match_operand 1 "register_operand" "a")
+ (match_operand:BLK 2 "memory_operand" "R")
+ (match_operand 3 "" "")] UNSPEC_EXECUTE)])]
+ "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ && GET_MODE_SIZE (GET_MODE (operands[1])) <= UNITS_PER_WORD"
+ "ex\t%1,%2"
+ [(set_attr "op_type" "RX")
+ (set_attr "type" "cs")])
+
+
+;
+; strlenM instruction pattern(s).
+;
+
+(define_expand "strlen<mode>"
+ [(set (reg:SI 0) (match_operand:SI 2 "immediate_operand" ""))
+ (parallel
+ [(set (match_dup 4)
+ (unspec:P [(const_int 0)
+ (match_operand:BLK 1 "memory_operand" "")
+ (reg:SI 0)
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SRST))
+ (clobber (scratch:P))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_operand:P 0 "register_operand" "")
+ (minus:P (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+{
+ operands[4] = gen_reg_rtx (Pmode);
+ operands[5] = gen_reg_rtx (Pmode);
+ emit_move_insn (operands[5], force_operand (XEXP (operands[1], 0), NULL_RTX));
+ operands[1] = replace_equiv_address (operands[1], operands[5]);
+})
+
+(define_insn "*strlen<mode>"
+ [(set (match_operand:P 0 "register_operand" "=a")
+ (unspec:P [(match_operand:P 2 "general_operand" "0")
+ (mem:BLK (match_operand:P 3 "register_operand" "1"))
+ (reg:SI 0)
+ (match_operand 4 "immediate_operand" "")] UNSPEC_SRST))
+ (clobber (match_scratch:P 1 "=a"))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "srst\t%0,%1\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+;
+; cmpstrM instruction pattern(s).
+;
+
+(define_expand "cmpstrsi"
+ [(set (reg:SI 0) (const_int 0))
+ (parallel
+ [(clobber (match_operand 3 "" ""))
+ (clobber (match_dup 4))
+ (set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 1 "memory_operand" "")
+ (match_operand:BLK 2 "memory_operand" "")))
+ (use (reg:SI 0))])
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_CCU_TO_INT))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+{
+ /* As the result of CMPINT is inverted compared to what we need,
+ we have to swap the operands. */
+ rtx op1 = operands[2];
+ rtx op2 = operands[1];
+ rtx addr1 = gen_reg_rtx (Pmode);
+ rtx addr2 = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (op2, 0), NULL_RTX));
+ operands[1] = replace_equiv_address_nv (op1, addr1);
+ operands[2] = replace_equiv_address_nv (op2, addr2);
+ operands[3] = addr1;
+ operands[4] = addr2;
+})
+
+(define_insn "*cmpstr<mode>"
+ [(clobber (match_operand:P 0 "register_operand" "=d"))
+ (clobber (match_operand:P 1 "register_operand" "=d"))
+ (set (reg:CCU CC_REGNUM)
+ (compare:CCU (mem:BLK (match_operand:P 2 "register_operand" "0"))
+ (mem:BLK (match_operand:P 3 "register_operand" "1"))))
+ (use (reg:SI 0))]
+ ""
+ "clst\t%0,%1\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+;
+; movstr instruction pattern.
+;
+
+(define_expand "movstr"
+ [(set (reg:SI 0) (const_int 0))
+ (parallel
+ [(clobber (match_dup 3))
+ (set (match_operand:BLK 1 "memory_operand" "")
+ (match_operand:BLK 2 "memory_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (unspec [(match_dup 1)
+ (match_dup 2)
+ (reg:SI 0)] UNSPEC_MVST))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+{
+ rtx addr1 = gen_reg_rtx (Pmode);
+ rtx addr2 = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (addr2, force_operand (XEXP (operands[2], 0), NULL_RTX));
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+ operands[3] = addr2;
+})
+
+(define_insn "*movstr"
+ [(clobber (match_operand:P 2 "register_operand" "=d"))
+ (set (mem:BLK (match_operand:P 1 "register_operand" "0"))
+ (mem:BLK (match_operand:P 3 "register_operand" "2")))
+ (set (match_operand:P 0 "register_operand" "=d")
+ (unspec [(mem:BLK (match_dup 1))
+ (mem:BLK (match_dup 3))
+ (reg:SI 0)] UNSPEC_MVST))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "mvst\t%1,%2\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+
+;
+; movmemM instruction pattern(s).
+;
+
+(define_expand "movmem<mode>"
+ [(set (match_operand:BLK 0 "memory_operand" "") ; destination
+ (match_operand:BLK 1 "memory_operand" "")) ; source
+ (use (match_operand:GPR 2 "general_operand" "")) ; count
+ (match_operand 3 "" "")]
+ ""
+ "s390_expand_movmem (operands[0], operands[1], operands[2]); DONE;")
+
+; Move a block that is up to 256 bytes in length.
+; The block length is taken as (operands[2] % 256) + 1.
+
+(define_expand "movmem_short"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "nonmemory_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_dup 3))])]
+ ""
+ "operands[3] = gen_rtx_SCRATCH (Pmode);")
+
+(define_insn "*movmem_short"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q,Q,Q,Q")
+ (match_operand:BLK 1 "memory_operand" "Q,Q,Q,Q"))
+ (use (match_operand 2 "nonmemory_operand" "n,a,a,a"))
+ (use (match_operand 3 "immediate_operand" "X,R,X,X"))
+ (clobber (match_scratch 4 "=X,X,X,&a"))]
+ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)
+ && GET_MODE (operands[4]) == Pmode"
+ "#"
+ [(set_attr "type" "cs")
+ (set_attr "cpu_facility" "*,*,z10,*")])
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "immediate_operand" ""))
+ (clobber (scratch))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))])]
+ "operands[2] = GEN_INT ((INTVAL (operands[2]) & 0xff) + 1);")
+
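+; A worked example of the constant-length split above: to move 16
+; bytes the caller passes operands[2] = 15 (the expander's
+; "(operands[2] % 256) + 1" convention), the split rewrites it to
+; (15 & 0xff) + 1 = 16, and the result is emitted by the mvc pattern
+; defined elsewhere in this file as "mvc 0(16,%r1),0(%r2)", whose
+; hardware length field encodes 16 - 1.
+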
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "register_operand" ""))
+ (use (match_operand 3 "memory_operand" ""))
+ (clobber (scratch))]
+ "reload_completed"
+ [(parallel
+ [(unspec [(match_dup 2) (match_dup 3)
+ (const_int 0)] UNSPEC_EXECUTE)
+ (set (match_dup 0) (match_dup 1))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (scratch))]
+ "TARGET_Z10 && reload_completed"
+ [(parallel
+ [(unspec [(match_dup 2) (const_int 0)
+ (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (set (match_dup 0) (match_dup 1))
+ (use (const_int 1))])]
+ "operands[3] = gen_label_rtx ();")
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_operand 3 "register_operand" ""))]
+ "reload_completed && TARGET_CPU_ZARCH"
+ [(set (match_dup 3) (label_ref (match_dup 4)))
+ (parallel
+ [(unspec [(match_dup 2) (mem:BLK (match_dup 3))
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
+ (set (match_dup 0) (match_dup 1))
+ (use (const_int 1))])]
+ "operands[4] = gen_label_rtx ();")
+
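+; A hedged note on the three EXECUTE-based splits above: the runtime
+; length register is consumed via EX or EXRL, which OR the low byte of
+; that register into the length field of a template MVC of length 1
+; (hence the "(use (const_int 1))").  If the register holds 15, the
+; executed MVC behaves as if its length field were 15 and moves
+; 15 + 1 = 16 bytes.
+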
+; Move a block of arbitrary length.
+
+(define_expand "movmem_long"
+ [(parallel
+ [(clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand 2 "general_operand" ""))
+ (use (match_dup 3))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+{
+ enum machine_mode sreg_mode = TARGET_ZARCH ? DImode : SImode;
+ enum machine_mode dreg_mode = TARGET_ZARCH ? TImode : DImode;
+ rtx reg0 = gen_reg_rtx (dreg_mode);
+ rtx reg1 = gen_reg_rtx (dreg_mode);
+ rtx addr0 = gen_lowpart (Pmode, gen_highpart (sreg_mode, reg0));
+ rtx addr1 = gen_lowpart (Pmode, gen_highpart (sreg_mode, reg1));
+ rtx len0 = gen_lowpart (Pmode, reg0);
+ rtx len1 = gen_lowpart (Pmode, reg1);
+
+ emit_clobber (reg0);
+ emit_move_insn (addr0, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (len0, operands[2]);
+
+ emit_clobber (reg1);
+ emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (len1, operands[2]);
+
+ operands[0] = replace_equiv_address_nv (operands[0], addr0);
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ operands[2] = reg0;
+ operands[3] = reg1;
+})
+
+(define_insn "*movmem_long"
+ [(clobber (match_operand:<DBL> 0 "register_operand" "=d"))
+ (clobber (match_operand:<DBL> 1 "register_operand" "=d"))
+ (set (mem:BLK (subreg:P (match_operand:<DBL> 2 "register_operand" "0") 0))
+ (mem:BLK (subreg:P (match_operand:<DBL> 3 "register_operand" "1") 0)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_64BIT || !TARGET_ZARCH"
+ "mvcle\t%0,%1,0\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+(define_insn "*movmem_long_31z"
+ [(clobber (match_operand:TI 0 "register_operand" "=d"))
+ (clobber (match_operand:TI 1 "register_operand" "=d"))
+ (set (mem:BLK (subreg:SI (match_operand:TI 2 "register_operand" "0") 4))
+ (mem:BLK (subreg:SI (match_operand:TI 3 "register_operand" "1") 4)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_64BIT && TARGET_ZARCH"
+ "mvcle\t%0,%1,0\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
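+; A hedged sketch of the register layout the movmem_long expander
+; builds for MVCLE: each <DBL>-mode pseudo is an even/odd register
+; pair whose even half carries an address and whose odd half carries
+; a length, e.g. for 64-bit code (register numbers illustrative):
+;
+;     %r2/%r3 = destination address/length
+;     %r4/%r5 = source address/length
+;     mvcle %r2,%r4,0    # pad byte 0; CC3 = not finished
+;     jo    .-4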
+
+;
+; Test data class.
+;
+
+(define_expand "signbit<mode>2"
+ [(set (reg:CCZ CC_REGNUM)
+ (unspec:CCZ [(match_operand:FP_ALL 1 "register_operand" "f")
+ (match_dup 2)]
+ UNSPEC_TDC_INSN))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[2] = GEN_INT (S390_TDC_SIGNBIT_SET);
+})
+
+(define_expand "isinf<mode>2"
+ [(set (reg:CCZ CC_REGNUM)
+ (unspec:CCZ [(match_operand:FP_ALL 1 "register_operand" "f")
+ (match_dup 2)]
+ UNSPEC_TDC_INSN))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))]
+ "TARGET_HARD_FLOAT"
+{
+ operands[2] = GEN_INT (S390_TDC_INFINITY);
+})
+
+; This insn is used to generate all variants of the Test Data Class
+; instruction, namely tcxb, tcdb, and tceb. The insn's first operand
+; is the register to be tested and the second one is the bit mask
+; specifying the required test(s).
+;
+(define_insn "*TDC_insn_<mode>"
+ [(set (reg:CCZ CC_REGNUM)
+ (unspec:CCZ [(match_operand:FP_ALL 0 "register_operand" "f")
+ (match_operand:SI 1 "const_int_operand")] UNSPEC_TDC_INSN))]
+ "TARGET_HARD_FLOAT"
+ "t<_d>c<xde><bt>\t%0,%1"
+ [(set_attr "op_type" "RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+(define_insn_and_split "*ccz_to_int"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:CCZ 1 "register_operand" "0")]
+ UNSPEC_CCZ_TO_INT))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))])
+
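+; Why the shift by 28 above works, as a hedged aside: the CCZ value
+; matched here is materialized in a GPR with IPM, which deposits the
+; condition code in bits 2-3 of the 32-bit word and zeroes bits 0-1.
+; A logical right shift by 28 therefore leaves exactly the two CC
+; bits, so the result is 0 iff the condition code was 0 -- all that
+; the signbit/isinf expanders above need.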
+
+;
+; setmemM instruction pattern(s).
+;
+
+(define_expand "setmem<mode>"
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:QI 2 "general_operand" ""))
+ (use (match_operand:GPR 1 "general_operand" ""))
+ (match_operand 3 "" "")]
+ ""
+ "s390_expand_setmem (operands[0], operands[1], operands[2]); DONE;")
+
+; Clear a block that is up to 256 bytes in length.
+; The block length is taken as (operands[1] % 256) + 1.
+
+(define_expand "clrmem_short"
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "nonmemory_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_dup 2))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "operands[2] = gen_rtx_SCRATCH (Pmode);")
+
+(define_insn "*clrmem_short"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q,Q,Q,Q")
+ (const_int 0))
+ (use (match_operand 1 "nonmemory_operand" "n,a,a,a"))
+ (use (match_operand 2 "immediate_operand" "X,R,X,X"))
+ (clobber (match_scratch 3 "=X,X,X,&a"))
+ (clobber (reg:CC CC_REGNUM))]
+ "(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)
+ && GET_MODE (operands[3]) == Pmode"
+ "#"
+ [(set_attr "type" "cs")
+ (set_attr "cpu_facility" "*,*,z10,*")])
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "const_int_operand" ""))
+ (use (match_operand 2 "immediate_operand" ""))
+ (clobber (scratch))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (const_int 0))
+ (use (match_dup 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[1] = GEN_INT ((INTVAL (operands[1]) & 0xff) + 1);")
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "register_operand" ""))
+ (use (match_operand 2 "memory_operand" ""))
+ (clobber (scratch))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(unspec [(match_dup 1) (match_dup 2)
+ (const_int 0)] UNSPEC_EXECUTE)
+ (set (match_dup 0) (const_int 0))
+ (use (const_int 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ "")
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (scratch))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && reload_completed"
+ [(parallel
+ [(unspec [(match_dup 1) (const_int 0)
+ (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (set (match_dup 0) (const_int 0))
+ (use (const_int 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = gen_label_rtx ();")
+
+(define_split
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_operand 2 "register_operand" ""))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed && TARGET_CPU_ZARCH"
+ [(set (match_dup 2) (label_ref (match_dup 3)))
+ (parallel
+ [(unspec [(match_dup 1) (mem:BLK (match_dup 2))
+ (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (set (match_dup 0) (const_int 0))
+ (use (const_int 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = gen_label_rtx ();")
+
+; Initialize a block of arbitrary length with (operands[2] % 256).
+
+(define_expand "setmem_long"
+ [(parallel
+ [(clobber (match_dup 1))
+ (set (match_operand:BLK 0 "memory_operand" "")
+ (match_operand 2 "shift_count_or_setmem_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_dup 3))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+{
+ enum machine_mode sreg_mode = TARGET_ZARCH ? DImode : SImode;
+ enum machine_mode dreg_mode = TARGET_ZARCH ? TImode : DImode;
+ rtx reg0 = gen_reg_rtx (dreg_mode);
+ rtx reg1 = gen_reg_rtx (dreg_mode);
+ rtx addr0 = gen_lowpart (Pmode, gen_highpart (sreg_mode, reg0));
+ rtx len0 = gen_lowpart (Pmode, reg0);
+
+ emit_clobber (reg0);
+ emit_move_insn (addr0, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (len0, operands[1]);
+
+ emit_move_insn (reg1, const0_rtx);
+
+ operands[0] = replace_equiv_address_nv (operands[0], addr0);
+ operands[1] = reg0;
+ operands[3] = reg1;
+})
+
+(define_insn "*setmem_long"
+ [(clobber (match_operand:<DBL> 0 "register_operand" "=d"))
+ (set (mem:BLK (subreg:P (match_operand:<DBL> 3 "register_operand" "0") 0))
+ (match_operand 2 "shift_count_or_setmem_operand" "Y"))
+ (use (match_dup 3))
+ (use (match_operand:<DBL> 1 "register_operand" "d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_64BIT || !TARGET_ZARCH"
+ "mvcle\t%0,%1,%Y2\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
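+; A hedged reading of *setmem_long above: the source register pair
+; (operand 1) is loaded with zero by the expander, so MVCLE copies
+; nothing and instead fills the entire destination with its pad byte
+; %Y2 -- the fill value -- expressing a block set as a zero-length
+; block move.
+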
+(define_insn "*setmem_long_and"
+ [(clobber (match_operand:<DBL> 0 "register_operand" "=d"))
+ (set (mem:BLK (subreg:P (match_operand:<DBL> 3 "register_operand" "0") 0))
+ (and (match_operand 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand 4 "const_int_operand" "n")))
+ (use (match_dup 3))
+ (use (match_operand:<DBL> 1 "register_operand" "d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_64BIT || !TARGET_ZARCH) &&
+ (INTVAL (operands[4]) & 255) == 255"
+ "mvcle\t%0,%1,%Y2\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+(define_insn "*setmem_long_31z"
+ [(clobber (match_operand:TI 0 "register_operand" "=d"))
+ (set (mem:BLK (subreg:SI (match_operand:TI 3 "register_operand" "0") 4))
+ (match_operand 2 "shift_count_or_setmem_operand" "Y"))
+ (use (match_dup 3))
+ (use (match_operand:TI 1 "register_operand" "d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_64BIT && TARGET_ZARCH"
+ "mvcle\t%0,%1,%Y2\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+;
+; cmpmemM instruction pattern(s).
+;
+
+(define_expand "cmpmemsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "memory_operand" "")
+ (match_operand:BLK 2 "memory_operand" "") ) )
+ (use (match_operand:SI 3 "general_operand" ""))
+ (use (match_operand:SI 4 "" ""))]
+ ""
+ "s390_expand_cmpmem (operands[0], operands[1],
+ operands[2], operands[3]); DONE;")
+
+; Compare a block that is up to 256 bytes in length.
+; The block length is taken as (operands[2] % 256) + 1.
+
+(define_expand "cmpmem_short"
+ [(parallel
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "nonmemory_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_dup 3))])]
+ ""
+ "operands[3] = gen_rtx_SCRATCH (Pmode);")
+
+(define_insn "*cmpmem_short"
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "Q,Q,Q,Q")
+ (match_operand:BLK 1 "memory_operand" "Q,Q,Q,Q")))
+ (use (match_operand 2 "nonmemory_operand" "n,a,a,a"))
+ (use (match_operand 3 "immediate_operand" "X,R,X,X"))
+ (clobber (match_scratch 4 "=X,X,X,&a"))]
+ "(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)
+ && GET_MODE (operands[4]) == Pmode"
+ "#"
+ [(set_attr "type" "cs")
+ (set_attr "cpu_facility" "*,*,z10,*")])
+
+(define_split
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "immediate_operand" ""))
+ (clobber (scratch))]
+ "reload_completed"
+ [(parallel
+ [(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
+ (use (match_dup 2))])]
+ "operands[2] = GEN_INT ((INTVAL (operands[2]) & 0xff) + 1);")
+
+(define_split
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (match_operand 3 "memory_operand" ""))
+ (clobber (scratch))]
+ "reload_completed"
+ [(parallel
+ [(unspec [(match_dup 2) (match_dup 3)
+ (const_int 0)] UNSPEC_EXECUTE)
+ (set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
+ (use (const_int 1))])]
+ "")
+
+(define_split
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (scratch))]
+ "TARGET_Z10 && reload_completed"
+ [(parallel
+ [(unspec [(match_dup 2) (const_int 0)
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
+ (set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
+ (use (const_int 1))])]
+ "operands[4] = gen_label_rtx ();")
+
+(define_split
+ [(set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
+ (clobber (match_operand 3 "register_operand" ""))]
+ "reload_completed && TARGET_CPU_ZARCH"
+ [(set (match_dup 3) (label_ref (match_dup 4)))
+ (parallel
+ [(unspec [(match_dup 2) (mem:BLK (match_dup 3))
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
+ (set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
+ (use (const_int 1))])]
+ "operands[4] = gen_label_rtx ();")
+
+; Compare a block of arbitrary length.
+
+(define_expand "cmpmem_long"
+ [(parallel
+ [(clobber (match_dup 2))
+ (clobber (match_dup 3))
+ (set (reg:CCU CC_REGNUM)
+ (compare:CCU (match_operand:BLK 0 "memory_operand" "")
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "general_operand" ""))
+ (use (match_dup 3))])]
+ ""
+{
+ enum machine_mode sreg_mode = TARGET_ZARCH ? DImode : SImode;
+ enum machine_mode dreg_mode = TARGET_ZARCH ? TImode : DImode;
+ rtx reg0 = gen_reg_rtx (dreg_mode);
+ rtx reg1 = gen_reg_rtx (dreg_mode);
+ rtx addr0 = gen_lowpart (Pmode, gen_highpart (sreg_mode, reg0));
+ rtx addr1 = gen_lowpart (Pmode, gen_highpart (sreg_mode, reg1));
+ rtx len0 = gen_lowpart (Pmode, reg0);
+ rtx len1 = gen_lowpart (Pmode, reg1);
+
+ emit_clobber (reg0);
+ emit_move_insn (addr0, force_operand (XEXP (operands[0], 0), NULL_RTX));
+ emit_move_insn (len0, operands[2]);
+
+ emit_clobber (reg1);
+ emit_move_insn (addr1, force_operand (XEXP (operands[1], 0), NULL_RTX));
+ emit_move_insn (len1, operands[2]);
+
+ operands[0] = replace_equiv_address_nv (operands[0], addr0);
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ operands[2] = reg0;
+ operands[3] = reg1;
+})
+
+(define_insn "*cmpmem_long"
+ [(clobber (match_operand:<DBL> 0 "register_operand" "=d"))
+ (clobber (match_operand:<DBL> 1 "register_operand" "=d"))
+ (set (reg:CCU CC_REGNUM)
+ (compare:CCU (mem:BLK (subreg:P (match_operand:<DBL> 2 "register_operand" "0") 0))
+ (mem:BLK (subreg:P (match_operand:<DBL> 3 "register_operand" "1") 0))))
+ (use (match_dup 2))
+ (use (match_dup 3))]
+ "TARGET_64BIT || !TARGET_ZARCH"
+ "clcle\t%0,%1,0\;jo\t.-4"
+ [(set_attr "length" "8")
+ (set_attr "type" "vs")])
+
+(define_insn "*cmpmem_long_31z"
+ [(clobber (match_operand:TI 0 "register_operand" "=d"))
+ (clobber (match_operand:TI 1 "register_operand" "=d"))
+ (set (reg:CCU CC_REGNUM)
+ (compare:CCU (mem:BLK (subreg:SI (match_operand:TI 2 "register_operand" "0") 4))
+ (mem:BLK (subreg:SI (match_operand:TI 3 "register_operand" "1") 4))))
+ (use (match_dup 2))
+ (use (match_dup 3))]
+ "!TARGET_64BIT && TARGET_ZARCH"
+ "clcle\t%0,%1,0\;jo\t.-4"
+ [(set_attr "op_type" "NN")
+ (set_attr "type" "vs")
+ (set_attr "length" "8")])
+
+; Convert CCUmode condition code to integer.
+; Result is zero if EQ, positive if LTU, negative if GTU.
+
+(define_insn_and_split "cmpint"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
+ UNSPEC_CCU_TO_INT))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 2)))
+ (parallel
+ [(set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 30)))
+ (clobber (reg:CC CC_REGNUM))])])
+
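+; A worked example of the two shifts above, assuming the register
+; holds an IPM image with the CC in bits 2-3: the left shift by 2
+; moves the CC into the top two bits and the arithmetic right shift
+; by 30 sign-extends it, giving
+;
+;     CC 0 (equal)      ->  0
+;     CC 1 (op1 < op2)  ->  1
+;     CC 2 (op1 > op2)  -> -2
+;
+; which matches the "zero if EQ, positive if LTU, negative if GTU"
+; comment.
+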
+(define_insn_and_split "*cmpint_cc"
+ [(set (reg CC_REGNUM)
+ (compare (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
+ UNSPEC_CCU_TO_INT)
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT))]
+ "s390_match_ccmode (insn, CCSmode)"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 2)))
+ (parallel
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 30)))])]
+{
+ rtx result = gen_rtx_ASHIFTRT (SImode, operands[0], GEN_INT (30));
+ operands[2] = SET_DEST (XVECEXP (PATTERN (curr_insn), 0, 0));
+ operands[3] = gen_rtx_COMPARE (GET_MODE (operands[2]), result, const0_rtx);
+})
+
+(define_insn_and_split "*cmpint_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
+ UNSPEC_CCU_TO_INT)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (ashift:DI (match_dup 0) (const_int 34)))
+ (parallel
+ [(set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 62)))
+ (clobber (reg:CC CC_REGNUM))])])
+
+(define_insn_and_split "*cmpint_sign_cc"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:DI (ashift:DI (subreg:DI
+ (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
+ UNSPEC_CCU_TO_INT) 0)
+ (const_int 32)) (const_int 32))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT)))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (ashift:DI (match_dup 0) (const_int 34)))
+ (parallel
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 62)))])]
+{
+ rtx result = gen_rtx_ASHIFTRT (DImode, operands[0], GEN_INT (62));
+ operands[2] = SET_DEST (XVECEXP (PATTERN (curr_insn), 0, 0));
+ operands[3] = gen_rtx_COMPARE (GET_MODE (operands[2]), result, const0_rtx);
+})
+
+
+;;
+;;- Conversion instructions.
+;;
+
+(define_insn "*sethighpartsi"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:BLK 1 "s_operand" "Q,S")
+ (match_operand 2 "const_int_operand" "n,n")] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ icm\t%0,%2,%S1
+ icmy\t%0,%2,%S1"
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn "*sethighpartdi_64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (unspec:DI [(match_operand:BLK 1 "s_operand" "QS")
+ (match_operand 2 "const_int_operand" "n")] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "icmh\t%0,%2,%S1"
+ [(set_attr "op_type" "RSY")
+ (set_attr "z10prop" "z10_super")])
+
+(define_insn "*sethighpartdi_31"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (unspec:DI [(match_operand:BLK 1 "s_operand" "Q,S")
+ (match_operand 2 "const_int_operand" "n,n")] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH"
+ "@
+ icm\t%0,%2,%S1
+ icmy\t%0,%2,%S1"
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+
+(define_insn_and_split "*extzv<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extract:GPR (match_operand:QI 1 "s_operand" "QS")
+ (match_operand 2 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0) (unspec:GPR [(match_dup 1) (match_dup 3)] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_dup 0) (lshiftrt:GPR (match_dup 0) (match_dup 2)))]
+{
+ int bitsize = INTVAL (operands[2]);
+ int size = (bitsize - 1) / BITS_PER_UNIT + 1; /* round up */
+ int mask = ((1ul << size) - 1) << (GET_MODE_SIZE (SImode) - size);
+
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (operands[1], GEN_INT (size));
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize);
+ operands[3] = GEN_INT (mask);
+})
+
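+; A worked example of the mask computation above, for a 16-bit
+; zero_extract: bitsize = 16 gives size = 2 bytes, so the ICM mask is
+; ((1 << 2) - 1) << (4 - 2) = 0b1100 and the two bytes are inserted
+; into the leftmost byte positions of the register; the following
+; lshiftrt by 32 - 16 = 16 (for SImode) then right-justifies the
+; field.
+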
+(define_insn_and_split "*extv<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (sign_extract:GPR (match_operand:QI 1 "s_operand" "QS")
+ (match_operand 2 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0) (unspec:GPR [(match_dup 1) (match_dup 3)] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_dup 0) (ashiftrt:GPR (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ int bitsize = INTVAL (operands[2]);
+ int size = (bitsize - 1) / BITS_PER_UNIT + 1; /* round up */
+ int mask = ((1ul << size) - 1) << (GET_MODE_SIZE (SImode) - size);
+
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (operands[1], GEN_INT (size));
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize);
+ operands[3] = GEN_INT (mask);
+})
+
+;
+; insv instruction patterns
+;
+
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (match_operand 3 "general_operand" ""))]
+ ""
+{
+ if (s390_expand_insv (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ FAIL;
+})
+
+(define_insn "*insv<mode>_z10"
+ [(set (zero_extract:GPR (match_operand:GPR 0 "nonimmediate_operand" "+d")
+ (match_operand 1 "const_int_operand" "I")
+ (match_operand 2 "const_int_operand" "I"))
+ (match_operand:GPR 3 "nonimmediate_operand" "d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && (INTVAL (operands[1]) + INTVAL (operands[2])) <=
+ GET_MODE_BITSIZE (<MODE>mode)"
+{
+ int start = INTVAL (operands[2]);
+ int size = INTVAL (operands[1]);
+ int offset = 64 - GET_MODE_BITSIZE (<MODE>mode);
+
+ operands[2] = GEN_INT (offset + start); /* start bit position */
+ operands[1] = GEN_INT (offset + start + size - 1); /* end bit position */
+ operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) -
+ start - size); /* left shift count */
+
+ return "risbg\t%0,%3,%b2,%b1,%b4";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
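+; A worked example of the operand rewriting below, for an SImode
+; insert of size 8 at bit position 4: offset = 64 - 32 = 32, so the
+; RISBG gets start bit 32 + 4 = 36, end bit 36 + 8 - 1 = 43, and a
+; left shift count of 32 - 4 - 8 = 20 to line the source field up
+; with the destination bits.
+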
+; AND op1 with a mask that is 1 for the selected bits and 0 for the
+; rest, and AND op3 (= op0) with the complementary mask (0 for the
+; selected bits, 1 for the rest).
+(define_insn "*insv<mode>_z10_noshift"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
+ (match_operand 2 "const_int_operand" "n"))
+ (and:GPR (match_operand:GPR 3 "nonimmediate_operand" "0")
+ (match_operand 4 "const_int_operand" "n"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && s390_contiguous_bitmask_p (INTVAL (operands[2]),
+ GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)
+ && INTVAL (operands[2]) == ~(INTVAL (operands[4]))"
+{
+ int start;
+ int size;
+
+ s390_contiguous_bitmask_p (INTVAL (operands[2]),
+ GET_MODE_BITSIZE (<MODE>mode), &start, &size);
+
+ operands[5] = GEN_INT (64 - start - size); /* start bit position */
+ operands[6] = GEN_INT (64 - 1 - start); /* end bit position */
+ operands[7] = const0_rtx; /* left shift count */
+
+ return "risbg\t%0,%1,%b5,%b6,%b7";
+}
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
+; AND op1 with a mask that is 1 for the selected bits and 0 for the rest.
+(define_insn "*insv<mode>_or_z10_noshift"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
+ (match_operand 2 "const_int_operand" "n"))
+ (match_operand:GPR 3 "nonimmediate_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && s390_contiguous_bitmask_p (INTVAL (operands[2]),
+ GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)"
+{
+ int start;
+ int size;
+
+ s390_contiguous_bitmask_p (INTVAL (operands[2]),
+ GET_MODE_BITSIZE (<MODE>mode), &start, &size);
+
+ operands[4] = GEN_INT (64 - start - size); /* start bit position */
+ operands[5] = GEN_INT (64 - 1 - start); /* end bit position */
+ operands[6] = const0_rtx; /* left shift count */
+
+ return "rosbg\t%0,%1,%b4,%b5,%b6";
+}
+ [(set_attr "op_type" "RIE")])
+
+(define_insn "*insv<mode>_mem_reg"
+ [(set (zero_extract:W (match_operand:QI 0 "memory_operand" "+Q,S")
+ (match_operand 1 "const_int_operand" "n,n")
+ (const_int 0))
+ (match_operand:W 2 "register_operand" "d,d"))]
+ "INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= GET_MODE_BITSIZE (SImode)
+ && INTVAL (operands[1]) % BITS_PER_UNIT == 0"
+{
+ int size = INTVAL (operands[1]) / BITS_PER_UNIT;
+
+ operands[1] = GEN_INT ((1ul << size) - 1);
+ return (which_alternative == 0) ? "stcm\t%2,%1,%S0"
+ : "stcmy\t%2,%1,%S0";
+}
+ [(set_attr "op_type" "RS,RSY")
+ (set_attr "z10prop" "z10_super,z10_super")])
+
+(define_insn "*insvdi_mem_reghigh"
+ [(set (zero_extract:DI (match_operand:QI 0 "memory_operand" "+QS")
+ (match_operand 1 "const_int_operand" "n")
+ (const_int 0))
+ (lshiftrt:DI (match_operand:DI 2 "register_operand" "d")
+ (const_int 32)))]
+ "TARGET_ZARCH
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[1]) <= GET_MODE_BITSIZE (SImode)
+ && INTVAL (operands[1]) % BITS_PER_UNIT == 0"
+{
+ int size = INTVAL (operands[1]) / BITS_PER_UNIT;
+
+ operands[1] = GEN_INT ((1ul << size) - 1);
+ return "stcmh\t%2,%1,%S0";
+}
+ [(set_attr "op_type" "RSY")
+ (set_attr "z10prop" "z10_super")])
+
+(define_insn "*insvdi_reg_imm"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+d")
+ (const_int 16)
+ (match_operand 1 "const_int_operand" "n"))
+ (match_operand:DI 2 "const_int_operand" "n"))]
+ "TARGET_ZARCH
+ && INTVAL (operands[1]) >= 0
+ && INTVAL (operands[1]) < BITS_PER_WORD
+ && INTVAL (operands[1]) % 16 == 0"
+{
+ switch (BITS_PER_WORD - INTVAL (operands[1]))
+ {
+ case 64: return "iihh\t%0,%x2";
+ case 48: return "iihl\t%0,%x2";
+ case 32: return "iilh\t%0,%x2";
+ case 16: return "iill\t%0,%x2";
+ default: gcc_unreachable();
+ }
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "z10prop" "z10_super_E1")])
+
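+; A worked example of the case selection above: inserting 16 bits at
+; bit position 16 of a DI gives BITS_PER_WORD - 16 = 48, so the
+; pattern emits "iihl", the insert-immediate variant targeting bits
+; 16-31 of the 64-bit register.
+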
+; Update the leftmost 32 bits of a DI.
+(define_insn "*insv_h_di_reg_extimm"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+d")
+ (const_int 32)
+ (const_int 0))
+ (match_operand:DI 1 "const_int_operand" "n"))]
+ "TARGET_EXTIMM"
+ "iihf\t%0,%o1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "z10prop" "z10_fwd_E1")])
+
+; Update the rightmost 32 bits of a DI, or the whole of an SI.
+(define_insn "*insv_l<mode>_reg_extimm"
+ [(set (zero_extract:P (match_operand:P 0 "register_operand" "+d")
+ (const_int 32)
+ (match_operand 1 "const_int_operand" "n"))
+ (match_operand:P 2 "const_int_operand" "n"))]
+ "TARGET_EXTIMM
+ && BITS_PER_WORD - INTVAL (operands[1]) == 32"
+ "iilf\t%0,%o2"
+ [(set_attr "op_type" "RIL")
+ (set_attr "z10prop" "z10_fwd_A1")])
+
+;
+; extendsidi2 instruction pattern(s).
+;
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_ZARCH)
+ {
+ emit_clobber (operands[0]);
+ emit_move_insn (gen_highpart (SImode, operands[0]), operands[1]);
+ emit_move_insn (gen_lowpart (SImode, operands[0]), const0_rtx);
+ emit_insn (gen_ashrdi3 (operands[0], operands[0], GEN_INT (32)));
+ DONE;
+ }
+})
+
+(define_insn "*extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,RT,b")))]
+ "TARGET_ZARCH"
+ "@
+ lgfr\t%0,%1
+ lgf\t%0,%1
+ lgfrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY,RIL")
+ (set_attr "type" "*,*,larl")
+ (set_attr "cpu_facility" "*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super_E1")])
+
+;
+; extend(hi|qi)(si|di)2 instruction pattern(s).
+;
+
+(define_expand "extend<HQI:mode><DSI:mode>2"
+ [(set (match_operand:DSI 0 "register_operand" "")
+ (sign_extend:DSI (match_operand:HQI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (<DSI:MODE>mode == DImode && !TARGET_ZARCH)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_extend<HQI:mode>si2 (tmp, operands[1]));
+ emit_insn (gen_extendsidi2 (operands[0], tmp));
+ DONE;
+ }
+ else if (!TARGET_EXTIMM)
+ {
+ rtx bitcount = GEN_INT (GET_MODE_BITSIZE (<DSI:MODE>mode) -
+ GET_MODE_BITSIZE (<HQI:MODE>mode));
+
+ operands[1] = gen_lowpart (<DSI:MODE>mode, operands[1]);
+ emit_insn (gen_ashl<DSI:mode>3 (operands[0], operands[1], bitcount));
+ emit_insn (gen_ashr<DSI:mode>3 (operands[0], operands[0], bitcount));
+ DONE;
+ }
+})
+
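+; A worked example of the !TARGET_EXTIMM fallback above: extending
+; HImode to SImode computes bitcount = 32 - 16 = 16, so the value is
+; shifted left by 16 and then arithmetically right by 16, replicating
+; the sign bit through the upper halfword.
+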
+;
+; extendhidi2 instruction pattern(s).
+;
+
+(define_insn "*extendhidi2_extimm"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (sign_extend:DI (match_operand:HI 1 "general_operand" "d,RT,b")))]
+ "TARGET_ZARCH && TARGET_EXTIMM"
+ "@
+ lghr\t%0,%1
+ lgh\t%0,%1
+ lghrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY,RIL")
+ (set_attr "type" "*,*,larl")
+ (set_attr "cpu_facility" "extimm,extimm,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super_E1")])
+
+(define_insn "*extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:HI 1 "memory_operand" "RT")))]
+ "TARGET_ZARCH"
+ "lgh\t%0,%1"
+ [(set_attr "op_type" "RXY")
+ (set_attr "z10prop" "z10_super_E1")])
+
+;
+; extendhisi2 instruction pattern(s).
+;
+
+(define_insn "*extendhisi2_extimm"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" " d,R,T,b")))]
+ "TARGET_EXTIMM"
+ "@
+ lhr\t%0,%1
+ lh\t%0,%1
+ lhy\t%0,%1
+ lhrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RX,RXY,RIL")
+ (set_attr "type" "*,*,*,larl")
+ (set_attr "cpu_facility" "extimm,extimm,extimm,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super_E1,z10_super_E1")])
+
+(define_insn "*extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (sign_extend:SI (match_operand:HI 1 "memory_operand" "R,T")))]
+ "!TARGET_EXTIMM"
+ "@
+ lh\t%0,%1
+ lhy\t%0,%1"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+;
+; extendqi(si|di)2 instruction pattern(s).
+;
+
+; lbr, lgbr, lb, lgb
+(define_insn "*extendqi<mode>2_extimm"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (sign_extend:GPR (match_operand:QI 1 "nonimmediate_operand" "d,RT")))]
+ "TARGET_EXTIMM"
+ "@
+ l<g>br\t%0,%1
+ l<g>b\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+; lb, lgb
+(define_insn "*extendqi<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (sign_extend:GPR (match_operand:QI 1 "memory_operand" "RT")))]
+ "!TARGET_EXTIMM && TARGET_LONG_DISPLACEMENT"
+ "l<g>b\t%0,%1"
+ [(set_attr "op_type" "RXY")
+ (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn_and_split "*extendqi<mode>2_short_displ"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (sign_extend:GPR (match_operand:QI 1 "s_operand" "Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_EXTIMM && !TARGET_LONG_DISPLACEMENT"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 0) (unspec:GPR [(match_dup 1) (const_int 8)] UNSPEC_ICM))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_dup 0) (ashiftrt:GPR (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (operands[1], GEN_INT (GET_MODE_SIZE (QImode)));
+ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)
+ - GET_MODE_BITSIZE (QImode));
+})
+
+;
+; zero_extendsidi2 instruction pattern(s).
+;
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_ZARCH)
+ {
+ emit_clobber (operands[0]);
+ emit_move_insn (gen_lowpart (SImode, operands[0]), operands[1]);
+ emit_move_insn (gen_highpart (SImode, operands[0]), const0_rtx);
+ DONE;
+ }
+})
+
+(define_insn "*zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,RT,b")))]
+ "TARGET_ZARCH"
+ "@
+ llgfr\t%0,%1
+ llgf\t%0,%1
+ llgfrl\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY,RIL")
+ (set_attr "type" "*,*,larl")
+ (set_attr "cpu_facility" "*,*,z10")
+ (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3")])
+
+;
+; LLGT-type instructions (zero-extend from 31 bits to 64 bits).
+;
+
+(define_insn "*llgt_sidi"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (and:DI (subreg:DI (match_operand:SI 1 "memory_operand" "RT") 0)
+ (const_int 2147483647)))]
+ "TARGET_ZARCH"
+ "llgt\t%0,%1"
+ [(set_attr "op_type" "RXE")
+ (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn_and_split "*llgt_sidi_split"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (and:DI (subreg:DI (match_operand:SI 1 "memory_operand" "RT") 0)
+ (const_int 2147483647)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (and:DI (subreg:DI (match_dup 1) 0)
+ (const_int 2147483647)))]
+ "")
+
+(define_insn "*llgt_sisi"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "d,RT")
+ (const_int 2147483647)))]
+ "TARGET_ZARCH"
+ "@
+ llgtr\t%0,%1
+ llgt\t%0,%1"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn "*llgt_didi"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "d,o")
+ (const_int 2147483647)))]
+ "TARGET_ZARCH"
+ "@
+ llgtr\t%0,%1
+ llgt\t%0,%N1"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_split
+ [(set (match_operand:DSI 0 "register_operand" "")
+ (and:DSI (match_operand:DSI 1 "nonimmediate_operand" "")
+ (const_int 2147483647)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && reload_completed"
+ [(set (match_dup 0)
+ (and:DSI (match_dup 1)
+ (const_int 2147483647)))]
+ "")
+
+;
+; zero_extend(hi|qi)(si|di)2 instruction pattern(s).
+;
+
+(define_expand "zero_extend<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HQI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_ZARCH)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_zero_extend<mode>si2 (tmp, operands[1]));
+ emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
+ DONE;
+ }
+ else if (!TARGET_EXTIMM)
+ {
+ rtx bitcount = GEN_INT (GET_MODE_BITSIZE(DImode) -
+ GET_MODE_BITSIZE(<MODE>mode));
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_insn (gen_ashldi3 (operands[0], operands[1], bitcount));
+ emit_insn (gen_lshrdi3 (operands[0], operands[0], bitcount));
+ DONE;
+ }
+})
+
+(define_expand "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HQI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_EXTIMM)
+ {
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ emit_insn (gen_andsi3 (operands[0], operands[1],
+ GEN_INT ((1 << GET_MODE_BITSIZE(<MODE>mode)) - 1)));
+ DONE;
+ }
+})
+
+; llhrl, llghrl
+(define_insn "*zero_extendhi<mode>2_z10"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
+ (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,RT,b")))]
+ "TARGET_Z10"
+ "@
+ ll<g>hr\t%0,%1
+ ll<g>h\t%0,%1
+ ll<g>hrl\t%0,%1"
+ [(set_attr "op_type" "RXY,RRE,RIL")
+ (set_attr "type" "*,*,larl")
+ (set_attr "cpu_facility" "*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3")])
+
+; llhr, llcr, llghr, llgcr, llh, llc, llgh, llgc
+(define_insn "*zero_extend<HQI:mode><GPR:mode>2_extimm"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,RT")))]
+ "TARGET_EXTIMM"
+ "@
+ ll<g><hc>r\t%0,%1
+ ll<g><hc>\t%0,%1"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3")])
+
+; llgh, llgc
+(define_insn "*zero_extend<HQI:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extend:GPR (match_operand:HQI 1 "memory_operand" "RT")))]
+ "TARGET_ZARCH && !TARGET_EXTIMM"
+ "llg<hc>\t%0,%1"
+ [(set_attr "op_type" "RXY")
+ (set_attr "z10prop" "z10_fwd_A3")])
+
+(define_insn_and_split "*zero_extendhisi2_31"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (zero_extend:SI (match_operand:HI 1 "s_operand" "QS")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (parallel
+ [(set (strict_low_part (match_dup 2)) (match_dup 1))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[2] = gen_lowpart (HImode, operands[0]);")
+
+(define_insn_and_split "*zero_extendqisi2_31"
+ [(set (match_operand:SI 0 "register_operand" "=&d")
+ (zero_extend:SI (match_operand:QI 1 "memory_operand" "RT")))]
+ "!TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;
+; zero_extendqihi2 instruction pattern(s).
+;
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "")))]
+ "TARGET_ZARCH && !TARGET_EXTIMM"
+{
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ emit_insn (gen_andhi3 (operands[0], operands[1], GEN_INT (0xff)));
+ DONE;
+})
+
+(define_insn "*zero_extendqihi2_64"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (zero_extend:HI (match_operand:QI 1 "memory_operand" "RT")))]
+ "TARGET_ZARCH && !TARGET_EXTIMM"
+ "llgc\t%0,%1"
+ [(set_attr "op_type" "RXY")
+ (set_attr "z10prop" "z10_fwd_A3")])
+
+(define_insn_and_split "*zero_extendqihi2_31"
+ [(set (match_operand:HI 0 "register_operand" "=&d")
+ (zero_extend:HI (match_operand:QI 1 "memory_operand" "RT")))]
+ "!TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;
+; fixuns_trunc(dd|td)di2 instruction pattern(s).
+;
+
+(define_expand "fixuns_truncdddi2"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unsigned_fix:DI (match_operand:DD 1 "register_operand" "")))
+ (unspec:DI [(const_int 5)] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))])]
+
+ "TARGET_HARD_DFP"
+{
+ if (!TARGET_Z196)
+ {
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx temp = gen_reg_rtx (TDmode);
+ REAL_VALUE_TYPE cmp, sub;
+
+ decimal_real_from_string (&cmp, "9223372036854775808.0"); /* 2^63 */
+ decimal_real_from_string (&sub, "18446744073709551616.0"); /* 2^64 */
+
+      /* 2^63 cannot be represented as a 64-bit DFP number with full
+	 precision, so we do the comparison and the subtraction in TD mode
+	 and use a TD -> DI conversion afterwards.  */
+ emit_insn (gen_extendddtd2 (temp, operands[1]));
+ temp = force_reg (TDmode, temp);
+ emit_cmp_and_jump_insns (temp,
+ CONST_DOUBLE_FROM_REAL_VALUE (cmp, TDmode),
+ LT, NULL_RTX, VOIDmode, 0, label1);
+ emit_insn (gen_subtd3 (temp, temp,
+ CONST_DOUBLE_FROM_REAL_VALUE (sub, TDmode)));
+ emit_insn (gen_fix_trunctddi2_dfp (operands[0], temp, GEN_INT (11)));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_insn (gen_fix_truncdddi2_dfp (operands[0], operands[1], GEN_INT (9)));
+ emit_label (label2);
+ DONE;
+ }
+})
+
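+; Why subtracting 2^64 above is correct, as a hedged aside: for an
+; input x with 2^63 <= x < 2^64 the expander computes x - 2^64, which
+; lies in [-2^63, 0) and is thus representable for the *signed* fix
+; insn; its 64-bit two's-complement representation is exactly the
+; unsigned bit pattern of x.  For example, x = 2^63 becomes -2^63,
+; whose representation is 0x8000000000000000 = 2^63 unsigned.
+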
+(define_expand "fixuns_trunctddi2"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unsigned_fix:DI (match_operand:TD 1 "register_operand" "")))
+ (unspec:DI [(const_int 5)] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))])]
+
+ "TARGET_HARD_DFP"
+{
+ if (!TARGET_Z196)
+ {
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx temp = gen_reg_rtx (TDmode);
+ REAL_VALUE_TYPE cmp, sub;
+
+ operands[1] = force_reg (TDmode, operands[1]);
+ decimal_real_from_string (&cmp, "9223372036854775808.0"); /* 2^63 */
+ decimal_real_from_string (&sub, "18446744073709551616.0"); /* 2^64 */
+
+ emit_cmp_and_jump_insns (operands[1],
+ CONST_DOUBLE_FROM_REAL_VALUE (cmp, TDmode),
+ LT, NULL_RTX, VOIDmode, 0, label1);
+ emit_insn (gen_subtd3 (temp, operands[1],
+ CONST_DOUBLE_FROM_REAL_VALUE (sub, TDmode)));
+ emit_insn (gen_fix_trunctddi2_dfp (operands[0], temp, GEN_INT (11)));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_insn (gen_fix_trunctddi2_dfp (operands[0], operands[1], GEN_INT (9)));
+ emit_label (label2);
+ DONE;
+ }
+})
+
+;
+; fixuns_trunc(sf|df|tf)(si|di)2 and fix_trunc(sf|df|tf)(si|di)2
+; instruction pattern(s).
+;
+
+(define_expand "fixuns_trunc<BFP:mode><GPR:mode>2"
+ [(parallel
+ [(set (match_operand:GPR 0 "register_operand" "")
+ (unsigned_fix:GPR (match_operand:BFP 1 "register_operand" "")))
+ (unspec:GPR [(const_int 5)] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_HARD_FLOAT"
+{
+ if (!TARGET_Z196)
+ {
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx temp = gen_reg_rtx (<BFP:MODE>mode);
+ REAL_VALUE_TYPE cmp, sub;
+
+ operands[1] = force_reg (<BFP:MODE>mode, operands[1]);
+ real_2expN (&cmp, GET_MODE_BITSIZE(<GPR:MODE>mode) - 1, <BFP:MODE>mode);
+ real_2expN (&sub, GET_MODE_BITSIZE(<GPR:MODE>mode), <BFP:MODE>mode);
+
+ emit_cmp_and_jump_insns (operands[1],
+ CONST_DOUBLE_FROM_REAL_VALUE (cmp, <BFP:MODE>mode),
+ LT, NULL_RTX, VOIDmode, 0, label1);
+ emit_insn (gen_sub<BFP:mode>3 (temp, operands[1],
+ CONST_DOUBLE_FROM_REAL_VALUE (sub, <BFP:MODE>mode)));
+ emit_insn (gen_fix_trunc<BFP:mode><GPR:mode>2_bfp (operands[0], temp,
+ GEN_INT (7)));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_insn (gen_fix_trunc<BFP:mode><GPR:mode>2_bfp (operands[0],
+ operands[1], GEN_INT (5)));
+ emit_label (label2);
+ DONE;
+ }
+})
+
+; fixuns_trunc(td|dd)si2 expander
+(define_expand "fixuns_trunc<mode>si2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unsigned_fix:SI (match_operand:DFP 1 "register_operand" "")))
+ (unspec:SI [(const_int 5)] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z196 && TARGET_HARD_DFP"
+ "")
+
+; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns.
+
+; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr
+; clfdtr, clfxtr, clgdtr, clgxtr
+(define_insn "*fixuns_trunc<FP:mode><GPR:mode>2_z196"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f")))
+ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z196"
+ "cl<GPR:gf><FP:xde><FP:bt>r\t%0,%h2,%1,0"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "ftoi")])
+
+(define_expand "fix_trunc<DSF:mode><GPR:mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "")
+ (fix:GPR (match_operand:DSF 1 "register_operand" "")))]
+ "TARGET_HARD_FLOAT"
+{
+ emit_insn (gen_fix_trunc<DSF:mode><GPR:mode>2_bfp (operands[0], operands[1],
+ GEN_INT (5)));
+ DONE;
+})
+
+; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr
+(define_insn "fix_trunc<BFP:mode><GPR:mode>2_bfp"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (fix:GPR (match_operand:BFP 1 "register_operand" "f")))
+ (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "c<GPR:gf><BFP:xde>br\t%0,%h2,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "ftoi")])
+
+
+;
+; fix_trunc(td|dd)di2 instruction pattern(s).
+;
+
+(define_expand "fix_trunc<mode>di2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (fix:DI (match_operand:DFP 1 "nonimmediate_operand" "")))]
+ "TARGET_ZARCH && TARGET_HARD_DFP"
+{
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ emit_insn (gen_fix_trunc<mode>di2_dfp (operands[0], operands[1],
+ GEN_INT (9)));
+ DONE;
+})
+
+; cgxtr, cgdtr
+(define_insn "fix_trunc<DFP:mode>di2_dfp"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (fix:DI (match_operand:DFP 1 "register_operand" "f")))
+ (unspec:DI [(match_operand:DI 2 "immediate_operand" "K")] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && TARGET_HARD_DFP"
+ "cg<DFP:xde>tr\t%0,%h2,%1"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "ftoidfp")])
+
+
+;
+; fix_trunctf(si|di)2 instruction pattern(s).
+;
+
+(define_expand "fix_trunctf<mode>2"
+ [(parallel [(set (match_operand:GPR 0 "register_operand" "")
+ (fix:GPR (match_operand:TF 1 "register_operand" "")))
+ (unspec:GPR [(const_int 5)] UNSPEC_ROUND)
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_HARD_FLOAT"
+ "")
+
+
+;
+; float(si|di)(tf|df|sf|td|dd)2 instruction pattern(s).
+;
+
+; cxgbr, cdgbr, cegbr, cxgtr, cdgtr
+(define_insn "floatdi<mode>2"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+ (float:FP (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_ZARCH && TARGET_HARD_FLOAT"
+ "c<xde>g<bt>r\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "itof<mode>" )])
+
+; cxfbr, cdfbr, cefbr
+(define_insn "floatsi<mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (float:BFP (match_operand:SI 1 "register_operand" "d")))]
+ "TARGET_HARD_FLOAT"
+ "c<xde>fbr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "itof<mode>" )])
+
+; cxftr, cdftr
+(define_insn "floatsi<mode>2"
+ [(set (match_operand:DFP 0 "register_operand" "=f")
+ (float:DFP (match_operand:SI 1 "register_operand" "d")))]
+ "TARGET_Z196 && TARGET_HARD_FLOAT"
+ "c<xde>ftr\t%0,0,%1,0"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "itof<mode>" )])
+
+;
+; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s).
+;
+
+; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr
+; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr
+(define_insn "floatuns<GPR:mode><FP:mode>2"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+ (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))]
+ "TARGET_Z196 && TARGET_HARD_FLOAT"
+ "c<FP:xde>l<GPR:gf><FP:bt>r\t%0,0,%1,0"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "itof<FP:mode>" )])
+
+;
+; truncdfsf2 instruction pattern(s).
+;
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "ledbr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "ftruncdf")])
+
+;
+; trunctf(df|sf)2 instruction pattern(s).
+;
+
+; ldxbr, lexbr
+(define_insn "trunctf<mode>2"
+ [(set (match_operand:DSF 0 "register_operand" "=f")
+ (float_truncate:DSF (match_operand:TF 1 "register_operand" "f")))
+ (clobber (match_scratch:TF 2 "=f"))]
+ "TARGET_HARD_FLOAT"
+ "l<xde>xbr\t%2,%1\;l<xde>r\t%0,%2"
+ [(set_attr "length" "6")
+ (set_attr "type" "ftrunctf")])
+
+;
+; trunctddd2 and truncddsd2 instruction pattern(s).
+;
+
+(define_insn "trunctddd2"
+ [(set (match_operand:DD 0 "register_operand" "=f")
+ (float_truncate:DD (match_operand:TD 1 "register_operand" "f")))
+ (clobber (match_scratch:TD 2 "=f"))]
+ "TARGET_HARD_DFP"
+ "ldxtr\t%2,0,%1,0\;ldr\t%0,%2"
+ [(set_attr "length" "6")
+ (set_attr "type" "ftruncdd")])
+
+(define_insn "truncddsd2"
+ [(set (match_operand:SD 0 "register_operand" "=f")
+ (float_truncate:SD (match_operand:DD 1 "register_operand" "f")))]
+ "TARGET_HARD_DFP"
+ "ledtr\t%0,0,%1,0"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "ftruncsd")])
+
+;
+; extend(sf|df)(df|tf)2 instruction pattern(s).
+;
+
+; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb
+(define_insn "extend<DSF:mode><BFP:mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f,f")
+ (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "f,R")))]
+ "TARGET_HARD_FLOAT
+ && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)"
+ "@
+ l<BFP:xde><DSF:xde>br\t%0,%1
+ l<BFP:xde><DSF:xde>b\t%0,%1"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "type" "fsimp<BFP:mode>, fload<BFP:mode>")])
+
+;
+; extendddtd2 and extendsddd2 instruction pattern(s).
+;
+
+(define_insn "extendddtd2"
+ [(set (match_operand:TD 0 "register_operand" "=f")
+ (float_extend:TD (match_operand:DD 1 "register_operand" "f")))]
+ "TARGET_HARD_DFP"
+ "lxdtr\t%0,%1,0"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "fsimptf")])
+
+(define_insn "extendsddd2"
+ [(set (match_operand:DD 0 "register_operand" "=f")
+ (float_extend:DD (match_operand:SD 1 "register_operand" "f")))]
+ "TARGET_HARD_DFP"
+ "ldetr\t%0,%1,0"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "fsimptf")])
+
+;
+; Binary <-> Decimal floating point trunc patterns
+;
+
+(define_insn "*trunc<BFP:mode><DFP_ALL:mode>2"
+ [(set (reg:DFP_ALL FPR0_REGNUM)
+ (float_truncate:DFP_ALL (reg:BFP FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_DFP"
+ "pfpo")
+
+(define_insn "*trunc<DFP_ALL:mode><BFP:mode>2"
+ [(set (reg:BFP FPR0_REGNUM)
+ (float_truncate:BFP (reg:DFP_ALL FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_DFP"
+ "pfpo")
+
+(define_expand "trunc<BFP:mode><DFP_ALL:mode>2"
+ [(set (reg:BFP FPR2_REGNUM) (match_operand:BFP 1 "nonimmediate_operand" ""))
+ (set (reg:SI GPR0_REGNUM) (match_dup 2))
+ (parallel
+ [(set (reg:DFP_ALL FPR0_REGNUM)
+ (float_truncate:DFP_ALL (reg:BFP FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:DFP_ALL 0 "nonimmediate_operand" "")
+ (reg:DFP_ALL FPR0_REGNUM))]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DFP_ALL:MODE>mode)"
+{
+ HOST_WIDE_INT flags;
+
+ flags = (PFPO_CONVERT |
+ PFPO_OP_TYPE_<DFP_ALL:MODE> << PFPO_OP0_TYPE_SHIFT |
+ PFPO_OP_TYPE_<BFP:MODE> << PFPO_OP1_TYPE_SHIFT);
+
+ operands[2] = GEN_INT (flags);
+})
+
+(define_expand "trunc<DFP_ALL:mode><BFP:mode>2"
+ [(set (reg:DFP_ALL FPR2_REGNUM)
+ (match_operand:DFP_ALL 1 "nonimmediate_operand" ""))
+ (set (reg:SI GPR0_REGNUM) (match_dup 2))
+ (parallel
+ [(set (reg:BFP FPR0_REGNUM) (float_truncate:BFP (reg:DFP_ALL FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:BFP 0 "nonimmediate_operand" "") (reg:BFP FPR0_REGNUM))]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (<DFP_ALL:MODE>mode) >= GET_MODE_SIZE (<BFP:MODE>mode)"
+{
+ HOST_WIDE_INT flags;
+
+ flags = (PFPO_CONVERT |
+ PFPO_OP_TYPE_<BFP:MODE> << PFPO_OP0_TYPE_SHIFT |
+ PFPO_OP_TYPE_<DFP_ALL:MODE> << PFPO_OP1_TYPE_SHIFT);
+
+ operands[2] = GEN_INT (flags);
+})
+
+;
+; Binary <-> Decimal floating point extend patterns
+;
+
+(define_insn "*extend<BFP:mode><DFP_ALL:mode>2"
+ [(set (reg:DFP_ALL FPR0_REGNUM) (float_extend:DFP_ALL (reg:BFP FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_DFP"
+ "pfpo")
+
+(define_insn "*extend<DFP_ALL:mode><BFP:mode>2"
+ [(set (reg:BFP FPR0_REGNUM) (float_extend:BFP (reg:DFP_ALL FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_DFP"
+ "pfpo")
+
+(define_expand "extend<BFP:mode><DFP_ALL:mode>2"
+ [(set (reg:BFP FPR2_REGNUM) (match_operand:BFP 1 "nonimmediate_operand" ""))
+ (set (reg:SI GPR0_REGNUM) (match_dup 2))
+ (parallel
+ [(set (reg:DFP_ALL FPR0_REGNUM)
+ (float_extend:DFP_ALL (reg:BFP FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:DFP_ALL 0 "nonimmediate_operand" "")
+ (reg:DFP_ALL FPR0_REGNUM))]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (<BFP:MODE>mode) <= GET_MODE_SIZE (<DFP_ALL:MODE>mode)"
+{
+ HOST_WIDE_INT flags;
+
+ flags = (PFPO_CONVERT |
+ PFPO_OP_TYPE_<DFP_ALL:MODE> << PFPO_OP0_TYPE_SHIFT |
+ PFPO_OP_TYPE_<BFP:MODE> << PFPO_OP1_TYPE_SHIFT);
+
+ operands[2] = GEN_INT (flags);
+})
+
+(define_expand "extend<DFP_ALL:mode><BFP:mode>2"
+ [(set (reg:DFP_ALL FPR2_REGNUM)
+ (match_operand:DFP_ALL 1 "nonimmediate_operand" ""))
+ (set (reg:SI GPR0_REGNUM) (match_dup 2))
+ (parallel
+ [(set (reg:BFP FPR0_REGNUM) (float_extend:BFP (reg:DFP_ALL FPR2_REGNUM)))
+ (use (reg:SI GPR0_REGNUM))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (match_operand:BFP 0 "nonimmediate_operand" "") (reg:BFP FPR0_REGNUM))]
+ "TARGET_HARD_DFP
+ && GET_MODE_SIZE (<DFP_ALL:MODE>mode) < GET_MODE_SIZE (<BFP:MODE>mode)"
+{
+ HOST_WIDE_INT flags;
+
+ flags = (PFPO_CONVERT |
+ PFPO_OP_TYPE_<BFP:MODE> << PFPO_OP0_TYPE_SHIFT |
+ PFPO_OP_TYPE_<DFP_ALL:MODE> << PFPO_OP1_TYPE_SHIFT);
+
+ operands[2] = GEN_INT (flags);
+})
+
+
+;;
+;; ARITHMETIC OPERATIONS
+;;
+; Arithmetic operations set the condition code.  Because the unused
+; upper bits of a register are unpredictable for halfword and byte
+; operands, the condition code can be set incorrectly by halfword and
+; byte operations.
+
+;;
+;;- Add instructions.
+;;
+
+;
+; addti3 instruction pattern(s).
+;
+
+(define_insn_and_split "addti3"
+ [(set (match_operand:TI 0 "register_operand" "=&d")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (reg:CCL1 CC_REGNUM)
+ (compare:CCL1 (plus:DI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (plus:DI (match_dup 7) (match_dup 8)))])
+ (parallel
+ [(set (match_dup 3) (plus:DI
+ (plus:DI (ltu:DI (reg:CCL1 CC_REGNUM) (const_int 0))
+ (match_dup 4)) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = operand_subword (operands[0], 0, 0, TImode);
+ operands[4] = operand_subword (operands[1], 0, 0, TImode);
+ operands[5] = operand_subword (operands[2], 0, 0, TImode);
+ operands[6] = operand_subword (operands[0], 1, 0, TImode);
+ operands[7] = operand_subword (operands[1], 1, 0, TImode);
+ operands[8] = operand_subword (operands[2], 1, 0, TImode);")
+
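+; A hedged sketch of what the split above produces: the low
+; doublewords are added with an add-logical insn (e.g. algr) whose
+; CCL1 result records the carry, and the high doublewords are then
+; combined by an add-with-carry insn (e.g. alcgr), folding
+; (ltu CCL1 0) back in as the carry-in.
+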
+;
+; adddi3 instruction pattern(s).
+;
+
+(define_expand "adddi3"
+ [(parallel
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*adddi3_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (plus:DI (sign_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))
+ (match_operand:DI 1 "register_operand" "0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "@
+ agfr\t%0,%2
+ agf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z196prop" "z196_cracked,z196_cracked")])
+
+(define_insn "*adddi3_zero_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:DI (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))
+ (match_operand:DI 1 "register_operand" "0,0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d")
+ (plus:DI (zero_extend:DI (match_dup 2)) (match_dup 1)))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_ZARCH"
+ "@
+ algfr\t%0,%2
+ algf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn "*adddi3_zero_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (plus:DI (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))
+ (match_operand:DI 1 "register_operand" "0,0"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d,d"))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_ZARCH"
+ "@
+ algfr\t%0,%2
+ algf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn "*adddi3_zero"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (plus:DI (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))
+ (match_operand:DI 1 "register_operand" "0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "@
+ algfr\t%0,%2
+ algf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+
+(define_insn_and_split "*adddi3_31z"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&d")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (reg:CCL1 CC_REGNUM)
+ (compare:CCL1 (plus:SI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (plus:SI (match_dup 7) (match_dup 8)))])
+ (parallel
+ [(set (match_dup 3) (plus:SI
+ (plus:SI (ltu:SI (reg:CCL1 CC_REGNUM) (const_int 0))
+ (match_dup 4)) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+ operands[4] = operand_subword (operands[1], 0, 0, DImode);
+ operands[5] = operand_subword (operands[2], 0, 0, DImode);
+ operands[6] = operand_subword (operands[0], 1, 0, DImode);
+ operands[7] = operand_subword (operands[1], 1, 0, DImode);
+ operands[8] = operand_subword (operands[2], 1, 0, DImode);")
+
+(define_insn_and_split "*adddi3_31"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&d")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 3) (plus:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (reg:CCL1 CC_REGNUM)
+ (compare:CCL1 (plus:SI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (plus:SI (match_dup 7) (match_dup 8)))])
+ (set (pc)
+ (if_then_else (ltu (reg:CCL1 CC_REGNUM) (const_int 0))
+ (pc)
+ (label_ref (match_dup 9))))
+ (parallel
+ [(set (match_dup 3) (plus:SI (match_dup 3) (const_int 1)))
+ (clobber (reg:CC CC_REGNUM))])
+ (match_dup 9)]
+ "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+ operands[4] = operand_subword (operands[1], 0, 0, DImode);
+ operands[5] = operand_subword (operands[2], 0, 0, DImode);
+ operands[6] = operand_subword (operands[0], 1, 0, DImode);
+ operands[7] = operand_subword (operands[1], 1, 0, DImode);
+ operands[8] = operand_subword (operands[2], 1, 0, DImode);
+ operands[9] = gen_label_rtx ();")
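+
+; Without z/Architecture instructions no add-logical-with-carry is
+; available, so this variant branches around an explicit "+ 1" on the
+; high word when the low-word add produced no carry; operand 9 is the
+; freshly generated skip label.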
+
+;
+; addsi3 instruction pattern(s).
+;
+
+(define_expand "addsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*addsi3_sign"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (plus:SI (sign_extend:SI (match_operand:HI 2 "memory_operand" "R,T"))
+ (match_operand:SI 1 "register_operand" "0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ ah\t%0,%2
+ ahy\t%0,%2"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "z196prop" "z196_cracked,z196_cracked")])
+
+;
+; add(di|si)3 instruction pattern(s).
+;
+
+; ark, agrk, ar, ahi, ahik, aghik, alfi, slfi, a, ay, agr, aghi, algfi, slgfi, ag, asi, agsi
+(define_insn "*add<mode>3"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d,d,d,d, d, d,d,d,QS")
+ (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,d, 0, 0,0,0, 0")
+ (match_operand:GPR 2 "general_operand" " d,d,K,K,Op,On,R,T, C") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ a<g>r\t%0,%2
+ a<g>rk\t%0,%1,%2
+ a<g>hi\t%0,%h2
+ a<g>hik\t%0,%1,%h2
+ al<g>fi\t%0,%2
+ sl<g>fi\t%0,%n2
+ a<g>\t%0,%2
+ a<y>\t%0,%2
+ a<g>si\t%0,%c2"
+ [(set_attr "op_type" "RR<E>,RRF,RI,RIE,RIL,RIL,RX<Y>,RXY,SIY")
+ (set_attr "cpu_facility" "*,z196,*,z196,extimm,extimm,*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,*,z10_super_E1,z10_super_E1,
+ z10_super_E1,z10_super_E1,z10_super_E1")])
+
+; alr, alfi, slfi, al, aly, alrk, alhsik, algr, algfi, slgfi, alg, alsi, algsi, algrk, alghsik
+(define_insn "*add<mode>3_carry1_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d, 0, 0,d,0,0,0")
+ (match_operand:GPR 2 "general_operand" " d,d,Op,On,K,R,T,C"))
+ (match_dup 1)))
+ (set (match_operand:GPR 0 "nonimmediate_operand" "=d,d, d, d,d,d,d,d")
+ (plus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL1mode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>fi\t%0,%2
+ sl<g>fi\t%0,%n2
+ al<g>hsik\t%0,%1,%h2
+ al<g>\t%0,%2
+ al<y>\t%0,%2
+ al<g>si\t%0,%c2"
+ [(set_attr "op_type" "RR<E>,RRF,RIL,RIL,RIE,RX<Y>,RXY,SIY")
+ (set_attr "cpu_facility" "*,z196,extimm,extimm,z196,*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1,z10_super_E1")])
+
+; alr, al, aly, algr, alg, alrk, algrk
+(define_insn "*add<mode>3_carry1_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (match_dup 1)))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCL1mode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>\t%0,%2
+ al<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+; alr, alfi, slfi, al, aly, algr, algfi, slgfi, alg, alsi, algsi, alrk, algrk, alhsik, alghsik
+(define_insn "*add<mode>3_carry2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d, 0, 0,d,0,0, 0")
+ (match_operand:GPR 2 "general_operand" " d,d,Op,On,K,R,T, C"))
+ (match_dup 2)))
+ (set (match_operand:GPR 0 "nonimmediate_operand" "=d,d, d, d,d,d,d,RS")
+ (plus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL1mode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>fi\t%0,%2
+ sl<g>fi\t%0,%n2
+ al<g>hsik\t%0,%1,%h2
+ al<g>\t%0,%2
+ al<y>\t%0,%2
+ al<g>si\t%0,%c2"
+ [(set_attr "op_type" "RR<E>,RRF,RIL,RIL,RIE,RX<Y>,RXY,SIY")
+ (set_attr "cpu_facility" "*,z196,extimm,extimm,z196,*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1,z10_super_E1")])
+
+; alr, al, aly, algr, alg, alrk, algrk
+(define_insn "*add<mode>3_carry2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (match_dup 2)))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCL1mode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>\t%0,%2
+ al<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+; alr, alfi, slfi, al, aly, algr, algfi, slgfi, alg, alsi, algsi, alrk, algrk, alhsik, alghsik
+(define_insn "*add<mode>3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d, 0, 0,d,0,0, 0")
+ (match_operand:GPR 2 "general_operand" " d,d,Op,On,K,R,T, C"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "nonimmediate_operand" "=d,d, d, d,d,d,d,RS")
+ (plus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCLmode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>fi\t%0,%2
+ sl<g>fi\t%0,%n2
+ al<g>hsik\t%0,%1,%h2
+ al<g>\t%0,%2
+ al<y>\t%0,%2
+ al<g>si\t%0,%c2"
+ [(set_attr "op_type" "RR<E>,RRF,RIL,RIL,RIE,RX<Y>,RXY,SIY")
+ (set_attr "cpu_facility" "*,z196,extimm,extimm,z196,*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1,
+ *,z10_super_E1,z10_super_E1,z10_super_E1")])
+
+; alr, al, aly, algr, alg, alrk, algrk
+(define_insn "*add<mode>3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCLmode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>\t%0,%2
+ al<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+; alr, al, aly, algr, alg, alrk, algrk
+(define_insn "*add<mode>3_cconly2"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 1 "nonimmediate_operand" "%0,d,0,0")
+ (neg:GPR (match_operand:GPR 2 "general_operand" "d,d,R,T"))))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode(insn, CCLmode)"
+ "@
+ al<g>r\t%0,%2
+ al<g>rk\t%0,%1,%2
+ al<g>\t%0,%2
+ al<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+; ahi, aghi, ahik, aghik, afi, agfi, asi, agsi
+(define_insn "*add<mode>3_imm_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:GPR (match_operand:GPR 1 "nonimmediate_operand" " 0, d,0, 0")
+ (match_operand:GPR 2 "const_int_operand" " K, K,Os, C"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "nonimmediate_operand" "=d, d,d,QS")
+ (plus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCAmode)
+ && (CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'K', \"K\")
+ || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'O', \"Os\")
+ || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'C', \"C\"))
+ && INTVAL (operands[2]) != -((HOST_WIDE_INT)1 << (GET_MODE_BITSIZE(<MODE>mode) - 1))"
+ "@
+ a<g>hi\t%0,%h2
+ a<g>hik\t%0,%1,%h2
+ a<g>fi\t%0,%2
+ a<g>si\t%0,%c2"
+ [(set_attr "op_type" "RI,RIE,RIL,SIY")
+ (set_attr "cpu_facility" "*,z196,extimm,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+;
+; add(tf|df|sf|td|dd)3 instruction pattern(s).
+;
+
+; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+(define_insn "add<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=f, f")
+ (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+ (match_operand:FP 2 "general_operand" " f,<Rf>")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "@
+ a<xde><bt>r\t%0,<op1>%2
+ a<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+(define_insn "*add<mode>3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+ (match_operand:FP 2 "general_operand" " f,<Rf>"))
+ (match_operand:FP 3 "const0_operand" "")))
+ (set (match_operand:FP 0 "register_operand" "=f,f")
+ (plus:FP (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "@
+ a<xde><bt>r\t%0,<op1>%2
+ a<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+(define_insn "*add<mode>3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+ (match_operand:FP 2 "general_operand" " f,<Rf>"))
+ (match_operand:FP 3 "const0_operand" "")))
+ (clobber (match_scratch:FP 0 "=f,f"))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "@
+ a<xde><bt>r\t%0,<op1>%2
+ a<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+
+;;
+;;- Subtract instructions.
+;;
+
+;
+; subti3 instruction pattern(s).
+;
+
+(define_insn_and_split "subti3"
+ [(set (match_operand:TI 0 "register_operand" "=&d")
+ (minus:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (reg:CCL2 CC_REGNUM)
+ (compare:CCL2 (minus:DI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (minus:DI (match_dup 7) (match_dup 8)))])
+ (parallel
+ [(set (match_dup 3) (minus:DI (minus:DI (match_dup 4) (match_dup 5))
+ (gtu:DI (reg:CCL2 CC_REGNUM) (const_int 0))))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = operand_subword (operands[0], 0, 0, TImode);
+ operands[4] = operand_subword (operands[1], 0, 0, TImode);
+ operands[5] = operand_subword (operands[2], 0, 0, TImode);
+ operands[6] = operand_subword (operands[0], 1, 0, TImode);
+ operands[7] = operand_subword (operands[1], 1, 0, TImode);
+ operands[8] = operand_subword (operands[2], 1, 0, TImode);")
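+
+; Mirror image of the addti3 split: the low words are subtracted
+; first, with the CCL2 compare recording the borrow, which the
+; high-word subtraction then consumes through its (gtu ...) term.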
+
+;
+; subdi3 instruction pattern(s).
+;
+
+(define_expand "subdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*subdi3_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (sign_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "@
+ sgfr\t%0,%2
+ sgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_c,*")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*subdi3_zero_cc"
+ [(set (reg CC_REGNUM)
+ (compare (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT")))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d")
+ (minus:DI (match_dup 1) (zero_extend:DI (match_dup 2))))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_ZARCH"
+ "@
+ slgfr\t%0,%2
+ slgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_c_E1,z10_super_E1")])
+
+(define_insn "*subdi3_zero_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT")))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d,d"))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_ZARCH"
+ "@
+ slgfr\t%0,%2
+ slgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_c_E1,z10_super_E1")])
+
+(define_insn "*subdi3_zero"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,0")
+ (zero_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "@
+ slgfr\t%0,%2
+ slgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super_c_E1,z10_super_E1")])
+
+(define_insn_and_split "*subdi3_31z"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (reg:CCL2 CC_REGNUM)
+ (compare:CCL2 (minus:SI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (minus:SI (match_dup 7) (match_dup 8)))])
+ (parallel
+ [(set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
+ (gtu:SI (reg:CCL2 CC_REGNUM) (const_int 0))))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+ operands[4] = operand_subword (operands[1], 0, 0, DImode);
+ operands[5] = operand_subword (operands[2], 0, 0, DImode);
+ operands[6] = operand_subword (operands[0], 1, 0, DImode);
+ operands[7] = operand_subword (operands[1], 1, 0, DImode);
+ operands[8] = operand_subword (operands[2], 1, 0, DImode);")
+
+(define_insn_and_split "*subdi3_31"
+ [(set (match_operand:DI 0 "register_operand" "=&d")
+ (minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "general_operand" "do") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 3) (minus:SI (match_dup 4) (match_dup 5)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (reg:CCL2 CC_REGNUM)
+ (compare:CCL2 (minus:SI (match_dup 7) (match_dup 8))
+ (match_dup 7)))
+ (set (match_dup 6) (minus:SI (match_dup 7) (match_dup 8)))])
+ (set (pc)
+ (if_then_else (gtu (reg:CCL2 CC_REGNUM) (const_int 0))
+ (pc)
+ (label_ref (match_dup 9))))
+ (parallel
+ [(set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))
+ (clobber (reg:CC CC_REGNUM))])
+ (match_dup 9)]
+ "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+ operands[4] = operand_subword (operands[1], 0, 0, DImode);
+ operands[5] = operand_subword (operands[2], 0, 0, DImode);
+ operands[6] = operand_subword (operands[0], 1, 0, DImode);
+ operands[7] = operand_subword (operands[1], 1, 0, DImode);
+ operands[8] = operand_subword (operands[2], 1, 0, DImode);
+ operands[9] = gen_label_rtx ();")
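+
+; As in *adddi3_31, no subtract-with-borrow is available here, so the
+; split branches around an explicit "- 1" on the high word when the
+; low-word subtraction produced no borrow.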
+
+;
+; subsi3 instruction pattern(s).
+;
+
+(define_expand "subsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*subsi3_sign"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,0")
+ (sign_extend:SI (match_operand:HI 2 "memory_operand" "R,T"))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ sh\t%0,%2
+ shy\t%0,%2"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "z196prop" "z196_cracked,z196_cracked")])
+
+;
+; sub(di|si)3 instruction pattern(s).
+;
+
+; sr, s, sy, sgr, sg, srk, sgrk
+(define_insn "*sub<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T") ) )
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ s<g>r\t%0,%2
+ s<g>rk\t%0,%1,%2
+ s<g>\t%0,%2
+ s<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_borrow_cc"
+ [(set (reg CC_REGNUM)
+ (compare (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (match_dup 1)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (minus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL2mode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_borrow_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (match_dup 1)))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCL2mode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (minus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCLmode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_cc2"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T")))
+ (set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (minus:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL3mode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (minus:GPR (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T"))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCLmode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+
+; slr, sl, sly, slgr, slg, slrk, slgrk
+(define_insn "*sub<mode>3_cconly2"
+ [(set (reg CC_REGNUM)
+ (compare (match_operand:GPR 1 "register_operand" "0,d,0,0")
+ (match_operand:GPR 2 "general_operand" "d,d,R,T")))
+ (clobber (match_scratch:GPR 0 "=d,d,d,d"))]
+ "s390_match_ccmode (insn, CCL3mode)"
+ "@
+ sl<g>r\t%0,%2
+ sl<g>rk\t%0,%1,%2
+ sl<g>\t%0,%2
+ sl<y>\t%0,%2"
+ [(set_attr "op_type" "RR<E>,RRF,RX<Y>,RXY")
+ (set_attr "cpu_facility" "*,z196,*,*")
+ (set_attr "z10prop" "z10_super_c_E1,*,z10_super_E1,z10_super_E1")])
+
+
+;
+; sub(tf|df|sf|td|dd)3 instruction pattern(s).
+;
+
+; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
+(define_insn "sub<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=f, f")
+ (minus:FP (match_operand:FP 1 "register_operand" "<f0>,0")
+ (match_operand:FP 2 "general_operand" "f,<Rf>")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "@
+ s<xde><bt>r\t%0,<op1>%2
+ s<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
+(define_insn "*sub<mode>3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (minus:FP (match_operand:FP 1 "nonimmediate_operand" "<f0>,0")
+ (match_operand:FP 2 "general_operand" "f,<Rf>"))
+ (match_operand:FP 3 "const0_operand" "")))
+ (set (match_operand:FP 0 "register_operand" "=f,f")
+ (minus:FP (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "@
+ s<xde><bt>r\t%0,<op1>%2
+ s<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
+(define_insn "*sub<mode>3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (minus:FP (match_operand:FP 1 "nonimmediate_operand" "<f0>,0")
+ (match_operand:FP 2 "general_operand" "f,<Rf>"))
+ (match_operand:FP 3 "const0_operand" "")))
+ (clobber (match_scratch:FP 0 "=f,f"))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "@
+ s<xde><bt>r\t%0,<op1>%2
+ s<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fsimp<mode>")])
+
+
+;;
+;;- Conditional add/subtract instructions.
+;;
+
+;
+; add(di|si)cc instruction pattern(s).
+;
+
+; the following 4 patterns are used when the result of an add with
+; carry is checked for an overflow condition
+
+; op1 + op2 + c < op1
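+;
+; As a C sketch (illustration only, not part of this port):
+;   sum = op1 + op2 + c;  overflow = sum < op1;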
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc_carry1_cc"
+ [(set (reg CC_REGNUM)
+ (compare
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_dup 1)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (plus:GPR (plus:GPR (match_dup 3) (match_dup 1)) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL1mode) && TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z196prop" "z196_alone,z196_alone")])
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc_carry1_cconly"
+ [(set (reg CC_REGNUM)
+ (compare
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_dup 1)))
+ (clobber (match_scratch:GPR 0 "=d,d"))]
+ "s390_match_ccmode (insn, CCL1mode) && TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z196prop" "z196_alone,z196_alone")])
+
+; op1 + op2 + c < op2
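+;
+; Symmetric variant testing against op2; both forms are needed since
+; the compare may have been canonicalized against either addend.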
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc_carry2_cc"
+ [(set (reg CC_REGNUM)
+ (compare
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_dup 2)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (plus:GPR (plus:GPR (match_dup 3) (match_dup 1)) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCL1mode) && TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")])
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc_carry2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_dup 2)))
+ (clobber (match_scratch:GPR 0 "=d,d"))]
+ "s390_match_ccmode (insn, CCL1mode) && TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")])
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc_cc"
+ [(set (reg CC_REGNUM)
+ (compare
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (plus:GPR (plus:GPR (match_dup 3) (match_dup 1)) (match_dup 2)))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")])
+
+; alcr, alc, alcgr, alcg
+(define_insn "*add<mode>3_alc"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (plus:GPR (plus:GPR (match_operand:GPR 3 "s390_alc_comparison" "")
+ (match_operand:GPR 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:GPR 2 "general_operand" "d,RT")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_CPU_ZARCH"
+ "@
+ alc<g>r\t%0,%2
+ alc<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")])
+
+; slbr, slb, slbgr, slbg
+(define_insn "*sub<mode>3_slb_cc"
+ [(set (reg CC_REGNUM)
+ (compare
+ (minus:GPR (minus:GPR (match_operand:GPR 1 "nonimmediate_operand" "0,0")
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_operand:GPR 3 "s390_slb_comparison" ""))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (minus:GPR (minus:GPR (match_dup 1) (match_dup 2)) (match_dup 3)))]
+ "s390_match_ccmode (insn, CCLmode) && TARGET_CPU_ZARCH"
+ "@
+ slb<g>r\t%0,%2
+ slb<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_c,*")])
+
+; slbr, slb, slbgr, slbg
+(define_insn "*sub<mode>3_slb"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (minus:GPR (minus:GPR (match_operand:GPR 1 "nonimmediate_operand" "0,0")
+ (match_operand:GPR 2 "general_operand" "d,RT"))
+ (match_operand:GPR 3 "s390_slb_comparison" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_CPU_ZARCH"
+ "@
+ slb<g>r\t%0,%2
+ slb<g>\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_c,*")])
+
+(define_expand "add<mode>cc"
+ [(match_operand:GPR 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:GPR 2 "register_operand" "")
+ (match_operand:GPR 3 "const_int_operand" "")]
+ "TARGET_CPU_ZARCH"
+ "if (!s390_expand_addcc (GET_CODE (operands[1]),
+ XEXP (operands[1], 0), XEXP (operands[1], 1),
+ operands[0], operands[2],
+ operands[3])) FAIL; DONE;")
+
+;
+; scond instruction pattern(s).
+;
+
+(define_insn_and_split "*scond<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (match_operand:GPR 1 "s390_alc_comparison" ""))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (parallel
+ [(set (match_dup 0) (plus:GPR (plus:GPR (match_dup 1) (match_dup 0))
+ (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "")
+
+(define_insn_and_split "*scond<mode>_neg"
+ [(set (match_operand:GPR 0 "register_operand" "=&d")
+ (match_operand:GPR 1 "s390_slb_comparison" ""))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_CPU_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (const_int 0))
+ (parallel
+ [(set (match_dup 0) (minus:GPR (minus:GPR (match_dup 0) (match_dup 0))
+ (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_dup 0) (neg:GPR (match_dup 0)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "")
+
+
+(define_expand "cstore<mode>4"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 1 "s390_scond_operator"
+ [(match_operand:GPR 2 "register_operand" "")
+ (match_operand:GPR 3 "general_operand" "")]))]
+ "TARGET_CPU_ZARCH"
+ "if (!s390_expand_addcc (GET_CODE (operands[1]), operands[2], operands[3],
+ operands[0], const0_rtx, const1_rtx)) FAIL; DONE;")
+
+(define_expand "cstorecc4"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 1 "s390_eqne_operator"
+ [(match_operand:CCZ1 2 "register_operand")
+ (match_operand 3 "const0_operand")]))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "emit_insn (gen_sne (operands[0], operands[2]));
+ if (GET_CODE (operands[1]) == EQ)
+ emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+ DONE;")
+
+(define_insn_and_split "sne"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (ne:SI (match_operand:CCZ1 1 "register_operand" "0")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 28)))
+ (clobber (reg:CC CC_REGNUM))])])
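+
+; Operand 1 is a GPR copy of the CC (presumably obtained with IPM,
+; which leaves the register holding CC << 28), so for the 0/1 values
+; of CCZ1 mode the arithmetic shift right by 28 extracts the result
+; directly.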
+
+
+;;
+;;- Conditional move instructions (introduced with z196).
+;;
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+ (if_then_else:GPR (match_operand 1 "comparison_operator" "")
+ (match_operand:GPR 2 "nonimmediate_operand" "")
+ (match_operand:GPR 3 "nonimmediate_operand" "")))]
+ "TARGET_Z196"
+ "operands[1] = s390_emit_compare (GET_CODE (operands[1]),
+ XEXP (operands[1], 0), XEXP (operands[1], 1));")
+
+; locr, loc, stoc, locgr, locg, stocg
+(define_insn_and_split "*mov<mode>cc"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d,d, d, d,QS,QS,&d")
+ (if_then_else:GPR
+ (match_operator 1 "s390_comparison"
+ [(match_operand 2 "cc_reg_operand" " c,c, c, c, c, c, c")
+ (const_int 0)])
+ (match_operand:GPR 3 "nonimmediate_operand" " d,0,QS, 0, d, 0,QS")
+ (match_operand:GPR 4 "nonimmediate_operand" " 0,d, 0,QS, 0, d,QS")))]
+ "TARGET_Z196"
+ "@
+ loc<g>r%C1\t%0,%3
+ loc<g>r%D1\t%0,%4
+ loc<g>%C1\t%0,%3
+ loc<g>%D1\t%0,%4
+ stoc<g>%C1\t%3,%0
+ stoc<g>%D1\t%4,%0
+ #"
+ "&& reload_completed
+ && MEM_P (operands[3]) && MEM_P (operands[4])"
+ [(set (match_dup 0)
+ (if_then_else:GPR
+ (match_op_dup 1 [(match_dup 2) (const_int 0)])
+ (match_dup 3)
+ (match_dup 0)))
+ (set (match_dup 0)
+ (if_then_else:GPR
+ (match_op_dup 1 [(match_dup 2) (const_int 0)])
+ (match_dup 0)
+ (match_dup 4)))]
+ ""
+ [(set_attr "op_type" "RRF,RRF,RSY,RSY,RSY,RSY,*")])
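+
+; The last alternative covers the case where both arms live in
+; memory: no single load-on-condition can handle that, so after
+; reload it is split into two conditional loads into the destination
+; register, one per arm.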
+
+;;
+;;- Multiply instructions.
+;;
+
+;
+; muldi3 instruction pattern(s).
+;
+
+(define_insn "*muldi3_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (mult:DI (sign_extend:DI (match_operand:SI 2 "general_operand" "d,RT"))
+ (match_operand:DI 1 "register_operand" "0,0")))]
+ "TARGET_ZARCH"
+ "@
+ msgfr\t%0,%2
+ msgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "imuldi")])
+
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d,d")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,0")
+ (match_operand:DI 2 "general_operand" "d,K,RT,Os")))]
+ "TARGET_ZARCH"
+ "@
+ msgr\t%0,%2
+ mghi\t%0,%h2
+ msg\t%0,%2
+ msgfi\t%0,%2"
+ [(set_attr "op_type" "RRE,RI,RXY,RIL")
+ (set_attr "type" "imuldi")
+ (set_attr "cpu_facility" "*,*,*,z10")])
+
+;
+; mulsi3 instruction pattern(s).
+;
+
+(define_insn "*mulsi3_sign"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (mult:SI (sign_extend:SI (match_operand:HI 2 "memory_operand" "R,T"))
+ (match_operand:SI 1 "register_operand" "0,0")))]
+ ""
+ "@
+ mh\t%0,%2
+ mhy\t%0,%2"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "type" "imulhi")
+ (set_attr "cpu_facility" "*,z10")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d,d")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0,0")
+ (match_operand:SI 2 "general_operand" "d,K,R,T,Os")))]
+ ""
+ "@
+ msr\t%0,%2
+ mhi\t%0,%h2
+ ms\t%0,%2
+ msy\t%0,%2
+ msfi\t%0,%2"
+ [(set_attr "op_type" "RRE,RI,RX,RXY,RIL")
+ (set_attr "type" "imulsi,imulhi,imulsi,imulsi,imulsi")
+ (set_attr "cpu_facility" "*,*,*,*,z10")])
+
+;
+; mulsidi3 instruction pattern(s).
+;
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "%0,0,0"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "d,R,T"))))]
+ "!TARGET_ZARCH"
+ "@
+ mr\t%0,%2
+ m\t%0,%2
+ mfy\t%0,%2"
+ [(set_attr "op_type" "RR,RX,RXY")
+ (set_attr "type" "imulsi")
+ (set_attr "cpu_facility" "*,*,z10")])
+
+;
+; umulsidi3 instruction pattern(s).
+;
+
+(define_insn "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "%0,0"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "d,RT"))))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+ "@
+ mlr\t%0,%2
+ ml\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "imulsi")])
+
+;
+; mul(tf|df|sf|td|dd)3 instruction pattern(s).
+;
+
+; mxbr, mdbr, meebr, mxb, mdb, meeb, mdtr, mxtr
+(define_insn "mul<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=f,f")
+ (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
+ (match_operand:FP 2 "general_operand" "f,<Rf>")))]
+ "TARGET_HARD_FLOAT"
+ "@
+ m<xdee><bt>r\t%0,<op1>%2
+ m<xdee>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fmul<mode>")])
+
+; madbr, maebr, madb, maeb
+(define_insn "fma<mode>4"
+ [(set (match_operand:DSF 0 "register_operand" "=f,f")
+ (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
+ (match_operand:DSF 2 "nonimmediate_operand" "f,R")
+ (match_operand:DSF 3 "register_operand" "0,0")))]
+ "TARGET_HARD_FLOAT"
+ "@
+ ma<xde>br\t%0,%1,%2
+ ma<xde>b\t%0,%1,%2"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "type" "fmadd<mode>")])
+
+; msdbr, msebr, msdb, mseb
+(define_insn "fms<mode>4"
+ [(set (match_operand:DSF 0 "register_operand" "=f,f")
+ (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
+ (match_operand:DSF 2 "nonimmediate_operand" "f,R")
+ (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))]
+ "TARGET_HARD_FLOAT"
+ "@
+ ms<xde>br\t%0,%1,%2
+ ms<xde>b\t%0,%1,%2"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "type" "fmadd<mode>")])
+
+;;
+;;- Divide and modulo instructions.
+;;
+
+;
+; divmoddi4 instruction pattern(s).
+;
+
+(define_expand "divmoddi4"
+ [(parallel [(set (match_operand:DI 0 "general_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (set (match_operand:DI 3 "general_operand" "")
+ (mod:DI (match_dup 1) (match_dup 2)))])
+ (clobber (match_dup 4))]
+ "TARGET_ZARCH"
+{
+ rtx insn, div_equal, mod_equal;
+
+ div_equal = gen_rtx_DIV (DImode, operands[1], operands[2]);
+ mod_equal = gen_rtx_MOD (DImode, operands[1], operands[2]);
+
+ operands[4] = gen_reg_rtx(TImode);
+ emit_insn (gen_divmodtidi3 (operands[4], operands[1], operands[2]));
+
+ insn = emit_move_insn (operands[0], gen_lowpart (DImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, div_equal);
+
+ insn = emit_move_insn (operands[3], gen_highpart (DImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, mod_equal);
+
+ DONE;
+})
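+
+; divmodtidi3 below models the DSG result as a single TImode value
+; with the remainder in the high and the quotient in the low
+; doubleword; the REG_EQUAL notes re-expose the separate div and mod
+; results to the optimizers.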
+
+(define_insn "divmodtidi3"
+ [(set (match_operand:TI 0 "register_operand" "=d,d")
+ (ior:TI
+ (ashift:TI
+ (zero_extend:TI
+ (mod:DI (match_operand:DI 1 "register_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "d,RT")))
+ (const_int 64))
+ (zero_extend:TI (div:DI (match_dup 1) (match_dup 2)))))]
+ "TARGET_ZARCH"
+ "@
+ dsgr\t%0,%2
+ dsg\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "idiv")])
+
+(define_insn "divmodtisi3"
+ [(set (match_operand:TI 0 "register_operand" "=d,d")
+ (ior:TI
+ (ashift:TI
+ (zero_extend:TI
+ (mod:DI (match_operand:DI 1 "register_operand" "0,0")
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "d,RT"))))
+ (const_int 64))
+ (zero_extend:TI
+ (div:DI (match_dup 1) (sign_extend:DI (match_dup 2))))))]
+ "TARGET_ZARCH"
+ "@
+ dsgfr\t%0,%2
+ dsgf\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "idiv")])
+
+;
+; udivmoddi4 instruction pattern(s).
+;
+
+(define_expand "udivmoddi4"
+ [(parallel [(set (match_operand:DI 0 "general_operand" "")
+ (udiv:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "general_operand" "")
+ (umod:DI (match_dup 1) (match_dup 2)))])
+ (clobber (match_dup 4))]
+ "TARGET_ZARCH"
+{
+ rtx insn, div_equal, mod_equal, equal;
+
+ div_equal = gen_rtx_UDIV (DImode, operands[1], operands[2]);
+ mod_equal = gen_rtx_UMOD (DImode, operands[1], operands[2]);
+ equal = gen_rtx_IOR (TImode,
+ gen_rtx_ASHIFT (TImode,
+ gen_rtx_ZERO_EXTEND (TImode, mod_equal),
+ GEN_INT (64)),
+ gen_rtx_ZERO_EXTEND (TImode, div_equal));
+
+ operands[4] = gen_reg_rtx(TImode);
+ emit_clobber (operands[4]);
+ emit_move_insn (gen_lowpart (DImode, operands[4]), operands[1]);
+ emit_move_insn (gen_highpart (DImode, operands[4]), const0_rtx);
+
+ insn = emit_insn (gen_udivmodtidi3 (operands[4], operands[4], operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0], gen_lowpart (DImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, div_equal);
+
+ insn = emit_move_insn (operands[3], gen_highpart (DImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, mod_equal);
+
+ DONE;
+})
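+
+; For the unsigned divide the 128-bit dividend is built explicitly:
+; the register pair is clobbered, its low doubleword loaded with the
+; dividend and its high doubleword zeroed, before DLG divides the
+; whole pair.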
+
+(define_insn "udivmodtidi3"
+ [(set (match_operand:TI 0 "register_operand" "=d,d")
+ (ior:TI
+ (ashift:TI
+ (zero_extend:TI
+ (truncate:DI
+ (umod:TI (match_operand:TI 1 "register_operand" "0,0")
+ (zero_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "d,RT")))))
+ (const_int 64))
+ (zero_extend:TI
+ (truncate:DI
+ (udiv:TI (match_dup 1) (zero_extend:TI (match_dup 2)))))))]
+ "TARGET_ZARCH"
+ "@
+ dlgr\t%0,%2
+ dlg\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "idiv")])
+
+;
+; divmodsi4 instruction pattern(s).
+;
+
+(define_expand "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "general_operand" "")
+ (div:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "general_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))])
+ (clobber (match_dup 4))]
+ "!TARGET_ZARCH"
+{
+ rtx insn, div_equal, mod_equal, equal;
+
+ div_equal = gen_rtx_DIV (SImode, operands[1], operands[2]);
+ mod_equal = gen_rtx_MOD (SImode, operands[1], operands[2]);
+ equal = gen_rtx_IOR (DImode,
+ gen_rtx_ASHIFT (DImode,
+ gen_rtx_ZERO_EXTEND (DImode, mod_equal),
+ GEN_INT (32)),
+ gen_rtx_ZERO_EXTEND (DImode, div_equal));
+
+ operands[4] = gen_reg_rtx(DImode);
+ emit_insn (gen_extendsidi2 (operands[4], operands[1]));
+
+ insn = emit_insn (gen_divmoddisi3 (operands[4], operands[4], operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0], gen_lowpart (SImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, div_equal);
+
+ insn = emit_move_insn (operands[3], gen_highpart (SImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, mod_equal);
+
+ DONE;
+})
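+
+; The 32-bit variant works the same way through a DImode register
+; pair, sign-extending the dividend since D performs a signed
+; 64/32-bit divide.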
+
+(define_insn "divmoddisi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (ior:DI
+ (ashift:DI
+ (zero_extend:DI
+ (truncate:SI
+ (mod:DI (match_operand:DI 1 "register_operand" "0,0")
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "d,R")))))
+ (const_int 32))
+ (zero_extend:DI
+ (truncate:SI
+ (div:DI (match_dup 1) (sign_extend:DI (match_dup 2)))))))]
+ "!TARGET_ZARCH"
+ "@
+ dr\t%0,%2
+ d\t%0,%2"
+ [(set_attr "op_type" "RR,RX")
+ (set_attr "type" "idiv")])
+
+;
+; udivmodsi4, udivsi3 and umodsi3 instruction pattern(s).
+;
+
+(define_expand "udivmodsi4"
+ [(parallel [(set (match_operand:SI 0 "general_operand" "")
+ (udiv:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "general_operand" "")
+ (umod:SI (match_dup 1) (match_dup 2)))])
+ (clobber (match_dup 4))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+{
+ rtx insn, div_equal, mod_equal, equal;
+
+ div_equal = gen_rtx_UDIV (SImode, operands[1], operands[2]);
+ mod_equal = gen_rtx_UMOD (SImode, operands[1], operands[2]);
+ equal = gen_rtx_IOR (DImode,
+ gen_rtx_ASHIFT (DImode,
+ gen_rtx_ZERO_EXTEND (DImode, mod_equal),
+ GEN_INT (32)),
+ gen_rtx_ZERO_EXTEND (DImode, div_equal));
+
+ operands[4] = gen_reg_rtx(DImode);
+ emit_clobber (operands[4]);
+ emit_move_insn (gen_lowpart (SImode, operands[4]), operands[1]);
+ emit_move_insn (gen_highpart (SImode, operands[4]), const0_rtx);
+
+ insn = emit_insn (gen_udivmoddisi3 (operands[4], operands[4], operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0], gen_lowpart (SImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, div_equal);
+
+ insn = emit_move_insn (operands[3], gen_highpart (SImode, operands[4]));
+ set_unique_reg_note (insn, REG_EQUAL, mod_equal);
+
+ DONE;
+})
+
+(define_insn "udivmoddisi3"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (ior:DI
+ (ashift:DI
+ (zero_extend:DI
+ (truncate:SI
+ (umod:DI (match_operand:DI 1 "register_operand" "0,0")
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "d,RT")))))
+ (const_int 32))
+ (zero_extend:DI
+ (truncate:SI
+ (udiv:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+ "@
+ dlr\t%0,%2
+ dl\t%0,%2"
+ [(set_attr "op_type" "RRE,RXY")
+ (set_attr "type" "idiv")])
+
+(define_expand "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (udiv:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (match_dup 3))]
+ "!TARGET_ZARCH && !TARGET_CPU_ZARCH"
+{
+ rtx insn, udiv_equal, umod_equal, equal;
+
+ udiv_equal = gen_rtx_UDIV (SImode, operands[1], operands[2]);
+ umod_equal = gen_rtx_UMOD (SImode, operands[1], operands[2]);
+ equal = gen_rtx_IOR (DImode,
+ gen_rtx_ASHIFT (DImode,
+ gen_rtx_ZERO_EXTEND (DImode, umod_equal),
+ GEN_INT (32)),
+ gen_rtx_ZERO_EXTEND (DImode, udiv_equal));
+
+ operands[3] = gen_reg_rtx (DImode);
+
+ if (CONSTANT_P (operands[2]))
+ {
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0)
+ {
+ rtx label1 = gen_label_rtx ();
+
+ operands[1] = make_safe_from (operands[1], operands[0]);
+ emit_move_insn (operands[0], const0_rtx);
+ emit_cmp_and_jump_insns (operands[1], operands[2], LT, NULL_RTX,
+ SImode, 1, label1);
+ emit_move_insn (operands[0], const1_rtx);
+ emit_label (label1);
+ }
+ else
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ operands[2] = make_safe_from (operands[2], operands[0]);
+
+ emit_insn (gen_zero_extendsidi2 (operands[3], operands[1]));
+ insn = emit_insn (gen_divmoddisi3 (operands[3], operands[3],
+ operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0],
+ gen_lowpart (SImode, operands[3]));
+ set_unique_reg_note (insn, REG_EQUAL, udiv_equal);
+ }
+ }
+ else
+ {
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx label3 = gen_label_rtx ();
+
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[1] = make_safe_from (operands[1], operands[0]);
+ operands[2] = force_reg (SImode, operands[2]);
+ operands[2] = make_safe_from (operands[2], operands[0]);
+
+ emit_move_insn (operands[0], const0_rtx);
+ emit_cmp_and_jump_insns (operands[2], operands[1], GT, NULL_RTX,
+ SImode, 1, label3);
+ emit_cmp_and_jump_insns (operands[2], const0_rtx, LT, NULL_RTX,
+ SImode, 0, label2);
+ emit_cmp_and_jump_insns (operands[2], const1_rtx, EQ, NULL_RTX,
+ SImode, 0, label1);
+ emit_insn (gen_zero_extendsidi2 (operands[3], operands[1]));
+ insn = emit_insn (gen_divmoddisi3 (operands[3], operands[3],
+ operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0],
+ gen_lowpart (SImode, operands[3]));
+ set_unique_reg_note (insn, REG_EQUAL, udiv_equal);
+
+ emit_jump (label3);
+ emit_label (label1);
+ emit_move_insn (operands[0], operands[1]);
+ emit_jump (label3);
+ emit_label (label2);
+ emit_move_insn (operands[0], const1_rtx);
+ emit_label (label3);
+ }
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+})
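+
+; Without z/Architecture only the signed 64/32-bit divide exists, so
+; the expander above guards it: a negative constant divisor forces a
+; quotient of 0 or 1; otherwise runtime checks handle a divisor
+; larger than the dividend (quotient 0), a divisor with the sign bit
+; set (quotient then 1) and a divisor of 1 (quotient = dividend),
+; before falling back to the signed divide, which is safe for the
+; remaining range.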
+
+(define_expand "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (umod:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (clobber (match_dup 3))]
+ "!TARGET_ZARCH && !TARGET_CPU_ZARCH"
+{
+ rtx insn, udiv_equal, umod_equal, equal;
+
+ udiv_equal = gen_rtx_UDIV (SImode, operands[1], operands[2]);
+ umod_equal = gen_rtx_UMOD (SImode, operands[1], operands[2]);
+ equal = gen_rtx_IOR (DImode,
+ gen_rtx_ASHIFT (DImode,
+ gen_rtx_ZERO_EXTEND (DImode, umod_equal),
+ GEN_INT (32)),
+ gen_rtx_ZERO_EXTEND (DImode, udiv_equal));
+
+ operands[3] = gen_reg_rtx (DImode);
+
+ if (CONSTANT_P (operands[2]))
+ {
+ if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) <= 0)
+ {
+ rtx label1 = gen_label_rtx ();
+
+ operands[1] = make_safe_from (operands[1], operands[0]);
+ emit_move_insn (operands[0], operands[1]);
+ emit_cmp_and_jump_insns (operands[0], operands[2], LT, NULL_RTX,
+ SImode, 1, label1);
+ emit_insn (gen_abssi2 (operands[0], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[0], operands[1]));
+ emit_label (label1);
+ }
+ else
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ operands[2] = make_safe_from (operands[2], operands[0]);
+
+ emit_insn (gen_zero_extendsidi2 (operands[3], operands[1]));
+ insn = emit_insn (gen_divmoddisi3 (operands[3], operands[3],
+ operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0],
+ gen_highpart (SImode, operands[3]));
+ set_unique_reg_note (insn, REG_EQUAL, umod_equal);
+ }
+ }
+ else
+ {
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+ rtx label3 = gen_label_rtx ();
+
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[1] = make_safe_from (operands[1], operands[0]);
+ operands[2] = force_reg (SImode, operands[2]);
+ operands[2] = make_safe_from (operands[2], operands[0]);
+
+ emit_move_insn(operands[0], operands[1]);
+ emit_cmp_and_jump_insns (operands[2], operands[1], GT, NULL_RTX,
+ SImode, 1, label3);
+ emit_cmp_and_jump_insns (operands[2], const0_rtx, LT, NULL_RTX,
+ SImode, 0, label2);
+ emit_cmp_and_jump_insns (operands[2], const1_rtx, EQ, NULL_RTX,
+ SImode, 0, label1);
+ emit_insn (gen_zero_extendsidi2 (operands[3], operands[1]));
+ insn = emit_insn (gen_divmoddisi3 (operands[3], operands[3],
+ operands[2]));
+ set_unique_reg_note (insn, REG_EQUAL, equal);
+
+ insn = emit_move_insn (operands[0],
+ gen_highpart (SImode, operands[3]));
+ set_unique_reg_note (insn, REG_EQUAL, umod_equal);
+
+ emit_jump (label3);
+ emit_label (label1);
+ emit_move_insn (operands[0], const0_rtx);
+ emit_jump (label3);
+ emit_label (label2);
+ emit_insn (gen_subsi3 (operands[0], operands[0], operands[2]));
+ emit_label (label3);
+ }
+ DONE;
+})
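+
+; Companion of udivsi3 for the remainder: the same case analysis
+; applies, with the results adjusted accordingly (op1 when the
+; divisor is larger, op1 - op2 when the quotient is 1, 0 when the
+; divisor is 1).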
+
+;
+; div(tf|df|sf|td|dd)3 instruction pattern(s).
+;
+
+; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr
+(define_insn "div<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=f,f")
+ (div:FP (match_operand:FP 1 "register_operand" "<f0>,0")
+ (match_operand:FP 2 "general_operand" "f,<Rf>")))]
+ "TARGET_HARD_FLOAT"
+ "@
+ d<xde><bt>r\t%0,<op1>%2
+ d<xde>b\t%0,%2"
+ [(set_attr "op_type" "<RRer>,RXE")
+ (set_attr "type" "fdiv<mode>")])
+
+
+;;
+;;- And instructions.
+;;
+
+(define_expand "and<mode>3"
+ [(set (match_operand:INT 0 "nonimmediate_operand" "")
+ (and:INT (match_operand:INT 1 "nonimmediate_operand" "")
+ (match_operand:INT 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "s390_expand_logical_operator (AND, <MODE>mode, operands); DONE;")
+
+;
+; anddi3 instruction pattern(s).
+;
+
+(define_insn "*anddi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d, d")
+ (and:DI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "@
+ ngr\t%0,%2
+ ngrk\t%0,%1,%2
+ ng\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*anddi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d,d, d"))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH
+ /* Do not steal TM patterns. */
+ && s390_single_part (operands[2], DImode, HImode, 0) < 0"
+ "@
+ ngr\t%0,%2
+ ngrk\t%0,%1,%2
+ ng\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*anddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=d,d, d, d, d, d, d, d,d,d, d, AQ,Q")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand"
+ "%d,o, 0, 0, 0, 0, 0, 0,0,d, 0, 0,0")
+ (match_operand:DI 2 "general_operand"
+ "M, M,N0HDF,N1HDF,N2HDF,N3HDF,N0SDF,N1SDF,d,d,RT,NxQDF,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ #
+ #
+ nihh\t%0,%j2
+ nihl\t%0,%j2
+ nilh\t%0,%j2
+ nill\t%0,%j2
+ nihf\t%0,%m2
+ nilf\t%0,%m2
+ ngr\t%0,%2
+ ngrk\t%0,%1,%2
+ ng\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RRE,RXE,RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,extimm,extimm,*,z196,*,*,*")
+ (set_attr "z10prop" "*,
+ *,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ z10_super_E1,
+ *,
+ *")])
+
+(define_split
+ [(set (match_operand:DI 0 "s_operand" "")
+ (and:DI (match_dup 0) (match_operand:DI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (AND, &operands[0], &operands[1]);")
+
+
+;
+; andsi3 instruction pattern(s).
+;
+
+(define_insn "*andsi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d")
+ (and:SI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ nilf\t%0,%o2
+ nr\t%0,%2
+ nrk\t%0,%1,%2
+ n\t%0,%2
+ ny\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+(define_insn "*andsi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d,d"))]
+ "s390_match_ccmode(insn, CCTmode)
+ /* Do not steal TM patterns. */
+ && s390_single_part (operands[2], SImode, HImode, 0) < 0"
+ "@
+ nilf\t%0,%o2
+ nr\t%0,%2
+ nrk\t%0,%1,%2
+ n\t%0,%2
+ ny\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1")])
+
+(define_insn "*andsi3_zarch"
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=d,d, d, d, d,d,d,d,d, AQ,Q")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand"
+ "%d,o, 0, 0, 0,0,d,0,0, 0,0")
+ (match_operand:SI 2 "general_operand"
+ " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxQSF,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ #
+ #
+ nilh\t%0,%j2
+ nill\t%0,%j2
+ nilf\t%0,%o2
+ nr\t%0,%2
+ nrk\t%0,%1,%2
+ n\t%0,%2
+ ny\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RRE,RXE,RI,RI,RIL,RR,RRF,RX,RXY,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,z196,*,*,*,*")
+ (set_attr "z10prop" "*,
+ *,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ *")])
+
+(define_insn "*andsi3_esa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d, AQ,Q")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0, 0,0")
+ (match_operand:SI 2 "general_operand" " d,R,NxQSF,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ nr\t%0,%2
+ n\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RR,RX,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,*")])
+
+
+(define_split
+ [(set (match_operand:SI 0 "s_operand" "")
+ (and:SI (match_dup 0) (match_operand:SI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (AND, &operands[0], &operands[1]);")
+
+;
+; andhi3 instruction pattern(s).
+;
+
+(define_insn "*andhi3_zarch"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d, AQ,Q")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,d,0, 0,0")
+ (match_operand:HI 2 "general_operand" " d,d,n,NxQHF,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ nr\t%0,%2
+ nrk\t%0,%1,%2
+ nill\t%0,%x2
+ #
+ #"
+ [(set_attr "op_type" "RR,RRF,RI,SI,SS")
+ (set_attr "cpu_facility" "*,z196,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,*,*")
+])
+
+(define_insn "*andhi3_esa"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,AQ,Q")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:HI 2 "general_operand" "d,NxQHF,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ nr\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RR,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,*,*")
+])
+
+(define_split
+ [(set (match_operand:HI 0 "s_operand" "")
+ (and:HI (match_dup 0) (match_operand:HI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (AND, &operands[0], &operands[1]);")
+
+;
+; andqi3 instruction pattern(s).
+;
+
+(define_insn "*andqi3_zarch"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,Q,S,Q")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,d,0,0,0,0")
+ (match_operand:QI 2 "general_operand" " d,d,n,n,n,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ nr\t%0,%2
+ nrk\t%0,%1,%2
+ nill\t%0,%b2
+ ni\t%S0,%b2
+ niy\t%S0,%b2
+ #"
+ [(set_attr "op_type" "RR,RRF,RI,SI,SIY,SS")
+ (set_attr "cpu_facility" "*,z196,*,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super,z10_super,*")])
+
+(define_insn "*andqi3_esa"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,Q,Q")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "d,n,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ nr\t%0,%2
+ ni\t%S0,%b2
+ #"
+ [(set_attr "op_type" "RR,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,z10_super,*")])
+
+;
+; Block and (NC) patterns.
+;
+
+(define_insn "*nc"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q")
+ (and:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "Q")))
+ (use (match_operand 2 "const_int_operand" "n"))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 256"
+ "nc\t%O0(%2,%R0),%S1"
+ [(set_attr "op_type" "SS")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_split
+ [(set (match_operand 0 "memory_operand" "")
+ (and (match_dup 0)
+ (match_operand 1 "memory_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) > 0"
+ [(parallel
+ [(set (match_dup 0) (and:BLK (match_dup 0) (match_dup 1)))
+ (use (match_dup 2))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[2] = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[0])));
+ operands[0] = adjust_address (operands[0], BLKmode, 0);
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+})
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (and:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_operand:BLK 3 "memory_operand" "")
+ (and:BLK (match_dup 3)
+ (match_operand:BLK 4 "memory_operand" "")))
+ (use (match_operand 5 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_offset_p (operands[0], operands[3], operands[2])
+ && s390_offset_p (operands[1], operands[4], operands[2])
+ && !s390_overlap_p (operands[0], operands[1],
+ INTVAL (operands[2]) + INTVAL (operands[5]))
+ && INTVAL (operands[2]) + INTVAL (operands[5]) <= 256"
+ [(parallel
+ [(set (match_dup 6) (and:BLK (match_dup 6) (match_dup 7)))
+ (use (match_dup 8))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[6] = gen_rtx_MEM (BLKmode, XEXP (operands[0], 0));
+ operands[7] = gen_rtx_MEM (BLKmode, XEXP (operands[1], 0));
+ operands[8] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[5]));")
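+
+; Hedged example of the peephole above: two back-to-back block ANDs, say
+; an 8-byte NC at offset 0 followed by an 8-byte NC at offset 8 of the
+; same operands, are fused into a single 16-byte NC, provided source and
+; destination do not overlap and the combined length stays within the
+; 256-byte SS limit.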
+
+
+;;
+;;- Bit set (inclusive or) instructions.
+;;
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:INT 0 "nonimmediate_operand" "")
+ (ior:INT (match_operand:INT 1 "nonimmediate_operand" "")
+ (match_operand:INT 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "s390_expand_logical_operator (IOR, <MODE>mode, operands); DONE;")
+
+;
+; iordi3 instruction pattern(s).
+;
+
+(define_insn "*iordi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d, d")
+ (ior:DI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "@
+ ogr\t%0,%2
+ ogrk\t%0,%1,%2
+ og\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*iordi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d,0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d,d,d"))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "@
+ ogr\t%0,%2
+ ogrk\t%0,%1,%2
+ og\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*iordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=d, d, d, d, d, d,d,d, d, AQ,Q")
+ (ior:DI (match_operand:DI 1 "nonimmediate_operand"
+ " %0, 0, 0, 0, 0, 0,0,d, 0, 0,0")
+ (match_operand:DI 2 "general_operand"
+ "N0HD0,N1HD0,N2HD0,N3HD0,N0SD0,N1SD0,d,d,RT,NxQD0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ oihh\t%0,%i2
+ oihl\t%0,%i2
+ oilh\t%0,%i2
+ oill\t%0,%i2
+ oihf\t%0,%k2
+ oilf\t%0,%k2
+ ogr\t%0,%2
+ ogrk\t%0,%1,%2
+ og\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,extimm,extimm,*,z196,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ z10_super_E1,
+ *,
+ *")])
+
+(define_split
+ [(set (match_operand:DI 0 "s_operand" "")
+ (ior:DI (match_dup 0) (match_operand:DI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (ior:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (IOR, &operands[0], &operands[1]);")
+
+;
+; iorsi3 instruction pattern(s).
+;
+
+(define_insn "*iorsi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d")
+ (ior:SI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ oilf\t%0,%o2
+ or\t%0,%2
+ ork\t%0,%1,%2
+ o\t%0,%2
+ oy\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+(define_insn "*iorsi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d,d"))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ oilf\t%0,%o2
+ or\t%0,%2
+ ork\t%0,%1,%2
+ o\t%0,%2
+ oy\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super_E1,z10_super_E1")])
+
+(define_insn "*iorsi3_zarch"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d, d, d,d,d,d,d, AQ,Q")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0, 0, 0,0,d,0,0, 0,0")
+ (match_operand:SI 2 "general_operand" "N0HS0,N1HS0,Os,d,d,R,T,NxQS0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ oilh\t%0,%i2
+ oill\t%0,%i2
+ oilf\t%0,%o2
+ or\t%0,%2
+ ork\t%0,%1,%2
+ o\t%0,%2
+ oy\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RI,RI,RIL,RR,RRF,RX,RXY,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,z196,*,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ z10_super_E1,
+ z10_super_E1,
+ *,
+ *")])
+
+(define_insn "*iorsi3_esa"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,AQ,Q")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0")
+ (match_operand:SI 2 "general_operand" "d,R,NxQS0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ or\t%0,%2
+ o\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RR,RX,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,*")])
+
+(define_split
+ [(set (match_operand:SI 0 "s_operand" "")
+ (ior:SI (match_dup 0) (match_operand:SI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (ior:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (IOR, &operands[0], &operands[1]);")
+
+;
+; iorhi3 instruction pattern(s).
+;
+
+(define_insn "*iorhi3_zarch"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d, AQ,Q")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,d,0, 0,0")
+ (match_operand:HI 2 "general_operand" " d,d,n,NxQH0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ or\t%0,%2
+ ork\t%0,%1,%2
+ oill\t%0,%x2
+ #
+ #"
+ [(set_attr "op_type" "RR,RRF,RI,SI,SS")
+ (set_attr "cpu_facility" "*,z196,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,*,*")])
+
+(define_insn "*iorhi3_esa"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,AQ,Q")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:HI 2 "general_operand" "d,NxQH0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ or\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RR,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,*,*")])
+
+(define_split
+ [(set (match_operand:HI 0 "s_operand" "")
+ (ior:HI (match_dup 0) (match_operand:HI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (ior:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (IOR, &operands[0], &operands[1]);")
+
+;
+; iorqi3 instruction pattern(s).
+;
+
+(define_insn "*iorqi3_zarch"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,Q,S,Q")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,d,0,0,0,0")
+ (match_operand:QI 2 "general_operand" " d,d,n,n,n,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ or\t%0,%2
+ ork\t%0,%1,%2
+ oill\t%0,%b2
+ oi\t%S0,%b2
+ oiy\t%S0,%b2
+ #"
+ [(set_attr "op_type" "RR,RRF,RI,SI,SIY,SS")
+ (set_attr "cpu_facility" "*,z196,*,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,
+ z10_super,z10_super,*")])
+
+(define_insn "*iorqi3_esa"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,Q,Q")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "d,n,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ or\t%0,%2
+ oi\t%S0,%b2
+ #"
+ [(set_attr "op_type" "RR,SI,SS")
+ (set_attr "z10prop" "z10_super_E1,z10_super,*")])
+
+;
+; Block inclusive or (OC) patterns.
+;
+
+(define_insn "*oc"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q")
+ (ior:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "Q")))
+ (use (match_operand 2 "const_int_operand" "n"))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 256"
+ "oc\t%O0(%2,%R0),%S1"
+ [(set_attr "op_type" "SS")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_split
+ [(set (match_operand 0 "memory_operand" "")
+ (ior (match_dup 0)
+ (match_operand 1 "memory_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) > 0"
+ [(parallel
+ [(set (match_dup 0) (ior:BLK (match_dup 0) (match_dup 1)))
+ (use (match_dup 2))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[2] = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[0])));
+ operands[0] = adjust_address (operands[0], BLKmode, 0);
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+})
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (ior:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_operand:BLK 3 "memory_operand" "")
+ (ior:BLK (match_dup 3)
+ (match_operand:BLK 4 "memory_operand" "")))
+ (use (match_operand 5 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_offset_p (operands[0], operands[3], operands[2])
+ && s390_offset_p (operands[1], operands[4], operands[2])
+ && !s390_overlap_p (operands[0], operands[1],
+ INTVAL (operands[2]) + INTVAL (operands[5]))
+ && INTVAL (operands[2]) + INTVAL (operands[5]) <= 256"
+ [(parallel
+ [(set (match_dup 6) (ior:BLK (match_dup 6) (match_dup 7)))
+ (use (match_dup 8))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[6] = gen_rtx_MEM (BLKmode, XEXP (operands[0], 0));
+ operands[7] = gen_rtx_MEM (BLKmode, XEXP (operands[1], 0));
+ operands[8] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[5]));")
+
+
+;;
+;;- Xor instructions.
+;;
+
+(define_expand "xor<mode>3"
+ [(set (match_operand:INT 0 "nonimmediate_operand" "")
+ (xor:INT (match_operand:INT 1 "nonimmediate_operand" "")
+ (match_operand:INT 2 "general_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "s390_expand_logical_operator (XOR, <MODE>mode, operands); DONE;")
+
+;
+; xordi3 instruction pattern(s).
+;
+
+(define_insn "*xordi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d, d")
+ (xor:DI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "@
+ xgr\t%0,%2
+ xgrk\t%0,%1,%2
+ xg\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*xordi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
+ (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d,d, d"))]
+ "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "@
+ xgr\t%0,%2
+ xgrk\t%0,%1,%2
+ xg\t%0,%2"
+ [(set_attr "op_type" "RRE,RRF,RXY")
+ (set_attr "cpu_facility" "*,z196,*")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+
+(define_insn "*xordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d, d,d,d, d, AQ,Q")
+ (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0, 0,0,d, 0, 0,0")
+ (match_operand:DI 2 "general_operand" "N0SD0,N1SD0,d,d,RT,NxQD0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
+ "@
+ xihf\t%0,%k2
+ xilf\t%0,%k2
+ xgr\t%0,%2
+ xgrk\t%0,%1,%2
+ xg\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RIL,RIL,RRE,RRF,RXY,SI,SS")
+ (set_attr "cpu_facility" "extimm,extimm,*,z196,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,z10_super_E1,
+ *,z10_super_E1,*,*")])
+
+(define_split
+ [(set (match_operand:DI 0 "s_operand" "")
+ (xor:DI (match_dup 0) (match_operand:DI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (xor:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (XOR, &operands[0], &operands[1]);")
+
+;
+; xorsi3 instruction pattern(s).
+;
+
+(define_insn "*xorsi3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d")
+ (xor:SI (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ xilf\t%0,%o2
+ xr\t%0,%2
+ xrk\t%0,%1,%2
+ x\t%0,%2
+ xy\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1")])
+
+(define_insn "*xorsi3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d,d"))]
+ "s390_match_ccmode(insn, CCTmode)"
+ "@
+ xilf\t%0,%o2
+ xr\t%0,%2
+ xrk\t%0,%1,%2
+ x\t%0,%2
+ xy\t%0,%2"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1")])
+
+(define_insn "*xorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,d,d, AQ,Q")
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, 0,0")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxQS0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "s390_logical_operator_ok_p (operands)"
+ "@
+ xilf\t%0,%o2
+ xr\t%0,%2
+ xrk\t%0,%1,%2
+ x\t%0,%2
+ xy\t%0,%2
+ #
+ #"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY,SI,SS")
+ (set_attr "cpu_facility" "*,*,z196,*,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1,*,*")])
+
+(define_split
+ [(set (match_operand:SI 0 "s_operand" "")
+ (xor:SI (match_dup 0) (match_operand:SI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (xor:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (XOR, &operands[0], &operands[1]);")
+
+;
+; xorhi3 instruction pattern(s).
+;
+
+(define_insn "*xorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d, AQ,Q")
+ (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,d, 0,0")
+ (match_operand:HI 2 "general_operand" "Os,d,d,NxQH0,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "s390_logical_operator_ok_p (operands)"
+ "@
+ xilf\t%0,%x2
+ xr\t%0,%2
+ xrk\t%0,%1,%2
+ #
+ #"
+ [(set_attr "op_type" "RIL,RR,RRF,SI,SS")
+ (set_attr "cpu_facility" "*,*,z196,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,*,*")])
+
+(define_split
+ [(set (match_operand:HI 0 "s_operand" "")
+ (xor:HI (match_dup 0) (match_operand:HI 1 "immediate_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed"
+ [(parallel
+ [(set (match_dup 0) (xor:QI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_narrow_logical_operator (XOR, &operands[0], &operands[1]);")
+
+;
+; xorqi3 instruction pattern(s).
+;
+
+(define_insn "*xorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,Q,S,Q")
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,d,0,0,0")
+ (match_operand:QI 2 "general_operand" "Os,d,d,n,n,Q")))
+ (clobber (reg:CC CC_REGNUM))]
+ "s390_logical_operator_ok_p (operands)"
+ "@
+ xilf\t%0,%b2
+ xr\t%0,%2
+ xrk\t%0,%1,%2
+ xi\t%S0,%b2
+ xiy\t%S0,%b2
+ #"
+ [(set_attr "op_type" "RIL,RR,RRF,SI,SIY,SS")
+ (set_attr "cpu_facility" "*,*,z196,*,*,*")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super,z10_super,*")])
+
+
+;
+; Block exclusive or (XC) patterns.
+;
+
+(define_insn "*xc"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q")
+ (xor:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "Q")))
+ (use (match_operand 2 "const_int_operand" "n"))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 256"
+ "xc\t%O0(%2,%R0),%S1"
+ [(set_attr "op_type" "SS")])
+
+(define_split
+ [(set (match_operand 0 "memory_operand" "")
+ (xor (match_dup 0)
+ (match_operand 1 "memory_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) > 0"
+ [(parallel
+ [(set (match_dup 0) (xor:BLK (match_dup 0) (match_dup 1)))
+ (use (match_dup 2))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[2] = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[0])));
+ operands[0] = adjust_address (operands[0], BLKmode, 0);
+ operands[1] = adjust_address (operands[1], BLKmode, 0);
+})
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (xor:BLK (match_dup 0)
+ (match_operand:BLK 1 "memory_operand" "")))
+ (use (match_operand 2 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_operand:BLK 3 "memory_operand" "")
+ (xor:BLK (match_dup 3)
+ (match_operand:BLK 4 "memory_operand" "")))
+ (use (match_operand 5 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_offset_p (operands[0], operands[3], operands[2])
+ && s390_offset_p (operands[1], operands[4], operands[2])
+ && !s390_overlap_p (operands[0], operands[1],
+ INTVAL (operands[2]) + INTVAL (operands[5]))
+ && INTVAL (operands[2]) + INTVAL (operands[5]) <= 256"
+ [(parallel
+ [(set (match_dup 6) (xor:BLK (match_dup 6) (match_dup 7)))
+ (use (match_dup 8))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[6] = gen_rtx_MEM (BLKmode, XEXP (operands[0], 0));
+ operands[7] = gen_rtx_MEM (BLKmode, XEXP (operands[1], 0));
+ operands[8] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[5]));")
+
+;
+; Block xor (XC) patterns with src == dest.
+;
+
+(define_insn "*xc_zero"
+ [(set (match_operand:BLK 0 "memory_operand" "=Q")
+ (const_int 0))
+ (use (match_operand 1 "const_int_operand" "n"))
+ (clobber (reg:CC CC_REGNUM))]
+ "INTVAL (operands[1]) >= 1 && INTVAL (operands[1]) <= 256"
+ "xc\t%O0(%1,%R0),%S0"
+ [(set_attr "op_type" "SS")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:BLK 0 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 1 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (match_operand:BLK 2 "memory_operand" "")
+ (const_int 0))
+ (use (match_operand 3 "const_int_operand" ""))
+ (clobber (reg:CC CC_REGNUM))])]
+ "s390_offset_p (operands[0], operands[2], operands[1])
+ && INTVAL (operands[1]) + INTVAL (operands[3]) <= 256"
+ [(parallel
+ [(set (match_dup 4) (const_int 0))
+ (use (match_dup 5))
+ (clobber (reg:CC CC_REGNUM))])]
+ "operands[4] = gen_rtx_MEM (BLKmode, XEXP (operands[0], 0));
+ operands[5] = GEN_INT (INTVAL (operands[1]) + INTVAL (operands[3]));")
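+
+; Illustrative note: since x ^ x == 0, an XC whose source and
+; destination coincide clears storage, so e.g.
+;   memset (buf, 0, 256);
+; can be emitted as the single insn
+;   xc      0(256,%r1),0(%r1)
+; (buf and %r1 illustrative); the peephole above additionally fuses two
+; adjacent clears of this form when their total length fits in 256 bytes.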
+
+
+;;
+;;- Negate instructions.
+;;
+
+;
+; neg(di|si)2 instruction pattern(s).
+;
+
+(define_expand "neg<mode>2"
+ [(parallel
+ [(set (match_operand:DSI 0 "register_operand" "=d")
+ (neg:DSI (match_operand:DSI 1 "register_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*negdi2_sign_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:DI (ashiftrt:DI (ashift:DI (subreg:DI
+ (match_operand:SI 1 "register_operand" "d") 0)
+ (const_int 32)) (const_int 32)))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (sign_extend:DI (match_dup 1))))]
+ "TARGET_ZARCH && s390_match_ccmode (insn, CCAmode)"
+ "lcgfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+(define_insn "*negdi2_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "lcgfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+; lcr, lcgr
+(define_insn "*neg<mode>2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:GPR (match_operand:GPR 1 "register_operand" "d"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d")
+ (neg:GPR (match_dup 1)))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "lc<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_super_c_E1")])
+
+; lcr, lcgr
+(define_insn "*neg<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (neg:GPR (match_operand:GPR 1 "register_operand" "d"))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d"))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "lc<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_super_c_E1")])
+
+; lcr, lcgr
+(define_insn "*neg<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (neg:GPR (match_operand:GPR 1 "register_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "lc<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_super_c_E1")])
+
+(define_insn_and_split "*negdi2_31"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (match_operand:DI 1 "register_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 2) (neg:SI (match_dup 3)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel
+ [(set (reg:CCAP CC_REGNUM)
+ (compare:CCAP (neg:SI (match_dup 5)) (const_int 0)))
+ (set (match_dup 4) (neg:SI (match_dup 5)))])
+ (set (pc)
+ (if_then_else (ne (reg:CCAP CC_REGNUM) (const_int 0))
+ (pc)
+ (label_ref (match_dup 6))))
+ (parallel
+ [(set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
+ (clobber (reg:CC CC_REGNUM))])
+ (match_dup 6)]
+ "operands[2] = operand_subword (operands[0], 0, 0, DImode);
+ operands[3] = operand_subword (operands[1], 0, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1, 0, DImode);
+ operands[6] = gen_label_rtx ();")
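+
+; A worked reading of the split above (values illustrative): to negate
+; the 64-bit value H:L held as two 32-bit words, it computes high = -H,
+; then low = -L while setting CC, and finally high -= 1 when the low
+; result is nonzero.  This matches two's complement arithmetic:
+;   -(H*2^32 + L) == (-H - (L != 0))*2^32 + ((-L) mod 2^32)
+; e.g. negating H:L = 0:1 gives low = 0xffffffff and high = 0xffffffff.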
+
+;
+; neg(df|sf)2 instruction pattern(s).
+;
+
+(define_expand "neg<mode>2"
+ [(parallel
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (neg:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_HARD_FLOAT"
+ "")
+
+; lcxbr, lcdbr, lcebr
+(define_insn "*neg<mode>2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:BFP (match_operand:BFP 1 "register_operand" "f"))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (set (match_operand:BFP 0 "register_operand" "=f")
+ (neg:BFP (match_dup 1)))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "lc<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lcxbr, lcdbr, lcebr
+(define_insn "*neg<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (neg:BFP (match_operand:BFP 1 "register_operand" "f"))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (clobber (match_scratch:BFP 0 "=f"))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "lc<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lcdfr
+(define_insn "*neg<mode>2_nocc"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+ (neg:FP (match_operand:FP 1 "register_operand" "<fT0>")))]
+ "TARGET_DFP"
+ "lcdfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lcxbr, lcdbr, lcebr
+(define_insn "*neg<mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (neg:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "lc<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+
+;;
+;;- Absolute value instructions.
+;;
+
+;
+; abs(di|si)2 instruction pattern(s).
+;
+
+(define_insn "*absdi2_sign_cc"
+ [(set (reg CC_REGNUM)
+ (compare (abs:DI (ashiftrt:DI (ashift:DI (subreg:DI
+ (match_operand:SI 1 "register_operand" "d") 0)
+ (const_int 32)) (const_int 32)))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (abs:DI (sign_extend:DI (match_dup 1))))]
+ "TARGET_ZARCH && s390_match_ccmode (insn, CCAmode)"
+ "lpgfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+(define_insn "*absdi2_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (abs:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "lpgfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+; lpr, lpgr
+(define_insn "*abs<mode>2_cc"
+ [(set (reg CC_REGNUM)
+        (compare (abs:GPR (match_operand:GPR 1 "register_operand" "d"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d")
+ (abs:GPR (match_dup 1)))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "lp<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+; lpr, lpgr
+(define_insn "*abs<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (abs:GPR (match_operand:GPR 1 "register_operand" "d"))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d"))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "lp<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+; lpr, lpgr
+(define_insn "abs<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (abs:GPR (match_operand:GPR 1 "register_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "lp<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+;
+; abs(df|sf)2 instruction pattern(s).
+;
+
+(define_expand "abs<mode>2"
+ [(parallel
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_HARD_FLOAT"
+ "")
+
+; lpxbr, lpdbr, lpebr
+(define_insn "*abs<mode>2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (abs:BFP (match_operand:BFP 1 "register_operand" "f"))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (set (match_operand:BFP 0 "register_operand" "=f")
+ (abs:BFP (match_dup 1)))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "lp<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lpxbr, lpdbr, lpebr
+(define_insn "*abs<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (abs:BFP (match_operand:BFP 1 "register_operand" "f"))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (clobber (match_scratch:BFP 0 "=f"))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "lp<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lpdfr
+(define_insn "*abs<mode>2_nocc"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+ (abs:FP (match_operand:FP 1 "register_operand" "<fT0>")))]
+ "TARGET_DFP"
+ "lpdfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lpxbr, lpdbr, lpebr
+(define_insn "*abs<mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "lp<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+
+;;
+;;- Negated absolute value instructions.
+;;
+
+;
+; Integer
+;
+
+(define_insn "*negabsdi2_sign_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:DI (abs:DI (ashiftrt:DI (ashift:DI (subreg:DI
+ (match_operand:SI 1 "register_operand" "d") 0)
+ (const_int 32)) (const_int 32))))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (abs:DI (sign_extend:DI (match_dup 1)))))]
+ "TARGET_ZARCH && s390_match_ccmode (insn, CCAmode)"
+ "lngfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+(define_insn "*negabsdi2_sign"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (neg:DI (abs:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "d")))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+ "lngfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "z10prop" "z10_c")])
+
+; lnr, lngr
+(define_insn "*negabs<mode>2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:GPR (abs:GPR (match_operand:GPR 1 "register_operand" "d")))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d")
+ (neg:GPR (abs:GPR (match_dup 1))))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "ln<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+; lnr, lngr
+(define_insn "*negabs<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (neg:GPR (abs:GPR (match_operand:GPR 1 "register_operand" "d")))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d"))]
+ "s390_match_ccmode (insn, CCAmode)"
+ "ln<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+; lnr, lngr
+(define_insn "*negabs<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (neg:GPR (abs:GPR (match_operand:GPR 1 "register_operand" "d"))))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "ln<g>r\t%0,%1"
+ [(set_attr "op_type" "RR<E>")
+ (set_attr "z10prop" "z10_c")])
+
+;
+; Floating point
+;
+
+; lnxbr, lndbr, lnebr
+(define_insn "*negabs<mode>2_cc"
+ [(set (reg CC_REGNUM)
+ (compare (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (set (match_operand:BFP 0 "register_operand" "=f")
+ (neg:BFP (abs:BFP (match_dup 1))))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "ln<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lnxbr, lndbr, lnebr
+(define_insn "*negabs<mode>2_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+ (match_operand:BFP 2 "const0_operand" "")))
+ (clobber (match_scratch:BFP 0 "=f"))]
+ "s390_match_ccmode (insn, CCSmode) && TARGET_HARD_FLOAT"
+ "ln<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lndfr
+(define_insn "*negabs<mode>2_nocc"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+        (neg:FP (abs:FP (match_operand:FP 1 "register_operand" "<fT0>"))))]
+ "TARGET_DFP"
+ "lndfr\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+; lnxbr, lndbr, lnebr
+(define_insn "*negabs<mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f")
+ (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_HARD_FLOAT"
+ "ln<xde>br\t%0,%1"
+ [(set_attr "op_type" "RRE")
+ (set_attr "type" "fsimp<mode>")])
+
+;;
+;;- Square root instructions.
+;;
+
+;
+; sqrt(df|sf)2 instruction pattern(s).
+;
+
+; sqxbr, sqdbr, sqebr, sqdb, sqeb
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:BFP 0 "register_operand" "=f,f")
+ (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,<Rf>")))]
+ "TARGET_HARD_FLOAT"
+ "@
+ sq<xde>br\t%0,%1
+ sq<xde>b\t%0,%1"
+ [(set_attr "op_type" "RRE,RXE")
+ (set_attr "type" "fsqrt<mode>")])
+
+
+;;
+;;- One's complement instructions.
+;;
+
+;
+; one_cmpl(di|si|hi|qi)2 instruction pattern(s).
+;
+
+(define_expand "one_cmpl<mode>2"
+ [(parallel
+ [(set (match_operand:INT 0 "register_operand" "")
+ (xor:INT (match_operand:INT 1 "register_operand" "")
+ (const_int -1)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+
+;;
+;; Find leftmost bit instructions.
+;;
+
+(define_expand "clzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (clz:DI (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_EXTIMM && TARGET_ZARCH"
+{
+ rtx insn, clz_equal;
+ rtx wide_reg = gen_reg_rtx (TImode);
+ rtx msb = gen_rtx_CONST_INT (DImode, (unsigned HOST_WIDE_INT) 1 << 63);
+
+ clz_equal = gen_rtx_CLZ (DImode, operands[1]);
+
+ emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
+
+ insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
+ set_unique_reg_note (insn, REG_EQUAL, clz_equal);
+
+ DONE;
+})
+
+(define_insn "clztidi2"
+ [(set (match_operand:TI 0 "register_operand" "=d")
+        (ior:TI
+          (ashift:TI (zero_extend:TI (clz:DI (match_operand:DI 1 "register_operand" "d")))
+                     (const_int 64))
+          (zero_extend:TI
+            (xor:DI (match_dup 1)
+                    (lshiftrt (match_operand:DI 2 "const_int_operand" "")
+                              (subreg:SI (clz:DI (match_dup 1)) 4))))))
+ (clobber (reg:CC CC_REGNUM))]
+ "(unsigned HOST_WIDE_INT) INTVAL (operands[2])
+ == (unsigned HOST_WIDE_INT) 1 << 63
+ && TARGET_EXTIMM && TARGET_ZARCH"
+ "flogr\t%0,%1"
+ [(set_attr "op_type" "RRE")])
+
+
+;;
+;;- Rotate instructions.
+;;
+
+;
+; rotl(di|si)3 instruction pattern(s).
+;
+
+; rll, rllg
+(define_insn "rotl<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (rotate:GPR (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "TARGET_CPU_ZARCH"
+ "rll<g>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RSE")
+ (set_attr "atype" "reg")
+ (set_attr "z10prop" "z10_super_E1")])
+
+; rll, rllg
+(define_insn "*rotl<mode>3_and"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (rotate:GPR (match_operand:GPR 1 "register_operand" "d")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand:SI 3 "const_int_operand" "n"))))]
+ "TARGET_CPU_ZARCH && (INTVAL (operands[3]) & 63) == 63"
+ "rll<g>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RSE")
+ (set_attr "atype" "reg")
+ (set_attr "z10prop" "z10_super_E1")])
+
+
+;;
+;;- Shift instructions.
+;;
+
+;
+; (ashl|lshr)(di|si)3 instruction pattern(s).
+; Left shifts and logical right shifts
+
+(define_expand "<shift><mode>3"
+ [(set (match_operand:DSI 0 "register_operand" "")
+ (SHIFT:DSI (match_operand:DSI 1 "register_operand" "")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "")))]
+ ""
+ "")
+
+; sldl, srdl
+(define_insn "*<shift>di3_31"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (SHIFT:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "!TARGET_ZARCH"
+ "s<lr>dl\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")
+ (set_attr "z196prop" "z196_cracked")])
+
+; sll, srl, sllg, srlg, sllk, srlk
+(define_insn "*<shift><mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (SHIFT:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")))]
+ ""
+ "@
+ s<lr>l<g>\t%0,<1>%Y2
+ s<lr>l<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+; sldl, srdl
+(define_insn "*<shift>di3_31_and"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (SHIFT:DI (match_operand:DI 1 "register_operand" "0")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand:SI 3 "const_int_operand" "n"))))]
+ "!TARGET_ZARCH && (INTVAL (operands[3]) & 63) == 63"
+ "s<lr>dl\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+; sll, srl, sllg, srlg, sllk, srlk
+(define_insn "*<shift><mode>3_and"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (SHIFT:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")
+ (match_operand:SI 3 "const_int_operand" "n,n"))))]
+ "(INTVAL (operands[3]) & 63) == 63"
+ "@
+ s<lr>l<g>\t%0,<1>%Y2
+ s<lr>l<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+;
+; ashr(di|si)3 instruction pattern(s).
+; Arithmetic right shifts
+
+(define_expand "ashr<mode>3"
+ [(parallel
+ [(set (match_operand:DSI 0 "register_operand" "")
+ (ashiftrt:DSI (match_operand:DSI 1 "register_operand" "")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "")
+
+(define_insn "*ashrdi3_cc_31"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "!TARGET_ZARCH && s390_match_ccmode(insn, CCSmode)"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+(define_insn "*ashrdi3_cconly_31"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d"))]
+ "!TARGET_ZARCH && s390_match_ccmode(insn, CCSmode)"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+(define_insn "*ashrdi3_31"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+; sra, srag, srak
+(define_insn "*ashr<mode>3_cc"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y"))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (ashiftrt:GPR (match_dup 1) (match_dup 2)))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+; sra, srag, srak
+(define_insn "*ashr<mode>3_cconly"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y"))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d,d"))]
+ "s390_match_ccmode(insn, CCSmode)"
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+; sra, srag
+(define_insn "*ashr<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+
+; shift pattern with implicit ANDs
+
+(define_insn "*ashrdi3_cc_31_and"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_dup 1) (and:SI (match_dup 2) (match_dup 3))))]
+ "!TARGET_ZARCH && s390_match_ccmode(insn, CCSmode)
+ && (INTVAL (operands[3]) & 63) == 63"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+(define_insn "*ashrdi3_cconly_31_and"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=d"))]
+ "!TARGET_ZARCH && s390_match_ccmode(insn, CCSmode)
+ && (INTVAL (operands[3]) & 63) == 63"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+(define_insn "*ashrdi3_31_and"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")
+ (match_operand:SI 3 "const_int_operand" "n"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && (INTVAL (operands[3]) & 63) == 63"
+ "srda\t%0,%Y2"
+ [(set_attr "op_type" "RS")
+ (set_attr "atype" "reg")])
+
+; sra, srag, srak
+(define_insn "*ashr<mode>3_cc_and"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")
+ (match_operand:SI 3 "const_int_operand" "n,n")))
+ (const_int 0)))
+ (set (match_operand:GPR 0 "register_operand" "=d,d")
+ (ashiftrt:GPR (match_dup 1) (and:SI (match_dup 2) (match_dup 3))))]
+ "s390_match_ccmode(insn, CCSmode) && (INTVAL (operands[3]) & 63) == 63"
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+; sra, srag, srak
+(define_insn "*ashr<mode>3_cconly_and"
+ [(set (reg CC_REGNUM)
+ (compare (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")
+ (match_operand:SI 3 "const_int_operand" "n,n")))
+ (const_int 0)))
+ (clobber (match_scratch:GPR 0 "=d,d"))]
+ "s390_match_ccmode(insn, CCSmode) && (INTVAL (operands[3]) & 63) == 63"
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+; sra, srag, srak
+(define_insn "*ashr<mode>3_and"
+ [(set (match_operand:GPR 0 "register_operand" "=d,d")
+ (ashiftrt:GPR (match_operand:GPR 1 "register_operand" "<d0>,d")
+ (and:SI (match_operand:SI 2 "shift_count_or_setmem_operand" "Y,Y")
+ (match_operand:SI 3 "const_int_operand" "n,n"))))
+ (clobber (reg:CC CC_REGNUM))]
+ "(INTVAL (operands[3]) & 63) == 63"
+ "@
+ sra<g>\t%0,<1>%Y2
+ sra<gk>\t%0,%1,%Y2"
+ [(set_attr "op_type" "RS<E>,RSY")
+ (set_attr "atype" "reg,reg")
+ (set_attr "cpu_facility" "*,z196")
+ (set_attr "z10prop" "z10_super_E1,*")])
+
+
+;;
+;; Branch instruction patterns.
+;;
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:GPR 1 "register_operand" "")
+ (match_operand:GPR 2 "general_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "s390_emit_jump (operands[3],
+ s390_emit_compare (GET_CODE (operands[0]), operands[1], operands[2]));
+ DONE;")
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:FP 1 "register_operand" "")
+ (match_operand:FP 2 "general_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+ "s390_emit_jump (operands[3],
+ s390_emit_compare (GET_CODE (operands[0]), operands[1], operands[2]));
+ DONE;")
+
+(define_expand "cbranchcc4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "s390_eqne_operator"
+ [(match_operand 1 "cc_reg_operand" "")
+ (match_operand 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_HARD_FLOAT"
+ "s390_emit_jump (operands[3],
+ s390_emit_compare (GET_CODE (operands[0]), operands[1], operands[2]));
+ DONE;")
+
+
+
+;;
+;;- Conditional jump instructions.
+;;
+
+(define_insn "*cjump_64"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_CPU_ZARCH"
+{
+ if (get_attr_length (insn) == 4)
+ return "j%C1\t%l0";
+ else
+ return "jg%C1\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6)))])
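+
+; Hedged arithmetic behind the length attribute above: the 4-byte "j"
+; (brc) form takes a signed 16-bit halfword-relative displacement, i.e.
+; a reach of +-64KB, while the 6-byte "jg" (brcl) form reaches +-4GB.
+; Testing against 60000 rather than 65536 leaves slack so that later
+; length growth during branch shortening cannot invalidate an already
+; chosen short form.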
+
+(define_insn "*cjump_31"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "!TARGET_CPU_ZARCH"
+{
+ gcc_assert (get_attr_length (insn) == 4);
+ return "j%C1\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "flag_pic") (const_int 0))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 8))))])
+
+(define_insn "*cjump_long"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (match_operand 0 "address_operand" "ZQZR")
+ (pc)))]
+ ""
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "b%C1r\t%0";
+ else
+ return "b%C1\t%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
+
+;;
+;;- Negated conditional jump instructions.
+;;
+
+(define_insn "*icjump_64"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_CPU_ZARCH"
+{
+ if (get_attr_length (insn) == 4)
+ return "j%D1\t%l0";
+ else
+ return "jg%D1\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6)))])
+
+(define_insn "*icjump_31"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "!TARGET_CPU_ZARCH"
+{
+ gcc_assert (get_attr_length (insn) == 4);
+ return "j%D1\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "flag_pic") (const_int 0))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 8))))])
+
+(define_insn "*icjump_long"
+ [(set (pc)
+ (if_then_else
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (pc)
+ (match_operand 0 "address_operand" "ZQZR")))]
+ ""
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "b%D1r\t%0";
+ else
+ return "b%D1\t%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
+;;
+;;- Trap instructions.
+;;
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "j\t.+2"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")])
+
+(define_expand "ctrap<mode>4"
+ [(trap_if (match_operator 0 "comparison_operator"
+ [(match_operand:GPR 1 "register_operand" "")
+ (match_operand:GPR 2 "general_operand" "")])
+ (match_operand 3 "const0_operand" ""))]
+ ""
+ {
+ rtx cond = s390_emit_compare (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ emit_insn (gen_condtrap (cond, XEXP (cond, 0)));
+ DONE;
+ })
+
+(define_expand "ctrap<mode>4"
+ [(trap_if (match_operator 0 "comparison_operator"
+ [(match_operand:FP 1 "register_operand" "")
+ (match_operand:FP 2 "general_operand" "")])
+ (match_operand 3 "const0_operand" ""))]
+ ""
+ {
+ rtx cond = s390_emit_compare (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ emit_insn (gen_condtrap (cond, XEXP (cond, 0)));
+ DONE;
+ })
+
+(define_insn "condtrap"
+ [(trap_if (match_operator 0 "s390_comparison"
+ [(match_operand 1 "cc_reg_operand" "c")
+ (const_int 0)])
+ (const_int 0))]
+ ""
+ "j%C0\t.+2";
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")])
+
+; crt, cgrt, cit, cgit
+(define_insn "*cmp_and_trap_signed_int<mode>"
+ [(trap_if (match_operator 0 "s390_signed_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,K")])
+ (const_int 0))]
+ "TARGET_Z10"
+ "@
+ c<g>rt%C0\t%1,%2
+ c<g>it%C0\t%1,%h2"
+ [(set_attr "op_type" "RRF,RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")])
+
+; clrt, clgrt, clfit, clgit
+(define_insn "*cmp_and_trap_unsigned_int<mode>"
+ [(trap_if (match_operator 0 "s390_unsigned_integer_comparison"
+ [(match_operand:GPR 1 "register_operand" "d,d")
+ (match_operand:GPR 2 "nonmemory_operand" "d,D")])
+ (const_int 0))]
+ "TARGET_Z10"
+ "@
+ cl<g>rt%C0\t%1,%2
+ cl<gf>it%C0\t%1,%x2"
+ [(set_attr "op_type" "RRF,RIE")
+ (set_attr "type" "branch")
+ (set_attr "z10prop" "z10_super_c,z10_super")])
+
+;;
+;;- Loop instructions.
+;;
+;; This is all complicated by the fact that, since this is a jump insn,
+;; we must handle our own output reloads.
+
+;; branch on index
+
+; This splitter will be matched by combine and has to add the two moves
+; necessary to load the comparison and increment values into a
+; register pair, as needed by brxle.
+
+(define_insn_and_split "*brx_stage1_<GPR:mode>"
+ [(set (pc)
+ (if_then_else
+ (match_operator 6 "s390_brx_operator"
+ [(plus:GPR (match_operand:GPR 1 "register_operand" "")
+ (match_operand:GPR 2 "general_operand" ""))
+ (match_operand:GPR 3 "register_operand" "")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:GPR 4 "nonimmediate_operand" "")
+ (plus:GPR (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:GPR 5 ""))]
+ "TARGET_CPU_ZARCH"
+ "#"
+ "!reload_completed && !reload_in_progress"
+ [(set (match_dup 7) (match_dup 2)) ; the increment
+ (set (match_dup 8) (match_dup 3)) ; the comparison value
+ (parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 6
+ [(plus:GPR (match_dup 1) (match_dup 7))
+ (match_dup 8)])
+ (label_ref (match_dup 0))
+ (pc)))
+ (set (match_dup 4)
+ (plus:GPR (match_dup 1) (match_dup 7)))
+ (clobber (match_dup 5))
+ (clobber (reg:CC CC_REGNUM))])]
+ {
+ rtx dreg = gen_reg_rtx (word_mode == DImode ? TImode : DImode);
+ operands[7] = gen_lowpart (<GPR:MODE>mode,
+ gen_highpart (word_mode, dreg));
+ operands[8] = gen_lowpart (<GPR:MODE>mode,
+ gen_lowpart (word_mode, dreg));
+ })
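+
+; A hedged sketch of the trick above: brxle/brxh take the increment from
+; an even register and the comparison bound from the following odd
+; register.  Allocating a single double-width pseudo (dreg) and splitting
+; it into its high and low word_mode parts makes reload assign exactly
+; such an even/odd pair, with the increment landing in the even half and
+; the bound in the odd half.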
+
+; brxlg, brxhg
+
+(define_insn_and_split "*brxg_64bit"
+ [(set (pc)
+ (if_then_else
+ (match_operator 5 "s390_brx_operator"
+ [(plus:DI (match_operand:DI 1 "register_operand" "d,d,d")
+ (subreg:DI (match_operand:TI 2 "register_operand" "d,d,d") 0))
+ (subreg:DI (match_dup 2) 8)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:DI 3 "nonimmediate_operand" "=1,?X,?X")
+ (plus:DI (match_dup 1)
+ (subreg:DI (match_dup 2) 0)))
+ (clobber (match_scratch:DI 4 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 6)
+ return "brx%E5g\t%1,%2,%l0";
+ else
+ return "agr\t%1,%2\;cgr\t%1,%M2\;jg%C5\t%l0";
+}
+ "&& reload_completed
+ && (!REG_P (operands[3])
+ || !rtx_equal_p (operands[1], operands[3]))"
+ [(set (match_dup 4) (match_dup 1))
+ (parallel [(set (match_dup 4) (plus:DI (match_dup 4) (subreg:DI (match_dup 2) 0)))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:DI (match_dup 2) 8)))
+ (set (match_dup 3) (match_dup 4))
+ (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RIE")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 6) (const_int 16)))])
+
+; brxle, brxh
+
+(define_insn_and_split "*brx_64bit"
+ [(set (pc)
+ (if_then_else
+ (match_operator 5 "s390_brx_operator"
+ [(plus:SI (match_operand:SI 1 "register_operand" "d,d,d")
+ (subreg:SI (match_operand:TI 2 "register_operand" "d,d,d") 4))
+ (subreg:SI (match_dup 2) 12)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=1,?X,?X")
+ (plus:SI (match_dup 1)
+ (subreg:SI (match_dup 2) 4)))
+ (clobber (match_scratch:SI 4 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 6)
+ return "brx%C5\t%1,%2,%l0";
+ else
+ return "ar\t%1,%2\;cr\t%1,%M2\;jg%C5\t%l0";
+}
+ "&& reload_completed
+ && (!REG_P (operands[3])
+ || !rtx_equal_p (operands[1], operands[3]))"
+ [(set (match_dup 4) (match_dup 1))
+ (parallel [(set (match_dup 4) (plus:SI (match_dup 4) (subreg:SI (match_dup 2) 4)))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:SI (match_dup 2) 12)))
+ (set (match_dup 3) (match_dup 4))
+ (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RSI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 6) (const_int 14)))])
+
+; brxle, brxh
+
+(define_insn_and_split "*brx_31bit"
+ [(set (pc)
+ (if_then_else
+ (match_operator 5 "s390_brx_operator"
+ [(plus:SI (match_operand:SI 1 "register_operand" "d,d,d")
+ (subreg:SI (match_operand:DI 2 "register_operand" "d,d,d") 0))
+ (subreg:SI (match_dup 2) 4)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:SI 3 "nonimmediate_operand" "=1,?X,?X")
+ (plus:SI (match_dup 1)
+ (subreg:SI (match_dup 2) 0)))
+ (clobber (match_scratch:SI 4 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_ZARCH && TARGET_CPU_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 6)
+ return "brx%C5\t%1,%2,%l0";
+ else
+ return "ar\t%1,%2\;cr\t%1,%M2\;jg%C5\t%l0";
+}
+ "&& reload_completed
+ && (!REG_P (operands[3])
+ || !rtx_equal_p (operands[1], operands[3]))"
+ [(set (match_dup 4) (match_dup 1))
+ (parallel [(set (match_dup 4) (plus:SI (match_dup 4) (subreg:SI (match_dup 2) 0)))
+ (clobber (reg:CC CC_REGNUM))])
+ (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:SI (match_dup 2) 4)))
+ (set (match_dup 3) (match_dup 4))
+ (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RSI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 6) (const_int 14)))])
+
+
+;; branch on count
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+{
+ if (GET_MODE (operands[0]) == SImode && !TARGET_CPU_ZARCH)
+ emit_jump_insn (gen_doloop_si31 (operands[4], operands[0], operands[0]));
+ else if (GET_MODE (operands[0]) == SImode && TARGET_CPU_ZARCH)
+ emit_jump_insn (gen_doloop_si64 (operands[4], operands[0], operands[0]));
+ else if (GET_MODE (operands[0]) == DImode && TARGET_ZARCH)
+ emit_jump_insn (gen_doloop_di (operands[4], operands[0], operands[0]));
+ else
+ FAIL;
+
+ DONE;
+})
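+
+; Hedged note: the expander dispatches on the mode of the loop counter:
+; SImode loops use brct (in its 31-bit or zarch flavour), DImode loops
+; require brctg and hence TARGET_ZARCH, and every other case FAILs so
+; the middle end falls back to an ordinary decrement-compare-branch
+; sequence.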
+
+(define_insn_and_split "doloop_si64"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 1 "register_operand" "d,d,d")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:SI 2 "nonimmediate_operand" "=1,?X,?X")
+ (plus:SI (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:SI 3 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_CPU_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 4)
+ return "brct\t%1,%l0";
+ else
+ return "ahi\t%1,-1\;jgne\t%l0";
+}
+ "&& reload_completed
+ && (! REG_P (operands[2])
+ || ! rtx_equal_p (operands[1], operands[2]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (reg:CCAN CC_REGNUM)
+ (compare:CCAN (plus:SI (match_dup 3) (const_int -1))
+ (const_int 0)))
+ (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))])
+ (set (match_dup 2) (match_dup 3))
+ (set (pc) (if_then_else (ne (reg:CCAN CC_REGNUM) (const_int 0))
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RI")
+  ; Strictly speaking, the z10 properties are valid for brct only;
+  ; however, they do not hurt us in the (rare) case of ahi.
+ (set_attr "z10prop" "z10_super_E1")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 10)))])
+
+(define_insn_and_split "doloop_si31"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 1 "register_operand" "d,d,d")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:SI 2 "nonimmediate_operand" "=1,?X,?X")
+ (plus:SI (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:SI 3 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_CPU_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 4)
+ return "brct\t%1,%l0";
+ else
+ gcc_unreachable ();
+}
+ "&& reload_completed
+ && (! REG_P (operands[2])
+ || ! rtx_equal_p (operands[1], operands[2]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (reg:CCAN CC_REGNUM)
+ (compare:CCAN (plus:SI (match_dup 3) (const_int -1))
+ (const_int 0)))
+ (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))])
+ (set (match_dup 2) (match_dup 3))
+ (set (pc) (if_then_else (ne (reg:CCAN CC_REGNUM) (const_int 0))
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RI")
+  ; Strictly speaking, the z10 properties are valid for brct only;
+  ; however, they do not hurt us in the (rare) case of ahi.
+ (set_attr "z10prop" "z10_super_E1")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "flag_pic") (const_int 0))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 8))))])
+
+(define_insn "*doloop_si_long"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 1 "register_operand" "d")
+ (const_int 1))
+ (match_operand 0 "address_operand" "ZQZR")
+ (pc)))
+ (set (match_operand:SI 2 "register_operand" "=1")
+ (plus:SI (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:SI 3 "=X"))
+ (clobber (reg:CC CC_REGNUM))]
+ "!TARGET_CPU_ZARCH"
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "bctr\t%1,%0";
+ else
+ return "bct\t%1,%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")
+ (set_attr "z10prop" "z10_c")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn_and_split "doloop_di"
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:DI 1 "register_operand" "d,d,d")
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:DI 2 "nonimmediate_operand" "=1,?X,?X")
+ (plus:DI (match_dup 1) (const_int -1)))
+ (clobber (match_scratch:DI 3 "=X,&1,&?d"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_ZARCH"
+{
+ if (which_alternative != 0)
+ return "#";
+ else if (get_attr_length (insn) == 4)
+ return "brctg\t%1,%l0";
+ else
+ return "aghi\t%1,-1\;jgne\t%l0";
+}
+ "&& reload_completed
+ && (! REG_P (operands[2])
+ || ! rtx_equal_p (operands[1], operands[2]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (reg:CCAN CC_REGNUM)
+ (compare:CCAN (plus:DI (match_dup 3) (const_int -1))
+ (const_int 0)))
+ (set (match_dup 3) (plus:DI (match_dup 3) (const_int -1)))])
+ (set (match_dup 2) (match_dup 3))
+ (set (pc) (if_then_else (ne (reg:CCAN CC_REGNUM) (const_int 0))
+ (label_ref (match_dup 0))
+ (pc)))]
+ ""
+ [(set_attr "op_type" "RI")
+  ; Strictly speaking, the z10 properties are valid for brctg only;
+  ; however, they do not hurt us in the (rare) case of aghi.
+ (set_attr "z10prop" "z10_super_E1")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 10)))])
+
+;;
+;;- Unconditional jump instructions.
+;;
+
+;
+; jump instruction pattern(s).
+;
+
+(define_expand "jump"
+ [(match_operand 0 "" "")]
+ ""
+ "s390_emit_jump (operands[0], NULL_RTX); DONE;")
+
+(define_insn "*jump64"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ "TARGET_CPU_ZARCH"
+{
+ if (get_attr_length (insn) == 4)
+ return "j\t%l0";
+ else
+ return "jg\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6)))])
+
+(define_insn "*jump31"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ "!TARGET_CPU_ZARCH"
+{
+ gcc_assert (get_attr_length (insn) == 4);
+ return "j\t%l0";
+}
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (eq (symbol_ref "flag_pic") (const_int 0))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 6))
+ (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+ (const_int 4) (const_int 8))))])
+
+;
+; indirect-jump instruction pattern(s).
+;
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand 0 "address_operand" "ZQZR"))]
+ ""
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "br\t%0";
+ else
+ return "b\t%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
+;
+; casesi instruction pattern(s).
+;
+
+(define_insn "casesi_jump"
+ [(set (pc) (match_operand 0 "address_operand" "ZQZR"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "br\t%0";
+ else
+ return "b\t%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
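+; The expander below range-checks the index against operand 2 (jumping to
+; the default label, operand 4, when it is above the range), scales it by
+; the table entry size (8 bytes via a shift of 3 on 64-bit targets, 4 bytes
+; otherwise), loads the entry relative to the table label, and for PIC adds
+; the table base back since the entries are then base-relative.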
+(define_expand "casesi"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ (label_ref (match_operand 3 "" ""))
+ (label_ref (match_operand 4 "" ""))]
+ ""
+{
+ rtx index = gen_reg_rtx (SImode);
+ rtx base = gen_reg_rtx (Pmode);
+ rtx target = gen_reg_rtx (Pmode);
+
+ emit_move_insn (index, operands[0]);
+ emit_insn (gen_subsi3 (index, index, operands[1]));
+ emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1,
+ operands[4]);
+
+ if (Pmode != SImode)
+ index = convert_to_mode (Pmode, index, 1);
+ if (GET_CODE (index) != REG)
+ index = copy_to_mode_reg (Pmode, index);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_ashldi3 (index, index, GEN_INT (3)));
+ else
+ emit_insn (gen_ashlsi3 (index, index, const2_rtx));
+
+ emit_move_insn (base, gen_rtx_LABEL_REF (Pmode, operands[3]));
+
+ index = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, base, index));
+ emit_move_insn (target, index);
+
+ if (flag_pic)
+ target = gen_rtx_PLUS (Pmode, base, target);
+ emit_jump_insn (gen_casesi_jump (target, operands[3]));
+
+ DONE;
+})
+
+
+;;
+;;- Jump to subroutine.
+;;
+;;
+
+;
+; untyped call instruction pattern(s).
+;
+
+;; Call subroutine returning any type.
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "type" "none")
+ (set_attr "length" "0")])
+
+;
+; sibcall patterns
+;
+
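+; Sibling calls to an indirect target branch through %r1 (SIBCALL_REGNUM),
+; as the "br\t%%r1" templates below show; direct targets use j/jg just like
+; ordinary jumps.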
+(define_expand "sibcall"
+ [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))]
+ ""
+{
+ s390_emit_call (XEXP (operands[0], 0), NULL_RTX, NULL_RTX, NULL_RTX);
+ DONE;
+})
+
+(define_insn "*sibcall_br"
+ [(call (mem:QI (reg SIBCALL_REGNUM))
+ (match_operand 0 "const_int_operand" "n"))]
+ "SIBLING_CALL_P (insn)
+ && GET_MODE (XEXP (XEXP (PATTERN (insn), 0), 0)) == Pmode"
+ "br\t%%r1"
+ [(set_attr "op_type" "RR")
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
+(define_insn "*sibcall_brc"
+ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X"))
+ (match_operand 1 "const_int_operand" "n"))]
+ "SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC"
+ "j\t%0"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")])
+
+(define_insn "*sibcall_brcl"
+ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X"))
+ (match_operand 1 "const_int_operand" "n"))]
+ "SIBLING_CALL_P (insn) && TARGET_CPU_ZARCH"
+ "jg\t%0"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "branch")])
+
+;
+; sibcall_value patterns
+;
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))]
+ ""
+{
+ s390_emit_call (XEXP (operands[1], 0), NULL_RTX, operands[0], NULL_RTX);
+ DONE;
+})
+
+(define_insn "*sibcall_value_br"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (reg SIBCALL_REGNUM))
+ (match_operand 1 "const_int_operand" "n")))]
+ "SIBLING_CALL_P (insn)
+ && GET_MODE (XEXP (XEXP (XEXP (PATTERN (insn), 1), 0), 0)) == Pmode"
+ "br\t%%r1"
+ [(set_attr "op_type" "RR")
+ (set_attr "type" "branch")
+ (set_attr "atype" "agen")])
+
+(define_insn "*sibcall_value_brc"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand 2 "const_int_operand" "n")))]
+ "SIBLING_CALL_P (insn) && TARGET_SMALL_EXEC"
+ "j\t%1"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "branch")])
+
+(define_insn "*sibcall_value_brcl"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand 2 "const_int_operand" "n")))]
+ "SIBLING_CALL_P (insn) && TARGET_CPU_ZARCH"
+ "jg\t%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "branch")])
+
+
+;
+; call instruction pattern(s).
+;
+
+(define_expand "call"
+ [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ s390_emit_call (XEXP (operands[0], 0), NULL_RTX, NULL_RTX,
+ gen_rtx_REG (Pmode, RETURN_REGNUM));
+ DONE;
+})
+
+(define_insn "*bras"
+ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X"))
+ (match_operand 1 "const_int_operand" "n"))
+ (clobber (match_operand 2 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_SMALL_EXEC
+ && GET_MODE (operands[2]) == Pmode"
+ "bras\t%2,%0"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*brasl"
+ [(call (mem:QI (match_operand 0 "bras_sym_operand" "X"))
+ (match_operand 1 "const_int_operand" "n"))
+ (clobber (match_operand 2 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_CPU_ZARCH
+ && GET_MODE (operands[2]) == Pmode"
+ "brasl\t%2,%0"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*basr"
+ [(call (mem:QI (match_operand 0 "address_operand" "ZQZR"))
+ (match_operand 1 "const_int_operand" "n"))
+ (clobber (match_operand 2 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn) && GET_MODE (operands[2]) == Pmode"
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "basr\t%2,%0";
+ else
+ return "bas\t%2,%a0";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "jsr")
+ (set_attr "atype" "agen")
+ (set_attr "z196prop" "z196_cracked")])
+
+;
+; call_value instruction pattern(s).
+;
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "")
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))]
+ ""
+{
+ s390_emit_call (XEXP (operands[1], 0), NULL_RTX, operands[0],
+ gen_rtx_REG (Pmode, RETURN_REGNUM));
+ DONE;
+})
+
+(define_insn "*bras_r"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_SMALL_EXEC
+ && GET_MODE (operands[3]) == Pmode"
+ "bras\t%3,%1"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*brasl_r"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_CPU_ZARCH
+ && GET_MODE (operands[3]) == Pmode"
+ "brasl\t%3,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*basr_r"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "address_operand" "ZQZR"))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))]
+ "!SIBLING_CALL_P (insn) && GET_MODE (operands[3]) == Pmode"
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "basr\t%3,%1";
+ else
+ return "bas\t%3,%a1";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 1 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "jsr")
+ (set_attr "atype" "agen")
+ (set_attr "z196prop" "z196_cracked")])
+
+;;
+;;- Thread-local storage support.
+;;
+
+(define_expand "get_tp_64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "") (reg:DI TP_REGNUM))]
+ "TARGET_64BIT"
+ "")
+
+(define_expand "get_tp_31"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "") (reg:SI TP_REGNUM))]
+ "!TARGET_64BIT"
+ "")
+
+(define_expand "set_tp_64"
+ [(set (reg:DI TP_REGNUM) (match_operand:DI 0 "nonimmediate_operand" ""))
+ (set (reg:DI TP_REGNUM) (unspec_volatile:DI [(reg:DI TP_REGNUM)] UNSPECV_SET_TP))]
+ "TARGET_64BIT"
+ "")
+
+(define_expand "set_tp_31"
+ [(set (reg:SI TP_REGNUM) (match_operand:SI 0 "nonimmediate_operand" ""))
+ (set (reg:SI TP_REGNUM) (unspec_volatile:SI [(reg:SI TP_REGNUM)] UNSPECV_SET_TP))]
+ "!TARGET_64BIT"
+ "")
+
+(define_insn "*set_tp"
+ [(set (reg TP_REGNUM) (unspec_volatile [(reg TP_REGNUM)] UNSPECV_SET_TP))]
+ ""
+ ""
+ [(set_attr "type" "none")
+ (set_attr "length" "0")])
+
+(define_insn "*tls_load_64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "RT")
+ (match_operand:DI 2 "" "")]
+ UNSPEC_TLS_LOAD))]
+ "TARGET_64BIT"
+ "lg\t%0,%1%J2"
+ [(set_attr "op_type" "RXE")
+ (set_attr "z10prop" "z10_fwd_A3")])
+
+(define_insn "*tls_load_31"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "R,T")
+ (match_operand:SI 2 "" "")]
+ UNSPEC_TLS_LOAD))]
+ "!TARGET_64BIT"
+ "@
+ l\t%0,%1%J2
+ ly\t%0,%1%J2"
+ [(set_attr "op_type" "RX,RXY")
+ (set_attr "type" "load")
+ (set_attr "z10prop" "z10_fwd_A3,z10_fwd_A3")])
+
+(define_insn "*bras_tls"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))
+ (use (match_operand 4 "" ""))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_SMALL_EXEC
+ && GET_MODE (operands[3]) == Pmode"
+ "bras\t%3,%1%J4"
+ [(set_attr "op_type" "RI")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*brasl_tls"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "bras_sym_operand" "X"))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))
+ (use (match_operand 4 "" ""))]
+ "!SIBLING_CALL_P (insn)
+ && TARGET_CPU_ZARCH
+ && GET_MODE (operands[3]) == Pmode"
+ "brasl\t%3,%1%J4"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "jsr")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "*basr_tls"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand 1 "address_operand" "ZQZR"))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (match_operand 3 "register_operand" "=r"))
+ (use (match_operand 4 "" ""))]
+ "!SIBLING_CALL_P (insn) && GET_MODE (operands[3]) == Pmode"
+{
+ if (get_attr_op_type (insn) == OP_TYPE_RR)
+ return "basr\t%3,%1%J4";
+ else
+ return "bas\t%3,%a1%J4";
+}
+ [(set (attr "op_type")
+ (if_then_else (match_operand 1 "register_operand" "")
+ (const_string "RR") (const_string "RX")))
+ (set_attr "type" "jsr")
+ (set_attr "atype" "agen")
+ (set_attr "z196prop" "z196_cracked")])
+
+;;
+;;- Atomic operations
+;;
+
+;
+; memory barrier pattern.
+;
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+ ""
+ "bcr\t15,0"
+ [(set_attr "op_type" "RR")])
+
+; Although bcr is superscalar on Z10, this variant will never become part of
+; an execution group.
+
+;
+; compare and swap patterns.
+;
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:TDSI 0 "register_operand" "")
+ (match_operand:TDSI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:TDSI
+ [(match_dup 1)
+ (match_operand:TDSI 2 "register_operand" "")
+ (match_operand:TDSI 3 "register_operand" "")]
+ UNSPECV_CAS))
+ (set (reg:CCZ1 CC_REGNUM)
+ (compare:CCZ1 (match_dup 1) (match_dup 2)))])]
+ "")
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:HQI 0 "register_operand" "")
+ (match_operand:HQI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:HQI
+ [(match_dup 1)
+ (match_operand:HQI 2 "general_operand" "")
+ (match_operand:HQI 3 "general_operand" "")]
+ UNSPECV_CAS))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1],
+ operands[2], operands[3]); DONE;")
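+
+; There is no byte- or halfword-sized compare-and-swap instruction, so
+; s390_expand_cs_hqi emulates the operation (a sketch of the idea, not the
+; exact generated sequence): it runs a word-sized cs loop on the containing
+; aligned word, masking and reinserting the affected bytes.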
+
+; cds, cdsg
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:DW 0 "register_operand" "=r")
+ (match_operand:DW 1 "memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:DW
+ [(match_dup 1)
+ (match_operand:DW 2 "register_operand" "0")
+ (match_operand:DW 3 "register_operand" "r")]
+ UNSPECV_CAS))
+ (set (reg:CCZ1 CC_REGNUM)
+ (compare:CCZ1 (match_dup 1) (match_dup 2)))]
+ ""
+ "cds<tg>\t%0,%3,%S1"
+ [(set_attr "op_type" "RS<TE>")
+ (set_attr "type" "sem")])
+
+; cs, csg
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (match_operand:GPR 1 "memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(match_dup 1)
+ (match_operand:GPR 2 "register_operand" "0")
+ (match_operand:GPR 3 "register_operand" "r")]
+ UNSPECV_CAS))
+ (set (reg:CCZ1 CC_REGNUM)
+ (compare:CCZ1 (match_dup 1) (match_dup 2)))]
+ ""
+ "cs<g>\t%0,%3,%S1"
+ [(set_attr "op_type" "RS<E>")
+ (set_attr "type" "sem")])
+
+
+;
+; Other atomic instruction patterns.
+;
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "general_operand")]
+ ""
+ "s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
+ operands[2], false); DONE;")
+
+; z196 load and add, xor, or and and instructions
+
+; lan, lang, lao, laog, lax, laxg, laa, laag
+(define_insn "sync_<atomic><mode>"
+ [(parallel
+ [(set (match_operand:GPR 0 "memory_operand" "+QS")
+ (unspec_volatile:GPR
+ [(ATOMIC_Z196:GPR (match_dup 0)
+ (match_operand:GPR 1 "general_operand" "d"))]
+ UNSPECV_ATOMIC_OP))
+ (clobber (match_scratch:GPR 2 "=d"))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z196"
+ "la<noxa><g>\t%2,%1,%0")
+
+; lan, lang, lao, laog, lax, laxg, laa, laag
+(define_insn "sync_old_<atomic><mode>"
+ [(parallel
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (match_operand:GPR 1 "memory_operand" "+QS"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(ATOMIC_Z196:GPR (match_dup 1)
+ (match_operand:GPR 2 "general_operand" "d"))]
+ UNSPECV_ATOMIC_OP))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z196"
+ "la<noxa><g>\t%0,%2,%1")
+
+
+(define_expand "sync_<atomic><mode>"
+ [(set (match_operand:HQI 0 "memory_operand")
+ (ATOMIC:HQI (match_dup 0)
+ (match_operand:HQI 1 "general_operand")))]
+ ""
+ "s390_expand_atomic (<MODE>mode, <CODE>, NULL_RTX, operands[0],
+ operands[1], false); DONE;")
+
+(define_expand "sync_old_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand"))
+ (set (match_dup 1)
+ (ATOMIC:HQI (match_dup 1)
+ (match_operand:HQI 2 "general_operand")))]
+ ""
+ "s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
+ operands[2], false); DONE;")
+
+(define_expand "sync_new_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "general_operand")))
+ (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))]
+ ""
+ "s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
+ operands[2], true); DONE;")
+
+;;
+;;- Miscellaneous instructions.
+;;
+
+;
+; allocate stack instruction pattern(s).
+;
+
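+; With TARGET_BACKCHAIN the backchain word (at the frame bottom in the
+; default layout) points to the caller's frame.  The expander below
+; therefore saves that word, performs the dynamic adjustment, and rewrites
+; it at the new stack bottom so the chain stays intact.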
+(define_expand "allocate_stack"
+ [(match_operand 0 "general_operand" "")
+ (match_operand 1 "general_operand" "")]
+ "TARGET_BACKCHAIN"
+{
+ rtx temp = gen_reg_rtx (Pmode);
+
+ emit_move_insn (temp, s390_back_chain_rtx ());
+ anti_adjust_stack (operands[1]);
+ emit_move_insn (s390_back_chain_rtx (), temp);
+
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+})
+
+
+;
+; setjmp instruction pattern.
+;
+
+(define_expand "builtin_setjmp_receiver"
+ [(match_operand 0 "" "")]
+ "flag_pic"
+{
+ emit_insn (s390_load_got ());
+ emit_use (pic_offset_table_rtx);
+ DONE;
+})
+
+;; These patterns say how to save and restore the stack pointer. We need not
+;; save the stack pointer at function level since we are careful to
+;; preserve the backchain. At block level, we have to restore the backchain
+;; when we restore the stack pointer.
+;;
+;; For nonlocal gotos, we must save both the stack pointer and its
+;; backchain and restore both. Note that in the nonlocal case, the
+;; save area is a memory location.
+
+(define_expand "save_stack_function"
+ [(match_operand 0 "general_operand" "")
+ (match_operand 1 "general_operand" "")]
+ ""
+ "DONE;")
+
+(define_expand "restore_stack_function"
+ [(match_operand 0 "general_operand" "")
+ (match_operand 1 "general_operand" "")]
+ ""
+ "DONE;")
+
+(define_expand "restore_stack_block"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" "")]
+ "TARGET_BACKCHAIN"
+{
+ rtx temp = gen_reg_rtx (Pmode);
+
+ emit_move_insn (temp, s390_back_chain_rtx ());
+ emit_move_insn (operands[0], operands[1]);
+ emit_move_insn (s390_back_chain_rtx (), temp);
+
+ DONE;
+})
+
+(define_expand "save_stack_nonlocal"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "register_operand" "")]
+ ""
+{
+ rtx base = gen_rtx_REG (Pmode, BASE_REGNUM);
+
+ /* Copy the backchain to the first word, sp to the second and the
+ literal pool base to the third. */
+
+ rtx save_bc = adjust_address (operands[0], Pmode, 0);
+ rtx save_sp = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+ rtx save_bp = adjust_address (operands[0], Pmode, 2 * GET_MODE_SIZE (Pmode));
+
+ if (TARGET_BACKCHAIN)
+ emit_move_insn (save_bc, force_reg (Pmode, s390_back_chain_rtx ()));
+
+ emit_move_insn (save_sp, operands[1]);
+ emit_move_insn (save_bp, base);
+
+ DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+ rtx base = gen_rtx_REG (Pmode, BASE_REGNUM);
+ rtx temp = NULL_RTX;
+
+ /* Restore the backchain from the first word, sp from the second and the
+ literal pool base from the third. */
+
+ rtx save_bc = adjust_address (operands[1], Pmode, 0);
+ rtx save_sp = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+ rtx save_bp = adjust_address (operands[1], Pmode, 2 * GET_MODE_SIZE (Pmode));
+
+ if (TARGET_BACKCHAIN)
+ temp = force_reg (Pmode, save_bc);
+
+ emit_move_insn (base, save_bp);
+ emit_move_insn (operands[0], save_sp);
+
+ if (temp)
+ emit_move_insn (s390_back_chain_rtx (), temp);
+
+ emit_use (base);
+ DONE;
+})
+
+(define_expand "exception_receiver"
+ [(const_int 0)]
+ ""
+{
+ s390_set_has_landing_pad_p (true);
+ DONE;
+})
+
+;
+; nop instruction pattern(s).
+;
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "lr\t0,0"
+ [(set_attr "op_type" "RR")
+ (set_attr "z10prop" "z10_fr_E1")])
+
+(define_insn "nop1"
+ [(const_int 1)]
+ ""
+ "lr\t1,1"
+ [(set_attr "op_type" "RR")])
+
+
+;
+; Special literal pool access instruction pattern(s).
+;
+
+(define_insn "*pool_entry"
+ [(unspec_volatile [(match_operand 0 "consttable_operand" "X")]
+ UNSPECV_POOL_ENTRY)]
+ ""
+{
+ enum machine_mode mode = GET_MODE (PATTERN (insn));
+ unsigned int align = GET_MODE_BITSIZE (mode);
+ s390_output_pool_entry (operands[0], mode, align);
+ return "";
+}
+ [(set (attr "length")
+ (symbol_ref "GET_MODE_SIZE (GET_MODE (PATTERN (insn)))"))])
+
+(define_insn "pool_align"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "n")]
+ UNSPECV_POOL_ALIGN)]
+ ""
+ ".align\t%0"
+ [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))])
+
+(define_insn "pool_section_start"
+ [(unspec_volatile [(const_int 1)] UNSPECV_POOL_SECTION)]
+ ""
+ ".section\t.rodata"
+ [(set_attr "length" "0")])
+
+(define_insn "pool_section_end"
+ [(unspec_volatile [(const_int 0)] UNSPECV_POOL_SECTION)]
+ ""
+ ".previous"
+ [(set_attr "length" "0")])
+
+(define_insn "main_base_31_small"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_MAIN_BASE))]
+ "!TARGET_CPU_ZARCH && GET_MODE (operands[0]) == Pmode"
+ "basr\t%0,0"
+ [(set_attr "op_type" "RR")
+ (set_attr "type" "la")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "main_base_31_large"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_MAIN_BASE))
+ (set (pc) (label_ref (match_operand 2 "" "")))]
+ "!TARGET_CPU_ZARCH && GET_MODE (operands[0]) == Pmode"
+ "bras\t%0,%2"
+ [(set_attr "op_type" "RI")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "main_base_64"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_MAIN_BASE))]
+ "TARGET_CPU_ZARCH && GET_MODE (operands[0]) == Pmode"
+ "larl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_fwd_A1")])
+
+(define_insn "main_pool"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec_volatile [(const_int 0)] UNSPECV_MAIN_POOL))]
+ "GET_MODE (operands[0]) == Pmode"
+{
+ gcc_unreachable ();
+}
+ [(set (attr "type")
+ (if_then_else (ne (symbol_ref "TARGET_CPU_ZARCH") (const_int 0))
+ (const_string "larl") (const_string "la")))])
+
+(define_insn "reload_base_31"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_RELOAD_BASE))]
+ "!TARGET_CPU_ZARCH && GET_MODE (operands[0]) == Pmode"
+ "basr\t%0,0\;la\t%0,%1-.(%0)"
+ [(set_attr "length" "6")
+ (set_attr "type" "la")
+ (set_attr "z196prop" "z196_cracked")])
+
+(define_insn "reload_base_64"
+ [(set (match_operand 0 "register_operand" "=a")
+ (unspec [(label_ref (match_operand 1 "" ""))] UNSPEC_RELOAD_BASE))]
+ "TARGET_CPU_ZARCH && GET_MODE (operands[0]) == Pmode"
+ "larl\t%0,%1"
+ [(set_attr "op_type" "RIL")
+ (set_attr "type" "larl")
+ (set_attr "z10prop" "z10_fwd_A1")])
+
+(define_insn "pool"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "n")] UNSPECV_POOL)]
+ ""
+{
+ gcc_unreachable ();
+}
+ [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))])
+
+;;
+;; Insns related to generating the function prologue and epilogue.
+;;
+
+
+(define_expand "prologue"
+ [(use (const_int 0))]
+ ""
+ "s390_emit_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(use (const_int 1))]
+ ""
+ "s390_emit_epilogue (false); DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(use (const_int 0))]
+ ""
+ "s390_emit_epilogue (true); DONE;")
+
+(define_insn "*return"
+ [(return)
+ (use (match_operand 0 "register_operand" "a"))]
+ "GET_MODE (operands[0]) == Pmode"
+ "br\t%0"
+ [(set_attr "op_type" "RR")
+ (set_attr "type" "jsr")
+ (set_attr "atype" "agen")])
+
+
+;; Instruction definition to extend a 31-bit pointer into a 64-bit
+;; pointer. This is used for compatibility.
+
+(define_expand "ptr_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:SI 1 "register_operand" "r"))]
+ "TARGET_64BIT"
+{
+ emit_insn (gen_anddi3 (operands[0],
+ gen_lowpart (DImode, operands[1]),
+ GEN_INT (0x7fffffff)));
+ DONE;
+})
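+
+; Masking with 0x7fffffff keeps the low 31 bits of the pointer; in 31-bit
+; addressing the remaining top bit of an address word is the
+; addressing-mode bit and must not leak into the 64-bit pointer.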
+
+;; Instruction definition to expand eh_return macro to support
+;; swapping in special linkage return addresses.
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ "TARGET_TPF"
+{
+ s390_emit_tpf_eh_return (operands[0]);
+ DONE;
+})
+
+;
+; Stack Protector Patterns
+;
+
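+; When TARGET_THREAD_SSP_OFFSET is defined (glibc keeps the canary in the
+; TCB), the expanders below redirect operand 1 to thread pointer + offset;
+; otherwise the guard location supplied by the middle end (normally the
+; __stack_chk_guard symbol) is used unchanged.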
+(define_expand "stack_protect_set"
+ [(set (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+ operands[1]
+ = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#endif
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_setsi (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_insn "stack_protect_set<mode>"
+ [(set (match_operand:DSI 0 "memory_operand" "=Q")
+ (unspec:DSI [(match_operand:DSI 1 "memory_operand" "Q")] UNSPEC_SP_SET))]
+ ""
+ "mvc\t%O0(%G0,%R0),%S1"
+ [(set_attr "op_type" "SS")])
+
+(define_expand "stack_protect_test"
+ [(set (reg:CC CC_REGNUM)
+ (compare (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")))
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx cc_reg, test;
+#ifdef TARGET_THREAD_SSP_OFFSET
+ operands[1]
+ = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#endif
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_testdi (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_testsi (operands[0], operands[1]));
+
+ cc_reg = gen_rtx_REG (CCZmode, CC_REGNUM);
+ test = gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx);
+ emit_jump_insn (gen_cbranchcc4 (test, cc_reg, const0_rtx, operands[2]));
+ DONE;
+})
+
+(define_insn "stack_protect_test<mode>"
+ [(set (reg:CCZ CC_REGNUM)
+ (unspec:CCZ [(match_operand:DSI 0 "memory_operand" "Q")
+ (match_operand:DSI 1 "memory_operand" "Q")] UNSPEC_SP_TEST))]
+ ""
+ "clc\t%O0(%G0,%R0),%S1"
+ [(set_attr "op_type" "SS")])
+
+; This is used in s390_emit_prologue in order to prevent insns
+; adjusting the stack pointer from being moved over insns that write
+; stack slots using a copy of the stack pointer in a different register.
+(define_insn "stack_tie"
+ [(set (match_operand:BLK 0 "memory_operand" "+m")
+ (unspec:BLK [(match_dup 0)] UNSPEC_TIE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+
+;
+; Data prefetch patterns
+;
+
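+; Operand 1 is the middle-end read/write flag: a value of 1 selects
+; prefetch code 2 (prefetch for store), anything else selects code 1
+; (prefetch for fetch).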
+(define_insn "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "ZQZRZSZT,X")
+ (match_operand:SI 1 "const_int_operand" " n,n")
+ (match_operand:SI 2 "const_int_operand" " n,n"))]
+ "TARGET_Z10"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return INTVAL (operands[1]) == 1 ? "pfd\t2,%a0" : "pfd\t1,%a0";
+ case 1:
+ if (larl_operand (operands[0], Pmode))
+ return INTVAL (operands[1]) == 1 ? "pfdrl\t2,%a0" : "pfdrl\t1,%a0";
+ default:
+
+ /* This might be reached for symbolic operands with an odd
+ addend. We simply omit the prefetch for such rare cases. */
+
+ return "";
+ }
+}
+ [(set_attr "type" "load,larl")
+ (set_attr "op_type" "RXY,RIL")
+ (set_attr "z10prop" "z10_super")
+ (set_attr "z196prop" "z196_alone")])
+
+
+;
+; Byte swap instructions
+;
+
+(define_insn "bswap<mode>2"
+ [(set (match_operand:GPR 0 "register_operand" "=d, d")
+ (bswap:GPR (match_operand:GPR 1 "nonimmediate_operand" " d,RT")))]
+ "TARGET_CPU_ZARCH"
+ "@
+ lrv<g>r\t%0,%1
+ lrv<g>\t%0,%1"
+ [(set_attr "type" "*,load")
+ (set_attr "op_type" "RRE,RXY")
+ (set_attr "z10prop" "z10_super")])
+
+
+;
+; Population count instruction
+;
+
+; The S/390 popcount instruction counts the bits of op1 separately in
+; each of its bytes and stores the per-byte results in the
+; corresponding bytes of op0.
+(define_insn "*popcount<mode>"
+ [(set (match_operand:INT 0 "register_operand" "=d")
+ (unspec:INT [(match_operand:INT 1 "register_operand" "d")] UNSPEC_POPCNT))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z196"
+ "popcnt\t%0,%1"
+ [(set_attr "op_type" "RRE")])
+
+(define_expand "popcountdi2"
+ [; popcnt op0, op1
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:DI 1 "register_operand")]
+ UNSPEC_POPCNT))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllg op2, op0, 32
+ (set (match_dup 2) (ashift:DI (match_dup 0) (const_int 32)))
+ ; agr op0, op2
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllg op2, op0, 16
+ (set (match_dup 2)
+ (ashift:DI (match_dup 0) (const_int 16)))
+ ; agr op0, op2
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllg op2, op0, 8
+ (set (match_dup 2) (ashift:DI (match_dup 0) (const_int 8)))
+ ; agr op0, op2
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; srlg op0, op0, 56
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
+ "TARGET_Z196 && TARGET_64BIT"
+ "operands[2] = gen_reg_rtx (DImode);")
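+
+; Worked example for the folding above: after popcnt each byte of op0
+; holds the bit count of the corresponding byte of op1; the three
+; shift/add steps accumulate all eight counts into the most significant
+; byte, and the final shift by 56 extracts the total.  For
+; op1 = 0x0101010101010101 every byte count is 1 and the result is 8.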
+
+(define_expand "popcountsi2"
+ [; popcnt op0, op1
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand")]
+ UNSPEC_POPCNT))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllk op2, op0, 16
+ (set (match_dup 2)
+ (ashift:SI (match_dup 0) (const_int 16)))
+ ; ar op0, op2
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllk op2, op0, 8
+ (set (match_dup 2) (ashift:SI (match_dup 0) (const_int 8)))
+ ; ar op0, op2
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; srl op0, op0, 24
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24)))]
+ "TARGET_Z196"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+(define_expand "popcounthi2"
+ [; popcnt op0, op1
+ (parallel [(set (match_operand:HI 0 "register_operand" "")
+ (unspec:HI [(match_operand:HI 1 "register_operand")]
+ UNSPEC_POPCNT))
+ (clobber (reg:CC CC_REGNUM))])
+ ; sllk op2, op0, 8
+ (set (match_dup 2)
+ (ashift:SI (match_dup 0) (const_int 8)))
+ ; ar op0, op2
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))])
+ ; srl op0, op0, 8
+ (set (match_dup 0) (lshiftrt:HI (match_dup 0) (const_int 8)))]
+ "TARGET_Z196"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+(define_expand "popcountqi2"
+ [; popcnt op0, op1
+ (parallel [(set (match_operand:QI 0 "register_operand" "")
+ (unspec:QI [(match_operand:QI 1 "register_operand")]
+ UNSPEC_POPCNT))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z196"
+ "")
+
+;;
+;;- Copy sign instructions
+;;
+
+(define_insn "copysign<mode>3"
+ [(set (match_operand:FP 0 "register_operand" "=f")
+ (unspec:FP [(match_operand:FP 1 "register_operand" "<fT0>")
+ (match_operand:FP 2 "register_operand" "f")]
+ UNSPEC_COPYSIGN))]
+ "TARGET_Z196"
+ "cpsdr\t%0,%2,%1"
+ [(set_attr "op_type" "RRF")
+ (set_attr "type" "fsimp<mode>")])
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
new file mode 100644
index 000000000..3a3cd42bf
--- /dev/null
+++ b/gcc/config/s390/s390.opt
@@ -0,0 +1,99 @@
+; Options for the S/390 / zSeries port of the compiler.
+
+; Copyright (C) 2005, 2006, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+m31
+Target Report RejectNegative Negative(m64) InverseMask(64BIT)
+31 bit ABI
+
+m64
+Target Report RejectNegative Negative(m31) Mask(64BIT)
+64 bit ABI
+
+march=
+Target RejectNegative Joined Var(s390_arch_string)
+Generate code for given CPU
+
+mbackchain
+Target Report Mask(BACKCHAIN)
+Maintain backchain pointer
+
+mdebug
+Target Report Mask(DEBUG_ARG)
+Additional debug prints
+
+mesa
+Target Report RejectNegative Negative(mzarch) InverseMask(ZARCH)
+ESA/390 architecture
+
+mhard-dfp
+Target Report Mask(HARD_DFP)
+Enable decimal floating point hardware support
+
+mhard-float
+Target Report RejectNegative Negative(msoft-float) InverseMask(SOFT_FLOAT, HARD_FLOAT)
+Enable hardware floating point
+
+mlong-double-128
+Target Report RejectNegative Negative(mlong-double-64) Mask(LONG_DOUBLE_128)
+Use 128-bit long double
+
+mlong-double-64
+Target Report RejectNegative Negative(mlong-double-128) InverseMask(LONG_DOUBLE_128)
+Use 64-bit long double
+
+mpacked-stack
+Target Report Mask(PACKED_STACK)
+Use packed stack layout
+
+msmall-exec
+Target Report Mask(SMALL_EXEC)
+Use bras for executable < 64k
+
+msoft-float
+Target Report RejectNegative Negative(mhard-float) Mask(SOFT_FLOAT)
+Disable hardware floating point
+
+mstack-guard=
+Target RejectNegative Joined
+Set the maximum number of bytes which have to be left to the stack size before a trap instruction is triggered
+
+mstack-size=
+Target RejectNegative Joined
+Emit extra code in the function prologue in order to trap if the stack size exceeds the given limit
+
+mtune=
+Target RejectNegative Joined
+Schedule code for given CPU
+
+mmvcle
+Target Report Mask(MVCLE)
+Use the mvcle instruction to perform block moves
+
+mwarn-dynamicstack
+Target RejectNegative Var(s390_warn_dynamicstack_p)
+Warn if a function uses alloca or creates an array with dynamic size
+
+mwarn-framesize=
+Target RejectNegative Joined
+Warn if a single function's framesize exceeds the given framesize
+
+mzarch
+Target Report RejectNegative Negative(mesa) Mask(ZARCH)
+z/Architecture
diff --git a/gcc/config/s390/s390x.h b/gcc/config/s390/s390x.h
new file mode 100644
index 000000000..3712eb156
--- /dev/null
+++ b/gcc/config/s390/s390x.h
@@ -0,0 +1,27 @@
+/* Definitions of target machine for IBM zSeries 64-bit
+ Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+ Contributed by Hartmut Penner (hpenner@de.ibm.com) and
+ Ulrich Weigand (uweigand@de.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _S390X_H
+#define _S390X_H
+
+#define DEFAULT_TARGET_64BIT
+
+#endif
diff --git a/gcc/config/s390/t-linux64 b/gcc/config/s390/t-linux64
new file mode 100644
index 000000000..cc6ab3670
--- /dev/null
+++ b/gcc/config/s390/t-linux64
@@ -0,0 +1,11 @@
+# On Debian, Ubuntu and other derivative distributions, the 32-bit libraries
+# are found in /lib32 and /usr/lib32, and /lib64 and /usr/lib64 are symlinks
+# to /lib and /usr/lib, while other distributions install libraries into
+# /lib64 and /usr/lib64.  The LSB does not enforce the use of /lib64 and
+# /usr/lib64, nor does it say anything about the 32-bit libraries on those
+# systems.  Set MULTILIB_OSDIRNAMES according to what is found on the target.
+
+MULTILIB_OPTIONS = m64/m31
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:s390x-linux-gnu)
+MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:s390-linux-gnu)
diff --git a/gcc/config/s390/tpf-unwind.h b/gcc/config/s390/tpf-unwind.h
new file mode 100644
index 000000000..33fd5f5c8
--- /dev/null
+++ b/gcc/config/s390/tpf-unwind.h
@@ -0,0 +1,252 @@
+/* DWARF2 EH unwinding support for TPF OS.
+ Copyright (C) 2004, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by P.J. Darcy (darcypj@us.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+
+/* Function Name: __isPATrange
+   Parameters passed into it: address to check
+   Return Value: 1 if the address is in the pat code range, 0 if not
+   Description: This function simply checks whether the address
+   passed to it lies within the CP pat code range.  */
+
+#define MIN_PATRANGE 0x10000
+#define MAX_PATRANGE 0x800000
+
+static inline unsigned int
+__isPATrange (void *addr)
+{
+ if (addr > (void *)MIN_PATRANGE && addr < (void *)MAX_PATRANGE)
+ return 1;
+ else
+ return 0;
+}
+
+/* TPF return address offset from start of stack frame. */
+#define TPFRA_OFFSET 168
+
+/* Exceptions macro defined for TPF so that functions without
+ dwarf frame information can be used with exceptions. */
+#define MD_FALLBACK_FRAME_STATE_FOR s390_fallback_frame_state
+
+static _Unwind_Reason_Code
+s390_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned long int regs;
+ unsigned long int new_cfa;
+ int i;
+
+ regs = *((unsigned long int *)
+ (((unsigned long int) context->cfa) - STACK_POINTER_OFFSET));
+
+ /* Are we going through special linkage code? */
+ if (__isPATrange (context->ra))
+ {
+
+      /* Our return register isn't zero at the end of the stack, so
+        check the backward stack pointer to see if it is zero.  */
+      if (regs == 0)
+ return _URC_END_OF_STACK;
+
+ /* No stack frame. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = STACK_POINTER_OFFSET;
+
+ /* All registers remain unchanged ... */
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_REG;
+ fs->regs.reg[i].loc.reg = i;
+ }
+
+      /* ... except for %r14, which is loaded from the TPF return
+        address slot (CFA-280, i.e. TPFRA_OFFSET - STACK_POINTER_OFFSET)
+        and used as return address.  */
+ fs->regs.reg[14].how = REG_SAVED_OFFSET;
+ fs->regs.reg[14].loc.offset = TPFRA_OFFSET - STACK_POINTER_OFFSET;
+ fs->retaddr_column = 14;
+
+ return _URC_NO_REASON;
+ }
+
+ regs = *((unsigned long int *)
+ (((unsigned long int) context->cfa) - STACK_POINTER_OFFSET));
+ new_cfa = regs + STACK_POINTER_OFFSET;
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = new_cfa -
+ (unsigned long int) context->cfa + STACK_POINTER_OFFSET;
+
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = regs + i*8 - new_cfa;
+ }
+
+ for (i = 0; i < 4; i++)
+ {
+ fs->regs.reg[16 + i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16 + i].loc.offset = regs + 16*8 + i*8 - new_cfa;
+ }
+
+ fs->retaddr_column = 14;
+
+ return _URC_NO_REASON;
+}
+
+/* Function Name: __tpf_eh_return
+     Parameters passed into it: Destination address to jump to.
+     Return Value: Converted destination address if a pat stub exists,
+        otherwise the unmodified target address.
+     Description: This function swaps the unwinding return address
+        with the cp stub code.  The original target return address is
+        then stored into the tpf return address field.  The cp stub
+        code is found by climbing back up the stack and comparing the
+        module address of each tpf-stored return address to the module
+        address of the target.  */
+
+#define CURRENT_STACK_PTR() \
+ ({ register unsigned long int *stack_ptr asm ("%r15"); stack_ptr; })
+
+#define PREVIOUS_STACK_PTR() \
+ ((unsigned long int *)(*(CURRENT_STACK_PTR())))
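+
+/* The word at offset 0 of every stack frame holds the backchain, i.e.
+   the caller's stack pointer, so dereferencing a frame pointer walks
+   one frame up; the stack climb in __tpf_eh_return relies on this.  */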
+
+#define RA_OFFSET 112
+#define R15_OFFSET 120
+#define TPFAREA_OFFSET 160
+#define TPFAREA_SIZE (STACK_POINTER_OFFSET - TPFAREA_OFFSET)
+#define INVALID_RETURN 0
+
+void * __tpf_eh_return (void *target);
+
+void *
+__tpf_eh_return (void *target)
+{
+ Dl_info targetcodeInfo, currentcodeInfo;
+ int retval;
+ void *current, *stackptr, *destination_frame;
+ unsigned long int shifter, is_a_stub;
+
+ is_a_stub = 0;
+
+ /* Get code info for target return's address. */
+ retval = dladdr (target, &targetcodeInfo);
+
+ /* Ensure the code info is valid (for target). */
+ if (retval != INVALID_RETURN)
+ {
+
+ /* Get the stack pointer of the stack frame to be modified by
+ the exception unwinder. So that we can begin our climb
+ there. */
+ stackptr = (void *) *((unsigned long int *) (*(PREVIOUS_STACK_PTR())));
+
+      /* Begin looping through stack frames.  Stop if invalid
+         code information is retrieved or if the current stack
+         frame's shared object address matches that of the
+         target, calculated above.  */
+ do
+ {
+ /* Get return address based on our stackptr iterator. */
+ current = (void *) *((unsigned long int *)
+ (stackptr+RA_OFFSET));
+
+ /* Is it a Pat Stub? */
+ if (__isPATrange (current))
+ {
+ /* Yes it was, get real return address
+ in TPF stack area. */
+ current = (void *) *((unsigned long int *)
+ (stackptr+TPFRA_OFFSET));
+ is_a_stub = 1;
+ }
+
+ /* Get codeinfo on RA so that we can figure out
+ the module address. */
+ retval = dladdr (current, &currentcodeInfo);
+
+ /* Check that codeinfo for current stack frame is valid.
+ Then compare the module address of current stack frame
+ to target stack frame to determine if we have the pat
+ stub address we want. Also ensure we are dealing
+ with a module crossing, stub return address. */
+ if (is_a_stub && retval != INVALID_RETURN
+ && targetcodeInfo.dli_fbase == currentcodeInfo.dli_fbase)
+ {
+ /* Yes! They are in the same module.
+ Force copy of TPF private stack area to
+ destination stack frame TPF private area. */
+ destination_frame = (void *) *((unsigned long int *)
+ (*PREVIOUS_STACK_PTR() + R15_OFFSET));
+
+ /* Copy TPF linkage area from current frame to
+ destination frame. */
+ memcpy((void *) (destination_frame + TPFAREA_OFFSET),
+ (void *) (stackptr + TPFAREA_OFFSET), TPFAREA_SIZE);
+
+ /* Now overlay the
+ real target address into the TPF stack area of
+ the target frame we are jumping to. */
+ *((unsigned long int *) (destination_frame +
+ TPFRA_OFFSET)) = (unsigned long int) target;
+
+              /* Before returning the desired pat stub address to
+                 the exception handling unwinder (so that it can
+                 actually do the "leap"), shift out the low order
+                 bit designated to determine if we are in 64BIT mode.
+                 This is necessary for CTOA stubs.  Otherwise we leap
+                 one byte past where we want to go to in the TPF pat
+                 stub linkage code.  */
+ shifter = *((unsigned long int *)
+ (stackptr + RA_OFFSET));
+
+ shifter &= ~1ul;
+
+ /* Store Pat Stub Address in destination Stack Frame. */
+ *((unsigned long int *) (destination_frame +
+ RA_OFFSET)) = shifter;
+
+ /* Re-adjust pat stub address to go to correct place
+ in linkage. */
+ shifter = shifter - 4;
+
+ return (void *) shifter;
+ }
+
+ /* Desired module pat stub not found ...
+ Bump stack frame iterator. */
+ stackptr = (void *) *(unsigned long int *) stackptr;
+
+ is_a_stub = 0;
+
+ } while (stackptr && retval != INVALID_RETURN
+ && targetcodeInfo.dli_fbase != currentcodeInfo.dli_fbase);
+ }
+
+ /* No pat stub found, could be a problem? Simply return unmodified
+ target address. */
+ return target;
+}
+
diff --git a/gcc/config/s390/tpf.h b/gcc/config/s390/tpf.h
new file mode 100644
index 000000000..d2a0f966b
--- /dev/null
+++ b/gcc/config/s390/tpf.h
@@ -0,0 +1,130 @@
+/* Definitions for target OS TPF for GNU compiler, for IBM S/390 hardware
+ Copyright (C) 2003, 2004, 2005, 2007, 2009,
+ 2010 Free Software Foundation, Inc.
+ Contributed by P.J. Darcy (darcypj@us.ibm.com),
+ Hartmut Penner (hpenner@de.ibm.com), and
+ Ulrich Weigand (uweigand@de.ibm.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _TPF_H
+#define _TPF_H
+
+/* TPF wants the following macros defined/undefined as follows. */
+#undef TARGET_TPF
+#define TARGET_TPF 1
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+#define NO_IMPLICIT_EXTERN_C
+#define TARGET_POSIX_IO
+
+#undef SIZE_TYPE
+#define SIZE_TYPE ("long unsigned int")
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE ("long int")
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* Basic record keeping for the TPF OS name. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (TPF: zSeries)");
+
+/* TPF OS specific stack-pointer offset. */
+#undef STACK_POINTER_OFFSET
+#define STACK_POINTER_OFFSET 448
+
+/* When building for TPF, set a generic default target that is 64 bits. Also
+ enable TPF profiling support and the standard backchain by default. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_TPF_PROFILING | MASK_64BIT | MASK_ZARCH \
+ | MASK_HARD_DFP | MASK_BACKCHAIN)
+
+/* Exception handling. */
+
+/* Select a format to encode pointers in exception handling data. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) DW_EH_PE_absptr
+
+/* TPF OS specific compiler settings. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("tpf"); \
+ builtin_assert ("system=tpf"); \
+ builtin_define ("__ELF__"); \
+ } \
+ while (0)
+
+
+#define EXTRA_SPECS \
+ { "entry_spec", ENTRY_SPEC }
+
+/* Make TPF specific spec file settings here. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{mmain:crt0%O%s} crtbeginS%O%s crt3%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtendS%O%s"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{!fverbose-asm: -fverbose-asm}"
+
+/* The GNU C++ standard library requires that these macros be defined. */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{m31&m64}%{mesa&mzarch}%{march=*} \
+ -alshd=%b.lst"
+
+/* It would be nice to have the system linker script define the ones
+   that it needs.  */
+#undef LIB_SPEC
+#define LIB_SPEC "-lCTIS -lCISO -lCLBM -lCTAL -lCFVS -lCTBX -lCTXO \
+ -lCJ00 -lCTDF -lCOMX -lCOMS -lCTHD -lCTAD -lTPFSTUB"
+
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
+
+#define ENTRY_SPEC "%{mmain:-entry=_start} \
+ %{!mmain:-entry=0}"
+
+/* All linking is done shared on TPF-OS. */
+/* FIXME: When binutils patch for new emulation is committed
+ then change emulation to elf64_s390_tpf. */
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "-m elf64_s390 \
+ %{static:%estatic is not supported on TPF-OS} \
+ %{shared: -shared} \
+ %{!shared:-shared} \
+ %(entry_spec)"
+
+#define MD_UNWIND_SUPPORT "config/s390/tpf-unwind.h"
+
+/* IBM copies these libraries over with these names. */
+#define MATH_LIBRARY "CLBM"
+#define LIBSTDCXX "CPP1"
+#endif /* ! _TPF_H */
diff --git a/gcc/config/s390/tpf.md b/gcc/config/s390/tpf.md
new file mode 100644
index 000000000..e1106a052
--- /dev/null
+++ b/gcc/config/s390/tpf.md
@@ -0,0 +1,33 @@
+;; S390 TPF-OS specific machine patterns
+;; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_insn "prologue_tpf"
+ [(unspec_volatile [(const_int 0)] UNSPECV_TPF_PROLOGUE)
+ (clobber (reg:DI 1))]
+ "TARGET_TPF_PROFILING"
+ "larl\t%%r1,.+14\;tm\t4065,255\;bnz\t4064"
+ [(set_attr "length" "14")])
+
+
+(define_insn "epilogue_tpf"
+ [(unspec_volatile [(const_int 0)] UNSPECV_TPF_EPILOGUE)
+ (clobber (reg:DI 1))]
+ "TARGET_TPF_PROFILING"
+ "larl\t%%r1,.+14\;tm\t4071,255\;bnz\t4070"
+ [(set_attr "length" "14")])
diff --git a/gcc/config/s390/tpf.opt b/gcc/config/s390/tpf.opt
new file mode 100644
index 000000000..c3cde22b6
--- /dev/null
+++ b/gcc/config/s390/tpf.opt
@@ -0,0 +1,27 @@
+; Options for the TPF-OS port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mtpf-trace
+Target Report Mask(TPF_PROFILING)
+Enable TPF-OS tracing code
+
+mmain
+Target Report
+Specify main object for TPF-OS
diff --git a/gcc/config/score/constraints.md b/gcc/config/score/constraints.md
new file mode 100644
index 000000000..d642e1278
--- /dev/null
+++ b/gcc/config/score/constraints.md
@@ -0,0 +1,93 @@
+;; Constraint definitions for S+CORE
+;; Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+;; Contributed by Sunnorth.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; -------------------------------------------------------------------------
+;; Constraints
+;; -------------------------------------------------------------------------
+
+;; Register constraints.
+(define_register_constraint "d" "G32_REGS"
+ "r0 to r31")
+
+(define_register_constraint "e" "G16_REGS"
+ "r0 to r15")
+
+(define_register_constraint "t" "T32_REGS"
+ "r8 to r11 | r22 to r27")
+
+(define_register_constraint "h" "HI_REG"
+ "hi")
+
+(define_register_constraint "l" "LO_REG"
+ "lo")
+
+(define_register_constraint "x" "CE_REGS"
+ "hi + lo")
+
+(define_register_constraint "q" "CN_REG"
+ "cnt")
+
+(define_register_constraint "y" "LC_REG"
+ "lcb")
+
+(define_register_constraint "z" "SC_REG"
+ "scb")
+
+(define_register_constraint "a" "SP_REGS"
+ "cnt + lcb + scb")
+
+(define_register_constraint "c" "CR_REGS"
+ "cr0 to cr15")
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "High 16-bit constant (32-bit constant with 16 LSBs zero)."
+ (and (match_code "const_int")
+ (match_test "(ival & 0xffff) == 0")))
+
+(define_constraint "J"
+ "Unsigned 5 bit integer (in the range 0 to 31)."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 31")))
+
+(define_constraint "K"
+ "Unsigned 16 bit integer (in the range 0 to 65535)."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+(define_constraint "L"
+ "Signed 16 bit integer (in the range −32768 to 32767)."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+(define_constraint "M"
+ "Unsigned 14 bit integer (in the range 0 to 16383)."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 16383")))
+
+(define_constraint "N"
+ "Signed 14 bit integer (in the range −8192 to 8191)."
+ (and (match_code "const_int")
+ (match_test "ival >= -8192 && ival <= 8191")))
+
+(define_constraint "Z"
+ "Any SYMBOL_REF."
+ (and (match_code "symbol_ref")
+ (match_test "GET_CODE (op) == SYMBOL_REF")))
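+
+;; As an example, 0x12340000 satisfies "I" (its 16 LSBs are zero), while
+;; 0x1234 satisfies "K", "L", "M" and "N" but neither "I" nor "J".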
diff --git a/gcc/config/score/crti.asm b/gcc/config/score/crti.asm
new file mode 100644
index 000000000..4cd00cf99
--- /dev/null
+++ b/gcc/config/score/crti.asm
@@ -0,0 +1,131 @@
+# crti.asm for Sunplus S+CORE
+#
+# Copyright (C) 2005, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file makes a stack frame for the contents of the .init and
+# .fini sections.
+.extern _stack
+
+#ifndef __pic__
+.section .init, "ax", @progbits
+ .weak _start
+ .ent _start
+ .frame r0, 0, r3, 0
+ .mask 0x00000000, 0
+_start:
+ la r28, _gp
+ la r8, __bss_start
+ la r9, __bss_end__
+ sub! r9, r8
+ srli! r9, 2
+ addi r9, -1
+ mtsr r9, sr0
+ li r9, 0
+1:
+ sw r9, [r8]+, 4
+ bcnz 1b
+ la r0, _stack
+ jl _init
+ la r4, _end
+ jl _init_argv
+ jl exit
+ .end _start
+
+ .weak _init_argv
+ .ent _init_argv
+ .frame r0, 0, r3, 0
+ .mask 0x00000000, 0
+_init_argv:
+ ldiu! r4, 0
+ ldiu! r5, 0
+ j main
+ .end _init_argv
+
+ .globl _init
+ .type _init, %function
+_init:
+ addi r0, -32
+ sw r3, [r0, 20]
+
+ .section .fini, "ax", @progbits
+ .globl _fini
+ .type _fini, %function
+_fini:
+ addi r0, -32
+ sw r3, [r0, 20]
+#else
+.section .init, "ax", @progbits
+ .set pic
+ .weak _start
+ .ent _start
+ .frame r0, 0, r3, 0
+ .mask 0x00000000, 0
+_start:
+ mv r29, r3
+ bl 0f
+0:
+ .cpload r3
+ mv r3, r29
+ la r8, __bss_start
+ la r9, __bss_end__
+ sub! r9, r8
+ srli! r9, 2
+ addi r9, -1
+ mtsr r9, sr0
+ li r9, 0
+1:
+ sw r9, [r8]+, 4
+ bcnz 1b
+ la r0, _stack
+ bl _init
+ la r4, _end
+ la r29, _init_argv
+ brl r29
+ la r29, exit
+ brl r29
+ .end _start
+
+ .weak _init_argv
+ .ent _init_argv
+ .frame r0, 0, r3, 0
+ .mask 0x00000000, 0
+_init_argv:
+ ldiu! r4, 0
+ ldiu! r5, 0
+ la r29, main
+ brl r29
+ .end _init_argv
+
+ .globl _init
+ .type _init, %function
+_init:
+ addi r0, -32
+ sw r3, [r0, 20]
+
+ .section .fini, "ax", @progbits
+ .globl _fini
+ .type _fini, %function
+_fini:
+ addi r0, -32
+ sw r3, [r0, 20]
+
+#endif
diff --git a/gcc/config/score/crtn.asm b/gcc/config/score/crtn.asm
new file mode 100644
index 000000000..8132388a0
--- /dev/null
+++ b/gcc/config/score/crtn.asm
@@ -0,0 +1,50 @@
+# crtn.asm for Sunplus S+CORE
+
+# Copyright (C) 2005, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file makes sure that the .init and .fini sections do in
+# fact return.
+
+#ifndef __pic__
+.section .init, "ax", @progbits
+ lw r3, [r0, 20]
+ addi r0, 32
+ br r3
+
+.section .fini, "ax", @progbits
+ lw r3, [r0, 20]
+ addi r0, 32
+ br r3
+#else
+ .set pic
+.section .init, "ax", @progbits
+ lw r3, [r0, 20]
+ addi r0, 32
+ br r3
+
+ .set pic
+.section .fini, "ax", @progbits
+ lw r3, [r0, 20]
+ addi r0, 32
+ br r3
+#endif
+
diff --git a/gcc/config/score/elf.h b/gcc/config/score/elf.h
new file mode 100644
index 000000000..30fc7f016
--- /dev/null
+++ b/gcc/config/score/elf.h
@@ -0,0 +1,97 @@
+/* elf.h for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define OBJECT_FORMAT_ELF
+
+/* Biggest alignment supported by the object file format of this machine. */
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (32768 * 8)
+
+/* Switch into a generic section. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. */
+#define TYPE_OPERAND_FMT "@%s"
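+
+/* Together with TYPE_ASM_OP below, this emits directives such as
+   ".type foo, @function".  */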
+
+#undef TYPE_ASM_OP
+#define TYPE_ASM_OP "\t.type\t"
+
+#undef SIZE_ASM_OP
+#define SIZE_ASM_OP "\t.size\t"
+
+/* A C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. */
+#ifndef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#endif
+
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS asm_output_aligned_bss
+#endif
+
+#define ASM_OUTPUT_DEF(FILE, LABEL1, LABEL2) \
+ do { \
+ fputc ('\t', FILE); \
+ assemble_name (FILE, LABEL1); \
+ fputs (" = ", FILE); \
+ assemble_name (FILE, LABEL2); \
+ fputc ('\n', FILE); \
+ } while (0)
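+
+/* The macro above emits an alias definition of the form
+   "LABEL1 = LABEL2".  */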
+
+
+/* This is how we tell the assembler that a symbol is weak. */
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE, NAME) ASM_OUTPUT_WEAK_ALIAS (FILE, NAME, 0)
+
+#define ASM_OUTPUT_WEAK_ALIAS(FILE, NAME, VALUE) \
+ do { \
+ fputs ("\t.weak\t", FILE); \
+ assemble_name (FILE, NAME); \
+ if (VALUE) \
+ { \
+ fputc (' ', FILE); \
+ assemble_name (FILE, VALUE); \
+ } \
+ fputc ('\n', FILE); \
+ } while (0)
+
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+
+/* On elf, we *do* have support for the .init and .fini sections, and we
+ can put stuff in there to be executed before and after `main'. We let
+ crtstuff.c and other files know this by defining the following symbols.
+ The definitions say how to change sections to the .init and .fini
+ sections. This is the same for all known elf assemblers. */
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
+
+/* Don't set the target flags; this is done by the linker script.  */
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crti%O%s crtbegin%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
diff --git a/gcc/config/score/predicates.md b/gcc/config/score/predicates.md
new file mode 100644
index 000000000..7270cf174
--- /dev/null
+++ b/gcc/config/score/predicates.md
@@ -0,0 +1,152 @@
+;; Predicate definitions for Sunplus S+CORE.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "const_uimm5"
+ (match_code "const_int")
+{
+ return IMM_IN_RANGE (INTVAL (op), 5, 0);
+})
+
+(define_predicate "const_simm12"
+ (match_code "const_int")
+{
+ return IMM_IN_RANGE (INTVAL (op), 12, 1);
+})
+
+(define_predicate "const_simm15"
+ (match_code "const_int")
+{
+ return IMM_IN_RANGE (INTVAL (op), 15, 1);
+})
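+
+;; IMM_IN_RANGE (V, W, S), defined in score-conv.h, tests whether V fits
+;; in a W-bit immediate that is signed when S is 1; const_uimm5 above
+;; therefore accepts 0 to 31 and const_simm15 accepts -16384 to 16383.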
+
+(define_predicate "arith_operand"
+ (ior (match_code "const_int")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "score_register_operand"
+ (match_code "reg,subreg")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ return (GET_CODE (op) == REG)
+ && (REGNO (op) != CC_REGNUM);
+})
+
+(define_predicate "const_call_insn_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ enum score_symbol_type symbol_type;
+
+ return (score_symbolic_constant_p (op, &symbol_type)
+ && (symbol_type == SYMBOL_GENERAL));
+})
+
+(define_predicate "call_insn_operand"
+ (ior (match_operand 0 "const_call_insn_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "hireg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == HI_REGNUM")))
+
+(define_predicate "loreg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == LO_REGNUM")))
+
+(define_predicate "sr0_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == CN_REGNUM")))
+
+(define_predicate "g32reg_operand"
+ (and (match_code "reg")
+ (match_test "GP_REG_P (REGNO (op))")))
+
+(define_predicate "branch_n_operator"
+ (match_code "lt,ge"))
+
+(define_predicate "branch_nz_operator"
+ (match_code "eq,ne,lt,ge"))
+
+(define_predicate "score_load_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int dest_regno;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != MEM)
+ return 0;
+
+ dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_DEST (elt)) != REG
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || REGNO (SET_DEST (elt)) != (unsigned) (dest_regno + i)
+ || GET_CODE (SET_SRC (elt)) != MEM
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || GET_CODE (XEXP (SET_SRC (elt), 0)) != POST_INC)
+ return 0;
+ }
+
+ return 1;
+})
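+
+;; The parallel recognized above thus has the shape
+;;   (parallel [(set (reg R) (mem (post_inc B)))
+;;              (set (reg R+1) (mem (post_inc B))) ...])
+;; with consecutive SImode destinations; score_store_multiple_operation
+;; below is the mirror image, storing through PRE_DEC addresses.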
+
+(define_predicate "score_store_multiple_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int src_regno;
+ int i;
+
+ /* Perform a quick check so we don't blow up below. */
+ if (count <= 1
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != MEM
+ || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != REG)
+ return 0;
+
+ src_regno = REGNO (SET_SRC (XVECEXP (op, 0, 0)));
+
+ for (i = 1; i < count; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+
+ if (GET_CODE (elt) != SET
+ || GET_CODE (SET_SRC (elt)) != REG
+ || GET_MODE (SET_SRC (elt)) != SImode
+ || REGNO (SET_SRC (elt)) != (unsigned) (src_regno + i)
+ || GET_CODE (SET_DEST (elt)) != MEM
+ || GET_MODE (SET_DEST (elt)) != SImode
+ || GET_CODE (XEXP (SET_DEST (elt), 0)) != PRE_DEC)
+ return 0;
+ }
+
+ return 1;
+})
+
diff --git a/gcc/config/score/score-conv.h b/gcc/config/score/score-conv.h
new file mode 100644
index 000000000..f7cc5ce00
--- /dev/null
+++ b/gcc/config/score/score-conv.h
@@ -0,0 +1,78 @@
+/* score-conv.h for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SCORE_CONV_H
+#define GCC_SCORE_CONV_H
+
+#define GP_REG_FIRST 0U
+#define GP_REG_LAST 31U
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1U)
+#define GP_DBX_FIRST 0U
+
+#define CE_REG_FIRST 48U
+#define CE_REG_LAST 49U
+#define CE_REG_NUM (CE_REG_LAST - CE_REG_FIRST + 1U)
+
+#define ARG_REG_FIRST 4U
+#define ARG_REG_LAST 7U
+#define ARG_REG_NUM (ARG_REG_LAST - ARG_REG_FIRST + 1U)
+
+#define REG_CONTAIN(REGNO, FIRST, NUM) \
+ ((unsigned int)((int) (REGNO) - (FIRST)) < (NUM))
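+
+/* The single unsigned comparison above folds together the two tests
+   REGNO >= FIRST and REGNO < FIRST + NUM: when REGNO < FIRST, the
+   subtraction wraps around to a large unsigned value.  */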
+
+#define GP_REG_P(REGNO) REG_CONTAIN (REGNO, GP_REG_FIRST, GP_REG_NUM)
+
+#define G8_REG_P(REGNO) REG_CONTAIN (REGNO, GP_REG_FIRST, 8)
+
+#define G16_REG_P(REGNO) REG_CONTAIN (REGNO, GP_REG_FIRST, 16)
+
+#define CE_REG_P(REGNO) REG_CONTAIN (REGNO, CE_REG_FIRST, CE_REG_NUM)
+
+#define GR_REG_CLASS_P(C) ((C) == G16_REGS || (C) == G32_REGS)
+#define SP_REG_CLASS_P(C) \
+ ((C) == CN_REG || (C) == LC_REG || (C) == SC_REG || (C) == SP_REGS)
+#define CP_REG_CLASS_P(C) \
+ ((C) == CP1_REGS || (C) == CP2_REGS || (C) == CP3_REGS || (C) == CPA_REGS)
+#define CE_REG_CLASS_P(C) \
+ ((C) == HI_REG || (C) == LO_REG || (C) == CE_REGS)
+
+#define UIMM_IN_RANGE(V, W) \
+ ((V) >= 0 \
+ && ((unsigned HOST_WIDE_INT) (V) \
+ <= (((unsigned HOST_WIDE_INT) 2 << ((W) - 1)) - 1)))
+
+#define SIMM_IN_RANGE(V, W) \
+ ((V) >= ((HOST_WIDE_INT) -1 << ((W) - 1)) \
+ && (V) <= (((HOST_WIDE_INT) 1 << ((W) - 1)) - 1))
+
+#define IMM_IN_RANGE(V, W, S) \
+ ((S) ? SIMM_IN_RANGE (V, W) : UIMM_IN_RANGE (V, W))
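+
+/* For instance, UIMM_IN_RANGE (31, 5) and SIMM_IN_RANGE (-16, 5) hold,
+   while UIMM_IN_RANGE (32, 5) and SIMM_IN_RANGE (16, 5) do not.  */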
+
+#define IMM_IS_POW_OF_2(V, E1, E2) \
+ ((V) >= ((unsigned HOST_WIDE_INT) 1 << (E1)) \
+ && (V) <= ((unsigned HOST_WIDE_INT) 1 << (E2)) \
+ && ((V) & ((V) - 1)) == 0)
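+
+/* (V & (V - 1)) == 0 is the usual single-set-bit test; the bounds
+   additionally restrict the exponent to the range E1 to E2.  */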
+
+enum score_symbol_type
+{
+ SYMBOL_GENERAL,
+ SYMBOL_SMALL_DATA /* The symbol refers to something in a small data section. */
+};
+
+#endif
diff --git a/gcc/config/score/score-generic.md b/gcc/config/score/score-generic.md
new file mode 100644
index 000000000..4ddc05b1b
--- /dev/null
+++ b/gcc/config/score/score-generic.md
@@ -0,0 +1,45 @@
+;; Machine description for Sunplus S+CORE
+;; Sunplus S+CORE Pipeline Description
+;; Copyright (C) 2005, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Sunnorth.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "score")
+
+(define_cpu_unit "core" "score")
+
+(define_insn_reservation "memory" 3
+ (eq_attr "type" "load")
+ "core")
+
+(define_insn_reservation "mul" 3
+ (eq_attr "type" "mul,div")
+ "core")
+
+(define_insn_reservation "fce" 1
+ (eq_attr "type" "fce")
+ "core")
+
+(define_insn_reservation "tsr" 1
+ (eq_attr "type" "tsr,fsr")
+ "core")
+
+(define_insn_reservation "up_c" 1
+ (eq_attr "up_c" "yes")
+ "core")
diff --git a/gcc/config/score/score-modes.def b/gcc/config/score/score-modes.def
new file mode 100644
index 000000000..01031881a
--- /dev/null
+++ b/gcc/config/score/score-modes.def
@@ -0,0 +1,24 @@
+/* score-modes.def for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* CC_NZmode should be used if the N (sign) and Z (zero) flags are set correctly.
+ CC_Nmode should be used if only the N flag is set correctly. */
+
+CC_MODE (CC_N);
+CC_MODE (CC_NZ);
diff --git a/gcc/config/score/score-protos.h b/gcc/config/score/score-protos.h
new file mode 100644
index 000000000..385532940
--- /dev/null
+++ b/gcc/config/score/score-protos.h
@@ -0,0 +1,86 @@
+/* score-protos.h for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SCORE_PROTOS_H
+#define GCC_SCORE_PROTOS_H
+
+/* Memory access widths used by the assembly output routines. */
+enum score_mem_unit {SCORE_BYTE = 0, SCORE_HWORD = 1, SCORE_WORD = 2};
+
+#define SCORE_ALIGN_UNIT(V, UNIT) !(V & ((1 << UNIT) - 1))
+
+extern void score_prologue (void);
+extern void score_epilogue (int sibcall_p);
+extern void score_call (rtx *ops, bool sib);
+extern void score_call_value (rtx *ops, bool sib);
+extern void score_movdi (rtx *ops);
+extern void score_zero_extract_andi (rtx *ops);
+extern const char * score_linsn (rtx *ops, enum score_mem_unit unit, bool sign);
+extern const char * score_sinsn (rtx *ops, enum score_mem_unit unit);
+extern const char * score_limm (rtx *ops);
+extern const char * score_move (rtx *ops);
+extern bool score_unaligned_load (rtx *ops);
+extern bool score_unaligned_store (rtx *ops);
+extern bool score_block_move (rtx *ops);
+extern int score_address_cost (rtx addr, bool speed);
+extern int score_address_p (enum machine_mode mode, rtx x, int strict);
+extern int score_reg_class (int regno);
+extern int score_register_move_cost (enum machine_mode mode, enum reg_class to,
+ enum reg_class from);
+extern int score_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern int score_const_ok_for_letter_p (HOST_WIDE_INT value, char c);
+extern int score_extra_constraint (rtx op, char c);
+extern rtx score_return_addr (int count, rtx frame);
+extern int score_regno_mode_ok_for_base_p (int regno, int strict);
+extern void score_init_cumulative_args (CUMULATIVE_ARGS *cum,
+ tree fntype, rtx libname);
+extern void score_declare_object (FILE *stream, const char *name,
+ const char *directive, const char *fmt, ...);
+extern int score_output_external (FILE *file, tree decl, const char *name);
+extern enum reg_class score_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode,
+ rtx x);
+extern rtx score_function_value (const_tree valtype, const_tree func,
+ enum machine_mode mode);
+extern enum reg_class score_preferred_reload_class (rtx x,
+ enum reg_class rclass);
+extern HOST_WIDE_INT score_initial_elimination_offset (int from, int to);
+extern void score_print_operand (FILE *file, rtx op, int letter);
+extern void score_print_operand_address (FILE *file, rtx addr);
+extern int score_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ tree type, bool named);
+extern int score_symbolic_constant_p (rtx x,
+ enum score_symbol_type *symbol_type);
+extern void score_movsicc (rtx *ops);
+extern const char * score_select_add_imm (rtx *ops, bool set_cc);
+extern const char * score_select (rtx *ops, const char *inst_pre, bool commu,
+ const char *letter, bool set_cc);
+extern const char * score_output_casesi (rtx *operands);
+extern const char * score_rpush (rtx *ops);
+extern const char * score_rpop (rtx *ops);
+extern bool score_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
+
+#ifdef RTX_CODE
+extern enum machine_mode score_select_cc_mode (enum rtx_code op, rtx x, rtx y);
+#endif
+
+extern struct extern_list *extern_head;
+
+#endif /* GCC_SCORE_PROTOS_H */
diff --git a/gcc/config/score/score.c b/gcc/config/score/score.c
new file mode 100644
index 000000000..99695bfc5
--- /dev/null
+++ b/gcc/config/score/score.c
@@ -0,0 +1,736 @@
+/* Output routines for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Sunnorth.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "output.h"
+#include "tree.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "gstab.h"
+#include "hashtab.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "integrate.h"
+#include "langhooks.h"
+#include "score7.h"
+#include "df.h"
+
+static void score_option_override (void);
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options score_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START score_asm_file_start
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END score_asm_file_end
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE score_function_prologue
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE score_function_epilogue
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION score_handle_option
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE score_option_override
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE score_option_optimization_table
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS score_legitimize_address
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE score_issue_rate
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION score_select_rtx_section
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P score_in_small_data_p
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL score_function_ok_for_sibcall
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK score_output_mi_thunk
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES score_arg_partial_bytes
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG score_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE score_function_arg_advance
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE score_pass_by_reference
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY score_return_in_memory
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS score_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST score_address_cost
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P score_legitimate_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE score_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE score_conditional_register_usage
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE score_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT score_trampoline_init
+
+struct extern_list *extern_head = 0;
+
+/* Zero-initialized, so every character maps to NO_REGS by default. */
+enum reg_class score_char_to_class[256];
+
+/* Implement TARGET_RETURN_IN_MEMORY. In S+core,
+ small structures are returned in a register.
+ Objects with varying size must still be returned in memory. */
+static bool
+score_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_return_in_memory (type, fndecl);
+ else
+ gcc_unreachable ();
+}
+
+/* Return nonzero when an argument must be passed by reference. */
+static bool
+score_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ /* If we have a variable-sized parameter, we have no choice. */
+ return targetm.calls.must_pass_in_stack (mode, type);
+}
+
+/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
+ in order to avoid duplicating too much logic from elsewhere. */
+static void
+score_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
+static bool
+score_function_ok_for_sibcall (ATTRIBUTE_UNUSED tree decl,
+ ATTRIBUTE_UNUSED tree exp)
+{
+ return true;
+}
+
+/* Set up the stack and frame (if desired) for the function. */
+static void
+score_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_function_prologue (file, size);
+ else
+ gcc_unreachable ();
+}
+
+/* Do any necessary cleanup after a function to restore stack, frame,
+ and regs. */
+static void
+score_function_epilogue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_function_epilogue (file, size);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_SCHED_ISSUE_RATE. */
+static int
+score_issue_rate (void)
+{
+ return 1;
+}
+
+/* Choose the section to use for the constant rtx expression X that has
+ mode MODE. */
+static section *
+score_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_select_rtx_section (mode, x, align);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P. */
+static bool
+score_in_small_data_p (const_tree decl)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_in_small_data_p (decl);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_ASM_FILE_START. */
+static void
+score_asm_file_start (void)
+{
+ if (TARGET_SCORE7D)
+ fprintf (asm_out_file, "# Sunplus S+core7d %s rev=%s\n",
+ TARGET_LITTLE_ENDIAN ? "el" : "eb", SCORE_GCC_VERSION);
+ else if (TARGET_SCORE7)
+ fprintf (asm_out_file, "# Sunplus S+core7 %s rev=%s\n",
+ TARGET_LITTLE_ENDIAN ? "el" : "eb", SCORE_GCC_VERSION);
+ else
+ fprintf (asm_out_file, "# Sunplus S+core unknown %s rev=%s\n",
+ TARGET_LITTLE_ENDIAN ? "el" : "eb", SCORE_GCC_VERSION);
+
+ default_file_start ();
+
+ if (flag_pic)
+ fprintf (asm_out_file, "\t.set pic\n");
+}
+
+/* Implement TARGET_ASM_FILE_END. When using assembler macros, emit
+ .externs for any small-data variables that turned out to be external. */
+static void
+score_asm_file_end (void)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_asm_file_end ();
+ else
+ gcc_unreachable ();
+}
+
+#define MASK_ALL_CPU_BITS (MASK_SCORE7 | MASK_SCORE7D)
+
+/* Implement TARGET_HANDLE_OPTION. */
+static bool
+score_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mscore7d:
+ target_flags &= ~(MASK_ALL_CPU_BITS);
+ target_flags |= MASK_SCORE7 | MASK_SCORE7D;
+ return true;
+
+ case OPT_march_:
+ if (strcmp (arg, "score7") == 0)
+ {
+ target_flags &= ~(MASK_ALL_CPU_BITS);
+ target_flags |= MASK_SCORE7;
+ return true;
+ }
+ else if (strcmp (arg, "score7d") == 0)
+ {
+ target_flags &= ~(MASK_ALL_CPU_BITS);
+ target_flags |= MASK_SCORE7 | MASK_SCORE7D;
+ return true;
+ }
+ else
+ return false;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE hook. */
+static void
+score_option_override (void)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_option_override ();
+}
+
+/* Implement REGNO_REG_CLASS macro. */
+int
+score_reg_class (int regno)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_reg_class (regno);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement PREFERRED_RELOAD_CLASS macro. */
+enum reg_class
+score_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, enum reg_class rclass)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_preferred_reload_class (x, rclass);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement SECONDARY_INPUT_RELOAD_CLASS
+ and SECONDARY_OUTPUT_RELOAD_CLASS macro. */
+enum reg_class
+score_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_secondary_reload_class (rclass, mode, x);
+ else
+ gcc_unreachable ();
+}
+
+
+/* Return truth value on whether or not a given hard register
+ can support a given mode. */
+int
+score_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_hard_regno_mode_ok (regno, mode);
+ else
+ gcc_unreachable ();
+}
+
+/* We can always eliminate to the hard frame pointer. We can eliminate
+ to the stack pointer unless a frame pointer is needed. */
+
+static bool
+score_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == HARD_FRAME_POINTER_REGNUM
+ || (to == STACK_POINTER_REGNUM && !frame_pointer_needed));
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame
+ pointer or argument pointer. TO is either the stack pointer or
+ hard frame pointer. */
+HOST_WIDE_INT
+score_initial_elimination_offset (int from,
+ int to ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_initial_elimination_offset (from, to);
+ else
+ gcc_unreachable ();
+}
+
+/* Argument support functions. */
+
+/* Initialize CUMULATIVE_ARGS for a function. */
+void
+score_init_cumulative_args (CUMULATIVE_ARGS *cum,
+ tree fntype ATTRIBUTE_UNUSED,
+ rtx libname ATTRIBUTE_UNUSED)
+{
+ memset (cum, 0, sizeof (CUMULATIVE_ARGS));
+}
+
+/* Implement TARGET_FUNCTION_ARG_ADVANCE hook. */
+static void
+score_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_function_arg_advance (cum, mode, type, named);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement the TARGET_ARG_PARTIAL_BYTES hook. */
+int
+score_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode, tree type, bool named)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_arg_partial_bytes (cum, mode, type, named);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_FUNCTION_ARG hook. */
+static rtx
+score_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_function_arg (cum, mode, type, named);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
+ VALTYPE is the return type and MODE is VOIDmode. For libcalls,
+ VALTYPE is null and MODE is the mode of the return value. */
+rtx
+score_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_function_value (valtype, func, mode);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+static void
+score_asm_trampoline_template (FILE *f)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_asm_trampoline_template (f);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+static void
+score_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_trampoline_init (m_tramp, fndecl, chain_value);
+ else
+ gcc_unreachable ();
+}
+
+/* This function is used to implement the REGNO_MODE_OK_FOR_BASE_P macro. */
+int
+score_regno_mode_ok_for_base_p (int regno, int strict)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_regno_mode_ok_for_base_p (regno, strict);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P. */
+static bool
+score_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_legitimate_address_p (mode, x, strict);
+ else
+ gcc_unreachable ();
+}
+
+/* This function is used to implement LEGITIMIZE_ADDRESS. If X can
+ be legitimized in a way that the generic machinery might not expect,
+ return the new address, else return X. */
+static rtx
+score_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_legitimize_address (x);
+ else
+ gcc_unreachable ();
+}
+
+/* Return a number assessing the cost of moving a register in class
+ FROM to class TO. */
+int
+score_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class from, enum reg_class to)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_register_move_cost (mode, from, to);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement the TARGET_RTX_COSTS hook. */
+bool
+score_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_rtx_costs (x, code, outer_code, total, speed);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement the TARGET_ADDRESS_COST hook. */
+int
+score_address_cost (rtx addr,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_address_cost (addr);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement ASM_OUTPUT_EXTERNAL macro. */
+int
+score_output_external (FILE *file ATTRIBUTE_UNUSED,
+ tree decl, const char *name)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_output_external (file, decl, name);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement RETURN_ADDR_RTX. Note, we do not support moving
+ back to a previous frame. */
+rtx
+score_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_return_addr (count, frame);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement PRINT_OPERAND macro. */
+void
+score_print_operand (FILE *file, rtx op, int c)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_print_operand (file, op, c);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement PRINT_OPERAND_ADDRESS macro. */
+void
+score_print_operand_address (FILE *file, rtx x)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_print_operand_address (file, x);
+ else
+ gcc_unreachable ();
+}
+
+/* Implement SELECT_CC_MODE macro. */
+enum machine_mode
+score_select_cc_mode (enum rtx_code op, rtx x, rtx y)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_select_cc_mode (op, x, y);
+ else
+ gcc_unreachable ();
+}
+
+/* Return true if X is a symbolic constant that can be calculated in
+ the same way as a bare symbol. If it is, store the type of the
+ symbol in *SYMBOL_TYPE. */
+int
+score_symbolic_constant_p (rtx x, enum score_symbol_type *symbol_type)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_symbolic_constant_p (x, symbol_type);
+ else
+ gcc_unreachable ();
+}
+
+/* Generate the prologue instructions for entry into a S+core function. */
+void
+score_prologue (void)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_prologue ();
+ else
+ gcc_unreachable ();
+}
+
+/* Generate the epilogue instructions in a S+core function. */
+void
+score_epilogue (int sibcall_p)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_epilogue (sibcall_p);
+ else
+ gcc_unreachable ();
+}
+
+/* Both the call and sibcall patterns need to call this function. */
+void
+score_call (rtx *ops, bool sib)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_call (ops, sib);
+ else
+ gcc_unreachable ();
+}
+
+/* Both the call value and sibcall value patterns need to call this function. */
+void
+score_call_value (rtx *ops, bool sib)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_call_value (ops, sib);
+ else
+ gcc_unreachable ();
+}
+
+void
+score_movsicc (rtx *ops)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_movsicc (ops);
+ else
+ gcc_unreachable ();
+}
+
+/* Helpers for machine split patterns. */
+void
+score_movdi (rtx *ops)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_movdi (ops);
+ else
+ gcc_unreachable ();
+}
+
+void
+score_zero_extract_andi (rtx *ops)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ score7_zero_extract_andi (ops);
+ else
+ gcc_unreachable ();
+}
+
+/* Output asm insn for move. */
+const char *
+score_move (rtx *ops)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_move (ops);
+ else
+ gcc_unreachable ();
+}
+
+/* Output asm insn for load. */
+const char *
+score_linsn (rtx *ops, enum score_mem_unit unit, bool sign)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_linsn (ops, unit, sign);
+ else
+ gcc_unreachable ();
+}
+
+/* Output asm insn for store. */
+const char *
+score_sinsn (rtx *ops, enum score_mem_unit unit)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_sinsn (ops, unit);
+ else
+ gcc_unreachable ();
+}
+
+/* Output asm insn for load immediate. */
+const char *
+score_limm (rtx *ops)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_limm (ops);
+ else
+ gcc_unreachable ();
+}
+
+
+/* Generate add insn. */
+const char *
+score_select_add_imm (rtx *ops, bool set_cc)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_select_add_imm (ops, set_cc);
+ else
+ gcc_unreachable ();
+}
+
+/* Output arith insn. */
+const char *
+score_select (rtx *ops, const char *inst_pre,
+ bool commu, const char *letter, bool set_cc)
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ return score7_select (ops, inst_pre, commu, letter, set_cc);
+ else
+ gcc_unreachable ();
+}
+
+static void
+score_conditional_register_usage (void)
+{
+ if (!flag_pic)
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] =
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 0;
+}
+
+struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/score/score.h b/gcc/config/score/score.h
new file mode 100644
index 000000000..4554e2673
--- /dev/null
+++ b/gcc/config/score/score.h
@@ -0,0 +1,898 @@
+/* score.h for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Sunnorth.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "score-conv.h"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%{!mel:-meb} %{mel:-mel } \
+%{!mscore*:-mscore7} \
+%{mscore7:-mscore7} \
+%{mscore7d:-mscore7d} \
+%{G*}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{!mel:-EB} %{mel:-EL} \
+%{!mscore*:-march=score7} \
+%{mscore7:-march=score7} \
+%{mscore7d:-march=score7} \
+%{march=score7:-march=score7} \
+%{march=score7d:-march=score7} \
+%{G*}"
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{!mel:-EB} %{mel:-EL} \
+%{!mscore*:-mscore7_elf} \
+%{mscore7:-mscore7_elf} \
+%{mscore7d:-mscore7_elf} \
+%{march=score7:-mscore7_elf} \
+%{march=score7d:-mscore7_elf} \
+%{G*}"
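+
+/* Note that both -mscore7 and -mscore7d are passed down to the
+   assembler and linker as plain score7.  */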
+
+/* Run-time Target Specification. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do { \
+ builtin_define ("SUNPLUS"); \
+ builtin_define ("__SCORE__"); \
+ builtin_define ("__score__"); \
+ if (TARGET_LITTLE_ENDIAN) \
+ builtin_define ("__scorele__"); \
+ else \
+ builtin_define ("__scorebe__"); \
+ if (TARGET_SCORE7) \
+ builtin_define ("__score7__"); \
+ if (TARGET_SCORE7D) \
+ builtin_define ("__score7d__"); \
+ } while (0)
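+
+/* Compiling with -mscore7d for little-endian thus predefines SUNPLUS,
+   __SCORE__, __score__, __scorele__, __score7__ and __score7d__.  */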
+
+#define TARGET_DEFAULT 0
+
+#define SCORE_GCC_VERSION "1.6"
+
+#define TARGET_VERSION \
+ fprintf (stderr, "Sunplus S+core rev=%s", SCORE_GCC_VERSION);
+
+/* Target machine storage layout. */
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ (MODE) = SImode;
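+
+/* A `short' or `char' scalar is therefore held in an SImode register.  */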
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+#define STACK_BOUNDARY BITS_PER_WORD
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY BITS_PER_WORD
+
+/* There is no point aligning anything to a rounder boundary than this. */
+#define BIGGEST_ALIGNMENT LONG_DOUBLE_TYPE_SIZE
+
+/* If defined, a C expression to compute the alignment for a static
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that `strcpy' calls
+ that copy constants to character arrays can be done inline. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* If defined, a C expression to compute the alignment given to a
+ constant that is being placed in memory. EXP is the constant
+ and ALIGN is the alignment that the object would ordinarily have.
+ The value of this macro is used instead of that alignment to align
+ the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that `strcpy' calls that copy
+ constants can be done inline. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ((TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN))
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* All accesses must be aligned. */
+#define STRICT_ALIGNMENT 1
+
+/* Score requires that structure alignment is affected by bitfields. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* long double is not a fixed mode, but the idea is that, if we
+ support long double, we also want a 128-bit integer type. */
+#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
+
+/* Layout of Data Type. */
+/* Set the sizes of the core types. */
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define CHAR_TYPE_SIZE 8
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#undef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Default definitions for size_t and ptrdiff_t. */
+#define SIZE_TYPE "unsigned int"
+
+#define UINTPTR_TYPE "long unsigned int"
+
+/* Register Usage
+
+ S+core has:
+ - 32 integer registers
+ - 16 control registers (cond)
+ - 16 special registers (ceh/cel/cnt/lcr/scr/arg/fp)
+ - 32 coprocessor 1 registers
+ - 32 coprocessor 2 registers
+ - 32 coprocessor 3 registers. */
+#define FIRST_PSEUDO_REGISTER 160
+
+/* By default, fix the kernel registers (r30 and r31), the global
+ pointer (r28) and the stack pointer (r0). This can change
+ depending on the command-line options.
+
+ Regarding coprocessor registers: without evidence to the contrary,
+ it's best to assume that each coprocessor register has a unique
+ use. This can be overridden, in, e.g., TARGET_OPTION_OVERRIDE or
+ TARGET_CONDITIONAL_REGISTER_USAGE should the assumption be inappropriate
+ for a particular target. */
+
+/* Control Registers, use mfcr/mtcr insn
+ 32 cr0 PSR
+ 33 cr1 Condition
+ 34 cr2 ECR
+ 35 cr3 EXCPVec
+ 36 cr4 CCR
+ 37 cr5 EPC
+ 38 cr6 EMA
+ 39 cr7 TLBLock
+ 40 cr8 TLBPT
+ 41 cr9 PEADDR
+ 42 cr10 TLBRPT
+ 43 cr11 PEVN
+ 44 cr12 PECTX
+ 45 cr13
+ 46 cr14
+ 47 cr15
+
+ Custom Engine Register, use mfce/mtce
+ 48 CEH CEH
+ 49 CEL CEL
+
+ Special-Purpose Register, use mfsr/mtsr
+ 50 sr0 CNT
+ 51 sr1 LCR
+ 52 sr2 SCR
+
+ 53 ARG_POINTER_REGNUM
+ 54 FRAME_POINTER_REGNUM
+ Note that the control register file actually has 32 registers;
+ cr16-cr31 are not modeled here. */
+#define FIXED_REGISTERS \
+{ \
+ /* General Purpose Registers */ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, \
+ /* Control Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CEH/ CEL/ CNT/ LCR/ SCR / ARG_POINTER_REGNUM/ FRAME_POINTER_REGNUM */\
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 1 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 2 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 3 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+}
+
+#define CALL_USED_REGISTERS \
+{ \
+ /* General purpose register */ \
+ 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* Control Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 1 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 2 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* CP 3 Registers */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+}
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 6, 7, 8, 9, 10, 11, 4, 5, 22, 23, 24, 25, 26, 27, \
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 2, 3, \
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, \
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, \
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, \
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, \
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, \
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, \
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159 }
+
+/* Macro to conditionally modify fixed_regs/call_used_regs. */
+#define PIC_OFFSET_TABLE_REGNUM 29
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
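+
+/* With 4-byte words this yields one register for SImode values and two
+   for DImode or DFmode values.  */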
+
+/* Return true if REGNO is suitable for holding a quantity of type MODE. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) score_hard_regno_mode_ok (REGNO, MODE)
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((GET_MODE_CLASS (MODE1) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+/* Register Classes. */
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants. */
+enum reg_class
+{
+ NO_REGS,
+ G16_REGS, /* r0 ~ r15 */
+ G32_REGS, /* r0 ~ r31 */
+ T32_REGS, /* r8 ~ r11 | r22 ~ r27 */
+
+ HI_REG, /* hi */
+ LO_REG, /* lo */
+ CE_REGS, /* hi + lo */
+
+ CN_REG, /* cnt */
+ LC_REG, /* lcb */
+ SC_REG, /* scb */
+ SP_REGS, /* cnt + lcb + scb */
+
+ CR_REGS, /* cr0 - cr15 */
+
+ CP1_REGS, /* cp1 */
+ CP2_REGS, /* cp2 */
+ CP3_REGS, /* cp3 */
+ CPA_REGS, /* cp1 + cp2 + cp3 */
+
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define GENERAL_REGS G32_REGS
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "G16_REGS", \
+ "G32_REGS", \
+ "T32_REGS", \
+ \
+ "HI_REG", \
+ "LO_REG", \
+ "CE_REGS", \
+ \
+ "CN_REG", \
+ "LC_REG", \
+ "SC_REG", \
+ "SP_REGS", \
+ \
+ "CR_REGS", \
+ \
+ "CP1_REGS", \
+ "CP2_REGS", \
+ "CP3_REGS", \
+ "CPA_REGS", \
+ \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes. */
+#define REG_CLASS_CONTENTS \
+{ \
+ /* NO_REGS/G16/G32/T32 */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x0fc00f00, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, \
+ /* HI/LO/CE */ \
+ { 0x00000000, 0x00010000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x00020000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x00030000, 0x00000000, 0x00000000, 0x00000000}, \
+ /* CN/LC/SC/SP/CR */ \
+ { 0x00000000, 0x00040000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x00080000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x00100000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x001c0000, 0x00000000, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000}, \
+ /* CP1/CP2/CP3/CPA */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0x00000000, 0x00000000}, \
+ { 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0x00000000}, \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff}, \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}, \
+ /* ALL_REGS */ \
+ { 0xffffffff, 0x001fffff, 0xffffffff, 0xffffffff, 0xffffffff}, \
+}
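+
+/* Each initializer above is a 160-bit register mask split into five
+   32-bit words; G16_REGS, for example, sets bits 0-15 of the first
+   word, while CE_REGS sets bits 16 and 17 of the second word
+   (hard registers 48 and 49).  */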
+
+/* A C expression whose value is a register class containing hard
+ register REGNO. In general there is more that one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+#define REGNO_REG_CLASS(REGNO) (enum reg_class) score_reg_class (REGNO)
+
+/* The following macro defines cover classes for Integrated Register
+ Allocator. Cover classes is a set of non-intersected register
+ classes covering all hard registers used for register allocation
+ purpose. Any move between two registers of a cover class should be
+ cheaper than load or store of the registers. The macro value is
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+#define IRA_COVER_CLASSES \
+{ \
+ G32_REGS, CE_REGS, SP_REGS, LIM_REG_CLASSES \
+}
+
+/* A macro whose definition is the name of the class to which a
+ valid base register must belong. A base register is one used in
+ an address which is the register value plus a displacement. */
+#define BASE_REG_CLASS G16_REGS
+
+/* The class value for index registers. */
+#define INDEX_REG_CLASS NO_REGS
+
+extern enum reg_class score_char_to_class[256];
+#define REG_CLASS_FROM_LETTER(C) score_char_to_class[(unsigned char) (C)]
+
+/* Addressing modes, and classification of registers for them. */
+#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
+ score_regno_mode_ok_for_base_p (REGNO, 1)
+
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ score_preferred_reload_class (X, CLASS)
+
+/* If we need to load shorts byte-at-a-time, then we need a scratch. */
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ score_secondary_reload_class (CLASS, MODE, X)
+
+/* Return the register class of a scratch register needed to copy IN into
+ or out of a register in CLASS in MODE. If it can be done directly,
+ NO_REGS is returned. */
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ score_secondary_reload_class (CLASS, MODE, X)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
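+/* The expression above is a ceiling division.  For example, assuming
+   UNITS_PER_WORD is 4 on this target, an 8-byte DImode value needs
+   (8 + 4 - 1) / 4 = 2 consecutive registers, while a 4-byte SImode
+   value needs (4 + 4 - 1) / 4 = 1.  */
+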
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (HI_REG, (CLASS)) : 0)
+
+
+/* Basic Stack Layout. */
+/* Stack layout; function entry, exit and calling. */
+#define STACK_GROWS_DOWNWARD
+
+#define STACK_PUSH_CODE PRE_DEC
+#define STACK_POP_CODE POST_INC
+
+/* The offset of the first local variable from the beginning of the frame.
+ See compute_frame_size for details about the frame layout. */
+#define STARTING_FRAME_OFFSET crtl->outgoing_args_size
+
+/* The argument pointer always points to the first argument. */
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame. */
+#define RETURN_ADDR_RTX(count, frame) score_return_addr (count, frame)
+
+/* Pick up the return address upon entry to a procedure. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (VOIDmode, RA_REGNUM)
+
+/* Exception Handling Support. */
+/* Pass exception handling information in the four argument registers. */
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 4 ? (N) + ARG_REG_FIRST : INVALID_REGNUM)
+
+/* The registers that hold the stack adjustment and the handler
+   address when returning to an exception handler. */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, EH_REGNUM)
+#define EH_RETURN_HANDLER_RTX gen_rtx_REG (SImode, 30)
+
+/* Registers That Address the Stack Frame. */
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REGNUM
+
+/* These two registers don't really exist: they get eliminated to either
+ the stack or hard frame pointer. */
+#define FRAME_POINTER_REGNUM 53
+
+/* We use r2 as the hard frame pointer. */
+#define HARD_FRAME_POINTER_REGNUM FP_REGNUM
+
+#define ARG_POINTER_REGNUM 54
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM 23
+
+/* Eliminating the Frame Pointer and the Arg Pointer. */
+
+#define ELIMINABLE_REGS \
+ {{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = score_initial_elimination_offset ((FROM), (TO))
+
+/* Passing Function Arguments on the Stack. */
+/* Allocate stack space for arguments at the beginning of each function. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Reserve stack space for all argument registers. */
+#define REG_PARM_STACK_SPACE(FNDECL) UNITS_PER_WORD
+
+/* Define this if it is the responsibility of the caller to
+ allocate the area reserved for arguments passed in registers.
+ If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect
+ of this macro is to determine whether the space is included in
+ `crtl->outgoing_args_size'. */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+/* Passing Arguments in Registers */
+/* A C type for declaring a variable that is used as the first argument of
+ `FUNCTION_ARG' and other related values. For some target machines, the
+ type `int' suffices and can hold the number of bytes of argument so far. */
+typedef struct score_args
+{
+ unsigned int arg_number; /* how many arguments have been seen */
+ unsigned int num_gprs; /* number of gprs in use */
+ unsigned int stack_words; /* number of words in stack */
+} score_args_t;
+
+#define CUMULATIVE_ARGS score_args_t
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, n_named_args) \
+ score_init_cumulative_args (&CUM, FNTYPE, LIBNAME)
+
+/* 1 if REGNO is a possible register number for function argument
+   passing. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ REG_CONTAIN (REGNO, ARG_REG_FIRST, ARG_REG_NUM)
+
+/* How Scalar Function Values Are Returned. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ score_function_value ((VALTYPE), (FUNC), VOIDmode)
+
+#define LIBCALL_VALUE(MODE) score_function_value (NULL_TREE, NULL, (MODE))
+
+/* 1 if N is a possible register number for a function value. */
+#define FUNCTION_VALUE_REGNO_P(REGNO) ((REGNO) == (ARG_REG_FIRST))
+
+#define PIC_FUNCTION_ADDR_REGNUM (GP_REG_FIRST + 25)
+
+/* How Large Values Are Returned. */
+#define STRUCT_VALUE 0
+
+/* Function Entry and Exit */
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 1
+
+/* Generating Code for Profiling */
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ if (TARGET_SCORE7) \
+ { \
+ fprintf (FILE, " .set r1 \n"); \
+ fprintf (FILE, " mv r%d,r%d \n", AT_REGNUM, RA_REGNUM); \
+ fprintf (FILE, " subi r%d, %d \n", STACK_POINTER_REGNUM, 8); \
+ fprintf (FILE, " jl _mcount \n"); \
+ fprintf (FILE, " .set nor1 \n"); \
+ } \
+ } while (0)
+
+/* Trampolines for Nested Functions. */
+#define TRAMPOLINE_INSNS 6
+
+/* A C expression for the size in bytes of the trampoline, as an integer. */
+#define TRAMPOLINE_SIZE (24 + GET_MODE_SIZE (ptr_mode) * 2)
+
+#define HAVE_PRE_INCREMENT 1
+#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_PRE_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_PRE_MODIFY_REG 0
+#define HAVE_POST_MODIFY_REG 0
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects them all.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Some source files that are used after register allocation
+ need to be strict. */
+#ifndef REG_OK_STRICT
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ score_regno_mode_ok_for_base_p (REGNO (X), 0)
+#else
+#define REG_MODE_OK_FOR_BASE_P(X, MODE) \
+ score_regno_mode_ok_for_base_p (REGNO (X), 1)
+#endif
+
+#define REG_OK_FOR_INDEX_P(X) 0
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+/* Condition Code Status. */
+#define SELECT_CC_MODE(OP, X, Y) score_select_cc_mode (OP, X, Y)
+
+/* Return nonzero if SELECT_CC_MODE will never return MODE for a
+ floating point inequality comparison. */
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* Describing Relative Costs of Operations */
+/* Compute extra cost of moving data between one register class and another. */
+#define REGISTER_MOVE_COST(MODE, FROM, TO) \
+ score_register_move_cost (MODE, FROM, TO)
+
+/* Moves to and from memory are quite expensive. */
+#define MEMORY_MOVE_COST(MODE, CLASS, TO_P) \
+ (4 + memory_move_secondary_cost ((MODE), (CLASS), (TO_P)))
+
+/* Try to generate sequences that don't involve branches. */
+#define BRANCH_COST(speed_p, predictable_p) 2
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register. */
+#define NO_FUNCTION_CSE 1
+
+/* Dividing the Output into Sections (Texts, Data, ...). */
+/* Define the strings to put out for each section in the object file. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define SDATA_SECTION_ASM_OP "\t.sdata"
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.rdata"
+
+/* The Overall Framework of an Assembler File */
+/* How to start an assembler comment. */
+#define ASM_COMMENT_START "#"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+#define ASM_APP_ON "#APP\n\t.set volatile\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+#define ASM_APP_OFF "#NO_APP\n\t.set optimize\n"
+
+/* Output of Uninitialized Variables. */
+/* This says how to define a global common symbol. */
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGN) \
+ do { \
+ fputs ("\n\t.comm\t", STREAM); \
+ assemble_name (STREAM, NAME); \
+ fprintf (STREAM, " , " HOST_WIDE_INT_PRINT_UNSIGNED ", %u\n", \
+ SIZE, ALIGN / BITS_PER_UNIT); \
+ } while (0)
+
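+/* As a worked example, for a hypothetical 16-byte symbol "buf" with
+   32-bit alignment (SIZE == 16, ALIGN == 32) the macro above emits
+
+       .comm   buf , 16, 4
+
+   with ALIGN converted from bits to bytes.  */
+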
+/* This says how to define a local common symbol (i.e., one not visible
+   to the linker). */
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
+ do { \
+ fputs ("\n\t.lcomm\t", STREAM); \
+ assemble_name (STREAM, NAME); \
+ fprintf (STREAM, " , " HOST_WIDE_INT_PRINT_UNSIGNED ", %u\n", \
+ SIZE, ALIGN / BITS_PER_UNIT); \
+ } while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* Output and Generation of Labels */
+/* This is how to declare a function name.  The actual work of
+   emitting the label is done in the function prologue, so that the
+   line number is emitted correctly before the .ent directive and
+   after any .file directives.  The macro is therefore defined as
+   empty, so the name is not also declared here, ahead of .ent. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+ do { \
+ assemble_name (STREAM, NAME); \
+ fprintf (STREAM, ":\n"); \
+ } while (0)
+
+/* This says how to output an external.  It would be possible to output
+   nothing and let the undefined symbol become external.  However, the
+   assembler uses length information on externals to decide between
+   data/bss and sdata/sbss allocation, thereby saving execution time. */
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(STREAM, DECL, NAME) \
+ score_output_external (STREAM, DECL, NAME)
+
+/* This is how to output a reference to a user-level label named NAME. */
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ fprintf ((STREAM), "%s", (NAME))
+
+/* Local compiler-generated symbols must have a prefix that the assembler
+ understands. */
+#define LOCAL_LABEL_PREFIX (TARGET_SCORE7 ? "." : "$")
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+ sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long) (NUM))
+
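+/* For example, with PREFIX "CC" and NUM 7 this yields "*.CC7" under
+   TARGET_SCORE7 and "*$CC7" otherwise; the leading '*' tells the
+   label-output routines to use the rest of the string verbatim.  */
+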
+/* Output of Assembler Instructions. */
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \
+ \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "cr8", "cr9", "cr10", "cr11", "cr12", "cr13", "cr14", "cr15", \
+ \
+ "ceh", "cel", "sr0", "sr1", "sr2", "_arg", "_frame", "", \
+ "cr24", "cr25", "cr26", "cr27", "cr28", "cr29", "cr30", "cr31", \
+ \
+ "c1r0", "c1r1", "c1r2", "c1r3", "c1r4", "c1r5", "c1r6", "c1r7", \
+ "c1r8", "c1r9", "c1r10", "c1r11", "c1r12", "c1r13", "c1r14", "c1r15", \
+ "c1r16", "c1r17", "c1r18", "c1r19", "c1r20", "c1r21", "c1r22", "c1r23", \
+ "c1r24", "c1r25", "c1r26", "c1r27", "c1r28", "c1r29", "c1r30", "c1r31", \
+ \
+ "c2r0", "c2r1", "c2r2", "c2r3", "c2r4", "c2r5", "c2r6", "c2r7", \
+ "c2r8", "c2r9", "c2r10", "c2r11", "c2r12", "c2r13", "c2r14", "c2r15", \
+ "c2r16", "c2r17", "c2r18", "c2r19", "c2r20", "c2r21", "c2r22", "c2r23", \
+ "c2r24", "c2r25", "c2r26", "c2r27", "c2r28", "c2r29", "c2r30", "c2r31", \
+ \
+ "c3r0", "c3r1", "c3r2", "c3r3", "c3r4", "c3r5", "c3r6", "c3r7", \
+ "c3r8", "c3r9", "c3r10", "c3r11", "c3r12", "c3r13", "c3r14", "c3r15", \
+ "c3r16", "c3r17", "c3r18", "c3r19", "c3r20", "c3r21", "c3r22", "c3r23", \
+ "c3r24", "c3r25", "c3r26", "c3r27", "c3r28", "c3r29", "c3r30", "c3r31", \
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE. */
+#define PRINT_OPERAND(STREAM, X, CODE) score_print_operand (STREAM, X, CODE)
+
+/* A C expression which evaluates to true if CODE is a valid
+ punctuation character for use in the `PRINT_OPERAND' macro. */
+#define PRINT_OPERAND_PUNCT_VALID_P(C) ((C) == '[' || (C) == ']')
+
+/* Print a memory address as an operand to reference that memory location. */
+#define PRINT_OPERAND_ADDRESS(STREAM, X) \
+ score_print_operand_address (STREAM, X)
+
+/* By default on the S+core, external symbols do not have an underscore
+ prepended. */
+#define USER_LABEL_PREFIX ""
+
+/* This is how to output an insn to push a register on the stack. */
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
+ do { \
+ if (TARGET_SCORE7) \
+ fprintf (STREAM, "\tpush! %s,[%s]\n", \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM]); \
+ } while (0)
+
+/* This is how to output an insn to pop a register from the stack. */
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+ do { \
+ if (TARGET_SCORE7) \
+ fprintf (STREAM, "\tpop! %s,[%s]\n", \
+ reg_names[REGNO], \
+ reg_names[STACK_POINTER_REGNUM]); \
+ } while (0)
+
+/* Output of Dispatch Tables. */
+/* This is how to output an element of a case-vector.  The entries are
+   made GP-relative (via .gpword) when generating PIC; otherwise plain
+   .word entries are used. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do { \
+ if (TARGET_SCORE7) \
+ if (flag_pic) \
+ fprintf (STREAM, "\t.gpword %sL%d\n", LOCAL_LABEL_PREFIX, VALUE); \
+ else \
+ fprintf (STREAM, "\t.word %sL%d\n", LOCAL_LABEL_PREFIX, VALUE); \
+ } while (0)
+
+/* Jump table alignment is explicit in ASM_OUTPUT_CASE_LABEL. */
+#define ADDR_VEC_ALIGN(JUMPTABLE) (GET_MODE (PATTERN (JUMPTABLE)) == SImode ? 2 \
+ : GET_MODE (PATTERN (JUMPTABLE)) == HImode ? 1 : 0)
+
+/* This is how to output a label which precedes a jumptable. Since
+ Score3 instructions are 2 bytes, we may need explicit alignment here. */
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \
+ do { \
+ if ((TARGET_SCORE7) && GET_MODE (PATTERN (JUMPTABLE)) == SImode) \
+ ASM_OUTPUT_ALIGN (FILE, 2); \
+ (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \
+ } while (0)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE SImode
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "\t.word %sL%d\n", LOCAL_LABEL_PREFIX, VALUE)
+
+/* Assembler Commands for Exception Regions */
+/* The S+core encodes nothing in the least-significant bit of return
+   addresses, so no bits need to be masked off when finding exception
+   handling regions (the mask is all ones). */
+#define MASK_RETURN_ADDR constm1_rtx
+
+/* Assembler Commands for Alignment */
+/* This is how to output an assembler line to advance the location
+ counter by SIZE bytes. */
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM, SIZE) \
+  fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", (SIZE))
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) \
+ fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+/* Macros Affecting All Debugging Formats. */
+#ifndef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+
+/* Specific Options for DBX Output. */
+#define DBX_DEBUGGING_INFO 1
+
+/* By default, turn on GDB extensions. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+#define DBX_CONTIN_LENGTH 0
+
+/* DWARF 2 debugging information. */
+#define DWARF2_DEBUGGING_INFO 1
+
+/* The DWARF 2 CFA column which tracks the return address. */
+#define DWARF_FRAME_RETURN_COLUMN 3
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* All loads from memory are zero-extended. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* Give call MEMs SImode since it is the "most permissive" mode
+ for 32-bit targets. */
+#define FUNCTION_MODE Pmode
+
+struct GTY ((chain_next ("%h.next"))) extern_list
+{
+ struct extern_list *next; /* next external */
+ const char *name; /* name of the external */
+ int size; /* size in bytes */
+};
+
+extern GTY (()) struct extern_list *extern_head;
diff --git a/gcc/config/score/score.md b/gcc/config/score/score.md
new file mode 100644
index 000000000..9ae046ed5
--- /dev/null
+++ b/gcc/config/score/score.md
@@ -0,0 +1,1880 @@
+;; Machine description for Sunplus S+CORE
+;; Copyright (C) 2005, 2007, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Sunnorth.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+; branch conditional branch
+; jump unconditional jump
+; call unconditional call
+; load load instruction(s)
+; store store instruction(s)
+; cmp integer compare
+; arith integer arithmetic instruction
+; move data movement within same register set
+; const load constant
+; nop no operation
+; mul integer multiply
+; div integer divide
+; cndmv conditional moves
+; fce transfer from hi/lo registers
+; tce transfer to hi/lo registers
+; fsr transfer from special registers
+; tsr transfer to special registers
+; fcr transfer from control registers
+; tcr transfer to control registers
+
+(define_constants
+ [(CC_REGNUM 33)
+ (T_REGNUM 34)
+ (RA_REGNUM 3)
+ (SP_REGNUM 0)
+ (AT_REGNUM 1)
+ (FP_REGNUM 2)
+ (RT_REGNUM 4)
+ (GP_REGNUM 28)
+ (EH_REGNUM 29)
+ (HI_REGNUM 48)
+ (LO_REGNUM 49)
+ (CN_REGNUM 50)
+ (LC_REGNUM 51)
+ (SC_REGNUM 52)])
+
+(define_constants
+ [(BITTST 0)
+ (CPLOAD 1)
+ (CPRESTORE 2)
+
+ (SCB 3)
+ (SCW 4)
+ (SCE 5)
+ (SCLC 6)
+
+ (LCB 7)
+ (LCW 8)
+ (LCE 9)
+
+ (SFFS 10)])
+
+(define_attr "type"
+ "unknown,branch,jump,call,load,store,cmp,arith,move,const,nop,mul,div,cndmv,fce,tce,fsr,tsr,fcr,tcr"
+ (const_string "unknown"))
+
+(define_attr "mode" "unknown,QI,HI,SI,DI"
+ (const_string "unknown"))
+
+(define_attr "length" "" (const_int 4))
+
+(define_attr "up_c" "yes,no"
+ (const_string "no"))
+
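+;; The up_c attribute marks instructions that update the condition
+;; flags as a side effect (typically the ".c" instruction forms).
+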
+(include "constraints.md")
+(include "score-generic.md")
+(include "predicates.md")
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand")
+ (match_operand:QI 1 "general_operand"))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && !register_operand (operands[1], QImode))
+ {
+ operands[1] = force_reg (QImode, operands[1]);
+ }
+})
+
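+;; The expander above rejects memory-to-memory moves by forcing the
+;; source into a register, so the insn below only has to handle
+;; register <-> memory moves and special-register transfers.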
+(define_insn "*movqi_insns_score7"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,d,*x,d,*a")
+ (match_operand:QI 1 "general_operand" "i,d,m,d,*x,d,*a,d"))]
+ "(!MEM_P (operands[0]) || register_operand (operands[1], QImode))
+ && (TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_limm (operands);
+ case 1: return score_move (operands);
+ case 2: return score_linsn (operands, SCORE_BYTE, false);
+ case 3: return score_sinsn (operands, SCORE_BYTE);
+ case 4: return TARGET_SCORE7D ? \"mf%1%S0 %0\" : \"mf%1 %0\";
+ case 5: return TARGET_SCORE7D ? \"mt%0%S1 %1\" : \"mt%0 %1\";
+ case 6: return \"mfsr\t%0, %1\";
+ case 7: return \"mtsr\t%1, %0\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith,move,load,store,fce,tce,fsr,tsr")
+ (set_attr "mode" "QI")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand")
+ (match_operand:HI 1 "general_operand"))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && !register_operand (operands[1], HImode))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ }
+})
+
+(define_insn "*movhi_insns_score7"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,m,d,*x,d,*a")
+ (match_operand:HI 1 "general_operand" "i,d,m,d,*x,d,*a,d"))]
+ "(!MEM_P (operands[0]) || register_operand (operands[1], HImode))
+ && (TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_limm (operands);
+ case 1: return score_move (operands);
+ case 2: return score_linsn (operands, SCORE_HWORD, false);
+ case 3: return score_sinsn (operands, SCORE_HWORD);
+ case 4: return TARGET_SCORE7D ? \"mf%1%S0 %0\" : \"mf%1 %0\";
+ case 5: return TARGET_SCORE7D ? \"mt%0%S1 %1\" : \"mt%0 %1\";
+ case 6: return \"mfsr\t%0, %1\";
+ case 7: return \"mtsr\t%1, %0\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith,move,load,store,fce,tce,fsr,tsr")
+ (set_attr "mode" "HI")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "general_operand"))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && !register_operand (operands[1], SImode))
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+})
+
+(define_insn "*movsi_insns_score7"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,m,d,*x,d,*a,d,*c")
+ (match_operand:SI 1 "general_operand" "i,d,m,d,*x,d,*a,d,*c,d"))]
+ "(!MEM_P (operands[0]) || register_operand (operands[1], SImode))
+ && (TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (GET_CODE (operands[1]) != CONST_INT)
+ return \"la\t%0, %1\";
+ else
+ return score_limm (operands);
+ case 1: return score_move (operands);
+ case 2: return score_linsn (operands, SCORE_WORD, false);
+ case 3: return score_sinsn (operands, SCORE_WORD);
+ case 4: return TARGET_SCORE7D ? \"mf%1%S0 %0\" : \"mf%1 %0\";
+ case 5: return TARGET_SCORE7D ? \"mt%0%S1 %1\" : \"mt%0 %1\";
+ case 6: return \"mfsr\t%0, %1\";
+ case 7: return \"mtsr\t%1, %0\";
+ case 8: return \"mfcr\t%0, %1\";
+ case 9: return \"mtcr\t%1, %0\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith,move,load,store,fce,tce,fsr,tsr,fcr,tcr")
+ (set_attr "mode" "SI")])
+
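+;; DImode moves are kept as a single insn until after reload and are
+;; then split into word-sized moves by score_movdi.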
+(define_insn_and_split "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,d,m,d,*x")
+ (match_operand:DI 1 "general_operand" "i,d,m,d,*x,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ score_movdi (operands);
+ DONE;
+})
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand")
+ (match_operand:SF 1 "general_operand"))]
+ ""
+{
+ if (MEM_P (operands[0])
+ && !register_operand (operands[1], SFmode))
+ {
+ operands[1] = force_reg (SFmode, operands[1]);
+ }
+})
+
+(define_insn "*movsf_insns_score7"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,d,m")
+ (match_operand:SF 1 "general_operand" "i,d,m,d"))]
+ "(!MEM_P (operands[0]) || register_operand (operands[1], SFmode))
+ && (TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+  switch (which_alternative)
+    {
+    case 0: return \"li\t%0, %D1\";
+ case 1: return score_move (operands);
+ case 2: return score_linsn (operands, SCORE_WORD, false);
+ case 3: return score_sinsn (operands, SCORE_WORD);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith,move,load,store")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=d,d,d,m")
+ (match_operand:DF 1 "general_operand" "i,d,m,d"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ score_movdi (operands);
+ DONE;
+})
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "score_register_operand" )
+ (plus:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*addsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,L,N,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"addis\t%0, %U2\";
+ case 1: return score_select_add_imm (operands, false);
+ case 2: return \"addri\t%0, %1, %c2\";
+ case 3: return score_select (operands, "add", true, "", false);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (plus:SI
+ (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,L,N,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"addis.c\t%0, %U2\";
+ case 1: return score_select_add_imm (operands, true);
+ case 2: return \"addri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "add", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (plus:SI
+ (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,L,N,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"addis.c\t%0, %U2\";
+ case 1: return score_select_add_imm (operands, true);
+ case 2: return \"addri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "add", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "adddi3"
+ [(parallel
+ [(set (match_operand:DI 0 "score_register_operand")
+ (plus:DI (match_operand:DI 1 "score_register_operand")
+ (match_operand:DI 2 "score_register_operand")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "*adddi3_score7"
+ [(set (match_operand:DI 0 "register_operand" "=e,d")
+ (plus:DI (match_operand:DI 1 "register_operand" "0,d")
+ (match_operand:DI 2 "register_operand" "e,d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ add! %L0, %L2\;addc! %H0, %H2
+ add.c %L0, %L1, %L2\;addc %H0, %H1, %H2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "DI")])
+
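+;; The adddi3 patterns above implement a 64-bit add as a low-word add
+;; that sets the carry flag (add!/add.c) followed by an add-with-carry
+;; on the high words (addc!/addc).  In C terms, as an illustrative
+;; sketch with unsigned 32-bit words:
+;;
+;;   lo = a_lo + b_lo;           add.c: sets the carry flag
+;;   carry = (lo < a_lo);        carry out of the low word
+;;   hi = a_hi + b_hi + carry;   addc: adds the carry back in
+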
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (minus:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "score_register_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*subsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (minus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ return score_select (operands, "sub", false, "", false);
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ return score_select (operands, "sub", false, "", true);
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
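+;; If a subtract is immediately followed by a compare of its two
+;; operands, reorder the pair so the compare comes first; the
+;; compare/subtract sequence can then be combined into the single
+;; compare-and-subtract (sub.c) pattern below.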
+(define_peephole2
+ [(set (match_operand:SI 0 "g32reg_operand" "")
+ (minus:SI (match_operand:SI 1 "g32reg_operand" "")
+ (match_operand:SI 2 "g32reg_operand" "")))
+ (set (reg:CC CC_REGNUM)
+ (compare:CC (match_dup 1) (match_dup 2)))]
+ ""
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (minus:SI (match_dup 1) (match_dup 2)))])
+
+(define_insn "subsi3_ucc_pcmp"
+ [(parallel
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "score_register_operand" "d")
+ (match_operand:SI 2 "score_register_operand" "d")))
+ (set (match_operand:SI 0 "score_register_operand" "=d")
+ (minus:SI (match_dup 1) (match_dup 2)))])]
+ ""
+{
+ return score_select (operands, "sub", false, "", true);
+}
+ [(set_attr "type" "arith")
+ (set_attr "length" "4")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "subsi3_ucc"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (minus:SI (match_operand:SI 1 "score_register_operand" "d")
+ (match_operand:SI 2 "score_register_operand" "d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "score_register_operand" "=d")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ ""
+{
+ return score_select (operands, "sub", false, "", true);
+}
+ [(set_attr "type" "arith")
+ (set_attr "length" "4")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "subdi3"
+ [(parallel
+ [(set (match_operand:DI 0 "score_register_operand")
+ (minus:DI (match_operand:DI 1 "score_register_operand")
+ (match_operand:DI 2 "score_register_operand")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "*subdi3_score7"
+ [(set (match_operand:DI 0 "register_operand" "=e,d")
+ (minus:DI (match_operand:DI 1 "register_operand" "0,d")
+ (match_operand:DI 2 "register_operand" "e,d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ sub! %L0, %L2\;subc %H0, %H1, %H2
+ sub.c %L0, %L1, %L2\;subc %H0, %H1, %H2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "DI")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (and:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*andsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (and:SI (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+  switch (which_alternative)
+    {
+    case 0: return \"andis\t%0, %U2\";
+    case 1: return \"andi\t%0, %c2\";
+ case 2: return \"andri\t%0, %1, %c2\";
+ case 3: return score_select (operands, "and", true, "", false);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "andsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (and:SI (match_operand:SI 1 "register_operand" "0,0,0,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+  switch (which_alternative)
+    {
+    case 0: return \"andis.c\t%0, %U2\";
+    case 1: return \"andi.c\t%0, %c2\";
+ case 2: return \"andri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "and", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*andsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (and:SI
+ (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (and:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+  switch (which_alternative)
+    {
+    case 0: return \"andis.c\t%0, %U2\";
+    case 1: return \"andi.c\t%0, %c2\";
+ case 2: return \"andri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "and", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*zero_extract_andi"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (zero_extract:SI
+ (match_operand:SI 0 "score_register_operand" "d")
+ (match_operand:SI 1 "const_uimm5" "")
+ (match_operand:SI 2 "const_uimm5" ""))
+ (const_int 0)))]
+ ""
+ "#"
+ ""
+ [(const_int 1)]
+{
+ score_zero_extract_andi (operands);
+ DONE;
+})
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (ior:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*iorsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (ior:SI (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"oris\t%0, %U2\";
+ case 1: return \"ori\t%0, %c2\";
+ case 2: return \"orri\t%0, %1, %c2\";
+ case 3: return score_select (operands, "or", true, "", false);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ior:SI
+ (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d")
+ (ior:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"oris.c\t%0, %U2\";
+ case 1: return \"ori.c\t%0, %c2\";
+ case 2: return \"orri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "or", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ior:SI
+ (match_operand:SI 1 "register_operand" "0,0,d,d")
+ (match_operand:SI 2 "arith_operand" "I,K,M,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"oris.c\t%0, %U2\";
+ case 1: return \"ori.c\t%0, %c2\";
+ case 2: return \"orri.c\t%0, %1, %c2\";
+ case 3: return score_select (operands, "or", true, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (xor:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "score_register_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*xorsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (xor:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ return score_select (operands, "xor", true, "", false);
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (xor:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (xor:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ return score_select (operands, "xor", true, "", true);
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (xor:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ ""
+{
+ return score_select (operands, "xor", true, "", true);
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*extendqisi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "d,m")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"extsb\t%0, %1\";
+ case 1: return score_linsn (operands, SCORE_BYTE, true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_ucc_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (ashiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 24))
+ (const_int 24))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI (match_operand:QI 2 "register_operand" "0")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extsb.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_cmp_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (ashiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 24))
+ (const_int 24))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extsb.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*extendhisi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d,m")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"extsh\t%0, %1\";
+ case 1: return score_linsn (operands, SCORE_HWORD, true);
+ default: gcc_unreachable ();
+ }
+}
+  [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendhisi2_ucc_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (ashiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16))
+ (const_int 16))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "0")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extsh.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendhisi2_cmp_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (ashiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16))
+ (const_int 16))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extsh.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*zero_extendqisi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "d,m")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"extzb\t%0, %1\";
+ case 1: return score_linsn (operands, SCORE_BYTE, false);
+ default: gcc_unreachable ();
+ }
+}
+  [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_ucc_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (lshiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 24))
+ (const_int 24))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:QI 2 "register_operand" "0")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extzb.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_cmp_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (lshiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 24))
+ (const_int 24))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extzb.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*zero_extendhisi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d,m")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"extzh\t%0, %1\";
+ case 1: return score_linsn (operands, SCORE_HWORD, false);
+ default: gcc_unreachable ();
+ }
+}
+  [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendhisi2_ucc_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (lshiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16))
+ (const_int 16))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "0")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extzh.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendhisi2_cmp_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (lshiftrt:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 16))
+ (const_int 16))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "extzh.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (mult:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "score_register_operand")))]
+ ""
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ emit_insn (gen_mulsi3_score7 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
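+;; The 32x32 multiply writes its full 64-bit product to the HI/LO
+;; pair; the SImode pattern below keeps only LO (constraint "l") and
+;; clobbers HI.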
+(define_insn "mulsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (mult:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ (clobber (reg:SI HI_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "mul %1, %2"
+ [(set_attr "type" "mul")
+ (set_attr "mode" "SI")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "score_register_operand")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "score_register_operand"))
+ (sign_extend:DI
+ (match_operand:SI 2 "score_register_operand"))))]
+ ""
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ emit_insn (gen_mulsidi3_score7 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mulsidi3_score7"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "d"))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "d"))))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "mul %1, %2"
+ [(set_attr "type" "mul")
+ (set_attr "mode" "DI")])
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "score_register_operand")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "score_register_operand"))
+ (zero_extend:DI
+ (match_operand:SI 2 "score_register_operand"))))]
+ ""
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ emit_insn (gen_umulsidi3_score7 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "umulsidi3_score7"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "d"))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "d"))))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "mulu %1, %2"
+ [(set_attr "type" "mul")
+ (set_attr "mode" "DI")])
+
+(define_expand "divmodsi4"
+ [(parallel
+ [(set (match_operand:SI 0 "score_register_operand")
+ (div:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "score_register_operand")))
+ (set (match_operand:SI 3 "score_register_operand")
+ (mod:SI (match_dup 1) (match_dup 2)))])]
+ ""
+ ""
+)
+
+(define_insn "*divmodsi4_score7"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (div:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ (set (match_operand:SI 3 "register_operand" "=h")
+ (mod:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "div %1, %2"
+ [(set_attr "type" "div")
+ (set_attr "mode" "SI")])
+
+(define_expand "udivmodsi4"
+ [(parallel
+ [(set (match_operand:SI 0 "score_register_operand")
+ (udiv:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "score_register_operand")))
+ (set (match_operand:SI 3 "score_register_operand")
+ (umod:SI (match_dup 1) (match_dup 2)))])]
+ ""
+ ""
+)
+
+(define_insn "*udivmodsi4_score7"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (udiv:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))
+ (set (match_operand:SI 3 "register_operand" "=h")
+ (umod:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "divu %1, %2"
+ [(set_attr "type" "div")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (ashift:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*ashlsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashift:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ slli %0, %1, %c2
+ sll %0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ashift:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashift:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_select (operands, "slli", false, "c", true);
+ case 1: return score_select (operands, "sll", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ashift:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_select (operands, "slli", false, "c", true);
+ case 1: return score_select (operands, "sll", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (ashiftrt:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*ashrsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ srai %0, %1, %c2
+ sra %0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ashiftrt:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"srai.c\t%0, %1, %c2\";
+ case 1: return score_select (operands, "sra", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (ashiftrt:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return \"srai.c\t%0, %1, %c2\";
+ case 1: return score_select (operands, "sra", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (lshiftrt:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*lshrsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ srli %0, %1, %c2
+ srl %0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_select (operands, "srli", false, "c", true);
+ case 1: return score_select (operands, "srl", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ switch (which_alternative)
+ {
+ case 0: return score_select (operands, "srli", false, "c", true);
+ case 1: return score_select (operands, "srl", false, "", true);
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (neg:SI (match_operand:SI 1 "score_register_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*negsi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (neg:SI (match_operand:SI 1 "register_operand" "d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "neg %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*negsi2_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "e,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=e,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ neg! %0, %1
+ neg.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*negsi2_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "e,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=e,d")
+ (neg:SI (match_dup 1)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ neg! %0, %1
+ neg.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "score_register_operand")
+ (not:SI (match_operand:SI 1 "score_register_operand")))]
+ ""
+ ""
+)
+
+(define_insn "*one_cmplsi2_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (not:SI (match_operand:SI 1 "register_operand" "d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "not\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "*one_cmplsi2_ucc_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (not:SI (match_operand:SI 1 "register_operand" "e,d"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=e,d")
+ (not:SI (match_dup 1)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ not! %0, %1
+ not.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*one_cmplsi2_cmp_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (not:SI (match_operand:SI 1 "register_operand" "e,d"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=e,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ not! %0, %1
+ not.c %0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotlsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "score_register_operand")
+ (rotate:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "*rotlsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (rotate:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ roli.c %0, %1, %c2
+ rol.c %0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotrsi3"
+ [(parallel
+ [(set (match_operand:SI 0 "score_register_operand")
+ (rotatert:SI (match_operand:SI 1 "score_register_operand")
+ (match_operand:SI 2 "arith_operand")))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ ""
+)
+
+(define_insn "*rotrsi3_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "d,d")
+ (match_operand:SI 2 "arith_operand" "J,d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ rori.c %0, %1, %c2
+ ror.c %0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_expand "cbranchsi4"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "score_register_operand" "")
+ (match_operand:SI 2 "arith_operand" "")))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator"
+ [(reg:CC CC_REGNUM)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_insn "cmpsi_nz_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (match_operand:SI 0 "register_operand" "d,e,d")
+ (match_operand:SI 1 "arith_operand" "L,e,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ cmpi.c %0, %c1
+ cmp! %0, %1
+ cmp.c %0, %1"
+ [(set_attr "type" "cmp")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "cmpsi_n_score7"
+ [(set (reg:CC_N CC_REGNUM)
+ (compare:CC_N (match_operand:SI 0 "register_operand" "d,e,d")
+ (match_operand:SI 1 "arith_operand" "L,e,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ cmpi.c %0, %c1
+ cmp! %0, %1
+ cmp.c %0, %1"
+ [(set_attr "type" "cmp")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*cmpsi_to_addsi_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (match_operand:SI 1 "register_operand" "0,d")
+ (neg:SI (match_operand:SI 2 "register_operand" "e,d"))))
+ (clobber (match_scratch:SI 0 "=e,d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ add! %0, %2
+ add.c %0, %1, %2"
+ [(set_attr "type" "cmp")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "cmpsi_cc_score7"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 0 "register_operand" "d,e,d")
+ (match_operand:SI 1 "arith_operand" "L,e,d")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ cmpi.c %0, %c1
+ cmp! %0, %1
+ cmp.c %0, %1"
+ [(set_attr "type" "cmp")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "*branch_n_score7"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "branch_n_operator"
+ [(reg:CC_N CC_REGNUM)
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "b%C0 %1"
+ [(set_attr "type" "branch")])
+
+(define_insn "*branch_nz_score7"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "branch_nz_operator"
+ [(reg:CC_NZ CC_REGNUM)
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "b%C0 %1"
+ [(set_attr "type" "branch")])
+
+(define_insn "*branch_cc_score7"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "comparison_operator"
+ [(reg:CC CC_REGNUM)
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "b%C0 %1"
+ [(set_attr "type" "branch")])
+
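+;; Unconditional jumps emit the absolute "j" form normally; when
+;; flag_pic is set the "b" form is used instead, presumably because it
+;; is PC-relative and therefore position-independent.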
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+{
+ if (!flag_pic)
+ return \"j\t%0\";
+ else
+ return \"b\t%0\";
+}
+ [(set_attr "type" "jump")
+ (set_attr "length" "4")])
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))])]
+ ""
+{
+ score_call (operands, true);
+ DONE;
+})
+
+(define_insn "sibcall_internal_score7"
+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "t,Z"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RT_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)
+ && SIBLING_CALL_P (insn)"
+{
+ if (!flag_pic)
+ switch (which_alternative)
+ {
+ case 0: return \"br%S0\t%0\";
+ case 1: return \"j\t%0\";
+ default: gcc_unreachable ();
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0: return \"mv\tr29, %0\;br\tr29\";
+ case 1: return \"la\tr29, %0\;br\tr29\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "call")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "") (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))])]
+ ""
+{
+ score_call_value (operands, true);
+ DONE;
+})
+
+(define_insn "sibcall_value_internal_score7"
+ [(set (match_operand 0 "register_operand" "=d,d")
+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "t,Z"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RT_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)
+ && SIBLING_CALL_P (insn)"
+{
+ if (!flag_pic)
+ switch (which_alternative)
+ {
+ case 0: return \"br%S1\t%1\";
+ case 1: return \"j\t%1\";
+ default: gcc_unreachable ();
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0: return \"mv\tr29, %1\;br\tr29\";
+ case 1: return \"la\tr29, %1\;br\tr29\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "call")])
+
+(define_expand "call"
+ [(parallel [(call (match_operand 0 "" "") (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))])]
+ ""
+{
+ score_call (operands, false);
+ DONE;
+})
+
+(define_insn "call_internal_score7"
+ [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "d,Z"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI RA_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ if (!flag_pic)
+ switch (which_alternative)
+ {
+ case 0: return \"brl%S0\t%0\";
+ case 1: return \"jl\t%0\";
+ default: gcc_unreachable ();
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0: return \"mv\tr29, %0\;brl\tr29\";
+ case 1: return \"la\tr29, %0\;brl\tr29\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "call")])
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand 1 "" "") (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))])]
+ ""
+{
+ score_call_value (operands, false);
+ DONE;
+})
+
+(define_insn "call_value_internal_score7"
+ [(set (match_operand 0 "register_operand" "=d,d")
+ (call (mem:SI (match_operand:SI 1 "call_insn_operand" "d,Z"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI RA_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ if (!flag_pic)
+ switch (which_alternative)
+ {
+ case 0: return \"brl%S1\t%1\";
+ case 1: return \"jl\t%1\";
+ default: gcc_unreachable ();
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0: return \"mv\tr29, %1\;brl\tr29\";
+ case 1: return \"la\tr29, %1\;brl\tr29\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "call")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "score_register_operand" "d"))]
+ ""
+{
+ rtx dest;
+ dest = operands[0];
+ if (GET_CODE (dest) != REG
+ || GET_MODE (dest) != Pmode)
+ operands[0] = copy_to_mode_reg (Pmode, dest);
+
+ emit_jump_insn (gen_indirect_jump_internal_score (operands[0]));
+ DONE;
+})
+
+(define_insn "indirect_jump_internal_score"
+ [(set (pc) (match_operand:SI 0 "score_register_operand" "d"))]
+ ""
+ "br%S0 %0"
+ [(set_attr "type" "jump")])
+
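+;; For PIC tablejumps, the entry in %0 is adjusted with the assembler
+;; directive .cpadd before branching; this apparently adds the GOT
+;; base, in the manner of the MIPS directive of the same name.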
+(define_expand "tablejump"
+ [(set (pc)
+ (match_operand 0 "score_register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+ if (TARGET_SCORE7 || TARGET_SCORE7D)
+ emit_jump_insn (gen_tablejump_internal_score7 (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_insn "tablejump_internal_score7"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "d"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+{
+ if (flag_pic)
+ return \"mv\tr29, %0\;.cpadd\tr29\;br\tr29\";
+ else
+ return \"br%S0\t%0\";
+}
+ [(set_attr "type" "jump")])
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+{
+ score_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+{
+ score_epilogue (false);
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(const_int 2)]
+ ""
+{
+ score_epilogue (true);
+ DONE;
+})
+
+(define_insn "return_internal_score7"
+ [(return)
+ (use (match_operand 0 "pmode_register_operand" "d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "br%S0\t%0")
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "#nop!"
+)
+
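+;; PIC helper patterns.  .cpload sets up the GOT pointer in r29 and
+;; .cprestore saves it to a stack slot addressed off fp or sp; both
+;; appear to follow the MIPS directives of the same names.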
+(define_insn "cpload_score7"
+ [(unspec_volatile:SI [(const_int 1)] CPLOAD)]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)
+ && flag_pic"
+ ".cpload\tr29"
+)
+
+(define_insn "cprestore_use_fp_score7"
+ [(unspec_volatile:SI [(match_operand:SI 0 "" "")] CPRESTORE)
+ (use (reg:SI FP_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)
+ && flag_pic"
+ ".cprestore\tr2, %0"
+)
+
+(define_insn "cprestore_use_sp_score7"
+ [(unspec_volatile:SI [(match_operand:SI 0 "" "")] CPRESTORE)
+ (use (reg:SI SP_REGNUM))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)
+ && flag_pic"
+ ".cprestore\tr0, %0"
+)
+
+(define_insn "pushsi_score7"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "register_operand" "d"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "push!\t%1, [r0]"
+ [(set_attr "type" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "popsi_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (match_operand:SI 1 "pop_operand" ">"))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "pop!\t%0, [r0]"
+ [(set_attr "type" "store")
+ (set_attr "mode" "SI")])
+
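+;; The two peepholes below fuse adjacent reads of the LO and HI
+;; special registers (in either order) into the "movhilo" parallel,
+;; which retrieves both with a single mfcehl instruction.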
+(define_peephole2
+ [(set (match_operand:SI 0 "g32reg_operand" "")
+ (match_operand:SI 1 "loreg_operand" ""))
+ (set (match_operand:SI 2 "g32reg_operand" "")
+ (match_operand:SI 3 "hireg_operand" ""))]
+ ""
+ [(parallel
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "g32reg_operand" "")
+ (match_operand:SI 1 "hireg_operand" ""))
+ (set (match_operand:SI 2 "g32reg_operand" "")
+ (match_operand:SI 3 "loreg_operand" ""))]
+ ""
+ [(parallel
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (match_dup 1))])])
+
+(define_insn "movhilo"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (match_operand:SI 1 "loreg_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "=d")
+ (match_operand:SI 3 "hireg_operand" ""))])]
+ ""
+ "mfcehl\t%2, %0"
+ [(set_attr "type" "fce")
+ (set_attr "mode" "SI")])
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operator 1 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
+ ""
+{
+ score_movsicc (operands);
+})
+
+(define_insn "movsicc_internal_score7"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (if_then_else:SI (match_operator 1 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:SI 2 "arith_operand" "d")
+ (match_operand:SI 3 "arith_operand" "0")))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "mv%C1\t%0, %2"
+ [(set_attr "type" "cndmv")
+ (set_attr "mode" "SI")])
+
+(define_insn "zero_extract_bittst_score7"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (unspec:SI
+ [(match_operand:SI 0 "register_operand" "*e,d")
+ (match_operand:SI 1 "const_uimm5" "")]
+ BITTST)
+ (const_int 0)))]
+ "(TARGET_SCORE7 || TARGET_SCORE7D)"
+ "@
+ bittst!\t%0, %c1
+ bittst.c\t%0, %c1"
+ [(set_attr "type" "arith")
+ (set_attr "up_c" "yes")
+ (set_attr "mode" "SI")])
+
+(define_insn "andsi3_extzh"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (and:SI (match_operand:SI 1 "register_operand" "d")
+ (const_int 65535)))]
+ ""
+ "extzh\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "length" "4")
+ (set_attr "mode" "SI")])
+
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (clz:SI (match_operand:SI 1 "register_operand" "d")))]
+ "(TARGET_SCORE7D)"
+ "clz\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (smax:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "(TARGET_SCORE7D)"
+ "max\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (smin:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")))]
+ "(TARGET_SCORE7D)"
+ "min\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (abs:SI (match_operand:SI 1 "register_operand" "d")))]
+ "(TARGET_SCORE7D)"
+ "abs\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
+(define_insn "sffs"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")] SFFS))]
+ "(TARGET_SCORE7D)"
+ "bitrev\t%0, %1, r0\;clz\t%0, %0\;addi\t%0, 0x1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")])
+
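+;; ffs is synthesized from "sffs" above: bitrev+clz+addi yields the
+;; 1-based index of the first set bit, or 33 for a zero input
+;; (assuming clz of 0 yields 32); the compare and conditional move in
+;; the expander map that 33 back to the required result of 0.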
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (ffs:SI (match_operand:SI 1 "register_operand")))]
+ "(TARGET_SCORE7D)"
+{
+ emit_insn (gen_sffs (operands[0], operands[1]));
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CC_NZmode, CC_REGNUM),
+ gen_rtx_COMPARE (CC_NZmode, operands[0],
+ GEN_INT (33))));
+ if (TARGET_SCORE7D)
+ emit_insn (gen_movsicc_internal_score7 (operands[0],
+ gen_rtx_fmt_ee (EQ, VOIDmode, operands[0], GEN_INT (33)),
+ GEN_INT (0),
+ operands[0]));
+ DONE;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "loreg_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_operand:SI 2 "hireg_operand" "")
+ (match_operand:SI 3 "register_operand" ""))]
+ "(TARGET_SCORE7D)"
+ [(parallel
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "hireg_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_operand:SI 2 "loreg_operand" "")
+ (match_operand:SI 3 "register_operand" ""))]
+ "(TARGET_SCORE7D)"
+ [(parallel
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (match_dup 1))])])
+
+(define_insn "movtohilo"
+ [(parallel
+ [(set (match_operand:SI 0 "loreg_operand" "=l")
+ (match_operand:SI 1 "register_operand" "d"))
+ (set (match_operand:SI 2 "hireg_operand" "=h")
+ (match_operand:SI 3 "register_operand" "d"))])]
+ "(TARGET_SCORE7D)"
+ "mtcehl\t%3, %1"
+ [(set_attr "type" "fce")
+ (set_attr "mode" "SI")])
+
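+;; Multiply-accumulate patterns.  mad/msb accumulate into the HI:LO
+;; (CE) register pair; when the SImode accumulator lives in a general
+;; register, mtcel/mfcel move the low half in or out around the
+;; multiply.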
+(define_insn "mulsi3addsi"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,d")
+ (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "d,d,d")
+ (match_operand:SI 3 "register_operand" "d,d,d"))
+ (match_operand:SI 1 "register_operand" "0,d,l")))
+ (clobber (reg:SI HI_REGNUM))]
+ "(TARGET_SCORE7D)"
+ "@
+ mad\t%2, %3
+ mtcel%S1\t%1\;mad\t%2, %3
+ mad\t%2, %3\;mfcel%S0\t%0"
+ [(set_attr "mode" "SI")])
+
+(define_insn "mulsi3subsi"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,d,l")
+ (mult:SI (match_operand:SI 2 "register_operand" "d,d,d")
+ (match_operand:SI 3 "register_operand" "d,d,d"))))
+ (clobber (reg:SI HI_REGNUM))]
+ "(TARGET_SCORE7D)"
+ "@
+ msb\t%2, %3
+ mtcel%S1\t%1\;msb\t%2, %3
+ msb\t%2, %3\;mfcel%S0\t%0"
+ [(set_attr "mode" "SI")])
+
+(define_insn "mulsidi3adddi"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (plus:DI (mult:DI
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "%d"))
+ (sign_extend:DI (match_operand:SI 3 "register_operand" "d")))
+ (match_operand:DI 1 "register_operand" "0")))]
+ "(TARGET_SCORE7D)"
+ "mad\t%2, %3"
+ [(set_attr "mode" "DI")])
+
+(define_insn "umulsidi3adddi"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (plus:DI (mult:DI
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "%d"))
+ (zero_extend:DI (match_operand:SI 3 "register_operand" "d")))
+ (match_operand:DI 1 "register_operand" "0")))]
+ "(TARGET_SCORE7D)"
+ "madu\t%2, %3"
+ [(set_attr "mode" "DI")])
+
+(define_insn "mulsidi3subdi"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (minus:DI
+ (match_operand:DI 1 "register_operand" "0")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "%d"))
+ (sign_extend:DI (match_operand:SI 3 "register_operand" "d")))))]
+ "(TARGET_SCORE7D)"
+ "msb\t%2, %3"
+ [(set_attr "mode" "DI")])
+
+(define_insn "umulsidi3subdi"
+ [(set (match_operand:DI 0 "register_operand" "=x")
+ (minus:DI
+ (match_operand:DI 1 "register_operand" "0")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "%d"))
+ (zero_extend:DI
+ (match_operand:SI 3 "register_operand" "d")))))]
+ "(TARGET_SCORE7D)"
+ "msbu\t%2, %3"
+ [(set_attr "mode" "DI")])
+
diff --git a/gcc/config/score/score.opt b/gcc/config/score/score.opt
new file mode 100644
index 000000000..dc8cd351e
--- /dev/null
+++ b/gcc/config/score/score.opt
@@ -0,0 +1,47 @@
+; Options for the Sunnorth port of the compiler.
+
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+meb
+Target RejectNegative Report InverseMask(LITTLE_ENDIAN)
+Generate big-endian code
+
+mel
+Target RejectNegative Report Mask(LITTLE_ENDIAN)
+Generate little-endian code
+
+mnhwloop
+Target RejectNegative Report Mask(NHWLOOP)
+Disable the bcnz instruction
+
+muls
+Target RejectNegative Report Mask(ULS)
+Enable unaligned load/store instructions
+
+mscore7
+Target RejectNegative Report Mask(SCORE7)
+Support SCORE 7 ISA
+
+mscore7d
+Target RejectNegative Report Mask(SCORE7D)
+Support SCORE 7D ISA
+
+march=
+Target RejectNegative Joined
+Specify the name of the target architecture
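+
+; Each Mask()/InverseMask() record above defines a MASK_* bit in
+; target_flags; the corresponding TARGET_* macros (e.g. TARGET_SCORE7,
+; TARGET_LITTLE_ENDIAN) are what score7.md and score7.c test.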
diff --git a/gcc/config/score/score7.c b/gcc/config/score/score7.c
new file mode 100644
index 000000000..8a57362ba
--- /dev/null
+++ b/gcc/config/score/score7.c
@@ -0,0 +1,1797 @@
+/* score7.c for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Sunnorth
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "output.h"
+#include "tree.h"
+#include "function.h"
+#include "expr.h"
+#include "optabs.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "gstab.h"
+#include "hashtab.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "integrate.h"
+#include "langhooks.h"
+#include "cfglayout.h"
+#include "score7.h"
+#include "df.h"
+
+#define BITSET_P(VALUE, BIT) (((VALUE) & (1L << (BIT))) != 0)
+#define INS_BUF_SZ 128
+
+extern enum reg_class score_char_to_class[256];
+
+static int score7_sdata_max;
+static char score7_ins[INS_BUF_SZ + 8];
+
+/* Return true if SYMBOL is a SYMBOL_REF and OFFSET + SYMBOL points
+ to the same object as SYMBOL. */
+static int
+score7_offset_within_object_p (rtx symbol, HOST_WIDE_INT offset)
+{
+ if (GET_CODE (symbol) != SYMBOL_REF)
+ return 0;
+
+ if (CONSTANT_POOL_ADDRESS_P (symbol)
+ && offset >= 0
+ && offset < (int)GET_MODE_SIZE (get_pool_mode (symbol)))
+ return 1;
+
+ if (SYMBOL_REF_DECL (symbol) != 0
+ && offset >= 0
+ && offset < int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (symbol))))
+ return 1;
+
+ return 0;
+}
+
+/* Split X into a base and a constant offset, storing them in *BASE
+ and *OFFSET respectively. */
+static void
+score7_split_const (rtx x, rtx *base, HOST_WIDE_INT *offset)
+{
+ *offset = 0;
+
+ if (GET_CODE (x) == CONST)
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ *offset += INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
+
+ *base = x;
+}
+
+/* Classify symbol X, which must be a SYMBOL_REF or a LABEL_REF. */
+static enum score_symbol_type
+score7_classify_symbol (rtx x)
+{
+ if (GET_CODE (x) == LABEL_REF)
+ return SYMBOL_GENERAL;
+
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (CONSTANT_POOL_ADDRESS_P (x))
+ {
+ if (GET_MODE_SIZE (get_pool_mode (x)) <= SCORE7_SDATA_MAX)
+ return SYMBOL_SMALL_DATA;
+ return SYMBOL_GENERAL;
+ }
+ if (SYMBOL_REF_SMALL_P (x))
+ return SYMBOL_SMALL_DATA;
+ return SYMBOL_GENERAL;
+}
+
+/* Return true if the current function must save REGNO. */
+static int
+score7_save_reg_p (unsigned int regno)
+{
+ /* Check call-saved registers. */
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ return 1;
+
+ /* We need to save the old frame pointer before setting up a new one. */
+ if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
+ return 1;
+
+ /* We need to save the incoming return address if it is ever clobbered
+ within the function. */
+ if (regno == RA_REGNUM && df_regs_ever_live_p (regno))
+ return 1;
+
+ return 0;
+}
+
+/* Return one word of double-word value OP, taking into account the fixed
+ endianness of certain registers. HIGH_P is true to select the high part,
+ false to select the low part. */
+static rtx
+score7_subw (rtx op, int high_p)
+{
+ unsigned int byte;
+ enum machine_mode mode = GET_MODE (op);
+
+ if (mode == VOIDmode)
+ mode = DImode;
+
+ byte = (TARGET_LITTLE_ENDIAN ? high_p : !high_p) ? UNITS_PER_WORD : 0;
+
+ if (GET_CODE (op) == REG && REGNO (op) == HI_REGNUM)
+ return gen_rtx_REG (SImode, high_p ? HI_REGNUM : LO_REGNUM);
+
+ if (GET_CODE (op) == MEM)
+ return adjust_address (op, SImode, byte);
+
+ return simplify_gen_subreg (SImode, op, mode, byte);
+}
+
+static struct score7_frame_info *
+score7_cached_frame (void)
+{
+ static struct score7_frame_info _frame_info;
+ return &_frame_info;
+}
+
+/* Compute the layout of the current function's frame and return a
+ pointer to the cached frame info. SIZE is the size (in bytes) of the
+ local variables. */
+static struct score7_frame_info *
+score7_compute_frame_size (HOST_WIDE_INT size)
+{
+ unsigned int regno;
+ struct score7_frame_info *f = score7_cached_frame ();
+
+ memset (f, 0, sizeof (struct score7_frame_info));
+ f->gp_reg_size = 0;
+ f->mask = 0;
+ f->var_size = SCORE7_STACK_ALIGN (size);
+ f->args_size = crtl->outgoing_args_size;
+ f->cprestore_size = flag_pic ? UNITS_PER_WORD : 0;
+ if (f->var_size == 0 && current_function_is_leaf)
+ f->args_size = f->cprestore_size = 0;
+
+ if (f->args_size == 0 && cfun->calls_alloca)
+ f->args_size = UNITS_PER_WORD;
+
+ f->total_size = f->var_size + f->args_size + f->cprestore_size;
+ for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ {
+ if (score7_save_reg_p (regno))
+ {
+ f->gp_reg_size += GET_MODE_SIZE (SImode);
+ f->mask |= 1 << (regno - GP_REG_FIRST);
+ }
+ }
+
+ if (crtl->calls_eh_return)
+ {
+ unsigned int i;
+ for (i = 0;; ++i)
+ {
+ regno = EH_RETURN_DATA_REGNO (i);
+ if (regno == INVALID_REGNUM)
+ break;
+ f->gp_reg_size += GET_MODE_SIZE (SImode);
+ f->mask |= 1 << (regno - GP_REG_FIRST);
+ }
+ }
+
+ f->total_size += f->gp_reg_size;
+ f->num_gp = f->gp_reg_size / UNITS_PER_WORD;
+
+ if (f->mask)
+ {
+ HOST_WIDE_INT offset;
+ offset = (f->args_size + f->cprestore_size + f->var_size
+ + f->gp_reg_size - GET_MODE_SIZE (SImode));
+ f->gp_sp_offset = offset;
+ }
+ else
+ f->gp_sp_offset = 0;
+
+ return f;
+}
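+
+/* Given the offsets computed above, the frame layout from the high
+ address down to the stack pointer is: saved registers (gp_reg_size),
+ locals (var_size), the cprestore slot, then the outgoing argument
+ area (args_size). */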
+
+/* Return true if X is a valid base register.
+ Allow only hard registers if STRICT. */
+static int
+score7_valid_base_register_p (rtx x, int strict)
+{
+ if (!strict && GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+
+ return (GET_CODE (x) == REG
+ && score7_regno_mode_ok_for_base_p (REGNO (x), strict));
+}
+
+/* Return true if X is a valid address for machine mode MODE. If it is,
+ fill in INFO appropriately. STRICT is true if we should only accept
+ hard base registers. */
+static int
+score7_classify_address (struct score7_address_info *info,
+ enum machine_mode mode, rtx x, int strict)
+{
+ info->code = GET_CODE (x);
+
+ switch (info->code)
+ {
+ case REG:
+ case SUBREG:
+ info->type = SCORE7_ADD_REG;
+ info->reg = x;
+ info->offset = const0_rtx;
+ return score7_valid_base_register_p (info->reg, strict);
+ case PLUS:
+ info->type = SCORE7_ADD_REG;
+ info->reg = XEXP (x, 0);
+ info->offset = XEXP (x, 1);
+ return (score7_valid_base_register_p (info->reg, strict)
+ && GET_CODE (info->offset) == CONST_INT
+ && IMM_IN_RANGE (INTVAL (info->offset), 15, 1));
+ case PRE_DEC:
+ case POST_DEC:
+ case PRE_INC:
+ case POST_INC:
+ if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
+ return false;
+ info->type = SCORE7_ADD_REG;
+ info->reg = XEXP (x, 0);
+ info->offset = GEN_INT (GET_MODE_SIZE (mode));
+ return score7_valid_base_register_p (info->reg, strict);
+ case CONST_INT:
+ info->type = SCORE7_ADD_CONST_INT;
+ return IMM_IN_RANGE (INTVAL (x), 15, 1);
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ info->type = SCORE7_ADD_SYMBOLIC;
+ return (score7_symbolic_constant_p (x, &info->symbol_type)
+ && (info->symbol_type == SYMBOL_GENERAL
+ || info->symbol_type == SYMBOL_SMALL_DATA));
+ default:
+ return 0;
+ }
+}
+
+bool
+score7_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+{
+ return ((TYPE_MODE (type) == BLKmode)
+ || (int_size_in_bytes (type) > 2 * UNITS_PER_WORD)
+ || (int_size_in_bytes (type) == -1));
+}
+
+/* Return a legitimate address for REG + OFFSET. */
+static rtx
+score7_add_offset (rtx reg, HOST_WIDE_INT offset)
+{
+ if (!IMM_IN_RANGE (offset, 15, 1))
+ {
+ reg = expand_simple_binop (GET_MODE (reg), PLUS,
+ gen_int_mode (offset & 0xffffc000,
+ GET_MODE (reg)),
+ reg, NULL, 0, OPTAB_WIDEN);
+ offset &= 0x3fff;
+ }
+
+ return plus_constant (reg, offset);
+}
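+
+/* Note that 0xffffc000 is ~0x3fff, so the expansion above leaves a
+ low part of at most 0x3fff, within the signed 15-bit range tested by
+ IMM_IN_RANGE (offset, 15, 1). */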
+
+/* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
+ in order to avoid duplicating too much logic from elsewhere. */
+void
+score7_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, temp1, insn, fnaddr;
+
+ /* Pretend to be a post-reload pass while generating rtl. */
+ reload_completed = 1;
+
+ /* Mark the end of the (empty) prologue. */
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* We need two temporary registers in some cases. */
+ temp1 = gen_rtx_REG (Pmode, 8);
+
+ /* Find out which register contains the "this" pointer. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, ARG_REG_FIRST + 1);
+ else
+ this_rtx = gen_rtx_REG (Pmode, ARG_REG_FIRST);
+
+ /* Add DELTA to THIS_RTX. */
+ if (delta != 0)
+ {
+ rtx offset = GEN_INT (delta);
+ if (!(delta >= -32768 && delta <= 32767))
+ {
+ emit_move_insn (temp1, offset);
+ offset = temp1;
+ }
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
+ }
+
+ /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
+ if (vcall_offset != 0)
+ {
+ rtx addr;
+
+ /* Set TEMP1 to *THIS_RTX. */
+ emit_move_insn (temp1, gen_rtx_MEM (Pmode, this_rtx));
+
+ /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
+ addr = score7_add_offset (temp1, vcall_offset);
+
+ /* Load the offset and add it to THIS_RTX. */
+ emit_move_insn (temp1, gen_rtx_MEM (Pmode, addr));
+ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
+ }
+
+ /* Jump to the target function. */
+ fnaddr = XEXP (DECL_RTL (function), 0);
+ insn = emit_call_insn (gen_sibcall_internal_score7 (fnaddr, const0_rtx));
+ SIBLING_CALL_P (insn) = 1;
+
+ /* Run just enough of rest_of_compilation. This sequence was
+ "borrowed" from alpha.c. */
+ insn = get_insns ();
+ insn_locators_alloc ();
+ split_all_insns_noflow ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ /* Clean up the vars set above. Note that final_end_function resets
+ the global pointer for us. */
+ reload_completed = 0;
+}
+
+/* Copy VALUE to a register and return that register. If new pseudos
+ are allowed, copy it into a new register, otherwise use DEST. */
+static rtx
+score7_force_temporary (rtx dest, rtx value)
+{
+ if (can_create_pseudo_p ())
+ return force_reg (Pmode, value);
+ else
+ {
+ emit_move_insn (copy_rtx (dest), value);
+ return dest;
+ }
+}
+
+/* Return a LO_SUM expression for ADDR. TEMP is as for score7_force_temporary
+ and is used to load the high part into a register. */
+static rtx
+score7_split_symbol (rtx temp, rtx addr)
+{
+ rtx high = score7_force_temporary (temp,
+ gen_rtx_HIGH (Pmode, copy_rtx (addr)));
+ return gen_rtx_LO_SUM (Pmode, high, addr);
+}
+
+/* This function is used to implement LEGITIMIZE_ADDRESS. If X can
+ be legitimized in a way that the generic machinery might not expect,
+ return the new address. */
+rtx
+score7_legitimize_address (rtx x)
+{
+ enum score_symbol_type symbol_type;
+
+ if (score7_symbolic_constant_p (x, &symbol_type)
+ && symbol_type == SYMBOL_GENERAL)
+ return score7_split_symbol (0, x);
+
+ if (GET_CODE (x) == PLUS
+ && GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ rtx reg = XEXP (x, 0);
+ if (!score7_valid_base_register_p (reg, 0))
+ reg = copy_to_mode_reg (Pmode, reg);
+ return score7_add_offset (reg, INTVAL (XEXP (x, 1)));
+ }
+
+ return x;
+}
+
+/* Fill INFO with information about a single argument. CUM is the
+ cumulative state for earlier arguments. MODE is the mode of this
+ argument and TYPE is its type (if known). NAMED is true if this
+ is a named (fixed) argument rather than a variable one. */
+static void
+score7_classify_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named, struct score7_arg_info *info)
+{
+ int even_reg_p;
+ unsigned int num_words, max_regs;
+
+ even_reg_p = 0;
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_FLOAT)
+ even_reg_p = (GET_MODE_SIZE (mode) > UNITS_PER_WORD);
+ else
+ if (type != NULL_TREE && TYPE_ALIGN (type) > BITS_PER_WORD && named)
+ even_reg_p = 1;
+
+ if (TARGET_MUST_PASS_IN_STACK (mode, type))
+ info->reg_offset = ARG_REG_NUM;
+ else
+ {
+ info->reg_offset = cum->num_gprs;
+ if (even_reg_p)
+ info->reg_offset += info->reg_offset & 1;
+ }
+
+ if (mode == BLKmode)
+ info->num_bytes = int_size_in_bytes (type);
+ else
+ info->num_bytes = GET_MODE_SIZE (mode);
+
+ num_words = (info->num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ max_regs = ARG_REG_NUM - info->reg_offset;
+
+ /* Partition the argument between registers and stack. */
+ info->reg_words = MIN (num_words, max_regs);
+ info->stack_words = num_words - info->reg_words;
+
+ /* The alignment applied to registers is also applied to stack arguments. */
+ if (info->stack_words)
+ {
+ info->stack_offset = cum->stack_words;
+ if (even_reg_p)
+ info->stack_offset += info->stack_offset & 1;
+ }
+}
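+
+/* For example, assuming the argument registers are r4-r7: after one
+ SImode argument, a DImode argument has even_reg_p set, so reg_offset
+ is rounded up from 1 to 2 and the value is passed in the r6/r7
+ pair. */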
+
+/* Output the function label plus the .ent/.frame/.mask directives
+ that describe its stack frame. */
+void
+score7_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ const char *fnname;
+ struct score7_frame_info *f = score7_cached_frame ();
+ HOST_WIDE_INT tsize = f->total_size;
+
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (!flag_inhibit_size_directive)
+ {
+ fputs ("\t.ent\t", file);
+ assemble_name (file, fnname);
+ fputs ("\n", file);
+ }
+ assemble_name (file, fnname);
+ fputs (":\n", file);
+
+ if (!flag_inhibit_size_directive)
+ {
+ fprintf (file,
+ "\t.frame\t%s," HOST_WIDE_INT_PRINT_DEC ",%s, %d\t\t"
+ "# vars= " HOST_WIDE_INT_PRINT_DEC ", regs= %d"
+ ", args= " HOST_WIDE_INT_PRINT_DEC
+ ", gp= " HOST_WIDE_INT_PRINT_DEC "\n",
+ (reg_names[(frame_pointer_needed)
+ ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM]),
+ tsize,
+ reg_names[RA_REGNUM],
+ current_function_is_leaf ? 1 : 0,
+ f->var_size,
+ f->num_gp,
+ f->args_size,
+ f->cprestore_size);
+
+ fprintf (file, "\t.mask\t0x%08x," HOST_WIDE_INT_PRINT_DEC "\n",
+ f->mask,
+ (f->gp_sp_offset - f->total_size));
+ }
+}
+
+/* Output the .end directive that closes the function's assembly
+ after the epilogue. */
+void
+score7_function_epilogue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (!flag_inhibit_size_directive)
+ {
+ const char *fnname;
+ fnname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ fputs ("\t.end\t", file);
+ assemble_name (file, fnname);
+ fputs ("\n", file);
+ }
+}
+
+/* Returns true if X contains a SYMBOL_REF. */
+static bool
+score7_symbolic_expression_p (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF)
+ return true;
+
+ if (GET_CODE (x) == CONST)
+ return score7_symbolic_expression_p (XEXP (x, 0));
+
+ if (UNARY_P (x))
+ return score7_symbolic_expression_p (XEXP (x, 0));
+
+ if (ARITHMETIC_P (x))
+ return (score7_symbolic_expression_p (XEXP (x, 0))
+ || score7_symbolic_expression_p (XEXP (x, 1)));
+
+ return false;
+}
+
+/* Choose the section to use for the constant rtx expression X that has
+ mode MODE. */
+section *
+score7_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align)
+{
+ if (GET_MODE_SIZE (mode) <= SCORE7_SDATA_MAX)
+ return get_named_section (0, ".sdata", 0);
+ else if (flag_pic && score7_symbolic_expression_p (x))
+ return get_named_section (0, ".data.rel.ro", 3);
+ else
+ return mergeable_constant_section (mode, align, 0);
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P. */
+bool
+score7_in_small_data_p (const_tree decl)
+{
+ HOST_WIDE_INT size;
+
+ if (TREE_CODE (decl) == STRING_CST
+ || TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
+ {
+ const char *name;
+ name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (strcmp (name, ".sdata") != 0
+ && strcmp (name, ".sbss") != 0)
+ return true;
+ if (!DECL_EXTERNAL (decl))
+ return false;
+ }
+ size = int_size_in_bytes (TREE_TYPE (decl));
+ return (size > 0 && size <= SCORE7_SDATA_MAX);
+}
+
+/* Implement TARGET_ASM_FILE_START. */
+void
+score7_asm_file_start (void)
+{
+ default_file_start ();
+ fprintf (asm_out_file, ASM_COMMENT_START
+ "GCC for S+core %s \n", SCORE_GCC_VERSION);
+
+ if (flag_pic)
+ fprintf (asm_out_file, "\t.set pic\n");
+}
+
+/* Implement TARGET_ASM_FILE_END. When using assembler macros, emit
+ .externs for any small-data variables that turned out to be external. */
+void
+score7_asm_file_end (void)
+{
+ tree name_tree;
+ struct extern_list *p;
+ if (extern_head)
+ {
+ fputs ("\n", asm_out_file);
+ for (p = extern_head; p != 0; p = p->next)
+ {
+ name_tree = get_identifier (p->name);
+ if (!TREE_ASM_WRITTEN (name_tree)
+ && TREE_SYMBOL_REFERENCED (name_tree))
+ {
+ TREE_ASM_WRITTEN (name_tree) = 1;
+ fputs ("\t.extern\t", asm_out_file);
+ assemble_name (asm_out_file, p->name);
+ fprintf (asm_out_file, ", %d\n", p->size);
+ }
+ }
+ }
+}
+
+/* Implement TARGET_OPTION_OVERRIDE hook. */
+void
+score7_option_override (void)
+{
+ flag_pic = false;
+ score7_sdata_max = SCORE7_DEFAULT_SDATA_MAX;
+}
+
+/* Implement REGNO_REG_CLASS macro. */
+int
+score7_reg_class (int regno)
+{
+ int c;
+ gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
+
+ if (regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM)
+ return ALL_REGS;
+
+ for (c = 0; c < N_REG_CLASSES; c++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[c], regno))
+ return c;
+
+ return NO_REGS;
+}
+
+/* Implement PREFERRED_RELOAD_CLASS macro. */
+enum reg_class
+score7_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, enum reg_class rclass)
+{
+ if (reg_class_subset_p (G16_REGS, rclass))
+ return G16_REGS;
+ if (reg_class_subset_p (G32_REGS, rclass))
+ return G32_REGS;
+ return rclass;
+}
+
+/* Implement SECONDARY_INPUT_RELOAD_CLASS
+ and SECONDARY_OUTPUT_RELOAD_CLASS macros. */
+enum reg_class
+score7_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ int regno = -1;
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ if (!GR_REG_CLASS_P (rclass))
+ return GP_REG_P (regno) ? NO_REGS : G32_REGS;
+ return NO_REGS;
+}
+
+
+/* Return truth value on whether or not a given hard register
+ can support a given mode. */
+int
+score7_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ int size = GET_MODE_SIZE (mode);
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+
+ if (mclass == MODE_CC)
+ return regno == CC_REGNUM;
+ else if (regno == FRAME_POINTER_REGNUM
+ || regno == ARG_POINTER_REGNUM)
+ return mclass == MODE_INT;
+ else if (GP_REG_P (regno))
+ /* ((regno <= (GP_REG_LAST- HARD_REGNO_NREGS (dummy, mode)) + 1) */
+ return !(regno & 1) || (size <= UNITS_PER_WORD);
+ else if (CE_REG_P (regno))
+ return (mclass == MODE_INT
+ && ((size <= UNITS_PER_WORD)
+ || (regno == CE_REG_FIRST && size == 2 * UNITS_PER_WORD)));
+ else
+ return (mclass == MODE_INT) && (size <= UNITS_PER_WORD);
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame
+ pointer or argument pointer. TO is either the stack pointer or
+ hard frame pointer. */
+HOST_WIDE_INT
+score7_initial_elimination_offset (int from,
+ int to ATTRIBUTE_UNUSED)
+{
+ struct score7_frame_info *f = score7_compute_frame_size (get_frame_size ());
+ switch (from)
+ {
+ case ARG_POINTER_REGNUM:
+ return f->total_size;
+ case FRAME_POINTER_REGNUM:
+ return 0;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Implement TARGET_FUNCTION_ARG_ADVANCE hook. */
+void
+score7_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ struct score7_arg_info info;
+ score7_classify_arg (cum, mode, type, named, &info);
+ cum->num_gprs = info.reg_offset + info.reg_words;
+ if (info.stack_words > 0)
+ cum->stack_words = info.stack_offset + info.stack_words;
+ cum->arg_number++;
+}
+
+/* Implement TARGET_ARG_PARTIAL_BYTES macro. */
+int
+score7_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode, tree type, bool named)
+{
+ struct score7_arg_info info;
+ score7_classify_arg (cum, mode, type, named, &info);
+ return info.stack_words > 0 ? info.reg_words * UNITS_PER_WORD : 0;
+}
+
+/* Implement TARGET_FUNCTION_ARG hook. */
+rtx
+score7_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ struct score7_arg_info info;
+
+ if (mode == VOIDmode || !named)
+ return 0;
+
+ score7_classify_arg (cum, mode, type, named, &info);
+
+ if (info.reg_offset == ARG_REG_NUM)
+ return 0;
+
+ if (!info.stack_words)
+ return gen_rtx_REG (mode, ARG_REG_FIRST + info.reg_offset);
+ else
+ {
+ rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc (info.reg_words));
+ unsigned int i, part_offset = 0;
+ for (i = 0; i < info.reg_words; i++)
+ {
+ rtx reg;
+ reg = gen_rtx_REG (SImode, ARG_REG_FIRST + info.reg_offset + i);
+ XVECEXP (ret, 0, i) = gen_rtx_EXPR_LIST (SImode, reg,
+ GEN_INT (part_offset));
+ part_offset += UNITS_PER_WORD;
+ }
+ return ret;
+ }
+}
+
+/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
+ VALTYPE is the return type and MODE is VOIDmode. For libcalls,
+ VALTYPE is null and MODE is the mode of the return value. */
+rtx
+score7_function_value (const_tree valtype, const_tree func,
+ enum machine_mode mode)
+{
+ if (valtype)
+ {
+ int unsignedp;
+ mode = TYPE_MODE (valtype);
+ unsignedp = TYPE_UNSIGNED (valtype);
+ mode = promote_function_mode (valtype, mode, &unsignedp, func, 1);
+ }
+ return gen_rtx_REG (mode, RT_REGNUM);
+}
+
+/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+
+void
+score7_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\t.set r1\n");
+ fprintf (f, "\tmv r31, r3\n");
+ fprintf (f, "\tbl nextinsn\n");
+ fprintf (f, "nextinsn:\n");
+ fprintf (f, "\tlw r1, [r3, 6*4-8]\n");
+ fprintf (f, "\tlw r23, [r3, 6*4-4]\n");
+ fprintf (f, "\tmv r3, r31\n");
+ fprintf (f, "\tbr! r1\n");
+ fprintf (f, "\tnop!\n");
+ fprintf (f, "\t.set nor1\n");
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT. */
+void
+score7_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+#define CODE_SIZE (TRAMPOLINE_INSNS * UNITS_PER_WORD)
+
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx addr = XEXP (m_tramp, 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, CODE_SIZE);
+ emit_move_insn (mem, fnaddr);
+ mem = adjust_address (m_tramp, SImode, CODE_SIZE + GET_MODE_SIZE (SImode));
+ emit_move_insn (mem, chain_value);
+
+#undef CODE_SIZE
+}
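+
+/* Layout check: "bl nextinsn" in the template leaves the address of
+ nextinsn (trampoline + 8) in r3, the return-address register, so the
+ loads at [r3, 6*4-8] and [r3, 6*4-4] read trampoline + 24 and
+ trampoline + 28, which are the two words stored above, assuming
+ TRAMPOLINE_INSNS is 6 so that CODE_SIZE is 24. */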
+
+/* This function is used to implement REG_MODE_OK_FOR_BASE_P macro. */
+int
+score7_regno_mode_ok_for_base_p (int regno, int strict)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ if (!strict)
+ return 1;
+ regno = reg_renumber[regno];
+ }
+ if (regno == ARG_POINTER_REGNUM
+ || regno == FRAME_POINTER_REGNUM)
+ return 1;
+ return GP_REG_P (regno);
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P macro. */
+bool
+score7_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ struct score7_address_info addr;
+
+ return score7_classify_address (&addr, mode, x, strict);
+}
+
+/* Return a number assessing the cost of moving a register in class
+ FROM to class TO. */
+int
+score7_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class from, enum reg_class to)
+{
+ if (GR_REG_CLASS_P (from))
+ {
+ if (GR_REG_CLASS_P (to))
+ return 2;
+ else if (SP_REG_CLASS_P (to))
+ return 4;
+ else if (CP_REG_CLASS_P (to))
+ return 5;
+ else if (CE_REG_CLASS_P (to))
+ return 6;
+ }
+ if (GR_REG_CLASS_P (to))
+ {
+ if (GR_REG_CLASS_P (from))
+ return 2;
+ else if (SP_REG_CLASS_P (from))
+ return 4;
+ else if (CP_REG_CLASS_P (from))
+ return 5;
+ else if (CE_REG_CLASS_P (from))
+ return 6;
+ }
+ return 12;
+}
+
+/* Return the number of instructions needed to load a symbol of the
+ given type into a register. */
+static int
+score7_symbol_insns (enum score_symbol_type type)
+{
+ switch (type)
+ {
+ case SYMBOL_GENERAL:
+ return 2;
+
+ case SYMBOL_SMALL_DATA:
+ return 1;
+ }
+
+ gcc_unreachable ();
+}
+
+/* Return the number of instructions needed to load or store a value
+ of mode MODE at X. Return 0 if X isn't valid for MODE. */
+static int
+score7_address_insns (rtx x, enum machine_mode mode)
+{
+ struct score7_address_info addr;
+ int factor;
+
+ if (mode == BLKmode)
+ factor = 1;
+ else
+ factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (score7_classify_address (&addr, mode, x, false))
+ switch (addr.type)
+ {
+ case SCORE7_ADD_REG:
+ case SCORE7_ADD_CONST_INT:
+ return factor;
+
+ case SCORE7_ADD_SYMBOLIC:
+ return factor * score7_symbol_insns (addr.symbol_type);
+ }
+ return 0;
+}
+
+/* Implement TARGET_RTX_COSTS macro. */
+bool
+score7_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case CONST_INT:
+ if (outer_code == SET)
+ {
+ if (((INTVAL (x) & 0xffff) == 0)
+ || (INTVAL (x) >= -32768 && INTVAL (x) <= 32767))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ }
+ else if (outer_code == PLUS || outer_code == MINUS)
+ {
+ if (INTVAL (x) >= -8192 && INTVAL (x) <= 8191)
+ *total = 0;
+ else if (((INTVAL (x) & 0xffff) == 0)
+ || (INTVAL (x) >= -32768 && INTVAL (x) <= 32767))
+ *total = 1;
+ else
+ *total = COSTS_N_INSNS (2);
+ }
+ else if (outer_code == AND || outer_code == IOR)
+ {
+ if (INTVAL (x) >= 0 && INTVAL (x) <= 16383)
+ *total = 0;
+ else if (((INTVAL (x) & 0xffff) == 0)
+ || (INTVAL (x) >= 0 && INTVAL (x) <= 65535))
+ *total = 1;
+ else
+ *total = COSTS_N_INSNS (2);
+ }
+ else
+ {
+ *total = 0;
+ }
+ return true;
+
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case MEM:
+ {
+ /* If the address is legitimate, return the number of
+ instructions it needs, otherwise use the default handling. */
+ int n = score7_address_insns (XEXP (x, 0), GET_MODE (x));
+ if (n > 0)
+ {
+ *total = COSTS_N_INSNS (n + 1);
+ return true;
+ }
+ return false;
+ }
+
+ case FFS:
+ *total = COSTS_N_INSNS (6);
+ return true;
+
+ case NOT:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+ return false;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS ((GET_CODE (XEXP (x, 1)) == CONST_INT)
+ ? 4 : 12);
+ return true;
+ }
+ return false;
+
+ case ABS:
+ *total = COSTS_N_INSNS (4);
+ return true;
+
+ case PLUS:
+ case MINUS:
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case NEG:
+ if (mode == DImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+ return false;
+
+ case MULT:
+ *total = optimize_size ? COSTS_N_INSNS (2) : COSTS_N_INSNS (12);
+ return true;
+
+ case DIV:
+ case MOD:
+ case UDIV:
+ case UMOD:
+ *total = optimize_size ? COSTS_N_INSNS (2) : COSTS_N_INSNS (33);
+ return true;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case QImode:
+ case HImode:
+ if (GET_CODE (XEXP (x, 0)) == MEM)
+ {
+ *total = COSTS_N_INSNS (2);
+
+ if (!TARGET_LITTLE_ENDIAN
+ && side_effects_p (XEXP (XEXP (x, 0), 0)))
+ *total = 100;
+ }
+ else
+ *total = COSTS_N_INSNS (1);
+ break;
+
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Implement TARGET_ADDRESS_COST macro. */
+int
+score7_address_cost (rtx addr)
+{
+ return score7_address_insns (addr, SImode);
+}
+
+/* Implement ASM_OUTPUT_EXTERNAL macro. */
+int
+score7_output_external (FILE *file ATTRIBUTE_UNUSED,
+ tree decl, const char *name)
+{
+ register struct extern_list *p;
+
+ if (score7_in_small_data_p (decl))
+ {
+ p = ggc_alloc_extern_list ();
+ p->next = extern_head;
+ p->name = name;
+ p->size = int_size_in_bytes (TREE_TYPE (decl));
+ extern_head = p;
+ }
+ return 0;
+}
+
+/* Implement RETURN_ADDR_RTX. Note, we do not support moving
+ back to a previous frame. */
+rtx
+score7_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return const0_rtx;
+ return get_hard_reg_initial_val (Pmode, RA_REGNUM);
+}
+
+/* Implement PRINT_OPERAND macro. */
+/* Score-specific operand codes:
+ '[' print .set nor1 directive
+ ']' print .set r1 directive
+ 'U' print hi part of a CONST_INT rtx
+ 'E' print log2(v)
+ 'F' print log2(~v)
+ 'D' print SFmode const double
+ 'S' print "!" if the operand is accessible to 15bit instructions
+ 'V' print "v!" if the operand is accessible to 15bit instructions, else "lfh!"
+ 'L' low part of DImode reg operand
+ 'H' high part of DImode reg operand
+ 'C' print part of opcode for a branch condition. */
+void
+score7_print_operand (FILE *file, rtx op, int c)
+{
+ enum rtx_code code = UNKNOWN;
+ if (!PRINT_OPERAND_PUNCT_VALID_P (c))
+ code = GET_CODE (op);
+
+ if (c == '[')
+ {
+ fprintf (file, ".set r1\n");
+ }
+ else if (c == ']')
+ {
+ fprintf (file, "\n\t.set nor1");
+ }
+ else if (c == 'U')
+ {
+ gcc_assert (code == CONST_INT);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX,
+ (INTVAL (op) >> 16) & 0xffff);
+ }
+ else if (c == 'D')
+ {
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ rtx temp = gen_lowpart (SImode, op);
+ gcc_assert (GET_MODE (op) == SFmode);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (temp) & 0xffffffff);
+ }
+ else
+ output_addr_const (file, op);
+ }
+ else if (c == 'S')
+ {
+ gcc_assert (code == REG);
+ if (G16_REG_P (REGNO (op)))
+ fprintf (file, "!");
+ }
+ else if (c == 'V')
+ {
+ gcc_assert (code == REG);
+ fprintf (file, G16_REG_P (REGNO (op)) ? "v!" : "lfh!");
+ }
+ else if (c == 'C')
+ {
+ enum machine_mode mode = GET_MODE (XEXP (op, 0));
+
+ switch (code)
+ {
+ case EQ: fputs ("eq", file); break;
+ case NE: fputs ("ne", file); break;
+ case GT: fputs ("gt", file); break;
+ case GE: fputs (mode != CCmode ? "pl" : "ge", file); break;
+ case LT: fputs (mode != CCmode ? "mi" : "lt", file); break;
+ case LE: fputs ("le", file); break;
+ case GTU: fputs ("gtu", file); break;
+ case GEU: fputs ("cs", file); break;
+ case LTU: fputs ("cc", file); break;
+ case LEU: fputs ("leu", file); break;
+ default:
+ output_operand_lossage ("invalid operand for code: '%c'", c);
+ }
+ }
+ else if (c == 'E')
+ {
+ unsigned HOST_WIDE_INT i;
+ unsigned HOST_WIDE_INT pow2mask = 1;
+ unsigned HOST_WIDE_INT val;
+
+ val = INTVAL (op);
+ for (i = 0; i < 32; i++)
+ {
+ if (val == pow2mask)
+ break;
+ pow2mask <<= 1;
+ }
+ gcc_assert (i < 32);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, i);
+ }
+ else if (c == 'F')
+ {
+ unsigned HOST_WIDE_INT i;
+ unsigned HOST_WIDE_INT pow2mask = 1;
+ unsigned HOST_WIDE_INT val;
+
+ val = ~INTVAL (op);
+ for (i = 0; i < 32; i++)
+ {
+ if (val == pow2mask)
+ break;
+ pow2mask <<= 1;
+ }
+ gcc_assert (i < 32);
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, i);
+ }
+ else if (code == REG)
+ {
+ int regnum = REGNO (op);
+ if ((c == 'H' && !WORDS_BIG_ENDIAN)
+ || (c == 'L' && WORDS_BIG_ENDIAN))
+ regnum++;
+ fprintf (file, "%s", reg_names[regnum]);
+ }
+ else
+ {
+ switch (code)
+ {
+ case MEM:
+ score7_print_operand_address (file, op);
+ break;
+ default:
+ output_addr_const (file, op);
+ }
+ }
+}
+
+/* Implement PRINT_OPERAND_ADDRESS macro. */
+void
+score7_print_operand_address (FILE *file, rtx x)
+{
+ struct score7_address_info addr;
+ enum rtx_code code = GET_CODE (x);
+ enum machine_mode mode = GET_MODE (x);
+
+ if (code == MEM)
+ x = XEXP (x, 0);
+
+ if (score7_classify_address (&addr, mode, x, true))
+ {
+ switch (addr.type)
+ {
+ case SCORE7_ADD_REG:
+ {
+ switch (addr.code)
+ {
+ case PRE_DEC:
+ fprintf (file, "[%s,-%ld]+", reg_names[REGNO (addr.reg)],
+ INTVAL (addr.offset));
+ break;
+ case POST_DEC:
+ fprintf (file, "[%s]+,-%ld", reg_names[REGNO (addr.reg)],
+ INTVAL (addr.offset));
+ break;
+ case PRE_INC:
+ fprintf (file, "[%s, %ld]+", reg_names[REGNO (addr.reg)],
+ INTVAL (addr.offset));
+ break;
+ case POST_INC:
+ fprintf (file, "[%s]+, %ld", reg_names[REGNO (addr.reg)],
+ INTVAL (addr.offset));
+ break;
+ default:
+ if (INTVAL (addr.offset) == 0)
+ fprintf (file, "[%s]", reg_names[REGNO (addr.reg)]);
+ else
+ fprintf (file, "[%s, %ld]", reg_names[REGNO (addr.reg)],
+ INTVAL (addr.offset));
+ break;
+ }
+ }
+ return;
+ case SCORE7_ADD_CONST_INT:
+ case SCORE7_ADD_SYMBOLIC:
+ output_addr_const (file, x);
+ return;
+ }
+ }
+ print_rtl (stderr, x);
+ gcc_unreachable ();
+}
+
+/* Implement SELECT_CC_MODE macro. */
+enum machine_mode
+score7_select_cc_mode (enum rtx_code op, rtx x, rtx y)
+{
+ if ((op == EQ || op == NE || op == LT || op == GE)
+ && y == const0_rtx
+ && GET_MODE (x) == SImode)
+ {
+ switch (GET_CODE (x))
+ {
+ case PLUS:
+ case MINUS:
+ case NEG:
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ return CC_NZmode;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ case ROTATE:
+ case ROTATERT:
+ return (op == LT || op == GE) ? CC_Nmode : CCmode;
+
+ default:
+ return CCmode;
+ }
+ }
+
+ if ((op == EQ || op == NE)
+ && (GET_CODE (y) == NEG)
+ && register_operand (XEXP (y, 0), SImode)
+ && register_operand (x, SImode))
+ {
+ return CC_NZmode;
+ }
+
+ return CCmode;
+}
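+
+/* The mapping above pairs with the *branch_n/*branch_nz patterns in
+ the machine description: CC_NZmode marks results where both the N
+ and Z flags are usable, CC_Nmode results where only N is (hence it
+ is chosen only for LT/GE), and everything else needs a real compare
+ in plain CCmode. */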
+
+/* Generate the prologue instructions for entry into an S+core function. */
+void
+score7_prologue (void)
+{
+#define EMIT_PL(_rtx) RTX_FRAME_RELATED_P (_rtx) = 1
+
+ struct score7_frame_info *f = score7_compute_frame_size (get_frame_size ());
+ HOST_WIDE_INT size;
+ int regno;
+
+ size = f->total_size - f->gp_reg_size;
+
+ if (flag_pic)
+ emit_insn (gen_cpload_score7 ());
+
+ for (regno = (int) GP_REG_LAST; regno >= (int) GP_REG_FIRST; regno--)
+ {
+ if (BITSET_P (f->mask, regno - GP_REG_FIRST))
+ {
+ rtx mem = gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
+ rtx reg = gen_rtx_REG (SImode, regno);
+ if (!crtl->calls_eh_return)
+ MEM_READONLY_P (mem) = 1;
+ EMIT_PL (emit_insn (gen_pushsi_score7 (mem, reg)));
+ }
+ }
+
+ if (size > 0)
+ {
+ rtx insn;
+
+ if (size >= -32768 && size <= 32767)
+ EMIT_PL (emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-size))));
+ else
+ {
+ EMIT_PL (emit_move_insn (gen_rtx_REG (Pmode, SCORE7_PROLOGUE_TEMP_REGNUM),
+ GEN_INT (size)));
+ EMIT_PL (emit_insn
+ (gen_sub3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ gen_rtx_REG (Pmode,
+ SCORE7_PROLOGUE_TEMP_REGNUM))));
+ }
+ insn = get_last_insn ();
+ REG_NOTES (insn) =
+ alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ -size)),
+ REG_NOTES (insn));
+ }
+
+ if (frame_pointer_needed)
+ EMIT_PL (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
+
+ if (flag_pic && f->cprestore_size)
+ {
+ if (frame_pointer_needed)
+ emit_insn (gen_cprestore_use_fp_score7 (GEN_INT (size - f->cprestore_size)));
+ else
+ emit_insn (gen_cprestore_use_sp_score7 (GEN_INT (size - f->cprestore_size)));
+ }
+
+#undef EMIT_PL
+}
+
+/* Generate the epilogue instructions of an S+core function. */
+void
+score7_epilogue (int sibcall_p)
+{
+ struct score7_frame_info *f = score7_compute_frame_size (get_frame_size ());
+ HOST_WIDE_INT size;
+ int regno;
+ rtx base;
+
+ size = f->total_size - f->gp_reg_size;
+
+ if (!frame_pointer_needed)
+ base = stack_pointer_rtx;
+ else
+ base = hard_frame_pointer_rtx;
+
+ if (size)
+ {
+ if (size >= -32768 && size <= 32767)
+ emit_insn (gen_add3_insn (base, base, GEN_INT (size)));
+ else
+ {
+ emit_move_insn (gen_rtx_REG (Pmode, SCORE7_EPILOGUE_TEMP_REGNUM),
+ GEN_INT (size));
+ emit_insn (gen_add3_insn (base, base,
+ gen_rtx_REG (Pmode,
+ SCORE7_EPILOGUE_TEMP_REGNUM)));
+ }
+ }
+
+ if (base != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, base);
+
+ if (crtl->calls_eh_return)
+ emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ for (regno = (int) GP_REG_FIRST; regno <= (int) GP_REG_LAST; regno++)
+ {
+ if (BITSET_P (f->mask, regno - GP_REG_FIRST))
+ {
+ rtx mem = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+ rtx reg = gen_rtx_REG (SImode, regno);
+
+ if (!crtl->calls_eh_return)
+ MEM_READONLY_P (mem) = 1;
+
+ emit_insn (gen_popsi_score7 (reg, mem));
+ }
+ }
+
+ if (!sibcall_p)
+ emit_jump_insn (gen_return_internal_score7 (gen_rtx_REG (Pmode, RA_REGNUM)));
+}
+
+/* Return true if X is a symbolic constant that can be calculated in
+ the same way as a bare symbol. If it is, store the type of the
+ symbol in *SYMBOL_TYPE. */
+int
+score7_symbolic_constant_p (rtx x, enum score_symbol_type *symbol_type)
+{
+ HOST_WIDE_INT offset;
+
+ score7_split_const (x, &x, &offset);
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+ *symbol_type = score7_classify_symbol (x);
+ else
+ return 0;
+
+ if (offset == 0)
+ return 1;
+
+ /* If the offset does not fit in a signed 15-bit immediate, it must
+ be reloaded. */
+ if (!IMM_IN_RANGE (offset, 15, 1))
+ return 0;
+
+ switch (*symbol_type)
+ {
+ case SYMBOL_GENERAL:
+ return 1;
+ case SYMBOL_SMALL_DATA:
+ return score7_offset_within_object_p (x, offset);
+ }
+ gcc_unreachable ();
+}
+
+void
+score7_movsicc (rtx *ops)
+{
+ enum machine_mode mode;
+
+ mode = score7_select_cc_mode (GET_CODE (ops[1]), ops[2], ops[3]);
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, CC_REGNUM),
+ gen_rtx_COMPARE (mode, XEXP (ops[1], 0),
+ XEXP (ops[1], 1))));
+}
+
+/* All call and sibcall patterns need to call this function. */
+void
+score7_call (rtx *ops, bool sib)
+{
+ rtx addr = XEXP (ops[0], 0);
+ if (!call_insn_operand (addr, VOIDmode))
+ {
+ rtx oaddr = addr;
+ addr = gen_reg_rtx (Pmode);
+ gen_move_insn (addr, oaddr);
+ }
+
+ if (sib)
+ emit_call_insn (gen_sibcall_internal_score7 (addr, ops[1]));
+ else
+ emit_call_insn (gen_call_internal_score7 (addr, ops[1]));
+}
+
+/* All call_value and sibcall_value patterns need to call this function. */
+void
+score7_call_value (rtx *ops, bool sib)
+{
+ rtx result = ops[0];
+ rtx addr = XEXP (ops[1], 0);
+ rtx arg = ops[2];
+
+ if (!call_insn_operand (addr, VOIDmode))
+ {
+ rtx oaddr = addr;
+ addr = gen_reg_rtx (Pmode);
+ gen_move_insn (addr, oaddr);
+ }
+
+ if (sib)
+ emit_call_insn (gen_sibcall_value_internal_score7 (result, addr, arg));
+ else
+ emit_call_insn (gen_call_value_internal_score7 (result, addr, arg));
+}
+
+/* Machine split: expand a DImode move into two SImode word moves. */
+void
+score7_movdi (rtx *ops)
+{
+ rtx dst = ops[0];
+ rtx src = ops[1];
+ rtx dst0 = score7_subw (dst, 0);
+ rtx dst1 = score7_subw (dst, 1);
+ rtx src0 = score7_subw (src, 0);
+ rtx src1 = score7_subw (src, 1);
+
+ if (GET_CODE (dst0) == REG && reg_overlap_mentioned_p (dst0, src))
+ {
+ emit_move_insn (dst1, src1);
+ emit_move_insn (dst0, src0);
+ }
+ else
+ {
+ emit_move_insn (dst0, src0);
+ emit_move_insn (dst1, src1);
+ }
+}
+
+void
+score7_zero_extract_andi (rtx *ops)
+{
+ if (INTVAL (ops[1]) == 1 && const_uimm5 (ops[2], SImode))
+ emit_insn (gen_zero_extract_bittst_score7 (ops[0], ops[2]));
+ else
+ {
+ unsigned HOST_WIDE_INT mask;
+ mask = (0xffffffffU & ((1U << INTVAL (ops[1])) - 1U));
+ mask = mask << INTVAL (ops[2]);
+ emit_insn (gen_andsi3_cmp_score7 (ops[3], ops[0],
+ gen_int_mode (mask, SImode)));
+ }
+}
+
+/* Return true if ADDR is a memory reference that uses a PRE/POST
+ increment or decrement addressing mode. */
+static bool
+score7_pindex_mem (rtx addr)
+{
+ if (GET_CODE (addr) == MEM)
+ {
+ switch (GET_CODE (XEXP (addr, 0)))
+ {
+ case PRE_DEC:
+ case POST_DEC:
+ case PRE_INC:
+ case POST_INC:
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+/* Output the operand portion of the asm template for a load/store insn. */
+static int
+score7_pr_addr_post (rtx *ops, int idata, int iaddr, char *ip, enum score_mem_unit unit)
+{
+ struct score7_address_info ai;
+
+ gcc_assert (GET_CODE (ops[idata]) == REG);
+ gcc_assert (score7_classify_address (&ai, SImode, XEXP (ops[iaddr], 0), true));
+
+ if (!score7_pindex_mem (ops[iaddr])
+ && ai.type == SCORE7_ADD_REG
+ && GET_CODE (ai.offset) == CONST_INT
+ && G16_REG_P (REGNO (ops[idata]))
+ && G16_REG_P (REGNO (ai.reg)))
+ {
+ if (INTVAL (ai.offset) == 0)
+ {
+ ops[iaddr] = ai.reg;
+ return snprintf (ip, INS_BUF_SZ,
+ "!\t%%%d, [%%%d]", idata, iaddr);
+ }
+ if (REGNO (ai.reg) == HARD_FRAME_POINTER_REGNUM)
+ {
+ HOST_WIDE_INT offset = INTVAL (ai.offset);
+ if (SCORE_ALIGN_UNIT (offset, unit)
+ && (((offset >> unit) >= 0) && ((offset >> unit) <= 31)))
+ {
+ ops[iaddr] = ai.offset;
+ return snprintf (ip, INS_BUF_SZ,
+ "p!\t%%%d, %%c%d", idata, iaddr);
+ }
+ }
+ }
+ return snprintf (ip, INS_BUF_SZ, "\t%%%d, %%a%d", idata, iaddr);
+}
+
+/* Output asm insn for load. */
+const char *
+score7_linsn (rtx *ops, enum score_mem_unit unit, bool sign)
+{
+ const char *pre_ins[] =
+ {"lbu", "lhu", "lw", "??", "lb", "lh", "lw", "??"};
+ char *ip;
+
+ strcpy (score7_ins, pre_ins[(sign ? 4 : 0) + unit]);
+ ip = score7_ins + strlen (score7_ins);
+
+ if ((!sign && unit != SCORE_HWORD)
+ || (sign && unit != SCORE_BYTE))
+ score7_pr_addr_post (ops, 0, 1, ip, unit);
+ else
+ snprintf (ip, INS_BUF_SZ, "\t%%0, %%a1");
+
+ return score7_ins;
+}
+
+/* Output asm insn for store. */
+const char *
+score7_sinsn (rtx *ops, enum score_mem_unit unit)
+{
+ const char *pre_ins[] = {"sb", "sh", "sw"};
+ char *ip;
+
+ strcpy (score7_ins, pre_ins[unit]);
+ ip = score7_ins + strlen (score7_ins);
+ score7_pr_addr_post (ops, 1, 0, ip, unit);
+ return score7_ins;
+}
+
+/* Output asm insn for load immediate. */
+const char *
+score7_limm (rtx *ops)
+{
+ HOST_WIDE_INT v;
+
+ gcc_assert (GET_CODE (ops[0]) == REG);
+ gcc_assert (GET_CODE (ops[1]) == CONST_INT);
+
+ v = INTVAL (ops[1]);
+ if (G16_REG_P (REGNO (ops[0])) && IMM_IN_RANGE (v, 8, 0))
+ return "ldiu!\t%0, %c1";
+ else if (IMM_IN_RANGE (v, 16, 1))
+ return "ldi\t%0, %c1";
+ else if ((v & 0xffff) == 0)
+ return "ldis\t%0, %U1";
+ else
+ return "li\t%0, %c1";
+}
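
A hedged sketch of the selection order above, assuming IMM_IN_RANGE (v, bits, s) tests an unsigned (s == 0) or signed (s == 1) bits-wide range; pick_limm and its literal bounds are illustrative stand-ins, not port code:

    #include <stdio.h>

    static const char *pick_limm (long v, int g16_dest)
    {
      if (g16_dest && v >= 0 && v <= 255)   /* 8-bit unsigned: ldiu! */
        return "ldiu!";
      if (v >= -32768 && v <= 32767)        /* 16-bit signed: ldi */
        return "ldi";
      if ((v & 0xffff) == 0)                /* low half clear: ldis */
        return "ldis";
      return "li";                          /* anything else: synthesized li */
    }

    int main (void)
    {
      printf ("%s\n", pick_limm (200, 1));        /* ldiu! */
      printf ("%s\n", pick_limm (-5, 0));         /* ldi */
      printf ("%s\n", pick_limm (0x120000, 0));   /* ldis */
      printf ("%s\n", pick_limm (0x12345, 0));    /* li */
      return 0;
    }
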
+
+/* Output asm insn for move. */
+const char *
+score7_move (rtx *ops)
+{
+ gcc_assert (GET_CODE (ops[0]) == REG);
+ gcc_assert (GET_CODE (ops[1]) == REG);
+
+ if (G16_REG_P (REGNO (ops[0])))
+ {
+ if (G16_REG_P (REGNO (ops[1])))
+ return "mv!\t%0, %1";
+ else
+ return "mlfh!\t%0, %1";
+ }
+ else if (G16_REG_P (REGNO (ops[1])))
+ return "mhfl!\t%0, %1";
+ else
+ return "mv\t%0, %1";
+}
+
+/* Generate add insn. */
+const char *
+score7_select_add_imm (rtx *ops, bool set_cc)
+{
+ HOST_WIDE_INT v = INTVAL (ops[2]);
+
+ gcc_assert (GET_CODE (ops[2]) == CONST_INT);
+ gcc_assert (REGNO (ops[0]) == REGNO (ops[1]));
+
+ if (set_cc && G16_REG_P (REGNO (ops[0])))
+ {
+ if (v > 0 && IMM_IS_POW_OF_2 ((unsigned HOST_WIDE_INT) v, 0, 15))
+ {
+ ops[2] = GEN_INT (ffs (v) - 1);
+ return "addei!\t%0, %c2";
+ }
+
+ if (v < 0 && IMM_IS_POW_OF_2 ((unsigned HOST_WIDE_INT) (-v), 0, 15))
+ {
+ ops[2] = GEN_INT (ffs (-v) - 1);
+ return "subei!\t%0, %c2";
+ }
+ }
+
+ if (set_cc)
+ return "addi.c\t%0, %c2";
+ else
+ return "addi\t%0, %c2";
+}
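
The compact addei!/subei! forms carry the exponent rather than the value: for v == 2^n, ffs (v) - 1 recovers n. A small standalone check of that encoding (values hypothetical):

    #include <stdio.h>
    #include <strings.h>

    int main (void)
    {
      long v = 64;                           /* adding 2^6 */
      if (v > 0 && (v & (v - 1)) == 0        /* power of two */
          && ffs ((int) v) - 1 <= 15)        /* exponent fits the insn */
        printf ("addei!\tr0, %d\n", ffs ((int) v) - 1);   /* prints 6 */
      return 0;
    }
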
+
+/* Output arith insn. */
+const char *
+score7_select (rtx *ops, const char *inst_pre,
+ bool commu, const char *letter, bool set_cc)
+{
+ gcc_assert (GET_CODE (ops[0]) == REG);
+ gcc_assert (GET_CODE (ops[1]) == REG);
+
+ if (set_cc && G16_REG_P (REGNO (ops[0]))
+ && (GET_CODE (ops[2]) == REG ? G16_REG_P (REGNO (ops[2])) : 1)
+ && REGNO (ops[0]) == REGNO (ops[1]))
+ {
+ snprintf (score7_ins, INS_BUF_SZ, "%s!\t%%0, %%%s2", inst_pre, letter);
+ return score7_ins;
+ }
+
+ if (commu && set_cc && G16_REG_P (REGNO (ops[0]))
+ && G16_REG_P (REGNO (ops[1]))
+ && REGNO (ops[0]) == REGNO (ops[2]))
+ {
+ gcc_assert (GET_CODE (ops[2]) == REG);
+ snprintf (score7_ins, INS_BUF_SZ, "%s!\t%%0, %%%s1", inst_pre, letter);
+ return score7_ins;
+ }
+
+ if (set_cc)
+ snprintf (score7_ins, INS_BUF_SZ, "%s.c\t%%0, %%1, %%%s2", inst_pre, letter);
+ else
+ snprintf (score7_ins, INS_BUF_SZ, "%s\t%%0, %%1, %%%s2", inst_pre, letter);
+ return score7_ins;
+}
+
diff --git a/gcc/config/score/score7.h b/gcc/config/score/score7.h
new file mode 100644
index 000000000..71654aae1
--- /dev/null
+++ b/gcc/config/score/score7.h
@@ -0,0 +1,158 @@
+/* score7.h for Sunplus S+CORE processor
+ Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Sunnorth
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SCORE7_H
+#define GCC_SCORE7_H
+
+enum score7_address_type
+{
+ SCORE7_ADD_REG,
+ SCORE7_ADD_CONST_INT,
+ SCORE7_ADD_SYMBOLIC
+};
+
+struct score7_frame_info
+{
+ HOST_WIDE_INT total_size; /* bytes that the entire frame takes up */
+ HOST_WIDE_INT var_size; /* bytes that variables take up */
+ HOST_WIDE_INT args_size; /* bytes that outgoing arguments take up */
+ HOST_WIDE_INT gp_reg_size; /* bytes needed to store gp regs */
+ HOST_WIDE_INT gp_sp_offset; /* offset from new sp to store gp registers */
+ HOST_WIDE_INT cprestore_size; /* # bytes that the .cprestore slot takes up */
+ unsigned int mask; /* mask of saved gp registers */
+ int num_gp; /* number of gp registers saved */
+};
+
+struct score7_arg_info
+{
+ unsigned int num_bytes; /* The argument's size in bytes */
+ unsigned int reg_words; /* The number of words passed in registers */
+ unsigned int reg_offset; /* The offset of the first register from */
+ /* GP_ARG_FIRST or FP_ARG_FIRST etc */
+ unsigned int stack_words; /* The number of words that must be passed */
+ /* on the stack */
+ unsigned int stack_offset; /* The offset from the start of the stack */
+ /* overflow area */
+};
+
+#ifdef RTX_CODE
+struct score7_address_info
+{
+ enum score7_address_type type;
+ rtx reg;
+ rtx offset;
+ enum rtx_code code;
+ enum score_symbol_type symbol_type;
+};
+#endif
+
+#define SCORE7_SDATA_MAX score7_sdata_max
+#define SCORE7_STACK_ALIGN(LOC) (((LOC) + 3) & ~3)
+#define SCORE7_PROLOGUE_TEMP_REGNUM (GP_REG_FIRST + 8)
+#define SCORE7_EPILOGUE_TEMP_REGNUM (GP_REG_FIRST + 8)
+#define SCORE7_DEFAULT_SDATA_MAX 8
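
As a worked example of the alignment macro (a standalone check, not part of the header): SCORE7_STACK_ALIGN rounds a byte count up to the next multiple of 4.

    #include <assert.h>

    #define SCORE7_STACK_ALIGN(LOC) (((LOC) + 3) & ~3)

    int main (void)
    {
      assert (SCORE7_STACK_ALIGN (0)  == 0);
      assert (SCORE7_STACK_ALIGN (13) == 16);
      assert (SCORE7_STACK_ALIGN (16) == 16);
      return 0;
    }
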
+
+extern int score7_symbolic_constant_p (rtx x,
+ enum score_symbol_type *symbol_type);
+extern bool score7_return_in_memory (const_tree type,
+ const_tree fndecl ATTRIBUTE_UNUSED);
+extern void score7_output_mi_thunk (FILE *file,
+ tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset,
+ tree function);
+extern rtx score7_legitimize_address (rtx x);
+extern void
+score7_function_prologue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED);
+extern void
+score7_function_epilogue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED);
+extern section *score7_select_rtx_section (enum machine_mode mode, rtx x,
+ unsigned HOST_WIDE_INT align);
+extern bool score7_in_small_data_p (const_tree decl);
+extern void score7_asm_file_start (void);
+extern void score7_asm_file_end (void);
+extern void score7_option_override (void);
+extern int score7_reg_class (int regno);
+extern enum reg_class score7_preferred_reload_class (rtx x ATTRIBUTE_UNUSED,
+ enum reg_class rclass);
+extern enum reg_class
+score7_secondary_reload_class (enum reg_class rclass,
+                               enum machine_mode mode ATTRIBUTE_UNUSED,
+                               rtx x);
+extern int score7_const_ok_for_letter_p (HOST_WIDE_INT value, char c);
+extern int score7_extra_constraint (rtx op, char c);
+extern int score7_hard_regno_mode_ok (unsigned int regno,
+ enum machine_mode mode);
+extern HOST_WIDE_INT
+score7_initial_elimination_offset (int from,
+ int to ATTRIBUTE_UNUSED);
+extern void score7_function_arg_advance (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type,
+ bool named);
+extern int score7_arg_partial_bytes (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ tree type,
+ bool named);
+extern rtx score7_function_arg (const CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type,
+ bool named);
+extern rtx score7_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ enum machine_mode mode);
+extern void score7_asm_trampoline_template (FILE *);
+extern void score7_trampoline_init (rtx, tree, rtx);
+extern int score7_regno_mode_ok_for_base_p (int regno, int strict);
+extern bool score7_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool strict);
+extern int score7_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class from,
+ enum reg_class to);
+extern bool score7_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed);
+extern int score7_address_cost (rtx addr);
+extern int score7_output_external (FILE *file ATTRIBUTE_UNUSED,
+ tree decl,
+ const char *name);
+extern rtx score7_return_addr (int count, rtx frame ATTRIBUTE_UNUSED);
+extern void score7_print_operand (FILE *file, rtx op, int c);
+extern void score7_print_operand_address (FILE *file, rtx x);
+extern enum machine_mode score7_select_cc_mode (enum rtx_code op,
+ rtx x,
+ rtx y);
+extern void score7_prologue (void);
+extern void score7_epilogue (int sibcall_p);
+extern void score7_call (rtx *ops, bool sib);
+extern void score7_call_value (rtx *ops, bool sib);
+extern void score7_movsicc (rtx *ops);
+extern void score7_movdi (rtx *ops);
+extern void score7_zero_extract_andi (rtx *ops);
+extern const char * score7_select_add_imm (rtx *ops, bool set_cc);
+extern const char * score7_select (rtx *ops, const char *inst_pre, bool commu,
+ const char *letter, bool set_cc);
+extern const char * score7_move (rtx *ops);
+extern const char * score7_limm (rtx *ops);
+extern const char *
+score7_linsn (rtx *ops, enum score_mem_unit unit, bool sign);
+extern const char *
+score7_sinsn (rtx *ops, enum score_mem_unit unit);
+#endif
diff --git a/gcc/config/score/sfp-machine.h b/gcc/config/score/sfp-machine.h
new file mode 100644
index 000000000..98f9f1bf4
--- /dev/null
+++ b/gcc/config/score/sfp-machine.h
@@ -0,0 +1,57 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+/* The type of the result of a floating point comparison. This must
+ match `__libgcc_cmp_return__' in GCC for the target. */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+# define __BYTE_ORDER __BIG_ENDIAN
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
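
For illustration, a minimal use of the macro pair above, restated so the snippet is self-contained (add and add_alias are made-up names; requires GCC, where the alias attribute is available):

    #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
    #define _strong_alias(name, aliasname) \
      extern __typeof (name) aliasname __attribute__ ((alias (#name)));

    int add (int a, int b) { return a + b; }
    strong_alias (add, add_alias)   /* add_alias now names the same symbol */

    int main (void)
    {
      return add_alias (1, 2) == 3 ? 0 : 1;
    }
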
+
diff --git a/gcc/config/score/t-score-elf b/gcc/config/score/t-score-elf
new file mode 100644
index 000000000..f02c482f5
--- /dev/null
+++ b/gcc/config/score/t-score-elf
@@ -0,0 +1,33 @@
+# Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Additional Backend Files
+score7.o: $(srcdir)/config/score/score7.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h $(TM_H) $(RTL_H) output.h flags.h $(TREE_H) \
+ expr.h toplev.h $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/score/score7.c
+
+# Assemble startup files.
+$(T)crti.o: $(srcdir)/config/score/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/score/crti.asm
+
+$(T)crtn.o: $(srcdir)/config/score/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/score/crtn.asm
diff --git a/gcc/config/score/t-score-softfp b/gcc/config/score/t-score-softfp
new file mode 100644
index 000000000..b658ef89b
--- /dev/null
+++ b/gcc/config/score/t-score-softfp
@@ -0,0 +1,9 @@
+softfp_float_modes := sf df
+softfp_int_modes := si di
+softfp_extensions := sfdf
+softfp_truncations := dfsf
+softfp_machine_header := score/sfp-machine.h
+softfp_exclude_libgcc2 := y
+
+# softfp seems to be missing a whole bunch of prototypes.
+TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
diff --git a/gcc/config/sh/constraints.md b/gcc/config/sh/constraints.md
new file mode 100644
index 000000000..6b0e5d27c
--- /dev/null
+++ b/gcc/config/sh/constraints.md
@@ -0,0 +1,265 @@
+;; Constraint definitions for Renesas / SuperH SH.
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Overview of uppercase letter constraints:
+;; Bxx: miscellaneous constraints
+;; Bsc: SCRATCH - for the scratch register in movsi_ie in the
+;;       fldi0 / fldi1 cases
+;; Cxx: Constants other than only CONST_INT
+;; Css: signed 16-bit constant, literal or symbolic
+;; Csu: unsigned 16-bit constant, literal or symbolic
+;; Csy: label or symbol
+;; Cpg: non-explicit constants that can be directly loaded into a general
+;;      purpose register in PIC code.  Like 's' except we don't allow
+;; PIC_ADDR_P
+;; IJKLMNOP: CONST_INT constants
+;; Ixx: signed xx bit
+;; J16: 0xffffffff00000000 | 0x00000000ffffffff
+;; Kxx: unsigned xx bit
+;; M: 1
+;; N: 0
+;; P27: 1 | 2 | 8 | 16
+;; Pso: 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128
+;; Psz: ~1 | ~2 | ~4 | ~8 | ~16 | ~32 | ~64 | ~128
+;; Q: pc relative load operand
+;; Rxx: reserved for exotic register classes.
+;; Sxx: extra memory (storage) constraints
+;; Sua: unaligned memory operations
+;; W: vector
+;; Z: zero in any mode
+;;
+;; unused CONST_INT constraint letters: LO
+;; unused EXTRA_CONSTRAINT letters: D T U Y
+
+;; Register constraints
+(define_register_constraint "a" "ALL_REGS"
+ "@internal")
+
+(define_register_constraint "b" "TARGET_REGS"
+ "Branch target registers.")
+
+(define_register_constraint "c" "FPSCR_REGS"
+ "Floating-point status register.")
+
+(define_register_constraint "d" "DF_REGS"
+ "Double precision floating-point register.")
+
+(define_register_constraint "e" "TARGET_FMOVD ? NO_REGS : FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "f" "FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "k" "SIBCALL_REGS"
+ "@internal")
+
+(define_register_constraint "l" "PR_REGS"
+ "PR register.")
+
+(define_register_constraint "t" "T_REGS"
+ "T register.")
+
+(define_register_constraint "w" "FP0_REGS"
+ "Floating-point register 0.")
+
+(define_register_constraint "x" "MAC_REGS"
+ "MACH and MACL registers.")
+
+(define_register_constraint "y" "FPUL_REGS"
+ "FPUL register.")
+
+(define_register_constraint "z" "R0_REGS"
+ "R0 register.")
+
+;; Integer constraints
+(define_constraint "I06"
+ "A signed 6-bit constant, as used in SHmedia beqi, bnei and xori."
+ (and (match_code "const_int")
+ (match_test "ival >= -32 && ival <= 31")))
+
+(define_constraint "I08"
+ "A signed 8-bit constant, as used in add, sub, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= -128 && ival <= 127")))
+
+(define_constraint "I10"
+ "A signed 10-bit constant, as used in in SHmedia andi, ori."
+ (and (match_code "const_int")
+ (match_test "ival >= -512 && ival <= 511")))
+
+(define_constraint "I16"
+ "A signed 16-bit constant, as used in SHmedia movi."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+(define_constraint "I20"
+ "A signed 20-bit constant, as used in SH2A movi20."
+ (and (match_code "const_int")
+ (match_test "ival >= -524288 && ival <= 524287")
+ (match_test "TARGET_SH2A")))
+
+(define_constraint "I28"
+ "A signed 28-bit constant, as used in SH2A movi20s."
+ (and (match_code "const_int")
+ (match_test "ival >= -134217728 && ival <= 134217727")
+ (match_test "(ival & 255) == 0")
+ (match_test "TARGET_SH2A")))
+(define_constraint "J16"
+ "0xffffffff00000000 or 0x00000000ffffffff."
+ (and (match_code "const_int")
+ (match_test "CONST_OK_FOR_J16 (ival)")))
+
+(define_constraint "K03"
+ "An unsigned 3-bit constant, as used in SH2A bclr, bset, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 7")))
+
+(define_constraint "K08"
+ "An unsigned 8-bit constant, as used in and, or, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 255")))
+
+(define_constraint "K12"
+ "An unsigned 8-bit constant, as used in SH2A 12-bit display."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 4095")))
+
+(define_constraint "K16"
+ "An unsigned 16-bit constant, as used in SHmedia shori."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+(define_constraint "P27"
+ "A constant for shift operand 1,2,8 or 16."
+ (and (match_code "const_int")
+ (match_test "ival == 1 || ival == 2 || ival == 8 || ival == 16")))
+
+(define_constraint "M"
+ "Integer constant 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "N"
+ "Integer constant 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+;; Floating-point constraints
+(define_constraint "G"
+ "Double constant 0."
+ (and (match_code "const_double")
+ (match_test "fp_zero_operand (op) && fldi_ok ()")))
+
+(define_constraint "H"
+ "Double constant 1."
+ (and (match_code "const_double")
+ (match_test "fp_one_operand (op) && fldi_ok ()")))
+
+;; Extra constraints
+(define_constraint "Q"
+ "A pc relative load operand."
+ (and (match_code "mem")
+ (match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))")))
+
+(define_constraint "Bsc"
+ "Constraint for selecting FLDI0 or FLDI1 instruction. If the clobber
+ operand is not SCRATCH (i.e. REG) then R0 is probably being used,
+ hence mova is being used, hence do not select this pattern."
+ (match_code "scratch"))
+
+(define_constraint "Css"
+ "A signed 16-bit constant, literal or symbolic."
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_S16")))
+
+(define_constraint "Csu"
+ "An unsigned 16-bit constant, literal or symbolic."
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_U16")))
+
+(define_constraint "Csy"
+ "A label or a symbol."
+ (ior (match_test "NON_PIC_REFERENCE_P (op)")
+ (match_test "PIC_ADDR_P (op)")))
+
+(define_constraint "Z"
+ "A zero in any shape or form."
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))
+
+(define_constraint "W"
+ "Any vector constant we can handle."
+ (and (match_code "const_vector")
+ (ior (match_test "sh_rep_vec (op, VOIDmode)")
+ (match_test "HOST_BITS_PER_WIDE_INT >= 64
+ ? sh_const_vec (op, VOIDmode)
+ : sh_1el_vec (op, VOIDmode)"))))
+
+(define_constraint "Cpg"
+ "A non-explicit constant that can be loaded directly into a general
+ purpose register. This is like 's' except we don't allow
+ PIC_ADDR_P."
+ (match_test "IS_NON_EXPLICIT_CONSTANT_P (op)"))
+
+(define_constraint "Pso"
+ "Integer constant with a single bit set in its lower 8-bit."
+ (and (match_code "const_int")
+ (ior (match_test "ival == 1")
+ (match_test "ival == 2")
+ (match_test "ival == 4")
+ (match_test "ival == 8")
+ (match_test "ival == 16")
+ (match_test "ival == 32")
+ (match_test "ival == 64")
+ (match_test "ival == 128"))))
+
+(define_constraint "Psz"
+ "Integer constant with a single zero bit in the lower 8-bit."
+ (and (match_code "const_int")
+ (ior (match_test "~ival == 1")
+ (match_test "~ival == 2")
+ (match_test "~ival == 4")
+ (match_test "~ival == 8")
+ (match_test "~ival == 16")
+ (match_test "~ival == 32")
+ (match_test "~ival == 64")
+ (match_test "~ival == 128"))))
+
+(define_memory_constraint "Sr0"
+ "@internal"
+ (and (match_test "memory_operand (op, GET_MODE (op))")
+ (match_test "!refers_to_regno_p (R0_REG, R0_REG + 1, op, (rtx *) 0)")))
+
+(define_memory_constraint "Sua"
+ "@internal"
+ (and (match_test "memory_operand (op, GET_MODE (op))")
+ (match_test "GET_CODE (XEXP (op, 0)) != PLUS")))
+
+(define_memory_constraint "Sbv"
+ "A memory reference, as used in SH2A bclr.b, bset.b, etc."
+ (and (match_test "MEM_P (op) && GET_MODE (op) == QImode")
+ (match_test "REG_P (XEXP (op, 0))")))
+
+(define_memory_constraint "Sbw"
+ "A memory reference, as used in SH2A bclr.b, bset.b, etc."
+ (and (match_test "MEM_P (op) && GET_MODE (op) == QImode")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
diff --git a/gcc/config/sh/crt1.asm b/gcc/config/sh/crt1.asm
new file mode 100644
index 000000000..e2857904f
--- /dev/null
+++ b/gcc/config/sh/crt1.asm
@@ -0,0 +1,1369 @@
+/* Copyright (C) 2000, 2001, 2003, 2004, 2005, 2006, 2009
+ Free Software Foundation, Inc.
+ This file was pretty much copied from newlib.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#ifdef MMU_SUPPORT
+ /* Section used for exception/timer interrupt stack area */
+ .section .data.vbr.stack,"aw"
+ .align 4
+ .global __ST_VBR
+__ST_VBR:
+ .zero 1024 * 2 /* ; 2k for VBR handlers */
+/* Label at the highest stack address where the stack grows from */
+__timer_stack:
+#endif /* MMU_SUPPORT */
+
+ /* ;----------------------------------------
+ Normal newlib crt1.asm */
+
+#ifdef __SH5__
+ .section .data,"aw"
+ .global ___data
+___data:
+
+ .section .rodata,"a"
+ .global ___rodata
+___rodata:
+
+#define ICCR_BASE 0x01600000
+#define OCCR_BASE 0x01e00000
+#define MMUIR_BASE 0x00000000
+#define MMUDR_BASE 0x00800000
+
+#define PTE_ENABLED 1
+#define PTE_DISABLED 0
+
+#define PTE_SHARED (1 << 1)
+#define PTE_NOT_SHARED 0
+
+#define PTE_CB_UNCACHEABLE 0
+#define PTE_CB_DEVICE 1
+#define PTE_CB_CACHEABLE_WB 2
+#define PTE_CB_CACHEABLE_WT 3
+
+#define PTE_SZ_4KB (0 << 3)
+#define PTE_SZ_64KB (1 << 3)
+#define PTE_SZ_1MB (2 << 3)
+#define PTE_SZ_512MB (3 << 3)
+
+#define PTE_PRR (1 << 6)
+#define PTE_PRX (1 << 7)
+#define PTE_PRW (1 << 8)
+#define PTE_PRU (1 << 9)
+
+#define SR_MMU_BIT 31
+#define SR_BL_BIT 28
+
+#define ALIGN_4KB (0xfff)
+#define ALIGN_1MB (0xfffff)
+#define ALIGN_512MB (0x1fffffff)
+
+#define DYNACON_BASE 0x0f000000
+#define DM_CB_DLINK_BASE 0x0c000000
+#define DM_DB_DLINK_BASE 0x0b000000
+
+#define FEMI_AREA_0 0x00000000
+#define FEMI_AREA_1 0x04000000
+#define FEMI_AREA_2 0x05000000
+#define FEMI_AREA_3 0x06000000
+#define FEMI_AREA_4 0x07000000
+#define FEMI_CB 0x08000000
+
+#define EMI_BASE 0X80000000
+
+#define DMA_BASE 0X0e000000
+
+#define CPU_BASE 0X0d000000
+
+#define PERIPH_BASE 0X09000000
+#define DMAC_BASE 0x0e000000
+#define INTC_BASE 0x0a000000
+#define CPRC_BASE 0x0a010000
+#define TMU_BASE 0x0a020000
+#define SCIF_BASE 0x0a030000
+#define RTC_BASE 0x0a040000
+
+
+
+#define LOAD_CONST32(val, reg) \
+ movi ((val) >> 16) & 65535, reg; \
+ shori (val) & 65535, reg
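
A worked example of the LOAD_CONST32 expansion: loading 0x12345678 becomes movi 0x1234, reg followed by shori 0x5678, reg. The operand arithmetic, checked in C:

    #include <assert.h>

    int main (void)
    {
      unsigned long val = 0x12345678UL;
      assert (((val >> 16) & 65535) == 0x1234);   /* movi operand */
      assert ((val & 65535) == 0x5678);           /* shori operand */
      return 0;
    }
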
+
+#define LOAD_PTEH_VAL(sym, align, bits, scratch_reg, reg) \
+ LOAD_ADDR (sym, reg); \
+ LOAD_CONST32 ((align), scratch_reg); \
+ andc reg, scratch_reg, reg; \
+ LOAD_CONST32 ((bits), scratch_reg); \
+ or reg, scratch_reg, reg
+
+#define LOAD_PTEL_VAL(sym, align, bits, scratch_reg, reg) \
+ LOAD_ADDR (sym, reg); \
+ LOAD_CONST32 ((align), scratch_reg); \
+ andc reg, scratch_reg, reg; \
+ LOAD_CONST32 ((bits), scratch_reg); \
+ or reg, scratch_reg, reg
+
+#define SET_PTE(pte_addr_reg, pteh_val_reg, ptel_val_reg) \
+ putcfg pte_addr_reg, 0, r63; \
+ putcfg pte_addr_reg, 1, ptel_val_reg; \
+ putcfg pte_addr_reg, 0, pteh_val_reg
+
+#if __SH5__ == 64
+ .section .text,"ax"
+#define LOAD_ADDR(sym, reg) \
+ movi (sym >> 48) & 65535, reg; \
+ shori (sym >> 32) & 65535, reg; \
+ shori (sym >> 16) & 65535, reg; \
+ shori sym & 65535, reg
+#else
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+#define LOAD_ADDR(sym, reg) \
+ movi (sym >> 16) & 65535, reg; \
+ shori sym & 65535, reg
+#endif
+ .global start
+start:
+ LOAD_ADDR (_stack, r15)
+
+#ifdef MMU_SUPPORT
+ ! Set up the VM using the MMU and caches
+
+ ! .vm_ep is first instruction to execute
+ ! after VM initialization
+ pt/l .vm_ep, tr1
+
+ ! Configure instruction cache (ICCR)
+ movi 3, r2
+ movi 0, r3
+ LOAD_ADDR (ICCR_BASE, r1)
+ putcfg r1, 0, r2
+ putcfg r1, 1, r3
+
+ ! movi 7, r2 ! write through
+ ! Configure operand cache (OCCR)
+ LOAD_ADDR (OCCR_BASE, r1)
+ putcfg r1, 0, r2
+ putcfg r1, 1, r3
+
+ ! Disable all PTE translations
+ LOAD_ADDR (MMUIR_BASE, r1)
+ LOAD_ADDR (MMUDR_BASE, r2)
+ movi 64, r3
+ pt/l .disable_ptes_loop, tr0
+.disable_ptes_loop:
+ putcfg r1, 0, r63
+ putcfg r2, 0, r63
+ addi r1, 16, r1
+ addi r2, 16, r2
+ addi r3, -1, r3
+ bgt r3, r63, tr0
+
+ LOAD_ADDR (MMUIR_BASE, r1)
+
+ ! FEMI instruction mappings
+ ! Area 0 - 1Mb cacheable at 0x00000000
+ ! Area 1 - None
+ ! Area 2 - 1Mb cacheable at 0x05000000
+ ! - 1Mb cacheable at 0x05100000
+ ! Area 3 - None
+ ! Area 4 - None
+
+ ! Map a 1Mb page for instructions at 0x00000000
+ LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1Mb page for instructions at 0x05000000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1Mb page for instructions at 0x05100000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 512M page for instructions at EMI base
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRX | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for instructions at DM_DB_DLINK_BASE
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRX | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ LOAD_ADDR (MMUDR_BASE, r1)
+
+ ! FEMI data mappings
+ ! Area 0 - 1Mb cacheable at 0x00000000
+ ! Area 1 - 1Mb device at 0x04000000
+ ! Area 2 - 1Mb cacheable at 0x05000000
+ ! - 1Mb cacheable at 0x05100000
+ ! Area 3 - None
+ ! Area 4 - None
+ ! CB - 1Mb device at 0x08000000
+
+ ! Map a 1Mb page for data at 0x00000000
+ LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1Mb page for data at 0x04000000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1Mb page for data at 0x05000000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1Mb page for data at 0x05100000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for registers at 0x08000000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (FEMI_CB, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (FEMI_CB, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 512M page for data at EMI
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for DYNACON at DYNACON_BASE
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DYNACON_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DYNACON_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for instructions at DM_DB_DLINK_BASE
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for data at DM_DB_DLINK_BASE+0x1000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_CB_UNCACHEABLE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for stack DM_DB_DLINK_BASE+0x2000
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
+ ! 0x0c000000 - 0x0c0fffff
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
+ ! 0x0c100000 - 0x0c1fffff
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
+ ! 0x0c200000 - 0x0c2fffff
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
+ ! 0x0c400000 - 0x0c4fffff
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
+ ! 0x0c800000 - 0x0c8fffff
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map a 4K page for DMA control registers
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DMA_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DMA_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map lots of 4K pages for peripherals
+
+ ! /* peripheral */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (PERIPH_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (PERIPH_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* dmac */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (DMAC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (DMAC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* intc */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (INTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (INTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* rtc */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (RTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (RTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* dmac */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (TMU_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (TMU_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* scif */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (SCIF_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (SCIF_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ ! /* cprc */
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (CPRC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (CPRC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ ! Map CPU WPC registers
+ addi r1, 16, r1
+ LOAD_PTEH_VAL (CPU_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL (CPU_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+ addi r1, 16, r1
+
+ LOAD_PTEH_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+ addi r1, 16, r1
+ LOAD_PTEH_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
+ LOAD_PTEL_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
+ SET_PTE (r1, r2, r3)
+
+	! Switch over to virtual addressing and enable the cache
+ getcon sr, r1
+ movi 1, r2
+ shlli r2, SR_BL_BIT, r2
+ or r1, r2, r1
+ putcon r1, ssr
+ getcon sr, r1
+ movi 1, r2
+ shlli r2, SR_MMU_BIT, r2
+ or r1, r2, r1
+ putcon r1, ssr
+ gettr tr1, r1
+ putcon r1, spc
+ synco
+ rte
+
+ ! VM entry point. From now on, we are in VM mode.
+.vm_ep:
+
+ ! Install the trap handler, by seeding vbr with the
+ ! correct value, and by assigning sr.bl = 0.
+
+ LOAD_ADDR (vbr_start, r1)
+ putcon r1, vbr
+ movi ~(1<<28), r1
+ getcon sr, r2
+ and r1, r2, r2
+ putcon r2, sr
+#endif /* MMU_SUPPORT */
+
+ pt/l .Lzero_bss_loop, tr0
+ pt/l _init, tr5
+ pt/l ___setup_argv_and_call_main, tr6
+ pt/l _exit, tr7
+
+ ! zero out bss
+ LOAD_ADDR (_edata, r0)
+ LOAD_ADDR (_end, r1)
+.Lzero_bss_loop:
+ stx.q r0, r63, r63
+ addi r0, 8, r0
+ bgt/l r1, r0, tr0
+
+ LOAD_ADDR (___data, r26)
+ LOAD_ADDR (___rodata, r27)
+
+#ifdef __SH_FPU_ANY__
+ getcon sr, r0
+ ! enable the FP unit, by resetting SR.FD
+ ! also zero out SR.FR, SR.SZ and SR.PR, as mandated by the ABI
+ movi 0, r1
+ shori 0xf000, r1
+ andc r0, r1, r0
+ putcon r0, sr
+#if __SH5__ == 32
+ pt/l ___set_fpscr, tr0
+ movi 0, r4
+ blink tr0, r18
+#endif
+#endif
+
+ ! arrange for exit to call fini
+ pt/l _atexit, tr1
+ LOAD_ADDR (_fini, r2)
+ blink tr1, r18
+
+ ! call init
+ blink tr5, r18
+
+ ! call the mainline
+ blink tr6, r18
+
+ ! call exit
+ blink tr7, r18
+	! We should never return from _exit, but in case we do we would enter
+	! the following tight loop.  This avoids executing any data that might follow.
+limbo:
+ pt/l limbo, tr0
+ blink tr0, r63
+
+#ifdef MMU_SUPPORT
+ ! All these traps are handled in the same place.
+ .balign 256
+vbr_start:
+ pt/l handler, tr0 ! tr0 trashed.
+ blink tr0, r63
+ .balign 256
+vbr_100:
+ pt/l handler, tr0 ! tr0 trashed.
+ blink tr0, r63
+vbr_100_end:
+ .balign 256
+vbr_200:
+ pt/l handler, tr0 ! tr0 trashed.
+ blink tr0, r63
+ .balign 256
+vbr_300:
+ pt/l handler, tr0 ! tr0 trashed.
+ blink tr0, r63
+ .balign 256
+vbr_400: ! Should be at vbr+0x400
+handler:
+ /* If the trap handler is there call it */
+ LOAD_ADDR (__superh_trap_handler, r2)
+ pta chandler,tr2
+	beq r2, r63, tr2 /* If zero, i.e. not present, branch around to chandler */
+ /* Now call the trap handler with as much of the context unchanged as possible.
+ Move trapping address into R18 to make it look like the trap point */
+ getcon spc, r18
+ pt/l __superh_trap_handler, tr0
+ blink tr0, r7
+chandler:
+ getcon spc, r62
+ getcon expevt, r2
+ pt/l _exit, tr0
+ blink tr0, r63
+
+ /* Simulated trap handler */
+ .section .text..SHmedia32,"ax"
+gcc2_compiled.:
+ .section .debug_abbrev
+.Ldebug_abbrev0:
+ .section .text..SHmedia32
+.Ltext0:
+ .section .debug_info
+.Ldebug_info0:
+ .section .debug_line
+.Ldebug_line0:
+ .section .text..SHmedia32,"ax"
+ .align 5
+ .global __superh_trap_handler
+ .type __superh_trap_handler,@function
+__superh_trap_handler:
+.LFB1:
+ ptabs r18, tr0
+ addi.l r15, -8, r15
+ st.l r15, 4, r14
+ addi.l r15, -8, r15
+ add.l r15, r63, r14
+ st.l r14, 0, r2
+ ptabs r7, tr0
+ addi.l r14, 8, r14
+ add.l r14, r63, r15
+ ld.l r15, 4, r14
+ addi.l r15, 8, r15
+ blink tr0, r63
+.LFE1:
+.Lfe1:
+ .size __superh_trap_handler,.Lfe1-__superh_trap_handler
+
+ .section .text..SHmedia32
+.Letext0:
+
+ .section .debug_info
+ .ualong 0xa7
+ .uaword 0x2
+ .ualong .Ldebug_abbrev0
+ .byte 0x4
+ .byte 0x1
+ .ualong .Ldebug_line0
+ .ualong .Letext0
+ .ualong .Ltext0
+ .string "trap_handler.c"
+
+ .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+ .string "GNU C 2.97-sh5-010522"
+
+ .byte 0x1
+ .byte 0x2
+ .ualong 0x9a
+ .byte 0x1
+ .string "_superh_trap_handler"
+
+ .byte 0x1
+ .byte 0x2
+ .byte 0x1
+ .ualong .LFB1
+ .ualong .LFE1
+ .byte 0x1
+ .byte 0x5e
+ .byte 0x3
+ .string "trap_reason"
+
+ .byte 0x1
+ .byte 0x1
+ .ualong 0x9a
+ .byte 0x2
+ .byte 0x91
+ .byte 0x0
+ .byte 0x0
+ .byte 0x4
+ .string "unsigned int"
+
+ .byte 0x4
+ .byte 0x7
+ .byte 0x0
+
+ .section .debug_abbrev
+ .byte 0x1
+ .byte 0x11
+ .byte 0x1
+ .byte 0x10
+ .byte 0x6
+ .byte 0x12
+ .byte 0x1
+ .byte 0x11
+ .byte 0x1
+ .byte 0x3
+ .byte 0x8
+ .byte 0x1b
+ .byte 0x8
+ .byte 0x25
+ .byte 0x8
+ .byte 0x13
+ .byte 0xb
+ .byte 0,0
+ .byte 0x2
+ .byte 0x2e
+ .byte 0x1
+ .byte 0x1
+ .byte 0x13
+ .byte 0x3f
+ .byte 0xc
+ .byte 0x3
+ .byte 0x8
+ .byte 0x3a
+ .byte 0xb
+ .byte 0x3b
+ .byte 0xb
+ .byte 0x27
+ .byte 0xc
+ .byte 0x11
+ .byte 0x1
+ .byte 0x12
+ .byte 0x1
+ .byte 0x40
+ .byte 0xa
+ .byte 0,0
+ .byte 0x3
+ .byte 0x5
+ .byte 0x0
+ .byte 0x3
+ .byte 0x8
+ .byte 0x3a
+ .byte 0xb
+ .byte 0x3b
+ .byte 0xb
+ .byte 0x49
+ .byte 0x13
+ .byte 0x2
+ .byte 0xa
+ .byte 0,0
+ .byte 0x4
+ .byte 0x24
+ .byte 0x0
+ .byte 0x3
+ .byte 0x8
+ .byte 0xb
+ .byte 0xb
+ .byte 0x3e
+ .byte 0xb
+ .byte 0,0
+ .byte 0
+
+ .section .debug_pubnames
+ .ualong 0x27
+ .uaword 0x2
+ .ualong .Ldebug_info0
+ .ualong 0xab
+ .ualong 0x5b
+ .string "_superh_trap_handler"
+
+ .ualong 0x0
+
+ .section .debug_aranges
+ .ualong 0x1c
+ .uaword 0x2
+ .ualong .Ldebug_info0
+ .byte 0x4
+ .byte 0x0
+ .uaword 0x0,0
+ .ualong .Ltext0
+ .ualong .Letext0-.Ltext0
+ .ualong 0x0
+ .ualong 0x0
+ .ident "GCC: (GNU) 2.97-sh5-010522"
+#endif /* MMU_SUPPORT */
+#else /* ! __SH5__ */
+
+ ! make a place to keep any previous value of the vbr register
+ ! this will only have a value if it has been set by redboot (for example)
+ .section .bss
+old_vbr:
+ .long 0
+#ifdef PROFILE
+profiling_enabled:
+ .long 0
+#endif
+
+
+ .section .text
+ .global start
+ .import ___rtos_profiler_start_timer
+ .weak ___rtos_profiler_start_timer
+start:
+ mov.l stack_k,r15
+
+#if defined (__SH3__) || (defined (__SH_FPU_ANY__) && ! defined (__SH2A__)) || defined (__SH4_NOFPU__)
+#define VBR_SETUP
+ ! before zeroing the bss ...
+ ! if the vbr is already set to vbr_start then the program has been restarted
+ ! (i.e. it is not the first time the program has been run since reset)
+ ! reset the vbr to its old value before old_vbr (in bss) is wiped
+ ! this ensures that the later code does not create a circular vbr chain
+ stc vbr, r1
+ mov.l vbr_start_k, r2
+ cmp/eq r1, r2
+ bf 0f
+ ! reset the old vbr value
+ mov.l old_vbr_k, r1
+ mov.l @r1, r2
+ ldc r2, vbr
+0:
+#endif /* VBR_SETUP */
+
+ ! zero out bss
+ mov.l edata_k,r0
+ mov.l end_k,r1
+ mov #0,r2
+start_l:
+ mov.l r2,@r0
+ add #4,r0
+ cmp/ge r0,r1
+ bt start_l
+
+#if defined (__SH_FPU_ANY__)
+ mov.l set_fpscr_k, r1
+ mov #4,r4
+ jsr @r1
+ shll16 r4 ! Set DN bit (flush denormal inputs to zero)
+ lds r3,fpscr ! Switch to default precision
+#endif /* defined (__SH_FPU_ANY__) */
+
+#ifdef VBR_SETUP
+ ! save the existing contents of the vbr
+ ! there will only be a prior value when using something like redboot
+ ! otherwise it will be zero
+ stc vbr, r1
+ mov.l old_vbr_k, r2
+ mov.l r1, @r2
+ ! setup vbr
+ mov.l vbr_start_k, r1
+ ldc r1,vbr
+#endif /* VBR_SETUP */
+
+ ! if an rtos is exporting a timer start fn,
+ ! then pick up an SR which does not enable ints
+ ! (the rtos will take care of this)
+ mov.l rtos_start_fn, r0
+ mov.l sr_initial_bare, r1
+ tst r0, r0
+ bt set_sr
+
+ mov.l sr_initial_rtos, r1
+
+set_sr:
+ ! Set status register (sr)
+ ldc r1, sr
+
+ ! arrange for exit to call fini
+ mov.l atexit_k,r0
+ mov.l fini_k,r4
+ jsr @r0
+ nop
+
+#ifdef PROFILE
+ ! arrange for exit to call _mcleanup (via stop_profiling)
+ mova stop_profiling,r0
+ mov.l atexit_k,r1
+ jsr @r1
+ mov r0, r4
+
+ ! Call profiler startup code
+ mov.l monstartup_k, r0
+ mov.l start_k, r4
+ mov.l etext_k, r5
+ jsr @r0
+ nop
+
+ ! enable profiling trap
+ ! until now any trap 33s will have been ignored
+ ! This means that all library functions called before this point
+ ! (directly or indirectly) may have the profiling trap at the start.
+ ! Therefore, only mcount itself may not have the extra header.
+ mov.l profiling_enabled_k2, r0
+ mov #1, r1
+ mov.l r1, @r0
+#endif /* PROFILE */
+
+ ! call init
+ mov.l init_k,r0
+ jsr @r0
+ nop
+
+ ! call the mainline
+ mov.l main_k,r0
+ jsr @r0
+ nop
+
+ ! call exit
+ mov r0,r4
+ mov.l exit_k,r0
+ jsr @r0
+ nop
+
+ .balign 4
+#ifdef PROFILE
+stop_profiling:
+ # stop mcount counting
+ mov.l profiling_enabled_k2, r0
+ mov #0, r1
+ mov.l r1, @r0
+
+ # call mcleanup
+ mov.l mcleanup_k, r0
+ jmp @r0
+ nop
+
+ .balign 4
+mcleanup_k:
+ .long __mcleanup
+monstartup_k:
+ .long ___monstartup
+profiling_enabled_k2:
+ .long profiling_enabled
+start_k:
+ .long _start
+etext_k:
+ .long __etext
+#endif /* PROFILE */
+
+ .align 2
+#if defined (__SH_FPU_ANY__)
+set_fpscr_k:
+ .long ___set_fpscr
+#endif /* defined (__SH_FPU_ANY__) */
+
+stack_k:
+ .long _stack
+edata_k:
+ .long _edata
+end_k:
+ .long _end
+main_k:
+ .long ___setup_argv_and_call_main
+exit_k:
+ .long _exit
+atexit_k:
+ .long _atexit
+init_k:
+ .long _init
+fini_k:
+ .long _fini
+#ifdef VBR_SETUP
+old_vbr_k:
+ .long old_vbr
+vbr_start_k:
+ .long vbr_start
+#endif /* VBR_SETUP */
+
+sr_initial_rtos:
+ ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+ ! Whether profiling or not, keep interrupts masked,
+ ! the RTOS will enable these if required.
+ .long 0x600000f1
+
+rtos_start_fn:
+ .long ___rtos_profiler_start_timer
+
+#ifdef PROFILE
+sr_initial_bare:
+ ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+ ! For bare machine, we need to enable interrupts to get profiling working
+ .long 0x60000001
+#else
+
+sr_initial_bare:
+ ! Privileged mode RB 1 BL 0. Keep BL 0 to allow default trap handlers to work.
+ ! Keep interrupts disabled - the application will enable as required.
+ .long 0x600000f1
+#endif
+
+ ! supplied for backward compatibility only, in case of linking
+ ! code whose main() was compiled with an older version of GCC.
+ .global ___main
+___main:
+ rts
+ nop
+#ifdef VBR_SETUP
+! Exception handlers
+ .section .text.vbr, "ax"
+vbr_start:
+
+ .org 0x100
+vbr_100:
+#ifdef PROFILE
+ ! Note on register usage.
+ ! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling
+ ! then this is OK as we are just before executing any function code.
+	! The other registers, r4..r7, we save explicitly on the stack.
+ ! Remaining registers are saved by normal ABI conventions and we assert we do not
+ ! use floating point registers.
+ mov.l expevt_k1, r1
+ mov.l @r1, r1
+ mov.l event_mask, r0
+ and r0,r1
+ mov.l trapcode_k, r2
+ cmp/eq r1,r2
+ bt 1f
+ bra handler_100 ! if not a trapa, go to default handler
+ nop
+1:
+ mov.l trapa_k, r0
+ mov.l @r0, r0
+ shlr2 r0 ! trapa code is shifted by 2.
+ cmp/eq #33, r0
+ bt 2f
+ bra handler_100
+ nop
+2:
+
+ ! If here then it looks like we have trap #33
+ ! Now we need to call mcount with the following convention
+ ! Save and restore r4..r7
+ mov.l r4,@-r15
+ mov.l r5,@-r15
+ mov.l r6,@-r15
+ mov.l r7,@-r15
+ sts.l pr,@-r15
+
+ ! r4 is frompc.
+ ! r5 is selfpc
+ ! r0 is the branch back address.
+ ! The code sequence emitted by gcc for the profiling trap is
+ ! .align 2
+ ! trapa #33
+ ! .align 2
+ ! .long lab Where lab is planted by the compiler. This is the address
+ ! of a datum that needs to be incremented.
+ sts pr, r4 ! frompc
+ stc spc, r5 ! selfpc
+ mov #2, r2
+ not r2, r2 ! pattern to align to 4
+ and r2, r5 ! r5 now has aligned address
+! add #4, r5 ! r5 now has address of address
+ mov r5, r2 ! Remember it.
+!	mov.l @r5, r5 ! r5 has value of label (lab in above example)
+ add #8, r2
+ ldc r2, spc ! our return address avoiding address word
+
+ ! only call mcount if profiling is enabled
+ mov.l profiling_enabled_k, r0
+ mov.l @r0, r0
+ cmp/eq #0, r0
+ bt 3f
+ ! call mcount
+ mov.l mcount_k, r2
+ jsr @r2
+ nop
+3:
+ lds.l @r15+,pr
+ mov.l @r15+,r7
+ mov.l @r15+,r6
+ mov.l @r15+,r5
+ mov.l @r15+,r4
+ rte
+ nop
+ .balign 4
+event_mask:
+ .long 0xfff
+trapcode_k:
+ .long 0x160
+expevt_k1:
+ .long 0xff000024 ! Address of expevt
+trapa_k:
+ .long 0xff000020
+mcount_k:
+ .long __call_mcount
+profiling_enabled_k:
+ .long profiling_enabled
+#endif
+ ! Non profiling case.
+handler_100:
+ mov.l 2f, r0 ! load the old vbr setting (if any)
+ mov.l @r0, r0
+ cmp/eq #0, r0
+ bf 1f
+ ! no previous vbr - jump to own generic handler
+ bra handler
+ nop
+1: ! there was a previous handler - chain them
+ add #0x7f, r0 ! 0x7f
+ add #0x7f, r0 ! 0xfe
+ add #0x2, r0 ! add 0x100 without corrupting another register
+ jmp @r0
+ nop
+ .balign 4
+2:
+ .long old_vbr
+
+ .org 0x400
+vbr_400: ! Should be at vbr+0x400
+ mov.l 2f, r0 ! load the old vbr setting (if any)
+ mov.l @r0, r0
+ cmp/eq #0, r0
+ ! no previous vbr - jump to own generic handler
+ bt handler
+ ! there was a previous handler - chain them
+ rotcr r0
+ rotcr r0
+ add #0x7f, r0 ! 0x1fc
+ add #0x7f, r0 ! 0x3f8
+ add #0x02, r0 ! 0x400
+ rotcl r0
+ rotcl r0 ! Add 0x400 without corrupting another register
+ jmp @r0
+ nop
+ .balign 4
+2:
+ .long old_vbr
+handler:
+ /* If the trap handler is there call it */
+ mov.l superh_trap_handler_k, r0
+ cmp/eq #0, r0 ! True if zero.
+ bf 3f
+ bra chandler
+ nop
+3:
+	! A handler is present; call it.
+ /* Now call the trap handler with as much of the context unchanged as possible.
+ Move trapping address into PR to make it look like the trap point */
+ stc spc, r1
+ lds r1, pr
+ mov.l expevt_k, r4
+ mov.l @r4, r4 ! r4 is value of expevt, first parameter.
+ mov r1, r5 ! Remember trapping pc.
+ mov r1, r6 ! Remember trapping pc.
+ mov.l chandler_k, r1
+ mov.l superh_trap_handler_k, r2
+ ! jmp to trap handler to avoid disturbing pr.
+ jmp @r2
+ nop
+
+ .org 0x600
+vbr_600:
+#ifdef PROFILE
+ ! Should be at vbr+0x600
+ ! Now we are in the land of interrupts so need to save more state.
+ ! Save register state
+ mov.l interrupt_stack_k, r15 ! r15 has been saved to sgr.
+ mov.l r0,@-r15
+ mov.l r1,@-r15
+ mov.l r2,@-r15
+ mov.l r3,@-r15
+ mov.l r4,@-r15
+ mov.l r5,@-r15
+ mov.l r6,@-r15
+ mov.l r7,@-r15
+ sts.l pr,@-r15
+ sts.l mach,@-r15
+ sts.l macl,@-r15
+#if defined(__SH_FPU_ANY__)
+ ! Save fpul and fpscr, save fr0-fr7 in 64 bit mode
+ ! and set the pervading precision for the timer_handler
+ mov #0,r0
+ sts.l fpul,@-r15
+ sts.l fpscr,@-r15
+ lds r0,fpscr ! Clear fpscr
+ fmov fr0,@-r15
+ fmov fr1,@-r15
+ fmov fr2,@-r15
+ fmov fr3,@-r15
+ mov.l pervading_precision_k,r0
+ fmov fr4,@-r15
+ fmov fr5,@-r15
+ mov.l @r0,r0
+ fmov fr6,@-r15
+ fmov fr7,@-r15
+ lds r0,fpscr
+#endif /* __SH_FPU_ANY__ */
+ ! Pass interrupted pc to timer_handler as first parameter (r4).
+ stc spc, r4
+ mov.l timer_handler_k, r0
+ jsr @r0
+ nop
+#if defined(__SH_FPU_ANY__)
+ mov #0,r0
+ lds r0,fpscr ! Clear the fpscr
+ fmov @r15+,fr7
+ fmov @r15+,fr6
+ fmov @r15+,fr5
+ fmov @r15+,fr4
+ fmov @r15+,fr3
+ fmov @r15+,fr2
+ fmov @r15+,fr1
+ fmov @r15+,fr0
+ lds.l @r15+,fpscr
+ lds.l @r15+,fpul
+#endif /* __SH_FPU_ANY__ */
+ lds.l @r15+,macl
+ lds.l @r15+,mach
+ lds.l @r15+,pr
+ mov.l @r15+,r7
+ mov.l @r15+,r6
+ mov.l @r15+,r5
+ mov.l @r15+,r4
+ mov.l @r15+,r3
+ mov.l @r15+,r2
+ mov.l @r15+,r1
+ mov.l @r15+,r0
+ stc sgr, r15 ! Restore r15, destroyed by this sequence.
+ rte
+ nop
+#if defined(__SH_FPU_ANY__)
+ .balign 4
+pervading_precision_k:
+#define CONCAT1(A,B) A##B
+#define CONCAT(A,B) CONCAT1(A,B)
+ .long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4
+#endif
+#else
+ mov.l 2f, r0 ! Load the old vbr setting (if any).
+ mov.l @r0, r0
+ cmp/eq #0, r0
+ ! no previous vbr - jump to own handler
+ bt chandler
+ ! there was a previous handler - chain them
+ rotcr r0
+ rotcr r0
+ add #0x7f, r0 ! 0x1fc
+ add #0x7f, r0 ! 0x3f8
+ add #0x7f, r0 ! 0x5f4
+ add #0x03, r0 ! 0x600
+ rotcl r0
+ rotcl r0 ! Add 0x600 without corrupting another register
+ jmp @r0
+ nop
+ .balign 4
+2:
+ .long old_vbr
+#endif /* PROFILE code */
+chandler:
+ mov.l expevt_k, r4
+	mov.l @r4, r4 ! r4 is the value of expevt, which becomes the return code
+ mov.l handler_exit_k,r0
+ jsr @r0
+ nop
+	! We should never return from _exit, but in case we do we would enter
+	! the following tight loop.
+limbo:
+ bra limbo
+ nop
+ .balign 4
+#ifdef PROFILE
+interrupt_stack_k:
+ .long __timer_stack ! The high end of the stack
+timer_handler_k:
+ .long __profil_counter
+#endif
+expevt_k:
+ .long 0xff000024 ! Address of expevt
+chandler_k:
+ .long chandler
+superh_trap_handler_k:
+ .long __superh_trap_handler
+handler_exit_k:
+ .long _exit
+ .align 2
+! Simulated compile of trap handler.
+ .section .debug_abbrev,"",@progbits
+.Ldebug_abbrev0:
+ .section .debug_info,"",@progbits
+.Ldebug_info0:
+ .section .debug_line,"",@progbits
+.Ldebug_line0:
+ .text
+.Ltext0:
+ .align 5
+ .type __superh_trap_handler,@function
+__superh_trap_handler:
+.LFB1:
+ mov.l r14,@-r15
+.LCFI0:
+ add #-4,r15
+.LCFI1:
+ mov r15,r14
+.LCFI2:
+ mov.l r4,@r14
+ lds r1, pr
+ add #4,r14
+ mov r14,r15
+ mov.l @r15+,r14
+ rts
+ nop
+.LFE1:
+.Lfe1:
+ .size __superh_trap_handler,.Lfe1-__superh_trap_handler
+ .section .debug_frame,"",@progbits
+.Lframe0:
+ .ualong .LECIE0-.LSCIE0
+.LSCIE0:
+ .ualong 0xffffffff
+ .byte 0x1
+ .string ""
+ .uleb128 0x1
+ .sleb128 -4
+ .byte 0x11
+ .byte 0xc
+ .uleb128 0xf
+ .uleb128 0x0
+ .align 2
+.LECIE0:
+.LSFDE0:
+ .ualong .LEFDE0-.LASFDE0
+.LASFDE0:
+ .ualong .Lframe0
+ .ualong .LFB1
+ .ualong .LFE1-.LFB1
+ .byte 0x4
+ .ualong .LCFI0-.LFB1
+ .byte 0xe
+ .uleb128 0x4
+ .byte 0x4
+ .ualong .LCFI1-.LCFI0
+ .byte 0xe
+ .uleb128 0x8
+ .byte 0x8e
+ .uleb128 0x1
+ .byte 0x4
+ .ualong .LCFI2-.LCFI1
+ .byte 0xd
+ .uleb128 0xe
+ .align 2
+.LEFDE0:
+ .text
+.Letext0:
+ .section .debug_info
+ .ualong 0xb3
+ .uaword 0x2
+ .ualong .Ldebug_abbrev0
+ .byte 0x4
+ .uleb128 0x1
+ .ualong .Ldebug_line0
+ .ualong .Letext0
+ .ualong .Ltext0
+ .string "trap_handler.c"
+ .string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+ .string "GNU C 3.2 20020529 (experimental)"
+ .byte 0x1
+ .uleb128 0x2
+ .ualong 0xa6
+ .byte 0x1
+ .string "_superh_trap_handler"
+ .byte 0x1
+ .byte 0x2
+ .byte 0x1
+ .ualong .LFB1
+ .ualong .LFE1
+ .byte 0x1
+ .byte 0x5e
+ .uleb128 0x3
+ .string "trap_reason"
+ .byte 0x1
+ .byte 0x1
+ .ualong 0xa6
+ .byte 0x2
+ .byte 0x91
+ .sleb128 0
+ .byte 0x0
+ .uleb128 0x4
+ .string "unsigned int"
+ .byte 0x4
+ .byte 0x7
+ .byte 0x0
+ .section .debug_abbrev
+ .uleb128 0x1
+ .uleb128 0x11
+ .byte 0x1
+ .uleb128 0x10
+ .uleb128 0x6
+ .uleb128 0x12
+ .uleb128 0x1
+ .uleb128 0x11
+ .uleb128 0x1
+ .uleb128 0x3
+ .uleb128 0x8
+ .uleb128 0x1b
+ .uleb128 0x8
+ .uleb128 0x25
+ .uleb128 0x8
+ .uleb128 0x13
+ .uleb128 0xb
+ .byte 0x0
+ .byte 0x0
+ .uleb128 0x2
+ .uleb128 0x2e
+ .byte 0x1
+ .uleb128 0x1
+ .uleb128 0x13
+ .uleb128 0x3f
+ .uleb128 0xc
+ .uleb128 0x3
+ .uleb128 0x8
+ .uleb128 0x3a
+ .uleb128 0xb
+ .uleb128 0x3b
+ .uleb128 0xb
+ .uleb128 0x27
+ .uleb128 0xc
+ .uleb128 0x11
+ .uleb128 0x1
+ .uleb128 0x12
+ .uleb128 0x1
+ .uleb128 0x40
+ .uleb128 0xa
+ .byte 0x0
+ .byte 0x0
+ .uleb128 0x3
+ .uleb128 0x5
+ .byte 0x0
+ .uleb128 0x3
+ .uleb128 0x8
+ .uleb128 0x3a
+ .uleb128 0xb
+ .uleb128 0x3b
+ .uleb128 0xb
+ .uleb128 0x49
+ .uleb128 0x13
+ .uleb128 0x2
+ .uleb128 0xa
+ .byte 0x0
+ .byte 0x0
+ .uleb128 0x4
+ .uleb128 0x24
+ .byte 0x0
+ .uleb128 0x3
+ .uleb128 0x8
+ .uleb128 0xb
+ .uleb128 0xb
+ .uleb128 0x3e
+ .uleb128 0xb
+ .byte 0x0
+ .byte 0x0
+ .byte 0x0
+ .section .debug_pubnames,"",@progbits
+ .ualong 0x27
+ .uaword 0x2
+ .ualong .Ldebug_info0
+ .ualong 0xb7
+ .ualong 0x67
+ .string "_superh_trap_handler"
+ .ualong 0x0
+ .section .debug_aranges,"",@progbits
+ .ualong 0x1c
+ .uaword 0x2
+ .ualong .Ldebug_info0
+ .byte 0x4
+ .byte 0x0
+ .uaword 0x0
+ .uaword 0x0
+ .ualong .Ltext0
+ .ualong .Letext0-.Ltext0
+ .ualong 0x0
+ .ualong 0x0
+#endif /* VBR_SETUP */
+#endif /* ! __SH5__ */
diff --git a/gcc/config/sh/crti.asm b/gcc/config/sh/crti.asm
new file mode 100644
index 000000000..ef5cd719d
--- /dev/null
+++ b/gcc/config/sh/crti.asm
@@ -0,0 +1,125 @@
+/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc.
+ This file was adapted from glibc sources.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* The code in sections .init and .fini is supposed to be a single
+ regular function. The function in .init is called directly from
+ start in crt1.asm. The function in .fini is atexit()ed in crt1.asm
+ too.
+
+ crti.asm contributes the prologue of a function to these sections,
+   and crtn.asm supplies the epilogue.  STARTFILE_SPEC should list
+ crti.o before any other object files that might add code to .init
+ or .fini sections, and ENDFILE_SPEC should list crtn.o after any
+ such object files. */
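As a concrete illustration (a sketch, assuming a hosted toolchain whose
startup objects are wired through .init/.fini as described above): the
constructor/destructor attributes below are dispatched from the startup
and shutdown paths built around the very functions whose prologue crti.o
opens and whose epilogue crtn.o closes.  The names are illustrative and
not part of this file.

    #include <stdio.h>

    /* Runs from the .init-driven startup path, before main.  */
    __attribute__((constructor)) static void before_main (void)
    {
      puts ("init");
    }

    /* Runs from the .fini-driven shutdown path, after main returns.  */
    __attribute__((destructor)) static void after_main (void)
    {
      puts ("fini");
    }

    int main (void)
    {
      puts ("main");
      return 0;
    }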
+
+ .section .init
+/* The alignment below can't be smaller, otherwise the mova below
+ breaks. Yes, we might align just the label, but then we'd be
+ exchanging an alignment here for one there, since the code fragment
+ below ensures 4-byte alignment on __ELF__. */
+#ifdef __ELF__
+ .p2align 2
+#else
+ .p2align 1
+#endif
+ .global _init
+_init:
+#if __SHMEDIA__
+ addi r15, -16, r15
+ st.q r15, 8, r14
+ st.q r15, 0, r18
+ add r15, r63, r14
+#elif __SH5__ && ! __SHMEDIA__
+ mov r15,r0
+ add #-8,r15
+ mov.l r14,@-r0
+ sts.l pr,@-r0
+ mov r15,r14
+ nop
+#else
+#ifdef __ELF__
+ mov.l r12,@-r15
+ mova 0f,r0
+ mov.l 0f,r12
+#endif
+ mov.l r14,@-r15
+#ifdef __ELF__
+ add r0,r12
+#endif
+ sts.l pr,@-r15
+#ifdef __ELF__
+ bra 1f
+#endif
+ mov r15,r14
+#ifdef __ELF__
+0: .long _GLOBAL_OFFSET_TABLE_
+1:
+#endif
+#endif /* __SHMEDIA__ */
+
+ .section .fini
+/* The alignment below can't be smaller, otherwise the mova below
+ breaks. Yes, we might align just the label, but then we'd be
+ exchanging an alignment here for one there, since the code fragment
+ below ensures 4-byte alignment on __ELF__. */
+#ifdef __ELF__
+ .p2align 2
+#else
+ .p2align 1
+#endif
+ .global _fini
+_fini:
+#if __SHMEDIA__
+ addi r15, -16, r15
+ st.q r15, 8, r14
+ st.q r15, 0, r18
+ add r15, r63, r14
+#elif __SH5__ && ! __SHMEDIA__
+ mov r15,r0
+ add #-8,r15
+ mov.l r14,@-r0
+ sts.l pr,@-r0
+ mov r15,r14
+ nop
+#else
+#ifdef __ELF__
+ mov.l r12,@-r15
+ mova 0f,r0
+ mov.l 0f,r12
+#endif
+ mov.l r14,@-r15
+#ifdef __ELF__
+ add r0,r12
+#endif
+ sts.l pr,@-r15
+#ifdef __ELF__
+ bra 1f
+#endif
+ mov r15,r14
+#ifdef __ELF__
+0: .long _GLOBAL_OFFSET_TABLE_
+1:
+#endif
+#endif /* __SHMEDIA__ */
diff --git a/gcc/config/sh/crtn.asm b/gcc/config/sh/crtn.asm
new file mode 100644
index 000000000..670d90f7b
--- /dev/null
+++ b/gcc/config/sh/crtn.asm
@@ -0,0 +1,77 @@
+/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc.
+ This file was adapted from glibc sources.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* See crti.asm for an explanation of the .init and .fini sections.  */
+
+ .section .init
+#if __SHMEDIA__
+ add r14, r63, r15
+ ld.q r15, 0, r18
+ ptabs r18, tr0
+ ld.q r15, 8, r14
+ addi r15, 16, r15
+ blink tr0, r63
+#elif __SH5__ && ! __SHMEDIA__
+ mov r14,r15
+ lds.l @r14+,pr
+ mov.l @r14,r14
+ rts
+ add #8,r15
+#else
+ mov r14,r15
+ lds.l @r15+,pr
+ mov.l @r15+,r14
+ rts
+#ifdef __ELF__
+ mov.l @r15+,r12
+#else
+ nop
+#endif
+#endif /* __SHMEDIA__ */
+
+ .section .fini
+#if __SHMEDIA__
+ add r14, r63, r15
+ ld.q r15, 0, r18
+ ptabs r18, tr0
+ ld.q r15, 8, r14
+ addi r15, 16, r15
+ blink tr0, r63
+#elif __SH5__ && ! __SHMEDIA__
+ mov r14,r15
+ lds.l @r14+,pr
+ mov.l @r14,r14
+ rts
+ add #8,r15
+#else
+ mov r14,r15
+ lds.l @r15+,pr
+ mov.l @r15+,r14
+ rts
+#ifdef __ELF__
+ mov.l @r15+,r12
+#else
+ nop
+#endif
+#endif /* __SHMEDIA__ */
diff --git a/gcc/config/sh/divcost-analysis b/gcc/config/sh/divcost-analysis
new file mode 100644
index 000000000..d55bb6621
--- /dev/null
+++ b/gcc/config/sh/divcost-analysis
@@ -0,0 +1,88 @@
+Analysis of cycle costs for SH4:
+
+-> udiv_le128: 5
+-> udiv_ge64k: 6
+-> udiv udiv_25: 10
+-> pos_divisor: 3
+-> pos_result linear: 5
+-> pos_result - -: 5
+-> div_le128: 7
+-> div_ge64k: 9
+sdivsi3 -> udiv_25 13
+udiv_25 -> div_ge64k_end: 15
+div_ge64k_end -> rts: 13
+div_le128 -> div_le128_2: 2, r1 latency 3
+udiv_le128 -> div_le128_2: 2, r1 latency 3
+(u)div_le128 -> div_by_1: 9
+(u)div_le128 -> rts: 17
+div_by_1(_neg) -> rts: 4
+div_ge64k -> div_r8: 2
+div_ge64k -> div_ge64k_2: 3
+udiv_ge64k -> udiv_r8: 3
+udiv_ge64k -> div_ge64k_2: 3 + LS
+(u)div_ge64k -> div_ge64k_end: 13
+div_r8 -> div_r8_2: 2
+udiv_r8 -> div_r8_2: 2 + LS
+(u)div_r8 -> rts: 21
+
+-> - + neg_result: 5
+-> + - neg_result: 5
+-> div_le128_neg: 7
+-> div_ge64k_neg: 9
+-> div_r8_neg: 11
+-> <64k div_ge64k_neg_end: 28
+-> >=64k div_ge64k_neg_end: 22
+div_ge64k_neg_end ft -> rts: 14
+div_r8_neg_end -> rts: 4
+div_r8_neg -> div_r8_neg_end: 18
+div_le128_neg -> div_by_1_neg: 4
+div_le128_neg -> rts 18
+
+ sh4-200 absolute divisor range:
+         1           [2..128]  [129..64K)  [64K..|dividend|/256]  >=64K,>|dividend|/256
+udiv 18 22 38 32 30
+sdiv pos: 20 24 41 35 32
+sdiv neg: 15 25 42 36 33
+
+ sh4-300 absolute divisor range:
+ 8 bit 16 bit 24 bit > 24 bit
+udiv 15 35 28 25
+sdiv 14 36 34 31
+
+
+fp-based:
+
+unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
+signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
+
+call-div1: divisor range:
+ [1..64K) >= 64K
+unsigned: 63 58
+signed: 76 76
+
+SFUNC_STATIC call overhead:
+mov.l 0f,r1
+bsrf r1
+
+SFUNC_GOT call overhead - current:
+mov.l 0f,r1
+mova 0f,r0
+mov.l 1f,r2
+add r1,r0
+mov.l @(r0,r2),r0
+jmp @r0
+; 3 cycles worse than SFUNC_STATIC
+
+SFUNC_GOT call overhead - improved assembler:
+mov.l 0f,r1
+mova 0f,r0
+mov.l @(r0,r1),r0
+jmp @r0
+; 2 cycles worse than SFUNC_STATIC
+
+
+Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/gcc/config/sh/divtab-sh4-300.c b/gcc/config/sh/divtab-sh4-300.c
new file mode 100644
index 000000000..c8a65cfbc
--- /dev/null
+++ b/gcc/config/sh/divtab-sh4-300.c
@@ -0,0 +1,77 @@
+/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Calculate division table for ST40-300 integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@st.com */
+
+#include <stdio.h>
+#include <stdlib.h>	/* for exit */
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ double q, r, err, max_err = 0, max_s_err = 0;
+
+  puts("/* This table has been generated by divtab-sh4-300.c. */");
+ puts ("\t.balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ int n = 0;
+ if (i == 0)
+ {
+ /* output some dummy number for 1/0. */
+ puts ("LOCAL(div_table_clz):\n\t.byte\t0");
+ continue;
+ }
+ for (j = i < 0 ? -i : i; j < 128; j += j)
+ n++;
+ printf ("\t.byte\t%d\n", n - 7);
+ }
+ puts("\
+/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,\n\
+ or in bit 33 for powers of two. */\n\
+ .balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ if (i == 0)
+ {
+ puts ("LOCAL(div_table_inv):\n\t.long\t0x0");
+ continue;
+ }
+ j = i < 0 ? -i : i;
+ while (j < 64)
+ j += j;
+ q = 4.*(1<<30)*128/j;
+ r = ceil (q);
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ err = err * j / 128;
+ if (err > max_s_err)
+ max_s_err = err;
+ }
+ printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
+ exit (0);
+}
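A quick standalone check (not part of the generator; names hypothetical)
relating the formula above to one of the emitted entries: for divisor 3
the normalized j is 96, and ceil(2^39/96) = 0x155555556.  Its low 32 bits,
0x55555556, are what lands in the table; the leading 1 is implicit in
bit 32.

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      uint64_t j = 96;
      uint64_t q = (((uint64_t) 1 << 39) + j - 1) / j;   /* ceil(2^39/j) */
      printf ("0x%llX -> low 32 bits 0x%X\n",
	      (unsigned long long) q, (uint32_t) q);      /* 0x155555556 -> 0x55555556 */
      return 0;
    }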
diff --git a/gcc/config/sh/divtab-sh4.c b/gcc/config/sh/divtab-sh4.c
new file mode 100644
index 000000000..758508130
--- /dev/null
+++ b/gcc/config/sh/divtab-sh4.c
@@ -0,0 +1,85 @@
+/* Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Calculate division table for SH2..4 integer division
+   Contributed by Joern Rennecke
+ joern.rennecke@superh.com */
+
+#include <stdio.h>
+#include <stdlib.h>	/* for exit */
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ double q, r, err, max_err = 0, max_s_err = 0;
+
+ puts("/* This table has been generated by divtab-sh4.c. */");
+ puts ("\t.balign 4");
+ puts ("LOCAL(div_table_clz):");
+ /* output some dummy number for 1/0. */
+ printf ("\t.byte\t%d\n", 0);
+ for (i = 1; i <= 128; i++)
+ {
+ int n = 0;
+ if (i == 128)
+ puts ("\
+/* Lookup table translating positive divisor to index into table of\n\
+ normalized inverse. N.B. the '0' entry is also the last entry of the\n\
+ previous table, and causes an unaligned access for division by zero. */\n\
+LOCAL(div_table_ix):");
+ for (j = i; j <= 128; j += j)
+ n++;
+ printf ("\t.byte\t%d\n", n - 7);
+ }
+ for (i = 1; i <= 128; i++)
+ {
+ j = i < 0 ? -i : i;
+ while (j < 128)
+ j += j;
+ printf ("\t.byte\t%d\n", j * 2 - 96*4);
+ }
+ puts("\
+/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */\n\
+ .balign 4\n\
+LOCAL(zero_l):");
+ for (i = 64; i < 128; i++)
+ {
+ if (i == 96)
+ puts ("LOCAL(div_table):");
+ q = 4.*(1<<30)*128/i;
+ r = ceil (q);
+      /* The value for 64 is actually scaled differently than it would
+	 appear from this calculation.  The implicit part is %01, not 10.
+	 Since the value in the table is 0 either way, this doesn't
+	 matter here; still, the 1/64 entry is effectively a 1/128
+	 entry.  */
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ err = err * i / 128;
+ if (err > max_s_err)
+ max_s_err = err;
+ }
+ printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
+ exit (0);
+}
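The `j * 2 - 96*4` expression above looks opaque; the standalone sketch
below (helper name hypothetical) shows it equals ((j/2) - 96) * 4, which
appears to be the byte offset, relative to LOCAL(div_table), of the .long
holding the divisor's normalized inverse: the long for normalized value v
sits (v - 64) longs after LOCAL(zero_l), and LOCAL(div_table) is printed
32 longs after zero_l (at i == 96).

    #include <assert.h>

    static int ix_entry (int d)		/* d in [1..128], mirrors the loop */
    {
      int j = d;
      while (j < 128)
	j += j;				/* normalize into [128..255] */
      return j * 2 - 96 * 4;		/* == ((j / 2) - 96) * 4 */
    }

    int main (void)
    {
      assert (ix_entry (1) == -128);	/* j = 128 -> first long, at zero_l */
      assert (ix_entry (127) == 124);	/* j = 254 -> last long */
      return 0;
    }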
diff --git a/gcc/config/sh/divtab.c b/gcc/config/sh/divtab.c
new file mode 100644
index 000000000..f8db2f508
--- /dev/null
+++ b/gcc/config/sh/divtab.c
@@ -0,0 +1,200 @@
+/* Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Calculate division table for SH5Media integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@superh.com */
+
+#include <stdio.h>
+#include <stdlib.h>	/* for abort and exit */
+#include <math.h>
+
+#define BITS 5
+#define N_ENTRIES (1 << BITS)
+#define CUTOFF_BITS 20
+
+#define BIAS (-330)
+
+double max_defect = 0.;
+double max_defect_x;
+
+double min_defect = 1e9;
+double min_defect_x;
+
+double max_defect2 = 0.;
+double max_defect2_x;
+
+double min_defect2 = 0.;
+double min_defect2_x;
+
+double min_defect3 = 1e9;
+double min_defect3_x;
+int min_defect3_val;
+
+double max_defect3 = 0.;
+double max_defect3_x;
+int max_defect3_val;
+
+static void note_defect3 (int val, double d2, double y2d, double x)
+{
+ int cutoff_val = val >> CUTOFF_BITS;
+ double cutoff;
+ double defect;
+
+ if (val < 0)
+ cutoff_val++;
+ cutoff = (cutoff_val * (1<<CUTOFF_BITS) - val) * y2d;
+ defect = cutoff + val * d2;
+ if (val < 0)
+ defect = - defect;
+ if (defect > max_defect3)
+ {
+ max_defect3 = defect;
+ max_defect3_x = x;
+ max_defect3_val = val;
+ }
+ if (defect < min_defect3)
+ {
+ min_defect3 = defect;
+ min_defect3_x = x;
+ min_defect3_val = val;
+ }
+}
+
+/* This function assumes 32-bit integers. */
+static double
+calc_defect (double x, int constant, int factor)
+{
+ double y0 = (constant - (int) floor ((x * factor * 64.))) / 16384.;
+  double y1 = 2 * y0 - y0 * y0 * (x + BIAS / (1. * (1LL << 30)));
+ double y2d0, y2d;
+ int y2d1;
+ double d, d2;
+
+ y1 = floor (y1 * (1024 * 1024 * 1024)) / (1024 * 1024 * 1024);
+ d = y1 - 1 / x;
+ if (d > max_defect)
+ {
+ max_defect = d;
+ max_defect_x = x;
+ }
+ if (d < min_defect)
+ {
+ min_defect = d;
+ min_defect_x = x;
+ }
+  y2d0 = floor (y1 * x * (1LL << (60 - 16)));
+  y2d1 = (int) (long long) y2d0;
+  y2d = - floor ((y1 - y0 / (1 << (30 - 14))) * y2d1) / (1LL << 44);
+ d2 = y1 + y2d - 1/x;
+ if (d2 > max_defect2)
+ {
+ max_defect2 = d2;
+ max_defect2_x = x;
+ }
+ if (d2 < min_defect2)
+ {
+ min_defect2 = d2;
+ min_defect2_x = x;
+ }
+ /* zero times anything is trivially zero. */
+ note_defect3 ((1 << CUTOFF_BITS) - 1, d2, y2d, x);
+ note_defect3 (1 << CUTOFF_BITS, d2, y2d, x);
+ note_defect3 ((1U << 31) - (1 << CUTOFF_BITS), d2, y2d, x);
+ note_defect3 ((1U << 31) - 1, d2, y2d, x);
+ note_defect3 (-1, d2, y2d, x);
+ note_defect3 (-(1 << CUTOFF_BITS), d2, y2d, x);
+ note_defect3 ((1U << 31) - (1 << CUTOFF_BITS) + 1, d2, y2d, x);
+ note_defect3 (-(1U << 31), d2, y2d, x);
+ return d;
+}
+
+int
+main ()
+{
+ int i;
+ unsigned char factors[N_ENTRIES];
+ short constants[N_ENTRIES];
+ int steps = N_ENTRIES / 2;
+ double step = 1. / steps;
+ double eps30 = 1. / (1024 * 1024 * 1024);
+
+ for (i = 0; i < N_ENTRIES; i++)
+ {
+ double x_low = (i < steps ? 1. : -3.) + i * step;
+ double x_high = x_low + step - eps30;
+ double x_med;
+ int factor, constant;
+ double low_defect, med_defect, high_defect, max_defect;
+
+ factor = (1./x_low- 1./x_high) / step * 256. + 0.5;
+ if (factor == 256)
+ factor = 255;
+ factors[i] = factor;
+ /* Use minimum of error function for x_med. */
+ x_med = sqrt (256./factor);
+ if (x_low < 0)
+ x_med = - x_med;
+ low_defect = 1. / x_low + x_low * factor / 256.;
+ high_defect = 1. / x_high + x_high * factor / 256.;
+ med_defect = 1. / x_med + x_med * factor / 256.;
+ max_defect
+ = ((low_defect > high_defect) ^ (x_med < 0)) ? low_defect : high_defect;
+ constant = (med_defect + max_defect) * 0.5 * 16384. + 0.5;
+ if (constant < -32768 || constant > 32767)
+ abort ();
+ constants[i] = constant;
+ calc_defect (x_low, constant, factor);
+ calc_defect (x_med, constant, factor);
+ calc_defect (x_high, constant, factor);
+ }
+  printf ("/* This table has been generated by divtab.c.\n");
+ printf ("Defects for bias %d:\n", BIAS);
+ printf (" Max defect: %e at %e\n", max_defect, max_defect_x);
+ printf (" Min defect: %e at %e\n", min_defect, min_defect_x);
+ printf (" Max 2nd step defect: %e at %e\n", max_defect2, max_defect2_x);
+ printf (" Min 2nd step defect: %e at %e\n", min_defect2, min_defect2_x);
+ printf (" Max div defect: %e at %d:%e\n", max_defect3, max_defect3_val, max_defect3_x);
+ printf (" Min div defect: %e at %d:%e\n", min_defect3, min_defect3_val, min_defect3_x);
+ printf (" Defect at 1: %e\n",
+ calc_defect (1., constants[0], factors[0]));
+ printf (" Defect at -2: %e */\n",
+ calc_defect (-2., constants[steps], factors[steps]));
+ printf ("\t.section\t.rodata\n");
+ printf ("\t.balign 2\n");
+ printf ("/* negative division constants */\n");
+ for (i = steps; i < 2 * steps; i++)
+ printf ("\t.word\t%d\n", constants[i]);
+ printf ("/* negative division factors */\n");
+ for (i = steps; i < 2*steps; i++)
+ printf ("\t.byte\t%d\n", factors[i]);
+ printf ("\t.skip %d\n", steps);
+  printf ("\t.global GLOBAL(div_table)\n");
+ printf ("GLOBAL(div_table):\n");
+ printf ("\t.skip %d\n", steps);
+ printf ("/* positive division factors */\n");
+ for (i = 0; i < steps; i++)
+ printf ("\t.byte\t%d\n", factors[i]);
+ printf ("/* positive division constants */\n");
+ for (i = 0; i < steps; i++)
+ printf ("\t.word\t%d\n", constants[i]);
+ exit (0);
+}
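The `y1` line in calc_defect is one Newton-Raphson step for the
reciprocal, y1 = y0*(2 - x*y0), plus a small bias term; each step roughly
doubles the number of correct bits.  A standalone sketch (names
hypothetical) of why even a crude seed converges fast:

    #include <stdio.h>

    static double refine (double x, double y0)
    {
      return y0 * (2.0 - x * y0);	/* Newton step for f(y) = 1/y - x */
    }

    int main (void)
    {
      double x = 1.7, y = 0.6;		/* crude seed for 1/1.7 ~ 0.588 */
      int i;
      for (i = 0; i < 3; i++)
	{
	  y = refine (x, y);
	  printf ("%.12f\n", y);	/* approaches 0.588235294118 */
	}
      return 0;
    }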
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h
new file mode 100644
index 000000000..336743cc8
--- /dev/null
+++ b/gcc/config/sh/elf.h
@@ -0,0 +1,90 @@
+/* Definitions of target machine for gcc for Renesas / SuperH SH using ELF.
+ Copyright (C) 1996, 1997, 2000, 2001, 2002, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Ian Lance Taylor <ian@cygnus.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Let sh.c know this is ELF. */
+#undef TARGET_ELF
+#define TARGET_ELF 1
+
+/* Generate DWARF2 debugging information and make it the default.  */
+#define DWARF2_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Use a more compact format for line information.  */
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+#undef WCHAR_TYPE
+/* #define WCHAR_TYPE (TARGET_SH5 ? "int" : "long int") */
+#define WCHAR_TYPE SH_ELF_WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* The prefix to add to compiler-generated local labels.  */
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int")
+
+/* Pass -ml and -mrelax to the assembler and linker. */
+#undef ASM_SPEC
+#define ASM_SPEC SH_ASM_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC SH_LINK_SPEC
+#undef LINK_EMUL_PREFIX
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define LINK_EMUL_PREFIX "sh%{!mb:l}elf"
+#else
+#define LINK_EMUL_PREFIX "sh%{ml:l}elf"
+#endif
+
+#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO)
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf ((STRING), "*%s%s%ld", LOCAL_LABEL_PREFIX, (PREFIX), (long)(NUM))
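A standalone illustration (not GCC code; driver is hypothetical) of the
string the macro above builds for PREFIX "L" and NUM 42.  The leading '*'
tells GCC's output machinery to emit the rest of the name verbatim,
without adding a user label prefix.

    #include <stdio.h>

    int main (void)
    {
      char string[32];
      sprintf (string, "*%s%s%ld", "." /* LOCAL_LABEL_PREFIX */, "L", 42L);
      puts (string);			/* prints *.L42 */
      return 0;
    }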
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: crt1.o%s} crti.o%s \
+ %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+/* ASM_OUTPUT_CASE_LABEL is defined in elfos.h. With it,
+ a redundant .align was generated. */
+#undef ASM_OUTPUT_CASE_LABEL
diff --git a/gcc/config/sh/embed-elf.h b/gcc/config/sh/embed-elf.h
new file mode 100644
index 000000000..a9f6d9438
--- /dev/null
+++ b/gcc/config/sh/embed-elf.h
@@ -0,0 +1,36 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH
+ non-Linux embedded targets.
+ Copyright (C) 2002, 2003, 2007, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by J"orn Rennecke <joern.rennecke@superh.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+/* While the speed-optimized implementations of udivsi3_i4i / sdivsi3_i4i
+ in libgcc are not available for SH2, the space-optimized ones in
+ libgcc-Os-4-200 are. Thus, when not optimizing for space, link
+ libgcc-Os-4-200 after libgcc, so that -mdiv=call-table works for -m2. */
+#define LIBGCC_SPEC "%{!shared: \
+ %{m4-100*:-lic_invalidate_array_4-100} \
+ %{m4-200*:-lic_invalidate_array_4-200} \
+ %{m4-300*|m4-340:-lic_invalidate_array_4a %{!Os: -lgcc-4-300}} \
+ %{m4a*:-lic_invalidate_array_4a}} \
+ %{Os: -lgcc-Os-4-200} \
+ -lgcc \
+ %{!Os: -lgcc-Os-4-200}"
diff --git a/gcc/config/sh/lib1funcs-4-300.asm b/gcc/config/sh/lib1funcs-4-300.asm
new file mode 100644
index 000000000..b131877f1
--- /dev/null
+++ b/gcc/config/sh/lib1funcs-4-300.asm
@@ -0,0 +1,936 @@
+/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* libgcc routines for the STMicroelectronics ST40-300 CPU.
+ Contributed by J"orn Rennecke joern.rennecke@st.com. */
+
+#include "lib1funcs.h"
+
+#if !__SHMEDIA__
+#ifdef L_div_table
+#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code used shld, thus is not suitable for SH1 / SH2. */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4-300.
+ Uses a lookup table for divisors in the range -128 .. +127, and
+ div1 with case distinction for larger divisors in three more ranges.
+ The code is lumped together with the table to allow the use of mova. */
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+ .global GLOBAL(udivsi3_i4i)
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+ FUNC(GLOBAL(sdivsi3_i4i))
+
+ .balign 4
+LOCAL(div_ge8m): ! 10 cycles up to here
+ rotcr r1 ! signed shift must use original sign from r4
+ div0s r5,r4
+ mov #24,r7
+ shld r7,r6
+ shad r0,r1
+ rotcl r6
+ div1 r5,r1
+ swap.w r5,r0 ! detect -0x80000000 : 0x800000
+ rotcl r6
+ swap.w r4,r7
+ div1 r5,r1
+ swap.b r7,r7
+ rotcl r6
+ or r7,r0
+ div1 r5,r1
+ swap.w r0,r7
+ rotcl r6
+ or r7,r0
+ div1 r5,r1
+ add #-0x80,r0
+ rotcl r6
+ extu.w r0,r0
+ div1 r5,r1
+ neg r0,r0
+ rotcl r6
+ swap.w r0,r0
+ div1 r5,r1
+ mov.l @r15+,r7
+ and r6,r0
+ rotcl r6
+ div1 r5,r1
+ shll2 r0
+ rotcl r6
+ exts.b r0,r0
+ div1 r5,r1
+ swap.w r0,r0
+ exts.w r0,r1
+ exts.b r6,r0
+ mov.l @r15+,r6
+ rotcl r0
+ rts
+ sub r1,r0
+ ! 31 cycles up to here
+
+ .balign 4
+LOCAL(udiv_ge64k): ! 3 cycles up to here
+ mov r4,r0
+ shlr8 r0
+ div0u
+ cmp/hi r0,r5
+ bt LOCAL(udiv_r8)
+ mov.l r5,@-r15
+ shll8 r5
+ ! 7 cycles up to here
+ .rept 8
+ div1 r5,r0
+ .endr
+ extu.b r4,r1 ! 15 cycles up to here
+ extu.b r0,r6
+ xor r1,r0
+ xor r6,r0
+ swap.b r6,r6
+ .rept 8
+ div1 r5,r0
+ .endr ! 25 cycles up to here
+ extu.b r0,r0
+ mov.l @r15+,r5
+ or r6,r0
+ mov.l @r15+,r6
+ rts
+ rotcl r0 ! 28 cycles up to here
+
+ .balign 4
+LOCAL(udiv_r8): ! 6 cycles up to here
+ mov.l r4,@-r15
+ shll16 r4
+ shll8 r4
+ !
+ shll r4
+ mov r0,r1
+ div1 r5,r1
+ mov r4,r0
+ rotcl r0
+ mov.l @r15+,r4
+ div1 r5,r1
+ ! 12 cycles up to here
+ .rept 6
+ rotcl r0; div1 r5,r1
+ .endr
+ mov.l @r15+,r6 ! 24 cycles up to here
+ rts
+ rotcl r0
+
+ .balign 4
+LOCAL(div_ge32k): ! 6 cycles up to here
+ mov.l r7,@-r15
+ swap.w r5,r6
+ exts.b r6,r7
+ exts.w r6,r6
+ cmp/eq r6,r7
+ extu.b r1,r6
+ bf/s LOCAL(div_ge8m)
+ cmp/hi r1,r4 ! copy sign bit of r4 into T
+ rotcr r1 ! signed shift must use original sign from r4
+ div0s r5,r4
+ shad r0,r1
+ shll8 r5
+ div1 r5,r1
+ mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
+ div1 r5,r1
+ shlr8 r7
+ div1 r5,r1
+ swap.w r4,r0
+ div1 r5,r1
+ swap.b r0,r0
+ div1 r5,r1
+ or r0,r7
+ div1 r5,r1
+ add #-80,r7
+ div1 r5,r1
+ swap.w r7,r0
+ div1 r5,r1
+ or r0,r7
+ extu.b r1,r0
+ xor r6,r1
+ xor r0,r1
+ exts.b r0,r0
+ div1 r5,r1
+ extu.w r7,r7
+ div1 r5,r1
+ neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
+ div1 r5,r1
+ and r0,r7
+ div1 r5,r1
+ swap.w r7,r7 ! 26 cycles up to here.
+ div1 r5,r1
+ shll8 r0
+ div1 r5,r1
+ exts.w r7,r7
+ div1 r5,r1
+ add r0,r0
+ div1 r5,r1
+ sub r7,r0
+ extu.b r1,r1
+ mov.l @r15+,r7
+ rotcl r1
+ mov.l @r15+,r6
+ add r1,r0
+ mov #-8,r1
+ rts
+ shad r1,r5 ! 34 cycles up to here
+
+ .balign 4
+GLOBAL(udivsi3_i4i):
+ mov.l r6,@-r15
+ extu.w r5,r6
+ cmp/eq r5,r6
+ mov #0x7f,r0
+ bf LOCAL(udiv_ge64k)
+ cmp/hi r0,r5
+ bf LOCAL(udiv_le128)
+ mov r4,r1
+ shlr8 r1
+ div0u
+ shlr r1
+ shll16 r6
+ div1 r6,r1
+ extu.b r4,r0 ! 7 cycles up to here
+ .rept 8
+ div1 r6,r1
+ .endr ! 15 cycles up to here
+ xor r1,r0 ! xor dividend with result lsb
+ .rept 6
+ div1 r6,r1
+ .endr
+ mov.l r7,@-r15 ! 21 cycles up to here
+ div1 r6,r1
+ extu.b r0,r7
+ div1 r6,r1
+ shll8 r7
+ extu.w r1,r0
+ xor r7,r1 ! replace lsb of result with lsb of dividend
+ div1 r6,r1
+ mov #0,r7
+ div1 r6,r1
+ !
+ div1 r6,r1
+ bra LOCAL(div_end)
+ div1 r6,r1 ! 28 cycles up to here
+
+	/* This is link-compatible with a GLOBAL(sdivsi3) call,
+	   but we effectively clobber only r1, macl and mach.  */
+	/* Because negative quotients are calculated as one's complements,
+	   -0x80000000 divided by the smallest positive divisor of a
+	   range (0x80, 0x8000, 0x800000) causes saturation in the one's
+ complement representation, and we have to suppress the
+ one's -> two's complement adjustment. Since positive numbers
+ don't get such an adjustment, it's OK to also compute one's -> two's
+ complement adjustment suppression for a dividend of 0. */
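Behaviorally, the routine implements C-style truncating signed division
on top of the unsigned machinery.  A plain C model of that contract (a
sketch with a hypothetical name; the assembly reaches the same result
without materializing absolute values, via the one's complement
adjustment described above; b == 0 is excluded):

    #include <stdint.h>

    int32_t sdivsi3_model (int32_t a, int32_t b)	/* b != 0 */
    {
      uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
      uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
      uint32_t q = ua / ub;				/* quotient magnitude */
      /* Negate via unsigned arithmetic so INT_MIN inputs stay defined;
	 the final conversion relies on two's complement wraparound.  */
      uint32_t r = ((a < 0) != (b < 0)) ? 0u - q : q;
      return (int32_t) r;
    }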
+ .balign 4
+GLOBAL(sdivsi3_i4i):
+ mov.l r6,@-r15
+ exts.b r5,r6
+ cmp/eq r5,r6
+ mov #-1,r1
+ bt/s LOCAL(div_le128)
+ cmp/pz r4
+ addc r4,r1
+ exts.w r5,r6
+ cmp/eq r5,r6
+ mov #-7,r0
+ bf/s LOCAL(div_ge32k)
+ cmp/hi r1,r4 ! copy sign bit of r4 into T
+ rotcr r1
+ shll16 r6 ! 7 cycles up to here
+ shad r0,r1
+ div0s r5,r4
+ div1 r6,r1
+ mov.l r7,@-r15
+ div1 r6,r1
+ mov r4,r0 ! re-compute adjusted dividend
+ div1 r6,r1
+ mov #-31,r7
+ div1 r6,r1
+ shad r7,r0
+ div1 r6,r1
+ add r4,r0 ! adjusted dividend
+ div1 r6,r1
+ mov.l r8,@-r15
+ div1 r6,r1
+ swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
+ div1 r6,r1
+ swap.b r8,r8
+ xor r1,r0 ! xor dividend with result lsb
+ div1 r6,r1
+ div1 r6,r1
+ or r5,r8
+ div1 r6,r1
+ add #-0x80,r8 ! r8 is 0 iff there is a match
+ div1 r6,r1
+ swap.w r8,r7 ! or upper 16 bits...
+ div1 r6,r1
+ or r7,r8 !...into lower 16 bits
+ div1 r6,r1
+ extu.w r8,r8
+ div1 r6,r1
+ extu.b r0,r7
+ div1 r6,r1
+ shll8 r7
+ exts.w r1,r0
+ xor r7,r1 ! replace lsb of result with lsb of dividend
+ div1 r6,r1
+ neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
+ div1 r6,r1
+ and r0,r8
+ div1 r6,r1
+ swap.w r8,r7
+ div1 r6,r1
+ mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
+LOCAL(div_end):
+ div1 r6,r1
+ shll8 r0
+ div1 r6,r1
+ exts.w r7,r7
+ div1 r6,r1
+ add r0,r0
+ div1 r6,r1
+ sub r7,r0
+ extu.b r1,r1
+ mov.l @r15+,r7
+ rotcl r1
+ mov.l @r15+,r6
+ rts
+ add r1,r0
+
+ .balign 4
+LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
+ mova LOCAL(div_table_inv),r0
+ shll2 r6
+ mov.l @(r0,r6),r1
+ mova LOCAL(div_table_clz),r0
+ lds r4,mach
+ !
+ !
+ !
+ tst r1,r1
+ !
+ bt 0f
+ dmulu.l r1,r4
+0: mov.b @(r0,r5),r1
+ clrt
+ !
+ !
+ sts mach,r0
+ addc r4,r0
+ rotcr r0
+ mov.l @r15+,r6
+ rts
+ shld r1,r0
+
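A standalone C model of udiv_le128 (a sketch; the two table entries are
recomputed with the divtab-sh4-300.c formulas instead of being read from
memory, and exactness rests on the generator's error analysis):

    #include <stdint.h>

    uint32_t udiv_le128_model (uint32_t x, uint32_t d)	/* 1 <= d <= 127 */
    {
      /* div_table_clz entry: n - 7, where doubling d reaches >= 128
	 in n steps.  */
      int n = 0;
      uint32_t j = d;
      while (j < 128)
	{ n++; j += j; }

      /* div_table_inv entry: low 32 bits of ceil(2^39 / j'), with j'
	 normalized into [64..128).  Powers of two truncate to 0; their
	 implicit 1 lands in bit 33.  */
      j = d;
      while (j < 64)
	j += j;
      uint32_t inv = (uint32_t) ((((uint64_t) 1 << 39) + j - 1) / j);

      /* dmulu.l / sts mach / addc / rotcr: halve the 33-bit sum of
	 high32 (x * inv) and x.  When inv == 0 the multiply is skipped
	 and mach still holds x, so the sum is simply 2 * x.  */
      uint64_t t = inv ? (((uint64_t) x * inv) >> 32) + x : 2 * (uint64_t) x;
      uint32_t q = (uint32_t) (t >> 1);

      return q >> (7 - n);		/* shld with the negative count n - 7 */
    }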
+ .balign 4
+LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
+ mova LOCAL(div_table_inv),r0
+ shll2 r6
+ mov.l @(r0,r6),r1
+ mova LOCAL(div_table_clz),r0
+ neg r4,r6
+ bf 0f
+ mov r4,r6
+0: lds r6,mach
+ tst r1,r1
+ bt 0f
+ dmulu.l r1,r6
+0: div0s r4,r5
+ mov.b @(r0,r5),r1
+ bt/s LOCAL(le128_neg)
+ clrt
+ !
+ sts mach,r0
+ addc r6,r0
+ rotcr r0
+ mov.l @r15+,r6
+ rts
+ shld r1,r0
+
+/* Could trap divide by zero for the cost of one cycle more mispredict penalty:
+...
+ dmulu.l r1,r6
+0: div0s r4,r5
+ bt/s LOCAL(le128_neg)
+ tst r5,r5
+ bt LOCAL(div_by_zero)
+ mov.b @(r0,r5),r1
+ sts mach,r0
+ addc r6,r0
+...
+LOCAL(div_by_zero):
+ trapa #
+ .balign 4
+LOCAL(le128_neg):
+ bt LOCAL(div_by_zero)
+ mov.b @(r0,r5),r1
+ sts mach,r0
+ addc r6,r0
+... */
+
+ .balign 4
+LOCAL(le128_neg):
+ sts mach,r0
+ addc r6,r0
+ rotcr r0
+ mov.l @r15+,r6
+ shad r1,r0
+ rts
+ neg r0,r0
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+
+/* This table has been generated by divtab-sh4-300.c. */
+ .balign 4
+ .byte -7
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -1
+ .byte -1
+ .byte 0
+LOCAL(div_table_clz):
+ .byte 0
+ .byte 0
+ .byte -1
+ .byte -1
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
+ or in bit 33 for powers of two. */
+ .balign 4
+ .long 0x0
+ .long 0x2040811
+ .long 0x4104105
+ .long 0x624DD30
+ .long 0x8421085
+ .long 0xA6810A7
+ .long 0xC9714FC
+ .long 0xECF56BF
+ .long 0x11111112
+ .long 0x135C8114
+ .long 0x15B1E5F8
+ .long 0x18118119
+ .long 0x1A7B9612
+ .long 0x1CF06ADB
+ .long 0x1F7047DD
+ .long 0x21FB7813
+ .long 0x24924925
+ .long 0x27350B89
+ .long 0x29E4129F
+ .long 0x2C9FB4D9
+ .long 0x2F684BDB
+ .long 0x323E34A3
+ .long 0x3521CFB3
+ .long 0x38138139
+ .long 0x3B13B13C
+ .long 0x3E22CBCF
+ .long 0x41414142
+ .long 0x446F8657
+ .long 0x47AE147B
+ .long 0x4AFD6A06
+ .long 0x4E5E0A73
+ .long 0x51D07EAF
+ .long 0x55555556
+ .long 0x58ED2309
+ .long 0x5C9882BA
+ .long 0x60581606
+ .long 0x642C8591
+ .long 0x68168169
+ .long 0x6C16C16D
+ .long 0x702E05C1
+ .long 0x745D1746
+ .long 0x78A4C818
+ .long 0x7D05F418
+ .long 0x81818182
+ .long 0x86186187
+ .long 0x8ACB90F7
+ .long 0x8F9C18FA
+ .long 0x948B0FCE
+ .long 0x9999999A
+ .long 0x9EC8E952
+ .long 0xA41A41A5
+ .long 0xA98EF607
+ .long 0xAF286BCB
+ .long 0xB4E81B4F
+ .long 0xBACF914D
+ .long 0xC0E07039
+ .long 0xC71C71C8
+ .long 0xCD856891
+ .long 0xD41D41D5
+ .long 0xDAE6076C
+ .long 0xE1E1E1E2
+ .long 0xE9131AC0
+ .long 0xF07C1F08
+ .long 0xF81F81F9
+ .long 0x0
+ .long 0x4104105
+ .long 0x8421085
+ .long 0xC9714FC
+ .long 0x11111112
+ .long 0x15B1E5F8
+ .long 0x1A7B9612
+ .long 0x1F7047DD
+ .long 0x24924925
+ .long 0x29E4129F
+ .long 0x2F684BDB
+ .long 0x3521CFB3
+ .long 0x3B13B13C
+ .long 0x41414142
+ .long 0x47AE147B
+ .long 0x4E5E0A73
+ .long 0x55555556
+ .long 0x5C9882BA
+ .long 0x642C8591
+ .long 0x6C16C16D
+ .long 0x745D1746
+ .long 0x7D05F418
+ .long 0x86186187
+ .long 0x8F9C18FA
+ .long 0x9999999A
+ .long 0xA41A41A5
+ .long 0xAF286BCB
+ .long 0xBACF914D
+ .long 0xC71C71C8
+ .long 0xD41D41D5
+ .long 0xE1E1E1E2
+ .long 0xF07C1F08
+ .long 0x0
+ .long 0x8421085
+ .long 0x11111112
+ .long 0x1A7B9612
+ .long 0x24924925
+ .long 0x2F684BDB
+ .long 0x3B13B13C
+ .long 0x47AE147B
+ .long 0x55555556
+ .long 0x642C8591
+ .long 0x745D1746
+ .long 0x86186187
+ .long 0x9999999A
+ .long 0xAF286BCB
+ .long 0xC71C71C8
+ .long 0xE1E1E1E2
+ .long 0x0
+ .long 0x11111112
+ .long 0x24924925
+ .long 0x3B13B13C
+ .long 0x55555556
+ .long 0x745D1746
+ .long 0x9999999A
+ .long 0xC71C71C8
+ .long 0x0
+ .long 0x24924925
+ .long 0x55555556
+ .long 0x9999999A
+ .long 0x0
+ .long 0x55555556
+ .long 0x0
+ .long 0x0
+LOCAL(div_table_inv):
+ .long 0x0
+ .long 0x0
+ .long 0x0
+ .long 0x55555556
+ .long 0x0
+ .long 0x9999999A
+ .long 0x55555556
+ .long 0x24924925
+ .long 0x0
+ .long 0xC71C71C8
+ .long 0x9999999A
+ .long 0x745D1746
+ .long 0x55555556
+ .long 0x3B13B13C
+ .long 0x24924925
+ .long 0x11111112
+ .long 0x0
+ .long 0xE1E1E1E2
+ .long 0xC71C71C8
+ .long 0xAF286BCB
+ .long 0x9999999A
+ .long 0x86186187
+ .long 0x745D1746
+ .long 0x642C8591
+ .long 0x55555556
+ .long 0x47AE147B
+ .long 0x3B13B13C
+ .long 0x2F684BDB
+ .long 0x24924925
+ .long 0x1A7B9612
+ .long 0x11111112
+ .long 0x8421085
+ .long 0x0
+ .long 0xF07C1F08
+ .long 0xE1E1E1E2
+ .long 0xD41D41D5
+ .long 0xC71C71C8
+ .long 0xBACF914D
+ .long 0xAF286BCB
+ .long 0xA41A41A5
+ .long 0x9999999A
+ .long 0x8F9C18FA
+ .long 0x86186187
+ .long 0x7D05F418
+ .long 0x745D1746
+ .long 0x6C16C16D
+ .long 0x642C8591
+ .long 0x5C9882BA
+ .long 0x55555556
+ .long 0x4E5E0A73
+ .long 0x47AE147B
+ .long 0x41414142
+ .long 0x3B13B13C
+ .long 0x3521CFB3
+ .long 0x2F684BDB
+ .long 0x29E4129F
+ .long 0x24924925
+ .long 0x1F7047DD
+ .long 0x1A7B9612
+ .long 0x15B1E5F8
+ .long 0x11111112
+ .long 0xC9714FC
+ .long 0x8421085
+ .long 0x4104105
+ .long 0x0
+ .long 0xF81F81F9
+ .long 0xF07C1F08
+ .long 0xE9131AC0
+ .long 0xE1E1E1E2
+ .long 0xDAE6076C
+ .long 0xD41D41D5
+ .long 0xCD856891
+ .long 0xC71C71C8
+ .long 0xC0E07039
+ .long 0xBACF914D
+ .long 0xB4E81B4F
+ .long 0xAF286BCB
+ .long 0xA98EF607
+ .long 0xA41A41A5
+ .long 0x9EC8E952
+ .long 0x9999999A
+ .long 0x948B0FCE
+ .long 0x8F9C18FA
+ .long 0x8ACB90F7
+ .long 0x86186187
+ .long 0x81818182
+ .long 0x7D05F418
+ .long 0x78A4C818
+ .long 0x745D1746
+ .long 0x702E05C1
+ .long 0x6C16C16D
+ .long 0x68168169
+ .long 0x642C8591
+ .long 0x60581606
+ .long 0x5C9882BA
+ .long 0x58ED2309
+ .long 0x55555556
+ .long 0x51D07EAF
+ .long 0x4E5E0A73
+ .long 0x4AFD6A06
+ .long 0x47AE147B
+ .long 0x446F8657
+ .long 0x41414142
+ .long 0x3E22CBCF
+ .long 0x3B13B13C
+ .long 0x38138139
+ .long 0x3521CFB3
+ .long 0x323E34A3
+ .long 0x2F684BDB
+ .long 0x2C9FB4D9
+ .long 0x29E4129F
+ .long 0x27350B89
+ .long 0x24924925
+ .long 0x21FB7813
+ .long 0x1F7047DD
+ .long 0x1CF06ADB
+ .long 0x1A7B9612
+ .long 0x18118119
+ .long 0x15B1E5F8
+ .long 0x135C8114
+ .long 0x11111112
+ .long 0xECF56BF
+ .long 0xC9714FC
+ .long 0xA6810A7
+ .long 0x8421085
+ .long 0x624DD30
+ .long 0x4104105
+ .long 0x2040811
+ /* maximum error: 0.987342 scaled: 0.921875*/
+
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+#endif /* !__SHMEDIA__ */
diff --git a/gcc/config/sh/lib1funcs-Os-4-200.asm b/gcc/config/sh/lib1funcs-Os-4-200.asm
new file mode 100644
index 000000000..aae57ccd3
--- /dev/null
+++ b/gcc/config/sh/lib1funcs-Os-4-200.asm
@@ -0,0 +1,322 @@
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Moderately space-optimized libgcc routines for the Renesas SH /
+ STMicroelectronics ST40 CPUs.
+ Contributed by J"orn Rennecke joern.rennecke@st.com. */
+
+#include "lib1funcs.h"
+
+#if !__SHMEDIA__
+#ifdef L_udivsi3_i4i
+
+/* 88 bytes; sh4-200 cycle counts:
+ divisor >= 2G: 11 cycles
+ dividend < 2G: 48 cycles
+   dividend >= 2G, divisor != 1: 54 cycles
+ dividend >= 2G, divisor == 1: 22 cycles */
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in r0, clobber r1
+
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ mova L1,r0
+ cmp/pz r5
+ sts fpscr,r1
+ lds.l @r0+,fpscr
+ sts.l fpul,@-r15
+ bf LOCAL(huge_divisor)
+ mov.l r1,@-r15
+ lds r4,fpul
+ cmp/pz r4
+#ifdef FMOVD_WORKS
+ fmov.d dr0,@-r15
+ float fpul,dr0
+ fmov.d dr2,@-r15
+ bt LOCAL(dividend_adjusted)
+ mov #1,r1
+ fmov.d @r0,dr2
+ cmp/eq r1,r5
+ bt LOCAL(div_by_1)
+ fadd dr2,dr0
+LOCAL(dividend_adjusted):
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+LOCAL(div_by_1):
+ fmov.d @r15+,dr2
+ ftrc dr0,fpul
+ fmov.d @r15+,dr0
+#else /* !FMOVD_WORKS */
+ fmov.s DR01,@-r15
+ mov #1,r1
+ fmov.s DR00,@-r15
+ float fpul,dr0
+ fmov.s DR21,@-r15
+ bt/s LOCAL(dividend_adjusted)
+ fmov.s DR20,@-r15
+ cmp/eq r1,r5
+ bt LOCAL(div_by_1)
+ fmov.s @r0+,DR20
+ fmov.s @r0,DR21
+ fadd dr2,dr0
+LOCAL(dividend_adjusted):
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+LOCAL(div_by_1):
+ fmov.s @r15+,DR20
+ fmov.s @r15+,DR21
+ ftrc dr0,fpul
+ fmov.s @r15+,DR00
+ fmov.s @r15+,DR01
+#endif /* !FMOVD_WORKS */
+ lds.l @r15+,fpscr
+ sts fpul,r0
+ rts
+ lds.l @r15+,fpul
+
+#ifdef FMOVD_WORKS
+ .p2align 3 ! make double below 8 byte aligned.
+#endif
+LOCAL(huge_divisor):
+ lds r1,fpscr
+ add #4,r15
+ cmp/hs r5,r4
+ rts
+ movt r0
+
+ .p2align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+ .double 4294967296
+
+ ENDFUNC(GLOBAL(udivsi3_i4i))
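The arithmetic of the FPU-based routine above, modeled in plain C (a
sketch with a hypothetical name; d != 0 is assumed, and the double
conversions are exact because both operands fit in 53 bits).  The
assembly additionally special-cases divisor 1 for dividends >= 2^31,
since ftrc would saturate there; the C cast needs no such case.

    #include <stdint.h>

    uint32_t udivsi3_i4i_model (uint32_t x, uint32_t d)	/* d != 0 */
    {
      if (d & 0x80000000u)		/* huge_divisor: quotient is 0 or 1 */
	return x >= d;
      double fx = (double) (int32_t) x;	/* float fpul,dr0 reads fpul as signed */
      if ((int32_t) x < 0)
	fx += 4294967296.0;		/* fadd of the .double constant re-biases */
      return (uint32_t) (fx / (double) d);	/* fdiv, then ftrc truncates */
    }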
+#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
+
+#if 0
+/* With 36 bytes, the following would probably be the most compact
+ implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
+GLOBAL(udivsi3_i4i):
+ mov.l r2,@-r15
+ mov #0,r1
+ div0u
+ mov r1,r2
+ mov.l r3,@-r15
+ mov r1,r3
+ sett
+ mov r4,r0
+LOCAL(loop):
+ rotcr r2
+ ;
+ bt/s LOCAL(end)
+ cmp/gt r2,r3
+ rotcl r0
+ bra LOCAL(loop)
+ div1 r5,r1
+LOCAL(end):
+ rotcl r0
+ mov.l @r15+,r3
+ rts
+ mov.l @r15+,r2
+#endif /* 0 */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+ sh4-200 run times:
+ udiv small divisor: 55 cycles
+ udiv large divisor: 52 cycles
+ sdiv small divisor, positive result: 59 cycles
+ sdiv large divisor, positive result: 56 cycles
+ sdiv small divisor, negative result: 65 cycles (*)
+ sdiv large divisor, negative result: 62 cycles (*)
+ (*): r2 is restored in the rts delay slot and has a lingering latency
+ of two more cycles. */
+ .balign 4
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+ FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ sts pr,r1
+ mov.l r4,@-r15
+ extu.w r5,r0
+ cmp/eq r5,r0
+ swap.w r4,r0
+ shlr16 r4
+ bf/s LOCAL(large_divisor)
+ div0u
+ mov.l r5,@-r15
+ shll16 r5
+LOCAL(sdiv_small_divisor):
+ div1 r5,r4
+ bsr LOCAL(div6)
+ div1 r5,r4
+ div1 r5,r4
+ bsr LOCAL(div6)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div7)
+ swap.w r4,r4
+ div1 r5,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ mov.l @r15+,r5
+ swap.w r0,r0
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+LOCAL(div7):
+ div1 r5,r4
+LOCAL(div6):
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx3):
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ rts
+ div1 r5,r4
+
+LOCAL(large_divisor):
+ mov.l r5,@-r15
+LOCAL(sdiv_large_divisor):
+ xor r4,r0
+ .rept 4
+ rotcl r0
+ bsr LOCAL(divx3)
+ div1 r5,r4
+ .endr
+ mov.l @r15+,r5
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+
+ .global GLOBAL(sdivsi3_i4i)
+GLOBAL(sdivsi3_i4i):
+ mov.l r4,@-r15
+ cmp/pz r5
+ mov.l r5,@-r15
+ bt/s LOCAL(pos_divisor)
+ cmp/pz r4
+ neg r5,r5
+ extu.w r5,r0
+ bt/s LOCAL(neg_result)
+ cmp/eq r5,r0
+ neg r4,r4
+LOCAL(pos_result):
+ swap.w r4,r0
+ bra LOCAL(sdiv_check_divisor)
+ sts pr,r1
+LOCAL(pos_divisor):
+ extu.w r5,r0
+ bt/s LOCAL(pos_result)
+ cmp/eq r5,r0
+ neg r4,r4
+LOCAL(neg_result):
+ mova LOCAL(negate_result),r0
+ ;
+ mov r0,r1
+ swap.w r4,r0
+ lds r2,macl
+ sts pr,r2
+LOCAL(sdiv_check_divisor):
+ shlr16 r4
+ bf/s LOCAL(sdiv_large_divisor)
+ div0u
+ bra LOCAL(sdiv_small_divisor)
+ shll16 r5
+ .balign 4
+LOCAL(negate_result):
+ neg r0,r0
+ jmp @r2
+ sts macl,r2
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* !__SH_FPU_DOUBLE__ */
+#endif /* L_udivsi3_i4i */
+
+#ifdef L_sdivsi3_i4i
+#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
+/* 48 bytes, 45 cycles on sh4-200 */
+!! args in r4 and r5, result in r0, clobber r1
+
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(sdivsi3_i4i))
+GLOBAL(sdivsi3_i4i):
+ sts.l fpscr,@-r15
+ sts fpul,r1
+ mova L1,r0
+ lds.l @r0+,fpscr
+ lds r4,fpul
+#ifdef FMOVD_WORKS
+ fmov.d dr0,@-r15
+ float fpul,dr0
+ lds r5,fpul
+ fmov.d dr2,@-r15
+#else
+ fmov.s DR01,@-r15
+ fmov.s DR00,@-r15
+ float fpul,dr0
+ lds r5,fpul
+ fmov.s DR21,@-r15
+ fmov.s DR20,@-r15
+#endif
+ float fpul,dr2
+ fdiv dr2,dr0
+#ifdef FMOVD_WORKS
+ fmov.d @r15+,dr2
+#else
+ fmov.s @r15+,DR20
+ fmov.s @r15+,DR21
+#endif
+ ftrc dr0,fpul
+#ifdef FMOVD_WORKS
+ fmov.d @r15+,dr0
+#else
+ fmov.s @r15+,DR00
+ fmov.s @r15+,DR01
+#endif
+ lds.l @r15+,fpscr
+ sts fpul,r0
+ rts
+ lds r1,fpul
+
+ .p2align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* __SH_FPU_DOUBLE__ */
+#endif /* L_sdivsi3_i4i */
+#endif /* !__SHMEDIA__ */
diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm
new file mode 100644
index 000000000..2f0ca16cd
--- /dev/null
+++ b/gcc/config/sh/lib1funcs.asm
@@ -0,0 +1,3933 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+!! libgcc routines for the Renesas / SuperH SH CPUs.
+!! Contributed by Steve Chamberlain.
+!! sac@cygnus.com
+
+!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
+!! recoded in assembly by Toshiyasu Morita
+!! tm@netcom.com
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+.previous
+#endif
+
+/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
+ ELF local label prefixes by J"orn Rennecke
+ amylaar@cygnus.com */
+
+#include "lib1funcs.h"
+
+/* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
+ so it is more convenient to define NO_FPSCR_VALUES here than to
+ define it on the command line. */
+#if defined __vxworks && defined __PIC__
+#define NO_FPSCR_VALUES
+#endif
+
+#if ! __SH5__
+#ifdef L_ashiftrt
+ .global GLOBAL(ashiftrt_r4_0)
+ .global GLOBAL(ashiftrt_r4_1)
+ .global GLOBAL(ashiftrt_r4_2)
+ .global GLOBAL(ashiftrt_r4_3)
+ .global GLOBAL(ashiftrt_r4_4)
+ .global GLOBAL(ashiftrt_r4_5)
+ .global GLOBAL(ashiftrt_r4_6)
+ .global GLOBAL(ashiftrt_r4_7)
+ .global GLOBAL(ashiftrt_r4_8)
+ .global GLOBAL(ashiftrt_r4_9)
+ .global GLOBAL(ashiftrt_r4_10)
+ .global GLOBAL(ashiftrt_r4_11)
+ .global GLOBAL(ashiftrt_r4_12)
+ .global GLOBAL(ashiftrt_r4_13)
+ .global GLOBAL(ashiftrt_r4_14)
+ .global GLOBAL(ashiftrt_r4_15)
+ .global GLOBAL(ashiftrt_r4_16)
+ .global GLOBAL(ashiftrt_r4_17)
+ .global GLOBAL(ashiftrt_r4_18)
+ .global GLOBAL(ashiftrt_r4_19)
+ .global GLOBAL(ashiftrt_r4_20)
+ .global GLOBAL(ashiftrt_r4_21)
+ .global GLOBAL(ashiftrt_r4_22)
+ .global GLOBAL(ashiftrt_r4_23)
+ .global GLOBAL(ashiftrt_r4_24)
+ .global GLOBAL(ashiftrt_r4_25)
+ .global GLOBAL(ashiftrt_r4_26)
+ .global GLOBAL(ashiftrt_r4_27)
+ .global GLOBAL(ashiftrt_r4_28)
+ .global GLOBAL(ashiftrt_r4_29)
+ .global GLOBAL(ashiftrt_r4_30)
+ .global GLOBAL(ashiftrt_r4_31)
+ .global GLOBAL(ashiftrt_r4_32)
+
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
+ HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
+
+ .align 1
+GLOBAL(ashiftrt_r4_32):
+GLOBAL(ashiftrt_r4_31):
+ rotcl r4
+ rts
+ subc r4,r4
+
+GLOBAL(ashiftrt_r4_30):
+ shar r4
+GLOBAL(ashiftrt_r4_29):
+ shar r4
+GLOBAL(ashiftrt_r4_28):
+ shar r4
+GLOBAL(ashiftrt_r4_27):
+ shar r4
+GLOBAL(ashiftrt_r4_26):
+ shar r4
+GLOBAL(ashiftrt_r4_25):
+ shar r4
+GLOBAL(ashiftrt_r4_24):
+ shlr16 r4
+ shlr8 r4
+ rts
+ exts.b r4,r4
+
+GLOBAL(ashiftrt_r4_23):
+ shar r4
+GLOBAL(ashiftrt_r4_22):
+ shar r4
+GLOBAL(ashiftrt_r4_21):
+ shar r4
+GLOBAL(ashiftrt_r4_20):
+ shar r4
+GLOBAL(ashiftrt_r4_19):
+ shar r4
+GLOBAL(ashiftrt_r4_18):
+ shar r4
+GLOBAL(ashiftrt_r4_17):
+ shar r4
+GLOBAL(ashiftrt_r4_16):
+ shlr16 r4
+ rts
+ exts.w r4,r4
+
+GLOBAL(ashiftrt_r4_15):
+ shar r4
+GLOBAL(ashiftrt_r4_14):
+ shar r4
+GLOBAL(ashiftrt_r4_13):
+ shar r4
+GLOBAL(ashiftrt_r4_12):
+ shar r4
+GLOBAL(ashiftrt_r4_11):
+ shar r4
+GLOBAL(ashiftrt_r4_10):
+ shar r4
+GLOBAL(ashiftrt_r4_9):
+ shar r4
+GLOBAL(ashiftrt_r4_8):
+ shar r4
+GLOBAL(ashiftrt_r4_7):
+ shar r4
+GLOBAL(ashiftrt_r4_6):
+ shar r4
+GLOBAL(ashiftrt_r4_5):
+ shar r4
+GLOBAL(ashiftrt_r4_4):
+ shar r4
+GLOBAL(ashiftrt_r4_3):
+ shar r4
+GLOBAL(ashiftrt_r4_2):
+ shar r4
+GLOBAL(ashiftrt_r4_1):
+ rts
+ shar r4
+
+GLOBAL(ashiftrt_r4_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashiftrt_r4_0))
+ ENDFUNC(GLOBAL(ashiftrt_r4_1))
+ ENDFUNC(GLOBAL(ashiftrt_r4_2))
+ ENDFUNC(GLOBAL(ashiftrt_r4_3))
+ ENDFUNC(GLOBAL(ashiftrt_r4_4))
+ ENDFUNC(GLOBAL(ashiftrt_r4_5))
+ ENDFUNC(GLOBAL(ashiftrt_r4_6))
+ ENDFUNC(GLOBAL(ashiftrt_r4_7))
+ ENDFUNC(GLOBAL(ashiftrt_r4_8))
+ ENDFUNC(GLOBAL(ashiftrt_r4_9))
+ ENDFUNC(GLOBAL(ashiftrt_r4_10))
+ ENDFUNC(GLOBAL(ashiftrt_r4_11))
+ ENDFUNC(GLOBAL(ashiftrt_r4_12))
+ ENDFUNC(GLOBAL(ashiftrt_r4_13))
+ ENDFUNC(GLOBAL(ashiftrt_r4_14))
+ ENDFUNC(GLOBAL(ashiftrt_r4_15))
+ ENDFUNC(GLOBAL(ashiftrt_r4_16))
+ ENDFUNC(GLOBAL(ashiftrt_r4_17))
+ ENDFUNC(GLOBAL(ashiftrt_r4_18))
+ ENDFUNC(GLOBAL(ashiftrt_r4_19))
+ ENDFUNC(GLOBAL(ashiftrt_r4_20))
+ ENDFUNC(GLOBAL(ashiftrt_r4_21))
+ ENDFUNC(GLOBAL(ashiftrt_r4_22))
+ ENDFUNC(GLOBAL(ashiftrt_r4_23))
+ ENDFUNC(GLOBAL(ashiftrt_r4_24))
+ ENDFUNC(GLOBAL(ashiftrt_r4_25))
+ ENDFUNC(GLOBAL(ashiftrt_r4_26))
+ ENDFUNC(GLOBAL(ashiftrt_r4_27))
+ ENDFUNC(GLOBAL(ashiftrt_r4_28))
+ ENDFUNC(GLOBAL(ashiftrt_r4_29))
+ ENDFUNC(GLOBAL(ashiftrt_r4_30))
+ ENDFUNC(GLOBAL(ashiftrt_r4_31))
+ ENDFUNC(GLOBAL(ashiftrt_r4_32))
+#endif
+
+#ifdef L_ashiftrt_n
+
+!
+! GLOBAL(ashrsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+
+ .global GLOBAL(ashrsi3)
+ HIDDEN_FUNC(GLOBAL(ashrsi3))
+ .align 2
+GLOBAL(ashrsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashrsi3_table):
+ .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
+ .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
+
+LOCAL(ashrsi3_31):
+ rotcl r0
+ rts
+ subc r0,r0
+
+LOCAL(ashrsi3_30):
+ shar r0
+LOCAL(ashrsi3_29):
+ shar r0
+LOCAL(ashrsi3_28):
+ shar r0
+LOCAL(ashrsi3_27):
+ shar r0
+LOCAL(ashrsi3_26):
+ shar r0
+LOCAL(ashrsi3_25):
+ shar r0
+LOCAL(ashrsi3_24):
+ shlr16 r0
+ shlr8 r0
+ rts
+ exts.b r0,r0
+
+LOCAL(ashrsi3_23):
+ shar r0
+LOCAL(ashrsi3_22):
+ shar r0
+LOCAL(ashrsi3_21):
+ shar r0
+LOCAL(ashrsi3_20):
+ shar r0
+LOCAL(ashrsi3_19):
+ shar r0
+LOCAL(ashrsi3_18):
+ shar r0
+LOCAL(ashrsi3_17):
+ shar r0
+LOCAL(ashrsi3_16):
+ shlr16 r0
+ rts
+ exts.w r0,r0
+
+LOCAL(ashrsi3_15):
+ shar r0
+LOCAL(ashrsi3_14):
+ shar r0
+LOCAL(ashrsi3_13):
+ shar r0
+LOCAL(ashrsi3_12):
+ shar r0
+LOCAL(ashrsi3_11):
+ shar r0
+LOCAL(ashrsi3_10):
+ shar r0
+LOCAL(ashrsi3_9):
+ shar r0
+LOCAL(ashrsi3_8):
+ shar r0
+LOCAL(ashrsi3_7):
+ shar r0
+LOCAL(ashrsi3_6):
+ shar r0
+LOCAL(ashrsi3_5):
+ shar r0
+LOCAL(ashrsi3_4):
+ shar r0
+LOCAL(ashrsi3_3):
+ shar r0
+LOCAL(ashrsi3_2):
+ shar r0
+LOCAL(ashrsi3_1):
+ rts
+ shar r0
+
+LOCAL(ashrsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashrsi3))
+#endif
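The net effect of this helper (and of the ashlsi3/lshrsi3 siblings below,
which share the same byte-offset dispatch scheme) matches plain C shifts
with the count masked to five bits, per the mov #31 / and at the top.  A
compact reference model (names hypothetical; GCC defines >> on signed
operands as an arithmetic shift, and the masking keeps every shift count
well defined):

    #include <stdint.h>

    int32_t  ashrsi3_model (int32_t v, uint32_t n)
    { return v >> (n & 31); }			/* arithmetic right shift */

    uint32_t lshrsi3_model (uint32_t v, uint32_t n)
    { return v >> (n & 31); }			/* logical right shift */

    uint32_t ashlsi3_model (uint32_t v, uint32_t n)
    { return v << (n & 31); }			/* left shift */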
+
+#ifdef L_ashiftlt
+
+!
+! GLOBAL(ashlsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+ .global GLOBAL(ashlsi3)
+ HIDDEN_FUNC(GLOBAL(ashlsi3))
+ .align 2
+GLOBAL(ashlsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(ashlsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(ashlsi3_table):
+ .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
+ .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
+
+LOCAL(ashlsi3_6):
+ shll2 r0
+LOCAL(ashlsi3_4):
+ shll2 r0
+LOCAL(ashlsi3_2):
+ rts
+ shll2 r0
+
+LOCAL(ashlsi3_7):
+ shll2 r0
+LOCAL(ashlsi3_5):
+ shll2 r0
+LOCAL(ashlsi3_3):
+ shll2 r0
+LOCAL(ashlsi3_1):
+ rts
+ shll r0
+
+LOCAL(ashlsi3_14):
+ shll2 r0
+LOCAL(ashlsi3_12):
+ shll2 r0
+LOCAL(ashlsi3_10):
+ shll2 r0
+LOCAL(ashlsi3_8):
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_15):
+ shll2 r0
+LOCAL(ashlsi3_13):
+ shll2 r0
+LOCAL(ashlsi3_11):
+ shll2 r0
+LOCAL(ashlsi3_9):
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_22):
+ shll2 r0
+LOCAL(ashlsi3_20):
+ shll2 r0
+LOCAL(ashlsi3_18):
+ shll2 r0
+LOCAL(ashlsi3_16):
+ rts
+ shll16 r0
+
+LOCAL(ashlsi3_23):
+ shll2 r0
+LOCAL(ashlsi3_21):
+ shll2 r0
+LOCAL(ashlsi3_19):
+ shll2 r0
+LOCAL(ashlsi3_17):
+ shll16 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_30):
+ shll2 r0
+LOCAL(ashlsi3_28):
+ shll2 r0
+LOCAL(ashlsi3_26):
+ shll2 r0
+LOCAL(ashlsi3_24):
+ shll16 r0
+ rts
+ shll8 r0
+
+LOCAL(ashlsi3_31):
+ shll2 r0
+LOCAL(ashlsi3_29):
+ shll2 r0
+LOCAL(ashlsi3_27):
+ shll2 r0
+LOCAL(ashlsi3_25):
+ shll16 r0
+ shll8 r0
+ rts
+ shll r0
+
+LOCAL(ashlsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(ashlsi3))
+#endif
+
+#ifdef L_lshiftrt
+
+!
+! GLOBAL(lshrsi3)
+!
+! Entry:
+!
+! r4: Value to shift
+! r5: Shifts
+!
+! Exit:
+!
+! r0: Result
+!
+! Destroys:
+!
+! (none)
+!
+ .global GLOBAL(lshrsi3)
+ HIDDEN_FUNC(GLOBAL(lshrsi3))
+ .align 2
+GLOBAL(lshrsi3):
+ mov #31,r0
+ and r0,r5
+ mova LOCAL(lshrsi3_table),r0
+ mov.b @(r0,r5),r5
+#ifdef __sh1__
+ add r5,r0
+ jmp @r0
+#else
+ braf r5
+#endif
+ mov r4,r0
+
+ .align 2
+LOCAL(lshrsi3_table):
+ .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
+ .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
+
+LOCAL(lshrsi3_6):
+ shlr2 r0
+LOCAL(lshrsi3_4):
+ shlr2 r0
+LOCAL(lshrsi3_2):
+ rts
+ shlr2 r0
+
+LOCAL(lshrsi3_7):
+ shlr2 r0
+LOCAL(lshrsi3_5):
+ shlr2 r0
+LOCAL(lshrsi3_3):
+ shlr2 r0
+LOCAL(lshrsi3_1):
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_14):
+ shlr2 r0
+LOCAL(lshrsi3_12):
+ shlr2 r0
+LOCAL(lshrsi3_10):
+ shlr2 r0
+LOCAL(lshrsi3_8):
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_15):
+ shlr2 r0
+LOCAL(lshrsi3_13):
+ shlr2 r0
+LOCAL(lshrsi3_11):
+ shlr2 r0
+LOCAL(lshrsi3_9):
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_22):
+ shlr2 r0
+LOCAL(lshrsi3_20):
+ shlr2 r0
+LOCAL(lshrsi3_18):
+ shlr2 r0
+LOCAL(lshrsi3_16):
+ rts
+ shlr16 r0
+
+LOCAL(lshrsi3_23):
+ shlr2 r0
+LOCAL(lshrsi3_21):
+ shlr2 r0
+LOCAL(lshrsi3_19):
+ shlr2 r0
+LOCAL(lshrsi3_17):
+ shlr16 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_30):
+ shlr2 r0
+LOCAL(lshrsi3_28):
+ shlr2 r0
+LOCAL(lshrsi3_26):
+ shlr2 r0
+LOCAL(lshrsi3_24):
+ shlr16 r0
+ rts
+ shlr8 r0
+
+LOCAL(lshrsi3_31):
+ shlr2 r0
+LOCAL(lshrsi3_29):
+ shlr2 r0
+LOCAL(lshrsi3_27):
+ shlr2 r0
+LOCAL(lshrsi3_25):
+ shlr16 r0
+ shlr8 r0
+ rts
+ shlr r0
+
+LOCAL(lshrsi3_0):
+ rts
+ nop
+
+ ENDFUNC(GLOBAL(lshrsi3))
+#endif
+
+#ifdef L_movmem
+ .text
+ .balign 4
+ .global GLOBAL(movmem)
+ HIDDEN_FUNC(GLOBAL(movmem))
+ HIDDEN_ALIAS(movstr,movmem)
+ /* This would be a lot simpler if r6 contained the byte count
+ minus 64, and we wouldn't be called here for a byte count of 64. */
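+	/* Loose C sketch of the strategy (illustrative only, not compiled):
+	   copy 64-byte blocks through the movmemSI* chain below, then finish
+	   by jumping into that chain at the offset for the remainder.
+
+	   void sketch (int *dst, int *src, int nwords)
+	   {
+	     while (nwords > 16)
+	       {
+		 int i;
+		 for (i = 0; i < 16; i++)	// one 64-byte block
+		   dst[i] = src[i];
+		 dst += 16, src += 16, nwords -= 16;
+	       }
+	     while (nwords-- > 0)		// remainder, unrolled below
+	       *dst++ = *src++;
+	   }  */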
+GLOBAL(movmem):
+ sts.l pr,@-r15
+ shll2 r6
+ bsr GLOBAL(movmemSI52+2)
+ mov.l @(48,r5),r0
+ .balign 4
+LOCAL(movmem_loop): /* Reached with rts */
+ mov.l @(60,r5),r0
+ add #-64,r6
+ mov.l r0,@(60,r4)
+ tst r6,r6
+ mov.l @(56,r5),r0
+ bt LOCAL(movmem_done)
+ mov.l r0,@(56,r4)
+ cmp/pl r6
+ mov.l @(52,r5),r0
+ add #64,r5
+ mov.l r0,@(52,r4)
+ add #64,r4
+ bt GLOBAL(movmemSI52)
+! done with all the large groups; do the remainder by
+! jumping into the movmemSI* chain at the matching offset
+ mova GLOBAL(movmemSI4)+4,r0
+ add r6,r0
+ jmp @r0
+LOCAL(movmem_done): ! shares the delay-slot insn; alignment works out.
+ lds.l @r15+,pr
+ mov.l r0,@(56,r4)
+ mov.l @(52,r5),r0
+ rts
+ mov.l r0,@(52,r4)
+ .balign 4
+! ??? We need the movstr* aliases to movmem* for older libraries. These
+! aliases will be removed at some point in the future.
+ .global GLOBAL(movmemSI64)
+ HIDDEN_FUNC(GLOBAL(movmemSI64))
+ HIDDEN_ALIAS(movstrSI64,movmemSI64)
+GLOBAL(movmemSI64):
+ mov.l @(60,r5),r0
+ mov.l r0,@(60,r4)
+ .global GLOBAL(movmemSI60)
+ HIDDEN_FUNC(GLOBAL(movmemSI60))
+ HIDDEN_ALIAS(movstrSI60,movmemSI60)
+GLOBAL(movmemSI60):
+ mov.l @(56,r5),r0
+ mov.l r0,@(56,r4)
+ .global GLOBAL(movmemSI56)
+ HIDDEN_FUNC(GLOBAL(movmemSI56))
+ HIDDEN_ALIAS(movstrSI56,movmemSI56)
+GLOBAL(movmemSI56):
+ mov.l @(52,r5),r0
+ mov.l r0,@(52,r4)
+ .global GLOBAL(movmemSI52)
+ HIDDEN_FUNC(GLOBAL(movmemSI52))
+ HIDDEN_ALIAS(movstrSI52,movmemSI52)
+GLOBAL(movmemSI52):
+ mov.l @(48,r5),r0
+ mov.l r0,@(48,r4)
+ .global GLOBAL(movmemSI48)
+ HIDDEN_FUNC(GLOBAL(movmemSI48))
+ HIDDEN_ALIAS(movstrSI48,movmemSI48)
+GLOBAL(movmemSI48):
+ mov.l @(44,r5),r0
+ mov.l r0,@(44,r4)
+ .global GLOBAL(movmemSI44)
+ HIDDEN_FUNC(GLOBAL(movmemSI44))
+ HIDDEN_ALIAS(movstrSI44,movmemSI44)
+GLOBAL(movmemSI44):
+ mov.l @(40,r5),r0
+ mov.l r0,@(40,r4)
+ .global GLOBAL(movmemSI40)
+ HIDDEN_FUNC(GLOBAL(movmemSI40))
+ HIDDEN_ALIAS(movstrSI40,movmemSI40)
+GLOBAL(movmemSI40):
+ mov.l @(36,r5),r0
+ mov.l r0,@(36,r4)
+ .global GLOBAL(movmemSI36)
+ HIDDEN_FUNC(GLOBAL(movmemSI36))
+ HIDDEN_ALIAS(movstrSI36,movmemSI36)
+GLOBAL(movmemSI36):
+ mov.l @(32,r5),r0
+ mov.l r0,@(32,r4)
+ .global GLOBAL(movmemSI32)
+ HIDDEN_FUNC(GLOBAL(movmemSI32))
+ HIDDEN_ALIAS(movstrSI32,movmemSI32)
+GLOBAL(movmemSI32):
+ mov.l @(28,r5),r0
+ mov.l r0,@(28,r4)
+ .global GLOBAL(movmemSI28)
+ HIDDEN_FUNC(GLOBAL(movmemSI28))
+ HIDDEN_ALIAS(movstrSI28,movmemSI28)
+GLOBAL(movmemSI28):
+ mov.l @(24,r5),r0
+ mov.l r0,@(24,r4)
+ .global GLOBAL(movmemSI24)
+ HIDDEN_FUNC(GLOBAL(movmemSI24))
+ HIDDEN_ALIAS(movstrSI24,movmemSI24)
+GLOBAL(movmemSI24):
+ mov.l @(20,r5),r0
+ mov.l r0,@(20,r4)
+ .global GLOBAL(movmemSI20)
+ HIDDEN_FUNC(GLOBAL(movmemSI20))
+ HIDDEN_ALIAS(movstrSI20,movmemSI20)
+GLOBAL(movmemSI20):
+ mov.l @(16,r5),r0
+ mov.l r0,@(16,r4)
+ .global GLOBAL(movmemSI16)
+ HIDDEN_FUNC(GLOBAL(movmemSI16))
+ HIDDEN_ALIAS(movstrSI16,movmemSI16)
+GLOBAL(movmemSI16):
+ mov.l @(12,r5),r0
+ mov.l r0,@(12,r4)
+ .global GLOBAL(movmemSI12)
+ HIDDEN_FUNC(GLOBAL(movmemSI12))
+ HIDDEN_ALIAS(movstrSI12,movmemSI12)
+GLOBAL(movmemSI12):
+ mov.l @(8,r5),r0
+ mov.l r0,@(8,r4)
+ .global GLOBAL(movmemSI8)
+ HIDDEN_FUNC(GLOBAL(movmemSI8))
+ HIDDEN_ALIAS(movstrSI8,movmemSI8)
+GLOBAL(movmemSI8):
+ mov.l @(4,r5),r0
+ mov.l r0,@(4,r4)
+ .global GLOBAL(movmemSI4)
+ HIDDEN_FUNC(GLOBAL(movmemSI4))
+ HIDDEN_ALIAS(movstrSI4,movmemSI4)
+GLOBAL(movmemSI4):
+ mov.l @(0,r5),r0
+ rts
+ mov.l r0,@(0,r4)
+
+ ENDFUNC(GLOBAL(movmemSI64))
+ ENDFUNC(GLOBAL(movmemSI60))
+ ENDFUNC(GLOBAL(movmemSI56))
+ ENDFUNC(GLOBAL(movmemSI52))
+ ENDFUNC(GLOBAL(movmemSI48))
+ ENDFUNC(GLOBAL(movmemSI44))
+ ENDFUNC(GLOBAL(movmemSI40))
+ ENDFUNC(GLOBAL(movmemSI36))
+ ENDFUNC(GLOBAL(movmemSI32))
+ ENDFUNC(GLOBAL(movmemSI28))
+ ENDFUNC(GLOBAL(movmemSI24))
+ ENDFUNC(GLOBAL(movmemSI20))
+ ENDFUNC(GLOBAL(movmemSI16))
+ ENDFUNC(GLOBAL(movmemSI12))
+ ENDFUNC(GLOBAL(movmemSI8))
+ ENDFUNC(GLOBAL(movmemSI4))
+ ENDFUNC(GLOBAL(movmem))
+#endif
+
+#ifdef L_movmem_i4
+ .text
+ .global GLOBAL(movmem_i4_even)
+ .global GLOBAL(movmem_i4_odd)
+ .global GLOBAL(movmemSI12_i4)
+
+ HIDDEN_FUNC(GLOBAL(movmem_i4_even))
+ HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
+ HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
+
+ HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
+ HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
+ HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
+
+ .p2align 5
+L_movmem_2mod4_end:
+ mov.l r0,@(16,r4)
+ rts
+ mov.l r1,@(20,r4)
+
+ .p2align 2
+
+GLOBAL(movmem_i4_even):
+ mov.l @r5+,r0
+ bra L_movmem_start_even
+ mov.l @r5+,r1
+
+GLOBAL(movmem_i4_odd):
+ mov.l @r5+,r1
+ add #-4,r4
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r1,@(4,r4)
+ mov.l r2,@(8,r4)
+
+L_movmem_loop:
+ mov.l r3,@(12,r4)
+ dt r6
+ mov.l @r5+,r0
+ bt/s L_movmem_2mod4_end
+ mov.l @r5+,r1
+ add #16,r4
+L_movmem_start_even:
+ mov.l @r5+,r2
+ mov.l @r5+,r3
+ mov.l r0,@r4
+ dt r6
+ mov.l r1,@(4,r4)
+ bf/s L_movmem_loop
+ mov.l r2,@(8,r4)
+ rts
+ mov.l r3,@(12,r4)
+
+ ENDFUNC(GLOBAL(movmem_i4_even))
+ ENDFUNC(GLOBAL(movmem_i4_odd))
+
+ .p2align 4
+GLOBAL(movmemSI12_i4):
+ mov.l @r5,r0
+ mov.l @(4,r5),r1
+ mov.l @(8,r5),r2
+ mov.l r0,@r4
+ mov.l r1,@(4,r4)
+ rts
+ mov.l r2,@(8,r4)
+
+ ENDFUNC(GLOBAL(movmemSI12_i4))
+#endif
+
+#ifdef L_mulsi3
+
+
+ .global GLOBAL(mulsi3)
+ HIDDEN_FUNC(GLOBAL(mulsi3))
+
+! r4 = aabb
+! r5 = ccdd
+! r0 = aabb*ccdd via partial products
+!
+! if aa == 0 and cc == 0
+! r0 = bb*dd
+!
+! else
+! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
+!
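+! A C rendering of the same scheme (illustrative only, not compiled;
+! only the low 32 bits of the product survive):
+!
+!   unsigned __mulsi3 (unsigned a, unsigned b)	! a = aabb, b = ccdd
+!   {
+!     unsigned low = (a & 0xffff) * (b & 0xffff);	! bb*dd
+!     unsigned mid = (a >> 16) * (b & 0xffff)		! aa*dd
+!		   + (a & 0xffff) * (b >> 16);		! cc*bb
+!     return low + (mid << 16);
+!   }
+!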
+
+GLOBAL(mulsi3):
+ mulu.w r4,r5 ! multiply the lsws macl=bb*dd
+ mov r5,r3 ! r3 = ccdd
+ swap.w r4,r2 ! r2 = bbaa
+ xtrct r2,r3 ! r3 = aacc
+ tst r3,r3 ! msws zero ?
+ bf hiset
+ rts ! yes - then we have the answer
+ sts macl,r0
+
+hiset: sts macl,r0 ! r0 = bb*dd
+ mulu.w r2,r5 ! brewing macl = aa*dd
+ sts macl,r1
+ mulu.w r3,r4 ! brewing macl = cc*bb
+ sts macl,r2
+ add r1,r2
+ shll16 r2
+ rts
+ add r2,r0
+
+ ENDFUNC(GLOBAL(mulsi3))
+#endif
+#endif /* ! __SH5__ */
+#ifdef L_sdivsi3_i4
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber dr0, dr2
+
+ .global GLOBAL(sdivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+ ENDFUNC(GLOBAL(sdivsi3_i4))
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
+!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
+
+#if ! __SH5__ || __SH5__ == 32
+#if __SH5__
+ .mode SHcompact
+#endif
+ .global GLOBAL(sdivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
+GLOBAL(sdivsi3_i4):
+ sts.l fpscr,@-r15
+ mov #8,r2
+ swap.w r2,r2
+ lds r2,fpscr
+ lds r4,fpul
+ float fpul,dr0
+ lds r5,fpul
+ float fpul,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+ ENDFUNC(GLOBAL(sdivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_sdivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh2e/sh3e code. */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+!!
+!! Steve Chamberlain
+!! sac@cygnus.com
+!!
+!!
+
+!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
+
+ .global GLOBAL(sdivsi3)
+#if __SHMEDIA__
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+ code that follows. Note that the registers that are modified are
+ exactly those listed as clobbered in the patterns divsi3_i1 and
+ divsi3_i1_media.
+
+int __sdivsi3 (i, j)
+ int i, j;
+{
+ register unsigned long long r18 asm ("r18");
+ register unsigned long long r19 asm ("r19");
+ register unsigned long long r0 asm ("r0") = 0;
+ register unsigned long long r1 asm ("r1") = 1;
+ register int r2 asm ("r2") = i >> 31;
+ register int r3 asm ("r3") = j >> 31;
+
+ r2 = r2 ? r2 : r1;
+ r3 = r3 ? r3 : r1;
+ r18 = i * r2;
+ r19 = j * r3;
+ r2 *= r3;
+
+ r19 <<= 31;
+ r1 <<= 31;
+ do
+ if (r18 >= r19)
+ r0 |= r1, r18 -= r19;
+ while (r19 >>= 1, r1 >>= 1);
+
+ return r2 * (int)r0;
+}
+*/
+GLOBAL(sdivsi3):
+ pt/l LOCAL(sdivsi3_dontadd), tr2
+ pt/l LOCAL(sdivsi3_loop), tr1
+ ptabs/l r18, tr0
+ movi 0, r0
+ movi 1, r1
+ shari.l r4, 31, r2
+ shari.l r5, 31, r3
+ cmveq r2, r1, r2
+ cmveq r3, r1, r3
+ muls.l r4, r2, r18
+ muls.l r5, r3, r19
+ muls.l r2, r3, r2
+ shlli r19, 31, r19
+ shlli r1, 31, r1
+LOCAL(sdivsi3_loop):
+ bgtu r19, r18, tr2
+ or r0, r1, r0
+ sub r18, r19, r18
+LOCAL(sdivsi3_dontadd):
+ shlri r1, 1, r1
+ shlri r19, 1, r19
+ bnei r1, 0, tr1
+ muls.l r0, r2, r0
+ add.l r0, r63, r0
+ blink tr0, r63
+#elif 0 /* ! 0 */
+ // inputs: r4,r5
+ // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
+ // result in r0
+GLOBAL(sdivsi3):
+	// The absolute value can be created without extra latency, but
+	// this depends on proper sign extension of the inputs:
+ // shari.l r5,31,r2
+ // xor r5,r2,r20
+ // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
+ shari.l r5,31,r2
+ ori r2,1,r2
+ muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
+	movi 0xffffffffffffbb0c,r19 // shift count equiv 76
+ shari.l r4,31,r3
+ nsb r20,r0
+ shlld r20,r0,r25
+ shlri r25,48,r25
+ sub r19,r25,r1
+ mmulfx.w r1,r1,r2
+ mshflo.w r1,r63,r1
+	// If r4 were to be used in place instead of r21, this sequence
+	// could be used to compute the absolute value:
+ // sub r63,r4,r19 // compute absolute value of r4
+ // shlri r4,32,r3 // into lower 32 bit of r4, keeping
+ // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
+ ori r3,1,r3
+ mmulfx.w r25,r2,r2
+ sub r19,r0,r0
+ muls.l r4,r3,r21
+ msub.w r1,r2,r2
+ addi r2,-2,r1
+ mulu.l r21,r1,r19
+ mmulfx.w r2,r2,r2
+ shlli r1,15,r1
+ shlrd r19,r0,r19
+ mulu.l r19,r20,r3
+ mmacnfx.wl r25,r2,r1
+ ptabs r18,tr0
+ sub r21,r3,r25
+
+ mulu.l r25,r1,r2
+ addi r0,14,r0
+ xor r4,r5,r18
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ add r19,r2,r19
+ shari.l r18,31,r18
+ sub r25,r3,r25
+
+ mulu.l r25,r1,r2
+ sub r25,r20,r25
+ add r19,r18,r19
+ shlrd r2,r0,r2
+ mulu.l r2,r20,r3
+ addi r25,1,r25
+ add r19,r2,r19
+
+ cmpgt r25,r3,r25
+ add.l r19,r25,r0
+ xor r0,r18,r0
+ blink tr0,r63
+#else /* ! 0 && ! 0 */
+
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ HIDDEN_FUNC(GLOBAL(sdivsi3_2))
+#ifndef __pic__
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3): /* this is the shcompact entry point */
+ // The special SHmedia entry point sdivsi3_1 prevents accidental linking
+ // with the SHcompact implementation, which clobbers tr1 / tr2.
+ .global GLOBAL(sdivsi3_1)
+GLOBAL(sdivsi3_1):
+ .global GLOBAL(div_table_internal)
+ movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
+ shori GLOBAL(div_table_internal) & 65535, r20
+#endif
+ .global GLOBAL(sdivsi3_2)
+ // div_table in r20
+ // clobbered: r1,r18,r19,r21,r25,tr0
+GLOBAL(sdivsi3_2):
+ nsb r5, r1
+ shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25 // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ ptabs r18, tr0
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+ sub r63, r1, r1
+ addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19 // multiply by two and convert to s2.58
+ /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18 // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0 // s2.60
+ muls.l r18, r4, r25 // s32.30
+ /* bubble */
+ shari r0, 16, r19 // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+ shari r25, 63, r0
+ shari r4, 14, r18 // s19.-14
+ shari r19, 30, r19 // s-16.44
+ muls.l r19, r18, r19 // s15.30
+ xor r21, r0, r21 // You could also use the constant 1 << 27.
+ add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+#ifndef __pic__
+ ENDFUNC(GLOBAL(sdivsi3))
+#endif
+ ENDFUNC(GLOBAL(sdivsi3_2))
+#endif
+#elif defined __SHMEDIA__
+/* m5compact-nofpu */
+ // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+ pt/l LOCAL(sdivsi3_dontsub), tr0
+ pt/l LOCAL(sdivsi3_loop), tr1
+ ptabs/l r18,tr2
+ shari.l r4,31,r18
+ shari.l r5,31,r19
+ xor r4,r18,r20
+ xor r5,r19,r21
+ sub.l r20,r18,r20
+ sub.l r21,r19,r21
+ xor r18,r19,r19
+ shlli r21,32,r25
+ addi r25,-1,r21
+ addz.l r20,r63,r20
+LOCAL(sdivsi3_loop):
+ shlli r20,1,r20
+ bgeu/u r21,r20,tr0
+ sub r20,r21,r20
+LOCAL(sdivsi3_dontsub):
+ addi.l r25,-1,r25
+ bnei r25,-32,tr1
+ xor r20,r19,r20
+ sub.l r20,r19,r0
+ blink tr2,r63
+ ENDFUNC(GLOBAL(sdivsi3))
+#else /* ! __SHMEDIA__ */
+ FUNC(GLOBAL(sdivsi3))
+GLOBAL(sdivsi3):
+ mov r4,r1
+ mov r5,r0
+
+ tst r0,r0
+ bt div0
+ mov #0,r2
+ div0s r2,r1
+ subc r3,r3
+ subc r2,r1
+ div0s r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ div1 r0,r3
+ rotcl r1
+ addc r2,r1
+ rts
+ mov r1,r0
+
+
+div0: rts
+ mov #0,r0
+
+ ENDFUNC(GLOBAL(sdivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* ! __SH4__ */
+#endif
+#ifdef L_udivsi3_i4
+
+ .title "SH DIVIDE"
+!! 4 byte integer Divide code for the Renesas SH
+#ifdef __SH4__
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
+!! and t bit
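+!!
+!! The float insn is signed-only, so each unsigned input u is biased:
+!! flipping the sign bit gives the signed value u - 2^31, and adding the
+!! constant 2147483648.0 after the conversion restores u. Sketch
+!! (illustrative): d = (double)(int)(u ^ 0x80000000) + 2147483648.0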
+
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+ mova L1,r0
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+ fmov.s @r0+,DR40
+ fmov.s @r0,DR41
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ rts
+ ftrc dr0,fpul
+
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+#ifdef FMOVD_WORKS
+ .align 3 ! make double below 8 byte aligned.
+#endif
+L1:
+ .double 2147483648
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
+#if ! __SH5__ || __SH5__ == 32
+!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
+ .mode SHmedia
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ addz.l r4,r63,r20
+ addz.l r5,r63,r21
+ fmov.qd r20,dr0
+ fmov.qd r21,dr32
+ ptabs r18,tr0
+ float.qd dr0,dr0
+ float.qd dr32,dr32
+ fdiv.d dr0,dr32,dr0
+ ftrc.dq dr0,dr32
+ fmov.s fr33,fr32
+ blink tr0,r63
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH5__ || __SH5__ == 32 */
+#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
+
+ .global GLOBAL(udivsi3_i4)
+ HIDDEN_FUNC(GLOBAL(udivsi3_i4))
+GLOBAL(udivsi3_i4):
+ mov #1,r1
+ cmp/hi r1,r5
+ bf trivial
+ sts.l fpscr,@-r15
+ mova L1,r0
+ lds.l @r0+,fpscr
+ rotr r1
+ xor r1,r4
+ lds r4,fpul
+#ifdef FMOVD_WORKS
+ fmov.d @r0+,dr4
+#else
+ fmov.s @r0+,DR40
+ fmov.s @r0,DR41
+#endif
+ float fpul,dr0
+ xor r1,r5
+ lds r5,fpul
+ float fpul,dr2
+ fadd dr4,dr0
+ fadd dr4,dr2
+ fdiv dr2,dr0
+ ftrc dr0,fpul
+ rts
+ lds.l @r15+,fpscr
+
+#ifdef FMOVD_WORKS
+ .align 3 ! make double below 8 byte aligned.
+#endif
+trivial:
+ rts
+ lds r4,fpul
+
+ .align 2
+L1:
+#ifndef FMOVD_WORKS
+ .long 0x80000
+#else
+ .long 0x180000
+#endif
+ .double 2147483648
+
+ ENDFUNC(GLOBAL(udivsi3_i4))
+#endif /* ! __SH4__ */
+#endif
+
+#ifdef L_udivsi3
+/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
+ sh2e/sh3e code. */
+#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
+
+!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
+ .global GLOBAL(udivsi3)
+ HIDDEN_FUNC(GLOBAL(udivsi3))
+
+#if __SHMEDIA__
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 2
+#if 0
+/* The assembly code that follows is a hand-optimized version of the C
+ code that follows. Note that the registers that are modified are
+ exactly those listed as clobbered in the patterns udivsi3_i1 and
+ udivsi3_i1_media.
+
+unsigned
+__udivsi3 (i, j)
+ unsigned i, j;
+{
+ register unsigned long long r0 asm ("r0") = 0;
+ register unsigned long long r18 asm ("r18") = 1;
+ register unsigned long long r4 asm ("r4") = i;
+ register unsigned long long r19 asm ("r19") = j;
+
+ r19 <<= 31;
+ r18 <<= 31;
+ do
+ if (r4 >= r19)
+ r0 |= r18, r4 -= r19;
+ while (r19 >>= 1, r18 >>= 1);
+
+ return r0;
+}
+*/
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontadd), tr2
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18, tr0
+ movi 0, r0
+ movi 1, r18
+ addz.l r5, r63, r19
+ addz.l r4, r63, r4
+ shlli r19, 31, r19
+ shlli r18, 31, r18
+LOCAL(udivsi3_loop):
+ bgtu r19, r4, tr2
+ or r0, r18, r0
+ sub r4, r19, r4
+LOCAL(udivsi3_dontadd):
+ shlri r18, 1, r18
+ shlri r19, 1, r19
+ bnei r18, 0, tr1
+ blink tr0, r63
+#else
+GLOBAL(udivsi3):
+ // inputs: r4,r5
+ // clobbered: r18,r19,r20,r21,r22,r25,tr0
+ // result in r0.
+ addz.l r5,r63,r22
+ nsb r22,r0
+ shlld r22,r0,r25
+ shlri r25,48,r25
+	movi 0xffffffffffffbb0c,r20 // shift count equiv 76
+ sub r20,r25,r21
+ mmulfx.w r21,r21,r19
+ mshflo.w r21,r63,r21
+ ptabs r18,tr0
+ mmulfx.w r25,r19,r19
+ sub r20,r0,r0
+ /* bubble */
+ msub.w r21,r19,r19
+ addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
+ before the msub.w, but we need a different value for
+ r19 to keep errors under control. */
+ mulu.l r4,r21,r18
+ mmulfx.w r19,r19,r19
+ shlli r21,15,r21
+ shlrd r18,r0,r18
+ mulu.l r18,r22,r20
+ mmacnfx.wl r25,r19,r21
+ /* bubble */
+ sub r4,r20,r25
+
+ mulu.l r25,r21,r19
+ addi r0,14,r0
+ /* bubble */
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ add r18,r19,r18
+ /* bubble */
+ sub.l r25,r20,r25
+
+ mulu.l r25,r21,r19
+ addz.l r25,r63,r25
+ sub r25,r22,r25
+ shlrd r19,r0,r19
+ mulu.l r19,r22,r20
+ addi r25,1,r25
+ add r18,r19,r18
+
+ cmpgt r25,r20,r25
+ add.l r18,r25,r0
+ blink tr0,r63
+#endif
+#elif defined (__SHMEDIA__)
+/* m5compact-nofpu - more emphasis on code size than on speed, but don't
+ ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
+ So use a short shmedia loop. */
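+/* The loop below is restoring shift-subtract division with the divisor
+   pre-shifted into the upper half of a 64-bit register. A C sketch of
+   the underlying algorithm (illustrative only, not compiled; n and d
+   stand for the arguments in r4 and r5):
+
+     unsigned q = 0, rem = 0;
+     int i;
+     for (i = 31; i >= 0; i--)
+       {
+	 rem = (rem << 1) | ((n >> i) & 1);
+	 q <<= 1;
+	 if (rem >= d)
+	   {
+	     rem -= d;
+	     q |= 1;
+	   }
+       }
+*/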
+ // clobbered: r20,r21,r25,tr0,tr1,tr2
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+GLOBAL(udivsi3):
+ pt/l LOCAL(udivsi3_dontsub), tr0
+ pt/l LOCAL(udivsi3_loop), tr1
+ ptabs/l r18,tr2
+ shlli r5,32,r25
+ addi r25,-1,r21
+ addz.l r4,r63,r20
+LOCAL(udivsi3_loop):
+ shlli r20,1,r20
+ bgeu/u r21,r20,tr0
+ sub r20,r21,r20
+LOCAL(udivsi3_dontsub):
+ addi.l r25,-1,r25
+ bnei r25,-32,tr1
+ add.l r20,r63,r0
+ blink tr2,r63
+#else /* ! defined (__SHMEDIA__) */
+LOCAL(div8):
+ div1 r5,r4
+LOCAL(div7):
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+LOCAL(divx4):
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ div1 r5,r4; rotcl r0
+ rts; div1 r5,r4
+
+GLOBAL(udivsi3):
+ sts.l pr,@-r15
+ extu.w r5,r0
+ cmp/eq r5,r0
+#ifdef __sh1__
+ bf LOCAL(large_divisor)
+#else
+ bf/s LOCAL(large_divisor)
+#endif
+ div0u
+ swap.w r4,r0
+ shlr16 r4
+ bsr LOCAL(div8)
+ shll16 r5
+ bsr LOCAL(div7)
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(div8)
+ swap.w r4,r4
+ bsr LOCAL(div7)
+ div1 r5,r4
+ lds.l @r15+,pr
+ xtrct r4,r0
+ swap.w r0,r0
+ rotcl r0
+ rts
+ shlr16 r5
+
+LOCAL(large_divisor):
+#ifdef __sh1__
+ div0u
+#endif
+ mov #0,r0
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ bsr LOCAL(divx4)
+ rotcl r0
+ lds.l @r15+,pr
+ rts
+ rotcl r0
+
+ ENDFUNC(GLOBAL(udivsi3))
+#endif /* ! __SHMEDIA__ */
+#endif /* __SH4__ */
+#endif /* L_udivsi3 */
+
+#ifdef L_udivdi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(udivdi3)
+ FUNC(GLOBAL(udivdi3))
+GLOBAL(udivdi3):
+ HIDDEN_ALIAS(udivdi3_internal,udivdi3)
+ shlri r3,1,r4
+ nsb r4,r22
+ shlld r3,r22,r6
+ shlri r6,49,r5
+ movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
+ sub r21,r5,r1
+ mmulfx.w r1,r1,r4
+ mshflo.w r1,r63,r1
+ sub r63,r22,r20 // r63 == 64 % 64
+ mmulfx.w r5,r4,r4
+ pta LOCAL(large_divisor),tr0
+ addi r20,32,r9
+ msub.w r1,r4,r1
+ madd.w r1,r1,r1
+ mmulfx.w r1,r1,r4
+ shlri r6,32,r7
+ bgt/u r9,r63,tr0 // large_divisor
+ mmulfx.w r5,r4,r4
+ shlri r2,32+14,r19
+ addi r22,-31,r0
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r19,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ mulu.l r5,r3,r8
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ shlld r8,r0,r8
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r2,r8,r2
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
+
+ shlri r2,22,r21
+ mulu.l r21,r1,r21
+ shlld r5,r0,r8
+ addi r20,30-22,r0
+ shlrd r21,r0,r21
+ mulu.l r21,r3,r5
+ add r8,r21,r8
+ mcmpgt.l r21,r63,r21 // See Note 1
+ addi r20,30,r0
+ mshfhi.l r63,r21,r21
+ sub r2,r5,r2
+ andc r2,r21,r2
+
+ /* small divisor: need a third divide step */
+ mulu.l r2,r1,r7
+ ptabs r18,tr0
+ addi r2,1,r2
+ shlrd r7,r0,r7
+ mulu.l r7,r3,r5
+ add r8,r7,r8
+ sub r2,r3,r2
+ cmpgt r2,r5,r5
+ add r8,r5,r2
+ /* could test r3 here to check for divide by zero. */
+ blink tr0,r63
+
+LOCAL(large_divisor):
+ mmulfx.w r5,r4,r4
+ shlrd r2,r9,r25
+ shlri r25,32,r8
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r8,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ shlri r5,14-1,r8
+ mulu.l r8,r7,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r25,r5,r25
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
+
+ shlri r25,22,r21
+ mulu.l r21,r1,r21
+ pta LOCAL(no_lo_adj),tr0
+ addi r22,32,r0
+ shlri r21,40,r21
+ mulu.l r21,r7,r5
+ add r8,r21,r8
+ shlld r2,r0,r2
+ sub r25,r5,r25
+ bgtu/u r7,r25,tr0 // no_lo_adj
+ addi r8,1,r8
+ sub r25,r7,r25
+LOCAL(no_lo_adj):
+ mextr4 r2,r25,r2
+
+ /* large_divisor: only needs a few adjustments. */
+ mulu.l r8,r6,r5
+ ptabs r18,tr0
+ /* bubble */
+ cmpgtu r5,r2,r5
+ sub r8,r5,r2
+ blink tr0,r63
+ ENDFUNC(GLOBAL(udivdi3))
+/* Note 1: Shifting the result of the second divide stage so that the result
+   always fits into 32 bits, while still reducing the rest sufficiently,
+   would require a lot of instructions to get the shifts just right. Using
+ the full 64 bit shift result to multiply with the divisor would require
+ four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+ Fortunately, if the upper 32 bits of the shift result are nonzero, we
+ know that the rest after taking this partial result into account will
+ fit into 32 bits. So we just clear the upper 32 bits of the rest if the
+ upper 32 bits of the partial result are nonzero. */
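+/* Roughly, in C terms (illustrative only; the names are placeholders),
+   the mcmpgt.l / mshfhi.l / andc sequence above amounts to:
+
+     if ((partial >> 32) != 0)	// upper half of partial result set
+       rest &= 0xffffffff;	// the rest then fits in 32 bits
+*/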
+#endif /* __SHMEDIA__ */
+#endif /* L_udivdi3 */
+
+#ifdef L_divdi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(divdi3)
+ FUNC(GLOBAL(divdi3))
+GLOBAL(divdi3):
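+	/* Take absolute values; tail-call the unsigned divide when the
+	   operand signs agree, otherwise call it and negate the quotient
+	   on return. */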
+ pta GLOBAL(udivdi3_internal),tr0
+ shari r2,63,r22
+ shari r3,63,r23
+ xor r2,r22,r2
+ xor r3,r23,r3
+ sub r2,r22,r2
+ sub r3,r23,r3
+ beq/u r22,r23,tr0
+ ptabs r18,tr1
+ blink tr0,r18
+ sub r63,r2,r2
+ blink tr1,r63
+ ENDFUNC(GLOBAL(divdi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_divdi3 */
+
+#ifdef L_umoddi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(umoddi3)
+ FUNC(GLOBAL(umoddi3))
+GLOBAL(umoddi3):
+ HIDDEN_ALIAS(umoddi3_internal,umoddi3)
+ shlri r3,1,r4
+ nsb r4,r22
+ shlld r3,r22,r6
+ shlri r6,49,r5
+ movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
+ sub r21,r5,r1
+ mmulfx.w r1,r1,r4
+ mshflo.w r1,r63,r1
+ sub r63,r22,r20 // r63 == 64 % 64
+ mmulfx.w r5,r4,r4
+ pta LOCAL(large_divisor),tr0
+ addi r20,32,r9
+ msub.w r1,r4,r1
+ madd.w r1,r1,r1
+ mmulfx.w r1,r1,r4
+ shlri r6,32,r7
+ bgt/u r9,r63,tr0 // large_divisor
+ mmulfx.w r5,r4,r4
+ shlri r2,32+14,r19
+ addi r22,-31,r0
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r19,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ mulu.l r5,r3,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ shlld r5,r0,r5
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r2,r5,r2
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
+
+ shlri r2,22,r21
+ mulu.l r21,r1,r21
+ addi r20,30-22,r0
+ /* bubble */ /* could test r3 here to check for divide by zero. */
+ shlrd r21,r0,r21
+ mulu.l r21,r3,r5
+ mcmpgt.l r21,r63,r21 // See Note 1
+ addi r20,30,r0
+ mshfhi.l r63,r21,r21
+ sub r2,r5,r2
+ andc r2,r21,r2
+
+ /* small divisor: need a third divide step */
+ mulu.l r2,r1,r7
+ ptabs r18,tr0
+ sub r2,r3,r8 /* re-use r8 here for rest - r3 */
+ shlrd r7,r0,r7
+ mulu.l r7,r3,r5
+ /* bubble */
+ addi r8,1,r7
+ cmpgt r7,r5,r7
+ cmvne r7,r8,r2
+ sub r2,r5,r2
+ blink tr0,r63
+
+LOCAL(large_divisor):
+ mmulfx.w r5,r4,r4
+ shlrd r2,r9,r25
+ shlri r25,32,r8
+ msub.w r1,r4,r1
+
+ mulu.l r1,r7,r4
+ addi r1,-3,r5
+ mulu.l r5,r8,r5
+ sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
+ shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
+ the case may be, %0000000000000000 000.11111111111, still */
+ muls.l r1,r4,r4 /* leaving at least one sign bit. */
+ shlri r5,14-1,r8
+ mulu.l r8,r7,r5
+ mshalds.l r1,r21,r1
+ shari r4,26,r4
+ add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
+ sub r25,r5,r25
+ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
+
+ shlri r25,22,r21
+ mulu.l r21,r1,r21
+ pta LOCAL(no_lo_adj),tr0
+ addi r22,32,r0
+ shlri r21,40,r21
+ mulu.l r21,r7,r5
+ add r8,r21,r8
+ shlld r2,r0,r2
+ sub r25,r5,r25
+ bgtu/u r7,r25,tr0 // no_lo_adj
+ addi r8,1,r8
+ sub r25,r7,r25
+LOCAL(no_lo_adj):
+ mextr4 r2,r25,r2
+
+ /* large_divisor: only needs a few adjustments. */
+ mulu.l r8,r6,r5
+ ptabs r18,tr0
+ add r2,r6,r7
+ cmpgtu r5,r2,r8
+ cmvne r8,r7,r2
+ sub r2,r5,r2
+ shlrd r2,r22,r2
+ blink tr0,r63
+ ENDFUNC(GLOBAL(umoddi3))
+/* Note 1: Shifting the result of the second divide stage so that the result
+   always fits into 32 bits, while still reducing the rest sufficiently,
+   would require a lot of instructions to get the shifts just right. Using
+ the full 64 bit shift result to multiply with the divisor would require
+ four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
+ Fortunately, if the upper 32 bits of the shift result are nonzero, we
+ know that the rest after taking this partial result into account will
+ fit into 32 bits. So we just clear the upper 32 bits of the rest if the
+ upper 32 bits of the partial result are nonzero. */
+#endif /* __SHMEDIA__ */
+#endif /* L_umoddi3 */
+
+#ifdef L_moddi3
+#ifdef __SHMEDIA__
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(moddi3)
+ FUNC(GLOBAL(moddi3))
+GLOBAL(moddi3):
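+	/* Take absolute values and call the unsigned modulus; the result
+	   is negated on return unless the dividend was non-negative, since
+	   the sign of a remainder follows its dividend. */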
+ pta GLOBAL(umoddi3_internal),tr0
+ shari r2,63,r22
+ shari r3,63,r23
+ xor r2,r22,r2
+ xor r3,r23,r3
+ sub r2,r22,r2
+ sub r3,r23,r3
+ beq/u r22,r63,tr0
+ ptabs r18,tr1
+ blink tr0,r18
+ sub r63,r2,r2
+ blink tr1,r63
+ ENDFUNC(GLOBAL(moddi3))
+#endif /* __SHMEDIA__ */
+#endif /* L_moddi3 */
+
+#ifdef L_set_fpscr
+#if !defined (__SH2A_NOFPU__)
+#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
+#ifdef __SH5__
+ .mode SHcompact
+#endif
+ .global GLOBAL(set_fpscr)
+ HIDDEN_FUNC(GLOBAL(set_fpscr))
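+/* Set FPSCR to the value in r4 and keep the two cached entries of
+   GLOBAL(fpscr_values) in sync with it; under __PIC__ the address of
+   fpscr_values is first fetched through the GOT. */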
+GLOBAL(set_fpscr):
+ lds r4,fpscr
+#ifdef __PIC__
+ mov.l r12,@-r15
+#ifdef __vxworks
+ mov.l LOCAL(set_fpscr_L0_base),r12
+ mov.l LOCAL(set_fpscr_L0_index),r0
+ mov.l @r12,r12
+ mov.l @(r0,r12),r12
+#else
+ mova LOCAL(set_fpscr_L0),r0
+ mov.l LOCAL(set_fpscr_L0),r12
+ add r0,r12
+#endif
+ mov.l LOCAL(set_fpscr_L1),r0
+ mov.l @(r0,r12),r1
+ mov.l @r15+,r12
+#else
+ mov.l LOCAL(set_fpscr_L1),r1
+#endif
+ swap.w r4,r0
+ or #24,r0
+#ifndef FMOVD_WORKS
+ xor #16,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+ swap.w r0,r3
+ mov.l r3,@(4,r1)
+#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r2
+ mov.l r2,@r1
+#endif
+#ifndef FMOVD_WORKS
+ xor #8,r0
+#else
+ xor #24,r0
+#endif
+#if defined(__SH4__) || defined (__SH2A_DOUBLE__)
+ swap.w r0,r2
+ rts
+ mov.l r2,@r1
+#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
+ swap.w r0,r3
+ rts
+ mov.l r3,@(4,r1)
+#endif
+ .align 2
+#ifdef __PIC__
+#ifdef __vxworks
+LOCAL(set_fpscr_L0_base):
+ .long ___GOTT_BASE__
+LOCAL(set_fpscr_L0_index):
+ .long ___GOTT_INDEX__
+#else
+LOCAL(set_fpscr_L0):
+ .long _GLOBAL_OFFSET_TABLE_
+#endif
+LOCAL(set_fpscr_L1):
+ .long GLOBAL(fpscr_values@GOT)
+#else
+LOCAL(set_fpscr_L1):
+ .long GLOBAL(fpscr_values)
+#endif
+
+ ENDFUNC(GLOBAL(set_fpscr))
+#ifndef NO_FPSCR_VALUES
+#ifdef __ELF__
+ .comm GLOBAL(fpscr_values),8,4
+#else
+ .comm GLOBAL(fpscr_values),8
+#endif /* ELF */
+#endif /* NO_FPSCR_VALUES */
+#endif /* SH2E / SH3E / SH4 */
+#endif /* __SH2A_NOFPU__ */
+#endif /* L_set_fpscr */
+#ifdef L_ic_invalidate
+#if __SH5__ == 32
+ .mode SHmedia
+ .section .text..SHmedia32,"ax"
+ .align 2
+ .global GLOBAL(init_trampoline)
+ HIDDEN_FUNC(GLOBAL(init_trampoline))
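+/* Build a trampoline at r0: an 8-byte instruction template (assembled
+   in the right byte order for the target endianness) is stored at
+   offset 0, and the two operand words r2 and r3 at offsets 8 and 12. */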
+GLOBAL(init_trampoline):
+ st.l r0,8,r2
+#ifdef __LITTLE_ENDIAN__
+ movi 9,r20
+ shori 0x402b,r20
+ shori 0xd101,r20
+ shori 0xd002,r20
+#else
+ movi 0xffffffffffffd002,r20
+ shori 0xd101,r20
+ shori 0x402b,r20
+ shori 9,r20
+#endif
+ st.q r0,0,r20
+ st.l r0,12,r3
+ ENDFUNC(GLOBAL(init_trampoline))
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+ ocbwb r0,0
+ synco
+ icbi r0, 0
+ ptabs r18, tr0
+ synci
+ blink tr0, r63
+ ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4A__)
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+ ocbwb @r4
+ synco
+ icbi @r4
+ rts
+ nop
+ ENDFUNC(GLOBAL(ic_invalidate))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+ /* For system code, we use ic_invalidate_line_i, but user code
+ needs a different mechanism. A kernel call is generally not
+ available, and it would also be slow. Different SH4 variants use
+ different sizes and associativities of the Icache. We use a small
+	   bit of dispatch code that can be hidden in every shared object,
+	   which calls the actual processor-specific invalidation code in a
+	   separate module.
+	   Or if you have operating system support, the OS could mmap the
+	   processor-specific code from a single page, since it is highly
+ repetitive. */
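+/* Roughly (illustrative only), the dispatch below branches to
+
+     base + ((addr - base) & mask)
+
+   where base is GLOBAL(ic_invalidate_array) and the mask is the long
+   stored 8 bytes past it, so every cache-line alias of addr lands on
+   its own slot of the array. */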
+ .global GLOBAL(ic_invalidate)
+ HIDDEN_FUNC(GLOBAL(ic_invalidate))
+GLOBAL(ic_invalidate):
+#ifdef __pic__
+#ifdef __vxworks
+ mov.l 1f,r1
+ mov.l 2f,r0
+ mov.l @r1,r1
+ mov.l 0f,r2
+ mov.l @(r0,r1),r0
+#else
+ mov.l 1f,r1
+ mova 1f,r0
+ mov.l 0f,r2
+ add r1,r0
+#endif
+ mov.l @(r0,r2),r1
+#else
+ mov.l 0f,r1
+#endif
+ ocbwb @r4
+ mov.l @(8,r1),r0
+ sub r1,r4
+ and r4,r0
+ add r1,r0
+ jmp @r0
+ mov.l @(4,r1),r0
+ .align 2
+#ifndef __pic__
+0: .long GLOBAL(ic_invalidate_array)
+#else /* __pic__ */
+ .global GLOBAL(ic_invalidate_array)
+0: .long GLOBAL(ic_invalidate_array)@GOT
+#ifdef __vxworks
+1: .long ___GOTT_BASE__
+2: .long ___GOTT_INDEX__
+#else
+1: .long _GLOBAL_OFFSET_TABLE_
+#endif
+ ENDFUNC(GLOBAL(ic_invalidate))
+#endif /* __pic__ */
+#endif /* SH4 */
+#endif /* L_ic_invalidate */
+
+#ifdef L_ic_invalidate_array
+#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
+	/* This is needed when an SH4 DSO with trampolines is used on SH4A. */
+	.global	GLOBAL(ic_invalidate_array)
+ FUNC(GLOBAL(ic_invalidate_array))
+GLOBAL(ic_invalidate_array):
+ add r1,r4
+ synco
+ icbi @r4
+ rts
+ nop
+ .align 2
+ .long 0
+ ENDFUNC(GLOBAL(ic_invalidate_array))
+#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
+ .global GLOBAL(ic_invalidate_array)
+ .p2align 5
+ FUNC(GLOBAL(ic_invalidate_array))
+/* This must be aligned to the beginning of a cache line. */
+GLOBAL(ic_invalidate_array):
+#ifndef WAYS
+#define WAYS 4
+#define WAY_SIZE 0x4000
+#endif
+#if WAYS == 1
+ .rept WAY_SIZE * WAYS / 32
+ rts
+ nop
+ .rept 7
+ .long WAY_SIZE - 32
+ .endr
+ .endr
+#elif WAYS <= 6
+ .rept WAY_SIZE * WAYS / 32
+ braf r0
+ add #-8,r0
+ .long WAY_SIZE + 8
+ .long WAY_SIZE - 32
+ .rept WAYS-2
+ braf r0
+ nop
+ .endr
+ .rept 7 - WAYS
+ rts
+ nop
+ .endr
+ .endr
+#else /* WAYS > 6 */
+ /* This variant needs two different pages for mmap-ing. */
+ .rept WAYS-1
+ .rept WAY_SIZE / 32
+ braf r0
+ nop
+ .long WAY_SIZE
+ .rept 6
+ .long WAY_SIZE - 32
+ .endr
+ .endr
+ .endr
+ .rept WAY_SIZE / 32
+ rts
+ .rept 15
+ nop
+ .endr
+ .endr
+#endif /* WAYS */
+ ENDFUNC(GLOBAL(ic_invalidate_array))
+#endif /* SH4 */
+#endif /* L_ic_invalidate_array */
+
+#if defined (__SH5__) && __SH5__ == 32
+#ifdef L_shcompact_call_trampoline
+ .section .rodata
+ .align 1
+LOCAL(ct_main_table):
+.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
+.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+
+	/* This function loads 64-bit general-purpose registers from the
+	   stack, from a memory address contained in them, or from an FP
+	   register, according to a cookie passed in r1. Its execution
+	   time is linear in the number of registers that actually have
+	   to be copied. See sh.h for details on the actual bit pattern.
+
+	   The function to be called is passed in r0. If a 32-bit return
+	   value is expected, the actual function will be tail-called;
+	   otherwise the return address will be stored in r10 (which the
+	   caller should expect to be clobbered) and the return value
+	   will be expanded into r2/r3 upon return. */
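+	/* The loop below implements this as a dispatch table: nsb finds the
+	   highest field still set in the cookie, the .word table above
+	   supplies the handler offset, and each handler clears its own
+	   field before branching back to LOCAL(ct_loop). */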
+
+ .global GLOBAL(GCC_shcompact_call_trampoline)
+ FUNC(GLOBAL(GCC_shcompact_call_trampoline))
+GLOBAL(GCC_shcompact_call_trampoline):
+ ptabs/l r0, tr0 /* Prepare to call the actual function. */
+ movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
+ pt/l LOCAL(ct_loop), tr1
+ addz.l r1, r63, r1
+ shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
+LOCAL(ct_loop):
+ nsb r1, r28
+ shlli r28, 1, r29
+ ldx.w r0, r29, r30
+LOCAL(ct_main_label):
+ ptrel/l r30, tr2
+ blink tr2, r63
+LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
+ /* It must be dr0, so just do it. */
+ fmov.dq dr0, r2
+ movi 7, r30
+ shlli r30, 29, r31
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
+ /* It is either dr0 or dr2. */
+ movi 7, r30
+ shlri r1, 26, r32
+ shlli r30, 26, r31
+ andc r1, r31, r1
+ fmov.dq dr0, r3
+ beqi/l r32, 4, tr1
+ fmov.dq dr2, r3
+ blink tr1, r63
+LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
+ shlri r1, 23 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
+LOCAL(ct_r4_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 23, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r4_fp_copy):
+ fmov.dq dr0, r4
+ blink tr1, r63
+ fmov.dq dr2, r4
+ blink tr1, r63
+ fmov.dq dr4, r4
+ blink tr1, r63
+LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
+ shlri r1, 20 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
+LOCAL(ct_r5_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 20, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r5_fp_copy):
+ fmov.dq dr0, r5
+ blink tr1, r63
+ fmov.dq dr2, r5
+ blink tr1, r63
+ fmov.dq dr4, r5
+ blink tr1, r63
+ fmov.dq dr6, r5
+ blink tr1, r63
+LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
+ /* It must be dr8. */
+ fmov.dq dr8, r6
+ movi 15, r30
+ shlli r30, 16, r31
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
+ shlri r1, 16 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
+LOCAL(ct_r6_fp_base):
+ ptrel/l r32, tr2
+ movi 7, r30
+ shlli r30, 16, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r6_fp_copy):
+ fmov.dq dr0, r6
+ blink tr1, r63
+ fmov.dq dr2, r6
+ blink tr1, r63
+ fmov.dq dr4, r6
+ blink tr1, r63
+ fmov.dq dr6, r6
+ blink tr1, r63
+LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 12, r31
+ shlri r1, 12, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r7
+ beqi/l r32, 8, tr1
+ fmov.dq dr10, r7
+ blink tr1, r63
+LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
+ shlri r1, 12 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
+LOCAL(ct_r7_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 12, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r7_fp_copy):
+ fmov.dq dr0, r7
+ blink tr1, r63
+ fmov.dq dr2, r7
+ blink tr1, r63
+ fmov.dq dr4, r7
+ blink tr1, r63
+ fmov.dq dr6, r7
+ blink tr1, r63
+LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 8, r31
+ andi r1, 1 << 8, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r8
+ beq/l r32, r63, tr1
+ fmov.dq dr10, r8
+ blink tr1, r63
+LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
+ shlri r1, 8 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
+LOCAL(ct_r8_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 8, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r8_fp_copy):
+ fmov.dq dr0, r8
+ blink tr1, r63
+ fmov.dq dr2, r8
+ blink tr1, r63
+ fmov.dq dr4, r8
+ blink tr1, r63
+ fmov.dq dr6, r8
+ blink tr1, r63
+LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
+ /* It is either dr8 or dr10. */
+ movi 15 << 4, r31
+ andi r1, 1 << 4, r32
+ andc r1, r31, r1
+ fmov.dq dr8, r9
+ beq/l r32, r63, tr1
+ fmov.dq dr10, r9
+ blink tr1, r63
+LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
+ shlri r1, 4 - 3, r34
+ andi r34, 3 << 3, r33
+ addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
+LOCAL(ct_r9_fp_base):
+ ptrel/l r32, tr2
+ movi 7 << 4, r31
+ andc r1, r31, r1
+ blink tr2, r63
+LOCAL(ct_r9_fp_copy):
+ fmov.dq dr0, r9
+ blink tr1, r63
+ fmov.dq dr2, r9
+ blink tr1, r63
+ fmov.dq dr4, r9
+ blink tr1, r63
+ fmov.dq dr6, r9
+ blink tr1, r63
+LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
+ pt/l LOCAL(ct_r2_load), tr2
+ movi 3, r30
+ shlli r30, 29, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r2, 8, r3
+ ldx.q r2, r63, r2
+ /* Fall through. */
+LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
+ pt/l LOCAL(ct_r3_load), tr2
+ movi 3, r30
+ shlli r30, 26, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r3, 8, r4
+ ldx.q r3, r63, r3
+LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
+ pt/l LOCAL(ct_r4_load), tr2
+ movi 3, r30
+ shlli r30, 23, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r4, 8, r5
+ ldx.q r4, r63, r4
+LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
+ pt/l LOCAL(ct_r5_load), tr2
+ movi 3, r30
+ shlli r30, 20, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r5, 8, r6
+ ldx.q r5, r63, r5
+LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
+ pt/l LOCAL(ct_r6_load), tr2
+ movi 3 << 16, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r6, 8, r7
+ ldx.q r6, r63, r6
+LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
+ pt/l LOCAL(ct_r7_load), tr2
+ movi 3 << 12, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r7, 8, r8
+ ldx.q r7, r63, r7
+LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
+ pt/l LOCAL(ct_r8_load), tr2
+ movi 3 << 8, r31
+ and r1, r31, r32
+ andc r1, r31, r1
+ beq/l r31, r32, tr2
+ addi.l r8, 8, r9
+ ldx.q r8, r63, r8
+LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
+ pt/l LOCAL(ct_check_tramp), tr2
+ ldx.q r9, r63, r9
+ blink tr2, r63
+LOCAL(ct_r2_load):
+ ldx.q r2, r63, r2
+ blink tr1, r63
+LOCAL(ct_r3_load):
+ ldx.q r3, r63, r3
+ blink tr1, r63
+LOCAL(ct_r4_load):
+ ldx.q r4, r63, r4
+ blink tr1, r63
+LOCAL(ct_r5_load):
+ ldx.q r5, r63, r5
+ blink tr1, r63
+LOCAL(ct_r6_load):
+ ldx.q r6, r63, r6
+ blink tr1, r63
+LOCAL(ct_r7_load):
+ ldx.q r7, r63, r7
+ blink tr1, r63
+LOCAL(ct_r8_load):
+ ldx.q r8, r63, r8
+ blink tr1, r63
+LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r2
+ shlli r30, 29, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r3
+ shlli r30, 26, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r4
+ shlli r30, 23, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r5
+ shlli r30, 20, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
+ movi 1, r30
+ ldx.q r15, r63, r6
+ shlli r30, 16, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
+ ldx.q r15, r63, r7
+ movi 1 << 12, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
+ ldx.q r15, r63, r8
+ movi 1 << 8, r31
+ addi.l r15, 8, r15
+ andc r1, r31, r1
+ blink tr1, r63
+LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
+ andi r1, 7 << 1, r30
+ movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
+ shlli r30, 2, r31
+ shori LOCAL(ct_end_of_pop_seq) & 65535, r32
+ sub.l r32, r31, r33
+ ptabs/l r33, tr2
+ blink tr2, r63
+LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
+ ldx.q r15, r63, r3
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r4
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r5
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r6
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r7
+ addi.l r15, 8, r15
+ ldx.q r15, r63, r8
+ addi.l r15, 8, r15
+LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
+ ldx.q r15, r63, r9
+ addi.l r15, 8, r15
+LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
+LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
+ pt/u LOCAL(ct_ret_wide), tr2
+ andi r1, 1, r1
+ bne/u r1, r63, tr2
+LOCAL(ct_call_func): /* Just branch to the function. */
+ blink tr0, r63
+LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
+ 64-bit return value. */
+ add.l r18, r63, r10
+ blink tr0, r18
+ ptabs r10, tr0
+#if __LITTLE_ENDIAN__
+ shari r2, 32, r3
+ add.l r2, r63, r2
+#else
+ add.l r2, r63, r3
+ shari r2, 32, r2
+#endif
+ blink tr0, r63
+
+ ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
+#endif /* L_shcompact_call_trampoline */
+
+#ifdef L_shcompact_return_trampoline
+ /* This function does the converse of the code in `ret_wide'
+ above. It is tail-called by SHcompact functions returning
+ 64-bit non-floating-point values, to pack the 32-bit values in
+ r2 and r3 into r2. */
+
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+ .global GLOBAL(GCC_shcompact_return_trampoline)
+ HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
+GLOBAL(GCC_shcompact_return_trampoline):
+ ptabs/l r18, tr0
+#if __LITTLE_ENDIAN__
+ addz.l r2, r63, r2
+ shlli r3, 32, r3
+#else
+ addz.l r3, r63, r3
+ shlli r2, 32, r2
+#endif
+ or r3, r2, r2
+ blink tr0, r63
+
+ ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
+#endif /* L_shcompact_return_trampoline */
+
+#ifdef L_shcompact_incoming_args
+ .section .rodata
+ .align 1
+LOCAL(ia_main_table):
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
+.word 1 /* Invalid, just loop */
+.word 1 /* Invalid, just loop */
+.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
+ .mode SHmedia
+ .section .text..SHmedia32, "ax"
+ .align 2
+
+	/* This function stores 64-bit general-purpose registers back on
+	   the stack, and loads into each register the address at which
+	   it was stored. The lower 32 bits of r17 hold the address to
+	   begin storing at, and the upper 32 bits of r17 hold the cookie.
+	   Its execution time is linear in the number of registers that
+	   actually have to be copied, and it is
+ optimized for structures larger than 64 bits, as opposed to
+ individual `long long' arguments. See sh.h for details on the
+ actual bit pattern. */
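+	/* Same dispatch scheme as in GCC_shcompact_call_trampoline above:
+	   nsb picks the highest field still set in the cookie, the .word
+	   table supplies the handler, and each handler clears its field
+	   before looping. */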
+
+ .global GLOBAL(GCC_shcompact_incoming_args)
+ FUNC(GLOBAL(GCC_shcompact_incoming_args))
+GLOBAL(GCC_shcompact_incoming_args):
+ ptabs/l r18, tr0 /* Prepare to return. */
+ shlri r17, 32, r0 /* Load the cookie. */
+ movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
+ pt/l LOCAL(ia_loop), tr1
+ add.l r17, r63, r17
+ shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
+LOCAL(ia_loop):
+ nsb r0, r36
+ shlli r36, 1, r37
+ ldx.w r43, r37, r38
+LOCAL(ia_main_label):
+ ptrel/l r38, tr2
+ blink tr2, r63
+LOCAL(ia_r2_ld): /* Store r2 and load its address. */
+ movi 3, r38
+ shlli r38, 29, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r2
+ add.l r17, r63, r2
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r3_ld): /* Store r3 and load its address. */
+ movi 3, r38
+ shlli r38, 26, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r3
+ add.l r17, r63, r3
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r4_ld): /* Store r4 and load its address. */
+ movi 3, r38
+ shlli r38, 23, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r4
+ add.l r17, r63, r4
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r5_ld): /* Store r5 and load its address. */
+ movi 3, r38
+ shlli r38, 20, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r5
+ add.l r17, r63, r5
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r6_ld): /* Store r6 and load its address. */
+ movi 3, r38
+ shlli r38, 16, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r6
+ add.l r17, r63, r6
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r7_ld): /* Store r7 and load its address. */
+ movi 3 << 12, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r7
+ add.l r17, r63, r7
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r8_ld): /* Store r8 and load its address. */
+ movi 3 << 8, r39
+ and r0, r39, r40
+ andc r0, r39, r0
+ stx.q r17, r63, r8
+ add.l r17, r63, r8
+ addi.l r17, 8, r17
+ beq/u r39, r40, tr1
+LOCAL(ia_r9_ld): /* Store r9 and load its address. */
+ stx.q r17, r63, r9
+ add.l r17, r63, r9
+ blink tr0, r63
+LOCAL(ia_r2_push): /* Push r2 onto the stack. */
+ movi 1, r38
+ shlli r38, 29, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r2
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r3_push): /* Push r3 onto the stack. */
+ movi 1, r38
+ shlli r38, 26, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r3
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r4_push): /* Push r4 onto the stack. */
+ movi 1, r38
+ shlli r38, 23, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r4
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r5_push): /* Push r5 onto the stack. */
+ movi 1, r38
+ shlli r38, 20, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r5
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r6_push): /* Push r6 onto the stack. */
+ movi 1, r38
+ shlli r38, 16, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r6
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r7_push): /* Push r7 onto the stack. */
+ movi 1 << 12, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r7
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_r8_push): /* Push r8 onto the stack. */
+ movi 1 << 8, r39
+ andc r0, r39, r0
+ stx.q r17, r63, r8
+ addi.l r17, 8, r17
+ blink tr1, r63
+LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
+ andi r0, 7 << 1, r38
+ movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
+ shlli r38, 2, r39
+ shori LOCAL(ia_end_of_push_seq) & 65535, r40
+ sub.l r40, r39, r41
+ ptabs/l r41, tr2
+ blink tr2, r63
+LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
+ stx.q r17, r63, r3
+ addi.l r17, 8, r17
+ stx.q r17, r63, r4
+ addi.l r17, 8, r17
+ stx.q r17, r63, r5
+ addi.l r17, 8, r17
+ stx.q r17, r63, r6
+ addi.l r17, 8, r17
+ stx.q r17, r63, r7
+ addi.l r17, 8, r17
+ stx.q r17, r63, r8
+ addi.l r17, 8, r17
+LOCAL(ia_r9_push): /* Push r9 onto the stack. */
+ stx.q r17, r63, r9
+LOCAL(ia_return): /* Return. */
+ blink tr0, r63
+LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
+ ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
+#endif /* L_shcompact_incoming_args */
+#endif
+#if __SH5__
+#ifdef L_nested_trampoline
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+ .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
+ .global GLOBAL(GCC_nested_trampoline)
+ HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
+GLOBAL(GCC_nested_trampoline):
+ .mode SHmedia
+ ptrel/u r63, tr0
+ gettr tr0, r0
+#if __SH5__ == 64
+ ld.q r0, 24, r1
+#else
+ ld.l r0, 24, r1
+#endif
+ ptabs/l r1, tr1
+#if __SH5__ == 64
+ ld.q r0, 32, r1
+#else
+ ld.l r0, 28, r1
+#endif
+ blink tr1, r63
+
+ ENDFUNC(GLOBAL(GCC_nested_trampoline))
+#endif /* L_nested_trampoline */
+#endif /* __SH5__ */
+#if __SH5__ == 32
+#ifdef L_push_pop_shmedia_regs
+ .section .text..SHmedia32,"ax"
+ .mode SHmedia
+ .align 2
+#ifndef __SH4_NOFPU__
+ .global GLOBAL(GCC_push_shmedia_regs)
+ FUNC(GLOBAL(GCC_push_shmedia_regs))
+GLOBAL(GCC_push_shmedia_regs):
+ addi.l r15, -14*8, r15
+ fst.d r15, 13*8, dr62
+ fst.d r15, 12*8, dr60
+ fst.d r15, 11*8, dr58
+ fst.d r15, 10*8, dr56
+ fst.d r15, 9*8, dr54
+ fst.d r15, 8*8, dr52
+ fst.d r15, 7*8, dr50
+ fst.d r15, 6*8, dr48
+ fst.d r15, 5*8, dr46
+ fst.d r15, 4*8, dr44
+ fst.d r15, 3*8, dr42
+ fst.d r15, 2*8, dr40
+ fst.d r15, 1*8, dr38
+ fst.d r15, 0*8, dr36
+#else /* ! __SH4_NOFPU__ */
+ .global GLOBAL(GCC_push_shmedia_regs_nofpu)
+ FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+GLOBAL(GCC_push_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__ */
+ ptabs/l r18, tr0
+ addi.l r15, -27*8, r15
+ gettr tr7, r62
+ gettr tr6, r61
+ gettr tr5, r60
+ st.q r15, 26*8, r62
+ st.q r15, 25*8, r61
+ st.q r15, 24*8, r60
+ st.q r15, 23*8, r59
+ st.q r15, 22*8, r58
+ st.q r15, 21*8, r57
+ st.q r15, 20*8, r56
+ st.q r15, 19*8, r55
+ st.q r15, 18*8, r54
+ st.q r15, 17*8, r53
+ st.q r15, 16*8, r52
+ st.q r15, 15*8, r51
+ st.q r15, 14*8, r50
+ st.q r15, 13*8, r49
+ st.q r15, 12*8, r48
+ st.q r15, 11*8, r47
+ st.q r15, 10*8, r46
+ st.q r15, 9*8, r45
+ st.q r15, 8*8, r44
+ st.q r15, 7*8, r35
+ st.q r15, 6*8, r34
+ st.q r15, 5*8, r33
+ st.q r15, 4*8, r32
+ st.q r15, 3*8, r31
+ st.q r15, 2*8, r30
+ st.q r15, 1*8, r29
+ st.q r15, 0*8, r28
+ blink tr0, r63
+#ifndef __SH4_NOFPU__
+ ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
+#else
+ ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
+#endif
+#ifndef __SH4_NOFPU__
+ .global GLOBAL(GCC_pop_shmedia_regs)
+ FUNC(GLOBAL(GCC_pop_shmedia_regs))
+GLOBAL(GCC_pop_shmedia_regs):
+ pt .L0, tr1
+ movi 41*8, r0
+ fld.d r15, 40*8, dr62
+ fld.d r15, 39*8, dr60
+ fld.d r15, 38*8, dr58
+ fld.d r15, 37*8, dr56
+ fld.d r15, 36*8, dr54
+ fld.d r15, 35*8, dr52
+ fld.d r15, 34*8, dr50
+ fld.d r15, 33*8, dr48
+ fld.d r15, 32*8, dr46
+ fld.d r15, 31*8, dr44
+ fld.d r15, 30*8, dr42
+ fld.d r15, 29*8, dr40
+ fld.d r15, 28*8, dr38
+ fld.d r15, 27*8, dr36
+ blink tr1, r63
+#else /* ! __SH4_NOFPU__ */
+ .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
+ FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+GLOBAL(GCC_pop_shmedia_regs_nofpu):
+#endif /* ! __SH4_NOFPU__ */
+ movi 27*8, r0
+.L0:
+ ptabs r18, tr0
+ ld.q r15, 26*8, r62
+ ld.q r15, 25*8, r61
+ ld.q r15, 24*8, r60
+ ptabs r62, tr7
+ ptabs r61, tr6
+ ptabs r60, tr5
+ ld.q r15, 23*8, r59
+ ld.q r15, 22*8, r58
+ ld.q r15, 21*8, r57
+ ld.q r15, 20*8, r56
+ ld.q r15, 19*8, r55
+ ld.q r15, 18*8, r54
+ ld.q r15, 17*8, r53
+ ld.q r15, 16*8, r52
+ ld.q r15, 15*8, r51
+ ld.q r15, 14*8, r50
+ ld.q r15, 13*8, r49
+ ld.q r15, 12*8, r48
+ ld.q r15, 11*8, r47
+ ld.q r15, 10*8, r46
+ ld.q r15, 9*8, r45
+ ld.q r15, 8*8, r44
+ ld.q r15, 7*8, r35
+ ld.q r15, 6*8, r34
+ ld.q r15, 5*8, r33
+ ld.q r15, 4*8, r32
+ ld.q r15, 3*8, r31
+ ld.q r15, 2*8, r30
+ ld.q r15, 1*8, r29
+ ld.q r15, 0*8, r28
+ add.l r15, r0, r15
+ blink tr0, r63
+
+#ifndef __SH4_NOFPU__
+ ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
+#else
+ ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
+#endif
+#endif /* __SH5__ == 32 */
+#endif /* L_push_pop_shmedia_regs */
+
+#ifdef L_div_table
+#if __SH5__
+#if defined(__pic__) && defined(__SHMEDIA__)
+ .global GLOBAL(sdivsi3)
+ FUNC(GLOBAL(sdivsi3))
+#if __SH5__ == 32
+ .section .text..SHmedia32,"ax"
+#else
+ .text
+#endif
+#if 0
+/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
+ in a text section does not work (at least for shared libraries):
+ the linker sets the LSB of the address as if this was SHmedia code. */
+#define TEXT_DATA_BUG
+#endif
+ .align 2
+ // inputs: r4,r5
+ // clobbered: r1,r18,r19,r20,r21,r25,tr0
+ // result in r0
+ .global GLOBAL(sdivsi3)
+GLOBAL(sdivsi3):
+#ifdef TEXT_DATA_BUG
+ ptb datalabel Local_div_table,tr0
+#else
+ ptb GLOBAL(div_table_internal),tr0
+#endif
+ nsb r5, r1
+ shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
+ shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
+ /* bubble */
+ gettr tr0,r20
+ ldx.ub r20, r21, r19 // u0.8
+ shari r25, 32, r25 // normalize to s2.30
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ ptabs r18, tr0
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+ muls.l r19, r19, r21 // u0.28
+ sub r63, r1, r1
+ addi r1, 92, r1
+ muls.l r25, r21, r18 // s2.58
+ shlli r19, 45, r19 // multiply by two and convert to s2.58
+ /* bubble */
+ sub r19, r18, r18
+ shari r18, 28, r18 // some 22 bit inverse in s1.30
+ muls.l r18, r25, r0 // s2.60
+ muls.l r18, r4, r25 // s32.30
+ /* bubble */
+ shari r0, 16, r19 // s-16.44
+ muls.l r19, r18, r19 // s-16.74
+ shari r25, 63, r0
+ shari r4, 14, r18 // s19.-14
+ shari r19, 30, r19 // s-16.44
+ muls.l r19, r18, r19 // s15.30
+ xor r21, r0, r21 // You could also use the constant 1 << 27.
+ add r21, r25, r21
+ sub r21, r19, r21
+ shard r21, r1, r21
+ sub r21, r0, r0
+ blink tr0, r63
+ ENDFUNC(GLOBAL(sdivsi3))
+/* This table has been generated by divtab.c .
+Defects for bias -330:
+ Max defect: 6.081536e-07 at -1.000000e+00
+ Min defect: 2.849516e-08 at 1.030651e+00
+ Max 2nd step defect: 9.606539e-12 at -1.000000e+00
+ Min 2nd step defect: 0.000000e+00 at 0.000000e+00
+ Defect at 1: 1.238659e-07
+ Defect at -2: 1.061708e-07 */
+#else /* ! __pic__ || ! __SHMEDIA__ */
+ .section .rodata
+#endif /* __pic__ */
+#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
+ .balign 2
+ .type Local_div_table,@object
+ .size Local_div_table,128
+/* negative division constants */
+ .word -16638
+ .word -17135
+ .word -17737
+ .word -18433
+ .word -19103
+ .word -19751
+ .word -20583
+ .word -21383
+ .word -22343
+ .word -23353
+ .word -24407
+ .word -25582
+ .word -26863
+ .word -28382
+ .word -29965
+ .word -31800
+/* negative division factors */
+ .byte 66
+ .byte 70
+ .byte 75
+ .byte 81
+ .byte 87
+ .byte 93
+ .byte 101
+ .byte 109
+ .byte 119
+ .byte 130
+ .byte 142
+ .byte 156
+ .byte 172
+ .byte 192
+ .byte 214
+ .byte 241
+ .skip 16
+Local_div_table:
+ .skip 16
+/* positive division factors */
+ .byte 241
+ .byte 214
+ .byte 192
+ .byte 172
+ .byte 156
+ .byte 142
+ .byte 130
+ .byte 119
+ .byte 109
+ .byte 101
+ .byte 93
+ .byte 87
+ .byte 81
+ .byte 75
+ .byte 70
+ .byte 66
+/* positive division constants */
+ .word 31801
+ .word 29966
+ .word 28383
+ .word 26864
+ .word 25583
+ .word 24408
+ .word 23354
+ .word 22344
+ .word 21384
+ .word 20584
+ .word 19752
+ .word 19104
+ .word 18434
+ .word 17738
+ .word 17136
+ .word 16639
+ .section .rodata
+#endif /* TEXT_DATA_BUG */
+ .balign 2
+ .type GLOBAL(div_table),@object
+ .size GLOBAL(div_table),128
+/* negative division constants */
+ .word -16638
+ .word -17135
+ .word -17737
+ .word -18433
+ .word -19103
+ .word -19751
+ .word -20583
+ .word -21383
+ .word -22343
+ .word -23353
+ .word -24407
+ .word -25582
+ .word -26863
+ .word -28382
+ .word -29965
+ .word -31800
+/* negative division factors */
+ .byte 66
+ .byte 70
+ .byte 75
+ .byte 81
+ .byte 87
+ .byte 93
+ .byte 101
+ .byte 109
+ .byte 119
+ .byte 130
+ .byte 142
+ .byte 156
+ .byte 172
+ .byte 192
+ .byte 214
+ .byte 241
+ .skip 16
+ .global GLOBAL(div_table)
+GLOBAL(div_table):
+ HIDDEN_ALIAS(div_table_internal,div_table)
+ .skip 16
+/* positive division factors */
+ .byte 241
+ .byte 214
+ .byte 192
+ .byte 172
+ .byte 156
+ .byte 142
+ .byte 130
+ .byte 119
+ .byte 109
+ .byte 101
+ .byte 93
+ .byte 87
+ .byte 81
+ .byte 75
+ .byte 70
+ .byte 66
+/* positive division constants */
+ .word 31801
+ .word 29966
+ .word 28383
+ .word 26864
+ .word 25583
+ .word 24408
+ .word 23354
+ .word 22344
+ .word 21384
+ .word 20584
+ .word 19752
+ .word 19104
+ .word 18434
+ .word 17738
+ .word 17136
+ .word 16639
+
+#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
+/* This code uses shld and is thus not suitable for SH1 / SH2.  */
+
+/* Signed / unsigned division without use of FPU, optimized for SH4.
+ Uses a lookup table for divisors in the range -128 .. +128, and
+ div1 with case distinction for larger divisors in three more ranges.
+ The code is lumped together with the table to allow the use of mova. */
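+
+/* For divisors 2 .. 128 the quotient comes from a table of normalized
+   reciprocals with an implicit leading 1 in bit 32; the leading 1 is
+   realized by the addc of the dividend in the code below.  A C model of
+   that path (an editor's sketch, not part of the build; d == 1 takes
+   the separate div_by_1 paths):
+
+	extern const signed char div_table_clz[], div_table_ix[];
+	extern const unsigned div_table_inv[];
+
+	unsigned udiv_le128_model (unsigned n, unsigned d)
+	{
+	  unsigned inv = *(const unsigned *)
+	    ((const char *) div_table_inv + (signed char) div_table_ix[d]);
+	  unsigned hi = (unsigned) (((unsigned long long) inv * n) >> 32);
+	  unsigned long long t = (unsigned long long) hi + n;
+	  return (unsigned) (t >> 1) >> -div_table_clz[d];
+	}
+
+   e.g. d == 3 selects inv == 0x55555556 and a final right shift of 1,
+   so 0xFFFFFFFF / 3 comes out as 0x55555555.  */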
+#ifdef __LITTLE_ENDIAN__
+#define L_LSB 0
+#define L_LSWMSB 1
+#define L_MSWLSB 2
+#else
+#define L_LSB 3
+#define L_LSWMSB 2
+#define L_MSWLSB 1
+#endif
+
+ .balign 4
+ .global GLOBAL(udivsi3_i4i)
+ FUNC(GLOBAL(udivsi3_i4i))
+GLOBAL(udivsi3_i4i):
+ mov.w LOCAL(c128_w), r1
+ div0u
+ mov r4,r0
+ shlr8 r0
+ cmp/hi r1,r5
+ extu.w r5,r1
+ bf LOCAL(udiv_le128)
+ cmp/eq r5,r1
+ bf LOCAL(udiv_ge64k)
+ shlr r0
+ mov r5,r1
+ shll16 r5
+ mov.l r4,@-r15
+ div1 r5,r0
+ mov.l r1,@-r15
+ div1 r5,r0
+ div1 r5,r0
+ bra LOCAL(udiv_25)
+ div1 r5,r0
+
+LOCAL(div_le128):
+ mova LOCAL(div_table_ix),r0
+ bra LOCAL(div_le128_2)
+ mov.b @(r0,r5),r1
+LOCAL(udiv_le128):
+ mov.l r4,@-r15
+ mova LOCAL(div_table_ix),r0
+ mov.b @(r0,r5),r1
+ mov.l r5,@-r15
+LOCAL(div_le128_2):
+ mova LOCAL(div_table_inv),r0
+ mov.l @(r0,r1),r1
+ mov r5,r0
+ tst #0xfe,r0
+ mova LOCAL(div_table_clz),r0
+ dmulu.l r1,r4
+ mov.b @(r0,r5),r1
+ bt/s LOCAL(div_by_1)
+ mov r4,r0
+ mov.l @r15+,r5
+ sts mach,r0
+ /* clrt */
+ addc r4,r0
+ mov.l @r15+,r4
+ rotcr r0
+ rts
+ shld r1,r0
+
+LOCAL(div_by_1_neg):
+ neg r4,r0
+LOCAL(div_by_1):
+ mov.l @r15+,r5
+ rts
+ mov.l @r15+,r4
+
+LOCAL(div_ge64k):
+ bt/s LOCAL(div_r8)
+ div0u
+ shll8 r5
+ bra LOCAL(div_ge64k_2)
+ div1 r5,r0
+LOCAL(udiv_ge64k):
+ cmp/hi r0,r5
+ mov r5,r1
+ bt LOCAL(udiv_r8)
+ shll8 r5
+ mov.l r4,@-r15
+ div1 r5,r0
+ mov.l r1,@-r15
+LOCAL(div_ge64k_2):
+ div1 r5,r0
+ mov.l LOCAL(zero_l),r1
+ .rept 4
+ div1 r5,r0
+ .endr
+ mov.l r1,@-r15
+ div1 r5,r0
+ mov.w LOCAL(m256_w),r1
+ div1 r5,r0
+ mov.b r0,@(L_LSWMSB,r15)
+ xor r4,r0
+ and r1,r0
+ bra LOCAL(div_ge64k_end)
+ xor r4,r0
+
+LOCAL(div_r8):
+ shll16 r4
+ bra LOCAL(div_r8_2)
+ shll8 r4
+LOCAL(udiv_r8):
+ mov.l r4,@-r15
+ shll16 r4
+ clrt
+ shll8 r4
+ mov.l r5,@-r15
+LOCAL(div_r8_2):
+ rotcl r4
+ mov r0,r1
+ div1 r5,r1
+ mov r4,r0
+ rotcl r0
+ mov r5,r4
+ div1 r5,r1
+ .rept 5
+ rotcl r0; div1 r5,r1
+ .endr
+ rotcl r0
+ mov.l @r15+,r5
+ div1 r4,r1
+ mov.l @r15+,r4
+ rts
+ rotcl r0
+
+ ENDFUNC(GLOBAL(udivsi3_i4i))
+
+ .global GLOBAL(sdivsi3_i4i)
+ FUNC(GLOBAL(sdivsi3_i4i))
+ /* This is link-compatible with a GLOBAL(sdivsi3) call,
+ but we effectively clobber only r1. */
+GLOBAL(sdivsi3_i4i):
+ mov.l r4,@-r15
+ cmp/pz r5
+ mov.w LOCAL(c128_w), r1
+ bt/s LOCAL(pos_divisor)
+ cmp/pz r4
+ mov.l r5,@-r15
+ neg r5,r5
+ bt/s LOCAL(neg_result)
+ cmp/hi r1,r5
+ neg r4,r4
+LOCAL(pos_result):
+ extu.w r5,r0
+ bf LOCAL(div_le128)
+ cmp/eq r5,r0
+ mov r4,r0
+ shlr8 r0
+ bf/s LOCAL(div_ge64k)
+ cmp/hi r0,r5
+ div0u
+ shll16 r5
+ div1 r5,r0
+ div1 r5,r0
+ div1 r5,r0
+LOCAL(udiv_25):
+ mov.l LOCAL(zero_l),r1
+ div1 r5,r0
+ div1 r5,r0
+ mov.l r1,@-r15
+ .rept 3
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_MSWLSB,r15)
+ xtrct r4,r0
+ swap.w r0,r0
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_LSWMSB,r15)
+LOCAL(div_ge64k_end):
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+ extu.b r0,r0
+ mov.l @r15+,r5
+ or r4,r0
+ mov.l @r15+,r4
+ rts
+ rotcl r0
+
+LOCAL(div_le128_neg):
+ tst #0xfe,r0
+ mova LOCAL(div_table_ix),r0
+ mov.b @(r0,r5),r1
+ mova LOCAL(div_table_inv),r0
+ bt/s LOCAL(div_by_1_neg)
+ mov.l @(r0,r1),r1
+ mova LOCAL(div_table_clz),r0
+ dmulu.l r1,r4
+ mov.b @(r0,r5),r1
+ mov.l @r15+,r5
+ sts mach,r0
+ /* clrt */
+ addc r4,r0
+ mov.l @r15+,r4
+ rotcr r0
+ shld r1,r0
+ rts
+ neg r0,r0
+
+LOCAL(pos_divisor):
+ mov.l r5,@-r15
+ bt/s LOCAL(pos_result)
+ cmp/hi r1,r5
+ neg r4,r4
+LOCAL(neg_result):
+ extu.w r5,r0
+ bf LOCAL(div_le128_neg)
+ cmp/eq r5,r0
+ mov r4,r0
+ shlr8 r0
+ bf/s LOCAL(div_ge64k_neg)
+ cmp/hi r0,r5
+ div0u
+ mov.l LOCAL(zero_l),r1
+ shll16 r5
+ div1 r5,r0
+ mov.l r1,@-r15
+ .rept 7
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_MSWLSB,r15)
+ xtrct r4,r0
+ swap.w r0,r0
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.b r0,@(L_LSWMSB,r15)
+LOCAL(div_ge64k_neg_end):
+ .rept 8
+ div1 r5,r0
+ .endr
+ mov.l @r15+,r4 ! zero-extension and swap using LS unit.
+ extu.b r0,r1
+ mov.l @r15+,r5
+ or r4,r1
+LOCAL(div_r8_neg_end):
+ mov.l @r15+,r4
+ rotcl r1
+ rts
+ neg r1,r0
+
+LOCAL(div_ge64k_neg):
+ bt/s LOCAL(div_r8_neg)
+ div0u
+ shll8 r5
+ mov.l LOCAL(zero_l),r1
+ .rept 6
+ div1 r5,r0
+ .endr
+ mov.l r1,@-r15
+ div1 r5,r0
+ mov.w LOCAL(m256_w),r1
+ div1 r5,r0
+ mov.b r0,@(L_LSWMSB,r15)
+ xor r4,r0
+ and r1,r0
+ bra LOCAL(div_ge64k_neg_end)
+ xor r4,r0
+
+LOCAL(c128_w):
+ .word 128
+
+LOCAL(div_r8_neg):
+ clrt
+ shll16 r4
+ mov r4,r1
+ shll8 r1
+ mov r5,r4
+ .rept 7
+ rotcl r1; div1 r5,r0
+ .endr
+ mov.l @r15+,r5
+ rotcl r1
+ bra LOCAL(div_r8_neg_end)
+ div1 r4,r0
+
+LOCAL(m256_w):
+ .word 0xff00
+/* This table has been generated by divtab-sh4.c. */
+ .balign 4
+LOCAL(div_table_clz):
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte -1
+ .byte -1
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -2
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -3
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -4
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -5
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+ .byte -6
+/* Lookup table translating positive divisor to index into table of
+ normalized inverse. N.B. the '0' entry is also the last entry of the
+ previous table, and causes an unaligned access for division by zero. */
+LOCAL(div_table_ix):
+ .byte -6
+ .byte -128
+ .byte -128
+ .byte 0
+ .byte -128
+ .byte -64
+ .byte 0
+ .byte 64
+ .byte -128
+ .byte -96
+ .byte -64
+ .byte -32
+ .byte 0
+ .byte 32
+ .byte 64
+ .byte 96
+ .byte -128
+ .byte -112
+ .byte -96
+ .byte -80
+ .byte -64
+ .byte -48
+ .byte -32
+ .byte -16
+ .byte 0
+ .byte 16
+ .byte 32
+ .byte 48
+ .byte 64
+ .byte 80
+ .byte 96
+ .byte 112
+ .byte -128
+ .byte -120
+ .byte -112
+ .byte -104
+ .byte -96
+ .byte -88
+ .byte -80
+ .byte -72
+ .byte -64
+ .byte -56
+ .byte -48
+ .byte -40
+ .byte -32
+ .byte -24
+ .byte -16
+ .byte -8
+ .byte 0
+ .byte 8
+ .byte 16
+ .byte 24
+ .byte 32
+ .byte 40
+ .byte 48
+ .byte 56
+ .byte 64
+ .byte 72
+ .byte 80
+ .byte 88
+ .byte 96
+ .byte 104
+ .byte 112
+ .byte 120
+ .byte -128
+ .byte -124
+ .byte -120
+ .byte -116
+ .byte -112
+ .byte -108
+ .byte -104
+ .byte -100
+ .byte -96
+ .byte -92
+ .byte -88
+ .byte -84
+ .byte -80
+ .byte -76
+ .byte -72
+ .byte -68
+ .byte -64
+ .byte -60
+ .byte -56
+ .byte -52
+ .byte -48
+ .byte -44
+ .byte -40
+ .byte -36
+ .byte -32
+ .byte -28
+ .byte -24
+ .byte -20
+ .byte -16
+ .byte -12
+ .byte -8
+ .byte -4
+ .byte 0
+ .byte 4
+ .byte 8
+ .byte 12
+ .byte 16
+ .byte 20
+ .byte 24
+ .byte 28
+ .byte 32
+ .byte 36
+ .byte 40
+ .byte 44
+ .byte 48
+ .byte 52
+ .byte 56
+ .byte 60
+ .byte 64
+ .byte 68
+ .byte 72
+ .byte 76
+ .byte 80
+ .byte 84
+ .byte 88
+ .byte 92
+ .byte 96
+ .byte 100
+ .byte 104
+ .byte 108
+ .byte 112
+ .byte 116
+ .byte 120
+ .byte 124
+ .byte -128
+/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
+ .balign 4
+LOCAL(zero_l):
+ .long 0x0
+ .long 0xF81F81F9
+ .long 0xF07C1F08
+ .long 0xE9131AC0
+ .long 0xE1E1E1E2
+ .long 0xDAE6076C
+ .long 0xD41D41D5
+ .long 0xCD856891
+ .long 0xC71C71C8
+ .long 0xC0E07039
+ .long 0xBACF914D
+ .long 0xB4E81B4F
+ .long 0xAF286BCB
+ .long 0xA98EF607
+ .long 0xA41A41A5
+ .long 0x9EC8E952
+ .long 0x9999999A
+ .long 0x948B0FCE
+ .long 0x8F9C18FA
+ .long 0x8ACB90F7
+ .long 0x86186187
+ .long 0x81818182
+ .long 0x7D05F418
+ .long 0x78A4C818
+ .long 0x745D1746
+ .long 0x702E05C1
+ .long 0x6C16C16D
+ .long 0x68168169
+ .long 0x642C8591
+ .long 0x60581606
+ .long 0x5C9882BA
+ .long 0x58ED2309
+LOCAL(div_table_inv):
+ .long 0x55555556
+ .long 0x51D07EAF
+ .long 0x4E5E0A73
+ .long 0x4AFD6A06
+ .long 0x47AE147B
+ .long 0x446F8657
+ .long 0x41414142
+ .long 0x3E22CBCF
+ .long 0x3B13B13C
+ .long 0x38138139
+ .long 0x3521CFB3
+ .long 0x323E34A3
+ .long 0x2F684BDB
+ .long 0x2C9FB4D9
+ .long 0x29E4129F
+ .long 0x27350B89
+ .long 0x24924925
+ .long 0x21FB7813
+ .long 0x1F7047DD
+ .long 0x1CF06ADB
+ .long 0x1A7B9612
+ .long 0x18118119
+ .long 0x15B1E5F8
+ .long 0x135C8114
+ .long 0x11111112
+ .long 0xECF56BF
+ .long 0xC9714FC
+ .long 0xA6810A7
+ .long 0x8421085
+ .long 0x624DD30
+ .long 0x4104105
+ .long 0x2040811
+	/* maximum error: 0.987342, scaled: 0.921875 */
+
+ ENDFUNC(GLOBAL(sdivsi3_i4i))
+#endif /* SH3 / SH4 */
+
+#endif /* L_div_table */
+
+#ifdef L_udiv_qrnnd_16
+#if !__SHMEDIA__
+ HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
+	/* Inputs:  r0: n1, r4: n0, r5: d, r6: d1.
+	   Outputs: r0: rn (remainder), r1: qn (quotient).
+	   Clobbers: r2 (__m).  */
+ /* n1 < d, but n1 might be larger than d1. */
+ .global GLOBAL(udiv_qrnnd_16)
+ .balign 8
+GLOBAL(udiv_qrnnd_16):
+ div0u
+ cmp/hi r6,r0
+ bt .Lots
+ .rept 16
+ div1 r6,r0
+ .endr
+ extu.w r0,r1
+ bt 0f
+ add r6,r0
+0: rotcl r1
+ mulu.w r1,r5
+ xtrct r4,r0
+ swap.w r0,r0
+ sts macl,r2
+ cmp/hs r2,r0
+ sub r2,r0
+ bt 0f
+ addc r5,r0
+ add #-1,r1
+ bt 0f
+1: add #-1,r1
+ rts
+ add r5,r0
+ .balign 8
+.Lots:
+ sub r5,r0
+ swap.w r4,r1
+ xtrct r0,r1
+ clrt
+ mov r1,r0
+ addc r5,r0
+ mov #-1,r1
+ SL1(bf, 1b,
+ shlr16 r1)
+0: rts
+ nop
+ ENDFUNC(GLOBAL(udiv_qrnnd_16))
+#endif /* !__SHMEDIA__ */
+#endif /* L_udiv_qrnnd_16 */
diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h
new file mode 100644
index 000000000..af4b41cc3
--- /dev/null
+++ b/gcc/config/sh/lib1funcs.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2006, 2009
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef __ELF__
+#define LOCAL(X) .L_##X
+#define FUNC(X) .type X,@function
+#define HIDDEN_FUNC(X) FUNC(X); .hidden X
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X)
+#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
+#define ENDFUNC(X) ENDFUNC0(X)
+#else
+#define LOCAL(X) L_##X
+#define FUNC(X)
+#define HIDDEN_FUNC(X)
+#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y)
+#define ENDFUNC(X)
+#endif
+
+#define CONCAT(A,B) A##B
+#define GLOBAL0(U,X) CONCAT(U,__##X)
+#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X)
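+
+/* For example, with an empty __USER_LABEL_PREFIX__, GLOBAL(sdivsi3)
+   expands to __sdivsi3; with a prefix of `_' it becomes ___sdivsi3.  */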
+
+#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y)
+
+#if defined __SH2A__ && defined __FMOVD_ENABLED__
+#undef FMOVD_WORKS
+#define FMOVD_WORKS
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define DR00 fr1
+#define DR01 fr0
+#define DR20 fr3
+#define DR21 fr2
+#define DR40 fr5
+#define DR41 fr4
+#else /* !__LITTLE_ENDIAN__ */
+#define DR00 fr0
+#define DR01 fr1
+#define DR20 fr2
+#define DR21 fr3
+#define DR40 fr4
+#define DR41 fr5
+#endif /* !__LITTLE_ENDIAN__ */
+
+#ifdef __sh1__
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ in_slot, in_slot_arg2; branch dest
+#define SL1(branch, dest, in_slot) \
+ in_slot; branch dest
+#else /* ! __sh1__ */
+#define SL(branch, dest, in_slot, in_slot_arg2) \
+ branch##.s dest; in_slot, in_slot_arg2
+#define SL1(branch, dest, in_slot) \
+ branch##/s dest; in_slot
+#endif /* !__sh1__ */
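+
+/* Example: SL1 (bf, 1b, shlr16 r1) emits "shlr16 r1; bf 1b" on SH1,
+   which lacks delayed conditional branches, and "bf/s 1b; shlr16 r1"
+   (the shift riding in the delay slot) on later CPUs.  */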
diff --git a/gcc/config/sh/libgcc-excl.ver b/gcc/config/sh/libgcc-excl.ver
new file mode 100644
index 000000000..325c74054
--- /dev/null
+++ b/gcc/config/sh/libgcc-excl.ver
@@ -0,0 +1,8 @@
+# Exclude various symbols which should not be visible in libgcc.so for SH.
+%exclude {
+ __ashlsi3
+ __ashrsi3
+ __lshrsi3
+ __mulsi3 # this is an SH1-only symbol.
+ __udivsi3
+}
diff --git a/gcc/config/sh/libgcc-glibc.ver b/gcc/config/sh/libgcc-glibc.ver
new file mode 100644
index 000000000..b8ec32653
--- /dev/null
+++ b/gcc/config/sh/libgcc-glibc.ver
@@ -0,0 +1,48 @@
+# Copyright (C) 2002, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By choosing the same version tags for these specific routines now, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+# Note that we cannot use the default libgcc-glibc.ver file on sh,
+# because GLIBC_2.0 does not exist on this architecture, as the first
+# ever glibc release on the platform was GLIBC_2.2.
+
+%exclude {
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.2
+GLIBC_2.2 {
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
diff --git a/gcc/config/sh/linux-atomic.asm b/gcc/config/sh/linux-atomic.asm
new file mode 100644
index 000000000..743c61bb7
--- /dev/null
+++ b/gcc/config/sh/linux-atomic.asm
@@ -0,0 +1,223 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+!! Linux-specific atomic routines for the Renesas / SuperH SH CPUs.
+!! The Linux kernel for SH3/4 implements support for software
+!! atomic sequences.
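+!!
+!! Each routine performs its memory update inside a "gUSA" region:
+!! r15 is loaded with the negative length of the critical sequence and
+!! r0 with the address of its end label, and the kernel restarts the
+!! region from the beginning if it preempts a thread while r15 is
+!! negative.  Roughly, as a C model (an editor's sketch; the atomicity
+!! comes from that restart convention, not from the C itself):
+!!
+!!	int sync_lock_test_and_set_4_model (int *p, int v)
+!!	{
+!!	  int old = *p;		/* restartable region begins */
+!!	  *p = v;		/* restartable region ends   */
+!!	  return old;
+!!	}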
+
+#define FUNC(X) .type X,@function
+#define HIDDEN_FUNC(X) FUNC(X); .hidden X
+#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
+#define ENDFUNC(X) ENDFUNC0(X)
+
+#if ! __SH5__
+
+#define ATOMIC_TEST_AND_SET(N,T,EXT) \
+ .global __sync_lock_test_and_set_##N; \
+ HIDDEN_FUNC(__sync_lock_test_and_set_##N); \
+ .align 2; \
+__sync_lock_test_and_set_##N:; \
+ mova 1f, r0; \
+ nop; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ mov.##T r5, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r2, r0; \
+ ENDFUNC(__sync_lock_test_and_set_##N)
+
+ATOMIC_TEST_AND_SET (1,b,extu.b)
+ATOMIC_TEST_AND_SET (2,w,extu.w)
+ATOMIC_TEST_AND_SET (4,l,mov)
+
+#define ATOMIC_COMPARE_AND_SWAP(N,T,EXTS,EXT) \
+ .global __sync_val_compare_and_swap_##N; \
+ HIDDEN_FUNC(__sync_val_compare_and_swap_##N); \
+ .align 2; \
+__sync_val_compare_and_swap_##N:; \
+ mova 1f, r0; \
+ EXTS r5, r5; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ cmp/eq r2, r5; \
+ bf 1f; \
+ mov.##T r6, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r2, r0; \
+ ENDFUNC(__sync_val_compare_and_swap_##N)
+
+ATOMIC_COMPARE_AND_SWAP (1,b,exts.b,extu.b)
+ATOMIC_COMPARE_AND_SWAP (2,w,exts.w,extu.w)
+ATOMIC_COMPARE_AND_SWAP (4,l,mov,mov)
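+
+!! C model of the compare-and-swap sequences above (an editor's
+!! sketch).  Note the EXTS of the expected value: mov.b / mov.w load
+!! sign-extended, so the expected value must be sign-extended too for
+!! the cmp/eq to succeed.
+!!
+!!	int sync_val_compare_and_swap_4_model (int *p, int expected,
+!!					       int desired)
+!!	{
+!!	  int cur = *p;			/* restartable region */
+!!	  if (cur == expected)
+!!	    *p = desired;
+!!	  return cur;
+!!	}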
+
+#define ATOMIC_BOOL_COMPARE_AND_SWAP(N,T,EXTS) \
+ .global __sync_bool_compare_and_swap_##N; \
+ HIDDEN_FUNC(__sync_bool_compare_and_swap_##N); \
+ .align 2; \
+__sync_bool_compare_and_swap_##N:; \
+ mova 1f, r0; \
+ EXTS r5, r5; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ cmp/eq r2, r5; \
+ bf 1f; \
+ mov.##T r6, @r4; \
+1: mov r1, r15; \
+ rts; \
+ movt r0; \
+ ENDFUNC(__sync_bool_compare_and_swap_##N)
+
+ATOMIC_BOOL_COMPARE_AND_SWAP (1,b,exts.b)
+ATOMIC_BOOL_COMPARE_AND_SWAP (2,w,exts.w)
+ATOMIC_BOOL_COMPARE_AND_SWAP (4,l,mov)
+
+#define ATOMIC_FETCH_AND_OP(OP,N,T,EXT) \
+ .global __sync_fetch_and_##OP##_##N; \
+ HIDDEN_FUNC(__sync_fetch_and_##OP##_##N); \
+ .align 2; \
+__sync_fetch_and_##OP##_##N:; \
+ mova 1f, r0; \
+ nop; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ mov r5, r3; \
+ OP r2, r3; \
+ mov.##T r3, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r2, r0; \
+ ENDFUNC(__sync_fetch_and_##OP##_##N)
+
+ATOMIC_FETCH_AND_OP(add,1,b,extu.b)
+ATOMIC_FETCH_AND_OP(add,2,w,extu.w)
+ATOMIC_FETCH_AND_OP(add,4,l,mov)
+
+ATOMIC_FETCH_AND_OP(or,1,b,extu.b)
+ATOMIC_FETCH_AND_OP(or,2,w,extu.w)
+ATOMIC_FETCH_AND_OP(or,4,l,mov)
+
+ATOMIC_FETCH_AND_OP(and,1,b,extu.b)
+ATOMIC_FETCH_AND_OP(and,2,w,extu.w)
+ATOMIC_FETCH_AND_OP(and,4,l,mov)
+
+ATOMIC_FETCH_AND_OP(xor,1,b,extu.b)
+ATOMIC_FETCH_AND_OP(xor,2,w,extu.w)
+ATOMIC_FETCH_AND_OP(xor,4,l,mov)
+
+#define ATOMIC_FETCH_AND_COMBOP(OP,OP0,OP1,N,T,EXT) \
+ .global __sync_fetch_and_##OP##_##N; \
+ HIDDEN_FUNC(__sync_fetch_and_##OP##_##N); \
+ .align 2; \
+__sync_fetch_and_##OP##_##N:; \
+ mova 1f, r0; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ mov r5, r3; \
+ OP0 r2, r3; \
+ OP1 r3, r3; \
+ mov.##T r3, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r2, r0; \
+ ENDFUNC(__sync_fetch_and_##OP##_##N)
+
+ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,1,b,extu.b)
+ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,2,w,extu.w)
+ATOMIC_FETCH_AND_COMBOP(sub,sub,neg,4,l,mov)
+
+ATOMIC_FETCH_AND_COMBOP(nand,and,not,1,b,extu.b)
+ATOMIC_FETCH_AND_COMBOP(nand,and,not,2,w,extu.w)
+ATOMIC_FETCH_AND_COMBOP(nand,and,not,4,l,mov)
+
+#define ATOMIC_OP_AND_FETCH(OP,N,T,EXT) \
+ .global __sync_##OP##_and_fetch_##N; \
+ HIDDEN_FUNC(__sync_##OP##_and_fetch_##N); \
+ .align 2; \
+__sync_##OP##_and_fetch_##N:; \
+ mova 1f, r0; \
+ nop; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ mov r5, r3; \
+ OP r2, r3; \
+ mov.##T r3, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r3, r0; \
+ ENDFUNC(__sync_##OP##_and_fetch_##N)
+
+ATOMIC_OP_AND_FETCH(add,1,b,extu.b)
+ATOMIC_OP_AND_FETCH(add,2,w,extu.w)
+ATOMIC_OP_AND_FETCH(add,4,l,mov)
+
+ATOMIC_OP_AND_FETCH(or,1,b,extu.b)
+ATOMIC_OP_AND_FETCH(or,2,w,extu.w)
+ATOMIC_OP_AND_FETCH(or,4,l,mov)
+
+ATOMIC_OP_AND_FETCH(and,1,b,extu.b)
+ATOMIC_OP_AND_FETCH(and,2,w,extu.w)
+ATOMIC_OP_AND_FETCH(and,4,l,mov)
+
+ATOMIC_OP_AND_FETCH(xor,1,b,extu.b)
+ATOMIC_OP_AND_FETCH(xor,2,w,extu.w)
+ATOMIC_OP_AND_FETCH(xor,4,l,mov)
+
+#define ATOMIC_COMBOP_AND_FETCH(OP,OP0,OP1,N,T,EXT) \
+ .global __sync_##OP##_and_fetch_##N; \
+ HIDDEN_FUNC(__sync_##OP##_and_fetch_##N); \
+ .align 2; \
+__sync_##OP##_and_fetch_##N:; \
+ mova 1f, r0; \
+ mov r15, r1; \
+ mov #(0f-1f), r15; \
+0: mov.##T @r4, r2; \
+ mov r5, r3; \
+ OP0 r2, r3; \
+ OP1 r3, r3; \
+ mov.##T r3, @r4; \
+1: mov r1, r15; \
+ rts; \
+ EXT r3, r0; \
+ ENDFUNC(__sync_##OP##_and_fetch_##N)
+
+ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,1,b,extu.b)
+ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,2,w,extu.w)
+ATOMIC_COMBOP_AND_FETCH(sub,sub,neg,4,l,mov)
+
+ATOMIC_COMBOP_AND_FETCH(nand,and,not,1,b,extu.b)
+ATOMIC_COMBOP_AND_FETCH(nand,and,not,2,w,extu.w)
+ATOMIC_COMBOP_AND_FETCH(nand,and,not,4,l,mov)
+
+.section .note.GNU-stack,"",%progbits
+.previous
+
+#endif /* ! __SH5__ */
diff --git a/gcc/config/sh/linux-unwind.h b/gcc/config/sh/linux-unwind.h
new file mode 100644
index 000000000..5a78e3172
--- /dev/null
+++ b/gcc/config/sh/linux-unwind.h
@@ -0,0 +1,256 @@
+/* DWARF2 EH unwinding support for SH Linux.
+ Copyright (C) 2004, 2005, 2006, 2007, 2009, 2012 Free Software Foundation,
+ Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Inspect the code at the return address to identify a signal frame,
+   and set the frame state data appropriately.  See unwind-dw2.c for
+   the structs.  Don't use this at all if inhibit_libc is used.  */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include "insn-constants.h"
+
+# if defined (__SH5__)
+#define SH_DWARF_FRAME_GP0 0
+#define SH_DWARF_FRAME_FP0 77
+#define SH_DWARF_FRAME_BT0 68
+#define SH_DWARF_FRAME_PR_MEDIA 18
+#define SH_DWARF_FRAME_SR 65
+#define SH_DWARF_FRAME_FPSCR 76
+#else
+#define SH_DWARF_FRAME_GP0 0
+#define SH_DWARF_FRAME_FP0 25
+#define SH_DWARF_FRAME_XD0 87
+#define SH_DWARF_FRAME_PR 17
+#define SH_DWARF_FRAME_GBR 18
+#define SH_DWARF_FRAME_MACH 20
+#define SH_DWARF_FRAME_MACL 21
+#define SH_DWARF_FRAME_PC 16
+#define SH_DWARF_FRAME_SR 22
+#define SH_DWARF_FRAME_FPUL 23
+#define SH_DWARF_FRAME_FPSCR 24
+#endif /* defined (__SH5__) */
+
+#if defined (__SH5__)
+
+#define MD_FALLBACK_FRAME_STATE_FOR shmedia_fallback_frame_state
+
+static _Unwind_Reason_Code
+shmedia_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+ int i, r;
+
+ /* movi 0x10,r9; shori 0x77,r9; trapa r9; nop (sigreturn) */
+ /* movi 0x10,r9; shori 0xad,r9; trapa r9; nop (rt_sigreturn) */
+ if ((*(unsigned long *) (pc-1) == 0xcc004090)
+ && (*(unsigned long *) (pc+3) == 0xc801dc90)
+ && (*(unsigned long *) (pc+7) == 0x6c91fff0)
+ && (*(unsigned long *) (pc+11) == 0x6ff0fff0))
+ sc = context->cfa;
+ else if ((*(unsigned long *) (pc-1) == 0xcc004090)
+ && (*(unsigned long *) (pc+3) == 0xc802b490)
+ && (*(unsigned long *) (pc+7) == 0x6c91fff0)
+ && (*(unsigned long *) (pc+11) == 0x6ff0fff0))
+ {
+ struct rt_sigframe {
+ siginfo_t *pinfo;
+ void *puc;
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->sc_regs[15];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ for (i = 0; i < 63; i++)
+ {
+ if (i == 15)
+ continue;
+
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = (long)&(sc->sc_regs[i]) - new_cfa;
+ }
+
+ fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset
+ = (long)&(sc->sc_sr) - new_cfa;
+
+ r = SH_DWARF_FRAME_BT0;
+ for (i = 0; i < 8; i++)
+ {
+ fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[r+i].loc.offset
+ = (long)&(sc->sc_tregs[i]) - new_cfa;
+ }
+
+ r = SH_DWARF_FRAME_FP0;
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[r+i].loc.offset
+ = (long)&(sc->sc_fpregs[i]) - new_cfa;
+ }
+
+ fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset
+ = (long)&(sc->sc_fpscr) - new_cfa;
+
+  /* We use the slot for the zero register to save the return address.  */
+ fs->regs.reg[63].how = REG_SAVED_OFFSET;
+ fs->regs.reg[63].loc.offset
+ = (long)&(sc->sc_pc) - new_cfa;
+ fs->retaddr_column = 63;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#else /* defined (__SH5__) */
+
+#define MD_FALLBACK_FRAME_STATE_FOR sh_fallback_frame_state
+
+static _Unwind_Reason_Code
+sh_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+ int i;
+#if defined (__SH3E__) || defined (__SH4__)
+ int r;
+#endif
+
+ /* mov.w 1f,r3; trapa #0x10; 1: .short 0x77 (sigreturn) */
+ /* mov.w 1f,r3; trapa #0x10; 1: .short 0xad (rt_sigreturn) */
+  /* Newer kernels use padding instructions to avoid an SH-4 core bug.  */
+ /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0;
+ or r0,r0; 1: .short 0x77 (sigreturn) */
+ /* mov.w 1f,r3; trapa #0x10; or r0,r0; or r0,r0; or r0,r0; or r0,r0;
+ or r0,r0; 1: .short 0xad (rt_sigreturn) */
+ if (((*(unsigned short *) (pc+0) == 0x9300)
+ && (*(unsigned short *) (pc+2) == 0xc310)
+ && (*(unsigned short *) (pc+4) == 0x0077))
+ || (((*(unsigned short *) (pc+0) == 0x9305)
+ && (*(unsigned short *) (pc+2) == 0xc310)
+ && (*(unsigned short *) (pc+14) == 0x0077))))
+ sc = context->cfa;
+ else if (((*(unsigned short *) (pc+0) == 0x9300)
+ && (*(unsigned short *) (pc+2) == 0xc310)
+ && (*(unsigned short *) (pc+4) == 0x00ad))
+ || (((*(unsigned short *) (pc+0) == 0x9305)
+ && (*(unsigned short *) (pc+2) == 0xc310)
+ && (*(unsigned short *) (pc+14) == 0x00ad))))
+ {
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->sc_regs[15];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 15;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ for (i = 0; i < 15; i++)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = (long)&(sc->sc_regs[i]) - new_cfa;
+ }
+
+ fs->regs.reg[SH_DWARF_FRAME_PR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_PR].loc.offset
+ = (long)&(sc->sc_pr) - new_cfa;
+ fs->regs.reg[SH_DWARF_FRAME_SR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_SR].loc.offset
+ = (long)&(sc->sc_sr) - new_cfa;
+ fs->regs.reg[SH_DWARF_FRAME_GBR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_GBR].loc.offset
+ = (long)&(sc->sc_gbr) - new_cfa;
+ fs->regs.reg[SH_DWARF_FRAME_MACH].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_MACH].loc.offset
+ = (long)&(sc->sc_mach) - new_cfa;
+ fs->regs.reg[SH_DWARF_FRAME_MACL].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_MACL].loc.offset
+ = (long)&(sc->sc_macl) - new_cfa;
+
+#if defined (__SH3E__) || defined (__SH4__)
+ r = SH_DWARF_FRAME_FP0;
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[r+i].loc.offset
+ = (long)&(sc->sc_fpregs[i]) - new_cfa;
+ }
+
+ r = SH_DWARF_FRAME_XD0;
+ for (i = 0; i < 8; i++)
+ {
+ fs->regs.reg[r+i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[r+i].loc.offset
+ = (long)&(sc->sc_xfpregs[2*i]) - new_cfa;
+ }
+
+ fs->regs.reg[SH_DWARF_FRAME_FPUL].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_FPUL].loc.offset
+ = (long)&(sc->sc_fpul) - new_cfa;
+ fs->regs.reg[SH_DWARF_FRAME_FPSCR].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_FPSCR].loc.offset
+ = (long)&(sc->sc_fpscr) - new_cfa;
+#endif
+
+ fs->regs.reg[SH_DWARF_FRAME_PC].how = REG_SAVED_OFFSET;
+ fs->regs.reg[SH_DWARF_FRAME_PC].loc.offset
+ = (long)&(sc->sc_pc) - new_cfa;
+ fs->retaddr_column = SH_DWARF_FRAME_PC;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+#endif /* defined (__SH5__) */
+
+#endif /* inhibit_libc */
diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h
new file mode 100644
index 000000000..a090dae1c
--- /dev/null
+++ b/gcc/config/sh/linux.h
@@ -0,0 +1,137 @@
+/* Definitions for SH running Linux-based GNU systems using ELF
+ Copyright (C) 1999, 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (SH GNU/Linux with ELF)", stderr);
+
+/* Enable DWARF 2 exceptions. */
+#undef DWARF2_UNWIND_INFO
+#define DWARF2_UNWIND_INFO 1
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "\
+ %{posix:-D_POSIX_SOURCE} \
+ %{pthread:-D_REENTRANT -D_PTHREADS} \
+"
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | MASK_USERMODE | TARGET_ENDIAN_DEFAULT \
+ | TARGET_OPT_DEFAULT)
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#undef SUBTARGET_LINK_SPEC
+#define SUBTARGET_LINK_SPEC \
+ "%{shared:-shared} \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}"
+
+/* Output assembler code to STREAM to call the profiler. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+ do { \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf (STREAM, "\tpt\t1f,tr1\n"); \
+ fprintf (STREAM, "\taddi.l\tr15,-8,r15\n"); \
+ fprintf (STREAM, "\tst.l\tr15,0,r18\n"); \
+ if (flag_pic) \
+ { \
+ const char *gofs = "(datalabel _GLOBAL_OFFSET_TABLE_-(0f-.))"; \
+ fprintf (STREAM, "\tmovi\t((%s>>16)&0xffff),r21\n", gofs); \
+ fprintf (STREAM, "\tshori\t(%s & 0xffff),r21\n", gofs); \
+ fprintf (STREAM, "0:\tptrel/u\tr21,tr0\n"); \
+ fprintf (STREAM, "\tmovi\t((mcount@GOTPLT)&0xffff),r22\n"); \
+ fprintf (STREAM, "\tgettr\ttr0,r21\n"); \
+ fprintf (STREAM, "\tadd.l\tr21,r22,r21\n"); \
+ fprintf (STREAM, "\tld.l\tr21,0,r21\n"); \
+ fprintf (STREAM, "\tptabs\tr21,tr0\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\tpt\tmcount,tr0\n"); \
+ fprintf (STREAM, "\tgettr\ttr1,r18\n"); \
+ fprintf (STREAM, "\tblink\ttr0,r63\n"); \
+ fprintf (STREAM, "1:\tld.l\tr15,0,r18\n"); \
+ fprintf (STREAM, "\taddi.l\tr15,8,r15\n"); \
+ } \
+ else \
+ { \
+ if (flag_pic) \
+ { \
+ fprintf (STREAM, "\tmov.l\t3f,r1\n"); \
+ fprintf (STREAM, "\tmova\t3f,r0\n"); \
+ fprintf (STREAM, "\tadd\tr1,r0\n"); \
+ fprintf (STREAM, "\tmov.l\t1f,r1\n"); \
+ fprintf (STREAM, "\tmov.l\t@(r0,r1),r1\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\tmov.l\t1f,r1\n"); \
+ fprintf (STREAM, "\tsts.l\tpr,@-r15\n"); \
+ fprintf (STREAM, "\tmova\t2f,r0\n"); \
+ fprintf (STREAM, "\tjmp\t@r1\n"); \
+ fprintf (STREAM, "\tlds\tr0,pr\n"); \
+ fprintf (STREAM, "\t.align\t2\n"); \
+ if (flag_pic) \
+ { \
+ fprintf (STREAM, "1:\t.long\tmcount@GOT\n"); \
+ fprintf (STREAM, "3:\t.long\t_GLOBAL_OFFSET_TABLE_\n"); \
+ } \
+ else \
+ fprintf (STREAM, "1:\t.long\tmcount\n"); \
+ fprintf (STREAM, "2:\tlds.l\t@r15+,pr\n"); \
+ } \
+ } while (0)
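+
+/* For the simplest case (non-PIC, non-SHmedia) the macro above emits
+   the following sequence (an editor's transcription of the fprintf
+   calls):
+
+	mov.l	1f,r1
+	sts.l	pr,@-r15
+	mova	2f,r0
+	jmp	@r1
+	lds	r0,pr
+	.align	2
+1:	.long	mcount
+2:	lds.l	@r15+,pr
+
+   i.e. mcount is called with pr saved on the stack and the return
+   address (label 2) planted in pr from the delay slot.  */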
+
+#define MD_UNWIND_SUPPORT "config/sh/linux-unwind.h"
+
+/* For SH3 and SH4, we use a slot of the unwind frame which corresponds
+   to a fake register number 16 as a placeholder for the return address
+   in MD_FALLBACK_FRAME_STATE_FOR; its contents will be read with
+   _Unwind_GetGR, which uses dwarf_reg_size_table to get the size of
+   the register.  So the entry of dwarf_reg_size_table corresponding to
+   this slot must be set.  To do this, we redefine DBX_REGISTER_NUMBER
+   so that it maps register 16 to itself.  */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ((! TARGET_SH5 && (REGNO) == 16) ? 16 : SH_DBX_REGISTER_NUMBER (REGNO))
+
+/* Since libgcc is compiled with -fpic for this target, we can't use
+ __sdivsi3_1 as the division strategy for -O0 and -Os. */
+#undef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2
+#undef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call2"
diff --git a/gcc/config/sh/little.h b/gcc/config/sh/little.h
new file mode 100644
index 000000000..f87c7b77d
--- /dev/null
+++ b/gcc/config/sh/little.h
@@ -0,0 +1,21 @@
+/* Definition of little endian SH machine for GNU compiler.
+
+ Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_ENDIAN_DEFAULT MASK_LITTLE_ENDIAN
diff --git a/gcc/config/sh/netbsd-elf.h b/gcc/config/sh/netbsd-elf.h
new file mode 100644
index 000000000..50bb2f2db
--- /dev/null
+++ b/gcc/config/sh/netbsd-elf.h
@@ -0,0 +1,117 @@
+/* Definitions for SH running NetBSD using ELF
+ Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define TARGET_VERSION_ENDIAN "le"
+#else
+#define TARGET_VERSION_ENDIAN ""
+#endif
+
+#if TARGET_CPU_DEFAULT & MASK_SH5
+#if TARGET_CPU_DEFAULT & MASK_SH_E
+#define TARGET_VERSION_CPU "sh5"
+#else
+#define TARGET_VERSION_CPU "sh64"
+#endif /* MASK_SH_E */
+#else
+#define TARGET_VERSION_CPU "sh"
+#endif /* MASK_SH5 */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/%s%s ELF)", \
+ TARGET_VERSION_CPU, TARGET_VERSION_ENDIAN)
+
+
+/* Extra specs needed for NetBSD SuperH ELF targets. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ builtin_define ("__NO_LEADING_UNDERSCORES__"); \
+ } \
+ while (0)
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/sh ELF target.
+ We use the SH_LINK_SPEC from sh/sh.h, and define the appropriate
+ SUBTARGET_LINK_SPEC that pulls in what we need from a generic
+ NetBSD ELF LINK_SPEC. */
+
+/* LINK_EMUL_PREFIX from sh/elf.h */
+
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_nbsd"
+
+#undef SUBTARGET_LINK_SPEC
+#define SUBTARGET_LINK_SPEC NETBSD_LINK_SPEC_ELF
+
+#undef LINK_SPEC
+#define LINK_SPEC SH_LINK_SPEC
+
+#define NETBSD_ENTRY_POINT "__start"
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | MASK_USERMODE | TARGET_ENDIAN_DEFAULT)
+
+/* Defined because we use the label but do not need the counters.  */
+#define NO_PROFILE_COUNTERS 1
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+do \
+ { \
+ if (TARGET_SHMEDIA32 || TARGET_SHMEDIA64) \
+ { \
+ /* FIXME */ \
+ sorry ("unimplemented-shmedia profiling"); \
+ } \
+ else \
+ { \
+ fprintf((STREAM), "\tmov.l\t%sLP%d,r1\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "\tmova\t%sLP%dr,r0\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "\tjmp\t@r1\n"); \
+ fprintf((STREAM), "\tnop\n"); \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ fprintf((STREAM), "%sLP%d:\t.long\t__mcount\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "%sLP%dr:\n", LOCAL_LABEL_PREFIX, (LABELNO)); \
+ } \
+ } \
+while (0)
+
+/* Since libgcc is compiled with -fpic for this target, we can't use
+ __sdivsi3_1 as the division strategy for -O0 and -Os. */
+#undef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2
+#undef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call2"
diff --git a/gcc/config/sh/newlib.h b/gcc/config/sh/newlib.h
new file mode 100644
index 000000000..13099c1f8
--- /dev/null
+++ b/gcc/config/sh/newlib.h
@@ -0,0 +1,25 @@
+/* Definitions of target machine for gcc for Super-H using sh-superh-elf.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This header file is used when with_libgloss is enabled during gcc
+ configuration. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc -lgloss"
diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
new file mode 100644
index 000000000..b6508b70d
--- /dev/null
+++ b/gcc/config/sh/predicates.md
@@ -0,0 +1,833 @@
+;; Predicate definitions for Renesas / SuperH SH.
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Returns 1 if OP is the PDImode if_then_else form used to describe a
+;; trapping memory target; the accepted shape is sketched in the comment
+;; after the predicate body below.
+
+(define_predicate "trapping_target_operand"
+ (match_code "if_then_else")
+{
+ rtx cond, mem, res, tar, and_expr;
+
+ if (GET_MODE (op) != PDImode)
+ return 0;
+ cond = XEXP (op, 0);
+ mem = XEXP (op, 1);
+ res = XEXP (op, 2);
+ if (!MEM_P (mem)
+ || (GET_CODE (res) != SIGN_EXTEND && GET_CODE (res) != TRUNCATE))
+ return 0;
+ tar = XEXP (res, 0);
+ if (!rtx_equal_p (XEXP (mem, 0), tar)
+ || GET_MODE (tar) != Pmode)
+ return 0;
+ if (GET_CODE (cond) == CONST)
+ {
+ cond = XEXP (cond, 0);
+ if (!satisfies_constraint_Csy (tar))
+ return 0;
+ if (GET_CODE (tar) == CONST)
+ tar = XEXP (tar, 0);
+ }
+ else if (!arith_reg_operand (tar, VOIDmode)
+ && ! satisfies_constraint_Csy (tar))
+ return 0;
+ if (GET_CODE (cond) != EQ)
+ return 0;
+ and_expr = XEXP (cond, 0);
+ return (GET_CODE (and_expr) == AND
+ && rtx_equal_p (XEXP (and_expr, 0), tar)
+ && CONST_INT_P (XEXP (and_expr, 1))
+ && CONST_INT_P (XEXP (cond, 1))
+ && INTVAL (XEXP (and_expr, 1)) == 3
+ && INTVAL (XEXP (cond, 1)) == 3);
+})
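+
+;; The accepted shape is roughly (an editor's sketch, modes elided):
+;;
+;;   (if_then_else (eq (and TAR (const_int 3)) (const_int 3))
+;;                 (mem TAR)
+;;                 (sign_extend TAR))
+;;
+;; where TAR is an arith register or a Csy-constrained symbol (possibly
+;; wrapped in CONST), and TRUNCATE may stand in for the sign_extend.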
+
+;; Returns 1 if OP is a valid operand for an AND insn: a logical
+;; operand, or, on SHmedia, a DImode constant that offers an
+;; mshflo.l / mshflhi.l opportunity (constraint J16).
+
+(define_predicate "and_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (logical_operand (op, mode))
+ return 1;
+
+ /* Check mshflo.l / mshflhi.l opportunities. */
+ if (TARGET_SHMEDIA
+ && mode == DImode
+ && satisfies_constraint_J16 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Like arith_reg_dest, but this predicate is defined with
+;; define_special_predicate, not define_predicate.
+
+(define_special_predicate "any_arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ return arith_reg_dest (op, mode);
+})
+
+;; Like register_operand, but this predicate is defined with
+;; define_special_predicate, not define_predicate.
+
+(define_special_predicate "any_register_operand"
+ (match_code "subreg,reg")
+{
+ return register_operand (op, mode);
+})
+
+;; Returns 1 if OP is a valid source operand for an arithmetic insn.
+
+(define_predicate "arith_operand"
+ (match_code "subreg,reg,const_int,truncate")
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (TARGET_SHMEDIA)
+ {
+ /* FIXME: We should be checking whether the CONST_INT fits in a
+ signed 16-bit here, but this causes reload_cse to crash when
+ attempting to transform a sequence of two 64-bit sets of the
+ same register from literal constants into a set and an add,
+ when the difference is too wide for an add. */
+ if (CONST_INT_P (op)
+ || satisfies_constraint_Css (op))
+ return 1;
+ else if (GET_CODE (op) == TRUNCATE
+ && REG_P (XEXP (op, 0))
+ && ! system_reg_operand (XEXP (op, 0), VOIDmode)
+ && (mode == VOIDmode || mode == GET_MODE (op))
+ && (GET_MODE_SIZE (GET_MODE (op))
+ < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
+ && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
+ || GET_MODE_SIZE (GET_MODE (op)) == 4))
+ return register_operand (XEXP (op, 0), VOIDmode);
+ else
+ return 0;
+ }
+ else if (satisfies_constraint_I08 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Like above, but for DImode destinations: forbid paradoxical DImode
+;; subregs, because this would lead to missing sign extensions when
+;; truncating from DImode to SImode.
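+;; For example, (subreg:DI (reg:SI ...) 0) is rejected as a destination
+;; on SHmedia.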
+
+(define_predicate "arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ if (mode == DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
+ && TARGET_SHMEDIA)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is a normal arithmetic register.
+
+(define_predicate "arith_reg_operand"
+ (match_code "subreg,reg,sign_extend")
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (REG_P (op))
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op)))
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG
+ && ! TARGET_REGISTER_P (regno)
+ && (regno != FPUL_REG || TARGET_SH4)
+ && regno != MACH_REG && regno != MACL_REG);
+ }
+ /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
+ We allow SImode here, as not using an FP register is just a matter of
+ proper register allocation. */
+ if (TARGET_SHMEDIA
+ && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
+ && GET_MODE (XEXP (op, 0)) == SImode
+ && GET_CODE (XEXP (op, 0)) != SUBREG)
+ return register_operand (XEXP (op, 0), VOIDmode);
+#if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
+ if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
+ && GET_MODE (XEXP (op, 0)) == HImode
+ && REG_P (XEXP (op, 0))
+ && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
+ return register_operand (XEXP (op, 0), VOIDmode);
+#endif
+ if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
+ && GET_CODE (op) == SUBREG
+ && GET_MODE (SUBREG_REG (op)) == DImode
+ && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
+ && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
+ && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
+ return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
+ return 0;
+})
+
+;; Returns 1 if OP is a valid source operand for a compare insn.
+
+(define_predicate "arith_reg_or_0_operand"
+ (match_code "subreg,reg,const_int,const_vector")
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (satisfies_constraint_Z (op))
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is a binary floating-point operator of mode MODE.
+
+(define_predicate "binary_float_operator"
+ (and (match_code "plus,minus,mult,div")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Returns 1 if OP is a binary logical operator (and, ior, xor) of
+;; mode MODE.
+
+(define_predicate "binary_logical_operator"
+ (and (match_code "and,ior,xor")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Returns 1 if OP is an address suitable for a cache manipulation
+;; operation.  MODE has the same meaning as in address_operand.
+
+(define_special_predicate "cache_address_operand"
+ (match_code "plus,reg")
+{
+ if (GET_CODE (op) == PLUS)
+ {
+ if (!REG_P (XEXP (op, 0)))
+ return 0;
+ if (!CONST_INT_P (XEXP (op, 1))
+ || (INTVAL (XEXP (op, 1)) & 31))
+ return 0;
+ }
+ else if (!REG_P (op))
+ return 0;
+ return address_operand (op, mode);
+})
+
+;; Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu.
+
+(define_predicate "cmp_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (satisfies_constraint_N (op))
+ return 1;
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is a valid operand for an SImode comparison: the
+;; T register in SImode on SH1, or else any arith_operand.
+
+(define_predicate "cmpsi_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (REG_P (op) && REGNO (op) == T_REG
+ && GET_MODE (op) == SImode
+ && TARGET_SH1)
+ return 1;
+ return arith_operand (op, mode);
+})
+
+;; Returns 1 if OP is a commutative floating point operator
+;; (plus or mult) in mode MODE.
+
+(define_predicate "commutative_float_operator"
+ (and (match_code "plus,mult")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Returns 1 if OP is an equality comparison operator (eq or ne).
+
+(define_predicate "equality_comparison_operator"
+ (match_code "eq,ne"))
+
+;; Returns 1 if OP is a valid operand of an extension: an arithmetic
+;; register, or a TRUNCATE that satisfies arith_operand.
+
+(define_predicate "extend_reg_operand"
+ (match_code "subreg,reg,truncate")
+{
+ return (GET_CODE (op) == TRUNCATE
+ ? arith_operand
+ : arith_reg_operand) (op, mode);
+})
+
+;; Like extend_reg_operand, but the constant zero is also accepted.
+
+(define_predicate "extend_reg_or_0_operand"
+ (match_code "subreg,reg,truncate,const_int")
+{
+ return (GET_CODE (op) == TRUNCATE
+ ? arith_operand
+ : arith_reg_or_0_operand) (op, mode);
+})
+
+;; Like arith_reg_operand, but this predicate does not accept SIGN_EXTEND.
+
+(define_predicate "ext_dest_operand"
+ (match_code "subreg,reg")
+{
+ return arith_reg_operand (op, mode);
+})
+
+;; Like fp_arith_reg_operand, but for DImode destinations: forbid
+;; paradoxical DImode subregs.
+
+(define_predicate "fp_arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ if (mode == DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
+ return 0;
+ return fp_arith_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is a floating point register or a pseudo register.
+
+(define_predicate "fp_arith_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (REG_P (op))
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op)))
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno >= FIRST_PSEUDO_REGISTER
+ || FP_REGISTER_P (regno));
+ }
+ return 0;
+})
+
+;; Returns 1 if OP is the FPSCR register in PSImode, or a PSImode
+;; pseudo before reload.
+
+(define_predicate "fpscr_operand"
+ (match_code "reg")
+{
+ return (REG_P (op)
+ && (REGNO (op) == FPSCR_REG
+ || (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && !(reload_in_progress || reload_completed)))
+ && GET_MODE (op) == PSImode);
+})
+
+;; Returns 1 if OP is the FPUL register (or a pseudo) in mode MODE.
+;; On SHMEDIA, any floating point arithmetic register is accepted.
+
+(define_predicate "fpul_operand"
+ (match_code "reg")
+{
+ if (TARGET_SHMEDIA)
+ return fp_arith_reg_operand (op, mode);
+
+ return (REG_P (op)
+ && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ && GET_MODE (op) == mode);
+})
+
+;; Returns 1 if OP is a valid source operand of an extension: any
+;; nonimmediate operand, or a TRUNCATE that satisfies arith_operand.
+
+(define_predicate "general_extend_operand"
+ (match_code "subreg,reg,mem,truncate")
+{
+ return (GET_CODE (op) == TRUNCATE
+ ? arith_operand
+ : nonimmediate_operand) (op, mode);
+})
+
+;; Returns 1 if OP can be the source of a simple move operation.  Same as
+;; general_operand, except that a LABEL_REF is valid while a PRE_DEC
+;; address and subregs of system registers are not.
+
+(define_predicate "general_movsrc_operand"
+ (match_code "subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,const,const_vector")
+{
+ if (MEM_P (op))
+ {
+ rtx inside = XEXP (op, 0);
+ if (GET_CODE (inside) == CONST)
+ inside = XEXP (inside, 0);
+
+ if (GET_CODE (inside) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (inside) == PLUS
+ && GET_CODE (XEXP (inside, 0)) == LABEL_REF
+ && CONST_INT_P (XEXP (inside, 1)))
+ return 1;
+
+ /* Only post-increment is allowed; reject pre-decrement. */
+ if (GET_CODE (inside) == PRE_DEC)
+ return 0;
+ }
+
+ if (TARGET_SHMEDIA
+ && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
+ && sh_rep_vec (op, mode))
+ return 1;
+ if (TARGET_SHMEDIA
+ && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
+ && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
+ /* FIXME */ abort (); /* return 1; */
+ return general_operand (op, mode);
+})
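+
+;; As an illustration, a source such as (mem:SI (post_inc:SI (reg:SI 4)))
+;; is accepted above, while (mem:SI (pre_dec:SI (reg:SI 15))) is not.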
+
+;; Returns 1 if OP can be the destination of a move.  Same as
+;; general_operand, but post-increment is not allowed.
+
+(define_predicate "general_movdst_operand"
+ (match_code "subreg,reg,mem")
+{
+ /* Only pre-decrement is allowed; reject post-increment. */
+ if (MEM_P (op) && GET_CODE (XEXP (op, 0)) == POST_INC)
+ return 0;
+ if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
+ && ! (high_life_started || reload_completed))
+ return 0;
+
+ return general_operand (op, mode);
+})
+
+
+;; Returns 1 if OP is a MEM whose address is a POST_INC of the stack
+;; pointer register.
+
+(define_predicate "sh_no_delay_pop_operand"
+ (match_code "mem")
+{
+ rtx inside = XEXP (op, 0);
+
+ if (MEM_P (op) && GET_MODE (op) == SImode
+ && GET_CODE (inside) == POST_INC
+ && REG_P (XEXP (inside, 0))
+ && REGNO (XEXP (inside, 0)) == SP_REG)
+ return 1;
+
+ return 0;
+})
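+
+;; That is, OP must look like (mem:SI (post_inc:SI (reg:SI 15))):
+;; a pop through the stack pointer (r15 on SH).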
+
+
+;; Returns 1 if OP is a MEM that can be the source of a simple move
+;; operation.
+
+(define_predicate "unaligned_load_operand"
+ (match_code "mem")
+{
+ rtx inside;
+
+ if (!MEM_P (op) || GET_MODE (op) != mode)
+ return 0;
+
+ inside = XEXP (op, 0);
+
+ if (GET_CODE (inside) == POST_INC)
+ inside = XEXP (inside, 0);
+
+ if (REG_P (inside))
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is a greater-than comparison operator
+;; (gt, ge, gtu or geu).
+
+(define_predicate "greater_comparison_operator"
+ (match_code "gt,ge,gtu,geu"))
+
+;; Returns 1 if OP is a TRUNCATE in mode MODE whose operand is a hard
+;; floating point register.
+
+(define_predicate "inqhi_operand"
+ (match_code "truncate")
+{
+ if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
+ return 0;
+ op = XEXP (op, 0);
+ /* Can't use true_regnum here because copy_cost wants to know about
+ SECONDARY_INPUT_RELOAD_CLASS. */
+ return REG_P (op) && FP_REGISTER_P (REGNO (op));
+})
+
+;; Returns 1 if OP is a general purpose register holding an integer
+;; value narrower than a word.  Only meaningful after reload.
+
+(define_special_predicate "int_gpr_dest"
+ (match_code "subreg,reg")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (GET_MODE_CLASS (op_mode) != MODE_INT
+ || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
+ return 0;
+ if (! reload_completed)
+ return 0;
+ return true_regnum (op) <= LAST_GENERAL_REG;
+})
+
+;; Returns 1 if OP is a less-than comparison operator
+;; (lt, le, ltu or leu).
+
+(define_predicate "less_comparison_operator"
+ (match_code "lt,le,ltu,leu"))
+
+;; Returns 1 if OP is a valid source operand for a logical operation.
+
+(define_predicate "logical_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (TARGET_SHMEDIA)
+ {
+ if (satisfies_constraint_I10 (op))
+ return 1;
+ else
+ return 0;
+ }
+ else if (satisfies_constraint_K08 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is a logical operator (and, ior or xor).
+
+(define_predicate "logical_operator"
+ (match_code "and,ior,xor"))
+
+;; Like arith_reg_operand, but for register source operands of narrow
+;; logical SHMEDIA operations: forbid subregs of DImode / TImode regs.
+
+(define_predicate "logical_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (TARGET_SHMEDIA
+ && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
+ && mode != DImode)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is a byte-aligned bit offset for the SHMEDIA mextr
+;; instructions: a multiple of 8 between 8 and 56 inclusive.
+
+(define_predicate "mextr_bit_offset"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i;
+
+ if (!CONST_INT_P (op))
+ return 0;
+ i = INTVAL (op);
+ return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
+})
+
+;; Returns 1 if OP is a valid minuend: the constant -1, or anything
+;; accepted by extend_reg_or_0_operand.
+
+(define_predicate "minuend_operand"
+ (match_code "subreg,reg,truncate,const_int")
+{
+ return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
+})
+
+;; Returns 1 if OP is a noncommutative floating point operator
+;; (minus or div) in mode MODE.
+
+(define_predicate "noncommutative_float_operator"
+ (and (match_code "minus,div")
+ (match_test "GET_MODE (op) == mode")))
+
+;; UNORDERED is only supported on SHMEDIA.
+
+(define_predicate "sh_float_comparison_operator"
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (and (match_test "TARGET_SHMEDIA")
+ (match_code "unordered"))))
+
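+;; Returns 1 if OP is a comparison that the SHMEDIA conditional branch
+;; patterns can use directly: an equality or a greater-than comparison.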
+(define_predicate "shmedia_cbranch_comparison_operator"
+ (ior (match_operand 0 "equality_comparison_operator")
+ (match_operand 0 "greater_comparison_operator")))
+
+;; Returns 1 if OP is a CONST_VECTOR in mode MODE (or any mode when
+;; MODE is VOIDmode) whose elements are all CONST_INTs.
+
+(define_predicate "sh_const_vec"
+ (match_code "const_vector")
+{
+ int i;
+
+ if (GET_CODE (op) != CONST_VECTOR
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ i = XVECLEN (op, 0) - 1;
+ for (; i >= 0; i--)
+ if (!CONST_INT_P (XVECEXP (op, 0, i)))
+ return 0;
+ return 1;
+})
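+
+;; For example, (const_vector:V4HI [(const_int 1) (const_int 2)
+;; (const_int 3) (const_int 4)]) satisfies this predicate, whereas a
+;; vector containing a symbol_ref element does not.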
+
+;; Determine if OP is a constant vector matching MODE with only one
+;; element that is not a sign extension. Two byte-sized elements
+;; count as one.
+
+(define_predicate "sh_1el_vec"
+ (match_code "const_vector")
+{
+ int unit_size;
+ int i, last, least, sign_ix;
+ rtx sign;
+
+ if (GET_CODE (op) != CONST_VECTOR
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ /* Determine the indices of the last and the least significant
+ elements. */
+ last = XVECLEN (op, 0) - 1;
+ least = TARGET_LITTLE_ENDIAN ? 0 : last;
+ if (!CONST_INT_P (XVECEXP (op, 0, least)))
+ return 0;
+ sign_ix = least;
+ if (GET_MODE_UNIT_SIZE (mode) == 1)
+ sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
+ if (!CONST_INT_P (XVECEXP (op, 0, sign_ix)))
+ return 0;
+ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
+ sign = (INTVAL (XVECEXP (op, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
+ ? constm1_rtx : const0_rtx);
+ i = XVECLEN (op, 0) - 1;
+ do
+ if (i != least && i != sign_ix && XVECEXP (op, 0, i) != sign)
+ return 0;
+ while (--i);
+ return 1;
+})
+
+;; Like register_operand, but take into account that SHMEDIA can use
+;; the constant zero like a general register.
+
+(define_predicate "sh_register_operand"
+ (match_code "reg,subreg,const_int,const_double")
+{
+ if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
+ return 1;
+ return register_operand (op, mode);
+})
+
+;; Returns 1 if OP is a vector in mode MODE whose elements all repeat
+;; the same value (for byte-sized elements, the same pair of values).
+
+(define_predicate "sh_rep_vec"
+ (match_code "const_vector,parallel")
+{
+ int i;
+ rtx x, y;
+
+ if ((GET_CODE (op) != CONST_VECTOR && GET_CODE (op) != PARALLEL)
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ i = XVECLEN (op, 0) - 2;
+ x = XVECEXP (op, 0, i + 1);
+ if (GET_MODE_UNIT_SIZE (mode) == 1)
+ {
+ y = XVECEXP (op, 0, i);
+ for (i -= 2; i >= 0; i -= 2)
+ if (! rtx_equal_p (XVECEXP (op, 0, i + 1), x)
+ || ! rtx_equal_p (XVECEXP (op, 0, i), y))
+ return 0;
+ }
+ else
+ for (; i >= 0; i--)
+ if (XVECEXP (op, 0, i) != x)
+ return 0;
+ return 1;
+})
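+
+;; For example, (const_vector:V2SI [(const_int 7) (const_int 7)])
+;; is a replicated vector; in a V8QI vector it is the pair of byte
+;; elements forming each halfword that must repeat.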
+
+;; Returns 1 if OP is a valid shift count: a CONST_INT smaller than the
+;; number of bits in MODE, some other non-memory constant, or a register
+;; accepted by shift_count_reg_operand.
+
+(define_predicate "shift_count_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,zero_extend,sign_extend")
+{
+ return (CONSTANT_P (op)
+ ? (CONST_INT_P (op)
+ ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
+ : nonmemory_operand (op, mode))
+ : shift_count_reg_operand (op, mode));
+})
+
+;; Like arith_reg_operand, but also accepts a register wrapped in zero
+;; or sign extensions (or a lowpart subreg), as long as each intermediate
+;; mode keeps at least the 6 bits needed to hold any shift count.
+
+(define_predicate "shift_count_reg_operand"
+ (match_code "subreg,reg,zero_extend,sign_extend")
+{
+ if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
+ || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
+ && (mode == VOIDmode || mode == GET_MODE (op))
+ && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
+ && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
+ {
+ mode = VOIDmode;
+ do
+ op = XEXP (op, 0);
+ while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
+ || GET_CODE (op) == TRUNCATE)
+ && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
+ && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
+ }
+ return arith_reg_operand (op, mode);
+})
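+
+;; For instance, (zero_extend:SI (reg:QI 4)) is accepted here, since
+;; QImode still provides the 6 significant bits a shift count needs.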
+
+;; Returns 1 if OP is a shift operator (ashift, ashiftrt or lshiftrt).
+
+(define_predicate "shift_operator"
+ (match_code "ashift,ashiftrt,lshiftrt"))
+
+;; Returns 1 if OP is a SYMBOL_REF.
+
+(define_predicate "symbol_ref_operand"
+ (match_code "symbol_ref"))
+
+;; Same as target_reg_operand, except that label_refs and symbol_refs
+;; are accepted before reload.
+
+(define_special_predicate "target_operand"
+ (match_code "subreg,reg,label_ref,symbol_ref,const,unspec")
+{
+ if (mode != VOIDmode && mode != Pmode)
+ return 0;
+
+ if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
+ && satisfies_constraint_Csy (op))
+ return ! reload_completed;
+
+ return target_reg_operand (op, mode);
+})
+
+;; Accept pseudos and branch target registers.
+
+(define_special_predicate "target_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (mode == VOIDmode
+ ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
+ : mode != GET_MODE (op))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = XEXP (op, 0);
+
+ if (!REG_P (op))
+ return 0;
+
+ /* We must protect ourselves from matching pseudos that are virtual
+ registers, because they will eventually be replaced with hardware
+ registers that aren't branch-target registers. */
+ if (REGNO (op) > LAST_VIRTUAL_REGISTER
+ || TARGET_REGISTER_P (REGNO (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is suitable as the operand of a HImode truncation:
+;; an extend_reg_operand in SImode, DImode, V4HImode or V2SImode.
+
+(define_special_predicate "trunc_hi_operand"
+ (match_code "subreg,reg,truncate")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (op_mode != SImode && op_mode != DImode
+ && op_mode != V4HImode && op_mode != V2SImode)
+ return 0;
+ return extend_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is an address suitable for an unaligned access instruction.
+
+(define_special_predicate "ua_address_operand"
+ (match_code "subreg,reg,plus")
+{
+ if (GET_CODE (op) == PLUS
+ && (! satisfies_constraint_I06 (XEXP (op, 1))))
+ return 0;
+ return address_operand (op, QImode);
+})
+
+;; Returns 1 if OP is a constant offset suitable for an unaligned
+;; access (i.e. it satisfies constraint I06).
+
+(define_predicate "ua_offset"
+ (match_code "const_int")
+{
+ return satisfies_constraint_I06 (op);
+})
+
+;; Returns 1 if OP is a unary floating point operator (abs, neg or
+;; sqrt) in mode MODE.
+
+(define_predicate "unary_float_operator"
+ (and (match_code "abs,neg,sqrt")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Return 1 if OP is a valid source operand for xor.
+
+(define_predicate "xor_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (CONST_INT_P (op))
+ return (TARGET_SHMEDIA
+ ? (satisfies_constraint_I06 (op)
+ || (!can_create_pseudo_p () && INTVAL (op) == 0xff))
+ : satisfies_constraint_K08 (op));
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
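+;; Returns 1 if OP is a MEM whose address is a single register, or a
+;; register plus a displacement satisfying constraint K12.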
+(define_predicate "bitwise_memory_operand"
+ (match_code "mem")
+{
+ if (MEM_P (op))
+ {
+ if (REG_P (XEXP (op, 0)))
+ return 1;
+
+ if (GET_CODE (XEXP (op, 0)) == PLUS
+ && REG_P (XEXP (XEXP (op, 0), 0))
+ && satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1)))
+ return 1;
+ }
+ return 0;
+})
diff --git a/gcc/config/sh/rtems.h b/gcc/config/sh/rtems.h
new file mode 100644
index 000000000..61fab07e0
--- /dev/null
+++ b/gcc/config/sh/rtems.h
@@ -0,0 +1,26 @@
+/* Definitions for rtems targeting a SH using COFF.
+ Copyright (C) 1997, 1998, 2000, 2002, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() do { \
+ builtin_define( "__rtems__" ); \
+ builtin_assert( "system=rtems" ); \
+} while (0)
diff --git a/gcc/config/sh/rtemself.h b/gcc/config/sh/rtemself.h
new file mode 100644
index 000000000..aba98f686
--- /dev/null
+++ b/gcc/config/sh/rtemself.h
@@ -0,0 +1,26 @@
+/* Definitions for rtems targeting a SH using elf.
+ Copyright (C) 1997, 1998, 2000, 2002, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() do { \
+ builtin_define( "__rtems__" ); \
+ builtin_assert( "system=rtems" ); \
+} while (0)
diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c
new file mode 100644
index 000000000..2fdff542b
--- /dev/null
+++ b/gcc/config/sh/sh-c.c
@@ -0,0 +1,68 @@
+/* Pragma handling for GCC for Renesas / SuperH SH.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+ 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Joern Rennecke <joern.rennecke@st.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tm_p.h"
+
+/* Handle machine specific pragmas to be semi-compatible with Renesas
+ compiler. */
+
+/* Add ATTR to the attributes of the current function. If there is no
+ such function, save it to be added to the attributes of the next
+ function. */
+static void
+sh_add_function_attribute (const char *attr)
+{
+ tree id = get_identifier (attr);
+
+ if (current_function_decl)
+ decl_attributes (&current_function_decl,
+ tree_cons (id, NULL_TREE, NULL_TREE), 0);
+ else
+ {
+ *sh_deferred_function_attributes_tail
+ = tree_cons (id, NULL_TREE, *sh_deferred_function_attributes_tail);
+ sh_deferred_function_attributes_tail
+ = &TREE_CHAIN (*sh_deferred_function_attributes_tail);
+ }
+}
+
+void
+sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("interrupt_handler");
+}
+
+void
+sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("trapa_handler");
+}
+
+void
+sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("nosave_low_regs");
+}
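+
+/* These handlers are hooked up to `#pragma interrupt', `#pragma trapa'
+ and `#pragma nosave_low_regs' (registered via REGISTER_TARGET_PRAGMAS
+ in sh.h), so that, for example, writing
+ #pragma interrupt
+ before a function definition marks it as an interrupt_handler. */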
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
new file mode 100644
index 000000000..98e974a1a
--- /dev/null
+++ b/gcc/config/sh/sh-modes.def
@@ -0,0 +1,34 @@
+/* SH extra machine modes.
+ Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The SH uses a partial integer mode to represent the FPSCR register. */
+PARTIAL_INT_MODE (SI);
+/* PDI mode is used to represent a function address in a target register. */
+PARTIAL_INT_MODE (DI);
+
+/* Vector modes. */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
new file mode 100644
index 000000000..d29147c98
--- /dev/null
+++ b/gcc/config/sh/sh-protos.h
@@ -0,0 +1,186 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2003,
+ 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SH_PROTOS_H
+#define GCC_SH_PROTOS_H
+
+enum sh_function_kind {
+ /* A function with the normal C ABI. */
+ FUNCTION_ORDINARY,
+ /* A special function that guarantees that some otherwise call-clobbered
+ registers are not clobbered. These can't go through the SH5 resolver,
+ because it only saves argument passing registers. */
+ SFUNC_GOT,
+ /* A special function that should be linked statically. These are typically
+ smaller or not much larger than a PLT entry.
+ Some also have a non-standard ABI which precludes dynamic linking. */
+ SFUNC_STATIC
+};
+
+#ifdef RTX_CODE
+extern rtx sh_fsca_sf2int (void);
+extern rtx sh_fsca_df2int (void);
+extern rtx sh_fsca_int2sf (void);
+
+/* Declare functions defined in sh.c and used in templates. */
+
+extern const char *output_branch (int, rtx, rtx *);
+extern const char *output_ieee_ccmpeq (rtx, rtx *);
+extern const char *output_branchy_insn (enum rtx_code, const char *, rtx, rtx *);
+extern const char *output_movedouble (rtx, rtx[], enum machine_mode);
+extern const char *output_movepcrel (rtx, rtx[], enum machine_mode);
+extern const char *output_far_jump (rtx, rtx);
+
+extern struct rtx_def *sfunc_uses_reg (rtx);
+extern int barrier_align (rtx);
+extern int sh_loop_align (rtx);
+extern int fp_zero_operand (rtx);
+extern int fp_one_operand (rtx);
+extern int fp_int_operand (rtx);
+extern rtx get_fpscr_rtx (void);
+extern bool sh_legitimate_index_p (enum machine_mode, rtx);
+extern bool sh_legitimize_reload_address (rtx *, enum machine_mode, int, int);
+extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern int nonpic_symbol_mentioned_p (rtx);
+extern void emit_sf_insn (rtx);
+extern void emit_df_insn (rtx);
+extern void output_pic_addr_const (FILE *, rtx);
+extern int expand_block_move (rtx *);
+extern int prepare_move_operands (rtx[], enum machine_mode mode);
+extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
+ enum rtx_code comparison);
+extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
+extern bool expand_cbranchdi4 (rtx *operands, enum rtx_code comparison);
+extern void sh_emit_scc_to_t (enum rtx_code, rtx, rtx);
+extern rtx sh_emit_cheap_store_flag (enum machine_mode, enum rtx_code, rtx, rtx);
+extern void sh_emit_compare_and_branch (rtx *, enum machine_mode);
+extern void sh_emit_compare_and_set (rtx *, enum machine_mode);
+extern int shift_insns_rtx (rtx);
+extern void gen_ashift (int, int, rtx);
+extern void gen_ashift_hi (int, int, rtx);
+extern void gen_shifty_op (int, rtx *);
+extern void gen_shifty_hi_op (int, rtx *);
+extern int expand_ashiftrt (rtx *);
+extern int sh_dynamicalize_shift_p (rtx);
+extern int shl_and_kind (rtx, rtx, int *);
+extern int shl_and_length (rtx);
+extern int shl_and_scr_length (rtx);
+extern int gen_shl_and (rtx, rtx, rtx, rtx);
+extern int shl_sext_kind (rtx, rtx, int *);
+extern int shl_sext_length (rtx);
+extern int gen_shl_sext (rtx, rtx, rtx, rtx);
+extern rtx gen_datalabel_ref (rtx);
+extern int regs_used (rtx, int);
+extern void fixup_addr_diff_vecs (rtx);
+extern int get_dest_uid (rtx, int);
+extern void final_prescan_insn (rtx, rtx *, int);
+extern int symbol_ref_operand (rtx, enum machine_mode);
+extern enum tls_model tls_symbolic_operand (rtx, enum machine_mode);
+extern int system_reg_operand (rtx, enum machine_mode);
+extern int general_movsrc_operand (rtx, enum machine_mode);
+extern int general_movdst_operand (rtx, enum machine_mode);
+extern int arith_reg_operand (rtx, enum machine_mode);
+extern int fp_arith_reg_operand (rtx, enum machine_mode);
+extern int arith_operand (rtx, enum machine_mode);
+extern int arith_reg_or_0_operand (rtx, enum machine_mode);
+extern int logical_operand (rtx, enum machine_mode);
+extern int tertiary_reload_operand (rtx, enum machine_mode);
+extern int fpscr_operand (rtx, enum machine_mode);
+extern int fpul_operand (rtx, enum machine_mode);
+extern int commutative_float_operator (rtx, enum machine_mode);
+extern int noncommutative_float_operator (rtx, enum machine_mode);
+extern int reg_unused_after (rtx, rtx);
+extern void expand_sf_unop (rtx (*)(rtx, rtx, rtx), rtx *);
+extern void expand_sf_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
+extern void expand_df_unop (rtx (*)(rtx, rtx, rtx), rtx *);
+extern void expand_df_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
+extern void expand_fp_branch (rtx (*)(void), rtx (*)(void));
+extern int sh_insn_length_adjustment (rtx);
+extern int sh_can_redirect_branch (rtx, rtx);
+extern void sh_expand_unop_v2sf (enum rtx_code, rtx, rtx);
+extern void sh_expand_binop_v2sf (enum rtx_code, rtx, rtx, rtx);
+extern int sh_expand_t_scc (rtx *);
+extern rtx sh_gen_truncate (enum machine_mode, rtx, int);
+extern bool sh_vector_mode_supported_p (enum machine_mode);
+#endif /* RTX_CODE */
+
+extern const char *output_jump_label_table (void);
+extern int sh_handle_pragma (int (*)(void), void (*)(int), const char *);
+extern struct rtx_def *get_fpscr_rtx (void);
+extern int sh_media_register_for_return (void);
+extern void sh_expand_prologue (void);
+extern void sh_expand_epilogue (bool);
+extern int sh_need_epilogue (void);
+extern void sh_set_return_address (rtx, rtx);
+extern int initial_elimination_offset (int, int);
+extern int fldi_ok (void);
+extern int sh_hard_regno_rename_ok (unsigned int, unsigned int);
+extern int sh_cfun_interrupt_handler_p (void);
+extern int sh_cfun_resbank_handler_p (void);
+extern int sh_attr_renesas_p (const_tree);
+extern int sh_cfun_attr_renesas_p (void);
+extern bool sh_cannot_change_mode_class
+ (enum machine_mode, enum machine_mode, enum reg_class);
+extern bool sh_small_register_classes_for_mode_p (enum machine_mode);
+extern void sh_mark_label (rtx, int);
+extern int check_use_sfunc_addr (rtx, rtx);
+
+#ifdef HARD_CONST
+extern void fpscr_set_from_mem (int, HARD_REG_SET);
+#endif
+
+extern void sh_pr_interrupt (struct cpp_reader *);
+extern void sh_pr_trapa (struct cpp_reader *);
+extern void sh_pr_nosave_low_regs (struct cpp_reader *);
+extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+extern rtx sh_get_pr_initial_val (void);
+
+extern int sh_pass_in_reg_p (CUMULATIVE_ARGS *, enum machine_mode, tree);
+extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, signed int, enum machine_mode);
+extern rtx sh_dwarf_register_span (rtx);
+
+extern rtx replace_n_hard_rtx (rtx, rtx *, int , int);
+extern int shmedia_cleanup_truncate (rtx *, void *);
+
+extern int sh_contains_memref_p (rtx);
+extern int sh_loads_bankedreg_p (rtx);
+extern rtx shmedia_prepare_call_address (rtx fnaddr, int is_sibcall);
+extern int sh2a_get_function_vector_number (rtx);
+extern int sh2a_is_function_vector_call (rtx);
+extern void sh_fix_range (const char *);
+extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode);
+#endif /* ! GCC_SH_PROTOS_H */
+
+#ifdef SYMBIAN
+extern const char * sh_symbian_strip_name_encoding (const char *);
+extern bool sh_symbian_is_dllexported_name (const char *);
+#ifdef TREE_CODE
+extern bool sh_symbian_is_dllexported (tree);
+extern int sh_symbian_import_export_class (tree, int);
+extern tree sh_symbian_handle_dll_attribute (tree *, tree, tree, int, bool *);
+#ifdef RTX_CODE
+extern void sh_symbian_encode_section_info (tree, rtx, int);
+#endif
+#endif
+#endif /* SYMBIAN */
+
diff --git a/gcc/config/sh/sh-symbian.h b/gcc/config/sh/sh-symbian.h
new file mode 100644
index 000000000..2e37d2bbc
--- /dev/null
+++ b/gcc/config/sh/sh-symbian.h
@@ -0,0 +1,42 @@
+/* Header file for GCC for a Symbian OS targeted SH backend.
+ Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by RedHat.
+ Most of this code is stolen from i386/winnt.c.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* A unique character to encode declspec encoded objects. */
+#define SH_SYMBIAN_FLAG_CHAR "$"
+
+/* Unique strings to prefix exported and imported objects. */
+#define DLL_IMPORT_PREFIX SH_SYMBIAN_FLAG_CHAR "i."
+#define DLL_EXPORT_PREFIX SH_SYMBIAN_FLAG_CHAR "e."
+
+/* Select the level of debugging information to display.
+ 0 for no debugging.
+ 1 for informative messages about decisions to add attributes.
+ 2 for verbose information about what is being done. */
+#define SYMBIAN_DEBUG 0
+/* #define SYMBIAN_DEBUG 1 */
+/* #define SYMBIAN_DEBUG 2 */
+
+/* Functions exported from symbian-base.c. */
+extern tree sh_symbian_associated_type (tree);
+
+/* Functions exported from symbian-[c|c++].c. */
+extern bool sh_symbian_is_dllimported (tree);
+
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
new file mode 100644
index 000000000..11e537b1f
--- /dev/null
+++ b/gcc/config/sh/sh.c
@@ -0,0 +1,12610 @@
+/* Output routines for GCC for Renesas / SuperH SH.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+ 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "insn-config.h"
+#include "rtl.h"
+#include "tree.h"
+#include "flags.h"
+#include "expr.h"
+#include "optabs.h"
+#include "reload.h"
+#include "function.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "integrate.h"
+#include "dwarf2.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "basic-block.h"
+#include "df.h"
+#include "cfglayout.h"
+#include "intl.h"
+#include "sched-int.h"
+#include "params.h"
+#include "ggc.h"
+#include "gimple.h"
+#include "cfgloop.h"
+#include "alloc-pool.h"
+#include "tm-constrs.h"
+
+
+int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
+
+#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
+#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
+
+/* These are some macros to abstract register modes. */
+#define CONST_OK_FOR_ADD(size) \
+ (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
+#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
+#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
+#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
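+
+/* For instance (illustrative only), stack adjustment code below emits
+ emit_insn (GEN_ADD3 (reg, reg, GEN_INT (size)));
+ which expands to gen_adddi3 on SHMEDIA64 and gen_addsi3 elsewhere. */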
+
+/* Used to simplify the logic below. Find the attributes wherever
+ they may be. */
+#define SH_ATTRIBUTES(decl) \
+ (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
+ : DECL_ATTRIBUTES (decl) \
+ ? (DECL_ATTRIBUTES (decl)) \
+ : TYPE_ATTRIBUTES (TREE_TYPE (decl))
+
+/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
+int current_function_interrupt;
+
+tree sh_deferred_function_attributes;
+tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
+
+/* Global variables for machine-dependent things. */
+
+/* Which CPU we are scheduling for. */
+enum processor_type sh_cpu;
+
+/* Definitions used in ready queue reordering for first scheduling pass. */
+
+/* Register weight arrays for modes SFmode and SImode, indexed by insn
+ LUID. */
+static short *regmode_weight[2];
+
+/* Total SFmode and SImode weights of scheduled insns. */
+static int curr_regmode_pressure[2];
+
+/* Number of r0 life regions. */
+static int r0_life_regions;
+
+/* If true, skip cycles for Q -> R movement. */
+static int skip_cycles = 0;
+
+/* Cached value of can_issue_more. This is cached in sh_variable_issue hook
+ and returned from sh_reorder2. */
+static short cached_can_issue_more;
+
+/* Unique number for UNSPEC_BBR pattern. */
+static unsigned int unspec_bbr_uid = 1;
+
+/* Maps each hard register number to the smallest register class
+ containing it. */
+
+enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+ R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
+ TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
+ MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
+ GENERAL_REGS, GENERAL_REGS,
+};
+
+char sh_register_names[FIRST_PSEUDO_REGISTER] \
+ [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
+
+char sh_additional_register_names[ADDREGNAMES_SIZE] \
+ [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
+ = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
+
+int assembler_dialect;
+
+static bool shmedia_space_reserved_for_target_registers;
+
+static bool sh_handle_option (size_t, const char *, int);
+static void split_branches (rtx);
+static int branch_dest (rtx);
+static void force_into (rtx, rtx);
+static void print_slot (rtx);
+static rtx add_constant (rtx, enum machine_mode, rtx);
+static void dump_table (rtx, rtx);
+static int hi_const (rtx);
+static int broken_move (rtx);
+static int mova_p (rtx);
+static rtx find_barrier (int, rtx, rtx);
+static int noncall_uses_reg (rtx, rtx, rtx *);
+static rtx gen_block_redirect (rtx, int, int);
+static void sh_reorg (void);
+static void sh_option_override (void);
+static void sh_option_init_struct (struct gcc_options *);
+static void sh_option_default_params (void);
+static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
+static rtx frame_insn (rtx);
+static rtx push (int);
+static void pop (int);
+static void push_regs (HARD_REG_SET *, int);
+static int calc_live_regs (HARD_REG_SET *);
+static HOST_WIDE_INT rounded_frame_size (int);
+static bool sh_frame_pointer_required (void);
+static rtx mark_constant_pool_use (rtx);
+static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
+static tree sh_handle_resbank_handler_attribute (tree *, tree,
+ tree, int, bool *);
+static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
+ tree, int, bool *);
+static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
+static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
+static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
+static void sh_print_operand (FILE *, rtx, int);
+static void sh_print_operand_address (FILE *, rtx);
+static bool sh_print_operand_punct_valid_p (unsigned char code);
+static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
+static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void sh_insert_attributes (tree, tree *);
+static const char *sh_check_pch_target_flags (int);
+static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
+static int sh_adjust_cost (rtx, rtx, rtx, int);
+static int sh_issue_rate (void);
+static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
+static short find_set_regmode_weight (rtx, enum machine_mode);
+static short find_insn_regmode_weight (rtx, enum machine_mode);
+static void find_regmode_weight (basic_block, enum machine_mode);
+static int find_r0_life_regions (basic_block);
+static void sh_md_init_global (FILE *, int, int);
+static void sh_md_finish_global (FILE *, int);
+static int rank_for_reorder (const void *, const void *);
+static void swap_reorder (rtx *, int);
+static void ready_reorder (rtx *, int);
+static short high_pressure (enum machine_mode);
+static int sh_reorder (FILE *, int, rtx *, int *, int);
+static int sh_reorder2 (FILE *, int, rtx *, int *, int);
+static void sh_md_init (FILE *, int, int);
+static int sh_variable_issue (FILE *, int, rtx, int);
+
+static bool sh_function_ok_for_sibcall (tree, tree);
+
+static bool sh_cannot_modify_jumps_p (void);
+static reg_class_t sh_target_reg_class (void);
+static bool sh_optimize_target_register_callee_saved (bool);
+static bool sh_ms_bitfield_layout_p (const_tree);
+
+static void sh_init_builtins (void);
+static tree sh_builtin_decl (unsigned, bool);
+static void sh_media_init_builtins (void);
+static tree sh_media_builtin_decl (unsigned, bool);
+static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
+static void sh_file_start (void);
+static int flow_dependent_p (rtx, rtx);
+static void flow_dependent_p_1 (rtx, const_rtx, void *);
+static int shiftcosts (rtx);
+static int andcosts (rtx);
+static int addsubcosts (rtx);
+static int multcosts (rtx);
+static bool unspec_caller_rtx_p (rtx);
+static bool sh_cannot_copy_insn_p (rtx);
+static bool sh_rtx_costs (rtx, int, int, int *, bool);
+static int sh_address_cost (rtx, bool);
+static int sh_pr_n_sets (void);
+static rtx sh_allocate_initial_value (rtx);
+static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
+static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ struct secondary_reload_info *);
+static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx sh_delegitimize_address (rtx);
+static int shmedia_target_regs_stack_space (HARD_REG_SET *);
+static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
+static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
+static int scavenge_reg (HARD_REG_SET *s);
+struct save_schedule_s;
+static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
+ struct save_schedule_s *, int);
+
+static rtx sh_struct_value_rtx (tree, int);
+static rtx sh_function_value (const_tree, const_tree, bool);
+static bool sh_function_value_regno_p (const unsigned int);
+static rtx sh_libcall_value (enum machine_mode, const_rtx);
+static bool sh_return_in_memory (const_tree, const_tree);
+static rtx sh_builtin_saveregs (void);
+static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
+static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
+static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
+static tree sh_build_builtin_va_list (void);
+static void sh_va_start (tree, rtx);
+static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static bool sh_promote_prototypes (const_tree);
+static enum machine_mode sh_promote_function_mode (const_tree type,
+ enum machine_mode,
+ int *punsignedp,
+ const_tree funtype,
+ int for_return);
+static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+static void sh_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx sh_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static bool sh_scalar_mode_supported_p (enum machine_mode);
+static int sh_dwarf_calling_convention (const_tree);
+static void sh_encode_section_info (tree, rtx, int);
+static int sh2a_function_vector_p (tree);
+static void sh_trampoline_init (rtx, tree, rtx);
+static rtx sh_trampoline_adjust_address (rtx);
+static void sh_conditional_register_usage (void);
+
+static const struct attribute_spec sh_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
+ { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
+ { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
+ { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
+ { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
+ { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
+ { "resbank", 0, 0, true, false, false, sh_handle_resbank_handler_attribute },
+ { "function_vector", 1, 1, true, false, false, sh2a_handle_function_vector_handler_attribute },
+#ifdef SYMBIAN
+ /* Symbian support adds two new attributes:
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times. */
+ { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
+ { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
+#endif
+ { NULL, 0, 0, false, false, false, NULL }
+};
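+
+/* For illustration, these attributes appear in user code as, e.g.:
+ void isr (void) __attribute__ ((interrupt_handler));
+ void vec4 (void) __attribute__ ((function_vector (4))); */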
+
+/* Set default optimization options. */
+static const struct default_options sh_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_1_PLUS_SPEED_ONLY, OPT_mdiv_, "inv:minlat", 1 },
+ { OPT_LEVELS_SIZE, OPT_mdiv_, SH_DIV_STR_FOR_SIZE, 1 },
+ { OPT_LEVELS_0_ONLY, OPT_mdiv_, "", 1 },
+ { OPT_LEVELS_SIZE, OPT_mcbranchdi, NULL, 0 },
+ /* We can't meaningfully test TARGET_SHMEDIA here, because -m
+ options haven't been parsed yet, hence we'd read only the
+ default. sh_target_reg_class will return NO_REGS if this is
+ not SHMEDIA, so it's OK to always set
+ flag_branch_target_load_optimize. */
+ { OPT_LEVELS_2_PLUS, OPT_fbranch_target_load_optimize, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE sh_attribute_table
+
+/* The next two are used for debug info when compiling with -gdwarf. */
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
+
+/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE sh_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE sh_option_optimization_table
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT sh_option_init_struct
+#undef TARGET_OPTION_DEFAULT_PARAMS
+#define TARGET_OPTION_DEFAULT_PARAMS sh_option_default_params
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND sh_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START sh_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION sh_handle_option
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST sh_register_move_cost
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
+
+/* The following hooks have been implemented to reenable sched1.  With
+ the help of these macros we limit the movement of insns in sched1 to
+ reduce the register pressure.  The overall idea is to keep count of
+ the SImode and SFmode regs required by already scheduled insns.  When
+ these counts cross some threshold values, we give priority to insns
+ that free registers.  The insn that frees registers is most likely to
+ be the insn with the lowest LUID (original insn order), but such an
+ insn might sit in the stalled queue (Q) instead of the ready queue (R).
+ To solve this, we skip up to a maximum of 8 cycles so that such insns
+ may move from Q -> R.
+
+ The hooks are described below:
+
+ TARGET_SCHED_INIT_GLOBAL: Added as a new target hook in the generic
+ scheduler; it is called inside the sched_init function just after the
+ find_insn_reg_weights function call.  It is used to calculate the
+ SImode and SFmode weights of the insns of basic blocks, much like
+ find_insn_reg_weights does.
+ TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
+
+ TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
+ indicated by TARGET_SCHED_REORDER2; doing this may move insns from
+ (Q)->(R).
+
+ TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
+ high, reorder the ready queue so that the insn with the lowest LUID
+ will be issued next.
+
+ TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
+ TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
+
+ TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that
+ it can be returned from TARGET_SCHED_REORDER2.
+
+ TARGET_SCHED_INIT: Reset the register pressure counting variables. */
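+
+/* A rough sketch of the interplay (simplified, illustrative pseudo-code
+ only; the real hooks below carry more state):
+
+ sh_reorder (ready queue R):
+ if CURR_REGMODE_PRESSURE (SImode) or (SFmode) is high
+ ready_reorder (R) so the lowest-LUID insn issues first
+ sh_reorder2: likewise, and additionally set skip_cycles
+ sh_dfa_new_cycle: while skip_cycles (at most 8 cycles), accept new
+ cycles so stalled insns can move from Q to R. */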
+
+#undef TARGET_SCHED_DFA_NEW_CYCLE
+#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
+
+#undef TARGET_SCHED_FINISH_GLOBAL
+#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
+
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER sh_reorder
+
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 sh_reorder2
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT sh_md_init
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
+
+#undef TARGET_CANNOT_MODIFY_JUMPS_P
+#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
+#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
+#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
+#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
+#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
+ sh_optimize_target_register_callee_saved
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS sh_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL sh_builtin_decl
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN sh_expand_builtin
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
+
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS sh_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST sh_address_cost
+#undef TARGET_ALLOCATE_INITIAL_VALUE
+#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
+
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE sh_function_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE sh_libcall_value
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY sh_return_in_memory
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
+#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
+#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES sh_callee_copies
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG sh_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
+
+#undef TARGET_CHECK_PCH_TARGET_FLAGS
+#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
+
+#undef TARGET_DWARF_CALLING_CONVENTION
+#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
+
+/* Return the regmode weight of insn INSN in mode MODE. */
+#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
+
+/* Return the current register pressure for mode MODE. */
+#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
+
+#ifdef SYMBIAN
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
+#undef TARGET_CXX_IMPORT_EXPORT_CLASS
+#define TARGET_CXX_IMPORT_EXPORT_CLASS sh_symbian_import_export_class
+
+#endif /* SYMBIAN */
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD sh_secondary_reload
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
+ int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_m1:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
+ return true;
+
+ case OPT_m2:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
+ return true;
+
+ case OPT_m2a:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
+ return true;
+
+ case OPT_m2a_nofpu:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
+ return true;
+
+ case OPT_m2a_single:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
+ return true;
+
+ case OPT_m2a_single_only:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
+ return true;
+
+ case OPT_m2e:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
+ return true;
+
+ case OPT_m3:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
+ return true;
+
+ case OPT_m3e:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
+ return true;
+
+ case OPT_m4:
+ case OPT_m4_100:
+ case OPT_m4_200:
+ case OPT_m4_300:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
+ return true;
+
+ case OPT_m4_nofpu:
+ case OPT_m4_100_nofpu:
+ case OPT_m4_200_nofpu:
+ case OPT_m4_300_nofpu:
+ case OPT_m4_340:
+ case OPT_m4_400:
+ case OPT_m4_500:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
+ return true;
+
+ case OPT_m4_single:
+ case OPT_m4_100_single:
+ case OPT_m4_200_single:
+ case OPT_m4_300_single:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
+ return true;
+
+ case OPT_m4_single_only:
+ case OPT_m4_100_single_only:
+ case OPT_m4_200_single_only:
+ case OPT_m4_300_single_only:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
+ return true;
+
+ case OPT_m4a:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
+ return true;
+
+ case OPT_m4a_nofpu:
+ case OPT_m4al:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
+ return true;
+
+ case OPT_m4a_single:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
+ return true;
+
+ case OPT_m4a_single_only:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
+ return true;
+
+ case OPT_m5_32media:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
+ return true;
+
+ case OPT_m5_32media_nofpu:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
+ return true;
+
+ case OPT_m5_64media:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
+ return true;
+
+ case OPT_m5_64media_nofpu:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
+ return true;
+
+ case OPT_m5_compact:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
+ return true;
+
+ case OPT_m5_compact_nofpu:
+ target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
+ return true;
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_INIT_STRUCT. */
+static void
+sh_option_init_struct (struct gcc_options *opts)
+{
+ /* We can't meaningfully test TARGET_SH2E / TARGET_IEEE
+ here, so leave it to TARGET_OPTION_OVERRIDE to set
+ flag_finite_math_only. We set it to 2 here so we know if the user
+ explicitly requested this to be on or off. */
+ opts->x_flag_finite_math_only = 2;
+}
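+
+/* A sketch of how that sentinel can be consumed (hypothetical check; the
+   actual consumer in the override hook may differ):
+
+     if (opts->x_flag_finite_math_only == 2)
+       opts->x_flag_finite_math_only = 0;
+
+   The value 2 still reads as "true" in boolean contexts, yet remains
+   distinguishable from an explicit -f(no-)finite-math-only.  */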
+
+/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
+static void
+sh_option_default_params (void)
+{
+ set_default_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2);
+}
+
+/* Implement the TARGET_OPTION_OVERRIDE hook.  Validate and override
+   various options, and do some machine-dependent initialization.  */
+static void
+sh_option_override (void)
+{
+ int regno;
+
+ SUBTARGET_OVERRIDE_OPTIONS;
+ if (optimize > 1 && !optimize_size)
+ target_flags |= MASK_SAVE_ALL_TARGET_REGS;
+ sh_cpu = PROCESSOR_SH1;
+ assembler_dialect = 0;
+ if (TARGET_SH2)
+ sh_cpu = PROCESSOR_SH2;
+ if (TARGET_SH2E)
+ sh_cpu = PROCESSOR_SH2E;
+ if (TARGET_SH2A)
+ sh_cpu = PROCESSOR_SH2A;
+ if (TARGET_SH3)
+ sh_cpu = PROCESSOR_SH3;
+ if (TARGET_SH3E)
+ sh_cpu = PROCESSOR_SH3E;
+ if (TARGET_SH4)
+ {
+ assembler_dialect = 1;
+ sh_cpu = PROCESSOR_SH4;
+ }
+ if (TARGET_SH4A_ARCH)
+ {
+ assembler_dialect = 1;
+ sh_cpu = PROCESSOR_SH4A;
+ }
+ if (TARGET_SH5)
+ {
+ sh_cpu = PROCESSOR_SH5;
+ target_flags |= MASK_ALIGN_DOUBLE;
+ if (TARGET_SHMEDIA_FPU)
+ target_flags |= MASK_FMOVD;
+ if (TARGET_SHMEDIA)
+ {
+ /* There are no delay slots on SHmedia. */
+ flag_delayed_branch = 0;
+	  /* Relaxation isn't yet supported for SHmedia.  */
+ target_flags &= ~MASK_RELAX;
+ /* After reload, if conversion does little good but can cause
+ ICEs:
+ - find_if_block doesn't do anything for SH because we don't
+ have conditional execution patterns. (We use conditional
+ move patterns, which are handled differently, and only
+ before reload).
+ - find_cond_trap doesn't do anything for the SH because we
+ don't have conditional traps.
+ - find_if_case_1 uses redirect_edge_and_branch_force in
+ the only path that does an optimization, and this causes
+ an ICE when branch targets are in registers.
+ - find_if_case_2 doesn't do anything for the SHmedia after
+ reload except when it can redirect a tablejump - and
+ that's rather rare. */
+ flag_if_conversion2 = 0;
+ if (! strcmp (sh_div_str, "call"))
+ sh_div_strategy = SH_DIV_CALL;
+ else if (! strcmp (sh_div_str, "call2"))
+ sh_div_strategy = SH_DIV_CALL2;
+	  else if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_FP;
+ else if (! strcmp (sh_div_str, "inv"))
+ sh_div_strategy = SH_DIV_INV;
+ else if (! strcmp (sh_div_str, "inv:minlat"))
+ sh_div_strategy = SH_DIV_INV_MINLAT;
+ else if (! strcmp (sh_div_str, "inv20u"))
+ sh_div_strategy = SH_DIV_INV20U;
+ else if (! strcmp (sh_div_str, "inv20l"))
+ sh_div_strategy = SH_DIV_INV20L;
+ else if (! strcmp (sh_div_str, "inv:call2"))
+ sh_div_strategy = SH_DIV_INV_CALL2;
+ else if (! strcmp (sh_div_str, "inv:call"))
+ sh_div_strategy = SH_DIV_INV_CALL;
+ else if (! strcmp (sh_div_str, "inv:fp"))
+ {
+ if (TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_INV_FP;
+ else
+ sh_div_strategy = SH_DIV_INV;
+ }
+ TARGET_CBRANCHDI4 = 0;
+ /* Assembler CFI isn't yet fully supported for SHmedia. */
+ flag_dwarf2_cfi_asm = 0;
+ }
+ }
+ else
+ {
+      /* Only the sh64-elf assembler properly supports .quad.  */
+ targetm.asm_out.aligned_op.di = NULL;
+ targetm.asm_out.unaligned_op.di = NULL;
+ }
+ if (TARGET_SH1)
+ {
+ if (! strcmp (sh_div_str, "call-div1"))
+ sh_div_strategy = SH_DIV_CALL_DIV1;
+ else if (! strcmp (sh_div_str, "call-fp")
+ && (TARGET_FPU_DOUBLE
+ || (TARGET_HARD_SH4 && TARGET_SH2E)
+ || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
+ sh_div_strategy = SH_DIV_CALL_FP;
+ else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
+ sh_div_strategy = SH_DIV_CALL_TABLE;
+ else
+	/* Pick one that makes the most sense for the target in general.
+	   There is little point in using different functions depending
+	   on -Os, since then we'd end up with two different functions
+	   when some of the code is compiled for size, and some for
+	   speed.  */
+
+ /* SH4 tends to emphasize speed. */
+ if (TARGET_HARD_SH4)
+ sh_div_strategy = SH_DIV_CALL_TABLE;
+ /* These have their own way of doing things. */
+ else if (TARGET_SH2A)
+ sh_div_strategy = SH_DIV_INTRINSIC;
+ /* ??? Should we use the integer SHmedia function instead? */
+ else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_CALL_FP;
+ /* SH1 .. SH3 cores often go into small-footprint systems, so
+ default to the smallest implementation available. */
+ else if (TARGET_SH2) /* ??? EXPERIMENTAL */
+ sh_div_strategy = SH_DIV_CALL_TABLE;
+ else
+ sh_div_strategy = SH_DIV_CALL_DIV1;
+ }
+ if (!TARGET_SH1)
+ TARGET_PRETEND_CMOVE = 0;
+ if (sh_divsi3_libfunc[0])
+ ; /* User supplied - leave it alone. */
+ else if (TARGET_DIVIDE_CALL_FP)
+ sh_divsi3_libfunc = "__sdivsi3_i4";
+ else if (TARGET_DIVIDE_CALL_TABLE)
+ sh_divsi3_libfunc = "__sdivsi3_i4i";
+ else if (TARGET_SH5)
+ sh_divsi3_libfunc = "__sdivsi3_1";
+ else
+ sh_divsi3_libfunc = "__sdivsi3";
+ if (sh_branch_cost == -1)
+ sh_branch_cost
+ = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1;
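+  /* That is: 1 on SH5; 2 on SH1 (no SH2 insns available) and on
+     hardware SH4; 1 on the remaining SH2/SH3-class cores (a spelled-out
+     reading of the conditional above).  */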
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (! VALID_REGISTER_P (regno))
+ sh_register_names[regno][0] = '\0';
+
+ for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
+ if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
+ sh_additional_register_names[regno][0] = '\0';
+
+ if ((flag_pic && ! TARGET_PREFERGOT)
+ || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
+ flag_no_function_cse = 1;
+
+  if (targetm.small_register_classes_for_mode_p (VOIDmode))
+ {
+ /* Never run scheduling before reload, since that can
+ break global alloc, and generates slower code anyway due
+ to the pressure on R0. */
+      /* Enable sched1 for SH4 only if the user explicitly requests it.
+	 When sched1 is enabled, the ready queue will be reordered by
+	 the target hooks when register pressure is high.  We cannot do
+	 this for PIC, or for SH3 and earlier, as they give spill
+	 failures for R0.  */
+ if (!TARGET_HARD_SH4 || flag_pic)
+ flag_schedule_insns = 0;
+      /* ??? Current exception handling places basic block boundaries
+	 after call_insns.  This causes high register pressure on R0
+	 and gives spill failures for R0 in reload.  See PR 22553 and
+	 the thread on gcc-patches
+	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
+ else if (flag_exceptions)
+ {
+ if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
+ warning (0, "ignoring -fschedule-insns because of exception handling bug");
+ flag_schedule_insns = 0;
+ }
+ else if (flag_schedule_insns
+ && !global_options_set.x_flag_schedule_insns)
+ flag_schedule_insns = 0;
+ }
+
+ /* Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
+ {
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or -maccumulate-outgoing-args for correctness");
+ TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
+ }
+
+ /* Unwinding with -freorder-blocks-and-partition does not work on this
+ architecture, because it requires far jumps to label crossing between
+ hot/cold sections which are rejected on this architecture. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ if (flag_exceptions)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not work with "
+ "exceptions on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+ else if (flag_unwind_tables)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not support unwind "
+ "info on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+ }
+
+ if (align_loops == 0)
+ align_loops = 1 << (TARGET_SH5 ? 3 : 2);
+ if (align_jumps == 0)
+ align_jumps = 1 << CACHE_LOG;
+ else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
+ align_jumps = TARGET_SHMEDIA ? 4 : 2;
+
+ /* Allocation boundary (in *bytes*) for the code of a function.
+ SH1: 32 bit alignment is faster, because instructions are always
+ fetched as a pair from a longword boundary.
+ SH2 .. SH5 : align to cache line start. */
+ if (align_functions == 0)
+ align_functions
+ = optimize_size ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
+ /* The linker relaxation code breaks when a function contains
+ alignments that are larger than that at the start of a
+ compilation unit. */
+ if (TARGET_RELAX)
+ {
+ int min_align
+ = align_loops > align_jumps ? align_loops : align_jumps;
+
+      /* Also take possible .long constants / mova tables into account.  */
+ if (min_align < 4)
+ min_align = 4;
+ if (align_functions < min_align)
+ align_functions = min_align;
+ }
+
+ /* If the -mieee option was not explicitly set by the user, turn it on
+ unless -ffinite-math-only was specified. See also PR 33135. */
+ if (! global_options_set.x_TARGET_IEEE)
+ TARGET_IEEE = ! flag_finite_math_only;
+
+ if (sh_fixed_range_str)
+ sh_fix_range (sh_fixed_range_str);
+
+ /* This target defaults to strict volatile bitfields. */
+ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
+ flag_strict_volatile_bitfields = 1;
+}
+
+/* Print the operand address in x to the stream. */
+
+static void
+sh_print_operand_address (FILE *stream, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ fprintf (stream, "@%s", reg_names[true_regnum (x)]);
+ break;
+
+ case PLUS:
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
+ reg_names[true_regnum (base)]);
+ break;
+
+ case REG:
+ case SUBREG:
+ {
+ int base_num = true_regnum (base);
+ int index_num = true_regnum (index);
+
+ fprintf (stream, "@(r0,%s)",
+ reg_names[MAX (base_num, index_num)]);
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ break;
+
+ case PRE_DEC:
+ fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ case POST_INC:
+ fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ default:
+ x = mark_constant_pool_use (x);
+ output_addr_const (stream, x);
+ break;
+ }
+}
+
+/* Print operand x (an rtx) in assembler syntax to file stream
+ according to modifier code.
+
+ '.' print a .s if insn needs delay slot
+ ',' print LOCAL_LABEL_PREFIX
+   '@' print trap, rte or rts depending upon the interrupt pragma in effect
+ '#' output a nop if there is nothing to put in the delay slot
+ ''' print likelihood suffix (/u for unlikely).
+ '>' print branch target if -fverbose-asm
+ 'O' print a constant without the #
+ 'R' print the LSW of a dp value - changes if in little endian
+ 'S' print the MSW of a dp value - changes if in little endian
+ 'T' print the next word of a dp value - same as 'R' in big endian mode.
+ 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
+ otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
+ 'N' print 'r63' if the operand is (const_int 0).
+ 'd' print a V2SF reg as dN instead of fpN.
+ 'm' print a pair `base,offset' or `base,index', for LD and ST.
+ 'U' Likewise for {LD,ST}{HI,LO}.
+ 'V' print the position of a single bit set.
+ 'W' print the position of a single bit cleared.
+ 't' print a memory address which is a register.
+   'u' print the lowest 16 bits of CONST_INT, as an unsigned value.
+ 'o' output an operator. */
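+
+/* As a worked illustration of the table above: output_movedouble below
+   returns templates such as "mov.l %1,%0\n\tmov.l %T1,%T0", where %T1
+   names the word following operand 1, and output_far_jump uses "%O0"
+   to print its label operand without the '#' prefix.  */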
+
+static void
+sh_print_operand (FILE *stream, rtx x, int code)
+{
+ int regno;
+ enum machine_mode mode;
+
+ switch (code)
+ {
+ tree trapa_attr;
+
+ case '.':
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
+ && get_attr_length (XVECEXP (final_sequence, 0, 1)))
+ fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
+ break;
+ case ',':
+ fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
+ break;
+ case '@':
+ trapa_attr = lookup_attribute ("trap_exit",
+ DECL_ATTRIBUTES (current_function_decl));
+ if (trapa_attr)
+ fprintf (stream, "trapa #%ld",
+ (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
+ else if (sh_cfun_interrupt_handler_p ())
+ {
+ if (sh_cfun_resbank_handler_p ())
+ fprintf (stream, "resbank\n");
+ fprintf (stream, "rte");
+ }
+ else
+ fprintf (stream, "rts");
+ break;
+ case '#':
+ /* Output a nop if there's nothing in the delay slot. */
+ if (dbr_sequence_length () == 0)
+ fprintf (stream, "\n\tnop");
+ break;
+ case '\'':
+ {
+ rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+
+ if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
+ fputs ("/u", stream);
+ break;
+ }
+ case '>':
+ if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
+ {
+ fputs ("\t! target: ", stream);
+ output_addr_const (stream, JUMP_LABEL (current_output_insn));
+ }
+ break;
+ case 'O':
+ x = mark_constant_pool_use (x);
+ output_addr_const (stream, x);
+ break;
+ /* N.B.: %R / %S / %T adjust memory addresses by four.
+ For SHMEDIA, that means they can be used to access the first and
+ second 32 bit part of a 64 bit (or larger) value that
+ might be held in floating point registers or memory.
+ While they can be used to access 64 bit parts of a larger value
+ held in general purpose registers, that won't work with memory -
+ neither for fp registers, since the frxx names are used. */
+ case 'R':
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ {
+ regno = true_regnum (x);
+ regno += FP_REGISTER_P (regno) ? 1 : LSW;
+ fputs (reg_names[regno], (stream));
+ }
+ else if (MEM_P (x))
+ {
+ x = adjust_address (x, SImode, 4 * LSW);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ }
+ else
+ {
+ rtx sub = NULL_RTX;
+
+ mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 4 * LSW);
+ if (sub)
+ sh_print_operand (stream, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%R");
+ }
+ break;
+ case 'S':
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ {
+ regno = true_regnum (x);
+ regno += FP_REGISTER_P (regno) ? 0 : MSW;
+ fputs (reg_names[regno], (stream));
+ }
+ else if (MEM_P (x))
+ {
+ x = adjust_address (x, SImode, 4 * MSW);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ }
+ else
+ {
+ rtx sub = NULL_RTX;
+
+ mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 4 * MSW);
+ if (sub)
+ sh_print_operand (stream, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%S");
+ }
+ break;
+ case 'T':
+ /* Next word of a double. */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x) + 1], (stream));
+ break;
+ case MEM:
+ if (GET_CODE (XEXP (x, 0)) != PRE_DEC
+ && GET_CODE (XEXP (x, 0)) != POST_INC)
+ x = adjust_address (x, SImode, 4);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 't':
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ sh_print_operand (stream, x, 0);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 'o':
+ switch (GET_CODE (x))
+ {
+ case PLUS: fputs ("add", stream); break;
+ case MINUS: fputs ("sub", stream); break;
+ case MULT: fputs ("mul", stream); break;
+ case DIV: fputs ("div", stream); break;
+ case EQ: fputs ("eq", stream); break;
+ case NE: fputs ("ne", stream); break;
+ case GT: case LT: fputs ("gt", stream); break;
+ case GE: case LE: fputs ("ge", stream); break;
+ case GTU: case LTU: fputs ("gtu", stream); break;
+ case GEU: case LEU: fputs ("geu", stream); break;
+ default:
+ break;
+ }
+ break;
+ case 'M':
+ if (TARGET_SHMEDIA)
+ {
+ if (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && (REG_P (XEXP (XEXP (x, 0), 1))
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
+ fputc ('x', stream);
+ }
+ else
+ {
+ if (MEM_P (x))
+ {
+ switch (GET_MODE (x))
+ {
+ case QImode: fputs (".b", stream); break;
+ case HImode: fputs (".w", stream); break;
+ case SImode: fputs (".l", stream); break;
+ case SFmode: fputs (".s", stream); break;
+ case DFmode: fputs (".d", stream); break;
+ default: gcc_unreachable ();
+ }
+ }
+ }
+ break;
+
+ case 'm':
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ /* Fall through. */
+ case 'U':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ sh_print_operand (stream, x, 0);
+ fputs (", 0", stream);
+ break;
+
+ case PLUS:
+ sh_print_operand (stream, XEXP (x, 0), 0);
+ fputs (", ", stream);
+ sh_print_operand (stream, XEXP (x, 1), 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'V':
+ {
+ int num = exact_log2 (INTVAL (x));
+ gcc_assert (num >= 0);
+ fprintf (stream, "#%d", num);
+ }
+ break;
+
+ case 'W':
+ {
+ int num = exact_log2 (~INTVAL (x));
+ gcc_assert (num >= 0);
+ fprintf (stream, "#%d", num);
+ }
+ break;
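+
+      /* Worked example (illustrative): for the constant 8, 'V' prints
+	 "#3"; for its complement ~8 - all bits set except bit 3 - 'W'
+	 also prints "#3", the position of the single cleared bit.  */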
+
+ case 'd':
+ gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
+
+ fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
+ break;
+
+ case 'N':
+ if (x == CONST0_RTX (GET_MODE (x)))
+ {
+ fprintf ((stream), "r63");
+ break;
+ }
+ goto default_output;
+ case 'u':
+ if (CONST_INT_P (x))
+ {
+ fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
+ break;
+ }
+ /* Fall through. */
+
+ default_output:
+ default:
+ regno = 0;
+ mode = GET_MODE (x);
+
+ switch (GET_CODE (x))
+ {
+ case TRUNCATE:
+ {
+ rtx inner = XEXP (x, 0);
+ int offset = 0;
+ enum machine_mode inner_mode;
+
+ /* We might see SUBREGs with vector mode registers inside. */
+ if (GET_CODE (inner) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (inner))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
+ && subreg_lowpart_p (inner))
+ inner = SUBREG_REG (inner);
+ if (CONST_INT_P (inner))
+ {
+ x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
+ goto default_output;
+ }
+ inner_mode = GET_MODE (inner);
+ if (GET_CODE (inner) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (inner))
+ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
+ && REG_P (SUBREG_REG (inner)))
+ {
+ offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
+ GET_MODE (SUBREG_REG (inner)),
+ SUBREG_BYTE (inner),
+ GET_MODE (inner));
+ inner = SUBREG_REG (inner);
+ }
+ if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
+ abort ();
+ /* Floating point register pairs are always big endian;
+ general purpose registers are 64 bit wide. */
+ regno = REGNO (inner);
+ regno = (HARD_REGNO_NREGS (regno, inner_mode)
+ - HARD_REGNO_NREGS (regno, mode))
+ + offset;
+ x = inner;
+ goto reg;
+ }
+ case SIGN_EXTEND:
+ x = XEXP (x, 0);
+ goto reg;
+ /* FIXME: We need this on SHmedia32 because reload generates
+ some sign-extended HI or QI loads into DImode registers
+ but, because Pmode is SImode, the address ends up with a
+ subreg:SI of the DImode register. Maybe reload should be
+ fixed so as to apply alter_subreg to such loads? */
+ case IF_THEN_ELSE:
+ gcc_assert (trapping_target_operand (x, VOIDmode));
+ x = XEXP (XEXP (x, 2), 0);
+ goto default_output;
+ case SUBREG:
+ gcc_assert (SUBREG_BYTE (x) == 0
+ && REG_P (SUBREG_REG (x)));
+
+ x = SUBREG_REG (x);
+ /* Fall through. */
+
+ reg:
+ case REG:
+ regno += REGNO (x);
+ if (FP_REGISTER_P (regno)
+ && mode == V16SFmode)
+ fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
+ else if (FP_REGISTER_P (REGNO (x))
+ && mode == V4SFmode)
+ fprintf ((stream), "fv%s", reg_names[regno] + 2);
+ else if (REG_P (x)
+ && mode == V2SFmode)
+ fprintf ((stream), "fp%s", reg_names[regno] + 2);
+ else if (FP_REGISTER_P (REGNO (x))
+ && GET_MODE_SIZE (mode) > 4)
+ fprintf ((stream), "d%s", reg_names[regno] + 1);
+ else
+ fputs (reg_names[regno], (stream));
+ break;
+
+ case MEM:
+ output_address (XEXP (x, 0));
+ break;
+
+ default:
+ if (TARGET_SH1)
+ fputc ('#', stream);
+ output_addr_const (stream, x);
+ break;
+ }
+ break;
+ }
+}
+
+static bool
+sh_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '.' || code == '#' || code == '@' || code == ','
+ || code == '$' || code == '\'' || code == '>');
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+sh_asm_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_DATALABEL:
+ fputs ("datalabel ", file);
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ break;
+ case UNSPEC_PIC:
+ /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ break;
+ case UNSPEC_GOT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_PLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@PLT", file);
+ break;
+ case UNSPEC_GOTPLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTPLT", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_CALLER:
+ {
+ char name[32];
+ /* LPCS stands for Label for PIC Call Site. */
+ targetm.asm_out.generate_internal_label (name, "LPCS",
+ INTVAL (XVECEXP (x, 0, 0)));
+ assemble_name (file, name);
+ }
+ break;
+ case UNSPEC_EXTRACT_S16:
+ case UNSPEC_EXTRACT_U16:
+ {
+ rtx val, shift;
+
+ val = XVECEXP (x, 0, 0);
+ shift = XVECEXP (x, 0, 1);
+ fputc ('(', file);
+ if (shift != const0_rtx)
+ fputc ('(', file);
+ if (GET_CODE (val) == CONST
+ || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
+ {
+ fputc ('(', file);
+ output_addr_const (file, val);
+ fputc (')', file);
+ }
+ else
+ output_addr_const (file, val);
+ if (shift != const0_rtx)
+ {
+ fputs (" >> ", file);
+ output_addr_const (file, shift);
+ fputc (')', file);
+ }
+ fputs (" & 65535)", file);
+ }
+ break;
+ case UNSPEC_SYMOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputc ('-', file);
+ if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
+ {
+ fputc ('(', file);
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ fputc (')', file);
+ }
+ else
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ break;
+ case UNSPEC_PCREL_SYMOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("-(", file);
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ fputs ("-.)", file);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+ else
+ return false;
+}
+
+
+/* Encode symbol attributes of a SYMBOL_REF into its
+ SYMBOL_REF_FLAGS. */
+static void
+sh_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && sh2a_function_vector_p (decl) && TARGET_SH2A)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
+}
+
+/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
+static void
+force_into (rtx value, rtx target)
+{
+ value = force_operand (value, target);
+ if (! rtx_equal_p (value, target))
+ emit_insn (gen_move_insn (target, value));
+}
+
+/* Emit code to perform a block move. Choose the best method.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the source.
+ OPERANDS[2] is the size.
+ OPERANDS[3] is the alignment safe to use. */
+
+int
+expand_block_move (rtx *operands)
+{
+ int align = INTVAL (operands[3]);
+ int constp = (CONST_INT_P (operands[2]));
+ int bytes = (constp ? INTVAL (operands[2]) : 0);
+
+ if (! constp)
+ return 0;
+
+ /* If we could use mov.l to move words and dest is word-aligned, we
+ can use movua.l for loads and still generate a relatively short
+ and efficient sequence. */
+ if (TARGET_SH4A_ARCH && align < 4
+ && MEM_ALIGN (operands[0]) >= 32
+ && can_move_by_pieces (bytes, 32))
+ {
+ rtx dest = copy_rtx (operands[0]);
+ rtx src = copy_rtx (operands[1]);
+ /* We could use different pseudos for each copied word, but
+ since movua can only load into r0, it's kind of
+ pointless. */
+ rtx temp = gen_reg_rtx (SImode);
+ rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
+ int copied = 0;
+
+ while (copied + 4 <= bytes)
+ {
+ rtx to = adjust_address (dest, SImode, copied);
+ rtx from = adjust_automodify_address (src, BLKmode,
+ src_addr, copied);
+
+ set_mem_size (from, GEN_INT (4));
+ emit_insn (gen_movua (temp, from));
+ emit_move_insn (src_addr, plus_constant (src_addr, 4));
+ emit_move_insn (to, temp);
+ copied += 4;
+ }
+
+ if (copied < bytes)
+ move_by_pieces (adjust_address (dest, BLKmode, copied),
+ adjust_automodify_address (src, BLKmode,
+ src_addr, copied),
+ bytes - copied, align, 0);
+
+ return 1;
+ }
+
+ /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
+ alignment, or if it isn't a multiple of 4 bytes, then fail. */
+ if (align < 4 || (bytes % 4 != 0))
+ return 0;
+
+ if (TARGET_HARD_SH4)
+ {
+ if (bytes < 12)
+ return 0;
+ else if (bytes == 12)
+ {
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+
+ function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else if (! optimize_size)
+ {
+ const char *entry_name;
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ int dwords;
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+ rtx r6 = gen_rtx_REG (SImode, 6);
+
+ entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
+ function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+
+ dwords = bytes >> 3;
+ emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else
+ return 0;
+ }
+ if (bytes < 64)
+ {
+ char entry[30];
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+
+ sprintf (entry, "__movmemSI%d", bytes);
+ function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+ emit_insn (gen_block_move_real (func_addr_rtx));
+ return 1;
+ }
+
+  /* This is the same number of bytes as a memcpy call, but to a
+     different, less common function name, so this will occasionally
+     use more space.  */
+ if (! optimize_size)
+ {
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ int final_switch, while_loop;
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+ rtx r6 = gen_rtx_REG (SImode, 6);
+
+ function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+
+ /* r6 controls the size of the move. 16 is decremented from it
+ for each 64 bytes moved. Then the negative bit left over is used
+ as an index into a list of move instructions. e.g., a 72 byte move
+ would be set up with size(r6) = 14, for one iteration through the
+ big while loop, and a switch of -2 for the last part. */
+
+ final_switch = 16 - ((bytes / 4) % 16);
+ while_loop = ((bytes / 4) / 16 - 1) * 16;
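+      /* Worked numbers for the 72-byte example above: bytes / 4 == 18,
+	 so final_switch == 16 - (18 % 16) == 14 and while_loop
+	 == (18 / 16 - 1) * 16 == 0, giving r6 == 14 as described.  */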
+ emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
+ emit_insn (gen_block_lump_real (func_addr_rtx));
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Prepare operands for a move define_expand; specifically, one of the
+ operands must be in a register. */
+
+int
+prepare_move_operands (rtx operands[], enum machine_mode mode)
+{
+ if ((mode == SImode || mode == DImode)
+ && flag_pic
+ && ! ((mode == Pmode || mode == ptr_mode)
+ && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
+ {
+ rtx temp;
+ if (SYMBOLIC_CONST_P (operands[1]))
+ {
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (Pmode, operands[1]);
+ else if (TARGET_SHMEDIA
+ && GET_CODE (operands[1]) == LABEL_REF
+ && target_reg_operand (operands[0], mode))
+ /* It's ok. */;
+ else
+ {
+ temp = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (Pmode));
+ operands[1] = legitimize_pic_address (operands[1], mode, temp);
+ }
+ }
+ else if (GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
+ {
+ temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
+ mode, temp);
+ operands[1] = expand_binop (mode, add_optab, temp,
+ XEXP (XEXP (operands[1], 0), 1),
+ (!can_create_pseudo_p ()
+ ? temp
+ : gen_reg_rtx (Pmode)),
+ 0, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ if (! reload_in_progress && ! reload_completed)
+ {
+      /* Copy the source to a register if neither operand is a register.  */
+ if (! register_operand (operands[0], mode)
+ && ! sh_register_operand (operands[1], mode))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+
+ if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
+ {
+	  /* This is like change_address_1 (operands[0], mode, 0, 1),
+	     except that we can't use that function because it is static.  */
+ rtx new_rtx = change_address (operands[0], mode, 0);
+ MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
+ operands[0] = new_rtx;
+ }
+
+ /* This case can happen while generating code to move the result
+ of a library call to the target. Reject `st r0,@(rX,rY)' because
+ reload will fail to find a spill register for rX, since r0 is already
+ being used for the source. */
+ else if (TARGET_SH1
+ && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[0], 0), 1)))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+ }
+
+ if (mode == Pmode || mode == ptr_mode)
+ {
+ rtx op0, op1, opc;
+ enum tls_model tls_kind;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
+ != TLS_MODEL_NONE))
+ {
+ opc = XEXP (XEXP (op1, 0), 1);
+ op1 = XEXP (XEXP (op1, 0), 0);
+ }
+ else
+ opc = NULL_RTX;
+
+ if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
+ {
+ rtx tga_op1, tga_ret, tmp, tmp2;
+
+ switch (tls_kind)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
+ op1 = tga_ret;
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
+
+ tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, tga_ret);
+
+ if (register_operand (op0, Pmode))
+ tmp2 = op0;
+ else
+ tmp2 = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
+ op1 = tmp2;
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (! flag_pic)
+ {
+ /* Don't schedule insns for getting GOT address when
+ the first scheduling is enabled, to avoid spill
+ failures for R0. */
+ if (flag_schedule_insns)
+ emit_insn (gen_blockage ());
+ emit_insn (gen_GOTaddr2picreg ());
+ emit_use (gen_rtx_REG (SImode, PIC_REG));
+ if (flag_schedule_insns)
+ emit_insn (gen_blockage ());
+ }
+ tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
+ tmp = gen_sym2GOTTPOFF (op1);
+ emit_insn (gen_tls_initial_exec (tga_op1, tmp));
+ op1 = tga_op1;
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tmp2 = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_gbr (tmp2));
+ tmp = gen_reg_rtx (Pmode);
+ emit_insn (gen_symTPOFF2reg (tmp, op1));
+
+ if (register_operand (op0, Pmode))
+ op1 = op0;
+ else
+ op1 = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_addsi3 (op1, tmp, tmp2));
+ break;
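+	      /* E.g. for local-exec TLS, the sequence built above
+		 computes the variable's address as GBR plus the symbol's
+		 TPOFF constant (an illustrative summary, not extra
+		 logic).  */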
+
+ default:
+ gcc_unreachable ();
+ }
+ if (opc)
+ emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
+ operands[1] = op1;
+ }
+ }
+
+ return 0;
+}
+
+enum rtx_code
+prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
+ enum rtx_code comparison)
+{
+ rtx op1;
+ rtx scratch = NULL_RTX;
+
+ if (comparison == LAST_AND_UNUSED_RTX_CODE)
+ comparison = GET_CODE (operands[0]);
+ else
+ scratch = operands[4];
+ if (CONST_INT_P (operands[1])
+ && !CONST_INT_P (operands[2]))
+ {
+ rtx tmp = operands[1];
+
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ comparison = swap_condition (comparison);
+ }
+ if (CONST_INT_P (operands[2]))
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ if ((val == -1 || val == -0x81)
+ && (comparison == GT || comparison == LE))
+ {
+ comparison = (comparison == GT) ? GE : LT;
+ operands[2] = gen_int_mode (val + 1, mode);
+ }
+ else if ((val == 1 || val == 0x80)
+ && (comparison == GE || comparison == LT))
+ {
+ comparison = (comparison == GE) ? GT : LE;
+ operands[2] = gen_int_mode (val - 1, mode);
+ }
+ else if (val == 1 && (comparison == GEU || comparison == LTU))
+ {
+ comparison = (comparison == GEU) ? NE : EQ;
+ operands[2] = CONST0_RTX (mode);
+ }
+ else if (val == 0x80 && (comparison == GEU || comparison == LTU))
+ {
+ comparison = (comparison == GEU) ? GTU : LEU;
+ operands[2] = gen_int_mode (val - 1, mode);
+ }
+ else if (val == 0 && (comparison == GTU || comparison == LEU))
+ comparison = (comparison == GTU) ? NE : EQ;
+ else if (mode == SImode
+ && ((val == 0x7fffffff
+ && (comparison == GTU || comparison == LEU))
+ || ((unsigned HOST_WIDE_INT) val
+ == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
+ && (comparison == GEU || comparison == LTU))))
+ {
+ comparison = (comparison == GTU || comparison == GEU) ? LT : GE;
+ operands[2] = CONST0_RTX (mode);
+ }
+ }
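+      /* E.g. a signed "x > -1" is rewritten as "x >= 0", and an
+	 unsigned "x >= 1" as "x != 0" - illustrative readings of the
+	 rewrites above; the rewritten forms are typically cheaper to
+	 test on SH.  */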
+ op1 = operands[1];
+ if (can_create_pseudo_p ())
+ operands[1] = force_reg (mode, op1);
+ /* When we are handling DImode comparisons, we want to keep constants so
+ that we can optimize the component comparisons; however, memory loads
+ are better issued as a whole so that they can be scheduled well.
+ SImode equality comparisons allow I08 constants, but only when they
+ compare r0. Hence, if operands[1] has to be loaded from somewhere else
+ into a register, that register might as well be r0, and we allow the
+ constant. If it is already in a register, this is likely to be
+ allocated to a different hard register, thus we load the constant into
+ a register unless it is zero. */
+ if (!REG_P (operands[2])
+ && (!CONST_INT_P (operands[2])
+ || (mode == SImode && operands[2] != CONST0_RTX (SImode)
+ && ((comparison != EQ && comparison != NE)
+ || (REG_P (op1) && REGNO (op1) != R0_REG)
+ || !satisfies_constraint_I08 (operands[2])))))
+ {
+ if (scratch && GET_MODE (scratch) == mode)
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ else if (can_create_pseudo_p ())
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ return comparison;
+}
+
+void
+expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
+{
+ rtx (*branch_expander) (rtx) = gen_branch_true;
+ rtx jump;
+
+ comparison = prepare_cbranch_operands (operands, SImode, comparison);
+ switch (comparison)
+ {
+ case NE: case LT: case LE: case LTU: case LEU:
+ comparison = reverse_condition (comparison);
+ branch_expander = gen_branch_false;
+ default: ;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG),
+ gen_rtx_fmt_ee (comparison, SImode,
+ operands[1], operands[2])));
+ jump = emit_jump_insn (branch_expander (operands[3]));
+ if (probability >= 0)
+ add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
+}
+
+/* ??? How should we distribute probabilities when more than one branch
+   is generated?  So far we only have some ad-hoc observations:
+ - If the operands are random, they are likely to differ in both parts.
+ - If comparing items in a hash chain, the operands are random or equal;
+ operation should be EQ or NE.
+ - If items are searched in an ordered tree from the root, we can expect
+ the highpart to be unequal about half of the time; operation should be
+ an inequality comparison, operands non-constant, and overall probability
+ about 50%. Likewise for quicksort.
+ - Range checks will be often made against constants. Even if we assume for
+ simplicity an even distribution of the non-constant operand over a
+ sub-range here, the same probability could be generated with differently
+ wide sub-ranges - as long as the ratio of the part of the subrange that
+ is before the threshold to the part that comes after the threshold stays
+ the same. Thus, we can't really tell anything here;
+ assuming random distribution is at least simple.
+ */
+
+bool
+expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
+{
+ enum rtx_code msw_taken, msw_skip, lsw_taken;
+ rtx skip_label = NULL_RTX;
+ rtx op1h, op1l, op2h, op2l;
+ int num_branches;
+ int prob, rev_prob;
+ int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
+ rtx scratch = operands[4];
+
+ comparison = prepare_cbranch_operands (operands, DImode, comparison);
+ op1h = gen_highpart_mode (SImode, DImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
+ prob = split_branch_probability;
+ rev_prob = REG_BR_PROB_BASE - prob;
+ switch (comparison)
+ {
+ /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
+ That costs 1 cycle more when the first branch can be predicted taken,
+ but saves us mispredicts because only one branch needs prediction.
+ It also enables generating the cmpeqdi_t-1 pattern. */
+ case EQ:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_true (operands[3]));
+ return true;
+ }
+ msw_skip = NE;
+ lsw_taken = EQ;
+ if (prob >= 0)
+ {
+	  /* If we had more precision, we'd use rev_prob - (rev_prob >> 32).  */
+ msw_skip_prob = rev_prob;
+ if (REG_BR_PROB_BASE <= 65535)
+ lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
+ else
+ {
+ gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
+ lsw_taken_prob
+ = (prob
+ ? (REG_BR_PROB_BASE
+ - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
+ / ((HOST_WIDEST_INT) prob << 32)))
+ : 0);
+ }
+ }
+ break;
+ case NE:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_false (operands[3]));
+ return true;
+ }
+ msw_taken = NE;
+ msw_taken_prob = prob;
+ lsw_taken = NE;
+ lsw_taken_prob = 0;
+ break;
+ case GTU: case GT:
+ msw_taken = comparison;
+ if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
+ break;
+ if (comparison != GTU || op2h != CONST0_RTX (SImode))
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GTU;
+ break;
+ case GEU: case GE:
+ if (op2l == CONST0_RTX (SImode))
+ msw_taken = comparison;
+ else
+ {
+ msw_taken = comparison == GE ? GT : GTU;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GEU;
+ }
+ break;
+ case LTU: case LT:
+ msw_taken = comparison;
+ if (op2l == CONST0_RTX (SImode))
+ break;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = LTU;
+ break;
+ case LEU: case LE:
+ if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
+ msw_taken = comparison;
+ else
+ {
+ lsw_taken = LEU;
+ if (comparison == LE)
+ msw_taken = LT;
+ else if (op2h != CONST0_RTX (SImode))
+ msw_taken = LTU;
+ else
+ {
+ msw_skip = swap_condition (LTU);
+ break;
+ }
+ msw_skip = swap_condition (msw_taken);
+ }
+ break;
+ default: return false;
+ }
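+  /* Illustration: for a signed GT the code above arranges to branch to
+     the target if the high words compare GT, skip everything if they
+     compare LT, and fall through to an unsigned GTU test of the low
+     words when the high words are equal.  */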
+ num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
+ if (comparison != EQ && comparison != NE && num_branches > 1)
+ {
+ if (!CONSTANT_P (operands[2])
+ && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
+ && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
+ {
+ msw_taken_prob = prob / 2U;
+ msw_skip_prob
+ = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
+ lsw_taken_prob = prob;
+ }
+ else
+ {
+ msw_taken_prob = prob;
+ msw_skip_prob = REG_BR_PROB_BASE;
+ /* ??? If we have a constant op2h, should we use that when
+ calculating lsw_taken_prob? */
+ lsw_taken_prob = prob;
+ }
+ }
+ operands[1] = op1h;
+ operands[2] = op2h;
+ operands[4] = NULL_RTX;
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2h, SImode)
+ && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
+ && (msw_taken != LAST_AND_UNUSED_RTX_CODE
+ || msw_skip != LAST_AND_UNUSED_RTX_CODE))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
+ if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ {
+ rtx taken_label = operands[3];
+
+ /* Operands were possibly modified, but msw_skip doesn't expect this.
+ Always use the original ones. */
+ if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ {
+ operands[1] = op1h;
+ operands[2] = op2h;
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2h, SImode)
+ && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ }
+
+ operands[3] = skip_label = gen_label_rtx ();
+ expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
+ operands[3] = taken_label;
+ }
+ operands[1] = op1l;
+ operands[2] = op2l;
+ if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
+ {
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2l, SImode)
+ && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
+ }
+ if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ emit_label (skip_label);
+ return true;
+}
+
+/* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4.  */
+
+static void
+sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
+{
+ if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ insn = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, insn,
+ gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
+ (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
+ }
+ else
+ emit_insn (insn);
+}
+
+/* Prepare the operands for an scc instruction; make sure that the
+ compare has been done and the result is in T_REG. */
+void
+sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx t_reg = gen_rtx_REG (SImode, T_REG);
+ enum rtx_code oldcode = code;
+ enum machine_mode mode;
+
+ /* First need a compare insn. */
+ switch (code)
+ {
+ case NE:
+ /* It isn't possible to handle this case. */
+ gcc_unreachable ();
+ case LT:
+ code = GT;
+ break;
+ case LE:
+ code = GE;
+ break;
+ case LTU:
+ code = GTU;
+ break;
+ case LEU:
+ code = GEU;
+ break;
+ default:
+ break;
+ }
+ if (code != oldcode)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ mode = GET_MODE (op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op1);
+
+ op0 = force_reg (mode, op0);
+ if ((code != EQ && code != NE
+ && (op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || (mode == DImode && op1 != const0_rtx)
+ || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ op1 = force_reg (mode, op1);
+
+ sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
+ gen_rtx_fmt_ee (code, SImode, op0, op1)),
+ mode);
+}
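+
+/* For example, "a < b" has no direct T-bit compare on SH, so the code
+   above canonicalizes it to "b > a", swapping the operands and using
+   GT (an illustrative reading of the switch above).  */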
+
+rtx
+sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
+ rtx op0, rtx op1)
+{
+ rtx target = gen_reg_rtx (SImode);
+ rtx tmp;
+
+ gcc_assert (TARGET_SHMEDIA);
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case LT:
+ case UNORDERED:
+ case GTU:
+ case LTU:
+ tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
+ emit_insn (gen_cstore4_media (target, tmp, op0, op1));
+ code = NE;
+ break;
+
+ case NE:
+ case GE:
+ case LE:
+ case ORDERED:
+ case GEU:
+ case LEU:
+ tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
+ emit_insn (gen_cstore4_media (target, tmp, op0, op1));
+ code = EQ;
+ break;
+
+ case UNEQ:
+ case UNGE:
+ case UNGT:
+ case UNLE:
+ case UNLT:
+ case LTGT:
+ return NULL_RTX;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mode == DImode)
+ {
+ rtx t2 = gen_reg_rtx (DImode);
+ emit_insn (gen_extendsidi2 (t2, target));
+ target = t2;
+ }
+
+ return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
+}
+
+/* Called from the md file, set up the operands of a compare instruction. */
+
+void
+sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ enum rtx_code branch_code;
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx insn, tem;
+ bool need_ccmpeq = false;
+
+ if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ op0 = force_reg (mode, op0);
+ op1 = force_reg (mode, op1);
+ }
+ else
+ {
+ if (code != EQ || mode == DImode)
+ {
+ /* Force args into regs, since we can't use constants here. */
+ op0 = force_reg (mode, op0);
+ if (op1 != const0_rtx || code == GTU || code == GEU)
+ op1 = force_reg (mode, op1);
+ }
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (code == LT
+ || (code == LE && TARGET_IEEE && TARGET_SH2E)
+ || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
+ {
+ tem = op0, op0 = op1, op1 = tem;
+ code = swap_condition (code);
+ }
+
+ /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
+ if (code == GE)
+ {
+ gcc_assert (TARGET_IEEE && TARGET_SH2E);
+ need_ccmpeq = true;
+ code = GT;
+ }
+
+ /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
+ to EQ/GT respectively. */
+ gcc_assert (code == EQ || code == GT || code == NE || code == LE);
+ }
+
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GE:
+ case GTU:
+ case GEU:
+ branch_code = code;
+ break;
+ case NE:
+ case LT:
+ case LE:
+ case LTU:
+ case LEU:
+ branch_code = reverse_condition (code);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ insn = gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (SImode, T_REG),
+ gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
+
+ sh_emit_set_t_insn (insn, mode);
+ if (need_ccmpeq)
+ sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
+
+ if (branch_code == code)
+ emit_jump_insn (gen_branch_true (operands[3]));
+ else
+ emit_jump_insn (gen_branch_false (operands[3]));
+}
+
+void
+sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+ rtx lab = NULL_RTX;
+ bool invert = false;
+ rtx tem;
+
+ op0 = force_reg (mode, op0);
+ if ((code != EQ && code != NE
+ && (op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || (mode == DImode && op1 != const0_rtx)
+ || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ op1 = force_reg (mode, op1);
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (code == LT || code == LE)
+ {
+ code = swap_condition (code);
+ tem = op0, op0 = op1, op1 = tem;
+ }
+ if (code == GE)
+ {
+ if (TARGET_IEEE)
+ {
+ lab = gen_label_rtx ();
+ sh_emit_scc_to_t (EQ, op0, op1);
+ emit_jump_insn (gen_branch_true (lab));
+ code = GT;
+ }
+ else
+ {
+ code = LT;
+ invert = true;
+ }
+ }
+ }
+
+ if (code == NE)
+ {
+ code = EQ;
+ invert = true;
+ }
+
+ sh_emit_scc_to_t (code, op0, op1);
+ if (lab)
+ emit_label (lab);
+ if (invert)
+ emit_insn (gen_movnegt (operands[0]));
+ else
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG));
+}
+
+/* Functions to output assembly code. */
+
+/* Return a sequence of instructions to perform DI or DF move.
+
+ Since the SH cannot move a DI or DF in one instruction, we have
+ to take care when we see overlapping source and dest registers. */
+
+const char *
+output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
+ enum machine_mode mode)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (MEM_P (dst)
+ && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
+ return "mov.l %T1,%0\n\tmov.l %1,%0";
+
+ if (register_operand (dst, mode)
+ && register_operand (src, mode))
+ {
+ if (REGNO (src) == MACH_REG)
+ return "sts mach,%S0\n\tsts macl,%R0";
+
+ /* When mov.d r1,r2 do r2->r3 then r1->r2;
+ when mov.d r1,r0 do r1->r0 then r2->r1. */
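+
+      /* Illustration: for "mov.d r1,r2" the destination pair is r2/r3
+	 and the source pair is r1/r2; r2 is both the first destination
+	 register and the second source register, so the second word
+	 must be copied first (r2->r3), which is what the REGNO test
+	 below arranges.  */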
+
+ if (REGNO (src) + 1 == REGNO (dst))
+ return "mov %T1,%T0\n\tmov %1,%0";
+ else
+ return "mov %1,%0\n\tmov %T1,%T0";
+ }
+ else if (CONST_INT_P (src))
+ {
+ if (INTVAL (src) < 0)
+ output_asm_insn ("mov #-1,%S0", operands);
+ else
+ output_asm_insn ("mov #0,%S0", operands);
+
+ return "mov %1,%R0";
+ }
+ else if (MEM_P (src))
+ {
+ int ptrreg = -1;
+ int dreg = REGNO (dst);
+ rtx inside = XEXP (src, 0);
+
+ switch (GET_CODE (inside))
+ {
+ case REG:
+ ptrreg = REGNO (inside);
+ break;
+
+ case SUBREG:
+ ptrreg = subreg_regno (inside);
+ break;
+
+ case PLUS:
+ ptrreg = REGNO (XEXP (inside, 0));
+	  /* ??? An r0+REG address shouldn't be possible here, because it isn't
+	     an offsettable address.  Unfortunately, offsettable addresses use
+	     QImode to check the offset, and a QImode offsettable address
+	     requires r0 for the other operand, which is not currently
+	     supported, so we can't use the 'o' constraint.
+	     Thus we must check for and handle r0+REG addresses here.
+	     We punt for now, since this is likely very rare.  */
+ gcc_assert (!REG_P (XEXP (inside, 1)));
+ break;
+
+ case LABEL_REF:
+ return "mov.l %1,%0\n\tmov.l %1+4,%T0";
+ case POST_INC:
+ return "mov.l %1,%0\n\tmov.l %1,%T0";
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Work out the safe way to copy. Copy into the second half first. */
+ if (dreg == ptrreg)
+ return "mov.l %T1,%T0\n\tmov.l %1,%0";
+ }
+
+ return "mov.l %1,%0\n\tmov.l %T1,%T0";
+}
+
+/* Print an instruction which would have gone into a delay slot after
+ another instruction, but couldn't because the other instruction expanded
+ into a sequence where putting the slot insn at the end wouldn't work. */
+
+static void
+print_slot (rtx insn)
+{
+ final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
+
+ INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
+}
+
+const char *
+output_far_jump (rtx insn, rtx op)
+{
+ struct { rtx lab, reg, op; } this_jmp;
+ rtx braf_base_lab = NULL_RTX;
+ const char *jump;
+ int far;
+ int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+ rtx prev;
+
+ this_jmp.lab = gen_label_rtx ();
+
+ if (TARGET_SH2
+ && offset >= -32764
+ && offset - get_attr_length (insn) <= 32766)
+ {
+ far = 0;
+ jump = "mov.w %O0,%1; braf %1";
+ }
+ else
+ {
+ far = 1;
+ if (flag_pic)
+ {
+ if (TARGET_SH2)
+ jump = "mov.l %O0,%1; braf %1";
+ else
+ jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
+ }
+ else
+ jump = "mov.l %O0,%1; jmp @%1";
+ }
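+  /* Illustration of the non-PIC far case: "mov.l %O0,%1; jmp @%1" loads
+     the destination address from the ".long %O2" word emitted below at
+     the label in operand 0, then jumps through the scratch register.  */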
+ /* If we have a scratch register available, use it. */
+ if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
+ && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ {
+ this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
+ if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
+ jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
+ output_asm_insn (jump, &this_jmp.lab);
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+ else
+ output_asm_insn ("nop", 0);
+ }
+ else
+ {
+ /* Output the delay slot insn first if any. */
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+
+ this_jmp.reg = gen_rtx_REG (SImode, 13);
+      /* We must keep the stack aligned to 8-byte boundaries on SH5.
+	 Fortunately, MACL is fixed and call-clobbered, and we never
+	 need its value across jumps, so we can save r13 in it instead
+	 of on the stack.  */
+ if (TARGET_SH5)
+ output_asm_insn ("lds r13, macl", 0);
+ else
+ output_asm_insn ("mov.l r13,@-r15", 0);
+ output_asm_insn (jump, &this_jmp.lab);
+ if (TARGET_SH5)
+ output_asm_insn ("sts macl, r13", 0);
+ else
+ output_asm_insn ("mov.l @r15+,r13", 0);
+ }
+ if (far && flag_pic && TARGET_SH2)
+ {
+ braf_base_lab = gen_label_rtx ();
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (braf_base_lab));
+ }
+ if (far)
+ output_asm_insn (".align 2", 0);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
+ this_jmp.op = op;
+ if (far && flag_pic)
+ {
+ if (TARGET_SH2)
+ this_jmp.lab = braf_base_lab;
+ output_asm_insn (".long %O2-%O0", &this_jmp.lab);
+ }
+ else
+ output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
+ return "";
+}
+
+/* Local label counter, used for constants in the pool and inside
+ pattern branches. */
+
+static int lf = 100;
+
+/* Output code for ordinary branches. */
+
+const char *
+output_branch (int logic, rtx insn, rtx *operands)
+{
+ switch (get_attr_length (insn))
+ {
+ case 6:
+      /* This can happen if filling the delay slot has caused a forward
+	 branch to exceed its range (we could reverse it, but only
+	 when we know we won't overextend other branches; this is best
+	 handled by relaxation).
+	 It can also happen when other condbranches hoist delay slot insns
+	 from their destinations, thus leading to code size increase.
+	 But the branch will still be in the range -4092..+4098 bytes.  */
+
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
+
+ /* If the instruction in the delay slot is annulled (true), then
+ there is no delay slot where we can put it now. The only safe
+ place for it is after the label. final will do that by default. */
+
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
+ && get_attr_length (XVECEXP (final_sequence, 0, 1)))
+ {
+ asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ print_slot (final_sequence);
+ }
+ else
+ asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
+
+ output_asm_insn ("bra\t%l0", &op0);
+ fprintf (asm_out_file, "\tnop\n");
+ (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, handle this like a short branch. The linker
+ will fix it up if it still doesn't fit after relaxation. */
+ case 2:
+ return logic ? "bt%.\t%l0" : "bf%.\t%l0";
+
+ /* These are for SH2e, in which we have to account for the
+ extra nop because of the hardware bug in annulled branches. */
+ case 8:
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+
+ gcc_assert (!final_sequence
+ || !(INSN_ANNULLED_BRANCH_P
+ (XVECEXP (final_sequence, 0, 0))));
+ asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
+ logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ fprintf (asm_out_file, "\tnop\n");
+ output_asm_insn ("bra\t%l0", operands);
+ fprintf (asm_out_file, "\tnop\n");
+ (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, fall through. */
+ case 4:
+ {
+ char buffer[10];
+
+ sprintf (buffer, "b%s%ss\t%%l0",
+ logic ? "t" : "f",
+ ASSEMBLER_DIALECT ? "/" : ".");
+ output_asm_insn (buffer, &operands[0]);
+ return "nop";
+ }
+
+ default:
+ /* There should be no longer branches now - that would
+ indicate that something has destroyed the branches set
+ up in machine_dependent_reorg. */
+ gcc_unreachable ();
+ }
+}
+
+/* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
+   fill in operand 9 with a label to the successor insn.
+   We try to use jump threading where possible.
+   If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
+   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
+   follow jmp and bt, if the address is in range.  */
+const char *
+output_branchy_insn (enum rtx_code code, const char *templ,
+ rtx insn, rtx *operands)
+{
+ rtx next_insn = NEXT_INSN (insn);
+
+ if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
+ {
+ rtx src = SET_SRC (PATTERN (next_insn));
+ if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
+ {
+	  /* Following branch not taken.  */
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], next_insn);
+ INSN_ADDRESSES_NEW (operands[9],
+ INSN_ADDRESSES (INSN_UID (next_insn))
+ + get_attr_length (next_insn));
+ return templ;
+ }
+ else
+ {
+ int offset = (branch_dest (next_insn)
+ - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
+ if (offset >= -252 && offset <= 258)
+ {
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ /* branch_true */
+ src = XEXP (src, 1);
+ operands[9] = src;
+ return templ;
+ }
+ }
+ }
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], insn);
+ INSN_ADDRESSES_NEW (operands[9],
+ INSN_ADDRESSES (INSN_UID (insn))
+ + get_attr_length (insn));
+ return templ;
+}
+
+const char *
+output_ieee_ccmpeq (rtx insn, rtx *operands)
+{
+ return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
+ insn, operands);
+}
+
+/* Output the start of the assembler file. */
+
+static void
+sh_file_start (void)
+{
+ default_file_start ();
+
+#ifdef SYMBIAN
+ /* Declare the .directive section before it is used. */
+ fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
+ fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
+#endif
+
+ if (TARGET_ELF)
+ /* We need to show the text section with the proper
+ attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
+ emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
+ will complain. We can teach GAS specifically about the
+ default attributes for our choice of text section, but
+ then we would have to change GAS again if/when we change
+ the text section name. */
+ fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
+ else
+ /* Switch to the data section so that the coffsem symbol
+ isn't in the text section. */
+ switch_to_section (data_section);
+
+ if (TARGET_LITTLE_ENDIAN)
+ fputs ("\t.little\n", asm_out_file);
+
+ if (!TARGET_ELF)
+ {
+ if (TARGET_SHCOMPACT)
+ fputs ("\t.mode\tSHcompact\n", asm_out_file);
+ else if (TARGET_SHMEDIA)
+ fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
+ TARGET_SHMEDIA64 ? 64 : 32);
+ }
+}
+
+/* Check if PAT includes the UNSPEC_CALLER unspec pattern.  */
+
+static bool
+unspec_caller_rtx_p (rtx pat)
+{
+ rtx base, offset;
+ int i;
+
+ split_const (pat, &base, &offset);
+ if (GET_CODE (base) == UNSPEC)
+ {
+ if (XINT (base, 1) == UNSPEC_CALLER)
+ return true;
+ for (i = 0; i < XVECLEN (base, 0); i++)
+ if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
+ return true;
+ }
+ return false;
+}
+
+/* Indicate that INSN cannot be duplicated.  This is true for insns
+   that generate a unique label.  */
+
+static bool
+sh_cannot_copy_insn_p (rtx insn)
+{
+ rtx pat;
+
+ if (!reload_completed || !flag_pic)
+ return false;
+
+ if (!NONJUMP_INSN_P (insn))
+ return false;
+ if (asm_noperands (insn) >= 0)
+ return false;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) != SET)
+ return false;
+ pat = SET_SRC (pat);
+
+ if (unspec_caller_rtx_p (pat))
+ return true;
+
+ return false;
+}
+
+/* Actual number of instructions used to make a shift by N. */
+static const char ashiftrt_insns[] =
+ { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
+
+/* Left shift and logical right shift are the same. */
+static const char shift_insns[] =
+ { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+
+/* Individual shift amounts needed to get the above length sequences.
+ One bit right shifts clobber the T bit, so when possible, put one bit
+ shifts in the middle of the sequence, so the ends are eligible for
+ branch delay slots. */
+static const short shift_amounts[32][5] = {
+ {0}, {1}, {2}, {2, 1},
+ {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
+ {8}, {8, 1}, {8, 2}, {8, 1, 2},
+ {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
+ {16}, {16, 1}, {16, 2}, {16, 1, 2},
+ {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
+ {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
+ {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
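+
+/* For instance, a logical right shift by 13 uses shift_amounts[13]
+   = {8, 2, 1, 2}, i.e. shlr8, shlr2, shlr, shlr2 - four insns, matching
+   shift_insns[13] - with the T-bit-clobbering single-bit shift kept in
+   the middle so that the first and last insns remain delay slot
+   candidates.  */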
+
+/* Likewise, but for shift amounts < 16, up to three highmost bits
+ might be clobbered. This is typically used when combined with some
+ kind of sign or zero extension. */
+
+static const char ext_shift_insns[] =
+ { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+
+static const short ext_shift_amounts[32][4] = {
+ {0}, {1}, {2}, {2, 1},
+ {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
+ {8}, {8, 1}, {8, 2}, {8, 1, 2},
+ {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
+ {16}, {16, 1}, {16, 2}, {16, 1, 2},
+ {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
+ {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
+ {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
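+
+/* For instance, when the three highmost bits don't matter, a left shift
+   by 6 can use ext_shift_amounts[6] = {8, -2} - shift left 8, then
+   logical right 2 - doing in two insns what shift_insns[6] does in
+   three, at the price of clobbering the topmost bits.  */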
+
+/* Assuming we have a value that has been sign-extended by at least one bit,
+   can we use the ext_shift_amounts with the last shift turned to an
+   arithmetic shift, to shift it by N without data loss, and quicker than
+   by other means?  */
+#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
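+/* I.e. true exactly for N == 7 and N == 15, where the final entry of
+   ext_shift_amounts[N] is a one-bit right shift that can be made
+   arithmetic (shar) without losing the sign.  */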
+
+/* This is used in length attributes in sh.md to help compute the length
+ of arbitrary constant shift instructions. */
+
+int
+shift_insns_rtx (rtx insn)
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
+ enum rtx_code shift_code = GET_CODE (set_src);
+
+ switch (shift_code)
+ {
+ case ASHIFTRT:
+ return ashiftrt_insns[shift_count];
+ case LSHIFTRT:
+ case ASHIFT:
+ return shift_insns[shift_count];
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the cost of a shift. */
+
+static inline int
+shiftcosts (rtx x)
+{
+ int value;
+
+ if (TARGET_SHMEDIA)
+ return 1;
+
+ if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+ {
+ if (GET_MODE (x) == DImode
+ && CONST_INT_P (XEXP (x, 1))
+ && INTVAL (XEXP (x, 1)) == 1)
+ return 2;
+
+ /* Everything else is invalid, because there is no pattern for it. */
+ return MAX_COST;
+ }
+  /* If the shift is by a non-constant, then it will be expensive.  */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return SH_DYNAMIC_SHIFT_COST;
+
+ /* Otherwise, return the true cost in instructions. Cope with out of range
+ shift counts more or less arbitrarily. */
+ value = INTVAL (XEXP (x, 1)) & 31;
+
+ if (GET_CODE (x) == ASHIFTRT)
+ {
+ int cost = ashiftrt_insns[value];
+ /* If SH3, then we put the constant in a reg and use shad. */
+ if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
+ cost = 1 + SH_DYNAMIC_SHIFT_COST;
+ return cost;
+ }
+ else
+ return shift_insns[value];
+}
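+
+/* For example, an SImode shift left by 16 is a single shll16 (cost 1),
+   while an arithmetic shift right by 6 would need ashiftrt_insns[6] == 8
+   one-bit shifts, so on SH3 and later - sketching SH_DYNAMIC_SHIFT_COST
+   as 1 - its cost is capped at 1 + 1 for loading the count and using
+   shad.  */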
+
+/* Return the cost of an AND operation. */
+
+static inline int
+andcosts (rtx x)
+{
+ int i;
+
+  /* ANDing with a register is a single-cycle AND instruction.  */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return 1;
+
+ i = INTVAL (XEXP (x, 1));
+
+ if (TARGET_SHMEDIA)
+ {
+ if (satisfies_constraint_I10 (XEXP (x, 1))
+ || satisfies_constraint_J16 (XEXP (x, 1)))
+ return 1;
+ else
+ return 1 + rtx_cost (XEXP (x, 1), AND, !optimize_size);
+ }
+
+ /* These constants are single cycle extu.[bw] instructions. */
+ if (i == 0xff || i == 0xffff)
+ return 1;
+  /* Constants that can be used in an AND immediate instruction in a single
+     cycle; this requires r0, though, so make it a little more expensive.  */
+ if (CONST_OK_FOR_K08 (i))
+ return 2;
+ /* Constants that can be loaded with a mov immediate and an and.
+ This case is probably unnecessary. */
+ if (CONST_OK_FOR_I08 (i))
+ return 2;
+  /* Any other constant requires a 2 cycle pc-relative load plus an and.
+ This case is probably unnecessary. */
+ return 3;
+}
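+
+/* For example, "x & 0xffff" costs 1 (a single extu.w), "x & 0x7f" costs
+   2 (and #imm,r0 is a single cycle but ties up r0), and a mask such as
+   0x12345 costs 3 (a pc-relative load of the constant plus the and).  */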
+
+/* Return the cost of an addition or a subtraction. */
+
+static inline int
+addsubcosts (rtx x)
+{
+ /* Adding a register is a single cycle insn. */
+ if (REG_P (XEXP (x, 1))
+ || GET_CODE (XEXP (x, 1)) == SUBREG)
+ return 1;
+
+ /* Likewise for small constants. */
+ if (CONST_INT_P (XEXP (x, 1))
+ && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
+ return 1;
+
+ if (TARGET_SHMEDIA)
+ switch (GET_CODE (XEXP (x, 1)))
+ {
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return TARGET_SHMEDIA64 ? 5 : 3;
+
+ case CONST_INT:
+ if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
+ return 2;
+ else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
+ return 3;
+ else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
+ return 4;
+
+ /* Fall through. */
+ default:
+ return 5;
+ }
+
+ /* Any other constant requires a 2 cycle pc-relative load plus an
+ addition. */
+ return 3;
+}
+
+/* Return the cost of a multiply. */
+static inline int
+multcosts (rtx x ATTRIBUTE_UNUSED)
+{
+ if (sh_multcost >= 0)
+ return sh_multcost;
+ if (TARGET_SHMEDIA)
+ /* ??? We have a mul insn, but it has a latency of three, and doesn't
+ accept constants. Ideally, we would use a cost of one or two and
+ add the cost of the operand, but disregard the latter when inside loops
+ and loop invariant code motion is still to follow.
+ Using a multiply first and splitting it later if it's a loss
+ doesn't work because of different sign / zero extension semantics
+ of multiplies vs. shifts. */
+ return optimize_size ? 2 : 3;
+
+ if (TARGET_SH2)
+ {
+ /* We have a mul insn, so we can never take more than the mul and the
+ read of the mac reg, but count more because of the latency and extra
+ reg usage. */
+ if (optimize_size)
+ return 2;
+ return 3;
+ }
+
+ /* If we're aiming at small code, then just count the number of
+ insns in a multiply call sequence. */
+ if (optimize_size)
+ return 5;
+
+ /* Otherwise count all the insns in the routine we'd be calling too. */
+ return 20;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+sh_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ if (TARGET_SHMEDIA)
+ {
+ if (INTVAL (x) == 0)
+ *total = 0;
+ else if (outer_code == AND && and_operand ((x), DImode))
+ *total = 0;
+ else if ((outer_code == IOR || outer_code == XOR
+ || outer_code == PLUS)
+ && CONST_OK_FOR_I10 (INTVAL (x)))
+ *total = 0;
+ else if (CONST_OK_FOR_I16 (INTVAL (x)))
+ *total = COSTS_N_INSNS (outer_code != SET);
+ else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
+ *total = COSTS_N_INSNS ((outer_code != SET) + 1);
+ else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
+ *total = COSTS_N_INSNS ((outer_code != SET) + 2);
+ else
+ *total = COSTS_N_INSNS ((outer_code != SET) + 3);
+ return true;
+ }
+ if (CONST_OK_FOR_I08 (INTVAL (x)))
+ *total = 0;
+ else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
+ && CONST_OK_FOR_K08 (INTVAL (x)))
+ *total = 1;
+  /* prepare_cmp_insn will force costly constants into registers before
+ the cbranch[sd]i4 patterns can see them, so preserve potentially
+ interesting ones not covered by I08 above. */
+ else if (outer_code == COMPARE
+ && ((unsigned HOST_WIDE_INT) INTVAL (x)
+ == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
+ || INTVAL (x) == 0x7fffffff
+ || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
+ *total = 1;
+ else
+ *total = 8;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_SHMEDIA64)
+ *total = COSTS_N_INSNS (4);
+ else if (TARGET_SHMEDIA32)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 5;
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_SHMEDIA)
+ *total = COSTS_N_INSNS (4);
+      /* prepare_cmp_insn will force costly constants into registers before
+ the cbranchdi4 pattern can see them, so preserve potentially
+ interesting ones. */
+ else if (outer_code == COMPARE && GET_MODE (x) == DImode)
+ *total = 1;
+ else
+ *total = 10;
+ return true;
+ case CONST_VECTOR:
+ if (x == CONST0_RTX (GET_MODE (x)))
+ *total = 0;
+ else if (sh_1el_vec (x, VOIDmode))
+ *total = outer_code != SET;
+      else if (sh_rep_vec (x, VOIDmode))
+	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+		  + (outer_code != SET));
+      else
+	*total = COSTS_N_INSNS (3) + (outer_code != SET);
+ return true;
+
+ case PLUS:
+ case MINUS:
+ *total = COSTS_N_INSNS (addsubcosts (x));
+ return true;
+
+ case AND:
+ *total = COSTS_N_INSNS (andcosts (x));
+ return true;
+
+ case MULT:
+ *total = COSTS_N_INSNS (multcosts (x));
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = COSTS_N_INSNS (shiftcosts (x));
+ return true;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (20);
+ return true;
+
+ case PARALLEL:
+ if (sh_1el_vec (x, VOIDmode))
+ *total = outer_code != SET;
+      else if (sh_rep_vec (x, VOIDmode))
+	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+		  + (outer_code != SET));
+      else
+	*total = COSTS_N_INSNS (3) + (outer_code != SET);
+ return true;
+
+ case FLOAT:
+ case FIX:
+ *total = 100;
+ return true;
+
+ default:
+ return false;
+ }
+}
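+
+/* For example, on non-SHmedia targets a CONST_INT such as 42 is free
+   (it satisfies I08), 200 used as an AND/IOR/XOR operand costs 1 (K08),
+   and an arbitrary constant like 0x12345 is charged 8, discouraging
+   cse and combine from materializing it.  */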
+
+/* Compute the cost of an address. For the SH, all valid addresses are
+ the same cost. Use a slightly higher cost for reg + reg addressing,
+ since it increases pressure on r0. */
+
+static int
+sh_address_cost (rtx X,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ return (GET_CODE (X) == PLUS
+ && ! CONSTANT_P (XEXP (X, 1))
+ && ! TARGET_SHMEDIA ? 1 : 0);
+}
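+
+/* E.g. a reg + reg address - the @(r0,rn) form - costs 1 here, while
+   the @rn and @(disp,rn) forms cost 0.  */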
+
+/* Code to expand a shift. */
+
+void
+gen_ashift (int type, int n, rtx reg)
+{
+ /* Negative values here come from the shift_amounts array. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
+ break;
+ case LSHIFTRT:
+ if (n == 1)
+ emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
+ else
+ emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
+ break;
+ case ASHIFT:
+ emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
+/* Same for HImode.  */
+
+void
+gen_ashift_hi (int type, int n, rtx reg)
+{
+ /* Negative values here come from the shift_amounts array. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* We don't have HImode right shift operations because using the
+ ordinary 32 bit shift instructions for that doesn't generate proper
+ zero/sign extension.
+ gen_ashift_hi is only called in contexts where we know that the
+ sign extension works out correctly. */
+ {
+ int offset = 0;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ offset = SUBREG_BYTE (reg);
+ reg = SUBREG_REG (reg);
+ }
+ gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
+ break;
+ }
+ case ASHIFT:
+ emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
+/* Output RTL to split a constant shift into its component SH constant
+ shift instructions. */
+
+void
+gen_shifty_op (int code, rtx *operands)
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+
+ /* Truncate the shift count in case it is out of bounds. */
+ value = value & 31;
+
+ if (value == 31)
+ {
+ if (code == LSHIFTRT)
+ {
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ emit_insn (gen_movt (operands[0]));
+ return;
+ }
+ else if (code == ASHIFT)
+ {
+ /* There is a two instruction sequence for 31 bit left shifts,
+ but it requires r0. */
+ if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
+ {
+ emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+ return;
+ }
+ }
+ }
+ else if (value == 0)
+ {
+ /* This can happen even when optimizing, if there were subregs before
+ reload. Don't output a nop here, as this is never optimized away;
+ use a no-op move instead. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
+ return;
+ }
+
+ max = shift_insns[value];
+ for (i = 0; i < max; i++)
+ gen_ashift (code, shift_amounts[value][i], operands[0]);
+}
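+
+/* E.g. a constant left shift by 6 emits the three shifts from
+   shift_amounts[6] = {2, 2, 2}, i.e. shll2 three times.  */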
+
+/* Same as above, but optimized for values where the topmost bits don't
+ matter. */
+
+void
+gen_shifty_hi_op (int code, rtx *operands)
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+ void (*gen_fun) (int, int, rtx);
+
+ /* This operation is used by and_shl for SImode values with a few
+ high bits known to be cleared. */
+ value &= 31;
+ if (value == 0)
+ {
+ emit_insn (gen_nop ());
+ return;
+ }
+
+ gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
+ if (code == ASHIFT)
+ {
+ max = ext_shift_insns[value];
+ for (i = 0; i < max; i++)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+ }
+ else
+ /* When shifting right, emit the shifts in reverse order, so that
+ solitary negative values come first. */
+ for (i = ext_shift_insns[value] - 1; i >= 0; i--)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+}
+
+/* Output RTL for an arithmetic right shift. */
+
+/* ??? Rewrite to use super-optimizer sequences. */
+
+int
+expand_ashiftrt (rtx *operands)
+{
+ rtx wrk;
+ char func[18];
+ int value;
+
+ if (TARGET_SH3)
+ {
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx count = copy_to_mode_reg (SImode, operands[2]);
+ emit_insn (gen_negsi2 (count, count));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return 1;
+ }
+ else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
+ > 1 + SH_DYNAMIC_SHIFT_COST)
+ {
+ rtx count
+ = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return 1;
+ }
+ }
+ if (!CONST_INT_P (operands[2]))
+ return 0;
+
+ value = INTVAL (operands[2]) & 31;
+
+ if (value == 31)
+ {
+      /* If we are called from abs expansion, arrange things so that we
+	 can use a single MT instruction that doesn't clobber the source,
+ if LICM can hoist out the load of the constant zero. */
+ if (currently_expanding_to_rtl)
+ {
+ emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
+ operands[1]));
+ emit_insn (gen_mov_neg_si_t (operands[0]));
+ return 1;
+ }
+ emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
+ return 1;
+ }
+ else if (value >= 16 && value <= 19)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
+ value -= 16;
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return 1;
+ }
+  /* Expand a short sequence inline; for longer ones, call a magic
+     routine.  */
+ else if (value <= 5)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_move_insn (wrk, operands[1]);
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return 1;
+ }
+
+ wrk = gen_reg_rtx (Pmode);
+
+ /* Load the value into an arg reg and call a helper. */
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ sprintf (func, "__ashiftrt_r4_%d", value);
+ function_symbol (wrk, func, SFUNC_STATIC);
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
+ return 1;
+}
+
+int
+sh_dynamicalize_shift_p (rtx count)
+{
+ return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
+}
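+
+/* E.g. with a dynamic shift cost of 1 (a sketch - the real value is
+   target dependent), a constant shift that needs three insns, such as
+   a shift by 5, is better done dynamically, while a shift by 4 (two
+   insns) is not.  */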
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+	      (match_operand:SI 3 "const_int_operand" "n"))).
+ LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
+ return 0 for simple right / left or left/right shift combination.
+ return 1 for a combination of shifts with zero_extend.
+ return 2 for a combination of shifts with an AND that needs r0.
+ return 3 for a combination of shifts with an AND that needs an extra
+ scratch register, when the three highmost bits of the AND mask are clear.
+ return 4 for a combination of shifts with an AND that needs an extra
+ scratch register, when any of the three highmost bits of the AND mask
+ is set.
+ If ATTRP is set, store an initial right shift width in ATTRP[0],
+   and the instruction length in ATTRP[1].  These values are not valid
+ when returning 0.
+ When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
+ shift_amounts for the last shift value that is to be used before the
+ sign extend. */
+int
+shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
+{
+ unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
+ int left = INTVAL (left_rtx), right;
+ int best = 0;
+ int cost, best_cost = 10000;
+ int best_right = 0, best_len = 0;
+ int i;
+ int can_ext;
+
+ if (left < 0 || left > 31)
+ return 0;
+ if (CONST_INT_P (mask_rtx))
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
+ else
+ mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
+ /* Can this be expressed as a right shift / left shift pair? */
+ lsb = ((mask ^ (mask - 1)) >> 1) + 1;
+ right = exact_log2 (lsb);
+ mask2 = ~(mask + lsb - 1);
+ lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
+  /* The only zeroes in mask are trailing zeroes <==> ! mask2.  */
+ if (! mask2)
+ best_cost = shift_insns[right] + shift_insns[right + left];
+ /* mask has no trailing zeroes <==> ! right */
+ else if (! right && mask2 == ~(lsb2 - 1))
+ {
+ int late_right = exact_log2 (lsb2);
+ best_cost = shift_insns[left + late_right] + shift_insns[late_right];
+ }
+ /* Try to use zero extend. */
+ if (mask2 == ~(lsb2 - 1))
+ {
+ int width, first;
+
+ for (width = 8; width <= 16; width += 8)
+ {
+ /* Can we zero-extend right away? */
+ if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
+ {
+ cost
+ = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = -1;
+ }
+ continue;
+ }
+ /* ??? Could try to put zero extend into initial right shift,
+ or even shift a bit left before the right shift. */
+ /* Determine value of first part of left shift, to get to the
+ zero extend cut-off point. */
+ first = width - exact_log2 (lsb2) + right;
+ if (first >= 0 && right + left - first >= 0)
+ {
+ cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ + ext_shift_insns[right + left - first];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = first;
+ }
+ }
+ }
+ }
+  /* Try to use the r0 AND pattern.  */
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ if (! CONST_OK_FOR_K08 (mask >> i))
+ continue;
+ cost = (i != 0) + 2 + ext_shift_insns[left + i];
+ if (cost < best_cost)
+ {
+ best = 2;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1;
+ }
+ }
+ /* Try to use a scratch register to hold the AND operand. */
+ can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
+ + (can_ext ? ext_shift_insns : shift_insns)[left + i];
+ if (cost < best_cost)
+ {
+ best = 4 - can_ext;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
+ }
+ }
+
+ if (attrp)
+ {
+ attrp[0] = best_right;
+ attrp[1] = best_len;
+ }
+ return best;
+}
+
+/* This is used in length attributes of the unnamed instructions
+ corresponding to shl_and_kind return values of 1 and 2. */
+int
+shl_and_length (rtx insn)
+{
+ rtx set_src, left_rtx, mask_rtx;
+ int attributes[3];
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ mask_rtx = XEXP (set_src, 1);
+ shl_and_kind (left_rtx, mask_rtx, attributes);
+ return attributes[1];
+}
+
+/* This is used in the length attribute of the and_shl_scratch instruction.  */
+
+int
+shl_and_scr_length (rtx insn)
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
+ rtx op = XEXP (set_src, 0);
+ len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
+ op = XEXP (XEXP (op, 0), 0);
+ return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
+}
+
+/* Generate rtl for instructions for which shl_and_kind advised a particular
+   method of generating them, i.e. returned nonzero.  */
+
+int
+gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
+{
+ int attributes[3];
+ unsigned HOST_WIDE_INT mask;
+ int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
+ int right, total_shift;
+ void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
+
+ right = attributes[0];
+ total_shift = INTVAL (left_rtx) + right;
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
+ switch (kind)
+ {
+ default:
+ return -1;
+ case 1:
+ {
+ int first = attributes[2];
+ rtx operands[3];
+
+ if (first < 0)
+ {
+ emit_insn ((mask << right) <= 0xff
+ ? gen_zero_extendqisi2 (dest,
+ gen_lowpart (QImode, source))
+ : gen_zero_extendhisi2 (dest,
+ gen_lowpart (HImode, source)));
+ source = dest;
+ }
+ if (source != dest)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (right)
+ {
+ operands[2] = GEN_INT (right);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ if (first > 0)
+ {
+ operands[2] = GEN_INT (first);
+ gen_shifty_hi_op (ASHIFT, operands);
+ total_shift -= first;
+ mask <<= first;
+ }
+ if (first >= 0)
+ emit_insn (mask <= 0xff
+ ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ if (total_shift > 0)
+ {
+ operands[2] = GEN_INT (total_shift);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ break;
+ }
+ case 4:
+      shift_gen_fun = gen_shifty_op;
+      /* Fall through.  */
+    case 3:
+ /* If the topmost bit that matters is set, set the topmost bits
+ that don't matter. This way, we might be able to get a shorter
+ signed constant. */
+      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
+	mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
+      /* Fall through.  */
+    case 2:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (currently_expanding_to_rtl
+ || reload_in_progress || reload_completed)
+ {
+ rtx operands[3];
+
+	  /* Cases 3 and 4 should be handled by this split
+	     only while combining.  */
+ gcc_assert (kind <= 2);
+ if (right)
+ {
+ emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
+ source = dest;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
+ if (total_shift)
+ {
+ operands[0] = dest;
+ operands[1] = dest;
+ operands[2] = GEN_INT (total_shift);
+ shift_gen_fun (ASHIFT, operands);
+ }
+ break;
+ }
+ else
+ {
+ int neg = 0;
+ if (kind != 4 && total_shift < 16)
+ {
+ neg = -ext_shift_amounts[total_shift][1];
+ if (neg > 0)
+ neg -= ext_shift_amounts[total_shift][2];
+ else
+ neg = 0;
+ }
+ emit_insn (gen_and_shl_scratch (dest, source,
+ GEN_INT (right),
+ GEN_INT (mask),
+ GEN_INT (total_shift + neg),
+ GEN_INT (neg)));
+ emit_insn (gen_movsi (dest, dest));
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
+ return 0 for simple left / right shift combination.
+ return 1 for left shift / 8 bit sign extend / left shift.
+ return 2 for left shift / 16 bit sign extend / left shift.
+ return 3 for left shift / 8 bit sign extend / shift / sign extend.
+ return 4 for left shift / 16 bit sign extend / shift / sign extend.
+   return 5 for left shift / 16 bit sign extend / right shift.
+ return 6 for < 8 bit sign extend / left shift.
+ return 7 for < 8 bit sign extend / left shift / single right shift.
+ If COSTP is nonzero, assign the calculated cost to *COSTP. */
+
+int
+shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
+{
+ int left, size, insize, ext;
+ int cost = 0, best_cost;
+ int kind;
+
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ gcc_assert (insize > 0);
+ /* Default to left / right shift. */
+ kind = 0;
+ best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
+ if (size <= 16)
+ {
+      /* 16 bit shift / sign extend / 16 bit shift.  */
+ cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
+ /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
+ below, by alternative 3 or something even better. */
+ if (cost < best_cost)
+ {
+ kind = 5;
+ best_cost = cost;
+ }
+ }
+ /* Try a plain sign extend between two shifts. */
+ for (ext = 16; ext >= insize; ext -= 8)
+ {
+ if (ext <= size)
+ {
+ cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
+ if (cost < best_cost)
+ {
+ kind = ext / (unsigned) 8;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can do a sloppy shift with a final signed shift
+ restoring the sign. */
+ if (EXT_SHIFT_SIGNED (size - ext))
+ cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
+ /* If not, maybe it's still cheaper to do the second shift sloppy,
+ and do a final sign extend? */
+ else if (size <= 16)
+ cost = ext_shift_insns[ext - insize] + 1
+ + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
+ else
+ continue;
+ if (cost < best_cost)
+ {
+ kind = ext / (unsigned) 8 + 2;
+ best_cost = cost;
+ }
+ }
+  /* Check if we can sign extend in r0.  */
+ if (insize < 8)
+ {
+ cost = 3 + shift_insns[left];
+ if (cost < best_cost)
+ {
+ kind = 6;
+ best_cost = cost;
+ }
+ /* Try the same with a final signed shift. */
+ if (left < 31)
+ {
+ cost = 3 + ext_shift_insns[left + 1] + 1;
+ if (cost < best_cost)
+ {
+ kind = 7;
+ best_cost = cost;
+ }
+ }
+ }
+ if (TARGET_SH3)
+ {
+ /* Try to use a dynamic shift. */
+ cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
+ if (cost < best_cost)
+ {
+ kind = 0;
+ best_cost = cost;
+ }
+ }
+ if (costp)
+ *costp = cost;
+ return kind;
+}
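+
+/* As a worked example, LEFT == 8 with SIZE == 16 gives kind 5 at a cost
+   of 2: shift left by 16 - insize == 8, then one 16 bit sign extend
+   (exts.w), with no trailing right shift since size == 16.  */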
+
+/* Function to be used in the length attribute of the instructions
+ implementing this pattern. */
+
+int
+shl_sext_length (rtx insn)
+{
+ rtx set_src, left_rtx, size_rtx;
+ int cost;
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ size_rtx = XEXP (set_src, 1);
+ shl_sext_kind (left_rtx, size_rtx, &cost);
+ return cost;
+}
+
+/* Generate rtl for this pattern.  */
+
+int
+gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
+{
+ int kind;
+ int left, size, insize, cost;
+ rtx operands[3];
+
+ kind = shl_sext_kind (left_rtx, size_rtx, &cost);
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ switch (kind)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ {
+ int ext = kind & 1 ? 8 : 16;
+ int shift2 = size - ext;
+
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ if (dest != source)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (ext - insize)
+ {
+ operands[2] = GEN_INT (ext - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ emit_insn (kind & 1
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ if (kind <= 2)
+ {
+ if (shift2)
+ {
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_op (ASHIFT, operands);
+ }
+ }
+ else
+ {
+ if (shift2 > 0)
+ {
+ if (EXT_SHIFT_SIGNED (shift2))
+ {
+ operands[2] = GEN_INT (shift2 + 1);
+ gen_shifty_op (ASHIFT, operands);
+ operands[2] = const1_rtx;
+ gen_shifty_op (ASHIFTRT, operands);
+ break;
+ }
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ else if (shift2)
+ {
+ operands[2] = GEN_INT (-shift2);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ emit_insn (size <= 8
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ break;
+ }
+ case 5:
+ {
+ int i = 16 - size;
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ else
+ {
+ operands[0] = dest;
+ operands[2] = GEN_INT (16 - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ /* Don't use gen_ashrsi3 because it generates new pseudos. */
+ while (--i >= 0)
+ gen_ashift (ASHIFTRT, 1, dest);
+ break;
+ }
+ case 6:
+ case 7:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
+ emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
+ emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
+ operands[0] = dest;
+ operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
+ gen_shifty_op (ASHIFT, operands);
+ if (kind == 7)
+ emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+/* Prefix a symbol_ref name with "datalabel". */
+
+rtx
+gen_datalabel_ref (rtx sym)
+{
+ const char *str;
+
+ if (GET_CODE (sym) == LABEL_REF)
+ return gen_rtx_CONST (GET_MODE (sym),
+ gen_rtx_UNSPEC (GET_MODE (sym),
+ gen_rtvec (1, sym),
+ UNSPEC_DATALABEL));
+
+ gcc_assert (GET_CODE (sym) == SYMBOL_REF);
+
+ str = XSTR (sym, 0);
+ /* Share all SYMBOL_REF strings with the same value - that is important
+ for cse. */
+ str = IDENTIFIER_POINTER (get_identifier (str));
+ XSTR (sym, 0) = str;
+
+ return sym;
+}
+
+
+static alloc_pool label_ref_list_pool;
+
+typedef struct label_ref_list_d
+{
+ rtx label;
+ struct label_ref_list_d *next;
+} *label_ref_list_t;
+
+/* The SH cannot load a large constant into a register; constants have to
+   come from a pc-relative load.  The reference of a pc-relative load
+   instruction must be less than 1k in front of the instruction.  This
+ means that we often have to dump a constant inside a function, and
+ generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow things
+ down and make things bigger.
+
+ Worst case code looks like:
+
+ mov.l L1,rn
+ bra L2
+ nop
+ align
+ L1: .long value
+ L2:
+ ..
+
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3: .long value
+ L4:
+ ..
+
+ We fix this by performing a scan before scheduling, which notices which
+ instructions need to have their operands fetched from the constant table
+ and builds the table.
+
+ The algorithm is:
+
+   Scan for an instruction which needs a pcrel move.  Look forward, and
+   find the last barrier which is within MAX_COUNT bytes of the requirement.
+   If there isn't one, make one.  Process all the instructions between
+   the found instruction and the barrier.
+
+ In the above example, we can tell that L3 is within 1k of L1, so
+ the first move can be shrunk from the 3 insn+constant sequence into
+ just 1 insn, and the constant moved to L3 to make:
+
+ mov.l L1,rn
+ ..
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3:.long value
+ L4:.long value
+
+ Then the second move becomes the target for the shortening process. */
+
+typedef struct
+{
+ rtx value; /* Value in table. */
+ rtx label; /* Label of value. */
+ label_ref_list_t wend; /* End of window. */
+ enum machine_mode mode; /* Mode of value. */
+
+ /* True if this constant is accessed as part of a post-increment
+ sequence. Note that HImode constants are never accessed in this way. */
+ bool part_of_sequence_p;
+} pool_node;
+
+/* The maximum number of constants that can fit into one pool, since
+ constants in the range 0..510 are at least 2 bytes long, and in the
+ range from there to 1018 at least 4 bytes. */
+
+#define MAX_POOL_SIZE 372
+static pool_node pool_vector[MAX_POOL_SIZE];
+static int pool_size;
+static rtx pool_window_label;
+static int pool_window_last;
+
+static int max_labelno_before_reorg;
+
+/* ??? If we need a constant in HImode which is the truncated value of a
+   constant we need in SImode, we could combine the two entries, thus saving
+   two bytes.  Is this common enough to be worth the effort of implementing
+ it? */
+
+/* ??? This stuff should be done at the same time that we shorten branches.
+ As it is now, we must assume that all branches are the maximum size, and
+ this causes us to almost always output constant pools sooner than
+ necessary. */
+
+/* Add a constant to the pool and return its label. */
+
+static rtx
+add_constant (rtx x, enum machine_mode mode, rtx last_value)
+{
+ int i;
+ rtx lab, new_rtx;
+ label_ref_list_t ref, newref;
+
+ /* First see if we've already got it. */
+ for (i = 0; i < pool_size; i++)
+ {
+ if (x->code == pool_vector[i].value->code
+ && mode == pool_vector[i].mode)
+ {
+ if (x->code == CODE_LABEL)
+ {
+ if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
+ continue;
+ }
+ if (rtx_equal_p (x, pool_vector[i].value))
+ {
+ lab = new_rtx = 0;
+ if (! last_value
+ || ! i
+ || ! rtx_equal_p (last_value, pool_vector[i-1].value))
+ {
+ new_rtx = gen_label_rtx ();
+ LABEL_REFS (new_rtx) = pool_vector[i].label;
+ pool_vector[i].label = lab = new_rtx;
+ }
+ if (lab && pool_window_label)
+ {
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
+ ref = pool_vector[pool_window_last].wend;
+ newref->next = ref;
+ pool_vector[pool_window_last].wend = newref;
+ }
+ if (new_rtx)
+ pool_window_label = new_rtx;
+ pool_window_last = i;
+ return lab;
+ }
+ }
+ }
+
+ /* Need a new one. */
+ pool_vector[pool_size].value = x;
+ if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
+ {
+ lab = 0;
+ pool_vector[pool_size - 1].part_of_sequence_p = true;
+ }
+ else
+ lab = gen_label_rtx ();
+ pool_vector[pool_size].mode = mode;
+ pool_vector[pool_size].label = lab;
+ pool_vector[pool_size].wend = NULL;
+ pool_vector[pool_size].part_of_sequence_p = (lab == 0);
+ if (lab && pool_window_label)
+ {
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
+ ref = pool_vector[pool_window_last].wend;
+ newref->next = ref;
+ pool_vector[pool_window_last].wend = newref;
+ }
+ if (lab)
+ pool_window_label = lab;
+ pool_window_last = pool_size;
+ pool_size++;
+ return lab;
+}
+
+/* Output the literal table. START, if nonzero, is the first instruction
+ this table is needed for, and also indicates that there is at least one
+   casesi_worker_2 instruction; we have to emit the operand3 labels from
+ these insns at a 4-byte aligned position. BARRIER is the barrier
+ after which we are to place the table. */
+
+static void
+dump_table (rtx start, rtx barrier)
+{
+ rtx scan = barrier;
+ int i;
+ int need_align = 1;
+ rtx lab;
+ label_ref_list_t ref;
+ int have_df = 0;
+
+  /* Do two passes; the first time, dump out the HI sized constants.  */
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ if (p->mode == HImode)
+ {
+ if (need_align)
+ {
+ scan = emit_insn_after (gen_align_2 (), scan);
+ need_align = 0;
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
+ scan);
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab), scan);
+ }
+ }
+ else if (p->mode == DFmode)
+ have_df = 1;
+ }
+
+ need_align = 1;
+
+ if (start)
+ {
+ scan = emit_insn_after (gen_align_4 (), scan);
+ need_align = 0;
+ for (; start != barrier; start = NEXT_INSN (start))
+ if (NONJUMP_INSN_P (start)
+ && recog_memoized (start) == CODE_FOR_casesi_worker_2)
+ {
+ rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
+ rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
+
+ scan = emit_label_after (lab, scan);
+ }
+ }
+ if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
+ {
+ rtx align_insn = NULL_RTX;
+
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
+ need_align = 0;
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ switch (p->mode)
+ {
+ case HImode:
+ break;
+ case SImode:
+ case SFmode:
+ if (align_insn && !p->part_of_sequence_p)
+ {
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ emit_label_before (lab, align_insn);
+ emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
+ align_insn);
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ emit_insn_before (gen_consttable_window_end (lab),
+ align_insn);
+ }
+ delete_insn (align_insn);
+ align_insn = NULL_RTX;
+ continue;
+ }
+ else
+ {
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_4 (p->value,
+ const0_rtx), scan);
+ need_align = ! need_align;
+ }
+ break;
+ case DFmode:
+ if (need_align)
+ {
+ scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
+ align_insn = scan;
+ need_align = 0;
+ }
+	      /* Fall through.  */
+	    case DImode:
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
+ scan);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (p->mode != HImode)
+ {
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab),
+ scan);
+ }
+ }
+ }
+
+ pool_size = 0;
+ }
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ switch (p->mode)
+ {
+ case HImode:
+ break;
+ case SImode:
+ case SFmode:
+ if (need_align)
+ {
+ need_align = 0;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
+ scan);
+ break;
+ case DFmode:
+ case DImode:
+ if (need_align)
+ {
+ need_align = 0;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
+ scan);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (p->mode != HImode)
+ {
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab), scan);
+ }
+ }
+ }
+
+ scan = emit_insn_after (gen_consttable_end (), scan);
+ scan = emit_barrier_after (scan);
+ pool_size = 0;
+ pool_window_label = NULL_RTX;
+ pool_window_last = 0;
+}
+
+/* Return nonzero if constant would be an ok source for a
+ mov.w instead of a mov.l. */
+
+static int
+hi_const (rtx src)
+{
+ return (CONST_INT_P (src)
+ && INTVAL (src) >= -32768
+ && INTVAL (src) <= 32767);
+}
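+
+/* E.g. 0x7fff can come from a mov.w, but 0x8000 cannot - the 16 bit
+   load would sign-extend it to 0xffff8000.  */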
+
+#define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
+
+/* Nonzero if the insn is a move instruction which needs to be fixed. */
+
+/* ??? For DImode/DFmode moves, we don't need to fix them if each half of the
+   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
+   need to fix it if the input value is CONST_OK_FOR_I08.  */
+
+static int
+broken_move (rtx insn)
+{
+ if (NONJUMP_INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET
+ /* We can load any 8-bit value if we don't care what the high
+ order bits end up as. */
+ && GET_MODE (SET_DEST (pat)) != QImode
+ && (CONSTANT_P (SET_SRC (pat))
+ /* Match mova_const. */
+ || (GET_CODE (SET_SRC (pat)) == UNSPEC
+ && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
+ && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
+ && ! (TARGET_SH2E
+ && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
+ && (fp_zero_operand (SET_SRC (pat))
+ || fp_one_operand (SET_SRC (pat)))
+ /* In general we don't know the current setting of fpscr, so disable fldi.
+ There is an exception if this was a register-register move
+ before reload - and hence it was ascertained that we have
+ single precision setting - and in a post-reload optimization
+ we changed this to do a constant load. In that case
+ we don't have an r0 clobber, hence we must use fldi. */
+ && (TARGET_FMOVD
+ || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
+ == SCRATCH))
+ && REG_P (SET_DEST (pat))
+ && FP_REGISTER_P (REGNO (SET_DEST (pat))))
+ && ! (TARGET_SH2A
+ && GET_MODE (SET_DEST (pat)) == SImode
+ && (satisfies_constraint_I20 (SET_SRC (pat))
+ || satisfies_constraint_I28 (SET_SRC (pat))))
+ && ! satisfies_constraint_I08 (SET_SRC (pat)))
+ return 1;
+ }
+
+ return 0;
+}
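+
+/* E.g. loading the SImode constant 300 is a broken move on plain SH -
+   it needs a pc-relative mov.l - while 100 satisfies I08 and is not;
+   on SH2A, 300 also stays unbroken thanks to the 20 bit movi20 range.  */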
+
+static int
+mova_p (rtx insn)
+{
+ return (NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
+ /* Don't match mova_const. */
+ && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
+}
+
+/* Fix up a mova from a switch that went out of range. */
+static void
+fixup_mova (rtx mova)
+{
+ PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
+ if (! flag_pic)
+ {
+ SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
+ INSN_CODE (mova) = -1;
+ }
+ else
+ {
+ rtx worker = mova;
+ rtx lab = gen_label_rtx ();
+ rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
+
+ do
+ {
+ worker = NEXT_INSN (worker);
+ gcc_assert (worker
+ && !LABEL_P (worker)
+ && !JUMP_P (worker));
+ } while (NOTE_P (worker)
+ || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
+ wpat = PATTERN (worker);
+ wpat0 = XVECEXP (wpat, 0, 0);
+ wpat1 = XVECEXP (wpat, 0, 1);
+ wsrc = SET_SRC (wpat0);
+ PATTERN (worker) = (gen_casesi_worker_2
+ (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
+ XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
+ XEXP (wpat1, 0)));
+ INSN_CODE (worker) = -1;
+ target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ base = gen_rtx_LABEL_REF (Pmode, lab);
+ diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
+ SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
+ INSN_CODE (mova) = -1;
+ }
+}
+
+/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
+   *num_mova, and check that the new mova is not nested within the first one.
+   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
+   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
+static int
+untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
+{
+ int n_addr = 0; /* Initialization to shut up spurious warning. */
+ int f_target, n_target = 0; /* Likewise. */
+
+ if (optimize)
+ {
+ /* If NEW_MOVA has no address yet, it will be handled later. */
+ if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
+ return -1;
+
+ n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
+ n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
+ if (n_addr > n_target || n_addr + 1022 < n_target)
+ {
+ /* Change the mova into a load.
+ broken_move will then return true for it. */
+ fixup_mova (new_mova);
+ return 1;
+ }
+ }
+ if (!(*num_mova)++)
+ {
+ *first_mova = new_mova;
+ return 2;
+ }
+ if (!optimize
+ || ((f_target
+ = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
+ >= n_target))
+ return -1;
+
+ (*num_mova)--;
+ if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
+ > n_target - n_addr)
+ {
+ fixup_mova (*first_mova);
+ return 0;
+ }
+ else
+ {
+ fixup_mova (new_mova);
+ return 1;
+ }
+}
+
+/* Find the last barrier from insn FROM which is close enough to hold the
+ constant pool. If we can't find one, then create one near the end of
+ the range. */
+
+static rtx
+find_barrier (int num_mova, rtx mova, rtx from)
+{
+ int count_si = 0;
+ int count_hi = 0;
+ int found_hi = 0;
+ int found_si = 0;
+ int found_di = 0;
+ int hi_align = 2;
+ int si_align = 2;
+ int leading_mova = num_mova;
+ rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
+ int si_limit;
+ int hi_limit;
+ rtx orig = from;
+ rtx last_got = NULL_RTX;
+ rtx last_symoff = NULL_RTX;
+
+ /* For HImode: range is 510, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 for the jump instruction
+ that we may need to emit before the table, subtract 2 for the instruction
+ that fills the jump delay slot (in very rare cases, reorg will take an
+ instruction from after the constant pool or will leave the delay slot
+ empty). This gives 510.
+ For SImode: range is 1020, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 in case pc is 2 byte
+ aligned, subtract 2 for the jump instruction that we may need to emit
+ before the table, subtract 2 for the instruction that fills the jump
+ delay slot. This gives 1018. */
+
+ /* The branch will always be shortened now that the reference address for
+ forward branches is the successor address, thus we need no longer make
+ adjustments to the [sh]i_limit for -O0. */
+
+ si_limit = 1018;
+ hi_limit = 510;
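+
+  /* That is, hi_limit == 510 + 4 - 2 - 2 and
+     si_limit == 1020 + 4 - 2 - 2 - 2, restating the accounting above.  */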
+
+ while (from && count_si < si_limit && count_hi < hi_limit)
+ {
+ int inc = get_attr_length (from);
+ int new_align = 1;
+
+ /* If this is a label that existed at the time of the compute_alignments
+ call, determine the alignment. N.B. When find_barrier recurses for
+ an out-of-reach mova, we might see labels at the start of previously
+ inserted constant tables. */
+ if (LABEL_P (from)
+ && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
+ {
+ if (optimize)
+ new_align = 1 << label_to_alignment (from);
+ else if (BARRIER_P (prev_nonnote_insn (from)))
+ new_align = 1 << barrier_align (from);
+ else
+ new_align = 1;
+ inc = 0;
+ }
+ /* In case we are scanning a constant table because of recursion, check
+ for explicit alignments. If the table is long, we might be forced
+ to emit the new table in front of it; the length of the alignment
+ might be the last straw. */
+ else if (NONJUMP_INSN_P (from)
+ && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
+ new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
+ /* When we find the end of a constant table, paste the new constant
+ at the end. That is better than putting it in front because
+ this way, we don't need extra alignment for adding a 4-byte-aligned
+ mov(a) label to a 2/4 or 8/4 byte aligned table. */
+ else if (NONJUMP_INSN_P (from)
+ && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
+ return from;
+
+ if (BARRIER_P (from))
+ {
+ rtx next;
+
+ found_barrier = from;
+
+ /* If we are at the end of the function, or in front of an alignment
+ instruction, we need not insert an extra alignment. We prefer
+ this kind of barrier. */
+ if (barrier_align (from) > 2)
+ good_barrier = from;
+
+ /* If we are at the end of a hot/cold block, dump the constants
+ here. */
+ next = NEXT_INSN (from);
+ if (next
+ && NOTE_P (next)
+ && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+ break;
+ }
+
+ if (broken_move (from))
+ {
+ rtx pat, src, dst;
+ enum machine_mode mode;
+
+ pat = PATTERN (from);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+	  /* A GOT pc-relative setting comes in a pair of
+	     mova	.L8,r0
+	     mov.l	.L8,r12
+	     instructions (plus an add r0,r12).
+	     Remember if we see one without the other.  */
+ if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
+ last_got = last_got ? NULL_RTX : from;
+ else if (PIC_ADDR_P (src))
+ last_got = last_got ? NULL_RTX : from;
+
+ /* We must explicitly check the mode, because sometimes the
+ front end will generate code to load unsigned constants into
+ HImode targets without properly sign extending them. */
+ if (mode == HImode
+ || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
+ {
+ found_hi += 2;
+ /* We put the short constants before the long constants, so
+ we must count the length of short constants in the range
+ for the long constants. */
+ /* ??? This isn't optimal, but is easy to do. */
+ si_limit -= 2;
+ }
+ else
+ {
+ /* We dump DF/DI constants before SF/SI ones, because
+ the limit is the same, but the alignment requirements
+ are higher. We may waste up to 4 additional bytes
+ for alignment, and the DF/DI constant may have
+ another SF/SI constant placed before it. */
+ if (TARGET_SHCOMPACT
+ && ! found_di
+ && (mode == DFmode || mode == DImode))
+ {
+ found_di = 1;
+ si_limit -= 8;
+ }
+ while (si_align > 2 && found_si + si_align - 2 > count_si)
+ si_align >>= 1;
+ if (found_si > count_si)
+ count_si = found_si;
+ found_si += GET_MODE_SIZE (mode);
+ if (num_mova)
+ si_limit -= GET_MODE_SIZE (mode);
+ }
+ }
+
+ if (mova_p (from))
+ {
+ switch (untangle_mova (&num_mova, &mova, from))
+ {
+ case 1:
+ if (flag_pic)
+ {
+ rtx src = SET_SRC (PATTERN (from));
+ if (GET_CODE (src) == CONST
+ && GET_CODE (XEXP (src, 0)) == UNSPEC
+ && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
+ last_symoff = from;
+ }
+ break;
+ case 0: return find_barrier (0, 0, mova);
+ case 2:
+ {
+ leading_mova = 0;
+ barrier_before_mova
+ = good_barrier ? good_barrier : found_barrier;
+ }
+ default: break;
+ }
+ if (found_si > count_si)
+ count_si = found_si;
+ }
+ else if (JUMP_TABLE_DATA_P (from))
+ {
+ if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
+ || (num_mova
+ && (prev_nonnote_insn (from)
+ == XEXP (MOVA_LABELREF (mova), 0))))
+ num_mova--;
+ if (barrier_align (next_real_insn (from)) == align_jumps_log)
+ {
+ /* We have just passed the barrier in front of the
+ ADDR_DIFF_VEC, which is stored in found_barrier. Since
+ the ADDR_DIFF_VEC is accessed as data, just like our pool
+ constants, this is a good opportunity to accommodate what
+ we have gathered so far.
+ If we waited any longer, we could end up at a barrier in
+ front of code, which gives worse cache usage for separated
+ instruction / data caches. */
+ good_barrier = found_barrier;
+ break;
+ }
+ else
+ {
+ rtx body = PATTERN (from);
+ inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
+ }
+ }
+ /* For the SH1, we generate alignments even after jumps-around-jumps. */
+ else if (JUMP_P (from)
+ && ! TARGET_SH2
+ && ! optimize_size)
+ new_align = 4;
+
+ /* There is a possibility that a bf is transformed into a bf/s by the
+ delay slot scheduler. */
+ if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
+ && get_attr_type (from) == TYPE_CBRANCH
+ && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (from)))) != SEQUENCE)
+ inc += 2;
+
+ if (found_si)
+ {
+ count_si += inc;
+ if (new_align > si_align)
+ {
+ si_limit -= (count_si - 1) & (new_align - si_align);
+ si_align = new_align;
+ }
+ count_si = (count_si + new_align - 1) & -new_align;
+ }
+ if (found_hi)
+ {
+ count_hi += inc;
+ if (new_align > hi_align)
+ {
+ hi_limit -= (count_hi - 1) & (new_align - hi_align);
+ hi_align = new_align;
+ }
+ count_hi = (count_hi + new_align - 1) & -new_align;
+ }
+ from = NEXT_INSN (from);
+ }
+
+ if (num_mova)
+ {
+ if (leading_mova)
+ {
+ /* Try as we might, the leading mova is out of range. Change
+ it into a load (which will become a pcload) and retry. */
+ fixup_mova (mova);
+ return find_barrier (0, 0, mova);
+ }
+ else
+ {
+ /* Insert the constant pool table before the mova instruction,
+ to prevent the mova label reference from going out of range. */
+ from = mova;
+ good_barrier = found_barrier = barrier_before_mova;
+ }
+ }
+
+ if (found_barrier)
+ {
+ if (good_barrier && next_real_insn (found_barrier))
+ found_barrier = good_barrier;
+ }
+ else
+ {
+ /* We didn't find a barrier in time to dump our stuff,
+ so we'll make one. */
+ rtx label = gen_label_rtx ();
+
+      /* Don't emit a constant table in the middle of insns for
+	 casesi_worker_2.  This is a bit overkill but is enough
+	 because casesi_worker_2 doesn't appear very frequently.  */
+ if (last_symoff)
+ from = last_symoff;
+
+ /* If we exceeded the range, then we must back up over the last
+ instruction we looked at. Otherwise, we just need to undo the
+ NEXT_INSN at the end of the loop. */
+ if (PREV_INSN (from) != orig
+ && (count_hi > hi_limit || count_si > si_limit))
+ from = PREV_INSN (PREV_INSN (from));
+ else
+ from = PREV_INSN (from);
+
+      /* Don't emit a constant table in the middle of global pointer setting,
+	 since that would move the addressing base GOT into another table.
+ We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
+ in the pool anyway, so just move up the whole constant pool. */
+ if (last_got)
+ from = PREV_INSN (last_got);
+
+ /* Don't insert the constant pool table at the position which
+ may be the landing pad. */
+ if (flag_exceptions
+ && CALL_P (from)
+ && find_reg_note (from, REG_EH_REGION, NULL_RTX))
+ from = PREV_INSN (from);
+
+ /* Walk back to be just before any jump or label.
+ Putting it before a label reduces the number of times the branch
+ around the constant pool table will be hit. Putting it before
+ a jump makes it more likely that the bra delay slot will be
+ filled. */
+ while (NOTE_P (from) || JUMP_P (from)
+ || LABEL_P (from))
+ from = PREV_INSN (from);
+
+ from = emit_jump_insn_after (gen_jump (label), from);
+ JUMP_LABEL (from) = label;
+ LABEL_NUSES (label) = 1;
+ found_barrier = emit_barrier_after (from);
+ emit_label_after (label, found_barrier);
+ }
+
+ return found_barrier;
+}
+
+/* If the instruction INSN is implemented by a special function, and we can
+ positively find the register that is used to call the sfunc, and this
+ register is not used anywhere else in this instruction - except as the
+ destination of a set, return this register; else, return 0. */
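+/* A typical sfunc call pattern is a PARALLEL along the lines of
+ (parallel [(set ...) ... (use (reg:SI rN)) (clobber (reg:SI PR_REG))])
+ where the SImode USE names the register holding the sfunc's address;
+ that USE is what the loop below looks for. */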
+rtx
+sfunc_uses_reg (rtx insn)
+{
+ int i;
+ rtx pattern, part, reg_part, reg;
+
+ if (!NONJUMP_INSN_P (insn))
+ return 0;
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
+ return 0;
+
+ for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
+ reg_part = part;
+ }
+ if (! reg_part)
+ return 0;
+ reg = XEXP (reg_part, 0);
+ for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (part == reg_part || GET_CODE (part) == CLOBBER)
+ continue;
+ if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
+ && REG_P (SET_DEST (part)))
+ ? SET_SRC (part) : part)))
+ return 0;
+ }
+ return reg;
+}
+
+/* See if the only way in which INSN uses REG is by calling it, or by
+ setting it while calling it. Set *SET to a SET rtx if the register
+ is set by INSN. */
+
+static int
+noncall_uses_reg (rtx reg, rtx insn, rtx *set)
+{
+ rtx pattern, reg2;
+
+ *set = NULL_RTX;
+
+ reg2 = sfunc_uses_reg (insn);
+ if (reg2 && REGNO (reg2) == REGNO (reg))
+ {
+ pattern = single_set (insn);
+ if (pattern
+ && REG_P (SET_DEST (pattern))
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ *set = pattern;
+ return 0;
+ }
+ if (!CALL_P (insn))
+ {
+ /* We don't use rtx_equal_p because we don't care if the mode is
+ different. */
+ pattern = single_set (insn);
+ if (pattern
+ && REG_P (SET_DEST (pattern))
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ {
+ rtx par, part;
+ int i;
+
+ *set = pattern;
+ par = PATTERN (insn);
+ if (GET_CODE (par) == PARALLEL)
+ for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (par, 0, i);
+ if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
+ return 1;
+ }
+ return reg_mentioned_p (reg, SET_SRC (pattern));
+ }
+
+ return 1;
+ }
+
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ {
+ int i;
+
+ for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
+ return 1;
+ pattern = XVECEXP (pattern, 0, 0);
+ }
+
+ if (GET_CODE (pattern) == SET)
+ {
+ if (reg_mentioned_p (reg, SET_DEST (pattern)))
+ {
+ /* We don't use rtx_equal_p, because we don't care if the
+ mode is different. */
+ if (!REG_P (SET_DEST (pattern))
+ || REGNO (reg) != REGNO (SET_DEST (pattern)))
+ return 1;
+
+ *set = pattern;
+ }
+
+ pattern = SET_SRC (pattern);
+ }
+
+ if (GET_CODE (pattern) != CALL
+ || !MEM_P (XEXP (pattern, 0))
+ || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
+ return 1;
+
+ return 0;
+}
+
+/* Given X, a pattern of an insn or a part of it, return a mask of used
+ general registers. Bits 0..15 mean that the respective registers
+ are used as inputs in the instruction. Bits 16..31 mean that the
+ registers 0..15, respectively, are used as outputs, or are clobbered.
+ IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
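+/* For example, for the pattern (set (reg:SI 1) (reg:SI 2)) the returned
+ mask is 0x00020004: bit 2 marks r2 as an input and bit 17 (1 + 16)
+ marks r1 as an output. */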
+int
+regs_used (rtx x, int is_dest)
+{
+ enum rtx_code code;
+ const char *fmt;
+ int i, used = 0;
+
+ if (! x)
+ return used;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ if (REGNO (x) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (x) + is_dest));
+ return 0;
+ case SUBREG:
+ {
+ rtx y = SUBREG_REG (x);
+
+ if (!REG_P (y))
+ break;
+ if (REGNO (y) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (y) +
+ subreg_regno_offset (REGNO (y),
+ GET_MODE (y),
+ SUBREG_BYTE (x),
+ GET_MODE (x)) + is_dest));
+ return 0;
+ }
+ case SET:
+ return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
+ case RETURN:
+ /* If there was a return value, it must have been indicated with USE. */
+ return 0x00ffff00;
+ case CLOBBER:
+ is_dest = 1;
+ break;
+ case MEM:
+ is_dest = 0;
+ break;
+ case CALL:
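+ /* A call reads the argument registers r4..r7 (input bits 4..7)
+ and clobbers r0..r7 (output bits 16..23). */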
+ used |= 0x00ff00f0;
+ break;
+ default:
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ used |= regs_used (XVECEXP (x, i, j), is_dest);
+ }
+ else if (fmt[i] == 'e')
+ used |= regs_used (XEXP (x, i), is_dest);
+ }
+ return used;
+}
+
+/* Create an instruction that prevents redirection of a conditional branch
+ to the destination of the JUMP with address ADDR.
+ If the branch needs to be implemented as an indirect jump, try to find
+ a scratch register for it.
+ If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
+ If any preceding insn that doesn't fit into a delay slot is good enough,
+ pass 1. Pass 2 if a definite blocking insn is needed.
+ -1 is used internally to avoid deep recursion.
+ If a blocking instruction is made or recognized, return it. */
+
+static rtx
+gen_block_redirect (rtx jump, int addr, int need_block)
+{
+ int dead = 0;
+ rtx prev = prev_nonnote_insn (jump);
+ rtx dest;
+
+ /* First, check if we already have an instruction that satisfies our need. */
+ if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
+ {
+ if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ return prev;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER
+ || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ prev = jump;
+ else if ((need_block &= ~1) < 0)
+ return prev;
+ else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
+ need_block = 0;
+ }
+ if (GET_CODE (PATTERN (jump)) == RETURN)
+ {
+ if (! need_block)
+ return prev;
+ /* Reorg even does nasty things with return insns that cause branches
+ to go out of range - see find_end_label and callers. */
+ return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
+ }
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ /* If the branch is out of range, try to find a scratch register for it. */
+ if (optimize
+ && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
+ > 4092 + 4098))
+ {
+ rtx scan;
+ /* Don't look for the stack pointer as a scratch register;
+ it would cause trouble if an interrupt occurred. */
+ unsigned attempt = 0x7fff, used;
+ int jump_left = flag_expensive_optimizations + 1;
+
+ /* It is likely that the most recent eligible instruction is wanted for
+ the delay slot. Therefore, find out which registers it uses, and
+ try to avoid using them. */
+
+ for (scan = jump; (scan = PREV_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (code == CODE_LABEL || code == JUMP_INSN)
+ break;
+ if (code == INSN
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
+ {
+ attempt &= ~regs_used (PATTERN (scan), 0);
+ break;
+ }
+ }
+ for (used = dead = 0, scan = JUMP_LABEL (jump);
+ (scan = NEXT_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (INSN_P (scan))
+ {
+ used |= regs_used (PATTERN (scan), 0);
+ if (code == CALL_INSN)
+ used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
+ dead |= (used >> 16) & ~used;
+ if (dead & attempt)
+ {
+ dead &= attempt;
+ break;
+ }
+ if (code == JUMP_INSN)
+ {
+ if (jump_left-- && simplejump_p (scan))
+ scan = JUMP_LABEL (scan);
+ else
+ break;
+ }
+ }
+ }
+ /* Mask out the stack pointer again, in case it was
+ the only 'free' register we have found. */
+ dead &= 0x7fff;
+ }
+ /* If the immediate destination is still in range, check for possible
+ threading with a jump beyond the delay slot insn.
+ Don't check if we are called recursively; the jump has been or will be
+ checked in a different invocation in that case.
+
+ else if (optimize && need_block >= 0)
+ {
+ rtx next = next_active_insn (next_active_insn (dest));
+ if (next && JUMP_P (next)
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump_compact)
+ {
+ dest = JUMP_LABEL (next);
+ if (dest
+ && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
+ > 4092 + 4098))
+ gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
+ }
+ }
+
+ if (dead)
+ {
+ rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
+
+ /* It would be nice if we could convert the jump into an indirect
+ jump / far branch right now, and thus exposing all constituent
+ instructions to further optimization. However, reorg uses
+ simplejump_p to determine if there is an unconditional jump where
+ it should try to schedule instructions from the target of the
+ branch; simplejump_p fails for indirect jumps even if they have
+ a JUMP_LABEL. */
+ rtx insn = emit_insn_before (gen_indirect_jump_scratch
+ (reg, GEN_INT (unspec_bbr_uid++)),
+ jump);
+ /* ??? We would like this to have the scope of the jump, but that
+ scope will change when a delay slot insn of an inner scope is added.
+ Hence, after delay slot scheduling, we'll have to expect
+ NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
+ the jump. */
+
+ INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
+ INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
+ return insn;
+ }
+ else if (need_block)
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ return emit_insn_before (gen_block_branch_redirect
+ (GEN_INT (unspec_bbr_uid++)),
+ jump);
+ return prev;
+}
+
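+/* Byte displacement range within which a conditional branch destination
+ must lie for the branch to be emitted directly; SH conditional
+ branches use an 8-bit displacement counted in 2-byte units. */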
+#define CONDJUMP_MIN -252
+#define CONDJUMP_MAX 262
+struct far_branch
+{
+ /* A label (to be placed) in front of the jump
+ that jumps to our ultimate destination. */
+ rtx near_label;
+ /* Where we are going to insert it if we cannot move the jump any farther,
+ or the jump itself if we have picked up an existing jump. */
+ rtx insert_place;
+ /* The ultimate destination. */
+ rtx far_label;
+ struct far_branch *prev;
+ /* If the branch has already been created, its address;
+ else the address of its first prospective user. */
+ int address;
+};
+
+static void gen_far_branch (struct far_branch *);
+enum mdep_reorg_phase_e mdep_reorg_phase;
+static void
+gen_far_branch (struct far_branch *bp)
+{
+ rtx insn = bp->insert_place;
+ rtx jump;
+ rtx label = gen_label_rtx ();
+ int ok;
+
+ emit_label_after (label, insn);
+ if (bp->far_label)
+ {
+ jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ jump = emit_jump_insn_after (gen_return (), insn);
+ /* Emit a barrier so that reorg knows that any following instructions
+ are not reachable via a fall-through path.
+ But don't do this when not optimizing, since we wouldn't suppress the
+ alignment for the barrier then, and could end up with out-of-range
+ pc-relative loads. */
+ if (optimize)
+ emit_barrier_after (jump);
+ emit_label_after (bp->near_label, insn);
+ JUMP_LABEL (jump) = bp->far_label;
+ ok = invert_jump (insn, label, 1);
+ gcc_assert (ok);
+
+ /* If we are branching around a jump (rather than a return), prevent
+ reorg from using an insn from the jump target as the delay slot insn -
+ when reorg did this, it pessimized code (we would rather hide the delay slot)
+ and it could cause branches to go out of range. */
+ if (bp->far_label)
+ (emit_insn_after
+ (gen_stuff_delay_slot
+ (GEN_INT (unspec_bbr_uid++),
+ GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
+ insn));
+ /* Prevent reorg from undoing our splits. */
+ gen_block_redirect (jump, bp->address += 2, 2);
+}
+
+/* Fix up ADDR_DIFF_VECs. */
+void
+fixup_addr_diff_vecs (rtx first)
+{
+ rtx insn;
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx vec_lab, pat, prev, prevpat, x, braf_label;
+
+ if (!JUMP_P (insn)
+ || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ continue;
+ pat = PATTERN (insn);
+ vec_lab = XEXP (XEXP (pat, 0), 0);
+
+ /* Search the matching casesi_jump_2. */
+ for (prev = vec_lab; ; prev = PREV_INSN (prev))
+ {
+ if (!JUMP_P (prev))
+ continue;
+ prevpat = PATTERN (prev);
+ if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
+ continue;
+ x = XVECEXP (prevpat, 0, 1);
+ if (GET_CODE (x) != USE)
+ continue;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
+ break;
+ }
+ /* FIXME: This is a bug in the optimizer, but it seems harmless
+ to just avoid panicking. */
+ if (!prev)
+ continue;
+
+ /* Emit the reference label of the braf where it belongs, right after
+ the casesi_jump_2 (i.e. braf). */
+ braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
+ emit_label_after (braf_label, prev);
+
+ /* Fix up the ADDR_DIFF_VEC to be relative
+ to the reference address of the braf. */
+ XEXP (XEXP (pat, 0), 0) = braf_label;
+ }
+}
+
+/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
+ a barrier. Return the base 2 logarithm of the desired alignment. */
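+/* A return value of N requests 2^N-byte alignment; e.g. the value 2
+ returned for an ADDR_DIFF_VEC below requests 4-byte alignment. */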
+int
+barrier_align (rtx barrier_or_label)
+{
+ rtx next = next_real_insn (barrier_or_label), pat, prev;
+ int slot, credit, jump_to_next = 0;
+
+ if (! next)
+ return 0;
+
+ pat = PATTERN (next);
+
+ if (GET_CODE (pat) == ADDR_DIFF_VEC)
+ return 2;
+
+ if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
+ /* This is a barrier in front of a constant table. */
+ return 0;
+
+ prev = prev_real_insn (barrier_or_label);
+ if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
+ {
+ pat = PATTERN (prev);
+ /* If this is a very small table, we want to keep the alignment after
+ the table to the minimum for proper code alignment. */
+ return ((optimize_size
+ || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
+ <= (unsigned) 1 << (CACHE_LOG - 2)))
+ ? 1 << TARGET_SHMEDIA : align_jumps_log);
+ }
+
+ if (optimize_size)
+ return 0;
+
+ if (! TARGET_SH2 || ! optimize)
+ return align_jumps_log;
+
+ /* When fixing up pcloads, a constant table might be inserted just before
+ the basic block that ends with the barrier. Thus, we can't trust the
+ instruction lengths before that. */
+ if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
+ {
+ /* Check if there is an immediately preceding branch to the insn beyond
+ the barrier. We must weigh the cost of discarding useful information
+ from the current cache line when executing this branch with an
+ alignment present, against that of fetching unneeded insns in front of
+ the branch target when there is no alignment.
+
+ /* There are two delay_slot cases to consider. One is the simple case
+ where the preceding branch is to the insn beyond the barrier (simple
+ delay slot filling), and the other is where the preceding branch has
+ a delay slot that is a duplicate of the insn after the barrier
+ (fill_eager_delay_slots) and the branch is to the insn after the insn
+ after the barrier. */
+
+ /* PREV is presumed to be the JUMP_INSN for the barrier under
+ investigation. Skip to the insn before it. */
+ prev = prev_real_insn (prev);
+
+ for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
+ credit >= 0 && prev && NONJUMP_INSN_P (prev);
+ prev = prev_real_insn (prev))
+ {
+ jump_to_next = 0;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER)
+ continue;
+ if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ {
+ prev = XVECEXP (PATTERN (prev), 0, 1);
+ if (INSN_UID (prev) == INSN_UID (next))
+ {
+ /* Delay slot was filled with insn at jump target. */
+ jump_to_next = 1;
+ continue;
+ }
+ }
+
+ if (slot
+ && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ slot = 0;
+ credit -= get_attr_length (prev);
+ }
+ if (prev
+ && JUMP_P (prev)
+ && JUMP_LABEL (prev))
+ {
+ rtx x;
+ if (jump_to_next
+ || next_real_insn (JUMP_LABEL (prev)) == next
+ /* If relax_delay_slots() decides NEXT was redundant
+ with some previous instruction, it will have
+ redirected PREV's jump to the following insn. */
+ || JUMP_LABEL (prev) == next_nonnote_insn (next)
+ /* There is no upper bound on redundant instructions
+ that might have been skipped, but we must not put an
+ alignment where none had been before. */
+ || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
+ (INSN_P (x)
+ && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
+ || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
+ || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
+ {
+ rtx pat = PATTERN (prev);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
+ return 0;
+ }
+ }
+ }
+
+ return align_jumps_log;
+}
+
+/* If we are inside a phony loop, almost any kind of label can turn up as the
+ first one in the loop. Aligning a braf label causes incorrect switch
+ destination addresses; we can detect braf labels because they are
+ followed by a BARRIER.
+ Applying loop alignment to small constant or switch tables is a waste
+ of space, so we suppress this too. */
+int
+sh_loop_align (rtx label)
+{
+ rtx next = label;
+
+ do
+ next = next_nonnote_insn (next);
+ while (next && LABEL_P (next));
+
+ if (! next
+ || ! INSN_P (next)
+ || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
+ || recog_memoized (next) == CODE_FOR_consttable_2)
+ return 0;
+
+ return align_loops_log;
+}
+
+/* Do a final pass over the function, just before delayed branch
+ scheduling. */
+
+static void
+sh_reorg (void)
+{
+ rtx first, insn, mova = NULL_RTX;
+ int num_mova;
+ rtx r0_rtx = gen_rtx_REG (Pmode, 0);
+ rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
+
+ first = get_insns ();
+ max_labelno_before_reorg = max_label_num ();
+
+ /* We must split call insns before introducing `mova's. If we're
+ optimizing, they'll have already been split. Otherwise, make
+ sure we don't split them too late. */
+ if (! optimize)
+ split_all_insns_noflow ();
+
+ if (TARGET_SHMEDIA)
+ return;
+
+ /* If relaxing, generate pseudo-ops to associate function calls with
+ the symbols they call. It does no harm not to generate these
+ pseudo-ops. However, when we can generate them, it enables the
+ linker to potentially relax the jsr to a bsr, and to eliminate the
+ register load and, possibly, the constant pool entry. */
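+ /* The association is made with ".uses L<n>" pseudo-ops, emitted in
+ final_prescan_insn below from the REG_LABEL_OPERAND notes that are
+ created here. */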
+
+ mdep_reorg_phase = SH_INSERT_USES_LABELS;
+ if (TARGET_RELAX)
+ {
+ /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
+ own purposes. This works because none of the remaining passes
+ need to look at them.
+
+ ??? But it may break in the future. We should use a
+ machine-dependent REG_NOTE, or some other approach entirely. */
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ rtx note;
+
+ while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
+ NULL_RTX)) != 0)
+ remove_note (insn, note);
+ }
+ }
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, reg, link, set, scan, dies, label;
+ int rescan = 0, foundinsn = 0;
+
+ if (CALL_P (insn))
+ {
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == SET)
+ pattern = SET_SRC (pattern);
+
+ if (GET_CODE (pattern) != CALL
+ || !MEM_P (XEXP (pattern, 0)))
+ continue;
+
+ reg = XEXP (XEXP (pattern, 0), 0);
+ }
+ else
+ {
+ reg = sfunc_uses_reg (insn);
+ if (! reg)
+ continue;
+ }
+
+ if (!REG_P (reg))
+ continue;
+
+ /* Try scanning backward to find where the register is set. */
+ link = NULL;
+ for (scan = PREV_INSN (insn);
+ scan && !LABEL_P (scan);
+ scan = PREV_INSN (scan))
+ {
+ if (! INSN_P (scan))
+ continue;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &set))
+ break;
+
+ if (set)
+ {
+ link = scan;
+ break;
+ }
+ }
+
+ if (! link)
+ continue;
+
+ /* The register is set at LINK. */
+
+ /* We can only optimize the function call if the register is
+ being set to a symbol. In theory, we could sometimes
+ optimize calls to a constant location, but the assembler
+ and linker do not support that at present. */
+ if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
+ && GET_CODE (SET_SRC (set)) != LABEL_REF)
+ continue;
+
+ /* Scan forward from LINK to the place where REG dies, and
+ make sure that the only insns which use REG are
+ themselves function calls. */
+
+ /* ??? This doesn't work for call targets that were allocated
+ by reload, since there may not be a REG_DEAD note for the
+ register. */
+
+ dies = NULL_RTX;
+ for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
+ {
+ rtx scanset;
+
+ /* Don't try to trace forward past a CODE_LABEL if we haven't
+ seen INSN yet. Ordinarily, we will only find the setting insn
+ if it is in the same basic block. However,
+ cross-jumping can insert code labels in between the load and
+ the call, and can result in situations where a single call
+ insn may have two targets depending on where we came from. */
+
+ if (LABEL_P (scan) && ! foundinsn)
+ break;
+
+ if (! INSN_P (scan))
+ continue;
+
+ /* Don't try to trace forward past a JUMP. To optimize
+ safely, we would have to check that all the
+ instructions at the jump destination did not use REG. */
+
+ if (JUMP_P (scan))
+ break;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &scanset))
+ break;
+
+ if (scan == insn)
+ foundinsn = 1;
+
+ if (scan != insn
+ && (CALL_P (scan) || sfunc_uses_reg (scan)))
+ {
+ /* There is a function call to this register other
+ than the one we are checking. If we optimize
+ this call, we need to rescan again below. */
+ rescan = 1;
+ }
+
+ /* ??? We shouldn't have to worry about SCANSET here.
+ We should just be able to check for a REG_DEAD note
+ on a function call. However, the REG_DEAD notes are
+ apparently not dependable around libcalls; c-torture
+ execute/920501-2 is a test case. If SCANSET is set,
+ then this insn sets the register, so it must have
+ died earlier. Unfortunately, this will only handle
+ the cases in which the register is, in fact, set in a
+ later insn. */
+
+ /* ??? We shouldn't have to use FOUNDINSN here.
+ This dates back to when we used LOG_LINKS to find
+ the most recent insn which sets the register. */
+
+ if (foundinsn
+ && (scanset
+ || find_reg_note (scan, REG_DEAD, reg)))
+ {
+ dies = scan;
+ break;
+ }
+ }
+
+ if (! dies)
+ {
+ /* Either there was a branch, or some insn used REG
+ other than as a function call address. */
+ continue;
+ }
+
+ /* Create a code label, and put it in a REG_LABEL_OPERAND note
+ on the insn which sets the register, and on each call insn
+ which uses the register. In final_prescan_insn we look for
+ the REG_LABEL_OPERAND notes, and output the appropriate label
+ or pseudo-op. */
+
+ label = gen_label_rtx ();
+ add_reg_note (link, REG_LABEL_OPERAND, label);
+ add_reg_note (insn, REG_LABEL_OPERAND, label);
+ if (rescan)
+ {
+ scan = link;
+ do
+ {
+ rtx reg2;
+
+ scan = NEXT_INSN (scan);
+ if (scan != insn
+ && ((CALL_P (scan)
+ && reg_mentioned_p (reg, scan))
+ || ((reg2 = sfunc_uses_reg (scan))
+ && REGNO (reg2) == REGNO (reg))))
+ add_reg_note (scan, REG_LABEL_OPERAND, label);
+ }
+ while (scan != dies);
+ }
+ }
+ }
+
+ if (TARGET_SH2)
+ fixup_addr_diff_vecs (first);
+
+ if (optimize)
+ {
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
+ shorten_branches (first);
+ }
+
+ /* Scan the function looking for move instructions which have to be
+ changed to pc-relative loads and insert the literal tables. */
+ label_ref_list_pool = create_alloc_pool ("label references list",
+ sizeof (struct label_ref_list_d),
+ 30);
+ mdep_reorg_phase = SH_FIXUP_PCLOAD;
+ for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
+ {
+ if (mova_p (insn))
+ {
+ /* ??? basic block reordering can move a switch table dispatch
+ below the switch table. Check if that has happened.
+ We only have the addresses available when optimizing; but then,
+ this check shouldn't be needed when not optimizing. */
+ if (!untangle_mova (&num_mova, &mova, insn))
+ {
+ insn = mova;
+ num_mova = 0;
+ }
+ }
+ else if (JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ && num_mova
+ /* ??? loop invariant motion can also move a mova out of a
+ loop. Since loop does this code motion anyway, maybe we
+ should wrap UNSPEC_MOVA into a CONST, so that reload can
+ move it back. */
+ && ((num_mova > 1
+ && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
+ || (prev_nonnote_insn (insn)
+ == XEXP (MOVA_LABELREF (mova), 0))))
+ {
+ rtx scan;
+ int total;
+
+ num_mova--;
+
+ /* Some code might have been inserted between the mova and
+ its ADDR_DIFF_VEC. Check if the mova is still in range. */
+ for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
+ total += get_attr_length (scan);
+
+ /* The range of the mova is 1020; add 4 because the pc counts from the
+ address of the second instruction after this one, and subtract 2 in
+ case the pc is 2-byte aligned. Any alignment needed for the
+ ADDR_DIFF_VEC cancels out with the alignment effects of the mova
+ itself. */
+ if (total > 1022)
+ {
+ /* Change the mova into a load, and restart scanning
+ there. broken_move will then return true for mova. */
+ fixup_mova (mova);
+ insn = mova;
+ }
+ }
+ if (broken_move (insn)
+ || (NONJUMP_INSN_P (insn)
+ && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
+ {
+ rtx scan;
+ /* Scan ahead looking for a barrier to stick the constant table
+ behind. */
+ rtx barrier = find_barrier (num_mova, mova, insn);
+ rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
+ int need_aligned_label = 0;
+
+ if (num_mova && ! mova_p (mova))
+ {
+ /* find_barrier had to change the first mova into a
+ pcload; thus, we have to start with this new pcload. */
+ insn = mova;
+ num_mova = 0;
+ }
+ /* Now find all the moves between the points and modify them. */
+ for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
+ {
+ if (LABEL_P (scan))
+ last_float = 0;
+ if (NONJUMP_INSN_P (scan)
+ && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
+ need_aligned_label = 1;
+ if (broken_move (scan))
+ {
+ rtx *patp = &PATTERN (scan), pat = *patp;
+ rtx src, dst;
+ rtx lab;
+ rtx newsrc;
+ enum machine_mode mode;
+
+ if (GET_CODE (pat) == PARALLEL)
+ patp = &XVECEXP (pat, 0, 0), pat = *patp;
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+ if (mode == SImode && hi_const (src)
+ && REGNO (dst) != FPUL_REG)
+ {
+ int offset = 0;
+
+ mode = HImode;
+ while (GET_CODE (dst) == SUBREG)
+ {
+ offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
+ GET_MODE (SUBREG_REG (dst)),
+ SUBREG_BYTE (dst),
+ GET_MODE (dst));
+ dst = SUBREG_REG (dst);
+ }
+ dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
+ }
+ if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
+ {
+ /* This must be an insn that clobbers r0. */
+ rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
+ XVECLEN (PATTERN (scan), 0)
+ - 1);
+ rtx clobber = *clobberp;
+
+ gcc_assert (GET_CODE (clobber) == CLOBBER
+ && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
+
+ if (last_float
+ && reg_set_between_p (r0_rtx, last_float_move, scan))
+ last_float = 0;
+ if (last_float
+ && TARGET_SHCOMPACT
+ && GET_MODE_SIZE (mode) != 4
+ && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
+ last_float = 0;
+ lab = add_constant (src, mode, last_float);
+ if (lab)
+ emit_insn_before (gen_mova (lab), scan);
+ else
+ {
+ /* There will be a REG_UNUSED note for r0 on
+ LAST_FLOAT_MOVE; we have to change it to REG_INC,
+ since otherwise reorg:mark_target_live_regs will not
+ consider r0 to be used, and we could end up with a delay
+ slot insn in front of SCAN that clobbers r0. */
+ rtx note
+ = find_regno_note (last_float_move, REG_UNUSED, 0);
+
+ /* If we are not optimizing, then there may not be
+ a note. */
+ if (note)
+ PUT_REG_NOTE_KIND (note, REG_INC);
+
+ *last_float_addr = r0_inc_rtx;
+ }
+ last_float_move = scan;
+ last_float = src;
+ newsrc = gen_const_mem (mode,
+ (((TARGET_SH4 && ! TARGET_FMOVD)
+ || REGNO (dst) == FPUL_REG)
+ ? r0_inc_rtx
+ : r0_rtx));
+ last_float_addr = &XEXP (newsrc, 0);
+
+ /* Remove the clobber of r0. */
+ *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
+ gen_rtx_SCRATCH (Pmode));
+ }
+ /* This is a mova needing a label. Create it. */
+ else if (GET_CODE (src) == UNSPEC
+ && XINT (src, 1) == UNSPEC_MOVA
+ && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
+ {
+ lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+ newsrc = gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, newsrc),
+ UNSPEC_MOVA);
+ }
+ else
+ {
+ lab = add_constant (src, mode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+ newsrc = gen_const_mem (mode, newsrc);
+ }
+ *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
+ INSN_CODE (scan) = -1;
+ }
+ }
+ dump_table (need_aligned_label ? insn : 0, barrier);
+ insn = barrier;
+ }
+ }
+ free_alloc_pool (label_ref_list_pool);
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ PUT_MODE (insn, VOIDmode);
+
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
+ INSN_ADDRESSES_FREE ();
+ split_branches (first);
+
+ /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+ also has an effect on the register that holds the address of the sfunc.
+ Insert an extra dummy insn in front of each sfunc that pretends to
+ use this register. */
+ if (flag_delayed_branch)
+ {
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx reg = sfunc_uses_reg (insn);
+
+ if (! reg)
+ continue;
+ emit_insn_before (gen_use_sfunc_addr (reg), insn);
+ }
+ }
+#if 0
+ /* fpscr is not actually a user variable, but we pretend it is for the
+ sake of the previous optimization passes, since we want it handled like
+ one. However, we don't have any debugging information for it, so turn
+ it into a non-user variable now. */
+ if (TARGET_SH4)
+ REG_USERVAR_P (get_fpscr_rtx ()) = 0;
+#endif
+ mdep_reorg_phase = SH_AFTER_MDEP_REORG;
+}
+
+int
+get_dest_uid (rtx label, int max_uid)
+{
+ rtx dest = next_real_insn (label);
+ int dest_uid;
+ if (! dest)
+ /* This can happen for an undefined label. */
+ return 0;
+ dest_uid = INSN_UID (dest);
+ /* If this is a newly created branch redirection blocking instruction,
+ we cannot index the branch_uid or insn_addresses arrays with its
+ uid. But then, we won't need to, because the actual destination is
+ the following branch. */
+ while (dest_uid >= max_uid)
+ {
+ dest = NEXT_INSN (dest);
+ dest_uid = INSN_UID (dest);
+ }
+ if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
+ return 0;
+ return dest_uid;
+}
+
+/* Split condbranches that are out of range. Also add clobbers for
+ scratch registers that are needed in far jumps.
+ We do this before delay slot scheduling, so that it can take our
+ newly created instructions into account. It also allows us to
+ find branches with common targets more easily. */
+
+static void
+split_branches (rtx first)
+{
+ rtx insn;
+ struct far_branch **uid_branch, *far_branch_list = 0;
+ int max_uid = get_max_uid ();
+ int ok;
+
+ /* Find out which branches are out of range. */
+ shorten_branches (first);
+
+ uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
+ memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (! INSN_P (insn))
+ continue;
+ else if (INSN_DELETED_P (insn))
+ {
+ /* Shorten_branches would split this instruction again,
+ so transform it into a note. */
+ SET_INSN_DELETED (insn);
+ }
+ else if (JUMP_P (insn)
+ /* Don't mess with ADDR_DIFF_VECs. */
+ && (GET_CODE (PATTERN (insn)) == SET
+ || GET_CODE (PATTERN (insn)) == RETURN))
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_CBRANCH)
+ {
+ rtx next, beyond;
+
+ if (get_attr_length (insn) > 4)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ rtx olabel = XEXP (XEXP (src, 1), 0);
+ int addr = INSN_ADDRESSES (INSN_UID (insn));
+ rtx label = 0;
+ int dest_uid = get_dest_uid (olabel, max_uid);
+ struct far_branch *bp = uid_branch[dest_uid];
+
+ /* redirect_jump needs a valid JUMP_LABEL, and it might delete
+ the label if the LABEL_NUSES count drops to zero. There is
+ always a jump_optimize pass that sets these values, but it
+ proceeds to delete unreferenced code, and then, if not
+ optimizing, to un-delete the deleted instructions, thus
+ leaving labels with use counts that are too low. */
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = olabel;
+ LABEL_NUSES (olabel)++;
+ }
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->far_label
+ = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ {
+ label = bp->near_label;
+ if (! label && bp->address - addr >= CONDJUMP_MIN)
+ {
+ rtx block = bp->insert_place;
+
+ if (GET_CODE (PATTERN (block)) == RETURN)
+ block = PREV_INSN (block);
+ else
+ block = gen_block_redirect (block,
+ bp->address, 2);
+ label = emit_label_after (gen_label_rtx (),
+ PREV_INSN (block));
+ bp->near_label = label;
+ }
+ else if (label && ! NEXT_INSN (label))
+ {
+ if (addr + 2 - bp->address <= CONDJUMP_MAX)
+ bp->insert_place = insn;
+ else
+ gen_far_branch (bp);
+ }
+ }
+ if (! label
+ || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
+ {
+ bp->near_label = label = gen_label_rtx ();
+ bp->insert_place = insn;
+ bp->address = addr;
+ }
+ ok = redirect_jump (insn, label, 0);
+ gcc_assert (ok);
+ }
+ else
+ {
+ /* get_attr_length (insn) == 2 */
+ /* Check if we have a pattern where reorg wants to redirect
+ the branch to a label from an unconditional branch that
+ is too far away. */
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ /* A syntax error might cause beyond to be NULL_RTX. */
+ beyond
+ = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
+ 0));
+
+ if (beyond
+ && (JUMP_P (beyond)
+ || ((beyond = next_active_insn (beyond))
+ && JUMP_P (beyond)))
+ && GET_CODE (PATTERN (beyond)) == SET
+ && recog_memoized (beyond) == CODE_FOR_jump_compact
+ && ((INSN_ADDRESSES
+ (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
+ - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
+ > 252 + 258 + 2))
+ gen_block_redirect (beyond,
+ INSN_ADDRESSES (INSN_UID (beyond)), 1);
+ }
+
+ next = next_active_insn (insn);
+
+ if (next
+ && (JUMP_P (next)
+ || ((next = next_active_insn (next))
+ && JUMP_P (next)))
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump_compact
+ && ((INSN_ADDRESSES
+ (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
+ - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
+ > 252 + 258 + 2))
+ gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
+ }
+ else if (type == TYPE_JUMP || type == TYPE_RETURN)
+ {
+ int addr = INSN_ADDRESSES (INSN_UID (insn));
+ rtx far_label = 0;
+ int dest_uid = 0;
+ struct far_branch *bp;
+
+ if (type == TYPE_JUMP)
+ {
+ far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
+ dest_uid = get_dest_uid (far_label, max_uid);
+ if (! dest_uid)
+ {
+ /* Parse errors can lead to labels outside
+ the insn stream. */
+ if (! NEXT_INSN (far_label))
+ continue;
+
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+ redirect_jump (insn, NULL_RTX, 1);
+ far_label = 0;
+ }
+ }
+ bp = uid_branch[dest_uid];
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->near_label = 0;
+ bp->far_label = far_label;
+ if (far_label)
+ LABEL_NUSES (far_label)++;
+ }
+ else if (bp->near_label && ! NEXT_INSN (bp->near_label))
+ if (addr - bp->address <= CONDJUMP_MAX)
+ emit_label_after (bp->near_label, PREV_INSN (insn));
+ else
+ {
+ gen_far_branch (bp);
+ bp->near_label = 0;
+ }
+ else
+ bp->near_label = 0;
+ bp->address = addr;
+ bp->insert_place = insn;
+ if (! far_label)
+ emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
+ else
+ gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
+ }
+ }
+ /* Generate all pending far branches,
+ and free our references to the far labels. */
+ while (far_branch_list)
+ {
+ if (far_branch_list->near_label
+ && ! NEXT_INSN (far_branch_list->near_label))
+ gen_far_branch (far_branch_list);
+ if (optimize
+ && far_branch_list->far_label
+ && ! --LABEL_NUSES (far_branch_list->far_label))
+ delete_insn (far_branch_list->far_label);
+ far_branch_list = far_branch_list->prev;
+ }
+
+ /* Instruction length information is no longer valid due to the new
+ instructions that have been generated. */
+ init_insn_lengths ();
+}
+
+/* Dump out instruction addresses, which is useful for debugging the
+ constant pool table stuff.
+
+ If relaxing, output the label and pseudo-ops used to link together
+ calls and the instruction which set the registers. */
+
+/* ??? The addresses printed by this routine for insns are nonsense for
+ insns which are inside a sequence where none of the inner insns have
+ variable length. This is because the second pass of shorten_branches
+ does not bother to update them. */
+
+void
+final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ if (TARGET_DUMPISIZE)
+ fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
+
+ if (TARGET_RELAX)
+ {
+ rtx note;
+
+ note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
+ if (note)
+ {
+ rtx pattern;
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ switch (GET_CODE (pattern))
+ {
+ case SET:
+ if (GET_CODE (SET_SRC (pattern)) != CALL
+ && get_attr_type (insn) != TYPE_SFUNC)
+ {
+ targetm.asm_out.internal_label
+ (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
+ break;
+ }
+ /* else FALLTHROUGH */
+ case CALL:
+ asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
+ CODE_LABEL_NUMBER (XEXP (note, 0)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+}
+
+/* Dump out any constants accumulated in the final pass. These will
+ only be labels. */
+
+const char *
+output_jump_label_table (void)
+{
+ int i;
+
+ if (pool_size)
+ {
+ fprintf (asm_out_file, "\t.align 2\n");
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (p->label));
+ output_asm_insn (".long %O0", &p->value);
+ }
+ pool_size = 0;
+ }
+
+ return "";
+}
+
+/* A full frame looks like:
+
+ arg-5
+ arg-4
+ [ if current_function_anonymous_args
+ arg-3
+ arg-2
+ arg-1
+ arg-0 ]
+ saved-fp
+ saved-r10
+ saved-r11
+ saved-r12
+ saved-pr
+ local-n
+ ..
+ local-1
+ local-0 <- fp points here. */
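+
+/* The stack grows downwards: incoming arguments sit at the higher
+ addresses, above the saved registers, and the locals sit below them,
+ with fp addressing the lowest local slot. */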
+
+/* Number of bytes pushed for anonymous args, used to pass information
+ between expand_prologue and expand_epilogue. */
+
+/* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
+ adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
+ for an epilogue and a negative value means that it's for a sibcall
+ epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
+ all the registers that are about to be restored, and hence dead. */
+
+static void
+output_stack_adjust (int size, rtx reg, int epilogue_p,
+ HARD_REG_SET *live_regs_mask, bool frame_p)
+{
+ rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
+ if (size)
+ {
+ HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
+
+/* This test is bogus, as output_stack_adjust is used to re-align the
+ stack. */
+#if 0
+ gcc_assert (!(size % align));
+#endif
+
+ if (CONST_OK_FOR_ADD (size))
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
+ /* Try to do it with two partial adjustments; however, we must make
+ sure that the stack is properly aligned at all times, in case
+ an interrupt occurs between the two partial adjustments. */
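+ /* E.g. a 200-byte adjustment with a 4-byte alignment becomes two
+ additions of 100 bytes; "size / 2 & -align" rounds the first part
+ down to a multiple of the alignment, so the stack stays aligned
+ after the first insn. */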
+ else if (CONST_OK_FOR_ADD (size / 2 & -align)
+ && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
+ {
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
+ }
+ else
+ {
+ rtx const_reg;
+ rtx insn;
+ int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
+ int i;
+
+ /* If TEMP is invalid, we could temporarily save a general
+ register to MACL. However, there is currently no need
+ to handle this case, so just die when we see it. */
+ if (epilogue_p < 0
+ || current_function_interrupt
+ || ! call_really_used_regs[temp] || fixed_regs[temp])
+ temp = -1;
+ if (temp < 0 && ! current_function_interrupt
+ && (TARGET_SHMEDIA || epilogue_p >= 0))
+ {
+ HARD_REG_SET temps;
+ COPY_HARD_REG_SET (temps, call_used_reg_set);
+ AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
+ if (epilogue_p > 0)
+ {
+ int nreg = 0;
+ if (crtl->return_rtx)
+ {
+ enum machine_mode mode;
+ mode = GET_MODE (crtl->return_rtx);
+ if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
+ nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
+ }
+ for (i = 0; i < nreg; i++)
+ CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
+ if (crtl->calls_eh_return)
+ {
+ CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
+ for (i = 0; i <= 3; i++)
+ CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
+ }
+ }
+ if (TARGET_SHMEDIA && epilogue_p < 0)
+ for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+ CLEAR_HARD_REG_BIT (temps, i);
+ if (epilogue_p <= 0)
+ {
+ for (i = FIRST_PARM_REG;
+ i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
+ CLEAR_HARD_REG_BIT (temps, i);
+ if (cfun->static_chain_decl != NULL)
+ CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
+ }
+ temp = scavenge_reg (&temps);
+ }
+ if (temp < 0 && live_regs_mask)
+ {
+ HARD_REG_SET temps;
+
+ COPY_HARD_REG_SET (temps, *live_regs_mask);
+ CLEAR_HARD_REG_BIT (temps, REGNO (reg));
+ temp = scavenge_reg (&temps);
+ }
+ if (temp < 0)
+ {
+ rtx adj_reg, tmp_reg, mem;
+
+ /* If we reach here, the most likely case is the (sibcall)
+ epilogue for non-SHmedia. Emit a special push/pop sequence
+ for such a case as a last resort. This looks lengthy, but it
+ should not be a problem because such cases seem to be very
+ rare. */
+
+ gcc_assert (!TARGET_SHMEDIA && epilogue_p);
+
+
+ /* ??? There is still the slight possibility that r4 or
+ r5 have been reserved as fixed registers or assigned
+ as global registers, and they change during an
+ interrupt. There are possible ways to handle this:
+
+ - If we are adjusting the frame pointer (r14), we can do
+ with a single temp register and an ordinary push / pop
+ on the stack.
+ - Grab any call-used or call-saved registers (i.e. not
+ fixed or globals) for the temps we need. We might
+ also grab r14 if we are adjusting the stack pointer.
+ If we can't find enough available registers, issue
+ a diagnostic and die - the user must have reserved
+ way too many registers.
+ But since all this is rather unlikely to happen and
+ would require extra testing, we just die if r4 / r5
+ are not available. */
+ gcc_assert (!fixed_regs[4] && !fixed_regs[5]
+ && !global_regs[4] && !global_regs[5]);
+
+ adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
+ tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
+ emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
+ emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
+ emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (reg, adj_reg);
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (adj_reg, mem);
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (tmp_reg, mem);
+ /* Tell flow the insns that pop r4/r5 aren't dead. */
+ emit_use (tmp_reg);
+ emit_use (adj_reg);
+ return;
+ }
+ const_reg = gen_rtx_REG (GET_MODE (reg), temp);
+
+ /* If SIZE is negative, subtract the positive value.
+ This sometimes allows a constant pool entry to be shared
+ between prologue and epilogue code. */
+ if (size < 0)
+ {
+ emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
+ insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
+ }
+ else
+ {
+ emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
+ insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
+ }
+ if (! epilogue_p)
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_PLUS (SImode, reg,
+ GEN_INT (size))));
+ }
+ }
+}
+
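+/* Emit X as an insn and mark it frame-related, so that DWARF-2 call
+ frame information is generated for it. */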
+static rtx
+frame_insn (rtx x)
+{
+ x = emit_insn (x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Output RTL to push register RN onto the stack. */
+
+static rtx
+push (int rn)
+{
+ rtx x;
+ if (rn == FPUL_REG)
+ x = gen_push_fpul ();
+ else if (rn == FPSCR_REG)
+ x = gen_push_fpscr ();
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && FP_OR_XD_REGISTER_P (rn))
+ {
+ if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
+ return NULL_RTX;
+ x = gen_push_4 (gen_rtx_REG (DFmode, rn));
+ }
+ else if (TARGET_SH2E && FP_REGISTER_P (rn))
+ x = gen_push_e (gen_rtx_REG (SFmode, rn));
+ else
+ x = gen_push (gen_rtx_REG (SImode, rn));
+
+ x = frame_insn (x);
+ add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
+ return x;
+}
+
+/* Output RTL to pop register RN from the stack. */
+
+static void
+pop (int rn)
+{
+ rtx x;
+ if (rn == FPUL_REG)
+ x = gen_pop_fpul ();
+ else if (rn == FPSCR_REG)
+ x = gen_pop_fpscr ();
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && FP_OR_XD_REGISTER_P (rn))
+ {
+ if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
+ return;
+ x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
+ }
+ else if (TARGET_SH2E && FP_REGISTER_P (rn))
+ x = gen_pop_e (gen_rtx_REG (SFmode, rn));
+ else
+ x = gen_pop (gen_rtx_REG (SImode, rn));
+
+ x = emit_insn (x);
+ add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
+}
+
+/* Generate code to push the regs specified in the mask. */
+
+static void
+push_regs (HARD_REG_SET *mask, int interrupt_handler)
+{
+ int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+ int skip_fpscr = 0;
+
+ /* Push PR last; this gives better latencies after the prologue, and
+ candidates for the return delay slot when there are no general
+ registers pushed. */
+ for (; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ /* If this is an interrupt handler, and the SZ bit varies,
+ and we have to push any floating point register, we need
+ to switch to the correct precision first. */
+ if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
+ && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
+ {
+ HARD_REG_SET unsaved;
+
+ push (FPSCR_REG);
+ COMPL_HARD_REG_SET (unsaved, *mask);
+ fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
+ skip_fpscr = 1;
+ }
+ if (i != PR_REG
+ && (i != FPSCR_REG || ! skip_fpscr)
+ && TEST_HARD_REG_BIT (*mask, i))
+ {
+ /* If the ISR has RESBANK attribute assigned, don't push any of
+ the following registers - R0-R14, MACH, MACL and GBR. */
+ if (! (sh_cfun_resbank_handler_p ()
+ && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
+ || i == MACH_REG
+ || i == MACL_REG
+ || i == GBR_REG)))
+ push (i);
+ }
+ }
+
+ /* Push banked registers last to improve delay slot opportunities. */
+ if (interrupt_handler)
+ {
+ bool use_movml = false;
+
+ if (TARGET_SH2A)
+ {
+ unsigned int count = 0;
+
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (*mask, i))
+ count++;
+ else
+ break;
+
+ /* Use movml when all banked registers are pushed. */
+ if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+ use_movml = true;
+ }
+
+ if (use_movml)
+ {
+ rtx x, mem, reg, set;
+ rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+ /* We must avoid scheduling the multiple store insns together
+ with other insns. */
+ emit_insn (gen_blockage ());
+ x = gen_movml_push_banked (sp_reg);
+ x = frame_insn (x);
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ {
+ mem = gen_rtx_MEM (SImode, plus_constant (sp_reg, i * 4));
+ reg = gen_rtx_REG (SImode, i);
+ add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
+ }
+
+ set = gen_rtx_SET (SImode, sp_reg, plus_constant (sp_reg, - 32));
+ add_reg_note (x, REG_CFA_ADJUST_CFA, set);
+ emit_insn (gen_blockage ());
+ }
+ else
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (*mask, i))
+ push (i);
+ }
+
+ /* Don't push PR register for an ISR with RESBANK attribute assigned. */
+ if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
+ push (PR_REG);
+}
+
+/* Calculate how much extra space is needed to save all callee-saved
+ target registers.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+static int
+shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
+{
+ int reg;
+ int stack_space = 0;
+ int interrupt_handler = sh_cfun_interrupt_handler_p ();
+
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_really_used_regs[reg] || interrupt_handler)
+ && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
+ /* Leave space to save this target register on the stack,
+ in case target register allocation wants to use it. */
+ stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ return stack_space;
+}
+
+/* Decide whether we should reserve space for callee-save target registers,
+ in case target register allocation wants to use them. REGS_SAVED is
+ the space, in bytes, that is already required for register saves.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+static int
+shmedia_reserve_space_for_target_registers_p (int regs_saved,
+ HARD_REG_SET *live_regs_mask)
+{
+ if (optimize_size)
+ return 0;
+ return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
+}
+
+/* Decide how much space to reserve for callee-save target registers
+ in case target register allocation wants to use them.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+
+static int
+shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
+{
+ if (shmedia_space_reserved_for_target_registers)
+ return shmedia_target_regs_stack_space (live_regs_mask);
+ else
+ return 0;
+}
+
+/* Work out the registers which need to be saved, both as a mask and a
+ count of saved words. Return the count.
+
+ If doing a pragma interrupt function, then push all regs used by the
+ function, and if we call another function (we can tell by looking at PR),
+ make sure that all the regs it clobbers are safe too. */
+
+static int
+calc_live_regs (HARD_REG_SET *live_regs_mask)
+{
+ unsigned int reg;
+ int count;
+ tree attrs;
+ bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
+ bool nosave_low_regs;
+ int pr_live, has_call;
+
+ attrs = DECL_ATTRIBUTES (current_function_decl);
+ interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
+ trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
+ interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
+ nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
+
+ CLEAR_HARD_REG_SET (*live_regs_mask);
+ if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
+ && df_regs_ever_live_p (FPSCR_REG))
+ target_flags &= ~MASK_FPU_SINGLE;
+ /* If we can save a lot of saves by switching to double mode, do that. */
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
+ for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
+ if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
+ && (! call_really_used_regs[reg]
+ || interrupt_handler)
+ && ++count > 2)
+ {
+ target_flags &= ~MASK_FPU_SINGLE;
+ break;
+ }
+ /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
+ knows how to use it. That means the pseudo originally allocated for
+ the initial value can become the PR_MEDIA_REG hard register, as seen for
+ execute/20010122-1.c:test9. */
+ if (TARGET_SHMEDIA)
+ /* ??? this function is called from initial_elimination_offset, hence we
+ can't use the result of sh_media_register_for_return here. */
+ pr_live = sh_pr_n_sets ();
+ else
+ {
+ rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
+ pr_live = (pr_initial
+ ? (!REG_P (pr_initial)
+ || REGNO (pr_initial) != (PR_REG))
+ : df_regs_ever_live_p (PR_REG));
+ /* For SHcompact, if not optimizing, we end up with a memory reference
+ using the return address pointer for __builtin_return_address even
+ though there is no actual need to put the PR register on the stack. */
+ pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
+ }
+ /* Force PR to be live if the prologue has to call the SHmedia
+ argument decoder or register saver. */
+ if (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers))
+ pr_live = 1;
+ has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
+ for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
+ {
+ if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
+ ? pr_live
+ : interrupt_handler
+ ? (/* Need to save all the regs ever live. */
+ (df_regs_ever_live_p (reg)
+ || (call_really_used_regs[reg]
+ && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
+ || reg == PIC_OFFSET_TABLE_REGNUM)
+ && has_call)
+ || (TARGET_SHMEDIA && has_call
+ && REGISTER_NATURAL_MODE (reg) == SImode
+ && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
+ && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
+ && reg != RETURN_ADDRESS_POINTER_REGNUM
+ && reg != T_REG && reg != GBR_REG
+ /* Push fpscr only on targets which have an FPU. */
+ && (reg != FPSCR_REG || TARGET_FPU_ANY))
+ : (/* Only push those regs which are used and need to be saved. */
+ (TARGET_SHCOMPACT
+ && flag_pic
+ && crtl->args.info.call_cookie
+ && reg == PIC_OFFSET_TABLE_REGNUM)
+ || (df_regs_ever_live_p (reg)
+ && ((!call_really_used_regs[reg]
+ && !(reg != PIC_OFFSET_TABLE_REGNUM
+ && fixed_regs[reg] && call_used_regs[reg]))
+ || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
+ || (crtl->calls_eh_return
+ && (reg == EH_RETURN_DATA_REGNO (0)
+ || reg == EH_RETURN_DATA_REGNO (1)
+ || reg == EH_RETURN_DATA_REGNO (2)
+ || reg == EH_RETURN_DATA_REGNO (3)))
+ || ((reg == MACL_REG || reg == MACH_REG)
+ && df_regs_ever_live_p (reg)
+ && sh_cfun_attr_renesas_p ())
+ ))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, reg);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+
+ if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
+ && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
+ {
+ if (FP_REGISTER_P (reg))
+ {
+ if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
+ }
+ }
+ else if (XD_REGISTER_P (reg))
+ {
+ /* Must switch to double mode to access these registers. */
+ target_flags &= ~MASK_FPU_SINGLE;
+ }
+ }
+ }
+ if (nosave_low_regs && reg == R8_REG)
+ break;
+ }
+ /* If we have a target register optimization pass after prologue / epilogue
+ threading, we need to assume all target registers will be live even if
+ they aren't now. */
+ if (flag_branch_target_load_optimize2
+ && TARGET_SAVE_ALL_TARGET_REGS
+ && shmedia_space_reserved_for_target_registers)
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_really_used_regs[reg] || interrupt_handler)
+ && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, reg);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ }
+ /* If this is an interrupt handler, we don't have any call-clobbered
+ registers we can conveniently use for target register save/restore.
+ Make sure we save at least one general purpose register when we need
+ to save target registers. */
+ if (interrupt_handler
+ && hard_reg_set_intersect_p (*live_regs_mask,
+ reg_class_contents[TARGET_REGS])
+ && ! hard_reg_set_intersect_p (*live_regs_mask,
+ reg_class_contents[GENERAL_REGS]))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
+ }
+
+ return count;
+}
+
+/* Code to generate prologue and epilogue sequences.  */
+
+/* PUSHED is the number of bytes that are being pushed on the
+ stack for register saves. Return the frame size, padded
+ appropriately so that the stack stays properly aligned. */
+static HOST_WIDE_INT
+rounded_frame_size (int pushed)
+{
+ HOST_WIDE_INT size = get_frame_size ();
+ HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
+
+ if (ACCUMULATE_OUTGOING_ARGS)
+ size += crtl->outgoing_args_size;
+
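+  /* For example, assuming STACK_BOUNDARY == 64 so ALIGN == 8:
+     SIZE == 30 and PUSHED == 20 give
+     ((30 + 20 + 8 - 1) & -8) - 20 == 36, keeping PUSHED plus the
+     returned frame size a multiple of the boundary.  */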
+ return ((size + pushed + align - 1) & -align) - pushed;
+}
+
+/* Choose a call-clobbered target-branch register that remains
+   unchanged throughout the whole function.  We set it up as the return
+ value in the prologue. */
+int
+sh_media_register_for_return (void)
+{
+ int regno;
+ int tr0_used;
+
+ if (! current_function_is_leaf)
+ return -1;
+ if (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl)))
+ return -1;
+ if (sh_cfun_interrupt_handler_p ())
+ return -1;
+
+ tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+
+ for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
+ if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
+ return regno;
+
+ return -1;
+}
+
+/* The maximum registers we need to save are:
+ - 62 general purpose registers (r15 is stack pointer, r63 is zero)
+   - 32 floating point registers (for each pair, we save none,
+     one single-precision value, or a double-precision value).
+   - 8 target registers.
+   Delimiter entries are accounted for separately in save_schedule.  */
+#define MAX_SAVED_REGS (62+32+8)
+
+typedef struct save_entry_s
+{
+ unsigned char reg;
+ unsigned char mode;
+ short offset;
+} save_entry;
+
+#define MAX_TEMPS 4
+
+/* There will be a delimiter entry with VOIDmode both at the start and the
+   end of a filled-in schedule.  The end delimiter has the offset of the
+   save with the smallest (i.e. most negative) offset.  */
+typedef struct save_schedule_s
+{
+ save_entry entries[MAX_SAVED_REGS + 2];
+ int temps[MAX_TEMPS+1];
+} save_schedule;
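+
+/* A filled-in schedule thus looks like this (a sketch, with offsets
+   growing more negative toward the end):
+
+     entries[0]      reg == -1, mode == VOIDmode, offset == offset_base
+     entries[1..n]   the actual saves
+     entries[n + 1]  reg == -1, mode == VOIDmode, offset == the most
+                     negative save offset
+
+   temps[] holds the scratch register numbers and is terminated by -1.  */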
+
+/* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
+ use reverse order. Returns the last entry written to (not counting
+ the delimiter). OFFSET_BASE is a number to be added to all offset
+ entries. */
+
+static save_entry *
+sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
+ int offset_base)
+{
+ int align, i;
+ save_entry *entry = schedule->entries;
+ int tmpx = 0;
+ int offset;
+
+ if (! current_function_interrupt)
+ for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
+ if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
+ && ! FUNCTION_ARG_REGNO_P (i)
+ && i != FIRST_RET_REG
+ && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
+ && ! (crtl->calls_eh_return
+ && (i == EH_RETURN_STACKADJ_REGNO
+ || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
+ && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
+ schedule->temps[tmpx++] = i;
+ entry->reg = -1;
+ entry->mode = VOIDmode;
+ entry->offset = offset_base;
+ entry++;
+ /* We loop twice: first, we save 8-byte aligned registers in the
+ higher addresses, that are known to be aligned. Then, we
+ proceed to saving 32-bit registers that don't need 8-byte
+ alignment.
+ If this is an interrupt function, all registers that need saving
+     need to be saved in full.  Moreover, we need to postpone saving
+     target registers until we have saved some general-purpose registers
+     we can then use as scratch registers.  */
+ offset = offset_base;
+ for (align = 1; align >= 0; align--)
+ {
+ for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
+ if (TEST_HARD_REG_BIT (*live_regs_mask, i))
+ {
+ enum machine_mode mode = REGISTER_NATURAL_MODE (i);
+ int reg = i;
+
+ if (current_function_interrupt)
+ {
+ if (TARGET_REGISTER_P (i))
+ continue;
+ if (GENERAL_REGISTER_P (i))
+ mode = DImode;
+ }
+ if (mode == SFmode && (i % 2) == 1
+ && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
+ && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
+ {
+ mode = DFmode;
+ i--;
+ reg--;
+ }
+
+ /* If we're doing the aligned pass and this is not aligned,
+ or we're doing the unaligned pass and this is aligned,
+ skip it. */
+ if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
+ != align)
+ continue;
+
+ if (current_function_interrupt
+ && GENERAL_REGISTER_P (i)
+ && tmpx < MAX_TEMPS)
+ schedule->temps[tmpx++] = i;
+
+ offset -= GET_MODE_SIZE (mode);
+ entry->reg = i;
+ entry->mode = mode;
+ entry->offset = offset;
+ entry++;
+ }
+ if (align && current_function_interrupt)
+ for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
+ if (TEST_HARD_REG_BIT (*live_regs_mask, i))
+ {
+ offset -= GET_MODE_SIZE (DImode);
+ entry->reg = i;
+ entry->mode = DImode;
+ entry->offset = offset;
+ entry++;
+ }
+ }
+ entry->reg = -1;
+ entry->mode = VOIDmode;
+ entry->offset = offset;
+ schedule->temps[tmpx] = -1;
+ return entry - 1;
+}
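+
+/* The net effect of the two ALIGN passes above is that 8-byte saves end
+   up at the higher, known-aligned addresses and 4-byte saves below them,
+   so no save ever needs an unaligned slot.  */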
+
+void
+sh_expand_prologue (void)
+{
+ HARD_REG_SET live_regs_mask;
+ int d, i;
+ int d_rounding = 0;
+ int save_flags = target_flags;
+ int pretend_args;
+ int stack_usage;
+ tree sp_switch_attr
+ = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
+
+ current_function_interrupt = sh_cfun_interrupt_handler_p ();
+
+ /* We have pretend args if we had an object sent partially in registers
+ and partially on the stack, e.g. a large structure. */
+ pretend_args = crtl->args.pretend_args_size;
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
+ && (NPARM_REGS(SImode)
+ > crtl->args.info.arg_count[(int) SH_ARG_INT]))
+ pretend_args = 0;
+
+ output_stack_adjust (-pretend_args
+ - crtl->args.info.stack_regs * 8,
+ stack_pointer_rtx, 0, NULL, true);
+ stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
+
+ if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
+ /* We're going to use the PIC register to load the address of the
+ incoming-argument decoder and/or of the return trampoline from
+ the GOT, so make sure the PIC register is preserved and
+ initialized. */
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
+ {
+ int reg;
+
+ /* First, make all registers with incoming arguments that will
+ be pushed onto the stack live, so that register renaming
+ doesn't overwrite them. */
+ for (reg = 0; reg < NPARM_REGS (SImode); reg++)
+ if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
+ >= NPARM_REGS (SImode) - reg)
+ for (; reg < NPARM_REGS (SImode); reg++)
+ emit_insn (gen_shcompact_preserve_incoming_args
+ (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
+ else if (CALL_COOKIE_INT_REG_GET
+ (crtl->args.info.call_cookie, reg) == 1)
+ emit_insn (gen_shcompact_preserve_incoming_args
+ (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
+
+ emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
+ stack_pointer_rtx);
+ emit_move_insn (gen_rtx_REG (SImode, R0_REG),
+ GEN_INT (crtl->args.info.call_cookie));
+ emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
+ gen_rtx_REG (SImode, R0_REG));
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ int tr = sh_media_register_for_return ();
+
+ if (tr >= 0)
+ emit_move_insn (gen_rtx_REG (DImode, tr),
+ gen_rtx_REG (DImode, PR_MEDIA_REG));
+ }
+
+ /* Emit the code for SETUP_VARARGS. */
+ if (cfun->stdarg)
+ {
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
+ {
+	  /* Push arg regs as if they'd been provided by the caller on the
+	     stack.  */
+ for (i = 0; i < NPARM_REGS(SImode); i++)
+ {
+ int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
+
+ if (i >= (NPARM_REGS(SImode)
+ - crtl->args.info.arg_count[(int) SH_ARG_INT]
+ ))
+ break;
+ push (rn);
+ stack_usage += GET_MODE_SIZE (SImode);
+ }
+ }
+ }
+
+ /* If we're supposed to switch stacks at function entry, do so now. */
+ if (sp_switch_attr)
+ {
+ rtx lab, newsrc;
+ /* The argument specifies a variable holding the address of the
+ stack the interrupt function should switch to/from at entry/exit. */
+ tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
+ const char *s
+ = ggc_strdup (TREE_STRING_POINTER (arg));
+ rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
+
+ lab = add_constant (sp_switch, SImode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+ newsrc = gen_const_mem (SImode, newsrc);
+
+ emit_insn (gen_sp_switch_1 (newsrc));
+ }
+
+ d = calc_live_regs (&live_regs_mask);
+ /* ??? Maybe we could save some switching if we can move a mode switch
+ that already happens to be at the function start into the prologue. */
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+
+ if (TARGET_SH5)
+ {
+ int offset_base, offset;
+ rtx r0 = NULL_RTX;
+ int offset_in_r0 = -1;
+ int sp_in_r0 = 0;
+ int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
+ int total_size, save_size;
+ save_schedule schedule;
+ save_entry *entry;
+ int *tmp_pnt;
+
+ if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
+ && ! current_function_interrupt)
+ r0 = gen_rtx_REG (Pmode, R0_REG);
+
+      /* D is the actual number of bytes that we need for saving registers;
+ however, in initial_elimination_offset we have committed to using
+ an additional TREGS_SPACE amount of bytes - in order to keep both
+ addresses to arguments supplied by the caller and local variables
+ valid, we must keep this gap. Place it between the incoming
+ arguments and the actually saved registers in a bid to optimize
+ locality of reference. */
+ total_size = d + tregs_space;
+ total_size += rounded_frame_size (total_size);
+ save_size = total_size - rounded_frame_size (d);
+ if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
+ d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
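+
+      /* E.g. with an 8-byte stack boundary and save_size == 20,
+	 d_rounding == 8 - 20 % 8 == 4, making save_size + d_rounding
+	 boundary-aligned.  */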
+
+ /* If adjusting the stack in a single step costs nothing extra, do so.
+ I.e. either if a single addi is enough, or we need a movi anyway,
+ and we don't exceed the maximum offset range (the test for the
+ latter is conservative for simplicity). */
+ if (TARGET_SHMEDIA
+ && (CONST_OK_FOR_I10 (-total_size)
+ || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
+ && total_size <= 2044)))
+ d_rounding = total_size - save_size;
+
+ offset_base = d + d_rounding;
+
+ output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
+ 0, NULL, true);
+ stack_usage += save_size + d_rounding;
+
+ sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
+ tmp_pnt = schedule.temps;
+ for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
+ {
+ enum machine_mode mode = (enum machine_mode) entry->mode;
+ unsigned int reg = entry->reg;
+ rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
+ rtx orig_reg_rtx;
+
+ offset = entry->offset;
+
+ reg_rtx = gen_rtx_REG (mode, reg);
+
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
+ {
+ gcc_assert (r0);
+ mem_rtx = NULL_RTX;
+ }
+
+ if (HAVE_PRE_DECREMENT
+ && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
+ || mem_rtx == NULL_RTX
+ || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
+ {
+ pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
+
+ if (!memory_address_p (mode, XEXP (pre_dec, 0)))
+ pre_dec = NULL_RTX;
+ else
+ {
+ mem_rtx = NULL_RTX;
+ offset += GET_MODE_SIZE (mode);
+ }
+ }
+
+ if (mem_rtx != NULL_RTX)
+ goto addr_ok;
+
+ if (offset_in_r0 == -1)
+ {
+ emit_move_insn (r0, GEN_INT (offset));
+ offset_in_r0 = offset;
+ }
+ else if (offset != offset_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0,
+ GEN_INT (offset - offset_in_r0)));
+ offset_in_r0 += offset - offset_in_r0;
+ }
+
+ if (pre_dec != NULL_RTX)
+ {
+ if (! sp_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0, stack_pointer_rtx));
+ sp_in_r0 = 1;
+ }
+
+ offset -= GET_MODE_SIZE (mode);
+ offset_in_r0 -= GET_MODE_SIZE (mode);
+
+ mem_rtx = pre_dec;
+ }
+ else if (sp_in_r0)
+ mem_rtx = gen_frame_mem (mode, r0);
+ else
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ r0));
+
+ /* We must not use an r0-based address for target-branch
+ registers or for special registers without pre-dec
+ memory addresses, since we store their values in r0
+ first. */
+ gcc_assert (!TARGET_REGISTER_P (reg)
+ && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
+ || mem_rtx == pre_dec));
+
+ addr_ok:
+ orig_reg_rtx = reg_rtx;
+ if (TARGET_REGISTER_P (reg)
+ || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
+ && mem_rtx != pre_dec))
+ {
+ rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
+
+ emit_move_insn (tmp_reg, reg_rtx);
+
+ if (REGNO (tmp_reg) == R0_REG)
+ {
+ offset_in_r0 = -1;
+ sp_in_r0 = 0;
+ gcc_assert (!refers_to_regno_p
+ (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
+ }
+
+ if (*++tmp_pnt <= 0)
+ tmp_pnt = schedule.temps;
+
+ reg_rtx = tmp_reg;
+ }
+ {
+ rtx insn;
+
+	    /* Mark as interesting for the DWARF CFI generator.  */
+ insn = emit_move_insn (mem_rtx, reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* If we use an intermediate register for the save, we can't
+	       describe this exactly in CFI as a copy of the to-be-saved
+	       register into the temporary register and then a save of the
+	       temporary register to the stack, because the temporary register can
+ have a different natural size than the to-be-saved register.
+ Thus, we gloss over the intermediate copy and pretend we do
+ a direct save from the to-be-saved register. */
+ if (REGNO (reg_rtx) != reg)
+ {
+ rtx set;
+
+ set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+
+ if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
+ {
+ rtx reg_rtx = gen_rtx_REG (mode, reg);
+ rtx set;
+ rtx mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+ }
+ }
+
+ gcc_assert (entry->offset == d_rounding);
+ }
+ else
+ {
+ push_regs (&live_regs_mask, current_function_interrupt);
+ stack_usage += d;
+ }
+
+ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ emit_insn (gen_GOTaddr2picreg ());
+
+ if (SHMEDIA_REGS_STACK_ADJUST ())
+ {
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ (TARGET_FPU_ANY
+ ? "__GCC_push_shmedia_regs"
+ : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
+ emit_insn (gen_shmedia_save_restore_regs_compact
+ (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
+ }
+
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+
+ target_flags = save_flags;
+
+ output_stack_adjust (-rounded_frame_size (d) + d_rounding,
+ stack_pointer_rtx, 0, NULL, true);
+ stack_usage += rounded_frame_size (d) - d_rounding;
+
+ if (frame_pointer_needed)
+ frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
+ {
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ "__GCC_shcompact_incoming_args", SFUNC_GOT);
+ emit_insn (gen_shcompact_incoming_args ());
+ }
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = stack_usage;
+}
+
+void
+sh_expand_epilogue (bool sibcall_p)
+{
+ HARD_REG_SET live_regs_mask;
+ int d, i;
+ int d_rounding = 0;
+
+ int save_flags = target_flags;
+ int frame_size, save_size;
+ int fpscr_deferred = 0;
+ int e = sibcall_p ? -1 : 1;
+
+ d = calc_live_regs (&live_regs_mask);
+
+ save_size = d;
+ frame_size = rounded_frame_size (d);
+
+ if (TARGET_SH5)
+ {
+ int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
+ int total_size;
+ if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
+ d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - d % (STACK_BOUNDARY / BITS_PER_UNIT));
+
+ total_size = d + tregs_space;
+ total_size += rounded_frame_size (total_size);
+ save_size = total_size - frame_size;
+
+ /* If adjusting the stack in a single step costs nothing extra, do so.
+ I.e. either if a single addi is enough, or we need a movi anyway,
+ and we don't exceed the maximum offset range (the test for the
+ latter is conservative for simplicity). */
+ if (TARGET_SHMEDIA
+ && ! frame_pointer_needed
+ && (CONST_OK_FOR_I10 (total_size)
+ || (! CONST_OK_FOR_I10 (save_size + d_rounding)
+ && total_size <= 2044)))
+ d_rounding = frame_size;
+
+ frame_size -= d_rounding;
+ }
+
+ if (frame_pointer_needed)
+ {
+ /* We must avoid scheduling the epilogue with previous basic blocks.
+ See PR/18032 and PR/40313. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
+ &live_regs_mask, false);
+
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
+ }
+ else if (frame_size)
+ {
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, stack_pointer_rtx, e,
+ &live_regs_mask, false);
+ }
+
+ if (SHMEDIA_REGS_STACK_ADJUST ())
+ {
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ (TARGET_FPU_ANY
+ ? "__GCC_pop_shmedia_regs"
+ : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ emit_insn (gen_shmedia_save_restore_regs_compact
+ (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
+ }
+
+ /* Pop all the registers. */
+
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+ if (TARGET_SH5)
+ {
+ int offset_base, offset;
+ int offset_in_r0 = -1;
+ int sp_in_r0 = 0;
+ rtx r0 = gen_rtx_REG (Pmode, R0_REG);
+ save_schedule schedule;
+ save_entry *entry;
+ int *tmp_pnt;
+
+ entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
+ offset_base = -entry[1].offset + d_rounding;
+ tmp_pnt = schedule.temps;
+ for (; entry->mode != VOIDmode; entry--)
+ {
+ enum machine_mode mode = (enum machine_mode) entry->mode;
+ int reg = entry->reg;
+ rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
+
+ offset = offset_base + entry->offset;
+ reg_rtx = gen_rtx_REG (mode, reg);
+
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
+ mem_rtx = NULL_RTX;
+
+ if (HAVE_POST_INCREMENT
+ && (offset == offset_in_r0
+ || (offset + GET_MODE_SIZE (mode) != d + d_rounding
+ && mem_rtx == NULL_RTX)
+ || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
+ {
+ post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
+
+ if (!memory_address_p (mode, XEXP (post_inc, 0)))
+ post_inc = NULL_RTX;
+ else
+ mem_rtx = NULL_RTX;
+ }
+
+ if (mem_rtx != NULL_RTX)
+ goto addr_ok;
+
+ if (offset_in_r0 == -1)
+ {
+ emit_move_insn (r0, GEN_INT (offset));
+ offset_in_r0 = offset;
+ }
+ else if (offset != offset_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0,
+ GEN_INT (offset - offset_in_r0)));
+ offset_in_r0 += offset - offset_in_r0;
+ }
+
+ if (post_inc != NULL_RTX)
+ {
+ if (! sp_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0, stack_pointer_rtx));
+ sp_in_r0 = 1;
+ }
+
+ mem_rtx = post_inc;
+
+ offset_in_r0 += GET_MODE_SIZE (mode);
+ }
+ else if (sp_in_r0)
+ mem_rtx = gen_frame_mem (mode, r0);
+ else
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ r0));
+
+ gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
+ || mem_rtx == post_inc);
+
+ addr_ok:
+ if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
+ && mem_rtx != post_inc)
+ {
+ emit_move_insn (r0, mem_rtx);
+ mem_rtx = r0;
+ }
+ else if (TARGET_REGISTER_P (reg))
+ {
+ rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
+
+ /* Give the scheduler a bit of freedom by using up to
+ MAX_TEMPS registers in a round-robin fashion. */
+ emit_move_insn (tmp_reg, mem_rtx);
+ mem_rtx = tmp_reg;
+ if (*++tmp_pnt < 0)
+ tmp_pnt = schedule.temps;
+ }
+
+ emit_move_insn (reg_rtx, mem_rtx);
+ }
+
+ gcc_assert (entry->offset + offset_base == d + d_rounding);
+ }
+ else /* ! TARGET_SH5 */
+ {
+ int last_reg;
+
+ save_size = 0;
+      /* For an ISR with RESBANK attribute assigned, don't pop the PR
+	 register.  */
+ if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
+ && !sh_cfun_resbank_handler_p ())
+ {
+ if (!frame_pointer_needed)
+ emit_insn (gen_blockage ());
+ pop (PR_REG);
+ }
+
+      /* Banked registers are popped first to avoid being scheduled in the
+	 delay slot.  RTE switches banks before the delay slot instruction.  */
+ if (current_function_interrupt)
+ {
+ bool use_movml = false;
+
+ if (TARGET_SH2A)
+ {
+ unsigned int count = 0;
+
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (live_regs_mask, i))
+ count++;
+ else
+ break;
+
+	      /* Use movml when all banked registers are popped.  */
+ if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+ use_movml = true;
+ }
+
+ if (use_movml)
+ {
+ rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	      /* We must avoid scheduling the multiple-load insn with other
+		 insns.  */
+ emit_insn (gen_blockage ());
+ emit_insn (gen_movml_pop_banked (sp_reg));
+ emit_insn (gen_blockage ());
+ }
+ else
+ for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+ if (TEST_HARD_REG_BIT (live_regs_mask, i))
+ pop (i);
+
+ last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
+ }
+ else
+ last_reg = FIRST_PSEUDO_REGISTER;
+
+ for (i = 0; i < last_reg; i++)
+ {
+ int j = (FIRST_PSEUDO_REGISTER - 1) - i;
+
+ if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
+ && hard_reg_set_intersect_p (live_regs_mask,
+ reg_class_contents[DF_REGS]))
+ fpscr_deferred = 1;
+	  /* For an ISR with RESBANK attribute assigned, don't pop the
+	     following registers: R0-R14, MACH, MACL and GBR.  */
+ else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
+ && ! (sh_cfun_resbank_handler_p ()
+ && ((j >= FIRST_GENERAL_REG
+ && j < LAST_GENERAL_REG)
+ || j == MACH_REG
+ || j == MACL_REG
+ || j == GBR_REG)))
+ pop (j);
+
+ if (j == FIRST_FP_REG && fpscr_deferred)
+ pop (FPSCR_REG);
+ }
+ }
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+ target_flags = save_flags;
+
+ output_stack_adjust (crtl->args.pretend_args_size
+ + save_size + d_rounding
+ + crtl->args.info.stack_regs * 8,
+ stack_pointer_rtx, e, NULL, false);
+
+ if (crtl->calls_eh_return)
+ emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ /* Switch back to the normal stack if necessary. */
+ if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
+ emit_insn (gen_sp_switch_2 ());
+
+ /* Tell flow the insn that pops PR isn't dead. */
+ /* PR_REG will never be live in SHmedia mode, and we don't need to
+ USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
+ by the return pattern. */
+ if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
+ emit_use (gen_rtx_REG (SImode, PR_REG));
+}
+
+static int sh_need_epilogue_known = 0;
+
+int
+sh_need_epilogue (void)
+{
+ if (! sh_need_epilogue_known)
+ {
+ rtx epilogue;
+
+ start_sequence ();
+      sh_expand_epilogue (false);
+ epilogue = get_insns ();
+ end_sequence ();
+ sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
+ }
+ return sh_need_epilogue_known > 0;
+}
+
+/* Emit code to change the current function's return address to RA.
+ TEMP is available as a scratch register, if needed. */
+
+void
+sh_set_return_address (rtx ra, rtx tmp)
+{
+ HARD_REG_SET live_regs_mask;
+ int d;
+ int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
+ int pr_offset;
+
+ d = calc_live_regs (&live_regs_mask);
+
+  /* If pr_reg isn't live, we can set it (or the register given in
+     sh_media_register_for_return) directly.  */
+ if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
+ {
+ rtx rr;
+
+ if (TARGET_SHMEDIA)
+ {
+ int rr_regno = sh_media_register_for_return ();
+
+ if (rr_regno < 0)
+ rr_regno = pr_reg;
+
+ rr = gen_rtx_REG (DImode, rr_regno);
+ }
+ else
+ rr = gen_rtx_REG (SImode, pr_reg);
+
+ emit_insn (GEN_MOV (rr, ra));
+ /* Tell flow the register for return isn't dead. */
+ emit_use (rr);
+ return;
+ }
+
+ if (TARGET_SH5)
+ {
+ int offset;
+ save_schedule schedule;
+ save_entry *entry;
+
+ entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
+ offset = entry[1].offset;
+ for (; entry->mode != VOIDmode; entry--)
+ if (entry->reg == pr_reg)
+ goto found;
+
+      /* We can't find the PR register.  */
+ gcc_unreachable ();
+
+ found:
+ offset = entry->offset - offset;
+ pr_offset = (rounded_frame_size (d) + offset
+ + SHMEDIA_REGS_STACK_ADJUST ());
+ }
+ else
+ pr_offset = rounded_frame_size (d);
+
+ emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
+
+ if (frame_pointer_needed)
+ emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
+ else
+ emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
+
+ tmp = gen_frame_mem (Pmode, tmp);
+ emit_insn (GEN_MOV (tmp, ra));
+  /* Tell flow this store isn't dead.  */
+ emit_use (tmp);
+}
+
+/* Clear variables at function end. */
+
+static void
+sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ sh_need_epilogue_known = 0;
+}
+
+static rtx
+sh_builtin_saveregs (void)
+{
+ /* First unnamed integer register. */
+ int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
+ /* Number of integer registers we need to save. */
+ int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
+  /* First unnamed SFmode float reg.  */
+ int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
+ /* Number of SFmode float regs to save. */
+ int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
+ rtx regbuf, fpregs;
+ int bufsize, regno;
+ alias_set_type alias_set;
+
+ if (TARGET_SH5)
+ {
+ if (n_intregs)
+ {
+ int pushregs = n_intregs;
+
+ while (pushregs < NPARM_REGS (SImode) - 1
+ && (CALL_COOKIE_INT_REG_GET
+ (crtl->args.info.call_cookie,
+ NPARM_REGS (SImode) - pushregs)
+ == 1))
+ {
+ crtl->args.info.call_cookie
+ &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
+ - pushregs, 1);
+ pushregs++;
+ }
+
+ if (pushregs == NPARM_REGS (SImode))
+ crtl->args.info.call_cookie
+ |= (CALL_COOKIE_INT_REG (0, 1)
+ | CALL_COOKIE_STACKSEQ (pushregs - 1));
+ else
+ crtl->args.info.call_cookie
+ |= CALL_COOKIE_STACKSEQ (pushregs);
+
+ crtl->args.pretend_args_size += 8 * n_intregs;
+ }
+ if (TARGET_SHCOMPACT)
+ return const0_rtx;
+ }
+
+ if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
+ {
+ error ("__builtin_saveregs not supported by this subtarget");
+ return const0_rtx;
+ }
+
+ if (TARGET_SHMEDIA)
+ n_floatregs = 0;
+
+  /* Allocate a block of memory for the regs.  */
+ /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
+ Or can assign_stack_local accept a 0 SIZE argument? */
+ bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
+
+ if (TARGET_SHMEDIA)
+ regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
+ else if (n_floatregs & 1)
+ {
+ rtx addr;
+
+ regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
+ addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
+ emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
+ regbuf = change_address (regbuf, BLKmode, addr);
+ }
+ else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
+ {
+ rtx addr, mask;
+
+ regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
+ addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
+ mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
+ emit_insn (gen_andsi3 (addr, addr, mask));
+ regbuf = change_address (regbuf, BLKmode, addr);
+ }
+ else
+ regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
+ alias_set = get_varargs_alias_set ();
+ set_mem_alias_set (regbuf, alias_set);
+
+ /* Save int args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved. */
+ if (n_intregs > 0)
+ move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
+ adjust_address (regbuf, BLKmode,
+ n_floatregs * UNITS_PER_WORD),
+ n_intregs);
+
+ if (TARGET_SHMEDIA)
+ /* Return the address of the regbuf. */
+ return XEXP (regbuf, 0);
+
+ /* Save float args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved.
+ We explicitly build a pointer to the buffer because it halves the insn
+ count when not optimizing (otherwise the pointer is built for each reg
+ saved).
+ We emit the moves in reverse order so that we can use predecrement. */
+
+ fpregs = copy_to_mode_reg (Pmode,
+ plus_constant (XEXP (regbuf, 0),
+ n_floatregs * UNITS_PER_WORD));
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ rtx mem;
+ for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ mem = change_address (regbuf, DFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
+ }
+ regno = first_floatreg;
+ if (regno & 1)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
+ mem = change_address (regbuf, SFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
+ - (TARGET_LITTLE_ENDIAN != 0)));
+ }
+ }
+ else
+ for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
+ {
+ rtx mem;
+
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
+ mem = change_address (regbuf, SFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
+ }
+
+ /* Return the address of the regbuf. */
+ return XEXP (regbuf, 0);
+}
+
+/* Define the `__builtin_va_list' type for the ABI. */
+
+static tree
+sh_build_builtin_va_list (void)
+{
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree record, type_decl;
+
+ if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
+ || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
+ return ptr_type_node;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_next_o = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_o"),
+ ptr_type_node);
+ f_next_o_limit = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__va_next_o_limit"),
+ ptr_type_node);
+ f_next_fp = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_fp"),
+ ptr_type_node);
+ f_next_fp_limit = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__va_next_fp_limit"),
+ ptr_type_node);
+ f_next_stack = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_stack"),
+ ptr_type_node);
+
+ DECL_FIELD_CONTEXT (f_next_o) = record;
+ DECL_FIELD_CONTEXT (f_next_o_limit) = record;
+ DECL_FIELD_CONTEXT (f_next_fp) = record;
+ DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
+ DECL_FIELD_CONTEXT (f_next_stack) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_next_o;
+ DECL_CHAIN (f_next_o) = f_next_o_limit;
+ DECL_CHAIN (f_next_o_limit) = f_next_fp;
+ DECL_CHAIN (f_next_fp) = f_next_fp_limit;
+ DECL_CHAIN (f_next_fp_limit) = f_next_stack;
+
+ layout_type (record);
+
+ return record;
+}
+
+/* Implement `va_start' for varargs and stdarg. */
+
+static void
+sh_va_start (tree valist, rtx nextarg)
+{
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
+ tree t, u;
+ int nfp, nint;
+
+ if (TARGET_SH5)
+ {
+ expand_builtin_saveregs ();
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ if ((! TARGET_SH2E && ! TARGET_SH4)
+ || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_next_o = TYPE_FIELDS (va_list_type_node);
+ f_next_o_limit = DECL_CHAIN (f_next_o);
+ f_next_fp = DECL_CHAIN (f_next_o_limit);
+ f_next_fp_limit = DECL_CHAIN (f_next_fp);
+ f_next_stack = DECL_CHAIN (f_next_fp_limit);
+
+ next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
+ NULL_TREE);
+ next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
+ valist, f_next_o_limit, NULL_TREE);
+ next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
+ NULL_TREE);
+ next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
+ valist, f_next_fp_limit, NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* Call __builtin_saveregs. */
+ u = make_tree (sizetype, expand_builtin_saveregs ());
+ u = fold_convert (ptr_type_node, u);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
+ if (nfp < 8)
+ nfp = 8 - nfp;
+ else
+ nfp = 0;
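+  /* NFP is now the number of unnamed float argument registers that
+     __builtin_saveregs saved, e.g. 3 named float args leave
+     nfp == 8 - 3 == 5 (NPARM_REGS (SFmode) being 8 here).  */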
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
+ size_int (UNITS_PER_WORD * nfp));
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ nint = crtl->args.info.arg_count[SH_ARG_INT];
+ if (nint < 4)
+ nint = 4 - nint;
+ else
+ nint = 0;
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u,
+ size_int (UNITS_PER_WORD * nint));
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ u = make_tree (ptr_type_node, nextarg);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+/* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
+ member, return it. */
+static tree
+find_sole_member (tree type)
+{
+ tree field, member = NULL_TREE;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+ if (!DECL_SIZE (field))
+ return NULL_TREE;
+ if (integer_zerop (DECL_SIZE (field)))
+ continue;
+ if (member)
+ return NULL_TREE;
+ member = field;
+ }
+ return member;
+}
+
+/* Implement `va_arg'.  */
+
+static tree
+sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size, rsize;
+ tree tmp, pptr_type_node;
+ tree addr, lab_over = NULL, result = NULL;
+ int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
+ tree eff_type;
+
+ if (pass_by_ref)
+ type = build_pointer_type (type);
+
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
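+  /* E.g. assuming UNITS_PER_WORD == 4, a 6-byte type gets
+     rsize == (6 + 4 - 1) & -4 == 8.  */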
+ pptr_type_node = build_pointer_type (ptr_type_node);
+
+ if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
+ && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
+ {
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
+ int pass_as_float;
+ tree lab_false;
+ tree member;
+
+ f_next_o = TYPE_FIELDS (va_list_type_node);
+ f_next_o_limit = DECL_CHAIN (f_next_o);
+ f_next_fp = DECL_CHAIN (f_next_o_limit);
+ f_next_fp_limit = DECL_CHAIN (f_next_fp);
+ f_next_stack = DECL_CHAIN (f_next_fp_limit);
+
+ next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
+ NULL_TREE);
+ next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
+ valist, f_next_o_limit, NULL_TREE);
+ next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
+ valist, f_next_fp, NULL_TREE);
+ next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
+ valist, f_next_fp_limit, NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* Structures with a single member with a distinct mode are passed
+	 like their member.  This is relevant if the latter is a REAL_TYPE
+	 or COMPLEX_TYPE.  */
+ eff_type = type;
+ while (TREE_CODE (eff_type) == RECORD_TYPE
+ && (member = find_sole_member (eff_type))
+ && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
+ || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
+ || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
+ {
+ tree field_type = TREE_TYPE (member);
+
+ if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
+ eff_type = field_type;
+ else
+ {
+ gcc_assert ((TYPE_ALIGN (eff_type)
+ < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
+ || (TYPE_ALIGN (eff_type)
+ > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
+ break;
+ }
+ }
+
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
+ || (TREE_CODE (eff_type) == COMPLEX_TYPE
+ && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
+ && size <= 16));
+ }
+ else
+ {
+ pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
+ }
+
+ addr = create_tmp_var (pptr_type_node, NULL);
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ valist = build_simple_mem_ref (addr);
+
+ if (pass_as_float)
+ {
+ tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
+ tree cmp;
+ bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+
+ gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
+ tmp = next_fp_limit;
+ if (size > 4 && !is_double)
+ tmp = build2 (POINTER_PLUS_EXPR, TREE_TYPE (tmp),
+ unshare_expr (tmp), size_int (4 - size));
+ tmp = build2 (GE_EXPR, boolean_type_node,
+ unshare_expr (next_fp_tmp), unshare_expr (tmp));
+ cmp = build3 (COND_EXPR, void_type_node, tmp,
+ build1 (GOTO_EXPR, void_type_node,
+ unshare_expr (lab_false)), NULL_TREE);
+ if (!is_double)
+ gimplify_and_add (cmp, pre_p);
+
+ if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
+ || (is_double || size == 16))
+ {
+ tmp = fold_convert (sizetype, next_fp_tmp);
+ tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
+ size_int (UNITS_PER_WORD));
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
+ unshare_expr (next_fp_tmp), tmp);
+ gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
+ }
+ if (is_double)
+ gimplify_and_add (cmp, pre_p);
+
+#ifdef FUNCTION_ARG_SCmode_WART
+ if (TYPE_MODE (eff_type) == SCmode
+ && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
+ {
+ tree subtype = TREE_TYPE (eff_type);
+ tree real, imag;
+
+ imag
+ = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
+ imag = get_initialized_tmp_var (imag, pre_p, NULL);
+
+ real
+ = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
+ real = get_initialized_tmp_var (real, pre_p, NULL);
+
+ result = build2 (COMPLEX_EXPR, eff_type, real, imag);
+ if (type != eff_type)
+ result = build1 (VIEW_CONVERT_EXPR, type, result);
+ result = get_initialized_tmp_var (result, pre_p, NULL);
+ }
+#endif /* FUNCTION_ARG_SCmode_WART */
+
+ tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+ gimplify_assign (unshare_expr (next_fp_tmp),
+ unshare_expr (valist), pre_p);
+
+ gimplify_assign (unshare_expr (valist),
+ unshare_expr (next_fp_tmp), post_p);
+ valist = next_fp_tmp;
+ }
+ else
+ {
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
+ unshare_expr (next_o), size_int (rsize));
+ tmp = build2 (GT_EXPR, boolean_type_node, tmp,
+ unshare_expr (next_o_limit));
+ tmp = build3 (COND_EXPR, void_type_node, tmp,
+ build1 (GOTO_EXPR, void_type_node,
+ unshare_expr (lab_false)),
+ NULL_TREE);
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+
+ tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
+ gimplify_and_add (tmp, pre_p);
+
+ if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
+ gimplify_assign (unshare_expr (next_o),
+ unshare_expr (next_o_limit), pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+ }
+
+ if (!result)
+ {
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+ }
+ }
+
+  /* ??? In va-sh.h, there used to be code to make values larger than
+     size 8 indirect.  This does not match the FUNCTION_ARG macros.  */
+
+ tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
+ if (result)
+ {
+ gimplify_assign (result, tmp, pre_p);
+ result = build1 (NOP_EXPR, TREE_TYPE (result), result);
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+ }
+ else
+ result = tmp;
+
+ if (pass_by_ref)
+ result = build_va_arg_indirect_ref (result);
+
+ return result;
+}
+
+/* 64-bit floating point memory transfers are paired single-precision loads
+   or stores.  So DWARF information needs fixing in little endian (unless
+   PR=SZ=1 in FPSCR).  */
+rtx
+sh_dwarf_register_span (rtx reg)
+{
+ unsigned regno = REGNO (reg);
+
+ if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
+ return NULL_RTX;
+
+ return
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_REG (SFmode,
+ DBX_REGISTER_NUMBER (regno+1)),
+ gen_rtx_REG (SFmode,
+ DBX_REGISTER_NUMBER (regno))));
+}
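+
+/* E.g. on little-endian SH4, a DFmode value in dr0 is thus described
+   to the unwinder as the SFmode pair (fr1, fr0) rather than as one
+   64-bit register.  */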
+
+static enum machine_mode
+sh_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp, const_tree funtype,
+ int for_return)
+{
+ if (sh_promote_prototypes (funtype))
+ return promote_mode (type, mode, punsignedp);
+ else
+ return default_promote_function_mode (type, mode, punsignedp, funtype,
+ for_return);
+}
+
+static bool
+sh_promote_prototypes (const_tree type)
+{
+ if (TARGET_HITACHI)
+ return 0;
+ if (! type)
+ return 1;
+ return ! sh_attr_renesas_p (type);
+}
+
+/* Whether an argument must be passed by reference.  On SHcompact, we
+   pretend arguments wider than 32 bits that would have been passed in
+   registers are passed by reference, so that an SHmedia trampoline
+   loads them into the full 64-bit registers.  */
+
+static int
+shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
+ && (!named
+ || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
+ || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
+ && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
+ && size > 4
+ && !SHCOMPACT_FORCE_ON_STACK (mode, type)
+ && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
+ return size;
+ else
+ return 0;
+}
+
+static bool
+sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return true;
+
+ /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
+ wants to know about pass-by-reference semantics for incoming
+ arguments. */
+ if (! cum)
+ return false;
+
+ if (TARGET_SHCOMPACT)
+ {
+ cum->byref = shcompact_byref (cum, mode, type, named);
+ return cum->byref != 0;
+ }
+
+ return false;
+}
+
+static bool
+sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* ??? How can it possibly be correct to return true only on the
+ caller side of the equation? Is there someplace else in the
+ sh backend that's magically producing the copies? */
+ return (cum->outgoing
+ && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
+ % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
+}
+
+static int
+sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int words = 0;
+
+ if (!TARGET_SH5
+ && PASS_IN_REG_P (*cum, mode, type)
+ && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (ROUND_REG (*cum, mode)
+ + (mode != BLKmode
+ ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
+ : ROUND_ADVANCE (int_size_in_bytes (type)))
+ > NPARM_REGS (mode)))
+ words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
+
+ else if (!TARGET_SHCOMPACT
+ && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
+ words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
+
+ return words * UNITS_PER_WORD;
+}
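+
+/* E.g. (in the non-SH4 case above) an 8-byte argument that starts in the
+   last SImode parameter register has one word in that register and one on
+   the stack, so we return 1 * UNITS_PER_WORD partial bytes.  */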
+
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On SH the first args are normally in registers
+ and the rest are pushed. Any arg that starts within the first
+ NPARM_REGS words is at least partially passed in a register unless
+ its data type forbids. */
+
+static rtx
+sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (! TARGET_SH5 && mode == VOIDmode)
+ return GEN_INT (ca->renesas_abi ? 1 : 0);
+
+ if (! TARGET_SH5
+ && PASS_IN_REG_P (*ca, mode, type)
+ && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
+ {
+ int regno;
+
+ if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
+ && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
+ {
+ rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ BASE_ARG_REG (mode)
+ + (ROUND_REG (*ca, mode) ^ 1)),
+ const0_rtx);
+ rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ BASE_ARG_REG (mode)
+ + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
+ GEN_INT (4));
+	  return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
+ }
+
+ /* If the alignment of a DF value causes an SF register to be
+ skipped, we will use that skipped register for the next SF
+ value. */
+ if ((TARGET_HITACHI || ca->renesas_abi)
+ && ca->free_single_fp_reg
+ && mode == SFmode)
+ return gen_rtx_REG (mode, ca->free_single_fp_reg);
+
+ regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
+ ^ (mode == SFmode && TARGET_SH4
+ && TARGET_LITTLE_ENDIAN != 0
+ && ! TARGET_HITACHI && ! ca->renesas_abi);
+ return gen_rtx_REG (mode, regno);
+
+ }
+
+ if (TARGET_SH5)
+ {
+ if (mode == VOIDmode && TARGET_SHCOMPACT)
+ return GEN_INT (ca->call_cookie);
+
+ /* The following test assumes unnamed arguments are promoted to
+ DFmode. */
+ if (mode == SFmode && ca->free_single_fp_reg)
+ return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
+
+ if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
+ && (named || ! ca->prototype_p)
+ && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
+ {
+ if (! ca->prototype_p && TARGET_SHMEDIA)
+ return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
+
+ return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
+ FIRST_FP_PARM_REG
+ + ca->arg_count[(int) SH_ARG_FLOAT]);
+ }
+
+ if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
+ && (! TARGET_SHCOMPACT
+ || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
+ && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
+ type, named))))
+ {
+ return gen_rtx_REG (mode, (FIRST_PARM_REG
+ + ca->arg_count[(int) SH_ARG_INT]));
+ }
+
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be
+ available.) */
+
+static void
+sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ if (ca->force_mem)
+ ca->force_mem = 0;
+ else if (TARGET_SH5)
+ {
+ const_tree type2 = (ca->byref && type
+ ? TREE_TYPE (type)
+ : type);
+ enum machine_mode mode2 = (ca->byref && type
+ ? TYPE_MODE (type2)
+ : mode);
+ int dwords = ((ca->byref
+ ? ca->byref
+ : mode2 == BLKmode
+ ? int_size_in_bytes (type2)
+ : GET_MODE_SIZE (mode2)) + 7) / 8;
+ int numregs = MIN (dwords, NPARM_REGS (SImode)
+ - ca->arg_count[(int) SH_ARG_INT]);
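+      /* E.g. a 12-byte BLKmode argument occupies
+	 dwords == (12 + 7) / 8 == 2 of the 8-byte SH5 argument slots.  */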
+
+ if (numregs)
+ {
+ ca->arg_count[(int) SH_ARG_INT] += numregs;
+ if (TARGET_SHCOMPACT
+ && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
+ {
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - numregs, 1);
+ /* N.B. We want this also for outgoing. */
+ ca->stack_regs += numregs;
+ }
+ else if (ca->byref)
+ {
+ if (! ca->outgoing)
+ ca->stack_regs += numregs;
+ ca->byref_regs += numregs;
+ ca->byref = 0;
+ do
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - numregs, 2);
+ while (--numregs);
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - 1, 1);
+ }
+ else if (dwords > numregs)
+ {
+ int pushregs = numregs;
+
+ if (TARGET_SHCOMPACT)
+ ca->stack_regs += numregs;
+ while (pushregs < NPARM_REGS (SImode) - 1
+ && (CALL_COOKIE_INT_REG_GET
+ (ca->call_cookie,
+ NPARM_REGS (SImode) - pushregs)
+ == 1))
+ {
+ ca->call_cookie
+ &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
+ - pushregs, 1);
+ pushregs++;
+ }
+ if (numregs == NPARM_REGS (SImode))
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (0, 1)
+ | CALL_COOKIE_STACKSEQ (numregs - 1);
+ else
+ ca->call_cookie
+ |= CALL_COOKIE_STACKSEQ (numregs);
+ }
+ }
+ if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
+ && (named || ! ca->prototype_p))
+ {
+ if (mode2 == SFmode && ca->free_single_fp_reg)
+ ca->free_single_fp_reg = 0;
+ else if (ca->arg_count[(int) SH_ARG_FLOAT]
+ < NPARM_REGS (SFmode))
+ {
+ int numfpregs
+ = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
+ NPARM_REGS (SFmode)
+ - ca->arg_count[(int) SH_ARG_FLOAT]);
+
+ ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
+
+ if (TARGET_SHCOMPACT && ! ca->prototype_p)
+ {
+ if (ca->outgoing && numregs > 0)
+ do
+ {
+ ca->call_cookie
+ |= (CALL_COOKIE_INT_REG
+ (ca->arg_count[(int) SH_ARG_INT]
+ - numregs + ((numfpregs - 2) / 2),
+ 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
+ - numfpregs) / 2));
+ }
+ while (numfpregs -= 2);
+ }
+ else if (mode2 == SFmode && (named)
+ && (ca->arg_count[(int) SH_ARG_FLOAT]
+ < NPARM_REGS (SFmode)))
+ ca->free_single_fp_reg
+ = FIRST_FP_PARM_REG - numfpregs
+ + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
+ }
+ }
+ return;
+ }
+
+ if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
+ {
+ /* Note that we've used the skipped register. */
+ if (mode == SFmode && ca->free_single_fp_reg)
+ {
+ ca->free_single_fp_reg = 0;
+ return;
+ }
+  /* When we have a DF after an SF, there's an SF register that gets
+     skipped in order to align the DF value.  We note this skipped
+     register, because the next SF value will use it, and not the
+     SF that follows the DF.  */
+ if (mode == DFmode
+ && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
+ {
+ ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
+ + BASE_ARG_REG (mode));
+ }
+ }
+
+ if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
+ || PASS_IN_REG_P (*ca, mode, type))
+ (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
+ = (ROUND_REG (*ca, mode)
+ + (mode == BLKmode
+ ? ROUND_ADVANCE (int_size_in_bytes (type))
+ : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
+}
+
+/* The Renesas calling convention doesn't quite fit into this scheme since
+ the address is passed like an invisible argument, but one that is always
+ passed in memory. */
+static rtx
+sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
+{
+ if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
+ return 0;
+ return gen_rtx_REG (Pmode, 2);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE.
+
+ For the SH, this is like LIBCALL_VALUE, except that we must change the
+ mode like PROMOTE_MODE does.
+ ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
+ tested here has to be kept in sync with the one in explow.c:promote_mode.
+*/
+
+static rtx
+sh_function_value (const_tree valtype,
+ const_tree fn_decl_or_type,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ if (fn_decl_or_type
+ && !DECL_P (fn_decl_or_type))
+ fn_decl_or_type = NULL;
+
+ return gen_rtx_REG (
+ ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
+ && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
+ && (TREE_CODE (valtype) == INTEGER_TYPE
+ || TREE_CODE (valtype) == ENUMERAL_TYPE
+ || TREE_CODE (valtype) == BOOLEAN_TYPE
+ || TREE_CODE (valtype) == REAL_TYPE
+ || TREE_CODE (valtype) == OFFSET_TYPE))
+ && sh_promote_prototypes (fn_decl_or_type)
+ ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
+ BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
+}
+
+/* Return true if N is a possible register number of function value. */
+
+static bool
+sh_function_value_regno_p (const unsigned int regno)
+{
+ return ((regno) == FIRST_RET_REG
+ || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
+ || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+sh_return_in_memory (const_tree type, const_tree fndecl)
+{
+ if (TARGET_SH5)
+ {
+ if (TYPE_MODE (type) == BLKmode)
+ return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
+ else
+ return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
+ }
+ else
+ {
+ return (TYPE_MODE (type) == BLKmode
+ || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
+ && TREE_CODE (type) == RECORD_TYPE));
+ }
+}
+
+/* We actually emit the code in sh_expand_prologue.  We used to use
+   a static variable to flag that we need to emit this code, but that
+   doesn't work when inlining, when functions are deferred and then
+   emitted later.  Fortunately, we already have two flags that are part
+   of struct function that tell whether a function uses varargs or stdarg.  */
+static void
+sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
+ enum machine_mode mode,
+ tree type,
+ int *pretend_arg_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ gcc_assert (cfun->stdarg);
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
+ {
+ int named_parm_regs, anon_parm_regs;
+
+ named_parm_regs = (ROUND_REG (*ca, mode)
+ + (mode == BLKmode
+ ? ROUND_ADVANCE (int_size_in_bytes (type))
+ : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
+ anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
+ if (anon_parm_regs > 0)
+ *pretend_arg_size = anon_parm_regs * 4;
+ }
+}
+
+static bool
+sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
+{
+ return TARGET_SH5;
+}
+
+static bool
+sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
+{
+ return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
+}
+
+
+/* Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
+
+int
+initial_elimination_offset (int from, int to)
+{
+ int regs_saved;
+ int regs_saved_rounding = 0;
+ int total_saved_regs_space;
+ int total_auto_space;
+ int save_flags = target_flags;
+ int copy_flags;
+ HARD_REG_SET live_regs_mask;
+
+ shmedia_space_reserved_for_target_registers = false;
+ regs_saved = calc_live_regs (&live_regs_mask);
+ regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
+
+ if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
+ {
+ shmedia_space_reserved_for_target_registers = true;
+ regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
+ }
+
+ if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
+ regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
+
+ total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
+ copy_flags = target_flags;
+ target_flags = save_flags;
+
+ total_saved_regs_space = regs_saved + regs_saved_rounding;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space
+ + crtl->args.info.byref_regs * 8;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space
+ + crtl->args.info.byref_regs * 8;
+
+ /* Initial gap between fp and sp is 0. */
+ if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return 0;
+
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return rounded_frame_size (0);
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return rounded_frame_size (0);
+
+ gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
+ && (to == HARD_FRAME_POINTER_REGNUM
+ || to == STACK_POINTER_REGNUM));
+ if (TARGET_SH5)
+ {
+ int n = total_saved_regs_space;
+ int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
+ save_schedule schedule;
+ save_entry *entry;
+
+ n += total_auto_space;
+
+ /* If it wasn't saved, there's not much we can do. */
+ if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
+ return n;
+
+ target_flags = copy_flags;
+
+ sh5_schedule_saves (&live_regs_mask, &schedule, n);
+ for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
+ if (entry->reg == pr_reg)
+ {
+ target_flags = save_flags;
+ return entry->offset;
+ }
+ gcc_unreachable ();
+ }
+ else
+ return total_auto_space;
+}
+
+/* Parse the -mfixed-range= option string. */
+void
+sh_fix_range (const char *const_str)
+{
+ int i, first, last;
+ char *str, *dash, *comma;
+
+ /* str must be of the form REG1'-'REG2{,REG1'-'REG2}*, where REG1 and
+ REG2 are either register names or register numbers. The effect
+ of this option is to mark the registers in the range from REG1 to
+ REG2 as ``fixed'' so they won't be used by the compiler. */
+
+ i = strlen (const_str);
+ str = (char *) alloca (i + 1);
+ memcpy (str, const_str, i + 1);
+
+ while (1)
+ {
+ dash = strchr (str, '-');
+ if (!dash)
+ {
+ warning (0, "value of -mfixed-range must have form REG1-REG2");
+ return;
+ }
+ *dash = '\0';
+ comma = strchr (dash + 1, ',');
+ if (comma)
+ *comma = '\0';
+
+ first = decode_reg_name (str);
+ if (first < 0)
+ {
+ warning (0, "unknown register name: %s", str);
+ return;
+ }
+
+ last = decode_reg_name (dash + 1);
+ if (last < 0)
+ {
+ warning (0, "unknown register name: %s", dash + 1);
+ return;
+ }
+
+ *dash = '-';
+
+ if (first > last)
+ {
+ warning (0, "%s-%s is an empty range", str, dash + 1);
+ return;
+ }
+
+ for (i = first; i <= last; ++i)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ if (!comma)
+ break;
+
+ *comma = ',';
+ str = comma + 1;
+ }
+}
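+
+/* Illustrative use (the register names are hypothetical):
+ -mfixed-range=r4-r6,r10-r12
+ marks r4..r6 and r10..r12 as fixed and call-used, so the register
+ allocator never uses them. */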
+
+/* Insert any deferred function attributes from earlier pragmas. */
+static void
+sh_insert_attributes (tree node, tree *attributes)
+{
+ tree attrs;
+
+ if (TREE_CODE (node) != FUNCTION_DECL)
+ return;
+
+ /* We are only interested in declarations. */
+ if (!DECL_P (node))
+ return;
+
+ /* Append the attributes to the deferred attributes. */
+ *sh_deferred_function_attributes_tail = *attributes;
+ attrs = sh_deferred_function_attributes;
+ if (!attrs)
+ return;
+
+ /* Some attributes imply or require the interrupt attribute. */
+ if (!lookup_attribute ("interrupt_handler", attrs)
+ && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
+ {
+ /* If we have a trapa_handler, but no interrupt_handler attribute,
+ insert an interrupt_handler attribute. */
+ if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
+ /* We can't use sh_pr_interrupt here because that's not in the
+ java frontend. */
+ attrs
+ = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
+ /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
+ if the interrupt attribute is missing, we ignore the attribute
+ and warn. */
+ else if (lookup_attribute ("sp_switch", attrs)
+ || lookup_attribute ("trap_exit", attrs)
+ || lookup_attribute ("nosave_low_regs", attrs)
+ || lookup_attribute ("resbank", attrs))
+ {
+ tree *tail;
+
+ for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
+ {
+ if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
+ || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
+ || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
+ || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
+ warning (OPT_Wattributes,
+ "%qE attribute only applies to interrupt functions",
+ TREE_PURPOSE (attrs));
+ else
+ {
+ *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
+ NULL_TREE);
+ tail = &TREE_CHAIN (*tail);
+ }
+ }
+ attrs = *attributes;
+ }
+ }
+
+ /* Install the processed list. */
+ *attributes = attrs;
+
+ /* Clear deferred attributes. */
+ sh_deferred_function_attributes = NULL_TREE;
+ sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
+
+ return;
+}
+
+/* Supported attributes:
+
+ interrupt_handler -- specifies this function is an interrupt handler.
+
+ trapa_handler - like above, but don't save all registers.
+
+ sp_switch -- specifies an alternate stack for an interrupt handler
+ to run on.
+
+ trap_exit -- use a trapa to exit an interrupt function instead of
+ an rte instruction.
+
+ nosave_low_regs - don't save r0..r7 in an interrupt handler.
+ This is useful on the SH3 and upwards,
+ which have a separate set of low regs for User and Supervisor modes.
+ This should only be used for the lowest level of interrupts. Higher levels
+ of interrupts must save the registers in case they themselves are
+ interrupted.
+
+ renesas -- use Renesas calling/layout conventions (functions and
+ structures).
+
+ resbank -- In case of an ISR, use a register bank to save registers
+ R0-R14, MACH, MACL, GBR and PR. This is useful only on SH2A targets.
+*/
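+
+/* A sketch of how these attributes might appear in user code (the
+ stack name and trap number are illustrative only):
+
+ void handler (void)
+ __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
+ trap_exit (11)));
+*/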
+
+/* Handle a 'resbank' attribute. */
+static tree
+sh_handle_resbank_handler_attribute (tree * node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ if (!TARGET_SH2A)
+ {
+ warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
+ name);
+ *no_add_attrs = true;
+ }
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "interrupt_handler" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_interrupt_handler_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ error ("attribute interrupt_handler is not compatible with -m5-compact");
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a 'function_vector' attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ if (!TARGET_SH2A)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ /* The argument must be a constant integer. */
+ warning (OPT_Wattributes,
+ "%qE attribute argument not an integer constant",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
+ {
+ /* The argument value must be between 0 and 255. */
+ warning (OPT_Wattributes,
+ "%qE attribute argument should be between 0 and 255",
+ name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
+/* Returns 1 if the rtx X is a call target that has been assigned
+ the attribute 'function_vector'. */
+int
+sh2a_is_function_vector_call (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF
+ && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ {
+ tree tr = SYMBOL_REF_DECL (x);
+
+ if (sh2a_function_vector_p (tr))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Returns the function vector number if the attribute
+ 'function_vector' is assigned, otherwise returns zero. */
+int
+sh2a_get_function_vector_number (rtx x)
+{
+ int num;
+ tree list, t;
+
+ if ((GET_CODE (x) == SYMBOL_REF)
+ && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ {
+ t = SYMBOL_REF_DECL (x);
+
+ if (TREE_CODE (t) != FUNCTION_DECL)
+ return 0;
+
+ list = SH_ATTRIBUTES (t);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ {
+ num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
+ return num;
+ }
+
+ list = TREE_CHAIN (list);
+ }
+
+ return 0;
+ }
+ else
+ return 0;
+}
+
+/* Handle an "sp_switch" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+ {
+ /* The argument must be a constant string. */
+ warning (OPT_Wattributes, "%qE attribute argument not a string constant",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "trap_exit" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ /* The argument specifies a trap number to be used in a trapa instruction
+ at function exit (instead of an rte instruction). */
+ else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ /* The argument must be a constant integer. */
+ warning (OPT_Wattributes, "%qE attribute argument not an "
+ "integer constant", name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static tree
+sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs ATTRIBUTE_UNUSED)
+{
+ return NULL_TREE;
+}
+
+/* True if __attribute__((renesas)) or -mrenesas. */
+int
+sh_attr_renesas_p (const_tree td)
+{
+ if (TARGET_HITACHI)
+ return 1;
+ if (td == 0)
+ return 0;
+ if (DECL_P (td))
+ td = TREE_TYPE (td);
+ if (td == error_mark_node)
+ return 0;
+ return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
+ != NULL_TREE);
+}
+
+/* True if __attribute__((renesas)) or -mrenesas, for the current
+ function. */
+int
+sh_cfun_attr_renesas_p (void)
+{
+ return sh_attr_renesas_p (current_function_decl);
+}
+
+int
+sh_cfun_interrupt_handler_p (void)
+{
+ return (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE);
+}
+
+/* Returns 1 if FUNC has been assigned the attribute
+ "function_vector". */
+int
+sh2a_function_vector_p (tree func)
+{
+ tree list;
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ list = SH_ATTRIBUTES (func);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ return 1;
+
+ list = TREE_CHAIN (list);
+ }
+ return 0;
+}
+
+/* Returns TRUE if the current function has both the "resbank" and
+ "interrupt_handler" attributes and we are compiling for SH2A. */
+
+int
+sh_cfun_resbank_handler_p (void)
+{
+ return ((lookup_attribute ("resbank",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE)
+ && (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE) && TARGET_SH2A);
+}
+
+/* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
+
+static const char *
+sh_check_pch_target_flags (int old_flags)
+{
+ if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
+ | MASK_SH_E | MASK_HARD_SH4
+ | MASK_FPU_SINGLE | MASK_SH4))
+ return _("created and used with different architectures / ABIs");
+ if ((old_flags ^ target_flags) & MASK_HITACHI)
+ return _("created and used with different ABIs");
+ if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
+ return _("created and used with different endianness");
+ return NULL;
+}
+
+/* Predicates used by the templates. */
+
+/* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
+ Used only in general_movsrc_operand. */
+
+int
+system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (REGNO (op))
+ {
+ case PR_REG:
+ case MACL_REG:
+ case MACH_REG:
+ return 1;
+ }
+ return 0;
+}
+
+/* Nonzero if OP is a floating point value with value 0.0. */
+
+int
+fp_zero_operand (rtx op)
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
+}
+
+/* Nonzero if OP is a floating point value with value 1.0. */
+
+int
+fp_one_operand (rtx op)
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return 0;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst1);
+}
+
+/* In general, mode switching is used. If we are
+ compiling without -mfmovd, movsf_ie isn't taken into account for
+ mode switching. We could check in machine_dependent_reorg for
+ cases where we know we are in single precision mode, but there is no
+ interface to find that out during reload, so we must avoid
+ choosing an fldi alternative during reload and thus failing to
+ allocate a scratch register for the constant loading. */
+int
+fldi_ok (void)
+{
+ return 1;
+}
+
+int
+tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ enum rtx_code code = GET_CODE (op);
+ return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
+}
+
+/* Return the TLS type for TLS symbols; return TLS_MODEL_NONE (0) otherwise. */
+enum tls_model
+tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (op) != SYMBOL_REF)
+ return TLS_MODEL_NONE;
+ return SYMBOL_REF_TLS_MODEL (op);
+}
+
+/* Return the destination address of a branch. */
+
+static int
+branch_dest (rtx branch)
+{
+ rtx dest = SET_SRC (PATTERN (branch));
+ int dest_uid;
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ return INSN_ADDRESSES (dest_uid);
+}
+
+/* Return nonzero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels. It may live past calls or jumps though. */
+int
+reg_unused_after (rtx reg, rtx insn)
+{
+ enum rtx_code code;
+ rtx set;
+
+ /* If the reg is set by this instruction, then it is safe for our
+ case. Disregard the case where this is a store to memory, since
+ we are checking a register used in the store address. */
+ set = single_set (insn);
+ if (set && !MEM_P (SET_DEST (set))
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return 1;
+
+ while ((insn = NEXT_INSN (insn)))
+ {
+ rtx set;
+ if (!INSN_P (insn))
+ continue;
+
+ code = GET_CODE (insn);
+
+#if 0
+ /* If this is a label that existed before reload, then the register
+ is dead here. However, if this is a label added by reorg, then
+ the register may still be live here. We can't tell the difference,
+ so we just ignore labels completely. */
+ if (code == CODE_LABEL)
+ return 1;
+ /* else */
+#endif
+
+ if (code == JUMP_INSN)
+ return 0;
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int i;
+ int retval = 0;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+ rtx set = single_set (this_insn);
+
+ if (CALL_P (this_insn))
+ code = CALL_INSN;
+ else if (JUMP_P (this_insn))
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return 0;
+ code = JUMP_INSN;
+ }
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ {
+ if (!MEM_P (SET_DEST (set)))
+ retval = 1;
+ else
+ return 0;
+ }
+ if (set == 0
+ && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+ return 0;
+ }
+ if (retval == 1)
+ return 1;
+ else if (code == JUMP_INSN)
+ return 0;
+ }
+
+ set = single_set (insn);
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return !MEM_P (SET_DEST (set));
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return 0;
+
+ if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
+ return 1;
+ }
+ return 1;
+}
+
+#include "ggc.h"
+
+static GTY(()) rtx fpscr_rtx;
+rtx
+get_fpscr_rtx (void)
+{
+ if (! fpscr_rtx)
+ {
+ fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
+ REG_USERVAR_P (fpscr_rtx) = 1;
+ mark_user_reg (fpscr_rtx);
+ }
+ if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+ mark_user_reg (fpscr_rtx);
+ return fpscr_rtx;
+}
+
+static GTY(()) tree fpscr_values;
+
+static void
+emit_fpu_switch (rtx scratch, int index)
+{
+ rtx dst, src;
+
+ if (fpscr_values == NULL)
+ {
+ tree t;
+
+ t = build_index_type (integer_one_node);
+ t = build_array_type (integer_type_node, t);
+ t = build_decl (BUILTINS_LOCATION,
+ VAR_DECL, get_identifier ("__fpscr_values"), t);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_IGNORED_P (t) = 1;
+ DECL_EXTERNAL (t) = 1;
+ TREE_STATIC (t) = 1;
+ TREE_PUBLIC (t) = 1;
+ TREE_USED (t) = 1;
+
+ fpscr_values = t;
+ }
+
+ src = DECL_RTL (fpscr_values);
+ if (!can_create_pseudo_p ())
+ {
+ emit_move_insn (scratch, XEXP (src, 0));
+ if (index != 0)
+ emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
+ src = adjust_automodify_address (src, PSImode, scratch, index * 4);
+ }
+ else
+ src = adjust_address (src, PSImode, index * 4);
+
+ dst = get_fpscr_rtx ();
+ emit_move_insn (dst, src);
+}
+
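+/* Thin wrappers for emitting single (sf) and double (df) precision FP
+ patterns; the expand_* helpers below additionally pass the fpscr rtx,
+ which SH floating point patterns take as an extra operand. */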
+void
+emit_sf_insn (rtx pat)
+{
+ emit_insn (pat);
+}
+
+void
+emit_df_insn (rtx pat)
+{
+ emit_insn (pat);
+}
+
+void
+expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+static rtx get_free_reg (HARD_REG_SET);
+
+/* This function returns a register to use to load the address to load
+ the fpscr from. Currently it always returns r1 or r7, but when we are
+ able to use pseudo registers after combine, or have a better mechanism
+ for choosing a register, it should be done here. */
+/* REGS_LIVE is the liveness information for the point for which we
+ need this allocation. In some bare-bones exit blocks, r1 is live at the
+ start. We can even have all of r0..r3 being live:
+__complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
+ The INSN before which new insns are placed will clobber the register
+ we return. If a basic block consists only of setting the return value
+ register to a pseudo and using that register, the return value is not
+ live before or after this block, yet we'll insert our insns right in
+ the middle. */
+
+static rtx
+get_free_reg (HARD_REG_SET regs_live)
+{
+ if (! TEST_HARD_REG_BIT (regs_live, 1))
+ return gen_rtx_REG (Pmode, 1);
+
+ /* Hard reg 1 is live; since this is a small register classes target,
+ there shouldn't be anything but a jump before the function end. */
+ gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
+ return gen_rtx_REG (Pmode, 7);
+}
+
+/* This function will set the fpscr from memory.
+ MODE is the mode we are setting it to. */
+void
+fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
+{
+ enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
+ enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
+ rtx addr_reg;
+
+ addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
+ emit_fpu_switch (addr_reg, fp_mode == norm_mode);
+}
+
+/* Is the given character a logical line separator for the assembler? */
+#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
+#endif
+
+int
+sh_insn_length_adjustment (rtx insn)
+{
+ /* Instructions with unfilled delay slots take up an extra two bytes for
+ the nop in the delay slot. */
+ if (((NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ || CALL_P (insn)
+ || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
+ && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
+ && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
+ return 2;
+
+ /* SH2e has a bug that prevents the use of annulled branches, so if
+ the delay slot is not filled, we'll have to put a NOP in it. */
+ if (sh_cpu_attr == CPU_SH2E
+ && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
+ && get_attr_type (insn) == TYPE_CBRANCH
+ && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
+ return 2;
+
+ /* sh-dsp parallel processing insn take four bytes instead of two. */
+
+ if (NONJUMP_INSN_P (insn))
+ {
+ int sum = 0;
+ rtx body = PATTERN (insn);
+ const char *templ;
+ char c;
+ int maybe_label = 1;
+
+ if (GET_CODE (body) == ASM_INPUT)
+ templ = XSTR (body, 0);
+ else if (asm_noperands (body) >= 0)
+ templ
+ = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
+ else
+ return 0;
+ do
+ {
+ int ppi_adjust = 0;
+
+ do
+ c = *templ++;
+ while (c == ' ' || c == '\t');
+ /* All sh-dsp parallel-processing insns start with p.
+ The only non-ppi sh insn starting with p is pref.
+ The only ppi starting with pr is prnd. */
+ if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
+ ppi_adjust = 2;
+ /* The repeat pseudo-insn expands to three insns, a total of
+ six bytes in size. */
+ else if ((c == 'r' || c == 'R')
+ && ! strncasecmp ("epeat", templ, 5))
+ ppi_adjust = 4;
+ while (c && c != '\n'
+ && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
+ {
+ /* If this is a label, it is obviously not a ppi insn. */
+ if (c == ':' && maybe_label)
+ {
+ ppi_adjust = 0;
+ break;
+ }
+ else if (c == '\'' || c == '"')
+ maybe_label = 0;
+ c = *templ++;
+ }
+ sum += ppi_adjust;
+ maybe_label = c != ':';
+ }
+ while (c);
+ return sum;
+ }
+ return 0;
+}
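+
+/* For instance, an inline asm containing a single sh-dsp
+ parallel-processing insn (one starting with `p', other than pref)
+ yields an adjustment of 2, making it count as four bytes instead
+ of two. */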
+
+/* Return TRUE for a valid displacement for the REG+disp addressing
+ with MODE. */
+
+/* ??? The SH2e does not have the REG+disp addressing mode when loading values
+ into the FRx registers. We implement this by setting the maximum offset
+ to zero when the value is SFmode. This also restricts loading of SFmode
+ values into the integer registers, but that can't be helped. */
+
+/* The SH allows a displacement in a QI or HI mode, but only when the
+ other operand is R0. GCC doesn't handle this very well, so we forgo
+ all of that.
+
+ A legitimate index for a QI or HI is 0, SI can be any number 0..63,
+ DI can be any number 0..60. */
+
+bool
+sh_legitimate_index_p (enum machine_mode mode, rtx op)
+{
+ if (CONST_INT_P (op))
+ {
+ if (TARGET_SHMEDIA)
+ {
+ int size;
+
+ /* Check if this is the address of an unaligned load / store. */
+ if (mode == VOIDmode)
+ return CONST_OK_FOR_I06 (INTVAL (op));
+
+ size = GET_MODE_SIZE (mode);
+ return (!(INTVAL (op) & (size - 1))
+ && INTVAL (op) >= -512 * size
+ && INTVAL (op) < 512 * size);
+ }
+
+ if (TARGET_SH2A)
+ {
+ if (GET_MODE_SIZE (mode) == 1
+ && (unsigned) INTVAL (op) < 4096)
+ return true;
+ }
+
+ if ((GET_MODE_SIZE (mode) == 4
+ && (unsigned) INTVAL (op) < 64
+ && !(INTVAL (op) & 3)
+ && !(TARGET_SH2E && mode == SFmode))
+ || (GET_MODE_SIZE (mode) == 4
+ && (unsigned) INTVAL (op) < 16383
+ && !(INTVAL (op) & 3) && TARGET_SH2A))
+ return true;
+
+ if ((GET_MODE_SIZE (mode) == 8
+ && (unsigned) INTVAL (op) < 60
+ && !(INTVAL (op) & 3)
+ && !((TARGET_SH4 || TARGET_SH2A) && mode == DFmode))
+ || ((GET_MODE_SIZE (mode) == 8)
+ && (unsigned) INTVAL (op) < 8192
+ && !(INTVAL (op) & (TARGET_SH2A_DOUBLE ? 7 : 3))
+ && (TARGET_SH2A && mode == DFmode)))
+ return true;
+ }
+
+ return false;
+}
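+
+/* Illustrative consequences of the rules above: @(60,rN) is a valid
+ SImode address (60 < 64 and a multiple of 4) while @(64,rN) is not;
+ for DImode the largest valid offset on non-SH2A targets is 56. */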
+
+/* Recognize an RTL expression that is a valid memory address for
+ an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+ Allow REG
+ REG+disp
+ REG+r0
+ REG++
+ --REG */
+
+static bool
+sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
+ return true;
+ else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
+ && ! TARGET_SHMEDIA
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
+ return true;
+ else if (GET_CODE (x) == PLUS
+ && (mode != PSImode || reload_completed))
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (GET_MODE_SIZE (mode) <= 8
+ && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
+ && sh_legitimate_index_p (mode, xop1))
+ return true;
+
+ if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
+ || ((xop0 == stack_pointer_rtx
+ || xop0 == hard_frame_pointer_rtx)
+ && REG_P (xop1) && REGNO (xop1) == R0_REG)
+ || ((xop1 == stack_pointer_rtx
+ || xop1 == hard_frame_pointer_rtx)
+ && REG_P (xop0) && REGNO (xop0) == R0_REG))
+ && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
+ || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
+ || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && TARGET_FMOVD && mode == DFmode)))
+ {
+ if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
+ && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
+ return true;
+ if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
+ && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
+ isn't protected by a PIC unspec. */
+int
+nonpic_symbol_mentioned_p (rtx x)
+{
+ register const char *fmt;
+ register int i;
+
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
+ || GET_CODE (x) == PC)
+ return 1;
+
+ /* We don't want to look into the possible MEM location of a
+ CONST_DOUBLE, since we're not going to use it, in general. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ return 0;
+
+ if (GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PIC
+ || XINT (x, 1) == UNSPEC_GOT
+ || XINT (x, 1) == UNSPEC_GOTOFF
+ || XINT (x, 1) == UNSPEC_GOTPLT
+ || XINT (x, 1) == UNSPEC_GOTTPOFF
+ || XINT (x, 1) == UNSPEC_DTPOFF
+ || XINT (x, 1) == UNSPEC_TPOFF
+ || XINT (x, 1) == UNSPEC_PLT
+ || XINT (x, 1) == UNSPEC_SYMOFF
+ || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+ return 0;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
+ return 1;
+ }
+ else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
+ @GOTOFF in `reg'. */
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx reg)
+{
+ if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
+ return orig;
+
+ if (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTOFF2reg (reg, orig));
+ return reg;
+ }
+ else if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, orig));
+ return reg;
+ }
+ return orig;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ Otherwise, return X.
+
+ For the SH, if X is almost suitable for indexing, but the offset is
+ out of range, convert it into a normal form so that CSE has a chance
+ of reducing the number of address registers used. */
+
+static rtx
+sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ if (flag_pic)
+ x = legitimize_pic_address (oldx, mode, NULL_RTX);
+
+ if (GET_CODE (x) == PLUS
+ && (GET_MODE_SIZE (mode) == 4
+ || GET_MODE_SIZE (mode) == 8)
+ && CONST_INT_P (XEXP (x, 1))
+ && BASE_REGISTER_RTX_P (XEXP (x, 0))
+ && ! TARGET_SHMEDIA
+ && ! ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
+ && ! (TARGET_SH2E && mode == SFmode))
+ {
+ rtx index_rtx = XEXP (x, 1);
+ HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
+ rtx sum;
+
+ /* On rare occasions, we might get an unaligned pointer
+ that is indexed in a way to give an aligned address.
+ Therefore, keep the lower two bits in offset_base. */
+ /* Instead of offset_base 128..131 use 124..127, so that
+ simple add suffices. */
+ if (offset > 127)
+ offset_base = ((offset + 4) & ~60) - 4;
+ else
+ offset_base = offset & ~60;
+
+ /* Sometimes the normal form does not suit DImode. We
+ could avoid that by using smaller ranges, but that
+ would give less optimized code when SImode is
+ prevalent. */
+ if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
+ {
+ sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
+ GEN_INT (offset_base), NULL_RTX, 0,
+ OPTAB_LIB_WIDEN);
+
+ return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+ }
+ }
+
+ return x;
+}
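+
+/* A worked example of the normalization above: for an SImode access to
+ (plus rN 68), offset_base is 64 (68 & ~60), so we emit rT = rN + 64
+ and return (plus rT 4), which fits the displacement range. */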
+
+/* Attempt to replace *P, which is an address that needs reloading, with
+ a valid memory address for an operand of mode MODE.
+ Like for sh_legitimize_address, for the SH we try to get a normal form
+ of the address. That will allow inheritance of the address reloads. */
+
+bool
+sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
+ int itype)
+{
+ enum reload_type type = (enum reload_type) itype;
+
+ if (GET_CODE (*p) == PLUS
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && CONST_INT_P (XEXP (*p, 1))
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
+ && ! TARGET_SHMEDIA
+ && ! (TARGET_SH4 && mode == DFmode)
+ && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
+ && (ALLOW_INDEXED_ADDRESS
+ || XEXP (*p, 0) == stack_pointer_rtx
+ || XEXP (*p, 0) == hard_frame_pointer_rtx))
+ {
+ rtx index_rtx = XEXP (*p, 1);
+ HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base;
+ rtx sum;
+
+ if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
+ {
+ push_reload (*p, NULL_RTX, p, NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ goto win;
+ }
+ if (TARGET_SH2E && mode == SFmode)
+ {
+ *p = copy_rtx (*p);
+ push_reload (*p, NULL_RTX, p, NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ goto win;
+ }
+ /* Instead of offset_base 128..131 use 124..127, so that
+ simple add suffices. */
+ if (offset > 127)
+ offset_base = ((offset + 4) & ~60) - 4;
+ else
+ offset_base = offset & ~60;
+ /* Sometimes the normal form does not suit DImode. We could avoid
+ that by using smaller ranges, but that would give less optimized
+ code when SImode is prevalent. */
+ if (GET_MODE_SIZE (mode) + offset - offset_base <= 64)
+ {
+ sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), GEN_INT (offset_base));
+ *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+ push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ goto win;
+ }
+ }
+ /* We must re-recognize what we created before. */
+ else if (GET_CODE (*p) == PLUS
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && GET_CODE (XEXP (*p, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
+ && CONST_INT_P (XEXP (*p, 1))
+ && ! TARGET_SHMEDIA
+ && ! (TARGET_SH2E && mode == SFmode))
+ {
+ /* Because this address is so complex, we know it must have
+ been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
+ it is already unshared, and needs no further unsharing. */
+ push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ goto win;
+ }
+
+ return false;
+
+ win:
+ return true;
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize various UNSPEC sequences
+ and turn them back into a direct symbol reference. */
+
+static rtx
+sh_delegitimize_address (rtx orig_x)
+{
+ rtx x, y;
+
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+
+ x = orig_x;
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == CONST)
+ {
+ y = XEXP (x, 0);
+ if (GET_CODE (y) == UNSPEC)
+ {
+ if (XINT (y, 1) == UNSPEC_GOT
+ || XINT (y, 1) == UNSPEC_GOTOFF)
+ return XVECEXP (y, 0, 0);
+ else if (TARGET_SHMEDIA
+ && (XINT (y, 1) == UNSPEC_EXTRACT_S16
+ || XINT (y, 1) == UNSPEC_EXTRACT_U16))
+ {
+ rtx offset = XVECEXP (y, 0, 1);
+
+ x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+ }
+ }
+ }
+
+ return orig_x;
+}
+
+/* Mark the use of a constant in the literal table. If the constant
+ has multiple labels, make it unique. */
+static rtx
+mark_constant_pool_use (rtx x)
+{
+ rtx insn, lab, pattern;
+
+ if (x == NULL)
+ return x;
+
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ x = XEXP (x, 0);
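+ /* Fall through. */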
+ case CODE_LABEL:
+ break;
+ default:
+ return x;
+ }
+
+ /* Get the first label in the list of labels for the same constant
+ and delete the other labels in the list. */
+ lab = x;
+ for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
+ {
+ if (!LABEL_P (insn)
+ || LABEL_REFS (insn) != NEXT_INSN (insn))
+ break;
+ lab = insn;
+ }
+
+ for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
+ INSN_DELETED_P (insn) = 1;
+
+ /* Mark constants in a window. */
+ for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
+ {
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != UNSPEC_VOLATILE)
+ continue;
+
+ switch (XINT (pattern, 1))
+ {
+ case UNSPECV_CONST2:
+ case UNSPECV_CONST4:
+ case UNSPECV_CONST8:
+ XVECEXP (pattern, 0, 1) = const1_rtx;
+ break;
+ case UNSPECV_WINDOW_END:
+ if (XVECEXP (pattern, 0, 0) == x)
+ return lab;
+ break;
+ case UNSPECV_CONST_END:
+ return lab;
+ default:
+ break;
+ }
+ }
+
+ return lab;
+}
+
+/* Return true if it's possible to redirect BRANCH1 to the destination
+ of an unconditional jump BRANCH2. We only want to do this if the
+ resulting branch will have a short displacement. */
+int
+sh_can_redirect_branch (rtx branch1, rtx branch2)
+{
+ if (flag_expensive_optimizations && simplejump_p (branch2))
+ {
+ rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
+ rtx insn;
+ int distance;
+
+ for (distance = 0, insn = NEXT_INSN (branch1);
+ insn && distance < 256;
+ insn = PREV_INSN (insn))
+ {
+ if (insn == dest)
+ return 1;
+ else
+ distance += get_attr_length (insn);
+ }
+ for (distance = 0, insn = NEXT_INSN (branch1);
+ insn && distance < 256;
+ insn = NEXT_INSN (insn))
+ {
+ if (insn == dest)
+ return 1;
+ else
+ distance += get_attr_length (insn);
+ }
+ }
+ return 0;
+}
+
+/* Return nonzero if register old_reg can be renamed to register new_reg. */
+int
+sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+
+ if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
+ return 0;
+
+ return 1;
+}
+
+/* Function to update the integer COST
+ based on the relationship between INSN that is dependent on
+ DEP_INSN through the dependence LINK. The default is to make no
+ adjustment to COST. This can be used for example to specify to
+ the scheduler that an output- or anti-dependence does not incur
+ the same cost as a data-dependence. The return value should be
+ the new value for COST. */
+static int
+sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
+{
+ rtx reg, use_pat;
+
+ if (TARGET_SHMEDIA)
+ {
+ /* On SHmedia, if the dependence is an anti-dependence or
+ output-dependence, there is no cost. */
+ if (REG_NOTE_KIND (link) != 0)
+ {
+ /* However, dependencies between target register loads and
+ uses of the register in a subsequent block that are separated
+ by a conditional branch are not modelled - we have to make do with
+ the anti-dependency between the target register load and the
+ conditional branch that ends the current block. */
+ if (REG_NOTE_KIND (link) == REG_DEP_ANTI
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
+ || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
+ && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
+ {
+ int orig_cost = cost;
+ rtx note = find_reg_note (insn, REG_BR_PROB, 0);
+ rtx target = ((! note
+ || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
+ ? insn : JUMP_LABEL (insn));
+ /* On the likely path, the branch costs 1, on the unlikely path,
+ it costs 3. */
+ cost--;
+ do
+ target = next_active_insn (target);
+ while (target && ! flow_dependent_p (target, dep_insn)
+ && --cost > 0);
+ /* If two branches are executed in immediate succession, with the
+ first branch properly predicted, this causes a stall at the
+ second branch, hence we won't need the target for the
+ second branch for two cycles after the launch of the first
+ branch. */
+ if (cost > orig_cost - 2)
+ cost = orig_cost - 2;
+ }
+ else
+ cost = 0;
+ }
+
+ else if (get_attr_is_mac_media (insn)
+ && get_attr_is_mac_media (dep_insn))
+ cost = 1;
+
+ else if (! reload_completed
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
+ && cost < 4)
+ cost = 4;
+ /* Schedule the ptabs for a casesi_jump_media in preference to stuff
+ that is needed at the target. */
+ else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
+ && ! flow_dependent_p (insn, dep_insn))
+ cost--;
+ }
+ else if (REG_NOTE_KIND (link) == 0)
+ {
+ enum attr_type type;
+ rtx dep_set;
+
+ if (recog_memoized (insn) < 0
+ || recog_memoized (dep_insn) < 0)
+ return cost;
+
+ dep_set = single_set (dep_insn);
+
+ /* The latency that we specify in the scheduling description refers
+ to the actual output, not to an auto-increment register; for that,
+ the latency is one. */
+ if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
+ {
+ rtx set = single_set (insn);
+
+ if (set
+ && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
+ && (!MEM_P (SET_DEST (set))
+ || !reg_mentioned_p (SET_DEST (dep_set),
+ XEXP (SET_DEST (set), 0))))
+ cost = 1;
+ }
+ /* The only input for a call that is timing-critical is the
+ function's address. */
+ if (CALL_P (insn))
+ {
+ rtx call = PATTERN (insn);
+
+ if (GET_CODE (call) == PARALLEL)
+ call = XVECEXP (call, 0, 0);
+ if (GET_CODE (call) == SET)
+ call = SET_SRC (call);
+ if (GET_CODE (call) == CALL && MEM_P (XEXP (call, 0))
+ /* sibcalli_thunk uses a symbol_ref in an unspec. */
+ && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
+ || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
+ cost -= TARGET_SH4_300 ? 3 : 6;
+ }
+ /* Likewise, the most timing critical input for an sfuncs call
+ is the function address. However, sfuncs typically start
+ using their arguments pretty quickly.
+ Assume a four cycle delay for SH4 before they are needed.
+ Cached ST40-300 calls are quicker, so assume only a one
+ cycle delay there.
+ ??? Maybe we should encode the delays till input registers
+ are needed by sfuncs into the sfunc call insn. */
+ /* All sfunc calls are parallels with at least four components.
+ Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
+ else if (GET_CODE (PATTERN (insn)) == PARALLEL
+ && XVECLEN (PATTERN (insn), 0) >= 4
+ && (reg = sfunc_uses_reg (insn)))
+ {
+ if (! reg_set_p (reg, dep_insn))
+ cost -= TARGET_SH4_300 ? 1 : 4;
+ }
+ if (TARGET_HARD_SH4 && !TARGET_SH4_300)
+ {
+ enum attr_type dep_type = get_attr_type (dep_insn);
+
+ if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
+ cost--;
+ else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
+ && (type = get_attr_type (insn)) != TYPE_CALL
+ && type != TYPE_SFUNC)
+ cost--;
+ /* When the preceding instruction loads the shift amount of
+ the following SHAD/SHLD, the latency of the load is increased
+ by 1 cycle. */
+ if (get_attr_type (insn) == TYPE_DYN_SHIFT
+ && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
+ && reg_overlap_mentioned_p (SET_DEST (dep_set),
+ XEXP (SET_SRC (single_set (insn)),
+ 1)))
+ cost++;
+ /* When an LS group instruction with a latency of less than
+ 3 cycles is followed by a double-precision floating-point
+ instruction, FIPR, or FTRV, the latency of the first
+ instruction is increased to 3 cycles. */
+ else if (cost < 3
+ && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
+ && get_attr_dfp_comp (insn) == DFP_COMP_YES)
+ cost = 3;
+ /* The lsw register of a double-precision computation is ready one
+ cycle earlier. */
+ else if (reload_completed
+ && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
+ && (use_pat = single_set (insn))
+ && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
+ SET_SRC (use_pat)))
+ cost -= 1;
+
+ if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
+ && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
+ cost -= 1;
+ }
+ else if (TARGET_SH4_300)
+ {
+ /* Stores need their input register two cycles later. */
+ if (dep_set && cost >= 1
+ && ((type = get_attr_type (insn)) == TYPE_STORE
+ || type == TYPE_PSTORE
+ || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
+ {
+ rtx set = single_set (insn);
+
+ if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
+ && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
+ {
+ cost -= 2;
+ /* But don't reduce the cost below 1 if the address depends
+ on a side effect of dep_insn. */
+ if (cost < 1
+ && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
+ cost = 1;
+ }
+ }
+ }
+ }
+ /* An anti-dependence penalty of two applies if the first insn is a double
+ precision fadd / fsub / fmul. */
+ else if (!TARGET_SH4_300
+ && REG_NOTE_KIND (link) == REG_DEP_ANTI
+ && recog_memoized (dep_insn) >= 0
+ && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
+ || get_attr_type (dep_insn) == TYPE_DFP_MUL)
+ /* A lot of alleged anti-flow dependences are fake,
+ so check this one is real. */
+ && flow_dependent_p (dep_insn, insn))
+ cost = 2;
+
+ return cost;
+}
+
+/* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
+ if DEP_INSN is anti-flow dependent on INSN. */
+static int
+flow_dependent_p (rtx insn, rtx dep_insn)
+{
+ rtx tmp = PATTERN (insn);
+
+ note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
+ return tmp == NULL_RTX;
+}
+
+/* A helper function for flow_dependent_p called through note_stores. */
+static void
+flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ rtx * pinsn = (rtx *) data;
+
+ if (*pinsn && reg_referenced_p (x, *pinsn))
+ *pinsn = NULL_RTX;
+}
+
+/* For use by sh_allocate_initial_value. Note that sh.md contains some
+ 'special function' patterns (type sfunc) that clobber pr, but that
+ do not look like function calls to leaf_function_p. Hence we must
+ do this extra check. */
+static int
+sh_pr_n_sets (void)
+{
+ return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
+}
+
+/* Return where to allocate pseudo for a given hard register initial
+ value. */
+static rtx
+sh_allocate_initial_value (rtx hard_reg)
+{
+ rtx x;
+
+ if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
+ {
+ if (current_function_is_leaf
+ && ! sh_pr_n_sets ()
+ && ! (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers)))
+ x = hard_reg;
+ else
+ x = gen_frame_mem (Pmode, return_address_pointer_rtx);
+ }
+ else
+ x = NULL_RTX;
+
+ return x;
+}
+
+/* This function returns "2" to indicate dual issue for the SH4
+ processor, and "1" otherwise. To be used by the DFA pipeline
+ description. */
+static int
+sh_issue_rate (void)
+{
+ if (TARGET_SUPERSCALAR)
+ return 2;
+ else
+ return 1;
+}
+
+/* Functions for ready queue reordering for sched1. */
+
+/* Get weight for mode for a set x. */
+static short
+find_set_regmode_weight (rtx x, enum machine_mode mode)
+{
+ if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
+ return 1;
+ if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
+ {
+ if (REG_P (SET_DEST (x)))
+ {
+ if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* Get regmode weight for insn. */
+static short
+find_insn_regmode_weight (rtx insn, enum machine_mode mode)
+{
+ short reg_weight = 0;
+ rtx x;
+
+ /* Increment weight for each register born here. */
+ x = PATTERN (insn);
+ reg_weight += find_set_regmode_weight (x, mode);
+ if (GET_CODE (x) == PARALLEL)
+ {
+ int j;
+ for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
+ {
+ x = XVECEXP (PATTERN (insn), 0, j);
+ reg_weight += find_set_regmode_weight (x, mode);
+ }
+ }
+ /* Decrement weight for each register that dies here. */
+ for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
+ {
+ if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
+ {
+ rtx note = XEXP (x, 0);
+ if (REG_P (note) && GET_MODE (note) == mode)
+ reg_weight--;
+ }
+ }
+ return reg_weight;
+}
+
+/* Calculate regmode weights for all insns of a basic block. */
+static void
+find_regmode_weight (basic_block b, enum machine_mode mode)
+{
+ rtx insn, next_tail, head, tail;
+
+ get_ebb_head_tail (b, b, &head, &tail);
+ next_tail = NEXT_INSN (tail);
+
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ {
+ /* Handle register life information. */
+ if (!INSN_P (insn))
+ continue;
+
+ if (mode == SFmode)
+ INSN_REGMODE_WEIGHT (insn, mode) =
+ find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
+ else if (mode == SImode)
+ INSN_REGMODE_WEIGHT (insn, mode) =
+ find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
+ }
+}
+
+/* Comparison function for ready queue sorting. */
+static int
+rank_for_reorder (const void *x, const void *y)
+{
+ rtx tmp = *(const rtx *) y;
+ rtx tmp2 = *(const rtx *) x;
+
+ /* The insn in a schedule group should be issued first. */
+ if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
+ return SCHED_GROUP_P (tmp2) ? 1 : -1;
+
+ /* If insns are equally good, sort by INSN_LUID (original insn order); this
+ minimizes instruction movement, thus minimizing sched's effect on
+ register pressure. */
+ return INSN_LUID (tmp) - INSN_LUID (tmp2);
+}
+
+/* Resort the array A in which only the element at index N - 1 may be
+ out of order. */
+static void
+swap_reorder (rtx *a, int n)
+{
+ rtx insn = a[n - 1];
+ int i = n - 2;
+
+ while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
+ {
+ a[i + 1] = a[i];
+ i -= 1;
+ }
+ a[i + 1] = insn;
+}
+
+#define SCHED_REORDER(READY, N_READY) \
+ do \
+ { \
+ if ((N_READY) == 2) \
+ swap_reorder (READY, N_READY); \
+ else if ((N_READY) > 2) \
+ qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
+ } \
+ while (0)
+
+/* Sort the ready list READY by ascending priority, using the SCHED_REORDER
+ macro. */
+static void
+ready_reorder (rtx *ready, int nready)
+{
+ SCHED_REORDER (ready, nready);
+}
+
+/* Count life regions of r0 for a block. */
+static int
+find_r0_life_regions (basic_block b)
+{
+ rtx end, insn;
+ rtx pset;
+ rtx r0_reg;
+ int live;
+ int set;
+ int death = 0;
+
+ if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
+ {
+ set = 1;
+ live = 1;
+ }
+ else
+ {
+ set = 0;
+ live = 0;
+ }
+
+ insn = BB_HEAD (b);
+ end = BB_END (b);
+ r0_reg = gen_rtx_REG (SImode, R0_REG);
+ while (1)
+ {
+ if (INSN_P (insn))
+ {
+ if (find_regno_note (insn, REG_DEAD, R0_REG))
+ {
+ death++;
+ live = 0;
+ }
+ if (!live
+ && (pset = single_set (insn))
+ && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
+ && !find_regno_note (insn, REG_UNUSED, R0_REG))
+ {
+ set++;
+ live = 1;
+ }
+ }
+ if (insn == end)
+ break;
+ insn = NEXT_INSN (insn);
+ }
+ return set - death;
+}
+
+/* Calculate regmode weights for all insns of all basic blocks. */
+static void
+sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int old_max_uid)
+{
+ basic_block b;
+
+ regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
+ regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
+ r0_life_regions = 0;
+
+ FOR_EACH_BB_REVERSE (b)
+ {
+ find_regmode_weight (b, SImode);
+ find_regmode_weight (b, SFmode);
+ if (!reload_completed)
+ r0_life_regions += find_r0_life_regions (b);
+ }
+
+ CURR_REGMODE_PRESSURE (SImode) = 0;
+ CURR_REGMODE_PRESSURE (SFmode) = 0;
+
+}
+
+/* Cleanup. */
+static void
+sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED)
+{
+ if (regmode_weight[0])
+ {
+ free (regmode_weight[0]);
+ regmode_weight[0] = NULL;
+ }
+ if (regmode_weight[1])
+ {
+ free (regmode_weight[1]);
+ regmode_weight[1] = NULL;
+ }
+}
+
+/* The scalar modes supported differ from the default version in TImode
+ for 32-bit SHMEDIA. */
+static bool
+sh_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SHMEDIA32 && mode == TImode)
+ return false;
+
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Cache the can_issue_more so that we can return it from reorder2. Also,
+ keep count of register pressures on SImode and SFmode. */
+static int
+sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn,
+ int can_issue_more)
+{
+ if (GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ cached_can_issue_more = can_issue_more - 1;
+ else
+ cached_can_issue_more = can_issue_more;
+
+ if (reload_completed)
+ return cached_can_issue_more;
+
+ CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
+ CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
+
+ return cached_can_issue_more;
+}
+
+static void
+sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int veclen ATTRIBUTE_UNUSED)
+{
+ CURR_REGMODE_PRESSURE (SImode) = 0;
+ CURR_REGMODE_PRESSURE (SFmode) = 0;
+}
+
+/* Some magic numbers. */
+/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+ functions that already have high pressure on r0. */
+#define R0_MAX_LIFE_REGIONS 2
+/* Register Pressure thresholds for SImode and SFmode registers. */
+#define SIMODE_MAX_WEIGHT 5
+#define SFMODE_MAX_WEIGHT 10
+
+/* Return true if the pressure is high for MODE. */
+static short
+high_pressure (enum machine_mode mode)
+{
+ /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+ functions that already have high pressure on r0. */
+ if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
+ return 1;
+
+ if (mode == SFmode)
+ return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
+ else
+ return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
+}
+
+/* Reorder ready queue if register pressure is high. */
+static int
+sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx *ready,
+ int *n_readyp,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ if (reload_completed)
+ return sh_issue_rate ();
+
+ if (high_pressure (SFmode) || high_pressure (SImode))
+ {
+ ready_reorder (ready, *n_readyp);
+ }
+
+ return sh_issue_rate ();
+}
+
+/* Skip cycles if the current register pressure is high. */
+static int
+sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx *ready ATTRIBUTE_UNUSED,
+ int *n_readyp ATTRIBUTE_UNUSED,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ if (reload_completed)
+ return cached_can_issue_more;
+
+ if (high_pressure(SFmode) || high_pressure (SImode))
+ skip_cycles = 1;
+
+ return cached_can_issue_more;
+}
+
+/* Skip cycles without sorting the ready queue. This will move insns from
+ Q->R. If this is the last cycle we are skipping, allow sorting of the
+ ready queue by sh_reorder. */
+
+/* Generally, skipping this many cycles is sufficient for all insns to move
+ from Q -> R. */
+#define MAX_SKIPS 8
+
+static int
+sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn ATTRIBUTE_UNUSED,
+ int last_clock_var,
+ int clock_var,
+ int *sort_p)
+{
+ if (reload_completed)
+ return 0;
+
+ if (skip_cycles)
+ {
+ if ((clock_var - last_clock_var) < MAX_SKIPS)
+ {
+ *sort_p = 0;
+ return 1;
+ }
+ /* If this is the last cycle we are skipping, allow reordering of R. */
+ if ((clock_var - last_clock_var) == MAX_SKIPS)
+ {
+ *sort_p = 1;
+ return 1;
+ }
+ }
+
+ skip_cycles = 0;
+
+ return 0;
+}
+
+/* SHmedia requires registers for branches, so we can't generate new
+ branches past reload. */
+static bool
+sh_cannot_modify_jumps_p (void)
+{
+ return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
+}
+
+static reg_class_t
+sh_target_reg_class (void)
+{
+ return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
+}
+
+static bool
+sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
+{
+ HARD_REG_SET dummy;
+#if 0
+ rtx insn;
+#endif
+
+ if (! shmedia_space_reserved_for_target_registers)
+ return 0;
+ if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
+ return 0;
+ if (calc_live_regs (&dummy) >= 6 * 8)
+ return 1;
+ return 0;
+}
+
+static bool
+sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
+{
+ return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
+}
+
+/*
+ On the SH1..SH4, the trampoline looks like
+ 2 0002 D202 mov.l l2,r2
+ 1 0000 D301 mov.l l1,r3
+ 3 0004 422B jmp @r2
+ 4 0006 0009 nop
+ 5 0008 00000000 l1: .long area
+ 6 000c 00000000 l2: .long function
+
+ SH5 (compact) uses r1 instead of r3 for the static chain. */
+
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+static void
+sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
+
+ if (TARGET_SHMEDIA64)
+ {
+ rtx tramp_templ;
+ int fixed_len;
+
+ rtx movi1 = GEN_INT (0xcc000010);
+ rtx shori1 = GEN_INT (0xc8000010);
+ rtx src, dst;
+
+ /* The following trampoline works within a +- 128 KB range for cxt:
+ ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
+ shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
+ gettr tr1,r1; blink tr0,r63 */
+ /* Address rounding makes it hard to compute the exact bounds of the
+ offset for this trampoline, but we have a rather generous offset
+ range, so frame_offset should do fine as an upper bound. */
+ if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
+ {
+ /* ??? could optimize this trampoline initialization
+ by writing DImode words with two insns each. */
+ rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
+ rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
+ insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ /* Or in ptb/u .,tr1 pattern */
+ insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
+ insn = force_operand (insn, NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
+ insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 20),
+ GEN_INT (0x6bf10600));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 24),
+ GEN_INT (0x4415fc10));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 28),
+ GEN_INT (0x4401fff0));
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
+ fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
+
+ tramp_templ = gen_datalabel_ref (tramp_templ);
+ dst = tramp_mem;
+ src = gen_const_mem (BLKmode, tramp_templ);
+ set_mem_align (dst, 256);
+ set_mem_align (src, 64);
+ emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
+
+ emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
+ emit_move_insn (adjust_address (tramp_mem, Pmode,
+ fixed_len + GET_MODE_SIZE (Pmode)),
+ cxt);
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
+ movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
+ rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
+ rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
+ /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
+ rotated 10 right, and the higher 16 bits of every 32 selected. */
+ rtx movishori
+ = force_reg (V2HImode, (simplify_gen_subreg
+ (V2HImode, GEN_INT (0x4330432), SImode, 0)));
+ rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
+ rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
+
+ fnaddr = force_reg (SImode, fnaddr);
+ cxt = force_reg (SImode, cxt);
+ emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
+ gen_rtx_SUBREG (V2HImode, fnaddr, 0),
+ movishori));
+ emit_insn (gen_rotrdi3_mextr (quad0, quad0,
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+ emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
+ emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
+ emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
+ gen_rtx_SUBREG (V2HImode, cxt, 0),
+ movishori));
+ emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+ emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
+ emit_insn (gen_mextr4 (quad2, cxtload, blink));
+ }
+ else
+ {
+ emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
+ emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
+ }
+ emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
+ emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
+ return;
+ }
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ if (TARGET_HARVARD)
+ {
+ if (!TARGET_INLINE_IC_INVALIDATE
+ || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
+ emit_library_call (function_symbol (NULL, "__ic_invalidate",
+ FUNCTION_ORDINARY),
+ LCT_NORMAL, VOIDmode, 1, tramp, SImode);
+ else
+ emit_insn (gen_ic_invalidate_line (tramp));
+ }
+}
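+
+/* Illustrative sketch (not part of this file): the 16-byte trampoline
+   laid out by the plain-SH fall-through branch above, written as host
+   code.  The two instruction words and the literal offsets mirror the
+   emit_move_insn calls; the function name is hypothetical.  */
+#if 0
+static void
+sketch_sh_trampoline (unsigned char *buf, unsigned long fnaddr,
+                      unsigned long cxt, int little_endian)
+{
+  unsigned int *p = (unsigned int *) buf;
+
+  /* Two PC-relative loads: the function address and the static chain
+     are fetched from the literals stored below.  */
+  p[0] = little_endian ? 0xd301d202 : 0xd202d301;
+  /* Indirect jump through the loaded address, plus a nop.  */
+  p[1] = little_endian ? 0x0009422b : 0x422b0009;
+  p[2] = (unsigned int) cxt;     /* static chain value, offset 8.  */
+  p[3] = (unsigned int) fnaddr;  /* function address, offset 12.  */
+}
+#endif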
+
+/* On SH5, trampolines are SHmedia code, so add 1 to the address. */
+
+static rtx
+sh_trampoline_adjust_address (rtx tramp)
+{
+ if (TARGET_SHMEDIA)
+ tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
+ gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
+ return tramp;
+}
+
+/* FIXME: This is overly conservative. A SHcompact function that
+ receives arguments ``by reference'' will have them stored in its
+ own stack frame, so it must not pass pointers or references to
+ these arguments to other functions by means of sibling calls. */
+/* If PIC, we cannot make sibling calls to global functions
+ because the PLT requires r12 to be live. */
+static bool
+sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ return ((! TARGET_SHCOMPACT
+ || crtl->args.info.stack_regs == 0)
+ && ! sh_cfun_interrupt_handler_p ()
+ && (! flag_pic
+ || (decl && ! TREE_PUBLIC (decl))
+ || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
+}
+
+/* Machine specific built-in functions. */
+
+struct builtin_description
+{
+ const enum insn_code icode;
+ const char *const name;
+ int signature;
+ tree fndecl;
+};
+
+/* Describes the number and signedness of arguments; arg[0] == result
+ (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument;
+ 9: 64-bit pointer, 10: 32-bit pointer). */
+static const char signature_args[][4] =
+{
+#define SH_BLTIN_V2SI2 0
+ { 4, 4 },
+#define SH_BLTIN_V4HI2 1
+ { 4, 4 },
+#define SH_BLTIN_V2SI3 2
+ { 4, 4, 4 },
+#define SH_BLTIN_V4HI3 3
+ { 4, 4, 4 },
+#define SH_BLTIN_V8QI3 4
+ { 4, 4, 4 },
+#define SH_BLTIN_MAC_HISI 5
+ { 1, 4, 4, 1 },
+#define SH_BLTIN_SH_HI 6
+ { 4, 4, 1 },
+#define SH_BLTIN_SH_SI 7
+ { 4, 4, 1 },
+#define SH_BLTIN_V4HI2V2SI 8
+ { 4, 4, 4 },
+#define SH_BLTIN_V4HI2V8QI 9
+ { 4, 4, 4 },
+#define SH_BLTIN_SISF 10
+ { 4, 2 },
+#define SH_BLTIN_LDUA_L 11
+ { 2, 10 },
+#define SH_BLTIN_LDUA_Q 12
+ { 1, 10 },
+#define SH_BLTIN_STUA_L 13
+ { 0, 10, 2 },
+#define SH_BLTIN_STUA_Q 14
+ { 0, 10, 1 },
+#define SH_BLTIN_LDUA_L64 15
+ { 2, 9 },
+#define SH_BLTIN_LDUA_Q64 16
+ { 1, 9 },
+#define SH_BLTIN_STUA_L64 17
+ { 0, 9, 2 },
+#define SH_BLTIN_STUA_Q64 18
+ { 0, 9, 1 },
+#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
+#define SH_BLTIN_2 19
+#define SH_BLTIN_SU 19
+ { 1, 2 },
+#define SH_BLTIN_3 20
+#define SH_BLTIN_SUS 20
+ { 2, 2, 1 },
+#define SH_BLTIN_PSSV 21
+ { 0, 8, 2, 2 },
+#define SH_BLTIN_XXUU 22
+#define SH_BLTIN_UUUU 22
+ { 1, 1, 1, 1 },
+#define SH_BLTIN_PV 23
+ { 0, 8 },
+};
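+
+/* Illustrative decoding (not part of this file) of the encoding above:
+   e.g. SH_BLTIN_LDUA_L is { 2, 10 }, i.e. a signed result and a single
+   32-bit-pointer argument.  A minimal decoder sketch, with a
+   hypothetical name:  */
+#if 0
+static const char *
+sketch_decode_arg (int code)
+{
+  switch (code)
+    {
+    case 0:  return "none";
+    case 1:  return "unsigned";
+    case 2:  return "signed";
+    case 4:  return "don't care";
+    case 8:  return "pointer";
+    case 9:  return "64-bit pointer";
+    case 10: return "32-bit pointer";
+    default: return "?";
+    }
+}
+#endif
+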
+/* mcmv: operands considered unsigned. */
+/* mmulsum_wq, msad_ubq: result considered unsigned long long. */
+/* mperm: control value considered unsigned int. */
+/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
+/* mshards_q: returns signed short. */
+/* nsb: takes long long arg, returns unsigned char. */
+static struct builtin_description bdesc[] =
+{
+ { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
+ { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
+ { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
+ { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
+ { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
+ { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
+ { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
+ { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
+ { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
+ { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
+ { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
+ { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
+ { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
+ { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
+ { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
+ { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
+ { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
+ { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
+ { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
+ { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
+ { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
+ { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
+ { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
+ { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
+ { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
+ { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
+ { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
+ { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
+ { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
+ { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
+ { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
+ { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
+ { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
+ { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
+ { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
+ { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
+ { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
+};
+
+static void
+sh_media_init_builtins (void)
+{
+ tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
+ struct builtin_description *d;
+
+ memset (shared, 0, sizeof shared);
+ for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
+ {
+ tree type, arg_type = 0;
+ int signature = d->signature;
+ int i;
+
+ if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
+ type = shared[signature];
+ else
+ {
+ int has_result = signature_args[signature][0] != 0;
+
+ if ((signature_args[signature][1] & 8)
+ && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
+ || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
+ continue;
+ if (! TARGET_FPU_ANY
+ && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
+ continue;
+ type = void_list_node;
+ for (i = 3; ; i--)
+ {
+ int arg = signature_args[signature][i];
+ int opno = i - 1 + has_result;
+
+ if (arg & 8)
+ arg_type = ptr_type_node;
+ else if (arg)
+ arg_type = (*lang_hooks.types.type_for_mode)
+ (insn_data[d->icode].operand[opno].mode,
+ (arg & 1));
+ else if (i)
+ continue;
+ else
+ arg_type = void_type_node;
+ if (i == 0)
+ break;
+ type = tree_cons (NULL_TREE, arg_type, type);
+ }
+ type = build_function_type (arg_type, type);
+ if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
+ shared[signature] = type;
+ }
+ d->fndecl =
+ add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ }
+}
+
+/* Returns the shmedia builtin decl for CODE. */
+
+static tree
+sh_media_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= ARRAY_SIZE (bdesc))
+ return error_mark_node;
+
+ return bdesc[code].fndecl;
+}
+
+/* Implements target hook vector_mode_supported_p. */
+bool
+sh_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_FPU_ANY
+ && ((mode == V2SFmode)
+ || (mode == V4SFmode)
+ || (mode == V16SFmode)))
+ return true;
+
+ else if (TARGET_SHMEDIA
+ && ((mode == V8QImode)
+ || (mode == V2HImode)
+ || (mode == V4HImode)
+ || (mode == V2SImode)))
+ return true;
+
+ return false;
+}
+
+bool
+sh_frame_pointer_required (void)
+{
+/* If needed, override this in other tm.h files to cope with various OS
+ lossage requiring a frame pointer. */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return true;
+
+ if (crtl->profile)
+ return true;
+
+ return false;
+}
+
+/* Implements target hook dwarf_calling_convention. Return an enum
+ of dwarf_calling_convention. */
+int
+sh_dwarf_calling_convention (const_tree func)
+{
+ if (sh_attr_renesas_p (func))
+ return DW_CC_GNU_renesas_sh;
+
+ return DW_CC_normal;
+}
+
+static void
+sh_init_builtins (void)
+{
+ if (TARGET_SHMEDIA)
+ sh_media_init_builtins ();
+}
+
+/* Returns the sh builtin decl for CODE. */
+
+static tree
+sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (TARGET_SHMEDIA)
+ return sh_media_builtin_decl (code, initialize_p);
+
+ return error_mark_node;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d = &bdesc[fcode];
+ enum insn_code icode = d->icode;
+ int signature = d->signature;
+ enum machine_mode tmode = VOIDmode;
+ int nop = 0, i;
+ rtx op[4];
+ rtx pat = 0;
+
+ if (signature_args[signature][0])
+ {
+ if (ignore)
+ return 0;
+
+ tmode = insn_data[icode].operand[0].mode;
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nop++] = target;
+ }
+ else
+ target = 0;
+
+ for (i = 1; i <= 3; i++, nop++)
+ {
+ tree arg;
+ enum machine_mode opmode, argmode;
+ tree optype;
+
+ if (! signature_args[signature][i])
+ break;
+ arg = CALL_EXPR_ARG (exp, i - 1);
+ if (arg == error_mark_node)
+ return const0_rtx;
+ if (signature_args[signature][i] & 8)
+ {
+ opmode = ptr_mode;
+ optype = ptr_type_node;
+ }
+ else
+ {
+ opmode = insn_data[icode].operand[nop].mode;
+ optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
+ }
+ argmode = TYPE_MODE (TREE_TYPE (arg));
+ if (argmode != opmode)
+ arg = build1 (NOP_EXPR, optype, arg);
+ op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
+ if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
+ op[nop] = copy_to_mode_reg (opmode, op[nop]);
+ }
+
+ switch (nop)
+ {
+ case 1:
+ pat = (*insn_data[d->icode].genfun) (op[0]);
+ break;
+ case 2:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
+ break;
+ case 3:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+void
+sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx sel0 = const0_rtx;
+ rtx sel1 = const1_rtx;
+ rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
+ rtx op = gen_rtx_fmt_e (code, SFmode, op1);
+
+ emit_insn ((*fn) (op0, op1, op, sel0, sel0));
+ emit_insn ((*fn) (op0, op1, op, sel1, sel1));
+}
+
+void
+sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
+{
+ rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
+
+ emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
+ emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
+}
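+
+/* Illustrative sketch (not part of this file) of what the two V2SF
+   expanders above compute: the scalar SFmode operation is applied once
+   per lane, matching the paired insn emissions (sel0/sel1 for the
+   unary case, the op0/op1 pair for the binary case); names here are
+   hypothetical.  */
+#if 0
+static void
+sketch_binop_v2sf (float dst[2], const float a[2], const float b[2],
+                   float (*op) (float, float))
+{
+  dst[0] = op (a[0], b[0]);  /* lane 0 insn.  */
+  dst[1] = op (a[1], b[1]);  /* lane 1 insn.  */
+}
+#endif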
+
+/* Return true if hard register REGNO can hold a value of machine-mode MODE.
+ We can allow any mode in any general register. The special registers
+ only allow SImode. Don't allow any mode in the PR.
+
+ We cannot hold DCmode values in the XD registers because alter_reg
+ handles subregs of them incorrectly. We could work around this by
+ spacing the XD registers like the DR registers, but this would require
+ additional memory in every compilation to hold larger register vectors.
+ We could hold SFmode / SCmode values in XD registers, but that
+ would require a tertiary reload when reloading from / to memory,
+ and a secondary reload to reload from / to general regs; that
+ seems to be a losing proposition.
+
+ We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
+ it won't be ferried through GP registers first. */
+
+bool
+sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ if (SPECIAL_REGISTER_P (regno))
+ return mode == SImode;
+
+ if (regno == FPUL_REG)
+ return (mode == SImode || mode == SFmode);
+
+ if (FP_REGISTER_P (regno) && mode == SFmode)
+ return true;
+
+ if (mode == V2SFmode)
+ {
+ if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
+ || GENERAL_REGISTER_P (regno)))
+ return true;
+ else
+ return false;
+ }
+
+ if (mode == V4SFmode)
+ {
+ if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
+ || GENERAL_REGISTER_P (regno))
+ return true;
+ else
+ return false;
+ }
+
+ if (mode == V16SFmode)
+ {
+ if (TARGET_SHMEDIA)
+ {
+ if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
+ return true;
+ else
+ return false;
+ }
+ else
+ return regno == FIRST_XD_REG;
+ }
+
+ if (FP_REGISTER_P (regno))
+ {
+ if (mode == SFmode
+ || mode == SImode
+ || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
+ || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
+ || mode == DCmode
+ || (TARGET_SHMEDIA
+ && (mode == DFmode || mode == DImode
+ || mode == V2SFmode || mode == TImode)))
+ && ((regno - FIRST_FP_REG) & 1) == 0)
+ || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
+ && ((regno - FIRST_FP_REG) & 3) == 0))
+ return true;
+ else
+ return false;
+ }
+
+ if (XD_REGISTER_P (regno))
+ return mode == DFmode;
+
+ if (TARGET_REGISTER_P (regno))
+ return (mode == DImode || mode == SImode || mode == PDImode);
+
+ if (regno == PR_REG)
+ return mode == SImode;
+
+ if (regno == FPSCR_REG)
+ return mode == PSImode;
+
+ /* FIXME. This works around PR target/37633 for -O0. */
+ if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
+ {
+ unsigned int n = GET_MODE_SIZE (mode) / 8;
+
+ if (regno >= FIRST_GENERAL_REG + 10 - n + 1
+ && regno <= FIRST_GENERAL_REG + 14)
+ return false;
+ }
+
+ return true;
+}
+
+/* Return the class of registers for which a mode change from FROM to TO
+ is invalid. */
+bool
+sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class rclass)
+{
+ /* We want to enable the use of SUBREGs as a means to
+ VEC_SELECT a single element of a vector. */
+ if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
+ return (reg_classes_intersect_p (GENERAL_REGS, rclass));
+
+ if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
+ {
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
+ return reg_classes_intersect_p (DF_REGS, rclass);
+ }
+ else
+ {
+ if (GET_MODE_SIZE (from) < 8)
+ return reg_classes_intersect_p (DF_HI_REGS, rclass);
+ }
+ }
+ return 0;
+}
+
+/* Return true if registers in machine mode MODE will likely be
+ allocated to registers in small register classes. */
+
+bool
+sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return (! TARGET_SHMEDIA);
+}
+
+/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
+ that label is used. */
+
+void
+sh_mark_label (rtx address, int nuses)
+{
+ if (GOTOFF_P (address))
+ {
+ /* Extract the label or symbol. */
+ address = XEXP (address, 0);
+ if (GET_CODE (address) == PLUS)
+ address = XEXP (address, 0);
+ address = XVECEXP (address, 0, 0);
+ }
+ if (GET_CODE (address) == LABEL_REF
+ && LABEL_P (XEXP (address, 0)))
+ LABEL_NUSES (XEXP (address, 0)) += nuses;
+}
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+
+/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
+ uses this information. Hence, the general register <-> floating point
+ register information here is not used for SFmode. */
+
+static int
+sh_register_move_cost (enum machine_mode mode,
+ reg_class_t srcclass, reg_class_t dstclass)
+{
+ if (dstclass == T_REGS || dstclass == PR_REGS)
+ return 10;
+
+ if (dstclass == MAC_REGS && srcclass == MAC_REGS)
+ return 4;
+
+ if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
+ && REGCLASS_HAS_FP_REG (srcclass)
+ && REGCLASS_HAS_FP_REG (dstclass))
+ return 4;
+
+ if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
+ return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
+
+ if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
+ || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
+ return 9;
+
+ if ((REGCLASS_HAS_FP_REG (dstclass)
+ && REGCLASS_HAS_GENERAL_REG (srcclass))
+ || (REGCLASS_HAS_GENERAL_REG (dstclass)
+ && REGCLASS_HAS_FP_REG (srcclass)))
+ return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
+ * ((GET_MODE_SIZE (mode) + 7) / 8U));
+
+ if ((dstclass == FPUL_REGS
+ && REGCLASS_HAS_GENERAL_REG (srcclass))
+ || (srcclass == FPUL_REGS
+ && REGCLASS_HAS_GENERAL_REG (dstclass)))
+ return 5;
+
+ if ((dstclass == FPUL_REGS
+ && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
+ || (srcclass == FPUL_REGS
+ && (dstclass == PR_REGS || dstclass == MAC_REGS)))
+ return 7;
+
+ if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
+ || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
+ return 20;
+
+ /* ??? ptabs faults on (value & 0x3) == 0x3 */
+ if (TARGET_SHMEDIA
+ && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
+ {
+ if (sh_gettrcost >= 0)
+ return sh_gettrcost;
+ else if (!TARGET_PT_FIXED)
+ return 100;
+ }
+
+ if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
+ || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
+ return 4;
+
+ if (TARGET_SHMEDIA
+ || (TARGET_FMOVD
+ && ! REGCLASS_HAS_GENERAL_REG (srcclass)
+ && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
+ return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
+
+ return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
+}
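+
+/* Illustrative worked instance (not part of this file) of the scaled
+   cost formula above for general <-> floating point moves; the
+   function name and parameters are hypothetical.  */
+#if 0
+static int
+sketch_fp_gp_move_cost (int mode_size, int shmedia, int fmovd)
+{
+  /* Base cost per 8-byte chunk, as in sh_register_move_cost: 4 on
+     SHmedia, 8 with fmovd, otherwise 12.  E.g. DFmode (8 bytes)
+     without fmovd costs 12 * 1 = 12.  */
+  int base = shmedia ? 4 : fmovd ? 8 : 12;
+  return base * ((mode_size + 7) / 8);
+}
+#endif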
+
+static rtx emit_load_ptr (rtx, rtx);
+
+static rtx
+emit_load_ptr (rtx reg, rtx addr)
+{
+ rtx mem = gen_const_mem (ptr_mode, addr);
+
+ if (Pmode != ptr_mode)
+ mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
+ return emit_move_insn (reg, mem);
+}
+
+static void
+sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ CUMULATIVE_ARGS cum;
+ int structure_value_byref = 0;
+ rtx this_rtx, this_value, sibcall, insns, funexp;
+ tree funtype = TREE_TYPE (function);
+ int simple_add = CONST_OK_FOR_ADD (delta);
+ int did_load = 0;
+ rtx scratch0, scratch1, scratch2;
+ unsigned i;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+ current_function_uses_only_leaf_regs = 1;
+
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Find the "this" pointer. We have such a wide range of ABIs for the
+ SH that it's best to do this completely machine independently.
+ "this" is passed as first argument, unless a structure return pointer
+ comes first, in which case "this" comes second. */
+ INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
+#ifndef PCC_STATIC_STRUCT_RETURN
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ structure_value_byref = 1;
+#endif /* not PCC_STATIC_STRUCT_RETURN */
+ if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
+ {
+ tree ptype = build_pointer_type (TREE_TYPE (funtype));
+
+ sh_function_arg_advance (&cum, Pmode, ptype, true);
+ }
+ this_rtx = sh_function_arg (&cum, Pmode, ptr_type_node, true);
+
+ /* For SHcompact, we only have r0 for a scratch register: r1 is the
+ static chain pointer (even if you can't have nested virtual functions
+ right now, someone might implement them sometime), and the rest of the
+ registers are used for argument passing, are callee-saved, or reserved. */
+ /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
+ -ffixed-reg has been used. */
+ if (! call_used_regs[0] || fixed_regs[0])
+ error ("r0 needs to be available as a call-clobbered register");
+ scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
+ if (! TARGET_SH5)
+ {
+ if (call_used_regs[1] && ! fixed_regs[1])
+ scratch1 = gen_rtx_REG (ptr_mode, 1);
+ /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
+ to where struct values are to be returned. */
+ if (call_used_regs[3] && ! fixed_regs[3])
+ scratch2 = gen_rtx_REG (Pmode, 3);
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
+ if (i != REGNO (scratch0) &&
+ call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
+ {
+ scratch1 = gen_rtx_REG (ptr_mode, i);
+ break;
+ }
+ if (scratch1 == scratch0)
+ error ("need a second call-clobbered general purpose register");
+ for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+ if (call_used_regs[i] && ! fixed_regs[i])
+ {
+ scratch2 = gen_rtx_REG (Pmode, i);
+ break;
+ }
+ if (scratch2 == scratch0)
+ error ("need a call-clobbered target register");
+ }
+
+ this_value = plus_constant (this_rtx, delta);
+ if (vcall_offset
+ && (simple_add || scratch0 != scratch1)
+ && strict_memory_address_p (ptr_mode, this_value))
+ {
+ emit_load_ptr (scratch0, this_value);
+ did_load = 1;
+ }
+
+ if (!delta)
+ ; /* Do nothing. */
+ else if (simple_add)
+ emit_move_insn (this_rtx, this_value);
+ else
+ {
+ emit_move_insn (scratch1, GEN_INT (delta));
+ emit_insn (gen_add2_insn (this_rtx, scratch1));
+ }
+
+ if (vcall_offset)
+ {
+ rtx offset_addr;
+
+ if (!did_load)
+ emit_load_ptr (scratch0, this_rtx);
+
+ offset_addr = plus_constant (scratch0, vcall_offset);
+ if (strict_memory_address_p (ptr_mode, offset_addr))
+ ; /* Do nothing. */
+ else if (! TARGET_SH5 && scratch0 != scratch1)
+ {
+ /* scratch0 != scratch1, and we have indexed loads. Get a better
+ schedule by loading the offset into r1 and using an indexed
+ load - then the load of r1 can issue before the load from
+ (this_rtx + delta) finishes. */
+ emit_move_insn (scratch1, GEN_INT (vcall_offset));
+ offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
+ }
+ else if (CONST_OK_FOR_ADD (vcall_offset))
+ {
+ emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
+ offset_addr = scratch0;
+ }
+ else if (scratch0 != scratch1)
+ {
+ emit_move_insn (scratch1, GEN_INT (vcall_offset));
+ emit_insn (gen_add2_insn (scratch0, scratch1));
+ offset_addr = scratch0;
+ }
+ else
+ gcc_unreachable (); /* FIXME */
+ emit_load_ptr (scratch0, offset_addr);
+
+ if (Pmode != ptr_mode)
+ scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
+ emit_insn (gen_add2_insn (this_rtx, scratch0));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (! TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+ /* If the function is overridden, so is the thunk, hence we don't
+ need GOT addressing even if this is a public symbol. */
+#if 0
+ if (TARGET_SH1 && ! flag_weak)
+ sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
+ else
+#endif
+ if (TARGET_SH2 && flag_pic)
+ {
+ sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ }
+ else
+ {
+ if (TARGET_SHMEDIA && flag_pic)
+ {
+ funexp = gen_sym2PIC (funexp);
+ PUT_MODE (funexp, Pmode);
+ }
+ emit_move_insn (scratch2, funexp);
+ funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
+ sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
+ }
+ sibcall = emit_call_insn (sibcall);
+ SIBLING_CALL_P (sibcall) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
+ emit_barrier ();
+
+ /* Run just enough of rest_of_compilation to do scheduling and get
+ the insns emitted. Note that use_thunk calls
+ assemble_start_function and assemble_end_function. */
+
+ insn_locators_alloc ();
+ insns = get_insns ();
+
+ if (optimize > 0)
+ {
+ if (! cfun->cfg)
+ init_flow (cfun);
+ split_all_insns_noflow ();
+ }
+
+ sh_reorg ();
+
+ if (optimize > 0 && flag_delayed_branch)
+ dbr_schedule (insns);
+
+ shorten_branches (insns);
+ final_start_function (insns, file, 1);
+ final (insns, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
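+
+/* Illustrative sketch (not part of this file) of the adjustment the
+   emitted thunk performs before tail-calling FUNCTION; the function
+   name is hypothetical, and `long' stands in for a Pmode-sized
+   integer.  */
+#if 0
+static void *
+sketch_thunk_adjust (void *this_ptr, long delta, long vcall_offset)
+{
+  char *p = (char *) this_ptr + delta;  /* constant DELTA adjustment.  */
+  if (vcall_offset)
+    {
+      char *vptr = *(char **) p;               /* load the vtable pointer.  */
+      p += *(long *) (vptr + vcall_offset);    /* add the vcall adjustment.  */
+    }
+  return p;  /* adjusted "this"; the thunk then tail-calls the target.  */
+}
+#endif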
+
+rtx
+function_symbol (rtx target, const char *name, enum sh_function_kind kind)
+{
+ rtx sym;
+
+ /* If this is not an ordinary function, the name usually comes from a
+ string literal or an sprintf buffer. Make sure we use the same
+ string consistently, so that cse will be able to unify address loads. */
+ if (kind != FUNCTION_ORDINARY)
+ name = IDENTIFIER_POINTER (get_identifier (name));
+ sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
+ if (flag_pic)
+ switch (kind)
+ {
+ case FUNCTION_ORDINARY:
+ break;
+ case SFUNC_GOT:
+ {
+ rtx reg = target ? target : gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, sym));
+ sym = reg;
+ break;
+ }
+ case SFUNC_STATIC:
+ {
+ /* ??? To allow cse to work, we use GOTOFF relocations.
+ We could add combiner patterns to transform this into
+ straight pc-relative calls with sym2PIC / bsrf when
+ label load and function call are still 1:1 and in the
+ same basic block during combine. */
+ rtx reg = target ? target : gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTOFF2reg (reg, sym));
+ sym = reg;
+ break;
+ }
+ }
+ if (target && sym != target)
+ {
+ emit_move_insn (target, sym);
+ return target;
+ }
+ return sym;
+}
+
+/* Find the number of a general purpose register in S. */
+static int
+scavenge_reg (HARD_REG_SET *s)
+{
+ int r;
+ for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
+ if (TEST_HARD_REG_BIT (*s, r))
+ return r;
+ return -1;
+}
+
+rtx
+sh_get_pr_initial_val (void)
+{
+ rtx val;
+
+ /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
+ PR register on SHcompact, because it might be clobbered by the prologue.
+ We check first if that is known to be the case. */
+ if (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers))
+ return gen_frame_mem (SImode, return_address_pointer_rtx);
+
+ /* If we haven't finished rtl generation, there might be a nonlocal label
+ that we haven't seen yet.
+ ??? get_hard_reg_initial_val fails if it is called after register
+ allocation has started, unless it has been called before for the
+ same register. And even then, we end up in trouble if we didn't use
+ the register in the same basic block before. So call
+ get_hard_reg_initial_val now and wrap it in an unspec if we might
+ need to replace it. */
+ /* ??? We also must do this for TARGET_SH1 in general, because otherwise
+ combine can put the pseudo returned by get_hard_reg_initial_val into
+ instructions that need a general purpose register, which will fail to
+ be recognized when the pseudo becomes allocated to PR. */
+ val
+ = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
+ if (TARGET_SH1)
+ return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
+ return val;
+}
+
+int
+sh_expand_t_scc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx target = operands[0];
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+ rtx result = target;
+ HOST_WIDE_INT val;
+
+ if (!REG_P (op0) || REGNO (op0) != T_REG
+ || !CONST_INT_P (op1))
+ return 0;
+ if (!REG_P (result))
+ result = gen_reg_rtx (SImode);
+ val = INTVAL (op1);
+ if ((code == EQ && val == 1) || (code == NE && val == 0))
+ emit_insn (gen_movt (result));
+ else if (TARGET_SH2A && ((code == EQ && val == 0)
+ || (code == NE && val == 1)))
+ emit_insn (gen_xorsi3_movrt (result));
+ else if ((code == EQ && val == 0) || (code == NE && val == 1))
+ {
+ emit_clobber (result);
+ emit_insn (gen_subc (result, result, result));
+ emit_insn (gen_addsi3 (result, result, const1_rtx));
+ }
+ else if (code == EQ || code == NE)
+ emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
+ else
+ return 0;
+ if (result != target)
+ emit_move_insn (target, result);
+ return 1;
+}
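+
+/* Illustrative arithmetic (not part of this file) behind the
+   clobber/subc/add branch above: "subc r,r,r" leaves r = r - r - T
+   = -T, so adding 1 yields 1 - T, the truth value of T == 0.  A
+   hypothetical C rendering:  */
+#if 0
+static int
+sketch_scc_t_eq_0 (int t)  /* t is the T bit, 0 or 1.  */
+{
+  int r = 0 - 0 - t;  /* subc result,result,result => -T.  */
+  return r + 1;       /* add #1 => 1 - T, i.e. (T == 0).  */
+}
+#endif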
+
+/* INSN is an sfunc; return the rtx that describes the address used. */
+static rtx
+extract_sfunc_addr (rtx insn)
+{
+ rtx pattern, part = NULL_RTX;
+ int len, i;
+
+ pattern = PATTERN (insn);
+ len = XVECLEN (pattern, 0);
+ for (i = 0; i < len; i++)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
+ && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
+ return XEXP (part, 0);
+ }
+ gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
+ return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
+}
+
+/* Verify that the register in use_sfunc_addr still agrees with the address
+ used in the sfunc. This prevents fill_slots_from_thread from changing
+ use_sfunc_addr.
+ INSN is the use_sfunc_addr instruction, and REG is the register it
+ guards. */
+int
+check_use_sfunc_addr (rtx insn, rtx reg)
+{
+ /* Search for the sfunc. It should really come right after INSN. */
+ while ((insn = NEXT_INSN (insn)))
+ {
+ if (LABEL_P (insn) || JUMP_P (insn))
+ break;
+ if (! INSN_P (insn))
+ continue;
+
+ if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, 0);
+ if (GET_CODE (PATTERN (insn)) != PARALLEL
+ || get_attr_type (insn) != TYPE_SFUNC)
+ continue;
+ return rtx_equal_p (extract_sfunc_addr (insn), reg);
+ }
+ gcc_unreachable ();
+}
+
+/* This function returns a constant rtx that represents 2**15 / pi in
+ SFmode. It's used to scale SFmode angles, in radians, to a
+ fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
+ maps to 0x10000). */
+
+static GTY(()) rtx sh_fsca_sf2int_rtx;
+
+rtx
+sh_fsca_sf2int (void)
+{
+ if (! sh_fsca_sf2int_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "10430.378350470453");
+ sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_sf2int_rtx;
+}
+
+/* This function returns a constant rtx that represents 2**15 / pi in
+ DFmode. It's used to scale DFmode angles, in radians, to a
+ fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
+ maps to 0x10000). */
+
+static GTY(()) rtx sh_fsca_df2int_rtx;
+
+rtx
+sh_fsca_df2int (void)
+{
+ if (! sh_fsca_df2int_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "10430.378350470453");
+ sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
+ }
+
+ return sh_fsca_df2int_rtx;
+}
+
+/* This function returns a constant rtx that represents pi / 2**15 in
+ SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
+ of a full circle back to an SFmode value (i.e., 0x10000 maps to
+ 2*pi). */
+
+static GTY(()) rtx sh_fsca_int2sf_rtx;
+
+rtx
+sh_fsca_int2sf (void)
+{
+ if (! sh_fsca_int2sf_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "9.587379924285257e-5");
+ sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_int2sf_rtx;
+}
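+
+/* Illustrative sketch (not part of this file) of the scaling these
+   constants perform; the names are hypothetical.  Note that
+   10430.378350470453 is 2**15 / pi and 9.587379924285257e-5 is
+   pi / 2**15.  */
+#if 0
+static int
+sketch_radians_to_fsca_units (float radians)
+{
+  /* Multiply by 2**15 / pi: 2*pi radians map to 0x10000.  */
+  return (int) (radians * 10430.378350470453f);
+}
+
+static float
+sketch_fsca_units_to_radians (int units)
+{
+  /* Multiply by pi / 2**15: 0x10000 maps back to 2*pi radians.  */
+  return units * 9.587379924285257e-5f;
+}
+#endif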
+
+/* Initialize the CUMULATIVE_ARGS structure. */
+
+void
+sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
+ tree fntype,
+ rtx libname ATTRIBUTE_UNUSED,
+ tree fndecl,
+ signed int n_named_args,
+ enum machine_mode mode)
+{
+ pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
+ pcum->free_single_fp_reg = 0;
+ pcum->stack_regs = 0;
+ pcum->byref_regs = 0;
+ pcum->byref = 0;
+ pcum->outgoing = (n_named_args == -1) ? 0 : 1;
+
+ /* XXX - Should we check TARGET_HITACHI here ??? */
+ pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
+
+ if (fntype)
+ {
+ pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
+ && aggregate_value_p (TREE_TYPE (fntype), fndecl));
+ pcum->prototype_p = prototype_p (fntype);
+ pcum->arg_count [(int) SH_ARG_INT]
+ = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
+
+ pcum->call_cookie
+ = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
+ && pcum->arg_count [(int) SH_ARG_INT] == 0
+ && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
+ ? int_size_in_bytes (TREE_TYPE (fntype))
+ : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
+ && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
+ == FIRST_RET_REG));
+ }
+ else
+ {
+ pcum->arg_count [(int) SH_ARG_INT] = 0;
+ pcum->prototype_p = FALSE;
+ if (mode != VOIDmode)
+ {
+ pcum->call_cookie =
+ CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
+ && GET_MODE_SIZE (mode) > 4
+ && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
+
+ /* If the default ABI is the Renesas ABI then all library
+ calls must assume that the library will be using the
+ Renesas ABI. So if the function would return its result
+ in memory then we must force the address of this memory
+ block onto the stack. Ideally we would like to call
+ targetm.calls.return_in_memory() here but we do not have
+ the TYPE or the FNDECL available so we synthesize the
+ contents of that function as best we can. */
+ pcum->force_mem =
+ (TARGET_DEFAULT & MASK_HITACHI)
+ && (mode == BLKmode
+ || (GET_MODE_SIZE (mode) > 4
+ && !(mode == DFmode
+ && TARGET_FPU_DOUBLE)));
+ }
+ else
+ {
+ pcum->call_cookie = 0;
+ pcum->force_mem = FALSE;
+ }
+ }
+}
+
+/* Replace any occurrence of FROM(n) in X with TO(n). The function does
+ not descend into CONST_DOUBLEs when replacing.
+
+ Note that copying is not done so X must not be shared unless all copies
+ are to be modified.
+
+ This is like replace_rtx, except that we operate on N_REPLACEMENTS
+ replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
+ replacements[n*2+1] - and that we take mode changes into account.
+
+ If a replacement is ambiguous, return NULL_RTX.
+
+ If MODIFY is zero, don't modify any rtl in place,
+ just return zero or nonzero for failure / success. */
+
+rtx
+replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
+{
+ int i, j;
+ const char *fmt;
+
+ /* The following prevents infinite loops when we change a MEM inside
+ a CONST_DOUBLE into the same CONST_DOUBLE. */
+ if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
+ return x;
+
+ for (i = n_replacements - 1; i >= 0 ; i--)
+ if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
+ return replacements[i*2+1];
+
+ /* Allow this function to make replacements in EXPR_LISTs. */
+ if (x == 0)
+ return 0;
+
+ if (GET_CODE (x) == SUBREG)
+ {
+ rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
+ n_replacements, modify);
+
+ if (CONST_INT_P (new_rtx))
+ {
+ x = simplify_subreg (GET_MODE (x), new_rtx,
+ GET_MODE (SUBREG_REG (x)),
+ SUBREG_BYTE (x));
+ if (! x)
+ abort ();
+ }
+ else if (modify)
+ SUBREG_REG (x) = new_rtx;
+
+ return x;
+ }
+ else if (REG_P (x))
+ {
+ unsigned regno = REGNO (x);
+ unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
+ rtx result = NULL_RTX;
+
+ for (i = n_replacements - 1; i >= 0; i--)
+ {
+ rtx from = replacements[i*2];
+ rtx to = replacements[i*2+1];
+ unsigned from_regno, from_nregs, to_regno, new_regno;
+
+ if (!REG_P (from))
+ continue;
+ from_regno = REGNO (from);
+ from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
+ if (regno < from_regno + from_nregs && regno + nregs > from_regno)
+ {
+ if (regno < from_regno
+ || regno + nregs > from_regno + from_nregs
+ || !REG_P (to)
+ || result)
+ return NULL_RTX;
+ to_regno = REGNO (to);
+ if (to_regno < FIRST_PSEUDO_REGISTER)
+ {
+ new_regno = regno + to_regno - from_regno;
+ if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
+ != nregs)
+ return NULL_RTX;
+ result = gen_rtx_REG (GET_MODE (x), new_regno);
+ }
+ else if (GET_MODE (x) <= GET_MODE (to))
+ result = gen_lowpart_common (GET_MODE (x), to);
+ else
+ result = gen_lowpart_SUBREG (GET_MODE (x), to);
+ }
+ }
+ return result ? result : x;
+ }
+ else if (GET_CODE (x) == ZERO_EXTEND)
+ {
+ rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
+ n_replacements, modify);
+
+ if (CONST_INT_P (new_rtx))
+ {
+ x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
+ new_rtx, GET_MODE (XEXP (x, 0)));
+ if (! x)
+ abort ();
+ }
+ else if (modify)
+ XEXP (x, 0) = new_rtx;
+
+ return x;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ rtx new_rtx;
+
+ if (fmt[i] == 'e')
+ {
+ new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
+ n_replacements, modify);
+ if (!new_rtx)
+ return NULL_RTX;
+ if (modify)
+ XEXP (x, i) = new_rtx;
+ }
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ {
+ new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
+ n_replacements, modify);
+ if (!new_rtx)
+ return NULL_RTX;
+ if (modify)
+ XVECEXP (x, i, j) = new_rtx;
+ }
+ }
+
+ return x;
+}
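+
+/* Illustrative call (not part of this file) showing the replacements[]
+   layout described in the comment above; the rtx names from0/to0/
+   from1/to1 are hypothetical.  */
+#if 0
+  /* Replace from0 with to0 and from1 with to1 simultaneously,
+     modifying X in place.  */
+  rtx repl[4] = { from0, to0, from1, to1 };
+  x = replace_n_hard_rtx (x, repl, 2, 1);
+#endif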
+
+rtx
+sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
+{
+ enum rtx_code code = TRUNCATE;
+
+ if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
+ {
+ rtx inner = XEXP (x, 0);
+ enum machine_mode inner_mode = GET_MODE (inner);
+
+ if (inner_mode == mode)
+ return inner;
+ else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
+ x = inner;
+ else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
+ && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
+ {
+ code = GET_CODE (x);
+ x = inner;
+ }
+ }
+ return gen_rtx_fmt_e (code, mode, x);
+}
+
+/* Called via for_each_rtx after reload, to clean up truncates of
+ registers that span multiple actual hard registers. */
+int
+shmedia_cleanup_truncate (rtx *p, void *n_changes)
+{
+ rtx x = *p, reg;
+
+ if (GET_CODE (x) != TRUNCATE)
+ return 0;
+ reg = XEXP (x, 0);
+ if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
+ {
+ enum machine_mode reg_mode = GET_MODE (reg);
+ XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
+ subreg_lowpart_offset (DImode, reg_mode));
+ *(int*) n_changes += 1;
+ return -1;
+ }
+ return 0;
+}
+
+/* Load and store depend on the highpart of the address. However,
+ set_attr_alternative does not give well-defined results before reload,
+ so we must look at the rtl ourselves to see if any of the feeding
+ registers is used in a memref. */
+
+/* Called by sh_contains_memref_p via for_each_rtx. */
+static int
+sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ return (MEM_P (*loc));
+}
+
+/* Return nonzero iff INSN contains a MEM. */
+int
+sh_contains_memref_p (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
+}
+
+/* Return nonzero iff INSN loads a banked register. */
+int
+sh_loads_bankedreg_p (rtx insn)
+{
+ if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx op = SET_DEST (PATTERN(insn));
+ if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* FNADDR is the MEM expression from a call expander. Return an address
+ to use in an SHmedia insn pattern. */
+rtx
+shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
+{
+ int is_sym;
+
+ fnaddr = XEXP (fnaddr, 0);
+ is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
+ if (flag_pic && is_sym)
+ {
+ if (! SYMBOL_REF_LOCAL_P (fnaddr))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ /* We must not use GOTPLT for sibcalls, because PIC_REG
+ must be restored before the PLT code gets to run. */
+ if (is_sibcall)
+ emit_insn (gen_symGOT2reg (reg, fnaddr));
+ else
+ emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
+ fnaddr = reg;
+ }
+ else
+ {
+ fnaddr = gen_sym2PIC (fnaddr);
+ PUT_MODE (fnaddr, Pmode);
+ }
+ }
+ /* If ptabs might trap, make this visible to the rest of the compiler.
+ We generally assume that symbols pertain to valid locations, but
+ it is possible to generate invalid symbols with asm or linker tricks.
+ In a list of functions where each returns its successor, an invalid
+ symbol might denote an empty list. */
+ if (!TARGET_PT_FIXED
+ && (!is_sym || TARGET_INVALID_SYMBOLS)
+ && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
+ {
+ rtx tr = gen_reg_rtx (PDImode);
+
+ emit_insn (gen_ptabs (tr, fnaddr));
+ fnaddr = tr;
+ }
+ else if (! target_reg_operand (fnaddr, Pmode))
+ fnaddr = copy_to_mode_reg (Pmode, fnaddr);
+ return fnaddr;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
+
+static reg_class_t
+sh_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (rclass == NO_REGS
+ && TARGET_SHMEDIA
+ && (CONST_DOUBLE_P (x)
+ || GET_CODE (x) == SYMBOL_REF
+ || PIC_ADDR_P (x)))
+ return GENERAL_REGS;
+
+ return rclass;
+}
+
+/* Implement TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ if (in_p)
+ {
+ if (REGCLASS_HAS_FP_REG (rclass)
+ && ! TARGET_SHMEDIA
+ && immediate_operand ((x), mode)
+ && ! ((fp_zero_operand (x) || fp_one_operand (x))
+ && mode == SFmode && fldi_ok ()))
+ switch (mode)
+ {
+ case SFmode:
+ sri->icode = CODE_FOR_reload_insf__frn;
+ return NO_REGS;
+ case DFmode:
+ sri->icode = CODE_FOR_reload_indf__frn;
+ return NO_REGS;
+ case SImode:
+ /* ??? If we knew that we are in the appropriate mode -
+ single precision - we could use a reload pattern directly. */
+ return FPUL_REGS;
+ default:
+ abort ();
+ }
+ if (rclass == FPUL_REGS
+ && ((REG_P (x)
+ && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
+ || REGNO (x) == T_REG))
+ || GET_CODE (x) == PLUS))
+ return GENERAL_REGS;
+ if (rclass == FPUL_REGS && immediate_operand (x, mode))
+ {
+ if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
+ return GENERAL_REGS;
+ else if (mode == SFmode)
+ return FP_REGS;
+ sri->icode = CODE_FOR_reload_insi__i_fpul;
+ return NO_REGS;
+ }
+ if (rclass == FPSCR_REGS
+ && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
+ || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
+ return GENERAL_REGS;
+ if (REGCLASS_HAS_FP_REG (rclass)
+ && TARGET_SHMEDIA
+ && immediate_operand (x, mode)
+ && x != CONST0_RTX (GET_MODE (x))
+ && GET_MODE (x) != V4SFmode)
+ return GENERAL_REGS;
+ if ((mode == QImode || mode == HImode)
+ && TARGET_SHMEDIA && inqhi_operand (x, mode))
+ {
+ sri->icode = ((mode == QImode)
+ ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
+ return NO_REGS;
+ }
+ if (TARGET_SHMEDIA && rclass == GENERAL_REGS
+ && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
+ return TARGET_REGS;
+ } /* end of input-only processing. */
+
+ if (((REGCLASS_HAS_FP_REG (rclass)
+ && (REG_P (x)
+ && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
+ || (FP_REGISTER_P (REGNO (x)) && mode == SImode
+ && TARGET_FMOVD))))
+ || (REGCLASS_HAS_GENERAL_REG (rclass)
+ && REG_P (x)
+ && FP_REGISTER_P (REGNO (x))))
+ && ! TARGET_SHMEDIA
+ && (mode == SFmode || mode == SImode))
+ return FPUL_REGS;
+ if ((rclass == FPUL_REGS
+ || (REGCLASS_HAS_FP_REG (rclass)
+ && ! TARGET_SHMEDIA && mode == SImode))
+ && (MEM_P (x)
+ || (REG_P (x)
+ && (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || REGNO (x) == T_REG
+ || system_reg_operand (x, VOIDmode)))))
+ {
+ if (rclass == FPUL_REGS)
+ return GENERAL_REGS;
+ return FPUL_REGS;
+ }
+ if ((rclass == TARGET_REGS
+ || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
+ && !satisfies_constraint_Csy (x)
+ && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
+ return GENERAL_REGS;
+ if ((rclass == MAC_REGS || rclass == PR_REGS)
+ && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
+ && rclass != REGNO_REG_CLASS (REGNO (x)))
+ return GENERAL_REGS;
+ if (rclass != GENERAL_REGS && REG_P (x)
+ && TARGET_REGISTER_P (REGNO (x)))
+ return GENERAL_REGS;
+ return NO_REGS;
+}
+
+static void
+sh_conditional_register_usage (void)
+{
+ int regno;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
+ if (! VALID_REGISTER_P (regno))
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
+ if (TARGET_SH5)
+ {
+ call_used_regs[FIRST_GENERAL_REG + 8]
+ = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
+ call_really_used_regs[FIRST_GENERAL_REG + 8]
+ = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
+ }
+ if (TARGET_SHMEDIA)
+ {
+ regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
+ CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
+ regno_reg_class[FIRST_FP_REG] = FP_REGS;
+ }
+ if (flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+ /* Renesas saves and restores MAC registers on call. */
+ if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
+ {
+ call_really_used_regs[MACH_REG] = 0;
+ call_really_used_regs[MACL_REG] = 0;
+ }
+ for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
+ regno <= LAST_FP_REG; regno += 2)
+ SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
+ if (TARGET_SHMEDIA)
+ {
+ for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
+ if (! fixed_regs[regno] && call_really_used_regs[regno])
+ SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+ }
+ else
+ for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
+ if (! fixed_regs[regno] && call_really_used_regs[regno])
+ SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+}
+
+
+enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
+
+#include "gt-sh.h"
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
new file mode 100644
index 000000000..4579af327
--- /dev/null
+++ b/gcc/config/sh/sh.h
@@ -0,0 +1,2511 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+ 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SH_H
+#define GCC_SH_H
+
+#include "config/vxworks-dummy.h"
+
+#define TARGET_VERSION \
+ fputs (" (Hitachi SH)", stderr);
+
+/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't
+ include it here, because bconfig.h is also included by gencodes.c. */
+/* ??? No longer true. */
+extern int code_for_indirect_jump_scratch;
+
+#define TARGET_CPU_CPP_BUILTINS() \
+do { \
+ builtin_define ("__sh__"); \
+ builtin_assert ("cpu=sh"); \
+ builtin_assert ("machine=sh"); \
+ switch ((int) sh_cpu) \
+ { \
+ case PROCESSOR_SH1: \
+ builtin_define ("__sh1__"); \
+ break; \
+ case PROCESSOR_SH2: \
+ builtin_define ("__sh2__"); \
+ break; \
+ case PROCESSOR_SH2E: \
+ builtin_define ("__SH2E__"); \
+ break; \
+ case PROCESSOR_SH2A: \
+ builtin_define ("__SH2A__"); \
+ builtin_define (TARGET_SH2A_DOUBLE \
+ ? (TARGET_FPU_SINGLE ? "__SH2A_SINGLE__" : "__SH2A_DOUBLE__") \
+ : TARGET_FPU_ANY ? "__SH2A_SINGLE_ONLY__" \
+ : "__SH2A_NOFPU__"); \
+ break; \
+ case PROCESSOR_SH3: \
+ builtin_define ("__sh3__"); \
+ builtin_define ("__SH3__"); \
+ if (TARGET_HARD_SH4) \
+ builtin_define ("__SH4_NOFPU__"); \
+ break; \
+ case PROCESSOR_SH3E: \
+ builtin_define (TARGET_HARD_SH4 ? "__SH4_SINGLE_ONLY__" : "__SH3E__"); \
+ break; \
+ case PROCESSOR_SH4: \
+ builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__"); \
+ break; \
+ case PROCESSOR_SH4A: \
+ builtin_define ("__SH4A__"); \
+ builtin_define (TARGET_SH4 \
+ ? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__") \
+ : TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__" \
+ : "__SH4_NOFPU__"); \
+ break; \
+ case PROCESSOR_SH5: \
+ { \
+ builtin_define_with_value ("__SH5__", \
+ TARGET_SHMEDIA64 ? "64" : "32", 0); \
+ builtin_define_with_value ("__SHMEDIA__", \
+ TARGET_SHMEDIA ? "1" : "0", 0); \
+ if (! TARGET_FPU_DOUBLE) \
+ builtin_define ("__SH4_NOFPU__"); \
+ } \
+ } \
+ if (TARGET_FPU_ANY) \
+ builtin_define ("__SH_FPU_ANY__"); \
+ if (TARGET_FPU_DOUBLE) \
+ builtin_define ("__SH_FPU_DOUBLE__"); \
+ if (TARGET_HITACHI) \
+ builtin_define ("__HITACHI__"); \
+ if (TARGET_FMOVD) \
+ builtin_define ("__FMOVD_ENABLED__"); \
+ builtin_define (TARGET_LITTLE_ENDIAN \
+ ? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__"); \
+} while (0)
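+
+/* Illustrative user-side snippet (not part of this file): code
+   compiled for SH can test the macros predefined above.  */
+#if 0
+# ifdef __SH4__
+  /* Compiling for SH4 with the double-precision FPU selected.  */
+# endif
+# if defined (__SH_FPU_ANY__) && defined (__LITTLE_ENDIAN__)
+  /* An FPU is available and the target is little-endian.  */
+# endif
+#endif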
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may be accessed
+ via the stack pointer) in functions that seem suitable. */
+
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+
+/* Nonzero if this is an ELF target - compile time only. */
+#define TARGET_ELF 0
+
+/* Nonzero if we should generate code using type 2E insns. */
+#define TARGET_SH2E (TARGET_SH2 && TARGET_SH_E)
+
+/* Nonzero if we should generate code using type 2A insns. */
+#define TARGET_SH2A TARGET_HARD_SH2A
+/* Nonzero if we should generate code using type 2A SF insns. */
+#define TARGET_SH2A_SINGLE (TARGET_SH2A && TARGET_SH2E)
+/* Nonzero if we should generate code using type 2A DF insns. */
+#define TARGET_SH2A_DOUBLE (TARGET_HARD_SH2A_DOUBLE && TARGET_SH2A)
+
+/* Nonzero if we should generate code using type 3E insns. */
+#define TARGET_SH3E (TARGET_SH3 && TARGET_SH_E)
+
+/* Nonzero if the cache line size is 32. */
+#define TARGET_CACHE32 (TARGET_HARD_SH4 || TARGET_SH5)
+
+/* Nonzero if we schedule for a superscalar implementation. */
+#define TARGET_SUPERSCALAR TARGET_HARD_SH4
+
+/* Nonzero if the target has separate instruction and data caches. */
+#define TARGET_HARVARD (TARGET_HARD_SH4 || TARGET_SH5)
+
+/* Nonzero if a double-precision FPU is available. */
+#define TARGET_FPU_DOUBLE \
+ ((target_flags & MASK_SH4) != 0 || TARGET_SH2A_DOUBLE)
+
+/* Nonzero if an FPU is available. */
+#define TARGET_FPU_ANY (TARGET_SH2E || TARGET_FPU_DOUBLE)
+
+/* Nonzero if we should generate code using type 4 insns. */
+#undef TARGET_SH4
+#define TARGET_SH4 ((target_flags & MASK_SH4) != 0 && TARGET_SH1)
+
+/* Nonzero if we're generating code for the common subset of
+ instructions present on both SH4a and SH4al-dsp. */
+#define TARGET_SH4A_ARCH TARGET_SH4A
+
+/* Nonzero if we're generating code for SH4a, unless the use of the
+ FPU is disabled (which makes it compatible with SH4al-dsp). */
+#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY)
+
+/* Nonzero if we should generate code using the SHcompact instruction
+ set and 32-bit ABI. */
+#define TARGET_SHCOMPACT (TARGET_SH5 && TARGET_SH1)
+
+/* Nonzero if we should generate code using the SHmedia instruction
+ set and ABI. */
+#define TARGET_SHMEDIA (TARGET_SH5 && ! TARGET_SH1)
+
+/* Nonzero if we should generate code using the SHmedia ISA and 32-bit
+ ABI. */
+#define TARGET_SHMEDIA32 (TARGET_SH5 && ! TARGET_SH1 && TARGET_SH_E)
+
+/* Nonzero if we should generate code using the SHmedia ISA and 64-bit
+ ABI. */
+#define TARGET_SHMEDIA64 (TARGET_SH5 && ! TARGET_SH1 && ! TARGET_SH_E)
+
+/* Nonzero if we should generate code using SHmedia FPU instructions. */
+#define TARGET_SHMEDIA_FPU (TARGET_SHMEDIA && TARGET_FPU_DOUBLE)
+
+/* This is not used by the SH2E calling convention. */
+#define TARGET_VARARGS_PRETEND_ARGS(FUN_DECL) \
+ (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 \
+ && ! (TARGET_HITACHI || sh_attr_renesas_p (FUN_DECL)))
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT SELECT_SH1
+#define SUPPORT_SH1 1
+#define SUPPORT_SH2E 1
+#define SUPPORT_SH4 1
+#define SUPPORT_SH4_SINGLE 1
+#define SUPPORT_SH2A 1
+#define SUPPORT_SH2A_SINGLE 1
+#endif
+
+#define TARGET_DIVIDE_INV \
+ (sh_div_strategy == SH_DIV_INV || sh_div_strategy == SH_DIV_INV_MINLAT \
+ || sh_div_strategy == SH_DIV_INV20U || sh_div_strategy == SH_DIV_INV20L \
+ || sh_div_strategy == SH_DIV_INV_CALL \
+ || sh_div_strategy == SH_DIV_INV_CALL2 || sh_div_strategy == SH_DIV_INV_FP)
+#define TARGET_DIVIDE_FP (sh_div_strategy == SH_DIV_FP)
+#define TARGET_DIVIDE_INV_FP (sh_div_strategy == SH_DIV_INV_FP)
+#define TARGET_DIVIDE_CALL2 (sh_div_strategy == SH_DIV_CALL2)
+#define TARGET_DIVIDE_INV_MINLAT (sh_div_strategy == SH_DIV_INV_MINLAT)
+#define TARGET_DIVIDE_INV20U (sh_div_strategy == SH_DIV_INV20U)
+#define TARGET_DIVIDE_INV20L (sh_div_strategy == SH_DIV_INV20L)
+#define TARGET_DIVIDE_INV_CALL (sh_div_strategy == SH_DIV_INV_CALL)
+#define TARGET_DIVIDE_INV_CALL2 (sh_div_strategy == SH_DIV_INV_CALL2)
+#define TARGET_DIVIDE_CALL_DIV1 (sh_div_strategy == SH_DIV_CALL_DIV1)
+#define TARGET_DIVIDE_CALL_FP (sh_div_strategy == SH_DIV_CALL_FP)
+#define TARGET_DIVIDE_CALL_TABLE (sh_div_strategy == SH_DIV_CALL_TABLE)
+
+#define SELECT_SH1 (MASK_SH1)
+#define SELECT_SH2 (MASK_SH2 | SELECT_SH1)
+#define SELECT_SH2E (MASK_SH_E | MASK_SH2 | MASK_SH1 \
+ | MASK_FPU_SINGLE)
+#define SELECT_SH2A (MASK_SH_E | MASK_HARD_SH2A \
+ | MASK_HARD_SH2A_DOUBLE \
+ | MASK_SH2 | MASK_SH1)
+#define SELECT_SH2A_NOFPU (MASK_HARD_SH2A | MASK_SH2 | MASK_SH1)
+#define SELECT_SH2A_SINGLE_ONLY (MASK_SH_E | MASK_HARD_SH2A | MASK_SH2 \
+ | MASK_SH1 | MASK_FPU_SINGLE)
+#define SELECT_SH2A_SINGLE (MASK_SH_E | MASK_HARD_SH2A \
+ | MASK_FPU_SINGLE | MASK_HARD_SH2A_DOUBLE \
+ | MASK_SH2 | MASK_SH1)
+#define SELECT_SH3 (MASK_SH3 | SELECT_SH2)
+#define SELECT_SH3E (MASK_SH_E | MASK_FPU_SINGLE | SELECT_SH3)
+#define SELECT_SH4_NOFPU (MASK_HARD_SH4 | SELECT_SH3)
+#define SELECT_SH4_SINGLE_ONLY (MASK_HARD_SH4 | SELECT_SH3E)
+#define SELECT_SH4 (MASK_SH4 | MASK_SH_E | MASK_HARD_SH4 \
+ | SELECT_SH3)
+#define SELECT_SH4_SINGLE (MASK_FPU_SINGLE | SELECT_SH4)
+#define SELECT_SH4A_NOFPU (MASK_SH4A | SELECT_SH4_NOFPU)
+#define SELECT_SH4A_SINGLE_ONLY (MASK_SH4A | SELECT_SH4_SINGLE_ONLY)
+#define SELECT_SH4A (MASK_SH4A | SELECT_SH4)
+#define SELECT_SH4A_SINGLE (MASK_SH4A | SELECT_SH4_SINGLE)
+#define SELECT_SH5_64MEDIA (MASK_SH5 | MASK_SH4)
+#define SELECT_SH5_64MEDIA_NOFPU (MASK_SH5)
+#define SELECT_SH5_32MEDIA (MASK_SH5 | MASK_SH4 | MASK_SH_E)
+#define SELECT_SH5_32MEDIA_NOFPU (MASK_SH5 | MASK_SH_E)
+#define SELECT_SH5_COMPACT (MASK_SH5 | MASK_SH4 | SELECT_SH3E)
+#define SELECT_SH5_COMPACT_NOFPU (MASK_SH5 | SELECT_SH3)
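+
+/* Illustrative only: the SELECT_* values chain by inclusion, so e.g.
+   SELECT_SH4 expands to the union of the architecture bits of all of
+   SH4's ancestors (a sketch of the preprocessor expansion):
+
+     MASK_SH4 | MASK_SH_E | MASK_HARD_SH4
+       | MASK_SH3 | MASK_SH2 | MASK_SH1  */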
+
+#if SUPPORT_SH1
+#define SUPPORT_SH2 1
+#endif
+#if SUPPORT_SH2
+#define SUPPORT_SH3 1
+#define SUPPORT_SH2A_NOFPU 1
+#endif
+#if SUPPORT_SH3
+#define SUPPORT_SH4_NOFPU 1
+#endif
+#if SUPPORT_SH4_NOFPU
+#define SUPPORT_SH4A_NOFPU 1
+#define SUPPORT_SH4AL 1
+#endif
+
+#if SUPPORT_SH2E
+#define SUPPORT_SH3E 1
+#define SUPPORT_SH2A_SINGLE_ONLY 1
+#endif
+#if SUPPORT_SH3E
+#define SUPPORT_SH4_SINGLE_ONLY 1
+#endif
+#if SUPPORT_SH4_SINGLE_ONLY
+#define SUPPORT_SH4A_SINGLE_ONLY 1
+#endif
+
+#if SUPPORT_SH4
+#define SUPPORT_SH4A 1
+#endif
+
+#if SUPPORT_SH4_SINGLE
+#define SUPPORT_SH4A_SINGLE 1
+#endif
+
+#if SUPPORT_SH5_COMPACT
+#define SUPPORT_SH5_32MEDIA 1
+#endif
+
+#if SUPPORT_SH5_COMPACT_NOFPU
+#define SUPPORT_SH5_32MEDIA_NOFPU 1
+#endif
+
+#define SUPPORT_ANY_SH5_32MEDIA \
+ (SUPPORT_SH5_32MEDIA || SUPPORT_SH5_32MEDIA_NOFPU)
+#define SUPPORT_ANY_SH5_64MEDIA \
+ (SUPPORT_SH5_64MEDIA || SUPPORT_SH5_64MEDIA_NOFPU)
+#define SUPPORT_ANY_SH5 \
+ (SUPPORT_ANY_SH5_32MEDIA || SUPPORT_ANY_SH5_64MEDIA)
+
+/* Reset all target-selection flags. */
+#define MASK_ARCH (MASK_SH1 | MASK_SH2 | MASK_SH3 | MASK_SH_E | MASK_SH4 \
+ | MASK_HARD_SH2A | MASK_HARD_SH2A_DOUBLE | MASK_SH4A \
+ | MASK_HARD_SH4 | MASK_FPU_SINGLE | MASK_SH5)
+
+/* This defaults us to big-endian. */
+#ifndef TARGET_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT 0
+#endif
+
+#ifndef TARGET_OPT_DEFAULT
+#define TARGET_OPT_DEFAULT MASK_ADJUST_UNROLL
+#endif
+
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | TARGET_ENDIAN_DEFAULT | TARGET_OPT_DEFAULT)
+
+#ifndef SH_MULTILIB_CPU_DEFAULT
+#define SH_MULTILIB_CPU_DEFAULT "m1"
+#endif
+
+#if TARGET_ENDIAN_DEFAULT
+#define MULTILIB_DEFAULTS { "ml", SH_MULTILIB_CPU_DEFAULT }
+#else
+#define MULTILIB_DEFAULTS { "mb", SH_MULTILIB_CPU_DEFAULT }
+#endif
+
+#define CPP_SPEC " %(subtarget_cpp_spec) "
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ { "link_emul_prefix", LINK_EMUL_PREFIX }, \
+ { "link_default_cpu_emul", LINK_DEFAULT_CPU_EMUL }, \
+ { "subtarget_link_emul_suffix", SUBTARGET_LINK_EMUL_SUFFIX }, \
+ { "subtarget_link_spec", SUBTARGET_LINK_SPEC }, \
+ { "subtarget_asm_endian_spec", SUBTARGET_ASM_ENDIAN_SPEC }, \
+ { "subtarget_asm_relax_spec", SUBTARGET_ASM_RELAX_SPEC }, \
+ { "subtarget_asm_isa_spec", SUBTARGET_ASM_ISA_SPEC }, \
+ { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#if TARGET_CPU_DEFAULT & MASK_HARD_SH4
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m1:%{!m2:%{!m3*:%{!m5*:-isa=sh4-up}}}}"
+#else
+#define SUBTARGET_ASM_RELAX_SPEC "%{m4*:-isa=sh4-up}"
+#endif
+
+#define SH_ASM_SPEC \
+ "%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)}\
+%(subtarget_asm_isa_spec) %(subtarget_asm_spec)\
+%{m2a:--isa=sh2a} \
+%{m2a-single:--isa=sh2a} \
+%{m2a-single-only:--isa=sh2a} \
+%{m2a-nofpu:--isa=sh2a-nofpu} \
+%{m5-compact*:--isa=SHcompact} \
+%{m5-32media*:--isa=SHmedia --abi=32} \
+%{m5-64media*:--isa=SHmedia --abi=64} \
+%{m4al:-dsp} %{mcut2-workaround:-cut2-workaround}"
+
+#define ASM_SPEC SH_ASM_SPEC
+
+#ifndef SUBTARGET_ASM_ENDIAN_SPEC
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define SUBTARGET_ASM_ENDIAN_SPEC "%{mb:-big} %{!mb:-little}"
+#else
+#define SUBTARGET_ASM_ENDIAN_SPEC "%{ml:-little} %{!ml:-big}"
+#endif
+#endif
+
+#if STRICT_NOFPU == 1
+/* Strict nofpu means that the compiler should tell the assembler
+   to reject FPU instructions, e.g. those coming from inline asm.  */
+#if TARGET_CPU_DEFAULT & MASK_HARD_SH4 && !(TARGET_CPU_DEFAULT & MASK_SH_E)
+#define SUBTARGET_ASM_ISA_SPEC "%{!m1:%{!m2:%{!m3*:%{m4-nofpu|!m4*:%{!m5:-isa=sh4-nofpu}}}}}"
+#else
+/* If there were an -isa option for sh5-nofpu then it would also go here. */
+#define SUBTARGET_ASM_ISA_SPEC \
+ "%{m4-nofpu:-isa=sh4-nofpu} " ASM_ISA_DEFAULT_SPEC
+#endif
+#else /* ! STRICT_NOFPU */
+#define SUBTARGET_ASM_ISA_SPEC ASM_ISA_DEFAULT_SPEC
+#endif
+
+#ifndef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC ""
+#endif
+
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define LINK_EMUL_PREFIX "sh%{!mb:l}"
+#else
+#define LINK_EMUL_PREFIX "sh%{ml:l}"
+#endif
+
+#if TARGET_CPU_DEFAULT & MASK_SH5
+#if TARGET_CPU_DEFAULT & MASK_SH_E
+#define LINK_DEFAULT_CPU_EMUL "32"
+#if TARGET_CPU_DEFAULT & MASK_SH1
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHcompact"
+#else
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=32"
+#endif /* MASK_SH1 */
+#else /* !MASK_SH_E */
+#define LINK_DEFAULT_CPU_EMUL "64"
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=64"
+#endif /* MASK_SH_E */
+#define ASM_ISA_DEFAULT_SPEC \
+" %{!m1:%{!m2*:%{!m3*:%{!m4*:%{!m5*:" ASM_ISA_SPEC_DEFAULT "}}}}}"
+#else /* !MASK_SH5 */
+#define LINK_DEFAULT_CPU_EMUL ""
+#define ASM_ISA_DEFAULT_SPEC ""
+#endif /* MASK_SH5 */
+
+#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_SPEC ""
+
+/* Go via SH_LINK_SPEC to avoid code replication. */
+#define LINK_SPEC SH_LINK_SPEC
+
+#define SH_LINK_SPEC "\
+-m %(link_emul_prefix)\
+%{m5-compact*|m5-32media*:32}\
+%{m5-64media*:64}\
+%{!m1:%{!m2:%{!m3*:%{!m4*:%{!m5*:%(link_default_cpu_emul)}}}}}\
+%(subtarget_link_emul_suffix) \
+%{mrelax:-relax} %(subtarget_link_spec)"
+
+#ifndef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call"
+#endif
+
+#define DRIVER_SELF_SPECS "%{m2a:%{ml:%eSH2a does not support little-endian}}"
+
+#define ASSEMBLER_DIALECT assembler_dialect
+
+extern int assembler_dialect;
+
+enum sh_divide_strategy_e {
+ /* SH5 strategies. */
+ SH_DIV_CALL,
+ SH_DIV_CALL2,
+ SH_DIV_FP, /* We could do this also for SH4. */
+ SH_DIV_INV,
+ SH_DIV_INV_MINLAT,
+ SH_DIV_INV20U,
+ SH_DIV_INV20L,
+ SH_DIV_INV_CALL,
+ SH_DIV_INV_CALL2,
+ SH_DIV_INV_FP,
+ /* SH1 .. SH4 strategies. Because of the small number of registers
+ available, the compiler uses knowledge of the actual set of registers
+ being clobbered by the different functions called. */
+ SH_DIV_CALL_DIV1, /* No FPU, medium size, highest latency. */
+ SH_DIV_CALL_FP, /* FPU needed, small size, high latency. */
+ SH_DIV_CALL_TABLE, /* No FPU, large size, medium latency. */
+ SH_DIV_INTRINSIC
+};
+
+extern enum sh_divide_strategy_e sh_div_strategy;
+
+#ifndef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL
+#endif
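+
+/* Illustrative only: a minimal sketch of how the TARGET_DIVIDE_* predicates
+   above might select a division libcall.  The function name is hypothetical
+   and the helper names are assumptions about lib1funcs.asm, so the whole
+   sketch is disabled with #if 0.  */
+#if 0
+static const char *
+sh_example_div_libfunc_name (void)
+{
+  if (TARGET_DIVIDE_CALL_FP)
+    return "__sdivsi3_i4";	/* FPU-based helper (assumed name).  */
+  if (TARGET_DIVIDE_CALL_TABLE)
+    return "__sdivsi3_i4i";	/* table-driven helper (assumed name).  */
+  return "__sdivsi3";		/* generic helper (assumed name).  */
+}
+#endif /* 0 */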
+
+#define SUBTARGET_OVERRIDE_OPTIONS (void) 0
+
+
+/* Target machine storage layout. */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN (TARGET_LITTLE_ENDIAN == 0)
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width in bits of an `int'.  We want just 32 bits, even if words are
+ longer. */
+#define INT_TYPE_SIZE 32
+
+/* Width in bits of a `long'. */
+#define LONG_TYPE_SIZE (TARGET_SHMEDIA64 ? 64 : 32)
+
+/* Width in bits of a `long long'. */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* Width in bits of a `long double'. */
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_SHMEDIA ? 8 : 4)
+#define MIN_UNITS_PER_WORD 4
+
+/* Scaling factor for Dwarf data offsets for CFI information.
+ The dwarf2out.c default would use -UNITS_PER_WORD, which is -8 for
+ SHmedia; however, since we do partial register saves for the registers
+ visible to SHcompact, and for target registers for SHMEDIA32, we have
+ to allow saves that are only 4-byte aligned. */
+#define DWARF_CIE_DATA_ALIGNMENT -4
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE (TARGET_SHMEDIA64 ? 64 : 32)
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_SH5 ? 64 : 32)
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY BIGGEST_ALIGNMENT
+
+/* The log (base 2) of the cache line size, in bytes. Processors prior to
+ SH2 have no actual cache, but they fetch code in chunks of 4 bytes.
+   The SH2/3 have 16-byte cache lines, and the SH4 has a 32-byte cache line.  */
+#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH2 ? 4 : 2)
+
+/* ABI given & required minimum allocation boundary (in *bits*) for the
+ code of a function. */
+#define FUNCTION_BOUNDARY (16 << TARGET_SHMEDIA)
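+
+/* Illustrative only: TARGET_SHMEDIA is a logical expression and therefore
+   evaluates to 0 or 1, so the shift above yields 16 bits (2-byte insn
+   alignment) for SH1-4 and SHcompact, and 32 bits for SHmedia.  */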
+
+/* On SH5, the lowest bit is used to indicate SHmedia functions, so
+ the vbit must go into the delta field of
+ pointers-to-member-functions. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION \
+ (TARGET_SH5 ? ptrmemfunc_vbit_in_delta : ptrmemfunc_vbit_in_pfn)
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT (TARGET_SH5 ? 64 : 32)
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* get_mode_alignment assumes complex values are always held in multiple
+ registers, but that is not the case on the SH; CQImode and CHImode are
+ held in a single integer register. SH5 also holds CSImode and SCmode
+ values in integer registers. This is relevant for argument passing on
+ SHcompact as we use a stack temp in order to pass CSImode by reference. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \
+ || GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \
+ ? (unsigned) MIN (BIGGEST_ALIGNMENT, GET_MODE_BITSIZE (TYPE_MODE (TYPE))) \
+ : (unsigned) DATA_ALIGNMENT(TYPE, ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Number of bits which any structure or union's size must be a
+ multiple of. Each structure or union's size is rounded up to a
+ multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \
+ barrier_align (LABEL_AFTER_BARRIER)
+
+#define LOOP_ALIGN(A_LABEL) \
+ ((! optimize || TARGET_HARD_SH4 || optimize_size) \
+ ? 0 : sh_loop_align (A_LABEL))
+
+#define LABEL_ALIGN(A_LABEL) \
+( \
+ (PREV_INSN (A_LABEL) \
+ && NONJUMP_INSN_P (PREV_INSN (A_LABEL)) \
+ && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \
+ && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == UNSPECV_ALIGN) \
+ /* explicit alignment insn in constant tables. */ \
+ ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \
+ : 0)
+
+/* Jump tables must be 32 bit aligned, no matter the size of the element. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 2
+
+/* The base two logarithm of the known minimum alignment of an insn length. */
+#define INSN_LENGTH_ALIGNMENT(A_INSN) \
+ (NONJUMP_INSN_P (A_INSN) \
+ ? 1 << TARGET_SHMEDIA \
+ : JUMP_P (A_INSN) || CALL_P (A_INSN) \
+ ? 1 << TARGET_SHMEDIA \
+ : CACHE_LOG)
+
+/* Standard register usage. */
+
+/* Register allocation for the Renesas calling convention:
+
+ r0 arg return
+ r1..r3 scratch
+ r4..r7 args in
+ r8..r13 call saved
+ r14 frame pointer/call saved
+ r15 stack pointer
+ ap arg pointer (doesn't really exist, always eliminated)
+ pr subroutine return address
+ t t bit
+ mach multiply/accumulate result, high part
+ macl multiply/accumulate result, low part.
+ fpul fp/int communication register
+ rap return address pointer register
+ fr0 fp arg return
+ fr1..fr3 scratch floating point registers
+ fr4..fr11 fp args in
+ fr12..fr15 call saved floating point registers */
+
+#define MAX_REGISTER_NAME_LENGTH 5
+extern char sh_register_names[][MAX_REGISTER_NAME_LENGTH + 1];
+
+#define SH_REGISTER_NAMES_INITIALIZER \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", \
+ "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \
+ "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", \
+ "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", \
+ "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31", \
+ "fr32", "fr33", "fr34", "fr35", "fr36", "fr37", "fr38", "fr39", \
+ "fr40", "fr41", "fr42", "fr43", "fr44", "fr45", "fr46", "fr47", \
+ "fr48", "fr49", "fr50", "fr51", "fr52", "fr53", "fr54", "fr55", \
+ "fr56", "fr57", "fr58", "fr59", "fr60", "fr61", "fr62", "fr63", \
+ "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7", \
+ "xd0", "xd2", "xd4", "xd6", "xd8", "xd10", "xd12", "xd14", \
+ "gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", \
+ "rap", "sfp" \
+}
+
+#define REGNAMES_ARR_INDEX_1(index) \
+ (sh_register_names[index])
+#define REGNAMES_ARR_INDEX_2(index) \
+ REGNAMES_ARR_INDEX_1 ((index)), REGNAMES_ARR_INDEX_1 ((index)+1)
+#define REGNAMES_ARR_INDEX_4(index) \
+ REGNAMES_ARR_INDEX_2 ((index)), REGNAMES_ARR_INDEX_2 ((index)+2)
+#define REGNAMES_ARR_INDEX_8(index) \
+ REGNAMES_ARR_INDEX_4 ((index)), REGNAMES_ARR_INDEX_4 ((index)+4)
+#define REGNAMES_ARR_INDEX_16(index) \
+ REGNAMES_ARR_INDEX_8 ((index)), REGNAMES_ARR_INDEX_8 ((index)+8)
+#define REGNAMES_ARR_INDEX_32(index) \
+ REGNAMES_ARR_INDEX_16 ((index)), REGNAMES_ARR_INDEX_16 ((index)+16)
+#define REGNAMES_ARR_INDEX_64(index) \
+ REGNAMES_ARR_INDEX_32 ((index)), REGNAMES_ARR_INDEX_32 ((index)+32)
+
+#define REGISTER_NAMES \
+{ \
+ REGNAMES_ARR_INDEX_64 (0), \
+ REGNAMES_ARR_INDEX_64 (64), \
+ REGNAMES_ARR_INDEX_8 (128), \
+ REGNAMES_ARR_INDEX_8 (136), \
+ REGNAMES_ARR_INDEX_8 (144), \
+ REGNAMES_ARR_INDEX_2 (152) \
+}
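+
+/* Illustrative only: the REGNAMES_ARR_INDEX_<n> macros expand by repeated
+   doubling, so e.g. REGNAMES_ARR_INDEX_4 (0) becomes (a simplified sketch
+   of the preprocessor expansion)
+
+     sh_register_names[0], sh_register_names[1],
+     sh_register_names[2], sh_register_names[3]
+
+   which is how REGISTER_NAMES above enumerates all 154 entries without
+   spelling out each index.  */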
+
+#define ADDREGNAMES_SIZE 32
+#define MAX_ADDITIONAL_REGISTER_NAME_LENGTH 4
+extern char sh_additional_register_names[ADDREGNAMES_SIZE] \
+ [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1];
+
+#define SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER \
+{ \
+ "dr0", "dr2", "dr4", "dr6", "dr8", "dr10", "dr12", "dr14", \
+ "dr16", "dr18", "dr20", "dr22", "dr24", "dr26", "dr28", "dr30", \
+ "dr32", "dr34", "dr36", "dr38", "dr40", "dr42", "dr44", "dr46", \
+ "dr48", "dr50", "dr52", "dr54", "dr56", "dr58", "dr60", "dr62" \
+}
+
+#define ADDREGNAMES_REGNO(index) \
+ ((index < 32) ? (FIRST_FP_REG + (index) * 2) \
+ : (-1))
+
+#define ADDREGNAMES_ARR_INDEX_1(index) \
+ { (sh_additional_register_names[index]), ADDREGNAMES_REGNO (index) }
+#define ADDREGNAMES_ARR_INDEX_2(index) \
+ ADDREGNAMES_ARR_INDEX_1 ((index)), ADDREGNAMES_ARR_INDEX_1 ((index)+1)
+#define ADDREGNAMES_ARR_INDEX_4(index) \
+ ADDREGNAMES_ARR_INDEX_2 ((index)), ADDREGNAMES_ARR_INDEX_2 ((index)+2)
+#define ADDREGNAMES_ARR_INDEX_8(index) \
+ ADDREGNAMES_ARR_INDEX_4 ((index)), ADDREGNAMES_ARR_INDEX_4 ((index)+4)
+#define ADDREGNAMES_ARR_INDEX_16(index) \
+ ADDREGNAMES_ARR_INDEX_8 ((index)), ADDREGNAMES_ARR_INDEX_8 ((index)+8)
+#define ADDREGNAMES_ARR_INDEX_32(index) \
+ ADDREGNAMES_ARR_INDEX_16 ((index)), ADDREGNAMES_ARR_INDEX_16 ((index)+16)
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ ADDREGNAMES_ARR_INDEX_32 (0) \
+}
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+/* There are many other relevant definitions in sh.md's md_constants. */
+
+#define FIRST_GENERAL_REG R0_REG
+#define LAST_GENERAL_REG (FIRST_GENERAL_REG + (TARGET_SHMEDIA ? 63 : 15))
+#define FIRST_FP_REG DR0_REG
+#define LAST_FP_REG (FIRST_FP_REG + \
+ (TARGET_SHMEDIA_FPU ? 63 : TARGET_SH2E ? 15 : -1))
+#define FIRST_XD_REG XD0_REG
+#define LAST_XD_REG (FIRST_XD_REG + ((TARGET_SH4 && TARGET_FMOVD) ? 7 : -1))
+#define FIRST_TARGET_REG TR0_REG
+#define LAST_TARGET_REG (FIRST_TARGET_REG + (TARGET_SHMEDIA ? 7 : -1))
+
+/* Registers that can be accessed through bank0 or bank1 depending on sr.md. */
+
+#define FIRST_BANKED_REG R0_REG
+#define LAST_BANKED_REG R7_REG
+
+#define BANKED_REGISTER_P(REGNO) \
+ IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_BANKED_REG, \
+ (unsigned HOST_WIDE_INT) LAST_BANKED_REG)
+
+#define GENERAL_REGISTER_P(REGNO) \
+ IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \
+ (unsigned HOST_WIDE_INT) LAST_GENERAL_REG)
+
+#define GENERAL_OR_AP_REGISTER_P(REGNO) \
+ (GENERAL_REGISTER_P (REGNO) || ((REGNO) == AP_REG) \
+ || ((REGNO) == FRAME_POINTER_REGNUM))
+
+#define FP_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_FP_REG && (int) (REGNO) <= LAST_FP_REG)
+
+#define XD_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_XD_REG && (int) (REGNO) <= LAST_XD_REG)
+
+#define FP_OR_XD_REGISTER_P(REGNO) \
+ (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO))
+
+#define FP_ANY_REGISTER_P(REGNO) \
+ (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) || (REGNO) == FPUL_REG)
+
+#define SPECIAL_REGISTER_P(REGNO) \
+ ((REGNO) == GBR_REG || (REGNO) == T_REG \
+ || (REGNO) == MACH_REG || (REGNO) == MACL_REG)
+
+#define TARGET_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_TARGET_REG && (int) (REGNO) <= LAST_TARGET_REG)
+
+#define SHMEDIA_REGISTER_P(REGNO) \
+ (GENERAL_REGISTER_P (REGNO) || FP_REGISTER_P (REGNO) \
+ || TARGET_REGISTER_P (REGNO))
+
+/* This is to be used in TARGET_CONDITIONAL_REGISTER_USAGE, to mark
+ registers that should be fixed. */
+#define VALID_REGISTER_P(REGNO) \
+ (SHMEDIA_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) \
+ || (REGNO) == AP_REG || (REGNO) == RAP_REG \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || (TARGET_SH1 && (SPECIAL_REGISTER_P (REGNO) || (REGNO) == PR_REG)) \
+ || (TARGET_SH2E && (REGNO) == FPUL_REG))
+
+/* The mode that should be generally used to store a register by
+ itself in the stack, or to load it back. */
+#define REGISTER_NATURAL_MODE(REGNO) \
+ (FP_REGISTER_P (REGNO) ? SFmode \
+ : XD_REGISTER_P (REGNO) ? DFmode \
+ : TARGET_SHMEDIA && ! HARD_REGNO_CALL_PART_CLOBBERED ((REGNO), DImode) \
+ ? DImode \
+ : SImode)
+
+#define FIRST_PSEUDO_REGISTER 154
+
+/* Don't count soft frame pointer. */
+#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 1)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+   The mach register is fixed because it is only 10 bits wide for SH1.
+ It is 32 bits wide for SH2. */
+
+#define FIXED_REGISTERS \
+{ \
+/* Regular registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ /* r16 is reserved, r18 is the former pr. */ \
+ 1, 0, 0, 0, 0, 0, 0, 0, \
+ /* r24 is reserved for the OS; r25, for the assembler or linker. */ \
+ /* r26 is a global variable data pointer; r27 is for constants. */ \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+/* FP registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* Branch target registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* XD registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \
+ 1, 1, 1, 1, 1, 1, 0, 1, \
+/*"rap", "sfp" */ \
+ 1, 1, \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+{ \
+/* Regular registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. \
+ Only the lower 32bits of R10-R14 are guaranteed to be preserved \
+ across SH5 function calls. */ \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+/* FP registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* Branch target registers. */ \
+ 1, 1, 1, 1, 1, 0, 0, 0, \
+/* XD registers. */ \
+ 1, 1, 1, 1, 1, 1, 0, 0, \
+/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/*"rap", "sfp" */ \
+ 1, 1, \
+}
+
+/* TARGET_CONDITIONAL_REGISTER_USAGE might want to make a register
+ call-used, yet fixed, like PIC_OFFSET_TABLE_REGNUM. */
+#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS
+
+/* Only the lower 32-bits of R10-R14 are guaranteed to be preserved
+ across SHcompact function calls. We can't tell whether a called
+ function is SHmedia or SHcompact, so we assume it may be when
+ compiling SHmedia code with the 32-bit ABI, since that's the only
+ ABI that can be linked with SHcompact code. */
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO,MODE) \
+ (TARGET_SHMEDIA32 \
+ && GET_MODE_SIZE (MODE) > 4 \
+ && (((REGNO) >= FIRST_GENERAL_REG + 10 \
+ && (REGNO) <= FIRST_GENERAL_REG + 15) \
+ || TARGET_REGISTER_P (REGNO) \
+ || (REGNO) == PR_MEDIA_REG))
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the SH all but the XD regs are UNITS_PER_WORD bits wide. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (XD_REGISTER_P (REGNO) \
+ ? ((GET_MODE_SIZE (MODE) + (2*UNITS_PER_WORD - 1)) / (2*UNITS_PER_WORD)) \
+ : (TARGET_SHMEDIA && FP_REGISTER_P (REGNO)) \
+ ? ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2)) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
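+
+/* Illustrative only, as a worked example of the macro above: with
+   UNITS_PER_WORD == 4 (any non-SHmedia target), a DFmode value of 8 bytes
+   needs (8 + 3) / 4 == 2 consecutive FP registers, but only
+   (8 + 7) / 8 == 1 XD register, each XD register being a two-word pair.  */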
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ sh_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output.
+ That's the case for xd registers: we don't hold SFmode values in
+   them, so we can't tie an SFmode pseudo with one in another
+ floating-point mode. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) \
+ || (TARGET_SHMEDIA \
+ && GET_MODE_SIZE (MODE1) == GET_MODE_SIZE (MODE2) \
+ && INTEGRAL_MODE_P (MODE1) && INTEGRAL_MODE_P (MODE2)) \
+ || (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \
+ && (TARGET_SHMEDIA ? ((GET_MODE_SIZE (MODE1) <= 4) \
+ && (GET_MODE_SIZE (MODE2) <= 4)) \
+ : ((MODE1) != SFmode && (MODE2) != SFmode))))
+
+/* A C expression that is nonzero if hard register NEW_REG can be
+   considered for use as a rename register for register OLD_REG.  */
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ sh_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Define this if the program counter is overloaded on a register. */
+/* #define PC_REGNUM 15 */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REG
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM FP_REG
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 153
+
+/* Fake register that holds the address on the stack of the
+ current function's return address. */
+#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG
+
+/* Register to hold the addressing base for position independent
+ code access to data items. */
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM)
+
+#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
+
+/* Definitions for register eliminations.
+
+ We have three registers that can be eliminated on the SH. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer.
+ Third, there is the return address pointer, which can also be replaced
+ with either the stack or the frame pointer. */
+
+/* This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference. */
+
+/* If you add any registers here that are not actually hard registers,
+ and that have any alternative of elimination that doesn't always
+ apply, you need to amend calc_live_regs to exclude it, because
+   reload spills all eliminable registers where it sees a
+   can_eliminate == 0 entry, thus making them 'live'.
+ If you add any hard registers that can be eliminated in different
+ ways, you have to patch reload to spill them only when all alternatives
+ of elimination fail. */
+
+#define ELIMINABLE_REGS \
+{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = initial_elimination_offset ((FROM), (TO))
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM AP_REG
+
+/* Register in which the static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM (TARGET_SH5 ? 1 : 3)
+
+/* Don't default to pcc-struct-return, because we have already specified
+ exactly how to return structures in the TARGET_RETURN_IN_MEMORY
+ target hook. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define SHMEDIA_REGS_STACK_ADJUST() \
+ (TARGET_SHCOMPACT && crtl->saves_all_registers \
+ ? (8 * (/* r28-r35 */ 8 + /* r44-r59 */ 16 + /* tr5-tr7 */ 3) \
+ + (TARGET_FPU_ANY ? 4 * (/* fr36 - fr63 */ 28) : 0)) \
+ : 0)
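+
+/* Illustrative only: when the adjustment above applies, it comes to
+   8 * (8 + 16 + 3) == 216 bytes for the 64-bit registers, plus
+   4 * 28 == 112 bytes for fr36-fr63 when an FPU is present,
+   i.e. 328 bytes in total.  */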
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The SH has two sorts of general registers, R0 and the rest. R0 can
+ be used as the destination of some of the arithmetic ops. There are
+ also some special purpose registers; the T bit register, the
+ Procedure Return Register and the Multiply Accumulate Registers. */
+/* Place GENERAL_REGS after FPUL_REGS so that it will be preferred by
+ reg_class_subunion. We don't want to have an actual union class
+ of these, because it would only be used when both classes are calculated
+ to give the same cost, but there is only one FPUL register.
+ Besides, regclass fails to notice the different REGISTER_MOVE_COSTS
+ applying to the actual instruction alternative considered. E.g., the
+   y/r alternative of movsi_ie is considered to have no more cost than
+ the r/r alternative, which is patently untrue. */
+
+enum reg_class
+{
+ NO_REGS,
+ R0_REGS,
+ PR_REGS,
+ T_REGS,
+ MAC_REGS,
+ FPUL_REGS,
+ SIBCALL_REGS,
+ GENERAL_REGS,
+ FP0_REGS,
+ FP_REGS,
+ DF_HI_REGS,
+ DF_REGS,
+ FPSCR_REGS,
+ GENERAL_FP_REGS,
+ GENERAL_DF_REGS,
+ TARGET_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "R0_REGS", \
+ "PR_REGS", \
+ "T_REGS", \
+ "MAC_REGS", \
+ "FPUL_REGS", \
+ "SIBCALL_REGS", \
+ "GENERAL_REGS", \
+ "FP0_REGS", \
+ "FP_REGS", \
+ "DF_HI_REGS", \
+ "DF_REGS", \
+ "FPSCR_REGS", \
+ "GENERAL_FP_REGS", \
+ "GENERAL_DF_REGS", \
+ "TARGET_REGS", \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ \
+/* NO_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* R0_REGS: */ \
+ { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* PR_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, \
+/* T_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00080000 }, \
+/* MAC_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00300000 }, \
+/* FPUL_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00400000 }, \
+/* SIBCALL_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* GENERAL_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \
+/* FP0_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000 }, \
+/* FP_REGS: */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \
+/* DF_HI_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x0000ff00 }, \
+/* DF_REGS: */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x0000ff00 }, \
+/* FPSCR_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00800000 }, \
+/* GENERAL_FP_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03020000 }, \
+/* GENERAL_DF_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0302ff00 }, \
+/* TARGET_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000ff }, \
+/* ALL_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03ffffff }, \
+}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)]
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of those registers.  The macro value is
+   an array of register classes with LIM_REG_CLASSES used as the end
+   marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FP_REGS, PR_REGS, T_REGS, MAC_REGS, TARGET_REGS, \
+ FPUL_REGS, LIM_REG_CLASSES \
+}
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+ sh_small_register_classes_for_mode_p
+
+/* The order in which register should be allocated. */
+/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo,
+ and GENERAL_FP_REGS the alternate class. Since FP0 is likely to be
+   spilled or used otherwise, we had better have the FP_REGS allocated first. */
+#define REG_ALLOC_ORDER \
+ {/* Caller-saved FPRs */ \
+ 65, 66, 67, 68, 69, 70, 71, 64, \
+ 72, 73, 74, 75, 80, 81, 82, 83, \
+ 84, 85, 86, 87, 88, 89, 90, 91, \
+ 92, 93, 94, 95, 96, 97, 98, 99, \
+ /* Callee-saved FPRs */ \
+ 76, 77, 78, 79,100,101,102,103, \
+ 104,105,106,107,108,109,110,111, \
+ 112,113,114,115,116,117,118,119, \
+ 120,121,122,123,124,125,126,127, \
+ 136,137,138,139,140,141,142,143, \
+ /* FPSCR */ 151, \
+ /* Caller-saved GPRs (except 8/9 on SH1-4) */ \
+ 1, 2, 3, 7, 6, 5, 4, 0, \
+ 8, 9, 17, 19, 20, 21, 22, 23, \
+ 36, 37, 38, 39, 40, 41, 42, 43, \
+ 60, 61, 62, \
+   /* SH1-4 callee-saved GPRs / SH5 partially-saved GPRs */		\
+ 10, 11, 12, 13, 14, 18, \
+ /* SH5 callee-saved GPRs */ \
+ 28, 29, 30, 31, 32, 33, 34, 35, \
+ 44, 45, 46, 47, 48, 49, 50, 51, \
+ 52, 53, 54, 55, 56, 57, 58, 59, \
+ /* FPUL */ 150, \
+ /* SH5 branch target registers */ \
+ 128,129,130,131,132,133,134,135, \
+ /* Fixed registers */ \
+ 15, 16, 24, 25, 26, 27, 63,144, \
+ 145,146,147,148,149,152,153 }
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS \
+ (!ALLOW_INDEXED_ADDRESS ? NO_REGS : TARGET_SHMEDIA ? GENERAL_REGS : R0_REGS)
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Defines for sh.md and constraints.md. */
+
+#define CONST_OK_FOR_I06(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -32 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 31)
+#define CONST_OK_FOR_I08(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -128 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 127)
+#define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 511)
+#define CONST_OK_FOR_I16(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -32768 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 32767)
+
+#define CONST_OK_FOR_J16(VALUE) \
+ ((HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) 0xffffffff) \
+ || (HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) -1 << 32))
+
+#define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE)) >= 0 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 255)
+
+#if 0
+#define SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,ELSE) \
+ ((((REGCLASS_HAS_FP_REG (CLASS) \
+ && (REG_P (X) \
+ && (GENERAL_OR_AP_REGISTER_P (REGNO (X)) \
+ || (FP_REGISTER_P (REGNO (X)) && (MODE) == SImode \
+ && TARGET_FMOVD)))) \
+ || (REGCLASS_HAS_GENERAL_REG (CLASS) \
+ && REG_P (X) \
+ && FP_REGISTER_P (REGNO (X)))) \
+ && ! TARGET_SHMEDIA \
+ && ((MODE) == SFmode || (MODE) == SImode)) \
+ ? FPUL_REGS \
+ : (((CLASS) == FPUL_REGS \
+ || (REGCLASS_HAS_FP_REG (CLASS) \
+ && ! TARGET_SHMEDIA && MODE == SImode)) \
+ && (MEM_P (X) \
+ || (REG_P (X) \
+ && (REGNO (X) >= FIRST_PSEUDO_REGISTER \
+ || REGNO (X) == T_REG \
+ || system_reg_operand (X, VOIDmode))))) \
+ ? GENERAL_REGS \
+ : (((CLASS) == TARGET_REGS \
+ || (TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS)) \
+ && !satisfies_constraint_Csy (X) \
+ && (!REG_P (X) || ! GENERAL_REGISTER_P (REGNO (X)))) \
+ ? GENERAL_REGS \
+ : (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \
+ && REG_P (X) && ! GENERAL_REGISTER_P (REGNO (X)) \
+ && (CLASS) != REGNO_REG_CLASS (REGNO (X))) \
+ ? GENERAL_REGS \
+ : ((CLASS) != GENERAL_REGS && REG_P (X) \
+ && TARGET_REGISTER_P (REGNO (X))) \
+ ? GENERAL_REGS : (ELSE))
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \
+ SECONDARY_INOUT_RELOAD_CLASS(CLASS,MODE,X,NO_REGS)
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \
+ ((REGCLASS_HAS_FP_REG (CLASS) \
+ && ! TARGET_SHMEDIA \
+ && immediate_operand ((X), (MODE)) \
+ && ! ((fp_zero_operand (X) || fp_one_operand (X)) \
+ && (MODE) == SFmode && fldi_ok ())) \
+ ? R0_REGS \
+ : ((CLASS) == FPUL_REGS \
+ && ((REG_P (X) \
+ && (REGNO (X) == MACL_REG || REGNO (X) == MACH_REG \
+ || REGNO (X) == T_REG)) \
+ || GET_CODE (X) == PLUS)) \
+ ? GENERAL_REGS \
+ : (CLASS) == FPUL_REGS && immediate_operand ((X), (MODE)) \
+ ? (satisfies_constraint_I08 (X) \
+ ? GENERAL_REGS \
+ : R0_REGS) \
+ : ((CLASS) == FPSCR_REGS \
+ && ((REG_P (X) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+ || (MEM_P (X) && GET_CODE (XEXP ((X), 0)) == PLUS))) \
+ ? GENERAL_REGS \
+ : (REGCLASS_HAS_FP_REG (CLASS) \
+ && TARGET_SHMEDIA \
+ && immediate_operand ((X), (MODE)) \
+ && (X) != CONST0_RTX (GET_MODE (X)) \
+ && GET_MODE (X) != V4SFmode) \
+ ? GENERAL_REGS \
+ : (((MODE) == QImode || (MODE) == HImode) \
+ && TARGET_SHMEDIA && inqhi_operand ((X), (MODE))) \
+ ? GENERAL_REGS \
+ : (TARGET_SHMEDIA && (CLASS) == GENERAL_REGS \
+ && (GET_CODE (X) == LABEL_REF || PIC_ADDR_P (X))) \
+ ? TARGET_REGS \
+ : SECONDARY_INOUT_RELOAD_CLASS((CLASS),(MODE),(X), NO_REGS))
+#endif
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+
+ If TARGET_SHMEDIA, we need two FP registers per word.
+ Otherwise we will need at most one register per word. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (TARGET_SHMEDIA \
+ && TEST_HARD_REG_BIT (reg_class_contents[CLASS], FIRST_FP_REG) \
+ ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2) \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* If defined, gives a class of registers that cannot be used as the
+ operand of a SUBREG that changes the mode of the object illegally. */
+/* ??? We need to renumber the internal numbers for the frnn registers
+ when in little endian in order to allow mode size changes. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ sh_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define the number of registers that can hold parameters.
+ These macros are used only in other macro definitions below. */
+
+#define NPARM_REGS(MODE) \
+ (TARGET_FPU_ANY && (MODE) == SFmode \
+ ? (TARGET_SH5 ? 12 : 8) \
+ : (TARGET_SH4 || TARGET_SH2A_DOUBLE) && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? (TARGET_SH5 ? 12 : 8) \
+ : (TARGET_SH5 ? 8 : 4))
+
+#define FIRST_PARM_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 4))
+#define FIRST_RET_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 0))
+
+#define FIRST_FP_PARM_REG (FIRST_FP_REG + (TARGET_SH5 ? 0 : 4))
+#define FIRST_FP_RET_REG FIRST_FP_REG
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this macro to nonzero if the addresses of local variable slots
+ are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset from the frame pointer to the first local variable slot to
+ be allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by. */
+/* Don't define PUSH_ROUNDING, since the hardware doesn't do this.
+ When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to
+ do correct alignment. */
+#if 0
+#define PUSH_ROUNDING(NPUSHED) (((NPUSHED) + 3) & ~3)
+#endif
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Value is the number of bytes of arguments automatically popped when
+ calling a subroutine.
+ CUM is the accumulated argument list.
+
+ On SHcompact, the call trampoline pops arguments off the stack. */
+#define CALL_POPS_ARGS(CUM) (TARGET_SHCOMPACT ? (CUM).stack_regs * 8 : 0)
+
+/* Some subroutine macros specific to this machine. */
+
+#define BASE_RETURN_VALUE_REG(MODE) \
+ ((TARGET_FPU_ANY && ((MODE) == SFmode)) \
+ ? FIRST_FP_RET_REG \
+ : TARGET_FPU_ANY && (MODE) == SCmode \
+ ? FIRST_FP_RET_REG \
+ : (TARGET_FPU_DOUBLE \
+ && ((MODE) == DFmode || (MODE) == SFmode \
+ || (MODE) == DCmode || (MODE) == SCmode )) \
+ ? FIRST_FP_RET_REG \
+ : FIRST_RET_REG)
+
+#define BASE_ARG_REG(MODE) \
+ ((TARGET_SH2E && ((MODE) == SFmode)) \
+ ? FIRST_FP_PARM_REG \
+ : (TARGET_SH4 || TARGET_SH2A_DOUBLE) && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\
+ ? FIRST_FP_PARM_REG \
+ : FIRST_PARM_REG)
+
+/* 1 if N is a possible register number for function argument passing. */
+/* ??? There are some callers that pass REGNO as int, and others that pass
+ it as unsigned. We get warnings unless we do casts everywhere. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (((unsigned) (REGNO) >= (unsigned) FIRST_PARM_REG \
+ && (unsigned) (REGNO) < (unsigned) (FIRST_PARM_REG + NPARM_REGS (SImode)))\
+ || (TARGET_FPU_ANY \
+ && (unsigned) (REGNO) >= (unsigned) FIRST_FP_PARM_REG \
+ && (unsigned) (REGNO) < (unsigned) (FIRST_FP_PARM_REG \
+ + NPARM_REGS (SFmode))))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On SH, this is a single integer, which is a number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address).
+   Thus NPARM_REGS or more means all following args should go on the stack. */
+
+enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 };
+struct sh_args {
+ int arg_count[2];
+ int force_mem;
+ /* Nonzero if a prototype is available for the function. */
+ int prototype_p;
+ /* The number of an odd floating-point register, that should be used
+ for the next argument of type float. */
+ int free_single_fp_reg;
+ /* Whether we're processing an outgoing function call. */
+ int outgoing;
+ /* The number of general-purpose registers that should have been
+ used to pass partial arguments, that are passed totally on the
+ stack. On SHcompact, a call trampoline will pop them off the
+ stack before calling the actual function, and, if the called
+ function is implemented in SHcompact mode, the incoming arguments
+ decoder will push such arguments back onto the stack. For
+ incoming arguments, STACK_REGS also takes into account other
+ arguments passed by reference, that the decoder will also push
+ onto the stack. */
+ int stack_regs;
+ /* The number of general-purpose registers that should have been
+ used to pass arguments, if the arguments didn't have to be passed
+ by reference. */
+ int byref_regs;
+ /* Set as by shcompact_byref if the current argument is to be passed
+ by reference. */
+ int byref;
+
+ /* call_cookie is a bitmask used by call expanders, as well as
+ function prologue and epilogues, to allow SHcompact to comply
+ with the SH5 32-bit ABI, that requires 64-bit registers to be
+ used even though only the lower 32-bit half is visible in
+ SHcompact mode. The strategy is to call SHmedia trampolines.
+
+ The alternatives for each of the argument-passing registers are
+ (a) leave it unchanged; (b) pop it off the stack; (c) load its
+ contents from the address in it; (d) add 8 to it, storing the
+ result in the next register, then (c); (e) copy it from some
+   floating-point register.
+
+ Regarding copies from floating-point registers, r2 may only be
+   copied from dr0.  r3 may be copied from dr0 or dr2.  r4 may be
+   copied from dr0, dr2 or dr4.  r5 may be copied from dr0, dr2,
+   dr4 or dr6.  r6 may be copied from dr0, dr2, dr4, dr6 or dr8.
+   r7 through r9 may be copied from dr0, dr2, dr4, dr6, dr8 or
+ dr10.
+
+ The bit mask is structured as follows:
+
+ - 1 bit to tell whether to set up a return trampoline.
+
+   - 3 bits to count the number of consecutive registers to pop off the
+ stack.
+
+ - 4 bits for each of r9, r8, r7 and r6.
+
+ - 3 bits for each of r5, r4, r3 and r2.
+
+ - 3 bits set to 0 (the most significant ones)
+
+ 3 2 1 0
+ 1098 7654 3210 9876 5432 1098 7654 3210
+ FLPF LPFL PFLP FFLP FFLP FFLP FFLP SSST
+ 2223 3344 4555 6666 7777 8888 9999 SSS-
+
+ - If F is set, the register must be copied from an FP register,
+ whose number is encoded in the remaining bits.
+
+ - Else, if L is set, the register must be loaded from the address
+ contained in it. If the P bit is *not* set, the address of the
+ following dword should be computed first, and stored in the
+ following register.
+
+ - Else, if P is set, the register alone should be popped off the
+ stack.
+
+ - After all this processing, the number of registers represented
+ in SSS will be popped off the stack. This is an optimization
+ for pushing/popping consecutive registers, typically used for
+ varargs and large arguments partially passed in registers.
+
+ - If T is set, a return trampoline will be set up for 64-bit
+ return values to be split into 2 32-bit registers. */
+ long call_cookie;
+
+ /* This is set to nonzero when the call in question must use the Renesas ABI,
+ even without the -mrenesas option. */
+ int renesas_abi;
+};
+
+#define CALL_COOKIE_RET_TRAMP_SHIFT 0
+#define CALL_COOKIE_RET_TRAMP(VAL) ((VAL) << CALL_COOKIE_RET_TRAMP_SHIFT)
+#define CALL_COOKIE_STACKSEQ_SHIFT 1
+#define CALL_COOKIE_STACKSEQ(VAL) ((VAL) << CALL_COOKIE_STACKSEQ_SHIFT)
+#define CALL_COOKIE_STACKSEQ_GET(COOKIE) \
+ (((COOKIE) >> CALL_COOKIE_STACKSEQ_SHIFT) & 7)
+#define CALL_COOKIE_INT_REG_SHIFT(REG) \
+ (4 * (7 - (REG)) + (((REG) <= 2) ? ((REG) - 2) : 1) + 3)
+#define CALL_COOKIE_INT_REG(REG, VAL) \
+ ((VAL) << CALL_COOKIE_INT_REG_SHIFT (REG))
+#define CALL_COOKIE_INT_REG_GET(COOKIE, REG) \
+ (((COOKIE) >> CALL_COOKIE_INT_REG_SHIFT (REG)) & ((REG) < 4 ? 7 : 15))
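+
+/* Illustrative only: in the macros above, REG is the zero-based index
+   counting from r2, so REG 0 stands for r2 and REG 7 for r9.  As a worked
+   example, CALL_COOKIE_INT_REG_SHIFT (7) == 4*(7-7) + 1 + 3 == 4, placing
+   the 4-bit r9 field just above the T bit and the 3-bit stack-pop count,
+   while CALL_COOKIE_INT_REG_SHIFT (0) == 4*(7-0) + (0-2) + 3 == 29,
+   placing the 3-bit r2 field in the top bits of the diagram above.  */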
+
+#define CUMULATIVE_ARGS struct sh_args
+
+#define GET_SH_ARG_CLASS(MODE) \
+ ((TARGET_FPU_ANY && (MODE) == SFmode) \
+ ? SH_ARG_FLOAT \
+ /* There's no mention of complex float types in the SH5 ABI, so we
+ should presumably handle them as aggregate types. */ \
+ : TARGET_SH5 && GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT \
+ ? SH_ARG_INT \
+ : TARGET_FPU_DOUBLE && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? SH_ARG_FLOAT : SH_ARG_INT)
+
+#define ROUND_ADVANCE(SIZE) \
+ (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round a register number up to a proper boundary for an arg of mode
+ MODE.
+
+ The SH doesn't care about double alignment, so we only
+ round doubles to even regs when asked to explicitly. */
+
+#define ROUND_REG(CUM, MODE) \
+ (((TARGET_ALIGN_DOUBLE \
+ || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && ((MODE) == DFmode || (MODE) == DCmode) \
+ && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\
+ && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
+ ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+ + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \
+ : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)])
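+
+/* Illustrative only, as a worked example of ROUND_REG: on SH4, a DFmode
+   argument arriving when arg_count[SH_ARG_FLOAT] is 3 (odd) yields
+   3 + (3 & 1) == 4, skipping one register so the double starts on an
+   even-numbered FP register pair.  */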
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On SH, the offset always starts at 0: the first parm reg is always
+ the same reg for a given argument class.
+
+ For TARGET_HITACHI, the structure value pointer is passed in memory. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ sh_init_cumulative_args (& (CUM), (FNTYPE), (LIBNAME), (FNDECL), (N_NAMED_ARGS), VOIDmode)
+
+#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \
+ sh_init_cumulative_args (& (CUM), NULL_TREE, (LIBNAME), NULL_TREE, 0, (MODE))
+
+/* Return boolean indicating arg of mode MODE will be passed in a reg.
+ This macro is only used in this file. */
+
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+ (((TYPE) == 0 \
+ || (! TREE_ADDRESSABLE ((TYPE)) \
+ && (! (TARGET_HITACHI || (CUM).renesas_abi) \
+ || ! (AGGREGATE_TYPE_P (TYPE) \
+ || (!TARGET_FPU_ANY \
+ && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ && GET_MODE_SIZE (MODE) > GET_MODE_SIZE (SFmode))))))) \
+ && ! (CUM).force_mem \
+ && (TARGET_SH2E \
+ ? ((MODE) == BLKmode \
+ ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \
+ + int_size_in_bytes (TYPE)) \
+ <= NPARM_REGS (SImode) * UNITS_PER_WORD) \
+ : ((ROUND_REG((CUM), (MODE)) \
+ + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \
+ <= NPARM_REGS (MODE))) \
+ : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE)))
+
+/* By accident we got stuck with passing SCmode on SH4 little endian
+ in two registers that are nominally successive - which is different from
+ two single SFmode values, where we take endianness translation into
+ account. That does not work at all if an odd number of registers is
+ already in use, so that got fixed, but library functions are still more
+ likely to use complex numbers without mixing them with SFmode arguments
+ (which in C would have to be structures), so for the sake of ABI
+ compatibility the way SCmode values are passed when an even number of
+ FP registers is in use remains different from a pair of SFmode values for
+ now.
+ I.e.:
+ foo (double); a: fr5,fr4
+ foo (float a, float b); a: fr5 b: fr4
+   foo (__complex float a); a.real: fr4 a.imag: fr5 - for consistency,
+ this should be the other way round...
+ foo (float a, __complex float b); a: fr5 b.real: fr4 b.imag: fr7 */
+#define FUNCTION_ARG_SCmode_WART 1
+
+/* If an argument of size 5, 6 or 7 bytes is to be passed in a 64-bit
+ register in SHcompact mode, it must be padded in the most
+ significant end. This means that passing it by reference wouldn't
+ pad properly on a big-endian machine. In this particular case, we
+ pass this argument on the stack, in a way that the call trampoline
+ will load its value into the appropriate register. */
+#define SHCOMPACT_FORCE_ON_STACK(MODE,TYPE) \
+ ((MODE) == BLKmode \
+ && TARGET_SHCOMPACT \
+ && ! TARGET_LITTLE_ENDIAN \
+ && int_size_in_bytes (TYPE) > 4 \
+ && int_size_in_bytes (TYPE) < 8)
+
+/* Minimum alignment for an argument to be passed by callee-copy
+ reference. We need such arguments to be aligned to 8 byte
+ boundaries, because they'll be loaded using quad loads. */
+#define SH_MIN_ALIGN_FOR_CALLEE_COPY (8 * BITS_PER_UNIT)
+
+/* The SH5 ABI requires floating-point arguments to be passed to
+ functions without a prototype in both an FP register and a regular
+ register or the stack. When passing the argument in both FP and
+ general-purpose registers, list the FP register first. */
+#define SH5_PROTOTYPELESS_FLOAT_ARG(CUM,MODE) \
+ (gen_rtx_PARALLEL \
+ ((MODE), \
+ gen_rtvec (2, \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_FLOAT]) \
+ : NULL_RTX), \
+ const0_rtx), \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), FIRST_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_INT]) \
+ : gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_FLOAT])), \
+ const0_rtx))))
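+
+/* Illustrative only: for the first unprototyped float argument, with both
+   argument counters at zero, the PARALLEL built above amounts in RTL terms
+   to (a sketch, assuming the SH5 numbering where FIRST_FP_PARM_REG is fr0
+   and FIRST_PARM_REG is r2)
+
+     (parallel:SF [(expr_list (reg:SF fr0) (const_int 0))
+                   (expr_list (reg:SF r2) (const_int 0))])  */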
+
+/* The SH5 ABI requires regular registers or stack slots to be
+ reserved for floating-point arguments. Registers are taken care of
+ in FUNCTION_ARG_ADVANCE, but stack slots must be reserved here.
+ Unfortunately, there's no way to just reserve a stack slot, so
+ we'll end up needlessly storing a copy of the argument in the
+ stack. For incoming arguments, however, the PARALLEL will be
+ optimized to the register-only form, and the value in the stack
+ slot won't be used at all. */
+#define SH5_PROTOTYPED_FLOAT_ARG(CUM,MODE,REG) \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), (REG)) \
+ : gen_rtx_PARALLEL ((MODE), \
+ gen_rtvec (2, \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, NULL_RTX, \
+ const0_rtx), \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, gen_rtx_REG ((MODE), \
+ (REG)), \
+ const0_rtx))))
+
+#define SH5_WOULD_BE_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \
+ (TARGET_SH5 \
+ && ((MODE) == BLKmode || (MODE) == TImode || (MODE) == CDImode \
+ || (MODE) == DCmode) \
+ && ((CUM).arg_count[(int) SH_ARG_INT] \
+ + (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \
+ : GET_MODE_SIZE (MODE)) \
+ + 7) / 8) > NPARM_REGS (SImode))
+
+/* Perform any actions needed for a function that is receiving a
+ variable number of arguments. */
+
+/* Call the function profiler with a given profile label.
+ We use two .aligns, so as to make sure that both the .long is aligned
+ on a 4 byte boundary, and that the .long is a fixed distance (2 bytes)
+ from the trapa instruction. */
+
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf((STREAM), "\tmovi\t33,r0\n"); \
+ fprintf((STREAM), "\ttrapa\tr0\n"); \
+ asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \
+ } \
+ else \
+ { \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ fprintf((STREAM), "\ttrapa\t#33\n"); \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \
+ } \
+}
+
+/* Define this macro if the code for function profiling should come
+ before the function prologue. Normally, the profiling code comes
+ after. */
+
+#define PROFILE_BEFORE_PROLOGUE
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/*
+ On the SH, the trampoline looks like
+ 2 0002 D202 mov.l l2,r2
+ 1 0000 D301 mov.l l1,r3
+ 3 0004 422B jmp @r2
+ 4 0006 0009 nop
+ 5 0008 00000000 l1: .long area
+ 6 000c 00000000 l2: .long function */
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+
+/* Alignment required for a trampoline, in bits.  */
+#define TRAMPOLINE_ALIGNMENT \
+ ((CACHE_LOG < 3 || (optimize_size && ! TARGET_HARVARD)) ? 32 \
+ : TARGET_SHMEDIA ? 256 : 64)
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is already the frame pointer of the COUNT frame, so we
+ can ignore COUNT. */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ (((COUNT) == 0) ? sh_get_pr_initial_val () : (rtx) 0)
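+
+/* Consequently __builtin_return_address (0) is expanded via
+   sh_get_pr_initial_val, while any deeper frame count is simply not
+   supported and yields no RTL.  */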
+
+/* A C expression whose value is RTL representing the location of the
+ incoming return address at the beginning of any function, before the
+ prologue. This RTL is either a REG, indicating that the return
+ value is saved in REG, or a MEM representing a location in
+ the stack. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_REG (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
+
+/* Addressing modes, and classification of registers for them. */
+#define HAVE_POST_INCREMENT TARGET_SH1
+#define HAVE_PRE_DECREMENT TARGET_SH1
+
+#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : TARGET_SH1)
+#define USE_LOAD_PRE_DECREMENT(mode) 0
+#define USE_STORE_POST_INCREMENT(mode) 0
+#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : TARGET_SH1)
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+ < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+ < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+
+#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GENERAL_OR_AP_REGISTER_P (REGNO) \
+ || GENERAL_OR_AP_REGISTER_P (reg_renumber[(REGNO)]))
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ (TARGET_SHMEDIA \
+ ? (GENERAL_REGISTER_P (REGNO) \
+ || GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
+ : (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
+
+/* Maximum number of registers that can appear in a valid memory
+ address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address. */
+
+#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF)
+
+/* Nonzero if the constant value X is a legitimate general operand. */
+/* can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (TARGET_SHMEDIA \
+ ? ((GET_MODE (X) != DFmode \
+ && GET_MODE_CLASS (GET_MODE (X)) != MODE_VECTOR_FLOAT) \
+ || (X) == CONST0_RTX (GET_MODE (X)) \
+ || ! TARGET_SHMEDIA_FPU \
+ || TARGET_SHMEDIA64) \
+ : (GET_CODE (X) != CONST_DOUBLE \
+ || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \
+ || GET_MODE (X) == DImode || GET_MODE (X) == VOIDmode))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ The suitable hard regs are always accepted and all pseudo regs
+ are also accepted if STRICT is not set. */
+
+/* Nonzero if X is a reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X, STRICT) \
+ (GENERAL_OR_AP_REGISTER_P (REGNO (X)) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X is a reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X, STRICT) \
+ ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \
+ : REGNO (X) == R0_REG) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X/OFFSET is a reg that can be used as an index. */
+#define SUBREG_OK_FOR_INDEX_P(X, OFFSET, STRICT) \
+ ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \
+ : REGNO (X) == R0_REG && OFFSET == 0) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Macros for extra constraints. */
+
+#define IS_PC_RELATIVE_LOAD_ADDR_P(OP) \
+ ((GET_CODE ((OP)) == LABEL_REF) \
+ || (GET_CODE ((OP)) == CONST \
+ && GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+#define IS_NON_EXPLICIT_CONSTANT_P(OP) \
+ (CONSTANT_P (OP) \
+ && !CONST_INT_P (OP) \
+ && GET_CODE (OP) != CONST_DOUBLE \
+ && (!flag_pic \
+ || (LEGITIMATE_PIC_OPERAND_P (OP) \
+ && !PIC_ADDR_P (OP) \
+ && GET_CODE (OP) != LABEL_REF)))
+
+/* Check whether OP is a datalabel unspec. */
+#define DATALABEL_REF_NO_CONST_P(OP) \
+ (GET_CODE (OP) == UNSPEC \
+ && XINT ((OP), 1) == UNSPEC_DATALABEL \
+ && XVECLEN ((OP), 0) == 1 \
+ && GET_CODE (XVECEXP ((OP), 0, 0)) == LABEL_REF)
+
+#define GOT_ENTRY_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOT)
+
+#define GOTPLT_ENTRY_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOTPLT)
+
+#define UNSPEC_GOTOFF_P(OP) \
+ (GET_CODE (OP) == UNSPEC && XINT ((OP), 1) == UNSPEC_GOTOFF)
+
+#define GOTOFF_P(OP) \
+ (GET_CODE (OP) == CONST \
+ && (UNSPEC_GOTOFF_P (XEXP ((OP), 0)) \
+ || (GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && UNSPEC_GOTOFF_P (XEXP (XEXP ((OP), 0), 0)) \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1)))))
+
+#define PIC_ADDR_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_PIC)
+
+#define PCREL_SYMOFF_P(OP) \
+ (GET_CODE (OP) == CONST \
+ && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_PCREL_SYMOFF)
+
+#define NON_PIC_REFERENCE_P(OP) \
+ (GET_CODE (OP) == LABEL_REF || GET_CODE (OP) == SYMBOL_REF \
+ || (GET_CODE (OP) == CONST \
+ && (GET_CODE (XEXP ((OP), 0)) == LABEL_REF \
+ || GET_CODE (XEXP ((OP), 0)) == SYMBOL_REF \
+ || DATALABEL_REF_NO_CONST_P (XEXP ((OP), 0)))) \
+ || (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && (GET_CODE (XEXP (XEXP ((OP), 0), 0)) == SYMBOL_REF \
+ || GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF \
+ || DATALABEL_REF_NO_CONST_P (XEXP (XEXP ((OP), 0), 0))) \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+#define PIC_REFERENCE_P(OP) \
+ (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP) \
+ || GOTOFF_P (OP) || PIC_ADDR_P (OP))
+
+#define MOVI_SHORI_BASE_OPERAND_P(OP) \
+ (flag_pic \
+ ? (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP) || GOTOFF_P (OP) \
+ || PCREL_SYMOFF_P (OP)) \
+ : NON_PIC_REFERENCE_P (OP))
+
+#define MAYBE_BASE_REGISTER_RTX_P(X, STRICT) \
+ ((REG_P (X) && REG_OK_FOR_BASE_P (X, STRICT)) \
+ || (GET_CODE (X) == SUBREG \
+ && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))), \
+ GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+ && REG_P (SUBREG_REG (X)) \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (X), STRICT)))
+
+/* Since this must be r0, which is a single register class, we must check
+ SUBREGs more carefully, to be sure that we don't accept one that extends
+ outside the class. */
+#define MAYBE_INDEX_REGISTER_RTX_P(X, STRICT) \
+ ((REG_P (X) && REG_OK_FOR_INDEX_P (X, STRICT)) \
+ || (GET_CODE (X) == SUBREG \
+ && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))), \
+ GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+ && REG_P (SUBREG_REG (X)) \
+ && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_BYTE (X), STRICT)))
+
+#ifdef REG_OK_STRICT
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, true)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, true)
+#else
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, false)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, false)
+#endif
+
+#define ALLOW_INDEXED_ADDRESS \
+ ((!TARGET_SHMEDIA32 && !TARGET_SHCOMPACT) || TARGET_ALLOW_INDEXED_ADDRESS)
+
+#define GO_IF_LEGITIMATE_INDEX(MODE, OP, WIN) \
+ do { \
+ if (sh_legitimate_index_p ((MODE), (OP))) \
+ goto WIN; \
+ } while (0)
+
+/* A C compound statement that attempts to replace X, which is an address
+ that needs reloading, with a valid memory address for an operand of
+ mode MODE. WIN is a C statement label elsewhere in the code. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+ do { \
+ if (sh_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE))) \
+ goto WIN; \
+ } while (0)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE ((! optimize || TARGET_BIGTABLE) ? SImode : HImode)
+
+#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
+((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \
+ : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \
+ : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \
+ : SImode)
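+
+/* For example, a dispatch table whose offsets all lie in 0..127 gets
+   signed byte entries, one within 0..255 the unsigned byte form, one
+   within -32768..32767 halfword entries, and anything larger falls
+   back to SImode.  */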
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define it here, so that it doesn't get bumped to 64-bits on SHmedia. */
+#define FLOAT_TYPE_SIZE 32
+
+/* Since the SH2e has only `float' support, it is desirable to make all
+ floating point types equivalent to `float'. */
+#define DOUBLE_TYPE_SIZE ((TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH2A_DOUBLE) ? 32 : 64)
+
+/* 'char' is signed by default. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* The type of size_t: unsigned int, or long unsigned int on SH5.  */
+#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int")
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+#define SH_ELF_WCHAR_TYPE "long int"
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX (TARGET_SHMEDIA ? 8 : 4)
+
+/* Maximum value possibly taken by MOVE_MAX. Must be defined whenever
+ MOVE_MAX is not a compile-time constant. */
+#define MAX_MOVE_MAX 8
+
+/* Max number of bytes we want move_by_pieces to be able to copy
+ efficiently. */
+#define MOVE_MAX_PIECES (TARGET_SH4 || TARGET_SHMEDIA ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+/* For SHmedia, we can truncate to QImode easier using zero extension. */
+/* FP registers can load SImode values, but don't implicitly sign-extend
+ them to DImode. */
+#define LOAD_EXTEND_OP(MODE) \
+ (((MODE) == QImode && TARGET_SHMEDIA) ? ZERO_EXTEND \
+ : (MODE) != SImode ? SIGN_EXTEND : UNKNOWN)
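+
+/* So a QImode load on SHmedia is assumed zero-extending, any other
+   sub-word load sign-extending, and nothing is assumed about SImode
+   loads (which are sub-word only on SHmedia).  */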
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Nonzero if access to memory by bytes is no faster than for words. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Immediate shift counts are truncated by the output routines (or was it
+ the assembler?). Shift counts in a register are truncated by SH. Note
+ that the native compiler puts too large (> 32) immediate shift counts
+ into a register and shifts by the register, letting the SH decide what
+ to do instead of doing that itself. */
+/* ??? The library routines in lib1funcs.asm truncate the shift count.
+ However, the SH3 has hardware shifts that do not truncate exactly as gcc
+ expects - the sign bit is significant - so it appears that we need to
+ leave this zero for correct SH3 code. */
+#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A)
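+
+/* E.g. with this zero for SH3/SH2A, GCC will not assume that a shift
+   by a variable count N acts like a shift by (N & 31): shad/shld look
+   at the sign of N and shift right when it is negative.  */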
+
+/* All integers have the same format so truncation is easy. */
+/* But SHmedia must sign-extend DImode when truncating to SImode. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \
+ (!TARGET_SHMEDIA || (INPREC) < 64 || (OUTPREC) >= 64)
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+/*#define NO_FUNCTION_CSE 1*/
+
+/* The machine modes of pointers and functions. */
+#define Pmode (TARGET_SHMEDIA64 ? DImode : SImode)
+#define FUNCTION_MODE Pmode
+
+/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2
+ are actually function calls with some special constraints on arguments
+ and register usage.
+
+ These macros tell reorg that the references to arguments and
+ register clobbers for insns of type sfunc do not appear to happen
+ until after the millicode call. This allows reorg to put insns
+ which set the argument registers into the delay slot of the millicode
+ call -- thus they act more like traditional CALL_INSNs.
+
+ get_attr_is_sfunc will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+
+#define INSN_SETS_ARE_DELAYED(X) \
+ ((NONJUMP_INSN_P (X) \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+#define INSN_REFERENCES_ARE_DELAYED(X) \
+ ((NONJUMP_INSN_P (X) \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+
+/* Position Independent Code. */
+
+/* We can't directly access anything that contains a symbol,
+ nor can we indirect via the constant pool. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ ((! nonpic_symbol_mentioned_p (X) \
+ && (GET_CODE (X) != SYMBOL_REF \
+ || ! CONSTANT_POOL_ADDRESS_P (X) \
+ || ! nonpic_symbol_mentioned_p (get_pool_constant (X)))) \
+ || (TARGET_SHMEDIA && GET_CODE (X) == LABEL_REF))
+
+#define SYMBOLIC_CONST_P(X) \
+((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == LABEL_REF) \
+ && nonpic_symbol_mentioned_p (X))
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+
+/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
+ uses this information. Hence, the general register <-> floating point
+ register information here is not used for SFmode. */
+
+#define REGCLASS_HAS_GENERAL_REG(CLASS) \
+ ((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS \
+ || (! TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS))
+
+#define REGCLASS_HAS_FP_REG(CLASS) \
+ ((CLASS) == FP0_REGS || (CLASS) == FP_REGS \
+ || (CLASS) == DF_REGS || (CLASS) == DF_HI_REGS)
+
+/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This
+ would be so that people with slow memory systems could generate
+ different code that does fewer memory accesses. */
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that.
+ The SH1 does not have delay slots, hence we get a pipeline stall
+ at every branch. The SH4 is superscalar, so the single delay slot
+ is not sufficient to keep both pipelines filled. */
+#define BRANCH_COST(speed_p, predictable_p) \
+ (TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1)
+
+/* Assembler output control. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+#define ASM_COMMENT_START "!"
+
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+#define FILE_ASM_OP "\t.file\n"
+#define SET_ASM_OP "\t.set\t"
+
+/* How to change between sections. */
+
+#define TEXT_SECTION_ASM_OP (TARGET_SHMEDIA32 ? "\t.section\t.text..SHmedia32,\"ax\"" : "\t.text")
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#if defined CRT_BEGIN || defined CRT_END
+/* Arrange for TEXT_SECTION_ASM_OP to be a compile-time constant. */
+# undef TEXT_SECTION_ASM_OP
+# if __SHMEDIA__ == 1 && __SH5__ == 32
+# define TEXT_SECTION_ASM_OP "\t.section\t.text..SHmedia32,\"ax\""
+# else
+# define TEXT_SECTION_ASM_OP "\t.text"
+# endif
+#endif
+
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#ifndef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#endif
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+#endif
+
+/* Define this so that jump tables go in same section as the current function,
+ which could be text or it could be a user defined section. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+#undef DO_GLOBAL_CTORS_BODY
+#define DO_GLOBAL_CTORS_BODY \
+{ \
+ typedef void (*pfunc) (void); \
+ extern pfunc __ctors[]; \
+ extern pfunc __ctors_end[]; \
+ pfunc *p; \
+ for (p = __ctors_end; p > __ctors; ) \
+ { \
+ (*--p)(); \
+ } \
+}
+
+#undef DO_GLOBAL_DTORS_BODY
+#define DO_GLOBAL_DTORS_BODY \
+{ \
+ typedef void (*pfunc) (void); \
+ extern pfunc __dtors[]; \
+ extern pfunc __dtors_end[]; \
+ pfunc *p; \
+ for (p = __dtors; p < __dtors_end; p++) \
+ { \
+ (*p)(); \
+ } \
+}
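+
+/* Note the traversal orders above: constructors run from __ctors_end
+   downwards, destructors from __dtors upwards, so objects are torn
+   down in the reverse of their construction order.  */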
+
+#define ASM_OUTPUT_REG_PUSH(file, v) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf ((file), "\taddi.l\tr15,-8,r15\n"); \
+ fprintf ((file), "\tst.q\tr15,0,r%d\n", (v)); \
+ } \
+ else \
+ fprintf ((file), "\tmov.l\tr%d,@-r15\n", (v)); \
+}
+
+#define ASM_OUTPUT_REG_POP(file, v) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf ((file), "\tld.q\tr15,0,r%d\n", (v)); \
+ fprintf ((file), "\taddi.l\tr15,8,r15\n"); \
+ } \
+ else \
+ fprintf ((file), "\tmov.l\t@r15+,r%d\n", (v)); \
+}
+
+/* DBX register number for a given compiler register number. */
+/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers
+ to match gdb. */
+/* expand_builtin_init_dwarf_reg_sizes uses this to test if a
+ register exists, so we should return -1 for invalid register numbers. */
+#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO)
+
+/* SHcompact PR_REG used to use the encoding 241, and SHcompact FP registers
+ used to use the encodings 245..260, but that doesn't make sense:
+ PR_REG and PR_MEDIA_REG are actually the same register, and likewise
+ the FP registers stay the same when switching between compact and media
+ mode. Hence, we also need to use the same dwarf frame columns.
+ Likewise, we need to support unwind information for SHmedia registers
+ even in compact code. */
+#define SH_DBX_REGISTER_NUMBER(REGNO) \
+ (IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \
+ FIRST_GENERAL_REG + (TARGET_SH5 ? 63U : 15U)) \
+ ? ((unsigned) (REGNO) - FIRST_GENERAL_REG) \
+ : ((int) (REGNO) >= FIRST_FP_REG \
+ && ((int) (REGNO) \
+ <= (FIRST_FP_REG + \
+ ((TARGET_SH5 && TARGET_FPU_ANY) ? 63 : TARGET_SH2E ? 15 : -1)))) \
+ ? ((unsigned) (REGNO) - FIRST_FP_REG \
+ + (TARGET_SH5 ? 77 : 25)) \
+ : XD_REGISTER_P (REGNO) \
+ ? ((unsigned) (REGNO) - FIRST_XD_REG + (TARGET_SH5 ? 289 : 87)) \
+ : TARGET_REGISTER_P (REGNO) \
+ ? ((unsigned) (REGNO) - FIRST_TARGET_REG + 68) \
+ : (REGNO) == PR_REG \
+ ? (TARGET_SH5 ? 18 : 17) \
+ : (REGNO) == PR_MEDIA_REG \
+ ? (TARGET_SH5 ? 18 : (unsigned) -1) \
+ : (REGNO) == GBR_REG \
+ ? (TARGET_SH5 ? 238 : 18) \
+ : (REGNO) == MACH_REG \
+ ? (TARGET_SH5 ? 239 : 20) \
+ : (REGNO) == MACL_REG \
+ ? (TARGET_SH5 ? 240 : 21) \
+ : (REGNO) == T_REG \
+ ? (TARGET_SH5 ? 242 : 22) \
+ : (REGNO) == FPUL_REG \
+ ? (TARGET_SH5 ? 244 : 23) \
+ : (REGNO) == FPSCR_REG \
+ ? (TARGET_SH5 ? 243 : 24) \
+ : (unsigned) -1)
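+
+/* Worked examples of the mapping above: r5 -> column 5 on all targets;
+   fr2 -> 27 (25 + 2) on SH1-SH4 but 79 (77 + 2) on SH5; PR -> 17 on
+   SH1-SH4 and 18 (the PR_MEDIA_REG column) on SH5.  */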
+
+/* This is how to output a reference to a symbol_ref. On SH5,
+ references to non-code symbols must be preceded by `datalabel'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ do \
+ { \
+ if (TARGET_SH5 && !SYMBOL_REF_FUNCTION_P (SYM)) \
+ fputs ("datalabel ", (FILE)); \
+ assemble_name ((FILE), XSTR ((SYM), 0)); \
+ } \
+ while (0)
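+
+/* E.g. a reference to a global variable foo (modulo the user label
+   prefix) is printed as "datalabel foo" on SH5, but plainly as "foo"
+   for functions and for all symbols on SH1-SH4.  */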
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf ((FILE), "\t.align %d\n", (LOG))
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */
+
+/* Output a relative address table. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \
+ switch (GET_MODE (BODY)) \
+ { \
+ case SImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case HImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case QImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.byte\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ default: \
+ break; \
+ }
+
+/* Output an absolute table element. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \
+ if (! optimize || TARGET_BIGTABLE) \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \
+ else \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE));
+
+
+/* A C statement to be executed just prior to the output of
+ assembler code for INSN, to modify the extracted operands so
+ they will be output differently.
+
+ Here the argument OPVEC is the vector containing the operands
+ extracted from INSN, and NOPERANDS is the number of elements of
+ the vector which contain meaningful data for this insn.
+ The contents of this vector are what will be used to convert the insn
+ template into assembler code, so you can change the assembler output
+ by changing the contents of the vector. */
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ final_prescan_insn ((INSN), (OPVEC), (NOPERANDS))
+
+
+extern struct rtx_def *sh_compare_op0;
+extern struct rtx_def *sh_compare_op1;
+
+/* Which processor to schedule for. The elements of the enumeration must
+ match exactly the cpu attribute in the sh.md file. */
+
+enum processor_type {
+ PROCESSOR_SH1,
+ PROCESSOR_SH2,
+ PROCESSOR_SH2E,
+ PROCESSOR_SH2A,
+ PROCESSOR_SH3,
+ PROCESSOR_SH3E,
+ PROCESSOR_SH4,
+ PROCESSOR_SH4A,
+ PROCESSOR_SH5
+};
+
+#define sh_cpu_attr ((enum attr_cpu)sh_cpu)
+extern enum processor_type sh_cpu;
+
+enum mdep_reorg_phase_e
+{
+ SH_BEFORE_MDEP_REORG,
+ SH_INSERT_USES_LABELS,
+ SH_SHORTEN_BRANCHES0,
+ SH_FIXUP_PCLOAD,
+ SH_SHORTEN_BRANCHES1,
+ SH_AFTER_MDEP_REORG
+};
+
+extern enum mdep_reorg_phase_e mdep_reorg_phase;
+
+/* Handle Renesas compiler's pragmas. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "interrupt", sh_pr_interrupt); \
+ c_register_pragma (0, "trapa", sh_pr_trapa); \
+ c_register_pragma (0, "nosave_low_regs", sh_pr_nosave_low_regs); \
+} while (0)
+
+extern tree sh_deferred_function_attributes;
+extern tree *sh_deferred_function_attributes_tail;
+
+/* Set when processing a function with interrupt attribute. */
+
+extern int current_function_interrupt;
+
+
+/* Instructions with unfilled delay slots take up an
+ extra two bytes for the nop in the delay slot.
+ sh-dsp parallel processing insns are four bytes long. */
+
+#define ADJUST_INSN_LENGTH(X, LENGTH) \
+ (LENGTH) += sh_insn_length_adjustment (X);
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ Leaving the unsignedp unchanged gives better code than always setting it
+ to 0. This is despite the fact that we have only signed char and short
+ load instructions. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4 /* ! UNITS_PER_WORD */) \
+ (UNSIGNEDP) = ((MODE) == SImode ? 0 : (UNSIGNEDP)), \
+ (MODE) = (TARGET_SH1 ? SImode \
+ : TARGET_SHMEDIA32 ? SImode : DImode);
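+
+/* E.g. a signed short lives in its register sign-extended to SImode
+   (DImode on SHmedia64), and an unsigned char lives there
+   zero-extended to the same wider mode.  */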
+
+#define MAX_FIXED_MODE_SIZE (TARGET_SH5 ? 128 : 64)
+
+#define SIDI_OFF (TARGET_LITTLE_ENDIAN ? 0 : 4)
+
+/* Better to allocate once the maximum space for outgoing args in the
+ prologue rather than duplicate around each call. */
+#define ACCUMULATE_OUTGOING_ARGS TARGET_ACCUMULATE_OUTGOING_ARGS
+
+#define SH_DYNAMIC_SHIFT_COST \
+ (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (optimize_size ? 1 : 2) : 20)
+
+
+#define NUM_MODES_FOR_MODE_SWITCHING { FP_MODE_NONE }
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+
+#define ACTUAL_NORMAL_MODE(ENTITY) \
+ (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE)
+
+#define NORMAL_MODE(ENTITY) \
+ (sh_cfun_interrupt_handler_p () \
+ ? (TARGET_FMOVD ? FP_MODE_DOUBLE : FP_MODE_NONE) \
+ : ACTUAL_NORMAL_MODE (ENTITY))
+
+#define MODE_ENTRY(ENTITY) NORMAL_MODE (ENTITY)
+
+#define MODE_EXIT(ENTITY) \
+ (sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (ENTITY))
+
+#define EPILOGUE_USES(REGNO) ((TARGET_SH2E || TARGET_SH4) \
+ && (REGNO) == FPSCR_REG)
+
+#define MODE_NEEDED(ENTITY, INSN) \
+ (recog_memoized (INSN) >= 0 \
+ ? get_attr_fp_mode (INSN) \
+ : FP_MODE_NONE)
+
+#define MODE_AFTER(MODE, INSN) \
+ (TARGET_HITACHI \
+ && recog_memoized (INSN) >= 0 \
+ && get_attr_fp_set (INSN) != FP_SET_NONE \
+ ? (int) get_attr_fp_set (INSN) \
+ : (MODE))
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) \
+ ((TARGET_FPU_SINGLE != 0) ^ (N) ? FP_MODE_SINGLE : FP_MODE_DOUBLE)
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+ fpscr_set_from_mem ((MODE), (HARD_REGS_LIVE))
+
+#define MD_CAN_REDIRECT_BRANCH(INSN, SEQ) \
+ sh_can_redirect_branch ((INSN), (SEQ))
+
+#define DWARF_FRAME_RETURN_COLUMN \
+ (TARGET_SH5 ? DWARF_FRAME_REGNUM (PR_MEDIA_REG) : DWARF_FRAME_REGNUM (PR_REG))
+
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 4 ? (N) + (TARGET_SH5 ? 2U : 4U) : INVALID_REGNUM)
+
+#define EH_RETURN_STACKADJ_REGNO STATIC_CHAIN_REGNUM
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, EH_RETURN_STACKADJ_REGNO)
+
+/* We have to distinguish between code and data, so that we apply
+ datalabel where and only where appropriate. Use sdataN for data. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
+ | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if (((ENCODING) & 0xf) != DW_EH_PE_sdata4 \
+ && ((ENCODING) & 0xf) != DW_EH_PE_sdata8) \
+ { \
+ gcc_assert (GET_CODE (ADDR) == SYMBOL_REF); \
+ SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
+ if (0) goto DONE; \
+ } \
+ } while (0)
+
+#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
+/* The SH constant pool breaks the devices used in crtstuff.c to control
+   the section in which code resides, so we have to write this as asm
+   code.  */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ mov.l 1f,r1\n\
+ mova 2f,r0\n\
+ braf r1\n\
+ lds r0,pr\n\
+0: .p2align 2\n\
+1: .long " USER_LABEL_PREFIX #FUNC " - 0b\n\
+2:\n" TEXT_SECTION_ASM_OP);
+#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
+
+/* FIXME: middle-end support for highpart optimizations is missing. */
+#define high_life_started reload_in_progress
+
+#endif /* ! GCC_SH_H */
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
new file mode 100644
index 000000000..e261d3339
--- /dev/null
+++ b/gcc/config/sh/sh.md
@@ -0,0 +1,13490 @@
+;;- Machine description for Renesas / SuperH SH.
+;; Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+;; 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Steve Chamberlain (sac@cygnus.com).
+;; Improved by Jim Wilson (wilson@cygnus.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ??? Should prepend a * to all pattern names which are not used.
+;; This will make the compiler smaller, and rebuilds after changes faster.
+
+;; ??? Should be enhanced to include support for many more GNU superoptimizer
+;; sequences. Especially the sequences for arithmetic right shifts.
+
+;; ??? Should check all DImode patterns for consistency and usefulness.
+
+;; ??? The MAC.W and MAC.L instructions are not supported. There is no
+;; way to generate them.
+
+;; ??? The cmp/str instruction is not supported. Perhaps it can be used
+;; for a str* inline function.
+
+;; BSR is not generated by the compiler proper, but when relaxing, it
+;; generates .uses pseudo-ops that allow linker relaxation to create
+;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c
+
+;; Special constraints for SH machine description:
+;;
+;; t -- T
+;; x -- mac
+;; l -- pr
+;; z -- r0
+;;
+;; Special formats used for outputting SH instructions:
+;;
+;; %. -- print a .s if insn needs delay slot
+;; %@ -- print rte/rts if is/isn't an interrupt function
+;; %# -- output a nop if there is nothing to put in the delay slot
+;; %O -- print a constant without the #
+;; %R -- print the lsw reg of a double
+;; %S -- print the msw reg of a double
+;; %T -- print next word of a double REG or MEM
+;;
+;; Special predicates:
+;;
+;; arith_operand -- operand is valid source for arithmetic op
+;; arith_reg_operand -- operand is valid register for arithmetic op
+;; general_movdst_operand -- operand is valid move destination
+;; general_movsrc_operand -- operand is valid move source
+;; logical_operand -- operand is valid source for logical op
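+;;
+;; For example, the cmpeqdi_t pattern below writes
+;; "cmp/eq %S1,%S0 ... cmp/eq %R1,%R0" so that the msw and lsw of each
+;; 64-bit operand are compared without the template caring about
+;; endianness.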
+
+;; -------------------------------------------------------------------------
+;; Constants
+;; -------------------------------------------------------------------------
+
+(define_constants [
+ (AP_REG 145)
+ (PR_REG 146)
+ (T_REG 147)
+ (GBR_REG 144)
+ (MACH_REG 148)
+ (MACL_REG 149)
+ (FPUL_REG 150)
+ (RAP_REG 152)
+
+ (FPSCR_REG 151)
+
+ (PIC_REG 12)
+ (FP_REG 14)
+ (SP_REG 15)
+
+ (PR_MEDIA_REG 18)
+ (T_MEDIA_REG 19)
+
+ (R0_REG 0)
+ (R1_REG 1)
+ (R2_REG 2)
+ (R3_REG 3)
+ (R4_REG 4)
+ (R5_REG 5)
+ (R6_REG 6)
+ (R7_REG 7)
+ (R8_REG 8)
+ (R9_REG 9)
+ (R10_REG 10)
+ (R20_REG 20)
+ (R21_REG 21)
+ (R22_REG 22)
+ (R23_REG 23)
+
+ (DR0_REG 64)
+ (DR2_REG 66)
+ (DR4_REG 68)
+ (FR23_REG 87)
+
+ (TR0_REG 128)
+ (TR1_REG 129)
+ (TR2_REG 130)
+
+ (XD0_REG 136)
+
+ ;; These are used with unspec.
+ (UNSPEC_COMPACT_ARGS 0)
+ (UNSPEC_MOVA 1)
+ (UNSPEC_CASESI 2)
+ (UNSPEC_DATALABEL 3)
+ (UNSPEC_BBR 4)
+ (UNSPEC_SFUNC 5)
+ (UNSPEC_PIC 6)
+ (UNSPEC_GOT 7)
+ (UNSPEC_GOTOFF 8)
+ (UNSPEC_PLT 9)
+ (UNSPEC_CALLER 10)
+ (UNSPEC_GOTPLT 11)
+ (UNSPEC_ICACHE 12)
+ (UNSPEC_INIT_TRAMP 13)
+ (UNSPEC_FCOSA 14)
+ (UNSPEC_FSRRA 15)
+ (UNSPEC_FSINA 16)
+ (UNSPEC_NSB 17)
+ (UNSPEC_ALLOCO 18)
+ (UNSPEC_TLSGD 20)
+ (UNSPEC_TLSLDM 21)
+ (UNSPEC_TLSIE 22)
+ (UNSPEC_DTPOFF 23)
+ (UNSPEC_GOTTPOFF 24)
+ (UNSPEC_TPOFF 25)
+ (UNSPEC_RA 26)
+ (UNSPEC_DIV_INV_M0 30)
+ (UNSPEC_DIV_INV_M1 31)
+ (UNSPEC_DIV_INV_M2 32)
+ (UNSPEC_DIV_INV_M3 33)
+ (UNSPEC_DIV_INV20 34)
+ (UNSPEC_DIV_INV_TABLE 37)
+ (UNSPEC_ASHIFTRT 35)
+ (UNSPEC_THUNK 36)
+ (UNSPEC_CHKADD 38)
+ (UNSPEC_SP_SET 40)
+ (UNSPEC_SP_TEST 41)
+ (UNSPEC_MOVUA 42)
+
+ ;; (unspec [VAL SHIFT] UNSPEC_EXTRACT_S16) computes (short) (VAL >> SHIFT).
+ ;; UNSPEC_EXTRACT_U16 is the unsigned equivalent.
+ (UNSPEC_EXTRACT_S16 43)
+ (UNSPEC_EXTRACT_U16 44)
+
+ ;; (unspec [TARGET ANCHOR] UNSPEC_SYMOFF) == TARGET - ANCHOR.
+ (UNSPEC_SYMOFF 45)
+
+ ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
+ (UNSPEC_PCREL_SYMOFF 46)
+
+ ;; These are used with unspec_volatile.
+ (UNSPECV_BLOCKAGE 0)
+ (UNSPECV_ALIGN 1)
+ (UNSPECV_CONST2 2)
+ (UNSPECV_CONST4 4)
+ (UNSPECV_CONST8 6)
+ (UNSPECV_WINDOW_END 10)
+ (UNSPECV_CONST_END 11)
+ (UNSPECV_EH_RETURN 12)
+])
+
+;; -------------------------------------------------------------------------
+;; Attributes
+;; -------------------------------------------------------------------------
+
+;; Target CPU.
+
+(define_attr "cpu"
+ "sh1,sh2,sh2e,sh2a,sh3,sh3e,sh4,sh4a,sh5"
+ (const (symbol_ref "sh_cpu_attr")))
+
+(define_attr "endian" "big,little"
+ (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN")
+ (const_string "little") (const_string "big"))))
+
+;; Indicate if the default fpu mode is single precision.
+(define_attr "fpu_single" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_FPU_SINGLE")
+ (const_string "yes") (const_string "no"))))
+
+(define_attr "fmovd" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_FMOVD")
+ (const_string "yes") (const_string "no"))))
+;; pipeline model
+(define_attr "pipe_model" "sh1,sh4,sh5media"
+ (const
+ (cond [(symbol_ref "TARGET_SHMEDIA") (const_string "sh5media")
+ (symbol_ref "TARGET_SUPERSCALAR") (const_string "sh4")]
+ (const_string "sh1"))))
+
+;; cbranch conditional branch instructions
+;; jump unconditional jumps
+;; arith ordinary arithmetic
+;; arith3 a compound insn that behaves similarly to a sequence of
+;; three insns of type arith
+;; arith3b like above, but might end with a redirected branch
+;; load from memory
+;; load_si Likewise, SImode variant for general register.
+;; fload Likewise, but load to fp register.
+;; store to memory
+;; fstore floating point register to memory
+;; move general purpose register to register
+;; movi8 8-bit immediate to general purpose register
+;; mt_group other sh4 mt instructions
+;; fmove register to register, floating point
+;; smpy word precision integer multiply
+;; dmpy		longword or double-longword precision integer multiply
+;; return rts
+;; pload load of pr reg, which can't be put into delay slot of rts
+;; prset copy register to pr reg, ditto
+;; pstore store of pr reg, which can't be put into delay slot of jsr
+;; prget copy pr to register, ditto
+;; pcload pc relative load of constant value
+;; pcfload Likewise, but load to fp register.
+;; pcload_si Likewise, SImode variant for general register.
+;; rte return from exception
+;; sfunc special function call with known used registers
+;; call function call
+;; fp floating point
+;; fpscr_toggle toggle a bit in the fpscr
+;; fdiv floating point divide (or square root)
+;; gp_fpul move from general purpose register to fpul
+;; fpul_gp move from fpul to general purpose register
+;; mac_gp move from mac[lh] to general purpose register
+;; gp_mac move from general purpose register to mac[lh]
+;; mac_mem move from mac[lh] to memory
+;; mem_mac move from memory to mac[lh]
+;; dfp_arith,dfp_mul, fp_cmp,dfp_cmp,dfp_conv
+;; ftrc_s fix_truncsfsi2_i4
+;; dfdiv double precision floating point divide (or square root)
+;; cwb ic_invalidate_line_i
+;; movua SH4a unaligned load
+;; fsrra square root reciprocal approximate
+;; fsca sine and cosine approximate
+;; tls_load load TLS related address
+;; arith_media SHmedia arithmetic, logical, and shift instructions
+;; cbranch_media SHmedia conditional branch instructions
+;; cmp_media SHmedia compare instructions
+;; dfdiv_media SHmedia double precision divide and square root
+;; dfmul_media SHmedia double precision multiply instruction
+;; dfparith_media SHmedia double precision floating point arithmetic
+;; dfpconv_media SHmedia double precision floating point conversions
+;; dmpy_media SHmedia longword multiply
+;; fcmp_media SHmedia floating point compare instructions
+;; fdiv_media SHmedia single precision divide and square root
+;; fload_media SHmedia floating point register load instructions
+;; fmove_media SHmedia floating point register moves (inc. fabs and fneg)
+;; fparith_media SHmedia single precision floating point arithmetic
+;; fpconv_media SHmedia single precision floating point conversions
+;; fstore_media SHmedia floating point register store instructions
+;; gettr_media SHmedia gettr instruction
+;; invalidate_line_media SHmedia invalidate_line sequence
+;; jump_media SHmedia unconditional branch instructions
+;; load_media SHmedia general register load instructions
+;; pt_media SHmedia pt instruction (expanded by assembler)
+;; ptabs_media SHmedia ptabs instruction
+;; store_media SHmedia general register store instructions
+;; mcmp_media SHmedia multimedia compare, absolute, saturating ops
+;; mac_media SHmedia mac-style fixed point operations
+;; d2mpy_media SHmedia: two 32-bit integer multiplies
+;; atrans_media SHmedia approximate transcendental functions
+;; ustore_media SHmedia unaligned stores
+;; nil no-op move, will be deleted.
+
+(define_attr "type"
+ "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s,dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other"
+ (const_string "other"))
+
+;; We define a new attribute, namely "insn_class", which we use
+;; for the DFA-based pipeline description.
+;;
+;; mt_group SH4 "mt" group instructions.
+;;
+;; ex_group SH4 "ex" group instructions.
+;;
+;; ls_group SH4 "ls" group instructions.
+;;
+
+(define_attr "insn_class"
+ "mt_group,ex_group,ls_group,br_group,fe_group,co_group,none"
+ (cond [(eq_attr "type" "move,mt_group") (const_string "mt_group")
+ (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group")
+ (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,fstore,gp_fpul,fpul_gp") (const_string "ls_group")
+ (eq_attr "type" "cbranch,jump") (const_string "br_group")
+ (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
+ (const_string "fe_group")
+ (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb,gp_mac,mac_mem,mem_mac") (const_string "co_group")]
+ (const_string "none")))
+;; Insns of type nil expand to zero instructions, and arith3 / arith3b
+;; expand to multiple instructions, so these do not belong in an insn
+;; group, although they are modeled with their own
+;; define_insn_reservations.
+
+;; Indicate what precision must be selected in fpscr for this insn, if any.
+
+(define_attr "fp_mode" "single,double,none" (const_string "none"))
+
+;; Indicate if the fpu mode is set by this instruction
+;; "unknown" must have the value as "none" in fp_mode, and means
+;; that the instruction/abi has left the processor in an unknown
+;; state.
+;; "none" means that nothing has changed and no mode is set.
+;; This attribute is only used for the Renesas ABI.
+(define_attr "fp_set" "single,double,unknown,none" (const_string "none"))
+
+; If a conditional branch destination is within -252..258 bytes away
+; from the instruction it can be 2 bytes long. Something in the
+; range -4090..4100 bytes can be 6 bytes long. All other conditional
+; branches are initially assumed to be 16 bytes long.
+; In machine_dependent_reorg, we split all branches that are longer than
+; 2 bytes.
+
+;; The maximum range used for SImode constant pool entries is 1018. A final
+;; instruction can add 8 bytes while only being 4 bytes in size, thus we
+;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch
+;; instruction around the pool table, 2 bytes of alignment before the table,
+;; and 30 bytes of alignment after the table. That gives a maximum total
+;; pool size of 1058 bytes.
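+;; (That is, 1022 + 4 + 2 + 30 = 1058.)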
+;; Worst case code/pool content size ratio is 1:2 (using asms).
+;; Thus, in the worst case, there is one instruction in front of a maximum
+;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of
+;; code. For the last n bytes of code, there are 2n + 36 bytes of pool.
+;; If we have a forward branch, the initial table will be put after the
+;; unconditional branch.
+;;
+;; ??? We could do much better by keeping track of the actual pcloads within
+;; the branch range and in the pcload range in front of the branch range.
+
+;; ??? This looks ugly because genattrtab won't allow if_then_else or cond
+;; inside an le.
+(define_attr "short_cbranch_p" "no,yes"
+ (cond [(ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506))
+ (const_string "yes")
+ (ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) != insn") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_branch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990))
+ (const_int 1988))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4092))
+ (const_int 8186))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_cbranch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988))
+ (const_int 1986))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4090))
+ (const_int 8184))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_branch_p" "no,yes"
+ (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10330))
+ (const_int 20660))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32764))
+ (const_int 65530))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_cbranch_p" "no,yes"
+ (cond [(ne (symbol_ref "! TARGET_SH2") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10328))
+ (const_int 20658))
+ (const_string "yes")
+ (ne (symbol_ref "mdep_reorg_phase <= SH_FIXUP_PCLOAD") (const_int 0))
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32762))
+ (const_int 65528))
+ (const_string "yes")
+ ] (const_string "no")))
+
+; An unconditional jump in the range -4092..4098 can be 2 bytes long.
+; For wider ranges, we need a combination of a code and a data part.
+; If we can get a scratch register for a long range jump, the code
+; part can be 4 bytes long; otherwise, it must be 8 bytes long.
+; If the jump is in the range -32764..32770, the data part can be 2 bytes
+; long; otherwise, it must be 6 bytes long.
+
+; All other instructions are two bytes long by default.
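+;
+; For example, a conditional branch to a target roughly 3000 bytes away
+; is emitted in the 6-byte form: an inverted 2-byte bt/bf around a
+; 2-byte bra and its 2-byte delay-slot nop.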
+
+;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)),
+;; but genattrtab doesn't understand this.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "cbranch")
+ (cond [(eq_attr "short_cbranch_p" "yes")
+ (const_int 2)
+ (eq_attr "med_cbranch_p" "yes")
+ (const_int 6)
+ (eq_attr "braf_cbranch_p" "yes")
+ (const_int 12)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 14)
+ (ne (symbol_ref ("flag_pic")) (const_int 0))
+ (const_int 24)
+ ] (const_int 16))
+ (eq_attr "type" "jump")
+ (cond [(eq_attr "med_branch_p" "yes")
+ (const_int 2)
+ (and (ne (symbol_ref "prev_nonnote_insn (insn)")
+ (const_int 0))
+ (and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
+ (symbol_ref "INSN"))
+ (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
+ (symbol_ref "code_for_indirect_jump_scratch"))))
+ (cond [(eq_attr "braf_branch_p" "yes")
+ (const_int 6)
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_int 10)
+ (ne (symbol_ref "TARGET_SH2") (const_int 0))
+ (const_int 10)] (const_int 18))
+ (eq_attr "braf_branch_p" "yes")
+ (const_int 10)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 12)
+ (ne (symbol_ref ("flag_pic")) (const_int 0))
+ (const_int 22)
+ ] (const_int 14))
+ (eq_attr "type" "pt_media")
+ (if_then_else (ne (symbol_ref "TARGET_SHMEDIA64") (const_int 0))
+ (const_int 20) (const_int 12))
+ (and (eq_attr "type" "jump_media")
+ (ne (symbol_ref "TARGET_SH5_CUT2_WORKAROUND") (const_int 0)))
+ (const_int 8)
+ ] (if_then_else (ne (symbol_ref "TARGET_SHMEDIA") (const_int 0))
+ (const_int 4)
+ (const_int 2))))
+
+;; DFA descriptions for the pipelines
+
+(include "sh1.md")
+(include "shmedia.md")
+(include "sh4.md")
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; Definitions for filling delay slots
+
+(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
+
+(define_attr "banked" "yes,no"
+ (cond [(eq (symbol_ref "sh_loads_bankedreg_p (insn)")
+ (const_int 1))
+ (const_string "yes")]
+ (const_string "no")))
+
+;; ??? This should be (nil) instead of (const_int 0)
+(define_attr "hit_stack" "yes,no"
+ (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, SP_REG)")
+ (const_int 0))
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "interrupt_function" "no,yes"
+ (const (symbol_ref "current_function_interrupt")))
+
+(define_attr "in_delay_slot" "yes,no"
+ (cond [(eq_attr "type" "cbranch") (const_string "no")
+ (eq_attr "type" "pcload,pcload_si") (const_string "no")
+ (eq_attr "needs_delay_slot" "yes") (const_string "no")
+ (eq_attr "length" "2") (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "cond_delay_slot" "yes,no"
+ (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "is_sfunc" ""
+ (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))
+
+(define_attr "is_mac_media" ""
+ (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0)))
+
+(define_attr "branch_zero" "yes,no"
+ (cond [(eq_attr "type" "!cbranch") (const_string "no")
+ (ne (symbol_ref "(next_active_insn (insn)\
+ == (prev_active_insn\
+ (XEXP (SET_SRC (PATTERN (insn)), 1))))\
+ && get_attr_length (next_active_insn (insn)) == 2")
+ (const_int 0))
+ (const_string "yes")]
+ (const_string "no")))
+
+;; SH4 Double-precision computation with double-precision result -
+;; the two halves are ready at different times.
+(define_attr "dfp_comp" "yes,no"
+ (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")]
+ (const_string "no")))
+
+;; Insns for which the latency of a preceding fp insn is decreased by one.
+(define_attr "late_fp_use" "yes,no" (const_string "no"))
+;; And feeding insns for which this relevant.
+(define_attr "any_fp_comp" "yes,no"
+ (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
+ (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "any_int_load" "yes,no"
+ (cond [(eq_attr "type" "load,load_si,pcload,pcload_si")
+ (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "highpart" "user, ignore, extend, depend, must_split"
+ (const_string "user"))
+
+(define_delay
+ (eq_attr "needs_delay_slot" "yes")
+ [(eq_attr "in_delay_slot" "yes") (nil) (nil)])
+
+;; On the SH and SH2, the rte instruction reads the return pc from the stack,
+;; and thus we can't put a pop instruction in its delay slot.
+;; ??? On the SH3, the rte instruction does not use the stack, so a pop
+;; instruction can go in the delay slot.
+
+;; Since a normal return (rts) implicitly uses the PR register,
+;; we can't allow PR register loads in an rts delay slot.
+
+(define_delay
+ (eq_attr "type" "return")
+ [(and (eq_attr "in_delay_slot" "yes")
+ (ior (and (eq_attr "interrupt_function" "no")
+ (eq_attr "type" "!pload,prset"))
+ (and (eq_attr "interrupt_function" "yes")
+ (ior
+ (eq (symbol_ref "TARGET_SH3") (const_int 0))
+ (eq_attr "hit_stack" "no")
+ (eq_attr "banked" "no"))))) (nil) (nil)])
+
+;; Since a call implicitly uses the PR register, we can't allow
+;; a PR register store in a jsr delay slot.
+
+(define_delay
+ (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
+ [(and (eq_attr "in_delay_slot" "yes")
+ (eq_attr "type" "!pstore,prget")) (nil) (nil)])
+
+;; Say that we have annulled true branches, since this gives smaller and
+;; faster code when branches are predicted as not taken.
+
+;; ??? The non-annulled condition should really be "in_delay_slot",
+;; but insns that can be filled in non-annulled slots get priority over
+;; insns that can only be filled in annulled ones.
+
+(define_delay
+ (and (eq_attr "type" "cbranch")
+ (ne (symbol_ref "TARGET_SH2") (const_int 0)))
+ ;; SH2e has a hardware bug that pretty much prohibits the use of
+ ;; annulled delay slots.
+ [(eq_attr "cond_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes")
+ (not (eq_attr "cpu" "sh2e"))) (nil)])
+
+;; -------------------------------------------------------------------------
+;; SImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (reg:SI T_REG)
+ (eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
+ (match_operand:SI 1 "logical_operand" "K08,r"))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "tst %1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; ??? Perhaps should only accept reg/constant if the register is reg 0.
+;; That would still allow reload to create cmpi instructions, but would
+;; perhaps allow forcing the constant into a register when that is better.
+;; Probably should use r0 for mem/imm compares, but force constant into a
+;; register for pseudo/imm compares.
+
+(define_insn "cmpeqsi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
+ (match_operand:SI 1 "arith_operand" "N,rI08,r")))]
+ "TARGET_SH1"
+ "@
+ tst %0,%0
+ cmp/eq %1,%0
+ cmp/eq %1,%0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "cmpgtsi_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH1"
+ "@
+ cmp/gt %1,%0
+ cmp/pl %0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "cmpgesi_t"
+ [(set (reg:SI T_REG)
+ (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH1"
+ "@
+ cmp/ge %1,%0
+ cmp/pz %0"
+ [(set_attr "type" "mt_group")])
+
+;; -------------------------------------------------------------------------
+;; SImode compare and branch
+;; -------------------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ ""
+ "if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ else if (TARGET_CBRANCHDI4)
+ expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1);
+ else
+ sh_emit_compare_and_branch (operands, SImode);
+ DONE;")
+
+;; -------------------------------------------------------------------------
+;; SImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn_and_split "cmpgeusi_t"
+ [(set (reg:SI T_REG)
+ (geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "rN")))]
+ "TARGET_SH1"
+ "cmp/hs %1,%0"
+ "&& operands[1] == CONST0_RTX (SImode)"
+ [(pc)]
+ "
+{
+ emit_insn (gen_sett ());
+ DONE;
+}"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "cmpgtusi_t"
+ [(set (reg:SI T_REG)
+ (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "cmp/hi %1,%0"
+ [(set_attr "type" "mt_group")])
+
+
+;; -------------------------------------------------------------------------
+;; DImode compare and branch
+;; -------------------------------------------------------------------------
+
+
+;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism.
+;; Therefore, we aim to have a set of three branches that go straight to the
+;; destination, i.e. only one of them is taken at any one time.
+;; This mechanism should also be slightly better for the sh4-200.
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "")
+ (match_operand:DI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_dup 4))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4 || TARGET_SH2 || TARGET_SHMEDIA"
+ "
+{
+ enum rtx_code comparison;
+
+ if (TARGET_SHMEDIA)
+ {
+ emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ else if (!TARGET_CBRANCHDI4)
+ {
+ sh_emit_compare_and_branch (operands, DImode);
+ DONE;
+ }
+
+ else
+ {
+ if (expand_cbranchdi4 (operands, LAST_AND_UNUSED_RTX_CODE))
+ DONE;
+
+ comparison = prepare_cbranch_operands (operands, DImode,
+ LAST_AND_UNUSED_RTX_CODE);
+ if (comparison != GET_CODE (operands[0]))
+ operands[0]
+ = gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]);
+ operands[4] = gen_rtx_SCRATCH (SImode);
+ }
+}")
+
+(define_insn_and_split "cbranchdi4_i"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "r,r")
+ (match_operand:DI 2 "arith_operand" "rN,I08")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 4 "=X,&r"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+ "
+{
+ if (!expand_cbranchdi4 (operands, GET_CODE (operands[0])))
+ FAIL;
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; DImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (reg:SI T_REG)
+ (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_operand" "r"))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\",
+ insn, operands);"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpeqdi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
+ "TARGET_SH1"
+ "@
+ tst %S0,%S0\;bf %,Ldi%=\;tst %R0,%R0\\n%,Ldi%=:
+ cmp/eq %S1,%S0\;bf %,Ldi%=\;cmp/eq %R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_split
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DI 0 "arith_reg_operand" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "")))]
+;; If we applied this split when not optimizing, it would only be
+;; applied during the machine-dependent reorg, when no new basic blocks
+;; may be created.
+ "TARGET_SH1 && reload_completed && optimize"
+ [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 6))
+ (pc)))
+ (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5)))
+ (match_dup 6)]
+ "
+{
+ operands[2]
+ = gen_rtx_REG (SImode,
+ true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ operands[3]
+ = (operands[1] == const0_rtx
+ ? const0_rtx
+ : gen_rtx_REG (SImode,
+ true_regnum (operands[1])
+ + (TARGET_LITTLE_ENDIAN ? 1 : 0)));
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);
+ operands[6] = gen_label_rtx ();
+}")
+
+(define_insn "cmpgtdi_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
+ tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgedi_t"
+ [(set (reg:SI T_REG)
+ (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:
+ cmp/pz\\t%S0"
+ [(set_attr "length" "8,2")
+ (set_attr "type" "arith3,mt_group")])
+
+;; -------------------------------------------------------------------------
+;; DImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeudi_t"
+ [(set (reg:SI T_REG)
+ (geu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgtudi_t"
+ [(set (reg:SI T_REG)
+ (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpeqsi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SI 1 "logical_operand" "%r")
+ (match_operand:SI 2 "cmp_operand" "Nr")))]
+ "TARGET_SHMEDIA"
+ "cmpeq %1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpeqdi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "cmp_operand" "Nr")))]
+ "TARGET_SHMEDIA"
+ "cmpeq %1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtsi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:SI 1 "cmp_operand" "Nr")
+ (match_operand:SI 2 "cmp_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgt %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtdi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgt %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtusi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gtu:SI (match_operand:SI 1 "cmp_operand" "Nr")
+ (match_operand:SI 2 "cmp_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtudi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gtu:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+; This pattern is for combine.
+(define_insn "*cmpne0sisi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ne:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0)))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %1,r63,%0"
+ [(set_attr "type" "cmp_media")])
+
+;; -------------------------------------------------------------------------
+;; Conditional move instructions
+;; -------------------------------------------------------------------------
+
+;; The insn names may seem reversed, but note that cmveq performs the move
+;; if op1 == 0, and cmvne does it if op1 != 0.
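+;; For example, "cmveq %1, %N2, %0" copies operand 2 into operand 0 only
+;; when operand 1 is zero; otherwise operand 0 keeps its old value (it is
+;; tied to operand 3 by the "0" constraint).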
+
+(define_insn "movdicc_false"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (if_then_else:DI (eq (match_operand:DI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmveq %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "movdicc_true"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (if_then_else:DI (ne (match_operand:DI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmvne %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_peephole2
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (if_then_else:DI (match_operator 3 "equality_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_operand:DI 2 "arith_reg_dest" "")
+ (match_dup 0)))
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:DI (match_dup 3) (match_dup 0) (match_dup 2)))]
+ "
+{
+ operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])),
+ VOIDmode, operands[1], CONST0_RTX (DImode));
+}")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" ""))
+ (set (match_operand:DI 2 "arith_reg_dest" "")
+ (if_then_else:DI (match_operator 4 "equality_comparison_operator"
+ [(match_operand:DI 3 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_dup 0)
+ (match_dup 2)))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:DI (match_dup 4) (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "register_operand" "")
+ (match_operand:DI 3 "register_operand" "")))]
+ "TARGET_SHMEDIA"
+ "
+{
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && GET_MODE (XEXP (operands[1], 0)) == DImode
+ && XEXP (operands[1], 1) == const0_rtx)
+ ;
+ else
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+
+ operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]),
+ GET_CODE (operands[1]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ if (!operands[1])
+ FAIL;
+ }
+}")
+
+;; Add SImode variants for cmveq / cmvne to compensate for not promoting
+;; SImode to DImode.
+(define_insn "movsicc_false"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (eq (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmveq %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "movsicc_true"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (ne (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmvne %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (if_then_else:SI (match_operator 3 "equality_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_operand:SI 2 "arith_reg_dest" "")
+ (match_dup 0)))
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_dup 3) (match_dup 0) (match_dup 2)))]
+ "
+{
+ operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])),
+ VOIDmode, operands[1], CONST0_RTX (SImode));
+}")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (match_operand:SI 1 "arith_reg_or_0_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (if_then_else:SI (match_operator 4 "equality_comparison_operator"
+ [(match_operand:SI 3 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_dup 0)
+ (match_dup 2)))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])
+ && (!REG_P (operands[1]) || GENERAL_REGISTER_P (REGNO (operands[1])))"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_dup 4) (match_dup 1) (match_dup 2)))]
+ "
+{
+ replace_rtx (operands[4], operands[0], operands[1]);
+}")
+
+(define_peephole2
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operand 1 "any_register_operand" ""))
+ (set (match_operand 2 "any_register_operand" "") (match_operand 3 "" ""))
+ (set (match_operand 4 "" "") (match_operand 5 "" ""))]
+ "(HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[2]))
+ <= HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[0])))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2])
+ && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[0])
+ && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[2])
+ && ! reg_overlap_mentioned_p (operands[0], operands[3])
+ && ! reg_overlap_mentioned_p (operands[2], operands[0])
+ && ! reg_overlap_mentioned_p (operands[0], operands[1])
+ && (REGNO_REG_CLASS (REGNO (operands[0]))
+ == REGNO_REG_CLASS (REGNO (operands[2])))
+ && (REGNO_REG_CLASS (REGNO (operands[1]))
+ == REGNO_REG_CLASS (REGNO (operands[0])))"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ rtx set1, set2, insn2;
+ rtx replacements[4];
+
+ /* We want to replace occurrences of operands[0] with operands[1] and
+ operands[2] with operands[0] in operands[4]/operands[5].
+ Doing just two replace_rtx calls naively would result in the second
+ replacement undoing all that the first did if operands[1] and operands[2]
+ are identical, so we must do this simultaneously. */
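+ /* E.g. if operands[1] and operands[2] are both r2 and operands[0] is r0,
+ replacing r0 with r2 and then r2 with r0 would also turn the r2's just
+ introduced by the first pass back into r0, undoing it. */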
+ replacements[0] = operands[0];
+ replacements[1] = operands[1];
+ replacements[2] = operands[2];
+ replacements[3] = operands[0];
+ if (!replace_n_hard_rtx (operands[5], replacements, 2, 0)
+ || !replace_n_hard_rtx (operands[4], replacements, 2, 0)
+ || !replace_n_hard_rtx (operands[2], replacements, 2, 0))
+ FAIL;
+
+ operands[5] = replace_n_hard_rtx (operands[5], replacements, 2, 1);
+ replace_n_hard_rtx (operands[4], replacements, 2, 1);
+ operands[2] = replace_n_hard_rtx (operands[2], replacements, 2, 1);
+ /* The operands array is aliased to recog_data.operand, which gets
+ clobbered by extract_insn, so finish with it now. */
+ set1 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
+ set2 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
+ /* ??? The last insn might be a jump insn, but the generic peephole2 code
+ always uses emit_insn. */
+ /* Check that we don't violate matching constraints or earlyclobbers. */
+ extract_insn (emit_insn (set1));
+ if (! constrain_operands (1))
+ goto failure;
+ insn2 = emit (set2);
+ if (GET_CODE (insn2) == BARRIER)
+ goto failure;
+ extract_insn (insn2);
+ if (! constrain_operands (1))
+ {
+ rtx tmp;
+ failure:
+ tmp = replacements[0];
+ replacements[0] = replacements[1];
+ replacements[1] = tmp;
+ tmp = replacements[2];
+ replacements[2] = replacements[3];
+ replacements[3] = tmp;
+ replace_n_hard_rtx (SET_DEST (set1), replacements, 2, 1);
+ replace_n_hard_rtx (SET_DEST (set2), replacements, 2, 1);
+ replace_n_hard_rtx (SET_SRC (set2), replacements, 2, 1);
+ FAIL;
+ }
+ DONE;
+}")
+
+;; The register allocator is rather clumsy in handling multi-way conditional
+;; moves, so we allow the combiner to make them and split them up after
+;; reload.
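+;; The split below yields, in SH assembly terms (a sketch derived from the
+;; patterns it uses):
+;;   cmveq  %1, %N2, %0   ! %0 = (%1 == 0 ? %2 : %3)
+;;   cmpgtu %4, %0, %5    ! %5 = (%4 >u %0)
+;;   cmveq  %5, %4, %0    ! %0 = umin (%0, %4)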
+(define_insn_and_split "*movsicc_umin"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=&r")
+ (umin:SI (if_then_else:SI
+ (eq (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "register_operand" "0"))
+ (match_operand:SI 4 "arith_reg_or_0_operand" "r")))
+ (clobber (match_scratch:SI 5 "=&r"))]
+ "TARGET_SHMEDIA && !can_create_pseudo_p ()"
+ "#"
+ "TARGET_SHMEDIA && reload_completed"
+ [(pc)]
+ "
+{
+ emit_insn (gen_movsicc_false (operands[0], operands[1], operands[2],
+ operands[3]));
+ emit_insn (gen_cmpgtusi_media (operands[5], operands[4], operands[0]));
+ emit_insn (gen_movsicc_false (operands[0], operands[5], operands[4],
+ operands[0]));
+ DONE;
+}")
+
+(define_insn "*movsicc_t_false"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (match_operand:SI 1 "general_movsrc_operand" "r,I08")
+ (match_operand:SI 2 "arith_reg_operand" "0,0")))]
+ "TARGET_PRETEND_CMOVE
+ && (arith_reg_operand (operands[1], SImode)
+ || (immediate_operand (operands[1], SImode)
+ && satisfies_constraint_I08 (operands[1])))"
+ "bt 0f\;mov %1,%0\\n0:"
+ [(set_attr "type" "mt_group,arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_insn "*movsicc_t_true"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (match_operand:SI 1 "general_movsrc_operand" "r,I08")
+ (match_operand:SI 2 "arith_reg_operand" "0,0")))]
+ "TARGET_PRETEND_CMOVE
+ && (arith_reg_operand (operands[1], SImode)
+ || (immediate_operand (operands[1], SImode)
+ && satisfies_constraint_I08 (operands[1])))"
+ "bf 0f\;mov %1,%0\\n0:"
+ [(set_attr "type" "mt_group,arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "")
+ (match_operand:SI 3 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA || TARGET_PRETEND_CMOVE"
+ "
+{
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && GET_MODE (XEXP (operands[1], 0)) == SImode
+ && (TARGET_SHMEDIA
+ || (REG_P (XEXP (operands[1], 0))
+ && REGNO (XEXP (operands[1], 0)) == T_REG))
+ && XEXP (operands[1], 1) == const0_rtx)
+ ;
+
+ else if (TARGET_PRETEND_CMOVE)
+ {
+ enum rtx_code code = GET_CODE (operands[1]);
+ enum rtx_code new_code = code;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+ switch (code)
+ {
+ case LT: case LE: case LEU: case LTU:
+ if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_INT)
+ break;
+ case NE:
+ new_code = reverse_condition (code);
+ break;
+ case EQ: case GT: case GE: case GEU: case GTU:
+ break;
+ default:
+ FAIL;
+ }
+ sh_emit_scc_to_t (new_code, op0, op1);
+ operands[1] = gen_rtx_fmt_ee (new_code == code ? NE : EQ, VOIDmode,
+ gen_rtx_REG (SImode, T_REG), const0_rtx);
+ }
+ else
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+
+ operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]),
+ GET_CODE (operands[1]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ if (!operands[1])
+ FAIL;
+ }
+}")
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "register_operand" "")))]
+ "TARGET_SHMEDIA"
+ "
+{
+ operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0);
+ operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+ operands[3] = simplify_gen_subreg (SImode, operands[3], QImode, 0);
+ emit (gen_movsicc (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
+;; Addition instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "arith_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1)
+ {
+ if (!can_create_pseudo_p () && ! arith_reg_operand (operands[2], DImode))
+ FAIL;
+ operands[2] = force_reg (DImode, operands[2]);
+ emit_insn (gen_adddi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*adddi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add %1, %2, %0
+ addi %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*adddisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r,r") 0)
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "adddi3z_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "extend_reg_operand" "r")
+ (match_operand:SI 2 "extend_reg_or_0_operand" "rN"))))]
+ "TARGET_SHMEDIA"
+ "addz.l %1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "adddi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=&r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "arith_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx_REG (SImode,
+ true_regnum (operands[0])
+ + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx_REG (SImode,
+ true_regnum (operands[2])
+ + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_addc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "addc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (set (reg:SI T_REG)
+ (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ "TARGET_SH1"
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "addc1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ operands[1] = force_reg (SImode, operands[1]);
+}")
+
+(define_insn "addsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (plus:SI (match_operand:SI 1 "extend_reg_operand" "%r,r")
+ (match_operand:SI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "addsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (plus:SI (match_operand:SI 1 "extend_reg_operand"
+ "%r,r")
+ (match_operand:SI 2 "arith_operand"
+ "r,I10"))))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "*addsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (match_operand:SI 1 "arith_operand" "%0")
+ (match_operand:SI 2 "arith_operand" "rI08")))]
+ "TARGET_SH1"
+ "add %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Subtraction instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "")
+ (match_operand:DI 2 "arith_reg_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1)
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_subdi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*subdi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub %N1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "subdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub.l %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "subdi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=&r")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "length" "6")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "arith_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx_REG (SImode,
+ true_regnum (operands[0])
+ + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx_REG (SImode,
+ true_regnum (operands[2])
+ + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_subc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "subc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (set (reg:SI T_REG)
+ (gtu:SI (minus:SI (minus:SI (match_dup 1) (match_dup 2))
+ (reg:SI T_REG))
+ (match_dup 1)))]
+ "TARGET_SH1"
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "subc1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; life_analysis thinks rn is live before subc rn,rn, so we provide a
+;; special pattern for this case.  This helps multimedia applications
+;; that compute the sum of absolute differences.
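+;; subc Rn,Rn computes Rn - Rn - T = -T, i.e. 0 or -1 depending on T.
+;; Such an all-zeros/all-ones mask m allows, for instance, a branch-free
+;; absolute difference as (x ^ m) - m.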
+(define_insn "mov_neg_si_t"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r") (neg:SI (reg:SI T_REG)))]
+ "TARGET_SH1"
+ "subc %0,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*subsi3_internal"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "sub %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "*subsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (match_operand:SI 1 "minuend_operand" "rN")
+ (match_operand:SI 2 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA
+ && (operands[1] != constm1_rtx
+ || (GET_CODE (operands[2]) != TRUNCATE
+ && GET_CODE (operands[2]) != SUBREG))"
+ "sub.l %N1, %2, %0"
+ "operands[1] == constm1_rtx"
+ [(set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))]
+ ""
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1
+ "general_extend_operand"
+ "") 0)) 0)))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))]
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1
+ "general_extend_operand"
+ "") 0)) 3)))]
+ "TARGET_SHMEDIA && ! TARGET_LITTLE_ENDIAN"
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))]
+ "")
+;; Convert `constant - reg' to `neg rX; add #const,rX' since this
+;; will sometimes save one instruction.  Otherwise we might get
+;; `mov #const,rY; sub rX,rY; mov rY,rX' if the source and dest regs
+;; are the same.
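+;; E.g. for `r4 = 1 - r4' this gives:
+;;   neg  r4,r4
+;;   add  #1,r4
+;; instead of the three-instruction copy sequence above.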
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (minus:SI (match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1 && CONST_INT_P (operands[1]))
+ {
+ emit_insn (gen_negsi2 (operands[0], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SHMEDIA)
+ {
+ if (!can_create_pseudo_p ()
+ && ! arith_reg_or_0_operand (operands[1], SImode))
+ FAIL;
+ if (operands[1] != const0_rtx && GET_CODE (operands[1]) != SUBREG)
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+}")
+
+;; -------------------------------------------------------------------------
+;; Division instructions
+;; -------------------------------------------------------------------------
+
+;; We take advantage of the library routines which don't clobber as many
+;; registers as a normal function call would.
+
+;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+;; also has an effect on the register that holds the address of the sfunc.
+;; To make this work, we have an extra dummy insn that shows the use
+;; of this register for reorg.
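+;; Roughly, an sfunc call looks like this (a sketch; .Lsym is hypothetical):
+;;   mov.l .Lsym,r1   ! load the address of the library routine
+;;   jsr   @r1        ! the call; r1 must stay live up to this point,
+;;                    ! which the dummy use below makes explicit to reorg.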
+
+(define_insn "use_sfunc_addr"
+ [(set (reg:SI PR_REG)
+ (unspec:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_SFUNC))]
+ "TARGET_SH1 && check_use_sfunc_addr (insn, operands[0])"
+ ""
+ [(set_attr "length" "0")])
+
+(define_insn "udivsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (udiv:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divu %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
+;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than
+;; hard register 0. If we used hard register 0, then the next instruction
+;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg
+;; gets allocated to a stack slot that needs its address reloaded, then
+;; there is nothing to prevent reload from using r0 to reload the address.
+;; This reload would clobber the value in r0 we are trying to store.
+;; If we let reload allocate r0, then this problem can never happen.
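+;; A sketch of the hazard with a hard r0 destination (hypothetical code):
+;;   jsr   @r1            ! result returned in r0
+;;   mov.l r0,@(disp,r15) ! spill to the pseudo's stack slot; if disp is
+;;                        ! out of range, the address must first be
+;;                        ! reloaded into a register - possibly r0.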
+
+(define_insn "udivsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R4_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+; Since shmedia-nofpu code could be linked against shcompact code, and
+; the udivsi3 libcall has the same name, we must consider clobbered all
+; registers in the union of those clobbered by the shmedia and the
+; shcompact implementations.  Note that if the shcompact implementation
+; actually used shcompact code, we'd need to also clobber r23 and fr23.
+(define_insn "udivsi3_i1_media"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R20_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI R22_REG))
+ (clobber (reg:DI TR0_REG))
+ (clobber (reg:DI TR1_REG))
+ (clobber (reg:DI TR2_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "udivsi3_i4_media"
+ [(set (match_dup 3)
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (set (match_dup 4)
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
+ (set (match_dup 5) (float:DF (match_dup 3)))
+ (set (match_dup 6) (float:DF (match_dup 4)))
+ (set (match_dup 7) (div:DF (match_dup 5) (match_dup 6)))
+ (set (match_dup 8) (fix:DI (match_dup 7)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI (match_dup 8)))]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DFmode);
+ operands[6] = gen_reg_rtx (DFmode);
+ operands[7] = gen_reg_rtx (DFmode);
+ operands[8] = gen_reg_rtx (DImode);
+}")
+
+(define_insn "udivsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:DF DR4_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (reg:PSI FPSCR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "fp_mode" "double")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:DF DR4_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4_int"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+
+(define_expand "udivsi3"
+ [(set (match_dup 3) (symbol_ref:SI "__udivsi3"))
+ (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R4_REG))
+ (use (match_dup 3))])]
+ ""
+ "
+{
+ rtx last;
+
+ operands[3] = gen_reg_rtx (Pmode);
+ /* Emit the move of the address to a pseudo outside of the libcall. */
+ if (TARGET_DIVIDE_CALL_TABLE)
+ {
+ /* libgcc2:__udivmoddi4 is not supposed to use an actual division, since
+ that causes problems when the divide code is supposed to come from a
+ separate library. Division by zero is undefined, so dividing 1 can be
+ implemented by comparing with the divisor. */
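+ /* I.e., for unsigned x != 0, 1 / x == (1 >= x). */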
+ if (operands[1] == const1_rtx && currently_expanding_to_rtl)
+ {
+ rtx test = gen_rtx_GEU (VOIDmode, operands[1], operands[2]);
+ emit_insn (gen_cstoresi4 (operands[0], test,
+ operands[1], operands[2]));
+ DONE;
+ }
+ else if (operands[2] == const0_rtx)
+ {
+ emit_move_insn (operands[0], operands[2]);
+ DONE;
+ }
+ function_symbol (operands[3], \"__udivsi3_i4i\", SFUNC_GOT);
+ last = gen_udivsi3_i4_int (operands[0], operands[3]);
+ }
+ else if (TARGET_DIVIDE_CALL_FP)
+ {
+ function_symbol (operands[3], \"__udivsi3_i4\", SFUNC_STATIC);
+ if (TARGET_FPU_SINGLE)
+ last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_udivsi3_i4 (operands[0], operands[3]);
+ }
+ else if (TARGET_SHMEDIA_FPU)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_udivsi3_i4_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH5)
+ {
+ function_symbol (operands[3],
+ TARGET_FPU_ANY ? \"__udivsi3_i4\" : \"__udivsi3\",
+ SFUNC_STATIC);
+
+ if (TARGET_SHMEDIA)
+ last = gen_udivsi3_i1_media (operands[0], operands[3]);
+ else if (TARGET_FPU_ANY)
+ last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_udivsi3_i1 (operands[0], operands[3]);
+ }
+ else
+ {
+ function_symbol (operands[3], \"__udivsi3\", SFUNC_STATIC);
+ last = gen_udivsi3_i1 (operands[0], operands[3]);
+ }
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+ emit_insn (last);
+ DONE;
+}")
+
+(define_insn "divsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (div:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divs %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
+(define_insn "divsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && ! TARGET_SH4"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i1_media"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R20_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")])
+
+(define_insn "divsi3_media_2"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (use (reg:SI R20_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")])
+
+;; This pattern acts as a placeholder for -mdiv=inv:call to carry
+;; hard reg clobbers and data dependencies that we need when we want
+;; to rematerialize the division into a call.
+(define_insn_and_split "divsi_inv_call"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (clobber (reg:SI R20_REG))
+ (use (match_operand:SI 3 "register_operand" "r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& (high_life_started || reload_completed)"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
+ [(set_attr "highpart" "must_split")])
+
+;; This is the combiner pattern for -mdiv=inv:call.
+(define_insn_and_split "*divsi_inv_call_combine"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (clobber (reg:SI R20_REG))
+ (use (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")
+ (unspec:SI [(match_operand:SI 4 "" "")
+ (match_dup 3)
+ (match_operand:DI 5 "" "")]
+ UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "" "")
+ (const_int 0)
+ (const_int 0)]
+ UNSPEC_DIV_INV_M3))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& (high_life_started || reload_completed)"
+ [(pc)]
+ "
+{
+ const char *name = sh_divsi3_libfunc;
+ enum sh_function_kind kind = SFUNC_GOT;
+ rtx sym;
+
+ emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]);
+ while (TARGET_DIVIDE_INV_CALL2)
+ {
+ rtx x = operands[3];
+
+ if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_DIV_INV_M1)
+ break;
+ x = XVECEXP (x, 0, 0);
+ name = \"__sdivsi3_2\";
+ kind = SFUNC_STATIC;
+ emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
+ break;
+ }
+ sym = function_symbol (NULL, name, kind);
+ emit_insn (gen_divsi3_media_2 (operands[0], sym));
+ DONE;
+}"
+ [(set_attr "highpart" "must_split")])
+
+(define_expand "divsi3_i4_media"
+ [(set (match_dup 3) (float:DF (match_operand:SI 1 "register_operand" "r")))
+ (set (match_dup 4) (float:DF (match_operand:SI 2 "register_operand" "r")))
+ (set (match_dup 5) (div:DF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (fix:SI (match_dup 5)))]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (DFmode);
+ operands[5] = gen_reg_rtx (DFmode);
+}")
+
+(define_insn "divsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (use (reg:PSI FPSCR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH4 && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "fp_mode" "double")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:SI R2_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4_int"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "divsi3"
+ [(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
+ (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_dup 3))])]
+ ""
+ "
+{
+ rtx last;
+
+ operands[3] = gen_reg_rtx (Pmode);
+ /* Emit the move of the address to a pseudo outside of the libcall. */
+ if (TARGET_DIVIDE_CALL_TABLE)
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+ last = gen_divsi3_i4_int (operands[0], operands[3]);
+ }
+ else if (TARGET_DIVIDE_CALL_FP)
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ if (TARGET_FPU_SINGLE)
+ last = gen_divsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_divsi3_i4 (operands[0], operands[3]);
+ }
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_divsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_DIVIDE_INV)
+ {
+ rtx dividend = operands[1];
+ rtx divisor = operands[2];
+ rtx tab_base;
+ rtx nsb_res = gen_reg_rtx (DImode);
+ rtx norm64 = gen_reg_rtx (DImode);
+ rtx tab_ix = gen_reg_rtx (DImode);
+ rtx norm32 = gen_reg_rtx (SImode);
+ rtx i92 = force_reg (DImode, GEN_INT (92));
+ rtx scratch0a = gen_reg_rtx (DImode);
+ rtx scratch0b = gen_reg_rtx (DImode);
+ rtx inv0 = gen_reg_rtx (SImode);
+ rtx scratch1a = gen_reg_rtx (DImode);
+ rtx scratch1b = gen_reg_rtx (DImode);
+ rtx shift = gen_reg_rtx (DImode);
+ rtx i2p27, i43;
+ rtx inv1 = gen_reg_rtx (SImode);
+ rtx scratch2a = gen_reg_rtx (DImode);
+ rtx scratch2b = gen_reg_rtx (SImode);
+ rtx inv2 = gen_reg_rtx (SImode);
+ rtx scratch3a = gen_reg_rtx (DImode);
+ rtx scratch3b = gen_reg_rtx (DImode);
+ rtx scratch3c = gen_reg_rtx (DImode);
+ rtx scratch3d = gen_reg_rtx (SImode);
+ rtx scratch3e = gen_reg_rtx (DImode);
+ rtx result = gen_reg_rtx (SImode);
+
+ if (! arith_reg_or_0_operand (dividend, SImode))
+ dividend = force_reg (SImode, dividend);
+ if (! arith_reg_operand (divisor, SImode))
+ divisor = force_reg (SImode, divisor);
+ if (flag_pic && Pmode != DImode)
+ {
+ tab_base = gen_rtx_SYMBOL_REF (Pmode, \"__div_table\");
+ tab_base = gen_datalabel_ref (tab_base);
+ tab_base = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, tab_base));
+ }
+ else
+ {
+ tab_base = gen_rtx_SYMBOL_REF (DImode, \"__div_table\");
+ tab_base = gen_datalabel_ref (tab_base);
+ tab_base = force_reg (DImode, tab_base);
+ }
+ if (TARGET_DIVIDE_INV20U)
+ i2p27 = force_reg (DImode, GEN_INT (-2 << 27));
+ else
+ i2p27 = GEN_INT (0);
+ if (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)
+ i43 = force_reg (DImode, GEN_INT (43));
+ else
+ i43 = GEN_INT (0);
+ emit_insn (gen_nsbdi (nsb_res,
+ simplify_gen_subreg (DImode, divisor, SImode, 0)));
+ emit_insn (gen_ashldi3_media (norm64,
+ gen_rtx_SUBREG (DImode, divisor, 0),
+ nsb_res));
+ emit_insn (gen_ashrdi3_media (tab_ix, norm64, GEN_INT (58)));
+ emit_insn (gen_ashrdisi3_media_high (norm32, norm64, GEN_INT (32)));
+ emit_insn (gen_divsi_inv_m1 (inv1, tab_base, tab_ix, norm32,
+ inv0, scratch0a, scratch0b,
+ scratch1a, scratch1b));
+ emit_insn (gen_subdi3 (shift, i92, nsb_res));
+ emit_insn (gen_divsi_inv_m2 (inv2, norm32, inv1, i92,
+ scratch2a));
+ emit_insn (gen_divsi_inv_m3 (result, dividend, inv1, inv2, shift,
+ i2p27, i43,
+ scratch3a, scratch3b, scratch3c,
+ scratch2a, scratch2b, scratch3d, scratch3e));
+ if (TARGET_DIVIDE_INV_CALL || TARGET_DIVIDE_INV_CALL2)
+ emit_insn (gen_divsi_inv_call (operands[0], dividend, divisor, result));
+ else if (TARGET_DIVIDE_INV_FP)
+ emit_insn (gen_divsi_inv_fp (operands[0], dividend, divisor, result,
+ gen_reg_rtx (SImode), gen_reg_rtx (SImode),
+ gen_reg_rtx (DFmode), gen_reg_rtx (DFmode),
+ gen_reg_rtx (DFmode)));
+ else
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
+ else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH5)
+ {
+ if (TARGET_DIVIDE_CALL2)
+ {
+ rtx tab_base = gen_rtx_SYMBOL_REF (Pmode, \"__div_table\");
+ tab_base = gen_datalabel_ref (tab_base);
+ emit_move_insn (gen_rtx_REG (Pmode, R20_REG), tab_base);
+ }
+ if (TARGET_FPU_ANY && TARGET_SH1)
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ else if (TARGET_DIVIDE_CALL2)
+ function_symbol (operands[3], \"__sdivsi3_2\", SFUNC_STATIC);
+ else
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+
+ if (TARGET_SHMEDIA)
+ last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
+ (operands[0], operands[3]));
+ else if (TARGET_FPU_ANY)
+ last = gen_divsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_divsi3_i1 (operands[0], operands[3]);
+ }
+ else
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+ last = gen_divsi3_i1 (operands[0], operands[3]);
+ }
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+ emit_insn (last);
+ DONE;
+}")
+
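+;; Overview of the divide-by-inverse expansion (a sketch reconstructed from
+;; the patterns below): the divisor is normalized with nsb and a shift;
+;; divsi_inv_m0 looks up an initial inverse of about 11 bits in __div_table;
+;; divsi_inv_m1 refines it to about 18 bits; divsi_inv_m2 performs a further
+;; refinement step; divsi_inv_m3 then multiplies the dividend by the inverse
+;; and shifts to obtain the quotient.
+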
+;; operands: scratch, tab_base, tab_ix
+;; These are unspecs because we could generate an indexed addressing mode
+;; even if -m5-32media, where INDEX_REG_CLASS == NO_REGS, and this would
+;; confuse reload. See PR27117.
+
+(define_insn "divsi_inv_qitable"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (unspec:QI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_DIV_INV_TABLE)))]
+ "TARGET_SHMEDIA"
+ "@
+ ldx.ub %1, %2, %0"
+ [(set_attr "type" "load_media")
+ (set_attr "highpart" "user")])
+
+;; operands: scratch, tab_base, tab_ix
+(define_insn "divsi_inv_hitable"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (unspec:HI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_DIV_INV_TABLE)))]
+ "TARGET_SHMEDIA"
+ "@
+ ldx.w %1, %2, %0"
+ [(set_attr "type" "load_media")
+ (set_attr "highpart" "user")])
+
+;; operands: inv0, tab_base, tab_ix, norm32
+;; scratch equiv in sdivsi3_2: r19, r21
+(define_expand "divsi_inv_m0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M0))
+ (clobber (match_operand:DI 4 "register_operand" "=r"))
+ (clobber (match_operand:DI 5 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "
+{
+/*
+tab_base: r20
+tab_ix: r21
+norm32: r25
+ ldx.ub r20, r21, r19 // u0.8
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+*/
+
+ rtx inv0 = operands[0];
+ rtx tab_base = operands[1];
+ rtx tab_ix = operands[2];
+ rtx norm32 = operands[3];
+ rtx scratch0 = operands[4];
+ rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+ rtx scratch1 = operands[5];
+
+ emit_insn (gen_divsi_inv_qitable (scratch0, tab_base, tab_ix));
+ emit_insn (gen_ashldi3_media (scratch1, tab_ix, GEN_INT (1)));
+ emit_insn (gen_mulsidi3_media (scratch0, norm32, scratch0_si));
+ emit_insn (gen_divsi_inv_hitable (scratch1, tab_base, scratch1));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (24)));
+ emit_insn (gen_subdisi3_media (inv0, scratch1, scratch0));
+ DONE;
+}")
+
+;; operands: inv1, tab_base, tab_ix, norm32
+(define_insn_and_split "divsi_inv_m1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M1))
+ (clobber (match_operand:SI 4 "register_operand" "=r"))
+ (clobber (match_operand:DI 5 "register_operand" "=r"))
+ (clobber (match_operand:DI 6 "register_operand" "=r"))
+ (clobber (match_operand:DI 7 "register_operand" "=r"))
+ (clobber (match_operand:DI 8 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+ "
+{
+/* inv0: r19
+ muls.l r19, r19, r18 // u0.28
+ muls.l r25, r18, r18 // s2.58
+ shlli r19, 45, r0 // multiply by two and convert to s2.58
+ sub r0, r18, r18
+ shari r18, 28, r18 // some 18 bit inverse in s1.30
+*/
+
+ rtx inv1 = operands[0];
+ rtx tab_base = operands[1];
+ rtx tab_ix = operands[2];
+ rtx norm32 = operands[3];
+ rtx inv0 = operands[4];
+ rtx inv0_di = simplify_gen_subreg (DImode, inv0, SImode, 0);
+ rtx scratch0a = operands[5];
+ rtx scratch0b = operands[6];
+ rtx scratch0 = operands[7];
+ rtx scratch1 = operands[8];
+ rtx scratch1_si = simplify_gen_subreg (SImode, scratch1, DImode, SIDI_OFF);
+
+ emit_insn (gen_divsi_inv_m0 (inv0, tab_base, tab_ix, norm32,
+ scratch0a, scratch0b));
+ emit_insn (gen_mulsidi3_media (scratch1, inv0, inv0));
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+ emit_insn (gen_ashldi3_media (scratch0, inv0_di, GEN_INT (45)));
+ emit_insn (gen_subdi3 (scratch1, scratch0, scratch1));
+ emit_insn (gen_ashrdisi3_media_opaque (inv1, scratch1, GEN_INT (28)));
+ DONE;
+}")
+
+;; operands: inv2, norm32, inv1, i92
+(define_insn_and_split "divsi_inv_m2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M2))
+ (clobber (match_operand:DI 4 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+ "
+{
+/*
+ muls.l r18, r25, r0 // s2.60
+ shari r0, 16, r0 // s-16.44
+ sub ..., r0, r0 // i92 - r0 (the i92 register is not fixed here)
+ muls.l r0, r18, r19 // s-16.74
+ shari r19, 30, r19 // s-16.44
+*/
+ rtx inv2 = operands[0];
+ rtx norm32 = operands[1];
+ rtx inv1 = operands[2];
+ rtx i92 = operands[3];
+ rtx scratch0 = operands[4];
+ rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+
+ emit_insn (gen_mulsidi3_media (scratch0, inv1, norm32));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (16)));
+ emit_insn (gen_subdi3 (scratch0, i92, scratch0));
+ emit_insn (gen_mulsidi3_media (scratch0, scratch0_si, inv1));
+ emit_insn (gen_ashrdisi3_media_opaque (inv2, scratch0, GEN_INT (30)));
+ DONE;
+}")
+
+(define_insn_and_split "divsi_inv_m3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")
+ (match_operand:DI 4 "register_operand" "r")
+ (match_operand:DI 5 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 6 "arith_reg_or_0_operand" "rN")]
+ UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:DI 7 "register_operand" "=r"))
+ (clobber (match_operand:DI 8 "register_operand" "=r"))
+ (clobber (match_operand:DI 9 "register_operand" "=r"))
+ (clobber (match_operand:DI 10 "register_operand" "=r"))
+ (clobber (match_operand:SI 11 "register_operand" "=r"))
+ (clobber (match_operand:SI 12 "register_operand" "=r"))
+ (clobber (match_operand:DI 13 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+ "
+{
+/*
+ r0: result r1: shift r4: dividend r18: inv1 r19: inv2
+ r0: scratch0 r19: scratch1 r21: scratch2
+
+ muls.l r18, r4, r25 // s32.30
+ muls.l r19, r4, r19 // s15.30
+ shari r25, 63, r21
+ shari r19, 14, r19 // s18.-14
+ sub r25, r19, r0
+ shard r0, r1, r0
+ sub r0, r21, r0
+*/
+
+ rtx result = operands[0];
+ rtx dividend = operands[1];
+ rtx inv1 = operands[2];
+ rtx inv2 = operands[3];
+ rtx shift = operands[4];
+ rtx scratch0 = operands[7];
+ rtx scratch1 = operands[8];
+ rtx scratch2 = operands[9];
+
+ if (satisfies_constraint_N (dividend))
+ {
+ emit_move_insn (result, dividend);
+ DONE;
+ }
+
+ emit_insn (gen_mulsidi3_media (scratch0, inv1, dividend));
+ emit_insn (gen_mulsidi3_media (scratch1, inv2, dividend));
+ emit_insn (gen_ashrdi3_media (scratch2, scratch0, GEN_INT (63)));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (14)));
+ emit_insn (gen_adddi3 (scratch0, scratch0, scratch1));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, shift));
+ emit_insn (gen_subdisi3_media (result, scratch0, scratch2));
+ DONE;
+}")
+
+;; operands: quotient, dividend, inv1, inv2, shift, i2p27, i43
+;; inv1: tab_base, tab_ix, norm32
+;; inv2: norm32, inv1, i92
+(define_insn_and_split "divsi_inv_m1_3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (unspec:SI [(match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r")
+ (match_operand:SI 4 "register_operand" "r")]
+ UNSPEC_DIV_INV_M1)
+ (unspec:SI [(match_dup 4)
+ (unspec:SI [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)] UNSPEC_DIV_INV_M1)
+ (match_operand:SI 5 "" "")]
+ UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "register_operand" "r")
+ (match_operand:DI 7 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 8 "arith_reg_or_0_operand" "rN")]
+ UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:DI 9 "register_operand" "=r"))
+ (clobber (match_operand:DI 10 "register_operand" "=r"))
+ (clobber (match_operand:DI 11 "register_operand" "=r"))
+ (clobber (match_operand:DI 12 "register_operand" "=r"))
+ (clobber (match_operand:SI 13 "register_operand" "=r"))
+ (clobber (match_operand:SI 14 "register_operand" "=r"))
+ (clobber (match_operand:DI 15 "register_operand" "=r"))]
+ "TARGET_SHMEDIA
+ && (TARGET_DIVIDE_INV_MINLAT
+ || TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+ "
+{
+ rtx result = operands[0];
+ rtx dividend = operands[1];
+ rtx tab_base = operands[2];
+ rtx tab_ix = operands[3];
+ rtx norm32 = operands[4];
+ /* rtx i92 = operands[5]; */
+ rtx shift = operands[6];
+ rtx i2p27 = operands[7];
+ rtx i43 = operands[8];
+ rtx scratch0 = operands[9];
+ rtx scratch0_si = simplify_gen_subreg (SImode, scratch0, DImode, SIDI_OFF);
+ rtx scratch1 = operands[10];
+ rtx scratch1_si = simplify_gen_subreg (SImode, scratch1, DImode, SIDI_OFF);
+ rtx scratch2 = operands[11];
+ rtx scratch3 = operands[12];
+ rtx scratch4 = operands[13];
+ rtx scratch4_di = simplify_gen_subreg (DImode, scratch4, SImode, 0);
+ rtx scratch5 = operands[14];
+ rtx scratch5_di = simplify_gen_subreg (DImode, scratch5, SImode, 0);
+ rtx scratch6 = operands[15];
+
+ emit_insn (gen_divsi_inv_m0 (scratch4, tab_base, tab_ix, norm32,
+ scratch0, scratch1));
+ /* inv0 == scratch4 */
+ if (! TARGET_DIVIDE_INV20U)
+ {
+ emit_insn (gen_mulsidi3_media (scratch0, scratch4, scratch4));
+ i2p27 = scratch0;
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch0_si));
+ }
+ else
+ {
+ emit_insn (gen_mulsidi3_media (scratch1, scratch4, scratch4));
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+ }
+ emit_insn (gen_ashldi3_media (scratch2, scratch4_di, GEN_INT (45)));
+ emit_insn (gen_subdi3 (scratch1, scratch2, scratch1));
+ emit_insn (gen_ashrdisi3_media_opaque (scratch4, scratch1, GEN_INT (28)));
+ /* inv1 == scratch4 */
+
+ if (TARGET_DIVIDE_INV_MINLAT)
+ {
+ emit_insn (gen_mulsidi3_media (scratch1, scratch4, norm32));
+ emit_insn (gen_mulsidi3_media (scratch2, dividend, scratch4));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (16)));
+ emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch4));
+ emit_insn (gen_ashrdi3_media (scratch3, scratch2, GEN_INT (63)));
+ emit_insn (gen_ashrsi3_media (scratch5, dividend, GEN_INT (14)));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (30)));
+ emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch5));
+ emit_insn (gen_xordi3 (scratch0, scratch3, i2p27));
+ emit_insn (gen_adddi3 (scratch2, scratch2, scratch0));
+ emit_insn (gen_subdi3 (scratch2, scratch2, scratch1));
+ }
+ else
+ {
+ rtx label = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
+ /* Use separate scratch regs for nsb and sign to allow scheduling. */
+ emit_insn (gen_nsbdi (scratch6,
+ simplify_gen_subreg (DImode, dividend, SImode, 0)));
+ emit_insn (gen_xorsi3 (scratch5, dividend, norm32));
+ emit_insn (gen_ashrdi3_media (scratch3, scratch5_di, GEN_INT (63)));
+ emit_insn (gen_divsi_inv20 (scratch2,
+ norm32, scratch4, dividend,
+ scratch6, scratch3, i43,
+ /* scratch0 may be shared with i2p27. */
+ scratch0, scratch1, scratch5,
+ label, label, i2p27));
+ }
+ emit_insn (gen_ashrdi3_media (scratch2, scratch2, shift));
+ emit_insn (gen_subdisi3_media (result, scratch2, scratch3));
+ DONE;
+}")
+
+(define_insn "divsi_inv20"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (unspec:DI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")
+ (match_operand:DI 4 "register_operand" "r")
+ (match_operand:DI 5 "register_operand" "r")
+ (match_operand:DI 6 "register_operand" "r")
+ (match_operand:DI 12 "register_operand" "r")
+ (match_operand 10 "target_operand" "b")
+ (match_operand 11 "immediate_operand" "i")]
+ UNSPEC_DIV_INV20))
+ (clobber (match_operand:DI 7 "register_operand" "=&r"))
+ (clobber (match_operand:DI 8 "register_operand" "=&r"))
+ (clobber (match_operand:SI 9 "register_operand" "=r"))]
+ "TARGET_SHMEDIA
+ && (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+ "*
+{
+/* operands: %0 div_result, %1 norm32, %2 inv1, %3 dividend,
+ %4 dividend_nsb, %5 result_sign, %6 i43, %12 i2p27,
+ %7 round_scratch, %8 scratch0 (di), %9 scratch1 (si)
+ %10 label (tr), %11 label (imm)
+
+ muls.l inv1, norm32, scratch0 // s2.60
+ muls.l inv1, dividend, result // s32.30
+ xor i2p27, result_sign, round_scratch
+ bge/u dividend_nsb, i43, tr.. (label)
+ shari scratch0, 16, scratch0 // s-16.44
+ muls.l scratch0_si, inv1, scratch0 // s-16.74
+ sub result, round_scratch, result
+ shari dividend, 14, scratch1 // s19.-14
+ shari scratch0, 30, scratch0 // s-16.44
+ muls.l scratch0, scratch1, round_scratch // s15.30
+label:
+ sub result, round_scratch, result */
+
+ int likely = TARGET_DIVIDE_INV20L;
+
+  if (! likely) output_asm_insn (\"muls.l\t%2, %1, %8\", operands);
+  output_asm_insn (\"muls.l\t%2, %3, %0\;xor\t%12, %5, %7\", operands);
+  output_asm_insn (likely
+                   ? \"bge/l\t%4, %6, %10\;muls.l\t%2, %1, %8\"
+                   : \"bge/u\t%4, %6, %10\", operands);
+ output_asm_insn (\"shari\t%8, 16, %8\;muls.l\t%8, %2, %8\", operands);
+ if (! likely) output_asm_insn (\"sub\t%0, %7, %0\", operands);
+ output_asm_insn (\"shari\t%3, 14, %9\;shari\t%8, 30, %8\", operands);
+ return (likely
+ ? \"muls.l\t%8, %9, %8\;sub\t%0, %8, %0\n%11:\tadd\t%0, %7, %0\"
+ : \"muls.l\t%8, %9, %7\n%11:\tsub\t%0, %7, %0\");
+}")
+
+(define_insn_and_split "divsi_inv_fp"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=rf")
+ (div:SI (match_operand:SI 1 "general_movsrc_operand" "rf")
+ (match_operand:SI 2 "register_operand" "rf")))
+ (use (match_operand:SI 3 "general_movsrc_operand" "r"))
+ (clobber (match_operand:SI 4 "register_operand" "=r"))
+ (clobber (match_operand:SI 5 "register_operand" "=r"))
+ (clobber (match_operand:DF 6 "register_operand" "=r"))
+ (clobber (match_operand:DF 7 "register_operand" "=r"))
+ (clobber (match_operand:DF 8 "register_operand" "=r"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& (high_life_started || reload_completed)"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
+ [(set_attr "highpart" "must_split")])
+
+;; If a matching group of divide-by-inverse instructions is in the same
+;; basic block after gcse & loop optimizations, we want to transform them
+;; to a straight division using floating point for TARGET_DIVIDE_INV_FP.
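+;; A rough C-level picture of that replacement (illustrative sketch only):
+;;   double q = (double) dividend / (double) divisor;
+;;   quotient = (int) q;   /* float both operands, divide, fix; see below */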
+(define_insn_and_split "*divsi_inv_fp_combine"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (div:SI (match_operand:SI 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))
+ (use (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")
+ (unspec:SI [(match_operand:SI 4 "" "")
+ (match_dup 3)
+ (match_operand:DI 5 "" "")] UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "" "")
+ (const_int 0)
+ (const_int 0)] UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:SI 7 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:SI 8 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 9 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 10 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))]
+ "TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV_FP && !can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 9) (float:DF (match_dup 1)))
+ (set (match_dup 10) (float:DF (match_dup 2)))
+ (set (match_dup 11) (div:DF (match_dup 9) (match_dup 10)))
+ (set (match_dup 8)
+ (fix:SI (match_dup 11)))
+ (set (match_dup 0) (match_dup 8))]
+ "
+{
+ if (! fp_arith_reg_operand (operands[1], SImode))
+ {
+ emit_move_insn (operands[7], operands[1]);
+ operands[1] = operands[7];
+ }
+ if (! fp_arith_reg_operand (operands[2], SImode))
+ {
+ emit_move_insn (operands[8], operands[2]);
+ operands[2] = operands[8];
+ }
+}"
+ [(set_attr "highpart" "must_split")])
+
+;; -------------------------------------------------------------------------
+;; Multiplication instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "umulhisi3_i"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 0 "arith_reg_operand" "r"))
+ (zero_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ "TARGET_SH1"
+ "mulu.w %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_insn "mulhisi3_i"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 0 "arith_reg_operand" "r"))
+ (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ "TARGET_SH1"
+ "muls.w %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_expand "mulhisi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (sign_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+ "
+{
+ rtx insn, macl;
+
+ macl = gen_rtx_REG (SImode, MACL_REG);
+ start_sequence ();
+ emit_insn (gen_mulhisi3_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in umul_widen_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also smulsi3_highpart.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_expr. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn)));
+
+ DONE;
+}")
+
+(define_expand "umulhisi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (zero_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+ "
+{
+ rtx insn, macl;
+
+ macl = gen_rtx_REG (SImode, MACL_REG);
+ start_sequence ();
+ emit_insn (gen_umulhisi3_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in umul_widen_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also smulsi3_highpart.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_expr. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn)));
+
+ DONE;
+}")
+
+;; mulsi3 on the SH2 can be done in one instruction; on the SH1 we generate
+;; a call to a routine which clobbers known registers.
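+;; For example (sketch): the SH2 path emits "mul.l" plus an "sts macl,Rn"
+;; copy of the result, while the SH1 path loads the address of __mulsi3
+;; into a register and emits a "jsr" through it.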
+
+(define_insn ""
+ [(set (match_operand:SI 1 "register_operand" "=z")
+ (mult:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "mulsi3_call"
+ [(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel[(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 3 "register_operand" ""))])]
+ "TARGET_SH1"
+ "")
+
+(define_insn "mul_r"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "mulr %2,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_insn "mul_l"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "mul.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "mulsi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+ "
+{
+ if (!TARGET_SH2)
+ {
+ /* The address must be set outside the libcall,
+ since it goes into a pseudo. */
+ rtx sym = function_symbol (NULL, \"__mulsi3\", SFUNC_STATIC);
+ rtx addr = force_reg (SImode, sym);
+ rtx insns = gen_mulsi3_call (operands[0], operands[1],
+ operands[2], addr);
+ emit_insn (insns);
+ }
+ else
+ {
+ rtx macl = gen_rtx_REG (SImode, MACL_REG);
+
+ emit_insn (gen_mul_l (operands[1], operands[2]));
+ /* consec_sets_giv can only recognize the first insn that sets a
+ giv as the giv insn. So we must tag this also with a REG_EQUAL
+ note. */
+ emit_insn (gen_movsi_i ((operands[0]), macl));
+ }
+ DONE;
+}")
+
+(define_insn "mulsidi3_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI MACL_REG)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+ "
+{
+ if (TARGET_SH2)
+ {
+ emit_insn (gen_mulsidi3_compact (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "mulsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "muls.l %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "mulsidi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ [(const_int 0)]
+ "
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_mulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+}")
+
+(define_insn "umulsidi3_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI MACL_REG)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+ "
+{
+ if (TARGET_SH2)
+ {
+ emit_insn (gen_umulsidi3_compact (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "umulsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "mulu.l %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "umulsidi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ [(const_int 0)]
+ "
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_umulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+}")
+
+(define_insn "smulsi3_highpart_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "smulsi3_highpart"
+ [(parallel
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACH_REG))]
+ "TARGET_SH2"
+ "
+{
+ rtx insn, mach;
+
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ start_sequence ();
+ emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in mul_highpart_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also {,u}mulhisi.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_mult_highpart. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn)));
+
+ DONE;
+}")
+
+(define_insn "umulsi3_highpart_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACH_REG))]
+ "TARGET_SH2"
+ "
+{
+ rtx insn, mach;
+
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ start_sequence ();
+ emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn)));
+
+ DONE;
+}")
+
+(define_insn_and_split "muldi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (match_scratch:DI 4 "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
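+  /* Schoolbook 64x64->64 multiplication from 32-bit halves (sketch):
+       result = lo1 * lo2 + ((lo1 * hi2 + hi1 * lo2) << 32)
+     The two cross products are formed element-wise in V2SImode after
+     rotating one operand by 32 bits.  */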
+ rtx op3_v2si, op2_v2si;
+
+ op3_v2si = operands[3];
+ if (GET_CODE (op3_v2si) == SIGN_EXTEND)
+ {
+ op3_v2si = XEXP (op3_v2si, 0);
+ op3_v2si = simplify_gen_subreg (DImode, op3_v2si, GET_MODE (op3_v2si), 0);
+ }
+ op3_v2si = simplify_gen_subreg (V2SImode, op3_v2si, DImode, 0);
+ op2_v2si = operands[2];
+ if (GET_CODE (op2_v2si) == SIGN_EXTEND)
+ {
+ op2_v2si = XEXP (op2_v2si, 0);
+ op2_v2si = simplify_gen_subreg (DImode, op2_v2si, GET_MODE (op2_v2si), 0);
+ }
+ op2_v2si = simplify_gen_subreg (V2SImode, op2_v2si, DImode, 0);
+ emit_insn (gen_rotldi3 (operands[3], operands[1], GEN_INT (32)));
+ emit_insn (gen_mulv2si3 (op3_v2si, op3_v2si, op2_v2si));
+ emit_insn (gen_umulsidi3_media (operands[4],
+ sh_gen_truncate (SImode, operands[1], 0),
+ sh_gen_truncate (SImode, operands[2], 0)));
+ emit_insn (gen_anddi3 (operands[0], operands[3], GEN_INT (0xffffffff00000000LL)));
+ emit_insn (gen_ashldi3_media (operands[3], operands[3], GEN_INT (32)));
+ emit_insn (gen_adddi3 (operands[0], operands[3], operands[0]));
+ emit_insn (gen_adddi3 (operands[0], operands[4], operands[0]));
+ DONE;
+}")
+
+
+;; -------------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------------
+
+(define_insn "*andsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,z")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "r,K08")))]
+ "TARGET_SH1"
+ "and %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*andsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (and:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ and %1, %2, %0
+ andi %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*andsi3_bclr"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "Psz")))]
+ "TARGET_SH2A && satisfies_constraint_Psz (operands[2])"
+ "bclr\\t%W2,%0"
+ [(set_attr "type" "arith")])
+
+;; If the constant is 255, then emit an extu.b instruction instead of an
+;; and, since that will give better code.
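+;; e.g. (sketch) "x & 0xff" becomes
+;;   extu.b  rM,rN
+;; instead of an and with the constant 255.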
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (and:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "logical_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1
+ && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 255)
+ {
+ emit_insn (gen_zero_extendqisi2 (operands[0],
+ gen_lowpart (QImode, operands[1])));
+ DONE;
+ }
+}")
+
+(define_insn_and_split "anddi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r,r")
+ (and:DI (match_operand:DI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:DI 2 "and_operand" "r,I10,J16")))]
+ "TARGET_SHMEDIA"
+ "@
+ and %1, %2, %0
+ andi %1, %2, %0
+ #"
+ "reload_completed
+ && ! logical_operand (operands[2], DImode)"
+ [(const_int 0)]
+ "
+{
+ if ((unsigned)INTVAL (operands[2]) == (unsigned) 0xffffffff)
+ emit_insn (gen_mshflo_l_di (operands[0], operands[1], CONST0_RTX (DImode)));
+ else
+ emit_insn (gen_mshfhi_l_di (operands[0], CONST0_RTX (DImode), operands[1]));
+ DONE;
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "andcsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (not:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "andc %1,%2,%0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "andcdi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (and:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (not:DI (match_operand:DI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "andc %1,%2,%0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ior:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "logical_operand" "")))]
+ ""
+ "")
+
+(define_insn "*iorsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,z")
+ (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "r,K08")))]
+ "TARGET_SH1
+ && !(TARGET_SH2A && satisfies_constraint_Pso (operands[2]))"
+ "or %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*iorsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ior:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ or %1, %2, %0
+ ori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*iorsi3_bset"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "Pso")))]
+ "TARGET_SH2A && satisfies_constraint_Pso (operands[2])"
+ "bset\\t%V2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (ior:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ or %1, %2, %0
+ ori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn_and_split "*logical_sidi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")])))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+ "
+{
+ operands[3]
+ = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ simplify_gen_subreg (DImode, operands[2], SImode, 0));
+}")
+
+(define_insn_and_split "*logical_sidisi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (truncate:SI (sign_extend:DI
+ (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")]))))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 3))])
+
+(define_insn_and_split "*logical_sidi3_2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (truncate:SI (sign_extend:DI
+ (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")])))))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (sign_extend:DI (match_dup 3)))])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (xor:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "xor_operand" "")))]
+ ""
+ "")
+
+(define_insn "*xorsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
+ (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "K08,r")))]
+ "TARGET_SH1"
+ "xor %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*xorsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "xor_operand" "r,I06")))]
+ "TARGET_SHMEDIA"
+ "@
+ xor %1, %2, %0
+ xori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+;; Store the complement of the T bit in a register.
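+;; (movrt stores %0 = T ^ 1, i.e. the logical negation of the T bit.)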
+(define_insn "xorsi3_movrt"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (xor:SI (reg:SI T_REG)
+ (const_int 1)))]
+ "TARGET_SH2A"
+ "movrt\\t%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (xor:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "xor_operand" "r,I06")))]
+ "TARGET_SHMEDIA"
+ "@
+ xor %1, %2, %0
+ xori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+;; Combiner bridge pattern for 2 * sign extend -> logical op -> truncate.
+;; It converts 2 * sign extend -> logical op into logical op -> sign extend.
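+;; e.g. (illustrative, little-endian case)
+;;   (set rD (sign_extend:DI (and:SI r1 r2)))
+;; becomes
+;;   (set (subreg:SI rD 0) (and:SI r1 r2))
+;;   (set rD (sign_extend:DI (subreg:SI rD 0)))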
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (sign_extend:DI (match_operator 4 "binary_logical_operator"
+ [(match_operand 1 "any_register_operand" "")
+ (match_operand 2 "any_register_operand" "")])))]
+ "TARGET_SHMEDIA"
+ [(set (match_dup 5) (match_dup 4))
+ (set (match_dup 0) (sign_extend:DI (match_dup 5)))]
+"
+{
+ enum machine_mode inmode = GET_MODE (operands[1]);
+ int offset = 0;
+
+ if (GET_CODE (operands[0]) == SUBREG)
+ {
+ offset = SUBREG_BYTE (operands[0]);
+ operands[0] = SUBREG_REG (operands[0]);
+ }
+ gcc_assert (REG_P (operands[0]));
+ if (! TARGET_LITTLE_ENDIAN)
+ offset += 8 - GET_MODE_SIZE (inmode);
+ operands[5] = gen_rtx_SUBREG (inmode, operands[0], offset);
+}")
+
+;; -------------------------------------------------------------------------
+;; Shifts and rotates
+;; -------------------------------------------------------------------------
+
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+ "if (! mextr_bit_offset (operands[2], HImode)) FAIL;")
+
+(define_insn "rotldi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+ "*
+{
+ static char templ[16];
+
+ sprintf (templ, \"mextr%d\\t%%1,%%1,%%0\",
+ 8 - (int) (INTVAL (operands[2]) >> 3));
+ return templ;
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+ "if (! mextr_bit_offset (operands[2], HImode)) FAIL;")
+
+(define_insn "rotrdi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+ "*
+{
+ static char templ[16];
+
+ sprintf (templ, \"mextr%d\\t%%1,%%1,%%0\", (int) INTVAL (operands[2]) >> 3);
+ return templ;
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ior:DI (zero_extend:DI (mem:QI (match_operand 1
+ "ua_address_operand" "")))
+ (ashift:DI (match_operand:DI 2 "arith_reg_operand" "")
+ (const_int 8))))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "TARGET_SHMEDIA"
+ [(match_dup 4) (match_dup 5)]
+ "
+{
+ operands[4] = ((TARGET_LITTLE_ENDIAN ? gen_ldhi_q : gen_ldlo_q)
+ (operands[3], operands[1]));
+ operands[5] = gen_mextr_rl (operands[0], operands[3], operands[2],
+ GEN_INT (56), GEN_INT (8));
+}")
+
+(define_insn "rotlsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (lshiftrt:SI (match_dup 1) (const_int 31)))]
+ "TARGET_SH1"
+ "rotl %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_31"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_16"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ "TARGET_SH1"
+ "swap.w %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))]
+ "TARGET_SH1"
+ "
+{
+ static const char rot_tab[] = {
+ 000, 000, 000, 000, 000, 000, 010, 001,
+ 001, 001, 011, 013, 003, 003, 003, 003,
+ 003, 003, 003, 003, 003, 013, 012, 002,
+ 002, 002, 010, 000, 000, 000, 000, 000,
+ };
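+  /* rot_tab is indexed by the rotate count 0..31.  The low three bits
+     select the strategy used in the switch below (0: single-bit rotates
+     only, 1/2: a swap.w plus byte shifts and an or, 3: swap.w first),
+     and the 010 bit marks counts that are better done with a dynamic
+     shift when SH_DYNAMIC_SHIFT_COST <= 1 (descriptive summary).  */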
+
+ int count, choice;
+
+ if (!CONST_INT_P (operands[2]))
+ FAIL;
+ count = INTVAL (operands[2]);
+ choice = rot_tab[count];
+ if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1)
+ FAIL;
+ choice &= 7;
+ switch (choice)
+ {
+ case 0:
+ emit_move_insn (operands[0], operands[1]);
+ count -= (count & 16) * 2;
+ break;
+ case 3:
+ emit_insn (gen_rotlsi3_16 (operands[0], operands[1]));
+ count -= 16;
+ break;
+ case 1:
+ case 2:
+ {
+ rtx parts[2];
+ parts[0] = gen_reg_rtx (SImode);
+ parts[1] = gen_reg_rtx (SImode);
+ emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1]));
+ emit_move_insn (parts[choice-1], operands[1]);
+ emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8)));
+ emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8)));
+ emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1]));
+ count = (count & ~16) - 8;
+ }
+ }
+
+ for (; count > 0; count--)
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ for (; count < 0; count++)
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+
+ DONE;
+}")
+
+(define_insn "*rotlhi3_8"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r")
+ (const_int 8)))]
+ "TARGET_SH1"
+ "swap.b %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "arith_reg_operand" "")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")))]
+ "TARGET_SH1"
+ "
+{
+ if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 8)
+ FAIL;
+}")
+
+;;
+;; shift left
+
+(define_insn "ashlsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH2A"
+ "shad %2,%0"
+ [(set_attr "type" "arith")
+ (set_attr "length" "4")])
+
+;; This pattern is used by init_expmed for computing the costs of shift
+;; insns.
+
+(define_insn_and_split "ashlsi3_std"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r,r,r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,M,P27,?ri")))
+ (clobber (match_scratch:SI 3 "=X,X,X,&r"))]
+ "TARGET_SH3
+ || (TARGET_SH1 && satisfies_constraint_P27 (operands[2]))"
+ "@
+ shld %2,%0
+ add %0,%0
+ shll%O2 %0
+ #"
+ "TARGET_SH3
+ && reload_completed
+ && CONST_INT_P (operands[2])
+ && ! satisfies_constraint_P27 (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel
+ [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 3)))
+ (clobber (match_dup 4))])]
+ "operands[4] = gen_rtx_SCRATCH (SImode);"
+ [(set_attr "length" "*,*,*,4")
+ (set_attr "type" "dyn_shift,arith,arith,arith")])
+
+(define_insn "ashlhi3_k"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r,r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "M,P27")))]
+ "TARGET_SH1 && satisfies_constraint_P27 (operands[2])"
+ "@
+ add %0,%0
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashlsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && ! sh_dynamicalize_shift_p (operands[2])"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
+ (const_string "6")]
+ (const_string "8")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ gen_shifty_op (ASHIFT, operands);
+ DONE;
+}")
+
+(define_insn "ashlsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ashift:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlld.l %1, %2, %0
+ shlli.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "ashlsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (CONST_INT_P (operands[2])
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ if (TARGET_SH3)
+ {
+ emit_insn (gen_ashlsi3_std (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (! immediate_operand (operands[2], GET_MODE (operands[2])))
+ FAIL;
+}")
+
+(define_insn "*ashlhi3_n"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")]
+ (const_string "6")))
+ (set_attr "type" "arith")])
+
+(define_expand "ashlhi3"
+ [(parallel [(set (match_operand:HI 0 "arith_reg_operand" "")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH1"
+ "
+{
+ if (!CONST_INT_P (operands[2]))
+ FAIL;
+ /* It may be possible to call gen_ashlhi3 directly with more generic
+ operands. Make sure operands[1] is a HImode register here. */
+ if (!arith_reg_operand (operands[1], HImode))
+ operands[1] = copy_to_mode_reg (HImode, operands[1]);
+}")
+
+(define_split
+ [(set (match_operand:HI 0 "arith_reg_dest" "")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ gen_shifty_hi_op (ASHIFT, operands);
+ DONE;
+}")
+
+;;
+;; arithmetic shift right
+;;
+
+(define_insn "ashrsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2A"
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")
+ (set_attr "length" "4")])
+
+(define_insn "ashrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "M")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && INTVAL (operands[2]) == 1"
+ "shar %0"
+ [(set_attr "type" "arith")])
+
+;; We can't do HImode right shifts correctly unless we start out with an
+;; explicit zero / sign extension; doing that would result in worse overall
+;; code, so just let the machine-independent code widen the mode.
+;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3.
+
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrsi2_16"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 16)))]
+ "TARGET_SH1"
+ [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16)))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+ "operands[2] = gen_lowpart (HImode, operands[0]);")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrsi2_31"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_ashlsi_c (operands[0], operands[1]));
+ emit_insn (gen_mov_neg_si_t (copy_rtx (operands[0])));
+ DONE;
+}")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "") (const_int 0))
+ (set (reg:SI T_REG)
+ (gt:SI (match_dup 0) (match_operand:SI 1 "arith_reg_operand" "")))]
+ "TARGET_SH1
+ && peep2_reg_dead_p (2, operands[0])
+ && peep2_reg_dead_p (2, operands[1])"
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_ashlsi_c (operands[1], operands[1]));
+ DONE;
+}")
+
+(define_insn "ashlsi_c"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
+ (set (reg:SI T_REG)
+ (lt:SI (match_dup 1) (const_int 0)))]
+ "TARGET_SH1"
+ "shll %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*ashlsi_c_void"
+ [(set (reg:SI T_REG)
+ (lt:SI (match_operand:SI 0 "arith_reg_operand" "r") (const_int 0)))
+ (clobber (match_scratch:SI 1 "=0"))]
+ "TARGET_SH1 && cse_not_expected"
+ "shll %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH3"
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+(define_insn "ashrsi3_n"
+ [(set (reg:SI R4_REG)
+ (ashiftrt:SI (reg:SI R4_REG)
+ (match_operand:SI 0 "const_int_operand" "i")))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "ashrsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shard.l %1, %2, %0
+ shari.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "ashrsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (expand_ashiftrt (operands))
+ DONE;
+ else
+ FAIL;
+}")
+
+;; logical shift right
+
+(define_insn "lshrsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH2A"
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")
+ (set_attr "length" "4")])
+
+(define_insn "lshrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SH3"
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+;; Only the single-bit shift clobbers the T bit.
+
+(define_insn "lshrsi3_m"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "M")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && satisfies_constraint_M (operands[2])"
+ "shlr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "P27")))]
+ "TARGET_SH1 && satisfies_constraint_P27 (operands[2])
+ && ! satisfies_constraint_M (operands[2])"
+ "shlr%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && ! sh_dynamicalize_shift_p (operands[2])"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
+ (const_string "6")]
+ (const_string "8")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ gen_shifty_op (LSHIFTRT, operands);
+ DONE;
+}")
+
+(define_insn "lshrsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (lshiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlrd.l %1, %2, %0
+ shlri.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "lshrsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (CONST_INT_P (operands[2])
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
+ {
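+      /* There is no separate dynamic logical-right-shift insn; shld
+         shifts right when its count register is negative, so negate the
+         count here (descriptive note).  */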
+ rtx count = copy_to_mode_reg (SImode, operands[2]);
+ emit_insn (gen_negsi2 (count, count));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], count));
+ DONE;
+ }
+ if (! immediate_operand (operands[2], GET_MODE (operands[2])))
+ FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashldi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "shll %R0\;rotcl %S0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+;; Expander for DImode shift left with SImode operations.
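+;; For a constant count n < 32 this computes (sketch):
+;;   low'  = low << n
+;;   high' = (high << n) | (low >> (32 - n))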
+
+(define_expand "ashldi3_std"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SH1 && INTVAL (operands[2]) < 32"
+ "
+{
+ int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+ int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+ rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+ rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+ rtx dst = gen_reg_rtx (DImode);
+ rtx low_dst = operand_subword (dst, low_word, 1, DImode);
+ rtx high_dst = operand_subword (dst, high_word, 1, DImode);
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (SImode);
+ tmp1 = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp0, low_src, GEN_INT (32 - INTVAL (operands[2]))));
+ emit_insn (gen_ashlsi3 (low_dst, low_src, operands[2]));
+ emit_insn (gen_ashlsi3 (tmp1, high_src, operands[2]));
+ emit_insn (gen_iorsi3 (high_dst, tmp0, tmp1));
+ emit_move_insn (operands[0], dst);
+ DONE;
+}")
+
+(define_insn "ashldi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlld %1, %2, %0
+ shlli %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*ashldisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shlli.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "ashldi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_ashldi3_k (operands[0], operands[1]));
+ DONE;
+ }
+ else if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) < 32)
+ {
+ emit_insn (gen_ashldi3_std (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "lshrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "shlr %S0\;rotcr %R0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_insn "lshrdi3_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA
+ && (arith_reg_dest (operands[0], DImode)
+ || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > 32))"
+ "@
+ shlrd %1, %2, %0
+ shlri %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*lshrdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shlri.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "lshrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) != 1)
+ FAIL;
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "ashrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "shar %S0\;rotcr %R0"
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
+
+(define_insn "ashrdi3_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA
+ && (arith_reg_dest (operands[0], DImode)
+ || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 32))"
+ "@
+ shard %1, %2, %0
+ shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*ashrdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shari.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "ashrdisi3_media_high"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (truncate:SI
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) >= 32"
+ "shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "ashrdisi3_media_opaque"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (unspec:SI [(match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")]
+ UNSPEC_ASHIFTRT))]
+ "TARGET_SHMEDIA"
+ "shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "ashrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2])
+ || INTVAL (operands[2]) != 1)
+ FAIL;
+}")
+
+;; combined left/right shift
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI R0_REG))]
+ "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI R0_REG))]
+ "if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 2"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")]
+ (const_string "10")))
+ (set_attr "type" "arith")])
+
+;; Shift left / and combination with a scratch register: the combine pass
+;; does not accept the individual instructions, even though they are
+;; cheap, but it needs a precise description so that the pattern is
+;; usable after reload.
+(define_insn "and_shl_scratch"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (lshiftrt:SI
+ (ashift:SI
+ (and:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0")
+ (match_operand:SI 2 "const_int_operand" "N,n"))
+ (match_operand:SI 3 "" "0,r"))
+ (match_operand:SI 4 "const_int_operand" "n,n"))
+ (match_operand:SI 5 "const_int_operand" "n,n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5))
+ (const_string "10")]
+ (const_string "12")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI
+ (ashift:SI
+ (and:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "register_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1];
+
+ if (INTVAL (operands[2]))
+ {
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ emit_insn (gen_andsi3 (operands[0], operands[0], and_source));
+ operands[2] = operands[4];
+ gen_shifty_op (ASHIFT, operands);
+ if (INTVAL (operands[5]))
+ {
+ operands[2] = operands[5];
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ DONE;
+}")
+
+;; signed left/right shift combination.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(use (reg:SI R0_REG))]
+ "if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) FAIL;
+ DONE;")
+
+(define_insn "shl_sext_ext"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && (unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 1))
+ (const_string "2")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn "shl_sext_sub"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && (shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")]
+ (const_string "14")))
+ (set_attr "type" "arith")])
+
+;; These patterns are found in expansions of DImode shifts by 16, and
+;; allow the xtrct instruction to be generated from C source.
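+;; e.g. (illustrative C source)
+;;   unsigned int f (unsigned int a, unsigned int b)
+;;   { return (a << 16) | (b >> 16); }   /* -> xtrct */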
+
+(define_insn "xtrct_left"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16))
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0")
+ (const_int 16))))]
+ "TARGET_SH1"
+ "xtrct %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "xtrct_right"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 16))
+ (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r")
+ (const_int 16))))]
+ "TARGET_SH1"
+ "xtrct %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic
+;; -------------------------------------------------------------------------
+
+(define_insn "negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (neg:SI (plus:SI (reg:SI T_REG)
+ (match_operand:SI 1 "arith_reg_operand" "r"))))
+ (set (reg:SI T_REG)
+ (ne:SI (ior:SI (reg:SI T_REG) (match_dup 1))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "negc %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*negdi_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub r63, %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1)
+ {
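+      /* 64-bit negation computed as 0 - x: clear T, negate the low word
+         with negc (which leaves the borrow in T), then negate the high
+         word with borrow (descriptive note).  */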
+ int low_word = (TARGET_LITTLE_ENDIAN ? 0 : 1);
+ int high_word = (TARGET_LITTLE_ENDIAN ? 1 : 0);
+
+ rtx low_src = operand_subword (operands[1], low_word, 0, DImode);
+ rtx high_src = operand_subword (operands[1], high_word, 0, DImode);
+
+ rtx low_dst = operand_subword (operands[0], low_word, 1, DImode);
+ rtx high_dst = operand_subword (operands[0], high_word, 1, DImode);
+
+ emit_insn (gen_clrt ());
+ emit_insn (gen_negc (low_dst, low_src));
+ emit_insn (gen_negc (high_dst, high_src));
+ DONE;
+ }
+}")
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "neg %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "not %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (xor:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (const_int -1)))]
+ "TARGET_SHMEDIA" "")
+
+/* The SH4-202 can do zero-offset branches without pipeline stalls.
+   This provides a limited form of conditional execution, which is useful
+   for abs.  */
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (xor:SI (neg:SI (reg:SI T_REG))
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (reg:SI T_REG)))]
+ "TARGET_HARD_SH4"
+ [(const_int 0)]
+ "emit_insn (gen_movsi_i (operands[0], operands[1]));
+ emit_insn (gen_cneg (operands[0], operands[0], operands[0]));
+ DONE;")
+
+(define_insn "cneg"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (eq:SI (reg:SI T_REG) (const_int 0))
+ (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_HARD_SH4"
+ "bf 0f\;neg %2,%0\\n0:"
+ [(set_attr "type" "arith") ;; poor approximation
+ (set_attr "length" "4")])
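+
+;; In effect, the conditional negate used for abs, (x ^ -T) + T, becomes
+;; a plain mov followed by cneg, i.e. a neg guarded by a zero-offset bf.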
+
+
+;; -------------------------------------------------------------------------
+;; Zero extension instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "addz.l %1, r63, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "extend")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.uw %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
+ "
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+}")
+
+;; ??? When a truncated input to a zero_extend is reloaded, reload will
+;; reload the entire truncate expression.
+(define_insn_and_split "*loaddi_trunc"
+ [(set (match_operand 0 "any_register_operand" "=r")
+ (truncate (match_operand:DI 1 "memory_operand" "m")))]
+ "TARGET_SHMEDIA && reload_completed"
+ "#"
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0]));")
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "general_extend_operand" "")))]
+ ""
+ "
+{
+ if (! TARGET_SHMEDIA && ! arith_reg_operand (operands[1], HImode))
+ operands[1] = copy_to_mode_reg (HImode, operands[1]);
+}")
+
+(define_insn "*zero_extendhisi2_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "extu.w %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*zero_extendhisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.uw %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+ "
+{
+ rtx op1 = operands[1];
+
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+}")
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "general_extend_operand" "")))]
+ ""
+ "
+{
+ if (! TARGET_SHMEDIA && ! arith_reg_operand (operands[1], QImode))
+ operands[1] = copy_to_mode_reg (QImode, operands[1]);
+}")
+
+(define_insn "*zero_extendqisi2_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "extu.b %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*zero_extendqisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "extu.b %1,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Sign extension instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+;; ??? Or perhaps it should be dropped?
+
+;; convert_move generates good code for SH[1-4].
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,?f")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, r63, %0
+ ld%M1.l %m1, %0
+ fmov.sl %1, %0"
+ [(set_attr "type" "arith_media,load_media,fpconv_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "extend")))])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.w %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
+ "
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+}")
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.b %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:QI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 56)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
+ "
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+}")
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ ""
+ "")
+
+(define_insn "*extendhisi2_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_movsrc_operand" "r,m")))]
+ "TARGET_SH1"
+ "@
+ exts.w %1,%0
+ mov.w %1,%0"
+ [(set_attr "type" "arith,load")])
+
+(define_insn "*extendhisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.w %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))]
+ "
+{
+ rtx op1 = operands[1];
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+}")
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ ""
+ "")
+
+(define_insn "*extendqisi2_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+ "TARGET_SH1"
+ "@
+ exts.b %1,%0
+ mov.b %1,%0"
+ [(set_attr "type" "arith,load")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))])])
+
+(define_insn "*extendqisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.b %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))]
+ "
+{
+ rtx op1 = operands[1];
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+}")
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:HI (match_operand:QI 1 "general_movsrc_operand" "r,m")))]
+ "TARGET_SH1"
+ "@
+ exts.b %1,%0
+ mov.b %1,%0"
+ [(set_attr "type" "arith,load")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))])])
+
+/* It would seem useful to combine the truncXi patterns into the movXi
+ patterns, but unary operators are ignored when matching constraints,
+ so we need separate patterns. */
+(define_insn "truncdisi2"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=r,m,m,f,r,f")
+ (truncate:SI (match_operand:DI 1 "register_operand" "r,r,f,r,f,f")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, r63, %0
+ st%M0.l %m0, %1
+ fst%M0.s %m0, %T1
+ fmov.ls %1, %0
+ fmov.sl %T1, %0
+ fmov.s %T1, %0"
+ [(set_attr "type" "arith_media,store_media,fstore_media,fload_media,fpconv_media,fmove_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "extend")))])
+
+(define_insn "truncdihi2"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=?r,m")
+ (truncate:HI (match_operand:DI 1 "register_operand" "r,r")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlli\\t%1,48,%0\;shlri\\t%0,48,%0
+ st%M0.w %m0, %1"
+ [(set_attr "type" "arith_media,store_media")
+ (set_attr "length" "8,4")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "extend")))])
+
+;; N.B. This should agree with LOAD_EXTEND_OP and movqi.
+;; Because we use zero extension, we can't provide signed QImode compares
+;; using a simple compare or conditional branch insn.
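+;; E.g. a signed char comparison must sign-extend both operands
+;; explicitly first, since registers hold QImode values zero-extended.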
+(define_insn "truncdiqi2"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,m")
+ (truncate:QI (match_operand:DI 1 "register_operand" "r,r")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ st%M0.b %m0, %1"
+  [(set_attr "type" "arith_media,store_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+		      (const_string "extend")))])
+
+;; -------------------------------------------------------------------------
+;; Move instructions
+;; -------------------------------------------------------------------------
+
+;; Define push and pop so that sh.c can emit them easily.
+;; We can't use push and pop on SHcompact because the stack must always
+;; be 8-byte aligned.
+
+(define_expand "push"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" "r,l,x"))]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "pop"
+ [(set (match_operand:SI 0 "register_operand" "=r,l,x")
+ (mem:SI (post_inc:SI (reg:SI SP_REG))))]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
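+
+;; A push of r8, for instance, is matched by the movsi patterns as a
+;; pre-decrement store and assembles to "mov.l r8,@-r15"; the matching
+;; pop is the post-increment load "mov.l @r15+,r8".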
+
+(define_expand "push_e"
+ [(parallel [(set (mem:SF (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SF 0 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_insn "push_fpul"
+ [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))]
+ "TARGET_SH2E && ! TARGET_SH5"
+ "sts.l fpul,@-r15"
+ [(set_attr "type" "fstore")
+ (set_attr "late_fp_use" "yes")
+ (set_attr "hit_stack" "yes")])
+
+;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4,
+;; so use that.
+(define_expand "push_4"
+ [(parallel [(set (mem:DF (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:DF 0 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "pop_e"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (mem:SF (post_inc:SI (reg:SI SP_REG))))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_insn "pop_fpul"
+ [(set (reg:SF FPUL_REG) (mem:SF (post_inc:SI (reg:SI SP_REG))))]
+ "TARGET_SH2E && ! TARGET_SH5"
+ "lds.l @r15+,fpul"
+ [(set_attr "type" "load")
+ (set_attr "hit_stack" "yes")])
+
+(define_expand "pop_4"
+ [(parallel [(set (match_operand:DF 0 "" "")
+ (mem:DF (post_inc:SI (reg:SI SP_REG))))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "push_fpscr"
+ [(const_int 0)]
+ "TARGET_SH2E"
+ "
+{
+ rtx insn = emit_insn (gen_fpu_switch (gen_frame_mem (PSImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ get_fpscr_rtx ()));
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+}")
+
+(define_expand "pop_fpscr"
+ [(const_int 0)]
+ "TARGET_SH2E"
+ "
+{
+ rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
+ gen_frame_mem (PSImode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx))));
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+}")
+
+;; These two patterns can happen as the result of optimization, when
+;; comparisons get simplified to a move of zero or 1 into the T reg.
+;; They don't disappear completely, because the T reg is a fixed hard reg.
+
+(define_insn "clrt"
+ [(set (reg:SI T_REG) (const_int 0))]
+ "TARGET_SH1"
+ "clrt")
+
+(define_insn "sett"
+ [(set (reg:SI T_REG) (const_int 1))]
+ "TARGET_SH1"
+ "sett")
+
+;; Define additional pop for SH1 and SH2 so it does not get
+;; placed in the delay slot.
+(define_insn "*movsi_pop"
+ [(set (match_operand:SI 0 "register_operand" "=r,x,l")
+ (match_operand:SI 1 "sh_no_delay_pop_operand" ">,>,>"))]
+ "(TARGET_SH1 || TARGET_SH2E || TARGET_SH2A)
+ && ! TARGET_SH3"
+ "@
+ mov.l %1,%0
+ lds.l %1,%0
+ lds.l %1,%0"
+ [(set_attr "type" "load_si,mem_mac,pload")
+ (set_attr "length" "2,2,2")
+ (set_attr "in_delay_slot" "no,no,no")])
+
+;; t/r must come after r/r, lest reload will try to reload stuff like
+;; (set (subreg:SI (mem:QI (plus:SI (reg:SI SP_REG) (const_int 12)) 0) 0)
+;; (made from (set (subreg:SI (reg:QI ###) 0) ...)) into T.
+(define_insn "movsi_i"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,r")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "Q,r,I08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))]
+ "TARGET_SH1
+ && ! TARGET_SH2E
+ && ! TARGET_SH2A
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ cmp/pl %1
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload_si,move,movi8,mt_group,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,pcload_si")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+
+;; t/r must come after r/r, lest reload will try to reload stuff like
+;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2)
+;; ??? This allows moves from macl to fpul to be recognized, but these moves
+;; will require a reload.
+;; ??? We can't include f/f because we need the proper FPSCR setting when
+;; TARGET_FMOVD is in effect, and mode switching is done before reload.
+(define_insn "movsi_ie"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "Q,r,I08,I20,I28,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
+ "(TARGET_SH2E || TARGET_SH2A)
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ movi20 %1,%0
+ movi20s %1,%0
+ cmp/pl %1
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ sts.l %1,%0
+ fake %1,%0
+ lds %1,%0
+ sts %1,%0
+ fsts fpul,%0
+ flds %1,fpul
+ fmov %1,%0
+ ! move optimized away"
+ [(set_attr "type" "pcload_si,move,movi8,move,move,*,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 0)])])
+
+(define_insn "movsi_i_lowpart"
+ [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,t,r,i"))]
+ "TARGET_SH1
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,arith,store,pcload")])
+
+(define_insn_and_split "load_ra"
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")] UNSPEC_RA))]
+ "TARGET_SH1"
+ "#"
+ "&& ! currently_expanding_to_rtl"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ if (TARGET_SHCOMPACT && crtl->saves_all_registers)
+ operands[1] = gen_frame_mem (SImode, return_address_pointer_rtx);
+}")
+
+;; The '?'s in the following constraints may not reflect the time taken
+;; to perform the move. They are there to discourage the use of floating-
+;; point registers for storing integer values.
+(define_insn "*movsi_media"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,m,f?,m,f?,r,f?,*b,r,b")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "r,I16Css,nCpg,m,rZ,m,f?,rZ,f?,f?,r,*b,Csy"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], SImode)
+ || sh_register_operand (operands[1], SImode)
+ || GET_CODE (operands[1]) == TRUNCATE)"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1
+ fld%M1.s %m1, %0
+ fst%M0.s %m0, %1
+ fmov.ls %N1, %0
+ fmov.sl %1, %0
+ fmov.s %1, %0
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,fload_media,fstore_media,fload_media,fpconv_media,fmove_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,8,4,4,4,4,4,4,4,4,4,12")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "*movsi_media_nofpu"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,m,*b,r,*b")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "r,I16Css,nCpg,m,rZ,r,*b,Csy"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], SImode)
+ || sh_register_operand (operands[1], SImode)
+ || GET_CODE (operands[1]) == TRUNCATE)"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,8,4,4,4,4,12")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "movsi_const"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 16)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 0) (const_int 16))
+ (const:SI (unspec:SI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ "
+{
+ if (GET_CODE (operands[1]) == LABEL_REF
+ && GET_CODE (XEXP (operands[1], 0)) == CODE_LABEL)
+ LABEL_NUSES (XEXP (operands[1], 0)) += 2;
+ else if (GOTOFF_P (operands[1]))
+ {
+ rtx unspec = XEXP (operands[1], 0);
+
+ if (! UNSPEC_GOTOFF_P (unspec))
+ {
+ unspec = XEXP (unspec, 0);
+ if (! UNSPEC_GOTOFF_P (unspec))
+ abort ();
+ }
+ if (GET_CODE (XVECEXP (unspec , 0, 0)) == LABEL_REF
+ && (GET_CODE (XEXP (XVECEXP (unspec, 0, 0), 0)) == CODE_LABEL))
+ LABEL_NUSES (XEXP (XVECEXP (unspec, 0, 0), 0)) += 2;
+ }
+}")
+
+(define_expand "movsi_const_16bit"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 0)] UNSPEC_EXTRACT_S16)))]
+ "TARGET_SHMEDIA && flag_pic && reload_completed
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ [(const_int 0)]
+ "
+{
+ rtx insn = emit_insn (gen_movsi_const (operands[0], operands[1]));
+
+ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1]));
+
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ((CONST_INT_P (operands[1])
+ && ! satisfies_constraint_I16 (operands[1]))
+ || GET_CODE (operands[1]) == CONST_DOUBLE)"
+ [(set (subreg:DI (match_dup 0) 0) (match_dup 1))])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (match_operand:SI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, SImode)) DONE; }")
+
+(define_expand "ic_invalidate_line"
+ [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r")
+ (match_dup 1)] UNSPEC_ICACHE)
+ (clobber (scratch:SI))])]
+ "TARGET_HARD_SH4 || TARGET_SH5"
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ic_invalidate_line_media (operands[0]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ operands[1] = function_symbol (NULL, \"__ic_invalidate\", SFUNC_STATIC);
+ operands[1] = force_reg (Pmode, operands[1]);
+ emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SH4A_ARCH || TARGET_SH4_300)
+ {
+ emit_insn (gen_ic_invalidate_line_sh4a (operands[0]));
+ DONE;
+ }
+ operands[0] = force_reg (Pmode, operands[0]);
+ operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008,
+ Pmode)));
+}")
+
+;; The address %0 is assumed to be at least 4-aligned.  Thus, by ORing
+;; 0xf0000008, we get the low-order bits *1*00 (binary), which fit
+;; the requirement *1*00 for associative address writes.  The alignment of
+;; %0 implies that its least significant bit is cleared,
+;; thus we clear the V bit of a matching entry if there is one.
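+;; For example, a 4-aligned address ending in 0x70 ends in 0x78 after
+;; the OR (binary ...0111 1000): bit 3 is set and bits 1:0 stay clear.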
+(define_insn "ic_invalidate_line_i"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_ICACHE)
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_HARD_SH4"
+ "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%1,%2\;mov.l\\t%0,@%2"
+ [(set_attr "length" "8")
+ (set_attr "type" "cwb")])
+
+(define_insn "ic_invalidate_line_sh4a"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SH4A_ARCH || TARGET_SH4_300"
+ "ocbwb\\t@%0\;synco\;icbi\\t@%0"
+ [(set_attr "length" "16")
+ (set_attr "type" "cwb")])
+
+;; ??? Could make arg 0 an offsettable memory operand in order to save
+;; an add in the code that calculates the address.
+(define_insn "ic_invalidate_line_media"
+ [(unspec_volatile [(match_operand 0 "any_register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SHMEDIA"
+ "ocbwb %0,0\;synco\;icbi %0, 0\;synci"
+ [(set_attr "length" "16")
+ (set_attr "type" "invalidate_line_media")])
+
+(define_insn "ic_invalidate_line_compact"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_ICACHE)
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "initialize_trampoline"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")]
+ "TARGET_SHCOMPACT"
+ "
+{
+ rtx sfun, tramp;
+
+ tramp = force_reg (Pmode, operands[0]);
+ sfun = force_reg (Pmode, function_symbol (NULL, \"__init_trampoline\",
+ SFUNC_STATIC));
+ emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
+
+ emit_insn (gen_initialize_trampoline_compact (tramp, sfun));
+ DONE;
+}")
+
+(define_insn "initialize_trampoline_compact"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
+ (match_operand:SI 1 "register_operand" "r")
+ (reg:SI R2_REG) (reg:SI R3_REG)]
+ UNSPEC_INIT_TRAMP)
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
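+
+;; To summarize the convention above: the trampoline address is passed
+;; in r0 (constraint "z"), the function address in r2, the static chain
+;; in r3, and the __init_trampoline helper performs the actual stores.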
+
+(define_insn "movqi_i"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m,r,r,l")
+ (match_operand:QI 1 "general_movsrc_operand" "r,i,m,r,t,l,r"))]
+ "TARGET_SH1
+ && (arith_reg_operand (operands[0], QImode)
+ || arith_reg_operand (operands[1], QImode))"
+ "@
+ mov %1,%0
+ mov %1,%0
+ mov.b %1,%0
+ mov.b %1,%0
+ movt %0
+ sts %1,%0
+ lds %1,%0"
+ [(set_attr "type" "move,movi8,load,store,arith,prget,prset")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)])])
+
+(define_insn "*movqi_media"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:QI 1 "general_movsrc_operand" "r,I16Css,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (arith_reg_operand (operands[0], QImode)
+ || extend_reg_or_0_operand (operands[1], QImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ ld%M1.ub %m1, %0
+ st%M0.b %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, QImode)) DONE; }")
+
+(define_expand "reload_inqi"
+ [(set (match_operand:SI 2 "" "=&r")
+ (match_operand:QI 1 "inqhi_operand" ""))
+ (set (match_operand:QI 0 "arith_reg_operand" "=r")
+ (truncate:QI (match_dup 3)))]
+ "TARGET_SHMEDIA"
+ "
+{
+ rtx inner = XEXP (operands[1], 0);
+ int regno = REGNO (inner);
+
+ regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1;
+ operands[1] = gen_rtx_REG (SImode, regno);
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[2]));
+}")
+
+/* When storing r0, we have to avoid reg+reg addressing. */
+(define_insn "movhi_i"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m,r,l,r")
+ (match_operand:HI 1 "general_movsrc_operand" "Q,rI08,m,t,r,l,r,i"))]
+ "TARGET_SH1
+ && (arith_reg_operand (operands[0], HImode)
+ || arith_reg_operand (operands[1], HImode))
+ && (!MEM_P (operands[0])
+ || GET_CODE (XEXP (operands[0], 0)) != PLUS
+ || !REG_P (XEXP (XEXP (operands[0], 0), 1))
+ || ! refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0))"
+ "@
+ mov.w %1,%0
+ mov %1,%0
+ mov.w %1,%0
+ movt %0
+ mov.w %1,%0
+ sts %1,%0
+ lds %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,load,move,store,move,move,pcload")])
+
+(define_insn "*movhi_media"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m")
+ (match_operand:HI 1 "general_movsrc_operand" "r,I16Css,n,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (arith_reg_operand (operands[0], HImode)
+ || arith_reg_or_0_operand (operands[1], HImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.w %m1, %0
+ st%M0.w %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ! satisfies_constraint_I16 (operands[1])"
+ [(set (subreg:DI (match_dup 0) 0) (match_dup 1))])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_movdst_operand" "")
+ (match_operand:HI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, HImode)) DONE; }")
+
+(define_expand "reload_inhi"
+ [(set (match_operand:SI 2 "" "=&r")
+ (match_operand:HI 1 "inqhi_operand" ""))
+ (set (match_operand:HI 0 "arith_reg_operand" "=r")
+ (truncate:HI (match_dup 3)))]
+ "TARGET_SHMEDIA"
+ "
+{
+ rtx inner = XEXP (operands[1], 0);
+ int regno = REGNO (inner);
+
+ regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1;
+ operands[1] = gen_rtx_REG (SImode, regno);
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[2]));
+}")
+
+;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c
+;; compiled with -m2 -ml -O3 -funroll-loops
+(define_insn "*movdi_i"
+ [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x")
+ (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I08,i,x,r"))]
+ "TARGET_SH1
+ && (arith_reg_operand (operands[0], DImode)
+ || arith_reg_operand (operands[1], DImode))"
+ "* return output_movedouble (insn, operands, DImode);"
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move,load,store,move,pcload,move,move")])
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+
+(define_split
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ int regno;
+
+ if ((MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ regno = REGNO (operands[0]);
+ break;
+ case SUBREG:
+ regno = subreg_regno (operands[0]);
+ break;
+ case MEM:
+ regno = -1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DImode);
+ operands[3] = operand_subword (operands[1], 0, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1, 0, DImode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DImode);
+ operands[3] = operand_subword (operands[1], 1, 0, DImode);
+ operands[4] = operand_subword (operands[0], 0, 0, DImode);
+ operands[5] = operand_subword (operands[1], 0, 0, DImode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+}")
+
+;; The '?'s in the following constraints may not reflect the time taken
+;; to perform the move. They are there to discourage the use of floating-
+;; point registers for storing integer values.
+(define_insn "*movdi_media"
+ [(set (match_operand:DI 0 "general_movdst_operand"
+ "=r,r,r,rl,m,f?,m,f?,r,f?,*b,r,*b")
+ (match_operand:DI 1 "general_movsrc_operand"
+ "r,I16Css,nCpgF,m,rlZ,m,f?,rZ,f?,f?,r,*b,Csy"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], DImode)
+ || sh_register_operand (operands[1], DImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1
+ fld%M1.d %m1, %0
+ fst%M0.d %m0, %1
+ fmov.qd %N1, %0
+ fmov.dq %1, %0
+ fmov.d %1, %0
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,fload_media,fstore_media,fload_media,dfpconv_media,fmove_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,16,4,4,4,4,4,4,4,4,4,*")])
+
+(define_insn "*movdi_media_nofpu"
+  [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,rl,m,*b,r,*b")
+ (match_operand:DI 1 "general_movsrc_operand" "r,I16Css,nCpgF,m,rlZ,r,*b,Csy"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], DImode)
+ || sh_register_operand (operands[1], DImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,16,4,4,4,4,*")])
+
+(define_insn "*movdi_media_I16"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r")
+ (match_operand:DI 1 "const_int_operand" "I16"))]
+ "TARGET_SHMEDIA && reload_completed"
+ "movi %1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ rtx insn;
+
+ if (TARGET_SHMEDIA64)
+ insn = emit_insn (gen_movdi_const (operands[0], operands[1]));
+ else
+ insn = emit_insn (gen_movdi_const_32bit (operands[0], operands[1]));
+
+ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1]));
+
+ DONE;
+}")
+
+(define_expand "movdi_const"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 48)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 32)] UNSPEC_EXTRACT_U16))))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 16)] UNSPEC_EXTRACT_U16))))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA64 && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ "
+{
+ sh_mark_label (operands[1], 4);
+}")
+
+(define_expand "movdi_const_32bit"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 16)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA32 && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ "
+{
+ sh_mark_label (operands[1], 2);
+}")
+
+(define_expand "movdi_const_16bit"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 0)] UNSPEC_EXTRACT_S16)))]
+ "TARGET_SHMEDIA && flag_pic && reload_completed
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "ext_dest_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && CONST_INT_P (operands[1])
+ && ! satisfies_constraint_I16 (operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (match_dup 1)]
+ "
+{
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]);
+ unsigned HOST_WIDE_INT low = val;
+ unsigned HOST_WIDE_INT high = val;
+ unsigned HOST_WIDE_INT sign;
+ unsigned HOST_WIDE_INT val2 = val ^ (val-1);
+
+ /* Zero-extend the 16 least-significant bits. */
+ low &= 0xffff;
+
+ /* Arithmetic shift right the word by 16 bits. */
+ high >>= 16;
+ if (GET_CODE (operands[0]) == SUBREG
+ && GET_MODE (SUBREG_REG (operands[0])) == SImode)
+ {
+ high &= 0xffff;
+ high ^= 0x8000;
+ high -= 0x8000;
+ }
+ else
+ {
+ sign = 1;
+ sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1);
+ high ^= sign;
+ high -= sign;
+ }
+ do
+ {
+ /* If we can't generate the constant with a two-insn movi / shori
+ sequence, try some other strategies. */
+ if (! CONST_OK_FOR_I16 (high))
+ {
+ /* Try constant load / left shift. We know VAL != 0. */
+ val2 = val ^ (val-1);
+ if (val2 > 0x1ffff)
+ {
+ int trailing_zeroes = exact_log2 ((val2 >> 16) + 1) + 15;
+
+ if (CONST_OK_FOR_I16 (val >> trailing_zeroes)
+ || (! CONST_OK_FOR_I16 (high >> 16)
+ && CONST_OK_FOR_I16 (val >> (trailing_zeroes + 16))))
+ {
+ val2 = (HOST_WIDE_INT) val >> trailing_zeroes;
+ operands[1] = gen_ashldi3_media (operands[0], operands[0],
+ GEN_INT (trailing_zeroes));
+ break;
+ }
+ }
+ /* Try constant load / right shift. */
+ val2 = (val >> 15) + 1;
+ if (val2 == (val2 & -val2))
+ {
+ int shift = 49 - exact_log2 (val2);
+
+ val2 = trunc_int_for_mode (val << shift, DImode);
+ if (CONST_OK_FOR_I16 (val2))
+ {
+ operands[1] = gen_lshrdi3_media (operands[0], operands[0],
+ GEN_INT (shift));
+ break;
+ }
+ }
+ /* Try mperm.w . */
+ val2 = val & 0xffff;
+ if ((val >> 16 & 0xffff) == val2
+ && (val >> 32 & 0xffff) == val2
+ && (val >> 48 & 0xffff) == val2)
+ {
+ val2 = (HOST_WIDE_INT) val >> 48;
+ operands[1] = gen_rtx_REG (V4HImode, true_regnum (operands[0]));
+ operands[1] = gen_mperm_w0 (operands[1], operands[1]);
+ break;
+ }
+ /* Try movi / mshflo.l */
+ val2 = (HOST_WIDE_INT) val >> 32;
+ if (val2 == ((unsigned HOST_WIDE_INT)
+ trunc_int_for_mode (val, SImode)))
+ {
+ operands[1] = gen_mshflo_l_di (operands[0], operands[0],
+ operands[0]);
+ break;
+ }
+ /* Try movi / mshflo.l w/ r63. */
+ val2 = val + ((HOST_WIDE_INT) -1 << 32);
+ if ((HOST_WIDE_INT) val2 < 0 && CONST_OK_FOR_I16 (val2))
+ {
+ operands[1] = gen_mshflo_l_di (operands[0], operands[0],
+ const0_rtx);
+ break;
+ }
+ }
+ val2 = high;
+ operands[1] = gen_shori_media (operands[0], operands[0], GEN_INT (low));
+ }
+ while (0);
+ operands[2] = GEN_INT (val2);
+}")
+
+(define_split
+ [(set (match_operand:DI 0 "ext_dest_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_CODE (operands[1]) == CONST_DOUBLE"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16)) (match_dup 1)))]
+ "
+{
+ unsigned HOST_WIDE_INT low = CONST_DOUBLE_LOW (operands[1]);
+ unsigned HOST_WIDE_INT high = CONST_DOUBLE_HIGH (operands[1]);
+ unsigned HOST_WIDE_INT val = low;
+ unsigned HOST_WIDE_INT sign;
+
+ /* Zero-extend the 16 least-significant bits. */
+ val &= 0xffff;
+ operands[1] = GEN_INT (val);
+
+ /* Arithmetic shift right the double-word by 16 bits. */
+ low >>= 16;
+ low |= (high & 0xffff) << (HOST_BITS_PER_WIDE_INT - 16);
+ high >>= 16;
+ sign = 1;
+ sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1);
+ high ^= sign;
+ high -= sign;
+
+ /* This will only be true if high is a sign-extension of low, i.e.,
+ it must be either 0 or (unsigned)-1, and be zero iff the
+ most-significant bit of low is set. */
+ if (high + (low >> (HOST_BITS_PER_WIDE_INT - 1)) == 0)
+ operands[2] = GEN_INT (low);
+ else
+ operands[2] = immed_double_const (low, high, DImode);
+}")
+
+(define_insn "shori_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0,0")
+ (const_int 16))
+ (match_operand:DI 2 "immediate_operand" "K16Csu,nF")))]
+ "TARGET_SHMEDIA && (reload_completed || arith_reg_dest (operands[0], DImode))"
+ "@
+ shori %u2, %0
+ #"
+ [(set_attr "type" "arith_media,*")])
+
+(define_insn "*shori_media_si"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 16))
+ (match_operand:SI 2 "immediate_operand" "K16Csu")))]
+ "TARGET_SHMEDIA"
+ "shori %u2, %0")
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ ""
+ "{ if (prepare_move_operands (operands, DImode)) DONE; }")
+
+(define_insn "movdf_media"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], DFmode)
+ || sh_register_operand (operands[1], DFmode))"
+ "@
+ fmov.d %1, %0
+ fmov.qd %N1, %0
+ fmov.dq %1, %0
+ add %1, r63, %0
+ #
+ fld%M1.d %m1, %0
+ fst%M0.d %m0, %1
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "fmove_media,fload_media,dfpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media")])
+
+(define_insn "movdf_media_nofpu"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "r,F,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], DFmode)
+ || sh_register_operand (operands[1], DFmode))"
+ "@
+ add %1, r63, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,*,load_media,store_media")])
+
+(define_split
+ [(set (match_operand:DF 0 "arith_reg_dest" "")
+ (match_operand:DF 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 3) (match_dup 2))]
+ "
+{
+ int endian = WORDS_BIG_ENDIAN ? 1 : 0;
+ long values[2];
+ REAL_VALUE_TYPE value;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (value, values);
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ operands[2] = immed_double_const ((unsigned long) values[endian]
+ | ((HOST_WIDE_INT) values[1 - endian]
+ << 32), 0, DImode);
+ else
+ {
+ gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
+ operands[2] = immed_double_const (values[endian], values[1 - endian],
+ DImode);
+ }
+
+ operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+}")
+
+;; ??? This should be a define expand.
+
+(define_insn "movdf_k"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
+ "TARGET_SH1
+ && (! (TARGET_SH4 || TARGET_SH2A_DOUBLE) || reload_completed
+ /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
+ || (REG_P (operands[0]) && REGNO (operands[0]) == 3)
+ || (REG_P (operands[1]) && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+ "* return output_movedouble (insn, operands, DFmode);"
+ [(set_attr "length" "4")
+ (set_attr "type" "move,pcload,load,store")])
+
+;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD.
+;; However, the d/F/c/z alternative cannot be split directly; it is converted
+;; with special code in machine_dependent_reorg into a load of the R0_REG and
+;; the d/m/c/X alternative, which is split later into single-precision
+;; instructions. And when not optimizing, no splits are done before fixing
+;; up pcloads, so we need usable length information for that.
+(define_insn "movdf_i4"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d")
+ (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+ {
+ switch (which_alternative)
+ {
+ case 0:
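+	/* With fmovd a double-precision move is a single fmov; without
+	   it the pair is moved as two single-precision fmovs, ordered so
+	   that an overlapping destination does not clobber a source half
+	   before it has been read.  */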
+ if (TARGET_FMOVD)
+ return "fmov %1,%0";
+ else if (REGNO (operands[0]) != REGNO (operands[1]) + 1)
+ return "fmov %R1,%R0\n\tfmov %S1,%S0";
+ else
+ return "fmov %S1,%S0\n\tfmov %R1,%R0";
+ case 3:
+ case 4:
+ return "fmov.d %1,%0";
+ default:
+ return "#";
+ }
+ }
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))
+ (const_int 4)
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (const_int 4)
+ (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
+ ;; We can't use 4-byte push/pop on SHcompact, so we have to
+ ;; increment or decrement r15 explicitly.
+ (if_then_else
+ (ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0))
+ (const_int 10) (const_int 8))
+ (if_then_else
+ (ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0))
+ (const_int 10) (const_int 8))])
+ (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload")
+ (set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*")
+ (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+ (const_string "double")
+ (const_string "none")))])
+
+;; Moving DFmode between fp/general registers through memory
+;; (the top of the stack) is faster than moving through fpul even for
+;; little endian. Because the type of an instruction is important for its
+;; scheduling, it is beneficial to split these operations, rather than
+;; emitting them in one single chunk, even if this will expose a stack
+;; use that will prevent scheduling of other stack accesses beyond this
+;; instruction.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 "=X"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed
+ && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
+ [(const_int 0)]
+ "
+{
+ rtx insn, tos;
+
+ if (TARGET_SH5 && true_regnum (operands[1]) < 16)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -8));
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ }
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2]));
+ if (! (TARGET_SH5 && true_regnum (operands[1]) < 16))
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2]));
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ emit_move_insn (stack_pointer_rtx, plus_constant (stack_pointer_rtx, 8));
+ else
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+}")
+
+;; local-alloc sometimes allocates scratch registers even when not required,
+;; so we must be prepared to handle these.
+
+;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k.
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && reload_completed
+ && true_regnum (operands[0]) < 16
+ && true_regnum (operands[1]) < 16"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ /* If this was a reg <-> mem operation with base + index reg addressing,
+ we have to handle this in a special way. */
+ rtx mem = operands[0];
+ int store_p = 1;
+ if (! memory_operand (mem, DFmode))
+ {
+ mem = operands[1];
+ store_p = 0;
+ }
+ if (GET_CODE (mem) == SUBREG && SUBREG_BYTE (mem) == 0)
+ mem = SUBREG_REG (mem);
+ if (MEM_P (mem))
+ {
+ rtx addr = XEXP (mem, 0);
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0))
+ && REG_P (XEXP (addr, 1)))
+ {
+ int offset;
+ rtx reg0 = gen_rtx_REG (Pmode, 0);
+	  rtx regop = operands[store_p], word0, word1;
+
+ if (GET_CODE (regop) == SUBREG)
+ alter_subreg (&regop);
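+	  /* The two words are accessed by temporarily bumping r0, one of
+	     the two address registers.  When the address is r0+r0, adding
+	     2 to r0 moves the effective address by 4.  */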
+ if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1)))
+ offset = 2;
+ else
+ offset = 4;
+ mem = copy_rtx (mem);
+ PUT_MODE (mem, SImode);
+ word0 = gen_rtx_SUBREG (SImode, regop, 0);
+ alter_subreg (&word0);
+ word1 = gen_rtx_SUBREG (SImode, regop, 4);
+ alter_subreg (&word1);
+ if (store_p || ! refers_to_regno_p (REGNO (word0),
+ REGNO (word0) + 1, addr, 0))
+ {
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word0)
+ : gen_movsi_ie (word0, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ mem = copy_rtx (mem);
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word1)
+ : gen_movsi_ie (word1, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
+ }
+ else
+ {
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ emit_insn (gen_movsi_ie (word1, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
+ mem = copy_rtx (mem);
+ emit_insn (gen_movsi_ie (word0, mem));
+ }
+ DONE;
+ }
+ }
+}")
+
+;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (reg:SI R0_REG))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "reload_indf__frn"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=a")
+ (match_operand:DF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "reload_outdf__RnFRm"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f")
+ (match_operand:DF 1 "register_operand" "af,r"))
+ (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])]
+ "TARGET_SH1"
+ "")
+
+;; Simplify no-op moves.
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_SH2E && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 0) (match_dup 0))]
+ "")
+
+;; fmovd substitute post-reload splits
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
+ [(const_int 0)]
+ "
+{
+ int dst = true_regnum (operands[0]), src = true_regnum (operands[1]);
+ emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst),
+ gen_rtx_REG (SFmode, src), operands[2]));
+ emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst + 1),
+ gen_rtx_REG (SFmode, src + 1), operands[2]));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (mem:DF (match_operand:SI 1 "register_operand" "")))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))
+ && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))"
+ [(const_int 0)]
+ "
+{
+ int regno = true_regnum (operands[0]);
+ rtx insn;
+ rtx mem = SET_SRC (XVECEXP (PATTERN (curr_insn), 0, 0));
+ rtx mem2
+ = change_address (mem, SFmode, gen_rtx_POST_INC (Pmode, operands[1]));
+ insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode,
+ regno + !! TARGET_LITTLE_ENDIAN),
+ mem2, operands[2]));
+ add_reg_note (insn, REG_INC, operands[1]);
+ insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode,
+ regno + ! TARGET_LITTLE_ENDIAN),
+ change_address (mem, SFmode, NULL_RTX),
+ operands[2]));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))"
+ [(const_int 0)]
+{
+ int regno = true_regnum (operands[0]);
+ rtx addr, insn;
+ rtx mem2 = change_address (operands[1], SFmode, NULL_RTX);
+ rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1));
+
+ operands[1] = copy_rtx (mem2);
+ addr = XEXP (mem2, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* This is complicated. If the register is an arithmetic register
+ we can just fall through to the REG+DISP case below. Otherwise
+ we have to use a combination of POST_INC and REG addressing... */
+ if (! arith_reg_operand (operands[1], SFmode))
+ {
+ XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr);
+ insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+
+ /* If we have modified the stack pointer, the value that we have
+ read with post-increment might be modified by an interrupt,
+ so write it back. */
+ if (REGNO (XEXP (addr, 0)) == STACK_POINTER_REGNUM)
+ emit_insn (gen_push_e (reg0));
+ else
+	    emit_insn (gen_addsi3 (XEXP (operands[1], 0),
+				   XEXP (operands[1], 0), GEN_INT (-4)));
+ break;
+ }
+ /* Fall through. */
+
+ case PLUS:
+ emit_insn (gen_movsf_ie (reg0, operands[1], operands[2]));
+ operands[1] = copy_rtx (operands[1]);
+ XEXP (operands[1], 0) = plus_constant (addr, 4);
+ emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+ break;
+
+ case POST_INC:
+ insn = emit_insn (gen_movsf_ie (reg0, operands[1], operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+
+ default:
+ debug_rtx (addr);
+ gcc_unreachable ();
+ }
+
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
+ [(const_int 0)]
+{
+ int regno = true_regnum (operands[1]);
+ rtx insn, addr;
+ rtx reg0 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ rtx reg1 = gen_rtx_REG (SFmode, regno + (TARGET_LITTLE_ENDIAN ? 0 : 1));
+
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], SFmode);
+ addr = XEXP (operands[0], 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* This is complicated. If the register is an arithmetic register
+ we can just fall through to the REG+DISP case below. Otherwise
+ we have to use a combination of REG and PRE_DEC addressing... */
+ if (! arith_reg_operand (operands[0], SFmode))
+ {
+ emit_insn (gen_addsi3 (addr, addr, GEN_INT (4)));
+ emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+
+ operands[0] = copy_rtx (operands[0]);
+ XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr);
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+ }
+ /* Fall through. */
+
+ case PLUS:
+      /* Since REG+DISP addressing has already been decided upon by GCC,
+ we can rely upon it having chosen an arithmetic register as the
+ register component of the address. Just emit the lower numbered
+ register first, to the lower address, then the higher numbered
+ register to the higher address. */
+ emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+
+ operands[0] = copy_rtx (operands[0]);
+ XEXP (operands[0], 0) = plus_constant (addr, 4);
+
+ emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+ break;
+
+ case PRE_DEC:
+      /* This is easy.  Output the word to go to the higher address
+	 first (i.e. the word in the higher-numbered register), then
+	 the word to go to the lower address.  */
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+
+ default:
+ /* FAIL; */
+ debug_rtx (addr);
+ gcc_unreachable ();
+ }
+
+ DONE;
+})
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ int regno;
+
+ if ((MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ regno = REGNO (operands[0]);
+ break;
+ case SUBREG:
+ regno = subreg_regno (operands[0]);
+ break;
+ case MEM:
+ regno = -1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 0, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 1, 0, DFmode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 1, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 0, 0, DFmode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+}")
+
+;; If a base address generated by LEGITIMIZE_ADDRESS for SImode is
+;; used only once, let combine add in the index again.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "" ""))
+ (clobber (match_operand 2 "register_operand" ""))]
+ "TARGET_SH1 && ! reload_in_progress && ! reload_completed
+ && ALLOW_INDEXED_ADDRESS"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ rtx addr, reg, const_int;
+
+ if (!MEM_P (operands[1]))
+ FAIL;
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) != PLUS)
+ FAIL;
+ reg = XEXP (addr, 0);
+ const_int = XEXP (addr, 1);
+ if (! (BASE_REGISTER_RTX_P (reg) && INDEX_REGISTER_RTX_P (operands[2])
+ && CONST_INT_P (const_int)))
+ FAIL;
+ emit_move_insn (operands[2], const_int);
+ emit_move_insn (operands[0],
+ change_address (operands[1], VOIDmode,
+ gen_rtx_PLUS (SImode, reg, operands[2])));
+ DONE;
+}")
+
+(define_split
+ [(set (match_operand:SI 1 "" "")
+ (match_operand:SI 0 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))]
+ "TARGET_SH1 && ! reload_in_progress && ! reload_completed
+ && ALLOW_INDEXED_ADDRESS"
+ [(use (reg:SI R0_REG))]
+ "
+{
+ rtx addr, reg, const_int;
+
+ if (!MEM_P (operands[1]))
+ FAIL;
+ addr = XEXP (operands[1], 0);
+ if (GET_CODE (addr) != PLUS)
+ FAIL;
+ reg = XEXP (addr, 0);
+ const_int = XEXP (addr, 1);
+ if (! (BASE_REGISTER_RTX_P (reg) && INDEX_REGISTER_RTX_P (operands[2])
+ && CONST_INT_P (const_int)))
+ FAIL;
+ emit_move_insn (operands[2], const_int);
+ emit_move_insn (change_address (operands[1], VOIDmode,
+ gen_rtx_PLUS (SImode, reg, operands[2])),
+ operands[0]);
+ DONE;
+}")
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ ""
+ "
+{
+ if (prepare_move_operands (operands, DFmode)) DONE;
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA_FPU)
+ emit_insn (gen_movdf_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_movdf_media_nofpu (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+;; This is incompatible with the way GCC uses subregs.
+;;(define_insn "movv2sf_i"
+;; [(set (match_operand:V2SF 0 "nonimmediate_operand" "=f,f,m")
+;; (match_operand:V2SF 1 "nonimmediate_operand" "f,m,f"))]
+;; "TARGET_SHMEDIA_FPU
+;; && (fp_arith_reg_operand (operands[0], V2SFmode)
+;; || fp_arith_reg_operand (operands[1], V2SFmode))"
+;; "@
+;; #
+;; fld%M1.p %m1, %0
+;; fst%M0.p %m0, %1"
+;; [(set_attr "type" "*,fload_media,fstore_media")])
+
+(define_insn_and_split "movv2sf_i"
+ [(set (match_operand:V2SF 0 "general_movdst_operand" "=f,rf,r,m,mf")
+ (match_operand:V2SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "TARGET_SHMEDIA_FPU && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ operands[0] = simplify_gen_subreg (DFmode, operands[0], V2SFmode, 0);
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2SFmode, 0);
+}")
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "general_movdst_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ if (prepare_move_operands (operands, V2SFmode))
+ DONE;
+}")
+
+(define_expand "addv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "subv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ sh_expand_binop_v2sf (MINUS, operands[0], operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "mulv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ sh_expand_binop_v2sf (MULT, operands[0], operands[1], operands[2]);
+ DONE;
+}")
+
+(define_expand "divv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ sh_expand_binop_v2sf (DIV, operands[0], operands[1], operands[2]);
+ DONE;
+}")
+
+(define_insn_and_split "*movv4sf_i"
+ [(set (match_operand:V4SF 0 "general_movdst_operand" "=f,rf,r,m,mf")
+ (match_operand:V4SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "
+{
+ int i;
+
+ for (i = 0; i < 4/2; i++)
+ {
+ rtx x, y;
+
+ if (MEM_P (operands[0]))
+ x = adjust_address (operands[0], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ x = simplify_gen_subreg (V2SFmode, operands[0], V4SFmode, i * 8);
+
+ if (MEM_P (operands[1]))
+ y = adjust_address (operands[1], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ y = simplify_gen_subreg (V2SFmode, operands[1], V4SFmode, i * 8);
+
+ emit_insn (gen_movv2sf_i (x, y));
+ }
+
+ DONE;
+}"
+ [(set_attr "length" "8")])
+
+(define_expand "movv4sf"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "general_operand" ""))]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ if (prepare_move_operands (operands, V4SFmode))
+ DONE;
+}")
+
+(define_insn_and_split "*movv16sf_i"
+ [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m")
+ (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "
+{
+ int i;
+
+ for (i = 0; i < 16/2; i++)
+ {
+      rtx x, y;
+
+ if (MEM_P (operands[0]))
+ x = adjust_address (operands[0], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ {
+ x = gen_rtx_SUBREG (V2SFmode, operands[0], i * 8);
+ alter_subreg (&x);
+ }
+
+ if (MEM_P (operands[1]))
+ y = adjust_address (operands[1], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ {
+ y = gen_rtx_SUBREG (V2SFmode, operands[1], i * 8);
+ alter_subreg (&y);
+ }
+
+ emit_insn (gen_movv2sf_i (x, y));
+ }
+
+ DONE;
+}"
+ [(set_attr "length" "32")])
+
+(define_expand "movv16sf"
+ [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m")
+ (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))]
+ "TARGET_SHMEDIA_FPU"
+ "
+{
+ if (prepare_move_operands (operands, V16SFmode))
+ DONE;
+}")
+
+(define_insn "movsf_media"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m")
+ (match_operand:SF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], SFmode)
+ || sh_register_operand (operands[1], SFmode))"
+ "@
+ fmov.s %1, %0
+ fmov.ls %N1, %0
+ fmov.sl %1, %0
+ add.l %1, r63, %0
+ #
+ fld%M1.s %m1, %0
+ fst%M0.s %m0, %1
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "fmove_media,fload_media,fpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "movsf_media_nofpu"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:SF 1 "general_movsrc_operand" "r,F,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], SFmode)
+ || sh_register_operand (operands[1], SFmode))"
+ "@
+ add.l %1, r63, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "arith_media,*,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SF 0 "arith_reg_dest" "")
+ (match_operand:SF 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ! FP_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 2))]
+ "
+{
+ long values;
+ REAL_VALUE_TYPE value;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (value, values);
+ operands[2] = GEN_INT (values);
+
+ operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+}")
+
+(define_insn "movsf_i"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
+ (match_operand:SF 1 "general_movsrc_operand" "r,G,FQ,mr,r,r,l"))]
+ "TARGET_SH1
+ && (! TARGET_SH2E
+       /* ??? We provide some insn so that direct_{load,store}[SFmode] get set.  */
+ || (REG_P (operands[0]) && REGNO (operands[0]) == 3)
+ || (REG_P (operands[1]) && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode))"
+ "@
+ mov %1,%0
+ mov #0,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ lds %1,%0
+ sts %1,%0"
+ [(set_attr "type" "move,move,pcload,load,store,move,move")])
+
+;; We may not split the ry/yr/XX alternatives to movsi_ie, since
+;; update_flow_info would not know where to put REG_EQUAL notes
+;; when the destination changes mode.
+(define_insn "movsf_ie"
+ [(set (match_operand:SF 0 "general_movdst_operand"
+ "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y")
+ (match_operand:SF 1 "general_movsrc_operand"
+ "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,Bsc,Bsc,&z,X,X,X,X,X,X,X,X,y,X,X,X,X,X"))]
+
+ "TARGET_SH2E
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode)
+ || arith_reg_operand (operands[3], SImode)
+ || (fpul_operand (operands[0], SFmode)
+ && memory_operand (operands[1], SFmode)
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC)
+ || (fpul_operand (operands[1], SFmode)
+ && memory_operand (operands[0], SFmode)
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC))"
+ "@
+ fmov %1,%0
+ mov %1,%0
+ fldi0 %0
+ fldi1 %0
+ #
+ fmov.s %1,%0
+ fmov.s %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ fsts fpul,%0
+ flds %1,fpul
+ lds.l %1,%0
+ #
+ sts %1,%0
+ lds %1,%0
+ sts.l %1,%0
+ lds.l %1,%0
+ ! move optimized away"
+ [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (if_then_else
+ (ne (symbol_ref "TARGET_SH2A") (const_int 0))
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 0)])
+   (set (attr "fp_mode") (const_string "single"))])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (reg:SI FPUL_REG))]
+ "TARGET_SH1"
+ [(parallel [(set (reg:SF FPUL_REG) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_dup 0) (reg:SF FPUL_REG))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_movdst_operand" "")
+ (match_operand:SF 1 "general_movsrc_operand" ""))]
+ ""
+ "
+{
+ if (prepare_move_operands (operands, SFmode))
+ DONE;
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA_FPU)
+ emit_insn (gen_movsf_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_movsf_media_nofpu (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SH2E)
+ {
+ emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "mov_nop"
+ [(set (match_operand 0 "any_register_operand" "") (match_dup 0))]
+ "TARGET_SH2E"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "nil")])
+
+(define_expand "reload_insf__frn"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "=a")
+ (match_operand:SF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "reload_insi__i_fpul"
+ [(parallel [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "ptabs"
+ [(set (match_operand 0 "" "=b") (match_operand 1 "" "r"))]
+ "TARGET_SHMEDIA"
+ "
+{
+ if (!TARGET_PT_FIXED)
+ {
+ rtx eq = operands[1];
+
+ /* ??? For canonical RTL we really should remove any CONST from EQ
+ before wrapping it in the AND, and finally wrap the EQ into a
+     const if it is constant.  However, for reload we must expose the
+ input register or symbolic constant, and we can't have
+ different insn structures outside of the operands for different
+ alternatives of the same pattern. */
+ eq = gen_rtx_EQ (SImode, gen_rtx_AND (Pmode, eq, GEN_INT (3)),
+ GEN_INT (3));
+ operands[1]
+ = (gen_rtx_IF_THEN_ELSE
+ (PDImode,
+ eq,
+ gen_rtx_MEM (PDImode, operands[1]),
+ gen_rtx_fmt_e (TARGET_SHMEDIA32 ? SIGN_EXTEND : TRUNCATE,
+ PDImode, operands[1])));
+ }
+}")
+
+;; Expanded by the ptabs expander.
+(define_insn "*extendsipdi_media"
+  [(set (match_operand:PDI 0 "target_reg_operand" "=b,b")
+ (if_then_else:PDI (eq (and:SI (match_operand:SI 1 "target_operand"
+ "r,Csy")
+ (const_int 3))
+ (const_int 3))
+ (mem:PDI (match_dup 1))
+ (sign_extend:PDI (match_dup 1))))]
+ "TARGET_SHMEDIA && !TARGET_PT_FIXED"
+ "@
+ ptabs %1, %0
+ pt %1, %0"
+ [(set_attr "type" "ptabs_media,pt_media")
+ (set_attr "length" "4,*")])
+
+(define_insn "*truncdipdi_media"
+  [(set (match_operand:PDI 0 "target_reg_operand" "=b,b")
+ (if_then_else:PDI (eq (and:DI (match_operand:DI 1 "target_operand"
+ "r,Csy")
+ (const_int 3))
+ (const_int 3))
+ (mem:PDI (match_dup 1))
+ (truncate:PDI (match_dup 1))))]
+ "TARGET_SHMEDIA && !TARGET_PT_FIXED"
+ "@
+ ptabs %1, %0
+ pt %1, %0"
+ [(set_attr "type" "ptabs_media,pt_media")
+ (set_attr "length" "4,*")])
+
+(define_insn "*movsi_y"
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
+ (match_operand:SI 1 "immediate_operand" "Qi,I08"))
+ (clobber (match_scratch:SI 2 "=&z,r"))]
+ "TARGET_SH2E
+ && (reload_in_progress || reload_completed)"
+ "#"
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_SH1"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_SH1"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+;; ------------------------------------------------------------------------
+;; Define the real conditional branch instructions.
+;; ------------------------------------------------------------------------
+
+(define_insn "branch_true"
+ [(set (pc) (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_SH1"
+ "* return output_branch (1, insn, operands);"
+ [(set_attr "type" "cbranch")])
+
+(define_insn "branch_false"
+ [(set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_SH1"
+ "* return output_branch (0, insn, operands);"
+ [(set_attr "type" "cbranch")])
+
+;; Patterns to prevent reorg from re-combining a condbranch with a branch
+;; whose destination is too far away.
+;; The const_int_operand is distinct for each branch target; it avoids
+;; unwanted matches with redundant_insn.
+(define_insn "block_branch_redirect"
+ [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")])
+
+;; This one has the additional purpose to record a possible scratch register
+;; for the following branch.
+;; ??? Unfortunately, just setting the scratch register is not good enough,
+;; because the insn then might be deemed dead and deleted. And we can't
+;; make the use in the jump insn explicit because that would disable
+;; delay slot scheduling from the target.
+(define_insn "indirect_jump_scratch"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR))
+ (set (pc) (unspec [(const_int 0)] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")])
+
+;; This one is used to prevent an insn from beyond the bra / braf / jmp
+;; from being pulled into the delay slot of a condbranch that has been
+;; made to jump around the unconditional jump because it was out of range.
+(define_insn "stuff_delay_slot"
+ [(set (pc)
+ (unspec [(match_operand:SI 0 "const_int_operand" "") (pc)
+ (match_operand:SI 1 "const_int_operand" "")] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "cond_delay_slot" "yes")])
+
+;; Conditional branch insns
+
+(define_expand "cbranchint4_media"
+ [(set (pc)
+ (if_then_else (match_operator 0 "shmedia_cbranch_comparison_operator"
+ [(match_operand 1 "" "")
+ (match_operand 2 "" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "
+{
+ enum machine_mode mode = GET_MODE (operands[1]);
+ if (mode == VOIDmode)
+ mode = GET_MODE (operands[2]);
+ if (GET_CODE (operands[0]) == EQ || GET_CODE (operands[0]) == NE)
+ {
+ operands[1] = force_reg (mode, operands[1]);
+ if (CONSTANT_P (operands[2])
+ && (! satisfies_constraint_I06 (operands[2])))
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ else
+ {
+ if (operands[1] != const0_rtx)
+ operands[1] = force_reg (mode, operands[1]);
+ if (operands[2] != const0_rtx)
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ switch (GET_CODE (operands[0]))
+ {
+ case LEU:
+ case LE:
+ case LTU:
+ case LT:
+ operands[0] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[0])),
+ VOIDmode, operands[2], operands[1]);
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = XEXP (operands[0], 1);
+ break;
+ default:
+ operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]),
+ VOIDmode, operands[1], operands[2]);
+ break;
+ }
+ operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]);
+}")
+
+(define_expand "cbranchfp4_media"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand 1 "" "")
+ (match_operand 2 "" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx cmp;
+ if (GET_CODE (operands[0]) == NE)
+ cmp = gen_rtx_EQ (SImode, operands[1], operands[2]);
+ else
+ cmp = gen_rtx_fmt_ee (GET_CODE (operands[0]), SImode,
+ operands[1], operands[2]);
+
+ emit_insn (gen_cstore4_media (tmp, cmp, operands[1], operands[2]));
+
+ if (GET_CODE (cmp) == GET_CODE (operands[0]))
+ operands[0] = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ else
+ operands[0] = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ operands[1] = tmp;
+ operands[2] = const0_rtx;
+ operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]);
+}")
+
+(define_insn "*beq_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "equality_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "arith_operand" "r,I06")])
+ (match_operand 0 "target_operand" "b,b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "@
+ b%o3%' %1, %2, %0%>
+ b%o3i%' %1, %2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*beq_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "equality_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "r,I06")])
+ (match_operand 0 "target_operand" "b,b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "@
+ b%o3%' %1, %2, %0%>
+ b%o3i%' %1, %2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*bgt_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "greater_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N1, %N2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*bgt_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "greater_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N1, %N2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+;; These are only needed to make invert_jump() happy; otherwise, jump
+;; optimization will be silently disabled.
+(define_insn "*blt_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "less_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N2, %N1, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*blt_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "less_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N2, %N1, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+;; Combiner splitter for a test-and-branch on a single bit in a register.
+;; This is endian-dependent because the non-paradoxical subreg looks
+;; different on big-endian targets.
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(subreg:SI (zero_extract:DI (subreg:DI (match_operand:SI 1
+ "extend_reg_operand" "")
+ 0)
+ (const_int 1)
+ (match_operand 2
+ "const_int_operand" "")) 0)
+ (const_int 0)])
+ (match_operand 0 "target_operand" "")
+ (pc)))
+ (clobber (match_operand:SI 4 "arith_reg_dest" ""))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 5)))
+ (set (pc) (if_then_else (match_dup 6) (match_dup 0) (pc)))]
+
+ "
+{
+ operands[5] = GEN_INT (31 - INTVAL (operands[2]));
+ operands[6] = (GET_CODE (operands[3]) == EQ
+ ? gen_rtx_GE (VOIDmode, operands[4], const0_rtx)
+ : gen_rtx_GT (VOIDmode, const0_rtx, operands[4]));
+}")
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+
+(define_expand "doloop_end"
+ [(parallel [(set (pc) (if_then_else
+ (ne:SI (match_operand:SI 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (const_int -1)))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH2"
+ "
+{
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+}
+")
+
+(define_insn_and_split "doloop_end_split"
+ [(set (pc)
+ (if_then_else (ne:SI (match_operand:SI 2 "arith_reg_dest" "0")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus (match_dup 2) (const_int -1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH2"
+ "#"
+ ""
+ [(parallel [(set (reg:SI T_REG)
+ (eq:SI (match_dup 2) (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))])
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+""
+ [(set_attr "type" "cbranch")])
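+
+;; On SH2 the split above normally assembles to the decrement-and-test
+;; idiom (a sketch; the exact output depends on delay-slot filling):
+;;	dt	rn		! rn = rn - 1, T = (rn == 0)
+;;	bf	.Ltop		! loop back while T is clear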
+
+
+;; ------------------------------------------------------------------------
+;; Jump and linkage insns
+;; ------------------------------------------------------------------------
+
+(define_insn "jump_compact"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+ "*
+{
+ /* The length is 16 if the delay slot is unfilled. */
+  if (get_attr_length (insn) > 4)
+    return output_far_jump (insn, operands[0]);
+  else
+    return \"bra %l0%#\";
+}"
+ [(set_attr "type" "jump")
+ (set_attr "needs_delay_slot" "yes")])
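+
+;; When the target is beyond bra's reach (a 12-bit branch displacement,
+;; roughly +/-4 KB), output_far_jump emits a longer indirect sequence
+;; instead, which is why the unfilled-delay-slot length can reach 16.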
+
+;; ??? It would be much saner to explicitly use the scratch register
+;; in the jump insn, and have indirect_jump_scratch only set it,
+;; but fill_simple_delay_slots would refuse to do delay slot filling
+;; from the target then, as it uses simplejump_p.
+;;(define_insn "jump_compact_far"
+;; [(set (pc)
+;; (label_ref (match_operand 0 "" "")))
+;; (use (match_operand 1 "register_operand" "r")]
+;; "TARGET_SH1"
+;; "* return output_far_jump(insn, operands[0], operands[1]);"
+;; [(set_attr "type" "jump")
+;; (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "jump_media"
+ [(set (pc)
+ (match_operand 0 "target_operand" "b"))]
+ "TARGET_SHMEDIA"
+ "blink %0, r63%>"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "
+{
+ if (TARGET_SH1)
+ emit_jump_insn (gen_jump_compact (operands[0]));
+ else if (TARGET_SHMEDIA)
+ {
+ if (reload_in_progress || reload_completed)
+ FAIL;
+ emit_jump_insn (gen_jump_media (gen_rtx_LABEL_REF (Pmode,
+ operands[0])));
+ }
+ DONE;
+}")
+
+(define_insn "force_mode_for_call"
+ [(use (reg:PSI FPSCR_REG))]
+ "TARGET_SHCOMPACT"
+ ""
+ [(set_attr "length" "0")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))])
+
+(define_insn "calli"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1"
+ "*
+ {
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return \"jsr/n\\t@%0\";
+ else
+ return \"jsr\\t@%0%#\";
+ }"
+
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+;; This is the TBR-relative call instruction for the SH2A architecture.
+;; Its use is enabled by assigning the "function_vector" attribute, along
+;; with a vector number, to a function at its declaration.
+
+(define_insn "calli_tbr_rel"
+ [(call (mem (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2A && sh2a_is_function_vector_call (operands[0])"
+ "*
+{
+ unsigned HOST_WIDE_INT vect_num;
+ vect_num = sh2a_get_function_vector_number (operands[0]);
+ operands[2] = GEN_INT (vect_num * 4);
+
+ return \"jsr/n\\t@@(%O2,tbr)\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "no")
+ (set_attr "fp_set" "unknown")])
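+
+;; For reference, a function would typically opt in with something like
+;;	void f (void) __attribute__ ((function_vector (5)));
+;; after which a call to f is emitted as "jsr/n @@(20,tbr)", the
+;; displacement being the vector number times 4 as computed above.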
+
+;; This is a pc-rel call, using bsrf, for use with PIC.
+
+(define_insn "calli_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2"
+ "bsrf %0\\n%O2:%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn_and_split "call_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (match_scratch:SI 2 "=r"))]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ if (SYMBOL_REF_LOCAL_P (operands[0]))
+ emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ else
+ emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab));
+ emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab)));
+ DONE;
+}"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_compact"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_compact_rettramp"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_media"
+ [(call (mem:DI (match_operand 0 "target_reg_operand" "b"))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI PR_MEDIA_REG))]
+ "TARGET_SHMEDIA"
+ "blink %0, r18"
+ [(set_attr "type" "jump_media")])
+
+(define_insn "call_valuei"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1"
+ "*
+ {
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return \"jsr/n\\t@%1\";
+ else
+ return \"jsr\\t@%1%#\";
+ }"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+;; This is the TBR-relative call instruction for the SH2A architecture.
+;; Its use is enabled by assigning the "function_vector" attribute, along
+;; with a vector number, to a function at its declaration.
+
+(define_insn "call_valuei_tbr_rel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2A && sh2a_is_function_vector_call (operands[1])"
+ "*
+{
+ unsigned HOST_WIDE_INT vect_num;
+ vect_num = sh2a_get_function_vector_number (operands[1]);
+ operands[3] = GEN_INT (vect_num * 4);
+
+ return \"jsr/n\\t@@(%O3,tbr)\";
+}"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "no")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_valuei_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2"
+ "bsrf %1\\n%O3:%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn_and_split "call_value_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ if (SYMBOL_REF_LOCAL_P (operands[1]))
+ emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ else
+ emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab));
+ emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3],
+ operands[2], copy_rtx (lab)));
+ DONE;
+}"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_value_compact"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_compact_rettramp"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_media"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:DI (match_operand 1 "target_reg_operand" "b"))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI PR_MEDIA_REG))]
+ "TARGET_SHMEDIA"
+ "blink %1, r18"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "call"
+ [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[0] = shmedia_prepare_call_address (operands[0], 0);
+ emit_call_insn (gen_call_media (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[2] && INTVAL (operands[2]))
+ {
+ rtx cookie_rtx = operands[2];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[0], 0);
+ rtx r0, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0]
+ = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_compact_rettramp (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_call_insn (gen_call_compact (operands[0], operands[1],
+ operands[2]));
+
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0)));
+ XEXP (operands[0], 0) = reg;
+ }
+ if (!flag_pic && TARGET_SH2A
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
+ {
+ if (sh2a_is_function_vector_call (XEXP (operands[0], 0)))
+ {
+ emit_call_insn (gen_calli_tbr_rel (XEXP (operands[0], 0),
+ operands[1]));
+ DONE;
+ }
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
+ {
+ emit_call_insn (gen_call_pcrel (XEXP (operands[0], 0), operands[1]));
+ DONE;
+ }
+ else
+ {
+ operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+ operands[1] = operands[2];
+ }
+
+ emit_call_insn (gen_calli (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn "call_pop_compact"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "immediate_operand" "n")))
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_pop_compact_rettramp"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "immediate_operand" "n")))
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "call_pop"
+ [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "" "")))])]
+ "TARGET_SHCOMPACT"
+ "
+{
+ rtx cookie_rtx;
+ long cookie;
+ rtx func;
+ rtx r0, r1;
+
+ gcc_assert (operands[2] && INTVAL (operands[2]));
+ cookie_rtx = operands[2];
+ cookie = INTVAL (cookie_rtx);
+ func = XEXP (operands[0], 0);
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0] = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_pop_compact_rettramp
+ (operands[0], operands[1], operands[2], operands[3]));
+ else
+ emit_call_insn (gen_call_pop_compact
+ (operands[0], operands[1], operands[2], operands[3]));
+
+ DONE;
+}")
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[1] = shmedia_prepare_call_address (operands[1], 0);
+ emit_call_insn (gen_call_value_media (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3]))
+ {
+ rtx cookie_rtx = operands[3];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[1], 0);
+ rtx r0, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1]
+ = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_value_compact_rettramp (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ else
+ emit_call_insn (gen_call_value_compact (operands[0], operands[1],
+ operands[2], operands[3]));
+
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0)));
+ XEXP (operands[1], 0) = reg;
+ }
+ if (!flag_pic && TARGET_SH2A
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ if (sh2a_is_function_vector_call (XEXP (operands[1], 0)))
+ {
+ emit_call_insn (gen_call_valuei_tbr_rel (operands[0],
+ XEXP (operands[1], 0), operands[2]));
+ DONE;
+ }
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ emit_call_insn (gen_call_value_pcrel (operands[0], XEXP (operands[1], 0),
+ operands[2]));
+ DONE;
+ }
+ else
+ operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+
+ emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "sibcalli"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH2"
+ "braf %0\\n%O2:%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+;; This uses an unspec to describe that the symbol_ref is very close.
+(define_insn "sibcalli_thunk"
+ [(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")]
+ UNSPEC_THUNK))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "bra %O0"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump")
+ (set_attr "length" "2")])
+
+(define_insn_and_split "sibcall_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_scratch:SI 2 "=k"))
+ (return)]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_compact"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
+ (match_operand 1 "" ""))
+ (return)
+ (use (match_operand:SI 2 "register_operand" "z,x"))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ ;; We want to make sure the `x' above will only match MACH_REG
+ ;; because sibcall_epilogue may clobber MACL_REG.
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SHCOMPACT"
+ "@
+ jmp @%0%#
+ jmp @%0\\n sts %2, r0"
+ [(set_attr "needs_delay_slot" "yes,no")
+ (set_attr "length" "2,4")
+ (set (attr "fp_mode") (const_string "single"))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_media"
+ [(call (mem:DI (match_operand 0 "target_reg_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (reg:SI PR_MEDIA_REG))
+ (return)]
+ "TARGET_SHMEDIA"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "sibcall"
+ [(parallel
+ [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (return)])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[0] = shmedia_prepare_call_address (operands[0], 1);
+ emit_call_insn (gen_sibcall_media (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[2]
+ && (INTVAL (operands[2]) & ~ CALL_COOKIE_RET_TRAMP (1)))
+ {
+ rtx cookie_rtx = operands[2];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[0], 0);
+ rtx mach, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ /* FIXME: if we could tell whether all argument registers are
+ already taken, we could decide whether to force the use of
+ MACH_REG or to stick to R0_REG. Unfortunately, there's no
+ simple way to tell. We could use the CALL_COOKIE, but we
+ can't currently tell a register used for regular argument
+ passing from one that is unused. If we leave it up to reload
+ to decide which register to use, it seems to always choose
+ R0_REG, which leaves no available registers in SIBCALL_REGS
+ to hold the address of the trampoline. */
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0]
+ = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ /* We don't need a return trampoline, since the callee will
+ return directly to the upper caller. */
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ {
+ cookie &= ~ CALL_COOKIE_RET_TRAMP (1);
+ cookie_rtx = GEN_INT (cookie);
+ }
+
+ emit_move_insn (mach, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ emit_call_insn (gen_sibcall_compact (operands[0], operands[1], mach));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0)));
+ XEXP (operands[0], 0) = reg;
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ /* The PLT needs the PIC register, but the epilogue would have
+ to restore it, so we can only use PC-relative PIC calls for
+ static functions. */
+ && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+ DONE;
+ }
+ else
+ operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+
+ emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+ DONE;
+}")
+
+(define_insn "sibcall_valuei"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "jmp @%1%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH2"
+ "braf %1\\n%O3:%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn_and_split "sibcall_value_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_scratch:SI 3 "=k"))
+ (return)]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0],
+ operands[3],
+ operands[2],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_value_compact"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k,k"))
+ (match_operand 2 "" "")))
+ (return)
+ (use (match_operand:SI 3 "register_operand" "z,x"))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ ;; We want to make sure the `x' above will only match MACH_REG
+ ;; because sibcall_epilogue may clobber MACL_REG.
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SHCOMPACT"
+ "@
+ jmp @%1%#
+ jmp @%1\\n sts %3, r0"
+ [(set_attr "needs_delay_slot" "yes,no")
+ (set_attr "length" "2,4")
+ (set (attr "fp_mode") (const_string "single"))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_value_media"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:DI (match_operand 1 "target_reg_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (reg:SI PR_MEDIA_REG))
+ (return)]
+ "TARGET_SHMEDIA"
+ "blink %1, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "sibcall_value"
+ [(parallel
+ [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (return)])]
+ ""
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[1] = shmedia_prepare_call_address (operands[1], 1);
+ emit_call_insn (gen_sibcall_value_media (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[3]
+ && (INTVAL (operands[3]) & ~ CALL_COOKIE_RET_TRAMP (1)))
+ {
+ rtx cookie_rtx = operands[3];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[1], 0);
+ rtx mach, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ /* FIXME: if we could tell whether all argument registers are
+ already taken, we could decide whether to force the use of
+ MACH_REG or to stick to R0_REG. Unfortunately, there's no
+ simple way to tell. We could use the CALL_COOKIE, but we
+ can't currently tell a register used for regular argument
+ passing from one that is unused. If we leave it up to reload
+ to decide which register to use, it seems to always choose
+ R0_REG, which leaves no available registers in SIBCALL_REGS
+ to hold the address of the trampoline. */
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1]
+ = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ /* We don't need a return trampoline, since the callee will
+ return directly to the upper caller. */
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ {
+ cookie &= ~ CALL_COOKIE_RET_TRAMP (1);
+ cookie_rtx = GEN_INT (cookie);
+ }
+
+ emit_move_insn (mach, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ emit_call_insn (gen_sibcall_value_compact (operands[0], operands[1],
+ operands[2], mach));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0)));
+ XEXP (operands[1], 0) = reg;
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ /* The PLT needs the PIC register, but the epilogue would have
+ to restore it, so we can only use PC-relative PIC calls for
+ static functions. */
+ && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ DONE;
+ }
+ else
+ operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+
+ emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "call_value_pop_compact"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "immediate_operand" "n")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_pop_compact_rettramp"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "immediate_operand" "n")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "" "")))])]
+ "TARGET_SHCOMPACT"
+ "
+{
+ rtx cookie_rtx;
+ long cookie;
+ rtx func;
+ rtx r0, r1;
+
+ gcc_assert (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3]));
+ cookie_rtx = operands[3];
+ cookie = INTVAL (cookie_rtx);
+ func = XEXP (operands[1], 0);
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+  /* Since such a call may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1] = function_symbol (NULL, \"__GCC_shcompact_call_trampoline\",
+ SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_value_pop_compact_rettramp
+ (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+ else
+ emit_call_insn (gen_call_value_pop_compact
+ (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+
+ DONE;
+}")
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+ "
+{
+ sh_expand_epilogue (1);
+ if (TARGET_SHCOMPACT)
+ {
+ rtx insn, set;
+
+      /* If the epilogue clobbers r0, preserve it in macl. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if ((set = single_set (insn))
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) == R0_REG)
+ {
+ rtx r0 = gen_rtx_REG (SImode, R0_REG);
+ rtx tmp = gen_rtx_REG (SImode, MACL_REG);
+
+ /* We can't tell at this point whether the sibcall is a
+ sibcall_compact and, if it is, whether it uses r0 or
+ mach as operand 2, so let the instructions that
+ preserve r0 be optimized away if r0 turns out to be
+ dead. */
+ emit_insn_before (gen_rtx_SET (SImode, tmp, r0), insn);
+ emit_move_insn (r0, tmp);
+ break;
+ }
+ }
+ DONE;
+}")
+
+(define_insn "indirect_jump_compact"
+ [(set (pc)
+ (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+(define_expand "indirect_jump"
+ [(set (pc)
+ (match_operand 0 "register_operand" ""))]
+ ""
+ "
+{
+ if (GET_MODE (operands[0]) != Pmode)
+ operands[0] = gen_rtx_SUBREG (Pmode, operands[0], 0);
+}")
+
+;; The use of operand 1 / 2 helps us distinguish case table jumps
+;; which can be present in structured code from indirect jumps which cannot
+;; be present in structured code.  This allows -fprofile-arcs to work.
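+;; (Illustrative note: the extra (use (label_ref ...)) operand marks the
+;; insn as a table jump, so its jump targets can be recovered from the
+;; dispatch table instead of being treated as unknown.)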
+
+;; For SH1 processors.
+(define_insn "casesi_jump_1"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; For all later processors.
+(define_insn "casesi_jump_2"
+ [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (label_ref (match_operand 1 "" ""))))
+ (use (label_ref (match_operand 2 "" "")))]
+ "TARGET_SH2
+ && (! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn)"
+ "braf %0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+(define_insn "casesi_jump_media"
+ [(set (pc) (match_operand 0 "target_reg_operand" "b"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_SHMEDIA"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+;; Call subroutine returning any type.
+;; ??? This probably doesn't work.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ "(TARGET_SH2E || TARGET_SH2A) || TARGET_SHMEDIA"
+ "
+{
+ int i;
+
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+;; ------------------------------------------------------------------------
+;; Misc insns
+;; ------------------------------------------------------------------------
+
+(define_insn "dect"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SI 1 "arith_reg_dest" "0") (const_int 1)))
+ (set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (match_dup 1) (const_int -1)))]
+ "TARGET_SH2"
+ "dt %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+;; Load the address of a label.  This is only generated by the casesi expand,
+;; and by machine_dependent_reorg (fixing up fp moves).
+;; This must use an unspec, because it only works for labels that are
+;; within range.
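+;; (As an illustrative note: mova takes an unsigned 8-bit longword
+;; displacement, so the label must lie no more than about 1020 bytes
+;; after the instruction.)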
+
+(define_insn "mova"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(label_ref (match_operand 0 "" ""))] UNSPEC_MOVA))]
+ "TARGET_SH1"
+ "mova %O0,r0"
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
+
+;; machine_dependent_reorg will make this a `mova'.
+(define_insn "mova_const"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(match_operand 0 "immediate_operand" "i")] UNSPEC_MOVA))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
+
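+;; Load the address of the GOT into the PIC register.  The pattern below
+;; uses mova to get the PC-relative address of the GOT constant into r0,
+;; loads the same constant into the PIC register, and adds the two.
+;; As an illustrative sketch (not taken from the sources), this typically
+;; assembles to something like:
+;;        mova    .L1,r0
+;;        mov.l   .L1,r12
+;;        add     r0,r12
+;; .L1:   .long   _GLOBAL_OFFSET_TABLE_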
+(define_expand "GOTaddr2picreg"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))]
+ UNSPEC_MOVA))
+ (set (match_dup 0) (const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))]
+ "" "
+{
+ if (TARGET_VXWORKS_RTP)
+ {
+ rtx gott_base = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+ rtx gott_index = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ emit_insn (gen_vxworks_picreg (gott_base, gott_index));
+ DONE;
+ }
+
+ operands[0] = gen_rtx_REG (Pmode, PIC_REG);
+ operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
+
+ if (TARGET_SHMEDIA)
+ {
+ rtx tr = gen_rtx_REG (Pmode, TR0_REG);
+ rtx pic = operands[0];
+ rtx lab = PATTERN (gen_call_site ());
+ rtx insn, equiv;
+
+ equiv = operands[1];
+ operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], lab),
+ UNSPEC_PCREL_SYMOFF);
+ operands[1] = gen_rtx_CONST (Pmode, operands[1]);
+
+ if (Pmode == SImode)
+ {
+ emit_insn (gen_movsi_const (pic, operands[1]));
+ emit_insn (gen_ptrel_si (tr, pic, copy_rtx (lab)));
+ }
+ else
+ {
+ emit_insn (gen_movdi_const (pic, operands[1]));
+ emit_insn (gen_ptrel_di (tr, pic, copy_rtx (lab)));
+ }
+
+ insn = emit_move_insn (operands[0], tr);
+
+ set_unique_reg_note (insn, REG_EQUAL, equiv);
+
+ DONE;
+ }
+}
+")
+
+;; A helper for GOTaddr2picreg to finish up the initialization of the
+;; PIC register.
+
+(define_expand "vxworks_picreg"
+ [(set (reg:SI PIC_REG)
+ (const:SI (unspec:SI [(match_operand:SI 0 "" "")] UNSPEC_PIC)))
+ (set (reg:SI R0_REG)
+ (const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PIC)))
+ (set (reg:SI PIC_REG)
+ (mem:SI (reg:SI PIC_REG)))
+ (set (reg:SI PIC_REG)
+ (mem:SI (plus:SI (reg:SI PIC_REG)
+ (reg:SI R0_REG))))]
+ "TARGET_VXWORKS_RTP")
+
+(define_insn "*ptb"
+ [(set (match_operand 0 "target_reg_operand" "=b")
+ (const (unspec [(match_operand 1 "" "Csy")]
+ UNSPEC_DATALABEL)))]
+ "TARGET_SHMEDIA && flag_pic
+ && satisfies_constraint_Csy (operands[1])"
+ "ptb/u datalabel %1, %0"
+ [(set_attr "type" "ptabs_media")
+ (set_attr "length" "*")])
+
+(define_insn "ptrel_si"
+ [(set (match_operand:SI 0 "target_reg_operand" "=b")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (pc)))
+ (match_operand:SI 2 "" "")]
+ "TARGET_SHMEDIA"
+ "%O2: ptrel/u %1, %0"
+ [(set_attr "type" "ptabs_media")])
+
+(define_insn "ptrel_di"
+ [(set (match_operand:DI 0 "target_reg_operand" "=b")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (pc)))
+ (match_operand:DI 2 "" "")]
+ "TARGET_SHMEDIA"
+ "%O2: ptrel/u %1, %0"
+ [(set_attr "type" "ptabs_media")])
+
+(define_expand "builtin_setjmp_receiver"
+ [(match_operand 0 "" "")]
+ "flag_pic"
+ "
+{
+ emit_insn (gen_GOTaddr2picreg ());
+ DONE;
+}")
+
+(define_expand "call_site"
+ [(unspec [(match_dup 0)] UNSPEC_CALLER)]
+ "TARGET_SH1"
+ "
+{
+ static HOST_WIDE_INT i = 0;
+ operands[0] = GEN_INT (i);
+ i++;
+}")
+
+;; op0 = op1 + r12, but hide it until reload has completed.  See the comment
+;; in the symGOT_load expand.
+
+(define_insn_and_split "chk_guard_add"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (reg:SI PIC_REG)]
+ UNSPEC_CHKADD))]
+ "TARGET_SH1"
+ "#"
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 0) (reg:SI PIC_REG))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))]
+ ""
+ [(set_attr "type" "arith")])
+
+(define_expand "sym_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI (unspec:SI [(match_operand:SI 1 "" "")
+ (const (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))]
+ UNSPEC_SYMOFF)))]
+ "TARGET_SH1" "")
+
+(define_expand "symGOT_load"
+ [(set (match_dup 2) (match_operand 1 "" ""))
+ (set (match_dup 3) (plus (match_dup 2) (reg PIC_REG)))
+ (set (match_operand 0 "" "") (mem (match_dup 3)))]
+ ""
+ "
+{
+ rtx mem;
+
+ operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+
+ if (TARGET_SHMEDIA)
+ {
+ rtx reg = operands[2];
+
+ if (Pmode == DImode)
+ {
+ if (flag_pic > 1)
+ emit_insn (gen_movdi_const_32bit (reg, operands[1]));
+ else
+ emit_insn (gen_movdi_const_16bit (reg, operands[1]));
+ }
+ else
+ {
+ if (flag_pic > 1)
+ emit_insn (gen_movsi_const (reg, operands[1]));
+ else
+ emit_insn (gen_movsi_const_16bit (reg, operands[1]));
+ }
+ }
+ else
+ emit_move_insn (operands[2], operands[1]);
+
+  /* When the stack protector inserts code after the result is set to
+     R0, @(rX, r12) will cause a spill failure for R0.  Use an unspec
+     insn to avoid combining (set A (plus rX r12)) and (set op0 (mem A))
+     when rX is a GOT address for the guard symbol.  Ugly, but it doesn't
+     matter because this is a rare situation. */
+ if (!TARGET_SHMEDIA
+ && flag_stack_protect
+ && GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == UNSPEC
+ && GET_CODE (XVECEXP (XEXP (operands[1], 0), 0, 0)) == SYMBOL_REF
+ && strcmp (XSTR (XVECEXP (XEXP (operands[1], 0), 0, 0), 0),
+ \"__stack_chk_guard\") == 0)
+ emit_insn (gen_chk_guard_add (operands[3], operands[2]));
+ else
+ emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
+ gen_rtx_REG (Pmode, PIC_REG)));
+
+ /* N.B. This is not constant for a GOTPLT relocation. */
+ mem = gen_rtx_MEM (Pmode, operands[3]);
+ MEM_NOTRAP_P (mem) = 1;
+ /* ??? Should we have a special alias set for the GOT? */
+ emit_move_insn (operands[0], mem);
+
+ DONE;
+}")
+
+(define_expand "sym2GOT"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOT))]
+ ""
+ "")
+
+(define_expand "symGOT2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+ "
+{
+ rtx gotsym, insn;
+
+ gotsym = gen_sym2GOT (operands[1]);
+ PUT_MODE (gotsym, Pmode);
+ insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ DONE;
+}")
+
+(define_expand "symGOTPLT2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+ "
+{
+ rtx pltsym = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, operands[1]),
+ UNSPEC_GOTPLT));
+ emit_insn (gen_symGOT_load (operands[0], pltsym));
+ DONE;
+}")
+
+(define_expand "sym2GOTOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTOFF))]
+ ""
+ "")
+
+(define_expand "symGOTOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+ "
+{
+ rtx gotoffsym, insn;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ gotoffsym = gen_sym2GOTOFF (operands[1]);
+ PUT_MODE (gotoffsym, Pmode);
+ emit_move_insn (t, gotoffsym);
+ insn = emit_move_insn (operands[0],
+ gen_rtx_PLUS (Pmode, t,
+ gen_rtx_REG (Pmode, PIC_REG)));
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+}")
+
+(define_expand "symPLT_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI
+ (unspec:SI
+ [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PLT))
+ (const:SI (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))] UNSPEC_PCREL_SYMOFF)))
+ ;; Even though the PIC register is not really used by the call
+ ;; sequence in which this is expanded, the PLT code assumes the PIC
+ ;; register is set, so we must not skip its initialization. Since
+ ;; we only use this expand as part of calling sequences, and never
+ ;; to take the address of a function, this is the best point to
+ ;; insert the (use). Using the PLT to take the address of a
+ ;; function would be wrong, not only because the PLT entry could
+ ;; then be called from a function that doesn't initialize the PIC
+ ;; register to the proper GOT, but also because pointers to the
+ ;; same function might not compare equal, should they be set by
+ ;; different shared libraries.
+ (use (reg:SI PIC_REG))]
+ "TARGET_SH1"
+ "")
+
+(define_expand "sym2PIC"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PIC))]
+ ""
+ "")
+
+;; TLS code generation.
+;; ??? This should be a define_insn_and_split.
+;; See the thread [PATCH/RFA] SH TLS support on gcc-patches
+;; <http://gcc.gnu.org/ml/gcc-patches/2003-02/msg01898.html>
+;; for details.
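+;; (Note: as the template below shows, the global-dynamic sequence
+;; materializes the address of the variable's TLSGD GOT entry in r4
+;; (its GOT offset plus r12, added in the delay slot) and calls
+;; __tls_get_addr through the PLT to obtain the variable's address.)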
+
+(define_insn "tls_global_dynamic"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSGD))
+ (const_int 0)))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (scratch:SI))]
+ "TARGET_SH1"
+ "*
+{
+ return \"\\
+mov.l\\t1f,r4\\n\\
+\\tmova\\t2f,r0\\n\\
+\\tmov.l\\t2f,r1\\n\\
+\\tadd\\tr0,r1\\n\\
+\\tjsr\\t@r1\\n\\
+\\tadd\\tr12,r4\\n\\
+\\tbra\\t3f\\n\\
+\\tnop\\n\\
+\\t.align\\t2\\n\\
+1:\\t.long\\t%a1@TLSGD\\n\\
+2:\\t.long\\t__tls_get_addr@PLT\\n\\
+3:\";
+}"
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "26")])
+
+(define_insn "tls_local_dynamic"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSLDM))
+ (const_int 0)))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (scratch:SI))]
+ "TARGET_SH1"
+ "*
+{
+ return \"\\
+mov.l\\t1f,r4\\n\\
+\\tmova\\t2f,r0\\n\\
+\\tmov.l\\t2f,r1\\n\\
+\\tadd\\tr0,r1\\n\\
+\\tjsr\\t@r1\\n\\
+\\tadd\\tr12,r4\\n\\
+\\tbra\\t3f\\n\\
+\\tnop\\n\\
+\\t.align\\t2\\n\\
+1:\\t.long\\t%a1@TLSLDM\\n\\
+2:\\t.long\\t__tls_get_addr@PLT\\n\\
+3:\";
+}"
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "26")])
+
+(define_expand "sym2DTPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_DTPOFF))]
+ ""
+ "")
+
+(define_expand "symDTPOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "") (match_operand 2 "" "")]
+ ""
+ "
+{
+ rtx dtpoffsym;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ dtpoffsym = gen_sym2DTPOFF (operands[1]);
+ PUT_MODE (dtpoffsym, Pmode);
+ emit_move_insn (t, dtpoffsym);
+ emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, operands[2]));
+ DONE;
+}")
+
+(define_expand "sym2GOTTPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTTPOFF))]
+ ""
+ "")
+
+(define_insn "tls_initial_exec"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSIE))
+ (use (reg:SI GBR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI R0_REG))]
+ ""
+ "*
+{
+ return \"\\
+mov.l\\t1f,r0\\n\\
+\\tstc\\tgbr,%0\\n\\
+\\tmov.l\\t@(r0,r12),r0\\n\\
+\\tbra\\t2f\\n\\
+\\tadd\\tr0,%0\\n\\
+\\t.align\\t2\\n\\
+1:\\t.long\\t%a1\\n\\
+2:\";
+}"
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "16")])
+
+(define_expand "sym2TPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_TPOFF))]
+ ""
+ "")
+
+(define_expand "symTPOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+ "
+{
+ rtx tpoffsym;
+
+ tpoffsym = gen_sym2TPOFF (operands[1]);
+ PUT_MODE (tpoffsym, Pmode);
+ emit_move_insn (operands[0], tpoffsym);
+ DONE;
+}")
+
+(define_insn "load_gbr"
+ [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GBR_REG))
+ (use (reg:SI GBR_REG))]
+ ""
+ "stc gbr,%0"
+ [(set_attr "type" "tls_load")])
+
+;; Case instruction for switch statements.
+
+;; Operand 0 is the index,
+;; operand 1 is the minimum bound,
+;; operand 2 is the maximum bound - minimum bound + 1,
+;; operand 3 is the CODE_LABEL for the table,
+;; operand 4 is the CODE_LABEL to go to if the index is out of range.
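+;; (A hypothetical example for illustration: for a dense "switch (i)"
+;; whose smallest case label is 5, operand 1 is 5 and the expansion
+;; below range-checks (unsigned) (i - 5) against operand 2 before
+;; indexing the dispatch table.)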
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")
+ (match_operand 3 "" "") (match_operand 4 "" "")]
+ ""
+ "
+{
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (SImode);
+ if (TARGET_SHMEDIA)
+ {
+ rtx reg = gen_reg_rtx (DImode);
+ rtx reg2 = gen_reg_rtx (DImode);
+ rtx reg3 = gen_reg_rtx (Pmode);
+ rtx reg4 = gen_reg_rtx (Pmode);
+ rtx reg5 = gen_reg_rtx (Pmode);
+ rtx load, test;
+
+ operands[0] = convert_modes (DImode, SImode, operands[0], 0);
+ operands[1] = convert_modes (DImode, SImode, operands[1], 0);
+ operands[2] = convert_modes (DImode, SImode, operands[2], 1);
+
+ test = gen_rtx_GT (VOIDmode, operands[1], operands[0]);
+ emit_jump_insn (gen_cbranchdi4 (test, operands[1], operands[0], operands[4]));
+ emit_move_insn (reg, gen_rtx_MINUS (DImode, operands[0], operands[1]));
+ test = gen_rtx_GTU (VOIDmode, reg, operands[2]);
+ emit_jump_insn (gen_cbranchdi4 (test, reg, operands[2], operands[4]));
+ emit_insn (gen_casesi_shift_media (reg2, reg, operands[3]));
+ emit_move_insn (reg3, gen_datalabel_ref (gen_rtx_LABEL_REF
+ (Pmode, operands[3])));
+ /* Messy: can we subreg to clean this up? */
+ if (Pmode == DImode)
+ load = gen_casesi_load_media (reg4, reg3, reg2, operands[3]);
+ else
+ load = gen_casesi_load_media (reg4,
+ gen_rtx_SUBREG (DImode, reg3, 0),
+ reg2, operands[3]);
+ PUT_MODE (SET_SRC (load), Pmode);
+ emit_insn (load);
+ /* ??? The following add could be eliminated if we used ptrel. */
+ emit_move_insn (reg5, gen_rtx_PLUS (Pmode, reg3, reg4));
+ emit_jump_insn (gen_casesi_jump_media (reg5, operands[3]));
+ emit_barrier ();
+ DONE;
+ }
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+  /* If optimizing, casesi_worker depends on the mode of the instruction
+     before the label it 'uses' - operands[3]. */
+ emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4],
+ reg));
+ emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3]));
+ if (TARGET_SH2)
+ emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3]));
+ else
+ emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3]));
+  /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to
+     operands[3], but to the fresh label generated above.  We will fix
+     this up in machine_dependent_reorg. */
+ emit_barrier ();
+ DONE;
+}")
+
+(define_expand "casesi_0"
+ [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" ""))
+ (set (match_dup 4) (minus:SI (match_dup 4)
+ (match_operand:SI 1 "arith_operand" "")))
+ (set (reg:SI T_REG)
+ (gtu:SI (match_dup 4)
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (pc)
+ (if_then_else (ne (reg:SI T_REG)
+ (const_int 0))
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_SH1"
+ "")
+
+;; ??? Reload might clobber r0 if we use it explicitly in the RTL before
+;; reload; using an R0_REGS pseudo reg is likely to give poor code.
+;; So we keep the use of r0 hidden in an R0_REGS clobber until after reload.
+
+(define_insn "casesi_worker_0"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 "=X,1"))
+ (clobber (match_scratch:SI 4 "=&z,z"))]
+ "TARGET_SH1"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH1 && ! TARGET_SH2 && reload_completed"
+ [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA))
+ (parallel [(set (match_dup 0)
+ (unspec:SI [(reg:SI R0_REG) (match_dup 1)
+ (label_ref (match_dup 2))] UNSPEC_CASESI))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))]
+ "if (GET_CODE (operands[2]) == CODE_LABEL) LABEL_NUSES (operands[2])++;")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH2 && reload_completed"
+ [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA))
+ (parallel [(set (match_dup 0)
+ (unspec:SI [(reg:SI R0_REG) (match_dup 1)
+ (label_ref (match_dup 2))] UNSPEC_CASESI))
+ (clobber (match_dup 3))])]
+ "if (GET_CODE (operands[2]) == CODE_LABEL) LABEL_NUSES (operands[2])++;")
+
+(define_insn "casesi_worker_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(reg:SI R0_REG)
+ (match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 "=X,1"))]
+ "TARGET_SH1"
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return \"shll2 %1\;mov.l @(r0,%1),%0\";
+ case HImode:
+ return \"add %1,%1\;mov.w @(r0,%1),%0\";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"mov.b @(r0,%1),%0\;extu.b %0,%0\";
+ return \"mov.b @(r0,%1),%0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "length" "4")])
+
+(define_insn "casesi_worker_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(reg:SI R0_REG)
+ (match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))
+ (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))
+ (clobber (match_operand:SI 4 "" "=X,1"))]
+ "TARGET_SH2 && reload_completed && flag_pic"
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+ const char *load;
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ output_asm_insn (\"shll2 %1\", operands);
+ load = \"mov.l @(r0,%1),%0\"; break;
+ case HImode:
+ output_asm_insn (\"add %1,%1\", operands);
+ load = \"mov.w @(r0,%1),%0\"; break;
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ load = \"mov.b @(r0,%1),%0\;extu.b %0,%0\";
+ else
+ load = \"mov.b @(r0,%1),%0\";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_asm_insn (\"add\tr0,%1\;mova\t%O3,r0\\n\", operands);
+ return load;
+}"
+ [(set_attr "length" "8")])
+
+(define_insn "casesi_shift_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (unspec:DI [(label_ref:DI (match_operand 2 "" ""))]
+ UNSPEC_CASESI)))]
+ "TARGET_SHMEDIA"
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return \"shlli %1, 2, %0\";
+ case HImode:
+ return \"shlli %1, 1, %0\";
+ case QImode:
+ if (rtx_equal_p (operands[0], operands[1]))
+ return \"\";
+ return \"add %1, r63, %0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "casesi_load_media"
+ [(set (match_operand 0 "any_arith_reg_dest" "=r")
+ (mem (unspec [(match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")
+ (label_ref:DI (match_operand 3 "" ""))] UNSPEC_CASESI)))]
+ "TARGET_SHMEDIA"
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[3]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return \"ldx.l %1, %2, %0\";
+ case HImode:
+#if 0
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"ldx.uw %1, %2, %0\";
+#endif
+ return \"ldx.w %1, %2, %0\";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"ldx.ub %1, %2, %0\";
+ return \"ldx.b %1, %2, %0\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "load_media")])
+
+(define_expand "return"
+ [(return)]
+ "reload_completed && ! sh_need_epilogue ()"
+ "
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_jump_insn (gen_return_media ());
+ DONE;
+ }
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1)))
+ {
+ emit_jump_insn (gen_shcompact_return_tramp ());
+ DONE;
+ }
+}")
+
+(define_insn "*return_i"
+ [(return)]
+ "TARGET_SH1 && ! (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie
+ & CALL_COOKIE_RET_TRAMP (1)))
+ && reload_completed
+ && lookup_attribute (\"trap_exit\",
+ DECL_ATTRIBUTES (current_function_decl)) == NULL_TREE"
+ "*
+ {
+ if (TARGET_SH2A && (dbr_sequence_length () == 0)
+ && !current_function_interrupt)
+ return \"rts/n\";
+ else
+ return \"%@ %#\";
+ }"
+ [(set_attr "type" "return")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; trapa has no delay slot.
+(define_insn "*return_trapa"
+ [(return)]
+ "TARGET_SH1 && !TARGET_SHCOMPACT
+ && reload_completed"
+ "%@"
+ [(set_attr "type" "return")])
+
+(define_expand "shcompact_return_tramp"
+ [(return)]
+ "TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))"
+ "
+{
+ rtx reg = gen_rtx_REG (Pmode, R0_REG);
+
+ function_symbol (reg, \"__GCC_shcompact_return_trampoline\", SFUNC_STATIC);
+ emit_jump_insn (gen_shcompact_return_tramp_i ());
+ DONE;
+}")
+
+(define_insn "shcompact_return_tramp_i"
+ [(parallel [(return) (use (reg:SI R0_REG))])]
+ "TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))"
+ "jmp @r0%#"
+ [(set_attr "type" "jump_ind")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "return_media_i"
+ [(parallel [(return) (use (match_operand 0 "target_reg_operand" "k"))])]
+ "TARGET_SHMEDIA && reload_completed"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_insn "return_media_rte"
+ [(return)]
+ "TARGET_SHMEDIA && reload_completed && current_function_interrupt"
+ "rte"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "return_media"
+ [(return)]
+ "TARGET_SHMEDIA && reload_completed"
+ "
+{
+ int tr_regno = sh_media_register_for_return ();
+ rtx tr;
+
+ if (current_function_interrupt)
+ {
+ emit_jump_insn (gen_return_media_rte ());
+ DONE;
+ }
+ if (tr_regno < 0)
+ {
+ rtx r18 = gen_rtx_REG (Pmode, PR_MEDIA_REG);
+
+ gcc_assert (call_really_used_regs[TR0_REG] && !fixed_regs[TR0_REG]);
+ tr_regno = TR0_REG;
+ tr = gen_rtx_REG (Pmode, tr_regno);
+ emit_move_insn (tr, r18);
+ }
+ else
+ tr = gen_rtx_REG (Pmode, tr_regno);
+
+ emit_jump_insn (gen_return_media_i (tr));
+ DONE;
+}")
+
+(define_insn "shcompact_preserve_incoming_args"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (unspec:SI [(match_dup 0)] UNSPEC_COMPACT_ARGS))]
+ "TARGET_SHCOMPACT"
+ ""
+ [(set_attr "length" "0")])
+
+(define_insn "shcompact_incoming_args"
+ [(set (reg:SI R2_REG) (unspec:SI [(reg:SI R2_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R3_REG) (unspec:SI [(reg:SI R3_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R4_REG) (unspec:SI [(reg:SI R4_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R5_REG) (unspec:SI [(reg:SI R5_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R6_REG) (unspec:SI [(reg:SI R6_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R7_REG) (unspec:SI [(reg:SI R7_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R8_REG) (unspec:SI [(reg:SI R8_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R9_REG) (unspec:SI [(reg:SI R9_REG)] UNSPEC_COMPACT_ARGS))
+ (set (mem:BLK (reg:SI MACL_REG))
+ (unspec:BLK [(reg:SI MACH_REG)] UNSPEC_COMPACT_ARGS))
+ (use (reg:SI R0_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @r0%#"
+ [(set_attr "needs_delay_slot" "yes")])
+
+(define_insn "shmedia_save_restore_regs_compact"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "immediate_operand" "i")))
+ (use (reg:SI R0_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT
+ && (INTVAL (operands[0]) == SHMEDIA_REGS_STACK_ADJUST ()
+ || INTVAL (operands[0]) == - SHMEDIA_REGS_STACK_ADJUST ())"
+ "jsr @r0%#"
+ [(set_attr "needs_delay_slot" "yes")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "sh_expand_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ sh_expand_epilogue (0);
+ emit_jump_insn (gen_return ());
+ DONE;
+}")
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx ra = operands[0];
+
+ if (TARGET_SHMEDIA64)
+ emit_insn (gen_eh_set_ra_di (ra));
+ else
+ emit_insn (gen_eh_set_ra_si (ra));
+
+ DONE;
+})
+
+;; Clobber the return address on the stack. We can't expand this
+;; until we know where it will be put in the stack frame.
+
+(define_insn "eh_set_ra_si"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&r"))]
+ "! TARGET_SHMEDIA64"
+ "#")
+
+(define_insn "eh_set_ra_di"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch:DI 1 "=&r"))]
+ "TARGET_SHMEDIA64"
+ "#")
+
+(define_split
+ [(unspec_volatile [(match_operand 0 "register_operand" "")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch 1 ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ sh_set_return_address (operands[0], operands[1]);
+ DONE;
+}")
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Define movml instructions for the SH2A target.  Currently they are
+;; used only to push and pop all of the banked registers.
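+;; (Illustrative note, based on the SH2A movml semantics rather than the
+;; original sources: movml.l rm,@-r15 stores registers r0 through rm, so
+;; the r7 forms below save and restore all eight banked registers in a
+;; single instruction.)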
+
+(define_insn "movml_push_banked"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus (match_dup 0) (const_int -32)))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))]
+ "TARGET_SH2A && REGNO (operands[0]) == 15"
+ "movml.l\tr7,@-r15"
+ [(set_attr "in_delay_slot" "no")])
+
+(define_insn "movml_pop_banked"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus (match_dup 0) (const_int 32)))
+ (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32))))
+ (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28))))
+ (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24))))
+ (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20))))
+ (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16))))
+ (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12))))
+ (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8))))
+ (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))]
+ "TARGET_SH2A && REGNO (operands[0]) == 15"
+ "movml.l\t@r15+,r7"
+ [(set_attr "in_delay_slot" "no")])
+
+;; ------------------------------------------------------------------------
+;; Scc instructions
+;; ------------------------------------------------------------------------
+
+(define_insn "movt"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (eq:SI (reg:SI T_REG) (const_int 1)))]
+ "TARGET_SH1"
+ "movt %0"
+ [(set_attr "type" "arith")])
+
+(define_expand "cstore4_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand 2 "logical_operand" "")
+ (match_operand 3 "cmp_operand" "")]))]
+ "TARGET_SHMEDIA"
+ "
+{
+ enum machine_mode mode = GET_MODE (operands[2]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ bool invert, swap;
+ if (mode == VOIDmode)
+ mode = GET_MODE (operands[3]);
+ if (operands[2] == const0_rtx)
+ {
+ if (code == EQ || code == NE)
+ operands[2] = operands[3], operands[3] = const0_rtx;
+ }
+ else
+ operands[2] = force_reg (mode, operands[2]);
+ if (operands[3] != const0_rtx)
+ operands[3] = force_reg (mode, operands[3]);
+
+ switch (code)
+ {
+ case GEU:
+ case GE:
+ swap = invert = !FLOAT_MODE_P (mode);
+ break;
+
+ case LEU:
+ case LE:
+ swap = FLOAT_MODE_P (mode), invert = !swap;
+ break;
+
+ case LTU:
+ case LT:
+ swap = true, invert = false;
+ break;
+
+ case GTU:
+ case GT:
+ case EQ:
+ case UNORDERED:
+ swap = invert = false;
+ break;
+
+ case NE:
+ swap = invert = true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (swap)
+ {
+ rtx tem = operands[2];
+ operands[2] = operands[3];
+ operands[3] = tem;
+ code = swap_condition (code);
+ }
+
+ if (invert)
+ {
+ rtx tem = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+ code = reverse_condition (code);
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]);
+ emit_insn (gen_cstore4_media (tem, operands[1],
+ operands[2], operands[3]));
+ code = EQ;
+ operands[2] = tem;
+ operands[3] = const0_rtx;
+ }
+
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]);
+}")
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:SI 2 "cmpsi_operand" "")
+ (match_operand:SI 3 "arith_operand" "")]))]
+ "TARGET_SH1 || TARGET_SHMEDIA"
+ "if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && sh_expand_t_scc (operands))
+ DONE;
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, SImode);
+ DONE;
+")
+
+(define_expand "cstoredi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:DI 2 "arith_operand" "")
+ (match_operand:DI 3 "arith_operand" "")]))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+ "if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && sh_expand_t_scc (operands))
+ DONE;
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, DImode);
+ DONE;
+")
+
+
+
+;; sne moves the complement of the T reg to DEST like this:
+;; cmp/eq ...
+;; mov #-1,temp
+;; negc temp,dest
+;;   This is better than xoring the compare result with 1 because it does
+;;   not require r0, and further, the -1 may be CSEd or hoisted out of a
+;;   loop.
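+;; (Illustrative example: this is the expansion used for code such as
+;; "r = (a != b);" - cmp/eq sets T when a == b, and the mov/negc pair
+;; then stores the complement of T in r.)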
+
+(define_expand "movnegt"
+ [(set (match_dup 1) (const_int -1))
+ (parallel [(set (match_operand:SI 0 "" "")
+ (neg:SI (plus:SI (reg:SI T_REG)
+ (match_dup 1))))
+ (set (reg:SI T_REG)
+ (ne:SI (ior:SI (reg:SI T_REG) (match_dup 1))
+ (const_int 0)))])]
+ ""
+ "
+{
+ operands[1] = gen_reg_rtx (SImode);
+}")
+
+
+;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1.
+;; This prevents a regression that occurred when we switched from xor to
+;; mov/neg for sne.
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (reg:SI T_REG)
+ (const_int -1)))]
+ "TARGET_SH1"
+ [(set (match_dup 0) (eq:SI (reg:SI T_REG) (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ "")
+
+(define_expand "cstoresf4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand:SF 2 "arith_operand" "")
+ (match_operand:SF 3 "arith_operand" "")]))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, SFmode);
+ DONE;
+")
+
+(define_expand "cstoredf4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand:DF 2 "arith_operand" "")
+ (match_operand:DF 3 "arith_operand" "")]))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, DFmode);
+ DONE;
+")
+
+
+;; -------------------------------------------------------------------------
+;; Instructions to cope with inline literal tables
+;; -------------------------------------------------------------------------
+
+; 2 byte integer in line
+
+(define_insn "consttable_2"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST2)]
+ ""
+ "*
+{
+ if (operands[1] != const0_rtx)
+ assemble_integer (operands[0], 2, BITS_PER_UNIT * 2, 1);
+ return \"\";
+}"
+ [(set_attr "length" "2")
+ (set_attr "in_delay_slot" "no")])
+
+; 4 byte integer in line
+
+(define_insn "consttable_4"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST4)]
+ ""
+ "*
+{
+ if (operands[1] != const0_rtx)
+ {
+ assemble_integer (operands[0], 4, BITS_PER_UNIT * 4, 1);
+ mark_symbol_refs_as_used (operands[0]);
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+; 8 byte integer in line
+
+(define_insn "consttable_8"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST8)]
+ ""
+ "*
+{
+ if (operands[1] != const0_rtx)
+ assemble_integer (operands[0], 8, BITS_PER_UNIT * 8, 1);
+ return \"\";
+}"
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+; 4 byte floating point
+
+(define_insn "consttable_sf"
+ [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST4)]
+ ""
+ "*
+{
+ if (operands[1] != const0_rtx)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]);
+ assemble_real (d, SFmode, GET_MODE_ALIGNMENT (SFmode));
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+; 8 byte floating point
+
+(define_insn "consttable_df"
+ [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST8)]
+ ""
+ "*
+{
+ if (operands[1] != const0_rtx)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]);
+ assemble_real (d, DFmode, GET_MODE_ALIGNMENT (DFmode));
+ }
+ return \"\";
+}"
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+;; Alignment is needed for some constant tables; it may also be added for
+;; instructions at the start of loops, or after unconditional branches.
+;; ??? We would get more accurate lengths if we did instruction
+;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used
+;; here is too conservative.
+
+; align to a two byte boundary
+
+(define_expand "align_2"
+ [(unspec_volatile [(const_int 1)] UNSPECV_ALIGN)]
+ ""
+ "")
+
+; align to a four byte boundary
+;; align_4 and align_log are used at the starts of loops, or
+;; after unconditional branches, and may take up extra room.
+
+(define_expand "align_4"
+ [(unspec_volatile [(const_int 2)] UNSPECV_ALIGN)]
+ ""
+ "")
+
+; align to a cache line boundary
+
+(define_insn "align_log"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] UNSPECV_ALIGN)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "in_delay_slot" "no")])
+
+; emitted at the end of the literal table, used to emit the
+; 32-bit branch labels if needed.
+
+(define_insn "consttable_end"
+ [(unspec_volatile [(const_int 0)] UNSPECV_CONST_END)]
+ ""
+ "* return output_jump_label_table ();"
+ [(set_attr "in_delay_slot" "no")])
+
+; emitted at the end of the window in the literal table.
+
+(define_insn "consttable_window_end"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_WINDOW_END)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "in_delay_slot" "no")])
+
+;; -------------------------------------------------------------------------
+;; Misc
+;; -------------------------------------------------------------------------
+
+;; String/block move insn.
+
+(define_expand "movmemsi"
+ [(parallel [(set (mem:BLK (match_operand:BLK 0 "" ""))
+ (mem:BLK (match_operand:BLK 1 "" "")))
+ (use (match_operand:SI 2 "nonmemory_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "
+{
+  if (expand_block_move (operands))
+    DONE;
+  else
+    FAIL;
+}")
+
+(define_insn "block_move_real"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI R6_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_move_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI R6_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; -------------------------------------------------------------------------
+;; Floating point instructions.
+;; -------------------------------------------------------------------------
+
+;; ??? All patterns should have a type attribute.
+
+(define_expand "movpsi"
+ [(set (match_operand:PSI 0 "register_operand" "")
+ (match_operand:PSI 1 "general_movsrc_operand" ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "")
+
+;; The c / m alternative is a fake to guide reload to load directly into
+;; fpscr, since reload doesn't know how to use post-increment.
+;; TARGET_LEGITIMATE_ADDRESS_P guards against bogus addresses before reload,
+;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's
+;; predicate after reload.
+;; The mac_gp type for r/!c might look a bit odd, but it actually schedules
+;; like a mac -> gpr move.
+(define_insn "fpu_switch"
+ [(set (match_operand:PSI 0 "general_movdst_operand" "=c,c,r,c,c,r,m,r,<")
+ (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c,c"))]
+ "TARGET_SH2E
+ && (! reload_completed
+ || true_regnum (operands[0]) != FPSCR_REG
+ || !MEM_P (operands[1])
+ || GET_CODE (XEXP (operands[1], 0)) != PLUS)"
+ "@
+ ! precision stays the same
+ lds.l %1,fpscr
+ mov.l %1,%0
+ #
+ lds %1,fpscr
+ mov %1,%0
+ mov.l %1,%0
+ sts fpscr,%0
+ sts.l fpscr,%0"
+ [(set_attr "length" "0,2,2,4,2,2,2,2,2")
+ (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,fstore")])
+
+(define_peephole2
+ [(set (reg:PSI FPSCR_REG)
+ (mem:PSI (match_operand:SI 0 "register_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && peep2_reg_dead_p (1, operands[0])"
+ [(const_int 0)]
+{
+ rtx fpscr, mem, new_insn;
+
+ fpscr = SET_DEST (PATTERN (curr_insn));
+ mem = SET_SRC (PATTERN (curr_insn));
+ mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0]));
+
+ new_insn = emit_insn (gen_fpu_switch (fpscr, mem));
+ add_reg_note (new_insn, REG_INC, operands[0]);
+ DONE;
+})
+
+(define_split
+ [(set (reg:PSI FPSCR_REG)
+ (mem:PSI (match_operand:SI 0 "register_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (flag_peephole2 ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+{
+ rtx fpscr, mem, new_insn;
+
+ fpscr = SET_DEST (PATTERN (curr_insn));
+ mem = SET_SRC (PATTERN (curr_insn));
+ mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0]));
+
+ new_insn = emit_insn (gen_fpu_switch (fpscr, mem));
+ add_reg_note (new_insn, REG_INC, operands[0]);
+
+ if (!find_regno_note (curr_insn, REG_DEAD, true_regnum (operands[0])))
+ emit_insn (gen_addsi3 (operands[0], operands[0], GEN_INT (-4)));
+ DONE;
+})
+
+;; ??? This uses the fp unit, but has no type indicating that.
+;; If we did that, this would either give a bogus latency or introduce
+;; a bogus FIFO constraint.
+;; Since this insn is currently only used for prologues/epilogues,
+;; it is probably best to claim no function unit, which matches the
+;; current setting.
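+;; (Note: the const_int 1048576 below is 1 << 20, the FPSCR.SZ bit that
+;; selects between 32-bit and 64-bit fmov transfers.)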
+(define_insn "toggle_sz"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fschg"
+ [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")])
+
+;; There's no way we can use it today, since the optimize mode switching
+;; pass doesn't tell us which mode we're switching from when it requests
+;; a mode, so we can't tell whether we can use a relative mode switch
+;; (like toggle_pr) or an absolute switch (like loading fpscr from
+;; memory).
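+;; (Note: the const_int 524288 below is 1 << 19, the FPSCR.PR bit that
+;; selects between single and double precision.)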
+(define_insn "toggle_pr"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
+ "fpchg"
+ [(set_attr "type" "fpscr_toggle")])
+
+(define_expand "addsf3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "")
+ (plus:SF (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_addsf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*addsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fadd.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "unary_sf_op"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V2SF
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(not:BI (match_operand 3 "const_int_operand" "n"))]))
+ (match_operator:SF 2 "unary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(match_operand 4
+ "const_int_operand" "n")]))]))
+ (parallel [(not:BI (match_dup 3)) (match_dup 3)])))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "TARGET_SHMEDIA_FPU && reload_completed"
+ [(set (match_dup 5) (match_dup 6))]
+ "
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode,
+ (true_regnum (operands[1])
+ + (INTVAL (operands[4]) ^ endian)));
+
+ operands[7] = gen_rtx_REG (SFmode,
+ (true_regnum (operands[0])
+ + (INTVAL (operands[3]) ^ endian)));
+ operands[6] = gen_rtx_fmt_e (GET_CODE (operands[2]), SFmode, op1);
+}"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "binary_sf_op0"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (match_operator:SF 3 "binary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0)]))])
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(const_int 1)]))))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (match_dup 5))]
+ "
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode,
+ true_regnum (operands[1]) + endian);
+ rtx op2 = gen_rtx_REG (SFmode,
+ true_regnum (operands[2]) + endian);
+
+ operands[4] = gen_rtx_REG (SFmode,
+ true_regnum (operands[0]) + endian);
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2);
+}"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "binary_sf_op1"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(const_int 0)]))
+ (match_operator:SF 3 "binary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 1)]))])))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (match_dup 5))]
+ "
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode,
+ true_regnum (operands[1]) + (1 ^ endian));
+ rtx op2 = gen_rtx_REG (SFmode,
+ true_regnum (operands[2]) + (1 ^ endian));
+
+ operands[4] = gen_rtx_REG (SFmode,
+ true_regnum (operands[0]) + (1 ^ endian));
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2);
+}"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "addsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fadd %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "subsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
+ (match_operand:SF 2 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_subsf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*subsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsub.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "subsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fsub %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "mulsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
+ (match_operand:SF 2 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "")
+
+(define_insn "*mulsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fmul.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
+;; register in fp instructions.  Thus, in order to generate fmac,
+;; we start out with a mulsf pattern that does not depend on fpscr.
+;; This is split after combine to introduce the dependency, in order to
+;; get mode switching and scheduling right.
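+;; (Illustrative note: e.g. for "d = a * b + c;" combine can then fold
+;; the multiply below into the following add to match *macsf3; any
+;; multiply that remains standalone is split into mulsf3_i4, which
+;; carries the fpscr dependency.)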
+(define_insn_and_split "mulsf3_ie"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E"
+ "fmul %2,%0"
+ "TARGET_SH4 || TARGET_SH2A_SINGLE"
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_mulsf3_i4 (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+ DONE;
+}"
+ [(set_attr "type" "fp")])
+
+(define_insn "mulsf3_i4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fmul %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "mac_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA_FPU && TARGET_FMAC"
+ "fmac.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "*macsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 4 "fpscr_operand" "c"))]
+ "TARGET_SH2E && TARGET_FMAC"
+ "fmac fr0,%2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "arith_reg_operand" "")
+ (div:SF (match_operand:SF 1 "arith_reg_operand" "")
+ (match_operand:SF 2 "arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_divsf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*divsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fdiv.s %1, %2, %0"
+ [(set_attr "type" "fdiv_media")])
+
+(define_insn "divsf3_i"
+ [(set (match_operand:SF 0 "arith_reg_dest" "=f")
+ (div:SF (match_operand:SF 1 "arith_reg_operand" "0")
+ (match_operand:SF 2 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fdiv %2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:DI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.qs %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_expand "floatsisf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (float:SF (match_operand:SI 1 "fpul_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
+ {
+ emit_sf_insn (gen_floatsisf2_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*floatsisf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.ls %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_insn "floatsisf2_i4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "float %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*floatsisf2_ie"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fpul_operand" "y")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "float %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f")
+ (fix:DI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.sq %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_expand "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
+ {
+ emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*fix_truncsfsi2_media"
+ [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.sl %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_insn "fix_truncsfsi2_i4"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "ftrc_s")
+ (set_attr "fp_mode" "single")])
+
+;; ??? This pattern is used nowhere. fix_truncsfsi2 always expands to
+;; fix_truncsfsi2_i4.
+;; (define_insn "fix_truncsfsi2_i4_2"
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+;; (use (reg:PSI FPSCR_REG))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; "#"
+;; [(set_attr "length" "4")
+;; (set_attr "fp_mode" "single")])
+
+;;(define_split
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1)))
+;; (use (match_dup 2))])
+;; (set (match_dup 0) (reg:SI FPUL_REG))])
+
+(define_insn "*fixsfsi"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "cmpgtsf_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "ieee_ccmpeqsf_t"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f"))))]
+ "TARGET_SH2E && TARGET_IEEE && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")])
+
+
+(define_insn "cmpgtsf_t_i4"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_t_i4"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*ieee_ccmpeqsf_t_4"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpeq.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgtsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpgt.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgesf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ge:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpge.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpunsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unordered:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpun.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand:SF 1 "arith_operand" "")
+ (match_operand:SF 2 "arith_operand" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ sh_emit_compare_and_branch (operands, SFmode);
+ DONE;
+}")
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_unop (&gen_negsf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*negsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fneg.s %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "negsf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fneg %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH3E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH3E)
+ {
+ expand_sf_unop (&gen_sqrtsf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*sqrtsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsqrt.s %1, %0"
+ [(set_attr "type" "fdiv_media")])
+
+(define_insn "sqrtsf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fsqrt %0"
+ [(set_attr "type" "fdiv")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "immediate_operand" "i")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "0"))))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[1] == CONST1_RTX (SFmode)"
+ "fsrra %0"
+ [(set_attr "type" "fsrra")
+ (set_attr "fp_mode" "single")])
+
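+;; Usage sketch (assumes -funsafe-math-optimizations, e.g. via -ffast-math):
+;;
+;;   float rsqrt (float x) { return 1.0f / sqrtf (x); }
+;;
+;; is intended to match rsqrtsf2 above on SH4A and emit a single fsrra
+;; instead of an fsqrt followed by an fdiv.
+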
+(define_insn "fsca"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (unspec:SF [(mult:SF
+ (float:SF (match_operand:SI 1 "fpul_operand" "y"))
+ (match_operand:SF 2 "immediate_operand" "i"))
+ ] UNSPEC_FSINA)
+ (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2))
+ ] UNSPEC_FCOSA)))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations
+ && operands[2] == sh_fsca_int2sf ()"
+ "fsca fpul,%d0"
+ [(set_attr "type" "fsca")
+ (set_attr "fp_mode" "single")])
+
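+;; fsca takes its angle in FPUL as a fixed-point value in which 2^16
+;; represents one full turn (2*pi radians), so the sin/cos expanders below
+;; scale the SFmode argument by sh_fsca_sf2int (), i.e. 2^16 / (2*pi),
+;; before truncating to SImode.  Worked example: an input of pi/2 scales
+;; to (pi/2) * 2^16 / (2*pi) = 2^14 = 16384, a quarter turn.
+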
+(define_expand "sinsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 0));
+ DONE;
+}")
+
+(define_expand "cossf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[1], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4));
+ DONE;
+}")
+
+(define_expand "sindf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FSINA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 0));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
+(define_expand "cosdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (unspec:DF [(match_operand:DF 1 "fp_arith_reg_operand" "")]
+ UNSPEC_FCOSA))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE && flag_unsafe_math_optimizations"
+ "
+{
+ rtx scaled = gen_reg_rtx (DFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (DFmode, sh_fsca_df2int ());
+ rtx sfresult = gen_reg_rtx (SFmode);
+
+ emit_df_insn (gen_muldf3 (scaled, operands[1], scale_reg));
+ emit_df_insn (gen_fix_truncdfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+ emit_move_insn (sfresult, gen_rtx_SUBREG (SFmode, fsca, 4));
+ emit_df_insn (gen_extendsfdf2 (operands[0], sfresult));
+ DONE;
+}")
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_unop (&gen_abssf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*abssf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fabs.s %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "abssf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fabs %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_adddf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*adddf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fadd.d %1, %2, %0"
+ [(set_attr "type" "dfparith_media")])
+
+(define_insn "adddf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fadd %2,%0"
+ [(set_attr "type" "dfp_arith")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_subdf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*subdf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsub.d %1, %2, %0"
+ [(set_attr "type" "dfparith_media")])
+
+(define_insn "subdf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fsub %2,%0"
+ [(set_attr "type" "dfp_arith")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_muldf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*muldf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fmul.d %1, %2, %0"
+ [(set_attr "type" "dfmul_media")])
+
+(define_insn "muldf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fmul %2,%0"
+ [(set_attr "type" "dfp_mul")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_divdf3_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*divdf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fdiv.d %1, %2, %0"
+ [(set_attr "type" "dfdiv_media")])
+
+(define_insn "divdf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fdiv %2,%0"
+ [(set_attr "type" "dfdiv")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:DI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.qd %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_expand "floatsidf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (float:DF (match_operand:SI 1 "fpul_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_floatsidf2_i (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*floatsidf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:SI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.ld %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "floatsidf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:SI 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "float %1,%0"
+ [(set_attr "type" "dfp_conv")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f")
+ (fix:DI (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.dq %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_expand "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "fpul_operand" "")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_fix_truncdfsi2_i (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*fix_truncdfsi2_media"
+ [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.dl %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "fix_truncdfsi2_i"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "dfp_conv")
+ (set_attr "dfp_comp" "no")
+ (set_attr "fp_mode" "double")])
+
+;; ??? This pattern is used nowhere. fix_truncdfsi2 always expands to
+;; fix_truncdfsi2_i.
+;; (define_insn "fix_truncdfsi2_i4"
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; "#"
+;; [(set_attr "length" "4")
+;; (set_attr "fp_mode" "double")])
+;;
+;; (define_split
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1)))
+;; (use (match_dup 2))])
+;; (set (match_dup 0) (reg:SI FPUL_REG))])
+
+(define_insn "cmpgtdf_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "dfp_cmp")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "cmpeqdf_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "dfp_cmp")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "*ieee_ccmpeqdf_t"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
+ (match_operand:DF 1 "arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "cmpeqdf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpeq.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgtdf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpgt.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgedf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ge:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpge.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpundf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unordered:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpun.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_expand "cbranchdf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand:DF 1 "arith_operand" "")
+ (match_operand:DF 2 "arith_operand" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ sh_emit_compare_and_branch (operands, DFmode);
+ DONE;
+}")
+
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "arith_reg_operand" "")
+ (neg:DF (match_operand:DF 1 "arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_negdf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*negdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fneg.d %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "negdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fneg %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "arith_reg_operand" "")
+ (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_sqrtdf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*sqrtdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsqrt.d %1, %0"
+ [(set_attr "type" "dfdiv_media")])
+
+(define_insn "sqrtdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fsqrt %0"
+ [(set_attr "type" "dfdiv")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "arith_reg_operand" "")
+ (abs:DF (match_operand:DF 1 "arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_absdf2_i, operands);
+ DONE;
+ }
+}")
+
+(define_insn "*absdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fabs.d %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "absdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fabs %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_extendsfdf2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*extendsfdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcnv.sd %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "extendsfdf2_i4"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcnvsd %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "fpul_operand" "")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+ "
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_truncdfsf2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+}")
+
+(define_insn "*truncdfsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcnv.ds %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "truncdfsf2_i4"
+ [(set (match_operand:SF 0 "fpul_operand" "=y")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcnvds %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "double")])
+
+;; Bit field insert and extract patterns.  These give better code for
+;; packed bitfields, because they allow auto-increment addresses to be
+;; generated.
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))
+ (match_operand:SI 3 "general_operand" ""))]
+ "TARGET_SH1 && ! TARGET_LITTLE_ENDIAN"
+ "
+{
+ rtx addr_target, orig_address, shift_reg, qi_val;
+ HOST_WIDE_INT bitsize, size, v = 0;
+ rtx x = operands[3];
+
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[0])
+ || satisfies_constraint_Sbv (operands[0]))
+ && satisfies_constraint_M (operands[1])
+ && satisfies_constraint_K03 (operands[2]))
+ {
+ if (satisfies_constraint_N (operands[3]))
+ {
+ emit_insn (gen_bclr_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if (satisfies_constraint_M (operands[3]))
+ {
+ emit_insn (gen_bset_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if ((REG_P (operands[3]) && REGNO (operands[3]) == T_REG)
+ && satisfies_constraint_M (operands[1]))
+ {
+ emit_insn (gen_bst_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if (REG_P (operands[3])
+ && satisfies_constraint_M (operands[1]))
+ {
+ emit_insn (gen_bld_reg (operands[3], const0_rtx));
+ emit_insn (gen_bst_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ }
+ /* ??? expmed doesn't care for non-register predicates. */
+ if (! memory_operand (operands[0], VOIDmode)
+ || ! immediate_operand (operands[1], VOIDmode)
+ || ! immediate_operand (operands[2], VOIDmode)
+ || ! general_operand (x, VOIDmode))
+ FAIL;
+ /* If this isn't a 16 / 24 / 32 bit field, or if
+ it doesn't start on a byte boundary, then fail. */
+ bitsize = INTVAL (operands[1]);
+ if (bitsize < 16 || bitsize > 32 || bitsize % 8 != 0
+ || (INTVAL (operands[2]) % 8) != 0)
+ FAIL;
+
+ size = bitsize / 8;
+ orig_address = XEXP (operands[0], 0);
+ shift_reg = gen_reg_rtx (SImode);
+ if (CONST_INT_P (x))
+ {
+ v = INTVAL (x);
+ qi_val = force_reg (QImode, GEN_INT (trunc_int_for_mode (v, QImode)));
+ }
+ else
+ {
+ emit_insn (gen_movsi (shift_reg, operands[3]));
+ qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3);
+ }
+ addr_target = copy_addr_to_reg (plus_constant (orig_address, size - 1));
+
+ operands[0] = replace_equiv_address (operands[0], addr_target);
+ emit_insn (gen_movqi (operands[0], qi_val));
+
+ while (size -= 1)
+ {
+ if (CONST_INT_P (x))
+ qi_val
+ = force_reg (QImode, GEN_INT (trunc_int_for_mode (v >>= 8, QImode)));
+ else
+ {
+ emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8)));
+ qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3);
+ }
+ emit_insn (gen_addsi3 (addr_target, addr_target, constm1_rtx));
+ emit_insn (gen_movqi (operands[0], qi_val));
+ }
+
+ DONE;
+}")
+
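+;; Rough expansion sketch for the byte-wise fallback above (big endian,
+;; byte-aligned field): the least significant byte is stored at the
+;; highest address first, e.g. for a 24-bit field roughly
+;;
+;;   mov.b  rVAL,@rADDR     ! byte at offset 2 (LSB)
+;;   shlr8  rVAL            ! shift next byte into position
+;;   add    #-1,rADDR
+;;   mov.b  rVAL,@rADDR     ! byte at offset 1
+;;   ...
+;;
+;; (rVAL/rADDR are placeholder register names).  The regular stride is
+;; what lets auto-inc/dec addressing be formed later, as noted above.
+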
+(define_insn "movua"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (unspec:SI [(match_operand:BLK 1 "unaligned_load_operand" "Sua>")]
+ UNSPEC_MOVUA))]
+ "TARGET_SH4A_ARCH"
+ "movua.l %1,%0"
+ [(set_attr "type" "movua")])
+
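+;; Usage sketch (assumes -m4a and a pointer of unknown alignment):
+;;
+;;   int load32 (const unsigned char *p)
+;;   {
+;;     int v;
+;;     memcpy (&v, p, 4);   /* unaligned 32-bit read */
+;;     return v;
+;;   }
+;;
+;; such a read can go through the extv/extzv expanders below and become a
+;; single movua.l instead of four byte loads plus shifts.
+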
+;; We shouldn't need this, but cse replaces increments with references
+;; to other regs before flow has a chance to create post_inc
+;; addressing modes, and only postreload's cse_move2add brings the
+;; increments back to a usable form.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" ""))
+ (const_int 32) (const_int 0)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (post_inc:SI
+ (match_operand:SI 1 "register_operand" "")))
+ (const_int 32) (const_int 0)))]
+ "")
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH || TARGET_SH2A"
+{
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[1])
+ || satisfies_constraint_Sbv (operands[1]))
+ && satisfies_constraint_M (operands[2])
+ && satisfies_constraint_K03 (operands[3]))
+ {
+ emit_insn (gen_bldsign_m2a (operands[1], operands[3]));
+ if (REGNO (operands[0]) != T_REG)
+ emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG)));
+ DONE;
+ }
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32)
+ {
+ rtx src = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (src, GEN_INT (4));
+ emit_insn (gen_movua (operands[0], src));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH || TARGET_SH2A"
+{
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[1])
+ || satisfies_constraint_Sbv (operands[1]))
+ && satisfies_constraint_M (operands[2])
+ && satisfies_constraint_K03 (operands[3]))
+ {
+ emit_insn (gen_bld_m2a (operands[1], operands[3]));
+ if (REGNO (operands[0]) != T_REG)
+ emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG)));
+ DONE;
+ }
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == -24 * (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+ && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32)
+ {
+ rtx src = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (src, GEN_INT (4));
+ emit_insn (gen_movua (operands[0], src));
+ DONE;
+ }
+
+ FAIL;
+})
+
+;; SH2A instructions for bitwise operations.
+
+;; Clear a bit in a memory location.
+(define_insn "bclr_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (and:QI
+ (not:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03")))
+ (match_dup 0)))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bclr.b\\t%1,%0
+ bclr.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bclrmem_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "Psz,Psz")))]
+ "TARGET_SH2A && satisfies_constraint_Psz (operands[1]) && TARGET_BITOPS"
+ "@
+ bclr.b\\t%W1,%0
+ bclr.b\\t%W1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Set a bit in a memory location.
+(define_insn "bset_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (ior:QI
+ (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03"))
+ (match_dup 0)))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bset.b\\t%1,%0
+ bset.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bsetmem_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "Pso,Pso")))]
+ "TARGET_SH2A && satisfies_constraint_Pso (operands[1]) && TARGET_BITOPS"
+ "@
+ bset.b\\t%V1,%0
+ bset.b\\t%V1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
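+;; Minimal C sketch for the bclr/bset patterns above (assumes -m2a with
+;; -mbitops and a directly addressable byte):
+;;
+;;   volatile unsigned char flags;
+;;   void clear3 (void) { flags &= ~(1 << 3); }  /* -> bclr.b #3,... */
+;;   void set3 (void)   { flags |=  (1 << 3); }  /* -> bset.b #3,... */
+;;
+;; Both update the bit in memory directly, without loading the byte into
+;; a general register first.
+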
+;; Transfer the contents of the T bit to a specified bit of memory.
+(define_insn "bst_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,m")
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (and:QI
+ (not:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03")))
+ (match_dup 0))
+ (ior:QI
+ (ashift:QI (const_int 1) (match_dup 1))
+ (match_dup 0))))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bst.b\\t%1,%0
+ bst.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4")])
+
+;; Store a specified bit of memory in the T bit.
+(define_insn "bld_m2a"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03")))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bld.b\\t%1,%0
+ bld.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Store a specified bit of memory in the T bit (sign-extracting variant).
+(define_insn "bldsign_m2a"
+ [(set (reg:SI T_REG)
+ (sign_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03")))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bld.b\\t%1,%0
+ bld.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Store a specified bit of the low byte of a register in the T bit.
+(define_insn "bld_reg"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03")))]
+ "TARGET_SH2A"
+ "bld\\t%1,%0")
+
+(define_insn "*bld_regqi"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (match_operand:QI 0 "arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03")))]
+ "TARGET_SH2A"
+ "bld\\t%1,%0")
+
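+;; Sketch: on SH2A, a single-bit test of a register value, e.g.
+;;
+;;   int bit2 (int x) { return (x >> 2) & 1; }
+;;
+;; can be done with "bld #2,rX" followed by movt, avoiding a separate
+;; shift/and pair.
+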
+;; Take logical and of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "band_m2a"
+ [(set (reg:SI T_REG)
+ (and:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ band.b\\t%1,%0
+ band.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bandreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+ (match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ band.b\\t%2,%1\;movt\\t%0
+ band.b\\t%2,@(0,%t1)\;movt\\t%0"
+ [(set_attr "length" "6,6")])
+
+;; Take logical or of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "bor_m2a"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bor.b\\t%1,%0
+ bor.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "borreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+ (match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bor.b\\t%2,%1\;movt\\t%0
+ bor.b\\t%2,@(0,%t1)\;movt\\t%0"
+ [(set_attr "length" "6,6")])
+
+;; Take exclusive or of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "bxor_m2a"
+ [(set (reg:SI T_REG)
+ (xor:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bxor.b\\t%1,%0
+ bxor.b\\t%1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bxorreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+ (match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS"
+ "@
+ bxor.b\\t%2,%1\;movt\\t%0
+ bxor.b\\t%2,@(0,%t1)\;movt\\t%0"
+ [(set_attr "length" "6,6")])
+
+
+;; -------------------------------------------------------------------------
+;; Peepholes
+;; -------------------------------------------------------------------------
+;; This matches cases where the bit in a memory location is set.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")))
+ (set (match_dup 0)
+ (ior:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand" "Pso,Pso")))
+ (set (match_dup 1)
+ (match_operand 3 "arith_reg_operand" "r,r"))]
+ "TARGET_SH2A && TARGET_BITOPS
+ && satisfies_constraint_Pso (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])"
+ [(set (match_dup 1)
+ (ior:QI (match_dup 1)
+ (match_dup 2)))]
+ "")
+
+;; This matches cases where the bit in a memory location is cleared.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand" "Psz,Psz")))
+ (set (match_dup 1)
+ (match_operand 3 "arith_reg_operand" "r,r"))]
+ "TARGET_SH2A && TARGET_BITOPS
+ && satisfies_constraint_Psz (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])"
+ [(set (match_dup 1)
+ (and:QI (match_dup 1)
+ (match_dup 2)))]
+ "")
+
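+;; Example of the read-modify-write sequences targeted by the two
+;; peepholes above (a sketch): for
+;;
+;;   obj.byte |= 0x10;
+;;
+;; earlier passes leave a sign-extending byte load, a full-word ior, and
+;; a byte store; the peephole collapses this into a single QImode ior on
+;; the memory operand, which then matches bsetmem_m2a.
+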
+;; This matches cases where a stack pointer increment at the start of the
+;; epilogue combines with a stack slot read loading the return value.
+
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "arith_reg_operand" "")))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH1 && REGNO (operands[1]) != REGNO (operands[0])"
+ "mov.l @%1+,%0")
+
+;; See the comment on the dt combiner pattern above.
+
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (reg:SI T_REG)
+ (eq:SI (match_dup 0)
+ (const_int 0)))]
+ "TARGET_SH2"
+ "dt %0")
+
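+;; Sketch: a counted loop such as
+;;
+;;   do { ... } while (--n != 0);
+;;
+;; leaves an "add #-1" followed by a comparison against zero, which this
+;; peephole fuses into a single dt (decrement and test) on SH2 and up.
+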
+;; These convert sequences such as `mov #k,r0; add r15,r0; mov.l @r0,rn'
+;; to `mov #k,r0; mov.l @(r0,r15),rn'. These sequences are generated by
+;; reload when the constant is too large for a reg+offset address.
+
+;; ??? We would get much better code if this was done in reload. This would
+;; require modifying find_reloads_address to recognize that if the constant
+;; is out-of-range for an immediate add, then we get better code by reloading
+;; the constant into a register than by reloading the sum into a register,
+;; since the former is one instruction shorter if the address does not need
+;; to be offsettable. Unfortunately this does not work, because there is
+;; only one register, r0, that can be used as an index register. This register
+;; is also the function return value register. So, if we try to force reload
+;; to use double-reg addresses, then we end up with some instructions that
+;; need to use r0 twice. The only way to fix this is to change the calling
+;; convention so that r0 is not used to return values.
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 2 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.l %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:SI 2 "general_movdst_operand" "")
+ (mem:SI (match_dup 0)))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.l @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:HI (match_dup 0))
+ (match_operand:HI 2 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.w %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:HI 2 "general_movdst_operand" "")
+ (mem:HI (match_dup 0)))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.w @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QI (match_dup 0))
+ (match_operand:QI 2 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.b %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:QI 2 "general_movdst_operand" "")
+ (mem:QI (match_dup 0)))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0 && reg_unused_after (operands[0], insn)"
+ "mov.b @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2]) && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:SF 2 "general_movdst_operand" "")
+ (mem:SF (match_dup 0)))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2]) && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "TARGET_SH2E && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2])
+ && FP_OR_XD_REGISTER_P (REGNO (operands[2])))
+ || (GET_CODE (operands[2]) == SUBREG
+ && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2])))))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (match_operand:SF 2 "general_movdst_operand" "")
+ (mem:SF (match_dup 0)))]
+ "TARGET_SH2E && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2])
+ && FP_OR_XD_REGISTER_P (REGNO (operands[2])))
+ || (GET_CODE (operands[2]) == SUBREG
+ && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2])))))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} @(%0,%1),%2")
+
+;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF).
+(define_insn "sp_switch_1"
+ [(const_int 1) (match_operand:SI 0 "symbol_ref_operand" "s")]
+ "TARGET_SH1"
+ "*
+{
+ output_asm_insn (\"mov.l r0,@-r15\;mov.l %0,r0\", operands);
+ output_asm_insn (\"mov.l @r0,r0\;mov.l r15,@-r0\", operands);
+ return \"mov r0,r15\";
+}"
+ [(set_attr "length" "10")])
+
+;; Switch back to the original stack for interrupt functions with the
+;; sp_switch attribute.
+(define_insn "sp_switch_2"
+ [(const_int 2)]
+ "TARGET_SH1"
+ "mov.l @r15+,r15\;mov.l @r15+,r0"
+ [(set_attr "length" "4")])
+
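+;; Usage sketch for the two stack-switch insns above:
+;;
+;;   void handler (void)
+;;     __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
+;;
+;; makes the prologue emit sp_switch_1, which saves r0 and the old r15
+;; and loads r15 from alt_stack, and the epilogue emit sp_switch_2 to
+;; restore them.
+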
+;; Integer vector moves
+
+(define_expand "movv8qi"
+ [(set (match_operand:V8QI 0 "general_movdst_operand" "")
+ (match_operand:V8QI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+ "{ if (prepare_move_operands (operands, V8QImode)) DONE; }")
+
+(define_insn "movv8qi_i"
+ [(set (match_operand:V8QI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V8QI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V8QImode)
+ || sh_register_operand (operands[1], V8QImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")])
+
+(define_split
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "")
+ (subreg:V8QI (const_int 0) 0))]
+ "TARGET_SHMEDIA"
+ [(set (match_dup 0)
+ (const_vector:V8QI [(const_int 0) (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)]))])
+
+(define_split
+ [(set (match_operand 0 "arith_reg_dest" "")
+ (match_operand 1 "sh_rep_vec" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))
+ && GET_MODE_SIZE (GET_MODE (operands[0])) == 8
+ && (XVECEXP (operands[1], 0, 0) != const0_rtx
+ || XVECEXP (operands[1], 0, 1) != const0_rtx)
+ && (XVECEXP (operands[1], 0, 0) != constm1_rtx
+ || XVECEXP (operands[1], 0, 1) != constm1_rtx)"
+ [(set (match_dup 0) (match_dup 1))
+ (match_dup 2)]
+ "
+{
+ int unit_size = GET_MODE_UNIT_SIZE (GET_MODE (operands[1]));
+ rtx elt1 = XVECEXP (operands[1], 0, 1);
+
+ if (unit_size > 2)
+ operands[2] = gen_mshflo_l (operands[0], operands[0], operands[0]);
+ else
+ {
+ if (unit_size < 2)
+ operands[0] = gen_rtx_REG (V4HImode, true_regnum (operands[0]));
+ operands[2] = gen_mperm_w0 (operands[0], operands[0]);
+ }
+ operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+ operands[1] = XVECEXP (operands[1], 0, 0);
+ if (unit_size < 2)
+ {
+ if (CONST_INT_P (operands[1]) && CONST_INT_P (elt1))
+ operands[1]
+ = GEN_INT (TARGET_LITTLE_ENDIAN
+ ? (INTVAL (operands[1]) & 0xff) + (INTVAL (elt1) << 8)
+ : (INTVAL (operands[1]) << 8) + (INTVAL (elt1) & 0xff));
+ else
+ {
+ operands[0] = gen_rtx_REG (V2QImode, true_regnum (operands[0]));
+ operands[1]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, operands[1], elt1));
+ }
+ }
+}")
+
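+;; Sketch: loading a replicated vector constant such as {5, 5, 5, 5}
+;; (V4HImode) via the split above materializes the single element and
+;; then broadcasts it with mperm.w (halfword and byte elements) or
+;; mshflo.l (32-bit elements), instead of building the full 64-bit
+;; immediate.
+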
+(define_split
+ [(set (match_operand 0 "arith_reg_dest" "")
+ (match_operand 1 "sh_const_vec" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ rtx v = operands[1];
+ enum machine_mode new_mode
+ = mode_for_size (GET_MODE_BITSIZE (GET_MODE (v)), MODE_INT, 0);
+
+ operands[0] = gen_rtx_REG (new_mode, true_regnum (operands[0]));
+ operands[1]
+ = simplify_subreg (new_mode, operands[1], GET_MODE (operands[1]), 0);
+}")
+
+(define_expand "movv2hi"
+ [(set (match_operand:V2HI 0 "general_movdst_operand" "")
+ (match_operand:V2HI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+ "{ if (prepare_move_operands (operands, V2HImode)) DONE; }")
+
+(define_insn "movv2hi_i"
+ [(set (match_operand:V2HI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V2HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V2HImode)
+ || sh_register_operand (operands[1], V2HImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set (attr "highpart")
+ (cond [(ne (symbol_ref "sh_contains_memref_p (insn)") (const_int 0))
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "movv4hi"
+ [(set (match_operand:V4HI 0 "general_movdst_operand" "")
+ (match_operand:V4HI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+ "{ if (prepare_move_operands (operands, V4HImode)) DONE; }")
+
+(define_insn "movv4hi_i"
+ [(set (match_operand:V4HI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V4HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V4HImode)
+ || sh_register_operand (operands[1], V4HImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set_attr "highpart" "depend")])
+
+(define_expand "movv2si"
+ [(set (match_operand:V2SI 0 "general_movdst_operand" "")
+ (match_operand:V2SI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+ "{ if (prepare_move_operands (operands, V2SImode)) DONE; }")
+
+(define_insn "movv2si_i"
+ [(set (match_operand:V2SI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V2SI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V2SImode)
+ || sh_register_operand (operands[1], V2SImode))"
+ "@
+ add %1, r63, %0
+ #
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set_attr "highpart" "depend")])
+
+;; Multimedia Intrinsics
+
+(define_insn "absv2si2"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (abs:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mabs.l %1, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "absv4hi2"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (abs:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mabs.w %1, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "addv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madd.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "addv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madd.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn_and_split "addv2hi3"
+ [(set (match_operand:V2HI 0 "arith_reg_dest" "=r")
+ (plus:V2HI (match_operand:V2HI 1 "extend_reg_operand" "%r")
+ (match_operand:V2HI 2 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "#"
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+ "
+{
+ rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0);
+ rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0);
+ rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0);
+ rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0);
+ rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0);
+
+ emit_insn (gen_addv4hi3 (v4hi_dst, src0, src1));
+ emit_insn (gen_truncdisi2 (si_dst, di_dst));
+ DONE;
+}"
+ [(set_attr "highpart" "must_split")])
+
+(define_insn "ssaddv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.l %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "usaddv8qi3"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (us_plus:V8QI (match_operand:V8QI 1 "arith_reg_operand" "%r")
+ (match_operand:V8QI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.ub %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ssaddv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.w %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv8qi"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (neg:V8QI (eq:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.b %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv2si"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (neg:V2SI (eq:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.l %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv4hi"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (neg:V4HI (eq:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.w %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtuv8qi"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (neg:V8QI (gtu:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.ub %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtv2si"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (neg:V2SI (gt:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.l %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtv4hi"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (neg:V4HI (gt:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.w %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mcmv"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 3 "arith_reg_operand" "0")
+ (not:DI (match_dup 2)))))]
+ "TARGET_SHMEDIA"
+ "mcmv %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
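+;; The mcnvs patterns concatenate two vectors while narrowing each
+;; element, with signed (.lw, .wb) or unsigned (.wub) saturation.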
+(define_insn "mcnvs_lw"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ"))
+ (ss_truncate:V2HI (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.lw %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mcnvs_wb"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ"))
+ (ss_truncate:V4QI (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.wb %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mcnvs_wub"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_concat:V8QI
+ (us_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ"))
+ (us_truncate:V4QI (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.wub %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mextr_rl"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 3 "mextr_bit_offset" "i"))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 4 "mextr_bit_offset" "i"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+ "*
+{
+ static char templ[21];
+
+ sprintf (templ, \"mextr%d\\t%%N1, %%N2, %%0\",
+ (int) INTVAL (operands[3]) >> 3);
+ return templ;
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*mextr_lr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 3 "mextr_bit_offset" "i"))
+ (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 4 "mextr_bit_offset" "i"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+ "*
+{
+ static char templ[21];
+
+ sprintf (templ, \"mextr%d\\t%%N2, %%N1, %%0\",
+ (int) INTVAL (operands[4]) >> 3);
+ return templ;
+}"
+ [(set_attr "type" "arith_media")])
+
+; mextrN can be modelled with vec_select / vec_concat, but the selection
+; vector then varies depending on endianness.
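+; Each mextrN expander below calls mextr_rl with byte shift counts N*8
+; and (8-N)*8, which always sum to 64; mextr_rl then derives the mextrN
+; mnemonic from the right-shift count.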
+(define_expand "mextr1"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (1 * 8), GEN_INT (7 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr2"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (2 * 8), GEN_INT (6 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr3"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (3 * 8), GEN_INT (5 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr4"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (4 * 8), GEN_INT (4 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr5"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (5 * 8), GEN_INT (3 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr6"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (6 * 8), GEN_INT (2 * 8)));
+ DONE;
+}")
+
+(define_expand "mextr7"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (7 * 8), GEN_INT (1 * 8)));
+ DONE;
+}")
+
+(define_expand "mmacfx_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2HI 1 "extend_reg_operand" "")
+ (match_operand:V2HI 2 "extend_reg_operand" "")
+ (match_operand:V2SI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mmacfx_wl_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+}")
+
+;; This could be "highpart ignore" if it only had inputs 2 and 3, but
+;; input 1 is "depend".
+(define_insn "mmacfx_wl_i"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_plus:V2SI
+ (match_operand:V2SI 1 "arith_reg_operand" "0")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI
+ (mult:V2SI
+ (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r"))
+ (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r"))))
+ (const_int 1)))))]
+ "TARGET_SHMEDIA"
+ "mmacfx.wl %2, %3, %0"
+ [(set_attr "type" "mac_media")
+ (set_attr "highpart" "depend")])
+
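+;; As mmacfx_wl above, but subtracting the doubled product (ss_minus)
+;; instead of adding it.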
+(define_expand "mmacnfx_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2HI 1 "extend_reg_operand" "")
+ (match_operand:V2HI 2 "extend_reg_operand" "")
+ (match_operand:V2SI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mmacnfx_wl_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mmacnfx_wl_i"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_minus:V2SI
+ (match_operand:V2SI 1 "arith_reg_operand" "0")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI
+ (mult:V2SI
+ (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r"))
+ (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r"))))
+ (const_int 1)))))]
+ "TARGET_SHMEDIA"
+ "mmacnfx.wl %2, %3, %0"
+ [(set_attr "type" "mac_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mulv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (mult:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mmul.l %1, %2, %0"
+ [(set_attr "type" "d2mpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mulv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (mult:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mmul.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfx_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V2SI
+ (ashiftrt:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r"))
+ (sign_extend:V2DI (match_operand:V2SI 2 "arith_reg_operand" "r")))
+ (const_int 31))))]
+ "TARGET_SHMEDIA"
+ "mmulfx.l %1, %2, %0"
+ [(set_attr "type" "d2mpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfx_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (const_int 15))))]
+ "TARGET_SHMEDIA"
+ "mmulfx.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfxrp_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (const_int 16384))
+ (const_int 15))))]
+ "TARGET_SHMEDIA"
+ "mmulfxrp.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
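+;; mmulhi / mmullo select the high or low V2SI half of the widened
+;; V4SI product; which machine insn implements which half depends on
+;; endianness, so the expanders dispatch on TARGET_LITTLE_ENDIAN.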
+(define_expand "mmulhi_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul23_wl : gen_mmul01_wl)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "mmullo_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul01_wl : gen_mmul23_wl)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mmul23_wl"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mmulhi.wl %1, %2, %0\"
+ : \"mmullo.wl %1, %2, %0\");"
+ [(set_attr "type" "dmpy_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mmul01_wl"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mmullo.wl %1, %2, %0\"
+ : \"mmulhi.wl %1, %2, %0\");"
+ [(set_attr "type" "dmpy_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
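+;; mmulsum.wq is a dot product: the four widened 16-bit products are
+;; summed and added to the DImode accumulator.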
+(define_expand "mmulsum_wq"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")
+ (match_operand:DI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_mmulsum_wq_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mmulsum_wq_i"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (plus:DI
+ (plus:DI
+ (vec_select:DI
+ (mult:V4DI
+ (sign_extend:V4DI (match_operand:V4HI 2 "arith_reg_operand" "r"))
+ (sign_extend:V4DI (match_operand:V4HI 3 "arith_reg_operand" "r")))
+ (parallel [(const_int 0)]))
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 1)])))
+ (plus:DI
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 2)]))
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 3)]))))))]
+ "TARGET_SHMEDIA"
+ "mmulsum.wq %2, %3, %0"
+ [(set_attr "type" "mac_media")])
+
+(define_expand "mperm_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mperm_w_little : gen_mperm_w_big)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+; This use of vec_select isn't exactly correct according to rtl.texi
+; (because the selection vector is not constant), but it seems a
+; straightforward extension.
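+; E.g. in little-endian mode the control value 0xe4 (2-bit fields
+; 0,1,2,3) selects every lane from itself, leaving the vector unchanged.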
+(define_insn "mperm_w_little"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (parallel
+ [(zero_extract:QI (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")
+ (const_int 2) (const_int 0))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 2))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 4))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 6))])))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ "mperm.w %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "mperm_w_big"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (parallel
+ [(zero_extract:QI (not:QI (match_operand:QI 2
+ "extend_reg_or_0_operand" "rZ"))
+ (const_int 2) (const_int 0))
+ (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 2))
+ (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 4))
+ (zero_extract:QI (not:QI (match_dup 2))
+ (const_int 2) (const_int 6))])))]
+ "TARGET_SHMEDIA && ! TARGET_LITTLE_ENDIAN"
+ "mperm.w %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "mperm_w0"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_duplicate:V4HI (truncate:HI (match_operand 1
+ "trunc_hi_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "mperm.w %1, r63, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
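+;; msad.ubq sums the absolute differences of the eight unsigned byte
+;; pairs and adds the total to the DImode accumulator.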
+(define_expand "msad_ubq"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "")
+ (match_operand:DI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn (gen_msad_ubq_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "msad_ubq_i"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (plus:DI
+ (plus:DI
+ (plus:DI
+ (plus:DI
+ (match_operand:DI 1 "arith_reg_operand" "0")
+ (abs:DI (vec_select:DI
+ (minus:V8DI
+ (zero_extend:V8DI
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (zero_extend:V8DI
+ (match_operand:V8QI 3 "arith_reg_or_0_operand" "rZ")))
+ (parallel [(const_int 0)]))))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 1)]))))
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 2)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 3)])))))
+ (plus:DI
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 4)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 5)]))))
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 6)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 7)])))))))]
+ "TARGET_SHMEDIA"
+ "msad.ubq %N2, %N3, %0"
+ [(set_attr "type" "mac_media")])
+
+(define_insn "mshalds_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 2 "arith_reg_operand" "r")
+ (const_int 31)))))]
+ "TARGET_SHMEDIA"
+ "mshalds.l %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mshalds_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashift:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 2 "arith_reg_operand" "r")
+ (const_int 15)))))]
+ "TARGET_SHMEDIA"
+ "mshalds.w %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ashrv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshard.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ashrv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ashiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshard.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mshards_q"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:HI
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mshards.q %1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
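+;; The mshfhi / mshflo expanders interleave the high or low halves of
+;; two vectors; as with the widening multiplies above, the insn that
+;; implements each half depends on endianness.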
+(define_expand "mshfhi_b"
+ [(match_operand:V8QI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_b : gen_mshf0_b)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "mshflo_b"
+ [(match_operand:V8QI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_b : gen_mshf4_b)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mshf4_b"
+ [(set
+ (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshfhi.b %N1, %N2, %0\"
+ : \"mshflo.b %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_b"
+ [(set
+ (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshflo.b %N1, %N2, %0\"
+ : \"mshfhi.b %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_expand "mshfhi_l"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_l : gen_mshf0_l)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "mshflo_l"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_l : gen_mshf4_l)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mshf4_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshfhi.l %N1, %N2, %0\"
+ : \"mshflo.l %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshflo.l %N1, %N2, %0\"
+ : \"mshfhi.l %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_expand "mshfhi_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_w : gen_mshf0_w)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_expand "mshflo_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+ "
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_w : gen_mshf4_w)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+(define_insn "mshf4_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshfhi.w %N1, %N2, %0\"
+ : \"mshflo.w %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
+ "TARGET_SHMEDIA"
+ "* return (TARGET_LITTLE_ENDIAN
+ ? \"mshflo.w %N1, %N2, %0\"
+ : \"mshfhi.w %N1, %N2, %0\");"
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshflo_w_x"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V4HI (match_operand:V2HI 1 "extend_reg_or_0_operand" "rZ")
+ (match_operand:V2HI 2 "extend_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 2) (const_int 0) (const_int 3) (const_int 1)])))]
+ "TARGET_SHMEDIA"
+ "mshflo.w %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; These are useful for expanding ANDs and as combiner patterns.
+(define_insn_and_split "mshfhi_l_di"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,f")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ,f")
+ (const_int 32))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ,?f")
+ (const_int -4294967296))))]
+ "TARGET_SHMEDIA"
+ "@
+ mshfhi.l %N1, %N2, %0
+ #"
+ "TARGET_SHMEDIA && reload_completed
+ && ! GENERAL_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+ "
+{
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[2]);
+}"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*mshfhi_l_di_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int -4294967296))
+ (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+ "TARGET_SHMEDIA"
+ "mshfhi.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ior:DI (zero_extend:DI (match_operand:SI 1
+ "extend_reg_or_0_operand" ""))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "")
+ (const_int -4294967296))))
+ (clobber (match_operand:DI 3 "arith_reg_dest" ""))]
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_ashldi3_media (operands[3],
+ simplify_gen_subreg (DImode, operands[1],
+ SImode, 0),
+ GEN_INT (32)));
+ emit_insn (gen_mshfhi_l_di (operands[0], operands[3], operands[2]));
+ DONE;
+}")
+
+(define_insn "mshflo_l_di"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 4294967295))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "*mshflo_l_di_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 4294967295))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; Combiner pattern for trampoline initialization.
+(define_insn_and_split "*double_shori"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 32))
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA
+ && ! (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0xffffffffUL)"
+ "#"
+ "rtx_equal_p (operands[0], operands[1])"
+ [(const_int 0)]
+ "
+{
+ HOST_WIDE_INT v = INTVAL (operands[2]);
+
+ emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v >> 16)));
+ emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v & 65535)));
+ DONE;
+}"
+ [(set_attr "highpart" "ignore")])
+
+(define_insn "*mshflo_l_di_x"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand"
+ "rZ"))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "concat_v2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=r,f,f?")
+;; (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,0,f")
+ (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,f,f")
+ (match_operand:SF 2 "register_operand" "rZ,f,f")))]
+ "TARGET_SHMEDIA"
+ "@
+ mshflo.l %N1, %N2, %0
+ #
+ #"
+ "TARGET_SHMEDIA && reload_completed
+ && ! GENERAL_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (match_dup 2))]
+ "
+{
+ operands[3] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0);
+ operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 4);
+}"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "*mshflo_l_di_x_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))
+ (zero_extend:DI (match_operand:SI 2 "extend_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "ashlv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ashift:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlld.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
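+;; The shift count may arrive wrapped in extensions, a truncate, or a
+;; lowpart subreg; peel those off and re-emit the shift with a lowpart
+;; subreg of the underlying count register.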
+(define_split
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operator 3 "shift_operator"
+ [(match_operand 1 "any_register_operand" "")
+ (match_operand 2 "shift_count_reg_operand" "")]))]
+ "TARGET_SHMEDIA && ! register_operand (operands[2], VOIDmode)"
+ [(set (match_dup 0) (match_dup 3))]
+ "
+{
+ rtx count = operands[2];
+ enum machine_mode outer_mode = GET_MODE (operands[2]), inner_mode;
+
+ while (GET_CODE (count) == ZERO_EXTEND || GET_CODE (count) == SIGN_EXTEND
+ || (GET_CODE (count) == SUBREG && SUBREG_BYTE (count) == 0)
+ || GET_CODE (count) == TRUNCATE)
+ count = XEXP (count, 0);
+ inner_mode = GET_MODE (count);
+ count = simplify_gen_subreg (outer_mode, count, inner_mode,
+ subreg_lowpart_offset (outer_mode, inner_mode));
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+ operands[1], count);
+}")
+
+(define_insn "ashlv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ashift:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlld.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "lshrv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlrd.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "lshrv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (lshiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlrd.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "subv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msub.l %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "subv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msub.w %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn_and_split "subv2hi3"
+ [(set (match_operand:V2HI 0 "arith_reg_dest" "=r")
+ (minus:V2HI (match_operand:V2HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "#"
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+ "
+{
+ rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0);
+ rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0);
+ rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0);
+ rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0);
+ rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0);
+
+ emit_insn (gen_subv4hi3 (v4hi_dst, src0, src1));
+ emit_insn (gen_truncdisi2 (si_dst, di_dst));
+ DONE;
+}"
+ [(set_attr "highpart" "must_split")])
+
+(define_insn "sssubv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.l %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ussubv8qi3"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (us_minus:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.ub %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "sssubv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.w %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+;; Floating Point Intrinsics
+
+(define_insn "fcosa_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FCOSA))]
+ "TARGET_SHMEDIA"
+ "fcosa.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "fsina_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FSINA))]
+ "TARGET_SHMEDIA"
+ "fsina.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "fipr"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (plus:SF (vec_select:SF (mult:V4SF (match_operand:V4SF 1
+ "fp_arith_reg_operand" "f")
+ (match_operand:V4SF 2
+ "fp_arith_reg_operand" "f"))
+ (parallel [(const_int 0)]))
+ (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 1)])))
+ (plus:SF (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 2)]))
+ (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 3)])))))]
+ "TARGET_SHMEDIA"
+ "fipr.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "fsrra_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FSRRA))]
+ "TARGET_SHMEDIA"
+ "fsrra.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "ftrv"
+ [(set (match_operand:V4SF 0 "fp_arith_reg_operand" "=f")
+ (plus:V4SF
+ (plus:V4SF
+ (mult:V4SF
+ (vec_select:V4SF (match_operand:V16SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0) (const_int 5)
+ (const_int 10) (const_int 15)]))
+ (match_operand:V4SF 2 "fp_arith_reg_operand" "f"))
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 4) (const_int 9)
+ (const_int 14) (const_int 3)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 1) (const_int 2)
+ (const_int 3) (const_int 0)]))))
+ (plus:V4SF
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 8) (const_int 13)
+ (const_int 2) (const_int 7)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 12) (const_int 1)
+ (const_int 6) (const_int 11)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 3) (const_int 0)
+ (const_int 1) (const_int 2)]))))))]
+ "TARGET_SHMEDIA"
+ "ftrv.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
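+;; Unaligned access support.  An unaligned load is done with an
+;; ldhi / ldlo pair (a store with sthi / stlo); each insn transfers only
+;; the bytes of the datum that fall within one aligned word, and the
+;; zero_extract RTL below encodes exactly which bits each half handles.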
+(define_insn "ldhi_l"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:SI (and:SI (match_dup 1) (const_int 3)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32"
+ "ldhi.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldhi_q"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 1) (const_int 7)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32"
+ "ldhi.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn_and_split "*ldhi_q_comb0"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 1
+ "register_operand" "r")
+ (match_operand:SI 2
+ "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 1) (const_int 7))
+ (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_ldhi_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;")
+
+(define_insn_and_split "*ldhi_q_comb1"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 1
+ "register_operand" "r")
+ (match_operand:SI 2
+ "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (plus:SI (match_dup 1) (match_operand:SI 3
+ "ua_offset" "I06"))
+ (const_int 7))
+ (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8)
+ && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_ldhi_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;")
+
+(define_insn "ldlo_l"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (and:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:SI (const_int 4) (and:SI (match_dup 1) (const_int 3)))
+ (and:SI (match_dup 1) (const_int 3))))]
+ "TARGET_SHMEDIA32"
+ "ldlo.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_q"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7)))
+ (and:SI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA32"
+ "ldlo.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn_and_split "*ldlo_q_comb0"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7)))
+ (and:SI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_ldlo_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;")
+
+(define_insn_and_split "*ldlo_q_comb1"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8)
+ (and:SI (plus:SI (match_dup 1)
+ (match_operand:SI 3 "ua_offset" "I06"))
+ (const_int 7)))
+ (and:SI (plus:SI (match_dup 1) (match_dup 3)) (const_int 7))))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8)
+ && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_ldlo_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;")
+
+(define_insn "sthi_l"
+ [(set (zero_extract:SI
+ (mem:SI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:SI (and:SI (match_dup 0) (const_int 3)) (const_int 1))
+ (const_int 0))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "sthi.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+;; All unaligned stores are considered to be 'narrow' because they typically
+;; operate on less than a quadword, and when they operate on a full quadword,
+;; the vanilla store high / store low sequence will cause a stall if not
+;; scheduled apart.
+(define_insn "sthi_q"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "sthi.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn_and_split "*sthi_q_comb0"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 0
+ "register_operand" "r")
+ (match_operand:SI 1 "ua_offset"
+ "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[2]));
+ DONE;")
+
+(define_insn_and_split "*sthi_q_comb1"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI (match_operand:SI 0
+ "register_operand" "r")
+ (match_operand:SI 1 "ua_offset"
+ "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (plus:SI (match_dup 0)
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int 1))
+ (const_int 0))
+ (match_operand:DI 3 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & -8)
+ && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[3]));
+ DONE;")
+
+;; This is 'highpart user' because the address is used as a full 64-bit value.
+(define_insn "stlo_l"
+ [(set (zero_extract:SI
+ (mem:SI (and:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:SI (const_int 4) (and:SI (match_dup 0) (const_int 3)))
+ (and:SI (match_dup 0) (const_int 3)))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "stlo.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_q"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7)))
+ (and:SI (match_dup 0) (const_int 7)))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "stlo.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn_and_split "*stlo_q_comb0"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7)))
+ (and:SI (match_dup 0) (const_int 7)))
+ (match_operand:DI 2 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[2]));
+ DONE;")
+
+(define_insn_and_split "*stlo_q_comb1"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (plus:SI (match_dup 0)
+ (match_operand:SI 2
+ "ua_offset" "I06"))
+ (const_int 7)))
+ (and:SI (plus:SI (match_dup 0) (match_dup 2)) (const_int 7)))
+ (match_operand:DI 3 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])"
+ "#"
+ ""
+ [(pc)]
+ "emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[3]));
+ DONE;")
+
+(define_insn "ldhi_l64"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:DI (and:DI (match_dup 1) (const_int 3)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA64"
+ "ldhi.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldhi_q64"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:DI (and:DI (match_dup 1) (const_int 7)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA64"
+ "ldhi.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_l64"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (and:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:DI (const_int 4) (and:DI (match_dup 1) (const_int 3)))
+ (and:DI (match_dup 1) (const_int 3))))]
+ "TARGET_SHMEDIA64"
+ "ldlo.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_q64"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:DI (const_int 8) (and:DI (match_dup 1) (const_int 7)))
+ (and:DI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA64"
+ "ldlo.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "sthi_l64"
+ [(set (zero_extract:SI
+ (mem:SI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:DI (and:DI (match_dup 0) (const_int 3)) (const_int 1))
+ (const_int 0))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "sthi.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "sthi_q64"
+ [(set (zero_extract:DI
+ (mem:DI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:DI (and:DI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "sthi.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_l64"
+ [(set (zero_extract:SI
+ (mem:SI (and:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:DI (const_int 4) (and:DI (match_dup 0) (const_int 3)))
+ (and:DI (match_dup 0) (const_int 3)))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "stlo.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_q64"
+ [(set (zero_extract:DI
+ (mem:DI (and:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:DI (const_int 8) (and:DI (match_dup 0) (const_int 7)))
+ (and:DI (match_dup 0) (const_int 7)))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "stlo.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "nsb"
+ [(set (match_operand:QI 0 "arith_reg_dest" "=r")
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "nsbsi"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extend:SI
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB)))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "nsbdi"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB)))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "ffsdi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ffs:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA"
+ "
+{
+ rtx scratch = gen_reg_rtx (DImode);
+ rtx last;
+
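+  /* x ^ (x - 1) sets every bit up to and including the lowest set bit;
+     shifting right once leaves P low bits set, where P is the position
+     of that lowest set bit, and nsb of such a mask returns 63 - P.
+     Adding -64 gives -(P + 1); the conditional move zeroes this when
+     the input was zero, and the final subtraction from zero negates it
+     into the 1-based ffs result.  */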
+ emit_insn (gen_adddi3 (scratch, operands[1], constm1_rtx));
+ emit_insn (gen_xordi3 (scratch, operands[1], scratch));
+ emit_insn (gen_lshrdi3_media (scratch, scratch, const1_rtx));
+ emit_insn (gen_nsbdi (scratch, scratch));
+ emit_insn (gen_adddi3 (scratch, scratch, GEN_INT (-64)));
+ emit_insn (gen_movdicc_false (scratch, operands[1], const0_rtx, scratch));
+ last = emit_insn (gen_subdi3 (operands[0], const0_rtx, scratch));
+ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (DImode, operands[0]));
+
+ DONE;
+}")
+
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ffs:SI (match_operand:SI 1 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA"
+ "
+{
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx discratch = gen_reg_rtx (DImode);
+ rtx last;
+
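+  /* x & ~(x - 1) (computed with andc) isolates the lowest set bit, a
+     value 1 << P for which nsb returns 62 - P; 63 - nsb thus recovers
+     the 1-based ffs result, and a zero input (nsb = 63) yields 0
+     without any special casing.  */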
+ emit_insn (gen_adddi3 (discratch,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ constm1_rtx));
+ emit_insn (gen_andcdi3 (discratch,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ discratch));
+ emit_insn (gen_nsbsi (scratch, discratch));
+ last = emit_insn (gen_subsi3 (operands[0],
+ force_reg (SImode, GEN_INT (63)), scratch));
+ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (SImode, operands[0]));
+
+ DONE;
+}")
+
+(define_insn "byterev"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI (match_operand:V8QI 1 "arith_reg_operand" "r")
+ (parallel [(const_int 7) (const_int 6) (const_int 5)
+ (const_int 4) (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "TARGET_SHMEDIA"
+ "byterev %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*prefetch_media"
+ [(prefetch (match_operand:QI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SHMEDIA"
+ "*
+{
+ operands[0] = gen_rtx_MEM (QImode, operands[0]);
+ output_asm_insn (\"ld%M0.b %m0,r63\", operands);
+ return \"\";
+}"
+ [(set_attr "type" "other")])
+
+(define_insn "*prefetch_i4"
+ [(prefetch (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "(TARGET_HARD_SH4 || TARGET_SHCOMPACT) && !TARGET_VXWORKS_RTP"
+ "*
+{
+ return \"pref @%0\";
+}"
+ [(set_attr "type" "other")])
+
+;; In user mode, the "pref" instruction will raise a RADDERR exception
+;; for accesses to [0x80000000,0xffffffff]. This makes it an unsuitable
+;; implementation of __builtin_prefetch for VxWorks RTPs.
+(define_expand "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SH2A || ((TARGET_HARD_SH4 || TARGET_SH5)
+ && (TARGET_SHMEDIA || !TARGET_VXWORKS_RTP))"
+ "
+{
+ if (GET_MODE (operands[0]) != Pmode
+ || !CONST_INT_P (operands[1])
+ || !CONST_INT_P (operands[2]))
+ FAIL;
+ if (! TARGET_SHMEDIA)
+ operands[0] = force_reg (Pmode, operands[0]);
+}")
+
+(define_insn "prefetch_m2a"
+ [(prefetch (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SH2A"
+ "pref\\t@%0"
+ [(set_attr "type" "other")])
+
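+;; Output "alloco base, offset", splitting a reg+offset address into its
+;; two components (using offset 0 when the address is a plain register).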
+(define_insn "alloco_i"
+ [(set (mem:BLK (match_operand:QI 0 "cache_address_operand" "p"))
+ (unspec:BLK [(const_int 0)] UNSPEC_ALLOCO))]
+ "TARGET_SHMEDIA32"
+ "*
+{
+ rtx xops[2];
+
+ if (GET_CODE (operands[0]) == PLUS)
+ {
+ xops[0] = XEXP (operands[0], 0);
+ xops[1] = XEXP (operands[0], 1);
+ }
+ else
+ {
+ xops[0] = operands[0];
+ xops[1] = const0_rtx;
+ }
+ output_asm_insn (\"alloco %0, %1\", xops);
+ return \"\";
+}"
+ [(set_attr "type" "other")])
+
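+;; After reload, clean up moves whose source contains truncates that
+;; shmedia_cleanup_truncate can prove redundant; FAIL when nothing
+;; changed so the split does not fire vacuously.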
+(define_split
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operand 1 "" ""))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "
+{
+ int n_changes = 0;
+
+ for_each_rtx (&operands[1], shmedia_cleanup_truncate, &n_changes);
+ if (!n_changes)
+ FAIL;
+}")
+
+; Stack Protector Patterns
+
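+; stack_protect_set copies the guard word and then zeroes the scratch
+; register so the canary value does not linger in a register.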
+(define_expand "stack_protect_set"
+ [(set (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA64)
+ emit_insn (gen_stack_protect_set_di_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_set_si_media (operands[0], operands[1]));
+ }
+ else
+ emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_insn "stack_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "!TARGET_SHMEDIA"
+ "mov.l\t%1, %2\;mov.l\t%2, %0\;mov\t#0, %2"
+ [(set_attr "type" "other")
+ (set_attr "length" "6")])
+
+(define_insn "stack_protect_set_si_media"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA"
+ "ld%M1.l\t%m1, %2\;st%M0.l\t%m0, %2\;movi\t0, %2"
+ [(set_attr "type" "other")
+ (set_attr "length" "12")])
+
+(define_insn "stack_protect_set_di_media"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA64"
+ "ld%M1.q\t%m1, %2\;st%M0.q\t%m0, %2\;movi\t0, %2"
+ [(set_attr "type" "other")
+ (set_attr "length" "12")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
+ rtx test;
+
+ test = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ if (TARGET_SHMEDIA64)
+ {
+ emit_insn (gen_stack_protect_test_di_media (tmp, operands[0],
+ operands[1]));
+ emit_jump_insn (gen_cbranchdi4 (test, tmp, const0_rtx, operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_stack_protect_test_si_media (tmp, operands[0],
+ operands[1]));
+ emit_jump_insn (gen_cbranchsi4 (test, tmp, const0_rtx, operands[2]));
+ }
+ }
+ else
+ {
+ emit_insn (gen_stack_protect_test_si (operands[0], operands[1]));
+ emit_jump_insn (gen_branch_true (operands[2]));
+ }
+
+ DONE;
+})
+
+(define_insn "stack_protect_test_si"
+ [(set (reg:SI T_REG)
+ (unspec:SI [(match_operand:SI 0 "memory_operand" "m")
+ (match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (set (match_scratch:SI 3 "=&r") (const_int 0))]
+ "!TARGET_SHMEDIA"
+ "mov.l\t%0, %2\;mov.l\t%1, %3\;cmp/eq\t%2, %3\;mov\t#0, %2\;mov\t#0, %3"
+ [(set_attr "type" "other")
+ (set_attr "length" "10")])
+
+(define_insn "stack_protect_test_si_media"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 3 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA"
+ "ld%M1.l\t%m1, %0\;ld%M2.l\t%m2, %3\;cmpeq\t%0, %3, %0\;movi\t0, %3"
+ [(set_attr "type" "other")
+ (set_attr "length" "16")])
+
+(define_insn "stack_protect_test_di_media"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:DI 3 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA64"
+ "ld%M1.q\t%m1, %0\;ld%M2.q\t%m2, %3\;cmpeq\t%0, %3, %0\;movi\t0, %3"
+ [(set_attr "type" "other")
+ (set_attr "length" "16")])
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
new file mode 100644
index 000000000..8464bd75f
--- /dev/null
+++ b/gcc/config/sh/sh.opt
@@ -0,0 +1,338 @@
+; Options for the SH port of the compiler.
+
+; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; Used for various architecture options.
+Mask(SH_E)
+
+;; Set if the default precision of the FPU is single.
+Mask(FPU_SINGLE)
+
+;; Set if we should generate code using type 2A insns.
+Mask(HARD_SH2A)
+
+;; Set if we should generate code using type 2A DF insns.
+Mask(HARD_SH2A_DOUBLE)
+
+;; Set if compiling for SH4 hardware (to be used for insn costs etc.)
+Mask(HARD_SH4)
+
+;; Set if we should generate code for a SH5 CPU (either ISA).
+Mask(SH5)
+
+;; Set if we should save all target registers.
+Mask(SAVE_ALL_TARGET_REGS)
+
+m1
+Target RejectNegative Mask(SH1) Condition(SUPPORT_SH1)
+Generate SH1 code
+
+m2
+Target RejectNegative Mask(SH2) Condition(SUPPORT_SH2)
+Generate SH2 code
+
+m2a
+Target RejectNegative Condition(SUPPORT_SH2A)
+Generate default double-precision SH2a-FPU code
+
+m2a-nofpu
+Target RejectNegative Condition(SUPPORT_SH2A_NOFPU)
+Generate SH2a FPU-less code
+
+m2a-single
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE)
+Generate default single-precision SH2a-FPU code
+
+m2a-single-only
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE_ONLY)
+Generate only single-precision SH2a-FPU code
+
+m2e
+Target RejectNegative Condition(SUPPORT_SH2E)
+Generate SH2e code
+
+m3
+Target RejectNegative Mask(SH3) Condition(SUPPORT_SH3)
+Generate SH3 code
+
+m3e
+Target RejectNegative Condition(SUPPORT_SH3E)
+Generate SH3e code
+
+m4
+Target RejectNegative Mask(SH4) Condition(SUPPORT_SH4)
+Generate SH4 code
+
+m4-100
+Target RejectNegative Condition(SUPPORT_SH4)
+Generate SH4-100 code
+
+m4-200
+Target RejectNegative Condition(SUPPORT_SH4)
+Generate SH4-200 code
+
+;; TARGET_SH4_300 indicates if we have the ST40-300 instruction set and
+;; pipeline - irrespective of ABI.
+m4-300
+Target RejectNegative Condition(SUPPORT_SH4) Var(TARGET_SH4_300)
+Generate SH4-300 code
+
+m4-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4 FPU-less code
+
+m4-100-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-100 FPU-less code
+
+m4-200-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-200 FPU-less code
+
+m4-300-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300)
+Generate SH4-300 FPU-less code
+
+m4-340
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300)
+Generate code for SH4 340 series (MMU/FPU-less)
+;; passes -isa=sh4-nommu-nofpu to the assembler.
+
+m4-400
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate code for SH4 400 series (MMU/FPU-less)
+;; passes -isa=sh4-nommu-nofpu to the assembler.
+
+m4-500
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate code for SH4 500 series (FPU-less)
+;; passes -isa=sh4-nofpu to the assembler.
+
+m4-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4 code
+
+m4-100-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4-100 code
+
+m4-200-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4-200 code
+
+m4-300-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Var(TARGET_SH4_300)
+Generate default single-precision SH4-300 code
+
+m4-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4 code
+
+m4-100-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4-100 code
+
+m4-200-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4-200 code
+
+m4-300-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Var(TARGET_SH4_300)
+Generate only single-precision SH4-300 code
+
+m4a
+Target RejectNegative Mask(SH4A) Condition(SUPPORT_SH4A)
+Generate SH4a code
+
+m4a-nofpu
+Target RejectNegative Condition(SUPPORT_SH4A_NOFPU)
+Generate SH4a FPU-less code
+
+m4a-single
+Target RejectNegative Condition(SUPPORT_SH4A_SINGLE)
+Generate default single-precision SH4a code
+
+m4a-single-only
+Target RejectNegative Condition(SUPPORT_SH4A_SINGLE_ONLY)
+Generate only single-precision SH4a code
+
+m4al
+Target RejectNegative Condition(SUPPORT_SH4AL)
+Generate SH4al-dsp code
+
+m5-32media
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA)
+Generate 32-bit SHmedia code
+
+m5-32media-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU)
+Generate 32-bit FPU-less SHmedia code
+
+m5-64media
+Target RejectNegative Condition(SUPPORT_SH5_64MEDIA)
+Generate 64-bit SHmedia code
+
+m5-64media-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_64MEDIA_NOFPU)
+Generate 64-bit FPU-less SHmedia code
+
+m5-compact
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA)
+Generate SHcompact code
+
+m5-compact-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU)
+Generate FPU-less SHcompact code
+
+maccumulate-outgoing-args
+Target Report Var(TARGET_ACCUMULATE_OUTGOING_ARGS) Init(1)
+Reserve space for outgoing arguments in the function prologue
+
+madjust-unroll
+Target Report Mask(ADJUST_UNROLL) Condition(SUPPORT_ANY_SH5)
+Throttle unrolling to avoid thrashing target registers unless the unroll benefit outweighs this
+
+mb
+Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
+Generate code in big endian mode
+
+mbigtable
+Target Report RejectNegative Mask(BIGTABLE)
+Generate 32-bit offsets in switch tables
+
+mbitops
+Target Report RejectNegative Mask(BITOPS)
+Generate bit instructions
+
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(sh_branch_cost) Init(-1)
+Cost to assume for a branch insn
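+;; (A negative cost, as set via Init(-1) here and in the other cost
+;; options below, presumably means "unset"; the backend then chooses a
+;; per-CPU default.)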
+
+mcbranchdi
+Target Var(TARGET_CBRANCHDI4)
+Enable cbranchdi4 pattern
+
+mcmpeqdi
+Target Var(TARGET_CMPEQDI_T)
+Emit cmpeqdi_t pattern even when -mcbranchdi is in effect.
+
+mcut2-workaround
+Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND)
+Enable SH5 cut2 workaround
+
+mdalign
+Target Report RejectNegative Mask(ALIGN_DOUBLE)
+Align doubles at 64-bit boundaries
+
+mdiv=
+Target RejectNegative Joined Var(sh_div_str) Init("")
+Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table
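+;; For example, -mdiv=inv:call stores the string "inv:call" in
+;; sh_div_str; the backend parses it later to select the strategy.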
+
+mdivsi3_libfunc=
+Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
+Specify name for 32 bit signed division function
+
+mfmovd
+Target RejectNegative Mask(FMOVD)
+Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required.
+
+mfixed-range=
+Target RejectNegative Joined Var(sh_fixed_range_str)
+Specify range of registers to make fixed
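+;; For illustration (hypothetical register range): -mfixed-range=r10-r13
+;; would keep r10 through r13 out of register allocation.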
+
+mfused-madd
+Target Var(TARGET_FMAC)
+Enable the use of the fused floating point multiply-accumulate operation
+
+mgettrcost=
+Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1)
+Cost to assume for gettr insn
+
+mhitachi
+Target Report RejectNegative Mask(HITACHI)
+Follow Renesas (formerly Hitachi) / SuperH calling conventions
+
+mieee
+Target Var(TARGET_IEEE)
+Increase the IEEE compliance for floating-point comparisons
+
+mindexed-addressing
+Target Report Mask(ALLOW_INDEXED_ADDRESS) Condition(SUPPORT_ANY_SH5_32MEDIA)
+Enable the use of the indexed addressing mode for SHmedia32/SHcompact
+
+minline-ic_invalidate
+Target Report Var(TARGET_INLINE_IC_INVALIDATE)
+Inline code to invalidate instruction cache entries after setting up nested function trampolines
+
+minvalid-symbols
+Target Report Mask(INVALID_SYMBOLS) Condition(SUPPORT_ANY_SH5)
+Assume symbols might be invalid
+
+misize
+Target Report RejectNegative Mask(DUMPISIZE)
+Annotate assembler instructions with estimated addresses
+
+ml
+Target Report RejectNegative Mask(LITTLE_ENDIAN)
+Generate code in little endian mode
+
+mnomacsave
+Target Report RejectNegative Mask(NOMACSAVE)
+Mark MAC register as call-clobbered
+
+;; ??? This option is not useful, but is retained in case there are people
+;; who are still relying on it. It may be deleted in the future.
+mpadstruct
+Target Report RejectNegative Mask(PADSTRUCT)
+Make structs a multiple of 4 bytes (warning: ABI altered)
+
+mprefergot
+Target Report RejectNegative Mask(PREFERGOT)
+Emit function-calls using global offset table when generating PIC
+
+mpt-fixed
+Target Report Mask(PT_FIXED) Condition(SUPPORT_ANY_SH5)
+Assume pt* instructions won't trap
+
+mrelax
+Target Report RejectNegative Mask(RELAX)
+Shorten address references during linking
+
+mrenesas
+Target Mask(HITACHI) MaskExists
+Follow Renesas (formerly Hitachi) / SuperH calling conventions
+
+mspace
+Target RejectNegative Alias(Os)
+Deprecated. Use -Os instead
+
+multcost=
+Target RejectNegative Joined UInteger Var(sh_multcost) Init(-1)
+Cost to assume for a multiply insn
+
+musermode
+Target Report RejectNegative Mask(USERMODE)
+Don't generate privileged-mode only code; implies -mno-inline-ic_invalidate if the inline code would not work in user mode.
+
+;; We might want to enable this by default for TARGET_HARD_SH4, because
+;; zero-offset branches have zero latency. Needs some benchmarking.
+mpretend-cmove
+Target Var(TARGET_PRETEND_CMOVE)
+Pretend a branch-around-a-move is a conditional move.
diff --git a/gcc/config/sh/sh1.md b/gcc/config/sh/sh1.md
new file mode 100644
index 000000000..970f3fc06
--- /dev/null
+++ b/gcc/config/sh/sh1.md
@@ -0,0 +1,85 @@
+;; DFA scheduling description for Renesas / SuperH SH.
+;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; SH-1 scheduling. This is just a conversion of the old scheduling
+;; model, using define_function_unit.
+
+(define_automaton "sh1")
+(define_cpu_unit "sh1memory,sh1int,sh1mpy,sh1fp" "sh1")
+
+;; Loads have a latency of two.
+;; However, call insns can have a delay slot, so that we want one more
+;; insn to be scheduled between the load of the function address and the call.
+;; This is equivalent to a latency of three.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+(define_insn_reservation "sh1_load_si" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "load_si,pcload_si"))
+ "sh1memory*2")
+
+(define_insn_reservation "sh1_load_store" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "load,pcload,pload,mem_mac,store,fstore,pstore,mac_mem"))
+ "sh1memory*2")
+
+(define_insn_reservation "sh1_arith3" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "arith3,arith3b"))
+ "sh1int*3")
+
+(define_insn_reservation "sh1_dyn_shift" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "dyn_shift"))
+ "sh1int*2")
+
+(define_insn_reservation "sh1_int" 1
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "!arith3,arith3b,dyn_shift"))
+ "sh1int")
+
+;; ??? These are approximations.
+(define_insn_reservation "sh1_smpy" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "smpy"))
+ "sh1mpy*2")
+
+(define_insn_reservation "sh1_dmpy" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "dmpy"))
+ "sh1mpy*3")
+
+(define_insn_reservation "sh1_fp" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "fp,fpscr_toggle,fp_cmp,fmove"))
+ "sh1fp")
+
+(define_insn_reservation "sh1_fdiv" 13
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "fdiv"))
+ "sh1fp*12")
+
diff --git a/gcc/config/sh/sh4-300.md b/gcc/config/sh/sh4-300.md
new file mode 100644
index 000000000..a9fb07cac
--- /dev/null
+++ b/gcc/config/sh/sh4-300.md
@@ -0,0 +1,287 @@
+;; DFA scheduling description for ST40-300.
+;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; The following description models the ST40-300 pipeline using the DFA based
+;; scheduler.
+
+;; Two automata are defined to reduce number of states
+;; which a single large automaton will have. (Factoring)
+
+(define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe")
+
+;; This unit is basically the decode unit of the processor.
+;; Since SH4 is a dual issue machine, it is as if there are two
+;; units, so that any insn can be processed by either one
+;; of the decoding units.
+
+(define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline")
+
+;; The floating point units.
+
+(define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe")
+
+;; integer multiplier unit
+
+(define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline")
+
+;; LS unit
+
+(define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved after "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+
+(define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+
+(define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02")
+
+(define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02")
+
+;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
+
+;; MOV RM,RN / MOV #imm8,RN / STS PR,RN
+(define_insn_reservation "sh4_300_mov" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "move,movi8,prget"))
+ "sh4_300_issue")
+
+;; Fixed STS from MACL / MACH
+(define_insn_reservation "sh4_300_mac_gp" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_gp"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Fixed LDS to MACL / MACH
+(define_insn_reservation "sh4_300_gp_mac" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_mac"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Instructions without specific resource requirements with latency 1.
+
+(define_insn_reservation "sh4_300_simple_arith" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mt_group,arith,dyn_shift,prset"))
+ "sh4_300_issue")
+
+;; Load and store instructions have no alignment peculiarities for the ST40-300,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg).
+;; Loads have a latency of three.
+
+;; Load Store instructions.
+(define_insn_reservation "sh4_300_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "load,pcload,load_si,pcload_si,pload"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_mac_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_mac"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_mul")
+
+(define_insn_reservation "sh4_300_fload" 4
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fload,pcfload"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; sh_adjust_cost describes the reduced latency of the feeding insns of a store.
+;; The latency of an auto-increment register is 1; the latency of the memory
+;; output is not actually considered here anyway.
+(define_insn_reservation "sh4_300_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "store,pstore"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_fstore" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fstore"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; Fixed STS.L from MACL / MACH
+(define_insn_reservation "sh4_300_mac_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_mem"))
+ "sh4_300_issue+sh4_300_mul+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_gp_fpul" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpul"))
+ "sh4_300_issue+sh4_300_fpt")
+
+(define_insn_reservation "sh4_300_fpul_gp" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpul_gp"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: BR
+;; When displacement is 0 for BF / BT, we have effectively conditional
+;; execution of one instruction, without pipeline disruption.
+;; Otherwise, the latency depends on prediction success.
+;; We can't really do much with the latency, even if we could express it,
+;; but the pairing restrictions are useful to take into account.
+;; ??? If the branch is likely, and not paired with a preceding insn,
+;; or likely and likely not predicted, we might want to fill the delay slot.
+;; However, there appears to be no machinery to make the compiler
+;; recognize these scenarios.
+
+(define_insn_reservation "sh4_300_branch" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cbranch,jump,return,jump_ind"))
+ "sh4_300_issue+sh4_300_br")
+
+;; RTE
+(define_insn_reservation "sh4_300_return_from_exp" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "rte"))
+ "sh4_300_pipe_01+sh4_300_pipe_02*9")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 1-5
+;; Issue Rate: 1
+
+;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2
+;; This description is likely inexact, but this pattern should not actually
+;; appear when compiling for sh4-300; we should use isbi instead.
+;; If a -mtune option is added later, we should use the icache array
+;; dispatch method instead.
+(define_insn_reservation "sh4_300_ocbwb" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cwb"))
+ "all*3")
+
+;; JSR,BSR,BSRF
+;; Calls have a mandatory delay slot, which we'd like to fill with an insn
+;; that can be paired with the call itself.
+;; Scheduling runs before reorg, so we approximate this by saying that we
+;; want the call to be paired with a preceding insn.
+;; In most cases, the insn that loads the address of the call should have
+;; a nonzero latency (mov rn,rm doesn't make sense since we could use rn
+;; for the address then). Thus, a preceding insn that can be paired with
+;; a call should be eligible for the delay slot.
+;;
+;; Calls introduce a longish delay that is likely to flush the pipelines
+;; of the caller's instructions. Ordinary functions tend to end with a
+;; load to restore a register (in the delay slot of rts), while sfuncs
+;; tend to end with an EX or MT insn. But that is not actually relevant,
+;; since there are no instructions that contend for memory access early.
+;; We could, of course, provide exact scheduling information for specific
+;; sfuncs, if that should prove useful.
+
+(define_insn_reservation "sh4_300_call" 16
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "call,sfunc"))
+ "sh4_300_issue+sh4_300_br,all*15")
+
+;; FMOV.S / FMOV.D
+(define_insn_reservation "sh4_300_fmov" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fmove"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; LDS to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; LDS.L to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load_mem" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls")
+
+
+;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
+(define_insn_reservation "multi" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "smpy,dmpy"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; FPCHG, FRCHG, FSCHG
+(define_insn_reservation "fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpscr_toggle"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; FCMP/EQ, FCMP/GT
+(define_insn_reservation "fp_cmp" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp_cmp,dfp_cmp"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC)
+;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC)
+(define_insn_reservation "fp_arith" 6
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single Precision FDIV/SQRT
+(define_insn_reservation "fp_div" 19
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15")
+
+;; Double-precision floating-point FMUL
+(define_insn_reservation "dfp_mul" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfp_mul"))
+ "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3")
+
+;; Double precision FDIV/SQRT
+(define_insn_reservation "dp_div" 35
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31")
+
+
+;; ??? We don't really want these for sh4-300.
+;; This pattern itself is likely to finish in 3 cycles, but also
+;; to disrupt branch prediction for taken branches for the following
+;; condbranch.
+(define_insn_reservation "sh4_300_arith3" 5
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "arith3"))
+ "sh4_300_issue,all*4")
+
+;; arith3b insns without branch redirection make use of the 0-offset 0-latency
+;; branch feature, and thus schedule the same no matter if the branch is taken
+;; or not. If the branch is redirected, the taken branch might take longer,
+;; but then, we don't have to take the next branch.
+;; ??? should we suppress branch redirection for sh4-300 to improve branch
+;; target hit rates?
+(define_insn_reservation "arith3b" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,all")
diff --git a/gcc/config/sh/sh4.md b/gcc/config/sh/sh4.md
new file mode 100644
index 000000000..0fb4a9aec
--- /dev/null
+++ b/gcc/config/sh/sh4.md
@@ -0,0 +1,486 @@
+;; DFA scheduling description for SH4.
+;; Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; The following description models the SH4 pipeline using the DFA based
+;; scheduler. The DFA based description is a better way to model a
+;; superscalar pipeline than the function unit reservation model.
+;; 1. The function unit based model is oriented to describe at most one
+;; unit reservation by each insn. It is difficult to model unit reservations
+;; in multiple pipeline units by the same insn. This can be done using a DFA
+;; based description.
+;; 2. The execution performance of DFA based scheduler does not depend on
+;; processor complexity.
+;; 3. Writing all unit reservations for an instruction class is a more natural
+;; description of the pipeline and makes the interface to the hazard
+;; recognizer simpler than the old function unit based model.
+;; 4. The DFA model is richer and is a part of a greater overall framework
+;; of RCSP.
+
+
+;; Two automata are defined to reduce number of states
+;; which a single large automaton will have. (Factoring)
+
+(define_automaton "inst_pipeline,fpu_pipe")
+
+;; This unit is basically the decode unit of the processor.
+;; Since SH4 is a dual issue machine, it is as if there are two
+;; units, so that any insn can be processed by either one
+;; of the decoding units.
+
+(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
+
+
+;; The fixed point arithmetic calculator(?? EX Unit).
+
+(define_cpu_unit "int" "inst_pipeline")
+
+;; f1_1 and f1_2 are floating point units. Actually there is
+;; an f1 unit which can overlap with the other f1 unit but
+;; not with an F1 unit. It is as though there were two
+;; f1 units.
+
+(define_cpu_unit "f1_1,f1_2" "fpu_pipe")
+
+;; The floating point units (except FS - F2 always precedes it.)
+
+(define_cpu_unit "F0,F1,F2,F3" "fpu_pipe")
+
+;; This is basically the MA unit of SH4
+;; used in LOAD/STORE pipeline.
+
+(define_cpu_unit "memory" "inst_pipeline")
+
+;; However, there are LS group insns that don't use it, even ones that
+;; complete in 0 cycles. So we use an extra unit for the issue of LS insns.
+(define_cpu_unit "load_store" "inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved after "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+
+(define_cpu_unit "pcr_addrcalc" "inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+
+(define_reservation "issue" "pipe_01|pipe_02")
+
+;; This is to express the locking of D stage.
+;; Note that the issue of a CO group insn also effectively locks the D stage.
+
+(define_reservation "d_lock" "pipe_01+pipe_02")
+
+;; Every FE instruction except fipr / ftrv starts with issue and this reservation.
+(define_reservation "F01" "F0+F1")
+
+;; This is to simplify description where F1,F2,FS
+;; are used simultaneously.
+
+(define_reservation "fpu" "F1+F2")
+
+;; This is to highlight the fact that f1
+;; cannot overlap with F1.
+
+(exclusion_set "f1_1,f1_2" "F1")
+
+(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
+
+;; Although reg moves have a latency of zero
+;; we need to highlight that they use D stage
+;; for one cycle.
+
+;; Group: MT
+
+(define_insn_reservation "reg_mov" 0
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "move"))
+ "issue")
+
+;; Group: LS
+
+(define_insn_reservation "freg_mov" 0
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fmove"))
+ "issue+load_store")
+
+;; We don't model all pipeline stages; we model the issue ('D') stage
+;; inasmuch as we allow only two instructions to issue simultaneously,
+;; and CO instructions prevent any simultaneous issue of another instruction.
+;; (This uses pipe_01 and pipe_02).
+;; Double issue of EX insns is prevented by using the int unit in the EX stage.
+;; Double issue of EX / BR insns is prevented by using the int unit /
+;; pcr_addrcalc unit in the EX stage.
+;; Double issue of BR / LS instructions is prevented by using the
+;; pcr_addrcalc / load_store unit in the issue cycle.
+;; Double issue of FE instructions is prevented by using F0 in the first
+;; pipeline stage after the first D stage.
+;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage
+;; (except in the cases outlined above), nor to describe the FS stage after
+;; the F2 stage.
+
+;; Other MT group instructions(1 step operations)
+;; Group: MT
+;; Latency: 1
+;; Issue Rate: 1
+
+(define_insn_reservation "mt" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mt_group"))
+ "issue")
+
+;; Fixed Point Arithmetic Instructions(1 step operations)
+;; Group: EX
+;; Latency: 1
+;; Issue Rate: 1
+
+(define_insn_reservation "sh4_simple_arith" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "insn_class" "ex_group"))
+ "issue,int")
+
+;; Load and store instructions have no alignment peculiarities for the SH4,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg).
+;; Loads have a latency of two.
+;; However, call insns can only be paired with a preceding insn, and have
+;; a delay slot, so that we want two more insns to be scheduled between the
+;; load of the function address and the call. This is equivalent to a
+;; latency of three.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here, which gets multiplied by 10 to yield 30.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+
+;; Load Store instructions. (MOV.[BWL]@(d,GBR)
+;; Group: LS
+;; Latency: 2
+;; Issue Rate: 1
+
+(define_insn_reservation "sh4_load" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "load,pcload"))
+ "issue+load_store,nothing,memory")
+
+;; calls / sfuncs need an extra instruction for their delay slot.
+;; Moreover, estimating the latency for SImode loads as 3 will also allow
+;; adjust_cost to meaningfully bump it back up to 3 if they load the shift
+;; count of a dynamic shift.
+(define_insn_reservation "sh4_load_si" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "load_si,pcload_si"))
+ "issue+load_store,nothing,memory")
+
+;; (define_bypass 2 "sh4_load_si" "!sh4_call")
+
+;; The load latency is upped to three when the dependent insn does
+;; double precision computation. We want the 'default' latency to reflect
+;; that increased latency because otherwise the insn priorities won't
+;; allow proper scheduling.
+(define_insn_reservation "sh4_fload" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fload,pcfload"))
+ "issue+load_store,nothing,memory")
+
+;; (define_bypass 2 "sh4_fload" "!")
+
+(define_insn_reservation "sh4_store" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "store,fstore"))
+ "issue+load_store,nothing,memory")
+
+(define_insn_reservation "mac_mem" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mac_mem"))
+ "d_lock,nothing,memory")
+
+;; Load Store instructions.
+;; Group: LS
+;; Latency: 1
+;; Issue Rate: 1
+
+(define_insn_reservation "sh4_gp_fpul" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "gp_fpul"))
+ "issue+load_store")
+
+;; Load Store instructions.
+;; Group: LS
+;; Latency: 3
+;; Issue Rate: 1
+
+(define_insn_reservation "sh4_fpul_gp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fpul_gp"))
+ "issue+load_store")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Group: BR
+;; Latency when taken: 2 (or 1)
+;; Issue Rate: 1
+;; The latency is 1 when displacement is 0.
+;; We can't really do much with the latency, even if we could express it,
+;; but the pairing restrictions are useful to take into account.
+;; ??? If the branch is likely, we might want to fill the delay slot;
+;; if the branch is likely, but not very likely, should we pretend to use
+;; a resource that CO instructions use, to get a pairable delay slot insn?
+
+(define_insn_reservation "sh4_branch" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "cbranch,jump"))
+ "issue+pcr_addrcalc")
+
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; ??? Scheduling happens before branch shortening, and hence jmp and braf
+;; can't be distinguished from bra for the "jump" pattern.
+
+(define_insn_reservation "sh4_return" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "return,jump_ind"))
+ "d_lock*2")
+
+;; RTE
+;; Group: CO
+;; Latency: 5
+;; Issue Rate: 5
+;; This instruction can be executed in any of the pipelines
+;; and blocks the pipeline for the next 4 stages.
+
+(define_insn_reservation "sh4_return_from_exp" 5
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "rte"))
+ "d_lock*5")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 1-5
+;; Issue Rate: 1
+
+;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2
+;; ocbwb on its own would be "d_lock,nothing,memory*5"
+(define_insn_reservation "ocbwb" 6
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "cwb"))
+ "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2")
+
+;; LDS to PR,JSR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX stage is blocked for the last 2 cycles.
+;; OTOH, the only time that has an effect for insns generated by the compiler
+;; is when lds to PR is followed by sts from PR - and that is highly unlikely -
+;; or when we are doing a function call - and we don't do inter-function
+;; scheduling. For the function call case, it's really best that we end with
+;; something that models an rts.
+
+(define_insn_reservation "sh4_lds_to_pr" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "prset") )
+ "d_lock*2")
+
+;; Calls introduce a longish delay that is likely to flush the pipelines
+;; of the caller's instructions. Ordinary functions tend to end with a
+;; load to restore a register (in the delay slot of rts), while sfuncs
+;; tend to end with an EX or MT insn. But that is not actually relevant,
+;; since there are no instructions that contend for memory access early.
+;; We could, of course, provide exact scheduling information for specific
+;; sfuncs, if that should prove useful.
+
+(define_insn_reservation "sh4_call" 16
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "call,sfunc"))
+ "d_lock*16")
+
+;; LDS.L to PR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX unit is blocked for the last 2 cycles.
+
+(define_insn_reservation "ldsmem_to_pr" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "pload"))
+ "d_lock*2")
+
+;; STS from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+;; The SX unit is used in the second and third cycles.
+
+(define_insn_reservation "sts_from_pr" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "prget"))
+ "d_lock*2")
+
+;; STS.L from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+
+(define_insn_reservation "sh4_prstore_mem" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "pstore"))
+ "d_lock*2,nothing,memory")
+
+;; LDS to FPSCR
+;; Group: CO
+;; Latency: 4
+;; Issue Rate: 1
+;; F1 is blocked for last three cycles.
+
+(define_insn_reservation "fpscr_load" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "gp_fpscr"))
+ "d_lock,nothing,F1*3")
+
+;; LDS.L to FPSCR
+;; Group: CO
+;; Latency: 1 / 4
+;; Latency to update Rn is 1 and latency to update FPSCR is 4
+;; Issue Rate: 1
+;; F1 is blocked for last three cycles.
+
+(define_insn_reservation "fpscr_load_mem" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mem_fpscr"))
+ "d_lock,nothing,(F1+memory),F1*2")
+
+
+;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
+;; Group: CO
+;; Latency: 4 / 4
+;; Issue Rate: 2
+
+(define_insn_reservation "multi" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "smpy,dmpy"))
+ "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")
+
+;; Fixed STS from, and LDS to MACL / MACH
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 1
+
+(define_insn_reservation "sh4_mac_gp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mac_gp,gp_mac,mem_mac"))
+ "d_lock")
+
+
+;; Single precision floating point computation FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 3/4
+;; Issue Rate: 1
+
+(define_insn_reservation "fp_arith" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fp,fp_cmp"))
+ "issue,F01,F2")
+
+;; We don't model the resource usage of this exactly because that would
+;; introduce a bogus latency.
+(define_insn_reservation "sh4_fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fpscr_toggle"))
+ "issue")
+
+(define_insn_reservation "fp_arith_ftrc" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "ftrc_s"))
+ "issue,F01,F2")
+
+(define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp")
+
+;; Single Precision FDIV/SQRT
+;; Group: FE
+;; Latency: 12/13 (FDIV); 11/12 (FSQRT)
+;; Issue Rate: 1
+;; We describe fdiv here; fsqrt is actually one cycle faster.
+
+(define_insn_reservation "fp_div" 12
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fdiv"))
+ "issue,F01+F3,F2+F3,F3*7,F1+F3,F2")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: (3,4)/5
+;; Issue Rate: 1
+
+(define_insn_reservation "dp_float" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_conv"))
+ "issue,F01,F1+F2,F2")
+
+;; Double-precision floating-point (FADD,FMUL,FSUB)
+;; Group: FE
+;; Latency: (7,8)/9
+;; Issue Rate: 1
+
+(define_insn_reservation "fp_double_arith" 8
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_arith,dfp_mul"))
+ "issue,F01,F1+F2,fpu*4,F2")
+
+;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
+;; Group: CO
+;; Latency: 3/5
+;; Issue Rate: 2
+
+(define_insn_reservation "fp_double_cmp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_cmp"))
+ "d_lock,(d_lock+F01),F1+F2,F2")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: (24,25)/26
+;; Issue Rate: 1
+
+(define_insn_reservation "dp_div" 25
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfdiv"))
+ "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2")
+
+
+;; Use the branch-not-taken case to model arith3 insns. For the branch taken
+;; case, we'd get a d_lock instead of issue at the end.
+(define_insn_reservation "arith3" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,d_lock+pcr_addrcalc,issue")
+
+;; arith3b insns schedule the same no matter if the branch is taken or not.
+(define_insn_reservation "arith3b" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,d_lock+pcr_addrcalc")
diff --git a/gcc/config/sh/sh4a.md b/gcc/config/sh/sh4a.md
new file mode 100644
index 000000000..75f239f53
--- /dev/null
+++ b/gcc/config/sh/sh4a.md
@@ -0,0 +1,236 @@
+;; Scheduling description for Renesas SH4a
+;; Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The following description models the SH4A pipeline
+;; using the DFA based scheduler.
+
+(define_automaton "sh4a")
+
+(define_cpu_unit "sh4a_ex" "sh4a")
+(define_cpu_unit "sh4a_ls" "sh4a")
+(define_cpu_unit "sh4a_fex" "sh4a")
+(define_cpu_unit "sh4a_fls" "sh4a")
+(define_cpu_unit "sh4a_mult" "sh4a")
+(define_cpu_unit "sh4a_fdiv" "sh4a")
+
+;; Decoding is done on the integer pipeline like the
+;; sh4. Define issue to be the | of the two pipelines
+;; to control how often instructions are issued.
+(define_reservation "ID_or" "sh4a_ex|sh4a_ls")
+(define_reservation "ID_and" "sh4a_ex+sh4a_ls")
+
+
+;; =======================================================
+;; Locking Descriptions
+
+;; Memory access on the LS pipeline.
+(define_cpu_unit "sh4a_memory" "sh4a")
+
+;; Other access on the LS pipeline.
+(define_cpu_unit "sh4a_load_store" "sh4a")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved after "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+(define_reservation "sh4a_addrcalc" "sh4a_ex")
+
+;; =======================================================
+;; Reservations
+
+;; Branch (BF,BF/S,BT,BT/S,BRA,BSR)
+;; Group: BR
+;; Latency when taken: 2
+(define_insn_reservation "sh4a_branch" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "cbranch,jump"))
+ "ID_or+sh4a_addrcalc")
+
+;; Jump (JSR,JMP,RTS)
+;; Group: BR
+;; Latency: 3
+(define_insn_reservation "sh4a_jump" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "return,jump_ind"))
+ "ID_or+sh4a_addrcalc")
+
+;; RTE
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_rte" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "rte"))
+ "ID_and*4")
+
+;; EX Group Single
+;; Group: EX
+;; Latency: 0
+(define_insn_reservation "sh4a_ex" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "insn_class" "ex_group"))
+ "sh4a_ex")
+
+;; MOVA
+;; Group: LS
+;; Latency: 1
+(define_insn_reservation "sh4a_mova" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mova"))
+ "sh4a_ls+sh4a_load_store")
+
+;; MOV
+;; Group: MT
+;; Latency: 0
+;; ??? not sure if movi8 belongs here, but that's where it was
+;; effectively before.
+(define_insn_reservation "sh4a_mov" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "move,movi8,gp_mac"))
+ "ID_or")
+
+;; Load
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_load" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load,pcload,mem_mac"))
+ "sh4a_ls+sh4a_memory")
+
+(define_insn_reservation "sh4a_load_si" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load_si,pcload_si"))
+ "sh4a_ls+sh4a_memory")
+
+;; Store
+;; Group: LS
+;; Latency: 0
+(define_insn_reservation "sh4a_store" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "store,fstore,mac_mem"))
+ "sh4a_ls+sh4a_memory")
+
+;; CWB TYPE
+
+;; MOVUA
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_movua" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "movua"))
+ "sh4a_ls+sh4a_memory*2")
+
+;; Fixed point multiplication (single)
+;; Group: CO
+;; Latency: 2
+(define_insn_reservation "sh4a_smult" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "smpy"))
+ "ID_or+sh4a_mult")
+
+;; Fixed point multiplication (double)
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_dmult" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dmpy"))
+ "ID_or+sh4a_mult")
+
+(define_insn_reservation "sh4a_mac_gp" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mac_gp"))
+ "ID_and")
+
+;; Other MT group instructions(1 step operations)
+;; Group: MT
+;; Latency: 1
+(define_insn_reservation "sh4a_mt" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mt_group"))
+ "ID_or")
+
+;; Floating point reg move
+;; Group: LS
+;; Latency: 2
+(define_insn_reservation "sh4a_freg_mov" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fmove"))
+ "sh4a_ls,sh4a_fls")
+
+;; Single precision floating point computation FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_fp_arith" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fp,fp_cmp,fpscr_toggle"))
+ "ID_or,sh4a_fex")
+
+(define_insn_reservation "sh4a_fp_arith_ftrc" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "ftrc_s"))
+ "ID_or,sh4a_fex")
+
+;; Single-precision FDIV/FSQRT
+;; Group: FE
+;; Latency: 20
+(define_insn_reservation "sh4a_fdiv" 20
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_dp_float" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_conv"))
+ "ID_or,sh4a_fex")
+
+;; Double-precision floating-point (FADD,FMUL,FSUB)
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fp_double_arith" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_arith,dfp_mul"))
+ "ID_or,sh4a_fex*3")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: 36
+(define_insn_reservation "sh4a_dp_div" 36
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2")
+
+;; FSRRA
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fsrra" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsrra"))
+ "ID_or,sh4a_fex")
+
+;; FSCA
+;; Group: FE
+;; Latency: 7
+(define_insn_reservation "sh4a_fsca" 7
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsca"))
+ "ID_or,sh4a_fex*3")
diff --git a/gcc/config/sh/sh64.h b/gcc/config/sh/sh64.h
new file mode 100644
index 000000000..c954d72ca
--- /dev/null
+++ b/gcc/config/sh/sh64.h
@@ -0,0 +1,26 @@
+/* Definitions of target machine for GNU compiler for SuperH SH 5.
+ Copyright 2000, 2001, 2002, 2003, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Alexandre Oliva <aoliva@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION \
+ fputs (" (SuperH SH)", stderr);
+
+#undef SH_ELF_WCHAR_TYPE
+#define SH_ELF_WCHAR_TYPE "int"
diff --git a/gcc/config/sh/shmedia.h b/gcc/config/sh/shmedia.h
new file mode 100644
index 000000000..d78a5e573
--- /dev/null
+++ b/gcc/config/sh/shmedia.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2000, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _SHMEDIA_H
+#define _SHMEDIA_H
+
+#include <ushmedia.h>
+#include <sshmedia.h>
+
+#endif
diff --git a/gcc/config/sh/shmedia.md b/gcc/config/sh/shmedia.md
new file mode 100644
index 000000000..47c1ce694
--- /dev/null
+++ b/gcc/config/sh/shmedia.md
@@ -0,0 +1,94 @@
+;; DFA scheduling description for SH-5 SHmedia instructions.
+;; Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This is just a conversion of the old model using define_function_unit.
+
+;; When executing SHmedia code, the SH-5 is a fairly straightforward
+;; single-issue machine. It has four pipelines, the branch unit (br),
+;; the integer and multimedia unit (imu), the load/store unit (lsu), and
+;; the floating point unit (fpu).
+
+(define_automaton "sh5inst_pipe, sh5fpu_pipe")
+
+(define_cpu_unit "sh5issue" "sh5inst_pipe")
+
+(define_cpu_unit "sh5fds" "sh5fpu_pipe")
+
+;; Every instruction on SH-5 occupies the issue resource for at least one
+;; cycle.
+(define_insn_reservation "shmedia1" 1
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media"))
+ "sh5issue")
+
+;; Specify the various types of instruction which have latency > 1
+(define_insn_reservation "shmedia2" 2
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "mcmp_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia3" 3
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media"))
+ "sh5issue")
+;; but see sh_adjust_cost for the mac_media exception.
+
+(define_insn_reservation "shmedia4" 4
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "fload_media,fmove_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia_d2mpy" 4
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "d2mpy_media"))
+ "sh5issue*2")
+
+(define_insn_reservation "shmedia5" 5
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "pt_media,ptabs_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia6" 6
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia_invalidate" 7
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "invalidate_line_media"))
+ "sh5issue*7")
+
+(define_insn_reservation "shmedia_dfmul" 9
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfmul_media"))
+ "sh5issue*4")
+
+(define_insn_reservation "shmedia_atrans" 10
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "atrans_media"))
+ "sh5issue*5")
+
+;; Floating-point divide and square-root occupy an additional resource,
+;; which is not internally pipelined. However, other instructions
+;; can continue to issue.
+(define_insn_reservation "shmedia_fdiv" 19
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "fdiv_media"))
+ "sh5issue+sh5fds,sh5fds*18")
+
+(define_insn_reservation "shmedia_dfdiv" 35
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfdiv_media"))
+ "sh5issue+sh5fds,sh5fds*34")
diff --git a/gcc/config/sh/sshmedia.h b/gcc/config/sh/sshmedia.h
new file mode 100644
index 000000000..f8245042a
--- /dev/null
+++ b/gcc/config/sh/sshmedia.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2000, 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* sshmedia.h: Intrinsics corresponding to SHmedia instructions that
+ may only be executed in privileged mode. */
+
+#ifndef _SSHMEDIA_H
+#define _SSHMEDIA_H
+
+#if __SHMEDIA__
+__inline__ static unsigned long long sh_media_GETCON (unsigned int k)
+ __attribute__((always_inline));
+
+__inline__ static
+unsigned long long
+sh_media_GETCON (unsigned int k)
+{
+ unsigned long long res;
+ __asm__ __volatile__ ("getcon cr%1, %0" : "=r" (res) : "n" (k));
+ return res;
+}
+
+__inline__ static void sh_media_PUTCON (unsigned long long mm, unsigned int k)
+ __attribute__((always_inline));
+
+__inline__ static
+void
+sh_media_PUTCON (unsigned long long mm, unsigned int k)
+{
+ __asm__ __volatile__ ("putcon %0, cr%1" : : "r" (mm), "n" (k));
+}
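+
+/* A minimal usage sketch (assumptions: privileged execution and a
+   valid control register number -- the index 0 below is only an
+   example):
+
+     unsigned long long cr = sh_media_GETCON (0);
+     sh_media_PUTCON (cr, 0);
+
+   The "n" asm constraints require the register number to be a
+   compile-time constant.  */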
+
+__inline__ static
+unsigned long long
+sh_media_GETCFG (unsigned long long mm, int s)
+{
+ unsigned long long res;
+ __asm__ __volatile__ ("getcfg %1, %2, %0" : "=r" (res) : "r" (mm), "n" (s));
+ return res;
+}
+
+__inline__ static
+void
+sh_media_PUTCFG (unsigned long long mm, int s, unsigned long long mw)
+{
+ __asm__ __volatile__ ("putcfg %0, %1, %2" : : "r" (mm), "n" (s), "r" (mw));
+}
+
+__inline__ static
+void
+sh_media_SLEEP (void)
+{
+ __asm__ __volatile__ ("sleep");
+}
+#endif
+
+#endif
diff --git a/gcc/config/sh/superh.h b/gcc/config/sh/superh.h
new file mode 100644
index 000000000..88920739e
--- /dev/null
+++ b/gcc/config/sh/superh.h
@@ -0,0 +1,107 @@
+/* Definitions of target machine for gcc for Super-H using sh-superh-elf.
+ Copyright (C) 2001, 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This header file is used when the vendor name is set to 'superh'.
+ config.gcc already configured the compiler for SH4 only and switched
+ the default endianness to little (although big endian is still available).
+ This file configures the spec file to the default board configuration
+ but in such a way that it can be overridden by a boardspecs file
+ (using the -specs= option). The boardspecs file is expected to disable
+ the defaults and provide the options --defsym _start and --defsym _stack,
+ which are required by the SuperH configuration of GNU ld.
+
+ This file is intended to override sh.h. */
+
+
+#ifndef _SUPERH_H
+#define _SUPERH_H
+#endif
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (SuperH SH special %s)", __DATE__);
+
+/* Override the linker spec strings to use the new emulation
+ The specstrings are concatenated as follows
+ LINK_EMUL_PREFIX.(''|'32'|'64'|LINK_DEFAULT_CPU_EMUL).SUBTARGET_LINK_EMUL_SUFFIX
+*/
+#undef LINK_EMUL_PREFIX
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+
+#define LINK_EMUL_PREFIX "superh"
+#define SUBTARGET_LINK_EMUL_SUFFIX ""
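+
+/* For illustration: with the definitions above, a 32-bit link would
+   presumably select the emulation "superh32" (prefix "superh", middle
+   component "32", empty suffix).  */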
+
+/* Add the SUBTARGET_LINK_SPEC to add the board and runtime support and
+ change the endianness */
+#undef SUBTARGET_LINK_SPEC
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml|!mb:-EL}%{mb:-EB}"
+#else
+#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml:-EL}%{mb|!ml:-EB}"
+#endif
+
+
+/* This is used by the link spec if the boardspecs file is not used (for whatever reason).
+ If the boardspecs file overrides this then an alternative can be used. */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+{ "board_link", "--defsym _start=0x1000 --defsym _stack=0x30000" }, \
+{ "asruntime", "" }, \
+{ "cppruntime", "-D__GDB_SIM__" }, \
+{ "cc1runtime", "" }, \
+{ "ldruntime", "" }, \
+{ "libruntime", "-lc -lgloss" }
+
+
+/* Set the SUBTARGET_CPP_SPEC to define __EMBEDDED_CROSS__ which has an effect
+ on newlib and provide the runtime support */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC \
+"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-300*:-D__SH4_300__} %{m4-340:-D__SH4_340__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \
+%(cppruntime)"
+
+/* Override the SUBTARGET_ASM_SPEC to add the runtime support */
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400|m4-340:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)"
+
+/* Override the SUBTARGET_ASM_RELAX_SPEC so it doesn't interfere with the
+ runtime support by adding -isa=sh4 in the wrong place. */
+#undef SUBTARGET_ASM_RELAX_SPEC
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-300*:%{!m4-340:%{!m4-400:%{!m4-500:-isa=sh4}}}}}}"
+
+/* Create the CC1_SPEC to add the runtime support */
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1runtime)"
+
+#undef CC1PLUS_SPEC
+#define CC1PLUS_SPEC "%(cc1runtime)"
+
+
+/* Override the LIB_SPEC to add the runtime support */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:%(libruntime) -lc}} %{pg:-lprofile -lc}"
+
+/* Override STARTFILE_SPEC to add profiling and MMU support. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: %{!m4-400*:%{!m4-340*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}}} \
+ %{!shared: %{m4-340*|m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \
+ crti.o%s \
+ %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
diff --git a/gcc/config/sh/superh.opt b/gcc/config/sh/superh.opt
new file mode 100644
index 000000000..b85abddaf
--- /dev/null
+++ b/gcc/config/sh/superh.opt
@@ -0,0 +1,10 @@
+;; The -mboard and -mruntime options need only be accepted here; they are
+;; actually processed by supplementary specs files.
+
+mboard=
+Target RejectNegative Joined
+Board name [and memory region].
+
+mruntime=
+Target RejectNegative Joined
+Runtime name.
diff --git a/gcc/config/sh/symbian-base.c b/gcc/config/sh/symbian-base.c
new file mode 100644
index 000000000..f8e678be3
--- /dev/null
+++ b/gcc/config/sh/symbian-base.c
@@ -0,0 +1,244 @@
+/* Routines for GCC for a Symbian OS targeted SH backend, shared by
+ both the C and C++ compilers.
+ Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by RedHat.
+ Most of this code is stolen from i386/winnt.c.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "output.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "sh-symbian.h"
+
+/* Return nonzero if SYMBOL is marked as being dllexport'd. */
+
+bool
+sh_symbian_is_dllexported_name (const char *symbol)
+{
+ return strncmp (DLL_EXPORT_PREFIX, symbol,
+ strlen (DLL_EXPORT_PREFIX)) == 0;
+}
+
+/* Return nonzero if SYMBOL is marked as being dllimport'd. */
+
+static bool
+sh_symbian_is_dllimported_name (const char *symbol)
+{
+ return strncmp (DLL_IMPORT_PREFIX, symbol,
+ strlen (DLL_IMPORT_PREFIX)) == 0;
+}
+
+/* Return nonzero if DECL is a dllexport'd object. */
+
+bool
+sh_symbian_is_dllexported (tree decl)
+{
+ tree exp;
+
+ if ( TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ exp = lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl));
+
+ /* Class members get the dllexport status of their class. */
+ if (exp == NULL)
+ {
+ tree class = sh_symbian_associated_type (decl);
+
+ if (class)
+ exp = lookup_attribute ("dllexport", TYPE_ATTRIBUTES (class));
+ }
+#if SYMBIAN_DEBUG
+ if (exp)
+ {
+ print_node_brief (stderr, "dllexport:", decl, 0);
+ fprintf (stderr, "\n");
+ }
+ else
+#if SYMBIAN_DEBUG < 2
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+#endif
+ {
+ print_node_brief (stderr, "no dllexport:", decl, 0);
+ fprintf (stderr, "\n");
+ }
+#endif
+ return exp ? true : false;
+}
+
+/* Mark a DECL as being dllimport'd. */
+
+static void
+sh_symbian_mark_dllimport (tree decl)
+{
+ const char *oldname;
+ char *newname;
+ tree idp;
+ rtx rtlname;
+ rtx newrtl;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+ if (MEM_P (rtlname))
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ if (sh_symbian_is_dllexported_name (oldname))
+ {
+ error ("%qE declared as both exported to and imported from a DLL",
+ DECL_NAME (decl));
+ }
+ else if (sh_symbian_is_dllimported_name (oldname))
+ {
+ /* Already done, but do a sanity check to prevent assembler errors. */
+ if (!DECL_EXTERNAL (decl) || !TREE_PUBLIC (decl))
+ error ("failure in redeclaration of %q+D: dllimport%'d symbol lacks external linkage",
+ decl);
+ }
+ else
+ {
+ newname = (char *) alloca (strlen (DLL_IMPORT_PREFIX) + strlen (oldname) + 1);
+ sprintf (newname, "%s%s", DLL_IMPORT_PREFIX, oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ idp = get_identifier (newname);
+ newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+ }
+}
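+
+/* A minimal sketch of the interning guarantee relied on above (not
+   upstream code; "__imp_foo" is an invented name -- the real prefix is
+   DLL_IMPORT_PREFIX from sh-symbian.h):
+
+     tree a = get_identifier ("__imp_foo");
+     tree b = get_identifier ("__imp_foo");
+     gcc_assert (IDENTIFIER_POINTER (a) == IDENTIFIER_POINTER (b));
+
+   Equal strings map to the same identifier node, so SYMBOL_REF string
+   pointers may be compared directly.  */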
+
+/* Mark a DECL as being dllexport'd.
+ Note that we override the previous setting (e.g.: dllimport). */
+
+static void
+sh_symbian_mark_dllexport (tree decl)
+{
+ const char *oldname;
+ char *newname;
+ rtx rtlname;
+ tree idp;
+
+ rtlname = XEXP (DECL_RTL (decl), 0);
+ if (MEM_P (rtlname))
+ rtlname = XEXP (rtlname, 0);
+ gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
+ oldname = XSTR (rtlname, 0);
+
+ if (sh_symbian_is_dllimported_name (oldname))
+ {
+ /* Remove DLL_IMPORT_PREFIX.
+ Note - we do not issue a warning here. In Symbian's environment it
+ is legitimate for a prototype to be marked as dllimport and the
+ corresponding definition to be marked as dllexport. The prototypes
+ are in headers used everywhere and the definition is in a translation
+ unit which has included the header in order to ensure argument
+ correctness. */
+ oldname += strlen (DLL_IMPORT_PREFIX);
+ DECL_DLLIMPORT_P (decl) = 0;
+ }
+ else if (sh_symbian_is_dllexported_name (oldname))
+ return; /* Already done. */
+
+ newname = (char *) alloca (strlen (DLL_EXPORT_PREFIX) + strlen (oldname) + 1);
+ sprintf (newname, "%s%s", DLL_EXPORT_PREFIX, oldname);
+
+ /* We pass newname through get_identifier to ensure it has a unique
+ address. RTL processing can sometimes peek inside the symbol ref
+ and compare the string's addresses to see if two symbols are
+ identical. */
+ idp = get_identifier (newname);
+
+ XEXP (DECL_RTL (decl), 0) =
+ gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+}
+
+void
+sh_symbian_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ /* Mark the decl so we can tell from the rtl whether
+ the object is dllexport'd or dllimport'd. */
+ if (sh_symbian_is_dllexported (decl))
+ sh_symbian_mark_dllexport (decl);
+ else if (sh_symbian_is_dllimported (decl))
+ sh_symbian_mark_dllimport (decl);
+ /* It might be that DECL has already been marked as dllimport, but a
+ subsequent definition nullified that. The attribute is gone but
+ DECL_RTL still has (DLL_IMPORT_PREFIX) prefixed. We need to remove
+ that. Ditto for the DECL_DLLIMPORT_P flag. */
+ else if ( (TREE_CODE (decl) == FUNCTION_DECL
+ || TREE_CODE (decl) == VAR_DECL)
+ && DECL_RTL (decl) != NULL_RTX
+ && MEM_P (DECL_RTL (decl))
+ && MEM_P (XEXP (DECL_RTL (decl), 0))
+ && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF
+ && sh_symbian_is_dllimported_name (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0)))
+ {
+ const char * oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0);
+ /* Remove DLL_IMPORT_PREFIX. */
+ tree idp = get_identifier (oldname + strlen (DLL_IMPORT_PREFIX));
+ rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
+
+ warning (0, "%s %q+D %s after being referenced with dllimport linkage",
+ TREE_CODE (decl) == VAR_DECL ? "variable" : "function",
+ decl, (DECL_INITIAL (decl) || !DECL_EXTERNAL (decl))
+ ? "defined locally" : "redeclared without dllimport attribute");
+
+ XEXP (DECL_RTL (decl), 0) = newrtl;
+
+ DECL_DLLIMPORT_P (decl) = 0;
+ }
+}
+
+/* Return the length of the encoding prefix to strip from a function
+   name that begins with the character C, or zero if there is none.  */
+
+static int
+sh_symbian_get_strip_length (int c)
+{
+ /* XXX Assumes strlen (DLL_EXPORT_PREFIX) == strlen (DLL_IMPORT_PREFIX). */
+ return (c == SH_SYMBIAN_FLAG_CHAR[0]) ? strlen (DLL_EXPORT_PREFIX) : 0;
+}
+
+/* Return a pointer to a function's name with any
+ and all prefix encodings stripped from it. */
+
+const char *
+sh_symbian_strip_name_encoding (const char *name)
+{
+ int skip;
+
+ while ((skip = sh_symbian_get_strip_length (*name)))
+ name += skip;
+
+ return name;
+}
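+
+/* Worked example (hypothetical prefix values -- the real ones live in
+   sh-symbian.h): if DLL_EXPORT_PREFIX were "#e." and DLL_IMPORT_PREFIX
+   were "#i.", then sh_symbian_strip_name_encoding ("#e.foo") would
+   return "foo", removing one prefix per loop iteration.  As the XXX
+   above notes, this relies on both prefixes sharing the flag character
+   and the same length.  */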
+
diff --git a/gcc/config/sh/symbian-c.c b/gcc/config/sh/symbian-c.c
new file mode 100644
index 000000000..c93922a39
--- /dev/null
+++ b/gcc/config/sh/symbian-c.c
@@ -0,0 +1,181 @@
+/* Routines for C compiler part of GCC for a Symbian OS targeted SH backend.
+ Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+ Most of this code is stolen from i386/winnt.c.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "output.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "sh-symbian.h"
+
+
+/* Return the type that we should use to determine if DECL is
+ imported or exported. */
+
+tree
+sh_symbian_associated_type (tree decl)
+{
+ tree t = NULL_TREE;
+
+ /* We can just take the DECL_CONTEXT as normal. */
+ if (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl)))
+ t = DECL_CONTEXT (decl);
+
+ return t;
+}
+
+/* Return nonzero if DECL is a dllimport'd object. */
+
+bool
+sh_symbian_is_dllimported (tree decl)
+{
+ tree imp;
+
+ if ( TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl));
+ if (imp)
+ return true;
+
+ /* Class members get the dllimport status of their class. */
+ imp = sh_symbian_associated_type (decl);
+ if (! imp)
+ return false;
+
+ imp = lookup_attribute ("dllimport", TYPE_ATTRIBUTES (imp));
+ if (!imp)
+ return false;
+
+ /* Don't mark defined functions as dllimport. If the definition itself
+ was marked with dllimport, then sh_symbian_handle_dll_attribute reports
+ an error. This handles the case when the definition overrides an
+ earlier declaration. */
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_INITIAL (decl)
+ && ! DECL_DECLARED_INLINE_P (decl))
+ {
+ warning (OPT_Wattributes, "function %q+D is defined after prior "
+ "declaration as dllimport: attribute ignored",
+ decl);
+ return false;
+ }
+
+ /* Don't allow definitions of static data members in dllimport
+ class. Just ignore the attribute for vtable data. */
+ else if (TREE_CODE (decl) == VAR_DECL
+ && TREE_STATIC (decl)
+ && TREE_PUBLIC (decl)
+ && !DECL_EXTERNAL (decl))
+ {
+ error ("definition of static data member %q+D of dllimport%'d class",
+ decl);
+ return false;
+ }
+
+ return true;
+}
+
+/* Handle a "dllimport" or "dllexport" attribute;
+ arguments as in struct attribute_spec.handler. */
+
+tree
+sh_symbian_handle_dll_attribute (tree *pnode, tree name, tree args,
+ int flags, bool *no_add_attrs)
+{
+ tree node = *pnode;
+ const char *attr = IDENTIFIER_POINTER (name);
+
+ /* These attributes may apply to structure and union types being
+ created, but otherwise should pass to the declaration involved. */
+ if (!DECL_P (node))
+ {
+ if (flags & ((int) ATTR_FLAG_DECL_NEXT
+ | (int) ATTR_FLAG_FUNCTION_NEXT
+ | (int) ATTR_FLAG_ARRAY_NEXT))
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored", attr);
+ *no_add_attrs = true;
+ return tree_cons (name, args, NULL_TREE);
+ }
+
+ if (TREE_CODE (node) != RECORD_TYPE && TREE_CODE (node) != UNION_TYPE)
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored", attr);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
+ /* Report error on dllimport ambiguities
+ seen now before they cause any damage. */
+ else if (is_attribute_p ("dllimport", name))
+ {
+ if (TREE_CODE (node) == VAR_DECL)
+ {
+ if (DECL_INITIAL (node))
+ {
+ error ("variable %q+D definition is marked dllimport",
+ node);
+ *no_add_attrs = true;
+ }
+
+ /* `extern' needn't be specified with dllimport.
+ Specify `extern' now and hope for the best. Sigh. */
+ DECL_EXTERNAL (node) = 1;
+ /* Also, implicitly give dllimport'd variables declared within
+ a function global scope, unless declared static. */
+ if (current_function_decl != NULL_TREE && ! TREE_STATIC (node))
+ TREE_PUBLIC (node) = 1;
+ }
+ }
+
+ /* Report error if symbol is not accessible at global scope. */
+ if (!TREE_PUBLIC (node)
+ && ( TREE_CODE (node) == VAR_DECL
+ || TREE_CODE (node) == FUNCTION_DECL))
+ {
+ error ("external linkage required for symbol %q+D because of %qE attribute",
+ node, name);
+ *no_add_attrs = true;
+ }
+
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "mark node", node, 0);
+ fprintf (stderr, " as %s\n", attr);
+#endif
+
+ return NULL_TREE;
+}
+
+int
+sh_symbian_import_export_class (tree ctype ATTRIBUTE_UNUSED, int import_export)
+{
+ return import_export;
+}
diff --git a/gcc/config/sh/symbian-cxx.c b/gcc/config/sh/symbian-cxx.c
new file mode 100644
index 000000000..c0f8b71f6
--- /dev/null
+++ b/gcc/config/sh/symbian-cxx.c
@@ -0,0 +1,662 @@
+/* Routines for C++ support for GCC for a Symbian OS targeted SH backend.
+ Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+ Most of this code is stolen from i386/winnt.c.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "output.h"
+#include "flags.h"
+#include "tree.h"
+#include "expr.h"
+#include "tm_p.h"
+#include "cp/cp-tree.h" /* We need access to the OVL_... macros. */
+#include "diagnostic-core.h"
+#include "sh-symbian.h"
+
+
+/* Return the type that we should use to determine if DECL is
+ imported or exported. */
+
+tree
+sh_symbian_associated_type (tree decl)
+{
+ tree t = NULL_TREE;
+
+ if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE)
+ /* Methods now inherit their dllimport/dllexport attributes correctly
+ so there is no need to check their class. In fact it is wrong to
+ check their class since a method can remain unexported from an
+ exported class. */
+ return t;
+
+ /* Otherwise we can just take the DECL_CONTEXT as normal. */
+ if (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl)))
+ t = DECL_CONTEXT (decl);
+
+ return t;
+}
+
+
+/* Return nonzero if DECL is a dllimport'd object. */
+
+bool
+sh_symbian_is_dllimported (tree decl)
+{
+ tree imp;
+
+ if ( TREE_CODE (decl) != VAR_DECL
+ && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ imp = lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl));
+ if (imp)
+ return true;
+
+ /* Class members get the dllimport status of their class. */
+ imp = sh_symbian_associated_type (decl);
+ if (! imp)
+ return false;
+
+ imp = lookup_attribute ("dllimport", TYPE_ATTRIBUTES (imp));
+ if (!imp)
+ return false;
+
+ /* Don't mark defined functions as dllimport. If the definition itself
+ was marked with dllimport, then sh_symbian_handle_dll_attribute reports
+ an error. This handles the case when the definition overrides an
+ earlier declaration. */
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_INITIAL (decl)
+ && ! DECL_DECLARED_INLINE_P (decl))
+ {
+ /* Don't warn about artificial methods. */
+ if (!DECL_ARTIFICIAL (decl))
+ warning (OPT_Wattributes, "function %q+D is defined after prior "
+ "declaration as dllimport: attribute ignored",
+ decl);
+ return false;
+ }
+
+ /* We ignore the dllimport attribute for inline member functions.
+     This differs from MSVC behavior, which treats it like the GNU C
+     'extern inline' extension. */
+ else if (TREE_CODE (decl) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (decl))
+ {
+ if (extra_warnings)
+ warning (OPT_Wattributes, "inline function %q+D is declared as "
+ "dllimport: attribute ignored",
+ decl);
+ return false;
+ }
+
+ /* Don't allow definitions of static data members in dllimport
+ class. Just ignore the attribute for vtable data. */
+ else if (TREE_CODE (decl) == VAR_DECL
+ && TREE_STATIC (decl)
+ && TREE_PUBLIC (decl)
+ && !DECL_EXTERNAL (decl))
+ {
+ if (!DECL_VIRTUAL_P (decl))
+ error ("definition of static data member %q+D of dllimport%'d class",
+ decl);
+ return false;
+ }
+
+ /* Since we can't treat a pointer to a dllimport'd symbol as a
+ constant address, we turn off the attribute on C++ virtual
+ methods to allow creation of vtables using thunks. Don't mark
+ artificial methods either (in sh_symbian_associated_type, only
+     COMDAT artificial methods get import status from class context). */
+ else if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE
+ && (DECL_VIRTUAL_P (decl) || DECL_ARTIFICIAL (decl)))
+ return false;
+
+ return true;
+}
+
+
+/* This code implements a specification for exporting the vtable and rtti of
+   classes that have members with the dllimport or dllexport attributes.
+ This specification is defined here:
+
+ http://www.armdevzone.com/EABI/exported_class.txt
+
+ Basically it says that a class's vtable and rtti should be exported if
+ the following rules apply:
+
+ - If it has any non-inline non-pure virtual functions,
+     at least one of these needs to be declared dllimport
+ OR any of the constructors is declared dllimport.
+
+ AND
+
+ - The class has an inline constructor/destructor and
+ a key-function (placement of vtable uniquely defined) that
+ is defined in this translation unit.
+
+   The specification also says that a class which has its vtable and
+   rtti exported may also require similar exporting of its base classes:
+
+   - Every base class needs to have its vtable & rtti exported
+     as well, if the following conditions hold true:
+     + The base class has a non-inline declared non-pure virtual function
+     + The base class is polymorphic (has or inherits any virtual functions)
+       or the base class has any virtual base classes. */
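+
+/* A sketch of a class meeting the rules above (illustrative only; the
+   name and members are invented):
+
+     class Widget
+     {
+     public:
+       __attribute__((dllimport)) Widget ();  // imported constructor
+       ~Widget () {}                          // inline destructor
+       virtual void draw ();                  // non-inline, non-pure:
+     };                                       //  the key function
+
+   If Widget::draw is defined in this translation unit, the vtable and
+   rtti of Widget are placed here and, per the rules, exported.  */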
+
+/* Decide if a base class of a class should
+ also have its vtable and rtti exported. */
+
+static void
+sh_symbian_possibly_export_base_class (tree base_class)
+{
+ VEC(tree,gc) *method_vec;
+ int len;
+
+ if (! (TYPE_CONTAINS_VPTR_P (base_class)))
+ return;
+
+ method_vec = CLASSTYPE_METHOD_VEC (base_class);
+ len = method_vec ? VEC_length (tree, method_vec) : 0;
+
+ for (;len --;)
+ {
+ tree member = VEC_index (tree, method_vec, len);
+
+ if (! member)
+ continue;
+
+ for (member = OVL_CURRENT (member); member; member = OVL_NEXT (member))
+ {
+ if (TREE_CODE (member) != FUNCTION_DECL)
+ continue;
+
+ if (DECL_CONSTRUCTOR_P (member) || DECL_DESTRUCTOR_P (member))
+ continue;
+
+ if (! DECL_VIRTUAL_P (member))
+ continue;
+
+ if (DECL_PURE_VIRTUAL_P (member))
+ continue;
+
+ if (DECL_DECLARED_INLINE_P (member))
+ continue;
+
+ break;
+ }
+
+ if (member)
+ break;
+ }
+
+ if (len < 0)
+ return;
+
+ /* FIXME: According to the spec this base class should be exported, but
+     a) how do we do this? and
+ b) it does not appear to be necessary for compliance with the Symbian
+ OS which so far is the only consumer of this code. */
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", base_class, 0);
+ fprintf (stderr, " EXPORTed [base class of exported class]\n");
+#endif
+}
+
+/* Add the named attribute to the given node. Copes with both DECLs and
+ TYPEs. Will only add the attribute if it is not already present. */
+
+static void
+sh_symbian_add_attribute (tree node, const char *attr_name)
+{
+ tree attrs;
+ tree attr;
+
+ attrs = DECL_P (node) ? DECL_ATTRIBUTES (node) : TYPE_ATTRIBUTES (node);
+
+ if (lookup_attribute (attr_name, attrs) != NULL_TREE)
+ return;
+
+ attr = get_identifier (attr_name);
+
+ if (DECL_P (node))
+ DECL_ATTRIBUTES (node) = tree_cons (attr, NULL_TREE, attrs);
+ else
+ TYPE_ATTRIBUTES (node) = tree_cons (attr, NULL_TREE, attrs);
+
+#if SYMBIAN_DEBUG
+ fprintf (stderr, "propagate %s attribute", attr_name);
+ print_node_brief (stderr, " to", node, 0);
+ fprintf (stderr, "\n");
+#endif
+}
+
+/* Add the named attribute to a class and its vtable and rtti. */
+
+static void
+sh_symbian_add_attribute_to_class_vtable_and_rtti (tree ctype, const char *attr_name)
+{
+ sh_symbian_add_attribute (ctype, attr_name);
+
+ /* If the vtable exists then they need annotating as well. */
+ if (CLASSTYPE_VTABLES (ctype))
+    /* XXX - Do we need to annotate any vtables other than the primary? */
+ sh_symbian_add_attribute (CLASSTYPE_VTABLES (ctype), attr_name);
+
+ /* If the rtti exists then it needs annotating as well. */
+ if (TYPE_MAIN_VARIANT (ctype)
+ && CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype)))
+ sh_symbian_add_attribute (CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype)),
+ attr_name);
+}
+
+/* Decide if a class needs to have an attribute because
+ one of its member functions has the attribute. */
+
+static bool
+sh_symbian_class_needs_attribute (tree ctype, const char *attribute_name)
+{
+ VEC(tree,gc) *method_vec;
+
+ method_vec = CLASSTYPE_METHOD_VEC (ctype);
+
+ /* If the key function has the attribute then the class needs it too. */
+ if (TYPE_POLYMORPHIC_P (ctype)
+ && method_vec
+ && tree_contains_struct [TREE_CODE (ctype), TS_DECL_COMMON] == 1
+ && lookup_attribute (attribute_name,
+ DECL_ATTRIBUTES (VEC_index (tree, method_vec, 0))))
+ return true;
+
+ /* Check the class's member functions. */
+ if (TREE_CODE (ctype) == RECORD_TYPE)
+ {
+ unsigned int len;
+
+ len = method_vec ? VEC_length (tree, method_vec) : 0;
+
+ for (;len --;)
+ {
+ tree member = VEC_index (tree, method_vec, len);
+
+ if (! member)
+ continue;
+
+ for (member = OVL_CURRENT (member);
+ member;
+ member = OVL_NEXT (member))
+ {
+ if (TREE_CODE (member) != FUNCTION_DECL)
+ continue;
+
+ if (DECL_PURE_VIRTUAL_P (member))
+ continue;
+
+ if (! DECL_VIRTUAL_P (member))
+ continue;
+
+ if (lookup_attribute (attribute_name, DECL_ATTRIBUTES (member)))
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " inherits %s because", attribute_name);
+ print_node_brief (stderr, "", member, 0);
+ fprintf (stderr, " has it.\n");
+#endif
+ return true;
+ }
+ }
+ }
+ }
+
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " does not inherit %s\n", attribute_name);
+#endif
+ return false;
+}
+
+/* Decide if a class needs its vtable and rtti exporting. */
+
+static bool
+symbian_export_vtable_and_rtti_p (tree ctype)
+{
+ bool inline_ctor_dtor;
+ bool dllimport_ctor_dtor;
+ bool dllimport_member;
+ tree binfo, base_binfo;
+ VEC(tree,gc) *method_vec;
+ tree key;
+ int i;
+ int len;
+
+ /* Make sure that we are examining a class... */
+ if (TREE_CODE (ctype) != RECORD_TYPE)
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " does NOT need to be EXPORTed [not a class]\n");
+#endif
+ return false;
+ }
+
+ /* If the class does not have a key function it
+ does not need to have its vtable exported. */
+ if ((key = CLASSTYPE_KEY_METHOD (ctype)) == NULL_TREE)
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " does NOT need to be EXPORTed [no key function]\n");
+#endif
+ return false;
+ }
+
+ /* If the key fn has not been defined
+ then the class should not be exported. */
+ if (! TREE_ASM_WRITTEN (key))
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " does NOT need to be EXPORTed [key function not defined]\n");
+#endif
+ return false;
+ }
+
+ /* Check the class's member functions. */
+ inline_ctor_dtor = false;
+ dllimport_ctor_dtor = false;
+ dllimport_member = false;
+
+ method_vec = CLASSTYPE_METHOD_VEC (ctype);
+ len = method_vec ? VEC_length (tree, method_vec) : 0;
+
+ for (;len --;)
+ {
+ tree member = VEC_index (tree, method_vec, len);
+
+ if (! member)
+ continue;
+
+ for (member = OVL_CURRENT (member); member; member = OVL_NEXT (member))
+ {
+ if (TREE_CODE (member) != FUNCTION_DECL)
+ continue;
+
+ if (DECL_CONSTRUCTOR_P (member) || DECL_DESTRUCTOR_P (member))
+ {
+ if (DECL_DECLARED_INLINE_P (member)
+ /* Ignore C++ backend created inline ctors/dtors. */
+ && ( DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (member)
+ || DECL_MAYBE_IN_CHARGE_DESTRUCTOR_P (member)))
+ inline_ctor_dtor = true;
+
+ if (lookup_attribute ("dllimport", DECL_ATTRIBUTES (member)))
+ dllimport_ctor_dtor = true;
+ }
+ else
+ {
+ if (DECL_PURE_VIRTUAL_P (member))
+ continue;
+
+ if (! DECL_VIRTUAL_P (member))
+ continue;
+
+ if (DECL_DECLARED_INLINE_P (member))
+ continue;
+
+ if (lookup_attribute ("dllimport", DECL_ATTRIBUTES (member)))
+ dllimport_member = true;
+ }
+ }
+ }
+
+ if (! dllimport_member && ! dllimport_ctor_dtor)
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr,
+ " does NOT need to be EXPORTed [no non-pure virtuals or ctors/dtors with dllimport]\n");
+#endif
+ return false;
+ }
+
+ if (! inline_ctor_dtor)
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr,
+ " does NOT need to be EXPORTed [no inline ctor/dtor]\n");
+#endif
+ return false;
+ }
+
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "", ctype, 0);
+ fprintf (stderr, " DOES need to be EXPORTed\n");
+#endif
+
+ /* Now we must check and possibly export the base classes. */
+ for (i = 0, binfo = TYPE_BINFO (ctype);
+ BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
+ sh_symbian_possibly_export_base_class (BINFO_TYPE (base_binfo));
+
+ return true;
+}
+
+/* Possibly override the decision to export class CTYPE.  Upon entry
+ IMPORT_EXPORT will contain 1 if the class is going to be exported,
+ -1 if it is going to be imported and 0 otherwise. This function
+ should return the modified value and perform any other actions
+ necessary to support the backend's targeted operating system. */
+
+int
+sh_symbian_import_export_class (tree ctype, int import_export)
+{
+ const char *attr_name = NULL;
+
+ /* If we are exporting the class but it does not have the dllexport
+ attribute then we may need to add it. Similarly imported classes
+ may need the dllimport attribute. */
+ switch (import_export)
+ {
+ case 1: attr_name = "dllexport"; break;
+ case -1: attr_name = "dllimport"; break;
+ default: break;
+ }
+
+ if (attr_name
+ && ! lookup_attribute (attr_name, TYPE_ATTRIBUTES (ctype)))
+ {
+ if (sh_symbian_class_needs_attribute (ctype, attr_name))
+ sh_symbian_add_attribute_to_class_vtable_and_rtti (ctype, attr_name);
+
+ /* Classes can be forced to export their
+ vtable and rtti under certain conditions. */
+ if (symbian_export_vtable_and_rtti_p (ctype))
+ {
+ sh_symbian_add_attribute_to_class_vtable_and_rtti (ctype, "dllexport");
+
+ /* Make sure that the class and its vtable are exported. */
+ import_export = 1;
+
+ if (CLASSTYPE_VTABLES (ctype))
+ DECL_EXTERNAL (CLASSTYPE_VTABLES (ctype)) = 1;
+
+ /* Check to make sure that if the class has a key method that
+ it is now on the list of keyed classes. That way its vtable
+ will be emitted. */
+ if (CLASSTYPE_KEY_METHOD (ctype))
+ {
+ tree class;
+
+ for (class = keyed_classes; class; class = TREE_CHAIN (class))
+ if (class == ctype)
+ break;
+
+ if (class == NULL_TREE)
+ {
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "Add node", ctype, 0);
+ fprintf (stderr, " to the keyed classes list\n");
+#endif
+ keyed_classes = tree_cons (NULL_TREE, ctype, keyed_classes);
+ }
+ }
+
+ /* Make sure that the typeinfo will be emitted as well. */
+ if (CLASS_TYPE_P (ctype))
+ TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (CLASSTYPE_TYPEINFO_VAR (TYPE_MAIN_VARIANT (ctype)))) = 1;
+ }
+ }
+
+ return import_export;
+}
+
+/* Handle a "dllimport" or "dllexport" attribute;
+ arguments as in struct attribute_spec.handler. */
+
+tree
+sh_symbian_handle_dll_attribute (tree *pnode, tree name, tree args,
+ int flags, bool *no_add_attrs)
+{
+ tree thunk;
+ tree node = *pnode;
+ const char *attr = IDENTIFIER_POINTER (name);
+
+ /* These attributes may apply to structure and union types being
+ created, but otherwise should pass to the declaration involved. */
+ if (!DECL_P (node))
+ {
+ if (flags & ((int) ATTR_FLAG_DECL_NEXT
+ | (int) ATTR_FLAG_FUNCTION_NEXT
+ | (int) ATTR_FLAG_ARRAY_NEXT))
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored", attr);
+ *no_add_attrs = true;
+ return tree_cons (name, args, NULL_TREE);
+ }
+
+ if (TREE_CODE (node) != RECORD_TYPE && TREE_CODE (node) != UNION_TYPE)
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored", attr);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
+ /* Report error on dllimport ambiguities
+ seen now before they cause any damage. */
+ else if (is_attribute_p ("dllimport", name))
+ {
+ if (TREE_CODE (node) == VAR_DECL)
+ {
+ if (DECL_INITIAL (node))
+ {
+ error ("variable %q+D definition is marked dllimport",
+ node);
+ *no_add_attrs = true;
+ }
+
+ /* `extern' needn't be specified with dllimport.
+ Specify `extern' now and hope for the best. Sigh. */
+ DECL_EXTERNAL (node) = 1;
+ /* Also, implicitly give dllimport'd variables declared within
+ a function global scope, unless declared static. */
+ if (current_function_decl != NULL_TREE && ! TREE_STATIC (node))
+ TREE_PUBLIC (node) = 1;
+ }
+ }
+
+ /* If the node is an overloaded constructor or destructor, then we must
+ make sure that the attribute is propagated along the overload chain,
+ as it is these overloaded functions which will be emitted, rather than
+ the user declared constructor itself. */
+ if (TREE_CODE (TREE_TYPE (node)) == METHOD_TYPE
+ && (DECL_CONSTRUCTOR_P (node) || DECL_DESTRUCTOR_P (node)))
+ {
+ tree overload;
+
+ for (overload = OVL_CHAIN (node); overload; overload = OVL_CHAIN (overload))
+ {
+ tree node_args;
+ tree func_args;
+ tree function = OVL_CURRENT (overload);
+
+ if (! function
+ || ! DECL_P (function)
+ || (DECL_CONSTRUCTOR_P (node) && ! DECL_CONSTRUCTOR_P (function))
+ || (DECL_DESTRUCTOR_P (node) && ! DECL_DESTRUCTOR_P (function)))
+ continue;
+
+ /* The arguments must match as well. */
+ for (node_args = DECL_ARGUMENTS (node), func_args = DECL_ARGUMENTS (function);
+ node_args && func_args;
+ node_args = TREE_CHAIN (node_args), func_args = TREE_CHAIN (func_args))
+ if (TREE_TYPE (node_args) != TREE_TYPE (func_args))
+ break;
+
+ if (node_args || func_args)
+ {
+	      /* We can ignore an extraneous __in_chrg argument in the node.
+	         GCC-generated destructors, for example, will have one.  */
+ if ((node_args == NULL_TREE
+ || func_args != NULL_TREE)
+ && strcmp (IDENTIFIER_POINTER (DECL_NAME (node)), "__in_chrg") != 0)
+ continue;
+ }
+
+ sh_symbian_add_attribute (function, attr);
+
+ /* Propagate the attribute to any function thunks as well. */
+ for (thunk = DECL_THUNKS (function); thunk; thunk = DECL_CHAIN (thunk))
+ if (TREE_CODE (thunk) == FUNCTION_DECL)
+ sh_symbian_add_attribute (thunk, attr);
+ }
+ }
+
+ if (TREE_CODE (node) == FUNCTION_DECL && DECL_VIRTUAL_P (node))
+ {
+ /* Propagate the attribute to any thunks of this function. */
+ for (thunk = DECL_THUNKS (node); thunk; thunk = DECL_CHAIN (thunk))
+ if (TREE_CODE (thunk) == FUNCTION_DECL)
+ sh_symbian_add_attribute (thunk, attr);
+ }
+
+ /* Report error if symbol is not accessible at global scope. */
+ if (!TREE_PUBLIC (node)
+ && ( TREE_CODE (node) == VAR_DECL
+ || TREE_CODE (node) == FUNCTION_DECL))
+ {
+ error ("external linkage required for symbol %q+D because of %qE attribute",
+ node, name);
+ *no_add_attrs = true;
+ }
+
+#if SYMBIAN_DEBUG
+ print_node_brief (stderr, "mark node", node, 0);
+ fprintf (stderr, " as %s\n", attr);
+#endif
+
+ return NULL_TREE;
+}
diff --git a/gcc/config/sh/symbian-post.h b/gcc/config/sh/symbian-post.h
new file mode 100644
index 000000000..a4497b969
--- /dev/null
+++ b/gcc/config/sh/symbian-post.h
@@ -0,0 +1,88 @@
+/* Definitions for the Symbian OS running on an SH part.
+ This file is included after all the other target specific headers.
+
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION \
+ fputs (" (Renesas SH for Symbian OS)", stderr);
+
+#undef LINK_EMUL_PREFIX
+#define LINK_EMUL_PREFIX "shlsymbian"
+
+
+#define SYMBIAN_EXPORT_NAME(NAME,FILE,DECL) \
+ do \
+ { \
+ if ((DECL && sh_symbian_is_dllexported (DECL)) \
+ || sh_symbian_is_dllexported_name (NAME)) \
+ { \
+ fprintf ((FILE), "\t.pushsection .directive\n"); \
+ fprintf ((FILE), "\t.asciz \"EXPORT %s\\n\"\n", \
+ sh_symbian_strip_name_encoding (NAME)); \
+ fprintf ((FILE), "\t.popsection\n"); \
+ } \
+ } \
+ while (0)
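+
+/* For a dllexport'd symbol "foo" the fprintf calls above emit
+
+	.pushsection .directive
+	.asciz "EXPORT foo\n"
+	.popsection
+
+   ("foo" is an invented name; the .directive section is presumably
+   collected by the Symbian post-link tools).  */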
+
+/* Output a function definition label. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ SYMBIAN_EXPORT_NAME ((NAME), (FILE), (DECL)); \
+ ASM_OUTPUT_TYPE_DIRECTIVE ((FILE), (NAME), "function"); \
+ ASM_DECLARE_RESULT ((FILE), DECL_RESULT (DECL)); \
+ ASM_OUTPUT_LABEL ((FILE), (NAME)); \
+ } \
+ while (0)
+
+/* Output the label for an initialized variable. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ \
+ SYMBIAN_EXPORT_NAME ((NAME), (FILE), (DECL)); \
+ ASM_OUTPUT_TYPE_DIRECTIVE ((FILE), (NAME), "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) \
+ && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE ((FILE), (NAME), size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL ((FILE), (NAME)); \
+ } \
+ while (0)
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ do \
+ { \
+ asm_fprintf ((FILE), "%U%s", \
+ sh_symbian_strip_name_encoding (NAME)); \
+ } \
+ while (0)
diff --git a/gcc/config/sh/symbian-pre.h b/gcc/config/sh/symbian-pre.h
new file mode 100644
index 000000000..d2229e071
--- /dev/null
+++ b/gcc/config/sh/symbian-pre.h
@@ -0,0 +1,40 @@
+/* Definitions for the Symbian OS running on an SH part.
+ This file is included before any other target specific headers.
+
+ Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Red Hat.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Enable Symbian specific code. */
+#define SYMBIAN 1
+
+/* Default to using the Renesas ABI. */
+#define TARGET_ABI_DEFAULT MASK_HITACHI
+
+#define SUBTARGET_CPP_SPEC ""
+
+/* Get tree.c to declare merge_dllimport_decl_attributes(). */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+/* The Symbian OS currently does not support exception handling. */
+#define SUBTARGET_CC1PLUS_SPEC "-fno-exceptions"
+
+/* Create constructor/destructor sections without the writable flag.
+ Symbian puts them into the text segment and munges them later on. */
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"ax\",@progbits"
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"ax\",@progbits"
diff --git a/gcc/config/sh/t-elf b/gcc/config/sh/t-elf
new file mode 100644
index 000000000..333efb54e
--- /dev/null
+++ b/gcc/config/sh/t-elf
@@ -0,0 +1,10 @@
+EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o \
+ crtbegin.o crtend.o crtbeginS.o crtendS.o $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS)
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+# Don't compile libgcc with -fpic for now. It's unlikely that we'll
+# build shared libraries for embedded SH.
+# Linux / NetBSD will already have set TARGET_LIBGCC2_CFLAGS.
+# TARGET_LIBGCC2_CFLAGS = -fpic
diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux
new file mode 100644
index 000000000..13ff848dd
--- /dev/null
+++ b/gcc/config/sh/t-linux
@@ -0,0 +1,8 @@
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
+LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm
+
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES =
+
+EXTRA_MULTILIB_PARTS= crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
diff --git a/gcc/config/sh/t-linux64 b/gcc/config/sh/t-linux64
new file mode 100644
index 000000000..126b01637
--- /dev/null
+++ b/gcc/config/sh/t-linux64
@@ -0,0 +1 @@
+EXTRA_MULTILIB_PARTS= crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd
new file mode 100644
index 000000000..b2794a006
--- /dev/null
+++ b/gcc/config/sh/t-netbsd
@@ -0,0 +1,31 @@
+# Copyright (C) 2002, 2004, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+TARGET_LIBGCC2_CFLAGS = -fpic -mieee
+LIB1ASMFUNCS_CACHE = _ic_invalidate
+
+LIB2FUNCS_EXTRA=
+
+EXTRA_MULTILIB_PARTS=
+
+# NetBSD's C library includes a fast software FP library that
+# has support for setting/getting the rounding mode, exception
+# mask, etc. Therefore, we don't want to include software FP
+# in libgcc.
+FPBIT =
+DPBIT =
diff --git a/gcc/config/sh/t-netbsd-sh5-64 b/gcc/config/sh/t-netbsd-sh5-64
new file mode 100644
index 000000000..8fc6bd1ea
--- /dev/null
+++ b/gcc/config/sh/t-netbsd-sh5-64
@@ -0,0 +1 @@
+MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES:/media64=)
diff --git a/gcc/config/sh/t-rtems b/gcc/config/sh/t-rtems
new file mode 100644
index 000000000..9fd262cf8
--- /dev/null
+++ b/gcc/config/sh/t-rtems
@@ -0,0 +1,7 @@
+# Custom multilibs for RTEMS
+
+MULTILIB_ENDIAN = ml
+MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) m2/m2e/m4-single-only/m4-single/m4
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu
+MULTILIB_EXCEPTIONS = ml
diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh
new file mode 100644
index 000000000..a897bfffb
--- /dev/null
+++ b/gcc/config/sh/t-sh
@@ -0,0 +1,166 @@
+# Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+sh-c.o: $(srcdir)/config/sh/sh-c.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sh/sh-c.c
+
+LIB1ASMSRC = sh/lib1funcs.asm
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \
+ _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _div_table _udiv_qrnnd_16 \
+ $(LIB1ASMFUNCS_CACHE)
+LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array
+
+TARGET_LIBGCC2_CFLAGS = -mieee
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
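+
+# The generated files therefore begin (literally, from the echo commands
+# above):
+#   dp-bit.c: #ifdef __LITTLE_ENDIAN__ / #define FLOAT_BIT_ORDER_MISMATCH
+#             / #endif
+#   fp-bit.c: #define FLOAT plus the same conditional
+# followed in each case by the shared $(srcdir)/config/fp-bit.c body.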
+
+DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
+OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG))
+
+MULTILIB_OPTIONS= $(OTHER_ENDIAN) $(TM_MULTILIB_CONFIG)
+MULTILIB_DIRNAMES=
+
+# The separate entries for m2a-nofpu and m2a-single-only with
+# duplicate base libraries are there to make sure we don't ever use an
+# m4* multilib for m2a or vice-versa; they are not compatible. This
+# is why sh2a and sh2a-single need their own multilibs.
+MULTILIB_MATCHES = $(shell \
+ multilibs="$(MULTILIB_OPTIONS)" ; \
+ for abi in m1,m2,m3,m4-nofpu,m4-100-nofpu,m4-200-nofpu,m4-400,m4-500,m4-340,m4-300-nofpu,m4al,m4a-nofpu \
+ m1,m2,m2a-nofpu \
+ m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4-300-single-only,m4a-single-only \
+ m2e,m2a-single-only \
+ m4-single,m4-100-single,m4-200-single,m4-300-single,m4a-single \
+ m4,m4-100,m4-200,m4-300,m4a \
+ m5-32media,m5-compact,m5-32media \
+ m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \
+ subst= ; \
+ for lib in `echo $$abi|tr , ' '` ; do \
+ if test "`echo $$multilibs|sed s/$$lib//`" != "$$multilibs"; then \
+ subst=$$lib ; \
+ elif test x$$subst != x ; then \
+ echo $$subst=$$lib ; \
+ fi \
+ done \
+ done)
+
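+# Illustration (assumed, not captured output): if MULTILIB_OPTIONS
+# contains m2 but none of the later CPUs in its group, the first list
+# above walks m1,m2,m3,m4-nofpu,... and emits "m2=m3", "m2=m4-nofpu",
+# and so on, so requests for the absent CPUs reuse the m2 multilib.
+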
+# SH1 only supports big endian.
+MULTILIB_EXCEPTIONS = ml/m1 ml/m2a* $(TM_MULTILIB_EXCEPTIONS_CONFIG)
+
+MULTILIB_OSDIRNAMES = \
+ $(OTHER_ENDIAN)=!$(OTHER_ENDIAN) \
+ m1=!m1 $(OTHER_ENDIAN)/m1=!$(OTHER_ENDIAN)/m1 \
+ m2a=!m2a $(OTHER_ENDIAN)/m2a=!$(OTHER_ENDIAN)/m2a \
+ m2a-nofpu=!m2a-nofpu $(OTHER_ENDIAN)/m2a-nofpu=!$(OTHER_ENDIAN)/m2a-nofpu \
+ m2a-single-only=!m2a-single-only $(OTHER_ENDIAN)/m2a-single-only=!$(OTHER_ENDIAN)/m2a-single-only \
+ m2a-single=!m2a-single $(OTHER_ENDIAN)/m2a-single=!$(OTHER_ENDIAN)/m2a-single \
+ m2e=!m2e $(OTHER_ENDIAN)/m2e=!$(OTHER_ENDIAN)/m2e \
+ m2=!m2 $(OTHER_ENDIAN)/m2=!$(OTHER_ENDIAN)/m2 \
+ m3e=!m3e $(OTHER_ENDIAN)/m3e=!$(OTHER_ENDIAN)/m3e \
+ m3=!m3 $(OTHER_ENDIAN)/m3=!$(OTHER_ENDIAN)/m3 \
+ m4-nofpu=!m4-nofpu $(OTHER_ENDIAN)/m4-nofpu=!$(OTHER_ENDIAN)/m4-nofpu \
+ m4-single-only=!m4-single-only $(OTHER_ENDIAN)/m4-single-only=!$(OTHER_ENDIAN)/m4-single-only \
+ m4-single=!m4-single $(OTHER_ENDIAN)/m4-single=!$(OTHER_ENDIAN)/m4-single \
+ m4=!m4 $(OTHER_ENDIAN)/m4=!$(OTHER_ENDIAN)/m4 \
+ m4a-nofpu=!m4a-nofpu $(OTHER_ENDIAN)/m4a-nofpu=!$(OTHER_ENDIAN)/m4a-nofpu \
+ m4a-single-only=!m4a-single-only $(OTHER_ENDIAN)/m4a-single-only=!$(OTHER_ENDIAN)/m4a-single-only \
+ m4a-single=!m4a-single $(OTHER_ENDIAN)/m4a-single=!$(OTHER_ENDIAN)/m4a-single \
+ m4a=!m4a $(OTHER_ENDIAN)/m4a=!$(OTHER_ENDIAN)/m4a \
+ m4al=!m4al $(OTHER_ENDIAN)/m4al=!$(OTHER_ENDIAN)/m4al \
+ m5-32media=!m5-32media $(OTHER_ENDIAN)/m5-32media=!$(OTHER_ENDIAN)/m5-32media \
+ m5-32media-nofpu=!m5-32media-nofpu $(OTHER_ENDIAN)/m5-32media-nofpu=!$(OTHER_ENDIAN)/m5-32media-nofpu \
+ m5-compact=!m5-compact $(OTHER_ENDIAN)/m5-compact=!$(OTHER_ENDIAN)/m5-compact \
+ m5-compact-nofpu=!m5-compact-nofpu $(OTHER_ENDIAN)/m5-compact-nofpu=!$(OTHER_ENDIAN)/m5-compact-nofpu \
+ m5-64media=!m5-64media $(OTHER_ENDIAN)/m5-64media=!$(OTHER_ENDIAN)/m5-64media \
+ m5-64media-nofpu=!m5-64media-nofpu $(OTHER_ENDIAN)/m5-64media-nofpu=!$(OTHER_ENDIAN)/m5-64media-nofpu
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+$(T)crt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm
+$(T)crti.o: $(srcdir)/config/sh/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sh/crti.asm
+$(T)crtn.o: $(srcdir)/config/sh/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sh/crtn.asm
+
+$(out_object_file): gt-sh.h
+gt-sh.h : s-gtype ; @true
+
+# These are not suitable for COFF.
+# EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o crtbegin.o crtend.o
+
+IC_EXTRA_PARTS= libic_invalidate_array_4-100.a libic_invalidate_array_4-200.a \
+libic_invalidate_array_4a.a
+OPT_EXTRA_PARTS= libgcc-Os-4-200.a libgcc-4-300.a
+EXTRA_MULTILIB_PARTS= $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS)
+
+$(T)ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4-100.o -DL_ic_invalidate_array -DWAYS=1 -DWAY_SIZE=0x2000 -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm
+$(T)libic_invalidate_array_4-100.a: $(T)ic_invalidate_array_4-100.o $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4-100.a $(T)ic_invalidate_array_4-100.o
+
+$(T)ic_invalidate_array_4-200.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4-200.o -DL_ic_invalidate_array -DWAYS=2 -DWAY_SIZE=0x2000 -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm
+$(T)libic_invalidate_array_4-200.a: $(T)ic_invalidate_array_4-200.o $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4-200.a $(T)ic_invalidate_array_4-200.o
+
+$(T)ic_invalidate_array_4a.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)ic_invalidate_array_4a.o -DL_ic_invalidate_array -D__FORCE_SH4A__ -x assembler-with-cpp $(srcdir)/config/sh/lib1funcs.asm
+$(T)libic_invalidate_array_4a.a: $(T)ic_invalidate_array_4a.o $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $(T)libic_invalidate_array_4a.a $(T)ic_invalidate_array_4a.o
+
+$(T)sdivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_sdivsi3_i4i -x assembler-with-cpp $<
+$(T)udivsi3_i4i-Os-4-200.o: $(srcdir)/config/sh/lib1funcs-Os-4-200.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_udivsi3_i4i -x assembler-with-cpp $<
+$(T)unwind-dw2-Os-4-200.o: $(srcdir)/unwind-dw2.c $(srcdir)/unwind-generic.h unwind-pe.h unwind.inc unwind-dw2-fde.h unwind-dw2.h $(CONFIG_H) coretypes.h $(TM_H) $(MACHMODE_H) longlong.h config.status stmp-int-hdrs tsystem.h $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) $(LIBGCC2_CFLAGS) $(INCLUDES) $(vis_hide) -fexceptions -Os -c -o $@ $<
+OBJS_Os_4_200=$(T)sdivsi3_i4i-Os-4-200.o $(T)udivsi3_i4i-Os-4-200.o $(T)unwind-dw2-Os-4-200.o
+$(T)libgcc-Os-4-200.a: $(OBJS_Os_4_200) $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200)
+
+$(T)div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_div_table -x assembler-with-cpp $<
+
+$(T)libgcc-4-300.a: $(T)div_table-4-300.o $(GCC_PASSES)
+ $(AR_CREATE_FOR_TARGET) $@ $(T)div_table-4-300.o
+
+# Local Variables:
+# mode: Makefile
+# End:
diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64
new file mode 100644
index 000000000..d88f929fd
--- /dev/null
+++ b/gcc/config/sh/t-sh64
@@ -0,0 +1,29 @@
+# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMFUNCS = \
+ _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ _shcompact_call_trampoline _shcompact_return_trampoline \
+ _shcompact_incoming_args _ic_invalidate _nested_trampoline \
+ _push_pop_shmedia_regs \
+ _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
+
+MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64)
+
+MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=)
+MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES)
diff --git a/gcc/config/sh/t-superh b/gcc/config/sh/t-superh
new file mode 100644
index 000000000..4e2d83dcb
--- /dev/null
+++ b/gcc/config/sh/t-superh
@@ -0,0 +1,33 @@
+# Copyright (C) 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+EXTRA_MULTILIB_PARTS= crt1.o crti.o crtn.o \
+ crtbegin.o crtend.o crtbeginS.o crtendS.o \
+ crt1-mmu.o gcrt1-mmu.o gcrt1.o $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS)
+
+# Compile crt1-mmu.o as crt1.o with -DMMU_SUPPORT
+$(T)crt1-mmu.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1-mmu.o -DMMU_SUPPORT -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm
+
+# Compile gcrt1-mmu.o as crt1-mmu.o with -DPROFILE
+$(T)gcrt1-mmu.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)gcrt1-mmu.o -DPROFILE -DMMU_SUPPORT -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm
+
+# For sh4-400: Compile gcrt1.o as crt1.o with -DPROFILE
+$(T)gcrt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)gcrt1.o -DPROFILE -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm
diff --git a/gcc/config/sh/t-symbian b/gcc/config/sh/t-symbian
new file mode 100644
index 000000000..f0b7dabd4
--- /dev/null
+++ b/gcc/config/sh/t-symbian
@@ -0,0 +1,81 @@
+# Copyright (C) 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+sh-c.o: $(srcdir)/config/sh/sh-c.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sh/sh-c.c
+
+symbian-cxx.o: \
+ $(srcdir)/config/sh/symbian-cxx.c \
+ $(srcdir)/config/sh/sh-symbian.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \
+ toplev.h output.h coretypes.h flags.h expr.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+symbian-c.o: \
+ $(srcdir)/config/sh/symbian-c.c \
+ $(srcdir)/config/sh/sh-symbian.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \
+ toplev.h output.h coretypes.h flags.h expr.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+symbian-base.o: \
+ $(srcdir)/config/sh/symbian-base.c \
+ $(srcdir)/config/sh/sh-symbian.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(TREE_H) $(RTL_H) \
+ toplev.h output.h coretypes.h flags.h expr.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+
+LIB1ASMSRC = sh/lib1funcs.asm
+LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \
+ _movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+ $(LIB1ASMFUNCS_CACHE)
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+$(T)crt1.o: $(srcdir)/config/sh/crt1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sh/crt1.asm
+$(T)crti.o: $(srcdir)/config/sh/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sh/crti.asm
+$(T)crtn.o: $(srcdir)/config/sh/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sh/crtn.asm
+
+$(out_object_file): gt-sh.h
+gt-sh.h : s-gtype ; @true
+
+symbian.o: $(srcdir)/config/sh/symbian.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) output.h flags.h $(TREE_H) expr.h toplev.h $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sh/symbian.c
+
+
+# Local Variables:
+# mode: Makefile
+# End:
diff --git a/gcc/config/sh/t-vxworks b/gcc/config/sh/t-vxworks
new file mode 100644
index 000000000..66aa7091a
--- /dev/null
+++ b/gcc/config/sh/t-vxworks
@@ -0,0 +1,9 @@
+# Multilibs for VxWorks.
+
+MULTILIB_OPTIONS = mrtp fPIC m2/m3/m4/m4a ml
+# Don't build -fPIC without -mrtp, or -ml without -m3/-m4.
+MULTILIB_EXCEPTIONS = fPIC* ml* mrtp/ml* mrtp/fPIC/ml* *m2/ml*
+MULTILIB_MATCHES = m2=m4-nofpu fPIC=fpic
+
+# Restore a variable from t-vxworks clobbered by t-elf.
+EXTRA_MULTILIB_PARTS =
diff --git a/gcc/config/sh/ushmedia.h b/gcc/config/sh/ushmedia.h
new file mode 100644
index 000000000..2f1f55583
--- /dev/null
+++ b/gcc/config/sh/ushmedia.h
@@ -0,0 +1,1087 @@
+/* Copyright (C) 2000, 2001, 2004, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* ushmedia.h: Intrinsics corresponding to SHmedia instructions that
+ may be executed in both user and privileged mode. */
+
+#ifndef _USHMEDIA_H
+#define _USHMEDIA_H
+
+#if __SHMEDIA__
+#if ! __SH4_NO_FPU
+typedef float __GCC_FV __attribute__ ((vector_size (4 * sizeof (float))));
+typedef float __GCC_MTRX __attribute__ ((vector_size (16 * sizeof (float))));
+#endif
+
+static __inline unsigned long long
+sh_media_MABS_L (unsigned long long mm)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_absv2si2 ((v2si) mm);
+}
+
+static __inline unsigned long long
+sh_media_MABS_W (unsigned long long mm)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_absv4hi2 ((v4hi) mm);
+}
+
+static __inline unsigned long long
+sh_media_MADD_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_addv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADD_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_addv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADDS_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ssaddv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADDS_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_usaddv8qi3 ((v8qi) mm, (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADDS_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ssaddv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_UB ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+#define sh_media_MCMV __builtin_sh_media_MCMV
+
+static __inline unsigned long long
+sh_media_MCNVS_LW (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_LW ((v2si) mm,
+ (uv2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCNVS_WB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_WB ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCNVS_WUB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_WUB ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR1 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR1 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR2 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR2 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR3 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR3 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR4 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR4 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR5 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR5 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR6 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR6 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR7 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR7 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMACFX_WL (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef float v2hi __attribute__ ((mode(V2HI)));
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ long mm_l = (long) mm;
+ long mn_l = (long) mn;
+
+ return ((unsigned long long)
+ __builtin_sh_media_MMACFX_WL ((v2hi) mm_l, (v2hi) mn_l,
+ (uv2si) mw));
+}
+
+static __inline unsigned long long
+sh_media_MMACNFX_WL (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef float v2hi __attribute__ ((mode(V2HI)));
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ long mm_l = (long) mm;
+ long mn_l = (long) mn;
+
+ return ((unsigned long long)
+ __builtin_sh_media_MMACNFX_WL ((v2hi) mm_l, (v2hi) mn_l,
+ (uv2si) mw));
+}
+
+static __inline unsigned long long
+sh_media_MMUL_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_mulv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMUL_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_mulv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFX_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFX_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFX_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFX_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFXRP_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFXRP_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULHI_WL (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULHI_WL ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULLO_WL (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULLO_WL ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULSUM_WQ (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef unsigned int uv4hi __attribute__ ((mode(V4HI)));
+
+ return __builtin_sh_media_MMULSUM_WQ ((uv4hi) mm, (uv4hi) mn, mw);
+}
+
+static __inline unsigned long long
+sh_media_MPERM_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MPERM_W ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSAD_UBQ (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef unsigned int uv8qi __attribute__ ((mode(V8QI)));
+
+ return __builtin_sh_media_MSAD_UBQ ((uv8qi) mm, (uv8qi) mn, mw);
+}
+
+static __inline unsigned long long
+sh_media_MSHALDS_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHALDS_L ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHALDS_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHALDS_W ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHARD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ashrv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHARD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ashrv4hi3 ((v4hi) mm, mn);
+}
+
+#define sh_media_MSHARDS_Q __builtin_sh_media_MSHARDS_Q
+
+static __inline unsigned long long
+sh_media_MSHFHI_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFHI_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFHI_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLLD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ashlv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLLD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ashlv4hi3 ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLRD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_lshrv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLRD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_lshrv4hi3 ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUB_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_subv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUB_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_subv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sssubv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_ussubv8qi3 ((v8qi) mm, (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sssubv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+#if ! __SH4_NOFPU__
+/* Floating-point Intrinsics */
+
+#define sh_media_FABS_D __builtin_fabs
+#define sh_media_FABS_S __builtin_fabsf
+#define sh_media_FCMPUN_D __builtin_isunordered
+#define sh_media_FCMPUN_S __builtin_isunordered
+
+static __inline float sh_media_FCOSA_S (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return __builtin_sh_media_FCOSA_S (u.i);
+}
+
+static __inline float
+sh_media_FGETSCR (void)
+{
+ float f;
+
+ __asm volatile ("fgetscr %0" : "=f" (f));
+ return f;
+}
+
+static __inline float
+sh_media_FIPR_S (const void *fvg, const void *fvh)
+{
+ typedef float v4sf __attribute__ ((mode(V4SF)));
+ v4sf vg = *(v4sf*) fvg;
+ v4sf vh = *(v4sf*) fvh;
+
+ return __builtin_sh_media_FIPR_S (vg, vh);
+}
+
+#if 0
+/* This gives different results for -O0 */
+static __inline float
+sh_media_FMAC_S (float fg, float fh, float fq)
+{
+ return fg * fh + fq;
+}
+#else
+
+#define sh_media_FMAC_S __builtin_sh_media_FMAC_S
+#endif
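+/* (The C expression fg * fh + fq may or may not be contracted into a
+   fused multiply-add depending on optimization level, hence the
+   builtin is used to get the FMAC.S instruction unconditionally.)  */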
+
+static __inline long long
+sh_media_FMOV_DQ (double dg)
+{
+ union { long long l; double d; } u;
+
+ u.d = dg;
+ return u.l;
+}
+
+static __inline float
+sh_media_FMOV_LS (int mm)
+{
+ union { int i; float f; } u;
+
+ u.i = mm;
+ return u.f;
+}
+
+static __inline double
+sh_media_FMOV_QD (long long mm)
+{
+ union { long long l; double d; } u;
+
+ u.l = mm;
+ return u.d;
+}
+
+static __inline int
+sh_media_FMOV_SL (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return u.i;
+}
+
+static __inline void
+sh_media_FPUTSCR (float fg)
+{
+ __asm volatile ("fputscr %0" : : "f" (fg));
+}
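+
+/* A typical save/modify/restore pattern for the floating-point
+   status/control register, using the two wrappers above (sketch):
+
+     float scr = sh_media_FGETSCR ();
+     ... code that changes rounding mode or flags ...
+     sh_media_FPUTSCR (scr);
+*/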
+
+static __inline float sh_media_FSINA_S (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return __builtin_sh_media_FSINA_S (u.i);
+}
+
+/* Can't use __builtin_sqrt / __builtin_sqrtf because they still implement
+ error handling unless -ffast-math is used. */
+#define sh_media_FSQRT_D __builtin_sh_media_FSQRT_D
+#define sh_media_FSQRT_S __builtin_sh_media_FSQRT_S
+#define sh_media_FSRRA_S __builtin_sh_media_FSRRA_S
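+/* (FSRRA_S exposes the hardware's single-precision reciprocal
+   square root approximation, for which there is no generic builtin
+   to fall back on.)  */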
+
+static __inline void
+sh_media_FTRV_S (const void *mtrxg, const void *fvh, void *fvf)
+{
+ typedef float v16sf __attribute__ ((mode(V16SF)));
+ typedef float v4sf __attribute__ ((mode(V4SF)));
+ v16sf mtrx = *(v16sf*) mtrxg;
+ v4sf vh = *(v4sf*) fvh;
+
+ *(v4sf*) fvf = __builtin_sh_media_FTRV_S (mtrx, vh);
+}
+#endif /* ! __SH4_NOFPU__ */
+
+/* Not implemented here: Control and Configuration intrinsics. */
+/* Misaligned Access Support intrinsics */
+
+static __inline unsigned long long
+sh_media_LDHI_L (void *p, int s)
+{
+ return __builtin_sh_media_LDHI_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDHI_Q (void *p, int s)
+{
+ return __builtin_sh_media_LDHI_Q ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_L (void *p, int s)
+{
+ return __builtin_sh_media_LDLO_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_Q (void *p, int s)
+{
+ return __builtin_sh_media_LDLO_Q ((char *)p + s);
+}
+
+static __inline void
+sh_media_STHI_L (void *p, int s, unsigned int mw)
+{
+ __builtin_sh_media_STHI_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STHI_Q (void *p, int s, unsigned long long mw)
+{
+ __builtin_sh_media_STHI_Q ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_L (void *p, int s, unsigned int mw)
+{
+ __builtin_sh_media_STLO_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_Q (void *p, int s, unsigned long long mw)
+{
+ __builtin_sh_media_STLO_Q ((char*)p + s, mw);
+}
+
+/* Miscellaneous intrinsics */
+
+#define sh_media_NSB __builtin_sh_media_NSB
+
+static __inline unsigned long long
+sh_media_BYTEREV (unsigned long long mm)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_BYTEREV ((v8qi) mm);
+}
+
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+ return mm == 0 ? mn : mw;
+}
+
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+ return mm != 0 ? mn : mw;
+}
+
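+/* ADDZ.L adds the low 32 bits of its operands and zero-extends the
+   32-bit result to 64 bits; the unsigned int arithmetic below wraps
+   and zero-extends in exactly the same way.  */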
+static __inline long long
+sh_media_ADDZ_L (unsigned int mm, unsigned int mn)
+{
+ return mm + mn;
+}
+
+/* NOP and Synchronization intrinsics not implemented here. */
+
+static __inline__ void sh_media_PREFO (void *mm, int s)
+{
+ __builtin_sh_media_PREFO (mm + s, 0, 0);
+}
+
+/* Event Handling intrinsics not implemented here. */
+
+/* Old asm stuff */
+
+static __inline__
+void
+sh_media_NOP (void)
+{
+ __asm__ ("nop" : :);
+}
+
+__inline__ static
+unsigned long long
+sh_media_SWAP_Q (void *mm, long long mn, unsigned long long mw)
+{
+ unsigned long long res;
+ unsigned long long *addr = (unsigned long long *)((char *)mm + mn);
+ __asm__ ("swap.q %m1, %0" : "=r" (res), "+o" (*addr) : "0" (mw));
+ return res;
+}
+
+__inline__ static
+void
+sh_media_SYNCI (void)
+{
+ __asm__ __volatile__ ("synci");
+}
+
+__inline__ static
+void
+sh_media_SYNCO (void)
+{
+ __asm__ __volatile__ ("synco");
+}
+
+__inline__ static
+void
+sh_media_ALLOCO (void *mm, int s)
+{
+ __builtin_sh_media_ALLOCO (mm + s);
+}
+
+__inline__ static
+void
+sh_media_ICBI (void *mm, int s)
+{
+ __asm__ __volatile__ ("icbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBI (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBP (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbp %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBWB (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbwb %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_PREFI (void *mm, int s)
+{
+ __asm__ __volatile__ ("prefi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_BRK (void)
+{
+ __asm__ __volatile__ ("brk");
+}
+
+__inline__ static
+void
+sh_media_TRAPA (unsigned long long mm)
+{
+ __asm__ __volatile__ ("trapa %0" : : "r" (mm));
+}
+
+__inline__ static
+short
+sh_media_unaligned_LD_W (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (((unsigned char *)p)[0]
+ | (((short)((__signed__ char *)p)[1]) << 8));
+#else
+ return ((((short)((__signed__ char *)p)[0]) << 8)
+ | ((unsigned char *)p)[1]);
+#endif
+}
+
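+/* MSHFLO.B interleaves the low bytes of its operands, so with two
+   single-byte values it assembles addr[0] | (addr[1] << 8) (or the
+   byte-swapped form on big endian) without explicit shifts.  */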
+__inline__ static
+unsigned short
+sh_media_unaligned_LD_UW (void *p)
+{
+ unsigned char *addr = p;
+#if __LITTLE_ENDIAN__
+ return sh_media_MSHFLO_B (addr[0], addr[1]);
+#else
+ return sh_media_MSHFLO_B (addr[1], addr[0]);
+#endif
+}
+
+/* We don't use the sh_media_LD* functions here because that turned out
+ to impede constant propagation of the offsets into the ldhi / ldlo
+ instructions. */
+__inline__ static
+int
+sh_media_unaligned_LD_L (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (__builtin_sh_media_LDHI_L ((char *)p + 3)
+ | __builtin_sh_media_LDLO_L (p));
+#else
+ return (__builtin_sh_media_LDLO_L ((char *)p + 3)
+ | __builtin_sh_media_LDHI_L (p));
+#endif
+}
+
+__inline__ static
+long long
+sh_media_unaligned_LD_Q (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (__builtin_sh_media_LDHI_Q ((char *)p + 7)
+ | __builtin_sh_media_LDLO_Q (p));
+#else
+ return (__builtin_sh_media_LDLO_Q ((char *)p + 7)
+ | __builtin_sh_media_LDHI_Q (p));
+#endif
+}
+
+__inline__ static
+void
+sh_media_unaligned_ST_W (void *p, unsigned int k)
+{
+ char *addr = p;
+#if __LITTLE_ENDIAN__
+ addr[0] = k;
+ addr[1] = k >> 8;
+#else
+ addr[1] = k;
+ addr[0] = k >> 8;
+#endif
+}
+
+/* We don't use the sh_media_ST* functions here because that turned out
+ to impede constant propagation of the offsets into the sthi / stlo
+ instructions. */
+__inline__ static
+void
+sh_media_unaligned_ST_L (void *p, unsigned int k)
+{
+#if __LITTLE_ENDIAN__
+ __builtin_sh_media_STHI_L (p + 3, k);
+ __builtin_sh_media_STLO_L (p, k);
+#else
+ __builtin_sh_media_STLO_L (p + 3, k);
+ __builtin_sh_media_STHI_L (p, k);
+#endif
+}
+
+__inline__ static
+void
+sh_media_unaligned_ST_Q (void *p, unsigned long long k)
+{
+#if __LITTLE_ENDIAN__
+ __builtin_sh_media_STHI_Q (p + 7, k);
+ __builtin_sh_media_STLO_Q (p, k);
+#else
+ __builtin_sh_media_STLO_Q (p + 7, k);
+ __builtin_sh_media_STHI_Q (p, k);
+#endif
+}
+
+#if ! __SH4_NOFPU__
+__inline__ static
+void
+sh_media_FVCOPY_S (const void *fvg, void *fvf)
+{
+ const __GCC_FV *g = fvg;
+ __GCC_FV *f = fvf;
+ *f = *g;
+}
+
+__inline__ static
+void
+sh_media_FVADD_S (const void *fvg, const void *fvh, void *fvf)
+{
+ const float *g = fvg, *h = fvh;
+ float *f = fvf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = g[i] + h[i];
+#else
+ f[0] = g[0] + h[0];
+ f[1] = g[1] + h[1];
+ f[2] = g[2] + h[2];
+ f[3] = g[3] + h[3];
+#endif
+}
+
+__inline__ static
+void
+sh_media_FVSUB_S (const void *fvg, const void *fvh, void *fvf)
+{
+ const float *g = fvg, *h = fvh;
+ float *f = fvf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = g[i] - h[i];
+#else
+ f[0] = g[0] - h[0];
+ f[1] = g[1] - h[1];
+ f[2] = g[2] - h[2];
+ f[3] = g[3] - h[3];
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXCOPY_S (const void *mtrxg, void *mtrxf)
+{
+ const __GCC_MTRX *g = mtrxg;
+ __GCC_MTRX *f = mtrxf;
+ *f = *g;
+}
+
+__inline__ static
+void
+sh_media_FMTRXADD_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *h = mtrxh;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ sh_media_FVADD_S (&g[i], &h[i], &f[i]);
+#else
+ sh_media_FVADD_S (&g[0], &h[0], &f[0]);
+ sh_media_FVADD_S (&g[1], &h[1], &f[1]);
+ sh_media_FVADD_S (&g[2], &h[2], &f[2]);
+ sh_media_FVADD_S (&g[3], &h[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXSUB_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *h = mtrxh;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ sh_media_FVSUB_S (&g[i], &h[i], &f[i]);
+#else
+ sh_media_FVSUB_S (&g[0], &h[0], &f[0]);
+ sh_media_FVSUB_S (&g[1], &h[1], &f[1]);
+ sh_media_FVSUB_S (&g[2], &h[2], &f[2]);
+ sh_media_FVSUB_S (&g[3], &h[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FTRVADD_S (const void *mtrxg, const void *fvh, const void *fvi, void *fvf)
+{
+ sh_media_FTRV_S (mtrxg, fvh, fvf);
+ sh_media_FVADD_S (fvf, fvi, fvf);
+}
+
+__inline__ static
+void
+sh_media_FTRVSUB_S (const void *mtrxg, const void *fvh, const void *fvi, void *fvf)
+{
+ sh_media_FTRV_S (mtrxg, fvh, fvf);
+ sh_media_FVSUB_S (fvf, fvi, fvf);
+}
+
+__inline__ static
+void
+sh_media_FMTRXMUL_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRV_S (mtrxh, &g[j], &f[j]);
+#else
+ sh_media_FTRV_S (mtrxh, &g[0], &f[0]);
+ sh_media_FTRV_S (mtrxh, &g[1], &f[1]);
+ sh_media_FTRV_S (mtrxh, &g[2], &f[2]);
+ sh_media_FTRV_S (mtrxh, &g[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXMULADD_S (const void *mtrxg, const void *mtrxh, const void *mtrxi, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *i = mtrxi;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRVADD_S (mtrxh, &g[j], &i[j], &f[j]);
+#else
+ sh_media_FTRVADD_S (mtrxh, &g[0], &i[0], &f[0]);
+ sh_media_FTRVADD_S (mtrxh, &g[1], &i[1], &f[1]);
+ sh_media_FTRVADD_S (mtrxh, &g[2], &i[2], &f[2]);
+ sh_media_FTRVADD_S (mtrxh, &g[3], &i[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXMULSUB_S (const void *mtrxg, const void *mtrxh, const void *mtrxi, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *i = mtrxi;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRVSUB_S (mtrxh, &g[j], &i[j], &f[j]);
+#else
+ sh_media_FTRVSUB_S (mtrxh, &g[0], &i[0], &f[0]);
+ sh_media_FTRVSUB_S (mtrxh, &g[1], &i[1], &f[1]);
+ sh_media_FTRVSUB_S (mtrxh, &g[2], &i[2], &f[2]);
+ sh_media_FTRVSUB_S (mtrxh, &g[3], &i[3], &f[3]);
+#endif
+}
+#endif /* ! __SH4_NOFPU__ */
+
+#endif /* __SHMEDIA__ */
+
+#endif /* _USHMEDIA_H */
diff --git a/gcc/config/sh/vxworks.h b/gcc/config/sh/vxworks.h
new file mode 100644
index 000000000..3276979e4
--- /dev/null
+++ b/gcc/config/sh/vxworks.h
@@ -0,0 +1,69 @@
+/* Definitions of target machine for GCC,
+ for SuperH targeting the VxWorks run time environment.
+ Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("CPU=SH7000"); \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ VXWORKS_OVERRIDE_OPTIONS; \
+ /* The kernel loader cannot handle the relaxation \
+ relocations, so it cannot load kernel modules \
+ (which are ET_REL) or RTP executables (which are \
+ linked with --emit-relocs). No relaxation relocations \
+ appear in shared libraries, so relaxation is OK \
+ for RTP PIC. */ \
+ if (TARGET_RELAX && !(TARGET_VXWORKS_RTP && flag_pic)) \
+ error ("-mrelax is only supported for RTP PIC"); \
+ } \
+ while (0)
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_vxworks"
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC " " SH_LINK_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (SH/VxWorks)", stderr);
+
+/* There is no default multilib. */
+#undef MULTILIB_DEFAULTS
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
diff --git a/gcc/config/soft-fp/README b/gcc/config/soft-fp/README
new file mode 100644
index 000000000..ea58753ec
--- /dev/null
+++ b/gcc/config/soft-fp/README
@@ -0,0 +1,5 @@
+Except for t-softfp, conversions involving TImode and conversions involving
+XFmode, the files in this directory are part of the GNU C Library, not part
+of GCC. As described at <http://gcc.gnu.org/codingconventions.html>, changes
+should be made to the GNU C Library and the changed files then imported
+into GCC.
diff --git a/gcc/config/soft-fp/adddf3.c b/gcc/config/soft-fp/adddf3.c
new file mode 100644
index 000000000..24c03db0a
--- /dev/null
+++ b/gcc/config/soft-fp/adddf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a + b
+ Copyright (C) 1997,1999, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __adddf3(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_D(A, a);
+ FP_UNPACK_SEMIRAW_D(B, b);
+ FP_ADD_D(R, A, B);
+ FP_PACK_SEMIRAW_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
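+
+/* Usage sketch: on a soft-float target the compiler lowers a DFmode
+   addition such as
+
+     double sum (double x, double y) { return x + y; }
+
+   into a libcall to __adddf3, so this routine performs the rounding
+   and exception bookkeeping that hardware FP would otherwise do.  */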
diff --git a/gcc/config/soft-fp/addsf3.c b/gcc/config/soft-fp/addsf3.c
new file mode 100644
index 000000000..b86991ee5
--- /dev/null
+++ b/gcc/config/soft-fp/addsf3.c
@@ -0,0 +1,50 @@
+/* Software floating-point emulation.
+ Return a + b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __addsf3(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_S(A, a);
+ FP_UNPACK_SEMIRAW_S(B, b);
+ FP_ADD_S(R, A, B);
+ FP_PACK_SEMIRAW_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
diff --git a/gcc/config/soft-fp/addtf3.c b/gcc/config/soft-fp/addtf3.c
new file mode 100644
index 000000000..49b67f0ba
--- /dev/null
+++ b/gcc/config/soft-fp/addtf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a + b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __addtf3(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(A, a);
+ FP_UNPACK_SEMIRAW_Q(B, b);
+ FP_ADD_Q(R, A, B);
+ FP_PACK_SEMIRAW_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/divdf3.c b/gcc/config/soft-fp/divdf3.c
new file mode 100644
index 000000000..c3bb0d247
--- /dev/null
+++ b/gcc/config/soft-fp/divdf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a / b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __divdf3(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_D(A, a);
+ FP_UNPACK_D(B, b);
+ FP_DIV_D(R, A, B);
+ FP_PACK_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/divsf3.c b/gcc/config/soft-fp/divsf3.c
new file mode 100644
index 000000000..176bb3c2c
--- /dev/null
+++ b/gcc/config/soft-fp/divsf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a / b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __divsf3(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_S(A, a);
+ FP_UNPACK_S(B, b);
+ FP_DIV_S(R, A, B);
+ FP_PACK_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/divtf3.c b/gcc/config/soft-fp/divtf3.c
new file mode 100644
index 000000000..916fbfe97
--- /dev/null
+++ b/gcc/config/soft-fp/divtf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a / b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __divtf3(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_Q(A, a);
+ FP_UNPACK_Q(B, b);
+ FP_DIV_Q(R, A, B);
+ FP_PACK_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/double.h b/gcc/config/soft-fp/double.h
new file mode 100644
index 000000000..1cde3308b
--- /dev/null
+++ b/gcc/config/soft-fp/double.h
@@ -0,0 +1,265 @@
+/* Software floating-point emulation.
+ Definitions for IEEE Double Precision
+ Copyright (C) 1997, 1998, 1999, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_D (2 * _FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_D _FP_W_TYPE_SIZE
+#endif
+
+#define _FP_FRACBITS_D 53
+#define _FP_FRACXBITS_D (_FP_FRACTBITS_D - _FP_FRACBITS_D)
+#define _FP_WFRACBITS_D (_FP_WORKBITS + _FP_FRACBITS_D)
+#define _FP_WFRACXBITS_D (_FP_FRACTBITS_D - _FP_WFRACBITS_D)
+#define _FP_EXPBITS_D 11
+#define _FP_EXPBIAS_D 1023
+#define _FP_EXPMAX_D 2047
+
+#define _FP_QNANBIT_D \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_D \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_D \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_D \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_D-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_D \
+ ((_FP_W_TYPE)1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
+
+typedef float DFtype __attribute__((mode(DF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_D
+{
+ DFtype flt;
+ struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_D;
+ unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+ unsigned frac0 : _FP_W_TYPE_SIZE;
+#else
+ unsigned frac0 : _FP_W_TYPE_SIZE;
+ unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+ unsigned exp : _FP_EXPBITS_D;
+ unsigned sign : 1;
+#endif
+ } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X) _FP_DECL(2,X)
+#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_2(D,X,val)
+#define FP_UNPACK_RAW_DP(X,val) _FP_UNPACK_RAW_2_P(D,X,val)
+#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_2(D,val,X)
+#define FP_PACK_RAW_DP(val,X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(D,val,X); \
+ } while (0)
+
+#define FP_UNPACK_D(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2(D,X,val); \
+ _FP_UNPACK_CANONICAL(D,2,X); \
+ } while (0)
+
+#define FP_UNPACK_DP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2_P(D,X,val); \
+ _FP_UNPACK_CANONICAL(D,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_D(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2(D,X,val); \
+ _FP_UNPACK_SEMIRAW(D,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_DP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2_P(D,X,val); \
+ _FP_UNPACK_SEMIRAW(D,2,X); \
+ } while (0)
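+
+/* Convention used throughout soft-fp (sketch): the SEMIRAW forms
+   only shift the fraction up by the working bits and keep the biased
+   exponent, which suffices for add/sub (see adddf3.c), while the
+   plain forms unpack to a fully classified canonical value (zero,
+   normal, inf, NaN) as needed by multiply and divide (see divdf3.c).  */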
+
+#define FP_PACK_D(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(D,2,X); \
+ _FP_PACK_RAW_2(D,val,X); \
+ } while (0)
+
+#define FP_PACK_DP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(D,2,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(D,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_D(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(D,2,X); \
+ _FP_PACK_RAW_2(D,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_DP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(D,2,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(D,val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_D(X) _FP_ISSIGNAN(D,2,X)
+#define FP_NEG_D(R,X) _FP_NEG(D,2,R,X)
+#define FP_ADD_D(R,X,Y) _FP_ADD(D,2,R,X,Y)
+#define FP_SUB_D(R,X,Y) _FP_SUB(D,2,R,X,Y)
+#define FP_MUL_D(R,X,Y) _FP_MUL(D,2,R,X,Y)
+#define FP_DIV_D(R,X,Y) _FP_DIV(D,2,R,X,Y)
+#define FP_SQRT_D(R,X) _FP_SQRT(D,2,R,X)
+#define _FP_SQRT_MEAT_D(R,S,T,X,Q) _FP_SQRT_MEAT_2(R,S,T,X,Q)
+
+#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,2,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,2,r,X,Y)
+#define FP_CMP_UNORD_D(r,X,Y) _FP_CMP_UNORD(D,2,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,2,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_2(X)
+#define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2(X)
+
+#else
+
+union _FP_UNION_D
+{
+ DFtype flt;
+ struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_D;
+ _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+#else
+ _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+ unsigned exp : _FP_EXPBITS_D;
+ unsigned sign : 1;
+#endif
+ } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X) _FP_DECL(1,X)
+#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_1(D,X,val)
+#define FP_UNPACK_RAW_DP(X,val) _FP_UNPACK_RAW_1_P(D,X,val)
+#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_1(D,val,X)
+#define FP_PACK_RAW_DP(val,X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(D,val,X); \
+ } while (0)
+
+#define FP_UNPACK_D(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1(D,X,val); \
+ _FP_UNPACK_CANONICAL(D,1,X); \
+ } while (0)
+
+#define FP_UNPACK_DP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1_P(D,X,val); \
+ _FP_UNPACK_CANONICAL(D,1,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_D(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1(D,X,val); \
+ _FP_UNPACK_SEMIRAW(D,1,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_DP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1_P(D,X,val); \
+ _FP_UNPACK_SEMIRAW(D,1,X); \
+ } while (0)
+
+#define FP_PACK_D(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(D,1,X); \
+ _FP_PACK_RAW_1(D,val,X); \
+ } while (0)
+
+#define FP_PACK_DP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(D,1,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(D,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_D(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(D,1,X); \
+ _FP_PACK_RAW_1(D,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_DP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(D,1,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(D,val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_D(X) _FP_ISSIGNAN(D,1,X)
+#define FP_NEG_D(R,X) _FP_NEG(D,1,R,X)
+#define FP_ADD_D(R,X,Y) _FP_ADD(D,1,R,X,Y)
+#define FP_SUB_D(R,X,Y) _FP_SUB(D,1,R,X,Y)
+#define FP_MUL_D(R,X,Y) _FP_MUL(D,1,R,X,Y)
+#define FP_DIV_D(R,X,Y) _FP_DIV(D,1,R,X,Y)
+#define FP_SQRT_D(R,X) _FP_SQRT(D,1,R,X)
+#define _FP_SQRT_MEAT_D(R,S,T,X,Q) _FP_SQRT_MEAT_1(R,S,T,X,Q)
+
+/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
+ the target machine. */
+
+#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,1,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,1,r,X,Y)
+#define FP_CMP_UNORD_D(r,X,Y) _FP_CMP_UNORD(D,1,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,1,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,1,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_1(X)
+#define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1(X)
+
+#endif /* W_TYPE_SIZE < 64 */
diff --git a/gcc/config/soft-fp/eqdf2.c b/gcc/config/soft-fp/eqdf2.c
new file mode 100644
index 000000000..82a885834
--- /dev/null
+++ b/gcc/config/soft-fp/eqdf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 otherwise
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+CMPtype __eqdf2(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_UNPACK_RAW_D(B, b);
+ FP_CMP_EQ_D(r, A, B);
+ if (r && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__eqdf2, __nedf2);
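+
+/* __eqdf2 and __nedf2 (and the sf/tf pairs below) can share one body
+   because both are specified to return zero iff the operands compare
+   equal and nonzero otherwise; only the sense in which the caller
+   tests the result differs.  */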
diff --git a/gcc/config/soft-fp/eqsf2.c b/gcc/config/soft-fp/eqsf2.c
new file mode 100644
index 000000000..0a1180f87
--- /dev/null
+++ b/gcc/config/soft-fp/eqsf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 otherwise
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+CMPtype __eqsf2(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_UNPACK_RAW_S(B, b);
+ FP_CMP_EQ_S(r, A, B);
+ if (r && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__eqsf2, __nesf2);
diff --git a/gcc/config/soft-fp/eqtf2.c b/gcc/config/soft-fp/eqtf2.c
new file mode 100644
index 000000000..46240b735
--- /dev/null
+++ b/gcc/config/soft-fp/eqtf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 otherwise
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+CMPtype __eqtf2(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_UNPACK_RAW_Q(B, b);
+ FP_CMP_EQ_Q(r, A, B);
+ if (r && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__eqtf2, __netf2);
diff --git a/gcc/config/soft-fp/extenddftf2.c b/gcc/config/soft-fp/extenddftf2.c
new file mode 100644
index 000000000..4101639a9
--- /dev/null
+++ b/gcc/config/soft-fp/extenddftf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Return a converted to IEEE quad
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+TFtype __extenddftf2(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_RAW_D(A, a);
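+  /* FP_EXTEND's word-count arguments (4,2 or 2,1) are the number of
+     _FP_W_TYPE words in the quad and double fractions, which depends
+     on whether _FP_W_TYPE_SIZE is 32 or 64.  */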
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_EXTEND(Q,D,4,2,R,A);
+#else
+ FP_EXTEND(Q,D,2,1,R,A);
+#endif
+ FP_PACK_RAW_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/extended.h b/gcc/config/soft-fp/extended.h
new file mode 100644
index 000000000..e5f16debe
--- /dev/null
+++ b/gcc/config/soft-fp/extended.h
@@ -0,0 +1,431 @@
+/* Software floating-point emulation.
+ Definitions for IEEE Extended Precision.
+ Copyright (C) 1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_E (4*_FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_E (2*_FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_E 64
+#define _FP_FRACXBITS_E (_FP_FRACTBITS_E - _FP_FRACBITS_E)
+#define _FP_WFRACBITS_E (_FP_WORKBITS + _FP_FRACBITS_E)
+#define _FP_WFRACXBITS_E (_FP_FRACTBITS_E - _FP_WFRACBITS_E)
+#define _FP_EXPBITS_E 15
+#define _FP_EXPBIAS_E 16383
+#define _FP_EXPMAX_E 32767
+
+#define _FP_QNANBIT_E \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_E \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_E \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_E \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_E \
+ ((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
+
+typedef float XFtype __attribute__((mode(XF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_E
+{
+ XFtype flt;
+ struct
+ {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned long pad1 : _FP_W_TYPE_SIZE;
+ unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
+ unsigned long sign : 1;
+ unsigned long exp : _FP_EXPBITS_E;
+ unsigned long frac1 : _FP_W_TYPE_SIZE;
+ unsigned long frac0 : _FP_W_TYPE_SIZE;
+#else
+ unsigned long frac0 : _FP_W_TYPE_SIZE;
+ unsigned long frac1 : _FP_W_TYPE_SIZE;
+ unsigned exp : _FP_EXPBITS_E;
+ unsigned sign : 1;
+#endif /* not bigendian */
+ } bits __attribute__((packed));
+};
+
+
+#define FP_DECL_E(X) _FP_DECL(4,X)
+
+#define FP_UNPACK_RAW_E(X, val) \
+ do { \
+ union _FP_UNION_E _flo; _flo.flt = (val); \
+ \
+ X##_f[2] = 0; X##_f[3] = 0; \
+ X##_f[0] = _flo.bits.frac0; \
+ X##_f[1] = _flo.bits.frac1; \
+ X##_e = _flo.bits.exp; \
+ X##_s = _flo.bits.sign; \
+ } while (0)
+
+#define FP_UNPACK_RAW_EP(X, val) \
+ do { \
+ union _FP_UNION_E *_flo = \
+ (union _FP_UNION_E *)(val); \
+ \
+ X##_f[2] = 0; X##_f[3] = 0; \
+ X##_f[0] = _flo->bits.frac0; \
+ X##_f[1] = _flo->bits.frac1; \
+ X##_e = _flo->bits.exp; \
+ X##_s = _flo->bits.sign; \
+ } while (0)
+
+#define FP_PACK_RAW_E(val, X) \
+ do { \
+ union _FP_UNION_E _flo; \
+ \
+ if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \
+ else X##_f[1] &= ~(_FP_IMPLBIT_E); \
+ _flo.bits.frac0 = X##_f[0]; \
+ _flo.bits.frac1 = X##_f[1]; \
+ _flo.bits.exp = X##_e; \
+ _flo.bits.sign = X##_s; \
+ \
+ (val) = _flo.flt; \
+ } while (0)
+
+#define FP_PACK_RAW_EP(val, X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ { \
+ union _FP_UNION_E *_flo = \
+ (union _FP_UNION_E *)(val); \
+ \
+ if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \
+ else X##_f[1] &= ~(_FP_IMPLBIT_E); \
+ _flo->bits.frac0 = X##_f[0]; \
+ _flo->bits.frac1 = X##_f[1]; \
+ _flo->bits.exp = X##_e; \
+ _flo->bits.sign = X##_s; \
+ } \
+ } while (0)
+
+#define FP_UNPACK_E(X,val) \
+ do { \
+ FP_UNPACK_RAW_E(X,val); \
+ _FP_UNPACK_CANONICAL(E,4,X); \
+ } while (0)
+
+#define FP_UNPACK_EP(X,val) \
+ do { \
+ FP_UNPACK_RAW_EP(X,val); \
+ _FP_UNPACK_CANONICAL(E,4,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_E(X,val) \
+ do { \
+ FP_UNPACK_RAW_E(X,val); \
+ _FP_UNPACK_SEMIRAW(E,4,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_EP(X,val) \
+ do { \
+ FP_UNPACK_RAW_EP(X,val); \
+ _FP_UNPACK_SEMIRAW(E,4,X); \
+ } while (0)
+
+#define FP_PACK_E(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(E,4,X); \
+ FP_PACK_RAW_E(val,X); \
+ } while (0)
+
+#define FP_PACK_EP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(E,4,X); \
+ FP_PACK_RAW_EP(val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_E(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(E,4,X); \
+ FP_PACK_RAW_E(val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_EP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(E,4,X); \
+ FP_PACK_RAW_EP(val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,4,X)
+#define FP_NEG_E(R,X) _FP_NEG(E,4,R,X)
+#define FP_ADD_E(R,X,Y) _FP_ADD(E,4,R,X,Y)
+#define FP_SUB_E(R,X,Y) _FP_SUB(E,4,R,X,Y)
+#define FP_MUL_E(R,X,Y) _FP_MUL(E,4,R,X,Y)
+#define FP_DIV_E(R,X,Y) _FP_DIV(E,4,R,X,Y)
+#define FP_SQRT_E(R,X) _FP_SQRT(E,4,R,X)
+
+/*
+ * Square root algorithms:
+ * We have just one right now; a Newton approximation
+ * should perhaps be added for machines where division is fast.
+ * This has a special _E version because the standard _4 square
+ * root would not work (it would have to start with the
+ * second word rather than the first), but since we have to do it
+ * anyway, we optimize it by doing most of the calculations
+ * in two UWtype registers instead of four.
+ */
+
+#define _FP_SQRT_MEAT_E(R, S, T, X, q) \
+ do { \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ _FP_FRAC_SRL_4(X, (_FP_WORKBITS)); \
+ while (q) \
+ { \
+ T##_f[1] = S##_f[1] + q; \
+ if (T##_f[1] <= X##_f[1]) \
+ { \
+ S##_f[1] = T##_f[1] + q; \
+ X##_f[1] -= T##_f[1]; \
+ R##_f[1] += q; \
+ } \
+ _FP_FRAC_SLL_2(X, 1); \
+ q >>= 1; \
+ } \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ while (q) \
+ { \
+ T##_f[0] = S##_f[0] + q; \
+ T##_f[1] = S##_f[1]; \
+ if (T##_f[1] < X##_f[1] || \
+ (T##_f[1] == X##_f[1] && \
+ T##_f[0] <= X##_f[0])) \
+ { \
+ S##_f[0] = T##_f[0] + q; \
+ S##_f[1] += (T##_f[0] > S##_f[0]); \
+ _FP_FRAC_DEC_2(X, T); \
+ R##_f[0] += q; \
+ } \
+ _FP_FRAC_SLL_2(X, 1); \
+ q >>= 1; \
+ } \
+ _FP_FRAC_SLL_4(R, (_FP_WORKBITS)); \
+ if (X##_f[0] | X##_f[1]) \
+ { \
+ if (S##_f[1] < X##_f[1] || \
+ (S##_f[1] == X##_f[1] && \
+ S##_f[0] < X##_f[0])) \
+ R##_f[0] |= _FP_WORK_ROUND; \
+ R##_f[0] |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
+
+#define FP_CMP_E(r,X,Y,un) _FP_CMP(E,4,r,X,Y,un)
+#define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,4,r,X,Y)
+#define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,4,r,X,Y)
+
+#define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,4,r,X,rsz,rsg)
+#define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,4,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_E(X) (X##_f[2])
+#define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1])
+
+#else /* not _FP_W_TYPE_SIZE < 64 */
+union _FP_UNION_E
+{
+ XFtype flt;
+ struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ _FP_W_TYPE pad : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_E;
+ _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
+#else
+ _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
+ unsigned exp : _FP_EXPBITS_E;
+ unsigned sign : 1;
+#endif
+ } bits;
+};
+
+#define FP_DECL_E(X) _FP_DECL(2,X)
+
+#define FP_UNPACK_RAW_E(X, val) \
+ do { \
+ union _FP_UNION_E _flo; _flo.flt = (val); \
+ \
+ X##_f0 = _flo.bits.frac; \
+ X##_f1 = 0; \
+ X##_e = _flo.bits.exp; \
+ X##_s = _flo.bits.sign; \
+ } while (0)
+
+#define FP_UNPACK_RAW_EP(X, val) \
+ do { \
+ union _FP_UNION_E *_flo = \
+ (union _FP_UNION_E *)(val); \
+ \
+ X##_f0 = _flo->bits.frac; \
+ X##_f1 = 0; \
+ X##_e = _flo->bits.exp; \
+ X##_s = _flo->bits.sign; \
+ } while (0)
+
+#define FP_PACK_RAW_E(val, X) \
+ do { \
+ union _FP_UNION_E _flo; \
+ \
+ if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \
+ else X##_f0 &= ~(_FP_IMPLBIT_E); \
+ _flo.bits.frac = X##_f0; \
+ _flo.bits.exp = X##_e; \
+ _flo.bits.sign = X##_s; \
+ \
+ (val) = _flo.flt; \
+ } while (0)
+
+#define FP_PACK_RAW_EP(val, X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ { \
+ union _FP_UNION_E *_flo = \
+ (union _FP_UNION_E *)(val); \
+ \
+ if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \
+ else X##_f0 &= ~(_FP_IMPLBIT_E); \
+ _flo->bits.frac = X##_f0; \
+ _flo->bits.exp = X##_e; \
+ _flo->bits.sign = X##_s; \
+ } \
+ } while (0)
+
+
+#define FP_UNPACK_E(X,val) \
+ do { \
+ FP_UNPACK_RAW_E(X,val); \
+ _FP_UNPACK_CANONICAL(E,2,X); \
+ } while (0)
+
+#define FP_UNPACK_EP(X,val) \
+ do { \
+ FP_UNPACK_RAW_EP(X,val); \
+ _FP_UNPACK_CANONICAL(E,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_E(X,val) \
+ do { \
+ FP_UNPACK_RAW_E(X,val); \
+ _FP_UNPACK_SEMIRAW(E,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_EP(X,val) \
+ do { \
+ FP_UNPACK_RAW_EP(X,val); \
+ _FP_UNPACK_SEMIRAW(E,2,X); \
+ } while (0)
+
+#define FP_PACK_E(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(E,2,X); \
+ FP_PACK_RAW_E(val,X); \
+ } while (0)
+
+#define FP_PACK_EP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(E,2,X); \
+ FP_PACK_RAW_EP(val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_E(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(E,2,X); \
+ FP_PACK_RAW_E(val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_EP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(E,2,X); \
+ FP_PACK_RAW_EP(val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,2,X)
+#define FP_NEG_E(R,X) _FP_NEG(E,2,R,X)
+#define FP_ADD_E(R,X,Y) _FP_ADD(E,2,R,X,Y)
+#define FP_SUB_E(R,X,Y) _FP_SUB(E,2,R,X,Y)
+#define FP_MUL_E(R,X,Y) _FP_MUL(E,2,R,X,Y)
+#define FP_DIV_E(R,X,Y) _FP_DIV(E,2,R,X,Y)
+#define FP_SQRT_E(R,X) _FP_SQRT(E,2,R,X)
+
+/*
+ * Square root algorithms:
+ * We have just one right now; a Newton approximation
+ * should perhaps be added for machines where division is fast.
+ * We optimize it by doing most of the calculations
+ * in one UWtype register instead of two, although we don't
+ * have to.
+ */
+#define _FP_SQRT_MEAT_E(R, S, T, X, q) \
+ do { \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ _FP_FRAC_SRL_2(X, (_FP_WORKBITS)); \
+ while (q) \
+ { \
+ T##_f0 = S##_f0 + q; \
+ if (T##_f0 <= X##_f0) \
+ { \
+ S##_f0 = T##_f0 + q; \
+ X##_f0 -= T##_f0; \
+ R##_f0 += q; \
+ } \
+ _FP_FRAC_SLL_1(X, 1); \
+ q >>= 1; \
+ } \
+ _FP_FRAC_SLL_2(R, (_FP_WORKBITS)); \
+ if (X##_f0) \
+ { \
+ if (S##_f0 < X##_f0) \
+ R##_f0 |= _FP_WORK_ROUND; \
+ R##_f0 |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
+
+#define FP_CMP_E(r,X,Y,un) _FP_CMP(E,2,r,X,Y,un)
+#define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,2,r,X,Y)
+#define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,2,r,X,Y)
+
+#define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,2,r,X,rsz,rsg)
+#define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_E(X) (X##_f1)
+#define _FP_FRAC_HIGH_RAW_E(X) (X##_f0)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
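
_FP_SQRT_MEAT_E above is a restoring (digit-by-digit) square root: each pass proposes one result bit, subtracts the trial term when it still fits, and shifts the remainder left; a leftover nonzero remainder only feeds the round and sticky bits. The same scheme on a plain 32-bit integer, as a standalone sketch (illustrative only, not the patch's code):

#include <stdint.h>
#include <stdio.h>

static uint32_t isqrt32 (uint32_t v)
{
  uint32_t rem = 0, root = 0;
  for (int i = 0; i < 16; i++)           /* one result bit per two input bits */
    {
      rem = (rem << 2) | (v >> 30);      /* bring down the next two bits */
      v <<= 2;
      root <<= 1;
      uint32_t trial = (root << 1) | 1;  /* (root+1)^2 - root^2 at this scale */
      if (trial <= rem)
        {
          rem -= trial;
          root |= 1;
        }
    }
  return root;   /* floor(sqrt(v)); rem != 0 means the result was inexact */
}

int main (void)
{
  printf ("%u %u\n", isqrt32 (1u << 30), isqrt32 (99));   /* 32768 9 */
  return 0;
}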
diff --git a/gcc/config/soft-fp/extendsfdf2.c b/gcc/config/soft-fp/extendsfdf2.c
new file mode 100644
index 000000000..fba22d5a1
--- /dev/null
+++ b/gcc/config/soft-fp/extendsfdf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Return a converted to IEEE double
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "double.h"
+
+DFtype __extendsfdf2(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_RAW_S(A, a);
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+ FP_EXTEND(D,S,2,1,R,A);
+#else
+ FP_EXTEND(D,S,1,1,R,A);
+#endif
+ FP_PACK_RAW_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/extendsftf2.c b/gcc/config/soft-fp/extendsftf2.c
new file mode 100644
index 000000000..c43cf1ede
--- /dev/null
+++ b/gcc/config/soft-fp/extendsftf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Return a converted to IEEE quad
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+TFtype __extendsftf2(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_RAW_S(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_EXTEND(Q,S,4,1,R,A);
+#else
+ FP_EXTEND(Q,S,2,1,R,A);
+#endif
+ FP_PACK_RAW_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/extendxftf2.c b/gcc/config/soft-fp/extendxftf2.c
new file mode 100644
index 000000000..af29a2ae9
--- /dev/null
+++ b/gcc/config/soft-fp/extendxftf2.c
@@ -0,0 +1,53 @@
+/* Software floating-point emulation.
+ Return a converted to IEEE quad
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "extended.h"
+#include "quad.h"
+
+TFtype __extendxftf2(XFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_E(A);
+ FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_RAW_E(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_EXTEND(Q,E,4,4,R,A);
+#else
+ FP_EXTEND(Q,E,2,2,R,A);
+#endif
+ FP_PACK_RAW_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
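
One detail behind this conversion: XFtype is the x86 80-bit extended format, which stores its leading significand bit explicitly rather than hiding it as single, double, and quad do. That is why the raw pack macros in extended.h set or clear _FP_IMPLBIT_E by hand. A layout sketch under that assumption (illustrative only; the real definition is union _FP_UNION_E above):

#include <stdint.h>

struct x87_extended            /* little-endian view of the 80 value bits */
{
  uint64_t frac;               /* bit 63 is the explicit integer bit */
  uint16_t sign_exp;           /* bit 15: sign; bits 14..0: biased exponent */
};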
diff --git a/gcc/config/soft-fp/fixdfdi.c b/gcc/config/soft-fp/fixdfdi.c
new file mode 100644
index 000000000..fdfe35af5
--- /dev/null
+++ b/gcc/config/soft-fp/fixdfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DItype __fixdfdi(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, DI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
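
Note the type asymmetry: the function returns DItype but accumulates into a UDItype r. FP_TO_INT builds the magnitude in the unsigned type so that producing the most negative value never overflows a signed intermediate; converting back through the return type then yields the intended bit pattern on two's-complement targets. A minimal sketch of that pattern (the helper name is hypothetical):

#include <stdint.h>
#include <stdio.h>

static int64_t from_magnitude (uint64_t mag, int negative)
{
  uint64_t u = negative ? 0 - mag : mag;   /* unsigned negate wraps, never UB */
  return (int64_t) u;                      /* exact on two's complement */
}

int main (void)
{
  /* 2^63 does not fit in int64_t, but the unsigned detour produces
     INT64_MIN safely.  */
  printf ("%lld\n", (long long) from_magnitude (1ull << 63, 1));
  return 0;
}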
diff --git a/gcc/config/soft-fp/fixdfsi.c b/gcc/config/soft-fp/fixdfsi.c
new file mode 100644
index 000000000..a05f3e39a
--- /dev/null
+++ b/gcc/config/soft-fp/fixdfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+SItype __fixdfsi(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ USItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, SI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixdfti.c b/gcc/config/soft-fp/fixdfti.c
new file mode 100644
index 000000000..473165725
--- /dev/null
+++ b/gcc/config/soft-fp/fixdfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE double to 128bit signed integer
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+TItype __fixdfti(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, TI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixsfdi.c b/gcc/config/soft-fp/fixsfdi.c
new file mode 100644
index 000000000..384d9bdd5
--- /dev/null
+++ b/gcc/config/soft-fp/fixsfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+DItype __fixsfdi(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, DI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixsfsi.c b/gcc/config/soft-fp/fixsfsi.c
new file mode 100644
index 000000000..1d40ed05d
--- /dev/null
+++ b/gcc/config/soft-fp/fixsfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SItype __fixsfsi(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ USItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, SI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixsfti.c b/gcc/config/soft-fp/fixsfti.c
new file mode 100644
index 000000000..779628eb4
--- /dev/null
+++ b/gcc/config/soft-fp/fixsfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE single to 128bit signed integer
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+TItype __fixsfti(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, TI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixtfdi.c b/gcc/config/soft-fp/fixtfdi.c
new file mode 100644
index 000000000..ea10ce2dd
--- /dev/null
+++ b/gcc/config/soft-fp/fixtfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+DItype __fixtfdi(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, DI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixtfsi.c b/gcc/config/soft-fp/fixtfsi.c
new file mode 100644
index 000000000..eb71038bc
--- /dev/null
+++ b/gcc/config/soft-fp/fixtfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit signed integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+SItype __fixtfsi(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ USItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, SI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixtfti.c b/gcc/config/soft-fp/fixtfti.c
new file mode 100644
index 000000000..8311ea5a7
--- /dev/null
+++ b/gcc/config/soft-fp/fixtfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE quad to 128bit signed integer
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TItype __fixtfti(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, TI_BITS, 1);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunsdfdi.c b/gcc/config/soft-fp/fixunsdfdi.c
new file mode 100644
index 000000000..d85198f18
--- /dev/null
+++ b/gcc/config/soft-fp/fixunsdfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+UDItype __fixunsdfdi(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, DI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
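
The __fixuns* family differs from the signed one only in the last FP_TO_INT argument: 0 requests an unsigned result, so the full DI_BITS of magnitude are usable and out-of-range inputs, negative values included, raise the invalid exception. A caller-side contrast, as a sketch:

#include <stdio.h>

int main (void)
{
  double d = 1e19;   /* exceeds the signed 64-bit range, fits the unsigned one */
  unsigned long long u = (unsigned long long) d;   /* lowers to __fixunsdfdi
                                                      on soft-float targets */
  printf ("%llu\n", u);   /* 10000000000000000000 */
  /* (long long) d would lower to __fixdfdi instead, where the value is out
     of range and the helper raises FP_EX_INVALID.  */
  return 0;
}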
diff --git a/gcc/config/soft-fp/fixunsdfsi.c b/gcc/config/soft-fp/fixunsdfsi.c
new file mode 100644
index 000000000..492ffdea6
--- /dev/null
+++ b/gcc/config/soft-fp/fixunsdfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+USItype __fixunsdfsi(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ USItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, SI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunsdfti.c b/gcc/config/soft-fp/fixunsdfti.c
new file mode 100644
index 000000000..48c41d4ac
--- /dev/null
+++ b/gcc/config/soft-fp/fixunsdfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE double to 128bit unsigned integer
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+UTItype __fixunsdfti(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_TO_INT_D(r, A, TI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunssfdi.c b/gcc/config/soft-fp/fixunssfdi.c
new file mode 100644
index 000000000..548415383
--- /dev/null
+++ b/gcc/config/soft-fp/fixunssfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+UDItype __fixunssfdi(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, DI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunssfsi.c b/gcc/config/soft-fp/fixunssfsi.c
new file mode 100644
index 000000000..ac9d4b965
--- /dev/null
+++ b/gcc/config/soft-fp/fixunssfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+USItype __fixunssfsi(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ USItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, SI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunssfti.c b/gcc/config/soft-fp/fixunssfti.c
new file mode 100644
index 000000000..89bcedbad
--- /dev/null
+++ b/gcc/config/soft-fp/fixunssfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE single to 128bit unsigned integer
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+UTItype __fixunssfti(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_TO_INT_S(r, A, TI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunstfdi.c b/gcc/config/soft-fp/fixunstfdi.c
new file mode 100644
index 000000000..86f1fc856
--- /dev/null
+++ b/gcc/config/soft-fp/fixunstfdi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 64bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+UDItype __fixunstfdi(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ UDItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, DI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunstfsi.c b/gcc/config/soft-fp/fixunstfsi.c
new file mode 100644
index 000000000..e0335da47
--- /dev/null
+++ b/gcc/config/soft-fp/fixunstfsi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a to 32bit unsigned integer
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+USItype __fixunstfsi(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ USItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, SI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/fixunstfti.c b/gcc/config/soft-fp/fixunstfti.c
new file mode 100644
index 000000000..f62bd505c
--- /dev/null
+++ b/gcc/config/soft-fp/fixunstfti.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert IEEE quad to 128bit unsigned integer
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+UTItype __fixunstfti(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ UTItype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_TO_INT_Q(r, A, TI_BITS, 0);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/floatdidf.c b/gcc/config/soft-fp/floatdidf.c
new file mode 100644
index 000000000..21e9fb189
--- /dev/null
+++ b/gcc/config/soft-fp/floatdidf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 64bit signed integer to IEEE double
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floatdidf(DItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
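
The reverse direction can be inexact: a double keeps only 53 significand bits, so distinct 64-bit integers may round to the same DFtype value, which is why FP_FROM_INT is handed the full DI_BITS width along with the unsigned counterpart type UDItype for its internal magnitude work. An illustrative sketch (on soft-float targets the cast lowers to __floatdidf):

#include <stdio.h>

int main (void)
{
  long long i = (1LL << 53) + 1;   /* 9007199254740993, not a double value */
  double d = (double) i;
  printf ("%lld -> %.1f\n", i, d); /* rounds to even: 9007199254740992.0 */
  return 0;
}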
diff --git a/gcc/config/soft-fp/floatdisf.c b/gcc/config/soft-fp/floatdisf.c
new file mode 100644
index 000000000..ee57915c3
--- /dev/null
+++ b/gcc/config/soft-fp/floatdisf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 64bit signed integer to IEEE single
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floatdisf(DItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatditf.c b/gcc/config/soft-fp/floatditf.c
new file mode 100644
index 000000000..564800bc0
--- /dev/null
+++ b/gcc/config/soft-fp/floatditf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 64bit signed integer to IEEE quad
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __floatditf(DItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatsidf.c b/gcc/config/soft-fp/floatsidf.c
new file mode 100644
index 000000000..b6d5f8d1d
--- /dev/null
+++ b/gcc/config/soft-fp/floatsidf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 32-bit signed integer to IEEE double
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floatsidf(SItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatsisf.c b/gcc/config/soft-fp/floatsisf.c
new file mode 100644
index 000000000..76217fe34
--- /dev/null
+++ b/gcc/config/soft-fp/floatsisf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 32-bit signed integer to IEEE single
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floatsisf(SItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatsitf.c b/gcc/config/soft-fp/floatsitf.c
new file mode 100644
index 000000000..8c3d9cc61
--- /dev/null
+++ b/gcc/config/soft-fp/floatsitf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 32-bit signed integer to IEEE quad
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __floatsitf(SItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floattidf.c b/gcc/config/soft-fp/floattidf.c
new file mode 100644
index 000000000..14b6ea36a
--- /dev/null
+++ b/gcc/config/soft-fp/floattidf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit signed integer to IEEE double
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floattidf(TItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floattisf.c b/gcc/config/soft-fp/floattisf.c
new file mode 100644
index 000000000..475cafa27
--- /dev/null
+++ b/gcc/config/soft-fp/floattisf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit signed integer to IEEE single
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floattisf(TItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floattitf.c b/gcc/config/soft-fp/floattitf.c
new file mode 100644
index 000000000..12bbb2772
--- /dev/null
+++ b/gcc/config/soft-fp/floattitf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit signed integer to IEEE quad
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __floattitf(TItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatundidf.c b/gcc/config/soft-fp/floatundidf.c
new file mode 100644
index 000000000..af8e4a5ae
--- /dev/null
+++ b/gcc/config/soft-fp/floatundidf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 64-bit unsigned integer to IEEE double
+ Copyright (C) 1997, 1999, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floatundidf(UDItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
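
The unsigned entry points exist because signedness changes the value a given bit pattern converts to: a UDItype with its high bit set must become a large positive double, whereas the same bits interpreted as DItype are negative. A small demonstration, assuming a hosted C99 environment:

#include <stdio.h>

int main(void)
{
    unsigned long long u = 0x8000000000000000ULL;
    /* Same bits, different helper, different result (both values
       are exactly representable, being powers of two).  */
    printf("%.1f\n", (double)u);             /* __floatundidf:  9223372036854775808.0 */
    printf("%.1f\n", (double)(long long)u);  /* __floatdidf:   -9223372036854775808.0 */
    return 0;
}
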
diff --git a/gcc/config/soft-fp/floatundisf.c b/gcc/config/soft-fp/floatundisf.c
new file mode 100644
index 000000000..977f7dfc7
--- /dev/null
+++ b/gcc/config/soft-fp/floatundisf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 64-bit unsigned integer to IEEE single
+ Copyright (C) 1997, 1999, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floatundisf(UDItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatunditf.c b/gcc/config/soft-fp/floatunditf.c
new file mode 100644
index 000000000..ab357f051
--- /dev/null
+++ b/gcc/config/soft-fp/floatunditf.c
@@ -0,0 +1,47 @@
+/* Software floating-point emulation.
+ Convert a 64-bit unsigned integer to IEEE quad
+ Copyright (C) 1997,1999, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype
+__floatunditf(UDItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, DI_BITS, UDItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatunsidf.c b/gcc/config/soft-fp/floatunsidf.c
new file mode 100644
index 000000000..12d0f25bf
--- /dev/null
+++ b/gcc/config/soft-fp/floatunsidf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 32-bit unsigned integer to IEEE double
+ Copyright (C) 1997, 1999, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floatunsidf(USItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatunsisf.c b/gcc/config/soft-fp/floatunsisf.c
new file mode 100644
index 000000000..80c5d3d35
--- /dev/null
+++ b/gcc/config/soft-fp/floatunsisf.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+ Convert a 32-bit unsigned integer to IEEE single
+ Copyright (C) 1997, 1999, 2006, 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floatunsisf(USItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatunsitf.c b/gcc/config/soft-fp/floatunsitf.c
new file mode 100644
index 000000000..c993716e5
--- /dev/null
+++ b/gcc/config/soft-fp/floatunsitf.c
@@ -0,0 +1,47 @@
+/* Software floating-point emulation.
+ Convert a 32-bit unsigned integer to IEEE quad
+ Copyright (C) 1997,1999, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype
+__floatunsitf(USItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, SI_BITS, USItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatuntidf.c b/gcc/config/soft-fp/floatuntidf.c
new file mode 100644
index 000000000..db1fe1aac
--- /dev/null
+++ b/gcc/config/soft-fp/floatuntidf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit unsigned integer to IEEE double
+ Copyright (C) 1997,1999, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __floatuntidf(UTItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ DFtype a;
+
+ FP_FROM_INT_D(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_D(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatuntisf.c b/gcc/config/soft-fp/floatuntisf.c
new file mode 100644
index 000000000..73914878b
--- /dev/null
+++ b/gcc/config/soft-fp/floatuntisf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit unsigned integer to IEEE single
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __floatuntisf(UTItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A);
+ SFtype a;
+
+ FP_FROM_INT_S(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_S(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/floatuntitf.c b/gcc/config/soft-fp/floatuntitf.c
new file mode 100644
index 000000000..8d6690126
--- /dev/null
+++ b/gcc/config/soft-fp/floatuntitf.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Convert a 128-bit unsigned integer to IEEE quad
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __floatuntitf(UTItype i)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ TFtype a;
+
+ FP_FROM_INT_Q(A, i, TI_BITS, UTItype);
+ FP_PACK_RAW_Q(a, A);
+ FP_HANDLE_EXCEPTIONS;
+
+ return a;
+}
diff --git a/gcc/config/soft-fp/gedf2.c b/gcc/config/soft-fp/gedf2.c
new file mode 100644
index 000000000..17a0453ad
--- /dev/null
+++ b/gcc/config/soft-fp/gedf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, -2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+CMPtype __gedf2(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_UNPACK_RAW_D(B, b);
+ FP_CMP_D(r, A, B, -2);
+ if (r == -2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__gedf2, __gtdf2);
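
The return value -2 for unordered operands, together with the strong_alias making __gtdf2 share this body, lets one routine serve both >= and >: the compiler tests the helper's result against zero, and -2 fails both tests, giving the IEEE-required false result when either operand is NaN. A sketch of the observable behaviour:

#include <math.h>
#include <stdio.h>

/* On soft-float targets "a >= b" is lowered to "__gedf2(a, b) >= 0"
   and "a > b" to "__gtdf2(a, b) > 0"; the unordered result -2 makes
   both comparisons false when a NaN is involved.  */
int main(void)
{
    printf("%d\n", 1.0 >= NAN);   /* 0: unordered, -2 >= 0 is false */
    printf("%d\n", 1.0 > NAN);    /* 0: unordered, -2 > 0 is false  */
    printf("%d\n", 2.0 >= 1.0);   /* 1: __gedf2 returns 1           */
    return 0;
}
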
diff --git a/gcc/config/soft-fp/gesf2.c b/gcc/config/soft-fp/gesf2.c
new file mode 100644
index 000000000..609a61f31
--- /dev/null
+++ b/gcc/config/soft-fp/gesf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, -2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+CMPtype __gesf2(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_UNPACK_RAW_S(B, b);
+ FP_CMP_S(r, A, B, -2);
+ if (r == -2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__gesf2, __gtsf2);
diff --git a/gcc/config/soft-fp/getf2.c b/gcc/config/soft-fp/getf2.c
new file mode 100644
index 000000000..eb52d05a4
--- /dev/null
+++ b/gcc/config/soft-fp/getf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, -2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+CMPtype __getf2(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_UNPACK_RAW_Q(B, b);
+ FP_CMP_Q(r, A, B, -2);
+ if (r == -2 && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__getf2, __gttf2);
diff --git a/gcc/config/soft-fp/ledf2.c b/gcc/config/soft-fp/ledf2.c
new file mode 100644
index 000000000..b8ba4400d
--- /dev/null
+++ b/gcc/config/soft-fp/ledf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, 2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+CMPtype __ledf2(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_UNPACK_RAW_D(B, b);
+ FP_CMP_D(r, A, B, 2);
+ if (r == 2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__ledf2, __ltdf2);
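
Here the unordered result is +2 rather than -2, for the mirror-image reason: <= and < are tested as __ledf2(a, b) <= 0 and __ltdf2(a, b) < 0, and a positive unordered value makes both false. For instance:

#include <math.h>
#include <stdio.h>

int main(void)
{
    printf("%d\n", NAN <= 0.0);   /* 0: __ledf2 returns 2, and 2 <= 0 fails */
    printf("%d\n", NAN < 0.0);    /* 0: __ltdf2 returns 2, and 2 < 0 fails  */
    return 0;
}
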
diff --git a/gcc/config/soft-fp/lesf2.c b/gcc/config/soft-fp/lesf2.c
new file mode 100644
index 000000000..cb359c910
--- /dev/null
+++ b/gcc/config/soft-fp/lesf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, 2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+CMPtype __lesf2(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_UNPACK_RAW_S(B, b);
+ FP_CMP_S(r, A, B, 2);
+ if (r == 2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__lesf2, __ltsf2);
diff --git a/gcc/config/soft-fp/letf2.c b/gcc/config/soft-fp/letf2.c
new file mode 100644
index 000000000..01fd12b57
--- /dev/null
+++ b/gcc/config/soft-fp/letf2.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+ Return 0 iff a == b, 1 iff a > b, 2 iff a and b are unordered, -1 iff a < b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+CMPtype __letf2(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_UNPACK_RAW_Q(B, b);
+ FP_CMP_Q(r, A, B, 2);
+ if (r == 2 && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+ FP_SET_EXCEPTION(FP_EX_INVALID);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
+
+strong_alias(__letf2, __lttf2);
diff --git a/gcc/config/soft-fp/muldf3.c b/gcc/config/soft-fp/muldf3.c
new file mode 100644
index 000000000..7eb2015ae
--- /dev/null
+++ b/gcc/config/soft-fp/muldf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a * b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __muldf3(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_D(A, a);
+ FP_UNPACK_D(B, b);
+ FP_MUL_D(R, A, B);
+ FP_PACK_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
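
The arithmetic routines add two steps to the conversion pattern: FP_INIT_ROUNDMODE captures the current rounding direction before any operand is unpacked, and the unpack here is the classifying FP_UNPACK_D rather than the raw form used by the comparisons, since multiplication must distinguish zeros, infinities, and NaNs. The net behaviour matches hardware IEEE arithmetic; for example, overflow rounds to infinity:

#include <stdio.h>

int main(void)
{
    volatile double big = 1e308;   /* volatile keeps the multiply at run time */
    printf("%f\n", big * big);     /* inf: FP_MUL_D overflows and FP_PACK_D
                                      rounds the result to infinity           */
    return 0;
}
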
diff --git a/gcc/config/soft-fp/mulsf3.c b/gcc/config/soft-fp/mulsf3.c
new file mode 100644
index 000000000..5df440687
--- /dev/null
+++ b/gcc/config/soft-fp/mulsf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a * b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __mulsf3(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_S(A, a);
+ FP_UNPACK_S(B, b);
+ FP_MUL_S(R, A, B);
+ FP_PACK_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/multf3.c b/gcc/config/soft-fp/multf3.c
new file mode 100644
index 000000000..0abab6ddc
--- /dev/null
+++ b/gcc/config/soft-fp/multf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a * b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __multf3(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_Q(A, a);
+ FP_UNPACK_Q(B, b);
+ FP_MUL_Q(R, A, B);
+ FP_PACK_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/negdf2.c b/gcc/config/soft-fp/negdf2.c
new file mode 100644
index 000000000..54869e9a6
--- /dev/null
+++ b/gcc/config/soft-fp/negdf2.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+ Return -a
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __negdf2(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(R);
+ DFtype r;
+
+ FP_UNPACK_D(A, a);
+ FP_NEG_D(R, A);
+ FP_PACK_D(r, R);
+ FP_CLEAR_EXCEPTIONS;
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
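
Negation only flips the sign bit and is always exact, so it can raise no exception of its own; the FP_CLEAR_EXCEPTIONS before FP_HANDLE_EXCEPTIONS discards anything the classifying unpack may have flagged (for example on a signaling NaN operand). A sketch of the bit-level effect, assuming IEEE-754 binary64 and a 64-bit unsigned long long:

#include <stdio.h>
#include <string.h>

int main(void)
{
    double x = 1.5, y = -x;
    unsigned long long bx, by;
    memcpy(&bx, &x, sizeof bx);
    memcpy(&by, &y, sizeof by);
    printf("%016llx\n%016llx\n", bx, by);  /* 3ff8... vs bff8...: only
                                              bit 63, the sign, differs */
    return 0;
}
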
diff --git a/gcc/config/soft-fp/negsf2.c b/gcc/config/soft-fp/negsf2.c
new file mode 100644
index 000000000..bf5db7a45
--- /dev/null
+++ b/gcc/config/soft-fp/negsf2.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+ Return -a
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __negsf2(SFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(R);
+ SFtype r;
+
+ FP_UNPACK_S(A, a);
+ FP_NEG_S(R, A);
+ FP_PACK_S(r, R);
+ FP_CLEAR_EXCEPTIONS;
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/negtf2.c b/gcc/config/soft-fp/negtf2.c
new file mode 100644
index 000000000..5524c82df
--- /dev/null
+++ b/gcc/config/soft-fp/negtf2.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+ Return -a
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __negtf2(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(R);
+ TFtype r;
+
+ FP_UNPACK_Q(A, a);
+ FP_NEG_Q(R, A);
+ FP_PACK_Q(r, R);
+ FP_CLEAR_EXCEPTIONS;
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/op-1.h b/gcc/config/soft-fp/op-1.h
new file mode 100644
index 000000000..35cd0ba7b
--- /dev/null
+++ b/gcc/config/soft-fp/op-1.h
@@ -0,0 +1,302 @@
+/* Software floating-point emulation.
+ Basic one-word fraction declaration and manipulation.
+ Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f
+#define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f)
+#define _FP_FRAC_SET_1(X,I) (X##_f = I)
+#define _FP_FRAC_HIGH_1(X) (X##_f)
+#define _FP_FRAC_LOW_1(X) (X##_f)
+#define _FP_FRAC_WORD_1(X,w) (X##_f)
+
+#define _FP_FRAC_ADDI_1(X,I) (X##_f += I)
+#define _FP_FRAC_SLL_1(X,N) \
+ do { \
+ if (__builtin_constant_p(N) && (N) == 1) \
+ X##_f += X##_f; \
+ else \
+ X##_f <<= (N); \
+ } while (0)
+#define _FP_FRAC_SRL_1(X,N) (X##_f >>= N)
+
+/* Right shift with sticky-lsb. */
+#define _FP_FRAC_SRST_1(X,S,N,sz) __FP_FRAC_SRST_1(X##_f, S, N, sz)
+#define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz)
+
+#define __FP_FRAC_SRST_1(X,S,N,sz) \
+do { \
+ S = (__builtin_constant_p(N) && (N) == 1 \
+ ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0); \
+ X = X >> (N); \
+} while (0)
+
+#define __FP_FRAC_SRS_1(X,N,sz) \
+ (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \
+ ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
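+
+/* For illustration, with an (assumed) 8-bit word and N == 3 the sticky
+   shift behaves like this, next to the plain _FP_FRAC_SRL_1:
+
+     X = 0b10110110:  SRL 3 -> 0b00010110
+                      SRS 3 -> 0b00010111  (discarded bits 110 != 0,
+                                            so the lsb is forced on)
+     X = 0b10110000:  SRS 3 -> 0b00010110  (discarded bits 000,
+                                            lsb left untouched)
+
+   Remembering whether *any* discarded bit was set is what lets the
+   later rounding step implement round-to-nearest-even exactly.  */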
+
+#define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f)
+#define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f)
+#define _FP_FRAC_DEC_1(X,Y) (X##_f -= Y##_f)
+#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0)
+#define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_1(fs,X) (X##_f &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f)
+
+#define _FP_ZEROFRAC_1 0
+#define _FP_MINFRAC_1 1
+#define _FP_MAXFRAC_1 (~(_FP_WS_TYPE)0)
+
+/*
+ * Unpack the raw bits of a native fp value. Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_1(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs _flo; _flo.flt = (val); \
+ \
+ X##_f = _flo.bits.frac; \
+ X##_e = _flo.bits.exp; \
+ X##_s = _flo.bits.sign; \
+ } while (0)
+
+#define _FP_UNPACK_RAW_1_P(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ X##_f = _flo->bits.frac; \
+ X##_e = _flo->bits.exp; \
+ X##_s = _flo->bits.sign; \
+ } while (0)
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_1(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs _flo; \
+ \
+ _flo.bits.frac = X##_f; \
+ _flo.bits.exp = X##_e; \
+ _flo.bits.sign = X##_s; \
+ \
+ (val) = _flo.flt; \
+ } while (0)
+
+#define _FP_PACK_RAW_1_P(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ _flo->bits.frac = X##_f; \
+ _flo->bits.exp = X##_e; \
+ _flo->bits.sign = X##_s; \
+ } while (0)
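+
+/* As a concrete illustration (assuming the IEEE single plumbing from
+   single.h, where union _FP_UNION_S packs a 23-bit frac, 8-bit exp and
+   a sign bit next to the SFtype member), _FP_UNPACK_RAW_1(S, A, f)
+   expands to roughly
+
+     union _FP_UNION_S _flo;
+     _flo.flt = f;
+     A_f = _flo.bits.frac;  A_e = _flo.bits.exp;  A_s = _flo.bits.sign;
+
+   i.e. a pure bit-field extraction, with classification left to
+   _FP_UNPACK_CANONICAL in op-common.h.  */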
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
+ multiplication immediately. */
+
+#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \
+ do { \
+ R##_f = X##_f * Y##_f; \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_1(R, wfracbits-1, 2*wfracbits); \
+ } while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
+
+#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \
+ do { \
+ _FP_W_TYPE _Z_f0, _Z_f1; \
+ doit(_Z_f1, _Z_f0, X##_f, Y##_f); \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_2(_Z, wfracbits-1, 2*wfracbits); \
+ R##_f = _Z_f0; \
+ } while (0)
+
+/* Finally, a simple widening multiply algorithm. What fun! */
+
+#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \
+ do { \
+ _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \
+ \
+ /* split the words in half */ \
+ _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
+ _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
+ _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
+ _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \
+ \
+ /* multiply the pieces */ \
+ _z_f0 = _xl * _yl; \
+ _a_f0 = _xh * _yl; \
+ _a_f1 = _xl * _yh; \
+ _z_f1 = _xh * _yh; \
+ \
+ /* reassemble into two full words */ \
+ if ((_a_f0 += _a_f1) < _a_f1) \
+ _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \
+ _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \
+ _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \
+ _FP_FRAC_ADD_2(_z, _z, _a); \
+ \
+ /* normalize */ \
+ _FP_FRAC_SRS_2(_z, wfracbits - 1, 2*wfracbits); \
+ R##_f = _z_f0; \
+ } while (0)
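+
+/* A worked example of the half-word split, using an imaginary 8-bit
+   _FP_W_TYPE so the numbers stay small: for X##_f = 0xAB, Y##_f = 0xCD
+   the nibble halves are _xh=0xA _xl=0xB _yh=0xC _yl=0xD, so
+
+     _z_f0 = 0xB*0xD = 0x8F      _a_f0 = 0xA*0xD = 0x82
+     _a_f1 = 0xB*0xC = 0x84      _z_f1 = 0xA*0xC = 0x78
+
+   _a_f0 += _a_f1 wraps (0x82 + 0x84 = 0x106 -> 0x06), so _z_f1 gains
+   the carry 0x10 -> 0x88; the remaining middle part is resplit as
+   0x00:0x60 and added in, giving the exact product 0x88EF = 0xAB*0xCD
+   before the normalizing shift.  */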
+
+
+/*
+ * Division algorithms:
+ */
+
+/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
+ division immediately. Give this macro either _FP_DIV_HELP_imm for
+ C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
+ choose will depend on what the compiler does with divrem4. */
+
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
+ do { \
+ _FP_W_TYPE _q, _r; \
+ X##_f <<= (X##_f < Y##_f \
+ ? R##_e--, _FP_WFRACBITS_##fs \
+ : _FP_WFRACBITS_##fs - 1); \
+ doit(_q, _r, X##_f, Y##_f); \
+ R##_f = _q | (_r != 0); \
+ } while (0)
+
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  The first macro below is for
+   a primitive that requires normalization, the second for one that
+   does not.  Look for UDIV_NEEDS_NORMALIZATION to tell which your
+   machine needs.  */
+
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
+ do { \
+ _FP_W_TYPE _nh, _nl, _q, _r, _y; \
+ \
+ /* Normalize Y -- i.e. make the most significant bit set. */ \
+ _y = Y##_f << _FP_WFRACXBITS_##fs; \
+ \
+    /* Shift the X operand correspondingly high, that is, up one full word.  */ \
+ if (X##_f < Y##_f) \
+ { \
+ R##_e--; \
+ _nl = 0; \
+ _nh = X##_f; \
+ } \
+ else \
+ { \
+ _nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
+ _nh = X##_f >> 1; \
+ } \
+ \
+ udiv_qrnnd(_q, _r, _nh, _nl, _y); \
+ R##_f = _q | (_r != 0); \
+ } while (0)
+
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
+ do { \
+ _FP_W_TYPE _nh, _nl, _q, _r; \
+ if (X##_f < Y##_f) \
+ { \
+ R##_e--; \
+ _nl = X##_f << _FP_WFRACBITS_##fs; \
+ _nh = X##_f >> _FP_WFRACXBITS_##fs; \
+ } \
+ else \
+ { \
+ _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
+ _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
+ } \
+ udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \
+ R##_f = _q | (_r != 0); \
+ } while (0)
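+
+/* In both udiv variants the quotient is OR-ed with (_r != 0): a nonzero
+   remainder only carries information below the working precision, so it
+   is folded into the quotient's sticky lsb, just as _FP_FRAC_SRS_1 does
+   for right shifts.  The pre-shift of X (with R##_e-- when X < Y) pins
+   the quotient's msb to a fixed position so the rounding step always
+   sees a normalized fraction.  */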
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+
+#define _FP_SQRT_MEAT_1(R, S, T, X, q) \
+ do { \
+ while (q != _FP_WORK_ROUND) \
+ { \
+ T##_f = S##_f + q; \
+ if (T##_f <= X##_f) \
+ { \
+ S##_f = T##_f + q; \
+ X##_f -= T##_f; \
+ R##_f += q; \
+ } \
+ _FP_FRAC_SLL_1(X, 1); \
+ q >>= 1; \
+ } \
+ if (X##_f) \
+ { \
+ if (S##_f < X##_f) \
+ R##_f |= _FP_WORK_ROUND; \
+ R##_f |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
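+
+/* This is the classical digit-by-digit (shift-and-subtract) square
+   root: each pass tests whether the next result bit q can be set while
+   keeping R*R <= X, with S tracking the partial root scaled so the
+   test stays a single add and compare.  Stripped of the fraction
+   plumbing, the same recurrence on a plain integer looks like this
+   hypothetical helper (illustrative only, not used by soft-fp):
+
+     unsigned isqrt(unsigned n)
+     {
+       unsigned x = n, c = 0, d = 1u << 30;
+       while (d > n)
+         d >>= 2;                      // highest power of 4 <= n
+       while (d)
+         {
+           if (x >= c + d)             // does this result bit fit?
+             { x -= c + d; c = (c >> 1) + d; }
+           else
+             c >>= 1;
+           d >>= 2;
+         }
+       return c;                       // floor(sqrt(n))
+     }
+
+   The macro additionally shifts X left each round to expose further
+   fraction bits, and records any leftover as round/sticky bits.  */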
+
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f)
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r)
+
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_COPY_1_1(D, S) (D##_f = S##_f)
diff --git a/gcc/config/soft-fp/op-2.h b/gcc/config/soft-fp/op-2.h
new file mode 100644
index 000000000..3a3b3aa06
--- /dev/null
+++ b/gcc/config/soft-fp/op-2.h
@@ -0,0 +1,617 @@
+/* Software floating-point emulation.
+ Basic two-word fraction declaration and manipulation.
+ Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combined executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1
+#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
+#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
+#define _FP_FRAC_HIGH_2(X) (X##_f1)
+#define _FP_FRAC_LOW_2(X) (X##_f0)
+#define _FP_FRAC_WORD_2(X,w) (X##_f##w)
+
+#define _FP_FRAC_SLL_2(X,N) \
+(void)(((N) < _FP_W_TYPE_SIZE) \
+ ? ({ \
+ if (__builtin_constant_p(N) && (N) == 1) \
+ { \
+ X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
+ X##_f0 += X##_f0; \
+ } \
+ else \
+ { \
+ X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
+ X##_f0 <<= (N); \
+ } \
+ 0; \
+ }) \
+ : ({ \
+ X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
+ X##_f0 = 0; \
+ }))
+
+
+#define _FP_FRAC_SRL_2(X,N) \
+(void)(((N) < _FP_W_TYPE_SIZE) \
+ ? ({ \
+ X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
+ X##_f1 >>= (N); \
+ }) \
+ : ({ \
+ X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
+ X##_f1 = 0; \
+ }))
+
+/* Right shift with sticky-lsb. */
+#define _FP_FRAC_SRST_2(X,S, N,sz) \
+(void)(((N) < _FP_W_TYPE_SIZE) \
+ ? ({ \
+ S = (__builtin_constant_p(N) && (N) == 1 \
+ ? X##_f0 & 1 \
+ : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
+ X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
+ X##_f1 >>= (N); \
+ }) \
+ : ({ \
+ S = ((((N) == _FP_W_TYPE_SIZE \
+ ? 0 \
+ : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
+ | X##_f0) != 0); \
+ X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
+ X##_f1 = 0; \
+ }))
+
+#define _FP_FRAC_SRS_2(X,N,sz) \
+(void)(((N) < _FP_W_TYPE_SIZE) \
+ ? ({ \
+ X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
+ (__builtin_constant_p(N) && (N) == 1 \
+ ? X##_f0 & 1 \
+ : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
+ X##_f1 >>= (N); \
+ }) \
+ : ({ \
+ X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
+ ((((N) == _FP_W_TYPE_SIZE \
+ ? 0 \
+ : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
+ | X##_f0) != 0)); \
+ X##_f1 = 0; \
+ }))
+
+#define _FP_FRAC_ADDI_2(X,I) \
+ __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
+
+#define _FP_FRAC_ADD_2(R,X,Y) \
+ __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_SUB_2(R,X,Y) \
+ __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_DEC_2(X,Y) \
+ __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_CLZ_2(R,X) \
+ do { \
+ if (X##_f1) \
+ __FP_CLZ(R,X##_f1); \
+ else \
+ { \
+ __FP_CLZ(R,X##_f0); \
+ R += _FP_W_TYPE_SIZE; \
+ } \
+ } while(0)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
+#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
+#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
+#define _FP_FRAC_GT_2(X, Y) \
+ (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
+#define _FP_FRAC_GE_2(X, Y) \
+ (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
+
+#define _FP_ZEROFRAC_2 0, 0
+#define _FP_MINFRAC_2 0, 1
+#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
+
+/*
+ * Internals
+ */
+
+#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
+
+#define __FP_CLZ_2(R, xh, xl) \
+ do { \
+ if (xh) \
+ __FP_CLZ(R,xh); \
+ else \
+ { \
+ __FP_CLZ(R,xl); \
+ R += _FP_W_TYPE_SIZE; \
+ } \
+ } while(0)
+
+#if 0
+
+#ifndef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i) \
+ (xh += ((xl += i) < i))
+#endif
+#ifndef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
+ (rh = xh + yh + ((rl = xl + yl) < xl))
+#endif
+#ifndef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
+ (rh = xh - yh - ((rl = xl - yl) > xl))
+#endif
+#ifndef __FP_FRAC_DEC_2
+#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
+ do { \
+ UWtype _t = xl; \
+ xh -= yh + ((xl -= yl) > _t); \
+ } while (0)
+#endif
+
+#else
+
+#undef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
+#undef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2 add_ssaaaa
+#undef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2 sub_ddmmss
+#undef __FP_FRAC_DEC_2
+#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
+
+#endif
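+
+/* The #if 0 branch above is the portable fallback: it detects a carry
+   with the unsigned-wraparound test (rl = xl + yl) < xl.  The live
+   branch maps the two-word primitives onto longlong.h instead, where
+
+     add_ssaaaa(sh, sl, ah, al, bh, bl)
+
+   sets (sh,sl) to the double-word sum (ah,al) + (bh,bl) and sub_ddmmss
+   is the matching difference, typically via the host's add-with-carry
+   instructions.  */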
+
+/*
+ * Unpack the raw bits of a native fp value. Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_2(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs _flo; _flo.flt = (val); \
+ \
+ X##_f0 = _flo.bits.frac0; \
+ X##_f1 = _flo.bits.frac1; \
+ X##_e = _flo.bits.exp; \
+ X##_s = _flo.bits.sign; \
+ } while (0)
+
+#define _FP_UNPACK_RAW_2_P(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ X##_f0 = _flo->bits.frac0; \
+ X##_f1 = _flo->bits.frac1; \
+ X##_e = _flo->bits.exp; \
+ X##_s = _flo->bits.sign; \
+ } while (0)
+
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_2(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs _flo; \
+ \
+ _flo.bits.frac0 = X##_f0; \
+ _flo.bits.frac1 = X##_f1; \
+ _flo.bits.exp = X##_e; \
+ _flo.bits.sign = X##_s; \
+ \
+ (val) = _flo.flt; \
+ } while (0)
+
+#define _FP_PACK_RAW_2_P(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ _flo->bits.frac0 = X##_f0; \
+ _flo->bits.frac1 = X##_f1; \
+ _flo->bits.exp = X##_e; \
+ _flo->bits.sign = X##_s; \
+ } while (0)
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
+
+#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
+ do { \
+ _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
+ \
+ doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
+ doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
+ doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
+ doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
+ \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
+ _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
+ _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1)); \
+ \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
+ R##_f0 = _FP_FRAC_WORD_4(_z,0); \
+ R##_f1 = _FP_FRAC_WORD_4(_z,1); \
+ } while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
+ Do only 3 multiplications instead of four. This one is for machines
+ where multiplication is much more expensive than subtraction. */
+
+#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
+ do { \
+ _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
+ _FP_W_TYPE _d; \
+ int _c1, _c2; \
+ \
+ _b_f0 = X##_f0 + X##_f1; \
+ _c1 = _b_f0 < X##_f0; \
+ _b_f1 = Y##_f0 + Y##_f1; \
+ _c2 = _b_f1 < Y##_f0; \
+ doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
+ doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
+ doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
+ \
+ _b_f0 &= -_c2; \
+ _b_f1 &= -_c1; \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
+ 0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
+ __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _b_f0); \
+ __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _b_f1); \
+ __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1), \
+ 0, _d, _FP_FRAC_WORD_4(_z,0)); \
+ __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
+ _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
+ __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
+ _c_f1, _c_f0, \
+ _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
+ \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
+ R##_f0 = _FP_FRAC_WORD_4(_z,0); \
+ R##_f1 = _FP_FRAC_WORD_4(_z,1); \
+ } while (0)
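+
+/* The three-multiply variant is Karatsuba's identity: writing the
+   operands as x1*W + x0 and y1*W + y0 with W = 2^_FP_W_TYPE_SIZE,
+
+     x*y = x1*y1*W^2 + ((x0+x1)*(y0+y1) - x1*y1 - x0*y0)*W + x0*y0
+
+   so only x0*y0, x1*y1 and (x0+x1)*(y0+y1) need the doit primitive.
+   The _c1/_c2 bookkeeping above restores the carries lost when the
+   one-word sums x0+x1 and y0+y1 wrap around.  */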
+
+#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
+ do { \
+ _FP_FRAC_DECL_4(_z); \
+ _FP_W_TYPE _x[2], _y[2]; \
+ _x[0] = X##_f0; _x[1] = X##_f1; \
+ _y[0] = Y##_f0; _y[1] = Y##_f1; \
+ \
+ mpn_mul_n(_z_f, _x, _y, 2); \
+ \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
+ R##_f0 = _z_f[0]; \
+ R##_f1 = _z_f[1]; \
+ } while (0)
+
+/* Do an (at most) 120x120=240 bit multiplication using double floating
+   point multiplication.  This is useful if floating point
+   multiplication has much bigger throughput than integer multiply.
+   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
+   between 106 and 120 only.
+   The caller guarantees that X and Y have the bit (1LL << (wfracbits - 1))
+   set.  SETFETZ is a macro which disables all FPU exceptions and sets
+   rounding towards zero; RESETFE should optionally reset it back.  */
+
+#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
+ do { \
+ static const double _const[] = { \
+ /* 2^-24 */ 5.9604644775390625e-08, \
+ /* 2^-48 */ 3.5527136788005009e-15, \
+ /* 2^-72 */ 2.1175823681357508e-22, \
+ /* 2^-96 */ 1.2621774483536189e-29, \
+ /* 2^28 */ 2.68435456e+08, \
+ /* 2^4 */ 1.600000e+01, \
+ /* 2^-20 */ 9.5367431640625e-07, \
+ /* 2^-44 */ 5.6843418860808015e-14, \
+ /* 2^-68 */ 3.3881317890172014e-21, \
+ /* 2^-92 */ 2.0194839173657902e-28, \
+ /* 2^-116 */ 1.2037062152420224e-35}; \
+ double _a240, _b240, _c240, _d240, _e240, _f240, \
+ _g240, _h240, _i240, _j240, _k240; \
+ union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
+ _p240, _q240, _r240, _s240; \
+ UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
+ \
+ if (wfracbits < 106 || wfracbits > 120) \
+ abort(); \
+ \
+ setfetz; \
+ \
+ _e240 = (double)(long)(X##_f0 & 0xffffff); \
+ _j240 = (double)(long)(Y##_f0 & 0xffffff); \
+ _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
+ _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
+ _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
+ _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
+ _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
+ _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
+ _a240 = (double)(long)(X##_f1 >> 32); \
+ _f240 = (double)(long)(Y##_f1 >> 32); \
+ _e240 *= _const[3]; \
+ _j240 *= _const[3]; \
+ _d240 *= _const[2]; \
+ _i240 *= _const[2]; \
+ _c240 *= _const[1]; \
+ _h240 *= _const[1]; \
+ _b240 *= _const[0]; \
+ _g240 *= _const[0]; \
+ _s240.d = _e240*_j240;\
+ _r240.d = _d240*_j240 + _e240*_i240;\
+ _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
+ _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
+ _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
+ _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
+ _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
+ _l240.d = _a240*_g240 + _b240*_f240; \
+ _k240 = _a240*_f240; \
+ _r240.d += _s240.d; \
+ _q240.d += _r240.d; \
+ _p240.d += _q240.d; \
+ _o240.d += _p240.d; \
+ _n240.d += _o240.d; \
+ _m240.d += _n240.d; \
+ _l240.d += _m240.d; \
+ _k240 += _l240.d; \
+ _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
+ _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
+ _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
+ _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
+ _o240.d += _const[7]; \
+ _n240.d += _const[6]; \
+ _m240.d += _const[5]; \
+ _l240.d += _const[4]; \
+ if (_s240.d != 0.0) _y240 = 1; \
+ if (_r240.d != 0.0) _y240 = 1; \
+ if (_q240.d != 0.0) _y240 = 1; \
+ if (_p240.d != 0.0) _y240 = 1; \
+ _t240 = (DItype)_k240; \
+ _u240 = _l240.i; \
+ _v240 = _m240.i; \
+ _w240 = _n240.i; \
+ _x240 = _o240.i; \
+ R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
+ | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
+ R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
+ | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
+ | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
+ | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
+ | _y240; \
+ resetfe; \
+ } while (0)
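+
+/* Roughly how the double trick works: each operand is cut into five
+   24-bit digits, pre-scaled by the powers of 2^-24 in _const[], so any
+   digit product has at most 48 significant bits and is therefore exact
+   in an IEEE double.  With rounding forced to zero by SETFETZ, the
+   expression ((_const[k] + v) - _const[k]) splits off the part of v
+   belonging to column k (the familiar magic-constant splitting of
+   double-double arithmetic), and adding the matching _const[] entry
+   afterwards pins the exponent so the digit can be read straight out
+   of the low mantissa bits via the unions.  Whatever is discarded from
+   the four lowest columns only ever feeds the sticky flag _y240.  */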
+
+/*
+ * Division algorithms:
+ */
+
+#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
+ do { \
+ _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
+ if (_FP_FRAC_GT_2(X, Y)) \
+ { \
+ _n_f2 = X##_f1 >> 1; \
+ _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
+ _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
+ } \
+ else \
+ { \
+ R##_e--; \
+ _n_f2 = X##_f1; \
+ _n_f1 = X##_f0; \
+ _n_f0 = 0; \
+ } \
+ \
+ /* Normalize, i.e. make the most significant bit of the \
+ denominator set. */ \
+ _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
+ \
+ udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
+ umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
+ _r_f0 = _n_f0; \
+ if (_FP_FRAC_GT_2(_m, _r)) \
+ { \
+ R##_f1--; \
+ _FP_FRAC_ADD_2(_r, Y, _r); \
+ if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
+ { \
+ R##_f1--; \
+ _FP_FRAC_ADD_2(_r, Y, _r); \
+ } \
+ } \
+ _FP_FRAC_DEC_2(_r, _m); \
+ \
+ if (_r_f1 == Y##_f1) \
+ { \
+ /* This is a special case, not an optimization \
+ (_r/Y##_f1 would not fit into UWtype). \
+ As _r is guaranteed to be < Y, R##_f0 can be either \
+ (UWtype)-1 or (UWtype)-2. But as we know what kind \
+ of bits it is (sticky, guard, round), we don't care. \
+	 We also don't care what the remainder is, because the \
+ guard bit will be set anyway. -jj */ \
+ R##_f0 = -1; \
+ } \
+ else \
+ { \
+ udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
+ umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
+ _r_f0 = 0; \
+ if (_FP_FRAC_GT_2(_m, _r)) \
+ { \
+ R##_f0--; \
+ _FP_FRAC_ADD_2(_r, Y, _r); \
+ if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
+ { \
+ R##_f0--; \
+ _FP_FRAC_ADD_2(_r, Y, _r); \
+ } \
+ } \
+ if (!_FP_FRAC_EQ_2(_r, _m)) \
+ R##_f0 |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
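+
+/* This is schoolbook long division with one-word digits -- Knuth's
+   Algorithm D with a two-word dividend step: udiv_qrnnd estimates each
+   quotient word from the leading words, umul_ppmm forms the estimate
+   times the low divisor word, and since a normalized divisor makes the
+   estimate too high by at most two, each correction loop decrements
+   the quotient word and adds Y back at most twice.  */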
+
+
+#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
+ do { \
+ _FP_W_TYPE _x[4], _y[2], _z[4]; \
+ _y[0] = Y##_f0; _y[1] = Y##_f1; \
+ _x[0] = _x[3] = 0; \
+ if (_FP_FRAC_GT_2(X, Y)) \
+ { \
+ R##_e++; \
+ _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
+ X##_f1 >> (_FP_W_TYPE_SIZE - \
+ (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
+ _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
+ } \
+ else \
+ { \
+ _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
+ X##_f1 >> (_FP_W_TYPE_SIZE - \
+ (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
+ _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
+ } \
+ \
+ (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
+ R##_f1 = _z[1]; \
+ R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
+ } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+
+#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
+ do { \
+ while (q) \
+ { \
+ T##_f1 = S##_f1 + q; \
+ if (T##_f1 <= X##_f1) \
+ { \
+ S##_f1 = T##_f1 + q; \
+ X##_f1 -= T##_f1; \
+ R##_f1 += q; \
+ } \
+ _FP_FRAC_SLL_2(X, 1); \
+ q >>= 1; \
+ } \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ while (q != _FP_WORK_ROUND) \
+ { \
+ T##_f0 = S##_f0 + q; \
+ T##_f1 = S##_f1; \
+ if (T##_f1 < X##_f1 || \
+ (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
+ { \
+ S##_f0 = T##_f0 + q; \
+ S##_f1 += (T##_f0 > S##_f0); \
+ _FP_FRAC_DEC_2(X, T); \
+ R##_f0 += q; \
+ } \
+ _FP_FRAC_SLL_2(X, 1); \
+ q >>= 1; \
+ } \
+ if (X##_f0 | X##_f1) \
+ { \
+ if (S##_f1 < X##_f1 || \
+ (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
+ R##_f0 |= _FP_WORK_ROUND; \
+ R##_f0 |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
+
+
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
+(void)((rsize <= _FP_W_TYPE_SIZE) \
+ ? ({ r = X##_f0; }) \
+ : ({ \
+ r = X##_f1; \
+ r <<= _FP_W_TYPE_SIZE; \
+ r += X##_f0; \
+ }))
+
+#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
+ do { \
+ X##_f0 = r; \
+ X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
+ } while (0)
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
+
+#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))
+
+#define _FP_FRAC_COPY_2_2(D,S) _FP_FRAC_COPY_2(D,S)
diff --git a/gcc/config/soft-fp/op-4.h b/gcc/config/soft-fp/op-4.h
new file mode 100644
index 000000000..70b9fafbe
--- /dev/null
+++ b/gcc/config/soft-fp/op-4.h
@@ -0,0 +1,688 @@
+/* Software floating-point emulation.
+ Basic four-word fraction declaration and manipulation.
+ Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combined executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
+#define _FP_FRAC_COPY_4(D,S) \
+ (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
+ D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
+#define _FP_FRAC_HIGH_4(X) (X##_f[3])
+#define _FP_FRAC_LOW_4(X) (X##_f[0])
+#define _FP_FRAC_WORD_4(X,w) (X##_f[w])
+
+#define _FP_FRAC_SLL_4(X,N) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _up = (N) % _FP_W_TYPE_SIZE; \
+ _down = _FP_W_TYPE_SIZE - _up; \
+ if (!_up) \
+ for (_i = 3; _i >= _skip; --_i) \
+ X##_f[_i] = X##_f[_i-_skip]; \
+ else \
+ { \
+ for (_i = 3; _i > _skip; --_i) \
+ X##_f[_i] = X##_f[_i-_skip] << _up \
+ | X##_f[_i-_skip-1] >> _down; \
+ X##_f[_i--] = X##_f[0] << _up; \
+ } \
+ for (; _i >= 0; --_i) \
+ X##_f[_i] = 0; \
+ } while (0)
+
+/* This one was broken too */
+#define _FP_FRAC_SRL_4(X,N) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _down = (N) % _FP_W_TYPE_SIZE; \
+ _up = _FP_W_TYPE_SIZE - _down; \
+ if (!_down) \
+ for (_i = 0; _i <= 3-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip]; \
+ else \
+ { \
+ for (_i = 0; _i < 3-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip] >> _down \
+ | X##_f[_i+_skip+1] << _up; \
+ X##_f[_i++] = X##_f[3] >> _down; \
+ } \
+ for (; _i < 4; ++_i) \
+ X##_f[_i] = 0; \
+ } while (0)
+
+
+/* Right shift with sticky-lsb.
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+#define _FP_FRAC_SRST_4(X,S,N,size) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _FP_W_TYPE _s; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _down = (N) % _FP_W_TYPE_SIZE; \
+ _up = _FP_W_TYPE_SIZE - _down; \
+ for (_s = _i = 0; _i < _skip; ++_i) \
+ _s |= X##_f[_i]; \
+ if (!_down) \
+ for (_i = 0; _i <= 3-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip]; \
+ else \
+ { \
+ _s |= X##_f[_i] << _up; \
+ for (_i = 0; _i < 3-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip] >> _down \
+ | X##_f[_i+_skip+1] << _up; \
+ X##_f[_i++] = X##_f[3] >> _down; \
+ } \
+ for (; _i < 4; ++_i) \
+ X##_f[_i] = 0; \
+ S = (_s != 0); \
+ } while (0)
+
+#define _FP_FRAC_SRS_4(X,N,size) \
+ do { \
+ int _sticky; \
+ _FP_FRAC_SRST_4(X, _sticky, N, size); \
+ X##_f[0] |= _sticky; \
+ } while (0)
+
+#define _FP_FRAC_ADD_4(R,X,Y) \
+ __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
+ X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
+ Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_SUB_4(R,X,Y) \
+ __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
+ X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
+ Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_DEC_4(X,Y) \
+ __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
+ Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_ADDI_4(X,I) \
+ __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+
+#define _FP_ZEROFRAC_4 0,0,0,0
+#define _FP_MINFRAC_4 0,0,0,1
+#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
+
+#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
+#define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
+
+#define _FP_FRAC_EQ_4(X,Y) \
+ (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
+ && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
+
+#define _FP_FRAC_GT_4(X,Y) \
+ (X##_f[3] > Y##_f[3] || \
+ (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
+ (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
+ (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
+ )) \
+ )) \
+ )
+
+#define _FP_FRAC_GE_4(X,Y) \
+ (X##_f[3] > Y##_f[3] || \
+ (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \
+ (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \
+ (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
+ )) \
+ )) \
+ )
+
+
+#define _FP_FRAC_CLZ_4(R,X) \
+ do { \
+ if (X##_f[3]) \
+ { \
+ __FP_CLZ(R,X##_f[3]); \
+ } \
+ else if (X##_f[2]) \
+ { \
+ __FP_CLZ(R,X##_f[2]); \
+ R += _FP_W_TYPE_SIZE; \
+ } \
+ else if (X##_f[1]) \
+ { \
+ __FP_CLZ(R,X##_f[1]); \
+ R += _FP_W_TYPE_SIZE*2; \
+ } \
+ else \
+ { \
+ __FP_CLZ(R,X##_f[0]); \
+ R += _FP_W_TYPE_SIZE*3; \
+ } \
+ } while(0)
+
+
+#define _FP_UNPACK_RAW_4(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs _flo; _flo.flt = (val); \
+ X##_f[0] = _flo.bits.frac0; \
+ X##_f[1] = _flo.bits.frac1; \
+ X##_f[2] = _flo.bits.frac2; \
+ X##_f[3] = _flo.bits.frac3; \
+ X##_e = _flo.bits.exp; \
+ X##_s = _flo.bits.sign; \
+ } while (0)
+
+#define _FP_UNPACK_RAW_4_P(fs, X, val) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ X##_f[0] = _flo->bits.frac0; \
+ X##_f[1] = _flo->bits.frac1; \
+ X##_f[2] = _flo->bits.frac2; \
+ X##_f[3] = _flo->bits.frac3; \
+ X##_e = _flo->bits.exp; \
+ X##_s = _flo->bits.sign; \
+ } while (0)
+
+#define _FP_PACK_RAW_4(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs _flo; \
+ _flo.bits.frac0 = X##_f[0]; \
+ _flo.bits.frac1 = X##_f[1]; \
+ _flo.bits.frac2 = X##_f[2]; \
+ _flo.bits.frac3 = X##_f[3]; \
+ _flo.bits.exp = X##_e; \
+ _flo.bits.sign = X##_s; \
+ (val) = _flo.flt; \
+ } while (0)
+
+#define _FP_PACK_RAW_4_P(fs, val, X) \
+ do { \
+ union _FP_UNION_##fs *_flo = \
+ (union _FP_UNION_##fs *)(val); \
+ \
+ _flo->bits.frac0 = X##_f[0]; \
+ _flo->bits.frac1 = X##_f[1]; \
+ _flo->bits.frac2 = X##_f[2]; \
+ _flo->bits.frac3 = X##_f[3]; \
+ _flo->bits.exp = X##_e; \
+ _flo->bits.sign = X##_s; \
+ } while (0)
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
+
+#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
+ do { \
+ _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
+ _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \
+ \
+ doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
+ doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \
+ doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \
+ doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \
+ doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \
+ doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
+ _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \
+ 0,0,_FP_FRAC_WORD_8(_z,1)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
+ _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \
+ _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
+ _FP_FRAC_WORD_8(_z,1)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
+ _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \
+ 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
+ _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \
+ _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
+ _FP_FRAC_WORD_8(_z,2)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
+ _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \
+ _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
+ _FP_FRAC_WORD_8(_z,2)); \
+ doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \
+ doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \
+ doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \
+ doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \
+ 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \
+ _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \
+ _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \
+ _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
+ _FP_FRAC_WORD_8(_z,3)); \
+ doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \
+ doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \
+ doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \
+ doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \
+ doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
+ _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \
+ 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
+ _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \
+ _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
+ _FP_FRAC_WORD_8(_z,4)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
+ _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \
+ _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
+ _FP_FRAC_WORD_8(_z,4)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
+ _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \
+ 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \
+ __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
+ _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \
+ _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
+ _FP_FRAC_WORD_8(_z,5)); \
+ doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \
+ __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
+ _b_f1,_b_f0, \
+ _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \
+ \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
+ __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
+ _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
+ } while (0)
+
+#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
+ do { \
+ _FP_FRAC_DECL_8(_z); \
+ \
+ mpn_mul_n(_z_f, _x_f, _y_f, 4); \
+ \
+    /* Normalize: since we know the msb of each multiplicand was \
+       at bit B, the msb of the product is at either bit 2B or 2B-1.  */ \
+ _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
+ __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
+ _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
+ } while (0)
+
+/*
+ * Helper utility for _FP_DIV_MEAT_4_udiv:
+ * pppp = m * nnn
+ */
+#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \
+ do { \
+ UWtype _t; \
+ umul_ppmm(p1,p0,m,n0); \
+ umul_ppmm(p2,_t,m,n1); \
+ __FP_FRAC_ADDI_2(p2,p1,_t); \
+ umul_ppmm(p3,_t,m,n2); \
+ __FP_FRAC_ADDI_2(p3,p2,_t); \
+ } while (0)
+
+/*
+ * Division algorithms:
+ */
+
+#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
+ do { \
+ int _i; \
+ _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \
+ _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \
+ if (_FP_FRAC_GT_4(X, Y)) \
+ { \
+ _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
+ _FP_FRAC_SRL_4(X, 1); \
+ } \
+ else \
+ R##_e--; \
+ \
+ /* Normalize, i.e. make the most significant bit of the \
+ denominator set. */ \
+ _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \
+ \
+ for (_i = 3; ; _i--) \
+ { \
+ if (X##_f[3] == Y##_f[3]) \
+ { \
+ /* This is a special case, not an optimization \
+ (X##_f[3]/Y##_f[3] would not fit into UWtype). \
+ As X## is guaranteed to be < Y, R##_f[_i] can be either \
+ (UWtype)-1 or (UWtype)-2. */ \
+ R##_f[_i] = -1; \
+ if (!_i) \
+ break; \
+ __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
+ Y##_f[2], Y##_f[1], Y##_f[0], 0, \
+ X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
+ _FP_FRAC_SUB_4(X, Y, X); \
+ if (X##_f[3] > Y##_f[3]) \
+ { \
+ R##_f[_i] = -2; \
+ _FP_FRAC_ADD_4(X, Y, X); \
+ } \
+ } \
+ else \
+ { \
+ udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
+ umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
+ R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
+ X##_f[2] = X##_f[1]; \
+ X##_f[1] = X##_f[0]; \
+ X##_f[0] = _n_f[_i]; \
+ if (_FP_FRAC_GT_4(_m, X)) \
+ { \
+ R##_f[_i]--; \
+ _FP_FRAC_ADD_4(X, Y, X); \
+ if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \
+ { \
+ R##_f[_i]--; \
+ _FP_FRAC_ADD_4(X, Y, X); \
+ } \
+ } \
+ _FP_FRAC_DEC_4(X, _m); \
+ if (!_i) \
+ { \
+ if (!_FP_FRAC_EQ_4(X, _m)) \
+ R##_f[0] |= _FP_WORK_STICKY; \
+ break; \
+ } \
+ } \
+ } \
+ } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+
+#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
+ do { \
+ while (q) \
+ { \
+ T##_f[3] = S##_f[3] + q; \
+ if (T##_f[3] <= X##_f[3]) \
+ { \
+ S##_f[3] = T##_f[3] + q; \
+ X##_f[3] -= T##_f[3]; \
+ R##_f[3] += q; \
+ } \
+ _FP_FRAC_SLL_4(X, 1); \
+ q >>= 1; \
+ } \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ while (q) \
+ { \
+ T##_f[2] = S##_f[2] + q; \
+ T##_f[3] = S##_f[3]; \
+ if (T##_f[3] < X##_f[3] || \
+ (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
+ { \
+ S##_f[2] = T##_f[2] + q; \
+ S##_f[3] += (T##_f[2] > S##_f[2]); \
+ __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \
+ T##_f[3], T##_f[2]); \
+ R##_f[2] += q; \
+ } \
+ _FP_FRAC_SLL_4(X, 1); \
+ q >>= 1; \
+ } \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ while (q) \
+ { \
+ T##_f[1] = S##_f[1] + q; \
+ T##_f[2] = S##_f[2]; \
+ T##_f[3] = S##_f[3]; \
+ if (T##_f[3] < X##_f[3] || \
+ (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \
+ (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \
+ { \
+ S##_f[1] = T##_f[1] + q; \
+ S##_f[2] += (T##_f[1] > S##_f[1]); \
+ S##_f[3] += (T##_f[2] > S##_f[2]); \
+ __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \
+ T##_f[3], T##_f[2], T##_f[1]); \
+ R##_f[1] += q; \
+ } \
+ _FP_FRAC_SLL_4(X, 1); \
+ q >>= 1; \
+ } \
+ q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
+ while (q != _FP_WORK_ROUND) \
+ { \
+ T##_f[0] = S##_f[0] + q; \
+ T##_f[1] = S##_f[1]; \
+ T##_f[2] = S##_f[2]; \
+ T##_f[3] = S##_f[3]; \
+ if (_FP_FRAC_GE_4(X,T)) \
+ { \
+ S##_f[0] = T##_f[0] + q; \
+ S##_f[1] += (T##_f[0] > S##_f[0]); \
+ S##_f[2] += (T##_f[1] > S##_f[1]); \
+ S##_f[3] += (T##_f[2] > S##_f[2]); \
+ _FP_FRAC_DEC_4(X, T); \
+ R##_f[0] += q; \
+ } \
+ _FP_FRAC_SLL_4(X, 1); \
+ q >>= 1; \
+ } \
+ if (!_FP_FRAC_ZEROP_4(X)) \
+ { \
+ if (_FP_FRAC_GT_4(X,S)) \
+ R##_f[0] |= _FP_WORK_ROUND; \
+ R##_f[0] |= _FP_WORK_STICKY; \
+ } \
+ } while (0)
+
+
+/*
+ * Internals
+ */
+
+#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
+ (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+
+#ifndef __FP_FRAC_ADD_3
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
+ do { \
+ _FP_W_TYPE _c1, _c2; \
+ r0 = x0 + y0; \
+ _c1 = r0 < x0; \
+ r1 = x1 + y1; \
+ _c2 = r1 < x1; \
+ r1 += _c1; \
+ _c2 |= r1 < _c1; \
+ r2 = x2 + y2 + _c2; \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_ADD_4
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
+ do { \
+ _FP_W_TYPE _c1, _c2, _c3; \
+ r0 = x0 + y0; \
+ _c1 = r0 < x0; \
+ r1 = x1 + y1; \
+ _c2 = r1 < x1; \
+ r1 += _c1; \
+ _c2 |= r1 < _c1; \
+ r2 = x2 + y2; \
+ _c3 = r2 < x2; \
+ r2 += _c2; \
+ _c3 |= r2 < _c2; \
+ r3 = x3 + y3 + _c3; \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_3
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
+ do { \
+ _FP_W_TYPE _c1, _c2; \
+ r0 = x0 - y0; \
+ _c1 = r0 > x0; \
+ r1 = x1 - y1; \
+ _c2 = r1 > x1; \
+ r1 -= _c1; \
+ _c2 |= _c1 && (y1 == x1); \
+ r2 = x2 - y2 - _c2; \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_4
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
+ do { \
+ _FP_W_TYPE _c1, _c2, _c3; \
+ r0 = x0 - y0; \
+ _c1 = r0 > x0; \
+ r1 = x1 - y1; \
+ _c2 = r1 > x1; \
+ r1 -= _c1; \
+ _c2 |= _c1 && (y1 == x1); \
+ r2 = x2 - y2; \
+ _c3 = r2 > x2; \
+ r2 -= _c2; \
+ _c3 |= _c2 && (y2 == x2); \
+ r3 = x3 - y3 - _c3; \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_3
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \
+ do { \
+ UWtype _t0, _t1, _t2; \
+ _t0 = x0, _t1 = x1, _t2 = x2; \
+ __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_4
+#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \
+ do { \
+ UWtype _t0, _t1, _t2, _t3; \
+ _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \
+ __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \
+ } while (0)
+#endif
+
+#ifndef __FP_FRAC_ADDI_4
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
+ do { \
+ UWtype _t; \
+ _t = ((x0 += i) < i); \
+ x1 += _t; _t = (x1 < _t); \
+ x2 += _t; _t = (x2 < _t); \
+ x3 += _t; \
+ } while (0)
+#endif
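+
+/* All of the generic versions above detect carries and borrows through
+   unsigned wraparound: after r0 = x0 + y0 a carry out occurred iff
+   r0 < x0, and after r0 = x0 - y0 a borrow iff r0 > x0.  E.g. with
+   8-bit words, 0xF0 + 0x20 wraps to 0x10 < 0xF0, flagging the carry
+   that must propagate into the next word.  */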
+
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like
+ * internally [e.g., that 2-word vars are X_f0 and X_f1].  But so do
+ * the ones in op-2.h and op-1.h.
+ */
+#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
+
+#define _FP_FRAC_COPY_2_4(D, S) \
+do { \
+ D##_f0 = S##_f[0]; \
+ D##_f1 = S##_f[1]; \
+} while (0)
+
+/* Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+/* Put the FP value X into r, which is an integer of size rsize. */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
+ do { \
+ if (rsize <= _FP_W_TYPE_SIZE) \
+ r = X##_f[0]; \
+ else if (rsize <= 2*_FP_W_TYPE_SIZE) \
+ { \
+ r = X##_f[1]; \
+ r <<= _FP_W_TYPE_SIZE; \
+ r += X##_f[0]; \
+ } \
+ else \
+ { \
+      /* I'm feeling lazy so we deal with int == 3words (implausible) */ \
+      /* and int == 4words as a single case.  */ \
+ r = X##_f[3]; \
+ r <<= _FP_W_TYPE_SIZE; \
+ r += X##_f[2]; \
+ r <<= _FP_W_TYPE_SIZE; \
+ r += X##_f[1]; \
+ r <<= _FP_W_TYPE_SIZE; \
+ r += X##_f[0]; \
+ } \
+ } while (0)
+
+/* "No disassemble Number Five!" */
+/* move an integer of size rsize into X's fractional part. We rely on
+ * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+ * having to mask the values we store into it.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
+ do { \
+ X##_f[0] = r; \
+ X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
+ X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
+ X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
+  } while (0)
+
+#define _FP_FRAC_COPY_4_1(D, S) \
+do { \
+ D##_f[0] = S##_f; \
+ D##_f[1] = D##_f[2] = D##_f[3] = 0; \
+} while (0)
+
+#define _FP_FRAC_COPY_4_2(D, S) \
+do { \
+ D##_f[0] = S##_f0; \
+ D##_f[1] = S##_f1; \
+ D##_f[2] = D##_f[3] = 0; \
+} while (0)
+
+#define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S)
diff --git a/gcc/config/soft-fp/op-8.h b/gcc/config/soft-fp/op-8.h
new file mode 100644
index 000000000..e0612a5e6
--- /dev/null
+++ b/gcc/config/soft-fp/op-8.h
@@ -0,0 +1,111 @@
+/* Software floating-point emulation.
+ Basic eight-word fraction declaration and manipulation.
+ Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combined executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+/* We need just a few things from here for op-4; if we ever need some
+   other macros, they can be added.  */
+#define _FP_FRAC_DECL_8(X) _FP_W_TYPE X##_f[8]
+#define _FP_FRAC_HIGH_8(X) (X##_f[7])
+#define _FP_FRAC_LOW_8(X) (X##_f[0])
+#define _FP_FRAC_WORD_8(X,w) (X##_f[w])
+
+#define _FP_FRAC_SLL_8(X,N) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _up = (N) % _FP_W_TYPE_SIZE; \
+ _down = _FP_W_TYPE_SIZE - _up; \
+ if (!_up) \
+ for (_i = 7; _i >= _skip; --_i) \
+ X##_f[_i] = X##_f[_i-_skip]; \
+ else \
+ { \
+ for (_i = 7; _i > _skip; --_i) \
+ X##_f[_i] = X##_f[_i-_skip] << _up \
+ | X##_f[_i-_skip-1] >> _down; \
+ X##_f[_i--] = X##_f[0] << _up; \
+ } \
+ for (; _i >= 0; --_i) \
+ X##_f[_i] = 0; \
+ } while (0)
+
+#define _FP_FRAC_SRL_8(X,N) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _down = (N) % _FP_W_TYPE_SIZE; \
+ _up = _FP_W_TYPE_SIZE - _down; \
+ if (!_down) \
+ for (_i = 0; _i <= 7-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip]; \
+ else \
+ { \
+ for (_i = 0; _i < 7-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip] >> _down \
+ | X##_f[_i+_skip+1] << _up; \
+ X##_f[_i++] = X##_f[7] >> _down; \
+ } \
+ for (; _i < 8; ++_i) \
+ X##_f[_i] = 0; \
+ } while (0)
+
+
+/* Right shift with sticky-lsb.
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+#define _FP_FRAC_SRS_8(X,N,size) \
+ do { \
+ _FP_I_TYPE _up, _down, _skip, _i; \
+ _FP_W_TYPE _s; \
+ _skip = (N) / _FP_W_TYPE_SIZE; \
+ _down = (N) % _FP_W_TYPE_SIZE; \
+ _up = _FP_W_TYPE_SIZE - _down; \
+ for (_s = _i = 0; _i < _skip; ++_i) \
+ _s |= X##_f[_i]; \
+ if (!_down) \
+ for (_i = 0; _i <= 7-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip]; \
+ else \
+ { \
+ _s |= X##_f[_i] << _up; \
+ for (_i = 0; _i < 7-_skip; ++_i) \
+ X##_f[_i] = X##_f[_i+_skip] >> _down \
+ | X##_f[_i+_skip+1] << _up; \
+ X##_f[_i++] = X##_f[7] >> _down; \
+ } \
+ for (; _i < 8; ++_i) \
+ X##_f[_i] = 0; \
+ /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
+ X##_f[0] |= (_s != 0); \
+ } while (0)
+
diff --git a/gcc/config/soft-fp/op-common.h b/gcc/config/soft-fp/op-common.h
new file mode 100644
index 000000000..ef11b527b
--- /dev/null
+++ b/gcc/config/soft-fp/op-common.h
@@ -0,0 +1,1359 @@
+/* Software floating-point emulation. Common operations.
+ Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#define _FP_DECL(wc, X) \
+ _FP_I_TYPE X##_c __attribute__((unused)), X##_s, X##_e; \
+ _FP_FRAC_DECL_##wc(X)
+
+/*
+ * Finish truly unpacking a native fp value by classifying the kind
+ * of fp value and normalizing both the exponent and the fraction.
+ */
+
+#define _FP_UNPACK_CANONICAL(fs, wc, X) \
+do { \
+ switch (X##_e) \
+ { \
+ default: \
+ _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_IMPLBIT_##fs; \
+ _FP_FRAC_SLL_##wc(X, _FP_WORKBITS); \
+ X##_e -= _FP_EXPBIAS_##fs; \
+ X##_c = FP_CLS_NORMAL; \
+ break; \
+ \
+ case 0: \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ X##_c = FP_CLS_ZERO; \
+ else \
+ { \
+ /* a denormalized number */ \
+ _FP_I_TYPE _shift; \
+ _FP_FRAC_CLZ_##wc(_shift, X); \
+ _shift -= _FP_FRACXBITS_##fs; \
+ _FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS)); \
+ X##_e -= _FP_EXPBIAS_##fs - 1 + _shift; \
+ X##_c = FP_CLS_NORMAL; \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ } \
+ break; \
+ \
+ case _FP_EXPMAX_##fs: \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ X##_c = FP_CLS_INF; \
+ else \
+ { \
+ X##_c = FP_CLS_NAN; \
+ /* Check for signaling NaN */ \
+ if (!(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)) \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ } \
+ break; \
+ } \
+} while (0)
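+
+/* A concrete walk-through for IEEE single: 1.0f is 0x3f800000, i.e.
+   sign 0, biased exponent 127, stored fraction 0.  The default case
+   fires: the implicit msb is OR-ed into the fraction, the fraction is
+   shifted left by the _FP_WORKBITS guard/round/sticky positions, the
+   bias is subtracted leaving X##_e = 0, and X##_c becomes
+   FP_CLS_NORMAL.  */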
+
+/* Finish unpacking an fp value in semi-raw mode: the mantissa is
+ shifted by _FP_WORKBITS but the implicit MSB is not inserted and
+ other classification is not done. */
+#define _FP_UNPACK_SEMIRAW(fs, wc, X) _FP_FRAC_SLL_##wc(X, _FP_WORKBITS)
+
+/* A semi-raw value has overflowed to infinity. Adjust the mantissa
+ and exponent appropriately. */
+#define _FP_OVERFLOW_SEMIRAW(fs, wc, X) \
+do { \
+ if (FP_ROUNDMODE == FP_RND_NEAREST \
+ || (FP_ROUNDMODE == FP_RND_PINF && !X##_s) \
+ || (FP_ROUNDMODE == FP_RND_MINF && X##_s)) \
+ { \
+ X##_e = _FP_EXPMAX_##fs; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ } \
+ else \
+ { \
+ X##_e = _FP_EXPMAX_##fs - 1; \
+ _FP_FRAC_SET_##wc(X, _FP_MAXFRAC_##wc); \
+ } \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ FP_SET_EXCEPTION(FP_EX_OVERFLOW); \
+} while (0)
+
+/* Check for a semi-raw value being a signaling NaN and raise the
+ invalid exception if so. */
+#define _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X) \
+do { \
+ if (X##_e == _FP_EXPMAX_##fs \
+ && !_FP_FRAC_ZEROP_##wc(X) \
+ && !(_FP_FRAC_HIGH_##fs(X) & _FP_QNANBIT_SH_##fs)) \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+} while (0)
+
+/* Choose a NaN result from an operation on two semi-raw NaN
+ values. */
+#define _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP) \
+do { \
+ /* _FP_CHOOSENAN expects raw values, so shift as required. */ \
+ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \
+ _FP_FRAC_SRL_##wc(Y, _FP_WORKBITS); \
+ _FP_CHOOSENAN(fs, wc, R, X, Y, OP); \
+ _FP_FRAC_SLL_##wc(R, _FP_WORKBITS); \
+} while (0)
+
+/* Test whether a biased exponent is normal (not zero or maximum). */
+#define _FP_EXP_NORMAL(fs, wc, X) (((X##_e + 1) & _FP_EXPMAX_##fs) > 1)
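+
+/* This works because _FP_EXPMAX_##fs is all-ones in the exponent field:
+   X##_e == 0 yields (0+1) & max == 1 and X##_e == max yields
+   (max+1) & max == 0, so only the normal range 1 .. max-1 leaves a
+   masked value greater than 1.  */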
+
+/* Prepare to pack an fp value in semi-raw mode: the mantissa is
+ rounded and shifted right, with the rounding possibly increasing
+ the exponent (including changing a finite value to infinity). */
+#define _FP_PACK_SEMIRAW(fs, wc, X) \
+do { \
+ _FP_ROUND(wc, X); \
+ if (_FP_FRAC_HIGH_##fs(X) \
+ & (_FP_OVERFLOW_##fs >> 1)) \
+ { \
+ _FP_FRAC_HIGH_##fs(X) &= ~(_FP_OVERFLOW_##fs >> 1); \
+ X##_e++; \
+ if (X##_e == _FP_EXPMAX_##fs) \
+ _FP_OVERFLOW_SEMIRAW(fs, wc, X); \
+ } \
+ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \
+ if (!_FP_EXP_NORMAL(fs, wc, X) && !_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ if (X##_e == 0) \
+ FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \
+ else \
+ { \
+ if (!_FP_KEEPNANFRACP) \
+ { \
+ _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs); \
+ X##_s = _FP_NANSIGN_##fs; \
+ } \
+ else \
+ _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_QNANBIT_##fs; \
+ } \
+ } \
+} while (0)
+
+/*
+ * Before packing the bits back into the native fp result, take care
+ * of such mundane things as rounding and overflow. Also, for some
+ * kinds of fp values, the original parts may not have been fully
+ * extracted -- but that is ok, we can regenerate them now.
+ */
+
+#define _FP_PACK_CANONICAL(fs, wc, X) \
+do { \
+ switch (X##_c) \
+ { \
+ case FP_CLS_NORMAL: \
+ X##_e += _FP_EXPBIAS_##fs; \
+ if (X##_e > 0) \
+ { \
+ _FP_ROUND(wc, X); \
+ if (_FP_FRAC_OVERP_##wc(fs, X)) \
+ { \
+ _FP_FRAC_CLEAR_OVERP_##wc(fs, X); \
+ X##_e++; \
+ } \
+ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \
+ if (X##_e >= _FP_EXPMAX_##fs) \
+ { \
+ /* overflow */ \
+ switch (FP_ROUNDMODE) \
+ { \
+ case FP_RND_NEAREST: \
+ X##_c = FP_CLS_INF; \
+ break; \
+ case FP_RND_PINF: \
+ if (!X##_s) X##_c = FP_CLS_INF; \
+ break; \
+ case FP_RND_MINF: \
+ if (X##_s) X##_c = FP_CLS_INF; \
+ break; \
+ } \
+ if (X##_c == FP_CLS_INF) \
+ { \
+ /* Overflow to infinity */ \
+ X##_e = _FP_EXPMAX_##fs; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ } \
+ else \
+ { \
+ /* Overflow to maximum normal */ \
+ X##_e = _FP_EXPMAX_##fs - 1; \
+ _FP_FRAC_SET_##wc(X, _FP_MAXFRAC_##wc); \
+ } \
+ FP_SET_EXCEPTION(FP_EX_OVERFLOW); \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ } \
+ } \
+ else \
+ { \
+ /* we've got a denormalized number */ \
+ X##_e = -X##_e + 1; \
+ if (X##_e <= _FP_WFRACBITS_##fs) \
+ { \
+ _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs); \
+ _FP_ROUND(wc, X); \
+ if (_FP_FRAC_HIGH_##fs(X) \
+ & (_FP_OVERFLOW_##fs >> 1)) \
+ { \
+ X##_e = 1; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ } \
+ else \
+ { \
+ X##_e = 0; \
+ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \
+ FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \
+ } \
+ } \
+ else \
+ { \
+ /* underflow to zero */ \
+ X##_e = 0; \
+ if (!_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc); \
+ _FP_ROUND(wc, X); \
+ _FP_FRAC_LOW_##wc(X) >>= (_FP_WORKBITS); \
+ } \
+ FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \
+ } \
+ } \
+ break; \
+ \
+ case FP_CLS_ZERO: \
+ X##_e = 0; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ break; \
+ \
+ case FP_CLS_INF: \
+ X##_e = _FP_EXPMAX_##fs; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ break; \
+ \
+ case FP_CLS_NAN: \
+ X##_e = _FP_EXPMAX_##fs; \
+ if (!_FP_KEEPNANFRACP) \
+ { \
+ _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs); \
+ X##_s = _FP_NANSIGN_##fs; \
+ } \
+ else \
+ _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_QNANBIT_##fs; \
+ break; \
+ } \
+} while (0)
+
+/* This one takes a raw (not cooked) argument; it evaluates to 1 if X
+ * is a signaling NaN and to 0 otherwise.
+ */
+#define _FP_ISSIGNAN(fs, wc, X) \
+({ \
+ int __ret = 0; \
+ if (X##_e == _FP_EXPMAX_##fs) \
+ { \
+ if (!_FP_FRAC_ZEROP_##wc(X) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)) \
+ __ret = 1; \
+ } \
+ __ret; \
+})
+
+/* Addition on semi-raw values. */
+#define _FP_ADD_INTERNAL(fs, wc, R, X, Y, OP) \
+do { \
+ if (X##_s == Y##_s) \
+ { \
+ /* Addition. */ \
+ R##_s = X##_s; \
+ int ediff = X##_e - Y##_e; \
+ if (ediff > 0) \
+ { \
+ R##_e = X##_e; \
+ if (Y##_e == 0) \
+ { \
+ /* Y is zero or denormalized. */ \
+ if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto add_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ ediff--; \
+ if (ediff == 0) \
+ { \
+ _FP_FRAC_ADD_##wc(R, X, Y); \
+ goto add3; \
+ } \
+ if (X##_e == _FP_EXPMAX_##fs) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto add_done; \
+ } \
+ goto add1; \
+ } \
+ } \
+ else if (X##_e == _FP_EXPMAX_##fs) \
+ { \
+ /* X is NaN or Inf, Y is normal. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto add_done; \
+ } \
+ \
+ /* Insert implicit MSB of Y. */ \
+ _FP_FRAC_HIGH_##fs(Y) |= _FP_IMPLBIT_SH_##fs; \
+ \
+ add1: \
+ /* Shift the mantissa of Y to the right EDIFF steps; \
+ remember to account later for the implicit MSB of X. */ \
+ if (ediff <= _FP_WFRACBITS_##fs) \
+ _FP_FRAC_SRS_##wc(Y, ediff, _FP_WFRACBITS_##fs); \
+ else if (!_FP_FRAC_ZEROP_##wc(Y)) \
+ _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc); \
+ _FP_FRAC_ADD_##wc(R, X, Y); \
+ } \
+ else if (ediff < 0) \
+ { \
+ ediff = -ediff; \
+ R##_e = Y##_e; \
+ if (X##_e == 0) \
+ { \
+ /* X is zero or denormalized. */ \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto add_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ ediff--; \
+ if (ediff == 0) \
+ { \
+ _FP_FRAC_ADD_##wc(R, Y, X); \
+ goto add3; \
+ } \
+ if (Y##_e == _FP_EXPMAX_##fs) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto add_done; \
+ } \
+ goto add2; \
+ } \
+ } \
+ else if (Y##_e == _FP_EXPMAX_##fs) \
+ { \
+ /* Y is NaN or Inf, X is normal. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto add_done; \
+ } \
+ \
+ /* Insert implicit MSB of X. */ \
+ _FP_FRAC_HIGH_##fs(X) |= _FP_IMPLBIT_SH_##fs; \
+ \
+ add2: \
+ /* Shift the mantissa of X to the right EDIFF steps; \
+ remember to account later for the implicit MSB of Y. */ \
+ if (ediff <= _FP_WFRACBITS_##fs) \
+ _FP_FRAC_SRS_##wc(X, ediff, _FP_WFRACBITS_##fs); \
+ else if (!_FP_FRAC_ZEROP_##wc(X)) \
+ _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc); \
+ _FP_FRAC_ADD_##wc(R, Y, X); \
+ } \
+ else \
+ { \
+ /* ediff == 0. */ \
+ if (!_FP_EXP_NORMAL(fs, wc, X)) \
+ { \
+ if (X##_e == 0) \
+ { \
+ /* X and Y are zero or denormalized. */ \
+ R##_e = 0; \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ if (!_FP_FRAC_ZEROP_##wc(Y)) \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto add_done; \
+ } \
+ else if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto add_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_ADD_##wc(R, X, Y); \
+ if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs) \
+ { \
+ /* Normalized result. */ \
+ _FP_FRAC_HIGH_##fs(R) \
+ &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs; \
+ R##_e = 1; \
+ } \
+ goto add_done; \
+ } \
+ } \
+ else \
+ { \
+ /* X and Y are NaN or Inf. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ R##_e = _FP_EXPMAX_##fs; \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ else if (_FP_FRAC_ZEROP_##wc(Y)) \
+ _FP_FRAC_COPY_##wc(R, X); \
+ else \
+ _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP); \
+ goto add_done; \
+ } \
+ } \
+ /* The exponents of X and Y, both normal, are equal. The \
+ implicit MSBs will always add to increase the \
+ exponent. */ \
+ _FP_FRAC_ADD_##wc(R, X, Y); \
+ R##_e = X##_e + 1; \
+ _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \
+ if (R##_e == _FP_EXPMAX_##fs) \
+ /* Overflow to infinity (depending on rounding mode). */ \
+ _FP_OVERFLOW_SEMIRAW(fs, wc, R); \
+ goto add_done; \
+ } \
+ add3: \
+ if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs) \
+ { \
+ /* Overflow. */ \
+ _FP_FRAC_HIGH_##fs(R) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs; \
+ R##_e++; \
+ _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \
+ if (R##_e == _FP_EXPMAX_##fs) \
+ /* Overflow to infinity (depending on rounding mode). */ \
+ _FP_OVERFLOW_SEMIRAW(fs, wc, R); \
+ } \
+ add_done: ; \
+ } \
+ else \
+ { \
+ /* Subtraction. */ \
+ int ediff = X##_e - Y##_e; \
+ if (ediff > 0) \
+ { \
+ R##_e = X##_e; \
+ R##_s = X##_s; \
+ if (Y##_e == 0) \
+ { \
+ /* Y is zero or denormalized. */ \
+ if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto sub_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ ediff--; \
+ if (ediff == 0) \
+ { \
+ _FP_FRAC_SUB_##wc(R, X, Y); \
+ goto sub3; \
+ } \
+ if (X##_e == _FP_EXPMAX_##fs) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto sub_done; \
+ } \
+ goto sub1; \
+ } \
+ } \
+ else if (X##_e == _FP_EXPMAX_##fs) \
+ { \
+ /* X is NaN or Inf, Y is normal. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ goto sub_done; \
+ } \
+ \
+ /* Insert implicit MSB of Y. */ \
+ _FP_FRAC_HIGH_##fs(Y) |= _FP_IMPLBIT_SH_##fs; \
+ \
+ sub1: \
+ /* Shift the mantissa of Y to the right EDIFF steps; \
+ remember to account later for the implicit MSB of X. */ \
+ if (ediff <= _FP_WFRACBITS_##fs) \
+ _FP_FRAC_SRS_##wc(Y, ediff, _FP_WFRACBITS_##fs); \
+ else if (!_FP_FRAC_ZEROP_##wc(Y)) \
+ _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc); \
+ _FP_FRAC_SUB_##wc(R, X, Y); \
+ } \
+ else if (ediff < 0) \
+ { \
+ ediff = -ediff; \
+ R##_e = Y##_e; \
+ R##_s = Y##_s; \
+ if (X##_e == 0) \
+ { \
+ /* X is zero or denormalized. */ \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto sub_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ ediff--; \
+ if (ediff == 0) \
+ { \
+ _FP_FRAC_SUB_##wc(R, Y, X); \
+ goto sub3; \
+ } \
+ if (Y##_e == _FP_EXPMAX_##fs) \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto sub_done; \
+ } \
+ goto sub2; \
+ } \
+ } \
+ else if (Y##_e == _FP_EXPMAX_##fs) \
+ { \
+ /* Y is NaN or Inf, X is normal. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ goto sub_done; \
+ } \
+ \
+ /* Insert implicit MSB of X. */ \
+ _FP_FRAC_HIGH_##fs(X) |= _FP_IMPLBIT_SH_##fs; \
+ \
+ sub2: \
+ /* Shift the mantissa of X to the right EDIFF steps; \
+ remember to account later for the implicit MSB of Y. */ \
+ if (ediff <= _FP_WFRACBITS_##fs) \
+ _FP_FRAC_SRS_##wc(X, ediff, _FP_WFRACBITS_##fs); \
+ else if (!_FP_FRAC_ZEROP_##wc(X)) \
+ _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc); \
+ _FP_FRAC_SUB_##wc(R, Y, X); \
+ } \
+ else \
+ { \
+ /* ediff == 0. */ \
+ if (!_FP_EXP_NORMAL(fs, wc, X)) \
+ { \
+ if (X##_e == 0) \
+ { \
+ /* X and Y are zero or denormalized. */ \
+ R##_e = 0; \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ if (_FP_FRAC_ZEROP_##wc(Y)) \
+ R##_s = (FP_ROUNDMODE == FP_RND_MINF); \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ R##_s = Y##_s; \
+ } \
+ goto sub_done; \
+ } \
+ else if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_COPY_##wc(R, X); \
+ R##_s = X##_s; \
+ goto sub_done; \
+ } \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_SUB_##wc(R, X, Y); \
+ R##_s = X##_s; \
+ if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs) \
+ { \
+ /* |X| < |Y|, negate result. */ \
+ _FP_FRAC_SUB_##wc(R, Y, X); \
+ R##_s = Y##_s; \
+ } \
+ else if (_FP_FRAC_ZEROP_##wc(R)) \
+ R##_s = (FP_ROUNDMODE == FP_RND_MINF); \
+ goto sub_done; \
+ } \
+ } \
+ else \
+ { \
+ /* X and Y are NaN or Inf, of opposite signs. */ \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X); \
+ _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y); \
+ R##_e = _FP_EXPMAX_##fs; \
+ if (_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ /* Inf - Inf. */ \
+ R##_s = _FP_NANSIGN_##fs; \
+ _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \
+ _FP_FRAC_SLL_##wc(R, _FP_WORKBITS); \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ } \
+ else \
+ { \
+ /* Inf - NaN. */ \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ } \
+ } \
+ else \
+ { \
+ if (_FP_FRAC_ZEROP_##wc(Y)) \
+ { \
+ /* NaN - Inf. */ \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R, X); \
+ } \
+ else \
+ { \
+ /* NaN - NaN. */ \
+ _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP); \
+ } \
+ } \
+ goto sub_done; \
+ } \
+ } \
+ /* The exponents of X and Y, both normal, are equal. The \
+ implicit MSBs cancel. */ \
+ R##_e = X##_e; \
+ _FP_FRAC_SUB_##wc(R, X, Y); \
+ R##_s = X##_s; \
+ if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs) \
+ { \
+ /* |X| < |Y|, negate result. */ \
+ _FP_FRAC_SUB_##wc(R, Y, X); \
+ R##_s = Y##_s; \
+ } \
+ else if (_FP_FRAC_ZEROP_##wc(R)) \
+ { \
+ R##_e = 0; \
+ R##_s = (FP_ROUNDMODE == FP_RND_MINF); \
+ goto sub_done; \
+ } \
+ goto norm; \
+ } \
+ sub3: \
+ if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs) \
+ { \
+ int diff; \
+ /* Carry into most significant bit of larger one of X and Y, \
+ canceling it; renormalize. */ \
+ _FP_FRAC_HIGH_##fs(R) &= _FP_IMPLBIT_SH_##fs - 1; \
+ norm: \
+ _FP_FRAC_CLZ_##wc(diff, R); \
+ diff -= _FP_WFRACXBITS_##fs; \
+ _FP_FRAC_SLL_##wc(R, diff); \
+ if (R##_e <= diff) \
+ { \
+ /* R is denormalized. */ \
+ diff = diff - R##_e + 1; \
+ _FP_FRAC_SRS_##wc(R, diff, _FP_WFRACBITS_##fs); \
+ R##_e = 0; \
+ } \
+ else \
+ { \
+ R##_e -= diff; \
+ _FP_FRAC_HIGH_##fs(R) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs; \
+ } \
+ } \
+ sub_done: ; \
+ } \
+} while (0)
+
+#define _FP_ADD(fs, wc, R, X, Y) _FP_ADD_INTERNAL(fs, wc, R, X, Y, '+')
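+/* Subtraction is addition with the sign of Y flipped; the sign of a
+   NaN operand is deliberately left alone so that its sign and payload
+   propagate unchanged through _FP_ADD_INTERNAL.  */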
+#define _FP_SUB(fs, wc, R, X, Y) \
+ do { \
+ if (!(Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) Y##_s ^= 1; \
+ _FP_ADD_INTERNAL(fs, wc, R, X, Y, '-'); \
+ } while (0)
+
+
+/*
+ * Main negation routine. FIXME -- when we care about setting exception
+ * bits reliably, this will not do. We should examine all of the fp classes.
+ */
+
+#define _FP_NEG(fs, wc, R, X) \
+ do { \
+ _FP_FRAC_COPY_##wc(R, X); \
+ R##_c = X##_c; \
+ R##_e = X##_e; \
+ R##_s = 1 ^ X##_s; \
+ } while (0)
+
+
+/*
+ * Main multiplication routine. The input values should be cooked.
+ */
+
+#define _FP_MUL(fs, wc, R, X, Y) \
+do { \
+ R##_s = X##_s ^ Y##_s; \
+ switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \
+ { \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \
+ R##_c = FP_CLS_NORMAL; \
+ R##_e = X##_e + Y##_e + 1; \
+ \
+ _FP_MUL_MEAT_##fs(R,X,Y); \
+ \
+ if (_FP_FRAC_OVERP_##wc(fs, R)) \
+ _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \
+ else \
+ R##_e--; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \
+ _FP_CHOOSENAN(fs, wc, R, X, Y, '*'); \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \
+ R##_s = X##_s; \
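+    /* Fall through: R takes X's class and fraction.  */		\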
+ \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \
+ _FP_FRAC_COPY_##wc(R, X); \
+ R##_c = X##_c; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \
+ R##_s = Y##_s; \
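+    /* Fall through: R takes Y's class and fraction.  */		\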
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ R##_c = Y##_c; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \
+ R##_s = _FP_NANSIGN_##fs; \
+ R##_c = FP_CLS_NAN; \
+ _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ break; \
+ \
+ default: \
+ abort(); \
+ } \
+} while (0)
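+
+/* Illustrative usage (a hedged sketch, not part of this header): the
+   cooked binary operations are normally wrapped as in the following
+   __muldf3, assuming the usual D-mode bindings from double.h; a
+   division entry point is built the same way with FP_DIV_D.
+
+     DFtype __muldf3 (DFtype a, DFtype b)
+     {
+       FP_DECL_EX;
+       FP_DECL_D (A); FP_DECL_D (B); FP_DECL_D (R);
+       DFtype r;
+
+       FP_INIT_ROUNDMODE;
+       FP_UNPACK_D (A, a);	/* cook both operands */
+       FP_UNPACK_D (B, b);
+       FP_MUL_D (R, A, B);	/* cooked multiply */
+       FP_PACK_D (r, R);	/* round and repack */
+       FP_HANDLE_EXCEPTIONS;
+       return r;
+     }  */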
+
+
+/*
+ * Main division routine. The input values should be cooked.
+ */
+
+#define _FP_DIV(fs, wc, R, X, Y) \
+do { \
+ R##_s = X##_s ^ Y##_s; \
+ switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \
+ { \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \
+ R##_c = FP_CLS_NORMAL; \
+ R##_e = X##_e - Y##_e; \
+ \
+ _FP_DIV_MEAT_##fs(R,X,Y); \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \
+ _FP_CHOOSENAN(fs, wc, R, X, Y, '/'); \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R, X); \
+ R##_c = X##_c; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R, Y); \
+ R##_c = Y##_c; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \
+ R##_c = FP_CLS_ZERO; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \
+ FP_SET_EXCEPTION(FP_EX_DIVZERO); \
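+    /* Fall through: the quotient is an infinity in all three cases.  */ \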
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \
+ R##_c = FP_CLS_INF; \
+ break; \
+ \
+ case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \
+ case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \
+ R##_s = _FP_NANSIGN_##fs; \
+ R##_c = FP_CLS_NAN; \
+ _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ break; \
+ \
+ default: \
+ abort(); \
+ } \
+} while (0)
+
+
+/*
+ * Main three-way comparison routine. The inputs should be raw, not
+ * cooked. The result is -1, 0 or 1 for ordered values; when either
+ * operand is a NaN, the caller-supplied value UN is returned instead.
+ */
+
+#define _FP_CMP(fs, wc, ret, X, Y, un) \
+ do { \
+ /* NANs are unordered */ \
+ if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \
+ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \
+ { \
+ ret = un; \
+ } \
+ else \
+ { \
+ int __is_zero_x; \
+ int __is_zero_y; \
+ \
+ __is_zero_x = (!X##_e && _FP_FRAC_ZEROP_##wc(X)) ? 1 : 0; \
+ __is_zero_y = (!Y##_e && _FP_FRAC_ZEROP_##wc(Y)) ? 1 : 0; \
+ \
+ if (__is_zero_x && __is_zero_y) \
+ ret = 0; \
+ else if (__is_zero_x) \
+ ret = Y##_s ? 1 : -1; \
+ else if (__is_zero_y) \
+ ret = X##_s ? -1 : 1; \
+ else if (X##_s != Y##_s) \
+ ret = X##_s ? -1 : 1; \
+ else if (X##_e > Y##_e) \
+ ret = X##_s ? -1 : 1; \
+ else if (X##_e < Y##_e) \
+ ret = X##_s ? 1 : -1; \
+ else if (_FP_FRAC_GT_##wc(X, Y)) \
+ ret = X##_s ? -1 : 1; \
+ else if (_FP_FRAC_GT_##wc(Y, X)) \
+ ret = X##_s ? 1 : -1; \
+ else \
+ ret = 0; \
+ } \
+ } while (0)
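+
+/* Illustrative usage (a hedged sketch): UN is what the comparison
+   evaluates to when either operand is a NaN, so a >=-style helper
+   passes a negative value to make unordered inputs compare as "less";
+   real wrappers additionally raise FP_EX_INVALID for signaling NaNs.
+
+     CMPtype __gedf2 (DFtype a, DFtype b)
+     {
+       FP_DECL_EX;
+       FP_DECL_D (A); FP_DECL_D (B);
+       CMPtype r;
+
+       FP_UNPACK_RAW_D (A, a);	/* raw, as _FP_CMP expects */
+       FP_UNPACK_RAW_D (B, b);
+       FP_CMP_D (r, A, B, -2);	/* unordered => -2, i.e. not >= */
+       FP_HANDLE_EXCEPTIONS;
+       return r;
+     }  */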
+
+
+/* Simplification for strict equality. */
+
+#define _FP_CMP_EQ(fs, wc, ret, X, Y) \
+ do { \
+ /* NANs are unordered */ \
+ if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \
+ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \
+ { \
+ ret = 1; \
+ } \
+ else \
+ { \
+ ret = !(X##_e == Y##_e \
+ && _FP_FRAC_EQ_##wc(X, Y) \
+ && (X##_s == Y##_s || (!X##_e && _FP_FRAC_ZEROP_##wc(X)))); \
+ } \
+ } while (0)
+
+/* Version to test unordered. */
+
+#define _FP_CMP_UNORD(fs, wc, ret, X, Y) \
+ do { \
+ ret = ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \
+ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))); \
+ } while (0)
+
+/*
+ * Main square root routine. The input value should be cooked.
+ */
+
+#define _FP_SQRT(fs, wc, R, X) \
+do { \
+ _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S); \
+ _FP_W_TYPE q; \
+ switch (X##_c) \
+ { \
+ case FP_CLS_NAN: \
+ _FP_FRAC_COPY_##wc(R, X); \
+ R##_s = X##_s; \
+ R##_c = FP_CLS_NAN; \
+ break; \
+ case FP_CLS_INF: \
+ if (X##_s) \
+ { \
+ R##_s = _FP_NANSIGN_##fs; \
+ R##_c = FP_CLS_NAN; /* NAN */ \
+ _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ } \
+ else \
+ { \
+ R##_s = 0; \
+ R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */ \
+ } \
+ break; \
+ case FP_CLS_ZERO: \
+ R##_s = X##_s; \
+ R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */ \
+ break; \
+ case FP_CLS_NORMAL: \
+ R##_s = 0; \
+ if (X##_s) \
+ { \
+ R##_c = FP_CLS_NAN; /* sNAN */ \
+ R##_s = _FP_NANSIGN_##fs; \
+ _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ break; \
+ } \
+ R##_c = FP_CLS_NORMAL; \
+ if (X##_e & 1) \
+ _FP_FRAC_SLL_##wc(X, 1); \
+ R##_e = X##_e >> 1; \
+ _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc); \
+ _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc); \
+ q = _FP_OVERFLOW_##fs >> 1; \
+ _FP_SQRT_MEAT_##wc(R, S, T, X, q); \
+ } \
+ } while (0)
+
+/*
+ * Convert from FP to integer. Input is raw.
+ */
+
+/* RSIGNED can have the following values:
+ * 0: the number is required to be 0..(2^rsize)-1; if not, the invalid
+ * (NV) exception is raised and the result is either 0 or (2^rsize)-1,
+ * depending on the sign of the input.
+ * 1: the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1; if
+ * not, NV is raised and the result is either -(2^(rsize-1)) or
+ * (2^(rsize-1))-1, depending on the sign.
+ * -1: the number is required to be -(2^(rsize-1))..(2^rsize)-1; if
+ * not, NV is raised and the result is either -(2^(rsize-1)) or
+ * (2^(rsize-1))-1, depending on the sign.
+ * A usage sketch follows the macro definition below.
+ */
+#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \
+do { \
+ if (X##_e < _FP_EXPBIAS_##fs) \
+ { \
+ r = 0; \
+ if (X##_e == 0) \
+ { \
+ if (!_FP_FRAC_ZEROP_##wc(X)) \
+ { \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ } \
+ } \
+ else \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ } \
+ else if (X##_e >= _FP_EXPBIAS_##fs + rsize - (rsigned > 0 || X##_s) \
+ || (!rsigned && X##_s)) \
+ { \
+ /* Overflow or converting to the most negative integer. */ \
+ if (rsigned) \
+ { \
+ r = 1; \
+ r <<= rsize - 1; \
+ r -= 1 - X##_s; \
+ } else { \
+ r = 0; \
+ if (X##_s) \
+ r = ~r; \
+ } \
+ \
+ if (rsigned && X##_s && X##_e == _FP_EXPBIAS_##fs + rsize - 1) \
+ { \
+ /* Possibly converting to most negative integer; check the \
+ mantissa. */ \
+ int inexact = 0; \
+ (void)((_FP_FRACBITS_##fs > rsize) \
+ ? ({ _FP_FRAC_SRST_##wc(X, inexact, \
+ _FP_FRACBITS_##fs - rsize, \
+ _FP_FRACBITS_##fs); 0; }) \
+ : 0); \
+ if (!_FP_FRAC_ZEROP_##wc(X)) \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ else if (inexact) \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ } \
+ else \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ } \
+ else \
+ { \
+ _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_IMPLBIT_##fs; \
+ if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1) \
+ { \
+ _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \
+ r <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1; \
+ } \
+ else \
+ { \
+ int inexact; \
+ _FP_FRAC_SRST_##wc(X, inexact, \
+ (_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs - 1 \
+ - X##_e), \
+ _FP_FRACBITS_##fs); \
+ if (inexact) \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \
+ } \
+ if (rsigned && X##_s) \
+ r = -r; \
+ } \
+} while (0)
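+
+/* Illustrative usage (a hedged sketch): a double -> 32-bit signed
+   conversion; note the raw unpack, and RSIGNED == 1 requesting signed
+   saturation.  The result variable is unsigned to keep the shifts in
+   _FP_TO_INT well defined.
+
+     SItype __fixdfsi (DFtype a)
+     {
+       FP_DECL_EX;
+       FP_DECL_D (A);
+       USItype r;
+
+       FP_UNPACK_RAW_D (A, a);	/* input is raw */
+       FP_TO_INT_D (r, A, SI_BITS, 1);
+       FP_HANDLE_EXCEPTIONS;
+       return r;
+     }  */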
+
+/* Convert integer to fp. Output is raw. RTYPE is unsigned even if
+ input is signed. */
+#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype) \
+ do { \
+ if (r) \
+ { \
+ rtype ur_; \
+ \
+ if ((X##_s = (r < 0))) \
+ r = -(rtype)r; \
+ \
+ ur_ = (rtype) r; \
+ (void)((rsize <= _FP_W_TYPE_SIZE) \
+ ? ({ \
+ int lz_; \
+ __FP_CLZ(lz_, (_FP_W_TYPE)ur_); \
+ X##_e = _FP_EXPBIAS_##fs + _FP_W_TYPE_SIZE - 1 - lz_; \
+ }) \
+ : ((rsize <= 2 * _FP_W_TYPE_SIZE) \
+ ? ({ \
+ int lz_; \
+ __FP_CLZ_2(lz_, (_FP_W_TYPE)(ur_ >> _FP_W_TYPE_SIZE), \
+ (_FP_W_TYPE)ur_); \
+ X##_e = (_FP_EXPBIAS_##fs + 2 * _FP_W_TYPE_SIZE - 1 \
+ - lz_); \
+ }) \
+ : (abort(), 0))); \
+ \
+ if (rsize - 1 + _FP_EXPBIAS_##fs >= _FP_EXPMAX_##fs \
+ && X##_e >= _FP_EXPMAX_##fs) \
+ { \
+ /* Exponent too big; overflow to infinity. (May also \
+ happen after rounding below.) */ \
+ _FP_OVERFLOW_SEMIRAW(fs, wc, X); \
+ goto pack_semiraw; \
+ } \
+ \
+ if (rsize <= _FP_FRACBITS_##fs \
+ || X##_e < _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs) \
+ { \
+ /* Exactly representable; shift left. */ \
+ _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize); \
+ _FP_FRAC_SLL_##wc(X, (_FP_EXPBIAS_##fs \
+ + _FP_FRACBITS_##fs - 1 - X##_e)); \
+ } \
+ else \
+ { \
+ /* More bits in integer than in floating type; need to \
+ round. */ \
+ if (_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 < X##_e) \
+ ur_ = ((ur_ >> (X##_e - _FP_EXPBIAS_##fs \
+ - _FP_WFRACBITS_##fs + 1)) \
+ | ((ur_ << (rsize - (X##_e - _FP_EXPBIAS_##fs \
+ - _FP_WFRACBITS_##fs + 1))) \
+ != 0)); \
+ _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize); \
+ if ((_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 - X##_e) > 0) \
+ _FP_FRAC_SLL_##wc(X, (_FP_EXPBIAS_##fs \
+ + _FP_WFRACBITS_##fs - 1 - X##_e)); \
+ _FP_FRAC_HIGH_##fs(X) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs; \
+ pack_semiraw: \
+ _FP_PACK_SEMIRAW(fs, wc, X); \
+ } \
+ } \
+ else \
+ { \
+ X##_s = 0; \
+ X##_e = 0; \
+ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \
+ } \
+ } while (0)
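+
+/* Illustrative usage (a hedged sketch): a 32-bit signed -> double
+   conversion.  RTYPE is the unsigned type of the same width as the
+   input, as required by the comment above.
+
+     DFtype __floatsidf (SItype i)
+     {
+       FP_DECL_EX;
+       FP_DECL_D (A);
+       DFtype a;
+
+       FP_FROM_INT_D (A, i, SI_BITS, USItype);
+       FP_PACK_RAW_D (a, A);	/* output is raw */
+       FP_HANDLE_EXCEPTIONS;
+       return a;
+     }  */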
+
+
+/* Extend from a narrower floating-point format to a wider one. Input
+ and output are raw. */
+#define FP_EXTEND(dfs,sfs,dwc,swc,D,S) \
+do { \
+ if (_FP_FRACBITS_##dfs < _FP_FRACBITS_##sfs \
+ || (_FP_EXPMAX_##dfs - _FP_EXPBIAS_##dfs \
+ < _FP_EXPMAX_##sfs - _FP_EXPBIAS_##sfs) \
+ || (_FP_EXPBIAS_##dfs < _FP_EXPBIAS_##sfs + _FP_FRACBITS_##sfs - 1 \
+ && _FP_EXPBIAS_##dfs != _FP_EXPBIAS_##sfs)) \
+ abort(); \
+ D##_s = S##_s; \
+ _FP_FRAC_COPY_##dwc##_##swc(D, S); \
+ if (_FP_EXP_NORMAL(sfs, swc, S)) \
+ { \
+ D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs; \
+ _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs - _FP_FRACBITS_##sfs)); \
+ } \
+ else \
+ { \
+ if (S##_e == 0) \
+ { \
+ if (_FP_FRAC_ZEROP_##swc(S)) \
+ D##_e = 0; \
+ else if (_FP_EXPBIAS_##dfs \
+ < _FP_EXPBIAS_##sfs + _FP_FRACBITS_##sfs - 1) \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs \
+ - _FP_FRACBITS_##sfs)); \
+ D##_e = 0; \
+ } \
+ else \
+ { \
+ int _lz; \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ _FP_FRAC_CLZ_##swc(_lz, S); \
+ _FP_FRAC_SLL_##dwc(D, \
+ _lz + _FP_FRACBITS_##dfs \
+ - _FP_FRACTBITS_##sfs); \
+ D##_e = (_FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs + 1 \
+ + _FP_FRACXBITS_##sfs - _lz); \
+ } \
+ } \
+ else \
+ { \
+ D##_e = _FP_EXPMAX_##dfs; \
+ if (!_FP_FRAC_ZEROP_##swc(S)) \
+ { \
+ if (!(_FP_FRAC_HIGH_RAW_##sfs(S) & _FP_QNANBIT_##sfs)) \
+ FP_SET_EXCEPTION(FP_EX_INVALID); \
+ _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs \
+ - _FP_FRACBITS_##sfs)); \
+ } \
+ } \
+ } \
+} while (0)
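+
+/* Illustrative usage (a hedged sketch): a single -> double extension.
+   The word-count arguments depend on how many _FP_W_TYPE words each
+   format needs, mirroring the FP_TRUNC wrapper further down this
+   change.
+
+     DFtype __extendsfdf2 (SFtype a)
+     {
+       FP_DECL_EX;
+       FP_DECL_S (A);
+       FP_DECL_D (R);
+       DFtype r;
+
+       FP_INIT_ROUNDMODE;
+       FP_UNPACK_RAW_S (A, a);	/* input is raw */
+   #if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+       FP_EXTEND (D, S, 2, 1, R, A);
+   #else
+       FP_EXTEND (D, S, 1, 1, R, A);
+   #endif
+       FP_PACK_RAW_D (r, R);	/* output is raw */
+       FP_HANDLE_EXCEPTIONS;
+       return r;
+     }  */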
+
+/* Truncate from a wider floating-point format to a narrower one.
+ Input and output are semi-raw. */
+#define FP_TRUNC(dfs,sfs,dwc,swc,D,S) \
+do { \
+ if (_FP_FRACBITS_##sfs < _FP_FRACBITS_##dfs \
+ || (_FP_EXPBIAS_##sfs < _FP_EXPBIAS_##dfs + _FP_FRACBITS_##dfs - 1 \
+ && _FP_EXPBIAS_##sfs != _FP_EXPBIAS_##dfs)) \
+ abort(); \
+ D##_s = S##_s; \
+ if (_FP_EXP_NORMAL(sfs, swc, S)) \
+ { \
+ D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs; \
+ if (D##_e >= _FP_EXPMAX_##dfs) \
+ _FP_OVERFLOW_SEMIRAW(dfs, dwc, D); \
+ else \
+ { \
+ if (D##_e <= 0) \
+ { \
+ if (D##_e < 1 - _FP_FRACBITS_##dfs) \
+ { \
+ _FP_FRAC_SET_##swc(S, _FP_ZEROFRAC_##swc); \
+ _FP_FRAC_LOW_##swc(S) |= 1; \
+ } \
+ else \
+ { \
+ _FP_FRAC_HIGH_##sfs(S) |= _FP_IMPLBIT_SH_##sfs; \
+ _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs \
+ - _FP_WFRACBITS_##dfs + 1 - D##_e), \
+ _FP_WFRACBITS_##sfs); \
+ } \
+ D##_e = 0; \
+ } \
+ else \
+ _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs \
+ - _FP_WFRACBITS_##dfs), \
+ _FP_WFRACBITS_##sfs); \
+ _FP_FRAC_COPY_##dwc##_##swc(D, S); \
+ } \
+ } \
+ else \
+ { \
+ if (S##_e == 0) \
+ { \
+ D##_e = 0; \
+ if (_FP_FRAC_ZEROP_##swc(S)) \
+ _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc); \
+ else \
+ { \
+ FP_SET_EXCEPTION(FP_EX_DENORM); \
+ if (_FP_EXPBIAS_##sfs \
+ < _FP_EXPBIAS_##dfs + _FP_FRACBITS_##dfs - 1) \
+ { \
+ _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs \
+ - _FP_WFRACBITS_##dfs), \
+ _FP_WFRACBITS_##sfs); \
+ _FP_FRAC_COPY_##dwc##_##swc(D, S); \
+ } \
+ else \
+ { \
+ _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc); \
+ _FP_FRAC_LOW_##dwc(D) |= 1; \
+ } \
+ } \
+ } \
+ else \
+ { \
+ D##_e = _FP_EXPMAX_##dfs; \
+ if (_FP_FRAC_ZEROP_##swc(S)) \
+ _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc); \
+ else \
+ { \
+ _FP_CHECK_SIGNAN_SEMIRAW(sfs, swc, S); \
+ _FP_FRAC_SRL_##swc(S, (_FP_WFRACBITS_##sfs \
+ - _FP_WFRACBITS_##dfs)); \
+ _FP_FRAC_COPY_##dwc##_##swc(D, S); \
+ /* Semi-raw NaN must have all workbits cleared. */ \
+ _FP_FRAC_LOW_##dwc(D) \
+ &= ~(_FP_W_TYPE) ((1 << _FP_WORKBITS) - 1); \
+ _FP_FRAC_HIGH_##dfs(D) |= _FP_QNANBIT_SH_##dfs; \
+ } \
+ } \
+ } \
+} while (0)
+
+/*
+ * Helper primitives.
+ */
+
+/* Count leading zeros in a word. */
+
+#ifndef __FP_CLZ
+/* GCC 3.4 and later provide the builtins for us. */
+#define __FP_CLZ(r, x) \
+ do { \
+ if (sizeof (_FP_W_TYPE) == sizeof (unsigned int)) \
+ r = __builtin_clz (x); \
+ else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long)) \
+ r = __builtin_clzl (x); \
+ else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long long)) \
+ r = __builtin_clzll (x); \
+ else \
+ abort (); \
+ } while (0)
+#endif /* ndef __FP_CLZ */
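+
+/* For example (illustrative): with a 32-bit _FP_W_TYPE,
+   __FP_CLZ (r, 0x00ffffff) sets r to 8.  */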
+
+#define _FP_DIV_HELP_imm(q, r, n, d) \
+ do { \
+ q = n / d, r = n % d; \
+ } while (0)
+
+
+/* A restoring bit-by-bit division primitive. */
+
+#define _FP_DIV_MEAT_N_loop(fs, wc, R, X, Y) \
+ do { \
+ int count = _FP_WFRACBITS_##fs; \
+ _FP_FRAC_DECL_##wc (u); \
+ _FP_FRAC_DECL_##wc (v); \
+ _FP_FRAC_COPY_##wc (u, X); \
+ _FP_FRAC_COPY_##wc (v, Y); \
+ _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc); \
+ /* Normalize U and V. */ \
+ _FP_FRAC_SLL_##wc (u, _FP_WFRACXBITS_##fs); \
+ _FP_FRAC_SLL_##wc (v, _FP_WFRACXBITS_##fs); \
+ /* First round. Since the operands are normalized, either the \
+ first or second bit will be set in the fraction. Produce a \
+ normalized result by checking which and adjusting the loop \
+ count and exponent accordingly. */ \
+ if (_FP_FRAC_GE_1 (u, v)) \
+ { \
+ _FP_FRAC_SUB_##wc (u, u, v); \
+ _FP_FRAC_LOW_##wc (R) |= 1; \
+ count--; \
+ } \
+ else \
+ R##_e--; \
+ /* Subsequent rounds. */ \
+ do { \
+ int msb = (_FP_WS_TYPE) _FP_FRAC_HIGH_##wc (u) < 0; \
+ _FP_FRAC_SLL_##wc (u, 1); \
+ _FP_FRAC_SLL_##wc (R, 1); \
+ if (msb || _FP_FRAC_GE_1 (u, v)) \
+ { \
+ _FP_FRAC_SUB_##wc (u, u, v); \
+ _FP_FRAC_LOW_##wc (R) |= 1; \
+ } \
+ } while (--count > 0); \
+ /* If there's anything left in U, the result is inexact. */ \
+ _FP_FRAC_LOW_##wc (R) |= !_FP_FRAC_ZEROP_##wc (u); \
+ } while (0)
+
+#define _FP_DIV_MEAT_1_loop(fs, R, X, Y) _FP_DIV_MEAT_N_loop (fs, 1, R, X, Y)
+#define _FP_DIV_MEAT_2_loop(fs, R, X, Y) _FP_DIV_MEAT_N_loop (fs, 2, R, X, Y)
+#define _FP_DIV_MEAT_4_loop(fs, R, X, Y) _FP_DIV_MEAT_N_loop (fs, 4, R, X, Y)
diff --git a/gcc/config/soft-fp/quad.h b/gcc/config/soft-fp/quad.h
new file mode 100644
index 000000000..c22e94402
--- /dev/null
+++ b/gcc/config/soft-fp/quad.h
@@ -0,0 +1,271 @@
+/* Software floating-point emulation.
+ Definitions for IEEE Quad Precision.
+ Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_Q (4*_FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_Q (2*_FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_Q 113
+#define _FP_FRACXBITS_Q (_FP_FRACTBITS_Q - _FP_FRACBITS_Q)
+#define _FP_WFRACBITS_Q (_FP_WORKBITS + _FP_FRACBITS_Q)
+#define _FP_WFRACXBITS_Q (_FP_FRACTBITS_Q - _FP_WFRACBITS_Q)
+#define _FP_EXPBITS_Q 15
+#define _FP_EXPBIAS_Q 16383
+#define _FP_EXPMAX_Q 32767
+
+#define _FP_QNANBIT_Q \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_Q \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_Q \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_Q \
+ ((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_Q \
+ ((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE))
+
+typedef float TFtype __attribute__((mode(TF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_Q
+{
+ TFtype flt;
+ struct
+ {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_Q;
+ unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+ unsigned long frac2 : _FP_W_TYPE_SIZE;
+ unsigned long frac1 : _FP_W_TYPE_SIZE;
+ unsigned long frac0 : _FP_W_TYPE_SIZE;
+#else
+ unsigned long frac0 : _FP_W_TYPE_SIZE;
+ unsigned long frac1 : _FP_W_TYPE_SIZE;
+ unsigned long frac2 : _FP_W_TYPE_SIZE;
+ unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+ unsigned exp : _FP_EXPBITS_Q;
+ unsigned sign : 1;
+#endif /* not bigendian */
+ } bits __attribute__((packed));
+};
+
+
+#define FP_DECL_Q(X) _FP_DECL(4,X)
+#define FP_UNPACK_RAW_Q(X,val) _FP_UNPACK_RAW_4(Q,X,val)
+#define FP_UNPACK_RAW_QP(X,val) _FP_UNPACK_RAW_4_P(Q,X,val)
+#define FP_PACK_RAW_Q(val,X) _FP_PACK_RAW_4(Q,val,X)
+#define FP_PACK_RAW_QP(val,X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_4_P(Q,val,X); \
+ } while (0)
+
+#define FP_UNPACK_Q(X,val) \
+ do { \
+ _FP_UNPACK_RAW_4(Q,X,val); \
+ _FP_UNPACK_CANONICAL(Q,4,X); \
+ } while (0)
+
+#define FP_UNPACK_QP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_4_P(Q,X,val); \
+ _FP_UNPACK_CANONICAL(Q,4,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_Q(X,val) \
+ do { \
+ _FP_UNPACK_RAW_4(Q,X,val); \
+ _FP_UNPACK_SEMIRAW(Q,4,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_QP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_4_P(Q,X,val); \
+ _FP_UNPACK_SEMIRAW(Q,4,X); \
+ } while (0)
+
+#define FP_PACK_Q(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(Q,4,X); \
+ _FP_PACK_RAW_4(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_QP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(Q,4,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_4_P(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_Q(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(Q,4,X); \
+ _FP_PACK_RAW_4(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_QP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(Q,4,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_4_P(Q,val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_Q(X) _FP_ISSIGNAN(Q,4,X)
+#define FP_NEG_Q(R,X) _FP_NEG(Q,4,R,X)
+#define FP_ADD_Q(R,X,Y) _FP_ADD(Q,4,R,X,Y)
+#define FP_SUB_Q(R,X,Y) _FP_SUB(Q,4,R,X,Y)
+#define FP_MUL_Q(R,X,Y) _FP_MUL(Q,4,R,X,Y)
+#define FP_DIV_Q(R,X,Y) _FP_DIV(Q,4,R,X,Y)
+#define FP_SQRT_Q(R,X) _FP_SQRT(Q,4,R,X)
+#define _FP_SQRT_MEAT_Q(R,S,T,X,Q) _FP_SQRT_MEAT_4(R,S,T,X,Q)
+
+#define FP_CMP_Q(r,X,Y,un) _FP_CMP(Q,4,r,X,Y,un)
+#define FP_CMP_EQ_Q(r,X,Y) _FP_CMP_EQ(Q,4,r,X,Y)
+#define FP_CMP_UNORD_Q(r,X,Y) _FP_CMP_UNORD(Q,4,r,X,Y)
+
+#define FP_TO_INT_Q(r,X,rsz,rsg) _FP_TO_INT(Q,4,r,X,rsz,rsg)
+#define FP_FROM_INT_Q(X,r,rs,rt) _FP_FROM_INT(Q,4,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_4(X)
+#define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_4(X)
+
+#else /* not _FP_W_TYPE_SIZE < 64 */
+union _FP_UNION_Q
+{
+ TFtype flt /* __attribute__((mode(TF))) */ ;
+ struct {
+ _FP_W_TYPE a, b;
+ } longs;
+ struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_Q;
+ _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+ _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+#else
+ _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+ _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+ unsigned exp : _FP_EXPBITS_Q;
+ unsigned sign : 1;
+#endif
+ } bits;
+};
+
+#define FP_DECL_Q(X) _FP_DECL(2,X)
+#define FP_UNPACK_RAW_Q(X,val) _FP_UNPACK_RAW_2(Q,X,val)
+#define FP_UNPACK_RAW_QP(X,val) _FP_UNPACK_RAW_2_P(Q,X,val)
+#define FP_PACK_RAW_Q(val,X) _FP_PACK_RAW_2(Q,val,X)
+#define FP_PACK_RAW_QP(val,X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(Q,val,X); \
+ } while (0)
+
+#define FP_UNPACK_Q(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2(Q,X,val); \
+ _FP_UNPACK_CANONICAL(Q,2,X); \
+ } while (0)
+
+#define FP_UNPACK_QP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2_P(Q,X,val); \
+ _FP_UNPACK_CANONICAL(Q,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_Q(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2(Q,X,val); \
+ _FP_UNPACK_SEMIRAW(Q,2,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_QP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_2_P(Q,X,val); \
+ _FP_UNPACK_SEMIRAW(Q,2,X); \
+ } while (0)
+
+#define FP_PACK_Q(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(Q,2,X); \
+ _FP_PACK_RAW_2(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_QP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(Q,2,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_Q(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(Q,2,X); \
+ _FP_PACK_RAW_2(Q,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_QP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(Q,2,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_2_P(Q,val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_Q(X) _FP_ISSIGNAN(Q,2,X)
+#define FP_NEG_Q(R,X) _FP_NEG(Q,2,R,X)
+#define FP_ADD_Q(R,X,Y) _FP_ADD(Q,2,R,X,Y)
+#define FP_SUB_Q(R,X,Y) _FP_SUB(Q,2,R,X,Y)
+#define FP_MUL_Q(R,X,Y) _FP_MUL(Q,2,R,X,Y)
+#define FP_DIV_Q(R,X,Y) _FP_DIV(Q,2,R,X,Y)
+#define FP_SQRT_Q(R,X) _FP_SQRT(Q,2,R,X)
+#define _FP_SQRT_MEAT_Q(R,S,T,X,Q) _FP_SQRT_MEAT_2(R,S,T,X,Q)
+
+#define FP_CMP_Q(r,X,Y,un) _FP_CMP(Q,2,r,X,Y,un)
+#define FP_CMP_EQ_Q(r,X,Y) _FP_CMP_EQ(Q,2,r,X,Y)
+#define FP_CMP_UNORD_Q(r,X,Y) _FP_CMP_UNORD(Q,2,r,X,Y)
+
+#define FP_TO_INT_Q(r,X,rsz,rsg) _FP_TO_INT(Q,2,r,X,rsz,rsg)
+#define FP_FROM_INT_Q(X,r,rs,rt) _FP_FROM_INT(Q,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_2(X)
+#define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_2(X)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
diff --git a/gcc/config/soft-fp/single.h b/gcc/config/soft-fp/single.h
new file mode 100644
index 000000000..9c3734adf
--- /dev/null
+++ b/gcc/config/soft-fp/single.h
@@ -0,0 +1,151 @@
+/* Software floating-point emulation.
+ Definitions for IEEE Single Precision.
+ Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#define _FP_FRACTBITS_S _FP_W_TYPE_SIZE
+
+#define _FP_FRACBITS_S 24
+#define _FP_FRACXBITS_S (_FP_FRACTBITS_S - _FP_FRACBITS_S)
+#define _FP_WFRACBITS_S (_FP_WORKBITS + _FP_FRACBITS_S)
+#define _FP_WFRACXBITS_S (_FP_FRACTBITS_S - _FP_WFRACBITS_S)
+#define _FP_EXPBITS_S 8
+#define _FP_EXPBIAS_S 127
+#define _FP_EXPMAX_S 255
+#define _FP_QNANBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2))
+#define _FP_QNANBIT_SH_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2+_FP_WORKBITS))
+#define _FP_IMPLBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1))
+#define _FP_IMPLBIT_SH_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
+#define _FP_OVERFLOW_S ((_FP_W_TYPE)1 << (_FP_WFRACBITS_S))
+
+/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
+ chosen by the target machine. */
+
+typedef float SFtype __attribute__((mode(SF)));
+
+union _FP_UNION_S
+{
+ SFtype flt;
+ struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ unsigned sign : 1;
+ unsigned exp : _FP_EXPBITS_S;
+ unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+#else
+ unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+ unsigned exp : _FP_EXPBITS_S;
+ unsigned sign : 1;
+#endif
+ } bits __attribute__((packed));
+};
+
+#define FP_DECL_S(X) _FP_DECL(1,X)
+#define FP_UNPACK_RAW_S(X,val) _FP_UNPACK_RAW_1(S,X,val)
+#define FP_UNPACK_RAW_SP(X,val) _FP_UNPACK_RAW_1_P(S,X,val)
+#define FP_PACK_RAW_S(val,X) _FP_PACK_RAW_1(S,val,X)
+#define FP_PACK_RAW_SP(val,X) \
+ do { \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(S,val,X); \
+ } while (0)
+
+#define FP_UNPACK_S(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1(S,X,val); \
+ _FP_UNPACK_CANONICAL(S,1,X); \
+ } while (0)
+
+#define FP_UNPACK_SP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1_P(S,X,val); \
+ _FP_UNPACK_CANONICAL(S,1,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_S(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1(S,X,val); \
+ _FP_UNPACK_SEMIRAW(S,1,X); \
+ } while (0)
+
+#define FP_UNPACK_SEMIRAW_SP(X,val) \
+ do { \
+ _FP_UNPACK_RAW_1_P(S,X,val); \
+ _FP_UNPACK_SEMIRAW(S,1,X); \
+ } while (0)
+
+#define FP_PACK_S(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(S,1,X); \
+ _FP_PACK_RAW_1(S,val,X); \
+ } while (0)
+
+#define FP_PACK_SP(val,X) \
+ do { \
+ _FP_PACK_CANONICAL(S,1,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(S,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_S(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(S,1,X); \
+ _FP_PACK_RAW_1(S,val,X); \
+ } while (0)
+
+#define FP_PACK_SEMIRAW_SP(val,X) \
+ do { \
+ _FP_PACK_SEMIRAW(S,1,X); \
+ if (!FP_INHIBIT_RESULTS) \
+ _FP_PACK_RAW_1_P(S,val,X); \
+ } while (0)
+
+#define FP_ISSIGNAN_S(X) _FP_ISSIGNAN(S,1,X)
+#define FP_NEG_S(R,X) _FP_NEG(S,1,R,X)
+#define FP_ADD_S(R,X,Y) _FP_ADD(S,1,R,X,Y)
+#define FP_SUB_S(R,X,Y) _FP_SUB(S,1,R,X,Y)
+#define FP_MUL_S(R,X,Y) _FP_MUL(S,1,R,X,Y)
+#define FP_DIV_S(R,X,Y) _FP_DIV(S,1,R,X,Y)
+#define FP_SQRT_S(R,X) _FP_SQRT(S,1,R,X)
+#define _FP_SQRT_MEAT_S(R,S,T,X,Q) _FP_SQRT_MEAT_1(R,S,T,X,Q)
+
+#define FP_CMP_S(r,X,Y,un) _FP_CMP(S,1,r,X,Y,un)
+#define FP_CMP_EQ_S(r,X,Y) _FP_CMP_EQ(S,1,r,X,Y)
+#define FP_CMP_UNORD_S(r,X,Y) _FP_CMP_UNORD(S,1,r,X,Y)
+
+#define FP_TO_INT_S(r,X,rsz,rsg) _FP_TO_INT(S,1,r,X,rsz,rsg)
+#define FP_FROM_INT_S(X,r,rs,rt) _FP_FROM_INT(S,1,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_S(X) _FP_FRAC_HIGH_1(X)
+#define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1(X)
diff --git a/gcc/config/soft-fp/soft-fp.h b/gcc/config/soft-fp/soft-fp.h
new file mode 100644
index 000000000..230735734
--- /dev/null
+++ b/gcc/config/soft-fp/soft-fp.h
@@ -0,0 +1,213 @@
+/* Software floating-point emulation.
+ Copyright (C) 1997,1998,1999,2000,2002,2003,2005,2006
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com),
+ Jakub Jelinek (jj@ultra.linux.cz),
+ David S. Miller (davem@redhat.com) and
+ Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#ifndef SOFT_FP_H
+#define SOFT_FP_H
+
+#ifdef _LIBC
+#include <sfp-machine.h>
+#else
+#include "sfp-machine.h"
+#endif
+
+/* Allow sfp-machine to have its own byte order definitions. */
+#ifndef __BYTE_ORDER
+#ifdef _LIBC
+#include <endian.h>
+#else
+#error "endianness not defined by sfp-machine.h"
+#endif
+#endif
+
+#define _FP_WORKBITS 3
+#define _FP_WORK_LSB ((_FP_W_TYPE)1 << 3)
+#define _FP_WORK_ROUND ((_FP_W_TYPE)1 << 2)
+#define _FP_WORK_GUARD ((_FP_W_TYPE)1 << 1)
+#define _FP_WORK_STICKY ((_FP_W_TYPE)1 << 0)
+
+#ifndef FP_RND_NEAREST
+# define FP_RND_NEAREST 0
+# define FP_RND_ZERO 1
+# define FP_RND_PINF 2
+# define FP_RND_MINF 3
+#endif
+#ifndef FP_ROUNDMODE
+# define FP_ROUNDMODE FP_RND_NEAREST
+#endif
+
+/* By default don't care about exceptions. */
+#ifndef FP_EX_INVALID
+#define FP_EX_INVALID 0
+#endif
+#ifndef FP_EX_OVERFLOW
+#define FP_EX_OVERFLOW 0
+#endif
+#ifndef FP_EX_UNDERFLOW
+#define FP_EX_UNDERFLOW 0
+#endif
+#ifndef FP_EX_DIVZERO
+#define FP_EX_DIVZERO 0
+#endif
+#ifndef FP_EX_INEXACT
+#define FP_EX_INEXACT 0
+#endif
+#ifndef FP_EX_DENORM
+#define FP_EX_DENORM 0
+#endif
+
+#ifdef _FP_DECL_EX
+#define FP_DECL_EX \
+ int _fex = 0; \
+ _FP_DECL_EX
+#else
+#define FP_DECL_EX int _fex = 0
+#endif
+
+#ifndef FP_INIT_ROUNDMODE
+#define FP_INIT_ROUNDMODE do {} while (0)
+#endif
+
+#ifndef FP_HANDLE_EXCEPTIONS
+#define FP_HANDLE_EXCEPTIONS do {} while (0)
+#endif
+
+#ifndef FP_INHIBIT_RESULTS
+/* By default the results are always written. sfp-machine.h may
+ * override this, e.g. to check whether some exceptions are unmasked
+ * and inhibit the store in that case.
+ */
+#define FP_INHIBIT_RESULTS 0
+#endif
+
+#define FP_SET_EXCEPTION(ex) \
+ _fex |= (ex)
+
+#define FP_UNSET_EXCEPTION(ex) \
+ _fex &= ~(ex)
+
+#define FP_CLEAR_EXCEPTIONS \
+ _fex = 0
+
+#define _FP_ROUND_NEAREST(wc, X) \
+do { \
+ if ((_FP_FRAC_LOW_##wc(X) & 15) != _FP_WORK_ROUND) \
+ _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \
+} while (0)
+
+#define _FP_ROUND_ZERO(wc, X) (void)0
+
+#define _FP_ROUND_PINF(wc, X) \
+do { \
+ if (!X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \
+ _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \
+} while (0)
+
+#define _FP_ROUND_MINF(wc, X) \
+do { \
+ if (X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \
+ _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \
+} while (0)
+
+#define _FP_ROUND(wc, X) \
+do { \
+ if (_FP_FRAC_LOW_##wc(X) & 7) \
+ FP_SET_EXCEPTION(FP_EX_INEXACT); \
+ switch (FP_ROUNDMODE) \
+ { \
+ case FP_RND_NEAREST: \
+ _FP_ROUND_NEAREST(wc,X); \
+ break; \
+ case FP_RND_ZERO: \
+ _FP_ROUND_ZERO(wc,X); \
+ break; \
+ case FP_RND_PINF: \
+ _FP_ROUND_PINF(wc,X); \
+ break; \
+ case FP_RND_MINF: \
+ _FP_ROUND_MINF(wc,X); \
+ break; \
+ } \
+} while (0)
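+
+/* Worked example (illustrative): with _FP_WORKBITS == 3, the low four
+   bits of the working fraction are L R G S, where L (_FP_WORK_LSB) is
+   the least significant bit of the result and R, G, S are the round,
+   guard and sticky bits below it.  _FP_ROUND_NEAREST adds R unless
+   those four bits are exactly 0100 -- the halfway case with L already
+   even -- so the carry into L implements round-half-to-even:
+
+     L R G S -> carry into L?
+     0 1 0 0    no   (tie, L already even)
+     1 1 0 0    yes  (tie, rounds L to even)
+     x 1 0 1    yes  (above halfway)
+     x 0 1 1    no   (below halfway)
+*/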
+
+#define FP_CLS_NORMAL 0
+#define FP_CLS_ZERO 1
+#define FP_CLS_INF 2
+#define FP_CLS_NAN 3
+
+#define _FP_CLS_COMBINE(x,y) (((x) << 2) | (y))
+
+#include "op-1.h"
+#include "op-2.h"
+#include "op-4.h"
+#include "op-8.h"
+#include "op-common.h"
+
+/* Sigh. Silly things longlong.h needs. */
+#define UWtype _FP_W_TYPE
+#define W_TYPE_SIZE _FP_W_TYPE_SIZE
+
+typedef int QItype __attribute__((mode(QI)));
+typedef int SItype __attribute__((mode(SI)));
+typedef int DItype __attribute__((mode(DI)));
+typedef unsigned int UQItype __attribute__((mode(QI)));
+typedef unsigned int USItype __attribute__((mode(SI)));
+typedef unsigned int UDItype __attribute__((mode(DI)));
+#if _FP_W_TYPE_SIZE == 32
+typedef unsigned int UHWtype __attribute__((mode(HI)));
+#elif _FP_W_TYPE_SIZE == 64
+typedef USItype UHWtype;
+#endif
+
+#ifndef CMPtype
+#define CMPtype int
+#endif
+
+#define SI_BITS (__CHAR_BIT__ * (int)sizeof(SItype))
+#define DI_BITS (__CHAR_BIT__ * (int)sizeof(DItype))
+
+#ifndef umul_ppmm
+#ifdef _LIBC
+#include <stdlib/longlong.h>
+#else
+#include "longlong.h"
+#endif
+#endif
+
+#ifdef _LIBC
+#include <stdlib.h>
+#else
+extern void abort (void);
+#endif
+
+#endif
diff --git a/gcc/config/soft-fp/subdf3.c b/gcc/config/soft-fp/subdf3.c
new file mode 100644
index 000000000..3978b5299
--- /dev/null
+++ b/gcc/config/soft-fp/subdf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a - b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+DFtype __subdf3(DFtype a, DFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A); FP_DECL_D(B); FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_D(A, a);
+ FP_UNPACK_SEMIRAW_D(B, b);
+ FP_SUB_D(R, A, B);
+ FP_PACK_SEMIRAW_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/subsf3.c b/gcc/config/soft-fp/subsf3.c
new file mode 100644
index 000000000..f1cbdd1ff
--- /dev/null
+++ b/gcc/config/soft-fp/subsf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a - b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+SFtype __subsf3(SFtype a, SFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_S(A); FP_DECL_S(B); FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_S(A, a);
+ FP_UNPACK_SEMIRAW_S(B, b);
+ FP_SUB_S(R, A, B);
+ FP_PACK_SEMIRAW_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/subtf3.c b/gcc/config/soft-fp/subtf3.c
new file mode 100644
index 000000000..7ba4c8c5e
--- /dev/null
+++ b/gcc/config/soft-fp/subtf3.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+ Return a - b
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+TFtype __subtf3(TFtype a, TFtype b)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(R);
+ TFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(A, a);
+ FP_UNPACK_SEMIRAW_Q(B, b);
+ FP_SUB_Q(R, A, B);
+ FP_PACK_SEMIRAW_Q(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/t-softfp b/gcc/config/soft-fp/t-softfp
new file mode 100644
index 000000000..b5959077d
--- /dev/null
+++ b/gcc/config/soft-fp/t-softfp
@@ -0,0 +1,107 @@
+# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Targets using soft-fp should define the following variables:
+#
+# softfp_float_modes: a list of soft-float floating-point modes,
+# e.g. sf df
+# softfp_int_modes: a list of integer modes for which to define conversions,
+# e.g. si di
+# softfp_extensions: a list of extensions between floating-point modes,
+# e.g. sfdf
+# softfp_truncations: a list of truncations between floating-point modes,
+# e.g. dfsf
+# softfp_machine_header: the target sfp-machine.h file (relative to config/),
+# e.g. rs6000/sfp-machine.h
+#
+# Extensions and truncations should include those where only one mode
+# is a soft-float mode; for example, sftf where sf is hard-float and
+# tf is soft-float.
+#
+# If the libgcc2.c functions should not be replaced, also define:
+#
+# softfp_exclude_libgcc2 := y
+#
+# Avoiding replacing the libgcc2.c functions is a temporary measure
+# for targets with both hard-float and soft-float multilibs, since
+# these variables apply for all multilibs. With toplevel libgcc,
+# soft-fp can be used conditionally on the multilib instead.
+#
+# If the code should not be compiled at all for some multilibs, define:
+#
+# softfp_wrap_start: text to put at the start of wrapper source files,
+# output with echo
+# e.g. '#ifndef __powerpc64__'
+# softfp_wrap_end: text to put at the end of wrapper source files,
+# e.g. '#endif'
+#
+# This is another temporary measure.
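+#
+# For illustration only (a hypothetical fragment, not part of this
+# file), a target t-file along the lines of rs6000's might set:
+#
+#   softfp_float_modes := tf
+#   softfp_int_modes := si di
+#   softfp_extensions := sftf dftf
+#   softfp_truncations := tfsf tfdf
+#   softfp_machine_header := rs6000/sfp-machine.h
+#   softfp_exclude_libgcc2 := y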
+
+softfp_float_funcs = add$(m)3 div$(m)3 eq$(m)2 ge$(m)2 le$(m)2 mul$(m)3 \
+ neg$(m)2 sub$(m)3 unord$(m)2
+softfp_floatint_funcs = fix$(m)$(i) fixuns$(m)$(i) \
+ float$(i)$(m) floatun$(i)$(m)
+
+softfp_func_list := \
+ $(foreach m,$(softfp_float_modes), \
+ $(softfp_float_funcs) \
+ $(foreach i,$(softfp_int_modes), \
+ $(softfp_floatint_funcs))) \
+ $(foreach e,$(softfp_extensions),extend$(e)2) \
+ $(foreach t,$(softfp_truncations),trunc$(t)2)
+
+ifeq ($(softfp_exclude_libgcc2),y)
+# This list is taken from mklibgcc.in and doesn't presently allow for
+# 64-bit targets where si should become di and di should become ti.
+softfp_func_list := $(filter-out floatdidf floatdisf fixunsdfsi fixunssfsi \
+ fixunsdfdi fixdfdi fixunssfdi fixsfdi fixxfdi fixunsxfdi \
+ floatdixf fixunsxfsi fixtfdi fixunstfdi floatditf \
+ floatundidf floatundisf floatundixf floatunditf,$(softfp_func_list))
+endif
+
+ifeq ($(softfp_wrap_start),)
+softfp_file_list := \
+ $(addsuffix .c,$(addprefix $(srcdir)/config/soft-fp/,$(softfp_func_list)))
+else
+softfp_file_list := $(addsuffix .c,$(softfp_func_list))
+
+$(softfp_file_list):
+ echo $(softfp_wrap_start) > $@
+ echo '#include "config/soft-fp/$@"' >> $@
+ echo $(softfp_wrap_end) >> $@
+endif
+
+LIB2FUNCS_EXTRA += $(softfp_file_list)
+
+ifneq ($(softfp_exclude_libgcc2),y)
+# Functions in libgcc2.c are excluded for each soft-float mode (a
+# target may have both soft-float and hard-float modes), for the fixed
+# list of integer modes (si and di) for which libgcc2.c defines any
+# such functions. Depending on the target, the si and di symbols may
+# in fact define di and ti functions.
+
+LIB2FUNCS_EXCLUDE += \
+ $(addprefix _,$(foreach m,$(softfp_float_modes), \
+ $(foreach i,si di, \
+ $(softfp_floatint_funcs))))
+endif
+
+SFP_MACHINE := sfp-machine.h
+
+$(SFP_MACHINE): $(srcdir)/config/$(softfp_machine_header)
+ cp $(srcdir)/config/$(softfp_machine_header) $(SFP_MACHINE)
diff --git a/gcc/config/soft-fp/truncdfsf2.c b/gcc/config/soft-fp/truncdfsf2.c
new file mode 100644
index 000000000..bd953912e
--- /dev/null
+++ b/gcc/config/soft-fp/truncdfsf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Truncate IEEE double into IEEE single
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "double.h"
+
+SFtype __truncdfsf2(DFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_D(A);
+ FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_D(A, a);
+#if _FP_W_TYPE_SIZE < _FP_FRACBITS_D
+ FP_TRUNC(S,D,1,2,R,A);
+#else
+ FP_TRUNC(S,D,1,1,R,A);
+#endif
+ FP_PACK_SEMIRAW_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
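+
+/* Illustrative note (editorial, not part of the upstream source): on a
+   soft-float target GCC lowers a narrowing conversion such as
+
+     float narrow (double x) { return (float) x; }
+
+   to a libcall to __truncdfsf2, which is how this routine is reached.  */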
diff --git a/gcc/config/soft-fp/trunctfdf2.c b/gcc/config/soft-fp/trunctfdf2.c
new file mode 100644
index 000000000..c3827b08a
--- /dev/null
+++ b/gcc/config/soft-fp/trunctfdf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Truncate IEEE quad into IEEE double
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+DFtype __trunctfdf2(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ FP_DECL_D(R);
+ DFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_TRUNC(D,Q,2,4,R,A);
+#else
+ FP_TRUNC(D,Q,1,2,R,A);
+#endif
+ FP_PACK_SEMIRAW_D(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/trunctfsf2.c b/gcc/config/soft-fp/trunctfsf2.c
new file mode 100644
index 000000000..676c937e2
--- /dev/null
+++ b/gcc/config/soft-fp/trunctfsf2.c
@@ -0,0 +1,54 @@
+/* Software floating-point emulation.
+ Truncate IEEE quad into IEEE single
+ Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@cygnus.com) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+SFtype __trunctfsf2(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ FP_DECL_S(R);
+ SFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_TRUNC(S,Q,1,4,R,A);
+#else
+ FP_TRUNC(S,Q,1,2,R,A);
+#endif
+ FP_PACK_SEMIRAW_S(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/trunctfxf2.c b/gcc/config/soft-fp/trunctfxf2.c
new file mode 100644
index 000000000..50d60bc5d
--- /dev/null
+++ b/gcc/config/soft-fp/trunctfxf2.c
@@ -0,0 +1,53 @@
+/* Software floating-point emulation.
+ Truncate IEEE quad into IEEE extended
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Uros Bizjak (ubizjak@gmail.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "extended.h"
+#include "quad.h"
+
+XFtype __trunctfxf2(TFtype a)
+{
+ FP_DECL_EX;
+ FP_DECL_Q(A);
+ FP_DECL_E(R);
+ XFtype r;
+
+ FP_INIT_ROUNDMODE;
+ FP_UNPACK_SEMIRAW_Q(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+ FP_TRUNC(E,Q,4,4,R,A);
+#else
+ FP_TRUNC(E,Q,2,2,R,A);
+#endif
+ FP_PACK_SEMIRAW_E(r, R);
+ FP_HANDLE_EXCEPTIONS;
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/unorddf2.c b/gcc/config/soft-fp/unorddf2.c
new file mode 100644
index 000000000..5ea63e2b9
--- /dev/null
+++ b/gcc/config/soft-fp/unorddf2.c
@@ -0,0 +1,44 @@
+/* Software floating-point emulation.
+ Return 1 iff a or b is a NaN, 0 otherwise.
+ Copyright (C) 2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Joseph Myers (joseph@codesourcery.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "double.h"
+
+CMPtype __unorddf2(DFtype a, DFtype b)
+{
+ FP_DECL_D(A); FP_DECL_D(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_D(A, a);
+ FP_UNPACK_RAW_D(B, b);
+ FP_CMP_UNORD_D(r, A, B);
+
+ return r;
+}
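+
+/* Illustrative note (editorial, not part of the upstream source): this
+   is the libcall behind unordered comparisons on soft-float targets;
+   for example, isunordered (a, b) from <math.h> is nonzero exactly when
+   __unorddf2 (a, b) is, i.e. when either argument is a NaN.  */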
diff --git a/gcc/config/soft-fp/unordsf2.c b/gcc/config/soft-fp/unordsf2.c
new file mode 100644
index 000000000..6d0afe853
--- /dev/null
+++ b/gcc/config/soft-fp/unordsf2.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Return 1 iff a or b is a NaN, 0 otherwise.
+ Copyright (C) 2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Joseph Myers (joseph@codesourcery.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "single.h"
+
+CMPtype __unordsf2(SFtype a, SFtype b)
+{
+ FP_DECL_S(A);
+ FP_DECL_S(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_S(A, a);
+ FP_UNPACK_RAW_S(B, b);
+ FP_CMP_UNORD_S(r, A, B);
+
+ return r;
+}
diff --git a/gcc/config/soft-fp/unordtf2.c b/gcc/config/soft-fp/unordtf2.c
new file mode 100644
index 000000000..9d1bd916c
--- /dev/null
+++ b/gcc/config/soft-fp/unordtf2.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+ Return 1 iff a or b is a NaN, 0 otherwise.
+ Copyright (C) 2006,2007 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Joseph Myers (joseph@codesourcery.com).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file into
+ combinations with other programs, and to distribute those
+ combinations without any restriction coming from the use of this
+ file. (The Lesser General Public License restrictions do apply in
+ other respects; for example, they cover modification of the file,
+ and distribution when not linked into a combine executable.)
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA. */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+CMPtype __unordtf2(TFtype a, TFtype b)
+{
+ FP_DECL_Q(A);
+ FP_DECL_Q(B);
+ CMPtype r;
+
+ FP_UNPACK_RAW_Q(A, a);
+ FP_UNPACK_RAW_Q(B, b);
+ FP_CMP_UNORD_Q(r, A, B);
+
+ return r;
+}
diff --git a/gcc/config/sol2-10.h b/gcc/config/sol2-10.h
new file mode 100644
index 000000000..92aeabede
--- /dev/null
+++ b/gcc/config/sol2-10.h
@@ -0,0 +1,23 @@
+/* Operating system specific defines to be used when targeting GCC for any
+ Solaris 2 system starting from Solaris 10.
+ Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Solaris 10 has the float and long double forms of math functions. */
+#undef TARGET_C99_FUNCTIONS
+#define TARGET_C99_FUNCTIONS 1
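+
+/* Illustrative effect (editorial note, not part of the original header):
+   with TARGET_C99_FUNCTIONS set, GCC may fold e.g.
+
+     (float) sin ((double) f)
+
+   into a direct call to sinf (f), because the float and long double
+   math entry points are known to exist.  */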
diff --git a/gcc/config/sol2-c.c b/gcc/config/sol2-c.c
new file mode 100644
index 000000000..ff42c3895
--- /dev/null
+++ b/gcc/config/sol2-c.c
@@ -0,0 +1,272 @@
+/* Solaris support needed only by C/C++ frontends.
+ Copyright (C) 2004, 2005, 2007, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "tm.h"
+#include "tm_p.h"
+
+#include "c-family/c-format.h"
+#include "intl.h"
+
+#include "cpplib.h"
+#include "c-family/c-pragma.h"
+#include "c-family/c-common.h"
+
+/* cmn_err only accepts "l" and "ll". */
+static const format_length_info cmn_err_length_specs[] =
+{
+ { "l", FMT_LEN_l, STD_C89, "ll", FMT_LEN_ll, STD_C89, 0 },
+ { NULL, FMT_LEN_none, STD_C89, NULL, FMT_LEN_none, STD_C89, 0 }
+};
+
+static const format_flag_spec cmn_err_flag_specs[] =
+{
+ { 'w', 0, 0, N_("field width"), N_("field width in printf format"), STD_C89 },
+ { 'L', 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
+ { 0, 0, 0, NULL, NULL, STD_C89 }
+};
+
+
+static const format_flag_pair cmn_err_flag_pairs[] =
+{
+ { 0, 0, 0, 0 }
+};
+
+static const format_char_info bitfield_string_type =
+ { "b", 1, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "cR", NULL };
+
+static const format_char_info cmn_err_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "dD", 0, STD_C89, { T89_I, BADLEN, BADLEN, T89_L, T9L_LL, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "", NULL },
+ { "oOxX",0, STD_C89, { T89_UI, BADLEN, BADLEN, T89_UL, T9L_ULL, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "", NULL },
+ { "u", 0, STD_C89, { T89_UI, BADLEN, BADLEN, T89_UL, T9L_ULL, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "", NULL },
+ { "c", 0, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "", NULL },
+ { "p", 1, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "c", NULL },
+ { "s", 1, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "cR", NULL },
+ { "b", 0, STD_C89, { T89_I, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "w", "", &bitfield_string_type },
+ { NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
+};
+
+const format_kind_info solaris_format_types[] = {
+ { "cmn_err", cmn_err_length_specs, cmn_err_char_table, "", NULL,
+ cmn_err_flag_specs, cmn_err_flag_pairs,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_EMPTY_PREC_OK,
+ 'w', 0, 0, 0, 'L', 0,
+ &integer_type_node, &integer_type_node
+ }
+};
+
+/* Handle #pragma align ALIGNMENT (VAR [, VAR]...) */
+
+static void
+solaris_pragma_align (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ tree t, x;
+ enum cpp_ttype ttype;
+ HOST_WIDE_INT low;
+
+ if (pragma_lex (&x) != CPP_NUMBER
+ || pragma_lex (&t) != CPP_OPEN_PAREN)
+ {
+ warning (0, "malformed %<#pragma align%>, ignoring");
+ return;
+ }
+
+ low = TREE_INT_CST_LOW (x);
+ if (TREE_INT_CST_HIGH (x) != 0
+ || (low != 1 && low != 2 && low != 4 && low != 8 && low != 16
+ && low != 32 && low != 64 && low != 128))
+ {
+ warning (0, "invalid alignment for %<#pragma align%>, ignoring");
+ return;
+ }
+
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma align%>, ignoring");
+ return;
+ }
+
+ while (1)
+ {
+ tree decl = identifier_global_value (t);
+ if (decl && DECL_P (decl))
+ warning (0, "%<#pragma align%> must appear before the declaration of "
+ "%D, ignoring", decl);
+ else
+ solaris_pending_aligns = tree_cons (t, build_tree_list (NULL, x),
+ solaris_pending_aligns);
+
+ ttype = pragma_lex (&t);
+ if (ttype == CPP_COMMA)
+ {
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma align%>");
+ return;
+ }
+ }
+ else if (ttype == CPP_CLOSE_PAREN)
+ {
+ if (pragma_lex (&t) != CPP_EOF)
+ warning (0, "junk at end of %<#pragma align%>");
+ return;
+ }
+ else
+ {
+ warning (0, "malformed %<#pragma align%>");
+ return;
+ }
+ }
+}
+
+/* Handle #pragma init (function [, function]...) */
+
+static void
+solaris_pragma_init (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ tree t;
+ enum cpp_ttype ttype;
+
+ if (pragma_lex (&t) != CPP_OPEN_PAREN)
+ {
+ warning (0, "malformed %<#pragma init%>, ignoring");
+ return;
+ }
+
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma init%>, ignoring");
+ return;
+ }
+
+ while (1)
+ {
+ tree decl = identifier_global_value (t);
+ if (decl && DECL_P (decl))
+ {
+ tree attrs = build_tree_list (get_identifier ("init"),
+ NULL);
+ TREE_USED (decl) = 1;
+ DECL_PRESERVE_P (decl) = 1;
+ decl_attributes (&decl, attrs, 0);
+ }
+ else
+ solaris_pending_inits = tree_cons (t, NULL, solaris_pending_inits);
+
+ ttype = pragma_lex (&t);
+ if (ttype == CPP_COMMA)
+ {
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma init%>");
+ return;
+ }
+ }
+ else if (ttype == CPP_CLOSE_PAREN)
+ {
+ if (pragma_lex (&t) != CPP_EOF)
+ warning (0, "junk at end of %<#pragma init%>");
+ return;
+ }
+ else
+ {
+ warning (0, "malformed %<#pragma init%>");
+ return;
+ }
+ }
+}
+
+/* Handle #pragma fini (function [, function]...) */
+
+static void
+solaris_pragma_fini (cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ tree t;
+ enum cpp_ttype ttype;
+
+ if (pragma_lex (&t) != CPP_OPEN_PAREN)
+ {
+ warning (0, "malformed %<#pragma fini%>, ignoring");
+ return;
+ }
+
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma fini%>, ignoring");
+ return;
+ }
+
+ while (1)
+ {
+ tree decl = identifier_global_value (t);
+ if (decl && DECL_P (decl))
+ {
+ tree attrs = build_tree_list (get_identifier ("fini"),
+ NULL);
+ TREE_USED (decl) = 1;
+ DECL_PRESERVE_P (decl) = 1;
+ decl_attributes (&decl, attrs, 0);
+ }
+ else
+ solaris_pending_finis = tree_cons (t, NULL, solaris_pending_finis);
+
+ ttype = pragma_lex (&t);
+ if (ttype == CPP_COMMA)
+ {
+ ttype = pragma_lex (&t);
+ if (ttype != CPP_NAME)
+ {
+ warning (0, "malformed %<#pragma fini%>");
+ return;
+ }
+ }
+ else if (ttype == CPP_CLOSE_PAREN)
+ {
+ if (pragma_lex (&t) != CPP_EOF)
+ warning (0, "junk at end of %<#pragma fini%>");
+ return;
+ }
+ else
+ {
+ warning (0, "malformed %<#pragma fini%>");
+ return;
+ }
+ }
+}
+
+/* Register Solaris-specific #pragma directives. */
+
+void
+solaris_register_pragmas (void)
+{
+ c_register_pragma_with_expansion (0, "align", solaris_pragma_align);
+ c_register_pragma (0, "init", solaris_pragma_init);
+ c_register_pragma (0, "fini", solaris_pragma_fini);
+}
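+
+/* Illustrative usage (hypothetical user code, editorial note): the
+   pragmas registered above are written as, e.g.,
+
+     #pragma align 8 (counter)
+     static char counter[64];
+
+     #pragma init (setup)
+     static void setup (void) { }
+
+   #pragma align must precede the declaration it names (see the warning
+   above); init/fini mark functions to run when the object is loaded
+   and unloaded, respectively.  */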
diff --git a/gcc/config/sol2-gld.h b/gcc/config/sol2-gld.h
new file mode 100644
index 000000000..5ab158235
--- /dev/null
+++ b/gcc/config/sol2-gld.h
@@ -0,0 +1,36 @@
+/* Definitions of target machine for GCC, for any machine running Solaris 2
+ using the GNU linker.
+
+ Copyright (C) 2002, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Undefine these so that attribute((init_priority)) works. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY 1
+
+/* GNU ld needs --export-dynamic to implement -rdynamic. */
+#undef RDYNAMIC_SPEC
+#define RDYNAMIC_SPEC "--export-dynamic"
+
+/* Solaris 11 build 135+ implements dl_iterate_phdr. */
+#if defined(HAVE_LD_EH_FRAME_HDR) && defined(TARGET_DL_ITERATE_PHDR)
+#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
+#endif /* HAVE_LD_EH_FRAME_HDR && TARGET_DL_ITERATE_PHDR */
diff --git a/gcc/config/sol2-protos.h b/gcc/config/sol2-protos.h
new file mode 100644
index 000000000..800629aab
--- /dev/null
+++ b/gcc/config/sol2-protos.h
@@ -0,0 +1,24 @@
+/* Operating system specific prototypes to be used when targeting GCC for any
+ Solaris 2 system.
+ Copyright 2004, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern void solaris_insert_attributes (tree, tree *);
+extern void solaris_register_pragmas (void);
+extern void solaris_output_init_fini (FILE *, tree);
+extern void solaris_assemble_visibility (tree, int);
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
new file mode 100644
index 000000000..d9f43ea26
--- /dev/null
+++ b/gcc/config/sol2.c
@@ -0,0 +1,156 @@
+/* General Solaris system support.
+ Copyright (C) 2004, 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "output.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+
+tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis;
+
+/* Attach any pending attributes for DECL to the list in *ATTRIBUTES.
+ Pending attributes come from #pragma or _Pragma, so this code is
+ only useful in the C family front ends, but it is included in
+ all languages to avoid changing the target machine initializer
+ depending on the language. */
+
+void
+solaris_insert_attributes (tree decl, tree *attributes)
+{
+ tree *x, next;
+
+ if (solaris_pending_aligns != NULL && TREE_CODE (decl) == VAR_DECL)
+ for (x = &solaris_pending_aligns; *x; x = &TREE_CHAIN (*x))
+ {
+ tree name = TREE_PURPOSE (*x);
+ tree value = TREE_VALUE (*x);
+ if (DECL_NAME (decl) == name)
+ {
+ if (lookup_attribute ("aligned", DECL_ATTRIBUTES (decl))
+ || lookup_attribute ("aligned", *attributes))
+ warning (0, "ignoring %<#pragma align%> for explicitly "
+ "aligned %q+D", decl);
+ else
+ *attributes = tree_cons (get_identifier ("aligned"), value,
+ *attributes);
+ next = TREE_CHAIN (*x);
+ ggc_free (*x);
+ *x = next;
+ break;
+ }
+ }
+
+ if (solaris_pending_inits != NULL && TREE_CODE (decl) == FUNCTION_DECL)
+ for (x = &solaris_pending_inits; *x; x = &TREE_CHAIN (*x))
+ {
+ tree name = TREE_PURPOSE (*x);
+ if (DECL_NAME (decl) == name)
+ {
+ *attributes = tree_cons (get_identifier ("init"), NULL,
+ *attributes);
+ TREE_USED (decl) = 1;
+ DECL_PRESERVE_P (decl) = 1;
+ next = TREE_CHAIN (*x);
+ ggc_free (*x);
+ *x = next;
+ break;
+ }
+ }
+
+ if (solaris_pending_finis != NULL && TREE_CODE (decl) == FUNCTION_DECL)
+ for (x = &solaris_pending_finis; *x; x = &TREE_CHAIN (*x))
+ {
+ tree name = TREE_PURPOSE (*x);
+ if (DECL_NAME (decl) == name)
+ {
+ *attributes = tree_cons (get_identifier ("fini"), NULL,
+ *attributes);
+ TREE_USED (decl) = 1;
+ DECL_PRESERVE_P (decl) = 1;
+ next = TREE_CHAIN (*x);
+ ggc_free (*x);
+ *x = next;
+ break;
+ }
+ }
+}
+
+/* Output initializer or finalizer entries for DECL to FILE. */
+
+void
+solaris_output_init_fini (FILE *file, tree decl)
+{
+ if (lookup_attribute ("init", DECL_ATTRIBUTES (decl)))
+ {
+ fprintf (file, PUSHSECTION_FORMAT, ".init");
+ ASM_OUTPUT_CALL (file, decl);
+ fprintf (file, "\t.popsection\n");
+ }
+
+ if (lookup_attribute ("fini", DECL_ATTRIBUTES (decl)))
+ {
+ fprintf (file, PUSHSECTION_FORMAT, ".fini");
+ ASM_OUTPUT_CALL (file, decl);
+ fprintf (file, "\t.popsection\n");
+ }
+}
+
+/* Emit an assembler directive to set symbol for DECL visibility to
+ the visibility type VIS, which must not be VISIBILITY_DEFAULT. */
+
+void
+solaris_assemble_visibility (tree decl ATTRIBUTE_UNUSED,
+ int vis ATTRIBUTE_UNUSED)
+{
+#ifdef HAVE_GAS_HIDDEN
+ /* Sun as uses .symbolic for STV_PROTECTED. STV_INTERNAL is marked as
+ `currently reserved', but the linker treats it like STV_HIDDEN. Sun
+ Studio 12.1 cc emits .hidden instead.
+
+ There are 3 Sun extensions GCC doesn't yet know about: STV_EXPORTED,
+ STV_SINGLETON, and STV_ELIMINATE.
+
+ See Linker and Libraries Guide, Ch. 2, Link-Editor, Defining
+ Additional Symbols, and Ch. 7, Object-File Format, Symbol Table
+ Section. */
+
+ static const char * const visibility_types[] = {
+ NULL, "symbolic", "hidden", "hidden"
+ };
+
+ const char *name, *type;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ type = visibility_types[vis];
+
+ fprintf (asm_out_file, "\t.%s\t", type);
+ assemble_name (asm_out_file, name);
+ fprintf (asm_out_file, "\n");
+#else
+ warning (OPT_Wattributes, "visibility attribute not supported "
+ "in this configuration; ignored");
+#endif
+}
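+
+/* Illustrative output (editorial note): for a declaration such as
+
+     int v __attribute__ ((visibility ("hidden")));
+
+   the HAVE_GAS_HIDDEN path above emits "\t.hidden\tv", since
+   VISIBILITY_HIDDEN indexes the "hidden" entry of visibility_types.  */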
diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h
new file mode 100644
index 000000000..eb0328ac2
--- /dev/null
+++ b/gcc/config/sol2.h
@@ -0,0 +1,310 @@
+/* Operating system specific defines to be used when targeting GCC for any
+ Solaris 2 system.
+ Copyright 2002, 2003, 2004, 2007, 2008, 2009, 2010, 2011, 2012
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* We use stabs-in-elf for debugging, because that is what the native
+ toolchain uses. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+/* Solaris 2 (at least as of 2.5.1) uses a 32-bit wchar_t. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Solaris 2 uses a wint_t different from the default. This is required
+ by the SCD 2.4.1, p. 6-83, Figure 6-66. */
+#undef WINT_TYPE
+#define WINT_TYPE "long int"
+
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE BITS_PER_WORD
+
+#define SIG_ATOMIC_TYPE "int"
+
+/* ??? This definition of int8_t follows the system header but does
+ not conform to C99. Likewise int_fast8_t, int_least8_t. */
+#define INT8_TYPE "char"
+#define INT16_TYPE "short int"
+#define INT32_TYPE "int"
+#define INT64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT8_TYPE "unsigned char"
+#define UINT16_TYPE "short unsigned int"
+#define UINT32_TYPE "unsigned int"
+#define UINT64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_LEAST8_TYPE "char"
+#define INT_LEAST16_TYPE "short int"
+#define INT_LEAST32_TYPE "int"
+#define INT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_LEAST8_TYPE "unsigned char"
+#define UINT_LEAST16_TYPE "short unsigned int"
+#define UINT_LEAST32_TYPE "unsigned int"
+#define UINT_LEAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INT_FAST8_TYPE "char"
+#define INT_FAST16_TYPE "int"
+#define INT_FAST32_TYPE "int"
+#define INT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "long long int")
+#define UINT_FAST8_TYPE "unsigned char"
+#define UINT_FAST16_TYPE "unsigned int"
+#define UINT_FAST32_TYPE "unsigned int"
+#define UINT_FAST64_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "long long unsigned int")
+
+#define INTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long int" : "int")
+#define UINTPTR_TYPE (LONG_TYPE_SIZE == 64 ? "long unsigned int" : "unsigned int")
+
+/* ??? Note: in order for -compat-bsd to work fully,
+ we must somehow arrange to fixincludes /usr/ucbinclude
+ and put the result in $(libsubdir)/ucbinclude. */
+
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC "\
+%{pthreads|pthread:-D_REENTRANT -D_PTHREADS} \
+%{!pthreads:%{!pthread:%{threads:-D_REENTRANT -D_SOLARIS_THREADS}}} \
+%{compat-bsd:-iwithprefixbefore ucbinclude -I/usr/ucbinclude} \
+"
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_SUB_OS_CPP_BUILTINS()
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("sun"); \
+ builtin_define ("__svr4__"); \
+ builtin_define ("__SVR4"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=svr4"); \
+ /* For C++ we need to add some additional macro \
+ definitions required by the C++ standard \
+ library. */ \
+ if (c_dialect_cxx ()) \
+ { \
+ builtin_define ("__STDC_VERSION__=199901L");\
+ builtin_define ("_XOPEN_SOURCE=600"); \
+ builtin_define ("_LARGEFILE_SOURCE=1"); \
+ builtin_define ("_LARGEFILE64_SOURCE=1"); \
+ builtin_define ("__EXTENSIONS__"); \
+ } \
+ TARGET_SUB_OS_CPP_BUILTINS(); \
+ } while (0)
+
+/* The system headers under Solaris 2 have been C++-aware since 2.0. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* It's always safe to pass -s, even if -g is not used. */
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{v:-V} %{Qy:} %{!Qn:-Qy} %{Ym,*} -s \
+%{fpic|fpie|fPIC|fPIE:-K PIC} \
+%(asm_cpu) \
+"
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/usr/ccs/bin/"
+
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/usr/ccs/lib/"
+#endif
+
+/* We don't use the standard LIB_SPEC only because we don't yet support c++. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{compat-bsd:-lucb -lsocket -lnsl -lelf -laio} \
+ %{!symbolic:\
+ %{pthreads|pthread:-lpthread " LIB_TLS_SPEC "} \
+ %{!pthreads:%{!pthread:%{threads:-lthread}}} \
+ %{p|pg:-ldl} -lc}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+/* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{p:mcrt1.o%s} \
+ %{!p: \
+ %{pg:gcrt1.o%s gmon.o%s} \
+ %{!pg:crt1.o%s}}}} \
+ crti.o%s %(startfile_arch) \
+ crtbegin.o%s"
+
+#undef STARTFILE_ARCH32_SPEC
+#define STARTFILE_ARCH32_SPEC "%{ansi:values-Xc.o%s} \
+ %{!ansi:values-Xa.o%s}"
+
+#undef STARTFILE_ARCH_SPEC
+#define STARTFILE_ARCH_SPEC STARTFILE_ARCH32_SPEC
+
+#undef LINK_ARCH32_SPEC_BASE
+#define LINK_ARCH32_SPEC_BASE \
+ "%{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ucblib:%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/usr/lib:%R/lib} \
+ %{!p:%{!pg:-Y P,%R/usr/ucblib:%R/usr/ccs/lib:%R/usr/lib:%R/lib}}} \
+ -R %R/usr/ucblib} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ccs/lib/libp:%R/usr/lib/libp:%R/usr/ccs/lib:%R/usr/lib:%R/lib} \
+ %{!p:%{!pg:-Y P,%R/usr/ccs/lib:%R/usr/lib:%R/lib}}}}"
+
+#undef LINK_ARCH32_SPEC
+#define LINK_ARCH32_SPEC LINK_ARCH32_SPEC_BASE
+
+#undef LINK_ARCH_SPEC
+#define LINK_ARCH_SPEC LINK_ARCH32_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{h*} %{v:-V} \
+ %{!shared:%{!static:%{rdynamic: " RDYNAMIC_SPEC "}}} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -dy %{!mimpure-text:-z text}} \
+ %{symbolic:-Bsymbolic -G -dy -z text} \
+ %{pthreads|pthread|threads:" LIB_THREAD_LDFLAGS_SPEC "} \
+ %(link_arch) \
+ %{Qy:} %{!Qn:-Qy}"
+
+/* With Sun ld, -rdynamic is a no-op. */
+#define RDYNAMIC_SPEC ""
+
+/* The Solaris linker doesn't understand constructor priorities. (The
+ GNU linker does support constructor priorities, so GNU ld
+ configuration files for Solaris override this setting.) */
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY 0
+
+/* collect2.c can only parse GNU nm -n output. Solaris nm needs -png to
+ produce the same format. */
+#define NM_FLAGS "-png"
+
+#define STDC_0_IN_SYSTEM_HEADERS 1
+
+/*
+ * Attempt to turn on access permissions for the stack.
+ *
+ * _SC_STACK_PROT is only defined for post-2.6 releases, but we want
+ * this code to run everywhere.  2.6 can change the stack protection
+ * but has no way to query it.
+ */
+
+/* sys/mman.h is not present on some non-Solaris configurations
+ that use sol2.h, so ENABLE_EXECUTE_STACK must use a magic
+ number instead of the appropriate PROT_* flags. */
+
+#define ENABLE_EXECUTE_STACK \
+ \
+/* #define STACK_PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) */ \
+ \
+static int need_enable_exec_stack; \
+ \
+static void check_enabling(void) __attribute__ ((constructor)); \
+static void check_enabling(void) \
+{ \
+ extern long sysconf(int); \
+ \
+ int prot = (int) sysconf(515 /* _SC_STACK_PROT */); \
+ if (prot != 7 /* STACK_PROT_RWX */) \
+ need_enable_exec_stack = 1; \
+} \
+ \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ extern int mprotect(void *, size_t, int); \
+ if (!need_enable_exec_stack) \
+ return; \
+ else { \
+ long size = getpagesize (); \
+ long mask = ~(size-1); \
+ char *page = (char *) (((long) addr) & mask); \
+ char *end = (char *) ((((long) (addr + TRAMPOLINE_SIZE)) & mask) + size); \
+ \
+ if (mprotect (page, end - page, 7 /* STACK_PROT_RWX */) < 0) \
+ perror ("mprotect of trampoline code"); \
+ } \
+}
+
+/* Support Solaris-specific format checking for cmn_err. */
+#define TARGET_N_FORMAT_TYPES 1
+#define TARGET_FORMAT_TYPES solaris_format_types
+
+/* #pragma init and #pragma fini are implemented on top of init and
+ fini attributes. */
+#define SOLARIS_ATTRIBUTE_TABLE \
+ { "init", 0, 0, true, false, false, NULL }, \
+ { "fini", 0, 0, true, false, false, NULL }
+
+/* Solaris/x86 as and gas support the common ELF .section/.pushsection
+ syntax. */
+#define PUSHSECTION_FORMAT "\t.pushsection\t%s\n"
+
+/* This is how to declare the size of a function. For Solaris, we output
+ any .init or .fini entries here. */
+#undef ASM_DECLARE_FUNCTION_SIZE
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+ do \
+ { \
+ if (!flag_inhibit_size_directive) \
+ ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \
+ solaris_output_init_fini (FILE, DECL); \
+ } \
+ while (0)
+
+/* Solaris 'as' has a bug: a .common directive in a .tbss or .tdata section
+   behaves as .tls_common rather than as normal non-TLS .common.  */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ do \
+ { \
+ if (TARGET_SUN_TLS \
+ && in_section \
+ && ((in_section->common.flags & SECTION_TLS) == SECTION_TLS)) \
+ switch_to_section (bss_section); \
+ fprintf ((FILE), "%s", COMMON_ASM_OP); \
+ assemble_name ((FILE), (NAME)); \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \
+ (SIZE), (ALIGN) / BITS_PER_UNIT); \
+ } \
+ while (0)
+
+#ifndef USE_GAS
+#undef TARGET_ASM_ASSEMBLE_VISIBILITY
+#define TARGET_ASM_ASSEMBLE_VISIBILITY solaris_assemble_visibility
+
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+
+#endif
+
+extern GTY(()) tree solaris_pending_aligns;
+extern GTY(()) tree solaris_pending_inits;
+extern GTY(()) tree solaris_pending_finis;
+
+/* Allow macro expansion in #pragma pack. */
+#define HANDLE_PRAGMA_PACK_WITH_EXPANSION
+
+#define TARGET_POSIX_IO
diff --git a/gcc/config/sol2.opt b/gcc/config/sol2.opt
new file mode 100644
index 000000000..c1a78cb02
--- /dev/null
+++ b/gcc/config/sol2.opt
@@ -0,0 +1,47 @@
+; Options for the Solaris 2 port of the compiler
+;
+; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+G
+Driver
+
+YP,
+Driver Joined
+
+Ym,
+Driver Joined
+
+compat-bsd
+Driver
+
+mimpure-text
+Target Report
+Pass -z text to linker
+
+pthread
+Driver
+
+pthreads
+Driver
+
+rdynamic
+Driver
+
+threads
+Driver
diff --git a/gcc/config/sparc/biarch64.h b/gcc/config/sparc/biarch64.h
new file mode 100644
index 000000000..6328a3e0a
--- /dev/null
+++ b/gcc/config/sparc/biarch64.h
@@ -0,0 +1,23 @@
+/* Definitions of target machine for GCC, for Sun SPARC.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify this in a cover file to provide bi-architecture (32/64) support. */
+
+#define SPARC_BI_ARCH
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
new file mode 100644
index 000000000..cca34ede5
--- /dev/null
+++ b/gcc/config/sparc/constraints.md
@@ -0,0 +1,148 @@
+;; Constraint definitions for SPARC.
+;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;; Unused letters:
+;;; ABCD P Z
+;;; a jkl q tuvwxyz
+
+
+;; Register constraints
+
+(define_register_constraint "b" "(TARGET_V9 && TARGET_VIS ? EXTRA_FP_REGS : NO_REGS)"
+ "Any floating-point register in VIS mode")
+
+(define_register_constraint "c" "FPCC_REGS"
+ "Floating-point condition code register")
+
+(define_register_constraint "d" "(TARGET_V9 && TARGET_VIS ? FP_REGS : NO_REGS)"
+ "Lower floating-point register in VIS mode")
+
+;; In the non-V9 case, coerce V9 'e' class to 'f', so we can use 'e' in the
+;; MD file for V8 and V9.
+(define_register_constraint "e" "(TARGET_FPU ? (TARGET_V9 ? EXTRA_FP_REGS : FP_REGS) : NO_REGS)"
+ "Any floating-point register")
+
+(define_register_constraint "f" "(TARGET_FPU ? FP_REGS : NO_REGS)"
+ "Lower floating-point register")
+
+(define_register_constraint "h" "(TARGET_V9 && TARGET_V8PLUS ? I64_REGS : NO_REGS)"
+ "64-bit global or out register in V8+ mode")
+
+
+;; Floating-point constant constraints
+
+(define_constraint "G"
+ "The floating-point zero constant"
+ (and (match_code "const_double")
+ (match_test "const_zero_operand (op, mode)")))
+
+
+;; Integer constant constraints
+
+(define_constraint "H"
+ "Valid operand of double arithmetic operation"
+ (and (match_code "const_double")
+ (match_test "arith_double_operand (op, DImode)")))
+
+(define_constraint "I"
+ "Signed 13-bit integer constant"
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM13_P (ival)")))
+
+(define_constraint "J"
+ "The integer zero constant"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "K"
+ "Signed 32-bit constant that can be loaded with a sethi instruction"
+ (and (match_code "const_int")
+ (match_test "SPARC_SETHI32_P (ival)")))
+
+(define_constraint "L"
+ "Signed 11-bit integer constant"
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM11_P (ival)")))
+
+(define_constraint "M"
+ "Signed 10-bit integer constant"
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM10_P (ival)")))
+
+(define_constraint "N"
+ "Signed constant that can be loaded with a sethi instruction"
+ (and (match_code "const_int")
+ (match_test "SPARC_SETHI_P (ival)")))
+
+(define_constraint "O"
+ "The 4096 constant"
+ (and (match_code "const_int")
+ (match_test "ival == 4096")))
+
+
+;; Extra constraints
+;; Our memory extra constraints have to emulate the behavior of 'm' and 'o',
+;; i.e. accept pseudo-registers during reload.
+
+(define_constraint "D"
+ "const_vector"
+ (and (match_code "const_vector")
+ (match_test "GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT")))
+
+(define_constraint "Q"
+ "Floating-point constant that can be loaded with a sethi instruction"
+ (and (match_code "const_double")
+ (match_test "fp_sethi_p (op)")))
+
+(define_constraint "R"
+ "Floating-point constant that can be loaded with a move instruction"
+ (and (match_code "const_double")
+ (match_test "fp_mov_p (op)")))
+
+(define_constraint "S"
+ "Floating-point constant that can be loaded with a high/lo_sum sequence"
+ (and (match_code "const_double")
+ (match_test "fp_high_losum_p (op)")))
+
+;; Not needed in 64-bit mode
+(define_constraint "T"
+ "Memory reference whose address is aligned to 8-byte boundary"
+ (and (match_test "TARGET_ARCH32")
+ (match_code "mem,reg")
+ (match_test "memory_ok_for_ldd (op)")))
+
+;; Not needed in 64-bit mode
+(define_constraint "U"
+ "Pseudo-register or hard even-numbered integer register"
+ (and (match_test "TARGET_ARCH32")
+ (match_code "reg")
+ (ior (match_test "REGNO (op) < FIRST_PSEUDO_REGISTER")
+ (not (match_test "reload_in_progress && reg_renumber [REGNO (op)] < 0")))
+ (match_test "register_ok_for_ldd (op)")))
+
+;; Equivalent to 'T' but available in 64-bit mode
+(define_constraint "W"
+ "Memory reference for 'e' constraint floating-point register"
+ (and (match_code "mem,reg")
+ (match_test "memory_ok_for_ldd (op)")))
+
+(define_constraint "Y"
+ "The vector zero constant"
+ (and (match_code "const_vector")
+ (match_test "const_zero_operand (op, mode)")))
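+
+;; Illustrative use (hypothetical pattern, editorial note): an insn
+;; operand written as (match_operand:SI 1 "arith_operand" "rI") accepts
+;; either a register or a signed 13-bit immediate via the "I" constraint
+;; defined above.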
diff --git a/gcc/config/sparc/crtfastmath.c b/gcc/config/sparc/crtfastmath.c
new file mode 100644
index 000000000..04727ec94
--- /dev/null
+++ b/gcc/config/sparc/crtfastmath.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+ * Contributed by David S. Miller (davem@redhat.com)
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#define FPRS_NS (1 << 22) /* Non-Standard fpu results */
+
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+ unsigned int fsr;
+
+  /* This works for the 64-bit case because, even if a 32-bit ld/st of
+     the fsr register modifies the upper 32 bits, the only things up
+     there are the 3 other condition codes, which are "don't care" at
+     the time that this runs.  */
+
+ __asm__("st %%fsr, %0"
+ : "=m" (fsr));
+
+ fsr |= FPRS_NS;
+
+ __asm__("ld %0, %%fsr"
+ : : "m" (fsr));
+}
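+
+/* Editorial note: this file is linked only when the driver's
+   ENDFILE_SPEC adds crtfastmath.o%s (for -ffast-math and friends; see
+   e.g. the sparc/freebsd.h ENDFILE_SPEC below), so the FSR
+   nonstandard-results bit is enabled only for fast-math builds.  */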
diff --git a/gcc/config/sparc/cypress.md b/gcc/config/sparc/cypress.md
new file mode 100644
index 000000000..633c0fd77
--- /dev/null
+++ b/gcc/config/sparc/cypress.md
@@ -0,0 +1,50 @@
+;; Scheduling description for SPARC Cypress.
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The Cypress is a pretty simple single-issue processor.
+
+(define_automaton "cypress_0,cypress_1")
+
+(define_cpu_unit "cyp_memory, cyp_fpalu" "cypress_0")
+(define_cpu_unit "cyp_fpmds" "cypress_1")
+
+(define_insn_reservation "cyp_load" 2
+ (and (eq_attr "cpu" "cypress")
+ (eq_attr "type" "load,sload,fpload"))
+ "cyp_memory, nothing")
+
+(define_insn_reservation "cyp_fp_alu" 5
+ (and (eq_attr "cpu" "cypress")
+ (eq_attr "type" "fp,fpmove"))
+ "cyp_fpalu, nothing*3")
+
+(define_insn_reservation "cyp_fp_mult" 7
+ (and (eq_attr "cpu" "cypress")
+ (eq_attr "type" "fpmul"))
+ "cyp_fpmds, nothing*5")
+
+(define_insn_reservation "cyp_fp_div" 37
+ (and (eq_attr "cpu" "cypress")
+ (eq_attr "type" "fpdivs,fpdivd"))
+ "cyp_fpmds, nothing*35")
+
+(define_insn_reservation "cyp_fp_sqrt" 63
+ (and (eq_attr "cpu" "cypress")
+ (eq_attr "type" "fpsqrts,fpsqrtd"))
+ "cyp_fpmds, nothing*61")
diff --git a/gcc/config/sparc/freebsd.h b/gcc/config/sparc/freebsd.h
new file mode 100644
index 000000000..76c27d39b
--- /dev/null
+++ b/gcc/config/sparc/freebsd.h
@@ -0,0 +1,177 @@
+/* Definitions for Sun SPARC64 running FreeBSD using the ELF format
+ Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by David E. O'Brien <obrien@FreeBSD.org> and BSDi.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+/* FreeBSD needs the platform name (sparc64) defined.
+   Emacs etc. need to know whether the architecture is 64-bit or 32-bit.
+   This also selects which targets are available via -mcpu. */
+
+#undef FBSD_TARGET_CPU_CPP_BUILTINS
+#define FBSD_TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__sparc64__"); \
+ builtin_define ("__sparc__"); \
+ builtin_define ("__sparc_v9__"); \
+ builtin_define ("__sparcv9"); \
+ } \
+ while (0)
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)"
+
+#define LINK_SPEC "%(link_arch) \
+ %{!mno-relax:%{!r:-relax}} \
+ %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{symbolic:-Bsymbolic} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker %(fbsd_dynamic_linker) } \
+ %{static:-Bstatic}}"
+
+
+/************************[ Target stuff ]***********************************/
+
+/* Define the actual types of some ANSI-mandated types.
+ Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.c,
+ c-common.c, and config/<arch>/<arch>.h. */
+
+/* Earlier headers may get this wrong for FreeBSD.
+ We use the GCC defaults instead. */
+#undef WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Define for support of TFmode long double.
+ SPARC ABI says that long double is 4 words. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+/* Define this to set the long double type size used in libgcc2.c, which
+   cannot depend on target_flags. */
+#if defined(__arch64__) || defined(__LONG_DOUBLE_128__)
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+/* Definitions for 64-bit SPARC running systems with ELF. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/sparc64 ELF)");
+
+#define TARGET_ELF 1
+
+/* XXX */
+/* A 64-bit V9 compiler with stack bias,
+   in a Medium/Low code model environment. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_64BIT + MASK_PTR64 /* + MASK_FASTER_STRUCTS */ \
+ + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU \
+ + MASK_LONG_DOUBLE_128 /* + MASK_HARD_QUAD */)
+
+/* The default code model. */
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDLOW
+
+#define ENABLE_EXECUTE_STACK \
+ static int need_enable_exec_stack; \
+ static void check_enabling(void) __attribute__ ((constructor)); \
+ static void check_enabling(void) \
+ { \
+ extern int sysctlbyname(const char *, void *, size_t *, void *, size_t);\
+ int prot = 0; \
+ size_t len = sizeof(prot); \
+ \
+ sysctlbyname ("kern.stackprot", &prot, &len, NULL, 0); \
+ if (prot != 7) \
+ need_enable_exec_stack = 1; \
+ } \
+ extern void __enable_execute_stack (void *); \
+ void __enable_execute_stack (void *addr) \
+ { \
+ if (!need_enable_exec_stack) \
+ return; \
+ else { \
+ /* 7 is PROT_READ | PROT_WRITE | PROT_EXEC */ \
+ if (mprotect (addr, TRAMPOLINE_SIZE, 7) < 0) \
+ perror ("mprotect of trampoline code"); \
+ } \
+ }
+
+
+/************************[ Assembler stuff ]********************************/
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* XXX2 */
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*.L%s%lu", PREFIX, (unsigned long)(NUM))
+
+
+/************************[ Debugger stuff ]*********************************/
+
+/* This is the char to use for continuation (in case we need to turn
+ continuation back on). */
+
+#undef DBX_CONTIN_CHAR
+#define DBX_CONTIN_CHAR '?'
+
+/* DWARF bits. */
+
+/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
+   Obviously the Dwarf2 folks haven't tried to actually build systems
+ with their spec. On a 64-bit system, only 64-bit relocs become
+ RELATIVE relocations. */
+
+/* #define DWARF_OFFSET_SIZE PTR_SIZE */
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_SUN_TLS
+#undef TARGET_GNU_TLS
+#define TARGET_SUN_TLS 0
+#define TARGET_GNU_TLS 1
+#endif
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} " \
+ FBSD_ENDFILE_SPEC
+
+/* We use GNU ld, so undefine these so that attribute((init_priority)) works. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
diff --git a/gcc/config/sparc/gmon-sol2.c b/gcc/config/sparc/gmon-sol2.c
new file mode 100644
index 000000000..452d98d7d
--- /dev/null
+++ b/gcc/config/sparc/gmon-sol2.c
@@ -0,0 +1,420 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. [rescinded 22 July 1999]
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Mangled into a form that works on SPARC Solaris 2 by Mark Eichin
+ * for Cygnus Support, July 1992.
+ */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include <fcntl.h> /* for creat() */
+
+#if 0
+#include "sparc/gmon.h"
+#else
+struct phdr {
+ char *lpc;
+ char *hpc;
+ int ncnt;
+};
+#define HISTFRACTION 2
+#define HISTCOUNTER unsigned short
+#define HASHFRACTION 1
+#define ARCDENSITY 2
+#define MINARCS 50
+struct tostruct {
+ char *selfpc;
+ long count;
+ unsigned short link;
+};
+struct rawarc {
+ unsigned long raw_frompc;
+ unsigned long raw_selfpc;
+ long raw_count;
+};
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
+
+#endif
+
+/* extern mcount() asm ("mcount"); */
+/*extern*/ char *minbrk /* asm ("minbrk") */;
+
+ /*
+ * froms is actually a bunch of unsigned shorts indexing tos
+ */
+static int profiling = 3;
+static unsigned short *froms;
+static struct tostruct *tos = 0;
+static long tolimit = 0;
+static char *s_lowpc = 0;
+static char *s_highpc = 0;
+static unsigned long s_textsize = 0;
+
+static int ssiz;
+static char *sbuf;
+static int s_scale;
+    /* see profil(2) where this is described (incorrectly) */
+#define SCALE_1_TO_1 0x10000L
+
+#define MSG "No space for profiling buffer(s)\n"
+
+static void moncontrol (int);
+extern void monstartup (char *, char *);
+extern void _mcleanup (void);
+
+void monstartup(char *lowpc, char *highpc)
+{
+ int monsize;
+ char *buffer;
+ register int o;
+
+ /*
+ * round lowpc and highpc to multiples of the density we're using
+ * so the rest of the scaling (here and in gprof) stays in ints.
+ */
+ lowpc = (char *)
+ ROUNDDOWN((unsigned long)lowpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_lowpc = lowpc;
+ highpc = (char *)
+ ROUNDUP((unsigned long)highpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_highpc = highpc;
+ s_textsize = highpc - lowpc;
+ monsize = (s_textsize / HISTFRACTION) + sizeof(struct phdr);
+ buffer = sbrk( monsize );
+ if ( buffer == (char *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ return;
+ }
+ froms = (unsigned short *) sbrk( s_textsize / HASHFRACTION );
+ if ( froms == (unsigned short *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ return;
+ }
+ tolimit = s_textsize * ARCDENSITY / 100;
+ if ( tolimit < MINARCS ) {
+ tolimit = MINARCS;
+ } else if ( tolimit > 65534 ) {
+ tolimit = 65534;
+ }
+ tos = (struct tostruct *) sbrk( tolimit * sizeof( struct tostruct ) );
+ if ( tos == (struct tostruct *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ tos = 0;
+ return;
+ }
+ minbrk = sbrk(0);
+ tos[0].link = 0;
+ sbuf = buffer;
+ ssiz = monsize;
+ ( (struct phdr *) buffer ) -> lpc = lowpc;
+ ( (struct phdr *) buffer ) -> hpc = highpc;
+ ( (struct phdr *) buffer ) -> ncnt = ssiz;
+ monsize -= sizeof(struct phdr);
+ if ( monsize <= 0 )
+ return;
+ o = highpc - lowpc;
+ if( monsize < o )
+#ifndef hp300
+ s_scale = ( (float) monsize / o ) * SCALE_1_TO_1;
+#else /* avoid floating point */
+ {
+ int quot = o / monsize;
+
+ if (quot >= 0x10000)
+ s_scale = 1;
+ else if (quot >= 0x100)
+ s_scale = 0x10000 / quot;
+ else if (o >= 0x800000)
+ s_scale = 0x1000000 / (o / (monsize >> 8));
+ else
+ s_scale = 0x1000000 / ((o << 8) / monsize);
+ }
+#endif
+ else
+ s_scale = SCALE_1_TO_1;
+ moncontrol(1);
+}
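+
+/*
+ * Worked example (illustrative): with HISTFRACTION 2 the histogram part
+ * of the buffer is half the size of the text range, so monsize / o is
+ * 1/2 and s_scale becomes 0x8000, half of SCALE_1_TO_1, i.e. the
+ * pc-to-counter mapping handed to profil(2) is compressed by a factor
+ * of two.
+ */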
+
+void
+_mcleanup(void)
+{
+ int fd;
+ int fromindex;
+ int endfrom;
+ char *frompc;
+ int toindex;
+ struct rawarc rawarc;
+ char *profdir;
+ const char *proffile;
+ char *progname;
+ char buf[PATH_MAX];
+ extern char **___Argv;
+
+ moncontrol(0);
+
+ if ((profdir = getenv("PROFDIR")) != NULL) {
+	/* If PROFDIR is set but empty, no profiling output is produced.  */
+ if (*profdir == '\0') {
+ return;
+ }
+
+ progname=strrchr(___Argv[0], '/');
+ if (progname == NULL)
+ progname=___Argv[0];
+ else
+ progname++;
+
+ sprintf(buf, "%s/%ld.%s", profdir, (long) getpid(), progname);
+ proffile = buf;
+ } else {
+ proffile = "gmon.out";
+ }
+
+ fd = creat( proffile, 0666 );
+ if ( fd < 0 ) {
+ perror( proffile );
+ return;
+ }
+# ifdef DEBUG
+ fprintf( stderr , "[mcleanup] sbuf 0x%x ssiz %d\n" , sbuf , ssiz );
+# endif /* DEBUG */
+ write( fd , sbuf , ssiz );
+ endfrom = s_textsize / (HASHFRACTION * sizeof(*froms));
+ for ( fromindex = 0 ; fromindex < endfrom ; fromindex++ ) {
+ if ( froms[fromindex] == 0 ) {
+ continue;
+ }
+ frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms));
+ for (toindex=froms[fromindex]; toindex!=0; toindex=tos[toindex].link) {
+# ifdef DEBUG
+ fprintf( stderr ,
+ "[mcleanup] frompc 0x%x selfpc 0x%x count %d\n" ,
+ frompc , tos[toindex].selfpc , tos[toindex].count );
+# endif /* DEBUG */
+ rawarc.raw_frompc = (unsigned long) frompc;
+ rawarc.raw_selfpc = (unsigned long) tos[toindex].selfpc;
+ rawarc.raw_count = tos[toindex].count;
+ write( fd , &rawarc , sizeof rawarc );
+ }
+ }
+ close( fd );
+}
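+
+/* Illustrative sketch (an editorial addition, not compiled here): a
+   minimal reader for the gmon.out written above, assuming the same
+   layout -- a struct phdr at offset 0, histogram data filling the rest
+   of the first ncnt bytes (the fseek skips both at once), then struct
+   rawarc records until end of file.
+
+   #include <stdio.h>
+
+   int dump_gmon (const char *path)
+   {
+     struct phdr hdr;
+     struct rawarc arc;
+     FILE *fp = fopen (path, "rb");
+
+     if (fp == NULL || fread (&hdr, sizeof hdr, 1, fp) != 1)
+       return -1;
+     fseek (fp, hdr.ncnt, SEEK_SET);
+     while (fread (&arc, sizeof arc, 1, fp) == 1)
+       printf ("arc %#lx -> %#lx count %ld\n",
+               arc.raw_frompc, arc.raw_selfpc, arc.raw_count);
+     fclose (fp);
+     return 0;
+   }
+*/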
+
+/*
+ * The SPARC stack frame is only held together by the frame pointers
+ * in the register windows. According to the SVR4 SPARC ABI
+ * Supplement, Low Level System Information/Operating System
+ * Interface/Software Trap Types, a type 3 trap will flush all of the
+ * register windows to the stack, which will make it possible to walk
+ * the frames and find the return addresses.
+ * However, it seems awfully expensive to incur a trap (system
+ * call) for every function call. It turns out that "call" simply puts
+ * the return address in %o7 expecting the "save" in the procedure to
+ * shift it into %i7; this means that before the "save" occurs, %o7
+ * contains the address of the call to mcount, and %i7 still contains
+ * the caller above that. The asm mcount here simply saves those
+ * registers in argument registers and branches to internal_mcount,
+ * simulating a call with arguments.
+ * Kludges:
+ * 1) the branch to internal_mcount is hard coded; it should be
+ * possible to tell asm to use the assembler-name of a symbol.
+ * 2) in theory, the function calling mcount could have saved %i7
+ * somewhere and reused the register; in practice, I *think* this will
+ * break longjmp (and maybe the debugger) but I'm not certain. (I take
+ * some comfort in the knowledge that it will break the native mcount
+ * as well.)
+ * 3) if builtin_return_address worked, this could be portable.
+ * However, it would really have to be optimized for arguments of 0
+ * and 1 and do something like what we have here in order to avoid the
+ * trap per function call performance hit.
+ * 4) the atexit and monsetup calls prevent this from simply
+ * being a leaf routine that doesn't do a "save" (and would thus have
+ * access to %o7 and %i7 directly) but the call to write() at the end
+ * would have also prevented this.
+ *
+ * -- [eichin:19920702.1107EST]
+ */
+
+static void internal_mcount (char *, unsigned short *) __attribute__ ((used));
+
+/* i7 == last ret, -> frompcindex */
+/* o7 == current ret, -> selfpc */
+/* Solaris 2 libraries use _mcount. */
+asm(".global _mcount; _mcount: mov %i7,%o1; mov %o7,%o0;b,a internal_mcount");
+/* This is for compatibility with old versions of gcc which used mcount. */
+asm(".global mcount; mcount: mov %i7,%o1; mov %o7,%o0;b,a internal_mcount");
+
+static void internal_mcount(char *selfpc, unsigned short *frompcindex)
+{
+ register struct tostruct *top;
+ register struct tostruct *prevtop;
+ register long toindex;
+ static char already_setup;
+
+ /*
+ * find the return address for mcount,
+ * and the return address for mcount's caller.
+ */
+
+ if(!already_setup) {
+ extern char etext[];
+ extern char _start[];
+ extern char _init[];
+ already_setup = 1;
+ monstartup(_start < _init ? _start : _init, etext);
+#ifdef USE_ONEXIT
+ on_exit(_mcleanup, 0);
+#else
+ atexit(_mcleanup);
+#endif
+ }
+ /*
+ * check that we are profiling
+ * and that we aren't recursively invoked.
+ */
+ if (profiling) {
+ goto out;
+ }
+ profiling++;
+ /*
+ * check that frompcindex is a reasonable pc value.
+ * for example: signal catchers get called from the stack,
+ * not from text space. too bad.
+ */
+ frompcindex = (unsigned short *)((long)frompcindex - (long)s_lowpc);
+ if ((unsigned long)frompcindex > s_textsize) {
+ goto done;
+ }
+ frompcindex =
+ &froms[((long)frompcindex) / (HASHFRACTION * sizeof(*froms))];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ *frompcindex = toindex;
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = 0;
+ goto done;
+ }
+ top = &tos[toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count++;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * prevtop points to previous top.
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and none of the chain
+ * had top->selfpc == selfpc.
+ * so we allocate a new tostruct
+ * and link it to the head of the chain.
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ prevtop = top;
+ top = &tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * increment its count
+ * move it to the head of the chain.
+ */
+ top->count++;
+ toindex = prevtop->link;
+ prevtop->link = top->link;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+
+ }
+done:
+ profiling--;
+ /* and fall through */
+out:
+ return; /* normal return restores saved registers */
+
+overflow:
+ profiling++; /* halt further profiling */
+# define TOLIMIT "mcount: tos overflow\n"
+ write(2, TOLIMIT, sizeof(TOLIMIT));
+ goto out;
+}
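+
+/*
+ * data-structure note (illustrative): froms[] hashes each caller pc to
+ * the head of a singly linked chain in tos[]; each tostruct on the chain
+ * is one (caller, callee) arc with its count, and tos[0].link doubles as
+ * the allocation high-water mark, which is why new arcs are carved out
+ * with ++tos[0].link and checked against tolimit above.
+ */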
+
+/*
+ * Control profiling
+ * profiling is what mcount checks to see if
+ * all the data structures are ready.
+ */
+static void moncontrol(int mode)
+{
+ if (mode) {
+ /* start */
+ profil((unsigned short *)(sbuf + sizeof(struct phdr)),
+ ssiz - sizeof(struct phdr),
+ (long)s_lowpc, s_scale);
+ profiling = 0;
+ } else {
+ /* stop */
+ profil((unsigned short *)0, 0, 0, 0);
+ profiling = 3;
+ }
+}
diff --git a/gcc/config/sparc/hypersparc.md b/gcc/config/sparc/hypersparc.md
new file mode 100644
index 000000000..0d35b15e3
--- /dev/null
+++ b/gcc/config/sparc/hypersparc.md
@@ -0,0 +1,82 @@
+;; Scheduling description for HyperSPARC.
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The HyperSPARC is a dual-issue processor. It is not all that fancy.
+
+;; ??? There are some things not modelled. For example, sethi+or
+;; ??? coming right after each other are specifically identified and
+;; ??? dual-issued by the processor. Similarly for sethi+ld[reg+lo].
+;; ??? Actually, to be more precise that rule is sort of modelled now.
+
+(define_automaton "hypersparc_0,hypersparc_1")
+
+;; HyperSPARC/sparclite86x scheduling
+
+(define_cpu_unit "hs_memory,hs_branch,hs_shift,hs_fpalu" "hypersparc_0")
+(define_cpu_unit "hs_fpmds" "hypersparc_1")
+
+(define_insn_reservation "hs_load" 1
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "load,sload,fpload"))
+ "hs_memory")
+
+(define_insn_reservation "hs_store" 2
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "store,fpstore"))
+ "hs_memory, nothing")
+
+(define_insn_reservation "hs_slbranch" 1
+ (and (eq_attr "cpu" "sparclite86x")
+ (eq_attr "type" "branch"))
+ "hs_branch")
+
+(define_insn_reservation "hs_slshift" 1
+ (and (eq_attr "cpu" "sparclite86x")
+ (eq_attr "type" "shift"))
+ "hs_shift")
+
+(define_insn_reservation "hs_fp_alu" 1
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fp,fpmove,fpcmp"))
+ "hs_fpalu")
+
+(define_insn_reservation "hs_fp_mult" 1
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpmul"))
+ "hs_fpmds")
+
+(define_insn_reservation "hs_fp_divs" 8
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpdivs"))
+ "hs_fpmds*6, nothing*2")
+
+(define_insn_reservation "hs_fp_divd" 12
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpdivd"))
+ "hs_fpmds*10, nothing*2")
+
+(define_insn_reservation "hs_fp_sqrt" 17
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "fpsqrts,fpsqrtd"))
+ "hs_fpmds*15, nothing*2")
+
+(define_insn_reservation "hs_imul" 17
+ (and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
+ (eq_attr "type" "imul"))
+ "hs_fpmds*15, nothing*2")
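+
+;; Note (illustrative): in a reservation string such as
+;; "hs_fpmds*15, nothing*2", the fpmds unit stays busy for 15 consecutive
+;; cycles while the trailing "nothing*2" cycles reserve no unit at all,
+;; so the instruction's latency is 17 cycles but another fpmds operation
+;; may issue once the 15 busy cycles have elapsed.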
diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm
new file mode 100644
index 000000000..b60bd5740
--- /dev/null
+++ b/gcc/config/sparc/lb1spc.asm
@@ -0,0 +1,784 @@
+/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
+ for the sparc processor.
+
+ These routines are derived from the SPARC Architecture Manual, version 8,
+ slightly edited to match the desired calling convention, and also to
+ optimize them for our purposes. */
+
+#ifdef L_mulsi3
+.text
+ .align 4
+ .global .umul
+ .proc 4
+.umul:
+ or %o0, %o1, %o4 ! logical or of multiplier and multiplicand
+ mov %o0, %y ! multiplier to Y register
+ andncc %o4, 0xfff, %o5 ! mask out lower 12 bits
+ be mul_shortway ! can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc
+ !
+ ! long multiply
+ !
+ mulscc %o4, %o1, %o4 ! first iteration of 33
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4 ! 32nd iteration
+ mulscc %o4, %g0, %o4 ! last iteration only shifts
+ ! the upper 32 bits of product are wrong, but we do not care
+ retl
+ rd %y, %o0
+ !
+ ! short multiply
+ !
+mul_shortway:
+ mulscc %o4, %o1, %o4 ! first iteration of 13
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4
+ mulscc %o4, %o1, %o4 ! 12th iteration
+ mulscc %o4, %g0, %o4 ! last iteration only shifts
+ rd %y, %o5
+ sll %o4, 12, %o4 ! left shift partial product by 12 bits
+ srl %o5, 20, %o5 ! right shift partial product by 20 bits
+ retl
+ or %o5, %o4, %o0 ! merge for true product
+#endif
+
+#ifdef L_divsi3
+/*
+ * Division and remainder, from Appendix E of the SPARC Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .div name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
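+
+/* Illustrative C sketch (an editorial addition, not used by the build) of
+   the same non-restoring scheme with N = 4, developing four quotient bits
+   per main-loop pass.  Names follow the comment above (Q, R, V, ITER);
+   the special large-dividend path is omitted and the divisor is assumed
+   nonzero (the assembly traps on zero divide).
+
+   unsigned int udiv_sketch (unsigned int dividend, unsigned int divisor)
+   {
+     unsigned long long v = divisor;  // V: the comparand
+     long long r = dividend;          // R: remainder, starts as dividend
+     unsigned int q = 0;              // Q: quotient under development
+     int iter = 0, bit;
+
+     while (v <= dividend)            // scale V up; this counts ITER
+       {
+         v <<= 4;
+         iter++;
+       }
+     while (iter-- > 0)
+       for (bit = 0; bit < 4; bit++)  // one quotient bit per divide step
+         {
+           v >>= 1;
+           if (r >= 0) { r -= v; q = (q << 1) + 1; }
+           else        { r += v; q = (q << 1) - 1; }
+         }
+     if (r < 0)                       // the one-instruction fixup below
+       q--;
+     return q;
+   }
+*/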
+ .global .udiv
+ .align 4
+ .proc 4
+ .text
+.udiv:
+ b ready_to_divide
+ mov 0, %g3 ! result is always positive
+
+ .global .div
+ .align 4
+ .proc 4
+ .text
+.div:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge ready_to_divide ! no, go do the divide
+ xor %o1, %o0, %g3 ! compute sign in any case
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge ready_to_divide ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+
+
+ready_to_divide:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x2 ! ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu got_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu not_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: we know we need to shift V
+	! into the top decade, so do not even bother to compare it to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc not_too_big
+ add %g2, 1, %g2
+
+	! We get here if %o5 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b do_single_div
+ sub %g2, 1, %g2
+
+ not_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+ /* NB: these are commented out in the V8-SPARC manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ do_single_div:
+ subcc %g2, 1, %g2
+ bl end_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b end_single_divloop
+ nop
+ single_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ end_single_divloop:
+ subcc %g2, 1, %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+
+not_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be got_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+divloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+ 9:
+end_regular_divide:
+ subcc %o4, 1, %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+got_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o2, %o2
+1:
+ retl
+ mov %o2, %o0
+#endif
+
+#ifdef L_modsi3
+/* This implementation was taken from glibc:
+ *
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
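+
+/* The udiv sketch earlier in this file applies here unchanged; only the
+   epilogue differs (illustrative):
+
+     if (r < 0)        // one-instruction non-restoring fixup, as below
+       r += divisor;   // .rem/.urem return R; .div/.udiv decrement Q
+     return r;
+*/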
+.text
+ .align 4
+ .global .urem
+ .proc 4
+.urem:
+ b divide
+ mov 0, %g3 ! result always positive
+
+ .align 4
+ .global .rem
+ .proc 4
+.rem:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ mov %o0, %g3 ! sign of remainder matches %o0
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+divide:
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x2 !ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu got_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu not_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: we know we need to shift V
+	! into the top decade, so do not even bother to compare it to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc not_too_big
+ add %g2, 1, %g2
+
+	! We get here if %o5 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b do_single_div
+ sub %g2, 1, %g2
+
+ not_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+ /* NB: these are commented out in the V8-SPARC manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ do_single_div:
+ subcc %g2, 1, %g2
+ bl end_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b end_single_divloop
+ nop
+ single_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ end_single_divloop:
+ subcc %g2, 1, %g2
+ bge single_divloop
+ tst %o3
+ b,a end_regular_divide
+
+not_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be got_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+divloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+ 9:
+end_regular_divide:
+ subcc %o4, 1, %o4
+ bge divloop
+ tst %o3
+ bl,a got_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+got_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o3, %o3
+1:
+ retl
+ mov %o3, %o0
+
+#endif
+
diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm
new file mode 100644
index 000000000..973401f80
--- /dev/null
+++ b/gcc/config/sparc/lb1spl.asm
@@ -0,0 +1,246 @@
+/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
+ for the sparclite processor.
+
+ These routines are all from the SPARClite User's Guide, slightly edited
+ to match the desired calling convention, and also to optimize them. */
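+
+/* Note (illustrative): divscc is the SPARClite divide-step instruction;
+   each execution develops one quotient bit, so the 32 back-to-back
+   divscc instructions in each routine below produce a full 32-bit
+   quotient without a loop, and the remainder is read back from %y.  */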
+
+#ifdef L_udivsi3
+.text
+ .align 4
+ .global .udiv
+ .proc 04
+.udiv:
+ wr %g0,%g0,%y ! Not a delayed write for sparclite
+ tst %g0
+ divscc %o0,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ retl
+ divscc %g1,%o1,%o0
+#endif
+
+#ifdef L_umodsi3
+.text
+ .align 4
+ .global .urem
+ .proc 04
+.urem:
+ wr %g0,%g0,%y ! Not a delayed write for sparclite
+ tst %g0
+ divscc %o0,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ divscc %g1,%o1,%g1
+ bl 1f
+ rd %y,%o0
+ retl
+ nop
+1: retl
+ add %o0,%o1,%o0
+#endif
+
+#ifdef L_divsi3
+.text
+ .align 4
+ .global .div
+ .proc 04
+! ??? This routine could be made faster if it were optimized, and if it were
+! rewritten to calculate only the quotient.
+.div:
+ wr %g0,%g0,%y ! Not a delayed write for sparclite
+ mov %o1,%o4
+ tst %o1
+ bl,a 1f
+ sub %g0,%o4,%o4
+1: tst %o0
+ bl,a 2f
+ mov -1,%y
+2: divscc %o0,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ be 6f
+ mov %y,%o3
+ bg 4f
+ addcc %o3,%o4,%g0
+ be,a 6f
+ mov %g0,%o3
+ tst %o0
+ bl 5f
+ tst %g1
+ ba 5f
+ add %o3,%o4,%o3
+4: subcc %o3,%o4,%g0
+ be,a 6f
+ mov %g0,%o3
+ tst %o0
+ bge 5f
+ tst %g1
+ sub %o3,%o4,%o3
+5: bl,a 6f
+ add %g1,1,%g1
+6: tst %o1
+ bl,a 7f
+ sub %g0,%g1,%g1
+7: retl
+ mov %g1,%o0 ! Quotient is in %g1.
+#endif
+
+#ifdef L_modsi3
+.text
+ .align 4
+ .global .rem
+ .proc 04
+! ??? This routine could be made faster if it were optimized, and if it were
+! rewritten to calculate only the remainder.
+.rem:
+ wr %g0,%g0,%y ! Not a delayed write for sparclite
+ mov %o1,%o4
+ tst %o1
+ bl,a 1f
+ sub %g0,%o4,%o4
+1: tst %o0
+ bl,a 2f
+ mov -1,%y
+2: divscc %o0,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ divscc %g1,%o4,%g1
+ be 6f
+ mov %y,%o3
+ bg 4f
+ addcc %o3,%o4,%g0
+ be,a 6f
+ mov %g0,%o3
+ tst %o0
+ bl 5f
+ tst %g1
+ ba 5f
+ add %o3,%o4,%o3
+4: subcc %o3,%o4,%g0
+ be,a 6f
+ mov %g0,%o3
+ tst %o0
+ bge 5f
+ tst %g1
+ sub %o3,%o4,%o3
+5: bl,a 6f
+ add %g1,1,%g1
+6: tst %o1
+ bl,a 7f
+ sub %g0,%g1,%g1
+7: retl
+ mov %o3,%o0 ! Remainder is in %o3.
+#endif
diff --git a/gcc/config/sparc/leon.md b/gcc/config/sparc/leon.md
new file mode 100644
index 000000000..bc77c6ab9
--- /dev/null
+++ b/gcc/config/sparc/leon.md
@@ -0,0 +1,56 @@
+;; Scheduling description for LEON.
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_automaton "leon")
+
+(define_cpu_unit "leon_memory, leon_fpalu" "leon")
+(define_cpu_unit "leon_fpmds" "leon")
+(define_cpu_unit "write_buf" "leon")
+
+(define_insn_reservation "leon_load" 1
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "load,sload,fpload"))
+ "leon_memory")
+
+(define_insn_reservation "leon_store" 1
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "store,fpstore"))
+ "leon_memory+write_buf")
+
+(define_insn_reservation "leon_fp_alu" 1
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "fp,fpmove"))
+ "leon_fpalu, nothing")
+
+(define_insn_reservation "leon_fp_mult" 1
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "fpmul"))
+ "leon_fpmds, nothing")
+
+(define_insn_reservation "leon_fp_div" 16
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "fpdivs,fpdivd"))
+ "leon_fpmds, nothing*15")
+
+(define_insn_reservation "leon_fp_sqrt" 23
+ (and (eq_attr "cpu" "leon")
+ (eq_attr "type" "fpsqrts,fpsqrtd"))
+ "leon_fpmds, nothing*21")
+
diff --git a/gcc/config/sparc/libgcc-sparc-glibc.ver b/gcc/config/sparc/libgcc-sparc-glibc.ver
new file mode 100644
index 000000000..91138d379
--- /dev/null
+++ b/gcc/config/sparc/libgcc-sparc-glibc.ver
@@ -0,0 +1,93 @@
+# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# In order to work around the very problems that now force us to generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By now choosing the same version tags for these specific routines, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+%exclude {
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%ifdef __arch64__
+%define GLIBC_VER GLIBC_2.2
+%else
+%define GLIBC_VER GLIBC_2.0
+%endif
+%inherit GCC_3.0 GLIBC_VER
+GLIBC_VER {
+ # Sampling of DImode arithmetic used by (at least) i386 and m68k.
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+
+ # Exception handling support functions used by most everyone.
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%if !defined (__arch64__) && defined (__LONG_DOUBLE_128__)
+
+# long double 128 bit support from 32-bit libgcc_s.so.1 is only available
+# when configured with --with-long-double-128. Make sure all the
+# symbols are available at @@GCC_LDBL_* versions to make it clear
+# there is a configurable symbol set.
+
+%exclude {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%inherit GCC_LDBL_3.0 GCC_3.0
+GCC_LDBL_3.0 {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+}
+
+%inherit GCC_LDBL_4.0.0 GCC_4.0.0
+GCC_LDBL_4.0.0 {
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%endif
diff --git a/gcc/config/sparc/linux-unwind.h b/gcc/config/sparc/linux-unwind.h
new file mode 100644
index 000000000..adfef6ec2
--- /dev/null
+++ b/gcc/config/sparc/linux-unwind.h
@@ -0,0 +1,202 @@
+/* DWARF2 EH unwinding support for SPARC Linux.
+ Copyright 2004, 2005, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#if defined(__arch64__)
+
+/* 64-bit SPARC version */
+#define MD_FALLBACK_FRAME_STATE_FOR sparc64_fallback_frame_state
+
+static _Unwind_Reason_Code
+sparc64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned int *pc = context->ra;
+ long this_cfa = (long) context->cfa;
+ long new_cfa, ra_location, shifted_ra_location;
+ long regs_off, fpu_save_off;
+ long fpu_save;
+ int i;
+
+ if (pc[0] != 0x82102065 /* mov NR_rt_sigreturn, %g1 */
+ || pc[1] != 0x91d0206d) /* ta 0x6d */
+ return _URC_END_OF_STACK;
+
+ regs_off = 192 + 128;
+ fpu_save_off = regs_off + (16 * 8) + (3 * 8) + (2 * 4);
+
+ new_cfa = *(long *)(this_cfa + regs_off + (14 * 8));
+ new_cfa += 2047; /* Stack bias */
+ fpu_save = *(long *)(this_cfa + fpu_save_off);
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - this_cfa;
+
+ for (i = 1; i < 16; i++)
+ {
+ /* We never restore %sp as everything is purely CFA-based. */
+ if ((unsigned int) i == __builtin_dwarf_sp_column ())
+ continue;
+
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = this_cfa + regs_off + (i * 8) - new_cfa;
+ }
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i + 16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 16].loc.offset
+ = this_cfa + (i * 8) - new_cfa;
+ }
+ if (fpu_save)
+ {
+ for (i = 0; i < 64; i++)
+ {
+ if (i > 32 && (i & 0x1))
+ continue;
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = fpu_save + (i * 4) - new_cfa;
+ }
+ }
+
+ /* State the rules to find the kernel's code "return address", which is
+ the address of the active instruction when the signal was caught.
+ On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we
+ need to preventively subtract it from the purported return address. */
+ ra_location = this_cfa + regs_off + 17 * 8;
+ shifted_ra_location = this_cfa + regs_off + 19 * 8; /* Y register */
+ *(long *)shifted_ra_location = *(long *)ra_location - 8;
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT sparc64_frob_update_context
+
+static void
+sparc64_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ /* The column of %sp contains the old CFA, not the old value of %sp.
+ The CFA offset already comprises the stack bias so, when %sp is the
+ CFA register, we must avoid counting the stack bias twice. Do not
+ do that for signal frames as the offset is artificial for them. */
+ if (fs->regs.cfa_reg == __builtin_dwarf_sp_column ()
+ && fs->regs.cfa_how == CFA_REG_OFFSET
+ && fs->regs.cfa_offset != 0
+ && !fs->signal_frame)
+ context->cfa -= 2047;
+}
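+
+/* Worked note (illustrative): on 64-bit SPARC the address held in %sp is
+   2047 bytes below the frame it denotes (the stack bias), which is why
+   the fallback handler above adds 2047 when recovering the CFA from the
+   saved %sp, and why this hook subtracts it again when %sp itself is the
+   CFA register of a regular frame.  */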
+
+#else
+
+/* 32-bit SPARC version */
+#define MD_FALLBACK_FRAME_STATE_FOR sparc_fallback_frame_state
+
+static _Unwind_Reason_Code
+sparc_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned int *pc = context->ra;
+ int this_cfa = (int) context->cfa;
+ int new_cfa, ra_location, shifted_ra_location;
+ int regs_off, fpu_save_off;
+ int fpu_save;
+ int old_style, i;
+
+ if (pc[1] != 0x91d02010) /* ta 0x10 */
+ return _URC_END_OF_STACK;
+
+ if (pc[0] == 0x821020d8) /* mov NR_sigreturn, %g1 */
+ old_style = 1;
+ else if (pc[0] == 0x82102065) /* mov NR_rt_sigreturn, %g1 */
+ old_style = 0;
+ else
+ return _URC_END_OF_STACK;
+
+ if (old_style)
+ {
+ regs_off = 96;
+ fpu_save_off = regs_off + (4 * 4) + (16 * 4);
+ }
+ else
+ {
+ regs_off = 96 + 128;
+ fpu_save_off = regs_off + (4 * 4) + (16 * 4) + (2 * 4);
+ }
+
+ new_cfa = *(int *)(this_cfa + regs_off + (4 * 4) + (14 * 4));
+ fpu_save = *(int *)(this_cfa + fpu_save_off);
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - this_cfa;
+
+ for (i = 1; i < 16; i++)
+ {
+ /* We never restore %sp as everything is purely CFA-based. */
+ if ((unsigned int) i == __builtin_dwarf_sp_column ())
+ continue;
+
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset
+ = this_cfa + regs_off + (4 * 4) + (i * 4) - new_cfa;
+ }
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i + 16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 16].loc.offset
+ = this_cfa + (i * 4) - new_cfa;
+ }
+ if (fpu_save)
+ {
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = fpu_save + (i * 4) - new_cfa;
+ }
+ }
+
+ /* State the rules to find the kernel's code "return address", which is
+ the address of the active instruction when the signal was caught.
+ On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we
+ need to preventively subtract it from the purported return address. */
+ ra_location = this_cfa + regs_off + 4;
+ shifted_ra_location = this_cfa + regs_off + 3 * 4; /* Y register */
+ *(int *)shifted_ra_location = *(int *)ra_location - 8;
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = shifted_ra_location - new_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#endif
diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h
new file mode 100644
index 000000000..acdbcb928
--- /dev/null
+++ b/gcc/config/sparc/linux.h
@@ -0,0 +1,168 @@
+/* Definitions for SPARC running Linux-based GNU systems with ELF.
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Eddie C. Dost (ecd@skynet.be)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ if (TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONG_DOUBLE_128__"); \
+ } \
+ while (0)
+
+/* Provide an ENDFILE_SPEC appropriate for GNU/Linux.  Here we tack on
+   the GNU/Linux magical crtend.o file (see crtstuff.c) which
+   provides part of the support for getting C++ file-scope static
+   objects constructed before entering `main', followed by a normal
+   GNU/Linux "finalizer" file, `crtn.o'.  */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
+ %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
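+
+/* Worked example of the spec above (illustrative): a default link appends
+   crtend.o and crtn.o; "gcc -shared" or "gcc -pie" substitutes crtendS.o
+   for crtend.o; and any of -Ofast, -ffast-math or
+   -funsafe-math-optimizations additionally appends crtfastmath.o.  */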
+
+/* This is for -profile to use -lc_p instead of -lc. */
+#undef CC1_SPEC
+#define CC1_SPEC "%{profile:-p} \
+"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc GNU/Linux with ELF)");
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC \
+"%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* Provide a LINK_SPEC appropriate for GNU/Linux. Here we provide support
+ for the special GCC options -static and -shared, which allow us to
+ link things in one of these three modes by applying the appropriate
+ combinations of options at link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#undef LINK_SPEC
+#define LINK_SPEC "-m elf32_sparc -Y P,/usr/lib %{shared:-shared} \
+ %{!mno-relax:%{!r:-relax}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+/* It's safe to pass -s always, even if -g is not used. */
+#undef ASM_SPEC
+#define ASM_SPEC "\
+-s \
+%{fpic|fPIC|fpie|fPIE:-K PIC} \
+%{!.c:%{findirect-dispatch:-K PIC}} \
+%(asm_cpu) %(asm_relax)"
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+do { \
+ fputs ("\t.local\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ putc ('\n', (FILE)); \
+ ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \
+} while (0)
+
+#undef COMMON_ASM_OP
+#define COMMON_ASM_OP "\t.common\t"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM))
+
+
+/* Define for support of TFmode long double.
+ SPARC ABI says that long double is 4 words. */
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+/* Define this to set the long double type size to use in libgcc2.c, which
+   cannot depend on target_flags.  */
+#ifdef __LONG_DOUBLE_128__
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+#undef DITF_CONVERSION_LIBFUNCS
+#define DITF_CONVERSION_LIBFUNCS 1
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_SUN_TLS
+#undef TARGET_GNU_TLS
+#define TARGET_SUN_TLS 0
+#define TARGET_GNU_TLS 1
+#endif
+
+/* We use GNU ld, so undefine these so that attribute((init_priority)) works.  */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+#define MD_UNWIND_SUPPORT "config/sparc/linux-unwind.h"
+
+/* Linux currently uses RMO in uniprocessor mode, which is equivalent to
+ TMO, and TMO in multiprocessor mode. But they reserve the right to
+ change their minds. */
+#undef SPARC_RELAXED_ORDERING
+#define SPARC_RELAXED_ORDERING true
+
+#undef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* sparc glibc provides __stack_chk_guard in [%g7 + 0x14]. */
+#define TARGET_THREAD_SSP_OFFSET 0x14
+#endif
+
+/* Define if long doubles should be mangled as 'g'. */
+#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+
+/* We use glibc _mcount for profiling. */
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h
new file mode 100644
index 000000000..38863588a
--- /dev/null
+++ b/gcc/config/sparc/linux64.h
@@ -0,0 +1,289 @@
+/* Definitions for 64-bit SPARC running Linux-based GNU systems with ELF.
+ Copyright 1996, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+ 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by David S. Miller (davem@caip.rutgers.edu)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ if (TARGET_ARCH64) \
+ builtin_define ("_LONGLONG"); \
+ if (TARGET_ARCH32 \
+ && TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONG_DOUBLE_128__"); \
+ } \
+ while (0)
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
+/* A 64-bit v9 compiler with stack-bias,
+   in a Medium/Low code model environment.  */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \
+ + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128)
+#endif
+
+/* This must be v9a, not just v9, because by default we enable -mvis.  */
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC "-Av9a"
+
+/* Provide an ENDFILE_SPEC appropriate for GNU/Linux.  Here we tack on
+   the GNU/Linux magical crtend.o file (see crtstuff.c) which
+   provides part of the support for getting C++ file-scope static
+   objects constructed before entering `main', followed by a normal
+   GNU/Linux "finalizer" file, `crtn.o'.  */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s\
+ %{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc64 GNU/Linux with ELF)");
+
+/* The default code model. */
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDLOW
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Define for support of TFmode long double.
+ SPARC ABI says that long double is 4 words. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+/* Define this to set the long double type size to use in libgcc2.c, which
+   cannot depend on target_flags.  */
+#if defined(__arch64__) || defined(__LONG_DOUBLE_128__)
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC "\
+%{posix:-D_POSIX_SOURCE} \
+%{pthread:-D_REENTRANT} \
+"
+
+/* Provide a LINK_SPEC appropriate for GNU/Linux. Here we provide support
+ for the special GCC options -static and -shared, which allow us to
+ link things in one of these three modes by applying the appropriate
+ combinations of options at link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
+
+#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux.so.2"
+
+#ifdef SPARC_BI_ARCH
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC },
+
+#define LINK_ARCH32_SPEC "-m elf32_sparc -Y P,%R/usr/lib %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER32 "} \
+ %{static:-static}} \
+"
+
+#define LINK_ARCH64_SPEC "-m elf64_sparc -Y P,%R/usr/lib64 %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER64 "} \
+ %{static:-static}} \
+"
+
+#define LINK_ARCH_SPEC "\
+%{m32:%(link_arch32)} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+
+#define LINK_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%(link_arch) \
+%{mlittle-endian:-EL} \
+%{!mno-relax:%{!r:-relax}} \
+"
+
+#undef CC1_SPEC
+#if DEFAULT_ARCH32_P
+#define CC1_SPEC "%{profile:-p} \
+%{m32:%{m64:%emay not use both -m32 and -m64}} \
+%{m64:-mptr64 -mstack-bias -mlong-double-128 \
+ %{!mcpu*:-mcpu=ultrasparc} \
+ %{!mno-vis:%{!mcpu=v9:-mvis}}} \
+"
+#else
+#define CC1_SPEC "%{profile:-p} \
+%{m32:%{m64:%emay not use both -m32 and -m64}} \
+%{m32:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \
+ %{!mcpu*:-mcpu=cypress}} \
+%{!m32:%{!mcpu*:-mcpu=ultrasparc}} \
+%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}} \
+"
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-cpu is ignored if -mcpu is specified.
+ --with-tune is ignored if -mtune is specified.
+ --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu
+ are specified.
+ In the SPARC_BI_ARCH compiler we cannot pass %{!mcpu=*:-mcpu=%(VALUE)}
+ here, otherwise say -mcpu=v7 would be passed even when -m64.
+ CC1_SPEC above takes care of this instead. */
+#undef OPTION_DEFAULT_SPECS
+#if DEFAULT_ARCH32_P
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!m64:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" }
+#else
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!m32:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" }
+#endif
+
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+#else /* !SPARC_BI_ARCH */
+
+#undef LINK_SPEC
+#define LINK_SPEC "-m elf64_sparc -Y P,%R/usr/lib64 %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER64 "} \
+ %{static:-static}} \
+%{mlittle-endian:-EL} \
+%{!mno-relax:%{!r:-relax}} \
+"
+
+#endif /* !SPARC_BI_ARCH */
+
+/* It's safe to pass -s always, even if -g is not used. */
+#undef ASM_SPEC
+#define ASM_SPEC "\
+-s \
+%{fpic|fPIC|fpie|fPIE:-K PIC} \
+%{!.c:%{findirect-dispatch:-K PIC}} \
+%{mlittle-endian:-EL} \
+%(asm_cpu) %(asm_arch) %(asm_relax)"
+
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
+do { \
+ fputs ("\t.local\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ putc ('\n', (FILE)); \
+ ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \
+} while (0)
+
+#undef COMMON_ASM_OP
+#define COMMON_ASM_OP "\t.common\t"
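
[Editorial note: for a local aligned common symbol, the hook above first emits a `.local` directive and then defers to ASM_OUTPUT_ALIGNED_COMMON, which uses the `.common` op just defined. For a hypothetical 64-byte object `buf` aligned to 8 bytes, the emitted assembly would look roughly like:]

        .local  buf
        .common buf,64,8
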
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM))
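
[Editorial note: the leading `*` in the generated string tells assemble_name to emit the rest verbatim, without prepending the user label prefix. A standalone sketch of the sprintf above, using prefix "LC" and number 42 as an example:]

    /* Illustrative re-run of ASM_GENERATE_INTERNAL_LABEL's format string.  */
    #include <stdio.h>

    int main (void)
    {
      char label[32];
      sprintf (label, "*.L%s%ld", "LC", 42L);
      printf ("%s\n", label);   /* prints *.LLC42; assembles as .LLC42 */
      return 0;
    }
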
+
+/* DWARF bits. */
+
+/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
+ Obviously the Dwarf2 folks haven't tried to actually build systems
+ with their spec. On a 64-bit system, only 64-bit relocs become
+ RELATIVE relocations. */
+
+/* #define DWARF_OFFSET_SIZE PTR_SIZE */
+
+#undef DITF_CONVERSION_LIBFUNCS
+#define DITF_CONVERSION_LIBFUNCS 1
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_SUN_TLS
+#undef TARGET_GNU_TLS
+#define TARGET_SUN_TLS 0
+#define TARGET_GNU_TLS 1
+#endif
+
+/* We use GNU ld; undefine these so that attribute((init_priority)) works. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+#define MD_UNWIND_SUPPORT "config/sparc/linux-unwind.h"
+
+/* Linux currently uses RMO in uniprocessor mode, which is equivalent to
+ TMO, and TMO in multiprocessor mode. But they reserve the right to
+ change their minds. */
+#undef SPARC_RELAXED_ORDERING
+#define SPARC_RELAXED_ORDERING true
+
+#undef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* sparc glibc provides __stack_chk_guard in [%g7 + 0x14],
+ sparc64 glibc provides it at [%g7 + 0x28]. */
+#define TARGET_THREAD_SSP_OFFSET (TARGET_ARCH64 ? 0x28 : 0x14)
+#endif
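
[Editorial note: the offsets locate glibc's stack-protector guard inside the thread control block addressed by register %g7. A hypothetical, SPARC-only C sketch of fetching the guard — illustrative inline asm, not GCC's generated code:]

    #include <stdio.h>

    static unsigned long
    read_stack_guard (void)
    {
      unsigned long guard;
    #ifdef __arch64__
      __asm__ ("ldx [%%g7 + 0x28], %0" : "=r" (guard));  /* sparc64 glibc */
    #else
      __asm__ ("ld [%%g7 + 0x14], %0" : "=r" (guard));   /* sparc32 glibc */
    #endif
      return guard;
    }

    int main (void)
    {
      printf ("guard = %#lx\n", read_stack_guard ());
      return 0;
    }
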
+
+/* Define if long doubles should be mangled as 'g'. */
+#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+
+/* We use glibc _mcount for profiling. */
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
diff --git a/gcc/config/sparc/little-endian.opt b/gcc/config/sparc/little-endian.opt
new file mode 100644
index 000000000..52db029c0
--- /dev/null
+++ b/gcc/config/sparc/little-endian.opt
@@ -0,0 +1,27 @@
+; Options for the SPARC port of the compiler
+;
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mlittle-endian
+Target Report RejectNegative Mask(LITTLE_ENDIAN) MaskExists
+Generate code for little-endian
+
+mbig-endian
+Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
+Generate code for big-endian
diff --git a/gcc/config/sparc/long-double-switch.opt b/gcc/config/sparc/long-double-switch.opt
new file mode 100644
index 000000000..eb3c1a00f
--- /dev/null
+++ b/gcc/config/sparc/long-double-switch.opt
@@ -0,0 +1,27 @@
+; Options for the SPARC port of the compiler
+;
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mlong-double-128
+Target Report RejectNegative Mask(LONG_DOUBLE_128) MaskExists
+Use 128-bit long double
+
+mlong-double-64
+Target Report RejectNegative InverseMask(LONG_DOUBLE_128)
+Use 64-bit long double
diff --git a/gcc/config/sparc/netbsd-elf.h b/gcc/config/sparc/netbsd-elf.h
new file mode 100644
index 000000000..ed9cabe1e
--- /dev/null
+++ b/gcc/config/sparc/netbsd-elf.h
@@ -0,0 +1,246 @@
+/* Definitions of target machine for GCC, for ELF on NetBSD/sparc
+ and NetBSD/sparc64.
+ Copyright (C) 2002, 2003, 2004, 2005, 2007, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Matthew Green (mrg@eterna.com.au).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ if (TARGET_ARCH64) \
+ { \
+ builtin_define ("__sparc64__"); \
+ builtin_define ("__sparc_v9__"); \
+ builtin_define ("__sparcv9"); \
+ } \
+ else \
+ builtin_define ("__sparc"); \
+ builtin_define ("__sparc__"); \
+ } \
+ while (0)
+
+/* CPP defines used by all NetBSD targets. */
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC "%(netbsd_cpp_spec)"
+
+/* SIZE_TYPE and PTRDIFF_TYPE are wrong from sparc/sparc.h. */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* This is the char to use for continuation (in case we need to turn
+ continuation back on). */
+#undef DBX_CONTIN_CHAR
+#define DBX_CONTIN_CHAR '?'
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM))
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} \
+%{mlittle-endian:-EL} \
+%(asm_cpu) %(asm_arch) %(asm_relax)"
+
+#undef STDC_0_IN_SYSTEM_HEADERS
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME);
+
+/* Below here exists the merged NetBSD/sparc & NetBSD/sparc64 compiler
+ description, allowing one to build 32-bit or 64-bit applications
+ on either. We define the sparc & sparc64 versions of things,
+ occasionally a neutral version (should be the same as "netbsd-elf.h")
+ and then based on SPARC_BI_ARCH, DEFAULT_ARCH32_P, and TARGET_CPU_DEFAULT,
+ we choose the correct version. */
+
+/* We use the default NetBSD ELF STARTFILE_SPEC and ENDFILE_SPEC
+ definitions, even for the SPARC_BI_ARCH compiler, because NetBSD does
+ not have a default place to find these libraries. */
+
+/* Name the port(s). */
+#define TARGET_NAME64 "NetBSD/sparc64 ELF"
+#define TARGET_NAME32 "NetBSD/sparc ELF"
+
+/* TARGET_CPU_DEFAULT is set in Makefile.in. We test for 64-bit default
+ platform here. */
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+/* A 64 bit v9 compiler with stack-bias,
+ in a Medium/Low code model environment. */
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_PTR64 + MASK_64BIT /* + MASK_HARD_QUAD */ \
+ + MASK_STACK_BIAS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128)
+
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDANY
+
+#endif
+
+/* CC1_SPEC for NetBSD/sparc. */
+#define CC1_SPEC32 \
+ "%{m32:%{m64:%emay not use both -m32 and -m64}} \
+ %{m64: \
+ -mptr64 -mstack-bias -mno-v8plus -mlong-double-128 \
+ %{!mcpu*:%{!mv8plus:-mcpu=ultrasparc}} \
+ %{!mno-vis:%{!mcpu=v9:-mvis}} \
+ %{p:-mcmodel=medlow} \
+ %{pg:-mcmodel=medlow}}"
+
+#define CC1_SPEC64 \
+ "%{m32:%{m64:%emay not use both -m32 and -m64}} \
+ %{m32: \
+ -mptr32 -mno-stack-bias \
+ %{!mlong-double-128:-mlong-double-64} \
+ %{!mcpu*:%{!mv8plus:-mcpu=cypress}}} \
+ %{!m32: \
+ %{p:-mcmodel=medlow} \
+ %{pg:-mcmodel=medlow}}"
+
+/* Make sure we use the right output format. Pick a default and then
+ make sure -m32/-m64 switch to the right one. */
+
+#define LINK_ARCH32_SPEC "-m elf32_sparc"
+
+#define LINK_ARCH64_SPEC "-m elf64_sparc"
+
+#define LINK_ARCH_SPEC \
+ "%{m32:%(link_arch32)} \
+ %{m64:%(link_arch64)} \
+ %{!m32:%{!m64:%(link_arch_default)}}"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%(link_arch) \
+ %{!mno-relax:%{!r:-relax}} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "__start"
+
+#if DEFAULT_ARCH32_P
+#define LINK_ARCH_DEFAULT_SPEC LINK_ARCH32_SPEC
+#else
+#define LINK_ARCH_DEFAULT_SPEC LINK_ARCH64_SPEC
+#endif
+
+/* What extra spec entries do we need? */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC }, \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+/* Build a compiler that supports -m32 and -m64? */
+
+#ifdef SPARC_BI_ARCH
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64)
+
+#if defined(__arch64__) || defined(__LONG_DOUBLE_128__)
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+#else
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
+
+#undef CC1_SPEC
+#if DEFAULT_ARCH32_P
+#define CC1_SPEC CC1_SPEC32
+#else
+#define CC1_SPEC CC1_SPEC64
+#endif
+
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
+
+/* Name the port. */
+#undef TARGET_NAME
+#define TARGET_NAME (DEFAULT_ARCH32_P ? TARGET_NAME32 : TARGET_NAME64)
+
+#else /* SPARC_BI_ARCH */
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 128
+
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+
+#undef CC1_SPEC
+#define CC1_SPEC CC1_SPEC64
+
+#undef TARGET_NAME
+#define TARGET_NAME TARGET_NAME64
+
+#else /* TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc */
+
+/* A 32-bit only compiler. NetBSD doesn't support 128-bit `long double'
+ for 32-bit code, unlike Solaris. */
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+
+#undef CC1_SPEC
+#define CC1_SPEC CC1_SPEC32
+
+#undef TARGET_NAME
+#define TARGET_NAME TARGET_NAME32
+
+#endif /* TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc */
+
+#endif /* SPARC_BI_ARCH */
+
+/* We use GNU ld so undefine this so that attribute((init_priority)) works. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
diff --git a/gcc/config/sparc/niagara.md b/gcc/config/sparc/niagara.md
new file mode 100644
index 000000000..e73c65b80
--- /dev/null
+++ b/gcc/config/sparc/niagara.md
@@ -0,0 +1,118 @@
+;; Scheduling description for Niagara.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Niagara is a single-issue processor.
+
+(define_automaton "niagara_0")
+
+(define_cpu_unit "niag_pipe" "niagara_0")
+
+(define_insn_reservation "niag_5cycle" 5
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "multi,flushw,iflush,trap"))
+ "niag_pipe*5")
+
+(define_insn_reservation "niag_4cycle" 4
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "savew"))
+ "niag_pipe*4")
+
+/* Most basic operations are single-cycle. */
+(define_insn_reservation "niag_ialu" 1
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "ialu,shift,compare,cmove"))
+ "niag_pipe")
+
+(define_insn_reservation "niag_imul" 11
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "imul"))
+ "niag_pipe*11")
+
+(define_insn_reservation "niag_idiv" 72
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "idiv"))
+ "niag_pipe*72")
+
+(define_insn_reservation "niag_branch" 3
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch,branch"))
+ "niag_pipe*3")
+
+(define_insn_reservation "niag_3cycle_load" 3
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "load"))
+ "niag_pipe*3")
+
+(define_insn_reservation "niag_9cycle_load" 9
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpload"))
+ "niag_pipe*9")
+
+(define_insn_reservation "niag_1cycle_store" 1
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "store"))
+ "niag_pipe")
+
+(define_insn_reservation "niag_8cycle_store" 8
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpstore"))
+ "niag_pipe*8")
+
+/* Things incorrectly modelled here:
+ * FPADD{s,d}: 26 cycles
+ * FPSUB{s,d}: 26 cycles
+ * FABSD: 26 cycles
+ * F{s,d}TO{s,d}: 26 cycles
+ * F{s,d}TO{i,x}: 26 cycles
+ * FSMULD: 29 cycles
+ */
+(define_insn_reservation "niag_fmov" 8
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpmove,fpcmove,fpcrmove"))
+ "niag_pipe*8")
+
+(define_insn_reservation "niag_fpcmp" 26
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpcmp"))
+ "niag_pipe*26")
+
+(define_insn_reservation "niag_fmult" 29
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpmul"))
+ "niag_pipe*29")
+
+(define_insn_reservation "niag_fdivs" 54
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpdivs"))
+ "niag_pipe*54")
+
+(define_insn_reservation "niag_fdivd" 83
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fpdivd"))
+ "niag_pipe*83")
+
+/* Things incorrectly modelled here:
+ * FPADD{16,32}: 10 cycles
+ * FPSUB{16,32}: 10 cycles
+ * FALIGNDATA: 10 cycles
+ */
+(define_insn_reservation "niag_vis" 8
+ (and (eq_attr "cpu" "niagara")
+ (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist"))
+ "niag_pipe*8")
diff --git a/gcc/config/sparc/niagara2.md b/gcc/config/sparc/niagara2.md
new file mode 100644
index 000000000..298ebe013
--- /dev/null
+++ b/gcc/config/sparc/niagara2.md
@@ -0,0 +1,90 @@
+;; Scheduling description for Niagara-2.
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Niagara-2 is a single-issue processor.
+
+(define_automaton "niagara2_0")
+
+(define_cpu_unit "niag2_pipe" "niagara2_0")
+
+(define_insn_reservation "niag2_25cycle" 25
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "flushw"))
+ "niag2_pipe*25")
+
+(define_insn_reservation "niag2_5cycle" 5
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "multi,flushw,iflush,trap"))
+ "niag2_pipe*5")
+
+(define_insn_reservation "niag2_6cycle" 4
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "savew"))
+ "niag2_pipe*4")
+
+/* Most basic operations are single-cycle. */
+(define_insn_reservation "niag2_ialu" 1
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "ialu,shift,compare,cmove"))
+ "niag2_pipe")
+
+(define_insn_reservation "niag2_imul" 5
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "imul"))
+ "niag2_pipe*5")
+
+(define_insn_reservation "niag2_idiv" 31
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "idiv"))
+ "niag2_pipe*31")
+
+(define_insn_reservation "niag2_branch" 5
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch,branch"))
+ "niag2_pipe*5")
+
+(define_insn_reservation "niag2_3cycle_load" 3
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "load,fpload"))
+ "niag2_pipe*3")
+
+(define_insn_reservation "niag2_1cycle_store" 1
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "store,fpstore"))
+ "niag2_pipe")
+
+(define_insn_reservation "niag2_fp" 3
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "fpmove,fpcmove,fpcrmove,fpcmp,fpmul"))
+ "niag2_pipe*3")
+
+(define_insn_reservation "niag2_fdivs" 19
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "fpdivs"))
+ "niag2_pipe*19")
+
+(define_insn_reservation "niag2_fdivd" 33
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "fpdivd"))
+ "niag2_pipe*33")
+
+(define_insn_reservation "niag2_vis" 6
+ (and (eq_attr "cpu" "niagara2")
+ (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist"))
+ "niag2_pipe*6")
diff --git a/gcc/config/sparc/openbsd1-64.h b/gcc/config/sparc/openbsd1-64.h
new file mode 100644
index 000000000..77ca79fe5
--- /dev/null
+++ b/gcc/config/sparc/openbsd1-64.h
@@ -0,0 +1,23 @@
+/* Configuration file for sparc64 OpenBSD target.
+ Copyright (C) 1999, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define OBSD_HAS_DECLARE_FUNCTION_NAME
+#define OBSD_HAS_DECLARE_FUNCTION_SIZE
+#define OBSD_HAS_DECLARE_OBJECT
+
diff --git a/gcc/config/sparc/openbsd64.h b/gcc/config/sparc/openbsd64.h
new file mode 100644
index 000000000..5d87f72e5
--- /dev/null
+++ b/gcc/config/sparc/openbsd64.h
@@ -0,0 +1,85 @@
+/* Configuration file for sparc64 OpenBSD target.
+ Copyright (C) 1999, 2005, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc64 OpenBSD ELF)")
+
+/* XXX - do we really want HARD_QUAD? */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+(MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \
+ + MASK_APP_REGS + MASK_FPU + MASK_STACK_BIAS + MASK_LONG_DOUBLE_128)
+
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDMID
+
+/* Target OS builtins. */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=OpenBSD"); \
+ builtin_define ("__sparc64__"); \
+ builtin_define ("__sparcv9__"); \
+ builtin_define ("__sparc_v9__"); \
+ builtin_define ("__arch64__"); \
+ } \
+ while (0)
+
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC ""
+
+/* Inherited from sp64-elf. */
+#undef NO_IMPLICIT_EXTERN_C
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+-s %{fpic|fPIC|fpie|fPIE:-K PIC} \
+%{mlittle-endian:-EL} \
+%(asm_cpu) %(asm_arch) \
+"
+
+/* Layout of source language data types. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 128
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{!shared:%{!nostdlib:%{!r:%{!e*:-e __start}}}} \
+ %{shared:-shared} %{R*} \
+ %{static:-Bstatic} \
+ %{!static:-Bdynamic} \
+ %{assert*} \
+ -dynamic-linker /usr/libexec/ld.so"
+
+/* As an elf system, we need crtbegin/crtend stuff. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+ %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} %{!p:crt0%O%s}} \
+ crtbegin%O%s} %{shared:crtbeginS%O%s}"
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}"
diff --git a/gcc/config/sparc/predicates.md b/gcc/config/sparc/predicates.md
new file mode 100644
index 000000000..4af960a88
--- /dev/null
+++ b/gcc/config/sparc/predicates.md
@@ -0,0 +1,475 @@
+;; Predicate definitions for SPARC.
+;; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Predicates for numerical constants.
+
+;; Return true if OP is the zero constant for MODE.
+(define_predicate "const_zero_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Return true if OP is the one constant for MODE.
+(define_predicate "const_one_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST1_RTX (mode)")))
+
+;; Return true if OP is the integer constant 4096.
+(define_predicate "const_4096_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 4096")))
+
+;; Return true if OP is a constant that is representable by a 13-bit
+;; signed field. This is an acceptable immediate operand for most
+;; 3-address instructions.
+(define_predicate "small_int_operand"
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM13_P (INTVAL (op))")))
+
+;; Return true if OP is a constant operand for the umul instruction. That
+;; instruction sign-extends immediate values just like all other SPARC
+;; instructions, but interprets the extended result as an unsigned number.
+(define_predicate "uns_small_int_operand"
+ (match_code "const_int,const_double")
+{
+#if HOST_BITS_PER_WIDE_INT == 32
+ return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0
+ && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
+#else
+ return (GET_CODE (op) == CONST_INT
+ && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
+ || (INTVAL (op) >= 0xFFFFF000
+ && INTVAL (op) <= 0xFFFFFFFF)));
+#endif
+})
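
[Editorial note: concretely, the accepted 32-bit values are 0..0xFFF plus 0xFFFFF000..0xFFFFFFFF — the latter being the bit patterns a sign-extended 13-bit immediate yields for negative inputs, which umul then reads as large unsigned numbers. A standalone restatement of the test, mirroring the 64-bit-host branch; the CONST_DOUBLE case covers 32-bit hosts:]

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative restatement of uns_small_int_operand's range check.  */
    static bool uns_simm13_ok (uint64_t v)
    {
      return v < 0x1000                                  /* 0 .. 4095 */
             || (v >= 0xFFFFF000 && v <= 0xFFFFFFFF);    /* sign-extended */
    }

    int main (void)
    {
      return (uns_simm13_ok (0xFFFFF000u) && !uns_simm13_ok (0x1000u)) ? 0 : 1;
    }
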
+
+;; Return true if OP is a constant that can be loaded by the sethi instruction.
+;; The first test avoids emitting sethi to load zero for example.
+(define_predicate "const_high_operand"
+ (and (match_code "const_int")
+ (and (not (match_operand 0 "small_int_operand"))
+ (match_test "SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))"))))
+
+;; Return true if OP is a constant whose 1's complement can be loaded by the
+;; sethi instruction.
+(define_predicate "const_compl_high_operand"
+ (and (match_code "const_int")
+ (and (not (match_operand 0 "small_int_operand"))
+ (match_test "SPARC_SETHI_P (~INTVAL (op) & GET_MODE_MASK (mode))"))))
+
+;; Return true if OP is a FP constant that needs to be loaded by the sethi/losum
+;; pair of instructions.
+(define_predicate "fp_const_high_losum_operand"
+ (match_operand 0 "const_double_operand")
+{
+ gcc_assert (mode == SFmode);
+ return fp_high_losum_p (op);
+})
+
+;; Return true if OP is a const_double or const_vector.
+(define_predicate "const_double_or_vector_operand"
+ (match_code "const_double,const_vector"))
+
+
+;; Predicates for symbolic constants.
+
+;; Return true if OP is either a symbol reference or a sum of a symbol
+;; reference and a constant.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ enum machine_mode omode = GET_MODE (op);
+
+ if (omode != mode && omode != VOIDmode && mode != VOIDmode)
+ return false;
+
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ return !SYMBOL_REF_TLS_MODEL (op);
+
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ op = XEXP (op, 0);
+ return (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF)
+ && GET_CODE (XEXP (op, 1)) == CONST_INT);
+
+ default:
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if OP is a symbolic operand for the TLS Global Dynamic model.
+(define_predicate "tgd_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_GLOBAL_DYNAMIC")))
+
+;; Return true if OP is a symbolic operand for the TLS Local Dynamic model.
+(define_predicate "tld_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_DYNAMIC")))
+
+;; Return true if OP is a symbolic operand for the TLS Initial Exec model.
+(define_predicate "tie_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC")))
+
+;; Return true if OP is a symbolic operand for the TLS Local Exec model.
+(define_predicate "tle_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC")))
+
+;; Return true if the operand is an argument used in generating PIC references
+;; in either the medium/low or embedded medium/anywhere code models on V9.
+;; Check for (const (minus (symbol_ref:GOT)
+;; (const (minus (label) (pc)))))
+(define_predicate "medium_pic_operand"
+ (match_code "const")
+{
+ /* Check for (const (minus (symbol_ref:GOT)
+ (const (minus (label) (pc))))). */
+ op = XEXP (op, 0);
+ return GET_CODE (op) == MINUS
+ && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (op, 1)) == CONST
+ && GET_CODE (XEXP (XEXP (op, 1), 0)) == MINUS;
+})
+
+;; Return true if OP is a LABEL_REF of mode MODE.
+(define_predicate "label_ref_operand"
+ (and (match_code "label_ref")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Return true if OP is a data segment reference. This includes the readonly
+;; data segment or, in other words, anything but the text segment.
+;; This is needed in the embedded medium/anywhere code model on V9. These
+;; values are accessed with EMBMEDANY_BASE_REG.
+(define_predicate "data_segment_operand"
+ (match_code "symbol_ref,plus,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ return ! SYMBOL_REF_FUNCTION_P (op);
+ case PLUS :
+ /* Assume canonical format of symbol + constant.
+ Fall through. */
+ case CONST :
+ return data_segment_operand (XEXP (op, 0), VOIDmode);
+ default :
+ gcc_unreachable ();
+ }
+})
+
+;; Return true if OP is a text segment reference.
+;; This is needed in the embedded medium/anywhere code model on V9.
+(define_predicate "text_segment_operand"
+ (match_code "label_ref,symbol_ref,plus,const")
+{
+ switch (GET_CODE (op))
+ {
+ case LABEL_REF :
+ return true;
+ case SYMBOL_REF :
+ return SYMBOL_REF_FUNCTION_P (op);
+ case PLUS :
+ /* Assume canonical format of symbol + constant.
+ Fall through. */
+ case CONST :
+ return text_segment_operand (XEXP (op, 0), VOIDmode);
+ default :
+ gcc_unreachable ();
+ }
+})
+
+
+;; Predicates for registers.
+
+;; Return true if OP is either the zero constant or a register.
+(define_predicate "register_or_zero_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_zero_operand")))
+
+;; Return true if OP is a register operand in a floating point register.
+(define_predicate "fp_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op); /* Possibly a MEM */
+ return REG_P (op) && SPARC_FP_REG_P (REGNO (op));
+})
+
+;; Return true if OP is an integer register.
+(define_special_predicate "int_register_operand"
+ (ior (match_test "register_operand (op, SImode)")
+ (match_test "TARGET_ARCH64 && register_operand (op, DImode)")))
+
+;; Return true if OP is a floating point condition code register.
+(define_predicate "fcc_register_operand"
+ (match_code "reg")
+{
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return false;
+ if (mode == VOIDmode
+ && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
+ return false;
+
+#if 0 /* ??? 1 when %fcc0-3 are pseudos first. See gen_compare_reg(). */
+ if (reg_renumber == 0)
+ return REGNO (op) >= FIRST_PSEUDO_REGISTER;
+ return REGNO_OK_FOR_CCFP_P (REGNO (op));
+#else
+ return ((unsigned) REGNO (op) - SPARC_FIRST_V9_FCC_REG) < 4;
+#endif
+})
+
+;; Return true if OP is the floating point condition code register fcc0.
+(define_predicate "fcc0_register_operand"
+ (match_code "reg")
+{
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return false;
+ if (mode == VOIDmode
+ && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
+ return false;
+
+ return REGNO (op) == SPARC_FCC_REG;
+})
+
+;; Return true if OP is an integer or floating point condition code register.
+(define_predicate "icc_or_fcc_register_operand"
+ (match_code "reg")
+{
+ if (REGNO (op) == SPARC_ICC_REG)
+ {
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return false;
+ if (mode == VOIDmode
+ && GET_MODE (op) != CCmode && GET_MODE (op) != CCXmode)
+ return false;
+
+ return true;
+ }
+
+ return fcc_register_operand (op, mode);
+})
+
+
+;; Predicates for arithmetic instructions.
+
+;; Return true if OP is a register, or is a constant that is representable
+;; by a 13-bit signed field. This is an acceptable operand for most
+;; 3-address instructions.
+(define_predicate "arith_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "small_int_operand")))
+
+;; 64-bit: Same as above.
+;; 32-bit: Return true if OP is a register, or is a constant that is
+;; representable by a couple of 13-bit signed fields. This is an
+;; acceptable operand for most 3-address splitters.
+(define_predicate "arith_double_operand"
+ (match_code "const_int,const_double,reg,subreg")
+{
+ bool arith_simple_operand = arith_operand (op, mode);
+ HOST_WIDE_INT m1, m2;
+
+ if (TARGET_ARCH64 || arith_simple_operand)
+ return arith_simple_operand;
+
+#if HOST_BITS_PER_WIDE_INT == 32
+ if (GET_CODE (op) != CONST_DOUBLE)
+ return false;
+ m1 = CONST_DOUBLE_LOW (op);
+ m2 = CONST_DOUBLE_HIGH (op);
+#else
+ if (GET_CODE (op) != CONST_INT)
+ return false;
+ m1 = trunc_int_for_mode (INTVAL (op), SImode);
+ m2 = trunc_int_for_mode (INTVAL (op) >> 32, SImode);
+#endif
+
+ return SPARC_SIMM13_P (m1) && SPARC_SIMM13_P (m2);
+})
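
[Editorial note: on 32-bit targets the predicate asks whether each 32-bit half of the 64-bit constant fits a signed 13-bit immediate, so a splitter can synthesize the value with two simple instructions. A hedged standalone sketch of that check:]

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch, not GCC code: SPARC_SIMM13_P restated locally (-4096..4095).  */
    static bool simm13_p (int32_t v) { return v >= -4096 && v <= 4095; }

    static bool
    arith_double_constant_ok (int64_t c)
    {
      int32_t lo = (int32_t) c;            /* trunc_int_for_mode (c, SImode) */
      int32_t hi = (int32_t) (c >> 32);
      return simm13_p (lo) && simm13_p (hi);
    }

    int main (void)
    {
      return arith_double_constant_ok (0x00000FFF00000FFFLL) ? 0 : 1;
    }
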
+
+;; Return true if OP is suitable as second operand for add/sub.
+(define_predicate "arith_add_operand"
+ (ior (match_operand 0 "arith_operand")
+ (match_operand 0 "const_4096_operand")))
+
+;; Return true if OP is suitable as second double operand for add/sub.
+(define_predicate "arith_double_add_operand"
+ (match_code "const_int,const_double,reg,subreg")
+{
+ bool _arith_double_operand = arith_double_operand (op, mode);
+
+ if (_arith_double_operand)
+ return true;
+
+ return TARGET_ARCH64 && const_4096_operand (op, mode);
+})
+
+;; Return true if OP is a register, or is a CONST_INT that can fit in a
+;; signed 10-bit immediate field. This is an acceptable SImode operand for
+;; the movrcc instructions.
+(define_predicate "arith10_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM10_P (INTVAL (op))"))))
+
+;; Return true if OP is a register, or is a CONST_INT that can fit in a
+;; signed 11-bit immediate field. This is an acceptable SImode operand for
+;; the movcc instructions.
+(define_predicate "arith11_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "SPARC_SIMM11_P (INTVAL (op))"))))
+
+;; Return true if OP is a register or a constant for the umul instruction.
+(define_predicate "uns_arith_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "uns_small_int_operand")))
+
+
+;; Predicates for miscellaneous instructions.
+
+;; Return true if OP is valid for the lhs of a comparison insn.
+(define_predicate "compare_operand"
+ (match_code "reg,subreg,zero_extract")
+{
+ if (GET_CODE (op) == ZERO_EXTRACT)
+ return (register_operand (XEXP (op, 0), mode)
+ && small_int_operand (XEXP (op, 1), mode)
+ && small_int_operand (XEXP (op, 2), mode)
+ /* This matches cmp_zero_extract. */
+ && ((mode == SImode
+ && INTVAL (XEXP (op, 2)) > 19)
+ /* This matches cmp_zero_extract_sp64. */
+ || (TARGET_ARCH64
+ && mode == DImode
+ && INTVAL (XEXP (op, 2)) > 51)));
+ else
+ return register_operand (op, mode);
+})
+
+;; Return true if OP is a valid operand for the source of a move insn.
+(define_predicate "input_operand"
+ (match_code "const_int,const_double,const_vector,reg,subreg,mem")
+{
+ enum mode_class mclass;
+
+ /* If both modes are non-void they must be the same. */
+ if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
+ return false;
+
+ mclass = GET_MODE_CLASS (mode);
+
+ /* Allow any 1-instruction integer constant. */
+ if (mclass == MODE_INT
+ && (small_int_operand (op, mode) || const_high_operand (op, mode)))
+ return true;
+
+ /* If 32-bit mode and this is a DImode constant, allow it
+ so that the splits can be generated. */
+ if (TARGET_ARCH32
+ && mode == DImode
+ && (GET_CODE (op) == CONST_DOUBLE || GET_CODE (op) == CONST_INT))
+ return true;
+
+ if ((mclass == MODE_FLOAT && GET_CODE (op) == CONST_DOUBLE)
+ || (mclass == MODE_VECTOR_INT && GET_CODE (op) == CONST_VECTOR))
+ return true;
+
+ if (register_operand (op, mode))
+ return true;
+
+ /* If this is a SUBREG, look inside so that we handle paradoxical ones. */
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* Check for valid MEM forms. */
+ if (GET_CODE (op) == MEM)
+ return memory_address_p (mode, XEXP (op, 0));
+
+ return false;
+})
+
+;; Return true if OP is an address suitable for a call insn.
+;; Call insn on SPARC can take a PC-relative constant address
+;; or any regular memory address.
+(define_predicate "call_address_operand"
+ (ior (match_operand 0 "symbolic_operand")
+ (match_test "memory_address_p (Pmode, op)")))
+
+;; Return true if OP is an operand suitable for a call insn.
+(define_predicate "call_operand"
+ (and (match_code "mem")
+ (match_test "call_address_operand (XEXP (op, 0), mode)")))
+
+
+;; Predicates for operators.
+
+;; Return true if OP is a comparison operator. This allows the use of
+;; MATCH_OPERATOR to recognize all the branch insns.
+(define_predicate "noov_compare_operator"
+ (match_code "ne,eq,ge,gt,le,lt,geu,gtu,leu,ltu")
+{
+ enum rtx_code code = GET_CODE (op);
+ if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode
+ || GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
+ /* These are the only branches which work with CC_NOOVmode. */
+ return (code == EQ || code == NE || code == GE || code == LT);
+ return true;
+})
+
+;; Return true if OP is a 64-bit comparison operator. This allows the use of
+;; MATCH_OPERATOR to recognize all the branch insns.
+(define_predicate "noov_compare64_operator"
+ (and (match_code "ne,eq,ge,gt,le,lt,geu,gtu,leu,ltu")
+ (match_test "TARGET_V9"))
+{
+ enum rtx_code code = GET_CODE (op);
+ if (GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
+ /* These are the only branches which work with CCX_NOOVmode. */
+ return (code == EQ || code == NE || code == GE || code == LT);
+ return (GET_MODE (XEXP (op, 0)) == CCXmode);
+})
+
+;; Return true if OP is a comparison operator suitable for use in V9
+;; conditional move or branch on register contents instructions.
+(define_predicate "v9_register_compare_operator"
+ (match_code "eq,ne,ge,lt,le,gt"))
+
+;; Return true if OP is an operator which can set the condition codes
+;; explicitly. We do not include PLUS and MINUS because these
+;; require CC_NOOVmode, which we handle explicitly.
+(define_predicate "cc_arith_operator"
+ (match_code "and,ior,xor"))
+
+;; Return true if OP is an operator which can bitwise complement its
+;; second operand and set the condition codes explicitly.
+;; XOR is not here because combine canonicalizes (xor (not ...) ...)
+;; and (xor ... (not ...)) to (not (xor ...)).
+(define_predicate "cc_arith_not_operator"
+ (match_code "and,ior"))
diff --git a/gcc/config/sparc/rtemself.h b/gcc/config/sparc/rtemself.h
new file mode 100644
index 000000000..f0b8202ad
--- /dev/null
+++ b/gcc/config/sparc/rtemself.h
@@ -0,0 +1,33 @@
+/* Definitions for rtems targeting a SPARC using ELF.
+ Copyright (C) 1996, 1997, 2000, 2002, 2005, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Target OS builtins. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ } \
+ while (0)
+
+/* Use the default */
+#undef LINK_GCC_C_SEQUENCE_SPEC
diff --git a/gcc/config/sparc/sol2-64.h b/gcc/config/sparc/sol2-64.h
new file mode 100644
index 000000000..41e228114
--- /dev/null
+++ b/gcc/config/sparc/sol2-64.h
@@ -0,0 +1,22 @@
+/* Definitions of target machine for GCC, for bi-arch SPARC
+ running Solaris 2, defaulting to 64-bit code generation.
+
+ Copyright (C) 1999, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_64BIT_DEFAULT 1
diff --git a/gcc/config/sparc/sol2-bi.h b/gcc/config/sparc/sol2-bi.h
new file mode 100644
index 000000000..356e8256f
--- /dev/null
+++ b/gcc/config/sparc/sol2-bi.h
@@ -0,0 +1,271 @@
+/* Definitions of target machine for GCC, for bi-arch SPARC
+ running Solaris 2 using the system assembler and linker.
+ Copyright (C) 2002, 2003, 2004, 2006, 2007, 2009, 2010, 2011, 2012
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The default code model used to be CM_MEDANY on Solaris
+ but even Sun eventually found it to be quite wasteful
+ and changed it to CM_MEDMID in the Studio 9 compiler. */
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_MEDMID
+
+#define AS_SPARC64_FLAG "-xarch=v9"
+
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC ""
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa"
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "a"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
+#undef CPP_CPU64_DEFAULT_SPEC
+#define CPP_CPU64_DEFAULT_SPEC ""
+#undef ASM_CPU32_DEFAULT_SPEC
+#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
+#undef ASM_CPU64_DEFAULT_SPEC
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#endif
+
+#if DEFAULT_ARCH32_P
+#define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
+#else
+#define DEF_ARCH32_SPEC(__str) "%{m32:" __str "}"
+#define DEF_ARCH64_SPEC(__str) "%{!m32:" __str "}"
+#endif
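
[Editorial note: these helpers wrap an assembler flag in the spec guard for the non-default word size. Illustratively, with DEFAULT_ARCH32_P:]

    DEF_ARCH32_SPEC("-xarch=v8plus")  ->  "%{!m64:-xarch=v8plus}"
    DEF_ARCH64_SPEC("-xarch=v9")      ->  "%{m64:-xarch=v9}"

[so the v9 flag reaches the assembler only when -m64 is given.]
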
+
+#undef CPP_CPU_SPEC
+#define CPP_CPU_SPEC "\
+%{mcpu=sparclet|mcpu=tsc701:-D__sparclet__} \
+%{mcpu=sparclite|mcpu=f930|mcpu=f934:-D__sparclite__} \
+%{mcpu=v8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=supersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3|mcpu=niagara|mcpu=niagara2:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+%{!mcpu*:%(cpp_cpu_default)} \
+"
+
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC "\
+%{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "} \
+%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "a") "} \
+%{mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
+%{mcpu=niagara:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
+%{mcpu=niagara2:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
+%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "}}}}}} \
+%{!mcpu*:%(asm_cpu_default)} \
+"
+
+#undef CPP_CPU_DEFAULT_SPEC
+#define CPP_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" CPP_CPU64_DEFAULT_SPEC "} \
+%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" CPP_CPU32_DEFAULT_SPEC "} \
+%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \
+")
+
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
+/* wchar_t is declared differently in <wchar.h> for 32-bit and 64-bit
+ compilations. This is called for by SCD 2.4.1, p. 6-83, Figure 6-65
+ (32-bit) and p. 6P-10, Figure 6.38 (64-bit). */
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_ARCH64 ? "int" : "long int")
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Same for wint_t. See SCD 2.4.1, p. 6-83, Figure 6-66 (32-bit). There's
+ no corresponding 64-bit definition, but this is what Solaris 8
+ <iso/wchar_iso.h> uses. */
+
+#undef WINT_TYPE
+#define WINT_TYPE (TARGET_ARCH64 ? "int" : "long int")
+
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
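
[Editorial note: both types stay 32 bits wide on either multilib; only the underlying C type name changes between -m32 and -m64. An illustrative C11 check, not part of GCC:]

    /* Illustrative: wchar_t and wint_t are 32 bits on both Solaris multilibs. */
    #include <stddef.h>
    #include <wchar.h>

    _Static_assert (sizeof (wchar_t) == 4, "wchar_t is 32 bits");
    _Static_assert (sizeof (wint_t) == 4, "wint_t is 32 bits");

    int main (void) { return 0; }
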
+
+#undef CPP_ARCH32_SPEC
+#define CPP_ARCH32_SPEC ""
+#undef CPP_ARCH64_SPEC
+#define CPP_ARCH64_SPEC "-D__arch64__ -D__sparcv9"
+
+#undef CPP_ARCH_SPEC
+#define CPP_ARCH_SPEC "\
+%{m32:%(cpp_arch32)} \
+%{m64:%(cpp_arch64)} \
+%{!m32:%{!m64:%(cpp_arch_default)}} \
+"
+
+#undef ASM_ARCH_SPEC
+#define ASM_ARCH_SPEC ""
+
+#undef ASM_ARCH32_SPEC
+#define ASM_ARCH32_SPEC ""
+
+#undef ASM_ARCH64_SPEC
+#define ASM_ARCH64_SPEC ""
+
+#undef ASM_ARCH_DEFAULT_SPEC
+#define ASM_ARCH_DEFAULT_SPEC ""
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "startfile_arch", STARTFILE_ARCH_SPEC }, \
+ { "link_arch32", LINK_ARCH32_SPEC }, \
+ { "link_arch64", LINK_ARCH64_SPEC }, \
+ { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC },
+
+/*
+ * This should be the same as in sol2.h, except with "/sparcv9"
+ * appended to the paths, and /usr/ccs/lib is no longer necessary.
+ */
+#define LINK_ARCH64_SPEC_BASE \
+ "%{mcmodel=medlow:-M /usr/lib/ld/sparcv9/map.below4G} \
+ %{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,%R/usr/ucblib/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9}}} \
+ -R %R/usr/ucblib/sparcv9} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/sparcv9:%R/usr/lib/sparcv9:%R/lib/sparcv9} \
+ %{!p:%{!pg:-Y P,%R/usr/lib/sparcv9:%R/lib/sparcv9}}}}"
+
+#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
+
+#undef LINK_ARCH_SPEC
+#if DISABLE_MULTILIB
+#if DEFAULT_ARCH32_P
+#define LINK_ARCH_SPEC "\
+%{m32:%(link_arch32)} \
+%{m64:%edoes not support multilib} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#else
+#define LINK_ARCH_SPEC "\
+%{m32:%edoes not support multilib} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#endif
+#else
+#define LINK_ARCH_SPEC "\
+%{m32:%(link_arch32)} \
+%{m64:%(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#endif
+
+#define LINK_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC)
+
+#undef CC1_SPEC
+#if DEFAULT_ARCH32_P
+#define CC1_SPEC "\
+%{m64:%{m32:%emay not use both -m32 and -m64}} \
+%{m64:-mptr64 -mstack-bias -mno-v8plus \
+ %{!mcpu*:%{!mv8plus:-mcpu=v9}}} \
+"
+#else
+#define CC1_SPEC "\
+%{m32:%{m64:%emay not use both -m32 and -m64}} \
+%{m32:-mptr32 -mno-stack-bias \
+ %{!mcpu*:%{!mv8plus:-mcpu=v9}}} \
+%{mv8plus:-m32 -mptr32 -mno-stack-bias \
+ %{!mcpu*:-mcpu=v9}} \
+"
+#endif
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-cpu is ignored if -mcpu is specified.
+ --with-tune is ignored if -mtune is specified.
+ --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu
+ are specified.
+ In the SPARC_BI_ARCH compiler we cannot pass %{!mcpu=*:-mcpu=%(VALUE)}
+ here, otherwise say -mcpu=v7 would be passed even when -m64.
+ CC1_SPEC above takes care of this instead. */
+#undef OPTION_DEFAULT_SPECS
+#if DEFAULT_ARCH32_P
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!m64:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" }
+#else
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!m32:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" }
+#endif
+
+#if DEFAULT_ARCH32_P
+#define MULTILIB_DEFAULTS { "m32" }
+#else
+#define MULTILIB_DEFAULTS { "m64" }
+#endif
diff --git a/gcc/config/sparc/sol2-c1.asm b/gcc/config/sparc/sol2-c1.asm
new file mode 100644
index 000000000..63aa748e8
--- /dev/null
+++ b/gcc/config/sparc/sol2-c1.asm
@@ -0,0 +1,103 @@
+! crt1.s for sparc & sparcv9 (SunOS 5)
+
+! Copyright (C) 1992, 2009 Free Software Foundation, Inc.
+! Written By David Vinayak Henkel-Wallace, June 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! This file takes control of the process from the kernel, as specified
+! in section 3 of the SVr4 ABI.
+! This file is the first thing linked into any executable.
+
+#ifdef __sparcv9
+#define CPTRSIZE 8
+#define CPTRSHIFT 3
+#define STACK_BIAS 2047
+#define ldn ldx
+#define stn stx
+#define setn(s, scratch, dst) setx s, scratch, dst
+#else
+#define CPTRSIZE 4
+#define CPTRSHIFT 2
+#define STACK_BIAS 0
+#define ldn ld
+#define stn st
+#define setn(s, scratch, dst) set s, dst
+#endif
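
[Editorial note: with these definitions the argc/argv loads below reduce to concrete offsets. On sparcv9, argc is at [%sp + 16*8 + 2047] = [%sp + 2175] and argv begins at [%sp + 17*8 + 2047] = [%sp + 2183]; on 32-bit sparc the stack bias is 0, so argc sits at [%sp + 64] and argv at [%sp + 68].]
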
+
+ .section ".text"
+ .proc 022
+ .global _start
+
+_start:
+ mov 0, %fp ! Mark bottom frame pointer
+ ldn [%sp + (16 * CPTRSIZE) + STACK_BIAS], %l0 ! argc
+ add %sp, (17 * CPTRSIZE) + STACK_BIAS, %l1 ! argv
+
+ ! Leave some room for a call. Sun leaves 32 octets (to sit on
+ ! a cache line?) so we do too.
+#ifdef __sparcv9
+ sub %sp, 48, %sp
+#else
+ sub %sp, 32, %sp
+#endif
+
+ ! %g1 may contain a function to be registered w/atexit
+ orcc %g0, %g1, %g0
+#ifdef __sparcv9
+ be %xcc, .nope
+#else
+ be .nope
+#endif
+ mov %g1, %o0
+ call atexit
+ nop
+.nope:
+ ! Now make sure constructors and destructors are handled.
+ setn(_fini, %o1, %o0)
+ call atexit, 1
+ nop
+ call _init, 0
+ nop
+
+ ! We ignore the auxiliary vector; there is no defined way to
+ ! access those data anyway. Instead, go straight to main:
+ mov %l0, %o0 ! argc
+ mov %l1, %o1 ! argv
+#ifdef GCRT1
+ setn(___Argv, %o4, %o3)
+ stn %o1, [%o3] ! *___Argv
+#endif
+ ! Skip argc words past argv, to env:
+ sll %l0, CPTRSHIFT, %o2
+ add %o2, CPTRSIZE, %o2
+ add %l1, %o2, %o2 ! env
+ setn(_environ, %o4, %o3)
+ stn %o2, [%o3] ! *_environ
+ call main, 4
+ nop
+ call exit, 0
+ nop
+ call _exit, 0
+ nop
+ ! We should never get here.
+
+ .type _start,#function
+ .size _start,.-_start
diff --git a/gcc/config/sparc/sol2-ci.asm b/gcc/config/sparc/sol2-ci.asm
new file mode 100644
index 000000000..8825f7958
--- /dev/null
+++ b/gcc/config/sparc/sol2-ci.asm
@@ -0,0 +1,55 @@
+! crti.s for solaris 2.0.
+
+! Copyright (C) 1992, 2008, 2009 Free Software Foundation, Inc.
+! Written By David Vinayak Henkel-Wallace, June 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! This file just makes a stack frame for the contents of the .fini and
+! .init sections. Users may put any desired instructions in those
+! sections.
+
+! This file is linked in before the Values-Xx.o files and also before
+! crtbegin, with which perhaps it should be merged.
+
+ .section ".init"
+ .proc 022
+ .global _init
+ .type _init,#function
+ .align 4
+_init:
+#ifdef __sparcv9
+ save %sp, -176, %sp
+#else
+ save %sp, -96, %sp
+#endif
+
+
+ .section ".fini"
+ .proc 022
+ .global _fini
+ .type _fini,#function
+ .align 4
+_fini:
+#ifdef __sparcv9
+ save %sp, -176, %sp
+#else
+ save %sp, -96, %sp
+#endif
diff --git a/gcc/config/sparc/sol2-cn.asm b/gcc/config/sparc/sol2-cn.asm
new file mode 100644
index 000000000..b92f3cf08
--- /dev/null
+++ b/gcc/config/sparc/sol2-cn.asm
@@ -0,0 +1,41 @@
+! crtn.s for solaris 2.0.
+
+! Copyright (C) 1992, 2008, 2009 Free Software Foundation, Inc.
+! Written By David Vinayak Henkel-Wallace, June 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! This file just makes sure that the .fini and .init sections do in
+! fact return. Users may put any desired instructions in those sections.
+! This file is the last thing linked into any executable.
+
+ .section ".init"
+ .align 4
+
+ ret
+ restore
+
+ .section ".fini"
+ .align 4
+
+ ret
+ restore
+
+! Th-th-th-that is all folks!
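+
+! Taken together with crti.s: the linker concatenates the .init (and
+! .fini) fragments of every object in link order, so the effective
+! function is roughly (a sketch, assuming the 32-bit frame size):
+!
+!   _init:  save %sp, -96, %sp
+!           ... .init fragments from crtbegin.o, user objects, crtend.o ...
+!           ret
+!           restore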
diff --git a/gcc/config/sparc/sol2-gas-bi.h b/gcc/config/sparc/sol2-gas-bi.h
new file mode 100644
index 000000000..001f978b8
--- /dev/null
+++ b/gcc/config/sparc/sol2-gas-bi.h
@@ -0,0 +1,23 @@
+/* Definitions of target machine for GCC, for bi-arch SPARC
+ running Solaris 2 using the GNU assembler.
+
+ Copyright (C) 2002, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef AS_SPARC64_FLAG
+#define AS_SPARC64_FLAG "-TSO -64 -Av9"
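+
+/* With the GNU assembler these flags select the TSO memory model (-TSO),
+   64-bit output (-64) and the V9 architecture (-Av9).  */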
diff --git a/gcc/config/sparc/sol2-gas.h b/gcc/config/sparc/sol2-gas.h
new file mode 100644
index 000000000..d83e7b917
--- /dev/null
+++ b/gcc/config/sparc/sol2-gas.h
@@ -0,0 +1,47 @@
+/* Definitions of target machine for GCC, for SPARC running Solaris 2
+ using the GNU assembler.
+ Copyright (C) 2004, 2005, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Undefine this as the filler pattern doesn't work with GNU as. */
+#undef ASM_OUTPUT_ALIGN_WITH_NOP
+
+/* Undefine this so that BNSYM/ENSYM pairs are emitted by STABS+. */
+#undef NO_DBX_BNSYM_ENSYM
+
+/* Use GNU extensions to TLS support. */
+#ifdef HAVE_AS_TLS
+#undef TARGET_SUN_TLS
+#undef TARGET_GNU_TLS
+#define TARGET_SUN_TLS 0
+#define TARGET_GNU_TLS 1
+#endif
+
+/* Use default ELF section syntax. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* And standard pushsection syntax. While GNU as supports the non-standard
+   variant too, we prefer the standard form.  */
+#undef PUSHSECTION_FORMAT
+#define PUSHSECTION_FORMAT "\t.pushsection\t%s\n"
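+
+/* A minimal usage sketch (hypothetical, for illustration only):
+
+     fprintf (asm_out_file, PUSHSECTION_FORMAT, ".debug_frame");
+
+   emits "\t.pushsection\t.debug_frame\n".  */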
diff --git a/gcc/config/sparc/sol2-gld-bi.h b/gcc/config/sparc/sol2-gld-bi.h
new file mode 100644
index 000000000..3be20b2cd
--- /dev/null
+++ b/gcc/config/sparc/sol2-gld-bi.h
@@ -0,0 +1,67 @@
+/* Definitions of target machine for GCC, for bi-arch SPARC
+ running Solaris 2 using the GNU linker.
+
+Copyright (C) 2002, 2003, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINK_ARCH32_SPEC
+#define LINK_ARCH32_SPEC \
+ LINK_ARCH32_SPEC_BASE "%{!static: -rpath-link %R/usr/lib}"
+
+#undef LINK_ARCH64_SPEC
+#define LINK_ARCH64_SPEC \
+ LINK_ARCH64_SPEC_BASE "%{!static: -rpath-link %R/usr/lib/sparcv9}"
+
+/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly
+ follow the Solaris 2 ABI. Prefer them if present. */
+#ifdef HAVE_LD_SOL2_EMULATION
+#define SPARC32_EMULATION "elf32_sparc_sol2"
+#define SPARC64_EMULATION "elf64_sparc_sol2"
+#else
+#define SPARC32_EMULATION "elf32_sparc"
+#define SPARC64_EMULATION "elf64_sparc"
+#endif
+
+#undef LINK_ARCH_SPEC
+#if DISABLE_MULTILIB
+#if DEFAULT_ARCH32_P
+#define LINK_ARCH_SPEC "\
+%{m32:-m " SPARC32_EMULATION " %(link_arch32)} \
+%{m64:%edoes not support multilib} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#else
+#define LINK_ARCH_SPEC "\
+%{m32:%edoes not support multilib} \
+%{m64:-m " SPARC64_EMULATION " %(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#endif
+#else
+#define LINK_ARCH_SPEC "\
+%{m32:-m " SPARC32_EMULATION " %(link_arch32)} \
+%{m64:-m " SPARC64_EMULATION " %(link_arch64)} \
+%{!m32:%{!m64:%(link_arch_default)}} \
+"
+#endif
+
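+/* To illustrate the spec language above (driver behavior): with multilibs
+   enabled, "gcc -m32" makes the driver pass "-m elf32_sparc_sol2" (or
+   "-m elf32_sparc" with an older GNU ld) to the linker, while the %e
+   branches turn a disabled multilib into the hard error
+   "does not support multilib".  */
+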
diff --git a/gcc/config/sparc/sol2-unwind.h b/gcc/config/sparc/sol2-unwind.h
new file mode 100644
index 000000000..d6c4f6c1f
--- /dev/null
+++ b/gcc/config/sparc/sol2-unwind.h
@@ -0,0 +1,480 @@
+/* DWARF2 EH unwinding support for SPARC Solaris.
+ Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <ucontext.h>
+#include <sys/frame.h>
+#include <sys/stack.h>
+
+#if defined(__arch64__)
+
+#define IS_SIGHANDLER sparc64_is_sighandler
+
+static int
+sparc64_is_sighandler (unsigned int *pc, void *cfa, int *nframes)
+{
+ if (/* Solaris 8 - single-threaded
+ ----------------------------
+ <sigacthandler+24>: add %g5, %o7, %o2
+ <sigacthandler+28>: ldx [ %o2 + 0xfa0 ], %g5
+ <sigacthandler+32>: sra %i0, 0, %o0
+ <sigacthandler+36>: sllx %o0, 3, %g4
+ <sigacthandler+40>: ldx [ %g4 + %g5 ], %l0
+ <sigacthandler+44>: call %l0
+ <sigacthandler+48>: mov %i2, %o2
+ <sigacthandler+52>: cmp %i3, 8 <--- PC */
+ ( pc[-7] == 0x9401400f
+ && pc[-6] == 0xca5aafa0
+ && pc[-5] == 0x913e2000
+ && pc[-4] == 0x892a3003
+ && pc[-3] == 0xe0590005
+ && pc[-2] == 0x9fc40000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x80a6e008)
+
+ || /* Solaris 9 - single-threaded
+ ----------------------------
+ The pattern changes slightly in different versions of the
+ operating system, so we skip the comparison against pc[-6] for
+ Solaris 9.
+
+ <sigacthandler+24>: sra %i0, 0, %l1
+
+ Solaris 9 5/02:
+ <sigacthandler+28>: ldx [ %o2 + 0xf68 ], %g5
+ Solaris 9 9/05:
+ <sigacthandler+28>: ldx [ %o2 + 0xe50 ], %g5
+
+ <sigacthandler+32>: sllx %l1, 3, %g4
+ <sigacthandler+36>: mov %l1, %o0
+ <sigacthandler+40>: ldx [ %g4 + %g5 ], %l0
+ <sigacthandler+44>: call %l0
+ <sigacthandler+48>: mov %i2, %o2
+ <sigacthandler+52>: cmp %l1, 8 <--- PC */
+ ( pc[-7] == 0xa33e2000
+ /* skip pc[-6] */
+ && pc[-5] == 0x892c7003
+ && pc[-4] == 0x90100011
+ && pc[-3] == 0xe0590005
+ && pc[-2] == 0x9fc40000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x80a46008))
+ {
+ /* We need to move up one frame:
+
+ <signal handler> <-- context->cfa
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 1;
+ return 1;
+ }
+
+ if (/* Solaris 8+ - multi-threaded
+ ----------------------------
+ <__sighndlr>: save %sp, -176, %sp
+ <__sighndlr+4>: mov %i0, %o0
+ <__sighndlr+8>: mov %i1, %o1
+ <__sighndlr+12>: call %i3
+ <__sighndlr+16>: mov %i2, %o2
+ <__sighndlr+20>: ret <--- PC
+ <__sighndlr+24>: restore */
+ pc[-5] == 0x9de3bf50
+ && pc[-4] == 0x90100018
+ && pc[-3] == 0x92100019
+ && pc[-2] == 0x9fc6c000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x81c7e008
+ && pc[ 1] == 0x81e80000)
+ {
+ /* We have observed different calling frames among different
+       versions of the operating system, so we need to
+ discriminate using the upper frame. We look for the return
+ address of the caller frame (there is an offset of 15 double
+ words between the frame address and the place where this return
+ address is stored) in order to do some more pattern matching. */
+ unsigned int cuh_pattern
+ = *(unsigned int *)(*(unsigned long *)(cfa + 15*8) - 4);
+
+ if (cuh_pattern == 0x92100019)
+ /* This matches the call_user_handler pattern for Solaris 11.
+ This is the same setup as for Solaris 9, see below. */
+ *nframes = 3;
+
+ else if (cuh_pattern == 0xd25fa7ef)
+ {
+ /* This matches the call_user_handler pattern for Solaris 10.
+ There are 2 cases so we look for the return address of the
+ caller's caller frame in order to do more pattern matching. */
+ unsigned long sah_address = *(unsigned long *)(cfa + 176 + 15*8);
+
+ if (sah_address && *(unsigned int *)(sah_address - 4) == 0x92100019)
+ /* This is the same setup as for Solaris 9, see below. */
+ *nframes = 3;
+ else
+ /* The sigacthandler frame isn't present in the chain.
+ We need to move up two frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler frame
+ <kernel>
+ */
+ *nframes = 2;
+ }
+
+ else if (cuh_pattern == 0x9410001a || cuh_pattern == 0x94100013)
+ /* This matches the call_user_handler pattern for Solaris 9 and
+	 for Solaris 8 running inside Solaris Containers, respectively.
+ We need to move up three frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 3;
+
+ else /* cuh_pattern == 0xe0272010 */
+ /* This is the default Solaris 8 case.
+ We need to move up two frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 2;
+
+ return 1;
+ }
+
+ return 0;
+}
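+
+/* The checks above all follow one pattern: read the instruction words
+   ending at PC and compare them against a known image, treating the
+   release-dependent words as wildcards.  A minimal sketch (hypothetical
+   helper, not used by this file):
+
+     static int
+     match_insns (const unsigned int *pc, const unsigned int *image, int n)
+     {
+       int i;
+       for (i = 0; i < n; i++)
+         if (image[i] != 0 && pc[i - (n - 1)] != image[i])
+           return 0;
+       return 1;
+     }  */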
+
+#define MD_FALLBACK_FRAME_STATE_FOR sparc64_fallback_frame_state
+
+#define MD_FROB_UPDATE_CONTEXT sparc64_frob_update_context
+
+static void
+sparc64_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ /* The column of %sp contains the old CFA, not the old value of %sp.
+ The CFA offset already comprises the stack bias so, when %sp is the
+ CFA register, we must avoid counting the stack bias twice. Do not
+ do that for signal frames as the offset is artificial for them. */
+ if (fs->regs.cfa_reg == __builtin_dwarf_sp_column ()
+ && fs->regs.cfa_how == CFA_REG_OFFSET
+ && fs->regs.cfa_offset != 0
+ && !fs->signal_frame)
+ context->cfa -= STACK_BIAS;
+}
+
+#else
+
+#define IS_SIGHANDLER sparc_is_sighandler
+
+static int
+sparc_is_sighandler (unsigned int *pc, void *cfa, int *nframes)
+{
+ if (/* Solaris 8, 9 - single-threaded
+ -------------------------------
+ The pattern changes slightly in different versions of the operating
+ system, so we skip the comparison against pc[-6].
+
+ <sigacthandler+16>: add %o1, %o7, %o3
+ <sigacthandler+20>: mov %i1, %o1
+
+ <sigacthandler+24>: ld [ %o3 + <offset> ], %o2
+
+ <sigacthandler+28>: sll %i0, 2, %o0
+ <sigacthandler+32>: ld [ %o0 + %o2 ], %l0
+ <sigacthandler+36>: mov %i0, %o0
+ <sigacthandler+40>: call %l0
+ <sigacthandler+44>: mov %i2, %o2
+ <sigacthandler+48>: cmp %i0, 8 <--- PC */
+ pc[-8] == 0x9602400f
+ && pc[-7] == 0x92100019
+ /* skip pc[-6] */
+ && pc[-5] == 0x912e2002
+ && pc[-4] == 0xe002000a
+ && pc[-3] == 0x90100018
+ && pc[-2] == 0x9fc40000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x80a62008)
+ {
+ /* We need to move up one frame:
+
+ <signal handler> <-- context->cfa
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 1;
+ return 1;
+ }
+
+ if (/* Solaris 8 - multi-threaded
+ ---------------------------
+ <__libthread_segvhdlr+212>: clr %o2
+ <__libthread_segvhdlr+216>: ld [ %fp + -28 ], %l0
+ <__libthread_segvhdlr+220>: mov %i4, %o0
+ <__libthread_segvhdlr+224>: mov %i1, %o1
+ <__libthread_segvhdlr+228>: call %l0
+ <__libthread_segvhdlr+232>: mov %i2, %o2
+ <__libthread_segvhdlr+236>: ret <--- PC
+ <__libthread_segvhdlr+240>: restore
+ <__libthread_segvhdlr+244>: cmp %o1, 0 */
+ pc[-6] == 0x94102000
+ && pc[-5] == 0xe007bfe4
+ && pc[-4] == 0x9010001c
+ && pc[-3] == 0x92100019
+ && pc[-2] == 0x9fc40000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x81c7e008
+ && pc[ 1] == 0x81e80000
+ && pc[ 2] == 0x80a26000)
+ {
+ /* We need to move up one frame:
+
+ <signal handler> <-- context->cfa
+ __libthread_segvhdlr
+ <kernel>
+ */
+ *nframes = 1;
+ return 1;
+ }
+
+  if (/* Solaris 8+ - multi-threaded
+ ----------------------------
+ <__sighndlr>: save %sp, -96, %sp
+ <__sighndlr+4>: mov %i0, %o0
+ <__sighndlr+8>: mov %i1, %o1
+ <__sighndlr+12>: call %i3
+ <__sighndlr+16>: mov %i2, %o2
+ <__sighndlr+20>: ret <--- PC
+ <__sighndlr+24>: restore */
+ pc[-5] == 0x9de3bfa0
+ && pc[-4] == 0x90100018
+ && pc[-3] == 0x92100019
+ && pc[-2] == 0x9fc6c000
+ && pc[-1] == 0x9410001a
+ && pc[ 0] == 0x81c7e008
+ && pc[ 1] == 0x81e80000)
+ {
+ /* We have observed different calling frames among different
+       versions of the operating system, so we need to
+ discriminate using the upper frame. We look for the return
+ address of the caller frame (there is an offset of 15 words
+ between the frame address and the place where this return
+ address is stored) in order to do some more pattern matching. */
+ unsigned int cuh_pattern
+ = *(unsigned int *)(*(unsigned int *)(cfa + 15*4) - 4);
+
+ if (cuh_pattern == 0x92100019)
+ /* This matches the call_user_handler pattern for Solaris 11.
+ This is the same setup as for Solaris 9, see below. */
+ *nframes = 3;
+
+ else if (cuh_pattern == 0xd407a04c)
+ {
+ /* This matches the call_user_handler pattern for Solaris 10.
+ There are 2 cases so we look for the return address of the
+ caller's caller frame in order to do more pattern matching. */
+ unsigned int sah_address = *(unsigned int *)(cfa + 96 + 15*4);
+
+ if (sah_address && *(unsigned int *)(sah_address - 4) == 0x92100019)
+ /* This is the same setup as for Solaris 9, see below. */
+ *nframes = 3;
+ else
+ /* The sigacthandler frame isn't present in the chain.
+ We need to move up two frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler frame
+ <kernel>
+ */
+ *nframes = 2;
+ }
+
+ else if (cuh_pattern == 0x9410001a || cuh_pattern == 0x9410001b)
+ /* This matches the call_user_handler pattern for Solaris 9 and
+	 for Solaris 8 running inside Solaris Containers, respectively.
+ We need to move up three frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ call_user_handler
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 3;
+
+ else /* cuh_pattern == 0x90100018 */
+ /* This is the default Solaris 8 case.
+ We need to move up two frames:
+
+ <signal handler> <-- context->cfa
+ __sighndlr
+ sigacthandler
+ <kernel>
+ */
+ *nframes = 2;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+#define MD_FALLBACK_FRAME_STATE_FOR sparc_fallback_frame_state
+
+#endif
+
+static _Unwind_Reason_Code
+MD_FALLBACK_FRAME_STATE_FOR (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ void *pc = context->ra;
+ struct frame *fp = (struct frame *) context->cfa;
+ int nframes;
+ void *this_cfa = context->cfa;
+ long new_cfa;
+ void *ra_location, *shifted_ra_location;
+ mcontext_t *mctx;
+ int i;
+
+  /* Deal with a frameless function from which a signal was raised. */
+ if (_Unwind_IsSignalFrame (context))
+ {
+ /* The CFA is by definition unmodified in this case. */
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = 0;
+
+ /* This is the canonical RA column. */
+ fs->retaddr_column = 15;
+
+ return _URC_NO_REASON;
+ }
+
+ if (IS_SIGHANDLER (pc, this_cfa, &nframes))
+ {
+ struct handler_args {
+ struct frame frwin;
+ ucontext_t ucontext;
+ } *handler_args;
+ ucontext_t *ucp;
+
+ /* context->cfa points into the frame after the saved frame pointer and
+ saved pc (struct frame).
+
+ The ucontext_t structure is in the kernel frame after a struct
+ frame. Since the frame sizes vary even within OS releases, we
+ need to walk the stack to get there. */
+
+ for (i = 0; i < nframes; i++)
+ fp = (struct frame *) ((char *)fp->fr_savfp + STACK_BIAS);
+
+ handler_args = (struct handler_args *) fp;
+ ucp = &handler_args->ucontext;
+ mctx = &ucp->uc_mcontext;
+ }
+
+ /* Exit if the pattern at the return address does not match the
+ previous three patterns. */
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = mctx->gregs[REG_SP];
+ /* The frame address is %sp + STACK_BIAS in 64-bit mode. */
+ new_cfa += STACK_BIAS;
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column ();
+ fs->regs.cfa_offset = new_cfa - (long) this_cfa;
+
+ /* Restore global and out registers (in this order) from the
+ ucontext_t structure, uc_mcontext.gregs field. */
+ for (i = 1; i < 16; i++)
+ {
+ /* We never restore %sp as everything is purely CFA-based. */
+ if ((unsigned int) i == __builtin_dwarf_sp_column ())
+ continue;
+
+ /* First the global registers and then the out registers. */
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = (long)&mctx->gregs[REG_Y + i] - new_cfa;
+ }
+
+ /* Just above the stack pointer there are 16 extended words in which
+ the register window (in and local registers) was saved. */
+ for (i = 0; i < 16; i++)
+ {
+ fs->regs.reg[i + 16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 16].loc.offset = i*sizeof(long);
+ }
+
+ /* Check whether we need to restore FPU registers. */
+ if (mctx->fpregs.fpu_qcnt)
+ {
+ for (i = 0; i < 32; i++)
+ {
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = (long)&mctx->fpregs.fpu_fr.fpu_regs[i] - new_cfa;
+ }
+
+#ifdef __arch64__
+ /* For 64-bit, fpu_fr.fpu_dregs contains 32 instead of 16 doubles. */
+ for (i = 32; i < 64; i++)
+ {
+ if (i > 32 && (i & 1))
+ continue;
+
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset
+ = (long)&mctx->fpregs.fpu_fr.fpu_dregs[i/2] - new_cfa;
+ }
+#endif
+ }
+
+ /* State the rules to find the kernel's code "return address", which is
+ the address of the active instruction when the signal was caught.
+ On the SPARC, since RETURN_ADDR_OFFSET (essentially 8) is defined, we
+ need to preventively subtract it from the purported return address. */
+ ra_location = &mctx->gregs[REG_PC];
+ shifted_ra_location = &mctx->gregs[REG_Y];
+ *(void **)shifted_ra_location = *(void **)ra_location - 8;
+ fs->retaddr_column = 0;
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)shifted_ra_location - new_cfa;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
new file mode 100644
index 000000000..4c8edaf1f
--- /dev/null
+++ b/gcc/config/sparc/sol2.h
@@ -0,0 +1,205 @@
+/* Definitions of target machine for GCC, for SPARC running Solaris 2
+ Copyright 1992, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005,
+ 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+ Contributed by Ron Guilmette (rfg@netcom.com).
+ Additional changes by David V. Henkel-Wallace (gumby@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Supposedly the same as vanilla sparc svr4, except for the stuff below: */
+
+/* This is here rather than in sparc.h because it's not known what
+ other assemblers will accept. */
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plus"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusa"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
+#undef ASM_CPU_DEFAULT_SPEC
+#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb"
+#endif
+
+#undef ASM_CPU_SPEC
+#define ASM_CPU_SPEC "\
+%{mcpu=v9:-xarch=v8plus} \
+%{mcpu=ultrasparc:-xarch=v8plusa} \
+%{mcpu=ultrasparc3:-xarch=v8plusb} \
+%{mcpu=niagara:-xarch=v8plusb} \
+%{mcpu=niagara2:-xarch=v8plusb} \
+%{!mcpu*:%(asm_cpu_default)} \
+"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "startfile_arch", STARTFILE_ARCH_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC }
+
+/* However, it appears that Solaris 2.0 uses the same reg numbering as
+ the old BSD-style system did. */
+
+/* The Solaris 2 assembler uses .skip, not .zero, so put this back. */
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+  fprintf (FILE, "\t.skip %u\n", (unsigned int)(SIZE))
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*.L%s%lu", (PREFIX), (unsigned long)(NUM))
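+
+/* For example, ASM_GENERATE_INTERNAL_LABEL (buf, "L", 42) stores "*.LL42"
+   in buf; the leading '*' tells assemble_name to emit the rest verbatim.  */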
+
+/* The native TLS-enabled assembler requires the directive #tls_object
+ to be put on objects in TLS sections (as of v7.1). This is not
+   required by the GNU assembler but is supported on SPARC. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ HOST_WIDE_INT size; \
+ \
+ if (targetm.have_tls && DECL_THREAD_LOCAL_P (DECL)) \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "tls_object"); \
+ else \
+ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \
+ \
+ size_directive_output = 0; \
+ if (!flag_inhibit_size_directive \
+ && (DECL) && DECL_SIZE (DECL)) \
+ { \
+ size_directive_output = 1; \
+ size = int_size_in_bytes (TREE_TYPE (DECL)); \
+ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \
+ } \
+ \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
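+
+/* For a 4-byte TLS variable "foo" this emits, roughly:
+
+       .type   foo, #tls_object
+       .size   foo, 4
+   foo:
+
+   and ".type foo, #object" for ordinary data.  */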
+
+/* The Solaris assembler cannot grok .stabd directives. */
+#undef NO_DBX_BNSYM_ENSYM
+#define NO_DBX_BNSYM_ENSYM 1
+
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ crtend.o%s crtn.o%s"
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ Some Solaris dynamic linkers don't handle unaligned section relative
+ relocs properly, so force them to be aligned. */
+#ifndef HAVE_AS_SPARC_UA_PCREL
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ ((flag_pic || GLOBAL) ? DW_EH_PE_aligned : DW_EH_PE_absptr)
+#endif
+
+
+/* Define for support of TFmode long double.
+ SPARC ABI says that long double is 4 words. */
+#define LONG_DOUBLE_TYPE_SIZE 128
+
+/* Solaris's _Qp_* library routine implementation clobbers the output
+ memory before the inputs are fully consumed. */
+
+#undef TARGET_BUGGY_QP_LIB
+#define TARGET_BUGGY_QP_LIB 1
+
+#undef SUN_CONVERSION_LIBFUNCS
+#define SUN_CONVERSION_LIBFUNCS 1
+
+#undef DITF_CONVERSION_LIBFUNCS
+#define DITF_CONVERSION_LIBFUNCS 1
+
+#undef SUN_INTEGER_MULTIPLY_64
+#define SUN_INTEGER_MULTIPLY_64 1
+
+/* Solaris allows 64-bit out and global registers to be used in 32-bit mode.
+ sparc_override_options will disable V8+ if either not generating V9 code
+ or generating 64-bit code. */
+#undef TARGET_DEFAULT
+#ifdef TARGET_64BIT_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_V9 + MASK_64BIT + MASK_PTR64 + MASK_STACK_BIAS + \
+ MASK_V8PLUS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128)
+#else
+#define TARGET_DEFAULT \
+ (MASK_V8PLUS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128)
+#endif
+
+/* Solaris-specific #pragmas are implemented on top of attributes. Hook in
+ the bits from config/sol2.c. */
+#define SUBTARGET_INSERT_ATTRIBUTES solaris_insert_attributes
+#define SUBTARGET_ATTRIBUTE_TABLE SOLARIS_ATTRIBUTE_TABLE
+
+/* Register the Solaris-specific #pragma directives. */
+#define REGISTER_TARGET_PRAGMAS() solaris_register_pragmas ()
+
+/* Output a simple call for .init/.fini. */
+#define ASM_OUTPUT_CALL(FILE, FN) \
+ do \
+ { \
+ fprintf (FILE, "\tcall\t"); \
+ print_operand (FILE, XEXP (DECL_RTL (FN), 0), 0); \
+ fprintf (FILE, "\n\tnop\n"); \
+ } \
+ while (0)
+
+/* This is how to output an assembler line that says to advance
+ the location counter to a multiple of 2**LOG bytes using the
+ NOP instruction as padding. */
+#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d,0x1000000\n", (1<<(LOG)))
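+
+/* E.g. LOG == 3 emits "\t.align 8,0x1000000"; 0x01000000 is the encoding
+   of the SPARC nop (sethi 0, %g0), so the padding stays executable.  */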
+
+/* Use Solaris ELF section syntax. */
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION sparc_solaris_elf_asm_named_section
+
+/* And SPARC non-standard pushsection syntax. */
+#undef PUSHSECTION_FORMAT
+#define PUSHSECTION_FORMAT "\t.pushsection\t\"%s\"\n"
+
+/* Static stack checking is supported by means of probes. */
+#define STACK_CHECK_STATIC_BUILTIN 1
+
+#define MD_UNWIND_SUPPORT "config/sparc/sol2-unwind.h"
diff --git a/gcc/config/sparc/sp-elf.h b/gcc/config/sparc/sp-elf.h
new file mode 100644
index 000000000..d78eba3b5
--- /dev/null
+++ b/gcc/config/sparc/sp-elf.h
@@ -0,0 +1,69 @@
+/* Definitions of target machine for GCC,
+ for SPARC running in an embedded environment using the ELF file format.
+ Copyright (C) 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc-elf)")
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* It's safe to pass -s always, even if -g is not used. */
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "-s \
+ %{fpic|fpie|fPIC|fPIE:-K PIC} %(asm_cpu)"
+
+/* Use the default. */
+#undef LINK_SPEC
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ crtend.o%s crtn.o%s"
+
+/* Don't set the target flags; this is done by the linker script.  */
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM))
+
+/* ??? Inherited from sol2.h. Probably wrong. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* ??? until fixed. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 64
diff --git a/gcc/config/sparc/sp64-elf.h b/gcc/config/sparc/sp64-elf.h
new file mode 100644
index 000000000..b21969386
--- /dev/null
+++ b/gcc/config/sparc/sp64-elf.h
@@ -0,0 +1,93 @@
+/* Definitions of target machine for GCC, for SPARC64, ELF.
+ Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2004, 2005, 2007, 2010,
+ 2011
+ Free Software Foundation, Inc.
+ Contributed by Doug Evans, dje@cygnus.com.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc64-elf)")
+
+/* A 64 bit v9 compiler in a Medium/Anywhere code model environment. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+(MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_HARD_QUAD \
+ + MASK_APP_REGS + MASK_FPU + MASK_STACK_BIAS + MASK_LONG_DOUBLE_128)
+
+#undef SPARC_DEFAULT_CMODEL
+#define SPARC_DEFAULT_CMODEL CM_EMBMEDANY
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* __svr4__ is used by the C library (FIXME) */
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC "-D__svr4__"
+
+#undef ASM_SPEC
+#define ASM_SPEC "\
+-s %{fpic|fPIC|fpie|fPIE:-K PIC} \
+%{mlittle-endian:-EL} \
+%(asm_cpu) %(asm_arch) \
+"
+
+/* This is taken from sol2.h. */
+#undef LINK_SPEC
+#define LINK_SPEC "\
+%{v:-V} \
+%{mlittle-endian:-EL} \
+"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ crtend.o%s crtn.o%s"
+
+/* Use the default (for now). */
+#undef LIB_SPEC
+
+#undef BYTES_BIG_ENDIAN
+#define BYTES_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
+
+#undef WORDS_BIG_ENDIAN
+#define WORDS_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*.L%s%ld", (PREFIX), (long)(NUM))
+
+/* ??? This should be 32 bits for v9 but what can we do? */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 128
diff --git a/gcc/config/sparc/sparc-modes.def b/gcc/config/sparc/sparc-modes.def
new file mode 100644
index 000000000..628470086
--- /dev/null
+++ b/gcc/config/sparc/sparc-modes.def
@@ -0,0 +1,47 @@
+/* Definitions of target machine for GCC, for Sun SPARC.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com).
+ 64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
+ at Cygnus Support.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* 128-bit floating point */
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* Add any extra modes needed to represent the condition code.
+
+ On the SPARC, we have a "no-overflow" mode which is used when an add or
+ subtract insn is used to set the condition code. Different branches are
+ used in this case for some operations.
+
+ We also have two modes to indicate that the relevant condition code is
+ in the floating-point condition code register. One for comparisons which
+ will generate an exception if the result is unordered (CCFPEmode) and
+ one for comparisons which will never trap (CCFPmode).
+
+ CCXmode and CCX_NOOVmode are only used by v9. */
+
+CC_MODE (CCX);
+CC_MODE (CC_NOOV);
+CC_MODE (CCX_NOOV);
+CC_MODE (CCFP);
+CC_MODE (CCFPE);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
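+
+/* As an illustration of the two FP condition modes (a sketch, assuming
+   the usual GCC semantics): a plain "a < b" on doubles raises the IEEE
+   invalid exception on unordered operands and so uses CCFPEmode, while
+   the quiet __builtin_isless (a, b) never traps and uses CCFPmode.  */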
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
new file mode 100644
index 000000000..d37823f56
--- /dev/null
+++ b/gcc/config/sparc/sparc-protos.h
@@ -0,0 +1,108 @@
+/* Prototypes of target machine for SPARC.
+ Copyright (C) 1999, 2000, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com).
+ 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
+ at Cygnus Support.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef __SPARC_PROTOS_H__
+#define __SPARC_PROTOS_H__
+
+#ifdef TREE_CODE
+#ifdef RTX_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+#endif
+extern unsigned long sparc_type_code (tree);
+#ifdef ARGS_SIZE_RTX
+/* expr.h defines ARGS_SIZE_RTX and `enum direction' */
+extern enum direction function_arg_padding (enum machine_mode, const_tree);
+#endif /* ARGS_SIZE_RTX */
+#endif /* TREE_CODE */
+
+extern void order_regs_for_local_alloc (void);
+extern HOST_WIDE_INT sparc_compute_frame_size (HOST_WIDE_INT, int);
+extern void sparc_expand_prologue (void);
+extern void sparc_expand_epilogue (void);
+extern bool sparc_can_use_return_insn_p (void);
+extern int check_pic (int);
+extern int short_branch (int, int);
+extern void sparc_profile_hook (int);
+extern void sparc_override_options (void);
+extern void sparc_output_scratch_registers (FILE *);
+
+#ifdef RTX_CODE
+extern enum machine_mode select_cc_mode (enum rtx_code, rtx, rtx);
+/* Define the function that builds the compare insn for scc and bcc. */
+extern rtx gen_compare_reg (rtx cmp);
+extern rtx sparc_emit_float_lib_cmp (rtx, rtx, enum rtx_code);
+extern void sparc_emit_floatunsdi (rtx [2], enum machine_mode);
+extern void sparc_emit_fixunsdi (rtx [2], enum machine_mode);
+extern void emit_tfmode_binop (enum rtx_code, rtx *);
+extern void emit_tfmode_unop (enum rtx_code, rtx *);
+extern void emit_tfmode_cvt (enum rtx_code, rtx *);
+extern bool legitimate_constant_p (rtx);
+extern bool constant_address_p (rtx);
+extern bool legitimate_pic_operand_p (rtx);
+extern rtx sparc_legitimize_reload_address (rtx, enum machine_mode, int, int,
+ int, int *win);
+extern void sparc_emit_call_insn (rtx, rtx);
+extern void sparc_defer_case_vector (rtx, rtx, int);
+extern bool sparc_expand_move (enum machine_mode, rtx *);
+extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx);
+extern int sparc_splitdi_legitimate (rtx, rtx);
+extern int sparc_absnegfloat_split_legitimate (rtx, rtx);
+extern const char *output_ubranch (rtx, int, rtx);
+extern const char *output_cbranch (rtx, rtx, int, int, int, rtx);
+extern const char *output_return (rtx);
+extern const char *output_sibcall (rtx, rtx);
+extern const char *output_v8plus_shift (rtx *, rtx, const char *);
+extern const char *output_v9branch (rtx, rtx, int, int, int, int, rtx);
+extern const char *output_probe_stack_range (rtx, rtx);
+extern bool emit_scc_insn (rtx []);
+extern void emit_conditional_branch_insn (rtx []);
+extern void print_operand (FILE *, rtx, int);
+extern int mems_ok_for_ldd_peep (rtx, rtx, rtx);
+extern int arith_double_4096_operand (rtx, enum machine_mode);
+extern int arith_4096_operand (rtx, enum machine_mode);
+extern int zero_operand (rtx, enum machine_mode);
+extern int fp_zero_operand (rtx, enum machine_mode);
+extern int reg_or_0_operand (rtx, enum machine_mode);
+extern int empty_delay_slot (rtx);
+extern int eligible_for_return_delay (rtx);
+extern int eligible_for_sibcall_delay (rtx);
+extern int tls_call_delay (rtx);
+extern int emit_move_sequence (rtx, enum machine_mode);
+extern int fp_sethi_p (rtx);
+extern int fp_mov_p (rtx);
+extern int fp_high_losum_p (rtx);
+extern int mem_min_alignment (rtx, int);
+extern int pic_address_needs_scratch (rtx);
+extern int reg_unused_after (rtx, rtx);
+extern int register_ok_for_ldd (rtx);
+extern int memory_ok_for_ldd (rtx);
+extern int registers_ok_for_ldd_peep (rtx, rtx);
+extern int v9_regcmp_p (enum rtx_code);
+/* Function used for V8+ code generation. Returns 1 if the high
+ 32 bits of REG are 0 before INSN. */
+extern int sparc_check_64 (rtx, rtx);
+extern rtx gen_df_reg (rtx, int);
+extern void sparc_expand_compare_and_swap_12 (rtx, rtx, rtx, rtx);
+#endif /* RTX_CODE */
+
+#endif /* __SPARC_PROTOS_H__ */
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
new file mode 100644
index 000000000..9682609fe
--- /dev/null
+++ b/gcc/config/sparc/sparc.c
@@ -0,0 +1,9873 @@
+/* Subroutines for insn-output.c for SPARC.
+ Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com)
+ 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
+ at Cygnus Support.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "insn-codes.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "function.h"
+#include "except.h"
+#include "expr.h"
+#include "optabs.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "tm_p.h"
+#include "debug.h"
+#include "target.h"
+#include "target-def.h"
+#include "cfglayout.h"
+#include "gimple.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "params.h"
+#include "df.h"
+#include "dwarf2out.h"
+
+/* Processor costs */
+static const
+struct processor_costs cypress_costs = {
+ COSTS_N_INSNS (2), /* int load */
+ COSTS_N_INSNS (2), /* int signed load */
+ COSTS_N_INSNS (2), /* int zeroed load */
+ COSTS_N_INSNS (2), /* float load */
+ COSTS_N_INSNS (5), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (5), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (7), /* fmul */
+ COSTS_N_INSNS (37), /* fdivs */
+ COSTS_N_INSNS (37), /* fdivd */
+ COSTS_N_INSNS (63), /* fsqrts */
+ COSTS_N_INSNS (63), /* fsqrtd */
+ COSTS_N_INSNS (1), /* imul */
+ COSTS_N_INSNS (1), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (1), /* idiv */
+ COSTS_N_INSNS (1), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs supersparc_costs = {
+ COSTS_N_INSNS (1), /* int load */
+ COSTS_N_INSNS (1), /* int signed load */
+ COSTS_N_INSNS (1), /* int zeroed load */
+ COSTS_N_INSNS (0), /* float load */
+ COSTS_N_INSNS (3), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (3), /* fadd, fsub */
+ COSTS_N_INSNS (3), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (3), /* fmul */
+ COSTS_N_INSNS (6), /* fdivs */
+ COSTS_N_INSNS (9), /* fdivd */
+ COSTS_N_INSNS (12), /* fsqrts */
+ COSTS_N_INSNS (12), /* fsqrtd */
+ COSTS_N_INSNS (4), /* imul */
+ COSTS_N_INSNS (4), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (4), /* idiv */
+ COSTS_N_INSNS (4), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 1, /* shift penalty */
+};
+
+static const
+struct processor_costs hypersparc_costs = {
+ COSTS_N_INSNS (1), /* int load */
+ COSTS_N_INSNS (1), /* int signed load */
+ COSTS_N_INSNS (1), /* int zeroed load */
+ COSTS_N_INSNS (1), /* float load */
+ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (1), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (1), /* fmul */
+ COSTS_N_INSNS (8), /* fdivs */
+ COSTS_N_INSNS (12), /* fdivd */
+ COSTS_N_INSNS (17), /* fsqrts */
+ COSTS_N_INSNS (17), /* fsqrtd */
+ COSTS_N_INSNS (17), /* imul */
+ COSTS_N_INSNS (17), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (17), /* idiv */
+ COSTS_N_INSNS (17), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs leon_costs = {
+ COSTS_N_INSNS (1), /* int load */
+ COSTS_N_INSNS (1), /* int signed load */
+ COSTS_N_INSNS (1), /* int zeroed load */
+ COSTS_N_INSNS (1), /* float load */
+ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (1), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (1), /* fmul */
+ COSTS_N_INSNS (15), /* fdivs */
+ COSTS_N_INSNS (15), /* fdivd */
+ COSTS_N_INSNS (23), /* fsqrts */
+ COSTS_N_INSNS (23), /* fsqrtd */
+ COSTS_N_INSNS (5), /* imul */
+ COSTS_N_INSNS (5), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (5), /* idiv */
+ COSTS_N_INSNS (5), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs sparclet_costs = {
+ COSTS_N_INSNS (3), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (1), /* int zeroed load */
+ COSTS_N_INSNS (1), /* float load */
+ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (1), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (1), /* fmul */
+ COSTS_N_INSNS (1), /* fdivs */
+ COSTS_N_INSNS (1), /* fdivd */
+ COSTS_N_INSNS (1), /* fsqrts */
+ COSTS_N_INSNS (1), /* fsqrtd */
+ COSTS_N_INSNS (5), /* imul */
+ COSTS_N_INSNS (5), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (5), /* idiv */
+ COSTS_N_INSNS (5), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs ultrasparc_costs = {
+ COSTS_N_INSNS (2), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (2), /* int zeroed load */
+ COSTS_N_INSNS (2), /* float load */
+ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (4), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (2), /* fmov, fmovr */
+ COSTS_N_INSNS (4), /* fmul */
+ COSTS_N_INSNS (13), /* fdivs */
+ COSTS_N_INSNS (23), /* fdivd */
+ COSTS_N_INSNS (13), /* fsqrts */
+ COSTS_N_INSNS (23), /* fsqrtd */
+ COSTS_N_INSNS (4), /* imul */
+ COSTS_N_INSNS (4), /* imulX */
+ 2, /* imul bit factor */
+ COSTS_N_INSNS (37), /* idiv */
+ COSTS_N_INSNS (68), /* idivX */
+ COSTS_N_INSNS (2), /* movcc/movr */
+ 2, /* shift penalty */
+};
+
+static const
+struct processor_costs ultrasparc3_costs = {
+ COSTS_N_INSNS (2), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (3), /* int zeroed load */
+ COSTS_N_INSNS (2), /* float load */
+ COSTS_N_INSNS (3), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (4), /* fadd, fsub */
+ COSTS_N_INSNS (5), /* fcmp */
+ COSTS_N_INSNS (3), /* fmov, fmovr */
+ COSTS_N_INSNS (4), /* fmul */
+ COSTS_N_INSNS (17), /* fdivs */
+ COSTS_N_INSNS (20), /* fdivd */
+ COSTS_N_INSNS (20), /* fsqrts */
+ COSTS_N_INSNS (29), /* fsqrtd */
+ COSTS_N_INSNS (6), /* imul */
+ COSTS_N_INSNS (6), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (40), /* idiv */
+ COSTS_N_INSNS (71), /* idivX */
+ COSTS_N_INSNS (2), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs niagara_costs = {
+ COSTS_N_INSNS (3), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (3), /* int zeroed load */
+ COSTS_N_INSNS (9), /* float load */
+ COSTS_N_INSNS (8), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (8), /* fadd, fsub */
+ COSTS_N_INSNS (26), /* fcmp */
+ COSTS_N_INSNS (8), /* fmov, fmovr */
+ COSTS_N_INSNS (29), /* fmul */
+ COSTS_N_INSNS (54), /* fdivs */
+ COSTS_N_INSNS (83), /* fdivd */
+ COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
+ COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
+ COSTS_N_INSNS (11), /* imul */
+ COSTS_N_INSNS (11), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (72), /* idiv */
+ COSTS_N_INSNS (72), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs niagara2_costs = {
+ COSTS_N_INSNS (3), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (3), /* int zeroed load */
+ COSTS_N_INSNS (3), /* float load */
+ COSTS_N_INSNS (6), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (6), /* fadd, fsub */
+ COSTS_N_INSNS (6), /* fcmp */
+ COSTS_N_INSNS (6), /* fmov, fmovr */
+ COSTS_N_INSNS (6), /* fmul */
+ COSTS_N_INSNS (19), /* fdivs */
+ COSTS_N_INSNS (33), /* fdivd */
+ COSTS_N_INSNS (19), /* fsqrts */
+ COSTS_N_INSNS (33), /* fsqrtd */
+ COSTS_N_INSNS (5), /* imul */
+ COSTS_N_INSNS (5), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
+ COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+const struct processor_costs *sparc_costs = &cypress_costs;
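+
+/* COSTS_N_INSNS (n) expresses a latency of n instructions in the cost
+   units used by the middle end; sparc_costs is presumably re-pointed at
+   the table matching the selected CPU during option handling.  */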
+
+#ifdef HAVE_AS_RELAX_OPTION
+/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
+ "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
+   With sethi/jmp, neither 'as' nor 'ld' has an easy way to verify
+   that nothing branches between the sethi and the jmp.  */
+#define LEAF_SIBCALL_SLOT_RESERVED_P 1
+#else
+#define LEAF_SIBCALL_SLOT_RESERVED_P \
+ ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
+#endif
+
+/* Global variables for machine-dependent things. */
+
+/* Size of frame. Need to know this to emit return insns from leaf procedures.
+ ACTUAL_FSIZE is set by sparc_compute_frame_size() which is called during the
+ reload pass. This is important as the value is later used for scheduling
+ (to see what can go in a delay slot).
+ APPARENT_FSIZE is the size of the stack less the register save area and less
+ the outgoing argument area. It is used when saving call preserved regs. */
+static HOST_WIDE_INT apparent_fsize;
+static HOST_WIDE_INT actual_fsize;
+
+/* Number of live general or floating point registers needed to be
+ saved (as 4-byte quantities). */
+static int num_gfregs;
+
+/* Vector to say how input registers are mapped to output registers.
+   HARD_FRAME_POINTER_REGNUM cannot be remapped by this mapping to
+ eliminate it. You must use -fomit-frame-pointer to get that. */
+char leaf_reg_remap[] =
+{ 0, 1, 2, 3, 4, 5, 6, 7,
+ -1, -1, -1, -1, -1, -1, 14, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 8, 9, 10, 11, 12, 13, -1, 15,
+
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99, 100};
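+
+/* For instance, entry 24 above is 8: in a leaf function %i0 (reg 24) is
+   rewritten to %o0 (reg 8), since a leaf function never executes
+   save/restore and keeps working in its caller's register window.  */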
+
+/* Vector, indexed by hard register number, which contains 1
+ for a register that is allowable in a candidate for leaf
+ function treatment. */
+char sparc_leaf_regs[] =
+{ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1};
+
+struct GTY(()) machine_function
+{
+ /* Some local-dynamic TLS symbol name. */
+ const char *some_ld_name;
+
+ /* True if the current function is leaf and uses only leaf regs,
+ so that the SPARC leaf function optimization can be applied.
+ Private version of current_function_uses_only_leaf_regs, see
+ sparc_expand_prologue for the rationale. */
+ int leaf_function_p;
+
+ /* True if the data calculated by sparc_expand_prologue are valid. */
+ bool prologue_data_valid_p;
+};
+
+#define sparc_leaf_function_p cfun->machine->leaf_function_p
+#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
+
+/* Register we pretend to think the frame pointer is allocated to.
+ Normally, this is %fp, but if we are in a leaf procedure, this
+ is %sp+"something". We record "something" separately as it may
+ be too big for reg+constant addressing. */
+static rtx frame_base_reg;
+static HOST_WIDE_INT frame_base_offset;
+
+/* 1 if the next opcode is to be specially indented. */
+int sparc_indent_opcode = 0;
+
+static bool sparc_handle_option (size_t, const char *, int);
+static void sparc_option_override (void);
+static void sparc_init_modes (void);
+static void scan_record_type (const_tree, int *, int *, int *);
+static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool, bool, int *, int *);
+
+static int supersparc_adjust_cost (rtx, rtx, rtx, int);
+static int hypersparc_adjust_cost (rtx, rtx, rtx, int);
+
+static void sparc_emit_set_const32 (rtx, rtx);
+static void sparc_emit_set_const64 (rtx, rtx);
+static void sparc_output_addr_vec (rtx);
+static void sparc_output_addr_diff_vec (rtx);
+static void sparc_output_deferred_case_vectors (void);
+static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx sparc_builtin_saveregs (void);
+static int epilogue_renumber (rtx *, int);
+static bool sparc_assemble_integer (rtx, unsigned int, int);
+static int set_extends (rtx);
+static void load_got_register (void);
+static int save_or_restore_regs (int, int, rtx, int, int);
+static void emit_save_or_restore_regs (int);
+static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
+static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
+static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
+ tree) ATTRIBUTE_UNUSED;
+static int sparc_adjust_cost (rtx, rtx, rtx, int);
+static int sparc_issue_rate (void);
+static void sparc_sched_init (FILE *, int, int);
+static int sparc_use_sched_lookahead (void);
+
+static void emit_soft_tfmode_libcall (const char *, int, rtx *);
+static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
+static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
+static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
+static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
+
+static bool sparc_function_ok_for_sibcall (tree, tree);
+static void sparc_init_libfuncs (void);
+static void sparc_init_builtins (void);
+static void sparc_vis_init_builtins (void);
+static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static tree sparc_fold_builtin (tree, int, tree *, bool);
+static int sparc_vis_mul8x16 (int, int);
+static tree sparc_handle_vis_mul8x16 (int, tree, tree, tree);
+static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, const_tree);
+static void sparc_reorg (void);
+static struct machine_function * sparc_init_machine_status (void);
+static bool sparc_cannot_force_const_mem (rtx);
+static rtx sparc_tls_get_addr (void);
+static rtx sparc_tls_got (void);
+static const char *get_some_local_dynamic_name (void);
+static int get_some_local_dynamic_name_1 (rtx *, void *);
+static bool sparc_rtx_costs (rtx, int, int, int *, bool);
+static rtx sparc_function_value (const_tree, const_tree, bool);
+static rtx sparc_libcall_value (enum machine_mode, const_rtx);
+static bool sparc_function_value_regno_p (const unsigned int);
+static rtx sparc_struct_value_rtx (tree, int);
+static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
+ int *, const_tree, int);
+static bool sparc_return_in_memory (const_tree, const_tree);
+static bool sparc_strict_argument_naming (CUMULATIVE_ARGS *);
+static void sparc_va_start (tree, rtx);
+static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
+static bool sparc_vector_mode_supported_p (enum machine_mode);
+static bool sparc_tls_referenced_p (rtx);
+static rtx sparc_legitimize_tls_address (rtx);
+static rtx sparc_legitimize_pic_address (rtx, rtx);
+static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx sparc_delegitimize_address (rtx);
+static bool sparc_mode_dependent_address_p (const_rtx);
+static bool sparc_pass_by_reference (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static void sparc_function_arg_advance (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static rtx sparc_function_arg_1 (const CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool, bool);
+static rtx sparc_function_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static rtx sparc_function_incoming_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static unsigned int sparc_function_arg_boundary (enum machine_mode,
+ const_tree);
+static int sparc_arg_partial_bytes (CUMULATIVE_ARGS *,
+ enum machine_mode, tree, bool);
+static void sparc_dwarf_handle_frame_unspec (const char *, rtx, int);
+static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static void sparc_file_end (void);
+static bool sparc_frame_pointer_required (void);
+static bool sparc_can_eliminate (const int, const int);
+static void sparc_conditional_register_usage (void);
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+static const char *sparc_mangle_type (const_tree);
+#endif
+static void sparc_trampoline_init (rtx, tree, rtx);
+static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
+
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+/* Table of valid machine attributes. */
+static const struct attribute_spec sparc_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ SUBTARGET_ATTRIBUTE_TABLE,
+ { NULL, 0, 0, false, false, false, NULL }
+};
+#endif
+
+/* Option handling. */
+
+/* Parsed value of -mcmodel=.  */
+enum cmodel sparc_cmodel;
+
+char sparc_hard_reg_printed[8];
+
+struct sparc_cpu_select sparc_select[] =
+{
+  /* parsed string, switch name, set_tune_p, set_arch_p */
+ { (char *)0, "default", 1, 1 },
+ { (char *)0, "-mcpu=", 1, 1 },
+ { (char *)0, "-mtune=", 1, 0 },
+ { 0, 0, 0, 0 }
+};
+
+/* CPU type. This is set from TARGET_CPU_DEFAULT and -m{cpu,tune}=xxx. */
+enum processor_type sparc_cpu;
+
+/* Whether an FPU option was specified. */
+static bool fpu_option_set = false;
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options sparc_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Initialize the GCC target structure. */
+
+/* The default is to use .half rather than .short for aligned HI objects. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
+
+/* The target hook has to handle DI-mode values. */
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER sparc_assemble_integer
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT sparc_sched_init
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS sparc_init_builtins
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN sparc_fold_builtin
+
+#if TARGET_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS sparc_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE sparc_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE sparc_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG sparc_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
+
+#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
+#define TARGET_DWARF_HANDLE_FRAME_UNSPEC sparc_dwarf_handle_frame_unspec
+
+#ifdef SUBTARGET_INSERT_ATTRIBUTES
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
+#endif
+
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
+#endif
+
+#undef TARGET_RELAXED_ORDERING
+#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION sparc_handle_option
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE sparc_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE sparc_option_optimization_table
+
+#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
+#endif
+
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END sparc_file_end
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE sparc_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE sparc_mangle_type
+#endif
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mfpu:
+ case OPT_mhard_float:
+ case OPT_msoft_float:
+ fpu_option_set = true;
+ break;
+
+ case OPT_mcpu_:
+ sparc_select[1].string = arg;
+ break;
+
+ case OPT_mtune_:
+ sparc_select[2].string = arg;
+ break;
+ }
+
+ return true;
+}
+
+/* Validate and override various options, and do some machine dependent
+ initialization. */
+
+static void
+sparc_option_override (void)
+{
+ static struct code_model {
+ const char *const name;
+ const enum cmodel value;
+ } const cmodels[] = {
+ { "32", CM_32 },
+ { "medlow", CM_MEDLOW },
+ { "medmid", CM_MEDMID },
+ { "medany", CM_MEDANY },
+ { "embmedany", CM_EMBMEDANY },
+ { NULL, (enum cmodel) 0 }
+ };
+ const struct code_model *cmodel;
+ /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
+ static struct cpu_default {
+ const int cpu;
+ const char *const name;
+ } const cpu_default[] = {
+ /* There must be one entry here for each TARGET_CPU value. */
+ { TARGET_CPU_sparc, "cypress" },
+ { TARGET_CPU_v8, "v8" },
+ { TARGET_CPU_supersparc, "supersparc" },
+ { TARGET_CPU_hypersparc, "hypersparc" },
+ { TARGET_CPU_leon, "leon" },
+ { TARGET_CPU_sparclite, "f930" },
+ { TARGET_CPU_sparclite86x, "sparclite86x" },
+ { TARGET_CPU_sparclet, "tsc701" },
+ { TARGET_CPU_v9, "v9" },
+ { TARGET_CPU_ultrasparc, "ultrasparc" },
+ { TARGET_CPU_ultrasparc3, "ultrasparc3" },
+ { TARGET_CPU_niagara, "niagara" },
+ { TARGET_CPU_niagara2, "niagara2" },
+ { 0, 0 }
+ };
+ const struct cpu_default *def;
+ /* Table of values for -m{cpu,tune}=. */
+ static struct cpu_table {
+ const char *const name;
+ const enum processor_type processor;
+ const int disable;
+ const int enable;
+ } const cpu_table[] = {
+ { "v7", PROCESSOR_V7, MASK_ISA, 0 },
+ { "cypress", PROCESSOR_CYPRESS, MASK_ISA, 0 },
+ { "v8", PROCESSOR_V8, MASK_ISA, MASK_V8 },
+ /* TI TMS390Z55 supersparc */
+ { "supersparc", PROCESSOR_SUPERSPARC, MASK_ISA, MASK_V8 },
+ { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
+ /* LEON */
+ { "leon", PROCESSOR_LEON, MASK_ISA, MASK_V8|MASK_FPU },
+ { "sparclite", PROCESSOR_SPARCLITE, MASK_ISA, MASK_SPARCLITE },
+ /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
+ { "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
+ /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
+ { "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
+ { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
+ MASK_SPARCLITE },
+ { "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
+ /* TEMIC sparclet */
+ { "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
+ { "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
+ /* UltraSPARC I, II, IIi */
+ { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA,
+ /* Although insns using %y are deprecated, it is a clear win. */
+ MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ /* UltraSPARC III */
+ /* ??? Check if %y issue still holds true. */
+ { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA,
+ MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ /* UltraSPARC T1 */
+ { "niagara", PROCESSOR_NIAGARA, MASK_ISA,
+ MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ /* UltraSPARC T2 */
+ { "niagara2", PROCESSOR_NIAGARA2, MASK_ISA, MASK_V9},
+ { 0, (enum processor_type) 0, 0, 0 }
+ };
+ const struct cpu_table *cpu;
+ const struct sparc_cpu_select *sel;
+ int fpu;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+#ifndef SPARC_BI_ARCH
+ /* Check for unsupported architecture size. */
+ if (! TARGET_64BIT != DEFAULT_ARCH32_P)
+ error ("%s is not supported by this configuration",
+ DEFAULT_ARCH32_P ? "-m64" : "-m32");
+#endif
+
+  /* We force all 64-bit architectures to use a 128-bit long double.  */
+ if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
+ {
+ error ("-mlong-double-64 not allowed with -m64");
+ target_flags |= MASK_LONG_DOUBLE_128;
+ }
+
+ /* Code model selection. */
+ sparc_cmodel = SPARC_DEFAULT_CMODEL;
+
+#ifdef SPARC_BI_ARCH
+ if (TARGET_ARCH32)
+ sparc_cmodel = CM_32;
+#endif
+
+ if (sparc_cmodel_string != NULL)
+ {
+ if (TARGET_ARCH64)
+ {
+ for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
+ if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
+ break;
+ if (cmodel->name == NULL)
+ error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
+ else
+ sparc_cmodel = cmodel->value;
+ }
+ else
+ error ("-mcmodel= is not supported on 32 bit systems");
+ }
+
+ fpu = target_flags & MASK_FPU; /* save current -mfpu status */
+
+ /* Set the default CPU. */
+ for (def = &cpu_default[0]; def->name; ++def)
+ if (def->cpu == TARGET_CPU_DEFAULT)
+ break;
+ gcc_assert (def->name);
+ sparc_select[0].string = def->name;
+
+ for (sel = &sparc_select[0]; sel->name; ++sel)
+ {
+ if (sel->string)
+ {
+ for (cpu = &cpu_table[0]; cpu->name; ++cpu)
+ if (! strcmp (sel->string, cpu->name))
+ {
+ if (sel->set_tune_p)
+ sparc_cpu = cpu->processor;
+
+ if (sel->set_arch_p)
+ {
+ target_flags &= ~cpu->disable;
+ target_flags |= cpu->enable;
+ }
+ break;
+ }
+
+ if (! cpu->name)
+ error ("bad value (%s) for %s switch", sel->string, sel->name);
+ }
+ }
+
+ /* If -mfpu or -mno-fpu was explicitly used, don't override with
+ the processor default. */
+ if (fpu_option_set)
+ target_flags = (target_flags & ~MASK_FPU) | fpu;
+
+ /* Don't allow -mvis if FPU is disabled. */
+ if (! TARGET_FPU)
+ target_flags &= ~MASK_VIS;
+
+ /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
+ are available.
+ -m64 also implies v9. */
+ if (TARGET_VIS || TARGET_ARCH64)
+ {
+ target_flags |= MASK_V9;
+ target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
+ }
+
+  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
+ if (TARGET_V9 && TARGET_ARCH32)
+ target_flags |= MASK_DEPRECATED_V8_INSNS;
+
+  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
+ if (! TARGET_V9 || TARGET_ARCH64)
+ target_flags &= ~MASK_V8PLUS;
+
+  /* Don't use stack biasing in 32-bit mode.  */
+ if (TARGET_ARCH32)
+ target_flags &= ~MASK_STACK_BIAS;
+
+ /* Supply a default value for align_functions. */
+ if (align_functions == 0
+ && (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3
+ || sparc_cpu == PROCESSOR_NIAGARA
+ || sparc_cpu == PROCESSOR_NIAGARA2))
+ align_functions = 32;
+
+ /* Validate PCC_STRUCT_RETURN. */
+ if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
+ flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
+
+ /* Only use .uaxword when compiling for a 64-bit target. */
+ if (!TARGET_ARCH64)
+ targetm.asm_out.unaligned_op.di = NULL;
+
+ /* Do various machine dependent initializations. */
+ sparc_init_modes ();
+
+ /* Set up function hooks. */
+ init_machine_status = sparc_init_machine_status;
+
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_V7:
+ case PROCESSOR_CYPRESS:
+ sparc_costs = &cypress_costs;
+ break;
+ case PROCESSOR_V8:
+ case PROCESSOR_SPARCLITE:
+ case PROCESSOR_SUPERSPARC:
+ sparc_costs = &supersparc_costs;
+ break;
+ case PROCESSOR_F930:
+ case PROCESSOR_F934:
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ sparc_costs = &hypersparc_costs;
+ break;
+ case PROCESSOR_LEON:
+ sparc_costs = &leon_costs;
+ break;
+ case PROCESSOR_SPARCLET:
+ case PROCESSOR_TSC701:
+ sparc_costs = &sparclet_costs;
+ break;
+ case PROCESSOR_V9:
+ case PROCESSOR_ULTRASPARC:
+ sparc_costs = &ultrasparc_costs;
+ break;
+ case PROCESSOR_ULTRASPARC3:
+ sparc_costs = &ultrasparc3_costs;
+ break;
+ case PROCESSOR_NIAGARA:
+ sparc_costs = &niagara_costs;
+ break;
+ case PROCESSOR_NIAGARA2:
+ sparc_costs = &niagara2_costs;
+ break;
+    }
+
+#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+ if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
+ target_flags |= MASK_LONG_DOUBLE_128;
+#endif
+
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ ((sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_NIAGARA
+ || sparc_cpu == PROCESSOR_NIAGARA2)
+ ? 2
+ : (sparc_cpu == PROCESSOR_ULTRASPARC3
+ ? 8 : 3)),
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+ ((sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3
+ || sparc_cpu == PROCESSOR_NIAGARA
+ || sparc_cpu == PROCESSOR_NIAGARA2)
+ ? 64 : 32),
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ /* Disable save slot sharing for call-clobbered registers by default.
+ The IRA sharing algorithm works on single registers only and this
+ pessimizes for double floating-point registers. */
+ if (!global_options_set.x_flag_ira_share_save_slots)
+ flag_ira_share_save_slots = 0;
+}
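+
+/* Illustrative note (not from the upstream sources): the sparc_select
+   loop above processes "default", then -mcpu=, then -mtune=, in that
+   order.  For example, "-mcpu=supersparc -mtune=ultrasparc" first takes
+   both the ISA flags and the tuning from the supersparc entry; the
+   later -mtune= entry (set_arch_p == 0) then overrides only sparc_cpu,
+   leaving the MASK_V8 ISA selection in place.  */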
+
+/* Miscellaneous utilities. */
+
+/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
+ or branch on register contents instructions. */
+
+int
+v9_regcmp_p (enum rtx_code code)
+{
+ return (code == EQ || code == NE || code == GE || code == LT
+ || code == LE || code == GT);
+}
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a single
+ sethi instruction. */
+
+int
+fp_sethi_p (rtx op)
+{
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
+ }
+
+ return 0;
+}
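+
+/* Example (illustrative, not part of the upstream sources): 1.0f has
+   the IEEE-754 single-precision bit pattern 0x3f800000.  That value is
+   outside the simm13 range but has its low 10 bits clear, so it can be
+   loaded with a single
+
+     sethi %hi(0x3f800000), %reg
+
+   and fp_sethi_p returns nonzero for it.  */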
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a single
+ mov instruction. */
+
+int
+fp_mov_p (rtx op)
+{
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ return SPARC_SIMM13_P (i);
+ }
+
+ return 0;
+}
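+
+/* Example (illustrative): 0.0f has the bit pattern 0x00000000, which
+   fits in a signed 13-bit immediate, so fp_mov_p returns nonzero and
+   the constant can be loaded with a single mov.  */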
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a high/losum
+ instruction sequence. */
+
+int
+fp_high_losum_p (rtx op)
+{
+ /* The constraints calling this should only be in
+ SFmode move insns, so any constant which cannot
+ be moved using a single insn will do. */
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
+ }
+
+ return 0;
+}
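+
+/* Example (illustrative): 1.1f has the bit pattern 0x3f8ccccd, which
+   neither fits in a simm13 nor has its low 10 bits clear, so it takes
+   the two-insn sequence
+
+     sethi %hi(0x3f8ccccd), %reg
+     or    %reg, %lo(0x3f8ccccd), %reg
+
+   and fp_high_losum_p returns nonzero for it.  */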
+
+/* Return true if the address of LABEL can be loaded by means of the
+ mov{si,di}_pic_label_ref patterns in PIC mode. */
+
+static bool
+can_use_mov_pic_label_ref (rtx label)
+{
+ /* VxWorks does not impose a fixed gap between segments; the run-time
+ gap can be different from the object-file gap. We therefore can't
+ assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
+ are absolutely sure that X is in the same segment as the GOT.
+ Unfortunately, the flexibility of linker scripts means that we
+ can't be sure of that in general, so assume that GOT-relative
+ accesses are never valid on VxWorks. */
+ if (TARGET_VXWORKS_RTP)
+ return false;
+
+  /* Similarly, if the label is non-local, it might end up being placed
+     in a different section than the current one, whereas mov_pic_label_ref
+     requires the label and the code to be in the same section.  */
+ if (LABEL_REF_NONLOCAL_P (label))
+ return false;
+
+  /* Finally, if we are reordering basic blocks and partitioning into hot
+     and cold sections, this might happen for any label.  */
+ if (flag_reorder_blocks_and_partition)
+ return false;
+
+ return true;
+}
+
+/* Expand a move instruction. Return true if all work is done. */
+
+bool
+sparc_expand_move (enum machine_mode mode, rtx *operands)
+{
+ /* Handle sets of MEM first. */
+ if (GET_CODE (operands[0]) == MEM)
+ {
+ /* 0 is a register (or a pair of registers) on SPARC. */
+ if (register_or_zero_operand (operands[1], mode))
+ return false;
+
+ if (!reload_in_progress)
+ {
+ operands[0] = validize_mem (operands[0]);
+ operands[1] = force_reg (mode, operands[1]);
+ }
+ }
+
+ /* Fixup TLS cases. */
+ if (TARGET_HAVE_TLS
+ && CONSTANT_P (operands[1])
+ && sparc_tls_referenced_p (operands [1]))
+ {
+ operands[1] = sparc_legitimize_tls_address (operands[1]);
+ return false;
+ }
+
+ /* Fixup PIC cases. */
+ if (flag_pic && CONSTANT_P (operands[1]))
+ {
+ if (pic_address_needs_scratch (operands[1]))
+ operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
+
+ /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
+ if (GET_CODE (operands[1]) == LABEL_REF
+ && can_use_mov_pic_label_ref (operands[1]))
+ {
+ if (mode == SImode)
+ {
+ emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
+ return true;
+ }
+
+ if (mode == DImode)
+ {
+ gcc_assert (TARGET_ARCH64);
+ emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
+ return true;
+ }
+ }
+
+ if (symbolic_operand (operands[1], mode))
+ {
+ operands[1]
+ = sparc_legitimize_pic_address (operands[1],
+ reload_in_progress
+ ? operands[0] : NULL_RTX);
+ return false;
+ }
+ }
+
+ /* If we are trying to toss an integer constant into FP registers,
+ or loading a FP or vector constant, force it into memory. */
+ if (CONSTANT_P (operands[1])
+ && REG_P (operands[0])
+ && (SPARC_FP_REG_P (REGNO (operands[0]))
+ || SCALAR_FLOAT_MODE_P (mode)
+ || VECTOR_MODE_P (mode)))
+ {
+ /* emit_group_store will send such bogosity to us when it is
+ not storing directly into memory. So fix this up to avoid
+ crashes in output_constant_pool. */
+ if (operands [1] == const0_rtx)
+ operands[1] = CONST0_RTX (mode);
+
+      /* We can always clear integer registers, and FP registers too
+	 if TARGET_VIS.  */
+ if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
+ && const_zero_operand (operands[1], mode))
+ return false;
+
+ if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
+ /* We are able to build any SF constant in integer registers
+ with at most 2 instructions. */
+ && (mode == SFmode
+ /* And any DF constant in integer registers. */
+ || (mode == DFmode
+ && (reload_completed || reload_in_progress))))
+ return false;
+
+ operands[1] = force_const_mem (mode, operands[1]);
+ if (!reload_in_progress)
+ operands[1] = validize_mem (operands[1]);
+ return false;
+ }
+
+ /* Accept non-constants and valid constants unmodified. */
+ if (!CONSTANT_P (operands[1])
+ || GET_CODE (operands[1]) == HIGH
+ || input_operand (operands[1], mode))
+ return false;
+
+ switch (mode)
+ {
+ case QImode:
+ /* All QImode constants require only one insn, so proceed. */
+ break;
+
+ case HImode:
+ case SImode:
+ sparc_emit_set_const32 (operands[0], operands[1]);
+ return true;
+
+ case DImode:
+ /* input_operand should have filtered out 32-bit mode. */
+ sparc_emit_set_const64 (operands[0], operands[1]);
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return false;
+}
+
+/* Load OP1, a 32-bit constant, into OP0, a register.
+   We know it can't be done in one insn when we get
+   here; the move expander guarantees this.  */
+
+static void
+sparc_emit_set_const32 (rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ temp = op0;
+ else
+ temp = gen_reg_rtx (mode);
+
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ gcc_assert (!small_int_operand (op1, mode)
+ && !const_high_operand (op1, mode));
+
+      /* Emit them as real moves instead of a HIGH/LO_SUM;
+	 this way CSE can see everything and reuse intermediate
+	 values if it wants.  */
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ GEN_INT (INTVAL (op1)
+ & ~(HOST_WIDE_INT)0x3ff)));
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_IOR (mode, temp,
+ GEN_INT (INTVAL (op1) & 0x3ff))));
+ }
+ else
+ {
+ /* A symbol, emit in the traditional way. */
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_HIGH (mode, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0, gen_rtx_LO_SUM (mode, temp, op1)));
+ }
+}
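+
+/* Worked example (illustrative, not from the upstream sources): for
+   OP1 == 0x12345678 the CONST_INT path above emits
+
+     (set (reg temp) (const_int 0x12345400))             ; op1 & ~0x3ff
+     (set (reg op0) (ior (reg temp) (const_int 0x278)))  ; op1 & 0x3ff
+
+   i.e. the familiar "sethi %hi(x); or %reg, %lo(x)" pair, but exposed
+   to CSE as plain moves so the intermediate value can be reused.  */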
+
+/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
+ If TEMP is nonzero, we are forbidden to use any other scratch
+ registers. Otherwise, we are allowed to generate them as needed.
+
+ Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
+ or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
+
+void
+sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
+{
+ rtx temp1, temp2, temp3, temp4, temp5;
+ rtx ti_temp = 0;
+
+ if (temp && GET_MODE (temp) == TImode)
+ {
+ ti_temp = temp;
+ temp = gen_rtx_REG (DImode, REGNO (temp));
+ }
+
+ /* SPARC-V9 code-model support. */
+ switch (sparc_cmodel)
+ {
+ case CM_MEDLOW:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable must be in the low 4TB of the virtual address
+ space.
+
+ sethi %hi(symbol), %temp1
+ or %temp1, %lo(symbol), %reg */
+ if (temp)
+ temp1 = temp; /* op0 is allowed. */
+ else
+ temp1 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
+ break;
+
+ case CM_MEDMID:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable must be in the low 16TB of the virtual address
+ space.
+
+ sethi %h44(symbol), %temp1
+ or %temp1, %m44(symbol), %temp2
+ sllx %temp2, 12, %temp3
+ or %temp3, %l44(symbol), %reg */
+ if (temp)
+ {
+ temp1 = op0;
+ temp2 = op0;
+ temp3 = temp; /* op0 is allowed. */
+ }
+ else
+ {
+ temp1 = gen_reg_rtx (DImode);
+ temp2 = gen_reg_rtx (DImode);
+ temp3 = gen_reg_rtx (DImode);
+ }
+
+ emit_insn (gen_seth44 (temp1, op1));
+ emit_insn (gen_setm44 (temp2, temp1, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, temp3,
+ gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
+ emit_insn (gen_setl44 (op0, temp3, op1));
+ break;
+
+ case CM_MEDANY:
+ /* The range spanned by all instructions in the object is less
+ than 2^31 bytes (2GB) and the distance from any instruction
+ to the location of the label _GLOBAL_OFFSET_TABLE_ is less
+ than 2^31 bytes (2GB).
+
+ The executable can be placed anywhere in the virtual address
+ space.
+
+ sethi %hh(symbol), %temp1
+ sethi %lm(symbol), %temp2
+ or %temp1, %hm(symbol), %temp3
+ sllx %temp3, 32, %temp4
+ or %temp4, %temp2, %temp5
+ or %temp5, %lo(symbol), %reg */
+ if (temp)
+ {
+ /* It is possible that one of the registers we got for operands[2]
+ might coincide with that of operands[0] (which is why we made
+ it TImode). Pick the other one to use as our scratch. */
+ if (rtx_equal_p (temp, op0))
+ {
+ gcc_assert (ti_temp);
+ temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
+ }
+ temp1 = op0;
+ temp2 = temp; /* op0 is _not_ allowed, see above. */
+ temp3 = op0;
+ temp4 = op0;
+ temp5 = op0;
+ }
+ else
+ {
+ temp1 = gen_reg_rtx (DImode);
+ temp2 = gen_reg_rtx (DImode);
+ temp3 = gen_reg_rtx (DImode);
+ temp4 = gen_reg_rtx (DImode);
+ temp5 = gen_reg_rtx (DImode);
+ }
+
+ emit_insn (gen_sethh (temp1, op1));
+ emit_insn (gen_setlm (temp2, op1));
+ emit_insn (gen_sethm (temp3, temp1, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, temp4,
+ gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
+ emit_insn (gen_rtx_SET (VOIDmode, temp5,
+ gen_rtx_PLUS (DImode, temp4, temp2)));
+ emit_insn (gen_setlo (op0, temp5, op1));
+ break;
+
+ case CM_EMBMEDANY:
+      /* Old old old backwards compatibility cruft here.
+	 Essentially it is MEDLOW with a fixed 64-bit
+	 virtual base added to all data segment addresses.
+	 Text-segment stuff is computed like MEDANY; we can't
+	 reuse the code above because the relocation knobs
+	 look different.
+
+ Data segment: sethi %hi(symbol), %temp1
+ add %temp1, EMBMEDANY_BASE_REG, %temp2
+ or %temp2, %lo(symbol), %reg */
+ if (data_segment_operand (op1, GET_MODE (op1)))
+ {
+ if (temp)
+ {
+ temp1 = temp; /* op0 is allowed. */
+ temp2 = op0;
+ }
+ else
+ {
+ temp1 = gen_reg_rtx (DImode);
+ temp2 = gen_reg_rtx (DImode);
+ }
+
+ emit_insn (gen_embmedany_sethi (temp1, op1));
+ emit_insn (gen_embmedany_brsum (temp2, temp1));
+ emit_insn (gen_embmedany_losum (op0, temp2, op1));
+ }
+
+ /* Text segment: sethi %uhi(symbol), %temp1
+ sethi %hi(symbol), %temp2
+ or %temp1, %ulo(symbol), %temp3
+ sllx %temp3, 32, %temp4
+ or %temp4, %temp2, %temp5
+ or %temp5, %lo(symbol), %reg */
+ else
+ {
+ if (temp)
+ {
+ /* It is possible that one of the registers we got for operands[2]
+ might coincide with that of operands[0] (which is why we made
+ it TImode). Pick the other one to use as our scratch. */
+ if (rtx_equal_p (temp, op0))
+ {
+ gcc_assert (ti_temp);
+ temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
+ }
+ temp1 = op0;
+ temp2 = temp; /* op0 is _not_ allowed, see above. */
+ temp3 = op0;
+ temp4 = op0;
+ temp5 = op0;
+ }
+ else
+ {
+ temp1 = gen_reg_rtx (DImode);
+ temp2 = gen_reg_rtx (DImode);
+ temp3 = gen_reg_rtx (DImode);
+ temp4 = gen_reg_rtx (DImode);
+ temp5 = gen_reg_rtx (DImode);
+ }
+
+ emit_insn (gen_embmedany_textuhi (temp1, op1));
+ emit_insn (gen_embmedany_texthi (temp2, op1));
+ emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
+ emit_insn (gen_rtx_SET (VOIDmode, temp4,
+ gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
+ emit_insn (gen_rtx_SET (VOIDmode, temp5,
+ gen_rtx_PLUS (DImode, temp4, temp2)));
+ emit_insn (gen_embmedany_textlo (op0, temp5, op1));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+#if HOST_BITS_PER_WIDE_INT == 32
+static void
+sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+#else
+/* These avoid problems when cross compiling. If we do not
+ go through all this hair then the optimizer will see
+ invalid REG_EQUAL notes or in some cases none at all. */
+static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
+static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
+static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
+static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
+
+/* The optimizer is not allowed to assume anything about exactly
+   which bits are set for a HIGH; they are unspecified.
+   Unfortunately this leads to many missed optimizations
+   during CSE.  To alleviate the problem, we mask out the
+   non-HIGH bits so that the result matches a plain movdi.  */
+static rtx
+gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
+{
+ return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
+}
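+
+/* Example (illustrative): gen_safe_HIGH64 (reg, 0x12345678) produces
+   (set (reg) (const_int 0x12345400)); the low 10 bits are masked off
+   because a sethi would leave them zero anyway.  */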
+
+static rtx
+gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
+{
+ return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
+}
+
+static rtx
+gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
+{
+ return gen_rtx_IOR (DImode, src, GEN_INT (val));
+}
+
+static rtx
+gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
+{
+ return gen_rtx_XOR (DImode, src, GEN_INT (val));
+}
+
+/* Worker routines for 64-bit constant formation on arch64.
+   A key goal in these emissions is to create as many temp REGs
+   as possible, so that half-built constants can be reused later
+   when similar values are needed again.  Without doing this,
+   the optimizer cannot see such opportunities.  */
+
+static void sparc_emit_set_const64_quick1 (rtx, rtx,
+ unsigned HOST_WIDE_INT, int);
+
+static void
+sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
+ unsigned HOST_WIDE_INT low_bits, int is_neg)
+{
+ unsigned HOST_WIDE_INT high_bits;
+
+ if (is_neg)
+ high_bits = (~low_bits) & 0xffffffff;
+ else
+ high_bits = low_bits;
+
+ emit_insn (gen_safe_HIGH64 (temp, high_bits));
+ if (!is_neg)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ }
+ else
+ {
+ /* If we are XOR'ing with -1, then we should emit a one's complement
+ instead. This way the combiner will notice logical operations
+ such as ANDN later on and substitute. */
+ if ((low_bits & 0x3ff) == 0x3ff)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_NOT (DImode, temp)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_XOR64 (temp,
+ (-(HOST_WIDE_INT)0x400
+ | (low_bits & 0x3ff)))));
+ }
+ }
+}
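+
+/* Worked example (illustrative, not from the upstream sources): for
+   the sign-extended constant 0xffffffff80001234, this is entered with
+   low_bits == 0x80001234 and is_neg != 0, so high_bits == 0x7fffedcb
+   and the emitted sequence is
+
+     sethi %hi(0x7fffedcb), %temp        ! temp = 0x7fffec00
+     xor   %temp, -0x400 | 0x234, %op0   ! op0  = 0xffffffff80001234
+
+   XORing with a negative simm13 both flips the temp bits back and
+   sign-extends into the upper 32 bits.  */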
+
+static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT, int);
+
+static void
+sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
+ unsigned HOST_WIDE_INT high_bits,
+ unsigned HOST_WIDE_INT low_immediate,
+ int shift_count)
+{
+ rtx temp2 = op0;
+
+ if ((high_bits & 0xfffffc00) != 0)
+ {
+ emit_insn (gen_safe_HIGH64 (temp, high_bits));
+ if ((high_bits & ~0xfffffc00) != 0)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ else
+ temp2 = temp;
+ }
+ else
+ {
+ emit_insn (gen_safe_SET64 (temp, high_bits));
+ temp2 = temp;
+ }
+
+ /* Now shift it up into place. */
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, temp2,
+ GEN_INT (shift_count))));
+
+ /* If there is a low immediate part piece, finish up by
+ putting that in as well. */
+ if (low_immediate != 0)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_safe_OR64 (op0, low_immediate)));
+}
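+
+/* Worked example (illustrative): for the constant 0x1234567800000000,
+   sparc_emit_set_const64 calls this with high_bits == 0x12345678,
+   low_immediate == 0 and shift_count == 32, giving
+
+     sethi %hi(0x12345678), %temp
+     or    %temp, 0x278, %op0
+     sllx  %op0, 32, %op0
+
+   i.e. the high word is built normally and then shifted into place.  */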
+
+static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT);
+
+/* Full 64-bit constant decomposition. Even though this is the
+ 'worst' case, we still optimize a few things away. */
+static void
+sparc_emit_set_const64_longway (rtx op0, rtx temp,
+ unsigned HOST_WIDE_INT high_bits,
+ unsigned HOST_WIDE_INT low_bits)
+{
+ rtx sub_temp;
+
+ if (reload_in_progress || reload_completed)
+ sub_temp = op0;
+ else
+ sub_temp = gen_reg_rtx (DImode);
+
+ if ((high_bits & 0xfffffc00) != 0)
+ {
+ emit_insn (gen_safe_HIGH64 (temp, high_bits));
+ if ((high_bits & ~0xfffffc00) != 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ sub_temp,
+ gen_safe_OR64 (temp, (high_bits & 0x3ff))));
+ else
+ sub_temp = temp;
+ }
+ else
+ {
+ emit_insn (gen_safe_SET64 (temp, high_bits));
+ sub_temp = temp;
+ }
+
+ if (!reload_in_progress && !reload_completed)
+ {
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx temp3 = gen_reg_rtx (DImode);
+ rtx temp4 = gen_reg_rtx (DImode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, temp4,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (32))));
+
+ emit_insn (gen_safe_HIGH64 (temp2, low_bits));
+ if ((low_bits & ~0xfffffc00) != 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, temp3,
+ gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, temp4, temp3)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_PLUS (DImode, temp4, temp2)));
+ }
+ }
+ else
+ {
+ rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
+ rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
+ rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
+ int to_shift = 12;
+
+ /* We are in the middle of reload, so this is really
+ painful. However we do still make an attempt to
+ avoid emitting truly stupid code. */
+ if (low1 != const0_rtx)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low1)));
+ sub_temp = op0;
+ to_shift = 12;
+ }
+ else
+ {
+ to_shift += 12;
+ }
+ if (low2 != const0_rtx)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low2)));
+ sub_temp = op0;
+ to_shift = 8;
+ }
+ else
+ {
+ to_shift += 8;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_ASHIFT (DImode, sub_temp,
+ GEN_INT (to_shift))));
+ if (low3 != const0_rtx)
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_IOR (DImode, op0, low3)));
+ /* phew... */
+ }
+}
+
+/* Analyze a 64-bit constant for certain properties. */
+static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
+ int *, int *, int *);
+
+static void
+analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
+ unsigned HOST_WIDE_INT low_bits,
+ int *hbsp, int *lbsp, int *abbasp)
+{
+ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+ int i;
+
+ lowest_bit_set = highest_bit_set = -1;
+ i = 0;
+ do
+ {
+ if ((lowest_bit_set == -1)
+ && ((low_bits >> i) & 1))
+ lowest_bit_set = i;
+ if ((highest_bit_set == -1)
+ && ((high_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = (64 - i - 1);
+ }
+ while (++i < 32
+ && ((highest_bit_set == -1)
+ || (lowest_bit_set == -1)));
+ if (i == 32)
+ {
+ i = 0;
+ do
+ {
+ if ((lowest_bit_set == -1)
+ && ((high_bits >> i) & 1))
+ lowest_bit_set = i + 32;
+ if ((highest_bit_set == -1)
+ && ((low_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = 32 - i - 1;
+ }
+ while (++i < 32
+ && ((highest_bit_set == -1)
+ || (lowest_bit_set == -1)));
+ }
+  /* If there are no bits set, this should have gone out
+     as a single instruction!  */
+ gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
+ all_bits_between_are_set = 1;
+ for (i = lowest_bit_set; i <= highest_bit_set; i++)
+ {
+ if (i < 32)
+ {
+ if ((low_bits & (1 << i)) != 0)
+ continue;
+ }
+ else
+ {
+ if ((high_bits & (1 << (i - 32))) != 0)
+ continue;
+ }
+ all_bits_between_are_set = 0;
+ break;
+ }
+ *hbsp = highest_bit_set;
+ *lbsp = lowest_bit_set;
+ *abbasp = all_bits_between_are_set;
+}
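+
+/* Example (illustrative): for high_bits == 0 and low_bits == 0x0ff00000,
+   the routine above finds lowest_bit_set == 20, highest_bit_set == 27
+   and all_bits_between_are_set == 1; such a narrow run of set bits is
+   what the "focus bits" helpers below are built around.  */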
+
+static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
+
+static int
+const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
+ unsigned HOST_WIDE_INT low_bits)
+{
+ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+ if (high_bits == 0
+ || high_bits == 0xffffffff)
+ return 1;
+
+ analyze_64bit_constant (high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ if ((highest_bit_set == 63
+ || lowest_bit_set == 0)
+ && all_bits_between_are_set != 0)
+ return 1;
+
+ if ((highest_bit_set - lowest_bit_set) < 21)
+ return 1;
+
+ return 0;
+}
+
+static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
+ int, int);
+
+static unsigned HOST_WIDE_INT
+create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
+ unsigned HOST_WIDE_INT low_bits,
+ int lowest_bit_set, int shift)
+{
+ HOST_WIDE_INT hi, lo;
+
+ if (lowest_bit_set < 32)
+ {
+ lo = (low_bits >> lowest_bit_set) << shift;
+ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+ }
+ else
+ {
+ lo = 0;
+ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+ }
+ gcc_assert (! (hi & lo));
+ return (hi | lo);
+}
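+
+/* Example (illustrative): continuing the 0x0ff00000 case above,
+   create_simple_focus_bits (0, 0x0ff00000, 20, 10) returns
+   (0x0ff00000 >> 20) << 10 == 0x3fc00, a value a single sethi can
+   materialize before it is shifted back into place.  */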
+
+/* Here we are sure to be arch64 and this is an integer constant
+ being loaded into a register. Emit the most efficient
+ insn sequence possible. Detection of all the 1-insn cases
+ has been done already. */
+static void
+sparc_emit_set_const64 (rtx op0, rtx op1)
+{
+ unsigned HOST_WIDE_INT high_bits, low_bits;
+ int lowest_bit_set, highest_bit_set;
+ int all_bits_between_are_set;
+ rtx temp = 0;
+
+ /* Sanity check that we know what we are working with. */
+ gcc_assert (TARGET_ARCH64
+ && (GET_CODE (op0) == SUBREG
+ || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
+
+ if (reload_in_progress || reload_completed)
+ temp = op0;
+
+ if (GET_CODE (op1) != CONST_INT)
+ {
+ sparc_emit_set_symbolic_const64 (op0, op1, temp);
+ return;
+ }
+
+ if (! temp)
+ temp = gen_reg_rtx (DImode);
+
+ high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
+ low_bits = (INTVAL (op1) & 0xffffffff);
+
+ /* low_bits bits 0 --> 31
+ high_bits bits 32 --> 63 */
+
+ analyze_64bit_constant (high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ /* First try for a 2-insn sequence. */
+
+ /* These situations are preferred because the optimizer can
+ * do more things with them:
+ * 1) mov -1, %reg
+ * sllx %reg, shift, %reg
+ * 2) mov -1, %reg
+ * srlx %reg, shift, %reg
+ * 3) mov some_small_const, %reg
+ * sllx %reg, shift, %reg
+ */
+ if (((highest_bit_set == 63
+ || lowest_bit_set == 0)
+ && all_bits_between_are_set != 0)
+ || ((highest_bit_set - lowest_bit_set) < 12))
+ {
+ HOST_WIDE_INT the_const = -1;
+ int shift = lowest_bit_set;
+
+ if ((highest_bit_set != 63
+ && lowest_bit_set != 0)
+ || all_bits_between_are_set == 0)
+ {
+ the_const =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 0);
+ }
+ else if (lowest_bit_set == 0)
+ shift = -(63 - highest_bit_set);
+
+ gcc_assert (SPARC_SIMM13_P (the_const));
+ gcc_assert (shift != 0);
+
+ emit_insn (gen_safe_SET64 (temp, the_const));
+ if (shift > 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_ASHIFT (DImode,
+ temp,
+ GEN_INT (shift))));
+ else if (shift < 0)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_LSHIFTRT (DImode,
+ temp,
+ GEN_INT (-shift))));
+ return;
+ }
+
+  /* Now a range of 22 or fewer bits set somewhere.
+ * 1) sethi %hi(focus_bits), %reg
+ * sllx %reg, shift, %reg
+ * 2) sethi %hi(focus_bits), %reg
+ * srlx %reg, shift, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 21)
+ {
+ unsigned HOST_WIDE_INT focus_bits =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 10);
+
+ gcc_assert (SPARC_SETHI_P (focus_bits));
+ gcc_assert (lowest_bit_set != 10);
+
+ emit_insn (gen_safe_HIGH64 (temp, focus_bits));
+
+ /* If lowest_bit_set == 10 then a sethi alone could have done it. */
+ if (lowest_bit_set < 10)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_LSHIFTRT (DImode, temp,
+ GEN_INT (10 - lowest_bit_set))));
+ else if (lowest_bit_set > 10)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_rtx_ASHIFT (DImode, temp,
+ GEN_INT (lowest_bit_set - 10))));
+ return;
+ }
+
+ /* 1) sethi %hi(low_bits), %reg
+ * or %reg, %lo(low_bits), %reg
+ * 2) sethi %hi(~low_bits), %reg
+ * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
+ */
+ if (high_bits == 0
+ || high_bits == 0xffffffff)
+ {
+ sparc_emit_set_const64_quick1 (op0, temp, low_bits,
+ (high_bits == 0xffffffff));
+ return;
+ }
+
+ /* Now, try 3-insn sequences. */
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ */
+ if (low_bits == 0)
+ {
+ sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
+ return;
+ }
+
+ /* We may be able to do something quick
+ when the constant is negated, so try that. */
+ if (const64_is_2insns ((~high_bits) & 0xffffffff,
+ (~low_bits) & 0xfffffc00))
+ {
+ /* NOTE: The trailing bits get XOR'd so we need the
+ non-negated bits, not the negated ones. */
+ unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
+
+ if ((((~high_bits) & 0xffffffff) == 0
+ && ((~low_bits) & 0x80000000) == 0)
+ || (((~high_bits) & 0xffffffff) == 0xffffffff
+ && ((~low_bits) & 0x80000000) != 0))
+ {
+ unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
+
+ if ((SPARC_SETHI_P (fast_int)
+ && (~high_bits & 0xffffffff) == 0)
+ || SPARC_SIMM13_P (fast_int))
+ emit_insn (gen_safe_SET64 (temp, fast_int));
+ else
+ sparc_emit_set_const64 (temp, GEN_INT (fast_int));
+ }
+ else
+ {
+ rtx negated_const;
+ negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
+ (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
+ sparc_emit_set_const64 (temp, negated_const);
+ }
+
+ /* If we are XOR'ing with -1, then we should emit a one's complement
+ instead. This way the combiner will notice logical operations
+ such as ANDN later on and substitute. */
+ if (trailing_bits == 0x3ff)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0,
+ gen_rtx_NOT (DImode, temp)));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ op0,
+ gen_safe_XOR64 (temp,
+ (-0x400 | trailing_bits))));
+ }
+ return;
+ }
+
+ /* 1) sethi %hi(xxx), %reg
+ * or %reg, %lo(xxx), %reg
+ * sllx %reg, yyy, %reg
+ *
+ * ??? This is just a generalized version of the low_bits==0
+ * thing above, FIXME...
+ */
+ if ((highest_bit_set - lowest_bit_set) < 32)
+ {
+ unsigned HOST_WIDE_INT focus_bits =
+ create_simple_focus_bits (high_bits, low_bits,
+ lowest_bit_set, 0);
+
+ /* We can't get here in this state. */
+ gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
+
+ /* So what we know is that the set bits straddle the
+ middle of the 64-bit word. */
+ sparc_emit_set_const64_quick2 (op0, temp,
+ focus_bits, 0,
+ lowest_bit_set);
+ return;
+ }
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ * or %reg, low_bits, %reg
+ */
+ if (SPARC_SIMM13_P(low_bits)
+ && ((int)low_bits > 0))
+ {
+ sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
+ return;
+ }
+
+ /* The easiest way when all else fails, is full decomposition. */
+ sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
+}
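+
+/* Worked example (illustrative, not from the upstream sources): the
+   constant 0x0000000ff0000000 has only bits 28..35 set, a span of
+   fewer than 12 bits, so the first 2-insn case above applies and
+   emits
+
+     mov  0xff, %temp
+     sllx %temp, 28, %op0
+  */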
+#endif /* HOST_BITS_PER_WIDE_INT == 32 */
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. For floating-point,
+ CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
+ is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
+ processing is needed. */
+
+enum machine_mode
+select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ switch (op)
+ {
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case LTGT:
+ return CCFPmode;
+
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ return CCFPEmode;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
+ || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
+ {
+ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
+ return CCX_NOOVmode;
+ else
+ return CC_NOOVmode;
+ }
+ else
+ {
+ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
+ return CCXmode;
+ else
+ return CCmode;
+ }
+}
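+
+/* Example (illustrative): a comparison whose first operand is
+   (plus:SI a b) gets CC_NOOVmode, recording that the condition codes
+   come from the arithmetic operation itself and the overflow bit
+   cannot be relied upon; a comparison of a plain (reg:SI a) gets
+   CCmode, and the DImode variants get CCX_NOOVmode/CCXmode when
+   TARGET_ARCH64.  */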
+
+/* Emit the compare insn and return the CC reg for a CODE comparison
+ with operands X and Y. */
+
+static rtx
+gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
+{
+ enum machine_mode mode;
+ rtx cc_reg;
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
+ return x;
+
+ mode = SELECT_CC_MODE (code, x, y);
+
+ /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
+ fcc regs (cse can't tell they're really call clobbered regs and will
+ remove a duplicate comparison even if there is an intervening function
+ call - it will then try to reload the cc reg via an int reg which is why
+ we need the movcc patterns). It is possible to provide the movcc
+ patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
+ registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
+ to tell cse that CCFPE mode registers (even pseudos) are call
+ clobbered. */
+
+ /* ??? This is an experiment. Rather than making changes to cse which may
+ or may not be easy/clean, we do our own cse. This is possible because
+ we will generate hard registers. Cse knows they're call clobbered (it
+ doesn't know the same thing about pseudos). If we guess wrong, no big
+ deal, but if we win, great! */
+
+ if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+#if 1 /* experiment */
+ {
+ int reg;
+ /* We cycle through the registers to ensure they're all exercised. */
+ static int next_fcc_reg = 0;
+ /* Previous x,y for each fcc reg. */
+ static rtx prev_args[4][2];
+
+ /* Scan prev_args for x,y. */
+ for (reg = 0; reg < 4; reg++)
+ if (prev_args[reg][0] == x && prev_args[reg][1] == y)
+ break;
+ if (reg == 4)
+ {
+ reg = next_fcc_reg;
+ prev_args[reg][0] = x;
+ prev_args[reg][1] = y;
+ next_fcc_reg = (next_fcc_reg + 1) & 3;
+ }
+ cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
+ }
+#else
+ cc_reg = gen_reg_rtx (mode);
+#endif /* ! experiment */
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
+ else
+ cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
+
+  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
+     will only result in an unrecognizable insn so no point in asserting.  */
+ emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));
+
+ return cc_reg;
+}
+
+
+/* Emit the compare insn and return the CC reg for the comparison in CMP. */
+
+rtx
+gen_compare_reg (rtx cmp)
+{
+ return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
+}
+
+/* This function is used for v9 only.
+ DEST is the target of the Scc insn.
+ CODE is the code for an Scc's comparison.
+ X and Y are the values we compare.
+
+ This function is needed to turn
+
+ (set (reg:SI 110)
+ (gt (reg:CCX 100 %icc)
+ (const_int 0)))
+ into
+ (set (reg:SI 110)
+ (gt:DI (reg:CCX 100 %icc)
+ (const_int 0)))
+
+   I.e., the instruction recognizer needs to see the mode of the comparison to
+   find the right instruction.  We could use "gt:DI" right in the
+   define_expand, but leaving it out allows us to handle DI, SI, etc.  */
+
+static int
+gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
+{
+ if (! TARGET_ARCH64
+ && (GET_MODE (x) == DImode
+ || GET_MODE (dest) == DImode))
+ return 0;
+
+ /* Try to use the movrCC insns. */
+ if (TARGET_ARCH64
+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
+ && y == const0_rtx
+ && v9_regcmp_p (compare_code))
+ {
+ rtx op0 = x;
+ rtx temp;
+
+ /* Special case for op0 != 0. This can be done with one instruction if
+ dest == x. */
+
+ if (compare_code == NE
+ && GET_MODE (dest) == DImode
+ && rtx_equal_p (op0, dest))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IF_THEN_ELSE (DImode,
+ gen_rtx_fmt_ee (compare_code, DImode,
+ op0, const0_rtx),
+ const1_rtx,
+ dest)));
+ return 1;
+ }
+
+ if (reg_overlap_mentioned_p (dest, op0))
+ {
+ /* Handle the case where dest == x.
+ We "early clobber" the result. */
+ op0 = gen_reg_rtx (GET_MODE (x));
+ emit_move_insn (op0, x);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ if (GET_MODE (op0) != DImode)
+ {
+ temp = gen_reg_rtx (DImode);
+ convert_move (temp, op0, 0);
+ }
+ else
+ temp = op0;
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
+ gen_rtx_fmt_ee (compare_code, DImode,
+ temp, const0_rtx),
+ const1_rtx,
+ dest)));
+ return 1;
+ }
+ else
+ {
+ x = gen_compare_reg_1 (compare_code, x, y);
+ y = const0_rtx;
+
+ gcc_assert (GET_MODE (x) != CC_NOOVmode
+ && GET_MODE (x) != CCX_NOOVmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ emit_insn (gen_rtx_SET (VOIDmode, dest,
+ gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
+ gen_rtx_fmt_ee (compare_code,
+ GET_MODE (x), x, y),
+ const1_rtx, dest)));
+ return 1;
+ }
+}
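+
+/* Example (illustrative, assuming the usual v9 mnemonics): for
+   "dest = (dest != 0)" in DImode, the special case above emits a
+   single conditional move on register contents,
+
+     movrnz %dest, 1, %dest
+
+   instead of a compare-and-branch sequence.  */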
+
+
+/* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
+ without jumps using the addx/subx instructions. */
+
+bool
+emit_scc_insn (rtx operands[])
+{
+ rtx tem;
+ rtx x;
+ rtx y;
+ enum rtx_code code;
+
+ /* The quad-word fp compare library routines all return nonzero to indicate
+ true, which is different from the equivalent libgcc routines, so we must
+ handle them specially here. */
+ if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
+ {
+ operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
+ GET_CODE (operands[1]));
+ operands[2] = XEXP (operands[1], 0);
+ operands[3] = XEXP (operands[1], 1);
+ }
+
+ code = GET_CODE (operands[1]);
+ x = operands[2];
+ y = operands[3];
+
+ /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
+ more applications). The exception to this is "reg != 0" which can
+ be done in one instruction on v9 (so we do it). */
+ if (code == EQ)
+ {
+ if (GET_MODE (x) == SImode)
+ {
+ rtx pat = gen_seqsi_special (operands[0], x, y);
+ emit_insn (pat);
+ return true;
+ }
+ else if (GET_MODE (x) == DImode)
+ {
+ rtx pat = gen_seqdi_special (operands[0], x, y);
+ emit_insn (pat);
+ return true;
+ }
+ }
+
+ if (code == NE)
+ {
+ if (GET_MODE (x) == SImode)
+ {
+ rtx pat = gen_snesi_special (operands[0], x, y);
+ emit_insn (pat);
+ return true;
+ }
+ else if (GET_MODE (x) == DImode)
+ {
+ rtx pat = gen_snedi_special (operands[0], x, y);
+ emit_insn (pat);
+ return true;
+ }
+ }
+
+ /* For the rest, on v9 we can use conditional moves. */
+
+ if (TARGET_V9)
+ {
+ if (gen_v9_scc (operands[0], code, x, y))
+ return true;
+ }
+
+  /* We can do LTU and GEU using the addx/subx instructions too.  And
+     for GTU/LEU, if both operands are registers, swap them and fall
+     back to the easy case.  */
+ if (code == GTU || code == LEU)
+ {
+ if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
+ {
+ tem = x;
+ x = y;
+ y = tem;
+ code = swap_condition (code);
+ }
+ }
+
+ if (code == LTU || code == GEU)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (code, SImode,
+ gen_compare_reg_1 (code, x, y),
+ const0_rtx)));
+ return true;
+ }
+
+ /* Nope, do branches. */
+ return false;
+}
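+
+/* Example (illustrative, assuming the usual SPARC idiom): for
+   "dest = (x < y)" on unsigned operands (LTU), the final case above
+   expands to something like
+
+     subcc %x, %y, %g0    ! set the carry flag from x - y
+     addx  %g0, 0, %dest  ! dest = carry, i.e. x < y unsigned
+  */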
+
+/* Emit a conditional jump insn for the v9 architecture using comparison code
+ CODE and jump target LABEL.
+ This function exists to take advantage of the v9 brxx insns. */
+
+static void
+emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
+{
+ emit_jump_insn (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_fmt_ee (code, GET_MODE (op0),
+ op0, const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx)));
+}
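+
+/* Example (illustrative): for code == NE this produces a v9
+   branch-on-register-contents such as "brnz %op0, label", avoiding a
+   separate compare to set the condition codes.  */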
+
+void
+emit_conditional_branch_insn (rtx operands[])
+{
+ /* The quad-word fp compare library routines all return nonzero to indicate
+ true, which is different from the equivalent libgcc routines, so we must
+ handle them specially here. */
+ if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
+ {
+ operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
+ GET_CODE (operands[0]));
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = XEXP (operands[0], 1);
+ }
+
+ if (TARGET_ARCH64 && operands[2] == const0_rtx
+ && GET_CODE (operands[1]) == REG
+ && GET_MODE (operands[1]) == DImode)
+ {
+ emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
+ return;
+ }
+
+ operands[1] = gen_compare_reg (operands[0]);
+ operands[2] = const0_rtx;
+ operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
+ operands[1], operands[2]);
+ emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
+ operands[3]));
+}
+
+
+/* Generate a DFmode part of a hard TFmode register.
+   REG is the TFmode hard register, LOW is 1 for the
+   low 64 bits of the register and 0 otherwise.  */
+rtx
+gen_df_reg (rtx reg, int low)
+{
+ int regno = REGNO (reg);
+
+ if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
+ regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
+ return gen_rtx_REG (DFmode, regno);
+}
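+
+/* Example (illustrative, assuming the usual SPARC register layout):
+   for a TFmode value in %f0 (which occupies %f0-%f3), gen_df_reg
+   (reg, 0) returns %f0 for the high 64 bits and gen_df_reg (reg, 1)
+   returns %f2 for the low 64 bits, SPARC being big-endian.  */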
+
+/* Generate a call to FUNC_NAME with OPERANDS.  Operand 0 is the return value.
+   Unlike normal calls, TFmode operands are passed by reference.  It is
+   assumed that no more than 3 operands are required.  */
+
+static void
+emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
+{
+ rtx ret_slot = NULL, arg[3], func_sym;
+ int i;
+
+ /* We only expect to be called for conversions, unary, and binary ops. */
+ gcc_assert (nargs == 2 || nargs == 3);
+
+ for (i = 0; i < nargs; ++i)
+ {
+ rtx this_arg = operands[i];
+ rtx this_slot;
+
+ /* TFmode arguments and return values are passed by reference. */
+ if (GET_MODE (this_arg) == TFmode)
+ {
+ int force_stack_temp;
+
+ force_stack_temp = 0;
+ if (TARGET_BUGGY_QP_LIB && i == 0)
+ force_stack_temp = 1;
+
+ if (GET_CODE (this_arg) == MEM
+ && ! force_stack_temp)
+ this_arg = XEXP (this_arg, 0);
+ else if (CONSTANT_P (this_arg)
+ && ! force_stack_temp)
+ {
+ this_slot = force_const_mem (TFmode, this_arg);
+ this_arg = XEXP (this_slot, 0);
+ }
+ else
+ {
+ this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
+
+ /* Operand 0 is the return value. We'll copy it out later. */
+ if (i > 0)
+ emit_move_insn (this_slot, this_arg);
+ else
+ ret_slot = this_slot;
+
+ this_arg = XEXP (this_slot, 0);
+ }
+ }
+
+ arg[i] = this_arg;
+ }
+
+ func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
+
+ if (GET_MODE (operands[0]) == TFmode)
+ {
+ if (nargs == 2)
+ emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
+ arg[0], GET_MODE (arg[0]),
+ arg[1], GET_MODE (arg[1]));
+ else
+ emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
+ arg[0], GET_MODE (arg[0]),
+ arg[1], GET_MODE (arg[1]),
+ arg[2], GET_MODE (arg[2]));
+
+ if (ret_slot)
+ emit_move_insn (operands[0], ret_slot);
+ }
+ else
+ {
+ rtx ret;
+
+ gcc_assert (nargs == 2);
+
+ ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
+ GET_MODE (operands[0]), 1,
+ arg[1], GET_MODE (arg[1]));
+
+ if (ret != operands[0])
+ emit_move_insn (operands[0], ret);
+ }
+}
+
+/* Expand soft-float TFmode calls to SPARC ABI routines.  */
+
+static void
+emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
+{
+ const char *func;
+
+ switch (code)
+ {
+ case PLUS:
+ func = "_Qp_add";
+ break;
+ case MINUS:
+ func = "_Qp_sub";
+ break;
+ case MULT:
+ func = "_Qp_mul";
+ break;
+ case DIV:
+ func = "_Qp_div";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_soft_tfmode_libcall (func, 3, operands);
+}
+
+static void
+emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
+{
+ const char *func;
+
+ gcc_assert (code == SQRT);
+ func = "_Qp_sqrt";
+
+ emit_soft_tfmode_libcall (func, 2, operands);
+}
+
+static void
+emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
+{
+ const char *func;
+
+ switch (code)
+ {
+ case FLOAT_EXTEND:
+ switch (GET_MODE (operands[1]))
+ {
+ case SFmode:
+ func = "_Qp_stoq";
+ break;
+ case DFmode:
+ func = "_Qp_dtoq";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case FLOAT_TRUNCATE:
+ switch (GET_MODE (operands[0]))
+ {
+ case SFmode:
+ func = "_Qp_qtos";
+ break;
+ case DFmode:
+ func = "_Qp_qtod";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case FLOAT:
+ switch (GET_MODE (operands[1]))
+ {
+ case SImode:
+ func = "_Qp_itoq";
+ if (TARGET_ARCH64)
+ operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
+ break;
+ case DImode:
+ func = "_Qp_xtoq";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case UNSIGNED_FLOAT:
+ switch (GET_MODE (operands[1]))
+ {
+ case SImode:
+ func = "_Qp_uitoq";
+ if (TARGET_ARCH64)
+ operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
+ break;
+ case DImode:
+ func = "_Qp_uxtoq";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case FIX:
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ func = "_Qp_qtoi";
+ break;
+ case DImode:
+ func = "_Qp_qtox";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case UNSIGNED_FIX:
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ func = "_Qp_qtoui";
+ break;
+ case DImode:
+ func = "_Qp_qtoux";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_soft_tfmode_libcall (func, 2, operands);
+}
+
+/* Expand a hard-float TFmode operation.  All arguments must be in
+   registers.  */
+
+static void
+emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
+{
+ rtx op, dest;
+
+ if (GET_RTX_CLASS (code) == RTX_UNARY)
+ {
+ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+ op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
+ }
+ else
+ {
+ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+ operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+ op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
+ operands[1], operands[2]);
+ }
+
+ if (register_operand (operands[0], VOIDmode))
+ dest = operands[0];
+ else
+ dest = gen_reg_rtx (GET_MODE (operands[0]));
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op));
+
+ if (dest != operands[0])
+ emit_move_insn (operands[0], dest);
+}
+
+void
+emit_tfmode_binop (enum rtx_code code, rtx *operands)
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_binop (code, operands);
+}
+
+void
+emit_tfmode_unop (enum rtx_code code, rtx *operands)
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_unop (code, operands);
+}
+
+void
+emit_tfmode_cvt (enum rtx_code code, rtx *operands)
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_cvt (code, operands);
+}
+
+/* Return nonzero if a branch/jump/call instruction will be emitting a
+   nop into its delay slot.  */
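+
+/* A branch whose delay slot has been filled is represented as a SEQUENCE
+   wrapping the branch together with the delay insn, so it suffices to
+   look at the pattern that contains INSN.  */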
+
+int
+empty_delay_slot (rtx insn)
+{
+ rtx seq;
+
+ /* If no previous instruction (should not happen), return true. */
+ if (PREV_INSN (insn) == NULL)
+ return 1;
+
+ seq = NEXT_INSN (PREV_INSN (insn));
+ if (GET_CODE (PATTERN (seq)) == SEQUENCE)
+ return 0;
+
+ return 1;
+}
+
+/* Return nonzero if TRIAL can go into the call delay slot. */
+
+int
+tls_call_delay (rtx trial)
+{
+ rtx pat;
+
+ /* Binutils allows
+ call __tls_get_addr, %tgd_call (foo)
+ add %l7, %o0, %o0, %tgd_add (foo)
+ while Sun as/ld does not. */
+ if (TARGET_GNU_TLS || !TARGET_TLS)
+ return 1;
+
+ pat = PATTERN (trial);
+
+ /* We must reject tgd_add{32|64}, i.e.
+ (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
+ and tldm_add{32|64}, i.e.
+ (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
+ for Sun as/ld. */
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == PLUS)
+ {
+ rtx unspec = XEXP (SET_SRC (pat), 1);
+
+ if (GET_CODE (unspec) == UNSPEC
+ && (XINT (unspec, 1) == UNSPEC_TLSGD
+ || XINT (unspec, 1) == UNSPEC_TLSLDM))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
+ instruction. RETURN_P is true if the v9 variant 'return' is to be
+ considered in the test too.
+
+ TRIAL must be a SET whose destination is a REG appropriate for the
+ 'restore' instruction or, if RETURN_P is true, for the 'return'
+ instruction. */
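+
+/* The SPARC 'restore rs1, rs2, rd' instruction performs rd = rs1 + rs2 in
+   the caller's window while unwinding ours, which is why plain moves,
+   additions, %lo sums and the x + x shift below can all be folded into
+   it.  */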
+
+static int
+eligible_for_restore_insn (rtx trial, bool return_p)
+{
+ rtx pat = PATTERN (trial);
+ rtx src = SET_SRC (pat);
+
+ /* The 'restore src,%g0,dest' pattern for word mode and below. */
+ if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
+ && arith_operand (src, GET_MODE (src)))
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
+
+ /* The 'restore src,%g0,dest' pattern for double-word mode. */
+ else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
+ && arith_double_operand (src, GET_MODE (src)))
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+
+ /* The 'restore src,%g0,dest' pattern for float if no FPU. */
+ else if (! TARGET_FPU && register_operand (src, SFmode))
+ return 1;
+
+ /* The 'restore src,%g0,dest' pattern for double if no FPU. */
+ else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
+ return 1;
+
+ /* If we have the 'return' instruction, anything that does not use
+ local or output registers and can go into a delay slot wins. */
+ else if (return_p && TARGET_V9 && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial)
+ == IN_UNCOND_BRANCH_DELAY_TRUE))
+ return 1;
+
+ /* The 'restore src1,src2,dest' pattern for SImode. */
+ else if (GET_CODE (src) == PLUS
+ && register_operand (XEXP (src, 0), SImode)
+ && arith_operand (XEXP (src, 1), SImode))
+ return 1;
+
+ /* The 'restore src1,src2,dest' pattern for DImode. */
+ else if (GET_CODE (src) == PLUS
+ && register_operand (XEXP (src, 0), DImode)
+ && arith_double_operand (XEXP (src, 1), DImode))
+ return 1;
+
+ /* The 'restore src1,%lo(src2),dest' pattern. */
+ else if (GET_CODE (src) == LO_SUM
+ && ! TARGET_CM_MEDMID
+ && ((register_operand (XEXP (src, 0), SImode)
+ && immediate_operand (XEXP (src, 1), SImode))
+ || (TARGET_ARCH64
+ && register_operand (XEXP (src, 0), DImode)
+ && immediate_operand (XEXP (src, 1), DImode))))
+ return 1;
+
+ /* The 'restore src,src,dest' pattern. */
+ else if (GET_CODE (src) == ASHIFT
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 0), DImode))
+ && XEXP (src, 1) == const1_rtx)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if TRIAL can go into the function return's
+ delay slot. */
+
+int
+eligible_for_return_delay (rtx trial)
+{
+ rtx pat;
+
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ if (get_attr_length (trial) != 1)
+ return 0;
+
+ /* If the function uses __builtin_eh_return, the eh_return machinery
+ occupies the delay slot. */
+ if (crtl->calls_eh_return)
+ return 0;
+
+ /* In the case of a true leaf function, anything can go into the slot. */
+ if (sparc_leaf_function_p)
+ return get_attr_in_uncond_branch_delay (trial)
+ == IN_UNCOND_BRANCH_DELAY_TRUE;
+
+ pat = PATTERN (trial);
+
+ /* Otherwise, only operations which can be done in tandem with
+ a `restore' or `return' insn can go into the delay slot. */
+ if (GET_CODE (SET_DEST (pat)) != REG
+ || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
+ return 0;
+
+  /* If this instruction sets up a floating-point register and we have a
+     return instruction, it can probably go in.  But restore will not work
+     with FP_REGS.  */
+ if (REGNO (SET_DEST (pat)) >= 32)
+ return (TARGET_V9
+ && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial)
+ == IN_UNCOND_BRANCH_DELAY_TRUE));
+
+ return eligible_for_restore_insn (trial, true);
+}
+
+/* Return nonzero if TRIAL can go into the sibling call's
+ delay slot. */
+
+int
+eligible_for_sibcall_delay (rtx trial)
+{
+ rtx pat;
+
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ if (get_attr_length (trial) != 1)
+ return 0;
+
+ pat = PATTERN (trial);
+
+ if (sparc_leaf_function_p)
+ {
+ /* If the tail call is done using the call instruction,
+ we have to restore %o7 in the delay slot. */
+ if (LEAF_SIBCALL_SLOT_RESERVED_P)
+ return 0;
+
+      /* %g1 is used to build the function address.  */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
+ return 0;
+
+ return 1;
+ }
+
+ /* Otherwise, only operations which can be done in tandem with
+ a `restore' insn can go into the delay slot. */
+ if (GET_CODE (SET_DEST (pat)) != REG
+ || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
+ || REGNO (SET_DEST (pat)) >= 32)
+ return 0;
+
+  /* If it mentions %o7, it can't go in, because the sibcall will clobber
+     it in most cases.  */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
+ return 0;
+
+ return eligible_for_restore_insn (trial, false);
+}
+
+int
+short_branch (int uid1, int uid2)
+{
+ int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
+
+ /* Leave a few words of "slop". */
+ if (delta >= -1023 && delta <= 1022)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels or calls or jumps. */
+int
+reg_unused_after (rtx reg, rtx insn)
+{
+ enum rtx_code code, prev_code = UNKNOWN;
+
+ while ((insn = NEXT_INSN (insn)))
+ {
+ if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
+ return 1;
+
+ code = GET_CODE (insn);
+ if (GET_CODE (insn) == CODE_LABEL)
+ return 1;
+
+ if (INSN_P (insn))
+ {
+ rtx set = single_set (insn);
+ int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
+ if (set && in_src)
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return 1;
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return 0;
+ }
+ prev_code = code;
+ }
+ return 1;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible if X contains the address of a symbol that is
+ not constant (TLS) or not known at final link time (PIC). */
+
+static bool
+sparc_cannot_force_const_mem (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ /* Accept all non-symbolic constants. */
+ return false;
+
+ case LABEL_REF:
+ /* Labels are OK iff we are non-PIC. */
+ return flag_pic != 0;
+
+ case SYMBOL_REF:
+ /* 'Naked' TLS symbol references are never OK,
+ non-TLS symbols are OK iff we are non-PIC. */
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return true;
+ else
+ return flag_pic != 0;
+
+ case CONST:
+ return sparc_cannot_force_const_mem (XEXP (x, 0));
+ case PLUS:
+ case MINUS:
+ return sparc_cannot_force_const_mem (XEXP (x, 0))
+ || sparc_cannot_force_const_mem (XEXP (x, 1));
+ case UNSPEC:
+ return true;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Global Offset Table support. */
+static GTY(()) rtx got_helper_rtx = NULL_RTX;
+static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
+
+/* Return the SYMBOL_REF for the Global Offset Table. */
+
+static GTY(()) rtx sparc_got_symbol = NULL_RTX;
+
+static rtx
+sparc_got (void)
+{
+ if (!sparc_got_symbol)
+ sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+ return sparc_got_symbol;
+}
+
+/* Ensure that we are not using patterns that are not OK with PIC. */
+
+int
+check_pic (int i)
+{
+ rtx op;
+
+ switch (flag_pic)
+ {
+ case 1:
+ op = recog_data.operand[i];
+ gcc_assert (GET_CODE (op) != SYMBOL_REF
+ && (GET_CODE (op) != CONST
+ || (GET_CODE (XEXP (op, 0)) == MINUS
+ && XEXP (XEXP (op, 0), 0) == sparc_got ()
+ && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
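+      /* Fall through: an operand that passes the assertion is OK.  */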
+ case 2:
+ default:
+ return 1;
+ }
+}
+
+/* Return true if X is an address which needs a temporary register when
+ reloaded while generating PIC code. */
+
+int
+pic_address_needs_scratch (rtx x)
+{
+  /* An address which is a symbolic operand plus a non-SMALL_INT constant
+     needs a temporary register.  */
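+  /* For instance (const (plus (symbol_ref "x") (const_int 100000))):
+     the offset does not fit in the signed 13-bit immediate field.  */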
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
+ return 1;
+
+ return 0;
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+
+bool
+legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ if (sparc_tls_referenced_p (x))
+ return false;
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ return true;
+
+ /* Floating point constants are generally not ok.
+ The only exception is 0.0 in VIS. */
+ if (TARGET_VIS
+ && SCALAR_FLOAT_MODE_P (GET_MODE (x))
+ && const_zero_operand (x, GET_MODE (x)))
+ return true;
+
+ return false;
+
+ case CONST_VECTOR:
+ /* Vector constants are generally not ok.
+ The only exception is 0 in VIS. */
+ if (TARGET_VIS
+ && const_zero_operand (x, GET_MODE (x)))
+ return true;
+
+ return false;
+
+ default:
+ break;
+ }
+
+ return true;
+}
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ case CONST_INT:
+ case HIGH:
+ return true;
+
+ case CONST:
+ if (flag_pic && pic_address_needs_scratch (x))
+ return false;
+ return legitimate_constant_p (x);
+
+ case SYMBOL_REF:
+ return !flag_pic && legitimate_constant_p (x);
+
+ default:
+ return false;
+ }
+}
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_pic_operand_p (rtx x)
+{
+ if (pic_address_needs_scratch (x))
+ return false;
+ if (sparc_tls_referenced_p (x))
+ return false;
+ return true;
+}
+
+/* Return nonzero if ADDR is a valid memory address.
+ STRICT specifies whether strict register checking applies. */
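+
+/* The shapes accepted below boil down to REG, REG + REG, REG + SIMM13,
+   LO_SUM (REG, IMM) and a bare SMALL_INT absolute address, with mode-,
+   PIC- and TLS-specific restrictions layered on top.  */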
+
+static bool
+sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
+{
+ rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
+
+ if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ rs1 = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rs1 = XEXP (addr, 0);
+ rs2 = XEXP (addr, 1);
+
+      /* Canonicalize.  REG comes first; if there are no regs,
+	 LO_SUM comes first.  */
+ if (!REG_P (rs1)
+ && GET_CODE (rs1) != SUBREG
+ && (REG_P (rs2)
+ || GET_CODE (rs2) == SUBREG
+ || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
+ {
+ rs1 = XEXP (addr, 1);
+ rs2 = XEXP (addr, 0);
+ }
+
+ if ((flag_pic == 1
+ && rs1 == pic_offset_table_rtx
+ && !REG_P (rs2)
+ && GET_CODE (rs2) != SUBREG
+ && GET_CODE (rs2) != LO_SUM
+ && GET_CODE (rs2) != MEM
+ && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
+ && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
+ && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
+ || ((REG_P (rs1)
+ || GET_CODE (rs1) == SUBREG)
+ && RTX_OK_FOR_OFFSET_P (rs2)))
+ {
+ imm1 = rs2;
+ rs2 = NULL;
+ }
+ else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
+ && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
+ {
+ /* We prohibit REG + REG for TFmode when there are no quad move insns
+ and we consequently need to split. We do this because REG+REG
+ is not an offsettable address. If we get the situation in reload
+ where source and destination of a movtf pattern are both MEMs with
+ REG+REG address, then only one of them gets converted to an
+ offsettable address. */
+ if (mode == TFmode
+ && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
+ return 0;
+
+	  /* We prohibit REG + REG for DFmode/DImode on ARCH32 when not
+	     optimizing, because then mem_min_alignment is likely to be zero
+	     after reload and the forced split would lack a matching splitter
+	     pattern.  */
+ if (TARGET_ARCH32 && !optimize
+ && (mode == DFmode || mode == DImode))
+ return 0;
+ }
+ else if (USE_AS_OFFSETABLE_LO10
+ && GET_CODE (rs1) == LO_SUM
+ && TARGET_ARCH64
+ && ! TARGET_CM_MEDMID
+ && RTX_OK_FOR_OLO10_P (rs2))
+ {
+ rs2 = NULL;
+ imm1 = XEXP (rs1, 1);
+ rs1 = XEXP (rs1, 0);
+ if (!CONSTANT_P (imm1)
+ || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
+ return 0;
+ }
+ }
+ else if (GET_CODE (addr) == LO_SUM)
+ {
+ rs1 = XEXP (addr, 0);
+ imm1 = XEXP (addr, 1);
+
+ if (!CONSTANT_P (imm1)
+ || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
+ return 0;
+
+ /* We can't allow TFmode in 32-bit mode, because an offset greater
+ than the alignment (8) may cause the LO_SUM to overflow. */
+ if (mode == TFmode && TARGET_ARCH32)
+ return 0;
+ }
+ else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
+ return 1;
+ else
+ return 0;
+
+ if (GET_CODE (rs1) == SUBREG)
+ rs1 = SUBREG_REG (rs1);
+ if (!REG_P (rs1))
+ return 0;
+
+ if (rs2)
+ {
+ if (GET_CODE (rs2) == SUBREG)
+ rs2 = SUBREG_REG (rs2);
+ if (!REG_P (rs2))
+ return 0;
+ }
+
+ if (strict)
+ {
+ if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
+ || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
+ return 0;
+ }
+ else
+ {
+ if ((REGNO (rs1) >= 32
+ && REGNO (rs1) != FRAME_POINTER_REGNUM
+ && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
+ || (rs2
+ && (REGNO (rs2) >= 32
+ && REGNO (rs2) != FRAME_POINTER_REGNUM
+ && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
+ return 0;
+ }
+ return 1;
+}
+
+/* Return the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
+
+static rtx
+sparc_tls_get_addr (void)
+{
+ if (!sparc_tls_symbol)
+ sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
+
+ return sparc_tls_symbol;
+}
+
+/* Return the Global Offset Table to be used in TLS mode. */
+
+static rtx
+sparc_tls_got (void)
+{
+ /* In PIC mode, this is just the PIC offset table. */
+ if (flag_pic)
+ {
+ crtl->uses_pic_offset_table = 1;
+ return pic_offset_table_rtx;
+ }
+
+ /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
+ the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
+ if (TARGET_SUN_TLS && TARGET_ARCH32)
+ {
+ load_got_register ();
+ return global_offset_table_rtx;
+ }
+
+ /* In all other cases, we load a new pseudo with the GOT symbol. */
+ return copy_to_reg (sparc_got ());
+}
+
+/* Return true if X contains a thread-local symbol. */
+
+static bool
+sparc_tls_referenced_p (rtx x)
+{
+ if (!TARGET_HAVE_TLS)
+ return false;
+
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ x = XEXP (XEXP (x, 0), 0);
+
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
+ return true;
+
+ /* That's all we handle in sparc_legitimize_tls_address for now. */
+ return false;
+}
+
+/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
+ this (thread-local) address. */
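+
+/* For the global-dynamic model on 32-bit, the sequence constructed below
+   is roughly
+
+	sethi	%tgd_hi22(sym), %t1
+	add	%t1, %tgd_lo10(sym), %t2
+	add	%l7, %t2, %o0, %tgd_add(sym)
+	call	__tls_get_addr, %tgd_call(sym)
+
+   with the resulting address in %o0; %t1 and %t2 stand for the
+   temporaries allocated below.  */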
+
+static rtx
+sparc_legitimize_tls_address (rtx addr)
+{
+ rtx temp1, temp2, temp3, ret, o0, got, insn;
+
+ gcc_assert (can_create_pseudo_p ());
+
+ if (GET_CODE (addr) == SYMBOL_REF)
+ switch (SYMBOL_REF_TLS_MODEL (addr))
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ start_sequence ();
+ temp1 = gen_reg_rtx (SImode);
+ temp2 = gen_reg_rtx (SImode);
+ ret = gen_reg_rtx (Pmode);
+ o0 = gen_rtx_REG (Pmode, 8);
+ got = sparc_tls_got ();
+ emit_insn (gen_tgd_hi22 (temp1, addr));
+ emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
+ if (TARGET_ARCH32)
+ {
+ emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
+ insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
+ addr, const1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
+ insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
+ addr, const1_rtx));
+ }
+ CALL_INSN_FUNCTION_USAGE (insn)
+ = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
+ CALL_INSN_FUNCTION_USAGE (insn));
+ insn = get_insns ();
+ end_sequence ();
+ emit_libcall_block (insn, ret, o0, addr);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ start_sequence ();
+ temp1 = gen_reg_rtx (SImode);
+ temp2 = gen_reg_rtx (SImode);
+ temp3 = gen_reg_rtx (Pmode);
+ ret = gen_reg_rtx (Pmode);
+ o0 = gen_rtx_REG (Pmode, 8);
+ got = sparc_tls_got ();
+ emit_insn (gen_tldm_hi22 (temp1));
+ emit_insn (gen_tldm_lo10 (temp2, temp1));
+ if (TARGET_ARCH32)
+ {
+ emit_insn (gen_tldm_add32 (o0, got, temp2));
+ insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
+ const1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_tldm_add64 (o0, got, temp2));
+ insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
+ const1_rtx));
+ }
+ CALL_INSN_FUNCTION_USAGE (insn)
+ = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, o0),
+ CALL_INSN_FUNCTION_USAGE (insn));
+ insn = get_insns ();
+ end_sequence ();
+ emit_libcall_block (insn, temp3, o0,
+ gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLSLD_BASE));
+ temp1 = gen_reg_rtx (SImode);
+ temp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_tldo_hix22 (temp1, addr));
+ emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
+ if (TARGET_ARCH32)
+ emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
+ else
+ emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ temp1 = gen_reg_rtx (SImode);
+ temp2 = gen_reg_rtx (SImode);
+ temp3 = gen_reg_rtx (Pmode);
+ got = sparc_tls_got ();
+ emit_insn (gen_tie_hi22 (temp1, addr));
+ emit_insn (gen_tie_lo10 (temp2, temp1, addr));
+ if (TARGET_ARCH32)
+ emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
+ else
+ emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
+ if (TARGET_SUN_TLS)
+ {
+ ret = gen_reg_rtx (Pmode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
+ temp3, addr));
+ else
+ emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
+ temp3, addr));
+ }
+ else
+ ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ temp1 = gen_reg_rtx (Pmode);
+ temp2 = gen_reg_rtx (Pmode);
+ if (TARGET_ARCH32)
+ {
+ emit_insn (gen_tle_hix22_sp32 (temp1, addr));
+ emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
+ }
+ else
+ {
+ emit_insn (gen_tle_hix22_sp64 (temp1, addr));
+ emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
+ }
+ ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ else if (GET_CODE (addr) == CONST)
+ {
+ rtx base, offset;
+
+ gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
+
+ base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
+ offset = XEXP (XEXP (addr, 0), 1);
+
+ base = force_operand (base, NULL_RTX);
+ if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
+ offset = force_reg (Pmode, offset);
+ ret = gen_rtx_PLUS (Pmode, base, offset);
+ }
+
+ else
+ gcc_unreachable (); /* for now ... */
+
+ return ret;
+}
+
+/* Legitimize PIC addresses. If the address is already position-independent,
+ we return ORIG. Newly generated position-independent addresses go into a
+ reg. This is REG if nonzero, otherwise we allocate register(s) as
+ necessary. */
+
+static rtx
+sparc_legitimize_pic_address (rtx orig, rtx reg)
+{
+ bool gotdata_op = false;
+
+ if (GET_CODE (orig) == SYMBOL_REF
+ /* See the comment in sparc_expand_move. */
+ || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
+ {
+ rtx pic_ref, address;
+ rtx insn;
+
+ if (reg == 0)
+ {
+ gcc_assert (! reload_in_progress && ! reload_completed);
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ if (flag_pic == 2)
+ {
+ /* If not during reload, allocate another temp reg here for loading
+ in the address, so that these instructions can be optimized
+ properly. */
+ rtx temp_reg = ((reload_in_progress || reload_completed)
+ ? reg : gen_reg_rtx (Pmode));
+
+ /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
+ won't get confused into thinking that these two instructions
+ are loading in the true address of the symbol. If in the
+ future a PIC rtx exists, that should be used instead. */
+ if (TARGET_ARCH64)
+ {
+ emit_insn (gen_movdi_high_pic (temp_reg, orig));
+ emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
+ }
+ else
+ {
+ emit_insn (gen_movsi_high_pic (temp_reg, orig));
+ emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
+ }
+ address = temp_reg;
+ gotdata_op = true;
+ }
+ else
+ address = orig;
+
+ crtl->uses_pic_offset_table = 1;
+ if (gotdata_op)
+ {
+ if (TARGET_ARCH64)
+ insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
+ pic_offset_table_rtx,
+ address, orig));
+ else
+ insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
+ pic_offset_table_rtx,
+ address, orig));
+ }
+ else
+ {
+ pic_ref
+ = gen_const_mem (Pmode,
+ gen_rtx_PLUS (Pmode,
+ pic_offset_table_rtx, address));
+ insn = emit_move_insn (reg, pic_ref);
+ }
+
+      /* Put a REG_EQUAL note on this insn, so that it can be optimized
+	 by the loop optimizer.  */
+ set_unique_reg_note (insn, REG_EQUAL, orig);
+ return reg;
+ }
+ else if (GET_CODE (orig) == CONST)
+ {
+ rtx base, offset;
+
+ if (GET_CODE (XEXP (orig, 0)) == PLUS
+ && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
+ return orig;
+
+ if (reg == 0)
+ {
+ gcc_assert (! reload_in_progress && ! reload_completed);
+ reg = gen_reg_rtx (Pmode);
+ }
+
+ gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+ base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
+ offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
+ base == reg ? NULL_RTX : reg);
+
+ if (GET_CODE (offset) == CONST_INT)
+ {
+ if (SMALL_INT (offset))
+ return plus_constant (base, INTVAL (offset));
+ else if (! reload_in_progress && ! reload_completed)
+ offset = force_reg (Pmode, offset);
+ else
+ /* If we reach here, then something is seriously wrong. */
+ gcc_unreachable ();
+ }
+ return gen_rtx_PLUS (Pmode, base, offset);
+ }
+ else if (GET_CODE (orig) == LABEL_REF)
+ /* ??? We ought to be checking that the register is live instead, in case
+ it is eliminated. */
+ crtl->uses_pic_offset_table = 1;
+
+ return orig;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address X
+ to be legitimate. If we find one, return the new, valid address.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE is the mode of the operand pointed to by X.
+
+ On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
+
+static rtx
+sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ rtx orig_x = x;
+
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
+ force_operand (XEXP (x, 0), NULL_RTX));
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ force_operand (XEXP (x, 1), NULL_RTX));
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
+ x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
+ XEXP (x, 1));
+ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ force_operand (XEXP (x, 1), NULL_RTX));
+
+ if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (sparc_tls_referenced_p (x))
+ x = sparc_legitimize_tls_address (x);
+ else if (flag_pic)
+ x = sparc_legitimize_pic_address (x, NULL_RTX);
+ else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ copy_to_mode_reg (Pmode, XEXP (x, 1)));
+ else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
+ x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
+ copy_to_mode_reg (Pmode, XEXP (x, 0)));
+ else if (GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == CONST
+ || GET_CODE (x) == LABEL_REF)
+ x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
+
+ return x;
+}
+
+/* Delegitimize an address that was legitimized by the above function. */
+
+static rtx
+sparc_delegitimize_address (rtx x)
+{
+ x = delegitimize_mem_from_attrs (x);
+
+ if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
+ switch (XINT (XEXP (x, 1), 1))
+ {
+ case UNSPEC_MOVE_PIC:
+ case UNSPEC_TLSLE:
+ x = XVECEXP (XEXP (x, 1), 0, 0);
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ break;
+ default:
+ break;
+ }
+
+ return x;
+}
+
+/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
+ replace the input X, or the original X if no replacement is called for.
+ The output parameter *WIN is 1 if the calling macro should goto WIN,
+ 0 if it should not.
+
+ For SPARC, we wish to handle addresses by splitting them into
+ HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
+ This cuts the number of extra insns by one.
+
+ Do nothing when generating PIC code and the address is a symbolic
+ operand or requires a scratch register. */
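+
+/* For instance, a bare symbolic address 'foo' is decomposed into
+   (lo_sum (high foo) foo), i.e. a "sethi %hi(foo), reg" reload followed
+   by a [reg + %lo(foo)] memory reference.  */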
+
+rtx
+sparc_legitimize_reload_address (rtx x, enum machine_mode mode,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED, int *win)
+{
+ /* Decompose SImode constants into HIGH+LO_SUM. */
+ if (CONSTANT_P (x)
+ && (mode != TFmode || TARGET_ARCH64)
+ && GET_MODE (x) == SImode
+ && GET_CODE (x) != LO_SUM
+ && GET_CODE (x) != HIGH
+ && sparc_cmodel <= CM_MEDLOW
+ && !(flag_pic
+ && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
+ {
+ x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+ /* We have to recognize what we have already generated above. */
+ if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
+ {
+ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ *win = 1;
+ return x;
+ }
+
+ *win = 0;
+ return x;
+}
+
+/* Return true if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for.
+
+ In PIC mode,
+
+ (mem:HI [%l7+a])
+
+ is not equivalent to
+
+ (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
+
+ because [%l7+a+1] is interpreted as the address of (a+1). */
+
+static bool
+sparc_mode_dependent_address_p (const_rtx addr)
+{
+ if (flag_pic && GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0);
+ rtx op1 = XEXP (addr, 1);
+ if (op0 == pic_offset_table_rtx
+ && SYMBOLIC_CONST (op1))
+ return true;
+ }
+
+ return false;
+}
+
+#ifdef HAVE_GAS_HIDDEN
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+static void
+get_pc_thunk_name (char name[32], unsigned int regno)
+{
+ const char *reg_name = reg_names[regno];
+
+ /* Skip the leading '%' as that cannot be used in a
+ symbol name. */
+ reg_name += 1;
+
+ if (USE_HIDDEN_LINKONCE)
+ sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
+ else
+ ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
+}
+
+/* Wrapper around the load_pcrel_sym{si,di} patterns. */
+
+static rtx
+gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ int orig_flag_pic = flag_pic;
+ rtx insn;
+
+ /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
+ flag_pic = 0;
+ if (TARGET_ARCH64)
+ insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
+ else
+ insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
+ flag_pic = orig_flag_pic;
+
+ return insn;
+}
+
+/* Emit code to load the GOT register. */
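+
+/* With -fPIC the sequence produced here is roughly
+
+	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
+	call	__sparc_get_pc_thunk.l7
+	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
+
+   where the thunk adds the return address in %o7 into %l7 (a sketch; the
+   exact template lives in the load_pcrel_sym patterns).  */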
+
+static void
+load_got_register (void)
+{
+ /* In PIC mode, this will retrieve pic_offset_table_rtx. */
+ if (!global_offset_table_rtx)
+ global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
+
+ if (TARGET_VXWORKS_RTP)
+ emit_insn (gen_vxworks_load_got ());
+ else
+ {
+ /* The GOT symbol is subject to a PC-relative relocation so we need a
+ helper function to add the PC value and thus get the final value. */
+ if (!got_helper_rtx)
+ {
+ char name[32];
+ get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
+ got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ }
+
+ emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
+ got_helper_rtx,
+ GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
+ }
+
+ /* Need to emit this whether or not we obey regdecls,
+ since setjmp/longjmp can cause life info to screw up.
+ ??? In the case where we don't obey regdecls, this is not sufficient
+ since we may not fall out the bottom. */
+ emit_use (global_offset_table_rtx);
+}
+
+/* Emit a call instruction with the pattern given by PAT. ADDR is the
+ address of the call target. */
+
+void
+sparc_emit_call_insn (rtx pat, rtx addr)
+{
+ rtx insn;
+
+ insn = emit_call_insn (pat);
+
+ /* The PIC register is live on entry to VxWorks PIC PLT entries. */
+ if (TARGET_VXWORKS_RTP
+ && flag_pic
+ && GET_CODE (addr) == SYMBOL_REF
+ && (SYMBOL_REF_DECL (addr)
+ ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
+ : !SYMBOL_REF_LOCAL_P (addr)))
+ {
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+ crtl->uses_pic_offset_table = 1;
+ }
+}
+
+/* Return 1 if RTX is a MEM which is known to be aligned to at
+ least a DESIRED byte boundary. */
+
+int
+mem_min_alignment (rtx mem, int desired)
+{
+ rtx addr, base, offset;
+
+ /* If it's not a MEM we can't accept it. */
+ if (GET_CODE (mem) != MEM)
+ return 0;
+
+ /* Obviously... */
+ if (!TARGET_UNALIGNED_DOUBLES
+ && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
+ return 1;
+
+ /* ??? The rest of the function predates MEM_ALIGN so
+ there is probably a bit of redundancy. */
+ addr = XEXP (mem, 0);
+ base = offset = NULL_RTX;
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ {
+ base = XEXP (addr, 0);
+
+ /* What we are saying here is that if the base
+ REG is aligned properly, the compiler will make
+ sure any REG based index upon it will be so
+ as well. */
+ if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ offset = XEXP (addr, 1);
+ else
+ offset = const0_rtx;
+ }
+ }
+ else if (GET_CODE (addr) == REG)
+ {
+ base = addr;
+ offset = const0_rtx;
+ }
+
+ if (base != NULL_RTX)
+ {
+ int regno = REGNO (base);
+
+ if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
+ {
+ /* Check if the compiler has recorded some information
+ about the alignment of the base REG. If reload has
+ completed, we already matched with proper alignments.
+ If not running global_alloc, reload might give us
+ unaligned pointer to local stack though. */
+ if (((cfun != 0
+ && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
+ || (optimize && reload_completed))
+ && (INTVAL (offset) & (desired - 1)) == 0)
+ return 1;
+ }
+ else
+ {
+ if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
+ return 1;
+ }
+ }
+ else if (! TARGET_UNALIGNED_DOUBLES
+ || CONSTANT_P (addr)
+ || GET_CODE (addr) == LO_SUM)
+ {
+ /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
+ is true, in which case we can only assume that an access is aligned if
+ it is to a constant address, or the address involves a LO_SUM. */
+ return 1;
+ }
+
+ /* An obviously unaligned address. */
+ return 0;
+}
+
+
+/* Vectors to keep interesting information about registers where it can easily
+   be found.  We used to use the actual mode value as the bit number, but there
+ are more than 32 modes now. Instead we use two tables: one indexed by
+ hard register number, and one indexed by mode. */
+
+/* The purpose of sparc_mode_class is to shrink the range of modes so that
+ they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
+ mapped into one sparc_mode_class mode. */
+
+enum sparc_mode_class {
+ S_MODE, D_MODE, T_MODE, O_MODE,
+ SF_MODE, DF_MODE, TF_MODE, OF_MODE,
+ CC_MODE, CCFP_MODE
+};
+
+/* Modes for single-word and smaller quantities. */
+#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
+
+/* Modes for double-word and smaller quantities. */
+#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
+
+/* Modes for quad-word and smaller quantities. */
+#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
+
+/* Modes for 8-word and smaller quantities. */
+#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
+
+/* Modes for single-float quantities. We must allow any single word or
+ smaller quantity. This is because the fix/float conversion instructions
+ take integer inputs/outputs from the float registers. */
+#define SF_MODES (S_MODES)
+
+/* Modes for double-float and smaller quantities. */
+#define DF_MODES (D_MODES)
+
+/* Modes for quad-float and smaller quantities. */
+#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
+
+/* Modes for quad-float pairs and smaller quantities. */
+#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
+
+/* Modes for double-float only quantities. */
+#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
+
+/* Modes for quad-float and double-float only quantities. */
+#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
+
+/* Modes for quad-float pairs and double-float only quantities. */
+#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
+
+/* Modes for condition codes. */
+#define CC_MODES (1 << (int) CC_MODE)
+#define CCFP_MODES (1 << (int) CCFP_MODE)
+
+/* Value is 1 if register/mode pair is acceptable on sparc.
+ The funny mixture of D and T modes is because integer operations
+ do not specially operate on tetra quantities, so non-quad-aligned
+ registers can hold quadword quantities (except %o4 and %i4 because
+ they cross fixed registers). */
+
+/* This points to either the 32 bit or the 64 bit version. */
+const int *hard_regno_mode_classes;
+
+static const int hard_32bit_mode_classes[] = {
+ S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
+ T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
+ T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
+ T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
+
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
+
+ /* FP regs f32 to f63. Only the even numbered registers actually exist,
+ and none can hold SFmode/SImode values. */
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+
+ /* %fcc[0123] */
+ CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
+
+ /* %icc */
+ CC_MODES
+};
+
+static const int hard_64bit_mode_classes[] = {
+ D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+ T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
+
+ /* FP regs f32 to f63. Only the even numbered registers actually exist,
+ and none can hold SFmode/SImode values. */
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+
+ /* %fcc[0123] */
+ CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
+
+ /* %icc */
+ CC_MODES
+};
+
+int sparc_mode_class [NUM_MACHINE_MODES];
+
+enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
+
+static void
+sparc_init_modes (void)
+{
+ int i;
+
+ for (i = 0; i < NUM_MACHINE_MODES; i++)
+ {
+ switch (GET_MODE_CLASS (i))
+ {
+ case MODE_INT:
+ case MODE_PARTIAL_INT:
+ case MODE_COMPLEX_INT:
+ if (GET_MODE_SIZE (i) <= 4)
+ sparc_mode_class[i] = 1 << (int) S_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ sparc_mode_class[i] = 1 << (int) D_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ sparc_mode_class[i] = 1 << (int) T_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ sparc_mode_class[i] = 1 << (int) O_MODE;
+ else
+ sparc_mode_class[i] = 0;
+ break;
+ case MODE_VECTOR_INT:
+ if (GET_MODE_SIZE (i) <= 4)
+ sparc_mode_class[i] = 1 << (int)SF_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ sparc_mode_class[i] = 1 << (int)DF_MODE;
+ break;
+ case MODE_FLOAT:
+ case MODE_COMPLEX_FLOAT:
+ if (GET_MODE_SIZE (i) <= 4)
+ sparc_mode_class[i] = 1 << (int) SF_MODE;
+ else if (GET_MODE_SIZE (i) == 8)
+ sparc_mode_class[i] = 1 << (int) DF_MODE;
+ else if (GET_MODE_SIZE (i) == 16)
+ sparc_mode_class[i] = 1 << (int) TF_MODE;
+ else if (GET_MODE_SIZE (i) == 32)
+ sparc_mode_class[i] = 1 << (int) OF_MODE;
+ else
+ sparc_mode_class[i] = 0;
+ break;
+ case MODE_CC:
+ if (i == (int) CCFPmode || i == (int) CCFPEmode)
+ sparc_mode_class[i] = 1 << (int) CCFP_MODE;
+ else
+ sparc_mode_class[i] = 1 << (int) CC_MODE;
+ break;
+ default:
+ sparc_mode_class[i] = 0;
+ break;
+ }
+ }
+
+ if (TARGET_ARCH64)
+ hard_regno_mode_classes = hard_64bit_mode_classes;
+ else
+ hard_regno_mode_classes = hard_32bit_mode_classes;
+
+ /* Initialize the array used by REGNO_REG_CLASS. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (i < 16 && TARGET_V8PLUS)
+ sparc_regno_reg_class[i] = I64_REGS;
+ else if (i < 32 || i == FRAME_POINTER_REGNUM)
+ sparc_regno_reg_class[i] = GENERAL_REGS;
+ else if (i < 64)
+ sparc_regno_reg_class[i] = FP_REGS;
+ else if (i < 96)
+ sparc_regno_reg_class[i] = EXTRA_FP_REGS;
+ else if (i < 100)
+ sparc_regno_reg_class[i] = FPCC_REGS;
+ else
+ sparc_regno_reg_class[i] = NO_REGS;
+ }
+}
+
+/* Compute the frame size required by the function. This function is called
+ during the reload pass and also by sparc_expand_prologue. */
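+
+/* Reading off the computation below: the frame consists of the locals and
+   spill area (SIZE rounded to 8 bytes), 4 bytes per saved register word,
+   the rounded outgoing argument area and, whenever a SAVE is needed or
+   there are stack locals, the register window save area counted through
+   FIRST_PARM_OFFSET.  */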
+
+HOST_WIDE_INT
+sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function_p)
+{
+ int outgoing_args_size = (crtl->outgoing_args_size
+ + REG_PARM_STACK_SPACE (current_function_decl));
+ int n_regs = 0; /* N_REGS is the number of 4-byte regs saved thus far. */
+ int i;
+
+ if (TARGET_ARCH64)
+ {
+ for (i = 0; i < 8; i++)
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ n_regs += 2;
+ }
+ else
+ {
+ for (i = 0; i < 8; i += 2)
+ if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
+ || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
+ n_regs += 2;
+ }
+
+ for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
+ if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
+ || (df_regs_ever_live_p (i+1) && ! call_used_regs[i+1]))
+ n_regs += 2;
+
+ /* Set up values for use in prologue and epilogue. */
+ num_gfregs = n_regs;
+
+ if (leaf_function_p
+ && n_regs == 0
+ && size == 0
+ && crtl->outgoing_args_size == 0)
+ actual_fsize = apparent_fsize = 0;
+ else
+ {
+ /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
+ apparent_fsize = (size - STARTING_FRAME_OFFSET + 7) & -8;
+ apparent_fsize += n_regs * 4;
+ actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
+ }
+
+ /* Make sure nothing can clobber our register windows.
+ If a SAVE must be done, or there is a stack-local variable,
+ the register window area must be allocated. */
+ if (! leaf_function_p || size > 0)
+ actual_fsize += FIRST_PARM_OFFSET (current_function_decl);
+
+ return SPARC_STACK_ALIGN (actual_fsize);
+}
+
+/* Output any necessary .register pseudo-ops. */
+
+void
+sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
+{
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+ int i;
+
+ if (TARGET_ARCH32)
+ return;
+
+ /* Check if %g[2367] were used without
+ .register being printed for them already. */
+ for (i = 2; i < 8; i++)
+ {
+ if (df_regs_ever_live_p (i)
+ && ! sparc_hard_reg_printed [i])
+ {
+ sparc_hard_reg_printed [i] = 1;
+ /* %g7 is used as TLS base register, use #ignore
+ for it instead of #scratch. */
+ fprintf (file, "\t.register\t%%g%d, #%s\n", i,
+ i == 7 ? "ignore" : "scratch");
+ }
+ if (i == 3) i = 5;
+ }
+#endif
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+#if PROBE_INTERVAL > 4096
+#error Cannot use indexed addressing mode for stack probing
+#endif
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer.
+
+ Note that we don't use the REG+REG addressing mode for the probes because
+   of the stack bias in 64-bit mode.  And it doesn't really buy us anything,
+   so the advantages of having a single code path win here.  */
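+
+/* For instance, with PROBE_INTERVAL == 4096 and SIZE == 3 * 4096, the
+   unrolled case below emits probes at SP - (FIRST + 4096),
+   SP - (FIRST + 8192) and SP - (FIRST + 12288).  */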
+
+static void
+sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ rtx g1 = gen_rtx_REG (Pmode, 1);
+
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. */
+ if (size <= PROBE_INTERVAL)
+ {
+ emit_move_insn (g1, GEN_INT (first));
+ emit_insn (gen_rtx_SET (VOIDmode, g1,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
+ emit_stack_probe (plus_constant (g1, -size));
+ }
+
+  /* The run-time loop is made up of 10 insns in the generic case while the
+     compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
+ else if (size <= 5 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i;
+
+ emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
+ emit_insn (gen_rtx_SET (VOIDmode, g1,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
+ emit_stack_probe (g1);
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
+ it exceeds SIZE. If only two probes are needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, g1,
+ plus_constant (g1, -PROBE_INTERVAL)));
+ emit_stack_probe (g1);
+ }
+
+ emit_stack_probe (plus_constant (g1, (i - PROBE_INTERVAL) - size));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ rtx g4 = gen_rtx_REG (Pmode, 4);
+
+ emit_move_insn (g1, GEN_INT (first));
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+ emit_move_insn (g4, GEN_INT (rounded_size));
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_ADDR = SP + FIRST. */
+ emit_insn (gen_rtx_SET (VOIDmode, g1,
+ gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
+
+ /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
+ emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ if (TARGET_64BIT)
+ emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
+ else
+ emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (g4, rounded_size - size));
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
+ absolute addresses. */
+
+const char *
+output_probe_stack_range (rtx reg1, rtx reg2)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
+ xops[0] = reg1;
+ xops[1] = reg2;
+ output_asm_insn ("cmp\t%0, %1", xops);
+ if (TARGET_ARCH64)
+ fputs ("\tbe,pn\t%xcc,", asm_out_file);
+ else
+ fputs ("\tbe\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (-PROBE_INTERVAL);
+ output_asm_insn (" add\t%0, %1, %0", xops);
+
+ /* Probe at TEST_ADDR and branch. */
+ if (TARGET_ARCH64)
+ fputs ("\tba,pt\t%xcc,", asm_out_file);
+ else
+ fputs ("\tba\t", asm_out_file);
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+ xops[1] = GEN_INT (SPARC_STACK_BIAS);
+ output_asm_insn (" st\t%%g0, [%0+%1]", xops);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
+/* Save/restore call-saved registers from LOW to HIGH at BASE+OFFSET
+ as needed. LOW should be double-word aligned for 32-bit registers.
+ Return the new OFFSET. */
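+
+/* When both registers of an even/odd pair are live they are moved with a
+   single double-word (DImode or DFmode) access; otherwise a single-word
+   access is used and double-word alignment of OFFSET is re-established
+   afterwards.  */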
+
+#define SORR_SAVE 0
+#define SORR_RESTORE 1
+
+static int
+save_or_restore_regs (int low, int high, rtx base, int offset, int action)
+{
+ rtx mem, insn;
+ int i;
+
+ if (TARGET_ARCH64 && high <= 32)
+ {
+ for (i = low; i < high; i++)
+ {
+ if (df_regs_ever_live_p (i) && ! call_used_regs[i])
+ {
+ mem = gen_frame_mem (DImode, plus_constant (base, offset));
+ if (action == SORR_SAVE)
+ {
+ insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else /* action == SORR_RESTORE */
+ emit_move_insn (gen_rtx_REG (DImode, i), mem);
+ offset += 8;
+ }
+ }
+ }
+ else
+ {
+ for (i = low; i < high; i += 2)
+ {
+ bool reg0 = df_regs_ever_live_p (i) && ! call_used_regs[i];
+ bool reg1 = df_regs_ever_live_p (i+1) && ! call_used_regs[i+1];
+ enum machine_mode mode;
+ int regno;
+
+ if (reg0 && reg1)
+ {
+ mode = i < 32 ? DImode : DFmode;
+ regno = i;
+ }
+ else if (reg0)
+ {
+ mode = i < 32 ? SImode : SFmode;
+ regno = i;
+ }
+ else if (reg1)
+ {
+ mode = i < 32 ? SImode : SFmode;
+ regno = i + 1;
+ offset += 4;
+ }
+ else
+ continue;
+
+ mem = gen_frame_mem (mode, plus_constant (base, offset));
+ if (action == SORR_SAVE)
+ {
+ insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else /* action == SORR_RESTORE */
+ emit_move_insn (gen_rtx_REG (mode, regno), mem);
+
+ /* Always preserve double-word alignment. */
+ offset = (offset + 8) & -8;
+ }
+ }
+
+ return offset;
+}
+
+/* Emit code to save call-saved registers. */
+
+static void
+emit_save_or_restore_regs (int action)
+{
+ HOST_WIDE_INT offset;
+ rtx base;
+
+ offset = frame_base_offset - apparent_fsize;
+
+ if (offset < -4096 || offset + num_gfregs * 4 > 4095)
+ {
+ /* ??? This might be optimized a little as %g1 might already have a
+ value close enough that a single add insn will do. */
+ /* ??? Although, all of this is probably only a temporary fix
+ because if %g1 can hold a function result, then
+ sparc_expand_epilogue will lose (the result will be
+ clobbered). */
+ base = gen_rtx_REG (Pmode, 1);
+ emit_move_insn (base, GEN_INT (offset));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ base,
+ gen_rtx_PLUS (Pmode, frame_base_reg, base)));
+ offset = 0;
+ }
+ else
+ base = frame_base_reg;
+
+ offset = save_or_restore_regs (0, 8, base, offset, action);
+ save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, action);
+}
+
+/* Generate a save_register_window insn. */
+
+static rtx
+gen_save_register_window (rtx increment)
+{
+ if (TARGET_ARCH64)
+ return gen_save_register_windowdi (increment);
+ else
+ return gen_save_register_windowsi (increment);
+}
+
+/* Generate an increment for the stack pointer. */
+
+static rtx
+gen_stack_pointer_inc (rtx increment)
+{
+ return gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ increment));
+}
+
+/* Generate a decrement for the stack pointer. */
+
+static rtx
+gen_stack_pointer_dec (rtx decrement)
+{
+ return gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode,
+ stack_pointer_rtx,
+ decrement));
+}
+
+/* Expand the function prologue. The prologue is responsible for reserving
+ storage for the frame, saving the call-saved registers and loading the
+ GOT register if needed. */
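+
+/* In the common non-leaf case the allocation below boils down to a single
+   "save %sp, -FSIZE, %sp"; a leaf function instead just drops %sp with an
+   add.  Frames larger than 4096 bytes are handled in two steps or through
+   %g1, since the immediate field of these instructions is only 13 bits
+   wide.  */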
+
+void
+sparc_expand_prologue (void)
+{
+ rtx insn;
+ int i;
+
+ /* Compute a snapshot of current_function_uses_only_leaf_regs. Relying
+ on the final value of the flag means deferring the prologue/epilogue
+ expansion until just before the second scheduling pass, which is too
+ late to emit multiple epilogues or return insns.
+
+ Of course we are making the assumption that the value of the flag
+ will not change between now and its final value. Of the three parts
+ of the formula, only the last one can reasonably vary. Let's take a
+ closer look, after assuming that the first two ones are set to true
+ (otherwise the last value is effectively silenced).
+
+ If only_leaf_regs_used returns false, the global predicate will also
+ be false so the actual frame size calculated below will be positive.
+ As a consequence, the save_register_window insn will be emitted in
+ the instruction stream; now this insn explicitly references %fp
+ which is not a leaf register so only_leaf_regs_used will always
+ return false subsequently.
+
+ If only_leaf_regs_used returns true, we hope that the subsequent
+ optimization passes won't cause non-leaf registers to pop up. For
+ example, the regrename pass has special provisions to not rename to
+ non-leaf registers in a leaf function. */
+ sparc_leaf_function_p
+ = optimize > 0 && current_function_is_leaf && only_leaf_regs_used ();
+
+ /* Need to use actual_fsize, since we are also allocating
+ space for our callee (and our own register save area). */
+ actual_fsize
+ = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
+
+ /* Advertise that the data calculated just above are now valid. */
+ sparc_prologue_data_valid_p = true;
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = actual_fsize;
+
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && actual_fsize)
+ sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, actual_fsize);
+
+ if (sparc_leaf_function_p)
+ {
+ frame_base_reg = stack_pointer_rtx;
+ frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
+ }
+ else
+ {
+ frame_base_reg = hard_frame_pointer_rtx;
+ frame_base_offset = SPARC_STACK_BIAS;
+ }
+
+ if (actual_fsize == 0)
+ /* do nothing. */ ;
+ else if (sparc_leaf_function_p)
+ {
+ if (actual_fsize <= 4096)
+ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
+ else if (actual_fsize <= 8192)
+ {
+ insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* %sp is still the CFA register. */
+ insn
+ = emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
+ }
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 1);
+ emit_move_insn (reg, GEN_INT (-actual_fsize));
+ insn = emit_insn (gen_stack_pointer_inc (reg));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_stack_pointer_inc (GEN_INT (-actual_fsize)));
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ {
+ if (actual_fsize <= 4096)
+ insn = emit_insn (gen_save_register_window (GEN_INT (-actual_fsize)));
+ else if (actual_fsize <= 8192)
+ {
+ insn = emit_insn (gen_save_register_window (GEN_INT (-4096)));
+
+ /* %sp is not the CFA register anymore. */
+ emit_insn (gen_stack_pointer_inc (GEN_INT (4096-actual_fsize)));
+
+ /* Make sure no %fp-based store is issued until after the frame is
+ established. The offset between the frame pointer and the stack
+ pointer is calculated relative to the value of the stack pointer
+ at the end of the function prologue, and moving instructions that
+ access the stack via the frame pointer between the instructions
+ that decrement the stack pointer could result in accessing the
+ register window save area, which is volatile. */
+ emit_insn (gen_frame_blockage ());
+ }
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 1);
+ emit_move_insn (reg, GEN_INT (-actual_fsize));
+ insn = emit_insn (gen_save_register_window (reg));
+ }
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, i)) = 1;
+ }
+
+ if (num_gfregs)
+ emit_save_or_restore_regs (SORR_SAVE);
+
+ /* Load the GOT register if needed. */
+ if (crtl->uses_pic_offset_table)
+ load_got_register ();
+}
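+
+/* To illustrate the three size regimes above for a leaf function (a
+ sketch; the exact instruction selection is up to the
+ stack_pointer_inc pattern):
+
+ actual_fsize <= 4096: add %sp, -actual_fsize, %sp
+ actual_fsize <= 8192: add %sp, -4096, %sp
+ add %sp, 4096 - actual_fsize, %sp
+ otherwise: set -actual_fsize, %g1
+ add %sp, %g1, %sp
+
+ The two-step form keeps each immediate within the 13-bit signed
+ range of the SPARC add instruction. */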
+
+/* This function generates the assembly code for function entry, which boils
+ down to emitting the necessary .register directives. */
+
+static void
+sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* Check that the assumption we made in sparc_expand_prologue is valid. */
+ gcc_assert (sparc_leaf_function_p == current_function_uses_only_leaf_regs);
+
+ sparc_output_scratch_registers (file);
+}
+
+/* Expand the function epilogue, either normal or part of a sibcall.
+ We emit all the instructions except the return or the call. */
+
+void
+sparc_expand_epilogue (void)
+{
+ if (num_gfregs)
+ emit_save_or_restore_regs (SORR_RESTORE);
+
+ if (actual_fsize == 0)
+ /* do nothing. */ ;
+ else if (sparc_leaf_function_p)
+ {
+ if (actual_fsize <= 4096)
+ emit_insn (gen_stack_pointer_dec (GEN_INT (-actual_fsize)));
+ else if (actual_fsize <= 8192)
+ {
+ emit_insn (gen_stack_pointer_dec (GEN_INT (-4096)));
+ emit_insn (gen_stack_pointer_dec (GEN_INT (4096 - actual_fsize)));
+ }
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 1);
+ emit_move_insn (reg, GEN_INT (-actual_fsize));
+ emit_insn (gen_stack_pointer_dec (reg));
+ }
+ }
+}
+
+/* Return true if it is appropriate to emit `return' instructions in the
+ body of a function. */
+
+bool
+sparc_can_use_return_insn_p (void)
+{
+ return sparc_prologue_data_valid_p
+ && num_gfregs == 0
+ && (actual_fsize == 0 || !sparc_leaf_function_p);
+}
+
+/* This function generates the assembly code for function exit. */
+
+static void
+sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* If the last two instructions of a function are "call foo; dslot;"
+ the return address might point to the first instruction in the next
+ function, and we have to output a dummy nop for the sake of sane
+ backtraces in such cases. This is pointless for sibling calls since
+ the return address is explicitly adjusted. */
+
+ rtx insn, last_real_insn;
+
+ insn = get_last_insn ();
+
+ last_real_insn = prev_real_insn (insn);
+ if (last_real_insn
+ && GET_CODE (last_real_insn) == INSN
+ && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
+ last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
+
+ if (last_real_insn
+ && CALL_P (last_real_insn)
+ && !SIBLING_CALL_P (last_real_insn))
+ fputs ("\tnop\n", file);
+
+ sparc_output_deferred_case_vectors ();
+}
+
+/* Output a 'restore' instruction. */
+
+static void
+output_restore (rtx pat)
+{
+ rtx operands[3];
+
+ if (! pat)
+ {
+ fputs ("\t restore\n", asm_out_file);
+ return;
+ }
+
+ gcc_assert (GET_CODE (pat) == SET);
+
+ operands[0] = SET_DEST (pat);
+ pat = SET_SRC (pat);
+
+ switch (GET_CODE (pat))
+ {
+ case PLUS:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %2, %Y0", operands);
+ break;
+ case LO_SUM:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
+ break;
+ case ASHIFT:
+ operands[1] = XEXP (pat, 0);
+ gcc_assert (XEXP (pat, 1) == const1_rtx);
+ output_asm_insn (" restore %r1, %r1, %Y0", operands);
+ break;
+ default:
+ operands[1] = pat;
+ output_asm_insn (" restore %%g0, %1, %Y0", operands);
+ break;
+ }
+}
+
+/* Output a return. */
+
+const char *
+output_return (rtx insn)
+{
+ if (sparc_leaf_function_p)
+ {
+ /* This is a leaf function so we don't have to bother restoring the
+ register window, which frees us from dealing with the convoluted
+ semantics of restore/return. We simply output the jump to the
+ return address and the insn in the delay slot (if any). */
+
+ gcc_assert (! crtl->calls_eh_return);
+
+ return "jmp\t%%o7+%)%#";
+ }
+ else
+ {
+ /* This is a regular function so we have to restore the register window.
+ We may have a pending insn for the delay slot, which will be either
+ combined with the 'restore' instruction or put in the delay slot of
+ the 'return' instruction. */
+
+ if (crtl->calls_eh_return)
+ {
+ /* If the function uses __builtin_eh_return, the eh_return
+ machinery occupies the delay slot. */
+ gcc_assert (! final_sequence);
+
+ if (flag_delayed_branch)
+ {
+ if (TARGET_V9)
+ fputs ("\treturn\t%i7+8\n", asm_out_file);
+ else
+ fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file);
+
+ fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
+ }
+ else
+ {
+ fputs ("\trestore\n\tadd\t%sp, %g1, %sp\n", asm_out_file);
+ fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
+ }
+ }
+ else if (final_sequence)
+ {
+ rtx delay, pat;
+
+ delay = NEXT_INSN (insn);
+ gcc_assert (delay);
+
+ pat = PATTERN (delay);
+
+ if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
+ {
+ epilogue_renumber (&pat, 0);
+ return "return\t%%i7+%)%#";
+ }
+ else
+ {
+ output_asm_insn ("jmp\t%%i7+%)", NULL);
+ output_restore (pat);
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ }
+ }
+ else
+ {
+ /* The delay slot is empty. */
+ if (TARGET_V9)
+ return "return\t%%i7+%)\n\t nop";
+ else if (flag_delayed_branch)
+ return "jmp\t%%i7+%)\n\t restore";
+ else
+ return "restore\n\tjmp\t%%o7+%)\n\t nop";
+ }
+ }
+
+ return "";
+}
+
+/* Output a sibling call. */
+
+const char *
+output_sibcall (rtx insn, rtx call_operand)
+{
+ rtx operands[1];
+
+ gcc_assert (flag_delayed_branch);
+
+ operands[0] = call_operand;
+
+ if (sparc_leaf_function_p)
+ {
+ /* This is a leaf function so we don't have to bother restoring the
+ register window. We simply output the jump to the function and
+ the insn in the delay slot (if any). */
+
+ gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
+
+ if (final_sequence)
+ output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
+ operands);
+ else
+ /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
+ it into a branch if possible. */
+ output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
+ operands);
+ }
+ else
+ {
+ /* This is a regular function so we have to restore the register window.
+ We may have a pending insn for the delay slot, which will be combined
+ with the 'restore' instruction. */
+
+ output_asm_insn ("call\t%a0, 0", operands);
+
+ if (final_sequence)
+ {
+ rtx delay = NEXT_INSN (insn);
+ gcc_assert (delay);
+
+ output_restore (PATTERN (delay));
+
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ }
+ else
+ output_restore (NULL_RTX);
+ }
+
+ return "";
+}
+
+/* Functions for handling argument passing.
+
+ For 32-bit, the first 6 args are normally in registers and the rest are
+ pushed. Any arg that starts within the first 6 words is at least
+ partially passed in a register unless its data type forbids.
+
+ For 64-bit, the argument registers are laid out as an array of 16 elements
+ and arguments are added sequentially. The first 6 int args and up to the
+ first 16 fp args (depending on size) are passed in regs.
+
+ Slot Stack Integral Float Float in structure Double Long Double
+ ---- ----- -------- ----- ------------------ ------ -----------
+ 15 [SP+248] %f31 %f30,%f31 %d30
+ 14 [SP+240] %f29 %f28,%f29 %d28 %q28
+ 13 [SP+232] %f27 %f26,%f27 %d26
+ 12 [SP+224] %f25 %f24,%f25 %d24 %q24
+ 11 [SP+216] %f23 %f22,%f23 %d22
+ 10 [SP+208] %f21 %f20,%f21 %d20 %q20
+ 9 [SP+200] %f19 %f18,%f19 %d18
+ 8 [SP+192] %f17 %f16,%f17 %d16 %q16
+ 7 [SP+184] %f15 %f14,%f15 %d14
+ 6 [SP+176] %f13 %f12,%f13 %d12 %q12
+ 5 [SP+168] %o5 %f11 %f10,%f11 %d10
+ 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
+ 3 [SP+152] %o3 %f7 %f6,%f7 %d6
+ 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
+ 1 [SP+136] %o1 %f3 %f2,%f3 %d2
+ 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
+
+ Here SP = %sp if -mno-stack-bias, or %sp+stack_bias otherwise.
+
+ Integral arguments are always passed as 64-bit quantities appropriately
+ extended.
+
+ Passing of floating point values is handled as follows.
+ If a prototype is in scope:
+ If the value is in a named argument (i.e. not a stdarg function or a
+ value not part of the `...') then the value is passed in the appropriate
+ fp reg.
+ If the value is part of the `...' and is passed in one of the first 6
+ slots then the value is passed in the appropriate int reg.
+ If the value is part of the `...' and is not passed in one of the first 6
+ slots then the value is passed in memory.
+ If a prototype is not in scope:
+ If the value is one of the first 6 arguments the value is passed in the
+ appropriate integer reg and the appropriate fp reg.
+ If the value is not one of the first 6 arguments the value is passed in
+ the appropriate fp reg and in memory.
+
+
+ Summary of the calling conventions implemented by GCC on the SPARC:
+
+ 32-bit ABI:
+ size argument return value
+
+ small integer <4 int. reg. int. reg.
+ word 4 int. reg. int. reg.
+ double word 8 int. reg. int. reg.
+
+ _Complex small integer <8 int. reg. int. reg.
+ _Complex word 8 int. reg. int. reg.
+ _Complex double word 16 memory int. reg.
+
+ vector integer <=8 int. reg. FP reg.
+ vector integer >8 memory memory
+
+ float 4 int. reg. FP reg.
+ double 8 int. reg. FP reg.
+ long double 16 memory memory
+
+ _Complex float 8 memory FP reg.
+ _Complex double 16 memory FP reg.
+ _Complex long double 32 memory FP reg.
+
+ vector float any memory memory
+
+ aggregate any memory memory
+
+
+
+ 64-bit ABI:
+ size argument return value
+
+ small integer <8 int. reg. int. reg.
+ word 8 int. reg. int. reg.
+ double word 16 int. reg. int. reg.
+
+ _Complex small integer <16 int. reg. int. reg.
+ _Complex word 16 int. reg. int. reg.
+ _Complex double word 32 memory int. reg.
+
+ vector integer <=16 FP reg. FP reg.
+ vector integer 16<s<=32 memory FP reg.
+ vector integer >32 memory memory
+
+ float 4 FP reg. FP reg.
+ double 8 FP reg. FP reg.
+ long double 16 FP reg. FP reg.
+
+ _Complex float 8 FP reg. FP reg.
+ _Complex double 16 FP reg. FP reg.
+ _Complex long double 32 memory FP reg.
+
+ vector float <=16 FP reg. FP reg.
+ vector float 16<s<=32 memory FP reg.
+ vector float >32 memory memory
+
+ aggregate <=16 reg. reg.
+ aggregate 16<s<=32 memory reg.
+ aggregate >32 memory memory
+
+
+
+Note #1: complex floating-point types follow the extended SPARC ABIs as
+implemented by the Sun compiler.
+
+Note #2: integral vector types follow the scalar floating-point types
+conventions to match what is implemented by the Sun VIS SDK.
+
+Note #3: floating-point vector types follow the aggregate types
+conventions. */
+
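+/* For example, for a prototyped 64-bit call such as
+
+ double f (int i, double d);
+
+ the int argument occupies slot 0 and is passed in %o0, the double
+ occupies slot 1 and is passed in %d2 (see the table above), and the
+ result comes back in %d0. */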
+
+/* Maximum number of int regs for args. */
+#define SPARC_INT_ARG_MAX 6
+/* Maximum number of fp regs for args. */
+#define SPARC_FP_ARG_MAX 16
+
+#define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
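+
+/* For example, with UNITS_PER_WORD == 8 (64-bit), ROUND_ADVANCE (9)
+ yields (9 + 7) / 8 == 2, i.e. a 9-byte argument consumes two
+ argument slots. */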
+
+/* Handle the INIT_CUMULATIVE_ARGS macro.
+ Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+init_cumulative_args (struct sparc_args *cum, tree fntype,
+ rtx libname ATTRIBUTE_UNUSED,
+ tree fndecl ATTRIBUTE_UNUSED)
+{
+ cum->words = 0;
+ cum->prototype_p = fntype && prototype_p (fntype);
+ cum->libcall_p = fntype == 0;
+}
+
+/* Handle promotion of pointer and integer arguments. */
+
+static enum machine_mode
+sparc_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ enum machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree fntype ATTRIBUTE_UNUSED,
+ int for_return ATTRIBUTE_UNUSED)
+{
+ if (POINTER_TYPE_P (type))
+ {
+ *punsignedp = POINTERS_EXTEND_UNSIGNED;
+ return Pmode;
+ }
+
+ /* Integral arguments are passed as full words, as per the ABI. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ return word_mode;
+
+ return mode;
+}
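+
+/* For example, a `short' (HImode) argument or return value is promoted
+ to word_mode, i.e. SImode for the 32-bit ABI and DImode for the
+ 64-bit ABI, in line with the conventions summarized above. */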
+
+/* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
+
+static bool
+sparc_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
+{
+ return TARGET_ARCH64 ? true : false;
+}
+
+/* Scan the record type TYPE and return the following predicates:
+ - INTREGS_P: the record contains at least one field or sub-field
+ that is eligible for promotion in integer registers.
+ - FP_REGS_P: the record contains at least one field or sub-field
+ that is eligible for promotion in floating-point registers.
+ - PACKED_P: the record contains at least one field that is packed.
+
+ Sub-fields are not taken into account for the PACKED_P predicate. */
+
+static void
+scan_record_type (const_tree type, int *intregs_p, int *fpregs_p,
+ int *packed_p)
+{
+ tree field;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
+ scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0);
+ else if ((FLOAT_TYPE_P (TREE_TYPE (field))
+ || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
+ && TARGET_FPU)
+ *fpregs_p = 1;
+ else
+ *intregs_p = 1;
+
+ if (packed_p && DECL_PACKED (field))
+ *packed_p = 1;
+ }
+ }
+}
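+
+/* For example, scanning
+
+ struct { int i; float f; };
+
+ sets both *INTREGS_P (for the int field) and, if TARGET_FPU is set,
+ *FPREGS_P (for the float field), while *PACKED_P stays clear. */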
+
+/* Compute the slot number to pass an argument in.
+ Return the slot number or -1 if passing on the stack.
+
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+ INCOMING_P is false for TARGET_FUNCTION_ARG, true for
+ TARGET_FUNCTION_INCOMING_ARG.
+ *PREGNO records the register number to use if scalar type.
+ *PPADDING records the amount of padding needed in words. */
+
+static int
+function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode,
+ const_tree type, bool named, bool incoming_p,
+ int *pregno, int *ppadding)
+{
+ int regbase = (incoming_p
+ ? SPARC_INCOMING_INT_ARG_FIRST
+ : SPARC_OUTGOING_INT_ARG_FIRST);
+ int slotno = cum->words;
+ enum mode_class mclass;
+ int regno;
+
+ *ppadding = 0;
+
+ if (type && TREE_ADDRESSABLE (type))
+ return -1;
+
+ if (TARGET_ARCH32
+ && mode == BLKmode
+ && type
+ && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
+ return -1;
+
+ /* For SPARC64, objects requiring 16-byte alignment get it. */
+ if (TARGET_ARCH64
+ && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
+ && (slotno & 1) != 0)
+ slotno++, *ppadding = 1;
+
+ mclass = GET_MODE_CLASS (mode);
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ {
+ /* Vector types deserve special treatment because they are
+ polymorphic wrt their mode, depending upon whether VIS
+ instructions are enabled. */
+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ {
+ /* The SPARC port defines no floating-point vector modes. */
+ gcc_assert (mode == BLKmode);
+ }
+ else
+ {
+ /* Integral vector types should either have a vector
+ mode or an integral mode, because we are guaranteed
+ by pass_by_reference that their size is not greater
+ than 16 bytes and TImode is 16-byte wide. */
+ gcc_assert (mode != BLKmode);
+
+ /* Vector integers are handled like floats according to
+ the Sun VIS SDK. */
+ mclass = MODE_FLOAT;
+ }
+ }
+
+ switch (mclass)
+ {
+ case MODE_FLOAT:
+ case MODE_COMPLEX_FLOAT:
+ case MODE_VECTOR_INT:
+ if (TARGET_ARCH64 && TARGET_FPU && named)
+ {
+ if (slotno >= SPARC_FP_ARG_MAX)
+ return -1;
+ regno = SPARC_FP_ARG_FIRST + slotno * 2;
+ /* Arguments filling only one single FP register are
+ right-justified in the outer double FP register. */
+ if (GET_MODE_SIZE (mode) <= 4)
+ regno++;
+ break;
+ }
+ /* fallthrough */
+
+ case MODE_INT:
+ case MODE_COMPLEX_INT:
+ if (slotno >= SPARC_INT_ARG_MAX)
+ return -1;
+ regno = regbase + slotno;
+ break;
+
+ case MODE_RANDOM:
+ if (mode == VOIDmode)
+ /* MODE is VOIDmode when generating the actual call. */
+ return -1;
+
+ gcc_assert (mode == BLKmode);
+
+ if (TARGET_ARCH32
+ || !type
+ || (TREE_CODE (type) != VECTOR_TYPE
+ && TREE_CODE (type) != RECORD_TYPE))
+ {
+ if (slotno >= SPARC_INT_ARG_MAX)
+ return -1;
+ regno = regbase + slotno;
+ }
+ else /* TARGET_ARCH64 && type */
+ {
+ int intregs_p = 0, fpregs_p = 0, packed_p = 0;
+
+ /* First see what kinds of registers we would need. */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ fpregs_p = 1;
+ else
+ scan_record_type (type, &intregs_p, &fpregs_p, &packed_p);
+
+ /* The ABI obviously doesn't specify how packed structures
+ are passed. These are defined to be passed in int regs
+ if possible, otherwise memory. */
+ if (packed_p || !named)
+ fpregs_p = 0, intregs_p = 1;
+
+ /* If all arg slots are filled, then must pass on stack. */
+ if (fpregs_p && slotno >= SPARC_FP_ARG_MAX)
+ return -1;
+
+ /* If there are only int args and all int arg slots are filled,
+ then must pass on stack. */
+ if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX)
+ return -1;
+
+ /* Note that even if all int arg slots are filled, fp members may
+ still be passed in regs if such regs are available.
+ *PREGNO isn't set because there may be more than one; it's up
+ to the caller to compute them. */
+ return slotno;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ *pregno = regno;
+ return slotno;
+}
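+
+/* For example, on the 32-bit ABI the seventh word-sized argument gets
+ slotno == 6 == SPARC_INT_ARG_MAX, so the function above returns -1
+ and the argument is passed on the stack. */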
+
+/* Handle recursive register counting for structure field layout. */
+
+struct function_arg_record_value_parms
+{
+ rtx ret; /* return expression being built. */
+ int slotno; /* slot number of the argument. */
+ int named; /* whether the argument is named. */
+ int regbase; /* regno of the base register. */
+ int stack; /* 1 if part of the argument is on the stack. */
+ int intoffset; /* offset of the first pending integer field. */
+ unsigned int nregs; /* number of words passed in registers. */
+};
+
+static void function_arg_record_value_3
+ (HOST_WIDE_INT, struct function_arg_record_value_parms *);
+static void function_arg_record_value_2
+ (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
+static void function_arg_record_value_1
+ (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool);
+static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int);
+static rtx function_arg_union_value (int, enum machine_mode, int, int);
+
+/* A subroutine of function_arg_record_value. Traverse the structure
+ recursively and determine how many registers will be required. */
+
+static void
+function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos,
+ struct function_arg_record_value_parms *parms,
+ bool packed_p)
+{
+ tree field;
+
+ /* We need to compute how many registers are needed so we can
+ allocate the PARALLEL but before we can do that we need to know
+ whether there are any packed fields. The ABI obviously doesn't
+ specify how structures are passed in this case, so they are
+ defined to be passed in int regs if possible, otherwise memory,
+ regardless of whether there are fp values present. */
+
+ if (! packed_p)
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
+ {
+ packed_p = true;
+ break;
+ }
+ }
+
+ /* Compute how many registers we need. */
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ HOST_WIDE_INT bitpos = startbitpos;
+
+ if (DECL_SIZE (field) != 0)
+ {
+ if (integer_zerop (DECL_SIZE (field)))
+ continue;
+
+ if (host_integerp (bit_position (field), 1))
+ bitpos += int_bit_position (field);
+ }
+
+ /* ??? FIXME: else assume zero offset. */
+
+ if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
+ function_arg_record_value_1 (TREE_TYPE (field),
+ bitpos,
+ parms,
+ packed_p);
+ else if ((FLOAT_TYPE_P (TREE_TYPE (field))
+ || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
+ && TARGET_FPU
+ && parms->named
+ && ! packed_p)
+ {
+ if (parms->intoffset != -1)
+ {
+ unsigned int startbit, endbit;
+ int intslots, this_slotno;
+
+ startbit = parms->intoffset & -BITS_PER_WORD;
+ endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
+
+ intslots = (endbit - startbit) / BITS_PER_WORD;
+ this_slotno = parms->slotno + parms->intoffset
+ / BITS_PER_WORD;
+
+ if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+ {
+ intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+ /* We need to pass this field on the stack. */
+ parms->stack = 1;
+ }
+
+ parms->nregs += intslots;
+ parms->intoffset = -1;
+ }
+
+ /* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
+ If it wasn't true we wouldn't be here. */
+ if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
+ && DECL_MODE (field) == BLKmode)
+ parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
+ else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
+ parms->nregs += 2;
+ else
+ parms->nregs += 1;
+ }
+ else
+ {
+ if (parms->intoffset == -1)
+ parms->intoffset = bitpos;
+ }
+ }
+ }
+}
+
+/* A subroutine of function_arg_record_value. Assign the bits of the
+ structure between parms->intoffset and bitpos to integer registers. */
+
+static void
+function_arg_record_value_3 (HOST_WIDE_INT bitpos,
+ struct function_arg_record_value_parms *parms)
+{
+ enum machine_mode mode;
+ unsigned int regno;
+ unsigned int startbit, endbit;
+ int this_slotno, intslots, intoffset;
+ rtx reg;
+
+ if (parms->intoffset == -1)
+ return;
+
+ intoffset = parms->intoffset;
+ parms->intoffset = -1;
+
+ startbit = intoffset & -BITS_PER_WORD;
+ endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
+ intslots = (endbit - startbit) / BITS_PER_WORD;
+ this_slotno = parms->slotno + intoffset / BITS_PER_WORD;
+
+ intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
+ if (intslots <= 0)
+ return;
+
+ /* If this is the trailing part of a word, only load that much into
+ the register. Otherwise load the whole register. Note that in
+ the latter case we may pick up unwanted bits. It's not a problem
+ at the moment, but we may wish to revisit this. */
+
+ if (intoffset % BITS_PER_WORD != 0)
+ mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
+ MODE_INT);
+ else
+ mode = word_mode;
+
+ intoffset /= BITS_PER_UNIT;
+ do
+ {
+ regno = parms->regbase + this_slotno;
+ reg = gen_rtx_REG (mode, regno);
+ XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
+
+ this_slotno += 1;
+ intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
+ mode = word_mode;
+ parms->nregs += 1;
+ intslots -= 1;
+ }
+ while (intslots > 0);
+}
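+
+/* As a worked example for the function above, take BITS_PER_WORD == 64
+ and parms->intoffset == 32: startbit is 0, the offset is not
+ word-aligned, so the first register gets the 32-bit integer mode
+ returned by smallest_mode_for_size and picks up only the trailing
+ half of the first word; all subsequent registers use word_mode. */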
+
+/* A subroutine of function_arg_record_value. Traverse the structure
+ recursively and assign bits to floating point registers. Track which
+ bits in between need integer registers; invoke function_arg_record_value_3
+ to make that happen. */
+
+static void
+function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos,
+ struct function_arg_record_value_parms *parms,
+ bool packed_p)
+{
+ tree field;
+
+ if (! packed_p)
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
+ {
+ packed_p = true;
+ break;
+ }
+ }
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ HOST_WIDE_INT bitpos = startbitpos;
+
+ if (DECL_SIZE (field) != 0)
+ {
+ if (integer_zerop (DECL_SIZE (field)))
+ continue;
+
+ if (host_integerp (bit_position (field), 1))
+ bitpos += int_bit_position (field);
+ }
+
+ /* ??? FIXME: else assume zero offset. */
+
+ if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
+ function_arg_record_value_2 (TREE_TYPE (field),
+ bitpos,
+ parms,
+ packed_p);
+ else if ((FLOAT_TYPE_P (TREE_TYPE (field))
+ || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
+ && TARGET_FPU
+ && parms->named
+ && ! packed_p)
+ {
+ int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
+ int regno, nregs, pos;
+ enum machine_mode mode = DECL_MODE (field);
+ rtx reg;
+
+ function_arg_record_value_3 (bitpos, parms);
+
+ if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE
+ && mode == BLKmode)
+ {
+ mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
+ nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
+ }
+ else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
+ {
+ mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
+ nregs = 2;
+ }
+ else
+ nregs = 1;
+
+ regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
+ if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
+ regno++;
+ reg = gen_rtx_REG (mode, regno);
+ pos = bitpos / BITS_PER_UNIT;
+ XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
+ parms->nregs += 1;
+ while (--nregs > 0)
+ {
+ regno += GET_MODE_SIZE (mode) / 4;
+ reg = gen_rtx_REG (mode, regno);
+ pos += GET_MODE_SIZE (mode);
+ XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
+ parms->nregs += 1;
+ }
+ }
+ else
+ {
+ if (parms->intoffset == -1)
+ parms->intoffset = bitpos;
+ }
+ }
+ }
+}
+
+/* Used by function_arg and sparc_function_value_1 to implement the complex
+ conventions of the 64-bit ABI for passing and returning structures.
+ Return an expression valid as a return value for both FUNCTION_ARG
+ and TARGET_FUNCTION_VALUE.
+
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ MODE is the argument's machine mode.
+ SLOTNO is the index number of the argument's slot in the parameter array.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+ REGBASE is the regno of the base register for the parameter array. */
+
+static rtx
+function_arg_record_value (const_tree type, enum machine_mode mode,
+ int slotno, int named, int regbase)
+{
+ HOST_WIDE_INT typesize = int_size_in_bytes (type);
+ struct function_arg_record_value_parms parms;
+ unsigned int nregs;
+
+ parms.ret = NULL_RTX;
+ parms.slotno = slotno;
+ parms.named = named;
+ parms.regbase = regbase;
+ parms.stack = 0;
+
+ /* Compute how many registers we need. */
+ parms.nregs = 0;
+ parms.intoffset = 0;
+ function_arg_record_value_1 (type, 0, &parms, false);
+
+ /* Take into account pending integer fields. */
+ if (parms.intoffset != -1)
+ {
+ unsigned int startbit, endbit;
+ int intslots, this_slotno;
+
+ startbit = parms.intoffset & -BITS_PER_WORD;
+ endbit = (typesize * BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD;
+ intslots = (endbit - startbit) / BITS_PER_WORD;
+ this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
+
+ if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+ {
+ intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+ /* We need to pass this field on the stack. */
+ parms.stack = 1;
+ }
+
+ parms.nregs += intslots;
+ }
+ nregs = parms.nregs;
+
+ /* Allocate the vector and handle some annoying special cases. */
+ if (nregs == 0)
+ {
+ /* ??? Empty structure has no value? Duh? */
+ if (typesize <= 0)
+ {
+ /* Though there's nothing really to store, return a word register
+ anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
+ leads to breakage due to the fact that there are zero bytes to
+ load. */
+ return gen_rtx_REG (mode, regbase);
+ }
+ else
+ {
+ /* ??? C++ has structures with no fields, and yet a size. Give up
+ for now and pass everything back in integer registers. */
+ nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ }
+ if (nregs + slotno > SPARC_INT_ARG_MAX)
+ nregs = SPARC_INT_ARG_MAX - slotno;
+ }
+ gcc_assert (nregs != 0);
+
+ parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
+
+ /* If at least one field must be passed on the stack, generate
+ (parallel [(expr_list (nil) ...) ...]) so that all fields will
+ also be passed on the stack. We can't do much better because the
+ semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
+ of structures for which the fields passed exclusively in registers
+ are not at the beginning of the structure. */
+ if (parms.stack)
+ XVECEXP (parms.ret, 0, 0)
+ = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+
+ /* Fill in the entries. */
+ parms.nregs = 0;
+ parms.intoffset = 0;
+ function_arg_record_value_2 (type, 0, &parms, false);
+ function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms);
+
+ gcc_assert (parms.nregs == nregs);
+
+ return parms.ret;
+}
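+
+/* For example, on the 64-bit ABI a named argument of type
+
+ struct { double d; long l; };
+
+ passed in slot 0 comes out of the function above as
+
+ (parallel [(reg:DF %f0) at offset 0, (reg:DI %o1) at offset 8])
+
+ i.e. the fp field goes in the first fp register and the integer
+ field in the second outgoing integer register. */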
+
+/* Used by function_arg and sparc_function_value_1 to implement the conventions
+ of the 64-bit ABI for passing and returning unions.
+ Return an expression valid as a return value for both FUNCTION_ARG
+ and TARGET_FUNCTION_VALUE.
+
+ SIZE is the size in bytes of the union.
+ MODE is the argument's machine mode.
+ REGNO is the hard register the union will be passed in. */
+
+static rtx
+function_arg_union_value (int size, enum machine_mode mode, int slotno,
+ int regno)
+{
+ int nwords = ROUND_ADVANCE (size), i;
+ rtx regs;
+
+ /* See comment in previous function for empty structures. */
+ if (nwords == 0)
+ return gen_rtx_REG (mode, regno);
+
+ if (slotno == SPARC_INT_ARG_MAX - 1)
+ nwords = 1;
+
+ regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
+
+ for (i = 0; i < nwords; i++)
+ {
+ /* Unions are passed left-justified. */
+ XVECEXP (regs, 0, i)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (word_mode, regno),
+ GEN_INT (UNITS_PER_WORD * i));
+ regno++;
+ }
+
+ return regs;
+}
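+
+/* For example, a 16-byte union passed in slot 0 yields two word_mode
+ registers, %o0 at offset 0 and %o1 at offset 8. If the union starts
+ in the last slot (SPARC_INT_ARG_MAX - 1), only the first word goes in
+ a register; the rest is accounted for by sparc_arg_partial_bytes. */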
+
+/* Used by function_arg and sparc_function_value_1 to implement the conventions
+ for passing and returning large (BLKmode) vectors.
+ Return an expression valid as a return value for both FUNCTION_ARG
+ and TARGET_FUNCTION_VALUE.
+
+ SIZE is the size in bytes of the vector (at least 8 bytes).
+ REGNO is the FP hard register the vector will be passed in. */
+
+static rtx
+function_arg_vector_value (int size, int regno)
+{
+ int i, nregs = size / 8;
+ rtx regs;
+
+ regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
+
+ for (i = 0; i < nregs; i++)
+ {
+ XVECEXP (regs, 0, i)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DImode, regno + 2 * i),
+ GEN_INT (i * 8));
+ }
+
+ return regs;
+}
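+
+/* For example, a 16-byte vector passed starting at %f0 yields a
+ PARALLEL of two DImode fp registers, %d0 at offset 0 and %d2 at
+ offset 8: one register per 8-byte chunk, stepping REGNO by 2 because
+ fp register numbers are counted in 4-byte quantities. */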
+
+/* Determine where to put an argument to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ NAMED is true if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+ INCOMING_P is false for TARGET_FUNCTION_ARG, true for
+ TARGET_FUNCTION_INCOMING_ARG. */
+
+static rtx
+sparc_function_arg_1 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named, bool incoming_p)
+{
+ int regbase = (incoming_p
+ ? SPARC_INCOMING_INT_ARG_FIRST
+ : SPARC_OUTGOING_INT_ARG_FIRST);
+ int slotno, regno, padding;
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+
+ slotno = function_arg_slotno (cum, mode, type, named, incoming_p,
+ &regno, &padding);
+ if (slotno == -1)
+ return 0;
+
+ /* Vector types deserve special treatment because they are polymorphic wrt
+ their mode, depending upon whether VIS instructions are enabled. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert ((TARGET_ARCH32 && size <= 8)
+ || (TARGET_ARCH64 && size <= 16));
+
+ if (mode == BLKmode)
+ return function_arg_vector_value (size,
+ SPARC_FP_ARG_FIRST + 2 * slotno);
+ else
+ mclass = MODE_FLOAT;
+ }
+
+ if (TARGET_ARCH32)
+ return gen_rtx_REG (mode, regno);
+
+ /* Structures up to 16 bytes in size are passed in arg slots on the stack
+ and are promoted to registers if possible. */
+ if (type && TREE_CODE (type) == RECORD_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 16);
+
+ return function_arg_record_value (type, mode, slotno, named, regbase);
+ }
+
+ /* Unions up to 16 bytes in size are passed in integer registers. */
+ else if (type && TREE_CODE (type) == UNION_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 16);
+
+ return function_arg_union_value (size, mode, slotno, regno);
+ }
+
+ /* v9 fp args in reg slots beyond the int reg slots get passed in regs
+ but also have the slot allocated for them.
+ If no prototype is in scope, fp values in register slots get passed
+ in two places: either fp regs and int regs, or fp regs and memory. */
+ else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
+ && SPARC_FP_REG_P (regno))
+ {
+ rtx reg = gen_rtx_REG (mode, regno);
+ if (cum->prototype_p || cum->libcall_p)
+ {
+ /* "* 2" because fp reg numbers are recorded in 4 byte
+ quantities. */
+#if 0
+ /* ??? This will cause the value to be passed in the fp reg and
+ in the stack. When a prototype exists we want to pass the
+ value in the reg but reserve space on the stack. That's an
+ optimization, and is deferred [for a bit]. */
+ if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2)
+ return gen_rtx_PARALLEL (mode,
+ gen_rtvec (2,
+ gen_rtx_EXPR_LIST (VOIDmode,
+ NULL_RTX, const0_rtx),
+ gen_rtx_EXPR_LIST (VOIDmode,
+ reg, const0_rtx)));
+ else
+#else
+ /* ??? It seems that passing back a register even when past
+ the area declared by REG_PARM_STACK_SPACE will allocate
+ space appropriately, and will not copy the data onto the
+ stack, exactly as we desire.
+
+ This is due to locate_and_pad_parm being called in
+ expand_call whenever reg_parm_stack_space > 0, which
+ while beneficial to our example here, would seem to be
+ in error from what had been intended. Ho hum... -- r~ */
+#endif
+ return reg;
+ }
+ else
+ {
+ rtx v0, v1;
+
+ if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
+ {
+ int intreg;
+
+ /* On incoming, we don't need to know that the value
+ is passed in %f0 and %i0, and knowing this confuses other parts,
+ causing needless spillage even in the simplest cases. */
+ if (incoming_p)
+ return reg;
+
+ intreg = (SPARC_OUTGOING_INT_ARG_FIRST
+ + (regno - SPARC_FP_ARG_FIRST) / 2);
+
+ v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+ v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
+ const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
+ }
+ else
+ {
+ v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
+ }
+ }
+ }
+
+ /* All other aggregate types are passed in an integer register in a mode
+ corresponding to the size of the type. */
+ else if (type && AGGREGATE_TYPE_P (type))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 16);
+
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+ }
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Handle the TARGET_FUNCTION_ARG target hook. */
+
+static rtx
+sparc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return sparc_function_arg_1 (cum, mode, type, named, false);
+}
+
+/* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
+
+static rtx
+sparc_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ return sparc_function_arg_1 (cum, mode, type, named, true);
+}
+
+/* For sparc64, objects requiring 16 byte alignment are passed that way. */
+
+static unsigned int
+sparc_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ return ((TARGET_ARCH64
+ && (GET_MODE_ALIGNMENT (mode) == 128
+ || (type && TYPE_ALIGN (type) == 128)))
+ ? 128
+ : PARM_BOUNDARY);
+}
+
+/* For an arg passed partly in registers and partly in memory,
+ this is the number of bytes of registers used.
+ For args passed entirely in registers or entirely in memory, zero.
+
+ Any arg that starts in the first 6 regs but won't entirely fit in them
+ needs partial registers on v8. On v9, structures with integer
+ values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
+ values that begin in the last fp reg [where "last fp reg" varies with the
+ mode] will be split between that reg and memory. */
+
+static int
+sparc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int slotno, regno, padding;
+
+ /* We pass false for incoming_p here; it doesn't matter. */
+ slotno = function_arg_slotno (cum, mode, type, named, false,
+ &regno, &padding);
+
+ if (slotno == -1)
+ return 0;
+
+ if (TARGET_ARCH32)
+ {
+ if ((slotno + (mode == BLKmode
+ ? ROUND_ADVANCE (int_size_in_bytes (type))
+ : ROUND_ADVANCE (GET_MODE_SIZE (mode))))
+ > SPARC_INT_ARG_MAX)
+ return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
+ }
+ else
+ {
+ /* We are guaranteed by pass_by_reference that the size of the
+ argument is not greater than 16 bytes, so we only need to return
+ one word if the argument is partially passed in registers. */
+
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int size = int_size_in_bytes (type);
+
+ if (size > UNITS_PER_WORD
+ && slotno == SPARC_INT_ARG_MAX - 1)
+ return UNITS_PER_WORD;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
+ || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
+ && ! (TARGET_FPU && named)))
+ {
+ /* The complex types are passed as packed types. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && slotno == SPARC_INT_ARG_MAX - 1)
+ return UNITS_PER_WORD;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+ {
+ if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
+ > SPARC_FP_ARG_MAX)
+ return UNITS_PER_WORD;
+ }
+ }
+
+ return 0;
+}
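+
+/* For example, on the 32-bit ABI a DImode argument starting in slot 5
+ has slotno + 2 > SPARC_INT_ARG_MAX, so the function above returns
+ (6 - 5) * UNITS_PER_WORD == 4 bytes: the first half goes in %o5 and
+ the second half on the stack. */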
+
+/* Handle the TARGET_PASS_BY_REFERENCE target hook.
+ Specify whether to pass the argument by reference. */
+
+static bool
+sparc_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ if (TARGET_ARCH32)
+ /* Original SPARC 32-bit ABI says that structures and unions,
+ and quad-precision floats are passed by reference. For Pascal,
+ also pass arrays by reference. All other base types are passed
+ in registers.
+
+ Extended ABI (as implemented by the Sun compiler) says that all
+ complex floats are passed by reference. Pass complex integers
+ in registers up to 8 bytes. More generally, enforce the 2-word
+ cap for passing arguments in registers.
+
+ Vector ABI (as implemented by the Sun VIS SDK) says that vector
+ integers are passed like floats of the same size, that is in
+ registers up to 8 bytes. Pass all vector floats by reference
+ like structure and unions. */
+ return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
+ || mode == SCmode
+ /* Catch CDImode, TFmode, DCmode and TCmode. */
+ || GET_MODE_SIZE (mode) > 8
+ || (type
+ && TREE_CODE (type) == VECTOR_TYPE
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
+ else
+ /* Original SPARC 64-bit ABI says that structures and unions
+ smaller than 16 bytes are passed in registers, as well as
+ all other base types.
+
+ Extended ABI (as implemented by the Sun compiler) says that
+ complex floats are passed in registers up to 16 bytes. Pass
+ all complex integers in registers up to 16 bytes. More generally,
+ enforce the 2-word cap for passing arguments in registers.
+
+ Vector ABI (as implemented by the Sun VIS SDK) says that vector
+ integers are passed like floats of the same size, that is in
+ registers (up to 16 bytes). Pass all vector floats like structure
+ and unions. */
+ return ((type
+ && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
+ /* Catch CTImode and TCmode. */
+ || GET_MODE_SIZE (mode) > 16);
+}
+
+/* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
+ Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ TYPE is null for libcalls where that information may not be available. */
+
+static void
+sparc_function_arg_advance (struct sparc_args *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ int regno, padding;
+
+ /* We pass false for incoming_p here; it doesn't matter. */
+ function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
+
+ /* If argument requires leading padding, add it. */
+ cum->words += padding;
+
+ if (TARGET_ARCH32)
+ {
+ cum->words += (mode != BLKmode
+ ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
+ : ROUND_ADVANCE (int_size_in_bytes (type)));
+ }
+ else
+ {
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int size = int_size_in_bytes (type);
+
+ if (size <= 8)
+ ++cum->words;
+ else if (size <= 16)
+ cum->words += 2;
+ else /* passed by reference */
+ ++cum->words;
+ }
+ else
+ {
+ cum->words += (mode != BLKmode
+ ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
+ : ROUND_ADVANCE (int_size_in_bytes (type)));
+ }
+ }
+}
+
+/* Handle the FUNCTION_ARG_PADDING macro.
+ For the 64-bit ABI, structs are always stored left-shifted in their
+ argument slot. */
+
+enum direction
+function_arg_padding (enum machine_mode mode, const_tree type)
+{
+ if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type))
+ return upward;
+
+ /* Fall back to the default. */
+ return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
+}
+
+/* Handle the TARGET_RETURN_IN_MEMORY target hook.
+ Specify whether to return the return value in memory. */
+
+static bool
+sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ if (TARGET_ARCH32)
+ /* Original SPARC 32-bit ABI says that structures and unions,
+ and quad-precision floats are returned in memory. All other
+ base types are returned in registers.
+
+ Extended ABI (as implemented by the Sun compiler) says that
+ all complex floats are returned in registers (8 FP registers
+ at most for '_Complex long double'). Return all complex integers
+ in registers (4 at most for '_Complex long long').
+
+ Vector ABI (as implemented by the Sun VIS SDK) says that vector
+ integers are returned like floats of the same size, that is in
+ registers up to 8 bytes and in memory otherwise. Return all
+ vector floats in memory like structure and unions; note that
+ they always have BLKmode like the latter. */
+ return (TYPE_MODE (type) == BLKmode
+ || TYPE_MODE (type) == TFmode
+ || (TREE_CODE (type) == VECTOR_TYPE
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
+ else
+ /* Original SPARC 64-bit ABI says that structures and unions
+ smaller than 32 bytes are returned in registers, as well as
+ all other base types.
+
+ Extended ABI (as implemented by the Sun compiler) says that all
+ complex floats are returned in registers (8 FP registers at most
+ for '_Complex long double'). Return all complex integers in
+ registers (4 at most for '_Complex TItype').
+
+ Vector ABI (as implemented by the Sun VIS SDK) says that vector
+ integers are returned like floats of the same size, that is in
+ registers. Return all vector floats like structure and unions;
+ note that they always have BLKmode like the latter. */
+ return (TYPE_MODE (type) == BLKmode
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
+}
+
+/* Handle the TARGET_STRUCT_VALUE target hook.
+ Return where to find the structure return value address. */
+
+static rtx
+sparc_struct_value_rtx (tree fndecl, int incoming)
+{
+ if (TARGET_ARCH64)
+ return 0;
+ else
+ {
+ rtx mem;
+
+ if (incoming)
+ mem = gen_frame_mem (Pmode, plus_constant (frame_pointer_rtx,
+ STRUCT_VALUE_OFFSET));
+ else
+ mem = gen_frame_mem (Pmode, plus_constant (stack_pointer_rtx,
+ STRUCT_VALUE_OFFSET));
+
+ /* Only follow the SPARC ABI for fixed-size structure returns.
+ Variable size structure returns are handled per the normal
+ procedures in GCC. This is enabled by -mstd-struct-return. */
+ if (incoming == 2
+ && sparc_std_struct_return
+ && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
+ && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
+ {
+ /* We must check and adjust the return address, as it is
+ optional as to whether the return object is really
+ provided. */
+ rtx ret_rtx = gen_rtx_REG (Pmode, 31);
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx endlab = gen_label_rtx ();
+
+ /* Calculate the return object size. */
+ tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
+ rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
+ /* Construct a temporary return value. */
+ rtx temp_val
+ = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
+
+ /* Implement SPARC 32-bit psABI callee return struct checking:
+
+ Fetch the instruction where we will return to and see if
+ it's an unimp instruction (the most significant 10 bits
+ will be zero). */
+ emit_move_insn (scratch, gen_rtx_MEM (SImode,
+ plus_constant (ret_rtx, 8)));
+ /* Assume the size is valid and pre-adjust. */
+ emit_insn (gen_add3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
+ emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
+ 0, endlab);
+ emit_insn (gen_sub3_insn (ret_rtx, ret_rtx, GEN_INT (4)));
+ /* Write the address of the memory pointed to by temp_val into
+ the memory pointed to by mem. */
+ emit_move_insn (mem, XEXP (temp_val, 0));
+ emit_label (endlab);
+ }
+
+ return mem;
+ }
+}
+
+/* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
+ For v9, function return values are subject to the same rules as arguments,
+ except that up to 32 bytes may be returned in registers. */
+
+static rtx
+sparc_function_value_1 (const_tree type, enum machine_mode mode,
+ bool outgoing)
+{
+ /* Beware that the two values are swapped here wrt function_arg. */
+ int regbase = (outgoing
+ ? SPARC_INCOMING_INT_ARG_FIRST
+ : SPARC_OUTGOING_INT_ARG_FIRST);
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+ int regno;
+
+ /* Vector types deserve special treatment because they are polymorphic wrt
+ their mode, depending upon whether VIS instructions are enabled. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert ((TARGET_ARCH32 && size <= 8)
+ || (TARGET_ARCH64 && size <= 32));
+
+ if (mode == BLKmode)
+ return function_arg_vector_value (size,
+ SPARC_FP_ARG_FIRST);
+ else
+ mclass = MODE_FLOAT;
+ }
+
+ if (TARGET_ARCH64 && type)
+ {
+ /* Structures up to 32 bytes in size are returned in registers. */
+ if (TREE_CODE (type) == RECORD_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 32);
+
+ return function_arg_record_value (type, mode, 0, 1, regbase);
+ }
+
+ /* Unions up to 32 bytes in size are returned in integer registers. */
+ else if (TREE_CODE (type) == UNION_TYPE)
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 32);
+
+ return function_arg_union_value (size, mode, 0, regbase);
+ }
+
+ /* Objects that require it are returned in FP registers. */
+ else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
+ ;
+
+ /* All other aggregate types are returned in an integer register in a
+ mode corresponding to the size of the type. */
+ else if (AGGREGATE_TYPE_P (type))
+ {
+ /* All other aggregate types are passed in an integer register
+ in a mode corresponding to the size of the type. */
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ gcc_assert (size <= 32);
+
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
+
+ /* ??? We probably should have made the same ABI change in
+ 3.4.0 as the one we made for unions. The latter was
+ required by the SCD though, while the former is not
+ specified, so we favored compatibility and efficiency.
+
+ Now we're stuck for aggregates larger than 16 bytes,
+ because OImode vanished in the meantime. Let's not
+ try to be unduly clever, and simply follow the ABI
+ for unions in that case. */
+ if (mode == BLKmode)
+ return function_arg_union_value (size, mode, 0, regbase);
+ else
+ mclass = MODE_INT;
+ }
+
+ /* We should only have pointer and integer types at this point. This
+ must match sparc_promote_function_mode. */
+ else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ mode = word_mode;
+ }
+
+ /* We should only have pointer and integer types at this point. This must
+ match sparc_promote_function_mode. */
+ else if (TARGET_ARCH32
+ && mclass == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ mode = word_mode;
+
+ if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
+ regno = SPARC_FP_ARG_FIRST;
+ else
+ regno = regbase;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Handle TARGET_FUNCTION_VALUE.
+ On the SPARC, the value is found in the first "output" register, but the
+ called function leaves it in the first "input" register. */
+
+static rtx
+sparc_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing)
+{
+ return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
+}
+
+/* Handle TARGET_LIBCALL_VALUE. */
+
+static rtx
+sparc_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return sparc_function_value_1 (NULL_TREE, mode, false);
+}
+
+/* Handle FUNCTION_VALUE_REGNO_P.
+ On the SPARC, the first "output" reg is used for integer values, and the
+ first floating point register is used for floating point values. */
+
+static bool
+sparc_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == 8 || regno == 32);
+}
+
+/* Do what is necessary for `va_start'. We look at the current function
+ to determine if stdarg or varargs is used and return the address of
+ the first unnamed parameter. */
+
+static rtx
+sparc_builtin_saveregs (void)
+{
+ int first_reg = crtl->args.info.words;
+ rtx address;
+ int regno;
+
+ for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
+ emit_move_insn (gen_rtx_MEM (word_mode,
+ gen_rtx_PLUS (Pmode,
+ frame_pointer_rtx,
+ GEN_INT (FIRST_PARM_OFFSET (0)
+ + (UNITS_PER_WORD
+ * regno)))),
+ gen_rtx_REG (word_mode,
+ SPARC_INCOMING_INT_ARG_FIRST + regno));
+
+ address = gen_rtx_PLUS (Pmode,
+ frame_pointer_rtx,
+ GEN_INT (FIRST_PARM_OFFSET (0)
+ + UNITS_PER_WORD * first_reg));
+
+ return address;
+}
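+
+/* For example, in a varargs function declared as f (int a, int b, ...),
+ crtl->args.info.words is 2 on entry, so the loop above dumps %i2
+ through %i5 into their reserved stack slots and the returned address
+ points at the slot of the first unnamed argument. */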
+
+/* Implement `va_start' for stdarg. */
+
+static void
+sparc_va_start (tree valist, rtx nextarg)
+{
+ nextarg = expand_builtin_saveregs ();
+ std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Implement `va_arg' for stdarg. */
+
+static tree
+sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ HOST_WIDE_INT size, rsize, align;
+ tree addr, incr;
+ bool indirect;
+ tree ptrtype = build_pointer_type (type);
+
+ if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
+ {
+ indirect = true;
+ size = rsize = UNITS_PER_WORD;
+ align = 0;
+ }
+ else
+ {
+ indirect = false;
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+ align = 0;
+
+ if (TARGET_ARCH64)
+ {
+ /* For SPARC64, objects requiring 16-byte alignment get it. */
+ if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
+ align = 2 * UNITS_PER_WORD;
+
+ /* SPARC-V9 ABI states that structures up to 16 bytes in size
+ are left-justified in their slots. */
+ if (AGGREGATE_TYPE_P (type))
+ {
+ if (size == 0)
+ size = rsize = UNITS_PER_WORD;
+ else
+ size = rsize;
+ }
+ }
+ }
+
+ incr = valist;
+ if (align)
+ {
+ incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
+ size_int (align - 1));
+ incr = fold_convert (sizetype, incr);
+ incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
+ size_int (-align));
+ incr = fold_convert (ptr_type_node, incr);
+ }
+
+ gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
+ addr = incr;
+
+ if (BYTES_BIG_ENDIAN && size < rsize)
+ addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
+ size_int (rsize - size));
+
+ if (indirect)
+ {
+ addr = fold_convert (build_pointer_type (ptrtype), addr);
+ addr = build_va_arg_indirect_ref (addr);
+ }
+
+ /* If the address isn't aligned properly for the type, we need a temporary.
+ FIXME: This is inefficient; usually we can do this in registers. */
+ else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
+ {
+ tree tmp = create_tmp_var (type, "va_arg_tmp");
+ tree dest_addr = build_fold_addr_expr (tmp);
+ tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, addr, size_int (rsize));
+ TREE_ADDRESSABLE (tmp) = 1;
+ gimplify_and_add (copy, pre_p);
+ addr = dest_addr;
+ }
+
+ else
+ addr = fold_convert (ptrtype, addr);
+
+ incr
+ = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
+ gimplify_assign (valist, incr, post_p);
+
+ return build_va_arg_indirect_ref (addr);
+}
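+
+/* The alignment code above computes, in gimple form, the usual
+ round-up: for a 16-byte-aligned type on the 64-bit ABI it amounts to
+
+ incr = (valist + 15) & -16;
+
+ after which the argument is fetched from INCR and VALIST is advanced
+ by RSIZE. */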
+
+/* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
+ Specify whether the vector mode is supported by the hardware. */
+
+static bool
+sparc_vector_mode_supported_p (enum machine_mode mode)
+{
+ return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
+}
+
+/* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
+
+static enum machine_mode
+sparc_preferred_simd_mode (enum machine_mode mode)
+{
+ if (TARGET_VIS)
+ switch (mode)
+ {
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+
+ default:;
+ }
+
+ return word_mode;
+}
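+
+/* For example, with VIS enabled the vectorizer is told to vectorize
+ HImode operations in V4HImode, i.e. four 16-bit lanes in a single
+ 64-bit fp register. */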
+
+/* Return the string to output an unconditional branch to LABEL, which is
+ the operand number of the label.
+
+ DEST is the destination insn (i.e. the label), INSN is the source. */
+
+const char *
+output_ubranch (rtx dest, int label, rtx insn)
+{
+ static char string[64];
+ bool v9_form = false;
+ char *p;
+
+ if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
+ {
+ int delta = (INSN_ADDRESSES (INSN_UID (dest))
+ - INSN_ADDRESSES (INSN_UID (insn)));
+ /* Leave some instructions for "slop". */
+ if (delta >= -260000 && delta < 260000)
+ v9_form = true;
+ }
+
+ if (v9_form)
+ strcpy (string, "ba%*,pt\t%%xcc, ");
+ else
+ strcpy (string, "b%*\t");
+
+ p = strchr (string, '\0');
+ *p++ = '%';
+ *p++ = 'l';
+ *p++ = '0' + label;
+ *p++ = '%';
+ *p++ = '(';
+ *p = '\0';
+
+ return string;
+}
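+
+/* For example, with the label as operand 0 and a destination within
+ range on a V9 target, the function above returns the template
+
+ "ba%*,pt\t%%xcc, %l0%("
+
+ in which final substitutes the actual label for %l0, giving
+ something like "ba,pt %xcc, .LC12". */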
+
+/* Return the string to output a conditional branch to LABEL, which is
+ the operand number of the label. OP is the conditional expression.
+ XEXP (OP, 0) is assumed to be a condition code register (integer or
+ floating point) and its mode specifies what kind of comparison we made.
+
+ DEST is the destination insn (i.e. the label), INSN is the source.
+
+ REVERSED is nonzero if we should reverse the sense of the comparison.
+
+ ANNUL is nonzero if we should generate an annulling branch. */
+
+const char *
+output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
+ rtx insn)
+{
+ static char string[64];
+ enum rtx_code code = GET_CODE (op);
+ rtx cc_reg = XEXP (op, 0);
+ enum machine_mode mode = GET_MODE (cc_reg);
+ const char *labelno, *branch;
+ int spaces = 8, far;
+ char *p;
+
+ /* v9 branches are limited to +-1MB. If it is too far away,
+ change
+
+ bne,pt %xcc, .LC30
+
+ to
+
+ be,pn %xcc, .+12
+ nop
+ ba .LC30
+
+ and
+
+ fbne,a,pn %fcc2, .LC29
+
+ to
+
+ fbe,pt %fcc2, .+16
+ nop
+ ba .LC29 */
+
+ far = TARGET_V9 && (get_attr_length (insn) >= 3);
+ if (reversed ^ far)
+ {
+ /* Reversal of FP compares takes care -- an ordered compare
+ becomes an unordered compare and vice versa. */
+ if (mode == CCFPmode || mode == CCFPEmode)
+ code = reverse_condition_maybe_unordered (code);
+ else
+ code = reverse_condition (code);
+ }
+
+ /* Start by writing the branch condition. */
+ if (mode == CCFPmode || mode == CCFPEmode)
+ {
+ switch (code)
+ {
+ case NE:
+ branch = "fbne";
+ break;
+ case EQ:
+ branch = "fbe";
+ break;
+ case GE:
+ branch = "fbge";
+ break;
+ case GT:
+ branch = "fbg";
+ break;
+ case LE:
+ branch = "fble";
+ break;
+ case LT:
+ branch = "fbl";
+ break;
+ case UNORDERED:
+ branch = "fbu";
+ break;
+ case ORDERED:
+ branch = "fbo";
+ break;
+ case UNGT:
+ branch = "fbug";
+ break;
+ case UNLT:
+ branch = "fbul";
+ break;
+ case UNEQ:
+ branch = "fbue";
+ break;
+ case UNGE:
+ branch = "fbuge";
+ break;
+ case UNLE:
+ branch = "fbule";
+ break;
+ case LTGT:
+ branch = "fblg";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* ??? !v9: FP branches cannot be preceded by another floating point
+ insn. Because there is currently no concept of pre-delay slots,
+ we can fix this only by always emitting a nop before a floating
+ point branch. */
+
+ string[0] = '\0';
+ if (! TARGET_V9)
+ strcpy (string, "nop\n\t");
+ strcat (string, branch);
+ }
+ else
+ {
+ switch (code)
+ {
+ case NE:
+ branch = "bne";
+ break;
+ case EQ:
+ branch = "be";
+ break;
+ case GE:
+ if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
+ branch = "bpos";
+ else
+ branch = "bge";
+ break;
+ case GT:
+ branch = "bg";
+ break;
+ case LE:
+ branch = "ble";
+ break;
+ case LT:
+ if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
+ branch = "bneg";
+ else
+ branch = "bl";
+ break;
+ case GEU:
+ branch = "bgeu";
+ break;
+ case GTU:
+ branch = "bgu";
+ break;
+ case LEU:
+ branch = "bleu";
+ break;
+ case LTU:
+ branch = "blu";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ strcpy (string, branch);
+ }
+ spaces -= strlen (branch);
+ p = strchr (string, '\0');
+
+ /* Now add the annulling, the label, and a possible noop. */
+ if (annul && ! far)
+ {
+ strcpy (p, ",a");
+ p += 2;
+ spaces -= 2;
+ }
+
+ if (TARGET_V9)
+ {
+ rtx note;
+ int v8 = 0;
+
+ if (! far && insn && INSN_ADDRESSES_SET_P ())
+ {
+ int delta = (INSN_ADDRESSES (INSN_UID (dest))
+ - INSN_ADDRESSES (INSN_UID (insn)));
+ /* Leave some instructions for "slop". */
+ if (delta < -260000 || delta >= 260000)
+ v8 = 1;
+ }
+
+ if (mode == CCFPmode || mode == CCFPEmode)
+ {
+ static char v9_fcc_labelno[] = "%%fccX, ";
+ /* Set the char indicating the number of the fcc reg to use. */
+ v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
+ labelno = v9_fcc_labelno;
+ if (v8)
+ {
+ gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
+ labelno = "";
+ }
+ }
+ else if (mode == CCXmode || mode == CCX_NOOVmode)
+ {
+ labelno = "%%xcc, ";
+ gcc_assert (! v8);
+ }
+ else
+ {
+ labelno = "%%icc, ";
+ if (v8)
+ labelno = "";
+ }
+
+ if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
+ {
+ strcpy (p,
+ ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
+ ? ",pt" : ",pn");
+ p += 3;
+ spaces -= 3;
+ }
+ }
+ else
+ labelno = "";
+
+ if (spaces > 0)
+ *p++ = '\t';
+ else
+ *p++ = ' ';
+ strcpy (p, labelno);
+ p = strchr (p, '\0');
+ if (far)
+ {
+ strcpy (p, ".+12\n\t nop\n\tb\t");
+ /* Skip the next insn if requested or
+ if we know that it will be a nop. */
+ if (annul || ! final_sequence)
+ p[3] = '6';
+ p += 14;
+ }
+ *p++ = '%';
+ *p++ = 'l';
+ *p++ = label + '0';
+ *p++ = '%';
+ *p++ = '#';
+ *p = '\0';
+
+ return string;
+}
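+
+/* E.g. an integer (ne ...) test on %icc with the annul bit set and a
+ "likely" REG_BR_PROB note yields something like
+ "bne,a,pt\t%%icc, %l1%#", which final renders as
+ "bne,a,pt %icc, .LC30" plus any delay-slot handling for %#. */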
+
+/* Emit a library call comparison between floating point X and Y.
+ COMPARISON is the operator to compare with (EQ, NE, GT, etc).
+ Return the new operator to be used in the comparison sequence.
+
+ TARGET_ARCH64 uses _Qp_* functions, which take pointers to TFmode
+ values as arguments instead of the TFmode registers themselves,
+ which is why we cannot call emit_float_lib_cmp. */
+
+rtx
+sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
+{
+ const char *qpfunc;
+ rtx slot0, slot1, result, tem, tem2, libfunc;
+ enum machine_mode mode;
+ enum rtx_code new_comparison;
+
+ switch (comparison)
+ {
+ case EQ:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
+ break;
+
+ case NE:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
+ break;
+
+ case GT:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
+ break;
+
+ case GE:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
+ break;
+
+ case LT:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
+ break;
+
+ case LE:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
+ break;
+
+ case ORDERED:
+ case UNORDERED:
+ case UNGT:
+ case UNLT:
+ case UNEQ:
+ case UNGE:
+ case UNLE:
+ case LTGT:
+ qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TARGET_ARCH64)
+ {
+ if (MEM_P (x))
+ slot0 = x;
+ else
+ {
+ slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
+ emit_move_insn (slot0, x);
+ }
+
+ if (MEM_P (y))
+ slot1 = y;
+ else
+ {
+ slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
+ emit_move_insn (slot1, y);
+ }
+
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
+ emit_library_call (libfunc, LCT_NORMAL,
+ DImode, 2,
+ XEXP (slot0, 0), Pmode,
+ XEXP (slot1, 0), Pmode);
+ mode = DImode;
+ }
+ else
+ {
+ libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
+ emit_library_call (libfunc, LCT_NORMAL,
+ SImode, 2,
+ x, TFmode, y, TFmode);
+ mode = SImode;
+ }
+
+ /* Immediately move the result of the libcall into a pseudo
+ register so reload doesn't clobber the value if it needs
+ the return register for a spill reg. */
+ result = gen_reg_rtx (mode);
+ emit_move_insn (result, hard_libcall_value (mode, libfunc));
+
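+ /* For the ordering cases above, _Q_cmp/_Qp_cmp return an ordering
+ code rather than a boolean: 0 = equal, 1 = less, 2 = greater and
+ 3 = unordered. E.g. for UNLT we test (result & 1) != 0 below, which
+ accepts exactly 1 (less) and 3 (unordered), while for UNEQ the
+ expression ((result + 1) & 2) == 0 accepts exactly 0 (equal) and
+ 3 (unordered). */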
+ switch (comparison)
+ {
+ default:
+ return gen_rtx_NE (VOIDmode, result, const0_rtx);
+ case ORDERED:
+ case UNORDERED:
+ new_comparison = (comparison == UNORDERED ? EQ : NE);
+ return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
+ case UNGT:
+ case UNGE:
+ new_comparison = (comparison == UNGT ? GT : NE);
+ return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
+ case UNLE:
+ return gen_rtx_NE (VOIDmode, result, const2_rtx);
+ case UNLT:
+ tem = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_andsi3 (tem, result, const1_rtx));
+ else
+ emit_insn (gen_anddi3 (tem, result, const1_rtx));
+ return gen_rtx_NE (VOIDmode, tem, const0_rtx);
+ case UNEQ:
+ case LTGT:
+ tem = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_addsi3 (tem, result, const1_rtx));
+ else
+ emit_insn (gen_adddi3 (tem, result, const1_rtx));
+ tem2 = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
+ else
+ emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
+ new_comparison = (comparison == UNEQ ? EQ : NE);
+ return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
+ }
+
+ gcc_unreachable ();
+}
+
+/* Generate an unsigned DImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode)
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+
+ out = operands[0];
+ in = force_reg (DImode, operands[1]);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i0 = gen_reg_rtx (DImode);
+ i1 = gen_reg_rtx (DImode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
+ emit_insn (gen_anddi3 (i1, in, const1_rtx));
+ emit_insn (gen_iordi3 (i0, i0, i1));
+ emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
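+
+/* E.g. for IN = 2^63 the negative path above computes
+ I0 = (IN >> 1) | (IN & 1) = 2^62, converts it to FP and doubles it,
+ giving OUT = 2^63; OR-ing the low bit back in preserves correct
+ rounding for odd inputs. */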
+
+/* Generate an FP to unsigned DImode conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
+{
+ rtx neglab, donelab, i0, i1, f0, in, out, limit;
+
+ out = operands[0];
+ in = force_reg (mode, operands[1]);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i0 = gen_reg_rtx (DImode);
+ i1 = gen_reg_rtx (DImode);
+ limit = gen_reg_rtx (mode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_move_insn (limit,
+ CONST_DOUBLE_FROM_REAL_VALUE (
+ REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
+ emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode,
+ out,
+ gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit)));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ i0,
+ gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
+ emit_insn (gen_movdi (i1, const1_rtx));
+ emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
+ emit_insn (gen_xordi3 (out, i0, i1));
+
+ emit_label (donelab);
+}
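+
+/* E.g. for IN = 2^63 + 5 the NEGLAB path above computes
+ F0 = IN - 2^63 = 5.0, fixes it to I0 = 5 and XORs in the sign bit
+ (I1 = 1 << 63), producing the unsigned result OUT = 2^63 + 5. */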
+
+/* Return the string to output a conditional branch to LABEL, testing
+ register REG. LABEL is the operand number of the label; REG is the
+ operand number of the reg. OP is the conditional expression. The mode
+ of REG says what kind of comparison we made.
+
+ DEST is the destination insn (i.e. the label), INSN is the source.
+
+ REVERSED is nonzero if we should reverse the sense of the comparison.
+
+ ANNUL is nonzero if we should generate an annulling branch. */
+
+const char *
+output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
+ int annul, rtx insn)
+{
+ static char string[64];
+ enum rtx_code code = GET_CODE (op);
+ enum machine_mode mode = GET_MODE (XEXP (op, 0));
+ rtx note;
+ int far;
+ char *p;
+
+ /* Branches on registers are limited to +-128KB. If it is too far away,
+ change
+
+ brnz,pt %g1, .LC30
+
+ to
+
+ brz,pn %g1, .+12
+ nop
+ ba,pt %xcc, .LC30
+
+ and
+
+ brgez,a,pn %o1, .LC29
+
+ to
+
+ brlz,pt %o1, .+16
+ nop
+ ba,pt %xcc, .LC29 */
+
+ far = get_attr_length (insn) >= 3;
+
+ /* If not floating-point or if EQ or NE, we can just reverse the code. */
+ if (reversed ^ far)
+ code = reverse_condition (code);
+
+ /* Only 64 bit versions of these instructions exist. */
+ gcc_assert (mode == DImode);
+
+ /* Start by writing the branch condition. */
+
+ switch (code)
+ {
+ case NE:
+ strcpy (string, "brnz");
+ break;
+
+ case EQ:
+ strcpy (string, "brz");
+ break;
+
+ case GE:
+ strcpy (string, "brgez");
+ break;
+
+ case LT:
+ strcpy (string, "brlz");
+ break;
+
+ case LE:
+ strcpy (string, "brlez");
+ break;
+
+ case GT:
+ strcpy (string, "brgz");
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ p = strchr (string, '\0');
+
+ /* Now add the annulling, reg, label, and nop. */
+ if (annul && ! far)
+ {
+ strcpy (p, ",a");
+ p += 2;
+ }
+
+ if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
+ {
+ strcpy (p,
+ ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
+ ? ",pt" : ",pn");
+ p += 3;
+ }
+
+ *p = p < string + 8 ? '\t' : ' ';
+ p++;
+ *p++ = '%';
+ *p++ = '0' + reg;
+ *p++ = ',';
+ *p++ = ' ';
+ if (far)
+ {
+ int veryfar = 1, delta;
+
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ delta = (INSN_ADDRESSES (INSN_UID (dest))
+ - INSN_ADDRESSES (INSN_UID (insn)));
+ /* Leave some instructions for "slop". */
+ if (delta >= -260000 && delta < 260000)
+ veryfar = 0;
+ }
+
+ strcpy (p, ".+12\n\t nop\n\t");
+ /* Skip the next insn if requested or
+ if we know that it will be a nop. */
+ if (annul || ! final_sequence)
+ p[3] = '6';
+ p += 12;
+ if (veryfar)
+ {
+ strcpy (p, "b\t");
+ p += 2;
+ }
+ else
+ {
+ strcpy (p, "ba,pt\t%%xcc, ");
+ p += 13;
+ }
+ }
+ *p++ = '%';
+ *p++ = 'l';
+ *p++ = '0' + label;
+ *p++ = '%';
+ *p++ = '#';
+ *p = '\0';
+
+ return string;
+}
+
+/* Return 1 if any of the registers of the instruction are %l[0-7] or
+ %o[0-7]. Such instructions cannot be used in the delay slot of a return
+ insn on v9. If TEST is 0, also rename all %i[0-7] registers to their
+ %o[0-7] counterparts. */
+
+static int
+epilogue_renumber (register rtx *where, int test)
+{
+ register const char *fmt;
+ register int i;
+ register enum rtx_code code;
+
+ if (*where == 0)
+ return 0;
+
+ code = GET_CODE (*where);
+
+ switch (code)
+ {
+ case REG:
+ if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
+ return 1;
+ if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
+ *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
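+ /* Fall through. */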
+ case SCRATCH:
+ case CC0:
+ case PC:
+ case CONST_INT:
+ case CONST_DOUBLE:
+ return 0;
+
+ /* Do not replace the frame pointer with the stack pointer because
+ it can cause the delayed instruction to load below the stack.
+ This occurs when instructions like:
+
+ (set (reg/i:SI 24 %i0)
+ (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
+ (const_int -20 [0xffffffec])) 0))
+
+ are in the return delayed slot. */
+ case PLUS:
+ if (GET_CODE (XEXP (*where, 0)) == REG
+ && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
+ && (GET_CODE (XEXP (*where, 1)) != CONST_INT
+ || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
+ return 1;
+ break;
+
+ case MEM:
+ if (SPARC_STACK_BIAS
+ && GET_CODE (XEXP (*where, 0)) == REG
+ && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
+ return 1;
+ break;
+
+ default:
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
+ if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
+ return 1;
+ }
+ else if (fmt[i] == 'e'
+ && epilogue_renumber (&(XEXP (*where, i)), test))
+ return 1;
+ }
+ return 0;
+}
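+
+/* For instance, if the delay slot of a v9 return contains
+ (set (reg:SI %i0) (reg:SI %g1)), the TEST == 0 pass above rewrites
+ %i0 into its OUTGOING_REGNO counterpart %o0: the delay-slot insn
+ executes after the register window has been restored, so the old
+ input registers are only reachable under their output names. */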
+
+/* Leaf functions and non-leaf functions have different needs. */
+
+static const int
+reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
+
+static const int
+reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
+
+static const int *const reg_alloc_orders[] = {
+ reg_leaf_alloc_order,
+ reg_nonleaf_alloc_order};
+
+void
+order_regs_for_local_alloc (void)
+{
+ static int last_order_nonleaf = 1;
+
+ if (df_regs_ever_live_p (15) != last_order_nonleaf)
+ {
+ last_order_nonleaf = !last_order_nonleaf;
+ memcpy ((char *) reg_alloc_order,
+ (const char *) reg_alloc_orders[last_order_nonleaf],
+ FIRST_PSEUDO_REGISTER * sizeof (int));
+ }
+}
+
+/* Return 1 if REG and MEM are legitimate enough to allow the various
+ mem<-->reg splits to be run. */
+
+int
+sparc_splitdi_legitimate (rtx reg, rtx mem)
+{
+ /* Punt if we are here by mistake. */
+ gcc_assert (reload_completed);
+
+ /* We must have an offsettable memory reference. */
+ if (! offsettable_memref_p (mem))
+ return 0;
+
+ /* If we have legitimate args for ldd/std, we do not want
+ the split to happen. */
+ if ((REGNO (reg) % 2) == 0
+ && mem_min_alignment (mem, 8))
+ return 0;
+
+ /* Success. */
+ return 1;
+}
+
+/* Return 1 if x and y are some kind of REG and they refer to
+ different hard registers. This test is guaranteed to be
+ run after reload. */
+
+int
+sparc_absnegfloat_split_legitimate (rtx x, rtx y)
+{
+ if (GET_CODE (x) != REG)
+ return 0;
+ if (GET_CODE (y) != REG)
+ return 0;
+ if (REGNO (x) == REGNO (y))
+ return 0;
+ return 1;
+}
+
+/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
+ This makes them candidates for using ldd and std insns.
+
+ Note reg1 and reg2 *must* be hard registers. */
+
+int
+registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
+{
+ /* We might have been passed a SUBREG. */
+ if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
+ return 0;
+
+ if (REGNO (reg1) % 2 != 0)
+ return 0;
+
+ /* Integer ldd is deprecated in SPARC V9. */
+ if (TARGET_V9 && REGNO (reg1) < 32)
+ return 0;
+
+ return (REGNO (reg1) == REGNO (reg2) - 1);
+}
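+
+/* E.g. the pair (%o0, %o1) qualifies on pre-v9 targets: %o0 has an even
+ hard register number and %o1 immediately follows it. (%o1, %o2) is
+ rejected because the first register number is odd. */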
+
+/* Return 1 if the addresses in mem1 and mem2 are suitable for use in
+ an ldd or std insn.
+
+ This can only happen when addr1 and addr2, the addresses in mem1
+ and mem2, are consecutive memory locations (addr1 + 4 == addr2).
+ addr1 must also be aligned on a 64-bit boundary.
+
+ Also iff dependent_reg_rtx is not null it should not be used to
+ compute the address for mem1, i.e. we cannot optimize a sequence
+ like:
+ ld [%o0], %o0
+ ld [%o0 + 4], %o1
+ to
+ ldd [%o0], %o0
+ nor:
+ ld [%g3 + 4], %g3
+ ld [%g3], %g2
+ to
+ ldd [%g3], %g2
+
+ But, note that the transformation from:
+ ld [%g2 + 4], %g3
+ ld [%g2], %g2
+ to
+ ldd [%g2], %g2
+ is perfectly fine. Thus, the peephole2 patterns always pass us
+ the destination register of the first load, never the second one.
+
+ For stores we don't have a similar problem, so dependent_reg_rtx is
+ NULL_RTX. */
+
+int
+mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
+{
+ rtx addr1, addr2;
+ unsigned int reg1;
+ HOST_WIDE_INT offset1;
+
+ /* The mems cannot be volatile. */
+ if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
+ return 0;
+
+ /* MEM1 should be aligned on a 64-bit boundary. */
+ if (MEM_ALIGN (mem1) < 64)
+ return 0;
+
+ addr1 = XEXP (mem1, 0);
+ addr2 = XEXP (mem2, 0);
+
+ /* Extract a register number and offset (if used) from the first addr. */
+ if (GET_CODE (addr1) == PLUS)
+ {
+ /* If not a REG, return zero. */
+ if (GET_CODE (XEXP (addr1, 0)) != REG)
+ return 0;
+ else
+ {
+ reg1 = REGNO (XEXP (addr1, 0));
+ /* The offset must be constant! */
+ if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
+ return 0;
+ offset1 = INTVAL (XEXP (addr1, 1));
+ }
+ }
+ else if (GET_CODE (addr1) != REG)
+ return 0;
+ else
+ {
+ reg1 = REGNO (addr1);
+ /* This was a simple (mem (reg)) expression. Offset is 0. */
+ offset1 = 0;
+ }
+
+ /* Make sure the second address is a (mem (plus (reg) (const_int))). */
+ if (GET_CODE (addr2) != PLUS)
+ return 0;
+
+ if (GET_CODE (XEXP (addr2, 0)) != REG
+ || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
+ return 0;
+
+ if (reg1 != REGNO (XEXP (addr2, 0)))
+ return 0;
+
+ if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
+ return 0;
+
+ /* The first offset must be evenly divisible by 8 to ensure the
+ address is 64 bit aligned. */
+ if (offset1 % 8 != 0)
+ return 0;
+
+ /* The offset for the second addr must be 4 more than the first addr. */
+ if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
+ return 0;
+
+ /* All the tests passed. addr1 and addr2 are valid for ldd and std
+ instructions. */
+ return 1;
+}
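+
+/* E.g. the pair [%o0 + 8] / [%o0 + 12] qualifies: same base register,
+ first offset a multiple of 8 and second offset 4 greater. The pair
+ [%o0 + 4] / [%o0 + 8] is rejected because the first address is only
+ guaranteed 4-byte alignment. */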
+
+/* Return 1 if reg is a pseudo, or is the first register in
+ a hard register pair. This makes it suitable for use in
+ ldd and std insns. */
+
+int
+register_ok_for_ldd (rtx reg)
+{
+ /* We might have been passed a SUBREG. */
+ if (!REG_P (reg))
+ return 0;
+
+ if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
+ return (REGNO (reg) % 2 == 0);
+
+ return 1;
+}
+
+/* Return 1 if OP is a memory whose address is known to be
+ aligned to 8-byte boundary, or a pseudo during reload.
+ This makes it suitable for use in ldd and std insns. */
+
+int
+memory_ok_for_ldd (rtx op)
+{
+ if (MEM_P (op))
+ {
+ /* In 64-bit mode, we assume that the address is word-aligned. */
+ if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
+ return 0;
+
+ if ((reload_in_progress || reload_completed)
+ && !strict_memory_address_p (Pmode, XEXP (op, 0)))
+ return 0;
+ }
+ else if (REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ {
+ if (!(reload_in_progress && reg_renumber [REGNO (op)] < 0))
+ return 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case '#':
+ /* Output an insn in a delay slot. */
+ if (final_sequence)
+ sparc_indent_opcode = 1;
+ else
+ fputs ("\n\t nop", file);
+ return;
+ case '*':
+ /* Output an annul flag if there's nothing for the delay slot and we
+ are optimizing. This is always used with '(' below.
+ Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
+ this is a dbx bug. So, we only do this when optimizing.
+ On UltraSPARC, a branch in a delay slot causes a pipeline flush.
+ Always emit a nop in case the next instruction is a branch. */
+ if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
+ fputs (",a", file);
+ return;
+ case '(':
+ /* Output a 'nop' if there's nothing for the delay slot and we are
+ not optimizing. This is always used with '*' above. */
+ if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
+ fputs ("\n\t nop", file);
+ else if (final_sequence)
+ sparc_indent_opcode = 1;
+ return;
+ case ')':
+ /* Output the right displacement from the saved PC on function return.
+ The caller may have placed an "unimp" insn immediately after the call
+ so we have to account for it. This insn is used in the 32-bit ABI
+ when calling a function that returns a non zero-sized structure. The
+ 64-bit ABI doesn't have it. Be careful to have this test be the same
+ as that for the call. The exception is when sparc_std_struct_return
+ is enabled, the psABI is followed exactly and the adjustment is made
+ by the code in sparc_struct_value_rtx. The call emitted is the same
+ when sparc_std_struct_return is enabled. */
+ if (!TARGET_ARCH64
+ && cfun->returns_struct
+ && !sparc_std_struct_return
+ && DECL_SIZE (DECL_RESULT (current_function_decl))
+ && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
+ == INTEGER_CST
+ && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
+ fputs ("12", file);
+ else
+ fputc ('8', file);
+ return;
+ case '_':
+ /* Output the Embedded Medium/Anywhere code model base register. */
+ fputs (EMBMEDANY_BASE_REG, file);
+ return;
+ case '&':
+ /* Print some local dynamic TLS name. */
+ assemble_name (file, get_some_local_dynamic_name ());
+ return;
+
+ case 'Y':
+ /* Adjust the operand to take into account a RESTORE operation. */
+ if (GET_CODE (x) == CONST_INT)
+ break;
+ else if (GET_CODE (x) != REG)
+ output_operand_lossage ("invalid %%Y operand");
+ else if (REGNO (x) < 8)
+ fputs (reg_names[REGNO (x)], file);
+ else if (REGNO (x) >= 24 && REGNO (x) < 32)
+ fputs (reg_names[REGNO (x)-16], file);
+ else
+ output_operand_lossage ("invalid %%Y operand");
+ return;
+ case 'L':
+ /* Print out the low order register name of a register pair. */
+ if (WORDS_BIG_ENDIAN)
+ fputs (reg_names[REGNO (x)+1], file);
+ else
+ fputs (reg_names[REGNO (x)], file);
+ return;
+ case 'H':
+ /* Print out the high order register name of a register pair. */
+ if (WORDS_BIG_ENDIAN)
+ fputs (reg_names[REGNO (x)], file);
+ else
+ fputs (reg_names[REGNO (x)+1], file);
+ return;
+ case 'R':
+ /* Print out the second register name of a register pair or quad.
+ I.e., R (%o0) => %o1. */
+ fputs (reg_names[REGNO (x)+1], file);
+ return;
+ case 'S':
+ /* Print out the third register name of a register quad.
+ I.e., S (%o0) => %o2. */
+ fputs (reg_names[REGNO (x)+2], file);
+ return;
+ case 'T':
+ /* Print out the fourth register name of a register quad.
+ I.e., T (%o0) => %o3. */
+ fputs (reg_names[REGNO (x)+3], file);
+ return;
+ case 'x':
+ /* Print a condition code register. */
+ if (REGNO (x) == SPARC_ICC_REG)
+ {
+ /* We don't handle CC[X]_NOOVmode because they're not supposed
+ to occur here. */
+ if (GET_MODE (x) == CCmode)
+ fputs ("%icc", file);
+ else if (GET_MODE (x) == CCXmode)
+ fputs ("%xcc", file);
+ else
+ gcc_unreachable ();
+ }
+ else
+ /* %fccN register */
+ fputs (reg_names[REGNO (x)], file);
+ return;
+ case 'm':
+ /* Print the operand's address only. */
+ output_address (XEXP (x, 0));
+ return;
+ case 'r':
+ /* In this case we need a register. Use %g0 if the
+ operand is const0_rtx. */
+ if (x == const0_rtx
+ || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
+ {
+ fputs ("%g0", file);
+ return;
+ }
+ else
+ break;
+
+ case 'A':
+ switch (GET_CODE (x))
+ {
+ case IOR: fputs ("or", file); break;
+ case AND: fputs ("and", file); break;
+ case XOR: fputs ("xor", file); break;
+ default: output_operand_lossage ("invalid %%A operand");
+ }
+ return;
+
+ case 'B':
+ switch (GET_CODE (x))
+ {
+ case IOR: fputs ("orn", file); break;
+ case AND: fputs ("andn", file); break;
+ case XOR: fputs ("xnor", file); break;
+ default: output_operand_lossage ("invalid %%B operand");
+ }
+ return;
+
+ /* These are used by the conditional move instructions. */
+ case 'c' :
+ case 'C':
+ {
+ enum rtx_code rc = GET_CODE (x);
+
+ if (code == 'c')
+ {
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (GET_CODE (x));
+ else
+ rc = reverse_condition (GET_CODE (x));
+ }
+ switch (rc)
+ {
+ case NE: fputs ("ne", file); break;
+ case EQ: fputs ("e", file); break;
+ case GE: fputs ("ge", file); break;
+ case GT: fputs ("g", file); break;
+ case LE: fputs ("le", file); break;
+ case LT: fputs ("l", file); break;
+ case GEU: fputs ("geu", file); break;
+ case GTU: fputs ("gu", file); break;
+ case LEU: fputs ("leu", file); break;
+ case LTU: fputs ("lu", file); break;
+ case LTGT: fputs ("lg", file); break;
+ case UNORDERED: fputs ("u", file); break;
+ case ORDERED: fputs ("o", file); break;
+ case UNLT: fputs ("ul", file); break;
+ case UNLE: fputs ("ule", file); break;
+ case UNGT: fputs ("ug", file); break;
+ case UNGE: fputs ("uge", file); break;
+ case UNEQ: fputs ("ue", file); break;
+ default: output_operand_lossage (code == 'c'
+ ? "invalid %%c operand"
+ : "invalid %%C operand");
+ }
+ return;
+ }
+
+ /* These are used by the movr instruction pattern. */
+ case 'd':
+ case 'D':
+ {
+ enum rtx_code rc = (code == 'd'
+ ? reverse_condition (GET_CODE (x))
+ : GET_CODE (x));
+ switch (rc)
+ {
+ case NE: fputs ("ne", file); break;
+ case EQ: fputs ("e", file); break;
+ case GE: fputs ("gez", file); break;
+ case LT: fputs ("lz", file); break;
+ case LE: fputs ("lez", file); break;
+ case GT: fputs ("gz", file); break;
+ default: output_operand_lossage (code == 'd'
+ ? "invalid %%d operand"
+ : "invalid %%D operand");
+ }
+ return;
+ }
+
+ case 'b':
+ {
+ /* Print a sign-extended character. */
+ int i = trunc_int_for_mode (INTVAL (x), QImode);
+ fprintf (file, "%d", i);
+ return;
+ }
+
+ case 'f':
+ /* Operand must be a MEM; write its address. */
+ if (GET_CODE (x) != MEM)
+ output_operand_lossage ("invalid %%f operand");
+ output_address (XEXP (x, 0));
+ return;
+
+ case 's':
+ {
+ /* Print a sign-extended 32-bit value. */
+ HOST_WIDE_INT i;
+ if (GET_CODE(x) == CONST_INT)
+ i = INTVAL (x);
+ else if (GET_CODE(x) == CONST_DOUBLE)
+ i = CONST_DOUBLE_LOW (x);
+ else
+ {
+ output_operand_lossage ("invalid %%s operand");
+ return;
+ }
+ i = trunc_int_for_mode (i, SImode);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
+ return;
+ }
+
+ case 0:
+ /* Do nothing special. */
+ break;
+
+ default:
+ /* Undocumented flag. */
+ output_operand_lossage ("invalid operand output code");
+ }
+
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x)], file);
+ else if (GET_CODE (x) == MEM)
+ {
+ fputc ('[', file);
+ /* Poor Sun assembler doesn't understand absolute addressing. */
+ if (CONSTANT_P (XEXP (x, 0)))
+ fputs ("%g0+", file);
+ output_address (XEXP (x, 0));
+ fputc (']', file);
+ }
+ else if (GET_CODE (x) == HIGH)
+ {
+ fputs ("%hi(", file);
+ output_addr_const (file, XEXP (x, 0));
+ fputc (')', file);
+ }
+ else if (GET_CODE (x) == LO_SUM)
+ {
+ print_operand (file, XEXP (x, 0), 0);
+ if (TARGET_CM_MEDMID)
+ fputs ("+%l44(", file);
+ else
+ fputs ("+%lo(", file);
+ output_addr_const (file, XEXP (x, 1));
+ fputc (')', file);
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && (GET_MODE (x) == VOIDmode
+ || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
+ {
+ if (CONST_DOUBLE_HIGH (x) == 0)
+ fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
+ else if (CONST_DOUBLE_HIGH (x) == -1
+ && CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
+ else
+ output_operand_lossage ("long long constant not a valid immediate operand");
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ output_operand_lossage ("floating point constant not a valid immediate operand");
+ else { output_addr_const (file, x); }
+}
+
+/* Target hook for assembling integer objects. The sparc version has
+ special handling for aligned DI-mode objects. */
+
+static bool
+sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ /* ??? We only output .xword's for symbols and only then in environments
+ where the assembler can handle them. */
+ if (aligned_p && size == 8
+ && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE))
+ {
+ if (TARGET_V9)
+ {
+ assemble_integer_with_op ("\t.xword\t", x);
+ return true;
+ }
+ else
+ {
+ assemble_aligned_integer (4, const0_rtx);
+ assemble_aligned_integer (4, x);
+ return true;
+ }
+ }
+ return default_assemble_integer (x, size, aligned_p);
+}
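+
+/* E.g. on a 32-bit (!TARGET_V9) target, an aligned 8-byte symbolic
+ constant is emitted as two 4-byte integers, a zero high word followed
+ by the symbol itself, instead of a single .xword directive. */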
+
+/* Return the value of a code used in the .proc pseudo-op that says
+ what kind of result this function returns. For non-C types, we pick
+ the closest C type. */
+
+#ifndef SHORT_TYPE_SIZE
+#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
+#endif
+
+#ifndef INT_TYPE_SIZE
+#define INT_TYPE_SIZE BITS_PER_WORD
+#endif
+
+#ifndef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE BITS_PER_WORD
+#endif
+
+#ifndef LONG_LONG_TYPE_SIZE
+#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
+#endif
+
+#ifndef FLOAT_TYPE_SIZE
+#define FLOAT_TYPE_SIZE BITS_PER_WORD
+#endif
+
+#ifndef DOUBLE_TYPE_SIZE
+#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
+#endif
+
+#ifndef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
+#endif
+
+unsigned long
+sparc_type_code (register tree type)
+{
+ register unsigned long qualifiers = 0;
+ register unsigned shift;
+
+ /* Only the first 30 bits of the qualifier are valid. We must refrain from
+ setting more, since some assemblers will give an error for this. Also,
+ we must be careful to avoid shifts of 32 bits or more to avoid getting
+ unpredictable results. */
+
+ for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
+ {
+ switch (TREE_CODE (type))
+ {
+ case ERROR_MARK:
+ return qualifiers;
+
+ case ARRAY_TYPE:
+ qualifiers |= (3 << shift);
+ break;
+
+ case FUNCTION_TYPE:
+ case METHOD_TYPE:
+ qualifiers |= (2 << shift);
+ break;
+
+ case POINTER_TYPE:
+ case REFERENCE_TYPE:
+ case OFFSET_TYPE:
+ qualifiers |= (1 << shift);
+ break;
+
+ case RECORD_TYPE:
+ return (qualifiers | 8);
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ return (qualifiers | 9);
+
+ case ENUMERAL_TYPE:
+ return (qualifiers | 10);
+
+ case VOID_TYPE:
+ return (qualifiers | 16);
+
+ case INTEGER_TYPE:
+ /* If this is a range type, consider it to be the underlying
+ type. */
+ if (TREE_TYPE (type) != 0)
+ break;
+
+ /* Carefully distinguish all the standard types of C,
+ without messing up if the language is not C. We do this by
+ testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
+ look at both the names and the above fields, but that's redundant.
+ Any type whose size is between two C types will be considered
+ to be the wider of the two types. Also, we do not have a
+ special code to use for "long long", so anything wider than
+ long is treated the same. Note that we can't distinguish
+ between "int" and "long" in this code if they are the same
+ size, but that's fine, since neither can the assembler. */
+
+ if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
+ return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
+
+ else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
+ return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
+
+ else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
+ return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
+
+ else
+ return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
+
+ case REAL_TYPE:
+ /* If this is a range type, consider it to be the underlying
+ type. */
+ if (TREE_TYPE (type) != 0)
+ break;
+
+ /* Carefully distinguish all the standard types of C,
+ without messing up if the language is not C. */
+
+ if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
+ return (qualifiers | 6);
+
+ else
+ return (qualifiers | 7);
+
+ case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
+ /* ??? We need to distinguish between double and float complex types,
+ but I don't know how yet because I can't reach this code from
+ existing front-ends. */
+ return (qualifiers | 7); /* Who knows? */
+
+ case VECTOR_TYPE:
+ case BOOLEAN_TYPE: /* Boolean truth value type. */
+ case LANG_TYPE:
+ case NULLPTR_TYPE:
+ return qualifiers;
+
+ default:
+ gcc_unreachable (); /* Not a type! */
+ }
+ }
+
+ return qualifiers;
+}
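+
+/* As a worked example, for the C type "int *" the loop above records
+ the POINTER_TYPE derivation as (1 << 6) and then returns from the
+ underlying INTEGER_TYPE with base code 4, yielding 0x44. */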
+
+/* Nested function support. */
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function.
+
+ This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
+ (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
+ (to store insns). This is a bit excessive. Perhaps a different
+ mechanism would be better here.
+
+ Emit enough FLUSH insns to synchronize the data and instruction caches. */
+
+static void
+sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
+{
+ /* SPARC 32-bit trampoline:
+
+ sethi %hi(fn), %g1
+ sethi %hi(static), %g2
+ jmp %g1+%lo(fn)
+ or %g2, %lo(static), %g2
+
+ SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
+ JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
+ */
+
+ emit_move_insn
+ (adjust_address (m_tramp, SImode, 0),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, fnaddr,
+ size_int (10), 0, 1),
+ GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (adjust_address (m_tramp, SImode, 4),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, cxt,
+ size_int (10), 0, 1),
+ GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (adjust_address (m_tramp, SImode, 8),
+ expand_binop (SImode, ior_optab,
+ expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (adjust_address (m_tramp, SImode, 12),
+ expand_binop (SImode, ior_optab,
+ expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
+ aligned on a 16 byte boundary so one flush clears it all. */
+ emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0))));
+ if (sparc_cpu != PROCESSOR_ULTRASPARC
+ && sparc_cpu != PROCESSOR_ULTRASPARC3
+ && sparc_cpu != PROCESSOR_NIAGARA
+ && sparc_cpu != PROCESSOR_NIAGARA2)
+ emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
+
+ /* Call __enable_execute_stack after writing onto the stack to make sure
+ the stack address is accessible. */
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+#endif
+}
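+
+/* To see how the first word is formed: 0x03000000 is the SETHI encoding
+ with rd = %g1 (00 00001 100 in the diagram above) and fnaddr >> 10
+ supplies the 22-bit immediate, so the IOR yields the image of
+ "sethi %hi(fn), %g1". */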
+
+/* The 64-bit version is simpler because it makes more sense to load the
+ values as "immediate" data out of the trampoline. It's also easier since
+ we can read the PC without clobbering a register. */
+
+static void
+sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
+{
+ /* SPARC 64-bit trampoline:
+
+ rd %pc, %g1
+ ldx [%g1+24], %g5
+ jmp %g5
+ ldx [%g1+16], %g5
+ +16 bytes data
+ */
+
+ emit_move_insn (adjust_address (m_tramp, SImode, 0),
+ GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
+ emit_move_insn (adjust_address (m_tramp, SImode, 4),
+ GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
+ emit_move_insn (adjust_address (m_tramp, SImode, 8),
+ GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
+ emit_move_insn (adjust_address (m_tramp, SImode, 12),
+ GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
+ emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
+ emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
+ emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
+
+ if (sparc_cpu != PROCESSOR_ULTRASPARC
+ && sparc_cpu != PROCESSOR_ULTRASPARC3
+ && sparc_cpu != PROCESSOR_NIAGARA
+ && sparc_cpu != PROCESSOR_NIAGARA2)
+ emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
+
+ /* Call __enable_execute_stack after writing onto the stack to make sure
+ the stack address is accessible. */
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+#endif
+}
+
+/* Worker for TARGET_TRAMPOLINE_INIT. */
+
+static void
+sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
+ cxt = force_reg (Pmode, cxt);
+ if (TARGET_ARCH64)
+ sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
+ else
+ sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
+}
+
+/* Adjust the cost of a scheduling dependency. Return the new cost of
+ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
+
+static int
+supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type;
+
+ if (! recog_memoized (insn))
+ return 0;
+
+ insn_type = get_attr_type (insn);
+
+ if (REG_NOTE_KIND (link) == 0)
+ {
+ /* Data dependency; DEP_INSN writes a register that INSN reads some
+ cycles later. */
+
+ /* If a load, then the dependence must be on the memory address;
+ add an extra "cycle". Note that the cost could be two cycles
+ if the reg was written late in an instruction group; we cannot tell
+ here. */
+ if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
+ return cost + 3;
+
+ /* Get the delay only if the address of the store is the dependence. */
+ if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
+ {
+ rtx pat = PATTERN(insn);
+ rtx dep_pat = PATTERN (dep_insn);
+
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ return cost; /* This should not happen! */
+
+ /* The dependency between the two instructions was on the data that
+ is being stored. Assume that this implies that the address of the
+ store is not dependent. */
+ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+ return cost;
+
+ return cost + 3; /* An approximation. */
+ }
+
+ /* A shift instruction cannot receive its data from an instruction
+ in the same cycle; add a one cycle penalty. */
+ if (insn_type == TYPE_SHIFT)
+ return cost + 3; /* Split before cascade into shift. */
+ }
+ else
+ {
+ /* Anti- or output- dependency; DEP_INSN reads/writes a register that
+ INSN writes some cycles later. */
+
+ /* These are only significant for the fpu unit; writing a fp reg before
+ the fpu has finished with it stalls the processor. */
+
+ /* Reusing an integer register causes no problems. */
+ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
+ return 0;
+ }
+
+ return cost;
+}
+
+static int
+hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type, dep_type;
+ rtx pat = PATTERN(insn);
+ rtx dep_pat = PATTERN (dep_insn);
+
+ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_type = get_attr_type (dep_insn);
+
+ switch (REG_NOTE_KIND (link))
+ {
+ case 0:
+ /* Data dependency; DEP_INSN writes a register that INSN reads some
+ cycles later. */
+
+ switch (insn_type)
+ {
+ case TYPE_STORE:
+ case TYPE_FPSTORE:
+ /* Get the delay iff the address of the store is the dependence. */
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+ return cost;
+
+ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+ return cost;
+ return cost + 3;
+
+ case TYPE_LOAD:
+ case TYPE_SLOAD:
+ case TYPE_FPLOAD:
+ /* If a load, then the dependence must be on the memory address. If
+ the addresses aren't equal, then it might be a false dependency. */
+ if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+ {
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+ || GET_CODE (SET_DEST (dep_pat)) != MEM
+ || GET_CODE (SET_SRC (pat)) != MEM
+ || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
+ XEXP (SET_SRC (pat), 0)))
+ return cost + 2;
+
+ return cost + 8;
+ }
+ break;
+
+ case TYPE_BRANCH:
+ /* Compare to branch latency is 0. There is no benefit from
+ separating compare and branch. */
+ if (dep_type == TYPE_COMPARE)
+ return 0;
+ /* Floating point compare to branch latency is less than
+ compare to conditional move. */
+ if (dep_type == TYPE_FPCMP)
+ return cost - 1;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case REG_DEP_ANTI:
+ /* Anti-dependencies only penalize the fpu unit. */
+ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+static int
+sparc_adjust_cost(rtx insn, rtx link, rtx dep, int cost)
+{
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_SUPERSPARC:
+ cost = supersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ cost = hypersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ default:
+ break;
+ }
+ return cost;
+}
+
+static void
+sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{}
+
+static int
+sparc_use_sched_lookahead (void)
+{
+ if (sparc_cpu == PROCESSOR_NIAGARA
+ || sparc_cpu == PROCESSOR_NIAGARA2)
+ return 0;
+ if (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3)
+ return 4;
+ if ((1 << sparc_cpu) &
+ ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+ (1 << PROCESSOR_SPARCLITE86X)))
+ return 3;
+ return 0;
+}
+
+static int
+sparc_issue_rate (void)
+{
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_NIAGARA:
+ case PROCESSOR_NIAGARA2:
+ default:
+ return 1;
+ case PROCESSOR_V9:
+ /* Assume V9 processors are capable of at least dual-issue. */
+ return 2;
+ case PROCESSOR_SUPERSPARC:
+ return 3;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return 2;
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return 4;
+ }
+}
+
+static int
+set_extends (rtx insn)
+{
+ register rtx pat = PATTERN (insn);
+
+ switch (GET_CODE (SET_SRC (pat)))
+ {
+ /* Load and some shift instructions zero extend. */
+ case MEM:
+ case ZERO_EXTEND:
+ /* sethi clears the high bits. */
+ case HIGH:
+ /* LO_SUM is used with sethi; sethi clears the high
+ bits and the values used with lo_sum are positive. */
+ case LO_SUM:
+ /* Store flag stores 0 or 1. */
+ case LT: case LTU:
+ case GT: case GTU:
+ case LE: case LEU:
+ case GE: case GEU:
+ case EQ:
+ case NE:
+ return 1;
+ case AND:
+ {
+ rtx op0 = XEXP (SET_SRC (pat), 0);
+ rtx op1 = XEXP (SET_SRC (pat), 1);
+ if (GET_CODE (op1) == CONST_INT)
+ return INTVAL (op1) >= 0;
+ if (GET_CODE (op0) != REG)
+ return 0;
+ if (sparc_check_64 (op0, insn) == 1)
+ return 1;
+ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
+ }
+ case IOR:
+ case XOR:
+ {
+ rtx op0 = XEXP (SET_SRC (pat), 0);
+ rtx op1 = XEXP (SET_SRC (pat), 1);
+ if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
+ return 0;
+ if (GET_CODE (op1) == CONST_INT)
+ return INTVAL (op1) >= 0;
+ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
+ }
+ case LSHIFTRT:
+ return GET_MODE (SET_SRC (pat)) == SImode;
+ /* Positive integers leave the high bits zero. */
+ case CONST_DOUBLE:
+ return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
+ case CONST_INT:
+ return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
+ case ASHIFTRT:
+ case SIGN_EXTEND:
+ return - (GET_MODE (SET_SRC (pat)) == SImode);
+ case REG:
+ return sparc_check_64 (SET_SRC (pat), insn);
+ default:
+ return 0;
+ }
+}
+
+/* We _ought_ to have only one kind per function, but... */
+static GTY(()) rtx sparc_addr_diff_list;
+static GTY(()) rtx sparc_addr_list;
+
+void
+sparc_defer_case_vector (rtx lab, rtx vec, int diff)
+{
+ vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
+ if (diff)
+ sparc_addr_diff_list
+ = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
+ else
+ sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
+}
+
+static void
+sparc_output_addr_vec (rtx vec)
+{
+ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
+ int idx, vlen = XVECLEN (body, 0);
+
+#ifdef ASM_OUTPUT_ADDR_VEC_START
+ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
+#endif
+
+#ifdef ASM_OUTPUT_CASE_LABEL
+ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
+ NEXT_INSN (lab));
+#else
+ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
+#endif
+
+ for (idx = 0; idx < vlen; idx++)
+ {
+ ASM_OUTPUT_ADDR_VEC_ELT
+ (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
+ }
+
+#ifdef ASM_OUTPUT_ADDR_VEC_END
+ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
+#endif
+}
+
+static void
+sparc_output_addr_diff_vec (rtx vec)
+{
+ rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
+ rtx base = XEXP (XEXP (body, 0), 0);
+ int idx, vlen = XVECLEN (body, 1);
+
+#ifdef ASM_OUTPUT_ADDR_VEC_START
+ ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
+#endif
+
+#ifdef ASM_OUTPUT_CASE_LABEL
+ ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
+ NEXT_INSN (lab));
+#else
+ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
+#endif
+
+ for (idx = 0; idx < vlen; idx++)
+ {
+ ASM_OUTPUT_ADDR_DIFF_ELT
+ (asm_out_file,
+ body,
+ CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
+ CODE_LABEL_NUMBER (base));
+ }
+
+#ifdef ASM_OUTPUT_ADDR_VEC_END
+ ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
+#endif
+}
+
+static void
+sparc_output_deferred_case_vectors (void)
+{
+ rtx t;
+ int align;
+
+ if (sparc_addr_list == NULL_RTX
+ && sparc_addr_diff_list == NULL_RTX)
+ return;
+
+ /* Align to cache line in the function's code section. */
+ switch_to_section (current_function_section ());
+
+ align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
+ if (align > 0)
+ ASM_OUTPUT_ALIGN (asm_out_file, align);
+
+ for (t = sparc_addr_list; t ; t = XEXP (t, 1))
+ sparc_output_addr_vec (XEXP (t, 0));
+ for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
+ sparc_output_addr_diff_vec (XEXP (t, 0));
+
+ sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
+}
+
+/* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
+ unknown. Return 1 if the high bits are zero, -1 if the register is
+ sign extended. */
+int
+sparc_check_64 (rtx x, rtx insn)
+{
+ /* If a register is set only once it is safe to ignore insns this
+ code does not know how to handle. The loop will either recognize
+ the single set and return the correct value or fail to recognize
+ it and return 0. */
+ int set_once = 0;
+ rtx y = x;
+
+ gcc_assert (GET_CODE (x) == REG);
+
+ if (GET_MODE (x) == DImode)
+ y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
+
+ if (flag_expensive_optimizations
+ && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
+ set_once = 1;
+
+ if (insn == 0)
+ {
+ if (set_once)
+ insn = get_last_insn_anywhere ();
+ else
+ return 0;
+ }
+
+ while ((insn = PREV_INSN (insn)))
+ {
+ switch (GET_CODE (insn))
+ {
+ case JUMP_INSN:
+ case NOTE:
+ break;
+ case CODE_LABEL:
+ case CALL_INSN:
+ default:
+ if (! set_once)
+ return 0;
+ break;
+ case INSN:
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != SET)
+ return 0;
+ if (rtx_equal_p (x, SET_DEST (pat)))
+ return set_extends (insn);
+ if (y && rtx_equal_p (y, SET_DEST (pat)))
+ return set_extends (insn);
+ if (reg_overlap_mentioned_p (SET_DEST (pat), y))
+ return 0;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Returns assembly code to perform a DImode shift using
+ a 64-bit global or out register on SPARC-V8+. */
+const char *
+output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
+{
+ static char asm_code[60];
+
+ /* The scratch register is only required when the destination
+ register is not a 64-bit global or out register. */
+ if (which_alternative != 2)
+ operands[3] = operands[0];
+
+ /* We can only shift by constants <= 63. */
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ output_asm_insn ("mov\t%1, %3", operands);
+ }
+ else
+ {
+ output_asm_insn ("sllx\t%H1, 32, %3", operands);
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ output_asm_insn ("or\t%L1, %3, %3", operands);
+ }
+
+ strcpy(asm_code, opcode);
+
+ if (which_alternative != 2)
+ return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
+ else
+ return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
+}
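+
+/* E.g. for OPCODE == "sllx" the common alternatives return
+ "sllx\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0", i.e. the full 64-bit shift
+ is done in one sllx and the upper 32 bits of the result are then
+ extracted into the high-word register with srlx. */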
+
+/* Output rtl to increment the profiler label LABELNO
+ for profiling a function entry. */
+
+void
+sparc_profile_hook (int labelno)
+{
+ char buf[32];
+ rtx lab, fun;
+
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
+ if (NO_PROFILE_COUNTERS)
+ {
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 0);
+ }
+ else
+ {
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+ lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
+ }
+}
+
+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
+
+static void
+sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
+ tree decl ATTRIBUTE_UNUSED)
+{
+ fprintf (asm_out_file, "\t.section\t\"%s\"", name);
+
+ if (!(flags & SECTION_DEBUG))
+ fputs (",#alloc", asm_out_file);
+ if (flags & SECTION_WRITE)
+ fputs (",#write", asm_out_file);
+ if (flags & SECTION_TLS)
+ fputs (",#tls", asm_out_file);
+ if (flags & SECTION_CODE)
+ fputs (",#execinstr", asm_out_file);
+
+ /* ??? Handle SECTION_BSS. */
+
+ fputc ('\n', asm_out_file);
+}
+
+/* We do not allow indirect calls to be optimized into sibling calls.
+
+ We cannot use sibling calls when delayed branches are disabled
+ because they will likely require the call delay slot to be filled.
+
+ Also, on SPARC 32-bit we cannot emit a sibling call when the
+ current function returns a structure. This is because the "unimp
+ after call" convention would cause the callee to return to the
+ wrong place. The generic code already disallows cases where the
+ function being called returns a structure.
+
+ It may seem strange how this last case could occur. Usually there
+ is code after the call which jumps to epilogue code which dumps the
+ return value into the struct return area. That ought to invalidate
+ the sibling call, right? Well, in the C++ case we can end up passing
+ the pointer to the struct return area to a constructor (which returns
+ void) and then nothing else happens. Such a sibling call would look
+ valid without the added check here.
+
+ VxWorks PIC PLT entries require the global pointer to be initialized
+ on entry. We therefore can't emit sibling calls to them. */
+static bool
+sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ return (decl
+ && flag_delayed_branch
+ && (TARGET_ARCH64 || ! cfun->returns_struct)
+ && !(TARGET_VXWORKS_RTP
+ && flag_pic
+ && !targetm.binds_local_p (decl)));
+}
+
+/* libfunc renaming. */
+
+static void
+sparc_init_libfuncs (void)
+{
+ if (TARGET_ARCH32)
+ {
+ /* Use the subroutines that Sun's library provides for integer
+ multiply and divide. The `*' prevents an underscore from
+ being prepended by the compiler. .umul is a little faster
+ than .mul. */
+ set_optab_libfunc (smul_optab, SImode, "*.umul");
+ set_optab_libfunc (sdiv_optab, SImode, "*.div");
+ set_optab_libfunc (udiv_optab, SImode, "*.udiv");
+ set_optab_libfunc (smod_optab, SImode, "*.rem");
+ set_optab_libfunc (umod_optab, SImode, "*.urem");
+
+ /* TFmode arithmetic. These names are part of the SPARC 32-bit ABI. */
+ set_optab_libfunc (add_optab, TFmode, "_Q_add");
+ set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
+ set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
+ set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
+ set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
+
+ /* We can define the TFmode sqrt optab only if TARGET_FPU. This
+ is because with soft-float, the SFmode and DFmode sqrt
+ instructions will be absent, and the compiler will notice and
+ try to use the TFmode sqrt instruction for calls to the
+ builtin function sqrt, but this fails. */
+ if (TARGET_FPU)
+ set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
+
+ set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
+ set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
+ set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
+ set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
+ set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
+ set_optab_libfunc (le_optab, TFmode, "_Q_fle");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
+
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
+ set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
+
+ if (DITF_CONVERSION_LIBFUNCS)
+ {
+ set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
+ set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
+ set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
+ set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
+ }
+
+ if (SUN_CONVERSION_LIBFUNCS)
+ {
+ set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
+ set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
+ set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
+ set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
+ }
+ }
+ if (TARGET_ARCH64)
+ {
+ /* In the SPARC 64-bit ABI, SImode multiply and divide functions
+ do not exist in the library. Make sure the compiler does not
+ emit calls to them by accident. (It should always use the
+ hardware instructions.) */
+ set_optab_libfunc (smul_optab, SImode, 0);
+ set_optab_libfunc (sdiv_optab, SImode, 0);
+ set_optab_libfunc (udiv_optab, SImode, 0);
+ set_optab_libfunc (smod_optab, SImode, 0);
+ set_optab_libfunc (umod_optab, SImode, 0);
+
+ if (SUN_INTEGER_MULTIPLY_64)
+ {
+ set_optab_libfunc (smul_optab, DImode, "__mul64");
+ set_optab_libfunc (sdiv_optab, DImode, "__div64");
+ set_optab_libfunc (udiv_optab, DImode, "__udiv64");
+ set_optab_libfunc (smod_optab, DImode, "__rem64");
+ set_optab_libfunc (umod_optab, DImode, "__urem64");
+ }
+
+ if (SUN_CONVERSION_LIBFUNCS)
+ {
+ set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
+ set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
+ set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
+ set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
+ }
+ }
+}
+
+#define def_builtin(NAME, CODE, TYPE) \
+ add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
+ NULL_TREE)
+
+/* Implement the TARGET_INIT_BUILTINS target hook.
+ Create builtin functions for special SPARC instructions. */
+
+static void
+sparc_init_builtins (void)
+{
+ if (TARGET_VIS)
+ sparc_vis_init_builtins ();
+}
+
+/* Create builtin functions for VIS 1.0 instructions. */
+
+static void
+sparc_vis_init_builtins (void)
+{
+ tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
+ tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
+ tree v4hi = build_vector_type (intHI_type_node, 4);
+ tree v2hi = build_vector_type (intHI_type_node, 2);
+ tree v2si = build_vector_type (intSI_type_node, 2);
+
+ tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
+ tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
+ tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
+ tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
+ tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
+ tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
+ tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
+ tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
+ tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
+ tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
+ tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
+ tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
+ tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
+ v8qi, v8qi,
+ intDI_type_node, 0);
+ tree di_ftype_di_di = build_function_type_list (intDI_type_node,
+ intDI_type_node,
+ intDI_type_node, 0);
+ tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
+ ptr_type_node,
+ intSI_type_node, 0);
+ tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
+ ptr_type_node,
+ intDI_type_node, 0);
+
+ /* Packing and expanding vectors. */
+ def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
+ def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
+ v8qi_ftype_v2si_v8qi);
+ def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
+ v2hi_ftype_v2si);
+ def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
+ def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
+ v8qi_ftype_v4qi_v4qi);
+
+ /* Multiplications. */
+ def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
+ v4hi_ftype_v4qi_v4hi);
+ def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
+ v4hi_ftype_v4qi_v2hi);
+ def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
+ v4hi_ftype_v4qi_v2hi);
+ def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
+ v4hi_ftype_v8qi_v4hi);
+ def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
+ v4hi_ftype_v8qi_v4hi);
+ def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
+ v2si_ftype_v4qi_v2hi);
+ def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
+ v2si_ftype_v4qi_v2hi);
+
+ /* Data aligning. */
+ def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
+ v8qi_ftype_v8qi_v8qi);
+ def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
+ v2si_ftype_v2si_v2si);
+ def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
+ di_ftype_di_di);
+ if (TARGET_ARCH64)
+ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
+ ptr_ftype_ptr_di);
+ else
+ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
+ ptr_ftype_ptr_si);
+
+ /* Pixel distance. */
+ def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
+ di_ftype_v8qi_v8qi_di);
+}
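+
+/* Illustrative example (not part of the original sources): with a
+   VIS-capable CPU selected (e.g. -mcpu=ultrasparc), user code can call
+   the builtins registered above directly:
+
+     typedef short         vec16 __attribute__ ((vector_size (8)));
+     typedef unsigned char vec8  __attribute__ ((vector_size (4)));
+
+     vec8 pack (vec16 x) { return __builtin_vis_fpack16 (x); }
+
+   which should compile down to a single fpack16 instruction.  */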
+
+/* Handle the TARGET_EXPAND_BUILTIN target hook.
+   Expand builtin functions for SPARC intrinsics.  */
+
+static rtx
+sparc_expand_builtin (tree exp, rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode tmode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree arg;
+ call_expr_arg_iterator iter;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int icode = DECL_FUNCTION_CODE (fndecl);
+ rtx pat, op[4];
+ enum machine_mode mode[4];
+ int arg_count = 0;
+
+ mode[0] = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != mode[0]
+ || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ op[0] = gen_reg_rtx (mode[0]);
+ else
+ op[0] = target;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ arg_count++;
+ mode[arg_count] = insn_data[icode].operand[arg_count].mode;
+ op[arg_count] = expand_normal (arg);
+
+ if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
+ mode[arg_count]))
+ op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
+ }
+
+ switch (arg_count)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!pat)
+ return NULL_RTX;
+
+ emit_insn (pat);
+
+ return op[0];
+}
+
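+/* Return one element of an 8x16 VIS partitioned multiply: the product
+   of the 8-bit value E8 and the 16-bit value E16, scaled down by 256
+   with rounding to nearest.  */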
+static int
+sparc_vis_mul8x16 (int e8, int e16)
+{
+ return (e8 * e16 + 128) / 256;
+}
+
+/* Multiply the vector elements in ELTS0 by the elements in ELTS1 as
+   specified by FNCODE.  All of the elements in the ELTS0 and ELTS1 lists
+   must be integer constants.  A tree list with the results of the
+   multiplications is returned, and each element in the list is of
+   INNER_TYPE.  */
+
+static tree
+sparc_handle_vis_mul8x16 (int fncode, tree inner_type, tree elts0, tree elts1)
+{
+ tree n_elts = NULL_TREE;
+ int scale;
+
+ switch (fncode)
+ {
+ case CODE_FOR_fmul8x16_vis:
+ for (; elts0 && elts1;
+ elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
+ {
+ int val
+ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
+ TREE_INT_CST_LOW (TREE_VALUE (elts1)));
+ n_elts = tree_cons (NULL_TREE,
+ build_int_cst (inner_type, val),
+ n_elts);
+ }
+ break;
+
+ case CODE_FOR_fmul8x16au_vis:
+ scale = TREE_INT_CST_LOW (TREE_VALUE (elts1));
+
+ for (; elts0; elts0 = TREE_CHAIN (elts0))
+ {
+ int val
+ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
+ scale);
+ n_elts = tree_cons (NULL_TREE,
+ build_int_cst (inner_type, val),
+ n_elts);
+ }
+ break;
+
+ case CODE_FOR_fmul8x16al_vis:
+ scale = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (elts1)));
+
+ for (; elts0; elts0 = TREE_CHAIN (elts0))
+ {
+ int val
+ = sparc_vis_mul8x16 (TREE_INT_CST_LOW (TREE_VALUE (elts0)),
+ scale);
+ n_elts = tree_cons (NULL_TREE,
+ build_int_cst (inner_type, val),
+ n_elts);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+  return nreverse (n_elts);
+}
+
+/* Handle TARGET_FOLD_BUILTIN target hook.
+ Fold builtin functions for SPARC intrinsics. If IGNORE is true the
+ result of the function call is ignored. NULL_TREE is returned if the
+ function could not be folded. */
+
+static tree
+sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
+ tree *args, bool ignore)
+{
+ tree arg0, arg1, arg2;
+ tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
+ enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
+
+ if (ignore
+ && icode != CODE_FOR_alignaddrsi_vis
+ && icode != CODE_FOR_alignaddrdi_vis)
+ return build_zero_cst (rtype);
+
+ switch (icode)
+ {
+ case CODE_FOR_fexpand_vis:
+ arg0 = args[0];
+ STRIP_NOPS (arg0);
+
+ if (TREE_CODE (arg0) == VECTOR_CST)
+ {
+ tree inner_type = TREE_TYPE (rtype);
+ tree elts = TREE_VECTOR_CST_ELTS (arg0);
+ tree n_elts = NULL_TREE;
+
+ for (; elts; elts = TREE_CHAIN (elts))
+ {
+ unsigned int val = TREE_INT_CST_LOW (TREE_VALUE (elts)) << 4;
+ n_elts = tree_cons (NULL_TREE,
+ build_int_cst (inner_type, val),
+ n_elts);
+ }
+ return build_vector (rtype, nreverse (n_elts));
+ }
+ break;
+
+ case CODE_FOR_fmul8x16_vis:
+ case CODE_FOR_fmul8x16au_vis:
+ case CODE_FOR_fmul8x16al_vis:
+ arg0 = args[0];
+ arg1 = args[1];
+ STRIP_NOPS (arg0);
+ STRIP_NOPS (arg1);
+
+ if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
+ {
+ tree inner_type = TREE_TYPE (rtype);
+ tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
+ tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
+ tree n_elts = sparc_handle_vis_mul8x16 (icode, inner_type, elts0,
+ elts1);
+
+ return build_vector (rtype, n_elts);
+ }
+ break;
+
+ case CODE_FOR_fpmerge_vis:
+ arg0 = args[0];
+ arg1 = args[1];
+ STRIP_NOPS (arg0);
+ STRIP_NOPS (arg1);
+
+ if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
+ {
+ tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
+ tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
+ tree n_elts = NULL_TREE;
+
+ for (; elts0 && elts1;
+ elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
+ {
+ n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts0), n_elts);
+ n_elts = tree_cons (NULL_TREE, TREE_VALUE (elts1), n_elts);
+ }
+
+ return build_vector (rtype, nreverse (n_elts));
+ }
+ break;
+
+ case CODE_FOR_pdist_vis:
+ arg0 = args[0];
+ arg1 = args[1];
+ arg2 = args[2];
+ STRIP_NOPS (arg0);
+ STRIP_NOPS (arg1);
+ STRIP_NOPS (arg2);
+
+ if (TREE_CODE (arg0) == VECTOR_CST
+ && TREE_CODE (arg1) == VECTOR_CST
+ && TREE_CODE (arg2) == INTEGER_CST)
+ {
+ int overflow = 0;
+ unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
+ HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
+ tree elts0 = TREE_VECTOR_CST_ELTS (arg0);
+ tree elts1 = TREE_VECTOR_CST_ELTS (arg1);
+
+ for (; elts0 && elts1;
+ elts0 = TREE_CHAIN (elts0), elts1 = TREE_CHAIN (elts1))
+ {
+ unsigned HOST_WIDE_INT
+ low0 = TREE_INT_CST_LOW (TREE_VALUE (elts0)),
+ low1 = TREE_INT_CST_LOW (TREE_VALUE (elts1));
+ HOST_WIDE_INT high0 = TREE_INT_CST_HIGH (TREE_VALUE (elts0));
+ HOST_WIDE_INT high1 = TREE_INT_CST_HIGH (TREE_VALUE (elts1));
+
+ unsigned HOST_WIDE_INT l;
+ HOST_WIDE_INT h;
+
+ overflow |= neg_double (low1, high1, &l, &h);
+ overflow |= add_double (low0, high0, l, h, &l, &h);
+ if (h < 0)
+ overflow |= neg_double (l, h, &l, &h);
+
+ overflow |= add_double (low, high, l, h, &low, &high);
+ }
+
+ gcc_assert (overflow == 0);
+
+ return build_int_cst_wide (rtype, low, high);
+ }
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
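+
+/* For example, __builtin_vis_fexpand applied to the constant v4qi vector
+   { 1, 2, 3, 4 } is folded at compile time to the v4hi vector
+   { 16, 32, 48, 64 }, since fexpand shifts each 8-bit element left by 4
+   into 16-bit fixed point.  */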
+
+/* ??? This duplicates information provided to the compiler by the
+ ??? scheduler description. Some day, teach genautomata to output
+ ??? the latencies and then CSE will just use that. */
+
+static bool
+sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+ bool float_mode_p = FLOAT_MODE_P (mode);
+
+ switch (code)
+ {
+ case CONST_INT:
+ if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
+ {
+ *total = 0;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case HIGH:
+ *total = 2;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = 4;
+ return true;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode
+ && ((CONST_DOUBLE_HIGH (x) == 0
+ && CONST_DOUBLE_LOW (x) < 0x1000)
+ || (CONST_DOUBLE_HIGH (x) == -1
+ && CONST_DOUBLE_LOW (x) < 0
+ && CONST_DOUBLE_LOW (x) >= -0x1000)))
+ *total = 0;
+ else
+ *total = 8;
+ return true;
+
+ case MEM:
+ /* If outer-code was a sign or zero extension, a cost
+ of COSTS_N_INSNS (1) was already added in. This is
+ why we are subtracting it back out. */
+ if (outer_code == ZERO_EXTEND)
+ {
+ *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
+ }
+ else if (outer_code == SIGN_EXTEND)
+ {
+ *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
+ }
+ else if (float_mode_p)
+ {
+ *total = sparc_costs->float_load;
+ }
+ else
+ {
+ *total = sparc_costs->int_load;
+ }
+
+ return true;
+
+ case PLUS:
+ case MINUS:
+ if (float_mode_p)
+ *total = sparc_costs->float_plusminus;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case MULT:
+ if (float_mode_p)
+ *total = sparc_costs->float_mul;
+ else if (! TARGET_HARD_MUL)
+ *total = COSTS_N_INSNS (25);
+ else
+ {
+ int bit_cost;
+
+ bit_cost = 0;
+ if (sparc_costs->int_mul_bit_factor)
+ {
+ int nbits;
+
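+          /* Estimate nbits as the population count of the constant
+             multiplier (clamped below at 3); set bits beyond the first
+             3 add extra cost, scaled down by int_mul_bit_factor.  */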
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ for (nbits = 0; value != 0; value &= value - 1)
+ nbits++;
+ }
+ else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && GET_MODE (XEXP (x, 1)) == VOIDmode)
+ {
+ rtx x1 = XEXP (x, 1);
+ unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1);
+ unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1);
+
+ for (nbits = 0; value1 != 0; value1 &= value1 - 1)
+ nbits++;
+ for (; value2 != 0; value2 &= value2 - 1)
+ nbits++;
+ }
+ else
+ nbits = 7;
+
+ if (nbits < 3)
+ nbits = 3;
+ bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
+ bit_cost = COSTS_N_INSNS (bit_cost);
+ }
+
+ if (mode == DImode)
+ *total = sparc_costs->int_mulX + bit_cost;
+ else
+ *total = sparc_costs->int_mul + bit_cost;
+ }
+ return false;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
+ return false;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (float_mode_p)
+ {
+ if (mode == DFmode)
+ *total = sparc_costs->float_div_df;
+ else
+ *total = sparc_costs->float_div_sf;
+ }
+ else
+ {
+ if (mode == DImode)
+ *total = sparc_costs->int_divX;
+ else
+ *total = sparc_costs->int_div;
+ }
+ return false;
+
+ case NEG:
+ if (! float_mode_p)
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case ABS:
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ case FIX:
+ case UNSIGNED_FIX:
+ case FLOAT_EXTEND:
+ case FLOAT_TRUNCATE:
+ *total = sparc_costs->float_move;
+ return false;
+
+ case SQRT:
+ if (mode == DFmode)
+ *total = sparc_costs->float_sqrt_df;
+ else
+ *total = sparc_costs->float_sqrt_sf;
+ return false;
+
+ case COMPARE:
+ if (float_mode_p)
+ *total = sparc_costs->float_cmp;
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case IF_THEN_ELSE:
+ if (float_mode_p)
+ *total = sparc_costs->float_cmove;
+ else
+ *total = sparc_costs->int_cmove;
+ return false;
+
+ case IOR:
+ /* Handle the NAND vector patterns. */
+ if (sparc_vector_mode_supported_p (GET_MODE (x))
+ && GET_CODE (XEXP (x, 0)) == NOT
+ && GET_CODE (XEXP (x, 1)) == NOT)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ else
+ return false;
+
+ default:
+ return false;
+ }
+}
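+
+/* Worked example (illustrative): multiplying by the SImode constant 0xff
+   gives nbits == 8 above, so a CPU description with int_mul_bit_factor == 2
+   adds COSTS_N_INSNS ((8 - 3) / 2) == COSTS_N_INSNS (2) on top of
+   int_mul.  */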
+
+/* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
+ This is achieved by means of a manual dynamic stack space allocation in
+ the current frame. We make the assumption that SEQ doesn't contain any
+ function calls, with the possible exception of calls to the GOT helper. */
+
+static void
+emit_and_preserve (rtx seq, rtx reg, rtx reg2)
+{
+ /* We must preserve the lowest 16 words for the register save area. */
+ HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
+ /* We really need only 2 words of fresh stack space. */
+ HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
+
+ rtx slot
+ = gen_rtx_MEM (word_mode, plus_constant (stack_pointer_rtx,
+ SPARC_STACK_BIAS + offset));
+
+ emit_insn (gen_stack_pointer_dec (GEN_INT (size)));
+ emit_insn (gen_rtx_SET (VOIDmode, slot, reg));
+ if (reg2)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ adjust_address (slot, word_mode, UNITS_PER_WORD),
+ reg2));
+ emit_insn (seq);
+ if (reg2)
+ emit_insn (gen_rtx_SET (VOIDmode,
+ reg2,
+ adjust_address (slot, word_mode, UNITS_PER_WORD)));
+ emit_insn (gen_rtx_SET (VOIDmode, reg, slot));
+ emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at address
+ (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
+
+static void
+sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, insn, funexp;
+ unsigned int int_arg_first;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ if (flag_delayed_branch)
+ {
+ /* We will emit a regular sibcall below, so we need to instruct
+ output_sibcall that we are in a leaf function. */
+ sparc_leaf_function_p = current_function_uses_only_leaf_regs = 1;
+
+ /* This will cause final.c to invoke leaf_renumber_regs so we
+ must behave as if we were in a not-yet-leafified function. */
+ int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
+ }
+ else
+ {
+ /* We will emit the sibcall manually below, so we will need to
+ manually spill non-leaf registers. */
+ sparc_leaf_function_p = current_function_uses_only_leaf_regs = 0;
+
+ /* We really are in a leaf function. */
+ int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
+ }
+
+ /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
+ returns a structure, the structure return pointer is there instead. */
+ if (TARGET_ARCH64
+ && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
+ else
+ this_rtx = gen_rtx_REG (Pmode, int_arg_first);
+
+ /* Add DELTA. When possible use a plain add, otherwise load it into
+ a register first. */
+ if (delta)
+ {
+ rtx delta_rtx = GEN_INT (delta);
+
+ if (! SPARC_SIMM13_P (delta))
+ {
+ rtx scratch = gen_rtx_REG (Pmode, 1);
+ emit_move_insn (scratch, delta_rtx);
+ delta_rtx = scratch;
+ }
+
+ /* THIS_RTX += DELTA. */
+ emit_insn (gen_add2_insn (this_rtx, delta_rtx));
+ }
+
+ /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
+ if (vcall_offset)
+ {
+ rtx vcall_offset_rtx = GEN_INT (vcall_offset);
+ rtx scratch = gen_rtx_REG (Pmode, 1);
+
+ gcc_assert (vcall_offset < 0);
+
+ /* SCRATCH = *THIS_RTX. */
+ emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
+
+ /* Prepare for adding VCALL_OFFSET. The difficulty is that we
+ may not have any available scratch register at this point. */
+ if (SPARC_SIMM13_P (vcall_offset))
+ ;
+ /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
+ else if (! fixed_regs[5]
+ /* The below sequence is made up of at least 2 insns,
+ while the default method may need only one. */
+ && vcall_offset < -8192)
+ {
+ rtx scratch2 = gen_rtx_REG (Pmode, 5);
+ emit_move_insn (scratch2, vcall_offset_rtx);
+ vcall_offset_rtx = scratch2;
+ }
+ else
+ {
+ rtx increment = GEN_INT (-4096);
+
+ /* VCALL_OFFSET is a negative number whose typical range can be
+ estimated as -32768..0 in 32-bit mode. In almost all cases
+ it is therefore cheaper to emit multiple add insns than
+ spilling and loading the constant into a register (at least
+ 6 insns). */
+ while (! SPARC_SIMM13_P (vcall_offset))
+ {
+ emit_insn (gen_add2_insn (scratch, increment));
+ vcall_offset += 4096;
+ }
+ vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
+ }
+
+ /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
+ emit_move_insn (scratch, gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ scratch,
+ vcall_offset_rtx)));
+
+ /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
+ emit_insn (gen_add2_insn (this_rtx, scratch));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (! TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+
+ if (flag_delayed_branch)
+ {
+ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
+ insn = emit_call_insn (gen_sibcall (funexp));
+ SIBLING_CALL_P (insn) = 1;
+ }
+ else
+ {
+ /* The hoops we have to jump through in order to generate a sibcall
+ without using delay slots... */
+ rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
+
+ if (flag_pic)
+ {
+ spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
+ start_sequence ();
+ /* Delay emitting the GOT helper function because it needs to
+ change the section and we are emitting assembly code. */
+ load_got_register (); /* clobbers %o7 */
+ scratch = sparc_legitimize_pic_address (funexp, scratch);
+ seq = get_insns ();
+ end_sequence ();
+ emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
+ }
+ else if (TARGET_ARCH32)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode,
+ scratch,
+ gen_rtx_HIGH (SImode, funexp)));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ scratch,
+ gen_rtx_LO_SUM (SImode, scratch, funexp)));
+ }
+ else /* TARGET_ARCH64 */
+ {
+ switch (sparc_cmodel)
+ {
+ case CM_MEDLOW:
+ case CM_MEDMID:
+ /* The destination can serve as a temporary. */
+ sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
+ break;
+
+ case CM_MEDANY:
+ case CM_EMBMEDANY:
+ /* The destination cannot serve as a temporary. */
+ spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
+ start_sequence ();
+ sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
+ seq = get_insns ();
+ end_sequence ();
+ emit_and_preserve (seq, spill_reg, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ emit_jump_insn (gen_indirect_jump (scratch));
+ }
+
+ emit_barrier ();
+
+ /* Run just enough of rest_of_compilation to get the insns emitted.
+ There's not really enough bulk here to make other passes such as
+     instruction scheduling worthwhile.  Note that use_thunk calls
+ assemble_start_function and assemble_end_function. */
+ insn = get_insns ();
+ insn_locators_alloc ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
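+
+/* Illustrative example (not part of the original sources): such thunks
+   typically arise from C++ multiple inheritance, e.g.
+
+     struct A { virtual void f (); };
+     struct B { virtual void g (); };
+     struct C : A, B { void g (); };
+
+   Calling g through a B* pointing to a C object goes through a thunk
+   that adjusts the incoming 'this' pointer by DELTA (the offset between
+   the B subobject and the enclosing C) before tail-calling C::g.  */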
+
+/* Return true if sparc_output_mi_thunk would be able to output the
+ assembler code for the thunk function specified by the arguments
+ it is passed, and false otherwise. */
+static bool
+sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset,
+ const_tree function ATTRIBUTE_UNUSED)
+{
+ /* Bound the loop used in the default method above. */
+ return (vcall_offset >= -32768 || ! fixed_regs[5]);
+}
+
+/* We use the machine-specific reorg pass to enable workarounds for errata.  */
+
+static void
+sparc_reorg (void)
+{
+ rtx insn, next;
+
+ /* The only erratum we handle for now is that of the AT697F processor. */
+ if (!sparc_fix_at697f)
+ return;
+
+ /* We need to have the (essentially) final form of the insn stream in order
+ to properly detect the various hazards. Run delay slot scheduling. */
+ if (optimize > 0 && flag_delayed_branch)
+ {
+ cleanup_barriers ();
+ dbr_schedule (get_insns ());
+ }
+
+ /* Now look for specific patterns in the insn stream. */
+ for (insn = get_insns (); insn; insn = next)
+ {
+ bool insert_nop = false;
+ rtx set;
+
+ /* Look for a single-word load into an odd-numbered FP register. */
+ if (NONJUMP_INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+ && MEM_P (SET_SRC (set))
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) > 31
+ && REGNO (SET_DEST (set)) % 2 != 0)
+ {
+ /* The wrong dependency is on the enclosing double register. */
+ unsigned int x = REGNO (SET_DEST (set)) - 1;
+ unsigned int src1, src2, dest;
+ int code;
+
+ /* If the insn has a delay slot, then it cannot be problematic. */
+ next = next_active_insn (insn);
+ if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
+ code = -1;
+ else
+ {
+ extract_insn (next);
+ code = INSN_CODE (next);
+ }
+
+ switch (code)
+ {
+ case CODE_FOR_adddf3:
+ case CODE_FOR_subdf3:
+ case CODE_FOR_muldf3:
+ case CODE_FOR_divdf3:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ src2 = REGNO (recog_data.operand[2]);
+ if (src1 != src2)
+ {
+ /* Case [1-4]:
+ ld [address], %fx+1
+ FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
+ if ((src1 == x || src2 == x)
+ && (dest == src1 || dest == src2))
+ insert_nop = true;
+ }
+ else
+ {
+ /* Case 5:
+ ld [address], %fx+1
+ FPOPd %fx, %fx, %fx */
+ if (src1 == x
+ && dest == src1
+ && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
+ insert_nop = true;
+ }
+ break;
+
+ case CODE_FOR_sqrtdf2:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ /* Case 6:
+ ld [address], %fx+1
+ fsqrtd %fx, %fx */
+ if (src1 == x && dest == src1)
+ insert_nop = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else
+ next = NEXT_INSN (insn);
+
+ if (insert_nop)
+ emit_insn_after (gen_nop (), insn);
+ }
+}
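+
+/* Illustrative example (not part of the original sources): on the AT697F
+   a sequence such as
+
+       ld    [%o0], %f3
+       faddd %f2, %f4, %f2
+
+   matches case [1-4] above (the load targets the odd register %f3 inside
+   the double register %f2 read and written by the faddd), so the pass
+   inserts a nop between the two instructions.  */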
+
+/* How to allocate a 'struct machine_function'. */
+
+static struct machine_function *
+sparc_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in local-dynamic base patterns. */
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (x
+ && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
+ This is called from dwarf2out.c to emit call frame instructions
+ for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
+static void
+sparc_dwarf_handle_frame_unspec (const char *label,
+ rtx pattern ATTRIBUTE_UNUSED,
+ int index ATTRIBUTE_UNUSED)
+{
+ gcc_assert (index == UNSPECV_SAVEW);
+ dwarf2out_window_save (label);
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void
+sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ switch (size)
+ {
+ case 4:
+ fputs ("\t.word\t%r_tls_dtpoff32(", file);
+ break;
+ case 8:
+ fputs ("\t.xword\t%r_tls_dtpoff64(", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_addr_const (file, x);
+ fputs (")", file);
+}
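+
+/* For example, a 4-byte DTP-relative reference to a symbol foo comes
+   out as ".word %r_tls_dtpoff32(foo)" in the assembly output.  */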
+
+/* Do whatever processing is required at the end of a file. */
+
+static void
+sparc_file_end (void)
+{
+ /* If we need to emit the special GOT helper function, do so now. */
+ if (got_helper_rtx)
+ {
+ const char *name = XSTR (got_helper_rtx, 0);
+ const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
+#ifdef DWARF2_UNWIND_INFO
+ bool do_cfi;
+#endif
+
+ if (USE_HIDDEN_LINKONCE)
+ {
+ tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+ get_identifier (name),
+ build_function_type (void_type_node,
+ void_list_node));
+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+ NULL_TREE, void_type_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+ DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
+ DECL_VISIBILITY_SPECIFIED (decl) = 1;
+ resolve_unique_section (decl, 0, flag_function_sections);
+ allocate_struct_function (decl, true);
+ cfun->is_thunk = 1;
+ current_function_decl = decl;
+ init_varasm_status ();
+ assemble_start_function (decl, name);
+ }
+ else
+ {
+ const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
+ switch_to_section (text_section);
+ if (align > 0)
+ ASM_OUTPUT_ALIGN (asm_out_file, align);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+
+#ifdef DWARF2_UNWIND_INFO
+ do_cfi = dwarf2out_do_cfi_asm ();
+ if (do_cfi)
+ fprintf (asm_out_file, "\t.cfi_startproc\n");
+#endif
+ if (flag_delayed_branch)
+ fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
+ reg_name, reg_name);
+ else
+ fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
+ reg_name, reg_name);
+#ifdef DWARF2_UNWIND_INFO
+ if (do_cfi)
+ fprintf (asm_out_file, "\t.cfi_endproc\n");
+#endif
+ }
+
+ if (NEED_INDICATE_EXEC_STACK)
+ file_end_indicate_exec_stack ();
+}
+
+#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
+/* Implement TARGET_MANGLE_TYPE. */
+
+static const char *
+sparc_mangle_type (const_tree type)
+{
+ if (!TARGET_64BIT
+ && TYPE_MAIN_VARIANT (type) == long_double_type_node
+ && TARGET_LONG_DOUBLE_128)
+ return "g";
+
+ /* For all other types, use normal C++ mangling. */
+ return NULL;
+}
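+
+/* For example, with -m32 and 128-bit long double, the C++ function
+   void f (long double) mangles as _Z1fg (the __float128 mangling)
+   instead of the default _Z1fe.  */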
+#endif
+
+/* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing a
+   32-bit compare-and-swap on the word containing the byte or half-word.  */
+
+void
+sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
+{
+ rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
+ rtx addr = gen_reg_rtx (Pmode);
+ rtx off = gen_reg_rtx (SImode);
+ rtx oldv = gen_reg_rtx (SImode);
+ rtx newv = gen_reg_rtx (SImode);
+ rtx oldvalue = gen_reg_rtx (SImode);
+ rtx newvalue = gen_reg_rtx (SImode);
+ rtx res = gen_reg_rtx (SImode);
+ rtx resv = gen_reg_rtx (SImode);
+ rtx memsi, val, mask, end_label, loop_label, cc;
+
+ emit_insn (gen_rtx_SET (VOIDmode, addr,
+ gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
+
+ if (Pmode != SImode)
+ addr1 = gen_lowpart (SImode, addr1);
+ emit_insn (gen_rtx_SET (VOIDmode, off,
+ gen_rtx_AND (SImode, addr1, GEN_INT (3))));
+
+ memsi = gen_rtx_MEM (SImode, addr);
+ set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+ MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+ val = force_reg (SImode, memsi);
+
+ emit_insn (gen_rtx_SET (VOIDmode, off,
+ gen_rtx_XOR (SImode, off,
+ GEN_INT (GET_MODE (mem) == QImode
+ ? 3 : 2))));
+
+ emit_insn (gen_rtx_SET (VOIDmode, off,
+ gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
+
+ if (GET_MODE (mem) == QImode)
+ mask = force_reg (SImode, GEN_INT (0xff));
+ else
+ mask = force_reg (SImode, GEN_INT (0xffff));
+
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_ASHIFT (SImode, mask, off)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, val,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ val)));
+
+ oldval = gen_lowpart (SImode, oldval);
+ emit_insn (gen_rtx_SET (VOIDmode, oldv,
+ gen_rtx_ASHIFT (SImode, oldval, off)));
+
+ newval = gen_lowpart_common (SImode, newval);
+ emit_insn (gen_rtx_SET (VOIDmode, newv,
+ gen_rtx_ASHIFT (SImode, newval, off)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, oldv,
+ gen_rtx_AND (SImode, oldv, mask)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, newv,
+ gen_rtx_AND (SImode, newv, mask)));
+
+ end_label = gen_label_rtx ();
+ loop_label = gen_label_rtx ();
+ emit_label (loop_label);
+
+ emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
+ gen_rtx_IOR (SImode, oldv, val)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, newvalue,
+ gen_rtx_IOR (SImode, newv, val)));
+
+ emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
+
+ emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
+
+ emit_insn (gen_rtx_SET (VOIDmode, resv,
+ gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
+ res)));
+
+ cc = gen_compare_reg_1 (NE, resv, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, resv));
+
+ /* Use cbranchcc4 to separate the compare and branch! */
+ emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
+ cc, const0_rtx, loop_label));
+
+ emit_label (end_label);
+
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_AND (SImode, res, mask)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_LSHIFTRT (SImode, res, off)));
+
+ emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
+}
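+
+/* Illustrative sketch (not part of the original sources): the RTL emitted
+   above corresponds roughly to the following C algorithm for the QImode
+   case, where p is the byte's address and cas32 stands for the underlying
+   32-bit compare-and-swap (both names are hypothetical):
+
+     uint32_t *wp   = (uint32_t *) ((uintptr_t) p & ~(uintptr_t) 3);
+     unsigned  sh   = (((uintptr_t) p & 3) ^ 3) * 8;  // big-endian offset
+     uint32_t  mask = (uint32_t) 0xff << sh;
+     uint32_t  oldv = ((uint32_t) oldval << sh) & mask;
+     uint32_t  newv = ((uint32_t) newval << sh) & mask;
+     uint32_t  val  = *wp & ~mask;  // snapshot of the surrounding bytes
+     uint32_t  res;
+
+     for (;;)
+       {
+         res = cas32 (wp, oldv | val, newv | val);
+         if (res == (oldv | val))
+           break;                   // swap succeeded
+         if ((res & ~mask) == val)
+           break;                   // our byte mismatched: fail
+         val = res & ~mask;         // surrounding bytes changed: retry
+       }
+     return (res >> sh) & 0xff;     // the old byte that was observed
+   */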
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */
+
+bool
+sparc_frame_pointer_required (void)
+{
+ return !(current_function_is_leaf && only_leaf_regs_used ());
+}
+
+/* The way this is structured, we can't eliminate SFP in favor of SP
+ if the frame pointer is required: we want to use the SFP->HFP elimination
+ in that case. But the test in update_eliminables doesn't know we are
+ assuming below that we only do the former elimination. */
+
+bool
+sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == HARD_FRAME_POINTER_REGNUM
+ || !targetm.frame_pointer_required ());
+}
+
+/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
+ they won't be allocated. */
+
+static void
+sparc_conditional_register_usage (void)
+{
+ if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+  /* If the user has passed -f{fixed,call-{used,saved}}-g5, honor it.  */
+ if (TARGET_ARCH32 && fixed_regs[5])
+ fixed_regs[5] = 1;
+ else if (TARGET_ARCH64 && fixed_regs[5] == 2)
+ fixed_regs[5] = 0;
+ if (! TARGET_V9)
+ {
+ int regno;
+ for (regno = SPARC_FIRST_V9_FP_REG;
+ regno <= SPARC_LAST_V9_FP_REG;
+ regno++)
+ fixed_regs[regno] = 1;
+ /* %fcc0 is used by v8 and v9. */
+ for (regno = SPARC_FIRST_V9_FCC_REG + 1;
+ regno <= SPARC_LAST_V9_FCC_REG;
+ regno++)
+ fixed_regs[regno] = 1;
+ }
+ if (! TARGET_FPU)
+ {
+ int regno;
+ for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
+ fixed_regs[regno] = 1;
+ }
+  /* If the user has passed -f{fixed,call-{used,saved}}-g2, honor it.
+     Likewise with g3 and g4.  */
+ if (fixed_regs[2] == 2)
+ fixed_regs[2] = ! TARGET_APP_REGS;
+ if (fixed_regs[3] == 2)
+ fixed_regs[3] = ! TARGET_APP_REGS;
+ if (TARGET_ARCH32 && fixed_regs[4] == 2)
+ fixed_regs[4] = ! TARGET_APP_REGS;
+ else if (TARGET_CM_EMBMEDANY)
+ fixed_regs[4] = 1;
+ else if (fixed_regs[4] == 2)
+ fixed_regs[4] = 0;
+}
+
+#include "gt-sparc.h"
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
new file mode 100644
index 000000000..31e6d123b
--- /dev/null
+++ b/gcc/config/sparc/sparc.h
@@ -0,0 +1,2122 @@
+/* Definitions of target machine for GNU compiler, for Sun SPARC.
+ Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997, 1998, 1999
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Michael Tiemann (tiemann@cygnus.com).
+ 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
+ at Cygnus Support.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config/vxworks-dummy.h"
+
+/* Note that some other tm.h files include this one and then override
+ whatever definitions are necessary. */
+
+/* Define the specific costs for a given cpu */
+
+struct processor_costs {
+ /* Integer load */
+ const int int_load;
+
+ /* Integer signed load */
+ const int int_sload;
+
+ /* Integer zeroed load */
+ const int int_zload;
+
+ /* Float load */
+ const int float_load;
+
+ /* fmov, fneg, fabs */
+ const int float_move;
+
+ /* fadd, fsub */
+ const int float_plusminus;
+
+ /* fcmp */
+ const int float_cmp;
+
+ /* fmov, fmovr */
+ const int float_cmove;
+
+ /* fmul */
+ const int float_mul;
+
+ /* fdivs */
+ const int float_div_sf;
+
+ /* fdivd */
+ const int float_div_df;
+
+ /* fsqrts */
+ const int float_sqrt_sf;
+
+ /* fsqrtd */
+ const int float_sqrt_df;
+
+ /* umul/smul */
+ const int int_mul;
+
+ /* mulX */
+ const int int_mulX;
+
+ /* integer multiply cost for each bit set past the most
+ significant 3, so the formula for multiply cost becomes:
+
+ if (rs1 < 0)
+ highest_bit = highest_clear_bit(rs1);
+ else
+ highest_bit = highest_set_bit(rs1);
+ if (highest_bit < 3)
+ highest_bit = 3;
+ cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
+
+ A value of zero indicates that the multiply costs is fixed,
+ and not variable. */
+ const int int_mul_bit_factor;
+
+ /* udiv/sdiv */
+ const int int_div;
+
+ /* divX */
+ const int int_divX;
+
+ /* movcc, movr */
+ const int int_cmove;
+
+ /* penalty for shifts, due to scheduling rules etc. */
+ const int shift_penalty;
+};
+
+extern const struct processor_costs *sparc_costs;
+
+/* Target CPU builtins.  FIXME: Defining sparc is for the benefit of
+   Solaris only; otherwise just define __sparc__.  Sadly the headers are
+   such a mess that there is no Solaris-specific header.  */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("sparc"); \
+ if (TARGET_64BIT) \
+ { \
+ builtin_assert ("cpu=sparc64"); \
+ builtin_assert ("machine=sparc64"); \
+ } \
+ else \
+ { \
+ builtin_assert ("cpu=sparc"); \
+ builtin_assert ("machine=sparc"); \
+ } \
+ } \
+ while (0)
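+
+/* For example, source code can test for a 64-bit SPARC compile with
+   #if defined(__sparc__) && defined(__arch64__); __sparc__ comes from
+   builtin_define_std above and __arch64__ from CPP_ARCH64_SPEC below.  */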
+
+/* Specify this in a cover file to provide bi-architecture (32/64) support. */
+/* #define SPARC_BI_ARCH */
+
+/* Macro used later in this file to determine default architecture. */
+#define DEFAULT_ARCH32_P ((TARGET_DEFAULT & MASK_64BIT) == 0)
+
+/* TARGET_ARCH{32,64} are the main macros to decide which of the two
+ architectures to compile for. We allow targets to choose compile time or
+ runtime selection. */
+#ifdef IN_LIBGCC2
+#if defined(__sparcv9) || defined(__arch64__)
+#define TARGET_ARCH32 0
+#else
+#define TARGET_ARCH32 1
+#endif /* sparc64 */
+#else
+#ifdef SPARC_BI_ARCH
+#define TARGET_ARCH32 (! TARGET_64BIT)
+#else
+#define TARGET_ARCH32 (DEFAULT_ARCH32_P)
+#endif /* SPARC_BI_ARCH */
+#endif /* IN_LIBGCC2 */
+#define TARGET_ARCH64 (! TARGET_ARCH32)
+
+/* Code model selection in 64-bit environment.
+
+ The machine mode used for addresses is 32-bit wide:
+
+ TARGET_CM_32: 32-bit address space.
+ It is the code model used when generating 32-bit code.
+
+ The machine mode used for addresses is 64-bit wide:
+
+ TARGET_CM_MEDLOW: 32-bit address space.
+ The executable must be in the low 32 bits of memory.
+ This avoids generating %uhi and %ulo terms. Programs
+ can be statically or dynamically linked.
+
+ TARGET_CM_MEDMID: 44-bit address space.
+ The executable must be in the low 44 bits of memory,
+ and the %[hml]44 terms are used. The text and data
+ segments have a maximum size of 2GB (31-bit span).
+ The maximum offset from any instruction to the label
+ _GLOBAL_OFFSET_TABLE_ is 2GB (31-bit span).
+
+ TARGET_CM_MEDANY: 64-bit address space.
+ The text and data segments have a maximum size of 2GB
+ (31-bit span) and may be located anywhere in memory.
+ The maximum offset from any instruction to the label
+ _GLOBAL_OFFSET_TABLE_ is 2GB (31-bit span).
+
+ TARGET_CM_EMBMEDANY: 64-bit address space.
+ The text and data segments have a maximum size of 2GB
+ (31-bit span) and may be located anywhere in memory.
+ The global register %g4 contains the start address of
+ the data segment. Programs are statically linked and
+ PIC is not supported.
+
+ Different code models are not supported in 32-bit environment. */
+
+enum cmodel {
+ CM_32,
+ CM_MEDLOW,
+ CM_MEDMID,
+ CM_MEDANY,
+ CM_EMBMEDANY
+};
+
+/* One of CM_FOO. */
+extern enum cmodel sparc_cmodel;
+
+/* V9 code model selection. */
+#define TARGET_CM_MEDLOW (sparc_cmodel == CM_MEDLOW)
+#define TARGET_CM_MEDMID (sparc_cmodel == CM_MEDMID)
+#define TARGET_CM_MEDANY (sparc_cmodel == CM_MEDANY)
+#define TARGET_CM_EMBMEDANY (sparc_cmodel == CM_EMBMEDANY)
+
+#define SPARC_DEFAULT_CMODEL CM_32
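+
+/* The code model is chosen with the -mcmodel= option, e.g.
+
+     gcc -m64 -mcmodel=medany foo.c
+
+   selects the 64-bit MEDANY model described above.  In 32-bit mode the
+   default CM_32 is always used since the other models are not supported
+   there.  */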
+
+/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO)
+ which requires the following macro to be true if enabled. Prior to V9,
+ there are no instructions to even talk about memory synchronization.
+ Note that the UltraSPARC III processors don't implement RMO, unlike the
+ UltraSPARC II processors. Niagara and Niagara-2 do not implement RMO
+ either.
+
+ Default to false; for example, Solaris never enables RMO, only ever uses
+ total memory ordering (TMO). */
+#define SPARC_RELAXED_ORDERING false
+
+/* Do not use the .note.GNU-stack convention by default. */
+#define NEED_INDICATE_EXEC_STACK 0
+
+/* This is call-clobbered in the normal ABI, but is reserved in the
+ home grown (aka upward compatible) embedded ABI. */
+#define EMBMEDANY_BASE_REG "%g4"
+
+/* Values of TARGET_CPU_DEFAULT, set via -D in the Makefile,
+ and specified by the user via --with-cpu=foo.
+ This specifies the cpu implementation, not the architecture size. */
+/* Note that TARGET_CPU_v9 is assumed to start the list of 64-bit
+   capable CPUs.  */
+#define TARGET_CPU_sparc 0
+#define TARGET_CPU_v7 0 /* alias */
+#define TARGET_CPU_cypress 0 /* alias */
+#define TARGET_CPU_v8 1 /* generic v8 implementation */
+#define TARGET_CPU_supersparc 2
+#define TARGET_CPU_hypersparc 3
+#define TARGET_CPU_leon 4
+#define TARGET_CPU_sparclite 5
+#define TARGET_CPU_f930 5 /* alias */
+#define TARGET_CPU_f934 5 /* alias */
+#define TARGET_CPU_sparclite86x 6
+#define TARGET_CPU_sparclet 7
+#define TARGET_CPU_tsc701 7 /* alias */
+#define TARGET_CPU_v9 8 /* generic v9 implementation */
+#define TARGET_CPU_sparcv9 8 /* alias */
+#define TARGET_CPU_sparc64 8 /* alias */
+#define TARGET_CPU_ultrasparc 9
+#define TARGET_CPU_ultrasparc3 10
+#define TARGET_CPU_niagara 11
+#define TARGET_CPU_niagara2 12
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
+
+#define CPP_CPU32_DEFAULT_SPEC ""
+#define ASM_CPU32_DEFAULT_SPEC ""
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v9
+/* ??? What does Sun's CC pass? */
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+/* ??? It's not clear how other assemblers will handle this, so by default
+ use GAS. Sun's Solaris assembler recognizes -xarch=v8plus, but this case
+ is handled in sol2.h. */
+#define ASM_CPU64_DEFAULT_SPEC "-Av9"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9a"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara2
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+#endif
+
+#else
+
+#define CPP_CPU64_DEFAULT_SPEC ""
+#define ASM_CPU64_DEFAULT_SPEC ""
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_v8
+#define CPP_CPU32_DEFAULT_SPEC ""
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclet
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclet__"
+#define ASM_CPU32_DEFAULT_SPEC "-Asparclet"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite__"
+#define ASM_CPU32_DEFAULT_SPEC "-Asparclite"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_sparclite86x
+#define CPP_CPU32_DEFAULT_SPEC "-D__sparclite86x__"
+#define ASM_CPU32_DEFAULT_SPEC "-Asparclite"
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_supersparc
+#define CPP_CPU32_DEFAULT_SPEC "-D__supersparc__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_hypersparc
+#define CPP_CPU32_DEFAULT_SPEC "-D__hypersparc__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_leon
+#define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__"
+#define ASM_CPU32_DEFAULT_SPEC ""
+#endif
+
+#endif
+
+#if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC)
+ #error Unrecognized value in TARGET_CPU_DEFAULT.
+#endif
+
+#ifdef SPARC_BI_ARCH
+
+#define CPP_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" CPP_CPU64_DEFAULT_SPEC "} \
+%{!m64:" CPP_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" CPP_CPU32_DEFAULT_SPEC "} \
+%{!m32:" CPP_CPU64_DEFAULT_SPEC "} \
+")
+#define ASM_CPU_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? "\
+%{m64:" ASM_CPU64_DEFAULT_SPEC "} \
+%{!m64:" ASM_CPU32_DEFAULT_SPEC "} \
+" : "\
+%{m32:" ASM_CPU32_DEFAULT_SPEC "} \
+%{!m32:" ASM_CPU64_DEFAULT_SPEC "} \
+")
+
+#else /* !SPARC_BI_ARCH */
+
+#define CPP_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? CPP_CPU32_DEFAULT_SPEC : CPP_CPU64_DEFAULT_SPEC)
+#define ASM_CPU_DEFAULT_SPEC (DEFAULT_ARCH32_P ? ASM_CPU32_DEFAULT_SPEC : ASM_CPU64_DEFAULT_SPEC)
+
+#endif /* !SPARC_BI_ARCH */
+
+/* Define macros to distinguish architectures. */
+
+/* Common CPP definitions used by CPP_SPEC amongst the various targets
+ for handling -mcpu=xxx switches. */
+#define CPP_CPU_SPEC "\
+%{msoft-float:-D_SOFT_FLOAT} \
+%{mcpu=sparclet:-D__sparclet__} %{mcpu=tsc701:-D__sparclet__} \
+%{mcpu=sparclite:-D__sparclite__} \
+%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
+%{mcpu=sparclite86x:-D__sparclite86x__} \
+%{mcpu=v8:-D__sparc_v8__} \
+%{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
+%{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \
+%{mcpu=leon:-D__leon__ -D__sparc_v8__} \
+%{mcpu=v9:-D__sparc_v9__} \
+%{mcpu=ultrasparc:-D__sparc_v9__} \
+%{mcpu=ultrasparc3:-D__sparc_v9__} \
+%{mcpu=niagara:-D__sparc_v9__} \
+%{mcpu=niagara2:-D__sparc_v9__} \
+%{!mcpu*:%(cpp_cpu_default)} \
+"
+#define CPP_ARCH32_SPEC ""
+#define CPP_ARCH64_SPEC "-D__arch64__"
+
+#define CPP_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? CPP_ARCH32_SPEC : CPP_ARCH64_SPEC)
+
+#define CPP_ARCH_SPEC "\
+%{m32:%(cpp_arch32)} \
+%{m64:%(cpp_arch64)} \
+%{!m32:%{!m64:%(cpp_arch_default)}} \
+"
+
+/* Macro to distinguish endianness. */
+#define CPP_ENDIAN_SPEC "\
+%{mlittle-endian:-D__LITTLE_ENDIAN__}"
+
+/* Macros to distinguish the particular subtarget. */
+#define CPP_SUBTARGET_SPEC ""
+
+#define CPP_SPEC "%(cpp_cpu) %(cpp_arch) %(cpp_endian) %(cpp_subtarget)"
+
+/* This used to translate -dalign to -malign, but that is no good
+   because it can't turn off the usual meaning of -d, which is to make
+   debugging dumps.  */
+
+#define CC1_SPEC ""
+
+/* Override in target specific files. */
+#define ASM_CPU_SPEC "\
+%{mcpu=sparclet:-Asparclet} %{mcpu=tsc701:-Asparclet} \
+%{mcpu=sparclite:-Asparclite} \
+%{mcpu=sparclite86x:-Asparclite} \
+%{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
+%{mcpu=v8:-Av8} \
+%{mcpu=supersparc:-Av8} \
+%{mcpu=hypersparc:-Av8} \
+%{mcpu=leon:-Av8} \
+%{mv8plus:-Av8plus} \
+%{mcpu=v9:-Av9} \
+%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
+%{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \
+%{mcpu=niagara:%{!mv8plus:-Av9b}} \
+%{mcpu=niagara2:%{!mv8plus:-Av9b}} \
+%{!mcpu*:%(asm_cpu_default)} \
+"
+
+/* Word size selection, among other things.
+ This is what GAS uses. Add %(asm_arch) to ASM_SPEC to enable. */
+
+#define ASM_ARCH32_SPEC "-32"
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+#define ASM_ARCH64_SPEC "-64 -no-undeclared-regs"
+#else
+#define ASM_ARCH64_SPEC "-64"
+#endif
+#define ASM_ARCH_DEFAULT_SPEC \
+(DEFAULT_ARCH32_P ? ASM_ARCH32_SPEC : ASM_ARCH64_SPEC)
+
+#define ASM_ARCH_SPEC "\
+%{m32:%(asm_arch32)} \
+%{m64:%(asm_arch64)} \
+%{!m32:%{!m64:%(asm_arch_default)}} \
+"
+
+#ifdef HAVE_AS_RELAX_OPTION
+#define ASM_RELAX_SPEC "%{!mno-relax:-relax}"
+#else
+#define ASM_RELAX_SPEC ""
+#endif
+
+/* Special flags to the Sun-4 assembler when using pipe for input. */
+
+#define ASM_SPEC "\
+%{!pg:%{!p:%{fpic|fPIC|fpie|fPIE:-k}}} %{keep-local-as-symbols:-L} \
+%(asm_cpu) %(asm_relax)"
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name, and a string constant that is used by the GCC
+   driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#define EXTRA_SPECS \
+ { "cpp_cpu", CPP_CPU_SPEC }, \
+ { "cpp_cpu_default", CPP_CPU_DEFAULT_SPEC }, \
+ { "cpp_arch32", CPP_ARCH32_SPEC }, \
+ { "cpp_arch64", CPP_ARCH64_SPEC }, \
+ { "cpp_arch_default", CPP_ARCH_DEFAULT_SPEC },\
+ { "cpp_arch", CPP_ARCH_SPEC }, \
+ { "cpp_endian", CPP_ENDIAN_SPEC }, \
+ { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "asm_cpu_default", ASM_CPU_DEFAULT_SPEC }, \
+ { "asm_arch32", ASM_ARCH32_SPEC }, \
+ { "asm_arch64", ASM_ARCH64_SPEC }, \
+ { "asm_relax", ASM_RELAX_SPEC }, \
+ { "asm_arch_default", ASM_ARCH_DEFAULT_SPEC },\
+ { "asm_arch", ASM_ARCH_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#define SUBTARGET_EXTRA_SPECS
+
+/* Because libgcc can generate references back to libc (via .umul etc.) we have
+ to list libc again after the second libgcc. */
+#define LINK_GCC_C_SEQUENCE_SPEC "%G %L %G %L"
+
+
+#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int")
+#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int")
+
+/* ??? This should be 32 bits for v9 but what can we do? */
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+/* Mask of all CPU selection flags. */
+#define MASK_ISA \
+(MASK_V8 + MASK_SPARCLITE + MASK_SPARCLET + MASK_V9 + MASK_DEPRECATED_V8_INSNS)
+
+/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y.
+ TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y
+ to get high 32 bits. False in V8+ or V9 because multiply stores
+ a 64-bit result in a register. */
+
+#define TARGET_HARD_MUL32 \
+ ((TARGET_V8 || TARGET_SPARCLITE \
+ || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \
+ && ! TARGET_V8PLUS && TARGET_ARCH32)
+
+#define TARGET_HARD_MUL \
+ (TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET \
+ || TARGET_DEPRECATED_V8_INSNS || TARGET_V8PLUS)
+
+/* MASK_APP_REGS must always be the default because that's what
+ FIXED_REGISTERS is set to and -ffixed- is processed before
+ TARGET_CONDITIONAL_REGISTER_USAGE is called (where we process
+ -mno-app-regs). */
+#define TARGET_DEFAULT (MASK_APP_REGS + MASK_FPU)
+
+/* Processor type.
+ These must match the values for the cpu attribute in sparc.md. */
+enum processor_type {
+ PROCESSOR_V7,
+ PROCESSOR_CYPRESS,
+ PROCESSOR_V8,
+ PROCESSOR_SUPERSPARC,
+ PROCESSOR_HYPERSPARC,
+ PROCESSOR_LEON,
+ PROCESSOR_SPARCLITE,
+ PROCESSOR_F930,
+ PROCESSOR_F934,
+ PROCESSOR_SPARCLITE86X,
+ PROCESSOR_SPARCLET,
+ PROCESSOR_TSC701,
+ PROCESSOR_V9,
+ PROCESSOR_ULTRASPARC,
+ PROCESSOR_ULTRASPARC3,
+ PROCESSOR_NIAGARA,
+ PROCESSOR_NIAGARA2
+};
+
+/* This is set from -m{cpu,tune}=xxx. */
+extern enum processor_type sparc_cpu;
+
+/* Recast the cpu class to be the cpu attribute.
+ Every file includes us, but not every file includes insn-attr.h. */
+#define sparc_cpu_attr ((enum attr_cpu) sparc_cpu)
+
+/* Support for a compile-time default CPU, et cetera. The rules are:
+ --with-cpu is ignored if -mcpu is specified.
+ --with-tune is ignored if -mtune is specified.
+ --with-float is ignored if -mhard-float, -msoft-float, -mfpu, or -mno-fpu
+ are specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \
+ {"float", "%{!msoft-float:%{!mhard-float:%{!mfpu:%{!mno-fpu:-m%(VALUE)-float}}}}" }
+
+/* sparc_select[0] is reserved for the default cpu. */
+struct sparc_cpu_select
+{
+ const char *string;
+ const char *const name;
+ const int set_tune_p;
+ const int set_arch_p;
+};
+
+extern struct sparc_cpu_select sparc_select[];
+
+/* target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 1
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN 1
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN 1
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_ARCH64 ? 8 : 4)
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+#else
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Now define the sizes of the C data types. */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE (TARGET_ARCH64 ? 64 : 32)
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+
+/* LONG_DOUBLE_TYPE_SIZE is defined per OS even though the
+ SPARC ABI says that it is 128-bit wide. */
+/* #define LONG_DOUBLE_TYPE_SIZE 128 */
+
+/* The widest floating-point format really supported by the hardware. */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
+/* Width in bits of a pointer. This is the size of ptr_mode. */
+#define POINTER_SIZE (TARGET_PTR64 ? 64 : 32)
+
+/* This is the machine mode used for addresses. */
+#define Pmode (TARGET_ARCH64 ? DImode : SImode)
+
+/* If we have to extend pointers (only when TARGET_ARCH64 and not
+ TARGET_PTR64), we want to do it unsigned. This macro does nothing
+ if ptr_mode and Pmode are the same. */
+#define POINTERS_EXTEND_UNSIGNED 1
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_ARCH64 ? 64 : 32)
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+/* FIXME, this is wrong when TARGET_ARCH64 and TARGET_STACK_BIAS, because
+ then %sp+2047 is 128-bit aligned so %sp is really only byte-aligned. */
+#define STACK_BOUNDARY (TARGET_ARCH64 ? 128 : 64)
+/* Temporary hack until the FIXME above is fixed. */
+#define SPARC_STACK_BOUNDARY_HACK (TARGET_ARCH64 && TARGET_STACK_BIAS)
+
+/* Align frames on double-word boundaries.  */
+
+#define SPARC_STACK_ALIGN(LOC) \
+ (TARGET_ARCH64 ? (((LOC)+15) & ~15) : (((LOC)+7) & ~7))
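+
+/* For example, SPARC_STACK_ALIGN (100) yields 104 (8-byte alignment)
+   in 32-bit mode and 112 (16-byte alignment) in 64-bit mode.  */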
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY (TARGET_ARCH64 ? 64 : 32)
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* No data type wants to be aligned more strictly than this.  */
+#define BIGGEST_ALIGNMENT (TARGET_ARCH64 ? 128 : 64)
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT 64
+
+/* Define this macro as an expression for the alignment of a structure
+ (given by STRUCT as a tree node) if the alignment computed in the
+ usual way is COMPUTED and the alignment explicitly specified was
+ SPECIFIED.
+
+ The default is to use SPECIFIED if it is larger; otherwise, use
+ the smaller of COMPUTED and `BIGGEST_ALIGNMENT' */
+#define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) \
+ (TARGET_FASTER_STRUCTS ? \
+ ((TREE_CODE (STRUCT) == RECORD_TYPE \
+ || TREE_CODE (STRUCT) == UNION_TYPE \
+ || TREE_CODE (STRUCT) == QUAL_UNION_TYPE) \
+ && TYPE_FIELDS (STRUCT) != 0 \
+ ? MAX (MAX ((COMPUTED), (SPECIFIED)), BIGGEST_ALIGNMENT) \
+ : MAX ((COMPUTED), (SPECIFIED))) \
+ : MAX ((COMPUTED), (SPECIFIED)))
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Make local arrays of chars word-aligned for the same reasons. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Things that must be doubleword aligned cannot go in the text section,
+ because the linker fails to align the text section enough!
+ Put them in the data section. This macro is only used in this file. */
+#define MAX_TEXT_ALIGN 32
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ SPARC has 32 integer registers and 32 floating point registers.
+ 64-bit SPARC has 32 additional fp regs, but the odd numbered ones are not
+ accessible. We still account for them to simplify register computations
+   (e.g. in CLASS_MAX_NREGS).  There are also 4 fp condition code registers, so
+ 32+32+32+4 == 100.
+ Register 100 is used as the integer condition code register.
+ Register 101 is used as the soft frame pointer register. */
+
+#define FIRST_PSEUDO_REGISTER 102
+
+#define SPARC_FIRST_FP_REG 32
+/* Additional V9 fp regs. */
+#define SPARC_FIRST_V9_FP_REG 64
+#define SPARC_LAST_V9_FP_REG 95
+/* V9 %fcc[0123]. V8 uses (figuratively) %fcc0. */
+#define SPARC_FIRST_V9_FCC_REG 96
+#define SPARC_LAST_V9_FCC_REG 99
+/* V8 fcc reg. */
+#define SPARC_FCC_REG 96
+/* Integer CC reg. We don't distinguish %icc from %xcc. */
+#define SPARC_ICC_REG 100
+
+/* Nonzero if REGNO is an fp reg. */
+#define SPARC_FP_REG_P(REGNO) \
+((REGNO) >= SPARC_FIRST_FP_REG && (REGNO) <= SPARC_LAST_V9_FP_REG)
+
+/* Argument passing regs. */
+#define SPARC_OUTGOING_INT_ARG_FIRST 8
+#define SPARC_INCOMING_INT_ARG_FIRST 24
+#define SPARC_FP_ARG_FIRST 32
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+ On non-v9 systems:
+ g1 is free to use as temporary.
+   g2-g4 are reserved for applications.  GCC normally uses them as
+   temporaries, but this can be disabled via the -mno-app-regs option.
+ g5 through g7 are reserved for the operating system.
+
+ On v9 systems:
+ g1,g5 are free to use as temporaries, and are free to use between calls
+ if the call is to an external function via the PLT.
+ g4 is free to use as a temporary in the non-embedded case.
+ g4 is reserved in the embedded case.
+   g2-g3 are reserved for applications.  GCC normally uses them as
+   temporaries, but this can be disabled via the -mno-app-regs option.
+ g6-g7 are reserved for the operating system (or application in
+ embedded case).
+ ??? Register 1 is used as a temporary by the 64 bit sethi pattern, so must
+ currently be a fixed register until this pattern is rewritten.
+ Register 1 is also used when restoring call-preserved registers in large
+ stack frames.
+
+ Registers fixed in arch32 and not arch64 (or vice-versa) are marked in
+ TARGET_CONDITIONAL_REGISTER_USAGE in order to properly handle -ffixed-.
+*/
+
+#define FIXED_REGISTERS \
+ {1, 0, 2, 2, 2, 2, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 1, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 1, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ \
+ 0, 0, 0, 0, 0, 1}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+
+#define CALL_USED_REGISTERS \
+ {1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On SPARC, ordinary registers hold 32 bits worth;
+ this means both integer and floating point registers.
+ On v9, integer regs hold 64 bits worth; floating point regs hold
+ 32 bits worth (this includes the new fp regs as even the odd ones are
+ included in the hard register count). */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (TARGET_ARCH64 \
+ ? ((REGNO) < 32 || (REGNO) == FRAME_POINTER_REGNUM \
+ ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD \
+ : (GET_MODE_SIZE (MODE) + 3) / 4) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
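+
+/* For example, a DFmode (8-byte) value needs one integer register in
+   64-bit mode but two floating-point registers, since fp registers are
+   always counted as 4 bytes wide here; in 32-bit mode it needs two
+   registers of either kind.  */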
+
+/* Due to the ARCH64 discrepancy above we must override this next
+ macro too. */
+#define REGMODE_NATURAL_SIZE(MODE) \
+ ((TARGET_ARCH64 && FLOAT_MODE_P (MODE)) ? 4 : UNITS_PER_WORD)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ See sparc.c for how we initialize this. */
+extern const int *hard_regno_mode_classes;
+extern int sparc_mode_class[];
+
+/* ??? Because of the funny way we pass parameters we should allow certain
+ ??? types of float/complex values to be in integer registers during
+ ??? RTL generation. This only matters on arch32. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((hard_regno_mode_classes[REGNO] & sparc_mode_class[MODE]) != 0)
+
+/* Value is 1 if it is OK to rename a hard register FROM to another hard
+ register TO. We cannot rename %g1 as it may be used before the save
+ register window instruction in the prologue. */
+#define HARD_REGNO_RENAME_OK(FROM, TO) ((FROM) != 1)
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output.
+
+ For V9: SFmode can't be combined with other float modes, because they can't
+ be allocated to the %d registers. Also, DFmode won't fit in odd %f
+ registers, but SFmode will. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) \
+ || (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \
+ && (! TARGET_V9 \
+ || (GET_MODE_CLASS (MODE1) != MODE_FLOAT \
+ || (MODE1 != SFmode && MODE2 != SFmode)))))
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 14
+
+/* The stack bias (the amount by which the hardware register is offset). */
+#define SPARC_STACK_BIAS ((TARGET_ARCH64 && TARGET_STACK_BIAS) ? 2047 : 0)
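+
+/* For example, with TARGET_ARCH64 and TARGET_STACK_BIAS the real frame
+ address is %sp + 2047 rather than %sp itself, so every frame-relative
+ offset in this file must add SPARC_STACK_BIAS, as STACK_POINTER_OFFSET
+ and INITIAL_ELIMINATION_OFFSET below do. */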
+
+/* Actual top-of-stack address is 92/176 greater than the contents of the
+ stack pointer register for !v9/v9. That is:
+ - !v9: 64 bytes for the in and local registers, 4 bytes for structure return
+ address, and 6*4 bytes for the 6 register parameters.
+ - v9: 128 bytes for the in and local registers + 6*8 bytes for the integer
+ parameter regs. */
+#define STACK_POINTER_OFFSET (FIRST_PARM_OFFSET(0) + SPARC_STACK_BIAS)
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 30
+
+/* The soft frame pointer does not have the stack bias applied. */
+#define FRAME_POINTER_REGNUM 101
+
+/* Given the stack bias, the stack pointer isn't actually aligned. */
+#define INIT_EXPANDERS \
+ do { \
+ if (crtl->emit.regno_pointer_align && SPARC_STACK_BIAS) \
+ { \
+ REGNO_POINTER_ALIGN (STACK_POINTER_REGNUM) = BITS_PER_UNIT; \
+ REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = BITS_PER_UNIT; \
+ } \
+ } while (0)
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM
+
+/* Register in which static-chain is passed to a function. This must
+ not be a register used by the prologue. */
+#define STATIC_CHAIN_REGNUM (TARGET_ARCH64 ? 5 : 2)
+
+/* Register which holds the global offset table, if any. */
+
+#define GLOBAL_OFFSET_TABLE_REGNUM 23
+
+/* Register which holds offset table for position-independent
+ data references. */
+
+#define PIC_OFFSET_TABLE_REGNUM \
+ (flag_pic ? GLOBAL_OFFSET_TABLE_REGNUM : INVALID_REGNUM)
+
+/* Pick a default value we can notice from override_options:
+ !v9: Default is on.
+ v9: Default is off.
+ Originally this was -1, but the container of options later changed to an
+ unsigned byte, so we picked 127 as the default value; unlike 0/1, it
+ still denotes an undefined default. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 127
+
+/* Functions which return large structures get the address at which to
+ place the wanted value at offset 64 from the frame.
+ We must reserve 64 bytes for the in and local registers.
+ v9: Functions which return large structures get the address at which to
+ place the wanted value from an invisible first argument. */
+#define STRUCT_VALUE_OFFSET 64
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The SPARC has various kinds of registers: general, floating point,
+ and condition codes [well, it has others as well, but none that we
+ care directly about].
+
+ For v9 we must distinguish between the upper and lower floating point
+ registers because the upper ones can't hold SFmode values.
+ HARD_REGNO_MODE_OK won't help here because reload assumes that register(s)
+ satisfying a group need for a class will also satisfy a single need for
+ that class. EXTRA_FP_REGS is a bit of a misnomer as it covers all 64 fp
+ regs.
+
+ It is important that one class contains all the general and all the standard
+ fp regs. Otherwise find_reg() won't properly allocate int regs for moves,
+ because reg_class_record() will bias the selection in favor of fp regs,
+ because reg_class_subunion[GENERAL_REGS][FP_REGS] will yield FP_REGS,
+ because FP_REGS > GENERAL_REGS.
+
+ It is also important that one class contain all the general and all
+ the fp regs. Otherwise when spilling a DFmode reg, it may be from
+ EXTRA_FP_REGS but find_reloads() may use class
+ GENERAL_OR_FP_REGS. This will cause allocate_reload_reg() to die
+ because the compiler thinks it doesn't have a spill reg when in
+ fact it does.
+
+ v9 also has 4 floating point condition code registers. Since we don't
+ have a class that is the union of FPCC_REGS with either of the others,
+ it is important that it appear first. Otherwise the compiler will die
+ trying to compile _fixunsdfsi because fix_truncdfsi2 won't match its
+ constraints.
+
+ It is important that SPARC_ICC_REG have class NO_REGS. Otherwise combine
+ may try to use it to hold an SImode value. See register_operand.
+ ??? Should %fcc[0123] be handled similarly?
+*/
+
+enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS,
+ EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
+ ALL_REGS, LIM_REG_CLASSES };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+ { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS", \
+ "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", \
+ "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+ {{0, 0, 0, 0}, /* NO_REGS */ \
+ {0, 0, 0, 0xf}, /* FPCC_REGS */ \
+ {0xffff, 0, 0, 0}, /* I64_REGS */ \
+ {-1, 0, 0, 0x20}, /* GENERAL_REGS */ \
+ {0, -1, 0, 0}, /* FP_REGS */ \
+ {0, -1, -1, 0}, /* EXTRA_FP_REGS */ \
+ {-1, -1, 0, 0x20}, /* GENERAL_OR_FP_REGS */ \
+ {-1, -1, -1, 0x20}, /* GENERAL_OR_EXTRA_FP_REGS */ \
+ {-1, -1, -1, 0x3f}} /* ALL_REGS */
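+
+/* Reading the initializer above: each row is a HARD_REG_SET of four
+ 32-bit words, with bit N of word N/32 standing for hard register N.
+ E.g. FPCC_REGS is {0, 0, 0, 0xf}: bits 96-99, i.e. %fcc0-%fcc3; and
+ GENERAL_REGS is {-1, 0, 0, 0x20}: regs 0-31 plus bit 101, the soft
+ frame pointer %sfp. */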
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+extern enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
+
+#define REGNO_REG_CLASS(REGNO) sparc_regno_reg_class[(REGNO)]
+
+/* The following macro defines cover classes for the Integrated Register
+ Allocator. Cover classes are a set of non-intersecting register
+ classes covering all hard registers used for register allocation
+ purposes. Any move between two registers of a cover class should be
+ cheaper than a load or store of the registers. The macro value is an
+ array of register classes with LIM_REG_CLASSES used as the end
+ marker. */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, EXTRA_FP_REGS, FPCC_REGS, LIM_REG_CLASSES \
+}
+
+/* Defines invalid mode changes. Borrowed from pa64-regs.h.
+
+ SImode loads to floating-point registers are not zero-extended.
+ The definition for LOAD_EXTEND_OP specifies that integer loads
+ narrower than BITS_PER_WORD will be zero-extended. As a result,
+ we inhibit changes from SImode unless they are to a mode that is
+ identical in size. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (TARGET_ARCH64 \
+ && (FROM) == SImode \
+ && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (CLASS, FP_REGS) : 0)
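+
+/* E.g. on arch64 this rejects changing a SImode fp register to DImode
+ (sizes 4 vs. 8 differ, and the SI load was not zero-extended), while a
+ SImode to SFmode change is still allowed since both are 4 bytes. */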
+
+/* This is the order in which to allocate registers normally.
+
+ We put %f0-%f7 last among the float registers, so as to make it more
+ likely that a pseudo-register which dies in the float return register
+ area will get allocated to the float return register, thus saving a move
+ instruction at the end of the function.
+
+ Similarly for integer return value registers.
+
+ We know in this case that we will not end up with a leaf function.
+
+ The register allocator is given the global and out registers first
+ because these registers are call clobbered and thus less useful to
+ global register allocation.
+
+ Next we list the local and in registers. They are not call clobbered
+ and thus very useful for global register allocation. We list the input
+ registers before the locals so that it is more likely the incoming
+ arguments received in those registers can just stay there and not be
+ reloaded. */
+
+#define REG_ALLOC_ORDER \
+{ 1, 2, 3, 4, 5, 6, 7, /* %g1-%g7 */ \
+ 13, 12, 11, 10, 9, 8, /* %o5-%o0 */ \
+ 15, /* %o7 */ \
+ 16, 17, 18, 19, 20, 21, 22, 23, /* %l0-%l7 */ \
+ 29, 28, 27, 26, 25, 24, 31, /* %i5-%i0,%i7 */\
+ 40, 41, 42, 43, 44, 45, 46, 47, /* %f8-%f15 */ \
+ 48, 49, 50, 51, 52, 53, 54, 55, /* %f16-%f23 */ \
+ 56, 57, 58, 59, 60, 61, 62, 63, /* %f24-%f31 */ \
+ 64, 65, 66, 67, 68, 69, 70, 71, /* %f32-%f39 */ \
+ 72, 73, 74, 75, 76, 77, 78, 79, /* %f40-%f47 */ \
+ 80, 81, 82, 83, 84, 85, 86, 87, /* %f48-%f55 */ \
+ 88, 89, 90, 91, 92, 93, 94, 95, /* %f56-%f63 */ \
+ 39, 38, 37, 36, 35, 34, 33, 32, /* %f7-%f0 */ \
+ 96, 97, 98, 99, /* %fcc0-3 */ \
+ 100, 0, 14, 30, 101} /* %icc, %g0, %o6, %i6, %sfp */
+
+/* This is the order in which to allocate registers for
+ leaf functions. If all registers can fit in the global and
+ output registers, then we have the possibility of having a leaf
+ function.
+
+ The macro actually mentions the input registers first,
+ because they get renumbered into the output registers once
+ we know we really do have a leaf function.
+
+ To be more precise, this register allocation order is used
+ when %o7 is found to not be clobbered right before register
+ allocation. Normally, the reason %o7 would be clobbered is
+ due to a call which could not be transformed into a sibling
+ call.
+
+ As a consequence, it is possible to use the leaf register
+ allocation order and not end up with a leaf function. We will
+ not get suboptimal register allocation in that case because by
+ definition of being potentially leaf, there were no function
+ calls. Therefore, allocation order within the local register
+ window is not critical like it is when we do have function calls. */
+
+#define REG_LEAF_ALLOC_ORDER \
+{ 1, 2, 3, 4, 5, 6, 7, /* %g1-%g7 */ \
+ 29, 28, 27, 26, 25, 24, /* %i5-%i0 */ \
+ 15, /* %o7 */ \
+ 13, 12, 11, 10, 9, 8, /* %o5-%o0 */ \
+ 16, 17, 18, 19, 20, 21, 22, 23, /* %l0-%l7 */ \
+ 40, 41, 42, 43, 44, 45, 46, 47, /* %f8-%f15 */ \
+ 48, 49, 50, 51, 52, 53, 54, 55, /* %f16-%f23 */ \
+ 56, 57, 58, 59, 60, 61, 62, 63, /* %f24-%f31 */ \
+ 64, 65, 66, 67, 68, 69, 70, 71, /* %f32-%f39 */ \
+ 72, 73, 74, 75, 76, 77, 78, 79, /* %f40-%f47 */ \
+ 80, 81, 82, 83, 84, 85, 86, 87, /* %f48-%f55 */ \
+ 88, 89, 90, 91, 92, 93, 94, 95, /* %f56-%f63 */ \
+ 39, 38, 37, 36, 35, 34, 33, 32, /* %f7-%f0 */ \
+ 96, 97, 98, 99, /* %fcc0-3 */ \
+ 100, 0, 14, 30, 31, 101} /* %icc, %g0, %o6, %i6, %i7, %sfp */
+
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
+
+extern char sparc_leaf_regs[];
+#define LEAF_REGISTERS sparc_leaf_regs
+
+extern char leaf_reg_remap[];
+#define LEAF_REG_REMAP(REGNO) (leaf_reg_remap[REGNO])
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS GENERAL_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Local macro to handle the two v9 classes of FP regs. */
+#define FP_REG_CLASS_P(CLASS) ((CLASS) == FP_REGS || (CLASS) == EXTRA_FP_REGS)
+
+/* Predicates for 10-bit, 11-bit and 13-bit signed constants. */
+#define SPARC_SIMM10_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x200 < 0x400)
+#define SPARC_SIMM11_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x400 < 0x800)
+#define SPARC_SIMM13_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x1000 < 0x2000)
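+
+/* These rely on the standard unsigned-overflow idiom: in the 13-bit case
+ (unsigned HOST_WIDE_INT) (X) + 0x1000 < 0x2000 holds exactly for
+ -4096 <= X <= 4095. E.g. X == -4096 wraps to 0 (accepted) while
+ X == 4096 gives 0x2000 (rejected). */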
+
+/* 10- and 11-bit immediates are only used for a few specific insns.
+ SMALL_INT is used throughout the port so we continue to use it. */
+#define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X)))
+
+/* Predicate for constants that can be loaded with a sethi instruction.
+ This is the general, 64-bit aware, bitwise version that ensures that
+ only constants whose representation fits in the mask
+
+ 0x00000000fffffc00
+
+ are accepted. It will reject, for example, negative SImode constants
+ on 64-bit hosts, so correct handling is to mask the value beforehand
+ according to the mode of the instruction. */
+#define SPARC_SETHI_P(X) \
+ (((unsigned HOST_WIDE_INT) (X) \
+ & ((unsigned HOST_WIDE_INT) 0x3ff - GET_MODE_MASK (SImode) - 1)) == 0)
+
+/* Version of the above predicate for SImode constants and below. */
+#define SPARC_SETHI32_P(X) \
+ (SPARC_SETHI_P ((unsigned HOST_WIDE_INT) (X) & GET_MODE_MASK (SImode)))
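+
+/* On a 64-bit host the mask computed above is 0xffffffff000003ff, the
+ complement of 0x00000000fffffc00. So, for example, 0x12345000 is
+ accepted (only bits 10-31 set) while 0x12345678 is rejected (low 10
+ bits set), and (HOST_WIDE_INT) -1 is rejected (bits above 31 set),
+ which is why SImode values must be masked first, as SPARC_SETHI32_P
+ does. */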
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class. */
+/* - We can't load constants into FP registers.
+ - We can't load FP constants into integer registers when soft-float,
+ because there is no soft-float pattern with an r/F constraint.
+ - We can't load FP constants into integer registers for TFmode unless
+ it is 0.0L, because there is no movtf pattern with an r/F constraint.
+ - Try to reload integer constants (symbolic or otherwise) back into
+ registers directly, rather than having them dumped to memory. */
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ (CONSTANT_P (X) \
+ ? ((FP_REG_CLASS_P (CLASS) \
+ || (CLASS) == GENERAL_OR_FP_REGS \
+ || (CLASS) == GENERAL_OR_EXTRA_FP_REGS \
+ || (GET_MODE_CLASS (GET_MODE (X)) == MODE_FLOAT \
+ && ! TARGET_FPU) \
+ || (GET_MODE (X) == TFmode \
+ && ! const_zero_operand (X, TFmode))) \
+ ? NO_REGS \
+ : (!FP_REG_CLASS_P (CLASS) \
+ && GET_MODE_CLASS (GET_MODE (X)) == MODE_INT) \
+ ? GENERAL_REGS \
+ : (CLASS)) \
+ : (CLASS))
+
+/* Return the register class of a scratch register needed to load IN into
+ a register of class CLASS in MODE.
+
+ We need a temporary when loading/storing a HImode/QImode value
+ between memory and the FPU registers. This can happen when combine puts
+ a paradoxical subreg in a float/fix conversion insn.
+
+ We need a temporary when loading/storing a DFmode value between
+ unaligned memory and the upper FPU registers. */
+
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, IN) \
+ ((FP_REG_CLASS_P (CLASS) \
+ && ((MODE) == HImode || (MODE) == QImode) \
+ && (GET_CODE (IN) == MEM \
+ || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
+ && true_regnum (IN) == -1))) \
+ ? GENERAL_REGS \
+ : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode \
+ && GET_CODE (IN) == MEM && TARGET_ARCH32 \
+ && ! mem_min_alignment ((IN), 8)) \
+ ? FP_REGS \
+ : (((TARGET_CM_MEDANY \
+ && symbolic_operand ((IN), (MODE))) \
+ || (TARGET_CM_EMBMEDANY \
+ && text_segment_operand ((IN), (MODE)))) \
+ && !flag_pic) \
+ ? GENERAL_REGS \
+ : NO_REGS)
+
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, IN) \
+ ((FP_REG_CLASS_P (CLASS) \
+ && ((MODE) == HImode || (MODE) == QImode) \
+ && (GET_CODE (IN) == MEM \
+ || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
+ && true_regnum (IN) == -1))) \
+ ? GENERAL_REGS \
+ : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode \
+ && GET_CODE (IN) == MEM && TARGET_ARCH32 \
+ && ! mem_min_alignment ((IN), 8)) \
+ ? FP_REGS \
+ : (((TARGET_CM_MEDANY \
+ && symbolic_operand ((IN), (MODE))) \
+ || (TARGET_CM_EMBMEDANY \
+ && text_segment_operand ((IN), (MODE)))) \
+ && !flag_pic) \
+ ? GENERAL_REGS \
+ : NO_REGS)
+
+/* On SPARC it is not possible to directly move data between
+ GENERAL_REGS and FP_REGS. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ (FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2))
+
+/* get_secondary_mem widens its argument to BITS_PER_WORD, which does not
+ work on v9 because the movsi and movsf patterns don't handle r/f moves.
+ For v8 we copy the default definition. */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (TARGET_ARCH64 \
+ ? (GET_MODE_BITSIZE (MODE) < 32 \
+ ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ : MODE) \
+ : (GET_MODE_BITSIZE (MODE) < BITS_PER_WORD \
+ ? mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0) \
+ : MODE))
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+/* On SPARC, this is the size of MODE in words. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (FP_REG_CLASS_P (CLASS) ? (GET_MODE_SIZE (MODE) + 3) / 4 \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset of first parameter from the argument pointer register value.
+ !v9: This is 64 for the ins and locals, plus 4 for the struct-return reg
+ even if this function isn't going to use it.
+ v9: This is 128 for the ins and locals. */
+#define FIRST_PARM_OFFSET(FNDECL) \
+ (TARGET_ARCH64 ? 16 * UNITS_PER_WORD : STRUCT_VALUE_OFFSET + UNITS_PER_WORD)
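+
+/* Plugging in the numbers: TARGET_ARCH64 gives 16 * 8 == 128 bytes of
+ register save area, while arch32 gives STRUCT_VALUE_OFFSET + 4 == 68,
+ i.e. the 64-byte save area plus the struct-return slot. */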
+
+/* Offset from the argument pointer register value to the CFA.
+ This is different from FIRST_PARM_OFFSET because the register window
+ comes between the CFA and the arguments. */
+#define ARG_POINTER_CFA_OFFSET(FNDECL) 0
+
+/* When a parameter is passed in a register, stack space is still
+ allocated for it.
+ !v9: All 6 possible integer registers have backing store allocated.
+ v9: Only space for the arguments passed is allocated. */
+/* ??? Ideally, we'd use zero here (as the minimum), but zero has special
+ meaning to the backend. Further, we need to be able to detect if a
+ varargs/unprototyped function is called, as it may want to spill more
+ registers than we've provided space for. Ugly, ugly. So for now we
+ retain all 6 slots even for v9. */
+#define REG_PARM_STACK_SPACE(DECL) (6 * UNITS_PER_WORD)
+
+/* Definitions for register elimination. */
+
+#define ELIMINABLE_REGS \
+ {{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM} }
+
+/* We always pretend that this is a leaf function because if it's not,
+ there's no point in trying to eliminate the frame pointer. If it
+ is a leaf function, we guessed right! */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do { \
+ if ((TO) == STACK_POINTER_REGNUM) \
+ (OFFSET) = sparc_compute_frame_size (get_frame_size (), 1); \
+ else \
+ (OFFSET) = 0; \
+ (OFFSET) += SPARC_STACK_BIAS; \
+ } while (0)
+
+/* Keep the stack pointer constant throughout the function.
+ This is both an optimization and a necessity: longjmp
+ doesn't behave itself when the stack pointer moves within
+ the function! */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Define this macro if the target machine has "register windows". This
+ C expression returns the register number as seen by the called function
+ corresponding to register number OUT as seen by the calling function.
+ Return OUT if register number OUT is not an outbound register. */
+
+#define INCOMING_REGNO(OUT) \
+ (((OUT) < 8 || (OUT) > 15) ? (OUT) : (OUT) + 16)
+
+/* Define this macro if the target machine has "register windows". This
+ C expression returns the register number as seen by the calling function
+ corresponding to register number IN as seen by the called function.
+ Return IN if register number IN is not an inbound register. */
+
+#define OUTGOING_REGNO(IN) \
+ (((IN) < 24 || (IN) > 31) ? (IN) : (IN) - 16)
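+
+/* Concretely: when the register window shifts on a call, the caller's
+ %o0 (regno 8) becomes the callee's %i0 (regno 24), so
+ INCOMING_REGNO (8) == 24 and OUTGOING_REGNO (24) == 8; registers
+ outside the out/in ranges map to themselves. */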
+
+/* Define this macro if the target machine has register windows. This
+ C expression returns true if the register is call-saved but is in the
+ register window. */
+
+#define LOCAL_REGNO(REGNO) \
+ ((REGNO) >= 16 && (REGNO) <= 31)
+
+/* Define the size of space to allocate for the return value of an
+ untyped_call. */
+
+#define APPLY_RESULT_SIZE (TARGET_ARCH64 ? 24 : 16)
+
+/* 1 if N is a possible register number for function argument passing.
+ On SPARC, these are the "output" registers. v9 also uses %f0-%f31. */
+
+#define FUNCTION_ARG_REGNO_P(N) \
+(TARGET_ARCH64 \
+ ? (((N) >= 8 && (N) <= 13) || ((N) >= 32 && (N) <= 63)) \
+ : ((N) >= 8 && (N) <= 13))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On SPARC (!v9), this is a single integer, which is a number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address).
+ Thus 7 or more means all following args should go on the stack.
+
+ For v9, we also need to know whether a prototype is present. */
+
+struct sparc_args {
+ int words; /* number of words passed so far */
+ int prototype_p; /* nonzero if a prototype is present */
+ int libcall_p; /* nonzero if a library call */
+};
+#define CUMULATIVE_ARGS struct sparc_args
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+init_cumulative_args (& (CUM), (FNTYPE), (LIBNAME), (FNDECL));
+
+/* If defined, a C expression which determines whether, and in which direction,
+ to pad out an argument with extra space. The value should be of type
+ `enum direction': either `upward' to pad above the argument,
+ `downward' to pad below, or `none' to inhibit padding. */
+
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+function_arg_padding ((MODE), (TYPE))
+
+
+/* Generate the special assembly code needed to tell the assembler whatever
+ it might need to know about the return value of a function.
+
+ For SPARC assemblers, we need to output a .proc pseudo-op which conveys
+ information to the assembler relating to peephole optimization (done in
+ the assembler). */
+
+#define ASM_DECLARE_RESULT(FILE, RESULT) \
+ fprintf ((FILE), "\t.proc\t0%lo\n", sparc_type_code (TREE_TYPE (RESULT)))
+
+/* Output the special assembly code needed to tell the assembler that some
+ register is used as a global register variable.
+
+ The SPARC 64-bit psABI declares registers %g2 and %g3 as application
+ registers and %g6 and %g7 as OS registers. Any object using them
+ should declare (it must for %g2/%g3, it may for %g6/%g7) that it uses
+ them and how they are used (as scratch or as some global variable).
+ The linker will then refuse to link together objects which use those
+ registers incompatibly.
+
+ Unless the registers are used as scratch, two different global register
+ variables cannot be declared with the same name, so in the unlikely
+ case of a global register variable occupying more than one register
+ we prefix the second and following registers with .gnu.part1. etc. */
+
+extern GTY(()) char sparc_hard_reg_printed[8];
+
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+#define ASM_DECLARE_REGISTER_GLOBAL(FILE, DECL, REGNO, NAME) \
+do { \
+ if (TARGET_ARCH64) \
+ { \
+ int end = HARD_REGNO_NREGS ((REGNO), DECL_MODE (DECL)) + (REGNO); \
+ int reg; \
+ for (reg = (REGNO); reg < 8 && reg < end; reg++) \
+ if ((reg & ~1) == 2 || (reg & ~1) == 6) \
+ { \
+ if (reg == (REGNO)) \
+ fprintf ((FILE), "\t.register\t%%g%d, %s\n", reg, (NAME)); \
+ else \
+ fprintf ((FILE), "\t.register\t%%g%d, .gnu.part%d.%s\n", \
+ reg, reg - (REGNO), (NAME)); \
+ sparc_hard_reg_printed[reg] = 1; \
+ } \
+ } \
+} while (0)
+#endif
+
+
+/* Emit rtl for profiling. */
+#define PROFILE_HOOK(LABEL) sparc_profile_hook (LABEL)
+
+/* All the work done in PROFILE_HOOK, but still required. */
+#define FUNCTION_PROFILER(FILE, LABELNO) do { } while (0)
+
+/* Set the name of the mcount function for the system. */
+#define MCOUNT_FUNCTION "*mcount"
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK \
+ (get_frame_size () != 0 \
+ || cfun->calls_alloca || crtl->outgoing_args_size)
+
+/* Define registers used by the epilogue and return instruction. */
+#define EPILOGUE_USES(REGNO) ((REGNO) == 31 \
+ || (crtl->calls_eh_return && (REGNO) == 1))
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16)
+
+#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */
+
+/* Generate RTL to flush the register windows so as to make arbitrary frames
+ available. */
+#define SETUP_FRAME_ADDRESSES() \
+ emit_insn (gen_flush_register_windows ())
+
+/* Given an rtx for the address of a frame,
+ return an rtx for the address of the word in the frame
+ that holds the dynamic chain--the previous frame's address. */
+#define DYNAMIC_CHAIN_ADDRESS(frame) \
+ plus_constant (frame, 14 * UNITS_PER_WORD + SPARC_STACK_BIAS)
+
+/* Given an rtx for the frame pointer,
+ return an rtx for the address of the frame. */
+#define FRAME_ADDR_RTX(frame) plus_constant (frame, SPARC_STACK_BIAS)
+
+/* The return address isn't on the stack, it is in a register, so we can't
+ access it from the current frame pointer. We can access it from the
+ previous frame pointer though by reading a value from the register window
+ save area. */
+#define RETURN_ADDR_IN_PREVIOUS_FRAME
+
+/* This is the offset from the return address to the true next instruction
+ to be executed for the current function. */
+#define RETURN_ADDR_OFFSET \
+ (8 + 4 * (! TARGET_ARCH64 && cfun->returns_struct))
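+
+/* That is, the return address register points at the call instruction
+ itself: execution normally resumes at +8 (past the call and its delay
+ slot), or at +12 when a 32-bit function returns a structure and the
+ unimp word following the delay slot must be skipped as well. */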
+
+/* The current return address is in %i7. The return address of anything
+ farther back is in the register window save area at [%fp+60]. */
+/* ??? This ignores the fact that the actual return address is +8 for normal
+ returns, and +12 for structure returns. */
+#define RETURN_ADDR_RTX(count, frame) \
+ ((count == -1) \
+ ? gen_rtx_REG (Pmode, 31) \
+ : gen_rtx_MEM (Pmode, \
+ memory_address (Pmode, plus_constant (frame, \
+ 15 * UNITS_PER_WORD \
+ + SPARC_STACK_BIAS))))
+
+/* Before the prologue, the return address is %o7 + 8. OK, sometimes it's
+ +12, but always using +8 is close enough for frame unwind purposes.
+ Actually, just using %o7 is close enough for unwinding, but %o7+8
+ is something you can return to. */
+#define INCOMING_RETURN_ADDR_RTX \
+ plus_constant (gen_rtx_REG (word_mode, 15), 8)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (15)
+
+/* The offset from the incoming value of %sp to the top of the stack frame
+ for the current function. On sparc64, we have to account for the stack
+ bias if present. */
+#define INCOMING_FRAME_SP_OFFSET SPARC_STACK_BIAS
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 24 : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 1) /* %g1 */
+#define EH_RETURN_HANDLER_RTX gen_rtx_REG (Pmode, 31) /* %i7 */
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ If assembler and linker properly support .uaword %r_disp32(foo),
+ then use PC relative 32-bit relocations instead of absolute relocs
+ for shared libraries. On sparc64, use pc relative 32-bit relocs even
+ for binaries, to save memory.
+
+ binutils 2.12 would emit an R_SPARC_DISP32 dynamic relocation if the
+ symbol that %r_disp32() is against was not local, but .hidden. In that
+ case, we have to use DW_EH_PE_absptr for the PIC personality. */
+#ifdef HAVE_AS_SPARC_UA_PCREL
+#ifdef HAVE_AS_SPARC_UA_PCREL_HIDDEN
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (flag_pic \
+ ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4\
+ : ((TARGET_ARCH64 && ! GLOBAL) \
+ ? (DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_absptr))
+#else
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (flag_pic \
+ ? (GLOBAL ? DW_EH_PE_absptr : (DW_EH_PE_pcrel | DW_EH_PE_sdata4)) \
+ : ((TARGET_ARCH64 && ! GLOBAL) \
+ ? (DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_absptr))
+#endif
+
+/* Emit a PC-relative relocation. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ fprintf (FILE, "%%r_disp%d(", SIZE * 8); \
+ assemble_name (FILE, LABEL); \
+ fputc (')', FILE); \
+ } while (0)
+#endif
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < (unsigned)32 \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || reg_renumber[REGNO] == FRAME_POINTER_REGNUM)
+
+#define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO)
+
+#define REGNO_OK_FOR_FP_P(REGNO) \
+ (((unsigned) (REGNO) - 32 < (TARGET_V9 ? (unsigned)64 : (unsigned)32)) \
+ || ((unsigned) reg_renumber[REGNO] - 32 < (TARGET_V9 ? (unsigned)64 : (unsigned)32)))
+#define REGNO_OK_FOR_CCFP_P(REGNO) \
+ (TARGET_V9 \
+ && (((unsigned) (REGNO) - 96 < (unsigned)4) \
+ || ((unsigned) reg_renumber[REGNO] - 96 < (unsigned)4)))
+
+/* Now macros that check whether X is a register and also,
+ strictly, whether it is in a specified class.
+
+ These macros are specific to the SPARC, and may be used only
+ in code for printing assembler insns and in conditions for
+ define_optimization. */
+
+/* 1 if X is an fp register. */
+
+#define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X)))
+
+/* Is X, a REG, an in or global register? I.e., is its regno 0..7 or 24..31? */
+#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31))
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address.
+ When PIC, we do not accept an address that would require a scratch reg
+ to load into a register. */
+
+#define CONSTANT_ADDRESS_P(X) constant_address_p (X)
+
+/* Define this, so that when PIC, reload won't try to reload invalid
+ addresses which require two reload registers. */
+
+#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ Anything can be made to work except floating point constants.
+ If TARGET_VIS, 0.0 can be made to work as well. */
+
+#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) \
+ (REGNO (X) < 32 \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_INDEX_P (X)
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* Should gcc use [%reg+%lo(xx)+offset] addresses? */
+
+#ifdef HAVE_AS_OFFSETABLE_LO10
+#define USE_AS_OFFSETABLE_LO10 1
+#else
+#define USE_AS_OFFSETABLE_LO10 0
+#endif
+
+/* On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
+ ordinarily. This changes a bit when generating PIC. The details are
+ in sparc.c's implementation of TARGET_LEGITIMATE_ADDRESS_P. */
+
+#define SYMBOLIC_CONST(X) symbolic_operand (X, VOIDmode)
+
+#define RTX_OK_FOR_BASE_P(X) \
+ ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
+ || (GET_CODE (X) == SUBREG \
+ && GET_CODE (SUBREG_REG (X)) == REG \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (X))))
+
+#define RTX_OK_FOR_INDEX_P(X) \
+ ((GET_CODE (X) == REG && REG_OK_FOR_INDEX_P (X)) \
+ || (GET_CODE (X) == SUBREG \
+ && GET_CODE (SUBREG_REG (X)) == REG \
+ && REG_OK_FOR_INDEX_P (SUBREG_REG (X))))
+
+#define RTX_OK_FOR_OFFSET_P(X) \
+ (GET_CODE (X) == CONST_INT && INTVAL (X) >= -0x1000 && INTVAL (X) < 0x1000 - 8)
+
+#define RTX_OK_FOR_OLO10_P(X) \
+ (GET_CODE (X) == CONST_INT && INTVAL (X) >= -0x1000 && INTVAL (X) < 0xc00 - 8)
+
+
+/* Try a machine-dependent way of reloading an illegitimate address
+ operand. If we find one, push the reload and jump to WIN. This
+ macro is used in only one place: `find_reloads_address' in reload.c. */
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+do { \
+ int win; \
+ (X) = sparc_legitimize_reload_address ((X), (MODE), (OPNUM), \
+ (int)(TYPE), (IND_LEVELS), &win); \
+ if (win) \
+ goto WIN; \
+} while (0)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+/* If we ever implement any of the full code models (such as CM_FULLANY),
+ this has to be DImode. */
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+#define CASE_VECTOR_MODE \
+(! TARGET_PTR64 ? SImode : flag_pic ? SImode : TARGET_CM_MEDLOW ? SImode : DImode)
+#else
+/* If the assembler does not have a working .subsection -1, we use DImode
+ for PIC, as otherwise we would have to sign-extend, which slows things
+ down. */
+#define CASE_VECTOR_MODE \
+(! TARGET_PTR64 ? SImode : flag_pic ? DImode : TARGET_CM_MEDLOW ? SImode : DImode)
+#endif
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 8
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead. */
+
+#define MOVE_RATIO(speed) ((speed) ? 8 : 3)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Nonzero if access to memory by bytes is slow and undesirable.
+ For RISC chips, it means that access to memory by bytes is no
+ better than access by words when possible, so grab a whole word
+ and maybe make use of that. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Define this to be nonzero if shift instructions ignore all but the low-order
+ few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. For floating-point,
+ CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
+ is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
+ processing is needed. */
+#define SELECT_CC_MODE(OP,X,Y) select_cc_mode ((OP), (X), (Y))
+
+/* Return nonzero if MODE implies a floating point inequality can be
+ reversed. For SPARC this is always true because we have a full
+ complement of ordered and unordered comparisons, but until generic
+ code knows how to reverse it correctly we keep the old definition. */
+#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPEmode && (MODE) != CCFPmode)
+
+/* A function address in a call instruction for indexing purposes. */
+#define FUNCTION_MODE Pmode
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+#define NO_FUNCTION_CSE
+
+/* alloca should avoid clobbering the old register save area. */
+#define SETJMP_VIA_SAVE_AREA
+
+/* The _Q_* comparison libcalls return booleans. */
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) ((MODE) == TFmode)
+
+/* Assume by default that the _Qp_* 64-bit libcalls are implemented such
+ that the inputs are fully consumed before the output memory is clobbered. */
+
+#define TARGET_BUGGY_QP_LIB 0
+
+/* Assume by default that we do not have the Solaris-specific conversion
+ routines nor 64-bit integer multiply and divide routines. */
+
+#define SUN_CONVERSION_LIBFUNCS 0
+#define DITF_CONVERSION_LIBFUNCS 0
+#define SUN_INTEGER_MULTIPLY_64 0
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS)
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
+ || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
+ || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \
+ ? ((sparc_cpu == PROCESSOR_ULTRASPARC \
+ || sparc_cpu == PROCESSOR_ULTRASPARC3 \
+ || sparc_cpu == PROCESSOR_NIAGARA \
+ || sparc_cpu == PROCESSOR_NIAGARA2) ? 12 : 6) : 2)
+
+/* Provide the cost of a branch. For pre-v9 processors we use
+ a value of 3 to take into account the potential annulling of
+ the delay slot (which ends up being a bubble in the pipeline slot)
+ plus a cycle to take into consideration the instruction cache
+ effects.
+
+ On v9 and later, which have branch prediction facilities, we set
+ it to the depth of the pipeline as that is the cost of a
+ mispredicted branch.
+
+ On Niagara, normal branches insert 3 bubbles into the pipe
+ and annulled branches insert 4 bubbles.
+
+ On Niagara-2, a not-taken branch costs 1 cycle whereas a taken
+ branch costs 6 cycles. */
+
+#define BRANCH_COST(speed_p, predictable_p) \
+ ((sparc_cpu == PROCESSOR_V9 \
+ || sparc_cpu == PROCESSOR_ULTRASPARC) \
+ ? 7 \
+ : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
+ ? 9 \
+ : (sparc_cpu == PROCESSOR_NIAGARA \
+ ? 4 \
+ : (sparc_cpu == PROCESSOR_NIAGARA2 \
+ ? 5 \
+ : 3))))
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+
+#define ASM_COMMENT_START "!"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON ""
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF ""
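+
+/* For instance, on V9 MODES_TIEABLE_P (SFmode, DFmode) is 0: both are
+ MODE_FLOAT, but DFmode may live in the upper %f32-%f62 registers where
+ SFmode cannot, so tying could strand a pseudo in a register invalid
+ for one of the modes; MODES_TIEABLE_P (SImode, HImode) is still 1. */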
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{"%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", \
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", \
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", \
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", \
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", \
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", \
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", \
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", \
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", \
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", \
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", \
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", \
+ "%fcc0", "%fcc1", "%fcc2", "%fcc3", "%icc", "%sfp" }
+
+/* Define additional names for use in asm clobbers and asm declarations. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{{"ccr", SPARC_ICC_REG}, {"cc", SPARC_ICC_REG}}
+
+/* On Sun 4, this limit is 2048. We use 1000 to be safe, since the length
+ can run past this up to a continuation point. Once we used 1500, but
+ a single entry in C++ can run more than 500 bytes, due to the length of
+ mangled symbol names. dbxout.c should really be fixed to do
+ continuations when they are actually needed instead of trying to
+ guess... */
+#define DBX_CONTIN_LENGTH 1000
+
+/* This is how to output a command to make the user-level label named NAME
+ defined for reference from other files. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf ((LABEL), "*%s%ld", (PREFIX), (long)(NUM))
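+
+/* E.g. ASM_GENERATE_INTERNAL_LABEL (buf, "L", 42) stores "*L42" in buf;
+ the leading '*' tells assemble_name to emit the name verbatim rather
+ than prepending USER_LABEL_PREFIX. */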
+
+/* This is how we hook in and defer the case-vector until the end of
+ the function. */
+#define ASM_OUTPUT_ADDR_VEC(LAB,VEC) \
+ sparc_defer_case_vector ((LAB),(VEC), 0)
+
+#define ASM_OUTPUT_ADDR_DIFF_VEC(LAB,VEC) \
+ sparc_defer_case_vector ((LAB),(VEC), 1)
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+do { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
+ if (CASE_VECTOR_MODE == SImode) \
+ fprintf (FILE, "\t.word\t"); \
+ else \
+ fprintf (FILE, "\t.xword\t"); \
+ assemble_name (FILE, label); \
+ fputc ('\n', FILE); \
+} while (0)
+
+/* This is how to output an element of a case-vector that is relative.
+ (SPARC uses such vectors only when generating PIC.) */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+do { \
+ char label[30]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (VALUE)); \
+ if (CASE_VECTOR_MODE == SImode) \
+ fprintf (FILE, "\t.word\t"); \
+ else \
+ fprintf (FILE, "\t.xword\t"); \
+ assemble_name (FILE, label); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (REL)); \
+ fputc ('-', FILE); \
+ assemble_name (FILE, label); \
+ fputc ('\n', FILE); \
+} while (0)
+
+/* This is what to output before and after a case-vector (both
+ relative and absolute). If .subsection -1 works, we put case-vectors
+ at the beginning of the current section. */
+
+#ifdef HAVE_GAS_SUBSECTION_ORDERING
+
+#define ASM_OUTPUT_ADDR_VEC_START(FILE) \
+ fprintf(FILE, "\t.subsection\t-1\n")
+
+#define ASM_OUTPUT_ADDR_VEC_END(FILE) \
+ fprintf(FILE, "\t.previous\n")
+
+#endif
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", (1<<(LOG)))
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.skip "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs ("\t.common ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",\"bss\"\n", (SIZE)))
+
+/* This says how to output an assembler line to define a local common
+ symbol. */
+
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGNED) \
+( fputs ("\t.reserve ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",\"bss\",%u\n", \
+ (SIZE), ((ALIGNED) / BITS_PER_UNIT)))
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ do { \
+ ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \
+ } while (0)
+
+#define IDENT_ASM_OP "\t.ident\t"
+
+/* Output #ident as a .ident. */
+
+#define ASM_OUTPUT_IDENT(FILE, NAME) \
+ fprintf (FILE, "%s\"%s\"\n", IDENT_ASM_OP, NAME);
+
+/* Prettify the assembly. */
+
+extern int sparc_indent_opcode;
+
+#define ASM_OUTPUT_OPCODE(FILE, PTR) \
+ do { \
+ if (sparc_indent_opcode) \
+ { \
+ putc (' ', FILE); \
+ sparc_indent_opcode = 0; \
+ } \
+ } while (0)
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
+ ((CHAR) == '#' || (CHAR) == '*' || (CHAR) == '(' \
+ || (CHAR) == ')' || (CHAR) == '_' || (CHAR) == '&')
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ For `%' followed by punctuation, CODE is the punctuation and X is null. */
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+
+/* Print a memory address as an operand to reference that memory location. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+{ register rtx base, index = 0; \
+ int offset = 0; \
+ register rtx addr = ADDR; \
+ if (GET_CODE (addr) == REG) \
+ fputs (reg_names[REGNO (addr)], FILE); \
+ else if (GET_CODE (addr) == PLUS) \
+ { \
+ if (GET_CODE (XEXP (addr, 0)) == CONST_INT) \
+ offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);\
+ else if (GET_CODE (XEXP (addr, 1)) == CONST_INT) \
+ offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);\
+ else \
+ base = XEXP (addr, 0), index = XEXP (addr, 1); \
+ if (GET_CODE (base) == LO_SUM) \
+ { \
+ gcc_assert (USE_AS_OFFSETABLE_LO10 \
+ && TARGET_ARCH64 \
+ && ! TARGET_CM_MEDMID); \
+ output_operand (XEXP (base, 0), 0); \
+ fputs ("+%lo(", FILE); \
+ output_address (XEXP (base, 1)); \
+ fprintf (FILE, ")+%d", offset); \
+ } \
+ else \
+ { \
+ fputs (reg_names[REGNO (base)], FILE); \
+ if (index == 0) \
+ fprintf (FILE, "%+d", offset); \
+ else if (GET_CODE (index) == REG) \
+ fprintf (FILE, "+%s", reg_names[REGNO (index)]); \
+ else if (GET_CODE (index) == SYMBOL_REF \
+ || GET_CODE (index) == LABEL_REF \
+ || GET_CODE (index) == CONST) \
+ fputc ('+', FILE), output_addr_const (FILE, index); \
+ else gcc_unreachable (); \
+ } \
+ } \
+ else if (GET_CODE (addr) == MINUS \
+ && GET_CODE (XEXP (addr, 1)) == LABEL_REF) \
+ { \
+ output_addr_const (FILE, XEXP (addr, 0)); \
+ fputs ("-(", FILE); \
+ output_addr_const (FILE, XEXP (addr, 1)); \
+ fputs ("-.)", FILE); \
+ } \
+ else if (GET_CODE (addr) == LO_SUM) \
+ { \
+ output_operand (XEXP (addr, 0), 0); \
+ if (TARGET_CM_MEDMID) \
+ fputs ("+%l44(", FILE); \
+ else \
+ fputs ("+%lo(", FILE); \
+ output_address (XEXP (addr, 1)); \
+ fputc (')', FILE); \
+ } \
+ else if (flag_pic && GET_CODE (addr) == CONST \
+ && GET_CODE (XEXP (addr, 0)) == MINUS \
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST \
+ && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS \
+ && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) \
+ { \
+ addr = XEXP (addr, 0); \
+ output_addr_const (FILE, XEXP (addr, 0)); \
+ /* Group the args of the second CONST in parenthesis. */ \
+ fputs ("-(", FILE); \
+ /* Skip past the second CONST--it does nothing for us. */\
+ output_addr_const (FILE, XEXP (XEXP (addr, 1), 0)); \
+ /* Close the parenthesis. */ \
+ fputc (')', FILE); \
+ } \
+ else \
+ { \
+ output_addr_const (FILE, addr); \
+ } \
+}
+
+/* TLS support defaulting to original Sun flavor. GNU extensions
+ must be activated in separate configuration files. */
+#ifdef HAVE_AS_TLS
+#define TARGET_TLS 1
+#else
+#define TARGET_TLS 0
+#endif
+
+#define TARGET_SUN_TLS TARGET_TLS
+#define TARGET_GNU_TLS 0
+
+/* The number of Pmode words for the setjmp buffer. */
+#define JMP_BUF_SIZE 12
+
+/* We use gcc _mcount for profiling. */
+#define NO_PROFILE_COUNTERS 0
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
new file mode 100644
index 000000000..06b34e908
--- /dev/null
+++ b/gcc/config/sparc/sparc.md
@@ -0,0 +1,7828 @@
+;; Machine description for SPARC chip for GCC
+;; Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+;; 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Michael Tiemann (tiemann@cygnus.com)
+;; 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
+;; at Cygnus Support.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+(define_constants
+ [(UNSPEC_MOVE_PIC 0)
+ (UNSPEC_UPDATE_RETURN 1)
+ (UNSPEC_LOAD_PCREL_SYM 2)
+ (UNSPEC_FRAME_BLOCKAGE 3)
+ (UNSPEC_MOVE_PIC_LABEL 5)
+ (UNSPEC_SETH44 6)
+ (UNSPEC_SETM44 7)
+ (UNSPEC_SETHH 9)
+ (UNSPEC_SETLM 10)
+ (UNSPEC_EMB_HISUM 11)
+ (UNSPEC_EMB_TEXTUHI 13)
+ (UNSPEC_EMB_TEXTHI 14)
+ (UNSPEC_EMB_TEXTULO 15)
+ (UNSPEC_EMB_SETHM 18)
+ (UNSPEC_MOVE_GOTDATA 19)
+
+ (UNSPEC_MEMBAR 20)
+
+ (UNSPEC_TLSGD 30)
+ (UNSPEC_TLSLDM 31)
+ (UNSPEC_TLSLDO 32)
+ (UNSPEC_TLSIE 33)
+ (UNSPEC_TLSLE 34)
+ (UNSPEC_TLSLD_BASE 35)
+
+ (UNSPEC_FPACK16 40)
+ (UNSPEC_FPACK32 41)
+ (UNSPEC_FPACKFIX 42)
+ (UNSPEC_FEXPAND 43)
+ (UNSPEC_FPMERGE 44)
+ (UNSPEC_MUL16AL 45)
+ (UNSPEC_MUL8UL 46)
+ (UNSPEC_MULDUL 47)
+ (UNSPEC_ALIGNDATA 48)
+ (UNSPEC_ALIGNADDR 49)
+ (UNSPEC_PDIST 50)
+
+ (UNSPEC_SP_SET 60)
+ (UNSPEC_SP_TEST 61)
+ ])
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_FLUSHW 1)
+ (UNSPECV_GOTO 2)
+ (UNSPECV_FLUSH 4)
+ (UNSPECV_SETJMP 5)
+ (UNSPECV_SAVEW 6)
+ (UNSPECV_CAS 8)
+ (UNSPECV_SWAP 9)
+ (UNSPECV_LDSTUB 10)
+ (UNSPECV_PROBE_STACK_RANGE 11)
+ ])
+
+
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+(define_mode_iterator I [QI HI SI DI])
+(define_mode_iterator F [SF DF TF])
+
+;; We don't define V1SI because SI should work just fine.
+(define_mode_iterator V32 [SF V2HI V4QI])
+(define_mode_iterator V32I [SI V2HI V4QI])
+
+(define_mode_iterator V64 [DF V2SI V4HI V8QI])
+(define_mode_iterator V64I [DI V2SI V4HI V8QI])
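+
+;; As an illustration of the iterators above: a single pattern written
+;; with the I iterator, e.g. (define_expand "mov<I:mode>" ...), stands
+;; for movqi, movhi, movsi and movdi at once, while a pattern using P
+;; is instantiated only for the mode satisfying its condition, i.e. SI
+;; when Pmode == SImode and DI when Pmode == DImode.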
+
+;; The upper 32 fp regs on the v9 can't hold SFmode values. To deal with this
+;; a second register class, EXTRA_FP_REGS, exists for the v9 chip. The name
+;; is a bit of a misnomer as it covers all 64 fp regs. The corresponding
+;; constraint letter is 'e'. To avoid any confusion, 'e' is used instead of
+;; 'f' for all DF/TFmode values, including those that are specific to the v8.
+
+
+;; Attribute for cpu type.
+;; These must match the values for enum processor_type in sparc.h.
+(define_attr "cpu"
+ "v7,
+ cypress,
+ v8,
+ supersparc,
+ hypersparc,
+ leon,
+ sparclite,
+ f930,
+ f934,
+ sparclite86x,
+ sparclet,
+ tsc701,
+ v9,
+ ultrasparc,
+ ultrasparc3,
+ niagara,
+ niagara2"
+ (const (symbol_ref "sparc_cpu_attr")))
+
+;; Attribute for the instruction set.
+;; At present we only need to distinguish v9/!v9, but for clarity we
+;; test TARGET_V8 too.
+(define_attr "isa" "v7,v8,v9,sparclet"
+ (const
+ (cond [(symbol_ref "TARGET_V9") (const_string "v9")
+ (symbol_ref "TARGET_V8") (const_string "v8")
+ (symbol_ref "TARGET_SPARCLET") (const_string "sparclet")]
+ (const_string "v7"))))
+
+;; Insn type.
+(define_attr "type"
+ "ialu,compare,shift,
+ load,sload,store,
+ uncond_branch,branch,call,sibcall,call_no_delay_slot,return,
+ imul,idiv,
+ fpload,fpstore,
+ fp,fpmove,
+ fpcmove,fpcrmove,
+ fpcmp,
+ fpmul,fpdivs,fpdivd,
+ fpsqrts,fpsqrtd,
+ fga,fgm_pack,fgm_mul,fgm_pdist,fgm_cmp,
+ cmove,
+ ialuX,
+ multi,savew,flushw,iflush,trap"
+ (const_string "ialu"))
+
+;; True if a branch/call has an empty delay slot and will emit a nop in it.
+(define_attr "empty_delay_slot" "false,true"
+ (symbol_ref "(empty_delay_slot (insn)
+ ? EMPTY_DELAY_SLOT_TRUE : EMPTY_DELAY_SLOT_FALSE)"))
+
+(define_attr "branch_type" "none,icc,fcc,reg"
+ (const_string "none"))
+
+(define_attr "pic" "false,true"
+ (symbol_ref "(flag_pic != 0 ? PIC_TRUE : PIC_FALSE)"))
+
+(define_attr "calls_alloca" "false,true"
+ (symbol_ref "(cfun->calls_alloca != 0
+ ? CALLS_ALLOCA_TRUE : CALLS_ALLOCA_FALSE)"))
+
+(define_attr "calls_eh_return" "false,true"
+ (symbol_ref "(crtl->calls_eh_return != 0
+ ? CALLS_EH_RETURN_TRUE : CALLS_EH_RETURN_FALSE)"))
+
+(define_attr "leaf_function" "false,true"
+ (symbol_ref "(current_function_uses_only_leaf_regs != 0
+ ? LEAF_FUNCTION_TRUE : LEAF_FUNCTION_FALSE)"))
+
+(define_attr "delayed_branch" "false,true"
+ (symbol_ref "(flag_delayed_branch != 0
+ ? DELAYED_BRANCH_TRUE : DELAYED_BRANCH_FALSE)"))
+
+;; Length (in # of insns).
+;; Beware that setting a length greater or equal to 3 for conditional branches
+;; has a side-effect (see output_cbranch and output_v9branch).
+(define_attr "length" ""
+ (cond [(eq_attr "type" "uncond_branch,call")
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (eq_attr "type" "sibcall")
+ (if_then_else (eq_attr "leaf_function" "true")
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 3)
+ (const_int 2))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1)))
+ (eq_attr "branch_type" "icc")
+ (if_then_else (match_operand 0 "noov_compare64_operator" "")
+ (if_then_else (lt (pc) (match_dup 1))
+ (if_then_else (lt (minus (match_dup 1) (pc)) (const_int 260000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3)))
+ (if_then_else (lt (minus (pc) (match_dup 1)) (const_int 260000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3))))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1)))
+ (eq_attr "branch_type" "fcc")
+ (if_then_else (match_operand 0 "fcc0_register_operand" "")
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0))
+ (const_int 3)
+ (const_int 2))
+ (if_then_else (eq (symbol_ref "TARGET_V9") (const_int 0))
+ (const_int 2)
+ (const_int 1)))
+ (if_then_else (lt (pc) (match_dup 2))
+ (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 260000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3)))
+ (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 260000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3)))))
+ (eq_attr "branch_type" "reg")
+ (if_then_else (lt (pc) (match_dup 2))
+ (if_then_else (lt (minus (match_dup 2) (pc)) (const_int 32000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3)))
+ (if_then_else (lt (minus (pc) (match_dup 2)) (const_int 32000))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 4)
+ (const_int 3))))
+ ] (const_int 1)))
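+
+;; As a rough illustration of the larger lengths above: output_cbranch turns
+;; an out-of-range conditional branch into the inverted test around an
+;; unconditional branch, approximately:
+;;   be    1f         ! inverted condition
+;;    nop
+;;   b     target
+;;    nop
+;; 1: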
+
+;; FP precision.
+(define_attr "fptype" "single,double"
+ (const_string "single"))
+
+;; UltraSPARC-III integer load type.
+(define_attr "us3load_type" "2cycle,3cycle"
+ (const_string "2cycle"))
+
+(define_asm_attributes
+ [(set_attr "length" "2")
+ (set_attr "type" "multi")])
+
+;; Attributes for instruction and branch scheduling
+(define_attr "tls_call_delay" "false,true"
+ (symbol_ref "(tls_call_delay (insn)
+ ? TLS_CALL_DELAY_TRUE : TLS_CALL_DELAY_FALSE)"))
+
+(define_attr "in_call_delay" "false,true"
+ (cond [(eq_attr "type" "uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+ (const_string "false")
+ (eq_attr "type" "load,fpload,store,fpstore")
+ (if_then_else (eq_attr "length" "1")
+ (const_string "true")
+ (const_string "false"))]
+ (if_then_else (and (eq_attr "length" "1")
+ (eq_attr "tls_call_delay" "true"))
+ (const_string "true")
+ (const_string "false"))))
+
+(define_attr "eligible_for_sibcall_delay" "false,true"
+ (symbol_ref "(eligible_for_sibcall_delay (insn)
+ ? ELIGIBLE_FOR_SIBCALL_DELAY_TRUE
+ : ELIGIBLE_FOR_SIBCALL_DELAY_FALSE)"))
+
+(define_attr "eligible_for_return_delay" "false,true"
+ (symbol_ref "(eligible_for_return_delay (insn)
+ ? ELIGIBLE_FOR_RETURN_DELAY_TRUE
+ : ELIGIBLE_FOR_RETURN_DELAY_FALSE)"))
+
+;; ??? !v9: Should implement the notion of predelay slots for floating-point
+;; branches. This would allow us to remove the nop always inserted before
+;; a floating-point branch.
+
+;; ??? It is OK for fill_simple_delay_slots to put load/store instructions
+;; in a delay slot, but it is not OK for fill_eager_delay_slots to do so.
+;; This is because doing so will add several pipeline stalls to the path
+;; that the load/store did not come from. Unfortunately, there is no way
+;; to prevent fill_eager_delay_slots from using load/store without completely
+;; disabling them. For the SPEC benchmark set, this is a serious loss,
+;; because it prevents us from moving back the final store of inner loops.
+
+(define_attr "in_branch_delay" "false,true"
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+ (eq_attr "length" "1"))
+ (const_string "true")
+ (const_string "false")))
+
+(define_attr "in_uncond_branch_delay" "false,true"
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+ (eq_attr "length" "1"))
+ (const_string "true")
+ (const_string "false")))
+
+(define_attr "in_annul_branch_delay" "false,true"
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+ (eq_attr "length" "1"))
+ (const_string "true")
+ (const_string "false")))
+
+(define_delay (eq_attr "type" "call")
+ [(eq_attr "in_call_delay" "true") (nil) (nil)])
+
+(define_delay (eq_attr "type" "sibcall")
+ [(eq_attr "eligible_for_sibcall_delay" "true") (nil) (nil)])
+
+(define_delay (eq_attr "type" "branch")
+ [(eq_attr "in_branch_delay" "true")
+ (nil) (eq_attr "in_annul_branch_delay" "true")])
+
+(define_delay (eq_attr "type" "uncond_branch")
+ [(eq_attr "in_uncond_branch_delay" "true")
+ (nil) (nil)])
+
+(define_delay (eq_attr "type" "return")
+ [(eq_attr "eligible_for_return_delay" "true") (nil) (nil)])
+
+
+;; Include SPARC DFA schedulers
+
+(include "cypress.md")
+(include "supersparc.md")
+(include "hypersparc.md")
+(include "leon.md")
+(include "sparclet.md")
+(include "ultra1_2.md")
+(include "ultra3.md")
+(include "niagara.md")
+(include "niagara2.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare instructions.
+
+;; These are just the DEFINE_INSNs to match the patterns and the
+;; DEFINE_SPLITs for some of the scc insns that actually require
+;; more than one machine instruction. DEFINE_EXPANDs are further down.
+
+;; The compare DEFINE_INSNs.
+
+(define_insn "*cmpsi_insn"
+ [(set (reg:CC 100)
+ (compare:CC (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "arith_operand" "rI")))]
+ ""
+ "cmp\t%0, %1"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmpdi_sp64"
+ [(set (reg:CCX 100)
+ (compare:CCX (match_operand:DI 0 "register_operand" "r")
+ (match_operand:DI 1 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+ "cmp\t%0, %1"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmpsf_fpe"
+ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
+ (compare:CCFPE (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+{
+ if (TARGET_V9)
+ return "fcmpes\t%0, %1, %2";
+ return "fcmpes\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")])
+
+(define_insn "*cmpdf_fpe"
+ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
+ (compare:CCFPE (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+{
+ if (TARGET_V9)
+ return "fcmped\t%0, %1, %2";
+ return "fcmped\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")
+ (set_attr "fptype" "double")])
+
+(define_insn "*cmptf_fpe"
+ [(set (match_operand:CCFPE 0 "fcc_register_operand" "=c")
+ (compare:CCFPE (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+{
+ if (TARGET_V9)
+ return "fcmpeq\t%0, %1, %2";
+ return "fcmpeq\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")])
+
+(define_insn "*cmpsf_fp"
+ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
+ (compare:CCFP (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+{
+ if (TARGET_V9)
+ return "fcmps\t%0, %1, %2";
+ return "fcmps\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")])
+
+(define_insn "*cmpdf_fp"
+ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
+ (compare:CCFP (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+{
+ if (TARGET_V9)
+ return "fcmpd\t%0, %1, %2";
+ return "fcmpd\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")
+ (set_attr "fptype" "double")])
+
+(define_insn "*cmptf_fp"
+ [(set (match_operand:CCFP 0 "fcc_register_operand" "=c")
+ (compare:CCFP (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+{
+ if (TARGET_V9)
+ return "fcmpq\t%0, %1, %2";
+ return "fcmpq\t%1, %2";
+}
+ [(set_attr "type" "fpcmp")])
+
+;; Next come the scc insns.
+
+(define_expand "cstoresi4"
+ [(use (match_operator 1 "comparison_operator"
+ [(match_operand:SI 2 "compare_operand" "")
+ (match_operand:SI 3 "arith_operand" "")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ ""
+{
+ if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx)
+ operands[2] = force_reg (SImode, operands[2]);
+ if (emit_scc_insn (operands)) DONE; else FAIL;
+})
+
+(define_expand "cstoredi4"
+ [(use (match_operator 1 "comparison_operator"
+ [(match_operand:DI 2 "compare_operand" "")
+ (match_operand:DI 3 "arith_operand" "")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ "TARGET_ARCH64"
+{
+ if (GET_CODE (operands[2]) == ZERO_EXTRACT && operands[3] != const0_rtx)
+ operands[2] = force_reg (DImode, operands[2]);
+ if (emit_scc_insn (operands)) DONE; else FAIL;
+})
+
+(define_expand "cstore<F:mode>4"
+ [(use (match_operator 1 "comparison_operator"
+ [(match_operand:F 2 "register_operand" "")
+ (match_operand:F 3 "register_operand" "")]))
+ (clobber (match_operand:SI 0 "register_operand"))]
+ "TARGET_FPU"
+ { if (emit_scc_insn (operands)) DONE; else FAIL; })
+
+
+
+;; The seq_special[_xxx] and sne_special[_xxx] patterns clobber the CC reg,
+;; because they generate addcc/subcc instructions.
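+;; For instance, "r = (x == y)" via seqsi_special expands, roughly, to:
+;;   xor   %o1, %o2, %o3     ! %o3 == 0 iff x == y
+;;   subcc %g0, %o3, %g0     ! carry set iff %o3 != 0
+;;   subx  %g0, -1, %o0      ! r = 1 - carry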
+
+(define_expand "seqsi_special"
+ [(set (match_dup 3)
+ (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_dup 3) (const_int 0)))
+ (clobber (reg:CC 100))])]
+ ""
+ { operands[3] = gen_reg_rtx (SImode); })
+
+(define_expand "seqdi_special"
+ [(set (match_dup 3)
+ (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "")
+ (eq:SI (match_dup 3) (const_int 0)))]
+ "TARGET_ARCH64"
+ { operands[3] = gen_reg_rtx (DImode); })
+
+(define_expand "snesi_special"
+ [(set (match_dup 3)
+ (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_dup 3) (const_int 0)))
+ (clobber (reg:CC 100))])]
+ ""
+ { operands[3] = gen_reg_rtx (SImode); })
+
+(define_expand "snedi_special"
+ [(set (match_dup 3)
+ (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))
+ (set (match_operand:SI 0 "register_operand" "")
+ (ne:SI (match_dup 3) (const_int 0)))]
+ "TARGET_ARCH64"
+ { operands[3] = gen_reg_rtx (DImode); })
+
+
+;; Now the DEFINE_INSNs for the scc cases.
+
+;; The SEQ and SNE patterns are special because they can be done
+;; without any branching and do not involve a COMPARE. We want
+;; them to always use the splits below so the results can be
+;; scheduled.
+
+(define_insn_and_split "*snesi_zero"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (ltu:SI (reg:CC 100) (const_int 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*neg_snesi_zero"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*snesi_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ne:DI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ "TARGET_ARCH64"
+ "#"
+ "&& 1"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0)
+ (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (zero_extend:DI (plus:SI (plus:SI (const_int 0)
+ (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100)
+ (const_int 0)))))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*snedi_zero"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (ne:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*neg_snedi_zero"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (neg:DI (ne:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0))))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int -1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*snedi_zero_trunc"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (ne:SI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:SI (ne:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*seqsi_zero"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (geu:SI (reg:CC 100) (const_int 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*neg_seqsi_zero"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (neg:SI (geu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*seqsi_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (eq:DI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0)))
+ (clobber (reg:CC 100))]
+ "TARGET_ARCH64"
+ "#"
+ "&& 1"
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (minus:SI (const_int 0)
+ (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (zero_extend:DI (minus:SI (minus:SI (const_int 0)
+ (const_int -1))
+ (ltu:SI (reg:CC_NOOV 100)
+ (const_int 0)))))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*seqdi_zero"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (eq:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*neg_seqdi_zero"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (neg:DI (eq:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0))))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:DI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int -1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*seqdi_zero_trunc"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (eq:SI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "#"
+ "&& ! reg_overlap_mentioned_p (operands[1], operands[0])"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0) (if_then_else:SI (eq:DI (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (match_dup 0)))]
+ ""
+ [(set_attr "length" "2")])
+
+;; We can also do (x + (i == 0)) and related, so those patterns follow.
+;; ??? The addx/subx insns use the 32-bit carry flag so there are no DImode
+;; versions for v9.
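+;; For instance, "r = x + (i != 0)" splits into, roughly:
+;;   subcc %g0, %o1, %g0     ! carry set iff i != 0
+;;   addx  %g0, %o2, %o0     ! r = x + carry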
+
+(define_insn_and_split "*x_plus_i_ne_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (match_dup 2)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*x_minus_i_ne_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 2 "register_operand" "r")
+ (ne:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (match_dup 2)
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*x_plus_i_eq_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (plus:SI (geu:SI (reg:CC 100) (const_int 0))
+ (match_dup 2)))]
+ ""
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*x_minus_i_eq_0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 2 "register_operand" "r")
+ (eq:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC 100))]
+ ""
+ "#"
+ ""
+ [(set (reg:CC_NOOV 100) (compare:CC_NOOV (neg:SI (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0) (minus:SI (match_dup 2)
+ (geu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ [(set_attr "length" "2")])
+
+;; We can also do GEU and LTU directly, but these operate after a compare.
+;; ??? The addx/subx insns use the 32-bit carry flag so there are no DImode
+;; versions for v9.
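+;; For example, after "cmp %o1, %o2" the sltu/sgeu insns below compute:
+;;   addx %g0, 0, %o0        ! %o0 = (%o1 <u %o2)
+;;   subx %g0, -1, %o0       ! %o0 = (%o1 >=u %o2)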
+
+(define_insn "*sltu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ltu:SI (reg:CC 100) (const_int 0)))]
+ ""
+ "addx\t%%g0, 0, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*neg_sltu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (ltu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ "subx\t%%g0, 0, %0"
+ [(set_attr "type" "ialuX")])
+
+;; ??? Combine should canonicalize these next two to the same pattern.
+(define_insn "*neg_sltu_minus_x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (neg:SI (ltu:SI (reg:CC 100) (const_int 0)))
+ (match_operand:SI 1 "arith_operand" "rI")))]
+ ""
+ "subx\t%%g0, %1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*neg_sltu_plus_x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (match_operand:SI 1 "arith_operand" "rI"))))]
+ ""
+ "subx\t%%g0, %1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*sgeu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (geu:SI (reg:CC 100) (const_int 0)))]
+ ""
+ "subx\t%%g0, -1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*neg_sgeu_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (geu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ "addx\t%%g0, -1, %0"
+ [(set_attr "type" "ialuX")])
+
+;; We can also do (x + ((unsigned) i >= 0)) and related, so those patterns follow.
+;; ??? The addx/subx insns use the 32-bit carry flag so there are no DImode
+;; versions for v9.
+
+(define_insn "*sltu_plus_x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (match_operand:SI 1 "arith_operand" "rI")))]
+ ""
+ "addx\t%%g0, %1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*sltu_plus_x_plus_y"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))))]
+ ""
+ "addx\t%1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*x_minus_sltu"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ "subx\t%1, 0, %0"
+ [(set_attr "type" "ialuX")])
+
+;; ??? Combine should canonicalize these next two to the same pattern.
+(define_insn "*x_minus_y_minus_sltu"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ "subx\t%r1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*x_minus_sltu_plus_y"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (plus:SI (ltu:SI (reg:CC 100) (const_int 0))
+ (match_operand:SI 2 "arith_operand" "rI"))))]
+ ""
+ "subx\t%r1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*sgeu_plus_x"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (geu:SI (reg:CC 100) (const_int 0))
+ (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "subx\t%1, -1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*x_minus_sgeu"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (geu:SI (reg:CC 100) (const_int 0))))]
+ ""
+ "addx\t%1, -1, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 2 "noov_compare_operator"
+ [(match_operand 1 "icc_or_fcc_register_operand" "")
+ (const_int 0)]))]
+ "TARGET_V9
+ && REGNO (operands[1]) == SPARC_ICC_REG
+ && (GET_MODE (operands[1]) == CCXmode
+ /* 32-bit LTU/GEU are better implemented using addx/subx. */
+ || (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0)
+ (if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)])
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
+
+;; These control RTL generation for conditional jump insns
+
+(define_expand "cbranchcc4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "compare_operand" "")
+ (match_operand 2 "const_zero_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+(define_expand "cbranchsi4"
+ [(use (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "compare_operand" "")
+ (match_operand:SI 2 "arith_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+{
+ if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx)
+ operands[1] = force_reg (SImode, operands[1]);
+ emit_conditional_branch_insn (operands);
+ DONE;
+})
+
+(define_expand "cbranchdi4"
+ [(use (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "compare_operand" "")
+ (match_operand:DI 2 "arith_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_ARCH64"
+{
+ if (GET_CODE (operands[1]) == ZERO_EXTRACT && operands[2] != const0_rtx)
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_conditional_branch_insn (operands);
+ DONE;
+})
+
+(define_expand "cbranch<F:mode>4"
+ [(use (match_operator 0 "comparison_operator"
+ [(match_operand:F 1 "register_operand" "")
+ (match_operand:F 2 "register_operand" "")]))
+ (use (match_operand 3 ""))]
+ "TARGET_FPU"
+ { emit_conditional_branch_insn (operands); DONE; })
+
+
+;; Now match both normal and inverted jump.
+
+;; XXX fpcmp nop braindamage
+(define_insn "*normal_branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "noov_compare_operator"
+ [(reg 100) (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+{
+ return output_cbranch (operands[0], operands[1], 1, 0,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "icc")])
+
+;; XXX fpcmp nop braindamage
+(define_insn "*inverted_branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "noov_compare_operator"
+ [(reg 100) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+{
+ return output_cbranch (operands[0], operands[1], 1, 1,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "icc")])
+
+;; XXX fpcmp nop braindamage
+(define_insn "*normal_fp_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(match_operand:CCFP 0 "fcc_register_operand" "c")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return output_cbranch (operands[1], operands[2], 2, 0,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "fcc")])
+
+;; XXX fpcmp nop braindamage
+(define_insn "*inverted_fp_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(match_operand:CCFP 0 "fcc_register_operand" "c")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return output_cbranch (operands[1], operands[2], 2, 1,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "fcc")])
+
+;; XXX fpcmp nop braindamage
+(define_insn "*normal_fpe_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(match_operand:CCFPE 0 "fcc_register_operand" "c")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return output_cbranch (operands[1], operands[2], 2, 0,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "fcc")])
+
+;; XXX fpcmp nop braindamage
+(define_insn "*inverted_fpe_branch"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(match_operand:CCFPE 0 "fcc_register_operand" "c")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return output_cbranch (operands[1], operands[2], 2, 1,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "fcc")])
+
+;; SPARC V9-specific jump insns. None of these are guaranteed to be
+;; in the architecture.
+
+;; There are no 32-bit brreg insns.
+
+;; XXX
+(define_insn "*normal_int_branch_sp64"
+ [(set (pc)
+ (if_then_else (match_operator 0 "v9_register_compare_operator"
+ [(match_operand:DI 1 "register_operand" "r")
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_ARCH64"
+{
+ return output_v9branch (operands[0], operands[2], 1, 2, 0,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "reg")])
+
+;; XXX
+(define_insn "*inverted_int_branch_sp64"
+ [(set (pc)
+ (if_then_else (match_operator 0 "v9_register_compare_operator"
+ [(match_operand:DI 1 "register_operand" "r")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ "TARGET_ARCH64"
+{
+ return output_v9branch (operands[0], operands[2], 1, 2, 1,
+ final_sequence && INSN_ANNULLED_BRANCH_P (insn),
+ insn);
+}
+ [(set_attr "type" "branch")
+ (set_attr "branch_type" "reg")])
+
+
+;; Load into operand 0 the (absolute) address of operand 1, which is a symbolic
+;; value subject to a PC-relative relocation. Operand 2 is a helper function
+;; that adds the PC value at the call point to register #(operand 3).
+
+(define_insn "load_pcrel_sym<P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "symbolic_operand" "")
+ (match_operand:P 2 "call_address_operand" "")
+ (match_operand:P 3 "const_int_operand" "")] UNSPEC_LOAD_PCREL_SYM))
+ (clobber (reg:P 15))]
+ "REGNO (operands[0]) == INTVAL (operands[3])"
+{
+ if (flag_delayed_branch)
+ return "sethi\t%%hi(%a1-4), %0\n\tcall\t%a2\n\t add\t%0, %%lo(%a1+4), %0";
+ else
+ return "sethi\t%%hi(%a1-8), %0\n\tadd\t%0, %%lo(%a1-4), %0\n\tcall\t%a2\n\t nop";
+}
+ [(set (attr "type") (const_string "multi"))
+ (set (attr "length")
+ (if_then_else (eq_attr "delayed_branch" "true")
+ (const_int 3)
+ (const_int 4)))])
+
+
+;; Integer move instructions
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ if (sparc_expand_move (QImode, operands))
+ DONE;
+})
+
+(define_insn "*movqi_insn"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m")
+ (match_operand:QI 1 "input_operand" "rI,m,rJ"))]
+ "(register_operand (operands[0], QImode)
+ || register_or_zero_operand (operands[1], QImode))"
+ "@
+ mov\t%1, %0
+ ldub\t%1, %0
+ stb\t%r1, %0"
+ [(set_attr "type" "*,load,store")
+ (set_attr "us3load_type" "*,3cycle,*")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ if (sparc_expand_move (HImode, operands))
+ DONE;
+})
+
+(define_insn "*movhi_insn"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "input_operand" "rI,K,m,rJ"))]
+ "(register_operand (operands[0], HImode)
+ || register_or_zero_operand (operands[1], HImode))"
+ "@
+ mov\t%1, %0
+ sethi\t%%hi(%a1), %0
+ lduh\t%1, %0
+ sth\t%r1, %0"
+ [(set_attr "type" "*,*,load,store")
+ (set_attr "us3load_type" "*,*,3cycle,*")])
+
+;; We always work with constants here.
+(define_insn "*movhi_lo_sum"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ior:HI (match_operand:HI 1 "register_operand" "%r")
+ (match_operand:HI 2 "small_int_operand" "I")))]
+ ""
+ "or\t%1, %2, %0")
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ if (sparc_expand_move (SImode, operands))
+ DONE;
+})
+
+(define_insn "*movsi_insn"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m,!f,!f,!m,d")
+ (match_operand:SI 1 "input_operand" "rI,K,m,rJ,f,m,f,J"))]
+ "(register_operand (operands[0], SImode)
+ || register_or_zero_operand (operands[1], SImode))"
+ "@
+ mov\t%1, %0
+ sethi\t%%hi(%a1), %0
+ ld\t%1, %0
+ st\t%r1, %0
+ fmovs\t%1, %0
+ ld\t%1, %0
+ st\t%1, %0
+ fzeros\t%0"
+ [(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga")])
+
+(define_insn "*movsi_lo_sum"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "in")))]
+ ""
+ "or\t%1, %%lo(%a2), %0")
+
+(define_insn "*movsi_high"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand:SI 1 "immediate_operand" "in")))]
+ ""
+ "sethi\t%%hi(%a1), %0")
+
+;; The next two patterns must wrap the SYMBOL_REF in an UNSPEC
+;; so that CSE won't optimize the address computation away.
+(define_insn "movsi_lo_sum_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] UNSPEC_MOVE_PIC)))]
+ "flag_pic"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "xor\t%1, %%gdop_lox10(%a2), %0";
+#else
+ return "or\t%1, %%lo(%a2), %0";
+#endif
+})
+
+(define_insn "movsi_high_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))]
+ "flag_pic && check_pic (1)"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "sethi\t%%gdop_hix22(%a1), %0";
+#else
+ return "sethi\t%%hi(%a1), %0";
+#endif
+})
+
+(define_insn "movsi_pic_gotdata_op"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "symbolic_operand" "")] UNSPEC_MOVE_GOTDATA))]
+ "flag_pic && check_pic (1)"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "ld\t[%1 + %2], %0, %%gdop(%a3)";
+#else
+ return "ld\t[%1 + %2], %0";
+#endif
+}
+ [(set_attr "type" "load")])
+
+(define_expand "movsi_pic_label_ref"
+ [(set (match_dup 3) (high:SI
+ (unspec:SI [(match_operand:SI 1 "label_ref_operand" "")
+ (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
+ (set (match_dup 4) (lo_sum:SI (match_dup 3)
+ (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_dup 5) (match_dup 4)))]
+ "flag_pic"
+{
+ crtl->uses_pic_offset_table = 1;
+ operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ if (!can_create_pseudo_p ())
+ {
+ operands[3] = operands[0];
+ operands[4] = operands[0];
+ }
+ else
+ {
+ operands[3] = gen_reg_rtx (SImode);
+ operands[4] = gen_reg_rtx (SImode);
+ }
+ operands[5] = pic_offset_table_rtx;
+})
+
+(define_insn "*movsi_high_pic_label_ref"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI
+ (unspec:SI [(match_operand:SI 1 "label_ref_operand" "")
+ (match_operand:SI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
+ "flag_pic"
+ "sethi\t%%hi(%a2-(%a1-.)), %0")
+
+(define_insn "*movsi_lo_sum_pic_label_ref"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "label_ref_operand" "")
+ (match_operand:SI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
+ "flag_pic"
+ "or\t%1, %%lo(%a3-(%a2-.)), %0")
+
+;; Set up the PIC register for VxWorks.
+
+(define_expand "vxworks_load_got"
+ [(set (match_dup 0)
+ (high:SI (match_dup 1)))
+ (set (match_dup 0)
+ (mem:SI (lo_sum:SI (match_dup 0) (match_dup 1))))
+ (set (match_dup 0)
+ (mem:SI (lo_sum:SI (match_dup 0) (match_dup 2))))]
+ "TARGET_VXWORKS_RTP"
+{
+ operands[0] = pic_offset_table_rtx;
+ operands[1] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_BASE);
+ operands[2] = gen_rtx_SYMBOL_REF (SImode, VXWORKS_GOTT_INDEX);
+})
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ if (sparc_expand_move (DImode, operands))
+ DONE;
+})
+
+;; Be careful, fmovd does not exist when !v9.
+;; We match MEM moves directly when we have correct even
+;; numbered registers, but fall into splits otherwise.
+;; The constraint ordering here is really important to
+;; avoid insane problems in reload, especially for patterns
+;; of the form:
+;;
+;; (set (mem:DI (plus:SI (reg:SI 30 %fp)
+;; (const_int -5016)))
+;; (reg:DI 2 %g2))
+;;
+
+(define_insn "*movdi_insn_sp32"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=o,T,U,o,r,r,r,?T,?f,?f,?o,?f")
+ (match_operand:DI 1 "input_operand"
+ " J,U,T,r,o,i,r, f, T, o, f, f"))]
+ "! TARGET_V9
+ && (register_operand (operands[0], DImode)
+ || register_or_zero_operand (operands[1], DImode))"
+ "@
+ #
+ std\t%1, %0
+ ldd\t%1, %0
+ #
+ #
+ #
+ #
+ std\t%1, %0
+ ldd\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "store,store,load,*,*,*,*,fpstore,fpload,*,*,*")
+ (set_attr "length" "2,*,*,2,2,2,2,*,*,2,2,2")])
+
+(define_insn "*movdi_insn_sp32_v9"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=T,o,T,U,o,r,r,r,?T,?f,?f,?o,?e,?e,?W")
+ (match_operand:DI 1 "input_operand"
+ " J,J,U,T,r,o,i,r, f, T, o, f, e, W, e"))]
+ "! TARGET_ARCH64
+ && TARGET_V9
+ && (register_operand (operands[0], DImode)
+ || register_or_zero_operand (operands[1], DImode))"
+ "@
+ stx\t%%g0, %0
+ #
+ std\t%1, %0
+ ldd\t%1, %0
+ #
+ #
+ #
+ #
+ std\t%1, %0
+ ldd\t%1, %0
+ #
+ #
+ fmovd\\t%1, %0
+ ldd\\t%1, %0
+ std\\t%1, %0"
+ [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,fpload,fpstore")
+ (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,*,*,*")
+ (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*")])
+
+(define_insn "*movdi_insn_sp64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m,?e,?e,?W,b")
+ (match_operand:DI 1 "input_operand" "rI,N,m,rJ,e,W,e,J"))]
+ "TARGET_ARCH64
+ && (register_operand (operands[0], DImode)
+ || register_or_zero_operand (operands[1], DImode))"
+ "@
+ mov\t%1, %0
+ sethi\t%%hi(%a1), %0
+ ldx\t%1, %0
+ stx\t%r1, %0
+ fmovd\t%1, %0
+ ldd\t%1, %0
+ std\t%1, %0
+ fzero\t%0"
+ [(set_attr "type" "*,*,load,store,fpmove,fpload,fpstore,fga")
+ (set_attr "fptype" "*,*,*,*,double,*,*,double")])
+
+(define_expand "movdi_pic_label_ref"
+ [(set (match_dup 3) (high:DI
+ (unspec:DI [(match_operand:DI 1 "label_ref_operand" "")
+ (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
+ (set (match_dup 4) (lo_sum:DI (match_dup 3)
+ (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_MOVE_PIC_LABEL)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_dup 5) (match_dup 4)))]
+ "TARGET_ARCH64 && flag_pic"
+{
+ crtl->uses_pic_offset_table = 1;
+ operands[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+ if (!can_create_pseudo_p ())
+ {
+ operands[3] = operands[0];
+ operands[4] = operands[0];
+ }
+ else
+ {
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ }
+ operands[5] = pic_offset_table_rtx;
+})
+
+(define_insn "*movdi_high_pic_label_ref"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI
+ (unspec:DI [(match_operand:DI 1 "label_ref_operand" "")
+ (match_operand:DI 2 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
+ "TARGET_ARCH64 && flag_pic"
+ "sethi\t%%hi(%a2-(%a1-.)), %0")
+
+(define_insn "*movdi_lo_sum_pic_label_ref"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "label_ref_operand" "")
+ (match_operand:DI 3 "" "")] UNSPEC_MOVE_PIC_LABEL)))]
+ "TARGET_ARCH64 && flag_pic"
+ "or\t%1, %%lo(%a3-(%a2-.)), %0")
+
+;; SPARC-v9 code model support insns. See sparc_emit_set_symbolic_const64
+;; in sparc.c for the details. PIC patterns come first.
+
+(define_insn "movdi_lo_sum_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "immediate_operand" "in")] UNSPEC_MOVE_PIC)))]
+ "TARGET_ARCH64 && flag_pic"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "xor\t%1, %%gdop_lox10(%a2), %0";
+#else
+ return "or\t%1, %%lo(%a2), %0";
+#endif
+})
+
+(define_insn "movdi_high_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand 1 "" "")] UNSPEC_MOVE_PIC)))]
+ "TARGET_ARCH64 && flag_pic && check_pic (1)"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "sethi\t%%gdop_hix22(%a1), %0";
+#else
+ return "sethi\t%%hi(%a1), %0";
+#endif
+})
+
+(define_insn "movdi_pic_gotdata_op"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (match_operand 3 "symbolic_operand" "")] UNSPEC_MOVE_GOTDATA))]
+ "TARGET_ARCH64 && flag_pic && check_pic (1)"
+{
+#ifdef HAVE_AS_SPARC_GOTDATA_OP
+ return "ldx\t[%1 + %2], %0, %%gdop(%a3)";
+#else
+ return "ldx\t[%1 + %2], %0";
+#endif
+}
+ [(set_attr "type" "load")])
+
+(define_insn "*sethi_di_medlow_embmedany_pic"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand:DI 1 "medium_pic_operand" "")))]
+ "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && check_pic (1)"
+ "sethi\t%%hi(%a1), %0")
+
+(define_insn "*sethi_di_medlow"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (match_operand:DI 1 "symbolic_operand" "")))]
+ "TARGET_CM_MEDLOW && check_pic (1)"
+ "sethi\t%%hi(%a1), %0")
+
+(define_insn "*losum_di_medlow"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDLOW"
+ "or\t%1, %%lo(%a2), %0")
+
+(define_insn "seth44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETH44)))]
+ "TARGET_CM_MEDMID"
+ "sethi\t%%h44(%a1), %0")
+
+(define_insn "setm44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] UNSPEC_SETM44)))]
+ "TARGET_CM_MEDMID"
+ "or\t%1, %%m44(%a2), %0")
+
+(define_insn "setl44"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDMID"
+ "or\t%1, %%l44(%a2), %0")
+
+(define_insn "sethh"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETHH)))]
+ "TARGET_CM_MEDANY"
+ "sethi\t%%hh(%a1), %0")
+
+(define_insn "setlm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")] UNSPEC_SETLM)))]
+ "TARGET_CM_MEDANY"
+ "sethi\t%%lm(%a1), %0")
+
+(define_insn "sethm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] UNSPEC_EMB_SETHM)))]
+ "TARGET_CM_MEDANY"
+ "or\t%1, %%hm(%a2), %0")
+
+(define_insn "setlo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand" "")))]
+ "TARGET_CM_MEDANY"
+ "or\t%1, %%lo(%a2), %0")
+
+(define_insn "embmedany_sethi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "data_segment_operand" "")] UNSPEC_EMB_HISUM)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\t%%hi(%a1), %0")
+
+(define_insn "embmedany_losum"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "data_segment_operand" "")))]
+ "TARGET_CM_EMBMEDANY"
+ "add\t%1, %%lo(%a2), %0")
+
+(define_insn "embmedany_brsum"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_EMB_HISUM))]
+ "TARGET_CM_EMBMEDANY"
+ "add\t%1, %_, %0")
+
+(define_insn "embmedany_textuhi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] UNSPEC_EMB_TEXTUHI)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\t%%uhi(%a1), %0")
+
+(define_insn "embmedany_texthi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")] UNSPEC_EMB_TEXTHI)))]
+ "TARGET_CM_EMBMEDANY && check_pic (1)"
+ "sethi\t%%hi(%a1), %0")
+
+(define_insn "embmedany_textulo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")] UNSPEC_EMB_TEXTULO)))]
+ "TARGET_CM_EMBMEDANY"
+ "or\t%1, %%ulo(%a2), %0")
+
+(define_insn "embmedany_textlo"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "text_segment_operand" "")))]
+ "TARGET_CM_EMBMEDANY"
+ "or\t%1, %%lo(%a2), %0")
+
+;; Now some patterns to help out reload a bit.
+(define_expand "reload_indi"
+ [(parallel [(match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "immediate_operand" "")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "(TARGET_CM_MEDANY
+ || TARGET_CM_EMBMEDANY)
+ && ! flag_pic"
+{
+ sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "reload_outdi"
+ [(parallel [(match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "immediate_operand" "")
+ (match_operand:TI 2 "register_operand" "=&r")])]
+ "(TARGET_CM_MEDANY
+ || TARGET_CM_EMBMEDANY)
+ && ! flag_pic"
+{
+ sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+;; Split up putting CONSTs and REGs into DI regs when !arch64
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_int_operand" ""))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(clobber (const_int 0))]
+{
+#if HOST_BITS_PER_WIDE_INT == 32
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]),
+ (INTVAL (operands[1]) < 0) ?
+ constm1_rtx :
+ const0_rtx));
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ operands[1]));
+#else
+ unsigned int low, high;
+
+ low = trunc_int_for_mode (INTVAL (operands[1]), SImode);
+ high = trunc_int_for_mode (INTVAL (operands[1]) >> 32, SImode);
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), GEN_INT (high)));
+
+ /* Slick... but this trick loses if this subreg constant part
+ can be done in one insn. */
+ if (low == high
+ && ! SPARC_SETHI32_P (high)
+ && ! SPARC_SIMM13_P (high))
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0])));
+ else
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), GEN_INT (low)));
+#endif
+ DONE;
+})
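+
+;; E.g. for !arch64 the constant 0x1234567800000000 would be split into two
+;; SImode moves into the register pair, roughly:
+;;   sethi %hi(0x12345678), %o0      ! high word
+;;   or    %o0, %lo(0x12345678), %o0
+;;   mov   0, %o1                    ! low word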
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "const_double_operand" ""))]
+ "reload_completed
+ && (! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))))"
+ [(clobber (const_int 0))]
+{
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]),
+ GEN_INT (CONST_DOUBLE_HIGH (operands[1]))));
+
+ /* Slick... but this trick loses if this subreg constant part
+ can be done in one insn. */
+ if (CONST_DOUBLE_LOW (operands[1]) == CONST_DOUBLE_HIGH (operands[1])
+ && ! SPARC_SETHI32_P (CONST_DOUBLE_HIGH (operands[1]))
+ && ! SPARC_SIMM13_P (CONST_DOUBLE_HIGH (operands[1])))
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0])));
+ }
+ else
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ GEN_INT (CONST_DOUBLE_LOW (operands[1]))));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "reload_completed
+ && (! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))))"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_highpart (SImode, set_dest);
+ dest2 = gen_lowpart (SImode, set_dest);
+ src1 = gen_highpart (SImode, set_src);
+ src2 = gen_lowpart (SImode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsi (dest2, src2));
+ emit_insn (gen_movsi (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_movsi (dest1, src1));
+ emit_insn (gen_movsi (dest2, src2));
+ }
+ DONE;
+})
+
+;; Now handle the cases of memory moves from/to DImode
+;; register pairs that do not start on an even register.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))]
+ "(! TARGET_ARCH64
+ && reload_completed
+ && sparc_splitdi_legitimate (operands[0], operands[1]))"
+ [(clobber (const_int 0))]
+{
+ rtx word0 = adjust_address (operands[1], SImode, 0);
+ rtx word1 = adjust_address (operands[1], SImode, 4);
+ rtx high_part = gen_highpart (SImode, operands[0]);
+ rtx low_part = gen_lowpart (SImode, operands[0]);
+
+ if (reg_overlap_mentioned_p (high_part, word1))
+ {
+ emit_insn (gen_movsi (low_part, word1));
+ emit_insn (gen_movsi (high_part, word0));
+ }
+ else
+ {
+ emit_insn (gen_movsi (high_part, word0));
+ emit_insn (gen_movsi (low_part, word1));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "register_operand" ""))]
+ "(! TARGET_ARCH64
+ && reload_completed
+ && sparc_splitdi_legitimate (operands[1], operands[0]))"
+ [(clobber (const_int 0))]
+{
+ emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0),
+ gen_highpart (SImode, operands[1])));
+ emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4),
+ gen_lowpart (SImode, operands[1])));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "const_zero_operand" ""))]
+ "reload_completed
+ && (! TARGET_V9
+ || (! TARGET_ARCH64
+ && ! mem_min_alignment (operands[0], 8)))
+ && offsettable_memref_p (operands[0])"
+ [(clobber (const_int 0))]
+{
+ emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0), const0_rtx));
+ emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4), const0_rtx));
+ DONE;
+})
+
+
+;; Floating point and vector move instructions
+
+;; Yes, you guessed it right, the former movsf expander.
+(define_expand "mov<V32:mode>"
+ [(set (match_operand:V32 0 "nonimmediate_operand" "")
+ (match_operand:V32 1 "general_operand" ""))]
+ "<V32:MODE>mode == SFmode || TARGET_VIS"
+{
+ if (sparc_expand_move (<V32:MODE>mode, operands))
+ DONE;
+})
+
+(define_insn "*movsf_insn"
+ [(set (match_operand:V32 0 "nonimmediate_operand" "=d,f, *r,*r,*r,f,*r,m, m")
+ (match_operand:V32 1 "input_operand" "GY,f,*rRY, Q, S,m, m,f,*rGY"))]
+ "TARGET_FPU
+ && (register_operand (operands[0], <V32:MODE>mode)
+ || register_or_zero_operand (operands[1], <V32:MODE>mode))"
+{
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && (which_alternative == 2
+ || which_alternative == 3
+ || which_alternative == 4))
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[1] = GEN_INT (i);
+ }
+
+ switch (which_alternative)
+ {
+ case 0:
+ return "fzeros\t%0";
+ case 1:
+ return "fmovs\t%1, %0";
+ case 2:
+ return "mov\t%1, %0";
+ case 3:
+ return "sethi\t%%hi(%a1), %0";
+ case 4:
+ return "#";
+ case 5:
+ case 6:
+ return "ld\t%1, %0";
+ case 7:
+ case 8:
+ return "st\t%r1, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fga,fpmove,*,*,*,fpload,load,fpstore,store")])
+
+;; Exactly the same as above, except that all `f' cases are deleted.
+;; This is necessary to prevent reload from ever trying to use an `f' reg
+;; when -mno-fpu.
+
+(define_insn "*movsf_insn_no_fpu"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r,r, m")
+ (match_operand:SF 1 "input_operand" "rR,Q,S,m,rG"))]
+ "! TARGET_FPU
+ && (register_operand (operands[0], SFmode)
+ || register_or_zero_operand (operands[1], SFmode))"
+{
+ if (GET_CODE (operands[1]) == CONST_DOUBLE
+ && (which_alternative == 0
+ || which_alternative == 1
+ || which_alternative == 2))
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[1] = GEN_INT (i);
+ }
+
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov\t%1, %0";
+ case 1:
+ return "sethi\t%%hi(%a1), %0";
+ case 2:
+ return "#";
+ case 3:
+ return "ld\t%1, %0";
+ case 4:
+ return "st\t%r1, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "*,*,*,load,store")])
+
+;; The following 3 patterns build SFmode constants in integer registers.
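+;; E.g. the bit pattern of 0.1f (0x3dcccccd) can be built roughly as:
+;;   sethi %hi(0x3dcccccd), %o0       ! upper 22 bits
+;;   or    %o0, %lo(0x3dcccccd), %o0  ! lower 10 bits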
+
+(define_insn "*movsf_lo_sum"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (lo_sum:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "fp_const_high_losum_operand" "S")))]
+ ""
+{
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[2]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[2] = GEN_INT (i);
+ return "or\t%1, %%lo(%a2), %0";
+})
+
+(define_insn "*movsf_high"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (high:SF (match_operand:SF 1 "fp_const_high_losum_operand" "S")))]
+ ""
+{
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ operands[1] = GEN_INT (i);
+ return "sethi\t%%hi(%1), %0";
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "fp_const_high_losum_operand" ""))]
+ "REG_P (operands[0]) && REGNO (operands[0]) < 32"
+ [(set (match_dup 0) (high:SF (match_dup 1)))
+ (set (match_dup 0) (lo_sum:SF (match_dup 0) (match_dup 1)))])
+
+;; Yes, you again guessed it right, the former movdf expander.
+(define_expand "mov<V64:mode>"
+ [(set (match_operand:V64 0 "nonimmediate_operand" "")
+ (match_operand:V64 1 "general_operand" ""))]
+ "<V64:MODE>mode == DFmode || TARGET_VIS"
+{
+ if (sparc_expand_move (<V64:MODE>mode, operands))
+ DONE;
+})
+
+;; Be careful, fmovd does not exist when !v9.
+(define_insn "*movdf_insn_sp32"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "= e,W,U,T,o,e, *r, o, e,o")
+ (match_operand:DF 1 "input_operand" "W#F,e,T,U,G,e,*rFo,*r,o#F,e"))]
+ "TARGET_FPU
+ && ! TARGET_V9
+ && (register_operand (operands[0], DFmode)
+ || register_or_zero_operand (operands[1], DFmode))"
+ "@
+ ldd\t%1, %0
+ std\t%1, %0
+ ldd\t%1, %0
+ std\t%1, %0
+ #
+ #
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "fpload,fpstore,load,store,*,*,*,*,*,*")
+ (set_attr "length" "*,*,*,*,2,2,2,2,2,2")])
+
+(define_insn "*movdf_insn_sp32_no_fpu"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,o, r,o")
+ (match_operand:DF 1 "input_operand" " T,U,G,ro,r"))]
+ "! TARGET_FPU
+ && ! TARGET_V9
+ && (register_operand (operands[0], DFmode)
+ || register_or_zero_operand (operands[1], DFmode))"
+ "@
+ ldd\t%1, %0
+ std\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "load,store,*,*,*")
+ (set_attr "length" "*,*,2,2,2")])
+
+;; We have available v9 double floats but not 64-bit integer registers.
+(define_insn "*movdf_insn_sp32_v9"
+ [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e, T,W,U,T, f, *r, o")
+ (match_operand:V64 1 "input_operand" "GY,e,W#F,GY,e,T,U,o#F,*roFD,*rGYf"))]
+ "TARGET_FPU
+ && TARGET_V9
+ && ! TARGET_ARCH64
+ && (register_operand (operands[0], <V64:MODE>mode)
+ || register_or_zero_operand (operands[1], <V64:MODE>mode))"
+ "@
+ fzero\t%0
+ fmovd\t%1, %0
+ ldd\t%1, %0
+ stx\t%r1, %0
+ std\t%1, %0
+ ldd\t%1, %0
+ std\t%1, %0
+ #
+ #
+ #"
+ [(set_attr "type" "fga,fpmove,load,store,store,load,store,*,*,*")
+ (set_attr "length" "*,*,*,*,*,*,*,2,2,2")
+ (set_attr "fptype" "double,double,*,*,*,*,*,*,*,*")])
+
+(define_insn "*movdf_insn_sp32_v9_no_fpu"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=U,T,T, r, o")
+ (match_operand:DF 1 "input_operand" " T,U,G,ro,rG"))]
+ "! TARGET_FPU
+ && TARGET_V9
+ && ! TARGET_ARCH64
+ && (register_operand (operands[0], DFmode)
+ || register_or_zero_operand (operands[1], DFmode))"
+ "@
+ ldd\t%1, %0
+ std\t%1, %0
+ stx\t%r1, %0
+ #
+ #"
+ [(set_attr "type" "load,store,store,*,*")
+ (set_attr "length" "*,*,*,2,2")])
+
+;; We have available both v9 double floats and 64-bit integer registers.
+(define_insn "*movdf_insn_sp64"
+ [(set (match_operand:V64 0 "nonimmediate_operand" "=b,e, e,W, *r,*r, m,*r")
+ (match_operand:V64 1 "input_operand" "GY,e,W#F,e,*rGY, m,*rGY,FD"))]
+ "TARGET_FPU
+ && TARGET_ARCH64
+ && (register_operand (operands[0], <V64:MODE>mode)
+ || register_or_zero_operand (operands[1], <V64:MODE>mode))"
+ "@
+ fzero\t%0
+ fmovd\t%1, %0
+ ldd\t%1, %0
+ std\t%1, %0
+ mov\t%r1, %0
+ ldx\t%1, %0
+ stx\t%r1, %0
+ #"
+ [(set_attr "type" "fga,fpmove,load,store,*,load,store,*")
+ (set_attr "length" "*,*,*,*,*,*,*,2")
+ (set_attr "fptype" "double,double,*,*,*,*,*,*")])
+
+(define_insn "*movdf_insn_sp64_no_fpu"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r, m")
+ (match_operand:DF 1 "input_operand" "r,m,rG"))]
+ "! TARGET_FPU
+ && TARGET_ARCH64
+ && (register_operand (operands[0], DFmode)
+ || register_or_zero_operand (operands[1], DFmode))"
+ "@
+ mov\t%1, %0
+ ldx\t%1, %0
+ stx\t%r1, %0"
+ [(set_attr "type" "*,load,store")])
+
+;; This pattern builds V64mode constants in integer registers.
+(define_split
+ [(set (match_operand:V64 0 "register_operand" "")
+ (match_operand:V64 1 "const_double_or_vector_operand" ""))]
+ "TARGET_FPU
+ && (GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ && ! const_zero_operand (operands[1], GET_MODE (operands[0]))
+ && reload_completed"
+ [(clobber (const_int 0))]
+{
+ operands[0] = gen_rtx_raw_REG (DImode, REGNO (operands[0]));
+
+ if (TARGET_ARCH64)
+ {
+#if HOST_BITS_PER_WIDE_INT == 32
+ gcc_unreachable ();
+#else
+ enum machine_mode mode = GET_MODE (operands[1]);
+ rtx tem = simplify_subreg (DImode, operands[1], mode, 0);
+ emit_insn (gen_movdi (operands[0], tem));
+#endif
+ }
+ else
+ {
+ enum machine_mode mode = GET_MODE (operands[1]);
+ rtx hi = simplify_subreg (SImode, operands[1], mode, 0);
+ rtx lo = simplify_subreg (SImode, operands[1], mode, 4);
+
+ gcc_assert (GET_CODE (hi) == CONST_INT);
+ gcc_assert (GET_CODE (lo) == CONST_INT);
+
+ emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), hi));
+
+ /* Slick... but this trick loses if this subreg constant part
+ can be done in one insn. */
+ if (lo == hi
+ && ! SPARC_SETHI32_P (INTVAL (hi))
+ && ! SPARC_SIMM13_P (INTVAL (hi)))
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[0])));
+ }
+ else
+ {
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lo));
+ }
+ }
+ DONE;
+})
+
+;; OK, now the splits to handle all the multi-insn and
+;; misaligned memory address cases.
+;; In these splits, note that we must be careful when
+;; V9 but not ARCH64, because the integer register
+;; DFmode cases must be handled.
+(define_split
+ [(set (match_operand:V64 0 "register_operand" "")
+ (match_operand:V64 1 "register_operand" ""))]
+ "(! TARGET_V9
+ || (! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))))
+ && reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+ enum machine_mode half_mode;
+
+ /* We can be expanded for DFmode or integral vector modes. */
+ if (<V64:MODE>mode == DFmode)
+ half_mode = SFmode;
+ else
+ half_mode = SImode;
+
+ dest1 = gen_highpart (half_mode, set_dest);
+ dest2 = gen_lowpart (half_mode, set_dest);
+ src1 = gen_highpart (half_mode, set_src);
+ src2 = gen_lowpart (half_mode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_move_insn_1 (dest2, src2);
+ emit_move_insn_1 (dest1, src1);
+ }
+ else
+ {
+ emit_move_insn_1 (dest1, src1);
+ emit_move_insn_1 (dest2, src2);
+ }
+ DONE;
+})
+
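+;; Purely illustrative (hypothetical registers): a DFmode value living in
+;; integer registers moves one 32-bit half at a time, e.g.
+;;
+;;         mov     %o2, %o0        ! high halves
+;;         mov     %o3, %o1        ! low halves
+;;
+;; with the two moves swapped whenever the first destination half would
+;; overwrite a still-needed source half.
+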
+(define_split
+ [(set (match_operand:V64 0 "register_operand" "")
+ (match_operand:V64 1 "memory_operand" ""))]
+ "reload_completed
+ && ! TARGET_ARCH64
+ && (((REGNO (operands[0]) % 2) != 0)
+ || ! mem_min_alignment (operands[1], 8))
+ && offsettable_memref_p (operands[1])"
+ [(clobber (const_int 0))]
+{
+ enum machine_mode half_mode;
+ rtx word0, word1;
+
+ /* We can be expanded for DFmode or integral vector modes. */
+ if (<V64:MODE>mode == DFmode)
+ half_mode = SFmode;
+ else
+ half_mode = SImode;
+
+ word0 = adjust_address (operands[1], half_mode, 0);
+ word1 = adjust_address (operands[1], half_mode, 4);
+
+ if (reg_overlap_mentioned_p (gen_highpart (half_mode, operands[0]), word1))
+ {
+ emit_move_insn_1 (gen_lowpart (half_mode, operands[0]), word1);
+ emit_move_insn_1 (gen_highpart (half_mode, operands[0]), word0);
+ }
+ else
+ {
+ emit_move_insn_1 (gen_highpart (half_mode, operands[0]), word0);
+ emit_move_insn_1 (gen_lowpart (half_mode, operands[0]), word1);
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:V64 0 "memory_operand" "")
+ (match_operand:V64 1 "register_operand" ""))]
+ "reload_completed
+ && ! TARGET_ARCH64
+ && (((REGNO (operands[1]) % 2) != 0)
+ || ! mem_min_alignment (operands[0], 8))
+ && offsettable_memref_p (operands[0])"
+ [(clobber (const_int 0))]
+{
+ enum machine_mode half_mode;
+ rtx word0, word1;
+
+ /* We can be expanded for DFmode or integral vector modes. */
+ if (<V64:MODE>mode == DFmode)
+ half_mode = SFmode;
+ else
+ half_mode = SImode;
+
+ word0 = adjust_address (operands[0], half_mode, 0);
+ word1 = adjust_address (operands[0], half_mode, 4);
+
+ emit_move_insn_1 (word0, gen_highpart (half_mode, operands[1]));
+ emit_move_insn_1 (word1, gen_lowpart (half_mode, operands[1]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:V64 0 "memory_operand" "")
+ (match_operand:V64 1 "const_zero_operand" ""))]
+ "reload_completed
+ && (! TARGET_V9
+ || (! TARGET_ARCH64
+ && ! mem_min_alignment (operands[0], 8)))
+ && offsettable_memref_p (operands[0])"
+ [(clobber (const_int 0))]
+{
+ enum machine_mode half_mode;
+ rtx dest1, dest2;
+
+ /* We can be expanded for DFmode or integral vector modes. */
+ if (<V64:MODE>mode == DFmode)
+ half_mode = SFmode;
+ else
+ half_mode = SImode;
+
+ dest1 = adjust_address (operands[0], half_mode, 0);
+ dest2 = adjust_address (operands[0], half_mode, 4);
+
+ emit_move_insn_1 (dest1, CONST0_RTX (half_mode));
+ emit_move_insn_1 (dest2, CONST0_RTX (half_mode));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:V64 0 "register_operand" "")
+ (match_operand:V64 1 "const_zero_operand" ""))]
+ "reload_completed
+ && ! TARGET_ARCH64
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(clobber (const_int 0))]
+{
+ enum machine_mode half_mode;
+ rtx set_dest = operands[0];
+ rtx dest1, dest2;
+
+ /* We can be expanded for DFmode or integral vector modes. */
+ if (<V64:MODE>mode == DFmode)
+ half_mode = SFmode;
+ else
+ half_mode = SImode;
+
+ dest1 = gen_highpart (half_mode, set_dest);
+ dest2 = gen_lowpart (half_mode, set_dest);
+ emit_move_insn_1 (dest1, CONST0_RTX (half_mode));
+ emit_move_insn_1 (dest2, CONST0_RTX (half_mode));
+ DONE;
+})
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ ""
+{
+ if (sparc_expand_move (TFmode, operands))
+ DONE;
+})
+
+(define_insn "*movtf_insn_sp32"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o,U, r")
+ (match_operand:TF 1 "input_operand" " G,oe,GeUr,o,roG"))]
+ "TARGET_FPU
+ && ! TARGET_ARCH64
+ && (register_operand (operands[0], TFmode)
+ || register_or_zero_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "4")])
+
+;; Exactly the same as above, except that all `e' cases are deleted.
+;; This is necessary to prevent reload from ever trying to use an `e' reg
+;; when -mno-fpu.
+
+(define_insn "*movtf_insn_sp32_no_fpu"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o, r,o")
+ (match_operand:TF 1 "input_operand" " G,o,U,roG,r"))]
+ "! TARGET_FPU
+ && ! TARGET_ARCH64
+ && (register_operand (operands[0], TFmode)
+ || register_or_zero_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "4")])
+
+(define_insn "*movtf_insn_sp64"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e, o, r")
+ (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))]
+ "TARGET_FPU
+ && TARGET_ARCH64
+ && ! TARGET_HARD_QUAD
+ && (register_operand (operands[0], TFmode)
+ || register_or_zero_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "2")])
+
+(define_insn "*movtf_insn_sp64_hq"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r")
+ (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))]
+ "TARGET_FPU
+ && TARGET_ARCH64
+ && TARGET_HARD_QUAD
+ && (register_operand (operands[0], TFmode)
+ || register_or_zero_operand (operands[1], TFmode))"
+ "@
+ #
+ fmovq\t%1, %0
+ ldq\t%1, %0
+ stq\t%1, %0
+ #
+ #"
+ [(set_attr "type" "*,fpmove,fpload,fpstore,*,*")
+ (set_attr "length" "2,*,*,*,2,2")])
+
+(define_insn "*movtf_insn_sp64_no_fpu"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "= r, o")
+ (match_operand:TF 1 "input_operand" "orG,rG"))]
+ "! TARGET_FPU
+ && TARGET_ARCH64
+ && (register_operand (operands[0], TFmode)
+ || register_or_zero_operand (operands[1], TFmode))"
+ "#"
+ [(set_attr "length" "2")])
+
+;; Now all the splits to handle multi-insn TF mode moves.
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (match_operand:TF 1 "register_operand" ""))]
+ "reload_completed
+ && (! TARGET_ARCH64
+ || (TARGET_FPU
+ && ! TARGET_HARD_QUAD)
+ || ! fp_register_operand (operands[0], TFmode))"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ src1 = gen_df_reg (set_src, 0);
+ src2 = gen_df_reg (set_src, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_movdf (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_movdf (dest1, src1));
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+})
+
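+;; Purely illustrative: on V9 the split above turns a TFmode register
+;; move into two fmovd insns over the constituent DFmode halves, e.g.
+;; (hypothetical registers)
+;;
+;;         fmovd   %f0, %f4
+;;         fmovd   %f2, %f6
+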
+(define_split
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "const_zero_operand" ""))]
+ "reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx dest1, dest2;
+
+ switch (GET_CODE (set_dest))
+ {
+ case REG:
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ break;
+ case MEM:
+ dest1 = adjust_address (set_dest, DFmode, 0);
+ dest2 = adjust_address (set_dest, DFmode, 8);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen_movdf (dest1, CONST0_RTX (DFmode)));
+ emit_insn (gen_movdf (dest2, CONST0_RTX (DFmode)));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:TF 0 "register_operand" "")
+ (match_operand:TF 1 "memory_operand" ""))]
+ "(reload_completed
+ && offsettable_memref_p (operands[1])
+ && (! TARGET_ARCH64
+ || ! TARGET_HARD_QUAD
+ || ! fp_register_operand (operands[0], TFmode)))"
+ [(clobber (const_int 0))]
+{
+ rtx word0 = adjust_address (operands[1], DFmode, 0);
+ rtx word1 = adjust_address (operands[1], DFmode, 8);
+ rtx set_dest, dest1, dest2;
+
+ set_dest = operands[0];
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+
+ /* Now output, ordering such that we don't clobber any registers
+ mentioned in the address. */
+  if (reg_overlap_mentioned_p (dest1, word1))
+    {
+ emit_insn (gen_movdf (dest2, word1));
+ emit_insn (gen_movdf (dest1, word0));
+ }
+ else
+ {
+ emit_insn (gen_movdf (dest1, word0));
+ emit_insn (gen_movdf (dest2, word1));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:TF 0 "memory_operand" "")
+ (match_operand:TF 1 "register_operand" ""))]
+ "(reload_completed
+ && offsettable_memref_p (operands[0])
+ && (! TARGET_ARCH64
+ || ! TARGET_HARD_QUAD
+ || ! fp_register_operand (operands[1], TFmode)))"
+ [(clobber (const_int 0))]
+{
+ rtx set_src = operands[1];
+
+ emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 0),
+ gen_df_reg (set_src, 0)));
+ emit_insn (gen_movdf (adjust_address (operands[0], DFmode, 8),
+ gen_df_reg (set_src, 1)));
+ DONE;
+})
+
+
+;; SPARC-V9 conditional move instructions
+
+;; We can handle larger constants here for some flavors, but for now we keep
+;; it simple and only allow those constants supported by all flavors.
+;; Note that emit_conditional_move canonicalizes operands 2 and 3 so that
+;; operand 3 contains the constant if one is present, but we handle either
+;; for generality (sparc.c puts a constant in operand 2).
+
+(define_expand "mov<I:mode>cc"
+ [(set (match_operand:I 0 "register_operand" "")
+ (if_then_else:I (match_operand 1 "comparison_operator" "")
+ (match_operand:I 2 "arith10_operand" "")
+ (match_operand:I 3 "arith10_operand" "")))]
+ "TARGET_V9 && !(<I:MODE>mode == DImode && TARGET_ARCH32)"
+{
+ rtx cc_reg;
+
+ if (GET_MODE (XEXP (operands[1], 0)) == DImode && !TARGET_ARCH64)
+ FAIL;
+
+ if (GET_MODE (XEXP (operands[1], 0)) == TFmode && !TARGET_HARD_QUAD)
+ operands[1]
+ = sparc_emit_float_lib_cmp (XEXP (operands[1], 0), XEXP (operands[1], 1),
+ GET_CODE (operands[1]));
+
+ if (XEXP (operands[1], 1) == const0_rtx
+ && GET_CODE (XEXP (operands[1], 0)) == REG
+ && GET_MODE (XEXP (operands[1], 0)) == DImode
+ && v9_regcmp_p (GET_CODE (operands[1])))
+ cc_reg = XEXP (operands[1], 0);
+ else
+ cc_reg = gen_compare_reg (operands[1]);
+
+ operands[1]
+ = gen_rtx_fmt_ee (GET_CODE (operands[1]), GET_MODE (cc_reg), cc_reg,
+ const0_rtx);
+})
+
+(define_expand "mov<F:mode>cc"
+ [(set (match_operand:F 0 "register_operand" "")
+ (if_then_else:F (match_operand 1 "comparison_operator" "")
+ (match_operand:F 2 "register_operand" "")
+ (match_operand:F 3 "register_operand" "")))]
+ "TARGET_V9 && TARGET_FPU"
+{
+ rtx cc_reg;
+
+ if (GET_MODE (XEXP (operands[1], 0)) == DImode && !TARGET_ARCH64)
+ FAIL;
+
+ if (GET_MODE (XEXP (operands[1], 0)) == TFmode && !TARGET_HARD_QUAD)
+ operands[1]
+ = sparc_emit_float_lib_cmp (XEXP (operands[1], 0), XEXP (operands[1], 1),
+ GET_CODE (operands[1]));
+
+ if (XEXP (operands[1], 1) == const0_rtx
+ && GET_CODE (XEXP (operands[1], 0)) == REG
+ && GET_MODE (XEXP (operands[1], 0)) == DImode
+ && v9_regcmp_p (GET_CODE (operands[1])))
+ cc_reg = XEXP (operands[1], 0);
+ else
+ cc_reg = gen_compare_reg (operands[1]);
+
+ operands[1]
+ = gen_rtx_fmt_ee (GET_CODE (operands[1]), GET_MODE (cc_reg), cc_reg,
+ const0_rtx);
+})
+
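+;; Purely illustrative: for C source along the lines of
+;;
+;;         int m (int a, int b, int c) { return a != 0 ? b : c; }
+;;
+;; the expanders above let the insns below produce branch-free code such
+;; as (hypothetical register assignment)
+;;
+;;         cmp     %o0, 0
+;;         mov     %o2, %o0
+;;         movne   %icc, %o1, %o0
+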
+;; Conditional move define_insns
+
+(define_insn "*mov<I:mode>_cc_v9"
+ [(set (match_operand:I 0 "register_operand" "=r,r")
+ (if_then_else:I (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_register_operand" "X,X")
+ (const_int 0)])
+ (match_operand:I 3 "arith11_operand" "rL,0")
+ (match_operand:I 4 "arith11_operand" "0,rL")))]
+ "TARGET_V9 && !(<I:MODE>mode == DImode && TARGET_ARCH32)"
+ "@
+ mov%C1\t%x2, %3, %0
+ mov%c1\t%x2, %4, %0"
+ [(set_attr "type" "cmove")])
+
+(define_insn "*mov<I:mode>_cc_reg_sp64"
+ [(set (match_operand:I 0 "register_operand" "=r,r")
+ (if_then_else:I (match_operator 1 "v9_register_compare_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:I 3 "arith10_operand" "rM,0")
+ (match_operand:I 4 "arith10_operand" "0,rM")))]
+ "TARGET_ARCH64"
+ "@
+ movr%D1\t%2, %r3, %0
+ movr%d1\t%2, %r4, %0"
+ [(set_attr "type" "cmove")])
+
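+;; Purely illustrative (hypothetical registers): the register-compare
+;; form avoids materializing a condition code at all, e.g.
+;;
+;;         movrnz  %o0, %o1, %o2   ! %o2 = (%o0 != 0) ? %o1 : %o2
+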
+(define_insn "*movsf_cc_v9"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (if_then_else:SF (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_register_operand" "X,X")
+ (const_int 0)])
+ (match_operand:SF 3 "register_operand" "f,0")
+ (match_operand:SF 4 "register_operand" "0,f")))]
+ "TARGET_V9 && TARGET_FPU"
+ "@
+ fmovs%C1\t%x2, %3, %0
+ fmovs%c1\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")])
+
+(define_insn "*movsf_cc_reg_sp64"
+ [(set (match_operand:SF 0 "register_operand" "=f,f")
+ (if_then_else:SF (match_operator 1 "v9_register_compare_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SF 3 "register_operand" "f,0")
+ (match_operand:SF 4 "register_operand" "0,f")))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "@
+ fmovrs%D1\t%2, %3, %0
+ fmovrs%d1\t%2, %4, %0"
+ [(set_attr "type" "fpcrmove")])
+
+;; Named because invoked by movtf_cc_v9
+(define_insn "movdf_cc_v9"
+ [(set (match_operand:DF 0 "register_operand" "=e,e")
+ (if_then_else:DF (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_register_operand" "X,X")
+ (const_int 0)])
+ (match_operand:DF 3 "register_operand" "e,0")
+ (match_operand:DF 4 "register_operand" "0,e")))]
+ "TARGET_V9 && TARGET_FPU"
+ "@
+ fmovd%C1\t%x2, %3, %0
+ fmovd%c1\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")
+ (set_attr "fptype" "double")])
+
+;; Named because invoked by movtf_cc_reg_sp64
+(define_insn "movdf_cc_reg_sp64"
+ [(set (match_operand:DF 0 "register_operand" "=e,e")
+ (if_then_else:DF (match_operator 1 "v9_register_compare_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:DF 3 "register_operand" "e,0")
+ (match_operand:DF 4 "register_operand" "0,e")))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "@
+ fmovrd%D1\t%2, %3, %0
+ fmovrd%d1\t%2, %4, %0"
+ [(set_attr "type" "fpcrmove")
+ (set_attr "fptype" "double")])
+
+(define_insn "*movtf_cc_hq_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (if_then_else:TF (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_register_operand" "X,X")
+ (const_int 0)])
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
+ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
+ "@
+ fmovq%C1\t%x2, %3, %0
+ fmovq%c1\t%x2, %4, %0"
+ [(set_attr "type" "fpcmove")])
+
+(define_insn "*movtf_cc_reg_hq_sp64"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (if_then_else:TF (match_operator 1 "v9_register_compare_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
+ "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+ "@
+ fmovrq%D1\t%2, %3, %0
+ fmovrq%d1\t%2, %4, %0"
+ [(set_attr "type" "fpcrmove")])
+
+(define_insn_and_split "*movtf_cc_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (if_then_else:TF (match_operator 1 "comparison_operator"
+ [(match_operand 2 "icc_or_fcc_register_operand" "X,X")
+ (const_int 0)])
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
+ "TARGET_V9 && TARGET_FPU && !TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_srca = operands[3];
+ rtx set_srcb = operands[4];
+ int third = rtx_equal_p (set_dest, set_srca);
+ rtx dest1, dest2;
+ rtx srca1, srca2, srcb1, srcb2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ srca1 = gen_df_reg (set_srca, 0);
+ srca2 = gen_df_reg (set_srca, 1);
+ srcb1 = gen_df_reg (set_srcb, 0);
+ srcb2 = gen_df_reg (set_srcb, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if ((third && reg_overlap_mentioned_p (dest1, srcb2))
+ || (!third && reg_overlap_mentioned_p (dest1, srca2)))
+ {
+ emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2], srca2, srcb2));
+ emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2], srca1, srcb1));
+ }
+ else
+ {
+ emit_insn (gen_movdf_cc_v9 (dest1, operands[1], operands[2], srca1, srcb1));
+ emit_insn (gen_movdf_cc_v9 (dest2, operands[1], operands[2], srca2, srcb2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*movtf_cc_reg_sp64"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (if_then_else:TF (match_operator 1 "v9_register_compare_operator"
+ [(match_operand:DI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:TF 3 "register_operand" "e,0")
+ (match_operand:TF 4 "register_operand" "0,e")))]
+ "TARGET_ARCH64 && TARGET_FPU && ! TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_srca = operands[3];
+ rtx set_srcb = operands[4];
+ int third = rtx_equal_p (set_dest, set_srca);
+ rtx dest1, dest2;
+ rtx srca1, srca2, srcb1, srcb2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ srca1 = gen_df_reg (set_srca, 0);
+ srca2 = gen_df_reg (set_srca, 1);
+ srcb1 = gen_df_reg (set_srcb, 0);
+ srcb2 = gen_df_reg (set_srcb, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if ((third && reg_overlap_mentioned_p (dest1, srcb2))
+ || (!third && reg_overlap_mentioned_p (dest1, srca2)))
+ {
+ emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2], srca2, srcb2));
+ emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2], srca1, srcb1));
+ }
+ else
+ {
+ emit_insn (gen_movdf_cc_reg_sp64 (dest1, operands[1], operands[2], srca1, srcb1));
+ emit_insn (gen_movdf_cc_reg_sp64 (dest2, operands[1], operands[2], srca2, srcb2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
+
+
+;; Zero-extension instructions
+
+;; These patterns originally accepted general_operands; however, slightly
+;; better code is generated by accepting only register_operands and then
+;; letting combine generate the ldu[hb] insns.
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift_16 = GEN_INT (16);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (SImode);
+ op1_subbyte *= GET_MODE_SIZE (SImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ shift_16));
+ emit_insn (gen_lshrsi3 (operand0, temp, shift_16));
+ DONE;
+})
+
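+;; Purely illustrative (hypothetical registers): for a register source
+;; the expander above yields the shift pair
+;;
+;;         sll     %o1, 16, %o0
+;;         srl     %o0, 16, %o0
+;;
+;; while a memory source is handled by the single lduh insn below.
+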
+(define_insn "*zero_extendhisi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
+ ""
+ "lduh\t%1, %0"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*zero_extendqihi2_insn"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "input_operand" "r,m")))]
+ "GET_CODE (operands[1]) != CONST_INT"
+ "@
+ and\t%1, 0xff, %0
+ ldub\t%1, %0"
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*zero_extendqisi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "input_operand" "r,m")))]
+ "GET_CODE (operands[1]) != CONST_INT"
+ "@
+ and\t%1, 0xff, %0
+ ldub\t%1, %0"
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
+
+(define_expand "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "")))]
+ "TARGET_ARCH64"
+ "")
+
+(define_insn "*zero_extendqidi2_insn"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:QI 1 "input_operand" "r,m")))]
+ "TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT"
+ "@
+ and\t%1, 0xff, %0
+ ldub\t%1, %0"
+ [(set_attr "type" "*,load")
+ (set_attr "us3load_type" "*,3cycle")])
+
+(define_expand "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "")))]
+ "TARGET_ARCH64"
+{
+ rtx temp = gen_reg_rtx (DImode);
+ rtx shift_48 = GEN_INT (48);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (DImode);
+ op1_subbyte *= GET_MODE_SIZE (DImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ shift_48));
+ emit_insn (gen_lshrdi3 (operand0, temp, shift_48));
+ DONE;
+})
+
+(define_insn "*zero_extendhidi2_insn"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
+ "TARGET_ARCH64"
+ "lduh\t%1, %0"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+;; ??? Write truncdisi pattern using sra?
+
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn "*zero_extendsidi2_insn_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:SI 1 "input_operand" "r,m")))]
+ "TARGET_ARCH64 && GET_CODE (operands[1]) != CONST_INT"
+ "@
+ srl\t%1, 0, %0
+ lduw\t%1, %0"
+ [(set_attr "type" "shift,load")])
+
+(define_insn_and_split "*zero_extendsidi2_insn_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ rtx dest1, dest2;
+
+ dest1 = gen_highpart (SImode, operands[0]);
+ dest2 = gen_lowpart (SImode, operands[0]);
+
+ /* Swap the order in case of overlap. */
+ if (REGNO (dest1) == REGNO (operands[1]))
+ {
+ operands[2] = dest2;
+ operands[3] = operands[1];
+ operands[4] = dest1;
+ operands[5] = const0_rtx;
+ }
+ else
+ {
+ operands[2] = dest1;
+ operands[3] = const0_rtx;
+ operands[4] = dest2;
+ operands[5] = operands[1];
+ }
+}
+ [(set_attr "length" "2")])
+
+;; Simplify comparisons of extended values.
+
+(define_insn "*cmp_zero_extendqisi2"
+ [(set (reg:CC 100)
+ (compare:CC (zero_extend:SI (match_operand:QI 0 "register_operand" "r"))
+ (const_int 0)))]
+ ""
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
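+;; Purely illustrative: for `if ((unsigned char) x) ...' the pattern
+;; above folds the extension into the compare, emitting a single
+;;
+;;         andcc   %o0, 0xff, %g0
+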
+(define_insn "*cmp_zero_qi"
+ [(set (reg:CC 100)
+ (compare:CC (match_operand:QI 0 "register_operand" "r")
+ (const_int 0)))]
+ ""
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_extendqisi2_set"
+ [(set (reg:CC 100)
+ (compare:CC (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_dup 1)))]
+ ""
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_extendqisi2_andcc_set"
+ [(set (reg:CC 100)
+ (compare:CC (and:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 255))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (subreg:QI (match_dup 1) 0)))]
+ ""
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_extendqidi2"
+ [(set (reg:CCX 100)
+ (compare:CCX (zero_extend:DI (match_operand:QI 0 "register_operand" "r"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_qi_sp64"
+ [(set (reg:CCX 100)
+ (compare:CCX (match_operand:QI 0 "register_operand" "r")
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_extendqidi2_set"
+ [(set (reg:CCX 100)
+ (compare:CCX (zero_extend:DI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_dup 1)))]
+ "TARGET_ARCH64"
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_zero_extendqidi2_andcc_set"
+ [(set (reg:CCX 100)
+ (compare:CCX (and:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 255))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (subreg:QI (match_dup 1) 0)))]
+ "TARGET_ARCH64"
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+;; Similarly, handle {SI,DI}->QI mode truncation followed by a compare.
+
+(define_insn "*cmp_siqi_trunc"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:SI 0 "register_operand" "r") 3)
+ (const_int 0)))]
+ ""
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_siqi_trunc_set"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:SI 1 "register_operand" "r") 3)
+ (const_int 0)))
+ (set (match_operand:QI 0 "register_operand" "=r")
+ (subreg:QI (match_dup 1) 3))]
+ ""
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_diqi_trunc"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:DI 0 "register_operand" "r") 7)
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "andcc\t%0, 0xff, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_diqi_trunc_set"
+ [(set (reg:CC 100)
+ (compare:CC (subreg:QI (match_operand:DI 1 "register_operand" "r") 7)
+ (const_int 0)))
+ (set (match_operand:QI 0 "register_operand" "=r")
+ (subreg:QI (match_dup 1) 7))]
+ "TARGET_ARCH64"
+ "andcc\t%1, 0xff, %0"
+ [(set_attr "type" "compare")])
+
+
+;; Sign-extension instructions
+
+;; These patterns originally accepted general_operands; however, slightly
+;; better code is generated by accepting only register_operands and then
+;; letting combine generate the lds[hb] insns.
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift_16 = GEN_INT (16);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (SImode);
+ op1_subbyte *= GET_MODE_SIZE (SImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ shift_16));
+ emit_insn (gen_ashrsi3 (operand0, temp, shift_16));
+ DONE;
+})
+
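+;; Purely illustrative (hypothetical registers): for a register source
+;; the expander above yields
+;;
+;;         sll     %o1, 16, %o0
+;;         sra     %o0, 16, %o0
+;;
+;; while a memory source is handled by the single ldsh insn below.
+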
+(define_insn "*sign_extendhisi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
+ ""
+ "ldsh\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift_24 = GEN_INT (24);
+ int op1_subbyte = 0;
+ int op0_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (SImode);
+ op1_subbyte *= GET_MODE_SIZE (SImode);
+ operand1 = XEXP (operand1, 0);
+ }
+ if (GET_CODE (operand0) == SUBREG)
+ {
+ op0_subbyte = SUBREG_BYTE (operand0);
+ op0_subbyte /= GET_MODE_SIZE (SImode);
+ op0_subbyte *= GET_MODE_SIZE (SImode);
+ operand0 = XEXP (operand0, 0);
+ }
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ shift_24));
+ if (GET_MODE (operand0) != SImode)
+ operand0 = gen_rtx_SUBREG (SImode, operand0, op0_subbyte);
+ emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ DONE;
+})
+
+(define_insn "*sign_extendqihi2_insn"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
+ ""
+ "ldsb\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift_24 = GEN_INT (24);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (SImode);
+ op1_subbyte *= GET_MODE_SIZE (SImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ shift_24));
+ emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ DONE;
+})
+
+(define_insn "*sign_extendqisi2_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
+ ""
+ "ldsb\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "")))]
+ "TARGET_ARCH64"
+{
+ rtx temp = gen_reg_rtx (DImode);
+ rtx shift_56 = GEN_INT (56);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (DImode);
+ op1_subbyte *= GET_MODE_SIZE (DImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ shift_56));
+ emit_insn (gen_ashrdi3 (operand0, temp, shift_56));
+ DONE;
+})
+
+(define_insn "*sign_extendqidi2_insn"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))]
+ "TARGET_ARCH64"
+ "ldsb\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "")))]
+ "TARGET_ARCH64"
+{
+ rtx temp = gen_reg_rtx (DImode);
+ rtx shift_48 = GEN_INT (48);
+ int op1_subbyte = 0;
+
+ if (GET_CODE (operand1) == SUBREG)
+ {
+ op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte /= GET_MODE_SIZE (DImode);
+ op1_subbyte *= GET_MODE_SIZE (DImode);
+ operand1 = XEXP (operand1, 0);
+ }
+
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ shift_48));
+ emit_insn (gen_ashrdi3 (operand0, temp, shift_48));
+ DONE;
+})
+
+(define_insn "*sign_extendhidi2_insn"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
+ "TARGET_ARCH64"
+ "ldsh\t%1, %0"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_expand "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_ARCH64"
+ "")
+
+(define_insn "*sign_extendsidi2_insn"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI (match_operand:SI 1 "input_operand" "r,m")))]
+ "TARGET_ARCH64"
+ "@
+ sra\t%1, 0, %0
+ ldsw\t%1, %0"
+ [(set_attr "type" "shift,sload")
+ (set_attr "us3load_type" "*,3cycle")])
+
+
+;; Special pattern for optimizing bit-field compares. This is needed
+;; because combine uses this as a canonical form.
+
+(define_insn "*cmp_zero_extract"
+ [(set (reg:CC 100)
+ (compare:CC
+ (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "small_int_operand" "I")
+ (match_operand:SI 2 "small_int_operand" "I"))
+ (const_int 0)))]
+ "INTVAL (operands[2]) > 19"
+{
+ int len = INTVAL (operands[1]);
+ int pos = 32 - INTVAL (operands[2]) - len;
+ HOST_WIDE_INT mask = ((1 << len) - 1) << pos;
+ operands[1] = GEN_INT (mask);
+ return "andcc\t%0, %1, %%g0";
+}
+ [(set_attr "type" "compare")])
+
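+;; Worked example (illustrative only): for len = 8 and a start bit of
+;; 24 counted from the MSB, the code above computes
+;; pos = 32 - 24 - 8 = 0 and mask = ((1 << 8) - 1) << 0 = 0xff,
+;; so the insn prints as
+;;
+;;         andcc   %o0, 0xff, %g0
+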
+(define_insn "*cmp_zero_extract_sp64"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (zero_extract:DI (match_operand:DI 0 "register_operand" "r")
+ (match_operand:SI 1 "small_int_operand" "I")
+ (match_operand:SI 2 "small_int_operand" "I"))
+ (const_int 0)))]
+ "TARGET_ARCH64 && INTVAL (operands[2]) > 51"
+{
+ int len = INTVAL (operands[1]);
+ int pos = 64 - INTVAL (operands[2]) - len;
+ HOST_WIDE_INT mask = (((unsigned HOST_WIDE_INT) 1 << len) - 1) << pos;
+ operands[1] = GEN_INT (mask);
+ return "andcc\t%0, %1, %%g0";
+}
+ [(set_attr "type" "compare")])
+
+
+;; Conversions between float, double and long double.
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (float_extend:DF
+ (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fstod\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "extendsftf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float_extend:TF
+ (match_operand:SF 1 "register_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;")
+
+(define_insn "*extendsftf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (float_extend:TF
+ (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fstoq\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "extenddftf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float_extend:TF
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT_EXTEND, operands); DONE;")
+
+(define_insn "*extenddftf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (float_extend:TF
+ (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fdtoq\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF
+ (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU"
+ "fdtos\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "trunctfsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;")
+
+(define_insn "*trunctfsf2_hq"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float_truncate:SF
+ (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fqtos\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "trunctfdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float_truncate:DF
+ (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT_TRUNCATE, operands); DONE;")
+
+(define_insn "*trunctfdf2_hq"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (float_truncate:DF
+ (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fqtod\t%1, %0"
+ [(set_attr "type" "fp")])
+
+
+;; Conversion between fixed point and floating point.
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fitos\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (float:DF (match_operand:SI 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fitod\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "floatsitf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float:TF (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT, operands); DONE;")
+
+(define_insn "*floatsitf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (float:TF (match_operand:SI 1 "register_operand" "f")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fitoq\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "floatunssitf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (unsigned_float:TF (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD"
+ "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;")
+
+;; Now the same for 64 bit sources.
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:DI 1 "register_operand" "e")))]
+ "TARGET_V9 && TARGET_FPU"
+ "fxtos\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "sparc_emit_floatunsdi (operands, SFmode); DONE;")
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (float:DF (match_operand:DI 1 "register_operand" "e")))]
+ "TARGET_V9 && TARGET_FPU"
+ "fxtod\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "general_operand" ""))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "sparc_emit_floatunsdi (operands, DFmode); DONE;")
+
+(define_expand "floatditf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float:TF (match_operand:DI 1 "register_operand" "")))]
+ "TARGET_FPU && TARGET_V9 && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FLOAT, operands); DONE;")
+
+(define_insn "*floatditf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (float:TF (match_operand:DI 1 "register_operand" "e")))]
+ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
+ "fxtoq\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "floatunsditf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (unsigned_float:TF (match_operand:DI 1 "register_operand" "")))]
+ "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD"
+ "emit_tfmode_cvt (UNSIGNED_FLOAT, operands); DONE;")
+
+;; Convert a float to an actual integer.
+;; Truncation is performed as part of the conversion.
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "TARGET_FPU"
+ "fstoi\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
+ "TARGET_FPU"
+ "fdtoi\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "fix_trunctfsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (fix:SI (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FIX, operands); DONE;")
+
+(define_insn "*fix_trunctfsi2_hq"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (fix:SI (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fqtoi\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "fixuns_trunctfsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unsigned_fix:SI (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD"
+ "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;")
+
+;; Now the same, for V9 targets
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=e")
+ (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
+ "TARGET_V9 && TARGET_FPU"
+ "fstox\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "fixuns_truncsfdi2"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:SF 1 "general_operand" ""))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "sparc_emit_fixunsdi (operands, SFmode); DONE;")
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "register_operand" "=e")
+ (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
+ "TARGET_V9 && TARGET_FPU"
+ "fdtox\t%1, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_expand "fixuns_truncdfdi2"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DF 1 "general_operand" ""))]
+ "TARGET_ARCH64 && TARGET_FPU"
+ "sparc_emit_fixunsdi (operands, DFmode); DONE;")
+
+(define_expand "fix_trunctfdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (fix:DI (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_V9 && TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_cvt (FIX, operands); DONE;")
+
+(define_insn "*fix_trunctfdi2_hq"
+ [(set (match_operand:DI 0 "register_operand" "=e")
+ (fix:DI (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
+ "fqtox\t%1, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "fixuns_trunctfdi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unsigned_fix:DI (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && TARGET_ARCH64 && ! TARGET_HARD_QUAD"
+ "emit_tfmode_cvt (UNSIGNED_FIX, operands); DONE;")
+
+
+;; Integer addition/subtraction instructions.
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "arith_double_add_operand" "")))]
+ ""
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_PLUS (DImode, operands[1],
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*adddi3_insn_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
+ (match_operand:DI 2 "arith_double_operand" "rHI")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_dup 4)
+ (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 3)
+ (plus:SI (match_dup 4) (match_dup 5)))])
+ (set (match_dup 6)
+ (plus:SI (plus:SI (match_dup 7)
+ (match_dup 8))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+{
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[2]);
+ operands[6] = gen_highpart (SImode, operands[0]);
+ operands[7] = gen_highpart_mode (SImode, DImode, operands[1]);
+#if HOST_BITS_PER_WIDE_INT == 32
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) < 0)
+ operands[8] = constm1_rtx;
+ else
+ operands[8] = const0_rtx;
+ }
+ else
+#endif
+ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
+}
+ [(set_attr "length" "2")])
+
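+;; Purely illustrative (hypothetical registers): the split above turns a
+;; 32-bit DImode add into a carry chain,
+;;
+;;         addcc   %o1, %o3, %o1   ! low words, sets carry
+;;         addx    %o0, %o2, %o0   ! high words plus carry
+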
+;; LTU here means "carry set"
+(define_insn "addx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ ""
+ "addx\t%1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn_and_split "*addx_extend_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (plus:SI
+ (match_operand:SI 1 "register_or_zero_operand" "%rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (plus:SI (plus:SI (match_dup 1) (match_dup 2))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))
+ (set (match_dup 4) (const_int 0))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart_mode (SImode, DImode, operands[1]);"
+ [(set_attr "length" "2")])
+
+(define_insn "*addx_extend_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (plus:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "TARGET_ARCH64"
+ "addx\t%r1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn_and_split "*adddi3_extend_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "register_operand" "r")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_dup 3) (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 5) (plus:SI (match_dup 3) (match_dup 1)))])
+ (set (match_dup 6)
+ (plus:SI (plus:SI (match_dup 4) (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[2]);
+ operands[4] = gen_highpart (SImode, operands[2]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[0]);"
+ [(set_attr "length" "2")])
+
+(define_insn "*adddi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "%r,r")
+ (match_operand:DI 2 "arith_add_operand" "rI,O")))]
+ "TARGET_ARCH64"
+ "@
+ add\t%1, %2, %0
+ sub\t%1, -%2, %0")
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r,r,d")
+ (match_operand:SI 2 "arith_add_operand" "rI,O,d")))]
+ ""
+ "@
+ add\t%1, %2, %0
+ sub\t%1, -%2, %0
+ fpadd32s\t%1, %2, %0"
+ [(set_attr "type" "*,*,fga")
+ (set_attr "fptype" "*,*,single")])
+
+(define_insn "*cmp_cc_plus"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_operand:SI 0 "arith_operand" "%r")
+ (match_operand:SI 1 "arith_operand" "rI"))
+ (const_int 0)))]
+ ""
+ "addcc\t%0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_plus"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (plus:DI (match_operand:DI 0 "arith_operand" "%r")
+ (match_operand:DI 1 "arith_operand" "rI"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "addcc\t%0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_plus_set"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "addcc\t%1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_plus_set"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (plus:DI (match_operand:DI 1 "arith_operand" "%r")
+ (match_operand:DI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_ARCH64"
+ "addcc\t%1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "arith_double_add_operand" "")))]
+ ""
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_MINUS (DImode, operands[1],
+ operands[2])),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*subdi3_insn_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith_double_operand" "rHI")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_dup 4)
+ (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 3)
+ (minus:SI (match_dup 4) (match_dup 5)))])
+ (set (match_dup 6)
+ (minus:SI (minus:SI (match_dup 7)
+ (match_dup 8))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+{
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[2]);
+ operands[6] = gen_highpart (SImode, operands[0]);
+ operands[7] = gen_highpart (SImode, operands[1]);
+#if HOST_BITS_PER_WIDE_INT == 32
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (INTVAL (operands[2]) < 0)
+ operands[8] = constm1_rtx;
+ else
+ operands[8] = const0_rtx;
+ }
+ else
+#endif
+ operands[8] = gen_highpart_mode (SImode, DImode, operands[2]);
+}
+ [(set_attr "length" "2")])
+
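+;; Purely illustrative (hypothetical registers): the borrow chain
+;; mirrors the addition case,
+;;
+;;         subcc   %o1, %o3, %o1   ! low words, sets borrow
+;;         subx    %o0, %o2, %o0   ! high words minus borrow
+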
+;; LTU here means "carry set"
+(define_insn "subx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ ""
+ "subx\t%r1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn "*subx_extend_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "TARGET_ARCH64"
+ "subx\t%r1, %2, %0"
+ [(set_attr "type" "ialuX")])
+
+(define_insn_and_split "*subx_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 2))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))
+ (set (match_dup 4) (const_int 0))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[0]);"
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*subdi3_extend_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_dup 3) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 5) (minus:SI (match_dup 3) (match_dup 2)))])
+ (set (match_dup 6)
+ (minus:SI (minus:SI (match_dup 4) (const_int 0))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[1]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[0]);"
+ [(set_attr "length" "2")])
+
+(define_insn "*subdi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (minus:DI (match_operand:DI 1 "register_operand" "r,r")
+ (match_operand:DI 2 "arith_add_operand" "rI,O")))]
+ "TARGET_ARCH64"
+ "@
+ sub\t%1, %2, %0
+ add\t%1, -%2, %0")
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,d")
+ (minus:SI (match_operand:SI 1 "register_operand" "r,r,d")
+ (match_operand:SI 2 "arith_add_operand" "rI,O,d")))]
+ ""
+ "@
+ sub\t%1, %2, %0
+ add\t%1, -%2, %0
+ fpsub32s\t%1, %2, %0"
+ [(set_attr "type" "*,*,fga")
+ (set_attr "fptype" "*,*,single")])
+
+(define_insn "*cmp_minus_cc"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_operand:SI 0 "register_or_zero_operand" "rJ")
+ (match_operand:SI 1 "arith_operand" "rI"))
+ (const_int 0)))]
+ ""
+ "subcc\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_minus_ccx"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (minus:DI (match_operand:DI 0 "register_operand" "r")
+ (match_operand:DI 1 "arith_operand" "rI"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "subcc\t%0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "cmp_minus_cc_set"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "subcc\t%r1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_minus_ccx_set"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (minus:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_ARCH64"
+ "subcc\t%1, %2, %0"
+ [(set_attr "type" "compare")])
+
+
+;; Integer multiply/divide instructions.
+
+;; The 32-bit multiply/divide instructions are deprecated on v9, but at
+;; least on UltraSPARC I, II and IIi they are a win tick-wise.
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_HARD_MUL"
+ "smul\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+(define_expand "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "arith_operand" "")
+ (match_operand:DI 2 "arith_operand" "")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+{
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*muldi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (match_operand:DI 1 "arith_operand" "%r")
+ (match_operand:DI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+ "mulx\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+;; V8plus wide multiply.
+;; XXX
+(define_insn "muldi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=r,h")
+ (mult:DI (match_operand:DI 1 "arith_operand" "%r,0")
+ (match_operand:DI 2 "arith_operand" "rI,rI")))
+ (clobber (match_scratch:SI 3 "=&h,X"))
+ (clobber (match_scratch:SI 4 "=&h,X"))]
+ "TARGET_V8PLUS"
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ if (which_alternative == 1)
+ output_asm_insn ("sllx\t%H1, 32, %H1", operands);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (which_alternative == 1)
+ return "or\t%L1, %H1, %H1\n\tmulx\t%H1, %2, %L0\;srlx\t%L0, 32, %H0";
+ else
+ return "sllx\t%H1, 32, %3\n\tor\t%L1, %3, %3\n\tmulx\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0";
+ }
+ else if (rtx_equal_p (operands[1], operands[2]))
+ {
+ if (which_alternative == 1)
+ return "or\t%L1, %H1, %H1\n\tmulx\t%H1, %H1, %L0\;srlx\t%L0, 32, %H0";
+ else
+ return "sllx\t%H1, 32, %3\n\tor\t%L1, %3, %3\n\tmulx\t%3, %3, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0";
+ }
+ if (sparc_check_64 (operands[2], insn) <= 0)
+ output_asm_insn ("srl\t%L2, 0, %L2", operands);
+ if (which_alternative == 1)
+ return "or\t%L1, %H1, %H1\n\tsllx\t%H2, 32, %L1\n\tor\t%L2, %L1, %L1\n\tmulx\t%H1, %L1, %L0\;srlx\t%L0, 32, %H0";
+ else
+ return "sllx\t%H1, 32, %3\n\tsllx\t%H2, 32, %4\n\tor\t%L1, %3, %3\n\tor\t%L2, %4, %4\n\tmulx\t%3, %4, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "9,8")])
+
+(define_insn "*cmp_mul_set"
+ [(set (reg:CC 100)
+ (compare:CC (mult:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_V8 || TARGET_SPARCLITE || TARGET_DEPRECATED_V8_INSNS"
+ "smulcc\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_operand" ""))))]
+ "TARGET_HARD_MUL"
+{
+ if (CONSTANT_P (operands[2]))
+ {
+ if (TARGET_V8PLUS)
+ emit_insn (gen_const_mulsidi3_v8plus (operands[0], operands[1],
+ operands[2]));
+ else if (TARGET_ARCH32)
+ emit_insn (gen_const_mulsidi3_sp32 (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_const_mulsidi3_sp64 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_mulsidi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+;; V9 puts the 64-bit product in a 64-bit register. Only out or global
+;; registers can hold 64-bit values in the V8plus environment.
+;; XXX
+(define_insn "mulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
+ smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2,3")])
+
+;; XXX
+(define_insn "const_mulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:DI 2 "small_int_operand" "I,I")))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
+ smul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2,3")])
+
+;; XXX
+(define_insn "*mulsidi3_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_HARD_MUL32"
+{
+ return TARGET_SPARCLET
+ ? "smuld\t%1, %2, %L0"
+ : "smul\t%1, %2, %L0\n\trd\t%%y, %H0";
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_string "imul") (const_string "multi")))
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_int 1) (const_int 2)))])
+
+(define_insn "*mulsidi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "smul\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+;; Extra pattern, because sign_extend of a constant isn't valid.
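+;; (A sign_extend of a const_int is not well-formed RTL, since constants
+;; carry no mode of their own, so the widened constant appears directly as
+;; a DImode small_int_operand in the patterns below.)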
+
+;; XXX
+(define_insn "const_mulsidi3_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "small_int_operand" "I")))]
+ "TARGET_HARD_MUL32"
+{
+ return TARGET_SPARCLET
+ ? "smuld\t%1, %2, %L0"
+ : "smul\t%1, %2, %L0\n\trd\t%%y, %H0";
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_string "imul") (const_string "multi")))
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_int 1) (const_int 2)))])
+
+(define_insn "const_mulsidi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "small_int_operand" "I")))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "smul\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+(define_expand "smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_operand" "")))
+ (const_int 32))))]
+ "TARGET_HARD_MUL && TARGET_ARCH32"
+{
+ if (CONSTANT_P (operands[2]))
+ {
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_const_smulsi3_highpart_v8plus (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT (32)));
+ DONE;
+ }
+ emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_smulsi3_highpart_v8plus (operands[0], operands[1],
+ operands[2], GEN_INT (32)));
+ DONE;
+ }
+})
+
+;; XXX
+(define_insn "smulsi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "small_int_operand" "I,I"))))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul\t%1, %2, %0\;srlx\t%0, %3, %0
+ smul\t%1, %2, %4\;srlx\t%4, %3, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
+;; XXX
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (subreg:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "small_int_operand" "I,I"))
+ 4))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
+ smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "const_smulsi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:DI 2 "small_int_operand" "I,I"))
+ (match_operand:SI 3 "small_int_operand" "I,I"))))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
+ smul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "*smulsi3_highpart_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))]
+ "TARGET_HARD_MUL32"
+ "smul\t%1, %2, %%g0\n\trd\t%%y, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "const_smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "small_int_operand" "i"))
+ (const_int 32))))]
+ "TARGET_HARD_MUL32"
+ "smul\t%1, %2, %%g0\n\trd\t%%y, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" ""))))]
+ "TARGET_HARD_MUL"
+{
+ if (CONSTANT_P (operands[2]))
+ {
+ if (TARGET_V8PLUS)
+ emit_insn (gen_const_umulsidi3_v8plus (operands[0], operands[1],
+ operands[2]));
+ else if (TARGET_ARCH32)
+ emit_insn (gen_const_umulsidi3_sp32 (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_insn (gen_const_umulsidi3_sp64 (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+;; XXX
+(define_insn "umulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
+ umul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2,3")])
+
+;; XXX
+(define_insn "*umulsidi3_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_HARD_MUL32"
+{
+ return TARGET_SPARCLET
+ ? "umuld\t%1, %2, %L0"
+ : "umul\t%1, %2, %L0\n\trd\t%%y, %H0";
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_string "imul") (const_string "multi")))
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_int 1) (const_int 2)))])
+
+(define_insn "*umulsidi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "umul\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+;; Extra pattern, because zero_extend of a constant isn't valid.
+
+;; XXX
+(define_insn "const_umulsidi3_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "uns_small_int_operand" "")))]
+ "TARGET_HARD_MUL32"
+{
+ return TARGET_SPARCLET
+ ? "umuld\t%1, %s2, %L0"
+ : "umul\t%1, %s2, %L0\n\trd\t%%y, %H0";
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_string "imul") (const_string "multi")))
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "sparclet")
+ (const_int 1) (const_int 2)))])
+
+(define_insn "const_umulsidi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "uns_small_int_operand" "")))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "umul\t%1, %s2, %0"
+ [(set_attr "type" "imul")])
+
+;; XXX
+(define_insn "const_umulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:DI 2 "uns_small_int_operand" "")))
+ (clobber (match_scratch:SI 3 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul\t%1, %s2, %L0\n\tsrlx\t%L0, 32, %H0
+ umul\t%1, %s2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2,3")])
+
+(define_expand "umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "uns_arith_operand" "")))
+ (const_int 32))))]
+ "TARGET_HARD_MUL && TARGET_ARCH32"
+{
+ if (CONSTANT_P (operands[2]))
+ {
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_const_umulsi3_highpart_v8plus (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT (32)));
+ DONE;
+ }
+ emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_umulsi3_highpart_v8plus (operands[0], operands[1],
+ operands[2], GEN_INT (32)));
+ DONE;
+ }
+})
+
+;; XXX
+(define_insn "umulsi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "small_int_operand" "I,I"))))
+ (clobber (match_scratch:SI 4 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
+ umul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "const_umulsi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:DI 2 "uns_small_int_operand" ""))
+ (match_operand:SI 3 "small_int_operand" "I,I"))))
+ (clobber (match_scratch:SI 4 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul\t%1, %s2, %0\n\tsrlx\t%0, %3, %0
+ umul\t%1, %s2, %4\n\tsrlx\t%4, %3, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "*umulsi3_highpart_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))]
+ "TARGET_HARD_MUL32"
+ "umul\t%1, %2, %%g0\n\trd\t%%y, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+;; XXX
+(define_insn "const_umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:DI 2 "uns_small_int_operand" ""))
+ (const_int 32))))]
+ "TARGET_HARD_MUL32"
+ "umul\t%1, %s2, %%g0\n\trd\t%%y, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_expand "divsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "input_operand" "")))
+ (clobber (match_scratch:SI 3 ""))])]
+ "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+{
+ if (TARGET_ARCH64)
+ {
+ operands[3] = gen_reg_rtx (SImode);
+ emit_insn (gen_ashrsi3 (operands[3], operands[1], GEN_INT (31)));
+ emit_insn (gen_divsi3_sp64 (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+;; The V8 architecture specifies that there must be at least 3 instructions
+;; between a write to the Y register and a use of it for correct results.
+;; We try to fill one of them with a simple constant or a memory load.
+
+(define_insn "divsi3_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (div:SI (match_operand:SI 1 "register_operand" "r,r,r")
+ (match_operand:SI 2 "input_operand" "rI,K,m")))
+ (clobber (match_scratch:SI 3 "=&r,&r,&r"))]
+ "(TARGET_V8 || TARGET_DEPRECATED_V8_INSNS) && TARGET_ARCH32"
+{
+ output_asm_insn ("sra\t%1, 31, %3", operands);
+ output_asm_insn ("wr\t%3, 0, %%y", operands);
+
+ switch (which_alternative)
+ {
+ case 0:
+ if (TARGET_V9)
+ return "sdiv\t%1, %2, %0";
+ else
+ return "nop\n\tnop\n\tnop\n\tsdiv\t%1, %2, %0";
+ case 1:
+ if (TARGET_V9)
+ return "sethi\t%%hi(%a2), %3\n\tsdiv\t%1, %3, %0";
+ else
+ return "sethi\t%%hi(%a2), %3\n\tnop\n\tnop\n\tsdiv\t%1, %3, %0";
+ case 2:
+ if (TARGET_V9)
+ return "ld\t%2, %3\n\tsdiv\t%1, %3, %0";
+ else
+ return "ld\t%2, %3\n\tnop\n\tnop\n\tsdiv\t%1, %3, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "v9")
+ (const_int 4) (const_int 6)))])
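+;; An illustrative pre-V9 expansion of alternative 1 (a sketch, with
+;; hypothetical registers and constant); the sethi occupies one of the
+;; three required slots and the two nops fill the rest:
+;;     sra   %o1, 31, %o3
+;;     wr    %o3, 0, %y
+;;     sethi %hi(4096), %o3
+;;     nop
+;;     nop
+;;     sdiv  %o1, %o3, %o0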
+
+(define_insn "divsi3_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "input_operand" "rI")))
+ (use (match_operand:SI 3 "register_operand" "r"))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "wr\t%%g0, %3, %%y\n\tsdiv\t%1, %2, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_insn "divdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (div:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+ "sdivx\t%1, %2, %0"
+ [(set_attr "type" "idiv")])
+
+(define_insn "*cmp_sdiv_cc_set"
+ [(set (reg:CC 100)
+ (compare:CC (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_dup 1) (match_dup 2)))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+{
+ output_asm_insn ("sra\t%1, 31, %3", operands);
+ output_asm_insn ("wr\t%3, 0, %%y", operands);
+
+ if (TARGET_V9)
+ return "sdivcc\t%1, %2, %0";
+ else
+ return "nop\n\tnop\n\tnop\n\tsdivcc\t%1, %2, %0";
+}
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "v9")
+ (const_int 3) (const_int 6)))])
+
+;; XXX
+(define_expand "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "input_operand" "")))]
+ "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+ "")
+
+;; The V8 architecture specifies that there must be at least 3 instructions
+;; between a write to the Y register and a use of it for correct results.
+;; We try to fill one of them with a simple constant or a memory load.
+
+(define_insn "udivsi3_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r,&r,&r")
+ (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "r,r,r,m")
+ (match_operand:SI 2 "input_operand" "rI,K,m,r")))]
+ "(TARGET_V8 || TARGET_DEPRECATED_V8_INSNS) && TARGET_ARCH32"
+{
+ output_asm_insn ("wr\t%%g0, 0, %%y", operands);
+
+ switch (which_alternative)
+ {
+ case 0:
+ if (TARGET_V9)
+ return "udiv\t%1, %2, %0";
+ else
+ return "nop\n\tnop\n\tnop\n\tudiv\t%1, %2, %0";
+ case 1:
+ if (TARGET_V9)
+ return "sethi\t%%hi(%a2), %0\n\tudiv\t%1, %0, %0";
+ else
+ return "sethi\t%%hi(%a2), %0\n\tnop\n\tnop\n\tudiv\t%1, %0, %0";
+ case 2:
+ if (TARGET_V9)
+ return "ld\t%2, %0\n\tudiv\t%1, %0, %0";
+ else
+ return "ld\t%2, %0\n\tnop\n\tnop\n\tudiv\t%1, %0, %0";
+ case 3:
+ if (TARGET_V9)
+ return "ld\t%1, %0\n\tudiv\t%0, %2, %0";
+ else
+ return "ld\t%1, %0\n\tnop\n\tnop\n\tudiv\t%0, %2, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "v9")
+ (const_int 3) (const_int 5)))])
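+;; Unlike the signed case, the Y write is a constant zero here, so no sign
+;; word is computed; alternatives 1-3 reuse %0 as the temporary, which is
+;; why those alternatives earlyclobber it.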
+
+(define_insn "udivsi3_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "nonimmediate_operand" "r")
+ (match_operand:SI 2 "input_operand" "rI")))]
+ "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
+ "wr\t%%g0, 0, %%y\n\tudiv\t%1, %2, %0"
+ [(set_attr "type" "multi")
+ (set_attr "length" "2")])
+
+(define_insn "udivdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (udiv:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+ "udivx\t%1, %2, %0"
+ [(set_attr "type" "idiv")])
+
+(define_insn "*cmp_udiv_cc_set"
+ [(set (reg:CC 100)
+ (compare:CC (udiv:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
+{
+ output_asm_insn ("wr\t%%g0, 0, %%y", operands);
+
+ if (TARGET_V9)
+ return "udivcc\t%1, %2, %0";
+ else
+ return "nop\n\tnop\n\tnop\n\tudivcc\t%1, %2, %0";
+}
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (if_then_else (eq_attr "isa" "v9")
+ (const_int 2) (const_int 5)))])
+
+;; SPARClet multiply/accumulate instructions.
+
+(define_insn "*smacsi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (match_operand:SI 3 "register_operand" "0")))]
+ "TARGET_SPARCLET"
+ "smac\t%1, %2, %0"
+ [(set_attr "type" "imul")])
+
+(define_insn "*smacdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "%r"))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (match_operand:DI 3 "register_operand" "0")))]
+ "TARGET_SPARCLET"
+ "smacd\t%1, %2, %L0"
+ [(set_attr "type" "imul")])
+
+(define_insn "*umacdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "%r"))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "r")))
+ (match_operand:DI 3 "register_operand" "0")))]
+ "TARGET_SPARCLET"
+ "umacd\t%1, %2, %L0"
+ [(set_attr "type" "imul")])
+
+
+;; Boolean instructions.
+
+;; We define DImode `and' so that, together with DImode `not', we can get
+;; DImode `andn'. Other combinations are possible.
+
+(define_expand "and<V64I:mode>3"
+ [(set (match_operand:V64I 0 "register_operand" "")
+ (and:V64I (match_operand:V64I 1 "arith_double_operand" "")
+ (match_operand:V64I 2 "arith_double_operand" "")))]
+ ""
+ "")
+
+(define_insn "*and<V64I:mode>3_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (and:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b")
+ (match_operand:V64I 2 "arith_double_operand" "rHI,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fand\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*and<V64I:mode>3_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (and:V64I (match_operand:V64I 1 "arith_operand" "%r,b")
+ (match_operand:V64I 2 "arith_operand" "rI,b")))]
+ "TARGET_ARCH64"
+ "@
+ and\t%1, %2, %0
+ fand\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "and<V32I:mode>3"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (and:V32I (match_operand:V32I 1 "arith_operand" "%r,d")
+ (match_operand:V32I 2 "arith_operand" "rI,d")))]
+ ""
+ "@
+ and\t%1, %2, %0
+ fands\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_compl_high_operand" "")))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ ""
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))]
+{
+ operands[4] = GEN_INT (~INTVAL (operands[2]));
+})
+
+(define_insn_and_split "*and_not_<V64I:mode>_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (and:V64I (not:V64I (match_operand:V64I 1 "register_operand" "%r,b"))
+ (match_operand:V64I 2 "register_operand" "r,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fandnot1\t%1, %2, %0"
+ "&& reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (and:SI (not:SI (match_dup 7)) (match_dup 8)))]
+ "operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*and_not_<V64I:mode>_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (and:V64I (not:V64I (match_operand:V64I 1 "register_operand" "%r,b"))
+ (match_operand:V64I 2 "register_operand" "r,b")))]
+ "TARGET_ARCH64"
+ "@
+ andn\t%2, %1, %0
+ fandnot1\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*and_not_<V32I:mode>"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (and:V32I (not:V32I (match_operand:V32I 1 "register_operand" "%r,d"))
+ (match_operand:V32I 2 "register_operand" "r,d")))]
+ ""
+ "@
+ andn\t%2, %1, %0
+ fandnot1s\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_expand "ior<V64I:mode>3"
+ [(set (match_operand:V64I 0 "register_operand" "")
+ (ior:V64I (match_operand:V64I 1 "arith_double_operand" "")
+ (match_operand:V64I 2 "arith_double_operand" "")))]
+ ""
+ "")
+
+(define_insn "*ior<V64I:mode>3_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (ior:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b")
+ (match_operand:V64I 2 "arith_double_operand" "rHI,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ for\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*ior<V64I:mode>3_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (ior:V64I (match_operand:V64I 1 "arith_operand" "%r,b")
+ (match_operand:V64I 2 "arith_operand" "rI,b")))]
+ "TARGET_ARCH64"
+ "@
+ or\t%1, %2, %0
+ for\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "ior<V32I:mode>3"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (ior:V32I (match_operand:V32I 1 "arith_operand" "%r,d")
+ (match_operand:V32I 2 "arith_operand" "rI,d")))]
+ ""
+ "@
+ or\t%1, %2, %0
+ fors\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ior:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_compl_high_operand" "")))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ ""
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))]
+{
+ operands[4] = GEN_INT (~INTVAL (operands[2]));
+})
+
+(define_insn_and_split "*or_not_<V64I:mode>_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (ior:V64I (not:V64I (match_operand:V64I 1 "register_operand" "r,b"))
+ (match_operand:V64I 2 "register_operand" "r,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fornot1\t%1, %2, %0"
+ "&& reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (ior:SI (not:SI (match_dup 7)) (match_dup 8)))]
+ "operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*or_not_<V64I:mode>_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (ior:V64I (not:V64I (match_operand:V64I 1 "register_operand" "r,b"))
+ (match_operand:V64I 2 "register_operand" "r,b")))]
+ "TARGET_ARCH64"
+ "@
+ orn\t%2, %1, %0
+ fornot1\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*or_not_<V32I:mode>"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (ior:V32I (not:V32I (match_operand:V32I 1 "register_operand" "r,d"))
+ (match_operand:V32I 2 "register_operand" "r,d")))]
+ ""
+ "@
+ orn\t%2, %1, %0
+ fornot1s\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_expand "xor<V64I:mode>3"
+ [(set (match_operand:V64I 0 "register_operand" "")
+ (xor:V64I (match_operand:V64I 1 "arith_double_operand" "")
+ (match_operand:V64I 2 "arith_double_operand" "")))]
+ ""
+ "")
+
+(define_insn "*xor<V64I:mode>3_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (xor:V64I (match_operand:V64I 1 "arith_double_operand" "%r,b")
+ (match_operand:V64I 2 "arith_double_operand" "rHI,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fxor\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*xor<V64I:mode>3_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (xor:V64I (match_operand:V64I 1 "arith_operand" "%rJ,b")
+ (match_operand:V64I 2 "arith_operand" "rI,b")))]
+ "TARGET_ARCH64"
+ "@
+ xor\t%r1, %2, %0
+ fxor\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "xor<V32I:mode>3"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (xor:V32I (match_operand:V32I 1 "arith_operand" "%rJ,d")
+ (match_operand:V32I 2 "arith_operand" "rI,d")))]
+ ""
+ "@
+ xor\t%r1, %2, %0
+ fxors\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_compl_high_operand" "")))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ ""
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))]
+{
+ operands[4] = GEN_INT (~INTVAL (operands[2]));
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (xor:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_compl_high_operand" ""))))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ ""
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))]
+{
+ operands[4] = GEN_INT (~INTVAL (operands[2]));
+})
+
+;; Split DImode logical operations requiring two instructions.
+(define_split
+ [(set (match_operand:V64I 0 "register_operand" "")
+ (match_operator:V64I 1 "cc_arith_operator" ; AND, IOR, XOR
+ [(match_operand:V64I 2 "register_operand" "")
+ (match_operand:V64I 3 "arith_double_operand" "")]))]
+ "! TARGET_ARCH64
+ && reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
+ (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
+{
+ operands[4] = gen_highpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[2]);
+ operands[7] = gen_lowpart (SImode, operands[2]);
+#if HOST_BITS_PER_WIDE_INT == 32
+ if (GET_CODE (operands[3]) == CONST_INT && <V64I:MODE>mode == DImode)
+ {
+ if (INTVAL (operands[3]) < 0)
+ operands[8] = constm1_rtx;
+ else
+ operands[8] = const0_rtx;
+ }
+ else
+#endif
+ operands[8] = gen_highpart_mode (SImode, <V64I:MODE>mode, operands[3]);
+ operands[9] = gen_lowpart (SImode, operands[3]);
+})
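+;; Note on the 32-bit-host case above: a CONST_INT always stands for a
+;; sign-extended value, so a negative constant has an implicit high word of
+;; all ones (constm1_rtx) and a non-negative one a high word of zero.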
+
+;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b).
+;; Combine now canonicalizes to the rightmost expression.
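+;; A quick 4-bit check of the identity: for a = 1100 and b = 1010,
+;; a ^ ~b = 1001, ~a ^ b = 1001 and ~(a ^ b) = 1001.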
+(define_insn_and_split "*xor_not_<V64I:mode>_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (not:V64I (xor:V64I (match_operand:V64I 1 "register_operand" "r,b")
+ (match_operand:V64I 2 "register_operand" "r,b"))))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fxnor\t%1, %2, %0"
+ "&& reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 3) (not:SI (xor:SI (match_dup 4) (match_dup 5))))
+ (set (match_dup 6) (not:SI (xor:SI (match_dup 7) (match_dup 8))))]
+ "operands[3] = gen_highpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[2]);
+ operands[6] = gen_lowpart (SImode, operands[0]);
+ operands[7] = gen_lowpart (SImode, operands[1]);
+ operands[8] = gen_lowpart (SImode, operands[2]);"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*xor_not_<V64I:mode>_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (not:V64I (xor:V64I (match_operand:V64I 1 "register_or_zero_operand" "rJ,b")
+ (match_operand:V64I 2 "arith_operand" "rI,b"))))]
+ "TARGET_ARCH64"
+ "@
+ xnor\t%r1, %2, %0
+ fxnor\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*xor_not_<V32I:mode>"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (not:V32I (xor:V32I (match_operand:V32I 1 "register_or_zero_operand" "rJ,d")
+ (match_operand:V32I 2 "arith_operand" "rI,d"))))]
+ ""
+ "@
+ xnor\t%r1, %2, %0
+ fxnors\t%1, %2, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+;; These correspond to the above in the case where we also (or only)
+;; want to set the condition code.
+
+(define_insn "*cmp_cc_arith_op"
+ [(set (reg:CC 100)
+ (compare:CC
+ (match_operator:SI 2 "cc_arith_operator"
+ [(match_operand:SI 0 "arith_operand" "%r")
+ (match_operand:SI 1 "arith_operand" "rI")])
+ (const_int 0)))]
+ ""
+ "%A2cc\t%0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_arith_op"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (match_operator:DI 2 "cc_arith_operator"
+ [(match_operand:DI 0 "arith_operand" "%r")
+ (match_operand:DI 1 "arith_operand" "rI")])
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "%A2cc\t%0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_arith_op_set"
+ [(set (reg:CC 100)
+ (compare:CC
+ (match_operator:SI 3 "cc_arith_operator"
+ [(match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 4 "cc_arith_operator" [(match_dup 1) (match_dup 2)]))]
+ "GET_CODE (operands[3]) == GET_CODE (operands[4])"
+ "%A3cc\t%1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_arith_op_set"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (match_operator:DI 3 "cc_arith_operator"
+ [(match_operand:DI 1 "arith_operand" "%r")
+ (match_operand:DI 2 "arith_operand" "rI")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 4 "cc_arith_operator" [(match_dup 1) (match_dup 2)]))]
+ "TARGET_ARCH64 && GET_CODE (operands[3]) == GET_CODE (operands[4])"
+ "%A3cc\t%1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_xor_not"
+ [(set (reg:CC 100)
+ (compare:CC
+ (not:SI (xor:SI (match_operand:SI 0 "register_or_zero_operand" "%rJ")
+ (match_operand:SI 1 "arith_operand" "rI")))
+ (const_int 0)))]
+ ""
+ "xnorcc\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_xor_not"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (not:DI (xor:DI (match_operand:DI 0 "register_or_zero_operand" "%rJ")
+ (match_operand:DI 1 "arith_operand" "rI")))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "xnorcc\t%r0, %1, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_xor_not_set"
+ [(set (reg:CC 100)
+ (compare:CC
+ (not:SI (xor:SI (match_operand:SI 1 "register_or_zero_operand" "%rJ")
+ (match_operand:SI 2 "arith_operand" "rI")))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (xor:SI (match_dup 1) (match_dup 2))))]
+ ""
+ "xnorcc\t%r1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_xor_not_set"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (not:DI (xor:DI (match_operand:DI 1 "register_or_zero_operand" "%rJ")
+ (match_operand:DI 2 "arith_operand" "rI")))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (xor:DI (match_dup 1) (match_dup 2))))]
+ "TARGET_ARCH64"
+ "xnorcc\t%r1, %2, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_arith_op_not"
+ [(set (reg:CC 100)
+ (compare:CC
+ (match_operator:SI 2 "cc_arith_not_operator"
+ [(not:SI (match_operand:SI 0 "arith_operand" "rI"))
+ (match_operand:SI 1 "register_or_zero_operand" "rJ")])
+ (const_int 0)))]
+ ""
+ "%B2cc\t%r1, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_arith_op_not"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (match_operator:DI 2 "cc_arith_not_operator"
+ [(not:DI (match_operand:DI 0 "arith_operand" "rI"))
+ (match_operand:DI 1 "register_or_zero_operand" "rJ")])
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "%B2cc\t%r1, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_arith_op_not_set"
+ [(set (reg:CC 100)
+ (compare:CC
+ (match_operator:SI 3 "cc_arith_not_operator"
+ [(not:SI (match_operand:SI 1 "arith_operand" "rI"))
+ (match_operand:SI 2 "register_or_zero_operand" "rJ")])
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 4 "cc_arith_not_operator"
+ [(not:SI (match_dup 1)) (match_dup 2)]))]
+ "GET_CODE (operands[3]) == GET_CODE (operands[4])"
+ "%B3cc\t%r2, %1, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_arith_op_not_set"
+ [(set (reg:CCX 100)
+ (compare:CCX
+ (match_operator:DI 3 "cc_arith_not_operator"
+ [(not:DI (match_operand:DI 1 "arith_operand" "rI"))
+ (match_operand:DI 2 "register_or_zero_operand" "rJ")])
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (match_operator:DI 4 "cc_arith_not_operator"
+ [(not:DI (match_dup 1)) (match_dup 2)]))]
+ "TARGET_ARCH64 && GET_CODE (operands[3]) == GET_CODE (operands[4])"
+ "%B3cc\t%r2, %1, %0"
+ [(set_attr "type" "compare")])
+
+;; We cannot use the "neg" pseudo insn because the Sun assembler
+;; does not know how to make it work for constants.
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+ ""
+{
+ if (! TARGET_ARCH64)
+ {
+ emit_insn (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operand0,
+ gen_rtx_NEG (DImode, operand1)),
+ gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode,
+ SPARC_ICC_REG)))));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*negdi2_sp32"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (clobber (reg:CC 100))]
+ "! TARGET_ARCH64"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (const_int 0) (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 4) (minus:SI (const_int 0) (match_dup 5)))])
+ (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3))
+ (ltu:SI (reg:CC 100) (const_int 0))))]
+ "operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "length" "2")])
+
+(define_insn "*negdi2_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_ARCH64"
+ "sub\t%%g0, %1, %0")
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "arith_operand" "rI")))]
+ ""
+ "sub\t%%g0, %1, %0")
+
+(define_insn "*cmp_cc_neg"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (neg:SI (match_operand:SI 0 "arith_operand" "rI"))
+ (const_int 0)))]
+ ""
+ "subcc\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_neg"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (neg:DI (match_operand:DI 0 "arith_operand" "rI"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "subcc\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_set_neg"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (neg:SI (match_operand:SI 1 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_dup 1)))]
+ ""
+ "subcc\t%%g0, %1, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_set_neg"
+ [(set (reg:CCX_NOOV 100)
+ (compare:CCX_NOOV (neg:DI (match_operand:DI 1 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (neg:DI (match_dup 1)))]
+ "TARGET_ARCH64"
+ "subcc\t%%g0, %1, %0"
+ [(set_attr "type" "compare")])
+
+;; We cannot use the "not" pseudo insn because the Sun assembler
+;; does not know how to make it work for constants.
+(define_expand "one_cmpl<V64I:mode>2"
+ [(set (match_operand:V64I 0 "register_operand" "")
+ (not:V64I (match_operand:V64I 1 "register_operand" "")))]
+ ""
+ "")
+
+(define_insn_and_split "*one_cmpl<V64I:mode>2_sp32"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (not:V64I (match_operand:V64I 1 "register_operand" "r,b")))]
+ "! TARGET_ARCH64"
+ "@
+ #
+ fnot1\t%1, %0"
+ "&& reload_completed
+ && ((GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) < 32)
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (SUBREG_REG (operands[0])) < 32))"
+ [(set (match_dup 2) (not:SI (xor:SI (match_dup 3) (const_int 0))))
+ (set (match_dup 4) (not:SI (xor:SI (match_dup 5) (const_int 0))))]
+ "operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_highpart (SImode, operands[1]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);"
+ [(set_attr "type" "*,fga")
+ (set_attr "length" "2,*")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "*one_cmpl<V64I:mode>2_sp64"
+ [(set (match_operand:V64I 0 "register_operand" "=r,b")
+ (not:V64I (match_operand:V64I 1 "arith_operand" "rI,b")))]
+ "TARGET_ARCH64"
+ "@
+ xnor\t%%g0, %1, %0
+ fnot1\t%1, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,double")])
+
+(define_insn "one_cmpl<V32I:mode>2"
+ [(set (match_operand:V32I 0 "register_operand" "=r,d")
+ (not:V32I (match_operand:V32I 1 "arith_operand" "rI,d")))]
+ ""
+ "@
+ xnor\t%%g0, %1, %0
+ fnot1s\t%1, %0"
+ [(set_attr "type" "*,fga")
+ (set_attr "fptype" "*,single")])
+
+(define_insn "*cmp_cc_not"
+ [(set (reg:CC 100)
+ (compare:CC (not:SI (match_operand:SI 0 "arith_operand" "rI"))
+ (const_int 0)))]
+ ""
+ "xnorcc\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_not"
+ [(set (reg:CCX 100)
+ (compare:CCX (not:DI (match_operand:DI 0 "arith_operand" "rI"))
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "xnorcc\t%%g0, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_set_not"
+ [(set (reg:CC 100)
+ (compare:CC (not:SI (match_operand:SI 1 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_dup 1)))]
+ ""
+ "xnorcc\t%%g0, %1, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_set_not"
+ [(set (reg:CCX 100)
+ (compare:CCX (not:DI (match_operand:DI 1 "arith_operand" "rI"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (not:DI (match_dup 1)))]
+ "TARGET_ARCH64"
+ "xnorcc\t%%g0, %1, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_set"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (set (reg:CC 100)
+ (compare:CC (match_dup 1)
+ (const_int 0)))]
+ ""
+ "orcc\t%1, 0, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_ccx_set64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "register_operand" "r"))
+ (set (reg:CCX 100)
+ (compare:CCX (match_dup 1)
+ (const_int 0)))]
+ "TARGET_ARCH64"
+ "orcc\t%1, 0, %0"
+ [(set_attr "type" "compare")])
+
+
+;; Floating point arithmetic instructions.
+
+(define_expand "addtf3"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (plus:TF (match_operand:TF 1 "general_operand" "")
+ (match_operand:TF 2 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_binop (PLUS, operands); DONE;")
+
+(define_insn "*addtf3_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (plus:TF (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "faddq\t%1, %2, %0"
+ [(set_attr "type" "fp")])
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (plus:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+ "faddd\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fadds\t%1, %2, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "subtf3"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (minus:TF (match_operand:TF 1 "general_operand" "")
+ (match_operand:TF 2 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_binop (MINUS, operands); DONE;")
+
+(define_insn "*subtf3_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (minus:TF (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fsubq\t%1, %2, %0"
+ [(set_attr "type" "fp")])
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (minus:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+ "fsubd\t%1, %2, %0"
+ [(set_attr "type" "fp")
+ (set_attr "fptype" "double")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fsubs\t%1, %2, %0"
+ [(set_attr "type" "fp")])
+
+(define_expand "multf3"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (mult:TF (match_operand:TF 1 "general_operand" "")
+ (match_operand:TF 2 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_binop (MULT, operands); DONE;")
+
+(define_insn "*multf3_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (mult:TF (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fmulq\t%1, %2, %0"
+ [(set_attr "type" "fpmul")])
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (mult:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+ "fmuld\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fmuls\t%1, %2, %0"
+ [(set_attr "type" "fpmul")])
+
+(define_insn "*muldf3_extend"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f"))
+ (float_extend:DF (match_operand:SF 2 "register_operand" "f"))))]
+ "(TARGET_V8 || TARGET_V9) && TARGET_FPU"
+ "fsmuld\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+(define_insn "*multf3_extend"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (mult:TF (float_extend:TF (match_operand:DF 1 "register_operand" "e"))
+ (float_extend:TF (match_operand:DF 2 "register_operand" "e"))))]
+ "(TARGET_V8 || TARGET_V9) && TARGET_FPU && TARGET_HARD_QUAD"
+ "fdmulq\t%1, %2, %0"
+ [(set_attr "type" "fpmul")])
+
+(define_expand "divtf3"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (div:TF (match_operand:TF 1 "general_operand" "")
+ (match_operand:TF 2 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_binop (DIV, operands); DONE;")
+
+;; We don't have timing information for the quad-precision divide.
+(define_insn "*divtf3_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (div:TF (match_operand:TF 1 "register_operand" "e")
+ (match_operand:TF 2 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fdivq\t%1, %2, %0"
+ [(set_attr "type" "fpdivd")])
+
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (div:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU"
+ "fdivd\t%1, %2, %0"
+ [(set_attr "type" "fpdivd")
+ (set_attr "fptype" "double")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fdivs\t%1, %2, %0"
+ [(set_attr "type" "fpdivs")])
+
+(define_expand "negtf2"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU"
+ "")
+
+(define_insn_and_split "*negtf2_notv9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU
+ && ! TARGET_V9"
+ "@
+ fnegs\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+ "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
+ operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")])
+
+(define_insn_and_split "*negtf2_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU && TARGET_V9"
+ "@
+ fnegd\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:DF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")
+ (set_attr "fptype" "double")])
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (neg:DF (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_FPU"
+ "")
+
+(define_insn_and_split "*negdf2_notv9"
+ [(set (match_operand:DF 0 "register_operand" "=e,e")
+ (neg:DF (match_operand:DF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fnegs\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (neg:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")])
+
+(define_insn "*negdf2_v9"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (neg:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_V9"
+ "fnegd\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "fptype" "double")])
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fnegs\t%1, %0"
+ [(set_attr "type" "fpmove")])
+
+(define_expand "abstf2"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (abs:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_FPU"
+ "")
+
+(define_insn_and_split "*abstf2_notv9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fabss\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 6) (match_dup 7))]
+ "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
+ operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")])
+
+(define_insn "*abstf2_hq_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && TARGET_V9 && TARGET_HARD_QUAD"
+ "@
+ fabsd\t%0, %0
+ fabsq\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "fptype" "double,*")])
+
+(define_insn_and_split "*abstf2_v9"
+ [(set (match_operand:TF 0 "register_operand" "=e,e")
+ (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && TARGET_V9 && !TARGET_HARD_QUAD"
+ "@
+ fabsd\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:DF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
+ operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")
+ (set_attr "fptype" "double,*")])
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (abs:DF (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_FPU"
+ "")
+
+(define_insn_and_split "*absdf2_notv9"
+ [(set (match_operand:DF 0 "register_operand" "=e,e")
+ (abs:DF (match_operand:DF 1 "register_operand" "0,e")))]
+ "TARGET_FPU && ! TARGET_V9"
+ "@
+ fabss\t%0, %0
+ #"
+ "&& reload_completed
+ && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
+ [(set (match_dup 2) (abs:SF (match_dup 3)))
+ (set (match_dup 4) (match_dup 5))]
+ "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
+ operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
+ operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
+ operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
+ [(set_attr "type" "fpmove,*")
+ (set_attr "length" "*,2")])
+
+(define_insn "*absdf2_v9"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (abs:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_V9"
+ "fabsd\t%1, %0"
+ [(set_attr "type" "fpmove")
+ (set_attr "fptype" "double")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fabss\t%1, %0"
+ [(set_attr "type" "fpmove")])
+
+(define_expand "sqrttf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (sqrt:TF (match_operand:TF 1 "general_operand" "")))]
+ "TARGET_FPU && (TARGET_HARD_QUAD || TARGET_ARCH64)"
+ "emit_tfmode_unop (SQRT, operands); DONE;")
+
+(define_insn "*sqrttf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (sqrt:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fsqrtq\t%1, %0"
+ [(set_attr "type" "fpsqrtd")])
+
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU"
+ "fsqrtd\t%1, %0"
+ [(set_attr "type" "fpsqrtd")
+ (set_attr "fptype" "double")])
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_FPU"
+ "fsqrts\t%1, %0"
+ [(set_attr "type" "fpsqrts")])
+
+
+;; Arithmetic shift instructions.
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ ""
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "sll\t%1, %2, %0";
+}
+ [(set (attr "type")
+ (if_then_else (match_operand 2 "const_one_operand" "")
+ (const_string "ialu") (const_string "shift")))])
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+ emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*ashldi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+{
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+ return "sllx\t%1, %2, %0";
+}
+ [(set (attr "type")
+ (if_then_else (match_operand 2 "const_one_operand" "")
+ (const_string "ialu") (const_string "shift")))])
+
+;; XXX UGH! The whole multi-insn sequence comes from output_v8plus_shift.
+(define_insn "ashldi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "* return output_v8plus_shift (operands, insn, \"sllx\");"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5,5,6")])
+
+;; Optimize (1LL<<x)-1
+;; XXX This also needs to be fixed to handle equal subregs
+;; XXX before it can be re-enabled.
+;(define_insn ""
+; [(set (match_operand:DI 0 "register_operand" "=h")
+; (plus:DI (ashift:DI (const_int 1)
+; (match_operand:SI 1 "arith_operand" "rI"))
+; (const_int -1)))]
+; "0 && TARGET_V8PLUS"
+;{
+; if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == REGNO (operands[0]))
+; return "mov\t1, %L0\;sllx\t%L0, %1, %L0\;sub\t%L0, 1, %L0\;srlx\t%L0, 32, %H0";
+; return "mov\t1, %H0\;sllx\t%H0, %1, %L0\;sub\t%L0, 1, %L0\;srlx\t%L0, 32, %H0";
+;}
+; [(set_attr "type" "multi")
+; (set_attr "length" "4")])
+
+(define_insn "*cmp_cc_ashift_1"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1))
+ (const_int 0)))]
+ ""
+ "addcc\t%0, %0, %%g0"
+ [(set_attr "type" "compare")])
+
+(define_insn "*cmp_cc_set_ashift_1"
+ [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI (match_dup 1) (const_int 1)))]
+ ""
+ "addcc\t%1, %1, %0"
+ [(set_attr "type" "compare")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "sra\t%1, %2, %0";
+ }
+ [(set_attr "type" "shift")])
+
+(define_insn "*ashrsi3_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "r"))))]
+ "TARGET_ARCH64"
+ "sra\t%1, %2, %0"
+ [(set_attr "type" "shift")])
+
+;; This handles the same case as above, but with a constant shift instead
+;; of a register. The combiner "simplifies" it for us a little bit, though.
+(define_insn "*ashrsi3_extend2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (ashift:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0)
+ (const_int 32))
+ (match_operand:SI 2 "small_int_operand" "I")))]
+ "TARGET_ARCH64 && INTVAL (operands[2]) >= 32 && INTVAL (operands[2]) < 64"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) - 32);
+ return "sra\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL; /* Prefer generic code in this case.  */
+ emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*ashrdi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+ return "srax\t%1, %2, %0";
+ }
+ [(set_attr "type" "shift")])
+
+;; XXX
+(define_insn "ashrdi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "* return output_v8plus_shift (operands, insn, \"srax\");"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5,5,6")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
+ return "srl\t%1, %2, %0";
+ }
+ [(set_attr "type" "shift")])
+
+;; This handles the case where
+;; (zero_extend:DI (lshiftrt:SI (match_operand:SI) (match_operand:SI))),
+;; but the combiner "simplifies" it for us.
+(define_insn "*lshrsi3_extend"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (subreg:DI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "r")) 0)
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) == 0xffffffff"
+ "srl\t%1, %2, %0"
+ [(set_attr "type" "shift")])
+
+;; This handles the case where
+;; (lshiftrt:DI (zero_extend:DI (match_operand:SI)) (const_int >=0 < 32)),
+;; but the combiner "simplifies" it for us.
+(define_insn "*lshrsi3_extend2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (subreg:DI (match_operand:SI 1 "register_operand" "r") 0)
+ (match_operand 2 "small_int_operand" "I")
+ (const_int 32)))]
+ "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32"
+{
+ operands[2] = GEN_INT (32 - INTVAL (operands[2]));
+ return "srl\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+ emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*lshrdi3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64"
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
+ return "srlx\t%1, %2, %0";
+ }
+ [(set_attr "type" "shift")])
+
+;; XXX
+(define_insn "lshrdi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "* return output_v8plus_shift (operands, insn, \"srlx\");"
+ [(set_attr "type" "multi")
+ (set_attr "length" "5,5,6")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (subreg:SI (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 32)) 4)
+ (match_operand:SI 2 "small_int_operand" "I")))]
+ "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 32);
+ return "srax\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (subreg:SI (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 32)) 4)
+ (match_operand:SI 2 "small_int_operand" "I")))]
+ "TARGET_ARCH64 && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 32"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 32);
+ return "srlx\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI (subreg:SI (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "small_int_operand" "I")) 4)
+ (match_operand:SI 3 "small_int_operand" "I")))]
+ "TARGET_ARCH64
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 32
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) < 32
+ && (unsigned HOST_WIDE_INT) (INTVAL (operands[2]) + INTVAL (operands[3])) < 64"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]));
+
+ return "srax\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI (subreg:SI (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "small_int_operand" "I")) 4)
+ (match_operand:SI 3 "small_int_operand" "I")))]
+ "TARGET_ARCH64
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 32
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[3]) < 32
+ && (unsigned HOST_WIDE_INT) (INTVAL (operands[2]) + INTVAL (operands[3])) < 64"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]));
+
+ return "srlx\t%1, %2, %0";
+}
+ [(set_attr "type" "shift")])
+
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "* return output_ubranch (operands[0], 0, insn);"
+ [(set_attr "type" "uncond_branch")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ gcc_assert (GET_MODE (operands[0]) == CASE_VECTOR_MODE);
+
+ /* In pic mode, our address differences are against the base of the
+ table. Add that base value back in; CSE ought to be able to combine
+ the two address loads. */
+ if (flag_pic)
+ {
+ rtx tmp, tmp2;
+ tmp = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ tmp2 = operands[0];
+ if (CASE_VECTOR_MODE != Pmode)
+ tmp2 = gen_rtx_SIGN_EXTEND (Pmode, tmp2);
+ tmp = gen_rtx_PLUS (Pmode, tmp2, tmp);
+ operands[0] = memory_address (Pmode, tmp);
+ }
+})
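+
+;; For illustration (a sketch; registers are made up): with -fPIC the
+;; table entries are label differences, so after the rewrite above the
+;; dispatch ends up roughly as
+;;   add %base, %off, %tmp
+;;   jmp %tmp
+;; where %off was loaded from the jump table by the casesi expansion.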
+
+(define_insn "*tablejump_sp32"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "! TARGET_ARCH64"
+ "jmp\t%a0%#"
+ [(set_attr "type" "uncond_branch")])
+
+(define_insn "*tablejump_sp64"
+ [(set (pc) (match_operand:DI 0 "address_operand" "p"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_ARCH64"
+ "jmp\t%a0%#"
+ [(set_attr "type" "uncond_branch")])
+
+
+;; Jump to subroutine instructions.
+
+(define_expand "call"
+ ;; Note that this expression is not used for generating RTL.
+ ;; All the RTL is generated explicitly below.
+ [(call (match_operand 0 "call_operand" "")
+ (match_operand 3 "" "i"))]
+ ;; operands[2] is next_arg_register.
+ ;; operands[3] is struct_value_size_rtx.
+ ""
+{
+ rtx fn_rtx;
+
+ gcc_assert (MEM_P (operands[0]) && GET_MODE (operands[0]) == FUNCTION_MODE);
+
+ gcc_assert (GET_CODE (operands[3]) == CONST_INT);
+
+ if (GET_CODE (XEXP (operands[0], 0)) == LABEL_REF)
+ {
+ /* This is really a PIC sequence. We want to represent
+ it as a funny jump so its delay slots can be filled.
+
+ ??? But if this really *is* a CALL, won't it clobber the
+ call-clobbered registers? We lose this if it is a JUMP_INSN.
+ Why can't we have delay slots filled if it were a CALL? */
+
+ /* We accept negative sizes for untyped calls. */
+ if (! TARGET_ARCH64 && INTVAL (operands[3]) != 0)
+ emit_jump_insn
+ (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (3,
+ gen_rtx_SET (VOIDmode, pc_rtx, XEXP (operands[0], 0)),
+ operands[3],
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))));
+ else
+ emit_jump_insn
+ (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, pc_rtx, XEXP (operands[0], 0)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))));
+ goto finish_call;
+ }
+
+ fn_rtx = operands[0];
+
+ /* We accept negative sizes for untyped calls. */
+ if (! TARGET_ARCH64 && INTVAL (operands[3]) != 0)
+ sparc_emit_call_insn
+ (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (3, gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx),
+ operands[3],
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))),
+ XEXP (fn_rtx, 0));
+ else
+ sparc_emit_call_insn
+ (gen_rtx_PARALLEL
+ (VOIDmode,
+ gen_rtvec (2, gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)))),
+ XEXP (fn_rtx, 0));
+
+ finish_call:
+
+ DONE;
+})
+
+;; We can't use the same pattern for these two insns, because then registers
+;; in the address may not be properly reloaded.
+
+(define_insn "*call_address_sp32"
+ [(call (mem:SI (match_operand:SI 0 "address_operand" "p"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64"
+ "call\t%a0, %1%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_symbolic_sp32"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64"
+ "call\t%a0, %1%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_address_sp64"
+ [(call (mem:DI (match_operand:DI 0 "address_operand" "p"))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "TARGET_ARCH64"
+ "call\t%a0, %1%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_symbolic_sp64"
+ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "TARGET_ARCH64"
+ "call\t%a0, %1%#"
+ [(set_attr "type" "call")])
+
+;; This is a call that wants a structure value.
+;; There is no such critter for v9 (??? we may need one anyway).
+(define_insn "*call_address_struct_value_sp32"
+ [(call (mem:SI (match_operand:SI 0 "address_operand" "p"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "")
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) > 0"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xfff);
+ return "call\t%a0, %1\n\t nop\n\tunimp\t%2";
+}
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+;; This is a call that wants a structure value.
+;; There is no such critter for v9 (??? we may need one anyway).
+(define_insn "*call_symbolic_struct_value_sp32"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "")
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) > 0"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xfff);
+ return "call\t%a0, %1\n\t nop\n\tunimp\t%2";
+}
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+;; This is a call that may want a structure value. This is used for
+;; untyped_calls.
+(define_insn "*call_address_untyped_struct_value_sp32"
+ [(call (mem:SI (match_operand:SI 0 "address_operand" "p"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "")
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0"
+ "call\t%a0, %1\n\t nop\n\tnop"
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+;; This is a call that may want a structure value. This is used for
+;; untyped_calls.
+(define_insn "*call_symbolic_untyped_struct_value_sp32"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "")
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 1 for most machines.
+ "! TARGET_ARCH64 && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0"
+ "call\t%a0, %1\n\t nop\n\tnop"
+ [(set_attr "type" "call_no_delay_slot")
+ (set_attr "length" "3")])
+
+(define_expand "call_value"
+ ;; Note that this expression is not used for generating RTL.
+ ;; All the RTL is generated explicitly below.
+ [(set (match_operand 0 "register_operand" "=rf")
+ (call (match_operand 1 "" "")
+ (match_operand 4 "" "")))]
+ ;; operand 2 is stack_size_rtx
+ ;; operand 3 is next_arg_register
+ ""
+{
+ rtx fn_rtx;
+ rtvec vec;
+
+ gcc_assert (MEM_P (operands[1]) && GET_MODE (operands[1]) == FUNCTION_MODE);
+
+ fn_rtx = operands[1];
+
+ vec = gen_rtvec (2,
+ gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_CALL (VOIDmode, fn_rtx, const0_rtx)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 15)));
+
+ sparc_emit_call_insn (gen_rtx_PARALLEL (VOIDmode, vec), XEXP (fn_rtx, 0));
+
+ DONE;
+})
+
+(define_insn "*call_value_address_sp32"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "address_operand" "p"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 2 for most machines.
+ "! TARGET_ARCH64"
+ "call\t%a1, %2%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_value_symbolic_sp32"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))]
+ ;;- Do not use operand 2 for most machines.
+ "! TARGET_ARCH64"
+ "call\t%a1, %2%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_value_address_sp64"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "address_operand" "p"))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 15))]
+ ;;- Do not use operand 2 for most machines.
+ "TARGET_ARCH64"
+ "call\t%a1, %2%#"
+ [(set_attr "type" "call")])
+
+(define_insn "*call_value_symbolic_sp64"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 15))]
+ ;;- Do not use operand 2 for most machines.
+ "TARGET_ARCH64"
+ "call\t%a1, %2%#"
+ [(set_attr "type" "call")])
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand:BLK 1 "memory_operand" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ rtx valreg1 = gen_rtx_REG (DImode, 8);
+ rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32);
+ rtx result = operands[1];
+
+ /* Pass constm1 to indicate that it may expect a structure value, but
+ we don't know what size it is. */
+ emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, constm1_rtx));
+
+ /* Save the function value registers. */
+ emit_move_insn (adjust_address (result, DImode, 0), valreg1);
+ emit_move_insn (adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8),
+ valreg2);
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; Tail call instructions.
+
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "call_operand" "") (const_int 0))
+ (return)])]
+ ""
+ "")
+
+(define_insn "*sibcall_symbolic_sp32"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (return)]
+ "! TARGET_ARCH64"
+ "* return output_sibcall(insn, operands[0]);"
+ [(set_attr "type" "sibcall")])
+
+(define_insn "*sibcall_symbolic_sp64"
+ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "s"))
+ (match_operand 1 "" ""))
+ (return)]
+ "TARGET_ARCH64"
+ "* return output_sibcall(insn, operands[0]);"
+ [(set_attr "type" "sibcall")])
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "register_operand" "=rf")
+ (call (match_operand 1 "" "") (const_int 0)))
+ (return)])]
+ ""
+ "")
+
+(define_insn "*sibcall_value_symbolic_sp32"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (return)]
+ "! TARGET_ARCH64"
+ "* return output_sibcall(insn, operands[1]);"
+ [(set_attr "type" "sibcall")])
+
+(define_insn "*sibcall_value_symbolic_sp64"
+ [(set (match_operand 0 "" "")
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "s"))
+ (match_operand 2 "" "")))
+ (return)]
+ "TARGET_ARCH64"
+ "* return output_sibcall(insn, operands[1]);"
+ [(set_attr "type" "sibcall")])
+
+
+;; Special instructions.
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ sparc_expand_prologue ();
+ DONE;
+})
+
+;; The "save register window" insn is modelled as follows so that the DWARF-2
+;; backend automatically emits the required call frame debugging information
+;; while it is parsing it. Therefore, the pattern should not be modified
+;; without first studying the impact of the changes on the debug info.
+;; [(set (%fp) (%sp))
+;; (set (%sp) (unspec_volatile [(%sp) (-frame_size)] UNSPECV_SAVEW))
+;; (set (%i7) (%o7))]
+
+(define_insn "save_register_window<P:mode>"
+ [(set (reg:P 30) (reg:P 14))
+ (set (reg:P 14) (unspec_volatile:P [(reg:P 14)
+ (match_operand:P 0 "arith_operand" "rI")] UNSPECV_SAVEW))
+ (set (reg:P 31) (reg:P 15))]
+ ""
+ "save\t%%sp, %0, %%sp"
+ [(set_attr "type" "savew")])
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ sparc_expand_epilogue ();
+})
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+{
+ sparc_expand_epilogue ();
+ DONE;
+})
+
+(define_expand "return"
+ [(return)]
+ "sparc_can_use_return_insn_p ()"
+ "")
+
+(define_insn "*return_internal"
+ [(return)]
+ ""
+ "* return output_return (insn);"
+ [(set_attr "type" "return")
+ (set (attr "length")
+ (cond [(eq_attr "leaf_function" "true")
+ (if_then_else (eq_attr "empty_delay_slot" "true")
+ (const_int 2)
+ (const_int 1))
+ (eq_attr "calls_eh_return" "true")
+ (if_then_else (eq_attr "delayed_branch" "true")
+ (if_then_else (eq_attr "isa" "v9")
+ (const_int 2)
+ (const_int 3))
+ (const_int 4))
+ (eq_attr "empty_delay_slot" "true")
+ (if_then_else (eq_attr "delayed_branch" "true")
+ (const_int 2)
+ (const_int 3))
+ ] (const_int 1)))])
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Do not schedule instructions accessing memory before this point.
+
+(define_expand "frame_blockage"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 1)] UNSPEC_FRAME_BLOCKAGE))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+ operands[1] = stack_pointer_rtx;
+})
+
+(define_insn "*frame_blockage<P:mode>"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_operand:P 1 "" "")] UNSPEC_FRAME_BLOCKAGE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+(define_expand "probe_stack"
+ [(set (match_operand 0 "memory_operand" "") (const_int 0))]
+ ""
+{
+ operands[0]
+ = adjust_address (operands[0], GET_MODE (operands[0]), SPARC_STACK_BIAS);
+})
+
+(define_insn "probe_stack_range<P:mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+ (match_operand:P 2 "register_operand" "r")]
+ UNSPECV_PROBE_STACK_RANGE))]
+ ""
+ "* return output_probe_stack_range (operands[0], operands[2]);"
+ [(set_attr "type" "multi")])
+
+;; Prepare to return any type including a structure value.
+
+(define_expand "untyped_return"
+ [(match_operand:BLK 0 "memory_operand" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ rtx valreg1 = gen_rtx_REG (DImode, 24);
+ rtx valreg2 = gen_rtx_REG (TARGET_ARCH64 ? TFmode : DFmode, 32);
+ rtx result = operands[0];
+
+ if (! TARGET_ARCH64)
+ {
+ rtx rtnreg = gen_rtx_REG (SImode, (current_function_uses_only_leaf_regs
+ ? 15 : 31));
+ rtx value = gen_reg_rtx (SImode);
+
+ /* Fetch the instruction where we will return to and see if it's an unimp
+ instruction (the most significant 10 bits will be zero). If so,
+ update the return address to skip the unimp instruction. */
+ emit_move_insn (value,
+ gen_rtx_MEM (SImode, plus_constant (rtnreg, 8)));
+ emit_insn (gen_lshrsi3 (value, value, GEN_INT (22)));
+ emit_insn (gen_update_return (rtnreg, value));
+ }
+
+ /* Reload the function value registers. */
+ emit_move_insn (valreg1, adjust_address (result, DImode, 0));
+ emit_move_insn (valreg2,
+ adjust_address (result, TARGET_ARCH64 ? TFmode : DFmode, 8));
+
+ /* Put USE insns before the return. */
+ emit_use (valreg1);
+ emit_use (valreg2);
+
+ /* Construct the return. */
+ expand_naked_return ();
+
+ DONE;
+})
+
+;; Adjust the return address conditionally. If the value of op1 is zero,
+;; adjust the return address, i.e. op0 = op0 + 4.
+;; This is technically *half* the check required by the 32-bit SPARC
+;; psABI. This check only ensures that an "unimp" insn was written by
+;; the caller, but doesn't check to see if the expected size matches
+;; (this is encoded in the 12 lower bits). This check is obsolete and
+;; only used by the "untyped_return" pattern above.
+
+(define_insn "update_return"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")] UNSPEC_UPDATE_RETURN)]
+ "! TARGET_ARCH64"
+{
+ if (flag_delayed_branch)
+ return "cmp\t%1, 0\n\tbe,a\t.+8\n\t add\t%0, 4, %0";
+ else
+ return "cmp\t%1, 0\n\tbne\t.+12\n\t nop\n\tadd\t%0, 4, %0";
+}
+ [(set (attr "type") (const_string "multi"))
+ (set (attr "length")
+ (if_then_else (eq_attr "delayed_branch" "true")
+ (const_int 3)
+ (const_int 4)))])
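+
+;; For illustration: an "unimp 8" word is 0x00000008, so the shift by 22
+;; done in "untyped_return" above yields zero exactly when the most
+;; significant 10 bits are clear, and update_return then advances the
+;; return address past the unimp slot.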
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "address_operand" "p"))]
+ ""
+ "")
+
+(define_insn "*branch_sp32"
+ [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
+ "! TARGET_ARCH64"
+ "jmp\t%a0%#"
+ [(set_attr "type" "uncond_branch")])
+
+(define_insn "*branch_sp64"
+ [(set (pc) (match_operand:DI 0 "address_operand" "p"))]
+ "TARGET_ARCH64"
+ "jmp\t%a0%#"
+ [(set_attr "type" "uncond_branch")])
+
+(define_expand "nonlocal_goto"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand:SI 3 "" "")]
+ ""
+{
+ rtx lab = operands[1];
+ rtx stack = operands[2];
+ rtx fp = operands[3];
+ rtx labreg;
+
+ /* Trap instruction to flush all the register windows. */
+ emit_insn (gen_flush_register_windows ());
+
+ /* Load the fp value for the containing fn into %fp. This is needed
+ because STACK refers to %fp. Note that virtual register instantiation
+ fails if the virtual %fp isn't set from a register. */
+ if (GET_CODE (fp) != REG)
+ fp = force_reg (Pmode, fp);
+ emit_move_insn (virtual_stack_vars_rtx, fp);
+
+ /* Find the containing function's current nonlocal goto handler,
+ which will do any cleanups and then jump to the label. */
+ labreg = gen_rtx_REG (Pmode, 8);
+ emit_move_insn (labreg, lab);
+
+ /* Restore %fp from stack pointer value for containing function.
+ The restore insn that follows will move this to %sp,
+ and reload the appropriate value into %fp. */
+ emit_move_insn (hard_frame_pointer_rtx, stack);
+
+ emit_use (stack_pointer_rtx);
+
+ /* ??? The V9-specific version was disabled in rev 1.65. */
+ emit_jump_insn (gen_goto_handler_and_restore (labreg));
+ emit_barrier ();
+ DONE;
+})
+
+;; Special trap insn to flush register windows.
+(define_insn "flush_register_windows"
+ [(unspec_volatile [(const_int 0)] UNSPECV_FLUSHW)]
+ ""
+ { return TARGET_V9 ? "flushw" : "ta\t3"; }
+ [(set_attr "type" "flushw")])
+
+(define_insn "goto_handler_and_restore"
+ [(unspec_volatile [(match_operand 0 "register_operand" "=r")] UNSPECV_GOTO)]
+ "GET_MODE (operands[0]) == Pmode"
+{
+ if (flag_delayed_branch)
+ return "jmp\t%0\n\t restore";
+ else
+ return "mov\t%0,%%g1\n\trestore\n\tjmp\t%%g1\n\t nop";
+}
+ [(set (attr "type") (const_string "multi"))
+ (set (attr "length")
+ (if_then_else (eq_attr "delayed_branch" "true")
+ (const_int 2)
+ (const_int 4)))])
+
+;; For __builtin_setjmp we need to flush register windows iff the function
+;; calls alloca as well, because otherwise the current register window might
+;; be saved after the %sp adjustment and thus setjmp would crash.
+(define_expand "builtin_setjmp_setup"
+ [(match_operand 0 "register_operand" "r")]
+ ""
+{
+ emit_insn (gen_do_builtin_setjmp_setup ());
+ DONE;
+})
+
+(define_insn "do_builtin_setjmp_setup"
+ [(unspec_volatile [(const_int 0)] UNSPECV_SETJMP)]
+ ""
+{
+ if (!cfun->calls_alloca)
+ return "";
+ if (!TARGET_V9)
+ return "ta\t3";
+ fputs ("\tflushw\n", asm_out_file);
+ if (flag_pic)
+ fprintf (asm_out_file, "\tst%c\t%%l7, [%%sp+%d]\n",
+ TARGET_ARCH64 ? 'x' : 'w',
+ SPARC_STACK_BIAS + 7 * UNITS_PER_WORD);
+ fprintf (asm_out_file, "\tst%c\t%%fp, [%%sp+%d]\n",
+ TARGET_ARCH64 ? 'x' : 'w',
+ SPARC_STACK_BIAS + 14 * UNITS_PER_WORD);
+ fprintf (asm_out_file, "\tst%c\t%%i7, [%%sp+%d]\n",
+ TARGET_ARCH64 ? 'x' : 'w',
+ SPARC_STACK_BIAS + 15 * UNITS_PER_WORD);
+ return "";
+}
+ [(set_attr "type" "multi")
+ (set (attr "length")
+ (cond [(eq_attr "calls_alloca" "false")
+ (const_int 0)
+ (eq_attr "isa" "!v9")
+ (const_int 1)
+ (eq_attr "pic" "true")
+ (const_int 4)] (const_int 3)))])
+
+;; Pattern for use after a setjmp to store registers into the save area.
+
+(define_expand "setjmp"
+ [(const_int 0)]
+ ""
+{
+ rtx mem;
+
+ if (flag_pic)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (stack_pointer_rtx,
+ SPARC_STACK_BIAS + 7 * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, mem, pic_offset_table_rtx));
+ }
+
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (stack_pointer_rtx,
+ SPARC_STACK_BIAS + 14 * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, mem, hard_frame_pointer_rtx));
+
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (stack_pointer_rtx,
+ SPARC_STACK_BIAS + 15 * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (Pmode, 31)));
+ DONE;
+})
+
+;; Special pattern for the FLUSH instruction.
+
+; We do SImode and DImode versions of this to quiet down genrecog's complaints
+; about the define_insn otherwise missing a mode. We make "flush", aka
+; gen_flush, the default one since sparc_initialize_trampoline uses
+; it on SImode mem values.
+
+(define_insn "flush"
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_FLUSH)]
+ ""
+ { return TARGET_V9 ? "flush\t%f0" : "iflush\t%f0"; }
+ [(set_attr "type" "iflush")])
+
+(define_insn "flushdi"
+ [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_FLUSH)]
+ ""
+ { return TARGET_V9 ? "flush\t%f0" : "iflush\t%f0"; }
+ [(set_attr "type" "iflush")])
+
+
+;; Find first set instructions.
+
+;; The scan instruction searches from the most significant bit while ffs
+;; searches from the least significant bit. The bit index and treatment of
+;; zero also differ. It takes at least 7 instructions to get the proper
+;; result. Here is an obvious 8-instruction sequence.
+
+;; XXX
+(define_insn "ffssi2"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (ffs:SI (match_operand:SI 1 "register_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_SPARCLITE || TARGET_SPARCLET"
+{
+ return "sub\t%%g0, %1, %0\;and\t%0, %1, %0\;scan\t%0, 0, %0\;mov\t32, %2\;sub\t%2, %0, %0\;sra\t%0, 31, %2\;and\t%2, 31, %2\;add\t%2, %0, %0";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+;; ??? This should be a define_expand, so that the extra instructions have
+;; a chance of being optimized away.
+
+;; Disabled because none of the UltraSPARCs implement popc. The HAL R1
+;; does, but no one uses that and we don't have a switch for it.
+;
+;(define_insn "ffsdi2"
+; [(set (match_operand:DI 0 "register_operand" "=&r")
+; (ffs:DI (match_operand:DI 1 "register_operand" "r")))
+; (clobber (match_scratch:DI 2 "=&r"))]
+; "TARGET_ARCH64"
+; "neg\t%1, %2\;xnor\t%1, %2, %2\;popc\t%2, %0\;movzr\t%1, 0, %0"
+; [(set_attr "type" "multi")
+; (set_attr "length" "4")])
+
+
+
+;; Peepholes go at the end.
+
+;; Optimize consecutive loads or stores into ldd and std when possible.
+;; The conditions in which we do this are very restricted and are
+;; explained in the code for the registers_ok_for_ldd_peep and
+;; mems_ok_for_ldd_peep functions.
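+
+;; For illustration (registers and addresses are made up): a pair such as
+;;   ld [%i0], %o0
+;;   ld [%i0+4], %o1
+;; becomes the single doubleword load
+;;   ldd [%i0], %o0
+;; provided %o0/%o1 are a valid even/odd register pair and the first
+;; address is doubleword aligned.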
+
+(define_peephole2
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (const_int 0))
+ (set (match_operand:SI 1 "memory_operand" "")
+ (const_int 0))]
+ "TARGET_V9
+ && mems_ok_for_ldd_peep (operands[0], operands[1], NULL_RTX)"
+ [(set (match_dup 0)
+ (const_int 0))]
+ "operands[0] = widen_memory_access (operands[0], DImode, 0);")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (const_int 0))
+ (set (match_operand:SI 1 "memory_operand" "")
+ (const_int 0))]
+ "TARGET_V9
+ && mems_ok_for_ldd_peep (operands[1], operands[0], NULL_RTX)"
+ [(set (match_dup 1)
+ (const_int 0))]
+ "operands[1] = widen_memory_access (operands[1], DImode, 0);")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[0], operands[2])
+ && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[1] = widen_memory_access (operands[1], DImode, 0);
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 3 "register_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[1], operands[3])
+ && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[0] = widen_memory_access (operands[0], DImode, 0);
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
+
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (set (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "memory_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[0], operands[2])
+ && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[1] = widen_memory_access (operands[1], DFmode, 0);
+ operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
+(define_peephole2
+ [(set (match_operand:SF 0 "memory_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (set (match_operand:SF 2 "memory_operand" "")
+ (match_operand:SF 3 "register_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[1], operands[3])
+ && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ "operands[0] = widen_memory_access (operands[0], DFmode, 0);
+ operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (set (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[2], operands[0])
+ && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])"
+ [(set (match_dup 2)
+ (match_dup 3))]
+ "operands[3] = widen_memory_access (operands[3], DImode, 0);
+ operands[2] = gen_rtx_REG (DImode, REGNO (operands[2]));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 3 "register_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[3], operands[1])
+ && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)"
+ [(set (match_dup 2)
+ (match_dup 3))]
+ "operands[2] = widen_memory_access (operands[2], DImode, 0);
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+ ")
+
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (set (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "memory_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[2], operands[0])
+ && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])"
+ [(set (match_dup 2)
+ (match_dup 3))]
+ "operands[3] = widen_memory_access (operands[3], DFmode, 0);
+ operands[2] = gen_rtx_REG (DFmode, REGNO (operands[2]));")
+
+(define_peephole2
+ [(set (match_operand:SF 0 "memory_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (set (match_operand:SF 2 "memory_operand" "")
+ (match_operand:SF 3 "register_operand" ""))]
+ "registers_ok_for_ldd_peep (operands[3], operands[1])
+ && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)"
+ [(set (match_dup 2)
+ (match_dup 3))]
+ "operands[2] = widen_memory_access (operands[2], DFmode, 0);
+ operands[3] = gen_rtx_REG (DFmode, REGNO (operands[3]));")
+
+;; Optimize the case of a reg-reg move followed by a test of the register
+;; just moved. Don't allow floating-point regs for operand 0 or 1.
+;; This can result from a float-to-fix conversion.
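+
+;; For illustration (a sketch; the exact insn chosen depends on the
+;; matching move-and-compare pattern): "mov %o1, %o0; cmp %o0, 0" can
+;; then be emitted as the single "orcc %g0, %o1, %o0", which copies the
+;; value and sets the integer condition codes in one instruction.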
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:CC 100)
+ (compare:CC (match_operand:SI 2 "register_operand" "")
+ (const_int 0)))]
+ "(rtx_equal_p (operands[2], operands[0])
+ || rtx_equal_p (operands[2], operands[1]))
+ && ! SPARC_FP_REG_P (REGNO (operands[0]))
+ && ! SPARC_FP_REG_P (REGNO (operands[1]))"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (reg:CC 100)
+ (compare:CC (match_dup 1) (const_int 0)))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operand:DI 1 "register_operand" ""))
+ (set (reg:CCX 100)
+ (compare:CCX (match_operand:DI 2 "register_operand" "")
+ (const_int 0)))]
+ "TARGET_ARCH64
+ && (rtx_equal_p (operands[2], operands[0])
+ || rtx_equal_p (operands[2], operands[1]))
+ && ! SPARC_FP_REG_P (REGNO (operands[0]))
+ && ! SPARC_FP_REG_P (REGNO (operands[1]))"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (reg:CCX 100)
+ (compare:CCX (match_dup 1) (const_int 0)))])]
+ "")
+
+
+;; Prefetch instructions.
+
+;; ??? UltraSPARC-III note: A memory operation loading into the floating
+;; ??? point register file, if it hits the prefetch cache, has a chance to
+;; ??? dual-issue with other memory operations. With DFA we might be able
+;; ??? to model this, but it requires a lot of state.
+(define_expand "prefetch"
+ [(match_operand 0 "address_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_V9"
+{
+ if (TARGET_ARCH64)
+ emit_insn (gen_prefetch_64 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_prefetch_32 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "prefetch_64"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:DI 1 "const_int_operand" "n")
+ (match_operand:DI 2 "const_int_operand" "n"))]
+ ""
+{
+ static const char * const prefetch_instr[2][2] = {
+ {
+ "prefetch\t[%a0], 1", /* no locality: prefetch for one read */
+ "prefetch\t[%a0], 0", /* medium to high locality: prefetch for several reads */
+ },
+ {
+ "prefetch\t[%a0], 3", /* no locality: prefetch for one write */
+ "prefetch\t[%a0], 2", /* medium to high locality: prefetch for several writes */
+ }
+ };
+ int read_or_write = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (read_or_write == 0 || read_or_write == 1);
+ gcc_assert (locality >= 0 && locality < 4);
+ return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
+}
+ [(set_attr "type" "load")])
+
+(define_insn "prefetch_32"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ ""
+{
+ static const char * const prefetch_instr[2][2] = {
+ {
+ "prefetch\t[%a0], 1", /* no locality: prefetch for one read */
+ "prefetch\t[%a0], 0", /* medium to high locality: prefetch for several reads */
+ },
+ {
+ "prefetch\t[%a0], 3", /* no locality: prefetch for one write */
+ "prefetch\t[%a0], 2", /* medium to high locality: prefetch for several writes */
+ }
+ };
+ int read_or_write = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (read_or_write == 0 || read_or_write == 1);
+ gcc_assert (locality >= 0 && locality < 4);
+ return prefetch_instr [read_or_write][locality == 0 ? 0 : 1];
+}
+ [(set_attr "type" "load")])
+
+
+;; Trap instructions.
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 5))]
+ ""
+ "ta\t5"
+ [(set_attr "type" "trap")])
+
+(define_expand "ctrapsi4"
+ [(trap_if (match_operator 0 "noov_compare_operator"
+ [(match_operand:SI 1 "compare_operand" "")
+ (match_operand:SI 2 "arith_operand" "")])
+ (match_operand 3 ""))]
+ ""
+ "operands[1] = gen_compare_reg (operands[0]);
+ if (GET_MODE (operands[1]) != CCmode && GET_MODE (operands[1]) != CCXmode)
+ FAIL;
+ operands[2] = const0_rtx;")
+
+(define_expand "ctrapdi4"
+ [(trap_if (match_operator 0 "noov_compare_operator"
+ [(match_operand:DI 1 "compare_operand" "")
+ (match_operand:DI 2 "arith_operand" "")])
+ (match_operand 3 ""))]
+ "TARGET_ARCH64"
+ "operands[1] = gen_compare_reg (operands[0]);
+ if (GET_MODE (operands[1]) != CCmode && GET_MODE (operands[1]) != CCXmode)
+ FAIL;
+ operands[2] = const0_rtx;")
+
+
+(define_insn ""
+ [(trap_if (match_operator 0 "noov_compare_operator" [(reg:CC 100) (const_int 0)])
+ (match_operand:SI 1 "arith_operand" "rM"))]
+ ""
+{
+ if (TARGET_V9)
+ return "t%C0\t%%icc, %1";
+ else
+ return "t%C0\t%1";
+}
+ [(set_attr "type" "trap")])
+
+(define_insn ""
+ [(trap_if (match_operator 0 "noov_compare_operator" [(reg:CCX 100) (const_int 0)])
+ (match_operand:SI 1 "arith_operand" "rM"))]
+ "TARGET_V9"
+ "t%C0\t%%xcc, %1"
+ [(set_attr "type" "trap")])
+
+
+;; TLS support instructions.
+
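+;; A sketch of how the patterns below combine (global-dynamic model;
+;; the symbol and registers are illustrative): a GD access to "sym" in
+;; 32-bit PIC code expands via tgd_hi22, tgd_lo10, tgd_add32 and
+;; tgd_call32 into roughly
+;;   sethi %tgd_hi22(sym), %o0
+;;   add   %o0, %tgd_lo10(sym), %o0
+;;   add   %l7, %o0, %o0, %tgd_add(sym)
+;;   call  __tls_get_addr, %tgd_call(sym)
+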
+(define_insn "tgd_hi22"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD)))]
+ "TARGET_TLS"
+ "sethi\\t%%tgd_hi22(%a1), %0")
+
+(define_insn "tgd_lo10"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand 2 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD)))]
+ "TARGET_TLS"
+ "add\\t%1, %%tgd_lo10(%a2), %0")
+
+(define_insn "tgd_add32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD)))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "add\\t%1, %2, %0, %%tgd_add(%a3)")
+
+(define_insn "tgd_add64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD)))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "add\\t%1, %2, %0, %%tgd_add(%a3)")
+
+(define_insn "tgd_call32"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "s")
+ (match_operand 2 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD))
+ (match_operand 3 "" "")))
+ (clobber (reg:SI 15))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "call\t%a1, %%tgd_call(%a2)%#"
+ [(set_attr "type" "call")])
+
+(define_insn "tgd_call64"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "s")
+ (match_operand 2 "tgd_symbolic_operand" "")]
+ UNSPEC_TLSGD))
+ (match_operand 3 "" "")))
+ (clobber (reg:DI 15))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "call\t%a1, %%tgd_call(%a2)%#"
+ [(set_attr "type" "call")])
+
+(define_insn "tldm_hi22"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(const_int 0)] UNSPEC_TLSLDM)))]
+ "TARGET_TLS"
+ "sethi\\t%%tldm_hi22(%&), %0")
+
+(define_insn "tldm_lo10"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(const_int 0)] UNSPEC_TLSLDM)))]
+ "TARGET_TLS"
+ "add\\t%1, %%tldm_lo10(%&), %0")
+
+(define_insn "tldm_add32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_TLSLDM)))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "add\\t%1, %2, %0, %%tldm_add(%&)")
+
+(define_insn "tldm_add64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_TLSLDM)))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "add\\t%1, %2, %0, %%tldm_add(%&)")
+
+(define_insn "tldm_call32"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "s")]
+ UNSPEC_TLSLDM))
+ (match_operand 2 "" "")))
+ (clobber (reg:SI 15))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "call\t%a1, %%tldm_call(%&)%#"
+ [(set_attr "type" "call")])
+
+(define_insn "tldm_call64"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (mem:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "s")]
+ UNSPEC_TLSLDM))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI 15))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "call\t%a1, %%tldm_call(%&)%#"
+ [(set_attr "type" "call")])
+
+(define_insn "tldo_hix22"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)))]
+ "TARGET_TLS"
+ "sethi\\t%%tldo_hix22(%a1), %0")
+
+(define_insn "tldo_lox10"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand 2 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)))]
+ "TARGET_TLS"
+ "xor\\t%1, %%tldo_lox10(%a2), %0")
+
+(define_insn "tldo_add32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "add\\t%1, %2, %0, %%tldo_add(%a3)")
+
+(define_insn "tldo_add64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "add\\t%1, %2, %0, %%tldo_add(%a3)")
+
+(define_insn "tie_hi22"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE)))]
+ "TARGET_TLS"
+ "sethi\\t%%tie_hi22(%a1), %0")
+
+(define_insn "tie_lo10"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand 2 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE)))]
+ "TARGET_TLS"
+ "add\\t%1, %%tie_lo10(%a2), %0")
+
+(define_insn "tie_ld32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ld\\t[%1 + %2], %0, %%tie_ld(%a3)"
+ [(set_attr "type" "load")])
+
+(define_insn "tie_ld64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldx\\t[%1 + %2], %0, %%tie_ldx(%a3)"
+ [(set_attr "type" "load")])
+
+(define_insn "tie_add32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE)))]
+ "TARGET_SUN_TLS && TARGET_ARCH32"
+ "add\\t%1, %2, %0, %%tie_add(%a3)")
+
+(define_insn "tie_add64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand:DI 2 "register_operand" "r")
+ (match_operand 3 "tie_symbolic_operand" "")]
+ UNSPEC_TLSIE)))]
+ "TARGET_SUN_TLS && TARGET_ARCH64"
+ "add\\t%1, %2, %0, %%tie_add(%a3)")
+
+(define_insn "tle_hix22_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (unspec:SI [(match_operand 1 "tle_symbolic_operand" "")]
+ UNSPEC_TLSLE)))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "sethi\\t%%tle_hix22(%a1), %0")
+
+(define_insn "tle_lox10_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (unspec:SI [(match_operand 2 "tle_symbolic_operand" "")]
+ UNSPEC_TLSLE)))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "xor\\t%1, %%tle_lox10(%a2), %0")
+
+(define_insn "tle_hix22_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (high:DI (unspec:DI [(match_operand 1 "tle_symbolic_operand" "")]
+ UNSPEC_TLSLE)))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "sethi\\t%%tle_hix22(%a1), %0")
+
+(define_insn "tle_lox10_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+ (unspec:DI [(match_operand 2 "tle_symbolic_operand" "")]
+ UNSPEC_TLSLE)))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "xor\\t%1, %%tle_lox10(%a2), %0")
+
+;; Now the patterns combining tldo_add{32,64} with some integer loads or stores.
+(define_insn "*tldo_ldub_sp32"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub1_sp32"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub2_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsb1_sp32"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldsb\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsb2_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldsb\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub_sp64"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub1_sp64"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub2_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldub3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldub\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsb1_sp64"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsb\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsb2_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsb\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsb3_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsb\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduh_sp32"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduh1_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsh1_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ldsh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduh_sp64"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduh1_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduh2_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "lduh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsh1_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldsh2_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsh\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_lduw_sp32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "ld\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")])
+
+(define_insn "*tldo_lduw_sp64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "lduw\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")])
+
+(define_insn "*tldo_lduw1_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "lduw\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")])
+
+(define_insn "*tldo_ldsw1_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldsw\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "sload")
+ (set_attr "us3load_type" "3cycle")])
+
+(define_insn "*tldo_ldx_sp64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mem:DI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r"))))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "ldx\t[%1 + %2], %0, %%tldo_add(%3)"
+ [(set_attr "type" "load")])
+
+(define_insn "*tldo_stb_sp32"
+ [(set (mem:QI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))
+ (match_operand:QI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "stb\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_stb_sp64"
+ [(set (mem:QI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))
+ (match_operand:QI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "stb\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_sth_sp32"
+ [(set (mem:HI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))
+ (match_operand:HI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "sth\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_sth_sp64"
+ [(set (mem:HI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))
+ (match_operand:HI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "sth\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_stw_sp32"
+ [(set (mem:SI (plus:SI (unspec:SI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:SI 1 "register_operand" "r")))
+ (match_operand:SI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH32"
+ "st\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_stw_sp64"
+ [(set (mem:SI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))
+ (match_operand:SI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "stw\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+(define_insn "*tldo_stx_sp64"
+ [(set (mem:DI (plus:DI (unspec:DI [(match_operand:SI 2 "register_operand" "r")
+ (match_operand 3 "tld_symbolic_operand" "")]
+ UNSPEC_TLSLDO)
+ (match_operand:DI 1 "register_operand" "r")))
+ (match_operand:DI 0 "register_operand" "r"))]
+ "TARGET_TLS && TARGET_ARCH64"
+ "stx\t%0, [%1 + %2], %%tldo_add(%3)"
+ [(set_attr "type" "store")])
+
+
+;; Stack protector instructions.
+
+(define_expand "stack_protect_set"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tlsreg = gen_rtx_REG (Pmode, 7);
+ rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET));
+ operands[1] = gen_rtx_MEM (Pmode, addr);
+#endif
+ if (TARGET_ARCH64)
+ emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_setsi (operands[0], operands[1]));
+ DONE;
+})
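+
+;; A sketch of what the expander above produces on glibc targets that
+;; define TARGET_THREAD_SSP_OFFSET (the 0x28 offset below is purely
+;; illustrative): the canary is fetched relative to the thread pointer
+;; %g7 (hard register 7) instead of from a __stack_chk_guard symbol:
+;;
+;;   ldx   [%g7 + 0x28], %g1   ! load the canary from TLS
+;;   stx   %g1, [%fp + OFF]    ! store it into the guard slot
+;;   mov   0, %g1              ! wipe the register copy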
+
+(define_insn "stack_protect_setsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "TARGET_ARCH32"
+ "ld\t%1, %2\;st\t%2, %0\;mov\t0, %2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "3")])
+
+(define_insn "stack_protect_setdi"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))]
+ "TARGET_ARCH64"
+ "ldx\t%1, %2\;stx\t%2, %0\;mov\t0, %2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "3")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx result, test;
+#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tlsreg = gen_rtx_REG (Pmode, 7);
+ rtx addr = gen_rtx_PLUS (Pmode, tlsreg, GEN_INT (TARGET_THREAD_SSP_OFFSET));
+ operands[1] = gen_rtx_MEM (Pmode, addr);
+#endif
+ if (TARGET_ARCH64)
+ {
+ result = gen_reg_rtx (Pmode);
+ emit_insn (gen_stack_protect_testdi (result, operands[0], operands[1]));
+ test = gen_rtx_EQ (VOIDmode, result, const0_rtx);
+ emit_jump_insn (gen_cbranchdi4 (test, result, const0_rtx, operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_stack_protect_testsi (operands[0], operands[1]));
+ result = gen_rtx_REG (CCmode, SPARC_ICC_REG);
+ test = gen_rtx_EQ (VOIDmode, result, const0_rtx);
+ emit_jump_insn (gen_cbranchcc4 (test, result, const0_rtx, operands[2]));
+ }
+ DONE;
+})
+
+(define_insn "stack_protect_testsi"
+ [(set (reg:CC 100)
+ (unspec:CC [(match_operand:SI 0 "memory_operand" "m")
+ (match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 3 "=r") (const_int 0))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_ARCH32"
+ "ld\t%0, %2\;ld\t%1, %3\;xorcc\t%2, %3, %2\;mov\t0, %3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
+(define_insn "stack_protect_testdi"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:DI 3 "=r") (const_int 0))]
+ "TARGET_ARCH64"
+ "ldx\t%1, %0\;ldx\t%2, %3\;xor\t%0, %3, %0\;mov\t0, %3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "4")])
+
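+;; In both the set and test patterns above, the trailing "mov 0"
+;; deliberately clears the scratch register so the canary value is
+;; never left live in a register after the sequence, where it could
+;; leak.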
+
+;; Vector instructions.
+
+(define_insn "addv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=e")
+ (plus:V2SI (match_operand:V2SI 1 "register_operand" "e")
+ (match_operand:V2SI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fpadd32\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "addv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (plus:V4HI (match_operand:V4HI 1 "register_operand" "e")
+ (match_operand:V4HI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fpadd16\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+;; fpadd32s is emitted by the addsi3 pattern.
+
+(define_insn "addv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=f")
+ (plus:V2HI (match_operand:V2HI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")))]
+ "TARGET_VIS"
+ "fpadd16s\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "single")])
+
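+;; A usage sketch (assuming -mvis and GCC's generic vector extension;
+;; the function name is arbitrary): arithmetic on 8-byte integer
+;; vectors in C maps onto the partitioned add patterns above, e.g.
+;;
+;;   typedef short v4hi __attribute__ ((vector_size (8)));
+;;   v4hi add4 (v4hi a, v4hi b) { return a + b; }  /* -> fpadd16 */
+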
+(define_insn "subv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=e")
+ (minus:V2SI (match_operand:V2SI 1 "register_operand" "e")
+ (match_operand:V2SI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fpsub32\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "subv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (minus:V4HI (match_operand:V4HI 1 "register_operand" "e")
+ (match_operand:V4HI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fpsub16\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+;; fpsub32s is emitted by the subsi3 pattern.
+
+(define_insn "subv2hi3"
+ [(set (match_operand:V2HI 0 "register_operand" "=f")
+ (minus:V2HI (match_operand:V2HI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")))]
+ "TARGET_VIS"
+ "fpsub16s\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "single")])
+
+;; All other logical instructions have integer equivalents so they
+;; are defined together.
+
+;; (ior (not (op1)) (not (op2))) is the canonical form of NAND.
+
+(define_insn "*nand<V64:mode>_vis"
+ [(set (match_operand:V64 0 "register_operand" "=e")
+ (ior:V64 (not:V64 (match_operand:V64 1 "register_operand" "e"))
+ (not:V64 (match_operand:V64 2 "register_operand" "e"))))]
+ "TARGET_VIS"
+ "fnand\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "*nand<V32:mode>_vis"
+ [(set (match_operand:V32 0 "register_operand" "=f")
+ (ior:V32 (not:V32 (match_operand:V32 1 "register_operand" "f"))
+ (not:V32 (match_operand:V32 2 "register_operand" "f"))))]
+ "TARGET_VIS"
+ "fnands\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "single")])
+
+;; These VIS instructions are hard to generate from ordinary RTL, so
+;; we provide builtins for them.
+
+(define_insn "fpack16_vis"
+ [(set (match_operand:V4QI 0 "register_operand" "=f")
+ (unspec:V4QI [(match_operand:V4HI 1 "register_operand" "e")]
+ UNSPEC_FPACK16))]
+ "TARGET_VIS"
+ "fpack16\t%1, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "fpackfix_vis"
+ [(set (match_operand:V2HI 0 "register_operand" "=f")
+ (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "e")]
+ UNSPEC_FPACKFIX))]
+ "TARGET_VIS"
+ "fpackfix\t%1, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "fpack32_vis"
+ [(set (match_operand:V8QI 0 "register_operand" "=e")
+ (unspec:V8QI [(match_operand:V2SI 1 "register_operand" "e")
+ (match_operand:V8QI 2 "register_operand" "e")]
+ UNSPEC_FPACK32))]
+ "TARGET_VIS"
+ "fpack32\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "fexpand_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f")]
+ UNSPEC_FEXPAND))]
+ "TARGET_VIS"
+ "fexpand\t%1, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+;; It may be possible to describe this operation as (1 indexed):
+;; (vec_select (vec_duplicate (vec_duplicate (vec_concat 1 2)))
+;; 1,5,10,14,19,23,28,32)
+;; Note that (vec_merge:V8QI [(V4QI) (V4QI)] (10101010 = 170)) doesn't work
+;; because vec_merge expects all the operands to be of the same type.
+(define_insn "fpmerge_vis"
+ [(set (match_operand:V8QI 0 "register_operand" "=e")
+ (unspec:V8QI [(match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V4QI 2 "register_operand" "f")]
+ UNSPEC_FPMERGE))]
+ "TARGET_VIS"
+ "fpmerge\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+;; Partitioned multiply instructions
+(define_insn "fmul8x16_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (mult:V4HI (match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V4HI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fmul8x16\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+;; Only one of the following two insns can be a multiply.
+(define_insn "fmul8x16au_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (mult:V4HI (match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")))]
+ "TARGET_VIS"
+ "fmul8x16au\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+(define_insn "fmul8x16al_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (unspec:V4HI [(match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")]
+ UNSPEC_MUL16AL))]
+ "TARGET_VIS"
+ "fmul8x16al\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+;; Only one of the following two insns can be a multiply.
+(define_insn "fmul8sux16_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (mult:V4HI (match_operand:V8QI 1 "register_operand" "e")
+ (match_operand:V4HI 2 "register_operand" "e")))]
+ "TARGET_VIS"
+ "fmul8sux16\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+(define_insn "fmul8ulx16_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (unspec:V4HI [(match_operand:V8QI 1 "register_operand" "e")
+ (match_operand:V4HI 2 "register_operand" "e")]
+ UNSPEC_MUL8UL))]
+ "TARGET_VIS"
+ "fmul8ulx16\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+;; Only one of the following two insns can be a multiply.
+(define_insn "fmuld8sux16_vis"
+ [(set (match_operand:V2SI 0 "register_operand" "=e")
+ (mult:V2SI (match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")))]
+ "TARGET_VIS"
+ "fmuld8sux16\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+(define_insn "fmuld8ulx16_vis"
+ [(set (match_operand:V2SI 0 "register_operand" "=e")
+ (unspec:V2SI [(match_operand:V4QI 1 "register_operand" "f")
+ (match_operand:V2HI 2 "register_operand" "f")]
+ UNSPEC_MULDUL))]
+ "TARGET_VIS"
+ "fmuld8ulx16\t%1, %2, %0"
+ [(set_attr "type" "fpmul")
+ (set_attr "fptype" "double")])
+
+;; Using faligndata only makes sense after an alignaddr since the choice of
+;; bytes to take out of each operand is dependent on the results of the last
+;; alignaddr.
+(define_insn "faligndata<V64I:mode>_vis"
+ [(set (match_operand:V64I 0 "register_operand" "=e")
+ (unspec:V64I [(match_operand:V64I 1 "register_operand" "e")
+ (match_operand:V64I 2 "register_operand" "e")]
+ UNSPEC_ALIGNDATA))]
+ "TARGET_VIS"
+ "faligndata\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
+(define_insn "alignaddr<P:mode>_vis"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ")
+ (match_operand:P 2 "register_or_zero_operand" "rJ")]
+ UNSPEC_ALIGNADDR))]
+ "TARGET_VIS"
+ "alignaddr\t%r1, %r2, %0")
+
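+;; An illustrative misaligned 8-byte load built from the two insns
+;; above (register choices arbitrary): alignaddr rounds the address
+;; down and latches the byte offset in the GSR, and faligndata then
+;; extracts the requested bytes from two adjacent aligned doublewords:
+;;
+;;   alignaddr  %o0, %g0, %o1   ! %o1 = %o0 & ~7, GSR.align = %o0 & 7
+;;   ldd        [%o1], %f0
+;;   ldd        [%o1 + 8], %f2
+;;   faligndata %f0, %f2, %f4   ! 8 bytes starting at address %o0
+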
+(define_insn "pdist_vis"
+ [(set (match_operand:DI 0 "register_operand" "=e")
+ (unspec:DI [(match_operand:V8QI 1 "register_operand" "e")
+ (match_operand:V8QI 2 "register_operand" "e")
+ (match_operand:DI 3 "register_operand" "0")]
+ UNSPEC_PDIST))]
+ "TARGET_VIS"
+ "pdist\t%1, %2, %0"
+ [(set_attr "type" "fga")
+ (set_attr "fptype" "double")])
+
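+;; pdist computes an accumulating sum of absolute differences: it adds
+;; |byte i of %1 - byte i of %2| for i = 0..7 to the 64-bit value in
+;; operand 3, whose "0" constraint ties it to the destination and thus
+;; models the read-modify-write of the accumulator.
+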
+(include "sync.md")
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
new file mode 100644
index 000000000..a97cad176
--- /dev/null
+++ b/gcc/config/sparc/sparc.opt
@@ -0,0 +1,126 @@
+; Options for the SPARC port of the compiler
+;
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mfpu
+Target Report Mask(FPU)
+Use hardware FP
+
+mhard-float
+Target RejectNegative Mask(FPU) MaskExists
+Use hardware FP
+
+msoft-float
+Target RejectNegative InverseMask(FPU)
+Do not use hardware FP
+
+munaligned-doubles
+Target Report Mask(UNALIGNED_DOUBLES)
+Assume possible double misalignment
+
+mapp-regs
+Target Report Mask(APP_REGS)
+Use ABI reserved registers
+
+mhard-quad-float
+Target Report RejectNegative Mask(HARD_QUAD)
+Use hardware quad FP instructions
+
+msoft-quad-float
+Target Report RejectNegative InverseMask(HARD_QUAD)
+Do not use hardware quad FP instructions
+
+mv8plus
+Target Report Mask(V8PLUS)
+Compile for V8+ ABI
+
+mvis
+Target Report Mask(VIS)
+Use UltraSPARC Visual Instruction Set extensions
+
+mptr64
+Target Report RejectNegative Mask(PTR64)
+Pointers are 64-bit
+
+mptr32
+Target Report RejectNegative InverseMask(PTR64)
+Pointers are 32-bit
+
+m64
+Target Report RejectNegative Mask(64BIT)
+Use 64-bit ABI
+
+m32
+Target Report RejectNegative InverseMask(64BIT)
+Use 32-bit ABI
+
+mstack-bias
+Target Report Mask(STACK_BIAS)
+Use stack bias
+
+mfaster-structs
+Target Report Mask(FASTER_STRUCTS)
+Assume structs have stronger (double-word) alignment, enabling faster copies
+
+mrelax
+Target
+Optimize tail call instructions in assembler and linker
+
+mcpu=
+Target RejectNegative Joined
+Use features of and schedule code for given CPU
+
+mtune=
+Target RejectNegative Joined
+Schedule code for given CPU
+
+mcmodel=
+Target RejectNegative Joined Var(sparc_cmodel_string)
+Use given SPARC-V9 code model
+
+mstd-struct-return
+Target Report RejectNegative Var(sparc_std_struct_return)
+Enable strict 32-bit psABI struct return checking.
+
+mfix-at697f
+Target Report RejectNegative Var(sparc_fix_at697f)
+Enable workaround for a single erratum of the AT697F processor
+(corresponding to erratum #13 of the AT697E processor)
+
+Mask(LITTLE_ENDIAN)
+;; Generate code for little-endian
+
+Mask(LONG_DOUBLE_128)
+;; Use 128-bit long double
+
+Mask(SPARCLITE)
+;; Generate code for SPARClite
+
+Mask(SPARCLET)
+;; Generate code for SPARClet
+
+Mask(V8)
+;; Generate code for SPARC-V8
+
+Mask(V9)
+;; Generate code for SPARC-V9
+
+Mask(DEPRECATED_V8_INSNS)
+;; Generate code that uses the V8 instructions deprecated
+;; in the V9 architecture.
diff --git a/gcc/config/sparc/sparclet.md b/gcc/config/sparc/sparclet.md
new file mode 100644
index 000000000..3e99d56ad
--- /dev/null
+++ b/gcc/config/sparc/sparclet.md
@@ -0,0 +1,43 @@
+;; Scheduling description for SPARClet.
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The SPARClet is a single-issue processor.
+
+(define_automaton "sparclet")
+
+(define_cpu_unit "sl_load0,sl_load1,sl_load2,sl_load3" "sparclet")
+(define_cpu_unit "sl_store,sl_imul" "sparclet")
+
+(define_reservation "sl_load_any" "(sl_load0 | sl_load1 | sl_load2 | sl_load3)")
+(define_reservation "sl_load_all" "(sl_load0 + sl_load1 + sl_load2 + sl_load3)")
+
+(define_insn_reservation "sl_ld" 3
+ (and (eq_attr "cpu" "tsc701")
+ (eq_attr "type" "load,sload"))
+ "sl_load_any, sl_load_any, sl_load_any")
+
+(define_insn_reservation "sl_st" 3
+ (and (eq_attr "cpu" "tsc701")
+ (eq_attr "type" "store"))
+ "(sl_store+sl_load_all)*3")
+
+(define_insn_reservation "sl_imul" 5
+ (and (eq_attr "cpu" "tsc701")
+ (eq_attr "type" "imul"))
+ "sl_imul*5")
diff --git a/gcc/config/sparc/supersparc.md b/gcc/config/sparc/supersparc.md
new file mode 100644
index 000000000..c5617c155
--- /dev/null
+++ b/gcc/config/sparc/supersparc.md
@@ -0,0 +1,92 @@
+;; Scheduling description for SuperSPARC.
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The SuperSPARC is a tri-issue processor, which was considered quite parallel
+;; at the time it was released. Much like UltraSPARC-I and UltraSPARC-II
+;; there are two integer units but only one of them may take shifts.
+;;
+;; ??? If SuperSPARC has the same slotting rules as ultrasparc for these
+;; ??? shifts, we should model that.
+
+(define_automaton "supersparc_0,supersparc_1")
+
+(define_cpu_unit "ss_memory, ss_shift, ss_iwport0, ss_iwport1" "supersparc_0")
+(define_cpu_unit "ss_fpalu" "supersparc_0")
+(define_cpu_unit "ss_fpmds" "supersparc_1")
+
+(define_reservation "ss_iwport" "(ss_iwport0 | ss_iwport1)")
+
+(define_insn_reservation "ss_iuload" 1
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "load,sload"))
+ "ss_memory")
+
+;; OK, FPU loads deliver the result in zero cycles, but we still
+;; have to show the ss_memory reservation somehow, thus...
+(define_insn_reservation "ss_fpload" 0
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fpload"))
+ "ss_memory")
+
+(define_bypass 0 "ss_fpload" "ss_fp_alu,ss_fp_mult,ss_fp_divs,ss_fp_divd,ss_fp_sqrt")
+
+(define_insn_reservation "ss_store" 1
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "store,fpstore"))
+ "ss_memory")
+
+(define_insn_reservation "ss_ialu_shift" 1
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "shift"))
+ "ss_shift + ss_iwport")
+
+(define_insn_reservation "ss_ialu_any" 1
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "load,sload,store,shift,ialu"))
+ "ss_iwport")
+
+(define_insn_reservation "ss_fp_alu" 3
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fp,fpmove,fpcmp"))
+ "ss_fpalu, nothing*2")
+
+(define_insn_reservation "ss_fp_mult" 3
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fpmul"))
+ "ss_fpmds, nothing*2")
+
+(define_insn_reservation "ss_fp_divs" 6
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fpdivs"))
+ "ss_fpmds*4, nothing*2")
+
+(define_insn_reservation "ss_fp_divd" 9
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fpdivd"))
+ "ss_fpmds*7, nothing*2")
+
+(define_insn_reservation "ss_fp_sqrt" 12
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "fpsqrts,fpsqrtd"))
+ "ss_fpmds*10, nothing*2")
+
+(define_insn_reservation "ss_imul" 4
+ (and (eq_attr "cpu" "supersparc")
+ (eq_attr "type" "imul"))
+ "ss_fpmds*4")
diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md
new file mode 100644
index 000000000..5dd37d094
--- /dev/null
+++ b/gcc/config/sparc/sync.md
@@ -0,0 +1,199 @@
+;; GCC machine description for SPARC synchronization instructions.
+;; Copyright (C) 2005, 2007, 2009, 2010
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator I12MODE [QI HI])
+(define_mode_iterator I24MODE [HI SI])
+(define_mode_iterator I48MODE [SI (DI "TARGET_ARCH64 || TARGET_V8PLUS")])
+(define_mode_attr modesuffix [(SI "") (DI "x")])
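+
+;; The iterators above turn each pattern below into several concrete
+;; insns: I48MODE instantiates SI unconditionally and DI only on
+;; 64-bit-capable targets, while <modesuffix> selects the mnemonic,
+;; e.g. "cas" for SImode and "casx" for DImode.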
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))]
+ "TARGET_V8 || TARGET_V9"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*stbar"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))]
+ "TARGET_V8"
+ "stbar"
+ [(set_attr "type" "multi")])
+
+;; membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad
+(define_insn "*membar"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMBAR))]
+ "TARGET_V9"
+ "membar\t15"
+ [(set_attr "type" "multi")])
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(match_operand:I12MODE 0 "register_operand" "")
+ (match_operand:I12MODE 1 "memory_operand" "")
+ (match_operand:I12MODE 2 "register_operand" "")
+ (match_operand:I12MODE 3 "register_operand" "")]
+ "TARGET_V9"
+{
+ sparc_expand_compare_and_swap_12 (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:I48MODE 0 "register_operand" "")
+ (match_operand:I48MODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:I48MODE
+ [(match_operand:I48MODE 2 "register_operand" "")
+ (match_operand:I48MODE 3 "register_operand" "")]
+ UNSPECV_CAS))])]
+ "TARGET_V9"
+{
+ if (!REG_P (XEXP (operands[1], 0)))
+ {
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+ }
+ emit_insn (gen_memory_barrier ());
+})
+
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:I48MODE 0 "register_operand" "=r")
+ (mem:I48MODE (match_operand 1 "register_operand" "r")))
+ (set (mem:I48MODE (match_dup 1))
+ (unspec_volatile:I48MODE
+ [(match_operand:I48MODE 2 "register_operand" "r")
+ (match_operand:I48MODE 3 "register_operand" "0")]
+ UNSPECV_CAS))]
+ "TARGET_V9 && (<MODE>mode == SImode || TARGET_ARCH64)"
+ "cas<modesuffix>\t[%1], %2, %0"
+ [(set_attr "type" "multi")])
+
+(define_insn "*sync_compare_and_swapdi_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h")
+ (mem:DI (match_operand 1 "register_operand" "r")))
+ (set (mem:DI (match_dup 1))
+ (unspec_volatile:DI
+ [(match_operand:DI 2 "register_operand" "h")
+ (match_operand:DI 3 "register_operand" "0")]
+ UNSPECV_CAS))]
+ "TARGET_V8PLUS"
+{
+ if (sparc_check_64 (operands[3], insn) <= 0)
+ output_asm_insn ("srl\t%L3, 0, %L3", operands);
+ output_asm_insn ("sllx\t%H3, 32, %H3", operands);
+ output_asm_insn ("or\t%L3, %H3, %L3", operands);
+ if (sparc_check_64 (operands[2], insn) <= 0)
+ output_asm_insn ("srl\t%L2, 0, %L2", operands);
+ output_asm_insn ("sllx\t%H2, 32, %H3", operands);
+ output_asm_insn ("or\t%L2, %H3, %H3", operands);
+ output_asm_insn ("casx\t[%1], %H3, %L3", operands);
+ return "srlx\t%L3, 32, %H3";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:I12MODE 0 "register_operand" "")
+ (match_operand:I12MODE 1 "memory_operand" "")
+ (match_operand:I12MODE 2 "arith_operand" "")]
+ "!TARGET_V9"
+{
+ if (operands[2] != const1_rtx)
+ FAIL;
+ if (TARGET_V8)
+ emit_insn (gen_memory_barrier ());
+ if (<MODE>mode != QImode)
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ emit_insn (gen_ldstub<mode> (operands[0], operands[1]));
+ DONE;
+})
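+
+;; Pre-V9 SPARC has no general atomic exchange for sub-word values,
+;; only ldstub, which unconditionally stores the byte value 0xff;
+;; hence the FAIL above for any stored value other than 1, and the
+;; narrowing of the memory operand to QImode before emitting ldstub.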
+
+(define_expand "sync_lock_test_and_setsi"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec_volatile:SI [(match_operand:SI 1 "memory_operand" "")]
+ UNSPECV_SWAP))
+ (set (match_dup 1)
+ (match_operand:SI 2 "arith_operand" ""))])]
+ ""
+{
+ if (! TARGET_V8 && ! TARGET_V9)
+ {
+ if (operands[2] != const1_rtx)
+ FAIL;
+ operands[1] = adjust_address (operands[1], QImode, 0);
+ emit_insn (gen_ldstubsi (operands[0], operands[1]));
+ DONE;
+ }
+ emit_insn (gen_memory_barrier ());
+ operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "*swapsi"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "memory_operand" "+m")]
+ UNSPECV_SWAP))
+ (set (match_dup 1)
+ (match_operand:SI 2 "register_operand" "0"))]
+ "TARGET_V8 || TARGET_V9"
+ "swap\t%1, %0"
+ [(set_attr "type" "multi")])
+
+(define_expand "ldstubqi"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "")]
+ UNSPECV_LDSTUB))
+ (set (match_dup 1) (const_int -1))])]
+ ""
+ "")
+
+(define_expand "ldstub<mode>"
+ [(parallel [(set (match_operand:I24MODE 0 "register_operand" "")
+ (zero_extend:I24MODE
+ (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "")]
+ UNSPECV_LDSTUB)))
+ (set (match_dup 1) (const_int -1))])]
+ ""
+ "")
+
+(define_insn "*ldstubqi"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "+m")]
+ UNSPECV_LDSTUB))
+ (set (match_dup 1) (const_int -1))]
+ ""
+ "ldstub\t%1, %0"
+ [(set_attr "type" "multi")])
+
+(define_insn "*ldstub<mode>"
+ [(set (match_operand:I24MODE 0 "register_operand" "=r")
+ (zero_extend:I24MODE
+ (unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "+m")]
+ UNSPECV_LDSTUB)))
+ (set (match_dup 1) (const_int -1))]
+ ""
+ "ldstub\t%1, %0"
+ [(set_attr "type" "multi")])
diff --git a/gcc/config/sparc/sysv4.h b/gcc/config/sparc/sysv4.h
new file mode 100644
index 000000000..2ffa94527
--- /dev/null
+++ b/gcc/config/sparc/sysv4.h
@@ -0,0 +1,125 @@
+/* Target definitions for GNU compiler for SPARC running System V.4
+ Copyright (C) 1991, 1992, 1995, 1996, 1997, 1998, 2000, 2002, 2007, 2009,
+ 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Ron Guilmette (rfg@monkeys.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (sparc ELF)");
+#endif
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_ARCH64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_ARCH64 ? "long int" : "int")
+
+/* Undefine some symbols which are appropriate only for typical svr4
+ systems, but not for the specific case of svr4 running on a
+ SPARC. */
+
+#undef INIT_SECTION_ASM_OP
+#undef FINI_SECTION_ASM_OP
+#undef READONLY_DATA_SECTION_ASM_OP
+#undef TYPE_OPERAND_FMT
+#undef STRING_ASM_OP
+#undef COMMON_ASM_OP
+#undef SKIP_ASM_OP
+#undef SET_ASM_OP /* Has no equivalent. See ASM_OUTPUT_DEF below. */
+
+/* Pass -K to the assembler when PIC. */
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{v:-V} %{Qy:} %{!Qn:-Qy} %{Ym,*} \
+ %{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)"
+
+/* Define the names of various pseudo-ops used by the SPARC/svr4 assembler.
+ Note that many of these are different from the typical pseudo-ops used
+ by most svr4 assemblers. That is probably due to a (misguided?) attempt
+ to keep the SPARC/svr4 assembler somewhat compatible with the SPARC/SunOS
+ assembler. */
+
+#define STRING_ASM_OP "\t.asciz\t"
+#define COMMON_ASM_OP "\t.common\t"
+#define SKIP_ASM_OP "\t.skip\t"
+
+/* This is the format used to print the second operand of a .type pseudo-op
+ for the SPARC/svr4 assembler. */
+
+#define TYPE_OPERAND_FMT "#%s"
+
+#undef ASM_OUTPUT_CASE_LABEL
+#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \
+do { ASM_OUTPUT_ALIGN ((FILE), Pmode == SImode ? 2 : 3); \
+ (*targetm.asm_out.internal_label) ((FILE), PREFIX, NUM); \
+ } while (0)
+
+/* This is how to equate one symbol to another symbol. The syntax used is
+ `SYM1=SYM2'. Note that this is different from the way equates are done
+ with most svr4 assemblers, where the syntax is `.set SYM1,SYM2'. */
+
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+ do { fprintf ((FILE), "\t"); \
+ assemble_name (FILE, LABEL1); \
+ fprintf (FILE, " = "); \
+ assemble_name (FILE, LABEL2); \
+ fprintf (FILE, "\n"); \
+ } while (0)
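+
+/* For instance, ASM_OUTPUT_DEF (file, "foo", "bar") emits the line
+   "foo = bar" (tab-indented) instead of the ".set foo,bar" form used
+   by most svr4 assemblers; an illustrative expansion of the macro
+   above.  */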
+
+/* A set of symbol definitions for assembly pseudo-ops which will
+ get us switched to various sections of interest. These are used
+ in all places where we simply want to switch to a section, and
+ *not* to push the previous section name onto the assembler's
+ section names stack (as we do often in dwarfout.c). */
+
+#define TEXT_SECTION_ASM_OP "\t.section\t\".text\""
+#define DATA_SECTION_ASM_OP "\t.section\t\".data\""
+#define BSS_SECTION_ASM_OP "\t.section\t\".bss\""
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t\".rodata\""
+#define INIT_SECTION_ASM_OP "\t.section\t\".init\""
+#define FINI_SECTION_ASM_OP "\t.section\t\".fini\""
+
+/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
+
+ Note that we want to give these sections the SHF_WRITE attribute
+ because these sections will actually contain data (i.e. tables of
+ addresses of functions in the current root executable or shared library
+ file) and, in the case of a shared library, the relocatable addresses
+ will have to be properly resolved/relocated (and then written into) by
+ the dynamic linker when it actually attaches the given shared library
+ to the executing process. (Note that on SVR4, you may wish to use the
+ `-z text' option to the ELF linker, when building a shared library, as
+ an additional check that you are doing everything right. But if you do
+ use the `-z text' option when building a shared library, you will get
+ errors unless the .ctors and .dtors sections are marked as writable
+ via the SHF_WRITE attribute.) */
+
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t\".ctors\",#alloc,#write"
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.section\t\".dtors\",#alloc,#write"
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Override the name of the mcount profiling function. */
+
+#undef MCOUNT_FUNCTION
+#define MCOUNT_FUNCTION "*_mcount"
diff --git a/gcc/config/sparc/t-crtfm b/gcc/config/sparc/t-crtfm
new file mode 100644
index 000000000..e0adb97bd
--- /dev/null
+++ b/gcc/config/sparc/t-crtfm
@@ -0,0 +1,4 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/sparc/crtfastmath.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) $(LIBGCC2_CFLAGS) -c -o $(T)crtfastmath.o $(srcdir)/config/sparc/crtfastmath.c
diff --git a/gcc/config/sparc/t-crtin b/gcc/config/sparc/t-crtin
new file mode 100644
index 000000000..2612bac89
--- /dev/null
+++ b/gcc/config/sparc/t-crtin
@@ -0,0 +1,6 @@
+EXTRA_PARTS += crti.o crtn.o
+
+$(T)crti.o: $(srcdir)/config/sparc/sol2-ci.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-ci.asm
+$(T)crtn.o: $(srcdir)/config/sparc/sol2-cn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-cn.asm
diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf
new file mode 100644
index 000000000..b1d18fda6
--- /dev/null
+++ b/gcc/config/sparc/t-elf
@@ -0,0 +1,40 @@
+# Copyright (C) 1997, 1998, 1999, 2001, 2002, 2005, 2007, 2010, 2011
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = sparc/lb1spc.asm
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+MULTILIB_OPTIONS = msoft-float mcpu=v8
+MULTILIB_DIRNAMES = soft v8
+MULTILIB_MATCHES = msoft-float=mno-fpu
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon
new file mode 100644
index 000000000..6573f824e
--- /dev/null
+++ b/gcc/config/sparc/t-leon
@@ -0,0 +1,42 @@
+# Copyright (C) 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = sparc/lb1spc.asm
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Multilibs for LEON
+# LEON is a SPARC-V8, but the AT697 implementation has a bug in the
+# V8-specific instructions.
+MULTILIB_OPTIONS = mcpu=v7 msoft-float
+MULTILIB_DIRNAMES = v7 soft
+MULTILIB_MATCHES = mcpu?v7=mv7 msoft-float=mno-fpu
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon3 b/gcc/config/sparc/t-leon3
new file mode 100644
index 000000000..ce57d1675
--- /dev/null
+++ b/gcc/config/sparc/t-leon3
@@ -0,0 +1,37 @@
+# Copyright (C) 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# We want fine-grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Multilibs for LEON3
+MULTILIB_OPTIONS = msoft-float
+MULTILIB_DIRNAMES = soft
+MULTILIB_MATCHES = msoft-float=mno-fpu
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-linux b/gcc/config/sparc/t-linux
new file mode 100644
index 000000000..38741fd89
--- /dev/null
+++ b/gcc/config/sparc/t-linux
@@ -0,0 +1,7 @@
+# Override t-slibgcc-elf-ver to export some libgcc symbols with
+# the symbol versions that glibc used.
+# Avoid the t-linux version file.
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \
+ $(srcdir)/config/sparc/libgcc-sparc-glibc.ver
+
+MULTIARCH_DIRNAME = $(call if_multiarch,sparc-linux-gnu)
diff --git a/gcc/config/sparc/t-linux64 b/gcc/config/sparc/t-linux64
new file mode 100644
index 000000000..d904876d4
--- /dev/null
+++ b/gcc/config/sparc/t-linux64
@@ -0,0 +1,37 @@
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004,
+# 2006, 2010, 2012 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# On Debian, Ubuntu and other derivative distributions, the 32-bit libraries
+# are found in /lib32 and /usr/lib32, and /lib64 and /usr/lib64 are symlinks
+# to /lib and /usr/lib, while other distributions install libraries into
+# /lib64 and /usr/lib64.  The LSB does not enforce the use of /lib64 and
+# /usr/lib64, and says nothing about the 32-bit libraries on those systems.
+# Set MULTILIB_OSDIRNAMES according to what is found on the target.
+
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:sparc64-linux-gnu)
+MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:sparc-linux-gnu)
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+CRTSTUFF_T_CFLAGS = `if test x$$($(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) \
+ -print-multi-os-directory) \
+ = x../lib64; then echo -mcmodel=medany; fi`
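+
+# The backquoted command above probes the multilib OS directory at
+# build time: only the 64-bit (../lib64) flavor of crtstuff gets
+# compiled with -mcmodel=medany.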
diff --git a/gcc/config/sparc/t-netbsd64 b/gcc/config/sparc/t-netbsd64
new file mode 100644
index 000000000..0fddb0ffe
--- /dev/null
+++ b/gcc/config/sparc/t-netbsd64
@@ -0,0 +1,8 @@
+# Disable multilib for now, as NetBSD/sparc64 does not ship with
+# a 32-bit environment.
+#MULTILIB_OPTIONS = m32/m64
+#MULTILIB_DIRNAMES = 32 64
+#MULTILIB_MATCHES =
+
+#LIBGCC = stmp-multilib
+#INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-sol2 b/gcc/config/sparc/t-sol2
new file mode 100644
index 000000000..b7f665b1b
--- /dev/null
+++ b/gcc/config/sparc/t-sol2
@@ -0,0 +1,39 @@
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001,
+# 2002 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# gmon build rule:
+$(T)gmon.o: $(srcdir)/config/sparc/gmon-sol2.c $(GCC_PASSES) \
+ $(TCONFIG_H) tsystem.h coretypes.h $(TM_H) stmp-int-hdrs
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \
+ -c $(srcdir)/config/sparc/gmon-sol2.c -o $(T)gmon.o
+
+# Assemble startup files.
+$(T)crt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm
+$(T)gcrt1.o: $(srcdir)/config/sparc/sol2-c1.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -DGCRT1 -o $(T)gcrt1.o -x assembler-with-cpp $(srcdir)/config/sparc/sol2-c1.asm
+
+# We need to use -fPIC when we are using gcc to compile the routines in
+# crtstuff.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fPIC when compiling the
+# routines in crtstuff.c.
+
+CRTSTUFF_T_CFLAGS = -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
diff --git a/gcc/config/sparc/t-sol2-64 b/gcc/config/sparc/t-sol2-64
new file mode 100644
index 000000000..1802239d3
--- /dev/null
+++ b/gcc/config/sparc/t-sol2-64
@@ -0,0 +1,7 @@
+MULTILIB_OPTIONS = m32/m64
+MULTILIB_DIRNAMES = sparcv8plus sparcv9
+MULTILIB_MATCHES =
+MULTILIB_OSDIRNAMES = . sparcv9
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-vxworks b/gcc/config/sparc/t-vxworks
new file mode 100644
index 000000000..2aabf1a43
--- /dev/null
+++ b/gcc/config/sparc/t-vxworks
@@ -0,0 +1,5 @@
+# Multilibs for VxWorks.
+
+MULTILIB_OPTIONS = mrtp fPIC
+MULTILIB_MATCHES = fPIC=fpic
+MULTILIB_EXCEPTIONS = fPIC
diff --git a/gcc/config/sparc/ultra1_2.md b/gcc/config/sparc/ultra1_2.md
new file mode 100644
index 000000000..f0bd7a1cb
--- /dev/null
+++ b/gcc/config/sparc/ultra1_2.md
@@ -0,0 +1,301 @@
+;; Scheduling description for UltraSPARC-I/II.
+;; Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; UltraSPARC-I and II are quad-issue processors. Interesting features
+;; to note:
+;;
+;; - Buffered loads: they can queue waiting for the actual data until
+;; an instruction actually tries to reference the destination register
+;; as an input
+;; - Two integer units. Only one of them can do shifts, and the other
+;; is the only one which may do condition code setting instructions.
+;; Complicating things further, a shift may go only into the first
+;; slot in a dispatched group. And if you have a non-condition-code
+;; setting instruction together with one that does set the condition
+;; codes, the former must be issued first in order for both of them
+;; to issue.
+;; - Stores can issue before the value being stored is available. As long
+;; as the input data becomes ready before the store is to move out of the
+;; store buffer, it will not cause a stall.
+;; - Branches may issue in the same cycle as an instruction setting the
+;; condition codes being tested by that branch. This does not apply
+;; to floating point, only integer.
+
+(define_automaton "ultrasparc_0,ultrasparc_1")
+
+(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0");
+(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
+(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
+(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
+(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")
+
+(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)")
+(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
+(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")
+
+(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
+(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")
+
+;; This is a simplified representation of the issue at hand.
+;; For most cases, going from one FP precision type insn to another
+;; just breaks up the insn group. However, in some cases such
+;; a transition causes the second insn to stall 2 more cycles.
+(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")
+
+;; If we have to schedule an ieu1 specific instruction and we want
+;; to reserve the ieu0 unit as well, we must reserve it first. So for
+;; example we could not schedule this sequence:
+;; COMPARE IEU1
+;; IALU IEU0
+;; but we could schedule them together like this:
+;; IALU IEU0
+;; COMPARE IEU1
+;; This basically requires that ieu0 is reserved before ieu1 when
+;; it is required that both be reserved.
+(absence_set "us1_ieu0" "us1_ieu1")
+
+;; This defines the slotting order. Most IEU instructions can only
+;; execute in the first three slots, FPU and branches can go into
+;; any slot. We represent instructions which "break the group"
+;; as requiring reservation of us1_slot0.
+(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
+(absence_set "us1_slot1" "us1_slot2,us1_slot3")
+(absence_set "us1_slot2" "us1_slot3")
+
+(define_insn_reservation "us1_single" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "multi,savew,flushw,iflush,trap"))
+ "us1_single_issue")
+
+(define_insn_reservation "us1_simple_ieuN" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "ialu"))
+ "(us1_ieu0 | us1_ieu1) + us1_slot012")
+
+(define_insn_reservation "us1_simple_ieu0" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "shift"))
+ "us1_ieu0 + us1_slot012")
+
+(define_insn_reservation "us1_simple_ieu1" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "compare"))
+ "us1_ieu1 + us1_slot012")
+
+(define_insn_reservation "us1_ialuX" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "ialuX"))
+ "us1_single_issue")
+
+(define_insn_reservation "us1_cmove" 2
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "cmove"))
+ "us1_single_issue, nothing")
+
+(define_insn_reservation "us1_imul" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "imul"))
+ "us1_single_issue")
+
+(define_insn_reservation "us1_idiv" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "idiv"))
+ "us1_single_issue")
+
+;; For loads, the "delayed return mode" behavior of the chip
+;; is represented using the us1_load_writeback resource.
+(define_insn_reservation "us1_load" 2
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "load,fpload"))
+ "us1_lsu + us1_slot012, us1_load_writeback")
+
+(define_insn_reservation "us1_load_signed" 3
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "sload"))
+ "us1_lsu + us1_slot012, nothing, us1_load_writeback")
+
+(define_insn_reservation "us1_store" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "store,fpstore"))
+ "us1_lsu + us1_slot012")
+
+(define_insn_reservation "us1_branch" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "branch"))
+ "us1_cti + us1_slotany")
+
+(define_insn_reservation "us1_call_jmpl" 1
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
+ "us1_cti + us1_ieu1 + us1_slot0")
+
+(define_insn_reservation "us1_fmov_single" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmove"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany")
+
+(define_insn_reservation "us1_fmov_double" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmove"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany")
+
+(define_insn_reservation "us1_fcmov_single" 2
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmove,fpcrmove"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany, nothing")
+
+(define_insn_reservation "us1_fcmov_double" 2
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmove,fpcrmove"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany, nothing")
+
+(define_insn_reservation "us1_faddsub_single" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fp"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_faddsub_double" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fp"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_fpcmp_single" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmp"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany")
+
+(define_insn_reservation "us1_fpcmp_double" 1
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpcmp"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany")
+
+(define_insn_reservation "us1_fmult_single" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmul"))
+ (eq_attr "fptype" "single"))
+ "us1_fpm + us1_fp_single + us1_slotany, nothing*3")
+
+(define_insn_reservation "us1_fmult_double" 4
+ (and (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpmul"))
+ (eq_attr "fptype" "double"))
+ "us1_fpm + us1_fp_double + us1_slotany, nothing*3")
+
+;; This is actually in theory dangerous, because it is possible
+;; for the chip to prematurely dispatch the dependent instruction
+;; in the G stage, resulting in a 9 cycle stall. However I have never
+;; been able to trigger this case myself even with hand written code,
+;; so it must require some rare complicated pipeline state.
+(define_bypass 3
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+;; Floating point divide and square root use the multiplier unit
+;; for final rounding 3 cycles before the divide/sqrt is complete.
+
+(define_insn_reservation "us1_fdivs"
+ 13
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpdivs,fpsqrts"))
+ "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2"
+ )
+
+(define_bypass
+ 12
+ "us1_fdivs"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+(define_insn_reservation "us1_fdivd"
+ 23
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fpdivd,fpsqrtd"))
+ "(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2"
+ )
+(define_bypass
+ 22
+ "us1_fdivd"
+ "us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
+
+;; Any store may multi issue with the insn creating the source
+;; data as long as that creating insn is not an FPU div/sqrt.
+;; We need a special guard function because this bypass does
+;; not apply to the address inputs of the store.
+(define_bypass 0 "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double" "us1_store"
+ "store_data_bypass_p")
+
+;; An integer branch may execute in the same cycle as the compare
+;; creating the condition codes.
+(define_bypass 0 "us1_simple_ieu1" "us1_branch")
+
+;; VIS scheduling
+(define_insn_reservation "us1_fga_single"
+ 2
+ (and (and
+ (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fga"))
+ (eq_attr "fptype" "single"))
+ "us1_fpa + us1_fp_single + us1_slotany, nothing")
+
+(define_bypass 1 "us1_fga_single" "us1_fga_single")
+
+(define_insn_reservation "us1_fga_double"
+ 2
+ (and (and
+ (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fga"))
+ (eq_attr "fptype" "double"))
+ "us1_fpa + us1_fp_double + us1_slotany, nothing")
+
+(define_bypass 1 "us1_fga_double" "us1_fga_double")
+
+(define_insn_reservation "us1_fgm_single"
+ 4
+ (and (and
+ (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp"))
+ (eq_attr "fptype" "single"))
+ "us1_fpm + us1_fp_single + us1_slotany, nothing*3")
+
+(define_bypass 3 "us1_fgm_single" "us1_fga_single")
+
+(define_insn_reservation "us1_fgm_double"
+ 4
+ (and (and
+ (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp"))
+ (eq_attr "fptype" "double"))
+ "us1_fpm + us1_fp_double + us1_slotany, nothing*3")
+
+(define_bypass 3 "us1_fgm_double" "us1_fga_double")
+
+(define_insn_reservation "us1_pdist"
+ 4
+ (and (eq_attr "cpu" "ultrasparc")
+ (eq_attr "type" "fgm_pdist"))
+ "us1_fpm + us1_fp_double + us1_slotany, nothing*3")
+
+(define_bypass 3 "us1_pdist" "us1_fga_double,us1_fga_single")
+(define_bypass 1 "us1_pdist" "us1_pdist")
diff --git a/gcc/config/sparc/ultra3.md b/gcc/config/sparc/ultra3.md
new file mode 100644
index 000000000..8feaf794c
--- /dev/null
+++ b/gcc/config/sparc/ultra3.md
@@ -0,0 +1,189 @@
+;; Scheduling description for UltraSPARC-III.
+;; Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; UltraSPARC-III is a quad-issue processor.
+;;
+;; It is also a much simpler beast than Ultra-I/II: no silly
+;; slotting rules, and both integer units are fully symmetric.
+;; It does still have single-issue instructions, though.
+
+(define_automaton "ultrasparc3_0,ultrasparc3_1")
+
+(define_cpu_unit "us3_ms,us3_br,us3_fpm" "ultrasparc3_0")
+(define_cpu_unit "us3_a0,us3_a1,us3_slot0,\
+ us3_slot1,us3_slot2,us3_slot3,us3_fpa" "ultrasparc3_1")
+(define_cpu_unit "us3_load_writeback" "ultrasparc3_1")
+
+(define_reservation "us3_slotany" "(us3_slot0 | us3_slot1 | us3_slot2 | us3_slot3)")
+(define_reservation "us3_single_issue" "us3_slot0 + us3_slot1 + us3_slot2 + us3_slot3")
+(define_reservation "us3_ax" "(us3_a0 | us3_a1)")
+
+(define_insn_reservation "us3_single" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "multi,savew,flushw,iflush,trap"))
+ "us3_single_issue")
+
+(define_insn_reservation "us3_integer" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "ialu,shift,compare"))
+ "us3_ax + us3_slotany")
+
+(define_insn_reservation "us3_ialuX" 5
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "ialu,shift,compare"))
+ "us3_single_issue*4, nothing")
+
+(define_insn_reservation "us3_cmove" 2
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "cmove"))
+ "us3_ms + us3_br + us3_slotany, nothing")
+
+;; ??? Not entirely accurate.
+;; ??? It can run from 6 to 9 cycles. The first cycle the MS pipe
+;; ??? is needed, and the instruction group is broken right after
+;; ??? the imul. Then 'helper' instructions are generated to perform
+;; ??? each further stage of the multiplication, and each such 'helper'
+;; ??? forms its own single-issue group.  So the reservation aspect is represented accurately
+;; ??? here, but the variable cycles are not.
+;; ??? Currently I have no idea how to determine the variability, but once
+;; ??? known we can simply add a define_bypass or similar to model it.
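+;; ??? As a purely hypothetical sketch (not enabled), the fast case
+;; ??? could then be modeled with something like:
+;; ???   (define_bypass 6 "us3_imul" "us3_integer")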
+(define_insn_reservation "us3_imul" 7
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "imul"))
+ "us3_ms + us3_slotany, us3_single_issue*4, nothing*2")
+
+(define_insn_reservation "us3_idiv" 72
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "idiv"))
+ "us3_ms + us3_slotany, us3_single_issue*69, nothing*2")
+
+;; UltraSPARC-III has a load delay similar to UltraSPARC-I/II's, except
+;; that all loads other than 32-bit/64-bit unsigned loads take an extra
+;; cycle for sign/zero extension.
+(define_insn_reservation "us3_2cycle_load" 2
+ (and (eq_attr "cpu" "ultrasparc3")
+ (and (eq_attr "type" "load,fpload")
+ (eq_attr "us3load_type" "2cycle")))
+ "us3_ms + us3_slotany, us3_load_writeback")
+
+(define_insn_reservation "us3_load_delayed" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (and (eq_attr "type" "load,sload")
+ (eq_attr "us3load_type" "3cycle")))
+ "us3_ms + us3_slotany, nothing, us3_load_writeback")
+
+(define_insn_reservation "us3_store" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "store,fpstore"))
+ "us3_ms + us3_slotany")
+
+(define_insn_reservation "us3_branch" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "branch"))
+ "us3_br + us3_slotany")
+
+(define_insn_reservation "us3_call_jmpl" 1
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
+ "us3_br + us3_ms + us3_slotany")
+
+(define_insn_reservation "us3_fmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpmove"))
+ "us3_fpa + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_fcmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpcmove"))
+ "us3_fpa + us3_br + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_fcrmov" 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpcrmove"))
+ "us3_fpa + us3_ms + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_faddsub" 4
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fp"))
+ "us3_fpa + us3_slotany, nothing*3")
+
+(define_insn_reservation "us3_fpcmp" 5
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpcmp"))
+ "us3_fpa + us3_slotany, nothing*4")
+
+(define_insn_reservation "us3_fmult" 4
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpmul"))
+ "us3_fpm + us3_slotany, nothing*3")
+
+(define_insn_reservation "us3_fdivs" 17
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpdivs"))
+ "(us3_fpm + us3_slotany), us3_fpm*14, nothing*2")
+
+(define_insn_reservation "us3_fsqrts" 20
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpsqrts"))
+ "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
+
+(define_insn_reservation "us3_fdivd" 20
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpdivd"))
+ "(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
+
+(define_insn_reservation "us3_fsqrtd" 29
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fpsqrtd"))
+ "(us3_fpm + us3_slotany), us3_fpm*26, nothing*2")
+
+;; Any store may multi issue with the insn creating the source
+;; data as long as that creating insn is not an FPU div/sqrt.
+;; We need a special guard function because this bypass does
+;; not apply to the address inputs of the store.
+(define_bypass 0 "us3_integer,us3_faddsub,us3_fmov,us3_fcmov,us3_fmult" "us3_store"
+ "store_data_bypass_p")
+
+;; An integer branch may execute in the same cycle as the compare
+;; creating the condition codes.
+(define_bypass 0 "us3_integer" "us3_branch")
+
+;; If FMOVfcc is user of FPCMP, latency is only 1 cycle.
+(define_bypass 1 "us3_fpcmp" "us3_fcmov")
+
+;; VIS scheduling
+(define_insn_reservation "us3_fga"
+ 3
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fga"))
+ "us3_fpa + us3_slotany, nothing*2")
+
+(define_insn_reservation "us3_fgm"
+ 4
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fgm_pack,fgm_mul,fgm_cmp"))
+ "us3_fpm + us3_slotany, nothing*3")
+
+(define_insn_reservation "us3_pdist"
+ 4
+ (and (eq_attr "cpu" "ultrasparc3")
+ (eq_attr "type" "fgm_pdist"))
+ "us3_fpm + us3_slotany, nothing*3")
+
+(define_bypass 1 "us3_pdist" "us3_pdist")
diff --git a/gcc/config/sparc/vxworks.h b/gcc/config/sparc/vxworks.h
new file mode 100644
index 000000000..e1b596e7f
--- /dev/null
+++ b/gcc/config/sparc/vxworks.h
@@ -0,0 +1,60 @@
+/* Definitions of target machine for GNU compiler,
+ for SPARC targeting the VxWorks run time environment.
+ Copyright (C) 2007, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__sparc"); \
+ builtin_define ("CPU=SIMSPARCSOLARIS"); \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS
+
+#undef CPP_SUBTARGET_SPEC
+#define CPP_SUBTARGET_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC|fpie|fPIE:-K PIC} %(asm_cpu)"
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (SPARC/VxWorks)", stderr);
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER
+
+/* Use standard numbered ctors/dtors sections. */
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+
+/* We cannot use PC-relative accesses for VxWorks PIC because there is no
+ fixed gap between segments. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
diff --git a/gcc/config/spu/cache.S b/gcc/config/spu/cache.S
new file mode 100644
index 000000000..9ffb6a0d1
--- /dev/null
+++ b/gcc/config/spu/cache.S
@@ -0,0 +1,43 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
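+/* A sketch of the layout (assuming __CACHE_SIZE__ is the cache size
+   in KB): each KB contributes 8 data lines of 128 bytes below, and
+   two 128-byte tag entries (one 4-way set each) in the tag array.
+   The .long 1,1,1,1 words preset each way's tag_lo to 1, the 'empty'
+   marker used by cachemgr.c.  */
+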
+ .data
+ .p2align 7
+ .global __cache
+__cache:
+ .rept __CACHE_SIZE__ * 8
+ .fill 128
+ .endr
+
+ .p2align 7
+ .global __cache_tag_array
+__cache_tag_array:
+ .rept __CACHE_SIZE__ * 2
+ .long 1, 1, 1, 1
+ .fill 128-16
+ .endr
+__end_cache_tag_array:
+
+ .globl __cache_tag_array_size
+ .set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array
+
diff --git a/gcc/config/spu/cachemgr.c b/gcc/config/spu/cachemgr.c
new file mode 100644
index 000000000..e7abd5e62
--- /dev/null
+++ b/gcc/config/spu/cachemgr.c
@@ -0,0 +1,438 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+#include <spu_internals.h>
+#include <spu_intrinsics.h>
+#include <spu_cache.h>
+
+extern unsigned long long __ea_local_store;
+extern char __cache_tag_array_size;
+
+#define LINE_SIZE 128
+#define TAG_MASK (LINE_SIZE - 1)
+
+#define WAYS 4
+#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)
+
+#define CACHE_LINES ((int) &__cache_tag_array_size / \
+ sizeof (struct __cache_tag_array) * WAYS)
+
+struct __cache_tag_array
+{
+ unsigned int tag_lo[WAYS];
+ unsigned int tag_hi[WAYS];
+ void *base[WAYS];
+ int reserved[WAYS];
+ vector unsigned short dirty_bits[WAYS];
+};
+
+extern struct __cache_tag_array __cache_tag_array[];
+extern char __cache[];
+
+/* To keep the code a little cleaner and to avoid 64/32-bit ifdefs
+   all over the place, we use macros.  */
+
+#ifdef __EA64__
+typedef unsigned long long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \
+ && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))
+
+#define GET_TAG(_entry, _way) \
+ ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \
+ | (unsigned long long)(_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \
+ (_entry)->tag_hi[(_way)] = (_tag) >> 32
+
+#else /*__EA32__*/
+typedef unsigned long addr;
+
+#define CHECK_TAG(_entry, _way, _tag) \
+ ((_entry)->tag_lo[(_way)] == (_tag))
+
+#define GET_TAG(_entry, _way) \
+ ((_entry)->tag_lo[(_way)])
+
+#define SET_TAG(_entry, _way, _tag) \
+ (_entry)->tag_lo[(_way)] = (_tag)
+
+#endif
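+
+/* Illustrative (hypothetical values): after SET_TAG (entry, 2, tag),
+   CHECK_TAG (entry, 2, tag) holds and GET_TAG (entry, 2) yields tag
+   again, whichever width addr has.  */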
+
+/* In GET_ENTRY, we cast away the high 32 bits,
+ as the tag is only in the low 32. */
+
+#define GET_ENTRY(_addr) \
+ ((struct __cache_tag_array *) \
+ si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
+ si_from_uint (SET_MASK)), \
+ si_from_uint ((unsigned int) __cache_tag_array))))
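+
+/* Assuming the tag-array size is a power of two, SET_MASK clears the
+   low seven (line-offset) bits, so (ea & SET_MASK) is already the
+   byte offset of a 128-byte tag entry; adding the array's base
+   address yields the entry pointer.  */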
+
+#define GET_CACHE_LINE(_addr, _way) \
+ ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE));
+
+#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
+#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
+#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)
+
+#define LS_FLAG 0x80000000
+#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
+#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
+#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
+
+static int dma_tag = 32;
+
+static void
+__cache_evict_entry (struct __cache_tag_array *entry, int way)
+{
+ addr tag = GET_TAG (entry, way);
+
+ if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
+ {
+#ifdef NONATOMIC
+ /* Non-atomic writes. */
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#else
+ /* Allocate a buffer large enough that we know it has 128 bytes
+ that are 128 byte aligned (for DMA). */
+
+ char buffer[LINE_SIZE + 127];
+ qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
+ qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
+ qword bits;
+ unsigned int mach_stat;
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ do
+ {
+ /* We atomically read the current memory into a buffer
+ modify the dirty bytes in the buffer, and write it
+ back. If writeback fails, loop and try again. */
+
+ mfc_getllar (buf_ptr, tag, 0, 0);
+ mfc_read_atomic_status ();
+
+ /* The method we're using to write 16 dirty bytes into
+ the buffer at a time uses fsmb which in turn uses
+ the least significant 16 bits of word 0, so we
+ load the bits and rotate so that the first bit of
+ the bitmap is in the first bit that fsmb will use. */
+
+ bits = (qword) entry->dirty_bits[way];
+ bits = si_rotqbyi (bits, -2);
+
+ /* Si_fsmb creates the mask of dirty bytes.
+ Use selb to nab the appropriate bits. */
+ buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));
+
+ /* Rotate to next 16 byte section of cache. */
+ bits = si_rotqbyi (bits, 2);
+
+ buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+ buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
+ bits = si_rotqbyi (bits, 2);
+
+ mfc_putllc (buf_ptr, tag, 0, 0);
+ }
+ while (mfc_read_atomic_status ());
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+#endif
+ }
+
+  /* In any case, mark the entry empty by setting the lo tag to 1.  */
+ SET_EMPTY (entry, way);
+ entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
+}
+
+void
+__cache_evict (__ea void *ea)
+{
+ addr tag = (addr) ea & ~TAG_MASK;
+ struct __cache_tag_array *entry = GET_ENTRY (ea);
+ int i = 0;
+
+ /* Cycles through all the possible ways an address could be at
+ and evicts the way if found. */
+
+ for (i = 0; i < WAYS; i++)
+ if (CHECK_TAG (entry, i, tag))
+ __cache_evict_entry (entry, i);
+}
+
+static void *
+__cache_fill (int way, addr tag)
+{
+ unsigned int oldmask, mach_stat;
+ char *line = ((void *) 0);
+
+ /* Reserve our DMA tag. */
+ if (dma_tag == 32)
+ dma_tag = mfc_tag_reserve ();
+
+ /* Enter critical section. */
+ mach_stat = spu_readch (SPU_RdMachStat);
+ spu_idisable ();
+
+ /* Issue DMA request. */
+ line = GET_CACHE_LINE (tag, way);
+ mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);
+
+ /* Wait for DMA completion. */
+ oldmask = mfc_read_tag_mask ();
+ mfc_write_tag_mask (1 << dma_tag);
+ mfc_read_tag_status_all ();
+ mfc_write_tag_mask (oldmask);
+
+ /* Leave critical section. */
+ if (__builtin_expect (mach_stat & 1, 0))
+ spu_ienable ();
+
+ return (void *) line;
+}
+
+static void
+__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
+{
+
+ addr tag = (addr) ea & ~TAG_MASK;
+ unsigned int lru = 0;
+ int i = 0;
+ int idx = 0;
+
+ /* If way > 4, then there are no empty slots, so we must evict
+ the least recently used entry. */
+ if (way >= 4)
+ {
+ for (i = 0; i < WAYS; i++)
+ {
+ if (GET_LRU (entry, i) > lru)
+ {
+ lru = GET_LRU (entry, i);
+ idx = i;
+ }
+ }
+ __cache_evict_entry (entry, idx);
+ way = idx;
+ }
+
+  /* Set the empty entry's tag and fill its cache line.  */
+
+ SET_TAG (entry, way, tag);
+ entry->reserved[way] = 0;
+
+ /* Check if the address is just an effective address within the
+ SPU's local store. */
+
+  /* Because the LS is not 256k aligned, we cannot simply mask and
+     compare here; we must check the whole range. */
+
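+  /* 0x40000 bytes = 256 KB, the size of the SPU local store.  */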
+ if ((addr) ea >= (addr) __ea_local_store
+ && (addr) ea < (addr) (__ea_local_store + 0x40000))
+ {
+ SET_IS_LS (entry, way);
+ entry->base[way] =
+ (void *) ((unsigned int) ((addr) ea -
+ (addr) __ea_local_store) & ~0x7f);
+ }
+ else
+ {
+ entry->base[way] = __cache_fill (way, tag);
+ }
+}
+
+void *
+__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
+{
+#ifdef __EA64__
+ unsigned int tag_hi;
+ qword etag_hi;
+#endif
+ unsigned int tag_lo;
+ struct __cache_tag_array *entry;
+
+ qword etag_lo;
+ qword equal;
+ qword bit_mask;
+ qword way;
+
+  /* In this first chunk we merely fill the pointer and tag.  */
+
+ entry = GET_ENTRY (ea);
+
+#ifndef __EA64__
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_uint ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
+#else
+ tag_lo =
+ si_to_uint (si_andc
+ (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));
+
+ tag_hi =
+ si_to_uint (si_shufb
+ (si_from_ullong ((addr) ea), si_from_uint (0),
+ si_from_uint (0x00010203)));
+#endif
+
+ /* Increment LRU in reserved bytes. */
+ si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
+ si_from_ptr (entry), 48);
+
+missreturn:
+ /* Check if the entry's lo_tag is equal to the address' lo_tag. */
+ etag_lo = si_lqd (si_from_ptr (entry), 0);
+ equal = si_ceq (etag_lo, si_from_uint (tag_lo));
+#ifdef __EA64__
+ /* And the high tag too. */
+ etag_hi = si_lqd (si_from_ptr (entry), 16);
+ equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
+#endif
+
+  if (si_to_uint (si_orx (equal)) == 0)
+ goto misshandler;
+
+ if (n_bytes_dirty)
+ {
+ /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
+ offset of the appropriate dirty bits. */
+ way = si_shli (si_clz (si_gbb (equal)), 2);
+
+ /* To create the bit_mask, we set it to all 1s (uint -1), then we
+ shift it over (128 - n_bytes_dirty) times. */
+
+ bit_mask = si_from_uint (-1);
+
+ bit_mask =
+ si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));
+
+ bit_mask =
+ si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));
+
+ /* Rotate it around to the correct offset. */
+ bit_mask =
+ si_rotqby (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));
+
+ bit_mask =
+ si_rotqbi (bit_mask,
+ si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));
+
+ /* Update the dirty bits. */
+ si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
+ si_from_ptr (entry), way);
+    }
+
+  /* We've definitely found the right entry; set LRU (reserved) to 0,
+     maintaining the LS flag (MSB). */
+
+ si_stqd (si_andc
+ (si_lqd (si_from_ptr (entry), 48),
+ si_and (equal, si_from_uint (~(LS_FLAG)))),
+ si_from_ptr (entry), 48);
+
+ return (void *)
+ si_to_uint (si_a
+ (si_orx
+ (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
+ si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));
+
+misshandler:
+ equal = si_ceqi (etag_lo, 1);
+ __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
+ goto missreturn;
+}
+
+void *
+__cache_fetch (__ea void *ea)
+{
+ return __cache_fetch_dirty (ea, 0);
+}
+
+void
+__cache_touch (__ea void *ea __attribute__ ((unused)))
+{
+ /* NO-OP for now. */
+}
+
+void __cache_flush (void) __attribute__ ((destructor));
+void
+__cache_flush (void)
+{
+ struct __cache_tag_array *entry = __cache_tag_array;
+ unsigned int i;
+ int j;
+
+ /* Cycle through each cache entry and evict all used ways. */
+
+ for (i = 0; i < CACHE_LINES / WAYS; i++)
+ {
+ for (j = 0; j < WAYS; j++)
+ if (!CHECK_EMPTY (entry, j))
+ __cache_evict_entry (entry, j);
+
+ entry++;
+ }
+}
diff --git a/gcc/config/spu/constraints.md b/gcc/config/spu/constraints.md
new file mode 100644
index 000000000..b1f594706
--- /dev/null
+++ b/gcc/config/spu/constraints.md
@@ -0,0 +1,179 @@
+;; Constraint definitions for SPU
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
+;; GCC: ffffiiiiiiii x x x x xxxx xx
+;; SPU: xxxx xxx xxxx xxxx x xxx xx x xxx xx
+;; FREE: ffff i a a a a a aa aaa
+;; x - used
+;; a - available
+;; i - available for integer immediates
+;; f - available for floating point immediates
+
+;; For most immediate constraints we have 3 variations to deal with the
+;; fact that const_int has no mode.  One variation treats const_int as
+;; a 32-bit value, another as a 64-bit value, and the third sign extends
+;; it to 128 bits.
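+;; For example (illustrative): (const_int -1) is 0xffffffff when
+;; treated as 32 bits, 0xffffffffffffffff when treated as 64 bits,
+;; and an all-ones quadword when sign extended to 128 bits.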
+
+(define_constraint "A"
+ "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is treated as a 32-bit value."
+ (ior (and (match_code "const_int,const_double,const_vector")
+ (match_test "immediate_load_p (op, SImode)"))
+ (match_code "symbol_ref,label_ref,high,const")))
+
+(define_constraint "B"
+ "An immediate for arithmetic instructions (e.g., ai, ceqi). const_int is treated as a 32-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "arith_immediate_p (op, SImode, -0x200, 0x1ff)")))
+
+(define_constraint "C"
+ "An immediate for and/xor/or instructions. const_int is treated as a 32-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "logical_immediate_p (op, SImode)")))
+
+(define_constraint "D"
+ "An immediate for iohl instruction. const_int is treated as a 32-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "iohl_immediate_p (op, SImode)")))
+
+(define_constraint "U"
+ "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is sign extended to 128 bit."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "immediate_load_p (op, TImode)")))
+
+(define_constraint "W"
+ "An immediate for shift and rotate instructions. const_int is treated as a 32-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "arith_immediate_p (op, SImode, -0x80000000ll, 0x7fffffffll)")))
+
+(define_constraint "Y"
+ "An immediate for and/xor/or instructions. const_int is sign extended as a 128 bit."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "logical_immediate_p (op, TImode)")))
+
+(define_constraint "Z"
+ "An immediate for iohl instruction. const_int is sign extended to 128 bit."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "iohl_immediate_p (op, TImode)")))
+
+(define_constraint "a"
+ "An immediate which can be loaded with the il/ila/ilh/ilhu instructions. const_int is treated as a 64-bit value."
+ (and (match_code "const_int")
+ (match_test "immediate_load_p (op, DImode)")))
+
+(define_constraint "c"
+ "An immediate for and/xor/or instructions. const_int is treated as a 64-bit value."
+ (and (match_code "const_int")
+ (match_test "logical_immediate_p (op, DImode)")))
+
+(define_constraint "d"
+ "An immediate for iohl instruction. const_int is treated as a 64-bit value."
+ (and (match_code "const_int")
+ (match_test "iohl_immediate_p (op, DImode)")))
+
+(define_constraint "f"
+ "An immediate which can be loaded with fsmbi."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "fsmbi_const_p (op)")))
+
+(define_constraint "j"
+ "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions. const_int is treated as a 32-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "cpat_const_p (op, SImode)")))
+
+(define_constraint "k"
+ "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions. const_int is treated as a 64-bit value."
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "cpat_const_p (op, DImode)")))
+
+(define_constraint "l"
+ "An immediate which can be loaded with one of the cbd/chd/cwd/cdd instructions."
+ (and (match_code "const_double,const_vector")
+ (match_test "cpat_const_p (op, TImode)")))
+
+
+;; Integer constraints
+
+(define_constraint "I"
+ "A constant in the range [-64, 63] for shift/rotate instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x40 && ival <= 0x3f")))
+
+(define_constraint "J"
+ "An unsigned 7-bit constant for conversion/nop/channel instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 0x7f")))
+
+(define_constraint "K"
+ "A signed 10-bit constant for most arithmetic instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x200 && ival <= 0x1ff")))
+
+(define_constraint "M"
+ "A signed 16-bit immediate for @code{stop}."
+ (and (match_code "const_int")
+ (match_test "ival >= -0x8000ll && ival <= 0x7fffll")))
+
+(define_constraint "N"
+ "An unsigned 16-bit constant for @code{iohl} and @code{fsmbi}."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 0xffff")))
+
+(define_constraint "O"
+ "An unsigned 7-bit constant whose 3 least significant bits are 0."
+ (and (match_code "const_int")
+ (match_test "(ival & 7) == 0")))
+
+(define_constraint "P"
+ "An unsigned 3-bit constant for 16-byte rotates and shifts"
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 7")))
+
+
+;; Memory constraints
+
+(define_memory_constraint "R"
+ "Call operand, reg, for indirect calls"
+ (and (match_code "mem")
+ (match_test "GET_CODE(XEXP(op, 0)) == REG")))
+
+(define_memory_constraint "S"
+ "Call operand, symbol, for relative calls."
+ (and (match_code "mem")
+ (match_test "!TARGET_LARGE_MEM
+ && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
+ || GET_CODE (XEXP (op, 0)) == LABEL_REF))")))
+
+(define_memory_constraint "T"
+ "Call operand, const_int, for absolute calls."
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == CONST_INT
+ && INTVAL (XEXP (op, 0)) >= 0
+ && INTVAL (XEXP (op, 0)) <= 0x3ffff")))
+
+
+;; Floating-point constant constraints.
+
+(define_constraint "v"
+ "Floating point power of 2 with exponent in [0..127]"
+ (and (match_code "const_double,const_vector")
+ (match_test "exp2_immediate_p (op, VOIDmode, 0, 127)")))
+
+(define_constraint "w"
+ "Floating point power of 2 with exponent in [-126..0]"
+ (and (match_code "const_double,const_vector")
+ (match_test "exp2_immediate_p (op, VOIDmode, -126, 0)")))
diff --git a/gcc/config/spu/divmodti4.c b/gcc/config/spu/divmodti4.c
new file mode 100644
index 000000000..8f70616bc
--- /dev/null
+++ b/gcc/config/spu/divmodti4.c
@@ -0,0 +1,166 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef int TItype __attribute__ ((mode (TI)));
+TItype __divti3 (TItype u, TItype v);
+TItype __modti3 (TItype u, TItype v);
+UTItype __udivti3 (UTItype u, UTItype v);
+UTItype __umodti3 (UTItype u, UTItype v);
+UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
+
+inline static unsigned int
+count_leading_zeros (UTItype x)
+{
+ qword c = si_clz (*(qword *) & x);
+ qword cmp0 = si_cgti (c, 31);
+ qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
+ qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
+ qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
+ s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
+ s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
+ return si_to_uint (s);
+}
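+
+/* The combination works because si_clz saturates at 32 for an
+   all-zero word: each later word's count is added only while every
+   earlier word was entirely zero, so the sum in the preferred slot
+   is the leading-zero count of the full 128-bit value.  */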
+
+/* Based on implementation of udivmodsi4, which is essentially
+ * an optimized version of gcc/config/udivmodsi4.c
+ clz %7,%2
+ clz %4,%1
+ il %5,1
+ fsmbi %0,0
+ sf %7,%4,%7
+ ori %3,%1,0
+ shl %5,%5,%7
+ shl %4,%2,%7
+1: or %8,%0,%5
+ rotmi %5,%5,-1
+ clgt %6,%4,%3
+ sf %7,%4,%3
+ rotmi %4,%4,-1
+ selb %0,%8,%0,%6
+ selb %3,%7,%3,%6
+3: brnz %5,1b
+ */
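+
+/* The TImode version below follows the same shift-and-subtract
+   scheme, but synthesizes each 128-bit subtract from four 32-bit
+   borrow-propagating steps (si_bg followed by chained si_bgx and a
+   final si_sfx).  */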
+
+UTItype
+__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
+{
+ qword shift =
+ si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
+ qword n0 = *(qword *) & num;
+ qword d0 = *(qword *) & den;
+ qword bit = si_andi (si_fsmbi (1), 1);
+ qword r0 = si_il (0);
+ qword m1 = si_fsmbi (0x000f);
+ qword mask, r1, n1;
+
+ d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
+ bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
+
+ do
+ {
+ r1 = si_or (r0, bit);
+
+      /* n1 = n0 - d0 in TImode.  */
+ n1 = si_bg (d0, n0);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_sfx (d0, n0, n1);
+
+ mask = si_fsm (si_cgti (n1, -1));
+ r0 = si_selb (r0, r1, mask);
+ n0 = si_selb (n0, n1, mask);
+ bit = si_rotqmbii (bit, -1);
+ d0 = si_rotqmbii (d0, -1);
+ }
+ while (si_to_uint (si_orx (bit)));
+ if (rp)
+ *rp = *(UTItype *) & n0;
+ return *(UTItype *) & r0;
+}
+
+UTItype
+__udivti3 (UTItype n, UTItype d)
+{
+ return __udivmodti4 (n, d, (UTItype *)0);
+}
+
+UTItype
+__umodti3 (UTItype n, UTItype d)
+{
+ UTItype w;
+ __udivmodti4 (n, d, &w);
+ return w;
+}
+
+TItype
+__divti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ w = __udivmodti4 (n, d, (UTItype *)0);
+ if (c)
+ w = -w;
+ return w;
+}
+
+TItype
+__modti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ __udivmodti4 (n, d, (UTItype *) &w);
+ if (c)
+ w = -w;
+ return w;
+}
diff --git a/gcc/config/spu/divv2df3.c b/gcc/config/spu/divv2df3.c
new file mode 100644
index 000000000..9d5e1a594
--- /dev/null
+++ b/gcc/config/spu/divv2df3.c
@@ -0,0 +1,195 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+vector double __divv2df3 (vector double a_in, vector double b_in);
+
+/* __divv2df3 divides the vector dividend a by the vector divisor b and
+ returns the resulting vector quotient. Maximum error about 0.5 ulp
+ over entire double range including denorms, compared to true result
+ in round-to-nearest rounding mode. Handles Inf or NaN operands and
+ results correctly. */
+
+vector double
+__divv2df3 (vector double a_in, vector double b_in)
+{
+ /* Variables */
+ vec_int4 exp, exp_bias;
+ vec_uint4 no_underflow, overflow;
+ vec_float4 mant_bf, inv_bf;
+ vec_ullong2 exp_a, exp_b;
+ vec_ullong2 a_nan, a_zero, a_inf, a_denorm, a_denorm0;
+ vec_ullong2 b_nan, b_zero, b_inf, b_denorm, b_denorm0;
+ vec_ullong2 nan;
+ vec_uint4 a_exp, b_exp;
+ vec_ullong2 a_mant_0, b_mant_0;
+ vec_ullong2 a_exp_1s, b_exp_1s;
+ vec_ullong2 sign_exp_mask;
+
+ vec_double2 a, b;
+ vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult;
+
+ /* Constants */
+ vec_uint4 exp_mask_u32 = spu_splats((unsigned int)0x7FF00000);
+ vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8, 9,10,11, 8,9,10,11};
+ vec_uchar16 swap_32 = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
+ vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
+ vec_ullong2 sign_mask = spu_splats(0x8000000000000000ULL);
+ vec_float4 onef = spu_splats(1.0f);
+ vec_double2 one = spu_splats(1.0);
+ vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL);
+
+ sign_exp_mask = spu_or(sign_mask, exp_mask);
+
+ /* Extract the floating point components from each of the operands including
+ * exponent and mantissa.
+ */
+ a_exp = (vec_uint4)spu_and((vec_uint4)a_in, exp_mask_u32);
+ a_exp = spu_shuffle(a_exp, a_exp, splat_hi);
+ b_exp = (vec_uint4)spu_and((vec_uint4)b_in, exp_mask_u32);
+ b_exp = spu_shuffle(b_exp, b_exp, splat_hi);
+
+ a_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)a_in, sign_exp_mask), 0);
+ a_mant_0 = spu_and(a_mant_0, spu_shuffle(a_mant_0, a_mant_0, swap_32));
+
+ b_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)b_in, sign_exp_mask), 0);
+ b_mant_0 = spu_and(b_mant_0, spu_shuffle(b_mant_0, b_mant_0, swap_32));
+
+ a_exp_1s = (vec_ullong2)spu_cmpeq(a_exp, exp_mask_u32);
+ b_exp_1s = (vec_ullong2)spu_cmpeq(b_exp, exp_mask_u32);
+
+  /* Identify all possible special values that must be accommodated including:
+ * +-denorm, +-0, +-infinity, and NaNs.
+ */
+ a_denorm0= (vec_ullong2)spu_cmpeq(a_exp, 0);
+ a_nan = spu_andc(a_exp_1s, a_mant_0);
+ a_zero = spu_and (a_denorm0, a_mant_0);
+ a_inf = spu_and (a_exp_1s, a_mant_0);
+ a_denorm = spu_andc(a_denorm0, a_zero);
+
+ b_denorm0= (vec_ullong2)spu_cmpeq(b_exp, 0);
+ b_nan = spu_andc(b_exp_1s, b_mant_0);
+ b_zero = spu_and (b_denorm0, b_mant_0);
+ b_inf = spu_and (b_exp_1s, b_mant_0);
+ b_denorm = spu_andc(b_denorm0, b_zero);
+
+  /* Scale denorm inputs into normalized numbers by conditionally scaling the
+ * input parameters.
+ */
+ a = spu_sub(spu_or(a_in, exp_53), spu_sel(exp_53, a_in, sign_mask));
+ a = spu_sel(a_in, a, a_denorm);
+
+ b = spu_sub(spu_or(b_in, exp_53), spu_sel(exp_53, b_in, sign_mask));
+ b = spu_sel(b_in, b, b_denorm);
+
+  /* Extract the divisor and dividend exponents and force the parameters into the signed
+ * range [1.0,2.0) or [-1.0,2.0).
+ */
+ exp_a = spu_and((vec_ullong2)a, exp_mask);
+ exp_b = spu_and((vec_ullong2)b, exp_mask);
+
+ mant_a = spu_sel(a, one, (vec_ullong2)exp_mask);
+ mant_b = spu_sel(b, one, (vec_ullong2)exp_mask);
+
+  /* Approximate the reciprocal of b in single precision by using
+   * the reciprocal estimate instruction followed by one
+   * single-precision iteration of Newton-Raphson.
+ */
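+  /* One Newton-Raphson step: x1 = x0 + x0*(1 - b*x0); spu_nmsub
+     forms the residual (1 - b*x0) and spu_madd folds it back into
+     the estimate.  */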
+ mant_bf = spu_roundtf(mant_b);
+ inv_bf = spu_re(mant_bf);
+ inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);
+
+ /* Perform 2 more Newton-Raphson iterations in double precision. The
+ * result (q1) is in the range (0.5, 2.0).
+ */
+ inv_b = spu_extend(inv_bf);
+ inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
+ q0 = spu_mul(mant_a, inv_b);
+ q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);
+
+ /* Determine the exponent correction factor that must be applied
+ * to q1 by taking into account the exponent of the normalized inputs
+ * and the scale factors that were applied to normalize them.
+ */
+ exp = spu_rlmaska(spu_sub((vec_int4)exp_a, (vec_int4)exp_b), -20);
+ exp = spu_add(exp, (vec_int4)spu_add(spu_and((vec_int4)a_denorm, -0x34), spu_and((vec_int4)b_denorm, 0x34)));
+
+ /* Bias the quotient exponent depending on the sign of the exponent correction
+ * factor so that a single multiplier will ensure the entire double precision
+ * domain (including denorms) can be achieved.
+ *
+ * exp bias q1 adjust exp
+ * ===== ======== ==========
+ * positive 2^+65 -65
+ * negative 2^-64 +64
+ */
+ exp_bias = spu_xor(spu_rlmaska(exp, -31), 64);
+ exp = spu_sub(exp, exp_bias);
+
+ q1 = spu_sel(q1, (vec_double2)spu_add((vec_int4)q1, spu_sl(exp_bias, 20)), exp_mask);
+
+  /* Compute a multiplier (mult) to be applied to the quotient (q1) to produce the
+ * expected result. On overflow, clamp the multiplier to the maximum non-infinite
+ * number in case the rounding mode is not round-to-nearest.
+ */
+ exp = spu_add(exp, 0x3FF);
+ no_underflow = spu_cmpgt(exp, 0);
+ overflow = spu_cmpgt(exp, 0x7FE);
+ exp = spu_and(spu_sl(exp, 20), (vec_int4)no_underflow);
+ exp = spu_and(exp, (vec_int4)exp_mask);
+
+ mult = spu_sel((vec_double2)exp, (vec_double2)(spu_add((vec_uint4)exp_mask, -1)), (vec_ullong2)overflow);
+
+ /* Handle special value conditions. These include:
+ *
+ * 1) IF either operand is a NaN OR both operands are 0 or INFINITY THEN a NaN
+ * results.
+   * 2) ELSE IF the dividend is an INFINITY OR the divisor is 0 THEN an INFINITY results.
+ * 3) ELSE IF the dividend is 0 OR the divisor is INFINITY THEN a 0 results.
+ */
+ mult = spu_andc(mult, (vec_double2)spu_or(a_zero, b_inf));
+ mult = spu_sel(mult, (vec_double2)exp_mask, spu_or(a_inf, b_zero));
+
+ nan = spu_or(a_nan, b_nan);
+ nan = spu_or(nan, spu_and(a_zero, b_zero));
+ nan = spu_or(nan, spu_and(a_inf, b_inf));
+
+ mult = spu_or(mult, (vec_double2)nan);
+
+ /* Scale the final quotient */
+
+ q2 = spu_mul(q1, mult);
+
+ return (q2);
+}
+
+
+/* We use the same function for vector and scalar division. Provide the
+ scalar entry point as an alias. */
+double __divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
+/* Some toolchain builds used the __fast_divdf3 name for this helper
+   function.  Provide it as an alternate entry point for compatibility.  */
+double __fast_divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
diff --git a/gcc/config/spu/float_disf.c b/gcc/config/spu/float_disf.c
new file mode 100644
index 000000000..0f4fe3d8e
--- /dev/null
+++ b/gcc/config/spu/float_disf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatdisf (long long x);
+
+float __floatdisf (long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+     This file is solely used to provide the __floatdisf function
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/gcc/config/spu/float_unsdidf.c b/gcc/config/spu/float_unsdidf.c
new file mode 100644
index 000000000..4fdf0b88a
--- /dev/null
+++ b/gcc/config/spu/float_unsdidf.c
@@ -0,0 +1,54 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __didf_scale[16] __attribute__ ((__aligned__ (16))) = {
+ 0x00, 0x00, 0x04, 0x3e,
+ 0x00, 0x00, 0x04, 0x1e,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+const unsigned char __didf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unsdidf (unsigned long long int)
+ Construct two exact doubles representing the high and low parts (in
+ parallel), then add them. */
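+/* Sketch of the idea, as read from the code: x == hi*2^32 + lo; the
+   shuffle assembles the two exact doubles side by side in one qword,
+   and the final si_dfa adds the low-part double (shifted into the
+   other slot) to yield hi*2^32 + lo in the preferred slot.  */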
+qword __float_unsdidf (qword DI);
+qword
+__float_unsdidf (qword DI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7, t8;
+ t0 = si_clz (DI);
+ t1 = si_shl (DI, t0);
+ t2 = si_ceqi (t0, 32);
+ t3 = si_sf (t0, *(const qword *) __didf_scale);
+ t4 = si_a (t1, t1);
+ t5 = si_andc (t3, t2);
+ t6 = si_shufb (t5, t4, *(const qword *) __didf_pat);
+ t7 = si_shlqbii (t6, 4);
+ t8 = si_shlqbyi (t7, 8);
+ return si_dfa (t7, t8);
+}
diff --git a/gcc/config/spu/float_unsdisf.c b/gcc/config/spu/float_unsdisf.c
new file mode 100644
index 000000000..7af120ecc
--- /dev/null
+++ b/gcc/config/spu/float_unsdisf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatundisf (unsigned long long x);
+
+float __floatundisf (unsigned long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+ This file is solely used to provide the __floatundisf function
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/gcc/config/spu/float_unssidf.c b/gcc/config/spu/float_unssidf.c
new file mode 100644
index 000000000..b255f81af
--- /dev/null
+++ b/gcc/config/spu/float_unssidf.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __sidf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unssidf (unsigned int SI) */
+qword __float_unssidf (qword SI);
+qword
+__float_unssidf (qword SI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7;
+ t0 = si_clz (SI);
+ t1 = si_il (1054);
+ t2 = si_shl (SI, t0);
+ t3 = si_ceqi (t0, 32);
+ t4 = si_sf (t0, t1);
+ t5 = si_a (t2, t2);
+ t6 = si_andc (t4, t3);
+ t7 = si_shufb (t6, t5, *(const qword *) __sidf_pat);
+ return si_shlqbii (t7, 4);
+}
diff --git a/gcc/config/spu/mfc_multi_tag_release.c b/gcc/config/spu/mfc_multi_tag_release.c
new file mode 100644
index 000000000..62eb2beeb
--- /dev/null
+++ b/gcc/config/spu/mfc_multi_tag_release.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release a sequential group of tags from exclusive use. The sequential
+ group of tags is the range starting from <first_tag> through
+ <first_tag>+<number_of_tags>-1. Upon sucessful release, MFC_DMA_TAG_VALID
+ is returned and the tags become available for future reservation.
+
+ If the specified tags were not previously reserved, no action is
+ taken and MFC_DMA_TAG_INVALID is returned. */
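+
+/* Illustrative pairing with the reserve routine (tag count
+   hypothetical):
+
+     unsigned int t = __mfc_multi_tag_reserve (4);
+     if (t != MFC_DMA_TAG_INVALID)
+       {
+         ... DMAs using tags t .. t+3 ...
+         __mfc_multi_tag_release (t, 4);
+       }  */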
+
+unsigned int
+__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags)
+{
+ vector unsigned int table_copy, tmp, tmp1;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int is_invalid;
+ unsigned int last_tag;
+ vector unsigned int has_been_reserved;
+
+ last_tag = first_tag + number_of_tags;
+
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -last_tag);
+ table_copy = spu_xor (table_copy, -1);
+
+ /* Make sure the tags are in range and valid. */
+ tmp = spu_cmpgt (spu_promote(last_tag, 0), 32);
+ tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32);
+ is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31);
+
+ /* All bits are set to 1 if invalid, 0 if valid. */
+ is_invalid = spu_or (tmp, is_invalid);
+ is_invalid = spu_or (tmp1, is_invalid);
+
+  /* Check whether these tags have been reserved.  */
+ tmp = spu_rlmask (one, (int)-number_of_tags);
+ tmp1 = spu_sl (__mfc_tag_table, first_tag);
+ has_been_reserved = spu_cmpgt(tmp1, tmp);
+
+ is_invalid = spu_or (has_been_reserved, is_invalid);
+
+ table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid);
+
+ return spu_extract (is_invalid, 0);
+}
+
diff --git a/gcc/config/spu/mfc_multi_tag_reserve.c b/gcc/config/spu/mfc_multi_tag_reserve.c
new file mode 100644
index 000000000..06d702592
--- /dev/null
+++ b/gcc/config/spu/mfc_multi_tag_reserve.c
@@ -0,0 +1,84 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserve a sequential group of tags for exclusive use. The number of
+ tags to be reserved is specified by the <number_of_tags> parameter.
+ This routine returns the first tag ID for a sequential list of
+ available tags and marks them as reserved. The reserved group
+ of tags is in the range starting from the returned tag through
+ the returned tag + <number_of_tags>-1.
+
+ If the number of tags requested exceeds the number of available
+ sequential tags, then MFC_DMA_TAG_INVALID is returned indicating
+ that the request could not be serviced. */
+
+unsigned int
+__mfc_multi_tag_reserve (unsigned int number_of_tags)
+{
+ vector unsigned int table_copy;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int count_busy, is_valid;
+ vector unsigned int count_total;
+ vector unsigned int count_avail = (vector unsigned int) { 0, 0, 0, 0 };
+ vector unsigned int index = (vector unsigned int) { 0, 0, 0, 0 };
+
+ table_copy = __mfc_tag_table;
+
+
+ /* count_busy: number of consecutive busy tags
+ count_avail: number of consecutive free tags
+ table_copy: temporary copy of the tag table
+ count_total: sum of count_busy and count_avail
+ index: index of the current working tag */
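+
+  /* Worked example (illustrative): with the initial table word
+     0x7FFFFFFF (only tag 0 reserved), the first pass yields
+     count_busy = 1 and count_avail = 31, so any request for up to
+     31 tags succeeds with index 1.  */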
+ do
+ {
+ table_copy = spu_sl (table_copy, count_avail);
+
+ count_busy = spu_cntlz (table_copy);
+ table_copy = spu_sl (table_copy, count_busy);
+ count_avail = spu_cntlz (spu_xor(table_copy, -1));
+ count_total = spu_add (count_busy, count_avail);
+ index = spu_add (index, count_total);
+ }
+ while (spu_extract (count_avail, 0) < number_of_tags
+ && spu_extract (table_copy, 0) != 0);
+
+ index = spu_sub (index, count_avail);
+
+ /* is_valid is set to 0xFFFFFFFF if table_copy == 0, 0 otherwise. */
+ is_valid = spu_cmpeq (table_copy, 0);
+ index = spu_sel (index, is_valid, is_valid);
+
+  /* Now actually mark the tags as used.  */
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -number_of_tags - spu_extract (index, 0));
+ table_copy = spu_sel (table_copy, __mfc_tag_table, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_valid);
+
+ return spu_extract (index, 0);
+}
+
diff --git a/gcc/config/spu/mfc_tag_release.c b/gcc/config/spu/mfc_tag_release.c
new file mode 100644
index 000000000..d59c57130
--- /dev/null
+++ b/gcc/config/spu/mfc_tag_release.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release the specified DMA tag from exclusive use. Once released, the
+   tag is available for future reservation.  Upon successful release,
+ MFC_DMA_TAG_VALID is returned. If the specified tag is not in the
+ range 0 to 31, or had not been reserved, no action is taken and
+ MFC_DMA_TAG_INVALID is returned. */
+
+unsigned int
+__mfc_tag_release (unsigned int tag)
+{
+ vector unsigned int is_invalid;
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector signed int zero = (vector signed int) { 0, 0, 0, 0 };
+
+ vector signed int has_been_reserved;
+
+ /* Check if the tag is out of range. */
+ is_invalid = spu_cmpgt (spu_promote (tag, 0), 31);
+
+  /* Check whether the tag has been reserved: all 1s if it has not
+     been reserved, 0 otherwise. */
+ has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag);
+ has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved);
+
+ /* Set invalid. */
+ is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid);
+
+ mask = spu_rlmask (mask, (int)(-tag));
+ __mfc_tag_table = spu_or (__mfc_tag_table, mask);
+
+ return spu_extract (is_invalid, 0);
+}
+
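Taken together with __mfc_tag_reserve below, a typical caller reserves a tag, issues a DMA on it, and releases it afterwards. A hedged usage sketch, assuming the usual spu_mfcio.h wrappers (mfc_tag_reserve, mfc_tag_release, mfc_get, mfc_write_tag_mask, mfc_read_tag_status_all) and the MFC_DMA_TAG_INVALID sentinel; the function fetch_with_tag is hypothetical:

    #include <spu_mfcio.h>

    /* Illustrative only: fetch size bytes from effective address ea
       into local store ls using a dynamically reserved tag.  */
    void
    fetch_with_tag (volatile void *ls, unsigned long long ea, unsigned int size)
    {
      unsigned int tag = mfc_tag_reserve ();
      if (tag == MFC_DMA_TAG_INVALID)
        return;                           /* All 32 tags are in use.  */

      mfc_get (ls, ea, size, tag, 0, 0);  /* Start the transfer.  */
      mfc_write_tag_mask (1 << tag);      /* Select this tag...  */
      mfc_read_tag_status_all ();         /* ...and wait for completion.  */

      mfc_tag_release (tag);
    }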
diff --git a/gcc/config/spu/mfc_tag_reserve.c b/gcc/config/spu/mfc_tag_reserve.c
new file mode 100644
index 000000000..23b4817c7
--- /dev/null
+++ b/gcc/config/spu/mfc_tag_reserve.c
@@ -0,0 +1,51 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserves a DMA tag for exclusive use. This routine returns an available
+ tag id in the range 0 to 31 and marks the tag as reserved. If no tags
+ are available, MFC_DMA_TAG_INVALID is returned indicating that all tags
+ are already reserved. */
+
+unsigned int
+__mfc_tag_reserve (void)
+{
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector unsigned int count_zeros, is_valid;
+ vector signed int count_neg;
+
+ count_zeros = spu_cntlz (__mfc_tag_table);
+ count_neg = spu_sub (0, (vector signed int) count_zeros);
+
+ mask = spu_rlmask (mask, (vector signed int) count_neg);
+ __mfc_tag_table = spu_andc (__mfc_tag_table, mask);
+
+ is_valid = spu_cmpeq (count_zeros, 32);
+ count_zeros = spu_sel (count_zeros, is_valid, is_valid);
+
+ return spu_extract (count_zeros, 0);
+}
+
diff --git a/gcc/config/spu/mfc_tag_table.c b/gcc/config/spu/mfc_tag_table.c
new file mode 100644
index 000000000..bd08c580c
--- /dev/null
+++ b/gcc/config/spu/mfc_tag_table.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The free tag table used by the MFC tag manager, with tag0
+ reserved for the overlay manager. */
+__vector unsigned int
+__mfc_tag_table = (__vector unsigned int) { 0x7FFFFFFF, -1, -1, -1 };
+
+/* Arrange to release tag0 if overlays are not present. */
+static void __mfc_tag_init (void) __attribute__ ((constructor));
+
+static void
+__mfc_tag_init (void)
+{
+ extern void _ovly_table __attribute__ ((weak));
+
+ if (&_ovly_table == 0)
+ __mfc_tag_table = (__vector unsigned int) { -1, -1, -1, -1 };
+}
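The table uses an MSB-first encoding: bit 31 of the first vector word corresponds to tag 0, and only that preferred word appears to be consulted by the reserve and release routines above. A small illustration of the initial value (an aside, not part of the patch):

    /* 0x7FFFFFFF = 0111...1: bit 31 clear, so tag 0 starts out reserved
       (for the overlay manager) and tags 1-31 start out free.  */
    unsigned int word0 = 0x7FFFFFFF;
    int tag0_is_free = (word0 >> 31) & 1;   /* 0: reserved.  */
    int tag1_is_free = (word0 >> 30) & 1;   /* 1: free.  */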
diff --git a/gcc/config/spu/multi3.c b/gcc/config/spu/multi3.c
new file mode 100644
index 000000000..e7d76802d
--- /dev/null
+++ b/gcc/config/spu/multi3.c
@@ -0,0 +1,97 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+/* A straightforward vectorization and unrolling of
+ *   short l[8], r[8];
+ *   TItype total = 0;
+ *   for (i = 0; i < 8; i++)
+ *     for (j = 0; j < 8; j++)
+ *       total += (TItype)(l[7-i] * r[7-j]) << (16 * (i + j));
+ */
+TItype
+__multi3 (TItype l, TItype r)
+{
+ qword u = *(qword *) & l;
+ qword v = *(qword *) & r;
+ qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+ qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+ qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+ qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+ qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+ qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+ qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+ qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+ qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14);
+ qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+ qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12);
+ qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+ qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10);
+ qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+ qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8);
+ qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+ qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6);
+ qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+ qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4);
+ qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+ qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2);
+ qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+ qword part7l = si_mpyu (u, splat7);
+
+ qword carry, total0, total1, total2, total3, total4;
+ qword total5, total6, total7, total8, total9, total10;
+ qword total;
+
+ total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+ total1 = si_a (part2l, part3h);
+ total2 = si_a (part3l, part4h);
+ total3 = si_a (part4l, part5h);
+ total4 = si_a (part5l, part6h);
+ total5 = si_a (part6l, part7h);
+ total6 = si_a (total0, total1);
+ total7 = si_a (total2, total3);
+ total8 = si_a (total4, total5);
+ total9 = si_a (total6, total7);
+ total10 = si_a (total8, total9);
+
+ carry = si_cg (part2l, part3h);
+ carry = si_a (carry, si_cg (part3l, part4h));
+ carry = si_a (carry, si_cg (part4l, part5h));
+ carry = si_a (carry, si_cg (part5l, part6h));
+ carry = si_a (carry, si_cg (part6l, part7h));
+ carry = si_a (carry, si_cg (total0, total1));
+ carry = si_a (carry, si_cg (total2, total3));
+ carry = si_a (carry, si_cg (total4, total5));
+ carry = si_a (carry, si_cg (total6, total7));
+ carry = si_a (carry, si_cg (total8, total9));
+ carry = si_shlqbyi (carry, 4);
+
+ total = si_cg (total10, carry);
+ total = si_shlqbyi (total, 4);
+ total = si_cgx (total10, carry, total);
+ total = si_shlqbyi (total, 4);
+ total = si_addx (total10, carry, total);
+ return *(TItype *) & total;
+}
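As a cross-check (an aside, not part of the patch), the double loop in the comment is the usual schoolbook product truncated to 128 bits. An equivalent portable reference using 64-bit halves, handy for testing the SPU version on another host; the name multi3_ref is hypothetical:

    typedef int TItype __attribute__ ((mode (TI)));
    typedef unsigned int UTItype __attribute__ ((mode (TI)));

    /* Low 128 bits of l * r: the full low x low product plus the two
       cross products shifted into the high half (high x high falls
       entirely outside the low 128 bits and is dropped).  */
    TItype
    multi3_ref (TItype l, TItype r)
    {
      unsigned long long ll = (unsigned long long) l;
      unsigned long long rl = (unsigned long long) r;
      unsigned long long lh = (unsigned long long) ((UTItype) l >> 64);
      unsigned long long rh = (unsigned long long) ((UTItype) r >> 64);

      UTItype total = (UTItype) ll * rl;
      total += (UTItype) (ll * rh + lh * rl) << 64;
      return (TItype) total;
    }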
diff --git a/gcc/config/spu/predicates.md b/gcc/config/spu/predicates.md
new file mode 100644
index 000000000..8c6798d80
--- /dev/null
+++ b/gcc/config/spu/predicates.md
@@ -0,0 +1,122 @@
+;; Predicate definitions for CELL SPU
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 if the operand is constant zero of its mode.
+(define_predicate "const_zero_operand"
+ (and (match_code "const_int,const,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+(define_predicate "const_one_operand"
+ (and (match_code "const_int,const,const_double,const_vector")
+ (match_test "op == CONST1_RTX (mode)")))
+
+(define_predicate "spu_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (ior (not (match_code "subreg"))
+ (match_test "valid_subreg (op)"))))
+
+(define_predicate "spu_nonimm_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (ior (not (match_code "subreg"))
+ (match_test "valid_subreg (op)"))))
+
+(define_predicate "spu_nonmem_operand"
+ (and (match_operand 0 "nonmemory_operand")
+ (ior (not (match_code "subreg"))
+ (match_test "valid_subreg (op)"))))
+
+(define_predicate "spu_mov_operand"
+ (ior (match_operand 0 "memory_operand")
+ (match_operand 0 "spu_nonmem_operand")))
+
+(define_predicate "spu_dest_operand"
+ (ior (match_operand 0 "memory_operand")
+ (match_operand 0 "spu_reg_operand")))
+
+(define_predicate "call_operand"
+ (and (match_code "mem")
+ (match_test "(!TARGET_LARGE_MEM && satisfies_constraint_S (op))
+ || (satisfies_constraint_R (op)
+ && REGNO (XEXP (op, 0)) != FRAME_POINTER_REGNUM
+ && REGNO (XEXP (op, 0)) != ARG_POINTER_REGNUM
+ && (REGNO (XEXP (op, 0)) < FIRST_PSEUDO_REGISTER
+ || REGNO (XEXP (op, 0)) > LAST_VIRTUAL_REGISTER))")))
+
+(define_predicate "vec_imm_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "spu_legitimate_constant_p (op)")))
+
+(define_predicate "spu_arith_operand"
+ (match_code "reg,subreg,const_int,const_vector")
+ {
+ if (spu_reg_operand (op, mode))
+ return 1;
+ if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_VECTOR)
+ return arith_immediate_p (op, mode, -0x200, 0x1ff);
+ return 0;
+ })
+
+(define_predicate "spu_logical_operand"
+ (match_code "reg,subreg,const_int,const_double,const_vector")
+ {
+ if (spu_reg_operand (op, mode))
+ return 1;
+ if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR)
+ return logical_immediate_p (op, mode);
+ return 0;
+ })
+
+(define_predicate "spu_ior_operand"
+ (match_code "reg,subreg,const_int,const_double,const_vector")
+ {
+ if (spu_reg_operand (op, mode))
+ return 1;
+ if (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR)
+ return logical_immediate_p (op, mode)
+ || iohl_immediate_p (op, mode);
+ return 0;
+ })
+
+(define_predicate "imm_K_operand"
+ (and (match_code "const_int")
+ (match_test "arith_immediate_p (op, mode, -0x200, 0x1ff)")))
+
+;; Return 1 if OP is a comparison operation that is valid for a branch insn.
+;; We only check the opcode against the mode of the register value here.
+(define_predicate "branch_comparison_operator"
+ (and (match_code "eq,ne")
+ (ior (match_test "GET_MODE (XEXP (op, 0)) == HImode")
+ (match_test "GET_MODE (XEXP (op, 0)) == SImode"))))
+
+(define_predicate "spu_inv_exp2_operand"
+ (and (match_code "const_double,const_vector")
+ (and (match_operand 0 "immediate_operand")
+ (match_test "exp2_immediate_p (op, mode, -126, 0)"))))
+
+(define_predicate "spu_exp2_operand"
+ (and (match_code "const_double,const_vector")
+ (and (match_operand 0 "immediate_operand")
+ (match_test "exp2_immediate_p (op, mode, 0, 127)"))))
+
+(define_predicate "shiftrt_operator"
+ (match_code "lshiftrt,ashiftrt"))
+
+(define_predicate "extend_operator"
+ (match_code "sign_extend,zero_extend"))
+
diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
new file mode 100644
index 000000000..4d01d945c
--- /dev/null
+++ b/gcc/config/spu/spu-builtins.def
@@ -0,0 +1,781 @@
+/* Definitions of builtin functions for the Synergistic Processing Unit (SPU). */
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* The first argument to these macros is the return type of the builtin;
+ * the remaining arguments are the parameter types of the builtin. */
+#define _A1(a) {a, SPU_BTI_END_OF_PARAMS}
+#define _A2(a,b) {a, b, SPU_BTI_END_OF_PARAMS}
+#define _A3(a,b,c) {a, b, c, SPU_BTI_END_OF_PARAMS}
+#define _A4(a,b,c,d) {a, b, c, d, SPU_BTI_END_OF_PARAMS}
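To make the encoding concrete (a hedged aside, not part of the patch), one entry from the list below decodes as follows; reading SPU_BTI_S10_4 as a signed 10-bit, 16-byte-scaled immediate is an assumption based on the lqd d-form offset:

    /* DEF_BUILTIN (SI_LQD, CODE_FOR_spu_lqd, "si_lqd", B_INSN,
                    _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10_4))

       _A3 expands to the parameter array
         { SPU_BTI_QUADWORD,      -- return type: a quadword
           SPU_BTI_QUADWORD,      -- argument 1: the base quadword
           SPU_BTI_S10_4,         -- argument 2: signed 10-bit immediate, x16
           SPU_BTI_END_OF_PARAMS }
       i.e. roughly: qword si_lqd (qword base, int offset);  */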
+
+/* definitions to support si intrinsic functions: (These and other
+ * builtin definitions must precede the definitions of the overloaded
+ * generic intrinsics.) */
+
+DEF_BUILTIN (SI_LQD, CODE_FOR_spu_lqd, "si_lqd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10_4))
+DEF_BUILTIN (SI_LQX, CODE_FOR_spu_lqx, "si_lqx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_LQA, CODE_FOR_spu_lqa, "si_lqa", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U16_2))
+DEF_BUILTIN (SI_LQR, CODE_FOR_spu_lqr, "si_lqr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_S16_2))
+DEF_BUILTIN (SI_STQD, CODE_FOR_spu_stqd, "si_stqd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10_4))
+DEF_BUILTIN (SI_STQX, CODE_FOR_spu_stqx, "si_stqx", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_STQA, CODE_FOR_spu_stqa, "si_stqa", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_U16_2))
+DEF_BUILTIN (SI_STQR, CODE_FOR_spu_stqr, "si_stqr", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S16_2))
+DEF_BUILTIN (SI_CBD, CODE_FOR_spu_cbx, "si_cbd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7))
+DEF_BUILTIN (SI_CBX, CODE_FOR_spu_cbx, "si_cbx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CHD, CODE_FOR_spu_chx, "si_chd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7))
+DEF_BUILTIN (SI_CHX, CODE_FOR_spu_chx, "si_chx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CWD, CODE_FOR_spu_cwx, "si_cwd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7))
+DEF_BUILTIN (SI_CWX, CODE_FOR_spu_cwx, "si_cwx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CDD, CODE_FOR_spu_cdx, "si_cdd", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S7))
+DEF_BUILTIN (SI_CDX, CODE_FOR_spu_cdx, "si_cdx", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ILH, CODE_FOR_movv8hi, "si_ilh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16))
+DEF_BUILTIN (SI_ILHU, CODE_FOR_spu_ilhu, "si_ilhu", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16))
+DEF_BUILTIN (SI_IL, CODE_FOR_movv4si, "si_il", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_S16))
+DEF_BUILTIN (SI_ILA, CODE_FOR_movv4si, "si_ila", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U18))
+DEF_BUILTIN (SI_IOHL, CODE_FOR_iorv4si3, "si_iohl", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U16))
+DEF_BUILTIN (SI_FSMBI, CODE_FOR_spu_fsmb, "si_fsmbi", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_16))
+DEF_BUILTIN (SI_AH, CODE_FOR_addv8hi3, "si_ah", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_AHI, CODE_FOR_addv8hi3, "si_ahi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_A, CODE_FOR_addv4si3, "si_a", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_AI, CODE_FOR_addv4si3, "si_ai", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ADDX, CODE_FOR_addx_v4si, "si_addx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CG, CODE_FOR_cg_v4si, "si_cg", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CGX, CODE_FOR_cgx_v4si, "si_cgx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SFH, CODE_FOR_spu_sfh, "si_sfh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SFHI, CODE_FOR_spu_sfh, "si_sfhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_SF, CODE_FOR_spu_sf, "si_sf", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SFI, CODE_FOR_spu_sf, "si_sfi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_SFX, CODE_FOR_spu_sfx, "si_sfx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_BG, CODE_FOR_spu_bg, "si_bg", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_BGX, CODE_FOR_spu_bgx, "si_bgx", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPY, CODE_FOR_spu_mpy, "si_mpy", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYU, CODE_FOR_spu_mpyu, "si_mpyu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYI, CODE_FOR_spu_mpy, "si_mpyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_MPYUI, CODE_FOR_spu_mpyu, "si_mpyui", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_MPYA, CODE_FOR_spu_mpya, "si_mpya", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYH, CODE_FOR_spu_mpyh, "si_mpyh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYS, CODE_FOR_spu_mpys, "si_mpys", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHH, CODE_FOR_spu_mpyhh, "si_mpyhh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHHU, CODE_FOR_spu_mpyhhu, "si_mpyhhu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHHA, CODE_FOR_spu_mpyhha, "si_mpyhha", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHHAU, CODE_FOR_spu_mpyhhau, "si_mpyhhau", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CLZ, CODE_FOR_clzv4si2, "si_clz", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CNTB, CODE_FOR_cntb_v16qi, "si_cntb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FSMB, CODE_FOR_spu_fsmb, "si_fsmb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FSMH, CODE_FOR_spu_fsmh, "si_fsmh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FSM, CODE_FOR_spu_fsm, "si_fsm", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_GBB, CODE_FOR_spu_gbb, "si_gbb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_GBH, CODE_FOR_spu_gbh, "si_gbh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_GB, CODE_FOR_spu_gb, "si_gb", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_AVGB, CODE_FOR_spu_avgb, "si_avgb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ABSDB, CODE_FOR_spu_absdb, "si_absdb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SUMB, CODE_FOR_spu_sumb, "si_sumb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_XSBH, CODE_FOR_spu_xsbh, "si_xsbh", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_XSHW, CODE_FOR_spu_xshw, "si_xshw", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_XSWD, CODE_FOR_spu_xswd, "si_xswd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_AND, CODE_FOR_andv16qi3, "si_and", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ANDC, CODE_FOR_andc_v16qi, "si_andc", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ANDBI, CODE_FOR_andv16qi3, "si_andbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ANDHI, CODE_FOR_andv8hi3, "si_andhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ANDI, CODE_FOR_andv4si3, "si_andi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_OR, CODE_FOR_iorv16qi3, "si_or", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ORC, CODE_FOR_orc_v16qi, "si_orc", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ORBI, CODE_FOR_iorv16qi3, "si_orbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ORHI, CODE_FOR_iorv8hi3, "si_orhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ORI, CODE_FOR_iorv4si3, "si_ori", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_ORX, CODE_FOR_spu_orx, "si_orx", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_XOR, CODE_FOR_xorv16qi3, "si_xor", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_XORBI, CODE_FOR_xorv16qi3, "si_xorbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_XORHI, CODE_FOR_xorv8hi3, "si_xorhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_XORI, CODE_FOR_xorv4si3, "si_xori", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_NAND, CODE_FOR_nand_v16qi, "si_nand", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_NOR, CODE_FOR_nor_v16qi, "si_nor", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_EQV, CODE_FOR_eqv_v16qi, "si_eqv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SELB, CODE_FOR_selb, "si_selb", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHUFB, CODE_FOR_shufb, "si_shufb", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLH, CODE_FOR_vashlv8hi3, "si_shlh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLHI, CODE_FOR_vashlv8hi3, "si_shlhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHL, CODE_FOR_vashlv4si3, "si_shl", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLI, CODE_FOR_vashlv4si3, "si_shli", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHLQBI, CODE_FOR_shlqbi_ti, "si_shlqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLQBII, CODE_FOR_shlqbi_ti, "si_shlqbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHLQBY, CODE_FOR_shlqby_ti, "si_shlqby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLQBYI, CODE_FOR_shlqby_ti, "si_shlqbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHLQBYBI, CODE_FOR_shlqbybi_ti, "si_shlqbybi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTH, CODE_FOR_vrotlv8hi3, "si_roth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTHI, CODE_FOR_vrotlv8hi3, "si_rothi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROT, CODE_FOR_vrotlv4si3, "si_rot", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTI, CODE_FOR_vrotlv4si3, "si_roti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTQBY, CODE_FOR_rotqby_ti, "si_rotqby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTQBYI, CODE_FOR_rotqby_ti, "si_rotqbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTQBYBI, CODE_FOR_rotqbybi_ti, "si_rotqbybi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTQBI, CODE_FOR_rotqbi_ti, "si_rotqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTQBII, CODE_FOR_rotqbi_ti, "si_rotqbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTHM, CODE_FOR_rotm_v8hi, "si_rothm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTHMI, CODE_FOR_rotm_v8hi, "si_rothmi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTM, CODE_FOR_rotm_v4si, "si_rotm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTMI, CODE_FOR_rotm_v4si, "si_rotmi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTQMBY, CODE_FOR_rotqmby_ti, "si_rotqmby", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTQMBYI, CODE_FOR_rotqmby_ti, "si_rotqmbyi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTQMBI, CODE_FOR_rotqmbi_ti, "si_rotqmbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTQMBII, CODE_FOR_rotqmbi_ti, "si_rotqmbii", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTQMBYBI, CODE_FOR_rotqmbybi_ti, "si_rotqmbybi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTMAH, CODE_FOR_rotma_v8hi, "si_rotmah", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTMAHI, CODE_FOR_rotma_v8hi, "si_rotmahi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_ROTMA, CODE_FOR_rotma_v4si, "si_rotma", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_ROTMAI, CODE_FOR_rotma_v4si, "si_rotmai", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_HEQ, CODE_FOR_spu_heq, "si_heq", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_HEQI, CODE_FOR_spu_heq, "si_heqi", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_HGT, CODE_FOR_spu_hgt, "si_hgt", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_HGTI, CODE_FOR_spu_hgt, "si_hgti", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_HLGT, CODE_FOR_spu_hlgt, "si_hlgt", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_HLGTI, CODE_FOR_spu_hlgt, "si_hlgti", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CEQB, CODE_FOR_ceq_v16qi, "si_ceqb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CEQBI, CODE_FOR_ceq_v16qi, "si_ceqbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CEQH, CODE_FOR_ceq_v8hi, "si_ceqh", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CEQHI, CODE_FOR_ceq_v8hi, "si_ceqhi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CEQ, CODE_FOR_ceq_v4si, "si_ceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CEQI, CODE_FOR_ceq_v4si, "si_ceqi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CGTB, CODE_FOR_cgt_v16qi, "si_cgtb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CGTBI, CODE_FOR_cgt_v16qi, "si_cgtbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CGTH, CODE_FOR_cgt_v8hi, "si_cgth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CGTHI, CODE_FOR_cgt_v8hi, "si_cgthi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CGT, CODE_FOR_cgt_v4si, "si_cgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CGTI, CODE_FOR_cgt_v4si, "si_cgti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CLGTB, CODE_FOR_clgt_v16qi, "si_clgtb", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CLGTBI, CODE_FOR_clgt_v16qi, "si_clgtbi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CLGTH, CODE_FOR_clgt_v8hi, "si_clgth", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CLGTHI, CODE_FOR_clgt_v8hi, "si_clgthi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_CLGT, CODE_FOR_clgt_v4si, "si_clgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CLGTI, CODE_FOR_clgt_v4si, "si_clgti", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_BISLED, CODE_FOR_spu_bisled, "si_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR))
+DEF_BUILTIN (SI_BISLEDD, CODE_FOR_spu_bisledd, "si_bisledd", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR))
+DEF_BUILTIN (SI_BISLEDE, CODE_FOR_spu_bislede, "si_bislede", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_PTR))
+DEF_BUILTIN (SI_FA, CODE_FOR_addv4sf3, "si_fa", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFA, CODE_FOR_addv2df3, "si_dfa", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FS, CODE_FOR_subv4sf3, "si_fs", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFS, CODE_FOR_subv2df3, "si_dfs", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FM, CODE_FOR_mulv4sf3, "si_fm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFM, CODE_FOR_mulv2df3, "si_dfm", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FMA, CODE_FOR_fmav4sf4, "si_fma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFMA, CODE_FOR_fmav2df4, "si_dfma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFNMA, CODE_FOR_nfmav2df4, "si_dfnma", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FNMS, CODE_FOR_fnmav4sf4, "si_fnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFNMS, CODE_FOR_nfmsv2df4, "si_dfnms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FMS, CODE_FOR_fmsv4sf4, "si_fms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFMS, CODE_FOR_fmsv2df4, "si_dfms", B_INSN, _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FREST, CODE_FOR_frest_v4sf, "si_frest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FRSQEST, CODE_FOR_frsqest_v4sf, "si_frsqest", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FI, CODE_FOR_fi_v4sf, "si_fi", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_CSFLT, CODE_FOR_spu_csflt, "si_csflt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_CFLTS, CODE_FOR_spu_cflts, "si_cflts", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_CUFLT, CODE_FOR_spu_cuflt, "si_cuflt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_CFLTU, CODE_FOR_spu_cfltu, "si_cfltu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_U14))
+DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SI_NOP, CODE_FOR_nop, "si_nop", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SI_SYNC, CODE_FOR_sync, "si_sync", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SI_SYNCC, CODE_FOR_syncc, "si_syncc", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SI_DSYNC, CODE_FOR_dsync, "si_dsync", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SI_MFSPR, CODE_FOR_spu_mfspr, "si_mfspr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_MTSPR, CODE_FOR_spu_mtspr, "si_mtspr", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_U7, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FSCRRD, CODE_FOR_spu_fscrrd, "si_fscrrd", B_INSN, _A1(SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FSCRWR, CODE_FOR_spu_fscrwr, "si_fscrwr", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_RDCH, CODE_FOR_spu_rdch, "si_rdch", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_RCHCNT, CODE_FOR_spu_rchcnt, "si_rchcnt", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_U7))
+DEF_BUILTIN (SI_WRCH, CODE_FOR_spu_wrch, "si_wrch", B_INSN, _A3(SPU_BTI_VOID, SPU_BTI_U7, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_CHAR, CODE_FOR_spu_convert, "si_to_char", B_INSN, _A2(SPU_BTI_INTQI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_UCHAR, CODE_FOR_spu_convert, "si_to_uchar", B_INSN, _A2(SPU_BTI_UINTQI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_SHORT, CODE_FOR_spu_convert, "si_to_short", B_INSN, _A2(SPU_BTI_INTHI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_USHORT, CODE_FOR_spu_convert, "si_to_ushort", B_INSN, _A2(SPU_BTI_UINTHI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_INT, CODE_FOR_spu_convert, "si_to_int", B_INSN, _A2(SPU_BTI_INTSI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_UINT, CODE_FOR_spu_convert, "si_to_uint", B_INSN, _A2(SPU_BTI_UINTSI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_LONG, CODE_FOR_spu_convert, "si_to_long", B_INSN, _A2(SPU_BTI_INTDI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_ULONG, CODE_FOR_spu_convert, "si_to_ulong", B_INSN, _A2(SPU_BTI_UINTDI, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_FLOAT, CODE_FOR_spu_convert, "si_to_float", B_INSN, _A2(SPU_BTI_FLOAT, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_DOUBLE, CODE_FOR_spu_convert, "si_to_double", B_INSN, _A2(SPU_BTI_DOUBLE, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_TO_PTR, CODE_FOR_spu_convert, "si_to_ptr", B_INSN, _A2(SPU_BTI_PTR, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FROM_CHAR, CODE_FOR_spu_convert, "si_from_char", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTQI))
+DEF_BUILTIN (SI_FROM_UCHAR, CODE_FOR_spu_convert, "si_from_uchar", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTQI))
+DEF_BUILTIN (SI_FROM_SHORT, CODE_FOR_spu_convert, "si_from_short", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTHI))
+DEF_BUILTIN (SI_FROM_USHORT, CODE_FOR_spu_convert, "si_from_ushort", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTHI))
+DEF_BUILTIN (SI_FROM_INT, CODE_FOR_spu_convert, "si_from_int", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTSI))
+DEF_BUILTIN (SI_FROM_UINT, CODE_FOR_spu_convert, "si_from_uint", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTSI))
+DEF_BUILTIN (SI_FROM_LONG, CODE_FOR_spu_convert, "si_from_long", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_INTDI))
+DEF_BUILTIN (SI_FROM_ULONG, CODE_FOR_spu_convert, "si_from_ulong", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_UINTDI))
+DEF_BUILTIN (SI_FROM_FLOAT, CODE_FOR_spu_convert, "si_from_float", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_FLOAT))
+DEF_BUILTIN (SI_FROM_DOUBLE, CODE_FOR_spu_convert, "si_from_double", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_DOUBLE))
+DEF_BUILTIN (SI_FROM_PTR, CODE_FOR_spu_convert, "si_from_ptr", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_PTR))
+
+/* definitions to support generic builtin functions: */
+
+DEF_BUILTIN (SPU_CONVTS, CODE_FOR_spu_cflts, "spu_convts", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_CONVTU, CODE_FOR_spu_cfltu, "spu_convtu", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_ROUNDTF, CODE_FOR_spu_frds, "spu_roundtf", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MULH, CODE_FOR_spu_mpyh, "spu_mulh", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_MULSR, CODE_FOR_spu_mpys, "spu_mulsr", B_INSN, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_FREST, CODE_FOR_frest_v4sf, "spu_frest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_FRSQEST, CODE_FOR_frsqest_v4sf, "spu_frsqest", B_INSN, _A2(SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_NMADD, CODE_FOR_nfmav2df4, "spu_nmadd", B_INSN, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_ABSD, CODE_FOR_spu_absdb, "spu_absd", B_INSN, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_AVG, CODE_FOR_spu_avgb, "spu_avg", B_INSN, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN, _A3(SPU_BTI_UV8HI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7))
+
+/* definitions to support overloaded generic builtin functions: */
+
+DEF_BUILTIN (SPU_CONVTF, CODE_FOR_nothing, "spu_convtf", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CONVTF_0, CODE_FOR_spu_cuflt, "spu_convtf_0", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CONVTF_1, CODE_FOR_spu_csflt, "spu_convtf_1", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_EXTEND, CODE_FOR_nothing, "spu_extend", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_EXTEND_0, CODE_FOR_spu_xsbh, "spu_extend_0", B_INTERNAL, _A2(SPU_BTI_V8HI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_EXTEND_1, CODE_FOR_spu_xshw, "spu_extend_1", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_EXTEND_2, CODE_FOR_spu_xswd, "spu_extend_2", B_INTERNAL, _A2(SPU_BTI_V2DI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_EXTEND_3, CODE_FOR_spu_fesd, "spu_extend_3", B_INTERNAL, _A2(SPU_BTI_V2DF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_ADD, CODE_FOR_nothing, "spu_add", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_ADD_0, CODE_FOR_addv4si3, "spu_add_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_ADD_1, CODE_FOR_addv4si3, "spu_add_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_ADD_2, CODE_FOR_addv8hi3, "spu_add_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_ADD_3, CODE_FOR_addv8hi3, "spu_add_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_ADD_4, CODE_FOR_addv4sf3, "spu_add_4", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_ADD_5, CODE_FOR_addv2df3, "spu_add_5", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_ADD_6, CODE_FOR_addv8hi3, "spu_add_6", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_ADD_7, CODE_FOR_addv8hi3, "spu_add_7", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_ADD_8, CODE_FOR_addv4si3, "spu_add_8", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_ADD_9, CODE_FOR_addv4si3, "spu_add_9", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_ADDX, CODE_FOR_nothing, "spu_addx", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_ADDX_0, CODE_FOR_addx_v4si, "spu_addx_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_ADDX_1, CODE_FOR_addx_v4si, "spu_addx_1", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_GENC, CODE_FOR_nothing, "spu_genc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_GENC_0, CODE_FOR_cg_v4si, "spu_genc_0", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_GENC_1, CODE_FOR_cg_v4si, "spu_genc_1", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_GENCX, CODE_FOR_nothing, "spu_gencx", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_GENCX_0, CODE_FOR_cgx_v4si, "spu_gencx_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_GENCX_1, CODE_FOR_cgx_v4si, "spu_gencx_1", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_MADD, CODE_FOR_nothing, "spu_madd", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MADD_0, CODE_FOR_spu_mpya, "spu_madd_0", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_MADD_1, CODE_FOR_fmav4sf4, "spu_madd_1", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MADD_2, CODE_FOR_fmav2df4, "spu_madd_2", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MSUB, CODE_FOR_nothing, "spu_msub", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MSUB_0, CODE_FOR_fmsv4sf4, "spu_msub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MSUB_1, CODE_FOR_fmsv2df4, "spu_msub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MHHADD, CODE_FOR_nothing, "spu_mhhadd", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MHHADD_0, CODE_FOR_spu_mpyhhau, "spu_mhhadd_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_MHHADD_1, CODE_FOR_spu_mpyhha, "spu_mhhadd_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_MULE, CODE_FOR_nothing, "spu_mule", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MULE_0, CODE_FOR_spu_mpyhhu, "spu_mule_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_MULE_1, CODE_FOR_spu_mpyhh, "spu_mule_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_MUL, CODE_FOR_nothing, "spu_mul", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MUL_0, CODE_FOR_mulv4sf3, "spu_mul_0", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MUL_1, CODE_FOR_mulv2df3, "spu_mul_1", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MULO, CODE_FOR_nothing, "spu_mulo", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MULO_0, CODE_FOR_spu_mpy, "spu_mulo_0", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_MULO_1, CODE_FOR_spu_mpyu, "spu_mulo_1", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_MULO_2, CODE_FOR_spu_mpy, "spu_mulo_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_MULO_3, CODE_FOR_spu_mpyu, "spu_mulo_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_NMSUB, CODE_FOR_nothing, "spu_nmsub", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_NMSUB_0, CODE_FOR_fnmav4sf4, "spu_nmsub_0", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_NMSUB_1, CODE_FOR_nfmsv2df4, "spu_nmsub_1", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_SUB, CODE_FOR_nothing, "spu_sub", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SUB_0, CODE_FOR_subv8hi3, "spu_sub_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SUB_1, CODE_FOR_subv8hi3, "spu_sub_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_SUB_2, CODE_FOR_subv4si3, "spu_sub_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SUB_3, CODE_FOR_subv4si3, "spu_sub_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_SUB_4, CODE_FOR_subv4sf3, "spu_sub_4", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_SUB_5, CODE_FOR_subv2df3, "spu_sub_5", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_SUB_6, CODE_FOR_subv8hi3, "spu_sub_6", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SUB_7, CODE_FOR_subv8hi3, "spu_sub_7", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_SUB_8, CODE_FOR_subv4si3, "spu_sub_8", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SUB_9, CODE_FOR_subv4si3, "spu_sub_9", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_SUBX, CODE_FOR_nothing, "spu_subx", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SUBX_0, CODE_FOR_sfx_v4si, "spu_subx_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SUBX_1, CODE_FOR_sfx_v4si, "spu_subx_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_GENB, CODE_FOR_nothing, "spu_genb", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_GENB_0, CODE_FOR_bg_v4si, "spu_genb_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_GENB_1, CODE_FOR_bg_v4si, "spu_genb_1", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_GENBX, CODE_FOR_nothing, "spu_genbx", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_GENBX_0, CODE_FOR_bgx_v4si, "spu_genbx_0", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_GENBX_1, CODE_FOR_bgx_v4si, "spu_genbx_1", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_CMPEQ, CODE_FOR_nothing, "spu_cmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPEQ_0, CODE_FOR_ceq_v16qi, "spu_cmpeq_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_CMPEQ_1, CODE_FOR_ceq_v16qi, "spu_cmpeq_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_CMPEQ_2, CODE_FOR_ceq_v8hi, "spu_cmpeq_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_CMPEQ_3, CODE_FOR_ceq_v8hi, "spu_cmpeq_3", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_CMPEQ_4, CODE_FOR_ceq_v4si, "spu_cmpeq_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_CMPEQ_5, CODE_FOR_ceq_v4si, "spu_cmpeq_5", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_CMPEQ_6, CODE_FOR_ceq_v4sf, "spu_cmpeq_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPEQ_7, CODE_FOR_ceq_v16qi, "spu_cmpeq_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_CMPEQ_8, CODE_FOR_ceq_v16qi, "spu_cmpeq_8", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_CMPEQ_9, CODE_FOR_ceq_v8hi, "spu_cmpeq_9", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_CMPGT_2, CODE_FOR_clgt_v8hi, "spu_cmpgt_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_CMPGT_3, CODE_FOR_cgt_v8hi, "spu_cmpgt_3", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_CMPGT_4, CODE_FOR_clgt_v4si, "spu_cmpgt_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_CMPGT_5, CODE_FOR_cgt_v4si, "spu_cmpgt_5", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_CMPGT_6, CODE_FOR_cgt_v4sf, "spu_cmpgt_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPGT_7, CODE_FOR_clgt_v16qi, "spu_cmpgt_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_CMPGT_8, CODE_FOR_cgt_v16qi, "spu_cmpgt_8", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_CMPGT_9, CODE_FOR_clgt_v8hi, "spu_cmpgt_9", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_HCMPGT, CODE_FOR_nothing, "spu_hcmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_HCMPGT_0, CODE_FOR_spu_hgt, "spu_hcmpgt_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_HCMPGT_1, CODE_FOR_spu_hlgt, "spu_hcmpgt_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CNTB, CODE_FOR_nothing, "spu_cntb", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CNTB_0, CODE_FOR_cntb_v16qi, "spu_cntb_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_CNTB_1, CODE_FOR_cntb_v16qi, "spu_cntb_1", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_CNTLZ, CODE_FOR_nothing, "spu_cntlz", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CNTLZ_0, CODE_FOR_clzv4si2, "spu_cntlz_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_CNTLZ_1, CODE_FOR_clzv4si2, "spu_cntlz_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_CNTLZ_2, CODE_FOR_clzv4si2, "spu_cntlz_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_GATHER, CODE_FOR_nothing, "spu_gather", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_GATHER_0, CODE_FOR_spu_gb, "spu_gather_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_GATHER_1, CODE_FOR_spu_gb, "spu_gather_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_GATHER_2, CODE_FOR_spu_gbh, "spu_gather_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_GATHER_3, CODE_FOR_spu_gbh, "spu_gather_3", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_GATHER_4, CODE_FOR_spu_gbb, "spu_gather_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_GATHER_5, CODE_FOR_spu_gbb, "spu_gather_5", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_GATHER_6, CODE_FOR_spu_gb, "spu_gather_6", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MASKB, CODE_FOR_nothing, "spu_maskb", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASKB_0, CODE_FOR_spu_fsmb, "spu_maskb_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_MASKB_1, CODE_FOR_spu_fsmb, "spu_maskb_1", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_MASKB_2, CODE_FOR_spu_fsmb, "spu_maskb_2", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_MASKB_3, CODE_FOR_spu_fsmb, "spu_maskb_3", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_MASKH, CODE_FOR_nothing, "spu_maskh", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASKH_0, CODE_FOR_spu_fsmh, "spu_maskh_0", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_MASKH_1, CODE_FOR_spu_fsmh, "spu_maskh_1", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_MASKH_2, CODE_FOR_spu_fsmh, "spu_maskh_2", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_MASKH_3, CODE_FOR_spu_fsmh, "spu_maskh_3", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_MASKH_4, CODE_FOR_spu_fsmh, "spu_maskh_4", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_MASKH_5, CODE_FOR_spu_fsmh, "spu_maskh_5", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_MASKW, CODE_FOR_nothing, "spu_maskw", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASKW_0, CODE_FOR_spu_fsm, "spu_maskw_0", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_MASKW_1, CODE_FOR_spu_fsm, "spu_maskw_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_MASKW_2, CODE_FOR_spu_fsm, "spu_maskw_2", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_MASKW_3, CODE_FOR_spu_fsm, "spu_maskw_3", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_MASKW_4, CODE_FOR_spu_fsm, "spu_maskw_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_MASKW_5, CODE_FOR_spu_fsm, "spu_maskw_5", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_SEL, CODE_FOR_nothing, "spu_sel", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SEL_0, CODE_FOR_selb, "spu_sel_0", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_SEL_1, CODE_FOR_selb, "spu_sel_1", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_SEL_2, CODE_FOR_selb, "spu_sel_2", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SEL_3, CODE_FOR_selb, "spu_sel_3", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SEL_4, CODE_FOR_selb, "spu_sel_4", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SEL_5, CODE_FOR_selb, "spu_sel_5", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SEL_6, CODE_FOR_selb, "spu_sel_6", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SEL_7, CODE_FOR_selb, "spu_sel_7", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SEL_8, CODE_FOR_selb, "spu_sel_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SEL_9, CODE_FOR_selb, "spu_sel_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_SHUFFLE, CODE_FOR_nothing, "spu_shuffle", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SHUFFLE_0, CODE_FOR_shufb, "spu_shuffle_0", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_1, CODE_FOR_shufb, "spu_shuffle_1", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_2, CODE_FOR_shufb, "spu_shuffle_2", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_3, CODE_FOR_shufb, "spu_shuffle_3", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_4, CODE_FOR_shufb, "spu_shuffle_4", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_5, CODE_FOR_shufb, "spu_shuffle_5", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_6, CODE_FOR_shufb, "spu_shuffle_6", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_7, CODE_FOR_shufb, "spu_shuffle_7", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_8, CODE_FOR_shufb, "spu_shuffle_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_SHUFFLE_9, CODE_FOR_shufb, "spu_shuffle_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_AND, CODE_FOR_nothing, "spu_and", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_AND_0, CODE_FOR_andv16qi3, "spu_and_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_AND_1, CODE_FOR_andv16qi3, "spu_and_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_AND_2, CODE_FOR_andv8hi3, "spu_and_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_AND_3, CODE_FOR_andv8hi3, "spu_and_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_AND_4, CODE_FOR_andv4si3, "spu_and_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_AND_5, CODE_FOR_andv4si3, "spu_and_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_AND_6, CODE_FOR_andv2di3, "spu_and_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_AND_7, CODE_FOR_andv2di3, "spu_and_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_AND_8, CODE_FOR_andv4si3, "spu_and_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_AND_9, CODE_FOR_andv2di3, "spu_and_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_AND_10, CODE_FOR_andv16qi3, "spu_and_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_AND_11, CODE_FOR_andv16qi3, "spu_and_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_AND_12, CODE_FOR_andv8hi3, "spu_and_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_AND_13, CODE_FOR_andv8hi3, "spu_and_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_AND_14, CODE_FOR_andv4si3, "spu_and_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_AND_15, CODE_FOR_andv4si3, "spu_and_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_ANDC, CODE_FOR_nothing, "spu_andc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_ANDC_0, CODE_FOR_andc_v2di, "spu_andc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_ANDC_1, CODE_FOR_andc_v2di, "spu_andc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_ANDC_2, CODE_FOR_andc_v4si, "spu_andc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_ANDC_3, CODE_FOR_andc_v4si, "spu_andc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_ANDC_4, CODE_FOR_andc_v8hi, "spu_andc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_ANDC_5, CODE_FOR_andc_v8hi, "spu_andc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_ANDC_6, CODE_FOR_andc_v16qi, "spu_andc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_ANDC_7, CODE_FOR_andc_v16qi, "spu_andc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_ANDC_8, CODE_FOR_andc_v4si, "spu_andc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_ANDC_9, CODE_FOR_andc_v2di, "spu_andc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_EQV, CODE_FOR_nothing, "spu_eqv", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_EQV_0, CODE_FOR_eqv_v2di, "spu_eqv_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_EQV_1, CODE_FOR_eqv_v2di, "spu_eqv_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_EQV_2, CODE_FOR_eqv_v4si, "spu_eqv_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_EQV_3, CODE_FOR_eqv_v4si, "spu_eqv_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_EQV_4, CODE_FOR_eqv_v8hi, "spu_eqv_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_EQV_5, CODE_FOR_eqv_v8hi, "spu_eqv_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_EQV_6, CODE_FOR_eqv_v16qi, "spu_eqv_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_EQV_7, CODE_FOR_eqv_v16qi, "spu_eqv_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_EQV_8, CODE_FOR_eqv_v4si, "spu_eqv_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_EQV_9, CODE_FOR_eqv_v2di, "spu_eqv_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_NAND, CODE_FOR_nothing, "spu_nand", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_NAND_0, CODE_FOR_nand_v2di, "spu_nand_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_NAND_1, CODE_FOR_nand_v2di, "spu_nand_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_NAND_2, CODE_FOR_nand_v4si, "spu_nand_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_NAND_3, CODE_FOR_nand_v4si, "spu_nand_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_NAND_4, CODE_FOR_nand_v8hi, "spu_nand_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_NAND_5, CODE_FOR_nand_v8hi, "spu_nand_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_NAND_6, CODE_FOR_nand_v16qi, "spu_nand_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_NAND_7, CODE_FOR_nand_v16qi, "spu_nand_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_NAND_8, CODE_FOR_nand_v4si, "spu_nand_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_NAND_9, CODE_FOR_nand_v2di, "spu_nand_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_NOR, CODE_FOR_nothing, "spu_nor", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_NOR_0, CODE_FOR_nor_v2di, "spu_nor_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_NOR_1, CODE_FOR_nor_v2di, "spu_nor_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_NOR_2, CODE_FOR_nor_v4si, "spu_nor_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_NOR_3, CODE_FOR_nor_v4si, "spu_nor_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_NOR_4, CODE_FOR_nor_v8hi, "spu_nor_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_NOR_5, CODE_FOR_nor_v8hi, "spu_nor_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_NOR_6, CODE_FOR_nor_v16qi, "spu_nor_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_NOR_7, CODE_FOR_nor_v16qi, "spu_nor_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_NOR_8, CODE_FOR_nor_v4si, "spu_nor_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_NOR_9, CODE_FOR_nor_v2di, "spu_nor_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_OR, CODE_FOR_nothing, "spu_or", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_OR_0, CODE_FOR_iorv16qi3, "spu_or_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_OR_1, CODE_FOR_iorv16qi3, "spu_or_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_OR_2, CODE_FOR_iorv8hi3, "spu_or_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_OR_3, CODE_FOR_iorv8hi3, "spu_or_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_OR_4, CODE_FOR_iorv4si3, "spu_or_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_OR_5, CODE_FOR_iorv4si3, "spu_or_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_OR_6, CODE_FOR_iorv2di3, "spu_or_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_OR_7, CODE_FOR_iorv2di3, "spu_or_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_OR_8, CODE_FOR_iorv4si3, "spu_or_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_OR_9, CODE_FOR_iorv2di3, "spu_or_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_OR_10, CODE_FOR_iorv16qi3, "spu_or_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_OR_11, CODE_FOR_iorv16qi3, "spu_or_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_OR_12, CODE_FOR_iorv8hi3, "spu_or_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_OR_13, CODE_FOR_iorv8hi3, "spu_or_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_OR_14, CODE_FOR_iorv4si3, "spu_or_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_OR_15, CODE_FOR_iorv4si3, "spu_or_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_ORC, CODE_FOR_nothing, "spu_orc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_ORC_0, CODE_FOR_orc_v2di, "spu_orc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_ORC_1, CODE_FOR_orc_v2di, "spu_orc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_ORC_2, CODE_FOR_orc_v4si, "spu_orc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_ORC_3, CODE_FOR_orc_v4si, "spu_orc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_ORC_4, CODE_FOR_orc_v8hi, "spu_orc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_ORC_5, CODE_FOR_orc_v8hi, "spu_orc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_ORC_6, CODE_FOR_orc_v16qi, "spu_orc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_ORC_7, CODE_FOR_orc_v16qi, "spu_orc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_ORC_8, CODE_FOR_orc_v4si, "spu_orc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_ORC_9, CODE_FOR_orc_v2di, "spu_orc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_ORX, CODE_FOR_nothing, "spu_orx", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_ORX_0, CODE_FOR_spu_orx, "spu_orx_0", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_ORX_1, CODE_FOR_spu_orx, "spu_orx_1", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_XOR, CODE_FOR_nothing, "spu_xor", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_XOR_0, CODE_FOR_xorv16qi3, "spu_xor_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
+DEF_BUILTIN (SPU_XOR_1, CODE_FOR_xorv16qi3, "spu_xor_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
+DEF_BUILTIN (SPU_XOR_2, CODE_FOR_xorv8hi3, "spu_xor_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_XOR_3, CODE_FOR_xorv8hi3, "spu_xor_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_XOR_4, CODE_FOR_xorv4si3, "spu_xor_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_XOR_5, CODE_FOR_xorv4si3, "spu_xor_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_XOR_6, CODE_FOR_xorv2di3, "spu_xor_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UV2DI))
+DEF_BUILTIN (SPU_XOR_7, CODE_FOR_xorv2di3, "spu_xor_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_V2DI))
+DEF_BUILTIN (SPU_XOR_8, CODE_FOR_xorv4si3, "spu_xor_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_XOR_9, CODE_FOR_xorv2di3, "spu_xor_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_XOR_10, CODE_FOR_xorv16qi3, "spu_xor_10", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_XOR_11, CODE_FOR_xorv16qi3, "spu_xor_11", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_XOR_12, CODE_FOR_xorv8hi3, "spu_xor_12", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_XOR_13, CODE_FOR_xorv8hi3, "spu_xor_13", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_XOR_14, CODE_FOR_xorv4si3, "spu_xor_14", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_XOR_15, CODE_FOR_xorv4si3, "spu_xor_15", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RL, CODE_FOR_nothing, "spu_rl", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RL_0, CODE_FOR_vrotlv8hi3, "spu_rl_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RL_1, CODE_FOR_vrotlv8hi3, "spu_rl_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RL_2, CODE_FOR_vrotlv4si3, "spu_rl_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RL_3, CODE_FOR_vrotlv4si3, "spu_rl_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RL_4, CODE_FOR_vrotlv8hi3, "spu_rl_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_RL_5, CODE_FOR_vrotlv8hi3, "spu_rl_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_RL_6, CODE_FOR_vrotlv4si3, "spu_rl_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RL_7, CODE_FOR_vrotlv4si3, "spu_rl_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW, CODE_FOR_nothing, "spu_rlqw", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLQW_0, CODE_FOR_rotqbi_ti, "spu_rlqw_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_1, CODE_FOR_rotqbi_ti, "spu_rlqw_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_2, CODE_FOR_rotqbi_ti, "spu_rlqw_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_3, CODE_FOR_rotqbi_ti, "spu_rlqw_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_4, CODE_FOR_rotqbi_ti, "spu_rlqw_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_5, CODE_FOR_rotqbi_ti, "spu_rlqw_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_6, CODE_FOR_rotqbi_ti, "spu_rlqw_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_7, CODE_FOR_rotqbi_ti, "spu_rlqw_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_8, CODE_FOR_rotqbi_ti, "spu_rlqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQW_9, CODE_FOR_rotqbi_ti, "spu_rlqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE, CODE_FOR_nothing, "spu_rlqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLQWBYTE_0, CODE_FOR_rotqby_ti, "spu_rlqwbyte_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_1, CODE_FOR_rotqby_ti, "spu_rlqwbyte_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_2, CODE_FOR_rotqby_ti, "spu_rlqwbyte_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_3, CODE_FOR_rotqby_ti, "spu_rlqwbyte_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_4, CODE_FOR_rotqby_ti, "spu_rlqwbyte_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_5, CODE_FOR_rotqby_ti, "spu_rlqwbyte_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_6, CODE_FOR_rotqby_ti, "spu_rlqwbyte_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_7, CODE_FOR_rotqby_ti, "spu_rlqwbyte_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_8, CODE_FOR_rotqby_ti, "spu_rlqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTE_9, CODE_FOR_rotqby_ti, "spu_rlqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC, CODE_FOR_nothing, "spu_rlqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLQWBYTEBC_0, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_1, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_2, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_3, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_4, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_5, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_6, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_7, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_8, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLQWBYTEBC_9, CODE_FOR_rotqbybi_ti, "spu_rlqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASK, CODE_FOR_nothing, "spu_rlmask", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLMASK_0, CODE_FOR_rotm_v8hi, "spu_rlmask_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RLMASK_1, CODE_FOR_rotm_v8hi, "spu_rlmask_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RLMASK_2, CODE_FOR_rotm_v4si, "spu_rlmask_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RLMASK_3, CODE_FOR_rotm_v4si, "spu_rlmask_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RLMASK_4, CODE_FOR_rotm_v8hi, "spu_rlmask_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASK_5, CODE_FOR_rotm_v8hi, "spu_rlmask_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASK_6, CODE_FOR_rotm_v4si, "spu_rlmask_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASK_7, CODE_FOR_rotm_v4si, "spu_rlmask_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKA, CODE_FOR_nothing, "spu_rlmaska", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLMASKA_0, CODE_FOR_rotma_v8hi, "spu_rlmaska_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RLMASKA_1, CODE_FOR_rotma_v8hi, "spu_rlmaska_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_RLMASKA_2, CODE_FOR_rotma_v4si, "spu_rlmaska_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RLMASKA_3, CODE_FOR_rotma_v4si, "spu_rlmaska_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_V4SI))
+DEF_BUILTIN (SPU_RLMASKA_4, CODE_FOR_rotma_v8hi, "spu_rlmaska_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKA_5, CODE_FOR_rotma_v8hi, "spu_rlmaska_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKA_6, CODE_FOR_rotma_v4si, "spu_rlmaska_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKA_7, CODE_FOR_rotma_v4si, "spu_rlmaska_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW, CODE_FOR_nothing, "spu_rlmaskqw", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLMASKQW_0, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_1, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_2, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_3, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_4, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_5, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_6, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_7, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_8, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQW_9, CODE_FOR_rotqmbi_ti, "spu_rlmaskqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE, CODE_FOR_nothing, "spu_rlmaskqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_0, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_1, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_2, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_3, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_4, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_5, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_6, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_7, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_8, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTE_9, CODE_FOR_rotqmby_ti, "spu_rlmaskqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC, CODE_FOR_nothing, "spu_rlmaskqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_0, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_1, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_2, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_3, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_4, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_5, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_6, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_7, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_8, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_RLMASKQWBYTEBC_9, CODE_FOR_rotqmbybi_ti, "spu_rlmaskqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_SL, CODE_FOR_nothing, "spu_sl", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SL_0, CODE_FOR_vashlv8hi3, "spu_sl_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SL_1, CODE_FOR_vashlv8hi3, "spu_sl_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SL_2, CODE_FOR_vashlv4si3, "spu_sl_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SL_3, CODE_FOR_vashlv4si3, "spu_sl_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SL_4, CODE_FOR_vashlv8hi3, "spu_sl_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_5, CODE_FOR_vashlv8hi3, "spu_sl_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_6, CODE_FOR_vashlv4si3, "spu_sl_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_7, CODE_FOR_vashlv4si3, "spu_sl_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW, CODE_FOR_nothing, "spu_slqw", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SLQW_0, CODE_FOR_shlqbi_ti, "spu_slqw_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_1, CODE_FOR_shlqbi_ti, "spu_slqw_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_2, CODE_FOR_shlqbi_ti, "spu_slqw_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_3, CODE_FOR_shlqbi_ti, "spu_slqw_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_4, CODE_FOR_shlqbi_ti, "spu_slqw_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_5, CODE_FOR_shlqbi_ti, "spu_slqw_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_6, CODE_FOR_shlqbi_ti, "spu_slqw_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_7, CODE_FOR_shlqbi_ti, "spu_slqw_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_8, CODE_FOR_shlqbi_ti, "spu_slqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQW_9, CODE_FOR_shlqbi_ti, "spu_slqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE, CODE_FOR_nothing, "spu_slqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SLQWBYTE_0, CODE_FOR_shlqby_ti, "spu_slqwbyte_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_1, CODE_FOR_shlqby_ti, "spu_slqwbyte_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_2, CODE_FOR_shlqby_ti, "spu_slqwbyte_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_3, CODE_FOR_shlqby_ti, "spu_slqwbyte_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_4, CODE_FOR_shlqby_ti, "spu_slqwbyte_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_5, CODE_FOR_shlqby_ti, "spu_slqwbyte_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_6, CODE_FOR_shlqby_ti, "spu_slqwbyte_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_7, CODE_FOR_shlqby_ti, "spu_slqwbyte_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_8, CODE_FOR_shlqby_ti, "spu_slqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTE_9, CODE_FOR_shlqby_ti, "spu_slqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC, CODE_FOR_nothing, "spu_slqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SLQWBYTEBC_0, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_1, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_2, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_3, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_4, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_5, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_6, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_7, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_8, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SLQWBYTEBC_9, CODE_FOR_shlqbybi_ti, "spu_slqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SR, CODE_FOR_nothing, "spu_sr", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SR_0, CODE_FOR_vlshrv8hi3, "spu_sr_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SR_1, CODE_FOR_vlshrv8hi3, "spu_sr_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SR_2, CODE_FOR_vlshrv4si3, "spu_sr_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SR_3, CODE_FOR_vlshrv4si3, "spu_sr_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SR_4, CODE_FOR_vlshrv8hi3, "spu_sr_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SR_5, CODE_FOR_vlshrv8hi3, "spu_sr_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SR_6, CODE_FOR_vlshrv4si3, "spu_sr_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SR_7, CODE_FOR_vlshrv4si3, "spu_sr_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRA, CODE_FOR_nothing, "spu_sra", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SRA_0, CODE_FOR_vashrv8hi3, "spu_sra_0", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SRA_1, CODE_FOR_vashrv8hi3, "spu_sra_1", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SRA_2, CODE_FOR_vashrv4si3, "spu_sra_2", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SRA_3, CODE_FOR_vashrv4si3, "spu_sra_3", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SRA_4, CODE_FOR_vashrv8hi3, "spu_sra_4", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRA_5, CODE_FOR_vashrv8hi3, "spu_sra_5", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRA_6, CODE_FOR_vashrv4si3, "spu_sra_6", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRA_7, CODE_FOR_vashrv4si3, "spu_sra_7", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW, CODE_FOR_nothing, "spu_srqw", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SRQW_0, CODE_FOR_shrqbi_ti, "spu_srqw_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_1, CODE_FOR_shrqbi_ti, "spu_srqw_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_2, CODE_FOR_shrqbi_ti, "spu_srqw_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_3, CODE_FOR_shrqbi_ti, "spu_srqw_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_4, CODE_FOR_shrqbi_ti, "spu_srqw_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_5, CODE_FOR_shrqbi_ti, "spu_srqw_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_6, CODE_FOR_shrqbi_ti, "spu_srqw_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_7, CODE_FOR_shrqbi_ti, "spu_srqw_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_8, CODE_FOR_shrqbi_ti, "spu_srqw_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQW_9, CODE_FOR_shrqbi_ti, "spu_srqw_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE, CODE_FOR_nothing, "spu_srqwbyte", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SRQWBYTE_0, CODE_FOR_shrqby_ti, "spu_srqwbyte_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_1, CODE_FOR_shrqby_ti, "spu_srqwbyte_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_2, CODE_FOR_shrqby_ti, "spu_srqwbyte_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_3, CODE_FOR_shrqby_ti, "spu_srqwbyte_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_4, CODE_FOR_shrqby_ti, "spu_srqwbyte_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_5, CODE_FOR_shrqby_ti, "spu_srqwbyte_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_6, CODE_FOR_shrqby_ti, "spu_srqwbyte_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_7, CODE_FOR_shrqby_ti, "spu_srqwbyte_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_8, CODE_FOR_shrqby_ti, "spu_srqwbyte_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTE_9, CODE_FOR_shrqby_ti, "spu_srqwbyte_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC, CODE_FOR_nothing, "spu_srqwbytebc", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SRQWBYTEBC_0, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_0", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_V2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_1, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UV2DI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_2, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_2", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_3, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_3", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_4, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_4", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_V8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_5, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_5", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UV8HI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_6, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_6", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_V16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_7, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_7", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_8, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_V4SF, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SRQWBYTEBC_9, CODE_FOR_shrqbybi_ti, "spu_srqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_V2DF, SPU_BTI_UINTSI))
+
+DEF_BUILTIN (SPU_SPLATS, CODE_FOR_nothing, "spu_splats", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_SPLATS_0, CODE_FOR_spu_splats, "spu_splats_0", B_INTERNAL, _A2(SPU_BTI_UV16QI, SPU_BTI_UINTQI))
+DEF_BUILTIN (SPU_SPLATS_1, CODE_FOR_spu_splats, "spu_splats_1", B_INTERNAL, _A2(SPU_BTI_V16QI, SPU_BTI_INTQI))
+DEF_BUILTIN (SPU_SPLATS_2, CODE_FOR_spu_splats, "spu_splats_2", B_INTERNAL, _A2(SPU_BTI_UV8HI, SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_SPLATS_3, CODE_FOR_spu_splats, "spu_splats_3", B_INTERNAL, _A2(SPU_BTI_V8HI, SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_SPLATS_4, CODE_FOR_spu_splats, "spu_splats_4", B_INTERNAL, _A2(SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SPLATS_5, CODE_FOR_spu_splats, "spu_splats_5", B_INTERNAL, _A2(SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_SPLATS_6, CODE_FOR_spu_splats, "spu_splats_6", B_INTERNAL, _A2(SPU_BTI_UV2DI, SPU_BTI_UINTDI))
+DEF_BUILTIN (SPU_SPLATS_7, CODE_FOR_spu_splats, "spu_splats_7", B_INTERNAL, _A2(SPU_BTI_V2DI, SPU_BTI_INTDI))
+DEF_BUILTIN (SPU_SPLATS_8, CODE_FOR_spu_splats, "spu_splats_8", B_INTERNAL, _A2(SPU_BTI_V4SF, SPU_BTI_FLOAT))
+DEF_BUILTIN (SPU_SPLATS_9, CODE_FOR_spu_splats, "spu_splats_9", B_INTERNAL, _A2(SPU_BTI_V2DF, SPU_BTI_DOUBLE))
+DEF_BUILTIN (SPU_EXTRACT, CODE_FOR_nothing, "spu_extract", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_EXTRACT_0, CODE_FOR_spu_extract, "spu_extract_0", B_INTERNAL, _A3(SPU_BTI_UINTQI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_1, CODE_FOR_spu_extract, "spu_extract_1", B_INTERNAL, _A3(SPU_BTI_INTQI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_2, CODE_FOR_spu_extract, "spu_extract_2", B_INTERNAL, _A3(SPU_BTI_UINTHI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_3, CODE_FOR_spu_extract, "spu_extract_3", B_INTERNAL, _A3(SPU_BTI_INTHI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_4, CODE_FOR_spu_extract, "spu_extract_4", B_INTERNAL, _A3(SPU_BTI_UINTSI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_5, CODE_FOR_spu_extract, "spu_extract_5", B_INTERNAL, _A3(SPU_BTI_INTSI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_6, CODE_FOR_spu_extract, "spu_extract_6", B_INTERNAL, _A3(SPU_BTI_UINTDI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_7, CODE_FOR_spu_extract, "spu_extract_7", B_INTERNAL, _A3(SPU_BTI_INTDI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_8, CODE_FOR_spu_extract, "spu_extract_8", B_INTERNAL, _A3(SPU_BTI_FLOAT, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_EXTRACT_9, CODE_FOR_spu_extract, "spu_extract_9", B_INTERNAL, _A3(SPU_BTI_DOUBLE, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT, CODE_FOR_nothing, "spu_insert", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_INSERT_0, CODE_FOR_spu_insert, "spu_insert_0", B_INTERNAL, _A4(SPU_BTI_UV16QI, SPU_BTI_UINTQI, SPU_BTI_UV16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_1, CODE_FOR_spu_insert, "spu_insert_1", B_INTERNAL, _A4(SPU_BTI_V16QI, SPU_BTI_INTQI, SPU_BTI_V16QI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_2, CODE_FOR_spu_insert, "spu_insert_2", B_INTERNAL, _A4(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_UV8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_3, CODE_FOR_spu_insert, "spu_insert_3", B_INTERNAL, _A4(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_V8HI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_4, CODE_FOR_spu_insert, "spu_insert_4", B_INTERNAL, _A4(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_UV4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_5, CODE_FOR_spu_insert, "spu_insert_5", B_INTERNAL, _A4(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_6, CODE_FOR_spu_insert, "spu_insert_6", B_INTERNAL, _A4(SPU_BTI_UV2DI, SPU_BTI_UINTDI, SPU_BTI_UV2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_7, CODE_FOR_spu_insert, "spu_insert_7", B_INTERNAL, _A4(SPU_BTI_V2DI, SPU_BTI_INTDI, SPU_BTI_V2DI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_8, CODE_FOR_spu_insert, "spu_insert_8", B_INTERNAL, _A4(SPU_BTI_V4SF, SPU_BTI_FLOAT, SPU_BTI_V4SF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_INSERT_9, CODE_FOR_spu_insert, "spu_insert_9", B_INTERNAL, _A4(SPU_BTI_V2DF, SPU_BTI_DOUBLE, SPU_BTI_V2DF, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE, CODE_FOR_nothing, "spu_promote", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_PROMOTE_0, CODE_FOR_spu_promote, "spu_promote_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UINTQI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_1, CODE_FOR_spu_promote, "spu_promote_1", B_INTERNAL, _A3(SPU_BTI_V16QI, SPU_BTI_INTQI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_2, CODE_FOR_spu_promote, "spu_promote_2", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_UINTHI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_3, CODE_FOR_spu_promote, "spu_promote_3", B_INTERNAL, _A3(SPU_BTI_V8HI, SPU_BTI_INTHI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_4, CODE_FOR_spu_promote, "spu_promote_4", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UINTSI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_5, CODE_FOR_spu_promote, "spu_promote_5", B_INTERNAL, _A3(SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_6, CODE_FOR_spu_promote, "spu_promote_6", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_UINTDI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_7, CODE_FOR_spu_promote, "spu_promote_7", B_INTERNAL, _A3(SPU_BTI_V2DI, SPU_BTI_INTDI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_8, CODE_FOR_spu_promote, "spu_promote_8", B_INTERNAL, _A3(SPU_BTI_V4SF, SPU_BTI_FLOAT, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_PROMOTE_9, CODE_FOR_spu_promote, "spu_promote_9", B_INTERNAL, _A3(SPU_BTI_V2DF, SPU_BTI_DOUBLE, SPU_BTI_INTSI))
+
+/* We need something that is not B_INTERNAL as a sentinel. */
+
+/* These are for the convenience of implementing fma() in the standard
+ libraries. */
+DEF_BUILTIN (SCALAR_FMA, CODE_FOR_fmasf4, "fmas", B_INSN, _A4(SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT))
+DEF_BUILTIN (SCALAR_DFMA, CODE_FOR_fmadf4, "dfmas", B_INSN, _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE))
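+/* An illustrative sketch, not part of the upstream source: a C library
+   could wrap these builtins (assuming the usual "__builtin_" prefix, so
+   that "fmas" surfaces as __builtin_fmas) roughly as
+
+     float fmaf (float x, float y, float z)
+     { return __builtin_fmas (x, y, z); }
+
+     double fma (double x, double y, double z)
+     { return __builtin_dfmas (x, y, z); }  */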
+
+DEF_BUILTIN (SPU_ALIGN_HINT, CODE_FOR_spu_align_hint,"spu_align_hint", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_7, SPU_BTI_7))
+#undef _A1
+#undef _A2
+#undef _A3
+#undef _A4
diff --git a/gcc/config/spu/spu-builtins.md b/gcc/config/spu/spu-builtins.md
new file mode 100644
index 000000000..ac3a33361
--- /dev/null
+++ b/gcc/config/spu/spu-builtins.md
@@ -0,0 +1,929 @@
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; This file contains the expanders for all the intrinsics.
+;; spu_expand_builtin looks at the mode of each match_operand.
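+;;
+;; An illustrative example (not part of the upstream source): a call
+;; such as
+;;   vec_uint4 r = spu_and (a, b);    /* a, b of type vec_uint4 */
+;; first matches the B_OVERLOAD entry "spu_and" in spu-builtins.def and
+;; is then resolved, based on the argument types, to the B_INTERNAL
+;; variant "spu_and_4", whose expander is CODE_FOR_andv4si3.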
+
+
+;; load/store
+
+(define_expand "spu_lqd"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" ""))
+ (const_int -16))))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) & 15) != 0)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & -16);
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx op2 = operands[2];
+ operands[2] = force_reg (Pmode, operands[2]);
+ if (!ALIGNED_SYMBOL_REF_P (op2))
+ emit_insn (gen_andsi3 (operands[2], operands[2], GEN_INT (-16)));
+ }
+ })
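+
+;; Illustrative note, not part of the upstream source: the expander
+;; above implements the lqd addressing rule -- the low four bits of a
+;; constant offset are dropped, a non-constant offset is forced into a
+;; register and masked (unless it is a known 16-byte-aligned symbol),
+;; and the generated address is ANDed with -16, so the quadword load is
+;; always 16-byte aligned.  spu_stqd below follows the same rule for
+;; stores.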
+
+(define_expand "spu_lqx"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_reg_operand" ""))
+ (const_int -16))))]
+ ""
+ "")
+
+(define_expand "spu_lqa"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "")
+ (const_int -16))))]
+ ""
+ {
+ if (GET_CODE (operands[1]) == CONST_INT
+ && (INTVAL (operands[1]) & 15) != 0)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & -16);
+ })
+
+(define_expand "spu_lqr"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (mem:TI (and:SI (match_operand:SI 1 "address_operand" "")
+ (const_int -16))))]
+ ""
+ "")
+
+(define_expand "spu_stqd"
+ [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" ""))
+ (const_int -16)))
+ (match_operand:TI 0 "spu_reg_operand" "r,r"))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) & 15) != 0)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & -16);
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx op2 = operands[2];
+ operands[2] = force_reg (Pmode, operands[2]);
+ if (!ALIGNED_SYMBOL_REF_P (op2))
+ emit_insn (gen_andsi3 (operands[2], operands[2], GEN_INT (-16)));
+ }
+ })
+
+(define_expand "spu_stqx"
+ [(set (mem:TI (and:SI (plus:SI (match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_reg_operand" ""))
+ (const_int -16)))
+ (match_operand:TI 0 "spu_reg_operand" "r"))]
+ ""
+ "")
+
+(define_expand "spu_stqa"
+ [(set (mem:TI (and:SI (match_operand:SI 1 "immediate_operand" "")
+ (const_int -16)))
+ (match_operand:TI 0 "spu_reg_operand" "r"))]
+ ""
+ {
+ if (GET_CODE (operands[1]) == CONST_INT
+ && (INTVAL (operands[1]) & 15) != 0)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & -16);
+ })
+
+(define_expand "spu_stqr"
+ [(set (mem:TI (and:SI (match_operand:SI 1 "address_operand" "")
+ (const_int -16)))
+ (match_operand:TI 0 "spu_reg_operand" ""))]
+ ""
+ "")
+
+
+;; generate control word
+
+(define_expand "spu_cbx"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" "")
+ (const_int 1)] UNSPEC_CPAT))]
+ ""
+ "")
+
+(define_expand "spu_chx"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" "")
+ (const_int 2)] UNSPEC_CPAT))]
+ ""
+ "")
+
+(define_expand "spu_cwx"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" "")
+ (const_int 4)] UNSPEC_CPAT))]
+ ""
+ "")
+
+(define_expand "spu_cdx"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" "")
+ (const_int 8)] UNSPEC_CPAT))]
+ ""
+ "")
+
+
+
+;; Constant formation
+
+(define_expand "spu_ilhu"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (const_vector:V4SI [(match_operand:SI 1 "immediate_operand" "")]))]
+ ""
+ "{ emit_insn(gen_movv4si(operands[0], spu_const(V4SImode, (INTVAL(operands[1]) << 16))));
+ DONE;
+ }")
+
+
+;; integer subtract
+(define_expand "spu_sfh"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "")
+ (minus:V8HI (match_operand:V8HI 2 "spu_nonmem_operand" "")
+ (match_operand:V8HI 1 "spu_reg_operand" "")))]
+ ""
+ "")
+
+(define_expand "spu_sf"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (minus:V4SI (match_operand:V4SI 2 "spu_nonmem_operand" "")
+ (match_operand:V4SI 1 "spu_reg_operand" "")))]
+ ""
+ "")
+
+(define_expand "spu_sfx"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "")
+ (match_operand:V4SI 1 "spu_reg_operand" "")
+ (match_operand:V4SI 3 "spu_reg_operand" "")] UNSPEC_SFX))]
+ ""
+ "")
+
+(define_expand "spu_bg"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "")
+ (match_operand:V4SI 1 "spu_reg_operand" "")] UNSPEC_BG))]
+ ""
+ "")
+
+(define_expand "spu_bgx"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (unspec:V4SI [(match_operand:V4SI 2 "spu_reg_operand" "")
+ (match_operand:V4SI 1 "spu_reg_operand" "")
+ (match_operand:V4SI 3 "spu_reg_operand" "")] UNSPEC_BGX))]
+ ""
+ "")
+
+;; integer multiply
+(define_insn "spu_mpy"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r,r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_arith_operand" "r,B")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
+ ""
+ "@
+ mpy\t%0,%1,%2
+ mpyi\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
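+
+;; Illustrative note, not part of the upstream source: the vec_selects
+;; above pick halfword elements 1, 3, 5 and 7 -- the low 16 bits of each
+;; 32-bit word -- so each element of the V4SI result is the full 32-bit
+;; product of the two operands' low halfwords, sign-extended here and
+;; zero-extended in mpyu below.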
+
+(define_insn "spu_mpyu"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
+ (mult:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r,r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_arith_operand" "r,B")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
+ ""
+ "@
+ mpyu\t%0,%1,%2
+ mpyui\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpya"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))
+ (match_operand:V4SI 3 "spu_reg_operand" "r")))]
+ ""
+ "mpya\t%0,%1,%2,%3"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpyh"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (ashift:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))
+ (const_vector:V4SI [(const_int 16)(const_int 16)(const_int 16)(const_int 16)])))]
+ ""
+ "mpyh\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
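+
+;; Illustrative note, not part of the upstream source: mpyh computes
+;; (hi16(a) * lo16(b)) << 16 per word, so a full 32x32-bit multiply can
+;; be assembled from these 16-bit primitives, e.g.
+;;   mpyh  t1,a,b    ; hi16(a) * lo16(b), shifted left 16
+;;   mpyh  t2,b,a    ; hi16(b) * lo16(a), shifted left 16
+;;   mpyu  t3,a,b    ; lo16(a) * lo16(b)
+;;   a     r,t1,t2
+;;   a     r,r,t3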
+
+(define_insn "spu_mpys"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (ashiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))
+ (const_vector:V4SI [(const_int 16)(const_int 16)(const_int 16)(const_int 16)])))]
+ ""
+ "mpys\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpyhhu"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (mult:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
+ ""
+ "mpyhhu\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpyhh"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
+ ""
+ "mpyhh\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpyhhau"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (plus:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))))
+ (match_operand:V4SI 3 "spu_reg_operand" "0")))]
+ ""
+ "mpyhhau\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "spu_mpyhha"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))))
+ (match_operand:V4SI 3 "spu_reg_operand" "0")))]
+ ""
+ "mpyhha\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+;; form select mask
+(define_insn "spu_fsmb"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r,r")
+ (unspec:V16QI [(match_operand:SI 1 "spu_nonmem_operand" "r,MN")] UNSPEC_FSMB))]
+ ""
+ "@
+ fsmb\t%0,%1
+ fsmbi\t%0,%1"
+ [(set_attr "type" "shuf")])
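+
+;; Illustrative example, not part of the upstream source:
+;; spu_maskb (0x8001) expands each of the low 16 mask bits into a byte
+;; of all-ones or all-zeros, yielding {0xFF, 0, ..., 0, 0xFF}.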
+
+(define_insn "spu_fsmh"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (unspec:V8HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_FSMH))]
+ ""
+ "fsmh\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+(define_insn "spu_fsm"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec:V4SI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
+ ""
+ "fsm\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+
+;; gather bits
+(define_insn "spu_gbb"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec:V4SI [(match_operand:V16QI 1 "spu_reg_operand" "r")] UNSPEC_GBB))]
+ ""
+ "gbb\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+(define_insn "spu_gbh"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec:V4SI [(match_operand:V8HI 1 "spu_reg_operand" "r")] UNSPEC_GBH))]
+ ""
+ "gbh\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+(define_insn "spu_gb"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec:V4SI [(match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_GB))]
+ ""
+ "gb\t%0,%1"
+ [(set_attr "type" "shuf")])
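+
+;; Illustrative note, not part of the upstream source: gb packs the
+;; least significant bit of each of the four words into a 4-bit value
+;; in the preferred slot; gbh and gbb above do the same for the eight
+;; halfwords and sixteen bytes respectively.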
+
+;; misc byte operations
+(define_insn "spu_avgb"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")
+ (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_AVGB))]
+ ""
+ "avgb\t%0,%1,%2"
+ [(set_attr "type" "fxb")])
+
+(define_insn "spu_absdb"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")
+ (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_ABSDB))]
+ ""
+ "absdb\t%0,%1,%2"
+ [(set_attr "type" "fxb")])
+
+(define_insn "spu_sumb"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (unspec:V8HI [(match_operand:V16QI 1 "spu_reg_operand" "r")
+ (match_operand:V16QI 2 "spu_reg_operand" "r")] UNSPEC_SUMB))]
+ ""
+ "sumb\t%0,%1,%2"
+ [(set_attr "type" "fxb")])
+
+;; sign extend
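+;; Each xs* instruction sign-extends the odd-indexed (rightmost)
+;; subelement of every halfword/word/doubleword slot to the full
+;; element width, as the vec_selects of odd lanes below describe.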
+(define_insn "spu_xsbh"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)
+ (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))))]
+ ""
+ "xsbh\t%0,%1")
+
+(define_insn "spu_xshw"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))]
+ ""
+ "xshw\t%0,%1")
+
+(define_insn "spu_xswd"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)]))))]
+ ""
+ "xswd\t%0,%1")
+
+;; or across
+
+(define_insn "spu_orx"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec:V4SI [(match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_ORX))]
+ ""
+ "orx\t%0,%1")
+
+
+;; compare & halt
+(define_insn "spu_heq"
+ [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r")
+ (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HEQ)]
+ ""
+ "@
+ heq\t%0,%1
+ heqi\t%0,%1")
+
+(define_insn "spu_hgt"
+ [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r")
+ (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HGT)]
+ ""
+ "@
+ hgt\t%0,%1
+ hgti\t%0,%1")
+
+(define_insn "spu_hlgt"
+ [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r,r")
+ (match_operand:SI 1 "spu_nonmem_operand" "r,K")] UNSPEC_HLGT)]
+ ""
+ "@
+ hlgt\t%0,%1
+ hlgti\t%0,%1")
+
+;; branches
+
+;; The description below hides the fact that bisled conditionally
+;; executes the call depending on the value in channel 0. This was
+;; done so that the description would conform to the format of a call
+;; insn.  Otherwise (if this were not part of a call insn), the link
+;; register, $lr, would not be saved/restored in the prologue/epilogue.
+
+(define_insn "spu_bisled"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r"))
+ (const_int 0))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))
+ (use (match_operand:SI 1 "address_operand" ""))
+ (use (const_int 0))])]
+ ""
+ "bisled\t$lr,%0"
+ [(set_attr "type" "br")])
+
+(define_insn "spu_bisledd"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r"))
+ (const_int 0))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))
+ (use (match_operand:SI 1 "address_operand" ""))
+ (use (const_int 1))])]
+ ""
+ "bisledd\t$lr,%0"
+ [(set_attr "type" "br")])
+
+(define_insn "spu_bislede"
+ [(parallel
+ [(call (mem:QI (match_operand:SI 0 "spu_reg_operand" "r"))
+ (const_int 0))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))
+ (use (match_operand:SI 1 "address_operand" ""))
+ (use (const_int 2))])]
+ ""
+ "bislede\t$lr,%0"
+ [(set_attr "type" "br")])
+
+;; float convert
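+;; spu_csflt/spu_cuflt compute (float) a * 2^-b, and spu_cflts/spu_cfltu
+;; compute (int) (a * 2^b), where b must lie in [0, 127].  When b is not
+;; a compile-time constant, the to-float expanders below factor the
+;; scale as 2^-1 * 2^(1-b); presumably this keeps the dynamically built
+;; exponent 2^(1-b) within normal single-precision range even for b = 127.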
+(define_expand "spu_csflt"
+ [(set (match_operand:V4SF 0 "spu_reg_operand")
+ (unspec:V4SF [(match_operand:V4SI 1 "spu_reg_operand")
+ (match_operand:SI 2 "spu_nonmem_operand")] 0 ))]
+ ""
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127))
+ {
+ error ("spu_convtf expects an integer literal in the range [0, 127].");
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx exp2;
+ rtx cnv = gen_reg_rtx (V4SFmode);
+ rtx scale = gen_reg_rtx (SImode);
+ rtx op2 = force_reg (SImode, operands[2]);
+ rtx m1 = spu_gen_exp2 (V4SFmode, GEN_INT (-1));
+ emit_insn (gen_subsi3 (scale, const1_rtx, op2));
+ exp2 = spu_gen_exp2 (V4SFmode, scale);
+ emit_insn (gen_floatv4siv4sf2_mul (cnv, operands[1], m1));
+ emit_insn (gen_mulv4sf3 (operands[0], cnv, exp2));
+ }
+ else
+ {
+ rtx exp2 = spu_gen_exp2 (V4SFmode, operands[2]);
+ emit_insn (gen_floatv4siv4sf2_div (operands[0], operands[1], exp2));
+ }
+ DONE;
+})
+
+(define_expand "spu_cflts"
+ [(set (match_operand:V4SI 0 "spu_reg_operand")
+ (unspec:V4SI [(match_operand:V4SF 1 "spu_reg_operand")
+ (match_operand:SI 2 "spu_nonmem_operand")] 0 ))]
+ ""
+{
+ rtx exp2;
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127))
+ {
+ error ("spu_convts expects an integer literal in the range [0, 127].");
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ exp2 = spu_gen_exp2 (V4SFmode, operands[2]);
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx mul = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_mulv4sf3 (mul, operands[1], exp2));
+ emit_insn (gen_fix_truncv4sfv4si2 (operands[0], mul));
+ }
+ else
+ emit_insn (gen_fix_truncv4sfv4si2_mul (operands[0], operands[1], exp2));
+ DONE;
+})
+
+(define_expand "spu_cuflt"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (unspec:V4SF [(match_operand:V4SI 1 "spu_reg_operand")
+ (match_operand:SI 2 "spu_nonmem_operand")] 0 ))]
+ ""
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127))
+ {
+ error ("spu_convtf expects an integer literal in the range [0, 127].");
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx exp2;
+ rtx cnv = gen_reg_rtx (V4SFmode);
+ rtx scale = gen_reg_rtx (SImode);
+ rtx op2 = force_reg (SImode, operands[2]);
+ rtx m1 = spu_gen_exp2 (V4SFmode, GEN_INT (-1));
+ emit_insn (gen_subsi3 (scale, const1_rtx, op2));
+ exp2 = spu_gen_exp2 (V4SFmode, scale);
+ emit_insn (gen_floatunsv4siv4sf2_mul (cnv, operands[1], m1));
+ emit_insn (gen_mulv4sf3 (operands[0], cnv, exp2));
+ }
+ else
+ {
+ rtx exp2 = spu_gen_exp2 (V4SFmode, operands[2]);
+ emit_insn (gen_floatunsv4siv4sf2_div (operands[0], operands[1], exp2));
+ }
+ DONE;
+})
+
+(define_expand "spu_cfltu"
+ [(set (match_operand:V4SI 0 "spu_reg_operand")
+ (unspec:V4SI [(match_operand:V4SF 1 "spu_reg_operand")
+ (match_operand:SI 2 "spu_nonmem_operand")] 0 ))]
+ ""
+{
+ rtx exp2;
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 127))
+ {
+ error ("spu_convtu expects an integer literal in the range [0, 127].");
+ operands[2] = force_reg (SImode, operands[2]);
+ }
+ exp2 = spu_gen_exp2 (V4SFmode, operands[2]);
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx mul = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_mulv4sf3 (mul, operands[1], exp2));
+ emit_insn (gen_fixuns_truncv4sfv4si2 (operands[0], mul));
+ }
+ else
+ emit_insn (gen_fixuns_truncv4sfv4si2_mul (operands[0], operands[1], exp2));
+ DONE;
+})
+
+(define_expand "spu_frds"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V4SF
+ (float_truncate:V2SF (match_operand:V2DF 1 "spu_reg_operand" ""))
+ (match_dup:V2SF 2))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+ ""
+ "operands[2] = spu_const(V2SFmode, 0);")
+
+(define_insn "_frds"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (vec_select:V4SF
+ (vec_concat:V4SF
+ (float_truncate:V2SF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (match_operand:V2SF 2 "vec_imm_operand" "i"))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+ ""
+ "frds\t%0,%1"
+ [(set_attr "type" "fpd")])
+
+(define_insn "spu_fesd"
+ [(set (match_operand:V2DF 0 "spu_reg_operand" "=r")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)]))))]
+ ""
+ "fesd\t%0,%1"
+ [(set_attr "type" "fpd")])
+
+;; control
+(define_insn "spu_stop"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "M")] UNSPEC_STOP)]
+ ""
+ "stop\t%0"
+ [(set_attr "type" "br")])
+
+(define_insn "spu_stopd"
+ [(unspec_volatile [(match_operand:SI 0 "spu_reg_operand" "r")
+ (match_operand:SI 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "spu_reg_operand" "r")] UNSPEC_STOPD)]
+ ""
+ "stopd\t%0,%1,%2"
+ [(set_attr "type" "br")])
+
+;; interrupt disable/enable
+(define_expand "spu_idisable"
+ [(parallel
+ [(unspec_volatile [(const_int 0)] UNSPEC_SET_INTR)
+ (clobber (match_dup:SI 0))
+ (clobber (mem:BLK (scratch)))])]
+ ""
+ "operands[0] = gen_reg_rtx (SImode);")
+
+(define_expand "spu_ienable"
+ [(parallel
+ [(unspec_volatile [(const_int 1)] UNSPEC_SET_INTR)
+ (clobber (match_dup:SI 0))
+ (clobber (mem:BLK (scratch)))])]
+ ""
+ "operands[0] = gen_reg_rtx (SImode);")
+
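+;; Interrupts are toggled by branching to the fall-through address:
+;; set_intr loads the address of the next instruction (. + 8) and
+;; branches to it indirectly, where the %I1 modifier appears to select
+;; the e/d (interrupt enable/disable) form of bi according to operand 1.
+;; The PIC variants compute the target with brsl/ai instead of ila.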
+(define_insn "set_intr"
+ [(unspec_volatile [(match_operand 1 "const_int_operand" "i")] UNSPEC_SET_INTR)
+ (clobber (match_operand:SI 0 "spu_reg_operand" "=&r"))
+ (clobber (mem:BLK (scratch)))]
+ "! flag_pic"
+ "ila\t%0,.+8\;bi%I1\t%0"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi0")])
+
+(define_insn "set_intr_pic"
+ [(unspec_volatile [(match_operand 1 "const_int_operand" "i")] UNSPEC_SET_INTR)
+ (clobber (match_operand:SI 0 "spu_reg_operand" "=&r"))
+ (clobber (mem:BLK (scratch)))]
+ "flag_pic"
+ "brsl\t%0,.+4\;ai\t%0,%0,8\;bi%I1\t%0"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi1")])
+
+(define_insn "set_intr_cc"
+ [(cond_exec (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2 "spu_reg_operand" "r")
+ (const_int 0)])
+ (parallel [(unspec_volatile [(match_operand:SI 3 "const_int_operand" "i")] UNSPEC_SET_INTR)
+ (clobber (match_operand:SI 0 "spu_reg_operand" "=&r"))
+ (clobber (mem:BLK (scratch)))]))]
+ "! flag_pic"
+ "ila\t%0,.+8\;bi%b2%b1z%I3\t%2,%0"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi0")])
+
+(define_insn "set_intr_cc_pic"
+ [(cond_exec (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2 "spu_reg_operand" "r")
+ (const_int 0)])
+ (parallel [(unspec_volatile [(match_operand:SI 3 "const_int_operand" "i")] UNSPEC_SET_INTR)
+ (clobber (match_operand:SI 0 "spu_reg_operand" "=&r"))
+ (clobber (mem:BLK (scratch)))]))]
+ "flag_pic"
+ "brsl\t%0,.+4\;ai\t%0,%0,8\;bi%b2%b1z%I3\t%2,%0"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi1")])
+
+(define_insn "set_intr_return"
+ [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i")] UNSPEC_SET_INTR)
+ (return)]
+ ""
+ "bi%I0\t$lr"
+ [(set_attr "type" "br")])
+
+(define_peephole2
+ [(parallel
+ [(unspec_volatile [(match_operand:SI 0 "const_int_operand")] UNSPEC_SET_INTR)
+ (clobber (match_operand:SI 1 "spu_reg_operand"))
+ (clobber (mem:BLK (scratch)))])
+ (use (reg:SI 0))
+ (return)]
+ ""
+ [(use (reg:SI 0))
+ (parallel
+ [(unspec_volatile [(match_dup:SI 0)] UNSPEC_SET_INTR)
+ (return)])]
+ "")
+
+;; special purpose registers
+(define_insn "spu_fscrrd"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:V4SI [(const_int 6)] UNSPEC_FSCRRD))]
+ ""
+ "fscrrd\t%0"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_fscrwr"
+ [(unspec_volatile [(match_operand:V4SI 0 "spu_reg_operand" "r")] UNSPEC_FSCRWR)]
+ ""
+ "fscrwr\t$0,%0"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_mfspr"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_MFSPR))]
+ ""
+ "mfspr\t%0,$sp%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_mtspr"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J")
+ (match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_MTSPR)]
+ ""
+ "mtspr\t$sp%0,%1"
+ [(set_attr "type" "spr")])
+
+;; channels
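+;; Channel accesses come in two flavors: the *_clobber patterns also
+;; clobber all of memory, which keeps loads and stores from being
+;; scheduled across the channel command.  The expanders below pick the
+;; clobbering form for the DMA-related channel numbers accepted by
+;; spu_safe_dma, and otherwise fall through to the *_noclobber default.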
+(define_expand "spu_rdch"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "")
+ (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_RDCH))]
+ ""
+ "{
+ if (spu_safe_dma (INTVAL (operands[1])))
+ {
+ emit_insn (gen_spu_rdch_clobber (operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
+(define_expand "spu_rchcnt"
+ [(set (match_operand:SI 0 "spu_reg_operand" "")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "")] UNSPEC_RCHCNT))]
+ ""
+ "{
+ if (spu_safe_dma (INTVAL (operands[1])))
+ {
+ emit_insn (gen_spu_rchcnt_clobber (operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
+(define_expand "spu_wrch"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "")
+ (match_operand:V4SI 1 "spu_reg_operand" "")] UNSPEC_WRCH)]
+ ""
+ "{
+ if (spu_safe_dma (INTVAL (operands[0])))
+ {
+ emit_insn (gen_spu_wrch_clobber (operands[0], operands[1]));
+ DONE;
+ }
+ }")
+
+(define_insn "spu_rdch_noclobber"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RDCH))]
+ ""
+ "rdch\t%0,$ch%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_rchcnt_noclobber"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RCHCNT))]
+ ""
+ "rchcnt\t%0,$ch%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_wrch_noclobber"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J")
+ (match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_WRCH)]
+ ""
+ "wrch\t$ch%0,%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_rdch_clobber"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:V4SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RDCH))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "rdch\t%0,$ch%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_rchcnt_clobber"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "J")] UNSPEC_RCHCNT))
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "rchcnt\t%0,$ch%1"
+ [(set_attr "type" "spr")])
+
+(define_insn "spu_wrch_clobber"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "J")
+ (match_operand:V4SI 1 "spu_reg_operand" "r")] UNSPEC_WRCH)
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "wrch\t$ch%0,%1"
+ [(set_attr "type" "spr")])
+
+(define_expand "spu_splats"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (vec_duplicate (match_operand 1 "spu_nonmem_operand" "")))]
+ ""
+ {
+ spu_builtin_splats(operands);
+ DONE;
+ })
+
+(define_expand "spu_extract"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")
+ (match_operand 2 "spu_nonmem_operand" "")] 0))]
+ ""
+ {
+ spu_builtin_extract (operands);
+ DONE;
+ })
+
+(define_expand "spu_insert"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")
+ (match_operand 2 "spu_reg_operand" "")
+ (match_operand:SI 3 "spu_nonmem_operand" "")] 0))]
+ ""
+ {
+ spu_builtin_insert(operands);
+ DONE;
+ })
+
+(define_expand "spu_promote"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")] 0))]
+ ""
+ {
+ spu_builtin_promote(operands);
+ DONE;
+ })
+
+;; Currently doing nothing with this but expanding its args.
+(define_expand "spu_align_hint"
+ [(unspec [(match_operand:SI 0 "address_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")] 0)]
+ ""
+ {
+ DONE;
+ })
+
diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c
new file mode 100644
index 000000000..905030d5e
--- /dev/null
+++ b/gcc/config/spu/spu-c.c
@@ -0,0 +1,234 @@
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "c-family/c-common.h"
+#include "c-family/c-pragma.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "target.h"
+
+
+/* Keep the vector keywords handy for fast comparisons. */
+static GTY(()) tree __vector_keyword;
+static GTY(()) tree vector_keyword;
+
+static cpp_hashnode *
+spu_categorize_keyword (const cpp_token *tok)
+{
+ if (tok->type == CPP_NAME)
+ {
+ cpp_hashnode *ident = tok->val.node.node;
+
+ if (ident == C_CPP_HASHNODE (vector_keyword)
+ || ident == C_CPP_HASHNODE (__vector_keyword))
+ return C_CPP_HASHNODE (__vector_keyword);
+ else
+ return ident;
+ }
+ return 0;
+}
+
+/* Called to decide whether a conditional macro should be expanded.
+   Since we have exactly one such macro (i.e., 'vector'), we do not
+ need to examine the 'tok' parameter. */
+
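+/* For example, in "vector unsigned int v;" the token following 'vector'
+   is a type keyword (after skipping over an intervening macro, if any),
+   so the conditional macro is expanded; in "int vector = 0;" it is not,
+   and the identifier keeps its ordinary meaning.  */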
+static cpp_hashnode *
+spu_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
+{
+ cpp_hashnode *expand_this = tok->val.node.node;
+ cpp_hashnode *ident;
+
+ ident = spu_categorize_keyword (tok);
+ if (ident == C_CPP_HASHNODE (__vector_keyword))
+ {
+ tok = cpp_peek_token (pfile, 0);
+ ident = spu_categorize_keyword (tok);
+
+ if (ident)
+ {
+ enum rid rid_code = (enum rid)(ident->rid_code);
+ if (ident->type == NT_MACRO)
+ {
+ (void) cpp_get_token (pfile);
+ tok = cpp_peek_token (pfile, 0);
+ ident = spu_categorize_keyword (tok);
+ if (ident)
+ rid_code = (enum rid)(ident->rid_code);
+ }
+
+ if (rid_code == RID_UNSIGNED || rid_code == RID_LONG
+ || rid_code == RID_SHORT || rid_code == RID_SIGNED
+ || rid_code == RID_INT || rid_code == RID_CHAR
+ || rid_code == RID_FLOAT || rid_code == RID_DOUBLE)
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
+ }
+ }
+ return expand_this;
+}
+
+/* Target hook for resolve_overloaded_builtin().  Returns a function
+   call tree if we can resolve the overloaded builtin.  */
+tree
+spu_resolve_overloaded_builtin (location_t loc, tree fndecl, void *passed_args)
+{
+#define SCALAR_TYPE_P(t) (INTEGRAL_TYPE_P (t) \
+ || SCALAR_FLOAT_TYPE_P (t) \
+ || POINTER_TYPE_P (t))
+ VEC(tree,gc) *fnargs = (VEC(tree,gc) *) passed_args;
+ unsigned int nargs = VEC_length (tree, fnargs);
+ int new_fcode, fcode = DECL_FUNCTION_CODE (fndecl);
+ struct spu_builtin_description *desc;
+ tree match = NULL_TREE;
+
+ /* The vector types are not available if the backend is not initialized. */
+ gcc_assert (!flag_preprocess_only);
+
+ desc = &spu_builtins[fcode];
+ if (desc->type != B_OVERLOAD)
+ return NULL_TREE;
+
+ /* Compare the signature of each internal builtin function with the
+ function arguments until a match is found. */
+
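+  /* (The table lists each B_OVERLOAD entry immediately followed by its
+     B_INTERNAL variants, e.g. one per element type for a generic
+     intrinsic such as spu_add; the first variant whose parameters
+     accept the actual arguments wins.)  */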
+ for (new_fcode = fcode + 1; spu_builtins[new_fcode].type == B_INTERNAL;
+ new_fcode++)
+ {
+ tree decl = targetm.builtin_decl (new_fcode, true);
+ tree params = TYPE_ARG_TYPES (TREE_TYPE (decl));
+ tree param;
+ bool all_scalar;
+ unsigned int p;
+
+ /* Check whether all parameters are scalar. */
+ all_scalar = true;
+ for (param = params; param != void_list_node; param = TREE_CHAIN (param))
+ if (!SCALAR_TYPE_P (TREE_VALUE (param)))
+ all_scalar = false;
+
+ for (param = params, p = 0;
+ param != void_list_node;
+ param = TREE_CHAIN (param), p++)
+ {
+ tree var, arg_type, param_type = TREE_VALUE (param);
+
+ if (p >= nargs)
+ {
+ error ("insufficient arguments to overloaded function %s",
+ desc->name);
+ return error_mark_node;
+ }
+
+ var = VEC_index (tree, fnargs, p);
+
+ if (TREE_CODE (var) == NON_LVALUE_EXPR)
+ var = TREE_OPERAND (var, 0);
+
+ if (TREE_CODE (var) == ERROR_MARK)
+ return NULL_TREE; /* Let somebody else deal with the problem. */
+
+ arg_type = TREE_TYPE (var);
+
+ /* The intrinsics spec does not specify precisely how to
+ resolve generic intrinsics. We require an exact match
+	     for vector types and let C do its usual parameter type
+ checking/promotions for scalar arguments, except for the
+ first argument of intrinsics which don't have a vector
+ parameter. */
+ if ((!SCALAR_TYPE_P (param_type)
+ || !SCALAR_TYPE_P (arg_type)
+ || (all_scalar && p == 0))
+ && !lang_hooks.types_compatible_p (param_type, arg_type))
+ break;
+ }
+ if (param == void_list_node)
+ {
+ if (p != nargs)
+ {
+ error ("too many arguments to overloaded function %s",
+ desc->name);
+ return error_mark_node;
+ }
+
+ match = decl;
+ break;
+ }
+ }
+
+ if (match == NULL_TREE)
+ {
+ error ("parameter list does not match a valid signature for %s()",
+ desc->name);
+ return error_mark_node;
+ }
+
+ return build_function_call_vec (loc, match, fnargs, NULL);
+#undef SCALAR_TYPE_P
+}
+
+
+void
+spu_cpu_cpp_builtins (struct cpp_reader *pfile)
+{
+ cpp_define (pfile, "__SPU__");
+ cpp_assert (pfile, "cpu=spu");
+ cpp_assert (pfile, "machine=spu");
+ if (spu_arch == PROCESSOR_CELLEDP)
+ cpp_define (pfile, "__SPU_EDP__");
+ cpp_define (pfile, "__vector=__attribute__((__spu_vector__))");
+ switch (spu_ea_model)
+ {
+ case 32:
+ cpp_define (pfile, "__EA32__");
+ break;
+ case 64:
+ cpp_define (pfile, "__EA64__");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!flag_iso)
+ {
+ /* Define this when supporting context-sensitive keywords. */
+ cpp_define (pfile, "__VECTOR_KEYWORD_SUPPORTED__");
+ cpp_define (pfile, "vector=vector");
+
+ /* Initialize vector keywords. */
+ __vector_keyword = get_identifier ("__vector");
+ C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL;
+ vector_keyword = get_identifier ("vector");
+ C_CPP_HASHNODE (vector_keyword)->flags |= NODE_CONDITIONAL;
+
+ /* Enable context-sensitive macros. */
+ cpp_get_callbacks (pfile)->macro_to_expand = spu_macro_to_expand;
+ }
+}
+
+void
+spu_c_common_override_options (void)
+{
+ if (!TARGET_STD_MAIN)
+ {
+ /* Don't give warnings about the main() function. */
+ warn_main = 0;
+ }
+}
diff --git a/gcc/config/spu/spu-elf.h b/gcc/config/spu/spu-elf.h
new file mode 100644
index 000000000..818c391f9
--- /dev/null
+++ b/gcc/config/spu/spu-elf.h
@@ -0,0 +1,80 @@
+/* Copyright (C) 2006, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef OBJECT_FORMAT_ELF
+ #error elf.h included before elfos.h
+#endif
+
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+
+/* The following macros define "native" directory locations; on the SPU,
+ these are used only when building the compiler with --with-sysroot.
+ This can be used to build a pair of PPU and SPU cross-compilers with
+ a common sysroot; the SPU compiler will search for its files in
+ ${sysroot}/include and ${sysroot}/lib. */
+
+/* STANDARD_STARTFILE_PREFIX_1 is "/lib", which we keep.
+ STANDARD_STARTFILE_PREFIX_2 is "/usr/lib" -- we remove this. */
+#undef STANDARD_STARTFILE_PREFIX_2
+#define STANDARD_STARTFILE_PREFIX_2 ""
+
+/* Use "/include" instead of "/usr/include". */
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/include"
+
+/* We do not provide any "/usr/local/include" directory on SPU. */
+#undef LOCAL_INCLUDE_DIR
+
+/* Provide a STARTFILE_SPEC appropriate for GNU/Linux. Here we add
+ the GNU/Linux magical crtbegin.o file (see crtstuff.c) which
+ provides part of the support for getting C++ file-scope static
+   objects constructed before entering `main'.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{mstdmain: %{pg|p:gcrt2.o%s;:crt2.o%s}}\
+ %{!mstdmain: %{pg|p:gcrt1.o%s;:crt1.o%s}}\
+ crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+#define SET_ASM_OP "\t.set\t"
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+#define EH_FRAME_IN_DATA_SECTION 1
+
+#define LINK_SPEC "%{mlarge-mem: --defsym __stack=0xfffffff0 }"
+
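+/* Select the software-managed cache support library variant matching
+   the -mcache-size= option (64k when unspecified) and the atomic or
+   non-atomic cache update flavor.  */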
+#define LIB_SPEC "-( %{!shared:%{g*:-lg}} -lc -lgloss -) \
+ %{mno-atomic-updates:-lgcc_cachemgr_nonatomic; :-lgcc_cachemgr} \
+ %{mcache-size=128:-lgcc_cache128k; \
+ mcache-size=64 :-lgcc_cache64k; \
+ mcache-size=32 :-lgcc_cache32k; \
+ mcache-size=16 :-lgcc_cache16k; \
+ mcache-size=8 :-lgcc_cache8k; \
+ :-lgcc_cache64k}"
+
diff --git a/gcc/config/spu/spu-modes.def b/gcc/config/spu/spu-modes.def
new file mode 100644
index 000000000..ef2101259
--- /dev/null
+++ b/gcc/config/spu/spu-modes.def
@@ -0,0 +1,29 @@
+/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Vector modes. */
+VECTOR_MODES (INT, 2); /* V2QI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+
+
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+
+/* cse_insn needs an INT_MODE larger than WORD_MODE, otherwise some
+ parts of it will go into an infinite loop. */
+INT_MODE (OI, 32);
diff --git a/gcc/config/spu/spu-protos.h b/gcc/config/spu/spu-protos.h
new file mode 100644
index 000000000..fa9453527
--- /dev/null
+++ b/gcc/config/spu/spu-protos.h
@@ -0,0 +1,96 @@
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU_PROTOS_
+#define _SPU_PROTOS_
+
+extern void spu_cpu_cpp_builtins (struct cpp_reader * pfile);
+extern void builtin_define_std (const char *);
+extern void spu_c_common_override_options (void);
+extern int valid_subreg (rtx op);
+extern void spu_expand_extv (rtx * ops, int unsignedp);
+extern void spu_expand_insv (rtx * ops);
+extern int spu_expand_block_move (rtx * ops);
+extern void spu_emit_branch_or_set (int is_set, rtx cmp, rtx * operands);
+extern int spu_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
+extern HOST_WIDE_INT const_double_to_hwint (rtx x);
+extern void print_operand_address (FILE * file, register rtx addr);
+extern void print_operand (FILE * file, rtx x, int code);
+extern int spu_split_immediate (rtx * ops);
+extern int spu_saved_regs_size (void);
+extern int direct_return (void);
+extern void spu_expand_prologue (void);
+extern void spu_expand_epilogue (bool sibcall_p);
+extern rtx spu_return_addr (int count, rtx frame);
+
+#ifdef RTX_CODE
+extern rtx hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v);
+extern rtx spu_const (enum machine_mode mode, HOST_WIDE_INT val);
+extern rtx spu_const_from_ints (enum machine_mode mode,
+ int a, int b, int c, int d);
+extern struct rtx_def *spu_float_const (const char *string,
+ enum machine_mode mode);
+extern int immediate_load_p (rtx op, enum machine_mode mode);
+extern int logical_immediate_p (rtx op, enum machine_mode mode);
+extern int iohl_immediate_p (rtx op, enum machine_mode mode);
+extern int arith_immediate_p (rtx op, enum machine_mode mode,
+ HOST_WIDE_INT low, HOST_WIDE_INT high);
+extern bool exp2_immediate_p (rtx op, enum machine_mode mode, int low,
+ int high);
+extern int spu_constant_address_p (rtx x);
+extern int spu_legitimate_constant_p (rtx x);
+extern int spu_initial_elimination_offset (int from, int to);
+extern rtx spu_function_value (const_tree type, const_tree func);
+extern void spu_setup_incoming_varargs (int *cum, enum machine_mode mode,
+ tree type, int *pretend_size,
+ int no_rtl);
+extern int spu_expand_mov (rtx * ops, enum machine_mode mode);
+extern int spu_split_load (rtx * ops);
+extern int spu_split_store (rtx * ops);
+extern int fsmbi_const_p (rtx x);
+extern int cpat_const_p (rtx x, enum machine_mode mode);
+extern rtx gen_cpat_const (rtx * ops);
+extern void constant_to_array (enum machine_mode mode, rtx x,
+ unsigned char *arr);
+extern rtx array_to_constant (enum machine_mode mode, const unsigned char *arr);
+extern rtx spu_gen_exp2 (enum machine_mode mode, rtx x);
+extern void spu_allocate_stack (rtx op0, rtx op1);
+extern void spu_restore_stack_nonlocal (rtx op0, rtx op1);
+extern void spu_restore_stack_block (rtx op0, rtx op1);
+extern rtx spu_gen_subreg (enum machine_mode mode, rtx x);
+extern int spu_safe_dma(HOST_WIDE_INT channel);
+extern void spu_builtin_splats (rtx ops[]);
+extern void spu_builtin_extract (rtx ops[]);
+extern void spu_builtin_insert (rtx ops[]);
+extern void spu_builtin_promote (rtx ops[]);
+extern void spu_expand_sign_extend (rtx ops[]);
+extern void spu_expand_vector_init (rtx target, rtx vals);
+#endif /* RTX_CODE */
+
+extern void spu_init_expanders (void);
+extern void spu_split_convert (rtx *);
+extern void spu_function_profiler (FILE *, int);
+
+/* spu-c.c */
+extern tree spu_resolve_overloaded_builtin (location_t, tree fndecl,
+ void *fnargs);
+extern rtx spu_expand_builtin (tree exp, rtx target, rtx subtarget,
+ enum machine_mode mode, int ignore);
+
+#endif /* _SPU_PROTOS_ */
+
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
new file mode 100644
index 000000000..dffca84b0
--- /dev/null
+++ b/gcc/config/spu/spu.c
@@ -0,0 +1,7180 @@
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "output.h"
+#include "basic-block.h"
+#include "integrate.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "hashtab.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "reload.h"
+#include "cfglayout.h"
+#include "sched-int.h"
+#include "params.h"
+#include "machmode.h"
+#include "gimple.h"
+#include "tm-constrs.h"
+#include "ddg.h"
+#include "sbitmap.h"
+#include "timevar.h"
+#include "df.h"
+
+/* Builtin types, data and prototypes. */
+
+enum spu_builtin_type_index
+{
+ SPU_BTI_END_OF_PARAMS,
+
+ /* We create new type nodes for these. */
+ SPU_BTI_V16QI,
+ SPU_BTI_V8HI,
+ SPU_BTI_V4SI,
+ SPU_BTI_V2DI,
+ SPU_BTI_V4SF,
+ SPU_BTI_V2DF,
+ SPU_BTI_UV16QI,
+ SPU_BTI_UV8HI,
+ SPU_BTI_UV4SI,
+ SPU_BTI_UV2DI,
+
+ /* A 16-byte type. (Implemented with V16QI_type_node) */
+ SPU_BTI_QUADWORD,
+
+ /* These all correspond to intSI_type_node */
+ SPU_BTI_7,
+ SPU_BTI_S7,
+ SPU_BTI_U7,
+ SPU_BTI_S10,
+ SPU_BTI_S10_4,
+ SPU_BTI_U14,
+ SPU_BTI_16,
+ SPU_BTI_S16,
+ SPU_BTI_S16_2,
+ SPU_BTI_U16,
+ SPU_BTI_U16_2,
+ SPU_BTI_U18,
+
+ /* These correspond to the standard types */
+ SPU_BTI_INTQI,
+ SPU_BTI_INTHI,
+ SPU_BTI_INTSI,
+ SPU_BTI_INTDI,
+
+ SPU_BTI_UINTQI,
+ SPU_BTI_UINTHI,
+ SPU_BTI_UINTSI,
+ SPU_BTI_UINTDI,
+
+ SPU_BTI_FLOAT,
+ SPU_BTI_DOUBLE,
+
+ SPU_BTI_VOID,
+ SPU_BTI_PTR,
+
+ SPU_BTI_MAX
+};
+
+#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
+#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
+#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
+#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
+#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
+#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
+#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
+#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
+#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
+#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
+
+static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
+
+struct spu_builtin_range
+{
+ int low, high;
+};
+
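+/* Allowed ranges for the literal argument of each immediate builtin
+   type.  Entries without a signedness suffix, such as SPU_BTI_7 or
+   SPU_BTI_16, span the union of the signed and unsigned encodings of
+   the field (e.g. -0x40 .. 0x7f for a 7-bit immediate), while the S/U
+   variants accept only one interpretation.  */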
+static struct spu_builtin_range spu_builtin_range[] = {
+ {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
+ {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
+ {0ll, 0x7fll}, /* SPU_BTI_U7 */
+ {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
+ {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
+ {0ll, 0x3fffll}, /* SPU_BTI_U14 */
+ {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
+ {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
+ {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
+ {0ll, 0xffffll}, /* SPU_BTI_U16 */
+ {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
+ {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
+};
+
+
+/* Registers that have ever been allocated.  */
+char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
+
+/* Prototypes and external defs. */
+static void spu_option_override (void);
+static void spu_option_init_struct (struct gcc_options *opts);
+static void spu_option_default_params (void);
+static void spu_init_builtins (void);
+static tree spu_builtin_decl (unsigned, bool);
+static bool spu_scalar_mode_supported_p (enum machine_mode mode);
+static bool spu_vector_mode_supported_p (enum machine_mode mode);
+static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
+ bool, addr_space_t);
+static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
+static rtx get_pic_reg (void);
+static int need_to_save_reg (int regno, int saving);
+static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
+static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
+static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
+ rtx scratch);
+static void emit_nop_for_insn (rtx insn);
+static bool insn_clobbers_hbr (rtx insn);
+static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
+ int distance, sbitmap blocks);
+static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
+ enum machine_mode dmode);
+static rtx get_branch_target (rtx branch);
+static void spu_machine_dependent_reorg (void);
+static int spu_sched_issue_rate (void);
+static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
+ int can_issue_more);
+static int get_pipe (rtx insn);
+static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
+static void spu_sched_init_global (FILE *, int, int);
+static void spu_sched_init (FILE *, int, int);
+static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
+static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
+ int flags,
+ bool *no_add_attrs);
+static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
+ int flags,
+ bool *no_add_attrs);
+static int spu_naked_function_p (tree func);
+static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
+static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
+static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named);
+static tree spu_build_builtin_va_list (void);
+static void spu_va_start (tree, rtx);
+static tree spu_gimplify_va_arg_expr (tree valist, tree type,
+ gimple_seq * pre_p, gimple_seq * post_p);
+static int store_with_one_insn_p (rtx mem);
+static int mem_is_padded_component_ref (rtx x);
+static int reg_aligned_for_addr (rtx x);
+static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
+static void spu_asm_globalize_label (FILE * file, const char *name);
+static bool spu_rtx_costs (rtx x, int code, int outer_code,
+ int *total, bool speed);
+static bool spu_function_ok_for_sibcall (tree decl, tree exp);
+static void spu_init_libfuncs (void);
+static bool spu_return_in_memory (const_tree type, const_tree fntype);
+static void fix_range (const char *);
+static void spu_encode_section_info (tree, rtx, int);
+static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
+ addr_space_t);
+static tree spu_builtin_mul_widen_even (tree);
+static tree spu_builtin_mul_widen_odd (tree);
+static tree spu_builtin_mask_for_load (void);
+static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
+static bool spu_vector_alignment_reachable (const_tree, bool);
+static tree spu_builtin_vec_perm (tree, tree *);
+static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
+static enum machine_mode spu_addr_space_address_mode (addr_space_t);
+static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
+static rtx spu_addr_space_convert (rtx, tree, tree);
+static int spu_sms_res_mii (struct ddg *g);
+static unsigned int spu_section_type_flags (tree, const char *, int);
+static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
+static void spu_unique_section (tree, int);
+static rtx spu_expand_load (rtx, rtx, rtx, int);
+static void spu_trampoline_init (rtx, tree, rtx);
+static void spu_conditional_register_usage (void);
+static bool spu_ref_may_alias_errno (ao_ref *);
+
+/* Which instruction set architecture to use. */
+int spu_arch;
+/* Which cpu are we tuning for. */
+int spu_tune;
+
+/* The hardware requires 8 insns between a hint and the branch it
+   affects.  This variable describes how many rtl instructions the
+   compiler needs to see before inserting a hint, and then the compiler
+   will insert enough nops to make it at least 8 insns.  The default is
+   for the compiler to allow up to 2 nops to be emitted.  The nops are
+   inserted in pairs, so we round down. */
+int spu_hint_dist = (8*4) - (2*4);
+
+enum spu_immediate {
+ SPU_NONE,
+ SPU_IL,
+ SPU_ILA,
+ SPU_ILH,
+ SPU_ILHU,
+ SPU_ORI,
+ SPU_ORHI,
+ SPU_ORBI,
+ SPU_IOHL
+};
+enum immediate_class
+{
+ IC_POOL, /* constant pool */
+ IC_IL1, /* one il* instruction */
+ IC_IL2, /* both ilhu and iohl instructions */
+ IC_IL1s, /* one il* instruction */
+ IC_IL2s, /* both ilhu and iohl instructions */
+ IC_FSMBI, /* the fsmbi instruction */
+ IC_CPAT, /* one of the c*d instructions */
+ IC_FSMBI2 /* fsmbi plus 1 other instruction */
+};
+
+static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
+static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
+static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
+static enum immediate_class classify_immediate (rtx op,
+ enum machine_mode mode);
+
+static enum machine_mode spu_unwind_word_mode (void);
+
+static enum machine_mode
+spu_libgcc_cmp_return_mode (void);
+
+static enum machine_mode
+spu_libgcc_shift_count_mode (void);
+
+/* Pointer mode for __ea references. */
+#define EAmode (spu_ea_model != 32 ? DImode : SImode)
+
+
+/* Table of machine attributes. */
+static const struct attribute_spec spu_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
+ { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* TARGET overrides. */
+
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
+
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
+
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+ spu_addr_space_legitimate_address_p
+
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
+
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS spu_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL spu_builtin_decl
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN spu_expand_builtin
+
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
+
+/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
+ and .quad for the debugger. When it is known that the assembler is fixed,
+ these can be removed. */
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
+/* The .8byte directive doesn't seem to work well for a 32-bit
+   architecture. */
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP NULL
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS spu_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT spu_sched_init
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
+
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER spu_sched_reorder
+
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 spu_sched_reorder
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
+
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER spu_assemble_integer
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
+
+#undef TARGET_ASM_GLOBALIZE_LABEL
+#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG spu_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
+
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
+
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
+
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
+
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
+
+#undef TARGET_LIBGCC_CMP_RETURN_MODE
+#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
+
+#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
+#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
+
+#undef TARGET_SCHED_SMS_RES_MII
+#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
+
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION spu_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE spu_option_override
+
+#undef TARGET_OPTION_INIT_STRUCT
+#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
+
+#undef TARGET_OPTION_DEFAULT_PARAMS
+#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
+
+#undef TARGET_REF_MAY_ALIAS_ERRNO
+#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
+
+/* Variable tracking should be run after all optimizations which
+ change order of insns. It also needs a valid CFG. */
+#undef TARGET_DELAY_VARTRACK
+#define TARGET_DELAY_VARTRACK true
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+static void
+spu_option_init_struct (struct gcc_options *opts)
+{
+ /* With so many registers this is better on by default. */
+ opts->x_flag_rename_registers = 1;
+}
+
+/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
+static void
+spu_option_default_params (void)
+{
+ /* Override some of the default param values. With so many registers
+ larger values are better for these params. */
+ set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
+}
+
+/* Implement TARGET_OPTION_OVERRIDE. */
+static void
+spu_option_override (void)
+{
+  /* Small loops will be completely peeled at -O3.  For SPU it is more
+     important to keep code small by default.  */
+ if (!flag_unroll_loops && !flag_peel_loops)
+ maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
+ global_options.x_param_values,
+ global_options_set.x_param_values);
+
+ flag_omit_frame_pointer = 1;
+
+  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
+ if (align_functions < 8)
+ align_functions = 8;
+
+ spu_hint_dist = 8*4 - spu_max_nops*4;
+ if (spu_hint_dist < 0)
+ spu_hint_dist = 0;
+
+ if (spu_fixed_range_string)
+ fix_range (spu_fixed_range_string);
+
+ /* Determine processor architectural level. */
+ if (spu_arch_string)
+ {
+ if (strcmp (&spu_arch_string[0], "cell") == 0)
+ spu_arch = PROCESSOR_CELL;
+ else if (strcmp (&spu_arch_string[0], "celledp") == 0)
+ spu_arch = PROCESSOR_CELLEDP;
+ else
+ error ("bad value (%s) for -march= switch", spu_arch_string);
+ }
+
+ /* Determine processor to tune for. */
+ if (spu_tune_string)
+ {
+ if (strcmp (&spu_tune_string[0], "cell") == 0)
+ spu_tune = PROCESSOR_CELL;
+ else if (strcmp (&spu_tune_string[0], "celledp") == 0)
+ spu_tune = PROCESSOR_CELLEDP;
+ else
+ error ("bad value (%s) for -mtune= switch", spu_tune_string);
+ }
+
+ /* Change defaults according to the processor architecture. */
+ if (spu_arch == PROCESSOR_CELLEDP)
+ {
+ /* If no command line option has been otherwise specified, change
+ the default to -mno-safe-hints on celledp -- only the original
+ Cell/B.E. processors require this workaround. */
+ if (!(target_flags_explicit & MASK_SAFE_HINTS))
+ target_flags &= ~MASK_SAFE_HINTS;
+ }
+
+ REAL_MODE_FORMAT (SFmode) = &spu_single_format;
+}
+
+/* True if MODE is valid for the target. By "valid", we mean able to
+ be manipulated in non-trivial ways. In particular, this means all
+ the arithmetic is supported. */
+static bool
+spu_scalar_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case SFmode:
+ case DImode:
+ case TImode:
+ case DFmode:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Similarly for vector modes. "Supported" here is less strict. At
+ least some operations are supported; need to check optabs or builtins
+ for further details. */
+static bool
+spu_vector_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
+ least significant bytes of the outer mode. This function returns
+   TRUE for the SUBREGs where this is correct.  */
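+/* For example, (subreg:SI (reg:QI)) qualifies because both modes fit in
+   one 4-byte slot, while (subreg:TI (reg:DI)) does not: the sizes differ
+   and are not both <= 4 or both >= 16 bytes.  */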
+int
+valid_subreg (rtx op)
+{
+ enum machine_mode om = GET_MODE (op);
+ enum machine_mode im = GET_MODE (SUBREG_REG (op));
+ return om != VOIDmode && im != VOIDmode
+ && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
+ || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
+ || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
+}
+
+/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
+ and adjust the start offset. */
+static rtx
+adjust_operand (rtx op, HOST_WIDE_INT * start)
+{
+ enum machine_mode mode;
+ int op_size;
+ /* Strip any paradoxical SUBREG. */
+ if (GET_CODE (op) == SUBREG
+ && (GET_MODE_BITSIZE (GET_MODE (op))
+ > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
+ {
+ if (start)
+ *start -=
+ GET_MODE_BITSIZE (GET_MODE (op)) -
+ GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
+ op = SUBREG_REG (op);
+ }
+  /* If it is smaller than SI, widen it to SImode; the SUBREG itself
+     is added below.  */
+ op_size = GET_MODE_BITSIZE (GET_MODE (op));
+ if (op_size < 32)
+ {
+ if (start)
+ *start += 32 - op_size;
+ op_size = 32;
+ }
+ /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
+ mode = mode_for_size (op_size, MODE_INT, 0);
+ if (mode != GET_MODE (op))
+ op = gen_rtx_SUBREG (mode, op, 0);
+ return op;
+}
+
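+/* Extract a bit field: ops[0] is the TImode destination, ops[1] the
+   source (MEM, SUBREG or TImode REG), ops[2] the width in bits and
+   ops[3] the start bit.  The field is rotated to the top of a TImode
+   register and then shifted right, with sign or zero extension
+   according to UNSIGNEDP.  */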
+void
+spu_expand_extv (rtx ops[], int unsignedp)
+{
+ rtx dst = ops[0], src = ops[1];
+ HOST_WIDE_INT width = INTVAL (ops[2]);
+ HOST_WIDE_INT start = INTVAL (ops[3]);
+ HOST_WIDE_INT align_mask;
+ rtx s0, s1, mask, r0;
+
+ gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
+
+ if (MEM_P (src))
+ {
+ /* First, determine if we need 1 TImode load or 2. We need only 1
+ if the bits being extracted do not cross the alignment boundary
+ as determined by the MEM and its address. */
+
+ align_mask = -MEM_ALIGN (src);
+ if ((start & align_mask) == ((start + width - 1) & align_mask))
+ {
+ /* Alignment is sufficient for 1 load. */
+ s0 = gen_reg_rtx (TImode);
+ r0 = spu_expand_load (s0, 0, src, start / 8);
+ start &= 7;
+ if (r0)
+ emit_insn (gen_rotqby_ti (s0, s0, r0));
+ }
+ else
+ {
+ /* Need 2 loads. */
+ s0 = gen_reg_rtx (TImode);
+ s1 = gen_reg_rtx (TImode);
+ r0 = spu_expand_load (s0, s1, src, start / 8);
+ start &= 7;
+
+ gcc_assert (start + width <= 128);
+ if (r0)
+ {
+ rtx r1 = gen_reg_rtx (SImode);
+ mask = gen_reg_rtx (TImode);
+ emit_move_insn (mask, GEN_INT (-1));
+ emit_insn (gen_rotqby_ti (s0, s0, r0));
+ emit_insn (gen_rotqby_ti (s1, s1, r0));
+ if (GET_CODE (r0) == CONST_INT)
+ r1 = GEN_INT (INTVAL (r0) & 15);
+ else
+ emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
+ emit_insn (gen_shlqby_ti (mask, mask, r1));
+ emit_insn (gen_selb (s0, s1, s0, mask));
+ }
+ }
+
+ }
+ else if (GET_CODE (src) == SUBREG)
+ {
+ rtx r = SUBREG_REG (src);
+ gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
+ s0 = gen_reg_rtx (TImode);
+ if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
+ emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
+ else
+ emit_move_insn (s0, src);
+ }
+ else
+ {
+ gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
+ s0 = gen_reg_rtx (TImode);
+ emit_move_insn (s0, src);
+ }
+
+ /* Now s0 is TImode and contains the bits to extract at start. */
+
+ if (start)
+ emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
+
+ if (128 - width)
+ {
+ tree c = build_int_cst (NULL_TREE, 128 - width);
+ s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
+ }
+
+ emit_move_insn (dst, s0);
+}
+
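+/* Insert a bit field: ops[0] is the destination, ops[1] the width in
+   bits, ops[2] the start bit and ops[3] the source.  The source is
+   shifted into position and merged under a mask with selb; a MEM
+   destination becomes a read-modify-write of one or two aligned
+   quadwords.  */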
+void
+spu_expand_insv (rtx ops[])
+{
+ HOST_WIDE_INT width = INTVAL (ops[1]);
+ HOST_WIDE_INT start = INTVAL (ops[2]);
+ HOST_WIDE_INT maskbits;
+ enum machine_mode dst_mode;
+ rtx dst = ops[0], src = ops[3];
+ int dst_size;
+ rtx mask;
+ rtx shift_reg;
+ int shift;
+
+
+ if (GET_CODE (ops[0]) == MEM)
+ dst = gen_reg_rtx (TImode);
+ else
+ dst = adjust_operand (dst, &start);
+ dst_mode = GET_MODE (dst);
+ dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
+
+ if (CONSTANT_P (src))
+ {
+ enum machine_mode m =
+ (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
+ src = force_reg (m, convert_to_mode (m, src, 0));
+ }
+ src = adjust_operand (src, 0);
+
+ mask = gen_reg_rtx (dst_mode);
+ shift_reg = gen_reg_rtx (dst_mode);
+ shift = dst_size - start - width;
+
+ /* It's not safe to use subreg here because the compiler assumes
+ that the SUBREG_REG is right justified in the SUBREG. */
+ convert_move (shift_reg, src, 1);
+
+ if (shift > 0)
+ {
+ switch (dst_mode)
+ {
+ case SImode:
+ emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
+ break;
+ case DImode:
+ emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
+ break;
+ case TImode:
+ emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
+ break;
+ default:
+ abort ();
+ }
+ }
+ else if (shift < 0)
+ abort ();
+
+ switch (dst_size)
+ {
+ case 32:
+ maskbits = (-1ll << (32 - width - start));
+ if (start)
+ maskbits += (1ll << (32 - start));
+ emit_move_insn (mask, GEN_INT (maskbits));
+ break;
+ case 64:
+ maskbits = (-1ll << (64 - width - start));
+ if (start)
+ maskbits += (1ll << (64 - start));
+ emit_move_insn (mask, GEN_INT (maskbits));
+ break;
+ case 128:
+ {
+ unsigned char arr[16];
+ int i = start / 8;
+ memset (arr, 0, sizeof (arr));
+ arr[i] = 0xff >> (start & 7);
+ for (i++; i <= (start + width - 1) / 8; i++)
+ arr[i] = 0xff;
+ arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ }
+ break;
+ default:
+ abort ();
+ }
+ if (GET_CODE (ops[0]) == MEM)
+ {
+ rtx low = gen_reg_rtx (SImode);
+ rtx rotl = gen_reg_rtx (SImode);
+ rtx mask0 = gen_reg_rtx (TImode);
+ rtx addr;
+ rtx addr0;
+ rtx addr1;
+ rtx mem;
+
+ addr = force_reg (Pmode, XEXP (ops[0], 0));
+ addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
+ emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
+ emit_insn (gen_negsi2 (rotl, low));
+ emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
+ emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
+ mem = change_address (ops[0], TImode, addr0);
+ set_mem_alias_set (mem, 0);
+ emit_move_insn (dst, mem);
+ emit_insn (gen_selb (dst, dst, shift_reg, mask0));
+ if (start + width > MEM_ALIGN (ops[0]))
+ {
+ rtx shl = gen_reg_rtx (SImode);
+ rtx mask1 = gen_reg_rtx (TImode);
+ rtx dst1 = gen_reg_rtx (TImode);
+ rtx mem1;
+ addr1 = plus_constant (addr, 16);
+ addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
+ emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
+ emit_insn (gen_shlqby_ti (mask1, mask, shl));
+ mem1 = change_address (ops[0], TImode, addr1);
+ set_mem_alias_set (mem1, 0);
+ emit_move_insn (dst1, mem1);
+ emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
+ emit_move_insn (mem1, dst1);
+ }
+ emit_move_insn (mem, dst);
+ }
+ else
+ emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
+}
+
+
+int
+spu_expand_block_move (rtx ops[])
+{
+ HOST_WIDE_INT bytes, align, offset;
+ rtx src, dst, sreg, dreg, target;
+ int i;
+ if (GET_CODE (ops[2]) != CONST_INT
+ || GET_CODE (ops[3]) != CONST_INT
+ || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
+ return 0;
+
+ bytes = INTVAL (ops[2]);
+ align = INTVAL (ops[3]);
+
+ if (bytes <= 0)
+ return 1;
+
+ dst = ops[0];
+ src = ops[1];
+
+ if (align == 16)
+ {
+ for (offset = 0; offset + 16 <= bytes; offset += 16)
+ {
+ dst = adjust_address (ops[0], V16QImode, offset);
+ src = adjust_address (ops[1], V16QImode, offset);
+ emit_move_insn (dst, src);
+ }
+ if (offset < bytes)
+ {
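+	  /* Copy the sub-quadword tail: load both quadwords and merge
+	     them with selb under a byte mask, so destination bytes
+	     past the copy are preserved.  */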
+ rtx mask;
+ unsigned char arr[16] = { 0 };
+ for (i = 0; i < bytes - offset; i++)
+ arr[i] = 0xff;
+ dst = adjust_address (ops[0], V16QImode, offset);
+ src = adjust_address (ops[1], V16QImode, offset);
+ mask = gen_reg_rtx (V16QImode);
+ sreg = gen_reg_rtx (V16QImode);
+ dreg = gen_reg_rtx (V16QImode);
+ target = gen_reg_rtx (V16QImode);
+ emit_move_insn (mask, array_to_constant (V16QImode, arr));
+ emit_move_insn (dreg, dst);
+ emit_move_insn (sreg, src);
+ emit_insn (gen_selb (target, dreg, sreg, mask));
+ emit_move_insn (dst, target);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+enum spu_comp_code
+{ SPU_EQ, SPU_GT, SPU_GTU };
+
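+/* Comparison instruction codes, indexed by operand mode (in the
+   order QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF)
+   and by enum spu_comp_code.  A zero entry means the comparison is
+   not available for that mode.  */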
+int spu_comp_icode[12][3] = {
+ {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
+ {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
+ {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
+ {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
+ {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
+ {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
+ {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
+ {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
+ {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
+ {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
+ {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
+ {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
+};
+
+/* Emit a compare, and a branch or set based on its result, for CODE.
+   GCC can figure this out too if we don't provide all variations of
+   compares, but since GCC always wants to use WORD_MODE, we can
+   generate better code in most cases if we do it ourselves.  */
+void
+spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
+{
+ int reverse_compare = 0;
+ int reverse_test = 0;
+ rtx compare_result, eq_result;
+ rtx comp_rtx, eq_rtx;
+ enum machine_mode comp_mode;
+ enum machine_mode op_mode;
+ enum spu_comp_code scode, eq_code;
+ enum insn_code ior_code;
+ enum rtx_code code = GET_CODE (cmp);
+ rtx op0 = XEXP (cmp, 0);
+ rtx op1 = XEXP (cmp, 1);
+ int index;
+ int eq_test = 0;
+
+ /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
+ and so on, to keep the constant in operand 1. */
+ if (GET_CODE (op1) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (op1) - 1;
+ if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
+ switch (code)
+ {
+ case GE:
+ op1 = GEN_INT (val);
+ code = GT;
+ break;
+ case LT:
+ op1 = GEN_INT (val);
+ code = LE;
+ break;
+ case GEU:
+ op1 = GEN_INT (val);
+ code = GTU;
+ break;
+ case LTU:
+ op1 = GEN_INT (val);
+ code = LEU;
+ break;
+ default:
+ break;
+ }
+ }
+
+ comp_mode = SImode;
+ op_mode = GET_MODE (op0);
+
+ switch (code)
+ {
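+    /* Without NaNs, a >= b can be tested as !(b > a).  When NaNs must
+       be honored, it is computed as (a > b) || (a == b) instead,
+       which is what EQ_TEST arranges.  LE is handled symmetrically.  */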
+ case GE:
+ scode = SPU_GT;
+ if (HONOR_NANS (op_mode))
+ {
+ reverse_compare = 0;
+ reverse_test = 0;
+ eq_test = 1;
+ eq_code = SPU_EQ;
+ }
+ else
+ {
+ reverse_compare = 1;
+ reverse_test = 1;
+ }
+ break;
+ case LE:
+ scode = SPU_GT;
+ if (HONOR_NANS (op_mode))
+ {
+ reverse_compare = 1;
+ reverse_test = 0;
+ eq_test = 1;
+ eq_code = SPU_EQ;
+ }
+ else
+ {
+ reverse_compare = 0;
+ reverse_test = 1;
+ }
+ break;
+ case LT:
+ reverse_compare = 1;
+ reverse_test = 0;
+ scode = SPU_GT;
+ break;
+ case GEU:
+ reverse_compare = 1;
+ reverse_test = 1;
+ scode = SPU_GTU;
+ break;
+ case LEU:
+ reverse_compare = 0;
+ reverse_test = 1;
+ scode = SPU_GTU;
+ break;
+ case LTU:
+ reverse_compare = 1;
+ reverse_test = 0;
+ scode = SPU_GTU;
+ break;
+ case NE:
+ reverse_compare = 0;
+ reverse_test = 1;
+ scode = SPU_EQ;
+ break;
+
+ case EQ:
+ scode = SPU_EQ;
+ break;
+ case GT:
+ scode = SPU_GT;
+ break;
+ case GTU:
+ scode = SPU_GTU;
+ break;
+ default:
+ scode = SPU_EQ;
+ break;
+ }
+
+ switch (op_mode)
+ {
+ case QImode:
+ index = 0;
+ comp_mode = QImode;
+ break;
+ case HImode:
+ index = 1;
+ comp_mode = HImode;
+ break;
+ case SImode:
+ index = 2;
+ break;
+ case DImode:
+ index = 3;
+ break;
+ case TImode:
+ index = 4;
+ break;
+ case SFmode:
+ index = 5;
+ break;
+ case DFmode:
+ index = 6;
+ break;
+ case V16QImode:
+ index = 7;
+ comp_mode = op_mode;
+ break;
+ case V8HImode:
+ index = 8;
+ comp_mode = op_mode;
+ break;
+ case V4SImode:
+ index = 9;
+ comp_mode = op_mode;
+ break;
+ case V4SFmode:
+ index = 10;
+ comp_mode = V4SImode;
+ break;
+ case V2DFmode:
+ index = 11;
+ comp_mode = V2DImode;
+ break;
+ case V2DImode:
+ default:
+ abort ();
+ }
+
+ if (GET_MODE (op1) == DFmode
+ && (scode != SPU_GT && scode != SPU_EQ))
+ abort ();
+
+ if (is_set == 0 && op1 == const0_rtx
+ && (GET_MODE (op0) == SImode
+ || GET_MODE (op0) == HImode) && scode == SPU_EQ)
+ {
+ /* Don't need to set a register with the result when we are
+ comparing against zero and branching. */
+ reverse_test = !reverse_test;
+ compare_result = op0;
+ }
+ else
+ {
+ compare_result = gen_reg_rtx (comp_mode);
+
+ if (reverse_compare)
+ {
+ rtx t = op1;
+ op1 = op0;
+ op0 = t;
+ }
+
+ if (spu_comp_icode[index][scode] == 0)
+ abort ();
+
+ if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
+ (op0, op_mode))
+ op0 = force_reg (op_mode, op0);
+ if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
+ (op1, op_mode))
+ op1 = force_reg (op_mode, op1);
+ comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
+ op0, op1);
+ if (comp_rtx == 0)
+ abort ();
+ emit_insn (comp_rtx);
+
+ if (eq_test)
+ {
+ eq_result = gen_reg_rtx (comp_mode);
+ eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
+ op0, op1);
+ if (eq_rtx == 0)
+ abort ();
+ emit_insn (eq_rtx);
+ ior_code = optab_handler (ior_optab, comp_mode);
+ gcc_assert (ior_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (ior_code)
+ (compare_result, compare_result, eq_result));
+ }
+ }
+
+ if (is_set == 0)
+ {
+ rtx bcomp;
+ rtx loc_ref;
+
+ /* We don't have branch on QI compare insns, so we convert the
+ QI compare result to a HI result. */
+ if (comp_mode == QImode)
+ {
+ rtx old_res = compare_result;
+ compare_result = gen_reg_rtx (HImode);
+ comp_mode = HImode;
+ emit_insn (gen_extendqihi2 (compare_result, old_res));
+ }
+
+ if (reverse_test)
+ bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
+ else
+ bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
+
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+ }
+ else if (is_set == 2)
+ {
+ rtx target = operands[0];
+ int compare_size = GET_MODE_BITSIZE (comp_mode);
+ int target_size = GET_MODE_BITSIZE (GET_MODE (target));
+ enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
+ rtx select_mask;
+ rtx op_t = operands[2];
+ rtx op_f = operands[3];
+
+ /* The result of the comparison can be SI, HI or QI mode. Create a
+ mask based on that result. */
+ if (target_size > compare_size)
+ {
+ select_mask = gen_reg_rtx (mode);
+ emit_insn (gen_extend_compare (select_mask, compare_result));
+ }
+ else if (target_size < compare_size)
+ select_mask =
+ gen_rtx_SUBREG (mode, compare_result,
+ (compare_size - target_size) / BITS_PER_UNIT);
+ else if (comp_mode != mode)
+ select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
+ else
+ select_mask = compare_result;
+
+ if (GET_MODE (target) != GET_MODE (op_t)
+ || GET_MODE (target) != GET_MODE (op_f))
+ abort ();
+
+ if (reverse_test)
+ emit_insn (gen_selb (target, op_t, op_f, select_mask));
+ else
+ emit_insn (gen_selb (target, op_f, op_t, select_mask));
+ }
+ else
+ {
+ rtx target = operands[0];
+ if (reverse_test)
+ emit_insn (gen_rtx_SET (VOIDmode, compare_result,
+ gen_rtx_NOT (comp_mode, compare_result)));
+ if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
+ emit_insn (gen_extendhisi2 (target, compare_result));
+ else if (GET_MODE (target) == SImode
+ && GET_MODE (compare_result) == QImode)
+ emit_insn (gen_extend_compare (target, compare_result));
+ else
+ emit_move_insn (target, compare_result);
+ }
+}
+
+HOST_WIDE_INT
+const_double_to_hwint (rtx x)
+{
+ HOST_WIDE_INT val;
+ REAL_VALUE_TYPE rv;
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, val);
+ }
+ else if (GET_MODE (x) == DFmode)
+ {
+ long l[2];
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+ val = l[0];
+ val = (val << 32) | (l[1] & 0xffffffff);
+ }
+ else
+ abort ();
+ return val;
+}
+
+rtx
+hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
+{
+ long tv[2];
+ REAL_VALUE_TYPE rv;
+ gcc_assert (mode == SFmode || mode == DFmode);
+
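+  /* (v << 32) >> 32 sign-extends the low 32 bits of V into the
+     32-bit target word expected by real_from_target.  */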
+ if (mode == SFmode)
+ tv[0] = (v << 32) >> 32;
+ else if (mode == DFmode)
+ {
+ tv[1] = (v << 32) >> 32;
+ tv[0] = v >> 32;
+ }
+ real_from_target (&rv, tv, mode);
+ return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
+}
+
+void
+print_operand_address (FILE * file, register rtx addr)
+{
+ rtx reg;
+ rtx offset;
+
+ if (GET_CODE (addr) == AND
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && INTVAL (XEXP (addr, 1)) == -16)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
+ break;
+
+ case PLUS:
+ reg = XEXP (addr, 0);
+ offset = XEXP (addr, 1);
+ if (GET_CODE (offset) == REG)
+ {
+ fprintf (file, "%s,%s", reg_names[REGNO (reg)],
+ reg_names[REGNO (offset)]);
+ }
+ else if (GET_CODE (offset) == CONST_INT)
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
+ INTVAL (offset), reg_names[REGNO (reg)]);
+ }
+ else
+ abort ();
+ break;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_INT:
+ output_addr_const (file, addr);
+ break;
+
+ default:
+ debug_rtx (addr);
+ abort ();
+ }
+}
+
+void
+print_operand (FILE * file, rtx x, int code)
+{
+ enum machine_mode mode = GET_MODE (x);
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int xcode = GET_CODE (x);
+ int i, info;
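+  /* A VOIDmode operand is a mode-less constant; infer the mode the
+     output code expects so the constant is printed with the correct
+     width.  */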
+ if (GET_MODE (x) == VOIDmode)
+ switch (code)
+ {
+ case 'L': /* 128 bits, signed */
+ case 'm': /* 128 bits, signed */
+ case 'T': /* 128 bits, signed */
+ case 't': /* 128 bits, signed */
+ mode = TImode;
+ break;
+ case 'K': /* 64 bits, signed */
+ case 'k': /* 64 bits, signed */
+ case 'D': /* 64 bits, signed */
+ case 'd': /* 64 bits, signed */
+ mode = DImode;
+ break;
+ case 'J': /* 32 bits, signed */
+ case 'j': /* 32 bits, signed */
+ case 's': /* 32 bits, signed */
+ case 'S': /* 32 bits, signed */
+ mode = SImode;
+ break;
+ }
+ switch (code)
+ {
+
+ case 'j': /* 32 bits, signed */
+ case 'k': /* 64 bits, signed */
+ case 'm': /* 128 bits, signed */
+ if (xcode == CONST_INT
+ || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
+ {
+ gcc_assert (logical_immediate_p (x, mode));
+ constant_to_array (mode, x, arr);
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+ switch (which_logical_immediate (val))
+ {
+ case SPU_ORI:
+ break;
+ case SPU_ORHI:
+ fprintf (file, "h");
+ break;
+ case SPU_ORBI:
+ fprintf (file, "b");
+ break;
+ default:
+ gcc_unreachable();
+ }
+ }
+ else
+ gcc_unreachable();
+ return;
+
+ case 'J': /* 32 bits, signed */
+ case 'K': /* 64 bits, signed */
+ case 'L': /* 128 bits, signed */
+ if (xcode == CONST_INT
+ || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
+ {
+ gcc_assert (logical_immediate_p (x, mode)
+ || iohl_immediate_p (x, mode));
+ constant_to_array (mode, x, arr);
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+ switch (which_logical_immediate (val))
+ {
+ case SPU_ORI:
+ case SPU_IOHL:
+ break;
+ case SPU_ORHI:
+ val = trunc_int_for_mode (val, HImode);
+ break;
+ case SPU_ORBI:
+ val = trunc_int_for_mode (val, QImode);
+ break;
+ default:
+ gcc_unreachable();
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
+ }
+ else
+ gcc_unreachable();
+ return;
+
+ case 't': /* 128 bits, signed */
+ case 'd': /* 64 bits, signed */
+ case 's': /* 32 bits, signed */
+ if (CONSTANT_P (x))
+ {
+ enum immediate_class c = classify_immediate (x, mode);
+ switch (c)
+ {
+ case IC_IL1:
+ constant_to_array (mode, x, arr);
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+ switch (which_immediate_load (val))
+ {
+ case SPU_IL:
+ break;
+ case SPU_ILA:
+ fprintf (file, "a");
+ break;
+ case SPU_ILH:
+ fprintf (file, "h");
+ break;
+ case SPU_ILHU:
+ fprintf (file, "hu");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case IC_CPAT:
+ constant_to_array (mode, x, arr);
+ cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
+ if (info == 1)
+ fprintf (file, "b");
+ else if (info == 2)
+ fprintf (file, "h");
+ else if (info == 4)
+ fprintf (file, "w");
+ else if (info == 8)
+ fprintf (file, "d");
+ break;
+ case IC_IL1s:
+ if (xcode == CONST_VECTOR)
+ {
+ x = CONST_VECTOR_ELT (x, 0);
+ xcode = GET_CODE (x);
+ }
+ if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
+ fprintf (file, "a");
+ else if (xcode == HIGH)
+ fprintf (file, "hu");
+ break;
+ case IC_FSMBI:
+ case IC_FSMBI2:
+ case IC_IL2:
+ case IC_IL2s:
+ case IC_POOL:
+ abort ();
+ }
+ }
+ else
+ gcc_unreachable ();
+ return;
+
+ case 'T': /* 128 bits, signed */
+ case 'D': /* 64 bits, signed */
+ case 'S': /* 32 bits, signed */
+ if (CONSTANT_P (x))
+ {
+ enum immediate_class c = classify_immediate (x, mode);
+ switch (c)
+ {
+ case IC_IL1:
+ constant_to_array (mode, x, arr);
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+ switch (which_immediate_load (val))
+ {
+ case SPU_IL:
+ case SPU_ILA:
+ break;
+ case SPU_ILH:
+ case SPU_ILHU:
+ val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
+ break;
+ case IC_FSMBI:
+ constant_to_array (mode, x, arr);
+ val = 0;
+ for (i = 0; i < 16; i++)
+ {
+ val <<= 1;
+ val |= arr[i] & 1;
+ }
+ print_operand (file, GEN_INT (val), 0);
+ break;
+ case IC_CPAT:
+ constant_to_array (mode, x, arr);
+ cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
+ break;
+ case IC_IL1s:
+ if (xcode == HIGH)
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == CONST_VECTOR)
+ x = CONST_VECTOR_ELT (x, 0);
+ output_addr_const (file, x);
+ if (xcode == HIGH)
+ fprintf (file, "@h");
+ break;
+ case IC_IL2:
+ case IC_IL2s:
+ case IC_FSMBI2:
+ case IC_POOL:
+ abort ();
+ }
+ }
+ else
+ gcc_unreachable ();
+ return;
+
+ case 'C':
+ if (xcode == CONST_INT)
+ {
+	  /* Only the 4 least significant bits are relevant for generating
+	     control word instructions.  */
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
+ return;
+ }
+ break;
+
+ case 'M': /* print code for c*d */
+ if (GET_CODE (x) == CONST_INT)
+ switch (INTVAL (x))
+ {
+ case 1:
+ fprintf (file, "b");
+ break;
+ case 2:
+ fprintf (file, "h");
+ break;
+ case 4:
+ fprintf (file, "w");
+ break;
+ case 8:
+ fprintf (file, "d");
+ break;
+ default:
+ gcc_unreachable();
+ }
+ else
+ gcc_unreachable();
+ return;
+
+ case 'N': /* Negate the operand */
+ if (xcode == CONST_INT)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
+ else if (xcode == CONST_VECTOR)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+ -INTVAL (CONST_VECTOR_ELT (x, 0)));
+ return;
+
+ case 'I': /* enable/disable interrupts */
+ if (xcode == CONST_INT)
+ fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
+ return;
+
+ case 'b': /* branch modifiers */
+ if (xcode == REG)
+ fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
+ else if (COMPARISON_P (x))
+ fprintf (file, "%s", xcode == NE ? "n" : "");
+ return;
+
+ case 'i': /* indirect call */
+ if (xcode == MEM)
+ {
+ if (GET_CODE (XEXP (x, 0)) == REG)
+ /* Used in indirect function calls. */
+ fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
+ else
+ output_address (XEXP (x, 0));
+ }
+ return;
+
+ case 'p': /* load/store */
+ if (xcode == MEM)
+ {
+ x = XEXP (x, 0);
+ xcode = GET_CODE (x);
+ }
+ if (xcode == AND)
+ {
+ x = XEXP (x, 0);
+ xcode = GET_CODE (x);
+ }
+ if (xcode == REG)
+ fprintf (file, "d");
+ else if (xcode == CONST_INT)
+ fprintf (file, "a");
+ else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
+ fprintf (file, "r");
+ else if (xcode == PLUS || xcode == LO_SUM)
+ {
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ fprintf (file, "x");
+ else
+ fprintf (file, "d");
+ }
+ return;
+
+ case 'e':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val &= 0x7;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'f':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val &= 0x1f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'g':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val &= 0x3f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'h':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val = (val >> 3) & 0x1f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'E':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val = -val;
+ val &= 0x7;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'F':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val = -val;
+ val &= 0x1f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'G':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val = -val;
+ val &= 0x3f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'H':
+ val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
+ val = -(val & -8ll);
+ val = (val >> 3) & 0x1f;
+ output_addr_const (file, GEN_INT (val));
+ return;
+
+ case 'v':
+ case 'w':
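+      /* The output is the exponent of the first single-precision
+	 element: ((arr[0] << 1) | (arr[1] >> 7)) recovers the 8
+	 exponent bits, and 127 is the IEEE single-precision bias.
+	 'w' prints the negated value.  */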
+ constant_to_array (mode, x, arr);
+ val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
+ output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
+ return;
+
+ case 0:
+ if (xcode == REG)
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (xcode == MEM)
+ output_address (XEXP (x, 0));
+ else if (xcode == CONST_VECTOR)
+ print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
+ else
+ output_addr_const (file, x);
+ return;
+
+ /* unused letters
+ o qr u yz
+ AB OPQR UVWXYZ */
+ default:
+ output_operand_lossage ("invalid %%xn code");
+ }
+ gcc_unreachable ();
+}
+
+/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
+ caller saved register. For leaf functions it is more efficient to
+ use a volatile register because we won't need to save and restore the
+ pic register. This routine is only valid after register allocation
+ is completed, so we can pick an unused register. */
+static rtx
+get_pic_reg (void)
+{
+ rtx pic_reg = pic_offset_table_rtx;
+ if (!reload_completed && !reload_in_progress)
+ abort ();
+ if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
+ pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
+ return pic_reg;
+}
+
+/* Split constant addresses to handle cases that are too large.
+ Add in the pic register when in PIC mode.
+ Split immediates that require more than 1 instruction. */
+int
+spu_split_immediate (rtx * ops)
+{
+ enum machine_mode mode = GET_MODE (ops[0]);
+ enum immediate_class c = classify_immediate (ops[1], mode);
+
+ switch (c)
+ {
+ case IC_IL2:
+ {
+ unsigned char arrhi[16];
+ unsigned char arrlo[16];
+ rtx to, temp, hi, lo;
+ int i;
+ enum machine_mode imode = mode;
+ /* We need to do reals as ints because the constant used in the
+ IOR might not be a legitimate real constant. */
+ imode = int_mode_for_mode (mode);
+ constant_to_array (mode, ops[1], arrhi);
+ if (imode != mode)
+ to = simplify_gen_subreg (imode, ops[0], mode, 0);
+ else
+ to = ops[0];
+ temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
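+	/* Split each 32-bit word of the constant: ARRHI keeps the high
+	   halfwords (loaded first), ARRLO the low halfwords (IORed in
+	   afterwards).  */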
+ for (i = 0; i < 16; i += 4)
+ {
+ arrlo[i + 2] = arrhi[i + 2];
+ arrlo[i + 3] = arrhi[i + 3];
+ arrlo[i + 0] = arrlo[i + 1] = 0;
+ arrhi[i + 2] = arrhi[i + 3] = 0;
+ }
+ hi = array_to_constant (imode, arrhi);
+ lo = array_to_constant (imode, arrlo);
+ emit_move_insn (temp, hi);
+ emit_insn (gen_rtx_SET
+ (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
+ return 1;
+ }
+ case IC_FSMBI2:
+ {
+ unsigned char arr_fsmbi[16];
+ unsigned char arr_andbi[16];
+ rtx to, reg_fsmbi, reg_and;
+ int i;
+ enum machine_mode imode = mode;
+	/* We need to do reals as ints because the constant used in the
+	   AND might not be a legitimate real constant.  */
+ imode = int_mode_for_mode (mode);
+ constant_to_array (mode, ops[1], arr_fsmbi);
+ if (imode != mode)
+ to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
+ else
+ to = ops[0];
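+	/* Mark every non-zero byte position with 0xff for the fsmbi,
+	   and splat the (single) non-zero byte value into the AND
+	   mask.  */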
+ for (i = 0; i < 16; i++)
+ if (arr_fsmbi[i] != 0)
+ {
+ arr_andbi[0] = arr_fsmbi[i];
+ arr_fsmbi[i] = 0xff;
+ }
+ for (i = 1; i < 16; i++)
+ arr_andbi[i] = arr_andbi[0];
+ reg_fsmbi = array_to_constant (imode, arr_fsmbi);
+ reg_and = array_to_constant (imode, arr_andbi);
+ emit_move_insn (to, reg_fsmbi);
+ emit_insn (gen_rtx_SET
+ (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
+ return 1;
+ }
+ case IC_POOL:
+ if (reload_in_progress || reload_completed)
+ {
+ rtx mem = force_const_mem (mode, ops[1]);
+ if (TARGET_LARGE_MEM)
+ {
+ rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
+ emit_move_insn (addr, XEXP (mem, 0));
+ mem = replace_equiv_address (mem, addr);
+ }
+ emit_move_insn (ops[0], mem);
+ return 1;
+ }
+ break;
+ case IC_IL1s:
+ case IC_IL2s:
+ if (reload_completed && GET_CODE (ops[1]) != HIGH)
+ {
+ if (c == IC_IL2s)
+ {
+ emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
+ emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
+ }
+ else if (flag_pic)
+ emit_insn (gen_pic (ops[0], ops[1]));
+ if (flag_pic)
+ {
+ rtx pic_reg = get_pic_reg ();
+ emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
+ crtl->uses_pic_offset_table = 1;
+ }
+ return flag_pic || c == IC_IL2s;
+ }
+ break;
+ case IC_IL1:
+ case IC_FSMBI:
+ case IC_CPAT:
+ break;
+ }
+ return 0;
+}
+
+/* SAVING is TRUE when we are generating the actual load and store
+   instructions for REGNO.  When determining the size of the stack
+   needed for saving registers we must allocate enough space for the
+   worst case, because we don't always have the information early
+   enough to avoid allocating it.  But we can at least eliminate the
+   actual loads and stores during the prologue/epilogue.  */
+static int
+need_to_save_reg (int regno, int saving)
+{
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ return 1;
+ if (flag_pic
+ && regno == PIC_OFFSET_TABLE_REGNUM
+ && (!saving || crtl->uses_pic_offset_table)
+ && (!saving
+ || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
+ return 1;
+ return 0;
+}
+
+/* This function is only correct starting with local register
+   allocation.  */
+int
+spu_saved_regs_size (void)
+{
+ int reg_save_size = 0;
+ int regno;
+
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
+ if (need_to_save_reg (regno, 0))
+ reg_save_size += 0x10;
+ return reg_save_size;
+}
+
+static rtx
+frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
+{
+ rtx reg = gen_rtx_REG (V4SImode, regno);
+ rtx mem =
+ gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
+ return emit_insn (gen_movv4si (mem, reg));
+}
+
+static rtx
+frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
+{
+ rtx reg = gen_rtx_REG (V4SImode, regno);
+ rtx mem =
+ gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
+ return emit_insn (gen_movv4si (reg, mem));
+}
+
+/* This happens after reload, so we need to expand it. */
+static rtx
+frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
+{
+ rtx insn;
+ if (satisfies_constraint_K (GEN_INT (imm)))
+ {
+ insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
+ }
+ else
+ {
+ emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
+ insn = emit_insn (gen_addsi3 (dst, src, scratch));
+ if (REGNO (src) == REGNO (scratch))
+ abort ();
+ }
+ return insn;
+}
+
+/* Return nonzero if this function is known to have a null epilogue. */
+
+int
+direct_return (void)
+{
+ if (reload_completed)
+ {
+ if (cfun->static_chain_decl == 0
+ && (spu_saved_regs_size ()
+ + get_frame_size ()
+ + crtl->outgoing_args_size
+ + crtl->args.pretend_args_size == 0)
+ && current_function_is_leaf)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ The stack frame looks like this:
+ +-------------+
+ | incoming |
+ | args |
+ AP -> +-------------+
+ | $lr save |
+ +-------------+
+ prev SP | back chain |
+ +-------------+
+ | var args |
+ | reg save | crtl->args.pretend_args_size bytes
+ +-------------+
+ | ... |
+ | saved regs | spu_saved_regs_size() bytes
+ FP -> +-------------+
+ | ... |
+ | vars | get_frame_size() bytes
+ HFP -> +-------------+
+ | ... |
+ | outgoing |
+ | args | crtl->outgoing_args_size bytes
+ +-------------+
+ | $lr of next |
+ | frame |
+ +-------------+
+ | back chain |
+ SP -> +-------------+
+
+*/
+void
+spu_expand_prologue (void)
+{
+ HOST_WIDE_INT size = get_frame_size (), offset, regno;
+ HOST_WIDE_INT total_size;
+ HOST_WIDE_INT saved_regs_size;
+ rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx scratch_reg_0, scratch_reg_1;
+ rtx insn, real;
+
+ if (flag_pic && optimize == 0)
+ crtl->uses_pic_offset_table = 1;
+
+ if (spu_naked_function_p (current_function_decl))
+ return;
+
+ scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
+ scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
+
+ saved_regs_size = spu_saved_regs_size ();
+ total_size = size + saved_regs_size
+ + crtl->outgoing_args_size
+ + crtl->args.pretend_args_size;
+
+ if (!current_function_is_leaf
+ || cfun->calls_alloca || total_size > 0)
+ total_size += STACK_POINTER_OFFSET;
+
+ /* Save this first because code after this might use the link
+ register as a scratch register. */
+ if (!current_function_is_leaf)
+ {
+ insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (total_size > 0)
+ {
+ offset = -crtl->args.pretend_args_size;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
+ if (need_to_save_reg (regno, 1))
+ {
+ offset -= 16;
+ insn = frame_emit_store (regno, sp_reg, offset);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ if (flag_pic && crtl->uses_pic_offset_table)
+ {
+ rtx pic_reg = get_pic_reg ();
+ insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
+ insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
+ }
+
+ if (total_size > 0)
+ {
+ if (flag_stack_check)
+ {
+ /* We compare against total_size-1 because
+	     ($sp >= total_size) <=> ($sp > total_size-1).  */
+ rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
+ rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
+ rtx size_v4si = spu_const (V4SImode, total_size - 1);
+ if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
+ {
+ emit_move_insn (scratch_v4si, size_v4si);
+ size_v4si = scratch_v4si;
+ }
+ emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
+ emit_insn (gen_vec_extractv4si
+ (scratch_reg_0, scratch_v4si, GEN_INT (1)));
+ emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
+ }
+
+ /* Adjust the stack pointer, and make sure scratch_reg_0 contains
+ the value of the previous $sp because we save it as the back
+ chain. */
+ if (total_size <= 2000)
+ {
+ /* In this case we save the back chain first. */
+ insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
+ insn =
+ frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
+ }
+ else
+ {
+ insn = emit_move_insn (scratch_reg_0, sp_reg);
+ insn =
+ frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
+ }
+ RTX_FRAME_RELATED_P (insn) = 1;
+ real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
+
+ if (total_size > 2000)
+ {
+ /* Save the back chain ptr */
+ insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
+ }
+
+ if (frame_pointer_needed)
+ {
+ rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
+ HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
+ + crtl->outgoing_args_size;
+ /* Set the new frame_pointer */
+ insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
+ REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
+ }
+ }
+
+ if (flag_stack_usage)
+ current_function_static_stack_size = total_size;
+}
+
+void
+spu_expand_epilogue (bool sibcall_p)
+{
+ int size = get_frame_size (), offset, regno;
+ HOST_WIDE_INT saved_regs_size, total_size;
+ rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx jump, scratch_reg_0;
+
+ if (spu_naked_function_p (current_function_decl))
+ return;
+
+ scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
+
+ saved_regs_size = spu_saved_regs_size ();
+ total_size = size + saved_regs_size
+ + crtl->outgoing_args_size
+ + crtl->args.pretend_args_size;
+
+ if (!current_function_is_leaf
+ || cfun->calls_alloca || total_size > 0)
+ total_size += STACK_POINTER_OFFSET;
+
+ if (total_size > 0)
+ {
+ if (cfun->calls_alloca)
+ frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
+ else
+ frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
+
+
+ if (saved_regs_size > 0)
+ {
+ offset = -crtl->args.pretend_args_size;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
+ if (need_to_save_reg (regno, 1))
+ {
+ offset -= 0x10;
+ frame_emit_load (regno, sp_reg, offset);
+ }
+ }
+ }
+
+ if (!current_function_is_leaf)
+ frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
+
+ if (!sibcall_p)
+ {
+ emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
+ jump = emit_jump_insn (gen__return ());
+ emit_barrier_after (jump);
+ }
+
+}
+
+rtx
+spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+ if (count != 0)
+ return 0;
+ /* This is inefficient because it ends up copying to a save-register
+ which then gets saved even though $lr has already been saved. But
+ it does generate better code for leaf functions and we don't need
+ to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
+ used for __builtin_return_address anyway, so maybe we don't care if
+ it's inefficient. */
+ return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
+}
+
+
+/* Given VAL, generate a constant appropriate for MODE.
+ If MODE is a vector mode, every element will be VAL.
+ For TImode, VAL will be zero extended to 128 bits. */
+rtx
+spu_const (enum machine_mode mode, HOST_WIDE_INT val)
+{
+ rtx inner;
+ rtvec v;
+ int units, i;
+
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
+
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ return immed_double_const (val, 0, mode);
+
+  /* VAL is the bit representation of the float.  */
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return hwint_to_const_double (mode, val);
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
+ else
+ inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
+
+ units = GET_MODE_NUNITS (mode);
+
+ v = rtvec_alloc (units);
+
+ for (i = 0; i < units; ++i)
+ RTVEC_ELT (v, i) = inner;
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
+/* Create a MODE vector constant from 4 ints. */
+rtx
+spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
+{
+ unsigned char arr[16];
+ arr[0] = (a >> 24) & 0xff;
+ arr[1] = (a >> 16) & 0xff;
+ arr[2] = (a >> 8) & 0xff;
+ arr[3] = (a >> 0) & 0xff;
+ arr[4] = (b >> 24) & 0xff;
+ arr[5] = (b >> 16) & 0xff;
+ arr[6] = (b >> 8) & 0xff;
+ arr[7] = (b >> 0) & 0xff;
+ arr[8] = (c >> 24) & 0xff;
+ arr[9] = (c >> 16) & 0xff;
+ arr[10] = (c >> 8) & 0xff;
+ arr[11] = (c >> 0) & 0xff;
+ arr[12] = (d >> 24) & 0xff;
+ arr[13] = (d >> 16) & 0xff;
+ arr[14] = (d >> 8) & 0xff;
+ arr[15] = (d >> 0) & 0xff;
+ return array_to_constant(mode, arr);
+}
+
+/* Branch hint support.  */
+
+/* An array of these is used to propagate hints to predecessor blocks. */
+struct spu_bb_info
+{
+ rtx prop_jump; /* propagated from another block */
+ int bb_index; /* the original block. */
+};
+static struct spu_bb_info *spu_bb_info;
+
+#define STOP_HINT_P(INSN) \
+ (GET_CODE(INSN) == CALL_INSN \
+ || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
+ || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
+
+/* 1 when RTX is a hinted branch or its target. We keep track of
+ what has been hinted so the safe-hint code can test it easily. */
+#define HINTED_P(RTX) \
+ (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
+
+/* 1 when RTX is an insn that must be scheduled on an even boundary. */
+#define SCHED_ON_EVEN_P(RTX) \
+ (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
+
+/* Emit a nop for INSN such that the two will dual issue. This assumes
+ INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
+ We check for TImode to handle a MULTI1 insn which has dual issued its
+ first instruction. get_pipe returns -1 for MULTI0, inline asm, or
+ ADDR_VEC insns. */
+static void
+emit_nop_for_insn (rtx insn)
+{
+ int p;
+ rtx new_insn;
+ p = get_pipe (insn);
+ if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
+ new_insn = emit_insn_after (gen_lnop (), insn);
+ else if (p == 1 && GET_MODE (insn) == TImode)
+ {
+ new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
+ PUT_MODE (new_insn, TImode);
+ PUT_MODE (insn, VOIDmode);
+ }
+ else
+ new_insn = emit_insn_after (gen_lnop (), insn);
+ recog_memoized (new_insn);
+ INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
+}
+
+/* Insert nops in basic blocks to meet dual issue alignment
+ requirements. Also make sure hbrp and hint instructions are at least
+ one cycle apart, possibly inserting a nop. */
+static void
+pad_bb(void)
+{
+ rtx insn, next_insn, prev_insn, hbr_insn = 0;
+ int length;
+ int addr;
+
+ /* This sets up INSN_ADDRESSES. */
+ shorten_branches (get_insns ());
+
+ /* Keep track of length added by nops. */
+ length = 0;
+
+ prev_insn = 0;
+ insn = get_insns ();
+ if (!active_insn_p (insn))
+ insn = next_active_insn (insn);
+ for (; insn; insn = next_insn)
+ {
+ next_insn = next_active_insn (insn);
+ if (INSN_CODE (insn) == CODE_FOR_iprefetch
+ || INSN_CODE (insn) == CODE_FOR_hbr)
+ {
+ if (hbr_insn)
+ {
+ int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
+ int a1 = INSN_ADDRESSES (INSN_UID (insn));
+ if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
+ || (a1 - a0 == 4))
+ {
+ prev_insn = emit_insn_before (gen_lnop (), insn);
+ PUT_MODE (prev_insn, GET_MODE (insn));
+ PUT_MODE (insn, TImode);
+ INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
+ length += 4;
+ }
+ }
+ hbr_insn = insn;
+ }
+ if (INSN_CODE (insn) == CODE_FOR_blockage)
+ {
+ if (GET_MODE (insn) == TImode)
+ PUT_MODE (next_insn, TImode);
+ insn = next_insn;
+ next_insn = next_active_insn (insn);
+ }
+ addr = INSN_ADDRESSES (INSN_UID (insn));
+ if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
+ {
+ if (((addr + length) & 7) != 0)
+ {
+ emit_nop_for_insn (prev_insn);
+ length += 4;
+ }
+ }
+ else if (GET_MODE (insn) == TImode
+ && ((next_insn && GET_MODE (next_insn) != TImode)
+ || get_attr_type (insn) == TYPE_MULTI0)
+ && ((addr + length) & 7) != 0)
+ {
+ /* prev_insn will always be set because the first insn is
+ always 8-byte aligned. */
+ emit_nop_for_insn (prev_insn);
+ length += 4;
+ }
+ prev_insn = insn;
+ }
+}
+
+
+/* Routines for branch hints. */
+
+static void
+spu_emit_branch_hint (rtx before, rtx branch, rtx target,
+ int distance, sbitmap blocks)
+{
+ rtx branch_label = 0;
+ rtx hint;
+ rtx insn;
+ rtx table;
+
+ if (before == 0 || branch == 0 || target == 0)
+ return;
+
+  /* While scheduling we require hints to be no further than 600 from
+     the branch, so we need to enforce that here too.  */
+ if (distance > 600)
+ return;
+
+  /* If BEFORE is a basic block note, emit the hint after the note.  */
+ if (NOTE_INSN_BASIC_BLOCK_P (before))
+ before = NEXT_INSN (before);
+
+ branch_label = gen_label_rtx ();
+ LABEL_NUSES (branch_label)++;
+ LABEL_PRESERVE_P (branch_label) = 1;
+ insn = emit_label_before (branch_label, branch);
+ branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
+ SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
+
+ hint = emit_insn_before (gen_hbr (branch_label, target), before);
+ recog_memoized (hint);
+ INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
+ HINTED_P (branch) = 1;
+
+ if (GET_CODE (target) == LABEL_REF)
+ HINTED_P (XEXP (target, 0)) = 1;
+ else if (tablejump_p (branch, 0, &table))
+ {
+ rtvec vec;
+ int j;
+ if (GET_CODE (PATTERN (table)) == ADDR_VEC)
+ vec = XVEC (PATTERN (table), 0);
+ else
+ vec = XVEC (PATTERN (table), 1);
+ for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
+ HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
+ }
+
+ if (distance >= 588)
+ {
+      /* Make sure the hint isn't scheduled any earlier than this point,
+	 which could make it too far for the branch offset to fit.  */
+ insn = emit_insn_before (gen_blockage (), hint);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+ }
+ else if (distance <= 8 * 4)
+ {
+ /* To guarantee at least 8 insns between the hint and branch we
+ insert nops. */
+ int d;
+ for (d = distance; d < 8 * 4; d += 4)
+ {
+ insn =
+ emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+ }
+
+ /* Make sure any nops inserted aren't scheduled before the hint. */
+ insn = emit_insn_after (gen_blockage (), hint);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+
+ /* Make sure any nops inserted aren't scheduled after the call. */
+ if (CALL_P (branch) && distance < 8 * 4)
+ {
+ insn = emit_insn_before (gen_blockage (), branch);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
+ }
+ }
+}
+
+/* Return 0 if we don't want a hint for this branch.  Otherwise return
+ the rtx for the branch target. */
+static rtx
+get_branch_target (rtx branch)
+{
+ if (GET_CODE (branch) == JUMP_INSN)
+ {
+ rtx set, src;
+
+ /* Return statements */
+ if (GET_CODE (PATTERN (branch)) == RETURN)
+ return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
+
+ /* jump table */
+ if (GET_CODE (PATTERN (branch)) == ADDR_VEC
+ || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
+ return 0;
+
+ /* ASM GOTOs. */
+ if (extract_asm_operands (PATTERN (branch)) != NULL)
+ return NULL;
+
+ set = single_set (branch);
+ src = SET_SRC (set);
+ if (GET_CODE (SET_DEST (set)) != PC)
+ abort ();
+
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ {
+ rtx lab = 0;
+ rtx note = find_reg_note (branch, REG_BR_PROB, 0);
+ if (note)
+ {
+ /* If the more probable case is not a fall through, then
+ try a branch hint. */
+ HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
+ if (prob > (REG_BR_PROB_BASE * 6 / 10)
+ && GET_CODE (XEXP (src, 1)) != PC)
+ lab = XEXP (src, 1);
+ else if (prob < (REG_BR_PROB_BASE * 4 / 10)
+ && GET_CODE (XEXP (src, 2)) != PC)
+ lab = XEXP (src, 2);
+ }
+ if (lab)
+ {
+ if (GET_CODE (lab) == RETURN)
+ return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
+ return lab;
+ }
+ return 0;
+ }
+
+ return src;
+ }
+ else if (GET_CODE (branch) == CALL_INSN)
+ {
+ rtx call;
+ /* All of our call patterns are in a PARALLEL and the CALL is
+ the first pattern in the PARALLEL. */
+ if (GET_CODE (PATTERN (branch)) != PARALLEL)
+ abort ();
+ call = XVECEXP (PATTERN (branch), 0, 0);
+ if (GET_CODE (call) == SET)
+ call = SET_SRC (call);
+ if (GET_CODE (call) != CALL)
+ abort ();
+ return XEXP (XEXP (call, 0), 0);
+ }
+ return 0;
+}
+
+/* The special $hbr register is used to prevent the insn scheduler from
+ moving hbr insns across instructions which invalidate them. It
+ should only be used in a clobber, and this function searches for
+ insns which clobber it. */
+static bool
+insn_clobbers_hbr (rtx insn)
+{
+ if (INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ rtx parallel = PATTERN (insn);
+ rtx clobber;
+ int j;
+ for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
+ {
+ clobber = XVECEXP (parallel, 0, j);
+ if (GET_CODE (clobber) == CLOBBER
+ && GET_CODE (XEXP (clobber, 0)) == REG
+ && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Search up to 32 insns starting at FIRST:
+ - at any kind of hinted branch, just return
+ - at any unconditional branch in the first 15 insns, just return
+ - at a call or indirect branch, after the first 15 insns, force it to
+ an even address and return
+ - at any unconditional branch, after the first 15 insns, force it to
+ an even address.
+   At the end of the search, insert an hbrp within 4 insns of FIRST,
+ and an hbrp within 16 instructions of FIRST.
+ */
+static void
+insert_hbrp_for_ilb_runout (rtx first)
+{
+ rtx insn, before_4 = 0, before_16 = 0;
+ int addr = 0, length, first_addr = -1;
+ int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
+ int insert_lnop_after = 0;
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ if (first_addr == -1)
+ first_addr = INSN_ADDRESSES (INSN_UID (insn));
+ addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
+ length = get_attr_length (insn);
+
+ if (before_4 == 0 && addr + length >= 4 * 4)
+ before_4 = insn;
+ /* We test for 14 instructions because the first hbrp will add
+ up to 2 instructions. */
+ if (before_16 == 0 && addr + length >= 14 * 4)
+ before_16 = insn;
+
+ if (INSN_CODE (insn) == CODE_FOR_hbr)
+ {
+ /* Make sure an hbrp is at least 2 cycles away from a hint.
+ Insert an lnop after the hbrp when necessary. */
+ if (before_4 == 0 && addr > 0)
+ {
+ before_4 = insn;
+ insert_lnop_after |= 1;
+ }
+ else if (before_4 && addr <= 4 * 4)
+ insert_lnop_after |= 1;
+ if (before_16 == 0 && addr > 10 * 4)
+ {
+ before_16 = insn;
+ insert_lnop_after |= 2;
+ }
+ else if (before_16 && addr <= 14 * 4)
+ insert_lnop_after |= 2;
+ }
+
+ if (INSN_CODE (insn) == CODE_FOR_iprefetch)
+ {
+ if (addr < hbrp_addr0)
+ hbrp_addr0 = addr;
+ else if (addr < hbrp_addr1)
+ hbrp_addr1 = addr;
+ }
+
+ if (CALL_P (insn) || JUMP_P (insn))
+ {
+ if (HINTED_P (insn))
+ return;
+
+ /* Any branch after the first 15 insns should be on an even
+ address to avoid a special case branch. There might be
+ some nops and/or hbrps inserted, so we test after 10
+ insns. */
+ if (addr > 10 * 4)
+ SCHED_ON_EVEN_P (insn) = 1;
+ }
+
+ if (CALL_P (insn) || tablejump_p (insn, 0, 0))
+ return;
+
+
+ if (addr + length >= 32 * 4)
+ {
+ gcc_assert (before_4 && before_16);
+ if (hbrp_addr0 > 4 * 4)
+ {
+ insn =
+ emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
+ INSN_ADDRESSES_NEW (insn,
+ INSN_ADDRESSES (INSN_UID (before_4)));
+ PUT_MODE (insn, GET_MODE (before_4));
+ PUT_MODE (before_4, TImode);
+ if (insert_lnop_after & 1)
+ {
+ insn = emit_insn_before (gen_lnop (), before_4);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
+ INSN_ADDRESSES_NEW (insn,
+ INSN_ADDRESSES (INSN_UID (before_4)));
+ PUT_MODE (insn, TImode);
+ }
+ }
+ if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
+ && hbrp_addr1 > 16 * 4)
+ {
+ insn =
+ emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
+ INSN_ADDRESSES_NEW (insn,
+ INSN_ADDRESSES (INSN_UID (before_16)));
+ PUT_MODE (insn, GET_MODE (before_16));
+ PUT_MODE (before_16, TImode);
+ if (insert_lnop_after & 2)
+ {
+ insn = emit_insn_before (gen_lnop (), before_16);
+ recog_memoized (insn);
+ INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
+ INSN_ADDRESSES_NEW (insn,
+ INSN_ADDRESSES (INSN_UID
+ (before_16)));
+ PUT_MODE (insn, TImode);
+ }
+ }
+ return;
+ }
+ }
+ else if (BARRIER_P (insn))
+ return;
+
+}
+
+/* The SPU might hang when it executes 48 inline instructions after a
+ hinted branch jumps to its hinted target. The beginning of a
+ function and the return from a call might have been hinted, and must
+ be handled as well. To prevent a hang we insert 2 hbrps. The first
+ should be within 6 insns of the branch target. The second should be
+ within 22 insns of the branch target. When determining if hbrps are
+   necessary, we look for only 32 inline instructions, because up to
+   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
+ new hbrps, we insert them within 4 and 16 insns of the target. */
+static void
+insert_hbrp (void)
+{
+ rtx insn;
+ if (TARGET_SAFE_HINTS)
+ {
+ shorten_branches (get_insns ());
+ /* Insert hbrp at beginning of function */
+ insn = next_active_insn (get_insns ());
+ if (insn)
+ insert_hbrp_for_ilb_runout (insn);
+ /* Insert hbrp after hinted targets. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
+ insert_hbrp_for_ilb_runout (next_active_insn (insn));
+ }
+}
+
+static int in_spu_reorg;
+
+static void
+spu_var_tracking (void)
+{
+ if (flag_var_tracking)
+ {
+ df_analyze ();
+ timevar_push (TV_VAR_TRACKING);
+ variable_tracking_main ();
+ timevar_pop (TV_VAR_TRACKING);
+ df_finish_pass (false);
+ }
+}
+
+/* Insert branch hints. There are no branch optimizations after this
+ pass, so it's safe to set our branch hints now. */
+static void
+spu_machine_dependent_reorg (void)
+{
+ sbitmap blocks;
+ basic_block bb;
+ rtx branch, insn;
+ rtx branch_target = 0;
+ int branch_addr = 0, insn_addr, required_dist = 0;
+ int i;
+ unsigned int j;
+
+ if (!TARGET_BRANCH_HINTS || optimize == 0)
+ {
+ /* We still do it for unoptimized code because an external
+ function might have hinted a call or return. */
+ compute_bb_for_insn ();
+ insert_hbrp ();
+ pad_bb ();
+ spu_var_tracking ();
+ free_bb_for_insn ();
+ return;
+ }
+
+ blocks = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (blocks);
+
+ in_spu_reorg = 1;
+ compute_bb_for_insn ();
+
+ compact_blocks ();
+
+ spu_bb_info =
+ (struct spu_bb_info *) xcalloc (n_basic_blocks,
+ sizeof (struct spu_bb_info));
+
+ /* We need exact insn addresses and lengths. */
+ shorten_branches (get_insns ());
+
+ for (i = n_basic_blocks - 1; i >= 0; i--)
+ {
+ bb = BASIC_BLOCK (i);
+ branch = 0;
+ if (spu_bb_info[i].prop_jump)
+ {
+ branch = spu_bb_info[i].prop_jump;
+ branch_target = get_branch_target (branch);
+ branch_addr = INSN_ADDRESSES (INSN_UID (branch));
+ required_dist = spu_hint_dist;
+ }
+ /* Search from end of a block to beginning. In this loop, find
+ jumps which need a branch and emit them only when:
+ - it's an indirect branch and we're at the insn which sets
+ the register
+ - we're at an insn that will invalidate the hint. e.g., a
+ call, another hint insn, inline asm that clobbers $hbr, and
+ some inlined operations (divmodsi4). Don't consider jumps
+ because they are only at the end of a block and are
+ considered when we are deciding whether to propagate
+ - we're getting too far away from the branch. The hbr insns
+ only have a signed 10 bit offset
+ We go back as far as possible so the branch will be considered
+ for propagation when we get to the beginning of the block. */
+ for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ insn_addr = INSN_ADDRESSES (INSN_UID (insn));
+ if (branch
+ && ((GET_CODE (branch_target) == REG
+ && set_of (branch_target, insn) != NULL_RTX)
+ || insn_clobbers_hbr (insn)
+ || branch_addr - insn_addr > 600))
+ {
+ rtx next = NEXT_INSN (insn);
+ int next_addr = INSN_ADDRESSES (INSN_UID (next));
+ if (insn != BB_END (bb)
+ && branch_addr - next_addr >= required_dist)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "hint for %i in block %i before %i\n",
+ INSN_UID (branch), bb->index,
+ INSN_UID (next));
+ spu_emit_branch_hint (next, branch, branch_target,
+ branch_addr - next_addr, blocks);
+ }
+ branch = 0;
+ }
+
+ /* JUMP_P will only be true at the end of a block. When
+ branch is already set it means we've previously decided
+ to propagate a hint for that branch into this block. */
+ if (CALL_P (insn) || (JUMP_P (insn) && !branch))
+ {
+ branch = 0;
+ if ((branch_target = get_branch_target (insn)))
+ {
+ branch = insn;
+ branch_addr = insn_addr;
+ required_dist = spu_hint_dist;
+ }
+ }
+ }
+ if (insn == BB_HEAD (bb))
+ break;
+ }
+
+ if (branch)
+ {
+ /* If we haven't emitted a hint for this branch yet, it might
+ be profitable to emit it in one of the predecessor blocks,
+ especially for loops. */
+ rtx bbend;
+ basic_block prev = 0, prop = 0, prev2 = 0;
+ int loop_exit = 0, simple_loop = 0;
+ int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
+
+ for (j = 0; j < EDGE_COUNT (bb->preds); j++)
+ if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
+ prev = EDGE_PRED (bb, j)->src;
+ else
+ prev2 = EDGE_PRED (bb, j)->src;
+
+ for (j = 0; j < EDGE_COUNT (bb->succs); j++)
+ if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
+ loop_exit = 1;
+ else if (EDGE_SUCC (bb, j)->dest == bb)
+ simple_loop = 1;
+
+ /* If this branch is a loop exit then propagate to previous
+ fallthru block. This catches the cases when it is a simple
+ loop or when there is an initial branch into the loop. */
+ if (prev && (loop_exit || simple_loop)
+ && prev->loop_depth <= bb->loop_depth)
+ prop = prev;
+
+	  /* If there is only one adjacent predecessor, propagate the
+	     hint to it, but don't propagate outside this loop.  This
+	     loop_depth test isn't perfect, but I'm not sure the
+	     loop_father member is valid at this point.  */
+ else if (prev && single_pred_p (bb)
+ && prev->loop_depth == bb->loop_depth)
+ prop = prev;
+
+ /* If this is the JOIN block of a simple IF-THEN then
+	     propagate the hint to the HEADER block.  */
+ else if (prev && prev2
+ && EDGE_COUNT (bb->preds) == 2
+ && EDGE_COUNT (prev->preds) == 1
+ && EDGE_PRED (prev, 0)->src == prev2
+ && prev2->loop_depth == bb->loop_depth
+ && GET_CODE (branch_target) != REG)
+ prop = prev;
+
+ /* Don't propagate when:
+ - this is a simple loop and the hint would be too far
+ - this is not a simple loop and there are 16 insns in
+ this block already
+ - the predecessor block ends in a branch that will be
+ hinted
+ - the predecessor block ends in an insn that invalidates
+ the hint */
+ if (prop
+ && prop->index >= 0
+ && (bbend = BB_END (prop))
+ && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
+ (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
+ && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
+ "for %i (loop_exit %i simple_loop %i dist %i)\n",
+ bb->index, prop->index, bb->loop_depth,
+ INSN_UID (branch), loop_exit, simple_loop,
+ branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
+
+ spu_bb_info[prop->index].prop_jump = branch;
+ spu_bb_info[prop->index].bb_index = i;
+ }
+ else if (branch_addr - next_addr >= required_dist)
+ {
+ if (dump_file)
+ fprintf (dump_file, "hint for %i in block %i before %i\n",
+ INSN_UID (branch), bb->index,
+ INSN_UID (NEXT_INSN (insn)));
+ spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
+ branch_addr - next_addr, blocks);
+ }
+ branch = 0;
+ }
+ }
+ free (spu_bb_info);
+
+ if (!sbitmap_empty_p (blocks))
+ find_many_sub_basic_blocks (blocks);
+
+ /* We have to schedule to make sure alignment is ok. */
+ FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
+
+ /* The hints need to be scheduled, so call it again. */
+ schedule_insns ();
+ df_finish_pass (true);
+
+ insert_hbrp ();
+
+ pad_bb ();
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
+ {
+ /* Adjust the LABEL_REF in a hint when we have inserted a nop
+	   between its branch label and the branch.  We don't move the
+ label because GCC expects it at the beginning of the block. */
+ rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ rtx label_ref = XVECEXP (unspec, 0, 0);
+ rtx label = XEXP (label_ref, 0);
+ rtx branch;
+ int offset = 0;
+ for (branch = NEXT_INSN (label);
+ !JUMP_P (branch) && !CALL_P (branch);
+ branch = NEXT_INSN (branch))
+ if (NONJUMP_INSN_P (branch))
+ offset += get_attr_length (branch);
+ if (offset > 0)
+ XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
+ }
+
+ spu_var_tracking ();
+
+ free_bb_for_insn ();
+
+ in_spu_reorg = 0;
+}
+
+
+/* Insn scheduling routines, primarily for dual issue. */
+static int
+spu_sched_issue_rate (void)
+{
+ return 2;
+}
+
+static int
+uses_ls_unit(rtx insn)
+{
+ rtx set = single_set (insn);
+ if (set != 0
+ && (GET_CODE (SET_DEST (set)) == MEM
+ || GET_CODE (SET_SRC (set)) == MEM))
+ return 1;
+ return 0;
+}
+
+static int
+get_pipe (rtx insn)
+{
+ enum attr_type t;
+ /* Handle inline asm */
+ if (INSN_CODE (insn) == -1)
+ return -1;
+ t = get_attr_type (insn);
+ switch (t)
+ {
+ case TYPE_CONVERT:
+ return -2;
+ case TYPE_MULTI0:
+ return -1;
+
+ case TYPE_FX2:
+ case TYPE_FX3:
+ case TYPE_SPR:
+ case TYPE_NOP:
+ case TYPE_FXB:
+ case TYPE_FPD:
+ case TYPE_FP6:
+ case TYPE_FP7:
+ return 0;
+
+ case TYPE_LNOP:
+ case TYPE_SHUF:
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ case TYPE_BR:
+ case TYPE_MULTI1:
+ case TYPE_HBR:
+ case TYPE_IPREFETCH:
+ return 1;
+ default:
+ abort ();
+ }
+}
+
+
+/* haifa-sched.c has a static variable that keeps track of the current
+ cycle. It is passed to spu_sched_reorder, and we record it here for
+ use by spu_sched_variable_issue. It won't be accurate if the
+   scheduler updates its clock_var between the two calls.  */
+static int clock_var;
+
+/* This is used to keep track of insn alignment. Set to 0 at the
+ beginning of each block and increased by the "length" attr of each
+ insn scheduled. */
+static int spu_sched_length;
+
+/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
+ ready list appropriately in spu_sched_reorder(). */
+static int pipe0_clock;
+static int pipe1_clock;
+
+static int prev_clock_var;
+
+static int prev_priority;
+
+/* The SPU needs to load the next ilb sometime during the execution of
+ the previous ilb. There is a potential conflict if every cycle has a
+ load or store. To avoid the conflict we make sure the load/store
+ unit is free for at least one cycle during the execution of insns in
+ the previous ilb. */
+static int spu_ls_first;
+static int prev_ls_clock;
+
+static void
+spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ spu_sched_length = 0;
+}
+
+static void
+spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
+ {
+ /* When any block might be at least 8-byte aligned, assume they
+ will all be at least 8-byte aligned to make sure dual issue
+ works out correctly. */
+ spu_sched_length = 0;
+ }
+ spu_ls_first = INT_MAX;
+ clock_var = -1;
+ prev_ls_clock = -1;
+ pipe0_clock = -1;
+ pipe1_clock = -1;
+ prev_clock_var = -1;
+ prev_priority = -1;
+}
+
+static int
+spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
+{
+ int len;
+ int p;
+ if (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER
+ || (len = get_attr_length (insn)) == 0)
+ return more;
+
+ spu_sched_length += len;
+
+ /* Reset on inline asm */
+ if (INSN_CODE (insn) == -1)
+ {
+ spu_ls_first = INT_MAX;
+ pipe0_clock = -1;
+ pipe1_clock = -1;
+ return 0;
+ }
+ p = get_pipe (insn);
+ if (p == 0)
+ pipe0_clock = clock_var;
+ else
+ pipe1_clock = clock_var;
+
+ if (in_spu_reorg)
+ {
+ if (clock_var - prev_ls_clock > 1
+ || INSN_CODE (insn) == CODE_FOR_iprefetch)
+ spu_ls_first = INT_MAX;
+ if (uses_ls_unit (insn))
+ {
+ if (spu_ls_first == INT_MAX)
+ spu_ls_first = spu_sched_length;
+ prev_ls_clock = clock_var;
+ }
+
+ /* The scheduler hasn't inserted the nop, but we will later on.
+ Include those nops in spu_sched_length. */
+ if (prev_clock_var == clock_var && (spu_sched_length & 7))
+ spu_sched_length += 4;
+ prev_clock_var = clock_var;
+
+      /* more is -1 when called from spu_sched_reorder for new insns
+	 that don't have an INSN_PRIORITY.  */
+ if (more >= 0)
+ prev_priority = INSN_PRIORITY (insn);
+ }
+
+  /* Always try issuing more insns.  spu_sched_reorder will decide
+ when the cycle should be advanced. */
+ return 1;
+}
+
+/* This function is called for both TARGET_SCHED_REORDER and
+ TARGET_SCHED_REORDER2. */
+static int
+spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ rtx *ready, int *nreadyp, int clock)
+{
+ int i, nready = *nreadyp;
+ int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
+ rtx insn;
+
+ clock_var = clock;
+
+ if (nready <= 0 || pipe1_clock >= clock)
+ return 0;
+
+ /* Find any rtl insns that don't generate assembly insns and schedule
+ them first. */
+ for (i = nready - 1; i >= 0; i--)
+ {
+ insn = ready[i];
+ if (INSN_CODE (insn) == -1
+ || INSN_CODE (insn) == CODE_FOR_blockage
+ || (INSN_P (insn) && get_attr_length (insn) == 0))
+ {
+ ready[i] = ready[nready - 1];
+ ready[nready - 1] = insn;
+ return 1;
+ }
+ }
+
+ pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
+ for (i = 0; i < nready; i++)
+ if (INSN_CODE (ready[i]) != -1)
+ {
+ insn = ready[i];
+ switch (get_attr_type (insn))
+ {
+ default:
+ case TYPE_MULTI0:
+ case TYPE_CONVERT:
+ case TYPE_FX2:
+ case TYPE_FX3:
+ case TYPE_SPR:
+ case TYPE_NOP:
+ case TYPE_FXB:
+ case TYPE_FPD:
+ case TYPE_FP6:
+ case TYPE_FP7:
+ pipe_0 = i;
+ break;
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ pipe_ls = i;
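+	  /* Fall through.  */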
+ case TYPE_LNOP:
+ case TYPE_SHUF:
+ case TYPE_BR:
+ case TYPE_MULTI1:
+ case TYPE_HBR:
+ pipe_1 = i;
+ break;
+ case TYPE_IPREFETCH:
+ pipe_hbrp = i;
+ break;
+ }
+ }
+
+ /* In the first scheduling phase, schedule loads and stores together
+ to increase the chance they will get merged during postreload CSE. */
+ if (!reload_completed && pipe_ls >= 0)
+ {
+ insn = ready[pipe_ls];
+ ready[pipe_ls] = ready[nready - 1];
+ ready[nready - 1] = insn;
+ return 1;
+ }
+
+ /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
+ if (pipe_hbrp >= 0)
+ pipe_1 = pipe_hbrp;
+
+ /* When we have loads/stores in every cycle of the last 15 insns and
+ we are about to schedule another load/store, emit an hbrp insn
+ instead. */
+ if (in_spu_reorg
+ && spu_sched_length - spu_ls_first >= 4 * 15
+ && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
+ {
+ insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
+ recog_memoized (insn);
+ if (pipe0_clock < clock)
+ PUT_MODE (insn, TImode);
+ spu_sched_variable_issue (file, verbose, insn, -1);
+ return 0;
+ }
+
+ /* In general, we want to emit nops to increase dual issue, but dual
+ issue isn't faster when one of the insns could be scheduled later
+     without affecting the critical path.  We look at INSN_PRIORITY to
+     make a good guess, but it isn't perfect, so -mdual-nops=n can be
+     used to tune it.  */
+ if (in_spu_reorg && spu_dual_nops < 10)
+ {
+      /* When we are at an even address and we are not issuing nops to
+ improve scheduling then we need to advance the cycle. */
+ if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
+ && (spu_dual_nops == 0
+ || (pipe_1 != -1
+ && prev_priority >
+ INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
+ return 0;
+
+ /* When at an odd address, schedule the highest priority insn
+ without considering pipeline. */
+ if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
+ && (spu_dual_nops == 0
+ || (prev_priority >
+ INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
+ return 1;
+ }
+
+
+  /* We haven't issued a pipe0 insn yet this cycle; if there is a
+     pipe0 insn in the ready list, schedule it.  */
+ if (pipe0_clock < clock && pipe_0 >= 0)
+ schedule_i = pipe_0;
+
+ /* Either we've scheduled a pipe0 insn already or there is no pipe0
+ insn to schedule. Put a pipe1 insn at the front of the ready list. */
+ else
+ schedule_i = pipe_1;
+
+ if (schedule_i > -1)
+ {
+ insn = ready[schedule_i];
+ ready[schedule_i] = ready[nready - 1];
+ ready[nready - 1] = insn;
+ return 1;
+ }
+ return 0;
+}
+
+/* INSN is dependent on DEP_INSN. */
+static int
+spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ rtx set;
+
+ /* The blockage pattern is used to prevent instructions from being
+ moved across it and has no cost. */
+ if (INSN_CODE (insn) == CODE_FOR_blockage
+ || INSN_CODE (dep_insn) == CODE_FOR_blockage)
+ return 0;
+
+ if ((INSN_P (insn) && get_attr_length (insn) == 0)
+ || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
+ return 0;
+
+ /* Make sure hbrps are spread out. */
+ if (INSN_CODE (insn) == CODE_FOR_iprefetch
+ && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
+ return 8;
+
+ /* Make sure hints and hbrps are 2 cycles apart. */
+ if ((INSN_CODE (insn) == CODE_FOR_iprefetch
+ || INSN_CODE (insn) == CODE_FOR_hbr)
+ && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
+ || INSN_CODE (dep_insn) == CODE_FOR_hbr))
+ return 2;
+
+ /* An hbrp has no real dependency on other insns. */
+ if (INSN_CODE (insn) == CODE_FOR_iprefetch
+ || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
+ return 0;
+
+ /* Assuming that it is unlikely an argument register will be used in
+ the first cycle of the called function, we reduce the cost for
+ slightly better scheduling of dep_insn. When not hinted, the
+ mispredicted branch would hide the cost as well. */
+ if (CALL_P (insn))
+ {
+ rtx target = get_branch_target (insn);
+ if (GET_CODE (target) != REG || !set_of (target, insn))
+ return cost - 2;
+ return cost;
+ }
+
+ /* And when returning from a function, let's assume the return values
+ are completed sooner too. */
+ if (CALL_P (dep_insn))
+ return cost - 2;
+
+  /* Make sure an instruction that loads from the back chain is scheduled
+ away from the return instruction so a hint is more likely to get
+ issued. */
+ if (INSN_CODE (insn) == CODE_FOR__return
+ && (set = single_set (dep_insn))
+ && GET_CODE (SET_DEST (set)) == REG
+ && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
+ return 20;
+
+ /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
+ scheduler makes every insn in a block anti-dependent on the final
+ jump_insn. We adjust here so higher cost insns will get scheduled
+ earlier. */
+ if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
+ return insn_cost (dep_insn) - 3;
+
+ return cost;
+}
+
+/* Create a CONST_DOUBLE from a string. */
+struct rtx_def *
+spu_float_const (const char *string, enum machine_mode mode)
+{
+ REAL_VALUE_TYPE value;
+ value = REAL_VALUE_ATOF (string, mode);
+ return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
+}
+
+int
+spu_constant_address_p (rtx x)
+{
+ return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
+ || GET_CODE (x) == HIGH);
+}
+
+static enum spu_immediate
+which_immediate_load (HOST_WIDE_INT val)
+{
+ gcc_assert (val == trunc_int_for_mode (val, SImode));
+
+ if (val >= -0x8000 && val <= 0x7fff)
+ return SPU_IL;
+ if (val >= 0 && val <= 0x3ffff)
+ return SPU_ILA;
+ if ((val & 0xffff) == ((val >> 16) & 0xffff))
+ return SPU_ILH;
+ if ((val & 0xffff) == 0)
+ return SPU_ILHU;
+
+ return SPU_NONE;
+}
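+
+/* A few worked examples for which_immediate_load, derived from the
+   range checks above (illustrative only):
+     0x00001234  ->  SPU_IL    (fits the signed 16-bit il field)
+     0x00012345  ->  SPU_ILA   (fits the unsigned 18-bit ila field)
+     0x00050005  ->  SPU_ILH   (both halfwords identical)
+     0x00070000  ->  SPU_ILHU  (low halfword zero)
+     0x12345678  ->  SPU_NONE  (needs a pair such as ilhu + iohl).  */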
+
+/* Return true when OP can be loaded by one of the il instructions, or
+ when flow2 is not completed and OP can be loaded using ilhu and iohl. */
+int
+immediate_load_p (rtx op, enum machine_mode mode)
+{
+ if (CONSTANT_P (op))
+ {
+ enum immediate_class c = classify_immediate (op, mode);
+ return c == IC_IL1 || c == IC_IL1s
+ || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
+ }
+ return 0;
+}
+
+/* Return true if the first SIZE bytes of ARR form a constant that can
+   be generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and
+   PSTART are set to the run size and start offset for the instruction
+   to use.  */
+static int
+cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
+{
+ int cpat, run, i, start;
+ cpat = 1;
+ run = 0;
+ start = -1;
+ for (i = 0; i < size && cpat; i++)
+ if (arr[i] != i+16)
+ {
+ if (!run)
+ {
+ start = i;
+ if (arr[i] == 3)
+ run = 1;
+ else if (arr[i] == 2 && arr[i+1] == 3)
+ run = 2;
+ else if (arr[i] == 0)
+ {
+	      while (i+run < 16 && arr[i+run] == run)
+ run++;
+ if (run != 4 && run != 8)
+ cpat = 0;
+ }
+ else
+ cpat = 0;
+ if ((i & (run-1)) != 0)
+ cpat = 0;
+ i += run;
+ }
+ else
+ cpat = 0;
+ }
+ if (cpat && (run || size < 16))
+ {
+ if (run == 0)
+ run = 1;
+ if (prun)
+ *prun = run;
+ if (pstart)
+ *pstart = start == -1 ? 16-run : start;
+ return 1;
+ }
+ return 0;
+}
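+
+/* Worked example (illustrative): a cwd inserting a word at byte offset
+   4 corresponds to the shuffle pattern
+     { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }.
+   Bytes 0-3 and 8-15 select the unchanged quadword (value i+16), while
+   the run 0,1,2,3 at offset 4 selects the new word, so cpat_info above
+   returns run = 4 and start = 4.  Likewise a single byte 3 marks a cbd
+   (run 1) and the pair 2,3 marks a chd (run 2); see gen_cpat_const
+   further below for the generation side.  */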
+
+/* OP is a CONSTANT_P. Determine what instructions can be used to load
+ it into a register. MODE is only valid when OP is a CONST_INT. */
+static enum immediate_class
+classify_immediate (rtx op, enum machine_mode mode)
+{
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int i, j, repeated, fsmbi, repeat;
+
+ gcc_assert (CONSTANT_P (op));
+
+ if (GET_MODE (op) != VOIDmode)
+ mode = GET_MODE (op);
+
+ /* A V4SI const_vector with all identical symbols is ok. */
+ if (!flag_pic
+ && mode == V4SImode
+ && GET_CODE (op) == CONST_VECTOR
+ && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
+ && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
+ && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
+ && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
+ && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
+ op = CONST_VECTOR_ELT (op, 0);
+
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
+
+ case CONST:
+ /* We can never know if the resulting address fits in 18 bits and can be
+ loaded with ila. For now, assume the address will not overflow if
+ the displacement is "small" (fits 'K' constraint). */
+ if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx sym = XEXP (XEXP (op, 0), 0);
+ rtx cst = XEXP (XEXP (op, 0), 1);
+
+ if (GET_CODE (sym) == SYMBOL_REF
+ && GET_CODE (cst) == CONST_INT
+ && satisfies_constraint_K (cst))
+ return IC_IL1s;
+ }
+ return IC_IL2s;
+
+ case HIGH:
+ return IC_IL1s;
+
+ case CONST_VECTOR:
+ for (i = 0; i < GET_MODE_NUNITS (mode); i++)
+ if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
+ && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
+ return IC_POOL;
+ /* Fall through. */
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ constant_to_array (mode, op, arr);
+
+ /* Check that each 4-byte slot is identical. */
+ repeated = 1;
+ for (i = 4; i < 16; i += 4)
+ for (j = 0; j < 4; j++)
+ if (arr[j] != arr[i + j])
+ repeated = 0;
+
+ if (repeated)
+ {
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+
+ if (which_immediate_load (val) != SPU_NONE)
+ return IC_IL1;
+ }
+
+ /* Any mode of 2 bytes or smaller can be loaded with an il
+ instruction. */
+ gcc_assert (GET_MODE_SIZE (mode) > 2);
+
+ fsmbi = 1;
+ repeat = 0;
+ for (i = 0; i < 16 && fsmbi; i++)
+ if (arr[i] != 0 && repeat == 0)
+ repeat = arr[i];
+ else if (arr[i] != 0 && arr[i] != repeat)
+ fsmbi = 0;
+ if (fsmbi)
+ return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
+
+ if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
+ return IC_CPAT;
+
+ if (repeated)
+ return IC_IL2;
+
+ return IC_POOL;
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
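+
+/* Illustrative cases for classify_immediate (not exhaustive):
+   - (const_int 23) in SImode repeats the bytes 0,0,0,23 across the
+     quadword; 23 loads with il, so the class is IC_IL1.
+   - a constant whose bytes are only 0x00 and 0xff is IC_FSMBI, a
+     single fsmbi instruction.
+   - bytes of only 0x00 and one other repeated value classify as
+     IC_FSMBI2, which needs a second instruction and is therefore
+     only accepted before flow2 completes (see fsmbi_const_p).  */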
+
+static enum spu_immediate
+which_logical_immediate (HOST_WIDE_INT val)
+{
+ gcc_assert (val == trunc_int_for_mode (val, SImode));
+
+ if (val >= -0x200 && val <= 0x1ff)
+ return SPU_ORI;
+ if (val >= 0 && val <= 0xffff)
+ return SPU_IOHL;
+ if ((val & 0xffff) == ((val >> 16) & 0xffff))
+ {
+ val = trunc_int_for_mode (val, HImode);
+ if (val >= -0x200 && val <= 0x1ff)
+ return SPU_ORHI;
+ if ((val & 0xff) == ((val >> 8) & 0xff))
+ {
+ val = trunc_int_for_mode (val, QImode);
+ if (val >= -0x200 && val <= 0x1ff)
+ return SPU_ORBI;
+ }
+ }
+ return SPU_NONE;
+}
+
+/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
+ CONST_DOUBLEs. */
+static int
+const_vector_immediate_p (rtx x)
+{
+ int i;
+ gcc_assert (GET_CODE (x) == CONST_VECTOR);
+ for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
+ if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
+ && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
+ return 0;
+ return 1;
+}
+
+int
+logical_immediate_p (rtx op, enum machine_mode mode)
+{
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int i, j;
+
+ gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && !const_vector_immediate_p (op))
+ return 0;
+
+ if (GET_MODE (op) != VOIDmode)
+ mode = GET_MODE (op);
+
+ constant_to_array (mode, op, arr);
+
+ /* Check that bytes are repeated. */
+ for (i = 4; i < 16; i += 4)
+ for (j = 0; j < 4; j++)
+ if (arr[j] != arr[i + j])
+ return 0;
+
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+
+ i = which_logical_immediate (val);
+ return i != SPU_NONE && i != SPU_IOHL;
+}
+
+int
+iohl_immediate_p (rtx op, enum machine_mode mode)
+{
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int i, j;
+
+ gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && !const_vector_immediate_p (op))
+ return 0;
+
+ if (GET_MODE (op) != VOIDmode)
+ mode = GET_MODE (op);
+
+ constant_to_array (mode, op, arr);
+
+ /* Check that bytes are repeated. */
+ for (i = 4; i < 16; i += 4)
+ for (j = 0; j < 4; j++)
+ if (arr[j] != arr[i + j])
+ return 0;
+
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+
+ return val >= 0 && val <= 0xffff;
+}
+
+int
+arith_immediate_p (rtx op, enum machine_mode mode,
+ HOST_WIDE_INT low, HOST_WIDE_INT high)
+{
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int bytes, i, j;
+
+ gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && !const_vector_immediate_p (op))
+ return 0;
+
+ if (GET_MODE (op) != VOIDmode)
+ mode = GET_MODE (op);
+
+ constant_to_array (mode, op, arr);
+
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ bytes = GET_MODE_SIZE (mode);
+ mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+
+ /* Check that bytes are repeated. */
+ for (i = bytes; i < 16; i += bytes)
+ for (j = 0; j < bytes; j++)
+ if (arr[j] != arr[i + j])
+ return 0;
+
+ val = arr[0];
+ for (j = 1; j < bytes; j++)
+ val = (val << 8) | arr[j];
+
+ val = trunc_int_for_mode (val, mode);
+
+ return val >= low && val <= high;
+}
+
+/* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
+   OP == 2^scale with scale >= LOW && scale <= HIGH.  When OP is a vector,
+   all entries must be the same.  */
+bool
+exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
+{
+ enum machine_mode int_mode;
+ HOST_WIDE_INT val;
+ unsigned char arr[16];
+ int bytes, i, j;
+
+ gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+ || GET_CODE (op) == CONST_VECTOR);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && !const_vector_immediate_p (op))
+ return 0;
+
+ if (GET_MODE (op) != VOIDmode)
+ mode = GET_MODE (op);
+
+ constant_to_array (mode, op, arr);
+
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ bytes = GET_MODE_SIZE (mode);
+ int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+
+ /* Check that bytes are repeated. */
+ for (i = bytes; i < 16; i += bytes)
+ for (j = 0; j < bytes; j++)
+ if (arr[j] != arr[i + j])
+ return 0;
+
+ val = arr[0];
+ for (j = 1; j < bytes; j++)
+ val = (val << 8) | arr[j];
+
+ val = trunc_int_for_mode (val, int_mode);
+
+  /* Currently, we only handle SFmode.  */
+ gcc_assert (mode == SFmode);
+ if (mode == SFmode)
+ {
+ int exp = (val >> 23) - 127;
+ return val > 0 && (val & 0x007fffff) == 0
+ && exp >= low && exp <= high;
+ }
+ return FALSE;
+}
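+
+/* Example for the SFmode test above: IEEE single precision stores the
+   exponent with a bias of 127 in bits 30-23, so 1.0f is 0x3f800000
+   (exp 0), 2.0f is 0x40000000 (exp 1) and 0.5f is 0x3f000000 (exp -1);
+   each has a zero mantissa and so passes the power-of-2 check.  */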
+
+/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
+
+static int
+ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+ tree decl;
+
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ rtx plus = XEXP (x, 0);
+ rtx op0 = XEXP (plus, 0);
+ rtx op1 = XEXP (plus, 1);
+ if (GET_CODE (op1) == CONST_INT)
+ x = op0;
+ }
+
+ return (GET_CODE (x) == SYMBOL_REF
+ && (decl = SYMBOL_REF_DECL (x)) != 0
+ && TREE_CODE (decl) == VAR_DECL
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
+}
+
+/* We accept:
+ - any 32-bit constant (SImode, SFmode)
+ - any constant that can be generated with fsmbi (any mode)
+ - a 64-bit constant where the high and low bits are identical
+ (DImode, DFmode)
+ - a 128-bit constant where the four 32-bit words match. */
+int
+spu_legitimate_constant_p (rtx x)
+{
+ if (GET_CODE (x) == HIGH)
+ x = XEXP (x, 0);
+
+ /* Reject any __ea qualified reference. These can't appear in
+ instructions but must be forced to the constant pool. */
+ if (for_each_rtx (&x, ea_symbol_ref, 0))
+ return 0;
+
+ /* V4SI with all identical symbols is valid. */
+ if (!flag_pic
+ && GET_MODE (x) == V4SImode
+ && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
+ || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
+ || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
+ return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
+ && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
+ && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
+
+ if (GET_CODE (x) == CONST_VECTOR
+ && !const_vector_immediate_p (x))
+ return 0;
+ return 1;
+}
+
+/* Valid addresses are:
+ - symbol_ref, label_ref, const
+ - reg
+ - reg + const_int, where const_int is 16 byte aligned
+ - reg + reg, alignment doesn't matter
+ The alignment matters in the reg+const case because lqd and stqd
+ ignore the 4 least significant bits of the const. We only care about
+ 16 byte modes because the expand phase will change all smaller MEM
+ references to TImode. */
+static bool
+spu_legitimate_address_p (enum machine_mode mode,
+ rtx x, bool reg_ok_strict)
+{
+ int aligned = GET_MODE_SIZE (mode) >= 16;
+ if (aligned
+ && GET_CODE (x) == AND
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
+ x = XEXP (x, 0);
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ return !TARGET_LARGE_MEM;
+
+ case SYMBOL_REF:
+ case CONST:
+ /* Keep __ea references until reload so that spu_expand_mov can see them
+ in MEMs. */
+ if (ea_symbol_ref (&x, 0))
+ return !reload_in_progress && !reload_completed;
+ return !TARGET_LARGE_MEM;
+
+ case CONST_INT:
+ return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
+
+ case SUBREG:
+ x = XEXP (x, 0);
+ if (REG_P (x))
+ return 0;
+
+ case REG:
+ return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
+
+ case PLUS:
+ case LO_SUM:
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ if (GET_CODE (op0) == SUBREG)
+ op0 = XEXP (op0, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = XEXP (op1, 0);
+ if (GET_CODE (op0) == REG
+ && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
+ && GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) >= -0x2000
+ && INTVAL (op1) <= 0x1fff
+ && (!aligned || (INTVAL (op1) & 15) == 0))
+ return TRUE;
+ if (GET_CODE (op0) == REG
+ && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
+ && GET_CODE (op1) == REG
+ && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
+ return TRUE;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return FALSE;
+}
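+
+/* Examples (illustrative): for a 16-byte mode,
+   (plus (reg) (const_int 32)) is a legitimate address while
+   (plus (reg) (const_int 20)) is not, because lqd/stqd would silently
+   drop the low 4 bits of the 20.  The same reg+20 address is fine for
+   an SImode access, where the alignment check above is skipped.  */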
+
+/* Like spu_legitimate_address_p, except with named addresses. */
+static bool
+spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
+ bool reg_ok_strict, addr_space_t as)
+{
+ if (as == ADDR_SPACE_EA)
+ return (REG_P (x) && (GET_MODE (x) == EAmode));
+
+ else if (as != ADDR_SPACE_GENERIC)
+ gcc_unreachable ();
+
+ return spu_legitimate_address_p (mode, x, reg_ok_strict);
+}
+
+/* When the address is reg + const_int, force the const_int into a
+ register. */
+rtx
+spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx op0, op1;
+ /* Make sure both operands are registers. */
+ if (GET_CODE (x) == PLUS)
+ {
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ if (ALIGNED_SYMBOL_REF_P (op0))
+ {
+ op0 = force_reg (Pmode, op0);
+ mark_reg_pointer (op0, 128);
+ }
+ else if (GET_CODE (op0) != REG)
+ op0 = force_reg (Pmode, op0);
+ if (ALIGNED_SYMBOL_REF_P (op1))
+ {
+ op1 = force_reg (Pmode, op1);
+ mark_reg_pointer (op1, 128);
+ }
+ else if (GET_CODE (op1) != REG)
+ op1 = force_reg (Pmode, op1);
+ x = gen_rtx_PLUS (Pmode, op0, op1);
+ }
+ return x;
+}
+
+/* Like spu_legitimize_address, except with named address support.  */
+static rtx
+spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
+ addr_space_t as)
+{
+ if (as != ADDR_SPACE_GENERIC)
+ return x;
+
+ return spu_legitimize_address (x, oldx, mode);
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+ struct attribute_spec.handler. */
+static tree
+spu_handle_fndecl_attribute (tree * node,
+ tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (0, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle the "vector" attribute. */
+static tree
+spu_handle_vector_attribute (tree * node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
+{
+ tree type = *node, result = NULL_TREE;
+ enum machine_mode mode;
+ int unsigned_p;
+
+ while (POINTER_TYPE_P (type)
+ || TREE_CODE (type) == FUNCTION_TYPE
+ || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+
+ mode = TYPE_MODE (type);
+
+ unsigned_p = TYPE_UNSIGNED (type);
+ switch (mode)
+ {
+ case DImode:
+ result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+ break;
+ case SImode:
+ result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+ break;
+ case HImode:
+ result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+ break;
+ case QImode:
+ result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+ break;
+ case SFmode:
+ result = V4SF_type_node;
+ break;
+ case DFmode:
+ result = V2DF_type_node;
+ break;
+ default:
+ break;
+ }
+
+ /* Propagate qualifiers attached to the element type
+ onto the vector type. */
+ if (result && result != type && TYPE_QUALS (type))
+ result = build_qualified_type (result, TYPE_QUALS (type));
+
+ *no_add_attrs = true; /* No need to hang on to the attribute. */
+
+ if (!result)
+ warning (0, "%qE attribute ignored", name);
+ else
+ *node = lang_hooks.types.reconstruct_complex_type (*node, result);
+
+ return NULL_TREE;
+}
+
+/* Return nonzero if FUNC is a naked function. */
+static int
+spu_naked_function_p (tree func)
+{
+ tree a;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ abort ();
+
+ a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
+ return a != NULL_TREE;
+}
+
+int
+spu_initial_elimination_offset (int from, int to)
+{
+ int saved_regs_size = spu_saved_regs_size ();
+ int sp_offset = 0;
+ if (!current_function_is_leaf || crtl->outgoing_args_size
+ || get_frame_size () || saved_regs_size)
+ sp_offset = STACK_POINTER_OFFSET;
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return get_frame_size () + crtl->outgoing_args_size + sp_offset;
+ else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return get_frame_size ();
+ else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return sp_offset + crtl->outgoing_args_size
+ + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
+ else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return get_frame_size () + saved_regs_size + sp_offset;
+ else
+ gcc_unreachable ();
+}
+
+rtx
+spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ int byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
+
+ /* Make sure small structs are left justified in a register. */
+ if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
+ && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
+ {
+ enum machine_mode smode;
+ rtvec v;
+ int i;
+ int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ int n = byte_size / UNITS_PER_WORD;
+ v = rtvec_alloc (nregs);
+ for (i = 0; i < n; i++)
+ {
+ RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (TImode,
+ FIRST_RETURN_REGNUM
+ + i),
+ GEN_INT (UNITS_PER_WORD * i));
+ byte_size -= UNITS_PER_WORD;
+ }
+
+ if (n < nregs)
+ {
+ if (byte_size < 4)
+ byte_size = 4;
+ smode =
+ smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
+ RTVEC_ELT (v, n) =
+ gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
+ GEN_INT (UNITS_PER_WORD * n));
+ }
+ return gen_rtx_PARALLEL (mode, v);
+ }
+ return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
+}
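+
+/* For example, assuming 16-byte registers (UNITS_PER_WORD == 16 on
+   this target), a 20-byte struct is returned as a PARALLEL of a TImode
+   register holding bytes 0-15 and an SImode register holding bytes
+   16-19, both left justified.  */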
+
+static rtx
+spu_function_arg (CUMULATIVE_ARGS *cum,
+ enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int byte_size;
+
+ if (*cum >= MAX_REGISTER_ARGS)
+ return 0;
+
+ byte_size = ((mode == BLKmode)
+ ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
+
+ /* The ABI does not allow parameters to be passed partially in
+ reg and partially in stack. */
+ if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
+ return 0;
+
+ /* Make sure small structs are left justified in a register. */
+ if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
+ && byte_size < UNITS_PER_WORD && byte_size > 0)
+ {
+ enum machine_mode smode;
+ rtx gr_reg;
+ if (byte_size < 4)
+ byte_size = 4;
+ smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
+ gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
+ const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
+ }
+ else
+ return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
+}
+
+static void
+spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ ? 1
+ : mode == BLKmode
+ ? ((int_size_in_bytes (type) + 15) / 16)
+ : mode == VOIDmode
+ ? 1
+ : HARD_REGNO_NREGS (cum, mode));
+}
+
+/* Variable sized types are passed by reference. */
+static bool
+spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
+}
+
+
+/* Var args. */
+
+/* Create and return the va_list datatype.
+
+ On SPU, va_list is an array type equivalent to
+
+ typedef struct __va_list_tag
+ {
+ void *__args __attribute__((__aligned(16)));
+ void *__skip __attribute__((__aligned(16)));
+
+ } va_list[1];
+
+ where __args points to the arg that will be returned by the next
+ va_arg(), and __skip points to the previous stack frame such that
+ when __args == __skip we should advance __args by 32 bytes. */
+static tree
+spu_build_builtin_va_list (void)
+{
+ tree f_args, f_skip, record, type_decl;
+ bool owp;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+
+ type_decl =
+ build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_args = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__args"), ptr_type_node);
+ f_skip = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
+
+ DECL_FIELD_CONTEXT (f_args) = record;
+ DECL_ALIGN (f_args) = 128;
+ DECL_USER_ALIGN (f_args) = 1;
+
+ DECL_FIELD_CONTEXT (f_skip) = record;
+ DECL_ALIGN (f_skip) = 128;
+ DECL_USER_ALIGN (f_skip) = 1;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_args;
+ DECL_CHAIN (f_args) = f_skip;
+
+  /* We know this is being padded and we want it to be.  It is an
+     internal type, so hide the warnings from the user.  */
+ owp = warn_padded;
+ warn_padded = false;
+
+ layout_type (record);
+
+ warn_padded = owp;
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
+
+/* Implement va_start by filling the va_list structure VALIST.
+ NEXTARG points to the first anonymous stack argument.
+
+ The following global variables are used to initialize
+ the va_list structure:
+
+   crtl->args.info:
+      the CUMULATIVE_ARGS for this function
+
+ crtl->args.arg_offset_rtx:
+ holds the offset of the first anonymous stack argument
+ (relative to the virtual arg pointer). */
+
+static void
+spu_va_start (tree valist, rtx nextarg)
+{
+ tree f_args, f_skip;
+ tree args, skip, t;
+
+ f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_skip = DECL_CHAIN (f_args);
+
+ valist = build_simple_mem_ref (valist);
+ args =
+ build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
+ skip =
+ build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
+
+ /* Find the __args area. */
+ t = make_tree (TREE_TYPE (args), nextarg);
+ if (crtl->args.pretend_args_size > 0)
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
+ size_int (-STACK_POINTER_OFFSET));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Find the __skip area. */
+ t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
+ size_int (crtl->args.pretend_args_size
+ - STACK_POINTER_OFFSET));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+/* Gimplify va_arg by updating the va_list structure
+ VALIST as required to retrieve an argument of type
+ TYPE, and returning that argument.
+
+ ret = va_arg(VALIST, TYPE);
+
+ generates code equivalent to:
+
+ paddedsize = (sizeof(TYPE) + 15) & -16;
+ if (VALIST.__args + paddedsize > VALIST.__skip
+ && VALIST.__args <= VALIST.__skip)
+ addr = VALIST.__skip + 32;
+ else
+ addr = VALIST.__args;
+ VALIST.__args = addr + paddedsize;
+ ret = *(TYPE *)addr;
+ */
+static tree
+spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
+ gimple_seq * post_p ATTRIBUTE_UNUSED)
+{
+ tree f_args, f_skip;
+ tree args, skip;
+ HOST_WIDE_INT size, rsize;
+ tree paddedsize, addr, tmp;
+ bool pass_by_reference_p;
+
+ f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
+ f_skip = DECL_CHAIN (f_args);
+
+ valist = build_simple_mem_ref (valist);
+ args =
+ build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
+ skip =
+ build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
+
+ addr = create_tmp_var (ptr_type_node, "va_arg");
+
+  /* If an object is dynamically sized, a pointer to it is passed
+     instead of the object itself.  */
+ pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
+ false);
+ if (pass_by_reference_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
+
+  /* Build the conditional expression that computes addr; it will be
+     gimplified later.  */
+ paddedsize = size_int (rsize);
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
+ tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
+ build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
+ build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
+ unshare_expr (skip)));
+
+ tmp = build3 (COND_EXPR, ptr_type_node, tmp,
+ build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
+ size_int (32)), unshare_expr (args));
+
+ gimplify_assign (addr, tmp, pre_p);
+
+  /* Update VALIST.__args.  */
+ tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
+ gimplify_assign (unshare_expr (args), tmp, pre_p);
+
+ addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
+ addr);
+
+ if (pass_by_reference_p)
+ addr = build_va_arg_indirect_ref (addr);
+
+ return build_va_arg_indirect_ref (addr);
+}
+
+/* Save parameter registers starting with the register that corresponds
+   to the first unnamed parameter.  If the first unnamed parameter is
+ in the stack then save no registers. Set pretend_args_size to the
+ amount of space needed to save the registers. */
+void
+spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ tree type, int *pretend_size, int no_rtl)
+{
+ if (!no_rtl)
+ {
+ rtx tmp;
+ int regno;
+ int offset;
+ int ncum = *cum;
+
+      /* cum currently points to the last named argument; we want to
+	 start at the next argument.  */
+ spu_function_arg_advance (&ncum, mode, type, true);
+
+ offset = -STACK_POINTER_OFFSET;
+ for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
+ {
+ tmp = gen_frame_mem (V4SImode,
+ plus_constant (virtual_incoming_args_rtx,
+ offset));
+ emit_move_insn (tmp,
+ gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
+ offset += 16;
+ }
+ *pretend_size = offset + STACK_POINTER_OFFSET;
+ }
+}
+
+static void
+spu_conditional_register_usage (void)
+{
+ if (flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+}
+
+/* This is called any time we inspect the alignment of a register for
+ addresses. */
+static int
+reg_aligned_for_addr (rtx x)
+{
+ int regno =
+ REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
+ return REGNO_POINTER_ALIGN (regno) >= 128;
+}
+
+/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
+ into its SYMBOL_REF_FLAGS. */
+static void
+spu_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ /* If a variable has a forced alignment to < 16 bytes, mark it with
+ SYMBOL_FLAG_ALIGN1. */
+ if (TREE_CODE (decl) == VAR_DECL
+ && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
+}
+
+/* Return TRUE if we are certain the mem refers to a complete object
+ which is both 16-byte aligned and padded to a 16-byte boundary. This
+ would make it safe to store with a single instruction.
+ We guarantee the alignment and padding for static objects by aligning
+   all of them to 16 bytes.  (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
+ FIXME: We currently cannot guarantee this for objects on the stack
+ because assign_parm_setup_stack calls assign_stack_local with the
+ alignment of the parameter mode and in that case the alignment never
+ gets adjusted by LOCAL_ALIGNMENT. */
+static int
+store_with_one_insn_p (rtx mem)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx addr = XEXP (mem, 0);
+ if (mode == BLKmode)
+ return 0;
+ if (GET_MODE_SIZE (mode) >= 16)
+ return 1;
+ /* Only static objects. */
+ if (GET_CODE (addr) == SYMBOL_REF)
+ {
+ /* We use the associated declaration to make sure the access is
+ referring to the whole object.
+	 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
+ if it is necessary. Will there be cases where one exists, and
+ the other does not? Will there be cases where both exist, but
+ have different types? */
+ tree decl = MEM_EXPR (mem);
+ if (decl
+ && TREE_CODE (decl) == VAR_DECL
+ && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
+ return 1;
+ decl = SYMBOL_REF_DECL (addr);
+ if (decl
+ && TREE_CODE (decl) == VAR_DECL
+ && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
+ return 1;
+ }
+ return 0;
+}
+
+/* Return 1 when the address is not valid for a simple load and store as
+ required by the '_mov*' patterns. We could make this less strict
+   for loads, but we prefer MEMs to look the same so they are more
+ likely to be merged. */
+static int
+address_needs_split (rtx mem)
+{
+ if (GET_MODE_SIZE (GET_MODE (mem)) < 16
+ && (GET_MODE_SIZE (GET_MODE (mem)) < 4
+ || !(store_with_one_insn_p (mem)
+ || mem_is_padded_component_ref (mem))))
+ return 1;
+
+ return 0;
+}
+
+static GTY(()) rtx cache_fetch; /* __cache_fetch function */
+static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
+static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
+
+/* MEM is known to be an __ea qualified memory access.  Emit a call to
+   fetch the PPU memory to local store, and return its address in
+   local store.  */
+
+static void
+ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ if (is_store)
+ {
+ rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
+ if (!cache_fetch_dirty)
+ cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
+ emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
+ 2, ea_addr, EAmode, ndirty, SImode);
+ }
+ else
+ {
+ if (!cache_fetch)
+ cache_fetch = init_one_libfunc ("__cache_fetch");
+ emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
+ 1, ea_addr, EAmode);
+ }
+}
+
+/* Like ea_load_store, but do the cache tag comparison and, for stores,
+ dirty bit marking, inline.
+
+ The cache control data structure is an array of
+
+ struct __cache_tag_array
+ {
+ unsigned int tag_lo[4];
+ unsigned int tag_hi[4];
+ void *data_pointer[4];
+ int reserved[4];
+ vector unsigned short dirty_bits[4];
+ } */
+
+static void
+ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
+{
+ rtx ea_addr_si;
+ HOST_WIDE_INT v;
+ rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
+ rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
+ rtx index_mask = gen_reg_rtx (SImode);
+ rtx tag_arr = gen_reg_rtx (Pmode);
+ rtx splat_mask = gen_reg_rtx (TImode);
+ rtx splat = gen_reg_rtx (V4SImode);
+ rtx splat_hi = NULL_RTX;
+ rtx tag_index = gen_reg_rtx (Pmode);
+ rtx block_off = gen_reg_rtx (SImode);
+ rtx tag_addr = gen_reg_rtx (Pmode);
+ rtx tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag = gen_reg_rtx (V4SImode);
+ rtx cache_tag_hi = NULL_RTX;
+ rtx cache_ptrs = gen_reg_rtx (TImode);
+ rtx cache_ptrs_si = gen_reg_rtx (SImode);
+ rtx tag_equal = gen_reg_rtx (V4SImode);
+ rtx tag_equal_hi = NULL_RTX;
+ rtx tag_eq_pack = gen_reg_rtx (V4SImode);
+ rtx tag_eq_pack_si = gen_reg_rtx (SImode);
+ rtx eq_index = gen_reg_rtx (SImode);
+ rtx bcomp, hit_label, hit_ref, cont_label, insn;
+
+ if (spu_ea_model != 32)
+ {
+ splat_hi = gen_reg_rtx (V4SImode);
+ cache_tag_hi = gen_reg_rtx (V4SImode);
+ tag_equal_hi = gen_reg_rtx (V4SImode);
+ }
+
+ emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
+ emit_move_insn (tag_arr, tag_arr_sym);
+ v = 0x0001020300010203LL;
+ emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
+ ea_addr_si = ea_addr;
+ if (spu_ea_model != 32)
+ ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
+
+ /* tag_index = ea_addr & (tag_array_size - 128) */
+ emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
+
+ /* splat ea_addr to all 4 slots. */
+ emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
+ /* Similarly for high 32 bits of ea_addr. */
+ if (spu_ea_model != 32)
+ emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
+
+ /* block_off = ea_addr & 127 */
+ emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
+
+ /* tag_addr = tag_arr + tag_index */
+ emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
+
+ /* Read cache tags. */
+ emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
+ if (spu_ea_model != 32)
+ emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
+ plus_constant (tag_addr, 16)));
+
+ /* tag = ea_addr & -128 */
+ emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
+
+ /* Read all four cache data pointers. */
+ emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
+ plus_constant (tag_addr, 32)));
+
+ /* Compare tags. */
+ emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
+ if (spu_ea_model != 32)
+ {
+ emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
+ emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
+ }
+
+ /* At most one of the tags compare equal, so tag_equal has one
+ 32-bit slot set to all 1's, with the other slots all zero.
+ gbb picks off low bit from each byte in the 128-bit registers,
+ so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
+ we have a hit. */
+ emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
+ emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
+
+ /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
+ emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
+
+ /* Allowing us to rotate the corresponding cache data pointer to slot0.
+ (rotating eq_index mod 16 bytes). */
+ emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
+ emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
+
+ /* Add block offset to form final data address. */
+ emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
+
+ /* Check that we did hit. */
+ hit_label = gen_label_rtx ();
+ hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
+ bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ hit_ref, pc_rtx)));
+ /* Say that this branch is very likely to happen. */
+ v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
+ add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
+
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ cont_label = gen_label_rtx ();
+ emit_jump_insn (gen_jump (cont_label));
+ emit_barrier ();
+
+ emit_label (hit_label);
+
+ if (is_store)
+ {
+ HOST_WIDE_INT v_hi;
+ rtx dirty_bits = gen_reg_rtx (TImode);
+ rtx dirty_off = gen_reg_rtx (SImode);
+ rtx dirty_128 = gen_reg_rtx (TImode);
+ rtx neg_block_off = gen_reg_rtx (SImode);
+
+ /* Set up mask with one dirty bit per byte of the mem we are
+ writing, starting from top bit. */
+ v_hi = v = -1;
+ v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
+ if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
+ {
+ v_hi = v;
+ v = 0;
+ }
+ emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
+
+ /* Form index into cache dirty_bits. eq_index is one of
+ 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
+ 0x40, 0x50, 0x60 or 0x70 which just happens to be the
+ offset to each of the four dirty_bits elements. */
+ emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
+
+ emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
+
+ /* Rotate bit mask to proper bit. */
+ emit_insn (gen_negsi2 (neg_block_off, block_off));
+ emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
+ emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
+
+ /* Or in the new dirty bits. */
+ emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
+
+ /* Store. */
+ emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
+ }
+
+ emit_label (cont_label);
+}
+
+static rtx
+expand_ea_mem (rtx mem, bool is_store)
+{
+ rtx ea_addr;
+ rtx data_addr = gen_reg_rtx (Pmode);
+ rtx new_mem;
+
+ ea_addr = force_reg (EAmode, XEXP (mem, 0));
+ if (optimize_size || optimize == 0)
+ ea_load_store (mem, is_store, ea_addr, data_addr);
+ else
+ ea_load_store_inline (mem, is_store, ea_addr, data_addr);
+
+ if (ea_alias_set == -1)
+ ea_alias_set = new_alias_set ();
+
+ /* We generate a new MEM RTX to refer to the copy of the data
+ in the cache. We do not copy memory attributes (except the
+ alignment) from the original MEM, as they may no longer apply
+ to the cache copy. */
+ new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
+ set_mem_alias_set (new_mem, ea_alias_set);
+ set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
+
+ return new_mem;
+}
+
+int
+spu_expand_mov (rtx * ops, enum machine_mode mode)
+{
+ if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
+ {
+ /* Perform the move in the destination SUBREG's inner mode. */
+ ops[0] = SUBREG_REG (ops[0]);
+ mode = GET_MODE (ops[0]);
+ ops[1] = gen_lowpart_common (mode, ops[1]);
+ gcc_assert (ops[1]);
+ }
+
+ if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
+ {
+ rtx from = SUBREG_REG (ops[1]);
+ enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
+
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_CLASS (imode) == MODE_INT
+ && subreg_lowpart_p (ops[1]));
+
+ if (GET_MODE_SIZE (imode) < 4)
+ imode = SImode;
+ if (imode != GET_MODE (from))
+ from = gen_rtx_SUBREG (imode, from, 0);
+
+ if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
+ {
+ enum insn_code icode = convert_optab_handler (trunc_optab,
+ mode, imode);
+ emit_insn (GEN_FCN (icode) (ops[0], from));
+ }
+ else
+ emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
+ return 1;
+ }
+
+ /* At least one of the operands needs to be a register. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
+ {
+ rtx temp = force_reg (mode, ops[1]);
+ emit_move_insn (ops[0], temp);
+ return 1;
+ }
+ if (reload_in_progress || reload_completed)
+ {
+ if (CONSTANT_P (ops[1]))
+ return spu_split_immediate (ops);
+ return 0;
+ }
+
+ /* Catch the SImode immediates greater than 0x7fffffff, and sign
+ extend them. */
+ if (GET_CODE (ops[1]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
+ if (val != INTVAL (ops[1]))
+ {
+ emit_move_insn (ops[0], GEN_INT (val));
+ return 1;
+ }
+ }
+ if (MEM_P (ops[0]))
+ {
+ if (MEM_ADDR_SPACE (ops[0]))
+ ops[0] = expand_ea_mem (ops[0], true);
+ return spu_split_store (ops);
+ }
+ if (MEM_P (ops[1]))
+ {
+ if (MEM_ADDR_SPACE (ops[1]))
+ ops[1] = expand_ea_mem (ops[1], false);
+ return spu_split_load (ops);
+ }
+
+ return 0;
+}
+
+static void
+spu_convert_move (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+ rtx reg;
+ gcc_assert (GET_MODE (src) == TImode);
+ reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
+ emit_insn (gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_TRUNCATE (int_mode,
+ gen_rtx_LSHIFTRT (TImode, src,
+ GEN_INT (int_mode == DImode ? 64 : 96)))));
+ if (int_mode != mode)
+ {
+ reg = simplify_gen_subreg (mode, reg, int_mode, 0);
+ emit_move_insn (dst, reg);
+ }
+}
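+
+/* Note on spu_convert_move: scalar values live in the preferred slot
+   (the leftmost bytes) of a quadword register, so e.g. an SImode
+   result is produced by shifting the TImode source right by 96 bits
+   and truncating, which leaves bytes 0-3 in the low word.  */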
+
+/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
+ the address from SRC and SRC+16. Return a REG or CONST_INT that
+ specifies how many bytes to rotate the loaded registers, plus any
+ extra from EXTRA_ROTQBY. The address and rotate amounts are
+ normalized to improve merging of loads and rotate computations. */
+static rtx
+spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
+{
+ rtx addr = XEXP (src, 0);
+ rtx p0, p1, rot, addr0, addr1;
+ int rot_amt;
+
+ rot = 0;
+ rot_amt = 0;
+
+ if (MEM_ALIGN (src) >= 128)
+ /* Address is already aligned; simply perform a TImode load. */ ;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ /* 8 cases:
+ aligned reg + aligned reg => lqx
+ aligned reg + unaligned reg => lqx, rotqby
+ aligned reg + aligned const => lqd
+ aligned reg + unaligned const => lqd, rotqbyi
+ unaligned reg + aligned reg => lqx, rotqby
+ unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
+ unaligned reg + aligned const => lqd, rotqby
+       unaligned reg + unaligned const => not allowed by legitimate address
+ */
+ p0 = XEXP (addr, 0);
+ p1 = XEXP (addr, 1);
+ if (!reg_aligned_for_addr (p0))
+ {
+ if (REG_P (p1) && !reg_aligned_for_addr (p1))
+ {
+ rot = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (rot, p0, p1));
+ }
+ else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
+ {
+ if (INTVAL (p1) > 0
+ && REG_POINTER (p0)
+ && INTVAL (p1) * BITS_PER_UNIT
+ < REGNO_POINTER_ALIGN (REGNO (p0)))
+ {
+ rot = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (rot, p0, p1));
+ addr = p0;
+ }
+ else
+ {
+ rtx x = gen_reg_rtx (SImode);
+ emit_move_insn (x, p1);
+ if (!spu_arith_operand (p1, SImode))
+ p1 = x;
+ rot = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (rot, p0, p1));
+ addr = gen_rtx_PLUS (Pmode, p0, x);
+ }
+ }
+ else
+ rot = p0;
+ }
+ else
+ {
+ if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
+ {
+ rot_amt = INTVAL (p1) & 15;
+ if (INTVAL (p1) & -16)
+ {
+ p1 = GEN_INT (INTVAL (p1) & -16);
+ addr = gen_rtx_PLUS (SImode, p0, p1);
+ }
+ else
+ addr = p0;
+ }
+ else if (REG_P (p1) && !reg_aligned_for_addr (p1))
+ rot = p1;
+ }
+ }
+ else if (REG_P (addr))
+ {
+ if (!reg_aligned_for_addr (addr))
+ rot = addr;
+ }
+ else if (GET_CODE (addr) == CONST)
+ {
+ if (GET_CODE (XEXP (addr, 0)) == PLUS
+ && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+ {
+ rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
+ if (rot_amt & -16)
+ addr = gen_rtx_CONST (Pmode,
+ gen_rtx_PLUS (Pmode,
+ XEXP (XEXP (addr, 0), 0),
+ GEN_INT (rot_amt & -16)));
+ else
+ addr = XEXP (XEXP (addr, 0), 0);
+ }
+ else
+ {
+ rot = gen_reg_rtx (Pmode);
+ emit_move_insn (rot, addr);
+ }
+ }
+ else if (GET_CODE (addr) == CONST_INT)
+ {
+ rot_amt = INTVAL (addr);
+ addr = GEN_INT (rot_amt & -16);
+ }
+ else if (!ALIGNED_SYMBOL_REF_P (addr))
+ {
+ rot = gen_reg_rtx (Pmode);
+ emit_move_insn (rot, addr);
+ }
+
+ rot_amt += extra_rotby;
+
+ rot_amt &= 15;
+
+ if (rot && rot_amt)
+ {
+ rtx x = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
+ rot = x;
+ rot_amt = 0;
+ }
+ if (!rot && rot_amt)
+ rot = GEN_INT (rot_amt);
+
+ addr0 = copy_rtx (addr);
+ addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
+ emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
+
+ if (dst1)
+ {
+ addr1 = plus_constant (copy_rtx (addr), 16);
+ addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
+ emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
+ }
+
+ return rot;
+}
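+
+/* Worked example (illustrative): loading an SImode value from
+   (plus (reg) (const_int 6)) with a 16-byte-aligned register takes the
+   "aligned reg + unaligned const" case above: we emit a TImode load
+   from the register itself and return 6, so the caller rotates bytes
+   6-9 into the preferred slot (lqd followed by rotqbyi).  */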
+
+int
+spu_split_load (rtx * ops)
+{
+ enum machine_mode mode = GET_MODE (ops[0]);
+ rtx addr, load, rot;
+ int rot_amt;
+
+ if (GET_MODE_SIZE (mode) >= 16)
+ return 0;
+
+ addr = XEXP (ops[1], 0);
+ gcc_assert (GET_CODE (addr) != AND);
+
+ if (!address_needs_split (ops[1]))
+ {
+ ops[1] = change_address (ops[1], TImode, addr);
+ load = gen_reg_rtx (TImode);
+ emit_insn (gen__movti (load, ops[1]));
+ spu_convert_move (ops[0], load);
+ return 1;
+ }
+
+ rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
+
+ load = gen_reg_rtx (TImode);
+ rot = spu_expand_load (load, 0, ops[1], rot_amt);
+
+ if (rot)
+ emit_insn (gen_rotqby_ti (load, load, rot));
+
+ spu_convert_move (ops[0], load);
+ return 1;
+}
+
+int
+spu_split_store (rtx * ops)
+{
+ enum machine_mode mode = GET_MODE (ops[0]);
+ rtx reg;
+ rtx addr, p0, p1, p1_lo, smem;
+ int aform;
+ int scalar;
+
+ if (GET_MODE_SIZE (mode) >= 16)
+ return 0;
+
+ addr = XEXP (ops[0], 0);
+ gcc_assert (GET_CODE (addr) != AND);
+
+ if (!address_needs_split (ops[0]))
+ {
+ reg = gen_reg_rtx (TImode);
+ emit_insn (gen_spu_convert (reg, ops[1]));
+ ops[0] = change_address (ops[0], TImode, addr);
+ emit_move_insn (ops[0], reg);
+ return 1;
+ }
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ /* 8 cases:
+ aligned reg + aligned reg => lqx, c?x, shuf, stqx
+ aligned reg + unaligned reg => lqx, c?x, shuf, stqx
+ aligned reg + aligned const => lqd, c?d, shuf, stqx
+ aligned reg + unaligned const => lqd, c?d, shuf, stqx
+ unaligned reg + aligned reg => lqx, c?x, shuf, stqx
+ unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
+ unaligned reg + aligned const => lqd, c?d, shuf, stqx
+       unaligned reg + unaligned const => lqx, c?d, shuf, stqx
+ */
+ aform = 0;
+ p0 = XEXP (addr, 0);
+ p1 = p1_lo = XEXP (addr, 1);
+ if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
+ {
+ p1_lo = GEN_INT (INTVAL (p1) & 15);
+ if (reg_aligned_for_addr (p0))
+ {
+ p1 = GEN_INT (INTVAL (p1) & -16);
+ if (p1 == const0_rtx)
+ addr = p0;
+ else
+ addr = gen_rtx_PLUS (SImode, p0, p1);
+ }
+ else
+ {
+ rtx x = gen_reg_rtx (SImode);
+ emit_move_insn (x, p1);
+ addr = gen_rtx_PLUS (SImode, p0, x);
+ }
+ }
+ }
+ else if (REG_P (addr))
+ {
+ aform = 0;
+ p0 = addr;
+ p1 = p1_lo = const0_rtx;
+ }
+ else
+ {
+ aform = 1;
+ p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+ p1 = 0; /* aform doesn't use p1 */
+ p1_lo = addr;
+ if (ALIGNED_SYMBOL_REF_P (addr))
+ p1_lo = const0_rtx;
+ else if (GET_CODE (addr) == CONST
+ && GET_CODE (XEXP (addr, 0)) == PLUS
+ && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
+ && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
+ if ((v & -16) != 0)
+ addr = gen_rtx_CONST (Pmode,
+ gen_rtx_PLUS (Pmode,
+ XEXP (XEXP (addr, 0), 0),
+ GEN_INT (v & -16)));
+ else
+ addr = XEXP (XEXP (addr, 0), 0);
+ p1_lo = GEN_INT (v & 15);
+ }
+ else if (GET_CODE (addr) == CONST_INT)
+ {
+ p1_lo = GEN_INT (INTVAL (addr) & 15);
+ addr = GEN_INT (INTVAL (addr) & -16);
+ }
+ else
+ {
+ p1_lo = gen_reg_rtx (SImode);
+ emit_move_insn (p1_lo, addr);
+ }
+ }
+
+ gcc_assert (aform == 0 || aform == 1);
+ reg = gen_reg_rtx (TImode);
+
+ scalar = store_with_one_insn_p (ops[0]);
+ if (!scalar)
+ {
+      /* We could copy the flags from the ops[0] MEM to lmem here.  We
+	 don't, because we want this load to be optimized away if
+ possible, and copying the flags will prevent that in certain
+ cases, e.g. consider the volatile flag. */
+
+ rtx pat = gen_reg_rtx (TImode);
+ rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
+ set_mem_alias_set (lmem, 0);
+ emit_insn (gen_movti (reg, lmem));
+
+ if (!p0 || reg_aligned_for_addr (p0))
+ p0 = stack_pointer_rtx;
+ if (!p1_lo)
+ p1_lo = const0_rtx;
+
+ emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
+ emit_insn (gen_shufb (reg, ops[1], reg, pat));
+ }
+ else
+ {
+ if (GET_CODE (ops[1]) == REG)
+ emit_insn (gen_spu_convert (reg, ops[1]));
+ else if (GET_CODE (ops[1]) == SUBREG)
+ emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
+ else
+ abort ();
+ }
+
+ if (GET_MODE_SIZE (mode) < 4 && scalar)
+ emit_insn (gen_ashlti3
+ (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
+
+ smem = change_address (ops[0], TImode, copy_rtx (addr));
+ /* We can't use the previous alias set because the memory has changed
+ size and can potentially overlap objects of other types. */
+ set_mem_alias_set (smem, 0);
+
+ emit_insn (gen_movti (smem, reg));
+ return 1;
+}
+
+/* Return TRUE if X is MEM which is a struct member reference
+ and the member can safely be loaded and stored with a single
+ instruction because it is padded. */
+static int
+mem_is_padded_component_ref (rtx x)
+{
+ tree t = MEM_EXPR (x);
+ tree r;
+ if (!t || TREE_CODE (t) != COMPONENT_REF)
+ return 0;
+ t = TREE_OPERAND (t, 1);
+ if (!t || TREE_CODE (t) != FIELD_DECL
+ || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
+ return 0;
+ /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
+ r = DECL_FIELD_CONTEXT (t);
+ if (!r || TREE_CODE (r) != RECORD_TYPE)
+ return 0;
+  /* Make sure they are the same mode.  */
+ if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
+ return 0;
+ /* If there are no following fields then the field alignment assures
+ the structure is padded to the alignment which means this field is
+ padded too. */
+ if (TREE_CHAIN (t) == 0)
+ return 1;
+ /* If the following field is also aligned then this field will be
+ padded. */
+ t = TREE_CHAIN (t);
+ if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
+ return 1;
+ return 0;
+}
+
+/* Parse the -mfixed-range= option string. */
+static void
+fix_range (const char *const_str)
+{
+ int i, first, last;
+ char *str, *dash, *comma;
+
+  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
+ REG2 are either register names or register numbers. The effect
+ of this option is to mark the registers in the range from REG1 to
+ REG2 as ``fixed'' so they won't be used by the compiler. */
+
+ i = strlen (const_str);
+ str = (char *) alloca (i + 1);
+ memcpy (str, const_str, i + 1);
+
+ while (1)
+ {
+ dash = strchr (str, '-');
+ if (!dash)
+ {
+ warning (0, "value of -mfixed-range must have form REG1-REG2");
+ return;
+ }
+ *dash = '\0';
+ comma = strchr (dash + 1, ',');
+ if (comma)
+ *comma = '\0';
+
+ first = decode_reg_name (str);
+ if (first < 0)
+ {
+ warning (0, "unknown register name: %s", str);
+ return;
+ }
+
+ last = decode_reg_name (dash + 1);
+ if (last < 0)
+ {
+ warning (0, "unknown register name: %s", dash + 1);
+ return;
+ }
+
+ *dash = '-';
+
+ if (first > last)
+ {
+ warning (0, "%s-%s is an empty range", str, dash + 1);
+ return;
+ }
+
+ for (i = first; i <= last; ++i)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ if (!comma)
+ break;
+
+ *comma = ',';
+ str = comma + 1;
+ }
+}
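+
+/* For example, -mfixed-range=80-85,90-99 marks registers 80-85 and
+   90-99 as fixed and call-used so the register allocator stays away
+   from them; decode_reg_name accepts symbolic register names as well
+   as numbers.  */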
+
+/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
+ can be generated using the fsmbi instruction. */
+int
+fsmbi_const_p (rtx x)
+{
+ if (CONSTANT_P (x))
+ {
+      /* We can always choose TImode for CONST_INT because the high bits
+	 of an SImode value will always be all 0s or all 1s, i.e., valid
+	 for fsmbi.  */
+ enum immediate_class c = classify_immediate (x, TImode);
+ return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
+ }
+ return 0;
+}
+
+/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
+ can be generated using the cbd, chd, cwd or cdd instruction. */
+int
+cpat_const_p (rtx x, enum machine_mode mode)
+{
+ if (CONSTANT_P (x))
+ {
+ enum immediate_class c = classify_immediate (x, mode);
+ return c == IC_CPAT;
+ }
+ return 0;
+}
+
+rtx
+gen_cpat_const (rtx * ops)
+{
+ unsigned char dst[16];
+ int i, offset, shift, isize;
+ if (GET_CODE (ops[3]) != CONST_INT
+ || GET_CODE (ops[2]) != CONST_INT
+ || (GET_CODE (ops[1]) != CONST_INT
+ && GET_CODE (ops[1]) != REG))
+ return 0;
+ if (GET_CODE (ops[1]) == REG
+ && (!REG_POINTER (ops[1])
+ || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
+ return 0;
+
+ for (i = 0; i < 16; i++)
+ dst[i] = i + 16;
+ isize = INTVAL (ops[3]);
+ if (isize == 1)
+ shift = 3;
+ else if (isize == 2)
+ shift = 2;
+ else
+ shift = 0;
+ offset = (INTVAL (ops[2]) +
+ (GET_CODE (ops[1]) ==
+ CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
+ for (i = 0; i < isize; i++)
+ dst[offset + i] = i + shift;
+ return array_to_constant (TImode, dst);
+}
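+
+/* Worked example: with ops[1] = ops[2] = const0_rtx and ops[3] = 4,
+   the loops above produce the selector bytes
+
+     { 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
+
+   i.e. a shufb pattern that inserts the preferred word (bytes 0-3) of
+   the first operand at offset 0 and keeps the remaining bytes of the
+   second operand -- the kind of pattern cwd produces for a 4-byte
+   insert. */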
+
+/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
+ array. Use MODE for CONST_INT's. When the constant's mode is smaller
+ than 16 bytes, the value is repeated across the rest of the array. */
+void
+constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
+{
+ HOST_WIDE_INT val;
+ int i, j, first;
+
+ memset (arr, 0, 16);
+ mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
+ if (GET_CODE (x) == CONST_INT
+ || (GET_CODE (x) == CONST_DOUBLE
+ && (mode == SFmode || mode == DFmode)))
+ {
+ gcc_assert (mode != VOIDmode && mode != BLKmode);
+
+ if (GET_CODE (x) == CONST_DOUBLE)
+ val = const_double_to_hwint (x);
+ else
+ val = INTVAL (x);
+ first = GET_MODE_SIZE (mode) - 1;
+ for (i = first; i >= 0; i--)
+ {
+ arr[i] = val & 0xff;
+ val >>= 8;
+ }
+ /* Splat the constant across the whole array. */
+ for (j = 0, i = first + 1; i < 16; i++)
+ {
+ arr[i] = arr[j];
+ j = (j == first) ? 0 : j + 1;
+ }
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ val = CONST_DOUBLE_LOW (x);
+ for (i = 15; i >= 8; i--)
+ {
+ arr[i] = val & 0xff;
+ val >>= 8;
+ }
+ val = CONST_DOUBLE_HIGH (x);
+ for (i = 7; i >= 0; i--)
+ {
+ arr[i] = val & 0xff;
+ val >>= 8;
+ }
+ }
+ else if (GET_CODE (x) == CONST_VECTOR)
+ {
+ int units;
+ rtx elt;
+ mode = GET_MODE_INNER (mode);
+ units = CONST_VECTOR_NUNITS (x);
+ for (i = 0; i < units; i++)
+ {
+ elt = CONST_VECTOR_ELT (x, i);
+ if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
+ {
+ if (GET_CODE (elt) == CONST_DOUBLE)
+ val = const_double_to_hwint (elt);
+ else
+ val = INTVAL (elt);
+ first = GET_MODE_SIZE (mode) - 1;
+ if (first + i * GET_MODE_SIZE (mode) > 16)
+ abort ();
+ for (j = first; j >= 0; j--)
+ {
+ arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
+ val >>= 8;
+ }
+ }
+ }
+ }
+ else
+ gcc_unreachable();
+}
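+
+/* Worked example: constant_to_array (SImode, GEN_INT (0x12345678), arr)
+   yields
+
+     { 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78,
+       0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78 }
+
+   -- the big-endian bytes of the constant splatted across the whole
+   quadword. */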
+
+/* Convert a 16 byte array to a constant of mode MODE. When MODE is
+ smaller than 16 bytes, use the bytes that would represent that value
+ in a register, e.g., for QImode return the value of arr[3]. */
+rtx
+array_to_constant (enum machine_mode mode, const unsigned char arr[16])
+{
+ enum machine_mode inner_mode;
+ rtvec v;
+ int units, size, i, j, k;
+ HOST_WIDE_INT val;
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+ {
+ j = GET_MODE_SIZE (mode);
+ i = j < 4 ? 4 - j : 0;
+ for (val = 0; i < j; i++)
+ val = (val << 8) | arr[i];
+ val = trunc_int_for_mode (val, mode);
+ return GEN_INT (val);
+ }
+
+ if (mode == TImode)
+ {
+ HOST_WIDE_INT high;
+ for (i = high = 0; i < 8; i++)
+ high = (high << 8) | arr[i];
+ for (i = 8, val = 0; i < 16; i++)
+ val = (val << 8) | arr[i];
+ return immed_double_const (val, high, TImode);
+ }
+ if (mode == SFmode)
+ {
+ val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
+ val = trunc_int_for_mode (val, SImode);
+ return hwint_to_const_double (SFmode, val);
+ }
+ if (mode == DFmode)
+ {
+ for (i = 0, val = 0; i < 8; i++)
+ val = (val << 8) | arr[i];
+ return hwint_to_const_double (DFmode, val);
+ }
+
+ if (!VECTOR_MODE_P (mode))
+ abort ();
+
+ units = GET_MODE_NUNITS (mode);
+ size = GET_MODE_UNIT_SIZE (mode);
+ inner_mode = GET_MODE_INNER (mode);
+ v = rtvec_alloc (units);
+
+ for (k = i = 0; i < units; ++i)
+ {
+ val = 0;
+ for (j = 0; j < size; j++, k++)
+ val = (val << 8) | arr[k];
+
+ if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
+ RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
+ else
+ RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
+ }
+ if (k > 16)
+ abort ();
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
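+
+/* Worked example (the inverse of the constant_to_array example above):
+   with ARR beginning { 0x12, 0x34, 0x56, 0x78, ... },
+   array_to_constant (SImode, arr) reads bytes 0-3 of the preferred
+   slot and returns GEN_INT (0x12345678). */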
+
+static void
+reloc_diagnostic (rtx x)
+{
+ tree decl = 0;
+ if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
+ return;
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ decl = SYMBOL_REF_DECL (x);
+ else if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
+ decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
+
+ /* SYMBOL_REF_DECL is not necessarily a DECL. */
+ if (decl && !DECL_P (decl))
+ decl = 0;
+
+ /* The decl could be a string constant. */
+ if (decl && DECL_P (decl))
+ {
+ location_t loc;
+ /* We use last_assemble_variable_decl to get line information. It's
+ not always going to be right and might not even be close, but will
+ be right for the more common cases. */
+ if (!last_assemble_variable_decl || in_section == ctors_section)
+ loc = DECL_SOURCE_LOCATION (decl);
+ else
+ loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
+
+ if (TARGET_WARN_RELOC)
+ warning_at (loc, 0,
+ "creating run-time relocation for %qD", decl);
+ else
+ error_at (loc,
+ "creating run-time relocation for %qD", decl);
+ }
+ else
+ {
+ if (TARGET_WARN_RELOC)
+ warning_at (input_location, 0, "creating run-time relocation");
+ else
+ error_at (input_location, "creating run-time relocation");
+ }
+}
+
+/* Hook into assemble_integer so we can generate an error for run-time
+ relocations. The SPU ABI disallows them. */
+static bool
+spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
+{
+ /* By default run-time relocations aren't supported, but we allow them
+ in case users support them in their own run-time loader. We provide
+ a warning for those users that don't. */
+ if ((GET_CODE (x) == SYMBOL_REF)
+ || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
+ reloc_diagnostic (x);
+
+ return default_assemble_integer (x, size, aligned_p);
+}
+
+static void
+spu_asm_globalize_label (FILE * file, const char *name)
+{
+ fputs ("\t.global\t", file);
+ assemble_name (file, name);
+ fputs ("\n", file);
+}
+
+static bool
+spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+ int cost = COSTS_N_INSNS (2);
+
+ /* Folding to a CONST_VECTOR will use extra space but there might
+ be only a small savings in cycles. We'd like to use a CONST_VECTOR
+ only if it allows us to fold away multiple insns. Changing the cost
+ of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
+ because this cost will only be compared against a single insn.
+ if (code == CONST_VECTOR)
+ return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
+ */
+
+ /* Use defaults for float operations. Not accurate but good enough. */
+ if (mode == DFmode)
+ {
+ *total = COSTS_N_INSNS (13);
+ return true;
+ }
+ if (mode == SFmode)
+ {
+ *total = COSTS_N_INSNS (6);
+ return true;
+ }
+ switch (code)
+ {
+ case CONST_INT:
+ if (satisfies_constraint_K (x))
+ *total = 0;
+ else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case CONST:
+ *total = COSTS_N_INSNS (3);
+ return true;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (0);
+ return true;
+
+ case CONST_DOUBLE:
+ *total = COSTS_N_INSNS (5);
+ return true;
+
+ case FLOAT_EXTEND:
+ case FLOAT_TRUNCATE:
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ case FIX:
+ case UNSIGNED_FIX:
+ *total = COSTS_N_INSNS (7);
+ return true;
+
+ case PLUS:
+ if (mode == TImode)
+ {
+ *total = COSTS_N_INSNS (9);
+ return true;
+ }
+ break;
+
+ case MULT:
+ cost =
+ GET_CODE (XEXP (x, 0)) ==
+ REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
+ if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+ cost = COSTS_N_INSNS (14);
+ if ((val & 0xffff) == 0)
+ cost = COSTS_N_INSNS (9);
+ else if (val > 0 && val < 0x10000)
+ cost = COSTS_N_INSNS (11);
+ }
+ }
+ *total = cost;
+ return true;
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (20);
+ return true;
+ case ROTATE:
+ case ROTATERT:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ *total = COSTS_N_INSNS (4);
+ return true;
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_CONVERT)
+ *total = COSTS_N_INSNS (0);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+ /* Scale the cost by mode size, except during initialization (cfun->decl == 0). */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
+ cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
+ * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
+ *total = cost;
+ return true;
+}
+
+static enum machine_mode
+spu_unwind_word_mode (void)
+{
+ return SImode;
+}
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+static bool
+spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ return decl && !TARGET_LARGE_MEM;
+}
+
+/* We need to correctly update the back chain pointer and the Available
+ Stack Size (which is in the second slot of the sp register). */
+void
+spu_allocate_stack (rtx op0, rtx op1)
+{
+ HOST_WIDE_INT v;
+ rtx chain = gen_reg_rtx (V4SImode);
+ rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
+ rtx sp = gen_reg_rtx (V4SImode);
+ rtx splatted = gen_reg_rtx (V4SImode);
+ rtx pat = gen_reg_rtx (TImode);
+
+ /* Copy the back chain so we can save it back again. */
+ emit_move_insn (chain, stack_bot);
+
+ op1 = force_reg (SImode, op1);
+
+ v = 0x1020300010203ll;
+ emit_move_insn (pat, immed_double_const (v, v, TImode));
+ emit_insn (gen_shufb (splatted, op1, op1, pat));
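+
+ /* The selector { 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 } built above
+ splats the preferred word of op1 into all four slots, so the single
+ subv4si3 below updates the stack pointer (slot 0) and the Available
+ Stack Size (slot 1) together. */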
+
+ emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
+ emit_insn (gen_subv4si3 (sp, sp, splatted));
+
+ if (flag_stack_check)
+ {
+ rtx avail = gen_reg_rtx(SImode);
+ rtx result = gen_reg_rtx(SImode);
+ emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
+ emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
+ emit_insn (gen_spu_heq (result, GEN_INT(0) ));
+ }
+
+ emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
+
+ emit_move_insn (stack_bot, chain);
+
+ emit_move_insn (op0, virtual_stack_dynamic_rtx);
+}
+
+void
+spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
+{
+ static unsigned char arr[16] =
+ { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
+ rtx temp = gen_reg_rtx (SImode);
+ rtx temp2 = gen_reg_rtx (SImode);
+ rtx temp3 = gen_reg_rtx (V4SImode);
+ rtx temp4 = gen_reg_rtx (V4SImode);
+ rtx pat = gen_reg_rtx (TImode);
+ rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
+
+ /* Restore the backchain from the first word, sp from the second. */
+ emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
+ emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
+
+ emit_move_insn (pat, array_to_constant (TImode, arr));
+
+ /* Compute the Available Stack Size for sp. */
+ emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
+ emit_insn (gen_shufb (temp3, temp, temp, pat));
+
+ /* Compute the Available Stack Size for the back chain. */
+ emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
+ emit_insn (gen_shufb (temp4, temp2, temp2, pat));
+ emit_insn (gen_addv4si3 (temp4, sp, temp4));
+
+ emit_insn (gen_addv4si3 (sp, sp, temp3));
+ emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
+}
+
+static void
+spu_init_libfuncs (void)
+{
+ set_optab_libfunc (smul_optab, DImode, "__muldi3");
+ set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
+ set_optab_libfunc (smod_optab, DImode, "__moddi3");
+ set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
+ set_optab_libfunc (umod_optab, DImode, "__umoddi3");
+ set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
+ set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
+ set_optab_libfunc (clz_optab, DImode, "__clzdi2");
+ set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
+ set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
+ set_optab_libfunc (parity_optab, DImode, "__paritydi2");
+
+ set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
+ set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
+
+ set_optab_libfunc (addv_optab, SImode, "__addvsi3");
+ set_optab_libfunc (subv_optab, SImode, "__subvsi3");
+ set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
+ set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
+ set_optab_libfunc (negv_optab, SImode, "__negvsi2");
+ set_optab_libfunc (absv_optab, SImode, "__absvsi2");
+ set_optab_libfunc (addv_optab, DImode, "__addvdi3");
+ set_optab_libfunc (subv_optab, DImode, "__subvdi3");
+ set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
+ set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
+ set_optab_libfunc (negv_optab, DImode, "__negvdi2");
+ set_optab_libfunc (absv_optab, DImode, "__absvdi2");
+
+ set_optab_libfunc (smul_optab, TImode, "__multi3");
+ set_optab_libfunc (sdiv_optab, TImode, "__divti3");
+ set_optab_libfunc (smod_optab, TImode, "__modti3");
+ set_optab_libfunc (udiv_optab, TImode, "__udivti3");
+ set_optab_libfunc (umod_optab, TImode, "__umodti3");
+ set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
+}
+
+/* Make a subreg, stripping any existing subreg. We could possibly just
+ call simplify_subreg, but in this case we know what we want. */
+rtx
+spu_gen_subreg (enum machine_mode mode, rtx x)
+{
+ if (GET_CODE (x) == SUBREG)
+ x = SUBREG_REG (x);
+ if (GET_MODE (x) == mode)
+ return x;
+ return gen_rtx_SUBREG (mode, x, 0);
+}
+
+static bool
+spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return (TYPE_MODE (type) == BLKmode
+ && ((type) == 0
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ || int_size_in_bytes (type) >
+ (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
+}
+
+/* Create the built-in types and functions. */
+
+enum spu_function_code
+{
+#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
+#include "spu-builtins.def"
+#undef DEF_BUILTIN
+ NUM_SPU_BUILTINS
+};
+
+extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
+
+struct spu_builtin_description spu_builtins[] = {
+#define DEF_BUILTIN(fcode, icode, name, type, params) \
+ {fcode, icode, name, type, params},
+#include "spu-builtins.def"
+#undef DEF_BUILTIN
+};
+
+static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
+
+/* Returns the spu builtin decl for CODE. */
+
+static tree
+spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= NUM_SPU_BUILTINS)
+ return error_mark_node;
+
+ return spu_builtin_decls[code];
+}
+
+
+static void
+spu_init_builtins (void)
+{
+ struct spu_builtin_description *d;
+ unsigned int i;
+
+ V16QI_type_node = build_vector_type (intQI_type_node, 16);
+ V8HI_type_node = build_vector_type (intHI_type_node, 8);
+ V4SI_type_node = build_vector_type (intSI_type_node, 4);
+ V2DI_type_node = build_vector_type (intDI_type_node, 2);
+ V4SF_type_node = build_vector_type (float_type_node, 4);
+ V2DF_type_node = build_vector_type (double_type_node, 2);
+
+ unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
+ unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
+ unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
+ unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
+
+ spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
+
+ spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
+
+ spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
+ spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
+ spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
+ spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
+ spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
+ spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
+ spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
+ spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
+
+ spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
+ spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
+
+ spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
+
+ spu_builtin_types[SPU_BTI_PTR] =
+ build_pointer_type (build_qualified_type
+ (void_type_node,
+ TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
+
+ /* For each builtin we build a new prototype. The tree code will make
+ sure nodes are shared. */
+ for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
+ {
+ tree p;
+ char name[64]; /* build_function will make a copy. */
+ int parm;
+
+ if (d->name == 0)
+ continue;
+
+ /* Find last parm. */
+ for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
+ ;
+
+ p = void_list_node;
+ while (parm > 1)
+ p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
+
+ p = build_function_type (spu_builtin_types[d->parm[0]], p);
+
+ sprintf (name, "__builtin_%s", d->name);
+ spu_builtin_decls[i] =
+ add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
+ if (d->fcode == SPU_MASK_FOR_LOAD)
+ TREE_READONLY (spu_builtin_decls[i]) = 1;
+
+ /* These builtins don't throw. */
+ TREE_NOTHROW (spu_builtin_decls[i]) = 1;
+ }
+}
+
+void
+spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
+{
+ static unsigned char arr[16] =
+ { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
+
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx temp3 = gen_reg_rtx (V4SImode);
+ rtx pat = gen_reg_rtx (TImode);
+ rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
+
+ emit_move_insn (pat, array_to_constant (TImode, arr));
+
+ /* Restore the sp. */
+ emit_move_insn (temp, op1);
+ emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
+
+ /* Compute available stack size for sp. */
+ emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
+ emit_insn (gen_shufb (temp3, temp, temp, pat));
+
+ emit_insn (gen_addv4si3 (sp, sp, temp3));
+ emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
+}
+
+int
+spu_safe_dma (HOST_WIDE_INT channel)
+{
+ return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
+}
+
+void
+spu_builtin_splats (rtx ops[])
+{
+ enum machine_mode mode = GET_MODE (ops[0]);
+ if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
+ {
+ unsigned char arr[16];
+ constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
+ emit_move_insn (ops[0], array_to_constant (mode, arr));
+ }
+ else
+ {
+ rtx reg = gen_reg_rtx (TImode);
+ rtx shuf;
+ if (GET_CODE (ops[1]) != REG
+ && GET_CODE (ops[1]) != SUBREG)
+ ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
+ switch (mode)
+ {
+ case V2DImode:
+ case V2DFmode:
+ shuf =
+ immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
+ TImode);
+ break;
+ case V4SImode:
+ case V4SFmode:
+ shuf =
+ immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
+ TImode);
+ break;
+ case V8HImode:
+ shuf =
+ immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
+ TImode);
+ break;
+ case V16QImode:
+ shuf =
+ immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
+ TImode);
+ break;
+ default:
+ abort ();
+ }
+ emit_move_insn (reg, shuf);
+ emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
+ }
+}
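+
+/* For example, for V8HImode the selector bytes { 2,3, 2,3, ... } pick
+   bytes 2-3 of ops[1] -- the preferred slot of an HImode scalar --
+   into every halfword of the result; the other patterns work the same
+   way for their element sizes. */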
+
+void
+spu_builtin_extract (rtx ops[])
+{
+ enum machine_mode mode;
+ rtx rot, from, tmp;
+
+ mode = GET_MODE (ops[1]);
+
+ if (GET_CODE (ops[2]) == CONST_INT)
+ {
+ switch (mode)
+ {
+ case V16QImode:
+ emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
+ break;
+ case V8HImode:
+ emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
+ break;
+ case V4SFmode:
+ emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
+ break;
+ case V4SImode:
+ emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
+ break;
+ case V2DImode:
+ emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
+ break;
+ case V2DFmode:
+ emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
+ break;
+ default:
+ abort ();
+ }
+ return;
+ }
+
+ from = spu_gen_subreg (TImode, ops[1]);
+ rot = gen_reg_rtx (TImode);
+ tmp = gen_reg_rtx (SImode);
+
+ switch (mode)
+ {
+ case V16QImode:
+ emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
+ break;
+ case V8HImode:
+ emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
+ emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
+ break;
+ case V4SFmode:
+ case V4SImode:
+ emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
+ break;
+ case V2DImode:
+ case V2DFmode:
+ emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
+ break;
+ default:
+ abort ();
+ }
+ emit_insn (gen_rotqby_ti (rot, from, tmp));
+
+ emit_insn (gen_spu_convert (ops[0], rot));
+}
+
+void
+spu_builtin_insert (rtx ops[])
+{
+ enum machine_mode mode = GET_MODE (ops[0]);
+ enum machine_mode imode = GET_MODE_INNER (mode);
+ rtx mask = gen_reg_rtx (TImode);
+ rtx offset;
+
+ if (GET_CODE (ops[3]) == CONST_INT)
+ offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
+ else
+ {
+ offset = gen_reg_rtx (SImode);
+ emit_insn (gen_mulsi3
+ (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
+ }
+ emit_insn (gen_cpat
+ (mask, stack_pointer_rtx, offset,
+ GEN_INT (GET_MODE_SIZE (imode))));
+ emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
+}
+
+void
+spu_builtin_promote (rtx ops[])
+{
+ enum machine_mode mode, imode;
+ rtx rot, from, offset;
+ HOST_WIDE_INT pos;
+
+ mode = GET_MODE (ops[0]);
+ imode = GET_MODE_INNER (mode);
+
+ from = gen_reg_rtx (TImode);
+ rot = spu_gen_subreg (TImode, ops[0]);
+
+ emit_insn (gen_spu_convert (from, ops[1]));
+
+ if (GET_CODE (ops[2]) == CONST_INT)
+ {
+ pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
+ if (GET_MODE_SIZE (imode) < 4)
+ pos += 4 - GET_MODE_SIZE (imode);
+ offset = GEN_INT (pos & 15);
+ }
+ else
+ {
+ offset = gen_reg_rtx (SImode);
+ switch (mode)
+ {
+ case V16QImode:
+ emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
+ break;
+ case V8HImode:
+ emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
+ emit_insn (gen_addsi3 (offset, offset, offset));
+ break;
+ case V4SFmode:
+ case V4SImode:
+ emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
+ emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
+ break;
+ case V2DImode:
+ case V2DFmode:
+ emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
+ break;
+ default:
+ abort ();
+ }
+ }
+ emit_insn (gen_rotqby_ti (rot, from, offset));
+}
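+
+/* For example, promoting an SImode scalar into element 1 of a V4SImode
+   vector gives pos = -4 and offset = 12; rotating the quadword left by
+   12 bytes moves the preferred slot (bytes 0-3) into bytes 4-7. */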
+
+static void
+spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx shuf = gen_reg_rtx (V4SImode);
+ rtx insn = gen_reg_rtx (V4SImode);
+ rtx shufc;
+ rtx insnc;
+ rtx mem;
+
+ fnaddr = force_reg (SImode, fnaddr);
+ cxt = force_reg (SImode, cxt);
+
+ if (TARGET_LARGE_MEM)
+ {
+ rtx rotl = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (V4SImode);
+ rtx bi = gen_reg_rtx (SImode);
+ static unsigned char const shufa[16] = {
+ 2, 3, 0, 1, 18, 19, 16, 17,
+ 0, 1, 2, 3, 16, 17, 18, 19
+ };
+ static unsigned char const insna[16] = {
+ 0x41, 0, 0, 79,
+ 0x41, 0, 0, STATIC_CHAIN_REGNUM,
+ 0x60, 0x80, 0, 79,
+ 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
+ };
+
+ shufc = force_reg (TImode, array_to_constant (TImode, shufa));
+ insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
+
+ emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
+ emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
+ emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
+ emit_insn (gen_selb (insn, insnc, rotl, mask));
+
+ mem = adjust_address (m_tramp, V4SImode, 0);
+ emit_move_insn (mem, insn);
+
+ emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
+ mem = adjust_address (m_tramp, Pmode, 16);
+ emit_move_insn (mem, bi);
+ }
+ else
+ {
+ rtx scxt = gen_reg_rtx (SImode);
+ rtx sfnaddr = gen_reg_rtx (SImode);
+ static unsigned char const insna[16] = {
+ 0x42, 0, 0, STATIC_CHAIN_REGNUM,
+ 0x30, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+ };
+
+ shufc = gen_reg_rtx (TImode);
+ insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
+
+ /* By or'ing all of cxt with the ila opcode we are assuming cxt
+ fits in 18 bits and the last 4 are zeros. This will be true if
+ the stack pointer is initialized to 0x3fff0 at program start;
+ otherwise the ila instruction will be garbage. */
+
+ emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
+ emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
+ emit_insn (gen_cpat
+ (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
+ emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
+ emit_insn (gen_iorv4si3 (insn, insnc, shuf));
+
+ mem = adjust_address (m_tramp, V4SImode, 0);
+ emit_move_insn (mem, insn);
+ }
+ emit_insn (gen_sync ());
+}
+
+void
+spu_expand_sign_extend (rtx ops[])
+{
+ unsigned char arr[16];
+ rtx pat = gen_reg_rtx (TImode);
+ rtx sign, c;
+ int i, last;
+ last = GET_MODE (ops[0]) == DImode ? 7 : 15;
+ if (GET_MODE (ops[1]) == QImode)
+ {
+ sign = gen_reg_rtx (HImode);
+ emit_insn (gen_extendqihi2 (sign, ops[1]));
+ for (i = 0; i < 16; i++)
+ arr[i] = 0x12;
+ arr[last] = 0x13;
+ }
+ else
+ {
+ for (i = 0; i < 16; i++)
+ arr[i] = 0x10;
+ switch (GET_MODE (ops[1]))
+ {
+ case HImode:
+ sign = gen_reg_rtx (SImode);
+ emit_insn (gen_extendhisi2 (sign, ops[1]));
+ arr[last] = 0x03;
+ arr[last - 1] = 0x02;
+ break;
+ case SImode:
+ sign = gen_reg_rtx (SImode);
+ emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
+ for (i = 0; i < 4; i++)
+ arr[last - i] = 3 - i;
+ break;
+ case DImode:
+ sign = gen_reg_rtx (SImode);
+ c = gen_reg_rtx (SImode);
+ emit_insn (gen_spu_convert (c, ops[1]));
+ emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
+ for (i = 0; i < 8; i++)
+ arr[last - i] = 7 - i;
+ break;
+ default:
+ abort ();
+ }
+ }
+ emit_move_insn (pat, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
+}
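+
+/* For example, sign-extending SImode to DImode builds the selector
+
+     { 0x10, 0x10, 0x10, 0x10, 0x00, 0x01, 0x02, 0x03, 0x10, ... }
+
+   so bytes 0-3 of the DImode result are taken from the sign register
+   and bytes 4-7 are the original SImode value. */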
+
+/* Expand vector initialization. If there are any constant parts,
+ load the constant parts first, then load any non-constant parts. */
+void
+spu_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0;
+ bool all_same = true;
+ rtx first, x = NULL_RTX, first_constant = NULL_RTX;
+ int i;
+
+ first = XVECEXP (vals, 0, 0);
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
+ ++n_var;
+ else
+ {
+ if (first_constant == NULL_RTX)
+ first_constant = x;
+ }
+ if (i > 0 && !rtx_equal_p (x, first))
+ all_same = false;
+ }
+
+ /* If all elements are the same, use splats to repeat them. */
+ if (all_same)
+ {
+ if (!CONSTANT_P (first)
+ && !register_operand (first, GET_MODE (x)))
+ first = force_reg (GET_MODE (first), first);
+ emit_insn (gen_spu_splats (target, first));
+ return;
+ }
+
+ /* Load the constant parts. */
+ if (n_var != n_elts)
+ {
+ if (n_var == 0)
+ {
+ emit_move_insn (target,
+ gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ }
+ else
+ {
+ rtx constant_parts_rtx = copy_rtx (vals);
+
+ gcc_assert (first_constant != NULL_RTX);
+ /* Fill empty slots with the first constant; this increases
+ our chance of using splats in the recursive call below. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (constant_parts_rtx, 0, i);
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
+ XVECEXP (constant_parts_rtx, 0, i) = first_constant;
+ }
+
+ spu_expand_vector_init (target, constant_parts_rtx);
+ }
+ }
+
+ /* Load the variable parts. */
+ if (n_var != 0)
+ {
+ rtx insert_operands[4];
+
+ insert_operands[0] = target;
+ insert_operands[2] = target;
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
+ {
+ if (!register_operand (x, GET_MODE (x)))
+ x = force_reg (GET_MODE (x), x);
+ insert_operands[1] = x;
+ insert_operands[3] = GEN_INT (i);
+ spu_builtin_insert (insert_operands);
+ }
+ }
+ }
+}
+
+/* Return the insn index of the vector compare instruction for the given
+ CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
+
+static int
+get_vec_cmp_insn (enum rtx_code code,
+ enum machine_mode dest_mode,
+ enum machine_mode op_mode)
+
+{
+ switch (code)
+ {
+ case EQ:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_ceq_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_ceq_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_ceq_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_ceq_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_ceq_v2df;
+ break;
+ case GT:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_cgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_cgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_cgt_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_cgt_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_cgt_v2df;
+ break;
+ case GTU:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_clgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_clgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_clgt_v4si;
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* Emit vector compare for operands OP0 and OP1 using code RCODE.
+ DMODE is expected destination mode. This is a recursive function. */
+
+static rtx
+spu_emit_vector_compare (enum rtx_code rcode,
+ rtx op0, rtx op1,
+ enum machine_mode dmode)
+{
+ int vec_cmp_insn;
+ rtx mask;
+ enum machine_mode dest_mode;
+ enum machine_mode op_mode = GET_MODE (op1);
+
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+ /* Single-precision floating-point vector compare instructions use a
+ V4SImode destination; double-precision ones use a V2DImode destination.
+ Move the destination to the appropriate mode later. */
+ if (dmode == V4SFmode)
+ dest_mode = V4SImode;
+ else if (dmode == V2DFmode)
+ dest_mode = V2DImode;
+ else
+ dest_mode = dmode;
+
+ mask = gen_reg_rtx (dest_mode);
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ if (vec_cmp_insn == -1)
+ {
+ bool swap_operands = false;
+ bool try_again = false;
+ switch (rcode)
+ {
+ case LT:
+ rcode = GT;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case LTU:
+ rcode = GTU;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case NE:
+ /* Treat A != B as ~(A==B). */
+ {
+ enum insn_code nor_code;
+ rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+ nor_code = optab_handler (one_cmpl_optab, dest_mode);
+ gcc_assert (nor_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* Try GT/GTU/LT/LTU OR EQ */
+ {
+ rtx c_rtx, eq_rtx;
+ enum insn_code ior_code;
+ enum rtx_code new_code;
+
+ switch (rcode)
+ {
+ case GE: new_code = GT; break;
+ case GEU: new_code = GTU; break;
+ case LE: new_code = LT; break;
+ case LEU: new_code = LTU; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
+ eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+
+ ior_code = optab_handler (ior_optab, dest_mode);
+ gcc_assert (ior_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* You only get two chances. */
+ if (try_again)
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ gcc_assert (vec_cmp_insn != -1);
+
+ if (swap_operands)
+ {
+ rtx tmp;
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+ }
+
+ emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+}
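+
+/* For example, a V4SImode LT has no direct pattern: the code above
+   retries it as GT with the operands swapped. GE is emitted as two
+   compares combined with an ior (GT | EQ), and NE as the one's
+   complement of EQ. */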
+
+
+/* Emit vector conditional expression.
+ DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
+ CC_OP0 and CC_OP1 are the two operands for the relational operation COND. */
+
+int
+spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum rtx_code rcode = GET_CODE (cond);
+ rtx mask;
+
+ /* Get the vector mask for the given relational operation. */
+ mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
+
+ emit_insn(gen_selb (dest, op2, op1, mask));
+
+ return 1;
+}
+
+static rtx
+spu_force_reg (enum machine_mode mode, rtx op)
+{
+ rtx x, r;
+ if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
+ {
+ if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
+ || GET_MODE (op) == BLKmode)
+ return force_reg (mode, convert_to_mode (mode, op, 0));
+ abort ();
+ }
+
+ r = force_reg (GET_MODE (op), op);
+ if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
+ {
+ x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
+ if (x)
+ return x;
+ }
+
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_spu_convert (x, r));
+ return x;
+}
+
+static void
+spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
+{
+ HOST_WIDE_INT v = 0;
+ int lsbits;
+ /* Check the range of immediate operands. */
+ if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
+ {
+ int range = p - SPU_BTI_7;
+
+ if (!CONSTANT_P (op))
+ error ("%s expects an integer literal in the range [%d, %d]",
+ d->name,
+ spu_builtin_range[range].low, spu_builtin_range[range].high);
+
+ if (GET_CODE (op) == CONST
+ && (GET_CODE (XEXP (op, 0)) == PLUS
+ || GET_CODE (XEXP (op, 0)) == MINUS))
+ {
+ v = INTVAL (XEXP (XEXP (op, 0), 1));
+ op = XEXP (XEXP (op, 0), 0);
+ }
+ else if (GET_CODE (op) == CONST_INT)
+ v = INTVAL (op);
+ else if (GET_CODE (op) == CONST_VECTOR
+ && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
+ v = INTVAL (CONST_VECTOR_ELT (op, 0));
+
+ /* The default for v is 0 which is valid in every range. */
+ if (v < spu_builtin_range[range].low
+ || v > spu_builtin_range[range].high)
+ error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
+ d->name,
+ spu_builtin_range[range].low, spu_builtin_range[range].high,
+ v);
+
+ switch (p)
+ {
+ case SPU_BTI_S10_4:
+ lsbits = 4;
+ break;
+ case SPU_BTI_U16_2:
+ /* This is only used in lqa and stqa. Even though the insns
+ encode 16 bits of the address (all but the 2 least
+ significant), only 14 bits are used because the address is
+ masked to be 16-byte aligned. */
+ lsbits = 4;
+ break;
+ case SPU_BTI_S16_2:
+ /* This is used for lqr and stqr. */
+ lsbits = 2;
+ break;
+ default:
+ lsbits = 0;
+ }
+
+ if (GET_CODE (op) == LABEL_REF
+ || (GET_CODE (op) == SYMBOL_REF
+ && SYMBOL_REF_FUNCTION_P (op))
+ || (v & ((1 << lsbits) - 1)) != 0)
+ warning (0, "%d least significant bits of %s are ignored", lsbits,
+ d->name);
+ }
+}
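+
+/* For example (a made-up call), passing 18 as the SPU_BTI_S16_2 operand
+   of a builtin that expands to lqr or stqr triggers the warning above:
+   18 & 3 is nonzero, so the 2 least significant bits would be silently
+   ignored by the hardware. */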
+
+
+static int
+expand_builtin_args (struct spu_builtin_description *d, tree exp,
+ rtx target, rtx ops[])
+{
+ enum insn_code icode = (enum insn_code) d->icode;
+ int i = 0, a;
+
+ /* Expand the arguments into rtl. */
+
+ if (d->parm[0] != SPU_BTI_VOID)
+ ops[i++] = target;
+
+ for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, a);
+ if (arg == 0)
+ abort ();
+ ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* The insn pattern may have additional operands (SCRATCH).
+ Return the number of actual non-SCRATCH operands. */
+ gcc_assert (i <= insn_data[icode].n_operands);
+ return i;
+}
+
+static rtx
+spu_expand_builtin_1 (struct spu_builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ rtx ops[8];
+ enum insn_code icode = (enum insn_code) d->icode;
+ enum machine_mode mode, tmode;
+ int i, p;
+ int n_operands;
+ tree return_type;
+
+ /* Set up ops[] with values from arglist. */
+ n_operands = expand_builtin_args (d, exp, target, ops);
+
+ /* Handle the target operand which must be operand 0. */
+ i = 0;
+ if (d->parm[0] != SPU_BTI_VOID)
+ {
+
+ /* We prefer the mode specified for the match_operand; otherwise
+ we use the mode from the builtin function prototype. */
+ tmode = insn_data[d->icode].operand[0].mode;
+ if (tmode == VOIDmode)
+ tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
+
+ /* Try to use TARGET, because not using it can lead to extra copies,
+ and when all of the registers are in use, extra copies lead
+ to extra spills. */
+ if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
+ ops[0] = target;
+ else
+ target = ops[0] = gen_reg_rtx (tmode);
+
+ if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
+ abort ();
+
+ i++;
+ }
+
+ if (d->fcode == SPU_MASK_FOR_LOAD)
+ {
+ enum machine_mode mode = insn_data[icode].operand[1].mode;
+ tree arg;
+ rtx addr, op, pat;
+
+ /* Get the address. */
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+
+ /* Negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, op,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ op = gen_rtx_MEM (mode, op);
+
+ pat = GEN_FCN (icode) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+ }
+
+ /* Ignore align_hint, but still expand its args in case they have
+ side effects. */
+ if (icode == CODE_FOR_spu_align_hint)
+ return 0;
+
+ /* Handle the rest of the operands. */
+ for (p = 1; i < n_operands; i++, p++)
+ {
+ if (insn_data[d->icode].operand[i].mode != VOIDmode)
+ mode = insn_data[d->icode].operand[i].mode;
+ else
+ mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
+
+ /* MODE can be VOIDmode here for labels. */
+
+ /* For specific intrinsics with an immediate operand, e.g.,
+ si_ai(), we sometimes need to convert the scalar argument to a
+ vector argument by splatting the scalar. */
+ if (VECTOR_MODE_P (mode)
+ && (GET_CODE (ops[i]) == CONST_INT
+ || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
+ {
+ if (GET_CODE (ops[i]) == CONST_INT)
+ ops[i] = spu_const (mode, INTVAL (ops[i]));
+ else
+ {
+ rtx reg = gen_reg_rtx (mode);
+ enum machine_mode imode = GET_MODE_INNER (mode);
+ if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
+ ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
+ if (imode != GET_MODE (ops[i]))
+ ops[i] = convert_to_mode (imode, ops[i],
+ TYPE_UNSIGNED (spu_builtin_types
+ [d->parm[i]]));
+ emit_insn (gen_spu_splats (reg, ops[i]));
+ ops[i] = reg;
+ }
+ }
+
+ spu_check_builtin_parm (d, ops[i], d->parm[p]);
+
+ if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
+ ops[i] = spu_force_reg (mode, ops[i]);
+ }
+
+ switch (n_operands)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (0);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (ops[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (ops[0], ops[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
+ break;
+ default:
+ abort ();
+ }
+
+ if (!pat)
+ abort ();
+
+ if (d->type == B_CALL || d->type == B_BISLED)
+ emit_call_insn (pat);
+ else if (d->type == B_JUMP)
+ {
+ emit_jump_insn (pat);
+ emit_barrier ();
+ }
+ else
+ emit_insn (pat);
+
+ return_type = spu_builtin_types[d->parm[0]];
+ if (d->parm[0] != SPU_BTI_VOID
+ && GET_MODE (target) != TYPE_MODE (return_type))
+ {
+ /* TARGET is the return value. It should always have the mode of
+ the builtin function prototype. */
+ target = spu_force_reg (TYPE_MODE (return_type), target);
+ }
+
+ return target;
+}
+
+rtx
+spu_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ struct spu_builtin_description *d;
+
+ if (fcode < NUM_SPU_BUILTINS)
+ {
+ d = &spu_builtins[fcode];
+
+ return spu_expand_builtin_1 (d, exp, target);
+ }
+ abort ();
+}
+
+/* Implement targetm.vectorize.builtin_mul_widen_even. */
+static tree
+spu_builtin_mul_widen_even (tree type)
+{
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_MULE_0];
+ else
+ return spu_builtin_decls[SPU_MULE_1];
+ break;
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Implement targetm.vectorize.builtin_mul_widen_odd. */
+static tree
+spu_builtin_mul_widen_odd (tree type)
+{
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_MULO_1];
+ else
+ return spu_builtin_decls[SPU_MULO_0];
+ break;
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Implement targetm.vectorize.builtin_mask_for_load. */
+static tree
+spu_builtin_mask_for_load (void)
+{
+ return spu_builtin_decls[SPU_MASK_FOR_LOAD];
+}
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
+{
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ case vector_stmt:
+ case vector_load:
+ case vector_store:
+ case vec_to_scalar:
+ case scalar_to_vec:
+ case cond_branch_not_taken:
+ case vec_perm:
+ case vec_promote_demote:
+ return 1;
+
+ case scalar_store:
+ return 10;
+
+ case scalar_load:
+ /* Load + rotate. */
+ return 2;
+
+ case unaligned_load:
+ return 2;
+
+ case cond_branch_taken:
+ return 6;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true iff a data reference of TYPE can reach vector alignment (16)
+ after applying N iterations. This routine does not determine
+ how many iterations are required to reach the desired alignment. */
+
+static bool
+spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
+{
+ if (is_packed)
+ return false;
+
+ /* All other types are naturally aligned. */
+ return true;
+}
+
+/* Implement targetm.vectorize.builtin_vec_perm. */
+tree
+spu_builtin_vec_perm (tree type, tree *mask_element_type)
+{
+ *mask_element_type = unsigned_char_type_node;
+
+ switch (TYPE_MODE (type))
+ {
+ case V16QImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_SHUFFLE_0];
+ else
+ return spu_builtin_decls[SPU_SHUFFLE_1];
+
+ case V8HImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_SHUFFLE_2];
+ else
+ return spu_builtin_decls[SPU_SHUFFLE_3];
+
+ case V4SImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_SHUFFLE_4];
+ else
+ return spu_builtin_decls[SPU_SHUFFLE_5];
+
+ case V2DImode:
+ if (TYPE_UNSIGNED (type))
+ return spu_builtin_decls[SPU_SHUFFLE_6];
+ else
+ return spu_builtin_decls[SPU_SHUFFLE_7];
+
+ case V4SFmode:
+ return spu_builtin_decls[SPU_SHUFFLE_8];
+
+ case V2DFmode:
+ return spu_builtin_decls[SPU_SHUFFLE_9];
+
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Return the appropriate mode for a named address pointer. */
+static enum machine_mode
+spu_addr_space_pointer_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return ptr_mode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the appropriate mode for an address in a named address space. */
+static enum machine_mode
+spu_addr_space_address_mode (addr_space_t addrspace)
+{
+ switch (addrspace)
+ {
+ case ADDR_SPACE_GENERIC:
+ return Pmode;
+ case ADDR_SPACE_EA:
+ return EAmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Determine if one named address space is a subset of another. */
+
+static bool
+spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
+{
+ gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
+ gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
+
+ if (subset == superset)
+ return true;
+
+ /* If we have -mno-address-space-conversion, treat __ea and generic as not
+ being subsets but instead as disjoint address spaces. */
+ else if (!TARGET_ADDRESS_SPACE_CONVERSION)
+ return false;
+
+ else
+ return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
+}
+
+/* Convert from one address space to another. */
+static rtx
+spu_addr_space_convert (rtx op, tree from_type, tree to_type)
+{
+ addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
+ addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
+
+ gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
+ gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
+
+ if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
+ {
+ rtx result, ls;
+
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (Pmode);
+ ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
+ op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, Pmode, 1);
+
+ emit_insn (gen_subsi3 (result, op, ls));
+
+ return result;
+ }
+
+ else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
+ {
+ rtx result, ls;
+
+ ls = gen_const_mem (DImode,
+ gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
+ set_mem_align (ls, 128);
+
+ result = gen_reg_rtx (EAmode);
+ ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
+ op = force_reg (Pmode, op);
+ ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
+ ls, const0_rtx, EAmode, 1);
+ op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
+
+ if (EAmode == SImode)
+ emit_insn (gen_addsi3 (result, op, ls));
+ else
+ emit_insn (gen_adddi3 (result, op, ls));
+
+ return result;
+ }
+
+ else
+ gcc_unreachable ();
+}
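+
+/* For example, converting a generic pointer P to __ea computes
+   P + __ea_local_store, except that a null P stays null: the
+   conditional move above substitutes 0 for the local-store base
+   whenever P == 0. The __ea-to-generic direction subtracts the
+   base instead. */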
+
+
+/* Count the total number of instructions in each pipe and return the
+ maximum, which is used as the Minimum Iteration Interval (MII)
+ in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
+ -2 marks instructions that can go in either pipe0 or pipe1. */
+static int
+spu_sms_res_mii (struct ddg *g)
+{
+ int i;
+ unsigned t[4] = {0, 0, 0, 0};
+
+ for (i = 0; i < g->num_nodes; i++)
+ {
+ rtx insn = g->nodes[i].insn;
+ int p = get_pipe (insn) + 2;
+
+ gcc_assert (p >= 0);
+ gcc_assert (p < 4);
+
+ t[p]++;
+ if (dump_file && INSN_P (insn))
+ fprintf (dump_file, "i%d %s %d %d\n",
+ INSN_UID (insn),
+ insn_data[INSN_CODE(insn)].name,
+ p, t[p]);
+ }
+ if (dump_file)
+ fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
+
+ return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
+}
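+
+/* For example, with 4 dual-pipe instructions, 3 pipe0 instructions and
+   1 pipe1 instruction the result is MAX ((4 + 3 + 1 + 1) / 2, 3) = 4;
+   eight instructions spread over two pipes need at least four cycles
+   per iteration. */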
+
+
+void
+spu_init_expanders (void)
+{
+ if (cfun)
+ {
+ rtx r0, r1;
+ /* The hard frame pointer is only 128-bit aligned when
+ frame_pointer_needed is true. We don't know that until we're
+ expanding the prologue. */
+ REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
+
+ /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
+ LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
+ to be treated as aligned, so generate them here. */
+ r0 = gen_reg_rtx (SImode);
+ r1 = gen_reg_rtx (SImode);
+ mark_reg_pointer (r0, 128);
+ mark_reg_pointer (r1, 128);
+ gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
+ && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
+ }
+}
+
+static enum machine_mode
+spu_libgcc_cmp_return_mode (void)
+{
+/* For SPU, word mode is TImode, so it is better to use SImode
+ for compare returns. */
+ return SImode;
+}
+
+static enum machine_mode
+spu_libgcc_shift_count_mode (void)
+{
+/* For SPU, word mode is TImode, so it is better to use SImode
+ for shift counts. */
+ return SImode;
+}
+
+/* Implement targetm.section_type_flags. */
+static unsigned int
+spu_section_type_flags (tree decl, const char *name, int reloc)
+{
+ /* .toe needs to have type @nobits. */
+ if (strcmp (name, ".toe") == 0)
+ return SECTION_BSS;
+ /* Don't load _ea into the current address space. */
+ if (strcmp (name, "._ea") == 0)
+ return SECTION_WRITE | SECTION_DEBUG;
+ return default_section_type_flags (decl, name, reloc);
+}
+
+/* Implement targetm.select_section. */
+static section *
+spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ /* Variables and constants defined in the __ea address space
+ go into a special section named "._ea". */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
+
+ return get_named_section (decl, "._ea", reloc);
+ }
+
+ return default_elf_select_section (decl, reloc, align);
+}
+
+/* Implement targetm.unique_section. */
+static void
+spu_unique_section (tree decl, int reloc)
+{
+ /* We don't support unique section names in the __ea address
+ space for now. */
+ if (TREE_TYPE (decl) != error_mark_node
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
+ return;
+
+ default_unique_section (decl, reloc);
+}
+
+/* Generate a constant or register which contains 2^SCALE. We assume
+ the result is valid for MODE. Currently, MODE must be V4SFmode and
+ SCALE must be SImode. */
+rtx
+spu_gen_exp2 (enum machine_mode mode, rtx scale)
+{
+ gcc_assert (mode == V4SFmode);
+ gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
+ if (GET_CODE (scale) != CONST_INT)
+ {
+ /* unsigned int exp = (127 + scale) << 23;
+ __vector float m = (__vector float) spu_splats (exp); */
+ rtx reg = force_reg (SImode, scale);
+ rtx exp = gen_reg_rtx (SImode);
+ rtx mul = gen_reg_rtx (mode);
+ emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
+ emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
+ emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
+ return mul;
+ }
+ else
+ {
+ HOST_WIDE_INT exp = 127 + INTVAL (scale);
+ unsigned char arr[16];
+ arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
+ arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
+ arr[2] = arr[6] = arr[10] = arr[14] = 0;
+ arr[3] = arr[7] = arr[11] = arr[15] = 0;
+ return array_to_constant (mode, arr);
+ }
+}
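+
+/* Worked example: spu_gen_exp2 (V4SFmode, const0_rtx) takes the
+   constant branch with exp = 127, giving bytes { 0x3f, 0x80, 0x00,
+   0x00 } in each word -- the single-precision encoding of 2^0 = 1.0f
+   splatted across the vector. */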
+
+/* After reload, just change the convert into a move instruction
+ or a dead instruction. */
+void
+spu_split_convert (rtx ops[])
+{
+ if (REGNO (ops[0]) == REGNO (ops[1]))
+ emit_note (NOTE_INSN_DELETED);
+ else
+ {
+ /* Use TImode always as this might help hard reg copyprop. */
+ rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
+ rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
+ emit_insn (gen_move_insn (op0, op1));
+ }
+}
+
+void
+spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
+{
+ fprintf (file, "# profile\n");
+ fprintf (file, "brsl $75, _mcount\n");
+}
+
+/* Implement targetm.ref_may_alias_errno. */
+static bool
+spu_ref_may_alias_errno (ao_ref *ref)
+{
+ tree base = ao_ref_base (ref);
+
+ /* With SPU newlib, errno is defined as something like
+ _impure_data._errno.
+ The default implementation of this target macro does not
+ recognize such expressions, so we special-case them here. */
+
+ if (TREE_CODE (base) == VAR_DECL
+ && !TREE_STATIC (base)
+ && DECL_EXTERNAL (base)
+ && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
+ && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
+ "_impure_data") == 0
+ /* _errno is the first member of _impure_data. */
+ && ref->offset == 0)
+ return true;
+
+ return default_ref_may_alias_errno (ref);
+}
+
+#include "gt-spu.h"
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
new file mode 100644
index 000000000..51bfa5131
--- /dev/null
+++ b/gcc/config/spu/spu.h
@@ -0,0 +1,564 @@
+/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* Run-time Target */
+#define TARGET_CPU_CPP_BUILTINS() spu_cpu_cpp_builtins(pfile)
+
+#define TARGET_VERSION fprintf (stderr, " (spu %s)", __DATE__);
+
+#define C_COMMON_OVERRIDE_OPTIONS spu_c_common_override_options()
+
+#define INIT_EXPANDERS spu_init_expanders()
+
+/* Which processor to generate code or schedule for. */
+enum processor_type
+{
+ PROCESSOR_CELL,
+ PROCESSOR_CELLEDP
+};
+
+extern GTY(()) int spu_arch;
+extern GTY(()) int spu_tune;
+
+/* Support for a compile-time default architecture and tuning. The rules are:
+ --with-arch is ignored if -march is specified.
+ --with-tune is ignored if -mtune is specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{!march=*:-march=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }
+
+/* Default target_flags if no switches specified. */
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS \
+ | MASK_SAFE_HINTS | MASK_ADDRESS_SPACE_CONVERSION)
+#endif
+
+
+/* Storage Layout */
+
+#define BITS_BIG_ENDIAN 1
+
+#define BYTES_BIG_ENDIAN 1
+
+#define WORDS_BIG_ENDIAN 1
+
+#define BITS_PER_UNIT 8
+
+/* GCC uses word_mode in many places, assuming that it is the fastest
+ integer mode. That is not the case for SPU, though. We can't use
+ 32 here for a reason I can't remember. */
+#define BITS_PER_WORD 128
+
+#define UNITS_PER_WORD (BITS_PER_WORD/BITS_PER_UNIT)
+
+/* When building libgcc, we need to assume 4 units per word even
+ though UNITS_PER_WORD is 16, because the SPU has basically a 32-bit
+ instruction set although register size is 128 bits. In particular,
+ this causes libgcc to contain __divdi3 instead of __divti3 etc.
+ However, we allow this default to be re-defined on the command
+ line, so that we can use the LIB2_SIDITI_CONV_FUNCS mechanism
+ to get (in addition) TImode versions of some routines. */
+#ifndef LIBGCC2_UNITS_PER_WORD
+#define LIBGCC2_UNITS_PER_WORD 4
+#endif
+
+#define POINTER_SIZE 32
+
+#define PARM_BOUNDARY 128
+
+#define STACK_BOUNDARY 128
+
+/* We want it 8-byte aligned so we can properly use dual-issue
+ instructions, which can only happen on an 8-byte aligned address. */
+#define FUNCTION_BOUNDARY 64
+
+/* We would like to allow a larger alignment for data objects (for DMA)
+ but the aligned attribute is limited by BIGGEST_ALIGNMENT. We don't
+ define BIGGEST_ALIGNMENT as larger because it is used in other places
+ and would end up wasting space. (Is this still true?) */
+#define BIGGEST_ALIGNMENT 128
+
+#define MINIMUM_ATOMIC_ALIGNMENT 128
+
+/* Make all static objects 16-byte aligned.  This allows us to assume
+   they are also padded to 16 bytes, which means we can use a single
+   load or store instruction to access them.  Do the same for objects
+   on the stack.  (Except that an apparent bug allows some stack
+   objects to be unaligned.)  */
+#define DATA_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
+#define CONSTANT_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
+#define LOCAL_ALIGNMENT(TYPE,ALIGN) ((ALIGN) > 128 ? (ALIGN) : 128)
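+
+/* Worked example: a global "int" normally has 32-bit alignment, so
+   DATA_ALIGNMENT (type, 32) yields 128 and the object is promoted to
+   a full quad-word; an object whose natural alignment already exceeds
+   128 bits keeps its larger alignment.  */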
+
+#define EMPTY_FIELD_BOUNDARY 32
+
+#define STRICT_ALIGNMENT 1
+
+/* symbol_refs of functions are not necessarily aligned to a 16-byte
+   boundary.  */
+#define ALIGNED_SYMBOL_REF_P(X) \
+ (GET_CODE (X) == SYMBOL_REF \
+ && (SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_ALIGN1) == 0 \
+ && (! SYMBOL_REF_FUNCTION_P (X) \
+ || align_functions >= 16))
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+#define MAX_FIXED_MODE_SIZE 128
+
+#define STACK_SAVEAREA_MODE(save_level) \
+ (save_level == SAVE_FUNCTION ? VOIDmode \
+ : save_level == SAVE_NONLOCAL ? SImode \
+ : Pmode)
+
+#define STACK_SIZE_MODE SImode
+
+
+/* Type Layout */
+
+#define INT_TYPE_SIZE 32
+
+#define LONG_TYPE_SIZE 32
+
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+
+#define DOUBLE_TYPE_SIZE 64
+
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#define STDINT_LONG32 0
+
+
+/* Register Basics */
+
+/* 128-130 are special registers that never appear in assembly code. */
+#define FIRST_PSEUDO_REGISTER 131
+
+#define FIXED_REGISTERS { \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1 \
+}
+
+#define CALL_USED_REGISTERS { \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1 \
+}
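+
+/* Reading the two tables above: only $lr, $sp, and the three special
+   registers 128-130 are fixed; registers 0-79 are marked call-used
+   (clobbered across calls) and $80-$127 are call-saved.  */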
+
+
+/* Values in Registers */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_BITSIZE(MODE)+MAX_FIXED_MODE_SIZE-1)/MAX_FIXED_MODE_SIZE)
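+
+/* Since no fixed mode is wider than a register, this always evaluates
+   to 1; e.g. SImode gives (32 + 127) / 128 == 1 and TImode gives
+   (128 + 127) / 128 == 1.  */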
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (GET_MODE_BITSIZE (MODE1) <= MAX_FIXED_MODE_SIZE \
+ && GET_MODE_BITSIZE (MODE2) <= MAX_FIXED_MODE_SIZE)
+
+
+/* Register Classes */
+
+enum reg_class {
+ NO_REGS,
+ GENERAL_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+/* SPU is simple, it really only has one class of registers. */
+#define IRA_COVER_CLASSES { GENERAL_REGS, LIM_REG_CLASSES }
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "GENERAL_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS { \
+ {0, 0, 0, 0, 0}, /* no regs */ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x3}, /* general regs */ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x3}} /* all regs */
+
+#define REGNO_REG_CLASS(REGNO) (GENERAL_REGS)
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#define REGNO_OK_FOR_BASE_P(regno) \
+  ((regno) < FIRST_PSEUDO_REGISTER \
+   || ((regno) > LAST_VIRTUAL_REGISTER && reg_renumber[(regno)] >= 0))
+
+#define REGNO_OK_FOR_INDEX_P(regno) \
+  ((regno) < FIRST_PSEUDO_REGISTER \
+   || ((regno) > LAST_VIRTUAL_REGISTER && reg_renumber[(regno)] >= 0))
+
+#define INT_REG_OK_FOR_INDEX_P(X,STRICT) \
+ ((!(STRICT) || REGNO_OK_FOR_INDEX_P (REGNO (X))))
+#define INT_REG_OK_FOR_BASE_P(X,STRICT) \
+ ((!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X))))
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* GCC assumes that modes are in the lowpart of a register, which is
+   not true for SPU.  */
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ ((GET_MODE_SIZE (FROM) > 4 || GET_MODE_SIZE (TO) > 4) \
+ && (GET_MODE_SIZE (FROM) < 16 || GET_MODE_SIZE (TO) < 16) \
+ && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO))
+
+#define REGISTER_TARGET_PRAGMAS() do {					\
+  c_register_addr_space ("__ea", ADDR_SPACE_EA);			\
+  targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin;	\
+} while (0);
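+
+/* With "__ea" registered above, user code can declare objects in the
+   PPU-side effective address space, along the lines of (a sketch):
+
+     extern __ea int shared_counter;
+     __ea int *p = &shared_counter;
+
+   and spu_resolve_overloaded_builtin resolves the overloaded generic
+   intrinsics (e.g. spu_add) to their type-specific builtins.  */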
+
+
+/* Frame Layout */
+
+#define STACK_GROWS_DOWNWARD
+
+#define FRAME_GROWS_DOWNWARD 1
+
+#define STARTING_FRAME_OFFSET (0)
+
+#define STACK_POINTER_OFFSET 32
+
+#define FIRST_PARM_OFFSET(FNDECL) (0)
+
+#define DYNAMIC_CHAIN_ADDRESS(FP) plus_constant ((FP), -16)
+
+#define RETURN_ADDR_RTX(COUNT,FP) (spu_return_addr (COUNT, FP))
+
+/* Should this be defined?  Would it simplify our implementation?  */
+/* #define RETURN_ADDR_IN_PREVIOUS_FRAME */
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG(Pmode, LINK_REGISTER_REGNUM)
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LINK_REGISTER_REGNUM)
+
+#define ARG_POINTER_CFA_OFFSET(FNDECL) \
+ (crtl->args.pretend_args_size - STACK_POINTER_OFFSET)
+
+
+/* Stack Checking */
+
+/* We store the available stack size in the second slot of the stack
+   pointer register.  We emit stack checking code during the prologue.  */
+#define STACK_CHECK_BUILTIN 1
+
+
+/* Frame Registers, and other registers */
+
+#define STACK_POINTER_REGNUM 1
+
+/* Will be eliminated. */
+#define FRAME_POINTER_REGNUM 128
+
+/* This is not specified in any ABI, so it could be set to anything.  */
+#define HARD_FRAME_POINTER_REGNUM 127
+
+/* Will be eliminated. */
+#define ARG_POINTER_REGNUM 129
+
+#define STATIC_CHAIN_REGNUM 2
+
+#define LINK_REGISTER_REGNUM 0
+
+/* Used to keep track of instructions that have clobbered the hint
+   buffer.  Users can also specify it in inline asm.  */
+#define HBR_REGNUM 130
+
+#define MAX_REGISTER_ARGS 72
+#define FIRST_ARG_REGNUM 3
+#define LAST_ARG_REGNUM (FIRST_ARG_REGNUM + MAX_REGISTER_ARGS - 1)
+
+#define MAX_REGISTER_RETURN 72
+#define FIRST_RETURN_REGNUM 3
+#define LAST_RETURN_REGNUM (FIRST_RETURN_REGNUM + MAX_REGISTER_RETURN - 1)
+
+
+/* Elimination */
+
+#define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = spu_initial_elimination_offset((FROM),(TO)))
+
+
+/* Stack Arguments */
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define REG_PARM_STACK_SPACE(FNDECL) 0
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+
+/* Register Arguments */
+
+#define CUMULATIVE_ARGS int
+
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
+ ((CUM) = 0)
+
+/* The SPU ABI wants 32/64-bit types at offset 0 in the quad-word on the
+ stack. 8/16-bit types should be at offsets 3/2 respectively. */
+#define FUNCTION_ARG_OFFSET(MODE, TYPE) \
+(((TYPE) && INTEGRAL_TYPE_P (TYPE) && GET_MODE_SIZE (MODE) < 4) \
+ ? (4 - GET_MODE_SIZE (MODE)) \
+ : 0)
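+
+/* E.g. a QImode argument gets offset 4 - 1 == 3 and an HImode argument
+   gets offset 4 - 2 == 2, so in both cases the value ends up in the
+   low-order bytes of the first 32-bit word of the quad-word.  */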
+
+#define FUNCTION_ARG_PADDING(MODE,TYPE) upward
+
+#define PAD_VARARGS_DOWN 0
+
+#define FUNCTION_ARG_REGNO_P(N) ((N) >= (FIRST_ARG_REGNUM) && (N) <= (LAST_ARG_REGNUM))
+
+/* Scalar Return */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ (spu_function_value((VALTYPE),(FUNC)))
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, FIRST_RETURN_REGNUM)
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) >= (FIRST_RETURN_REGNUM) && (N) <= (LAST_RETURN_REGNUM))
+
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_ALIGN1 (SYMBOL_FLAG_MACH_DEP << 0)
+
+/* Aggregate Return */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+
+/* Function Entry */
+
+#define EXIT_IGNORE_STACK 0
+
+#define EPILOGUE_USES(REGNO) ((REGNO)==1 ? 1 : 0)
+
+
+/* Profiling */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ spu_function_profiler ((FILE), (LABELNO));
+
+#define NO_PROFILE_COUNTERS 1
+
+#define PROFILE_BEFORE_PROLOGUE 1
+
+
+/* Trampolines */
+
+#define TRAMPOLINE_SIZE (TARGET_LARGE_MEM ? 20 : 16)
+
+#define TRAMPOLINE_ALIGNMENT 128
+
+/* Addressing Modes */
+
+#define CONSTANT_ADDRESS_P(X) spu_constant_address_p(X)
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define LEGITIMATE_CONSTANT_P(X) spu_legitimate_constant_p(X)
+
+
+/* Costs */
+
+#define BRANCH_COST(speed_p, predictable_p) spu_branch_cost
+
+#define SLOW_BYTE_ACCESS 0
+
+#define MOVE_RATIO(speed) ((speed)? 32 : 4)
+
+#define NO_FUNCTION_CSE
+
+
+/* Sections */
+
+#define TEXT_SECTION_ASM_OP ".text"
+
+#define DATA_SECTION_ASM_OP ".data"
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+
+/* PIC */
+#define PIC_OFFSET_TABLE_REGNUM 126
+
+
+/* File Framework */
+
+#define ASM_APP_ON ""
+
+#define ASM_APP_OFF ""
+
+
+/* Uninitialized Data */
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (ROUNDED)))
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%d\n", (ROUNDED)))
+
+
+/* Label Output */
+#define ASM_OUTPUT_LABEL(FILE,NAME) \
+ do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0)
+
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME))
+
+#define ASM_OUTPUT_SYMBOL_REF(FILE, X) \
+ do \
+ { \
+ tree decl; \
+ assemble_name (FILE, XSTR ((X), 0)); \
+ if ((decl = SYMBOL_REF_DECL ((X))) != 0 \
+ && TREE_CODE (decl) == VAR_DECL \
+ && TYPE_ADDR_SPACE (TREE_TYPE (decl))) \
+ fputs ("@ppu", FILE); \
+ } while (0)
+
+
+/* Instruction Output */
+#define REGISTER_NAMES \
+{"$lr", "$sp", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", \
+ "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31", \
+ "$32", "$33", "$34", "$35", "$36", "$37", "$38", "$39", "$40", "$41", "$42", "$43", "$44", "$45", "$46", "$47", \
+ "$48", "$49", "$50", "$51", "$52", "$53", "$54", "$55", "$56", "$57", "$58", "$59", "$60", "$61", "$62", "$63", \
+ "$64", "$65", "$66", "$67", "$68", "$69", "$70", "$71", "$72", "$73", "$74", "$75", "$76", "$77", "$78", "$79", \
+ "$80", "$81", "$82", "$83", "$84", "$85", "$86", "$87", "$88", "$89", "$90", "$91", "$92", "$93", "$94", "$95", \
+ "$96", "$97", "$98", "$99", "$100", "$101", "$102", "$103", "$104", "$105", "$106", "$107", "$108", "$109", "$110", "$111", \
+ "$112", "$113", "$114", "$115", "$116", "$117", "$118", "$119", "$120", "$121", "$122", "$123", "$124", "$125", "$126", "$127", \
+ "$vfp", "$vap", "hbr" \
+}
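+
+/* The last three names correspond to the special registers defined
+   earlier: "$vfp" is FRAME_POINTER_REGNUM (128), "$vap" is
+   ARG_POINTER_REGNUM (129), and "hbr" is HBR_REGNUM (130).  */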
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE)
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address (FILE, ADDR)
+
+#define LOCAL_LABEL_PREFIX "."
+
+#define USER_LABEL_PREFIX ""
+
+#define ASM_COMMENT_START "#"
+
+
+/* Dispatch Tables */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t.word .L%d\n", VALUE)
+
+
+/* Alignment Output */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
+
+
+/* Misc */
+
+#define CASE_VECTOR_MODE SImode
+
+#define MOVE_MAX 16
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) ((INPREC) <= 32 && (OUTPREC) <= (INPREC))
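+
+/* E.g. truncating SImode to HImode (32 -> 16 bits) is a no-op, but
+   truncations from DImode or TImode are not; those require the real
+   shift instructions emitted by the trunc* patterns in spu.md.  */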
+
+#define STORE_FLAG_VALUE -1
+
+#define Pmode SImode
+
+#define FUNCTION_MODE QImode
+
+#define NO_IMPLICIT_EXTERN_C 1
+
+/* Canonicalize a comparison from one we don't have to one we do have. */
+#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
+ do { \
+ if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \
+ { \
+ rtx tem = (OP0); \
+ (OP0) = (OP1); \
+ (OP1) = tem; \
+ (CODE) = swap_condition (CODE); \
+ } \
+ } while (0)
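+
+/* E.g. a comparison (lt:SI a b) is rewritten as (gt:SI b a): the
+   operands are swapped and swap_condition maps LT to GT, so only the
+   "greater than" style comparisons need machine patterns.  */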
+
+
+/* Address spaces. */
+#define ADDR_SPACE_EA 1
+
+
+/* Builtins. */
+
+enum spu_builtin_type
+{
+ B_INSN,
+ B_JUMP,
+ B_BISLED,
+ B_CALL,
+ B_HINT,
+ B_OVERLOAD,
+ B_INTERNAL
+};
+
+struct spu_builtin_description
+{
+ int fcode;
+ int icode;
+ const char *name;
+ enum spu_builtin_type type;
+
+ /* The first element of parm is always the return type. The rest
+ are a zero terminated list of parameters. */
+ int parm[5];
+};
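+
+/* Schematically, an entry in the table looks like
+
+     { <fcode>, <icode>, "<name>", B_INSN, { <ret>, <args>..., 0 } }
+
+   with the parameter list zero-terminated as described above.  (A
+   sketch of the shape only; the actual entries are defined elsewhere
+   in the port.)  */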
+
+extern struct spu_builtin_description spu_builtins[];
+
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
new file mode 100644
index 000000000..79d0757ed
--- /dev/null
+++ b/gcc/config/spu/spu.md
@@ -0,0 +1,5431 @@
+;; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+
+;; Define an insn type attribute.  This is used in function unit delay
+;; computations.
+;; multi0 is a multiple-insn rtl whose first insn is in pipe0;
+;; multi1 is a multiple-insn rtl whose first insn is in pipe1.
+(define_attr "type" "fx2,shuf,fx3,load,store,br,spr,lnop,nop,fxb,fp6,fp7,fpd,iprefetch,multi0,multi1,hbr,convert"
+ (const_string "fx2"))
+
+;; Length (in bytes).
+(define_attr "length" ""
+ (const_int 4))
+
+(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
+;; Processor type -- this attribute must exactly match the processor_type
+;; enumeration in spu.h.
+
+(define_attr "cpu" "spu"
+ (const (symbol_ref "spu_cpu_attr")))
+
+; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
+; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST])
+
+(define_cpu_unit "pipe0,pipe1,fp,ls")
+
+(define_insn_reservation "NOP" 1 (eq_attr "type" "nop")
+ "pipe0")
+
+(define_insn_reservation "FX2" 2 (eq_attr "type" "fx2")
+ "pipe0, nothing")
+
+(define_insn_reservation "FX3" 4 (eq_attr "type" "fx3,fxb")
+ "pipe0, nothing*3")
+
+(define_insn_reservation "FP6" 6 (eq_attr "type" "fp6")
+ "pipe0 + fp, nothing*5")
+
+(define_insn_reservation "FP7" 7 (eq_attr "type" "fp7")
+ "pipe0, fp, nothing*5")
+
+;; The behavior of double-precision instructions is that both pipes
+;; stall for 6 cycles and the rest of the operation pipelines for
+;; 7 cycles.  The simplest way to model this is to simply ignore
+;; the 6-cycle stall.
+(define_insn_reservation "FPD" 7
+ (and (eq_attr "tune" "cell")
+ (eq_attr "type" "fpd"))
+ "pipe0 + pipe1, fp, nothing*5")
+
+;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
+(define_insn_reservation "FPD_CELLEDP" 9
+ (and (eq_attr "tune" "celledp")
+ (eq_attr "type" "fpd"))
+ "pipe0 + fp, nothing*8")
+
+(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
+ "pipe1")
+
+(define_insn_reservation "STORE" 1 (eq_attr "type" "store")
+ "pipe1 + ls")
+
+(define_insn_reservation "IPREFETCH" 1 (eq_attr "type" "iprefetch")
+ "pipe1 + ls")
+
+(define_insn_reservation "SHUF" 4 (eq_attr "type" "shuf,br,spr")
+ "pipe1, nothing*3")
+
+(define_insn_reservation "LOAD" 6 (eq_attr "type" "load")
+ "pipe1 + ls, nothing*5")
+
+(define_insn_reservation "HBR" 18 (eq_attr "type" "hbr")
+ "pipe1, nothing*15")
+
+(define_insn_reservation "MULTI0" 4 (eq_attr "type" "multi0")
+ "pipe0+pipe1, nothing*3")
+
+(define_insn_reservation "MULTI1" 4 (eq_attr "type" "multi1")
+ "pipe1, nothing*3")
+
+(define_insn_reservation "CONVERT" 0 (eq_attr "type" "convert")
+ "nothing")
+
+;; Force pipe0 to occur before pipe1 in a cycle.
+(absence_set "pipe0" "pipe1")
+
+
+(define_constants [
+ (UNSPEC_BLOCKAGE 0)
+ (UNSPEC_IPREFETCH 1)
+ (UNSPEC_FREST 2)
+ (UNSPEC_FRSQEST 3)
+ (UNSPEC_FI 4)
+ (UNSPEC_EXTEND_CMP 5)
+ (UNSPEC_CG 6)
+ (UNSPEC_CGX 7)
+ (UNSPEC_ADDX 8)
+ (UNSPEC_BG 9)
+ (UNSPEC_BGX 10)
+ (UNSPEC_SFX 11)
+ (UNSPEC_FSM 12)
+ (UNSPEC_HBR 13)
+ (UNSPEC_LNOP 14)
+ (UNSPEC_NOP 15)
+ (UNSPEC_CONVERT 16)
+ (UNSPEC_SELB 17)
+ (UNSPEC_SHUFB 18)
+ (UNSPEC_CPAT 19)
+ (UNSPEC_SYNC 20)
+ (UNSPEC_CNTB 21)
+ (UNSPEC_SUMB 22)
+ (UNSPEC_FSMB 23)
+ (UNSPEC_FSMH 24)
+ (UNSPEC_GBB 25)
+ (UNSPEC_GBH 26)
+ (UNSPEC_GB 27)
+ (UNSPEC_AVGB 28)
+ (UNSPEC_ABSDB 29)
+ (UNSPEC_ORX 30)
+ (UNSPEC_HEQ 31)
+ (UNSPEC_HGT 32)
+ (UNSPEC_HLGT 33)
+ (UNSPEC_STOP 38)
+ (UNSPEC_STOPD 39)
+ (UNSPEC_SET_INTR 40)
+ (UNSPEC_FSCRRD 42)
+ (UNSPEC_FSCRWR 43)
+ (UNSPEC_MFSPR 44)
+ (UNSPEC_MTSPR 45)
+ (UNSPEC_RDCH 46)
+ (UNSPEC_RCHCNT 47)
+ (UNSPEC_WRCH 48)
+ (UNSPEC_SPU_REALIGN_LOAD 49)
+ (UNSPEC_SPU_MASK_FOR_LOAD 50)
+ (UNSPEC_DFTSV 51)
+ (UNSPEC_FLOAT_EXTEND 52)
+ (UNSPEC_FLOAT_TRUNCATE 53)
+ (UNSPEC_SP_SET 54)
+ (UNSPEC_SP_TEST 55)
+])
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Mode iterators
+
+(define_mode_iterator ALL [QI V16QI
+ HI V8HI
+ SI V4SI
+ DI V2DI
+ TI
+ SF V4SF
+ DF V2DF])
+
+;; Everything except DI and TI, which are handled separately because
+;; they need different constraints to correctly test VOIDmode constants.
+(define_mode_iterator MOV [QI V16QI
+ HI V8HI
+ SI V4SI
+ V2DI
+ SF V4SF
+ DF V2DF])
+
+(define_mode_iterator QHSI [QI HI SI])
+(define_mode_iterator QHSDI [QI HI SI DI])
+(define_mode_iterator DTI [DI TI])
+
+(define_mode_iterator VINT [QI V16QI
+ HI V8HI
+ SI V4SI
+ DI V2DI
+ TI])
+
+(define_mode_iterator VQHSI [QI V16QI
+ HI V8HI
+ SI V4SI])
+
+(define_mode_iterator VHSI [HI V8HI
+ SI V4SI])
+
+(define_mode_iterator VSDF [SF V4SF
+ DF V2DF])
+
+(define_mode_iterator VSI [SI V4SI])
+(define_mode_iterator VDI [DI V2DI])
+(define_mode_iterator VSF [SF V4SF])
+(define_mode_iterator VDF [DF V2DF])
+
+(define_mode_iterator VCMP [V16QI
+ V8HI
+ V4SI
+ V4SF
+ V2DF])
+
+(define_mode_iterator VCMPU [V16QI
+ V8HI
+ V4SI])
+
+(define_mode_attr v [(V8HI "v") (V4SI "v")
+ (HI "") (SI "")])
+
+(define_mode_attr bh [(QI "b") (V16QI "b")
+ (HI "h") (V8HI "h")
+ (SI "") (V4SI "")])
+
+(define_mode_attr d [(SF "") (V4SF "")
+ (DF "d") (V2DF "d")])
+(define_mode_attr d6 [(SF "6") (V4SF "6")
+ (DF "d") (V2DF "d")])
+
+(define_mode_attr f2i [(SF "si") (V4SF "v4si")
+ (DF "di") (V2DF "v2di")])
+(define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
+ (DF "DI") (V2DF "V2DI")])
+(define_mode_attr i2f [(SI "sf") (V4SI "v4sf")
+ (DI "df") (V2DI "v2df")])
+(define_mode_attr I2F [(SI "SF") (V4SI "V4SF")
+ (DI "DF") (V2DI "V2DF")])
+
+(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
+
+(define_mode_attr umask [(HI "f") (V8HI "f")
+ (SI "g") (V4SI "g")])
+(define_mode_attr nmask [(HI "F") (V8HI "F")
+ (SI "G") (V4SI "G")])
+
+;; Used for carry and borrow instructions.
+(define_mode_iterator CBOP [SI DI V4SI V2DI])
+
+;; Used in vec_set and vec_extract
+(define_mode_iterator V [V2DI V4SI V8HI V16QI V2DF V4SF])
+(define_mode_attr inner [(V16QI "QI")
+ (V8HI "HI")
+ (V4SI "SI")
+ (V2DI "DI")
+ (V4SF "SF")
+ (V2DF "DF")])
+(define_mode_attr vmult [(V16QI "1")
+ (V8HI "2")
+ (V4SI "4")
+ (V2DI "8")
+ (V4SF "4")
+ (V2DF "8")])
+(define_mode_attr voff [(V16QI "13")
+ (V8HI "14")
+ (V4SI "0")
+ (V2DI "0")
+ (V4SF "0")
+ (V2DF "0")])
+
+
+;; mov
+
+(define_expand "mov<mode>"
+ [(set (match_operand:ALL 0 "nonimmediate_operand" "")
+ (match_operand:ALL 1 "general_operand" ""))]
+ ""
+ {
+ if (spu_expand_mov(operands, <MODE>mode))
+ DONE;
+ })
+
+(define_split
+ [(set (match_operand 0 "spu_reg_operand")
+ (match_operand 1 "immediate_operand"))]
+
+ ""
+ [(set (match_dup 0)
+ (high (match_dup 1)))
+ (set (match_dup 0)
+ (lo_sum (match_dup 0)
+ (match_dup 1)))]
+ {
+ if (spu_split_immediate (operands))
+ DONE;
+ FAIL;
+ })
+
+(define_insn "pic"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (match_operand:SI 1 "immediate_operand" "s"))
+ (use (const_int 0))]
+ "flag_pic"
+ "ila\t%0,%%pic(%1)")
+
+;; Whenever a function generates the 'pic' pattern above, we need to
+;; load the pic_offset_table register.
+;; GCC doesn't deal well with labels in the middle of a block, so we
+;; hardcode the offsets in the asm here.
+(define_insn "load_pic_offset"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (unspec:SI [(const_int 0)] 0))
+ (set (match_operand:SI 1 "spu_reg_operand" "=r")
+ (unspec:SI [(const_int 0)] 0))]
+ "flag_pic"
+ "ila\t%1,.+8\;brsl\t%0,4"
+ [(set_attr "length" "8")
+ (set_attr "type" "multi0")])
+
+
+;; move internal
+
+(define_insn "_mov<mode>"
+ [(set (match_operand:MOV 0 "spu_dest_operand" "=r,r,r,r,r,m")
+ (match_operand:MOV 1 "spu_mov_operand" "r,A,f,j,m,r"))]
+ "register_operand(operands[0], <MODE>mode)
+ || register_operand(operands[1], <MODE>mode)"
+ "@
+ ori\t%0,%1,0
+ il%s1\t%0,%S1
+ fsmbi\t%0,%S1
+ c%s1d\t%0,%S1($sp)
+ lq%p1\t%0,%1
+ stq%p0\t%1,%0"
+ [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
+
+(define_insn "low_<mode>"
+ [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
+ (lo_sum:VSI (match_operand:VSI 1 "spu_reg_operand" "0")
+ (match_operand:VSI 2 "immediate_operand" "i")))]
+ ""
+ "iohl\t%0,%2@l")
+
+(define_insn "_movdi"
+ [(set (match_operand:DI 0 "spu_dest_operand" "=r,r,r,r,r,m")
+ (match_operand:DI 1 "spu_mov_operand" "r,a,f,k,m,r"))]
+ "register_operand(operands[0], DImode)
+ || register_operand(operands[1], DImode)"
+ "@
+ ori\t%0,%1,0
+ il%d1\t%0,%D1
+ fsmbi\t%0,%D1
+ c%d1d\t%0,%D1($sp)
+ lq%p1\t%0,%1
+ stq%p0\t%1,%0"
+ [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
+
+(define_insn "_movti"
+ [(set (match_operand:TI 0 "spu_dest_operand" "=r,r,r,r,r,m")
+ (match_operand:TI 1 "spu_mov_operand" "r,U,f,l,m,r"))]
+ "register_operand(operands[0], TImode)
+ || register_operand(operands[1], TImode)"
+ "@
+ ori\t%0,%1,0
+ il%t1\t%0,%T1
+ fsmbi\t%0,%T1
+ c%t1d\t%0,%T1($sp)
+ lq%p1\t%0,%1
+ stq%p0\t%1,%0"
+ [(set_attr "type" "fx2,fx2,shuf,shuf,load,store")])
+
+(define_split
+ [(set (match_operand 0 "spu_reg_operand")
+ (match_operand 1 "memory_operand"))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
+ && GET_MODE(operands[0]) == GET_MODE(operands[1])
+ && !reload_in_progress && !reload_completed"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ { if (spu_split_load(operands))
+ DONE;
+ })
+
+(define_split
+ [(set (match_operand 0 "memory_operand")
+ (match_operand 1 "spu_reg_operand"))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) < 16
+ && GET_MODE(operands[0]) == GET_MODE(operands[1])
+ && !reload_in_progress && !reload_completed"
+ [(set (match_dup 0)
+ (match_dup 1))]
+ { if (spu_split_store(operands))
+ DONE;
+  })
+
+;; Operand 3 is the number of bytes.  1:b 2:h 4:w 8:d
+(define_expand "cpat"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,n")
+ (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
+ ""
+ {
+ rtx x = gen_cpat_const (operands);
+ if (x)
+ {
+ emit_move_insn (operands[0], x);
+ DONE;
+ }
+ })
+
+(define_insn "_cpat"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (unspec:TI [(match_operand:SI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,n")
+ (match_operand:SI 3 "immediate_operand" "i,i")] UNSPEC_CPAT))]
+ ""
+ "@
+ c%M3x\t%0,%1,%2
+ c%M3d\t%0,%C2(%1)"
+ [(set_attr "type" "shuf")])
+
+(define_split
+ [(set (match_operand:TI 0 "spu_reg_operand")
+ (unspec:TI [(match_operand:SI 1 "spu_nonmem_operand")
+ (match_operand:SI 2 "immediate_operand")
+ (match_operand:SI 3 "immediate_operand")] UNSPEC_CPAT))]
+ ""
+ [(set (match_dup:TI 0)
+ (match_dup:TI 4))]
+ {
+ operands[4] = gen_cpat_const (operands);
+ if (!operands[4])
+ FAIL;
+ })
+
+;; extend
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
+ ""
+ "xsbh\t%0,%1")
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))]
+ ""
+ "xshw\t%0,%1")
+
+(define_expand "extendsidi2"
+ [(set (match_dup:DI 2)
+ (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "")))
+ (set (match_operand:DI 0 "spu_reg_operand" "")
+ (sign_extend:DI (vec_select:SI (match_dup:V2SI 3)
+ (parallel [(const_int 1)]))))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (DImode);
+ operands[3] = spu_gen_subreg (V2SImode, operands[2]);
+ })
+
+(define_insn "xswd"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r")
+ (sign_extend:DI
+ (vec_select:SI
+ (match_operand:V2SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1) ]))))]
+ ""
+ "xswd\t%0,%1");
+
+;; By splitting this late, we don't allow much opportunity for sharing of
+;; constants.  That's OK because this should really be optimized away.
+(define_insn_and_split "extend<mode>ti2"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (sign_extend:TI (match_operand:QHSDI 1 "register_operand" "")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:TI 0)
+ (sign_extend:TI (match_dup:QHSDI 1)))]
+ {
+ spu_expand_sign_extend(operands);
+ DONE;
+ })
+
+
+;; zero_extend
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "spu_reg_operand" "r")))]
+ ""
+ "andi\t%0,%1,0x00ff")
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "spu_reg_operand" "r")))]
+ ""
+ "andi\t%0,%1,0x00ff")
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ ""
+ {
+ rtx mask = gen_reg_rtx (SImode);
+ rtx op1 = simplify_gen_subreg (SImode, operands[1], HImode, 0);
+ emit_move_insn (mask, GEN_INT (0xffff));
+ emit_insn (gen_andsi3(operands[0], op1, mask));
+ DONE;
+ })
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r")
+ (zero_extend:DI (match_operand:SI 1 "spu_reg_operand" "r")))]
+ ""
+ "rotqmbyi\t%0,%1,-4"
+ [(set_attr "type" "shuf")])
+
+(define_insn "zero_extendqiti2"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (zero_extend:TI (match_operand:QI 1 "spu_reg_operand" "r")))]
+ ""
+ "andi\t%0,%1,0x00ff\;rotqmbyi\t%0,%0,-12"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "8")])
+
+(define_insn "zero_extendhiti2"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (zero_extend:TI (match_operand:HI 1 "spu_reg_operand" "r")))]
+ ""
+ "shli\t%0,%1,16\;rotqmbyi\t%0,%0,-14"
+ [(set_attr "type" "multi1")
+ (set_attr "length" "8")])
+
+(define_insn "zero_extendsiti2"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (zero_extend:TI (match_operand:SI 1 "spu_reg_operand" "r")))]
+ ""
+ "rotqmbyi\t%0,%1,-12"
+ [(set_attr "type" "shuf")])
+
+(define_insn "zero_extendditi2"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (zero_extend:TI (match_operand:DI 1 "spu_reg_operand" "r")))]
+ ""
+ "rotqmbyi\t%0,%1,-8"
+ [(set_attr "type" "shuf")])
+
+
+;; trunc
+
+(define_insn "truncdiqi2"
+ [(set (match_operand:QI 0 "spu_reg_operand" "=r")
+ (truncate:QI (match_operand:DI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,4"
+ [(set_attr "type" "shuf")])
+
+(define_insn "truncdihi2"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r")
+ (truncate:HI (match_operand:DI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,4"
+ [(set_attr "type" "shuf")])
+
+(define_insn "truncdisi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (truncate:SI (match_operand:DI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,4"
+ [(set_attr "type" "shuf")])
+
+(define_insn "trunctiqi2"
+ [(set (match_operand:QI 0 "spu_reg_operand" "=r")
+ (truncate:QI (match_operand:TI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,12"
+ [(set_attr "type" "shuf")])
+
+(define_insn "trunctihi2"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r")
+ (truncate:HI (match_operand:TI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,12"
+ [(set_attr "type" "shuf")])
+
+(define_insn "trunctisi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (truncate:SI (match_operand:TI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,12"
+ [(set_attr "type" "shuf")])
+
+(define_insn "trunctidi2"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r")
+ (truncate:DI (match_operand:TI 1 "spu_reg_operand" "r")))]
+ ""
+ "shlqbyi\t%0,%1,8"
+ [(set_attr "type" "shuf")])
+
+
+;; float conversions
+
+(define_insn "float<mode><i2f>2"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
+ ""
+ "csflt\t%0,%1,0"
+ [(set_attr "type" "fp7")])
+
+(define_insn "fix_trunc<mode><f2i>2"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
+ ""
+ "cflts\t%0,%1,0"
+ [(set_attr "type" "fp7")])
+
+(define_insn "floatuns<mode><i2f>2"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r")))]
+ ""
+ "cuflt\t%0,%1,0"
+ [(set_attr "type" "fp7")])
+
+(define_insn "fixuns_trunc<mode><f2i>2"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (unsigned_fix:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")))]
+ ""
+ "cfltu\t%0,%1,0"
+ [(set_attr "type" "fp7")])
+
+(define_insn "float<mode><i2f>2_mul"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (mult:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
+ (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
+ ""
+ "csflt\t%0,%1,%w2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "float<mode><i2f>2_div"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (div:<I2F> (float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
+ (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
+ ""
+ "csflt\t%0,%1,%v2"
+ [(set_attr "type" "fp7")])
+
+
+(define_insn "fix_trunc<mode><f2i>2_mul"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
+ ""
+ "cflts\t%0,%1,%v2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "floatuns<mode><i2f>2_mul"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (mult:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
+ (match_operand:<I2F> 2 "spu_inv_exp2_operand" "w")))]
+ ""
+ "cuflt\t%0,%1,%w2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "floatuns<mode><i2f>2_div"
+ [(set (match_operand:<I2F> 0 "spu_reg_operand" "=r")
+ (div:<I2F> (unsigned_float:<I2F> (match_operand:VSI 1 "spu_reg_operand" "r"))
+ (match_operand:<I2F> 2 "spu_exp2_operand" "v")))]
+ ""
+ "cuflt\t%0,%1,%v2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "fixuns_trunc<mode><f2i>2_mul"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (unsigned_fix:<F2I> (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_exp2_operand" "v"))))]
+ ""
+ "cfltu\t%0,%1,%v2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "spu_reg_operand" "=r")
+ (unspec:DF [(match_operand:SF 1 "spu_reg_operand" "r")]
+ UNSPEC_FLOAT_EXTEND))]
+ ""
+ "fesd\t%0,%1"
+ [(set_attr "type" "fpd")])
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "spu_reg_operand" "=r")
+ (unspec:SF [(match_operand:DF 1 "spu_reg_operand" "r")]
+ UNSPEC_FLOAT_TRUNCATE))]
+ ""
+ "frds\t%0,%1"
+ [(set_attr "type" "fpd")])
+
+(define_expand "floatdisf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "register_operand" "")))]
+ ""
+ {
+ rtx c0 = gen_reg_rtx (SImode);
+ rtx r0 = gen_reg_rtx (DImode);
+ rtx r1 = gen_reg_rtx (SFmode);
+ rtx r2 = gen_reg_rtx (SImode);
+ rtx setneg = gen_reg_rtx (SImode);
+ rtx isneg = gen_reg_rtx (SImode);
+ rtx neg = gen_reg_rtx (DImode);
+ rtx mask = gen_reg_rtx (DImode);
+
+ emit_move_insn (c0, GEN_INT (-0x80000000ll));
+
+ emit_insn (gen_negdi2 (neg, operands[1]));
+ emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
+ emit_insn (gen_extend_compare (mask, isneg));
+ emit_insn (gen_selb (r0, neg, operands[1], mask));
+ emit_insn (gen_andc_si (setneg, c0, isneg));
+
+ emit_insn (gen_floatunsdisf2 (r1, r0));
+
+ emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
+ DONE;
+ })
+
+(define_insn_and_split "floatunsdisf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unsigned_float:SF (match_operand:DI 1 "register_operand" "r")))
+ (clobber (match_scratch:SF 2 "=r"))
+ (clobber (match_scratch:SF 3 "=r"))
+ (clobber (match_scratch:SF 4 "=r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SF 0)
+ (unsigned_float:SF (match_dup:DI 1)))]
+ {
+ rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+ rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
+ rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
+ rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
+
+ REAL_VALUE_TYPE scale;
+ real_2expN (&scale, 32, SFmode);
+
+ emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
+ emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
+
+ emit_move_insn (operands[4],
+ CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode));
+ emit_insn (gen_fmasf4 (operands[0],
+ operands[2], operands[4], operands[3]));
+ DONE;
+ })
+
+(define_expand "floattisf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:TI 1 "register_operand" "")))]
+ ""
+ {
+ rtx c0 = gen_reg_rtx (SImode);
+ rtx r0 = gen_reg_rtx (TImode);
+ rtx r1 = gen_reg_rtx (SFmode);
+ rtx r2 = gen_reg_rtx (SImode);
+ rtx setneg = gen_reg_rtx (SImode);
+ rtx isneg = gen_reg_rtx (SImode);
+ rtx neg = gen_reg_rtx (TImode);
+ rtx mask = gen_reg_rtx (TImode);
+
+ emit_move_insn (c0, GEN_INT (-0x80000000ll));
+
+ emit_insn (gen_negti2 (neg, operands[1]));
+ emit_insn (gen_cgt_ti_m1 (isneg, operands[1]));
+ emit_insn (gen_extend_compare (mask, isneg));
+ emit_insn (gen_selb (r0, neg, operands[1], mask));
+ emit_insn (gen_andc_si (setneg, c0, isneg));
+
+ emit_insn (gen_floatunstisf2 (r1, r0));
+
+ emit_insn (gen_iorsi3 (r2, gen_rtx_SUBREG (SImode, r1, 0), setneg));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, r2, 0));
+ DONE;
+ })
+
+(define_insn_and_split "floatunstisf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unsigned_float:SF (match_operand:TI 1 "register_operand" "r")))
+ (clobber (match_scratch:SF 2 "=r"))
+ (clobber (match_scratch:SF 3 "=r"))
+ (clobber (match_scratch:SF 4 "=r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SF 0)
+ (unsigned_float:SF (match_dup:TI 1)))]
+ {
+ rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+ rtx op2_v4sf = gen_rtx_REG (V4SFmode, REGNO (operands[2]));
+ rtx op2_ti = gen_rtx_REG (TImode, REGNO (operands[2]));
+ rtx op3_ti = gen_rtx_REG (TImode, REGNO (operands[3]));
+
+ REAL_VALUE_TYPE scale;
+ real_2expN (&scale, 32, SFmode);
+
+ emit_insn (gen_floatunsv4siv4sf2 (op2_v4sf, op1_v4si));
+ emit_insn (gen_shlqby_ti (op3_ti, op2_ti, GEN_INT (4)));
+
+ emit_move_insn (operands[4],
+ CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode));
+ emit_insn (gen_fmasf4 (operands[2],
+ operands[2], operands[4], operands[3]));
+
+ emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
+ emit_insn (gen_fmasf4 (operands[2],
+ operands[2], operands[4], operands[3]));
+
+ emit_insn (gen_shlqby_ti (op3_ti, op3_ti, GEN_INT (4)));
+ emit_insn (gen_fmasf4 (operands[0],
+ operands[2], operands[4], operands[3]));
+ DONE;
+ })
+
+;; Do (double)(operands[1]+0x80000000u)-(double)0x80000000
+(define_expand "floatsidf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_operand:SI 1 "register_operand" "")))]
+ ""
+ {
+ rtx c0 = gen_reg_rtx (SImode);
+ rtx c1 = gen_reg_rtx (DFmode);
+ rtx r0 = gen_reg_rtx (SImode);
+ rtx r1 = gen_reg_rtx (DFmode);
+
+ emit_move_insn (c0, GEN_INT (-0x80000000ll));
+ emit_move_insn (c1, spu_float_const ("2147483648", DFmode));
+ emit_insn (gen_xorsi3 (r0, operands[1], c0));
+ emit_insn (gen_floatunssidf2 (r1, r0));
+ emit_insn (gen_subdf3 (operands[0], r1, c1));
+ DONE;
+ })
+
+(define_expand "floatunssidf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "{
+ rtx value, insns;
+ rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
+ 0x06071415, 0x16178080);
+ rtx r0 = gen_reg_rtx (V16QImode);
+
+ if (optimize_size)
+ {
+ start_sequence ();
+ value =
+ emit_library_call_value (convert_optab_libfunc (ufloat_optab,
+ DFmode, SImode),
+ NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], SImode);
+ insns = get_insns ();
+ end_sequence ();
+ emit_libcall_block (insns, operands[0], value,
+ gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
+ }
+ else
+ {
+ emit_move_insn (r0, c0);
+ emit_insn (gen_floatunssidf2_internal (operands[0], operands[1], r0));
+ }
+ DONE;
+ }")
+
+(define_insn_and_split "floatunssidf2_internal"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF (match_operand:SI 1 "register_operand" "r")))
+ (use (match_operand:V16QI 2 "register_operand" "r"))
+ (clobber (match_scratch:V4SI 3 "=&r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))
+ (clobber (match_scratch:V4SI 5 "=&r"))
+ (clobber (match_scratch:V4SI 6 "=&r"))]
+ ""
+ "clz\t%3,%1\;il\t%6,1023+31\;shl\t%4,%1,%3\;ceqi\t%5,%3,32\;sf\t%6,%3,%6\;a\t%4,%4,%4\;andc\t%6,%6,%5\;shufb\t%6,%6,%4,%2\;shlqbii\t%0,%6,4"
+ "reload_completed"
+ [(set (match_dup:DF 0)
+ (unsigned_float:DF (match_dup:SI 1)))]
+ "{
+ rtx *ops = operands;
+ rtx op1_v4si = gen_rtx_REG(V4SImode, REGNO(ops[1]));
+ rtx op0_ti = gen_rtx_REG (TImode, REGNO (ops[0]));
+ rtx op2_ti = gen_rtx_REG (TImode, REGNO (ops[2]));
+ rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6]));
+ emit_insn (gen_clzv4si2 (ops[3],op1_v4si));
+ emit_move_insn (ops[6], spu_const (V4SImode, 1023+31));
+ emit_insn (gen_vashlv4si3 (ops[4],op1_v4si,ops[3]));
+ emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32)));
+ emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3]));
+ emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4]));
+ emit_insn (gen_andc_v4si (ops[6],ops[6],ops[5]));
+ emit_insn (gen_shufb (ops[6],ops[6],ops[4],op2_ti));
+ emit_insn (gen_shlqbi_ti (op0_ti,op6_ti,GEN_INT(4)));
+ DONE;
+ }"
+ [(set_attr "length" "32")])
+
+(define_expand "floatdidf2"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float:DF (match_operand:DI 1 "register_operand" "")))]
+ ""
+ {
+ rtx c0 = gen_reg_rtx (DImode);
+ rtx r0 = gen_reg_rtx (DImode);
+ rtx r1 = gen_reg_rtx (DFmode);
+ rtx r2 = gen_reg_rtx (DImode);
+ rtx setneg = gen_reg_rtx (DImode);
+ rtx isneg = gen_reg_rtx (SImode);
+ rtx neg = gen_reg_rtx (DImode);
+ rtx mask = gen_reg_rtx (DImode);
+
+ emit_move_insn (c0, GEN_INT (0x8000000000000000ull));
+
+ emit_insn (gen_negdi2 (neg, operands[1]));
+ emit_insn (gen_cgt_di_m1 (isneg, operands[1]));
+ emit_insn (gen_extend_compare (mask, isneg));
+ emit_insn (gen_selb (r0, neg, operands[1], mask));
+ emit_insn (gen_andc_di (setneg, c0, mask));
+
+ emit_insn (gen_floatunsdidf2 (r1, r0));
+
+ emit_insn (gen_iordi3 (r2, gen_rtx_SUBREG (DImode, r1, 0), setneg));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (DFmode, r2, 0));
+ DONE;
+ })
+
+(define_expand "floatunsdidf2"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))]
+ ""
+ "{
+ rtx value, insns;
+ rtx c0 = spu_const_from_ints (V16QImode, 0x02031011, 0x12138080,
+ 0x06071415, 0x16178080);
+ rtx c1 = spu_const_from_ints (V4SImode, 1023+63, 1023+31, 0, 0);
+ rtx r0 = gen_reg_rtx (V16QImode);
+ rtx r1 = gen_reg_rtx (V4SImode);
+
+ if (optimize_size)
+ {
+ start_sequence ();
+ value =
+ emit_library_call_value (convert_optab_libfunc (ufloat_optab,
+ DFmode, DImode),
+ NULL_RTX, LCT_NORMAL, DFmode, 1, operands[1], DImode);
+ insns = get_insns ();
+ end_sequence ();
+ emit_libcall_block (insns, operands[0], value,
+ gen_rtx_UNSIGNED_FLOAT (DFmode, operands[1]));
+ }
+ else
+ {
+ emit_move_insn (r1, c1);
+ emit_move_insn (r0, c0);
+ emit_insn (gen_floatunsdidf2_internal (operands[0], operands[1], r0, r1));
+ }
+ DONE;
+ }")
+
+(define_insn_and_split "floatunsdidf2_internal"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))
+ (use (match_operand:V16QI 2 "register_operand" "r"))
+ (use (match_operand:V4SI 3 "register_operand" "r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))
+ (clobber (match_scratch:V4SI 5 "=&r"))
+ (clobber (match_scratch:V4SI 6 "=&r"))]
+ ""
+ "clz\t%4,%1\;shl\t%5,%1,%4\;ceqi\t%6,%4,32\;sf\t%4,%4,%3\;a\t%5,%5,%5\;andc\t%4,%4,%6\;shufb\t%4,%4,%5,%2\;shlqbii\t%4,%4,4\;shlqbyi\t%5,%4,8\;dfa\t%0,%4,%5"
+ "reload_completed"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unsigned_float:DF (match_operand:DI 1 "register_operand" "r")))]
+ "{
+ rtx *ops = operands;
+ rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO(ops[1]));
+ rtx op2_ti = gen_rtx_REG (TImode, REGNO(ops[2]));
+ rtx op4_ti = gen_rtx_REG (TImode, REGNO(ops[4]));
+ rtx op5_ti = gen_rtx_REG (TImode, REGNO(ops[5]));
+ rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4]));
+ rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5]));
+ emit_insn (gen_clzv4si2 (ops[4],op1_v4si));
+ emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4]));
+ emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32)));
+ emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4]));
+ emit_insn (gen_addv4si3 (ops[5],ops[5],ops[5]));
+ emit_insn (gen_andc_v4si (ops[4],ops[4],ops[6]));
+ emit_insn (gen_shufb (ops[4],ops[4],ops[5],op2_ti));
+ emit_insn (gen_shlqbi_ti (op4_ti,op4_ti,GEN_INT(4)));
+ emit_insn (gen_shlqby_ti (op5_ti,op4_ti,GEN_INT(8)));
+ emit_insn (gen_adddf3 (ops[0],op4_df,op5_df));
+ DONE;
+ }"
+ [(set_attr "length" "40")])
+
+
+;; add
+
+(define_expand "addv16qi3"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (plus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (match_operand:V16QI 2 "spu_reg_operand" "r")))]
+ ""
+ "{
+ rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
+ rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
+ rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
+ rtx rhs_and = gen_reg_rtx (V8HImode);
+ rtx hi_char = gen_reg_rtx (V8HImode);
+ rtx lo_char = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (V8HImode);
+
+ emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
+ emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
+ emit_insn (gen_addv8hi3 (hi_char, lhs_short, rhs_and));
+ emit_insn (gen_addv8hi3 (lo_char, lhs_short, rhs_short));
+ emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
+ DONE;
+ }")
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (plus:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VHSI 2 "spu_arith_operand" "r,B")))]
+ ""
+ "@
+ a<bh>\t%0,%1,%2
+ a<bh>i\t%0,%1,%2")
+
+(define_expand "add<mode>3"
+ [(set (match_dup:VDI 3)
+ (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
+ (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_CG))
+ (set (match_dup:VDI 5)
+ (unspec:VDI [(match_dup 3)
+ (match_dup 3)
+ (match_dup:TI 4)] UNSPEC_SHUFB))
+ (set (match_operand:VDI 0 "spu_reg_operand" "")
+ (unspec:VDI [(match_dup 1)
+ (match_dup 2)
+ (match_dup 5)] UNSPEC_ADDX))]
+ ""
+ {
+ unsigned char pat[16] = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0x80, 0x80, 0x80, 0x80,
+ 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x80, 0x80, 0x80, 0x80
+ };
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (TImode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (operands[4], array_to_constant (TImode, pat));
+ })
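+
+;; A reading of the constant pattern above (following the shufb
+;; semantics): control bytes 0x04-0x07 and 0x0c-0x0f copy the carry
+;; computed for the low 32-bit word of each 64-bit lane into the
+;; high-word slot, and 0x80 control bytes produce zero bytes, so the
+;; final addx adds the carry into the high word only.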
+
+(define_insn "cg_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")] UNSPEC_CG))]
+ "operands"
+ "cg\t%0,%1,%2")
+
+(define_insn "cgx_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand 3 "spu_reg_operand" "0")] UNSPEC_CGX))]
+ "operands"
+ "cgx\t%0,%1,%2")
+
+(define_insn "addx_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand 3 "spu_reg_operand" "0")] UNSPEC_ADDX))]
+ "operands"
+ "addx\t%0,%1,%2")
+
+
+;; This is not the most efficient implementation of addti3.
+;; We include it here because 1) the compiler needs it to be
+;; defined, as the word size is 128 bits, and 2) sometimes gcc
+;; substitutes an add for a constant left-shift.  Case 2) is unlikely
+;; because we also give addti3 a high cost.  In case gcc does
+;; generate a TImode add, here is the code to do it.
+;; Operand 2 is a nonmemory operand because the compiler requires it.
+(define_insn "addti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=&r")
+ (plus:TI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:TI 2 "spu_nonmem_operand" "r")))
+ (clobber (match_scratch:TI 3 "=&r"))]
+ ""
+ "cg\t%3,%1,%2\n\\
+ shlqbyi\t%3,%3,4\n\\
+ cgx\t%3,%1,%2\n\\
+ shlqbyi\t%3,%3,4\n\\
+ cgx\t%3,%1,%2\n\\
+ shlqbyi\t%0,%3,4\n\\
+ addx\t%0,%1,%2"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "28")])
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (plus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))]
+ ""
+ "fa\t%0,%1,%2"
+ [(set_attr "type" "fp6")])
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (plus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ ""
+ "dfa\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+
+;; sub
+
+(define_expand "subv16qi3"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (minus:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (match_operand:V16QI 2 "spu_reg_operand" "r")))]
+ ""
+ "{
+ rtx res_short = simplify_gen_subreg (V8HImode, operands[0], V16QImode, 0);
+ rtx lhs_short = simplify_gen_subreg (V8HImode, operands[1], V16QImode, 0);
+ rtx rhs_short = simplify_gen_subreg (V8HImode, operands[2], V16QImode, 0);
+ rtx rhs_and = gen_reg_rtx (V8HImode);
+ rtx hi_char = gen_reg_rtx (V8HImode);
+ rtx lo_char = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (V8HImode);
+
+ emit_move_insn (mask, spu_const (V8HImode, 0x00ff));
+ emit_insn (gen_andv8hi3 (rhs_and, rhs_short, spu_const (V8HImode, 0xff00)));
+ emit_insn (gen_subv8hi3 (hi_char, lhs_short, rhs_and));
+ emit_insn (gen_subv8hi3 (lo_char, lhs_short, rhs_short));
+ emit_insn (gen_selb (res_short, hi_char, lo_char, mask));
+ DONE;
+ }")
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (minus:VHSI (match_operand:VHSI 1 "spu_arith_operand" "r,B")
+ (match_operand:VHSI 2 "spu_reg_operand" "r,r")))]
+ ""
+ "@
+ sf<bh>\t%0,%2,%1
+ sf<bh>i\t%0,%2,%1")
+
+(define_expand "sub<mode>3"
+ [(set (match_dup:VDI 3)
+ (unspec:VDI [(match_operand:VDI 1 "spu_reg_operand" "")
+ (match_operand:VDI 2 "spu_reg_operand" "")] UNSPEC_BG))
+ (set (match_dup:VDI 5)
+ (unspec:VDI [(match_dup 3)
+ (match_dup 3)
+ (match_dup:TI 4)] UNSPEC_SHUFB))
+ (set (match_operand:VDI 0 "spu_reg_operand" "")
+ (unspec:VDI [(match_dup 1)
+ (match_dup 2)
+ (match_dup 5)] UNSPEC_SFX))]
+ ""
+ {
+ unsigned char pat[16] = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0xc0, 0xc0, 0xc0, 0xc0,
+ 0x0c, 0x0d, 0x0e, 0x0f,
+ 0xc0, 0xc0, 0xc0, 0xc0
+ };
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (TImode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (operands[4], array_to_constant (TImode, pat));
+ })
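+
+;; Same trick as the add expander above, but for borrows: the shuffle
+;; moves the borrow computed for the low word of each 64-bit lane into
+;; the high-word slot, and the 0xc0 control bytes produce 0xff bytes
+;; (per the shufb semantics), i.e. "no borrow in" for the low word.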
+
+(define_insn "bg_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")] UNSPEC_BG))]
+ "operands"
+ "bg\t%0,%2,%1")
+
+(define_insn "bgx_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand 3 "spu_reg_operand" "0")] UNSPEC_BGX))]
+ "operands"
+ "bgx\t%0,%2,%1")
+
+(define_insn "sfx_<mode>"
+ [(set (match_operand:CBOP 0 "spu_reg_operand" "=r")
+ (unspec:CBOP [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand 3 "spu_reg_operand" "0")] UNSPEC_SFX))]
+ "operands"
+ "sfx\t%0,%2,%1")
+
+(define_insn "subti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (minus:TI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:TI 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:TI 3 "=&r"))
+ (clobber (match_scratch:TI 4 "=&r"))
+ (clobber (match_scratch:TI 5 "=&r"))
+ (clobber (match_scratch:TI 6 "=&r"))]
+ ""
+ "il\t%6,1\n\\
+ bg\t%3,%2,%1\n\\
+ xor\t%3,%3,%6\n\\
+ sf\t%4,%2,%1\n\\
+ shlqbyi\t%5,%3,4\n\\
+ bg\t%3,%5,%4\n\\
+ xor\t%3,%3,%6\n\\
+ sf\t%4,%5,%4\n\\
+ shlqbyi\t%5,%3,4\n\\
+ bg\t%3,%5,%4\n\\
+ xor\t%3,%3,%6\n\\
+ sf\t%4,%5,%4\n\\
+ shlqbyi\t%5,%3,4\n\\
+ sf\t%0,%5,%4"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "56")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (minus:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))]
+ ""
+ "fs\t%0,%1,%2"
+ [(set_attr "type" "fp6")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (minus:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ ""
+ "dfs\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+
+;; neg
+
+(define_expand "negv16qi2"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (neg:V16QI (match_operand:V16QI 1 "spu_reg_operand" "r")))]
+ ""
+ "{
+ rtx zero = gen_reg_rtx (V16QImode);
+ emit_move_insn (zero, CONST0_RTX (V16QImode));
+ emit_insn (gen_subv16qi3 (operands[0], zero, operands[1]));
+ DONE;
+ }")
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
+ (neg:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")))]
+ ""
+ "sf<bh>i\t%0,%1,0")
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "spu_reg_operand" "")
+ (neg:DI (match_operand:DI 1 "spu_reg_operand" "")))]
+ ""
+ {
+ rtx zero = gen_reg_rtx(DImode);
+ emit_move_insn(zero, GEN_INT(0));
+ emit_insn (gen_subdi3(operands[0], zero, operands[1]));
+ DONE;
+ })
+
+(define_expand "negti2"
+ [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (neg:TI (match_operand:TI 1 "spu_reg_operand" "")))]
+ ""
+ {
+ rtx zero = gen_reg_rtx(TImode);
+ emit_move_insn(zero, GEN_INT(0));
+ emit_insn (gen_subti3(operands[0], zero, operands[1]));
+ DONE;
+ })
+
+(define_expand "neg<mode>2"
+ [(parallel
+ [(set (match_operand:VSF 0 "spu_reg_operand" "")
+ (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
+ (use (match_dup 2))])]
+ ""
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
+
+(define_expand "neg<mode>2"
+ [(parallel
+ [(set (match_operand:VDF 0 "spu_reg_operand" "")
+ (neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
+ (use (match_dup 2))])]
+ ""
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
+
+(define_insn_and_split "_neg<mode>2"
+ [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
+ (neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:<F2I> 3)
+ (xor:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
+ {
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
+ })
+
+
+;; abs
+
+(define_expand "abs<mode>2"
+ [(parallel
+ [(set (match_operand:VSF 0 "spu_reg_operand" "")
+ (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
+ (use (match_dup 2))])]
+ ""
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
+
+(define_expand "abs<mode>2"
+ [(parallel
+ [(set (match_operand:VDF 0 "spu_reg_operand" "")
+ (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
+ (use (match_dup 2))])]
+ ""
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
+
+(define_insn_and_split "_abs<mode>2"
+ [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
+ (abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:<F2I> 3)
+ (and:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
+ {
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
+ })
+
+
+;; mul
+
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r,r")
+ (mult:HI (match_operand:HI 1 "spu_reg_operand" "r,r")
+ (match_operand:HI 2 "spu_arith_operand" "r,B")))]
+ ""
+ "@
+ mpy\t%0,%1,%2
+ mpyi\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_expand "mulv8hi3"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "")
+ (mult:V8HI (match_operand:V8HI 1 "spu_reg_operand" "")
+ (match_operand:V8HI 2 "spu_reg_operand" "")))]
+ ""
+ "{
+ rtx result = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
+ rtx low = gen_reg_rtx (V4SImode);
+ rtx high = gen_reg_rtx (V4SImode);
+ rtx shift = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (V4SImode);
+
+ emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
+ emit_insn (gen_spu_mpyhh (high, operands[1], operands[2]));
+ emit_insn (gen_spu_mpy (low, operands[1], operands[2]));
+ emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16)));
+ emit_insn (gen_selb (result, shift, low, mask));
+ DONE;
+ }")
+
+(define_expand "mul<mode>3"
+ [(parallel
+ [(set (match_operand:VSI 0 "spu_reg_operand" "")
+ (mult:VSI (match_operand:VSI 1 "spu_reg_operand" "")
+ (match_operand:VSI 2 "spu_reg_operand" "")))
+ (clobber (match_dup:VSI 3))
+ (clobber (match_dup:VSI 4))
+ (clobber (match_dup:VSI 5))
+ (clobber (match_dup:VSI 6))])]
+ ""
+ {
+ operands[3] = gen_reg_rtx(<MODE>mode);
+ operands[4] = gen_reg_rtx(<MODE>mode);
+ operands[5] = gen_reg_rtx(<MODE>mode);
+ operands[6] = gen_reg_rtx(<MODE>mode);
+ })
+
+(define_insn_and_split "_mulsi3"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "spu_arith_operand" "rK")))
+ (clobber (match_operand:SI 3 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:SI 4 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:SI 5 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:SI 6 "spu_reg_operand" "=&r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:SI 0)
+ (mult:SI (match_dup:SI 1)
+ (match_dup:SI 2)))]
+ {
+ HOST_WIDE_INT val = 0;
+ rtx a = operands[3];
+ rtx b = operands[4];
+ rtx c = operands[5];
+ rtx d = operands[6];
+ if (GET_CODE(operands[2]) == CONST_INT)
+ {
+ val = INTVAL(operands[2]);
+ emit_move_insn(d, operands[2]);
+ operands[2] = d;
+ }
+ if (val && (val & 0xffff) == 0)
+ {
+ emit_insn (gen_mpyh_si(operands[0], operands[2], operands[1]));
+ }
+ else if (val > 0 && val < 0x10000)
+ {
+ rtx cst = satisfies_constraint_K (GEN_INT (val)) ? GEN_INT(val) : d;
+ emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
+ emit_insn (gen_mpyu_si(c, operands[1], cst));
+ emit_insn (gen_addsi3(operands[0], a, c));
+ }
+ else
+ {
+ emit_insn (gen_mpyh_si(a, operands[1], operands[2]));
+ emit_insn (gen_mpyh_si(b, operands[2], operands[1]));
+ emit_insn (gen_mpyu_si(c, operands[1], operands[2]));
+ emit_insn (gen_addsi3(d, a, b));
+ emit_insn (gen_addsi3(operands[0], d, c));
+ }
+ DONE;
+ })
+
+(define_insn_and_split "_mulv4si3"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (mult:V4SI (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (match_operand:V4SI 2 "spu_reg_operand" "r")))
+ (clobber (match_operand:V4SI 3 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:V4SI 4 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:V4SI 5 "spu_reg_operand" "=&r"))
+ (clobber (match_operand:V4SI 6 "spu_reg_operand" "=&r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:V4SI 0)
+ (mult:V4SI (match_dup:V4SI 1)
+ (match_dup:V4SI 2)))]
+ {
+ rtx a = operands[3];
+ rtx b = operands[4];
+ rtx c = operands[5];
+ rtx d = operands[6];
+ rtx op1 = simplify_gen_subreg (V8HImode, operands[1], V4SImode, 0);
+ rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
+ emit_insn (gen_spu_mpyh(a, op1, op2));
+ emit_insn (gen_spu_mpyh(b, op2, op1));
+ emit_insn (gen_spu_mpyu(c, op1, op2));
+ emit_insn (gen_addv4si3(d, a, b));
+ emit_insn (gen_addv4si3(operands[0], d, c));
+ DONE;
+ })
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
+ ""
+ "mpy\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mulhisi3_imm"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (match_operand:SI 2 "imm_K_operand" "K")))]
+ ""
+ "mpyi\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (zero_extend:SI (match_operand:HI 2 "spu_reg_operand" "r"))))]
+ ""
+ "mpyu\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "umulhisi3_imm"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (and:SI (match_operand:SI 2 "imm_K_operand" "K") (const_int 65535))))]
+ ""
+ "mpyui\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpyu_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r,r")
+ (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r,r")
+ (const_int 65535))
+ (and:SI (match_operand:SI 2 "spu_arith_operand" "r,K")
+ (const_int 65535))))]
+ ""
+ "@
+ mpyu\t%0,%1,%2
+ mpyui\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+;; This isn't always profitable to use. Consider r = a * b + c * d.
+;; It's faster to do the multiplies in parallel and then add them. If
+;; we merge a multiply with an add, it prevents the multiplies from
+;; happening in parallel.
+(define_insn "mpya_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (plus:SI (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
+ (match_operand:SI 3 "spu_reg_operand" "r")))]
+ "0"
+ "mpya\t%0,%1,%2,%3"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpyh_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (and:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (const_int -65536))
+ (and:SI (match_operand:SI 2 "spu_reg_operand" "r")
+ (const_int 65535))))]
+ ""
+ "mpyh\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpys_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (ashiftrt:SI
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "spu_reg_operand" "r"))
+ (sign_extend:SI (match_operand:HI 2 "spu_reg_operand" "r")))
+ (const_int 16)))]
+ ""
+ "mpys\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpyhh_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (const_int 16))
+ (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
+ (const_int 16))))]
+ ""
+ "mpyhh\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpyhhu_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (mult:SI (lshiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (const_int 16))
+ (lshiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
+ (const_int 16))))]
+ ""
+ "mpyhhu\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mpyhha_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (plus:SI (mult:SI (ashiftrt:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (const_int 16))
+ (ashiftrt:SI (match_operand:SI 2 "spu_reg_operand" "r")
+ (const_int 16)))
+ (match_operand:SI 3 "spu_reg_operand" "0")))]
+ "0"
+ "mpyhha\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
+ (mult:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")
+ (match_operand:VSDF 2 "spu_reg_operand" "r")))]
+ ""
+ "<d>fm\t%0,%1,%2"
+ [(set_attr "type" "fp<d6>")])
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (fma:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")
+ (match_operand:VSF 3 "spu_reg_operand" "r")))]
+ ""
+ "fma\t%0,%1,%2,%3"
+ [(set_attr "type" "fp6")])
+
+;; ??? The official description is (c - a*b), which is exactly (-a*b + c).
+;; Note that this doesn't match the dfnms description. Incorrect?
+(define_insn "fnma<mode>4"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (fma:VSF
+ (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ (match_operand:VSF 2 "spu_reg_operand" "r")
+ (match_operand:VSF 3 "spu_reg_operand" "r")))]
+ ""
+ "fnms\t%0,%1,%2,%3"
+ [(set_attr "type" "fp6")])
+
+(define_insn "fms<mode>4"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (fma:VSF
+ (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")
+ (neg:VSF (match_operand:VSF 3 "spu_reg_operand" "r"))))]
+ ""
+ "fms\t%0,%1,%2,%3"
+ [(set_attr "type" "fp6")])
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")
+ (match_operand:VDF 3 "spu_reg_operand" "0")))]
+ ""
+ "dfma\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "fms<mode>4"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (fma:VDF
+ (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")
+ (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0"))))]
+ ""
+ "dfms\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "nfma<mode>4"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (neg:VDF
+ (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")
+ (match_operand:VDF 3 "spu_reg_operand" "0"))))]
+ ""
+ "dfnma\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "nfms<mode>4"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+ (neg:VDF
+ (fma:VDF
+ (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")
+ (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0")))))]
+ ""
+ "dfnms\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
+(define_expand "fnma<mode>4"
+ [(set (match_operand:VDF 0 "spu_reg_operand" "")
+ (neg:VDF
+ (fma:VDF
+ (match_operand:VDF 1 "spu_reg_operand" "")
+ (match_operand:VDF 2 "spu_reg_operand" "")
+ (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "")))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+ "")
+
+;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
+(define_expand "fnms<mode>4"
+ [(set (match_operand:VDF 0 "register_operand" "")
+ (neg:VDF
+ (fma:VDF
+ (match_operand:VDF 1 "register_operand" "")
+ (match_operand:VDF 2 "register_operand" "")
+ (match_operand:VDF 3 "register_operand" ""))))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+ "")
+
+;; mul highpart, used for divide by constant optimizations.
+
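+;; Splitting each operand into 16-bit halves, the high 32 bits of the
+;; 64-bit product are
+;;   hi (a * b) = a_hi * b_hi + hi16 (a_hi * b_lo) + hi16 (a_lo * b_hi)
+;;                + carries out of the low 32 bits
+;; The signed expander below computes a_hi * b_hi with mpyhh and the
+;; high halves of the cross products with mpys, sums the partial sums
+;; with explicit carries (cg/addx), and then corrects for mpys
+;; sign-extending both operands when the cross products really need a
+;; signed-by-unsigned multiply.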
+(define_expand "smulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (ashiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))]
+ ""
+ {
+ rtx t0 = gen_reg_rtx (SImode);
+ rtx t1 = gen_reg_rtx (SImode);
+ rtx t2 = gen_reg_rtx (SImode);
+ rtx t3 = gen_reg_rtx (SImode);
+ rtx t4 = gen_reg_rtx (SImode);
+ rtx t5 = gen_reg_rtx (SImode);
+ rtx t6 = gen_reg_rtx (SImode);
+ rtx t7 = gen_reg_rtx (SImode);
+ rtx t8 = gen_reg_rtx (SImode);
+ rtx t9 = gen_reg_rtx (SImode);
+ rtx t11 = gen_reg_rtx (SImode);
+ rtx t12 = gen_reg_rtx (SImode);
+ rtx t14 = gen_reg_rtx (SImode);
+ rtx t15 = gen_reg_rtx (HImode);
+ rtx t16 = gen_reg_rtx (HImode);
+ rtx t17 = gen_reg_rtx (HImode);
+ rtx t18 = gen_reg_rtx (HImode);
+ rtx t19 = gen_reg_rtx (SImode);
+ rtx t20 = gen_reg_rtx (SImode);
+ rtx t21 = gen_reg_rtx (SImode);
+ rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
+ rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
+ rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
+ rtx t1_hi = gen_rtx_SUBREG (HImode, t1, 2);
+
+ rtx insn = emit_insn (gen_lshrsi3 (t0, operands[1], GEN_INT (16)));
+ emit_insn (gen_lshrsi3 (t1, operands[2], GEN_INT (16)));
+ emit_insn (gen_umulhisi3 (t2, op1_hi, op2_hi));
+ emit_insn (gen_mpyh_si (t3, operands[1], operands[2]));
+ emit_insn (gen_mpyh_si (t4, operands[2], operands[1]));
+ emit_insn (gen_mpyhh_si (t5, operands[1], operands[2]));
+ emit_insn (gen_mpys_si (t6, t0_hi, op2_hi));
+ emit_insn (gen_mpys_si (t7, t1_hi, op1_hi));
+
+ /* Gen carry bits (in t9 and t11). */
+ emit_insn (gen_addsi3 (t8, t2, t3));
+ emit_insn (gen_cg_si (t9, t2, t3));
+ emit_insn (gen_cg_si (t11, t8, t4));
+
+ /* Gen high 32 bits in operand[0]. Correct for mpys. */
+ emit_insn (gen_addx_si (t12, t5, t6, t9));
+ emit_insn (gen_addx_si (t14, t12, t7, t11));
+
+ /* mpys treats both operands as signed when we really want it to treat
+ the first operand as signed and the second operand as unsigned.
+ The code below corrects for that difference. */
+ emit_insn (gen_cgt_hi (t15, op1_hi, GEN_INT (-1)));
+ emit_insn (gen_cgt_hi (t16, op2_hi, GEN_INT (-1)));
+ emit_insn (gen_andc_hi (t17, t1_hi, t15));
+ emit_insn (gen_andc_hi (t18, t0_hi, t16));
+ emit_insn (gen_extendhisi2 (t19, t17));
+ emit_insn (gen_extendhisi2 (t20, t18));
+ emit_insn (gen_addsi3 (t21, t19, t20));
+ emit_insn (gen_addsi3 (operands[0], t14, t21));
+ unshare_all_rtl_in_chain (insn);
+ DONE;
+ })
+
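+;; The unsigned variant rotates operand 2 left by 16 so the cross
+;; products can be formed with umulhisi3/mpyhhu, splits each cross
+;; product into its high and low 16 bits, and sums everything with
+;; explicit carries (cg/addx).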
+(define_expand "umulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (ashiftrt:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))]
+ ""
+ {
+ rtx t0 = gen_reg_rtx (SImode);
+ rtx t1 = gen_reg_rtx (SImode);
+ rtx t2 = gen_reg_rtx (SImode);
+ rtx t3 = gen_reg_rtx (SImode);
+ rtx t4 = gen_reg_rtx (SImode);
+ rtx t5 = gen_reg_rtx (SImode);
+ rtx t6 = gen_reg_rtx (SImode);
+ rtx t7 = gen_reg_rtx (SImode);
+ rtx t8 = gen_reg_rtx (SImode);
+ rtx t9 = gen_reg_rtx (SImode);
+ rtx t10 = gen_reg_rtx (SImode);
+ rtx t12 = gen_reg_rtx (SImode);
+ rtx t13 = gen_reg_rtx (SImode);
+ rtx t14 = gen_reg_rtx (SImode);
+ rtx op1_hi = gen_rtx_SUBREG (HImode, operands[1], 2);
+ rtx op2_hi = gen_rtx_SUBREG (HImode, operands[2], 2);
+ rtx t0_hi = gen_rtx_SUBREG (HImode, t0, 2);
+
+ rtx insn = emit_insn (gen_rotlsi3 (t0, operands[2], GEN_INT (16)));
+ emit_insn (gen_umulhisi3 (t1, op1_hi, op2_hi));
+ emit_insn (gen_umulhisi3 (t2, op1_hi, t0_hi));
+ emit_insn (gen_mpyhhu_si (t3, operands[1], t0));
+ emit_insn (gen_mpyhhu_si (t4, operands[1], operands[2]));
+ emit_insn (gen_ashlsi3 (t5, t2, GEN_INT (16)));
+ emit_insn (gen_ashlsi3 (t6, t3, GEN_INT (16)));
+ emit_insn (gen_lshrsi3 (t7, t2, GEN_INT (16)));
+ emit_insn (gen_lshrsi3 (t8, t3, GEN_INT (16)));
+
+ /* Gen carry bits (in t10 and t12). */
+ emit_insn (gen_addsi3 (t9, t1, t5));
+ emit_insn (gen_cg_si (t10, t1, t5));
+ emit_insn (gen_cg_si (t12, t9, t6));
+
+ /* Gen high 32 bits in operand[0]. */
+ emit_insn (gen_addx_si (t13, t4, t7, t10));
+ emit_insn (gen_addx_si (t14, t13, t8, t12));
+ emit_insn (gen_movsi (operands[0], t14));
+ unshare_all_rtl_in_chain (insn);
+
+ DONE;
+ })
+
+;; div
+
+;; Not necessarily the best implementation of divide, but faster than
+;; the default that gcc provides because this is inlined and it uses
+;; clz.
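+;;
+;; Both sequences normalize the divisor against the dividend using clz
+;; and then run a shift-and-subtract loop, accumulating quotient bits
+;; in %0 and the running remainder in %3. The signed version first
+;; takes absolute values (sfi/cgti/selb) and fixes up the signs of the
+;; quotient and remainder at the end. heqi traps on a zero divisor.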
+(define_insn "divmodsi4"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
+ (div:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "spu_reg_operand" "r")))
+ (set (match_operand:SI 3 "spu_reg_operand" "=&r")
+ (mod:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=&r"))
+ (clobber (match_scratch:SI 6 "=&r"))
+ (clobber (match_scratch:SI 7 "=&r"))
+ (clobber (match_scratch:SI 8 "=&r"))
+ (clobber (match_scratch:SI 9 "=&r"))
+ (clobber (match_scratch:SI 10 "=&r"))
+ (clobber (match_scratch:SI 11 "=&r"))
+ (clobber (match_scratch:SI 12 "=&r"))
+ (clobber (reg:SI 130))]
+ ""
+ "heqi %2,0\\n\\
+ hbrr 3f,1f\\n\\
+ sfi %8,%1,0\\n\\
+ sfi %9,%2,0\\n\\
+ cgti %10,%1,-1\\n\\
+ cgti %11,%2,-1\\n\\
+ selb %8,%8,%1,%10\\n\\
+ selb %9,%9,%2,%11\\n\\
+ clz %4,%8\\n\\
+ clz %7,%9\\n\\
+ il %5,1\\n\\
+ fsmbi %0,0\\n\\
+ sf %7,%4,%7\\n\\
+ shlqbyi %3,%8,0\\n\\
+ xor %11,%10,%11\\n\\
+ shl %5,%5,%7\\n\\
+ shl %4,%9,%7\\n\\
+ lnop \\n\\
+1: or %12,%0,%5\\n\\
+ rotqmbii %5,%5,-1\\n\\
+ clgt %6,%4,%3\\n\\
+ lnop \\n\\
+ sf %7,%4,%3\\n\\
+ rotqmbii %4,%4,-1\\n\\
+ selb %0,%12,%0,%6\\n\\
+ lnop \\n\\
+ selb %3,%7,%3,%6\\n\\
+3: brnz %5,1b\\n\\
+2: sfi %8,%3,0\\n\\
+ sfi %9,%0,0\\n\\
+ selb %3,%8,%3,%10\\n\\
+ selb %0,%0,%9,%11"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "128")])
+
+(define_insn "udivmodsi4"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
+ (udiv:SI (match_operand:SI 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "spu_reg_operand" "r")))
+ (set (match_operand:SI 3 "spu_reg_operand" "=&r")
+ (umod:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=&r"))
+ (clobber (match_scratch:SI 6 "=&r"))
+ (clobber (match_scratch:SI 7 "=&r"))
+ (clobber (match_scratch:SI 8 "=&r"))
+ (clobber (reg:SI 130))]
+ ""
+ "heqi %2,0\\n\\
+ hbrr 3f,1f\\n\\
+ clz %7,%2\\n\\
+ clz %4,%1\\n\\
+ il %5,1\\n\\
+ fsmbi %0,0\\n\\
+ sf %7,%4,%7\\n\\
+ ori %3,%1,0\\n\\
+ shl %5,%5,%7\\n\\
+ shl %4,%2,%7\\n\\
+1: or %8,%0,%5\\n\\
+ rotqmbii %5,%5,-1\\n\\
+ clgt %6,%4,%3\\n\\
+ lnop \\n\\
+ sf %7,%4,%3\\n\\
+ rotqmbii %4,%4,-1\\n\\
+ selb %0,%8,%0,%6\\n\\
+ lnop \\n\\
+ selb %3,%7,%3,%6\\n\\
+3: brnz %5,1b\\n\\
+2:"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "80")])
+
+(define_expand "div<mode>3"
+ [(parallel
+ [(set (match_operand:VSF 0 "spu_reg_operand" "")
+ (div:VSF (match_operand:VSF 1 "spu_reg_operand" "")
+ (match_operand:VSF 2 "spu_reg_operand" "")))
+ (clobber (match_scratch:VSF 3 ""))
+ (clobber (match_scratch:VSF 4 ""))
+ (clobber (match_scratch:VSF 5 ""))])]
+ ""
+ "")
+
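+;; Division is implemented by Newton-Raphson refinement of the frest
+;; reciprocal estimate: with x ~ 1/b from frest+fi, compute q0 = a * x
+;; and then q1 = q0 + x * (a - q0 * b) using fnma/fma. The _fast
+;; variant stops there; the _adjusted variant below additionally nudges
+;; the quotient by one ulp where the truncated estimate is too small.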
+(define_insn_and_split "*div<mode>3_fast"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:VSF 3 "=&r"))
+ (clobber (match_scratch:VSF 4 "=&r"))
+ (clobber (scratch:VSF))]
+ "flag_unsafe_math_optimizations"
+ "#"
+ "reload_completed"
+ [(set (match_dup:VSF 0)
+ (div:VSF (match_dup:VSF 1)
+ (match_dup:VSF 2)))
+ (clobber (match_dup:VSF 3))
+ (clobber (match_dup:VSF 4))
+ (clobber (scratch:VSF))]
+ {
+ emit_insn (gen_frest_<mode>(operands[3], operands[2]));
+ emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
+ emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3]));
+ emit_insn (gen_fnma<mode>4(operands[0], operands[4], operands[2], operands[1]));
+ emit_insn (gen_fma<mode>4(operands[0], operands[0], operands[3], operands[4]));
+ DONE;
+ })
+
+(define_insn_and_split "*div<mode>3_adjusted"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (div:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:VSF 3 "=&r"))
+ (clobber (match_scratch:VSF 4 "=&r"))
+ (clobber (match_scratch:VSF 5 "=&r"))]
+ "!flag_unsafe_math_optimizations"
+ "#"
+ "reload_completed"
+ [(set (match_dup:VSF 0)
+ (div:VSF (match_dup:VSF 1)
+ (match_dup:VSF 2)))
+ (clobber (match_dup:VSF 3))
+ (clobber (match_dup:VSF 4))
+ (clobber (match_dup:VSF 5))]
+ {
+ emit_insn (gen_frest_<mode> (operands[3], operands[2]));
+ emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3]));
+ emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3]));
+ emit_insn (gen_fnma<mode>4 (operands[5], operands[4], operands[2], operands[1]));
+ emit_insn (gen_fma<mode>4 (operands[3], operands[5], operands[3], operands[4]));
+
+ /* Due to truncation error, the quotient result may be low by 1 ulp.
+ Conditionally add one if the estimate is too small in magnitude. */
+
+ emit_move_insn (gen_lowpart (<F2I>mode, operands[4]),
+ spu_const (<F2I>mode, 0x80000000ULL));
+ emit_move_insn (gen_lowpart (<F2I>mode, operands[5]),
+ spu_const (<F2I>mode, 0x3f800000ULL));
+ emit_insn (gen_selb (operands[5], operands[5], operands[1], operands[4]));
+
+ emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]),
+ gen_lowpart (<F2I>mode, operands[3]),
+ spu_const (<F2I>mode, 1)));
+ emit_insn (gen_fnma<mode>4 (operands[0], operands[2], operands[4], operands[1]));
+ emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5]));
+ emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]),
+ gen_lowpart (<F2I>mode, operands[0]),
+ spu_const (<F2I>mode, -1)));
+ emit_insn (gen_selb (operands[0], operands[3], operands[4], operands[0]));
+ DONE;
+ })
+
+
+;; sqrt
+
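+;; Square root follows the same estimate-and-refine scheme: y ~
+;; 1/sqrt(a) comes from frsqest+fi, s = y * a approximates the root,
+;; and one Newton step s + 0.5 * s * (c - y * s), with c slightly above
+;; 1.0 to absorb truncation error, produces the final result.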
+(define_insn_and_split "sqrtsf2"
+ [(set (match_operand:SF 0 "spu_reg_operand" "=r")
+ (sqrt:SF (match_operand:SF 1 "spu_reg_operand" "r")))
+ (clobber (match_scratch:SF 2 "=&r"))
+ (clobber (match_scratch:SF 3 "=&r"))
+ (clobber (match_scratch:SF 4 "=&r"))
+ (clobber (match_scratch:SF 5 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SF 0)
+ (sqrt:SF (match_dup:SF 1)))
+ (clobber (match_dup:SF 2))
+ (clobber (match_dup:SF 3))
+ (clobber (match_dup:SF 4))
+ (clobber (match_dup:SF 5))]
+ {
+ emit_move_insn (operands[3],spu_float_const(\"0.5\",SFmode));
+ emit_move_insn (operands[4],spu_float_const(\"1.00000011920928955078125\",SFmode));
+ emit_insn (gen_frsqest_sf(operands[2],operands[1]));
+ emit_insn (gen_fi_sf(operands[2],operands[1],operands[2]));
+ emit_insn (gen_mulsf3(operands[5],operands[2],operands[1]));
+ emit_insn (gen_mulsf3(operands[3],operands[5],operands[3]));
+ emit_insn (gen_fnmasf4(operands[4],operands[2],operands[5],operands[4]));
+ emit_insn (gen_fmasf4(operands[0],operands[4],operands[3],operands[5]));
+ DONE;
+ })
+
+(define_insn "frest_<mode>"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FREST))]
+ ""
+ "frest\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+(define_insn "frsqest_<mode>"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")] UNSPEC_FRSQEST))]
+ ""
+ "frsqest\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+(define_insn "fi_<mode>"
+ [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
+ (unspec:VSF [(match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")] UNSPEC_FI))]
+ ""
+ "fi\t%0,%1,%2"
+ [(set_attr "type" "fp7")])
+
+
+;; and
+
+(define_insn "and<mode>3"
+ [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
+ (and:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
+ (match_operand:MOV 2 "spu_logical_operand" "r,C")))]
+ ""
+ "@
+ and\t%0,%1,%2
+ and%j2i\t%0,%1,%J2")
+
+(define_insn "anddi3"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
+ (and:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
+ (match_operand:DI 2 "spu_logical_operand" "r,c")))]
+ ""
+ "@
+ and\t%0,%1,%2
+ and%k2i\t%0,%1,%K2")
+
+(define_insn "andti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (and:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
+ ""
+ "@
+ and\t%0,%1,%2
+ and%m2i\t%0,%1,%L2")
+
+(define_insn "andc_<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (and:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
+ (match_operand:ALL 1 "spu_reg_operand" "r")))]
+ ""
+ "andc\t%0,%1,%2")
+
+(define_insn "nand_<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (not:ALL (and:ALL (match_operand:ALL 2 "spu_reg_operand" "r")
+ (match_operand:ALL 1 "spu_reg_operand" "r"))))]
+ ""
+ "nand\t%0,%1,%2")
+
+
+;; ior
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r,r")
+ (ior:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r,0")
+ (match_operand:MOV 2 "spu_ior_operand" "r,C,D")))]
+ ""
+ "@
+ or\t%0,%1,%2
+ or%j2i\t%0,%1,%J2
+ iohl\t%0,%J2")
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r,r,r")
+ (ior:DI (match_operand:DI 1 "spu_reg_operand" "r,r,0")
+ (match_operand:DI 2 "spu_ior_operand" "r,c,d")))]
+ ""
+ "@
+ or\t%0,%1,%2
+ or%k2i\t%0,%1,%K2
+ iohl\t%0,%K2")
+
+(define_insn "iorti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r,r")
+ (ior:TI (match_operand:TI 1 "spu_reg_operand" "r,r,0")
+ (match_operand:TI 2 "spu_ior_operand" "r,Y,Z")))]
+ ""
+ "@
+ or\t%0,%1,%2
+ or%m2i\t%0,%1,%L2
+ iohl\t%0,%L2")
+
+(define_insn "orc_<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (ior:ALL (not:ALL (match_operand:ALL 2 "spu_reg_operand" "r"))
+ (match_operand:ALL 1 "spu_reg_operand" "r")))]
+ ""
+ "orc\t%0,%1,%2")
+
+(define_insn "nor_<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (not:ALL (ior:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
+ (match_operand:ALL 2 "spu_reg_operand" "r"))))]
+ ""
+ "nor\t%0,%1,%2")
+
+;; xor
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:MOV 0 "spu_reg_operand" "=r,r")
+ (xor:MOV (match_operand:MOV 1 "spu_reg_operand" "r,r")
+ (match_operand:MOV 2 "spu_logical_operand" "r,B")))]
+ ""
+ "@
+ xor\t%0,%1,%2
+ xor%j2i\t%0,%1,%J2")
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
+ (xor:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
+ (match_operand:DI 2 "spu_logical_operand" "r,c")))]
+ ""
+ "@
+ xor\t%0,%1,%2
+ xor%k2i\t%0,%1,%K2")
+
+(define_insn "xorti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (xor:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (match_operand:TI 2 "spu_logical_operand" "r,Y")))]
+ ""
+ "@
+ xor\t%0,%1,%2
+ xor%m2i\t%0,%1,%L2")
+
+(define_insn "eqv_<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (not:ALL (xor:ALL (match_operand:ALL 1 "spu_reg_operand" "r")
+ (match_operand:ALL 2 "spu_reg_operand" "r"))))]
+ ""
+ "eqv\t%0,%1,%2")
+
+;; one_cmpl
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (not:ALL (match_operand:ALL 1 "spu_reg_operand" "r")))]
+ ""
+ "nor\t%0,%1,%1")
+
+
+;; selb
+
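+;; The expander patches the mode of the UNSPEC to match operand 0, so a
+;; single modeless _selb pattern can serve every scalar and vector mode.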
+(define_expand "selb"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")
+ (match_operand 2 "spu_reg_operand" "")
+ (match_operand 3 "spu_reg_operand" "")] UNSPEC_SELB))]
+ ""
+ {
+ rtx s = gen__selb (operands[0], operands[1], operands[2], operands[3]);
+ PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
+ emit_insn (s);
+ DONE;
+ })
+
+;; This could be defined as a combination of logical operations, but at
+;; one time doing so caused a crash due to recursive expansion of rtl
+;; during CSE.
+(define_insn "_selb"
+ [(set (match_operand 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand 3 "spu_reg_operand" "r")] UNSPEC_SELB))]
+ "GET_MODE(operands[0]) == GET_MODE(operands[1])
+ && GET_MODE(operands[1]) == GET_MODE(operands[2])"
+ "selb\t%0,%1,%2,%3")
+
+
+;; Misc. byte/bit operations
+;; clz/ctz/ffs/popcount/parity
+;; cntb/sumb
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:VSI 0 "spu_reg_operand" "=r")
+ (clz:VSI (match_operand:VSI 1 "spu_reg_operand" "r")))]
+ ""
+ "clz\t%0,%1")
+
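+;; x & -x isolates the lowest set bit, so for nonzero x,
+;;   ctz (x) = 31 - clz (x & -x)   and   ffs (x) = 32 - clz (x & -x).
+;; Since clz of zero is 32 on SPU, ffs (0) correctly yields 0.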
+(define_expand "ctz<mode>2"
+ [(set (match_dup 2)
+ (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
+ (set (match_dup 3) (and:VSI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4) (clz:VSI (match_dup 3)))
+ (set (match_operand:VSI 0 "spu_reg_operand" "")
+ (minus:VSI (match_dup 5) (match_dup 4)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = spu_const(<MODE>mode, 31);
+ })
+
+(define_expand "ffs<mode>2"
+ [(set (match_dup 2)
+ (neg:VSI (match_operand:VSI 1 "spu_reg_operand" "")))
+ (set (match_dup 3) (and:VSI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 4) (clz:VSI (match_dup 3)))
+ (set (match_operand:VSI 0 "spu_reg_operand" "")
+ (minus:VSI (match_dup 5) (match_dup 4)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = spu_const(<MODE>mode, 32);
+ })
+
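+;; popcount: cntb counts the ones in each byte and sumb adds the four
+;; bytes of each word; parity is then just the low bit of the popcount.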
+(define_expand "popcountsi2"
+ [(set (match_dup 2)
+ (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "")]
+ UNSPEC_CNTB))
+ (set (match_dup 3)
+ (unspec:HI [(match_dup 2)] UNSPEC_SUMB))
+ (set (match_operand:SI 0 "spu_reg_operand" "")
+ (sign_extend:SI (match_dup 3)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_reg_rtx (HImode);
+ })
+
+(define_expand "paritysi2"
+ [(set (match_operand:SI 0 "spu_reg_operand" "")
+ (parity:SI (match_operand:SI 1 "spu_reg_operand" "")))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (SImode);
+ emit_insn (gen_popcountsi2(operands[2], operands[1]));
+ emit_insn (gen_andsi3(operands[0], operands[2], GEN_INT (1)));
+ DONE;
+ })
+
+(define_insn "cntb_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "spu_reg_operand" "r")]
+ UNSPEC_CNTB))]
+ ""
+ "cntb\t%0,%1"
+ [(set_attr "type" "fxb")])
+
+(define_insn "cntb_v16qi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (unspec:V16QI [(match_operand:V16QI 1 "spu_reg_operand" "r")]
+ UNSPEC_CNTB))]
+ ""
+ "cntb\t%0,%1"
+ [(set_attr "type" "fxb")])
+
+(define_insn "sumb_si"
+ [(set (match_operand:HI 0 "spu_reg_operand" "=r")
+ (unspec:HI [(match_operand:SI 1 "spu_reg_operand" "r")] UNSPEC_SUMB))]
+ ""
+ "sumb\t%0,%1,%1"
+ [(set_attr "type" "fxb")])
+
+
+;; ashl, vashl
+
+(define_insn "<v>ashl<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
+ ""
+ "@
+ shl<bh>\t%0,%1,%2
+ shl<bh>i\t%0,%1,%<umask>2"
+ [(set_attr "type" "fx3")])
+
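+;; A DI value sits in the left half of the 128-bit register. Rotating
+;; it left by 64 moves it to the right half, after which a TI shift
+;; left by (n + 64) bits shifts in zeros and leaves the 64-bit result
+;; back in the left half, so no separate masking is needed.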
+(define_insn_and_split "ashldi3"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
+ (ashift:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
+ (clobber (match_scratch:SI 3 "=&r,X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:DI 0)
+ (ashift:DI (match_dup:DI 1)
+ (match_dup:SI 2)))]
+ {
+ rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+
+ if (GET_CODE (operands[2]) == REG)
+ {
+ emit_insn (gen_addsi3 (op3, op2, GEN_INT (64)));
+ emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
+ emit_insn (gen_shlqbybi_ti (op0, op0, op3));
+ emit_insn (gen_shlqbi_ti (op0, op0, op3));
+ }
+ else
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ emit_insn (gen_rotlti3 (op0, op1, GEN_INT (64)));
+ emit_insn (gen_shlqby_ti (op0, op0, GEN_INT (val / 8 + 8)));
+ if (val % 8)
+ emit_insn (gen_shlqbi_ti (op0, op0, GEN_INT (val % 8)));
+ }
+ DONE;
+ })
+
+(define_expand "ashlti3"
+ [(parallel [(set (match_operand:TI 0 "spu_reg_operand" "")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "")
+ (match_operand:SI 2 "spu_nonmem_operand" "")))
+ (clobber (match_dup:TI 3))])]
+ ""
+ "if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_ashlti3_imm(operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ operands[3] = gen_reg_rtx (TImode);")
+
+(define_insn_and_split "ashlti3_imm"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "immediate_operand" "O,P")))]
+ ""
+ "@
+ shlqbyi\t%0,%1,%h2
+ shlqbii\t%0,%1,%e2"
+ "!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2])"
+ [(set (match_dup:TI 0)
+ (ashift:TI (match_dup:TI 1)
+ (match_dup:SI 3)))
+ (set (match_dup:TI 0)
+ (ashift:TI (match_dup:TI 0)
+ (match_dup:SI 4)))]
+ {
+ HOST_WIDE_INT val = INTVAL(operands[2]);
+ operands[3] = GEN_INT (val&7);
+ operands[4] = GEN_INT (val&-8);
+ }
+ [(set_attr "type" "shuf,shuf")])
+
+(define_insn_and_split "ashlti3_reg"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "spu_reg_operand" "r")))
+ (clobber (match_operand:TI 3 "spu_reg_operand" "=&r"))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:TI 3)
+ (ashift:TI (match_dup:TI 1)
+ (and:SI (match_dup:SI 2)
+ (const_int 7))))
+ (set (match_dup:TI 0)
+ (ashift:TI (match_dup:TI 3)
+ (and:SI (match_dup:SI 2)
+ (const_int -8))))]
+ "")
+
+(define_insn "shlqbybi_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int -8))))]
+ ""
+ "@
+ shlqbybi\t%0,%1,%2
+ shlqbyi\t%0,%1,%h2"
+ [(set_attr "type" "shuf,shuf")])
+
+(define_insn "shlqbi_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int 7))))]
+ ""
+ "@
+ shlqbi\t%0,%1,%2
+ shlqbii\t%0,%1,%e2"
+ [(set_attr "type" "shuf,shuf")])
+
+(define_insn "shlqby_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (ashift:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int 8))))]
+ ""
+ "@
+ shlqby\t%0,%1,%2
+ shlqbyi\t%0,%1,%f2"
+ [(set_attr "type" "shuf,shuf")])
+
+
+;; lshr, vlshr
+
+(define_insn_and_split "<v>lshr<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
+ (clobber (match_scratch:VHSI 3 "=&r,X"))]
+ ""
+ "@
+ #
+ rot<bh>mi\t%0,%1,-%<umask>2"
+ "reload_completed && GET_CODE (operands[2]) == REG"
+ [(set (match_dup:VHSI 3)
+ (neg:VHSI (match_dup:VHSI 2)))
+ (set (match_dup:VHSI 0)
+ (lshiftrt:VHSI (match_dup:VHSI 1)
+ (neg:VHSI (match_dup:VHSI 3))))]
+ ""
+ [(set_attr "type" "*,fx3")])
+
+(define_insn "<v>lshr<mode>3_imm"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
+ (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
+ (match_operand:VHSI 2 "immediate_operand" "W")))]
+ ""
+ "rot<bh>mi\t%0,%1,-%<umask>2"
+ [(set_attr "type" "fx3")])
+
+(define_insn "rotm_<mode>"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
+ ""
+ "@
+ rot<bh>m\t%0,%1,%2
+ rot<bh>mi\t%0,%1,-%<nmask>2"
+ [(set_attr "type" "fx3")])
+
+(define_insn_and_split "lshr<mode>3"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,O,P")))]
+ ""
+ "@
+ #
+ rotqmbyi\t%0,%1,-%h2
+ rotqmbii\t%0,%1,-%e2"
+ "REG_P (operands[2]) || (!satisfies_constraint_O (operands[2]) && !satisfies_constraint_P (operands[2]))"
+ [(set (match_dup:DTI 3)
+ (lshiftrt:DTI (match_dup:DTI 1)
+ (match_dup:SI 4)))
+ (set (match_dup:DTI 0)
+ (lshiftrt:DTI (match_dup:DTI 3)
+ (match_dup:SI 5)))]
+ {
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL(operands[2]);
+ operands[4] = GEN_INT (val & 7);
+ operands[5] = GEN_INT (val & -8);
+ }
+ else
+ {
+ rtx t0 = gen_reg_rtx (SImode);
+ rtx t1 = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3(t0, GEN_INT(0), operands[2]));
+ emit_insn (gen_subsi3(t1, GEN_INT(7), operands[2]));
+ operands[4] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, t0), GEN_INT (7));
+ operands[5] = gen_rtx_AND (SImode, gen_rtx_NEG (SImode, gen_rtx_AND (SImode, t1, GEN_INT (-8))), GEN_INT (-8));
+ }
+ }
+ [(set_attr "type" "*,shuf,shuf")])
+
+(define_expand "shrqbybi_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int -8)))
+ (const_int -8))))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (7 - INTVAL (operands[2]));
+ else
+ {
+ rtx t0 = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (t0, GEN_INT (7), operands[2]));
+ operands[2] = t0;
+ }
+ })
+
+(define_insn "rotqmbybi_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (and:SI (neg:SI (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int -8)))
+ (const_int -8))))]
+ ""
+ "@
+ rotqmbybi\t%0,%1,%2
+ rotqmbyi\t%0,%1,-%H2"
+ [(set_attr "type" "shuf")])
+
+(define_insn_and_split "shrqbi_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int 7))))
+ (clobber (match_scratch:SI 3 "=&r,X"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:DTI 0)
+ (lshiftrt:DTI (match_dup:DTI 1)
+ (and:SI (neg:SI (match_dup:SI 3)) (const_int 7))))]
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[3] = GEN_INT (-INTVAL (operands[2]));
+ else
+ emit_insn (gen_subsi3 (operands[3], GEN_INT (0), operands[2]));
+ }
+ [(set_attr "type" "shuf")])
+
+(define_insn "rotqmbi_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (and:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
+ (const_int 7))))]
+ ""
+ "@
+ rotqmbi\t%0,%1,%2
+ rotqmbii\t%0,%1,-%E2"
+ [(set_attr "type" "shuf")])
+
+(define_expand "shrqby_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
+ (const_int 8))))]
+ ""
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ {
+ rtx t0 = gen_reg_rtx (SImode);
+ emit_insn (gen_subsi3 (t0, GEN_INT (0), operands[2]));
+ operands[2] = t0;
+ }
+ })
+
+(define_insn "rotqmby_<mode>"
+ [(set (match_operand:DTI 0 "spu_reg_operand" "=r,r")
+ (lshiftrt:DTI (match_operand:DTI 1 "spu_reg_operand" "r,r")
+ (mult:SI (neg:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I"))
+ (const_int 8))))]
+ ""
+ "@
+ rotqmby\t%0,%1,%2
+ rotqmbyi\t%0,%1,-%F2"
+ [(set_attr "type" "shuf")])
+
+
+;; ashr, vashr
+
+(define_insn_and_split "<v>ashr<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
+ (clobber (match_scratch:VHSI 3 "=&r,X"))]
+ ""
+ "@
+ #
+ rotma<bh>i\t%0,%1,-%<umask>2"
+ "reload_completed && GET_CODE (operands[2]) == REG"
+ [(set (match_dup:VHSI 3)
+ (neg:VHSI (match_dup:VHSI 2)))
+ (set (match_dup:VHSI 0)
+ (ashiftrt:VHSI (match_dup:VHSI 1)
+ (neg:VHSI (match_dup:VHSI 3))))]
+ ""
+ [(set_attr "type" "*,fx3")])
+
+(define_insn "<v>ashr<mode>3_imm"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r")
+ (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r")
+ (match_operand:VHSI 2 "immediate_operand" "W")))]
+ ""
+ "rotma<bh>i\t%0,%1,-%<umask>2"
+ [(set_attr "type" "fx3")])
+
+
+(define_insn "rotma_<mode>"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (neg:VHSI (match_operand:VHSI 2 "spu_nonmem_operand" "r,W"))))]
+ ""
+ "@
+ rotma<bh>\t%0,%1,%2
+ rotma<bh>i\t%0,%1,-%<nmask>2"
+ [(set_attr "type" "fx3")])
+
+(define_insn_and_split "ashrdi3"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,I")))
+ (clobber (match_scratch:TI 3 "=&r,&r"))
+ (clobber (match_scratch:TI 4 "=&r,&r"))
+ (clobber (match_scratch:SI 5 "=&r,&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:DI 0)
+ (ashiftrt:DI (match_dup:DI 1)
+ (match_dup:SI 2)))]
+ {
+ rtx op0 = gen_rtx_REG (TImode, REGNO (operands[0]));
+ rtx op0v = gen_rtx_REG (V4SImode, REGNO (op0));
+ rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
+ rtx op1s = gen_rtx_REG (SImode, REGNO (op1));
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx op4 = operands[4];
+ rtx op5 = operands[5];
+
+ if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 63)
+ {
+ rtx op0s = gen_rtx_REG (SImode, REGNO (op0));
+ emit_insn (gen_ashrsi3 (op0s, op1s, GEN_INT (32)));
+ emit_insn (gen_spu_fsm (op0v, op0s));
+ }
+ else if (GET_CODE (op2) == CONST_INT && INTVAL (op2) >= 32)
+ {
+ rtx op0d = gen_rtx_REG (V2DImode, REGNO (op0));
+ HOST_WIDE_INT val = INTVAL (op2);
+ emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
+ emit_insn (gen_spu_xswd (op0d, op0v));
+ if (val > 32)
+ emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
+ }
+ else
+ {
+ rtx op3v = gen_rtx_REG (V4SImode, REGNO (op3));
+ unsigned char arr[16] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+ };
+
+ emit_insn (gen_ashrsi3 (op5, op1s, GEN_INT (31)));
+ emit_move_insn (op4, array_to_constant (TImode, arr));
+ emit_insn (gen_spu_fsm (op3v, op5));
+
+ if (GET_CODE (operands[2]) == REG)
+ {
+ emit_insn (gen_selb (op4, op3, op1, op4));
+ emit_insn (gen_negsi2 (op5, op2));
+ emit_insn (gen_rotqbybi_ti (op0, op4, op5));
+ emit_insn (gen_rotqbi_ti (op0, op0, op5));
+ }
+ else
+ {
+ HOST_WIDE_INT val = -INTVAL (op2);
+ emit_insn (gen_selb (op0, op3, op1, op4));
+ if ((val - 7) / 8)
+ emit_insn (gen_rotqby_ti (op0, op0, GEN_INT ((val - 7) / 8)));
+ if (val % 8)
+ emit_insn (gen_rotqbi_ti (op0, op0, GEN_INT (val % 8)));
+ }
+ }
+ DONE;
+ })
+
+
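+;; An arithmetic 128-bit right shift is a logical right shift ORed with
+;; a block of sign bits in the top n positions: the sign is replicated
+;; with vashr/fsm, shifted left by 128 - n, and combined with the
+;; lshrti3 result.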
+(define_insn_and_split "ashrti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (ashiftrt:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,i")))]
+ ""
+ "#"
+ ""
+ [(set (match_dup:TI 0)
+ (ashiftrt:TI (match_dup:TI 1)
+ (match_dup:SI 2)))]
+ {
+ rtx sign_shift = gen_reg_rtx (SImode);
+ rtx sign_mask = gen_reg_rtx (TImode);
+ rtx sign_mask_v4si = gen_rtx_SUBREG (V4SImode, sign_mask, 0);
+ rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
+ rtx t = gen_reg_rtx (TImode);
+ emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
+ emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
+ emit_insn (gen_fsm_ti (sign_mask, sign_mask));
+ emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
+ emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
+ emit_insn (gen_iorti3 (operands[0], t, sign_mask));
+ DONE;
+ })
+
+;; fsm is used after rotma to replicate the sign across the whole register.
+(define_insn "fsm_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (unspec:TI [(match_operand:TI 1 "spu_reg_operand" "r")] UNSPEC_FSM))]
+ ""
+ "fsm\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+
+;; vrotl, rotl
+
+(define_insn "<v>rotl<mode>3"
+ [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
+ (rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
+ ""
+ "@
+ rot<bh>\t%0,%1,%2
+ rot<bh>i\t%0,%1,%<umask>2"
+ [(set_attr "type" "fx3")])
+
+(define_insn "rotlti3"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=&r,r,r,r")
+ (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "spu_nonmem_operand" "r,O,P,I")))]
+ ""
+ "@
+ rotqbybi\t%0,%1,%2\;rotqbi\t%0,%0,%2
+ rotqbyi\t%0,%1,%h2
+ rotqbii\t%0,%1,%e2
+ rotqbyi\t%0,%1,%h2\;rotqbii\t%0,%0,%e2"
+ [(set_attr "length" "8,4,4,8")
+ (set_attr "type" "multi1,shuf,shuf,multi1")])
+
+(define_insn "rotqbybi_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int -8))))]
+ ""
+ "@
+ rotqbybi\t%0,%1,%2
+ rotqbyi\t%0,%1,%h2"
+ [(set_attr "type" "shuf,shuf")])
+
+(define_insn "rotqby_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (mult:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int 8))))]
+ ""
+ "@
+ rotqby\t%0,%1,%2
+ rotqbyi\t%0,%1,%f2"
+ [(set_attr "type" "shuf,shuf")])
+
+(define_insn "rotqbi_ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r,r")
+ (rotate:TI (match_operand:TI 1 "spu_reg_operand" "r,r")
+ (and:SI (match_operand:SI 2 "spu_nonmem_operand" "r,I")
+ (const_int 7))))]
+ ""
+ "@
+ rotqbi\t%0,%1,%2
+ rotqbii\t%0,%1,%e2"
+ [(set_attr "type" "shuf,shuf")])
+
+
+;; struct extract/insert
+;; We handle MEMs because GCC would otherwise generate invalid SUBREGs
+;; and inefficient code.
+
+(define_expand "extv"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (sign_extract:TI (match_operand 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ""
+ {
+ spu_expand_extv (operands, 0);
+ DONE;
+ })
+
+(define_expand "extzv"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (zero_extract:TI (match_operand 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ""
+ {
+ spu_expand_extv (operands, 1);
+ DONE;
+ })
+
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand 3 "nonmemory_operand" ""))]
+ ""
+ { spu_expand_insv(operands); DONE; })
+
+;; Simplify a number of patterns that get generated by extv, extzv,
+;; insv, and loads.
+(define_insn_and_split "trunc_shr_ti<mode>"
+ [(set (match_operand:QHSI 0 "spu_reg_operand" "=r")
+ (truncate:QHSI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
+ (const_int 96)])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ spu_split_convert (operands);
+ DONE;
+ }
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+(define_insn_and_split "trunc_shr_tidi"
+ [(set (match_operand:DI 0 "spu_reg_operand" "=r")
+ (truncate:DI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "0")
+ (const_int 64)])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ spu_split_convert (operands);
+ DONE;
+ }
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+(define_insn_and_split "shl_ext_<mode>ti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:QHSI 1 "spu_reg_operand" "0")])
+ (const_int 96)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ spu_split_convert (operands);
+ DONE;
+ }
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+(define_insn_and_split "shl_ext_diti"
+ [(set (match_operand:TI 0 "spu_reg_operand" "=r")
+ (ashift:TI (match_operator:TI 2 "extend_operator" [(match_operand:DI 1 "spu_reg_operand" "0")])
+ (const_int 64)))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ spu_split_convert (operands);
+ DONE;
+ }
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+(define_insn "sext_trunc_lshr_tiqisi"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (sign_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
+ (const_int 120)]))))]
+ ""
+ "rotmai\t%0,%1,-24"
+ [(set_attr "type" "fx3")])
+
+(define_insn "zext_trunc_lshr_tiqisi"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (zero_extend:SI (truncate:QI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
+ (const_int 120)]))))]
+ ""
+ "rotmi\t%0,%1,-24"
+ [(set_attr "type" "fx3")])
+
+(define_insn "sext_trunc_lshr_tihisi"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (sign_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
+ (const_int 112)]))))]
+ ""
+ "rotmai\t%0,%1,-16"
+ [(set_attr "type" "fx3")])
+
+(define_insn "zext_trunc_lshr_tihisi"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (zero_extend:SI (truncate:HI (match_operator:TI 2 "shiftrt_operator" [(match_operand:TI 1 "spu_reg_operand" "r")
+ (const_int 112)]))))]
+ ""
+ "rotmi\t%0,%1,-16"
+ [(set_attr "type" "fx3")])
+
+
+;; String/block move insn.
+;; Argument 0 is the destination
+;; Argument 1 is the source
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "movstrsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (use (match_operand:SI 2 "" ""))
+ (use (match_operand:SI 3 "" ""))])]
+ ""
+ "
+ {
+ if (spu_expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+ }")
+
+
+;; jump
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))]
+ ""
+ "bi\t%0"
+ [(set_attr "type" "br")])
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "br\t%0"
+ [(set_attr "type" "br")])
+
+
+;; return
+
+;; This is used for leaf functions that don't save any registers and
+;; don't have locals on the stack; that is, for functions that don't
+;; change $sp and don't need to save $lr.
+(define_expand "return"
+ [(return)]
+ "direct_return()"
+ "")
+
+;; Used in spu_expand_epilogue to generate a return from a function and
+;; to explicitly mark the use of $lr.
+
+(define_insn "_return"
+ [(return)]
+ ""
+ "bi\t$lr"
+ [(set_attr "type" "br")])
+
+
+
+;; ceq
+
+(define_insn "ceq_<mode>"
+ [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
+ (eq:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
+ ""
+ "@
+ ceq<bh>\t%0,%1,%2
+ ceq<bh>i\t%0,%1,%2")
+
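+;; ceq_v4si compares word-wise and gb gathers the low bit of each word,
+;; with word 0 in bit 3. A DI occupies words 0 and 1, so the values are
+;; equal exactly when bits 3 and 2 are both set, i.e. when the gathered
+;; value is greater than 11.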
+(define_insn_and_split "ceq_di"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (eq:SI (match_operand:DI 1 "spu_reg_operand" "r")
+ (match_operand:DI 2 "spu_reg_operand" "r")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SI 0)
+ (eq:SI (match_dup:DI 1)
+ (match_dup:DI 2)))]
+ {
+ rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+ rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+ emit_insn (gen_ceq_v4si (op0, op1, op2));
+ emit_insn (gen_spu_gb (op0, op0));
+ emit_insn (gen_cgt_si (operands[0], operands[0], GEN_INT (11)));
+ DONE;
+ })
+
+
+;; We provide the TI compares for completeness and because some parts of
+;; gcc/libgcc use them, even though user code might never see them.
+(define_insn "ceq_ti"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (eq:SI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:TI 2 "spu_reg_operand" "r")))]
+ ""
+ "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "12")])
+
+(define_insn "ceq_<mode>"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))]
+ ""
+ "fceq\t%0,%1,%2")
+
+(define_insn "cmeq_<mode>"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
+ ""
+ "fcmeq\t%0,%1,%2")
+
+;; These implementations skip the NaN and INF checks when compiled
+;; with -ffinite-math-only.
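+;; On Cell the comparison is done on the bit patterns: the doubles are
+;; equal when all four words compare equal bit-for-bit (biteq), or when
+;; both are zero apart from the sign bit (iszero), and the result is
+;; forced false when operand 1 is a NaN (bitwise equality makes a
+;; separate check of operand 2 redundant).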
+(define_expand "ceq_df"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = gen_reg_rtx (V4SImode);
+ rtx rb = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx iszero = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_spu_convert (ra, operands[1]));
+ emit_insn (gen_spu_convert (rb, operands[2]));
+ emit_insn (gen_ceq_v4si (biteq, ra, rb));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ if (!flag_finite_math_only)
+ {
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ }
+ emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
+ if (!flag_finite_math_only)
+ {
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ }
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
+
+(define_insn "ceq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfceq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmeq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmeq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "ceq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx iszero = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_ceq_v4si (biteq, ra, rb));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
+
+(define_expand "cmeq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
+
+
+;; cgt
+
+(define_insn "cgt_<mode>"
+ [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
+ (gt:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
+ ""
+ "@
+ cgt<bh>\t%0,%1,%2
+ cgt<bh>i\t%0,%1,%2")
+
+(define_insn "cgt_di_m1"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
+ (const_int -1)))]
+ ""
+ "cgti\t%0,%1,-1")
+
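+;; Standard double-word compare: a > b when the high words compare
+;; greater signed, or the high words are equal and the low words
+;; compare greater unsigned. selb merges the signed (cgt) and unsigned
+;; (clgt) word results under the equality (ceq) mask.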
+(define_insn_and_split "cgt_di"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:DI 1 "spu_reg_operand" "r")
+ (match_operand:DI 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:V4SI 3 "=&r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))
+ (clobber (match_scratch:V4SI 5 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SI 0)
+ (gt:SI (match_dup:DI 1)
+ (match_dup:DI 2)))]
+ {
+ rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+ rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+ rtx op3 = operands[3];
+ rtx op4 = operands[4];
+ rtx op5 = operands[5];
+ rtx op3d = gen_rtx_REG (V2DImode, REGNO (operands[3]));
+ emit_insn (gen_clgt_v4si (op3, op1, op2));
+ emit_insn (gen_ceq_v4si (op4, op1, op2));
+ emit_insn (gen_cgt_v4si (op5, op1, op2));
+ emit_insn (gen_spu_xswd (op3d, op3));
+ emit_insn (gen_selb (op0, op5, op3, op4));
+ DONE;
+ })
+
+(define_insn "cgt_ti_m1"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
+ (const_int -1)))]
+ ""
+ "cgti\t%0,%1,-1")
+
+(define_insn "cgt_ti"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:TI 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:V4SI 3 "=&r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))
+ (clobber (match_scratch:V4SI 5 "=&r"))]
+ ""
+ "clgt\t%4,%1,%2\;\
+ceq\t%3,%1,%2\;\
+cgt\t%5,%1,%2\;\
+shlqbyi\t%0,%4,4\;\
+selb\t%0,%4,%0,%3\;\
+shlqbyi\t%0,%0,4\;\
+selb\t%0,%4,%0,%3\;\
+shlqbyi\t%0,%0,4\;\
+selb\t%0,%5,%0,%3"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "36")])
+
+(define_insn "cgt_<mode>"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
+ (match_operand:VSF 2 "spu_reg_operand" "r")))]
+ ""
+ "fcgt\t%0,%1,%2")
+
+(define_insn "cmgt_<mode>"
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ (abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
+ ""
+ "fcmgt\t%0,%1,%2")
+
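+;; On Cell, greater-than on doubles is computed on the bit patterns:
+;; negative values are, in effect, two's-complement negated (bg/sfx
+;; with the borrow shuffle) so that the integer images order the same
+;; way as the doubles, then a double-word signed/unsigned compare
+;; (cgt/clgt/ceq) produces the result, with NaN operands forcing it
+;; to false.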
+(define_expand "cgt_df"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = gen_reg_rtx (V4SImode);
+ rtx rb = gen_reg_rtx (V4SImode);
+ rtx zero = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx asel = gen_reg_rtx (V4SImode);
+ rtx bsel = gen_reg_rtx (V4SImode);
+ rtx abor = gen_reg_rtx (V4SImode);
+ rtx bbor = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx borrow_shuffle = gen_reg_rtx (TImode);
+
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+ pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
+ 0x0C0D0E0F, 0xC0C0C0C0);
+ emit_move_insn (borrow_shuffle, pat);
+
+ emit_insn (gen_spu_convert (ra, operands[1]));
+ emit_insn (gen_spu_convert (rb, operands[2]));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+
+ if (!flag_finite_math_only)
+ {
+ /* check if ra is NaN */
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+
+ /* check if rb is NaN */
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+
+ /* check if ra or rb is NaN */
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+ }
+ emit_move_insn (zero, CONST0_RTX (V4SImode));
+ emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (asel, asel, asel, hi_promote));
+ emit_insn (gen_bg_v4si (abor, zero, a_abs));
+ emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
+ emit_insn (gen_selb (abor, a_abs, abor, asel));
+
+ emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
+ emit_insn (gen_bg_v4si (bbor, zero, b_abs));
+ emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
+ emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
+
+ emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
+ emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
+ emit_insn (gen_ceq_v4si (temp2, abor, bbor));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ if (!flag_finite_math_only)
+ {
+ /* correct for NaNs */
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ }
+ emit_insn (gen_spu_convert (operands[0], temp2));
+ DONE;
+ }
+})
+
+(define_insn "cgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "cgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx zero = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx asel = gen_reg_rtx (V4SImode);
+ rtx bsel = gen_reg_rtx (V4SImode);
+ rtx abor = gen_reg_rtx (V4SImode);
+ rtx bbor = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx borrow_shuffle = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+ pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
+ 0x0C0D0E0F, 0xC0C0C0C0);
+ emit_move_insn (borrow_shuffle, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+ emit_move_insn (zero, CONST0_RTX (V4SImode));
+ emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (asel, asel, asel, hi_promote));
+ emit_insn (gen_bg_v4si (abor, zero, a_abs));
+ emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
+ emit_insn (gen_selb (abor, a_abs, abor, asel));
+ emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
+ emit_insn (gen_bg_v4si (bbor, zero, b_abs));
+ emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
+ emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
+ emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
+ emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
+ emit_insn (gen_ceq_v4si (temp2, abor, bbor));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
+(define_expand "cmgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+
+ emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
+ emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
+
+;; clgt
+
+(define_insn "clgt_<mode>"
+ [(set (match_operand:VQHSI 0 "spu_reg_operand" "=r,r")
+ (gtu:VQHSI (match_operand:VQHSI 1 "spu_reg_operand" "r,r")
+ (match_operand:VQHSI 2 "spu_arith_operand" "r,B")))]
+ ""
+ "@
+ clgt<bh>\t%0,%1,%2
+ clgt<bh>i\t%0,%1,%2")
+
+(define_insn_and_split "clgt_di"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gtu:SI (match_operand:DI 1 "spu_reg_operand" "r")
+ (match_operand:DI 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:V4SI 3 "=&r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))
+ (clobber (match_scratch:V4SI 5 "=&r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup:SI 0)
+ (gtu:SI (match_dup:DI 1)
+ (match_dup:DI 2)))]
+ {
+ rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+ rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+ rtx op3 = operands[3];
+ rtx op4 = operands[4];
+ rtx op5 = operands[5];
+ rtx op5d = gen_rtx_REG (V2DImode, REGNO (operands[5]));
+ emit_insn (gen_clgt_v4si (op3, op1, op2));
+ emit_insn (gen_ceq_v4si (op4, op1, op2));
+ emit_insn (gen_spu_xswd (op5d, op3));
+ emit_insn (gen_selb (op0, op3, op5, op4));
+ DONE;
+ })
+
+(define_insn "clgt_ti"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gtu:SI (match_operand:TI 1 "spu_reg_operand" "r")
+ (match_operand:TI 2 "spu_reg_operand" "r")))
+ (clobber (match_scratch:V4SI 3 "=&r"))
+ (clobber (match_scratch:V4SI 4 "=&r"))]
+ ""
+ "ceq\t%3,%1,%2\;\
+clgt\t%4,%1,%2\;\
+shlqbyi\t%0,%4,4\;\
+selb\t%0,%4,%0,%3\;\
+shlqbyi\t%0,%0,4\;\
+selb\t%0,%4,%0,%3\;\
+shlqbyi\t%0,%0,4\;\
+selb\t%0,%4,%0,%3"
+ [(set_attr "type" "multi0")
+ (set_attr "length" "32")])
+
+
+;; dftsv
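+;; Operand 2 selects the IEEE classes to test for, one bit per class:
+;; 0x40 NaN, 0x20 +inf, 0x10 -inf, 0x08 +0, 0x04 -0,
+;; 0x02 +denorm, 0x01 -denorm.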
+(define_insn "dftsv_celledp"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPEC_DFTSV))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dftsv\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "dftsv"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec:V2DI [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPEC_DFTSV))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx result = gen_reg_rtx (V4SImode);
+ emit_move_insn (result, CONST0_RTX (V4SImode));
+
+ if (INTVAL (operands[2]))
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx abs = gen_reg_rtx (V4SImode);
+ rtx sign = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (sign, sign, sign, hi_promote));
+ emit_insn (gen_andv4si3 (abs, ra, sign_mask));
+
+ /* NaN or +inf or -inf */
+ if (INTVAL (operands[2]) & 0x70)
+ {
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx isinf = gen_reg_rtx (V4SImode);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
+
+ /* NaN */
+ if (INTVAL (operands[2]) & 0x40)
+ {
+ rtx isnan = gen_reg_rtx (V4SImode);
+ emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
+ emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
+ emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
+ emit_insn (gen_iorv4si3 (result, result, isnan));
+ }
+ /* +inf or -inf */
+ if (INTVAL (operands[2]) & 0x30)
+ {
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
+ emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
+
+ /* +inf */
+ if (INTVAL (operands[2]) & 0x20)
+ {
+ emit_insn (gen_andc_v4si (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -inf */
+ if (INTVAL (operands[2]) & 0x10)
+ {
+ emit_insn (gen_andv4si3 (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+
+ /* 0 or denorm */
+ if (INTVAL (operands[2]) & 0xF)
+ {
+ rtx iszero = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+
+ /* denorm */
+ if (INTVAL (operands[2]) & 0x3)
+ {
+ rtx isdenorm = gen_reg_rtx (V4SImode);
+ rtx denorm_mask = gen_reg_rtx (V4SImode);
+ emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
+ emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
+ emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
+ emit_insn (gen_shufb (isdenorm, isdenorm,
+ isdenorm, hi_promote));
+ /* +denorm */
+ if (INTVAL (operands[2]) & 0x2)
+ {
+ emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -denorm */
+ if (INTVAL (operands[2]) & 0x1)
+ {
+ emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+
+ /* 0 */
+ if (INTVAL (operands[2]) & 0xC)
+ {
+ emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
+ /* +0 */
+ if (INTVAL (operands[2]) & 0x8)
+ {
+ emit_insn (gen_andc_v4si (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -0 */
+ if (INTVAL (operands[2]) & 0x4)
+ {
+ emit_insn (gen_andv4si3 (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+ }
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
+ DONE;
+ }
+})
+
+
+;; branches
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2
+ "spu_reg_operand" "r")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "br%b2%b1z\t%2,%0"
+ [(set_attr "type" "br")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 0 "branch_comparison_operator"
+ [(match_operand 1
+ "spu_reg_operand" "r")
+ (const_int 0)])
+ (return)
+ (pc)))]
+ "direct_return ()"
+ "bi%b1%b0z\t%1,$lr"
+ [(set_attr "type" "br")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 1 "branch_comparison_operator"
+ [(match_operand 2
+ "spu_reg_operand" "r")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "br%b2%b1z\t%2,%0"
+ [(set_attr "type" "br")])
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else (match_operator 0 "branch_comparison_operator"
+ [(match_operand 1
+ "spu_reg_operand" "r")
+ (const_int 0)])
+ (pc)
+ (return)))]
+ "direct_return ()"
+ "bi%b1%b0z\t%1,$lr"
+ [(set_attr "type" "br")])
+
+
+;; vector conditional compare patterns
+(define_expand "vcond<mode>"
+ [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMP
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMP 4 "spu_reg_operand" "r")
+ (match_operand:VCMP 5 "spu_reg_operand" "r")])
+ (match_operand:VCMP 1 "spu_reg_operand" "r")
+ (match_operand:VCMP 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMPU
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMPU 4 "spu_reg_operand" "r")
+ (match_operand:VCMPU 5 "spu_reg_operand" "r")])
+ (match_operand:VCMPU 1 "spu_reg_operand" "r")
+ (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
+
+;; branch on condition
+
+(define_expand "cbranch<mode>4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:VQHSI 1 "spu_reg_operand" "")
+ (match_operand:VQHSI 2 "spu_nonmem_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
+
+(define_expand "cbranch<mode>4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DTI 1 "spu_reg_operand" "")
+ (match_operand:DTI 2 "spu_reg_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
+
+(define_expand "cbranch<mode>4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:VSF 1 "spu_reg_operand" "")
+ (match_operand:VSF 2 "spu_reg_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
+
+(define_expand "cbranchdf4"
+ [(use (match_operator 0 "ordered_comparison_operator"
+ [(match_operand:DF 1 "spu_reg_operand" "")
+ (match_operand:DF 2 "spu_reg_operand" "")]))
+ (use (match_operand 3 ""))]
+ ""
+ { spu_emit_branch_or_set (0, operands[0], operands); DONE; })
+
+
+;; set on condition
+
+(define_expand "cstore<mode>4"
+ [(use (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:VQHSI 2 "spu_reg_operand" "")
+ (match_operand:VQHSI 3 "spu_nonmem_operand" "")]))
+ (clobber (match_operand:SI 0 "spu_reg_operand"))]
+ ""
+ { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
+
+(define_expand "cstore<mode>4"
+ [(use (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DTI 2 "spu_reg_operand" "")
+ (match_operand:DTI 3 "spu_reg_operand" "")]))
+ (clobber (match_operand:SI 0 "spu_reg_operand"))]
+ ""
+ { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
+
+(define_expand "cstore<mode>4"
+ [(use (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:VSF 2 "spu_reg_operand" "")
+ (match_operand:VSF 3 "spu_reg_operand" "")]))
+ (clobber (match_operand:SI 0 "spu_reg_operand"))]
+ ""
+ { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
+
+(define_expand "cstoredf4"
+ [(use (match_operator 1 "ordered_comparison_operator"
+ [(match_operand:DF 2 "spu_reg_operand" "")
+ (match_operand:DF 3 "spu_reg_operand" "")]))
+ (clobber (match_operand:SI 0 "spu_reg_operand"))]
+ ""
+ { spu_emit_branch_or_set (1, operands[1], operands); DONE; })
+
+
+;; conditional move
+
+;; Define this first one so HAVE_conditional_move is defined.
+(define_insn "movcc_dummy"
+ [(set (match_operand 0 "" "")
+ (if_then_else (match_operand 1 "" "")
+ (match_operand 2 "" "")
+ (match_operand 3 "" "")))]
+ "!operands[0]"
+ "")
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "")
+ (if_then_else:ALL (match_operand 1 "ordered_comparison_operator" "")
+ (match_operand:ALL 2 "spu_reg_operand" "")
+ (match_operand:ALL 3 "spu_reg_operand" "")))]
+ ""
+ {
+    spu_emit_branch_or_set (2, operands[1], operands);
+ DONE;
+ })
+
+;; This pattern is used when the result of a compare is not large
+;; enough to use in a selb when expanding conditional moves.
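+;; fsm (form select mask) fans the low bits of the word-sized compare
+;; result out to full words, so a 0 or -1 result becomes an all-zeros or
+;; all-ones mask that selb can consume.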
+(define_expand "extend_compare"
+ [(set (match_operand 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
+ ""
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, operands[1]),
+ UNSPEC_EXTEND_CMP)));
+ DONE;
+ })
+
+(define_insn "extend_compare<mode>"
+ [(set (match_operand:ALL 0 "spu_reg_operand" "=r")
+ (unspec:ALL [(match_operand 1 "spu_reg_operand" "r")] UNSPEC_EXTEND_CMP))]
+ "operands"
+ "fsm\t%0,%1"
+ [(set_attr "type" "shuf")])
+
+
+;; case
+
+;; operand 0 is index
+;; operand 1 is the minimum bound
+;; operand 2 is the maximum bound - minimum bound + 1
+;; operand 3 is CODE_LABEL for the table;
+;; operand 4 is the CODE_LABEL to go to if index out of range.
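+;; For example (illustrative): for case values 5..8, operand 1 is 5 and
+;; operand 2 is 4; a case value of 6 loads the dispatch word at
+;; table + ((6 - 5) << 2) = table + 4.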
+(define_expand "casesi"
+ [(match_operand:SI 0 "spu_reg_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")]
+ ""
+ {
+ rtx table = gen_reg_rtx (SImode);
+ rtx index = gen_reg_rtx (SImode);
+ rtx sindex = gen_reg_rtx (SImode);
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (table, gen_rtx_LABEL_REF (SImode, operands[3]));
+
+    emit_insn (gen_subsi3 (index, operands[0], force_reg (SImode, operands[1])));
+    emit_insn (gen_ashlsi3 (sindex, index, GEN_INT (2)));
+ emit_move_insn (addr, gen_rtx_MEM (SImode,
+ gen_rtx_PLUS (SImode, table, sindex)));
+ if (flag_pic)
+ emit_insn (gen_addsi3 (addr, addr, table));
+
+ emit_cmp_and_jump_insns (index, operands[2], GTU, NULL_RTX, SImode, 1, operands[4]);
+ emit_jump_insn (gen_tablejump (addr, operands[3]));
+ DONE;
+ })
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "spu_reg_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "bi\t%0"
+ [(set_attr "type" "br")])
+
+
+;; call
+
+;; Note that operand 1 is total size of args, in bytes,
+;; and what the call insn wants is the number of words.
+(define_expand "sibcall"
+ [(parallel
+ [(call (match_operand:QI 0 "call_operand" "")
+ (match_operand:QI 1 "" ""))
+ (use (reg:SI 0))])]
+ ""
+ {
+ if (! call_operand (operands[0], QImode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ })
+
+(define_insn "_sibcall"
+ [(parallel
+ [(call (match_operand:QI 0 "call_operand" "R,S")
+ (match_operand:QI 1 "" "i,i"))
+ (use (reg:SI 0))])]
+ "SIBLING_CALL_P(insn)"
+ "@
+ bi\t%i0
+ br\t%0"
+ [(set_attr "type" "br,br")])
+
+(define_expand "sibcall_value"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "call_operand" "")
+ (match_operand:QI 2 "" "")))
+ (use (reg:SI 0))])]
+ ""
+ {
+ if (! call_operand (operands[1], QImode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ })
+
+(define_insn "_sibcall_value"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "call_operand" "R,S")
+ (match_operand:QI 2 "" "i,i")))
+ (use (reg:SI 0))])]
+ "SIBLING_CALL_P(insn)"
+ "@
+ bi\t%i1
+ br\t%1"
+ [(set_attr "type" "br,br")])
+
+;; Note that operand 1 is total size of args, in bytes,
+;; and what the call insn wants is the number of words.
+(define_expand "call"
+ [(parallel
+ [(call (match_operand:QI 0 "call_operand" "")
+ (match_operand:QI 1 "" ""))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))])]
+ ""
+ {
+ if (! call_operand (operands[0], QImode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+ })
+
+(define_insn "_call"
+ [(parallel
+ [(call (match_operand:QI 0 "call_operand" "R,S,T")
+ (match_operand:QI 1 "" "i,i,i"))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))])]
+ ""
+ "@
+ bisl\t$lr,%i0
+ brsl\t$lr,%0
+ brasl\t$lr,%0"
+ [(set_attr "type" "br")])
+
+(define_expand "call_value"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "call_operand" "")
+ (match_operand:QI 2 "" "")))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))])]
+ ""
+ {
+ if (! call_operand (operands[1], QImode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ })
+
+(define_insn "_call_value"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "call_operand" "R,S,T")
+ (match_operand:QI 2 "" "i,i,i")))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 130))])]
+ ""
+ "@
+ bisl\t$lr,%i1
+ brsl\t$lr,%1
+ brasl\t$lr,%1"
+ [(set_attr "type" "br")])
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ {
+ int i;
+ rtx reg = gen_rtx_REG (TImode, 3);
+
+    /* We need to use call_value so the return value registers don't get
+       clobbered.  */
+ emit_call_insn (gen_call_value (reg, operands[0], const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+ })
+
+
+;; Patterns used for splitting and combining.
+
+
+;; Function prologue and epilogue.
+
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ { spu_expand_prologue (); DONE; })
+
+;; "blockage" is only emited in epilogue. This is what it took to
+;; make "basic block reordering" work with the insns sequence
+;; generated by the spu_expand_epilogue (taken from mips.md)
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPEC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+ { spu_expand_epilogue (false); DONE; })
+
+(define_expand "sibcall_epilogue"
+ [(const_int 2)]
+ ""
+ { spu_expand_epilogue (true); DONE; })
+
+
+;; stack manipulations
+
+;; An insn to allocate new stack space for dynamic use (e.g., alloca).
+;; We move the back-chain and decrement the stack pointer.
+(define_expand "allocate_stack"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (minus (reg 1) (match_operand 1 "spu_nonmem_operand" "")))
+ (set (reg 1)
+ (minus (reg 1) (match_dup 1)))]
+ ""
+ "spu_allocate_stack (operands[0], operands[1]); DONE;")
+
+;; These patterns say how to save and restore the stack pointer. We need not
+;; save the stack pointer at function level since we are careful to preserve
+;; the backchain.
+;;
+
+;; At block level the stack pointer is saved and restored, so that the
+;; stack space allocated within a block is deallocated when leaving
+;; block scope. By default, according to the SPU ABI, the stack
+;; pointer and available stack size are saved in a register. Upon
+;; restoration, the stack pointer is simply copied back, and the
+;; current available stack size is calculated against the restored
+;; stack pointer.
+;;
+;; For nonlocal gotos, we must save the stack pointer and its
+;; backchain and restore both. Note that in the nonlocal case, the
+;; save area is a memory location.
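+;; The nonlocal save area written below holds the back-chain in its first
+;; word (loaded through the saved stack pointer) and the stack pointer
+;; itself in its second word.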
+
+(define_expand "save_stack_function"
+ [(match_operand 0 "general_operand" "")
+ (match_operand 1 "general_operand" "")]
+ ""
+ "DONE;")
+
+(define_expand "restore_stack_function"
+ [(match_operand 0 "general_operand" "")
+ (match_operand 1 "general_operand" "")]
+ ""
+ "DONE;")
+
+(define_expand "restore_stack_block"
+ [(match_operand 0 "spu_reg_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+ "
+ {
+ spu_restore_stack_block (operands[0], operands[1]);
+ DONE;
+ }")
+
+(define_expand "save_stack_nonlocal"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "spu_reg_operand" "")]
+ ""
+ "
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+
+ /* Copy the backchain to the first word, sp to the second. We need to
+ save the back chain because __builtin_apply appears to clobber it. */
+ emit_move_insn (temp, gen_rtx_MEM (Pmode, operands[1]));
+ emit_move_insn (adjust_address_nv (operands[0], SImode, 0), temp);
+ emit_move_insn (adjust_address_nv (operands[0], SImode, 4), operands[1]);
+ DONE;
+ }")
+
+(define_expand "restore_stack_nonlocal"
+ [(match_operand 0 "spu_reg_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+ "
+ {
+    spu_restore_stack_nonlocal (operands[0], operands[1]);
+ DONE;
+ }")
+
+
+;; vector patterns
+
+;; Vector initialization
+(define_expand "vec_init<mode>"
+ [(match_operand:V 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ ""
+ {
+ spu_expand_vector_init (operands[0], operands[1]);
+ DONE;
+ })
+
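+;; Element insertion goes through a generated shuffle control: the cpat
+;; step forms the insertion mask for an <inner>mode store at sp plus the
+;; element's byte offset, and shufb merges the new element through it.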
+(define_expand "vec_set<mode>"
+ [(use (match_operand:SI 2 "spu_nonmem_operand" ""))
+ (set (match_dup:TI 3)
+ (unspec:TI [(match_dup:SI 4)
+ (match_dup:SI 5)
+ (match_dup:SI 6)] UNSPEC_CPAT))
+ (set (match_operand:V 0 "spu_reg_operand" "")
+ (unspec:V [(match_operand:<inner> 1 "spu_reg_operand" "")
+ (match_dup:V 0)
+ (match_dup:TI 3)] UNSPEC_SHUFB))]
+ ""
+ {
+ HOST_WIDE_INT size = GET_MODE_SIZE (<inner>mode);
+ rtx offset = GEN_INT (INTVAL (operands[2]) * size);
+ operands[3] = gen_reg_rtx (TImode);
+ operands[4] = stack_pointer_rtx;
+ operands[5] = offset;
+ operands[6] = GEN_INT (size);
+ })
+
+(define_expand "vec_extract<mode>"
+ [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
+ (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
+ (parallel [(match_operand 2 "const_int_operand" "i")])))]
+ ""
+ {
+ if ((INTVAL (operands[2]) * <vmult> + <voff>) % 16 == 0)
+ {
+ emit_insn (gen_spu_convert (operands[0], operands[1]));
+ DONE;
+ }
+ })
+
+(define_insn "_vec_extract<mode>"
+ [(set (match_operand:<inner> 0 "spu_reg_operand" "=r")
+ (vec_select:<inner> (match_operand:V 1 "spu_reg_operand" "r")
+ (parallel [(match_operand 2 "const_int_operand" "i")])))]
+ ""
+ "rotqbyi\t%0,%1,(%2*<vmult>+<voff>)%%16"
+ [(set_attr "type" "shuf")])
+
+(define_insn "_vec_extractv8hi_ze"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)]))))]
+ ""
+ "rotqmbyi\t%0,%1,-2"
+ [(set_attr "type" "shuf")])
+
+
+;; misc
+
+(define_expand "shufb"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")
+ (match_operand 2 "spu_reg_operand" "")
+ (match_operand:TI 3 "spu_reg_operand" "")] UNSPEC_SHUFB))]
+ ""
+ {
+ rtx s = gen__shufb (operands[0], operands[1], operands[2], operands[3]);
+ PUT_MODE (SET_SRC (s), GET_MODE (operands[0]));
+ emit_insn (s);
+ DONE;
+ })
+
+(define_insn "_shufb"
+ [(set (match_operand 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand 1 "spu_reg_operand" "r")
+ (match_operand 2 "spu_reg_operand" "r")
+ (match_operand:TI 3 "spu_reg_operand" "r")] UNSPEC_SHUFB))]
+ "operands"
+ "shufb\t%0,%1,%2,%3"
+ [(set_attr "type" "shuf")])
+
+(define_insn "nop"
+ [(unspec_volatile [(const_int 0)] UNSPEC_NOP)]
+ ""
+ "nop"
+ [(set_attr "type" "nop")])
+
+(define_insn "nopn"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "K")] UNSPEC_NOP)]
+ ""
+ "nop\t%0"
+ [(set_attr "type" "nop")])
+
+(define_insn "lnop"
+ [(unspec_volatile [(const_int 0)] UNSPEC_LNOP)]
+ ""
+ "lnop"
+ [(set_attr "type" "lnop")])
+
+;; The operand records why we generated this hbrp.
+;; We clobber mem to make sure it isn't moved over any
+;; loads, stores, or calls while scheduling.
+(define_insn "iprefetch"
+ [(unspec [(match_operand:SI 0 "const_int_operand" "n")] UNSPEC_IPREFETCH)
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "hbrp\t# %0"
+ [(set_attr "type" "iprefetch")])
+
+;; A non-volatile version so it gets scheduled
+(define_insn "nopn_nv"
+ [(unspec [(match_operand:SI 0 "register_operand" "r")] UNSPEC_NOP)]
+ ""
+ "nop\t%0"
+ [(set_attr "type" "nop")])
+
+(define_insn "hbr"
+ [(set (reg:SI 130)
+ (unspec:SI [(match_operand:SI 0 "immediate_operand" "i,i,i")
+ (match_operand:SI 1 "nonmemory_operand" "r,s,i")] UNSPEC_HBR))
+ (unspec [(const_int 0)] UNSPEC_HBR)]
+ ""
+ "@
+ hbr\t%0,%1
+ hbrr\t%0,%1
+ hbra\t%0,%1"
+ [(set_attr "type" "hbr")])
+
+(define_insn "sync"
+ [(unspec_volatile [(const_int 0)] UNSPEC_SYNC)
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "sync"
+ [(set_attr "type" "br")])
+
+(define_insn "syncc"
+ [(unspec_volatile [(const_int 1)] UNSPEC_SYNC)
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "syncc"
+ [(set_attr "type" "br")])
+
+(define_insn "dsync"
+ [(unspec_volatile [(const_int 2)] UNSPEC_SYNC)
+ (clobber (mem:BLK (scratch)))]
+ ""
+ "dsync"
+ [(set_attr "type" "br")])
+
+
+
+;; Define the subtract-one-and-jump insns so loop.c
+;; knows what to generate.
+(define_expand "doloop_end"
+  [(use (match_operand 0 "" ""))        ; loop pseudo
+   (use (match_operand 1 "" ""))        ; iterations; zero if unknown
+   (use (match_operand 2 "" ""))        ; max iterations
+   (use (match_operand 3 "" ""))        ; loop level
+   (use (match_operand 4 "" ""))]       ; label
+  ""
+ "
+ {
+ /* Currently SMS relies on the do-loop pattern to recognize loops
+       where (1) the control part comprises all insns defining and/or
+       using a certain 'count' register and (2) the loop count can be
+       adjusted by modifying this register prior to the loop.
+       ??? The possible introduction of a new block to initialize the
+       new IV can potentially affect branch optimizations.  */
+ if (optimize > 0 && flag_modulo_sched)
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+      s0 = operands[0];
+      emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
+      bcomp = gen_rtx_NE (SImode, s0, const0_rtx);
+      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[4]);
+      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+                                   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+                                                         loc_ref, pc_rtx)));
+
+      DONE;
+    }
+  else
+    FAIL;
+ }")
+
+;; convert between any two modes, avoiding any GCC assumptions
+(define_expand "spu_convert"
+ [(set (match_operand 0 "spu_reg_operand" "")
+ (unspec [(match_operand 1 "spu_reg_operand" "")] UNSPEC_CONVERT))]
+ ""
+ {
+ rtx c = gen__spu_convert (operands[0], operands[1]);
+ PUT_MODE (SET_SRC (c), GET_MODE (operands[0]));
+ emit_insn (c);
+ DONE;
+ })
+
+(define_insn_and_split "_spu_convert"
+ [(set (match_operand 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand 1 "spu_reg_operand" "0")] UNSPEC_CONVERT))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ spu_split_convert (operands);
+ DONE;
+ }
+ [(set_attr "type" "convert")
+ (set_attr "length" "0")])
+
+
+;;
+(include "spu-builtins.md")
+
+
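+;; The min/max expansions below compute a compare mask and feed it to
+;; selb, which selects bitwise: rt = (~rc & ra) | (rc & rb).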
+(define_expand "smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=r")
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
+ (match_operand:V4SF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[2], operands[1], mask));
+ DONE;
+}")
+
+(define_expand "sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=r")
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
+ (match_operand:V4SF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_cgt_v4sf (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[2], operands[1],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
+(define_expand "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[1], operands[2],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
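+;; mpyhh(u) forms the products of the even (upper) halfwords of each word
+;; and mpy(u) those of the odd (lower) halfwords; shufb then interleaves
+;; the two V4SI results back into element order.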
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (mult:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
+ ""
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
+ emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
+ emit_insn (gen_shufb (operands[0], ve, vo, mask));
+ DONE;
+}")
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (mult:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
+ ""
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
+ emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
+ emit_insn (gen_shufb (operands[0], ve, vo, mask));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))))]
+ ""
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
+ emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
+ emit_insn (gen_shufb (operands[0], ve, vo, mask));
+ DONE;
+}")
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))))]
+ ""
+ "
+{
+ rtx ve = gen_reg_rtx (V4SImode);
+ rtx vo = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
+ emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
+ emit_insn (gen_shufb (operands[0], ve, vo, mask));
+ DONE;
+}")
+
+(define_expand "vec_realign_load_<mode>"
+ [(set (match_operand:ALL 0 "register_operand" "=r")
+ (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
+ (match_operand:ALL 2 "register_operand" "r")
+ (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
+ ""
+ "
+{
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_expand "spu_lvsr"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
+ ""
+ "
+{
+ rtx addr;
+ rtx offset = gen_reg_rtx (V8HImode);
+ rtx addr_bits = gen_reg_rtx (SImode);
+ rtx addr_bits_vec = gen_reg_rtx (V8HImode);
+ rtx splatqi = gen_reg_rtx (TImode);
+ rtx result = gen_reg_rtx (V8HImode);
+ unsigned char arr[16] = {
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
+ unsigned char arr2[16] = {
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
+
+ emit_move_insn (offset, array_to_constant (V8HImode, arr));
+ emit_move_insn (splatqi, array_to_constant (TImode, arr2));
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+ addr = force_reg (Pmode, XEXP (operands[1], 0));
+ emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
+ emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
+
+ /* offset - (addr & 0xF)
+ It is safe to use a single sfh, because each byte of offset is > 15 and
+ each byte of addr is <= 15. */
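+  /* For example (illustrative): with addr & 0xF == 3 the result bytes are
+     0x10+k - 3, i.e. 0x0D through 0x1C; no byte underflows, so no borrow
+     crosses a byte lane within the halfword subtract.  */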
+ emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
+
+ result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
+ emit_move_insn (operands[0], result);
+
+ DONE;
+}")
+
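+;; In the unpack masks below, a shufb control byte of the form 10xxxxxx
+;; (0x80) yields a zero byte, so the unsigned unpacks zero-extend directly;
+;; the signed unpacks shuffle into place and then sign-extend with
+;; xshw/xsbh.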
+(define_expand "vec_unpacku_hi_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+ ""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+ 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+ 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+ ""
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+ 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xshw (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+ 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xshw (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+  DONE;
+})
+
+(define_expand "vec_unpacku_hi_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+ ""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+ 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+ 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V16QImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+ 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xsbh (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+""
+{
+ rtx tmp1 = gen_reg_rtx (V16QImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+ 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (tmp1, operands[1], operands[1], mask));
+ emit_insn (gen_spu_xsbh (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+  DONE;
+})
+
+
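+;; In the extract and interleave masks below, shufb control bytes
+;; 0x00-0x0F select bytes of operand 1 and 0x10-0x1F bytes of operand 2.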
+(define_expand "vec_extract_evenv4si"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (vec_concat:V4SI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)]))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+
+(define_expand "vec_extract_evenv4sf"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)]))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x18, 0x19, 0x1A, 0x1B};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv8hi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (vec_concat:V8HI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x04, 0x05,
+ 0x08, 0x09, 0x0C, 0x0D,
+ 0x10, 0x11, 0x14, 0x15,
+ 0x18, 0x19, 0x1C, 0x1D};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_evenv16qi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (vec_concat:V16QI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)
+ (const_int 8)(const_int 10)(const_int 12)(const_int 14)]))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)
+ (const_int 8)(const_int 10)(const_int 12)(const_int 14)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x02, 0x04, 0x06,
+ 0x08, 0x0A, 0x0C, 0x0E,
+ 0x10, 0x12, 0x14, 0x16,
+ 0x18, 0x1A, 0x1C, 0x1E};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv4si"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (vec_concat:V4SI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)]))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv4sf"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)]))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x04, 0x05, 0x06, 0x07,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv8hi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (vec_concat:V8HI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x02, 0x03, 0x06, 0x07,
+ 0x0A, 0x0B, 0x0E, 0x0F,
+ 0x12, 0x13, 0x16, 0x17,
+ 0x1A, 0x1B, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_extract_oddv16qi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (vec_concat:V16QI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)
+ (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)
+ (const_int 9)(const_int 11)(const_int 13)(const_int 15)]))))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x01, 0x03, 0x05, 0x07,
+ 0x09, 0x0B, 0x0D, 0x0F,
+ 0x11, 0x13, 0x15, 0x17,
+ 0x19, 0x1B, 0x1D, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (vec_select:V4SF
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)]))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)])))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x14, 0x15, 0x16, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "spu_reg_operand" "=r")
+ (vec_select:V4SF
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "spu_reg_operand" "r")
+ (parallel [(const_int 2)(const_int 3)]))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "spu_reg_operand" "r")
+ (parallel [(const_int 2)(const_int 3)])))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (vec_select:V4SI
+ (vec_concat:V4SI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)]))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)])))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x10, 0x11, 0x12, 0x13,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x14, 0x15, 0x16, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+ (vec_select:V4SI
+ (vec_concat:V4SI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 2)(const_int 3)]))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 2)(const_int 3)])))
+ (parallel [(const_int 0)(const_int 2)(const_int 1)(const_int 3)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (vec_select:V8HI
+ (vec_concat:V8HI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)])))
+ (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5)
+ (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x01, 0x10, 0x11,
+ 0x02, 0x03, 0x12, 0x13,
+ 0x04, 0x05, 0x14, 0x15,
+ 0x06, 0x07, 0x16, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+ }")
+
+(define_expand "vec_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (vec_select:V8HI
+ (vec_concat:V8HI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
+ (parallel [(const_int 0)(const_int 4)(const_int 1)(const_int 5)
+ (const_int 2)(const_int 6)(const_int 3)(const_int 7)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x09, 0x18, 0x19,
+ 0x0A, 0x0B, 0x1A, 0x1B,
+ 0x0C, 0x0D, 0x1C, 0x1D,
+ 0x0E, 0x0F, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (vec_select:V16QI
+ (vec_concat:V16QI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+ (const_int 4)(const_int 5)(const_int 6)(const_int 7)])))
+ (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9)
+ (const_int 2)(const_int 10)(const_int 3)(const_int 11)
+ (const_int 4)(const_int 12)(const_int 5)(const_int 13)
+ (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x00, 0x10, 0x01, 0x11,
+ 0x02, 0x12, 0x03, 0x13,
+ 0x04, 0x14, 0x05, 0x15,
+ 0x06, 0x16, 0x07, 0x17};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (vec_select:V16QI
+ (vec_concat:V16QI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "spu_reg_operand" "r")
+ (parallel [(const_int 8)(const_int 9)(const_int 10)(const_int 11)
+ (const_int 12)(const_int 13)(const_int 14)(const_int 15)])))
+ (parallel [(const_int 0)(const_int 8)(const_int 1)(const_int 9)
+ (const_int 2)(const_int 10)(const_int 3)(const_int 11)
+ (const_int 4)(const_int 12)(const_int 5)(const_int 13)
+ (const_int 6)(const_int 14)(const_int 7)(const_int 15)])))]
+
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x08, 0x18, 0x09, 0x19,
+ 0x0A, 0x1A, 0x0B, 0x1B,
+ 0x0C, 0x1C, 0x0D, 0x1D,
+ 0x0E, 0x1E, 0x0F, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+ DONE;
+}")
+
+(define_expand "vec_pack_trunc_v8hi"
+ [(set (match_operand:V16QI 0 "spu_reg_operand" "=r")
+ (vec_concat:V16QI
+ (truncate:V8QI (match_operand:V8HI 1 "spu_reg_operand" "r"))
+ (truncate:V8QI (match_operand:V8HI 2 "spu_reg_operand" "r"))))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+ 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_pack_trunc_v4si"
+ [(set (match_operand:V8HI 0 "spu_reg_operand" "=r")
+ (vec_concat:V8HI
+ (truncate:V4HI (match_operand:V4SI 1 "spu_reg_operand" "r"))
+ (truncate:V4HI (match_operand:V4SI 2 "spu_reg_operand" "r"))))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+ 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], mask));
+
+ DONE;
+}")
+
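+;; The stack protector patterns below load the guard value, store it to
+;; (or compare it against) the canary slot, and then clear the scratch
+;; register so the guard value is not left behind in a register.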
+(define_insn "stack_protect_set"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ ""
+ "lq%p1\t%2,%1\;stq%p0\t%2,%0\;xor\t%2,%2,%2"
+ [(set_attr "length" "12")
+ (set_attr "type" "multi1")]
+)
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx compare_result;
+ rtx bcomp, loc_ref;
+
+ compare_result = gen_reg_rtx (SImode);
+
+ emit_insn (gen_stack_protect_test_si (compare_result,
+ operands[0],
+ operands[1]));
+
+ bcomp = gen_rtx_NE (SImode, compare_result, const0_rtx);
+
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[2]);
+
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+
+ DONE;
+})
+
+(define_insn "stack_protect_test_si"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 3 "=&r") (const_int 0))]
+ ""
+ "lq%p1\t%0,%1\;lq%p2\t%3,%2\;ceq\t%0,%0,%3\;xor\t%3,%3,%3"
+ [(set_attr "length" "16")
+ (set_attr "type" "multi1")]
+)
+
diff --git a/gcc/config/spu/spu.opt b/gcc/config/spu/spu.opt
new file mode 100644
index 000000000..75cf8c3fd
--- /dev/null
+++ b/gcc/config/spu/spu.opt
@@ -0,0 +1,105 @@
+; Options for the SPU port of the compiler
+; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+; This file is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3 of the License, or (at your option)
+; any later version.
+
+; This file is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mwarn-reloc
+Target Report Mask(WARN_RELOC)
+Emit warnings when run-time relocations are generated
+
+merror-reloc
+Target Report Mask(ERROR_RELOC)
+Emit errors when run-time relocations are generated
+
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(spu_branch_cost) Init(20)
+Specify cost of branches (Default 20)
+
+msafe-dma
+Target Report RejectNegative Mask(SAFE_DMA)
+Make sure loads and stores are not moved past DMA instructions
+
+munsafe-dma
+Target Report RejectNegative InverseMask(SAFE_DMA)
+Volatile must be specified on any memory that is affected by DMA
+
+mdual-nops
+Target Report Var(spu_dual_nops,10) Init(10)
+Insert nops when it might improve performance by allowing dual issue (default)
+
+mdual-nops=
+Target RejectNegative Joined UInteger Var(spu_dual_nops)
+Insert nops when it might improve performance by allowing dual issue (default)
+
+mstdmain
+Target Report Mask(STD_MAIN)
+Use standard main function as entry for startup
+
+mbranch-hints
+Target Report Mask(BRANCH_HINTS)
+Generate branch hints for branches
+
+mhint-max-nops=
+Target RejectNegative Joined UInteger Var(spu_max_nops) Init(2)
+Maximum number of nops to insert for a hint (Default 2)
+
+mhint-max-distance=
+Target RejectNegative Joined Var(spu_max_distance_str)
+Approximate maximum number of instructions to allow between a hint and its branch [125]
+
+msmall-mem
+Target Report RejectNegative InverseMask(LARGE_MEM)
+Generate code for 18-bit addressing
+
+mlarge-mem
+Target Report RejectNegative Mask(LARGE_MEM)
+Generate code for 32-bit addressing
+
+mfixed-range=
+Target RejectNegative Joined Var(spu_fixed_range_string)
+Specify range of registers to make fixed
+
+msafe-hints
+Target Report Mask(SAFE_HINTS)
+Insert hbrp instructions after hinted branch targets to avoid the SPU hang issue
+
+march=
+Target RejectNegative Joined Var(spu_arch_string)
+Generate code for given CPU
+
+mtune=
+Target RejectNegative Joined Var(spu_tune_string)
+Schedule code for given CPU
+
+mea32
+Target Report RejectNegative Var(spu_ea_model,32) Init(32)
+Access variables in 32-bit PPU objects (default)
+
+mea64
+Target Report RejectNegative Var(spu_ea_model,64)
+Access variables in 64-bit PPU objects
+
+maddress-space-conversion
+Target Report Mask(ADDRESS_SPACE_CONVERSION)
+Allow conversions between __ea and generic pointers (default)
+
+mcache-size=
+Target Report RejectNegative Joined UInteger
+Size (in KB) of software data cache
+
+matomic-updates
+Target Report
+Atomically write back software data cache lines (default)
+
diff --git a/gcc/config/spu/spu_cache.h b/gcc/config/spu/spu_cache.h
new file mode 100644
index 000000000..66a679be5
--- /dev/null
+++ b/gcc/config/spu/spu_cache.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU_CACHE_H
+#define _SPU_CACHE_H
+
+void *__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty);
+void *__cache_fetch (__ea void *ea);
+void __cache_evict (__ea void *ea);
+void __cache_flush (void);
+void __cache_touch (__ea void *ea);
+
+#define cache_fetch_dirty(_ea, _n_bytes_dirty) \
+ __cache_fetch_dirty(_ea, _n_bytes_dirty)
+
+#define cache_fetch(_ea) __cache_fetch(_ea)
+#define cache_touch(_ea) __cache_touch(_ea)
+#define cache_evict(_ea) __cache_evict(_ea)
+#define cache_flush() __cache_flush()
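+
+/* Usage sketch (illustrative only; assumes an __ea-qualified object and
+   a software cache configured via -mcache-size=):
+
+     extern __ea int counter;
+     int *p = (int *) cache_fetch_dirty (&counter, sizeof (int));
+     (*p)++;          // modify the locally cached copy
+     cache_flush ();  // write dirty cache lines back
+ */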
+
+#endif
diff --git a/gcc/config/spu/spu_internals.h b/gcc/config/spu/spu_internals.h
new file mode 100644
index 000000000..8da98e4ed
--- /dev/null
+++ b/gcc/config/spu/spu_internals.h
@@ -0,0 +1,421 @@
+/* Definitions of Synergistic Processing Unit (SPU). */
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU_INTERNALS_H
+#define _SPU_INTERNALS_H
+
+/* For a typical GCC implementation, the vector keyword is defined here
+ * as a macro.  If this macro conflicts with user code, the user needs
+ * to undefine it.  An extended GCC implementation may implement this
+ * keyword differently, such that it never conflicts, and will define
+ * the macro __VECTOR_KEYWORD_SUPPORTED__. */
+#ifndef __VECTOR_KEYWORD_SUPPORTED__
+#define vector __vector
+#endif
+
+
+/* The SPU-specific instruction macros, si_*(), correspond one-to-one
+ * with the SPU instructions in the ISA.  The arguments are the same,
+ * with the following exceptions:
+ *  - any instruction which both reads and writes rt will have an
+ *    extra parameter in the macro.
+ *  - instructions which append zero to the immediate field assume
+ *    the value given in a macro already has the zeroes appended.
+ *  - integer/float convert functions expect a value from 0 to 127,
+ *    i.e., the bias is added by the compiler.
+ *
+ * Parameters named 'imm' accept an integer literal.
+ * Parameters named 'r[abcdt]' accept a qword argument.
+ * Parameters named 'scalar' accept a scalar argument.
+ */
+
+#define qword __vector signed char
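+
+/* For example (an illustrative sketch, not part of this header):
+ *
+ *     qword a = si_from_int (42);   // promote a scalar into a qword
+ *     qword b = si_ai (a, 10);      // add immediate, word-wise
+ *     int   r = si_to_int (b);      // extract from the preferred slot
+ */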
+
+#define si_lqd(ra,imm) __builtin_si_lqd(ra,imm)
+#define si_lqx(ra,rb) __builtin_si_lqx(ra,rb)
+#define si_lqa(imm) __builtin_si_lqa(imm)
+#define si_lqr(imm) __builtin_si_lqr(imm)
+#define si_stqd(rt,ra,imm) __builtin_si_stqd(rt,ra,imm)
+#define si_stqx(rt,ra,rb) __builtin_si_stqx(rt,ra,rb)
+#define si_stqa(rt,imm) __builtin_si_stqa(rt,imm)
+#define si_stqr(rt,imm) __builtin_si_stqr(rt,imm)
+#define si_cbd(ra,imm) __builtin_si_cbd(ra,imm)
+#define si_cbx(ra,rb) __builtin_si_cbx(ra,rb)
+#define si_chd(ra,imm) __builtin_si_chd(ra,imm)
+#define si_chx(ra,rb) __builtin_si_chx(ra,rb)
+#define si_cwd(ra,imm) __builtin_si_cwd(ra,imm)
+#define si_cwx(ra,rb) __builtin_si_cwx(ra,rb)
+#define si_cdd(ra,imm) __builtin_si_cdd(ra,imm)
+#define si_cdx(ra,rb) __builtin_si_cdx(ra,rb)
+#define si_ilh(imm) __builtin_si_ilh(imm)
+#define si_ilhu(imm) __builtin_si_ilhu(imm)
+#define si_il(imm) __builtin_si_il(imm)
+#define si_ila(imm) __builtin_si_ila(imm)
+#define si_iohl(ra,imm) __builtin_si_iohl(ra,imm)
+#define si_fsmbi(imm) __builtin_si_fsmbi(imm)
+#define si_ah(ra,rb) __builtin_si_ah(ra,rb)
+#define si_ahi(ra,imm) __builtin_si_ahi(ra,imm)
+#define si_a(ra,rb) __builtin_si_a(ra,rb)
+#define si_ai(ra,imm) __builtin_si_ai(ra,imm)
+#define si_addx(ra,rb,rt) __builtin_si_addx(ra,rb,rt)
+#define si_cg(ra,rb) __builtin_si_cg(ra,rb)
+#define si_cgx(ra,rb,rt) __builtin_si_cgx(ra,rb,rt)
+#define si_sfh(ra,rb) __builtin_si_sfh(ra,rb)
+#define si_sfhi(imm,ra) __builtin_si_sfhi(imm,ra)
+#define si_sf(ra,rb) __builtin_si_sf(ra,rb)
+#define si_sfi(ra,imm) __builtin_si_sfi(ra,imm)
+#define si_sfx(ra,rb,rt) __builtin_si_sfx(ra,rb,rt)
+#define si_bg(ra,rb) __builtin_si_bg(ra,rb)
+#define si_bgx(ra,rb,rt) __builtin_si_bgx(ra,rb,rt)
+#define si_mpy(ra,rb) __builtin_si_mpy(ra,rb)
+#define si_mpyu(ra,rb) __builtin_si_mpyu(ra,rb)
+#define si_mpyi(ra,imm) __builtin_si_mpyi(ra,imm)
+#define si_mpyui(ra,imm) __builtin_si_mpyui(ra,imm)
+#define si_mpya(ra,rb,rc) __builtin_si_mpya(ra,rb,rc)
+#define si_mpyh(ra,rb) __builtin_si_mpyh(ra,rb)
+#define si_mpys(ra,rb) __builtin_si_mpys(ra,rb)
+#define si_mpyhh(ra,rb) __builtin_si_mpyhh(ra,rb)
+#define si_mpyhhu(ra,rb) __builtin_si_mpyhhu(ra,rb)
+#define si_mpyhha(ra,rb,rc) __builtin_si_mpyhha(ra,rb,rc)
+#define si_mpyhhau(ra,rb,rc) __builtin_si_mpyhhau(ra,rb,rc)
+#define si_clz(ra) __builtin_si_clz(ra)
+#define si_cntb(ra) __builtin_si_cntb(ra)
+#define si_fsmb(ra) __builtin_si_fsmb(ra)
+#define si_fsmh(ra) __builtin_si_fsmh(ra)
+#define si_fsm(ra) __builtin_si_fsm(ra)
+#define si_gbb(ra) __builtin_si_gbb(ra)
+#define si_gbh(ra) __builtin_si_gbh(ra)
+#define si_gb(ra) __builtin_si_gb(ra)
+#define si_avgb(ra,rb) __builtin_si_avgb(ra,rb)
+#define si_absdb(ra,rb) __builtin_si_absdb(ra,rb)
+#define si_sumb(ra,rb) __builtin_si_sumb(ra,rb)
+#define si_xsbh(ra) __builtin_si_xsbh(ra)
+#define si_xshw(ra) __builtin_si_xshw(ra)
+#define si_xswd(ra) __builtin_si_xswd(ra)
+#define si_and(ra,rb) __builtin_si_and(ra,rb)
+#define si_andc(ra,rb) __builtin_si_andc(ra,rb)
+#define si_andbi(ra,imm) __builtin_si_andbi(ra,imm)
+#define si_andhi(ra,imm) __builtin_si_andhi(ra,imm)
+#define si_andi(ra,imm) __builtin_si_andi(ra,imm)
+#define si_or(ra,rb) __builtin_si_or(ra,rb)
+#define si_orc(ra,rb) __builtin_si_orc(ra,rb)
+#define si_orbi(ra,imm) __builtin_si_orbi(ra,imm)
+#define si_orhi(ra,imm) __builtin_si_orhi(ra,imm)
+#define si_ori(ra,imm) __builtin_si_ori(ra,imm)
+#define si_orx(ra) __builtin_si_orx(ra)
+#define si_xor(ra,rb) __builtin_si_xor(ra,rb)
+#define si_xorbi(ra,imm) __builtin_si_xorbi(ra,imm)
+#define si_xorhi(ra,imm) __builtin_si_xorhi(ra,imm)
+#define si_xori(ra,imm) __builtin_si_xori(ra,imm)
+#define si_nand(ra,rb) __builtin_si_nand(ra,rb)
+#define si_nor(ra,rb) __builtin_si_nor(ra,rb)
+#define si_eqv(ra,rb) __builtin_si_eqv(ra,rb)
+#define si_selb(ra,rb,rc) __builtin_si_selb(ra,rb,rc)
+#define si_shufb(ra,rb,rc) __builtin_si_shufb(ra,rb,rc)
+#define si_shlh(ra,rb) __builtin_si_shlh(ra,rb)
+#define si_shlhi(ra,imm) __builtin_si_shlhi(ra,imm)
+#define si_shl(ra,rb) __builtin_si_shl(ra,rb)
+#define si_shli(ra,imm) __builtin_si_shli(ra,imm)
+#define si_shlqbi(ra,rb) __builtin_si_shlqbi(ra,rb)
+#define si_shlqbii(ra,imm) __builtin_si_shlqbii(ra,imm)
+#define si_shlqby(ra,rb) __builtin_si_shlqby(ra,rb)
+#define si_shlqbyi(ra,imm) __builtin_si_shlqbyi(ra,imm)
+#define si_shlqbybi(ra,rb) __builtin_si_shlqbybi(ra,rb)
+#define si_roth(ra,rb) __builtin_si_roth(ra,rb)
+#define si_rothi(ra,imm) __builtin_si_rothi(ra,imm)
+#define si_rot(ra,rb) __builtin_si_rot(ra,rb)
+#define si_roti(ra,imm) __builtin_si_roti(ra,imm)
+#define si_rotqby(ra,rb) __builtin_si_rotqby(ra,rb)
+#define si_rotqbyi(ra,imm) __builtin_si_rotqbyi(ra,imm)
+#define si_rotqbybi(ra,rb) __builtin_si_rotqbybi(ra,rb)
+#define si_rotqbi(ra,rb) __builtin_si_rotqbi(ra,rb)
+#define si_rotqbii(ra,imm) __builtin_si_rotqbii(ra,imm)
+#define si_rothm(ra,rb) __builtin_si_rothm(ra,rb)
+#define si_rothmi(ra,imm) __builtin_si_rothmi(ra,imm)
+#define si_rotm(ra,rb) __builtin_si_rotm(ra,rb)
+#define si_rotmi(ra,imm) __builtin_si_rotmi(ra,imm)
+#define si_rotqmby(ra,rb) __builtin_si_rotqmby(ra,rb)
+#define si_rotqmbyi(ra,imm) __builtin_si_rotqmbyi(ra,imm)
+#define si_rotqmbi(ra,rb) __builtin_si_rotqmbi(ra,rb)
+#define si_rotqmbii(ra,imm) __builtin_si_rotqmbii(ra,imm)
+#define si_rotqmbybi(ra,rb) __builtin_si_rotqmbybi(ra,rb)
+#define si_rotmah(ra,rb) __builtin_si_rotmah(ra,rb)
+#define si_rotmahi(ra,imm) __builtin_si_rotmahi(ra,imm)
+#define si_rotma(ra,rb) __builtin_si_rotma(ra,rb)
+#define si_rotmai(ra,imm) __builtin_si_rotmai(ra,imm)
+#define si_heq(ra,rb) __builtin_si_heq(ra,rb)
+#define si_heqi(ra,imm) __builtin_si_heqi(ra,imm)
+#define si_hgt(ra,rb) __builtin_si_hgt(ra,rb)
+#define si_hgti(ra,imm) __builtin_si_hgti(ra,imm)
+#define si_hlgt(ra,rb) __builtin_si_hlgt(ra,rb)
+#define si_hlgti(ra,imm) __builtin_si_hlgti(ra,imm)
+#define si_ceqb(ra,rb) __builtin_si_ceqb(ra,rb)
+#define si_ceqbi(ra,imm) __builtin_si_ceqbi(ra,imm)
+#define si_ceqh(ra,rb) __builtin_si_ceqh(ra,rb)
+#define si_ceqhi(ra,imm) __builtin_si_ceqhi(ra,imm)
+#define si_ceq(ra,rb) __builtin_si_ceq(ra,rb)
+#define si_ceqi(ra,imm) __builtin_si_ceqi(ra,imm)
+#define si_cgtb(ra,rb) __builtin_si_cgtb(ra,rb)
+#define si_cgtbi(ra,imm) __builtin_si_cgtbi(ra,imm)
+#define si_cgth(ra,rb) __builtin_si_cgth(ra,rb)
+#define si_cgthi(ra,imm) __builtin_si_cgthi(ra,imm)
+#define si_cgt(ra,rb) __builtin_si_cgt(ra,rb)
+#define si_cgti(ra,imm) __builtin_si_cgti(ra,imm)
+#define si_clgtb(ra,rb) __builtin_si_clgtb(ra,rb)
+#define si_clgtbi(ra,imm) __builtin_si_clgtbi(ra,imm)
+#define si_clgth(ra,rb) __builtin_si_clgth(ra,rb)
+#define si_clgthi(ra,imm) __builtin_si_clgthi(ra,imm)
+#define si_clgt(ra,rb) __builtin_si_clgt(ra,rb)
+#define si_clgti(ra,imm) __builtin_si_clgti(ra,imm)
+#define si_bisled(ra) __builtin_si_bisled(ra,0)
+#define si_bisledd(ra) __builtin_si_bisledd(ra,0)
+#define si_bislede(ra) __builtin_si_bislede(ra,0)
+#define si_fa(ra,rb) __builtin_si_fa(ra,rb)
+#define si_dfa(ra,rb) __builtin_si_dfa(ra,rb)
+#define si_fs(ra,rb) __builtin_si_fs(ra,rb)
+#define si_dfs(ra,rb) __builtin_si_dfs(ra,rb)
+#define si_fm(ra,rb) __builtin_si_fm(ra,rb)
+#define si_dfm(ra,rb) __builtin_si_dfm(ra,rb)
+#define si_fma(ra,rb,rc) __builtin_si_fma(ra,rb,rc)
+#define si_dfma(ra,rb,rc) __builtin_si_dfma(ra,rb,rc)
+#define si_dfnma(ra,rb,rc) __builtin_si_dfnma(ra,rb,rc)
+#define si_fnms(ra,rb,rc) __builtin_si_fnms(ra,rb,rc)
+#define si_dfnms(ra,rb,rc) __builtin_si_dfnms(ra,rb,rc)
+#define si_fms(ra,rb,rc) __builtin_si_fms(ra,rb,rc)
+#define si_dfms(ra,rb,rc) __builtin_si_dfms(ra,rb,rc)
+#define si_frest(ra) __builtin_si_frest(ra)
+#define si_frsqest(ra) __builtin_si_frsqest(ra)
+#define si_fi(ra,rb) __builtin_si_fi(ra,rb)
+#define si_csflt(ra,imm) __builtin_si_csflt(ra,imm)
+#define si_cflts(ra,imm) __builtin_si_cflts(ra,imm)
+#define si_cuflt(ra,imm) __builtin_si_cuflt(ra,imm)
+#define si_cfltu(ra,imm) __builtin_si_cfltu(ra,imm)
+#define si_frds(ra) __builtin_si_frds(ra)
+#define si_fesd(ra) __builtin_si_fesd(ra)
+#define si_fceq(ra,rb) __builtin_si_fceq(ra,rb)
+#define si_fcmeq(ra,rb) __builtin_si_fcmeq(ra,rb)
+#define si_fcgt(ra,rb) __builtin_si_fcgt(ra,rb)
+#define si_fcmgt(ra,rb) __builtin_si_fcmgt(ra,rb)
+#define si_stop(imm) __builtin_si_stop(imm)
+#define si_stopd(ra,rb,rc) __builtin_si_stopd(ra,rb,rc)
+#define si_lnop() __builtin_si_lnop()
+#define si_nop() __builtin_si_nop()
+#define si_sync() __builtin_si_sync()
+#define si_syncc() __builtin_si_syncc()
+#define si_dsync() __builtin_si_dsync()
+#define si_mfspr(imm) __builtin_si_mfspr(imm)
+#define si_mtspr(imm,ra) __builtin_si_mtspr(imm,ra)
+#define si_fscrrd() __builtin_si_fscrrd()
+#define si_fscrwr(ra) __builtin_si_fscrwr(ra)
+#define si_rdch(imm) __builtin_si_rdch(imm)
+#define si_rchcnt(imm) __builtin_si_rchcnt(imm)
+#define si_wrch(imm,ra) __builtin_si_wrch(imm,ra)
+
+/* celledp-only instructions */
+#ifdef __SPU_EDP__
+#define si_dfceq(ra,rb) __builtin_si_dfceq(ra,rb)
+#define si_dfcmeq(ra,rb) __builtin_si_dfcmeq(ra,rb)
+#define si_dfcgt(ra,rb) __builtin_si_dfcgt(ra,rb)
+#define si_dfcmgt(ra,rb) __builtin_si_dfcmgt(ra,rb)
+#define si_dftsv(ra,imm) __builtin_si_dftsv(ra,imm)
+#endif /* __SPU_EDP__ */
+
+#define si_from_char(scalar) __builtin_si_from_char(scalar)
+#define si_from_uchar(scalar) __builtin_si_from_uchar(scalar)
+#define si_from_short(scalar) __builtin_si_from_short(scalar)
+#define si_from_ushort(scalar) __builtin_si_from_ushort(scalar)
+#define si_from_int(scalar) __builtin_si_from_int(scalar)
+#define si_from_uint(scalar) __builtin_si_from_uint(scalar)
+#define si_from_llong(scalar) __builtin_si_from_long(scalar)
+#define si_from_ullong(scalar) __builtin_si_from_ulong(scalar)
+#define si_from_float(scalar) __builtin_si_from_float(scalar)
+#define si_from_double(scalar) __builtin_si_from_double(scalar)
+#define si_from_ptr(scalar) __builtin_si_from_ptr(scalar)
+
+#define si_to_char(ra) __builtin_si_to_char(ra)
+#define si_to_uchar(ra) __builtin_si_to_uchar(ra)
+#define si_to_short(ra) __builtin_si_to_short(ra)
+#define si_to_ushort(ra) __builtin_si_to_ushort(ra)
+#define si_to_int(ra) __builtin_si_to_int(ra)
+#define si_to_uint(ra) __builtin_si_to_uint(ra)
+#define si_to_llong(ra) __builtin_si_to_long(ra)
+#define si_to_ullong(ra) __builtin_si_to_ulong(ra)
+#define si_to_float(ra) __builtin_si_to_float(ra)
+#define si_to_double(ra) __builtin_si_to_double(ra)
+#define si_to_ptr(ra) __builtin_si_to_ptr(ra)
+
+#define __align_hint(ptr,base,offset) __builtin_spu_align_hint(ptr,base,offset)
+
+/* generic spu_* intrinsics */
+
+#define spu_splats(scalar) __builtin_spu_splats(scalar)
+#define spu_convtf(ra,imm) __builtin_spu_convtf(ra,imm)
+#define spu_convts(ra,imm) __builtin_spu_convts(ra,imm)
+#define spu_convtu(ra,imm) __builtin_spu_convtu(ra,imm)
+#define spu_extend(ra) __builtin_spu_extend(ra)
+#define spu_roundtf(ra) __builtin_spu_roundtf(ra)
+#define spu_add(ra,rb) __builtin_spu_add(ra,rb)
+#define spu_addx(ra,rb,rt) __builtin_spu_addx(ra,rb,rt)
+#define spu_genc(ra,rb) __builtin_spu_genc(ra,rb)
+#define spu_gencx(ra,rb,rt) __builtin_spu_gencx(ra,rb,rt)
+#define spu_madd(ra,rb,rc) __builtin_spu_madd(ra,rb,rc)
+#define spu_nmadd(ra,rb,rc) __builtin_spu_nmadd(ra,rb,rc)
+#define spu_mhhadd(ra,rb,rc) __builtin_spu_mhhadd(ra,rb,rc)
+#define spu_msub(ra,rb,rc) __builtin_spu_msub(ra,rb,rc)
+#define spu_mul(ra,rb) __builtin_spu_mul(ra,rb)
+#define spu_mulh(ra,rb) __builtin_spu_mulh(ra,rb)
+#define spu_mule(ra,rb) __builtin_spu_mule(ra,rb)
+#define spu_mulo(ra,rb) __builtin_spu_mulo(ra,rb)
+#define spu_mulsr(ra,rb) __builtin_spu_mulsr(ra,rb)
+#define spu_nmsub(ra,rb,rc) __builtin_spu_nmsub(ra,rb,rc)
+#define spu_sub(ra,rb) __builtin_spu_sub(ra,rb)
+#define spu_subx(ra,rb,rt) __builtin_spu_subx(ra,rb,rt)
+#define spu_genb(ra,rb) __builtin_spu_genb(ra,rb)
+#define spu_genbx(ra,rb,rt) __builtin_spu_genbx(ra,rb,rt)
+#define spu_absd(ra,rb) __builtin_spu_absd(ra,rb)
+#define spu_avg(ra,rb) __builtin_spu_avg(ra,rb)
+#define spu_sumb(ra,rb) __builtin_spu_sumb(ra,rb)
+#define spu_bisled(ra) __builtin_spu_bisled(ra, 0)
+#define spu_bisled_d(ra) __builtin_spu_bisled_d(ra, 0)
+#define spu_bisled_e(ra) __builtin_spu_bisled_e(ra, 0)
+#define spu_cmpabseq(ra,rb) __builtin_spu_cmpabseq(ra,rb)
+#define spu_cmpabsgt(ra,rb) __builtin_spu_cmpabsgt(ra,rb)
+#define spu_cmpeq(ra,rb) __builtin_spu_cmpeq(ra,rb)
+#define spu_cmpgt(ra,rb) __builtin_spu_cmpgt(ra,rb)
+#define spu_testsv(ra,imm) __builtin_spu_testsv(ra,imm)
+#define spu_hcmpeq(ra,rb) __builtin_spu_hcmpeq(ra,rb)
+#define spu_hcmpgt(ra,rb) __builtin_spu_hcmpgt(ra,rb)
+#define spu_cntb(ra) __builtin_spu_cntb(ra)
+#define spu_cntlz(ra) __builtin_spu_cntlz(ra)
+#define spu_gather(ra) __builtin_spu_gather(ra)
+#define spu_maskb(ra) __builtin_spu_maskb(ra)
+#define spu_maskh(ra) __builtin_spu_maskh(ra)
+#define spu_maskw(ra) __builtin_spu_maskw(ra)
+#define spu_sel(ra,rb,rc) __builtin_spu_sel(ra,rb,rc)
+#define spu_shuffle(ra,rb,rc) __builtin_spu_shuffle(ra,rb,rc)
+#define spu_and(ra,rb) __builtin_spu_and(ra,rb)
+#define spu_andc(ra,rb) __builtin_spu_andc(ra,rb)
+#define spu_eqv(ra,rb) __builtin_spu_eqv(ra,rb)
+#define spu_nand(ra,rb) __builtin_spu_nand(ra,rb)
+#define spu_nor(ra,rb) __builtin_spu_nor(ra,rb)
+#define spu_or(ra,rb) __builtin_spu_or(ra,rb)
+#define spu_orc(ra,rb) __builtin_spu_orc(ra,rb)
+#define spu_orx(ra) __builtin_spu_orx(ra)
+#define spu_xor(ra,rb) __builtin_spu_xor(ra,rb)
+#define spu_rl(ra,rb) __builtin_spu_rl(ra,rb)
+#define spu_rlqw(ra,count) __builtin_spu_rlqw(ra,count)
+#define spu_rlqwbyte(ra,count) __builtin_spu_rlqwbyte(ra,count)
+#define spu_rlqwbytebc(ra,count) __builtin_spu_rlqwbytebc(ra,count)
+#define spu_rlmask(ra,rb) __builtin_spu_rlmask(ra,rb)
+#define spu_rlmaska(ra,rb) __builtin_spu_rlmaska(ra,rb)
+#define spu_rlmaskqw(ra,rb) __builtin_spu_rlmaskqw(ra,rb)
+#define spu_rlmaskqwbyte(ra,rb) __builtin_spu_rlmaskqwbyte(ra,rb)
+#define spu_rlmaskqwbytebc(ra,rb) __builtin_spu_rlmaskqwbytebc(ra,rb)
+#define spu_sl(ra,rb) __builtin_spu_sl(ra,rb)
+#define spu_slqw(ra,rb) __builtin_spu_slqw(ra,rb)
+#define spu_slqwbyte(ra,rb) __builtin_spu_slqwbyte(ra,rb)
+#define spu_slqwbytebc(ra,rb) __builtin_spu_slqwbytebc(ra,rb)
+#define spu_sr(ra,rb) __builtin_spu_sr(ra,rb)
+#define spu_sra(ra,rb) __builtin_spu_sra(ra,rb)
+#define spu_srqw(ra,rb) __builtin_spu_srqw(ra,rb)
+#define spu_srqwbyte(ra,rb) __builtin_spu_srqwbyte(ra,rb)
+#define spu_srqwbytebc(ra,rb) __builtin_spu_srqwbytebc(ra,rb)
+#define spu_extract(ra,pos) __builtin_spu_extract(ra,pos)
+#define spu_insert(scalar,ra,pos) __builtin_spu_insert(scalar,ra,pos)
+#define spu_promote(scalar,pos) __builtin_spu_promote(scalar,pos)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The type checking for some of these won't be accurate, but they need
+ * to be macros because of the immediate values. */
+#define spu_idisable() __builtin_spu_idisable()
+#define spu_ienable() __builtin_spu_ienable()
+#define spu_mfspr(imm) si_to_uint(si_mfspr((imm)))
+#define spu_mtspr(imm, ra) si_mtspr((imm),si_from_uint (ra))
+#define spu_mffpscr() ((vec_uint4)si_fscrrd())
+#define spu_mtfpscr(a) si_fscrwr((qword)a)
+#define spu_dsync() si_dsync()
+#define spu_stop(imm) si_stop(imm)
+#define spu_sync() si_sync()
+#define spu_sync_c() si_syncc()
+#define spu_readch(imm) si_to_uint(si_rdch((imm)))
+#define spu_readchqw(imm) ((vec_uint4)si_rdch((imm)))
+#define spu_readchcnt(imm) si_to_uint(si_rchcnt((imm)))
+#define spu_writech(imm, ra) si_wrch((imm), si_from_uint(ra))
+#define spu_writechqw(imm, ra) si_wrch((imm), (qword)(ra))
+
+/* The following functions are static and always_inline to make sure
+ * they don't show up in object files in which they aren't used. */
+
+static __inline__ vec_float4 spu_re (vec_float4 ra) __attribute__((__always_inline__));
+static __inline__ vec_float4 spu_rsqrte (vec_float4 ra) __attribute__((__always_inline__));
+
+static __inline__ vec_float4
+spu_re (vec_float4 ra)
+{
+ return (vec_float4) si_fi ((qword) (ra), si_frest ((qword) (ra)));
+}
+static __inline__ vec_float4
+spu_rsqrte (vec_float4 ra)
+{
+ return (vec_float4) si_fi ((qword) (ra), si_frsqest ((qword) (ra)));
+}
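+
+/* spu_re and spu_rsqrte return the hardware estimate refined once with
+   the fi (interpolate) instruction.  A sketch of one extra
+   Newton-Raphson step, if a tighter reciprocal is needed:
+
+     vec_float4 y = spu_re (x);
+     y = spu_madd (spu_nmsub (x, y, spu_splats (1.0f)), y, y);
+ */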
+
+/* composite intrinsics */
+static __inline__ void spu_mfcdma32(volatile void *ls, unsigned int ea, unsigned int size, unsigned int tagid, unsigned int cmd) __attribute__((__always_inline__));
+static __inline__ void spu_mfcdma64(volatile void *ls, unsigned int eahi, unsigned int ealow, unsigned int size, unsigned int tagid, unsigned int cmd) __attribute__((__always_inline__));
+static __inline__ unsigned int spu_mfcstat(unsigned int type) __attribute__((__always_inline__));
+
+static __inline__ void
+spu_mfcdma32(volatile void *ls, unsigned int ea, unsigned int size, unsigned int tagid, unsigned int cmd)
+{
+ si_wrch(MFC_LSA,si_from_ptr(ls));
+ si_wrch(MFC_EAL,si_from_uint(ea));
+ si_wrch(MFC_Size,si_from_uint(size));
+ si_wrch(MFC_TagID,si_from_uint(tagid));
+ si_wrch(MFC_Cmd,si_from_uint(cmd));
+}
+static __inline__ void
+spu_mfcdma64(volatile void *ls, unsigned int eahi, unsigned int ealow, unsigned int size, unsigned int tagid, unsigned int cmd)
+{
+ si_wrch(MFC_LSA,si_from_ptr(ls));
+ si_wrch(MFC_EAH,si_from_uint(eahi));
+ si_wrch(MFC_EAL,si_from_uint(ealow));
+ si_wrch(MFC_Size,si_from_uint(size));
+ si_wrch(MFC_TagID,si_from_uint(tagid));
+ si_wrch(MFC_Cmd,si_from_uint(cmd));
+}
+static __inline__ unsigned int
+spu_mfcstat(unsigned int type)
+{
+ si_wrch(MFC_WrTagUpdate,si_from_uint(type));
+ return si_to_uint(si_rdch(MFC_RdTagStat));
+}
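+
+/* Usage sketch (illustrative): a blocking 16 KB get on tag 0, assuming
+   the MFC_* command and tag-update constants from <spu_mfcio.h> and a
+   caller-supplied local-store buffer and effective address:
+
+     spu_mfcdma64 (ls_buf, ea_hi, ea_lo, 16384, 0, MFC_GET_CMD);
+     spu_writech (MFC_WrTagMask, 1 << 0);
+     spu_mfcstat (MFC_TAG_UPDATE_ALL);   // wait for completion
+ */
+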
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _SPU_INTERNALS_H */
+
diff --git a/gcc/config/spu/spu_intrinsics.h b/gcc/config/spu/spu_intrinsics.h
new file mode 100644
index 000000000..26d98b3b3
--- /dev/null
+++ b/gcc/config/spu/spu_intrinsics.h
@@ -0,0 +1,83 @@
+/* Definitions of Synergistic Processing Unit (SPU). */
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SPU_INTRINSICS_H
+#define _SPU_INTRINSICS_H
+
+#define vec_uchar16 __vector unsigned char
+#define vec_char16 __vector signed char
+#define vec_ushort8 __vector unsigned short
+#define vec_short8 __vector signed short
+#define vec_uint4 __vector unsigned int
+#define vec_int4 __vector signed int
+#define vec_ullong2 __vector unsigned long long
+#define vec_llong2 __vector signed long long
+#define vec_float4 __vector float
+#define vec_double2 __vector double
+
+/* SPU Channel Defines
+ */
+#define SPU_RdEventStat 0
+#define SPU_WrEventMask 1
+#define SPU_WrEventAck 2
+#define SPU_RdSigNotify1 3
+#define SPU_RdSigNotify2 4
+#define SPU_WrDec 7
+#define SPU_RdDec 8
+#define SPU_RdEventMask 11
+#define SPU_RdMachStat 13
+#define SPU_WrSRR0 14
+#define SPU_RdSRR0 15
+#define SPU_WrOutMbox 28
+#define SPU_RdInMbox 29
+#define SPU_WrOutIntrMbox 30
+
+/* MFC Channel Defines.
+ */
+#define MFC_WrMSSyncReq 9
+#define MFC_RdTagMask 12
+#define MFC_LSA 16
+#define MFC_EAH 17
+#define MFC_EAL 18
+#define MFC_Size 19
+#define MFC_TagID 20
+#define MFC_Cmd 21
+#define MFC_WrTagMask 22
+#define MFC_WrTagUpdate 23
+#define MFC_RdTagStat 24
+#define MFC_RdListStallStat 25
+#define MFC_WrListStallAck 26
+#define MFC_RdAtomicStat 27
+
+/* Bit flag mnemonics for test special value.
+ */
+#define SPU_SV_NEG_DENORM 0x01 /* negative denormalized number */
+#define SPU_SV_POS_DENORM 0x02 /* positive denormalized number */
+#define SPU_SV_NEG_ZERO 0x04 /* negative zero */
+#define SPU_SV_POS_ZERO 0x08 /* positive zero */
+#define SPU_SV_NEG_INFINITY 0x10 /* negative infinity */
+#define SPU_SV_POS_INFINITY 0x20 /* positive infinity */
+#define SPU_SV_NAN 0x40 /* not a number */
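+
+/* These flags are combined and passed to the test-special-value
+   intrinsic, e.g. (a sketch; requires an EDP-capable SPU):
+
+     spu_testsv (ra, SPU_SV_NAN | SPU_SV_NEG_INFINITY | SPU_SV_POS_INFINITY)
+ */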
+
+#include <spu_internals.h>
+
+#endif /* _SPU_INTRINSICS_H */
diff --git a/gcc/config/spu/spu_mfcio.h b/gcc/config/spu/spu_mfcio.h
new file mode 100644
index 000000000..e9fc880aa
--- /dev/null
+++ b/gcc/config/spu/spu_mfcio.h
@@ -0,0 +1,342 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __SPU_MFCIO_H__
+#define __SPU_MFCIO_H__ 1
+
+#include <spu_intrinsics.h>
+#ifdef __IN_LIBGCC2
+typedef unsigned long long uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/****************************************************************/
+/* DMA list element structure */
+/****************************************************************/
+
+#ifdef __GNUC__
+__extension__
+#endif
+typedef struct mfc_list_element {
+ uint64_t notify : 1; /** Stall-and-notify bit */
+ uint64_t reserved : 16;
+ uint64_t size : 15; /** Transfer size */
+ uint64_t eal : 32; /** Lower word of effective address */
+} mfc_list_element_t;
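+
+/* A sketch of filling one element for a list DMA (see the mfc_getl /
+   mfc_putl macros below); 'sz' and 'ea_low' are caller-supplied
+   placeholders:
+
+     mfc_list_element_t elem;
+     elem.notify   = 0;        // no stall-and-notify
+     elem.reserved = 0;
+     elem.size     = sz;       // transfer size in bytes
+     elem.eal      = ea_low;   // low word of the effective address
+ */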
+
+/****************************************************************/
+/* DMA max/min size definitions. */
+/****************************************************************/
+
+#define MFC_MIN_DMA_SIZE_SHIFT 4 /* 16 bytes */
+#define MFC_MAX_DMA_SIZE_SHIFT 14 /* 16384 bytes */
+
+#define MFC_MIN_DMA_SIZE (1 << MFC_MIN_DMA_SIZE_SHIFT)
+#define MFC_MAX_DMA_SIZE (1 << MFC_MAX_DMA_SIZE_SHIFT)
+
+#define MFC_MIN_DMA_SIZE_MASK (MFC_MIN_DMA_SIZE - 1)
+#define MFC_MAX_DMA_SIZE_MASK (MFC_MAX_DMA_SIZE - 1)
+
+#define MFC_MIN_DMA_LIST_ELEMENTS 1
+#define MFC_MAX_DMA_LIST_ELEMENTS 2048
+
+#define MFC_MIN_DMA_LIST_SIZE (MFC_MIN_DMA_LIST_ELEMENTS << 3) /* 8 bytes */
+#define MFC_MAX_DMA_LIST_SIZE (MFC_MAX_DMA_LIST_ELEMENTS << 3) /* 16K bytes */
+
+/****************************************************************/
+/* MFC DMA command modifiers to identify classes of operations. */
+/****************************************************************/
+
+/* Note: These command modifiers may be used in conjunction with the base
+   command types (i.e. MFC_PUT_CMD, MFC_GET_CMD, and MFC_SNDSIG_CMD)
+   to construct the various command permutations. */
+
+#define MFC_BARRIER_ENABLE 0x0001
+#define MFC_FENCE_ENABLE 0x0002
+#define MFC_LIST_ENABLE 0x0004
+#define MFC_RESULT_ENABLE 0x0010
+
+/****************************************************************/
+/* MFC DMA Put Commands */
+/****************************************************************/
+
+#define MFC_PUT_CMD 0x0020
+#define MFC_PUTB_CMD (MFC_PUT_CMD | MFC_BARRIER_ENABLE)
+#define MFC_PUTF_CMD (MFC_PUT_CMD | MFC_FENCE_ENABLE)
+#define MFC_PUTL_CMD (MFC_PUT_CMD | MFC_LIST_ENABLE)
+#define MFC_PUTLB_CMD (MFC_PUTL_CMD | MFC_BARRIER_ENABLE)
+#define MFC_PUTLF_CMD (MFC_PUTL_CMD | MFC_FENCE_ENABLE)
+
+#define MFC_PUTR_CMD (MFC_PUT_CMD | MFC_RESULT_ENABLE)
+#define MFC_PUTRB_CMD (MFC_PUTR_CMD | MFC_BARRIER_ENABLE)
+#define MFC_PUTRF_CMD (MFC_PUTR_CMD | MFC_FENCE_ENABLE)
+#define MFC_PUTRL_CMD (MFC_PUTR_CMD | MFC_LIST_ENABLE)
+#define MFC_PUTRLB_CMD (MFC_PUTRL_CMD | MFC_BARRIER_ENABLE)
+#define MFC_PUTRLF_CMD (MFC_PUTRL_CMD | MFC_FENCE_ENABLE)
+
+/****************************************************************/
+/* MFC DMA Get Commands */
+/****************************************************************/
+
+#define MFC_GET_CMD 0x0040
+#define MFC_GETB_CMD (MFC_GET_CMD | MFC_BARRIER_ENABLE)
+#define MFC_GETF_CMD (MFC_GET_CMD | MFC_FENCE_ENABLE)
+#define MFC_GETL_CMD (MFC_GET_CMD | MFC_LIST_ENABLE)
+#define MFC_GETLB_CMD (MFC_GETL_CMD | MFC_BARRIER_ENABLE)
+#define MFC_GETLF_CMD (MFC_GETL_CMD | MFC_FENCE_ENABLE)
+
+/****************************************************************/
+/* MFC Synchronization Commands */
+/****************************************************************/
+
+#define MFC_SNDSIG_CMD 0x00A0
+#define MFC_SNDSIGB_CMD (MFC_SNDSIG_CMD | MFC_BARRIER_ENABLE)
+#define MFC_SNDSIGF_CMD (MFC_SNDSIG_CMD | MFC_FENCE_ENABLE)
+#define MFC_BARRIER_CMD 0x00C0
+#define MFC_EIEIO_CMD 0x00C8
+#define MFC_SYNC_CMD 0x00CC
+
+/****************************************************************/
+/* MFC Atomic Commands */
+/****************************************************************/
+
+#define MFC_GETLLAR_CMD 0x00D0
+#define MFC_PUTLLC_CMD 0x00B4
+#define MFC_PUTLLUC_CMD 0x00B0
+#define MFC_PUTQLLUC_CMD 0x00B8
+
+/****************************************************************/
+/* MFC SL1 Storage Control Commands */
+/****************************************************************/
+
+#define MFC_SDCRT_CMD 0x0080
+#define MFC_SDCRTST_CMD 0x0081
+#define MFC_SDCRZ_CMD 0x0089
+#define MFC_SDCRST_CMD 0x008D
+#define MFC_SDCRF_CMD 0x008F
+
+/****************************************************************/
+/* Channel Defines */
+/****************************************************************/
+
+/* Event defines for channels
+ * 0 (SPU_RdEventStat),
+ * 1 (SPU_WrEventMask), and
+ * 2 (SPU_WrEventAck).
+ */
+#define MFC_TAG_STATUS_UPDATE_EVENT 0x00000001
+#define MFC_LIST_STALL_NOTIFY_EVENT 0x00000002
+#define MFC_COMMAND_QUEUE_AVAILABLE_EVENT 0x00000008
+#define MFC_IN_MBOX_AVAILABLE_EVENT 0x00000010
+#define MFC_DECREMENTER_EVENT 0x00000020
+#define MFC_OUT_INTR_MBOX_AVAILABLE_EVENT 0x00000040
+#define MFC_OUT_MBOX_AVAILABLE_EVENT 0x00000080
+#define MFC_SIGNAL_NOTIFY_2_EVENT 0x00000100
+#define MFC_SIGNAL_NOTIFY_1_EVENT 0x00000200
+#define MFC_LLR_LOST_EVENT 0x00000400
+#define MFC_PRIV_ATTN_EVENT 0x00000800
+#define MFC_MULTI_SRC_SYNC_EVENT 0x00001000
+
+/* Tag Status Update defines for channel 23 (MFC_WrTagUpdate) */
+#define MFC_TAG_UPDATE_IMMEDIATE 0x0
+#define MFC_TAG_UPDATE_ANY 0x1
+#define MFC_TAG_UPDATE_ALL 0x2
+
+/* Atomic Command Status defines for channel 27 (MFC_RdAtomicStat) */
+#define MFC_PUTLLC_STATUS 0x00000001
+#define MFC_PUTLLUC_STATUS 0x00000002
+#define MFC_GETLLAR_STATUS 0x00000004
+
+
+/****************************************************************/
+/* Definitions for constructing a 32-bit command word */
+/* including the transfer and replacement class id and the */
+/* command opcode. */
+/****************************************************************/
+#define MFC_CMD_WORD(_tid, _rid, _cmd) (((_tid)<<24)|((_rid)<<16)|(_cmd))
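+
+/* For example, MFC_CMD_WORD (0, 0, MFC_GET_CMD) constructs a plain get
+   command with transfer class and replacement class id 0. */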
+
+
+/* Addressing Utilities */
+#define mfc_ea2h(ea) (unsigned int)((unsigned long long)(ea)>>32)
+#define mfc_ea2l(ea) (unsigned int)(ea)
+#define mfc_hl2ea(h,l) si_to_ullong(si_selb(si_from_uint(h),\
+ si_rotqbyi(si_from_uint(l), -4),\
+ si_fsmbi(0x0f0f)))
+#define mfc_ceil128(v) (((v) + 127) & ~127)
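+
+/* For example (a sketch; 'ea' and 'n' are caller-supplied):
+
+     unsigned int hi = mfc_ea2h (ea);   // high word of effective address
+     unsigned int lo = mfc_ea2l (ea);   // low word
+     unsigned int sz = mfc_ceil128 (n); // round up to a 128-byte line
+ */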
+
+/* MFC DMA */
+#define mfc_put( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUT_CMD))
+#define mfc_putf( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTF_CMD))
+#define mfc_putb( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTB_CMD))
+#define mfc_get( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GET_CMD))
+#define mfc_getf( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETF_CMD))
+#define mfc_getb( ls,ea,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETB_CMD))
+
+/* MFC list DMA */
+#define mfc_putl( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTL_CMD))
+#define mfc_putlf( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTLF_CMD))
+#define mfc_putlb( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_PUTLB_CMD))
+#define mfc_getl( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETL_CMD))
+#define mfc_getlf( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETLF_CMD))
+#define mfc_getlb( ls,ea,lsa,size,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),(unsigned int)(lsa),size,tag,MFC_CMD_WORD(tid,rid,MFC_GETLB_CMD))
+
+/* MFC Atomic Update DMA */
+#define mfc_getllar( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_GETLLAR_CMD))
+#define mfc_putllc( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_PUTLLC_CMD))
+#define mfc_putlluc( ls,ea,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128, 0,MFC_CMD_WORD(tid,rid,MFC_PUTLLUC_CMD))
+#define mfc_putqlluc(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),128,tag,MFC_CMD_WORD(tid,rid,MFC_PUTQLLUC_CMD))
+
+/* MFC Synchronization Commands */
+#define mfc_sndsig( ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIG_CMD))
+#define mfc_sndsigb(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIGB_CMD))
+#define mfc_sndsigf(ls,ea,tag,tid,rid) spu_mfcdma64(ls,mfc_ea2h(ea),mfc_ea2l(ea),4,tag,MFC_CMD_WORD(tid,rid,MFC_SNDSIGF_CMD))
+#define mfc_barrier(tag) spu_mfcdma32(0,0,0,tag,MFC_BARRIER_CMD)
+#define mfc_eieio(tag,tid,rid) spu_mfcdma32(0,0,0,tag,MFC_CMD_WORD(tid,rid,MFC_EIEIO_CMD))
+#define mfc_sync(tag) spu_mfcdma32(0,0,0,tag,MFC_SYNC_CMD)
+
+/* MFC SL1 Storage Control Commands */
+#define mfc_sdcrt( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRT_CMD))
+#define mfc_sdcrtst(ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRTST_CMD))
+#define mfc_sdcrz( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRZ_CMD))
+#define mfc_sdcrst( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRST_CMD))
+#define mfc_sdcrf( ea,size,tag,tid,rid) spu_mfcdma64(0,mfc_ea2h(ea),mfc_ea2l(ea),size,tag,MFC_CMD_WORD(tid,rid,MFC_SDCRF_CMD))
+
+/* DMA Queue */
+#define mfc_stat_cmd_queue() spu_readchcnt(MFC_Cmd)
+
+/* MFC Tag-Status */
+#define mfc_write_tag_mask(mask) spu_writech(MFC_WrTagMask,mask)
+#define mfc_read_tag_mask() spu_readch(MFC_RdTagMask)
+
+#define mfc_write_tag_update(ts) spu_writech(MFC_WrTagUpdate,ts)
+#define mfc_write_tag_update_immediate() mfc_write_tag_update(MFC_TAG_UPDATE_IMMEDIATE)
+#define mfc_write_tag_update_any() mfc_write_tag_update(MFC_TAG_UPDATE_ANY)
+#define mfc_write_tag_update_all() mfc_write_tag_update(MFC_TAG_UPDATE_ALL)
+#define mfc_stat_tag_update() spu_readchcnt(MFC_WrTagUpdate)
+
+#define mfc_read_tag_status() spu_readch(MFC_RdTagStat)
+#define mfc_read_tag_status_immediate() (mfc_write_tag_update_immediate(), mfc_read_tag_status())
+#define mfc_read_tag_status_any() (mfc_write_tag_update_any(), mfc_read_tag_status())
+#define mfc_read_tag_status_all() (mfc_write_tag_update_all(), mfc_read_tag_status())
+#define mfc_stat_tag_status() spu_readchcnt(MFC_RdTagStat)
+
+/* MFC List Stall-and-Notify Tag */
+#define mfc_read_list_stall_status() spu_readch(MFC_RdListStallStat)
+#define mfc_stat_list_stall_status() spu_readchcnt(MFC_RdListStallStat)
+#define mfc_write_list_stall_ack(tag) spu_writech(MFC_WrListStallAck,tag)
+
+/* Atomic DMA */
+#define mfc_read_atomic_status() spu_readch(MFC_RdAtomicStat)
+#define mfc_stat_atomic_status() spu_readchcnt(MFC_RdAtomicStat)
+
+/* MFC Multi-source Synchronization */
+#define mfc_write_multi_src_sync_request() spu_writech(MFC_WrMSSyncReq,0)
+#define mfc_stat_multi_src_sync_request() spu_readchcnt(MFC_WrMSSyncReq)
+
+/* SPU Signal */
+#define spu_read_signal1() spu_readch(SPU_RdSigNotify1)
+#define spu_stat_signal1() spu_readchcnt(SPU_RdSigNotify1)
+#define spu_read_signal2() spu_readch(SPU_RdSigNotify2)
+#define spu_stat_signal2() spu_readchcnt(SPU_RdSigNotify2)
+
+/* SPU/PPE Mailbox */
+#define spu_read_in_mbox() spu_readch(SPU_RdInMbox)
+#define spu_stat_in_mbox() spu_readchcnt(SPU_RdInMbox)
+#define spu_write_out_mbox(a) spu_writech(SPU_WrOutMbox,a)
+#define spu_stat_out_mbox() spu_readchcnt(SPU_WrOutMbox)
+#define spu_write_out_intr_mbox(a) spu_writech(SPU_WrOutIntrMbox,a)
+#define spu_stat_out_intr_mbox() spu_readchcnt(SPU_WrOutIntrMbox)
+
+/* SPU Decrementer */
+#define spu_read_decrementer() spu_readch(SPU_RdDec)
+#define spu_write_decrementer(cnt) spu_writech(SPU_WrDec,(cnt))
+
+/* SPU Event */
+#define spu_read_event_status() spu_readch(SPU_RdEventStat)
+#define spu_stat_event_status() spu_readchcnt(SPU_RdEventStat)
+#define spu_write_event_mask(mask) spu_writech(SPU_WrEventMask,(mask))
+#define spu_write_event_ack(ack) spu_writech(SPU_WrEventAck,(ack))
+#define spu_read_event_mask() spu_readch(SPU_RdEventMask)
+
+/* SPU State Management */
+#define spu_read_machine_status() spu_readch(SPU_RdMachStat)
+#define spu_write_srr0(srr0) spu_writech(SPU_WrSRR0,srr0)
+#define spu_read_srr0() spu_readch(SPU_RdSRR0)
+
+/* Interrupt-Safe Critical Sections */
+
+static __inline__ unsigned int mfc_begin_critical_section (void)
+ __attribute__ ((__always_inline__));
+
+static __inline__ unsigned int
+mfc_begin_critical_section (void)
+{
+#ifdef SPU_MFCIO_INTERRUPT_SAFE
+ unsigned int __status = spu_read_machine_status ();
+ spu_idisable ();
+ return __status;
+#else
+ return 0;
+#endif
+}
+
+static __inline__ void mfc_end_critical_section (unsigned int)
+ __attribute__ ((__always_inline__));
+
+static __inline__ void
+mfc_end_critical_section (unsigned int __status __attribute__ ((__unused__)))
+{
+#ifdef SPU_MFCIO_INTERRUPT_SAFE
+ if (__status & 1)
+ spu_ienable ();
+#endif
+}
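+
+/* Usage sketch (the calls are no-ops unless SPU_MFCIO_INTERRUPT_SAFE
+   is defined):
+
+     unsigned int st = mfc_begin_critical_section ();
+     // ... channel accesses that must not be interrupted ...
+     mfc_end_critical_section (st);
+ */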
+
+/* MFC Tag Manager */
+
+#define MFC_TAG_INVALID 0xFFFFFFFF
+#define MFC_TAG_VALID 0x00000000
+
+#define mfc_tag_reserve() \
+ __mfc_tag_reserve()
+#define mfc_tag_release(tag) \
+ __mfc_tag_release((tag))
+#define mfc_multi_tag_reserve(nr_tags) \
+ __mfc_multi_tag_reserve((nr_tags))
+#define mfc_multi_tag_release(tag, nr_tags) \
+ __mfc_multi_tag_release((tag),(nr_tags))
+
+extern unsigned int __mfc_tag_reserve (void);
+extern unsigned int __mfc_tag_release (unsigned int);
+extern unsigned int __mfc_multi_tag_reserve (unsigned int);
+extern unsigned int __mfc_multi_tag_release (unsigned int, unsigned int);
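+
+/* Usage sketch (illustrative) combining the tag manager with a DMA get;
+   'ls_buf', 'ea' and 'sz' are caller-supplied placeholders:
+
+     unsigned int tag = mfc_tag_reserve ();
+     if (tag != MFC_TAG_INVALID)
+       {
+         mfc_get (ls_buf, ea, sz, tag, 0, 0);
+         mfc_write_tag_mask (1 << tag);
+         mfc_read_tag_status_all ();   // block until the get completes
+         mfc_tag_release (tag);
+       }
+ */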
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __SPU_MFCIO_H__ */
diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf
new file mode 100644
index 000000000..5189f2858
--- /dev/null
+++ b/gcc/config/spu/t-spu-elf
@@ -0,0 +1,123 @@
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
+# Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option)
+# any later version.
+#
+# This file is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Define system directory to match STANDARD_INCLUDE_DIR in spu-elf.h,
+# allowing combined SPU/PPU sysroot builds.
+NATIVE_SYSTEM_HEADER_DIR = /include
+
+# Suppress building libgcc1.a
+LIBGCC1 =
+CROSS_LIBGCC1 =
+
+TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2
+
+# We exclude the functions below because the libgcc2.c default versions
+# do not support the SPU single-precision format (round towards zero).
+# We provide our own versions below and/or via direct expansion.
+LIB2FUNCS_EXCLUDE = _floatdisf _floatundisf _floattisf _floatunstisf
+
+# We provide our own version of __divdf3 that performs better and has
+# better support for non-default rounding modes.
+DPBIT_FUNCS := $(filter-out _div_df, $(DPBIT_FUNCS))
+
+LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/spu/float_unssidf.c \
+ $(srcdir)/config/spu/float_unsdidf.c \
+ $(srcdir)/config/spu/float_unsdisf.c \
+ $(srcdir)/config/spu/float_disf.c \
+ $(srcdir)/config/spu/mfc_tag_table.c \
+ $(srcdir)/config/spu/mfc_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_tag_release.c \
+ $(srcdir)/config/spu/mfc_multi_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_multi_tag_release.c \
+ $(srcdir)/config/spu/multi3.c \
+ $(srcdir)/config/spu/divmodti4.c \
+ $(srcdir)/config/spu/divv2df3.c
+
+LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde.c \
+ $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+
+# We want fine-grained libraries, so use the new code to build the
+# floating-point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/spu/t-spu-elf
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c $(srcdir)/config/spu/t-spu-elf
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Build TImode conversion routines to support Fortran 128-bit
+# integer data types.
+LIB2_SIDITI_CONV_FUNCS=yes
+
+# Don't let CTOR_LIST end up in sdata section.
+CRTSTUFF_T_CFLAGS =
+
+# Multi-lib support.
+MULTILIB_OPTIONS=mea64
+
+# Neither gcc nor newlib seems to have a standard way to generate multiple
+# crt*.o files, so we don't use the standard crt0.o name anymore.
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o libgcc_cachemgr.a libgcc_cachemgr_nonatomic.a \
+ libgcc_cache8k.a libgcc_cache16k.a libgcc_cache32k.a libgcc_cache64k.a libgcc_cache128k.a
+
+$(T)cachemgr.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -c $< -o $@
+
+# Specialised rule to add a -D flag.
+$(T)cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -DNONATOMIC -c $< -o $@
+
+$(T)libgcc_%.a: $(T)%.o
+ $(AR_FOR_TARGET) -rcs $@ $<
+
+$(T)cache8k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=8 -o $@ -c $<
+
+$(T)cache16k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=16 -o $@ -c $<
+
+$(T)cache32k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=32 -o $@ -c $<
+
+$(T)cache64k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=64 -o $@ -c $<
+
+$(T)cache128k.o: $(srcdir)/config/spu/cache.S
+ $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -D__CACHE_SIZE__=128 -o $@ -c $<
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+spu.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(REGS_H) hard-reg-set.h \
+ real.h insn-config.h conditions.h insn-attr.h flags.h $(RECOG_H) \
+ $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \
+ output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) $(GGC_H) $(HASHTAB_H) \
+ $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h cfglayout.h \
+ $(srcdir)/config/spu/spu-protos.h \
+ $(srcdir)/config/spu/spu-builtins.def
+
+spu-c.o: $(srcdir)/config/spu/spu-c.c \
+ $(srcdir)/config/spu/spu-protos.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(CPPLIB_H) \
+ $(TM_P_H) $(C_COMMON_H) $(C_PRAGMA_H) coretypes.h $(TM_H) insn-codes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/spu/spu-c.c
diff --git a/gcc/config/spu/vec_types.h b/gcc/config/spu/vec_types.h
new file mode 100644
index 000000000..7073de869
--- /dev/null
+++ b/gcc/config/spu/vec_types.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2006, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _VEC_TYPES_H_
+#define _VEC_TYPES_H_ 1
+
+#include <spu_intrinsics.h>
+
+/* Define additional PowerPC SIMD/Vector Multi-media eXtension
+ * single-keyword vector data types for use in mapping VMX code
+ * to the SPU.
+ */
+#define vec_bchar16 __vector unsigned char
+#define vec_bshort8 __vector unsigned short
+#define vec_pixel8 __vector unsigned short
+#define vec_bint4 __vector unsigned int
+
+#endif /* _VEC_TYPES_H_ */
diff --git a/gcc/config/spu/vmx2spu.h b/gcc/config/spu/vmx2spu.h
new file mode 100644
index 000000000..409d73f78
--- /dev/null
+++ b/gcc/config/spu/vmx2spu.h
@@ -0,0 +1,3985 @@
+/* Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _VMX2SPU_H_
+#define _VMX2SPU_H_ 1
+
+#ifdef __cplusplus
+
+#ifdef __SPU__
+
+#include <spu_intrinsics.h>
+#include <vec_types.h>
+
+/* This file maps generic VMX intrinsics and predicates to the SPU using
+ * overloaded C++ functions.
+ */
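+
+/* For example, a VMX expression such as vec_add(a, b) resolves through
+ * ordinary C++ overload resolution to whichever variant below matches the
+ * operand types, which in turn expands to the corresponding SPU
+ * intrinsic(s).  (Illustrative; a and b stand for any supported vector
+ * operands.)
+ */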
+
+/************************************************************************
+ * INTRINSICS
+ ************************************************************************/
+
+/* vec_abs (vector absolute value)
+ * =======
+ */
+static inline vec_char16 vec_abs(vec_char16 a)
+{
+ vec_char16 minus_a;
+
+ minus_a = (vec_char16)(spu_add((vec_ushort8)(spu_and(spu_xor(a, 0xFF), 0x7F)), 0x101));
+ return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
+}
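+
+/* Note on the vec_char16 variant above: the SPU has no byte-wide add, so
+ * -a is formed with halfword arithmetic.  ~a is computed per byte, then
+ * bit 7 of each byte is cleared so that adding 0x0101 per halfword (one
+ * per byte) cannot carry across a byte boundary; in the negative lanes
+ * (the only ones the select keeps) bit 7 of ~a is already zero, so
+ * clearing it loses nothing.
+ */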
+
+static inline vec_short8 vec_abs(vec_short8 a)
+{
+ return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1)));
+}
+
+static inline vec_int4 vec_abs(vec_int4 a)
+{
+ return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1)));
+}
+
+static inline vec_float4 vec_abs(vec_float4 a)
+{
+ return ((vec_float4)(spu_rlmask(spu_sl((vec_uint4)(a), 1), -1)));
+}
+
+/* vec_abss (vector absolute value saturate)
+ * ========
+ */
+static inline vec_char16 vec_abss(vec_char16 a)
+{
+ vec_char16 minus_a;
+
+ minus_a = (vec_char16)spu_add((vec_short8)(spu_xor(a, -1)),
+ (vec_short8)(spu_and(spu_cmpgt((vec_uchar16)(a), 0x80), 1)));
+ return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
+}
+
+static inline vec_short8 vec_abss(vec_short8 a)
+{
+ vec_short8 minus_a;
+
+ minus_a = spu_add(spu_sub(0, a), (vec_short8)(spu_cmpeq(a, ((vec_short8){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}))));
+ return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
+}
+
+static inline vec_int4 vec_abss(vec_int4 a)
+{
+ vec_int4 minus_a;
+
+ minus_a = spu_add(spu_sub(0, a), (vec_int4)(spu_cmpeq(a, ((vec_int4){0x80000000,0x80000000,0x80000000,0x80000000}))));
+ return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
+}
+
+
+/* vec_add (vector add)
+ * =======
+ */
+static inline vec_uchar16 vec_add(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(spu_sel(spu_add((vec_ushort8)(a), (vec_ushort8)(b)),
+ spu_add(spu_and((vec_ushort8)(a), 0xFF00), spu_and((vec_ushort8)(b), 0xFF00)),
+ spu_splats((unsigned short)(0xFF00)))));
+}
+
+static inline vec_char16 vec_add(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
+}
+
+static inline vec_char16 vec_add(vec_bchar16 a, vec_char16 b)
+{
+ return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
+}
+
+static inline vec_char16 vec_add(vec_char16 a, vec_bchar16 b)
+{
+ return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
+}
+
+static inline vec_ushort8 vec_add(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_add(a, b));
+}
+
+static inline vec_short8 vec_add(vec_short8 a, vec_short8 b)
+{
+ return (spu_add(a, b));
+}
+
+static inline vec_short8 vec_add(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_add((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_add(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_add(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_add(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_add(a, b));
+}
+
+static inline vec_int4 vec_add(vec_int4 a, vec_int4 b)
+{
+ return (spu_add(a, b));
+}
+
+static inline vec_int4 vec_add(vec_bint4 a, vec_int4 b)
+{
+ return (spu_add((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_add(vec_int4 a, vec_bint4 b)
+{
+ return (spu_add(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_add(vec_float4 a, vec_float4 b)
+{
+ return (spu_add(a, b));
+}
+
+/* vec_addc (vector add carryout unsigned word)
+ * ========
+ */
+#define vec_addc(_a, _b) spu_genc(_a, _b)
+
+/* vec_adds (vector add saturated)
+ * ========
+ */
+static inline vec_uchar16 vec_adds(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_uchar16 s1, s2, s, d;
+
+ s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)));
+ s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)));
+ s = spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22,
+ 8, 24, 10, 26, 12, 28, 14, 30}));
+ d = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
+ 9, 25, 11, 27, 13, 29, 15, 31}));
+ return (spu_or(d, spu_cmpeq(s, 1)));
+}
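+
+/* In the vec_uchar16 variant above, s1/s2 hold the even/odd byte sums in
+ * halfword lanes, so each 9-bit result survives intact; s gathers the
+ * carry bytes and d the low-order sum bytes, and the final spu_or
+ * saturates every lane whose carry byte is 1 to 0xFF.
+ */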
+
+static inline vec_char16 vec_adds(vec_char16 a, vec_char16 b)
+{
+ vec_uchar16 s1, s2, s, d;
+
+ s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)));
+ s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)));
+ s = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
+ 9, 25, 11, 27, 13, 29, 15, 31}));
+ d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_and(s, (vec_uchar16)(spu_nor(a, b))), 0x7F));
+ d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_nor(s, (vec_uchar16)(spu_nand(a, b))), 0x7F));
+ return ((vec_char16)(d));
+}
+
+static inline vec_char16 vec_adds(vec_bchar16 a, vec_char16 b)
+{
+ return (vec_adds((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_adds(vec_char16 a, vec_bchar16 b)
+{
+ return (vec_adds(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_adds(vec_ushort8 a, vec_ushort8 b)
+{
+ vec_ushort8 s, d;
+
+ s = spu_add(a, b);
+ d = spu_or(s, spu_rlmaska(spu_sel(spu_xor(s, -1), a, spu_eqv(a, b)), -15));
+ return (d);
+}
+
+static inline vec_short8 vec_adds(vec_short8 a, vec_short8 b)
+{
+ vec_short8 s, d;
+
+ s = spu_add(a, b);
+ d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_and(s, spu_nor(a, b)), -15)));
+ d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_nor(s, spu_nand(a, b)), -15)));
+ return (d);
+}
+
+static inline vec_short8 vec_adds(vec_bshort8 a, vec_short8 b)
+{
+ return (vec_adds((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_adds(vec_short8 a, vec_bshort8 b)
+{
+ return (vec_adds(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_adds(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_or(spu_add(a, b), spu_rlmaska(spu_sl(spu_genc(a, b), 31), -31)));
+}
+
+static inline vec_int4 vec_adds(vec_int4 a, vec_int4 b)
+{
+ vec_int4 s, d;
+
+ s = spu_add(a, b);
+ d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)spu_rlmaska(spu_and(s, spu_nor(a, b)), -31));
+ d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)spu_rlmaska(spu_nor(s, spu_nand(a, b)), -31));
+ return (d);
+}
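+
+/* Signed saturation above is detected from sign bits alone: spu_nor(a, b)
+ * has its sign set only when both operands are non-negative, so a negative
+ * sum there means positive overflow (clamp to 0x7FFFFFFF); spu_nand(a, b)
+ * has its sign clear only when both operands are negative, so a
+ * non-negative sum there means negative overflow (clamp to 0x80000000).
+ * spu_rlmaska(..., -31) smears that verdict across the lane for the
+ * select.
+ */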
+
+static inline vec_int4 vec_adds(vec_bint4 a, vec_int4 b)
+{
+ return (vec_adds((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_adds(vec_int4 a, vec_bint4 b)
+{
+ return (vec_adds(a, (vec_int4)(b)));
+}
+
+/* vec_and (vector logical and)
+ * =======
+ */
+static inline vec_uchar16 vec_and(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_char16 vec_and(vec_char16 a, vec_char16 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_char16 vec_and(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_and((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_and(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_and(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_and(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_short8 vec_and(vec_short8 a, vec_short8 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_short8 vec_and(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_and((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_and(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_and(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_and(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_int4 vec_and(vec_int4 a, vec_int4 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_int4 vec_and(vec_bint4 a, vec_int4 b)
+{
+ return (spu_and((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_and(vec_int4 a, vec_bint4 b)
+{
+ return (spu_and(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_and(vec_float4 a, vec_float4 b)
+{
+ return (spu_and(a, b));
+}
+
+static inline vec_float4 vec_and(vec_bint4 a, vec_float4 b)
+{
+ return (spu_and((vec_float4)(a),b));
+}
+
+static inline vec_float4 vec_and(vec_float4 a, vec_bint4 b)
+{
+ return (spu_and(a, (vec_float4)(b)));
+}
+
+
+/* vec_andc (vector logical and with complement)
+ * ========
+ */
+static inline vec_uchar16 vec_andc(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_char16 vec_andc(vec_char16 a, vec_char16 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_char16 vec_andc(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_andc((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_andc(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_andc(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_andc(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_short8 vec_andc(vec_short8 a, vec_short8 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_short8 vec_andc(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_andc((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_andc(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_andc(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_andc(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_int4 vec_andc(vec_int4 a, vec_int4 b)
+{
+ return (spu_andc(a, b));
+}
+
+static inline vec_int4 vec_andc(vec_bint4 a, vec_int4 b)
+{
+ return (spu_andc((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_andc(vec_int4 a, vec_bint4 b)
+{
+ return (spu_andc(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_andc(vec_float4 a, vec_float4 b)
+{
+ return (spu_andc(a,b));
+}
+
+static inline vec_float4 vec_andc(vec_bint4 a, vec_float4 b)
+{
+ return (spu_andc((vec_float4)(a),b));
+}
+
+static inline vec_float4 vec_andc(vec_float4 a, vec_bint4 b)
+{
+ return (spu_andc(a, (vec_float4)(b)));
+}
+
+/* vec_avg (vector average)
+ * =======
+ */
+static inline vec_uchar16 vec_avg(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_avg(a, b));
+}
+
+static inline vec_char16 vec_avg(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(spu_xor(spu_avg((vec_uchar16)(a), (vec_uchar16)(b)),
+ (vec_uchar16)(spu_and(spu_xor(a,b), 0x80)))));
+}
+
+static inline vec_ushort8 vec_avg(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)),
+ spu_and(spu_or(a, b), 1)));
+}
+
+static inline vec_short8 vec_avg(vec_short8 a, vec_short8 b)
+{
+ return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)),
+ spu_and(spu_or(a, b), 1)));
+}
+
+static inline vec_uint4 vec_avg(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)),
+ spu_and(spu_or(a, b), 1)));
+}
+
+static inline vec_int4 vec_avg(vec_int4 a, vec_int4 b)
+{
+ return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)),
+ spu_and(spu_or(a, b), 1)));
+}
+
+
+/* vec_ceil (vector ceiling)
+ * ========
+ */
+static inline vec_float4 vec_ceil(vec_float4 a)
+{
+ vec_int4 exp;
+ vec_uint4 mask;
+
+ a = spu_add(a, (vec_float4)(spu_and(spu_xor(spu_rlmaska((vec_int4)a, -31), -1), spu_splats((signed int)0x3F7FFFFF))));
+ exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
+ mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
+ mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
+ mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));
+
+ return ((vec_float4)(spu_andc((vec_uint4)(a), mask)));
+}
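+
+/* The rounding helpers in this file (vec_ceil, vec_floor, vec_trunc,
+ * vec_round) share one idea: derive from the exponent a mask of the
+ * mantissa bits that lie below the binary point and clear them, which
+ * truncates toward zero.  vec_ceil first nudges non-negative lanes up by
+ * 0x3F7FFFFF (just under 1.0f) so the truncation lands on the ceiling.
+ */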
+
+
+/* vec_cmpb (vector compare bounds floating-point)
+ * ========
+ */
+static inline vec_int4 vec_cmpb(vec_float4 a, vec_float4 b)
+{
+ vec_int4 b0 = (vec_int4)spu_splats(0x80000000);
+ vec_int4 b1 = (vec_int4)spu_splats(0x40000000);
+
+ return (spu_or(spu_and((vec_int4)spu_cmpgt(a, b), b0),
+ spu_and((vec_int4)spu_cmpgt(spu_xor(b, (vec_float4)(b0)), a), b1)));
+}
+
+/* vec_cmpeq (vector compare equal)
+ * =========
+ */
+#define vec_cmpeq(_a, _b) spu_cmpeq(_a, _b)
+
+
+/* vec_cmpge (vector compare greater than or equal)
+ * =========
+ */
+static inline vec_bint4 vec_cmpge(vec_float4 a, vec_float4 b)
+{
+ return (spu_xor(spu_cmpgt(b, a), -1));
+}
+
+
+/* vec_cmpgt (vector compare greater than)
+ * =========
+ */
+#define vec_cmpgt(_a, _b) spu_cmpgt(_a, _b)
+
+
+/* vec_cmple (vector compare less than or equal)
+ * =========
+ */
+static inline vec_bint4 vec_cmple(vec_float4 a, vec_float4 b)
+{
+ return (spu_xor(spu_cmpgt(a, b), -1));
+}
+
+
+/* vec_cmplt (vector compare less than)
+ * =========
+ */
+#define vec_cmplt(_a, _b) spu_cmpgt(_b, _a)
+
+
+/* vec_ctf (vector convert from fixed-point word)
+ * =======
+ */
+#define vec_ctf(_a, _b) spu_convtf(_a, _b)
+
+
+/* vec_cts (vector convert to signed fixed-point word saturate)
+ * =======
+ */
+#define vec_cts(_a, _b) spu_convts(_a, _b)
+
+
+/* vec_ctu (vector convert to unsigned fixed-point word saturate)
+ * =======
+ */
+#define vec_ctu(_a, _b) spu_convtu(_a, _b)
+
+
+/* vec_dss (vector data stream stop)
+ * =======
+ */
+#define vec_dss(_a)
+
+
+/* vec_dssall (vector data stream stop all)
+ * ==========
+ */
+#define vec_dssall()
+
+
+/* vec_dst (vector data stream touch)
+ * =======
+ */
+#define vec_dst(_a, _b, _c)
+
+
+/* vec_dstst (vector data stream touch for store)
+ * =========
+ */
+#define vec_dstst(_a, _b, _c)
+
+
+/* vec_dststt (vector data stream touch for store transient)
+ * ==========
+ */
+#define vec_dststt(_a, _b, _c)
+
+
+/* vec_dstt (vector data stream touch transient)
+ * ========
+ */
+#define vec_dstt(_a, _b, _c)
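+
+/* The data stream touch/stop operations above expand to nothing: the SPU
+ * runs from its local store and has no cache for these AltiVec prefetch
+ * hints to prime, so they are safely no-ops.
+ */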
+
+
+/* vec_expte (vector 2 raised to the exponent estimate floating-point)
+ * =========
+ */
+static inline vec_float4 vec_expte(vec_float4 a)
+{
+ vec_float4 bias, frac, exp;
+ vec_int4 ia;
+
+ bias = (vec_float4)(spu_andc(spu_splats((signed int)0x3F7FFFFF), spu_rlmaska((vec_int4)(a), -31)));
+ ia = spu_convts(spu_add(a, bias), 0);
+ frac = spu_sub(spu_convtf(ia, 0), a);
+ exp = (vec_float4)(spu_sl(spu_add(ia, 127), 23));
+
+ return (spu_mul(spu_madd(spu_madd(spu_splats(0.17157287f), frac, spu_splats(-0.67157287f)),
+ frac, spu_splats(1.0f)), exp));
+}
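+
+/* The estimate above splits a into an integer part ia and a fraction
+ * frac = ia - a, builds 2^ia directly in the exponent field
+ * (spu_sl(ia + 127, 23)), and scales a degree-2 polynomial approximation
+ * of 2^-frac by it.
+ */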
+
+
+/* vec_floor (vector floor)
+ * =========
+ */
+static inline vec_float4 vec_floor(vec_float4 a)
+{
+ vec_int4 exp;
+ vec_uint4 mask;
+
+ a = spu_sub(a, (vec_float4)(spu_and(spu_rlmaska((vec_int4)a, -31), spu_splats((signed int)0x3F7FFFFF))));
+ exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
+ mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
+ mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
+ mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));
+
+ return ((vec_float4)(spu_andc((vec_uint4)(a), mask)));
+}
+
+
+/* vec_ld (vector load indexed)
+ * ======
+ */
+static inline vec_uchar16 vec_ld(int a, unsigned char *b)
+{
+ return (*((vec_uchar16 *)(b+a)));
+}
+
+static inline vec_uchar16 vec_ld(int a, vec_uchar16 *b)
+{
+ return (*((vec_uchar16 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_char16 vec_ld(int a, signed char *b)
+{
+ return (*((vec_char16 *)(b+a)));
+}
+
+static inline vec_char16 vec_ld(int a, vec_char16 *b)
+{
+ return (*((vec_char16 *)((signed char *)(b)+a)));
+}
+
+static inline vec_ushort8 vec_ld(int a, unsigned short *b)
+{
+ return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_ushort8 vec_ld(int a, vec_ushort8 *b)
+{
+ return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_short8 vec_ld(int a, signed short *b)
+{
+ return (*((vec_short8 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_short8 vec_ld(int a, vec_short8 *b)
+{
+ return (*((vec_short8 *)((signed char *)(b)+a)));
+}
+
+static inline vec_uint4 vec_ld(int a, unsigned int *b)
+{
+ return (*((vec_uint4 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_uint4 vec_ld(int a, vec_uint4 *b)
+{
+ return (*((vec_uint4 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_int4 vec_ld(int a, signed int *b)
+{
+ return (*((vec_int4 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_int4 vec_ld(int a, vec_int4 *b)
+{
+ return (*((vec_int4 *)((signed char *)(b)+a)));
+}
+
+static inline vec_float4 vec_ld(int a, float *b)
+{
+ return (*((vec_float4 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_float4 vec_ld(int a, vec_float4 *b)
+{
+ return (*((vec_float4 *)((unsigned char *)(b)+a)));
+}
+
+/* vec_lde (vector load element indexed)
+ * =======
+ */
+static inline vec_uchar16 vec_lde(int a, unsigned char *b)
+{
+ return (*((vec_uchar16 *)(b+a)));
+}
+
+static inline vec_char16 vec_lde(int a, signed char *b)
+{
+ return (*((vec_char16 *)(b+a)));
+}
+
+static inline vec_ushort8 vec_lde(int a, unsigned short *b)
+{
+ return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_short8 vec_lde(int a, signed short *b)
+{
+ return (*((vec_short8 *)((unsigned char *)(b)+a)));
+}
+
+
+static inline vec_uint4 vec_lde(int a, unsigned int *b)
+{
+ return (*((vec_uint4 *)((unsigned char *)(b)+a)));
+}
+
+static inline vec_int4 vec_lde(int a, signed int *b)
+{
+ return (*((vec_int4 *)((unsigned char *)(b)+a)));
+}
+
+
+static inline vec_float4 vec_lde(int a, float *b)
+{
+ return (*((vec_float4 *)((unsigned char *)(b)+a)));
+}
+
+/* vec_ldl (vector load indexed LRU)
+ * =======
+ */
+#define vec_ldl(_a, _b) vec_ld(_a, _b)
+
+
+/* vec_loge (vector log2 estimate floating-point)
+ * ========
+ */
+static inline vec_float4 vec_loge(vec_float4 a)
+{
+ vec_int4 exp;
+ vec_float4 frac;
+
+ exp = spu_add((vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)), -127);
+ frac = (vec_float4)(spu_sub((vec_int4)(a), spu_sl(exp, 23)));
+
+ return (spu_madd(spu_madd(spu_splats(-0.33985f), frac, spu_splats(2.01955f)),
+ frac, spu_sub(spu_convtf(exp, 0), spu_splats(1.6797f))));
+}
+
+
+/* vec_lvsl (vector load for shift left)
+ * ========
+ */
+static inline vec_uchar16 vec_lvsl(int a, unsigned char *b)
+{
+ return ((vec_uchar16)spu_add((vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF))),
+ ((vec_ushort8){0x0001, 0x0203, 0x0405, 0x0607,
+ 0x0809, 0x0A0B, 0x0C0D, 0x0E0F})));
+}
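+
+/* The permute control vector is synthesized rather than loaded: the low
+ * four address bits are splatted into every byte, and the halfword add of
+ * {0x0001, 0x0203, ...} turns that into the ascending byte pattern
+ * {n, n+1, ..., n+15}; since n <= 15, no carry can cross a byte boundary.
+ */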
+
+static inline vec_uchar16 vec_lvsl(int a, signed char *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsl(int a, unsigned short *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsl(int a, short *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsl(int a, unsigned int *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsl(int a, int *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsl(int a, float *b)
+{
+ return (vec_lvsl(a, (unsigned char *)b));
+}
+
+
+/* vec_lvsr (vector load for shift right)
+ * ========
+ */
+static inline vec_uchar16 vec_lvsr(int a, unsigned char *b)
+{
+ return ((vec_uchar16)(spu_sub(((vec_ushort8){0x1011, 0x1213, 0x1415, 0x1617,
+ 0x1819, 0x1A1B, 0x1C1D, 0x1E1F}),
+ (vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF))))));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, signed char *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, unsigned short *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, short *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, unsigned int *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, int *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+static inline vec_uchar16 vec_lvsr(int a, float *b)
+{
+ return (vec_lvsr(a, (unsigned char *)b));
+}
+
+/* vec_madd (vector multiply add)
+ * ========
+ */
+#define vec_madd(_a, _b, _c) spu_madd(_a, _b, _c)
+
+
+
+/* vec_madds (vector multiply add saturate)
+ * =========
+ */
+static inline vec_short8 vec_madds(vec_short8 a, vec_short8 b, vec_short8 c)
+{
+ return (vec_adds(c, spu_sel((vec_short8)(spu_sl(spu_mule(a, b), 1)),
+ (vec_short8)(spu_rlmask(spu_mulo(a, b), -15)),
+ ((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF}))));
+}
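+
+/* vec_madds above forms ((a * b) >> 15) + c with saturation: spu_mule and
+ * spu_mulo give the 32-bit products of the even and odd halfwords, the
+ * shift-left-1 / shift-right-15 pair lines bits 30..15 of each product up
+ * in a halfword, and the select re-interleaves the even and odd lanes
+ * before the saturating add.
+ */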
+
+/* vec_max (vector maximum)
+ * =======
+ */
+static inline vec_uchar16 vec_max(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_char16 vec_max(vec_char16 a, vec_char16 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_char16 vec_max(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_sel(b, (vec_char16)(a), spu_cmpgt((vec_char16)(a), b)));
+}
+
+static inline vec_char16 vec_max(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_sel((vec_char16)(b), a, spu_cmpgt(a, (vec_char16)(b))));
+}
+
+static inline vec_ushort8 vec_max(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_short8 vec_max(vec_short8 a, vec_short8 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_short8 vec_max(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_sel(b, (vec_short8)(a), spu_cmpgt((vec_short8)(a), b)));
+}
+
+static inline vec_short8 vec_max(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_sel((vec_short8)(b), a, spu_cmpgt(a, (vec_short8)(b))));
+}
+
+static inline vec_uint4 vec_max(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_int4 vec_max(vec_int4 a, vec_int4 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+static inline vec_int4 vec_max(vec_bint4 a, vec_int4 b)
+{
+ return (spu_sel(b, (vec_int4)(a), spu_cmpgt((vec_int4)(a), b)));
+}
+
+static inline vec_int4 vec_max(vec_int4 a, vec_bint4 b)
+{
+ return (spu_sel((vec_int4)(b), a, spu_cmpgt(a, (vec_int4)(b))));
+}
+
+static inline vec_float4 vec_max(vec_float4 a, vec_float4 b)
+{
+ return (spu_sel(b, a, spu_cmpgt(a, b)));
+}
+
+
+/* vec_mergeh (vector merge high)
+ * ==========
+ */
+static inline vec_uchar16 vec_mergeh(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23})));
+}
+
+static inline vec_char16 vec_mergeh(vec_char16 a, vec_char16 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19,
+ 4, 20, 5, 21, 6, 22, 7, 23})));
+}
+
+static inline vec_ushort8 vec_mergeh(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19,
+ 4, 5, 20, 21, 6, 7, 22, 23})));
+}
+
+static inline vec_short8 vec_mergeh(vec_short8 a, vec_short8 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19,
+ 4, 5, 20, 21, 6, 7, 22, 23})));
+}
+
+static inline vec_uint4 vec_mergeh(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
+ 4, 5, 6, 7, 20, 21, 22, 23})));
+}
+
+static inline vec_int4 vec_mergeh(vec_int4 a, vec_int4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
+ 4, 5, 6, 7, 20, 21, 22, 23})));
+}
+
+static inline vec_float4 vec_mergeh(vec_float4 a, vec_float4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
+ 4, 5, 6, 7, 20, 21, 22, 23})));
+}
+
+/* vec_mergel (vector merge low)
+ * ==========
+ */
+static inline vec_uchar16 vec_mergel(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24, 9, 25, 10, 26, 11, 27,
+ 12, 28, 13, 29, 14, 30, 15, 31})));
+}
+
+static inline vec_char16 vec_mergel(vec_char16 a, vec_char16 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24, 9, 25, 10, 26, 11, 27,
+ 12, 28, 13, 29, 14, 30, 15, 31})));
+}
+
+static inline vec_ushort8 vec_mergel(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 24, 25, 10, 11, 26, 27,
+ 12, 13, 28, 29, 14, 15, 30, 31})));
+}
+
+static inline vec_short8 vec_mergel(vec_short8 a, vec_short8 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 24, 25, 10, 11, 26, 27,
+ 12, 13, 28, 29, 14, 15, 30, 31})));
+}
+
+static inline vec_uint4 vec_mergel(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27,
+ 12, 13, 14, 15, 28, 29, 30, 31})));
+}
+
+static inline vec_int4 vec_mergel(vec_int4 a, vec_int4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27,
+ 12, 13, 14, 15, 28, 29, 30, 31})));
+}
+
+static inline vec_float4 vec_mergel(vec_float4 a, vec_float4 b)
+{
+ return (spu_shuffle(a, b, ((vec_uchar16){ 8, 9, 10, 11, 24, 25, 26, 27,
+ 12, 13, 14, 15, 28, 29, 30, 31})));
+}
+
+/* vec_mfvscr (vector move from vector status and control register)
+ * ==========
+ */
+static inline vec_ushort8 vec_mfvscr()
+{
+ return ((vec_ushort8)spu_splats(0)); /* not supported */
+}
+
+
+/* vec_min (vector minimum)
+ * =======
+ */
+static inline vec_uchar16 vec_min(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_char16 vec_min(vec_char16 a, vec_char16 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_char16 vec_min(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_sel((vec_char16)(a), b, spu_cmpgt((vec_char16)(a), b)));
+}
+
+static inline vec_char16 vec_min(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_sel(a, (vec_char16)(b), spu_cmpgt(a, (vec_char16)(b))));
+}
+
+static inline vec_ushort8 vec_min(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_short8 vec_min(vec_short8 a, vec_short8 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_short8 vec_min(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_sel((vec_short8)(a), b, spu_cmpgt((vec_short8)(a), b)));
+}
+
+static inline vec_short8 vec_min(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_sel(a, (vec_short8)(b), spu_cmpgt(a, (vec_short8)(b))));
+}
+
+static inline vec_uint4 vec_min(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_int4 vec_min(vec_int4 a, vec_int4 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+static inline vec_int4 vec_min(vec_bint4 a, vec_int4 b)
+{
+ return (spu_sel((vec_int4)(a), b, spu_cmpgt((vec_int4)(a), b)));
+}
+
+static inline vec_int4 vec_min(vec_int4 a, vec_bint4 b)
+{
+ return (spu_sel(a, (vec_int4)(b), spu_cmpgt(a, (vec_int4)(b))));
+}
+
+static inline vec_float4 vec_min(vec_float4 a, vec_float4 b)
+{
+ return (spu_sel(a, b, spu_cmpgt(a, b)));
+}
+
+/* vec_mladd (vector multiply low and add unsigned half word)
+ * =========
+ */
+static inline vec_short8 vec_mladd(vec_short8 a, vec_short8 b, vec_short8 c)
+{
+ return ((vec_short8)(spu_shuffle(spu_madd((vec_short8)(spu_rl((vec_uint4)(a), -16)),
+ (vec_short8)(spu_rl((vec_uint4)(b), -16)),
+ (vec_int4)(spu_rl((vec_uint4)(c), -16))),
+ spu_madd(a, b, spu_extend(c)),
+ ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23,
+ 10, 11, 26, 27, 14, 15, 30, 31}))));
+}
+
+
+static inline vec_ushort8 vec_mladd(vec_ushort8 a, vec_ushort8 b, vec_ushort8 c)
+{
+ return ((vec_ushort8)(vec_mladd((vec_short8)(a), (vec_short8)(b), (vec_short8)(c))));
+}
+
+static inline vec_short8 vec_mladd(vec_ushort8 a, vec_short8 b, vec_short8 c)
+{
+ return (vec_mladd((vec_short8)(a), b, c));
+}
+
+static inline vec_short8 vec_mladd(vec_short8 a, vec_ushort8 b, vec_ushort8 c)
+{
+ return (vec_mladd(a, (vec_short8)(b), (vec_short8)(c)));
+}
+
+
+/* vec_mradds (vector multiply round and add saturate)
+ * ==========
+ */
+static inline vec_short8 vec_mradds(vec_short8 a, vec_short8 b, vec_short8 c)
+{
+ vec_int4 round = (vec_int4)spu_splats(0x4000);
+ vec_short8 hi, lo;
+
+ hi = (vec_short8)(spu_sl(spu_add(spu_mule(a, b), round), 1));
+ lo = (vec_short8)(spu_rlmask(spu_add(spu_mulo(a, b), round), -15));
+
+ return (vec_adds(spu_sel(hi, lo, ((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF})), c));
+}
+
+
+/* vec_msum (vector multiply sum)
+ * ========
+ */
+static inline vec_uint4 vec_msum(vec_uchar16 a, vec_uchar16 b, vec_uint4 c)
+{
+ vec_ushort8 a1, a2, b1, b2;
+ vec_uint4 p1, p2;
+
+ a1 = spu_and((vec_ushort8)(a), 0xFF);
+ a2 = spu_rlmask((vec_ushort8)(a), -8);
+ b1 = spu_and((vec_ushort8)(b), 0xFF);
+ b2 = spu_rlmask((vec_ushort8)(b), -8);
+
+ p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2)));
+ p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2)));
+ return (spu_add(p2, spu_add(p1, c)));
+}
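+
+/* With no byte multiplier on the SPU, the byte variant above masks and
+ * shifts the operands into halfword lanes (a1/b1 = low bytes, a2/b2 =
+ * high bytes) and uses spu_mulo twice per pair, once rotated by two
+ * bytes, so that all four byte products of each word are formed and
+ * summed into c.
+ */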
+
+static inline vec_int4 vec_msum(vec_char16 a, vec_uchar16 b, vec_int4 c)
+{
+ vec_short8 a1, a2, b1, b2;
+ vec_int4 p1, p2;
+
+ a1 = (vec_short8)(spu_extend(a));
+ a2 = spu_rlmaska((vec_short8)(a), -8);
+ b1 = (vec_short8)(spu_and((vec_ushort8)(b), 0xFF));
+ b2 = (vec_short8)spu_rlmask((vec_ushort8)(b), -8);
+
+ p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2)));
+ p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2)));
+ return (spu_add(p2, spu_add(p1, c)));
+}
+
+static inline vec_uint4 vec_msum(vec_ushort8 a, vec_ushort8 b, vec_uint4 c)
+{
+ return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
+}
+
+static inline vec_int4 vec_msum(vec_short8 a, vec_short8 b, vec_int4 c)
+{
+ return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
+}
+
+
+/* vec_msums (vector multiply sum saturate)
+ * =========
+ */
+static inline vec_uint4 vec_msums(vec_ushort8 a, vec_ushort8 b, vec_uint4 c)
+{
+ vec_uint4 p1, p2;
+
+ p1 = spu_mulo(a, b);
+ p2 = spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2));
+
+ return (vec_adds(p2, vec_adds(p1, c)));
+}
+
+static inline vec_int4 vec_msums(vec_short8 a, vec_short8 b, vec_int4 c)
+{
+ return (vec_adds(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
+}
+
+/* vec_mtvscr (vector move to vector status and control register)
+ * ==========
+ */
+#define vec_mtvscr(_a) /* not supported */
+
+
+/* vec_mule (vector multiply even)
+ * ========
+ */
+static inline vec_ushort8 vec_mule(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 hi, lo;
+
+ hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_uint4)(a), -24)),
+ (vec_ushort8)(spu_rlmask((vec_uint4)(b), -24)));
+ lo = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_short8)(a), -8)),
+ (vec_ushort8)(spu_rlmask((vec_short8)(b), -8)));
+
+ return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23,
+ 10, 11, 26, 27, 14, 15, 30, 31})));
+}
+
+static inline vec_short8 vec_mule(vec_char16 a, vec_char16 b)
+{
+ vec_short8 hi, lo;
+
+ hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(a), -24)),
+ (vec_short8)(spu_rlmaska((vec_uint4)(b), -24)));
+ lo = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_short8)(a), -8)),
+ (vec_short8)(spu_rlmaska((vec_short8)(b), -8)));
+
+ return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23,
+ 10, 11, 26, 27, 14, 15, 30, 31})));
+}
+
+static inline vec_uint4 vec_mule(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_mulo((vec_ushort8)spu_rlmask((vec_uint4)(a), -16),
+ (vec_ushort8)spu_rlmask((vec_uint4)(b), -16)));
+}
+
+
+static inline vec_int4 vec_mule(vec_short8 a, vec_short8 b)
+{
+ return (spu_mulo((vec_short8)spu_rlmaska((vec_int4)(a), -16),
+ (vec_short8)spu_rlmaska((vec_int4)(b), -16)));
+}
+
+
+/* vec_mulo (vector multiply odd)
+ * ========
+ */
+static inline vec_ushort8 vec_mulo(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 hi, lo;
+
+ hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(a), -16), 0xFF)),
+ (vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(b), -16), 0xFF)));
+ lo = (vec_ushort8)spu_mulo(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));
+
+ return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23,
+ 10, 11, 26, 27, 14, 15, 30, 31})));
+}
+
+static inline vec_short8 vec_mulo(vec_char16 a, vec_char16 b)
+{
+ vec_short8 aa, bb, hi, lo;
+
+ aa = spu_extend(a);
+ bb = spu_extend(b);
+
+ hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(aa), -16)),
+ (vec_short8)(spu_rlmaska((vec_uint4)(bb), -16)));
+ lo = (vec_short8)spu_mulo(aa, bb);
+ return (spu_shuffle(hi, lo, ((vec_uchar16){ 2, 3, 18, 19, 6, 7, 22, 23,
+ 10, 11, 26, 27, 14, 15, 30, 31})));
+}
+
+static inline vec_uint4 vec_mulo(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_mulo(a, b));
+}
+
+
+static inline vec_int4 vec_mulo(vec_short8 a, vec_short8 b)
+{
+ return (spu_mulo(a, b));
+}
+
+
+/* vec_nmsub (vector negative multiply subtract)
+ * =========
+ */
+#define vec_nmsub(_a, _b, _c) spu_nmsub(_a, _b, _c)
+
+
+/* vec_nor (vector logical nor)
+ * =======
+ */
+#define vec_nor(_a, _b) spu_nor(_a, _b)
+
+
+/* vec_or (vector logical or)
+ * ======
+ */
+static inline vec_uchar16 vec_or(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_char16 vec_or(vec_char16 a, vec_char16 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_char16 vec_or(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_or((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_or(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_or(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_or(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_short8 vec_or(vec_short8 a, vec_short8 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_short8 vec_or(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_or((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_or(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_or(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_or(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_int4 vec_or(vec_int4 a, vec_int4 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_int4 vec_or(vec_bint4 a, vec_int4 b)
+{
+ return (spu_or((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_or(vec_int4 a, vec_bint4 b)
+{
+ return (spu_or(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_or(vec_float4 a, vec_float4 b)
+{
+ return (spu_or(a, b));
+}
+
+static inline vec_float4 vec_or(vec_bint4 a, vec_float4 b)
+{
+ return (spu_or((vec_float4)(a),b));
+}
+
+static inline vec_float4 vec_or(vec_float4 a, vec_bint4 b)
+{
+ return (spu_or(a, (vec_float4)(b)));
+}
+
+
+/* vec_pack (vector pack)
+ * ========
+ */
+static inline vec_uchar16 vec_pack(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_uchar16)spu_shuffle(a, b, ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31})));
+}
+
+static inline vec_char16 vec_pack(vec_short8 a, vec_short8 b)
+{
+ return ((vec_char16)spu_shuffle(a, b, ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31})));
+}
+
+static inline vec_ushort8 vec_pack(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_ushort8)spu_shuffle(a, b, ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31})));
+}
+
+static inline vec_short8 vec_pack(vec_int4 a, vec_int4 b)
+{
+ return ((vec_short8)spu_shuffle(a, b, ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31})));
+}
+
+
+/* vec_packpx (vector pack pixel)
+ * ==========
+ */
+static inline vec_pixel8 vec_packpx(vec_uint4 a, vec_uint4 b)
+{
+ vec_uint4 x03FF = (vec_uint4)(spu_splats((unsigned short)0x03FF));
+ vec_uint4 x001F = (vec_uint4)(spu_splats((unsigned short)0x001F));
+
+ return ((vec_pixel8)(spu_shuffle(spu_sel(spu_sel(spu_sl(a, 7), spu_sl(a, 10), x03FF),
+ spu_sl(a, 13), x001F),
+ spu_sel(spu_sel(spu_sl(b, 7), spu_sl(b, 10), x03FF),
+ spu_sl(b, 13), x001F),
+ ((vec_uchar16){ 0, 1, 4, 5, 8, 9, 12, 13,
+ 16, 17, 20, 21, 24, 25, 28, 29}))));
+}
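+
+/* Each 32-bit 0:8:8:8 source pixel above is reduced to 1:5:5:5: the three
+ * shifted copies (<<7, <<10, <<13) align the most significant bits of the
+ * channels, the 0x03FF/0x001F masks splice the fields together, and the
+ * shuffle keeps the upper halfword of every word.
+ */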
+
+
+/* vec_packs (vector pack saturate)
+ * =========
+ */
+static inline vec_uchar16 vec_packs(vec_ushort8 a, vec_ushort8 b)
+{
+ vec_ushort8 max = spu_splats((unsigned short)0x00FF);
+
+ return ((vec_uchar16)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, 255)),
+ spu_sel(b, max, spu_cmpgt(b, 255)),
+ ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31}))));
+}
+
+static inline vec_char16 vec_packs(vec_short8 a, vec_short8 b)
+{
+ vec_short8 max = spu_splats((signed short)0x007F);
+ vec_short8 min = spu_splats((signed short)0xFF80);
+
+ return ((vec_char16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 127)), spu_cmpgt(a, -128)),
+ spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 127)), spu_cmpgt(b, -128)),
+ ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31}))));
+}
+
+static inline vec_ushort8 vec_packs(vec_uint4 a, vec_uint4 b)
+{
+ vec_uint4 max = spu_splats((unsigned int)0x0000FFFF);
+
+ return ((vec_ushort8)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, max)),
+ spu_sel(b, max, spu_cmpgt(b, max)),
+ ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31}))));
+}
+
+static inline vec_short8 vec_packs(vec_int4 a, vec_int4 b)
+{
+ vec_int4 max = spu_splats((signed int)0x00007FFF);
+ vec_int4 min = spu_splats((signed int)0xFFFF8000);
+
+ return ((vec_short8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)),
+ spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)),
+ ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31}))));
+}
+
+
+/* vec_packsu (vector pack saturate unsigned)
+ * ==========
+ */
+static inline vec_uchar16 vec_packsu(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_uchar16)spu_shuffle(spu_or(a, (vec_ushort8)(spu_cmpgt(a, 255))),
+ spu_or(b, (vec_ushort8)(spu_cmpgt(b, 255))),
+ ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31})));
+}
+
+static inline vec_uchar16 vec_packsu(vec_short8 a, vec_short8 b)
+{
+ vec_short8 max = spu_splats((signed short)0x00FF);
+ vec_short8 min = spu_splats((signed short)0x0000);
+
+ return ((vec_uchar16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 255)), spu_cmpgt(a, 0)),
+ spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 255)), spu_cmpgt(b, 0)),
+ ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31}))));
+}
+
+static inline vec_ushort8 vec_packsu(vec_uint4 a, vec_uint4 b)
+{
+ vec_uint4 max = spu_splats((unsigned int)0xFFFF);
+
+ return ((vec_ushort8)spu_shuffle(spu_or(a, (vec_uint4)(spu_cmpgt(a, max))),
+ spu_or(b, (vec_uint4)(spu_cmpgt(b, max))),
+ ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31})));
+}
+
+static inline vec_ushort8 vec_packsu(vec_int4 a, vec_int4 b)
+{
+ vec_int4 max = spu_splats((signed int)0x0000FFFF);
+ vec_int4 min = spu_splats((signed int)0x00000000);
+
+ return ((vec_ushort8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)),
+ spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)),
+ ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
+ 18, 19, 22, 23, 26, 27, 30, 31}))));
+}
+
+
+/* vec_perm (vector permute)
+ * ========
+ */
+static inline vec_uchar16 vec_perm(vec_uchar16 a, vec_uchar16 b, vec_uchar16 c)
+{
+ return (spu_shuffle(a, b, spu_and(c, 0x1F)));
+}
+
+static inline vec_char16 vec_perm(vec_char16 a, vec_char16 b, vec_uchar16 c)
+{
+ return ((vec_char16)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+static inline vec_ushort8 vec_perm(vec_ushort8 a, vec_ushort8 b, vec_uchar16 c)
+{
+ return ((vec_ushort8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+static inline vec_short8 vec_perm(vec_short8 a, vec_short8 b, vec_uchar16 c)
+{
+ return ((vec_short8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+static inline vec_uint4 vec_perm(vec_uint4 a, vec_uint4 b, vec_uchar16 c)
+{
+ return ((vec_uint4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+static inline vec_int4 vec_perm(vec_int4 a, vec_int4 b, vec_uchar16 c)
+{
+ return ((vec_int4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+static inline vec_float4 vec_perm(vec_float4 a, vec_float4 b, vec_uchar16 c)
+{
+ return ((vec_float4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
+}
+
+
+/* vec_re (vector reciprocal estimate)
+ * ======
+ */
+#define vec_re(_a) spu_re(_a)
+
+
+/* vec_rl (vector rotate left)
+ * ======
+ */
+static inline vec_uchar16 vec_rl(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 r1, r2;
+
+ r1 = spu_rl(spu_and((vec_ushort8)(a), 0xFF), (vec_short8)spu_and((vec_ushort8)(b), 7));
+ r2 = spu_rl(spu_and((vec_ushort8)(a), -256), (vec_short8)spu_and(spu_rlmask((vec_ushort8)(b), -8), 7));
+ return ((vec_uchar16)(spu_sel(spu_or(r2, spu_sl(r2, 8)), spu_or(r1, spu_rlmask(r1, -8)), spu_splats((unsigned short)0xFF))));
+}
+
+static inline vec_char16 vec_rl(vec_char16 a, vec_uchar16 b)
+{
+ return ((vec_char16)(vec_rl((vec_uchar16)(a), b)));
+}
+
+static inline vec_ushort8 vec_rl(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_rl(a, (vec_short8)(b)));
+}
+
+static inline vec_short8 vec_rl(vec_short8 a, vec_ushort8 b)
+{
+ return (spu_rl(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_rl(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_rl(a, (vec_int4)(b)));
+}
+
+static inline vec_int4 vec_rl(vec_int4 a, vec_uint4 b)
+{
+ return (spu_rl(a, (vec_int4)(b)));
+}
+
+
+/* vec_round (vector round)
+ * =========
+ */
+static inline vec_float4 vec_round(vec_float4 a)
+{
+ vec_float4 s_half, s_one, d;
+ vec_uint4 odd;
+ vec_uint4 msb = spu_splats((unsigned int)0x80000000);
+ vec_float4 half = spu_splats(0.5f);
+ vec_int4 exp;
+ vec_uint4 mask;
+
+ s_half = (vec_float4)(spu_sel((vec_uint4)(half), (vec_uint4)(a), msb));
+ a = spu_add(a, s_half);
+ s_one = spu_add(s_half, s_half);
+ exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
+ mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
+ mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
+ mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));
+
+ odd = spu_and((vec_uint4)(spu_convts(a, 0)), 1);
+ s_one = spu_andc(s_one, (vec_float4)spu_cmpeq(mask, 0));
+ s_one = spu_and(s_one, spu_and((vec_float4)spu_cmpeq(spu_and((vec_uint4)(a), mask), 0),
+ (vec_float4)spu_cmpeq(odd, 1)));
+ d = spu_andc(a, (vec_float4)(mask));
+ d = spu_sub(d, s_one);
+ return (d);
+}
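+
+/* Round-to-nearest-even above: a signed 0.5 is added and the result
+ * truncated with the usual exponent-derived mask; when the input sat
+ * exactly on the half-way point (the masked fraction bits are zero) and
+ * the truncated value is odd, subtracting s_one backs the result off
+ * toward the even neighbour.
+ */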
+
+/* vec_rsqrte (vector reciprocal square root estimate)
+ * ==========
+ */
+#define vec_rsqrte(_a) spu_rsqrte(_a)
+
+
+/* vec_sel (vector select)
+ * =======
+ */
+#define vec_sel(_a, _b, _c) spu_sel(_a, _b, _c)
+
+
+/* vec_sl (vector shift left)
+ * ======
+ */
+static inline vec_uchar16 vec_sl(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 hi, lo;
+
+ lo = spu_and(spu_sl((vec_ushort8)(a), spu_and((vec_ushort8)(b), 7)), 0xFF);
+ hi = spu_sl(spu_and((vec_ushort8)(a), -256), spu_and(spu_rlmask((vec_ushort8)(b), -8), 7));
+
+ return ((vec_uchar16)(spu_or(hi, lo)));
+}
+
+static inline vec_char16 vec_sl(vec_char16 a, vec_uchar16 b)
+{
+ return ((vec_char16)(vec_sl((vec_uchar16)(a), b)));
+}
+
+static inline vec_ushort8 vec_sl(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_sl(a, spu_and(b, 15)));
+}
+
+static inline vec_short8 vec_sl(vec_short8 a, vec_ushort8 b)
+{
+ return (spu_sl(a, spu_and((vec_ushort8)(b), 15)));
+}
+
+static inline vec_uint4 vec_sl(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_sl(a, spu_and(b, 31)));
+}
+
+static inline vec_int4 vec_sl(vec_int4 a, vec_uint4 b)
+{
+ return (spu_sl(a, spu_and(b, 31)));
+}
+
+
+/* vec_sld (vector shift left double)
+ * =======
+ */
+#define vec_sld(_a, _b, _c) spu_shuffle(_a, _b, ((vec_uchar16){ 0+(_c), 1+(_c), 2+(_c), 3+(_c), \
+ 4+(_c), 5+(_c), 6+(_c), 7+(_c), \
+ 8+(_c), 9+(_c), 10+(_c), 11+(_c), \
+ 12+(_c), 13+(_c), 14+(_c), 15+(_c)}))
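+
+/* As with AltiVec, _c is assumed to be a 4-bit compile-time literal
+ * (0-15); the shuffle pattern { _c, _c+1, ... } then selects 16
+ * consecutive bytes of the concatenated pair a:b starting at byte _c.
+ */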
+
+
+/* vec_sll (vector shift left long)
+ * =======
+ */
+#define vec_sll(_a, _b) spu_slqw(_a, spu_extract((vec_uint4)(_b), 0))
+
+
+/* vec_slo (vector shift left by octet)
+ * =======
+ */
+#define vec_slo(_a, _b) spu_slqwbytebc(_a, spu_extract((vec_uint4)(_b), 3) & 0x7F)
+
+
+/* vec_splat (vector splat)
+ * =========
+ */
+#define vec_splat(_a, _b) spu_splats(spu_extract(_a, _b))
+
+
+/* vec_splat_s8 (vector splat signed byte)
+ * ============
+ */
+#define vec_splat_s8(_a) spu_splats((signed char)(_a))
+
+
+/* vec_splat_s16 (vector splat signed half-word)
+ * =============
+ */
+#define vec_splat_s16(_a) spu_splats((signed short)(_a))
+
+
+/* vec_splat_s32 (vector splat signed word)
+ * =============
+ */
+#define vec_splat_s32(_a) spu_splats((signed int)(_a))
+
+
+/* vec_splat_u8 (vector splat unsigned byte)
+ * ============
+ */
+#define vec_splat_u8(_a) spu_splats((unsigned char)(_a))
+
+
+/* vec_splat_u16 (vector splat unsigned half-word)
+ * =============
+ */
+#define vec_splat_u16(_a) spu_splats((unsigned short)(_a))
+
+
+/* vec_splat_u32 (vector splat unsigned word)
+ * =============
+ */
+#define vec_splat_u32(_a) spu_splats((unsigned int)(_a))
+
+
+/* vec_sr (vector shift right)
+ * ======
+ */
+static inline vec_uchar16 vec_sr(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 hi, lo;
+
+ lo = spu_rlmask(spu_and((vec_ushort8)(a), 0xFF), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7))));
+ hi = spu_and(spu_rlmask((vec_ushort8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)))), -256);
+
+ return ((vec_uchar16)(spu_or(hi, lo)));
+}
+
+static inline vec_char16 vec_sr(vec_char16 a, vec_uchar16 b)
+{
+ return ((vec_char16)(vec_sr((vec_uchar16)(a), b)));
+}
+
+static inline vec_ushort8 vec_sr(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_rlmask(a, spu_sub(0, (vec_short8)(spu_and(b, 15)))));
+}
+
+static inline vec_short8 vec_sr(vec_short8 a, vec_ushort8 b)
+{
+ return ((vec_short8)(vec_sr((vec_ushort8)(a), b)));
+}
+
+static inline vec_uint4 vec_sr(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_rlmask(a, spu_sub(0, (vec_int4)(spu_and(b, 31)))));
+}
+
+static inline vec_int4 vec_sr(vec_int4 a, vec_uint4 b)
+{
+ return ((vec_int4)(vec_sr((vec_uint4)(a), b)));
+}
+
+
+/* vec_sra (vector shift right algebraic)
+ * =======
+ */
+static inline vec_char16 vec_sra(vec_char16 a, vec_uchar16 b)
+{
+ vec_short8 hi, lo;
+
+ lo = spu_and(spu_rlmaska(spu_extend(a), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7)))), 0xFF);
+ hi = spu_and(spu_rlmaska((vec_short8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)))), -256);
+
+ return ((vec_char16)(spu_or(hi, lo)));
+}
+
+static inline vec_uchar16 vec_sra(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(vec_sra((vec_char16)(a), b)));
+}
+
+static inline vec_short8 vec_sra(vec_short8 a, vec_ushort8 b)
+{
+ return (spu_rlmaska(a, spu_sub(0, (vec_short8)(spu_and(b, 15)))));
+}
+
+static inline vec_ushort8 vec_sra(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((vec_ushort8)(vec_sra((vec_short8)(a), b)));
+}
+
+static inline vec_int4 vec_sra(vec_int4 a, vec_uint4 b)
+{
+ return (spu_rlmaska(a, spu_sub(0, (vec_int4)(spu_and(b, 31)))));
+}
+
+static inline vec_uint4 vec_sra(vec_uint4 a, vec_uint4 b)
+{
+ return ((vec_uint4)(vec_sra((vec_int4)(a), b)));
+}
+
+
+/* vec_srl (vector shift right long)
+ * =======
+ */
+#define vec_srl(_a, _b) spu_rlmaskqw(_a, 0-spu_extract((vec_int4)(_b), 3))
+
+
+/* vec_sro (vector shift right by octet)
+ * =======
+ */
+#define vec_sro(_a, _b) spu_rlmaskqwbyte(_a, 0 - ((spu_extract((vec_int4)(_b), 3) >> 3) & 0xF))
+
+/* vec_st (vector store indexed)
+ * ======
+ */
+static inline void vec_st(vec_uchar16 a, int b, unsigned char *c)
+{
+ *((vec_uchar16 *)(c+b)) = a;
+}
+
+static inline void vec_st(vec_uchar16 a, int b, vec_uchar16 *c)
+{
+ *((vec_uchar16 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_char16 a, int b, signed char *c)
+{
+ *((vec_char16 *)(c+b)) = a;
+}
+
+static inline void vec_st(vec_char16 a, int b, vec_char16 *c)
+{
+ *((vec_char16 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_bchar16 a, int b, signed char *c)
+{
+ *((vec_bchar16 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_ushort8 a, int b, unsigned short *c)
+{
+ *((vec_ushort8 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_ushort8 a, int b, vec_ushort8 *c)
+{
+ *((vec_ushort8 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_short8 a, int b, signed short *c)
+{
+ *((vec_short8 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_short8 a, int b, vec_short8 *c)
+{
+ *((vec_short8 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_bshort8 a, int b, signed short *c)
+{
+ *((vec_bshort8 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_uint4 a, int b, unsigned int *c)
+{
+ *((vec_uint4 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_uint4 a, int b, vec_uint4 *c)
+{
+ *((vec_uint4 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_int4 a, int b, signed int *c)
+{
+ *((vec_int4 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_int4 a, int b, vec_int4 *c)
+{
+ *((vec_int4 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_bint4 a, int b, signed int *c)
+{
+ *((vec_bint4 *)((signed char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_float4 a, int b, float *c)
+{
+ *((vec_float4 *)((unsigned char *)(c)+b)) = a;
+}
+
+static inline void vec_st(vec_float4 a, int b, vec_float4 *c)
+{
+ *((vec_float4 *)((unsigned char *)(c)+b)) = a;
+}
+
+
+/* vec_ste (vector store element indexed)
+ * =======
+ */
+static inline void vec_ste(vec_uchar16 a, int b, unsigned char *c)
+{
+ unsigned char *ptr;
+
+ ptr = c + b;
+ *ptr = spu_extract(a, (int)(ptr) & 15);
+}
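+
+/* As in VMX, vec_ste stores only a single element: the effective
+ * address's low bits pick which lane of a is extracted, and the store
+ * goes to the element-aligned address itself.
+ */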
+
+static inline void vec_ste(vec_char16 a, int b, signed char *c)
+{
+ vec_ste((vec_uchar16)(a), b, (unsigned char *)(c));
+}
+
+static inline void vec_ste(vec_bchar16 a, int b, signed char *c)
+{
+ vec_ste((vec_uchar16)(a), b, (unsigned char *)(c));
+}
+
+static inline void vec_ste(vec_ushort8 a, int b, unsigned short *c)
+{
+ unsigned short *ptr;
+
+ ptr = (unsigned short *)(((unsigned int)(c) + b) & ~1);
+ *ptr = spu_extract(a, ((int)(ptr) >> 1) & 7);
+}
+
+static inline void vec_ste(vec_short8 a, int b, signed short *c)
+{
+ vec_ste((vec_ushort8)(a), b, (unsigned short *)(c));
+}
+
+static inline void vec_ste(vec_bshort8 a, int b, signed short *c)
+{
+ vec_ste((vec_ushort8)(a), b, (unsigned short *)(c));
+}
+
+static inline void vec_ste(vec_uint4 a, int b, unsigned int *c)
+{
+ unsigned int *ptr;
+
+ ptr = (unsigned int *)(((unsigned int)(c) + b) & ~3);
+ *ptr = spu_extract(a, ((int)(ptr) >> 2) & 3);
+}
+
+static inline void vec_ste(vec_int4 a, int b, signed int *c)
+{
+ vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
+}
+
+static inline void vec_ste(vec_bint4 a, int b, signed int *c)
+{
+ vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
+}
+
+static inline void vec_ste(vec_float4 a, int b, float *c)
+{
+ vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
+}
+
+
+/* vec_stl (vector store indexed LRU)
+ * =======
+ */
+#define vec_stl(_a, _b, _c) vec_st(_a, _b, _c)
+
+
+/* vec_sub (vector subtract)
+ * =======
+ */
+static inline vec_uchar16 vec_sub(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((vec_uchar16)(spu_sel(spu_sub((vec_ushort8)(a), (vec_ushort8)(b)),
+ spu_sub(spu_and((vec_ushort8)(a), -256), spu_and((vec_ushort8)(b), -256)),
+ spu_splats((unsigned short)0xFF00))));
+}
+
+static inline vec_char16 vec_sub(vec_char16 a, vec_char16 b)
+{
+ return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static inline vec_char16 vec_sub(vec_bchar16 a, vec_char16 b)
+{
+ return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static inline vec_char16 vec_sub(vec_char16 a, vec_bchar16 b)
+{
+ return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
+}
+
+static inline vec_ushort8 vec_sub(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_sub(a, b));
+}
+
+static inline vec_short8 vec_sub(vec_short8 a, vec_short8 b)
+{
+ return (spu_sub(a, b));
+}
+
+static inline vec_short8 vec_sub(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_sub((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_sub(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_sub(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_sub(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_sub(a, b));
+}
+
+static inline vec_int4 vec_sub(vec_int4 a, vec_int4 b)
+{
+ return (spu_sub(a, b));
+}
+
+static inline vec_int4 vec_sub(vec_bint4 a, vec_int4 b)
+{
+ return (spu_sub((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_sub(vec_int4 a, vec_bint4 b)
+{
+ return (spu_sub(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_sub(vec_float4 a, vec_float4 b)
+{
+ return (spu_sub(a, b));
+}
+
+
+/* vec_subc (vector subtract carryout)
+ * ========
+ */
+#define vec_subc(_a, _b) spu_genb(_a, _b)
+
+
+/* vec_subs (vector subtract saturate)
+ * ========
+ */
+static inline vec_uchar16 vec_subs(vec_uchar16 a, vec_uchar16 b)
+{
+ vec_ushort8 s1, s2;
+ vec_uchar16 s, d;
+
+ s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8));
+ s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));
+ s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22,
+ 8, 24, 10, 26, 12, 28, 14, 30})));
+ d = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
+ 9, 25, 11, 27, 13, 29, 15, 31})));
+ return (spu_andc(d, s));
+}
+
+static inline vec_char16 vec_subs(vec_char16 a, vec_char16 b)
+{
+ vec_ushort8 s1, s2;
+ vec_uchar16 s, d;
+
+ s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8));
+ s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));
+ s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
+ 9, 25, 11, 27, 13, 29, 15, 31})));
+ d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_nor((vec_uchar16)(a), spu_nand(s, (vec_uchar16)(b))), 0x7F));
+ d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_and((vec_uchar16)(a), spu_nor(s, (vec_uchar16)(b))), 0x7F));
+
+ return ((vec_char16)(d));
+}
+
+static inline vec_char16 vec_subs(vec_bchar16 a, vec_char16 b)
+{
+ return (vec_subs((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_subs(vec_char16 a, vec_bchar16 b)
+{
+ return (vec_subs(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_subs(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a)));
+}
+
+static inline vec_short8 vec_subs(vec_short8 a, vec_short8 b)
+{
+ vec_short8 s;
+ vec_short8 d;
+
+ s = spu_sub(a, b);
+ d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -15)));
+ d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -15)));
+
+ return (d);
+}
+
+static inline vec_short8 vec_subs(vec_bshort8 a, vec_short8 b)
+{
+ return ((vec_short8)(vec_subs((vec_short8)(a), b)));
+}
+
+static inline vec_short8 vec_subs(vec_short8 a, vec_bshort8 b)
+{
+ return ((vec_short8)(vec_subs(a, (vec_short8)(b))));
+}
+
+static inline vec_uint4 vec_subs(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a)));
+}
+
+static inline vec_int4 vec_subs(vec_int4 a, vec_int4 b)
+{
+ vec_int4 s;
+ vec_int4 d;
+
+ s = spu_sub(a, b);
+ d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -31)));
+ d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -31)));
+
+ return (d);
+}
+
+static inline vec_int4 vec_subs(vec_bint4 a, vec_int4 b)
+{
+ return ((vec_int4)(vec_subs((vec_int4)(a), b)));
+}
+
+static inline vec_int4 vec_subs(vec_int4 a, vec_bint4 b)
+{
+ return ((vec_int4)(vec_subs(a, (vec_int4)(b))));
+}
+
+
+/* vec_sum4s (vector sum across partial (1/4) saturated)
+ * =========
+ */
+static inline vec_uint4 vec_sum4s(vec_uchar16 a, vec_uint4 b)
+{
+ vec_uint4 a01_23, a0123;
+
+ a01_23 = (vec_uint4)(spu_add(spu_rlmask((vec_ushort8)(a), -8),
+ spu_and((vec_ushort8)(a), 0xFF)));
+ a0123 = spu_add(spu_rlmask(a01_23, -16), spu_and(a01_23, 0x1FF));
+ return (vec_adds(a0123, b));
+}
+
+static inline vec_int4 vec_sum4s(vec_char16 a, vec_int4 b)
+{
+ vec_int4 a01_23, a0123;
+
+ a01_23 = (vec_int4)(spu_add(spu_rlmaska((vec_short8)(a), -8),
+ spu_extend(a)));
+ a0123 = spu_add(spu_rlmaska(a01_23, -16), spu_extend((vec_short8)(a01_23)));
+ return (vec_adds(a0123, b));
+}
+
+static inline vec_int4 vec_sum4s(vec_short8 a, vec_int4 b)
+{
+ vec_int4 a0123;
+
+ a0123 = spu_add(spu_rlmaska((vec_int4)(a), -16), spu_extend(a));
+ return (vec_adds(a0123, b));
+}
+
+
+/* vec_sum2s (vector sum across partial (1/2) saturated)
+ * =========
+ */
+static inline vec_int4 vec_sum2s(vec_int4 a, vec_int4 b)
+{
+ vec_int4 c, d;
+ vec_int4 sign1, sign2, sign3;
+ vec_int4 carry, sum_l, sum_h, sat, sat_val;
+
+ sign1 = spu_rlmaska(a, -31);
+ sign2 = spu_rlmaska(b, -31);
+
+ c = spu_rlqwbyte(a, -4);
+ sign3 = spu_rlqwbyte(sign1, -4);
+
+ carry = spu_genc(a, b);
+ sum_l = spu_add(a, b);
+ sum_h = spu_addx(sign1, sign2, carry);
+
+ carry = spu_genc(sum_l, c);
+ sum_l = spu_add(sum_l, c);
+ sum_h = spu_addx(sum_h, sign3, carry);
+
+ sign1 = spu_rlmaska(sum_l, -31);
+ sign2 = spu_rlmaska(sum_h, -31);
+
+ sat_val = spu_xor(sign2, spu_splats((signed int)0x7FFFFFFF));
+
+ sat = spu_orc(spu_xor(sign1, sign2), (vec_int4)spu_cmpeq(sum_h, sign2));
+
+ d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), (vec_int4){0, -1, 0, -1});
+
+ return (d);
+}
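+
+/* Editorial note: vec_sum2s computes, for each doubleword half, the
+ * saturated sum a[2i] + a[2i+1] + b[2i+1], placing it in result word
+ * 2i+1 and clearing word 2i (hence the final {0,-1,0,-1} mask); the
+ * carry/sign bookkeeping above is 64-bit addition built from 32-bit
+ * parts.  A scalar sketch of the saturation step (illustrative only):
+ *
+ *   static inline int sat32(long long v)
+ *   {
+ *     if (v >  2147483647LL) return 2147483647;       // 0x7FFFFFFF
+ *     if (v < -2147483648LL) return -2147483647 - 1;  // 0x80000000
+ *     return (int)v;
+ *   }
+ *   // d[1] = sat32((long long)a[0] + a[1] + b[1]);  d[0] = 0;
+ *   // d[3] = sat32((long long)a[2] + a[3] + b[3]);  d[2] = 0;
+ */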
+
+
+/* vec_sums (vector sum saturated)
+ * ========
+ */
+static inline vec_int4 vec_sums(vec_int4 a, vec_int4 b)
+{
+ vec_int4 a0, a1, a2, c0, c1, c2, d;
+ vec_int4 sign_a, sign_b, sign_l, sign_h;
+ vec_int4 sum_l, sum_h, sat, sat_val;
+
+ sign_a = spu_rlmaska(a, -31);
+ sign_b = spu_rlmaska(b, -31);
+
+ a0 = spu_rlqwbyte(a, -12);
+ a1 = spu_rlqwbyte(a, -8);
+ a2 = spu_rlqwbyte(a, -4);
+
+ sum_l = spu_add(a, b);
+ sum_h = spu_addx(sign_a, sign_b, spu_genc(a, b));
+
+ c2 = spu_genc(sum_l, a2);
+ sum_l = spu_add(sum_l, a2);
+ sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -4), c2);
+
+ c1 = spu_genc(sum_l, a1);
+ sum_l = spu_add(sum_l, a1);
+ sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -8), c1);
+
+ c0 = spu_genc(sum_l, a0);
+ sum_l = spu_add(sum_l, a0);
+ sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -12), c0);
+
+ sign_l = spu_rlmaska(sum_l, -31);
+ sign_h = spu_rlmaska(sum_h, -31);
+
+ sat_val = spu_xor(sign_h, spu_splats((signed int)0x7FFFFFFF));
+
+ sat = spu_orc(spu_xor(sign_l, sign_h), (vec_int4)spu_cmpeq(sum_h, sign_h));
+
+ d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), ((vec_int4){0, 0, 0, -1}));
+
+ return (d);
+}
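+
+/* Editorial note: vec_sums places the saturated sum of all four words
+ * of 'a' plus b[3] in result word 3 and clears words 0-2 (the final
+ * {0,0,0,-1} mask).  Reusing the sat32 helper sketched above:
+ *
+ *   // d[3] = sat32((long long)a[0] + a[1] + a[2] + a[3] + b[3]);
+ */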
+
+
+/* vec_trunc (vector truncate)
+ * =========
+ */
+static inline vec_float4 vec_trunc(vec_float4 a)
+{
+ vec_int4 exp;
+ vec_uint4 mask;
+
+ exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
+ mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
+ mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
+ mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));
+ return (spu_andc(a, (vec_float4)(mask)));
+}
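+
+/* Editorial note: vec_trunc rounds toward zero by building a mask of
+ * the fraction bits that lie below the binary point (derived from the
+ * exponent field) and clearing them; e.g. {2.5f, -2.5f} truncates to
+ * {2.0f, -2.0f}, and any |a| < 1 becomes 0.0f.
+ */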
+
+/* vec_unpackh (vector unpack high element)
+ * ===========
+ */
+static inline vec_short8 vec_unpackh(vec_char16 a)
+{
+ return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 1, 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7}))));
+}
+
+static inline vec_bshort8 vec_unpackh(vec_bchar16 a)
+{
+ return ((vec_bshort8)(vec_unpackh((vec_char16)(a))));
+}
+
+static inline vec_int4 vec_unpackh(vec_short8 a)
+{
+ return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 0, 1, 0, 0, 2, 3,
+ 0, 0, 4, 5, 0, 0, 6, 7}))));
+}
+
+#ifdef SUPPORT_UNPACK_PIXEL
+/* Due to type conflicts, unpacking of pixel types and boolean shorts
+ * cannot be supported simultaneously. By default, the boolean short is
+ * supported.
+ */
+static inline vec_uint4 vec_unpackh(vec_pixel8 a)
+{
+ vec_ushort8 p1, p2;
+
+ p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a.p), -7)),
+ spu_and((vec_ushort8)(a.p), 0x1F),
+ ((vec_uchar16){ 0, 128, 128, 17, 2, 128, 128, 19,
+ 4, 128, 128, 21, 6, 128, 128, 23}));
+ p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a.p), -5), 0x1F),
+ spu_and(spu_rlmask((vec_ushort8)(a.p), -10), 0x1F),
+ ((vec_uchar16){ 128, 17, 1, 128, 128, 19, 3, 128,
+ 128, 21, 5, 128, 128, 23, 7, 128}));
+ return ((vec_uint4)(spu_or(p1, p2)));
+}
+
+#else
+
+static inline vec_bint4 vec_unpackh(vec_bshort8 a)
+{
+ return ((vec_bint4)(vec_unpackh((vec_short8)(a))));
+}
+#endif
+
+
+/* vec_unpackl (vector unpack low element)
+ * ===========
+ */
+static inline vec_short8 vec_unpackl(vec_char16 a)
+{
+ return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13, 14, 14, 15, 15}))));
+}
+
+static inline vec_bshort8 vec_unpackl(vec_bchar16 a)
+{
+ return ((vec_bshort8)(vec_unpackl((vec_char16)(a))));
+}
+
+
+static inline vec_int4 vec_unpackl(vec_short8 a)
+{
+ return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 8, 9, 0, 0, 10, 11,
+ 0, 0,12,13, 0, 0, 14, 15}))));
+}
+
+
+#ifdef SUPPORT_UNPACK_PIXEL
+/* Due to type conflicts, unpacking of pixel types and boolean shorts
+ * cannot be supported simultaneously. By default, the boolean short is
+ * supported.
+ */
+static inline vec_uint4 vec_unpackl(vec_pixel8 a)
+{
+ vec_ushort8 p1, p2;
+
+ p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a.p), -7)),
+ spu_and((vec_ushort8)(a.p), 0x1F),
+ ((vec_uchar16){ 8, 128, 128, 25, 10, 128, 128, 27,
+ 12, 128, 128, 29, 14, 128, 128, 31}));
+ p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a.p), -5), 0x1F),
+ spu_and(spu_rlmask((vec_ushort8)(a.p), -10), 0x1F),
+ ((vec_uchar16){ 128, 25, 9, 128, 128, 27, 11, 128,
+ 128, 29, 13, 128, 128, 31, 15, 128}));
+ return ((vec_uint4)(spu_or(p1, p2)));
+}
+
+#else
+
+static inline vec_bint4 vec_unpackl(vec_bshort8 a)
+{
+ return ((vec_bint4)(vec_unpackl((vec_short8)(a))));
+}
+#endif
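+
+/* Editorial note: a translation unit that wants the vec_pixel8
+ * overloads of vec_unpackh/vec_unpackl defines SUPPORT_UNPACK_PIXEL
+ * before including this header, giving up the vec_bshort8 overloads
+ * instead (illustrative):
+ *
+ *   #define SUPPORT_UNPACK_PIXEL
+ *   #include <vmx2spu.h>
+ */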
+
+/* vec_xor (vector logical xor)
+ * ======
+ */
+static inline vec_uchar16 vec_xor(vec_uchar16 a, vec_uchar16 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_char16 vec_xor(vec_char16 a, vec_char16 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_char16 vec_xor(vec_bchar16 a, vec_char16 b)
+{
+ return (spu_xor((vec_char16)(a), b));
+}
+
+static inline vec_char16 vec_xor(vec_char16 a, vec_bchar16 b)
+{
+ return (spu_xor(a, (vec_char16)(b)));
+}
+
+static inline vec_ushort8 vec_xor(vec_ushort8 a, vec_ushort8 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_short8 vec_xor(vec_short8 a, vec_short8 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_short8 vec_xor(vec_bshort8 a, vec_short8 b)
+{
+ return (spu_xor((vec_short8)(a), b));
+}
+
+static inline vec_short8 vec_xor(vec_short8 a, vec_bshort8 b)
+{
+ return (spu_xor(a, (vec_short8)(b)));
+}
+
+static inline vec_uint4 vec_xor(vec_uint4 a, vec_uint4 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_int4 vec_xor(vec_int4 a, vec_int4 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_int4 vec_xor(vec_bint4 a, vec_int4 b)
+{
+ return (spu_xor((vec_int4)(a), b));
+}
+
+static inline vec_int4 vec_xor(vec_int4 a, vec_bint4 b)
+{
+ return (spu_xor(a, (vec_int4)(b)));
+}
+
+static inline vec_float4 vec_xor(vec_float4 a, vec_float4 b)
+{
+ return (spu_xor(a, b));
+}
+
+static inline vec_float4 vec_xor(vec_bint4 a, vec_float4 b)
+{
+ return (spu_xor((vec_float4)(a), b));
+}
+
+static inline vec_float4 vec_xor(vec_float4 a, vec_bint4 b)
+{
+ return (spu_xor(a, (vec_float4)(b)));
+}
+
+/************************************************************************
+ * PREDICATES
+ ************************************************************************/
+
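+/* Editorial note: the predicates below share one idiom.  spu_gather
+ * packs the low bit of each element of a comparison result (each
+ * element is all ones or all zeros) into a bit mask in element 0 of
+ * the returned vector; comparing that mask with 0xFFFF, 0xFF or 0xF
+ * (16, 8 or 4 elements) tests all lanes, while comparing with 0 tests
+ * any lane.  A minimal sketch, assuming only <spu_intrinsics.h>:
+ *
+ *   static inline int all_lanes_eq(vec_uint4 a, vec_uint4 b)
+ *   {
+ *     // 4 word lanes -> 4 mask bits; 0xF means every lane matched
+ *     return spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF;
+ *   }
+ */
+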
+/* vec_all_eq (all elements equal)
+ * ==========
+ */
+static inline int vec_all_eq(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_eq(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_eq(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_eq(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0xFFFF));
+}
+
+static inline int vec_all_eq(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF));
+}
+
+static inline int vec_all_eq(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF));
+}
+
+static inline int vec_all_eq(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0xFF));
+}
+
+static inline int vec_all_eq(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0xFF));
+}
+
+static inline int vec_all_eq(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
+}
+
+static inline int vec_all_eq(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
+}
+
+static inline int vec_all_eq(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0xF));
+}
+
+static inline int vec_all_eq(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0xF));
+}
+
+static inline int vec_all_eq(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
+}
+
+
+/* vec_all_ge (all elements greater than or equal)
+ * ==========
+ */
+static inline int vec_all_ge(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0));
+}
+
+static inline int vec_all_ge(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+
+/* vec_all_gt (all elements greater than)
+ * ==========
+ */
+static inline int vec_all_gt(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_gt(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_gt(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_gt(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0xFFFF));
+}
+
+static inline int vec_all_gt(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF));
+}
+
+static inline int vec_all_gt(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF));
+}
+
+static inline int vec_all_gt(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0xFF));
+}
+
+static inline int vec_all_gt(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0xFF));
+}
+
+static inline int vec_all_gt(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
+}
+
+static inline int vec_all_gt(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
+}
+
+static inline int vec_all_gt(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0xF));
+}
+
+static inline int vec_all_gt(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0xF));
+}
+
+static inline int vec_all_gt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
+}
+
+
+/* vec_all_in (all elements in bounds)
+ * ==========
+ */
+static inline int vec_all_in(vec_float4 a, vec_float4 b)
+{
+ return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) == 0xF);
+}
+
+
+/* vec_all_le (all elements less than or equal)
+ * ==========
+ */
+static inline int vec_all_le(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0));
+}
+
+static inline int vec_all_le(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0));
+}
+
+static inline int vec_all_le(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_le(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0));
+}
+
+static inline int vec_all_le(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+
+/* vec_all_lt (all elements less than)
+ * ==========
+ */
+static inline int vec_all_lt(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_lt(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_lt(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0xFFFF));
+}
+
+static inline int vec_all_lt(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0xFFFF));
+}
+
+static inline int vec_all_lt(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF));
+}
+
+static inline int vec_all_lt(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF));
+}
+
+static inline int vec_all_lt(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0xFF));
+}
+
+static inline int vec_all_lt(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0xFF));
+}
+
+static inline int vec_all_lt(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
+}
+
+static inline int vec_all_lt(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
+}
+
+static inline int vec_all_lt(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0xF));
+}
+
+static inline int vec_all_lt(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0xF));
+}
+
+static inline int vec_all_lt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
+}
+
+
+/* vec_all_nan (all elements not a number)
+ * ===========
+ */
+static inline int vec_all_nan(vec_float4 a)
+{
+ vec_uint4 exp, man;
+ vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000);
+
+ exp = spu_and((vec_uint4)(a), exp_mask);
+ man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF));
+ return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask),
+ spu_cmpeq(man, 0))), 0) == 0xF));
+}
+
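+/* Editorial note: SPU single-precision arithmetic has no NaN encoding,
+ * so the predicate above can never hold; the macro below folds it to a
+ * constant 0 and shadows the function (presumably kept for reference).
+ */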
+#define vec_all_nan(_a) (0)
+
+
+/* vec_all_ne (all elements not equal)
+ * ==========
+ */
+static inline int vec_all_ne(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0));
+}
+
+static inline int vec_all_ne(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
+}
+
+
+/* vec_all_nge (all elements not greater than or equal)
+ * ===========
+ */
+static inline int vec_all_nge(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
+}
+
+
+/* vec_all_ngt (all elements not greater than)
+ * ===========
+ */
+static inline int vec_all_ngt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
+}
+
+
+/* vec_all_nle (all elements not less than or equal)
+ * ===========
+ */
+static inline int vec_all_nle(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
+}
+
+
+/* vec_all_nlt (all elements not less than)
+ * ===========
+ */
+static inline int vec_all_nlt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
+}
+
+
+/* vec_all_numeric (all elements numeric)
+ * ===============
+ */
+static inline int vec_all_numeric(vec_float4 a)
+{
+ vec_uint4 exp;
+
+ exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF);
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) == 0));
+}
+
+
+/* vec_any_eq (any elements equal)
+ * ==========
+ */
+static inline int vec_any_eq(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0));
+}
+
+static inline int vec_any_eq(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
+}
+
+static inline int vec_any_eq(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
+}
+
+static inline int vec_any_eq(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq((vec_int4)(a), b), -31)), 0)));
+}
+
+static inline int vec_any_eq(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, (vec_int4)(b)), -31)), 0)));
+}
+
+static inline int vec_any_eq(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
+}
+
+/* vec_any_ge (any elements greater than or equal)
+ * ==========
+ */
+static inline int vec_any_ge(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ge(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ge(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ge(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ge(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF));
+}
+
+static inline int vec_any_ge(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF));
+}
+
+static inline int vec_any_ge(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0xFF));
+}
+
+static inline int vec_any_ge(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0xFF));
+}
+
+static inline int vec_any_ge(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
+}
+
+static inline int vec_any_ge(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
+}
+
+static inline int vec_any_ge(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) != 0xF));
+}
+
+static inline int vec_any_ge(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) != 0xF));
+}
+
+static inline int vec_any_ge(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
+}
+
+
+/* vec_any_gt (any elements greater than)
+ * ==========
+ */
+static inline int vec_any_gt(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0));
+}
+
+static inline int vec_any_gt(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
+}
+
+static inline int vec_any_gt(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
+}
+
+static inline int vec_any_gt(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(a), b), -31)), 0)));
+}
+
+static inline int vec_any_gt(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, (vec_int4)(b)), -31)), 0)));
+}
+
+static inline int vec_any_gt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
+}
+
+/* vec_any_le (any elements less than or equal)
+ * ==========
+ */
+static inline int vec_any_le(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_le(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_le(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_le(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0xFFFF));
+}
+
+static inline int vec_any_le(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF));
+}
+
+static inline int vec_any_le(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF));
+}
+
+static inline int vec_any_le(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0xFF));
+}
+
+static inline int vec_any_le(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0xFF));
+}
+
+static inline int vec_any_le(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
+}
+
+static inline int vec_any_le(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
+}
+
+static inline int vec_any_le(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) != 0xF));
+}
+
+static inline int vec_any_le(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) != 0xF));
+}
+
+static inline int vec_any_le(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
+}
+
+
+/* vec_any_lt (any elements less than)
+ * ==========
+ */
+static inline int vec_any_lt(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0));
+}
+
+static inline int vec_any_lt(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
+}
+
+static inline int vec_any_lt(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
+}
+
+static inline int vec_any_lt(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, (vec_int4)(a)), -31)), 0)));
+}
+
+static inline int vec_any_lt(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(b), a), -31)), 0)));
+}
+
+static inline int vec_any_lt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
+}
+
+/* vec_any_nan (any elements not a number)
+ * ===========
+ */
+static inline int vec_any_nan(vec_float4 a)
+{
+ vec_uint4 exp, man;
+ vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000);
+
+ exp = spu_and((vec_uint4)(a), exp_mask);
+ man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF));
+ return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask),
+ spu_cmpeq(man, 0))), 0) != 0));
+}
+
+
+/* vec_any_ne (any elements not equal)
+ * ==========
+ */
+static inline int vec_any_ne(vec_uchar16 a, vec_uchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ne(vec_char16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ne(vec_bchar16 a, vec_char16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ne(vec_char16 a, vec_bchar16 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0xFFFF));
+}
+
+static inline int vec_any_ne(vec_ushort8 a, vec_ushort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFF));
+}
+
+static inline int vec_any_ne(vec_short8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFF));
+}
+
+static inline int vec_any_ne(vec_bshort8 a, vec_short8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0xFF));
+}
+
+static inline int vec_any_ne(vec_short8 a, vec_bshort8 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0xFF));
+}
+
+static inline int vec_any_ne(vec_uint4 a, vec_uint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
+}
+
+static inline int vec_any_ne(vec_int4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
+}
+
+static inline int vec_any_ne(vec_bint4 a, vec_int4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) != 0xF));
+}
+
+static inline int vec_any_ne(vec_int4 a, vec_bint4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) != 0xF));
+}
+
+static inline int vec_any_ne(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
+}
+
+
+/* vec_any_nge (any elements not greater than or equal)
+ * ===========
+ */
+static inline int vec_any_nge(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
+}
+
+/* vec_any_ngt (any elements not greater than)
+ * ===========
+ */
+static inline int vec_any_ngt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
+}
+
+
+/* vec_any_nle (any elements not less than or equal)
+ * ===========
+ */
+static inline int vec_any_nle(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
+}
+
+
+/* vec_any_nlt (any elements not less than)
+ * ===========
+ */
+static inline int vec_any_nlt(vec_float4 a, vec_float4 b)
+{
+ return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
+}
+
+
+/* vec_any_numeric (any elements numeric)
+ * ===============
+ */
+static inline int vec_any_numeric(vec_float4 a)
+{
+ vec_uint4 exp;
+
+ exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF);
+ return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) != 0xF));
+}
+
+
+/* vec_any_out (any elements out of bounds)
+ * ===========
+ */
+static inline int vec_any_out(vec_float4 a, vec_float4 b)
+{
+ return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) != 0xF);
+}
+
+
+/* CBE Language Extension Intrinsics
+ */
+
+/* vec_extract (extract element from vector)
+ * ===========
+ */
+#define vec_extract(_a, _element) spu_extract(_a, _element)
+
+
+/* vec_insert (insert scalar into specified vector element)
+ * ==========
+ */
+#define vec_insert(_a, _b, _element) spu_insert(_a, _b, _element)
+
+/* vec_lvlx (load vector left indexed)
+ * ========
+ */
+static inline vec_uchar16 vec_lvlx(int a, unsigned char *b)
+{
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_uchar16 vec_lvlx(int a, vec_uchar16 *b)
+{
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_char16 vec_lvlx(int a, signed char *b)
+{
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_char16 vec_lvlx(int a, vec_char16 *b)
+{
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_ushort8 vec_lvlx(int a, unsigned short *b)
+{
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_ushort8 vec_lvlx(int a, vec_ushort8 *b)
+{
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_short8 vec_lvlx(int a, signed short *b)
+{
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_short8 vec_lvlx(int a, vec_short8 *b)
+{
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_uint4 vec_lvlx(int a, unsigned int *b)
+{
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_uint4 vec_lvlx(int a, vec_uint4 *b)
+{
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_int4 vec_lvlx(int a, signed int *b)
+{
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_int4 vec_lvlx(int a, vec_int4 *b)
+{
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_float4 vec_lvlx(int a, float *b)
+{
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
+
+static inline vec_float4 vec_lvlx(int a, vec_float4 *b)
+{
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
+ return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
+}
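+
+/* Editorial note: each vec_lvlx overload loads the aligned quadword
+ * containing the effective address (SPU quadword loads ignore the low
+ * four address bits) and shifts it left so the addressed byte lands in
+ * element 0.  Combined with vec_lvrx below, this gives the usual
+ * unaligned-load idiom (illustrative sketch):
+ *
+ *   static inline vec_uchar16 load_unaligned(unsigned char *p)
+ *   {
+ *     // left part of the straddled data OR'd with the right part
+ *     return spu_or(vec_lvlx(0, p), vec_lvrx(16, p));
+ *   }
+ */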
+
+
+/* vec_lvlxl (load vector left indexed last)
+ * =========
+ */
+#define vec_lvlxl(_a, _b) vec_lvlx(_a, _b)
+
+
+/* vec_lvrx (load vector right indexed)
+ * ========
+ */
+static inline vec_uchar16 vec_lvrx(int a, unsigned char *b)
+{
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_uchar16 vec_lvrx(int a, vec_uchar16 *b)
+{
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_char16 vec_lvrx(int a, signed char *b)
+{
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_char16 vec_lvrx(int a, vec_char16 *b)
+{
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_ushort8 vec_lvrx(int a, unsigned short *b)
+{
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_ushort8 vec_lvrx(int a, vec_ushort8 *b)
+{
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_short8 vec_lvrx(int a, signed short *b)
+{
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_short8 vec_lvrx(int a, vec_short8 *b)
+{
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_uint4 vec_lvrx(int a, unsigned int *b)
+{
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_uint4 vec_lvrx(int a, vec_uint4 *b)
+{
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_int4 vec_lvrx(int a, signed int *b)
+{
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_int4 vec_lvrx(int a, vec_int4 *b)
+{
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_float4 vec_lvrx(int a, float *b)
+{
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+static inline vec_float4 vec_lvrx(int a, vec_float4 *b)
+{
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
+ return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
+}
+
+
+
+/* vec_lvrxl (load vector right indexed last)
+ * =========
+ */
+#define vec_lvrxl(_a, _b) vec_lvrx(_a, _b)
+
+
+/* vec_promote (promote scalar to a vector)
+ * ===========
+ */
+#define vec_promote(_a, _element) spu_promote(_a, _element)
+
+
+/* vec_splats (splat scalar to a vector)
+ * ==========
+ */
+#define vec_splats(_a) spu_splats(_a)
+
+
+/* vec_stvlx (store vector left indexed)
+ * =========
+ */
+static inline void vec_stvlx(vec_uchar16 a, int b, unsigned char *c)
+{
+ int shift;
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvlx(vec_uchar16 a, int b, vec_uchar16 *c)
+{
+ int shift;
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvlx(vec_char16 a, int b, signed char *c)
+{
+ int shift;
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvlx(vec_char16 a, int b, vec_char16 *c)
+{
+ int shift;
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvlx(vec_ushort8 a, int b, unsigned short *c)
+{
+ int shift;
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_ushort8 a, int b, vec_ushort8 *c)
+{
+ int shift;
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_short8 a, int b, signed short *c)
+{
+ int shift;
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_short8 a, int b, vec_short8 *c)
+{
+ int shift;
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_uint4 a, int b, unsigned int *c)
+{
+ int shift;
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_uint4 a, int b, vec_uint4 *c)
+{
+ int shift;
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_int4 a, int b, signed int *c)
+{
+ int shift;
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_int4 a, int b, vec_int4 *c)
+{
+ int shift;
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_float4 a, int b, float *c)
+{
+ int shift;
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvlx(vec_float4 a, int b, vec_float4 *c)
+{
+ int shift;
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
+
+ shift = -((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_rlmaskqwbyte(a, shift),
+ spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
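+/* Editorial note: each vec_stvlx overload is a read-modify-write: the
+ * target quadword is loaded, spu_sel merges in the bytes of 'a' from
+ * the addressed byte through the end of the quadword, and the result
+ * is stored back.  Paired with vec_stvrx below, it yields the usual
+ * unaligned-store idiom (illustrative):
+ *
+ *   vec_stvlx(v, 0, p);     // bytes of v up to the quadword boundary
+ *   vec_stvrx(v, 16, p);    // remaining bytes into the next quadword
+ */
+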
+/* vec_stvlxl (store vector left indexed last)
+ * ==========
+ */
+#define vec_stvlxl(_a, _b, _c) vec_stvlx(_a, _b, _c)
+
+
+/* vec_stvrx (store vector right indexed)
+ * =========
+ */
+static inline void vec_stvrx(vec_uchar16 a, int b, unsigned char *c)
+{
+ int shift;
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvrx(vec_uchar16 a, int b, vec_uchar16 *c)
+{
+ int shift;
+ vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvrx(vec_char16 a, int b, signed char *c)
+{
+ int shift;
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvrx(vec_char16 a, int b, vec_char16 *c)
+{
+ int shift;
+ vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
+}
+
+static inline void vec_stvrx(vec_ushort8 a, int b, unsigned short *c)
+{
+ int shift;
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_ushort8 a, int b, vec_ushort8 *c)
+{
+ int shift;
+ vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_short8 a, int b, signed short *c)
+{
+ int shift;
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_short8 a, int b, vec_short8 *c)
+{
+ int shift;
+ vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_uint4 a, int b, unsigned int *c)
+{
+ int shift;
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_uint4 a, int b, vec_uint4 *c)
+{
+ int shift;
+ vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_int4 a, int b, signed int *c)
+{
+ int shift;
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_int4 a, int b, vec_int4 *c)
+{
+ int shift;
+ vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_float4 a, int b, float *c)
+{
+ int shift;
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+static inline void vec_stvrx(vec_float4 a, int b, vec_float4 *c)
+{
+ int shift;
+ vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
+
+ shift = 16-((int)p & 0xF);
+ *p = spu_sel(*p,
+ spu_slqwbyte(a, shift),
+ spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
+}
+
+/* vec_stvrxl (store vector right indexed last)
+ * ==========
+ */
+#define vec_stvrxl(_a, _b, _c) vec_stvrx(_a, _b, _c)
+
+
+#endif /* __SPU__ */
+#endif /* __cplusplus */
+#endif /* !_VMX2SPU_H_ */
diff --git a/gcc/config/stormy16/constraints.md b/gcc/config/stormy16/constraints.md
new file mode 100644
index 000000000..5ad17bdbf
--- /dev/null
+++ b/gcc/config/stormy16/constraints.md
@@ -0,0 +1,119 @@
+;; Constraint definitions for XSTORMY16.
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+(define_register_constraint "a" "R0_REGS"
+ "@internal")
+
+(define_register_constraint "b" "R1_REGS"
+ "@internal")
+
+(define_register_constraint "c" "R2_REGS"
+ "@internal")
+
+(define_register_constraint "d" "R8_REGS"
+ "@internal")
+
+(define_register_constraint "e" "EIGHT_REGS"
+ "@internal")
+
+(define_register_constraint "t" "TWO_REGS"
+ "@internal")
+
+(define_register_constraint "z" "ICALL_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "An integer between 0 and 3."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 3)")))
+
+(define_constraint "J"
+ "A power of two."
+ (and (match_code "const_int")
+ (match_test "exact_log2 (ival) != -1")))
+
+(define_constraint "K"
+ "A power of two when inverted."
+ (and (match_code "const_int")
+ (match_test "exact_log2 (~ival) != -1")))
+
+(define_constraint "L"
+ "An 8-bit unsigned integer."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 255)")))
+
+(define_constraint "M"
+ "An integer between -255 and 0."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -255, 0)")))
+
+(define_constraint "N"
+ "An integer between -3 and 0."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -3, 0)")))
+
+(define_constraint "O"
+ "An integer between 1 and 4."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 1, 4)")))
+
+(define_constraint "P"
+ "An integer between -4 and -1."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -4, -1)")))
+
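+;; Editorial note: these constraint letters appear in the constraint
+;; strings of define_insn operands elsewhere in the machine description;
+;; a hypothetical operand restricted to the "I" range 0..3 would be
+;; written as:
+;;
+;;   (match_operand:HI 2 "const_int_operand" "I")
+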
+;; Extra constraints.
+(define_constraint "Q"
+ "A register push operation."
+ (and (match_code "mem")
+ (match_code "post_inc" "0")
+ (match_test "XEXP (XEXP (op, 0), 0) == stack_pointer_rtx")))
+
+(define_constraint "R"
+ "A register pop operation."
+ (and (match_code "mem")
+ (match_code "pre_dec" "0")
+ (match_test "XEXP (XEXP (op, 0), 0) == stack_pointer_rtx")))
+
+(define_constraint "S"
+ "An immediate memory address."
+ (and (match_code "mem")
+ (match_code "const_int" "0")
+ (match_test "xstormy16_legitimate_address_p (VOIDmode, XEXP (op, 0), false)")))
+
+(define_constraint "T"
+ "@internal"
+ ;; For Rx; not implemented yet.
+ (match_test "0"))
+
+(define_constraint "U"
+ "An integer not between 2 and 15."
+ (and (match_code "const_int")
+ (match_test "!IN_RANGE (ival, 2, 15)")))
+
+(define_constraint "W"
+ "@internal"
+ (match_operand 0 "xstormy16_below100_operand"))
+
+(define_constraint "Z"
+ "Zero."
+ (and (match_code "const_int")
+ (match_test "ival == 0"))) \ No newline at end of file
diff --git a/gcc/config/stormy16/predicates.md b/gcc/config/stormy16/predicates.md
new file mode 100644
index 000000000..7270c7fe7
--- /dev/null
+++ b/gcc/config/stormy16/predicates.md
@@ -0,0 +1,178 @@
+;; Predicate definitions for XSTORMY16.
+;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return 1 if OP is a shift operator.
+
+(define_predicate "shift_operator"
+ (match_code "ashift,ashiftrt,lshiftrt")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return (code == ASHIFT
+ || code == ASHIFTRT
+ || code == LSHIFTRT);
+})
+
+;; Return 1 if this is an EQ or NE operator.
+
+(define_predicate "equality_operator"
+ (match_code "eq,ne")
+{
+ return ((mode == VOIDmode || GET_MODE (op) == mode)
+ && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
+})
+
+;; Return 1 if this is a comparison operator but not an EQ or NE
+;; operator.
+
+(define_predicate "inequality_operator"
+ (match_code "ge,gt,le,lt,geu,gtu,leu,ltu")
+{
+ return comparison_operator (op, mode) && ! equality_operator (op, mode);
+})
+
+;; Return 1 if this is a LT, GE, LTU, or GEU operator.
+
+(define_predicate "xstormy16_ineqsi_operator"
+ (match_code "lt,ge,ltu,geu")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ return ((mode == VOIDmode || GET_MODE (op) == mode)
+ && (code == LT || code == GE || code == LTU || code == GEU));
+})
+
+;; Predicate for MEMs that can use special 8-bit addressing.
+
+(define_predicate "xstormy16_below100_operand"
+ (match_code "mem")
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ if (GET_CODE (op) == MEM)
+ op = XEXP (op, 0);
+ else if (GET_CODE (op) == SUBREG
+ && GET_CODE (XEXP (op, 0)) == MEM
+ && !MEM_VOLATILE_P (XEXP (op, 0)))
+ op = XEXP (XEXP (op, 0), 0);
+ else
+ return 0;
+ if (GET_CODE (op) == CONST_INT)
+ {
+ HOST_WIDE_INT i = INTVAL (op);
+ return (i >= 0x7f00 && i < 0x7fff);
+ }
+ return xstormy16_below100_symbol (op, HImode);
+})
+
+;; Return 1 if OP is either a below-100 memory operand or a register.
+
+(define_predicate "xstormy16_below100_or_register"
+ (match_code "mem,reg,subreg")
+{
+ return (xstormy16_below100_operand (op, mode)
+ || register_operand (op, mode));
+})
+
+;; Like xstormy16_below100_or_register, but reject volatile MEMs, which
+;; must not be split.
+
+(define_predicate "xstormy16_splittable_below100_or_register"
+ (match_code "mem,reg,subreg")
+{
+ if (GET_CODE (op) == MEM && MEM_VOLATILE_P (op))
+ return 0;
+ return (xstormy16_below100_operand (op, mode)
+ || register_operand (op, mode));
+})
+
+;; Predicate for constants with exactly one bit not set.
+
+(define_predicate "xstormy16_onebit_clr_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ i = ~ INTVAL (op);
+ if (mode == QImode)
+ i &= 0xff;
+ if (mode == HImode)
+ i &= 0xffff;
+ return exact_log2 (i) != -1;
+})
+
+;; Predicate for constants with exactly one bit set.
+
+(define_predicate "xstormy16_onebit_set_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ i = INTVAL (op);
+ if (mode == QImode)
+ i &= 0xff;
+ if (mode == HImode)
+ i &= 0xffff;
+ return exact_log2 (i) != -1;
+})
+
+;; Return 1 if OP is a nonimmediate operand that is neither a push nor
+;; a pop.
+
+(define_predicate "nonimmediate_nonstack_operand"
+ (match_code "reg,mem,subreg")
+{
+ /* 'Q' is for pushes, 'R' for pops. */
+ return (nonimmediate_operand (op, mode)
+ && ! satisfies_constraint_Q (op)
+ && ! satisfies_constraint_R (op));
+})
+
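+;; Return 1 if OP is a PLUS whose constant operand lies outside the
+;; -4 .. 4 range covered by the 'O' and 'P' constraints.
+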
+(define_predicate "xstormy16_carry_plus_operand"
+ (match_code "plus")
+{
+ return (GET_CODE (XEXP (op, 1)) == CONST_INT
+ && (INTVAL (XEXP (op, 1)) < -4 || INTVAL (XEXP (op, 1)) > 4));
+})
+
+(define_predicate "xs_hi_general_operand"
+ (match_code "const_int,reg,subreg,mem,symbol_ref,label_ref,const")
+{
+ if ((GET_CODE (op) == CONST_INT)
+ && ((INTVAL (op) >= 32768) || (INTVAL (op) < -32768)))
+ {
+ error ("constant halfword load operand out of range");
+ return false;
+ }
+
+ return general_operand (op, mode);
+})
+
+(define_predicate "xs_hi_nonmemory_operand"
+ (match_code "const_int,reg,subreg,const")
+{
+ if ((GET_CODE (op) == CONST_INT)
+ && ((INTVAL (op) >= 32768) || (INTVAL (op) < -32768)))
+ {
+ error ("constant arithmetic operand out of range");
+ return false;
+ }
+
+ return nonmemory_operand (op, mode);
+})
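+
+;; Editorial note: these predicates are referenced by name from operand
+;; expressions in the machine description, e.g. a hypothetical
+;;
+;;   (match_operand:HI 1 "xs_hi_nonmemory_operand" "")
+;;
+;; accepts a register or a range-checked constant for operand 1.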
diff --git a/gcc/config/stormy16/stormy-abi b/gcc/config/stormy16/stormy-abi
new file mode 100644
index 000000000..887e7d76e
--- /dev/null
+++ b/gcc/config/stormy16/stormy-abi
@@ -0,0 +1,174 @@
+xStormy16 ABI
+*************
+
+!!!!! NOTE !!!!!
+This document is a draft and is subject to change.
+!!!!! NOTE !!!!!
+
+This part of the file describes the conventions required to write
+ELF object files that are link-compatible with the ones produced
+by the GNU toolchains.
+
+Bit and Byte Ordering
+=====================
+
+This implementation is little-endian. Bits are numbered starting
+from 0, with bit 0 being the LSB.
+
+In this document, 'word' means 16 bits.
+
+Calling Sequence
+================
+
+The registers are allocated as follows:
+
+Register         Purpose
+-------------------------------------------------------------------
+r0, r1           Call-volatile. May be changed during the execution
+                 of a call instruction.
+r2 through r7    Argument passing; call-clobbered.
+r8, r9           Call-volatile. May be changed during the execution
+                 of a call instruction.
+r10 through r13  Call-saved.
+r14              Program status word.
+r15              Stack pointer.
+
+
+Scalar values are returned in registers r2 through r7 if the value fits.
+Otherwise, a pointer is passed as a 'hidden' first argument and
+the return value is placed there.
+
+Arguments are passed in registers starting in r2, then on the stack.
+Arguments of size not a multiple of a word are padded to whole words.
+If an argument would otherwise be passed partially in registers, and
+partially on the stack, the whole of it is passed on the stack. The
+last argument is pushed on the stack first.
+
+After a procedure's arguments are pushed on the stack,
+the return address is pushed on the stack, as if by the call
+instruction. The return address is on the top of the stack when
+a procedure is called.
+
+Objects whose size is a multiple of 16 bits are aligned to a 16-bit
+boundary.
+
+Pointers are 16 bits, referencing addresses between 0 and 0xFFFF.
+
+Procedure pointers are also implemented as 16-bit pointers.
+
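+As an illustration (an example added for clarity, not part of the
+formal ABI text): for a call such as
+
+  void f (long a, int b, char c, long long d);
+
+'a' occupies two words and is passed in r2 and r3, 'b' goes in r4,
+and 'c' is padded to a whole word and passed in r5.  'd' would need
+four words, but only r6 and r7 remain, so it is passed entirely on
+the stack rather than split between registers and stack.
+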
+Variable Argument Functions
+===========================
+
+The C type 'va_list' is implemented as a structure, as follows:
+
+struct {
+ char *base;
+ unsigned count;
+}
+
+Both fields are 16 bits. An argument of size N bytes
+(N will be even) is accessed as if by the following code:
+
+char *result;
+/* count = #bytes non-variable arguments */
+/* 12 = #bytes for register arguments */
+if (count + N > 12)
+ {
+ if (count < 12)
+ count = 12;
+ result = base - (count + N - 12 + 4);
+ }
+else
+ {
+ result = base + count;
+ }
+count += N;
+/* The argument is at `*result'. */
+
+
+One possible implementation is for a variadic function to push
+registers r2 through r7 in sequence on entry, and to set 'base'
+to the address of the first word pushed, producing a stack that
+looks like this:
+
+SP ->
+ [other data]
+ r7
+ r6
+ r5
+ r4
+ r3
+count-> r2
+ Return address (two words)
+ 7th procedure parameter word
+ 8th procedure parameter word
+ ...
+ last procedure parameter word
+
+and initializes 'count' to be the number of bytes of non-variable
+arguments to the function.
+
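+As a walk-through of the code above (an illustration, not part of
+the formal ABI): suppose the only fixed argument is one word, so
+'count' starts at 2 and 'base' points at the saved r2.  The first
+va_arg of a 2-byte type finds count + N = 4 <= 12, giving
+
+  result = base + 2;   /* the saved r3 slot */
+  count = 4;
+
+Once 'count' reaches 12 the register save area is exhausted, and the
+next 2-byte va_arg takes the other branch:
+
+  result = base - (12 + 2 - 12 + 4);   /* = base - 6 */
+  count = 14;
+
+Since the two-word return address occupies base-4 through base-1,
+base-6 is the 7th procedure parameter word, i.e. the first argument
+that was passed on the stack.
+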
+ELF File Format
+===============
+
+ELF file header
+---------------
+
+xStormy16 ELF files are distinguished by the value EM_XSTORMY16 in
+the e_machine field of the ELF file header:
+
+#define EM_XSTORMY16 0xad45
+
+DWARF Register Number Mapping
+-----------------------------
+
+Registers r0 through r15 are mapped to numbers 0 through 15.
+
+Relocations
+-----------
+
+RELA relocs are used exclusively. The relocation types defined are:
+
+Name Value Field Calculation Overflow
+----------------------------------------------------------------
+R_XSTORMY16_NONE 0 none none none
+R_XSTORMY16_32 1 32 S + A none
+R_XSTORMY16_16 2 16 S + A either
+R_XSTORMY16_8 3 8 S + A unsigned
+R_XSTORMY16_PC32 4 32 S + A - P none
+R_XSTORMY16_PC16 5 16 S + A - P signed
+R_XSTORMY16_PC8 6 8 S + A - P signed
+R_XSTORMY16_REL_12 7 16:12:0 S + A - P signed
+R_XSTORMY16_24 8 32:23:1 (S + A) >> 1 unsigned
+R_XSTORMY16_FPTR16 9 16 S + A either
+R_XSTORMY16_LO16 10 16 S + A none
+R_XSTORMY16_HI16 11 32:16:16 S + A none
+R_XSTORMY16_12 12 16:12:0 S + A signed
+R_XSTORMY16_GNU_VTINHERIT 128 n/a n/a n/a
+R_XSTORMY16_GNU_VTENTRY 129 n/a n/a n/a
+
+In the 'Field' column, the first number indicates whether the
+relocation refers to a byte, word or doubleword. The second number,
+if any, indicates the size of the bit-field into which the relocation
+is to occur (and also the size for overflow checking). The third
+number indicates the first bit of the bit-field in the word or
+doubleword, counting the LSB as bit 0.
+
+In the 'Calculation' column, 'S' is the value of the symbol to which
+the reloc refers, 'A' is the addend, and 'P' represents the place of
+the storage unit being relocated.
+
+In the 'Overflow' column, 'none' means that any overflow of the
+computation performed in the 'Calculation' column is ignored.
+'signed' means that the overflow is only reported if it happens when
+the values are treated as signed quantities. 'unsigned' is the same,
+except that the values are treated as unsigned quantities. 'either'
+means that overflow is reported for either signed or unsigned
+overflow.
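+
+As an illustrative calculation (not part of the ABI proper):
+applying R_XSTORMY16_PC16 at a place P = 0x1000 against a symbol
+S = 0x1230 with addend A = 4 stores S + A - P = 0x234 into the
+16-bit field, checking for signed overflow.  For R_XSTORMY16_24,
+(S + A) >> 1 is stored into the 23-bit field that starts at bit 1
+of a 32-bit doubleword, with unsigned overflow checking.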
+
+
+Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/gcc/config/stormy16/stormy16-lib2-ashlsi3.c b/gcc/config/stormy16/stormy16-lib2-ashlsi3.c
new file mode 100644
index 000000000..d6cabc669
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-ashlsi3.c
@@ -0,0 +1,2 @@
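+/* Added commentary: each stormy16-lib2-*.c file defines exactly one
+   XSTORMY16_* selector macro and then includes stormy16-lib2.c, so
+   that every libgcc routine is compiled into its own object file and
+   the linker pulls in only the routines a program actually uses.  */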
+#define XSTORMY16_ASHLSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-ashrsi3.c b/gcc/config/stormy16/stormy16-lib2-ashrsi3.c
new file mode 100644
index 000000000..151e3d01c
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-ashrsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_ASHRSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-clzhi2.c b/gcc/config/stormy16/stormy16-lib2-clzhi2.c
new file mode 100644
index 000000000..066fdf131
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-clzhi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_CLZHI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-cmpsi2.c b/gcc/config/stormy16/stormy16-lib2-cmpsi2.c
new file mode 100644
index 000000000..7563c36b5
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-cmpsi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_CMPSI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-ctzhi2.c b/gcc/config/stormy16/stormy16-lib2-ctzhi2.c
new file mode 100644
index 000000000..c1497db95
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-ctzhi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_CTZHI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-divsi3.c b/gcc/config/stormy16/stormy16-lib2-divsi3.c
new file mode 100644
index 000000000..33c370d1c
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-divsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_DIVSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-ffshi2.c b/gcc/config/stormy16/stormy16-lib2-ffshi2.c
new file mode 100644
index 000000000..4b629ddec
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-ffshi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_FFSHI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-lshrsi3.c b/gcc/config/stormy16/stormy16-lib2-lshrsi3.c
new file mode 100644
index 000000000..cd769ee66
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-lshrsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_LSHRSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-modsi3.c b/gcc/config/stormy16/stormy16-lib2-modsi3.c
new file mode 100644
index 000000000..587d0070d
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-modsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_MODSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-parityhi2.c b/gcc/config/stormy16/stormy16-lib2-parityhi2.c
new file mode 100644
index 000000000..1d128171a
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-parityhi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_PARITYHI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-popcounthi2.c b/gcc/config/stormy16/stormy16-lib2-popcounthi2.c
new file mode 100644
index 000000000..f07d66873
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-popcounthi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_POPCOUNTHI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-ucmpsi2.c b/gcc/config/stormy16/stormy16-lib2-ucmpsi2.c
new file mode 100644
index 000000000..da1a3e707
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-ucmpsi2.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_UCMPSI2
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-udivmodsi4.c b/gcc/config/stormy16/stormy16-lib2-udivmodsi4.c
new file mode 100644
index 000000000..d555e64c6
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-udivmodsi4.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_UDIVMODSI4
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-udivsi3.c b/gcc/config/stormy16/stormy16-lib2-udivsi3.c
new file mode 100644
index 000000000..fdcd64a0a
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-udivsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_UDIVSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2-umodsi3.c b/gcc/config/stormy16/stormy16-lib2-umodsi3.c
new file mode 100644
index 000000000..87921f2a1
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2-umodsi3.c
@@ -0,0 +1,2 @@
+#define XSTORMY16_UMODSI3
+#include "stormy16-lib2.c"
diff --git a/gcc/config/stormy16/stormy16-lib2.c b/gcc/config/stormy16/stormy16-lib2.c
new file mode 100644
index 000000000..e3c164354
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-lib2.c
@@ -0,0 +1,357 @@
+/* This file contains 16-bit versions of some of the functions found in
+ libgcc2.c. Really libgcc ought to be moved out of the gcc directory
+ and into its own top level directory, and then split up into multiple
+ files. On this glorious day maybe this code can be integrated into
+ it too. */
+
+/* Copyright (C) 2005, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+#ifdef HAVE_GAS_HIDDEN
+#define ATTRIBUTE_HIDDEN __attribute__ ((__visibility__ ("hidden")))
+#else
+#define ATTRIBUTE_HIDDEN
+#endif
+
+#ifndef MIN_UNITS_PER_WORD
+#define MIN_UNITS_PER_WORD UNITS_PER_WORD
+#endif
+
+#ifndef LIBGCC2_UNITS_PER_WORD
+# if MIN_UNITS_PER_WORD > 4
+# define LIBGCC2_UNITS_PER_WORD 8
+# elif (MIN_UNITS_PER_WORD > 2 \
+ || (MIN_UNITS_PER_WORD > 1 && LONG_LONG_TYPE_SIZE > 32))
+# define LIBGCC2_UNITS_PER_WORD 4
+# else
+# define LIBGCC2_UNITS_PER_WORD MIN_UNITS_PER_WORD
+# endif
+#endif
+
+#define word_type Wtype
+
+#include "libgcc2.h"
+#undef int
+
+/* These prototypes would normally live in libgcc2.h, but this can
+ only happen once the code below is integrated into libgcc2.c. */
+
+extern USItype udivmodsi4 (USItype, USItype, word_type);
+extern SItype __divsi3 (SItype, SItype);
+extern SItype __modsi3 (SItype, SItype);
+extern SItype __udivsi3 (SItype, SItype);
+extern SItype __umodsi3 (SItype, SItype);
+extern SItype __ashlsi3 (SItype, SItype);
+extern SItype __ashrsi3 (SItype, SItype);
+extern USItype __lshrsi3 (USItype, USItype);
+extern int __popcounthi2 (UHWtype);
+extern int __parityhi2 (UHWtype);
+extern int __clzhi2 (UHWtype);
+extern int __ctzhi2 (UHWtype);
+
+
+#ifdef XSTORMY16_UDIVMODSI4
+USItype
+udivmodsi4 (USItype num, USItype den, word_type modwanted)
+{
+ USItype bit = 1;
+ USItype res = 0;
+
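+  /* Shift-and-subtract division: scale DEN (and the quotient-bit
+     marker BIT) upwards until DEN is at least NUM or its top bit is
+     set, then walk back down, subtracting DEN wherever it fits and
+     recording a quotient bit each time it does.  */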
+ while (den < num && bit && !(den & (1L << 31)))
+ {
+ den <<= 1;
+ bit <<= 1;
+ }
+ while (bit)
+ {
+ if (num >= den)
+ {
+ num -= den;
+ res |= bit;
+ }
+ bit >>= 1;
+ den >>= 1;
+ }
+
+ if (modwanted)
+ return num;
+ return res;
+}
+#endif
+
+#ifdef XSTORMY16_DIVSI3
+SItype
+__divsi3 (SItype a, SItype b)
+{
+ word_type neg = 0;
+ SItype res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = !neg;
+ }
+
+ if (b < 0)
+ {
+ b = -b;
+ neg = !neg;
+ }
+
+ res = udivmodsi4 (a, b, 0);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+#endif
+
+#ifdef XSTORMY16_MODSI3
+SItype
+__modsi3 (SItype a, SItype b)
+{
+ word_type neg = 0;
+ SItype res;
+
+ if (a < 0)
+ {
+ a = -a;
+ neg = 1;
+ }
+
+ if (b < 0)
+ b = -b;
+
+ res = udivmodsi4 (a, b, 1);
+
+ if (neg)
+ res = -res;
+
+ return res;
+}
+#endif
+
+#ifdef XSTORMY16_UDIVSI3
+SItype
+__udivsi3 (SItype a, SItype b)
+{
+ return udivmodsi4 (a, b, 0);
+}
+#endif
+
+#ifdef XSTORMY16_UMODSI3
+SItype
+__umodsi3 (SItype a, SItype b)
+{
+ return udivmodsi4 (a, b, 1);
+}
+#endif
+
+#ifdef XSTORMY16_ASHLSI3
+SItype
+__ashlsi3 (SItype a, SItype b)
+{
+ word_type i;
+
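+  /* Decompose the shift count into steps of 16, 8 and single bits;
+     only the low five bits of B are significant.  */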
+ if (b & 16)
+ a <<= 16;
+ if (b & 8)
+ a <<= 8;
+ for (i = (b & 0x7); i > 0; --i)
+ a <<= 1;
+ return a;
+}
+#endif
+
+#ifdef XSTORMY16_ASHRSI3
+SItype
+__ashrsi3 (SItype a, SItype b)
+{
+ word_type i;
+
+ if (b & 16)
+ a >>= 16;
+ if (b & 8)
+ a >>= 8;
+ for (i = (b & 0x7); i > 0; --i)
+ a >>= 1;
+ return a;
+}
+#endif
+
+#ifdef XSTORMY16_LSHRSI3
+USItype
+__lshrsi3 (USItype a, USItype b)
+{
+ word_type i;
+
+ if (b & 16)
+ a >>= 16;
+ if (b & 8)
+ a >>= 8;
+ for (i = (b & 0x7); i > 0; --i)
+ a >>= 1;
+ return a;
+}
+#endif
+
+#ifdef XSTORMY16_POPCOUNTHI2
+/* Returns the number of set bits in X.
+ FIXME: The return type really should be "unsigned int"
+ but this is not how the builtin is prototyped. */
+int
+__popcounthi2 (UHWtype x)
+{
+ int ret;
+
+ ret = __popcount_tab [x & 0xff];
+ ret += __popcount_tab [(x >> 8) & 0xff];
+
+ return ret;
+}
+#endif
+
+#ifdef XSTORMY16_PARITYHI2
+/* Returns the number of set bits in X, modulo 2.
+ FIXME: The return type really should be "unsigned int"
+ but this is not how the builtin is prototyped. */
+
+int
+__parityhi2 (UHWtype x)
+{
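+  /* Fold the parity of the 16 bits down into the low four bits, then
+     use the constant 0x6996 (binary 0110 1001 1001 0110) as a
+     16-entry parity lookup table indexed by that nibble.  */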
+ x ^= x >> 8;
+ x ^= x >> 4;
+ x &= 0xf;
+ return (0x6996 >> x) & 1;
+}
+#endif
+
+#ifdef XSTORMY16_CLZHI2
+/* Returns the number of zero-bits from the most significant bit to the
+ first nonzero bit in X. Returns 16 for X == 0. Implemented as a
+ simple for loop in order to save space by removing the need for
+ the __clz_tab array.
+ FIXME: The return type really should be "unsigned int" but this is
+ not how the builtin is prototyped. */
+#undef unsigned
+int
+__clzhi2 (UHWtype x)
+{
+ unsigned int i;
+ unsigned int c;
+ unsigned int value = x;
+
+ for (c = 0, i = 1 << 15; i; i >>= 1, c++)
+ if (i & value)
+ break;
+ return c;
+}
+#endif
+
+#ifdef XSTORMY16_CTZHI2
+/* Returns the number of trailing zero bits in X.
+ FIXME: The return type really should be "signed int" since
+ ctz(0) returns -1, but this is not how the builtin is prototyped. */
+
+int
+__ctzhi2 (UHWtype x)
+{
+ /* This is cunning. It converts X into a number with only the one bit
+ set, the bit that was the least significant bit in X. From this we
+ can use the count_leading_zeros to compute the number of trailing
+ bits. */
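+  /* For example (illustrative note, not in the original): x = 0x0014
+     becomes 0x0004 below; with this target's 16-bit int,
+     __builtin_clz returns 13, and 15 - 13 gives the 2 trailing
+     zeros.  */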
+ x &= - x;
+
+ return 15 - __builtin_clz (x);
+}
+#endif
+
+#ifdef XSTORMY16_FFSHI2
+/* Returns one plus the index of the least significant 1-bit of X,
+ or if X is zero, returns zero. FIXME: The return type really
+ should be "unsigned int" but this is not how the builtin is
+ prototyped. */
+
+int
+__ffshi2 (UHWtype u)
+{
+ if (u == 0)
+ return 0;
+
+ return 16 - __builtin_clz (u & - u);
+}
+#endif
+
+#ifdef XSTORMY16_UCMPSI2
+/* Performs an unsigned comparison of two 32-bit values: A and B.
+ If A is less than B, then 0 is returned. If A is greater than B,
+ then 2 is returned. Otherwise A and B are equal and 1 is returned. */
+
+word_type
+__ucmpsi2 (USItype a, USItype b)
+{
+ word_type hi_a = (a >> 16);
+ word_type hi_b = (b >> 16);
+
+ if (hi_a == hi_b)
+ {
+ word_type low_a = (a & 0xffff);
+ word_type low_b = (b & 0xffff);
+
+ return low_a < low_b ? 0 : (low_a > low_b ? 2 : 1);
+ }
+
+ return hi_a < hi_b ? 0 : 2;
+}
+#endif
+
+#ifdef XSTORMY16_CMPSI2
+/* Performs a signed comparison of two 32-bit values: A and B.
+ If A is less than B, then 0 is returned. If A is greater than B,
+ then 2 is returned. Otherwise A and B are equal and 1 is returned. */
+
+word_type
+__cmpsi2 (SItype a, SItype b)
+{
+ word_type hi_a = (a >> 16);
+ word_type hi_b = (b >> 16);
+
+ if (hi_a == hi_b)
+ {
+ word_type low_a = (a & 0xffff);
+ word_type low_b = (b & 0xffff);
+
+ return low_a < low_b ? 0 : (low_a > low_b ? 2 : 1);
+ }
+
+ return hi_a < hi_b ? 0 : 2;
+}
+#endif
diff --git a/gcc/config/stormy16/stormy16-protos.h b/gcc/config/stormy16/stormy16-protos.h
new file mode 100644
index 000000000..83e116f50
--- /dev/null
+++ b/gcc/config/stormy16/stormy16-protos.h
@@ -0,0 +1,70 @@
+/* Prototypes for exported functions defined in xstormy16.c
+ Copyright (C) 2000, 2001, 2003, 2004, 2007, 2008, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+
+extern struct xstormy16_stack_layout xstormy16_compute_stack_layout (void);
+extern void xstormy16_expand_prologue (void);
+extern void xstormy16_expand_epilogue (void);
+extern int xstormy16_initial_elimination_offset (int, int);
+extern int direct_return (void);
+extern int xstormy16_interrupt_function_p (void);
+extern int xstormy16_epilogue_uses (int);
+extern void xstormy16_function_profiler (void);
+
+#if defined (TREE_CODE)
+extern void xstormy16_asm_output_aligned_common (FILE *, tree, const char *,
+ int, int, int);
+#endif
+
+#if defined (TREE_CODE) && defined (RTX_CODE)
+extern void xstormy16_initialize_trampoline (rtx, rtx, rtx);
+#endif
+
+#ifdef RTX_CODE
+extern void xstormy16_emit_cbranch (enum rtx_code, rtx, rtx, rtx);
+extern char *xstormy16_output_cbranch_hi (rtx, const char *, int, rtx);
+extern char *xstormy16_output_cbranch_si (rtx, const char *, int, rtx);
+
+extern void xstormy16_expand_casesi (rtx, rtx, rtx, rtx, rtx);
+extern void xstormy16_output_addr_vec (FILE *, rtx, rtx);
+extern void xstormy16_expand_call (rtx, rtx, rtx);
+extern void xstormy16_expand_iorqi3 (rtx *);
+extern void xstormy16_expand_andqi3 (rtx *);
+#endif
+
+#if defined (HAVE_MACHINE_MODES) && defined (RTX_CODE)
+extern void xstormy16_split_cbranch (enum machine_mode, rtx, rtx, rtx);
+extern int short_memory_operand (rtx, enum machine_mode);
+extern int nonimmediate_nonstack_operand (rtx, enum machine_mode);
+extern enum reg_class xstormy16_secondary_reload_class
+ (enum reg_class, enum machine_mode, rtx);
+extern void xstormy16_split_move (enum machine_mode, rtx, rtx);
+extern void xstormy16_expand_move (enum machine_mode, rtx, rtx);
+extern void xstormy16_expand_arith (enum machine_mode, enum rtx_code,
+ rtx, rtx, rtx);
+extern const char * xstormy16_output_shift (enum machine_mode, enum rtx_code,
+ rtx, rtx, rtx);
+extern int xstormy16_below100_symbol (rtx, enum machine_mode);
+extern int xstormy16_splittable_below100_operand (rtx, enum machine_mode);
+extern bool xstormy16_legitimate_address_p (enum machine_mode, rtx, bool);
+#endif
+
diff --git a/gcc/config/stormy16/stormy16.c b/gcc/config/stormy16/stormy16.c
new file mode 100644
index 000000000..3954d5055
--- /dev/null
+++ b/gcc/config/stormy16/stormy16.c
@@ -0,0 +1,2677 @@
+/* Xstormy16 target functions.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "diagnostic-core.h"
+#include "obstack.h"
+#include "tree.h"
+#include "expr.h"
+#include "optabs.h"
+#include "except.h"
+#include "function.h"
+#include "target.h"
+#include "target-def.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "gimple.h"
+#include "df.h"
+#include "reload.h"
+#include "ggc.h"
+
+static rtx emit_addhi3_postreload (rtx, rtx, rtx);
+static void xstormy16_asm_out_constructor (rtx, int);
+static void xstormy16_asm_out_destructor (rtx, int);
+static void xstormy16_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+
+static void xstormy16_init_builtins (void);
+static rtx xstormy16_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static bool xstormy16_rtx_costs (rtx, int, int, int *, bool);
+static int xstormy16_address_cost (rtx, bool);
+static bool xstormy16_return_in_memory (const_tree, const_tree);
+
+static GTY(()) section *bss100_section;
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+xstormy16_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
+ int *total, bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ if (INTVAL (x) < 16 && INTVAL (x) >= 0)
+ *total = COSTS_N_INSNS (1) / 2;
+ else if (INTVAL (x) < 256 && INTVAL (x) >= 0)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case CONST_DOUBLE:
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ *total = COSTS_N_INSNS (2);
+ return true;
+
+ case MULT:
+ *total = COSTS_N_INSNS (35 + 6);
+ return true;
+ case DIV:
+ *total = COSTS_N_INSNS (51 - 6);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static int
+xstormy16_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ return (CONST_INT_P (x) ? 2
+ : GET_CODE (x) == PLUS ? 7
+ : 5);
+}
+
+/* Worker function for TARGET_MEMORY_MOVE_COST. */
+
+static int
+xstormy16_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+ bool in)
+{
+ return (5 + memory_move_secondary_cost (mode, rclass, in));
+}
+
+/* Branches are handled as follows:
+
+ 1. HImode compare-and-branches. The machine supports these
+ natively, so the appropriate pattern is emitted directly.
+
+ 2. SImode EQ and NE. These are emitted as pairs of HImode
+ compare-and-branches.
+
+ 3. SImode LT, GE, LTU and GEU. These are emitted as a sequence
+ of a SImode subtract followed by a branch (not a compare-and-branch),
+ like this:
+ sub
+ sbc
+ blt
+
+ 4. SImode GT, LE, GTU, LEU. These are emitted as a sequence like:
+ sub
+ sbc
+ blt
+ or
+ bne. */
+
+/* Emit a branch of kind CODE to location LOC. */
+
+void
+xstormy16_emit_cbranch (enum rtx_code code, rtx op0, rtx op1, rtx loc)
+{
+ rtx condition_rtx, loc_ref, branch, cy_clobber;
+ rtvec vec;
+ enum machine_mode mode;
+
+ mode = GET_MODE (op0);
+ gcc_assert (mode == HImode || mode == SImode);
+
+ if (mode == SImode
+ && (code == GT || code == LE || code == GTU || code == LEU))
+ {
+ int unsigned_p = (code == GTU || code == LEU);
+ int gt_p = (code == GT || code == GTU);
+ rtx lab = NULL_RTX;
+
+ if (gt_p)
+ lab = gen_label_rtx ();
+ xstormy16_emit_cbranch (unsigned_p ? LTU : LT, op0, op1, gt_p ? lab : loc);
+ /* This should be generated as a comparison against the temporary
+ created by the previous insn, but reload can't handle that. */
+ xstormy16_emit_cbranch (gt_p ? NE : EQ, op0, op1, loc);
+ if (gt_p)
+ emit_label (lab);
+ return;
+ }
+ else if (mode == SImode
+ && (code == NE || code == EQ)
+ && op1 != const0_rtx)
+ {
+ rtx op0_word, op1_word;
+ rtx lab = NULL_RTX;
+ int num_words = GET_MODE_BITSIZE (mode) / BITS_PER_WORD;
+ int i;
+
+ if (code == EQ)
+ lab = gen_label_rtx ();
+
+ for (i = 0; i < num_words - 1; i++)
+ {
+ op0_word = simplify_gen_subreg (word_mode, op0, mode,
+ i * UNITS_PER_WORD);
+ op1_word = simplify_gen_subreg (word_mode, op1, mode,
+ i * UNITS_PER_WORD);
+ xstormy16_emit_cbranch (NE, op0_word, op1_word, code == EQ ? lab : loc);
+ }
+ op0_word = simplify_gen_subreg (word_mode, op0, mode,
+ i * UNITS_PER_WORD);
+ op1_word = simplify_gen_subreg (word_mode, op1, mode,
+ i * UNITS_PER_WORD);
+ xstormy16_emit_cbranch (code, op0_word, op1_word, loc);
+
+ if (code == EQ)
+ emit_label (lab);
+ return;
+ }
+
+ /* We can't allow reload to try to generate any reload after a branch,
+ so when some register must match we must make the temporary ourselves. */
+ if (mode != HImode)
+ {
+ rtx tmp;
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, op0);
+ op0 = tmp;
+ }
+
+ condition_rtx = gen_rtx_fmt_ee (code, mode, op0, op1);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, loc);
+ branch = gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
+ loc_ref, pc_rtx));
+
+ cy_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (BImode, CARRY_REGNUM));
+
+ if (mode == HImode)
+ vec = gen_rtvec (2, branch, cy_clobber);
+ else if (code == NE || code == EQ)
+ vec = gen_rtvec (2, branch, gen_rtx_CLOBBER (VOIDmode, op0));
+ else
+ {
+ rtx sub;
+#if 0
+ sub = gen_rtx_SET (VOIDmode, op0, gen_rtx_MINUS (SImode, op0, op1));
+#else
+ sub = gen_rtx_CLOBBER (SImode, op0);
+#endif
+ vec = gen_rtvec (3, branch, sub, cy_clobber);
+ }
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+}
+
+/* Take a SImode conditional branch, one of GT/LE/GTU/LEU, and split
+ the arithmetic operation. Most of the work is done by
+ xstormy16_expand_arith. */
+
+void
+xstormy16_split_cbranch (enum machine_mode mode, rtx label, rtx comparison,
+ rtx dest)
+{
+ rtx op0 = XEXP (comparison, 0);
+ rtx op1 = XEXP (comparison, 1);
+ rtx seq, last_insn;
+ rtx compare;
+
+ start_sequence ();
+ xstormy16_expand_arith (mode, COMPARE, dest, op0, op1);
+ seq = get_insns ();
+ end_sequence ();
+
+ gcc_assert (INSN_P (seq));
+
+ last_insn = seq;
+ while (NEXT_INSN (last_insn) != NULL_RTX)
+ last_insn = NEXT_INSN (last_insn);
+
+ compare = SET_SRC (XVECEXP (PATTERN (last_insn), 0, 0));
+ PUT_CODE (XEXP (compare, 0), GET_CODE (comparison));
+ XEXP (compare, 1) = gen_rtx_LABEL_REF (VOIDmode, label);
+ emit_insn (seq);
+}
+
+
+/* Return the string to output a conditional branch to LABEL, which is
+ the operand number of the label.
+
+ OP is the conditional expression, or NULL for branch-always.
+
+ REVERSED is nonzero if we should reverse the sense of the comparison.
+
+ INSN is the insn. */
+
+char *
+xstormy16_output_cbranch_hi (rtx op, const char *label, int reversed, rtx insn)
+{
+ static char string[64];
+ int need_longbranch = (op != NULL_RTX
+ ? get_attr_length (insn) == 8
+ : get_attr_length (insn) == 4);
+ int really_reversed = reversed ^ need_longbranch;
+ const char *ccode;
+ const char *templ;
+ const char *operands;
+ enum rtx_code code;
+
+ if (! op)
+ {
+ if (need_longbranch)
+ ccode = "jmpf";
+ else
+ ccode = "br";
+ sprintf (string, "%s %s", ccode, label);
+ return string;
+ }
+
+ code = GET_CODE (op);
+
+ if (! REG_P (XEXP (op, 0)))
+ {
+ code = swap_condition (code);
+ operands = "%3,%2";
+ }
+ else
+ operands = "%2,%3";
+
+ /* Work out which way this really branches. */
+ if (really_reversed)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ: ccode = "z"; break;
+ case NE: ccode = "nz"; break;
+ case GE: ccode = "ge"; break;
+ case LT: ccode = "lt"; break;
+ case GT: ccode = "gt"; break;
+ case LE: ccode = "le"; break;
+ case GEU: ccode = "nc"; break;
+ case LTU: ccode = "c"; break;
+ case GTU: ccode = "hi"; break;
+ case LEU: ccode = "ls"; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (need_longbranch)
+ templ = "b%s %s,.+8 | jmpf %s";
+ else
+ templ = "b%s %s,%s";
+ sprintf (string, templ, ccode, operands, label);
+
+ return string;
+}
+
+/* Return the string to output a conditional branch to LABEL, which is
+ the operand number of the label, but suitable for the tail of a
+ SImode branch.
+
+ OP is the conditional expression (OP is never NULL_RTX).
+
+ REVERSED is nonzero if we should reverse the sense of the comparison.
+
+ INSN is the insn. */
+
+char *
+xstormy16_output_cbranch_si (rtx op, const char *label, int reversed, rtx insn)
+{
+ static char string[64];
+ int need_longbranch = get_attr_length (insn) >= 8;
+ int really_reversed = reversed ^ need_longbranch;
+ const char *ccode;
+ const char *templ;
+ char prevop[16];
+ enum rtx_code code;
+
+ code = GET_CODE (op);
+
+ /* Work out which way this really branches. */
+ if (really_reversed)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ: ccode = "z"; break;
+ case NE: ccode = "nz"; break;
+ case GE: ccode = "ge"; break;
+ case LT: ccode = "lt"; break;
+ case GEU: ccode = "nc"; break;
+ case LTU: ccode = "c"; break;
+
+ /* The missing codes above should never be generated. */
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case EQ: case NE:
+ {
+ int regnum;
+
+ gcc_assert (REG_P (XEXP (op, 0)));
+
+ regnum = REGNO (XEXP (op, 0));
+ sprintf (prevop, "or %s,%s", reg_names[regnum], reg_names[regnum+1]);
+ }
+ break;
+
+ case GE: case LT: case GEU: case LTU:
+ strcpy (prevop, "sbc %2,%3");
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (need_longbranch)
+ templ = "%s | b%s .+6 | jmpf %s";
+ else
+ templ = "%s | b%s %s";
+ sprintf (string, templ, prevop, ccode, label);
+
+ return string;
+}
+
+/* Many machines have some registers that cannot be copied directly to or from
+ memory or even from other types of registers. An example is the `MQ'
+ register, which on most machines, can only be copied to or from general
+ registers, but not memory. Some machines allow copying all registers to and
+ from memory, but require a scratch register for stores to some memory
+ locations (e.g., those with symbolic address on the RT, and those with
+ certain symbolic address on the SPARC when compiling PIC). In some cases,
+ both an intermediate and a scratch register are required.
+
+ You should define these macros to indicate to the reload phase that it may
+ need to allocate at least one register for a reload in addition to the
+ register to contain the data. Specifically, if copying X to a register
+ RCLASS in MODE requires an intermediate register, you should define
+ `SECONDARY_INPUT_RELOAD_CLASS' to return the largest register class all of
+ whose registers can be used as intermediate registers or scratch registers.
+
+ If copying a register RCLASS in MODE to X requires an intermediate or scratch
+ register, `SECONDARY_OUTPUT_RELOAD_CLASS' should be defined to return the
+ largest register class required. If the requirements for input and output
+ reloads are the same, the macro `SECONDARY_RELOAD_CLASS' should be used
+ instead of defining both macros identically.
+
+ The values returned by these macros are often `GENERAL_REGS'. Return
+ `NO_REGS' if no spare register is needed; i.e., if X can be directly copied
+ to or from a register of RCLASS in MODE without requiring a scratch register.
+ Do not define this macro if it would always return `NO_REGS'.
+
+ If a scratch register is required (either with or without an intermediate
+ register), you should define patterns for `reload_inM' or `reload_outM', as
+   required.  These patterns, which will normally be implemented with a
+ `define_expand', should be similar to the `movM' patterns, except that
+ operand 2 is the scratch register.
+
+ Define constraints for the reload register and scratch register that contain
+ a single register class. If the original reload register (whose class is
+ RCLASS) can meet the constraint given in the pattern, the value returned by
+ these macros is used for the class of the scratch register. Otherwise, two
+ additional reload registers are required. Their classes are obtained from
+ the constraints in the insn pattern.
+
+ X might be a pseudo-register or a `subreg' of a pseudo-register, which could
+ either be in a hard register or in memory. Use `true_regnum' to find out;
+ it will return -1 if the pseudo is in memory and the hard register number if
+ it is in a register.
+
+ These macros should not be used in the case where a particular class of
+ registers can only be copied to memory and not to another class of
+ registers. In that case, secondary reload registers are not needed and
+ would not be helpful. Instead, a stack location must be used to perform the
+ copy and the `movM' pattern should use memory as an intermediate storage.
+ This case often occurs between floating-point and general registers. */
+
+enum reg_class
+xstormy16_secondary_reload_class (enum reg_class rclass,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x)
+{
+ /* This chip has the interesting property that only the first eight
+ registers can be moved to/from memory. */
+ if ((MEM_P (x)
+ || ((GET_CODE (x) == SUBREG || REG_P (x))
+ && (true_regnum (x) == -1
+ || true_regnum (x) >= FIRST_PSEUDO_REGISTER)))
+ && ! reg_class_subset_p (rclass, EIGHT_REGS))
+ return EIGHT_REGS;
+
+ return NO_REGS;
+}
+
+/* Worker function for TARGET_PREFERRED_RELOAD_CLASS
+ and TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. */
+
+static reg_class_t
+xstormy16_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (rclass == GENERAL_REGS && MEM_P (x))
+ return EIGHT_REGS;
+
+ return rclass;
+}
+
+/* Predicate for symbols and addresses that reflect special 8-bit
+ addressing. */
+
+int
+xstormy16_below100_symbol (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (x) == CONST)
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == SYMBOL_REF)
+ return (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_XSTORMY16_BELOW100) != 0;
+
+ if (CONST_INT_P (x))
+ {
+ HOST_WIDE_INT i = INTVAL (x);
+
+ if ((i >= 0x0000 && i <= 0x00ff)
+ || (i >= 0x7f00 && i <= 0x7fff))
+ return 1;
+ }
+ return 0;
+}
+
+/* Likewise, but only for non-volatile MEMs, for patterns where the
+ MEM will get split into smaller sized accesses. */
+
+int
+xstormy16_splittable_below100_operand (rtx x, enum machine_mode mode)
+{
+ if (MEM_P (x) && MEM_VOLATILE_P (x))
+ return 0;
+ return xstormy16_below100_operand (x, mode);
+}
+
+/* Expand an 8-bit IOR. This either detects the one case we can
+ actually do, or uses a 16-bit IOR. */
+
+void
+xstormy16_expand_iorqi3 (rtx *operands)
+{
+ rtx in, out, outsub, val;
+
+ out = operands[0];
+ in = operands[1];
+ val = operands[2];
+
+ if (xstormy16_onebit_set_operand (val, QImode))
+ {
+ if (!xstormy16_below100_or_register (in, QImode))
+ in = copy_to_mode_reg (QImode, in);
+ if (!xstormy16_below100_or_register (out, QImode))
+ out = gen_reg_rtx (QImode);
+ emit_insn (gen_iorqi3_internal (out, in, val));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ return;
+ }
+
+ if (! REG_P (in))
+ in = copy_to_mode_reg (QImode, in);
+
+ if (! REG_P (val) && ! CONST_INT_P (val))
+ val = copy_to_mode_reg (QImode, val);
+
+ if (! REG_P (out))
+ out = gen_reg_rtx (QImode);
+
+ in = simplify_gen_subreg (HImode, in, QImode, 0);
+ outsub = simplify_gen_subreg (HImode, out, QImode, 0);
+
+ if (! CONST_INT_P (val))
+ val = simplify_gen_subreg (HImode, val, QImode, 0);
+
+ emit_insn (gen_iorhi3 (outsub, in, val));
+
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+}
+
+/* Expand an 8-bit AND. This either detects the one case we can
+ actually do, or uses a 16-bit AND. */
+
+void
+xstormy16_expand_andqi3 (rtx *operands)
+{
+ rtx in, out, outsub, val;
+
+ out = operands[0];
+ in = operands[1];
+ val = operands[2];
+
+ if (xstormy16_onebit_clr_operand (val, QImode))
+ {
+ if (!xstormy16_below100_or_register (in, QImode))
+ in = copy_to_mode_reg (QImode, in);
+ if (!xstormy16_below100_or_register (out, QImode))
+ out = gen_reg_rtx (QImode);
+ emit_insn (gen_andqi3_internal (out, in, val));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ return;
+ }
+
+ if (! REG_P (in))
+ in = copy_to_mode_reg (QImode, in);
+
+ if (! REG_P (val) && ! CONST_INT_P (val))
+ val = copy_to_mode_reg (QImode, val);
+
+ if (! REG_P (out))
+ out = gen_reg_rtx (QImode);
+
+ in = simplify_gen_subreg (HImode, in, QImode, 0);
+ outsub = simplify_gen_subreg (HImode, out, QImode, 0);
+
+ if (! CONST_INT_P (val))
+ val = simplify_gen_subreg (HImode, val, QImode, 0);
+
+ emit_insn (gen_andhi3 (outsub, in, val));
+
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+}
+
+#define LEGITIMATE_ADDRESS_INTEGER_P(X, OFFSET) \
+ (CONST_INT_P (X) \
+ && (unsigned HOST_WIDE_INT) (INTVAL (X) + (OFFSET) + 2048) < 4096)
+
+#define LEGITIMATE_ADDRESS_CONST_INT_P(X, OFFSET) \
+ (CONST_INT_P (X) \
+ && INTVAL (X) + (OFFSET) >= 0 \
+ && INTVAL (X) + (OFFSET) < 0x8000 \
+ && (INTVAL (X) + (OFFSET) < 0x100 || INTVAL (X) + (OFFSET) >= 0x7F00))
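+
+/* As an illustration (not from the original sources): a base-plus-
+   offset address such as (plus:HI (reg:HI r2) (const_int 100))
+   passes LEGITIMATE_ADDRESS_INTEGER_P, which accepts 12-bit signed
+   offsets in [-2048, 2047]; an absolute address must instead lie in
+   0x0000..0x00ff or 0x7f00..0x7fff to pass
+   LEGITIMATE_ADDRESS_CONST_INT_P.  */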
+
+bool
+xstormy16_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x, bool strict)
+{
+ if (LEGITIMATE_ADDRESS_CONST_INT_P (x, 0))
+ return true;
+
+ if (GET_CODE (x) == PLUS
+ && LEGITIMATE_ADDRESS_INTEGER_P (XEXP (x, 1), 0))
+ {
+ x = XEXP (x, 0);
+ /* PR 31232: Do not allow INT+INT as an address. */
+ if (CONST_INT_P (x))
+ return false;
+ }
+
+ if ((GET_CODE (x) == PRE_MODIFY && CONST_INT_P (XEXP (XEXP (x, 1), 1)))
+ || GET_CODE (x) == POST_INC
+ || GET_CODE (x) == PRE_DEC)
+ x = XEXP (x, 0);
+
+ if (REG_P (x)
+ && REGNO_OK_FOR_BASE_P (REGNO (x))
+ && (! strict || REGNO (x) < FIRST_PSEUDO_REGISTER))
+ return true;
+
+ if (xstormy16_below100_symbol (x, mode))
+ return true;
+
+ return false;
+}
+
+/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P.
+
+ On this chip, this is true if the address is valid with an offset
+ of 0 but not of 6, because in that case it cannot be used as an
+ address for DImode or DFmode, or if the address is a post-increment
+ or pre-decrement address. */
+
+static bool
+xstormy16_mode_dependent_address_p (const_rtx x)
+{
+ if (LEGITIMATE_ADDRESS_CONST_INT_P (x, 0)
+ && ! LEGITIMATE_ADDRESS_CONST_INT_P (x, 6))
+ return true;
+
+ if (GET_CODE (x) == PLUS
+ && LEGITIMATE_ADDRESS_INTEGER_P (XEXP (x, 1), 0)
+ && ! LEGITIMATE_ADDRESS_INTEGER_P (XEXP (x, 1), 6))
+ return true;
+
+ /* Auto-increment addresses are now treated generically in recog.c. */
+ return false;
+}
+
+int
+short_memory_operand (rtx x, enum machine_mode mode)
+{
+ if (! memory_operand (x, mode))
+ return 0;
+ return (GET_CODE (XEXP (x, 0)) != PLUS);
+}
+
+/* Splitter for the 'move' patterns, for modes not directly implemented
+ by hardware. Emit insns to copy a value of mode MODE from SRC to
+ DEST.
+
+ This function is only called when reload_completed. */
+
+void
+xstormy16_split_move (enum machine_mode mode, rtx dest, rtx src)
+{
+ int num_words = GET_MODE_BITSIZE (mode) / BITS_PER_WORD;
+ int direction, end, i;
+ int src_modifies = 0;
+ int dest_modifies = 0;
+ int src_volatile = 0;
+ int dest_volatile = 0;
+ rtx mem_operand;
+ rtx auto_inc_reg_rtx = NULL_RTX;
+
+ /* Check initial conditions. */
+ gcc_assert (reload_completed
+ && mode != QImode && mode != HImode
+ && nonimmediate_operand (dest, mode)
+ && general_operand (src, mode));
+
+ /* This case is not supported below, and shouldn't be generated. */
+ gcc_assert (! MEM_P (dest) || ! MEM_P (src));
+
+ /* This case is very very bad after reload, so trap it now. */
+ gcc_assert (GET_CODE (dest) != SUBREG && GET_CODE (src) != SUBREG);
+
+ /* The general idea is to copy by words, offsetting the source and
+ destination. Normally the least-significant word will be copied
+ first, but for pre-dec operations it's better to copy the
+ most-significant word first. Only one operand can be a pre-dec
+ or post-inc operand.
+
+ It's also possible that the copy overlaps so that the direction
+ must be reversed. */
+ direction = 1;
+
+ if (MEM_P (dest))
+ {
+ mem_operand = XEXP (dest, 0);
+ dest_modifies = side_effects_p (mem_operand);
+ if (auto_inc_p (mem_operand))
+ auto_inc_reg_rtx = XEXP (mem_operand, 0);
+ dest_volatile = MEM_VOLATILE_P (dest);
+ if (dest_volatile)
+ {
+ dest = copy_rtx (dest);
+ MEM_VOLATILE_P (dest) = 0;
+ }
+ }
+ else if (MEM_P (src))
+ {
+ mem_operand = XEXP (src, 0);
+ src_modifies = side_effects_p (mem_operand);
+ if (auto_inc_p (mem_operand))
+ auto_inc_reg_rtx = XEXP (mem_operand, 0);
+ src_volatile = MEM_VOLATILE_P (src);
+ if (src_volatile)
+ {
+ src = copy_rtx (src);
+ MEM_VOLATILE_P (src) = 0;
+ }
+ }
+ else
+ mem_operand = NULL_RTX;
+
+ if (mem_operand == NULL_RTX)
+ {
+ if (REG_P (src)
+ && REG_P (dest)
+ && reg_overlap_mentioned_p (dest, src)
+ && REGNO (dest) > REGNO (src))
+ direction = -1;
+ }
+ else if (GET_CODE (mem_operand) == PRE_DEC
+ || (GET_CODE (mem_operand) == PLUS
+ && GET_CODE (XEXP (mem_operand, 0)) == PRE_DEC))
+ direction = -1;
+ else if (MEM_P (src) && reg_overlap_mentioned_p (dest, src))
+ {
+ int regno;
+
+ gcc_assert (REG_P (dest));
+ regno = REGNO (dest);
+
+ gcc_assert (refers_to_regno_p (regno, regno + num_words,
+ mem_operand, 0));
+
+ if (refers_to_regno_p (regno, regno + 1, mem_operand, 0))
+ direction = -1;
+ else if (refers_to_regno_p (regno + num_words - 1, regno + num_words,
+ mem_operand, 0))
+ direction = 1;
+ else
+ /* This means something like
+ (set (reg:DI r0) (mem:DI (reg:HI r1)))
+ which we'd need to support by doing the set of the second word
+ last. */
+ gcc_unreachable ();
+ }
+
+ end = direction < 0 ? -1 : num_words;
+ for (i = direction < 0 ? num_words - 1 : 0; i != end; i += direction)
+ {
+ rtx w_src, w_dest, insn;
+
+ if (src_modifies)
+ w_src = gen_rtx_MEM (word_mode, mem_operand);
+ else
+ w_src = simplify_gen_subreg (word_mode, src, mode, i * UNITS_PER_WORD);
+ if (src_volatile)
+ MEM_VOLATILE_P (w_src) = 1;
+ if (dest_modifies)
+ w_dest = gen_rtx_MEM (word_mode, mem_operand);
+ else
+ w_dest = simplify_gen_subreg (word_mode, dest, mode,
+ i * UNITS_PER_WORD);
+ if (dest_volatile)
+ MEM_VOLATILE_P (w_dest) = 1;
+
+ /* The simplify_subreg calls must always be able to simplify. */
+ gcc_assert (GET_CODE (w_src) != SUBREG
+ && GET_CODE (w_dest) != SUBREG);
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, w_dest, w_src));
+ if (auto_inc_reg_rtx)
+ REG_NOTES (insn) = alloc_EXPR_LIST (REG_INC,
+ auto_inc_reg_rtx,
+ REG_NOTES (insn));
+ }
+}
+
+/* Expander for the 'move' patterns. Emit insns to copy a value of
+ mode MODE from SRC to DEST. */
+
+void
+xstormy16_expand_move (enum machine_mode mode, rtx dest, rtx src)
+{
+ if (MEM_P (dest) && (GET_CODE (XEXP (dest, 0)) == PRE_MODIFY))
+ {
+ rtx pmv = XEXP (dest, 0);
+ rtx dest_reg = XEXP (pmv, 0);
+ rtx dest_mod = XEXP (pmv, 1);
+ rtx set = gen_rtx_SET (Pmode, dest_reg, dest_mod);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (BImode, CARRY_REGNUM));
+
+ dest = gen_rtx_MEM (mode, dest_reg);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ }
+ else if (MEM_P (src) && (GET_CODE (XEXP (src, 0)) == PRE_MODIFY))
+ {
+ rtx pmv = XEXP (src, 0);
+ rtx src_reg = XEXP (pmv, 0);
+ rtx src_mod = XEXP (pmv, 1);
+ rtx set = gen_rtx_SET (Pmode, src_reg, src_mod);
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (BImode, CARRY_REGNUM));
+
+ src = gen_rtx_MEM (mode, src_reg);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ }
+
+ /* There are only limited immediate-to-memory move instructions. */
+ if (! reload_in_progress
+ && ! reload_completed
+ && MEM_P (dest)
+ && (! CONST_INT_P (XEXP (dest, 0))
+ || ! xstormy16_legitimate_address_p (mode, XEXP (dest, 0), 0))
+ && ! xstormy16_below100_operand (dest, mode)
+ && ! REG_P (src)
+ && GET_CODE (src) != SUBREG)
+ src = copy_to_mode_reg (mode, src);
+
+ /* Don't emit something we would immediately split. */
+ if (reload_completed
+ && mode != HImode && mode != QImode)
+ {
+ xstormy16_split_move (mode, dest, src);
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, src));
+}
+
+/* Stack Layout:
+
+ The stack is laid out as follows:
+
+SP->
+FP-> Local variables
+ Register save area (up to 4 words)
+ Argument register save area for stdarg (NUM_ARGUMENT_REGISTERS words)
+
+AP-> Return address (two words)
+ 9th procedure parameter word
+ 10th procedure parameter word
+ ...
+ last procedure parameter word
+
+ The frame pointer location is tuned to make it most likely that all
+ parameters and local variables can be accessed using a load-indexed
+ instruction. */
+
+/* A structure to describe the layout. */
+struct xstormy16_stack_layout
+{
+ /* Size of the topmost three items on the stack. */
+ int locals_size;
+ int register_save_size;
+ int stdarg_save_size;
+ /* Sum of the above items. */
+ int frame_size;
+ /* Various offsets. */
+ int first_local_minus_ap;
+ int sp_minus_fp;
+ int fp_minus_ap;
+};
+
+/* Does REGNO need to be saved? */
+#define REG_NEEDS_SAVE(REGNUM, IFUN) \
+ ((df_regs_ever_live_p (REGNUM) && ! call_used_regs[REGNUM]) \
+ || (IFUN && ! fixed_regs[REGNUM] && call_used_regs[REGNUM] \
+ && (REGNUM != CARRY_REGNUM) \
+ && (df_regs_ever_live_p (REGNUM) || ! current_function_is_leaf)))
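+
+/* That is: an ordinary function saves a register that is live and
+   call-saved; an interrupt function (IFUN) additionally saves any
+   call-clobbered register other than the carry flag that may be in
+   use, since the interrupted code could depend on it.  */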
+
+/* Compute the stack layout. */
+
+struct xstormy16_stack_layout
+xstormy16_compute_stack_layout (void)
+{
+ struct xstormy16_stack_layout layout;
+ int regno;
+ const int ifun = xstormy16_interrupt_function_p ();
+
+ layout.locals_size = get_frame_size ();
+
+ layout.register_save_size = 0;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (REG_NEEDS_SAVE (regno, ifun))
+ layout.register_save_size += UNITS_PER_WORD;
+
+ if (cfun->stdarg)
+ layout.stdarg_save_size = NUM_ARGUMENT_REGISTERS * UNITS_PER_WORD;
+ else
+ layout.stdarg_save_size = 0;
+
+ layout.frame_size = (layout.locals_size
+ + layout.register_save_size
+ + layout.stdarg_save_size);
+
+ if (crtl->args.size <= 2048 && crtl->args.size != -1)
+ {
+ if (layout.frame_size - INCOMING_FRAME_SP_OFFSET
+ + crtl->args.size <= 2048)
+ layout.fp_minus_ap = layout.frame_size - INCOMING_FRAME_SP_OFFSET;
+ else
+ layout.fp_minus_ap = 2048 - crtl->args.size;
+ }
+ else
+ layout.fp_minus_ap = (layout.stdarg_save_size
+ + layout.register_save_size
+ - INCOMING_FRAME_SP_OFFSET);
+ layout.sp_minus_fp = (layout.frame_size - INCOMING_FRAME_SP_OFFSET
+ - layout.fp_minus_ap);
+ layout.first_local_minus_ap = layout.sp_minus_fp - layout.locals_size;
+ return layout;
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+static bool
+xstormy16_can_eliminate (const int from, const int to)
+{
+ return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
+ ? ! frame_pointer_needed
+ : true);
+}
+
+/* Determine how all the special registers get eliminated. */
+
+int
+xstormy16_initial_elimination_offset (int from, int to)
+{
+ struct xstormy16_stack_layout layout;
+ int result;
+
+ layout = xstormy16_compute_stack_layout ();
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ result = layout.sp_minus_fp - layout.locals_size;
+ else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ result = - layout.locals_size;
+ else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ result = - layout.fp_minus_ap;
+ else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ result = - (layout.sp_minus_fp + layout.fp_minus_ap);
+ else
+ gcc_unreachable ();
+
+ return result;
+}
+
+static rtx
+emit_addhi3_postreload (rtx dest, rtx src0, rtx src1)
+{
+ rtx set, clobber, insn;
+
+ set = gen_rtx_SET (VOIDmode, dest, gen_rtx_PLUS (HImode, src0, src1));
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (BImode, CARRY_REGNUM));
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+ return insn;
+}
+
+/* Called after register allocation to add any instructions needed for
+ the prologue. Using a prologue insn is favored compared to putting
+ all of the instructions in the TARGET_ASM_FUNCTION_PROLOGUE macro,
+ since it allows the scheduler to intermix instructions with the
+ saves of the caller saved registers. In some cases, it might be
+ necessary to emit a barrier instruction as the last insn to prevent
+ such scheduling.
+
+ Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
+ so that the debug info generation code can handle them properly. */
+
+void
+xstormy16_expand_prologue (void)
+{
+ struct xstormy16_stack_layout layout;
+ int regno;
+ rtx insn;
+ rtx mem_push_rtx;
+ const int ifun = xstormy16_interrupt_function_p ();
+
+ mem_push_rtx = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
+ mem_push_rtx = gen_rtx_MEM (HImode, mem_push_rtx);
+
+ layout = xstormy16_compute_stack_layout ();
+
+ if (layout.locals_size >= 32768)
+ error ("local variable memory requirements exceed capacity");
+
+ /* Save the argument registers if necessary. */
+ if (layout.stdarg_save_size)
+ for (regno = FIRST_ARGUMENT_REGISTER;
+ regno < FIRST_ARGUMENT_REGISTER + NUM_ARGUMENT_REGISTERS;
+ regno++)
+ {
+ rtx dwarf;
+ rtx reg = gen_rtx_REG (HImode, regno);
+
+ insn = emit_move_insn (mem_push_rtx, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (dwarf, 0, 0) = gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode, stack_pointer_rtx),
+ reg);
+ XVECEXP (dwarf, 0, 1) = gen_rtx_SET (Pmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ GET_MODE_SIZE (Pmode)));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ RTX_FRAME_RELATED_P (XVECEXP (dwarf, 0, 0)) = 1;
+ RTX_FRAME_RELATED_P (XVECEXP (dwarf, 0, 1)) = 1;
+ }
+
+ /* Push each of the registers to save. */
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (REG_NEEDS_SAVE (regno, ifun))
+ {
+ rtx dwarf;
+ rtx reg = gen_rtx_REG (HImode, regno);
+
+ insn = emit_move_insn (mem_push_rtx, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
+
+ XVECEXP (dwarf, 0, 0) = gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode, stack_pointer_rtx),
+ reg);
+ XVECEXP (dwarf, 0, 1) = gen_rtx_SET (Pmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ GET_MODE_SIZE (Pmode)));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ RTX_FRAME_RELATED_P (XVECEXP (dwarf, 0, 0)) = 1;
+ RTX_FRAME_RELATED_P (XVECEXP (dwarf, 0, 1)) = 1;
+ }
+
+ /* It's just possible that the SP here might be what we need for
+ the new FP... */
+ if (frame_pointer_needed && layout.sp_minus_fp == layout.locals_size)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Allocate space for local variables. */
+ if (layout.locals_size)
+ {
+ insn = emit_addhi3_postreload (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (layout.locals_size));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Set up the frame pointer, if required. */
+ if (frame_pointer_needed && layout.sp_minus_fp != layout.locals_size)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (layout.sp_minus_fp)
+ {
+ insn = emit_addhi3_postreload (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (- layout.sp_minus_fp));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+}
+
+/* Do we need an epilogue at all? */
+
+int
+direct_return (void)
+{
+ return (reload_completed
+ && xstormy16_compute_stack_layout ().frame_size == 0
+ && ! xstormy16_interrupt_function_p ());
+}
+
+/* Called after register allocation to add any instructions needed for
+ the epilogue. Using an epilogue insn is favored compared to putting
+ all of the instructions in the TARGET_ASM_FUNCTION_PROLOGUE macro,
+ since it allows the scheduler to intermix instructions with the
+ saves of the caller saved registers. In some cases, it might be
+ necessary to emit a barrier instruction as the last insn to prevent
+ such scheduling. */
+
+void
+xstormy16_expand_epilogue (void)
+{
+ struct xstormy16_stack_layout layout;
+ rtx mem_pop_rtx;
+ int regno;
+ const int ifun = xstormy16_interrupt_function_p ();
+
+ mem_pop_rtx = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
+ mem_pop_rtx = gen_rtx_MEM (HImode, mem_pop_rtx);
+
+ layout = xstormy16_compute_stack_layout ();
+
+ /* Pop the stack for the locals. */
+ if (layout.locals_size)
+ {
+ if (frame_pointer_needed && layout.sp_minus_fp == layout.locals_size)
+ emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
+ else
+ emit_addhi3_postreload (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- layout.locals_size));
+ }
+
+ /* Restore any call-saved registers. */
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
+ if (REG_NEEDS_SAVE (regno, ifun))
+ emit_move_insn (gen_rtx_REG (HImode, regno), mem_pop_rtx);
+
+ /* Pop the stack for the stdarg save area. */
+ if (layout.stdarg_save_size)
+ emit_addhi3_postreload (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (- layout.stdarg_save_size));
+
+ /* Return. */
+ if (ifun)
+ emit_jump_insn (gen_return_internal_interrupt ());
+ else
+ emit_jump_insn (gen_return_internal ());
+}
+
+int
+xstormy16_epilogue_uses (int regno)
+{
+ if (reload_completed && call_used_regs[regno])
+ {
+ const int ifun = xstormy16_interrupt_function_p ();
+ return REG_NEEDS_SAVE (regno, ifun);
+ }
+ return 0;
+}
+
+void
+xstormy16_function_profiler (void)
+{
+ sorry ("function_profiler support");
+}
+
+/* Update CUM to advance past an argument in the argument list. The
+ values MODE, TYPE and NAMED describe that argument. Once this is
+ done, the variable CUM is suitable for analyzing the *following*
+ argument with `TARGET_FUNCTION_ARG', etc.
+
+ This function need not do anything if the argument in question was
+ passed on the stack. The compiler knows how to track the amount of
+ stack space used for arguments without any special help. However,
+ it makes life easier for xstormy16_build_va_list if it does update
+ the word count. */
+
+static void
+xstormy16_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* If an argument would otherwise be passed partially in registers,
+ and partially on the stack, the whole of it is passed on the
+ stack. */
+ if (*cum < NUM_ARGUMENT_REGISTERS
+ && *cum + XSTORMY16_WORD_SIZE (type, mode) > NUM_ARGUMENT_REGISTERS)
+ *cum = NUM_ARGUMENT_REGISTERS;
+
+ *cum += XSTORMY16_WORD_SIZE (type, mode);
+}
+
+static rtx
+xstormy16_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ if (mode == VOIDmode)
+ return const0_rtx;
+ if (targetm.calls.must_pass_in_stack (mode, type)
+ || *cum + XSTORMY16_WORD_SIZE (type, mode) > NUM_ARGUMENT_REGISTERS)
+ return NULL_RTX;
+ return gen_rtx_REG (mode, *cum + FIRST_ARGUMENT_REGISTER);
+}
+
+/* Build the va_list type.
+
+ For this chip, va_list is a record containing a counter and a pointer.
+ The counter is of type 'int' and indicates how many bytes
+ have been used to date. The pointer indicates the stack position
+ for arguments that have not been passed in registers.
+ To keep the layout nice, the pointer is first in the structure. */
+
+static tree
+xstormy16_build_builtin_va_list (void)
+{
+ tree f_1, f_2, record, type_decl;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_1 = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("base"),
+ ptr_type_node);
+ f_2 = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("count"),
+ unsigned_type_node);
+
+ DECL_FIELD_CONTEXT (f_1) = record;
+ DECL_FIELD_CONTEXT (f_2) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_1;
+ DECL_CHAIN (f_1) = f_2;
+
+ layout_type (record);
+
+ return record;
+}
+
+/* Implement the stdarg/varargs va_start macro. STDARG_P is nonzero if this
+ is stdarg.h instead of varargs.h. VALIST is the tree of the va_list
+ variable to initialize. NEXTARG is the machine independent notion of the
+ 'next' argument after the variable arguments. */
+
+static void
+xstormy16_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+{
+ tree f_base, f_count;
+ tree base, count;
+ tree t, u;
+
+ if (xstormy16_interrupt_function_p ())
+ error ("cannot use va_start in interrupt function");
+
+ f_base = TYPE_FIELDS (va_list_type_node);
+ f_count = DECL_CHAIN (f_base);
+
+ base = build3 (COMPONENT_REF, TREE_TYPE (f_base), valist, f_base, NULL_TREE);
+ count = build3 (COMPONENT_REF, TREE_TYPE (f_count), valist, f_count,
+ NULL_TREE);
+
+ t = make_tree (TREE_TYPE (base), virtual_incoming_args_rtx);
+ u = build_int_cst (NULL_TREE, - INCOMING_FRAME_SP_OFFSET);
+ u = fold_convert (TREE_TYPE (count), u);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), t, u);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (base), base, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (count), count,
+ build_int_cst (NULL_TREE,
+ crtl->args.info * UNITS_PER_WORD));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+/* Implement the stdarg/varargs va_arg macro. VALIST is the variable
+ of type va_list as a tree, TYPE is the type passed to va_arg.
+ Note: This algorithm is documented in stormy-abi. */
+
+static tree
+xstormy16_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ tree f_base, f_count;
+ tree base, count;
+ tree count_tmp, addr, t;
+ tree lab_gotaddr, lab_fromstack;
+ int size, size_of_reg_args, must_stack;
+ tree size_tree;
+
+ f_base = TYPE_FIELDS (va_list_type_node);
+ f_count = DECL_CHAIN (f_base);
+
+ base = build3 (COMPONENT_REF, TREE_TYPE (f_base), valist, f_base, NULL_TREE);
+ count = build3 (COMPONENT_REF, TREE_TYPE (f_count), valist, f_count,
+ NULL_TREE);
+
+ must_stack = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
+ size_tree = round_up (size_in_bytes (type), UNITS_PER_WORD);
+ gimplify_expr (&size_tree, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+ size_of_reg_args = NUM_ARGUMENT_REGISTERS * UNITS_PER_WORD;
+
+ count_tmp = get_initialized_tmp_var (count, pre_p, NULL);
+ lab_gotaddr = create_artificial_label (UNKNOWN_LOCATION);
+ lab_fromstack = create_artificial_label (UNKNOWN_LOCATION);
+ addr = create_tmp_var (ptr_type_node, NULL);
+
+ if (!must_stack)
+ {
+ tree r;
+
+ t = fold_convert (TREE_TYPE (count), size_tree);
+ t = build2 (PLUS_EXPR, TREE_TYPE (count), count_tmp, t);
+ r = fold_convert (TREE_TYPE (count), size_int (size_of_reg_args));
+ t = build2 (GT_EXPR, boolean_type_node, t, r);
+ t = build3 (COND_EXPR, void_type_node, t,
+ build1 (GOTO_EXPR, void_type_node, lab_fromstack),
+ NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, base, count_tmp);
+ gimplify_assign (addr, t, pre_p);
+
+ t = build1 (GOTO_EXPR, void_type_node, lab_gotaddr);
+ gimplify_and_add (t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_fromstack);
+ gimplify_and_add (t, pre_p);
+ }
+
+ /* Arguments larger than a word might need to skip over some
+ registers, since arguments are either passed entirely in
+ registers or entirely on the stack. */
+ size = PUSH_ROUNDING (int_size_in_bytes (type));
+ if (size > 2 || size < 0 || must_stack)
+ {
+ tree r, u;
+
+ r = size_int (NUM_ARGUMENT_REGISTERS * UNITS_PER_WORD);
+ u = build2 (MODIFY_EXPR, TREE_TYPE (count_tmp), count_tmp, r);
+
+ t = fold_convert (TREE_TYPE (count), r);
+ t = build2 (GE_EXPR, boolean_type_node, count_tmp, t);
+ t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, u);
+ gimplify_and_add (t, pre_p);
+ }
+
+ t = size_int (NUM_ARGUMENT_REGISTERS * UNITS_PER_WORD
+ + INCOMING_FRAME_SP_OFFSET);
+ t = fold_convert (TREE_TYPE (count), t);
+ t = build2 (MINUS_EXPR, TREE_TYPE (count), count_tmp, t);
+ t = build2 (PLUS_EXPR, TREE_TYPE (count), t,
+ fold_convert (TREE_TYPE (count), size_tree));
+ t = fold_convert (TREE_TYPE (t), fold (t));
+ t = fold_build1 (NEGATE_EXPR, TREE_TYPE (t), t);
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
+ gimplify_assign (addr, t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_gotaddr);
+ gimplify_and_add (t, pre_p);
+
+ t = fold_convert (TREE_TYPE (count), size_tree);
+ t = build2 (PLUS_EXPR, TREE_TYPE (count), count_tmp, t);
+ gimplify_assign (count, t, pre_p);
+
+ addr = fold_convert (build_pointer_type (type), addr);
+ return build_va_arg_indirect_ref (addr);
+}
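+
+/* A rough C sketch of the GIMPLE built above, where REGS stands for
+   NUM_ARGUMENT_REGISTERS * UNITS_PER_WORD (= 12):
+
+       if (!must_stack && count + size <= REGS)
+         addr = base + count;                    -- register save area
+       else
+         {
+           if (count < REGS)
+             count = REGS;                       -- skip remaining regs
+           addr = base + (REGS + INCOMING_FRAME_SP_OFFSET)
+                  - count - size;                -- stack argument
+         }
+       count += size;
+       return *(TYPE *) addr;
+
+   (the register-skip step is only emitted for multi-word or must-stack
+   arguments, per the size test above).  */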
+
+/* Worker function for TARGET_TRAMPOLINE_INIT. */
+
+static void
+xstormy16_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
+{
+ rtx temp = gen_reg_rtx (HImode);
+ rtx reg_fnaddr = gen_reg_rtx (HImode);
+ rtx reg_addr, reg_addr_mem;
+
+ reg_addr = copy_to_reg (XEXP (m_tramp, 0));
+ reg_addr_mem = adjust_automodify_address (m_tramp, HImode, reg_addr, 0);
+
+ emit_move_insn (temp, GEN_INT (0x3130 | STATIC_CHAIN_REGNUM));
+ emit_move_insn (reg_addr_mem, temp);
+ emit_insn (gen_addhi3 (reg_addr, reg_addr, const2_rtx));
+ reg_addr_mem = adjust_automodify_address (reg_addr_mem, VOIDmode, NULL, 2);
+
+ emit_move_insn (temp, static_chain);
+ emit_move_insn (reg_addr_mem, temp);
+ emit_insn (gen_addhi3 (reg_addr, reg_addr, const2_rtx));
+ reg_addr_mem = adjust_automodify_address (reg_addr_mem, VOIDmode, NULL, 2);
+
+ emit_move_insn (reg_fnaddr, XEXP (DECL_RTL (fndecl), 0));
+ emit_move_insn (temp, reg_fnaddr);
+ emit_insn (gen_andhi3 (temp, temp, GEN_INT (0xFF)));
+ emit_insn (gen_iorhi3 (temp, temp, GEN_INT (0x0200)));
+ emit_move_insn (reg_addr_mem, temp);
+ emit_insn (gen_addhi3 (reg_addr, reg_addr, const2_rtx));
+ reg_addr_mem = adjust_automodify_address (reg_addr_mem, VOIDmode, NULL, 2);
+
+ emit_insn (gen_lshrhi3 (reg_fnaddr, reg_fnaddr, GEN_INT (8)));
+ emit_move_insn (reg_addr_mem, reg_fnaddr);
+}
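+
+/* The trampoline built above fills its TRAMPOLINE_SIZE (8) bytes with
+   four words, roughly as follows (the opcode values come from the
+   constants in the code; the mnemonic interpretation is an
+   assumption):
+
+       word 0:  0x3130 | STATIC_CHAIN_REGNUM    load-immediate opcode
+       word 1:  <static chain value>            its immediate operand
+       word 2:  0x0200 | (fnaddr & 0xff)        jmpf + addr low byte
+       word 3:  fnaddr >> 8                     remaining address bits  */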
+
+/* Worker function for TARGET_FUNCTION_VALUE. */
+
+static rtx
+xstormy16_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode;
+ mode = TYPE_MODE (valtype);
+ PROMOTE_MODE (mode, 0, valtype);
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+xstormy16_libcall_value (enum machine_mode mode,
+ const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, RETURN_VALUE_REGNUM);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. */
+
+static bool
+xstormy16_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == RETURN_VALUE_REGNUM);
+}
+
+/* A C compound statement that outputs the assembler code for a thunk function,
+ used to implement C++ virtual function calls with multiple inheritance. The
+ thunk acts as a wrapper around a virtual function, adjusting the implicit
+ object parameter before handing control off to the real function.
+
+ First, emit code to add the integer DELTA to the location that contains the
+ incoming first argument. Assume that this argument contains a pointer, and
+ is the one used to pass the `this' pointer in C++. This is the incoming
+ argument *before* the function prologue, e.g. `%o0' on a sparc. The
+ addition must preserve the values of all other incoming arguments.
+
+ After the addition, emit code to jump to FUNCTION, which is a
+ `FUNCTION_DECL'. This is a direct pure jump, not a call, and does not touch
+ the return address. Hence returning from FUNCTION will return to whoever
+ called the current `thunk'.
+
+ The effect must be as if FUNCTION had been called directly
+ with the adjusted first argument. This macro is responsible for
+ emitting all of the code for a thunk function;
+ TARGET_ASM_FUNCTION_PROLOGUE and TARGET_ASM_FUNCTION_EPILOGUE are
+ not invoked.
+
+ The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already been
+ extracted from it.) It might possibly be useful on some targets, but
+ probably not. */
+
+static void
+xstormy16_asm_output_mi_thunk (FILE *file,
+ tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ int regnum = FIRST_ARGUMENT_REGISTER;
+
+ /* There might be a hidden first argument for a returned structure. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ regnum += 1;
+
+ fprintf (file, "\tadd %s,#0x%x\n", reg_names[regnum], (int) delta & 0xFFFF);
+ fputs ("\tjmpf ", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ putc ('\n', file);
+}
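+
+/* As an example of the output above, for DELTA == 4 and a target
+   function `bar' this emits approximately:
+
+       add r2,#0x4
+       jmpf bar
+
+   (with r3 in place of r2 when a struct-return pointer occupies the
+   first argument register).  */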
+
+/* The purpose of this function is to override the default behavior of
+ BSS objects. Normally, they go into .bss or .sbss via ".common"
+ directives, but we need to override that and put them in
+ .bss_below100. We can't just use a section override (like we do
+ for .data_below100), because that makes them initialized rather
+ than uninitialized. */
+
+void
+xstormy16_asm_output_aligned_common (FILE *stream,
+ tree decl,
+ const char *name,
+ int size,
+ int align,
+ int global)
+{
+ rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl);
+ rtx symbol;
+
+ if (mem != NULL_RTX
+ && MEM_P (mem)
+ && GET_CODE (symbol = XEXP (mem, 0)) == SYMBOL_REF
+ && SYMBOL_REF_FLAGS (symbol) & SYMBOL_FLAG_XSTORMY16_BELOW100)
+ {
+ const char *name2;
+ int p2align = 0;
+
+ switch_to_section (bss100_section);
+
+ while (align > 8)
+ {
+ align /= 2;
+ p2align ++;
+ }
+
+ name2 = default_strip_name_encoding (name);
+ if (global)
+ fprintf (stream, "\t.globl\t%s\n", name2);
+ if (p2align)
+ fprintf (stream, "\t.p2align %d\n", p2align);
+ fprintf (stream, "\t.type\t%s, @object\n", name2);
+ fprintf (stream, "\t.size\t%s, %d\n", name2, size);
+ fprintf (stream, "%s:\n\t.space\t%d\n", name2, size);
+ return;
+ }
+
+ if (!global)
+ {
+ fprintf (stream, "\t.local\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
+ }
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ fprintf (stream, ",%u,%u\n", size, align / BITS_PER_UNIT);
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+xstormy16_asm_init_sections (void)
+{
+ bss100_section
+ = get_unnamed_section (SECTION_WRITE | SECTION_BSS,
+ output_section_asm_op,
+ "\t.section \".bss_below100\",\"aw\",@nobits");
+}
+
+/* Mark symbols with the "below100" attribute so that we can use the
+ special addressing modes for them. */
+
+static void
+xstormy16_encode_section_info (tree decl, rtx r, int first)
+{
+ default_encode_section_info (decl, r, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (lookup_attribute ("below100", DECL_ATTRIBUTES (decl))
+ || lookup_attribute ("BELOW100", DECL_ATTRIBUTES (decl))))
+ {
+ rtx symbol = XEXP (r, 0);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ SYMBOL_REF_FLAGS (symbol) |= SYMBOL_FLAG_XSTORMY16_BELOW100;
+ }
+}
+
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR xstormy16_asm_out_constructor
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR xstormy16_asm_out_destructor
+
+/* Output constructors and destructors. Just like
+ default_named_section_asm_out_* but don't set the sections writable. */
+
+static void
+xstormy16_asm_out_destructor (rtx symbol, int priority)
+{
+ const char *section = ".dtors";
+ char buf[16];
+
+ /* ??? This only works reliably with the GNU linker. */
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ sprintf (buf, ".dtors.%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+ section = buf;
+ }
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+static void
+xstormy16_asm_out_constructor (rtx symbol, int priority)
+{
+ const char *section = ".ctors";
+ char buf[16];
+
+ /* ??? This only works reliably with the GNU linker. */
+ if (priority != DEFAULT_INIT_PRIORITY)
+ {
+ sprintf (buf, ".ctors.%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+ section = buf;
+ }
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
+
+/* Worker function for TARGET_PRINT_OPERAND_ADDRESS.
+
+ Print a memory address as an operand to reference that memory location. */
+
+static void
+xstormy16_print_operand_address (FILE *file, rtx address)
+{
+ HOST_WIDE_INT offset;
+ int pre_dec, post_inc;
+
+ /* There are a few easy cases. */
+ if (CONST_INT_P (address))
+ {
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (address) & 0xFFFF);
+ return;
+ }
+
+ if (CONSTANT_P (address) || LABEL_P (address))
+ {
+ output_addr_const (file, address);
+ return;
+ }
+
+ /* Otherwise, it's hopefully something of the form
+ (plus:HI (pre_dec:HI (reg:HI ...)) (const_int ...)). */
+ if (GET_CODE (address) == PLUS)
+ {
+ gcc_assert (CONST_INT_P (XEXP (address, 1)));
+ offset = INTVAL (XEXP (address, 1));
+ address = XEXP (address, 0);
+ }
+ else
+ offset = 0;
+
+ pre_dec = (GET_CODE (address) == PRE_DEC);
+ post_inc = (GET_CODE (address) == POST_INC);
+ if (pre_dec || post_inc)
+ address = XEXP (address, 0);
+
+ gcc_assert (REG_P (address));
+
+ fputc ('(', file);
+ if (pre_dec)
+ fputs ("--", file);
+ fputs (reg_names [REGNO (address)], file);
+ if (post_inc)
+ fputs ("++", file);
+ if (offset != 0)
+ fprintf (file, "," HOST_WIDE_INT_PRINT_DEC, offset);
+ fputc (')', file);
+}
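+
+/* Examples of the address syntax produced above (illustrative): a
+   plain register prints as "(r5)", a pre-decrement as "(--r5)", a
+   post-increment as "(r5++)", and register plus constant offset as
+   "(r5,12)".  */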
+
+/* Worker function for TARGET_PRINT_OPERAND.
+
+ Print an operand to an assembler instruction. */
+
+static void
+xstormy16_print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'B':
+ /* There is either one bit set, or one bit clear, in X.
+ Print it preceded by '#'. */
+ {
+ static int bits_set[8] = { 0, 1, 1, 2, 1, 2, 2, 3 };
+ HOST_WIDE_INT xx = 1;
+ HOST_WIDE_INT l;
+
+ if (CONST_INT_P (x))
+ xx = INTVAL (x);
+ else
+ output_operand_lossage ("'B' operand is not constant");
+
+ /* GCC sign-extends masks with the MSB set, so we have to
+ detect all the cases that differ only in sign extension
+ beyond the bits we care about. Normally, the predicates
+ and constraints ensure that we have the right values. This
+ works correctly for valid masks. */
+ if (bits_set[xx & 7] <= 1)
+ {
+ /* Remove sign extension bits. */
+ if ((~xx & ~(HOST_WIDE_INT)0xff) == 0)
+ xx &= 0xff;
+ else if ((~xx & ~(HOST_WIDE_INT)0xffff) == 0)
+ xx &= 0xffff;
+ l = exact_log2 (xx);
+ }
+ else
+ {
+ /* Add sign extension bits. */
+ if ((xx & ~(HOST_WIDE_INT)0xff) == 0)
+ xx |= ~(HOST_WIDE_INT)0xff;
+ else if ((xx & ~(HOST_WIDE_INT)0xffff) == 0)
+ xx |= ~(HOST_WIDE_INT)0xffff;
+ l = exact_log2 (~xx);
+ }
+
+ if (l == -1)
+ output_operand_lossage ("'B' operand has multiple bits set");
+
+ fprintf (file, IMMEDIATE_PREFIX HOST_WIDE_INT_PRINT_DEC, l);
+ return;
+ }
+
+ case 'C':
+ /* Print the symbol without a surrounding @fptr(). */
+ if (GET_CODE (x) == SYMBOL_REF)
+ assemble_name (file, XSTR (x, 0));
+ else if (LABEL_P (x))
+ output_asm_label (x);
+ else
+ xstormy16_print_operand_address (file, x);
+ return;
+
+ case 'o':
+ case 'O':
+ /* Print the immediate operand less one, preceded by '#'.
+ For 'O', negate it first. */
+ {
+ HOST_WIDE_INT xx = 0;
+
+ if (CONST_INT_P (x))
+ xx = INTVAL (x);
+ else
+ output_operand_lossage ("'o' operand is not constant");
+
+ if (code == 'O')
+ xx = -xx;
+
+ fprintf (file, IMMEDIATE_PREFIX HOST_WIDE_INT_PRINT_DEC, xx - 1);
+ return;
+ }
+
+ case 'b':
+ /* Print the shift mask for bp/bn. */
+ {
+ HOST_WIDE_INT xx = 1;
+ HOST_WIDE_INT l;
+
+ if (CONST_INT_P (x))
+ xx = INTVAL (x);
+ else
+ output_operand_lossage ("'B' operand is not constant");
+
+ l = 7 - xx;
+
+ fputs (IMMEDIATE_PREFIX, file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, l);
+ return;
+ }
+
+ case 0:
+ /* Handled below. */
+ break;
+
+ default:
+ output_operand_lossage ("xstormy16_print_operand: unknown code");
+ return;
+ }
+
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names [REGNO (x)], file);
+ break;
+
+ case MEM:
+ xstormy16_print_operand_address (file, XEXP (x, 0));
+ break;
+
+ default:
+ /* Some kind of constant or label; an immediate operand,
+ so prefix it with '#' for the assembler. */
+ fputs (IMMEDIATE_PREFIX, file);
+ output_addr_const (file, x);
+ break;
+ }
+
+ return;
+}
+
+/* Expander for the `casesi' pattern.
+ INDEX is the index of the switch statement.
+ LOWER_BOUND is a CONST_INT that is the value of INDEX corresponding
+ to the first table entry.
+ RANGE is the number of table entries.
+ TABLE is an ADDR_VEC that is the jump table.
+ DEFAULT_LABEL is the address to branch to if INDEX is outside the
+ range LOWER_BOUND to LOWER_BOUND + RANGE - 1. */
+
+void
+xstormy16_expand_casesi (rtx index, rtx lower_bound, rtx range,
+ rtx table, rtx default_label)
+{
+ HOST_WIDE_INT range_i = INTVAL (range);
+ rtx int_index;
+
+ /* This code uses 'br', so it can deal only with tables of size up to
+ 8192 entries. */
+ if (range_i >= 8192)
+ sorry ("switch statement of size %lu entries too large",
+ (unsigned long) range_i);
+
+ index = expand_binop (SImode, sub_optab, index, lower_bound, NULL_RTX, 0,
+ OPTAB_LIB_WIDEN);
+ emit_cmp_and_jump_insns (index, range, GTU, NULL_RTX, SImode, 1,
+ default_label);
+ int_index = gen_lowpart_common (HImode, index);
+ emit_insn (gen_ashlhi3 (int_index, int_index, const2_rtx));
+ emit_jump_insn (gen_tablejump_pcrel (int_index, table));
+}
+
+/* Output an ADDR_VEC. It is output as a sequence of 'jmpf'
+ instructions, without label or alignment or any other special
+ constructs. We know that the previous instruction will be the
+ `tablejump_pcrel' output above.
+
+ TODO: it might be nice to output 'br' instructions if they could
+ all reach. */
+
+void
+xstormy16_output_addr_vec (FILE *file, rtx label ATTRIBUTE_UNUSED, rtx table)
+{
+ int vlen, idx;
+
+ switch_to_section (current_function_section ());
+
+ vlen = XVECLEN (table, 0);
+ for (idx = 0; idx < vlen; idx++)
+ {
+ fputs ("\tjmpf ", file);
+ output_asm_label (XEXP (XVECEXP (table, 0, idx), 0));
+ fputc ('\n', file);
+ }
+}
+
+/* Expander for the `call' patterns.
+ RETVAL is the RTL for the return register or NULL for void functions.
+ DEST is the function to call, expressed as a MEM.
+ COUNTER is ignored. */
+
+void
+xstormy16_expand_call (rtx retval, rtx dest, rtx counter)
+{
+ rtx call, temp;
+ enum machine_mode mode;
+
+ gcc_assert (MEM_P (dest));
+ dest = XEXP (dest, 0);
+
+ if (! CONSTANT_P (dest) && ! REG_P (dest))
+ dest = force_reg (Pmode, dest);
+
+ if (retval == NULL)
+ mode = VOIDmode;
+ else
+ mode = GET_MODE (retval);
+
+ call = gen_rtx_CALL (mode, gen_rtx_MEM (FUNCTION_MODE, dest),
+ counter);
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+
+ if (! CONSTANT_P (dest))
+ {
+ temp = gen_reg_rtx (HImode);
+ emit_move_insn (temp, const0_rtx);
+ }
+ else
+ temp = const0_rtx;
+
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call,
+ gen_rtx_USE (VOIDmode, temp)));
+ emit_call_insn (call);
+}
+
+/* Expanders for multiword computational operations. */
+
+/* Expander for arithmetic operations; emit insns to compute
+
+ (set DEST (CODE:MODE SRC0 SRC1))
+
+ When CODE is COMPARE, a branch template is generated
+ (this saves duplicating code in xstormy16_split_cbranch). */
+
+void
+xstormy16_expand_arith (enum machine_mode mode, enum rtx_code code,
+ rtx dest, rtx src0, rtx src1)
+{
+ int num_words = GET_MODE_BITSIZE (mode) / BITS_PER_WORD;
+ int i;
+ int firstloop = 1;
+
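+ /* NEG is handled as 0 - SRC1 by the MINUS path below, so seed
+ SRC0 with zero first. */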
+ if (code == NEG)
+ emit_move_insn (src0, const0_rtx);
+
+ for (i = 0; i < num_words; i++)
+ {
+ rtx w_src0, w_src1, w_dest;
+ rtx insn;
+
+ w_src0 = simplify_gen_subreg (word_mode, src0, mode,
+ i * UNITS_PER_WORD);
+ w_src1 = simplify_gen_subreg (word_mode, src1, mode, i * UNITS_PER_WORD);
+ w_dest = simplify_gen_subreg (word_mode, dest, mode, i * UNITS_PER_WORD);
+
+ switch (code)
+ {
+ case PLUS:
+ if (firstloop
+ && CONST_INT_P (w_src1)
+ && INTVAL (w_src1) == 0)
+ continue;
+
+ if (firstloop)
+ insn = gen_addchi4 (w_dest, w_src0, w_src1);
+ else
+ insn = gen_addchi5 (w_dest, w_src0, w_src1);
+ break;
+
+ case NEG:
+ case MINUS:
+ case COMPARE:
+ if (code == COMPARE && i == num_words - 1)
+ {
+ rtx branch, sub, clobber, sub_1;
+
+ sub_1 = gen_rtx_MINUS (HImode, w_src0,
+ gen_rtx_ZERO_EXTEND (HImode, gen_rtx_REG (BImode, CARRY_REGNUM)));
+ sub = gen_rtx_SET (VOIDmode, w_dest,
+ gen_rtx_MINUS (HImode, sub_1, w_src1));
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (BImode, CARRY_REGNUM));
+ branch = gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_EQ (HImode,
+ sub_1,
+ w_src1),
+ pc_rtx,
+ pc_rtx));
+ insn = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, branch, sub, clobber));
+ }
+ else if (firstloop
+ && code != COMPARE
+ && CONST_INT_P (w_src1)
+ && INTVAL (w_src1) == 0)
+ continue;
+ else if (firstloop)
+ insn = gen_subchi4 (w_dest, w_src0, w_src1);
+ else
+ insn = gen_subchi5 (w_dest, w_src0, w_src1);
+ break;
+
+ case IOR:
+ case XOR:
+ case AND:
+ if (CONST_INT_P (w_src1)
+ && INTVAL (w_src1) == -(code == AND))
+ continue;
+
+ insn = gen_rtx_SET (VOIDmode, w_dest, gen_rtx_fmt_ee (code, mode,
+ w_src0, w_src1));
+ break;
+
+ case NOT:
+ insn = gen_rtx_SET (VOIDmode, w_dest, gen_rtx_NOT (mode, w_src0));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ firstloop = 0;
+ emit (insn);
+ }
+
+ /* If we emit nothing, try_split() will think we failed. So emit
+ something that does nothing and can be optimized away. */
+ if (firstloop)
+ emit (gen_nop ());
+}
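+
+/* For example (a sketch, not emitted verbatim): expanding
+
+       (set (reg:SI 4) (plus:SI (reg:SI 4) (reg:SI 6)))
+
+   generates an addchi4 for the low word, which sets the carry, and an
+   addchi5 for the high word, which consumes it -- roughly
+   "add r4,r6 | adc r5,r7" at the assembly level.  */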
+
+/* The shift operations are split at output time for constant values;
+ variable-width shifts get handed off to a library routine.
+
+ Generate an output string to do (set X (CODE:MODE X SIZE_R))
+ SIZE_R will be a CONST_INT, X will be a hard register. */
+
+const char *
+xstormy16_output_shift (enum machine_mode mode, enum rtx_code code,
+ rtx x, rtx size_r, rtx temp)
+{
+ HOST_WIDE_INT size;
+ const char *r0, *r1, *rt;
+ static char r[64];
+
+ gcc_assert (CONST_INT_P (size_r)
+ && REG_P (x)
+ && mode == SImode);
+
+ size = INTVAL (size_r) & (GET_MODE_BITSIZE (mode) - 1);
+
+ if (size == 0)
+ return "";
+
+ r0 = reg_names [REGNO (x)];
+ r1 = reg_names [REGNO (x) + 1];
+
+ /* For shifts of size 1, we can use the rotate instructions. */
+ if (size == 1)
+ {
+ switch (code)
+ {
+ case ASHIFT:
+ sprintf (r, "shl %s,#1 | rlc %s,#1", r0, r1);
+ break;
+ case ASHIFTRT:
+ sprintf (r, "asr %s,#1 | rrc %s,#1", r1, r0);
+ break;
+ case LSHIFTRT:
+ sprintf (r, "shr %s,#1 | rrc %s,#1", r1, r0);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return r;
+ }
+
+ /* For large shifts, there are easy special cases. */
+ if (size == 16)
+ {
+ switch (code)
+ {
+ case ASHIFT:
+ sprintf (r, "mov %s,%s | mov %s,#0", r1, r0, r0);
+ break;
+ case ASHIFTRT:
+ sprintf (r, "mov %s,%s | asr %s,#15", r0, r1, r1);
+ break;
+ case LSHIFTRT:
+ sprintf (r, "mov %s,%s | mov %s,#0", r0, r1, r1);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return r;
+ }
+ if (size > 16)
+ {
+ switch (code)
+ {
+ case ASHIFT:
+ sprintf (r, "mov %s,%s | mov %s,#0 | shl %s,#%d",
+ r1, r0, r0, r1, (int) size - 16);
+ break;
+ case ASHIFTRT:
+ sprintf (r, "mov %s,%s | asr %s,#15 | asr %s,#%d",
+ r0, r1, r1, r0, (int) size - 16);
+ break;
+ case LSHIFTRT:
+ sprintf (r, "mov %s,%s | mov %s,#0 | shr %s,#%d",
+ r0, r1, r1, r0, (int) size - 16);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return r;
+ }
+
+ /* For the rest, we have to do more work. In particular, we
+ need a temporary. */
+ rt = reg_names [REGNO (temp)];
+ switch (code)
+ {
+ case ASHIFT:
+ sprintf (r,
+ "mov %s,%s | shl %s,#%d | shl %s,#%d | shr %s,#%d | or %s,%s",
+ rt, r0, r0, (int) size, r1, (int) size, rt, (int) (16 - size),
+ r1, rt);
+ break;
+ case ASHIFTRT:
+ sprintf (r,
+ "mov %s,%s | asr %s,#%d | shr %s,#%d | shl %s,#%d | or %s,%s",
+ rt, r1, r1, (int) size, r0, (int) size, rt, (int) (16 - size),
+ r0, rt);
+ break;
+ case LSHIFTRT:
+ sprintf (r,
+ "mov %s,%s | shr %s,#%d | shr %s,#%d | shl %s,#%d | or %s,%s",
+ rt, r1, r1, (int) size, r0, (int) size, rt, (int) (16 - size),
+ r0, rt);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return r;
+}
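+
+/* As an example of the final case above, a 4-bit SImode left shift of
+   r2/r3 with temporary r7 produces
+
+       mov r7,r2 | shl r2,#4 | shl r3,#4 | shr r7,#12 | or r3,r7
+
+   i.e. the bits shifted out of the low word are recovered from the
+   saved copy and merged into the high word.  */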
+
+/* Attribute handling. */
+
+/* Return nonzero if the function is an interrupt function. */
+
+int
+xstormy16_interrupt_function_p (void)
+{
+ tree attributes;
+
+ /* The dwarf2 mechanism asks for INCOMING_FRAME_SP_OFFSET before
+ any functions are declared, which is demonstrably wrong, but
+ it is worked around here. FIXME. */
+ if (!cfun)
+ return 0;
+
+ attributes = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+ return lookup_attribute ("interrupt", attributes) != NULL_TREE;
+}
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE xstormy16_attribute_table
+
+static tree xstormy16_handle_interrupt_attribute
+ (tree *, tree, tree, int, bool *);
+static tree xstormy16_handle_below100_attribute
+ (tree *, tree, tree, int, bool *);
+
+static const struct attribute_spec xstormy16_attribute_table[] =
+{
+ /* name, min_len, max_len, decl_req, type_req, fn_type_req, handler. */
+ { "interrupt", 0, 0, false, true, true, xstormy16_handle_interrupt_attribute },
+ { "BELOW100", 0, 0, false, false, false, xstormy16_handle_below100_attribute },
+ { "below100", 0, 0, false, false, false, xstormy16_handle_below100_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Handle an "interrupt" attribute;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+xstormy16_handle_interrupt_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "below" attribute;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+xstormy16_handle_below100_attribute (tree *node,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != VAR_DECL
+ && TREE_CODE (*node) != POINTER_TYPE
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes,
+ "%<__BELOW100__%> attribute only applies to variables");
+ *no_add_attrs = true;
+ }
+ else if (args == NULL_TREE && TREE_CODE (*node) == VAR_DECL)
+ {
+ if (! (TREE_PUBLIC (*node) || TREE_STATIC (*node)))
+ {
+ warning (OPT_Wattributes, "__BELOW100__ attribute not allowed "
+ "with auto storage class");
+ *no_add_attrs = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS xstormy16_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN xstormy16_expand_builtin
+
+static struct
+{
+ const char * name;
+ int md_code;
+ const char * arg_ops; /* 0..9, t for temp register, r for return value. */
+ const char * arg_types; /* s=short, l=long; upper case for unsigned. */
+}
+ s16builtins[] =
+{
+ { "__sdivlh", CODE_FOR_sdivlh, "rt01", "sls" },
+ { "__smodlh", CODE_FOR_sdivlh, "tr01", "sls" },
+ { "__udivlh", CODE_FOR_udivlh, "rt01", "SLS" },
+ { "__umodlh", CODE_FOR_udivlh, "tr01", "SLS" },
+ { NULL, 0, NULL, NULL }
+};
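+
+/* In C terms the table above declares, approximately:
+
+       short          __sdivlh (long, short);   -- quotient
+       short          __smodlh (long, short);   -- remainder
+       unsigned short __udivlh (unsigned long, unsigned short);
+       unsigned short __umodlh (unsigned long, unsigned short);
+
+   so e.g. `short q = __sdivlh (x, d);' expands directly to the sdivlh
+   pattern rather than to a library call.  */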
+
+static void
+xstormy16_init_builtins (void)
+{
+ tree args, ret_type, arg;
+ int i, a;
+
+ ret_type = void_type_node;
+
+ for (i = 0; s16builtins[i].name; i++)
+ {
+ args = void_list_node;
+ for (a = strlen (s16builtins[i].arg_types) - 1; a >= 0; a--)
+ {
+ switch (s16builtins[i].arg_types[a])
+ {
+ case 's': arg = short_integer_type_node; break;
+ case 'S': arg = short_unsigned_type_node; break;
+ case 'l': arg = long_integer_type_node; break;
+ case 'L': arg = long_unsigned_type_node; break;
+ default: gcc_unreachable ();
+ }
+ if (a == 0)
+ ret_type = arg;
+ else
+ args = tree_cons (NULL_TREE, arg, args);
+ }
+ add_builtin_function (s16builtins[i].name,
+ build_function_type (ret_type, args),
+ i, BUILT_IN_MD, NULL, NULL);
+ }
+}
+
+static rtx
+xstormy16_expand_builtin (tree exp, rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ rtx op[10], args[10], pat, copyto[10], retval = 0;
+ tree fndecl, argtree;
+ int i, a, o, code;
+
+ fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+ argtree = TREE_OPERAND (exp, 1);
+ i = DECL_FUNCTION_CODE (fndecl);
+ code = s16builtins[i].md_code;
+
+ for (a = 0; a < 10 && argtree; a++)
+ {
+ args[a] = expand_normal (TREE_VALUE (argtree));
+ argtree = TREE_CHAIN (argtree);
+ }
+
+ for (o = 0; s16builtins[i].arg_ops[o]; o++)
+ {
+ char ao = s16builtins[i].arg_ops[o];
+ char c = insn_data[code].operand[o].constraint[0];
+ enum machine_mode omode;
+
+ copyto[o] = 0;
+
+ omode = (enum machine_mode) insn_data[code].operand[o].mode;
+ if (ao == 'r')
+ op[o] = target ? target : gen_reg_rtx (omode);
+ else if (ao == 't')
+ op[o] = gen_reg_rtx (omode);
+ else
+ op[o] = args[(int) hex_value (ao)];
+
+ if (! (*insn_data[code].operand[o].predicate) (op[o], GET_MODE (op[o])))
+ {
+ if (c == '+' || c == '=')
+ {
+ copyto[o] = op[o];
+ op[o] = gen_reg_rtx (omode);
+ }
+ else
+ op[o] = copy_to_mode_reg (omode, op[o]);
+ }
+
+ if (ao == 'r')
+ retval = op[o];
+ }
+
+ pat = GEN_FCN (code) (op[0], op[1], op[2], op[3], op[4],
+ op[5], op[6], op[7], op[8], op[9]);
+ emit_insn (pat);
+
+ for (o = 0; s16builtins[i].arg_ops[o]; o++)
+ if (copyto[o])
+ {
+ emit_move_insn (copyto[o], op[o]);
+ if (op[o] == retval)
+ retval = copyto[o];
+ }
+
+ return retval;
+}
+
+/* Look for combinations of insns that can be converted to BN or BP
+ opcodes. This is, unfortunately, too complex to do with MD
+ patterns. */
+
+static void
+combine_bnp (rtx insn)
+{
+ int insn_code, regno, need_extend;
+ unsigned int mask;
+ rtx cond, reg, and_insn, load, qireg, mem;
+ enum machine_mode load_mode = QImode;
+ enum machine_mode and_mode = QImode;
+ rtx shift = NULL_RTX;
+
+ insn_code = recog_memoized (insn);
+ if (insn_code != CODE_FOR_cbranchhi
+ && insn_code != CODE_FOR_cbranchhi_neg)
+ return;
+
+ cond = XVECEXP (PATTERN (insn), 0, 0); /* set */
+ cond = XEXP (cond, 1); /* if */
+ cond = XEXP (cond, 0); /* cond */
+ switch (GET_CODE (cond))
+ {
+ case NE:
+ case EQ:
+ need_extend = 0;
+ break;
+ case LT:
+ case GE:
+ need_extend = 1;
+ break;
+ default:
+ return;
+ }
+
+ reg = XEXP (cond, 0);
+ if (! REG_P (reg))
+ return;
+ regno = REGNO (reg);
+ if (XEXP (cond, 1) != const0_rtx)
+ return;
+ if (! find_regno_note (insn, REG_DEAD, regno))
+ return;
+ qireg = gen_rtx_REG (QImode, regno);
+
+ if (need_extend)
+ {
+ /* LT and GE conditionals should have a sign extend before
+ them. */
+ for (and_insn = prev_real_insn (insn); and_insn;
+ and_insn = prev_real_insn (and_insn))
+ {
+ int and_code = recog_memoized (and_insn);
+
+ if (and_code == CODE_FOR_extendqihi2
+ && rtx_equal_p (SET_DEST (PATTERN (and_insn)), reg)
+ && rtx_equal_p (XEXP (SET_SRC (PATTERN (and_insn)), 0), qireg))
+ break;
+
+ if (and_code == CODE_FOR_movhi_internal
+ && rtx_equal_p (SET_DEST (PATTERN (and_insn)), reg))
+ {
+ /* This is for testing bit 15. */
+ and_insn = insn;
+ break;
+ }
+
+ if (reg_mentioned_p (reg, and_insn))
+ return;
+
+ if (GET_CODE (and_insn) != NOTE
+ && GET_CODE (and_insn) != INSN)
+ return;
+ }
+ }
+ else
+ {
+ /* EQ and NE conditionals have an AND before them. */
+ for (and_insn = prev_real_insn (insn); and_insn;
+ and_insn = prev_real_insn (and_insn))
+ {
+ if (recog_memoized (and_insn) == CODE_FOR_andhi3
+ && rtx_equal_p (SET_DEST (PATTERN (and_insn)), reg)
+ && rtx_equal_p (XEXP (SET_SRC (PATTERN (and_insn)), 0), reg))
+ break;
+
+ if (reg_mentioned_p (reg, and_insn))
+ return;
+
+ if (GET_CODE (and_insn) != NOTE
+ && GET_CODE (and_insn) != INSN)
+ return;
+ }
+
+ if (and_insn)
+ {
+ /* Some mis-optimizations by GCC can generate a RIGHT-SHIFT
+ followed by an AND like this:
+
+ (parallel [(set (reg:HI r7) (lshiftrt:HI (reg:HI r7) (const_int 3)))
+ (clobber (reg:BI carry))])
+
+ (set (reg:HI r7) (and:HI (reg:HI r7) (const_int 1)))
+
+ Attempt to detect this here. */
+ for (shift = prev_real_insn (and_insn); shift;
+ shift = prev_real_insn (shift))
+ {
+ if (recog_memoized (shift) == CODE_FOR_lshrhi3
+ && rtx_equal_p (SET_DEST (XVECEXP (PATTERN (shift), 0, 0)), reg)
+ && rtx_equal_p (XEXP (SET_SRC (XVECEXP (PATTERN (shift), 0, 0)), 0), reg))
+ break;
+
+ if (reg_mentioned_p (reg, shift)
+ || (GET_CODE (shift) != NOTE
+ && GET_CODE (shift) != INSN))
+ {
+ shift = NULL_RTX;
+ break;
+ }
+ }
+ }
+ }
+ if (!and_insn)
+ return;
+
+ for (load = shift ? prev_real_insn (shift) : prev_real_insn (and_insn);
+ load;
+ load = prev_real_insn (load))
+ {
+ int load_code = recog_memoized (load);
+
+ if (load_code == CODE_FOR_movhi_internal
+ && rtx_equal_p (SET_DEST (PATTERN (load)), reg)
+ && xstormy16_below100_operand (SET_SRC (PATTERN (load)), HImode)
+ && ! MEM_VOLATILE_P (SET_SRC (PATTERN (load))))
+ {
+ load_mode = HImode;
+ break;
+ }
+
+ if (load_code == CODE_FOR_movqi_internal
+ && rtx_equal_p (SET_DEST (PATTERN (load)), qireg)
+ && xstormy16_below100_operand (SET_SRC (PATTERN (load)), QImode))
+ {
+ load_mode = QImode;
+ break;
+ }
+
+ if (load_code == CODE_FOR_zero_extendqihi2
+ && rtx_equal_p (SET_DEST (PATTERN (load)), reg)
+ && xstormy16_below100_operand (XEXP (SET_SRC (PATTERN (load)), 0), QImode))
+ {
+ load_mode = QImode;
+ and_mode = HImode;
+ break;
+ }
+
+ if (reg_mentioned_p (reg, load))
+ return;
+
+ if (GET_CODE (load) != NOTE
+ && GET_CODE (load) != INSN)
+ return;
+ }
+ if (!load)
+ return;
+
+ mem = SET_SRC (PATTERN (load));
+
+ if (need_extend)
+ {
+ mask = (load_mode == HImode) ? 0x8000 : 0x80;
+
+ /* If the mem includes a zero-extend operation and we are
+ going to generate a sign-extend operation then move the
+ mem inside the zero-extend. */
+ if (GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+ }
+ else
+ {
+ if (!xstormy16_onebit_set_operand (XEXP (SET_SRC (PATTERN (and_insn)), 1),
+ load_mode))
+ return;
+
+ mask = (int) INTVAL (XEXP (SET_SRC (PATTERN (and_insn)), 1));
+
+ if (shift)
+ mask <<= INTVAL (XEXP (SET_SRC (XVECEXP (PATTERN (shift), 0, 0)), 1));
+ }
+
+ if (load_mode == HImode)
+ {
+ rtx addr = XEXP (mem, 0);
+
+ if (! (mask & 0xff))
+ {
+ addr = plus_constant (addr, 1);
+ mask >>= 8;
+ }
+ mem = gen_rtx_MEM (QImode, addr);
+ }
+
+ if (need_extend)
+ XEXP (cond, 0) = gen_rtx_SIGN_EXTEND (HImode, mem);
+ else
+ XEXP (cond, 0) = gen_rtx_AND (and_mode, mem, GEN_INT (mask));
+
+ INSN_CODE (insn) = -1;
+ delete_insn (load);
+
+ if (and_insn != insn)
+ delete_insn (and_insn);
+
+ if (shift != NULL_RTX)
+ delete_insn (shift);
+}
+
+static void
+xstormy16_reorg (void)
+{
+ rtx insn;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (! JUMP_P (insn))
+ continue;
+ combine_bnp (insn);
+ }
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+xstormy16_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ const HOST_WIDE_INT size = int_size_in_bytes (type);
+ return (size == -1 || size > UNITS_PER_WORD * NUM_ARGUMENT_REGISTERS);
+}
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+static const struct default_options xstormy16_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO xstormy16_encode_section_info
+
+/* Select_section doesn't handle .bss_below100. */
+#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS
+#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS false
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK xstormy16_asm_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND xstormy16_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS xstormy16_print_operand_address
+
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST xstormy16_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS xstormy16_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST xstormy16_address_cost
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST xstormy16_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START xstormy16_expand_builtin_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR xstormy16_gimplify_va_arg_expr
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG xstormy16_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE xstormy16_function_arg_advance
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY xstormy16_return_in_memory
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE xstormy16_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE xstormy16_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P xstormy16_function_value_regno_p
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG xstormy16_reorg
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS xstormy16_preferred_reload_class
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xstormy16_preferred_reload_class
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P xstormy16_legitimate_address_p
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P xstormy16_mode_dependent_address_p
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE xstormy16_can_eliminate
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT xstormy16_trampoline_init
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE xstormy16_option_optimization_table
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-stormy16.h"
diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h
new file mode 100644
index 000000000..a838b8acf
--- /dev/null
+++ b/gcc/config/stormy16/stormy16.h
@@ -0,0 +1,520 @@
+/* Xstormy16 cpu description.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2007,
+ 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Contributed by Red Hat, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* Driver configuration. */
+
+#undef ASM_SPEC
+#define ASM_SPEC ""
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{v:-V} \
+ %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
+
+/* For xstormy16:
+ - If -msim is specified, everything is built and linked as for the sim.
+ - If -T is specified, that linker script is used, and it should provide
+ appropriate libraries.
+ - If neither is specified, everything is built as for the sim, but no
+ I/O support is assumed. */
+#undef LIB_SPEC
+#define LIB_SPEC "-( -lc %{msim:-lsim}%{!msim:%{!T*:-lnosys}} -)"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend.o%s crtn.o%s"
+
+
+/* Run-time target specifications. */
+
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("xstormy16"); \
+ builtin_assert ("machine=xstormy16"); \
+ builtin_assert ("cpu=xstormy16"); \
+ } \
+ while (0)
+
+#define TARGET_VERSION fprintf (stderr, " (xstormy16 cpu core)");
+
+/* Storage Layout. */
+
+#define BITS_BIG_ENDIAN 1
+
+#define BYTES_BIG_ENDIAN 0
+
+#define WORDS_BIG_ENDIAN 0
+
+#define UNITS_PER_WORD 2
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ do \
+ { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 2) \
+ (MODE) = HImode; \
+ } \
+ while (0)
+
+#define PARM_BOUNDARY 16
+
+#define STACK_BOUNDARY 16
+
+#define FUNCTION_BOUNDARY 16
+
+#define BIGGEST_ALIGNMENT 16
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ (TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+
+#define STRICT_ALIGNMENT 1
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Layout of Source Language Data Types. */
+
+#define INT_TYPE_SIZE 16
+
+#define SHORT_TYPE_SIZE 16
+
+#define LONG_TYPE_SIZE 32
+
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+
+#define DOUBLE_TYPE_SIZE 64
+
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#define DEFAULT_SIGNED_CHAR 0
+
+#define SIZE_TYPE "unsigned int"
+
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* Register Basics. */
+
+#define FIRST_PSEUDO_REGISTER 19
+
+#define FIXED_REGISTERS \
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1 }
+
+#define CALL_USED_REGISTERS \
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1 }
+
+
+/* Order of allocation of registers. */
+
+#define REG_ALLOC_ORDER { 7, 6, 5, 4, 3, 2, 1, 0, 9, 8, 10, 11, 12, 13, 14, 15, 16 }
+
+
+/* How Values Fit in Registers. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) ((REGNO) != 16 || (MODE) == BImode)
+
+/* A C expression that is nonzero if it is desirable to choose register
+ allocation so as to avoid move instructions between a value of mode MODE1
+ and a value of mode MODE2.
+
+ If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are
+ ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be
+ zero. */
+#define MODES_TIEABLE_P(MODE1, MODE2) ((MODE1) != BImode && (MODE2) != BImode)
+
+
+/* Register Classes. */
+
+enum reg_class
+{
+ NO_REGS,
+ R0_REGS,
+ R1_REGS,
+ TWO_REGS,
+ R2_REGS,
+ EIGHT_REGS,
+ R8_REGS,
+ ICALL_REGS,
+ GENERAL_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, LIM_REG_CLASSES \
+}
+
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "R0_REGS", \
+ "R1_REGS", \
+ "TWO_REGS", \
+ "R2_REGS", \
+ "EIGHT_REGS", \
+ "R8_REGS", \
+ "ICALL_REGS", \
+ "GENERAL_REGS", \
+ "ALL_REGS" \
+}
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000 }, \
+ { 0x00001 }, \
+ { 0x00002 }, \
+ { 0x00003 }, \
+ { 0x00004 }, \
+ { 0x000FF }, \
+ { 0x00100 }, \
+ { 0x00300 }, \
+ { 0x6FFFF }, \
+ { (1 << FIRST_PSEUDO_REGISTER) - 1 } \
+}
+
+#define REGNO_REG_CLASS(REGNO) \
+ ( (REGNO) == 0 ? R0_REGS \
+ : (REGNO) == 1 ? R1_REGS \
+ : (REGNO) == 2 ? R2_REGS \
+ : (REGNO) < 8 ? EIGHT_REGS \
+ : (REGNO) == 8 ? R8_REGS \
+ : (REGNO) <= 18 ? GENERAL_REGS \
+ : ALL_REGS)
+
+#define BASE_REG_CLASS GENERAL_REGS
+
+#define INDEX_REG_CLASS GENERAL_REGS
+
+#define REGNO_OK_FOR_BASE_P(NUM) 1
+
+#define REGNO_OK_FOR_INDEX_P(NUM) REGNO_OK_FOR_BASE_P (NUM)
+
+/* This chip has the interesting property that only the first eight
+ registers can be moved to/from memory. */
+#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \
+ xstormy16_secondary_reload_class (CLASS, MODE, X)
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+
+/* Basic Stack Layout. */
+
+/* We want to use post-increment instructions to push things on the stack,
+ because we don't have any pre-increment ones. */
+#define STACK_PUSH_CODE POST_INC
+
+#define FRAME_GROWS_DOWNWARD 0
+
+#define ARGS_GROW_DOWNWARD 1
+
+#define STARTING_FRAME_OFFSET 0
+
+#define FIRST_PARM_OFFSET(FUNDECL) 0
+
+#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, arg_pointer_rtx) \
+ : NULL_RTX)
+
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-4)))
+
+#define INCOMING_FRAME_SP_OFFSET (xstormy16_interrupt_function_p () ? -6 : -4)
+
+
+/* Register That Address the Stack Frame. */
+
+#define STATIC_CHAIN_REGNUM 1
+#define HARD_FRAME_POINTER_REGNUM 13
+#define STACK_POINTER_REGNUM 15
+#define CARRY_REGNUM 16
+#define FRAME_POINTER_REGNUM 17
+#define ARG_POINTER_REGNUM 18
+
+
+/* Eliminating the Frame Pointer and the Arg Pointer. */
+
+#define ELIMINABLE_REGS \
+{ \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+}
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = xstormy16_initial_elimination_offset (FROM, TO)
+
+
+/* Passing Function Arguments on the Stack. */
+
+#define PUSH_ROUNDING(BYTES) (((BYTES) + 1) & ~1)
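+
+/* E.g. PUSH_ROUNDING (3) == 4: pushed arguments are rounded up to a
+   whole number of 2-byte words.  */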
+
+
+/* Function Arguments in Registers. */
+
+#define NUM_ARGUMENT_REGISTERS 6
+#define FIRST_ARGUMENT_REGISTER 2
+
+#define XSTORMY16_WORD_SIZE(TYPE, MODE) \
+ ((((TYPE) ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \
+ + 1) \
+ / 2)
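+
+/* E.g. XSTORMY16_WORD_SIZE yields 1 for a char or an int and 2 for a
+   long: byte sizes are rounded up to whole 2-byte words.  */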
+
+/* For this platform, the value of CUMULATIVE_ARGS is the number of words
+ of arguments that have been passed in registers so far. */
+#define CUMULATIVE_ARGS int
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ (CUM) = 0
+
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ ((REGNO) >= FIRST_ARGUMENT_REGISTER \
+ && (REGNO) < FIRST_ARGUMENT_REGISTER + NUM_ARGUMENT_REGISTERS)
+
+
+/* How Scalar Function Values are Returned. */
+
+/* The number of the hard register that is used to return a scalar value from a
+ function call. */
+#define RETURN_VALUE_REGNUM FIRST_ARGUMENT_REGISTER
+
+
+/* Function Entry and Exit. */
+
+#define EPILOGUE_USES(REGNO) \
+ xstormy16_epilogue_uses (REGNO)
+
+
+/* Generating Code for Profiling. */
+
+/* This declaration must be present, but it can be an abort if profiling is
+ not implemented. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) xstormy16_function_profiler ()
+
+
+/* Trampolines for Nested Functions. */
+
+#define TRAMPOLINE_SIZE 8
+#define TRAMPOLINE_ALIGNMENT 16
+
+
+/* Addressing Modes. */
+
+#define HAVE_POST_INCREMENT 1
+
+#define HAVE_PRE_DECREMENT 1
+
+#define MAX_REGS_PER_ADDRESS 1
+
+#define LEGITIMATE_CONSTANT_P(X) 1
+
+
+/* Describing Relative Costs of Operations. */
+
+#define BRANCH_COST(speed_p, predictable_p) 5
+
+#define SLOW_BYTE_ACCESS 0
+
+#define NO_FUNCTION_CSE
+
+
+/* Dividing the output into sections. */
+
+#define TEXT_SECTION_ASM_OP ".text"
+
+#define DATA_SECTION_ASM_OP ".data"
+
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
+ There are no shared libraries on this target so these sections need
+ not be writable.
+
+ Defined in elfos.h. */
+
+#undef CTORS_SECTION_ASM_OP
+#undef DTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"a\""
+#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"a\""
+
+#define TARGET_ASM_INIT_SECTIONS xstormy16_asm_init_sections
+
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* The Overall Framework of an Assembler File. */
+
+#define ASM_COMMENT_START ";"
+
+#define ASM_APP_ON "#APP\n"
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Output of Data. */
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '|')
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ xstormy16_asm_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 1)
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+ xstormy16_asm_output_aligned_common (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0)
+
+
+/* Output and Generation of Labels. */
+#define SYMBOL_FLAG_XSTORMY16_BELOW100 (SYMBOL_FLAG_MACH_DEP << 0)
+
+#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYMBOL) \
+ do \
+ { \
+ const char *rn = XSTR (SYMBOL, 0); \
+ \
+ if (SYMBOL_REF_FUNCTION_P (SYMBOL)) \
+ ASM_OUTPUT_LABEL_REF ((STREAM), rn); \
+ else \
+ assemble_name (STREAM, rn); \
+ } \
+ while (0)
+
+#define ASM_OUTPUT_LABEL_REF(STREAM, NAME) \
+ do \
+ { \
+ fputs ("@fptr(", STREAM); \
+ assemble_name (STREAM, NAME); \
+ fputc (')', STREAM); \
+ } \
+ while (0)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.globl "
+
+
+/* Output of Assembler Instructions. */
+
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13", "psw", "sp", "carry", "fp", "ap" }
+
+#define ADDITIONAL_REGISTER_NAMES \
+ { { "r14", 14 }, \
+ { "r15", 15 } }
+
+#define REGISTER_PREFIX ""
+#define LOCAL_LABEL_PREFIX "."
+#define USER_LABEL_PREFIX ""
+#define IMMEDIATE_PREFIX "#"
+
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \
+ fprintf (STREAM, "\tpush %d\n", REGNO)
+
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+ fprintf (STREAM, "\tpop %d\n", REGNO)
+
+
+/* Output of dispatch tables. */
+
+/* This port does not use the ASM_OUTPUT_ADDR_VEC_ELT macro, because
+ this could cause label alignment to appear between the 'br' and the table,
+ which would be bad. Instead, it controls the output of the table
+ itself. */
+#define ASM_OUTPUT_ADDR_VEC(LABEL, BODY) \
+ xstormy16_output_addr_vec (file, LABEL, BODY)
+
+/* Alignment for ADDR_VECs is the same as for code. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 1
+
+
+/* Assembler Commands for Exception Regions. */
+
+#define DWARF2_UNWIND_INFO 0
+#define DWARF_CIE_DATA_ALIGNMENT 1
+
+#undef DONT_USE_BUILTIN_SETJMP
+#define JMP_BUF_SIZE 8
+
+/* Assembler Commands for Alignment. */
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
+ fprintf ((STREAM), "\t.p2align %d\n", (POWER))
+
+
+/* Macros Affecting all Debug Formats. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+
+/* Macros for SDB and Dwarf Output. */
+
+/* Define this macro if addresses in Dwarf 2 debugging info should not
+ be the same size as pointers on the target architecture. The
+ macro's value should be the size, in bytes, to use for addresses in
+ the debugging info.
+
+ Some architectures use word addresses to refer to code locations,
+ but Dwarf 2 info always uses byte addresses. On such machines,
+ Dwarf 2 addresses need to be larger than the architecture's
+ pointers. */
+#define DWARF2_ADDR_SIZE 4
+
+
+/* Miscellaneous Parameters. */
+
+#define CASE_VECTOR_MODE SImode
+
+#define WORD_REGISTER_OPERATIONS
+
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+#define MOVE_MAX 2
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define Pmode HImode
+
+#define FUNCTION_MODE HImode
+
+#define NO_IMPLICIT_EXTERN_C
diff --git a/gcc/config/stormy16/stormy16.md b/gcc/config/stormy16/stormy16.md
new file mode 100644
index 000000000..3eb74b72e
--- /dev/null
+++ b/gcc/config/stormy16/stormy16.md
@@ -0,0 +1,1251 @@
+;; XSTORMY16 Machine description template
+;; Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+;; 2010 Free Software Foundation, Inc.
+;; Contributed by Red Hat, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Constraints
+;; a $0
+;; b $1
+;; c $2
+;; d $8
+;; e $0..$7
+;; t $0..$1
+;; z $8..$9
+;; I 0..3
+;; J 2**N mask
+;; K 2**N antimask
+;; L 0..255
+;; M -255..0
+;; N -3..0
+;; O 1..4
+;; P -4..-1
+;; Q post-inc mem (push)
+;; R pre-dec mem (pop)
+;; S immediate mem
+;; T Rx
+;; U -inf..1 or 16..inf
+;; Z 0
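+
+;; For example (illustrative): J matches single-bit masks such as
+;; 0x0010, K matches their complements such as 0xffef, and Q/R match
+;; the (mem (post_inc sp)) and (mem (pre_dec sp)) forms used by the
+;; push/pop patterns below.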
+
+(define_constants
+ [
+ (CARRY_REG 16)
+ ]
+)
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Attributes
+;; ::
+;; ::::::::::::::::::::
+
+; Categorize branches for the conditional in the length attribute.
+(define_attr "branch_class" "notdirectbranch,br12,bcc12,bcc8p2,bcc8p4"
+ (const_string "notdirectbranch"))
+
+; The length of an instruction, used for branch shortening.
+(define_attr "length" ""
+ (cond
+ [(eq_attr "branch_class" "br12")
+ (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -2046))
+ (lt (minus (match_dup 0) (pc)) (const_int 2048)))
+ (const_int 2)
+ (const_int 4))
+ (eq_attr "branch_class" "bcc12")
+ (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -2044))
+ (lt (minus (match_dup 0) (pc)) (const_int 2048)))
+ (const_int 4)
+ (const_int 8))
+ (eq_attr "branch_class" "bcc8p2")
+ (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -124))
+ (lt (minus (match_dup 0) (pc)) (const_int 128)))
+ (const_int 4)
+ (const_int 8))
+ (eq_attr "branch_class" "bcc8p4")
+ (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -122))
+ (lt (minus (match_dup 0) (pc)) (const_int 128)))
+ (const_int 6)
+ (const_int 10))]
+ (const_int 2)))
+
+; The operand which determines the setting of Rpsw.
+; The numbers indicate the operand number,
+; 'clobber' indicates it is changed in some unspecified way
+; 'nop' means it is not changed.
+(define_attr "psw_operand" "clobber,nop,0,1,2,3,4" (const_string "0"))
+
+(define_asm_attributes [(set_attr "length" "4")
+ (set_attr "psw_operand" "clobber")])
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Moves
+;; ::
+;; ::::::::::::::::::::
+;; push/pop qi and hi are here as separate insns rather than part of
+;; the movqi/hi patterns because we need to ensure that reload isn't
+;; passed anything it can't cope with. Without these patterns, we
+;; might end up with
+
+;; (set (mem (post_inc (sp))) (mem (post_inc (reg))))
+
+;; If, in this example, reg needs reloading, reload will read reg from
+;; the stack, adjust sp, and store reg back at what is now the wrong
+;; offset. By using separate patterns for push and pop we ensure that
+;; insns like this one are never generated.
+
+(define_insn "pushqi1"
+ [(set (mem:QI (post_inc (reg:HI 15)))
+ (match_operand:QI 0 "register_operand" "r"))]
+ ""
+ "push %0"
+ [(set_attr "psw_operand" "nop")
+ (set_attr "length" "2")])
+
+(define_insn "popqi1"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (mem:QI (pre_dec (reg:HI 15))))]
+ ""
+ "pop %0"
+ [(set_attr "psw_operand" "nop")
+ (set_attr "length" "2")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_nonstack_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ { xstormy16_expand_move (QImode, operands[0], operands[1]);
+ DONE;
+ })
+
+(define_insn "movqi_internal"
+ [(set (match_operand:QI 0 "nonimmediate_nonstack_operand" "=r,m,e,e,T,r,S,W,e")
+ (match_operand:QI 1 "general_operand" "r,e,m,i,i,i,i,ie,W"))]
+ ""
+ "@
+ mov %0,%1
+ mov.b %0,%1
+ mov.b %0,%1
+ mov %0,%1
+ mov Rx,%1
+ mov %0,%1
+ mov.b %0,%1
+ mov.b %0,%1
+ mov.b %0,%1"
+ [(set_attr_alternative "length"
+ [(const_int 2)
+ (if_then_else (match_operand:QI 0 "short_memory_operand" "")
+ (const_int 2)
+ (const_int 4))
+ (if_then_else (match_operand:QI 1 "short_memory_operand" "")
+ (const_int 2)
+ (const_int 4))
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (const_int 2)
+ (const_int 2)])
+ (set_attr "psw_operand" "0,0,0,0,nop,0,nop,0,0")])
+
+(define_insn "pushhi1"
+ [(set (mem:HI (post_inc (reg:HI 15)))
+ (match_operand:HI 0 "register_operand" "r"))]
+ ""
+ "push %0"
+ [(set_attr "psw_operand" "nop")
+ (set_attr "length" "2")])
+
+(define_insn "pophi1"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mem:HI (pre_dec (reg:HI 15))))]
+ ""
+ "pop %0"
+ [(set_attr "psw_operand" "nop")
+ (set_attr "length" "2")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_nonstack_operand" "")
+ (match_operand:HI 1 "xs_hi_general_operand" ""))]
+ ""
+ { xstormy16_expand_move (HImode, operands[0], operands[1]);
+ DONE;
+ })
+
+(define_insn "movhi_internal"
+ [(set (match_operand:HI 0 "nonimmediate_nonstack_operand" "=r,m,e,e,T,r,S,W,e")
+ (match_operand:HI 1 "xs_hi_general_operand" "r,e,m,L,L,i,i,ie,W"))]
+ ""
+ "@
+ mov %0,%1
+ mov.w %0,%1
+ mov.w %0,%1
+ mov.w %0,%1
+ mov.w Rx,%1
+ mov.w %0,%1
+ mov.w %0,%1
+ mov.w %0,%1
+ mov.w %0,%1"
+ [(set_attr_alternative "length"
+ [(const_int 2)
+ (if_then_else (match_operand:HI 0 "short_memory_operand" "")
+ (const_int 2)
+ (const_int 4))
+ (if_then_else (match_operand:HI 1 "short_memory_operand" "")
+ (const_int 2)
+ (const_int 4))
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (const_int 4)
+ (const_int 4)])
+ (set_attr "psw_operand" "0,0,0,0,nop,0,nop,0,0")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ { xstormy16_expand_move (SImode, operands[0], operands[1]);
+ DONE;
+ })
+
+(define_insn_and_split "*movsi_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Q,r,m,e,&e,e,r,S")
+ (match_operand:SI 1 "general_operand" "r,r,R,e,o, V,L,i,i"))]
+ ""
+ "#"
+ "reload_completed"
+ [(pc)]
+ { xstormy16_split_move (SImode, operands[0], operands[1]);
+ DONE;
+ }
+ [(set_attr_alternative "length"
+ [(const_int 4)
+ (const_int 4)
+ (const_int 4)
+ (if_then_else (match_operand:SI 0 "short_memory_operand" "")
+ (const_int 6)
+ (const_int 8))
+ (if_then_else (match_operand:SI 1 "short_memory_operand" "")
+ (const_int 6)
+ (const_int 8))
+ (if_then_else (match_operand:SI 1 "short_memory_operand" "")
+ (const_int 6)
+ (const_int 8))
+ (const_int 4)
+ (const_int 8)
+ (const_int 8)])])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Conversions
+;; ::
+;; ::::::::::::::::::::
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "0")))]
+ ""
+ "cbw %0")
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=e,r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m,0")))]
+ ""
+ "@
+ mov.b %0, %1
+ shl %0,#8\n\tshr %0,#8"
+ [(set_attr "psw_operand" "nop,0")
+ (set_attr_alternative "length"
+ [(const_int 4)
+ (const_int 8)])])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Bit field extraction
+;; ::
+;; ::::::::::::::::::::
+
+;; Extract an unsigned bit field
+;(define_insn "extzv"
+; [(set (match_operand:SI 0 "register_operand" "=r")
+; (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+; (match_operand:SI 2 "const_int_operand" "n")
+; (match_operand:SI 3 "const_int_operand" "n")))]
+; ""
+; "extzv %0,%1,%2,%3"
+; [(set_attr "length" "4")])
+
+;; Insert a bit field
+;(define_insn "insv"
+; [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+; (match_operand:SI 1 "const_int_operand" "n")
+; (match_operand:SI 2 "const_int_operand" "n"))
+; (match_operand:SI 3 "nonmemory_operand" "ri"))]
+; ""
+; "insv %0,%1,%2,%3"
+; [(set_attr "length" "4")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 16-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+; Note - the early clobber modifier is no longer needed on operand 3
+; and in fact can cause some reload spill failures if it is present.
+; Note that the 'Z' constraint matches "add $reg,0", which reload
+; will occasionally emit. We avoid the "add $reg,imm" match because
+; it clobbers the carry.
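+; The 'Z' alternative's output template is just ";", i.e. no code is
+; emitted, so adding zero this way leaves the hardware carry intact.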
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r,T,T,r,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0,0,0")
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "O,P,Z,L,M,Ir,N,i")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "@
+ inc %0,%o2
+ dec %0,%O2
+ ;
+ add Rx,%2
+ sub Rx,#%n2
+ add %0,%2
+ sub %0,#%n2
+ add %0,%2"
+ [(set_attr "length" "2,2,0,2,2,2,2,4")])
+
+(define_insn "addchi4"
+ [(set (match_operand:HI 0 "register_operand" "=T,r,r")
+ (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "L,Ir,i")))
+ (set (reg:BI CARRY_REG)
+ (truncate:BI (lshiftrt:SI (plus:SI (zero_extend:SI (match_dup 1))
+ (zero_extend:SI (match_dup 2)))
+ (const_int 16))))]
+ ""
+ "@
+ add Rx,%2
+ add %0,%2
+ add %0,%2"
+ [(set_attr "length" "2,2,4")])
+
+(define_insn "addchi5"
+ [(set (match_operand:HI 0 "register_operand" "=T,r,r")
+ (plus:HI (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+ (zero_extend:HI (reg:BI CARRY_REG)))
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "L,Ir,i")))
+ (set (reg:BI CARRY_REG)
+ (truncate:BI (lshiftrt:SI (plus:SI (plus:SI
+ (zero_extend:SI (match_dup 1))
+ (zero_extend:SI (reg:BI CARRY_REG)))
+ (zero_extend:SI (match_dup 2)))
+ (const_int 16))))]
+ ""
+ "@
+ adc Rx,%2
+ adc %0,%2
+ adc %0,%2"
+ [(set_attr "length" "2,2,4")])
+
+;; Subtraction
+; Operand 3 is marked earlyclobber because that helps reload
+; to generate better code---this pattern will never need the
+; carry register as an input, and some output reloads or input
+; reloads might need to use it. In fact, without the '&' reload
+; will fail in some cases.
+(define_insn "subhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,T,T,r,r,r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0,0,0,0,0,0,0")
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "O,P,L,M,rI,M,i")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "@
+ dec %0,%o2
+ inc %0,%O2
+ sub Rx,%2
+ add Rx,#%n2
+ sub %0,%2
+ add %0,#%n2
+ sub %0,%2"
+ [(set_attr "length" "2,2,2,2,2,2,4")])
+
+(define_insn "subchi4"
+ [(set (match_operand:HI 0 "register_operand" "=T,r,r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0,0,0")
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "L,Ir,i")))
+ (set (reg:BI CARRY_REG)
+ (truncate:BI (lshiftrt:SI (minus:SI (zero_extend:SI (match_dup 1))
+ (zero_extend:SI (match_dup 2)))
+ (const_int 16))))]
+ ""
+ "@
+ sub Rx,%2
+ sub %0,%2
+ sub %0,%2"
+ [(set_attr "length" "2,2,4")])
+
+(define_insn "subchi5"
+ [(set (match_operand:HI 0 "register_operand" "=T,r,r")
+ (minus:HI (minus:HI (match_operand:HI 1 "register_operand" "0,0,0")
+ (zero_extend:HI (reg:BI CARRY_REG)))
+ (match_operand:HI 2 "xs_hi_nonmemory_operand" "L,Ir,i")))
+ (set (reg:BI CARRY_REG)
+ (truncate:BI (lshiftrt:SI (minus:SI (minus:SI
+ (zero_extend:SI (match_dup 1))
+ (zero_extend:SI (reg:BI CARRY_REG)))
+ (zero_extend:SI (match_dup 2)))
+ (const_int 16))))]
+ ""
+ "@
+ sbc Rx,%2
+ sbc %0,%2
+ sbc %0,%2"
+ [(set_attr "length" "2,2,4")])
+
+; Basic multiplication
+(define_insn "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI (match_operand:HI 1 "register_operand" "%a")
+ (match_operand:HI 2 "register_operand" "c")))
+ (clobber (match_scratch:HI 3 "=b"))
+ ]
+ ""
+ "mul"
+ [(set_attr "psw_operand" "nop")])
+
+;; Unsigned multiplication producing 32-bit results from 16-bit inputs
+; The constraint on operand 0 is 't' because it is actually two regs
+; long, and both regs must match the constraint.
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=t")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%a"))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "c"))))
+ ]
+ ""
+ "mul"
+ [(set_attr "psw_operand" "nop")])
+
+;; Unsigned division giving both quotient and remainder
+(define_insn "udivmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (udiv:HI (match_operand:HI 1 "register_operand" "a")
+ (match_operand:HI 2 "register_operand" "c")))
+ (set (match_operand:HI 3 "register_operand" "=b")
+ (umod:HI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "div"
+ [(set_attr "psw_operand" "nop")])
+
+;; Signed division giving both quotient and remainder
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (div:HI (match_operand:HI 1 "register_operand" "a")
+ (match_operand:HI 2 "register_operand" "c")))
+ (set (match_operand:HI 3 "register_operand" "=b")
+ (mod:HI (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "sdiv"
+ [(set_attr "psw_operand" "nop")])
+
+;; Signed 32/16 division
+(define_insn "sdivlh"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (div:HI (match_operand:SI 2 "register_operand" "t")
+ (match_operand:HI 3 "register_operand" "c")))
+ (set (match_operand:HI 1 "register_operand" "=b")
+ (mod:HI (match_dup 2)
+ (match_dup 3)))]
+ ""
+ "sdivlh"
+ [(set_attr "psw_operand" "nop")])
+
+;; Unsigned 32/16 division
+(define_insn "udivlh"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (udiv:HI (match_operand:SI 2 "register_operand" "t")
+ (match_operand:HI 3 "register_operand" "c")))
+ (set (match_operand:HI 1 "register_operand" "=b")
+ (umod:HI (match_dup 2)
+ (match_dup 3)))]
+ ""
+ "divlh"
+ [(set_attr "psw_operand" "nop")])
+
+;; Negation
+
+(define_expand "neghi2"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (not:HI (match_operand:HI 1 "register_operand" "")))
+ (parallel [(set (match_dup 0) (plus:HI (match_dup 0) (const_int 1)))
+ (clobber (reg:BI CARRY_REG))])]
+ ""
+ "")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 16-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+;; Arithmetic Shift Left
+(define_insn "ashlhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashift:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "shl %0,%2")
+
+;; Arithmetic Shift Right
+(define_insn "ashrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashiftrt:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "asr %0,%2")
+
+;; Logical Shift Right
+(define_insn "lshrhi3"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (lshiftrt:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "shr %0,%2")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 16-Bit Integer Logical operations
+;; ::
+;; ::::::::::::::::::::
+
+;; Logical AND, 16-bit integers
+(define_insn "andhi3"
+ [(set (match_operand:HI 0 "xstormy16_splittable_below100_or_register" "=T,r,r,r,W")
+ (and:HI (match_operand:HI 1 "xstormy16_below100_or_register" "%0,0,0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "L,r,K,i,K")))]
+ ""
+ "@
+ and Rx,%2
+ and %0,%2
+ clr1 %0,%B2
+ and %0,%2
+ #"
+ [(set_attr "length" "2,2,2,4,2")])
+
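+; The split below implements the 'W' alternative: it picks whichever
+; byte of the HImode value the mask actually affects (the high byte
+; when the low eight bits of the mask are all ones) and rewrites the
+; HImode AND as a QImode AND on just that byte.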
+(define_split
+ [(set (match_operand:HI 0 "xstormy16_below100_operand" "")
+ (and:HI (match_operand:HI 1 "xstormy16_below100_operand" "")
+ (match_operand:HI 2 "xstormy16_onebit_clr_operand" "")))]
+ ""
+ [(set (match_dup 3)
+ (and:QI (match_dup 4)
+ (match_dup 5)))]
+ { int s = ((INTVAL (operands[2]) & 0xff) == 0xff) ? 1 : 0;
+ operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, s);
+ operands[4] = simplify_gen_subreg (QImode, operands[1], HImode, s);
+ operands[5] = simplify_gen_subreg (QImode, operands[2], HImode, s);
+ operands[5] = GEN_INT (INTVAL (operands[5]) | ~ (HOST_WIDE_INT) 0xff);
+ })
+
+;; Inclusive OR, 16-bit integers
+(define_insn "iorhi3"
+ [(set (match_operand:HI 0 "xstormy16_splittable_below100_or_register" "=T,r,r,r,W")
+ (ior:HI (match_operand:HI 1 "xstormy16_below100_or_register" "%0,0,0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "L,r,J,i,J")))]
+ ""
+ "@
+ or Rx,%2
+ or %0,%2
+ set1 %0,%B2
+ or %0,%2
+ #"
+ [(set_attr "length" "2,2,2,4,2")])
+
+(define_split
+ [(set (match_operand:HI 0 "xstormy16_below100_operand" "")
+ (ior:HI (match_operand:HI 1 "xstormy16_below100_operand" "")
+ (match_operand:HI 2 "xstormy16_onebit_set_operand" "")))]
+ ""
+ [(set (match_dup 3)
+ (ior:QI (match_dup 4)
+ (match_dup 5)))]
+ { int s = ((INTVAL (operands[2]) & 0xff) == 0x00) ? 1 : 0;
+ operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, s);
+ operands[4] = simplify_gen_subreg (QImode, operands[1], HImode, s);
+ operands[5] = simplify_gen_subreg (QImode, operands[2], HImode, s);
+ operands[5] = GEN_INT (INTVAL (operands[5]) & 0xff);
+ })
+
+;; Exclusive OR, 16-bit integers
+(define_insn "xorhi3"
+ [(set (match_operand:HI 0 "register_operand" "=T,r,r")
+ (xor:HI (match_operand:HI 1 "register_operand" "%0,0,0")
+ (match_operand:HI 2 "nonmemory_operand" "L,r,i")))]
+ ""
+ "@
+ xor Rx,%2
+ xor %0,%2
+ xor %0,%2"
+ [(set_attr "length" "2,2,4")])
+
+;; One's complement, 16-bit integers
+(define_insn "one_cmplhi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (not:HI (match_operand:HI 1 "register_operand" "0")))]
+ ""
+ "not %0")
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer arithmetic
+;; ::
+;; ::::::::::::::::::::
+
+;; Addition
+(define_insn_and_split "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(pc)]
+ { xstormy16_expand_arith (SImode, PLUS, operands[0], operands[1],
+ operands[2]);
+ DONE;
+ }
+ [(set_attr "length" "4")])
+
+;; Subtraction
+(define_insn_and_split "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "ri")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(pc)]
+ { xstormy16_expand_arith (SImode, MINUS, operands[0], operands[1],
+ operands[2]);
+ DONE;
+ }
+ [(set_attr "length" "4")])
+
+(define_expand "negsi2"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (neg:SI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:BI CARRY_REG))])]
+ ""
+ { operands[2] = gen_reg_rtx (HImode); })
+
+(define_insn_and_split "*negsi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(pc)]
+ { xstormy16_expand_arith (SImode, NEG, operands[0], operands[0],
+ operands[1]);
+ DONE;
+ })
+
+;; ::::::::::::::::::::
+;; ::
+;; :: 32-bit Integer Shifts and Rotates
+;; ::
+;; ::::::::::::::::::::
+
+;; Arithmetic Shift Left
+(define_expand "ashlsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:BI CARRY_REG))
+ (clobber (match_dup 3))])]
+ ""
+ { if (! const_int_operand (operands[2], SImode))
+ FAIL;
+ operands[3] = gen_reg_rtx (HImode);
+ })
+
+;; Arithmetic Shift Right
+(define_expand "ashrsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:BI CARRY_REG))
+ (clobber (match_dup 3))])]
+ ""
+ { if (! const_int_operand (operands[2], SImode))
+ FAIL;
+ operands[3] = gen_reg_rtx (HImode);
+ })
+
+;; Logical Shift Right
+(define_expand "lshrsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:BI CARRY_REG))
+ (clobber (match_dup 3))])]
+ ""
+ { if (! const_int_operand (operands[2], SImode))
+ FAIL;
+ operands[3] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*shiftsi"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operator:SI 4 "shift_operator"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "const_int_operand" "U,n")]))
+ (clobber (reg:BI CARRY_REG))
+ (clobber (match_operand:HI 3 "" "=X,r"))]
+ ""
+ "* return xstormy16_output_shift (SImode, GET_CODE (operands[4]),
+ operands[0], operands[2], operands[3]);"
+ [(set_attr "length" "6,10")
+ (set_attr "psw_operand" "clobber,clobber")])
+
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Branches
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "cbranchhi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "nonmemory_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ {
+ xstormy16_emit_cbranch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3]);
+ DONE;
+})
+
+(define_insn "cbranchhi"
+ [(set (pc)
+ (if_then_else (match_operator:HI 1 "comparison_operator"
+ [(match_operand:HI 2 "nonmemory_operand"
+ "r,e,L")
+ (match_operand:HI 3 "nonmemory_operand"
+ "r,L,e")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "*
+{
+ return xstormy16_output_cbranch_hi (operands[1], \"%l0\", 0, insn);
+}"
+ [(set_attr "branch_class" "bcc12")
+ (set_attr "psw_operand" "0,0,1")])
+
+(define_insn "cbranchhi_neg"
+ [(set (pc)
+ (if_then_else (match_operator:HI 1 "comparison_operator"
+ [(match_operand:HI 2 "nonmemory_operand"
+ "r,e,L")
+ (match_operand:HI 3 "nonmemory_operand"
+ "r,L,e")])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "*
+{
+ return xstormy16_output_cbranch_hi (operands[1], \"%l0\", 1, insn);
+}"
+ [(set_attr "branch_class" "bcc12")
+ (set_attr "psw_operand" "0,0,1")])
+
+(define_insn "*eqbranchsi"
+ [(set (pc)
+ (if_then_else (match_operator:SI 1 "equality_operator"
+ [(match_operand:SI 2 "register_operand"
+ "r")
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (match_operand:SI 3 "register_operand" "=2"))]
+ ""
+ "*
+{
+ return xstormy16_output_cbranch_si (operands[1], \"%l0\", 0, insn);
+}"
+ [(set_attr "branch_class" "bcc8p2")
+ (set_attr "psw_operand" "clobber")])
+
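+;; Branch on the second half of a 32-bit comparison: the insn does a
+;; subtract-with-borrow, stores the difference back, and branches on
+;; the result (the sub/sbc halves of an SImode compare).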
+(define_insn "*ineqbranch_1"
+ [(set (pc)
+ (if_then_else (match_operator:HI 4 "xstormy16_ineqsi_operator"
+ [(minus:HI (match_operand:HI 1 "register_operand" "T,r,r")
+ (zero_extend:HI (reg:BI CARRY_REG)))
+ (match_operand:HI 3 "nonmemory_operand" "L,r,i")])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (match_operand:HI 2 "register_operand" "=1,1,1")
+ (minus:HI (minus:HI (match_dup 1) (zero_extend:HI (reg:BI CARRY_REG)))
+ (match_dup 3)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "*
+{
+ return xstormy16_output_cbranch_si (operands[4], \"%l0\", 0, insn);
+}"
+ [(set_attr "branch_class" "bcc8p2,bcc8p2,bcc8p4")
+ (set_attr "psw_operand" "2,2,2")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Call and branch instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Subroutine call instruction returning no value. Operand 0 is the function
+;; to call; operand 1 is the number of bytes of arguments pushed (in mode
+;; `SImode', except it is normally a `const_int'); operand 2 is the number of
+;; registers used as operands.
+
+;; On most machines, operand 2 is not actually stored into the RTL pattern. It
+;; is supplied for the sake of some RISC machines which need to put this
+;; information into the assembler code; they can put it in the RTL instead of
+;; operand 1.
+
+(define_expand "call"
+ [(call (match_operand:HI 0 "memory_operand" "m")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "immediate_operand" ""))]
+ ""
+ "xstormy16_expand_call (NULL_RTX, operands[0], operands[1]); DONE;")
+
+;; Subroutine call instruction returning a value. Operand 0 is the hard
+;; register in which the value is returned. There are three more operands, the
+;; same as the three operands of the `call' instruction (but with numbers
+;; increased by one).
+
+;; Subroutines that return `BLKmode' objects use the `call' insn.
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand" "=r")
+ (call (match_operand:HI 1 "memory_operand" "m")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand 3 "immediate_operand" ""))]
+ ""
+ "xstormy16_expand_call (operands[0], operands[1], operands[2]); DONE;")
+
+(define_insn "*call_internal"
+ [(call (mem:HI (match_operand:HI 0 "nonmemory_operand" "i,r"))
+ (match_operand 1 "" ""))
+ (use (match_operand:HI 2 "nonmemory_operand" "X,z"))]
+ ""
+ "@
+ callf %C0
+ call %2,%0"
+ [(set_attr "length" "4,2")
+ (set_attr "psw_operand" "clobber")])
+
+(define_insn "*call_value_internal"
+ [(set (match_operand 3 "register_operand" "=r,r")
+ (call (mem:HI (match_operand:HI 0 "nonmemory_operand" "i,r"))
+ (match_operand 1 "" "")))
+ (use (match_operand:HI 2 "nonmemory_operand" "X,z"))]
+ ""
+ "@
+ callf %C0
+ call %2,%0"
+ [(set_attr "length" "4,2")
+ (set_attr "psw_operand" "clobber")])
+
+;; Subroutine return
+(define_expand "return"
+ [(return)]
+ "direct_return()"
+ "")
+
+(define_insn "return_internal"
+ [(return)]
+ ""
+ "ret"
+ [(set_attr "psw_operand" "nop")])
+
+(define_insn "return_internal_interrupt"
+ [(return)
+ (unspec_volatile [(const_int 0)] 1)]
+ ""
+ "iret"
+ [(set_attr "psw_operand" "clobber")])
+
+;; Normal unconditional jump
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ return xstormy16_output_cbranch_hi (NULL_RTX, \"%l0\", 0, insn);
+}"
+ [(set_attr "branch_class" "br12")
+ (set_attr "psw_operand" "nop")])
+
+;; Indirect jump through a register
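+;; The jmp instruction takes an extra register (constraint 'z') that
+;; supplies the high part of the full code address; indirect jumps
+;; stay within the low 64K, so the expander zeroes it first.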
+(define_expand "indirect_jump"
+ [(set (match_dup 1) (const_int 0))
+ (parallel [(set (pc) (match_operand:HI 0 "register_operand" ""))
+ (use (match_dup 1))])]
+ ""
+ "operands[1] = gen_reg_rtx (HImode);")
+
+(define_insn ""
+ [(set (pc) (match_operand:HI 0 "register_operand" "r"))
+ (use (match_operand:HI 1 "register_operand" "z"))]
+ ""
+ "jmp %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+;; Table-based switch statements.
+(define_expand "casesi"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "immediate_operand" ""))
+ (use (match_operand:SI 2 "immediate_operand" ""))
+ (use (label_ref (match_operand 3 "" "")))
+ (use (label_ref (match_operand 4 "" "")))]
+ ""
+ "
+{
+ xstormy16_expand_casesi (operands[0], operands[1], operands[2],
+ operands[3], operands[4]);
+ DONE;
+}")
+
+(define_insn "tablejump_pcrel"
+ [(set (pc) (mem:HI (plus:HI (pc)
+ (match_operand:HI 0 "register_operand" "r"))))
+ (use (label_ref:SI (match_operand 1 "" "")))]
+ ""
+ "br %0"
+ [(set_attr "psw_operand" "nop")])
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Prologue and Epilogue instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; Called after register allocation to add any instructions needed for
+;; the prologue. Using a prologue insn is favored compared to putting
+;; all of the instructions in the TARGET_ASM_FUNCTION_PROLOGUE macro,
+;; since it allows the scheduler to intermix instructions with the
+;; saves of the caller saved registers. In some cases, it might be
+;; necessary to emit a barrier instruction as the last insn to prevent
+;; such scheduling.
+(define_expand "prologue"
+ [(const_int 1)]
+ ""
+ {
+ xstormy16_expand_prologue ();
+ DONE;
+ })
+
+;; Called after register allocation to add any instructions needed for
+;; the epilogue. Using an epilogue insn is favored compared to putting
+;; all of the instructions in the TARGET_ASM_FUNCTION_EPILOGUE macro,
+;; since it allows the scheduler to intermix instructions with the
+;; restores of the caller saved registers. In some cases, it might be
+;; necessary to emit a barrier instruction as the first insn to
+;; prevent such scheduling.
+(define_expand "epilogue"
+ [(const_int 2)]
+ ""
+ {
+ xstormy16_expand_epilogue ();
+ DONE;
+ })
+
+;; ::::::::::::::::::::
+;; ::
+;; :: Miscellaneous instructions
+;; ::
+;; ::::::::::::::::::::
+
+;; No operation, needed in case the user uses -g but not -O.
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "psw_operand" "nop")])
+
+;; Pseudo instruction that prevents the scheduler from moving code above this
+;; point.
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "psw_operand" "nop")])
+
+;;---------------------------------------------------------------------------
+
+(define_expand "iorqi3"
+ [(match_operand:QI 0 "xstormy16_below100_or_register" "")
+ (match_operand:QI 1 "xstormy16_below100_or_register" "")
+ (match_operand:QI 2 "nonmemory_operand" "")]
+ ""
+ {
+ xstormy16_expand_iorqi3 (operands);
+ DONE;
+ })
+
+(define_insn "iorqi3_internal"
+ [(set (match_operand:QI 0 "xstormy16_below100_or_register" "=Wr")
+ (ior:QI (match_operand:QI 1 "xstormy16_below100_or_register" "0")
+ (match_operand:QI 2 "xstormy16_onebit_set_operand" "i")))]
+ ""
+ "set1 %0,%B2"
+ [(set_attr "length" "2")
+ (set_attr "psw_operand" "0")])
+
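+; The peephole below spots a load / one-bit OR / store-back sequence
+; on the same below-100 address and collapses it into a single set1
+; on memory (the iorqi3_internal pattern above).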
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand" "")
+ (match_operand:QI 1 "xstormy16_below100_operand" ""))
+ (set (match_operand:HI 2 "register_operand" "")
+ (ior:HI (match_operand:HI 3 "register_operand" "")
+ (match_operand:QI 4 "xstormy16_onebit_set_operand" "")))
+ (set (match_operand:QI 5 "xstormy16_below100_operand" "")
+ (match_operand:QI 6 "register_operand" ""))
+ ]
+ "REGNO (operands[0]) == REGNO (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && REGNO (operands[0]) == REGNO (operands[6])
+ && rtx_equal_p (operands[1], operands[5])"
+ [(set (match_dup 1)
+ (ior:QI (match_dup 1)
+ (match_dup 4)))
+ ]
+ "")
+
+
+(define_expand "andqi3"
+ [(match_operand:QI 0 "xstormy16_below100_or_register" "")
+ (match_operand:QI 1 "xstormy16_below100_or_register" "")
+ (match_operand:QI 2 "nonmemory_operand" "")]
+ ""
+ {
+ xstormy16_expand_andqi3 (operands);
+ DONE;
+ })
+
+(define_insn "andqi3_internal"
+ [(set (match_operand:QI 0 "xstormy16_below100_or_register" "=Wr")
+ (and:QI (match_operand:QI 1 "xstormy16_below100_or_register" "0")
+ (match_operand:QI 2 "xstormy16_onebit_clr_operand" "i")))]
+ ""
+ "clr1 %0,%B2"
+ [(set_attr "length" "2")
+ (set_attr "psw_operand" "0")])
+
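+; The first peephole below folds the immediate AND into a following
+; zero-extension of the low byte; the second collapses a load / clr1
+; / store-back sequence on a below-100 address, mirroring the OR
+; peephole above.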
+(define_peephole2
+ [(set (match_operand:HI 0 "register_operand" "")
+ (and:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand 2 "immediate_operand" "")))
+ (set (match_operand:HI 3 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 4 "register_operand" "")))
+ ]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && REGNO (operands[0]) == REGNO (operands[4])"
+ [(set (match_dup 0)
+ (and:HI (match_dup 1)
+ (match_dup 5)))
+ ]
+ "operands[5] = GEN_INT (INTVAL (operands[2]) & 0xff);")
+
+(define_peephole2
+ [(set (match_operand:QI 0 "register_operand" "")
+ (match_operand:QI 1 "xstormy16_below100_operand" ""))
+ (set (match_operand:HI 2 "register_operand" "")
+ (and:HI (match_operand:HI 3 "register_operand" "")
+ (match_operand:QI 4 "xstormy16_onebit_clr_operand" "")))
+ (set (match_operand:QI 5 "xstormy16_below100_operand" "")
+ (match_operand:QI 6 "register_operand" ""))
+ ]
+ "REGNO (operands[0]) == REGNO (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && REGNO (operands[0]) == REGNO (operands[6])
+ && rtx_equal_p (operands[1], operands[5])"
+ [(set (match_dup 1)
+ (and:QI (match_dup 1)
+ (match_dup 4)))
+ ]
+ "")
+
+;; GCC uses different techniques to optimize MSB and LSB accesses, so
+;; we have to code those separately.
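+;; Each pattern below matches one canonical form of "branch if bit N
+;; of a below-100 byte is set/clear" and maps it onto bp/bn;
+;; *bset7/*bclr7 and *bset15/*bclr15 cover the bit-7 (sign bit)
+;; idioms.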
+
+(define_insn "*bclrx"
+ [(set (pc)
+ (if_then_else (eq:HI (and:QI (match_operand:QI 1 "xstormy16_below100_operand" "W")
+ (match_operand:HI 2 "immediate_operand" "i"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bn %1,%B2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bclrx2"
+ [(set (pc)
+ (if_then_else (zero_extract:HI
+ (xor:HI (subreg:HI
+ (match_operand:QI 1 "xstormy16_below100_operand" "W") 0)
+ (match_operand:HI 2 "xstormy16_onebit_set_operand" "J"))
+ (const_int 1)
+ (match_operand:HI 3 "immediate_operand" "i"))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bn %1,%B2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bclrx3"
+ [(set (pc)
+ (if_then_else (eq:HI (and:HI (zero_extend:HI (match_operand:QI 1 "xstormy16_below100_operand" "W"))
+ (match_operand:HI 2 "immediate_operand" "i"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bn %1,%B2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bclr7"
+ [(set (pc)
+ (if_then_else (xor:HI (lshiftrt:HI (subreg:HI
+ (match_operand:QI 1 "xstormy16_below100_operand" "W") 0)
+ (const_int 7))
+ (const_int 1))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bn %1,#7,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bclr15"
+ [(set (pc)
+ (if_then_else (ge:HI (sign_extend:HI (match_operand:QI 1 "xstormy16_below100_operand" "W"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bn %1,#7,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bsetx"
+ [(set (pc)
+ (if_then_else (ne:HI (and:QI (match_operand:QI 1 "xstormy16_below100_operand" "W")
+ (match_operand:HI 2 "immediate_operand" "i"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bp %1,%B2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bsetx2"
+ [(set (pc)
+ (if_then_else (zero_extract:HI (match_operand:QI 1 "xstormy16_below100_operand" "W")
+ (const_int 1)
+ (match_operand:HI 2 "immediate_operand" "i"))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bp %1,%b2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bsetx3"
+ [(set (pc)
+ (if_then_else (ne:HI (and:HI (zero_extend:HI (match_operand:QI 1 "xstormy16_below100_operand" "W"))
+ (match_operand:HI 2 "immediate_operand" "i"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bp %1,%B2,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bset7"
+ [(set (pc)
+ (if_then_else (lshiftrt:HI (subreg:HI (match_operand:QI 1 "xstormy16_below100_operand" "W") 0)
+ (const_int 7))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bp %1,#7,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
+
+(define_insn "*bset15"
+ [(set (pc)
+ (if_then_else (lt:HI (sign_extend:HI (match_operand:QI 1 "xstormy16_below100_operand" "W"))
+ (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:BI CARRY_REG))]
+ ""
+ "bp %1,#7,%l0"
+ [(set_attr "length" "4")
+ (set_attr "psw_operand" "nop")])
diff --git a/gcc/config/stormy16/stormy16.opt b/gcc/config/stormy16/stormy16.opt
new file mode 100644
index 000000000..b71e9ad90
--- /dev/null
+++ b/gcc/config/stormy16/stormy16.opt
@@ -0,0 +1,24 @@
+; Options for the XSTORMY16 port of the compiler.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; Not used by the compiler
+msim
+Target RejectNegative
+Provide libraries for the simulator
diff --git a/gcc/config/stormy16/t-stormy16 b/gcc/config/stormy16/t-stormy16
new file mode 100644
index 000000000..62b4e9fa1
--- /dev/null
+++ b/gcc/config/stormy16/t-stormy16
@@ -0,0 +1,53 @@
+# -*- makefile -*-
+#
+# Copyright (C) 2001, 2004, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# SImode arithmetic and logical routines, HImode bit counting routines.
+LIB2FUNCS_EXTRA = \
+ $(srcdir)/config/stormy16/stormy16-lib2-udivmodsi4.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-divsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-modsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-udivsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-umodsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-ashlsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-ashrsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-lshrsi3.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-popcounthi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-parityhi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-clzhi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-ctzhi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-ffshi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-cmpsi2.c \
+ $(srcdir)/config/stormy16/stormy16-lib2-ucmpsi2.c
+
+# Floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
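+# The rules below synthesize both sources from the generic
+# $(srcdir)/config/fp-bit.c: defining FLOAT selects the
+# single-precision routines; the plain copy gives double precision.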
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ cat $(srcdir)/config/fp-bit.c > dp-bit.c
+
+TARGET_LIBGCC2_CFLAGS = -O2
diff --git a/gcc/config/svr3.h b/gcc/config/svr3.h
new file mode 100644
index 000000000..243206245
--- /dev/null
+++ b/gcc/config/svr3.h
@@ -0,0 +1,149 @@
+/* Operating system specific defines to be used when targeting GCC for
+ generic System V Release 3 system.
+ Copyright (C) 1991, 1996, 2000, 2002, 2004, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Ron Guilmette (rfg@monkeys.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Define a symbol indicating that we are using svr3.h. */
+#define USING_SVR3_H
+
+/* Define a symbol so that libgcc* can know what sort of operating
+ environment and assembler syntax we are targeting for. */
+#define SVR3_target
+
+/* Assembler, linker, library, and startfile spec's. */
+
+/* The .file command should always begin the output. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+/* We don't use ROUNDED because the standard compiler doesn't,
+ and the linker gives error messages if a common symbol
+ has more than one length value. */
+
+#undef ASM_OUTPUT_COMMON
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%lu\n", (unsigned long)(SIZE)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+/* Note that using bss_section here caused errors
+ in building shared libraries on system V.3. */
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+ do { \
+ int align = exact_log2 (ROUNDED); \
+ if (align > 2) align = 2; \
+ switch_to_section (data_section); \
+ ASM_OUTPUT_ALIGN ((FILE), align == -1 ? 2 : align); \
+ ASM_OUTPUT_LABEL ((FILE), (NAME)); \
+ fprintf ((FILE), "\t.set .,.+%u\n", (int)(ROUNDED)); \
+ } while (0)
+
+/* Output #ident as a .ident. */
+
+#undef ASM_OUTPUT_IDENT
+#define ASM_OUTPUT_IDENT(FILE, NAME) \
+ fprintf (FILE, "\t.ident \"%s\"\n", NAME);
+
+/* Use periods rather than dollar signs in special g++ assembler names. */
+
+#define NO_DOLLAR_IN_LABEL
+
+/* System V Release 3 uses COFF debugging info. */
+
+#define SDB_DEBUGGING_INFO 1
+
+/* We don't want to output DBX debugging information. */
+
+#undef DBX_DEBUGGING_INFO
+
+/* The prefix to add to user-visible assembler symbols.
+
+ For System V Release 3 the convention is to prepend a leading
+ underscore onto user-level symbol names. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'.
+
+ For most svr3 systems, the convention is that any symbol which begins
+ with a period is not put into the linker symbol table by the assembler. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long)(NUM))
+
+/* We want local labels to start with period if made with asm_fprintf. */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Support const sections and the ctors and dtors sections for g++. */
+
+/* Define a few machine-specific details of the implementation of
+ constructors.
+
+ The __CTORS_LIST__ goes in the .init section. Define CTOR_LIST_BEGIN
+ and CTOR_LIST_END to contribute to the .init section an instruction to
+ push a word containing 0 (or some equivalent of that).
+
+ Define TARGET_ASM_CONSTRUCTOR to push the address of the constructor. */
+
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#define FINI_SECTION_ASM_OP "\t.section .fini,\"x\""
+#define DTORS_SECTION_ASM_OP FINI_SECTION_ASM_OP
+
+/* CTOR_LIST_BEGIN and CTOR_LIST_END are machine-dependent
+ because they push on the stack. */
+
+#ifndef STACK_GROWS_DOWNWARD
+
+/* Constructor list on stack is in reverse order. Go to the end of the
+ list and go backwards to call constructors in the right order. */
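+/* Note: alloca (0) is used below to obtain the current stack
+   pointer, which points at the most recently pushed end of the
+   constructor list that the .init fragments built on the stack.  */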
+#define DO_GLOBAL_CTORS_BODY \
+do { \
+ func_ptr *p, *beg = alloca (0); \
+ for (p = beg; *p; p++) \
+ ; \
+ while (p != beg) \
+ (*--p) (); \
+} while (0)
+
+#else
+
+/* Constructor list on stack is in correct order. Just call them. */
+#define DO_GLOBAL_CTORS_BODY \
+do { \
+ func_ptr *p, *beg = alloca (0); \
+ for (p = beg; *p; ) \
+ (*p++) (); \
+} while (0)
+
+#endif /* STACK_GROWS_DOWNWARD */
diff --git a/gcc/config/sync.c b/gcc/config/sync.c
new file mode 100644
index 000000000..eacdce664
--- /dev/null
+++ b/gcc/config/sync.c
@@ -0,0 +1,198 @@
+/* Out-of-line libgcc versions of __sync_* builtins. */
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file is used by targets whose makefiles define LIBGCC_SYNC
+ to "yes". It is compiled with LIBGCC_SYNC_CFLAGS and provides
+ out-of-line versions of all relevant __sync_* primitives.
+
+ These routines are intended for targets like MIPS that have two
+ ISA encodings (the "normal" ISA and the MIPS16 ISA). The normal
+ ISA provides full synchronization capabilities but the MIPS16 ISA
+ has no encoding for them. MIPS16 code must therefore call external
+ non-MIPS16 implementations of the __sync_* routines.
+
+ The file is compiled once for each routine. The following __foo
+ routines are selected by defining a macro called L<foo>:
+
+ __sync_synchronize
+
+ The following __foo_N routines are selected by defining FN=foo
+ and SIZE=N:
+
+ __sync_fetch_and_add_N
+ __sync_fetch_and_sub_N
+ __sync_fetch_and_or_N
+ __sync_fetch_and_and_N
+ __sync_fetch_and_xor_N
+ __sync_fetch_and_nand_N
+ __sync_add_and_fetch_N
+ __sync_sub_and_fetch_N
+ __sync_or_and_fetch_N
+ __sync_and_and_fetch_N
+ __sync_xor_and_fetch_N
+ __sync_nand_and_fetch_N
+ __sync_bool_compare_and_swap_N
+ __sync_val_compare_and_swap_N
+ __sync_lock_test_and_set_N
+
+ SIZE can be 1, 2, 4, 8 or 16. __foo_N is omitted if the target does
+ not provide __sync_compare_and_swap_N.
+
+ Note that __sync_lock_release does not fall back on external
+ __sync_lock_release_N functions. The default implementation
+ of __sync_lock_release is a call to __sync_synchronize followed
+ by a store of zero, so we don't need separate library functions
+ for it. */
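+
+/* A worked example (illustrative): building this file with, say,
+   -DFN=sync_fetch_and_add -DSIZE=4 selects the SIZE == 4 branch
+   below, and the macros expand to roughly
+
+     static USItype
+     sync_fetch_and_add_4 (USItype *ptr, USItype value)
+     {
+       return __sync_fetch_and_add (ptr, value);
+     }
+     typeof (sync_fetch_and_add_4) __sync_fetch_and_add_4
+       __attribute__((alias ("sync_fetch_and_add_4")));
+
+   i.e. an out-of-line __sync_fetch_and_add_4 wrapping the inline
+   builtin.  (The exact -D flags are whatever the target makefiles
+   pass; see the FN/SIZE description above.)  */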
+
+#if defined FN
+
+/* Define macros for each __sync_* function type. Each macro defines a
+ local function called <NAME>_<UNITS> that acts like __<NAME>_<UNITS>.
+ TYPE is a type that has UNITS bytes. */
+
+#define DEFINE_V_PV(NAME, UNITS, TYPE) \
+ static TYPE \
+ NAME##_##UNITS (TYPE *ptr, TYPE value) \
+ { \
+ return __##NAME (ptr, value); \
+ }
+
+#define DEFINE_V_PVV(NAME, UNITS, TYPE) \
+ static TYPE \
+ NAME##_##UNITS (TYPE *ptr, TYPE value1, TYPE value2) \
+ { \
+ return __##NAME (ptr, value1, value2); \
+ }
+
+#define DEFINE_BOOL_PVV(NAME, UNITS, TYPE) \
+ static _Bool \
+ NAME##_##UNITS (TYPE *ptr, TYPE value1, TYPE value2) \
+ { \
+ return __##NAME (ptr, value1, value2); \
+ }
+
+/* Map function names to the appropriate DEFINE_* macro. */
+
+#define local_sync_fetch_and_add DEFINE_V_PV
+#define local_sync_fetch_and_sub DEFINE_V_PV
+#define local_sync_fetch_and_or DEFINE_V_PV
+#define local_sync_fetch_and_and DEFINE_V_PV
+#define local_sync_fetch_and_xor DEFINE_V_PV
+#define local_sync_fetch_and_nand DEFINE_V_PV
+
+#define local_sync_add_and_fetch DEFINE_V_PV
+#define local_sync_sub_and_fetch DEFINE_V_PV
+#define local_sync_or_and_fetch DEFINE_V_PV
+#define local_sync_and_and_fetch DEFINE_V_PV
+#define local_sync_xor_and_fetch DEFINE_V_PV
+#define local_sync_nand_and_fetch DEFINE_V_PV
+
+#define local_sync_bool_compare_and_swap DEFINE_BOOL_PVV
+#define local_sync_val_compare_and_swap DEFINE_V_PVV
+
+#define local_sync_lock_test_and_set DEFINE_V_PV
+
+/* Define the function __<NAME>_<UNITS>, given that TYPE is a type with
+ UNITS bytes. */
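+/* The negative-size array below is a pre-C11 compile-time assertion
+   that TYPE really is UNITS bytes wide.  */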
+#define DEFINE1(NAME, UNITS, TYPE) \
+ static int unused[sizeof (TYPE) == UNITS ? 1 : -1] \
+ __attribute__((unused)); \
+ local_##NAME (NAME, UNITS, TYPE); \
+ typeof (NAME##_##UNITS) __##NAME##_##UNITS \
+ __attribute__((alias (#NAME "_" #UNITS)));
+
+/* As above, but performing macro expansion on the arguments. */
+#define DEFINE(NAME, UNITS, TYPE) DEFINE1 (NAME, UNITS, TYPE)
+
+/* Find an appropriate type TYPE for SIZE and invoke DEFINE (FN, SIZE, TYPE).
+
+ The types chosen here may be incorrect for some targets.
+ For example, targets with 16-byte atomicity support might not
+ support OImode. We would need some kind of target-specific
+ override if that becomes a problem. */
+
+#if SIZE == 1 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
+
+typedef unsigned int UQItype __attribute__((mode (QI)));
+DEFINE (FN, 1, UQItype)
+
+#elif SIZE == 2 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
+
+typedef unsigned int UHItype __attribute__((mode (HI)));
+DEFINE (FN, 2, UHItype)
+
+#elif SIZE == 4 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+
+typedef unsigned int USItype __attribute__((mode (SI)));
+DEFINE (FN, 4, USItype)
+
+#elif SIZE == 8 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+
+typedef unsigned int UDItype __attribute__((mode (DI)));
+DEFINE (FN, 8, UDItype)
+
+#elif SIZE == 16 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+
+typedef unsigned int UOItype __attribute__((mode (OI)));
+DEFINE (FN, 16, UOItype)
+
+#endif
+
+#elif __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 \
+ || __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 \
+ || __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 \
+ || __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 \
+ || __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
+
+#if defined Lsync_synchronize
+
+static void
+sync_synchronize (void)
+{
+ __sync_synchronize ();
+}
+typeof (sync_synchronize) __sync_synchronize \
+ __attribute__((alias ("sync_synchronize")));
+
+#endif
+
+#endif
diff --git a/gcc/config/t-darwin b/gcc/config/t-darwin
new file mode 100644
index 000000000..ff6078782
--- /dev/null
+++ b/gcc/config/t-darwin
@@ -0,0 +1,63 @@
+# Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+TM_H += $(srcdir)/config/darwin-sections.def
+
+darwin.o: $(srcdir)/config/darwin.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(REAL_H) insn-config.h \
+ conditions.h insn-flags.h output.h insn-attr.h flags.h $(TREE_H) expr.h \
+ reload.h function.h $(GGC_H) langhooks.h $(TARGET_H) $(TM_P_H) gt-darwin.h \
+ config/darwin-sections.def
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/darwin.c
+
+darwin-c.o: $(srcdir)/config/darwin-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(CPPLIB_H) $(TREE_H) $(C_PRAGMA_H) $(TM_P_H) \
+ incpath.h flags.h $(C_COMMON_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/darwin-c.c $(PREPROCESSOR_DEFINES)
+
+darwin-f.o: $(srcdir)/config/darwin-f.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/darwin-f.c $(PREPROCESSOR_DEFINES)
+
+darwin-driver.o: $(srcdir)/config/darwin-driver.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(GCC_H) opts.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/darwin-driver.c
+
+# How to build crt3.o
+EXTRA_MULTILIB_PARTS=crt3.o
+# Pass -fno-tree-dominator-opts to work around bug 26840.
+$(T)crt3$(objext): $(srcdir)/config/darwin-crt3.c $(GCC_PASSES) \
+ $(TCONFIG_H) stmp-int-hdrs tsystem.h
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) \
+ -fno-tree-dominator-opts $(DARWIN_EXTRA_CRT_BUILD_CFLAGS) \
+ -c $(srcdir)/config/darwin-crt3.c -o $(T)crt3$(objext)
+
+# Use unwind-dw2-fde-darwin
+LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-darwin.c \
+ $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
+
+# -pipe because there's an assembler bug, 4077127, which causes
+# it to mishandle the first # directive: temporary file names then
+# appear in stabs and break the bootstrap.  Using -pipe avoids
+# temporary files altogether and so sidesteps the bug.
+TARGET_LIBGCC2_CFLAGS = -fPIC -pipe
diff --git a/gcc/config/t-dfprules b/gcc/config/t-dfprules
new file mode 100644
index 000000000..6bf6246d8
--- /dev/null
+++ b/gcc/config/t-dfprules
@@ -0,0 +1,10 @@
+# Use DFP_ENABLE to build decimal floating point support routines for
+# all decimal floating point types (32-bit, 64-bit and 128-bit). We
+# use `true' for clarity, but any value will do.
+#
+DFP_ENABLE = true
+
+# DFP_CFLAGS can be used to pass target-specific CFLAGS when compiling
+# dfp-bit.c. This is useful for overriding the definition of macros.
+#
+# DFP_CFLAGS = -DFOO=bar
diff --git a/gcc/config/t-freebsd b/gcc/config/t-freebsd
new file mode 100644
index 000000000..c1b086ea1
--- /dev/null
+++ b/gcc/config/t-freebsd
@@ -0,0 +1,10 @@
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
+
+# Compile libgcc.a with pic.
+TARGET_LIBGCC2_CFLAGS += -fPIC
+
+# Use unwind-dw2-fde-glibc
+LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c \
+ $(srcdir)/unwind-sjlj.c $(srcdir)/gthr-gnat.c $(srcdir)/unwind-c.c
+LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
diff --git a/gcc/config/t-freebsd-thread b/gcc/config/t-freebsd-thread
new file mode 100644
index 000000000..6e5c64f78
--- /dev/null
+++ b/gcc/config/t-freebsd-thread
@@ -0,0 +1,2 @@
+# This is currently needed to compile libgcc2 for threads support
+TARGET_LIBGCC2_CFLAGS += -pthread
diff --git a/gcc/config/t-gnu b/gcc/config/t-gnu
new file mode 100644
index 000000000..7be5d00a7
--- /dev/null
+++ b/gcc/config/t-gnu
@@ -0,0 +1,2 @@
+# In GNU, "/usr" is a four-letter word.
+NATIVE_SYSTEM_HEADER_DIR = /include
diff --git a/gcc/config/t-libc-ok b/gcc/config/t-libc-ok
new file mode 100644
index 000000000..561ee0b31
--- /dev/null
+++ b/gcc/config/t-libc-ok
@@ -0,0 +1 @@
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
diff --git a/gcc/config/t-libgcc-pic b/gcc/config/t-libgcc-pic
new file mode 100644
index 000000000..ff935fe1e
--- /dev/null
+++ b/gcc/config/t-libgcc-pic
@@ -0,0 +1,2 @@
+# Compile libgcc2.a with pic.
+TARGET_LIBGCC2_CFLAGS = -fPIC
diff --git a/gcc/config/t-libunwind b/gcc/config/t-libunwind
new file mode 100644
index 000000000..6fdaf676b
--- /dev/null
+++ b/gcc/config/t-libunwind
@@ -0,0 +1,30 @@
+# Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Use the system libunwind library.
+#
+# Override the default value from t-slibgcc-elf-ver and mention -lunwind
+# so that the resulting libgcc_s.so has the necessary DT_NEEDED entry for
+# libunwind.
+SHLIB_LC = -lunwind -lc
+LIB2ADDEH = $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c \
+ $(srcdir)/unwind-compat.c $(srcdir)/unwind-dw2-fde-compat.c
+LIB2ADDEHSTATIC = $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+
+T_CFLAGS += -DUSE_LIBUNWIND_EXCEPTIONS
+TARGET_LIBGCC2_CFLAGS += -DUSE_GAS_SYMVER
diff --git a/gcc/config/t-libunwind-elf b/gcc/config/t-libunwind-elf
new file mode 100644
index 000000000..5ae0d62de
--- /dev/null
+++ b/gcc/config/t-libunwind-elf
@@ -0,0 +1,52 @@
+# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build libunwind for ELF with the GNU linker.
+
+# Use unwind-dw2-fde-glibc
+LIBUNWIND = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c
+LIBUNWINDDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
+
+SHLIBUNWIND_SOVERSION = 7
+SHLIBUNWIND_SONAME = @shlib_base_name@.so.$(SHLIBUNWIND_SOVERSION)
+
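+# Link to a .tmp name first, back up any existing library, then
+# rename it into place and re-point the .so symlink, so an
+# interrupted link never leaves a half-written library behind.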
+SHLIBUNWIND_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared \
+ -nodefaultlibs -Wl,-h,$(SHLIBUNWIND_SONAME) \
+ -Wl,-z,text -Wl,-z,defs -o $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME).tmp \
+ @multilib_flags@ $(SHLIB_OBJS) -lc && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME) \
+ $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME).tmp \
+ $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME) && \
+ $(LN_S) $(SHLIBUNWIND_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
+
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIBUNWIND_INSTALL = \
+ $$(SHELL) $$(srcdir)/mkinstalldirs $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIBUNWIND_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIBUNWIND_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+ $(LN_S) $(SHLIBUNWIND_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
diff --git a/gcc/config/t-linux b/gcc/config/t-linux
new file mode 100644
index 000000000..1a7d79e21
--- /dev/null
+++ b/gcc/config/t-linux
@@ -0,0 +1,32 @@
+# Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+# 2004 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
+# Compile libgcc2.a with pic.
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+# Override t-slibgcc-elf-ver to export some libgcc symbols with
+# the symbol versions that glibc used.
+SHLIB_MAPFILES += $(srcdir)/config/libgcc-glibc.ver
+
+# Use unwind-dw2-fde-glibc
+LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c \
+ $(srcdir)/unwind-sjlj.c $(srcdir)/gthr-gnat.c $(srcdir)/unwind-c.c
+LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
diff --git a/gcc/config/t-lynx b/gcc/config/t-lynx
new file mode 100644
index 000000000..ab6d2675c
--- /dev/null
+++ b/gcc/config/t-lynx
@@ -0,0 +1,33 @@
+# Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
+
+# Compile libgcc2.a with pic.
+TARGET_LIBGCC2_CFLAGS = -fPIC
+
+MULTILIB_OPTIONS = mthreads
+MULTILIB_DIRNAMES = thread
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# Local Variables:
+# mode: makefile
+# End:
diff --git a/gcc/config/t-netbsd b/gcc/config/t-netbsd
new file mode 100644
index 000000000..34949e128
--- /dev/null
+++ b/gcc/config/t-netbsd
@@ -0,0 +1,2 @@
+# Always build crtstuff with PIC.
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
diff --git a/gcc/config/t-openbsd b/gcc/config/t-openbsd
new file mode 100644
index 000000000..2289f154e
--- /dev/null
+++ b/gcc/config/t-openbsd
@@ -0,0 +1,2 @@
+# We don't need GCC's own include files.
+USER_H =
diff --git a/gcc/config/t-openbsd-thread b/gcc/config/t-openbsd-thread
new file mode 100644
index 000000000..5f4edf567
--- /dev/null
+++ b/gcc/config/t-openbsd-thread
@@ -0,0 +1,3 @@
+# This is currently needed to compile libgcc2 for thread support.
+TARGET_LIBGCC2_CFLAGS=-pthread
+
diff --git a/gcc/config/t-pnt16-warn b/gcc/config/t-pnt16-warn
new file mode 100644
index 000000000..0bd52029c
--- /dev/null
+++ b/gcc/config/t-pnt16-warn
@@ -0,0 +1,27 @@
+# -Werror overrides for targets with 16 bit pointers
+# Copyright (C) 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+
+# Because POINTER_SIZE is only 16, in dwarf2out.c,
+# DWARF_ARANGES_PAD_SIZE is 0, thus a loop in output_aranges that checks
+# (i < (unsigned) DWARF_ARANGES_PAD_SIZE) elicits a warning that the
+# comparison is always false.
+# We could say "-Werror -Wno-error=type-limits", alas, not all supported
+# gcc bootstrap compilers support the latter option.
+dwarf2out.o-warn = -Wno-error
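A minimal C sketch (not part of the patch) of the warning class this override silences; PAD is a hypothetical stand-in for DWARF_ARANGES_PAD_SIZE:

    /* Compile with: gcc -c -Wtype-limits -Werror sketch.c
       With PAD defined to 0 the loop guard can never be true, so
       -Wtype-limits reports that the comparison is always false,
       and -Werror turns that into a hard error.  */
    #define PAD 0   /* stands in for DWARF_ARANGES_PAD_SIZE */

    unsigned int
    pad_bytes (void)
    {
      unsigned int i, n = 0;

      for (i = 0; i < (unsigned) PAD; i++)   /* always false */
        n++;
      return n;
    }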
diff --git a/gcc/config/t-rtems b/gcc/config/t-rtems
new file mode 100644
index 000000000..dfbd3afe9
--- /dev/null
+++ b/gcc/config/t-rtems
@@ -0,0 +1,7 @@
+# RTEMS always has limits.h.
+LIMITS_H_TEST = true
+
+# If we are building next to newlib, this will let us find the RTEMS
+# limits.h when building libgcc2. Otherwise, newlib must be installed
+# first.
+LIBGCC2_INCLUDES = -I$(srcdir)/../newlib/libc/sys/rtems/include
diff --git a/gcc/config/t-slibgcc-darwin b/gcc/config/t-slibgcc-darwin
new file mode 100644
index 000000000..b957b3532
--- /dev/null
+++ b/gcc/config/t-slibgcc-darwin
@@ -0,0 +1,2 @@
+# To keep DRIVER_DEFINES correct.
+SHLIB_LINK = dummy
diff --git a/gcc/config/t-slibgcc-elf-ver b/gcc/config/t-slibgcc-elf-ver
new file mode 100644
index 000000000..d5ef9ca16
--- /dev/null
+++ b/gcc/config/t-slibgcc-elf-ver
@@ -0,0 +1,56 @@
+# Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build a shared libgcc library for ELF with symbol versioning
+# with the GNU linker.
+
+SHLIB_EXT = .so
+SHLIB_SOLINK = @shlib_base_name@.so
+SHLIB_SOVERSION = 1
+SHLIB_SONAME = @shlib_base_name@.so.$(SHLIB_SOVERSION)
+SHLIB_MAP = @shlib_map_file@
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+SHLIB_LC = -lc
+SHLIB_MAKE_SOLINK = $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
+SHLIB_INSTALL_SOLINK = $(LN_S) $(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,--soname=$(SHLIB_SONAME) \
+ -Wl,--version-script=$(SHLIB_MAP) \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) $(SHLIB_LC) && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ $(SHLIB_MAKE_SOLINK)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+ $(SHLIB_INSTALL_SOLINK)
+SHLIB_MKMAP = $(srcdir)/mkmap-symver.awk
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
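The SHLIB_LINK recipe above deliberately links into $(SHLIB_SONAME).tmp, preserves any existing library as a .backup, and only then moves the new file over $(SHLIB_SONAME); since the final step is a same-directory rename, anyone linking against the shared libgcc concurrently sees either the old library or the new one, never a truncated file. A minimal C sketch of the same replace-via-rename idea (file names are hypothetical):

    #include <stdio.h>

    int
    main (void)
    {
      /* Build the new contents under a temporary name first.  */
      FILE *f = fopen ("libdemo.so.1.tmp", "wb");

      if (f == NULL)
        return 1;
      fputs ("...new library image...", f);
      fclose (f);

      /* rename () swaps the target in one step, so a reader sees
         either the old file or the new one, never a partial mix.  */
      return rename ("libdemo.so.1.tmp", "libdemo.so.1") != 0;
    }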
diff --git a/gcc/config/t-slibgcc-libgcc b/gcc/config/t-slibgcc-libgcc
new file mode 100644
index 000000000..df004a5e9
--- /dev/null
+++ b/gcc/config/t-slibgcc-libgcc
@@ -0,0 +1,32 @@
+# Copyright (C) 2009 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Instead of creating a $(SHLIB_SOLINK) symlink, create a GNU ld
+# linker script which pulls in both $(SHLIB_SONAME) and libgcc.a.
+# This is needed on targets where libgcc.a contains routines that aren't in
+# $(SHLIB_SONAME) and are needed for shared libraries.
+
+SHLIB_MAKE_SOLINK = \
+ (echo "/* GNU ld script"; \
+ echo " Use the shared library, but some functions are only in"; \
+ echo " the static library. */"; \
+ echo "GROUP ( $(SHLIB_SONAME) libgcc.a )" \
+ ) > $(SHLIB_DIR)/$(SHLIB_SOLINK)
+SHLIB_INSTALL_SOLINK = \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SOLINK) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
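For illustration only: assuming @shlib_base_name@ expands to libgcc_s and SHLIB_SOVERSION is 1 (the values used with t-slibgcc-elf-ver above), the echo recipe writes a $(SHLIB_SOLINK) file of roughly this shape:

    /* GNU ld script
       Use the shared library, but some functions are only in
       the static library. */
    GROUP ( libgcc_s.so.1 libgcc.a )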
diff --git a/gcc/config/t-slibgcc-nolc-override b/gcc/config/t-slibgcc-nolc-override
new file mode 100644
index 000000000..959d2cc2a
--- /dev/null
+++ b/gcc/config/t-slibgcc-nolc-override
@@ -0,0 +1 @@
+SHLIB_LC =
diff --git a/gcc/config/t-slibgcc-sld b/gcc/config/t-slibgcc-sld
new file mode 100644
index 000000000..3a343f5b9
--- /dev/null
+++ b/gcc/config/t-slibgcc-sld
@@ -0,0 +1,50 @@
+# Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build a shared libgcc library with the Solaris linker.
+
+SHLIB_EXT = .so
+SHLIB_SOLINK = @shlib_base_name@.so
+SHLIB_SONAME = @shlib_base_name@.so.1
+SHLIB_MAP = @shlib_map_file@
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,-h,$(SHLIB_SONAME) -Wl,-z,text -Wl,-z,defs \
+ -Wl,-M,$(SHLIB_MAP) -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp \
+ @multilib_flags@ $(SHLIB_OBJS) -lc && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ $(LN_S) $(SHLIB_SONAME) $(SHLIB_DIR)/$(SHLIB_SOLINK)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+ $(LN_S) $(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+SHLIB_MKMAP = $(srcdir)/mkmap-symver.awk
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/t-sol2 b/gcc/config/t-sol2
new file mode 100644
index 000000000..6a76bf1ba
--- /dev/null
+++ b/gcc/config/t-sol2
@@ -0,0 +1,36 @@
+# Copyright (C) 2004, 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Solaris-specific format checking and pragmas
+sol2-c.o: $(srcdir)/config/sol2-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ tree.h c-family/c-format.h $(C_PRAGMA_H) $(C_COMMON_H) $(CPPLIB_H) \
+ intl.h $(TM_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sol2-c.c
+
+# Solaris-specific attributes
+sol2.o: $(srcdir)/config/sol2.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ tree.h output.h $(TM_H) $(TM_P_H) $(GGC_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sol2.c
+
+# Use unwind-dw2-fde-glibc.c.  Unless linker support and dl_iterate_phdr
+# are present, it automatically falls back to unwind-dw2-fde.c.
+LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c \
+ $(srcdir)/unwind-sjlj.c $(srcdir)/gthr-gnat.c $(srcdir)/unwind-c.c
+LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
diff --git a/gcc/config/t-svr4 b/gcc/config/t-svr4
new file mode 100644
index 000000000..6e75eea1f
--- /dev/null
+++ b/gcc/config/t-svr4
@@ -0,0 +1,8 @@
+# We need to use -fPIC when we are using gcc to compile the routines in
+# crtstuff.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fPIC when compiling the
+# routines in crtstuff.c. Likewise for libgcc2.c.
+
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC
diff --git a/gcc/config/t-sysroot-suffix b/gcc/config/t-sysroot-suffix
new file mode 100644
index 000000000..08b4f949e
--- /dev/null
+++ b/gcc/config/t-sysroot-suffix
@@ -0,0 +1,7 @@
+# Generate SYSROOT_SUFFIX_SPEC from MULTILIB_OSDIRNAMES
+
+sysroot-suffix.h: $(srcdir)/config/print-sysroot-suffix.sh
+ $(SHELL) $(srcdir)/config/print-sysroot-suffix.sh \
+ "$(MULTILIB_OSDIRNAMES)" "$(MULTILIB_OPTIONS)" \
+ "$(MULTILIB_MATCHES)" > tmp-sysroot-suffix.h
+ mv tmp-sysroot-suffix.h $@
diff --git a/gcc/config/t-vxworks b/gcc/config/t-vxworks
new file mode 100644
index 000000000..e200d9320
--- /dev/null
+++ b/gcc/config/t-vxworks
@@ -0,0 +1,53 @@
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008,
+# 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# Build libgcc using the multilib mechanism
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# No special flags needed for libgcc.a
+TARGET_LIBGCC2_CFLAGS =
+
+# Don't build libgcc.a with debug info
+LIBGCC2_DEBUG_CFLAGS =
+
+# Extra libgcc2 modules used by gthr-vxworks.h functions
+LIB2FUNCS_EXTRA = $(srcdir)/config/vxlib.c $(srcdir)/config/vxlib-tls.c
+
+# Some runtime modules need these. Can't set extra_headers in config.gcc
+# because the paths are always made absolute to the cpu config dir.
+EXTRA_HEADERS += $(srcdir)/gthr-vxworks.h gthr-default.h
+
+# This ensures that the correct target headers are used; some
+# VxWorks system headers have names that collide with GCC's
+# internal (host) headers, e.g. regs.h.
+LIBGCC2_INCLUDES = -nostdinc -I \
+ `case "/$$(MULTIDIR)" in \
+ */mrtp*) echo $(WIND_USR)/h ;; \
+ *) echo $(WIND_BASE)/target/h ;; \
+ esac`
+
+# Both the kernel and RTP headers provide limits.h.
+LIMITS_H_TEST = true
+
+EXTRA_MULTILIB_PARTS =
+
+vxworks.o: $(srcdir)/config/vxworks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TARGET_H) output.h $(TM_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/tm-dwarf2.h b/gcc/config/tm-dwarf2.h
new file mode 100644
index 000000000..d08646ecc
--- /dev/null
+++ b/gcc/config/tm-dwarf2.h
@@ -0,0 +1,4 @@
+/* Enable Dwarf2 debugging and make it the default */
+#define DWARF2_DEBUGGING_INFO 1
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
diff --git a/gcc/config/udivmod.c b/gcc/config/udivmod.c
new file mode 100644
index 000000000..dc70de64f
--- /dev/null
+++ b/gcc/config/udivmod.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Defined in udivmodsi4.c.  Use a real prototype so the calls below
+   are type-checked.  */
+extern unsigned long udivmodsi4 (unsigned long, unsigned long, int);
+
+long
+__udivsi3 (long a, long b)
+{
+ return udivmodsi4 (a, b, 0);
+}
+
+long
+__umodsi3 (long a, long b)
+{
+ return udivmodsi4 (a, b, 1);
+}
+
diff --git a/gcc/config/udivmodsi4.c b/gcc/config/udivmodsi4.c
new file mode 100644
index 000000000..39c030fa4
--- /dev/null
+++ b/gcc/config/udivmodsi4.c
@@ -0,0 +1,47 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+unsigned long
+udivmodsi4(unsigned long num, unsigned long den, int modwanted)
+{
+ unsigned long bit = 1;
+ unsigned long res = 0;
+
+  while (den < num && bit && !(den & (1L << 31)))
+    {
+      den <<= 1;
+      bit <<= 1;
+    }
+  while (bit)
+    {
+      if (num >= den)
+	{
+	  num -= den;
+	  res |= bit;
+	}
+      bit >>= 1;
+      den >>= 1;
+    }
+ if (modwanted) return num;
+ return res;
+}
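A small self-contained harness (not part of the patch) that checks the shift-and-subtract loop above against the native operators; the test values are arbitrary:

    #include <assert.h>

    /* Local copy of the routine above under a test-only name.  */
    static unsigned long
    test_udivmodsi4 (unsigned long num, unsigned long den, int modwanted)
    {
      unsigned long bit = 1;
      unsigned long res = 0;

      /* Align the divisor's leading bit with the dividend's.  */
      while (den < num && bit && !(den & (1L << 31)))
        {
          den <<= 1;
          bit <<= 1;
        }
      /* Subtract shifted divisors, setting one quotient bit per step.  */
      while (bit)
        {
          if (num >= den)
            {
              num -= den;
              res |= bit;
            }
          bit >>= 1;
          den >>= 1;
        }
      return modwanted ? num : res;
    }

    int
    main (void)
    {
      unsigned long a = 100000UL, b = 7UL;

      assert (test_udivmodsi4 (a, b, 0) == a / b);
      assert (test_udivmodsi4 (a, b, 1) == a % b);
      return 0;
    }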
diff --git a/gcc/config/usegas.h b/gcc/config/usegas.h
new file mode 100644
index 000000000..80eca1094
--- /dev/null
+++ b/gcc/config/usegas.h
@@ -0,0 +1,20 @@
+/* Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Just set a single flag that we can test for inside other files. */
+#define USE_GAS 1
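A sketch of the intended use; the helper and the particular directives below are illustrative, not taken from the patch:

    #include <stdio.h>

    /* Pick an assembler directive depending on whether the GNU
       assembler is known to be in use.  */
    static void
    emit_align (FILE *out)
    {
    #ifdef USE_GAS
      fputs ("\t.balign 4\n", out);   /* GAS-specific spelling */
    #else
      fputs ("\t.align 2\n", out);    /* more portable fallback */
    #endif
    }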
diff --git a/gcc/config/v850/constraints.md b/gcc/config/v850/constraints.md
new file mode 100644
index 000000000..eecdab3d4
--- /dev/null
+++ b/gcc/config/v850/constraints.md
@@ -0,0 +1,108 @@
+;; Constraint definitions for V850.
+;; Copyright (C) 2011 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_register_constraint "e" "EVEN_REGS"
+ "@internal")
+
+;; Integer constraints.
+(define_constraint "I"
+ "Integer constant 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "J"
+ "A signed 5-bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -16 && ival <= 15")))
+
+(define_constraint "K"
+ "A signed 16-bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+(define_constraint "L"
+ "A valid constant for a movhi instruction."
+ (and (match_code "const_int")
+ (ior (match_test "(ival | 0x7fff0000) == 0x7fff0000")
+ (match_test "(ival | 0x7fff0000) + 0x10000 == 0"))))
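The two match_tests above jointly accept exactly the 32-bit constants whose low 16 bits are clear: the first covers 0x00000000 .. 0x7fff0000, the second 0x80000000 .. 0xffff0000. A C sketch of the combined predicate, using a fixed 32-bit type in place of ival's HOST_WIDE_INT:

    #include <stdint.h>

    /* 1 iff IVAL can be materialized by a single movhi,
       i.e. its low 16 bits are zero.  */
    static int
    movhi_constant_p (int32_t ival)
    {
      uint32_t v = (uint32_t) ival;

      return (v | 0x7fff0000u) == 0x7fff0000u        /* non-negative case */
             || (v | 0x7fff0000u) + 0x10000u == 0u;  /* negative case */
    }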
+
+(define_constraint "M"
+ "An unsigned 16-bit immediate."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+(define_constraint "N"
+ "An unsigned 5-bit immediate in shift instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 31")))
+
+(define_constraint "O"
+ "A signed 9-bit immediate for word multiply instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= -255 && ival <= 255")))
+
+(define_constraint "P"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "0")))
+
+;; Floating-point constraints.
+(define_constraint "G"
+ "A zero of some form."
+ (and (match_code "const_double")
+ (ior (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT")
+ (match_test "GET_MODE_CLASS (mode) == MODE_INT"))
+ (match_test "op == CONST0_RTX (mode)")))
+
+(define_constraint "H"
+ "@internal"
+ (and (match_code "const_double")
+ (match_test "0")))
+
+;;; Extra constraints.
+(define_constraint "Q"
+ "A memory address that does not contain a symbol address."
+ (and (match_code "mem")
+ (match_test "ep_memory_operand (op, mode, FALSE)")))
+
+(define_constraint "R"
+ "@internal"
+ (match_test "special_symbolref_operand (op, VOIDmode)"))
+
+(define_constraint "S"
+ "@internal"
+ (and (match_code "symbol_ref")
+ (match_test "!SYMBOL_REF_ZDA_P (op)")))
+
+(define_constraint "T"
+ "@internal"
+ (match_test "ep_memory_operand (op, mode, TRUE)"))
+
+(define_constraint "U"
+ "@internal"
+ (ior (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_ZDA_P (op)"))
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+ (match_test "GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF")
+ (match_test "SYMBOL_REF_ZDA_P (XEXP (XEXP (op, 0), 0))"))))
+
+(define_constraint "W"
+ "@internal"
+ (match_test "disp23_operand (op, VOIDmode)"))
diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm
new file mode 100644
index 000000000..04e9b1e0a
--- /dev/null
+++ b/gcc/config/v850/lib1funcs.asm
@@ -0,0 +1,2330 @@
+/* libgcc routines for NEC V850.
+ Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifdef L_mulsi3
+ .text
+ .globl ___mulsi3
+ .type ___mulsi3,@function
+___mulsi3:
+#ifdef __v850__
+/*
+ #define SHIFT 15
+ #define MASK ((1 << SHIFT) - 1)
+
+ #define STEP(i, j) \
+ ({ \
+ short a_part = (a >> (i)) & MASK; \
+ short b_part = (b >> (j)) & MASK; \
+ int res = (((int) a_part) * ((int) b_part)); \
+ res; \
+ })
+
+ int
+ __mulsi3 (unsigned a, unsigned b)
+ {
+ return STEP (0, 0) +
+ ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ << (2 * SHIFT));
+ }
+*/
+ mov r6, r14
+ movea lo(32767), r0, r10
+ and r10, r14
+ mov r7, r15
+ and r10, r15
+ shr 15, r6
+ mov r6, r13
+ and r10, r13
+ shr 15, r7
+ mov r7, r12
+ and r10, r12
+ shr 15, r6
+ shr 15, r7
+ mov r14, r10
+ mulh r15, r10
+ mov r14, r11
+ mulh r12, r11
+ mov r13, r16
+ mulh r15, r16
+ mulh r14, r7
+ mulh r15, r6
+ add r16, r11
+ mulh r13, r12
+ shl 15, r11
+ add r11, r10
+ add r12, r7
+ add r6, r7
+ shl 30, r7
+ add r7, r10
+ jmp [r31]
+#endif /* __v850__ */
+#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__)
+ /* This routine is almost unnecessary, because gcc
+ generates the MUL instruction for the RTX mulsi3.
+ But if someone wants to link their application with
+ previously compiled v850 objects, they will still
+ need this function. */
+
+ /* It isn't good to use the instruction sequence below:
+ mul r7, r6, r0
+ mov r6, r10
+ because there is a RAW hazard between them: MUL spends
+ 2 cycles in the EX stage, so the MOV would have to
+ wait 1 cycle. */
+ mov r7, r10
+ mul r6, r10, r0
+ jmp [r31]
+#endif /* __v850e__ */
+ .size ___mulsi3,.-___mulsi3
+#endif /* L_mulsi3 */
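A runnable C rendition of the partial-product scheme from the comment at the top of ___mulsi3, with SHIFT at the 15-bit chunk size the assembly actually uses (mask 32767, shifts of 15 and 30); unsigned arithmetic keeps the 32-bit wraparound well defined, and main() is illustrative only:

    #include <assert.h>

    #define SHIFT 15
    #define MASK  ((1 << SHIFT) - 1)

    /* One 15-bit x 15-bit partial product, as in STEP above.  */
    #define STEP(a, b, i, j) \
      ((unsigned) (((a) >> (i)) & MASK) * (unsigned) (((b) >> (j)) & MASK))

    static unsigned
    mulsi3 (unsigned a, unsigned b)
    {
      /* Partial products shifted by 3*SHIFT or more vanish mod 2^32,
         so only these six terms survive.  */
      return STEP (a, b, 0, 0)
             + ((STEP (a, b, SHIFT, 0) + STEP (a, b, 0, SHIFT)) << SHIFT)
             + ((STEP (a, b, 0, 2 * SHIFT) + STEP (a, b, SHIFT, SHIFT)
                 + STEP (a, b, 2 * SHIFT, 0)) << (2 * SHIFT));
    }

    int
    main (void)
    {
      assert (mulsi3 (123456u, 7890u) == 123456u * 7890u);
      assert (mulsi3 (0xdeadbeefu, 0x12345u) == 0xdeadbeefu * 0x12345u);
      return 0;
    }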
+
+
+#ifdef L_udivsi3
+ .text
+ .global ___udivsi3
+ .type ___udivsi3,@function
+___udivsi3:
+#ifdef __v850__
+ mov 1,r12
+ mov 0,r10
+ cmp r6,r7
+ bnl .L12
+ movhi hi(-2147483648),r0,r13
+ cmp r0,r7
+ blt .L12
+.L4:
+ shl 1,r7
+ shl 1,r12
+ cmp r6,r7
+ bnl .L12
+ cmp r0,r12
+ be .L8
+ mov r7,r19
+ and r13,r19
+ be .L4
+ br .L12
+.L9:
+ cmp r7,r6
+ bl .L10
+ sub r7,r6
+ or r12,r10
+.L10:
+ shr 1,r12
+ shr 1,r7
+.L12:
+ cmp r0,r12
+ bne .L9
+.L8:
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ mov r6, r10
+ divu r7, r10, r0
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___udivsi3,.-___udivsi3
+#endif
+
+#ifdef L_divsi3
+ .text
+ .globl ___divsi3
+ .type ___divsi3,@function
+___divsi3:
+#ifdef __v850__
+ add -8,sp
+ st.w r31,4[sp]
+ st.w r22,0[sp]
+ mov 1,r22
+ tst r7,r7
+ bp .L3
+ subr r0,r7
+ subr r0,r22
+.L3:
+ tst r6,r6
+ bp .L4
+ subr r0,r6
+ subr r0,r22
+.L4:
+ jarl ___udivsi3,r31
+ cmp r0,r22
+ bp .L7
+ subr r0,r10
+.L7:
+ ld.w 0[sp],r22
+ ld.w 4[sp],r31
+ add 8,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ mov r6, r10
+ div r7, r10, r0
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___divsi3,.-___divsi3
+#endif
+
+#ifdef L_umodsi3
+ .text
+ .globl ___umodsi3
+ .type ___umodsi3,@function
+___umodsi3:
+#ifdef __v850__
+ add -12,sp
+ st.w r31,8[sp]
+ st.w r7,4[sp]
+ st.w r6,0[sp]
+ jarl ___udivsi3,r31
+ ld.w 4[sp],r7
+ mov r10,r6
+ jarl ___mulsi3,r31
+ ld.w 0[sp],r6
+ subr r6,r10
+ ld.w 8[sp],r31
+ add 12,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ divu r7, r6, r10
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___umodsi3,.-___umodsi3
+#endif /* L_umodsi3 */
+
+#ifdef L_modsi3
+ .text
+ .globl ___modsi3
+ .type ___modsi3,@function
+___modsi3:
+#ifdef __v850__
+ add -12,sp
+ st.w r31,8[sp]
+ st.w r7,4[sp]
+ st.w r6,0[sp]
+ jarl ___divsi3,r31
+ ld.w 4[sp],r7
+ mov r10,r6
+ jarl ___mulsi3,r31
+ ld.w 0[sp],r6
+ subr r6,r10
+ ld.w 8[sp],r31
+ add 12,sp
+ jmp [r31]
+
+#else /* defined(__v850e__) */
+
+ /* See comments at end of __mulsi3. */
+ div r7, r6, r10
+ jmp [r31]
+
+#endif /* __v850e__ */
+
+ .size ___modsi3,.-___modsi3
+#endif /* L_modsi3 */
+
+#ifdef L_save_2
+ .text
+ .align 2
+ .globl __save_r2_r29
+ .type __save_r2_r29,@function
+ /* Allocate space and save registers 2, 20 .. 29 on the stack. */
+ /* Called via: jarl __save_r2_r29,r10. */
+__save_r2_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -44,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r2,40[ep]
+ mov r1,ep
+#else
+ addi -44,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r2,40[sp]
+#endif
+ jmp [r10]
+ .size __save_r2_r29,.-__save_r2_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r2_r29. */
+ .align 2
+ .globl __return_r2_r29
+ .type __return_r2_r29,@function
+__return_r2_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r2
+ addi 44,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r2
+ addi 44,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r2_r29,.-__return_r2_r29
+#endif /* L_save_2 */
+
+#ifdef L_save_20
+ .text
+ .align 2
+ .globl __save_r20_r29
+ .type __save_r20_r29,@function
+ /* Allocate space and save registers 20 .. 29 on the stack. */
+ /* Called via: jarl __save_r20_r29,r10. */
+__save_r20_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -40,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ mov r1,ep
+#else
+ addi -40,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+#endif
+ jmp [r10]
+ .size __save_r20_r29,.-__save_r20_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r20_r29. */
+ .align 2
+ .globl __return_r20_r29
+ .type __return_r20_r29,@function
+__return_r20_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ addi 40,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ addi 40,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r20_r29,.-__return_r20_r29
+#endif /* L_save_20 */
+
+#ifdef L_save_21
+ .text
+ .align 2
+ .globl __save_r21_r29
+ .type __save_r21_r29,@function
+ /* Allocate space and save registers 21 .. 29 on the stack. */
+ /* Called via: jarl __save_r21_r29,r10. */
+__save_r21_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -36,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ mov r1,ep
+#else
+ addi -36,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+#endif
+ jmp [r10]
+ .size __save_r21_r29,.-__save_r21_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r21_r29. */
+ .align 2
+ .globl __return_r21_r29
+ .type __return_r21_r29,@function
+__return_r21_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ addi 36,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ addi 36,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r21_r29,.-__return_r21_r29
+#endif /* L_save_21 */
+
+#ifdef L_save_22
+ .text
+ .align 2
+ .globl __save_r22_r29
+ .type __save_r22_r29,@function
+ /* Allocate space and save registers 22 .. 29 on the stack. */
+ /* Called via: jarl __save_r22_r29,r10. */
+__save_r22_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -32,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ mov r1,ep
+#else
+ addi -32,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+#endif
+ jmp [r10]
+ .size __save_r22_r29,.-__save_r22_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r22_r29. */
+ .align 2
+ .globl __return_r22_r29
+ .type __return_r22_r29,@function
+__return_r22_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ addi 32,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ addi 32,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r22_r29,.-__return_r22_r29
+#endif /* L_save_22 */
+
+#ifdef L_save_23
+ .text
+ .align 2
+ .globl __save_r23_r29
+ .type __save_r23_r29,@function
+ /* Allocate space and save registers 23 .. 29 on the stack. */
+ /* Called via: jarl __save_r23_r29,r10. */
+__save_r23_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -28,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ mov r1,ep
+#else
+ addi -28,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+#endif
+ jmp [r10]
+ .size __save_r23_r29,.-__save_r23_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r23_r29. */
+ .align 2
+ .globl __return_r23_r29
+ .type __return_r23_r29,@function
+__return_r23_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ addi 28,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ addi 28,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r23_r29,.-__return_r23_r29
+#endif /* L_save_23 */
+
+#ifdef L_save_24
+ .text
+ .align 2
+ .globl __save_r24_r29
+ .type __save_r24_r29,@function
+ /* Allocate space and save registers 24 .. 29 on the stack. */
+ /* Called via: jarl __save_r24_r29,r10. */
+__save_r24_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -24,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ mov r1,ep
+#else
+ addi -24,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+#endif
+ jmp [r10]
+ .size __save_r24_r29,.-__save_r24_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r24_r29. */
+ .align 2
+ .globl __return_r24_r29
+ .type __return_r24_r29,@function
+__return_r24_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ addi 24,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ addi 24,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r24_r29,.-__return_r24_r29
+#endif /* L_save_24 */
+
+#ifdef L_save_25
+ .text
+ .align 2
+ .globl __save_r25_r29
+ .type __save_r25_r29,@function
+ /* Allocate space and save registers 25 .. 29 on the stack. */
+ /* Called via: jarl __save_r25_r29,r10. */
+__save_r25_r29:
+#ifdef __EP__
+ mov ep,r1
+ addi -20,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ mov r1,ep
+#else
+ addi -20,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+#endif
+ jmp [r10]
+ .size __save_r25_r29,.-__save_r25_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r25_r29. */
+ .align 2
+ .globl __return_r25_r29
+ .type __return_r25_r29,@function
+__return_r25_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ addi 20,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ addi 20,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r25_r29,.-__return_r25_r29
+#endif /* L_save_25 */
+
+#ifdef L_save_26
+ .text
+ .align 2
+ .globl __save_r26_r29
+ .type __save_r26_r29,@function
+ /* Allocate space and save registers 26 .. 29 on the stack. */
+ /* Called via: jarl __save_r26_r29,r10. */
+__save_r26_r29:
+#ifdef __EP__
+ mov ep,r1
+ add -16,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ mov r1,ep
+#else
+ add -16,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+#endif
+ jmp [r10]
+ .size __save_r26_r29,.-__save_r26_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r26_r29. */
+ .align 2
+ .globl __return_r26_r29
+ .type __return_r26_r29,@function
+__return_r26_r29:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ addi 16,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ addi 16,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r26_r29,.-__return_r26_r29
+#endif /* L_save_26 */
+
+#ifdef L_save_27
+ .text
+ .align 2
+ .globl __save_r27_r29
+ .type __save_r27_r29,@function
+ /* Allocate space and save registers 27 .. 29 on the stack. */
+ /* Called via: jarl __save_r27_r29,r10. */
+__save_r27_r29:
+ add -12,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ jmp [r10]
+ .size __save_r27_r29,.-__save_r27_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r27_r29. */
+ .align 2
+ .globl __return_r27_r29
+ .type __return_r27_r29,@function
+__return_r27_r29:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ add 12,sp
+ jmp [r31]
+ .size __return_r27_r29,.-__return_r27_r29
+#endif /* L_save_27 */
+
+#ifdef L_save_28
+ .text
+ .align 2
+ .globl __save_r28_r29
+ .type __save_r28_r29,@function
+ /* Allocate space and save registers 28,29 on the stack. */
+ /* Called via: jarl __save_r28_r29,r10. */
+__save_r28_r29:
+ add -8,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ jmp [r10]
+ .size __save_r28_r29,.-__save_r28_r29
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r28_r29. */
+ .align 2
+ .globl __return_r28_r29
+ .type __return_r28_r29,@function
+__return_r28_r29:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ add 8,sp
+ jmp [r31]
+ .size __return_r28_r29,.-__return_r28_r29
+#endif /* L_save_28 */
+
+#ifdef L_save_29
+ .text
+ .align 2
+ .globl __save_r29
+ .type __save_r29,@function
+ /* Allocate space and save register 29 on the stack. */
+ /* Called via: jarl __save_r29,r10. */
+__save_r29:
+ add -4,sp
+ st.w r29,0[sp]
+ jmp [r10]
+ .size __save_r29,.-__save_r29
+
+ /* Restore saved register 29, deallocate stack and return to the user. */
+ /* Called via: jr __return_r29. */
+ .align 2
+ .globl __return_r29
+ .type __return_r29,@function
+__return_r29:
+ ld.w 0[sp],r29
+ add 4,sp
+ jmp [r31]
+ .size __return_r29,.-__return_r29
+#endif /* L_save_29 */
+
+#ifdef L_save_2c
+ .text
+ .align 2
+ .globl __save_r2_r31
+ .type __save_r2_r31,@function
+ /* Allocate space and save registers 2, 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r2_r31,r10. */
+__save_r2_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -48,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r2,40[ep]
+ sst.w r31,44[ep]
+ mov r1,ep
+#else
+ addi -48,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r2,40[sp]
+ st.w r31,44[sp]
+#endif
+ jmp [r10]
+ .size __save_r2_r31,.-__save_r2_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r2_r31. */
+ .align 2
+ .globl __return_r2_r31
+ .type __return_r2_r31,@function
+__return_r2_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r2
+ sld.w 44[ep],r31
+ addi 48,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r2
+ ld.w 44[sp],r31
+ addi 48,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r2_r31,.-__return_r2_r31
+#endif /* L_save_2c */
+
+#ifdef L_save_20c
+ .text
+ .align 2
+ .globl __save_r20_r31
+ .type __save_r20_r31,@function
+ /* Allocate space and save registers 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r20_r31,r10. */
+__save_r20_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -44,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r20,36[ep]
+ sst.w r31,40[ep]
+ mov r1,ep
+#else
+ addi -44,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r20,36[sp]
+ st.w r31,40[sp]
+#endif
+ jmp [r10]
+ .size __save_r20_r31,.-__save_r20_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r20_r31. */
+ .align 2
+ .globl __return_r20_r31
+ .type __return_r20_r31,@function
+__return_r20_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r20
+ sld.w 40[ep],r31
+ addi 44,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r20
+ ld.w 40[sp],r31
+ addi 44,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r20_r31,.-__return_r20_r31
+#endif /* L_save_20c */
+
+#ifdef L_save_21c
+ .text
+ .align 2
+ .globl __save_r21_r31
+ .type __save_r21_r31,@function
+ /* Allocate space and save registers 21 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r21_r31,r10. */
+__save_r21_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -40,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r21,32[ep]
+ sst.w r31,36[ep]
+ mov r1,ep
+ jmp [r10]
+#else
+ addi -40,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r21,32[sp]
+ st.w r31,36[sp]
+ jmp [r10]
+#endif
+ .size __save_r21_r31,.-__save_r21_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r21_r31. */
+ .align 2
+ .globl __return_r21_r31
+ .type __return_r21_r31,@function
+__return_r21_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r21
+ sld.w 36[ep],r31
+ addi 40,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r21
+ ld.w 36[sp],r31
+ addi 40,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r21_r31,.-__return_r21_r31
+#endif /* L_save_21c */
+
+#ifdef L_save_22c
+ .text
+ .align 2
+ .globl __save_r22_r31
+ .type __save_r22_r31,@function
+ /* Allocate space and save registers 22 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r22_r31,r10. */
+__save_r22_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -36,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r22,28[ep]
+ sst.w r31,32[ep]
+ mov r1,ep
+#else
+ addi -36,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r22,28[sp]
+ st.w r31,32[sp]
+#endif
+ jmp [r10]
+ .size __save_r22_r31,.-__save_r22_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r22_r31. */
+ .align 2
+ .globl __return_r22_r31
+ .type __return_r22_r31,@function
+__return_r22_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r22
+ sld.w 32[ep],r31
+ addi 36,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r22
+ ld.w 32[sp],r31
+ addi 36,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r22_r31,.-__return_r22_r31
+#endif /* L_save_22c */
+
+#ifdef L_save_23c
+ .text
+ .align 2
+ .globl __save_r23_r31
+ .type __save_r23_r31,@function
+ /* Allocate space and save registers 23 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r23_r31,r10. */
+__save_r23_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -32,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r23,24[ep]
+ sst.w r31,28[ep]
+ mov r1,ep
+#else
+ addi -32,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r23,24[sp]
+ st.w r31,28[sp]
+#endif
+ jmp [r10]
+ .size __save_r23_r31,.-__save_r23_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r23_r31. */
+ .align 2
+ .globl __return_r23_r31
+ .type __return_r23_r31,@function
+__return_r23_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r23
+ sld.w 28[ep],r31
+ addi 32,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r23
+ ld.w 28[sp],r31
+ addi 32,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r23_r31,.-__return_r23_r31
+#endif /* L_save_23c */
+
+#ifdef L_save_24c
+ .text
+ .align 2
+ .globl __save_r24_r31
+ .type __save_r24_r31,@function
+ /* Allocate space and save registers 24 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r24_r31,r10. */
+__save_r24_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -28,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r24,20[ep]
+ sst.w r31,24[ep]
+ mov r1,ep
+#else
+ addi -28,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r24,20[sp]
+ st.w r31,24[sp]
+#endif
+ jmp [r10]
+ .size __save_r24_r31,.-__save_r24_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r24_r31. */
+ .align 2
+ .globl __return_r24_r31
+ .type __return_r24_r31,@function
+__return_r24_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r24
+ sld.w 24[ep],r31
+ addi 28,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r24
+ ld.w 24[sp],r31
+ addi 28,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r24_r31,.-__return_r24_r31
+#endif /* L_save_24c */
+
+#ifdef L_save_25c
+ .text
+ .align 2
+ .globl __save_r25_r31
+ .type __save_r25_r31,@function
+ /* Allocate space and save registers 25 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r25_r31,r10. */
+__save_r25_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -24,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r25,16[ep]
+ sst.w r31,20[ep]
+ mov r1,ep
+#else
+ addi -24,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r25,16[sp]
+ st.w r31,20[sp]
+#endif
+ jmp [r10]
+ .size __save_r25_r31,.-__save_r25_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r25_r31. */
+ .align 2
+ .globl __return_r25_r31
+ .type __return_r25_r31,@function
+__return_r25_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r25
+ sld.w 20[ep],r31
+ addi 24,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r25
+ ld.w 20[sp],r31
+ addi 24,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r25_r31,.-__return_r25_r31
+#endif /* L_save_25c */
+
+#ifdef L_save_26c
+ .text
+ .align 2
+ .globl __save_r26_r31
+ .type __save_r26_r31,@function
+ /* Allocate space and save registers 26 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r26_r31,r10. */
+__save_r26_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -20,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r26,12[ep]
+ sst.w r31,16[ep]
+ mov r1,ep
+#else
+ addi -20,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r26,12[sp]
+ st.w r31,16[sp]
+#endif
+ jmp [r10]
+ .size __save_r26_r31,.-__save_r26_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r26_r31. */
+ .align 2
+ .globl __return_r26_r31
+ .type __return_r26_r31,@function
+__return_r26_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r26
+ sld.w 16[ep],r31
+ addi 20,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r26
+ ld.w 16[sp],r31
+ addi 20,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r26_r31,.-__return_r26_r31
+#endif /* L_save_26c */
+
+#ifdef L_save_27c
+ .text
+ .align 2
+ .globl __save_r27_r31
+ .type __save_r27_r31,@function
+ /* Allocate space and save registers 27 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r27_r31,r10. */
+__save_r27_r31:
+#ifdef __EP__
+ mov ep,r1
+ addi -16,sp,sp
+ mov sp,ep
+ sst.w r29,0[ep]
+ sst.w r28,4[ep]
+ sst.w r27,8[ep]
+ sst.w r31,12[ep]
+ mov r1,ep
+#else
+ addi -16,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r27,8[sp]
+ st.w r31,12[sp]
+#endif
+ jmp [r10]
+ .size __save_r27_r31,.-__save_r27_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r27_r31. */
+ .align 2
+ .globl __return_r27_r31
+ .type __return_r27_r31,@function
+__return_r27_r31:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 0[ep],r29
+ sld.w 4[ep],r28
+ sld.w 8[ep],r27
+ sld.w 12[ep],r31
+ addi 16,sp,sp
+ mov r1,ep
+#else
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r27
+ ld.w 12[sp],r31
+ addi 16,sp,sp
+#endif
+ jmp [r31]
+ .size __return_r27_r31,.-__return_r27_r31
+#endif /* L_save_27c */
+
+#ifdef L_save_28c
+ .text
+ .align 2
+ .globl __save_r28_r31
+ .type __save_r28_r31,@function
+ /* Allocate space and save registers 28 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r28_r31,r10. */
+__save_r28_r31:
+ addi -12,sp,sp
+ st.w r29,0[sp]
+ st.w r28,4[sp]
+ st.w r31,8[sp]
+ jmp [r10]
+ .size __save_r28_r31,.-__save_r28_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r28_r31. */
+ .align 2
+ .globl __return_r28_r31
+ .type __return_r28_r31,@function
+__return_r28_r31:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r28
+ ld.w 8[sp],r31
+ addi 12,sp,sp
+ jmp [r31]
+ .size __return_r28_r31,.-__return_r28_r31
+#endif /* L_save_28c */
+
+#ifdef L_save_29c
+ .text
+ .align 2
+ .globl __save_r29_r31
+ .type __save_r29_r31,@function
+ /* Allocate space and save registers 29 & 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r29_r31,r10. */
+__save_r29_r31:
+ addi -8,sp,sp
+ st.w r29,0[sp]
+ st.w r31,4[sp]
+ jmp [r10]
+ .size __save_r29_r31,.-__save_r29_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r29_r31. */
+ .align 2
+ .globl __return_r29_r31
+ .type __return_r29_r31,@function
+__return_r29_r31:
+ ld.w 0[sp],r29
+ ld.w 4[sp],r31
+ addi 8,sp,sp
+ jmp [r31]
+ .size __return_r29_r31,.-__return_r29_r31
+#endif /* L_save_29c */
+
+#ifdef L_save_31c
+ .text
+ .align 2
+ .globl __save_r31
+ .type __save_r31,@function
+ /* Allocate space and save register 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: jarl __save_r31,r10. */
+__save_r31:
+ addi -4,sp,sp
+ st.w r31,0[sp]
+ jmp [r10]
+ .size __save_r31,.-__save_r31
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: jr __return_r31. */
+ .align 2
+ .globl __return_r31
+ .type __return_r31,@function
+__return_r31:
+ ld.w 0[sp],r31
+ addi 4,sp,sp
+ jmp [r31]
+ .size __return_r31,.-__return_r31
+#endif /* L_save_31c */
+
+#ifdef L_save_interrupt
+ .text
+ .align 2
+ .globl __save_interrupt
+ .type __save_interrupt,@function
+ /* Save registers ep, gp (r4) and r1 on the stack and load up with expected values. */
+ /* Note, 20 bytes of stack have already been allocated. */
+ /* Called via: jarl __save_interrupt,r10. */
+__save_interrupt:
+ /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */
+ st.w ep,0[sp]
+ st.w gp,4[sp]
+ st.w r1,8[sp]
+ movhi hi(__ep),r0,ep
+ movea lo(__ep),ep,ep
+ movhi hi(__gp),r0,gp
+ movea lo(__gp),gp,gp
+ jmp [r10]
+ .size __save_interrupt,.-__save_interrupt
+
+ /* Restore saved registers, deallocate stack and return from the interrupt. */
+ /* Called via: jr __return_interrupt. */
+ .align 2
+ .globl __return_interrupt
+ .type __return_interrupt,@function
+__return_interrupt:
+ ld.w 0[sp],ep
+ ld.w 4[sp],gp
+ ld.w 8[sp],r1
+ ld.w 12[sp],r10
+ ld.w 16[sp],r11
+ addi 20,sp,sp
+ reti
+ .size __return_interrupt,.-__return_interrupt
+#endif /* L_save_interrupt */
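These stubs are reached from compiler-generated prologue code rather than called directly. As a hedged illustration: with the V850 back end, a handler along these lines (the function name and variable are hypothetical) is the kind of code that makes GCC emit the jarl __save_interrupt / jr __return_interrupt pair when out-of-line prologues are enabled with -mprolog-function:

    /* 'interrupt' is the V850 function attribute for handlers.  */
    volatile int tick;

    void __attribute__ ((interrupt))
    timer_handler (void)
    {
      tick++;
    }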
+
+#ifdef L_save_all_interrupt
+ .text
+ .align 2
+ .globl __save_all_interrupt
+ .type __save_all_interrupt,@function
+ /* Save all registers except for those saved in __save_interrupt. */
+ /* Allocate enough stack space to hold all of the saved registers. */
+ /* Called via: jarl __save_all_interrupt,r10. */
+__save_all_interrupt:
+ addi -104,sp,sp
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sst.w r31,100[ep]
+ sst.w r2,96[ep]
+ sst.w gp,92[ep]
+ sst.w r6,88[ep]
+ sst.w r7,84[ep]
+ sst.w r8,80[ep]
+ sst.w r9,76[ep]
+ sst.w r11,72[ep]
+ sst.w r12,68[ep]
+ sst.w r13,64[ep]
+ sst.w r14,60[ep]
+ sst.w r15,56[ep]
+ sst.w r16,52[ep]
+ sst.w r17,48[ep]
+ sst.w r18,44[ep]
+ sst.w r19,40[ep]
+ sst.w r20,36[ep]
+ sst.w r21,32[ep]
+ sst.w r22,28[ep]
+ sst.w r23,24[ep]
+ sst.w r24,20[ep]
+ sst.w r25,16[ep]
+ sst.w r26,12[ep]
+ sst.w r27,8[ep]
+ sst.w r28,4[ep]
+ sst.w r29,0[ep]
+ mov r1,ep
+#else
+ st.w r31,100[sp]
+ st.w r2,96[sp]
+ st.w gp,92[sp]
+ st.w r6,88[sp]
+ st.w r7,84[sp]
+ st.w r8,80[sp]
+ st.w r9,76[sp]
+ st.w r11,72[sp]
+ st.w r12,68[sp]
+ st.w r13,64[sp]
+ st.w r14,60[sp]
+ st.w r15,56[sp]
+ st.w r16,52[sp]
+ st.w r17,48[sp]
+ st.w r18,44[sp]
+ st.w r19,40[sp]
+ st.w r20,36[sp]
+ st.w r21,32[sp]
+ st.w r22,28[sp]
+ st.w r23,24[sp]
+ st.w r24,20[sp]
+ st.w r25,16[sp]
+ st.w r26,12[sp]
+ st.w r27,8[sp]
+ st.w r28,4[sp]
+ st.w r29,0[sp]
+#endif
+ jmp [r10]
+ .size __save_all_interrupt,.-__save_all_interrupt
+
+ .globl __restore_all_interrupt
+ .type __restore_all_interrupt,@function
+ /* Restore all registers saved in __save_all_interrupt and
+ deallocate the stack space. */
+ /* Called via: jarl __restore_all_interrupt,r10. */
+__restore_all_interrupt:
+#ifdef __EP__
+ mov ep,r1
+ mov sp,ep
+ sld.w 100[ep],r31
+ sld.w 96[ep],r2
+ sld.w 92[ep],gp
+ sld.w 88[ep],r6
+ sld.w 84[ep],r7
+ sld.w 80[ep],r8
+ sld.w 76[ep],r9
+ sld.w 72[ep],r11
+ sld.w 68[ep],r12
+ sld.w 64[ep],r13
+ sld.w 60[ep],r14
+ sld.w 56[ep],r15
+ sld.w 52[ep],r16
+ sld.w 48[ep],r17
+ sld.w 44[ep],r18
+ sld.w 40[ep],r19
+ sld.w 36[ep],r20
+ sld.w 32[ep],r21
+ sld.w 28[ep],r22
+ sld.w 24[ep],r23
+ sld.w 20[ep],r24
+ sld.w 16[ep],r25
+ sld.w 12[ep],r26
+ sld.w 8[ep],r27
+ sld.w 4[ep],r28
+ sld.w 0[ep],r29
+ mov r1,ep
+#else
+ ld.w 100[sp],r31
+ ld.w 96[sp],r2
+ ld.w 92[sp],gp
+ ld.w 88[sp],r6
+ ld.w 84[sp],r7
+ ld.w 80[sp],r8
+ ld.w 76[sp],r9
+ ld.w 72[sp],r11
+ ld.w 68[sp],r12
+ ld.w 64[sp],r13
+ ld.w 60[sp],r14
+ ld.w 56[sp],r15
+ ld.w 52[sp],r16
+ ld.w 48[sp],r17
+ ld.w 44[sp],r18
+ ld.w 40[sp],r19
+ ld.w 36[sp],r20
+ ld.w 32[sp],r21
+ ld.w 28[sp],r22
+ ld.w 24[sp],r23
+ ld.w 20[sp],r24
+ ld.w 16[sp],r25
+ ld.w 12[sp],r26
+ ld.w 8[sp],r27
+ ld.w 4[sp],r28
+ ld.w 0[sp],r29
+#endif
+ addi 104,sp,sp
+ jmp [r10]
+ .size __restore_all_interrupt,.-__restore_all_interrupt
+#endif /* L_save_all_interrupt */
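+
+/* Note on the __EP__ paths above: ep is temporarily pointed at sp (the
+   caller's ep is parked in r1) so that the short sst.w/sld.w forms can
+   be used in place of st.w/ld.w; ep is restored from r1 afterwards.  */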
+
+#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__)
+#ifdef L_callt_save_r2_r29
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Allocate space and save registers 2, 20 .. 29 on the stack. */
+ /* Called via: callt ctoff(__callt_save_r2_r29). */
+ .align 2
+.L_save_r2_r29:
+ add -4, sp
+ st.w r2, 0[sp]
+ prepare {r20 - r29}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: callt ctoff(__callt_return_r2_r29). */
+ .align 2
+.L_return_r2_r29:
+ dispose 0, {r20-r29}
+ ld.w 0[sp], r2
+ add 4, sp
+ jmp [r31]
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_r2_r29
+ .type __callt_save_r2_r29,@function
+__callt_save_r2_r29: .short ctoff(.L_save_r2_r29)
+
+ .global __callt_return_r2_r29
+ .type __callt_return_r2_r29,@function
+__callt_return_r2_r29: .short ctoff(.L_return_r2_r29)
+
+#endif /* L_callt_save_r2_r29. */
+
+#ifdef L_callt_save_r2_r31
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */
+ /* Also allocate space for the argument save area. */
+ /* Called via: callt ctoff(__callt_save_r2_r31). */
+ .align 2
+.L_save_r2_r31:
+ add -4, sp
+ st.w r2, 0[sp]
+ prepare {r20 - r29, r31}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return to the user. */
+ /* Called via: callt ctoff(__callt_return_r2_r31). */
+ .align 2
+.L_return_r2_r31:
+ dispose 0, {r20 - r29, r31}
+ ld.w 0[sp], r2
+ addi 4, sp, sp
+ jmp [r31]
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_r2_r31
+ .type __callt_save_r2_r31,@function
+__callt_save_r2_r31: .short ctoff(.L_save_r2_r31)
+
+ .global __callt_return_r2_r31
+ .type __callt_return_r2_r31,@function
+__callt_return_r2_r31: .short ctoff(.L_return_r2_r31)
+
+#endif /* L_callt_save_r2_r31 */
+
+#ifdef L_callt_save_interrupt
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+	/* Save registers ep, gp and r1 on the stack and load ep/gp with their expected values. */
+ /* Called via: callt ctoff(__callt_save_interrupt). */
+ .align 2
+.L_save_interrupt:
+ /* SP has already been moved before callt ctoff(_save_interrupt). */
+	/* R1, R10, R11, ctpc and ctpsw have already been saved before callt ctoff(_save_interrupt). */
+ /* addi -28, sp, sp */
+ /* st.w r1, 24[sp] */
+ /* st.w r10, 12[sp] */
+ /* st.w r11, 16[sp] */
+ /* stsr ctpc, r10 */
+ /* st.w r10, 20[sp] */
+ /* stsr ctpsw, r10 */
+ /* st.w r10, 24[sp] */
+ st.w ep, 0[sp]
+ st.w gp, 4[sp]
+ st.w r1, 8[sp]
+ mov hilo(__ep),ep
+ mov hilo(__gp),gp
+ ctret
+
+ .call_table_text
+ /* Restore saved registers, deallocate stack and return from the interrupt. */
+ /* Called via: callt ctoff(__callt_restore_interrupt). */
+ .align 2
+ .globl __return_interrupt
+ .type __return_interrupt,@function
+.L_return_interrupt:
+ ld.w 24[sp], r1
+ ldsr r1, ctpsw
+ ld.w 20[sp], r1
+ ldsr r1, ctpc
+ ld.w 16[sp], r11
+ ld.w 12[sp], r10
+ ld.w 8[sp], r1
+ ld.w 4[sp], gp
+ ld.w 0[sp], ep
+ addi 28, sp, sp
+ reti
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_interrupt
+ .type __callt_save_interrupt,@function
+__callt_save_interrupt: .short ctoff(.L_save_interrupt)
+
+ .global __callt_return_interrupt
+ .type __callt_return_interrupt,@function
+__callt_return_interrupt: .short ctoff(.L_return_interrupt)
+
+#endif /* L_callt_save_interrupt */
+
+#ifdef L_callt_save_all_interrupt
+ /* Put these functions into the call table area. */
+ .call_table_text
+
+ /* Save all registers except for those saved in __save_interrupt. */
+ /* Allocate enough stack for all of the registers & 16 bytes of space. */
+ /* Called via: callt ctoff(__callt_save_all_interrupt). */
+ .align 2
+.L_save_all_interrupt:
+ addi -60, sp, sp
+#ifdef __EP__
+ mov ep, r1
+ mov sp, ep
+ sst.w r2, 56[ep]
+ sst.w r5, 52[ep]
+ sst.w r6, 48[ep]
+ sst.w r7, 44[ep]
+ sst.w r8, 40[ep]
+ sst.w r9, 36[ep]
+ sst.w r11, 32[ep]
+ sst.w r12, 28[ep]
+ sst.w r13, 24[ep]
+ sst.w r14, 20[ep]
+ sst.w r15, 16[ep]
+ sst.w r16, 12[ep]
+ sst.w r17, 8[ep]
+ sst.w r18, 4[ep]
+ sst.w r19, 0[ep]
+ mov r1, ep
+#else
+ st.w r2, 56[sp]
+ st.w r5, 52[sp]
+ st.w r6, 48[sp]
+ st.w r7, 44[sp]
+ st.w r8, 40[sp]
+ st.w r9, 36[sp]
+ st.w r11, 32[sp]
+ st.w r12, 28[sp]
+ st.w r13, 24[sp]
+ st.w r14, 20[sp]
+ st.w r15, 16[sp]
+ st.w r16, 12[sp]
+ st.w r17, 8[sp]
+ st.w r18, 4[sp]
+ st.w r19, 0[sp]
+#endif
+ prepare {r20 - r29, r31}, 0
+ ctret
+
+ /* Restore all registers saved in __save_all_interrupt
+ deallocate the stack space. */
+ /* Called via: callt ctoff(__callt_restore_all_interrupt). */
+ .align 2
+.L_restore_all_interrupt:
+ dispose 0, {r20 - r29, r31}
+#ifdef __EP__
+ mov ep, r1
+ mov sp, ep
+ sld.w 0 [ep], r19
+ sld.w 4 [ep], r18
+ sld.w 8 [ep], r17
+ sld.w 12[ep], r16
+ sld.w 16[ep], r15
+ sld.w 20[ep], r14
+ sld.w 24[ep], r13
+ sld.w 28[ep], r12
+ sld.w 32[ep], r11
+ sld.w 36[ep], r9
+ sld.w 40[ep], r8
+ sld.w 44[ep], r7
+ sld.w 48[ep], r6
+ sld.w 52[ep], r5
+ sld.w 56[ep], r2
+ mov r1, ep
+#else
+ ld.w 0 [sp], r19
+ ld.w 4 [sp], r18
+ ld.w 8 [sp], r17
+ ld.w 12[sp], r16
+ ld.w 16[sp], r15
+ ld.w 20[sp], r14
+ ld.w 24[sp], r13
+ ld.w 28[sp], r12
+ ld.w 32[sp], r11
+ ld.w 36[sp], r9
+ ld.w 40[sp], r8
+ ld.w 44[sp], r7
+ ld.w 48[sp], r6
+ ld.w 52[sp], r5
+ ld.w 56[sp], r2
+#endif
+ addi 60, sp, sp
+ ctret
+
+ /* Place the offsets of the start of these routines into the call table. */
+ .call_table_data
+
+ .global __callt_save_all_interrupt
+ .type __callt_save_all_interrupt,@function
+__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt)
+
+ .global __callt_restore_all_interrupt
+ .type __callt_restore_all_interrupt,@function
+__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt)
+
+#endif /* L_callt_save_all_interrupt */
+
+
+#define MAKE_CALLT_FUNCS( START ) \
+ .call_table_text ;\
+ .align 2 ;\
+ /* Allocate space and save registers START .. r29 on the stack. */ ;\
+ /* Called via: callt ctoff(__callt_save_START_r29). */ ;\
+.L_save_##START##_r29: ;\
+ prepare { START - r29 }, 0 ;\
+ ctret ;\
+ ;\
+ /* Restore saved registers, deallocate stack and return. */ ;\
+ /* Called via: callt ctoff(__return_START_r29). */ ;\
+ .align 2 ;\
+.L_return_##START##_r29: ;\
+ dispose 0, { START - r29 }, r31 ;\
+ ;\
+ /* Place the offsets of the start of these funcs into the call table. */;\
+ .call_table_data ;\
+ ;\
+ .global __callt_save_##START##_r29 ;\
+ .type __callt_save_##START##_r29,@function ;\
+__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\
+ ;\
+ .global __callt_return_##START##_r29 ;\
+ .type __callt_return_##START##_r29,@function ;\
+__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 )
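+
+/* For example, MAKE_CALLT_FUNCS (r25) emits a .L_save_r25_r29 routine
+   built around "prepare {r25 - r29}, 0", a matching .L_return_r25_r29
+   that uses "dispose 0, {r25 - r29}, r31", plus the call-table entries
+   __callt_save_r25_r29 and __callt_return_r25_r29.  */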
+
+
+#define MAKE_CALLT_CFUNCS( START ) \
+ .call_table_text ;\
+ .align 2 ;\
+ /* Allocate space and save registers START .. r31 on the stack. */ ;\
+ /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\
+.L_save_##START##_r31c: ;\
+ prepare { START - r29, r31}, 0 ;\
+ ctret ;\
+ ;\
+ /* Restore saved registers, deallocate stack and return. */ ;\
+ /* Called via: callt ctoff(__return_START_r31c). */ ;\
+ .align 2 ;\
+.L_return_##START##_r31c: ;\
+ dispose 0, { START - r29, r31}, r31 ;\
+ ;\
+ /* Place the offsets of the start of these funcs into the call table. */;\
+ .call_table_data ;\
+ ;\
+ .global __callt_save_##START##_r31c ;\
+ .type __callt_save_##START##_r31c,@function ;\
+__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\
+ ;\
+ .global __callt_return_##START##_r31c ;\
+ .type __callt_return_##START##_r31c,@function ;\
+__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c )
+
+
+#ifdef L_callt_save_20
+ MAKE_CALLT_FUNCS (r20)
+#endif
+#ifdef L_callt_save_21
+ MAKE_CALLT_FUNCS (r21)
+#endif
+#ifdef L_callt_save_22
+ MAKE_CALLT_FUNCS (r22)
+#endif
+#ifdef L_callt_save_23
+ MAKE_CALLT_FUNCS (r23)
+#endif
+#ifdef L_callt_save_24
+ MAKE_CALLT_FUNCS (r24)
+#endif
+#ifdef L_callt_save_25
+ MAKE_CALLT_FUNCS (r25)
+#endif
+#ifdef L_callt_save_26
+ MAKE_CALLT_FUNCS (r26)
+#endif
+#ifdef L_callt_save_27
+ MAKE_CALLT_FUNCS (r27)
+#endif
+#ifdef L_callt_save_28
+ MAKE_CALLT_FUNCS (r28)
+#endif
+#ifdef L_callt_save_29
+ MAKE_CALLT_FUNCS (r29)
+#endif
+
+#ifdef L_callt_save_20c
+ MAKE_CALLT_CFUNCS (r20)
+#endif
+#ifdef L_callt_save_21c
+ MAKE_CALLT_CFUNCS (r21)
+#endif
+#ifdef L_callt_save_22c
+ MAKE_CALLT_CFUNCS (r22)
+#endif
+#ifdef L_callt_save_23c
+ MAKE_CALLT_CFUNCS (r23)
+#endif
+#ifdef L_callt_save_24c
+ MAKE_CALLT_CFUNCS (r24)
+#endif
+#ifdef L_callt_save_25c
+ MAKE_CALLT_CFUNCS (r25)
+#endif
+#ifdef L_callt_save_26c
+ MAKE_CALLT_CFUNCS (r26)
+#endif
+#ifdef L_callt_save_27c
+ MAKE_CALLT_CFUNCS (r27)
+#endif
+#ifdef L_callt_save_28c
+ MAKE_CALLT_CFUNCS (r28)
+#endif
+#ifdef L_callt_save_29c
+ MAKE_CALLT_CFUNCS (r29)
+#endif
+
+
+#ifdef L_callt_save_31c
+ .call_table_text
+ .align 2
+ /* Allocate space and save register r31 on the stack. */
+ /* Called via: callt ctoff(__callt_save_r31c). */
+.L_callt_save_r31c:
+ prepare {r31}, 0
+ ctret
+
+ /* Restore saved registers, deallocate stack and return. */
+ /* Called via: callt ctoff(__return_r31c). */
+ .align 2
+.L_callt_return_r31c:
+ dispose 0, {r31}, r31
+
+ /* Place the offsets of the start of these funcs into the call table. */
+ .call_table_data
+
+ .global __callt_save_r31c
+ .type __callt_save_r31c,@function
+__callt_save_r31c: .short ctoff(.L_callt_save_r31c)
+
+ .global __callt_return_r31c
+ .type __callt_return_r31c,@function
+__callt_return_r31c: .short ctoff(.L_callt_return_r31c)
+#endif
+
+#endif /* __v850e__ || __v850e1__ || __v850e2__ || __v850e2v3__ */
+
+/* libgcc2 routines for NEC V850. */
+/* Double-integer (64-bit) arithmetic operations. */
+
+#ifdef L_negdi2
+ .text
+ .global ___negdi2
+ .type ___negdi2, @function
+___negdi2:
+ not r6, r10
+ add 1, r10
+ setf l, r6
+ not r7, r11
+ add r6, r11
+ jmp [lp]
+
+ .size ___negdi2,.-___negdi2
+#endif
+
+#ifdef L_cmpdi2
+ .text
+ .global ___cmpdi2
+ .type ___cmpdi2,@function
+___cmpdi2:
+	# Signed comparison between the high words.
+ cmp r9, r7
+ be .L_cmpdi_cmp_low
+ setf ge, r10
+ setf gt, r6
+ add r6, r10
+ jmp [lp]
+.L_cmpdi_cmp_low:
+	# Unsigned comparison between the low words.
+ cmp r8, r6
+ setf nl, r10
+ setf h, r6
+ add r6, r10
+ jmp [lp]
+ .size ___cmpdi2, . - ___cmpdi2
+#endif
+
+#ifdef L_ucmpdi2
+ .text
+ .global ___ucmpdi2
+ .type ___ucmpdi2,@function
+___ucmpdi2:
+	cmp r9, r7  # Check whether the high words are the same.
+ bne .L_ucmpdi_check_psw
+	cmp r8, r6  # Compare the low words.
+.L_ucmpdi_check_psw:
+ setf nl, r10 #
+ setf h, r6 #
+	add r6, r10 # Combine the results of the NL and H comparisons.
+ jmp [lp]
+ .size ___ucmpdi2, . - ___ucmpdi2
+#endif
+
+#ifdef L_muldi3
+ .text
+ .global ___muldi3
+ .type ___muldi3,@function
+___muldi3:
+#ifdef __v850__
+ jarl __save_r26_r31, r10
+ addi 16, sp, sp
+ mov r6, r28
+ shr 15, r28
+ movea lo(32767), r0, r14
+ and r14, r28
+ mov r8, r10
+ shr 15, r10
+ and r14, r10
+ mov r6, r19
+ shr 30, r19
+ mov r7, r12
+ shl 2, r12
+ or r12, r19
+ and r14, r19
+ mov r8, r13
+ shr 30, r13
+ mov r9, r12
+ shl 2, r12
+ or r12, r13
+ and r14, r13
+ mov r7, r11
+ shr 13, r11
+ and r14, r11
+ mov r9, r31
+ shr 13, r31
+ and r14, r31
+ mov r7, r29
+ shr 28, r29
+ and r14, r29
+ mov r9, r12
+ shr 28, r12
+ and r14, r12
+ and r14, r6
+ and r14, r8
+ mov r6, r14
+ mulh r8, r14
+ mov r6, r16
+ mulh r10, r16
+ mov r6, r18
+ mulh r13, r18
+ mov r6, r15
+ mulh r31, r15
+ mulh r12, r6
+ mov r28, r17
+ mulh r10, r17
+ add -16, sp
+ mov r28, r12
+ mulh r8, r12
+ add r17, r18
+ mov r28, r17
+ mulh r31, r17
+ add r12, r16
+ mov r28, r12
+ mulh r13, r12
+ add r17, r6
+ mov r19, r17
+ add r12, r15
+ mov r19, r12
+ mulh r8, r12
+ mulh r10, r17
+ add r12, r18
+ mov r19, r12
+ mulh r13, r12
+ add r17, r15
+ mov r11, r13
+ mulh r8, r13
+ add r12, r6
+ mov r11, r12
+ mulh r10, r12
+ add r13, r15
+ mulh r29, r8
+ add r12, r6
+ mov r16, r13
+ shl 15, r13
+ add r14, r13
+ mov r18, r12
+ shl 30, r12
+ mov r13, r26
+ add r12, r26
+ shr 15, r14
+ movhi hi(131071), r0, r12
+ movea lo(131071), r12, r13
+ and r13, r14
+ mov r16, r12
+ and r13, r12
+ add r12, r14
+ mov r18, r12
+ shl 15, r12
+ and r13, r12
+ add r12, r14
+ shr 17, r14
+ shr 17, r16
+ add r14, r16
+ shl 13, r15
+ shr 2, r18
+ add r18, r15
+ add r15, r16
+ mov r16, r27
+ add r8, r6
+ shl 28, r6
+ add r6, r27
+ mov r26, r10
+ mov r27, r11
+ jr __return_r26_r31
+#else /* defined(__v850e__) */
+ /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */
+ /* r7 r6 r9 r8 */
+ mov r8, r10
+ mulu r7, r8, r0 /* Ahi * Blo */
+ mulu r6, r9, r0 /* Alo * Bhi */
+ mulu r6, r10, r11 /* Alo * Blo */
+ add r8, r11
+ add r9, r11
+ jmp [r31]
+#endif /* defined(__v850e__) */
+ .size ___muldi3, . - ___muldi3
+#endif
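+
+/* A minimal C sketch of the v850e ___muldi3 path above (illustration
+   only, not part of the runtime).  Only the low 64 bits of the product
+   are kept, so the Ahi*Bhi term drops out and the low halves of the two
+   cross products Ahi*Blo and Alo*Bhi are simply added into the high
+   word of Alo*Blo:
+
+     unsigned long long
+     muldi3_sketch (unsigned long long a, unsigned long long b)
+     {
+       unsigned int alo = (unsigned int) a, ahi = (unsigned int) (a >> 32);
+       unsigned int blo = (unsigned int) b, bhi = (unsigned int) (b >> 32);
+       unsigned long long r = (unsigned long long) alo * blo;
+       r += (unsigned long long) (ahi * blo + alo * bhi) << 32;
+       return r;
+     }
+*/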
+
diff --git a/gcc/config/v850/predicates.md b/gcc/config/v850/predicates.md
new file mode 100644
index 000000000..129f00d59
--- /dev/null
+++ b/gcc/config/v850/predicates.md
@@ -0,0 +1,501 @@
+;; Predicate definitions for NEC V850.
+;; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return true if OP is either a register or 0.
+
+(define_predicate "reg_or_0_operand"
+ (match_code "reg,subreg,const_int,const_double")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return INTVAL (op) == 0;
+
+ else if (GET_CODE (op) == CONST_DOUBLE)
+ return satisfies_constraint_G (op);
+
+ else
+ return register_operand (op, mode);
+})
+
+;; Return true if OP is either a register or a signed five-bit
+;; integer.
+
+(define_predicate "reg_or_int5_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return CONST_OK_FOR_J (INTVAL (op));
+
+ else
+ return register_operand (op, mode);
+})
+
+;; Return true if OP is either a register or a signed nine-bit
+;; integer.
+
+(define_predicate "reg_or_int9_operand"
+ (match_code "reg,subreg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return CONST_OK_FOR_O (INTVAL (op));
+
+ return register_operand (op, mode);
+})
+
+;; Return true if OP is either a register or a const integer.
+
+(define_predicate "reg_or_const_operand"
+ (match_code "reg,const_int")
+{
+ if (GET_CODE (op) == CONST_INT)
+ return TRUE;
+
+ return register_operand (op, mode);
+})
+
+;; Return true if OP is an even-numbered register.
+
+(define_predicate "even_reg_operand"
+ (match_code "reg")
+{
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || ((REGNO (op) > 0) && (REGNO (op) < 32)
+ && ((REGNO (op) & 1)==0))));
+})
+
+;; Return true if OP is a valid call operand.
+
+(define_predicate "call_address_operand"
+ (match_code "reg,symbol_ref")
+{
+ /* Only registers are valid call operands if TARGET_LONG_CALLS. */
+ if (TARGET_LONG_CALLS)
+ return GET_CODE (op) == REG;
+ return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG);
+})
+
+;; Return true if OP is a valid source operand for SImode move.
+
+(define_predicate "movsi_source_operand"
+ (match_code "label_ref,symbol_ref,const_int,const_double,const,high,mem,reg,subreg")
+{
+  /* Some constants, as well as symbolic operands,
+     must be loaded with HIGH & LO_SUM patterns.  */
+ if (CONSTANT_P (op)
+ && GET_CODE (op) != HIGH
+ && !(GET_CODE (op) == CONST_INT
+ && (CONST_OK_FOR_J (INTVAL (op))
+ || CONST_OK_FOR_K (INTVAL (op))
+ || CONST_OK_FOR_L (INTVAL (op)))))
+ return special_symbolref_operand (op, mode);
+ else
+ return general_operand (op, mode);
+})
+
+;; Return true if OP is a valid operand for 23-bit displacement
+;; operations.
+
+(define_predicate "disp23_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) == CONST_INT
+ && ((unsigned)(INTVAL (op)) >= 0x8000)
+ && ((unsigned)(INTVAL (op)) < 0x400000))
+ return 1;
+ else
+ return 0;
+})
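+
+;; For example, 0x8000 and 0x3ffffc are accepted, while 0x7fff
+;; (presumably reachable via the ordinary 16-bit displacement forms)
+;; and 0x400000 are rejected.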
+
+;; Return true if OP is a reference to a small-data-area symbol,
+;; possibly plus a 16-bit signed offset.
+
+(define_predicate "special_symbolref_operand"
+ (match_code "symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && satisfies_constraint_K (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return (SYMBOL_REF_FLAGS (op)
+ & (SYMBOL_FLAG_ZDA | SYMBOL_FLAG_TDA | SYMBOL_FLAG_SDA)) != 0;
+
+ return FALSE;
+})
+
+;; Return true if OP is a valid operand for bit-related operations:
+;; a constant with only a single 1 bit in its binary representation.
+
+(define_predicate "power_of_two_operand"
+ (match_code "const_int")
+{
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+
+ if (exact_log2 (INTVAL (op)) == -1)
+ return 0;
+ return 1;
+})
+
+;; Return nonzero if the given RTX is suitable for collapsing into a
+;; jump to a function prologue.
+
+(define_predicate "pattern_is_ok_for_prologue"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+ rtx vector_element;
+
+ /* If there are no registers to save then the function prologue
+ is not suitable. */
+ if (count <= (TARGET_LONG_CALLS ? 3 : 2))
+ return 0;
+
+ /* The pattern matching has already established that we are adjusting the
+     stack and pushing at least one register.  We must now check the
+ remaining entries in the vector to make sure that they are also register
+ pushes, except for the last entry which should be a CLOBBER of r10.
+
+ The test below performs the C equivalent of this machine description
+ pattern match:
+
+ (set (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 2 "immediate_operand" "i")))
+ (match_operand:SI 3 "register_is_ok_for_epilogue" "r"))
+
+ */
+
+ for (i = 2; i < count - (TARGET_LONG_CALLS ? 2: 1); i++)
+ {
+ rtx dest;
+ rtx src;
+ rtx plus;
+
+ vector_element = XVECEXP (op, 0, i);
+
+ if (GET_CODE (vector_element) != SET)
+ return 0;
+
+ dest = SET_DEST (vector_element);
+ src = SET_SRC (vector_element);
+
+ if (GET_CODE (dest) != MEM
+ || GET_MODE (dest) != SImode
+ || GET_CODE (src) != REG
+ || GET_MODE (src) != SImode
+ || ! register_is_ok_for_epilogue (src, SImode))
+ return 0;
+
+ plus = XEXP (dest, 0);
+
+ if ( GET_CODE (plus) != PLUS
+ || GET_CODE (XEXP (plus, 0)) != REG
+ || GET_MODE (XEXP (plus, 0)) != SImode
+ || REGNO (XEXP (plus, 0)) != STACK_POINTER_REGNUM
+ || GET_CODE (XEXP (plus, 1)) != CONST_INT)
+ return 0;
+
+ /* If the register is being pushed somewhere other than the stack
+ space just acquired by the first operand then abandon this quest.
+ Note: the test is <= because both values are negative. */
+ if (INTVAL (XEXP (plus, 1))
+ <= INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)))
+ {
+ return 0;
+ }
+ }
+
+ /* Make sure that the last entries in the vector are clobbers. */
+ vector_element = XVECEXP (op, 0, i++);
+
+ if (GET_CODE (vector_element) != CLOBBER
+ || GET_CODE (XEXP (vector_element, 0)) != REG
+ || REGNO (XEXP (vector_element, 0)) != 10)
+ return 0;
+
+ if (TARGET_LONG_CALLS)
+ {
+ vector_element = XVECEXP (op, 0, i++);
+
+ if (GET_CODE (vector_element) != CLOBBER
+ || GET_CODE (XEXP (vector_element, 0)) != REG
+ || REGNO (XEXP (vector_element, 0)) != 11)
+ return 0;
+ }
+
+ return i == count;
+})
+
+;; Return nonzero if the given RTX is suitable for collapsing into
+;; a jump to a function epilogue.
+
+(define_predicate "pattern_is_ok_for_epilogue"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+
+ /* If there are no registers to restore then the function epilogue
+ is not suitable. */
+ if (count <= 2)
+ return 0;
+
+ /* The pattern matching has already established that we are performing a
+ function epilogue and that we are popping at least one register. We must
+ now check the remaining entries in the vector to make sure that they are
+ also register pops. There is no good reason why there should ever be
+ anything else in this vector, but being paranoid always helps...
+
+ The test below performs the C equivalent of this machine description
+ pattern match:
+
+ (set (match_operand:SI n "register_is_ok_for_epilogue" "r")
+ (mem:SI (plus:SI (reg:SI 3) (match_operand:SI n "immediate_operand" "i"))))
+ */
+
+ for (i = 2; i < count; i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+ rtx dest;
+ rtx src;
+ rtx plus;
+
+ if (GET_CODE (vector_element) != SET)
+ return 0;
+
+ dest = SET_DEST (vector_element);
+ src = SET_SRC (vector_element);
+
+ if (GET_CODE (dest) != REG
+ || GET_MODE (dest) != SImode
+ || ! register_is_ok_for_epilogue (dest, SImode)
+ || GET_CODE (src) != MEM
+ || GET_MODE (src) != SImode)
+ return 0;
+
+ plus = XEXP (src, 0);
+
+ if (GET_CODE (plus) != PLUS
+ || GET_CODE (XEXP (plus, 0)) != REG
+ || GET_MODE (XEXP (plus, 0)) != SImode
+ || REGNO (XEXP (plus, 0)) != STACK_POINTER_REGNUM
+ || GET_CODE (XEXP (plus, 1)) != CONST_INT)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Return true if the given RTX is a register which can be restored by
+;; a function epilogue.
+
+(define_predicate "register_is_ok_for_epilogue"
+ (match_code "reg")
+{
+ /* The save/restore routines can only cope with registers 20 - 31. */
+ return ((GET_CODE (op) == REG)
+ && (((REGNO (op) >= 20) && REGNO (op) <= 31)));
+})
+
+;; Return nonzero if the given RTX is suitable for collapsing into a
+;; DISPOSE instruction.
+
+(define_predicate "pattern_is_ok_for_dispose"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+
+ /* If there are no registers to restore then
+ the dispose instruction is not suitable. */
+ if (count <= 2)
+ return 0;
+
+ /* The pattern matching has already established that we are performing a
+ function epilogue and that we are popping at least one register. We must
+ now check the remaining entries in the vector to make sure that they are
+ also register pops. There is no good reason why there should ever be
+ anything else in this vector, but being paranoid always helps...
+
+ The test below performs the C equivalent of this machine description
+ pattern match:
+
+ (set (match_operand:SI n "register_is_ok_for_epilogue" "r")
+ (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI n "immediate_operand" "i"))))
+ */
+
+ for (i = 3; i < count; i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+ rtx dest;
+ rtx src;
+ rtx plus;
+
+ if (GET_CODE (vector_element) != SET)
+ return 0;
+
+ dest = SET_DEST (vector_element);
+ src = SET_SRC (vector_element);
+
+ if ( GET_CODE (dest) != REG
+ || GET_MODE (dest) != SImode
+ || ! register_is_ok_for_epilogue (dest, SImode)
+ || GET_CODE (src) != MEM
+ || GET_MODE (src) != SImode)
+ return 0;
+
+ plus = XEXP (src, 0);
+
+ if ( GET_CODE (plus) != PLUS
+ || GET_CODE (XEXP (plus, 0)) != REG
+ || GET_MODE (XEXP (plus, 0)) != SImode
+ || REGNO (XEXP (plus, 0)) != STACK_POINTER_REGNUM
+ || GET_CODE (XEXP (plus, 1)) != CONST_INT)
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Return nonzero if the given RTX is suitable for collapsing into a
+;; PREPARE instruction.
+
+(define_predicate "pattern_is_ok_for_prepare"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+
+ /* If there are no registers to restore then the prepare instruction
+ is not suitable. */
+ if (count <= 1)
+ return 0;
+
+ /* The pattern matching has already established that we are adjusting the
+     stack and pushing at least one register.  We must now check the
+ remaining entries in the vector to make sure that they are also register
+ pushes.
+
+ The test below performs the C equivalent of this machine description
+ pattern match:
+
+ (set (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 2 "immediate_operand" "i")))
+ (match_operand:SI 3 "register_is_ok_for_epilogue" "r"))
+
+ */
+
+ for (i = 1; i < count; i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+ rtx dest;
+ rtx src;
+ rtx plus;
+
+ if (GET_CODE (vector_element) == CLOBBER)
+ continue;
+
+ if (GET_CODE (vector_element) != SET)
+ return 0;
+
+ dest = SET_DEST (vector_element);
+ src = SET_SRC (vector_element);
+
+ if ( GET_CODE (dest) != MEM
+ || GET_MODE (dest) != SImode
+ || GET_CODE (src) != REG
+ || GET_MODE (src) != SImode
+ || ! register_is_ok_for_epilogue (src, SImode)
+ )
+ return 0;
+
+ plus = XEXP (dest, 0);
+
+ if ( GET_CODE (plus) != PLUS
+ || GET_CODE (XEXP (plus, 0)) != REG
+ || GET_MODE (XEXP (plus, 0)) != SImode
+ || REGNO (XEXP (plus, 0)) != STACK_POINTER_REGNUM
+ || GET_CODE (XEXP (plus, 1)) != CONST_INT)
+ return 0;
+
+ /* If the register is being pushed somewhere other than the stack
+ space just acquired by the first operand then abandon this quest.
+ Note: the test is <= because both values are negative. */
+ if (INTVAL (XEXP (plus, 1))
+ < INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)))
+ return 0;
+ }
+
+ return 1;
+})
+
+;; Return true if OP is a valid operand for bit-related operations:
+;; a constant with only a single 0 bit in its binary representation.
+
+(define_predicate "not_power_of_two_operand"
+ (match_code "const_int")
+{
+ unsigned int mask;
+
+ if (mode == QImode)
+ mask = 0xff;
+ else if (mode == HImode)
+ mask = 0xffff;
+ else if (mode == SImode)
+ mask = 0xffffffff;
+ else
+ return 0;
+
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+
+ if (exact_log2 (~INTVAL (op) & mask) == -1)
+ return 0;
+ return 1;
+})
+
+;; Return true if OP is a floating-point operand with the value 1.0.
+
+(define_predicate "const_float_1_operand"
+  (match_code "const_double")
+{
+ if (GET_CODE (op) != CONST_DOUBLE
+ || mode != GET_MODE (op)
+ || (mode != DFmode && mode != SFmode))
+ return 0;
+
+ return op == CONST1_RTX(mode);
+})
+
+;; Return true if OP is a floating-point operand with the value 0.0.
+
+(define_predicate "const_float_0_operand"
+  (match_code "const_double")
+{
+ if (GET_CODE (op) != CONST_DOUBLE
+ || mode != GET_MODE (op)
+ || (mode != DFmode && mode != SFmode))
+ return 0;
+
+ return op == CONST0_RTX(mode);
+})
+
+
diff --git a/gcc/config/v850/t-v850 b/gcc/config/v850/t-v850
new file mode 100644
index 000000000..076b00d60
--- /dev/null
+++ b/gcc/config/v850/t-v850
@@ -0,0 +1,114 @@
+# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2008, 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = v850/lib1funcs.asm
+LIB1ASMFUNCS = _mulsi3 \
+ _divsi3 \
+ _udivsi3 \
+ _modsi3 \
+ _umodsi3 \
+ _save_2 \
+ _save_20 \
+ _save_21 \
+ _save_22 \
+ _save_23 \
+ _save_24 \
+ _save_25 \
+ _save_26 \
+ _save_27 \
+ _save_28 \
+ _save_29 \
+ _save_2c \
+ _save_20c \
+ _save_21c \
+ _save_22c \
+ _save_23c \
+ _save_24c \
+ _save_25c \
+ _save_26c \
+ _save_27c \
+ _save_28c \
+ _save_29c \
+ _save_31c \
+ _save_interrupt \
+ _save_all_interrupt \
+ _callt_save_20 \
+ _callt_save_21 \
+ _callt_save_22 \
+ _callt_save_23 \
+ _callt_save_24 \
+ _callt_save_25 \
+ _callt_save_26 \
+ _callt_save_27 \
+ _callt_save_28 \
+ _callt_save_29 \
+ _callt_save_20c \
+ _callt_save_21c \
+ _callt_save_22c \
+ _callt_save_23c \
+ _callt_save_24c \
+ _callt_save_25c \
+ _callt_save_26c \
+ _callt_save_27c \
+ _callt_save_28c \
+ _callt_save_29c \
+ _callt_save_31c \
+ _callt_save_interrupt \
+ _callt_save_all_interrupt \
+ _callt_save_r2_r29 \
+ _callt_save_r2_r31 \
+ _negdi2 \
+ _cmpdi2 \
+ _ucmpdi2 \
+ _muldi3
+
+# We want fine-grained libraries, so use the new code to build the
+# floating-point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Create target-specific versions of the libraries
+MULTILIB_OPTIONS = mv850/mv850e/mv850e2/mv850e2v3
+MULTILIB_DIRNAMES = v850 v850e v850e2 v850e2v3
+INSTALL_LIBGCC = install-multilib
+MULTILIB_MATCHES = mv850e=mv850e1
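+# i.e. objects built with -mv850e1 share the mv850e multilib instead
+# of getting a directory of their own.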
+
+TCFLAGS = -mno-app-regs -msmall-sld -Wa,-mwarn-signed-overflow -Wa,-mwarn-unsigned-overflow
+
+v850-c.o: $(srcdir)/config/v850/v850-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(CPPLIB_H) $(TREE_H) $(C_PRAGMA_H) $(GGC_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/v850/v850-c.c
+
+# Local Variables:
+# mode: Makefile
+# End:
diff --git a/gcc/config/v850/t-v850e b/gcc/config/v850/t-v850e
new file mode 100644
index 000000000..1eb768520
--- /dev/null
+++ b/gcc/config/v850/t-v850e
@@ -0,0 +1,112 @@
+# Copyright (C) 2003, 2008, 2009, 2010 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = v850/lib1funcs.asm
+LIB1ASMFUNCS = _mulsi3 \
+ _divsi3 \
+ _udivsi3 \
+ _modsi3 \
+ _umodsi3 \
+ _save_2 \
+ _save_20 \
+ _save_21 \
+ _save_22 \
+ _save_23 \
+ _save_24 \
+ _save_25 \
+ _save_26 \
+ _save_27 \
+ _save_28 \
+ _save_29 \
+ _save_2c \
+ _save_20c \
+ _save_21c \
+ _save_22c \
+ _save_23c \
+ _save_24c \
+ _save_25c \
+ _save_26c \
+ _save_27c \
+ _save_28c \
+ _save_29c \
+ _save_31c \
+ _save_interrupt \
+ _save_all_interrupt \
+ _callt_save_20 \
+ _callt_save_21 \
+ _callt_save_22 \
+ _callt_save_23 \
+ _callt_save_24 \
+ _callt_save_25 \
+ _callt_save_26 \
+ _callt_save_27 \
+ _callt_save_28 \
+ _callt_save_29 \
+ _callt_save_20c \
+ _callt_save_21c \
+ _callt_save_22c \
+ _callt_save_23c \
+ _callt_save_24c \
+ _callt_save_25c \
+ _callt_save_26c \
+ _callt_save_27c \
+ _callt_save_28c \
+ _callt_save_29c \
+ _callt_save_31c \
+ _callt_save_interrupt \
+ _callt_save_all_interrupt \
+ _callt_save_r2_r29 \
+ _callt_save_r2_r31 \
+ _negdi2 \
+ _cmpdi2 \
+ _ucmpdi2 \
+ _muldi3
+
+# We want fine-grained libraries, so use the new code to build the
+# floating-point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+# Create target-specific versions of the libraries
+MULTILIB_OPTIONS = mv850
+MULTILIB_DIRNAMES = v850
+INSTALL_LIBGCC = install-multilib
+
+TCFLAGS = -mno-app-regs -msmall-sld -Wa,-mwarn-signed-overflow -Wa,-mwarn-unsigned-overflow
+
+v850-c.o: $(srcdir)/config/v850/v850-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(CPPLIB_H) $(TREE_H) $(C_PRAGMA_H) $(GGC_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/v850/v850-c.c
+
+# Local Variables:
+# mode: Makefile
+# End:
diff --git a/gcc/config/v850/v850-c.c b/gcc/config/v850/v850-c.c
new file mode 100644
index 000000000..db881cf4d
--- /dev/null
+++ b/gcc/config/v850/v850-c.c
@@ -0,0 +1,273 @@
+/* v850 specific, C compiler specific functions.
+ Copyright (C) 2000, 2001, 2002, 2003, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "tree.h"
+#include "c-family/c-pragma.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "tm_p.h"
+
+#ifndef streq
+#define streq(a,b) (strcmp (a, b) == 0)
+#endif
+
+static int pop_data_area (v850_data_area);
+static int push_data_area (v850_data_area);
+static void mark_current_function_as_interrupt (void);
+
+/* Push a data area onto the stack. */
+
+static int
+push_data_area (v850_data_area data_area)
+{
+ data_area_stack_element * elem;
+
+ elem = (data_area_stack_element *) xmalloc (sizeof (* elem));
+
+ if (elem == NULL)
+ return 0;
+
+ elem->prev = data_area_stack;
+ elem->data_area = data_area;
+
+ data_area_stack = elem;
+
+ return 1;
+}
+
+/* Remove a data area from the stack. */
+
+static int
+pop_data_area (v850_data_area data_area)
+{
+ if (data_area_stack == NULL)
+    warning (OPT_Wpragmas, "#pragma GHS endXXX found without "
+ "previous startXXX");
+ else if (data_area != data_area_stack->data_area)
+ warning (OPT_Wpragmas, "#pragma GHS endXXX does not match "
+ "previous startXXX");
+ else
+ {
+ data_area_stack_element * elem;
+
+ elem = data_area_stack;
+ data_area_stack = data_area_stack->prev;
+
+ free (elem);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Set the machine specific 'interrupt' attribute on the current function. */
+
+static void
+mark_current_function_as_interrupt (void)
+{
+ tree name;
+
+ if (current_function_decl == NULL_TREE)
+ {
+ warning (0, "cannot set interrupt attribute: no current function");
+ return;
+ }
+
+ name = get_identifier ("interrupt");
+
+ if (name == NULL_TREE || TREE_CODE (name) != IDENTIFIER_NODE)
+ {
+ warning (0, "cannot set interrupt attribute: no such identifier");
+ return;
+ }
+
+ decl_attributes (&current_function_decl,
+ tree_cons (name, NULL_TREE, NULL_TREE), 0);
+}
+
+
+/* Support for GHS pragmata. */
+
+void
+ghs_pragma_section (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ int repeat = 0;
+
+ /* #pragma ghs section [name = alias [, name = alias [, ...]]] */
+ do
+ {
+ tree x;
+ enum cpp_ttype type;
+ tree sect_ident;
+ const char *sect, *alias;
+ enum GHS_section_kind kind;
+
+ type = pragma_lex (&x);
+
+ if (type == CPP_EOF && !repeat)
+ goto reset;
+ else if (type == CPP_NAME)
+ {
+ sect_ident = x;
+ sect = IDENTIFIER_POINTER (sect_ident);
+ }
+ else
+ goto bad;
+ repeat = 0;
+
+ if (pragma_lex (&x) != CPP_EQ)
+ goto bad;
+ if (pragma_lex (&x) != CPP_NAME)
+ goto bad;
+
+ alias = IDENTIFIER_POINTER (x);
+
+ type = pragma_lex (&x);
+ if (type == CPP_COMMA)
+ repeat = 1;
+ else if (type != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs section");
+
+ if (streq (sect, "data")) kind = GHS_SECTION_KIND_DATA;
+ else if (streq (sect, "text")) kind = GHS_SECTION_KIND_TEXT;
+ else if (streq (sect, "rodata")) kind = GHS_SECTION_KIND_RODATA;
+ else if (streq (sect, "const")) kind = GHS_SECTION_KIND_RODATA;
+ else if (streq (sect, "rosdata")) kind = GHS_SECTION_KIND_ROSDATA;
+ else if (streq (sect, "rozdata")) kind = GHS_SECTION_KIND_ROZDATA;
+ else if (streq (sect, "sdata")) kind = GHS_SECTION_KIND_SDATA;
+ else if (streq (sect, "tdata")) kind = GHS_SECTION_KIND_TDATA;
+ else if (streq (sect, "zdata")) kind = GHS_SECTION_KIND_ZDATA;
+ /* According to GHS beta documentation, the following should not be
+ allowed! */
+ else if (streq (sect, "bss")) kind = GHS_SECTION_KIND_BSS;
+ else if (streq (sect, "zbss")) kind = GHS_SECTION_KIND_ZDATA;
+ else
+ {
+ warning (0, "unrecognized section name %qE", sect_ident);
+ return;
+ }
+
+ if (streq (alias, "default"))
+ GHS_current_section_names [kind] = NULL;
+ else
+ GHS_current_section_names [kind] =
+ build_string (strlen (alias) + 1, alias);
+ }
+ while (repeat);
+
+ return;
+
+ bad:
+ warning (OPT_Wpragmas, "malformed #pragma ghs section");
+ return;
+
+ reset:
+ /* #pragma ghs section \n: Reset all section names back to their defaults. */
+ {
+ int i;
+
+ for (i = COUNT_OF_GHS_SECTION_KINDS; i--;)
+ GHS_current_section_names [i] = NULL;
+ }
+}
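+
+/* For illustration only (the alias names here are invented), a
+   translation unit might contain:
+
+     #pragma ghs section rodata=my_consts, sdata=fast_vars
+     ...
+     #pragma ghs section
+
+   The first form redirects the named section kinds to the given
+   aliases; the bare form resets every kind back to its default.  */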
+
+void
+ghs_pragma_interrupt (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs interrupt");
+
+ mark_current_function_as_interrupt ();
+}
+
+void
+ghs_pragma_starttda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs starttda");
+
+ push_data_area (DATA_AREA_TDA);
+}
+
+void
+ghs_pragma_startsda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs startsda");
+
+ push_data_area (DATA_AREA_SDA);
+}
+
+void
+ghs_pragma_startzda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs startzda");
+
+ push_data_area (DATA_AREA_ZDA);
+}
+
+void
+ghs_pragma_endtda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs endtda");
+
+ pop_data_area (DATA_AREA_TDA);
+}
+
+void
+ghs_pragma_endsda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs endsda");
+
+ pop_data_area (DATA_AREA_SDA);
+}
+
+void
+ghs_pragma_endzda (cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ tree x;
+
+ if (pragma_lex (&x) != CPP_EOF)
+ warning (OPT_Wpragmas, "junk at end of #pragma ghs endzda");
+
+ pop_data_area (DATA_AREA_ZDA);
+}
diff --git a/gcc/config/v850/v850-modes.def b/gcc/config/v850/v850-modes.def
new file mode 100644
index 000000000..d90ce5456
--- /dev/null
+++ b/gcc/config/v850/v850-modes.def
@@ -0,0 +1,29 @@
+/* Definitions of target machine for GNU compiler. NEC V850 series
+ Copyright (C) 2005
+ Free Software Foundation, Inc.
+ Contributed by NEC EL
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+CC_MODE (CC_FPU_LT);
+CC_MODE (CC_FPU_LE);
+CC_MODE (CC_FPU_GT);
+CC_MODE (CC_FPU_GE);
+CC_MODE (CC_FPU_EQ);
+CC_MODE (CC_FPU_NE);
+
diff --git a/gcc/config/v850/v850-protos.h b/gcc/config/v850/v850-protos.h
new file mode 100644
index 000000000..8ee6c20d1
--- /dev/null
+++ b/gcc/config/v850/v850-protos.h
@@ -0,0 +1,73 @@
+/* Prototypes for v850.c functions used in the md file & elsewhere.
+ Copyright (C) 1999, 2000, 2002, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Function prototypes that cannot exist in v850.h due to dependency
+ complications. */
+#ifndef GCC_V850_PROTOS_H
+#define GCC_V850_PROTOS_H
+
+#define Mmode enum machine_mode
+
+extern void expand_prologue (void);
+extern void expand_epilogue (void);
+extern int v850_handle_pragma (int (*)(void), void (*)(int), char *);
+extern int compute_register_save_size (long *);
+extern int compute_frame_size (int, long *);
+extern void v850_init_expanders (void);
+
+#ifdef RTX_CODE
+extern int v850_output_addr_const_extra (FILE *, rtx);
+extern rtx v850_return_addr (int);
+extern const char *output_move_single (rtx *);
+extern void notice_update_cc (rtx, rtx);
+extern char * construct_save_jarl (rtx);
+extern char * construct_restore_jr (rtx);
+#ifdef HAVE_MACHINE_MODES
+extern char * construct_dispose_instruction (rtx);
+extern char * construct_prepare_instruction (rtx);
+extern int ep_memory_operand (rtx, Mmode, int);
+extern int v850_float_z_comparison_operator (rtx, Mmode);
+extern int v850_float_nz_comparison_operator (rtx, Mmode);
+extern rtx v850_gen_compare (enum rtx_code, Mmode, rtx, rtx);
+extern Mmode v850_gen_float_compare (enum rtx_code, Mmode, rtx, rtx);
+extern Mmode v850_select_cc_mode (RTX_CODE, rtx, rtx);
+#endif
+#endif /* RTX_CODE */
+
+#ifdef TREE_CODE
+extern int v850_interrupt_function_p (tree);
+extern void v850_output_aligned_bss (FILE *, tree, const char *, unsigned HOST_WIDE_INT, int);
+extern void v850_output_common (FILE *, tree, const char *, int, int);
+extern void v850_output_local (FILE *, tree, const char *, int, int);
+extern v850_data_area v850_get_data_area (tree);
+#endif
+
+extern void ghs_pragma_section (struct cpp_reader *);
+extern void ghs_pragma_interrupt (struct cpp_reader *);
+extern void ghs_pragma_starttda (struct cpp_reader *);
+extern void ghs_pragma_startsda (struct cpp_reader *);
+extern void ghs_pragma_startzda (struct cpp_reader *);
+extern void ghs_pragma_endtda (struct cpp_reader *);
+extern void ghs_pragma_endsda (struct cpp_reader *);
+extern void ghs_pragma_endzda (struct cpp_reader *);
+
+#undef Mmode
+
+#endif /* ! GCC_V850_PROTOS_H */
diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c
new file mode 100644
index 000000000..d75f88c55
--- /dev/null
+++ b/gcc/config/v850/v850.c
@@ -0,0 +1,3226 @@
+/* Subroutines for insn-output.c for NEC V850 series
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "recog.h"
+#include "expr.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "ggc.h"
+#include "integrate.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "df.h"
+
+#ifndef streq
+#define streq(a,b) (strcmp (a, b) == 0)
+#endif
+
+static void v850_print_operand_address (FILE *, rtx);
+
+/* Information about the various small memory areas. */
+struct small_memory_info small_memory[ (int)SMALL_MEMORY_max ] =
+{
+ /* Name Max Physical max. */
+ { "tda", 0, 256 },
+ { "sda", 0, 65536 },
+ { "zda", 0, 32768 },
+};
+
+/* Names of the various data areas used on the v850. */
+tree GHS_default_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
+tree GHS_current_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
+
+/* Track the current data area set by the data area pragma (which
+ can be nested). Tested by check_default_data_area. */
+data_area_stack_element * data_area_stack = NULL;
+
+/* True if we no longer need to check whether the current
+   function is an interrupt handler. */
+static int v850_interrupt_cache_p = FALSE;
+
+rtx v850_compare_op0, v850_compare_op1;
+
+/* Whether current function is an interrupt handler. */
+static int v850_interrupt_p = FALSE;
+
+static GTY(()) section * rosdata_section;
+static GTY(()) section * rozdata_section;
+static GTY(()) section * tdata_section;
+static GTY(()) section * zdata_section;
+static GTY(()) section * zbss_section;
+
+/* Set the maximum size of small memory area TYPE to the value given
+ by VALUE. Return true if VALUE was syntactically correct. VALUE
+ starts with the argument separator: either "-" or "=". */
+
+static bool
+v850_handle_memory_option (enum small_memory_type type, const char *value)
+{
+ int i, size;
+
+ if (*value != '-' && *value != '=')
+ return false;
+
+ value++;
+ for (i = 0; value[i]; i++)
+ if (!ISDIGIT (value[i]))
+ return false;
+
+ size = atoi (value);
+ if (size > small_memory[type].physical_max)
+ error ("value passed to %<-m%s%> is too large", small_memory[type].name);
+ else
+ small_memory[type].max = size;
+ return true;
+}
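+
+/* For example, "-mtda=256" (or, equivalently, "-mtda-256") sets
+   small_memory[SMALL_MEMORY_TDA].max to 256, which is also the tiny
+   data area's physical maximum; anything larger draws the error
+   above.  */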
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+v850_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case OPT_mspace:
+ target_flags |= MASK_EP | MASK_PROLOG_FUNCTION;
+ return true;
+
+ case OPT_mv850:
+ target_flags &= ~(MASK_CPU ^ MASK_V850);
+ return true;
+
+ case OPT_mv850e:
+ case OPT_mv850e1:
+ target_flags &= ~(MASK_CPU ^ MASK_V850E);
+ return true;
+
+ case OPT_mtda:
+ return v850_handle_memory_option (SMALL_MEMORY_TDA, arg);
+
+ case OPT_msda:
+ return v850_handle_memory_option (SMALL_MEMORY_SDA, arg);
+
+ case OPT_mzda:
+ return v850_handle_memory_option (SMALL_MEMORY_ZDA, arg);
+
+ default:
+ return true;
+ }
+}
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options v850_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ /* Note - we no longer enable MASK_EP when optimizing. This is
+ because of a hardware bug which stops the SLD and SST instructions
+ from correctly detecting some hazards. If the user is sure that
+ their hardware is fixed or that their program will not encounter
+ the conditions that trigger the bug then they can enable -mep by
+ hand. */
+ { OPT_LEVELS_1_PLUS, OPT_mprolog_function, NULL, 1 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+/* Handle the TARGET_PASS_BY_REFERENCE target hook.
+ Specify whether to pass the argument by reference. */
+
+static bool
+v850_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode, const_tree type,
+ bool named ATTRIBUTE_UNUSED)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ return size > 8;
+}
+
+/* Implementing the Varargs Macros. */
+
+static bool
+v850_strict_argument_naming (CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED)
+{
+  return !TARGET_GHS;
+}
+
+/* Return an RTX to represent where an argument with mode MODE
+ and type TYPE will be passed to a function. If the result
+ is NULL_RTX, the argument will be pushed. */
+
+static rtx
+v850_function_arg (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ rtx result = NULL_RTX;
+ int size, align;
+
+ if (!named)
+ return NULL_RTX;
+
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+
+ if (size < 1)
+ {
+ /* Once we have stopped using argument registers, do not start up again. */
+ cum->nbytes = 4 * UNITS_PER_WORD;
+ return NULL_RTX;
+ }
+
+ if (size <= UNITS_PER_WORD && type)
+ align = TYPE_ALIGN (type) / BITS_PER_UNIT;
+ else
+ align = size;
+
+  cum->nbytes = (cum->nbytes + align - 1) & ~(align - 1);
+
+ if (cum->nbytes > 4 * UNITS_PER_WORD)
+ return NULL_RTX;
+
+ if (type == NULL_TREE
+ && cum->nbytes + size > 4 * UNITS_PER_WORD)
+ return NULL_RTX;
+
+ switch (cum->nbytes / UNITS_PER_WORD)
+ {
+ case 0:
+ result = gen_rtx_REG (mode, 6);
+ break;
+ case 1:
+ result = gen_rtx_REG (mode, 7);
+ break;
+ case 2:
+ result = gen_rtx_REG (mode, 8);
+ break;
+ case 3:
+ result = gen_rtx_REG (mode, 9);
+ break;
+ default:
+ result = NULL_RTX;
+ }
+
+ return result;
+}
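+
+/* For example, under the scheme above a call f (int a, long long b)
+   passes A in r6, skips r7 to satisfy B's 8-byte alignment, and
+   passes B in the r8/r9 pair; arguments beyond the first 16 bytes
+   go on the stack.  */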
+
+/* Return the number of bytes which must be put into registers
+   for values which are passed partly in registers and partly
+   in memory. */
+static int
+v850_arg_partial_bytes (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+ tree type, bool named)
+{
+ int size, align;
+
+ if (TARGET_GHS && !named)
+ return 0;
+
+ if (mode == BLKmode)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (size < 1)
+ size = 1;
+
+ if (type)
+ align = TYPE_ALIGN (type) / BITS_PER_UNIT;
+ else
+ align = size;
+
+ cum->nbytes = (cum->nbytes + align - 1) & ~ (align - 1);
+
+ if (cum->nbytes > 4 * UNITS_PER_WORD)
+ return 0;
+
+ if (cum->nbytes + size <= 4 * UNITS_PER_WORD)
+ return 0;
+
+ if (type == NULL_TREE
+ && cum->nbytes + size > 4 * UNITS_PER_WORD)
+ return 0;
+
+ return 4 * UNITS_PER_WORD - cum->nbytes;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+static void
+v850_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ cum->nbytes += (((type && int_size_in_bytes (type) > 8
+ ? GET_MODE_SIZE (Pmode)
+ : (mode != BLKmode
+ ? GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type))) + UNITS_PER_WORD - 1)
+ & -UNITS_PER_WORD);
+}
+
+/* Return the high and low words of a CONST_DOUBLE. */
+
+static void
+const_double_split (rtx x, HOST_WIDE_INT * p_high, HOST_WIDE_INT * p_low)
+{
+ if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ long t[2];
+ REAL_VALUE_TYPE rv;
+
+ switch (GET_MODE (x))
+ {
+ case DFmode:
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_DOUBLE (rv, t);
+ *p_high = t[1]; /* since v850 is little endian */
+ *p_low = t[0]; /* high is second word */
+ return;
+
+ case SFmode:
+ REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
+ REAL_VALUE_TO_TARGET_SINGLE (rv, *p_high);
+ *p_low = 0;
+ return;
+
+ case VOIDmode:
+ case DImode:
+ *p_high = CONST_DOUBLE_HIGH (x);
+ *p_low = CONST_DOUBLE_LOW (x);
+ return;
+
+ default:
+ break;
+ }
+ }
+
+ fatal_insn ("const_double_split got a bad insn:", x);
+}
+
+
+/* Return the cost of the integer constant VALUE; ZERO_COST is the
+   cost charged when VALUE satisfies constraint I.  const_costs below
+   returns the cost of the rtx R with code C. */
+
+static int
+const_costs_int (HOST_WIDE_INT value, int zero_cost)
+{
+ if (CONST_OK_FOR_I (value))
+ return zero_cost;
+ else if (CONST_OK_FOR_J (value))
+ return 1;
+ else if (CONST_OK_FOR_K (value))
+ return 2;
+ else
+ return 4;
+}
+
+static int
+const_costs (rtx r, enum rtx_code c)
+{
+ HOST_WIDE_INT high, low;
+
+ switch (c)
+ {
+ case CONST_INT:
+ return const_costs_int (INTVAL (r), 0);
+
+ case CONST_DOUBLE:
+ const_double_split (r, &high, &low);
+ if (GET_MODE (r) == SFmode)
+ return const_costs_int (high, 1);
+ else
+ return const_costs_int (high, 1) + const_costs_int (low, 1);
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ case CONST:
+ return 2;
+
+ case HIGH:
+ return 1;
+
+ default:
+ return 4;
+ }
+}
+
+static bool
+v850_rtx_costs (rtx x,
+ int codearg,
+ int outer_code ATTRIBUTE_UNUSED,
+ int * total, bool speed)
+{
+ enum rtx_code code = (enum rtx_code) codearg;
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ *total = COSTS_N_INSNS (const_costs (x, code));
+ return true;
+
+ case MOD:
+ case DIV:
+ case UMOD:
+ case UDIV:
+ if (TARGET_V850E && !speed)
+ *total = 6;
+ else
+ *total = 60;
+ return true;
+
+ case MULT:
+ if (TARGET_V850E
+ && ( GET_MODE (x) == SImode
+ || GET_MODE (x) == HImode
+ || GET_MODE (x) == QImode))
+ {
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ *total = 4;
+ else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (CONST_OK_FOR_O (INTVAL (XEXP (x, 1))))
+ *total = 6;
+ else if (CONST_OK_FOR_K (INTVAL (XEXP (x, 1))))
+ *total = 10;
+ }
+ }
+ else
+ *total = 20;
+ return true;
+
+ case ZERO_EXTRACT:
+ if (outer_code == COMPARE)
+ *total = 0;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Print operand X using operand code CODE to assembly language output file
+ FILE. */
+
+static void
+v850_print_operand (FILE * file, rtx x, int code)
+{
+ HOST_WIDE_INT high, low;
+
+ switch (code)
+ {
+ case 'c':
+      /* We use 'c' operands with symbols for .vtinherit.  */
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ output_addr_const(file, x);
+ break;
+ }
+ /* fall through */
+ case 'b':
+ case 'B':
+ case 'C':
+ switch ((code == 'B' || code == 'C')
+ ? reverse_condition (GET_CODE (x)) : GET_CODE (x))
+ {
+ case NE:
+ if (code == 'c' || code == 'C')
+ fprintf (file, "nz");
+ else
+ fprintf (file, "ne");
+ break;
+ case EQ:
+ if (code == 'c' || code == 'C')
+ fprintf (file, "z");
+ else
+ fprintf (file, "e");
+ break;
+ case GE:
+ fprintf (file, "ge");
+ break;
+ case GT:
+ fprintf (file, "gt");
+ break;
+ case LE:
+ fprintf (file, "le");
+ break;
+ case LT:
+ fprintf (file, "lt");
+ break;
+ case GEU:
+ fprintf (file, "nl");
+ break;
+ case GTU:
+ fprintf (file, "h");
+ break;
+ case LEU:
+ fprintf (file, "nh");
+ break;
+ case LTU:
+ fprintf (file, "l");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'F': /* high word of CONST_DOUBLE */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (file, "%d", (INTVAL (x) >= 0) ? 0 : -1);
+ break;
+
+ case CONST_DOUBLE:
+ const_double_split (x, &high, &low);
+ fprintf (file, "%ld", (long) high);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'G': /* low word of CONST_DOUBLE */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (file, "%ld", (long) INTVAL (x));
+ break;
+
+ case CONST_DOUBLE:
+ const_double_split (x, &high, &low);
+ fprintf (file, "%ld", (long) low);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case 'L':
+ fprintf (file, "%d\n", (int)(INTVAL (x) & 0xffff));
+ break;
+ case 'M':
+ fprintf (file, "%d", exact_log2 (INTVAL (x)));
+ break;
+ case 'O':
+ gcc_assert (special_symbolref_operand (x, VOIDmode));
+
+ if (GET_CODE (x) == CONST)
+ x = XEXP (XEXP (x, 0), 0);
+ else
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (SYMBOL_REF_ZDA_P (x))
+ fprintf (file, "zdaoff");
+ else if (SYMBOL_REF_SDA_P (x))
+ fprintf (file, "sdaoff");
+ else if (SYMBOL_REF_TDA_P (x))
+ fprintf (file, "tdaoff");
+ else
+ gcc_unreachable ();
+ break;
+ case 'P':
+ gcc_assert (special_symbolref_operand (x, VOIDmode));
+ output_addr_const (file, x);
+ break;
+ case 'Q':
+ gcc_assert (special_symbolref_operand (x, VOIDmode));
+
+ if (GET_CODE (x) == CONST)
+ x = XEXP (XEXP (x, 0), 0);
+ else
+ gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+ if (SYMBOL_REF_ZDA_P (x))
+ fprintf (file, "r0");
+ else if (SYMBOL_REF_SDA_P (x))
+ fprintf (file, "gp");
+ else if (SYMBOL_REF_TDA_P (x))
+ fprintf (file, "ep");
+ else
+ gcc_unreachable ();
+ break;
+ case 'R': /* 2nd word of a double. */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x) + 1], file);
+ break;
+ case MEM:
+ x = XEXP (adjust_address (x, SImode, 4), 0);
+ v850_print_operand_address (file, x);
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "[r0]");
+ break;
+
+ default:
+ break;
+ }
+ break;
+ case 'S':
+ {
+ /* If it's a reference to a TDA variable, use sst/sld vs. st/ld. */
+ if (GET_CODE (x) == MEM && ep_memory_operand (x, GET_MODE (x), FALSE))
+ fputs ("s", file);
+
+ break;
+ }
+ case 'T':
+ {
+ /* Like an 'S' operand above, but for unsigned loads only. */
+ if (GET_CODE (x) == MEM && ep_memory_operand (x, GET_MODE (x), TRUE))
+ fputs ("s", file);
+
+ break;
+ }
+ case 'W': /* print the instruction suffix */
+ switch (GET_MODE (x))
+ {
+ default:
+ gcc_unreachable ();
+
+ case QImode: fputs (".b", file); break;
+ case HImode: fputs (".h", file); break;
+ case SImode: fputs (".w", file); break;
+ case SFmode: fputs (".w", file); break;
+ }
+ break;
+ case '.': /* register r0 */
+ fputs (reg_names[0], file);
+ break;
+ case 'z': /* reg or zero */
+ if (GET_CODE (x) == REG)
+ fputs (reg_names[REGNO (x)], file);
+ else if ((GET_MODE(x) == SImode
+ || GET_MODE(x) == DFmode
+ || GET_MODE(x) == SFmode)
+ && x == CONST0_RTX(GET_MODE(x)))
+ fputs (reg_names[0], file);
+ else
+ {
+ gcc_assert (x == const0_rtx);
+ fputs (reg_names[0], file);
+ }
+ break;
+ default:
+ switch (GET_CODE (x))
+ {
+ case MEM:
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ output_address (gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, 0),
+ XEXP (x, 0)));
+ else
+ output_address (XEXP (x, 0));
+ break;
+
+ case REG:
+ fputs (reg_names[REGNO (x)], file);
+ break;
+ case SUBREG:
+ fputs (reg_names[subreg_regno (x)], file);
+ break;
+ case CONST_INT:
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ case CODE_LABEL:
+ v850_print_operand_address (file, x);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ }
+}
+
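+/* Illustrative note (not from the original sources): the operand codes
+ above are used by the output templates in v850.md. For instance, a
+ template such as "%S1ld%W1 %1,%0" loading a HImode value through a
+ short EP memory reference could expand to roughly
+
+ sld.h 4[ep],r10
+
+ where 'S' contributed the "s" prefix and 'W' the ".h" suffix. */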
+
+/* Output assembly language for the address ADDR to FILE. */
+
+static void
+v850_print_operand_address (FILE * file, rtx addr)
+{
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ fprintf (file, "0[");
+ v850_print_operand (file, addr, 0);
+ fprintf (file, "]");
+ break;
+ case LO_SUM:
+ if (GET_CODE (XEXP (addr, 0)) == REG)
+ {
+ /* reg,foo */
+ fprintf (file, "lo(");
+ v850_print_operand (file, XEXP (addr, 1), 0);
+ fprintf (file, ")[");
+ v850_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "]");
+ }
+ break;
+ case PLUS:
+ if (GET_CODE (XEXP (addr, 0)) == REG
+ || GET_CODE (XEXP (addr, 0)) == SUBREG)
+ {
+ /* reg,foo */
+ v850_print_operand (file, XEXP (addr, 1), 0);
+ fprintf (file, "[");
+ v850_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "]");
+ }
+ else
+ {
+ v850_print_operand (file, XEXP (addr, 0), 0);
+ fprintf (file, "+");
+ v850_print_operand (file, XEXP (addr, 1), 0);
+ }
+ break;
+ case SYMBOL_REF:
+ {
+ const char *off_name = NULL;
+ const char *reg_name = NULL;
+
+ if (SYMBOL_REF_ZDA_P (addr))
+ {
+ off_name = "zdaoff";
+ reg_name = "r0";
+ }
+ else if (SYMBOL_REF_SDA_P (addr))
+ {
+ off_name = "sdaoff";
+ reg_name = "gp";
+ }
+ else if (SYMBOL_REF_TDA_P (addr))
+ {
+ off_name = "tdaoff";
+ reg_name = "ep";
+ }
+
+ if (off_name)
+ fprintf (file, "%s(", off_name);
+ output_addr_const (file, addr);
+ if (reg_name)
+ fprintf (file, ")[%s]", reg_name);
+ }
+ break;
+ case CONST:
+ if (special_symbolref_operand (addr, VOIDmode))
+ {
+ rtx x = XEXP (XEXP (addr, 0), 0);
+ const char *off_name;
+ const char *reg_name;
+
+ if (SYMBOL_REF_ZDA_P (x))
+ {
+ off_name = "zdaoff";
+ reg_name = "r0";
+ }
+ else if (SYMBOL_REF_SDA_P (x))
+ {
+ off_name = "sdaoff";
+ reg_name = "gp";
+ }
+ else if (SYMBOL_REF_TDA_P (x))
+ {
+ off_name = "tdaoff";
+ reg_name = "ep";
+ }
+ else
+ gcc_unreachable ();
+
+ fprintf (file, "%s(", off_name);
+ output_addr_const (file, addr);
+ fprintf (file, ")[%s]", reg_name);
+ }
+ else
+ output_addr_const (file, addr);
+ break;
+ default:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+static bool
+v850_print_operand_punct_valid_p (unsigned char code)
+{
+ return code == '.';
+}
+
+/* When assemble_integer is used to emit the offsets for a switch
+ table it can encounter (TRUNCATE:HI (MINUS:SI (LABEL_REF:SI) (LABEL_REF:SI))).
+ output_addr_const will normally barf at this, but it is OK to omit
+ the truncate and just emit the difference of the two labels. The
+ .hword directive will automatically handle the truncation for us.
+
+ Returns 1 if rtx was handled, 0 otherwise. */
+
+int
+v850_output_addr_const_extra (FILE * file, rtx x)
+{
+ if (GET_CODE (x) != TRUNCATE)
+ return 0;
+
+ x = XEXP (x, 0);
+
+ /* We must also handle the case where the switch table was passed a
+ constant value and so has been collapsed. In this case the first
+ label will have been deleted. In such a case it is OK to emit
+ nothing, since the table will not be used.
+ (cf gcc.c-torture/compile/990801-1.c). */
+ if (GET_CODE (x) == MINUS
+ && GET_CODE (XEXP (x, 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == CODE_LABEL
+ && INSN_DELETED_P (XEXP (XEXP (x, 0), 0)))
+ return 1;
+
+ output_addr_const (file, x);
+ return 1;
+}
+
+/* Return appropriate code to load up a 1, 2, or 4 byte integer/floating
+ point value. */
+
+const char *
+output_move_single (rtx * operands)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (REG_P (dst))
+ {
+ if (REG_P (src))
+ return "mov %1,%0";
+
+ else if (GET_CODE (src) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (src);
+
+ if (CONST_OK_FOR_J (value)) /* Signed 5-bit immediate. */
+ return "mov %1,%0";
+
+ else if (CONST_OK_FOR_K (value)) /* Signed 16-bit immediate. */
+ return "movea %1,%.,%0";
+
+ else if (CONST_OK_FOR_L (value)) /* Upper 16 bits were set. */
+ return "movhi hi0(%1),%.,%0";
+
+ /* A random constant. */
+ else if (TARGET_V850E || TARGET_V850E2_ALL)
+ return "mov %1,%0";
+ else
+ return "movhi hi(%1),%.,%0\n\tmovea lo(%1),%0,%0";
+ }
+
+ else if (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode)
+ {
+ HOST_WIDE_INT high, low;
+
+ const_double_split (src, &high, &low);
+
+ if (CONST_OK_FOR_J (high)) /* Signed 5-bit immediate. */
+ return "mov %F1,%0";
+
+ else if (CONST_OK_FOR_K (high)) /* Signed 16-bit immediate. */
+ return "movea %F1,%.,%0";
+
+ else if (CONST_OK_FOR_L (high)) /* Upper 16 bits were set. */
+ return "movhi hi0(%F1),%.,%0";
+
+ /* A random constant. */
+ else if (TARGET_V850E || TARGET_V850E2_ALL)
+ return "mov %F1,%0";
+
+ else
+ return "movhi hi(%F1),%.,%0\n\tmovea lo(%F1),%0,%0";
+ }
+
+ else if (GET_CODE (src) == MEM)
+ return "%S1ld%W1 %1,%0";
+
+ else if (special_symbolref_operand (src, VOIDmode))
+ return "movea %O1(%P1),%Q1,%0";
+
+ else if (GET_CODE (src) == LABEL_REF
+ || GET_CODE (src) == SYMBOL_REF
+ || GET_CODE (src) == CONST)
+ {
+ if (TARGET_V850E || TARGET_V850E2_ALL)
+ return "mov hilo(%1),%0";
+ else
+ return "movhi hi(%1),%.,%0\n\tmovea lo(%1),%0,%0";
+ }
+
+ else if (GET_CODE (src) == HIGH)
+ return "movhi hi(%1),%.,%0";
+
+ else if (GET_CODE (src) == LO_SUM)
+ {
+ operands[2] = XEXP (src, 0);
+ operands[3] = XEXP (src, 1);
+ return "movea lo(%3),%2,%0";
+ }
+ }
+
+ else if (GET_CODE (dst) == MEM)
+ {
+ if (REG_P (src))
+ return "%S0st%W0 %1,%0";
+
+ else if (GET_CODE (src) == CONST_INT && INTVAL (src) == 0)
+ return "%S0st%W0 %.,%0";
+
+ else if (GET_CODE (src) == CONST_DOUBLE
+ && CONST0_RTX (GET_MODE (dst)) == src)
+ return "%S0st%W0 %.,%0";
+ }
+
+ fatal_insn ("output_move_single:", gen_rtx_SET (VOIDmode, dst, src));
+ return "";
+}
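+
+/* Rough examples of the sequences chosen above (illustrative only):
+
+ mov 5,r10 - fits in a signed 5-bit immediate (J)
+ movea 1000,r0,r10 - fits in a signed 16-bit immediate (K)
+ movhi hi0(458752),r0,r10 - only the upper 16 bits are set (L)
+ movhi hi(x),r0,r10
+ movea lo(x),r10,r10 - arbitrary constant on plain V850. */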
+
+/* Generate comparison code. */
+int
+v850_float_z_comparison_operator (rtx op, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (op);
+
+ if (GET_RTX_CLASS (code) != RTX_COMPARE
+ && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
+ return 0;
+
+ if (mode != GET_MODE (op) && mode != VOIDmode)
+ return 0;
+
+ if ((GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != CC_REGNUM)
+ || XEXP (op, 1) != const0_rtx)
+ return 0;
+
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_LTmode)
+ return code == LT;
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_LEmode)
+ return code == LE;
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_EQmode)
+ return code == EQ;
+
+ return 0;
+}
+
+int
+v850_float_nz_comparison_operator (rtx op, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (op);
+
+ if (GET_RTX_CLASS (code) != RTX_COMPARE
+ && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
+ return 0;
+
+ if (mode != GET_MODE (op) && mode != VOIDmode)
+ return 0;
+
+ if ((GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != CC_REGNUM)
+ || XEXP (op, 1) != const0_rtx)
+ return 0;
+
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_GTmode)
+ return code == GT;
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_GEmode)
+ return code == GE;
+ if (GET_MODE (XEXP (op, 0)) == CC_FPU_NEmode)
+ return code == NE;
+
+ return 0;
+}
+
+enum machine_mode
+v850_select_cc_mode (enum rtx_code cond, rtx op0, rtx op1 ATTRIBUTE_UNUSED)
+{
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
+ {
+ switch (cond)
+ {
+ case LE:
+ return CC_FPU_LEmode;
+ case GE:
+ return CC_FPU_GEmode;
+ case LT:
+ return CC_FPU_LTmode;
+ case GT:
+ return CC_FPU_GTmode;
+ case EQ:
+ return CC_FPU_EQmode;
+ case NE:
+ return CC_FPU_NEmode;
+ default:
+ abort ();
+ }
+ }
+ return CCmode;
+}
+
+enum machine_mode
+v850_gen_float_compare (enum rtx_code cond, enum machine_mode mode ATTRIBUTE_UNUSED, rtx op0, rtx op1)
+{
+ if (GET_MODE(op0) == DFmode)
+ {
+ switch (cond)
+ {
+ case LE:
+ emit_insn (gen_cmpdf_le_insn (op0, op1));
+ break;
+ case GE:
+ emit_insn (gen_cmpdf_ge_insn (op0, op1));
+ break;
+ case LT:
+ emit_insn (gen_cmpdf_lt_insn (op0, op1));
+ break;
+ case GT:
+ emit_insn (gen_cmpdf_gt_insn (op0, op1));
+ break;
+ case EQ:
+ emit_insn (gen_cmpdf_eq_insn (op0, op1));
+ break;
+ case NE:
+ emit_insn (gen_cmpdf_ne_insn (op0, op1));
+ break;
+ default:
+ abort ();
+ }
+ }
+ else if (GET_MODE (op0) == SFmode)
+ {
+ switch (cond)
+ {
+ case LE:
+ emit_insn (gen_cmpsf_le_insn(op0, op1));
+ break;
+ case GE:
+ emit_insn (gen_cmpsf_ge_insn(op0, op1));
+ break;
+ case LT:
+ emit_insn (gen_cmpsf_lt_insn(op0, op1));
+ break;
+ case GT:
+ emit_insn (gen_cmpsf_gt_insn(op0, op1));
+ break;
+ case EQ:
+ emit_insn (gen_cmpsf_eq_insn(op0, op1));
+ break;
+ case NE:
+ emit_insn (gen_cmpsf_ne_insn(op0, op1));
+ break;
+ default:
+ abort ();
+ }
+ }
+ else
+ {
+ abort ();
+ }
+
+ return v850_select_cc_mode (cond, op0, op1);
+}
+
+rtx
+v850_gen_compare (enum rtx_code cond, enum machine_mode mode, rtx op0, rtx op1)
+{
+ if (GET_MODE_CLASS(GET_MODE (op0)) != MODE_FLOAT)
+ {
+ emit_insn (gen_cmpsi_insn (op0, op1));
+ return gen_rtx_fmt_ee (cond, mode, gen_rtx_REG(CCmode, CC_REGNUM), const0_rtx);
+ }
+ else
+ {
+ rtx cc_reg;
+ mode = v850_gen_float_compare (cond, mode, op0, op1);
+ cc_reg = gen_rtx_REG (mode, CC_REGNUM);
+ emit_insn (gen_rtx_SET(mode, cc_reg, gen_rtx_REG (mode, FCC_REGNUM)));
+
+ return gen_rtx_fmt_ee (cond, mode, cc_reg, const0_rtx);
+ }
+}
+
+/* Return maximum offset supported for a short EP memory reference of mode
+ MODE and signedness UNSIGNEDP. */
+
+static int
+ep_memory_offset (enum machine_mode mode, int unsignedp ATTRIBUTE_UNUSED)
+{
+ int max_offset = 0;
+
+ switch (mode)
+ {
+ case QImode:
+ if (TARGET_SMALL_SLD)
+ max_offset = (1 << 4);
+ else if ((TARGET_V850E || TARGET_V850E2_ALL)
+ && unsignedp)
+ max_offset = (1 << 4);
+ else
+ max_offset = (1 << 7);
+ break;
+
+ case HImode:
+ if (TARGET_SMALL_SLD)
+ max_offset = (1 << 5);
+ else if ((TARGET_V850E || TARGET_V850E2_ALL)
+ && unsignedp)
+ max_offset = (1 << 5);
+ else
+ max_offset = (1 << 8);
+ break;
+
+ case SImode:
+ case SFmode:
+ max_offset = (1 << 8);
+ break;
+
+ default:
+ break;
+ }
+
+ return max_offset;
+}
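+
+/* For reference, the limits computed above work out to 128 bytes for
+ QImode (16 when TARGET_SMALL_SLD, or for unsigned loads on V850E),
+ 256 bytes for HImode (32 under the same conditions), and 256 bytes
+ for SImode and SFmode. */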
+
+/* Return true if OP is a valid short EP memory reference */
+
+int
+ep_memory_operand (rtx op, enum machine_mode mode, int unsigned_load)
+{
+ rtx addr, op0, op1;
+ int max_offset;
+ int mask;
+
+ /* If we are not using the EP register on a per-function basis
+ then do not allow this optimization at all. This is to
+ prevent the use of the SLD/SST instructions which cannot be
+ guaranteed to work properly due to a hardware bug. */
+ if (!TARGET_EP)
+ return FALSE;
+
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ max_offset = ep_memory_offset (mode, unsigned_load);
+
+ mask = GET_MODE_SIZE (mode) - 1;
+
+ addr = XEXP (op, 0);
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ switch (GET_CODE (addr))
+ {
+ default:
+ break;
+
+ case SYMBOL_REF:
+ return SYMBOL_REF_TDA_P (addr);
+
+ case REG:
+ return REGNO (addr) == EP_REGNUM;
+
+ case PLUS:
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ if (GET_CODE (op1) == CONST_INT
+ && INTVAL (op1) < max_offset
+ && INTVAL (op1) >= 0
+ && (INTVAL (op1) & mask) == 0)
+ {
+ if (GET_CODE (op0) == REG && REGNO (op0) == EP_REGNUM)
+ return TRUE;
+
+ if (GET_CODE (op0) == SYMBOL_REF && SYMBOL_REF_TDA_P (op0))
+ return TRUE;
+ }
+ break;
+ }
+
+ return FALSE;
+}
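+
+/* For example (illustrative): with -mep in effect a reference such as
+ (mem:SI (plus (reg ep) (const_int 8))) is accepted, as is a reference
+ to a TDA symbol, allowing the short "sld.w 8[ep],r10" style forms. */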
+
+/* Substitute memory references involving a pointer, to use the ep pointer,
+ taking care to save and restore the ep. */
+
+static void
+substitute_ep_register (rtx first_insn,
+ rtx last_insn,
+ int uses,
+ int regno,
+ rtx * p_r1,
+ rtx * p_ep)
+{
+ rtx reg = gen_rtx_REG (Pmode, regno);
+ rtx insn;
+
+ if (!*p_r1)
+ {
+ df_set_regs_ever_live (1, true);
+ *p_r1 = gen_rtx_REG (Pmode, 1);
+ *p_ep = gen_rtx_REG (Pmode, 30);
+ }
+
+ if (TARGET_DEBUG)
+ fprintf (stderr, "\
+Saved %d bytes (%d uses of register %s) in function %s, starting at insn %d, ending at %d\n",
+ 2 * (uses - 3), uses, reg_names[regno],
+ IDENTIFIER_POINTER (DECL_NAME (current_function_decl)),
+ INSN_UID (first_insn), INSN_UID (last_insn));
+
+ if (GET_CODE (first_insn) == NOTE)
+ first_insn = next_nonnote_insn (first_insn);
+
+ last_insn = next_nonnote_insn (last_insn);
+ for (insn = first_insn; insn && insn != last_insn; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == INSN)
+ {
+ rtx pattern = single_set (insn);
+
+ /* Replace the memory references. */
+ if (pattern)
+ {
+ rtx *p_mem;
+ /* Memory operands are signed by default. */
+ int unsignedp = FALSE;
+
+ if (GET_CODE (SET_DEST (pattern)) == MEM
+ && GET_CODE (SET_SRC (pattern)) == MEM)
+ p_mem = (rtx *)0;
+
+ else if (GET_CODE (SET_DEST (pattern)) == MEM)
+ p_mem = &SET_DEST (pattern);
+
+ else if (GET_CODE (SET_SRC (pattern)) == MEM)
+ p_mem = &SET_SRC (pattern);
+
+ else if (GET_CODE (SET_SRC (pattern)) == SIGN_EXTEND
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == MEM)
+ p_mem = &XEXP (SET_SRC (pattern), 0);
+
+ else if (GET_CODE (SET_SRC (pattern)) == ZERO_EXTEND
+ && GET_CODE (XEXP (SET_SRC (pattern), 0)) == MEM)
+ {
+ p_mem = &XEXP (SET_SRC (pattern), 0);
+ unsignedp = TRUE;
+ }
+ else
+ p_mem = (rtx *)0;
+
+ if (p_mem)
+ {
+ rtx addr = XEXP (*p_mem, 0);
+
+ if (GET_CODE (addr) == REG && REGNO (addr) == (unsigned) regno)
+ *p_mem = change_address (*p_mem, VOIDmode, *p_ep);
+
+ else if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && REGNO (XEXP (addr, 0)) == (unsigned) regno
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && ((INTVAL (XEXP (addr, 1)))
+ < ep_memory_offset (GET_MODE (*p_mem),
+ unsignedp))
+ && ((INTVAL (XEXP (addr, 1))) >= 0))
+ *p_mem = change_address (*p_mem, VOIDmode,
+ gen_rtx_PLUS (Pmode,
+ *p_ep,
+ XEXP (addr, 1)));
+ }
+ }
+ }
+ }
+
+ /* Optimize back to back cases of ep <- r1 & r1 <- ep. */
+ insn = prev_nonnote_insn (first_insn);
+ if (insn && GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && SET_DEST (PATTERN (insn)) == *p_ep
+ && SET_SRC (PATTERN (insn)) == *p_r1)
+ delete_insn (insn);
+ else
+ emit_insn_before (gen_rtx_SET (Pmode, *p_r1, *p_ep), first_insn);
+
+ emit_insn_before (gen_rtx_SET (Pmode, *p_ep, reg), first_insn);
+ emit_insn_before (gen_rtx_SET (Pmode, *p_ep, *p_r1), last_insn);
+}
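+
+/* A rough sketch of the transformation (illustrative, assuming r12 is
+ the heavily used pointer): references such as "ld.w 4[r12],r10" in
+ the block become "sld.w 4[ep],r10", bracketed by
+
+ mov ep,r1 ; save ep in r1
+ mov r12,ep ; point ep at the data
+ ... ; shortened memory references
+ mov r1,ep ; restore ep
+
+ The three extra moves are why the caller requires more than three
+ uses before the substitution pays off. */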
+
+
+/* TARGET_MACHINE_DEPENDENT_REORG. On the 850, we use it to implement
+ the -mep option: copy heavily used pointers into the ep register so
+ that the shorter, implicit ep-relative addressing can be used. */
+
+static void
+v850_reorg (void)
+{
+ struct
+ {
+ int uses;
+ rtx first_insn;
+ rtx last_insn;
+ }
+ regs[FIRST_PSEUDO_REGISTER];
+
+ int i;
+ int use_ep = FALSE;
+ rtx r1 = NULL_RTX;
+ rtx ep = NULL_RTX;
+ rtx insn;
+ rtx pattern;
+
+ /* If not ep mode, just return now. */
+ if (!TARGET_EP)
+ return;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ regs[i].uses = 0;
+ regs[i].first_insn = NULL_RTX;
+ regs[i].last_insn = NULL_RTX;
+ }
+
+ for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ switch (GET_CODE (insn))
+ {
+ /* End of basic block */
+ default:
+ if (!use_ep)
+ {
+ int max_uses = -1;
+ int max_regno = -1;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (max_uses < regs[i].uses)
+ {
+ max_uses = regs[i].uses;
+ max_regno = i;
+ }
+ }
+
+ if (max_uses > 3)
+ substitute_ep_register (regs[max_regno].first_insn,
+ regs[max_regno].last_insn,
+ max_uses, max_regno, &r1, &ep);
+ }
+
+ use_ep = FALSE;
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ regs[i].uses = 0;
+ regs[i].first_insn = NULL_RTX;
+ regs[i].last_insn = NULL_RTX;
+ }
+ break;
+
+ case NOTE:
+ break;
+
+ case INSN:
+ pattern = single_set (insn);
+
+ /* See if there are any memory references we can shorten */
+ if (pattern)
+ {
+ rtx src = SET_SRC (pattern);
+ rtx dest = SET_DEST (pattern);
+ rtx mem;
+ /* Memory operands are signed by default. */
+ int unsignedp = FALSE;
+
+ /* We might have (SUBREG (MEM)) here, so just get rid of the
+ subregs to make this code simpler. */
+ if (GET_CODE (dest) == SUBREG
+ && (GET_CODE (SUBREG_REG (dest)) == MEM
+ || GET_CODE (SUBREG_REG (dest)) == REG))
+ alter_subreg (&dest);
+ if (GET_CODE (src) == SUBREG
+ && (GET_CODE (SUBREG_REG (src)) == MEM
+ || GET_CODE (SUBREG_REG (src)) == REG))
+ alter_subreg (&src);
+
+ if (GET_CODE (dest) == MEM && GET_CODE (src) == MEM)
+ mem = NULL_RTX;
+
+ else if (GET_CODE (dest) == MEM)
+ mem = dest;
+
+ else if (GET_CODE (src) == MEM)
+ mem = src;
+
+ else if (GET_CODE (src) == SIGN_EXTEND
+ && GET_CODE (XEXP (src, 0)) == MEM)
+ mem = XEXP (src, 0);
+
+ else if (GET_CODE (src) == ZERO_EXTEND
+ && GET_CODE (XEXP (src, 0)) == MEM)
+ {
+ mem = XEXP (src, 0);
+ unsignedp = TRUE;
+ }
+ else
+ mem = NULL_RTX;
+
+ if (mem && ep_memory_operand (mem, GET_MODE (mem), unsignedp))
+ use_ep = TRUE;
+
+ else if (!use_ep && mem
+ && GET_MODE_SIZE (GET_MODE (mem)) <= UNITS_PER_WORD)
+ {
+ rtx addr = XEXP (mem, 0);
+ int regno = -1;
+ int short_p;
+
+ if (GET_CODE (addr) == REG)
+ {
+ short_p = TRUE;
+ regno = REGNO (addr);
+ }
+
+ else if (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && ((INTVAL (XEXP (addr, 1)))
+ < ep_memory_offset (GET_MODE (mem), unsignedp))
+ && ((INTVAL (XEXP (addr, 1))) >= 0))
+ {
+ short_p = TRUE;
+ regno = REGNO (XEXP (addr, 0));
+ }
+
+ else
+ short_p = FALSE;
+
+ if (short_p)
+ {
+ regs[regno].uses++;
+ regs[regno].last_insn = insn;
+ if (!regs[regno].first_insn)
+ regs[regno].first_insn = insn;
+ }
+ }
+
+ /* Loading up a register in the basic block zaps any savings
+ for the register */
+ if (GET_CODE (dest) == REG)
+ {
+ enum machine_mode mode = GET_MODE (dest);
+ int regno;
+ int endregno;
+
+ regno = REGNO (dest);
+ endregno = regno + HARD_REGNO_NREGS (regno, mode);
+
+ if (!use_ep)
+ {
+ /* See if we can use the pointer before this
+ modification. */
+ int max_uses = -1;
+ int max_regno = -1;
+
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (max_uses < regs[i].uses)
+ {
+ max_uses = regs[i].uses;
+ max_regno = i;
+ }
+ }
+
+ if (max_uses > 3
+ && max_regno >= regno
+ && max_regno < endregno)
+ {
+ substitute_ep_register (regs[max_regno].first_insn,
+ regs[max_regno].last_insn,
+ max_uses, max_regno, &r1,
+ &ep);
+
+ /* Since we made a substitution, zap all remembered
+ registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ regs[i].uses = 0;
+ regs[i].first_insn = NULL_RTX;
+ regs[i].last_insn = NULL_RTX;
+ }
+ }
+ }
+
+ for (i = regno; i < endregno; i++)
+ {
+ regs[i].uses = 0;
+ regs[i].first_insn = NULL_RTX;
+ regs[i].last_insn = NULL_RTX;
+ }
+ }
+ }
+ }
+ }
+}
+
+/* # of registers saved by the interrupt handler. */
+#define INTERRUPT_FIXED_NUM 5
+
+/* # of bytes for registers saved by the interrupt handler. */
+#define INTERRUPT_FIXED_SAVE_SIZE (4 * INTERRUPT_FIXED_NUM)
+
+/* # of words saved for other registers. */
+#define INTERRUPT_ALL_SAVE_NUM \
+ (30 - INTERRUPT_FIXED_NUM)
+
+#define INTERRUPT_ALL_SAVE_SIZE (4 * INTERRUPT_ALL_SAVE_NUM)
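+
+/* With INTERRUPT_FIXED_NUM being 5, this works out to 20 bytes for the
+ fixed registers and 4 * (30 - 5) = 100 bytes for the others. */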
+
+int
+compute_register_save_size (long * p_reg_saved)
+{
+ int size = 0;
+ int i;
+ int interrupt_handler = v850_interrupt_function_p (current_function_decl);
+ int call_p = df_regs_ever_live_p (LINK_POINTER_REGNUM);
+ long reg_saved = 0;
+
+ /* Count the return pointer if we need to save it. */
+ if (crtl->profile && !call_p)
+ {
+ df_set_regs_ever_live (LINK_POINTER_REGNUM, true);
+ call_p = 1;
+ }
+
+ /* Count space for the register saves. */
+ if (interrupt_handler)
+ {
+ for (i = 0; i <= 31; i++)
+ switch (i)
+ {
+ default:
+ if (df_regs_ever_live_p (i) || call_p)
+ {
+ size += 4;
+ reg_saved |= 1L << i;
+ }
+ break;
+
+ /* We don't save/restore r0 or the stack pointer */
+ case 0:
+ case STACK_POINTER_REGNUM:
+ break;
+
+ /* For registers with fixed use, we save them, set them to the
+ appropriate value, and then restore them.
+ These registers are handled specially, so don't list them
+ on the list of registers to save in the prologue. */
+ case 1: /* temp used to hold ep */
+ case 4: /* gp */
+ case 10: /* temp used to call interrupt save/restore */
+ case 11: /* temp used to call interrupt save/restore (long call) */
+ case EP_REGNUM: /* ep */
+ size += 4;
+ break;
+ }
+ }
+ else
+ {
+ /* Find the first register that needs to be saved. */
+ for (i = 0; i <= 31; i++)
+ if (df_regs_ever_live_p (i) && ((! call_used_regs[i])
+ || i == LINK_POINTER_REGNUM))
+ break;
+
+ /* If it is possible that an out-of-line helper function might be
+ used to generate the prologue for the current function, then we
+ need to cover the possibility that such a helper function will
+ be used, despite the fact that there might be gaps in the list of
+ registers that need to be saved. To detect this we note that the
+ helper functions always push at least register r29 (provided
+ that the function is not an interrupt handler). */
+
+ if (TARGET_PROLOG_FUNCTION
+ && (i == 2 || ((i >= 20) && (i < 30))))
+ {
+ if (i == 2)
+ {
+ size += 4;
+ reg_saved |= 1L << i;
+
+ i = 20;
+ }
+
+ /* Helper functions save all registers between the starting
+ register and the last register, regardless of whether they
+ are actually used by the function or not. */
+ for (; i <= 29; i++)
+ {
+ size += 4;
+ reg_saved |= 1L << i;
+ }
+
+ if (df_regs_ever_live_p (LINK_POINTER_REGNUM))
+ {
+ size += 4;
+ reg_saved |= 1L << LINK_POINTER_REGNUM;
+ }
+ }
+ else
+ {
+ for (; i <= 31; i++)
+ if (df_regs_ever_live_p (i) && ((! call_used_regs[i])
+ || i == LINK_POINTER_REGNUM))
+ {
+ size += 4;
+ reg_saved |= 1L << i;
+ }
+ }
+ }
+
+ if (p_reg_saved)
+ *p_reg_saved = reg_saved;
+
+ return size;
+}
+
+int
+compute_frame_size (int size, long * p_reg_saved)
+{
+ return (size
+ + compute_register_save_size (p_reg_saved)
+ + crtl->outgoing_args_size);
+}
+
+static int
+use_prolog_function (int num_save, int frame_size)
+{
+ int alloc_stack = (4 * num_save);
+ int unalloc_stack = frame_size - alloc_stack;
+ int save_func_len, restore_func_len;
+ int save_normal_len, restore_normal_len;
+
+ if (! TARGET_DISABLE_CALLT)
+ save_func_len = restore_func_len = 2;
+ else
+ save_func_len = restore_func_len = TARGET_LONG_CALLS ? (4+4+4+2+2) : 4;
+
+ if (unalloc_stack)
+ {
+ save_func_len += CONST_OK_FOR_J (-unalloc_stack) ? 2 : 4;
+ restore_func_len += CONST_OK_FOR_J (-unalloc_stack) ? 2 : 4;
+ }
+
+ /* See if we would have used ep to save the stack. */
+ if (TARGET_EP && num_save > 3 && (unsigned)frame_size < 255)
+ save_normal_len = restore_normal_len = (3 * 2) + (2 * num_save);
+ else
+ save_normal_len = restore_normal_len = 4 * num_save;
+
+ save_normal_len += CONST_OK_FOR_J (-frame_size) ? 2 : 4;
+ restore_normal_len += (CONST_OK_FOR_J (frame_size) ? 2 : 4) + 2;
+
+ /* Don't bother checking if we don't actually save any space.
+ This happens for instance if one register is saved and additional
+ stack space is allocated. */
+ return ((save_func_len + restore_func_len) < (save_normal_len + restore_normal_len));
+}
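+
+/* A rough worked example (sizes in bytes, assuming CALLT is available
+ and the ep-based save applies): with num_save == 5 and frame_size ==
+ 20, the helper path costs 2 + 2 = 4, while the inline save/restore
+ costs about (16 + 4) + (16 + 4 + 2) = 42, so the helper wins. */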
+
+void
+expand_prologue (void)
+{
+ unsigned int i;
+ unsigned int size = get_frame_size ();
+ unsigned int actual_fsize;
+ unsigned int init_stack_alloc = 0;
+ rtx save_regs[32];
+ rtx save_all;
+ unsigned int num_save;
+ int code;
+ int interrupt_handler = v850_interrupt_function_p (current_function_decl);
+ long reg_saved = 0;
+
+ actual_fsize = compute_frame_size (size, &reg_saved);
+
+ /* Save/setup global registers for interrupt functions right now. */
+ if (interrupt_handler)
+ {
+ if (! TARGET_DISABLE_CALLT && (TARGET_V850E || TARGET_V850E2_ALL))
+ emit_insn (gen_callt_save_interrupt ());
+ else
+ emit_insn (gen_save_interrupt ());
+
+ actual_fsize -= INTERRUPT_FIXED_SAVE_SIZE;
+
+ if (((1L << LINK_POINTER_REGNUM) & reg_saved) != 0)
+ actual_fsize -= INTERRUPT_ALL_SAVE_SIZE;
+ }
+
+ /* Identify all of the saved registers. */
+ num_save = 0;
+ for (i = 1; i < 32; i++)
+ {
+ if (((1L << i) & reg_saved) != 0)
+ save_regs[num_save++] = gen_rtx_REG (Pmode, i);
+ }
+
+ /* See if we have an insn that allocates stack space and saves the particular
+ registers we want to. */
+ save_all = NULL_RTX;
+ if (TARGET_PROLOG_FUNCTION && num_save > 0)
+ {
+ if (use_prolog_function (num_save, actual_fsize))
+ {
+ int alloc_stack = 4 * num_save;
+ int offset = 0;
+
+ save_all = gen_rtx_PARALLEL
+ (VOIDmode,
+ rtvec_alloc (num_save + 1
+ + (TARGET_DISABLE_CALLT ? (TARGET_LONG_CALLS ? 2 : 1) : 0)));
+
+ XVECEXP (save_all, 0, 0)
+ = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT(-alloc_stack)));
+ for (i = 0; i < num_save; i++)
+ {
+ offset -= 4;
+ XVECEXP (save_all, 0, i+1)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT(offset))),
+ save_regs[i]);
+ }
+
+ if (TARGET_DISABLE_CALLT)
+ {
+ XVECEXP (save_all, 0, num_save + 1)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 10));
+
+ if (TARGET_LONG_CALLS)
+ XVECEXP (save_all, 0, num_save + 2)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
+ }
+
+ code = recog (save_all, NULL_RTX, NULL);
+ if (code >= 0)
+ {
+ rtx insn = emit_insn (save_all);
+ INSN_CODE (insn) = code;
+ actual_fsize -= alloc_stack;
+
+ }
+ else
+ save_all = NULL_RTX;
+ }
+ }
+
+ /* If no prolog save function is available, store the registers the old
+ fashioned way (one by one). */
+ if (!save_all)
+ {
+ /* Special case interrupt functions that save all registers for a call. */
+ if (interrupt_handler && ((1L << LINK_POINTER_REGNUM) & reg_saved) != 0)
+ {
+ if (! TARGET_DISABLE_CALLT && (TARGET_V850E || TARGET_V850E2_ALL))
+ emit_insn (gen_callt_save_all_interrupt ());
+ else
+ emit_insn (gen_save_all_interrupt ());
+ }
+ else
+ {
+ int offset;
+ /* If the stack is too big, allocate it in chunks so we can do the
+ register saves. Allocate just the register save area first, so
+ that the save offsets stay within range of the ep register. */
+ if (actual_fsize && !CONST_OK_FOR_K (-actual_fsize))
+ init_stack_alloc = compute_register_save_size (NULL);
+ else
+ init_stack_alloc = actual_fsize;
+
+ /* Save registers at the beginning of the stack frame. */
+ offset = init_stack_alloc - 4;
+
+ if (init_stack_alloc)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (- (signed) init_stack_alloc)));
+
+ /* Save the return pointer first. */
+ if (num_save > 0 && REGNO (save_regs[num_save-1]) == LINK_POINTER_REGNUM)
+ {
+ emit_move_insn (gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ save_regs[--num_save]);
+ offset -= 4;
+ }
+
+ for (i = 0; i < num_save; i++)
+ {
+ emit_move_insn (gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ offset)),
+ save_regs[i]);
+ offset -= 4;
+ }
+ }
+ }
+
+ /* Allocate the rest of the stack that was not allocated above (either it is
+ > 32K or we just called a function to save the registers and needed more
+ stack). */
+ if (actual_fsize > init_stack_alloc)
+ {
+ int diff = actual_fsize - init_stack_alloc;
+ if (CONST_OK_FOR_K (-diff))
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-diff)));
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (reg, GEN_INT (-diff));
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
+ }
+ }
+
+ /* If we need a frame pointer, set it up now. */
+ if (frame_pointer_needed)
+ emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+}
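+
+/* A very rough sketch of what the above can emit (illustrative only):
+ saving r29 and r31 with no extra stack might produce the equivalent
+ of
+
+ add -8,sp
+ st.w r31,4[sp]
+ st.w r29,0[sp]
+
+ or, when the helper path wins, a single parallel insn that later
+ matches a "prepare" or "jarl __save_*" pattern. */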
+
+
+void
+expand_epilogue (void)
+{
+ unsigned int i;
+ unsigned int size = get_frame_size ();
+ long reg_saved = 0;
+ int actual_fsize = compute_frame_size (size, &reg_saved);
+ rtx restore_regs[32];
+ rtx restore_all;
+ unsigned int num_restore;
+ int code;
+ int interrupt_handler = v850_interrupt_function_p (current_function_decl);
+
+ /* Eliminate the initial stack stored by interrupt functions. */
+ if (interrupt_handler)
+ {
+ actual_fsize -= INTERRUPT_FIXED_SAVE_SIZE;
+ if (((1L << LINK_POINTER_REGNUM) & reg_saved) != 0)
+ actual_fsize -= INTERRUPT_ALL_SAVE_SIZE;
+ }
+
+ /* Cut off any dynamic stack created. */
+ if (frame_pointer_needed)
+ emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
+
+ /* Identify all of the saved registers. */
+ num_restore = 0;
+ for (i = 1; i < 32; i++)
+ {
+ if (((1L << i) & reg_saved) != 0)
+ restore_regs[num_restore++] = gen_rtx_REG (Pmode, i);
+ }
+
+ /* See if we have an insn that restores the particular registers we
+ want to. */
+ restore_all = NULL_RTX;
+
+ if (TARGET_PROLOG_FUNCTION
+ && num_restore > 0
+ && !interrupt_handler)
+ {
+ int alloc_stack = (4 * num_restore);
+
+ /* Don't bother checking if we don't actually save any space. */
+ if (use_prolog_function (num_restore, actual_fsize))
+ {
+ int offset;
+ restore_all = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (num_restore + 2));
+ XVECEXP (restore_all, 0, 0) = gen_rtx_RETURN (VOIDmode);
+ XVECEXP (restore_all, 0, 1)
+ = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (alloc_stack)));
+
+ offset = alloc_stack - 4;
+ for (i = 0; i < num_restore; i++)
+ {
+ XVECEXP (restore_all, 0, i+2)
+ = gen_rtx_SET (VOIDmode,
+ restore_regs[i],
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT(offset))));
+ offset -= 4;
+ }
+
+ code = recog (restore_all, NULL_RTX, NULL);
+
+ if (code >= 0)
+ {
+ rtx insn;
+
+ actual_fsize -= alloc_stack;
+ if (actual_fsize)
+ {
+ if (CONST_OK_FOR_K (actual_fsize))
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (actual_fsize)));
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (reg, GEN_INT (actual_fsize));
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ reg));
+ }
+ }
+
+ insn = emit_jump_insn (restore_all);
+ INSN_CODE (insn) = code;
+
+ }
+ else
+ restore_all = NULL_RTX;
+ }
+ }
+
+ /* If no epilogue save function is available, restore the registers the
+ old fashioned way (one by one). */
+ if (!restore_all)
+ {
+ unsigned int init_stack_free;
+
+ /* If the stack is large, we need to cut it down in 2 pieces. */
+ if (interrupt_handler)
+ init_stack_free = 0;
+ else if (actual_fsize && !CONST_OK_FOR_K (-actual_fsize))
+ init_stack_free = 4 * num_restore;
+ else
+ init_stack_free = (signed) actual_fsize;
+
+ /* Deallocate the rest of the stack if it is > 32K. */
+ if ((unsigned int) actual_fsize > init_stack_free)
+ {
+ int diff;
+
+ diff = actual_fsize - init_stack_free;
+
+ if (CONST_OK_FOR_K (diff))
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (diff)));
+ else
+ {
+ rtx reg = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (reg, GEN_INT (diff));
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ reg));
+ }
+ }
+
+ /* Special case interrupt functions that save all registers
+ for a call. */
+ if (interrupt_handler && ((1L << LINK_POINTER_REGNUM) & reg_saved) != 0)
+ {
+ if (! TARGET_DISABLE_CALLT)
+ emit_insn (gen_callt_restore_all_interrupt ());
+ else
+ emit_insn (gen_restore_all_interrupt ());
+ }
+ else
+ {
+ /* Restore registers from the beginning of the stack frame. */
+ int offset = init_stack_free - 4;
+
+ /* Restore the return pointer first. */
+ if (num_restore > 0
+ && REGNO (restore_regs [num_restore - 1]) == LINK_POINTER_REGNUM)
+ {
+ emit_move_insn (restore_regs[--num_restore],
+ gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ offset)));
+ offset -= 4;
+ }
+
+ for (i = 0; i < num_restore; i++)
+ {
+ emit_move_insn (restore_regs[i],
+ gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx,
+ offset)));
+
+ emit_use (restore_regs[i]);
+ offset -= 4;
+ }
+
+ /* Cut back the remainder of the stack. */
+ if (init_stack_free)
+ emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (init_stack_free)));
+ }
+
+ /* And return or use reti for interrupt handlers. */
+ if (interrupt_handler)
+ {
+ if (! TARGET_DISABLE_CALLT && (TARGET_V850E || TARGET_V850E2_ALL))
+ emit_insn (gen_callt_return_interrupt ());
+ else
+ emit_jump_insn (gen_return_interrupt ());
+ }
+ else if (actual_fsize)
+ emit_jump_insn (gen_return_internal ());
+ else
+ emit_jump_insn (gen_return_simple ());
+ }
+
+ v850_interrupt_cache_p = FALSE;
+ v850_interrupt_p = FALSE;
+}
+
+/* Update the condition code from the insn. */
+void
+notice_update_cc (rtx body, rtx insn)
+{
+ switch (get_attr_cc (insn))
+ {
+ case CC_NONE:
+ /* Insn does not affect CC at all. */
+ break;
+
+ case CC_NONE_0HIT:
+ /* Insn does not change CC, but the 0'th operand has been changed. */
+ if (cc_status.value1 != 0
+ && reg_overlap_mentioned_p (recog_data.operand[0], cc_status.value1))
+ cc_status.value1 = 0;
+ break;
+
+ case CC_SET_ZN:
+ /* Insn sets the Z,N flags of CC to recog_data.operand[0].
+ V,C is in an unusable state. */
+ CC_STATUS_INIT;
+ cc_status.flags |= CC_OVERFLOW_UNUSABLE | CC_NO_CARRY;
+ cc_status.value1 = recog_data.operand[0];
+ break;
+
+ case CC_SET_ZNV:
+ /* Insn sets the Z,N,V flags of CC to recog_data.operand[0].
+ C is in an unusable state. */
+ CC_STATUS_INIT;
+ cc_status.flags |= CC_NO_CARRY;
+ cc_status.value1 = recog_data.operand[0];
+ break;
+
+ case CC_COMPARE:
+ /* The insn is a compare instruction. */
+ CC_STATUS_INIT;
+ cc_status.value1 = SET_SRC (body);
+ break;
+
+ case CC_CLOBBER:
+ /* Insn doesn't leave CC in a usable state. */
+ CC_STATUS_INIT;
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* Retrieve the data area that has been chosen for the given decl. */
+
+v850_data_area
+v850_get_data_area (tree decl)
+{
+ if (lookup_attribute ("sda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
+ return DATA_AREA_SDA;
+
+ if (lookup_attribute ("tda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
+ return DATA_AREA_TDA;
+
+ if (lookup_attribute ("zda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
+ return DATA_AREA_ZDA;
+
+ return DATA_AREA_NORMAL;
+}
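+
+/* For example (illustrative), a variable can be placed in the small
+ data area with
+
+ int counter __attribute__ ((sda));
+
+ and likewise "tda" and "zda" for the tiny and zero data areas. */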
+
+/* Store the indicated data area in the decl's attributes. */
+
+static void
+v850_set_data_area (tree decl, v850_data_area data_area)
+{
+ tree name;
+
+ switch (data_area)
+ {
+ case DATA_AREA_SDA: name = get_identifier ("sda"); break;
+ case DATA_AREA_TDA: name = get_identifier ("tda"); break;
+ case DATA_AREA_ZDA: name = get_identifier ("zda"); break;
+ default:
+ return;
+ }
+
+ DECL_ATTRIBUTES (decl) = tree_cons
+ (name, NULL, DECL_ATTRIBUTES (decl));
+}
+
+/* Handle an "interrupt" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+v850_handle_interrupt_attribute (tree * node,
+ tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "sda", "tda" or "zda" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+v850_handle_data_area_attribute (tree* node,
+ tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ v850_data_area data_area;
+ v850_data_area area;
+ tree decl = *node;
+
+ /* Implement data area attribute. */
+ if (is_attribute_p ("sda", name))
+ data_area = DATA_AREA_SDA;
+ else if (is_attribute_p ("tda", name))
+ data_area = DATA_AREA_TDA;
+ else if (is_attribute_p ("zda", name))
+ data_area = DATA_AREA_ZDA;
+ else
+ gcc_unreachable ();
+
+ switch (TREE_CODE (decl))
+ {
+ case VAR_DECL:
+ if (current_function_decl != NULL_TREE)
+ {
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "data area attributes cannot be specified for "
+ "local variables");
+ *no_add_attrs = true;
+ }
+
+ /* Drop through. */
+
+ case FUNCTION_DECL:
+ area = v850_get_data_area (decl);
+ if (area != DATA_AREA_NORMAL && data_area != area)
+ {
+ error ("data area of %q+D conflicts with previous declaration",
+ decl);
+ *no_add_attrs = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+
+/* Return nonzero if FUNC is an interrupt function as specified
+ by the "interrupt" attribute. */
+
+int
+v850_interrupt_function_p (tree func)
+{
+ tree a;
+ int ret = 0;
+
+ if (v850_interrupt_cache_p)
+ return v850_interrupt_p;
+
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return 0;
+
+ a = lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (func));
+ if (a != NULL_TREE)
+ ret = 1;
+
+ else
+ {
+ a = lookup_attribute ("interrupt", DECL_ATTRIBUTES (func));
+ ret = a != NULL_TREE;
+ }
+
+ /* It's not safe to trust global variables until after function inlining has
+ been done. */
+ if (reload_completed || reload_in_progress)
+ v850_interrupt_p = ret;
+
+ return ret;
+}
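+
+/* For example (illustrative):
+
+ void my_isr (void) __attribute__ ((interrupt_handler));
+
+ The "interrupt" spelling is accepted as well; see the attribute
+ table at the end of this file. */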
+
+
+static void
+v850_encode_data_area (tree decl, rtx symbol)
+{
+ int flags;
+
+ /* Map explicit sections into the appropriate attribute */
+ if (v850_get_data_area (decl) == DATA_AREA_NORMAL)
+ {
+ if (DECL_SECTION_NAME (decl))
+ {
+ const char *name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+
+ if (streq (name, ".zdata") || streq (name, ".zbss"))
+ v850_set_data_area (decl, DATA_AREA_ZDA);
+
+ else if (streq (name, ".sdata") || streq (name, ".sbss"))
+ v850_set_data_area (decl, DATA_AREA_SDA);
+
+ else if (streq (name, ".tdata"))
+ v850_set_data_area (decl, DATA_AREA_TDA);
+ }
+
+ /* If no attribute, support -m{zda,sda,tda}=n */
+ else
+ {
+ int size = int_size_in_bytes (TREE_TYPE (decl));
+ if (size <= 0)
+ ;
+
+ else if (size <= small_memory [(int) SMALL_MEMORY_TDA].max)
+ v850_set_data_area (decl, DATA_AREA_TDA);
+
+ else if (size <= small_memory [(int) SMALL_MEMORY_SDA].max)
+ v850_set_data_area (decl, DATA_AREA_SDA);
+
+ else if (size <= small_memory [(int) SMALL_MEMORY_ZDA].max)
+ v850_set_data_area (decl, DATA_AREA_ZDA);
+ }
+
+ if (v850_get_data_area (decl) == DATA_AREA_NORMAL)
+ return;
+ }
+
+ flags = SYMBOL_REF_FLAGS (symbol);
+ switch (v850_get_data_area (decl))
+ {
+ case DATA_AREA_ZDA: flags |= SYMBOL_FLAG_ZDA; break;
+ case DATA_AREA_TDA: flags |= SYMBOL_FLAG_TDA; break;
+ case DATA_AREA_SDA: flags |= SYMBOL_FLAG_SDA; break;
+ default: gcc_unreachable ();
+ }
+ SYMBOL_REF_FLAGS (symbol) = flags;
+}
+
+static void
+v850_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
+ v850_encode_data_area (decl, XEXP (rtl, 0));
+}
+
+/* Construct a JR instruction to a routine that will perform the equivalent of
+ the RTL passed in as an argument. This RTL is a function epilogue that
+ pops registers off the stack and possibly releases some extra stack space
+ as well. The code has already verified that the RTL matches these
+ requirements. */
+
+char *
+construct_restore_jr (rtx op)
+{
+ int count = XVECLEN (op, 0);
+ int stack_bytes;
+ unsigned long int mask;
+ unsigned long int first;
+ unsigned long int last;
+ int i;
+ static char buff [100]; /* XXX */
+
+ if (count <= 2)
+ {
+ error ("bogus JR construction: %d", count);
+ return NULL;
+ }
+
+ /* Work out how many bytes to pop off the stack before retrieving
+ registers. */
+ gcc_assert (GET_CODE (XVECEXP (op, 0, 1)) == SET);
+ gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1)) == CONST_INT);
+
+ stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1));
+
+ /* Each pop will remove 4 bytes from the stack.... */
+ stack_bytes -= (count - 2) * 4;
+
+ /* Make sure that the amount of stack space left to pop is zero. */
+ if (stack_bytes != 0)
+ {
+ error ("bad amount of stack space removal: %d", stack_bytes);
+ return NULL;
+ }
+
+ /* Now compute the bit mask of registers to push. */
+ mask = 0;
+ for (i = 2; i < count; i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+
+ gcc_assert (GET_CODE (vector_element) == SET);
+ gcc_assert (GET_CODE (SET_DEST (vector_element)) == REG);
+ gcc_assert (register_is_ok_for_epilogue (SET_DEST (vector_element),
+ SImode));
+
+ mask |= 1 << REGNO (SET_DEST (vector_element));
+ }
+
+ /* Scan for the first register to pop. */
+ for (first = 0; first < 32; first++)
+ {
+ if (mask & (1 << first))
+ break;
+ }
+
+ gcc_assert (first < 32);
+
+ /* Discover the last register to pop. */
+ if (mask & (1 << LINK_POINTER_REGNUM))
+ {
+ last = LINK_POINTER_REGNUM;
+ }
+ else
+ {
+ gcc_assert (!stack_bytes);
+ gcc_assert (mask & (1 << 29));
+
+ last = 29;
+ }
+
+ /* Note, it is possible to have gaps in the register mask.
+ We ignore this here, and generate a JR anyway. We will
+ be popping more registers than is strictly necessary, but
+ it does save code space. */
+
+ if (TARGET_LONG_CALLS)
+ {
+ char name[40];
+
+ if (first == last)
+ sprintf (name, "__return_%s", reg_names [first]);
+ else
+ sprintf (name, "__return_%s_%s", reg_names [first], reg_names [last]);
+
+ sprintf (buff, "movhi hi(%s), r0, r6\n\tmovea lo(%s), r6, r6\n\tjmp r6",
+ name, name);
+ }
+ else
+ {
+ if (first == last)
+ sprintf (buff, "jr __return_%s", reg_names [first]);
+ else
+ sprintf (buff, "jr __return_%s_%s", reg_names [first], reg_names [last]);
+ }
+
+ return buff;
+}
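+
+/* The strings built above look like, e.g. (illustrative):
+
+ jr __return_r29_r31
+
+ or, with -mlong-calls,
+
+ movhi hi(__return_r29_r31), r0, r6
+ movea lo(__return_r29_r31), r6, r6
+ jmp r6 */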
+
+
+/* Construct a JARL instruction to a routine that will perform the equivalent
+ of the RTL passed as a parameter. This RTL is a function prologue that
+ saves some of the registers r20 - r31 onto the stack, and possibly acquires
+ some stack space as well. The code has already verified that the RTL
+ matches these requirements. */
+char *
+construct_save_jarl (rtx op)
+{
+ int count = XVECLEN (op, 0);
+ int stack_bytes;
+ unsigned long int mask;
+ unsigned long int first;
+ unsigned long int last;
+ int i;
+ static char buff [100]; /* XXX */
+
+ if (count <= (TARGET_LONG_CALLS ? 3 : 2))
+ {
+ error ("bogus JARL construction: %d", count);
+ return NULL;
+ }
+
+ /* Paranoia. */
+ gcc_assert (GET_CODE (XVECEXP (op, 0, 0)) == SET);
+ gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) == PLUS);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0)) == REG);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)) == CONST_INT);
+
+ /* Work out how many bytes to push onto the stack after storing the
+ registers. */
+ stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1));
+
+ /* Each push will use up 4 bytes of the allocated stack.... */
+ stack_bytes += (count - (TARGET_LONG_CALLS ? 3 : 2)) * 4;
+
+ /* Make sure that the net amount of stack space being pushed is zero. */
+ if (stack_bytes != 0)
+ {
+ error ("bad amount of stack space removal: %d", stack_bytes);
+ return NULL;
+ }
+
+ /* Now compute the bit mask of registers to push. */
+ mask = 0;
+ for (i = 1; i < count - (TARGET_LONG_CALLS ? 2 : 1); i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+
+ gcc_assert (GET_CODE (vector_element) == SET);
+ gcc_assert (GET_CODE (SET_SRC (vector_element)) == REG);
+ gcc_assert (register_is_ok_for_epilogue (SET_SRC (vector_element),
+ SImode));
+
+ mask |= 1 << REGNO (SET_SRC (vector_element));
+ }
+
+ /* Scan for the first register to push. */
+ for (first = 0; first < 32; first++)
+ {
+ if (mask & (1 << first))
+ break;
+ }
+
+ gcc_assert (first < 32);
+
+ /* Discover the last register to push. */
+ if (mask & (1 << LINK_POINTER_REGNUM))
+ {
+ last = LINK_POINTER_REGNUM;
+ }
+ else
+ {
+ gcc_assert (!stack_bytes);
+ gcc_assert (mask & (1 << 29));
+
+ last = 29;
+ }
+
+ /* Note, it is possible to have gaps in the register mask.
+ We ignore this here, and generate a JARL anyway. We will
+ be pushing more registers than is strictly necessary, but
+ it does save code space. */
+
+ if (TARGET_LONG_CALLS)
+ {
+ char name[40];
+
+ if (first == last)
+ sprintf (name, "__save_%s", reg_names [first]);
+ else
+ sprintf (name, "__save_%s_%s", reg_names [first], reg_names [last]);
+
+ sprintf (buff, "movhi hi(%s), r0, r11\n\tmovea lo(%s), r11, r11\n\tjarl .+4, r10\n\tadd 4, r10\n\tjmp r11",
+ name, name);
+ }
+ else
+ {
+ if (first == last)
+ sprintf (buff, "jarl __save_%s, r10", reg_names [first]);
+ else
+ sprintf (buff, "jarl __save_%s_%s, r10", reg_names [first],
+ reg_names [last]);
+ }
+
+ return buff;
+}
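+
+/* E.g. (illustrative): "jarl __save_r20_r31, r10", or the longer
+ movhi/movea/jarl/add/jmp sequence when -mlong-calls is in effect. */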
+
+extern tree last_assemble_variable_decl;
+extern int size_directive_output;
+
+/* A version of asm_output_aligned_bss() that copes with the special
+ data areas of the v850. */
+void
+v850_output_aligned_bss (FILE * file,
+ tree decl,
+ const char * name,
+ unsigned HOST_WIDE_INT size,
+ int align)
+{
+ switch (v850_get_data_area (decl))
+ {
+ case DATA_AREA_ZDA:
+ switch_to_section (zbss_section);
+ break;
+
+ case DATA_AREA_SDA:
+ switch_to_section (sbss_section);
+ break;
+
+ case DATA_AREA_TDA:
+ switch_to_section (tdata_section);
+ break;
+
+ default:
+ switch_to_section (bss_section);
+ break;
+ }
+
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+ last_assemble_variable_decl = decl;
+ ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+ /* Standard thing is just output label for the object. */
+ ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+
+/* Called via the macro ASM_OUTPUT_DECL_COMMON */
+void
+v850_output_common (FILE * file,
+ tree decl,
+ const char * name,
+ int size,
+ int align)
+{
+ if (decl == NULL_TREE)
+ {
+ fprintf (file, "%s", COMMON_ASM_OP);
+ }
+ else
+ {
+ switch (v850_get_data_area (decl))
+ {
+ case DATA_AREA_ZDA:
+ fprintf (file, "%s", ZCOMMON_ASM_OP);
+ break;
+
+ case DATA_AREA_SDA:
+ fprintf (file, "%s", SCOMMON_ASM_OP);
+ break;
+
+ case DATA_AREA_TDA:
+ fprintf (file, "%s", TCOMMON_ASM_OP);
+ break;
+
+ default:
+ fprintf (file, "%s", COMMON_ASM_OP);
+ break;
+ }
+ }
+
+ assemble_name (file, name);
+ fprintf (file, ",%u,%u\n", size, align / BITS_PER_UNIT);
+}
+
+/* Called via the macro ASM_OUTPUT_DECL_LOCAL */
+void
+v850_output_local (FILE * file,
+ tree decl,
+ const char * name,
+ int size,
+ int align)
+{
+ fprintf (file, "%s", LOCAL_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, "\n");
+
+ ASM_OUTPUT_ALIGNED_DECL_COMMON (file, decl, name, size, align);
+}
+
+/* Add data area to the given declaration if a ghs data area pragma is
+ currently in effect (#pragma ghs startXXX/endXXX). */
+static void
+v850_insert_attributes (tree decl, tree * attr_ptr ATTRIBUTE_UNUSED )
+{
+ if (data_area_stack
+ && data_area_stack->data_area
+ && current_function_decl == NULL_TREE
+ && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL)
+ && v850_get_data_area (decl) == DATA_AREA_NORMAL)
+ v850_set_data_area (decl, data_area_stack->data_area);
+
+ /* Initialize the default names of the v850 specific sections,
+ if this has not been done before. */
+
+ if (GHS_default_section_names [(int) GHS_SECTION_KIND_SDATA] == NULL)
+ {
+ GHS_default_section_names [(int) GHS_SECTION_KIND_SDATA]
+ = build_string (sizeof (".sdata")-1, ".sdata");
+
+ GHS_default_section_names [(int) GHS_SECTION_KIND_ROSDATA]
+ = build_string (sizeof (".rosdata")-1, ".rosdata");
+
+ GHS_default_section_names [(int) GHS_SECTION_KIND_TDATA]
+ = build_string (sizeof (".tdata")-1, ".tdata");
+
+ GHS_default_section_names [(int) GHS_SECTION_KIND_ZDATA]
+ = build_string (sizeof (".zdata")-1, ".zdata");
+
+ GHS_default_section_names [(int) GHS_SECTION_KIND_ROZDATA]
+ = build_string (sizeof (".rozdata")-1, ".rozdata");
+ }
+
+ if (current_function_decl == NULL_TREE
+ && (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == CONST_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL)
+ && (!DECL_EXTERNAL (decl) || DECL_INITIAL (decl))
+ && !DECL_SECTION_NAME (decl))
+ {
+ enum GHS_section_kind kind = GHS_SECTION_KIND_DEFAULT;
+ tree chosen_section;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ kind = GHS_SECTION_KIND_TEXT;
+ else
+ {
+ /* First choose a section kind based on the data area of the decl. */
+ switch (v850_get_data_area (decl))
+ {
+ default:
+ gcc_unreachable ();
+
+ case DATA_AREA_SDA:
+ kind = ((TREE_READONLY (decl))
+ ? GHS_SECTION_KIND_ROSDATA
+ : GHS_SECTION_KIND_SDATA);
+ break;
+
+ case DATA_AREA_TDA:
+ kind = GHS_SECTION_KIND_TDATA;
+ break;
+
+ case DATA_AREA_ZDA:
+ kind = ((TREE_READONLY (decl))
+ ? GHS_SECTION_KIND_ROZDATA
+ : GHS_SECTION_KIND_ZDATA);
+ break;
+
+ case DATA_AREA_NORMAL: /* default data area */
+ if (TREE_READONLY (decl))
+ kind = GHS_SECTION_KIND_RODATA;
+ else if (DECL_INITIAL (decl))
+ kind = GHS_SECTION_KIND_DATA;
+ else
+ kind = GHS_SECTION_KIND_BSS;
+ }
+ }
+
+ /* Now, if the section kind has been explicitly renamed,
+ then attach a section attribute. */
+ chosen_section = GHS_current_section_names [(int) kind];
+
+ /* Otherwise, if this kind of section needs an explicit section
+ attribute, then also attach one. */
+ if (chosen_section == NULL)
+ chosen_section = GHS_default_section_names [(int) kind];
+
+ if (chosen_section)
+ {
+ /* Only set the section name if specified by a pragma, because
+ otherwise it will force those variables to get allocated storage
+ in this module, rather than by the linker. */
+ DECL_SECTION_NAME (decl) = chosen_section;
+ }
+ }
+}
+
+/* Construct a DISPOSE instruction that is the equivalent of
+ the given RTX. We have already verified that this should
+ be possible. */
+
+char *
+construct_dispose_instruction (rtx op)
+{
+ int count = XVECLEN (op, 0);
+ int stack_bytes;
+ unsigned long int mask;
+ int i;
+ static char buff[ 100 ]; /* XXX */
+ int use_callt = 0;
+
+ if (count <= 2)
+ {
+ error ("bogus DISPOSE construction: %d", count);
+ return NULL;
+ }
+
+ /* Work out how many bytes to pop off the
+ stack before retrieving registers. */
+ gcc_assert (GET_CODE (XVECEXP (op, 0, 1)) == SET);
+ gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1)) == CONST_INT);
+
+ stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1));
+
+ /* Each pop will remove 4 bytes from the stack.... */
+ stack_bytes -= (count - 2) * 4;
+
+ /* Make sure that the amount we are popping
+ will fit into the DISPOSE instruction. */
+ if (stack_bytes > 128)
+ {
+ error ("too much stack space to dispose of: %d", stack_bytes);
+ return NULL;
+ }
+
+ /* Now compute the bit mask of registers to push. */
+ mask = 0;
+
+ for (i = 2; i < count; i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+
+ gcc_assert (GET_CODE (vector_element) == SET);
+ gcc_assert (GET_CODE (SET_DEST (vector_element)) == REG);
+ gcc_assert (register_is_ok_for_epilogue (SET_DEST (vector_element),
+ SImode));
+
+ if (REGNO (SET_DEST (vector_element)) == 2)
+ use_callt = 1;
+ else
+ mask |= 1 << REGNO (SET_DEST (vector_element));
+ }
+
+ if (! TARGET_DISABLE_CALLT
+ && (use_callt || stack_bytes == 0))
+ {
+ if (use_callt)
+ {
+ sprintf (buff, "callt ctoff(__callt_return_r2_r%d)", (mask & (1 << 31)) ? 31 : 29);
+ return buff;
+ }
+ else
+ {
+ for (i = 20; i < 32; i++)
+ if (mask & (1 << i))
+ break;
+
+ if (i == 31)
+ sprintf (buff, "callt ctoff(__callt_return_r31c)");
+ else
+ sprintf (buff, "callt ctoff(__callt_return_r%d_r%s)",
+ i, (mask & (1 << 31)) ? "31c" : "29");
+ }
+ }
+ else
+ {
+ static char regs [100]; /* XXX */
+ int done_one;
+
+ /* Generate the DISPOSE instruction. Note we could just issue the
+ bit mask as a number as the assembler can cope with this, but for
+ the sake of our readers we turn it into a textual description. */
+ regs[0] = 0;
+ done_one = 0;
+
+ for (i = 20; i < 32; i++)
+ {
+ if (mask & (1 << i))
+ {
+ int first;
+
+ if (done_one)
+ strcat (regs, ", ");
+ else
+ done_one = 1;
+
+ first = i;
+ strcat (regs, reg_names[ first ]);
+
+ for (i++; i < 32; i++)
+ if ((mask & (1 << i)) == 0)
+ break;
+
+ if (i > first + 1)
+ {
+ strcat (regs, " - ");
+ strcat (regs, reg_names[ i - 1 ] );
+ }
+ }
+ }
+
+ sprintf (buff, "dispose %d {%s}, r31", stack_bytes / 4, regs);
+ }
+
+ return buff;
+}
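+
+/* Examples of the strings built above (illustrative): popping r29 and
+ r31 plus 8 bytes of stack gives "dispose 2 {r29, r31}, r31"; the
+ CALLT path instead gives e.g. "callt ctoff(__callt_return_r2_r31)". */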
+
+/* Construct a PREPARE instruction that is the equivalent of
+ the given RTL. We have already verified that this should
+ be possible. */
+
+char *
+construct_prepare_instruction (rtx op)
+{
+ int count;
+ int stack_bytes;
+ unsigned long int mask;
+ int i;
+ static char buff[ 100 ]; /* XXX */
+ int use_callt = 0;
+
+ if (XVECLEN (op, 0) <= 1)
+ {
+ error ("bogus PREPEARE construction: %d", XVECLEN (op, 0));
+ return NULL;
+ }
+
+ /* Work out how many bytes to push onto
+ the stack after storing the registers. */
+ gcc_assert (GET_CODE (XVECEXP (op, 0, 0)) == SET);
+ gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) == PLUS);
+ gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)) == CONST_INT);
+
+ stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1));
+
+
+ /* Make sure that the amount we are pushing
+ will fit into the PREPARE instruction. */
+ if (stack_bytes < -128)
+ {
+ error ("too much stack space to prepare: %d", stack_bytes);
+ return NULL;
+ }
+
+ /* Now compute the bit mask of registers to push. */
+ count = 0;
+ mask = 0;
+ for (i = 1; i < XVECLEN (op, 0); i++)
+ {
+ rtx vector_element = XVECEXP (op, 0, i);
+
+ if (GET_CODE (vector_element) == CLOBBER)
+ continue;
+
+ gcc_assert (GET_CODE (vector_element) == SET);
+ gcc_assert (GET_CODE (SET_SRC (vector_element)) == REG);
+ gcc_assert (register_is_ok_for_epilogue (SET_SRC (vector_element),
+ SImode));
+
+ if (REGNO (SET_SRC (vector_element)) == 2)
+ use_callt = 1;
+ else
+ mask |= 1 << REGNO (SET_SRC (vector_element));
+ count++;
+ }
+
+ stack_bytes += count * 4;
+
+ if ((! TARGET_DISABLE_CALLT)
+ && (use_callt || stack_bytes == 0))
+ {
+ if (use_callt)
+ {
+ sprintf (buff, "callt ctoff(__callt_save_r2_r%d)", (mask & (1 << 31)) ? 31 : 29 );
+ return buff;
+ }
+
+ for (i = 20; i < 32; i++)
+ if (mask & (1 << i))
+ break;
+
+ if (i == 31)
+ sprintf (buff, "callt ctoff(__callt_save_r31c)");
+ else
+ sprintf (buff, "callt ctoff(__callt_save_r%d_r%s)",
+ i, (mask & (1 << 31)) ? "31c" : "29");
+ }
+ else
+ {
+ static char regs [100]; /* XXX */
+ int done_one;
+
+
+ /* Generate the PREPARE instruction. Note we could just issue the
+ bit mask as a number as the assembler can cope with this, but for
+ the sake of our readers we turn it into a textual description. */
+ regs[0] = 0;
+ done_one = 0;
+
+ for (i = 20; i < 32; i++)
+ {
+ if (mask & (1 << i))
+ {
+ int first;
+
+ if (done_one)
+ strcat (regs, ", ");
+ else
+ done_one = 1;
+
+ first = i;
+ strcat (regs, reg_names[ first ]);
+
+ for (i++; i < 32; i++)
+ if ((mask & (1 << i)) == 0)
+ break;
+
+ if (i > first + 1)
+ {
+ strcat (regs, " - ");
+ strcat (regs, reg_names[ i - 1 ] );
+ }
+ }
+ }
+
+ sprintf (buff, "prepare {%s}, %d", regs, (- stack_bytes) / 4);
+ }
+
+ return buff;
+}
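+
+/* E.g. (illustrative): saving r20-r22 and r31 while allocating 8 extra
+ bytes of stack yields "prepare {r20 - r22, r31}, 2". */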
+
+/* Return an RTX indicating where the return address to the
+ calling function can be found. */
+
+rtx
+v850_return_addr (int count)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ return get_hard_reg_initial_val (Pmode, LINK_POINTER_REGNUM);
+}
+
+/* Implement TARGET_ASM_INIT_SECTIONS. */
+
+static void
+v850_asm_init_sections (void)
+{
+ rosdata_section
+ = get_unnamed_section (0, output_section_asm_op,
+ "\t.section .rosdata,\"a\"");
+
+ rozdata_section
+ = get_unnamed_section (0, output_section_asm_op,
+ "\t.section .rozdata,\"a\"");
+
+ tdata_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ "\t.section .tdata,\"aw\"");
+
+ zdata_section
+ = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
+ "\t.section .zdata,\"aw\"");
+
+ zbss_section
+ = get_unnamed_section (SECTION_WRITE | SECTION_BSS,
+ output_section_asm_op,
+ "\t.section .zbss,\"aw\"");
+}
+
+static section *
+v850_select_section (tree exp,
+ int reloc ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ if (TREE_CODE (exp) == VAR_DECL)
+ {
+ int is_const;
+ if (!TREE_READONLY (exp)
+ || TREE_SIDE_EFFECTS (exp)
+ || !DECL_INITIAL (exp)
+ || (DECL_INITIAL (exp) != error_mark_node
+ && !TREE_CONSTANT (DECL_INITIAL (exp))))
+ is_const = FALSE;
+ else
+ is_const = TRUE;
+
+ switch (v850_get_data_area (exp))
+ {
+ case DATA_AREA_ZDA:
+ return is_const ? rozdata_section : zdata_section;
+
+ case DATA_AREA_TDA:
+ return tdata_section;
+
+ case DATA_AREA_SDA:
+ return is_const ? rosdata_section : sdata_section;
+
+ default:
+ return is_const ? readonly_data_section : data_section;
+ }
+ }
+ return readonly_data_section;
+}
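+
+/* A stand-alone sketch of the same decision table; the section names
+   are illustrative strings, not the section objects used above.  */
+#if 0
+static const char *
+sketch_section_name (v850_data_area area, int is_const)
+{
+  switch (area)
+    {
+    case DATA_AREA_ZDA: return is_const ? ".rozdata" : ".zdata";
+    case DATA_AREA_TDA: return ".tdata";
+    case DATA_AREA_SDA: return is_const ? ".rosdata" : ".sdata";
+    default:            return is_const ? ".rodata"  : ".data";
+    }
+}
+#endif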
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. */
+
+static bool
+v850_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == 10);
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+v850_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ /* Values larger than 8 bytes are returned in memory. */
+ return int_size_in_bytes (type) > 8 || TYPE_MODE (type) == BLKmode;
+}
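+
+/* For illustration, under the 8-byte rule above:
+
+     struct pair   { int a, b; };       8 bytes  -> returned in registers
+     struct triple { int a, b, c; };    12 bytes -> returned in memory   */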
+
+/* Worker function for TARGET_FUNCTION_VALUE. */
+
+static rtx
+v850_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (TYPE_MODE (valtype), 10);
+}
+
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+v850_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_arg_size ATTRIBUTE_UNUSED,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ ca->anonymous_args = (!TARGET_GHS ? 1 : 0);
+}
+
+/* Worker function for TARGET_CAN_ELIMINATE. */
+
+static bool
+v850_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+ return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
+}
+
+/* Worker function for TARGET_CONDITIONAL_REGISTER_USAGE.
+
+ If TARGET_APP_REGS is not defined then add r2 and r5 to
+ the pool of fixed registers. See PR 14505. */
+
+static void
+v850_conditional_register_usage (void)
+{
+ if (TARGET_APP_REGS)
+ {
+ fixed_regs[2] = 0; call_used_regs[2] = 0;
+ fixed_regs[5] = 0; call_used_regs[5] = 1;
+ }
+}
+
+/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE. */
+
+static void
+v850_asm_trampoline_template (FILE *f)
+{
+ fprintf (f, "\tjarl .+4,r12\n");
+ fprintf (f, "\tld.w 12[r12],r20\n");
+ fprintf (f, "\tld.w 16[r12],r12\n");
+ fprintf (f, "\tjmp [r12]\n");
+ fprintf (f, "\tnop\n");
+ fprintf (f, "\t.long 0\n");
+ fprintf (f, "\t.long 0\n");
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT. */
+
+static void
+v850_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+ rtx mem, fnaddr = XEXP (DECL_RTL (fndecl), 0);
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, SImode, 16);
+ emit_move_insn (mem, chain_value);
+ mem = adjust_address (m_tramp, SImode, 20);
+ emit_move_insn (mem, fnaddr);
+}
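+
+/* Sketch of the resulting 24-byte trampoline; the offsets follow from
+   TRAMPOLINE_SIZE and the two adjust_address calls above, and the
+   struct is purely illustrative:
+
+     struct v850_trampoline_sketch
+     {
+       unsigned char code[16];    copy of the template above
+       unsigned int chain;        word at offset 16, loaded into r20
+       unsigned int fnaddr;       word at offset 20, loaded into r12
+     };
+
+   The template's "jarl .+4,r12" leaves the address of its second
+   instruction in r12, so the 12[r12] and 16[r12] loads pick up the
+   chain and fnaddr words respectively.  */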
+
+static int
+v850_issue_rate (void)
+{
+ return (TARGET_V850E2_ALL ? 2 : 1);
+}
+
+/* V850 specific attributes. */
+
+static const struct attribute_spec v850_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ { "interrupt_handler", 0, 0, true, false, false, v850_handle_interrupt_attribute },
+ { "interrupt", 0, 0, true, false, false, v850_handle_interrupt_attribute },
+ { "sda", 0, 0, true, false, false, v850_handle_data_area_attribute },
+ { "tda", 0, 0, true, false, false, v850_handle_data_area_attribute },
+ { "zda", 0, 0, true, false, false, v850_handle_data_area_attribute },
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND v850_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS v850_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P v850_print_operand_punct_valid_p
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE v850_attribute_table
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES v850_insert_attributes
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION v850_select_section
+
+/* The assembler supports switchable .bss sections, but
+ v850_select_section doesn't yet make use of them. */
+#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS
+#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS false
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO v850_encode_section_info
+
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_DEFAULT | MASK_APP_REGS)
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION v850_handle_option
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS v850_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG v850_reorg
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE v850_issue_rate
+
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P v850_function_value_regno_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE v850_function_value
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY v850_return_in_memory
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE v850_pass_by_reference
+
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS v850_setup_incoming_varargs
+
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES v850_arg_partial_bytes
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG v850_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE v850_function_arg_advance
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE v850_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE v850_conditional_register_usage
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE v850_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT v850_trampoline_init
+
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING v850_strict_argument_naming
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE v850_option_optimization_table
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-v850.h"
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
new file mode 100644
index 000000000..892a34c63
--- /dev/null
+++ b/gcc/config/v850/v850.h
@@ -0,0 +1,987 @@
+/* Definitions of target machine for GNU compiler. NEC V850 series
+ Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Jeff Law (law@cygnus.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_V850_H
+#define GCC_V850_H
+
+extern GTY(()) rtx v850_compare_op0;
+extern GTY(()) rtx v850_compare_op1;
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:--start-group -lc -lgcc --end-group}}"
+
+#undef ENDFILE_SPEC
+#undef LINK_SPEC
+#undef STARTFILE_SPEC
+#undef ASM_SPEC
+
+#define TARGET_CPU_generic 1
+#define TARGET_CPU_v850e 2
+#define TARGET_CPU_v850e1 3
+#define TARGET_CPU_v850e2 4
+#define TARGET_CPU_v850e2v3 5
+
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_generic
+#endif
+
+#define MASK_DEFAULT MASK_V850
+#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850}"
+#define SUBTARGET_CPP_SPEC "%{!mv*:-D__v850__}"
+#define TARGET_VERSION fprintf (stderr, " (NEC V850)");
+
+/* Choose which processor will be the default.
+ We must pass a -mv850xx option to the assembler if no explicit -mv* option
+ is given, because the assembler's processor default may not be correct. */
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e
+#undef MASK_DEFAULT
+#define MASK_DEFAULT MASK_V850E
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e}"
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{!mv*:-D__v850e__}"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NEC V850E)");
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e1
+#undef MASK_DEFAULT
+#define MASK_DEFAULT MASK_V850E /* No practical difference. */
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e1}"
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{!mv*:-D__v850e1__} %{mv850e1:-D__v850e1__}"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NEC V850E1)");
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e2
+#undef MASK_DEFAULT
+#define MASK_DEFAULT MASK_V850E2
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e2}"
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{!mv*:-D__v850e2__} %{mv850e2:-D__v850e2__}"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NEC V850E2)");
+#endif
+
+#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e2v3
+#undef MASK_DEFAULT
+#define MASK_DEFAULT MASK_V850E2V3
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e2v3}"
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{!mv*:-D__v850e2v3__} %{mv850e2v3:-D__v850e2v3__}"
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (NEC V850E2V3)");
+#endif
+
+#define TARGET_V850E2_ALL (TARGET_V850E2 || TARGET_V850E2V3)
+
+#define ASM_SPEC "%{mv850es:-mv850e1}%{!mv850es:%{mv*:-mv%*}}"
+#define CPP_SPEC "\
+ %{mv850e2v3:-D__v850e2v3__} \
+ %{mv850e2:-D__v850e2__} \
+ %{mv850es:-D__v850e1__} \
+ %{mv850e1:-D__v850e1__} \
+ %{mv850:-D__v850__} \
+ %(subtarget_cpp_spec)" \
+ " %{mep:-D__EP__}"
+
+#define EXTRA_SPECS \
+ { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }
+
+/* Names to predefine in the preprocessor for this target machine. */
+#define TARGET_CPU_CPP_BUILTINS() do { \
+ builtin_define( "__v851__" ); \
+ builtin_define( "__v850" ); \
+ builtin_assert( "machine=v850" ); \
+ builtin_assert( "cpu=v850" ); \
+ if (TARGET_EP) \
+ builtin_define ("__EP__"); \
+} while(0)
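+
+/* Usage sketch: target code can key off the builtins defined above,
+   for example
+
+     #if defined (__v850__) && defined (__EP__)
+       ... EP-relative addressing is enabled ...
+     #endif
+*/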
+
+#define MASK_CPU (MASK_V850 | MASK_V850E)
+
+/* Information about the various small memory areas. */
+struct small_memory_info {
+ const char *name;
+ long max;
+ long physical_max;
+};
+
+enum small_memory_type {
+ /* tiny data area, using EP as base register */
+ SMALL_MEMORY_TDA = 0,
+ /* small data area using dp as base register */
+ SMALL_MEMORY_SDA,
+ /* zero data area using r0 as base register */
+ SMALL_MEMORY_ZDA,
+ SMALL_MEMORY_max
+};
+
+extern struct small_memory_info small_memory[(int)SMALL_MEMORY_max];
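+
+/* Usage sketch; the sda/tda/zda attributes are the ones registered in
+   v850_attribute_table in v850.c:
+
+     int hot_counter __attribute__ ((sda));    small data area (.sdata)
+     int tiny_flag   __attribute__ ((tda));    tiny data area  (.tdata)
+     int zero_var    __attribute__ ((zda));    zero data area  (.zdata)
+*/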
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields.
+ This is not true on the NEC V850. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* This is not true on the NEC V850. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is lowest
+ numbered.
+ This is not true on the NEC V850. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ Some simple experiments have shown that leaving UNSIGNEDP alone
+ generates the best overall code. */
+
+#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4) \
+ { (MODE) = SImode; }
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* The stack goes in 32-bit lumps. */
+#define STACK_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function.
+ 16 is the minimum boundary; 32 would give better performance. */
+#define FUNCTION_BOUNDARY 16
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No structure field wants to be aligned rounder than this. */
+#define BIGGEST_FIELD_ALIGNMENT 32
+
+/* Define this if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT (!TARGET_NO_STRICT_ALIGN)
+
+/* Define this as 1 if `char' should by default be signed; else as 0.
+
+ On the NEC V850, loads do sign extension, so make this default. */
+#define DEFAULT_SIGNED_CHAR 1
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+#define FIRST_PSEUDO_REGISTER 36
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+
+#define FIXED_REGISTERS \
+ { 1, 1, 1, 1, 1, 1, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 0, \
+ 1, 1, \
+ 1, 1}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you
+ like. */
+
+#define CALL_USED_REGISTERS \
+ { 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 1, 1, \
+ 1, 1, \
+ 1, 1}
+
+/* List the order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS.
+
+ On the 850, we list the return registers first, then all of the volatile
+ registers, then the saved registers in reverse order to better save the
+ registers with an out-of-line function, and finally the fixed
+ registers. */
+
+#define REG_ALLOC_ORDER \
+{ \
+ 10, 11, /* return registers */ \
+ 12, 13, 14, 15, 16, 17, 18, 19, /* scratch registers */ \
+ 6, 7, 8, 9, 31, /* argument registers */ \
+ 29, 28, 27, 26, 25, 24, 23, 22, /* saved registers */ \
+ 21, 20, 2, \
+ 0, 1, 3, 4, 5, 30, 32, 33, /* fixed registers */ \
+ 34, 35 \
+}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers. */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) <= 4) || (((REGNO) & 1) == 0 && (REGNO) != 0))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ (MODE1 == MODE2 || (GET_MODE_SIZE (MODE1) <= 4 && GET_MODE_SIZE (MODE2) <= 4))
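+
+/* Stand-alone sketch of the two macros above (the helper names are
+   illustrative):
+
+     static int nregs (int size)  { return (size + 4 - 1) / 4; }
+     static int mode_ok (int regno, int size)
+     { return size <= 4 || ((regno & 1) == 0 && regno != 0); }
+
+   So an 8-byte DImode value occupies nregs (8) == 2 registers and must
+   start at an even register other than r0: mode_ok (10, 8) holds while
+   mode_ok (11, 8) does not.  */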
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+enum reg_class
+{
+ NO_REGS, GENERAL_REGS, EVEN_REGS, ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, LIM_REG_CLASSES \
+}
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", "GENERAL_REGS", "EVEN_REGS", "ALL_REGS", "LIM_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000,0x0 }, /* NO_REGS */ \
+ { 0xffffffff,0x0 }, /* GENERAL_REGS */ \
+ { 0x55555554,0x0 }, /* EVEN_REGS */ \
+ { 0xffffffff,0x0 }, /* ALL_REGS */ \
+}
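+
+/* Sketch: 0x55555554 sets bits 2, 4, 6, ..., 30, so EVEN_REGS contains
+   the even-numbered general registers excluding r0, matching the
+   even-register requirement in HARD_REGNO_MODE_OK above.  A membership
+   test would be
+
+     (regno < 32 && ((0x55555554u >> regno) & 1))   */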
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) (((REGNO) == CC_REGNUM || (REGNO) == FCC_REGNUM) ? NO_REGS : GENERAL_REGS)
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS NO_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_BASE_P(regno) \
+ (((regno) < FIRST_PSEUDO_REGISTER \
+ && (regno) != CC_REGNUM \
+ && (regno) != FCC_REGNUM) \
+ || reg_renumber[regno] >= 0)
+
+#define REGNO_OK_FOR_INDEX_P(regno) 0
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Convenience wrappers around insn_const_int_ok_for_constraint. */
+
+#define CONST_OK_FOR_I(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_I)
+#define CONST_OK_FOR_J(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_J)
+#define CONST_OK_FOR_K(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_K)
+#define CONST_OK_FOR_L(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_L)
+#define CONST_OK_FOR_M(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_M)
+#define CONST_OK_FOR_N(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_N)
+#define CONST_OK_FOR_O(VALUE) \
+ insn_const_int_ok_for_constraint (VALUE, CONSTRAINT_O)
+
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+
+#define STARTING_FRAME_OFFSET 0
+
+/* Offset of first parameter from the argument pointer register value. */
+/* This equals the size of the saved fp + pc, even when an fp is not
+ saved, since the value is used before we know whether one will be. */
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REGNUM
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 34
+
+/* Register containing return address from latest function call. */
+#define LINK_POINTER_REGNUM LP_REGNUM
+
+/* On some machines the offset between the frame pointer and starting
+ offset of the automatic variables is not known until after register
+ allocation has been done (for example, because the saved registers
+ are between these two locations). On those machines, define
+ `FRAME_POINTER_REGNUM' to be the number of a special, fixed register
+ to be used internally until the offset is known, and define
+ `HARD_FRAME_POINTER_REGNUM' to be the actual hard register number
+ used for the frame pointer.
+
+ You should define this macro only in the very rare circumstances
+ when it is not possible to calculate the offset between the frame
+ pointer and the automatic variables until after register
+ allocation has been completed. When this macro is defined, you
+ must also indicate in your definition of `ELIMINABLE_REGS' how to
+ eliminate `FRAME_POINTER_REGNUM' into either
+ `HARD_FRAME_POINTER_REGNUM' or `STACK_POINTER_REGNUM'.
+
+ Do not define this macro if it would be the same as
+ `FRAME_POINTER_REGNUM'. */
+#undef HARD_FRAME_POINTER_REGNUM
+#define HARD_FRAME_POINTER_REGNUM 29
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 35
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM 20
+
+/* If defined, this macro specifies a table of register pairs used to
+ eliminate unneeded registers that point into the stack frame. If
+ it is not defined, the only elimination attempted by the compiler
+ is to replace references to the frame pointer with references to
+ the stack pointer.
+
+ The definition of this macro is a list of structure
+ initializations, each of which specifies an original and
+ replacement register.
+
+ On some machines, the position of the argument pointer is not
+ known until the compilation is completed. In such a case, a
+ separate hard register must be used for the argument pointer.
+ This register can be eliminated by replacing it with either the
+ frame pointer or the argument pointer, depending on whether or not
+ the frame pointer has been eliminated.
+
+ In this case, you might specify:
+ #define ELIMINABLE_REGS \
+ {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+ Note that the elimination of the argument pointer with the stack
+ pointer is specified first since that is the preferred elimination. */
+
+#define ELIMINABLE_REGS \
+{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }}
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It
+ specifies the initial difference between the specified pair of
+ registers. This macro must be defined if `ELIMINABLE_REGS' is
+ defined. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+{ \
+ if ((FROM) == FRAME_POINTER_REGNUM) \
+ (OFFSET) = get_frame_size () + crtl->outgoing_args_size; \
+ else if ((FROM) == ARG_POINTER_REGNUM) \
+ (OFFSET) = compute_frame_size (get_frame_size (), (long *)0); \
+ else \
+ gcc_unreachable (); \
+}
+
+/* Keep the stack pointer constant throughout the function. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define RETURN_ADDR_RTX(COUNT, FP) v850_return_addr (COUNT)
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+#define CUMULATIVE_ARGS struct cum_arg
+struct cum_arg { int nbytes; int anonymous_args; };
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM).nbytes = 0, (CUM).anonymous_args = 0)
+
+/* When a parameter is passed in a register, stack space is still
+ allocated for it. */
+#define REG_PARM_STACK_SPACE(DECL) 0
+
+/* 1 if N is a possible register number for function argument passing. */
+
+#define FUNCTION_ARG_REGNO_P(N) ((N) >= 6 && (N) <= 9)
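+
+/* Sketch: with word arguments passed in r6..r9 (see also the "argument
+   registers" entry in REG_ALLOC_ORDER), a function such as
+
+     void f (int a, int b, int c, int d, int e);
+
+   receives a..d in r6..r9 and e on the stack.  */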
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) \
+ gen_rtx_REG (MODE, 10)
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Define this macro as a C expression that is nonzero for registers
+ used by the epilogue or the `return' pattern. */
+
+#define EPILOGUE_USES(REGNO) \
+ (reload_completed && (REGNO) == LINK_POINTER_REGNUM)
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) ;
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE 24
+
+/* Addressing modes, and classification of registers for them. */
+
+
+/* 1 if X is an rtx for a constant that is a valid address. */
+
+/* ??? This seems too exclusive. We may get better code by accepting more
+ possibilities here; in particular, we should accept ZDA_NAME SYMBOL_REFs. */
+
+#define CONSTANT_ADDRESS_P(X) constraint_satisfied_p (X, CONSTRAINT_K)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 1
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) 0
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) 1
+#define REG_OK_FOR_INDEX_P_STRICT(X) 0
+#define REG_OK_FOR_BASE_P_STRICT(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#define STRICT 0
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) 0
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+#define STRICT 1
+
+#endif
+
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS,
+ except for CONSTANT_ADDRESS_P which is actually
+ machine-independent. */
+
+/* Accept either REG or SUBREG where a register is valid. */
+
+#define RTX_OK_FOR_BASE_P(X) \
+ ((REG_P (X) && REG_OK_FOR_BASE_P (X)) \
+ || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)) \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (X))))
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+do { \
+ if (RTX_OK_FOR_BASE_P (X)) \
+ goto ADDR; \
+ if (CONSTANT_ADDRESS_P (X) \
+ && (MODE == QImode || INTVAL (X) % 2 == 0) \
+ && (GET_MODE_SIZE (MODE) <= 4 || INTVAL (X) % 4 == 0)) \
+ goto ADDR; \
+ if (GET_CODE (X) == LO_SUM \
+ && REG_P (XEXP (X, 0)) \
+ && REG_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && CONSTANT_P (XEXP (X, 1)) \
+ && (GET_CODE (XEXP (X, 1)) != CONST_INT \
+ || ((MODE == QImode || INTVAL (XEXP (X, 1)) % 2 == 0) \
+ && CONST_OK_FOR_K (INTVAL (XEXP (X, 1))))) \
+ && GET_MODE_SIZE (MODE) <= GET_MODE_SIZE (word_mode)) \
+ goto ADDR; \
+ if (special_symbolref_operand (X, MODE) \
+ && (GET_MODE_SIZE (MODE) <= GET_MODE_SIZE (word_mode))) \
+ goto ADDR; \
+ if (GET_CODE (X) == PLUS \
+ && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && constraint_satisfied_p (XEXP (X,1), CONSTRAINT_K) \
+ && ((MODE == QImode || INTVAL (XEXP (X, 1)) % 2 == 0) \
+ && CONST_OK_FOR_K (INTVAL (XEXP (X, 1)) \
+ + (GET_MODE_NUNITS (MODE) * UNITS_PER_WORD)))) \
+ goto ADDR; \
+} while (0)
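+
+/* Stand-alone sketch of the constant-displacement rules above; the
+   signed 16-bit range assumed for CONST_OK_FOR_K here comes from the
+   'K' constraint (see constraints.md for the authoritative definition):
+
+     static int sketch_disp_ok (long disp, int mode_size)
+     {
+       if (disp < -32768 || disp > 32767)   assumed CONST_OK_FOR_K range
+         return 0;
+       if (mode_size == 1)                  QImode: any displacement
+         return 1;
+       if (disp % 2 != 0)                   wider data must be even
+         return 0;
+       return mode_size <= 4 || disp % 4 == 0;
+     }
+*/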
+
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) \
+ (GET_CODE (X) == CONST_DOUBLE \
+ || !(GET_CODE (X) == CONST \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
+ && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
+ && ! CONST_OK_FOR_K (INTVAL (XEXP (XEXP (X, 0), 1)))))
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison.
+
+ For floating-point equality comparisons, CCFPEQmode should be used.
+ VOIDmode should be used in all other cases.
+
+ For integer comparisons against zero, reduce to CCNOmode or CCZmode if
+ possible, to allow for more combinations. */
+
+#define SELECT_CC_MODE(OP, X, Y) v850_select_cc_mode (OP, X, Y)
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+/* Here we define the machine-dependent flags and fields in cc_status
+ (see `conditions.h') that are used for the v850. */
+
+/* Store in cc_status the expressions
+ that the condition codes will describe
+ after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+#define CC_OVERFLOW_UNUSABLE 0x200
+#define CC_NO_CARRY CC_NO_OVERFLOW
+#define NOTICE_UPDATE_CC(EXP, INSN) notice_update_cc(EXP, INSN)
+
+/* Nonzero if access to memory by bytes or half words is no faster
+ than accessing full words. */
+#define SLOW_BYTE_ACCESS 1
+
+/* According to expr.c, a value of around 6 should minimize code size, and
+ for the V850 series, that's our primary concern. */
+#define MOVE_RATIO(speed) 6
+
+/* Indirect calls are expensive, never turn a direct call
+ into an indirect call. */
+#define NO_FUNCTION_CSE
+
+/* The four different data regions on the v850. */
+typedef enum
+{
+ DATA_AREA_NORMAL,
+ DATA_AREA_SDA,
+ DATA_AREA_TDA,
+ DATA_AREA_ZDA
+} v850_data_area;
+
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+#define DATA_SECTION_ASM_OP "\t.section .data"
+#define BSS_SECTION_ASM_OP "\t.section .bss"
+#define SDATA_SECTION_ASM_OP "\t.section .sdata,\"aw\""
+#define SBSS_SECTION_ASM_OP "\t.section .sbss,\"aw\""
+
+#define SCOMMON_ASM_OP "\t.scomm\t"
+#define ZCOMMON_ASM_OP "\t.zcomm\t"
+#define TCOMMON_ASM_OP "\t.tcomm\t"
+
+#define ASM_COMMENT_START "#"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "#APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \
+ if (! v850_output_addr_const_extra (FILE, X)) \
+ goto FAIL
+
+/* This says how to output the assembler to define a global
+ uninitialized but not common symbol. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ v850_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* This says how to output the assembler to define a global
+ uninitialized, common symbol. */
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#undef ASM_OUTPUT_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
+ v850_output_common (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* This says how to output the assembler to define a local
+ uninitialized symbol. */
+#undef ASM_OUTPUT_ALIGNED_LOCAL
+#undef ASM_OUTPUT_LOCAL
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
+ v850_output_local (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global "
+
+#define ASM_PN_FORMAT "%s___%lu"
+
+/* This is how we tell the assembler that two symbols have the same value. */
+
+#define ASM_OUTPUT_DEF(FILE,NAME1,NAME2) \
+ do { assemble_name(FILE, NAME1); \
+ fputs(" = ", FILE); \
+ assemble_name(FILE, NAME2); \
+ fputc('\n', FILE); } while (0)
+
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "sp", "gp", "r5", "r6" , "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "ep", "r31", \
+ "psw", "fcc", \
+ ".fp", ".ap"}
+
+/* Register numbers */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ { "zero", ZERO_REGNUM }, \
+ { "hp", 2 }, \
+ { "r3", 3 }, \
+ { "r4", 4 }, \
+ { "tp", 5 }, \
+ { "fp", 29 }, \
+ { "r30", 30 }, \
+ { "lp", LP_REGNUM} }
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ fprintf (FILE, "\t%s .L%d\n", \
+ (TARGET_BIG_SWITCH ? ".long" : ".short"), VALUE)
+
+/* This is how to output an element of a case-vector that is relative. */
+
+/* Disable the shift, which is for the currently disabled "switch"
+ opcode. See casesi in v850.md. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ fprintf (FILE, "\t%s %s.L%d-.L%d%s\n", \
+ (TARGET_BIG_SWITCH ? ".long" : ".short"), \
+ (0 && ! TARGET_BIG_SWITCH && (TARGET_V850E || TARGET_V850E2_ALL) ? "(" : ""), \
+ VALUE, REL, \
+ (0 && ! TARGET_BIG_SWITCH && (TARGET_V850E || TARGET_V850E2_ALL) ? ")>>1" : ""))
+
+#define ASM_OUTPUT_ALIGN(FILE, LOG) \
+ if ((LOG) != 0) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
+
+/* We don't have to worry about dbx compatibility for the v850. */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* Use stabs debugging info by default. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (TARGET_BIG_SWITCH ? SImode : HImode)
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* The switch instruction requires that the jump table immediately follow
+ it. */
+#define JUMP_TABLES_IN_TEXT_SECTION (!TARGET_JUMP_TABLES_IN_DATA_SECTION)
+
+#undef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE,PREFIX,NUM,TABLE) \
+ ASM_OUTPUT_ALIGN ((FILE), (TARGET_BIG_SWITCH ? 2 : 1));
+
+#define WORD_REGISTER_OPERATIONS
+
+/* Byte and short loads sign extend the value to a word. */
+#define LOAD_EXTEND_OP(MODE) SIGN_EXTEND
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* Tell the compiler that we want to support the GHS pragmas. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma ("ghs", "interrupt", ghs_pragma_interrupt); \
+ c_register_pragma ("ghs", "section", ghs_pragma_section); \
+ c_register_pragma ("ghs", "starttda", ghs_pragma_starttda); \
+ c_register_pragma ("ghs", "startsda", ghs_pragma_startsda); \
+ c_register_pragma ("ghs", "startzda", ghs_pragma_startzda); \
+ c_register_pragma ("ghs", "endtda", ghs_pragma_endtda); \
+ c_register_pragma ("ghs", "endsda", ghs_pragma_endsda); \
+ c_register_pragma ("ghs", "endzda", ghs_pragma_endzda); \
+} while (0)
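+
+/* Usage sketch for the pragmas registered above:
+
+     #pragma ghs startsda
+     int sda_var;          data declared here goes to the small data area
+     #pragma ghs endsda
+*/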
+
+/* enum GHS_SECTION_KIND is an enumeration of the kinds of sections that
+ can appear in the "ghs section" pragma. These names are used to index
+ into the GHS_default_section_names[] and GHS_current_section_names[]
+ that are defined in v850.c, and so the ordering of each must remain
+ consistent.
+
+ These arrays give the default and current names for each kind of
+ section defined by the GHS pragmas. The current names can be changed
+ by the "ghs section" pragma. If the current names are null, use
+ the default names. Note that the two arrays have different types.
+
+ For the *normal* section kinds (like .data, .text, etc.) we do not
+ want to explicitly force the name of these sections, but would rather
+ let the linker (or at least the back end) choose the name of the
+ section, UNLESS the user has forced a specific name for these section
+ kinds. To accomplish this, set the name in ghs_default_section_names
+ to null. */
+
+enum GHS_section_kind
+{
+ GHS_SECTION_KIND_DEFAULT,
+
+ GHS_SECTION_KIND_TEXT,
+ GHS_SECTION_KIND_DATA,
+ GHS_SECTION_KIND_RODATA,
+ GHS_SECTION_KIND_BSS,
+ GHS_SECTION_KIND_SDATA,
+ GHS_SECTION_KIND_ROSDATA,
+ GHS_SECTION_KIND_TDATA,
+ GHS_SECTION_KIND_ZDATA,
+ GHS_SECTION_KIND_ROZDATA,
+
+ COUNT_OF_GHS_SECTION_KINDS /* must be last */
+};
+
+/* The following code is for handling pragmas supported by the
+ v850 compiler produced by Green Hills Software. This is at
+ the specific request of a customer. */
+
+typedef struct data_area_stack_element
+{
+ struct data_area_stack_element * prev;
+ v850_data_area data_area; /* Current default data area. */
+} data_area_stack_element;
+
+/* Track the current data area set by the
+ data area pragma (which can be nested). */
+extern data_area_stack_element * data_area_stack;
+
+/* Names of the various data areas used on the v850. */
+extern union tree_node * GHS_default_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
+extern union tree_node * GHS_current_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
+
+/* The assembler op to start the file. */
+
+#define FILE_ASM_OP "\t.file\n"
+
+/* Enable the register move pass to improve code. */
+#define ENABLE_REGMOVE_PASS
+
+
+/* Implement ZDA, TDA, and SDA */
+
+#define EP_REGNUM 30 /* ep register number */
+
+#define SYMBOL_FLAG_ZDA (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_FLAG_TDA (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_FLAG_SDA (SYMBOL_FLAG_MACH_DEP << 2)
+#define SYMBOL_REF_ZDA_P(X) ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_ZDA) != 0)
+#define SYMBOL_REF_TDA_P(X) ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_TDA) != 0)
+#define SYMBOL_REF_SDA_P(X) ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_SDA) != 0)
+
+#define TARGET_ASM_INIT_SECTIONS v850_asm_init_sections
+
+/* Define this so that cc1plus will not assume that system header files
+ have an implicit 'extern "C" { ... }' around them. This breaks testing C++
+ in a build directory where the libstdc++ header files are found via a
+ -isystem <path-to-build-dir>. */
+#define NO_IMPLICIT_EXTERN_C
+
+#endif /* ! GCC_V850_H */
diff --git a/gcc/config/v850/v850.md b/gcc/config/v850/v850.md
new file mode 100644
index 000000000..88e42c65e
--- /dev/null
+++ b/gcc/config/v850/v850.md
@@ -0,0 +1,2667 @@
+;; GCC machine description for NEC V850
+;; Copyright (C) 1996, 1997, 1998, 1999, 2002, 2004, 2005, 2007, 2008, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Jeff Law (law@cygnus.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; The V851 manual states that the instruction address space is 16M;
+;; the various branch/call instructions only have a 22-bit offset (4M range).
+;;
+;; One day we'll probably need to handle calls to targets more than 4M
+;; away.
+
+;; The "length" attribute defined below gives the size of instructions in bytes.
+
+;;---------------------------------------------------------------------------
+;; Constants
+
+;;
+(define_constants
+ [(ZERO_REGNUM 0) ; constant zero
+ (SP_REGNUM 3) ; Stack Pointer
+ (GP_REGNUM 4) ; GP Pointer
+ (EP_REGNUM 30) ; EP pointer
+ (LP_REGNUM 31) ; Return address register
+ (CC_REGNUM 32) ; Condition code pseudo register
+ (FCC_REGNUM 33) ; Floating Condition code pseudo register
+ ]
+)
+
+(define_attr "length" ""
+ (const_int 4))
+
+(define_attr "long_calls" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_LONG_CALLS")
+ (const_string "yes")
+ (const_string "no"))))
+
+;; Types of instructions (for scheduling purposes).
+
+(define_attr "type" "load,store,bit1,mult,macc,div,fpu,single,other"
+ (const_string "other"))
+
+(define_attr "cpu" "none,v850,v850e,v850e1,v850e2,v850e2v3"
+ (cond [(ne (symbol_ref "TARGET_V850") (const_int 0))
+ (const_string "v850")
+ (ne (symbol_ref "TARGET_V850E") (const_int 0))
+ (const_string "v850e")
+ (ne (symbol_ref "TARGET_V850E1") (const_int 0))
+ (const_string "v850e1")
+ (ne (symbol_ref "TARGET_V850E2") (const_int 0))
+ (const_string "v850e2")
+ (ne (symbol_ref "TARGET_V850E2") (const_int 0))
+ (const_string "v850e2v3")]
+ (const_string "none")))
+
+;; Condition code settings.
+;; none - insn does not affect cc
+;; none_0hit - insn does not affect cc but it does modify operand 0
+;; This attribute is used to keep track of when operand 0 changes.
+;; See the description of NOTICE_UPDATE_CC for more info.
+;; set_znv - sets z,n,v to usable values; c is unknown.
+;; set_zn - sets z,n to usable values; v and c are unknown.
+;; compare - compare instruction
+;; clobber - value of cc is unknown
+(define_attr "cc" "none,none_0hit,set_z,set_zn,set_znv,compare,clobber"
+ (const_string "clobber"))
+
+;; Function units for the V850. As best as I can tell, there's
+;; a traditional memory load/use stall as well as a stall if
+;; the result of a multiply is used too early.
+
+(define_insn_reservation "v850_other" 1
+ (eq_attr "type" "other")
+ "nothing")
+(define_insn_reservation "v850_mult" 2
+ (eq_attr "type" "mult")
+ "nothing")
+(define_insn_reservation "v850_memory" 2
+ (eq_attr "type" "load")
+ "nothing")
+
+(include "predicates.md")
+(include "constraints.md")
+
+;; ----------------------------------------------------------------------
+;; MOVE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+(define_insn "sign23byte_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "disp23_operand" "W")))))]
+ "TARGET_V850E2V3"
+ "ld.b %2[%1],%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "unsign23byte_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "disp23_operand" "W")))))]
+ "TARGET_V850E2V3"
+ "ld.bu %2[%1],%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "sign23hword_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI
+ (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "disp23_operand" "W")))))]
+ "TARGET_V850E2V3"
+ "ld.h %2[%1],%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "unsign23hword_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (mem:HI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "disp23_operand" "W")))))]
+ "TARGET_V850E2V3"
+ "ld.hu %2[%1],%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "23word_load"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "disp23_operand" "W"))))]
+ "TARGET_V850E2V3"
+ "ld.w %2[%1],%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "23byte_store"
+ [(set (mem:QI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "disp23_operand" "W")))
+ (match_operand:QI 2 "register_operand" "r"))]
+ "TARGET_V850E2V3"
+ "st.b %2,%1[%0]"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "23hword_store"
+ [(set (mem:HI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "disp23_operand" "W")))
+ (match_operand:HI 2 "register_operand" "r"))]
+ "TARGET_V850E2V3"
+ "st.h %2,%1[%0]"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "23word_store"
+ [(set (mem:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "disp23_operand" "W")))
+ (match_operand:SI 2 "register_operand" "r"))]
+ "TARGET_V850E2V3"
+ "st.w %2,%1[%0]"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+;; movqi
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register or 0. */
+ if (!register_operand (operand0, QImode)
+ && !reg_or_0_operand (operand1, QImode))
+ operands[1] = copy_to_mode_reg (QImode, operand1);
+}")
+
+(define_insn "*movqi_internal"
+ [(set (match_operand:QI 0 "general_operand" "=r,r,r,Q,r,m,m")
+ (match_operand:QI 1 "general_operand" "Jr,n,Q,Ir,m,r,I"))]
+ "register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode)"
+ "* return output_move_single (operands);"
+ [(set_attr "length" "2,4,2,2,4,4,4")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")
+ (set_attr "type" "other,other,load,other,load,store,store")])
+
+;; movhi
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register or 0. */
+ if (!register_operand (operand0, HImode)
+ && !reg_or_0_operand (operand1, HImode))
+ operands[1] = copy_to_mode_reg (HImode, operand1);
+}")
+
+(define_insn "*movhi_internal"
+ [(set (match_operand:HI 0 "general_operand" "=r,r,r,Q,r,m,m")
+ (match_operand:HI 1 "general_operand" "Jr,n,Q,Ir,m,r,I"))]
+ "register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode)"
+ "* return output_move_single (operands);"
+ [(set_attr "length" "2,4,2,2,4,4,4")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")
+ (set_attr "type" "other,other,load,other,load,store,store")])
+
+;; movsi and helpers
+
+(define_insn "*movsi_high"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (high:SI (match_operand 1 "" "")))]
+ ""
+ "movhi hi(%1),%.,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "other")])
+
+(define_insn "*movsi_lo"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ ""
+ "movea lo(%2),%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "other")])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* One of the ops has to be in a register or 0. */
+ if (!register_operand (operand0, SImode)
+ && !reg_or_0_operand (operand1, SImode))
+ operands[1] = copy_to_mode_reg (SImode, operand1);
+
+ /* Some constants, as well as symbolic operands,
+ must be loaded with HIGH & LO_SUM patterns. */
+ if (CONSTANT_P (operands[1])
+ && GET_CODE (operands[1]) != HIGH
+ && ! (TARGET_V850E || TARGET_V850E2_ALL)
+ && !special_symbolref_operand (operands[1], VOIDmode)
+ && !(GET_CODE (operands[1]) == CONST_INT
+ && (CONST_OK_FOR_J (INTVAL (operands[1]))
+ || CONST_OK_FOR_K (INTVAL (operands[1]))
+ || CONST_OK_FOR_L (INTVAL (operands[1])))))
+ {
+ rtx temp;
+
+ if (reload_in_progress || reload_completed)
+ temp = operands[0];
+ else
+ temp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_rtx_SET (SImode, temp,
+ gen_rtx_HIGH (SImode, operand1)));
+ emit_insn (gen_rtx_SET (SImode, operand0,
+ gen_rtx_LO_SUM (SImode, temp, operand1)));
+ DONE;
+ }
+}")
+
+;; This is the same as the following pattern, except that it includes
+;; support for arbitrary 32-bit immediates.
+
+;; ??? This always loads addresses using hilo. If the only use of this address
+;; was in a load/store, then we would get smaller code if we only loaded the
+;; upper part with hi, and then put the lower part in the load/store insn.
+
+(define_insn "*movsi_internal_v850e"
+ [(set (match_operand:SI 0 "general_operand" "=r,r,r,r,Q,r,r,m,m,r")
+ (match_operand:SI 1 "general_operand" "Jr,K,L,Q,Ir,m,R,r,I,i"))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)
+ && (register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode))"
+ "* return output_move_single (operands);"
+ [(set_attr "length" "2,4,4,2,2,4,4,4,4,6")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")
+ (set_attr "type" "other,other,other,load,other,load,other,store,store,other")])
+
+(define_insn "*movsi_internal"
+ [(set (match_operand:SI 0 "general_operand" "=r,r,r,r,Q,r,r,m,m")
+ (match_operand:SI 1 "movsi_source_operand" "Jr,K,L,Q,Ir,m,R,r,I"))]
+ "register_operand (operands[0], SImode)
+ || reg_or_0_operand (operands[1], SImode)"
+ "* return output_move_single (operands);"
+ [(set_attr "length" "2,4,4,2,2,4,4,4,4")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")
+ (set_attr "type" "other,other,other,load,other,load,store,store,other")])
+
+(define_insn "*movsf_internal"
+ [(set (match_operand:SF 0 "general_operand" "=r,r,r,r,r,Q,r,m,m,r")
+ (match_operand:SF 1 "general_operand" "Jr,K,L,n,Q,Ir,m,r,IG,iF"))]
+ "register_operand (operands[0], SFmode)
+ || reg_or_0_operand (operands[1], SFmode)"
+ "* return output_move_single (operands);"
+ [(set_attr "length" "2,4,4,8,2,2,4,4,4,8")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")
+ (set_attr "type" "other,other,other,other,load,other,load,store,store,other")])
+
+;; ----------------------------------------------------------------------
+;; TEST INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "*v850_tst1"
+ [(set (cc0)
+ (compare (zero_extract:SI (match_operand:QI 0 "memory_operand" "m")
+ (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ ""
+ "tst1 %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+;; This replaces ld.b;sar;andi with tst1;setf nz.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare (zero_extract:SI (match_operand:QI 1 "memory_operand" "")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" ""))
+ (const_int 0)))]
+ ""
+ [(set (cc0) (compare (zero_extract:SI (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0) (ne:SI (cc0) (const_int 0)))])
+
+(define_expand "cbranchsi4"
+ [(set (cc0)
+ (compare (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "reg_or_int5_operand" "")))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+(define_expand "cstoresi4"
+ [(set (cc0)
+ (compare (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "reg_or_int5_operand" "")))
+ (set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator" [(cc0)
+ (const_int 0)]))]
+ "")
+
+(define_expand "cmpsi"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_int5_operand" "r,J")))]
+ ""
+ "
+{
+ v850_compare_op0 = operands[0];
+ v850_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_insn "cmpsi_insn"
+ [(set (cc0)
+ (compare (match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "reg_or_int5_operand" "r,J")))]
+ ""
+ "@
+ cmp %1,%0
+ cmp %1,%0"
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "compare")])
+
+(define_expand "cmpsf"
+ [(set (reg:CC CC_REGNUM)
+ (compare (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "
+{
+ v850_compare_op0 = operands[0];
+ v850_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_expand "cmpdf"
+ [(set (reg:CC CC_REGNUM)
+ (compare (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "
+{
+ v850_compare_op0 = operands[0];
+ v850_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; ----------------------------------------------------------------------
+;; ADD INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rJ,K,U")))
+ (clobber (reg:CC CC_REGNUM))]
+
+ ""
+ "@
+ add %2,%0
+ addi %2,%1,%0
+ addi %O2(%P2),%1,%0"
+ [(set_attr "length" "2,4,4")
+ (set_attr "cc" "set_zn,set_zn,set_zn")])
+
+;; ----------------------------------------------------------------------
+;; SUBTRACT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "register_operand" "r,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ sub %2,%0
+ subr %1,%0"
+ [(set_attr "length" "2,2")
+ (set_attr "cc" "set_zn,set_zn")])
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "subr %.,%0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; MULTIPLY INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_expand "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" ""))))]
+ ""
+ "if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ emit_insn (gen_mulhisi3_internal2 (operands[0], operands[1], operands[2]));
+ DONE;
+ }")
+
+(define_insn "*mulhisi3_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "%0"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
+ ""
+ "mulh %2,%0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "mult")])
+
+(define_insn "mulhisi3_internal2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "%0,r"))
+ (match_operand:HI 2 "const_int_operand" "J,K")))]
+ ""
+ "@
+ mulh %2,%0
+ mulhi %2,%1,%0"
+ [(set_attr "length" "2,4")
+ (set_attr "cc" "none_0hit,none_0hit")
+ (set_attr "type" "mult")])
+
+;; ??? The scheduling info is probably wrong.
+
+;; ??? This instruction can also generate the 32-bit highpart, but using it
+;; may increase code size, contrary to the desired result.
+
+;; ??? This instruction can also give a DImode result.
+
+;; ??? There is an unsigned version, but it matters only for the DImode/highpart
+;; results.
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "register_operand" "%0")
+ (match_operand:SI 2 "reg_or_int9_operand" "rO")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "mul %2,%1,%."
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "mult")])
+
+;; ----------------------------------------------------------------------
+;; DIVIDE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; ??? These insns do set the Z/N condition codes, except that they are based
+;; on only one of the two results, so it doesn't seem to make sense to use
+;; them.
+
+;; ??? The scheduling info is probably wrong.
+
+(define_insn "divmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))
+ (set (match_operand:SI 3 "register_operand" "=r")
+ (mod:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E"
+ "div %2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "div")])
+
+(define_insn "udivmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "register_operand" "r")))
+ (set (match_operand:SI 3 "register_operand" "=r")
+ (umod:SI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E"
+ "divu %2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "div")])
+
+;; ??? There is a 2 byte instruction for generating only the quotient.
+;; However, it isn't clear how to compute the length field correctly.
+
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (div:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "register_operand" "r")))
+ (set (match_operand:HI 3 "register_operand" "=r")
+ (mod:HI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E"
+ "divh %2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "div")])
+
+;; Half-words are sign-extended by default, so we must zero extend to a word
+;; here before doing the divide.
+
+(define_insn "udivmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (udiv:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "register_operand" "r")))
+ (set (match_operand:HI 3 "register_operand" "=r")
+ (umod:HI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E"
+ "zxh %0 ; divhu %2,%0,%3"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "div")])
+
+;; ----------------------------------------------------------------------
+;; AND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "*v850_clr1_1"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (subreg:QI
+ (and:SI (subreg:SI (match_dup 0) 0)
+ (match_operand:QI 1 "not_power_of_two_operand" "")) 0))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "*
+{
+ rtx xoperands[2];
+ xoperands[0] = operands[0];
+ xoperands[1] = GEN_INT (~INTVAL (operands[1]) & 0xff);
+ output_asm_insn (\"clr1 %M1,%0\", xoperands);
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_clr1_2"
+ [(set (match_operand:HI 0 "indirect_operand" "=m")
+ (subreg:HI
+ (and:SI (subreg:SI (match_dup 0) 0)
+ (match_operand:HI 1 "not_power_of_two_operand" "")) 0))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (~INTVAL (operands[1]) & 0xffff);
+
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0), log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"clr1 %1,%0\", xoperands);
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_clr1_3"
+ [(set (match_operand:SI 0 "indirect_operand" "=m")
+ (and:SI (match_dup 0)
+ (match_operand:SI 1 "not_power_of_two_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (~INTVAL (operands[1]) & 0xffffffff);
+
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0), log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"clr1 %1,%0\", xoperands);
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,M")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ and %2,%0
+ and %.,%0
+ andi %2,%1,%0"
+ [(set_attr "length" "2,2,4")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; OR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "*v850_set1_1"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (subreg:QI (ior:SI (subreg:SI (match_dup 0) 0)
+ (match_operand 1 "power_of_two_operand" "")) 0))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "set1 %M1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_set1_2"
+ [(set (match_operand:HI 0 "indirect_operand" "=m")
+ (subreg:HI (ior:SI (subreg:SI (match_dup 0) 0)
+ (match_operand 1 "power_of_two_operand" "")) 0))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (INTVAL (operands[1]));
+
+ if (log2 < 8)
+ return \"set1 %M1,%0\";
+ else
+ {
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0),
+ log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"set1 %1,%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_set1_3"
+ [(set (match_operand:SI 0 "indirect_operand" "=m")
+ (ior:SI (match_dup 0)
+ (match_operand 1 "power_of_two_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (INTVAL (operands[1]));
+
+ if (log2 < 8)
+ return \"set1 %M1,%0\";
+ else
+ {
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0),
+ log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"set1 %1,%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,M")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ or %2,%0
+ or %.,%0
+ ori %2,%1,%0"
+ [(set_attr "length" "2,2,4")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; XOR INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "*v850_not1_1"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (subreg:QI (xor:SI (subreg:SI (match_dup 0) 0)
+ (match_operand 1 "power_of_two_operand" "")) 0))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "not1 %M1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_not1_2"
+ [(set (match_operand:HI 0 "indirect_operand" "=m")
+ (subreg:HI (xor:SI (subreg:SI (match_dup 0) 0)
+ (match_operand 1 "power_of_two_operand" "")) 0))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (INTVAL (operands[1]));
+
+ if (log2 < 8)
+ return \"not1 %M1,%0\";
+ else
+ {
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0),
+ log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"not1 %1,%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "*v850_not1_3"
+ [(set (match_operand:SI 0 "indirect_operand" "=m")
+ (xor:SI (match_dup 0)
+ (match_operand 1 "power_of_two_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "*
+{
+ int log2 = exact_log2 (INTVAL (operands[1]));
+
+ if (log2 < 8)
+ return \"not1 %M1,%0\";
+ else
+ {
+ rtx xoperands[2];
+ xoperands[0] = gen_rtx_MEM (QImode,
+ plus_constant (XEXP (operands[0], 0),
+ log2 / 8));
+ xoperands[1] = GEN_INT (log2 % 8);
+ output_asm_insn (\"not1 %1,%0\", xoperands);
+ }
+ return \"\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")
+ (set_attr "type" "bit1")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,I,M")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ xor %2,%0
+ xor %.,%0
+ xori %2,%1,%0"
+ [(set_attr "length" "2,2,4")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; NOT INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (not:SI (match_operand:SI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "not %1,%0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "set_zn")])
+
+;; -----------------------------------------------------------------
+;; BIT FIELDS
+;; -----------------------------------------------------------------
+
+;; ??? Is it worth defining insv and extv for the V850 series?!?
+
+;; An insv pattern would be useful, but does not get used because
+;; store_bit_field never calls insv when storing a constant value into a
+;; single-bit bitfield.
+
+;; extv/extzv patterns would be useful, but do not get used because
+;; optimize_bitfield_compare in fold-const usually converts single
+;; bit extracts into an AND with a mask.
+
+;; -----------------------------------------------------------------
+;; Scc INSTRUCTIONS
+;; -----------------------------------------------------------------
+
+(define_insn "*setcc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(cc0) (const_int 0)]))]
+ ""
+ "*
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0
+ && (GET_CODE (operands[1]) == GT
+ || GET_CODE (operands[1]) == GE
+ || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == LT))
+ return 0;
+
+ return \"setf %c1,%0\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "setf_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)]))]
+ ""
+ "setf %b1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "set_z_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand 1 "v850_float_z_comparison_operator" ""))]
+ "TARGET_V850E2V3"
+ "setf z,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+(define_insn "set_nz_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand 1 "v850_float_nz_comparison_operator" ""))]
+ "TARGET_V850E2V3"
+ "setf nz,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")])
+
+;; ----------------------------------------------------------------------
+;; CONDITIONAL MOVE INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; Instructions using cc0 aren't allowed to have input reloads, so we must
+;; hide the fact that this instruction uses cc0. We do so by including the
+;; compare instruction inside it.
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operand 1 "comparison_operator")
+ (match_operand:SI 2 "reg_or_const_operand" "rJ")
+ (match_operand:SI 3 "reg_or_const_operand" "rI")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "
+{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && GET_CODE (operands[3]) == CONST_INT)
+ {
+ int o2 = INTVAL (operands[2]);
+ int o3 = INTVAL (operands[3]);
+
+ if (o2 == 1 && o3 == 0)
+ FAIL; /* setf */
+ if (o3 == 1 && o2 == 0)
+ FAIL; /* setf */
+ if (o2 == 0 && (o3 < -16 || o3 > 15) && exact_log2 (o3) >= 0)
+ FAIL; /* setf + shift */
+ if (o3 == 0 && (o2 < -16 || o2 > 15) && exact_log2 (o2) >= 0)
+ FAIL; /* setf + shift */
+ if (o2 != 0)
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+ if (o3 != 0)
+ operands[3] = copy_to_mode_reg (SImode, operands[3]);
+ }
+ else
+ {
+ if (GET_CODE (operands[2]) != REG)
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+ if (GET_CODE (operands[3]) != REG)
+ operands[3] = copy_to_mode_reg (SImode, operands[3]);
+ }
+}")
+
+;; ??? Clobbering the condition codes is overkill.
+
+;; ??? We sometimes emit an unnecessary compare instruction because the
+;; condition codes may have already been set by an earlier instruction,
+;; but we have no code here to avoid the compare if it is unnecessary.
+
+(define_insn "movsicc_normal_cc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:SI 2 "reg_or_int5_operand" "rJ")
+ (match_operand:SI 3 "reg_or_0_operand" "rI")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "cmov %c1,%2,%z3,%0";
+ [(set_attr "length" "6")
+ (set_attr "cc" "compare")])
+
+(define_insn "movsicc_reversed_cc"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(reg:CC CC_REGNUM) (const_int 0)])
+ (match_operand:SI 2 "reg_or_0_operand" "rI")
+ (match_operand:SI 3 "reg_or_int5_operand" "rJ")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "cmov %C1,%3,%z2,%0"
+ [(set_attr "length" "6")
+ (set_attr "cc" "compare")])
+
+(define_insn "*movsicc_normal"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:SI 4 "register_operand" "r")
+ (match_operand:SI 5 "reg_or_int5_operand" "rJ")])
+ (match_operand:SI 2 "reg_or_int5_operand" "rJ")
+ (match_operand:SI 3 "reg_or_0_operand" "rI")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "cmp %5,%4 ; cmov %c1,%2,%z3,%0"
+ [(set_attr "length" "6")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*movsicc_reversed"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:SI 4 "register_operand" "r")
+ (match_operand:SI 5 "reg_or_int5_operand" "rJ")])
+ (match_operand:SI 2 "reg_or_0_operand" "rI")
+ (match_operand:SI 3 "reg_or_int5_operand" "rJ")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "cmp %5,%4 ; cmov %C1,%3,%z2,%0"
+ [(set_attr "length" "6")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*movsicc_tst1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(zero_extract:SI
+ (match_operand:QI 2 "memory_operand" "m")
+ (const_int 1)
+ (match_operand 3 "const_int_operand" "n"))
+ (const_int 0)])
+ (match_operand:SI 4 "reg_or_int5_operand" "rJ")
+ (match_operand:SI 5 "reg_or_0_operand" "rI")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "tst1 %3,%2 ; cmov %c1,%4,%z5,%0"
+ [(set_attr "length" "8")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*movsicc_tst1_reversed"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(zero_extract:SI
+ (match_operand:QI 2 "memory_operand" "m")
+ (const_int 1)
+ (match_operand 3 "const_int_operand" "n"))
+ (const_int 0)])
+ (match_operand:SI 4 "reg_or_0_operand" "rI")
+ (match_operand:SI 5 "reg_or_int5_operand" "rJ")))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "tst1 %3,%2 ; cmov %C1,%5,%z4,%0"
+ [(set_attr "length" "8")
+ (set_attr "cc" "clobber")])
+
+;; Matching for sasf requires combining 4 instructions, so we provide a
+;; dummy pattern to match the first 3, which will always be turned into the
+;; second pattern by subsequent combining. As above, we must include the
+;; comparison to avoid input reloads in an insn using cc0.
+
+(define_insn "*sasf"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ior:SI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:SI 3 "register_operand" "r")
+ (match_operand:SI 4 "reg_or_int5_operand" "rJ")])
+ (ashift:SI (match_operand:SI 2 "register_operand" "0")
+ (const_int 1))))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "cmp %4,%3 ; sasf %c1,%0"
+ [(set_attr "length" "6")
+ (set_attr "cc" "clobber")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:SI 4 "register_operand" "")
+ (match_operand:SI 5 "reg_or_int5_operand" "")])
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)
+ && ((INTVAL (operands[2]) ^ INTVAL (operands[3])) == 1)
+ && ((INTVAL (operands[2]) + INTVAL (operands[3])) != 1)
+ && (GET_CODE (operands[5]) == CONST_INT
+ || REGNO (operands[0]) != REGNO (operands[5]))
+ && REGNO (operands[0]) != REGNO (operands[4])"
+ [(set (match_dup 0) (match_dup 6))
+ (parallel [(set (match_dup 0)
+ (ior:SI (match_op_dup 7 [(match_dup 4) (match_dup 5)])
+ (ashift:SI (match_dup 0) (const_int 1))))
+ (clobber (reg:CC CC_REGNUM))])]
+ "
+{
+ operands[6] = GEN_INT (INTVAL (operands[2]) >> 1);
+ if (INTVAL (operands[2]) & 0x1)
+ operands[7] = operands[1];
+ else
+ operands[7] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[1])),
+ GET_MODE (operands[1]),
+ XEXP (operands[1], 0), XEXP (operands[1], 1));
+}")
+
+;; ---------------------------------------------------------------------
+;; BYTE SWAP INSTRUCTIONS
+;; ---------------------------------------------------------------------
+(define_expand "rotlhi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (rotate:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "const_int_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "
+{
+ if (INTVAL (operands[2]) != 8)
+ FAIL;
+}")
+
+(define_insn "*rotlhi3_8"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (rotate:HI (match_operand:HI 1 "register_operand" "r")
+ (const_int 8)))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "bsh %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+(define_expand "rotlsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (rotate:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "
+{
+ if (INTVAL (operands[2]) != 16)
+ FAIL;
+}")
+
+(define_insn "*rotlsi3_16"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 16)))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "hsw %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+;; ----------------------------------------------------------------------
+;; JUMP INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+;; Conditional jump instructions
+
+(define_insn "*branch_normal"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "*
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0
+ && (GET_CODE (operands[1]) == GT
+ || GET_CODE (operands[1]) == GE
+ || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == LT))
+ return 0;
+
+ if (get_attr_length (insn) == 2)
+ return \"b%b1 %l0\";
+ else
+ return \"b%B1 .+6 ; jr %l0\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+(define_insn "*branch_invert"
+ [(set (pc)
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "*
+{
+ if ((cc_status.flags & CC_OVERFLOW_UNUSABLE) != 0
+ && (GET_CODE (operands[1]) == GT
+ || GET_CODE (operands[1]) == GE
+ || GET_CODE (operands[1]) == LE
+ || GET_CODE (operands[1]) == LT))
+ return 0;
+ if (get_attr_length (insn) == 2)
+ return \"b%B1 %l0\";
+ else
+ return \"b%b1 .+6 ; jr %l0\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+(define_insn "branch_z_normal"
+ [(set (pc)
+ (if_then_else (match_operand 1 "v850_float_z_comparison_operator" "")
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_V850E2V3"
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"bz %l0\";
+ else
+ return \"bnz 1f ; jr %l0 ; 1:\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+(define_insn "*branch_z_invert"
+ [(set (pc)
+ (if_then_else (match_operand 1 "v850_float_z_comparison_operator" "")
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_V850E2V3"
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"bnz %l0\";
+ else
+ return \"bz 1f ; jr %l0 ; 1:\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+(define_insn "branch_nz_normal"
+ [(set (pc)
+ (if_then_else (match_operand 1 "v850_float_nz_comparison_operator" "")
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ "TARGET_V850E2V3"
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"bnz %l0\";
+ else
+ return \"bz 1f ; jr %l0 ; 1:\";
+}"
+[(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+(define_insn "*branch_nz_invert"
+ [(set (pc)
+ (if_then_else (match_operand 1 "v850_float_nz_comparison_operator" "")
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ "TARGET_V850E2V3"
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"bz %l0\";
+ else
+ return \"bnz 1f ; jr %l0 ; 1:\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 6)))
+ (set_attr "cc" "none")])
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "*
+{
+ if (get_attr_length (insn) == 2)
+ return \"br %0\";
+ else
+ return \"jr %0\";
+}"
+ [(set (attr "length")
+ (if_then_else (lt (abs (minus (match_dup 0) (pc)))
+ (const_int 256))
+ (const_int 2)
+ (const_int 4)))
+ (set_attr "cc" "none")])
+
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jmp %0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "tablejump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp %0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "switch"
+ [(set (pc)
+ (plus:SI
+ (sign_extend:SI
+ (mem:HI
+ (plus:SI (ashift:SI (match_operand:SI 0 "register_operand" "r")
+ (const_int 1))
+ (label_ref (match_operand 1 "" "")))))
+ (label_ref (match_dup 1))))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "switch %0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_expand "casesi"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand 3 "" "") (match_operand 4 "" "")]
+ ""
+ "
+{
+ rtx reg = gen_reg_rtx (SImode);
+ rtx tableaddress = gen_reg_rtx (SImode);
+ rtx test;
+ rtx mem;
+
+ /* Subtract the lower bound from the index. */
+ emit_insn (gen_subsi3 (reg, operands[0], operands[1]));
+
+ /* Compare the result against the number of table entries;
+ branch to the default label if out of range of the table. */
+ test = gen_rtx_fmt_ee (GTU, VOIDmode, reg, operands[2]);
+ emit_jump_insn (gen_cbranchsi4 (test, reg, operands[2], operands[4]));
+
+ /* Shift index for the table array access. */
+ emit_insn (gen_ashlsi3 (reg, reg, GEN_INT (TARGET_BIG_SWITCH ? 2 : 1)));
+ /* Load the table address into a pseudo. */
+ emit_insn (gen_movsi (tableaddress,
+ gen_rtx_LABEL_REF (Pmode, operands[3])));
+ /* Add the table address to the index. */
+ emit_insn (gen_addsi3 (reg, reg, tableaddress));
+ /* Load the table entry. */
+ mem = gen_const_mem (CASE_VECTOR_MODE, reg);
+ if (! TARGET_BIG_SWITCH)
+ {
+ rtx reg2 = gen_reg_rtx (HImode);
+ emit_insn (gen_movhi (reg2, mem));
+ emit_insn (gen_extendhisi2 (reg, reg2));
+ }
+ else
+ emit_insn (gen_movsi (reg, mem));
+ /* Add the table address. */
+ emit_insn (gen_addsi3 (reg, reg, tableaddress));
+ /* Branch to the switch label. */
+ emit_jump_insn (gen_tablejump (reg, operands[3]));
+ DONE;
+}")
+
+;; Call subroutine with no return value.
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (! call_address_operand (XEXP (operands[0], 0), QImode)
+ || TARGET_LONG_CALLS)
+ XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0));
+ if (TARGET_LONG_CALLS)
+ emit_call_insn (gen_call_internal_long (XEXP (operands[0], 0), operands[1]));
+ else
+ emit_call_insn (gen_call_internal_short (XEXP (operands[0], 0), operands[1]));
+
+ DONE;
+}")
+
+(define_insn "call_internal_short"
+ [(call (mem:QI (match_operand:SI 0 "call_address_operand" "S,r"))
+ (match_operand:SI 1 "general_operand" "g,g"))
+ (clobber (reg:SI 31))]
+ "! TARGET_LONG_CALLS"
+ "@
+ jarl %0,r31
+ jarl .+4,r31 ; add 4,r31 ; jmp %0"
+ [(set_attr "length" "4,8")
+ (set_attr "cc" "clobber,clobber")]
+)
+
+(define_insn "call_internal_long"
+ [(call (mem:QI (match_operand:SI 0 "call_address_operand" "S,r"))
+ (match_operand:SI 1 "general_operand" "g,g"))
+ (clobber (reg:SI 31))]
+ "TARGET_LONG_CALLS"
+ "*
+ {
+ if (which_alternative == 0)
+ {
+ if (GET_CODE (operands[0]) == REG)
+ return \"jarl %0,r31\";
+ else
+ return \"movhi hi(%0), r0, r11 ; movea lo(%0), r11, r11 ; jarl .+4,r31 ; add 4, r31 ; jmp r11\";
+ }
+ else
+ return \"jarl .+4,r31 ; add 4,r31 ; jmp %0\";
+ }"
+ [(set_attr "length" "16,8")
+ (set_attr "cc" "clobber,clobber")]
+)
+
+;; Call subroutine, returning value in operand 0
+;; (which must be a hard register).
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "
+{
+ if (! call_address_operand (XEXP (operands[1], 0), QImode)
+ || TARGET_LONG_CALLS)
+ XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0));
+ if (TARGET_LONG_CALLS)
+ emit_call_insn (gen_call_value_internal_long (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ else
+ emit_call_insn (gen_call_value_internal_short (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ DONE;
+}")
+
+(define_insn "call_value_internal_short"
+ [(set (match_operand 0 "" "=r,r")
+ (call (mem:QI (match_operand:SI 1 "call_address_operand" "S,r"))
+ (match_operand:SI 2 "general_operand" "g,g")))
+ (clobber (reg:SI 31))]
+ "! TARGET_LONG_CALLS"
+ "@
+ jarl %1,r31
+ jarl .+4,r31 ; add 4,r31 ; jmp %1"
+ [(set_attr "length" "4,8")
+ (set_attr "cc" "clobber,clobber")]
+)
+
+(define_insn "call_value_internal_long"
+ [(set (match_operand 0 "" "=r,r")
+ (call (mem:QI (match_operand:SI 1 "call_address_operand" "S,r"))
+ (match_operand:SI 2 "general_operand" "g,g")))
+ (clobber (reg:SI 31))]
+ "TARGET_LONG_CALLS"
+ "*
+ {
+ if (which_alternative == 0)
+ {
+ if (GET_CODE (operands[1]) == REG)
+ return \"jarl %1, r31\";
+ else
+ /* Reload can generate this pattern.... */
+ return \"movhi hi(%1), r0, r11 ; movea lo(%1), r11, r11 ; jarl .+4, r31 ; add 4, r31 ; jmp r11\";
+ }
+ else
+ return \"jarl .+4, r31 ; add 4, r31 ; jmp %1\";
+ }"
+ [(set_attr "length" "16,8")
+ (set_attr "cc" "clobber,clobber")]
+)
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;; ----------------------------------------------------------------------
+;; EXTEND INSTRUCTIONS
+;; ----------------------------------------------------------------------
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (zero_extend:SI
+ (match_operand:HI 1 "nonimmediate_operand" "0,r,T,m")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "@
+ zxh %0
+ andi 65535,%1,%0
+ sld.hu %1,%0
+ ld.hu %1,%0"
+ [(set_attr "length" "2,4,2,4")
+ (set_attr "cc" "none_0hit,set_zn,none_0hit,none_0hit")])
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "andi 65535,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (zero_extend:SI
+ (match_operand:QI 1 "nonimmediate_operand" "0,r,T,m")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "@
+ zxb %0
+ andi 255,%1,%0
+ sld.bu %1,%0
+ ld.bu %1,%0"
+ [(set_attr "length" "2,4,2,4")
+ (set_attr "cc" "none_0hit,set_zn,none_0hit,none_0hit")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "andi 255,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+;;- sign extension instructions
+
+;; ??? The extendhisi2 pattern should not emit shifts for v850e?
+
+(define_insn "*extendhisi_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0,Q,m")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "@
+ sxh %0
+ sld.h %1,%0
+ ld.h %1,%0"
+ [(set_attr "length" "2,2,4")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit")])
+
+;; ??? This is missing a sign extend from memory pattern to match the ld.h
+;; instruction.
+
+(define_expand "extendhisi2"
+ [(parallel [(set (match_dup 2)
+ (ashift:SI (match_operand:HI 1 "register_operand" "")
+ (const_int 16)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 16)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "
+{
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
+;; ??? The extendqisi2 pattern should not emit shifts for v850e?
+
+(define_insn "*extendqisi_insn"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,Q,m")))
+ (clobber (reg:CC CC_REGNUM))]
+ "(TARGET_V850E || TARGET_V850E2_ALL)"
+ "@
+ sxb %0
+ sld.b %1,%0
+ ld.b %1,%0"
+ [(set_attr "length" "2,2,4")
+ (set_attr "cc" "none_0hit,none_0hit,none_0hit")])
+
+;; ??? This is missing a sign extend from memory pattern to match the ld.b
+;; instruction.
+
+(define_expand "extendqisi2"
+ [(parallel [(set (match_dup 2)
+ (ashift:SI (match_operand:QI 1 "register_operand" "")
+ (const_int 24)))
+ (clobber (reg:CC CC_REGNUM))])
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (ashiftrt:SI (match_dup 2)
+ (const_int 24)))
+ (clobber (reg:CC CC_REGNUM))])]
+ ""
+ "
+{
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_reg_rtx (SImode);
+}")
+
+;; ----------------------------------------------------------------------
+;; SHIFTS
+;; ----------------------------------------------------------------------
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashift:SI
+ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,N")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ shl %2,%0
+ shl %2,%0"
+ [(set_attr "length" "4,2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "ashlsi3_v850e2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashift:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E2_ALL"
+ "shl %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_znv")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,N")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ shr %2,%0
+ shr %2,%0"
+ [(set_attr "length" "4,2")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "lshrsi3_v850e2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E2_ALL"
+ "shr %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ashiftrt:SI
+ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "r,N")))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "@
+ sar %2,%0
+ sar %2,%0"
+ [(set_attr "length" "4,2")
+ (set_attr "cc" "set_zn, set_zn")])
+
+(define_insn "ashrsi3_v850e2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ashiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E2_ALL"
+ "sar %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_zn")])
+
+;; ----------------------------------------------------------------------
+;; FIND FIRST BIT INSTRUCTION
+;; ----------------------------------------------------------------------
+
+(define_insn "ffssi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ffs:SI (match_operand:SI 1 "register_operand" "r")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_V850E2_ALL"
+ "sch1r %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+;; ----------------------------------------------------------------------
+;; PROLOGUE/EPILOGUE
+;; ----------------------------------------------------------------------
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "expand_prologue (); DONE;")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ expand_epilogue ();
+ DONE;
+}")
+
+(define_insn "return_simple"
+ [(return)]
+ "reload_completed"
+ "jmp [r31]"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "return_internal"
+ [(return)
+ (use (reg:SI 31))]
+ ""
+ "jmp [r31]"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;; ----------------------------------------------------------------------
+;; V850E2V3 floating-point hardware support
+;; ----------------------------------------------------------------------
+
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (plus:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "addf.s %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "adddf3"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (plus:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "addf.d %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (minus:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "subf.s %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "subdf3"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (minus:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "subf.d %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (mult:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "mulf.s %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "muldf3"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (mult:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "mulf.d %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (div:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "divf.s %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (div:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "divf.d %2,%1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "minsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (smin:SF (match_operand:SF 1 "reg_or_0_operand" "r")
+ (match_operand:SF 2 "reg_or_0_operand" "r")))]
+ "TARGET_V850E2V3"
+ "minf.s %z1,%z2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "mindf3"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (smin:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "minf.d %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "maxsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (smax:SF (match_operand:SF 1 "reg_or_0_operand" "r")
+ (match_operand:SF 2 "reg_or_0_operand" "r")))]
+ "TARGET_V850E2V3"
+ "maxf.s %z1,%z2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "maxdf3"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (smax:DF (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "maxf.d %1,%2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (abs:SF (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "absf.s %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "absdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (abs:DF (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "absf.d %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (neg:SF (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "negf.s %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "negdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (neg:DF (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "negf.d %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; square-root
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "sqrtf.s %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "sqrtdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (sqrt:DF (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "sqrtf.d %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; float -> int
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "trncf.sw %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (fix:SI (fix:DF (match_operand:DF 1 "even_reg_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "trncf.dw %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; int -> float
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (float:SF (match_operand:SI 1 "reg_or_0_operand" "rI")))]
+ "TARGET_V850E2V3"
+ "cvtf.ws %z1, %0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "floatsidf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (float:DF (match_operand:SI 1 "reg_or_0_operand" "rI")))]
+ "TARGET_V850E2V3"
+ "cvtf.wd %z1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; single-float -> double-float
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (float_extend:DF
+ (match_operand:SF 1 "reg_or_0_operand" "rI")))]
+ "TARGET_V850E2V3"
+ "cvtf.sd %z1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; double-float -> single-float
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (float_truncate:SF
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cvtf.ds %1,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;;
+;; ---------------- special insns
+;;
+
+;; reciprocal
+(define_insn "recipsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (match_operand:SF 2 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "recipf.s %2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "recipdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (div:DF (match_operand:DF 1 "const_float_1_operand" "")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "recipf.d %2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; reciprocal of square-root
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "rsqrtf.s %2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "rsqrtdf2"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (div:DF (match_operand:DF 1 "const_float_1_operand" "")
+ (sqrt:DF (match_operand:DF 2 "even_reg_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "rsqrtf.d %2,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; multiply-add
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (fma:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")
+ (match_operand:SF 3 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "maddf.s %2,%1,%3,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;;; multiply-subtract
+(define_insn "fmssf4"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (fma:SF (match_operand:SF 1 "register_operand" "r")
+ (match_operand:SF 2 "register_operand" "r")
+ (neg:SF (match_operand:SF 3 "register_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "msubf.s %2,%1,%3,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; negative-multiply-add
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "r"))
+ (match_operand:SF 2 "register_operand" "r")
+ (match_operand:SF 3 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "nmaddf.s %2,%1,%3,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; negative-multiply-subtract
+(define_insn "fnmssf4"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "r"))
+ (match_operand:SF 2 "register_operand" "r")
+ (neg:SF (match_operand:SF 3 "register_operand" "r"))))]
+ "TARGET_V850E2V3"
+ "nmsubf.s %2,%1,%3,%0"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+;;
+;; ---------------- comparison/conditionals
+;;
+;; SF
+
+(define_insn "cmpsf_le_insn"
+ [(set (reg:CC_FPU_LE FCC_REGNUM)
+ (compare:CC_FPU_LE (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s le,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpsf_lt_insn"
+ [(set (reg:CC_FPU_LT FCC_REGNUM)
+ (compare:CC_FPU_LT (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s lt,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpsf_ge_insn"
+ [(set (reg:CC_FPU_GE FCC_REGNUM)
+ (compare:CC_FPU_GE (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s ge,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpsf_gt_insn"
+ [(set (reg:CC_FPU_GT FCC_REGNUM)
+ (compare:CC_FPU_GT (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s gt,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpsf_eq_insn"
+ [(set (reg:CC_FPU_EQ FCC_REGNUM)
+ (compare:CC_FPU_EQ (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s eq,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpsf_ne_insn"
+ [(set (reg:CC_FPU_NE FCC_REGNUM)
+ (compare:CC_FPU_NE (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.s neq,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+;; DF
+
+(define_insn "cmpdf_le_insn"
+ [(set (reg:CC_FPU_LE FCC_REGNUM)
+ (compare:CC_FPU_LE (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d le,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpdf_lt_insn"
+ [(set (reg:CC_FPU_LT FCC_REGNUM)
+ (compare:CC_FPU_LT (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d lt,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpdf_ge_insn"
+ [(set (reg:CC_FPU_GE FCC_REGNUM)
+ (compare:CC_FPU_GE (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d ge,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpdf_gt_insn"
+ [(set (reg:CC_FPU_GT FCC_REGNUM)
+ (compare:CC_FPU_GT (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d gt,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpdf_eq_insn"
+ [(set (reg:CC_FPU_EQ FCC_REGNUM)
+ (compare:CC_FPU_EQ (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d eq,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+(define_insn "cmpdf_ne_insn"
+ [(set (reg:CC_FPU_NE FCC_REGNUM)
+ (compare:CC_FPU_NE (match_operand:DF 0 "even_reg_operand" "r")
+ (match_operand:DF 1 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmpf.d neq,%z0,%z1"
+ [(set_attr "length" "4")
+ (set_attr "cc" "none_0hit")
+ (set_attr "type" "fpu")])
+
+
+;;
+;; Transfer a v850e2v3 fcc to the Z bit of CC0 (this is necessary to do a
+;; conditional branch based on a floating-point compare)
+;;
+
+(define_insn "trfsr"
+ [(set (match_operand 0 "" "") (match_operand 1 "" ""))]
+ "TARGET_V850E2V3
+ && GET_MODE(operands[0]) == GET_MODE(operands[1])
+ && GET_CODE(operands[0]) == REG && REGNO (operands[0]) == CC_REGNUM
+ && GET_CODE(operands[1]) == REG && REGNO (operands[1]) == FCC_REGNUM
+ && (GET_MODE(operands[0]) == CC_FPU_LEmode
+ || GET_MODE(operands[0]) == CC_FPU_GEmode
+ || GET_MODE(operands[0]) == CC_FPU_LTmode
+ || GET_MODE(operands[0]) == CC_FPU_GTmode
+ || GET_MODE(operands[0]) == CC_FPU_EQmode
+ || GET_MODE(operands[0]) == CC_FPU_NEmode)"
+ "trfsr"
+ [(set_attr "length" "4")
+ (set_attr "cc" "set_z")
+ (set_attr "type" "fpu")])
+
+;;
+;; Floating-point conditional moves for the v850e2v3.
+;;
+
+;; The actual v850e2v3 conditional move instructions
+;;
+(define_insn "movsfcc_z_insn"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (if_then_else:SF
+ (match_operand 3 "v850_float_z_comparison_operator" "")
+ (match_operand:SF 1 "reg_or_0_operand" "rIG")
+ (match_operand:SF 2 "reg_or_0_operand" "rIG")))]
+ "TARGET_V850E2V3"
+ "cmovf.s 0,%z1,%z2,%0"
+ [(set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+(define_insn "movsfcc_nz_insn"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (if_then_else:SF
+ (match_operand 3 "v850_float_nz_comparison_operator" "")
+ (match_operand:SF 1 "reg_or_0_operand" "rIG")
+ (match_operand:SF 2 "reg_or_0_operand" "rIG")))]
+ "TARGET_V850E2V3"
+ "cmovf.s 0,%z2,%z1,%0"
+ [(set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+(define_insn "movdfcc_z_insn"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (if_then_else:DF
+ (match_operand 3 "v850_float_z_comparison_operator" "")
+ (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmovf.d 0,%z1,%z2,%0"
+ [(set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+(define_insn "movdfcc_nz_insn"
+ [(set (match_operand:DF 0 "even_reg_operand" "=r")
+ (if_then_else:DF
+ (match_operand 3 "v850_float_nz_comparison_operator" "")
+ (match_operand:DF 1 "even_reg_operand" "r")
+ (match_operand:DF 2 "even_reg_operand" "r")))]
+ "TARGET_V850E2V3"
+ "cmovf.d 0,%z2,%z1,%0"
+ [(set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+(define_insn "movedfcc_z_zero"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (if_then_else:DF
+ (match_operand 3 "v850_float_z_comparison_operator" "")
+ (match_operand:DF 1 "reg_or_0_operand" "rIG")
+ (match_operand:DF 2 "reg_or_0_operand" "rIG")))]
+ "TARGET_V850E2V3"
+ "cmovf.s 0,%z1,%z2,%0 ; cmovf.s 0,%Z1,%Z2,%R0"
+ [(set_attr "length" "8")
+ (set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+(define_insn "movedfcc_nz_zero"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (if_then_else:DF
+ (match_operand 3 "v850_float_nz_comparison_operator" "")
+ (match_operand:DF 1 "reg_or_0_operand" "rIG")
+ (match_operand:DF 2 "reg_or_0_operand" "rIG")))]
+ "TARGET_V850E2V3"
+ "cmovf.s 0,%z2,%z1,%0 ; cmovf.s 0,%Z2,%Z1,%R0"
+ [(set_attr "length" "8")
+ (set_attr "cc" "clobber")]) ;; ??? or none_0hit
+
+
+;; ----------------------------------------------------------------------
+;; HELPER INSTRUCTIONS for saving the prologue and epilogue registers
+;; ----------------------------------------------------------------------
+
+;; The next two patterns match a stack adjust RTX followed by any number of
+;; push RTXs. The RTXs are then turned either into a single PREPARE
+;; instruction (on V850E and later parts) or into a suitable call to a
+;; worker function.
+
+;;
+;; First, convert the RTXs into a PREPARE instruction.
+;;
+
+(define_insn ""
+ [(match_parallel 0 "pattern_is_ok_for_prepare"
+ [(set (reg:SI 3)
+ (plus:SI (reg:SI 3) (match_operand:SI 1 "immediate_operand" "i")))
+ (set (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 2 "immediate_operand" "i")))
+ (match_operand:SI 3 "register_is_ok_for_epilogue" "r"))])]
+ "TARGET_PROLOG_FUNCTION && (TARGET_V850E || TARGET_V850E2_ALL)"
+ "* return construct_prepare_instruction (operands[0]);
+ "
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+(define_insn ""
+ [(match_parallel 0 "pattern_is_ok_for_prologue"
+ [(set (reg:SI 3)
+ (plus:SI (reg:SI 3) (match_operand:SI 1 "immediate_operand" "i")))
+ (set (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 2 "immediate_operand" "i")))
+ (match_operand:SI 3 "register_is_ok_for_epilogue" "r"))])]
+ "TARGET_PROLOG_FUNCTION"
+ "* return construct_save_jarl (operands[0]);
+ "
+ [(set (attr "length") (if_then_else (eq_attr "long_calls" "yes")
+ (const_string "16")
+ (const_string "4")))
+ (set_attr "cc" "clobber")])
+
+;;
+;; Actually, turn the RTXs into a DISPOSE instruction.
+;;
+(define_insn ""
+ [(match_parallel 0 "pattern_is_ok_for_dispose"
+ [(return)
+ (set (reg:SI 3)
+ (plus:SI (reg:SI 3) (match_operand:SI 1 "immediate_operand" "i")))
+ (set (match_operand:SI 2 "register_is_ok_for_epilogue" "=r")
+ (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 3 "immediate_operand" "i"))))])]
+ "TARGET_PROLOG_FUNCTION && (TARGET_V850E || TARGET_V850E2_ALL)"
+ "* return construct_dispose_instruction (operands[0]);
+ "
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+;; This pattern will match a return RTX followed by any number of pop RTXs
+;; and possibly a stack adjustment as well. These RTXs will be turned into
+;; a suitable call to a worker function.
+
+(define_insn ""
+ [(match_parallel 0 "pattern_is_ok_for_epilogue"
+ [(return)
+ (set (reg:SI 3)
+ (plus:SI (reg:SI 3) (match_operand:SI 1 "immediate_operand" "i")))
+ (set (match_operand:SI 2 "register_is_ok_for_epilogue" "=r")
+ (mem:SI (plus:SI (reg:SI 3)
+ (match_operand:SI 3 "immediate_operand" "i"))))])]
+ "TARGET_PROLOG_FUNCTION"
+ "* return construct_restore_jr (operands[0]);
+ "
+ [(set (attr "length") (if_then_else (eq_attr "long_calls" "yes")
+ (const_string "12")
+ (const_string "4")))
+ (set_attr "cc" "clobber")])
+
+;; Initialize an interrupt function. Do not depend on TARGET_PROLOG_FUNCTION.
+(define_insn "callt_save_interrupt"
+ [(unspec_volatile [(const_int 0)] 2)]
+ "(TARGET_V850E || TARGET_V850E2_ALL) && !TARGET_DISABLE_CALLT"
+ ;; The CALLT instruction stores the address following the CALLT into the
+ ;; CTPC register without saving CTPC's previous value. So if the
+ ;; interrupt handler or its caller could possibly execute a CALLT insn,
+ ;; save_interrupt MUST NOT be called via CALLT.
+ "*
+{
+ output_asm_insn (\"addi -28, sp, sp\", operands);
+ output_asm_insn (\"st.w r1, 24[sp]\", operands);
+ output_asm_insn (\"st.w r10, 12[sp]\", operands);
+ output_asm_insn (\"st.w r11, 16[sp]\", operands);
+ output_asm_insn (\"stsr ctpc, r10\", operands);
+ output_asm_insn (\"st.w r10, 20[sp]\", operands);
+ output_asm_insn (\"stsr ctpsw, r10\", operands);
+ output_asm_insn (\"st.w r10, 24[sp]\", operands);
+ output_asm_insn (\"callt ctoff(__callt_save_interrupt)\", operands);
+ return \"\";
+}"
+ [(set_attr "length" "26")
+ (set_attr "cc" "clobber")])
+
+(define_insn "callt_return_interrupt"
+ [(unspec_volatile [(const_int 0)] 3)]
+ "(TARGET_V850E || TARGET_V850E2_ALL) && !TARGET_DISABLE_CALLT"
+ "callt ctoff(__callt_return_interrupt)"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")])
+
+(define_insn "save_interrupt"
+ [(set (reg:SI 3) (plus:SI (reg:SI 3) (const_int -20)))
+ (set (mem:SI (plus:SI (reg:SI 3) (const_int -20))) (reg:SI 30))
+ (set (mem:SI (plus:SI (reg:SI 3) (const_int -16))) (reg:SI 4))
+ (set (mem:SI (plus:SI (reg:SI 3) (const_int -12))) (reg:SI 1))
+ (set (mem:SI (plus:SI (reg:SI 3) (const_int -8))) (reg:SI 10))
+ (set (mem:SI (plus:SI (reg:SI 3) (const_int -4))) (reg:SI 11))]
+ ""
+ "*
+{
+ if (TARGET_PROLOG_FUNCTION && !TARGET_LONG_CALLS)
+ return \"addi -20,sp,sp \; st.w r11,16[sp] \; st.w r10,12[sp] \; jarl __save_interrupt,r10\";
+ else
+ {
+ output_asm_insn (\"addi -20, sp, sp\", operands);
+ output_asm_insn (\"st.w r11, 16[sp]\", operands);
+ output_asm_insn (\"st.w r10, 12[sp]\", operands);
+ output_asm_insn (\"st.w ep, 0[sp]\", operands);
+ output_asm_insn (\"st.w gp, 4[sp]\", operands);
+ output_asm_insn (\"st.w r1, 8[sp]\", operands);
+ output_asm_insn (\"movhi hi(__ep), r0, ep\", operands);
+ output_asm_insn (\"movea lo(__ep), ep, ep\", operands);
+ output_asm_insn (\"movhi hi(__gp), r0, gp\", operands);
+ output_asm_insn (\"movea lo(__gp), gp, gp\", operands);
+ return \"\";
+ }
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") (const_int 0))
+ (const_int 10)
+ (const_int 34)))
+ (set_attr "cc" "clobber")])
+
+;; Restore r1, r4, r10, r11 and ep (r30), then return from the interrupt
+(define_insn "return_interrupt"
+ [(return)
+ (set (reg:SI 3) (plus:SI (reg:SI 3) (const_int 20)))
+ (set (reg:SI 11) (mem:SI (plus:SI (reg:SI 3) (const_int 16))))
+ (set (reg:SI 10) (mem:SI (plus:SI (reg:SI 3) (const_int 12))))
+ (set (reg:SI 1) (mem:SI (plus:SI (reg:SI 3) (const_int 8))))
+ (set (reg:SI 4) (mem:SI (plus:SI (reg:SI 3) (const_int 4))))
+ (set (reg:SI 30) (mem:SI (reg:SI 3)))]
+ ""
+ "*
+{
+ if (TARGET_PROLOG_FUNCTION && !TARGET_LONG_CALLS)
+ return \"jr __return_interrupt\";
+ else
+ {
+ output_asm_insn (\"ld.w 0[sp], ep\", operands);
+ output_asm_insn (\"ld.w 4[sp], gp\", operands);
+ output_asm_insn (\"ld.w 8[sp], r1\", operands);
+ output_asm_insn (\"ld.w 12[sp], r10\", operands);
+ output_asm_insn (\"ld.w 16[sp], r11\", operands);
+ output_asm_insn (\"addi 20, sp, sp\", operands);
+ output_asm_insn (\"reti\", operands);
+ return \"\";
+ }
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") (const_int 0))
+ (const_int 4)
+ (const_int 24)))
+ (set_attr "cc" "clobber")])
+
+;; Save all registers except for the registers saved in save_interrupt when
+;; an interrupt function makes a call.
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+;; This is needed because the rest of the compiler is not ready to handle
+;; insns this complicated.
+
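+;; For example (a sketch using this port's interrupt attribute):
+;;
+;;   void __attribute__ ((interrupt)) isr (void) { helper (); }
+;;
+;; the call to helper () is what triggers save_all_interrupt on entry
+;; and restore_all_interrupt before the final return, on top of the
+;; registers already saved by save_interrupt.
+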
+(define_insn "callt_save_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 0)]
+ "(TARGET_V850E || TARGET_V850E2_ALL) && !TARGET_DISABLE_CALLT"
+ "callt ctoff(__callt_save_all_interrupt)"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "save_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 0)]
+ ""
+ "*
+{
+ if (TARGET_PROLOG_FUNCTION && !TARGET_LONG_CALLS)
+ return \"jarl __save_all_interrupt,r10\";
+
+ output_asm_insn (\"addi -120, sp, sp\", operands);
+
+ if (TARGET_EP)
+ {
+ output_asm_insn (\"mov ep, r1\", operands);
+ output_asm_insn (\"mov sp, ep\", operands);
+ output_asm_insn (\"sst.w r31, 116[ep]\", operands);
+ output_asm_insn (\"sst.w r2, 112[ep]\", operands);
+ output_asm_insn (\"sst.w gp, 108[ep]\", operands);
+ output_asm_insn (\"sst.w r6, 104[ep]\", operands);
+ output_asm_insn (\"sst.w r7, 100[ep]\", operands);
+ output_asm_insn (\"sst.w r8, 96[ep]\", operands);
+ output_asm_insn (\"sst.w r9, 92[ep]\", operands);
+ output_asm_insn (\"sst.w r11, 88[ep]\", operands);
+ output_asm_insn (\"sst.w r12, 84[ep]\", operands);
+ output_asm_insn (\"sst.w r13, 80[ep]\", operands);
+ output_asm_insn (\"sst.w r14, 76[ep]\", operands);
+ output_asm_insn (\"sst.w r15, 72[ep]\", operands);
+ output_asm_insn (\"sst.w r16, 68[ep]\", operands);
+ output_asm_insn (\"sst.w r17, 64[ep]\", operands);
+ output_asm_insn (\"sst.w r18, 60[ep]\", operands);
+ output_asm_insn (\"sst.w r19, 56[ep]\", operands);
+ output_asm_insn (\"sst.w r20, 52[ep]\", operands);
+ output_asm_insn (\"sst.w r21, 48[ep]\", operands);
+ output_asm_insn (\"sst.w r22, 44[ep]\", operands);
+ output_asm_insn (\"sst.w r23, 40[ep]\", operands);
+ output_asm_insn (\"sst.w r24, 36[ep]\", operands);
+ output_asm_insn (\"sst.w r25, 32[ep]\", operands);
+ output_asm_insn (\"sst.w r26, 28[ep]\", operands);
+ output_asm_insn (\"sst.w r27, 24[ep]\", operands);
+ output_asm_insn (\"sst.w r28, 20[ep]\", operands);
+ output_asm_insn (\"sst.w r29, 16[ep]\", operands);
+ output_asm_insn (\"mov r1, ep\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"st.w r31, 116[sp]\", operands);
+ output_asm_insn (\"st.w r2, 112[sp]\", operands);
+ output_asm_insn (\"st.w gp, 108[sp]\", operands);
+ output_asm_insn (\"st.w r6, 104[sp]\", operands);
+ output_asm_insn (\"st.w r7, 100[sp]\", operands);
+ output_asm_insn (\"st.w r8, 96[sp]\", operands);
+ output_asm_insn (\"st.w r9, 92[sp]\", operands);
+ output_asm_insn (\"st.w r11, 88[sp]\", operands);
+ output_asm_insn (\"st.w r12, 84[sp]\", operands);
+ output_asm_insn (\"st.w r13, 80[sp]\", operands);
+ output_asm_insn (\"st.w r14, 76[sp]\", operands);
+ output_asm_insn (\"st.w r15, 72[sp]\", operands);
+ output_asm_insn (\"st.w r16, 68[sp]\", operands);
+ output_asm_insn (\"st.w r17, 64[sp]\", operands);
+ output_asm_insn (\"st.w r18, 60[sp]\", operands);
+ output_asm_insn (\"st.w r19, 56[sp]\", operands);
+ output_asm_insn (\"st.w r20, 52[sp]\", operands);
+ output_asm_insn (\"st.w r21, 48[sp]\", operands);
+ output_asm_insn (\"st.w r22, 44[sp]\", operands);
+ output_asm_insn (\"st.w r23, 40[sp]\", operands);
+ output_asm_insn (\"st.w r24, 36[sp]\", operands);
+ output_asm_insn (\"st.w r25, 32[sp]\", operands);
+ output_asm_insn (\"st.w r26, 28[sp]\", operands);
+ output_asm_insn (\"st.w r27, 24[sp]\", operands);
+ output_asm_insn (\"st.w r28, 20[sp]\", operands);
+ output_asm_insn (\"st.w r29, 16[sp]\", operands);
+ }
+
+ return \"\";
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") (const_int 0))
+ (const_int 4)
+ (const_int 62)
+ ))
+ (set_attr "cc" "clobber")])
+
+(define_insn "_save_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 0)]
+ "TARGET_V850 && ! TARGET_LONG_CALLS"
+ "jarl __save_all_interrupt,r10"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+;; Restore all registers saved when an interrupt function makes a call.
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+;; This is needed because the rest of the compiler is not ready to handle
+;; insns this complicated.
+
+(define_insn "callt_restore_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 1)]
+ "(TARGET_V850E || TARGET_V850E2_ALL) && !TARGET_DISABLE_CALLT"
+ "callt ctoff(__callt_restore_all_interrupt)"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+(define_insn "restore_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 1)]
+ ""
+ "*
+{
+ if (TARGET_PROLOG_FUNCTION && !TARGET_LONG_CALLS)
+ return \"jarl __restore_all_interrupt,r10\";
+
+ if (TARGET_EP)
+ {
+ output_asm_insn (\"mov ep, r1\", operands);
+ output_asm_insn (\"mov sp, ep\", operands);
+ output_asm_insn (\"sld.w 116[ep], r31\", operands);
+ output_asm_insn (\"sld.w 112[ep], r2\", operands);
+ output_asm_insn (\"sld.w 108[ep], gp\", operands);
+ output_asm_insn (\"sld.w 104[ep], r6\", operands);
+ output_asm_insn (\"sld.w 100[ep], r7\", operands);
+ output_asm_insn (\"sld.w 96[ep], r8\", operands);
+ output_asm_insn (\"sld.w 92[ep], r9\", operands);
+ output_asm_insn (\"sld.w 88[ep], r11\", operands);
+ output_asm_insn (\"sld.w 84[ep], r12\", operands);
+ output_asm_insn (\"sld.w 80[ep], r13\", operands);
+ output_asm_insn (\"sld.w 76[ep], r14\", operands);
+ output_asm_insn (\"sld.w 72[ep], r15\", operands);
+ output_asm_insn (\"sld.w 68[ep], r16\", operands);
+ output_asm_insn (\"sld.w 64[ep], r17\", operands);
+ output_asm_insn (\"sld.w 60[ep], r18\", operands);
+ output_asm_insn (\"sld.w 56[ep], r19\", operands);
+ output_asm_insn (\"sld.w 52[ep], r20\", operands);
+ output_asm_insn (\"sld.w 48[ep], r21\", operands);
+ output_asm_insn (\"sld.w 44[ep], r22\", operands);
+ output_asm_insn (\"sld.w 40[ep], r23\", operands);
+ output_asm_insn (\"sld.w 36[ep], r24\", operands);
+ output_asm_insn (\"sld.w 32[ep], r25\", operands);
+ output_asm_insn (\"sld.w 28[ep], r26\", operands);
+ output_asm_insn (\"sld.w 24[ep], r27\", operands);
+ output_asm_insn (\"sld.w 20[ep], r28\", operands);
+ output_asm_insn (\"sld.w 16[ep], r29\", operands);
+ output_asm_insn (\"mov r1, ep\", operands);
+ }
+ else
+ {
+ output_asm_insn (\"ld.w 116[sp], r31\", operands);
+ output_asm_insn (\"ld.w 112[sp], r2\", operands);
+ output_asm_insn (\"ld.w 108[sp], gp\", operands);
+ output_asm_insn (\"ld.w 104[sp], r6\", operands);
+ output_asm_insn (\"ld.w 100[sp], r7\", operands);
+ output_asm_insn (\"ld.w 96[sp], r8\", operands);
+ output_asm_insn (\"ld.w 92[sp], r9\", operands);
+ output_asm_insn (\"ld.w 88[sp], r11\", operands);
+ output_asm_insn (\"ld.w 84[sp], r12\", operands);
+ output_asm_insn (\"ld.w 80[sp], r13\", operands);
+ output_asm_insn (\"ld.w 76[sp], r14\", operands);
+ output_asm_insn (\"ld.w 72[sp], r15\", operands);
+ output_asm_insn (\"ld.w 68[sp], r16\", operands);
+ output_asm_insn (\"ld.w 64[sp], r17\", operands);
+ output_asm_insn (\"ld.w 60[sp], r18\", operands);
+ output_asm_insn (\"ld.w 56[sp], r19\", operands);
+ output_asm_insn (\"ld.w 52[sp], r20\", operands);
+ output_asm_insn (\"ld.w 48[sp], r21\", operands);
+ output_asm_insn (\"ld.w 44[sp], r22\", operands);
+ output_asm_insn (\"ld.w 40[sp], r23\", operands);
+ output_asm_insn (\"ld.w 36[sp], r24\", operands);
+ output_asm_insn (\"ld.w 32[sp], r25\", operands);
+ output_asm_insn (\"ld.w 28[sp], r26\", operands);
+ output_asm_insn (\"ld.w 24[sp], r27\", operands);
+ output_asm_insn (\"ld.w 20[sp], r28\", operands);
+ output_asm_insn (\"ld.w 16[sp], r29\", operands);
+ }
+ output_asm_insn (\"addi 120, sp, sp\", operands);
+ return \"\";
+}"
+ [(set (attr "length")
+ (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") (const_int 0))
+ (const_int 4)
+ (const_int 62)
+ ))
+ (set_attr "cc" "clobber")])
+
+(define_insn "_restore_all_interrupt"
+ [(unspec_volatile [(const_int 0)] 1)]
+ "TARGET_V850 && ! TARGET_LONG_CALLS"
+ "jarl __restore_all_interrupt,r10"
+ [(set_attr "length" "4")
+ (set_attr "cc" "clobber")])
+
+
diff --git a/gcc/config/v850/v850.opt b/gcc/config/v850/v850.opt
new file mode 100644
index 000000000..0ae14313f
--- /dev/null
+++ b/gcc/config/v850/v850.opt
@@ -0,0 +1,106 @@
+; Options for the NEC V850 port of the compiler.
+
+; Copyright (C) 2005, 2007, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
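+; A note on the record format (a sketch of the generic .opt machinery,
+; not specific to this file): an entry such as
+;
+;   mapp-regs
+;   Target Report Mask(APP_REGS)
+;
+; makes the option generators define MASK_APP_REGS and the
+; TARGET_APP_REGS test used in the conditions of the v850 back end.
+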
+mapp-regs
+Target Report Mask(APP_REGS)
+Use registers r2 and r5
+
+mbig-switch
+Target Report Mask(BIG_SWITCH)
+Use 4 byte entries in switch tables
+
+mdebug
+Target Report Mask(DEBUG)
+Enable backend debugging
+
+mdisable-callt
+Target Report Mask(DISABLE_CALLT)
+Do not use the callt instruction
+
+mep
+Target Report Mask(EP)
+Reuse r30 on a per function basis
+
+mghs
+Target Report Mask(GHS)
+Support Green Hills ABI
+
+mlong-calls
+Target Report Mask(LONG_CALLS)
+Prohibit PC relative function calls
+
+mprolog-function
+Target Report Mask(PROLOG_FUNCTION)
+Use stubs for function prologues
+
+msda
+Target RejectNegative Joined
+Set the max size of data eligible for the SDA area
+
+msmall-sld
+Target Report Mask(SMALL_SLD)
+Enable the use of the short load instructions
+
+mspace
+Target RejectNegative
+Same as: -mep -mprolog-function
+
+mtda
+Target RejectNegative Joined
+Set the max size of data eligible for the TDA area
+
+mno-strict-align
+Target Report Mask(NO_STRICT_ALIGN)
+Do not enforce strict alignment
+
+mjump-tables-in-data-section
+Target Report Mask(JUMP_TABLES_IN_DATA_SECTION)
+Put jump tables for switch statements into the .data section rather than the .code section
+
+mUS-bit-set
+Target Report Mask(US_BIT_SET)
+
+mv850
+Target Report RejectNegative Mask(V850)
+Compile for the v850 processor
+
+mv850e
+Target Report RejectNegative Mask(V850E)
+Compile for the v850e processor
+
+mv850e1
+Target RejectNegative Mask(V850E1)
+Compile for the v850e1 processor
+
+mv850es
+Target RejectNegative Mask(V850E1) MaskExists
+Compile for the v850es variant of the v850e1
+
+mv850e2
+Target Report RejectNegative Mask(V850E2)
+Compile for the v850e2 processor
+
+mv850e2v3
+Target Report RejectNegative Mask(V850E2V3)
+Compile for the v850e2v3 processor
+
+mzda
+Target RejectNegative Joined
+Set the max size of data eligible for the ZDA area
diff --git a/gcc/config/vax/builtins.md b/gcc/config/vax/builtins.md
new file mode 100644
index 000000000..389526b38
--- /dev/null
+++ b/gcc/config/vax/builtins.md
@@ -0,0 +1,192 @@
+;; builtin definitions for DEC VAX.
+;; Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [
+ (VUNSPEC_LOCK 100) ; sync lock and test
+ (VUNSPEC_UNLOCK 101) ; sync lock release
+ ]
+)
+
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ffs:SI (match_operand:SI 1 "general_operand" "")))]
+ ""
+ "
+{
+ rtx label = gen_label_rtx ();
+ emit_insn (gen_ffssi2_internal (operands[0], operands[1]));
+ emit_jump_insn (gen_bne (label));
+ emit_insn (gen_negsi2 (operands[0], const1_rtx));
+ emit_label (label);
+ emit_insn (gen_addsi3 (operands[0], operands[0], const1_rtx));
+ DONE;
+}")
+
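+;; A C sketch of the ffssi2 expansion above (vax_ffs is a hypothetical
+;; stand-in for the hardware FFS modeled by ffssi2_internal):
+;;
+;;   pos = vax_ffs (x);      /* 0-based position; Z flag set if x == 0 */
+;;   if (x == 0) pos = -1;   /* the gen_bne branch skips this          */
+;;   return pos + 1;         /* GCC's ffs (): 1-based, 0 when x == 0   */
+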
+(define_insn "ffssi2_internal"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rQ")
+ (ffs:SI (match_operand:SI 1 "general_operand" "nrmT")))
+ (set (cc0) (match_dup 0))]
+ ""
+ "ffs $0,$32,%1,%0")
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=&g")
+ (unspec:VAXint [(match_operand:VAXint 1 "memory_operand" "+m")
+ (match_operand:VAXint 2 "const_int_operand" "n")
+ ] VUNSPEC_LOCK))]
+ ""
+ "
+{
+ rtx label;
+
+ if (operands[2] != const1_rtx)
+ FAIL;
+
+ label = gen_label_rtx ();
+ emit_move_insn (operands[0], const1_rtx);
+ emit_jump_insn (gen_jbbssi<mode> (operands[1], const0_rtx, label, operands[1]));
+ emit_move_insn (operands[0], const0_rtx);
+ emit_label (label);
+ DONE;
+}")
+
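+;; In effect the expander implements a test-and-set that returns the
+;; prior value of bit 0 (a C sketch; bbssi_was_set is a hypothetical
+;; stand-in for the BBSSI branch condition):
+;;
+;;   old = 1;
+;;   if (!bbssi_was_set (mem))   /* BBSSI sets the bit in either case */
+;;     old = 0;
+;;   return old;
+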
+(define_insn "jbbssiqi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:QI 0 "memory_operand" "g")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:QI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 1))])]
+ ""
+ "jbssi %1,%0,%l2")
+
+(define_insn "jbbssihi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:HI 0 "memory_operand" "Q")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:HI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 1))])]
+ ""
+ "jbssi %1,%0,%l2")
+
+(define_insn "jbbssisi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "memory_operand" "Q")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:SI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 1))])]
+ ""
+ "jbssi %1,%0,%l2")
+
+
+(define_expand "sync_lock_release<mode>"
+ [(set (match_operand:VAXint 0 "memory_operand" "+m")
+ (unspec:VAXint [(match_operand:VAXint 1 "const_int_operand" "n")
+ ] VUNSPEC_UNLOCK))]
+ ""
+ "
+{
+ rtx label;
+ if (operands[1] != const0_rtx)
+ FAIL;
+#if 1
+ label = gen_label_rtx ();
+ emit_jump_insn (gen_jbbcci<mode> (operands[0], const0_rtx, label, operands[0]));
+ emit_label (label);
+#else
+ emit_move_insn (operands[0], const0_rtx);
+#endif
+ DONE;
+}")
+
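+;; The BBCCI path above clears bit 0 interlocked and reaches the label
+;; whether or not the branch is taken; only the interlocked clear
+;; matters for the release.  The #else arm (compiled out by the #if 1)
+;; would be a plain, non-interlocked store of zero.
+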
+(define_insn "jbbcciqi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:QI 0 "memory_operand" "g")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:QI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 0))])]
+ ""
+ "jbcci %1,%0,%l2")
+
+(define_insn "jbbccihi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:HI 0 "memory_operand" "Q")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:HI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 0))])]
+ ""
+ "jbcci %1,%0,%l2")
+
+(define_insn "jbbccisi"
+ [(parallel
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "memory_operand" "Q")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "nrm"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (zero_extract:SI (match_operand:SI 3 "memory_operand" "+0")
+ (const_int 1)
+ (match_dup 1))
+ (const_int 0))])]
+ ""
+ "jbcci %1,%0,%l2")
+
diff --git a/gcc/config/vax/constraints.md b/gcc/config/vax/constraints.md
new file mode 100644
index 000000000..e3266f676
--- /dev/null
+++ b/gcc/config/vax/constraints.md
@@ -0,0 +1,117 @@
+;; Constraints for the DEC VAX port.
+;; Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constraint "Z0"
+ "Match a CONST_INT of 0"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "U06"
+ "unsigned 6 bit value (0..63)"
+ (and (match_code "const_int")
+ (match_test "0 <= ival && ival < 64")))
+
+(define_constraint "U08"
+ "Unsigned 8 bit value"
+ (and (match_code "const_int")
+ (match_test "0 <= ival && ival < 256")))
+
+(define_constraint "U16"
+ "Unsigned 16 bit value"
+ (and (match_code "const_int")
+ (match_test "0 <= ival && ival < 65536")))
+
+(define_constraint "CN6"
+ "negative 6 bit value (-63..-1)"
+ (and (match_code "const_int")
+ (match_test "-63 <= ival && ival < 0")))
+
+(define_constraint "S08"
+ "signed 8 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "-128 <= ival && ival < 128")))
+
+(define_constraint "S16"
+ "signed 16 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "-32768 <= ival && ival < 32768")))
+
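+;; The single-letter constraints that follow (I through O) delegate to
+;; the named constraints above through the satisfies_constraint_*
+;; helpers that genpreds emits into tm-constrs.h.
+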
+(define_constraint "I"
+ "Match a CONST_INT of 0 [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Z0 (GEN_INT (ival))")))
+
+(define_constraint "J"
+ "unsigned 6 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_U06 (GEN_INT (ival))")))
+
+(define_constraint "K"
+ "signed 8 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_S08 (GEN_INT (ival))")))
+
+(define_constraint "L"
+ "signed 16 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_S16 (GEN_INT (ival))")))
+
+(define_constraint "M"
+ "Unsigned 8 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_U08 (GEN_INT (ival))")))
+
+(define_constraint "N"
+ "Unsigned 16 bit value [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_U16 (GEN_INT (ival))")))
+
+(define_constraint "O"
+ "Negative short literals (-63..-1) [old]"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_CN6 (GEN_INT (ival))")))
+
+/* Similar, but for floating constants, defining the letter G. */
+
+(define_constraint "G"
+ "Match a floating-point zero"
+ (and (match_code "const_double")
+ (match_test "op == CONST0_RTX (DFmode) || op == CONST0_RTX (SFmode)")))
+
+/* Optional extra constraints for this machine. */
+
+(define_memory_constraint "Q"
+ "operand is a MEM that does not have a mode-dependent address."
+ (and (match_code "mem")
+ (match_test "!mode_dependent_address_p (XEXP (op, 0))")))
+
+(define_memory_constraint "B"
+ ""
+ (and (match_operand:BLK 0 "memory_operand")
+ (not (match_operand:BLK 0 "illegal_blk_memory_operand" ""))))
+
+(define_memory_constraint "R"
+ ""
+ (and (match_operand:DI 0 "memory_operand")
+ (not (match_operand:DI 0 "illegal_addsub_di_memory_operand" ""))))
+
+(define_constraint "T"
+ "@internal satisfies CONSTANT_P and, if pic is enabled, is not a SYMBOL_REF, LABEL_REF, or CONST."
+ (ior (not (match_code "const,symbol_ref,label_ref"))
+ (match_test "!flag_pic")))
diff --git a/gcc/config/vax/elf.h b/gcc/config/vax/elf.h
new file mode 100644
index 000000000..8e5f4afe6
--- /dev/null
+++ b/gcc/config/vax/elf.h
@@ -0,0 +1,111 @@
+/* Target definitions for GNU compiler for VAX using ELF
+ Copyright (C) 2002, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+ Contributed by Matt Thomas <matt@3am-software.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_ELF
+#define TARGET_ELF 1
+
+#undef REGISTER_PREFIX
+#undef REGISTER_NAMES
+#define REGISTER_PREFIX "%"
+#define REGISTER_NAMES \
+ { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
+ "%r8", "%r9", "%r10", "%r11", "%ap", "%fp", "%sp", "%pc", }
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+/* Profiling routine. */
+#undef VAX_FUNCTION_PROFILER_NAME
+#define VAX_FUNCTION_PROFILER_NAME "__mcount"
+
+/* Let's be re-entrant. */
+#undef PCC_STATIC_STRUCT_RETURN
+
+/* Before the prologue, the top of the frame is just below the argument
+   count pushed by the CALLS instruction, before the start of the saved
+   registers. */
+#define INCOMING_FRAME_SP_OFFSET 0
+
+/* Offset from the frame pointer register value to the top of the stack. */
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0
+
+/* We use R2-R5 (call-clobbered) registers for exceptions. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 2 : INVALID_REGNUM)
+
+/* Use the slot at the top of the stack for the DWARF2 EH stackadj value. */
+#define EH_RETURN_STACKADJ_RTX \
+ gen_rtx_MEM (SImode, \
+ plus_constant (gen_rtx_REG (Pmode, FRAME_POINTER_REGNUM),\
+ -4))
+
+/* Simply store the return handler into the call frame. */
+#define EH_RETURN_HANDLER_RTX \
+ gen_rtx_MEM (Pmode, \
+ plus_constant (gen_rtx_REG (Pmode, FRAME_POINTER_REGNUM),\
+ 16))
+
+
+/* Reserve the top of the stack for the exception handler stackadj value. */
+#undef STARTING_FRAME_OFFSET
+#define STARTING_FRAME_OFFSET -4
+
+/* The VAX wants no space between the case instruction and the jump table. */
+#undef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ /* Turn off function CSE if we're doing PIC. */ \
+ if (flag_pic) \
+ flag_no_function_cse = 1; \
+ } \
+ while (0)
+
+/* Don't allow *foo where foo is non-local.  */
+#define NO_EXTERNAL_INDIRECT_ADDRESS
+
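+/* Default to position-independent code: the spec below passes -fPIC to
+   cc1/cc1plus unless the user gave an explicit -fno-pic, -fpic or
+   -fPIC (a reading of the nested spec negations, for reference).  */
+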
+#undef VAX_CC1_AND_CC1PLUS_SPEC
+#define VAX_CC1_AND_CC1PLUS_SPEC \
+ "%{!fno-pic: \
+ %{!fpic: \
+ %{!fPIC:-fPIC}}}"
+
+/* VAX ELF is always gas; override the generic VAX ASM_SPEC. */
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{!fno-pic: %{!mno-asm-pic:-k}}"
+
+/* We want PCREL dwarf output. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ ((GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4)
+
+/* Emit a PC-relative relocation. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ fprintf (FILE, "%%pcrel%d(", SIZE * 8); \
+ assemble_name (FILE, LABEL); \
+ fputc (')', FILE); \
+ } while (0)
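+
+/* For example (a sketch, assuming integer_asm_op yields ".long" for
+   SIZE == 4): with LABEL ".LEHB0" the macro above emits
+
+       .long %pcrel32(.LEHB0)
+
+   which the assembler resolves to a PC-relative relocation.  */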
diff --git a/gcc/config/vax/elf.opt b/gcc/config/vax/elf.opt
new file mode 100644
index 000000000..6296f7567
--- /dev/null
+++ b/gcc/config/vax/elf.opt
@@ -0,0 +1,30 @@
+; VAX ELF options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+mno-asm-pic
+Target RejectNegative
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/vax/lib1funcs.asm b/gcc/config/vax/lib1funcs.asm
new file mode 100644
index 000000000..1d57b56da
--- /dev/null
+++ b/gcc/config/vax/lib1funcs.asm
@@ -0,0 +1,92 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of GCC.
+ Contributed by Maciej W. Rozycki <macro@linux-mips.org>.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef L_udivsi3
+ .text
+ .globl __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ .word 0
+ movl 8(%ap), %r1
+ blss 0f /* Check bit #31 of divisor. */
+ movl 4(%ap), %r2
+ blss 1f /* Check bit #31 of dividend. */
+
+	/* Both MSBs clear; do a standard division. */
+
+ divl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of divisor set, only 1 or 0 may result. */
+0:
+ decl %r1
+ clrl %r0
+ cmpl %r1, 4(%ap)
+ adwc $0, %r0
+ ret
+
+ /* MSB of dividend set, do an extended division. */
+1:
+ clrl %r3
+ ediv %r1, %r2, %r0, %r3
+ ret
+ .size __udivsi3, . - __udivsi3
+ .previous
+#endif
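+
+/* A C sketch of the MSB-set divisor case above: with bit 31 of the
+   divisor set, the quotient can only be 0 or 1, i.e.
+
+       q = (dividend >= divisor) ? 1 : 0;
+
+   which the DECL/CMPL/ADWC sequence computes from the carry flag
+   without branching.  */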
+
+#ifdef L_umodsi3
+ .text
+ .globl __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ .word 0
+ movl 8(%ap), %r1
+ blss 0f /* Check bit #31 of divisor. */
+ movl 4(%ap), %r2
+ blss 1f /* Check bit #31 of dividend. */
+
+	/* Both MSBs clear; do a standard division. */
+
+ divl3 %r1, %r2, %r0
+ mull2 %r0, %r1
+ subl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of divisor set, subtract the divisor at most once. */
+0:
+ movl 4(%ap), %r2
+ clrl %r0
+ cmpl %r2, %r1
+ sbwc $0, %r0
+ bicl2 %r0, %r1
+ subl3 %r1, %r2, %r0
+ ret
+
+ /* MSB of dividend set, do an extended division. */
+1:
+ clrl %r3
+ ediv %r1, %r2, %r3, %r0
+ ret
+ .size __umodsi3, . - __umodsi3
+ .previous
+#endif
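+
+/* A C sketch of the masking trick in the MSB-set divisor case above:
+
+       mask = (dividend < divisor) ? ~0U : 0;   /* CLRL + CMPL + SBWC */
+       r    = dividend - (divisor & ~mask);     /* BICL2 + SUBL3      */
+
+   i.e. the divisor is subtracted exactly once when it fits.  */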
diff --git a/gcc/config/vax/linux.h b/gcc/config/vax/linux.h
new file mode 100644
index 000000000..93ce7bb3a
--- /dev/null
+++ b/gcc/config/vax/linux.h
@@ -0,0 +1,54 @@
+/* Definitions for VAX running Linux-based GNU systems with ELF format.
+ Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (VAX GNU/Linux with ELF)");
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+/* We use GAS, G-float double and want new DI patterns. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_QMATH | MASK_G_FLOAT)
+
+/* Use standard names for udiv and umod libgcc calls. */
+#undef TARGET_BSD_DIVMOD
+#define TARGET_BSD_DIVMOD 0
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{fpic|fPIC:-k}"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%(endian_spec) \
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker /lib/ld.so.1} \
+ %{static:-static}}"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
diff --git a/gcc/config/vax/netbsd-elf.h b/gcc/config/vax/netbsd-elf.h
new file mode 100644
index 000000000..4e64a238f
--- /dev/null
+++ b/gcc/config/vax/netbsd-elf.h
@@ -0,0 +1,68 @@
+/* Definitions of target machine for GNU compiler,
+ for NetBSD/vax ELF systems.
+ Copyright (C) 2002, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Names to predefine in the preprocessor for this target OS. */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC NETBSD_CPP_SPEC
+
+#ifndef NETBSD_CC1_AND_CC1PLUS_SPEC
+#define NETBSD_CC1_AND_CC1PLUS_SPEC ""
+#endif
+
+#undef CC1_SPEC
+#define CC1_SPEC NETBSD_CC1_AND_CC1PLUS_SPEC VAX_CC1_AND_CC1PLUS_SPEC
+
+#undef CC1PLUS_SPEC
+#define CC1PLUS_SPEC NETBSD_CC1_AND_CC1PLUS_SPEC VAX_CC1_AND_CC1PLUS_SPEC
+
+#define NETBSD_ENTRY_POINT "__start"
+
+#undef LINK_SPEC
+#if 0
+/* FIXME: We must link all executables statically until PIC support
+ is added to the compiler. */
+#define LINK_SPEC \
+ "%{assert*} %{R*} %{rpath*} \
+ %{shared:%ethe -shared option is not currently supported for VAX ELF} \
+ %{!shared: \
+ -dc -dp \
+ %{!nostdlib: \
+ %{!r: \
+ %{!e*:-e %(netbsd_entry_point)}}} \
+ %{!static:-static} \
+ %{static:-static}}"
+#else
+#define LINK_SPEC NETBSD_LINK_SPEC_ELF
+#endif
+
+#define EXTRA_SPECS \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+/* We use gas, not the UNIX assembler. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT MASK_QMATH
diff --git a/gcc/config/vax/netbsd.h b/gcc/config/vax/netbsd.h
new file mode 100644
index 000000000..51c32f734
--- /dev/null
+++ b/gcc/config/vax/netbsd.h
@@ -0,0 +1,47 @@
+/* Definitions of target machine for GNU compiler.
+ NetBSD/vax a.out version.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_AOUT(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC NETBSD_CPP_SPEC
+
+/* Make gcc agree with <machine/ansi.h> */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* Until NetBSD/vax a.out uses ELF or something else that handles
+   DWARF2 unwinding and initialization better, use SJLJ exceptions. */
+#undef DWARF2_UNWIND_INFO
+
+/* We use gas, not the UNIX assembler. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT 0
diff --git a/gcc/config/vax/openbsd.h b/gcc/config/vax/openbsd.h
new file mode 100644
index 000000000..cdff4ca82
--- /dev/null
+++ b/gcc/config/vax/openbsd.h
@@ -0,0 +1,48 @@
+/* Configuration fragment for a VAX OpenBSD target.
+ Copyright (C) 2000, 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Amend common OpenBSD definitions for VAX target. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=OpenBSD"); \
+ } \
+ while (0)
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "long int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
diff --git a/gcc/config/vax/openbsd1.h b/gcc/config/vax/openbsd1.h
new file mode 100644
index 000000000..84b90e550
--- /dev/null
+++ b/gcc/config/vax/openbsd1.h
@@ -0,0 +1,22 @@
+/* Configuration fragment for a VAX OpenBSD target.
+ Copyright (C) 2000, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Set up definitions before picking up the common openbsd.h file. */
+#define OBSD_OLD_GAS
+#define OBSD_NO_DYNAMIC_LIBRARIES
diff --git a/gcc/config/vax/predicates.md b/gcc/config/vax/predicates.md
new file mode 100644
index 000000000..775ddcddf
--- /dev/null
+++ b/gcc/config/vax/predicates.md
@@ -0,0 +1,111 @@
+;; Predicate definitions for DEC VAX.
+;; Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Special case of a symbolic operand that's used as an operand.
+
+(define_predicate "symbolic_operand"
+ (match_code "const,symbol_ref,label_ref"))
+
+(define_predicate "local_symbolic_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ if (GET_CODE (op) == LABEL_REF)
+ return 1;
+ if (GET_CODE (op) == SYMBOL_REF)
+ return !flag_pic || SYMBOL_REF_LOCAL_P (op);
+ if (GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF)
+ return 1;
+ return !flag_pic || SYMBOL_REF_LOCAL_P (XEXP (XEXP (op, 0), 0));
+})
+
+(define_predicate "external_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (not (match_operand 0 "local_symbolic_operand" ""))))
+
+(define_predicate "external_const_operand"
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (XEXP (XEXP (op, 0), 0))")))
+
+(define_predicate "nonsymbolic_operand"
+ (and (ior (match_test "!flag_pic")
+ (not (match_operand 0 "symbolic_operand")))
+ (match_operand 0 "general_operand" "")))
+
+(define_predicate "external_memory_operand"
+ (match_code "mem")
+{
+ rtx addr = XEXP (op, 0);
+ if (MEM_P (addr))
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ addr = XEXP (addr, 1);
+ if (MEM_P (addr))
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ addr = XEXP (addr, 1);
+ return external_symbolic_operand (addr, SImode)
+ || external_const_operand (addr, SImode);
+})
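+
+;; E.g. (an illustration) both (mem (symbol_ref X)) and
+;; (mem (plus (reg) (mem (symbol_ref X)))) unwrap to the SYMBOL_REF,
+;; which the two predicates above then classify.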
+
+(define_predicate "indirect_memory_operand"
+ (match_code "mem")
+{
+ op = XEXP (op, 0);
+ if (MEM_P (op))
+ return 1;
+ if (GET_CODE (op) == PLUS)
+ op = XEXP (op, 1);
+ return MEM_P (op);
+})
+
+(define_predicate "indexed_memory_operand"
+ (match_code "mem")
+{
+ op = XEXP (op, 0);
+ return GET_CODE (op) != PRE_DEC && GET_CODE (op) != POST_INC
+ && mode_dependent_address_p (op);
+})
+
+(define_predicate "illegal_blk_memory_operand"
+ (and (match_code "mem")
+ (ior (and (match_test "flag_pic")
+ (match_operand 0 "external_memory_operand" ""))
+ (ior (match_operand 0 "indexed_memory_operand" "")
+ (ior (match_operand 0 "indirect_memory_operand" "")
+ (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC"))))))
+
+(define_predicate "illegal_addsub_di_memory_operand"
+ (and (match_code "mem")
+ (ior (and (match_test "flag_pic")
+ (match_operand 0 "external_memory_operand" ""))
+ (ior (match_operand 0 "indexed_memory_operand" "")
+ (ior (match_operand 0 "indirect_memory_operand" "")
+ (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC"))))))
+
+(define_predicate "nonimmediate_addsub_di_operand"
+ (and (match_code "subreg,reg,mem")
+ (and (match_operand:DI 0 "nonimmediate_operand" "")
+ (not (match_operand:DI 0 "illegal_addsub_di_memory_operand")))))
+
+(define_predicate "general_addsub_di_operand"
+ (and (match_code "const_int,const_double,subreg,reg,mem")
+ (and (match_operand:DI 0 "general_operand" "")
+ (not (match_operand:DI 0 "illegal_addsub_di_memory_operand")))))
diff --git a/gcc/config/vax/t-linux b/gcc/config/vax/t-linux
new file mode 100644
index 000000000..9af1edb0f
--- /dev/null
+++ b/gcc/config/vax/t-linux
@@ -0,0 +1,2 @@
+LIB1ASMSRC = vax/lib1funcs.asm
+LIB1ASMFUNCS = _udivsi3 _umodsi3
diff --git a/gcc/config/vax/vax-modes.def b/gcc/config/vax/vax-modes.def
new file mode 100644
index 000000000..31b56f7f0
--- /dev/null
+++ b/gcc/config/vax/vax-modes.def
@@ -0,0 +1,22 @@
+/* VAX extra machine modes.
+ Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* We just need to reset the floating point formats. */
+RESET_FLOAT_FORMAT (SF, vax_f_format);
+RESET_FLOAT_FORMAT (DF, vax_d_format);
diff --git a/gcc/config/vax/vax-protos.h b/gcc/config/vax/vax-protos.h
new file mode 100644
index 000000000..6861260f3
--- /dev/null
+++ b/gcc/config/vax/vax-protos.h
@@ -0,0 +1,41 @@
+/* Definitions of target machine for GNU compiler. VAX version.
+ Copyright (C) 2000, 2002, 2003, 2004, 2005, 2007, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+extern bool legitimate_constant_address_p (rtx);
+extern bool legitimate_constant_p (rtx);
+extern bool vax_mode_dependent_address_p (rtx);
+
+#ifdef RTX_CODE
+extern const char *cond_name (rtx);
+extern bool adjacent_operands_p (rtx, rtx, enum machine_mode);
+extern const char *rev_cond_name (rtx);
+extern void print_operand_address (FILE *, rtx);
+extern void print_operand (FILE *, rtx, int);
+extern void vax_notice_update_cc (rtx, rtx);
+extern void vax_expand_addsub_di_operands (rtx *, enum rtx_code);
+extern const char * vax_output_int_move (rtx, rtx *, enum machine_mode);
+extern const char * vax_output_int_add (rtx, rtx *, enum machine_mode);
+extern const char * vax_output_int_subtract (rtx, rtx *, enum machine_mode);
+extern const char * vax_output_movmemsi (rtx, rtx *);
+#endif /* RTX_CODE */
+
+#ifdef REAL_VALUE_TYPE
+extern int check_float_value (enum machine_mode, REAL_VALUE_TYPE *, int);
+#endif /* REAL_VALUE_TYPE */
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
new file mode 100644
index 000000000..9656d7365
--- /dev/null
+++ b/gcc/config/vax/vax.c
@@ -0,0 +1,2140 @@
+/* Subroutines for insn-output.c for VAX.
+ Copyright (C) 1987, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
+ 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "df.h"
+#include "tree.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "function.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "flags.h"
+#include "debug.h"
+#include "diagnostic-core.h"
+#include "tm-preds.h"
+#include "tm-constrs.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+
+static void vax_option_override (void);
+static bool vax_legitimate_address_p (enum machine_mode, rtx, bool);
+static void vax_output_function_prologue (FILE *, HOST_WIDE_INT);
+static void vax_file_start (void);
+static void vax_init_libfuncs (void);
+static void vax_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static int vax_address_cost_1 (rtx);
+static int vax_address_cost (rtx, bool);
+static bool vax_rtx_costs (rtx, int, int, int *, bool);
+static rtx vax_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static void vax_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx vax_struct_value_rtx (tree, int);
+static rtx vax_builtin_setjmp_frame_value (void);
+static void vax_asm_trampoline_template (FILE *);
+static void vax_trampoline_init (rtx, tree, rtx);
+static int vax_return_pops_args (tree, tree, int);
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
+
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE vax_output_function_prologue
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START vax_file_start
+#undef TARGET_ASM_FILE_START_APP_OFF
+#define TARGET_ASM_FILE_START_APP_OFF true
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS vax_init_libfuncs
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK vax_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS vax_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST vax_address_cost
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG vax_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE vax_function_arg_advance
+
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX vax_struct_value_rtx
+
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE vax_builtin_setjmp_frame_value
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P vax_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED hook_bool_void_true
+
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE vax_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT vax_trampoline_init
+#undef TARGET_RETURN_POPS_ARGS
+#define TARGET_RETURN_POPS_ARGS vax_return_pops_args
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE vax_option_override
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Set global variables as needed for the options enabled. */
+
+static void
+vax_option_override (void)
+{
+ /* We're VAX floating point, not IEEE floating point. */
+ if (TARGET_G_FLOAT)
+ REAL_MODE_FORMAT (DFmode) = &vax_g_format;
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+}
+
+/* Generate the assembly code for function entry. FILE is a stdio
+   stream to output the code to.  SIZE is the number of bytes of
+   temporary storage to allocate.
+
+ Refer to the array `regs_ever_live' to determine which registers to
+ save; `regs_ever_live[I]' is nonzero if register number I is ever
+ used in the function. This function is responsible for knowing
+ which registers should not be saved even if used. */
+
+static void
+vax_output_function_prologue (FILE * file, HOST_WIDE_INT size)
+{
+ int regno;
+ int mask = 0;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ mask |= 1 << regno;
+
+ fprintf (file, "\t.word 0x%x\n", mask);
+
+ if (dwarf2out_do_frame ())
+ {
+ const char *label = dwarf2out_cfi_label (false);
+ int offset = 0;
+
+ for (regno = FIRST_PSEUDO_REGISTER-1; regno >= 0; --regno)
+ if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
+ dwarf2out_reg_save (label, regno, offset -= 4);
+
+ dwarf2out_reg_save (label, PC_REGNUM, offset -= 4);
+ dwarf2out_reg_save (label, FRAME_POINTER_REGNUM, offset -= 4);
+ dwarf2out_reg_save (label, ARG_POINTER_REGNUM, offset -= 4);
+ dwarf2out_def_cfa (label, FRAME_POINTER_REGNUM, -(offset - 4));
+ }
+
+ size -= STARTING_FRAME_OFFSET;
+ if (size >= 64)
+ asm_fprintf (file, "\tmovab %wd(%Rsp),%Rsp\n", -size);
+ else if (size)
+ asm_fprintf (file, "\tsubl2 $%wd,%Rsp\n", size);
+}
+
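+/* For example (a sketch): a function that clobbers only %r6 and %r7
+   gets the entry mask ".word 0xc0" (bits 6 and 7 set), which the CALLS
+   instruction interprets as "save r6 and r7 on entry".  */
+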
+/* When debugging with stabs, we want to output an extra dummy label
+ so that gas can distinguish between D_float and G_float prior to
+ processing the .stabs directive identifying type double. */
+static void
+vax_file_start (void)
+{
+ default_file_start ();
+
+ if (write_symbols == DBX_DEBUG)
+ fprintf (asm_out_file, "___vax_%c_doubles:\n", ASM_DOUBLE_CHAR);
+}
+
+/* We can use the BSD C library routines for the libgcc calls that are
+   still generated, since that's what they boil down to anyway.  When
+   targeting ELF, avoid the user's namespace. */
+
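+/* Note (relying on GCC's libfunc naming convention): the leading '*'
+   in the names below marks them as verbatim assembler names, so no
+   user-label prefix is prepended.  */
+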
+static void
+vax_init_libfuncs (void)
+{
+ if (TARGET_BSD_DIVMOD)
+ {
+ set_optab_libfunc (udiv_optab, SImode, TARGET_ELF ? "*__udiv" : "*udiv");
+ set_optab_libfunc (umod_optab, SImode, TARGET_ELF ? "*__urem" : "*urem");
+ }
+}
+
+/* Split the N DImode operands of INSN into SImode word halves: LOW
+   receives the low-word parts, and OPERANDS is rewritten in place to
+   the high-word parts, using autoincrement addressing where that is
+   profitable. */
+
+static void
+split_quadword_operands (rtx insn, enum rtx_code code, rtx * operands,
+ rtx * low, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ low[i] = 0;
+
+ for (i = 0; i < n; i++)
+ {
+ if (MEM_P (operands[i])
+ && (GET_CODE (XEXP (operands[i], 0)) == PRE_DEC
+ || GET_CODE (XEXP (operands[i], 0)) == POST_INC))
+ {
+ rtx addr = XEXP (operands[i], 0);
+ operands[i] = low[i] = gen_rtx_MEM (SImode, addr);
+ }
+ else if (optimize_size && MEM_P (operands[i])
+ && REG_P (XEXP (operands[i], 0))
+ && (code != MINUS || operands[1] != const0_rtx)
+ && find_regno_note (insn, REG_DEAD,
+ REGNO (XEXP (operands[i], 0))))
+ {
+ low[i] = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (Pmode,
+ XEXP (operands[i], 0)));
+ operands[i] = gen_rtx_MEM (SImode, XEXP (operands[i], 0));
+ }
+ else
+ {
+ low[i] = operand_subword (operands[i], 0, 0, DImode);
+ operands[i] = operand_subword (operands[i], 1, 0, DImode);
+ }
+ }
+}
+
+void
+print_operand_address (FILE * file, rtx addr)
+{
+ rtx orig = addr;
+ rtx reg1, breg, ireg;
+ rtx offset;
+
+ retry:
+ switch (GET_CODE (addr))
+ {
+ case MEM:
+ fprintf (file, "*");
+ addr = XEXP (addr, 0);
+ goto retry;
+
+ case REG:
+ fprintf (file, "(%s)", reg_names[REGNO (addr)]);
+ break;
+
+ case PRE_DEC:
+ fprintf (file, "-(%s)", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case POST_INC:
+ fprintf (file, "(%s)+", reg_names[REGNO (XEXP (addr, 0))]);
+ break;
+
+ case PLUS:
+ /* There can be either two or three things added here. One must be a
+ REG. One can be either a REG or a MULT of a REG and an appropriate
+ constant, and the third can only be a constant or a MEM.
+
+ We get these two or three things and put the constant or MEM in
+ OFFSET, the MULT or REG in IREG, and the REG in BREG. If we have
+ a register and can't tell yet if it is a base or index register,
+ put it into REG1. */
+
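+      /* For example (an illustration): (plus (plus (reg R1)
+         (mult (reg R2) (const_int 4))) (const_int 8)) ends up with
+         OFFSET = 8, BREG = R1, IREG = the MULT, and prints as
+         "8(%r1)[%r2]".  */
+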
+ reg1 = 0; ireg = 0; breg = 0; offset = 0;
+
+ if (CONSTANT_ADDRESS_P (XEXP (addr, 0))
+ || MEM_P (XEXP (addr, 0)))
+ {
+ offset = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ }
+ else if (CONSTANT_ADDRESS_P (XEXP (addr, 1))
+ || MEM_P (XEXP (addr, 1)))
+ {
+ offset = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+ else if (GET_CODE (XEXP (addr, 1)) == MULT)
+ {
+ ireg = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+ else if (GET_CODE (XEXP (addr, 0)) == MULT)
+ {
+ ireg = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ }
+ else if (REG_P (XEXP (addr, 1)))
+ {
+ reg1 = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+ else if (REG_P (XEXP (addr, 0)))
+ {
+ reg1 = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ }
+ else
+ gcc_unreachable ();
+
+ if (REG_P (addr))
+ {
+ if (reg1)
+ ireg = addr;
+ else
+ reg1 = addr;
+ }
+ else if (GET_CODE (addr) == MULT)
+ ireg = addr;
+ else
+ {
+ gcc_assert (GET_CODE (addr) == PLUS);
+ if (CONSTANT_ADDRESS_P (XEXP (addr, 0))
+ || MEM_P (XEXP (addr, 0)))
+ {
+ if (offset)
+ {
+ if (CONST_INT_P (offset))
+ offset = plus_constant (XEXP (addr, 0), INTVAL (offset));
+ else
+ {
+ gcc_assert (CONST_INT_P (XEXP (addr, 0)));
+ offset = plus_constant (offset, INTVAL (XEXP (addr, 0)));
+ }
+ }
+ offset = XEXP (addr, 0);
+ }
+ else if (REG_P (XEXP (addr, 0)))
+ {
+ if (reg1)
+ ireg = reg1, breg = XEXP (addr, 0), reg1 = 0;
+ else
+ reg1 = XEXP (addr, 0);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (addr, 0)) == MULT);
+ gcc_assert (!ireg);
+ ireg = XEXP (addr, 0);
+ }
+
+ if (CONSTANT_ADDRESS_P (XEXP (addr, 1))
+ || MEM_P (XEXP (addr, 1)))
+ {
+ if (offset)
+ {
+ if (CONST_INT_P (offset))
+ offset = plus_constant (XEXP (addr, 1), INTVAL (offset));
+ else
+ {
+ gcc_assert (CONST_INT_P (XEXP (addr, 1)));
+ offset = plus_constant (offset, INTVAL (XEXP (addr, 1)));
+ }
+ }
+ offset = XEXP (addr, 1);
+ }
+ else if (REG_P (XEXP (addr, 1)))
+ {
+ if (reg1)
+ ireg = reg1, breg = XEXP (addr, 1), reg1 = 0;
+ else
+ reg1 = XEXP (addr, 1);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XEXP (addr, 1)) == MULT);
+ gcc_assert (!ireg);
+ ireg = XEXP (addr, 1);
+ }
+ }
+
+ /* If REG1 is nonzero, figure out if it is a base or index register. */
+ if (reg1)
+ {
+ if (breg
+ || (flag_pic && GET_CODE (addr) == SYMBOL_REF)
+ || (offset
+ && (MEM_P (offset)
+ || (flag_pic && symbolic_operand (offset, SImode)))))
+ {
+ gcc_assert (!ireg);
+ ireg = reg1;
+ }
+ else
+ breg = reg1;
+ }
+
+ if (offset != 0)
+ {
+ if (flag_pic && symbolic_operand (offset, SImode))
+ {
+ if (breg && ireg)
+ {
+ debug_rtx (orig);
+ output_operand_lossage ("symbol used with both base and indexed registers");
+ }
+
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+ if (flag_pic > 1 && GET_CODE (offset) == CONST
+ && GET_CODE (XEXP (XEXP (offset, 0), 0)) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (XEXP (XEXP (offset, 0), 0)))
+ {
+ debug_rtx (orig);
+ output_operand_lossage ("symbol with offset used in PIC mode");
+ }
+#endif
+
+ /* symbol(reg) isn't PIC, but symbol[reg] is. */
+ if (breg)
+ {
+ ireg = breg;
+ breg = 0;
+ }
+
+ }
+
+ output_address (offset);
+ }
+
+ if (breg != 0)
+ fprintf (file, "(%s)", reg_names[REGNO (breg)]);
+
+ if (ireg != 0)
+ {
+ if (GET_CODE (ireg) == MULT)
+ ireg = XEXP (ireg, 0);
+ gcc_assert (REG_P (ireg));
+ fprintf (file, "[%s]", reg_names[REGNO (ireg)]);
+ }
+ break;
+
+ default:
+ output_addr_const (file, addr);
+ }
+}
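+
+/* Illustrative summary (not upstream text) of the syntax emitted above,
+   assuming reg_names yields "r1" and "r2":
+
+     (reg r1)                                        (r1)
+     (pre_dec (reg r1))                              -(r1)
+     (post_inc (reg r1))                             (r1)+
+     (mem (reg r1))                                  *(r1)
+     (plus (reg r1) (const_int 4))                   4(r1)
+     (plus (reg r1) (mult (reg r2) (const_int 4)))   (r1)[r2]  */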
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ if (code == '#')
+ fputc (ASM_DOUBLE_CHAR, file);
+ else if (code == '|')
+ fputs (REGISTER_PREFIX, file);
+ else if (code == 'c')
+ fputs (cond_name (x), file);
+ else if (code == 'C')
+ fputs (rev_cond_name (x), file);
+ else if (code == 'D' && CONST_INT_P (x) && INTVAL (x) < 0)
+ fprintf (file, "$" NEG_HWI_PRINT_HEX16, INTVAL (x));
+ else if (code == 'P' && CONST_INT_P (x))
+ fprintf (file, "$" HOST_WIDE_INT_PRINT_DEC, INTVAL (x) + 1);
+ else if (code == 'N' && CONST_INT_P (x))
+ fprintf (file, "$" HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
+ /* rotl instruction cannot deal with negative arguments. */
+ else if (code == 'R' && CONST_INT_P (x))
+ fprintf (file, "$" HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
+ else if (code == 'H' && CONST_INT_P (x))
+ fprintf (file, "$%d", (int) (0xffff & ~ INTVAL (x)));
+ else if (code == 'h' && CONST_INT_P (x))
+ fprintf (file, "$%d", (short) - INTVAL (x));
+ else if (code == 'B' && CONST_INT_P (x))
+ fprintf (file, "$%d", (int) (0xff & ~ INTVAL (x)));
+ else if (code == 'b' && CONST_INT_P (x))
+ fprintf (file, "$%d", (int) (0xff & - INTVAL (x)));
+ else if (code == 'M' && CONST_INT_P (x))
+ fprintf (file, "$%d", ~((1 << INTVAL (x)) - 1));
+ else if (REG_P (x))
+ fprintf (file, "%s", reg_names[REGNO (x)]);
+ else if (MEM_P (x))
+ output_address (XEXP (x, 0));
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
+ {
+ char dstr[30];
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (dstr), 0, 1);
+ fprintf (file, "$0f%s", dstr);
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
+ {
+ char dstr[30];
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (dstr), 0, 1);
+ fprintf (file, "$0%c%s", ASM_DOUBLE_CHAR, dstr);
+ }
+ else
+ {
+ if (flag_pic > 1 && symbolic_operand (x, SImode))
+ {
+ debug_rtx (x);
+ output_operand_lossage ("symbol used as immediate operand");
+ }
+ putc ('$', file);
+ output_addr_const (file, x);
+ }
+}
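+
+/* Examples of the codes above (illustrative): with x = (const_int 5),
+   'P' prints "$6", 'N' prints "$-6" and 'R' prints "$27" (32 - 5);
+   with no code, a REG prints its name, a MEM prints its address, and
+   other constants are prefixed with '$'.  */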
+
+const char *
+cond_name (rtx op)
+{
+ switch (GET_CODE (op))
+ {
+ case NE:
+ return "neq";
+ case EQ:
+ return "eql";
+ case GE:
+ return "geq";
+ case GT:
+ return "gtr";
+ case LE:
+ return "leq";
+ case LT:
+ return "lss";
+ case GEU:
+ return "gequ";
+ case GTU:
+ return "gtru";
+ case LEU:
+ return "lequ";
+ case LTU:
+ return "lssu";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+const char *
+rev_cond_name (rtx op)
+{
+ switch (GET_CODE (op))
+ {
+ case EQ:
+ return "neq";
+ case NE:
+ return "eql";
+ case LT:
+ return "geq";
+ case LE:
+ return "gtr";
+ case GT:
+ return "leq";
+ case GE:
+ return "lss";
+ case LTU:
+ return "gequ";
+ case LEU:
+ return "gtru";
+ case GTU:
+ return "lequ";
+ case GEU:
+ return "lssu";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static bool
+vax_float_literal (rtx c)
+{
+ enum machine_mode mode;
+ REAL_VALUE_TYPE r, s;
+ int i;
+
+ if (GET_CODE (c) != CONST_DOUBLE)
+ return false;
+
+ mode = GET_MODE (c);
+
+ if (c == const_tiny_rtx[(int) mode][0]
+ || c == const_tiny_rtx[(int) mode][1]
+ || c == const_tiny_rtx[(int) mode][2])
+ return true;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c);
+
+ for (i = 0; i < 7; i++)
+ {
+ int x = 1 << i;
+ bool ok;
+ REAL_VALUE_FROM_INT (s, x, 0, mode);
+
+ if (REAL_VALUES_EQUAL (r, s))
+ return true;
+ ok = exact_real_inverse (mode, &s);
+ gcc_assert (ok);
+ if (REAL_VALUES_EQUAL (r, s))
+ return true;
+ }
+ return false;
+}
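+
+/* Illustrative consequence of the loop above: the powers of two
+   1.0, 2.0, ... 64.0 and their reciprocals 0.5, 0.25, ... 1/64 are
+   all treated as cheap float literals.  */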
+
+
+/* Return the cost in cycles of a memory address, relative to register
+ indirect.
+
+ Each of the following adds the indicated number of cycles:
+
+ 1 - symbolic address
+ 1 - pre-decrement
+ 1 - indexing and/or offset(register)
+ 2 - indirect */
+
+
+static int
+vax_address_cost_1 (rtx addr)
+{
+ int reg = 0, indexed = 0, indir = 0, offset = 0, predec = 0;
+ rtx plus_op0 = 0, plus_op1 = 0;
+ restart:
+ switch (GET_CODE (addr))
+ {
+ case PRE_DEC:
+ predec = 1;
+ case REG:
+ case SUBREG:
+ case POST_INC:
+ reg = 1;
+ break;
+ case MULT:
+ indexed = 1; /* 2 on VAX 2 */
+ break;
+ case CONST_INT:
+ /* byte offsets cost nothing (on a VAX 2, they cost 1 cycle) */
+ if (offset == 0)
+ offset = (unsigned HOST_WIDE_INT)(INTVAL(addr)+128) > 256;
+ break;
+ case CONST:
+ case SYMBOL_REF:
+ offset = 1; /* 2 on VAX 2 */
+ break;
+ case LABEL_REF: /* this is probably a byte offset from the pc */
+ if (offset == 0)
+ offset = 1;
+ break;
+ case PLUS:
+ if (plus_op0)
+ plus_op1 = XEXP (addr, 0);
+ else
+ plus_op0 = XEXP (addr, 0);
+ addr = XEXP (addr, 1);
+ goto restart;
+ case MEM:
+ indir = 2; /* 3 on VAX 2 */
+ addr = XEXP (addr, 0);
+ goto restart;
+ default:
+ break;
+ }
+
+ /* Up to 3 things can be added in an address. They are stored in
+ plus_op0, plus_op1, and addr. */
+
+ if (plus_op0)
+ {
+ addr = plus_op0;
+ plus_op0 = 0;
+ goto restart;
+ }
+ if (plus_op1)
+ {
+ addr = plus_op1;
+ plus_op1 = 0;
+ goto restart;
+ }
+ /* Indexing and register+offset can both be used (except on a VAX 2)
+ without increasing execution time over either one alone. */
+ if (reg && indexed && offset)
+ return reg + indir + offset + predec;
+ return reg + indexed + indir + offset + predec;
+}
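+
+/* Some resulting costs (illustrative, before the extra 1 added by
+   vax_address_cost below): "(r1)" and "(r1)+" cost 1, "-(r1)" costs 2,
+   a symbolic address costs 1, an indirect "*sym" costs 3, and "4(r1)"
+   costs 1 or 2 depending on whether the offset fits in a byte.  */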
+
+static int
+vax_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ return (1 + (REG_P (x) ? 0 : vax_address_cost_1 (x)));
+}
+
+/* Cost of an expression on a VAX. This version has costs tuned for the
+ CVAX chip (found in the VAX 3 series) with comments for variations on
+ other models.
+
+ FIXME: The costs need review, particularly for TRUNCATE, FLOAT_EXTEND
+ and FLOAT_TRUNCATE. We need a -mcpu option to allow provision of
+ costs on a per cpu basis. */
+
+static bool
+vax_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = GET_MODE (x);
+ int i = 0; /* may be modified in switch */
+ const char *fmt = GET_RTX_FORMAT (code); /* may be modified in switch */
+
+ switch (code)
+ {
+ /* On a VAX, constants from 0..63 are cheap because they can use the
+ 1 byte literal constant format. Compare to -1 should be made cheap
+ so that decrement-and-branch insns can be formed more easily (if
+ the value -1 is copied to a register some decrement-and-branch
+ patterns will not match). */
+ case CONST_INT:
+ if (INTVAL (x) == 0)
+ {
+ *total = 0;
+ return true;
+ }
+ if (outer_code == AND)
+ {
+ *total = ((unsigned HOST_WIDE_INT) ~INTVAL (x) <= 077) ? 1 : 2;
+ return true;
+ }
+ if ((unsigned HOST_WIDE_INT) INTVAL (x) <= 077
+ || (outer_code == COMPARE
+ && INTVAL (x) == -1)
+ || ((outer_code == PLUS || outer_code == MINUS)
+ && (unsigned HOST_WIDE_INT) -INTVAL (x) <= 077))
+ {
+ *total = 1;
+ return true;
+ }
+ /* FALLTHRU */
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = 3;
+ return true;
+
+ case CONST_DOUBLE:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ *total = vax_float_literal (x) ? 5 : 8;
+ else
+ *total = ((CONST_DOUBLE_HIGH (x) == 0
+ && (unsigned HOST_WIDE_INT) CONST_DOUBLE_LOW (x) < 64)
+ || (outer_code == PLUS
+ && CONST_DOUBLE_HIGH (x) == -1
+ && (unsigned HOST_WIDE_INT)-CONST_DOUBLE_LOW (x) < 64))
+ ? 2 : 5;
+ return true;
+
+ case POST_INC:
+ *total = 2;
+ return true; /* Implies register operand. */
+
+ case PRE_DEC:
+ *total = 3;
+ return true; /* Implies register operand. */
+
+ case MULT:
+ switch (mode)
+ {
+ case DFmode:
+ *total = 16; /* 4 on VAX 9000 */
+ break;
+ case SFmode:
+ *total = 9; /* 4 on VAX 9000, 12 on VAX 2 */
+ break;
+ case DImode:
+ *total = 16; /* 6 on VAX 9000, 28 on VAX 2 */
+ break;
+ case SImode:
+ case HImode:
+ case QImode:
+ *total = 10; /* 3-4 on VAX 9000, 20-28 on VAX 2 */
+ break;
+ default:
+ *total = MAX_COST; /* Mode is not supported. */
+ return true;
+ }
+ break;
+
+ case UDIV:
+ if (mode != SImode)
+ {
+ *total = MAX_COST; /* Mode is not supported. */
+ return true;
+ }
+ *total = 17;
+ break;
+
+ case DIV:
+ if (mode == DImode)
+ *total = 30; /* Highly variable. */
+ else if (mode == DFmode)
+ /* divide takes 28 cycles if the result is not zero, 13 otherwise */
+ *total = 24;
+ else
+ *total = 11; /* 25 on VAX 2 */
+ break;
+
+ case MOD:
+ *total = 23;
+ break;
+
+ case UMOD:
+ if (mode != SImode)
+ {
+ *total = MAX_COST; /* Mode is not supported. */
+ return true;
+ }
+ *total = 29;
+ break;
+
+ case FLOAT:
+ *total = (6 /* 4 on VAX 9000 */
+ + (mode == DFmode) + (GET_MODE (XEXP (x, 0)) != SImode));
+ break;
+
+ case FIX:
+ *total = 7; /* 17 on VAX 2 */
+ break;
+
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (mode == DImode)
+ *total = 12;
+ else
+ *total = 10; /* 6 on VAX 9000 */
+ break;
+
+ case ROTATE:
+ case ROTATERT:
+ *total = 6; /* 5 on VAX 2, 4 on VAX 9000 */
+ if (CONST_INT_P (XEXP (x, 1)))
+ fmt = "e"; /* all constant rotate counts are short */
+ break;
+
+ case PLUS:
+ case MINUS:
+ *total = (mode == DFmode) ? 13 : 8; /* 6/8 on VAX 9000, 16/15 on VAX 2 */
+ /* Small integer operands can use subl2 and addl2. */
+ if ((CONST_INT_P (XEXP (x, 1)))
+ && (unsigned HOST_WIDE_INT)(INTVAL (XEXP (x, 1)) + 63) < 127)
+ fmt = "e";
+ break;
+
+ case IOR:
+ case XOR:
+ *total = 3;
+ break;
+
+ case AND:
+ /* AND is special because the first operand is complemented. */
+ *total = 3;
+ if (CONST_INT_P (XEXP (x, 0)))
+ {
+ if ((unsigned HOST_WIDE_INT)~INTVAL (XEXP (x, 0)) > 63)
+ *total = 4;
+ fmt = "e";
+ i = 1;
+ }
+ break;
+
+ case NEG:
+ if (mode == DFmode)
+ *total = 9;
+ else if (mode == SFmode)
+ *total = 6;
+ else if (mode == DImode)
+ *total = 4;
+ else
+ *total = 2;
+ break;
+
+ case NOT:
+ *total = 2;
+ break;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ *total = 15;
+ break;
+
+ case MEM:
+ if (mode == DImode || mode == DFmode)
+ *total = 5; /* 7 on VAX 2 */
+ else
+ *total = 3; /* 4 on VAX 2 */
+ x = XEXP (x, 0);
+ if (!REG_P (x) && GET_CODE (x) != POST_INC)
+ *total += vax_address_cost_1 (x);
+ return true;
+
+ case FLOAT_EXTEND:
+ case FLOAT_TRUNCATE:
+ case TRUNCATE:
+ *total = 3; /* FIXME: Costs need to be checked */
+ break;
+
+ default:
+ return false;
+ }
+
+ /* Now look inside the expression. Operands which are not registers or
+ short constants add to the cost.
+
+ FMT and I may have been adjusted in the switch above for instructions
+ which require special handling. */
+
+ while (*fmt++ == 'e')
+ {
+ rtx op = XEXP (x, i);
+
+ i += 1;
+ code = GET_CODE (op);
+
+ /* A NOT is likely to be found as the first operand of an AND
+ (in which case the relevant cost is of the operand inside
+ the not) and not likely to be found anywhere else. */
+ if (code == NOT)
+ op = XEXP (op, 0), code = GET_CODE (op);
+
+ switch (code)
+ {
+ case CONST_INT:
+ if ((unsigned HOST_WIDE_INT)INTVAL (op) > 63
+ && GET_MODE (x) != QImode)
+ *total += 1; /* 2 on VAX 2 */
+ break;
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total += 1; /* 2 on VAX 2 */
+ break;
+ case CONST_DOUBLE:
+ if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT)
+ {
+ /* Registers are faster than floating point constants -- even
+ those constants which can be encoded in a single byte. */
+ if (vax_float_literal (op))
+ *total += 1;
+ else
+ *total += (GET_MODE (x) == DFmode) ? 3 : 2;
+ }
+ else
+ {
+ if (CONST_DOUBLE_HIGH (op) != 0
+ || (unsigned HOST_WIDE_INT)CONST_DOUBLE_LOW (op) > 63)
+ *total += 2;
+ }
+ break;
+ case MEM:
+ *total += 1; /* 2 on VAX 2 */
+ if (!REG_P (XEXP (op, 0)))
+ *total += vax_address_cost_1 (XEXP (op, 0));
+ break;
+ case REG:
+ case SUBREG:
+ break;
+ default:
+ *total += 1;
+ break;
+ }
+ }
+ return true;
+}
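+
+/* Worked example (illustrative): (plus:SI (reg) (const_int 4)) costs 8,
+   since the small constant qualifies for the short-literal format; with
+   (const_int 1000) the operand walk adds 1, giving 9.  */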
+
+/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
+ Used for C++ multiple inheritance.
+ .mask ^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11> #conservative entry mask
+ addl2 $DELTA, 4(ap) #adjust first argument
+ jmp FUNCTION+2 #jump beyond FUNCTION's entry mask
+*/
+
+static void
+vax_output_mi_thunk (FILE * file,
+ tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
+ tree function)
+{
+ fprintf (file, "\t.word 0x0ffc\n\taddl2 $" HOST_WIDE_INT_PRINT_DEC, delta);
+ asm_fprintf (file, ",4(%Rap)\n");
+ fprintf (file, "\tjmp ");
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fprintf (file, "+2\n");
+}
+
+static rtx
+vax_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
+ int incoming ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (Pmode, VAX_STRUCT_VALUE_REGNUM);
+}
+
+static rtx
+vax_builtin_setjmp_frame_value (void)
+{
+ return hard_frame_pointer_rtx;
+}
+
+/* Worker function for NOTICE_UPDATE_CC. */
+
+void
+vax_notice_update_cc (rtx exp, rtx insn ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (exp) == SET)
+ {
+ if (GET_CODE (SET_SRC (exp)) == CALL)
+ CC_STATUS_INIT;
+ else if (GET_CODE (SET_DEST (exp)) != ZERO_EXTRACT
+ && GET_CODE (SET_DEST (exp)) != PC)
+ {
+ cc_status.flags = 0;
+ /* The integer operations below don't set carry or
+ set it in an incompatible way. That's ok though
+ as the Z bit is all we need when doing unsigned
+ comparisons on the result of these insns (since
+ they're always with 0). Set CC_NO_OVERFLOW to
+ generate the correct unsigned branches. */
+ switch (GET_CODE (SET_SRC (exp)))
+ {
+ case NEG:
+ if (GET_MODE_CLASS (GET_MODE (exp)) == MODE_FLOAT)
+ break;
+ case AND:
+ case IOR:
+ case XOR:
+ case NOT:
+ case MEM:
+ case REG:
+ cc_status.flags = CC_NO_OVERFLOW;
+ break;
+ default:
+ break;
+ }
+ cc_status.value1 = SET_DEST (exp);
+ cc_status.value2 = SET_SRC (exp);
+ }
+ }
+ else if (GET_CODE (exp) == PARALLEL
+ && GET_CODE (XVECEXP (exp, 0, 0)) == SET)
+ {
+ if (GET_CODE (SET_SRC (XVECEXP (exp, 0, 0))) == CALL)
+ CC_STATUS_INIT;
+ else if (GET_CODE (SET_DEST (XVECEXP (exp, 0, 0))) != PC)
+ {
+ cc_status.flags = 0;
+ cc_status.value1 = SET_DEST (XVECEXP (exp, 0, 0));
+ cc_status.value2 = SET_SRC (XVECEXP (exp, 0, 0));
+ }
+ else
+ /* PARALLELs whose first element sets the PC are aob,
+ sob insns. They do change the cc's. */
+ CC_STATUS_INIT;
+ }
+ else
+ CC_STATUS_INIT;
+ if (cc_status.value1 && REG_P (cc_status.value1)
+ && cc_status.value2
+ && reg_overlap_mentioned_p (cc_status.value1, cc_status.value2))
+ cc_status.value2 = 0;
+ if (cc_status.value1 && MEM_P (cc_status.value1)
+ && cc_status.value2
+ && MEM_P (cc_status.value2))
+ cc_status.value2 = 0;
+ /* Actual condition, one line up, should be that value2's address
+ depends on value1, but that is too much of a pain. */
+}
+
+/* Output integer move instructions. */
+
+const char *
+vax_output_int_move (rtx insn ATTRIBUTE_UNUSED, rtx *operands,
+ enum machine_mode mode)
+{
+ rtx hi[3], lo[3];
+ const char *pattern_hi, *pattern_lo;
+
+ switch (mode)
+ {
+ case DImode:
+ if (operands[1] == const0_rtx)
+ return "clrq %0";
+ if (TARGET_QMATH && optimize_size
+ && (CONST_INT_P (operands[1])
+ || GET_CODE (operands[1]) == CONST_DOUBLE))
+ {
+ unsigned HOST_WIDE_INT hval, lval;
+ int n;
+
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ gcc_assert (HOST_BITS_PER_WIDE_INT != 64);
+
+ /* Make sure only the low 32 bits are valid. */
+ lval = CONST_DOUBLE_LOW (operands[1]) & 0xffffffff;
+ hval = CONST_DOUBLE_HIGH (operands[1]) & 0xffffffff;
+ }
+ else
+ {
+ lval = INTVAL (operands[1]);
+ hval = 0;
+ }
+
+      /* Here we check whether the 64-bit value is really a 6-bit value
+	 shifted left by some arbitrary amount.  If so, we can use ashq to
+	 shift it to the correct value, saving 7 bytes (1 addr-mode byte +
+	 8 bytes - 1 shift byte - 1 short literal byte).  */
+ if (lval != 0
+ && (n = exact_log2 (lval & (- lval))) != -1
+ && (lval >> n) < 64)
+ {
+ lval >>= n;
+
+	      /* On 32-bit platforms, if the 6 bits didn't overflow into
+		 the upper 32-bit value, that value had better be 0.  If it
+		 did overflow, make sure it wasn't by too much.  */
+ if (HOST_BITS_PER_WIDE_INT == 32 && hval != 0)
+ {
+ if (n <= 26 || hval >= ((unsigned)1 << (n - 26)))
+ n = 0; /* failure */
+ else
+ lval |= hval << (32 - n);
+ }
+ /* If n is 0, then ashq is not the best way to emit this. */
+ if (n > 0)
+ {
+ operands[1] = GEN_INT (lval);
+ operands[2] = GEN_INT (n);
+ return "ashq %2,%1,%0";
+ }
+#if HOST_BITS_PER_WIDE_INT == 32
+ }
+      /* On 32-bit platforms, if the low 32-bit value is 0, check the
+	 upper 32-bit value. */
+ else if (hval != 0
+ && (n = exact_log2 (hval & (- hval)) - 1) != -1
+ && (hval >> n) < 64)
+ {
+ operands[1] = GEN_INT (hval >> n);
+ operands[2] = GEN_INT (n + 32);
+ return "ashq %2,%1,%0";
+#endif
+ }
+ }
+
+ if (TARGET_QMATH
+ && (!MEM_P (operands[0])
+ || GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
+ || GET_CODE (XEXP (operands[0], 0)) == POST_INC
+ || !illegal_addsub_di_memory_operand (operands[0], DImode))
+ && ((CONST_INT_P (operands[1])
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[1]) >= 64)
+ || GET_CODE (operands[1]) == CONST_DOUBLE))
+ {
+ hi[0] = operands[0];
+ hi[1] = operands[1];
+
+ split_quadword_operands (insn, SET, hi, lo, 2);
+
+ pattern_lo = vax_output_int_move (NULL, lo, SImode);
+ pattern_hi = vax_output_int_move (NULL, hi, SImode);
+
+	  /* If the patterns are just movl/movl or pushl/pushl, then a movq
+	     will be shorter (1 opcode byte + 1 addrmode byte + 8 immediate
+	     value bytes vs. 2 opcode bytes + 2 addrmode bytes + 8 immediate
+	     value bytes).  */
+ if ((!strncmp (pattern_lo, "movl", 4)
+ && !strncmp (pattern_hi, "movl", 4))
+ || (!strncmp (pattern_lo, "pushl", 5)
+ && !strncmp (pattern_hi, "pushl", 5)))
+ return "movq %1,%0";
+
+ if (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ {
+ output_asm_insn (pattern_hi, hi);
+ operands[0] = lo[0];
+ operands[1] = lo[1];
+ operands[2] = lo[2];
+ return pattern_lo;
+ }
+ else
+ {
+ output_asm_insn (pattern_lo, lo);
+ operands[0] = hi[0];
+ operands[1] = hi[1];
+ operands[2] = hi[2];
+ return pattern_hi;
+ }
+ }
+ return "movq %1,%0";
+
+ case SImode:
+ if (symbolic_operand (operands[1], SImode))
+ {
+ if (push_operand (operands[0], SImode))
+ return "pushab %a1";
+ return "movab %a1,%0";
+ }
+
+ if (operands[1] == const0_rtx)
+ {
+ if (push_operand (operands[1], SImode))
+ return "pushl %1";
+ return "clrl %0";
+ }
+
+ if (CONST_INT_P (operands[1])
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[1]) >= 64)
+ {
+ HOST_WIDE_INT i = INTVAL (operands[1]);
+ int n;
+ if ((unsigned HOST_WIDE_INT)(~i) < 64)
+ return "mcoml %N1,%0";
+ if ((unsigned HOST_WIDE_INT)i < 0x100)
+ return "movzbl %1,%0";
+ if (i >= -0x80 && i < 0)
+ return "cvtbl %1,%0";
+ if (optimize_size
+ && (n = exact_log2 (i & (-i))) != -1
+ && ((unsigned HOST_WIDE_INT)i >> n) < 64)
+ {
+ operands[1] = GEN_INT ((unsigned HOST_WIDE_INT)i >> n);
+ operands[2] = GEN_INT (n);
+ return "ashl %2,%1,%0";
+ }
+ if ((unsigned HOST_WIDE_INT)i < 0x10000)
+ return "movzwl %1,%0";
+ if (i >= -0x8000 && i < 0)
+ return "cvtwl %1,%0";
+ }
+ if (push_operand (operands[0], SImode))
+ return "pushl %1";
+ return "movl %1,%0";
+
+ case HImode:
+ if (CONST_INT_P (operands[1]))
+ {
+ HOST_WIDE_INT i = INTVAL (operands[1]);
+ if (i == 0)
+ return "clrw %0";
+ else if ((unsigned HOST_WIDE_INT)i < 64)
+ return "movw %1,%0";
+ else if ((unsigned HOST_WIDE_INT)~i < 64)
+ return "mcomw %H1,%0";
+ else if ((unsigned HOST_WIDE_INT)i < 256)
+ return "movzbw %1,%0";
+ else if (i >= -0x80 && i < 0)
+ return "cvtbw %1,%0";
+ }
+ return "movw %1,%0";
+
+ case QImode:
+ if (CONST_INT_P (operands[1]))
+ {
+ HOST_WIDE_INT i = INTVAL (operands[1]);
+ if (i == 0)
+ return "clrb %0";
+ else if ((unsigned HOST_WIDE_INT)~i < 64)
+ return "mcomb %B1,%0";
+ }
+ return "movb %1,%0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
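+
+/* Example of the ashq trick above (illustrative): the DImode constant
+   0x140 is 5 << 6, so with TARGET_QMATH and -Os it is emitted as
+   "ashq $6,$5,%0" instead of materializing the full 8-byte
+   immediate.  */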
+
+/* Output integer add instructions.
+
+ The space-time-opcode tradeoffs for addition vary by model of VAX.
+
+ On a VAX 3 "movab (r1)[r2],r3" is faster than "addl3 r1,r2,r3",
+   but it is not faster on other models.
+
+ "movab #(r1),r2" is usually shorter than "addl3 #,r1,r2", and is
+ faster on a VAX 3, but some VAXen (e.g. VAX 9000) will stall if
+ a register is used in an address too soon after it is set.
+   Compromise by using movab only when it is shorter than the add, or
+   when the base register in the address is one of sp, ap, or fp, which
+   are not modified very often.  */
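+
+/* For instance (an illustrative sketch): "addl3 $1000,r1,r2" can be
+   emitted as "movab 1000(r1),r2".  The code below does this only when
+   the constant is outside the short-literal range and either fits in a
+   word displacement or the base register is ap, fp or sp.  */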
+
+const char *
+vax_output_int_add (rtx insn, rtx *operands, enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx low[3];
+ const char *pattern;
+ int carry = 1;
+ bool sub;
+
+ if (TARGET_QMATH && 0)
+ debug_rtx (insn);
+
+ split_quadword_operands (insn, PLUS, operands, low, 3);
+
+ if (TARGET_QMATH)
+ {
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+ gcc_assert (!flag_pic || !external_memory_operand (low[2], SImode));
+ gcc_assert (!flag_pic || !external_memory_operand (low[0], SImode));
+#endif
+
+ /* No reason to add a 0 to the low part and thus no carry, so just
+ emit the appropriate add/sub instruction. */
+ if (low[2] == const0_rtx)
+ return vax_output_int_add (NULL, operands, SImode);
+
+ /* Are we doing addition or subtraction? */
+ sub = CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0;
+
+	      /* We can't use vax_output_int_add since some of the patterns
+		 don't modify the carry bit.  */
+ if (sub)
+ {
+ if (low[2] == constm1_rtx)
+ pattern = "decl %0";
+ else
+ pattern = "subl2 $%n2,%0";
+ }
+ else
+ {
+ if (low[2] == const1_rtx)
+ pattern = "incl %0";
+ else
+ pattern = "addl2 %2,%0";
+ }
+ output_asm_insn (pattern, low);
+
+	  /* In 2's complement, -n = ~n + 1.  Since we are dealing with
+	     two 32-bit parts, we complement each and then add one to the
+	     low part.  We know that the low part can't overflow, since
+	     its value can never be 0.  */
+ if (sub)
+ return "sbwc %N2,%0";
+ return "adwc %2,%0";
+ }
+
+ /* Add low parts. */
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (low[2] == const0_rtx)
+ /* Should examine operand, punt if not POST_INC. */
+ pattern = "tstl %0", carry = 0;
+ else if (low[2] == const1_rtx)
+ pattern = "incl %0";
+ else
+ pattern = "addl2 %2,%0";
+ }
+ else
+ {
+ if (low[2] == const0_rtx)
+ pattern = "movl %1,%0", carry = 0;
+ else
+ pattern = "addl3 %2,%1,%0";
+ }
+ if (pattern)
+ output_asm_insn (pattern, low);
+ if (!carry)
+ /* If CARRY is 0, we don't have any carry value to worry about. */
+ return get_insn_template (CODE_FOR_addsi3, insn);
+ /* %0 = C + %1 + %2 */
+ if (!rtx_equal_p (operands[0], operands[1]))
+ output_asm_insn ((operands[1] == const0_rtx
+ ? "clrl %0"
+ : "movl %1,%0"), operands);
+ return "adwc %2,%0";
+ }
+
+ case SImode:
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (operands[2] == const1_rtx)
+ return "incl %0";
+ if (operands[2] == constm1_rtx)
+ return "decl %0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subl2 $%n2,%0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 64
+ && REG_P (operands[1])
+ && ((INTVAL (operands[2]) < 32767 && INTVAL (operands[2]) > -32768)
+ || REGNO (operands[1]) > 11))
+ return "movab %c2(%1),%0";
+ if (REG_P (operands[0]) && symbolic_operand (operands[2], SImode))
+ return "movab %a2[%0],%0";
+ return "addl2 %2,%0";
+ }
+
+ if (rtx_equal_p (operands[0], operands[2]))
+ {
+ if (REG_P (operands[0]) && symbolic_operand (operands[1], SImode))
+ return "movab %a1[%0],%0";
+ return "addl2 %1,%0";
+ }
+
+ if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) < 32767
+ && INTVAL (operands[2]) > -32768
+ && REG_P (operands[1])
+ && push_operand (operands[0], SImode))
+ return "pushab %c2(%1)";
+
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subl3 $%n2,%1,%0";
+
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) INTVAL (operands[2]) >= 64
+ && REG_P (operands[1])
+ && ((INTVAL (operands[2]) < 32767 && INTVAL (operands[2]) > -32768)
+ || REGNO (operands[1]) > 11))
+ return "movab %c2(%1),%0";
+
+ /* Add this if using gcc on a VAX 3xxx:
+ if (REG_P (operands[1]) && REG_P (operands[2]))
+ return "movab (%1)[%2],%0";
+ */
+
+ if (REG_P (operands[1]) && symbolic_operand (operands[2], SImode))
+ {
+ if (push_operand (operands[0], SImode))
+ return "pushab %a2[%1]";
+ return "movab %a2[%1],%0";
+ }
+
+ if (REG_P (operands[2]) && symbolic_operand (operands[1], SImode))
+ {
+ if (push_operand (operands[0], SImode))
+ return "pushab %a1[%2]";
+ return "movab %a1[%2],%0";
+ }
+
+ if (flag_pic && REG_P (operands[0])
+ && symbolic_operand (operands[2], SImode))
+ return "movab %a2,%0;addl2 %1,%0";
+
+ if (flag_pic
+ && (symbolic_operand (operands[1], SImode)
+	  || symbolic_operand (operands[2], SImode)))
+ debug_rtx (insn);
+
+ return "addl3 %1,%2,%0";
+
+ case HImode:
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (operands[2] == const1_rtx)
+ return "incw %0";
+ if (operands[2] == constm1_rtx)
+ return "decw %0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subw2 $%n2,%0";
+ return "addw2 %2,%0";
+ }
+ if (rtx_equal_p (operands[0], operands[2]))
+ return "addw2 %1,%0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subw3 $%n2,%1,%0";
+ return "addw3 %1,%2,%0";
+
+ case QImode:
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (operands[2] == const1_rtx)
+ return "incb %0";
+ if (operands[2] == constm1_rtx)
+ return "decb %0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subb2 $%n2,%0";
+ return "addb2 %2,%0";
+ }
+ if (rtx_equal_p (operands[0], operands[2]))
+ return "addb2 %1,%0";
+ if (CONST_INT_P (operands[2])
+ && (unsigned HOST_WIDE_INT) (- INTVAL (operands[2])) < 64)
+ return "subb3 $%n2,%1,%0";
+ return "addb3 %1,%2,%0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+const char *
+vax_output_int_subtract (rtx insn, rtx *operands, enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx low[3];
+ const char *pattern;
+ int carry = 1;
+
+ if (TARGET_QMATH && 0)
+ debug_rtx (insn);
+
+ split_quadword_operands (insn, MINUS, operands, low, 3);
+
+ if (TARGET_QMATH)
+ {
+ if (operands[1] == const0_rtx && low[1] == const0_rtx)
+ {
+ /* Negation is tricky. It's basically complement and increment.
+ Negate hi, then lo, and subtract the carry back. */
+ if ((MEM_P (low[0]) && GET_CODE (XEXP (low[0], 0)) == POST_INC)
+ || (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == POST_INC))
+ fatal_insn ("illegal operand detected", insn);
+ output_asm_insn ("mnegl %2,%0", operands);
+ output_asm_insn ("mnegl %2,%0", low);
+ return "sbwc $0,%0";
+ }
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ gcc_assert (rtx_equal_p (low[0], low[1]));
+ if (low[2] == const1_rtx)
+ output_asm_insn ("decl %0", low);
+ else
+ output_asm_insn ("subl2 %2,%0", low);
+ return "sbwc %2,%0";
+ }
+
+ /* Subtract low parts. */
+ if (rtx_equal_p (operands[0], operands[1]))
+ {
+ if (low[2] == const0_rtx)
+ pattern = 0, carry = 0;
+ else if (low[2] == constm1_rtx)
+ pattern = "decl %0";
+ else
+ pattern = "subl2 %2,%0";
+ }
+ else
+ {
+ if (low[2] == constm1_rtx)
+ pattern = "decl %0";
+ else if (low[2] == const0_rtx)
+ pattern = get_insn_template (CODE_FOR_movsi, insn), carry = 0;
+ else
+ pattern = "subl3 %2,%1,%0";
+ }
+ if (pattern)
+ output_asm_insn (pattern, low);
+ if (carry)
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ return "movl %1,%0;sbwc %2,%0";
+	      /* %0 = %1 - %2 - C */
+	      return "sbwc %2,%0";
+ }
+ return get_insn_template (CODE_FOR_subsi3, insn);
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* True if X is an rtx for a constant that is a valid address. */
+
+bool
+legitimate_constant_address_p (rtx x)
+{
+ if (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
+ || CONST_INT_P (x) || GET_CODE (x) == HIGH)
+ return true;
+ if (GET_CODE (x) != CONST)
+ return false;
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+ if (flag_pic
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (XEXP (XEXP (x, 0), 0)))
+ return false;
+#endif
+ return true;
+}
+
+/* True if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_constant_p (rtx x ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
+/* The other macros defined here are used only in legitimate_address_p (). */
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or, if not strict, if it is a pseudo reg. */
+#define INDEX_REGISTER_P(X, STRICT) \
+(REG_P (X) && (!(STRICT) || REGNO_OK_FOR_INDEX_P (REGNO (X))))
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or, if not strict, if it is a pseudo reg. */
+#define BASE_REGISTER_P(X, STRICT) \
+(REG_P (X) && (!(STRICT) || REGNO_OK_FOR_BASE_P (REGNO (X))))
+
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+
+/* Re-definition of CONSTANT_ADDRESS_P, which is true only when there
+ are no SYMBOL_REFs for external symbols present. */
+
+static bool
+indirectable_constant_address_p (rtx x, bool indirect)
+{
+ if (GET_CODE (x) == SYMBOL_REF)
+ return !flag_pic || SYMBOL_REF_LOCAL_P (x) || !indirect;
+
+ if (GET_CODE (x) == CONST)
+ return !flag_pic
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
+ || SYMBOL_REF_LOCAL_P (XEXP (XEXP (x, 0), 0));
+
+ return CONSTANT_ADDRESS_P (x);
+}
+
+#else /* not NO_EXTERNAL_INDIRECT_ADDRESS */
+
+static bool
+indirectable_constant_address_p (rtx x, bool indirect ATTRIBUTE_UNUSED)
+{
+ return CONSTANT_ADDRESS_P (x);
+}
+
+#endif /* not NO_EXTERNAL_INDIRECT_ADDRESS */
+
+/* True if X is an address which can be indirected. External symbols
+ could be in a sharable image library, so we disallow those. */
+
+static bool
+indirectable_address_p (rtx x, bool strict, bool indirect)
+{
+ if (indirectable_constant_address_p (x, indirect)
+ || BASE_REGISTER_P (x, strict))
+ return true;
+ if (GET_CODE (x) != PLUS
+ || !BASE_REGISTER_P (XEXP (x, 0), strict)
+ || (flag_pic && !CONST_INT_P (XEXP (x, 1))))
+ return false;
+ return indirectable_constant_address_p (XEXP (x, 1), indirect);
+}
+
+/* Return true if x is a valid address not using indexing.
+ (This much is the easy part.) */
+static bool
+nonindexed_address_p (rtx x, bool strict)
+{
+ rtx xfoo0;
+ if (REG_P (x))
+ {
+ extern rtx *reg_equiv_mem;
+ if (! reload_in_progress
+ || reg_equiv_mem[REGNO (x)] == 0
+ || indirectable_address_p (reg_equiv_mem[REGNO (x)], strict, false))
+ return true;
+ }
+ if (indirectable_constant_address_p (x, false))
+ return true;
+ if (indirectable_address_p (x, strict, false))
+ return true;
+ xfoo0 = XEXP (x, 0);
+ if (MEM_P (x) && indirectable_address_p (xfoo0, strict, true))
+ return true;
+ if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC)
+ && BASE_REGISTER_P (xfoo0, strict))
+ return true;
+ return false;
+}
+
+/* True if PROD is either a reg times the size of mode MODE (where MODE
+   is at most 8 bytes), or just a reg if MODE is one byte. */
+
+static bool
+index_term_p (rtx prod, enum machine_mode mode, bool strict)
+{
+ rtx xfoo0, xfoo1;
+
+ if (GET_MODE_SIZE (mode) == 1)
+ return BASE_REGISTER_P (prod, strict);
+
+ if (GET_CODE (prod) != MULT || GET_MODE_SIZE (mode) > 8)
+ return false;
+
+ xfoo0 = XEXP (prod, 0);
+ xfoo1 = XEXP (prod, 1);
+
+ if (CONST_INT_P (xfoo0)
+ && INTVAL (xfoo0) == (int)GET_MODE_SIZE (mode)
+ && INDEX_REGISTER_P (xfoo1, strict))
+ return true;
+
+ if (CONST_INT_P (xfoo1)
+ && INTVAL (xfoo1) == (int)GET_MODE_SIZE (mode)
+ && INDEX_REGISTER_P (xfoo0, strict))
+ return true;
+
+ return false;
+}
+
+/* Return true if X is the sum of a register
+ and a valid index term for mode MODE. */
+static bool
+reg_plus_index_p (rtx x, enum machine_mode mode, bool strict)
+{
+ rtx xfoo0, xfoo1;
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ xfoo0 = XEXP (x, 0);
+ xfoo1 = XEXP (x, 1);
+
+ if (BASE_REGISTER_P (xfoo0, strict) && index_term_p (xfoo1, mode, strict))
+ return true;
+
+ if (BASE_REGISTER_P (xfoo1, strict) && index_term_p (xfoo0, mode, strict))
+ return true;
+
+ return false;
+}
+
+/* Return true if xfoo0 and xfoo1 constitute a valid indexed address. */
+static bool
+indexable_address_p (rtx xfoo0, rtx xfoo1, enum machine_mode mode, bool strict)
+{
+ if (!CONSTANT_ADDRESS_P (xfoo0))
+ return false;
+ if (BASE_REGISTER_P (xfoo1, strict))
+ return !flag_pic || mode == QImode;
+ if (flag_pic && symbolic_operand (xfoo0, SImode))
+ return false;
+ return reg_plus_index_p (xfoo1, mode, strict);
+}
+
+/* legitimate_address_p returns true if it recognizes an RTL expression "x"
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address. */
+bool
+vax_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ rtx xfoo0, xfoo1;
+
+ if (nonindexed_address_p (x, strict))
+ return true;
+
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ /* Handle <address>[index] represented with index-sum outermost */
+
+ xfoo0 = XEXP (x, 0);
+ xfoo1 = XEXP (x, 1);
+
+ if (index_term_p (xfoo0, mode, strict)
+ && nonindexed_address_p (xfoo1, strict))
+ return true;
+
+ if (index_term_p (xfoo1, mode, strict)
+ && nonindexed_address_p (xfoo0, strict))
+ return true;
+
+ /* Handle offset(reg)[index] with offset added outermost */
+
+ if (indexable_address_p (xfoo0, xfoo1, mode, strict)
+ || indexable_address_p (xfoo1, xfoo0, mode, strict))
+ return true;
+
+ return false;
+}
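+
+/* Examples of addresses accepted above (illustrative): "(r1)", "(r1)+",
+   "-(r1)", "sym", "4(r1)", the indirect "*4(r1)", and indexed forms
+   such as "sym[r2]" and "4(r1)[r2]", where the index register is
+   scaled by the operand size.  */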
+
+/* Return true if x (a legitimate address expression) has an effect that
+   depends on the machine mode it is used for.  On the VAX, predecrement
+   and postincrement addresses depend thus (the amount of decrement or
+   increment being the length of the operand), and all indexed addresses
+   depend thus (because the index scale factor is the length of the
+   operand). */
+
+bool
+vax_mode_dependent_address_p (rtx x)
+{
+ rtx xfoo0, xfoo1;
+
+ /* Auto-increment cases are now dealt with generically in recog.c. */
+ if (GET_CODE (x) != PLUS)
+ return false;
+
+ xfoo0 = XEXP (x, 0);
+ xfoo1 = XEXP (x, 1);
+
+ if (CONST_INT_P (xfoo0) && REG_P (xfoo1))
+ return false;
+ if (CONST_INT_P (xfoo1) && REG_P (xfoo0))
+ return false;
+ if (!flag_pic && CONSTANT_ADDRESS_P (xfoo0) && REG_P (xfoo1))
+ return false;
+ if (!flag_pic && CONSTANT_ADDRESS_P (xfoo1) && REG_P (xfoo0))
+ return false;
+
+ return true;
+}
+
+static rtx
+fixup_mathdi_operand (rtx x, enum machine_mode mode)
+{
+ if (illegal_addsub_di_memory_operand (x, mode))
+ {
+ rtx addr = XEXP (x, 0);
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx offset = 0;
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+ if (GET_CODE (addr) == CONST && flag_pic)
+ {
+ offset = XEXP (XEXP (addr, 0), 1);
+ addr = XEXP (XEXP (addr, 0), 0);
+ }
+#endif
+ emit_move_insn (temp, addr);
+ if (offset)
+ temp = gen_rtx_PLUS (Pmode, temp, offset);
+ x = gen_rtx_MEM (DImode, temp);
+ }
+ return x;
+}
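+
+/* E.g. (illustrative): under -fPIC, a DImode MEM whose address is an
+   external symbol is rewritten by the function above as a MEM through
+   a temporary register holding that address (plus any constant
+   offset).  */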
+
+void
+vax_expand_addsub_di_operands (rtx * operands, enum rtx_code code)
+{
+ int hi_only = operand_subword (operands[2], 0, 0, DImode) == const0_rtx;
+ rtx temp;
+
+ rtx (*gen_old_insn)(rtx, rtx, rtx);
+ rtx (*gen_si_insn)(rtx, rtx, rtx);
+ rtx (*gen_insn)(rtx, rtx, rtx);
+
+ if (code == PLUS)
+ {
+ gen_old_insn = gen_adddi3_old;
+ gen_si_insn = gen_addsi3;
+ gen_insn = gen_adcdi3;
+ }
+ else if (code == MINUS)
+ {
+ gen_old_insn = gen_subdi3_old;
+ gen_si_insn = gen_subsi3;
+ gen_insn = gen_sbcdi3;
+ }
+ else
+ gcc_unreachable ();
+
+ /* If this is addition (thus operands are commutative) and if there is one
+     addend that duplicates the destination, we want that addend to be the
+ first addend. */
+ if (code == PLUS
+ && rtx_equal_p (operands[0], operands[2])
+ && !rtx_equal_p (operands[1], operands[2]))
+ {
+ temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+ if (!TARGET_QMATH)
+ {
+ emit_insn ((*gen_old_insn) (operands[0], operands[1], operands[2]));
+ }
+ else if (hi_only)
+ {
+ if (!rtx_equal_p (operands[0], operands[1])
+ && (REG_P (operands[0]) && MEM_P (operands[1])))
+ {
+ emit_move_insn (operands[0], operands[1]);
+ operands[1] = operands[0];
+ }
+
+ operands[0] = fixup_mathdi_operand (operands[0], DImode);
+ operands[1] = fixup_mathdi_operand (operands[1], DImode);
+ operands[2] = fixup_mathdi_operand (operands[2], DImode);
+
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operand_subword (operands[0], 0, 0, DImode),
+ operand_subword (operands[1], 0, 0, DImode));
+
+ emit_insn ((*gen_si_insn) (operand_subword (operands[0], 1, 0, DImode),
+ operand_subword (operands[1], 1, 0, DImode),
+ operand_subword (operands[2], 1, 0, DImode)));
+ }
+ else
+ {
+      /* If we are adding a value to itself, that's really a multiply by 2,
+	 which is just a left shift by 1.  */
+ if (rtx_equal_p (operands[1], operands[2]))
+ {
+ gcc_assert (code != MINUS);
+ emit_insn (gen_ashldi3 (operands[0], operands[1], const1_rtx));
+ return;
+ }
+
+ operands[0] = fixup_mathdi_operand (operands[0], DImode);
+
+      /* If an operand is the same as operand[0], use the operand[0] rtx
+	 because fixup will return an equivalent rtx but not an equal one. */
+
+ if (rtx_equal_p (operands[0], operands[1]))
+ operands[1] = operands[0];
+ else
+ operands[1] = fixup_mathdi_operand (operands[1], DImode);
+
+ if (rtx_equal_p (operands[0], operands[2]))
+ operands[2] = operands[0];
+ else
+ operands[2] = fixup_mathdi_operand (operands[2], DImode);
+
+      /* If we are subtracting and the destination does not duplicate the
+	 first operand [d = a - b], then because the carry ops take only
+	 two operands we would need a move before the subtract.  And if
+	 d == b we would also need a temp, since otherwise [d = a, d -= d]
+	 ends up as 0.  Instead we rewrite d = a - b into d = -b, d += a.
+	 Since -b can never overflow, no temp is needed even if b == d.
+
+	 If we are doing addition, since the carry ops take two operands,
+	 if we aren't adding to ourselves, move the first addend to the
+	 destination first.  */
+
+ gcc_assert (operands[1] != const0_rtx || code == MINUS);
+ if (!rtx_equal_p (operands[0], operands[1]) && operands[1] != const0_rtx)
+ {
+ if (code == MINUS && CONSTANT_P (operands[1]))
+ {
+ temp = gen_reg_rtx (DImode);
+ emit_insn (gen_sbcdi3 (operands[0], const0_rtx, operands[2]));
+ code = PLUS;
+ gen_insn = gen_adcdi3;
+ operands[2] = operands[1];
+ operands[1] = operands[0];
+ }
+ else
+ emit_move_insn (operands[0], operands[1]);
+ }
+
+ /* Subtracting a constant will have been rewritten to an addition of the
+ negative of that constant before we get here. */
+ gcc_assert (!CONSTANT_P (operands[2]) || code == PLUS);
+ emit_insn ((*gen_insn) (operands[0], operands[1], operands[2]));
+ }
+}
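+
+/* Worked example of the rewrite above (illustrative): for d = c - b
+   with a constant c, the expander emits gen_sbcdi3 (d, 0, b) to form
+   d = -b and then gen_adcdi3 to add c, so no temporary is needed even
+   if d and b overlap.  */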
+
+bool
+adjacent_operands_p (rtx lo, rtx hi, enum machine_mode mode)
+{
+ HOST_WIDE_INT lo_offset;
+ HOST_WIDE_INT hi_offset;
+
+ if (GET_CODE (lo) != GET_CODE (hi))
+ return false;
+
+ if (REG_P (lo))
+ return mode == SImode && REGNO (lo) + 1 == REGNO (hi);
+ if (CONST_INT_P (lo))
+ return INTVAL (hi) == 0 && 0 <= INTVAL (lo) && INTVAL (lo) < 64;
+ if (CONST_INT_P (lo))
+ return mode != SImode;
+
+ if (!MEM_P (lo))
+ return false;
+
+ if (MEM_VOLATILE_P (lo) || MEM_VOLATILE_P (hi))
+ return false;
+
+ lo = XEXP (lo, 0);
+ hi = XEXP (hi, 0);
+
+ if (GET_CODE (lo) == POST_INC /* || GET_CODE (lo) == PRE_DEC */)
+ return rtx_equal_p (lo, hi);
+
+ switch (GET_CODE (lo))
+ {
+ case REG:
+ case SYMBOL_REF:
+ lo_offset = 0;
+ break;
+ case CONST:
+ lo = XEXP (lo, 0);
+ /* FALLTHROUGH */
+ case PLUS:
+ if (!CONST_INT_P (XEXP (lo, 1)))
+ return false;
+ lo_offset = INTVAL (XEXP (lo, 1));
+ lo = XEXP (lo, 0);
+ break;
+ default:
+ return false;
+ }
+
+ switch (GET_CODE (hi))
+ {
+ case REG:
+ case SYMBOL_REF:
+ hi_offset = 0;
+ break;
+ case CONST:
+ hi = XEXP (hi, 0);
+ /* FALLTHROUGH */
+ case PLUS:
+ if (!CONST_INT_P (XEXP (hi, 1)))
+ return false;
+ hi_offset = INTVAL (XEXP (hi, 1));
+ hi = XEXP (hi, 0);
+ break;
+ default:
+ return false;
+ }
+
+ if (GET_CODE (lo) == MULT || GET_CODE (lo) == PLUS)
+ return false;
+
+ return rtx_equal_p (lo, hi)
+ && hi_offset - lo_offset == GET_MODE_SIZE (mode);
+}
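+
+/* For example (illustrative): with MODE == SImode, MEM operands at
+   "4(r1)" and "8(r1)" are adjacent, as are the registers r2 and r3,
+   and two POST_INC references through the same register.  */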
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts. */
+
+/* On the VAX, the trampoline contains an entry mask and two instructions:
+ .word NN
+   movl $STATIC,r0 (store the function's static chain)
+ jmp *$FUNCTION (jump to function code at address FUNCTION) */
+
+static void
+vax_asm_trampoline_template (FILE *f ATTRIBUTE_UNUSED)
+{
+ assemble_aligned_integer (2, const0_rtx);
+ assemble_aligned_integer (2, GEN_INT (0x8fd0));
+ assemble_aligned_integer (4, const0_rtx);
+ assemble_aligned_integer (1, GEN_INT (0x50 + STATIC_CHAIN_REGNUM));
+ assemble_aligned_integer (2, GEN_INT (0x9f17));
+ assemble_aligned_integer (4, const0_rtx);
+}
+
+/* We copy the register-mask from the function's pure code
+ to the start of the trampoline. */
+
+static void
+vax_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx mem;
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ mem = adjust_address (m_tramp, HImode, 0);
+ emit_move_insn (mem, gen_const_mem (HImode, fnaddr));
+
+ mem = adjust_address (m_tramp, SImode, 4);
+ emit_move_insn (mem, cxt);
+ mem = adjust_address (m_tramp, SImode, 11);
+ emit_move_insn (mem, plus_constant (fnaddr, 2));
+ emit_insn (gen_sync_istream ());
+}
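+
+/* The resulting 15-byte trampoline (an illustrative sketch):
+     bytes  0-1   entry mask copied from FUNCTION
+     bytes  2-3   0x8fd0                movl with immediate operand
+     bytes  4-7   the static chain value CXT
+     byte   8     0x50 + STATIC_CHAIN_REGNUM   register destination
+     bytes  9-10  0x9f17                jmp @# absolute
+     bytes 11-14  FUNCTION+2, past that function's entry mask.  */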
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the VAX, the RET insn pops a maximum of 255 args for any function. */
+
+static int
+vax_return_pops_args (tree fundecl ATTRIBUTE_UNUSED,
+ tree funtype ATTRIBUTE_UNUSED, int size)
+{
+ return size > 255 * 4 ? 0 : size;
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+/* On the VAX all args are pushed. */
+
+static rtx
+vax_function_arg (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ bool named ATTRIBUTE_UNUSED)
+{
+ return NULL_RTX;
+}
+
+/* Update the data in CUM to advance over an argument of mode MODE and
+ data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+vax_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ *cum += (mode != BLKmode
+ ? (GET_MODE_SIZE (mode) + 3) & ~3
+ : (int_size_in_bytes (type) + 3) & ~3);
+}
diff --git a/gcc/config/vax/vax.h b/gcc/config/vax/vax.h
new file mode 100644
index 000000000..272dfbbc3
--- /dev/null
+++ b/gcc/config/vax/vax.h
@@ -0,0 +1,751 @@
+/* Definitions of target machine for GNU compiler. VAX version.
+ Copyright (C) 1987, 1988, 1991, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__vax__"); \
+ builtin_assert ("cpu=vax"); \
+ builtin_assert ("machine=vax"); \
+ if (TARGET_G_FLOAT) \
+ { \
+ builtin_define ("__GFLOAT"); \
+ builtin_define ("__GFLOAT__"); \
+ } \
+ } \
+ while (0)
+
+#define VMS_TARGET 0
+
+/* Use -J option for long branch support with Unix assembler. */
+
+#define ASM_SPEC "-J"
+
+/* Choose proper libraries depending on float format.
+ Note that there are no profiling libraries for g-format.
+ Also use -lg for the sake of dbx. */
+
+#define LIB_SPEC "%{g:-lg}\
+ %{mg:%{lm:-lmg} -lcg \
+ %{p:%eprofiling not supported with -mg\n}\
+ %{pg:%eprofiling not supported with -mg\n}}\
+ %{!mg:%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}}"
+
+/* Print subsidiary information on the compiler version in use. */
+
+#ifndef TARGET_NAME /* A more specific value might be supplied via -D. */
+#define TARGET_NAME "vax"
+#endif
+#define TARGET_VERSION fprintf (stderr, " (%s)", TARGET_NAME)
+
+/* Run-time compilation parameters selecting different hardware subsets. */
+
+/* Nonzero if ELF. Redefined by vax/elf.h. */
+#define TARGET_ELF 0
+
+/* Use BSD names for udiv and umod libgcc calls. */
+#define TARGET_BSD_DIVMOD 1
+
+/* Default target_flags if no switches specified. */
+
+#ifndef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_UNIX_ASM)
+#endif
+
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields.
+ This is not true on the VAX. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is not true on the VAX. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+/* This is not true on the VAX. */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 16
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY (TARGET_VAXC_ALIGNMENT ? 8 : 32)
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#define PCC_BITFIELD_TYPE_MATTERS (! TARGET_VAXC_ALIGNMENT)
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT 32
+
+/* No structure field wants to be aligned rounder than this. */
+#define BIGGEST_FIELD_ALIGNMENT (TARGET_VAXC_ALIGNMENT ? 8 : 32)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 0
+
+/* Let's keep the stack somewhat aligned. */
+#define STACK_BOUNDARY 32
+
+/* The table of an ADDR_DIFF_VEC must be contiguous with the case
+   opcode; it is part of the case instruction. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 0
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+#define FIRST_PSEUDO_REGISTER 16
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the VAX, these are the AP, FP, SP and PC. */
+#define FIXED_REGISTERS {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS {1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+ On the VAX, all registers are one word long. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
+ On the VAX, all registers can hold all modes. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) 1
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) 1
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* VAX pc is overloaded on a register. */
+#define PC_REGNUM VAX_PC_REGNUM
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM VAX_SP_REGNUM
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM VAX_FP_REGNUM
+
+/* Offset from the frame pointer register value to the top of stack. */
+#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM VAX_AP_REGNUM
+
+/* Register in which static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM 0
+
+/* Register in which address to store a structure value
+ is passed to a function. */
+#define VAX_STRUCT_VALUE_REGNUM 1
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union. */
+
+/* The VAX has only one kind of registers, so NO_REGS and ALL_REGS
+ are the only classes. */
+
+enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Since GENERAL_REGS is the same class as ALL_REGS,
+ don't give it a different class number; just make it an alias. */
+
+#define GENERAL_REGS ALL_REGS
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+ { "NO_REGS", "ALL_REGS" }
+
+/* The following macro defines cover classes for the Integrated Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of those registers.  The macro value is
+   an array of register classes with LIM_REG_CLASSES used as the end
+   marker.  */
+#define IRA_COVER_CLASSES { ALL_REGS, LIM_REG_CLASSES }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS {{0}, {0xffff}}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) ALL_REGS
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS ALL_REGS
+#define BASE_REG_CLASS ALL_REGS
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+/* On the VAX, this is always the size of MODE in words,
+ since all registers are the same size. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* Given an rtx for the address of a frame,
+ return an rtx for the address of the word in the frame
+ that holds the dynamic chain--the previous frame's address. */
+#define DYNAMIC_CHAIN_ADDRESS(FRAME) plus_constant ((FRAME), 12)
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by.
+ On the VAX, -(sp) pushes only the bytes of the operands. */
+#define PUSH_ROUNDING(BYTES) (BYTES)
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 4
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+
+/* On the VAX the return value is in R0 regardless. */
+
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG (TYPE_MODE (VALTYPE), 0)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+/* On the VAX the return value is in R0 regardless. */
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0)
+
+/* Define this if PCC uses the nonreentrant convention for returning
+ structure and union values. */
+
+#define PCC_STATIC_STRUCT_RETURN
+
+/* 1 if N is a possible register number for a function value.
+ On the VAX, R0 is the only register thus used. */
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0)
+
+/* 1 if N is a possible register number for function argument passing.
+ On the VAX, no registers are used in this way. */
+
+#define FUNCTION_ARG_REGNO_P(N) 0
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On the VAX, this is a single integer, which is a number of bytes
+ of arguments scanned so far. */
+
+#define CUMULATIVE_ARGS int
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On the VAX, the offset starts at 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ ((CUM) = 0)
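+
+/* For example (hypothetical call): scanning f (int, double) advances
+   CUM from 0 to 4 after the int and from 4 to 12 after the double,
+   since each stack argument occupies a whole number of longwords.  */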
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define VAX_FUNCTION_PROFILER_NAME "mcount"
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do \
+ { \
+ char label[256]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "LP", (LABELNO)); \
+ fprintf (FILE, "\tmovab "); \
+ assemble_name (FILE, label); \
+ asm_fprintf (FILE, ",%Rr0\n\tjsb %s\n", \
+ VAX_FUNCTION_PROFILER_NAME); \
+ } \
+ while (0)
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Store in the variable DEPTH the initial difference between the
+ frame pointer reg contents and the stack pointer reg contents,
+ as of the start of the function body. This depends on the layout
+ of the fixed parts of the stack frame and on how registers are saved.
+
+ On the VAX, FRAME_POINTER_REQUIRED is always 1, so the definition of this
+ macro doesn't matter. But it must be defined. */
+
+#define INITIAL_FRAME_POINTER_OFFSET(DEPTH) (DEPTH) = 0;
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE 15
+
+/* Byte offset of return address in a stack frame. The "saved PC" field
+ is in element [4] when treating the frame as an array of longwords. */
+
+#define RETURN_ADDRESS_OFFSET (4 * UNITS_PER_WORD) /* 16 */
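+
+/* Sketch of the frame layout assumed here: a CALLS frame holds, at
+   increasing longword offsets from the frame address, the condition
+   handler [0], the PSW/register mask [1], the saved AP [2], the saved
+   FP [3] (hence DYNAMIC_CHAIN_ADDRESS above), and the saved PC [4].  */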
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is already the frame pointer of the COUNT frame, so we
+ can ignore COUNT. */
+
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT == 0) \
+ ? gen_rtx_MEM (Pmode, plus_constant (FRAME, RETURN_ADDRESS_OFFSET)) \
+ : (rtx) 0)
+
+
+/* Addressing modes, and classification of registers for them. */
+
+#define HAVE_POST_INCREMENT 1
+
+#define HAVE_PRE_DECREMENT 1
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(regno) \
+ ((regno) < FIRST_PSEUDO_REGISTER || reg_renumber[regno] >= 0)
+#define REGNO_OK_FOR_BASE_P(regno) \
+ ((regno) < FIRST_PSEUDO_REGISTER || reg_renumber[regno] >= 0)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* 1 if X is an rtx for a constant that is a valid address. */
+
+#define CONSTANT_ADDRESS_P(X) legitimate_constant_address_p (X)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+#ifndef REG_OK_STRICT
+
+/* Nonzero if X is a hard reg that can be used as an index
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_INDEX_P(X) 1
+
+/* Nonzero if X is a hard reg that can be used as a base reg
+ or if it is a pseudo reg. */
+#define REG_OK_FOR_BASE_P(X) 1
+
+#else
+
+/* Nonzero if X is a hard reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+
+/* Nonzero if X is a hard reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#endif
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+ has an effect that depends on the machine mode it is used for. */
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \
+ { if (vax_mode_dependent_address_p (ADDR)) goto LABEL; }
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE HImode
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Indicate that jump tables go in the text section. This is
+ necessary when compiling PIC code. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* This flag, if defined, says the same insns that convert to a signed fixnum
+ also convert validly to an unsigned one. */
+#define FIXUNS_TRUNC_LIKE_FIX_TRUNC
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 8
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead. */
+#define MOVE_RATIO(speed) ((speed) ? 6 : 3)
+#define CLEAR_RATIO(speed) ((speed) ? 6 : 2)
+
+/* Nonzero if access to memory by bytes is slow and undesirable. */
+#define SLOW_BYTE_ACCESS 0
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+/* #define SHIFT_COUNT_TRUNCATED */
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* Specify the cost of a branch insn; roughly the number of extra insns that
+ should be added to avoid a branch.
+
+ Branches are extremely cheap on the VAX while the shift insns often
+ used to replace branches can be expensive. */
+
+#define BRANCH_COST(speed_p, predictable_p) 0
+
+/* Tell final.c how to eliminate redundant test instructions. */
+
+/* Here we define machine-dependent flags and fields in cc_status
+ (see `conditions.h'). No extra ones are needed for the VAX. */
+
+/* Store in cc_status the expressions
+ that the condition codes will describe
+ after execution of an instruction whose pattern is EXP.
+ Do not alter them if the instruction would not alter the cc's. */
+
+#define NOTICE_UPDATE_CC(EXP, INSN) \
+ vax_notice_update_cc ((EXP), (INSN))
+
+#define OUTPUT_JUMP(NORMAL, FLOAT, NO_OV) \
+ { if (cc_status.flags & CC_NO_OVERFLOW) \
+ return NO_OV; \
+ return NORMAL; \
+ }
+
+/* Control the assembler format that we output. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+
+#define ASM_COMMENT_START "#"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "#APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Output before read-only data. */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* Output before writable data. */
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above).
+ The register names will be prefixed by REGISTER_PREFIX, if any. */
+
+#define REGISTER_PREFIX ""
+#define REGISTER_NAMES \
+ { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "ap", "fp", "sp", "pc", }
+
+/* This is BSD, so it wants DBX format. */
+
+#define DBX_DEBUGGING_INFO 1
+
+/* Do not break .stabs pseudos into continuations. */
+
+#define DBX_CONTIN_LENGTH 0
+
+/* This is the char to use for continuation (in case we need to turn
+ continuation back on). */
+
+#define DBX_CONTIN_CHAR '?'
+
+/* Don't use the `xsfoo;' construct in DBX output; this system
+ doesn't support it. */
+
+#define DBX_NO_XREFS
+
+/* Output the .stabs for a C `static' variable in the data section. */
+#define DBX_STATIC_STAB_DATA_SECTION
+
+/* VAX specific: which type character is used for type double? */
+
+#define ASM_DOUBLE_CHAR (TARGET_G_FLOAT ? 'g' : 'd')
+
+/* This is how to output a command to make the user-level label named NAME
+ defined for reference from other files. */
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP ".globl "
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* This is how to store into the string LABEL
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
+ sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM))
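+
+/* For example, PREFIX "LP" and NUM 3 yield "*LP3"; the leading `*'
+   tells assemble_name to emit the name verbatim, without prepending
+   USER_LABEL_PREFIX.  */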
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_PUSH(FILE,REGNO) \
+ fprintf (FILE, "\tpushl %s\n", reg_names[REGNO])
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE,REGNO) \
+ fprintf (FILE, "\tmovl (%s)+,%s\n", reg_names[STACK_POINTER_REGNUM], \
+ reg_names[REGNO])
+
+/* This is how to output an element of a case-vector that is absolute.
+ (The VAX does not use such vectors,
+ but we must define this macro anyway.) */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ do \
+ { \
+ char label[256]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (VALUE));\
+ fprintf (FILE, "\t.long "); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ do \
+ { \
+ char label[256]; \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (VALUE)); \
+ fprintf (FILE, "\t.word "); \
+ assemble_name (FILE, label); \
+ ASM_GENERATE_INTERNAL_LABEL (label, "L", (REL)); \
+ fprintf (FILE, "-"); \
+ assemble_name (FILE, label); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ fprintf (FILE, "\t.align %d\n", (LOG))
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.space %u\n", (int)(SIZE))
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+ ( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+ ( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+/* Print an instruction operand X on file FILE.
+ CODE is the code from the %-spec that requested printing this operand;
+ if `%z3' was used to print operand 3, then CODE is 'z'.
+
+VAX operand formatting codes:
+
+ letter print
+ c direct branch condition
+ C reverse branch condition
+ D 64-bit immediate operand
+ B the low 8 bits of the complement of a constant operand
+ H the low 16 bits of the complement of a constant operand
+   M	a mask covering all but the N lowest bits of a word
+ N the complement of a constant integer operand
+ P constant operand plus 1
+ R 32 - constant operand
+ b the low 8 bits of a negated constant operand
+ h the low 16 bits of a negated constant operand
+ # 'd' or 'g' depending on whether dfloat or gfloat is used
+ | register prefix */
+
+/* The purpose of D is to get around a quirk or bug in the VAX assembler
+ whereby -1 in a 64-bit immediate operand means 0x00000000ffffffff,
+ which is not a 64-bit minus one. As a workaround, we output negative
+ values in hex. */
+#if HOST_BITS_PER_WIDE_INT == 64
+# define NEG_HWI_PRINT_HEX16 HOST_WIDE_INT_PRINT_HEX
+#else
+# define NEG_HWI_PRINT_HEX16 "0xffffffff%08lx"
+#endif
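+
+/* For example, on a 32-bit host the value -2 has low part 0xfffffffe
+   and prints as 0xfffffffffffffffe, matching what a 64-bit host
+   prints via HOST_WIDE_INT_PRINT_HEX.  */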
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '#' || (CODE) == '|')
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ print_operand (FILE, X, CODE)
+
+/* Print a memory operand whose address is X, on file FILE.
+ This uses a function in output-vax.c. */
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address (FILE, ADDR)
+
+/* This is a blatant lie.  However, it's good enough, since we don't
+ actually have any code whatsoever for which this isn't overridden
+ by the proper FDE definition. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, PC_REGNUM)
+
diff --git a/gcc/config/vax/vax.md b/gcc/config/vax/vax.md
new file mode 100644
index 000000000..8c3ef0042
--- /dev/null
+++ b/gcc/config/vax/vax.md
@@ -0,0 +1,1636 @@
+;; Machine description for GNU compiler, VAX Version
+;; Copyright (C) 1987, 1988, 1991, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+;; 2002, 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;;- Instruction patterns. When multiple patterns apply,
+;;- the first one in the file is chosen.
+;;-
+;;- See file "rtl.def" for documentation on define_insn, match_*, et al.
+;;-
+;;- cpp macro #define NOTICE_UPDATE_CC in file tm.h handles condition code
+;;- updates for most instructions.
+
+;; UNSPEC_VOLATILE usage:
+
+(define_constants
+ [(VUNSPEC_BLOCKAGE 0) ; `blockage' insn to prevent scheduling across an
+ ; insn in the code.
+ (VUNSPEC_SYNC_ISTREAM 1) ; sequence of insns to sync the I-stream
+ (VAX_AP_REGNUM 12) ; Register 12 contains the argument pointer
+ (VAX_FP_REGNUM 13) ; Register 13 contains the frame pointer
+ (VAX_SP_REGNUM 14) ; Register 14 contains the stack pointer
+ (VAX_PC_REGNUM 15) ; Register 15 contains the program counter
+ ]
+)
+
+;; Integer modes supported on VAX, with a mapping from machine mode
+;; to mnemonic suffix. DImode is always a special case.
+(define_mode_iterator VAXint [QI HI SI])
+(define_mode_iterator VAXintQH [QI HI])
+(define_mode_iterator VAXintQHSD [QI HI SI DI])
+(define_mode_attr isfx [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Similar for float modes supported on VAX.
+(define_mode_iterator VAXfp [SF DF])
+(define_mode_attr fsfx [(SF "f") (DF "%#")])
+
+;; Some output patterns want integer immediates with a prefix...
+(define_mode_attr iprefx [(QI "B") (HI "H") (SI "N")])
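+
+;; For example, the "mov<mode>" pattern over VAXintQH instantiates as
+;; movqi and movhi, and a template such as "add<VAXint:isfx>3" prints
+;; as addb3, addw3 or addl3 according to the iterated mode.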
+
+;;
+(include "constraints.md")
+(include "predicates.md")
+
+(define_insn "*cmp<mode>"
+ [(set (cc0)
+ (compare (match_operand:VAXint 0 "nonimmediate_operand" "nrmT,nrmT")
+ (match_operand:VAXint 1 "general_operand" "I,nrmT")))]
+ ""
+ "@
+ tst<VAXint:isfx> %0
+ cmp<VAXint:isfx> %0,%1")
+
+(define_insn "*cmp<mode>"
+ [(set (cc0)
+ (compare (match_operand:VAXfp 0 "general_operand" "gF,gF")
+ (match_operand:VAXfp 1 "general_operand" "G,gF")))]
+ ""
+ "@
+ tst<VAXfp:fsfx> %0
+ cmp<VAXfp:fsfx> %0,%1")
+
+(define_insn "*bit<mode>"
+ [(set (cc0)
+ (compare (and:VAXint (match_operand:VAXint 0 "general_operand" "nrmT")
+ (match_operand:VAXint 1 "general_operand" "nrmT"))
+ (const_int 0)))]
+ ""
+ "bit<VAXint:isfx> %0,%1")
+
+;; The VAX has no sCOND insns. It does have add/subtract with carry
+;; which could be used to implement the sltu and sgeu patterns. However,
+;; to do this properly requires a complete rewrite of the compare insns
+;; to keep them together with the sltu/sgeu insns until after the
+;; reload pass is complete. The previous implementation didn't do this
+;; and has been deleted.
+
+
+(define_insn "mov<mode>"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g,g")
+ (match_operand:VAXfp 1 "general_operand" "G,gF"))]
+ ""
+ "@
+ clr<VAXfp:fsfx> %0
+ mov<VAXfp:fsfx> %1,%0")
+
+;; Some VAXen don't support this instruction.
+;;(define_insn "movti"
+;; [(set (match_operand:TI 0 "general_operand" "=g")
+;; (match_operand:TI 1 "general_operand" "g"))]
+;; ""
+;; "movh %1,%0")
+
+(define_insn "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (match_operand:DI 1 "general_operand" "g"))]
+ ""
+ "* return vax_output_int_move (insn, operands, DImode);")
+
+;; The VAX move instructions have space-time tradeoffs.  On a MicroVAX,
+;; register-register mov instructions take 3 bytes and 2 CPU cycles.  clrl
+;; takes 2 bytes and 3 cycles. mov from constant to register takes 2 cycles
+;; if the constant is smaller than 4 bytes, 3 cycles for a longword
+;; constant. movz, mneg, and mcom are as fast as mov, so movzwl is faster
+;; than movl for positive constants that fit in 16 bits but not 6 bits. cvt
+;; instructions take 4 cycles. inc takes 3 cycles. The machine description
+;; is willing to trade 1 byte for 1 cycle (clrl instead of movl $0; cvtwl
+;; instead of movl).
+
+;; Cycle counts for other models may vary (on a VAX 750 they are similar,
+;; but on a VAX 9000 most move and add instructions with one constant
+;; operand take 1 cycle).
+
+;; Loads of constants between 64 and 128 used to be done with
+;; "addl3 $63,#,dst" but this is slower than movzbl and takes as much space.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "
+{
+#ifdef NO_EXTERNAL_INDIRECT_ADDRESS
+ if (flag_pic
+ && GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF
+ && !SYMBOL_REF_LOCAL_P (XEXP (XEXP (operands[1], 0), 0)))
+ {
+ rtx symbol_ref = XEXP (XEXP (operands[1], 0), 0);
+ rtx const_int = XEXP (XEXP (operands[1], 0), 1);
+ rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+ emit_move_insn (temp, symbol_ref);
+ emit_move_insn (operands[0], gen_rtx_PLUS (SImode, temp, const_int));
+ DONE;
+ }
+#endif
+}")
+
+(define_insn "movsi_2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (match_operand:SI 1 "nonsymbolic_operand" "nrmT"))]
+ ""
+ "* return vax_output_int_move (insn, operands, SImode);")
+
+(define_insn "mov<mode>"
+ [(set (match_operand:VAXintQH 0 "nonimmediate_operand" "=g")
+ (match_operand:VAXintQH 1 "general_operand" "g"))]
+ ""
+ "* return vax_output_int_move (insn, operands, <MODE>mode);")
+
+(define_insn "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+g"))
+ (match_operand:HI 1 "general_operand" "g"))]
+ ""
+ "*
+{
+ if (CONST_INT_P (operands[1]))
+ {
+ int i = INTVAL (operands[1]);
+ if (i == 0)
+ return \"clrw %0\";
+ else if ((unsigned int)i < 64)
+ return \"movw %1,%0\";
+ else if ((unsigned int)~i < 64)
+ return \"mcomw %H1,%0\";
+ else if ((unsigned int)i < 256)
+ return \"movzbw %1,%0\";
+ }
+ return \"movw %1,%0\";
+}")
+
+(define_insn "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "register_operand" "+g"))
+ (match_operand:QI 1 "general_operand" "g"))]
+ ""
+ "*
+{
+ if (CONST_INT_P (operands[1]))
+ {
+ int i = INTVAL (operands[1]);
+ if (i == 0)
+ return \"clrb %0\";
+ else if ((unsigned int)~i < 64)
+ return \"mcomb %B1,%0\";
+ }
+ return \"movb %1,%0\";
+}")
+
+;; This is here to accept 4 arguments and pass the first 3 along
+;; to the movmemhi1 pattern that really does the work.
+(define_expand "movmemhi"
+ [(set (match_operand:BLK 0 "general_operand" "=g")
+ (match_operand:BLK 1 "general_operand" "g"))
+ (use (match_operand:HI 2 "general_operand" "g"))
+ (match_operand 3 "" "")]
+ ""
+ "
+{
+ emit_insn (gen_movmemhi1 (operands[0], operands[1], operands[2]));
+ DONE;
+}")
+
+;; The definition of this insn does not really explain what it does,
+;; but it should suffice: anything generated as this insn will be
+;; recognized as one, and it won't successfully combine with anything.
+
+(define_insn "movmemhi1"
+ [(set (match_operand:BLK 0 "memory_operand" "=o")
+ (match_operand:BLK 1 "memory_operand" "o"))
+ (use (match_operand:HI 2 "general_operand" "g"))
+ (clobber (reg:SI 0))
+ (clobber (reg:SI 1))
+ (clobber (reg:SI 2))
+ (clobber (reg:SI 3))
+ (clobber (reg:SI 4))
+ (clobber (reg:SI 5))]
+ ""
+ "movc3 %2,%1,%0")
+
+;; Extension and truncation insns.
+
+(define_insn "truncsiqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=g")
+ (truncate:QI (match_operand:SI 1 "nonimmediate_operand" "nrmT")))]
+ ""
+ "cvtlb %1,%0")
+
+(define_insn "truncsihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=g")
+ (truncate:HI (match_operand:SI 1 "nonimmediate_operand" "nrmT")))]
+ ""
+ "cvtlw %1,%0")
+
+(define_insn "trunchiqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=g")
+ (truncate:QI (match_operand:HI 1 "nonimmediate_operand" "g")))]
+ ""
+ "cvtwb %1,%0")
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "g")))]
+ ""
+ "cvtwl %1,%0")
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=g")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "g")))]
+ ""
+ "cvtbw %1,%0")
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "g")))]
+ ""
+ "cvtbl %1,%0")
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=g")
+ (float_extend:DF (match_operand:SF 1 "general_operand" "gF")))]
+ ""
+ "cvtf%# %1,%0")
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=g")
+ (float_truncate:SF (match_operand:DF 1 "general_operand" "gF")))]
+ ""
+ "cvt%#f %1,%0")
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "g")))]
+ ""
+ "movzwl %1,%0")
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=g")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "g")))]
+ ""
+ "movzbw %1,%0")
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "g")))]
+ ""
+ "movzbl %1,%0")
+
+;; Fix-to-float conversion insns.
+
+(define_insn "float<VAXint:mode><VAXfp:mode>2"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g")
+ (float:VAXfp (match_operand:VAXint 1 "nonimmediate_operand" "g")))]
+ ""
+ "cvt<VAXint:isfx><VAXfp:fsfx> %1,%0")
+
+;; Float-to-fix conversion insns.
+
+(define_insn "fix_trunc<VAXfp:mode><VAXint:mode>2"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g")
+ (fix:VAXint (fix:VAXfp (match_operand:VAXfp 1 "general_operand" "gF"))))]
+ ""
+ "cvt<VAXfp:fsfx><VAXint:isfx> %1,%0")
+
+;;- All kinds of add instructions.
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g,g,g")
+ (plus:VAXfp (match_operand:VAXfp 1 "general_operand" "0,gF,gF")
+ (match_operand:VAXfp 2 "general_operand" "gF,0,gF")))]
+ ""
+ "@
+ add<VAXfp:fsfx>2 %2,%0
+ add<VAXfp:fsfx>2 %1,%0
+ add<VAXfp:fsfx>3 %1,%2,%0")
+
+(define_insn "pushlclsymreg"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "local_symbolic_operand" "i")))]
+ "flag_pic"
+ "pushab %a2[%1]")
+
+(define_insn "pushextsymreg"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "external_symbolic_operand" "i")))]
+ "flag_pic"
+ "pushab %a2[%1]")
+
+(define_insn "movlclsymreg"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "local_symbolic_operand" "i")))]
+ "flag_pic"
+ "movab %a2[%1],%0")
+
+(define_insn "movextsymreg"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (plus:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "external_symbolic_operand" "i")))]
+ "flag_pic"
+ "movab %a2[%1],%0")
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g")
+ (plus:VAXint (match_operand:VAXint 1 "general_operand" "nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT")))]
+ ""
+ "* return vax_output_int_add (insn, operands, <MODE>mode);")
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (plus:DI (match_operand:DI 1 "general_operand" "g")
+ (match_operand:DI 2 "general_operand" "g")))]
+ "!reload_in_progress"
+ "vax_expand_addsub_di_operands (operands, PLUS); DONE;")
+
+(define_insn "adcdi3"
+ [(set (match_operand:DI 0 "nonimmediate_addsub_di_operand" "=Rr")
+ (plus:DI (match_operand:DI 1 "general_addsub_di_operand" "%0")
+ (match_operand:DI 2 "general_addsub_di_operand" "nRr")))]
+ "TARGET_QMATH"
+ "* return vax_output_int_add (insn, operands, DImode);")
+
+;; The add-with-carry (adwc) instruction only accepts two operands.
+(define_insn "adddi3_old"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro>,ro>")
+ (plus:DI (match_operand:DI 1 "general_operand" "%0,ro>")
+ (match_operand:DI 2 "general_operand" "Fsro,Fs")))]
+ "!TARGET_QMATH"
+ "* return vax_output_int_add (insn, operands, DImode);")
+
+;;- All kinds of subtract instructions.
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g,g")
+ (minus:VAXfp (match_operand:VAXfp 1 "general_operand" "0,gF")
+ (match_operand:VAXfp 2 "general_operand" "gF,gF")))]
+ ""
+ "@
+ sub<VAXfp:fsfx>2 %2,%0
+ sub<VAXfp:fsfx>3 %2,%1,%0")
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g")
+ (minus:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT,nrmT")))]
+ ""
+ "@
+ sub<VAXint:isfx>2 %2,%0
+ sub<VAXint:isfx>3 %2,%1,%0")
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (minus:DI (match_operand:DI 1 "general_operand" "g")
+ (match_operand:DI 2 "general_operand" "g")))]
+ "!reload_in_progress"
+ "vax_expand_addsub_di_operands (operands, MINUS); DONE;")
+
+(define_insn "sbcdi3"
+ [(set (match_operand:DI 0 "nonimmediate_addsub_di_operand" "=Rr,=Rr")
+ (minus:DI (match_operand:DI 1 "general_addsub_di_operand" "0,I")
+ (match_operand:DI 2 "general_addsub_di_operand" "nRr,Rr")))]
+ "TARGET_QMATH"
+ "* return vax_output_int_subtract (insn, operands, DImode);")
+
+;; The subtract-with-carry (sbwc) instruction only takes two operands.
+(define_insn "subdi3_old"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=or>,or>")
+ (minus:DI (match_operand:DI 1 "general_operand" "0,or>")
+ (match_operand:DI 2 "general_operand" "Fsor,Fs")))]
+ "!TARGET_QMATH"
+ "* return vax_output_int_subtract (insn, operands, DImode);")
+
+;;- Multiply instructions.
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g,g,g")
+ (mult:VAXfp (match_operand:VAXfp 1 "general_operand" "0,gF,gF")
+ (match_operand:VAXfp 2 "general_operand" "gF,0,gF")))]
+ ""
+ "@
+ mul<VAXfp:fsfx>2 %2,%0
+ mul<VAXfp:fsfx>2 %1,%0
+ mul<VAXfp:fsfx>3 %1,%2,%0")
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g,g")
+ (mult:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT,nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT,0,nrmT")))]
+ ""
+ "@
+ mul<VAXint:isfx>2 %2,%0
+ mul<VAXint:isfx>2 %1,%0
+ mul<VAXint:isfx>3 %1,%2,%0")
+
+(define_insn "mulsidi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "nrmT"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "nrmT"))))]
+ ""
+ "emul %1,%2,$0,%0")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "nrmT"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "nrmT")))
+ (sign_extend:DI (match_operand:SI 3 "nonimmediate_operand" "g"))))]
+ ""
+ "emul %1,%2,%3,%0")
+
+;; 'F' constraint means type CONST_DOUBLE
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (plus:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "nrmT"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "nrmT")))
+ (match_operand:DI 3 "immediate_operand" "F")))]
+ "GET_CODE (operands[3]) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (operands[3]) == (CONST_DOUBLE_LOW (operands[3]) >> 31)"
+ "*
+{
+ if (CONST_DOUBLE_HIGH (operands[3]))
+ operands[3] = GEN_INT (CONST_DOUBLE_LOW (operands[3]));
+ return \"emul %1,%2,%3,%0\";
+}")
+
+;;- Divide instructions.
+
+(define_insn "div<mode>3"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g,g")
+ (div:VAXfp (match_operand:VAXfp 1 "general_operand" "0,gF")
+ (match_operand:VAXfp 2 "general_operand" "gF,gF")))]
+ ""
+ "@
+ div<VAXfp:fsfx>2 %2,%0
+ div<VAXfp:fsfx>3 %2,%1,%0")
+
+(define_insn "div<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g")
+ (div:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT,nrmT")))]
+ ""
+ "@
+ div<VAXint:isfx>2 %2,%0
+ div<VAXint:isfx>3 %2,%1,%0")
+
+;This is left out because it is very slow;
+;we are better off programming around the "lack" of this insn.
+;(define_insn "divmoddisi4"
+; [(set (match_operand:SI 0 "general_operand" "=g")
+; (div:SI (match_operand:DI 1 "general_operand" "g")
+; (match_operand:SI 2 "general_operand" "g")))
+; (set (match_operand:SI 3 "general_operand" "=g")
+; (mod:SI (match_operand:DI 1 "general_operand" "g")
+; (match_operand:SI 2 "general_operand" "g")))]
+; ""
+; "ediv %2,%1,%0,%3")
+
+;; Bit-and on the VAX is done with a clear-bits insn.
+(define_expand "and<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "")
+ (and:VAXint (not:VAXint (match_operand:VAXint 1 "general_operand" ""))
+ (match_operand:VAXint 2 "general_operand" "")))]
+ ""
+ "
+{
+ rtx op1 = operands[1];
+
+ /* If there is a constant argument, complement that one. */
+ if (CONST_INT_P (operands[2]) && ! CONST_INT_P (op1))
+ {
+ operands[1] = operands[2];
+ operands[2] = op1;
+ op1 = operands[1];
+ }
+
+ if (CONST_INT_P (op1))
+ operands[1] = GEN_INT (~INTVAL (op1));
+ else
+ operands[1] = expand_unop (<MODE>mode, one_cmpl_optab, op1, 0, 1);
+}")
+
+(define_insn "*and<mode>"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g")
+ (and:VAXint (not:VAXint (match_operand:VAXint 1 "general_operand" "nrmT,nrmT"))
+ (match_operand:VAXint 2 "general_operand" "0,nrmT")))]
+ ""
+ "@
+ bic<VAXint:isfx>2 %1,%0
+ bic<VAXint:isfx>3 %1,%2,%0")
+
+;; The following used to be needed because constant propagation can
+;; create them starting from the bic insn patterns above. This is no
+;; longer a problem. However, having these patterns allows optimization
+;; opportunities in combine.c.
+
+(define_insn "*and<mode>_const_int"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g")
+ (and:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT")
+ (match_operand:VAXint 2 "const_int_operand" "n,n")))]
+ ""
+ "@
+ bic<VAXint:isfx>2 %<VAXint:iprefx>2,%0
+ bic<VAXint:isfx>3 %<VAXint:iprefx>2,%1,%0")
+
+
+;;- Bit set instructions.
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g,g")
+ (ior:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT,nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT,0,nrmT")))]
+ ""
+ "@
+ bis<VAXint:isfx>2 %2,%0
+ bis<VAXint:isfx>2 %1,%0
+ bis<VAXint:isfx>3 %2,%1,%0")
+
+;;- xor instructions.
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g,g,g")
+ (xor:VAXint (match_operand:VAXint 1 "general_operand" "0,nrmT,nrmT")
+ (match_operand:VAXint 2 "general_operand" "nrmT,0,nrmT")))]
+ ""
+ "@
+ xor<VAXint:isfx>2 %2,%0
+ xor<VAXint:isfx>2 %1,%0
+ xor<VAXint:isfx>3 %2,%1,%0")
+
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VAXfp 0 "nonimmediate_operand" "=g")
+ (neg:VAXfp (match_operand:VAXfp 1 "general_operand" "gF")))]
+ ""
+ "mneg<VAXfp:fsfx> %1,%0")
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g")
+ (neg:VAXint (match_operand:VAXint 1 "general_operand" "nrmT")))]
+ ""
+ "mneg<VAXint:isfx> %1,%0")
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VAXint 0 "nonimmediate_operand" "=g")
+ (not:VAXint (match_operand:VAXint 1 "general_operand" "nrmT")))]
+ ""
+ "mcom<VAXint:isfx> %1,%0")
+
+
+;; Arithmetic right shift on the VAX works by negating the shift count,
+;; then emitting a right shift with the shift count negated. This means
+;; that all actual shift counts in the RTL will be positive. This
+;; prevents converting shifts to ZERO_EXTRACTs with negative positions,
+;; which isn't valid.
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "general_operand" "=g")
+ (ashiftrt:SI (match_operand:SI 1 "general_operand" "g")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "
+{
+ if (! CONST_INT_P(operands[2]))
+ operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2]));
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (ashiftrt:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "const_int_operand" "n")))]
+ ""
+ "ashl $%n2,%1,%0")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (ashiftrt:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (neg:QI (match_operand:QI 2 "general_operand" "g"))))]
+ ""
+ "ashl %2,%1,%0")
+
+(define_insn "ashlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (ashift:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "*
+{
+ if (operands[2] == const1_rtx && rtx_equal_p (operands[0], operands[1]))
+ return \"addl2 %0,%0\";
+ if (REG_P (operands[1]) && CONST_INT_P (operands[2]))
+ {
+ int i = INTVAL (operands[2]);
+ if (i == 1)
+ return \"addl3 %1,%1,%0\";
+ if (i == 2 && !optimize_size)
+ {
+ if (push_operand (operands[0], SImode))
+ return \"pushal 0[%1]\";
+ return \"moval 0[%1],%0\";
+ }
+ if (i == 3 && !optimize_size)
+ {
+ if (push_operand (operands[0], SImode))
+ return \"pushaq 0[%1]\";
+ return \"movaq 0[%1],%0\";
+ }
+ }
+ return \"ashl %2,%1,%0\";
+}")
+
+;; Arithmetic right shift on the VAX works by negating the shift count.
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "general_operand" "=g")
+ (ashiftrt:DI (match_operand:DI 1 "general_operand" "g")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "
+{
+ operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2]));
+}")
+
+(define_insn "ashldi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (ashift:DI (match_operand:DI 1 "general_operand" "g")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "ashq %2,%1,%0")
+
+(define_insn ""
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=g")
+ (ashiftrt:DI (match_operand:DI 1 "general_operand" "g")
+ (neg:QI (match_operand:QI 2 "general_operand" "g"))))]
+ ""
+ "ashq %2,%1,%0")
+
+;; We used to have expand_shift handle logical right shifts by using extzv,
+;; but this made it very difficult to do lshrdi3.  Since the VAX is the
+;; only machine with this kludge, it's better to just do this with a
+;; define_expand and remove that case from expand_shift.
+
+(define_expand "lshrsi3"
+ [(set (match_dup 3)
+ (minus:QI (const_int 32)
+ (match_dup 4)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_dup 3)
+ (match_operand:SI 2 "register_operand" "g")))]
+ ""
+ "
+{
+ operands[3] = gen_reg_rtx (QImode);
+ operands[4] = gen_lowpart (QImode, operands[2]);
+}")
+
+;; Rotate right on the VAX works by negating the shift count.
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "general_operand" "=g")
+ (rotatert:SI (match_operand:SI 1 "general_operand" "g")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "
+{
+ if (! CONST_INT_P (operands[2]))
+ operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2]));
+}")
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (rotate:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "general_operand" "g")))]
+ ""
+ "rotl %2,%1,%0")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (rotatert:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "const_int_operand" "n")))]
+ ""
+ "rotl %R2,%1,%0")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (rotatert:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (neg:QI (match_operand:QI 2 "general_operand" "g"))))]
+ ""
+ "rotl %2,%1,%0")
+
+;This insn is probably slower than a multiply and an add.
+;(define_insn ""
+; [(set (match_operand:SI 0 "general_operand" "=g")
+; (mult:SI (plus:SI (match_operand:SI 1 "general_operand" "g")
+; (match_operand:SI 2 "general_operand" "g"))
+; (match_operand:SI 3 "general_operand" "g")))]
+; ""
+; "index %1,$0x80000000,$0x7fffffff,%3,%2,%0")
+
+;; Special cases of bit-field insns which we should
+;; recognize in preference to the general case.
+;; These handle aligned 8-bit and 16-bit fields,
+;; which can usually be done with move instructions.
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+ro")
+ (match_operand:QI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "general_operand" "g"))]
+ "(INTVAL (operands[1]) == 8 || INTVAL (operands[1]) == 16)
+ && INTVAL (operands[2]) % INTVAL (operands[1]) == 0
+ && (REG_P (operands[0])
+ || ! mode_dependent_address_p (XEXP (operands[0], 0)))"
+ "*
+{
+ if (REG_P (operands[0]))
+ {
+ if (INTVAL (operands[2]) != 0)
+ return \"insv %3,%2,%1,%0\";
+ }
+ else
+ operands[0]
+ = adjust_address (operands[0],
+ INTVAL (operands[1]) == 8 ? QImode : HImode,
+ INTVAL (operands[2]) / 8);
+
+ CC_STATUS_INIT;
+ if (INTVAL (operands[1]) == 8)
+ return \"movb %3,%0\";
+ return \"movw %3,%0\";
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=&g")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "ro")
+ (match_operand:QI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "(INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16)
+ && INTVAL (operands[3]) % INTVAL (operands[2]) == 0
+ && (REG_P (operands[1])
+ || ! mode_dependent_address_p (XEXP (operands[1], 0)))"
+ "*
+{
+ if (REG_P (operands[1]))
+ {
+ if (INTVAL (operands[3]) != 0)
+ return \"extzv %3,%2,%1,%0\";
+ }
+ else
+ operands[1]
+ = adjust_address (operands[1],
+ INTVAL (operands[2]) == 8 ? QImode : HImode,
+ INTVAL (operands[3]) / 8);
+
+ if (INTVAL (operands[2]) == 8)
+ return \"movzbl %1,%0\";
+ return \"movzwl %1,%0\";
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "ro")
+ (match_operand:QI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "(INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 16)
+ && INTVAL (operands[3]) % INTVAL (operands[2]) == 0
+ && (REG_P (operands[1])
+ || ! mode_dependent_address_p (XEXP (operands[1], 0)))"
+ "*
+{
+ if (REG_P (operands[1]))
+ {
+ if (INTVAL (operands[3]) != 0)
+ return \"extv %3,%2,%1,%0\";
+ }
+ else
+ operands[1]
+ = adjust_address (operands[1],
+ INTVAL (operands[2]) == 8 ? QImode : HImode,
+ INTVAL (operands[3]) / 8);
+
+ if (INTVAL (operands[2]) == 8)
+ return \"cvtbl %1,%0\";
+ return \"cvtwl %1,%0\";
+}")
+
+;; Register-only SImode cases of bit-field insns.
+
+(define_insn ""
+ [(set (cc0)
+ (compare
+ (sign_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "cmpv %2,%1,%0,%3")
+
+(define_insn ""
+ [(set (cc0)
+ (compare
+ (zero_extract:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "cmpzv %2,%1,%0,%3")
+
+;; When the field position and size are constant and the destination
+;; is a register, extv and extzv are much slower than a rotate followed
+;; by a bicl or sign extension. Because we might end up choosing ext[z]v
+;; anyway, we can't allow immediate values for the primary source operand.
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "ro")
+ (match_operand:QI 2 "general_operand" "g")
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "*
+{
+ if (! CONST_INT_P (operands[3]) || ! CONST_INT_P (operands[2])
+ || ! REG_P (operands[0])
+ || (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 16))
+ return \"extv %3,%2,%1,%0\";
+ if (INTVAL (operands[2]) == 8)
+ return \"rotl %R3,%1,%0\;cvtbl %0,%0\";
+ return \"rotl %R3,%1,%0\;cvtwl %0,%0\";
+}")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "ro")
+ (match_operand:QI 2 "general_operand" "g")
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "*
+{
+ if (! CONST_INT_P (operands[3]) || ! CONST_INT_P (operands[2])
+ || ! REG_P (operands[0]))
+ return \"extzv %3,%2,%1,%0\";
+ if (INTVAL (operands[2]) == 8)
+ return \"rotl %R3,%1,%0\;movzbl %0,%0\";
+ if (INTVAL (operands[2]) == 16)
+ return \"rotl %R3,%1,%0\;movzwl %0,%0\";
+ if (INTVAL (operands[3]) & 31)
+ return \"rotl %R3,%1,%0\;bicl2 %M2,%0\";
+ if (rtx_equal_p (operands[0], operands[1]))
+ return \"bicl2 %M2,%0\";
+ return \"bicl3 %M2,%1,%0\";
+}")
+
+;; Non-register cases.
+;; nonimmediate_operand is used to make sure that mode-ambiguous cases
+;; don't match these (and therefore match the cases above instead).
+
+(define_insn ""
+ [(set (cc0)
+ (compare
+ (sign_extract:SI (match_operand:QI 0 "memory_operand" "m")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "cmpv %2,%1,%0,%3")
+
+(define_insn ""
+ [(set (cc0)
+ (compare
+ (zero_extract:SI (match_operand:QI 0 "nonimmediate_operand" "rm")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "cmpzv %2,%1,%0,%3")
+
+(define_insn "extv"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (sign_extract:SI (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:QI 2 "general_operand" "g")
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "*
+{
+ if (!REG_P (operands[0]) || !CONST_INT_P (operands[2])
+ || !CONST_INT_P (operands[3])
+ || (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 16)
+ || INTVAL (operands[2]) + INTVAL (operands[3]) > 32
+ || side_effects_p (operands[1])
+ || (MEM_P (operands[1])
+ && mode_dependent_address_p (XEXP (operands[1], 0))))
+ return \"extv %3,%2,%1,%0\";
+ if (INTVAL (operands[2]) == 8)
+ return \"rotl %R3,%1,%0\;cvtbl %0,%0\";
+ return \"rotl %R3,%1,%0\;cvtwl %0,%0\";
+}")
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:QI 2 "general_operand" "")
+ (match_operand:SI 3 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (zero_extract:SI (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:QI 2 "general_operand" "g")
+ (match_operand:SI 3 "general_operand" "nrmT")))]
+ ""
+ "*
+{
+ if (!REG_P (operands[0]) || !CONST_INT_P (operands[2])
+ || !CONST_INT_P (operands[3])
+ || INTVAL (operands[2]) + INTVAL (operands[3]) > 32
+ || side_effects_p (operands[1])
+ || (MEM_P (operands[1])
+ && mode_dependent_address_p (XEXP (operands[1], 0))))
+ return \"extzv %3,%2,%1,%0\";
+ if (INTVAL (operands[2]) == 8)
+ return \"rotl %R3,%1,%0\;movzbl %0,%0\";
+ if (INTVAL (operands[2]) == 16)
+ return \"rotl %R3,%1,%0\;movzwl %0,%0\";
+ if (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[1], 0), 0))
+ && CONST_INT_P (XEXP (XEXP (operands[1], 0), 1))
+ && CONST_INT_P (operands[2])
+ && CONST_INT_P (operands[3]))
+ {
+ HOST_WIDE_INT o = INTVAL (XEXP (XEXP (operands[1], 0), 1));
+ HOST_WIDE_INT l = INTVAL (operands[2]);
+ HOST_WIDE_INT v = INTVAL (operands[3]);
+ if ((o & 3) && (o & 3) * 8 + v + l <= 32)
+ {
+ rtx tmp;
+ tmp = XEXP (XEXP (operands[1], 0), 0);
+ if (o & ~3)
+ tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (o & ~3));
+ operands[1] = gen_rtx_MEM (QImode, tmp);
+ operands[3] = GEN_INT (v + (o & 3) * 8);
+ }
+ if (optimize_size)
+ return \"extzv %3,%2,%1,%0\";
+ }
+ return \"rotl %R3,%1,%0\;bicl2 %M2,%0\";
+}")
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:SI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" ""))
+ (match_operand:SI 3 "general_operand" ""))]
+ ""
+ "")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "+g")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT"))]
+ ""
+ "*
+{
+ if (MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[0], 0), 0))
+ && CONST_INT_P (XEXP (XEXP (operands[0], 0), 1))
+ && CONST_INT_P (operands[1])
+ && CONST_INT_P (operands[2]))
+ {
+ HOST_WIDE_INT o = INTVAL (XEXP (XEXP (operands[0], 0), 1));
+ HOST_WIDE_INT v = INTVAL (operands[2]);
+ HOST_WIDE_INT l = INTVAL (operands[1]);
+ if ((o & 3) && (o & 3) * 8 + v + l <= 32)
+ {
+ rtx tmp;
+ tmp = XEXP (XEXP (operands[0], 0), 0);
+ if (o & ~3)
+ tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (o & ~3));
+ operands[0] = gen_rtx_MEM (QImode, tmp);
+ operands[2] = GEN_INT (v + (o & 3) * 8);
+ }
+ }
+ return \"insv %3,%2,%1,%0\";
+}")
+
+(define_insn ""
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:QI 1 "general_operand" "g")
+ (match_operand:SI 2 "general_operand" "nrmT"))
+ (match_operand:SI 3 "general_operand" "nrmT"))]
+ ""
+ "insv %3,%2,%1,%0")
+
+;; Unconditional jump
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jbr %l0")
+
+;; Conditional jumps
+
+(define_expand "cbranch<mode>4"
+ [(set (cc0)
+ (compare (match_operand:VAXint 1 "nonimmediate_operand" "")
+ (match_operand:VAXint 2 "general_operand" "")))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+(define_expand "cbranch<mode>4"
+ [(set (cc0)
+ (compare (match_operand:VAXfp 1 "general_operand" "")
+ (match_operand:VAXfp 2 "general_operand" "")))
+ (set (pc)
+ (if_then_else
+ (match_operator 0 "ordered_comparison_operator" [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "")
+
+(define_insn "*branch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "j%c0 %l1")
+
+;; Recognize reversed jumps.
+(define_insn "*branch_reversed"
+ [(set (pc)
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
+ [(cc0)
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ ""
+ "j%C0 %l1") ; %C0 negates condition
+
+;; Recognize jbs, jlbs, jbc and jlbc instructions.  Note that the operand
+;; of the jlbs and jlbc insns is SImode in the hardware.  However, if it
+;; is memory, we use QImode in the insn, so we can't use those
+;; instructions for mode-dependent addresses.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:QI 0 "memory_operand" "Q,g")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "I,nrmT"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "@
+ jlbs %0,%l2
+ jbs %1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:QI 0 "memory_operand" "Q,g")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "I,nrmT"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "@
+ jlbc %0,%l2
+ jbc %1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (zero_extract:SI (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "I,nrmT"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "@
+ jlbs %0,%l2
+ jbs %1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (eq (zero_extract:SI (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "general_operand" "I,nrmT"))
+ (const_int 0))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+ "@
+ jlbc %0,%l2
+ jbc %1,%0,%l2")
+
+;; Subtract-and-jump and Add-and-jump insns.
+;; These are not used when output is for the Unix assembler
+;; because it does not know how to modify them to reach far.
+
+;; Normal sob insns.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (gt (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_UNIX_ASM"
+ "jsobgtr %0,%l1")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ge (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ "!TARGET_UNIX_ASM"
+ "jsobgeq %0,%l1")
+
+;; Normal aob insns. Define a version for when operands[1] is a constant.
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (lt (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (const_int 1))
+ (match_operand:SI 1 "general_operand" "nrmT"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int 1)))]
+ "!TARGET_UNIX_ASM"
+ "jaoblss %1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (lt (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (match_operand:SI 1 "general_operand" "nrmT"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int 1)))]
+ "!TARGET_UNIX_ASM && CONST_INT_P (operands[1])"
+ "jaoblss %P1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (le (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (const_int 1))
+ (match_operand:SI 1 "general_operand" "nrmT"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int 1)))]
+ "!TARGET_UNIX_ASM"
+ "jaobleq %1,%0,%l2")
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (le (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (match_operand:SI 1 "general_operand" "nrmT"))
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int 1)))]
+ "!TARGET_UNIX_ASM && CONST_INT_P (operands[1])"
+ "jaobleq %P1,%0,%l2")
+
+;; Something like a sob insn, but compares against -1.
+;; This finds `while (foo--)' which was changed to `while (--foo != -1)'.
+
+(define_insn ""
+ [(set (pc)
+ (if_then_else
+ (ne (match_operand:SI 0 "nonimmediate_operand" "+g")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))]
+ ""
+ "decl %0\;jgequ %l1")
+
+(define_expand "call_pop"
+ [(parallel [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (reg:SI VAX_SP_REGNUM)
+ (plus:SI (reg:SI VAX_SP_REGNUM)
+ (match_operand:SI 3 "immediate_operand" "")))])]
+ ""
+{
+ gcc_assert (INTVAL (operands[3]) <= 255 * 4 && INTVAL (operands[3]) % 4 == 0);
+
+ /* Operand 1 is the number of bytes to be popped by DW_CFA_GNU_args_size
+ during EH unwinding. We must include the argument count pushed by
+ the calls instruction. */
+ operands[1] = GEN_INT (INTVAL (operands[3]) + 4);
+})
+
+(define_insn "*call_pop"
+ [(call (match_operand:QI 0 "memory_operand" "m")
+ (match_operand:SI 1 "const_int_operand" "n"))
+ (set (reg:SI VAX_SP_REGNUM) (plus:SI (reg:SI VAX_SP_REGNUM)
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ ""
+{
+ operands[1] = GEN_INT ((INTVAL (operands[1]) - 4) / 4);
+ return "calls %1,%0";
+})
+
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (reg:SI VAX_SP_REGNUM)
+ (plus:SI (reg:SI VAX_SP_REGNUM)
+ (match_operand:SI 4 "immediate_operand" "")))])]
+ ""
+{
+ gcc_assert (INTVAL (operands[4]) <= 255 * 4 && INTVAL (operands[4]) % 4 == 0);
+
+ /* Operand 2 is the number of bytes to be popped by DW_CFA_GNU_args_size
+ during EH unwinding. We must include the argument count pushed by
+ the calls instruction. */
+ operands[2] = GEN_INT (INTVAL (operands[4]) + 4);
+})
+
+(define_insn "*call_value_pop"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (set (reg:SI VAX_SP_REGNUM) (plus:SI (reg:SI VAX_SP_REGNUM)
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ ""
+ "*
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2]) - 4) / 4);
+ return \"calls %2,%1\";
+}")
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ ""
+ "
+{
+ /* Operand 1 is the number of bytes to be popped by DW_CFA_GNU_args_size
+ during EH unwinding. We must include the argument count pushed by
+ the calls instruction. */
+ operands[1] = GEN_INT (INTVAL (operands[1]) + 4);
+}")
+
+(define_insn "*call"
+ [(call (match_operand:QI 0 "memory_operand" "m")
+ (match_operand:SI 1 "const_int_operand" ""))]
+ ""
+ "calls $0,%0")
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ ""
+ "
+{
+ /* Operand 2 is the number of bytes to be popped by DW_CFA_GNU_args_size
+ during EH unwinding. We must include the argument count pushed by
+ the calls instruction. */
+ operands[2] = GEN_INT (INTVAL (operands[2]) + 4);
+}")
+
+(define_insn "*call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand" "")))]
+ ""
+ "calls $0,%1")
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+ "
+{
+ int i;
+
+ emit_call_insn (gen_call_pop (operands[0], const0_rtx, NULL, const0_rtx));
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+}")
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_BLOCKAGE)]
+ ""
+ "")
+
+(define_insn "return"
+ [(return)]
+ ""
+ "ret")
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "
+{
+ emit_jump_insn (gen_return ());
+ DONE;
+}")
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+;; This had a wider constraint once, and it had trouble.
+;; If you are tempted to try `g', please don't--it's not worth
+;; the risk we will reopen the same bug.
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jmp (%0)")
+
+;; This is here to accept 5 arguments (as passed by expand_end_case)
+;; and pass the first 4 along to the casesi1 pattern that really does
+;; the actual casesi work. We emit a jump here to the default label
+;; _before_ the casesi so that we can be sure that the casesi never
+;; drops through.
+;; This is suboptimal perhaps, but so is much of the rest of this
+;; machine description. For what it's worth, HPPA uses the same trick.
+;;
+;; operand 0 is index
+;; operand 1 is the minimum bound (a const_int)
+;; operand 2 is the maximum bound - minimum bound + 1 (also a const_int)
+;; operand 3 is CODE_LABEL for the table;
+;; operand 4 is the CODE_LABEL to go to if index out of range (i.e. default).
+;;
+;; We emit:
+;; i = index - minimum_bound
+;; if (i > (maximum_bound - minimum_bound + 1)) goto default;
+;; casesi (i, 0, table);
+;;
+(define_expand "casesi"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")]
+ ""
+{
+ rtx test;
+
+ /* i = index - minimum_bound;
+ But only if the lower bound is not already zero. */
+ if (operands[1] != const0_rtx)
+ {
+ rtx index = gen_reg_rtx (SImode);
+ emit_insn (gen_addsi3 (index,
+ operands[0],
+ GEN_INT (-INTVAL (operands[1]))));
+ operands[0] = index;
+ }
+
+ /* if (i > (maximum_bound - minimum_bound + 1)) goto default; */
+ test = gen_rtx_fmt_ee (GTU, VOIDmode, operands[0], operands[2]);
+ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4]));
+
+ /* casesi (i, 0, table); */
+ emit_jump_insn (gen_casesi1 (operands[0], operands[2], operands[3]));
+ DONE;
+})
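+
+;; Editorial worked example: for a switch over cases 3 through 6, the
+;; expander sees operands[1] = 3 and, per the formula above,
+;; operands[2] = 4.  It subtracts 3 from the index by adding -3,
+;; branches to the default label when the result is unsigned-greater
+;; than 4, and hands the normalized index to casesi1 for the table
+;; dispatch.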
+
+;; This insn is a bit of a liar.  It actually falls through if no case
+;; matches. But, we prevent that from ever happening by emitting a jump
+;; before this, see the define_expand above.
+(define_insn "casesi1"
+ [(match_operand:SI 1 "const_int_operand" "n")
+ (set (pc)
+ (plus:SI (sign_extend:SI
+ (mem:HI (plus:SI (mult:SI (match_operand:SI 0 "general_operand" "nrmT")
+ (const_int 2))
+ (pc))))
+ (label_ref:SI (match_operand 2 "" ""))))]
+ ""
+ "casel %0,$0,%1")
+
+(define_insn "pushextsym"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (match_operand:SI 1 "external_symbolic_operand" "i"))]
+ ""
+ "pushab %a1")
+
+(define_insn "movextsym"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (match_operand:SI 1 "external_symbolic_operand" "i"))]
+ ""
+ "movab %a1,%0")
+
+(define_insn "pushlclsym"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (match_operand:SI 1 "local_symbolic_operand" "i"))]
+ ""
+ "pushab %a1")
+
+(define_insn "movlclsym"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (match_operand:SI 1 "local_symbolic_operand" "i"))]
+ ""
+ "movab %a1,%0")
+
+;;- load or push effective address
+;; These come after the move and add/sub patterns
+;; because we don't want pushl $1 turned into pushad 1.
+;; or addl3 r1,r2,r3 turned into movab 0(r1)[r2],r3.
+
+;; It does not work to use constraints to distinguish pushes from moves,
+;; because < matches any autodecrement, not just a push.
+
+(define_insn "pushaddr<mode>"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (match_operand:VAXintQHSD 1 "address_operand" "p"))]
+ ""
+ "pusha<VAXintQHSD:isfx> %a1")
+
+(define_insn "movaddr<mode>"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (match_operand:VAXintQHSD 1 "address_operand" "p"))]
+ ""
+ "mova<VAXintQHSD:isfx> %a1,%0")
+
+(define_insn "pushaddr<mode>"
+ [(set (match_operand:SI 0 "push_operand" "=g")
+ (match_operand:VAXfp 1 "address_operand" "p"))]
+ ""
+ "pusha<VAXfp:fsfx> %a1")
+
+(define_insn "movaddr<mode>"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=g")
+ (match_operand:VAXfp 1 "address_operand" "p"))]
+ ""
+ "mova<VAXfp:fsfx> %a1,%0")
+
+;; These used to be peepholes, but it is more straightforward to do them
+;; as single insns. However, we must force the output to be a register
+;; if it is not an offsettable address so that we know that we can assign
+;; to it twice.
+
+;; If we had a good way of evaluating the relative costs, these could be
+;; machine-independent.
+
+;; Optimize extzv ...,z; andl2 ...,z
+;; or ashl ...,z; andl2 ...,z
+;; with other operands constant. This is what the combiner converts the
+;; above sequences to before attempting to recognize the new insn.
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=ro")
+ (and:SI (ashiftrt:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ "(INTVAL (operands[3]) & ~((1 << (32 - INTVAL (operands[2]))) - 1)) == 0"
+ "*
+{
+ unsigned long mask1 = INTVAL (operands[3]);
+ unsigned long mask2 = (1 << (32 - INTVAL (operands[2]))) - 1;
+
+ if ((mask1 & mask2) != mask1)
+ operands[3] = GEN_INT (mask1 & mask2);
+
+ return \"rotl %R2,%1,%0\;bicl2 %N3,%0\";
+}")
+
+;; left-shift and mask
+;; The only case where `ashl' is better is if the mask only turns off
+;; bits that the ashl would clear anyway, in which case it should have
+;; been optimized away.
+
+(define_insn ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=ro")
+ (and:SI (ashift:SI (match_operand:SI 1 "general_operand" "nrmT")
+ (match_operand:QI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))]
+ ""
+ "*
+{
+ operands[3]
+ = GEN_INT (INTVAL (operands[3]) & ~((1 << INTVAL (operands[2])) - 1));
+ return \"rotl %2,%1,%0\;bicl2 %N3,%0\";
+}")
+
+;; Instruction sequence to sync the VAX instruction stream.
+(define_insn "sync_istream"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_SYNC_ISTREAM)]
+ ""
+ "movpsl -(%|sp)\;pushal 1(%|pc)\;rei")
+
+(define_expand "nonlocal_goto"
+ [(use (match_operand 0 "general_operand" ""))
+ (use (match_operand 1 "general_operand" ""))
+ (use (match_operand 2 "general_operand" ""))
+ (use (match_operand 3 "general_operand" ""))]
+ ""
+{
+ rtx lab = operands[1];
+ rtx stack = operands[2];
+ rtx fp = operands[3];
+
+ emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)));
+ emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx));
+
+ emit_move_insn (hard_frame_pointer_rtx, fp);
+ emit_stack_restore (SAVE_NONLOCAL, stack);
+
+ emit_use (hard_frame_pointer_rtx);
+ emit_use (stack_pointer_rtx);
+
+  /* We'll convert this to a direct jump via a peephole optimization.  */
+ emit_indirect_jump (copy_to_reg (lab));
+ emit_barrier ();
+ DONE;
+})
diff --git a/gcc/config/vax/vax.opt b/gcc/config/vax/vax.opt
new file mode 100644
index 000000000..82d6dee64
--- /dev/null
+++ b/gcc/config/vax/vax.opt
@@ -0,0 +1,51 @@
+; Options for the VAX port of the compiler.
+
+; Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+md
+Target RejectNegative InverseMask(G_FLOAT)
+Generate DFLOAT double precision code
+
+md-float
+Target RejectNegative InverseMask(G_FLOAT)
+Generate DFLOAT double precision code
+
+mg
+Target RejectNegative Mask(G_FLOAT)
+Generate GFLOAT double precision code
+
+mg-float
+Target RejectNegative Mask(G_FLOAT) MaskExists
+Generate GFLOAT double precision code
+
+mgnu
+Target RejectNegative InverseMask(UNIX_ASM)
+Generate code for GNU assembler (gas)
+
+munix
+Target RejectNegative Mask(UNIX_ASM)
+Generate code for UNIX assembler
+
+mvaxc-alignment
+Target RejectNegative Mask(VAXC_ALIGNMENT)
+Use VAXC structure conventions
+
+mqmath
+Target Mask(QMATH)
+Use new adddi3/subdi3 patterns
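+
+; Editorial usage note: each record above is consumed by GCC's option
+; machinery, so "gcc -mg" sets the G_FLOAT mask bit while "gcc -md"
+; clears it (InverseMask); the "-float" spellings are aliases for the
+; same mask bits.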
diff --git a/gcc/config/vms/t-vms b/gcc/config/vms/t-vms
new file mode 100644
index 000000000..d02b66614
--- /dev/null
+++ b/gcc/config/vms/t-vms
@@ -0,0 +1,37 @@
+# Copyright (C) 2009, 2010
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+STMP_FIXPROTO =
+STMP_FIXINC =
+LIMITS_H_TEST = false
+
+# Under VMS, directory names cannot contain dots.
+version:=$(shell echo $(BASEVER_c) | sed -e 's/\./_/g')
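+# For example (editorial note), BASEVER_c = 4.6.4 becomes "4_6_4" here,
+# keeping generated directory names free of dots.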
+
+VMS_EXTRA_PARTS=vcrt0.o pcrt0.o
+
+# Assemble startup files.
+$(T)vcrt0.o: $(srcdir)/config/vms/vms-ucrt0.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)vcrt0.o $(srcdir)/config/vms/vms-ucrt0.c
+
+$(T)pcrt0.o: $(srcdir)/config/vms/vms-ucrt0.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)pcrt0.o -DCRT0_POSIX_EXIT $(srcdir)/config/vms/vms-ucrt0.c
+
diff --git a/gcc/config/vms/vms-crtl-64.h b/gcc/config/vms/vms-crtl-64.h
new file mode 100644
index 000000000..84b653d79
--- /dev/null
+++ b/gcc/config/vms/vms-crtl-64.h
@@ -0,0 +1,195 @@
+/* Definitions of target machine for GNU compiler.  64-bit VMS version.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Douglas B Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ Correlation array of 64bit standard CRTL names with DECCRTL
+ function names. Currently contains only a partial list,
+ e.g. those functions use in GNAT and GCC. Memory allocation
+   e.g. those functions used in GNAT and GCC.  Memory allocation
+   routines are 32-bit, but this can be overridden by the -mmalloc64
+   subtarget switch.
+ Note: Please keep in alphabetical order.
+*/
+
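+/* Editorial sketch of how a table like this can be consumed; the
+   helper below is hypothetical, not the mechanism GCC itself uses:
+
+     struct crtl_entry { const char *name, *deccname; int flags; };
+     static const struct crtl_entry crtl_map[] = CRTL_NAMES;
+
+     static const char *
+     map_crtl_name (const char *name)
+     {
+       int i;
+       for (i = 0; crtl_map[i].name; i++)
+         if (strcmp (crtl_map[i].name, name) == 0)
+           return crtl_map[i].deccname;
+       return name;
+     }
+
+   The {NULL, NULL, 0} sentinel row terminates the scan.  */
+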
+#undef CRTL_NAMES
+#define CRTL_NAMES \
+{ \
+{"_calloc32", "decc$calloc", 0}, \
+{"_malloc32", "decc$malloc", 0}, \
+{"_realloc32", "decc$realloc", 0}, \
+{"_strdup32", "decc$strdup", 0}, \
+{"abs", "decc$abs", 0}, \
+{"abort", "decc$abort", 0}, \
+{"access", "decc$access", 0}, \
+{"accept", "decc$accept", 0}, \
+{"acos", "decc$tacos", 0}, \
+{"alarm", "decc$alarm", 0}, \
+{"asin", "decc$tasin", 0}, \
+{"atan", "decc$tatan", 0}, \
+{"atan2", "decc$tatan2", 0}, \
+{"atexit", "decc$atexit", 0}, \
+{"atoi", "decc$atoi", 0}, \
+{"atoll", "decc$atoll", 0}, \
+{"atoq", "decc$atoq", 0}, \
+{"basename", "decc$_basename64", 0}, \
+{"bcmp", "decc$bcmp", 0}, \
+{"bcopy", "decc$bcopy", 0}, \
+{"bsearch", "decc$_bsearch64", 0}, \
+{"bzero", "decc$bzero", 0}, \
+{"calloc", "decc$calloc", 0}, \
+{"ceil", "decc$tceil", 0}, \
+{"chdir", "decc$chdir", 0}, \
+{"chown", "decc$chown", 0}, \
+{"clearerr", "decc$clearerr", 0}, \
+{"clock", "decc$clock", 0}, \
+{"close", "decc$close", 0}, \
+{"cos", "decc$tcos", 0}, \
+{"connect", "decc$connect", 0}, \
+{"ctime", "decc$ctime", 0}, \
+{"dup", "decc$dup", 0}, \
+{"dup2", "decc$dup2", 0}, \
+{"exit", "decc$exit", 0}, \
+{"exp", "decc$texp", 0}, \
+{"fabs", "decc$tfabs", 0}, \
+{"fclose", "decc$fclose", 0}, \
+{"fdopen", "decc$fdopen", 0}, \
+{"fgetc", "decc$fgetc", 0}, \
+{"fgets", "decc$_fgets64", 0}, \
+{"fflush", "decc$fflush", 0}, \
+{"ffs", "decc$ffs", 0}, \
+{"floor", "decc$tfloor", 0}, \
+{"fopen", "decc$fopen", 0}, \
+{"fputc", "decc$fputc", 0}, \
+{"fputs", "decc$fputs", 0}, \
+{"free", "decc$free", 0}, \
+{"fread", "decc$fread", 0}, \
+{"freopen", "decc$freopen", 0}, \
+{"fseek", "decc$fseek", 0}, \
+{"ftell", "decc$ftell", 0}, \
+{"fwrite", "decc$fwrite", 0}, \
+{"getcwd", "decc$_getcwd64", 0}, \
+{"getegid", "decc$getegid", 0}, \
+{"getenv", "decc$getenv", 0}, \
+{"geteuid", "decc$geteuid", 0}, \
+{"getgid", "decc$getgid", 0}, \
+{"gethostbyaddr","decc$gethostbyaddr",0}, \
+{"gethostbyname","decc$gethostbyname",0}, \
+{"getpagesize", "decc$getpagesize", 0}, \
+{"getpid", "decc$getpid", 0}, \
+{"getservbyname","decc$getservbyname",0}, \
+{"getservbyport","decc$getservbyport",0}, \
+{"gettimeofday", "decc$gettimeofday", 0}, \
+{"getuid", "decc$getuid", 0}, \
+{"htons", "decc$htons", 0}, \
+{"iconv", "decc$iconv", 0}, \
+{"index", "decc$_index64", 0}, \
+{"isatty", "decc$isatty", 0}, \
+{"isdigit", "decc$isdigit", 0}, \
+{"kill", "decc$kill", 0}, \
+{"log", "decc$tlog", 0}, \
+{"log10", "decc$tlog10", 0}, \
+{"lseek", "decc$lseek", 0}, \
+{"ioctl", "decc$ioctl", 0}, \
+{"malloc", "decc$malloc", 0}, \
+{"mbstowcs", "decc$_mbstowcs64", 0}, \
+{"memchr", "decc$_memchr64", 0}, \
+{"memcmp", "decc$memcmp", 0}, \
+{"memcpy", "decc$_memcpy64", 0}, \
+{"memmove", "decc$_memmove64", 0}, \
+{"memset", "decc$_memset64", 0}, \
+{"mkstemp", "decc$mkstemp", 0}, \
+{"mktemp", "decc$_mktemp64", 0}, \
+{"mmap", "decc$_mmap64", 0}, \
+{"munmap", "decc$munmap", 0}, \
+{"nl_langinfo", "decc$nl_langinfo", 0}, \
+{"open", "decc$open", 0}, \
+{"pclose", "decc$pclose", 0}, \
+{"popen", "decc$popen", 0}, \
+{"pow", "decc$tpow", 0}, \
+{"printf", "decc$txprintf", 0}, \
+{"putenv", "decc$putenv", 0}, \
+{"puts", "decc$puts", 0}, \
+{"random", "decc$random", 0}, \
+{"read", "decc$read", 0}, \
+{"realloc", "decc$realloc", 0}, \
+{"recv", "decc$recv", 0}, \
+{"recvfrom", "decc$recvfrom", 0}, \
+{"recvmsg", "decc$__bsd44___recvmsg64", 0}, \
+{"rename", "decc$rename", 0}, \
+{"rewind", "decc$rewind", 0}, \
+{"rindex", "decc$_rindex64", 0}, \
+{"rmdir", "decc$rmdir", 0}, \
+{"send", "decc$send", 0}, \
+{"sendmsg", "decc$__bsd44___sendmsg64", 0}, \
+{"sendto", "decc$sendto", 0}, \
+{"setenv", "decc$setenv", 0}, \
+{"setlocale", "decc$setlocale", 0}, \
+{"setvbuf", "decc$setvbuf", 0}, \
+{"signal", "decc$signal", 0}, \
+{"sigsetmask", "decc$sigsetmask", 0}, \
+{"sin", "decc$tsin", 0}, \
+{"snprintf", "decc$txsnprintf", 0}, \
+{"socket", "decc$socket", 0}, \
+{"sqrt", "decc$tsqrt", 0}, \
+{"strcasecmp", "decc$strcasecmp", 0}, \
+{"strchr", "decc$_strchr64", 0}, \
+{"strcpy", "decc$_strcpy64", 0}, \
+{"strdup", "decc$strdup", 0}, \
+{"strerror", "decc$strerror", 0}, \
+{"strlen", "decc$strlen", 0}, \
+{"strncasecmp", "decc$strncasecmp", 0}, \
+{"strncmp", "decc$strncmp", 0}, \
+{"strncpy", "decc$_strncpy64", 0}, \
+{"strrchr", "decc$_strrchr64", 0}, \
+{"strstr", "decc$_strstr64", 0}, \
+{"strtod", "decc$t_strtod64", 0}, \
+{"strtol", "decc$_strtoll64", 0}, \
+{"strtoul", "decc$_strtoull64", 0}, \
+{"sysconf", "decc$sysconf", 0}, \
+{"system", "decc$system", 0}, \
+{"tan", "decc$ttan", 0}, \
+{"time", "decc$time", 0}, \
+{"times", "decc$times", 0}, \
+{"tmpfile", "decc$tmpfile", 0}, \
+{"tmpnam", "decc$_tmpnam64", 0}, \
+{"ungetc", "decc$ungetc", 0}, \
+{"unlink", "decc$unlink", 0}, \
+{"umask", "decc$umask", 0}, \
+{"utime", "decc$utime", 0}, \
+{"wait", "decc$wait", 0}, \
+{"waitpid", "decc$waitpid", 0}, \
+{"wcswidth", "decc$wcswidth", 0}, \
+{"write", "decc$write", 0}, \
+{"vfprintf", "decc$txvfprintf", 0}, \
+{"vprintf", "decc$txvprintf", 0}, \
+{"vsprintf", "decc$txvsprintf", 0}, \
+{"vsnprintf", "decc$txvsnprintf", 0}, \
+{NULL, NULL, 0} \
+}
+
+/* Initialization of libfuncs that are 32/64-bit memory specific.  */
+
+#undef MEM_LIBFUNCS_INIT
+#define MEM_LIBFUNCS_INIT \
+do { \
+ memcpy_libfunc = init_one_libfunc ("decc$_memcpy64"); \
+ memmove_libfunc = init_one_libfunc ("decc$_memmove64"); \
+ memset_libfunc = init_one_libfunc ("decc$_memset64"); \
+} while (0)
diff --git a/gcc/config/vms/vms-crtl.h b/gcc/config/vms/vms-crtl.h
new file mode 100644
index 000000000..8f09176dc
--- /dev/null
+++ b/gcc/config/vms/vms-crtl.h
@@ -0,0 +1,191 @@
+/* Definitions of target machine for GNU compiler.  32-bit VMS version.
+ Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Douglas B Rupp (rupp@gnat.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/*
+ Correlation array of standard CRTL names with DECCRTL
+ function names. Currently contains only a partial list,
+   e.g. those functions used in GNAT and GCC.
+
+ Note: Please keep in alphabetical order.
+*/
+
+#define CRTL_NAMES \
+{ \
+{"_calloc32", "decc$calloc", 0}, \
+{"_malloc32", "decc$malloc", 0}, \
+{"_realloc32", "decc$realloc", 0}, \
+{"_strdup32", "decc$strdup", 0}, \
+{"abs", "decc$abs", 0}, \
+{"abort", "decc$abort", 0}, \
+{"access", "decc$access", 0}, \
+{"accept", "decc$accept", 0}, \
+{"acos", "decc$tacos", 0}, \
+{"alarm", "decc$alarm", 0}, \
+{"asin", "decc$tasin", 0}, \
+{"atan", "decc$tatan", 0}, \
+{"atan2", "decc$tatan2", 0}, \
+{"atexit", "decc$atexit", 0}, \
+{"atoi", "decc$atoi", 0}, \
+{"atoll", "decc$atoll", 0}, \
+{"atoq", "decc$atoq", 0}, \
+{"basename", "decc$basename", 0}, \
+{"bcmp", "decc$bcmp", 0}, \
+{"bcopy", "decc$bcopy", 0}, \
+{"bsearch", "decc$bsearch", 0}, \
+{"bzero", "decc$bzero", 0}, \
+{"calloc", "decc$calloc", 0}, \
+{"ceil", "decc$tceil", 0}, \
+{"chdir", "decc$chdir", 0}, \
+{"chown", "decc$chown", 0}, \
+{"clearerr", "decc$clearerr", 0}, \
+{"clock", "decc$clock", 0}, \
+{"close", "decc$close", 0}, \
+{"cos", "decc$tcos", 0}, \
+{"connect", "decc$connect", 0}, \
+{"ctime", "decc$ctime", 0}, \
+{"dup", "decc$dup", 0}, \
+{"dup2", "decc$dup2", 0}, \
+{"exit", "decc$exit", 0}, \
+{"exp", "decc$texp", 0}, \
+{"fabs", "decc$tfabs", 0}, \
+{"fclose", "decc$fclose", 0}, \
+{"fdopen", "decc$fdopen", 0}, \
+{"fgetc", "decc$fgetc", 0}, \
+{"fgets", "decc$fgets", 0}, \
+{"fflush", "decc$fflush", 0}, \
+{"ffs", "decc$ffs", 0}, \
+{"floor", "decc$tfloor", 0}, \
+{"fopen", "decc$fopen", 0}, \
+{"fputc", "decc$fputc", 0}, \
+{"fputs", "decc$fputs", 0}, \
+{"free", "decc$free", 0}, \
+{"fread", "decc$fread", 0}, \
+{"freopen", "decc$freopen", 0}, \
+{"fseek", "decc$fseek", 0}, \
+{"ftell", "decc$ftell", 0}, \
+{"fwrite", "decc$fwrite", 0}, \
+{"getcwd", "decc$getcwd", 0}, \
+{"getegid", "decc$getegid", 0}, \
+{"getenv", "decc$getenv", 0}, \
+{"geteuid", "decc$geteuid", 0}, \
+{"getgid", "decc$getgid", 0}, \
+{"gethostbyaddr","decc$gethostbyaddr",0}, \
+{"gethostbyname","decc$gethostbyname",0}, \
+{"getpagesize", "decc$getpagesize", 0}, \
+{"getpid", "decc$getpid", 0}, \
+{"getservbyname","decc$getservbyname",0}, \
+{"getservbyport","decc$getservbyport",0}, \
+{"gettimeofday", "decc$gettimeofday", 0}, \
+{"getuid", "decc$getuid", 0}, \
+{"htons", "decc$htons", 0}, \
+{"iconv", "decc$iconv", 0}, \
+{"index", "decc$index", 0}, \
+{"isatty", "decc$isatty", 0}, \
+{"isdigit", "decc$isdigit", 0}, \
+{"kill", "decc$kill", 0}, \
+{"log", "decc$tlog", 0}, \
+{"log10", "decc$tlog10", 0}, \
+{"lseek", "decc$lseek", 0}, \
+{"ioctl", "decc$ioctl", 0}, \
+{"malloc", "decc$malloc", 0}, \
+{"mbstowcs", "decc$mbstowcs", 0}, \
+{"memchr", "decc$memchr", 0}, \
+{"memcmp", "decc$memcmp", 0}, \
+{"memcpy", "decc$memcpy", 0}, \
+{"memmove", "decc$memmove", 0}, \
+{"memset", "decc$memset", 0}, \
+{"mkstemp", "decc$mkstemp", 0}, \
+{"mktemp", "decc$mktemp", 0}, \
+{"mmap", "decc$mmap", 0}, \
+{"munmap", "decc$munmap", 0}, \
+{"nl_langinfo", "decc$nl_langinfo", 0}, \
+{"open", "decc$open", 0}, \
+{"pclose", "decc$pclose", 0}, \
+{"popen", "decc$popen", 0}, \
+{"pow", "decc$tpow", 0}, \
+{"printf", "decc$txprintf", 0}, \
+{"putenv", "decc$putenv", 0}, \
+{"puts", "decc$puts", 0}, \
+{"random", "decc$random", 0}, \
+{"read", "decc$read", 0}, \
+{"realloc", "decc$realloc", 0}, \
+{"recv", "decc$recv", 0}, \
+{"recvfrom", "decc$recvfrom", 0}, \
+{"recvmsg", "decc$__bsd44___recvmsg32", 0}, \
+{"rename", "decc$rename", 0}, \
+{"rewind", "decc$rewind", 0}, \
+{"rindex", "decc$rindex", 0}, \
+{"rmdir", "decc$rmdir", 0}, \
+{"send", "decc$send", 0}, \
+{"sendmsg", "decc$__bsd44___sendmsg32", 0}, \
+{"sendto", "decc$sendto", 0}, \
+{"setenv", "decc$setenv", 0}, \
+{"setlocale", "decc$setlocale", 0}, \
+{"setvbuf", "decc$setvbuf", 0}, \
+{"signal", "decc$signal", 0}, \
+{"sigsetmask", "decc$sigsetmask", 0}, \
+{"sin", "decc$tsin", 0}, \
+{"snprintf", "decc$txsnprintf", 0}, \
+{"socket", "decc$socket", 0}, \
+{"sqrt", "decc$tsqrt", 0}, \
+{"strcasecmp", "decc$strcasecmp", 0}, \
+{"strchr", "decc$strchr", 0}, \
+{"strcpy", "decc$strcpy", 0}, \
+{"strdup", "decc$strdup", 0}, \
+{"strerror", "decc$strerror", 0}, \
+{"strlen", "decc$strlen", 0}, \
+{"strncasecmp", "decc$strncasecmp", 0}, \
+{"strncmp", "decc$strncmp", 0}, \
+{"strncpy", "decc$strncpy", 0}, \
+{"strrchr", "decc$strrchr", 0}, \
+{"strstr", "decc$strstr", 0}, \
+{"strtod", "decc$tstrtod", 0}, \
+{"strtol", "decc$strtoll", 0}, \
+{"strtoul", "decc$strtoull", 0}, \
+{"sysconf", "decc$sysconf", 0}, \
+{"system", "decc$system", 0}, \
+{"tan", "decc$ttan", 0}, \
+{"time", "decc$time", 0}, \
+{"times", "decc$times", 0}, \
+{"tmpfile", "decc$tmpfile", 0}, \
+{"tmpnam", "decc$tmpnam", 0}, \
+{"ungetc", "decc$ungetc", 0}, \
+{"unlink", "decc$unlink", 0}, \
+{"umask", "decc$umask", 0}, \
+{"utime", "decc$utime", 0}, \
+{"wait", "decc$wait", 0}, \
+{"waitpid", "decc$waitpid", 0}, \
+{"wcswidth", "decc$wcswidth", 0}, \
+{"write", "decc$write", 0}, \
+{"vfprintf", "decc$txvfprintf", 0}, \
+{"vprintf", "decc$txvprintf", 0}, \
+{"vsprintf", "decc$txvsprintf", 0}, \
+{"vsnprintf", "decc$txvsnprintf", 0}, \
+{NULL, NULL, 0} \
+}
+
+/* Initialization of libfuncs that are 32/64-bit memory specific.  */
+
+#define MEM_LIBFUNCS_INIT \
+do { \
+ memcpy_libfunc = init_one_libfunc ("decc$memcpy"); \
+ memmove_libfunc = init_one_libfunc ("decc$memmove"); \
+ memset_libfunc = init_one_libfunc ("decc$memset"); \
+} while (0)
diff --git a/gcc/config/vms/vms-ucrt0.c b/gcc/config/vms/vms-ucrt0.c
new file mode 100644
index 000000000..344b59520
--- /dev/null
+++ b/gcc/config/vms/vms-ucrt0.c
@@ -0,0 +1,127 @@
+/* VMS crt0 returning Unix style condition codes.
+ Copyright (C) 2001, 2009, 2010 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Lots of cheating to handle 32-bit/64-bit pointer conversions.
+   We use 'long long' for 64-bit pointers and 'int' for 32-bit pointers.  */
+
+extern void decc$main (void *arg1, void *arg2, void *arg3,
+ void *image_file_desc, void *arg5, void *arg6,
+ int *, int *, int *);
+extern int main (int, char **, char **);
+extern int _malloc32 (int);
+
+#ifdef __ia64__
+#define MAIN_ASM_NAME asm ("ELF$TFRADR")
+#else
+#define MAIN_ASM_NAME
+#endif
+
+int __main (void *arg1, void *arg2, void *arg3,
+ void *image_file_desc, void *arg5, void *arg6) MAIN_ASM_NAME;
+
+/* From errnodef.h, but we need to emulate the globalval. */
+extern int C$_EXIT1;
+
+/* From stsdef.h */
+#define STS$V_MSG_NO 0x03
+#define STS$M_INHIB_MSG 0x10000000
+
+/* From ssdef.h */
+#define SS$_NORMAL 1
+
+int
+__main (void *arg1, void *arg2, void *arg3,
+ void *image_file_desc, void *arg5, void *arg6)
+{
+ int argc;
+ int argv;
+ int envp;
+ int status;
+ int i;
+ long long *long_argv;
+ long long *long_envp;
+
+  /* The argv and envp arrays are 32-bit pointers to 32-bit pointers.  */
+ decc$main (arg1, arg2, arg3, image_file_desc,
+ arg5, arg6, &argc, &argv, &envp);
+
+ if (sizeof (void *) == 8)
+ {
+ /* Reallocate argv and envp with 64 bit pointers. */
+ long_argv = (long long *)
+ (long long) _malloc32 (sizeof (long long) * (argc + 1));
+
+ for (i = 0; i < argc; i++)
+ long_argv[i] = ((int *) (long long) argv)[i];
+
+ long_argv[argc] = 0;
+
+ for (i = 0; ((int *) (long long) envp)[i]; i++)
+ ;
+ long_envp = (long long *)
+ (long long) _malloc32 (sizeof (long long) * (i + 1));
+
+ for (i = 0; ((int *) (long long) envp)[i]; i++)
+ long_envp[i] = ((int *) (long long) envp)[i];
+
+ long_envp[i] = 0;
+ }
+ else
+ {
+ long_argv = (long long *) argv;
+ long_envp = (long long *) envp;
+ }
+ status = main (argc, (char **)long_argv, (char **)long_envp);
+
+#ifdef CRT0_POSIX_EXIT
+ /* Map into a range of 0 - 255. */
+ status = status & 255;
+
+ if (status > 0)
+ {
+ int save_status = status;
+
+ status = (long) &C$_EXIT1 + ((status - 1) << STS$V_MSG_NO);
+
+ /* An exit failure status requires a "severe" error. All status values
+ are defined in errno with a successful (1) severity but can be
+	 changed to an error (2) severity by adding 1.  In addition, for
+	 compatibility with UNIX exit() routines, we inhibit a run-time error
+ message from being generated on exit(1). */
+
+ if (save_status == 1)
+ {
+ status++;
+ status |= STS$M_INHIB_MSG;
+ }
+ }
+ else
+ status = SS$_NORMAL;
+#endif /* CRT0_POSIX_EXIT */
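+
+  /* Editorial worked example: exit(1) maps to &C$_EXIT1, which the
+     branch above then bumps by 1 to an error (2) severity and flags
+     with STS$M_INHIB_MSG so no message is printed; exit(3) maps to
+     &C$_EXIT1 + (2 << STS$V_MSG_NO) and is left alone.  */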
+
+ return status;
+}
diff --git a/gcc/config/vms/vms.opt b/gcc/config/vms/vms.opt
new file mode 100644
index 000000000..a1713b811
--- /dev/null
+++ b/gcc/config/vms/vms.opt
@@ -0,0 +1,31 @@
+; Copyright (C) 2009, 2011 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+map
+Target RejectNegative
+
+mmalloc64
+Target Report Mask(MALLOC64)
+Malloc data into P2 space
+
+mdebug-main=
+Target RejectNegative Joined Var(vms_debug_main)
+Set name of main routine for the debugger
+
+mvms-return-codes
+Target RejectNegative
diff --git a/gcc/config/vms/x-vms b/gcc/config/vms/x-vms
new file mode 100644
index 000000000..b232c8e68
--- /dev/null
+++ b/gcc/config/vms/x-vms
@@ -0,0 +1,27 @@
+# Copyright (C) 2001, 2002, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LN = cp -p
+LN_S = cp -p
+
+# Doesn't work on VMS
+USE_COLLECT2=
+
+# There are no man pages on VMS
+POD2MAN = false
diff --git a/gcc/config/vms/xm-vms.h b/gcc/config/vms/xm-vms.h
new file mode 100644
index 000000000..7907f9263
--- /dev/null
+++ b/gcc/config/vms/xm-vms.h
@@ -0,0 +1,58 @@
+/* Configuration for GCC for hosting on VMS
+ using a Unix style C library.
+ Copyright (C) 1996, 1997, 2001, 2004, 2007, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* A couple of conditionals for the execution machine are controlled here.  */
+#ifndef VMS
+#define VMS
+#endif
+
+/* Causes exit() to be redefined to __posix_exit() and
+ Posix compatible failure and success codes to be used. */
+#define _POSIX_EXIT 1
+
+/* Open files in stream mode if not otherwise explicitly specified. */
+#define __UNIX_FOPEN 1
+
+/* Write to stdout using fputc to avoid record terminators in pipes. */
+#define __UNIX_FWRITE 1
+
+#define STDC_HEADERS 1
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
+#define HOST_OBJECT_SUFFIX ".obj"
+
+#define DUMPFILE_FORMAT "_%02d_"
+
+#define DELETE_IF_ORDINARY(NAME,ST,VERBOSE_FLAG) \
+do \
+ { \
+ while (stat (NAME, &ST) >= 0 && S_ISREG (ST.st_mode)) \
+ if (unlink (NAME) < 0) \
+ { \
+ if (VERBOSE_FLAG) \
+ perror_with_name (NAME); \
+ break; \
+ } \
+ } while (0)
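+
+/* Editorial note on the loop above: VMS keeps multiple numbered
+   versions of a file, and one unlink removes only the most recent, so
+   deletion must repeat until stat no longer finds an ordinary file
+   under NAME.  */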
+
+#define STANDARD_EXEC_PREFIX "/gnu/libexec/gcc/"
+#define STANDARD_STARTFILE_PREFIX "/gnu/lib/"
+#define STANDARD_INCLUDE_DIR "/gnu/include"
diff --git a/gcc/config/vms/xm-vms64.h b/gcc/config/vms/xm-vms64.h
new file mode 100644
index 000000000..9e77f890a
--- /dev/null
+++ b/gcc/config/vms/xm-vms64.h
@@ -0,0 +1,23 @@
+/* Configuration for GCC for hosting on 64bit VMS
+ using a Unix style C library.
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HOST_LONG_FORMAT "ll"
+#define HOST_PTR_PRINTF "%llp"
diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h
new file mode 100644
index 000000000..6a6d1097f
--- /dev/null
+++ b/gcc/config/vx-common.h
@@ -0,0 +1,94 @@
+/* Target-independent configuration for VxWorks and VxWorks AE.
+ Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* VxWorks headers are C++-aware. */
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C
+
+/* Most of these will probably be overridden by subsequent headers. We
+ undefine them here just in case, and define VXWORKS_ versions of each,
+ to be used in port-specific vxworks.h. */
+#undef LIB_SPEC
+#undef LINK_SPEC
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC VXWORKS_LIBGCC_SPEC
+#undef STARTFILE_SPEC
+#undef ENDFILE_SPEC
+
+/* Most of these macros are overridden in "config/vxworks.h" or
+ "config/vxworksae.h" and are here merely for documentation
+ purposes. */
+#define VXWORKS_ADDITIONAL_CPP_SPEC ""
+#define VXWORKS_LIB_SPEC ""
+#define VXWORKS_LINK_SPEC ""
+#define VXWORKS_LIBGCC_SPEC ""
+#define VXWORKS_STARTFILE_SPEC ""
+#define VXWORKS_ENDFILE_SPEC ""
+
+/* VxWorks cannot have dots in constructor labels, because it uses a
+ mutant variation of collect2 that generates C code instead of
+ assembly. Thus each constructor label must be a legitimate C
+ symbol. FIXME: Have VxWorks use real collect2 instead. */
+#undef NO_DOLLAR_IN_LABEL
+#define NO_DOT_IN_LABEL
+
+/* VxWorks uses wchar_t == unsigned short (UCS2) on all architectures. */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+/* Likewise wint_t. */
+#undef WINT_TYPE
+#define WINT_TYPE "short unsigned int"
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 16
+
+/* Dwarf2 unwind info is not supported. */
+#undef DWARF2_UNWIND_INFO
+#define DWARF2_UNWIND_INFO 0
+
+/* VxWorks uses DWARF2. */
+#define DWARF2_DEBUGGING_INFO 1
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* None of these other formats is supported. */
+#undef DWARF_DEBUGGING_INFO
+#undef DBX_DEBUGGING_INFO
+#undef SDB_DEBUGGING_INFO
+#undef XCOFF_DEBUGGING_INFO
+#undef VMS_DEBUGGING_INFO
+
+/* Kernel mode doesn't have ctors/dtors, but RTP mode does. */
+#define TARGET_HAVE_CTORS_DTORS false
+#define VXWORKS_OVERRIDE_OPTIONS /* empty */
+
+/* No math library needed. */
+#define MATH_LIBRARY ""
+
+/* No profiling. */
+#define VXWORKS_FUNCTION_PROFILER(FILE, LABELNO) do \
+{ \
+ sorry ("profiler support for VxWorks"); \
+} while (0)
+
+/* We occasionally need to distinguish between the VxWorks variants. */
+#define VXWORKS_KIND_NORMAL 1
+#define VXWORKS_KIND_AE 2
diff --git a/gcc/config/vxlib-tls.c b/gcc/config/vxlib-tls.c
new file mode 100644
index 000000000..c4696768f
--- /dev/null
+++ b/gcc/config/vxlib-tls.c
@@ -0,0 +1,362 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Zack Weinberg <zack@codesourcery.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Threads compatibility routines for libgcc2 for VxWorks.
+ These are out-of-line routines called from gthr-vxworks.h.
+
+ This file provides the TLS related support routines, calling specific
+ VxWorks kernel entry points for this purpose. The base VxWorks 5.x kernels
+ don't feature these entry points, and we provide gthr_supp_vxw_5x.c as an
+ option to fill this gap. Asking users to rebuild a kernel is not to be
+ taken lightly, still, so we have isolated these routines from the rest of
+ vxlib to ensure that the kernel dependencies are only dragged when really
+ necessary. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "gthr.h"
+
+#if defined(__GTHREADS)
+#include <vxWorks.h>
+#ifndef __RTP__
+#include <vxLib.h>
+#endif
+#include <taskLib.h>
+#ifndef __RTP__
+#include <taskHookLib.h>
+#else
+# include <errno.h>
+#endif
+
+/* Thread-local storage.
+
+ We reserve a field in the TCB to point to a dynamically allocated
+ array which is used to store TLS values. A TLS key is simply an
+ offset in this array. The exact location of the TCB field is not
+ known to this code nor to vxlib.c -- all access to it indirects
+ through the routines __gthread_get_tls_data and
+ __gthread_set_tls_data, which are provided by the VxWorks kernel.
+
+ There is also a global array which records which keys are valid and
+ which have destructors.
+
+ A task delete hook is installed to execute key destructors. The
+ routines __gthread_enter_tls_dtor_context and
+ __gthread_leave_tls_dtor_context, which are also provided by the
+ kernel, ensure that it is safe to call free() on memory allocated
+ by the task being deleted. (This is a no-op on VxWorks 5, but
+ a major undertaking on AE.)
+
+ The task delete hook is only installed when at least one thread
+ has TLS data. This is a necessary precaution, to allow this module
+   to be unloaded; a module with a hook installed cannot be removed.
+
+ Since this interface is used to allocate only a small number of
+ keys, the table size is small and static, which simplifies the
+ code quite a bit. Revisit this if and when it becomes necessary. */
+
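+/* Editorial usage sketch (the key and destructor names are
+   hypothetical, not part of this interface):
+
+     static __gthread_key_t obj_key;
+
+     static void obj_dtor (void *p) { free (p); }
+
+     void set_up (void)
+     {
+       void *obj = malloc (16);
+       if (__gthread_key_create (&obj_key, obj_dtor) == 0 && obj)
+         __gthread_setspecific (obj_key, obj);
+     }
+
+   The destructor then runs for the thread's value when the task is
+   deleted, via the delete hook installed below.  */
+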
+#define MAX_KEYS 4
+
+/* This is the structure pointed to by the pointer returned
+ by __gthread_get_tls_data. */
+struct tls_data
+{
+ int *owner;
+ void *values[MAX_KEYS];
+ unsigned int generation[MAX_KEYS];
+};
+
+/* To make sure we only delete TLS data associated with this object,
+ include a pointer to a local variable in the TLS data object. */
+static int self_owner;
+
+/* Flag to check whether the delete hook is installed. Once installed
+ it is only removed when unloading this module. */
+static volatile int delete_hook_installed;
+
+/* kernel provided routines */
+extern void *__gthread_get_tls_data (void);
+extern void __gthread_set_tls_data (void *data);
+
+extern void __gthread_enter_tls_dtor_context (void);
+extern void __gthread_leave_tls_dtor_context (void);
+
+
+/* This is a global structure which records all of the active keys.
+
+ A key is potentially valid (i.e. has been handed out by
+ __gthread_key_create) iff its generation count in this structure is
+ even. In that case, the matching entry in the dtors array is a
+ routine to be called when a thread terminates with a valid,
+ non-NULL specific value for that key.
+
+ A key is actually valid in a thread T iff the generation count
+ stored in this structure is equal to the generation count stored in
+ T's specific-value structure. */
+
+typedef void (*tls_dtor) (void *);
+
+struct tls_keys
+{
+ tls_dtor dtor[MAX_KEYS];
+ unsigned int generation[MAX_KEYS];
+};
+
+#define KEY_VALID_P(key) !(tls_keys.generation[key] & 1)
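+
+/* Editorial worked example: generation counts start at 1 (odd), so no
+   key is valid initially.  The first __gthread_key_create on a slot
+   bumps its count to 2 and hands the key out; a later
+   __gthread_key_delete bumps it to 3, instantly invalidating every
+   thread-specific value stamped with generation 2.  */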
+
+/* Note: if MAX_KEYS is increased, this initializer must be updated
+ to match. All the generation counts begin at 1, which means no
+ key is valid. */
+static struct tls_keys tls_keys =
+{
+ { 0, 0, 0, 0 },
+ { 1, 1, 1, 1 }
+};
+
+/* This lock protects the tls_keys structure. */
+static __gthread_mutex_t tls_lock;
+
+static __gthread_once_t tls_init_guard = __GTHREAD_ONCE_INIT;
+
+/* Internal routines. */
+
+/* The task TCB has just been deleted. Call the destructor
+ function for each TLS key that has both a destructor and
+ a non-NULL specific value in this thread.
+
+ This routine does not need to take tls_lock; the generation
+ count protects us from calling a stale destructor. It does
+ need to read tls_keys.dtor[key] atomically. */
+
+static void
+tls_delete_hook (void *tcb ATTRIBUTE_UNUSED)
+{
+ struct tls_data *data;
+ __gthread_key_t key;
+
+#ifdef __RTP__
+ data = __gthread_get_tls_data ();
+#else
+ /* In kernel mode, we can be called in the context of the thread
+     doing the killing, so we must use the TCB to determine the data of
+ the thread being killed. */
+ data = __gthread_get_tsd_data (tcb);
+#endif
+
+ if (data && data->owner == &self_owner)
+ {
+#ifdef __RTP__
+ __gthread_enter_tls_dtor_context ();
+#else
+ __gthread_enter_tsd_dtor_context (tcb);
+#endif
+ for (key = 0; key < MAX_KEYS; key++)
+ {
+ if (data->generation[key] == tls_keys.generation[key])
+ {
+ tls_dtor dtor = tls_keys.dtor[key];
+
+ if (dtor)
+ dtor (data->values[key]);
+ }
+ }
+ free (data);
+#ifdef __RTP__
+ __gthread_leave_tls_dtor_context ();
+#else
+ __gthread_leave_tsd_dtor_context ();
+#endif
+
+#ifdef __RTP__
+ __gthread_set_tls_data (0);
+#else
+ __gthread_set_tsd_data (tcb, 0);
+#endif
+ }
+}
+
+/* Initialize global data used by the TLS system. */
+static void
+tls_init (void)
+{
+ __GTHREAD_MUTEX_INIT_FUNCTION (&tls_lock);
+}
+
+static void tls_destructor (void) __attribute__ ((destructor));
+static void
+tls_destructor (void)
+{
+#ifdef __RTP__
+ /* All threads but this one should have exited by now. */
+ tls_delete_hook (NULL);
+#endif
+ /* Unregister the hook. */
+ if (delete_hook_installed)
+ taskDeleteHookDelete ((FUNCPTR)tls_delete_hook);
+
+ if (tls_init_guard.done && __gthread_mutex_lock (&tls_lock) != ERROR)
+ semDelete (tls_lock);
+}
+
+/* External interface */
+
+/* Store in KEYP a value which can be passed to __gthread_setspecific/
+ __gthread_getspecific to store and retrieve a value which is
+ specific to each calling thread. If DTOR is not NULL, it will be
+ called when a thread terminates with a non-NULL specific value for
+ this key, with the value as its sole argument. */
+
+int
+__gthread_key_create (__gthread_key_t *keyp, tls_dtor dtor)
+{
+ __gthread_key_t key;
+
+ __gthread_once (&tls_init_guard, tls_init);
+
+ if (__gthread_mutex_lock (&tls_lock) == ERROR)
+ return errno;
+
+ for (key = 0; key < MAX_KEYS; key++)
+ if (!KEY_VALID_P (key))
+ goto found_slot;
+
+ /* no room */
+ __gthread_mutex_unlock (&tls_lock);
+ return EAGAIN;
+
+ found_slot:
+ tls_keys.generation[key]++; /* making it even */
+ tls_keys.dtor[key] = dtor;
+ *keyp = key;
+ __gthread_mutex_unlock (&tls_lock);
+ return 0;
+}
+
+/* Invalidate KEY; it can no longer be used as an argument to
+ setspecific/getspecific. Note that this does NOT call destructor
+ functions for any live values for this key. */
+int
+__gthread_key_delete (__gthread_key_t key)
+{
+ if (key >= MAX_KEYS)
+ return EINVAL;
+
+ __gthread_once (&tls_init_guard, tls_init);
+
+ if (__gthread_mutex_lock (&tls_lock) == ERROR)
+ return errno;
+
+ if (!KEY_VALID_P (key))
+ {
+ __gthread_mutex_unlock (&tls_lock);
+ return EINVAL;
+ }
+
+ tls_keys.generation[key]++; /* making it odd */
+ tls_keys.dtor[key] = 0;
+
+ __gthread_mutex_unlock (&tls_lock);
+ return 0;
+}
+
+/* Retrieve the thread-specific value for KEY. If it has never been
+ set in this thread, or KEY is invalid, returns NULL.
+
+ It does not matter if this function races with key_create or
+ key_delete; the worst that can happen is you get a value other than
+ the one that a serialized implementation would have provided. */
+
+void *
+__gthread_getspecific (__gthread_key_t key)
+{
+ struct tls_data *data;
+
+ if (key >= MAX_KEYS)
+ return 0;
+
+ data = __gthread_get_tls_data ();
+
+ if (!data)
+ return 0;
+
+ if (data->generation[key] != tls_keys.generation[key])
+ return 0;
+
+ return data->values[key];
+}
+
+/* Set the thread-specific value for KEY. If KEY is invalid, or
+ memory allocation fails, returns -1, otherwise 0.
+
+ The generation count protects this function against races with
+ key_create/key_delete; the worst thing that can happen is that a
+ value is successfully stored into a dead generation (and then
+ immediately becomes invalid). However, we do have to make sure
+ to read tls_keys.generation[key] atomically. */
+
+int
+__gthread_setspecific (__gthread_key_t key, void *value)
+{
+ struct tls_data *data;
+ unsigned int generation;
+
+ if (key >= MAX_KEYS)
+ return EINVAL;
+
+ data = __gthread_get_tls_data ();
+ if (!data)
+ {
+ if (!delete_hook_installed)
+ {
+ /* Install the delete hook. */
+ if (__gthread_mutex_lock (&tls_lock) == ERROR)
+ return ENOMEM;
+ if (!delete_hook_installed)
+ {
+ taskDeleteHookAdd ((FUNCPTR)tls_delete_hook);
+ delete_hook_installed = 1;
+ }
+ __gthread_mutex_unlock (&tls_lock);
+ }
+
+ data = malloc (sizeof (struct tls_data));
+ if (!data)
+ return ENOMEM;
+
+ memset (data, 0, sizeof (struct tls_data));
+ data->owner = &self_owner;
+ __gthread_set_tls_data (data);
+ }
+
+ generation = tls_keys.generation[key];
+
+ if (generation & 1)
+ return EINVAL;
+
+ data->generation[key] = generation;
+ data->values[key] = value;
+
+ return 0;
+}
+#endif /* __GTHREADS */
diff --git a/gcc/config/vxlib.c b/gcc/config/vxlib.c
new file mode 100644
index 000000000..0ff996cfc
--- /dev/null
+++ b/gcc/config/vxlib.c
@@ -0,0 +1,95 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2009 Free Software Foundation, Inc.
+ Contributed by Zack Weinberg <zack@codesourcery.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Threads compatibility routines for libgcc2 for VxWorks.
+ These are out-of-line routines called from gthr-vxworks.h. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "gthr.h"
+
+#if defined(__GTHREADS)
+#include <vxWorks.h>
+#ifndef __RTP__
+#include <vxLib.h>
+#endif
+#include <taskLib.h>
+#ifndef __RTP__
+#include <taskHookLib.h>
+#else
+# include <errno.h>
+#endif
+
+/* Init-once operation.
+
+ This would be a clone of the implementation from gthr-solaris.h,
+ except that we have a bootstrap problem - the whole point of this
+ exercise is to prevent double initialization, but if two threads
+ are racing with each other, once->mutex is liable to be initialized
+ by both. Then each thread will lock its own mutex, and proceed to
+ call the initialization routine.
+
+ So instead we use a bare atomic primitive (vxTas()) to handle
+ mutual exclusion. Threads losing the race then busy-wait, calling
+ taskDelay() to yield the processor, until the initialization is
+ completed. Inefficient, but reliable. */
+
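+/* Editorial usage sketch; the guard and the init routine below are
+   hypothetical:
+
+     static __gthread_once_t init_guard = __GTHREAD_ONCE_INIT;
+
+     static void init_tables (void) { ... }
+
+     void lazy_entry (void)
+     {
+       __gthread_once (&init_guard, init_tables);
+     }
+*/
+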
+int
+__gthread_once (__gthread_once_t *guard, void (*func)(void))
+{
+ if (guard->done)
+ return 0;
+
+#ifdef __RTP__
+ __gthread_lock_library ();
+#else
+ while (!vxTas ((void *)&guard->busy))
+ {
+#ifdef __PPC__
+      /* This can happen on powerpc, which uses all 32 bits
+	 of the gthread_once_t structure.  */
+      if (guard->done)
+	return 0;
+#endif
+ taskDelay (1);
+ }
+#endif
+
+ /* Only one thread at a time gets here. Check ->done again, then
+ go ahead and call func() if no one has done it yet. */
+ if (!guard->done)
+ {
+ func ();
+ guard->done = 1;
+ }
+
+#ifdef __RTP__
+ __gthread_unlock_library ();
+#else
+ guard->busy = 0;
+#endif
+ return 0;
+}
+
+#endif /* __GTHREADS */
diff --git a/gcc/config/vxworks-dummy.h b/gcc/config/vxworks-dummy.h
new file mode 100644
index 000000000..e3ea6ad6a
--- /dev/null
+++ b/gcc/config/vxworks-dummy.h
@@ -0,0 +1,40 @@
+/* Dummy definitions of VxWorks-related macros
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* True if we're targeting VxWorks.  */
+#ifndef TARGET_VXWORKS
+#define TARGET_VXWORKS 0
+#endif
+
+/* True if generating code for a VxWorks RTP. */
+#ifndef TARGET_VXWORKS_RTP
+#define TARGET_VXWORKS_RTP false
+#endif
+
+/* The symbol that points to an RTP's table of GOTs. */
+#define VXWORKS_GOTT_BASE (gcc_unreachable (), "")
+
+/* The symbol that holds the index of the current module's GOT in
+ VXWORKS_GOTT_BASE. */
+#define VXWORKS_GOTT_INDEX (gcc_unreachable (), "")
diff --git a/gcc/config/vxworks.c b/gcc/config/vxworks.c
new file mode 100644
index 000000000..2445c8b5a
--- /dev/null
+++ b/gcc/config/vxworks.c
@@ -0,0 +1,147 @@
+/* Common VxWorks target definitions for GNU compiler.
+ Copyright (C) 2007, 2008, 2010
+ Free Software Foundation, Inc.
+ Contributed by CodeSourcery, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "target.h"
+#include "diagnostic-core.h"
+#include "output.h"
+#include "tm.h"
+#include "tree.h"
+
+/* Like default_named_section_asm_out_constructor, except that even
+ constructors with DEFAULT_INIT_PRIORITY must go in a numbered
+ section on VxWorks. The VxWorks runtime uses a clever trick to get
+ the sentinel entry (-1) inserted at the beginning of the .ctors
+ segment. This trick will not work if we ever generate any entries
+ in plain .ctors sections; we must always use .ctors.PRIORITY. */
+
+void
+vxworks_asm_out_constructor (rtx symbol, int priority)
+{
+ section *sec;
+
+ sec = get_cdtor_priority_section (priority,
+ /*constructor_p=*/true);
+ assemble_addr_to_section (symbol, sec);
+}
+
+/* See comment for vxworks_asm_out_constructor. */
+
+void
+vxworks_asm_out_destructor (rtx symbol, int priority)
+{
+ section *sec;
+
+ sec = get_cdtor_priority_section (priority,
+ /*constructor_p=*/false);
+ assemble_addr_to_section (symbol, sec);
+}
+
+/* Return the list of FIELD_DECLs that make up an emulated TLS
+ variable's control object. TYPE is the structure these are fields
+ of and *NAME will be filled in with the structure tag that should
+ be used. */
+
+static tree
+vxworks_emutls_var_fields (tree type, tree *name)
+{
+ tree field, next_field;
+
+ *name = get_identifier ("__tls_var");
+
+ field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("size"), unsigned_type_node);
+ DECL_CONTEXT (field) = type;
+ next_field = field;
+
+ field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("module_id"), unsigned_type_node);
+ DECL_CONTEXT (field) = type;
+ DECL_CHAIN (field) = next_field;
+ next_field = field;
+
+ field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+ get_identifier ("offset"), unsigned_type_node);
+ DECL_CONTEXT (field) = type;
+ DECL_CHAIN (field) = next_field;
+
+ return field;
+}
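+
+/* Editorial sketch: following the DECL_CHAIN links built above, the
+   control object corresponds roughly to
+
+     struct __tls_var
+     {
+       unsigned int offset;
+       unsigned int module_id;
+       unsigned int size;
+     };
+
+   shown purely to illustrate the field layout.  */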
+
+/* Return the CONSTRUCTOR to initialize an emulated TLS control
+ object. VAR is the control object. DECL is the TLS object itself
+ and TMPL_ADDR is the address (an ADDR_EXPR) of the initializer for
+ that object. */
+
+static tree
+vxworks_emutls_var_init (tree var, tree decl, tree tmpl_addr)
+{
+ VEC(constructor_elt,gc) *v = VEC_alloc (constructor_elt, gc, 3);
+ constructor_elt *elt;
+
+ tree type = TREE_TYPE (var);
+ tree field = TYPE_FIELDS (type);
+
+ elt = VEC_quick_push (constructor_elt, v, NULL);
+ elt->index = field;
+ elt->value = fold_convert (TREE_TYPE (field), tmpl_addr);
+
+ elt = VEC_quick_push (constructor_elt, v, NULL);
+ field = DECL_CHAIN (field);
+ elt->index = field;
+ elt->value = build_int_cst (TREE_TYPE (field), 0);
+
+ elt = VEC_quick_push (constructor_elt, v, NULL);
+ field = DECL_CHAIN (field);
+ elt->index = field;
+ elt->value = fold_convert (TREE_TYPE (field), DECL_SIZE_UNIT (decl));
+
+ return build_constructor (type, v);
+}
+
+/* Do VxWorks-specific parts of TARGET_OPTION_OVERRIDE. */
+
+void
+vxworks_override_options (void)
+{
+ /* We don't support __thread via target hooks. */
+ targetm.have_tls = false;
+
+ targetm.emutls.get_address = "__builtin___tls_lookup";
+ targetm.emutls.register_common = NULL;
+ targetm.emutls.var_section = ".tls_vars";
+ targetm.emutls.tmpl_section = ".tls_data";
+ targetm.emutls.var_prefix = "__tls__";
+ targetm.emutls.tmpl_prefix = "";
+ targetm.emutls.var_fields = vxworks_emutls_var_fields;
+ targetm.emutls.var_init = vxworks_emutls_var_init;
+ targetm.emutls.var_align_fixed = true;
+ targetm.emutls.debug_form_tls_address = true;
+
+ /* We can use .ctors/.dtors sections only in RTP mode. */
+ targetm.have_ctors_dtors = TARGET_VXWORKS_RTP;
+
+ /* PIC is only supported for RTPs. */
+ if (flag_pic && !TARGET_VXWORKS_RTP)
+ error ("PIC is only supported for RTPs");
+}
diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
new file mode 100644
index 000000000..04ee945d6
--- /dev/null
+++ b/gcc/config/vxworks.h
@@ -0,0 +1,138 @@
+/* Common VxWorks target definitions for GNU compiler.
+ Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2010
+ Free Software Foundation, Inc.
+ Contributed by Wind River Systems.
+ Rewritten by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Assert that we are targeting VxWorks.  */
+#undef TARGET_VXWORKS
+#define TARGET_VXWORKS 1
+
+/* In kernel mode, VxWorks provides all the libraries itself, as well as
+ the functionality of startup files, etc. In RTP mode, it behaves more
+ like a traditional Unix, with more external files. Most of our specs
+ must be aware of the difference. */
+
+/* We look for the VxWorks header files using the environment
+ variables that are set in VxWorks to indicate the location of the
+ system header files. We use -idirafter so that GCC's own
+ header-file directories (containing <stddef.h>, etc.) come before
+ the VxWorks system header directories. */
+
+/* Since we provide a default -isystem, expand -isystem on the command
+ line early. */
+#undef VXWORKS_ADDITIONAL_CPP_SPEC
+#define VXWORKS_ADDITIONAL_CPP_SPEC \
+ "%{!nostdinc: \
+ %{isystem*} -idirafter \
+ %{mrtp: %:getenv(WIND_USR /h) \
+ ;: %:getenv(WIND_BASE /target/h)}}"
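+
+/* For illustration: without -nostdinc, the spec above should expand to
+   "-idirafter $WIND_USR/h" when -mrtp is given and to "-idirafter
+   $WIND_BASE/target/h" otherwise, after passing through any explicit
+   -isystem options.  */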
+
+/* The references to __init and __fini will be satisfied by
+ libc_internal.a. */
+#undef VXWORKS_LIB_SPEC
+#define VXWORKS_LIB_SPEC \
+"%{mrtp:%{shared:-u " USER_LABEL_PREFIX "__init -u " USER_LABEL_PREFIX "__fini} \
+ %{!shared:%{non-static:-u " USER_LABEL_PREFIX "_STI__6__rtld -ldl} \
+ --start-group -lc -lgcc -lc_internal -lnet -ldsi \
+ --end-group}}"
+
+/* The no-op spec for "-shared" below is present because otherwise GCC
+ will treat it as an unrecognized option. */
+#undef VXWORKS_LINK_SPEC
+#define VXWORKS_LINK_SPEC \
+"%{!mrtp:-r} \
+ %{!shared: \
+ %{mrtp:-q %{h*} \
+ %{R*} %{!T*: %(link_start) } \
+ %(link_target) %(link_os)}} \
+ %{v:-v} \
+ %{shared:-shared} \
+ %{Bstatic:-Bstatic} \
+ %{Bdynamic:-Bdynamic} \
+ %{!Xbind-lazy:-z now} \
+ %{Xbind-now:%{Xbind-lazy: \
+ %e-Xbind-now and -Xbind-lazy are incompatible}} \
+ %{mrtp:%{!shared:%{!non-static:-static} \
+ %{non-static:--force-dynamic --export-dynamic}}}"
+
+/* For VxWorks, the system provides libc_internal.a. This is a superset
+ of libgcc.a; we want to use it. Make sure not to dynamically export
+ any of its symbols, though. Always look for libgcc.a first so that
+ we get the latest versions of the GNU intrinsics during our builds. */
+#undef VXWORKS_LIBGCC_SPEC
+#define VXWORKS_LIBGCC_SPEC \
+ "-lgcc %{mrtp:--exclude-libs=libc_internal,libgcc -lc_internal}"
+
+#undef VXWORKS_STARTFILE_SPEC
+#define VXWORKS_STARTFILE_SPEC "%{mrtp:%{!shared:-l:crt0.o}}"
+#define VXWORKS_ENDFILE_SPEC ""
+
+/* Do VxWorks-specific parts of TARGET_OPTION_OVERRIDE. */
+#undef VXWORKS_OVERRIDE_OPTIONS
+#define VXWORKS_OVERRIDE_OPTIONS vxworks_override_options ()
+extern void vxworks_override_options (void);
+
+/* Only RTPs support prioritized constructors and destructors:
+ the implementation relies on numbered .ctors* sections. */
+#define SUPPORTS_INIT_PRIORITY TARGET_VXWORKS_RTP
+
+/* VxWorks requires special handling of constructors and destructors.
+ All VxWorks configurations must use these functions. */
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR vxworks_asm_out_constructor
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR vxworks_asm_out_destructor
+extern void vxworks_asm_out_constructor (rtx symbol, int priority);
+extern void vxworks_asm_out_destructor (rtx symbol, int priority);
+
+/* Override the vxworks-dummy.h definitions. TARGET_VXWORKS_RTP
+ is defined by vxworks.opt. */
+#undef VXWORKS_GOTT_BASE
+#define VXWORKS_GOTT_BASE "__GOTT_BASE__"
+#undef VXWORKS_GOTT_INDEX
+#define VXWORKS_GOTT_INDEX "__GOTT_INDEX__"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+/* Both kernels and RTPs have the facilities required by this macro. */
+#define TARGET_POSIX_IO
+
+/* A VxWorks implementation of TARGET_OS_CPP_BUILTINS. */
+#define VXWORKS_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__vxworks"); \
+ builtin_define ("__VXWORKS__"); \
+ builtin_assert ("system=unix"); \
+ if (TARGET_VXWORKS_RTP) \
+ builtin_define ("__RTP__"); \
+ else \
+ builtin_define ("_WRS_KERNEL"); \
+ } \
+ while (0)
+
+#define VXWORKS_KIND VXWORKS_KIND_NORMAL
+
+/* The Diab linker does not handle .gnu_attribute sections. */
+#undef HAVE_AS_GNU_ATTRIBUTE
diff --git a/gcc/config/vxworks.opt b/gcc/config/vxworks.opt
new file mode 100644
index 000000000..ab7bab1df
--- /dev/null
+++ b/gcc/config/vxworks.opt
@@ -0,0 +1,46 @@
+; Processor-independent options for VxWorks.
+;
+; Copyright (C) 2005, 2007, 2010, 2011 Free Software Foundation, Inc.
+; Contributed by CodeSourcery, LLC.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Bdynamic
+Driver Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
+
+Bstatic
+Driver Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
+
+Xbind-lazy
+Driver Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
+
+Xbind-now
+Driver Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
+
+mrtp
+Target Report RejectNegative Mask(VXWORKS_RTP) Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
+Assume the VxWorks RTP environment
+
+; VxWorks AE has two modes: kernel mode and vThreads mode. In
+; general, back ends do not actually need to know which mode they're
+; in, so we do not have to set any flags.
+mvthreads
+Target RejectNegative Condition(VXWORKS_KIND == VXWORKS_KIND_AE)
+Assume the VxWorks vThreads environment
+
+non-static
+Driver Condition(VXWORKS_KIND == VXWORKS_KIND_NORMAL)
diff --git a/gcc/config/vxworksae.h b/gcc/config/vxworksae.h
new file mode 100644
index 000000000..ee7c73c3c
--- /dev/null
+++ b/gcc/config/vxworksae.h
@@ -0,0 +1,70 @@
+/* Common VxWorks AE target definitions for GNU compiler.
+ Copyright (C) 2004, 2005, 2007, 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This header should be included after vx-common.h. */
+
+/* Most of the definitions below this point are versions of the
+ vxworks.h definitions, without the -mrtp bits. */
+
+/* The directory containing the VxWorks AE target headers. */
+#define VXWORKSAE_TARGET_DIR \
+ "/home/tornado/vxworks-ae/latest/target"
+
+/* Include target/vThreads/h or target/h (depending on the compilation
+ mode), and then target/val/h (in either mode). The macros defined
+ are in the user's namespace, but the VxWorks headers require
+ them. */
+#undef VXWORKS_ADDITIONAL_CPP_SPEC
+#define VXWORKS_ADDITIONAL_CPP_SPEC " \
+ %{!nostdinc:%{isystem*}} \
+ %{mvthreads:-DVTHREADS=1 \
+ %{!nostdinc:-isystem " VXWORKSAE_TARGET_DIR "/vThreads/h}} \
+ %{!mvthreads:-DAE653_BUILD=1 \
+ %{!nostdinc:-isystem " VXWORKSAE_TARGET_DIR "/h}} \
+ %{!nostdinc:-isystem " VXWORKSAE_TARGET_DIR "/val/h}"
+
+#undef VXWORKS_LIB_SPEC
+#define VXWORKS_LIB_SPEC ""
+
+#undef VXWORKS_LINK_SPEC
+#define VXWORKS_LINK_SPEC \
+ "-r %{v:-V}"
+
+#undef VXWORKS_LIBGCC_SPEC
+#define VXWORKS_LIBGCC_SPEC \
+ "-lgcc"
+
+#undef VXWORKS_STARTFILE_SPEC
+#define VXWORKS_STARTFILE_SPEC ""
+
+#define VXWORKS_KIND VXWORKS_KIND_AE
+
+/* Both kernel and vThreads modes have the facilities required by this
+   macro. */
+#define TARGET_POSIX_IO
+
+/* A VxWorks 653 implementation of TARGET_OS_CPP_BUILTINS. */
+#define VXWORKS_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__vxworks"); \
+ builtin_define ("__VXWORKS__"); \
+ } \
+ while (0)
+
diff --git a/gcc/config/x-cflags-O1 b/gcc/config/x-cflags-O1
new file mode 100644
index 000000000..9ba1e7496
--- /dev/null
+++ b/gcc/config/x-cflags-O1
@@ -0,0 +1,5 @@
+# At -O0, cc1 etc. are too large on some targets for a successful
+# link; force building libbackend.a with -O1.
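+# ($(filter -O%,...) keeps only the -O options, $(lastword ...) picks
+# the one that takes effect, and filtering out -O0 leaves an empty
+# string exactly when the effective level is -O0 or no -O option was
+# given at all.)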
+ifeq ($(filter-out -O0,$(lastword $(filter -O%,$(CFLAGS)))),)
+$(OBJS) : override CFLAGS += -O1
+endif
diff --git a/gcc/config/x-darwin b/gcc/config/x-darwin
new file mode 100644
index 000000000..f671d911f
--- /dev/null
+++ b/gcc/config/x-darwin
@@ -0,0 +1,3 @@
+host-darwin.o : $(srcdir)/config/host-darwin.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h toplev.h config/host-darwin.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc/config/x-hpux b/gcc/config/x-hpux
new file mode 100644
index 000000000..e9f2f18f1
--- /dev/null
+++ b/gcc/config/x-hpux
@@ -0,0 +1,4 @@
+host-hpux.o : $(srcdir)/config/host-hpux.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h $(HOOKS_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/host-hpux.c
diff --git a/gcc/config/x-linux b/gcc/config/x-linux
new file mode 100644
index 000000000..f87a45b24
--- /dev/null
+++ b/gcc/config/x-linux
@@ -0,0 +1,4 @@
+host-linux.o : $(srcdir)/config/host-linux.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h $(HOOKS_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/host-linux.c
diff --git a/gcc/config/x-solaris b/gcc/config/x-solaris
new file mode 100644
index 000000000..3e99df657
--- /dev/null
+++ b/gcc/config/x-solaris
@@ -0,0 +1,4 @@
+host-solaris.o : $(srcdir)/config/host-solaris.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h $(HOOKS_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/host-solaris.c
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
new file mode 100644
index 000000000..bde1ba31a
--- /dev/null
+++ b/gcc/config/xtensa/constraints.md
@@ -0,0 +1,139 @@
+;; Constraint definitions for Xtensa.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints.
+
+(define_register_constraint "a" "GR_REGS"
+ "General-purpose AR registers @code{a0}-@code{a15},
+ except @code{a1} (@code{sp}).")
+
+(define_register_constraint "b" "TARGET_BOOLEANS ? BR_REGS : NO_REGS"
+ "Boolean registers @code{b0}-@code{b15}; only available if the Xtensa
+ Boolean Option is configured.")
+
+(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS"
+ "@internal
+ All AR registers, including sp, but only if the Xtensa Code Density
+ Option is configured.")
+
+(define_register_constraint "f" "TARGET_HARD_FLOAT ? FP_REGS : NO_REGS"
+ "Floating-point registers @code{f0}-@code{f15}; only available if the
+ Xtensa Floating-Point Coprocessor is configured.")
+
+(define_register_constraint "q" "SP_REG"
+ "@internal
+ The stack pointer (register @code{a1}).")
+
+(define_register_constraint "A" "TARGET_MAC16 ? ACC_REG : NO_REGS"
+ "The low 32 bits of the accumulator from the Xtensa MAC16 Option.")
+
+(define_register_constraint "B" "TARGET_SEXT ? GR_REGS : NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Sign Extend
+ Option is configured.")
+
+(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa 16-Bit Integer
+ Multiply Option is configured.")
+
+(define_register_constraint "D" "TARGET_DENSITY ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Code Density
+ Option is configured.")
+
+(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS: NO_REGS"
+ "@internal
+ General-purpose AR registers, but only if the Xtensa Const16
+ Option is configured.")
+
+;; Integer constant constraints.
+
+(define_constraint "I"
+ "A signed 12-bit integer constant for use with MOVI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm12b (ival)")))
+
+(define_constraint "J"
+ "A signed 8-bit integer constant for use with ADDI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm8 (ival)")))
+
+(define_constraint "K"
+ "A constant integer that can be an immediate operand of an Xtensa
+ conditional branch instruction that performs a signed comparison or
+ a comparison against zero."
+ (and (match_code "const_int")
+ (match_test "xtensa_b4const_or_zero (ival)")))
+
+(define_constraint "L"
+ "A constant integer that can be an immediate operand of an Xtensa
+ conditional branch instruction that performs an unsigned comparison."
+ (and (match_code "const_int")
+ (match_test "xtensa_b4constu (ival)")))
+
+(define_constraint "M"
+ "An integer constant in the range @minus{}32-95 for use with MOVI.N
+ instructions."
+ (and (match_code "const_int")
+ (match_test "ival >= -32 && ival <= 95")))
+
+(define_constraint "N"
+ "An unsigned 8-bit integer constant shifted left by 8 bits for use
+ with ADDMI instructions."
+ (and (match_code "const_int")
+ (match_test "xtensa_simm8x256 (ival)")))
+
+(define_constraint "O"
+ "An integer constant that can be used in ADDI.N instructions."
+ (and (match_code "const_int")
+ (match_test "ival == -1 || (ival >= 1 && ival <= 15)")))
+
+(define_constraint "P"
+ "An integer constant that can be used as a mask value in an EXTUI
+ instruction."
+ (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate (ival)")))
+
+;; Memory constraints. Do not use define_memory_constraint here. Doing so
+;; causes reload to force some constants into the constant pool, but since
+;; the Xtensa constant pool can only be accessed with L32R instructions, it
+;; is always better to just copy a constant into a register. Instead, use
+;; regular constraints but add a check to allow pseudos during reload.
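+;; (The "reload_in_progress && REGNO (op) >= FIRST_PSEUDO_REGISTER"
+;; tests in "R" and "U" below are that check: they let a still
+;; unallocated pseudo satisfy the constraint while reload is running.)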
+
+(define_constraint "R"
+ "Memory that can be accessed with a 4-bit unsigned offset from a register."
+ (ior (and (match_code "mem")
+ (match_test "smalloffset_mem_p (op)"))
+ (and (match_code "reg")
+ (match_test "reload_in_progress
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
+
+(define_constraint "T"
+ "Memory in a literal pool (addressable with an L32R instruction)."
+ (and (match_code "mem")
+ (match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
+
+(define_constraint "U"
+ "Memory that is not in a literal pool."
+ (ior (and (match_code "mem")
+ (match_test "! constantpool_mem_p (op)"))
+ (and (match_code "reg")
+ (match_test "reload_in_progress
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER"))))
diff --git a/gcc/config/xtensa/crti.asm b/gcc/config/xtensa/crti.asm
new file mode 100644
index 000000000..cbe91b0e7
--- /dev/null
+++ b/gcc/config/xtensa/crti.asm
@@ -0,0 +1,51 @@
+# Start .init and .fini sections.
+# Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# This file just makes a stack frame for the contents of the .fini and
+# .init sections. Users may put any desired instructions in those
+# sections.
+
+#include "xtensa-config.h"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+ .align 4
+_init:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+ .align 4
+_fini:
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry sp, 64
+#else
+ addi sp, sp, -32
+ s32i a0, sp, 0
+#endif
diff --git a/gcc/config/xtensa/crtn.asm b/gcc/config/xtensa/crtn.asm
new file mode 100644
index 000000000..413cfa0ac
--- /dev/null
+++ b/gcc/config/xtensa/crtn.asm
@@ -0,0 +1,46 @@
+# End of .init and .fini sections.
+# Copyright (C) 2003, 2009 Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# This file just makes sure that the .fini and .init sections do in
+# fact return. Users may put any desired instructions in those sections.
+# This file is the last thing linked into any executable.
+
+#include "xtensa-config.h"
+
+ .section .init
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
+
+ .section .fini
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ l32i a0, sp, 0
+ addi sp, sp, 32
+ ret
+#endif
diff --git a/gcc/config/xtensa/elf.h b/gcc/config/xtensa/elf.h
new file mode 100644
index 000000000..54a9c8f19
--- /dev/null
+++ b/gcc/config/xtensa/elf.h
@@ -0,0 +1,104 @@
+/* Xtensa/Elf configuration.
+ Derived from the configuration for GCC for Intel i386 running Linux.
+ Copyright (C) 2001, 2003, 2006, 2007, 2010 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_SECTION_TYPE_FLAGS xtensa_multibss_section_type_flags
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (Xtensa/ELF)", stderr);
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mtext-section-literals:--text-section-literals} \
+ %{mno-text-section-literals:--no-text-section-literals} \
+ %{mtarget-align:--target-align} \
+ %{mno-target-align:--no-target-align} \
+ %{mlongcalls:--longcalls} \
+ %{mno-longcalls:--no-longcalls}"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc -lsim -lc -lhandlers-sim -lhal"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "crt1-sim%O%s crt0%O%s crti%O%s crtbegin%O%s _vectors%O%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s crtn%O%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{static:-static}}}"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Avoid dots in generated labels for compatibility with VxWorks;
+   allow dollar signs instead. */
+#undef NO_DOLLAR_IN_LABEL
+#define NO_DOT_IN_LABEL
+
+/* Do not force "-fpic" for this target. */
+#define XTENSA_ALWAYS_PIC 0
+
+#undef DBX_REGISTER_NUMBER
+
+/* Search for headers in $tooldir/arch/include and for libraries and
+ startfiles in $tooldir/arch/lib. */
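+/* (For example, with spec_machine "xtensa-elf" this typically yields
+   paths of the form <prefix>/xtensa-elf/arch/lib and
+   <prefix>/xtensa-elf/arch/include; the exact prefix depends on how
+   the driver was configured.)  */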
+#define GCC_DRIVER_HOST_INITIALIZATION \
+do \
+{ \
+ char *tooldir, *archdir; \
+ tooldir = concat (tooldir_base_prefix, spec_machine, \
+ dir_separator_str, NULL); \
+ if (!IS_ABSOLUTE_PATH (tooldir)) \
+ tooldir = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
+ spec_version, dir_separator_str, tooldir, NULL); \
+ archdir = concat (tooldir, "arch", dir_separator_str, NULL); \
+ add_prefix (&startfile_prefixes, \
+ concat (archdir, "lib", dir_separator_str, NULL), \
+ "GCC", PREFIX_PRIORITY_LAST, 0, 1); \
+ add_prefix (&include_prefixes, archdir, \
+ "GCC", PREFIX_PRIORITY_LAST, 0, 0); \
+ } \
+while (0)
diff --git a/gcc/config/xtensa/elf.opt b/gcc/config/xtensa/elf.opt
new file mode 100644
index 000000000..bdeac15b2
--- /dev/null
+++ b/gcc/config/xtensa/elf.opt
@@ -0,0 +1,30 @@
+; Xtensa ELF (bare metal) options.
+
+; Copyright (C) 2011
+; Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+; See the GCC internals manual (options.texi) for a description of
+; this file's format.
+
+; Please try to keep this file in ASCII collating order.
+
+rdynamic
+Driver
+
+; This comment is to ensure we retain the blank line above.
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S
new file mode 100644
index 000000000..9b46889bd
--- /dev/null
+++ b/gcc/config/xtensa/ieee754-df.S
@@ -0,0 +1,2388 @@
+/* IEEE-754 double-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negdf2
+
+ .align 4
+ .global __negdf2
+ .type __negdf2, @function
+__negdf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor xh, xh, a4
+ leaf_return
+
+#endif /* L_negdf2 */
+
+#ifdef L_addsubdf3
+
+ /* Addition */
+__adddf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __adddf3
+ .type __adddf3, @function
+__adddf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Ladd_xnan_or_inf
+ ball yh, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, xh, 20, 12
+ extui a8, yh, 20, 12
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Ladd_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Ladd_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Ladd_addy:
+ /* Do the 64-bit addition. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1: leaf_return
+
+.Ladd_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Ladd_addy
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Ladd_addy
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone xh, a6, .Ladd_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_bigshiftx
+
+ ssr a10
+ sll a9, xl
+ src xl, xh, xl
+ srl xh, xh
+
+.Ladd_addx:
+ add xl, xl, yl
+ add xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, 1
+1:
+ /* Check if the add overflowed into the exponent. */
+ extui a10, xh, 20, 12
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
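+
+ /* Illustrative C model of this rounding step (types are for
+    exposition only), with a9 holding the shifted-out bits -- guard
+    bit in the msb -- and xh/xl the result mantissa:
+
+      if ((int32_t) a9 < 0)          // leftover fraction >= 1/2
+        {
+          if (++xl == 0)
+            xh += 1;                 // .Ladd_roundcarry
+          else if ((a9 << 1) == 0)   // nothing below the guard bit
+            xl &= ~1u;               // exactly 1/2: round to even
+        }
+ */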
+
+.Ladd_bigshiftx:
+ /* Mostly the same thing as "bigshifty".... */
+ bgeui a10, 64, .Ladd_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ movi xh, 0
+ beqz a11, .Ladd_addx
+ or a9, a9, a10
+ j .Ladd_addx
+
+.Ladd_returny:
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x100000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
+ or, since (x << 20) - (x << 19) = (x << 19):
+ shifted mantissa + ((x + 1) << 19)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
+ /* Shift xh/xl right by one bit. Save the lsb of xl. */
+ mov a10, xl
+ ssai 1
+ src xl, xh, xl
+ srl xh, xh
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 19
+ add xh, xh, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball xh, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi xl, xl, 1
+ beqz xl, .Ladd_roundcarry
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ movi xl, 0
+ srli xh, xh, 20
+ slli xh, xh, 20
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or xh, xh, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Ladd_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+
+ /* Subtraction */
+__subdf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall yh, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 11
+ xor yh, yh, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subdf3
+ .type __subdf3, @function
+__subdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, xh, yh
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
+ ball xh, a6, .Lsub_xnan_or_inf
+ ball yh, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, xh, 20, 11
+ extui a8, yh, 20, 11
+ bltu xh, yh, .Lsub_xsmaller
+ beq xh, yh, .Lsub_compare_low
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone yh, a6, .Lsub_yexpzero
+
+ /* Replace yh sign/exponent with 0x001. */
+ or yh, yh, a6
+ slli yh, yh, 11
+ srli yh, yh, 11
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. Optimize for difference < 32. */
+ sub a10, a7, a8
+ bgeui a10, 32, .Lsub_bigshifty
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out of yl are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, yl, a9
+ src yl, yh, yl
+ srl yh, yh
+
+.Lsub_suby:
+ /* Do the 64-bit subtraction. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_compare_low:
+ /* The high words are equal. Compare the low words. */
+ bltu xl, yl, .Lsub_xsmaller
+ bltu yl, xl, .Lsub_ysmaller
+ /* The operands are equal. Return 0.0. */
+ movi xh, 0
+ movi xl, 0
+1: leaf_return
+
+.Lsub_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli yh, yh, 12
+ srli yh, yh, 12
+ bnone xh, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_bigshifty:
+ /* Exponent difference >= 64 -- just return the bigger value. */
+ bgeui a10, 64, 1b
+
+ /* Shift yh/yl right by the exponent difference. Any bits that are
+ shifted out are saved in a9 for rounding the result. */
+ ssr a10
+ sll a11, yl /* lost bits shifted out of yl */
+ src a9, yh, yl
+ srl yl, yh
+ movi yh, 0
+ beqz a11, .Lsub_suby
+ or a9, a9, a10 /* any positive, nonzero value will work */
+ j .Lsub_suby
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone xh, a6, .Lsub_xexpzero
+
+ or xh, xh, a6
+ slli xh, xh, 11
+ srli xh, xh, 11
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_bigshiftx
+
+ ssr a10
+ movi a9, 0
+ src a9, xl, a9
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Negate y. */
+ slli a11, a6, 11
+ xor yh, yh, a11
+
+.Lsub_subx:
+ sub xl, yl, xl
+ sub xh, yh, xh
+ bgeu yl, xl, 1f
+ addi xh, xh, -1
+1:
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from xh/xl. */
+ neg a9, a9
+ beqz a9, 1f
+ addi a5, xh, -1
+ moveqz xh, a5, xl
+ addi xl, xl, -1
+1:
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, xh, 20, 11
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi xl, xl, 1
+ beqz xl, .Lsub_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ slli xh, xh, 12
+ srli xh, xh, 12
+ bnone yh, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_bigshiftx:
+ /* Mostly the same thing as "bigshifty", but with the sign bit of the
+ shifted value set so that the subsequent subtraction flips the
+ sign of y. */
+ bgeui a10, 64, .Lsub_returny
+
+ ssr a10
+ sll a11, xl
+ src a9, xh, xl
+ srl xl, xh
+ slli xh, a6, 11 /* set sign bit of xh */
+ beqz a11, .Lsub_subx
+ or a9, a9, a10
+ j .Lsub_subx
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 11
+ xor xh, yh, a7
+ mov xl, yl
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, xh, 12
+ beqz a8, .Lsub_xhzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 12
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_shift_lt32:
+ /* Shift the mantissa (a8/xl/a9) left by a6. */
+ ssl a6
+ src a8, a8, xl
+ src xl, xl, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lsub_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+.Lsub_xhzero:
+ /* When normalizing the result, all the mantissa bits in the high
+ word are zero. Shift by "20 + (leading zero count of xl) + 1". */
+ do_nsau a6, xl, a7, a11
+ addi a6, a6, 21
+ blt a10, a6, .Lsub_subnormal
+
+.Lsub_normalize_shift:
+ bltui a6, 32, .Lsub_shift_lt32
+
+ ssl a6
+ src a8, xl, a9
+ sll xl, a9
+ movi a9, 0
+
+ srli xh, xh, 20
+ sub xh, xh, a6
+ slli xh, xh, 20
+ add xh, xh, a8
+ j .Lsub_round
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubdf3 */
+
+#ifdef L_muldf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__muldf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Lmul_xh_zero
+ do_nsau a10, xh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+.Lmul_xh_zero:
+ do_nsau a10, xl, a11, a12
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Lmul_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Lmul_xnormalized
+.Lmul_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* If y is zero, return zero. */
+ or a10, yh, yl
+ beqz a10, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Lmul_yh_zero
+ do_nsau a10, yh, a11, a12
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+.Lmul_yh_zero:
+ do_nsau a10, yl, a11, a12
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Lmul_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Lmul_ynormalized
+.Lmul_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ bnez yl, 1f
+ slli a8, yh, 1
+ bnez a8, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall yh, a6, .Lmul_returnx
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov xh, yh
+ mov xl, yl
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli xh, xh, 1
+ ssai 1
+ src xh, a7, xh
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ bnez xl, .Lmul_returny
+ slli a8, xh, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x80000 /* make it a quiet NaN */
+ or xh, yh, a7
+ j .Lmul_done
+
+ .align 4
+ .global __muldf3
+ .type __muldf3, @function
+__muldf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Lmul_xnan_or_inf
+ ball yh, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
+ The least-significant word of the result is thrown away except
+ that if it is nonzero, the lsb of a6 is set to 1. */
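+ /* (Folding the discarded low word into the lsb of a6 preserves the
+    "sticky" information that round-to-nearest-even needs: it must be
+    able to tell an exactly-half leftover fraction from one slightly
+    above half.) */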
+#if XCHAL_HAVE_MUL32_HIGH
+
+ /* Compute a6 with any carry-outs in a10. */
+ movi a10, 0
+ mull a6, xl, yh
+ mull a11, xh, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ muluh a11, xl, yl
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* If the low word of the result is nonzero, set the lsb of a6. */
+ mull a11, xl, yl
+ beqz a11, 1f
+ movi a9, 1
+ or a6, a6, a9
+1:
+ /* Compute xl with any carry-outs in a9. */
+ movi a9, 0
+ mull a11, xh, yh
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh a11, xh, yl
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ muluh xl, xl, yh
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Compute xh. */
+ muluh xh, xh, yh
+ add xh, xh, a9
+
+#else /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
+ products. These partial products are:
+
+ 0 xll * yll
+
+ 1 xll * ylh
+ 2 xlh * yll
+
+ 3 xll * yhl
+ 4 xlh * ylh
+ 5 xhl * yll
+
+ 6 xll * yhh
+ 7 xlh * yhl
+ 8 xhl * ylh
+ 9 xhh * yll
+
+ 10 xlh * yhh
+ 11 xhl * yhl
+ 12 xhh * ylh
+
+ 13 xhl * yhh
+ 14 xhh * yhl
+
+ 15 xhh * yhh
+
+ where the input chunks are (hh, hl, lh, ll). If using the Mul16
+ or Mul32 multiplier options, these input chunks must be stored in
+ separate registers. For Mac16, the UMUL.AA.* opcodes can specify
+ that the inputs come from either half of the registers, so there
+ is no need to shift them out ahead of time. If there is no
+ multiply hardware, the 16-bit chunks can be extracted when setting
+ up the arguments to the separate multiply function. */
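+
+ /* Illustrative C model of the same decomposition (uint64_t from
+    <stdint.h> is used only for exposition; the assembly below works
+    with 32-bit registers throughout). Partial products with equal
+    i + j contribute at the same bit position, matching the grouping
+    above:
+
+      static void
+      mul64x64 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
+      {
+        uint64_t rlo = 0, rhi = 0;
+        for (int i = 0; i < 4; i++)
+          for (int j = 0; j < 4; j++)
+            {
+              // 16x16->32 partial product of chunks i and j
+              uint64_t pp = (uint64_t) ((x >> 16 * i) & 0xffff)
+                            * ((y >> 16 * j) & 0xffff);
+              int shift = 16 * (i + j);
+              if (shift < 64)
+                {
+                  uint64_t old = rlo;
+                  rlo += pp << shift;
+                  rhi += rlo < old;             // carry into high word
+                  if (shift)
+                    rhi += pp >> (64 - shift);  // bits above bit 63
+                }
+              else
+                rhi += pp << (shift - 64);
+            }
+        *hi = rhi;
+        *lo = rlo;
+      }
+ */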
+
+ /* Save a7 since it is needed to hold a temporary value. */
+ s32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 8
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define xlh a12
+#define ylh a13
+#define xhh a14
+#define yhh a15
+
+ /* Get the high halves of the inputs into registers. */
+ srli xlh, xl, 16
+ srli ylh, yl, 16
+ srli xhh, xh, 16
+ srli yhh, yh, 16
+
+#define xll xl
+#define yll yl
+#define xhl xh
+#define yhl yh
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui xl, xl, 0, 16
+ extui xh, xh, 0, 16
+ extui yl, yl, 0, 16
+ extui yh, yh, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a10 with carry-out in a9. */
+ do_mul(a10, xl, l, yl, h) /* pp 1 */
+ do_mul(a11, xl, h, yl, l) /* pp 2 */
+ movi a9, 0
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Initialize a6 with a9/a10 shifted into position. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a6, a9, a10
+
+ /* Compute the low word into a10. */
+ do_mul(a11, xl, l, yl, l) /* pp 0 */
+ sll a10, a10
+ add a10, a10, a11
+ bgeu a10, a11, 1f
+ addi a6, a6, 1
+1:
+ /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
+ This is good enough to determine the low half of a6, so that any
+ nonzero bits from the low word of the result can be collapsed
+ into a6, freeing up a register. */
+ movi a9, 0
+ do_mul(a11, xl, l, yh, l) /* pp 3 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xl, h, yl, h) /* pp 4 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a11, xh, l, yl, l) /* pp 5 */
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Collapse any nonzero bits from the low word into a6. */
+ beqz a10, 1f
+ movi a11, 1
+ or a6, a6, a11
+1:
+ /* Add pp6-9 into a11 with carry-outs in a10. */
+ do_mul(a7, xl, l, yh, h) /* pp 6 */
+ do_mul(a11, xh, h, yl, l) /* pp 9 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xl, h, yh, l) /* pp 7 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ do_mul(a7, xh, l, yl, h) /* pp 8 */
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Add pp10-12 into xl with carry-outs in a9. */
+ movi a9, 0
+ do_mul(xl, xl, h, yh, h) /* pp 10 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, l, yh, l) /* pp 11 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ do_mul(a10, xh, h, yl, h) /* pp 12 */
+ add xl, xl, a10
+ bgeu xl, a10, 1f
+ addi a9, a9, 1
+1:
+ /* Add pp13-14 into a11 with carry-outs in a10. */
+ do_mul(a11, xh, l, yh, h) /* pp 13 */
+ do_mul(a7, xh, h, yh, l) /* pp 14 */
+ movi a10, 0
+ add a11, a11, a7
+ bgeu a11, a7, 1f
+ addi a10, a10, 1
+1:
+ /* Shift a10/a11 into position, and add low half of a11 to a6. */
+ src a10, a10, a11
+ add a10, a10, a9
+ sll a11, a11
+ add xl, xl, a11
+ bgeu xl, a11, 1f
+ addi a10, a10, 1
+1:
+ /* Compute xh. */
+ do_mul(xh, xh, h, yh, h) /* pp 15 */
+ add xh, xh, a10
+
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a7, sp, 4
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ l32i a0, sp, 0
+ l32i a8, sp, 8
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 12 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 11 bits and increment the
+ exponent. Note: It is convenient to use the constant 0x3ff
+ instead of 0x400 when removing the extra exponent bias (so that
+ it is easy to construct 0x7fe for the overflow check). Reverse
+ the logic here to decrement the exponent sum by one unless there
+ was a carry-out. */
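+ /* (In C terms, with the product's top 32 bits in xh:
+
+      if (xh >> 9)                          // bit 105 set: carry-out
+        shift = 11;
+      else
+        { shift = 12; exponent_sum -= 1; }
+
+    since the product of two 53-bit mantissas lies in [2^104, 2^106),
+    its top bit lands at bit 8 or bit 9 of xh.) */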
+ movi a4, 11
+ srli a5, xh, 21 - 12
+ bnez a5, 1f
+ addi a4, a4, 1
+ addi a8, a8, -1
+1: ssl a4
+ src xh, xh, xl
+ src xl, xl, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x3ff
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ slli a4, a4, 1 /* 0x7fe */
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi xl, xl, 1
+ beqz xl, .Lmul_roundcarry
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, xh, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ j .Lmul_rounded
+
+.Lmul_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow is OK -- it will be added to the exponent. */
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_bigshift
+
+ /* Shift xh/xl right. Any bits that are shifted out of xl are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+ j 1f
+
+.Lmul_bigshift:
+ bgeui a8, 64, .Lmul_flush_to_zero
+ sll a10, xl /* lost bits shifted out of xl */
+ src a6, xh, xl
+ srl xl, xh
+ movi xh, 0
+ or a9, a9, a10
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
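+ /* Illustrative C model of the loop above (four multiplier bits are
+    consumed per iteration; types are for exposition only):
+
+      uint32_t dst = 0;
+      while (src1)
+        {
+          if (src1 & 1) dst += src2;
+          if (src1 & 2) dst += src2 << 1;
+          if (src1 & 4) dst += src2 << 2;
+          if (src1 & 8) dst += src2 << 3;
+          src1 >>= 4;
+          src2 <<= 4;
+        }
+ */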
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_muldf3 */
+
+#ifdef L_divdf3
+
+ /* Division */
+__divdf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli yh, yh, 1
+ srli yh, yh, 1
+
+ /* Check for division by zero. */
+ or a10, yh, yl
+ beqz a10, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ beqz yh, .Ldiv_yh_zero
+ do_nsau a10, yh, a11, a9
+ addi a10, a10, -11
+ ssl a10
+ src yh, yh, yl
+ sll yl, yl
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+.Ldiv_yh_zero:
+ do_nsau a10, yl, a11, a9
+ addi a10, a10, -11
+ movi a9, -31
+ sub a9, a9, a10
+ ssl a10
+ bltz a10, .Ldiv_yl_srl
+ sll yh, yl
+ movi yl, 0
+ j .Ldiv_ynormalized
+.Ldiv_yl_srl:
+ srl yh, yl
+ sll yl, yl
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+ or xl, xl, xh
+ srli xh, a7, 31
+ slli xh, xh, 31
+ or xh, xh, a6
+ bnez xl, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: movi xl, 0
+ leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli xh, xh, 1
+ srli xh, xh, 1
+
+ /* If x is zero, return zero. */
+ or a10, xh, xl
+ beqz a10, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ beqz xh, .Ldiv_xh_zero
+ do_nsau a10, xh, a11, a8
+ addi a10, a10, -11
+ ssl a10
+ src xh, xh, xl
+ sll xl, xl
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+.Ldiv_xh_zero:
+ do_nsau a10, xl, a11, a8
+ addi a10, a10, -11
+ movi a8, -31
+ sub a8, a8, a10
+ ssl a10
+ bltz a10, .Ldiv_xl_srl
+ sll xh, xl
+ movi xl, 0
+ j .Ldiv_xnormalized
+.Ldiv_xl_srl:
+ srl xh, xl
+ sll xl, xl
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, yh, 31
+ slli a7, a7, 31
+ xor xh, xh, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall yh, a6, 1f
+ movi a4, 0x80000 /* make it a quiet NaN */
+ or xh, xh, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, yh, 12
+ or a8, a8, yl
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov xh, yh
+ mov xl, yl
+ leaf_return
+
+.Ldiv_highequal1:
+ bltu xl, yl, 2f
+ j 3f
+
+ .align 4
+ .global __divdf3
+ .type __divdf3, @function
+__divdf3:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+
+ /* Get the sign of the result. */
+ xor a7, xh, yh
+
+ /* Check for NaN and infinity. */
+ ball xh, a6, .Ldiv_xnan_or_inf
+ ball yh, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, xh, 20, 11
+ extui a9, yh, 20, 11
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0x1fffff
+ or xh, xh, a6
+ and xh, xh, a10
+ or yh, yh, a6
+ and yh, yh, a10
+
+ /* Set SAR for left shift by one. */
+ ssai (32 - 1)
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu yh, xh, 3f
+ beq yh, xh, .Ldiv_highequal1
+2: src xh, xh, xl
+ sll xl, xl
+ addi a8, a8, -1
+3:
+ /* Do the first subtraction and shift. */
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+ src xh, xh, xl
+ sll xl, xl
+
+ /* Put the quotient into a10/a11. */
+ movi a10, 0
+ movi a11, 1
+
+ /* Divide one bit at a time for 52 bits. */
+ movi a9, 52
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ src a10, a10, a11
+ sll a11, a11
+
+ /* Is this digit a 0 or 1? */
+ bltu xh, yh, 3f
+ beq xh, yh, .Ldiv_highequal2
+
+ /* Output a 1 and subtract. */
+2: addi a11, a11, 1
+ sub xh, xh, yh
+ bgeu xl, yl, 1f
+ addi xh, xh, -1
+1: sub xl, xl, yl
+
+ /* Shift the dividend << 1. */
+3: src xh, xh, xl
+ sll xl, xl
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ movi a9, 0x3fe
+ add a8, a8, a9
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..7fd are OK here. */
+ addmi a9, a9, 0x400 /* 0x7fe */
+ bgeu a8, a9, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in xh/xl. */
+ bltu xh, yh, .Ldiv_rounded
+ beq xh, yh, .Ldiv_highequal3
+.Ldiv_roundup:
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+
+.Ldiv_rounded:
+ mov xl, a11
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 20
+ add xh, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Ldiv_highequal2:
+ bgeu xl, yl, 2b
+ j 3b
+
+.Ldiv_highequal3:
+ bltu xl, yl, .Ldiv_rounded
+ bne xl, yl, .Ldiv_roundup
+
+ /* Remainder is exactly half the divisor. Round even. */
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a9, 1 /* 0x7ff */
+ slli xh, a8, 20
+ movi xl, 0
+ j .Ldiv_addsign
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_bigshift
+
+ /* Shift a10/a11 right. Any bits that are shifted out of a11 are
+ saved in a6 for rounding the result. */
+ sll a6, a11
+ src a11, a10, a11
+ srl a10, a10
+ j 1f
+
+.Ldiv_bigshift:
+ bgeui a8, 64, .Ldiv_flush_to_zero
+ sll a9, a11 /* lost bits shifted out of a11 */
+ src a6, a10, a11
+ srl a11, a10
+ movi a10, 0
+ or xl, xl, a9
+
+ /* Set the exponent to zero. */
+1: movi a8, 0
+
+ /* Pack any nonzero remainder (in xh/xl) into a6. */
+ or xh, xh, xl
+ beqz xh, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10/a11 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a11, a11, 1
+ beqz a11, .Ldiv_roundcarry
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a11, a11, 1
+ slli a11, a11, 1
+ j .Ldiv_rounded
+
+.Ldiv_roundcarry:
+ /* a11 is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi a10, a10, 1
+ /* Overflow to the exponent field is OK. */
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli xh, a7, 31
+ slli xh, xh, 31
+ movi xl, 0
+ leaf_return
+
+#endif /* L_divdf3 */
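
The quotient loop in __divdf3 above is a classic restoring division: one
quotient bit per iteration, with the final remainder (already shifted left
by one) left over for rounding. The following C sketch shows the same
scheme on 64-bit values; div_mantissa is a hypothetical helper for
illustration, not part of libgcc, and it assumes the caller has already
ORed in the implicit "1.0" and pre-shifted x so that xm >= ym, as the
assembly does before its first subtraction.

#include <stdint.h>

/* Restoring division of 53-bit mantissas, one bit per step, with
   IEEE round-to-nearest-even applied to the final remainder.  */
static uint64_t div_mantissa(uint64_t xm, uint64_t ym)
{
    uint64_t q = 0, rem = xm;
    for (int i = 0; i < 53; i++) {
        q <<= 1;
        if (rem >= ym) {            /* output a 1 and subtract */
            q |= 1;
            rem -= ym;
        }
        rem <<= 1;                  /* shift the dividend << 1 */
    }
    /* rem now holds the remainder << 1, as in .Ldiv_round above:
       greater than ym rounds up; equal to ym rounds to even.  */
    if (rem > ym)
        q += 1;
    else if (rem == ym)
        q = (q + 1) & ~(uint64_t)1; /* round up, then force even */
    return q;
}

The one-bit-per-iteration structure is what lets the assembly use the
zero-overhead LOOP instruction when XCHAL_HAVE_LOOPS is available.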
+
+#ifdef L_cmpdf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqdf2
+ .global __nedf2
+ .set __nedf2, __eqdf2
+ .type __eqdf2, @function
+__eqdf2:
+ leaf_entry sp, 16
+ bne xl, yl, 2f
+ bne xh, yh, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, xh, 12
+ or a7, a7, xl
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl /* xl == yl here */
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when exponent(x) = 0x7ff and x == y.  */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtdf2
+ .type __gtdf2, @function
+__gtdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __ledf2
+ .type __ledf2, @function
+__ledf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bltu yl, xl, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bgeu xl, yl, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz xh, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gedf2
+ .type __gedf2, @function
+__gedf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltdf2
+ .type __ltdf2, @function
+__ltdf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 2f
+1: bnall yh, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, xh, yh
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz xh, .Llt_xneg
+
+ /* Check if x < y. */
+ bltu xh, yh, 4f
+ bne xh, yh, 5f
+ bgeu xl, yl, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu yh, xh, 4b
+ bne yh, xh, 5f
+ bltu yl, xl, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez xh, 5b
+
+ /* Check if both x and y are nonzero. */
+ or a7, xh, yh
+ slli a7, a7, 1
+ or a7, a7, xl
+ or a7, a7, yl
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unorddf2
+ .type __unorddf2, @function
+__unorddf2:
+ leaf_entry sp, 16
+ movi a6, 0x7ff00000
+ ball xh, a6, 3f
+1: ball yh, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, xh, 12
+ or a7, a7, xl
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, yh, 12
+ or a7, a7, yl
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpdf2 */
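
The comparison entry points above encode IEEE semantics that are easy to
restate in C: NaN is unordered, so __eqdf2 reports "not equal" even when
the bit patterns match, while +0.0 and -0.0 compare equal even though the
bit patterns differ. Below is a sketch of __eqdf2's decision, assuming
the usual binary64 bit layout; the helper name is illustrative, and the
return convention matches libgcc (zero means equal).

#include <stdint.h>

static int eqdf2_bits(uint64_t x, uint64_t y)
{
    const uint64_t exp_mask = 0x7ff0000000000000ULL;
    /* NaN (max exponent, nonzero mantissa) is unequal to everything,
       including itself.  */
    if ((x & exp_mask) == exp_mask && (x << 12) != 0)
        return 1;
    if ((y & exp_mask) == exp_mask && (y << 12) != 0)
        return 1;
    /* +0.0 and -0.0 are equal although the sign bits differ.  */
    if (((x | y) << 1) == 0)
        return 0;
    return x != y;
}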
+
+#ifdef L_fixdfsi
+
+ .align 4
+ .global __fixdfsi
+ .type __fixdfsi, @function
+__fixdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixdfsi_maxint
+ blti a4, 1, .Lfixdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixdfsi */
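
__fixdfsi truncates by re-reading the double as bits: out-of-range
exponents clamp to maxint (with NaN forced to +maxint), tiny exponents
truncate to zero, and otherwise the mantissa plus its implicit "1.0" is
shifted right by the exponent deficit. A C sketch under the binary64
assumption; the helper name is illustrative only.

#include <stdint.h>

static int32_t fixdfsi_bits(uint64_t x)
{
    int exp = (int)((x >> 52) & 0x7ff);
    uint64_t mant = x & 0xfffffffffffffULL;

    if (exp == 0x7ff && mant != 0)          /* NaN -> +maxint        */
        return INT32_MAX;

    int shift = exp - 0x3fe;                /* in range iff 0 < shift < 32 */
    if (shift >= 32)                        /* too big (or +/-Inf): clamp  */
        return (int64_t)x < 0 ? INT32_MIN : INT32_MAX;
    if (shift < 1)                          /* |x| < 1 truncates to 0      */
        return 0;

    /* Add the implicit "1.0" and shift the integer part into place.  */
    uint32_t frac = (uint32_t)((mant | (1ULL << 52)) >> (53 - shift));
    return (int64_t)x < 0 ? -(int32_t)frac : (int32_t)frac;
}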
+
+#ifdef L_fixdfdi
+
+ .align 4
+ .global __fixdfdi
+ .type __fixdfdi, @function
+__fixdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 19, 10 /* 0x3fe */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixdfdi_maxint
+ blti a4, 1, .Lfixdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixdfdi_shifted
+
+.Lfixdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixdfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi xh, 0
+
+.Lfixdfdi_maxint:
+ slli a7, a6, 11 /* 0x80000000 */
+ bgez xh, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixdfdi */
+
+#ifdef L_fixunsdfsi
+
+ .align 4
+ .global __fixunsdfsi
+ .type __fixunsdfsi, @function
+__fixunsdfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 32, .Lfixunsdfsi_maxint
+ bltz a4, .Lfixunsdfsi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src a5, a7, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunsdfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunsdfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunsdfsi_maxint:
+ slli a4, a6, 11 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, xh
+ mov a2, a4
+ leaf_return
+
+.Lfixunsdfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunsdfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz xh, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 11
+ leaf_return
+
+#endif /* L_fixunsdfsi */
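
The unsigned variant differs in its special cases: NaN and positive
overflow map to 0xffffffff, negative overflow maps to 0x80000000, and the
in-range test admits one more exponent value than the signed case (the
"bigexp" path above). C leaves conversion of negative doubles to unsigned
undefined; this sketch simply mirrors what the code above computes. The
helper name is illustrative only, and binary64 layout is assumed.

#include <stdint.h>

static uint32_t fixunsdfsi_bits(uint64_t x)
{
    int exp = (int)((x >> 52) & 0x7ff);
    uint64_t mant = x & 0xfffffffffffffULL;

    if (exp == 0x7ff && mant != 0)
        return 0xffffffffu;                 /* NaN -> all ones       */

    int shift = exp - 0x3ff;                /* in range iff 0 <= shift < 32 */
    if (shift >= 32)                        /* too big, or +/-Inf     */
        return (int64_t)x < 0 ? 0x80000000u : 0xffffffffu;
    if (shift < 0)
        return 0;                           /* |x| < 1 -> 0           */

    uint32_t frac = (uint32_t)((mant | (1ULL << 52)) >> (52 - shift));
    return (int64_t)x < 0 ? -frac : frac;   /* negative: as the asm does */
}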
+
+#ifdef L_fixunsdfdi
+
+ .align 4
+ .global __fixunsdfdi
+ .type __fixunsdfdi, @function
+__fixunsdfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7ff00000
+ ball xh, a6, .Lfixunsdfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
+ extui a4, xh, 20, 11
+ extui a5, a6, 20, 10 /* 0x3ff */
+ sub a4, a4, a5
+ bgei a4, 64, .Lfixunsdfdi_maxint
+ bltz a4, .Lfixunsdfdi_zero
+
+ /* Add explicit "1.0" and shift << 11. */
+ or a7, xh, a6
+ ssai (32 - 11)
+ src xh, a7, xl
+ sll xl, xl
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunsdfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunsdfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunsdfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunsdfdi_smallshift:
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunsdfdi_shifted
+
+.Lfixunsdfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, xh, 12
+ or a4, a4, xl
+ beqz a4, .Lfixunsdfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunsdfdi_maxint:
+ bgez xh, 1b
+2: slli xh, a6, 11 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunsdfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunsdfdi */

+
+#ifdef L_floatsidf
+
+ .align 4
+ .global __floatunsidf
+ .type __floatunsidf, @function
+__floatunsidf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Set the sign to zero and jump to the floatsidf code. */
+ movi a7, 0
+ j .Lfloatsidf_normalize
+
+ .align 4
+ .global __floatsidf
+ .type __floatsidf, @function
+__floatsidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsidf_return_zero
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsidf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position. */
+ srli xh, a5, 11
+ slli xl, a5, (32 - 11)
+
+ /* Set the exponent. */
+ movi a5, 0x41d /* 0x3fe + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign and return. */
+ slli a7, a7, 31
+ or xh, xh, a7
+ leaf_return
+
+.Lfloatsidf_return_zero:
+	/* The zero input is still in a2, which is one half of the xh/xl
+	   result pair; only a3 needs to be cleared.  */
+	movi	a3, 0
+	leaf_return
+
+#endif /* L_floatsidf */
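
__floatsidf is exact: every 32-bit integer fits in a 53-bit mantissa, so
there is no rounding path, just a leading-zero count (NSAU, or the
do_nsau fallback) followed by repacking. An equivalent C sketch with a
portable loop standing in for NSAU; the helper name is illustrative.

#include <stdint.h>

static uint64_t floatsidf_bits(int32_t v)
{
    if (v == 0)
        return 0;                            /* +0.0 */

    uint64_t sign = (uint64_t)(v < 0) << 63;
    uint32_t a = (v < 0) ? -(uint32_t)v : (uint32_t)v;

    int nz = 0;                              /* leading zeros, like do_nsau */
    while (!(a & 0x80000000u)) {
        a <<= 1;
        nz++;
    }

    /* 'a' now has its leading 1 in the msb; placing it at bit 52 lets
       it carry into the exponent field, exactly as the assembly adds
       (0x41d - nz) << 20 to a word whose bit 20 is the leading 1.  */
    uint64_t mant = (uint64_t)a << 21;
    return sign | (((uint64_t)(0x41d - nz) << 52) + mant);
}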
+
+#ifdef L_floatdidf
+
+ .align 4
+ .global __floatundidf
+ .type __floatundidf, @function
+__floatundidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdidf code. */
+ movi a7, 0
+ j .Lfloatdidf_normalize
+
+ .align 4
+ .global __floatdidf
+ .type __floatdidf, @function
+__floatdidf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdidf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdidf_normalize
+ addi xh, xh, -1
+
+.Lfloatdidf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdidf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdidf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 11
+ sll a6, xl
+ src xl, xh, xl
+ srl xh, xh
+
+ /* Set the exponent. */
+ movi a5, 0x43d /* 0x3fe + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 20
+ add xh, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or xh, xh, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi xl, xl, 1
+ beqz xl, .Lfloatdidf_roundcarry
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdidf_exactlyhalf
+2: leaf_return
+
+.Lfloatdidf_bigshift:
+ /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdidf_shifted
+
+.Lfloatdidf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli xl, xl, 1
+ slli xl, xl, 1
+ leaf_return
+
+.Lfloatdidf_roundcarry:
+ /* xl is always zero when the rounding increment overflows, so
+ there's no need to round it to an even value. */
+ addi xh, xh, 1
+ /* Overflow to the exponent is OK. */
+ leaf_return
+
+#endif /* L_floatdidf */
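
Several routines here (.Lfloatdidf above, __truncdfsf2, the division
rounding) share one round-to-nearest-even convention: the bits shifted
out of the mantissa are kept left-aligned in a spare register, the msb
decides whether to round up, and a result that was exactly halfway is
forced even. A minimal C sketch of that convention, with a hypothetical
helper name:

#include <stdint.h>

/* 'bits' is the packed sign/exponent/mantissa; 'leftover' holds the
   bits shifted out of the mantissa, left-aligned in a 32-bit word.  */
static uint64_t round_nearest_even(uint64_t bits, uint32_t leftover)
{
    if (leftover & 0x80000000u) {            /* fraction >= 1/2          */
        bits += 1;                           /* carry into exponent is OK */
        if ((uint32_t)(leftover << 1) == 0)  /* exactly 1/2: force even   */
            bits &= ~(uint64_t)1;
    }
    return bits;
}

Note that when the increment carries out of the low word (the
"roundcarry" labels above), the low bits are already zero, so forcing
the result even is a no-op, which is why the assembly can skip it.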
+
+#ifdef L_truncdfsf2
+
+ .align 4
+ .global __truncdfsf2
+ .type __truncdfsf2, @function
+__truncdfsf2:
+ leaf_entry sp, 16
+
+ /* Adjust the exponent bias. */
+ movi a4, (0x3ff - 0x7f) << 20
+ sub a5, xh, a4
+
+ /* Check for underflow. */
+ xor a6, xh, a5
+ bltz a6, .Ltrunc_underflow
+ extui a6, a5, 20, 11
+ beqz a6, .Ltrunc_underflow
+
+ /* Check for overflow. */
+ movi a4, 255
+ bge a6, a4, .Ltrunc_overflow
+
+ /* Shift a5/xl << 3 into a5/a4. */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+.Ltrunc_addsign:
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ slli a6, a6, 31
+ or a2, a6, a5
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a4, 1f
+ addi a2, a2, 1
+ /* Overflow to the exponent is OK. The answer will be correct. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a4, a4, 1
+ beqz a4, .Ltrunc_exactlyhalf
+1: leaf_return
+
+.Ltrunc_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Ltrunc_overflow:
+ /* Check if exponent == 0x7ff. */
+ movi a4, 0x7ff00000
+ bnall xh, a4, 1f
+
+ /* Check if mantissa is nonzero. */
+ slli a5, xh, 12
+ or a5, a5, xl
+ beqz a5, 1f
+
+ /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
+ srli a4, a4, 1
+
+1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
+ /* Add the sign bit. */
+ extui a6, xh, 31, 1
+ ssai 1
+ src a2, a6, a4
+ leaf_return
+
+.Ltrunc_underflow:
+ /* Find shift count for a subnormal. Flush to zero if >= 32. */
+ extui a6, xh, 20, 11
+ movi a5, 0x3ff - 0x7f
+ sub a6, a5, a6
+ addi a6, a6, 1
+ bgeui a6, 32, 1f
+
+ /* Replace the exponent with an explicit "1.0". */
+ slli a5, a5, 13 /* 0x700000 */
+ or a5, a5, xh
+ slli a5, a5, 11
+ srli a5, a5, 11
+
+ /* Shift the mantissa left by 3 bits (into a5/a4). */
+ ssai (32 - 3)
+ src a5, a5, xl
+ sll a4, xl
+
+ /* Shift right by a6. */
+ ssr a6
+ sll a7, a4
+ src a4, a5, a4
+ srl a5, a5
+ beqz a7, .Ltrunc_addsign
+ or a4, a4, a6 /* any positive, nonzero value will work */
+ j .Ltrunc_addsign
+
+ /* Return +/- zero. */
+1: extui a2, xh, 31, 1
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_truncdfsf2 */
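
The rebias trick at the top of __truncdfsf2 classifies the result with a
subtraction and two tests: take (0x3ff - 0x7f) << 20 off the high word,
then look at the sign bit (a borrow out of the exponent field flips it)
and at the surviving exponent. A C rendering of that classification,
assuming binary64 layout; the helper and its return codes are
illustrative only.

#include <stdint.h>

static int trunc_exponent_class(uint32_t xh, uint32_t *rebiased)
{
    uint32_t a5 = xh - ((0x3ffu - 0x7fu) << 20);
    *rebiased = a5;
    if ((xh ^ a5) & 0x80000000u)   /* borrow reached the sign bit */
        return 0;                  /* underflow                   */
    uint32_t e = (a5 >> 20) & 0x7ff;
    if (e == 0)
        return 0;                  /* underflow                   */
    if (e >= 255)
        return -1;                 /* overflow (or NaN/Inf)       */
    return 1;                      /* in range                    */
}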
+
+#ifdef L_extendsfdf2
+
+ .align 4
+ .global __extendsfdf2
+ .type __extendsfdf2, @function
+__extendsfdf2:
+ leaf_entry sp, 16
+
+ /* Save the sign bit and then shift it off. */
+ extui a5, a2, 31, 1
+ slli a5, a5, 31
+ slli a4, a2, 1
+
+ /* Extract and check the exponent. */
+ extui a6, a2, 23, 8
+ beqz a6, .Lextend_expzero
+ addi a6, a6, 1
+ beqi a6, 256, .Lextend_nan_or_inf
+
+ /* Shift >> 3 into a4/xl. */
+ srli a4, a4, 4
+ slli xl, a2, (32 - 3)
+
+ /* Adjust the exponent bias. */
+ movi a6, (0x3ff - 0x7f) << 20
+ add a4, a4, a6
+
+ /* Add the sign bit. */
+ or xh, a4, a5
+ leaf_return
+
+.Lextend_nan_or_inf:
+ movi a4, 0x7ff00000
+
+ /* Check for NaN. */
+ slli a7, a2, 9
+ beqz a7, 1f
+
+ slli a6, a6, 11 /* 0x80000 */
+ or a4, a4, a6
+
+ /* Add the sign and return. */
+1: or xh, a4, a5
+ movi xl, 0
+ leaf_return
+
+.Lextend_expzero:
+ beqz a4, 1b
+
+ /* Normalize it to have 8 zero bits before the first 1 bit. */
+ do_nsau a7, a4, a2, a3
+ addi a7, a7, -8
+ ssl a7
+ sll a4, a4
+
+ /* Shift >> 3 into a4/xl. */
+ slli xl, a4, (32 - 3)
+ srli a4, a4, 3
+
+ /* Set the exponent. */
+ movi a6, 0x3fe - 0x7f
+ sub a6, a6, a7
+ slli a6, a6, 20
+ add a4, a4, a6
+
+ /* Add the sign and return. */
+ or xh, a4, a5
+ leaf_return
+
+#endif /* L_extendsfdf2 */
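
Widening a float to a double only has to re-bias the exponent and
left-align the mantissa (23 bits moving up to 52 means a shift by 29).
The normal-number path above is equivalent to the following sketch;
NaN/Inf and subnormal paths are omitted and the helper is illustrative.

#include <stdint.h>

static uint64_t extend_normal(uint32_t f)
{
    uint64_t sign = (uint64_t)(f >> 31) << 63;
    uint64_t exp  = (f >> 23) & 0xff;        /* assumed 1..254 (normal) */
    uint64_t mant = f & 0x7fffffu;
    return sign | ((exp + 0x3ff - 0x7f) << 52) | (mant << 29);
}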
+
+
diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S
new file mode 100644
index 000000000..d75be0e5a
--- /dev/null
+++ b/gcc/config/xtensa/ieee754-sf.S
@@ -0,0 +1,1757 @@
+/* IEEE-754 single-precision functions for Xtensa
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __XTENSA_EB__
+#define xh a2
+#define xl a3
+#define yh a4
+#define yl a5
+#else
+#define xh a3
+#define xl a2
+#define yh a5
+#define yl a4
+#endif
+
+/* Warning! The branch displacements for some Xtensa branch instructions
+ are quite small, and this code has been carefully laid out to keep
+ branch targets in range. If you change anything, be sure to check that
+ the assembler is not relaxing anything to branch over a jump. */
+
+#ifdef L_negsf2
+
+ .align 4
+ .global __negsf2
+ .type __negsf2, @function
+__negsf2:
+ leaf_entry sp, 16
+ movi a4, 0x80000000
+ xor a2, a2, a4
+ leaf_return
+
+#endif /* L_negsf2 */
+
+#ifdef L_addsubsf3
+
+ /* Addition */
+__addsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Ladd_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* If x is a NaN, return it. Otherwise, return y. */
+ slli a7, a2, 9
+ beqz a7, .Ladd_ynan_or_inf
+1: leaf_return
+
+.Ladd_ynan_or_inf:
+ /* Return y. */
+ mov a2, a3
+ leaf_return
+
+.Ladd_opposite_signs:
+ /* Operand signs differ. Do a subtraction. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Lsub_same_sign
+
+ .align 4
+ .global __addsf3
+ .type __addsf3, @function
+__addsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Ladd_opposite_signs
+
+.Ladd_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Ladd_xnan_or_inf
+ ball a3, a6, .Ladd_ynan_or_inf
+
+ /* Compare the exponents. The smaller operand will be shifted
+ right by the exponent difference and added to the larger
+ one. */
+ extui a7, a2, 23, 9
+ extui a8, a3, 23, 9
+ bltu a7, a8, .Ladd_shiftx
+
+.Ladd_shifty:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Ladd_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Ladd_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference > 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ /* Do the addition. */
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ beq a10, a7, .Ladd_round
+ mov a8, a7
+ j .Ladd_carry
+
+.Ladd_yexpzero:
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0", and increment the apparent exponent
+ because subnormals behave as if they had the minimum (nonzero)
+ exponent. Test for the case when both exponents are zero. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Ladd_bothexpzero
+ addi a8, a8, 1
+ j .Ladd_yexpdiff
+
+.Ladd_bothexpzero:
+ /* Both exponents are zero. Handle this as a special case. There
+ is no need to shift or round, and the normal code for handling
+ a carry into the exponent field will not work because it
+ assumes there is an implicit "1.0" that needs to be added. */
+ add a2, a2, a3
+1: leaf_return
+
+.Ladd_xexpzero:
+ /* Same as "yexpzero" except skip handling the case when both
+ exponents are zero. */
+ slli a2, a2, 9
+ srli a2, a2, 9
+ addi a7, a7, 1
+ j .Ladd_xexpdiff
+
+.Ladd_shiftx:
+ /* Same thing as the "shifty" code, but with x and y swapped. Also,
+ because the exponent difference is always nonzero in this version,
+ the shift sequence can use SLL and skip loading a constant zero. */
+ bnone a2, a6, .Ladd_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Ladd_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Ladd_returny
+
+ ssr a10
+ sll a9, a2
+ srl a2, a2
+
+ add a2, a2, a3
+
+ /* Check if the add overflowed into the exponent. */
+ extui a10, a2, 23, 9
+ bne a10, a8, .Ladd_carry
+
+.Ladd_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_returny:
+ mov a2, a3
+ leaf_return
+
+.Ladd_carry:
+ /* The addition has overflowed into the exponent field, so the
+ value needs to be renormalized. The mantissa of the result
+ can be recovered by subtracting the original exponent and
+ adding 0x800000 (which is the explicit "1.0" for the
+ mantissa of the non-shifted operand -- the "1.0" for the
+ shifted operand was already added). The mantissa can then
+ be shifted right by one bit. The explicit "1.0" of the
+ shifted mantissa then needs to be replaced by the exponent,
+ incremented by one to account for the normalizing shift.
+ It is faster to combine these operations: do the shift first
+ and combine the additions and subtractions. If x is the
+ original exponent, the result is:
+ shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
+ or:
+ shifted mantissa + ((x + 1) << 22)
+ Note that the exponent is incremented here by leaving the
+ explicit "1.0" of the mantissa in the exponent field. */
+
+ /* Shift x right by one bit. Save the lsb. */
+ mov a10, a2
+ srli a2, a2, 1
+
+ /* See explanation above. The original exponent is in a8. */
+ addi a8, a8, 1
+ slli a8, a8, 22
+ add a2, a2, a8
+
+ /* Return an Infinity if the exponent overflowed. */
+ ball a2, a6, .Ladd_infinity
+
+ /* Same thing as the "round" code except the msb of the leftover
+ fraction is bit 0 of a10, with the rest of the fraction in a9. */
+ bbci.l a10, 0, 1f
+ addi a2, a2, 1
+ beqz a9, .Ladd_exactlyhalf
+1: leaf_return
+
+.Ladd_infinity:
+ /* Clear the mantissa. */
+ srli a2, a2, 23
+ slli a2, a2, 23
+
+ /* The sign bit may have been lost in a carry-out. Put it back. */
+ slli a8, a8, 1
+ or a2, a2, a8
+ leaf_return
+
+.Ladd_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+
+ /* Subtraction */
+__subsf3_aux:
+
+ /* Handle NaNs and Infinities. (This code is placed before the
+ start of the function just to keep it in range of the limited
+ branch displacements.) */
+
+.Lsub_xnan_or_inf:
+ /* If y is neither Infinity nor NaN, return x. */
+ bnall a3, a6, 1f
+ /* Both x and y are either NaN or Inf, so the result is NaN. */
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Lsub_ynan_or_inf:
+ /* Negate y and return it. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+ leaf_return
+
+.Lsub_opposite_signs:
+ /* Operand signs differ. Do an addition. */
+ slli a7, a6, 8
+ xor a3, a3, a7
+ j .Ladd_same_sign
+
+ .align 4
+ .global __subsf3
+ .type __subsf3, @function
+__subsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Check if the two operands have the same sign. */
+ xor a7, a2, a3
+ bltz a7, .Lsub_opposite_signs
+
+.Lsub_same_sign:
+ /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
+ ball a2, a6, .Lsub_xnan_or_inf
+ ball a3, a6, .Lsub_ynan_or_inf
+
+ /* Compare the operands. In contrast to addition, the entire
+ value matters here. */
+ extui a7, a2, 23, 8
+ extui a8, a3, 23, 8
+ bltu a2, a3, .Lsub_xsmaller
+
+.Lsub_ysmaller:
+ /* Check if the smaller (or equal) exponent is zero. */
+ bnone a3, a6, .Lsub_yexpzero
+
+ /* Replace y sign/exponent with 0x008. */
+ or a3, a3, a6
+ slli a3, a3, 8
+ srli a3, a3, 8
+
+.Lsub_yexpdiff:
+ /* Compute the exponent difference. */
+ sub a10, a7, a8
+
+ /* Exponent difference > 32 -- just return the bigger value. */
+ bgeui a10, 32, 1f
+
+ /* Shift y right by the exponent difference. Any bits that are
+ shifted out of y are saved in a9 for rounding the result. */
+ ssr a10
+ movi a9, 0
+ src a9, a3, a9
+ srl a3, a3
+
+ sub a2, a2, a3
+
+ /* Subtract the leftover bits in a9 from zero and propagate any
+ borrow from a2. */
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ beq a10, a7, .Lsub_round
+ j .Lsub_borrow
+
+.Lsub_yexpzero:
+ /* Return zero if the inputs are equal. (For the non-subnormal
+ case, subtracting the "1.0" will cause a borrow from the exponent
+ and this case can be detected when handling the borrow.) */
+ beq a2, a3, .Lsub_return_zero
+
+ /* y is a subnormal value. Replace its sign/exponent with zero,
+ i.e., no implicit "1.0". Unless x is also a subnormal, increment
+ y's apparent exponent because subnormals behave as if they had
+ the minimum (nonzero) exponent. */
+ slli a3, a3, 9
+ srli a3, a3, 9
+ bnone a2, a6, .Lsub_yexpdiff
+ addi a8, a8, 1
+ j .Lsub_yexpdiff
+
+.Lsub_returny:
+ /* Negate and return y. */
+ slli a7, a6, 8
+ xor a2, a3, a7
+1: leaf_return
+
+.Lsub_xsmaller:
+ /* Same thing as the "ysmaller" code, but with x and y swapped and
+ with y negated. */
+ bnone a2, a6, .Lsub_xexpzero
+
+ or a2, a2, a6
+ slli a2, a2, 8
+ srli a2, a2, 8
+
+.Lsub_xexpdiff:
+ sub a10, a8, a7
+ bgeui a10, 32, .Lsub_returny
+
+ ssr a10
+ movi a9, 0
+ src a9, a2, a9
+ srl a2, a2
+
+ /* Negate y. */
+ slli a11, a6, 8
+ xor a3, a3, a11
+
+ sub a2, a3, a2
+
+ neg a9, a9
+ addi a10, a2, -1
+ movnez a2, a10, a9
+
+ /* Check if the subtract underflowed into the exponent. */
+ extui a10, a2, 23, 8
+ bne a10, a8, .Lsub_borrow
+
+.Lsub_round:
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a9, 1f
+ addi a2, a2, 1
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a9, a9, 1
+ beqz a9, .Lsub_exactlyhalf
+1: leaf_return
+
+.Lsub_xexpzero:
+ /* Same as "yexpzero". */
+ beq a2, a3, .Lsub_return_zero
+ slli a2, a2, 9
+ srli a2, a2, 9
+ bnone a3, a6, .Lsub_xexpdiff
+ addi a7, a7, 1
+ j .Lsub_xexpdiff
+
+.Lsub_return_zero:
+ movi a2, 0
+ leaf_return
+
+.Lsub_borrow:
+ /* The subtraction has underflowed into the exponent field, so the
+ value needs to be renormalized. Shift the mantissa left as
+ needed to remove any leading zeros and adjust the exponent
+ accordingly. If the exponent is not large enough to remove
+ all the leading zeros, the result will be a subnormal value. */
+
+ slli a8, a2, 9
+ beqz a8, .Lsub_xzero
+ do_nsau a6, a8, a7, a11
+ srli a8, a8, 9
+ bge a6, a10, .Lsub_subnormal
+ addi a6, a6, 1
+
+.Lsub_normalize_shift:
+ /* Shift the mantissa (a8/a9) left by a6. */
+ ssl a6
+ src a8, a8, a9
+ sll a9, a9
+
+ /* Combine the shifted mantissa with the sign and exponent,
+ decrementing the exponent by a6. (The exponent has already
+ been decremented by one due to the borrow from the subtraction,
+ but adding the mantissa will increment the exponent by one.) */
+ srli a2, a2, 23
+ sub a2, a2, a6
+ slli a2, a2, 23
+ add a2, a2, a8
+ j .Lsub_round
+
+.Lsub_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+.Lsub_xzero:
+ /* If there was a borrow from the exponent, and the mantissa and
+ guard digits are all zero, then the inputs were equal and the
+ result should be zero. */
+ beqz a9, .Lsub_return_zero
+
+ /* Only the guard digit is nonzero. Shift by min(24, a10). */
+ addi a11, a10, -24
+ movi a6, 24
+ movltz a6, a10, a11
+ j .Lsub_normalize_shift
+
+.Lsub_subnormal:
+ /* The exponent is too small to shift away all the leading zeros.
+ Set a6 to the current exponent (which has already been
+ decremented by the borrow) so that the exponent of the result
+ will be zero. Do not add 1 to a6 in this case, because: (1)
+ adding the mantissa will not increment the exponent, so there is
+ no need to subtract anything extra from the exponent to
+ compensate, and (2) the effective exponent of a subnormal is 1
+ not 0 so the shift amount must be 1 smaller than normal. */
+ mov a6, a10
+ j .Lsub_normalize_shift
+
+#endif /* L_addsubsf3 */
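
The .Ladd_carry comment above folds three operations into one: the claim
is that shifted mantissa - (x << 22) + (1 << 22) + (x << 23) equals
shifted mantissa + ((x + 1) << 22), which holds because
-(x << 22) + (x << 23) = x << 22. A throwaway C check of the identity
(not part of libgcc), using the same wrapping 32-bit arithmetic the
hardware performs:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    for (uint32_t x = 0; x < 256; x++) {   /* all 8-bit exponents */
        uint32_t lhs = -(x << 22) + (1u << 22) + (x << 23);
        uint32_t rhs = (x + 1) << 22;
        assert(lhs == rhs);
    }
    return 0;
}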
+
+#ifdef L_mulsf3
+
+ /* Multiplication */
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+__mulsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Lmul_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Lmul_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Lmul_xnormalized
+
+.Lmul_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* If y is zero, return zero. */
+ beqz a3, .Lmul_return_zero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a11, a12
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Lmul_ynormalized
+
+.Lmul_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+.Lmul_xnan_or_inf:
+ /* If y is zero, return NaN. */
+ slli a8, a3, 1
+ bnez a8, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+ j .Lmul_done
+1:
+ /* If y is NaN, return y. */
+ bnall a3, a6, .Lmul_returnx
+ slli a8, a3, 9
+ beqz a8, .Lmul_returnx
+
+.Lmul_returny:
+ mov a2, a3
+
+.Lmul_returnx:
+ /* Set the sign bit and return. */
+ extui a7, a7, 31, 1
+ slli a2, a2, 1
+ ssai 1
+ src a2, a7, a2
+ j .Lmul_done
+
+.Lmul_ynan_or_inf:
+ /* If x is zero, return NaN. */
+ slli a8, a2, 1
+ bnez a8, .Lmul_returny
+ movi a7, 0x400000 /* make it a quiet NaN */
+ or a2, a3, a7
+ j .Lmul_done
+
+ .align 4
+ .global __mulsf3
+ .type __mulsf3, @function
+__mulsf3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 64
+#else
+ leaf_entry sp, 32
+#endif
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Lmul_xnan_or_inf
+ ball a3, a6, .Lmul_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a8, .Lmul_xexpzero
+.Lmul_xnormalized:
+ beqz a9, .Lmul_yexpzero
+.Lmul_ynormalized:
+
+ /* Add the exponents. */
+ add a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+
+ mull a6, a2, a3
+ muluh a2, a2, a3
+
+#else
+
+ /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
+ products. These partial products are:
+
+ 0 xl * yl
+
+ 1 xl * yh
+ 2 xh * yl
+
+ 3 xh * yh
+
+ If using the Mul16 or Mul32 multiplier options, these input
+ chunks must be stored in separate registers. For Mac16, the
+ UMUL.AA.* opcodes can specify that the inputs come from either
+ half of the registers, so there is no need to shift them out
+ ahead of time. If there is no multiply hardware, the 16-bit
+ chunks can be extracted when setting up the arguments to the
+ separate multiply function. */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Calling a separate multiply function will clobber a0 and requires
+ use of a8 as a temporary, so save those values now. (The function
+ uses a custom ABI so nothing else needs to be saved.) */
+ s32i a0, sp, 0
+ s32i a8, sp, 4
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into a2. */
+ do_mul(a2, a2, h, a3, h) /* pp 3 */
+ add a2, a2, a9
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore values saved on the stack during the multiplication. */
+ l32i a0, sp, 0
+ l32i a8, sp, 4
+#endif
+#endif /* ! XCHAL_HAVE_MUL32_HIGH */
+
+ /* Shift left by 9 bits, unless there was a carry-out from the
+ multiply, in which case, shift by 8 bits and increment the
+ exponent. */
+ movi a4, 9
+ srli a5, a2, 24 - 9
+ beqz a5, 1f
+ addi a4, a4, -1
+ addi a8, a8, 1
+1: ssl a4
+ src a2, a2, a6
+ sll a6, a6
+
+ /* Subtract the extra bias from the exponent sum (plus one to account
+ for the explicit "1.0" of the mantissa that will be added to the
+ exponent in the final result). */
+ movi a4, 0x80
+ sub a8, a8, a4
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Lmul_overflow
+
+.Lmul_round:
+ /* Round. */
+ bgez a6, .Lmul_rounded
+ addi a2, a2, 1
+ slli a6, a6, 1
+ beqz a6, .Lmul_exactlyhalf
+
+.Lmul_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a2, a8
+
+.Lmul_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+
+.Lmul_done:
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+.Lmul_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ j .Lmul_rounded
+
+.Lmul_overflow:
+ bltz a8, .Lmul_underflow
+ /* Return +/- Infinity. */
+ movi a8, 0xff
+ slli a2, a8, 23
+ j .Lmul_addsign
+
+.Lmul_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ mov a9, a6
+ ssr a8
+ bgeui a8, 32, .Lmul_flush_to_zero
+
+ /* Shift a2 right. Any bits that are shifted out of a2 are saved
+ in a6 (combined with the shifted-out bits currently in a6) for
+ rounding the result. */
+ sll a6, a2
+ srl a2, a2
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero bits shifted out into a6. */
+ beqz a9, .Lmul_round
+ movi a9, 1
+ or a6, a6, a9
+ j .Lmul_round
+
+.Lmul_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ j .Lmul_done
+
+#if XCHAL_NO_MUL
+
+ /* For Xtensa processors with no multiply hardware, this simplified
+ version of _mulsi3 is used for multiplying 16-bit chunks of
+ the floating-point mantissas. When using CALL0, this function
+ uses a custom ABI: the inputs are passed in a13 and a14, the
+ result is returned in a12, and a8 and a15 are clobbered. */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+#endif /* L_mulsf3 */
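
The partial-product scheme described above (for targets without a
32x32->64 multiply) splits each operand into 16-bit halves and combines
the four 32-bit products, tracking the carry out of the middle sum the
way the assembly does in a9. A C sketch of that combination; the
function name is illustrative only.

#include <stdint.h>

static void mul32x32(uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
{
    uint32_t xl = x & 0xffff, xh = x >> 16;
    uint32_t yl = y & 0xffff, yh = y >> 16;

    uint32_t pp0 = xl * yl;          /* bits  0..31 */
    uint32_t pp1 = xl * yh;          /* bits 16..47 */
    uint32_t pp2 = xh * yl;          /* bits 16..47 */
    uint32_t pp3 = xh * yh;          /* bits 32..63 */

    uint32_t mid = pp1 + pp2;
    uint32_t carry = mid < pp1;      /* carry out of pp1 + pp2       */

    uint32_t lo32 = pp0 + (mid << 16);
    carry = (carry << 16) | (mid >> 16);  /* high part of mid << 16  */
    if (lo32 < pp0)                  /* carry out of the low word    */
        carry++;

    *lo = lo32;
    *hi = pp3 + carry;
}

The funnel shifts the assembly performs with ssai 16 / src correspond to
the (carry << 16) | (mid >> 16) step here.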
+
+#ifdef L_divsf3
+
+ /* Division */
+__divsf3_aux:
+
+ /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
+ (This code is placed before the start of the function just to
+ keep it in range of the limited branch displacements.) */
+
+.Ldiv_yexpzero:
+ /* Clear the sign bit of y. */
+ slli a3, a3, 1
+ srli a3, a3, 1
+
+ /* Check for division by zero. */
+ beqz a3, .Ldiv_yzero
+
+ /* Normalize y. Adjust the exponent in a9. */
+ do_nsau a10, a3, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a3, a3
+ movi a9, 1
+ sub a9, a9, a10
+ j .Ldiv_ynormalized
+
+.Ldiv_yzero:
+ /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
+ slli a4, a2, 1
+ srli a4, a4, 1
+ srli a2, a7, 31
+ slli a2, a2, 31
+ or a2, a2, a6
+ bnez a4, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_xexpzero:
+ /* Clear the sign bit of x. */
+ slli a2, a2, 1
+ srli a2, a2, 1
+
+ /* If x is zero, return zero. */
+ beqz a2, .Ldiv_return_zero
+
+ /* Normalize x. Adjust the exponent in a8. */
+ do_nsau a10, a2, a4, a5
+ addi a10, a10, -8
+ ssl a10
+ sll a2, a2
+ movi a8, 1
+ sub a8, a8, a10
+ j .Ldiv_xnormalized
+
+.Ldiv_return_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+.Ldiv_xnan_or_inf:
+ /* Set the sign bit of the result. */
+ srli a7, a3, 31
+ slli a7, a7, 31
+ xor a2, a2, a7
+ /* If y is NaN or Inf, return NaN. */
+ bnall a3, a6, 1f
+ movi a4, 0x400000 /* make it a quiet NaN */
+ or a2, a2, a4
+1: leaf_return
+
+.Ldiv_ynan_or_inf:
+ /* If y is Infinity, return zero. */
+ slli a8, a3, 9
+ beqz a8, .Ldiv_return_zero
+ /* y is NaN; return it. */
+ mov a2, a3
+ leaf_return
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+
+ /* Get the sign of the result. */
+ xor a7, a2, a3
+
+ /* Check for NaN and infinity. */
+ ball a2, a6, .Ldiv_xnan_or_inf
+ ball a3, a6, .Ldiv_ynan_or_inf
+
+ /* Extract the exponents. */
+ extui a8, a2, 23, 8
+ extui a9, a3, 23, 8
+
+ beqz a9, .Ldiv_yexpzero
+.Ldiv_ynormalized:
+ beqz a8, .Ldiv_xexpzero
+.Ldiv_xnormalized:
+
+ /* Subtract the exponents. */
+ sub a8, a8, a9
+
+ /* Replace sign/exponent fields with explicit "1.0". */
+ movi a10, 0xffffff
+ or a2, a2, a6
+ and a2, a2, a10
+ or a3, a3, a6
+ and a3, a3, a10
+
+ /* The first digit of the mantissa division must be a one.
+ Shift x (and adjust the exponent) as needed to make this true. */
+ bltu a3, a2, 1f
+ slli a2, a2, 1
+ addi a8, a8, -1
+1:
+ /* Do the first subtraction and shift. */
+ sub a2, a2, a3
+ slli a2, a2, 1
+
+ /* Put the quotient into a10. */
+ movi a10, 1
+
+ /* Divide one bit at a time for 23 bits. */
+ movi a9, 23
+#if XCHAL_HAVE_LOOPS
+ loop a9, .Ldiv_loopend
+#endif
+.Ldiv_loop:
+ /* Shift the quotient << 1. */
+ slli a10, a10, 1
+
+ /* Is this digit a 0 or 1? */
+ bltu a2, a3, 1f
+
+ /* Output a 1 and subtract. */
+ addi a10, a10, 1
+ sub a2, a2, a3
+
+ /* Shift the dividend << 1. */
+1: slli a2, a2, 1
+
+#if !XCHAL_HAVE_LOOPS
+ addi a9, a9, -1
+ bnez a9, .Ldiv_loop
+#endif
+.Ldiv_loopend:
+
+ /* Add the exponent bias (less one to account for the explicit "1.0"
+ of the mantissa that will be added to the exponent in the final
+ result). */
+ addi a8, a8, 0x7e
+
+ /* Check for over/underflow. The value in a8 is one less than the
+ final exponent, so values in the range 0..fd are OK here. */
+ movi a4, 0xfe
+ bgeu a8, a4, .Ldiv_overflow
+
+.Ldiv_round:
+ /* Round. The remainder (<< 1) is in a2. */
+ bltu a2, a3, .Ldiv_rounded
+ addi a10, a10, 1
+ beq a2, a3, .Ldiv_exactlyhalf
+
+.Ldiv_rounded:
+ /* Add the exponent to the mantissa. */
+ slli a8, a8, 23
+ add a2, a10, a8
+
+.Ldiv_addsign:
+ /* Add the sign bit. */
+ srli a7, a7, 31
+ slli a7, a7, 31
+ or a2, a2, a7
+ leaf_return
+
+.Ldiv_overflow:
+ bltz a8, .Ldiv_underflow
+ /* Return +/- Infinity. */
+ addi a8, a4, 1 /* 0xff */
+ slli a2, a8, 23
+ j .Ldiv_addsign
+
+.Ldiv_exactlyhalf:
+ /* Remainder is exactly half the divisor. Round even. */
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_underflow:
+ /* Create a subnormal value, where the exponent field contains zero,
+ but the effective exponent is 1. The value of a8 is one less than
+ the actual exponent, so just negate it to get the shift amount. */
+ neg a8, a8
+ ssr a8
+ bgeui a8, 32, .Ldiv_flush_to_zero
+
+ /* Shift a10 right. Any bits that are shifted out of a10 are
+ saved in a6 for rounding the result. */
+ sll a6, a10
+ srl a10, a10
+
+ /* Set the exponent to zero. */
+ movi a8, 0
+
+ /* Pack any nonzero remainder (in a2) into a6. */
+ beqz a2, 1f
+ movi a9, 1
+ or a6, a6, a9
+
+ /* Round a10 based on the bits shifted out into a6. */
+1: bgez a6, .Ldiv_rounded
+ addi a10, a10, 1
+ slli a6, a6, 1
+ bnez a6, .Ldiv_rounded
+ srli a10, a10, 1
+ slli a10, a10, 1
+ j .Ldiv_rounded
+
+.Ldiv_flush_to_zero:
+ /* Return zero with the appropriate sign bit. */
+ srli a2, a7, 31
+ slli a2, a2, 31
+ leaf_return
+
+#endif /* L_divsf3 */
+
+#ifdef L_cmpsf2
+
+ /* Equal and Not Equal */
+
+ .align 4
+ .global __eqsf2
+ .global __nesf2
+ .set __nesf2, __eqsf2
+ .type __eqsf2, @function
+__eqsf2:
+ leaf_entry sp, 16
+ bne a2, a3, 4f
+
+ /* The values are equal but NaN != NaN. Check the exponent. */
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+
+ /* Equal. */
+ movi a2, 0
+ leaf_return
+
+ /* Not equal. */
+2: movi a2, 1
+ leaf_return
+
+ /* Check if the mantissas are nonzero. */
+3: slli a7, a2, 9
+ j 5f
+
+ /* Check if x and y are zero with different signs. */
+4: or a7, a2, a3
+ slli a7, a7, 1
+
+	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
+	   of x when exponent(x) = 0x7f8 and x == y.  */
+5: movi a2, 0
+ movi a3, 1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than */
+
+ .align 4
+ .global __gtsf2
+ .type __gtsf2, @function
+__gtsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+
+ /* Less Than or Equal */
+
+ .align 4
+ .global __lesf2
+ .type __lesf2, @function
+__lesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Lle_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Lle_cmp
+ movi a2, 1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+.Lle_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Lle_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Lle_xneg
+
+ /* Check if x <= y. */
+ bltu a3, a2, 5f
+4: movi a2, 0
+ leaf_return
+
+.Lle_xneg:
+ /* Check if y <= x. */
+ bgeu a2, a3, 4b
+5: movi a2, 1
+ leaf_return
+
+.Lle_diff_signs:
+ bltz a2, 4b
+
+ /* Check if both x and y are zero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 1
+ movi a3, 0
+ moveqz a2, a3, a7
+ leaf_return
+
+
+ /* Greater Than or Equal */
+
+ .align 4
+ .global __gesf2
+ .type __gesf2, @function
+__gesf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, -1
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, -1
+ leaf_return
+
+
+ /* Less Than */
+
+ .align 4
+ .global __ltsf2
+ .type __ltsf2, @function
+__ltsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 2f
+1: bnall a3, a6, .Llt_cmp
+
+ /* Check if y is a NaN. */
+ slli a7, a3, 9
+ beqz a7, .Llt_cmp
+ movi a2, 0
+ leaf_return
+
+ /* Check if x is a NaN. */
+2: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 0
+ leaf_return
+
+.Llt_cmp:
+ /* Check if x and y have different signs. */
+ xor a7, a2, a3
+ bltz a7, .Llt_diff_signs
+
+ /* Check if x is negative. */
+ bltz a2, .Llt_xneg
+
+ /* Check if x < y. */
+ bgeu a2, a3, 5f
+4: movi a2, -1
+ leaf_return
+
+.Llt_xneg:
+ /* Check if y < x. */
+ bltu a3, a2, 4b
+5: movi a2, 0
+ leaf_return
+
+.Llt_diff_signs:
+ bgez a2, 5b
+
+ /* Check if both x and y are nonzero. */
+ or a7, a2, a3
+ slli a7, a7, 1
+ movi a2, 0
+ movi a3, -1
+ movnez a2, a3, a7
+ leaf_return
+
+
+ /* Unordered */
+
+ .align 4
+ .global __unordsf2
+ .type __unordsf2, @function
+__unordsf2:
+ leaf_entry sp, 16
+ movi a6, 0x7f800000
+ ball a2, a6, 3f
+1: ball a3, a6, 4f
+2: movi a2, 0
+ leaf_return
+
+3: slli a7, a2, 9
+ beqz a7, 1b
+ movi a2, 1
+ leaf_return
+
+4: slli a7, a3, 9
+ beqz a7, 2b
+ movi a2, 1
+ leaf_return
+
+#endif /* L_cmpsf2 */
+
+#ifdef L_fixsfsi
+
+ .align 4
+ .global __fixsfsi
+ .type __fixsfsi, @function
+__fixsfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 32, .Lfixsfsi_maxint
+ blti a4, 1, .Lfixsfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixsfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfsi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ addi a5, a4, -1 /* 0x7fffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixsfsi_zero:
+ movi a2, 0
+ leaf_return
+
+#endif /* L_fixsfsi */
+
+#ifdef L_fixsfdi
+
+ .align 4
+ .global __fixsfdi
+ .type __fixsfdi, @function
+__fixsfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixsfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7e
+ bgei a4, 64, .Lfixsfdi_maxint
+ blti a4, 1, .Lfixsfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixsfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixsfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixsfdi_smallshift:
+ movi xl, 0
+ sll xl, xh
+ srl xh, xh
+ j .Lfixsfdi_shifted
+
+.Lfixsfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixsfdi_maxint
+
+ /* Translate NaN to +maxint. */
+ movi a2, 0
+
+.Lfixsfdi_maxint:
+ slli a7, a6, 8 /* 0x80000000 */
+ bgez a2, 1f
+ mov xh, a7
+ movi xl, 0
+ leaf_return
+
+1: addi xh, a7, -1 /* 0x7fffffff */
+ movi xl, -1
+ leaf_return
+
+.Lfixsfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+#endif /* L_fixsfdi */
+
+#ifdef L_fixunssfsi
+
+ .align 4
+ .global __fixunssfsi
+ .type __fixunssfsi, @function
+__fixunssfsi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfsi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 32, .Lfixunssfsi_maxint
+ bltz a4, .Lfixunssfsi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli a5, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 32, .Lfixunssfsi_bigexp
+ ssl a4 /* shift by 32 - a4 */
+ srl a5, a5
+
+ /* Negate the result if sign != 0. */
+ neg a2, a5
+ movgez a2, a5, a7
+ leaf_return
+
+.Lfixunssfsi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfsi_maxint
+
+ /* Translate NaN to 0xffffffff. */
+ movi a2, -1
+ leaf_return
+
+.Lfixunssfsi_maxint:
+ slli a4, a6, 8 /* 0x80000000 */
+ movi a5, -1 /* 0xffffffff */
+ movgez a4, a5, a2
+ mov a2, a4
+ leaf_return
+
+.Lfixunssfsi_zero:
+ movi a2, 0
+ leaf_return
+
+.Lfixunssfsi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a2, 1f
+ mov a2, a5 /* no shift needed */
+ leaf_return
+
+ /* Return 0x80000000 if negative. */
+1: slli a2, a6, 8
+ leaf_return
+
+#endif /* L_fixunssfsi */
+
+#ifdef L_fixunssfdi
+
+ .align 4
+ .global __fixunssfdi
+ .type __fixunssfdi, @function
+__fixunssfdi:
+ leaf_entry sp, 16
+
+ /* Check for NaN and Infinity. */
+ movi a6, 0x7f800000
+ ball a2, a6, .Lfixunssfdi_nan_or_inf
+
+ /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
+ extui a4, a2, 23, 8
+ addi a4, a4, -0x7f
+ bgei a4, 64, .Lfixunssfdi_maxint
+ bltz a4, .Lfixunssfdi_zero
+
+ /* Add explicit "1.0" and shift << 8. */
+ or a7, a2, a6
+ slli xh, a7, 8
+
+ /* Shift back to the right, based on the exponent. */
+ addi a4, a4, 1
+ beqi a4, 64, .Lfixunssfdi_bigexp
+ ssl a4 /* shift by 64 - a4 */
+ bgei a4, 32, .Lfixunssfdi_smallshift
+ srl xl, xh
+ movi xh, 0
+
+.Lfixunssfdi_shifted:
+ /* Negate the result if sign != 0. */
+ bgez a7, 1f
+ neg xl, xl
+ neg xh, xh
+ beqz xl, 1f
+ addi xh, xh, -1
+1: leaf_return
+
+.Lfixunssfdi_smallshift:
+ movi xl, 0
+ src xl, xh, xl
+ srl xh, xh
+ j .Lfixunssfdi_shifted
+
+.Lfixunssfdi_nan_or_inf:
+ /* Handle Infinity and NaN. */
+ slli a4, a2, 9
+ beqz a4, .Lfixunssfdi_maxint
+
+ /* Translate NaN to 0xffffffff.... */
+1: movi xh, -1
+ movi xl, -1
+ leaf_return
+
+.Lfixunssfdi_maxint:
+ bgez a2, 1b
+2: slli xh, a6, 8 /* 0x80000000 */
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_zero:
+ movi xh, 0
+ movi xl, 0
+ leaf_return
+
+.Lfixunssfdi_bigexp:
+ /* Handle unsigned maximum exponent case. */
+ bltz a7, 2b
+ movi xl, 0
+ leaf_return /* no shift needed */
+
+#endif /* L_fixunssfdi */
+
+#ifdef L_floatsisf
+
+ .align 4
+ .global __floatunsisf
+ .type __floatunsisf, @function
+__floatunsisf:
+ leaf_entry sp, 16
+ beqz a2, .Lfloatsisf_return
+
+ /* Set the sign to zero and jump to the floatsisf code. */
+ movi a7, 0
+ j .Lfloatsisf_normalize
+
+ .align 4
+ .global __floatsisf
+ .type __floatsisf, @function
+__floatsisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ beqz a2, .Lfloatsisf_return
+
+ /* Save the sign. */
+ extui a7, a2, 31, 1
+
+ /* Get the absolute value. */
+#if XCHAL_HAVE_ABS
+ abs a2, a2
+#else
+ neg a4, a2
+ movltz a2, a4, a2
+#endif
+
+.Lfloatsisf_normalize:
+ /* Normalize with the first 1 bit in the msb. */
+ do_nsau a4, a2, a5, a6
+ ssl a4
+ sll a5, a2
+
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ srli a2, a5, 8
+ slli a6, a5, (32 - 8)
+
+ /* Set the exponent. */
+ movi a5, 0x9d /* 0x7e + 31 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, a2, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, .Lfloatsisf_return
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatsisf_exactlyhalf
+
+.Lfloatsisf_return:
+ leaf_return
+
+.Lfloatsisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatsisf */
+
+#ifdef L_floatdisf
+
+ .align 4
+ .global __floatundisf
+ .type __floatundisf, @function
+__floatundisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Set the sign to zero and jump to the floatdisf code. */
+ movi a7, 0
+ j .Lfloatdisf_normalize
+
+ .align 4
+ .global __floatdisf
+ .type __floatdisf, @function
+__floatdisf:
+ leaf_entry sp, 16
+
+ /* Check for zero. */
+ or a4, xh, xl
+ beqz a4, 2f
+
+ /* Save the sign. */
+ extui a7, xh, 31, 1
+
+ /* Get the absolute value. */
+ bgez xh, .Lfloatdisf_normalize
+ neg xl, xl
+ neg xh, xh
+ beqz xl, .Lfloatdisf_normalize
+ addi xh, xh, -1
+
+.Lfloatdisf_normalize:
+ /* Normalize with the first 1 bit in the msb of xh. */
+ beqz xh, .Lfloatdisf_bigshift
+ do_nsau a4, xh, a5, a6
+ ssl a4
+ src xh, xh, xl
+ sll xl, xl
+
+.Lfloatdisf_shifted:
+ /* Shift the mantissa into position, with rounding bits in a6. */
+ ssai 8
+ sll a5, xl
+ src a6, xh, xl
+ srl xh, xh
+ beqz a5, 1f
+ movi a5, 1
+ or a6, a6, a5
+1:
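+	/* a6 now holds the rounding bits; any lower bits shifted out through
+	   a5 were folded in as a sticky LSB, so the exactly-1/2 test below
+	   stays exact for 64-bit inputs.  */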
+ /* Set the exponent. */
+ movi a5, 0xbd /* 0x7e + 63 */
+ sub a5, a5, a4
+ slli a5, a5, 23
+ add a2, xh, a5
+
+ /* Add the sign. */
+ slli a7, a7, 31
+ or a2, a2, a7
+
+ /* Round up if the leftover fraction is >= 1/2. */
+ bgez a6, 2f
+ addi a2, a2, 1 /* Overflow to the exponent is OK. */
+
+ /* Check if the leftover fraction is exactly 1/2. */
+ slli a6, a6, 1
+ beqz a6, .Lfloatdisf_exactlyhalf
+2: leaf_return
+
+.Lfloatdisf_bigshift:
+	/* xh is zero.  Normalize with the first 1 bit of xl in the msb of xh. */
+ do_nsau a4, xl, a5, a6
+ ssl a4
+ sll xh, xl
+ movi xl, 0
+ addi a4, a4, 32
+ j .Lfloatdisf_shifted
+
+.Lfloatdisf_exactlyhalf:
+ /* Round down to the nearest even value. */
+ srli a2, a2, 1
+ slli a2, a2, 1
+ leaf_return
+
+#endif /* L_floatdisf */
diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm
new file mode 100644
index 000000000..071b91711
--- /dev/null
+++ b/gcc/config/xtensa/lib1funcs.asm
@@ -0,0 +1,845 @@
+/* Assembly functions for the Xtensa version of libgcc1.
+ Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* Define macros for the ABS and ADDX* instructions to handle cases
+ where they are not included in the Xtensa processor configuration. */
+
+ .macro do_abs dst, src, tmp
+#if XCHAL_HAVE_ABS
+ abs \dst, \src
+#else
+ neg \tmp, \src
+ movgez \tmp, \src, \src
+ mov \dst, \tmp
+#endif
+ .endm
+
+ .macro do_addx2 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx2 \dst, \as, \at
+#else
+ slli \tmp, \as, 1
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx4 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx4 \dst, \as, \at
+#else
+ slli \tmp, \as, 2
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+ .macro do_addx8 dst, as, at, tmp
+#if XCHAL_HAVE_ADDX
+ addx8 \dst, \as, \at
+#else
+ slli \tmp, \as, 3
+ add \dst, \tmp, \at
+#endif
+ .endm
+
+/* Define macros for leaf function entry and return, supporting either the
+ standard register windowed ABI or the non-windowed call0 ABI. These
+ macros do not allocate any extra stack space, so they only work for
+ leaf functions that do not need to spill anything to the stack. */
+
+ .macro leaf_entry reg, size
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ entry \reg, \size
+#else
+ /* do nothing */
+#endif
+ .endm
+
+ .macro leaf_return
+#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
+ retw
+#else
+ ret
+#endif
+ .endm
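+
+/* Every function below follows the same pattern: "leaf_entry sp, <size>"
+   on entry and "leaf_return" on each exit path, so the same source
+   assembles correctly for both the windowed and the call0 ABI.  */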
+
+
+#ifdef L_mulsi3
+ .align 4
+ .global __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ leaf_entry sp, 16
+
+#if XCHAL_HAVE_MUL32
+ mull a2, a2, a3
+
+#elif XCHAL_HAVE_MUL16
+ or a4, a2, a3
+ srai a4, a4, 16
+ bnez a4, .LMUL16
+ mul16u a2, a2, a3
+ leaf_return
+.LMUL16:
+ srai a4, a2, 16
+ srai a5, a3, 16
+ mul16u a7, a4, a3
+ mul16u a6, a5, a2
+ mul16u a4, a2, a3
+ add a7, a7, a6
+ slli a7, a7, 16
+ add a2, a7, a4
+
+#elif XCHAL_HAVE_MAC16
+ mul.aa.hl a2, a3
+ mula.aa.lh a2, a3
+ rsr a5, ACCLO
+ umul.aa.ll a2, a3
+ rsr a4, ACCLO
+ slli a5, a5, 16
+ add a2, a4, a5
+
+#else /* !MUL32 && !MUL16 && !MAC16 */
+
+ /* Multiply one bit at a time, but unroll the loop 4x to better
+ exploit the addx instructions and avoid overhead.
+ Peel the first iteration to save a cycle on init. */
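+
+	/* In C terms, the code below computes (a sketch; r, a and b are
+	   illustrative names for a2, a4 and a3 after the setup):
+
+	     unsigned r = 0;
+	     while (b != 0) {
+	       if (b & 1) r += a;         // one conditional add per bit
+	       a <<= 1;  b >>= 1;
+	     }
+
+	   except that four bits of b are retired per iteration, with
+	   ADDX2/ADDX4/ADDX8 folding the shifts of a into the adds.  */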
+
+ /* Avoid negative numbers. */
+ xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
+ do_abs a3, a3, a6
+ do_abs a2, a2, a6
+
+ /* Swap so the second argument is smaller. */
+ sub a7, a2, a3
+ mov a4, a3
+ movgez a4, a2, a7 /* a4 = max (a2, a3) */
+ movltz a3, a2, a7 /* a3 = min (a2, a3) */
+
+ movi a2, 0
+ extui a6, a3, 0, 1
+ movnez a2, a4, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+ neg a3, a2
+ movltz a2, a3, a5
+ leaf_return
+
+ .align 4
+.Lmult_main_loop:
+ srli a3, a3, 4
+ slli a4, a4, 4
+
+ add a7, a4, a2
+ extui a6, a3, 0, 1
+ movnez a2, a7, a6
+
+ do_addx2 a7, a4, a2, a7
+ extui a6, a3, 1, 1
+ movnez a2, a7, a6
+
+ do_addx4 a7, a4, a2, a7
+ extui a6, a3, 2, 1
+ movnez a2, a7, a6
+
+ do_addx8 a7, a4, a2, a7
+ extui a6, a3, 3, 1
+ movnez a2, a7, a6
+
+ bgeui a3, 16, .Lmult_main_loop
+
+ neg a3, a2
+ movltz a2, a3, a5
+
+#endif /* !MUL32 && !MUL16 && !MAC16 */
+
+ leaf_return
+ .size __mulsi3, . - __mulsi3
+
+#endif /* L_mulsi3 */
+
+
+#ifdef L_umulsidi3
+
+#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
+#define XCHAL_NO_MUL 1
+#endif
+
+ .align 4
+ .global __umulsidi3
+ .type __umulsidi3, @function
+__umulsidi3:
+#if __XTENSA_CALL0_ABI__
+ leaf_entry sp, 32
+ addi sp, sp, -32
+ s32i a12, sp, 16
+ s32i a13, sp, 20
+ s32i a14, sp, 24
+ s32i a15, sp, 28
+#elif XCHAL_NO_MUL
+ /* This is not really a leaf function; allocate enough stack space
+ to allow CALL12s to a helper function. */
+ leaf_entry sp, 48
+#else
+ leaf_entry sp, 16
+#endif
+
+#ifdef __XTENSA_EB__
+#define wh a2
+#define wl a3
+#else
+#define wh a3
+#define wl a2
+#endif /* __XTENSA_EB__ */
+
+ /* This code is taken from the mulsf3 routine in ieee754-sf.S.
+ See more comments there. */
+
+#if XCHAL_HAVE_MUL32_HIGH
+ mull a6, a2, a3
+ muluh wh, a2, a3
+ mov wl, a6
+
+#else /* ! MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* a0 and a8 will be clobbered by calling the multiply function
+ but a8 is not used here and need not be saved. */
+ s32i a0, sp, 0
+#endif
+
+#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
+
+#define a2h a4
+#define a3h a5
+
+ /* Get the high halves of the inputs into registers. */
+ srli a2h, a2, 16
+ srli a3h, a3, 16
+
+#define a2l a2
+#define a3l a3
+
+#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
+ /* Clear the high halves of the inputs. This does not matter
+ for MUL16 because the high bits are ignored. */
+ extui a2, a2, 0, 16
+ extui a3, a3, 0, 16
+#endif
+#endif /* MUL16 || MUL32 */
+
+
+#if XCHAL_HAVE_MUL16
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mul16u dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MUL32
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ mull dst, xreg ## xhalf, yreg ## yhalf
+
+#elif XCHAL_HAVE_MAC16
+
+/* The preprocessor insists on inserting a space when concatenating after
+ a period in the definition of do_mul below. These macros are a workaround
+ using underscores instead of periods when doing the concatenation. */
+#define umul_aa_ll umul.aa.ll
+#define umul_aa_lh umul.aa.lh
+#define umul_aa_hl umul.aa.hl
+#define umul_aa_hh umul.aa.hh
+
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ umul_aa_ ## xhalf ## yhalf xreg, yreg; \
+ rsr dst, ACCLO
+
+#else /* no multiply hardware */
+
+#define set_arg_l(dst, src) \
+ extui dst, src, 0, 16
+#define set_arg_h(dst, src) \
+ srli dst, src, 16
+
+#if __XTENSA_CALL0_ABI__
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a13, xreg); \
+ set_arg_ ## yhalf (a14, yreg); \
+ call0 .Lmul_mulsi3; \
+ mov dst, a12
+#else
+#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
+ set_arg_ ## xhalf (a14, xreg); \
+ set_arg_ ## yhalf (a15, yreg); \
+ call12 .Lmul_mulsi3; \
+ mov dst, a14
+#endif /* __XTENSA_CALL0_ABI__ */
+
+#endif /* no multiply hardware */
+
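+	/* In C terms, the sequence below computes (a sketch; al/ah and bl/bh
+	   are the 16-bit halves of the two inputs):
+
+	     pp0 = al*bl;  pp1 = al*bh;  pp2 = ah*bl;  pp3 = ah*bh;
+	     result = pp0 + ((uint64_t) (pp1 + pp2) << 16)
+		      + ((uint64_t) pp3 << 32);
+
+	   with the carry out of pp1 + pp2 tracked separately in a9.  */
+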
+ /* Add pp1 and pp2 into a6 with carry-out in a9. */
+ do_mul(a6, a2, l, a3, h) /* pp 1 */
+ do_mul(a11, a2, h, a3, l) /* pp 2 */
+ movi a9, 0
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Shift the high half of a9/a6 into position in a9. Note that
+ this value can be safely incremented without any carry-outs. */
+ ssai 16
+ src a9, a9, a6
+
+ /* Compute the low word into a6. */
+ do_mul(a11, a2, l, a3, l) /* pp 0 */
+ sll a6, a6
+ add a6, a6, a11
+ bgeu a6, a11, 1f
+ addi a9, a9, 1
+1:
+ /* Compute the high word into wh. */
+ do_mul(wh, a2, h, a3, h) /* pp 3 */
+ add wh, wh, a9
+ mov wl, a6
+
+#endif /* !MUL32_HIGH */
+
+#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
+ /* Restore the original return address. */
+ l32i a0, sp, 0
+#endif
+#if __XTENSA_CALL0_ABI__
+ l32i a12, sp, 16
+ l32i a13, sp, 20
+ l32i a14, sp, 24
+ l32i a15, sp, 28
+ addi sp, sp, 32
+#endif
+ leaf_return
+
+#if XCHAL_NO_MUL
+
+	/* For Xtensa processors with no multiply hardware, this simplified
+	   version of _mulsi3 is used for multiplying the 16-bit chunks of
+	   the operands.  When using CALL0, this function uses a custom
+	   ABI: the inputs are passed in a13 and a14, the result is
+	   returned in a12, and a8 and a15 are clobbered.  */
+ .align 4
+.Lmul_mulsi3:
+ leaf_entry sp, 16
+ .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
+ movi \dst, 0
+1: add \tmp1, \src2, \dst
+ extui \tmp2, \src1, 0, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx2 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 1, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx4 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 2, 1
+ movnez \dst, \tmp1, \tmp2
+
+ do_addx8 \tmp1, \src2, \dst, \tmp1
+ extui \tmp2, \src1, 3, 1
+ movnez \dst, \tmp1, \tmp2
+
+ srli \src1, \src1, 4
+ slli \src2, \src2, 4
+ bnez \src1, 1b
+ .endm
+#if __XTENSA_CALL0_ABI__
+ mul_mulsi3_body a12, a13, a14, a15, a8
+#else
+ /* The result will be written into a2, so save that argument in a4. */
+ mov a4, a2
+ mul_mulsi3_body a2, a4, a3, a5, a6
+#endif
+ leaf_return
+#endif /* XCHAL_NO_MUL */
+
+ .size __umulsidi3, . - __umulsidi3
+
+#endif /* L_umulsidi3 */
+
+
+/* Define a macro for the NSAU (unsigned normalize shift amount)
+ instruction, which computes the number of leading zero bits,
+ to handle cases where it is not included in the Xtensa processor
+ configuration. */
+
+ .macro do_nsau cnt, val, tmp, a
+#if XCHAL_HAVE_NSA
+ nsau \cnt, \val
+#else
+ mov \a, \val
+ movi \cnt, 0
+ extui \tmp, \a, 16, 16
+ bnez \tmp, 0f
+ movi \cnt, 16
+ slli \a, \a, 16
+0:
+ extui \tmp, \a, 24, 8
+ bnez \tmp, 1f
+ addi \cnt, \cnt, 8
+ slli \a, \a, 8
+1:
+ movi \tmp, __nsau_data
+ extui \a, \a, 24, 8
+ add \tmp, \tmp, \a
+ l8ui \tmp, \tmp, 0
+ add \cnt, \cnt, \tmp
+#endif /* !XCHAL_HAVE_NSA */
+ .endm
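+
+/* In C terms, the table-driven fallback above is (a sketch):
+
+     int nsau (uint32_t v)
+     {
+       int n = 0;
+       if ((v >> 16) == 0) { n += 16; v <<= 16; }
+       if ((v >> 24) == 0) { n += 8;  v <<= 8;  }
+       return n + __nsau_data[v >> 24];   // 0..8 leading zeros per byte
+     }
+
+   so nsau (0) yields 24 + __nsau_data[0] == 32, matching the NSAU
+   instruction.  */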
+
+#ifdef L_clz
+ .section .rodata
+ .align 4
+ .global __nsau_data
+ .type __nsau_data, @object
+__nsau_data:
+#if !XCHAL_HAVE_NSA
+ .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
+ .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+#endif /* !XCHAL_HAVE_NSA */
+ .size __nsau_data, . - __nsau_data
+ .hidden __nsau_data
+#endif /* L_clz */
+
+
+#ifdef L_clzsi2
+ .align 4
+ .global __clzsi2
+ .type __clzsi2, @function
+__clzsi2:
+ leaf_entry sp, 16
+ do_nsau a2, a2, a3, a4
+ leaf_return
+ .size __clzsi2, . - __clzsi2
+
+#endif /* L_clzsi2 */
+
+
+#ifdef L_ctzsi2
+ .align 4
+ .global __ctzsi2
+ .type __ctzsi2, @function
+__ctzsi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 31
+ leaf_return
+ .size __ctzsi2, . - __ctzsi2
+
+#endif /* L_ctzsi2 */
+
+
+#ifdef L_ffssi2
+ .align 4
+ .global __ffssi2
+ .type __ffssi2, @function
+__ffssi2:
+ leaf_entry sp, 16
+ neg a3, a2
+ and a3, a3, a2
+ do_nsau a2, a3, a4, a5
+ neg a2, a2
+ addi a2, a2, 32
+ leaf_return
+ .size __ffssi2, . - __ffssi2
+
+#endif /* L_ffssi2 */
+
+
+#ifdef L_udivsi3
+ .align 4
+ .global __udivsi3
+ .type __udivsi3, @function
+__udivsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quou a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
+
+ mov a6, a2 /* keep dividend in a6 */
+ do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
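+	/* In C terms (a sketch): repeat "count" times
+	     if (n >= d) { n -= d; q += 1; }
+	     q <<= 1;  d >>= 1;
+	   and then add the final quotient bit after the loop.  */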
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment quotient if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+	beqz	a3, .Lerror	/* if divisor == 0, raise an error */
+	leaf_return		/* divisor == 1: return the dividend */
+
+.Lspecial:
+ /* return dividend >= divisor */
+ bltu a6, a3, .Lreturn0
+ movi a2, 1
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __udivsi3, . - __udivsi3
+
+#endif /* L_udivsi3 */
+
+
+#ifdef L_divsi3
+ .align 4
+ .global __divsi3
+ .type __divsi3, @function
+__divsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ quos a2, a2, a3
+#else
+ xor a7, a2, a3 /* sign = dividend ^ divisor */
+ do_abs a6, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+ movi a2, 0 /* quotient = 0 */
+
+ /* test-subtract-and-shift loop; one quotient bit on each iteration */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a6, a3, .Lzerobit
+ sub a6, a6, a3
+ addi a2, a2, 1
+.Lzerobit:
+ slli a2, a2, 1
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+ bltu a6, a3, .Lreturn
+ addi a2, a2, 1 /* increment if udividend >= udivisor */
+.Lreturn:
+ neg a5, a2
+ movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
+ leaf_return
+
+.Lle_one:
+ beqz a3, .Lerror
+ neg a2, a6 /* if udivisor == 1, then return... */
+ movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
+ leaf_return
+
+.Lspecial:
+ bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
+ movi a2, 1
+ movi a4, -1
+ movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
+ leaf_return
+
+.Lerror:
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __divsi3, . - __divsi3
+
+#endif /* L_divsi3 */
+
+
+#ifdef L_umodsi3
+ .align 4
+ .global __umodsi3
+ .type __umodsi3, @function
+__umodsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ remu a2, a2, a3
+#else
+ bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
+
+ do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
+ do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
+ ssl a4
+ sll a3, a3 /* divisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract once more if dividend >= divisor */
+.Lreturn:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __umodsi3, . - __umodsi3
+
+#endif /* L_umodsi3 */
+
+
+#ifdef L_modsi3
+ .align 4
+ .global __modsi3
+ .type __modsi3, @function
+__modsi3:
+ leaf_entry sp, 16
+#if XCHAL_HAVE_DIV32
+ rems a2, a2, a3
+#else
+ mov a7, a2 /* save original (signed) dividend */
+ do_abs a2, a2, a4 /* udividend = abs (dividend) */
+ do_abs a3, a3, a4 /* udivisor = abs (divisor) */
+ bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
+ do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
+ do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
+ bgeu a5, a4, .Lspecial
+
+ sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
+ ssl a4
+ sll a3, a3 /* udivisor <<= count */
+
+ /* test-subtract-and-shift loop */
+#if XCHAL_HAVE_LOOPS
+ loopnez a4, .Lloopend
+#endif /* XCHAL_HAVE_LOOPS */
+.Lloop:
+ bltu a2, a3, .Lzerobit
+ sub a2, a2, a3
+.Lzerobit:
+ srli a3, a3, 1
+#if !XCHAL_HAVE_LOOPS
+ addi a4, a4, -1
+ bnez a4, .Lloop
+#endif /* !XCHAL_HAVE_LOOPS */
+.Lloopend:
+
+.Lspecial:
+ bltu a2, a3, .Lreturn
+ sub a2, a2, a3 /* subtract again if udividend >= udivisor */
+.Lreturn:
+ bgez a7, .Lpositive
+ neg a2, a2 /* if (dividend < 0), return -udividend */
+.Lpositive:
+ leaf_return
+
+.Lle_one:
+ bnez a3, .Lreturn0
+
+ /* Divide by zero: Use an illegal instruction to force an exception.
+ The subsequent "DIV0" string can be recognized by the exception
+ handler to identify the real cause of the exception. */
+ ill
+ .ascii "DIV0"
+
+.Lreturn0:
+ movi a2, 0
+#endif /* XCHAL_HAVE_DIV32 */
+ leaf_return
+ .size __modsi3, . - __modsi3
+
+#endif /* L_modsi3 */
+
+
+#ifdef __XTENSA_EB__
+#define uh a2
+#define ul a3
+#else
+#define uh a3
+#define ul a2
+#endif /* __XTENSA_EB__ */
+
+
+#ifdef L_ashldi3
+ .align 4
+ .global __ashldi3
+ .type __ashldi3, @function
+__ashldi3:
+ leaf_entry sp, 16
+ ssl a4
+ bgei a4, 32, .Llow_only
+ src uh, uh, ul
+ sll ul, ul
+ leaf_return
+
+.Llow_only:
+ sll uh, ul
+ movi ul, 0
+ leaf_return
+ .size __ashldi3, . - __ashldi3
+
+#endif /* L_ashldi3 */
+
+
+#ifdef L_ashrdi3
+ .align 4
+ .global __ashrdi3
+ .type __ashrdi3, @function
+__ashrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only
+ src ul, uh, ul
+ sra uh, uh
+ leaf_return
+
+.Lhigh_only:
+ sra ul, uh
+ srai uh, uh, 31
+ leaf_return
+ .size __ashrdi3, . - __ashrdi3
+
+#endif /* L_ashrdi3 */
+
+
+#ifdef L_lshrdi3
+ .align 4
+ .global __lshrdi3
+ .type __lshrdi3, @function
+__lshrdi3:
+ leaf_entry sp, 16
+ ssr a4
+ bgei a4, 32, .Lhigh_only1
+ src ul, uh, ul
+ srl uh, uh
+ leaf_return
+
+.Lhigh_only1:
+ srl ul, uh
+ movi uh, 0
+ leaf_return
+ .size __lshrdi3, . - __lshrdi3
+
+#endif /* L_lshrdi3 */
+
+
+#include "ieee754-df.S"
+#include "ieee754-sf.S"
diff --git a/gcc/config/xtensa/lib2funcs.S b/gcc/config/xtensa/lib2funcs.S
new file mode 100644
index 000000000..65134e24c
--- /dev/null
+++ b/gcc/config/xtensa/lib2funcs.S
@@ -0,0 +1,186 @@
+/* Assembly functions for libgcc2.
+ Copyright (C) 2001, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "xtensa-config.h"
+
+/* __xtensa_libgcc_window_spill: This function flushes out all but the
+ current register window. This is used to set up the stack so that
+ arbitrary frames can be accessed. */
+
+ .align 4
+ .global __xtensa_libgcc_window_spill
+ .type __xtensa_libgcc_window_spill,@function
+__xtensa_libgcc_window_spill:
+ entry sp, 32
+ movi a2, 0
+ syscall
+ retw
+ .size __xtensa_libgcc_window_spill, .-__xtensa_libgcc_window_spill
+
+
+/* __xtensa_nonlocal_goto: This code does all the hard work of a
+ nonlocal goto on Xtensa. It is here in the library to avoid the
+ code size bloat of generating it in-line. There are two
+ arguments:
+
+ a2 = frame pointer for the procedure containing the label
+ a3 = goto handler address
+
+ This function never returns to its caller but instead goes directly
+ to the address of the specified goto handler. */
+
+ .align 4
+ .global __xtensa_nonlocal_goto
+ .type __xtensa_nonlocal_goto,@function
+__xtensa_nonlocal_goto:
+ entry sp, 32
+
+ /* Flush registers. */
+ mov a5, a2
+ movi a2, 0
+ syscall
+ mov a2, a5
+
+ /* Because the save area for a0-a3 is stored one frame below
+ the one identified by a2, the only way to restore those
+ registers is to unwind the stack. If alloca() were never
+ called, we could just unwind until finding the sp value
+ matching a2. However, a2 is a frame pointer, not a stack
+ pointer, and may not be encountered during the unwinding.
+ The solution is to unwind until going _past_ the value
+ given by a2. This involves keeping three stack pointer
+ values during the unwinding:
+
+ next = sp of frame N-1
+ cur = sp of frame N
+ prev = sp of frame N+1
+
+ When next > a2, the desired save area is stored relative
+ to prev. At this point, cur will be the same as a2
+ except in the alloca() case.
+
+ Besides finding the values to be restored to a0-a3, we also
+ need to find the current window size for the target
+ function. This can be extracted from the high bits of the
+ return address, initially in a0. As the unwinding
+ proceeds, the window size is taken from the value of a0
+ saved _two_ frames below the current frame. */
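+
+	/* In C terms, the loop below is roughly (a sketch; each save area
+	   is the 16-byte region just below the corresponding sp):
+
+	     char *prev = sp - 16, *cur = *(char **) (prev + 4) - 16, *next;
+	     while ((next = *(char **) (cur + 4)) <= (char *) a2) {
+	       a8 = *(uint32_t *) prev;   // return addr two frames down
+	       prev = cur;  cur = next - 16;
+	     }  */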
+
+ addi a5, sp, -16 /* a5 = prev - save area */
+ l32i a6, a5, 4
+ addi a6, a6, -16 /* a6 = cur - save area */
+ mov a8, a0 /* a8 = return address (for window size) */
+ j .Lfirstframe
+
+.Lnextframe:
+ l32i a8, a5, 0 /* next return address (for window size) */
+ mov a5, a6 /* advance prev */
+ addi a6, a7, -16 /* advance cur */
+.Lfirstframe:
+ l32i a7, a6, 4 /* a7 = next */
+ bgeu a2, a7, .Lnextframe
+
+ /* At this point, prev (a5) points to the save area with the saved
+ values of a0-a3. Copy those values into the save area at the
+ current sp so they will be reloaded when the return from this
+ function underflows. We don't have to worry about exceptions
+ while updating the current save area, because the windows have
+ already been flushed. */
+
+ addi a4, sp, -16 /* a4 = save area of this function */
+ l32i a6, a5, 0
+ l32i a7, a5, 4
+ s32i a6, a4, 0
+ s32i a7, a4, 4
+ l32i a6, a5, 8
+ l32i a7, a5, 12
+ s32i a6, a4, 8
+ s32i a7, a4, 12
+
+ /* Set return address to goto handler. Use the window size bits
+ from the return address two frames below the target. */
+ extui a8, a8, 30, 2 /* get window size from return addr. */
+ slli a3, a3, 2 /* get goto handler addr. << 2 */
+ ssai 2
+ src a0, a8, a3 /* combine them with a funnel shift */
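+	/* i.e. a0 = (window_size_bits << 30) | (handler_addr & 0x3fffffff),
+	   the encoding RETW expects in a return address.  */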
+
+ retw
+ .size __xtensa_nonlocal_goto, .-__xtensa_nonlocal_goto
+
+
+/* __xtensa_sync_caches: This function is called after writing a trampoline
+ on the stack to force all the data writes to memory and invalidate the
+ instruction cache. a2 is the address of the new trampoline.
+
+ After the trampoline data is written out, it must be flushed out of
+ the data cache into memory. We use DHWB in case we have a writeback
+ cache. At least one DHWB instruction is needed for each data cache
+ line which may be touched by the trampoline. An ISYNC instruction
+ must follow the DHWBs.
+
+ We have to flush the i-cache to make sure that the new values get used.
+ At least one IHI instruction is needed for each i-cache line which may
+ be touched by the trampoline. An ISYNC instruction is also needed to
+ make sure that the modified instructions are loaded into the instruction
+ fetch buffer. */
+
+/* Use the maximum trampoline size. Flushing a bit extra is OK. */
+#define TRAMPOLINE_SIZE 60
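+
+/* Each flush loop below visits every cache line touched by the trampoline;
+   in C terms the line count is (a sketch, with LINE equal to
+   1 << XCHAL_DCACHE_LINEWIDTH or 1 << XCHAL_ICACHE_LINEWIDTH):
+
+     lines = ((addr & (LINE - 1)) + TRAMPOLINE_SIZE + LINE - 1) / LINE;  */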
+
+ .text
+ .align 4
+ .global __xtensa_sync_caches
+ .type __xtensa_sync_caches,@function
+__xtensa_sync_caches:
+ entry sp, 32
+#if XCHAL_DCACHE_SIZE > 0
+ /* Flush the trampoline from the data cache. */
+ extui a4, a2, 0, XCHAL_DCACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_DCACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_DCACHE_LINEWIDTH
+ mov a3, a2
+.Ldcache_loop:
+ dhwb a3, 0
+ addi a3, a3, (1 << XCHAL_DCACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Ldcache_loop
+ isync
+#endif
+#if XCHAL_ICACHE_SIZE > 0
+ /* Invalidate the corresponding lines in the instruction cache. */
+ extui a4, a2, 0, XCHAL_ICACHE_LINEWIDTH
+ addi a4, a4, TRAMPOLINE_SIZE
+ addi a4, a4, (1 << XCHAL_ICACHE_LINEWIDTH) - 1
+ srli a4, a4, XCHAL_ICACHE_LINEWIDTH
+.Licache_loop:
+ ihi a2, 0
+ addi a2, a2, (1 << XCHAL_ICACHE_LINEWIDTH)
+ addi a4, a4, -1
+ bnez a4, .Licache_loop
+#endif
+ isync
+ retw
+ .size __xtensa_sync_caches, .-__xtensa_sync_caches
diff --git a/gcc/config/xtensa/libgcc-xtensa.ver b/gcc/config/xtensa/libgcc-xtensa.ver
new file mode 100644
index 000000000..43e7d4fc7
--- /dev/null
+++ b/gcc/config/xtensa/libgcc-xtensa.ver
@@ -0,0 +1,3 @@
+GCC_4.3.0 {
+ __umulsidi3
+}
diff --git a/gcc/config/xtensa/linux-unwind.h b/gcc/config/xtensa/linux-unwind.h
new file mode 100644
index 000000000..245649728
--- /dev/null
+++ b/gcc/config/xtensa/linux-unwind.h
@@ -0,0 +1,97 @@
+/* DWARF2 EH unwinding support for Xtensa.
+ Copyright (C) 2008, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2-xtensa.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+/* Encoded bytes for Xtensa instructions:
+ movi a2, __NR_rt_sigreturn
+ syscall
+ entry (first byte only)
+ Some of the bytes are endian-dependent. */
+
+#define MOVI_BYTE0 0x22
+#define MOVI_BYTE2 225 /* __NR_rt_sigreturn */
+#define SYSC_BYTE0 0
+#define SYSC_BYTE2 0
+
+#ifdef __XTENSA_EB__
+#define MOVI_BYTE1 0x0a
+#define SYSC_BYTE1 0x05
+#define ENTRY_BYTE 0x6c
+#else
+#define MOVI_BYTE1 0xa0
+#define SYSC_BYTE1 0x50
+#define ENTRY_BYTE 0x36
+#endif
+
+#define MD_FALLBACK_FRAME_STATE_FOR xtensa_fallback_frame_state
+
+static _Unwind_Reason_Code
+xtensa_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+
+ struct rt_sigframe {
+ siginfo_t info;
+ struct ucontext uc;
+ } *rt_;
+
+ /* movi a2, __NR_rt_sigreturn; syscall */
+ if (pc[0] != MOVI_BYTE0
+ || pc[1] != MOVI_BYTE1
+ || pc[2] != MOVI_BYTE2
+ || pc[3] != SYSC_BYTE0
+ || pc[4] != SYSC_BYTE1
+ || pc[5] != SYSC_BYTE2)
+ return _URC_END_OF_STACK;
+
+ rt_ = context->sp;
+ sc = &rt_->uc.uc_mcontext;
+ fs->signal_regs = (_Unwind_Word *) sc->sc_a;
+
+ /* If the signal arrived just before an ENTRY instruction, find the return
+ address and pretend the signal arrived before executing the CALL. */
+ if (*(unsigned char *) sc->sc_pc == ENTRY_BYTE)
+ {
+ unsigned callinc = (sc->sc_ps >> 16) & 3;
+ fs->signal_ra = ((sc->sc_a[callinc << 2] & XTENSA_RA_FIELD_MASK)
+ | context->ra_high_bits) - 3;
+ }
+ else
+ fs->signal_ra = sc->sc_pc;
+
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc */
diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
new file mode 100644
index 000000000..83d2a9767
--- /dev/null
+++ b/gcc/config/xtensa/linux.h
@@ -0,0 +1,71 @@
+/* Xtensa Linux configuration.
+ Derived from the configuration for GCC for Intel i386 running Linux.
+ Copyright (C) 2001, 2002, 2003, 2006, 2007, 2008, 2010, 2011
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() LINUX_TARGET_OS_CPP_BUILTINS()
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr);
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{mtext-section-literals:--text-section-literals} \
+ %{mno-text-section-literals:--no-text-section-literals} \
+ %{mtarget-align:--target-align} \
+ %{mno-target-align:--no-target-align} \
+ %{mlongcalls:--longcalls} \
+ %{mno-longcalls:--no-longcalls}"
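+
+/* For example, "-mlongcalls" on the driver command line becomes
+   "--longcalls" on the assembler command line via the spec above.  */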
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " LINUX_DYNAMIC_LINKER "} \
+ %{static:-static}}"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Always enable "-fpic" for Xtensa Linux. */
+#define XTENSA_ALWAYS_PIC 1
+
+#undef DBX_REGISTER_NUMBER
+
+#define MD_UNWIND_SUPPORT "config/xtensa/linux-unwind.h"
+
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
new file mode 100644
index 000000000..27f058de7
--- /dev/null
+++ b/gcc/config/xtensa/predicates.md
@@ -0,0 +1,175 @@
+;; Predicate definitions for Xtensa.
+;; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_predicate "add_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_simm8 (INTVAL (op))
+ || xtensa_simm8x256 (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "addsubx_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2
+ || INTVAL (op) == 4
+ || INTVAL (op) == 8")))
+
+(define_predicate "arith_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_simm8 (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+;; Non-immediate operand excluding the constant pool.
+(define_predicate "nonimmed_operand"
+ (ior (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)"))
+ (match_operand 0 "register_operand")))
+
+;; Memory operand excluding the constant pool.
+(define_predicate "mem_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)")))
+
+;; Memory operand in the constant pool.
+(define_predicate "constantpool_operand"
+ (match_test "constantpool_mem_p (op)"))
+
+(define_predicate "mask_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "extui_fldsz_operand"
+ (and (match_code "const_int")
+ (match_test "xtensa_mask_immediate ((1 << INTVAL (op)) - 1)")))
+
+(define_predicate "sext_operand"
+ (if_then_else (match_test "TARGET_SEXT")
+ (match_operand 0 "nonimmed_operand")
+ (match_operand 0 "mem_operand")))
+
+(define_predicate "sext_fldsz_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 8 && INTVAL (op) <= 23")))
+
+(define_predicate "lsbitnum_operand"
+ (and (match_code "const_int")
+ (match_test "BITS_BIG_ENDIAN
+ ? (INTVAL (op) == BITS_PER_WORD - 1)
+ : (INTVAL (op) == 0)")))
+
+(define_predicate "branch_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_b4const_or_zero (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "ubranch_operand"
+ (ior (and (match_code "const_int")
+ (match_test "xtensa_b4constu (INTVAL (op))"))
+ (match_operand 0 "register_operand")))
+
+(define_predicate "call_insn_operand"
+ (match_code "const_int,const,symbol_ref,reg")
+{
+ if ((GET_CODE (op) == REG)
+ && (op != arg_pointer_rtx)
+ && ((REGNO (op) < FRAME_POINTER_REGNUM)
+ || (REGNO (op) > LAST_VIRTUAL_REGISTER)))
+ return true;
+
+ if (CONSTANT_ADDRESS_P (op))
+ {
+ /* Direct calls only allowed to static functions with PIC. */
+ if (flag_pic)
+ {
+ tree callee, callee_sec, caller_sec;
+
+ if (GET_CODE (op) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_EXTERNAL_P (op))
+ return false;
+
+ /* Don't attempt a direct call if the callee is known to be in
+ a different section, since there's a good chance it will be
+ out of range. */
+
+ if (flag_function_sections
+ || DECL_ONE_ONLY (current_function_decl))
+ return false;
+ caller_sec = DECL_SECTION_NAME (current_function_decl);
+ callee = SYMBOL_REF_DECL (op);
+ if (callee)
+ {
+ if (DECL_ONE_ONLY (callee))
+ return false;
+ callee_sec = DECL_SECTION_NAME (callee);
+ if (((caller_sec == NULL_TREE) ^ (callee_sec == NULL_TREE))
+ || (caller_sec != NULL_TREE
+ && strcmp (TREE_STRING_POINTER (caller_sec),
+ TREE_STRING_POINTER (callee_sec)) != 0))
+ return false;
+ }
+ else if (caller_sec != NULL_TREE)
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+})
+
+(define_predicate "move_operand"
+ (ior
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "memory_operand")
+ (match_test "!constantpool_mem_p (op)
+ || GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))
+ (ior (and (match_code "const_int")
+ (match_test "GET_MODE_CLASS (mode) == MODE_INT
+ && xtensa_simm12b (INTVAL (op))"))
+ (and (match_code "const_int,const_double,const,symbol_ref,label_ref")
+ (match_test "TARGET_CONST16 && CONSTANT_P (op)
+ && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))))
+
+;; Accept the floating point constant 1 in the appropriate mode.
+(define_predicate "const_float_1_operand"
+ (match_code "const_double")
+{
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, op);
+ return REAL_VALUES_EQUAL (d, dconst1);
+})
+
+(define_predicate "fpmem_offset_operand"
+ (and (match_code "const_int")
+ (match_test "xtensa_mem_offset (INTVAL (op), SFmode)")))
+
+(define_predicate "branch_operator"
+ (match_code "eq,ne,lt,ge"))
+
+(define_predicate "ubranch_operator"
+ (match_code "ltu,geu"))
+
+(define_predicate "boolean_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "xtensa_cstoresi_operator"
+ (match_code "eq,ne,gt,ge,lt,le"))
+
+(define_predicate "tls_symbol_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
diff --git a/gcc/config/xtensa/t-elf b/gcc/config/xtensa/t-elf
new file mode 100644
index 000000000..7d6cd1a3a
--- /dev/null
+++ b/gcc/config/xtensa/t-elf
@@ -0,0 +1,6 @@
+# Build CRT files and libgcc with the "longcalls" option
+CRTSTUFF_T_CFLAGS += -mlongcalls
+CRTSTUFF_T_CFLAGS_S += -mlongcalls
+TARGET_LIBGCC2_CFLAGS += -mlongcalls
+
+EXTRA_MULTILIB_PARTS = crti.o crtn.o crtbegin.o crtend.o
diff --git a/gcc/config/xtensa/t-linux b/gcc/config/xtensa/t-linux
new file mode 100644
index 000000000..7d535e155
--- /dev/null
+++ b/gcc/config/xtensa/t-linux
@@ -0,0 +1,3 @@
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+
+SHLIB_MAPFILES += $(srcdir)/config/xtensa/libgcc-xtensa.ver
diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa
new file mode 100644
index 000000000..c3d98ae30
--- /dev/null
+++ b/gcc/config/xtensa/t-xtensa
@@ -0,0 +1,42 @@
+# Copyright (C) 2002, 2003, 2006, 2007, 2008 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+LIB1ASMSRC = xtensa/lib1funcs.asm
+LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
+ _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
+ _ashldi3 _ashrdi3 _lshrdi3 \
+ _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
+ _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+ _floatdisf _floatundisf \
+ _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
+ _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
+ _floatdidf _floatundidf \
+ _truncdfsf2 _extendsfdf2
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S
+LIB2ADDEH = $(srcdir)/config/xtensa/unwind-dw2-xtensa.c \
+ $(srcdir)/unwind-dw2-fde.c $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
+
+$(T)crti.o: $(srcdir)/config/xtensa/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crti.o -x assembler-with-cpp $(srcdir)/config/xtensa/crti.asm
+$(T)crtn.o: $(srcdir)/config/xtensa/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $(T)crtn.o -x assembler-with-cpp $(srcdir)/config/xtensa/crtn.asm
+
+$(out_object_file): gt-xtensa.h
diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.c b/gcc/config/xtensa/unwind-dw2-xtensa.c
new file mode 100644
index 000000000..9544f65ab
--- /dev/null
+++ b/gcc/config/xtensa/unwind-dw2-xtensa.c
@@ -0,0 +1,546 @@
+/* DWARF2 exception handling and frame unwinding for Xtensa.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "dwarf2.h"
+#include "unwind.h"
+#ifdef __USING_SJLJ_EXCEPTIONS__
+# define NO_SIZE_OF_ENCODED_VALUE
+#endif
+#include "unwind-pe.h"
+#include "unwind-dw2-fde.h"
+#include "unwind-dw2-xtensa.h"
+
+#ifndef __USING_SJLJ_EXCEPTIONS__
+
+/* The standard CIE and FDE structures work fine for Xtensa but the
+ variable-size register window save areas are not a good fit for the rest
+ of the standard DWARF unwinding mechanism. Nor is that mechanism
+ necessary, since the register save areas are always in fixed locations
+ in each stack frame. This file is a stripped down and customized version
+ of the standard DWARF unwinding code. It needs to be customized to have
+ builtin logic for finding the save areas and also to track the stack
+ pointer value (besides the CFA) while unwinding since the primary save
+ area is located below the stack pointer. It is stripped down to reduce
+ code size and ease the maintenance burden of tracking changes in the
+ standard version of the code. */
+
+#ifndef DWARF_REG_TO_UNWIND_COLUMN
+#define DWARF_REG_TO_UNWIND_COLUMN(REGNO) (REGNO)
+#endif
+
+#define XTENSA_RA_FIELD_MASK 0x3FFFFFFF
+
+/* This is the register and unwind state for a particular frame. This
+ provides the information necessary to unwind up past a frame and return
+ to its caller. */
+struct _Unwind_Context
+{
+ /* Track register window save areas of 4 registers each, instead of
+ keeping separate addresses for the individual registers. */
+ _Unwind_Word *reg[4];
+
+ void *cfa;
+ void *sp;
+ void *ra;
+
+ /* Cache the 2 high bits to replace the window size in return addresses. */
+ _Unwind_Word ra_high_bits;
+
+ void *lsda;
+ struct dwarf_eh_bases bases;
+ /* Signal frame context. */
+#define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1)
+ _Unwind_Word flags;
+ /* 0 for now, can be increased when further fields are added to
+ struct _Unwind_Context. */
+ _Unwind_Word version;
+};
+
+
+/* Read unaligned data from the instruction buffer. */
+
+union unaligned
+{
+ void *p;
+} __attribute__ ((packed));
+
+static void uw_update_context (struct _Unwind_Context *, _Unwind_FrameState *);
+static _Unwind_Reason_Code uw_frame_state_for (struct _Unwind_Context *,
+ _Unwind_FrameState *);
+
+static inline void *
+read_pointer (const void *p) { const union unaligned *up = p; return up->p; }
+
+static inline _Unwind_Word
+_Unwind_IsSignalFrame (struct _Unwind_Context *context)
+{
+ return (context->flags & SIGNAL_FRAME_BIT) ? 1 : 0;
+}
+
+static inline void
+_Unwind_SetSignalFrame (struct _Unwind_Context *context, int val)
+{
+ if (val)
+ context->flags |= SIGNAL_FRAME_BIT;
+ else
+ context->flags &= ~SIGNAL_FRAME_BIT;
+}
+
+/* Get the value of register INDEX as saved in CONTEXT. */
+
+inline _Unwind_Word
+_Unwind_GetGR (struct _Unwind_Context *context, int index)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ return *ptr;
+}
+
+/* Get the value of the CFA as saved in CONTEXT. */
+
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->cfa;
+}
+
+/* Overwrite the saved value for register INDEX in CONTEXT with VAL. */
+
+inline void
+_Unwind_SetGR (struct _Unwind_Context *context, int index, _Unwind_Word val)
+{
+ _Unwind_Word *ptr;
+
+ index = DWARF_REG_TO_UNWIND_COLUMN (index);
+ ptr = context->reg[index >> 2] + (index & 3);
+
+ *ptr = val;
+}
+
+/* Retrieve the return address for CONTEXT. */
+
+inline _Unwind_Ptr
+_Unwind_GetIP (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Retrieve the return address, and a flag indicating whether that IP is
+   before or after the first not-yet-fully-executed instruction.  */
+
+inline _Unwind_Ptr
+_Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn)
+{
+ *ip_before_insn = _Unwind_IsSignalFrame (context);
+ return (_Unwind_Ptr) context->ra;
+}
+
+/* Overwrite the return address for CONTEXT with VAL. */
+
+inline void
+_Unwind_SetIP (struct _Unwind_Context *context, _Unwind_Ptr val)
+{
+ context->ra = (void *) val;
+}
+
+void *
+_Unwind_GetLanguageSpecificData (struct _Unwind_Context *context)
+{
+ return context->lsda;
+}
+
+_Unwind_Ptr
+_Unwind_GetRegionStart (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.func;
+}
+
+void *
+_Unwind_FindEnclosingFunction (void *pc)
+{
+ struct dwarf_eh_bases bases;
+ const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases);
+ if (fde)
+ return bases.func;
+ else
+ return NULL;
+}
+
+_Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.dbase;
+}
+
+_Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *context)
+{
+ return (_Unwind_Ptr) context->bases.tbase;
+}
+
+#ifdef MD_UNWIND_SUPPORT
+#include MD_UNWIND_SUPPORT
+#endif
+
+/* Extract any interesting information from the CIE for the translation
+ unit F belongs to. Return a pointer to the byte after the augmentation,
+ or NULL if we encountered an undecipherable augmentation. */
+
+static const unsigned char *
+extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ const unsigned char *aug = cie->augmentation;
+ const unsigned char *p = aug + strlen ((const char *)aug) + 1;
+ const unsigned char *ret = NULL;
+ _uleb128_t utmp;
+ _sleb128_t stmp;
+
+  /* g++ v2 "eh" has a pointer immediately following the augmentation
+     string, so it must be handled first.  */
+ if (aug[0] == 'e' && aug[1] == 'h')
+ {
+ fs->eh_ptr = read_pointer (p);
+ p += sizeof (void *);
+ aug += 2;
+ }
+
+ /* Immediately following the augmentation are the code and
+ data alignment and return address column. */
+ p = read_uleb128 (p, &utmp);
+ p = read_sleb128 (p, &stmp);
+ if (cie->version == 1)
+ fs->retaddr_column = *p++;
+ else
+ {
+ p = read_uleb128 (p, &utmp);
+ fs->retaddr_column = (_Unwind_Word)utmp;
+ }
+ fs->lsda_encoding = DW_EH_PE_omit;
+
+  /* If the augmentation starts with 'z', then a uleb128 immediately
+     follows, giving the length of the augmentation data that follows
+     the length field itself.  */
+ if (*aug == 'z')
+ {
+ p = read_uleb128 (p, &utmp);
+ ret = p + utmp;
+
+ fs->saw_z = 1;
+ ++aug;
+ }
+
+ /* Iterate over recognized augmentation subsequences. */
+ while (*aug != '\0')
+ {
+ /* "L" indicates a byte showing how the LSDA pointer is encoded. */
+ if (aug[0] == 'L')
+ {
+ fs->lsda_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "R" indicates a byte indicating how FDE addresses are encoded. */
+ else if (aug[0] == 'R')
+ {
+ fs->fde_encoding = *p++;
+ aug += 1;
+ }
+
+ /* "P" indicates a personality routine in the CIE augmentation. */
+ else if (aug[0] == 'P')
+ {
+ _Unwind_Ptr personality;
+
+ p = read_encoded_value (context, *p, p + 1, &personality);
+ fs->personality = (_Unwind_Personality_Fn) personality;
+ aug += 1;
+ }
+
+ /* "S" indicates a signal frame. */
+ else if (aug[0] == 'S')
+ {
+ fs->signal_frame = 1;
+ aug += 1;
+ }
+
+ /* Otherwise we have an unknown augmentation string.
+ Bail unless we saw a 'z' prefix. */
+ else
+ return ret;
+ }
+
+ return ret ? ret : p;
+}
+
+/* Given the _Unwind_Context CONTEXT for a stack frame, look up the FDE for
+ its caller and decode it into FS. This function also sets the
+ lsda member of CONTEXT, as it is really information
+ about the caller's frame. */
+
+static _Unwind_Reason_Code
+uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ const struct dwarf_fde *fde;
+ const struct dwarf_cie *cie;
+ const unsigned char *aug;
+ int window_size;
+ _Unwind_Word *ra_ptr;
+
+ memset (fs, 0, sizeof (*fs));
+ context->lsda = 0;
+
+ fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1,
+ &context->bases);
+ if (fde == NULL)
+ {
+#ifdef MD_FALLBACK_FRAME_STATE_FOR
+ _Unwind_Reason_Code reason;
+ /* Couldn't find frame unwind info for this function. Try a
+ target-specific fallback mechanism. This will necessarily
+ not provide a personality routine or LSDA. */
+ reason = MD_FALLBACK_FRAME_STATE_FOR (context, fs);
+ if (reason != _URC_END_OF_STACK)
+ return reason;
+#endif
+ /* The frame was not recognized and handled by the fallback function,
+ but it is not really the end of the stack. Fall through here and
+ unwind it anyway. */
+ }
+ else
+ {
+ cie = get_cie (fde);
+ if (extract_cie_info (cie, context, fs) == NULL)
+ /* CIE contained unknown augmentation. */
+ return _URC_FATAL_PHASE1_ERROR;
+
+ /* Locate augmentation for the fde. */
+ aug = (const unsigned char *) fde + sizeof (*fde);
+ aug += 2 * size_of_encoded_value (fs->fde_encoding);
+ if (fs->saw_z)
+ {
+ _uleb128_t i;
+ aug = read_uleb128 (aug, &i);
+ }
+ if (fs->lsda_encoding != DW_EH_PE_omit)
+ {
+ _Unwind_Ptr lsda;
+
+ aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda);
+ context->lsda = (void *) lsda;
+ }
+ }
+
+ /* Check for the end of the stack. This needs to be checked after
+ the MD_FALLBACK_FRAME_STATE_FOR check for signal frames because
+ the contents of context->reg[0] are undefined at a signal frame,
+ and register a0 may appear to be zero. (The return address in
+ context->ra comes from register a4 or a8). */
+ ra_ptr = context->reg[0];
+ if (ra_ptr && *ra_ptr == 0)
+ return _URC_END_OF_STACK;
+
+ /* Find the window size from the high bits of the return address. */
+ if (ra_ptr)
+ window_size = (*ra_ptr >> 30) * 4;
+ else
+ window_size = 8;
+
+ fs->retaddr_column = window_size;
+
+ return _URC_NO_REASON;
+}
+
+static void
+uw_update_context_1 (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ struct _Unwind_Context orig_context = *context;
+ _Unwind_Word *sp, *cfa, *next_cfa;
+ int i;
+
+ if (fs->signal_regs)
+ {
+ cfa = (_Unwind_Word *) fs->signal_regs[1];
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ for (i = 0; i < 4; i++)
+ context->reg[i] = fs->signal_regs + (i << 2);
+ }
+ else
+ {
+ int window_size = fs->retaddr_column >> 2;
+
+ sp = (_Unwind_Word *) orig_context.sp;
+ cfa = (_Unwind_Word *) orig_context.cfa;
+ next_cfa = (_Unwind_Word *) cfa[-3];
+
+ /* Registers a0-a3 are in the save area below sp. */
+ context->reg[0] = sp - 4;
+
+ /* Find the extra save area below next_cfa. */
+ for (i = 1; i < window_size; i++)
+ context->reg[i] = next_cfa - 4 * (1 + window_size - i);
+
+ /* Remaining registers rotate from previous save areas. */
+ for (i = window_size; i < 4; i++)
+ context->reg[i] = orig_context.reg[i - window_size];
+ }
+
+ context->sp = cfa;
+ context->cfa = next_cfa;
+
+ _Unwind_SetSignalFrame (context, fs->signal_frame);
+}
+
+/* CONTEXT describes the unwind state for a frame, and FS describes the FDE
+ of its caller. Update CONTEXT to refer to the caller as well. Note
+ that the lsda member is not updated here, but later in
+ uw_frame_state_for. */
+
+static void
+uw_update_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context_1 (context, fs);
+
+ /* Compute the return address now, since the return address column
+ can change from frame to frame. */
+ if (fs->signal_ra != 0)
+ context->ra = (void *) fs->signal_ra;
+ else
+ context->ra = (void *) ((_Unwind_GetGR (context, fs->retaddr_column)
+ & XTENSA_RA_FIELD_MASK) | context->ra_high_bits);
+}
+
+static void
+uw_advance_context (struct _Unwind_Context *context, _Unwind_FrameState *fs)
+{
+ uw_update_context (context, fs);
+}
+
+/* Fill in CONTEXT for top-of-stack. The only valid registers at this
+ level will be the return address and the CFA. */
+
+#define uw_init_context(CONTEXT) \
+ do \
+ { \
+ __builtin_unwind_init (); \
+ uw_init_context_1 (CONTEXT, __builtin_dwarf_cfa (), \
+ __builtin_return_address (0)); \
+ } \
+ while (0)
+
+static void __attribute__((noinline))
+uw_init_context_1 (struct _Unwind_Context *context, void *outer_cfa,
+ void *outer_ra)
+{
+ void *ra = __builtin_return_address (0);
+ void *cfa = __builtin_dwarf_cfa ();
+ _Unwind_FrameState fs;
+
+ memset (context, 0, sizeof (struct _Unwind_Context));
+ context->ra = ra;
+
+ memset (&fs, 0, sizeof (fs));
+ fs.retaddr_column = 8;
+ context->sp = cfa;
+ context->cfa = outer_cfa;
+ context->ra_high_bits =
+ ((_Unwind_Word) uw_init_context_1) & ~XTENSA_RA_FIELD_MASK;
+ uw_update_context_1 (context, &fs);
+
+ context->ra = outer_ra;
+}
+
+
+/* Install TARGET into CURRENT so that we can return to it. This is a
+ macro because __builtin_eh_return must be invoked in the context of
+ our caller. */
+
+#define uw_install_context(CURRENT, TARGET) \
+ do \
+ { \
+ long offset = uw_install_context_1 ((CURRENT), (TARGET)); \
+ void *handler = __builtin_frob_return_addr ((TARGET)->ra); \
+ __builtin_eh_return (offset, handler); \
+ } \
+ while (0)
+
+static long
+uw_install_context_1 (struct _Unwind_Context *current,
+ struct _Unwind_Context *target)
+{
+ long i;
+
+ /* The eh_return insn assumes a window size of 8, so don't bother copying
+ the save areas for registers a8-a15 since they won't be reloaded. */
+ for (i = 0; i < 2; ++i)
+ {
+ void *c = current->reg[i];
+ void *t = target->reg[i];
+
+ if (t && c && t != c)
+ memcpy (c, t, 4 * sizeof (_Unwind_Word));
+ }
+
+ return 0;
+}
+
+static inline _Unwind_Ptr
+uw_identify_context (struct _Unwind_Context *context)
+{
+ return _Unwind_GetCFA (context);
+}
+
+
+#include "unwind.inc"
+
+#if defined (USE_GAS_SYMVER) && defined (SHARED) && defined (USE_LIBUNWIND_EXCEPTIONS)
+alias (_Unwind_Backtrace);
+alias (_Unwind_DeleteException);
+alias (_Unwind_FindEnclosingFunction);
+alias (_Unwind_ForcedUnwind);
+alias (_Unwind_GetDataRelBase);
+alias (_Unwind_GetTextRelBase);
+alias (_Unwind_GetCFA);
+alias (_Unwind_GetGR);
+alias (_Unwind_GetIP);
+alias (_Unwind_GetLanguageSpecificData);
+alias (_Unwind_GetRegionStart);
+alias (_Unwind_RaiseException);
+alias (_Unwind_Resume);
+alias (_Unwind_Resume_or_Rethrow);
+alias (_Unwind_SetGR);
+alias (_Unwind_SetIP);
+#endif
+
+#endif /* !__USING_SJLJ_EXCEPTIONS__ */
diff --git a/gcc/config/xtensa/unwind-dw2-xtensa.h b/gcc/config/xtensa/unwind-dw2-xtensa.h
new file mode 100644
index 000000000..d13b3264c
--- /dev/null
+++ b/gcc/config/xtensa/unwind-dw2-xtensa.h
@@ -0,0 +1,50 @@
+/* DWARF2 frame unwind data structure for Xtensa.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008,
+ 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* A target can override (perhaps for backward compatibility) how
+ many dwarf2 columns are unwound. */
+#ifndef DWARF_FRAME_REGISTERS
+#define DWARF_FRAME_REGISTERS FIRST_PSEUDO_REGISTER
+#endif
+
+/* Xtensa's variable-size register window save areas can be unwound without
+ any unwind info. This is a stripped down version of the standard DWARF
+ _Unwind_FrameState. */
+typedef struct
+{
+ /* The information we care about from the CIE/FDE. */
+ _Unwind_Personality_Fn personality;
+ _Unwind_Word retaddr_column;
+ unsigned char fde_encoding;
+ unsigned char lsda_encoding;
+ unsigned char saw_z;
+ unsigned char signal_frame;
+ void *eh_ptr;
+
+ /* Saved registers for a signal frame. */
+ _Unwind_Word *signal_regs;
+ _Unwind_Word signal_ra;
+} _Unwind_FrameState;
+
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
new file mode 100644
index 000000000..0d1738f4e
--- /dev/null
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -0,0 +1,74 @@
+/* Prototypes of target machine for GNU compiler for Xtensa.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef __XTENSA_PROTOS_H__
+#define __XTENSA_PROTOS_H__
+
+/* Functions to test whether an immediate fits in a given field. */
+extern bool xtensa_simm8 (HOST_WIDE_INT);
+extern bool xtensa_simm8x256 (HOST_WIDE_INT);
+extern bool xtensa_simm12b (HOST_WIDE_INT);
+extern bool xtensa_b4const_or_zero (HOST_WIDE_INT);
+extern bool xtensa_b4constu (HOST_WIDE_INT);
+extern bool xtensa_mask_immediate (HOST_WIDE_INT);
+extern bool xtensa_mem_offset (unsigned, enum machine_mode);
+
+/* Functions within xtensa.c that we reference. */
+#ifdef RTX_CODE
+extern int xt_true_regnum (rtx);
+extern int xtensa_valid_move (enum machine_mode, rtx *);
+extern int smalloffset_mem_p (rtx);
+extern int constantpool_mem_p (rtx);
+extern void xtensa_extend_reg (rtx, rtx);
+extern void xtensa_expand_conditional_branch (rtx *, enum machine_mode);
+extern int xtensa_expand_conditional_move (rtx *, int);
+extern int xtensa_expand_scc (rtx *, enum machine_mode);
+extern int xtensa_expand_block_move (rtx *);
+extern void xtensa_split_operand_pair (rtx *, enum machine_mode);
+extern int xtensa_emit_move_sequence (rtx *, enum machine_mode);
+extern rtx xtensa_copy_incoming_a7 (rtx);
+extern void xtensa_expand_nonlocal_goto (rtx *);
+extern void xtensa_expand_compare_and_swap (rtx, rtx, rtx, rtx);
+extern void xtensa_expand_atomic (enum rtx_code, rtx, rtx, rtx, bool);
+extern void xtensa_emit_loop_end (rtx, rtx *);
+extern char *xtensa_emit_branch (bool, bool, rtx *);
+extern char *xtensa_emit_bit_branch (bool, bool, rtx *);
+extern char *xtensa_emit_movcc (bool, bool, bool, rtx *);
+extern char *xtensa_emit_call (int, rtx *);
+extern bool xtensa_tls_referenced_p (rtx);
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
+#endif /* TREE_CODE */
+
+extern void print_operand (FILE *, rtx, int);
+extern void print_operand_address (FILE *, rtx);
+extern void xtensa_output_literal (FILE *, rtx, enum machine_mode, int);
+extern rtx xtensa_return_addr (int, rtx);
+#endif /* RTX_CODE */
+
+extern void xtensa_setup_frame_addresses (void);
+extern int xtensa_dbx_register_number (int);
+extern long compute_frame_size (int);
+extern void xtensa_expand_prologue (void);
+extern void order_regs_for_local_alloc (void);
+
+#endif /* !__XTENSA_PROTOS_H__ */
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
new file mode 100644
index 000000000..e7c395be5
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.c
@@ -0,0 +1,3715 @@
+/* Subroutines for insn-output.c for Tensilica's Xtensa architecture.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "insn-flags.h"
+#include "insn-attr.h"
+#include "insn-codes.h"
+#include "recog.h"
+#include "output.h"
+#include "tree.h"
+#include "expr.h"
+#include "flags.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "function.h"
+#include "diagnostic-core.h"
+#include "optabs.h"
+#include "libfuncs.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "gimple.h"
+#include "df.h"
+
+
+/* Enumeration for all of the relational tests, so that we can build
+ arrays indexed by the test type, and not worry about the order
+ of EQ, NE, etc. */
+
+enum internal_test
+{
+ ITEST_EQ,
+ ITEST_NE,
+ ITEST_GT,
+ ITEST_GE,
+ ITEST_LT,
+ ITEST_LE,
+ ITEST_GTU,
+ ITEST_GEU,
+ ITEST_LTU,
+ ITEST_LEU,
+ ITEST_MAX
+};
+
+/* Array giving truth value on whether or not a given hard register
+ can support a given mode. */
+char xtensa_hard_regno_mode_ok[(int) MAX_MACHINE_MODE][FIRST_PSEUDO_REGISTER];
+
+/* Current frame size calculated by compute_frame_size. */
+unsigned xtensa_current_frame_size;
+
+/* Largest block move to handle in-line. */
+#define LARGEST_MOVE_RATIO 15
+
+/* Define the structure for the machine field in struct function. */
+struct GTY(()) machine_function
+{
+ int accesses_prev_frame;
+ bool need_a7_copy;
+ bool vararg_a7;
+ rtx vararg_a7_copy;
+ rtx set_frame_ptr_insn;
+};
+
+/* Vector, indexed by hard register number, which contains 1 for a
+ register that is allowable in a candidate for leaf function
+ treatment. */
+
+const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* Map hard register number to register class.  */
+const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER] =
+{
+ RL_REGS, SP_REG, RL_REGS, RL_REGS,
+ RL_REGS, RL_REGS, RL_REGS, GR_REGS,
+ RL_REGS, RL_REGS, RL_REGS, RL_REGS,
+ RL_REGS, RL_REGS, RL_REGS, RL_REGS,
+ AR_REGS, AR_REGS, BR_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ ACC_REG,
+};
+
+static void xtensa_option_override (void);
+static enum internal_test map_test_to_internal_test (enum rtx_code);
+static rtx gen_int_relational (enum rtx_code, rtx, rtx, int *);
+static rtx gen_float_relational (enum rtx_code, rtx, rtx);
+static rtx gen_conditional_move (enum rtx_code, enum machine_mode, rtx, rtx);
+static rtx fixup_subreg_mem (rtx);
+static struct machine_function * xtensa_init_machine_status (void);
+static rtx xtensa_legitimize_tls_address (rtx);
+static rtx xtensa_legitimize_address (rtx, rtx, enum machine_mode);
+static bool xtensa_mode_dependent_address_p (const_rtx);
+static bool xtensa_return_in_msb (const_tree);
+static void printx (FILE *, signed int);
+static void xtensa_function_epilogue (FILE *, HOST_WIDE_INT);
+static rtx xtensa_builtin_saveregs (void);
+static bool xtensa_legitimate_address_p (enum machine_mode, rtx, bool);
+static unsigned int xtensa_multibss_section_type_flags (tree, const char *,
+ int) ATTRIBUTE_UNUSED;
+static section *xtensa_select_rtx_section (enum machine_mode, rtx,
+ unsigned HOST_WIDE_INT);
+static bool xtensa_rtx_costs (rtx, int, int, int *, bool);
+static int xtensa_register_move_cost (enum machine_mode, reg_class_t,
+ reg_class_t);
+static int xtensa_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static tree xtensa_build_builtin_va_list (void);
+static bool xtensa_return_in_memory (const_tree, const_tree);
+static tree xtensa_gimplify_va_arg_expr (tree, tree, gimple_seq *,
+ gimple_seq *);
+static void xtensa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx xtensa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
+ const_tree, bool);
+static rtx xtensa_function_incoming_arg (CUMULATIVE_ARGS *,
+ enum machine_mode, const_tree, bool);
+static rtx xtensa_function_value (const_tree, const_tree, bool);
+static rtx xtensa_libcall_value (enum machine_mode, const_rtx);
+static bool xtensa_function_value_regno_p (const unsigned int);
+static unsigned int xtensa_function_arg_boundary (enum machine_mode,
+ const_tree);
+static void xtensa_init_builtins (void);
+static tree xtensa_fold_builtin (tree, int, tree *, bool);
+static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void xtensa_va_start (tree, rtx);
+static bool xtensa_frame_pointer_required (void);
+static rtx xtensa_static_chain (const_tree, bool);
+static void xtensa_asm_trampoline_template (FILE *);
+static void xtensa_trampoline_init (rtx, tree, rtx);
+static bool xtensa_output_addr_const_extra (FILE *, rtx);
+
+static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t);
+static reg_class_t xtensa_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ struct secondary_reload_info *);
+
+static bool constantpool_address_p (const_rtx addr);
+
+static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
+ REG_ALLOC_ORDER;
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
+
+static const struct default_options xtensa_option_optimization_table[] =
+ {
+ { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+ /* Reordering blocks for Xtensa is not a good idea unless the
+ compiler understands the range of conditional branches.
+ Currently all branch relaxation for Xtensa is handled in the
+ assembler, so GCC cannot do a good job of reordering blocks.
+ Do not enable reordering unless it is explicitly requested. */
+ { OPT_LEVELS_ALL, OPT_freorder_blocks, NULL, 0 },
+ { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+
+
+/* This macro generates the assembly code for function exit,
+ on machines that need it. If FUNCTION_EPILOGUE is not defined
+ then individual return instructions are generated for each
+ return statement. Args are the same as for FUNCTION_PROLOGUE. */
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE xtensa_function_epilogue
+
+/* These hooks specify assembly directives for creating certain kinds
+ of integer object. */
+
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION xtensa_select_rtx_section
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS xtensa_legitimize_address
+#undef TARGET_MODE_DEPENDENT_ADDRESS_P
+#define TARGET_MODE_DEPENDENT_ADDRESS_P xtensa_mode_dependent_address_p
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST xtensa_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST xtensa_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS xtensa_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST xtensa_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START xtensa_va_start
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE xtensa_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE xtensa_libcall_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P xtensa_function_value_regno_p
+
+#undef TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE xtensa_function_arg_advance
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG xtensa_function_arg
+#undef TARGET_FUNCTION_INCOMING_ARG
+#define TARGET_FUNCTION_INCOMING_ARG xtensa_function_incoming_arg
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY xtensa_function_arg_boundary
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS xtensa_builtin_saveregs
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR xtensa_gimplify_va_arg_expr
+
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB xtensa_return_in_msb
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS xtensa_init_builtins
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN xtensa_fold_builtin
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN xtensa_expand_builtin
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class
+#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
+#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS xtensa_preferred_output_reload_class
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD xtensa_secondary_reload
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS (TARGET_THREADPTR && HAVE_AS_TLS)
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM xtensa_tls_referenced_p
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P xtensa_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED xtensa_frame_pointer_required
+
+#undef TARGET_STATIC_CHAIN
+#define TARGET_STATIC_CHAIN xtensa_static_chain
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE xtensa_asm_trampoline_template
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT xtensa_trampoline_init
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE xtensa_option_override
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE xtensa_option_optimization_table
+
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA xtensa_output_addr_const_extra
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Functions to test Xtensa immediate operand validity. */
+
+bool
+xtensa_simm8 (HOST_WIDE_INT v)
+{
+ return v >= -128 && v <= 127;
+}
+
+
+bool
+xtensa_simm8x256 (HOST_WIDE_INT v)
+{
+ return (v & 255) == 0 && (v >= -32768 && v <= 32512);
+}
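+
+/* Illustrative values (editorial, derived from the test above): this
+   accepts exactly the multiples of 256 in [-32768, 32512], i.e. a
+   signed 8-bit value scaled by 256:
+     xtensa_simm8x256 (256)    => true
+     xtensa_simm8x256 (-32768) => true   (-128 * 256)
+     xtensa_simm8x256 (255)    => false  (not a multiple of 256)
+     xtensa_simm8x256 (32768)  => false  (128 * 256 is out of range)  */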
+
+
+bool
+xtensa_simm12b (HOST_WIDE_INT v)
+{
+ return v >= -2048 && v <= 2047;
+}
+
+
+static bool
+xtensa_uimm8 (HOST_WIDE_INT v)
+{
+ return v >= 0 && v <= 255;
+}
+
+
+static bool
+xtensa_uimm8x2 (HOST_WIDE_INT v)
+{
+ return (v & 1) == 0 && (v >= 0 && v <= 510);
+}
+
+
+static bool
+xtensa_uimm8x4 (HOST_WIDE_INT v)
+{
+ return (v & 3) == 0 && (v >= 0 && v <= 1020);
+}
+
+
+static bool
+xtensa_b4const (HOST_WIDE_INT v)
+{
+ switch (v)
+ {
+ case -1:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 10:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ return true;
+ }
+ return false;
+}
+
+
+bool
+xtensa_b4const_or_zero (HOST_WIDE_INT v)
+{
+ if (v == 0)
+ return true;
+ return xtensa_b4const (v);
+}
+
+
+bool
+xtensa_b4constu (HOST_WIDE_INT v)
+{
+ switch (v)
+ {
+ case 32768:
+ case 65536:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 10:
+ case 12:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ return true;
+ }
+ return false;
+}
+
+
+bool
+xtensa_mask_immediate (HOST_WIDE_INT v)
+{
+#define MAX_MASK_SIZE 16
+ int mask_size;
+
+ for (mask_size = 1; mask_size <= MAX_MASK_SIZE; mask_size++)
+ {
+ if ((v & 1) == 0)
+ return false;
+ v = v >> 1;
+ if (v == 0)
+ return true;
+ }
+
+ return false;
+}
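+
+/* Illustrative values (editorial): the loop above accepts exactly the
+   masks of the form (1 << n) - 1 for 1 <= n <= 16:
+     xtensa_mask_immediate (0x0001)  => true
+     xtensa_mask_immediate (0xffff)  => true   (16 contiguous low bits)
+     xtensa_mask_immediate (0x0006)  => false  (bit 0 is clear)
+     xtensa_mask_immediate (0x1ffff) => false  (17 bits exceed MAX_MASK_SIZE)  */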
+
+
+/* This is just like the standard true_regnum() function except that it
+ works even when reg_renumber is not initialized. */
+
+int
+xt_true_regnum (rtx x)
+{
+ if (GET_CODE (x) == REG)
+ {
+ if (reg_renumber
+ && REGNO (x) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[REGNO (x)] >= 0)
+ return reg_renumber[REGNO (x)];
+ return REGNO (x);
+ }
+ if (GET_CODE (x) == SUBREG)
+ {
+ int base = xt_true_regnum (SUBREG_REG (x));
+ if (base >= 0 && base < FIRST_PSEUDO_REGISTER)
+ return base + subreg_regno_offset (REGNO (SUBREG_REG (x)),
+ GET_MODE (SUBREG_REG (x)),
+ SUBREG_BYTE (x), GET_MODE (x));
+ }
+ return -1;
+}
+
+
+int
+xtensa_valid_move (enum machine_mode mode, rtx *operands)
+{
+ /* Either the destination or source must be a register, and the
+ MAC16 accumulator doesn't count. */
+
+ if (register_operand (operands[0], mode))
+ {
+ int dst_regnum = xt_true_regnum (operands[0]);
+
+ /* The stack pointer can only be assigned with a MOVSP opcode. */
+ if (dst_regnum == STACK_POINTER_REGNUM)
+ return (mode == SImode
+ && register_operand (operands[1], mode)
+ && !ACC_REG_P (xt_true_regnum (operands[1])));
+
+ if (!ACC_REG_P (dst_regnum))
+ return true;
+ }
+ if (register_operand (operands[1], mode))
+ {
+ int src_regnum = xt_true_regnum (operands[1]);
+ if (!ACC_REG_P (src_regnum))
+ return true;
+ }
+ return FALSE;
+}
+
+
+int
+smalloffset_mem_p (rtx op)
+{
+ if (GET_CODE (op) == MEM)
+ {
+ rtx addr = XEXP (op, 0);
+ if (GET_CODE (addr) == REG)
+ return BASE_REG_P (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx offset = XEXP (addr, 0);
+ HOST_WIDE_INT val;
+ if (GET_CODE (offset) != CONST_INT)
+ offset = XEXP (addr, 1);
+ if (GET_CODE (offset) != CONST_INT)
+ return FALSE;
+
+ val = INTVAL (offset);
+ return (val & 3) == 0 && (val >= 0 && val <= 60);
+ }
+ }
+ return FALSE;
+}
+
+
+static bool
+constantpool_address_p (const_rtx addr)
+{
+ const_rtx sym = addr;
+
+ if (GET_CODE (addr) == CONST)
+ {
+ rtx offset;
+
+ /* Only handle (PLUS (SYM, OFFSET)) form. */
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) != PLUS)
+ return false;
+
+ /* Make sure the address is word aligned. */
+ offset = XEXP (addr, 1);
+ if ((!CONST_INT_P (offset))
+ || ((INTVAL (offset) & 3) != 0))
+ return false;
+
+ sym = XEXP (addr, 0);
+ }
+
+ if ((GET_CODE (sym) == SYMBOL_REF)
+ && CONSTANT_POOL_ADDRESS_P (sym))
+ return true;
+ return false;
+}
+
+
+int
+constantpool_mem_p (rtx op)
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) == MEM)
+ return constantpool_address_p (XEXP (op, 0));
+ return FALSE;
+}
+
+
+/* Return TRUE if X is a thread-local symbol. */
+
+static bool
+xtensa_tls_symbol_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+
+void
+xtensa_extend_reg (rtx dst, rtx src)
+{
+ rtx temp = gen_reg_rtx (SImode);
+ rtx shift = GEN_INT (BITS_PER_WORD - GET_MODE_BITSIZE (GET_MODE (src)));
+
+ /* Generate paradoxical subregs as needed so that the modes match. */
+ src = simplify_gen_subreg (SImode, src, GET_MODE (src), 0);
+ dst = simplify_gen_subreg (SImode, dst, GET_MODE (dst), 0);
+
+ emit_insn (gen_ashlsi3 (temp, src, shift));
+ emit_insn (gen_ashrsi3 (dst, temp, shift));
+}
+
+
+bool
+xtensa_mem_offset (unsigned v, enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case BLKmode:
+ /* Handle the worst case for block moves. See xtensa_expand_block_move
+ where we emit an optimized block move operation if the block can be
+ moved in < "move_ratio" pieces. The worst case is when the block is
+ aligned but has a size congruent to 3 (mod 4) (does this happen?), so
+ that the last piece requires a byte load/store. */
+ return (xtensa_uimm8 (v)
+ && xtensa_uimm8 (v + MOVE_MAX * LARGEST_MOVE_RATIO));
+
+ case QImode:
+ return xtensa_uimm8 (v);
+
+ case HImode:
+ return xtensa_uimm8x2 (v);
+
+ case DFmode:
+ return (xtensa_uimm8x4 (v) && xtensa_uimm8x4 (v + 4));
+
+ default:
+ break;
+ }
+
+ return xtensa_uimm8x4 (v);
+}
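+
+/* Illustrative summary (editorial): the per-mode offset ranges above
+   work out to:
+     QImode:         0 .. 255, any alignment
+     HImode:         0 .. 510, even
+     SImode/SFmode:  0 .. 1020, multiple of 4 (the default case)
+     DFmode:         0 .. 1016, multiple of 4 (both words must fit)
+   e.g. xtensa_mem_offset (510, HImode) is true, while
+   xtensa_mem_offset (511, HImode) and xtensa_mem_offset (512, HImode)
+   are both false.  */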
+
+
+/* Make normal rtx_code into something we can index from an array. */
+
+static enum internal_test
+map_test_to_internal_test (enum rtx_code test_code)
+{
+ enum internal_test test = ITEST_MAX;
+
+ switch (test_code)
+ {
+ default: break;
+ case EQ: test = ITEST_EQ; break;
+ case NE: test = ITEST_NE; break;
+ case GT: test = ITEST_GT; break;
+ case GE: test = ITEST_GE; break;
+ case LT: test = ITEST_LT; break;
+ case LE: test = ITEST_LE; break;
+ case GTU: test = ITEST_GTU; break;
+ case GEU: test = ITEST_GEU; break;
+ case LTU: test = ITEST_LTU; break;
+ case LEU: test = ITEST_LEU; break;
+ }
+
+ return test;
+}
+
+
+/* Generate the code to compare two integer values. The return value is
+ the comparison expression. */
+
+static rtx
+gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */
+ rtx cmp0, /* first operand to compare */
+ rtx cmp1, /* second operand to compare */
+ int *p_invert /* whether branch needs to reverse test */)
+{
+ struct cmp_info
+ {
+ enum rtx_code test_code; /* test code to use in insn */
+ bool (*const_range_p) (HOST_WIDE_INT); /* range check function */
+ int const_add; /* constant to add (convert LE -> LT) */
+ int reverse_regs; /* reverse registers in test */
+ int invert_const; /* != 0 if invert value if cmp1 is constant */
+ int invert_reg; /* != 0 if invert value if cmp1 is register */
+ int unsignedp; /* != 0 for unsigned comparisons. */
+ };
+
+ static struct cmp_info info[ (int)ITEST_MAX ] = {
+
+ { EQ, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* EQ */
+ { NE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* NE */
+
+ { LT, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* GT */
+ { GE, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* GE */
+ { LT, xtensa_b4const_or_zero, 0, 0, 0, 0, 0 }, /* LT */
+ { GE, xtensa_b4const_or_zero, 1, 1, 1, 0, 0 }, /* LE */
+
+ { LTU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* GTU */
+ { GEU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* GEU */
+ { LTU, xtensa_b4constu, 0, 0, 0, 0, 1 }, /* LTU */
+ { GEU, xtensa_b4constu, 1, 1, 1, 0, 1 }, /* LEU */
+ };
+
+ enum internal_test test;
+ enum machine_mode mode;
+ struct cmp_info *p_info;
+
+ test = map_test_to_internal_test (test_code);
+ gcc_assert (test != ITEST_MAX);
+
+ p_info = &info[ (int)test ];
+
+ mode = GET_MODE (cmp0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (cmp1);
+
+ /* Make sure we can handle any constants given to us. */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ HOST_WIDE_INT value = INTVAL (cmp1);
+ unsigned HOST_WIDE_INT uvalue = (unsigned HOST_WIDE_INT)value;
+
+ /* If the immediate overflows or does not fit in the immediate field,
+ spill it to a register. */
+
+ if ((p_info->unsignedp ?
+ (uvalue + p_info->const_add > uvalue) :
+ (value + p_info->const_add > value)) != (p_info->const_add > 0))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+ else if (!(p_info->const_range_p) (value + p_info->const_add))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+ }
+ else if ((GET_CODE (cmp1) != REG) && (GET_CODE (cmp1) != SUBREG))
+ {
+ cmp1 = force_reg (mode, cmp1);
+ }
+
+ /* See if we need to invert the result. */
+ *p_invert = ((GET_CODE (cmp1) == CONST_INT)
+ ? p_info->invert_const
+ : p_info->invert_reg);
+
+ /* A comparison to a constant may involve adding 1 to change an LT into
+ an LE. A comparison between two registers may involve switching the
+ operands. */
+ if (GET_CODE (cmp1) == CONST_INT)
+ {
+ if (p_info->const_add != 0)
+ cmp1 = GEN_INT (INTVAL (cmp1) + p_info->const_add);
+
+ }
+ else if (p_info->reverse_regs)
+ {
+ rtx temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ }
+
+ return gen_rtx_fmt_ee (p_info->test_code, VOIDmode, cmp0, cmp1);
+}
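+
+/* Worked example (editorial, derived from the table above): the GT
+   entry is { LT, ..., const_add 1, reverse_regs, invert_const }, so
+   "a > 4" becomes LT (a, 5) with *p_invert set -- the branch tests
+   !(a < 5), i.e. a >= 5, which equals a > 4.  With a register operand
+   the constant adjustment does not apply and the operands are swapped
+   instead: "a > b" is emitted as LT (b, a).  */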
+
+
+/* Generate the code to compare two float values. The return value is
+ the comparison expression. */
+
+static rtx
+gen_float_relational (enum rtx_code test_code, /* relational test (EQ, etc) */
+ rtx cmp0, /* first operand to compare */
+ rtx cmp1 /* second operand to compare */)
+{
+ rtx (*gen_fn) (rtx, rtx, rtx);
+ rtx brtmp;
+ int reverse_regs, invert;
+
+ switch (test_code)
+ {
+ case EQ: reverse_regs = 0; invert = 0; gen_fn = gen_seq_sf; break;
+ case NE: reverse_regs = 0; invert = 1; gen_fn = gen_seq_sf; break;
+ case LE: reverse_regs = 0; invert = 0; gen_fn = gen_sle_sf; break;
+ case GT: reverse_regs = 1; invert = 0; gen_fn = gen_slt_sf; break;
+ case LT: reverse_regs = 0; invert = 0; gen_fn = gen_slt_sf; break;
+ case GE: reverse_regs = 1; invert = 0; gen_fn = gen_sle_sf; break;
+ case UNEQ: reverse_regs = 0; invert = 0; gen_fn = gen_suneq_sf; break;
+ case LTGT: reverse_regs = 0; invert = 1; gen_fn = gen_suneq_sf; break;
+ case UNLE: reverse_regs = 0; invert = 0; gen_fn = gen_sunle_sf; break;
+ case UNGT: reverse_regs = 1; invert = 0; gen_fn = gen_sunlt_sf; break;
+ case UNLT: reverse_regs = 0; invert = 0; gen_fn = gen_sunlt_sf; break;
+ case UNGE: reverse_regs = 1; invert = 0; gen_fn = gen_sunle_sf; break;
+ case UNORDERED:
+ reverse_regs = 0; invert = 0; gen_fn = gen_sunordered_sf; break;
+ case ORDERED:
+ reverse_regs = 0; invert = 1; gen_fn = gen_sunordered_sf; break;
+ default:
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1));
+ reverse_regs = 0; invert = 0; gen_fn = 0; /* avoid compiler warnings */
+ }
+
+ if (reverse_regs)
+ {
+ rtx temp = cmp0;
+ cmp0 = cmp1;
+ cmp1 = temp;
+ }
+
+ brtmp = gen_rtx_REG (CCmode, FPCC_REGNUM);
+ emit_insn (gen_fn (brtmp, cmp0, cmp1));
+
+ return gen_rtx_fmt_ee (invert ? EQ : NE, VOIDmode, brtmp, const0_rtx);
+}
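+
+/* Worked example (editorial): GT sets reverse_regs, so "a > b" emits
+   the slt_sf pattern with the operands swapped (testing b < a) into
+   the FPCC register, and the returned expression branches on
+   NE (brtmp, 0).  NE uses the seq_sf pattern with invert set, so
+   "a != b" computes a == b and branches on EQ (brtmp, 0) instead.  */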
+
+
+void
+xtensa_expand_conditional_branch (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code test_code = GET_CODE (operands[0]);
+ rtx cmp0 = operands[1];
+ rtx cmp1 = operands[2];
+ rtx cmp;
+ int invert;
+ rtx label1, label2;
+
+ switch (mode)
+ {
+ case DFmode:
+ default:
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode, cmp0, cmp1));
+
+ case SImode:
+ invert = FALSE;
+ cmp = gen_int_relational (test_code, cmp0, cmp1, &invert);
+ break;
+
+ case SFmode:
+ if (!TARGET_HARD_FLOAT)
+ fatal_insn ("bad test", gen_rtx_fmt_ee (test_code, VOIDmode,
+ cmp0, cmp1));
+ invert = FALSE;
+ cmp = gen_float_relational (test_code, cmp0, cmp1);
+ break;
+ }
+
+ /* Generate the branch. */
+
+ label1 = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ label2 = pc_rtx;
+
+ if (invert)
+ {
+ label2 = label1;
+ label1 = pc_rtx;
+ }
+
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, cmp,
+ label1,
+ label2)));
+}
+
+
+static rtx
+gen_conditional_move (enum rtx_code code, enum machine_mode mode,
+ rtx op0, rtx op1)
+{
+ if (mode == SImode)
+ {
+ rtx cmp;
+
+ /* Jump optimization calls get_condition() which canonicalizes
+ comparisons like (GE x <const>) to (GT x <const-1>).
+ Transform those comparisons back to GE, since that is the
+ comparison supported in Xtensa. We shouldn't have to
+ transform <LE x const> comparisons, because neither
+ xtensa_expand_conditional_branch() nor get_condition() will
+ produce them. */
+
+ if ((code == GT) && (op1 == constm1_rtx))
+ {
+ code = GE;
+ op1 = const0_rtx;
+ }
+ cmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx);
+
+ if (boolean_operator (cmp, VOIDmode))
+ {
+ /* Swap the operands to make const0 second. */
+ if (op0 == const0_rtx)
+ {
+ op0 = op1;
+ op1 = const0_rtx;
+ }
+
+ /* If not comparing against zero, emit a comparison (subtract). */
+ if (op1 != const0_rtx)
+ {
+ op0 = expand_binop (SImode, sub_optab, op0, op1,
+ 0, 0, OPTAB_LIB_WIDEN);
+ op1 = const0_rtx;
+ }
+ }
+ else if (branch_operator (cmp, VOIDmode))
+ {
+ /* Swap the operands to make const0 second. */
+ if (op0 == const0_rtx)
+ {
+ op0 = op1;
+ op1 = const0_rtx;
+
+ switch (code)
+ {
+ case LT: code = GE; break;
+ case GE: code = LT; break;
+ default: gcc_unreachable ();
+ }
+ }
+
+ if (op1 != const0_rtx)
+ return 0;
+ }
+ else
+ return 0;
+
+ return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+ }
+
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ return gen_float_relational (code, op0, op1);
+
+ return 0;
+}
+
+
+int
+xtensa_expand_conditional_move (rtx *operands, int isflt)
+{
+ rtx dest = operands[0];
+ rtx cmp = operands[1];
+ enum machine_mode cmp_mode = GET_MODE (XEXP (cmp, 0));
+ rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+
+ if (!(cmp = gen_conditional_move (GET_CODE (cmp), cmp_mode,
+ XEXP (cmp, 0), XEXP (cmp, 1))))
+ return 0;
+
+ if (isflt)
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsfcc_internal0
+ : gen_movsfcc_internal1);
+ else
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsicc_internal0
+ : gen_movsicc_internal1);
+
+ emit_insn (gen_fn (dest, XEXP (cmp, 0), operands[2], operands[3], cmp));
+ return 1;
+}
+
+
+int
+xtensa_expand_scc (rtx operands[4], enum machine_mode cmp_mode)
+{
+ rtx dest = operands[0];
+ rtx cmp;
+ rtx one_tmp, zero_tmp;
+ rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
+
+ if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
+ operands[2], operands[3])))
+ return 0;
+
+ one_tmp = gen_reg_rtx (SImode);
+ zero_tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (one_tmp, const_true_rtx));
+ emit_insn (gen_movsi (zero_tmp, const0_rtx));
+
+ gen_fn = (cmp_mode == SImode
+ ? gen_movsicc_internal0
+ : gen_movsicc_internal1);
+ emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp));
+ return 1;
+}
+
+
+/* Split OP[1] into OP[2,3] and likewise for OP[0] into OP[0,1]. MODE is
+ for the output, i.e., the input operands are twice as big as MODE. */
+
+void
+xtensa_split_operand_pair (rtx operands[4], enum machine_mode mode)
+{
+ switch (GET_CODE (operands[1]))
+ {
+ case REG:
+ operands[3] = gen_rtx_REG (mode, REGNO (operands[1]) + 1);
+ operands[2] = gen_rtx_REG (mode, REGNO (operands[1]));
+ break;
+
+ case MEM:
+ operands[3] = adjust_address (operands[1], mode, GET_MODE_SIZE (mode));
+ operands[2] = adjust_address (operands[1], mode, 0);
+ break;
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ split_double (operands[1], &operands[2], &operands[3]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ operands[1] = gen_rtx_REG (mode, REGNO (operands[0]) + 1);
+ operands[0] = gen_rtx_REG (mode, REGNO (operands[0]));
+ break;
+
+ case MEM:
+ operands[1] = adjust_address (operands[0], mode, GET_MODE_SIZE (mode));
+ operands[0] = adjust_address (operands[0], mode, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Emit insns to move operands[1] into operands[0].
+ Return 1 if we have written out everything that needs to be done to
+ do the move. Otherwise, return 0 and the caller will emit the move
+ normally. */
+
+int
+xtensa_emit_move_sequence (rtx *operands, enum machine_mode mode)
+{
+ rtx src = operands[1];
+
+ if (CONSTANT_P (src)
+ && (GET_CODE (src) != CONST_INT || ! xtensa_simm12b (INTVAL (src))))
+ {
+ rtx dst = operands[0];
+
+ if (xtensa_tls_referenced_p (src))
+ {
+ rtx addend = NULL;
+
+ if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (src, 0), 1);
+ src = XEXP (XEXP (src, 0), 0);
+ }
+
+ src = xtensa_legitimize_tls_address (src);
+ if (addend)
+ {
+ src = gen_rtx_PLUS (mode, src, addend);
+ src = force_operand (src, dst);
+ }
+ emit_move_insn (dst, src);
+ return 1;
+ }
+
+ if (! TARGET_CONST16)
+ {
+ src = force_const_mem (SImode, src);
+ operands[1] = src;
+ }
+
+ /* PC-relative loads are always SImode, and CONST16 is only
+ supported in the movsi pattern, so add a SUBREG for any other
+ (smaller) mode. */
+
+ if (mode != SImode)
+ {
+ if (register_operand (dst, mode))
+ {
+ emit_move_insn (simplify_gen_subreg (SImode, dst, mode, 0), src);
+ return 1;
+ }
+ else
+ {
+ src = force_reg (SImode, src);
+ src = gen_lowpart_SUBREG (mode, src);
+ operands[1] = src;
+ }
+ }
+ }
+
+ if (!(reload_in_progress | reload_completed)
+ && !xtensa_valid_move (mode, operands))
+ operands[1] = force_reg (mode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+
+ /* During reload we don't want to emit (subreg:X (mem:Y)) since that
+ instruction won't be recognized after reload, so we remove the
+ subreg and adjust mem accordingly. */
+ if (reload_in_progress)
+ {
+ operands[0] = fixup_subreg_mem (operands[0]);
+ operands[1] = fixup_subreg_mem (operands[1]);
+ }
+ return 0;
+}
+
+
+static rtx
+fixup_subreg_mem (rtx x)
+{
+ if (GET_CODE (x) == SUBREG
+ && GET_CODE (SUBREG_REG (x)) == REG
+ && REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER)
+ {
+ rtx temp =
+ gen_rtx_SUBREG (GET_MODE (x),
+ reg_equiv_mem [REGNO (SUBREG_REG (x))],
+ SUBREG_BYTE (x));
+ x = alter_subreg (&temp);
+ }
+ return x;
+}
+
+
+/* Check if an incoming argument in a7 is expected to be used soon and
+ if OPND is a register or register pair that includes a7. If so,
+ create a new pseudo and copy a7 into that pseudo at the very
+ beginning of the function, followed by the special "set_frame_ptr"
+ unspec_volatile insn. The return value is either the original
+ operand, if it is not a7, or the new pseudo containing a copy of
+ the incoming argument. This is necessary because the register
+ allocator will ignore conflicts with a7 and may either assign some
+ other pseudo to a7 or use a7 as the hard_frame_pointer, clobbering
+ the incoming argument in a7. By copying the argument out of a7 as
+ the very first thing, and then immediately following that with an
+ unspec_volatile to keep the scheduler away, we should avoid any
+ problems. Putting the set_frame_ptr insn at the beginning, with
+ only the a7 copy before it, also makes it easier for the prologue
+ expander to initialize the frame pointer after the a7 copy and to
+ fix up the a7 copy to use the stack pointer instead of the frame
+ pointer. */
+
+rtx
+xtensa_copy_incoming_a7 (rtx opnd)
+{
+ rtx entry_insns = 0;
+ rtx reg, tmp;
+ enum machine_mode mode;
+
+ if (!cfun->machine->need_a7_copy)
+ return opnd;
+
+ /* This function should never be called again once a7 has been copied. */
+ gcc_assert (!cfun->machine->set_frame_ptr_insn);
+
+ mode = GET_MODE (opnd);
+
+ /* The operand using a7 may come in a later instruction, so just return
+ the original operand if it doesn't use a7. */
+ reg = opnd;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ gcc_assert (SUBREG_BYTE (reg) == 0);
+ reg = SUBREG_REG (reg);
+ }
+ if (GET_CODE (reg) != REG
+ || REGNO (reg) > A7_REG
+ || REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) <= A7_REG)
+ return opnd;
+
+ /* 1-word args will always be in a7; 2-word args in a6/a7. */
+ gcc_assert (REGNO (reg) + HARD_REGNO_NREGS (A7_REG, mode) - 1 == A7_REG);
+
+ cfun->machine->need_a7_copy = false;
+
+ /* Copy a7 to a new pseudo at the function entry. Use gen_raw_REG to
+ create the REG for a7 so that hard_frame_pointer_rtx is not used. */
+
+ start_sequence ();
+ tmp = gen_reg_rtx (mode);
+
+ switch (mode)
+ {
+ case DFmode:
+ case DImode:
+ /* Copy the value out of A7 here but keep the first word in A6 until
+ after the set_frame_ptr insn. Otherwise, the register allocator
+ may decide to put "subreg (tmp, 0)" in A7 and clobber the incoming
+ value. */
+ emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 4),
+ gen_raw_REG (SImode, A7_REG)));
+ break;
+ case SFmode:
+ emit_insn (gen_movsf_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case SImode:
+ emit_insn (gen_movsi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case HImode:
+ emit_insn (gen_movhi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ case QImode:
+ emit_insn (gen_movqi_internal (tmp, gen_raw_REG (mode, A7_REG)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ cfun->machine->set_frame_ptr_insn = emit_insn (gen_set_frame_ptr ());
+
+ /* For DF and DI mode arguments, copy the incoming value in A6 now. */
+ if (mode == DFmode || mode == DImode)
+ emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0),
+ gen_rtx_REG (SImode, A7_REG - 1)));
+ entry_insns = get_insns ();
+ end_sequence ();
+
+ if (cfun->machine->vararg_a7)
+ {
+ /* This is called from within builtin_saveregs, which will insert the
+ saveregs code at the function entry, ahead of anything placed at
+ the function entry now. Instead, save the sequence to be inserted
+ at the beginning of the saveregs code. */
+ cfun->machine->vararg_a7_copy = entry_insns;
+ }
+ else
+ {
+ /* Put entry_insns after the NOTE that starts the function. If
+ this is inside a start_sequence, make the outer-level insn
+ chain current, so the code is placed at the start of the
+ function. */
+ push_topmost_sequence ();
+ /* Do not use entry_of_function() here. This is called from within
+ expand_function_start, when the CFG still holds GIMPLE. */
+ emit_insn_after (entry_insns, get_insns ());
+ pop_topmost_sequence ();
+ }
+
+ return tmp;
+}
+
+
+/* Try to expand a block move operation to a sequence of RTL move
+ instructions. If not optimizing, or if the block size is not a
+ constant, or if the block is too large, the expansion fails and GCC
+ falls back to calling memcpy().
+
+ operands[0] is the destination
+ operands[1] is the source
+ operands[2] is the length
+ operands[3] is the alignment */
+
+int
+xtensa_expand_block_move (rtx *operands)
+{
+ static const enum machine_mode mode_from_align[] =
+ {
+ VOIDmode, QImode, HImode, VOIDmode, SImode,
+ };
+
+ rtx dst_mem = operands[0];
+ rtx src_mem = operands[1];
+ HOST_WIDE_INT bytes, align;
+ int num_pieces, move_ratio;
+ rtx temp[2];
+ enum machine_mode mode[2];
+ int amount[2];
+ bool active[2];
+ int phase = 0;
+ int next;
+ int offset_ld = 0;
+ int offset_st = 0;
+ rtx x;
+
+ /* If this is not a fixed size move, just call memcpy. */
+ if (!optimize || (GET_CODE (operands[2]) != CONST_INT))
+ return 0;
+
+ bytes = INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+
+ /* Anything to move? */
+ if (bytes <= 0)
+ return 0;
+
+ if (align > MOVE_MAX)
+ align = MOVE_MAX;
+
+ /* Decide whether to expand inline based on the optimization level. */
+ move_ratio = 4;
+ if (optimize > 2)
+ move_ratio = LARGEST_MOVE_RATIO;
+ num_pieces = (bytes / align) + (bytes % align); /* Close enough anyway. */
+ if (num_pieces > move_ratio)
+ return 0;
+
+ x = XEXP (dst_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ dst_mem = replace_equiv_address (dst_mem, x);
+ }
+
+ x = XEXP (src_mem, 0);
+ if (!REG_P (x))
+ {
+ x = force_reg (Pmode, x);
+ src_mem = replace_equiv_address (src_mem, x);
+ }
+
+ active[0] = active[1] = false;
+
+ do
+ {
+ next = phase;
+ phase ^= 1;
+
+ if (bytes > 0)
+ {
+ int next_amount;
+
+ next_amount = (bytes >= 4 ? 4 : (bytes >= 2 ? 2 : 1));
+ next_amount = MIN (next_amount, align);
+
+ amount[next] = next_amount;
+ mode[next] = mode_from_align[next_amount];
+ temp[next] = gen_reg_rtx (mode[next]);
+
+ x = adjust_address (src_mem, mode[next], offset_ld);
+ emit_insn (gen_rtx_SET (VOIDmode, temp[next], x));
+
+ offset_ld += next_amount;
+ bytes -= next_amount;
+ active[next] = true;
+ }
+
+ if (active[phase])
+ {
+ active[phase] = false;
+
+ x = adjust_address (dst_mem, mode[phase], offset_st);
+ emit_insn (gen_rtx_SET (VOIDmode, x, temp[phase]));
+
+ offset_st += amount[phase];
+ }
+ }
+ while (active[next]);
+
+ return 1;
+}
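+
+/* Worked example (editorial): a 7-byte copy with 4-byte alignment gives
+   num_pieces = 7/4 + 7%4 = 4, which does not exceed the move_ratio of 4
+   used below -O3, so the loop above emits a 4-byte, a 2-byte and a
+   1-byte load/store pair.  The two "phases" software-pipeline the
+   sequence: the load for piece N+1 is emitted before the store for
+   piece N.  */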
+
+
+void
+xtensa_expand_nonlocal_goto (rtx *operands)
+{
+ rtx goto_handler = operands[1];
+ rtx containing_fp = operands[3];
+
+ /* Generate a call to "__xtensa_nonlocal_goto" (in libgcc); the code
+ is too big to generate in-line. */
+
+ if (GET_CODE (containing_fp) != REG)
+ containing_fp = force_reg (Pmode, containing_fp);
+
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_nonlocal_goto"),
+ LCT_NORMAL, VOIDmode, 2,
+ containing_fp, Pmode,
+ goto_handler, Pmode);
+}
+
+
+static struct machine_function *
+xtensa_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+
+/* Mask VAL to the width of mode MODE, then shift it left by COUNT bits. */
+
+static inline rtx
+xtensa_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
+{
+ val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ return expand_simple_binop (SImode, ASHIFT, val, count,
+ NULL_RTX, 1, OPTAB_DIRECT);
+}
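+
+/* For example (editorial): for an HImode VAL with COUNT = 16, this
+   computes (VAL & 0xffff) << 16, placing the halfword in the upper
+   half of an SImode word.  */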
+
+
+/* Structure to hold the initial parameters for a compare_and_swap operation
+ in HImode and QImode. */
+
+struct alignment_context
+{
+ rtx memsi; /* SI aligned memory location. */
+ rtx shift; /* Bit offset with regard to lsb. */
+ rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
+ rtx modemaski; /* ~modemask */
+};
+
+
+/* Initialize structure AC for word access to HI and QI mode memory. */
+
+static void
+init_alignment_context (struct alignment_context *ac, rtx mem)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ rtx byteoffset = NULL_RTX;
+ bool aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
+
+ if (aligned)
+ ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
+ else
+ {
+ /* Alignment is unknown. */
+ rtx addr, align;
+
+ /* Force the address into a register. */
+ addr = force_reg (Pmode, XEXP (mem, 0));
+
+ /* Align it to SImode. */
+ align = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (-GET_MODE_SIZE (SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* Generate MEM. */
+ ac->memsi = gen_rtx_MEM (SImode, align);
+ MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
+ set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
+ set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
+
+ byteoffset = expand_simple_binop (Pmode, AND, addr,
+ GEN_INT (GET_MODE_SIZE (SImode) - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+
+ /* Calculate shiftcount. */
+ if (TARGET_BIG_ENDIAN)
+ {
+ ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
+ if (!aligned)
+ ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ if (aligned)
+ ac->shift = NULL_RTX;
+ else
+ ac->shift = byteoffset;
+ }
+
+ if (ac->shift != NULL_RTX)
+ {
+ /* Shift is the byte count, but we need the bitcount. */
+ ac->shift = expand_simple_binop (SImode, MULT, ac->shift,
+ GEN_INT (BITS_PER_UNIT),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ ac->modemask = expand_simple_binop (SImode, ASHIFT,
+ GEN_INT (GET_MODE_MASK (mode)),
+ ac->shift,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ else
+ ac->modemask = GEN_INT (GET_MODE_MASK (mode));
+
+ ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
+}
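+
+/* Worked example (editorial): for an unaligned HImode access at byte
+   offset 2 within its word on a little-endian target, memsi covers the
+   containing aligned word, byteoffset is 2, so shift = 16 bits and
+   modemask = 0xffff << 16.  For an SImode-aligned access on the same
+   target, shift stays NULL_RTX and modemask is simply
+   GET_MODE_MASK (HImode).  */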
+
+
+/* Expand an atomic compare and swap operation for HImode and QImode.
+ MEM is the memory location, CMP the old value to compare MEM with
+ and NEW_RTX the value to set if CMP == MEM. */
+
+void
+xtensa_expand_compare_and_swap (rtx target, rtx mem, rtx cmp, rtx new_rtx)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct alignment_context ac;
+ rtx tmp, cmpv, newv, val;
+ rtx oldval = gen_reg_rtx (SImode);
+ rtx res = gen_reg_rtx (SImode);
+ rtx csloop = gen_label_rtx ();
+ rtx csend = gen_label_rtx ();
+
+ init_alignment_context (&ac, mem);
+
+ if (ac.shift != NULL_RTX)
+ {
+ cmp = xtensa_expand_mask_and_shift (cmp, mode, ac.shift);
+ new_rtx = xtensa_expand_mask_and_shift (new_rtx, mode, ac.shift);
+ }
+
+ /* Load the surrounding word into VAL with the MEM value masked out. */
+ val = force_reg (SImode, expand_simple_binop (SImode, AND, ac.memsi,
+ ac.modemaski, NULL_RTX, 1,
+ OPTAB_DIRECT));
+ emit_label (csloop);
+
+ /* Patch CMP and NEW_RTX into VAL at the correct position. */
+ cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+ newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ /* Jump to end if we're done. */
+ emit_insn (gen_sync_compare_and_swapsi (res, ac.memsi, cmpv, newv));
+ emit_cmp_and_jump_insns (res, cmpv, EQ, const0_rtx, SImode, true, csend);
+
+ /* Check for changes outside the accessed mode's bits. */
+ emit_move_insn (oldval, val);
+ tmp = expand_simple_binop (SImode, AND, res, ac.modemaski,
+ val, 1, OPTAB_DIRECT);
+ if (tmp != val)
+ emit_move_insn (val, tmp);
+
+ /* If so, loop and retry. */
+ emit_cmp_and_jump_insns (oldval, val, NE, const0_rtx, SImode, true, csloop);
+
+ emit_label (csend);
+
+ /* Return the correct part of the bitfield. */
+ convert_move (target,
+ (ac.shift == NULL_RTX ? res
+ : expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT)),
+ 1);
+}
+
+
+/* Expand an atomic operation CODE of mode MODE (either HImode or QImode --
+ the default expansion works fine for SImode). MEM is the memory location
+ and VAL the value to play with. If AFTER is true then store the value
+ MEM holds after the operation, if AFTER is false then store the value MEM
+ holds before the operation. If TARGET is zero then discard that value, else
+ store it to TARGET. */
+
+void
+xtensa_expand_atomic (enum rtx_code code, rtx target, rtx mem, rtx val,
+ bool after)
+{
+ enum machine_mode mode = GET_MODE (mem);
+ struct alignment_context ac;
+ rtx csloop = gen_label_rtx ();
+ rtx cmp, tmp;
+ rtx old = gen_reg_rtx (SImode);
+ rtx new_rtx = gen_reg_rtx (SImode);
+ rtx orig = NULL_RTX;
+
+ init_alignment_context (&ac, mem);
+
+ /* Prepare values before the compare-and-swap loop. */
+ if (ac.shift != NULL_RTX)
+ val = xtensa_expand_mask_and_shift (val, mode, ac.shift);
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ orig = gen_reg_rtx (SImode);
+ convert_move (orig, val, 1);
+ break;
+
+ case SET:
+ case IOR:
+ case XOR:
+ break;
+
+ case MULT: /* NAND */
+ case AND:
+ /* val = "11..1<val>11..1" */
+ val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Load full word. Subsequent loads are performed by S32C1I. */
+ cmp = force_reg (SImode, ac.memsi);
+
+ emit_label (csloop);
+ emit_move_insn (old, cmp);
+
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ val = expand_simple_binop (SImode, code, old, orig,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ val = expand_simple_binop (SImode, AND, val, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* FALLTHRU */
+ case SET:
+ tmp = expand_simple_binop (SImode, AND, old, ac.modemaski,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (SImode, IOR, tmp, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ case AND:
+ case IOR:
+ case XOR:
+ tmp = expand_simple_binop (SImode, code, old, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ case MULT: /* NAND */
+ tmp = expand_simple_binop (SImode, XOR, old, ac.modemask,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (SImode, AND, tmp, val,
+ new_rtx, 1, OPTAB_DIRECT);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (tmp != new_rtx)
+ emit_move_insn (new_rtx, tmp);
+ emit_insn (gen_sync_compare_and_swapsi (cmp, ac.memsi, old, new_rtx));
+ emit_cmp_and_jump_insns (cmp, old, NE, const0_rtx, SImode, true, csloop);
+
+ if (target)
+ {
+ tmp = (after ? new_rtx : cmp);
+ convert_move (target,
+ (ac.shift == NULL_RTX ? tmp
+ : expand_simple_binop (SImode, LSHIFTRT, tmp, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT)),
+ 1);
+ }
+}
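+
+/* Usage sketch (editorial, under the assumption that VAL has already
+   been shifted into its field): for an atomic add on an HImode field
+   the loop above reduces to
+     load the containing word into CMP
+   retry:
+     OLD = CMP
+     NEW = (OLD & ~mask) | ((OLD + VAL) & mask)
+     CMP = compare-and-swap (mem, OLD, NEW)   -- S32C1I on the word
+     if (CMP != OLD) goto retry
+   and TARGET, if any, receives the old or new field value depending
+   on AFTER.  */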
+
+
+void
+xtensa_setup_frame_addresses (void)
+{
+ /* Set flag to cause TARGET_FRAME_POINTER_REQUIRED to return true. */
+ cfun->machine->accesses_prev_frame = 1;
+
+ emit_library_call
+ (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_libgcc_window_spill"),
+ LCT_NORMAL, VOIDmode, 0);
+}
+
+
+/* Emit the assembly for the end of a zero-cost loop. Normally we just emit
+ a comment showing where the end of the loop is. However, if there is a
+ label or a branch at the end of the loop then we need to place a nop
+ there. If the loop ends with a label we need the nop so that branches
+ targeting that label will target the nop (and thus remain in the loop),
+ instead of targeting the instruction after the loop (and thus exiting
+ the loop). If the loop ends with a branch, we need the nop in case the
+ branch targets a location inside the loop. Because the branch would
+ otherwise be the last instruction in the loop, executing it decrements
+ the loop count even when the branch is taken; a nop placed after the
+ branch prevents the loop count from being decremented in that case. */
+
+void
+xtensa_emit_loop_end (rtx insn, rtx *operands)
+{
+ char done = 0;
+
+ for (insn = PREV_INSN (insn); insn && !done; insn = PREV_INSN (insn))
+ {
+ switch (GET_CODE (insn))
+ {
+ case NOTE:
+ case BARRIER:
+ break;
+
+ case CODE_LABEL:
+ output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands);
+ done = 1;
+ break;
+
+ default:
+ {
+ rtx body = PATTERN (insn);
+
+ if (GET_CODE (body) == JUMP_INSN)
+ {
+ output_asm_insn (TARGET_DENSITY ? "nop.n" : "nop", operands);
+ done = 1;
+ }
+ else if ((GET_CODE (body) != USE)
+ && (GET_CODE (body) != CLOBBER))
+ done = 1;
+ }
+ break;
+ }
+ }
+
+ output_asm_insn ("# loop end for %0", operands);
+}
+
+
+char *
+xtensa_emit_branch (bool inverted, bool immed, rtx *operands)
+{
+ static char result[64];
+ enum rtx_code code;
+ const char *op;
+
+ code = GET_CODE (operands[3]);
+ switch (code)
+ {
+ case EQ: op = inverted ? "ne" : "eq"; break;
+ case NE: op = inverted ? "eq" : "ne"; break;
+ case LT: op = inverted ? "ge" : "lt"; break;
+ case GE: op = inverted ? "lt" : "ge"; break;
+ case LTU: op = inverted ? "geu" : "ltu"; break;
+ case GEU: op = inverted ? "ltu" : "geu"; break;
+ default: gcc_unreachable ();
+ }
+
+ if (immed)
+ {
+ if (INTVAL (operands[1]) == 0)
+ sprintf (result, "b%sz%s\t%%0, %%2", op,
+ (TARGET_DENSITY && (code == EQ || code == NE)) ? ".n" : "");
+ else
+ sprintf (result, "b%si\t%%0, %%d1, %%2", op);
+ }
+ else
+ sprintf (result, "b%s\t%%0, %%1, %%2", op);
+
+ return result;
+}
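+
+/* Example outputs (editorial): EQ against immediate zero yields
+   "beqz\t%0, %2" (or "beqz.n" with TARGET_DENSITY); EQ against another
+   immediate yields "beqi\t%0, %d1, %2"; EQ against a register yields
+   "beq\t%0, %1, %2".  Passing INVERTED swaps each opcode for its
+   negation, e.g. "bnez"/"bnei"/"bne".  */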
+
+
+char *
+xtensa_emit_bit_branch (bool inverted, bool immed, rtx *operands)
+{
+ static char result[64];
+ const char *op;
+
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: op = inverted ? "bs" : "bc"; break;
+ case NE: op = inverted ? "bc" : "bs"; break;
+ default: gcc_unreachable ();
+ }
+
+ if (immed)
+ {
+ unsigned bitnum = INTVAL (operands[1]) & 0x1f;
+ operands[1] = GEN_INT (bitnum);
+ sprintf (result, "b%si\t%%0, %%d1, %%2", op);
+ }
+ else
+ sprintf (result, "b%s\t%%0, %%1, %%2", op);
+
+ return result;
+}
+
+
+char *
+xtensa_emit_movcc (bool inverted, bool isfp, bool isbool, rtx *operands)
+{
+ static char result[64];
+ enum rtx_code code;
+ const char *op;
+
+ code = GET_CODE (operands[4]);
+ if (isbool)
+ {
+ switch (code)
+ {
+ case EQ: op = inverted ? "t" : "f"; break;
+ case NE: op = inverted ? "f" : "t"; break;
+ default: gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (code)
+ {
+ case EQ: op = inverted ? "nez" : "eqz"; break;
+ case NE: op = inverted ? "eqz" : "nez"; break;
+ case LT: op = inverted ? "gez" : "ltz"; break;
+ case GE: op = inverted ? "ltz" : "gez"; break;
+ default: gcc_unreachable ();
+ }
+ }
+
+ sprintf (result, "mov%s%s\t%%0, %%%d, %%1",
+ op, isfp ? ".s" : "", inverted ? 3 : 2);
+ return result;
+}
+
+
+char *
+xtensa_emit_call (int callop, rtx *operands)
+{
+ static char result[64];
+ rtx tgt = operands[callop];
+
+ if (GET_CODE (tgt) == CONST_INT)
+ sprintf (result, "call8\t0x%lx", INTVAL (tgt));
+ else if (register_operand (tgt, VOIDmode))
+ sprintf (result, "callx8\t%%%d", callop);
+ else
+ sprintf (result, "call8\t%%%d", callop);
+
+ return result;
+}
+
+
+bool
+xtensa_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
+{
+ /* Allow constant pool addresses. */
+ if (mode != BLKmode && GET_MODE_SIZE (mode) >= UNITS_PER_WORD
+ && ! TARGET_CONST16 && constantpool_address_p (addr)
+ && ! xtensa_tls_referenced_p (addr))
+ return true;
+
+ while (GET_CODE (addr) == SUBREG)
+ addr = SUBREG_REG (addr);
+
+ /* Allow base registers. */
+ if (GET_CODE (addr) == REG && BASE_REG_P (addr, strict))
+ return true;
+
+ /* Check for "register + offset" addressing. */
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx xplus0 = XEXP (addr, 0);
+ rtx xplus1 = XEXP (addr, 1);
+ enum rtx_code code0;
+ enum rtx_code code1;
+
+ while (GET_CODE (xplus0) == SUBREG)
+ xplus0 = SUBREG_REG (xplus0);
+ code0 = GET_CODE (xplus0);
+
+ while (GET_CODE (xplus1) == SUBREG)
+ xplus1 = SUBREG_REG (xplus1);
+ code1 = GET_CODE (xplus1);
+
+ /* Swap operands if necessary so the register is first. */
+ if (code0 != REG && code1 == REG)
+ {
+ xplus0 = XEXP (addr, 1);
+ xplus1 = XEXP (addr, 0);
+ code0 = GET_CODE (xplus0);
+ code1 = GET_CODE (xplus1);
+ }
+
+ if (code0 == REG && BASE_REG_P (xplus0, strict)
+ && code1 == CONST_INT
+ && xtensa_mem_offset (INTVAL (xplus1), mode))
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx xtensa_tls_module_base_symbol;
+
+static rtx
+xtensa_tls_module_base (void)
+{
+ if (! xtensa_tls_module_base_symbol)
+ {
+ xtensa_tls_module_base_symbol =
+ gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (xtensa_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return xtensa_tls_module_base_symbol;
+}
+
+
+static rtx
+xtensa_call_tls_desc (rtx sym, rtx *retp)
+{
+ rtx fn, arg, a10, call_insn, insns;
+
+ start_sequence ();
+ fn = gen_reg_rtx (Pmode);
+ arg = gen_reg_rtx (Pmode);
+ a10 = gen_rtx_REG (Pmode, 10);
+
+ emit_insn (gen_tls_func (fn, sym));
+ emit_insn (gen_tls_arg (arg, sym));
+ emit_move_insn (a10, arg);
+ call_insn = emit_call_insn (gen_tls_call (a10, fn, sym, const1_rtx));
+ CALL_INSN_FUNCTION_USAGE (call_insn)
+ = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, a10),
+ CALL_INSN_FUNCTION_USAGE (call_insn));
+ insns = get_insns ();
+ end_sequence ();
+
+ *retp = a10;
+ return insns;
+}
+
+
+static rtx
+xtensa_legitimize_tls_address (rtx x)
+{
+ unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+ rtx dest, tp, ret, modbase, base, addend, insns;
+
+ dest = gen_reg_rtx (Pmode);
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ insns = xtensa_call_tls_desc (x, &ret);
+ emit_libcall_block (insns, dest, ret, x);
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ modbase = xtensa_tls_module_base ();
+ insns = xtensa_call_tls_desc (modbase, &ret);
+ emit_libcall_block (insns, base, ret, modbase);
+ addend = force_reg (SImode, gen_sym_DTPOFF (x));
+ emit_insn (gen_addsi3 (dest, base, addend));
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ case TLS_MODEL_LOCAL_EXEC:
+ tp = gen_reg_rtx (SImode);
+ emit_insn (gen_load_tp (tp));
+ addend = force_reg (SImode, gen_sym_TPOFF (x));
+ emit_insn (gen_addsi3 (dest, tp, addend));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
+
+
+rtx
+xtensa_legitimize_address (rtx x,
+ rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ if (xtensa_tls_symbol_p (x))
+ return xtensa_legitimize_tls_address (x);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ rtx plus0 = XEXP (x, 0);
+ rtx plus1 = XEXP (x, 1);
+
+ if (GET_CODE (plus0) != REG && GET_CODE (plus1) == REG)
+ {
+ plus0 = XEXP (x, 1);
+ plus1 = XEXP (x, 0);
+ }
+
+ /* Try to split up the offset to use an ADDMI instruction. */
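+      /* E.g., a (hypothetical) offset of 0x1234 is neither a valid
+	 memory offset nor a SIMM8 value, but it splits into 0x1200,
+	 which ADDMI can add, plus 0x34, which fits in the load/store
+	 offset field. */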
+ if (GET_CODE (plus0) == REG
+ && GET_CODE (plus1) == CONST_INT
+ && !xtensa_mem_offset (INTVAL (plus1), mode)
+ && !xtensa_simm8 (INTVAL (plus1))
+ && xtensa_mem_offset (INTVAL (plus1) & 0xff, mode)
+ && xtensa_simm8x256 (INTVAL (plus1) & ~0xff))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx addmi_offset = GEN_INT (INTVAL (plus1) & ~0xff);
+ emit_insn (gen_rtx_SET (Pmode, temp,
+ gen_rtx_PLUS (Pmode, plus0, addmi_offset)));
+ return gen_rtx_PLUS (Pmode, temp, GEN_INT (INTVAL (plus1) & 0xff));
+ }
+ }
+
+ return x;
+}
+
+/* Worker function for TARGET_MODE_DEPENDENT_ADDRESS_P.
+
+ Treat constant-pool references as "mode dependent" since they can
+ only be accessed with SImode loads. This works around a bug in the
+ combiner where a constant pool reference is temporarily converted
+ to an HImode load, which is then assumed to zero-extend based on
+ our definition of LOAD_EXTEND_OP. This is wrong because the high
+ bits of a 16-bit value in the constant pool are now sign-extended
+ by default. */
+
+static bool
+xtensa_mode_dependent_address_p (const_rtx addr)
+{
+ return constantpool_address_p (addr);
+}
+
+/* Helper for xtensa_tls_referenced_p. */
+
+static int
+xtensa_tls_referenced_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*x) == SYMBOL_REF)
+ return SYMBOL_REF_TLS_MODEL (*x) != 0;
+
+ /* Ignore TLS references that have already been legitimized. */
+ if (GET_CODE (*x) == UNSPEC)
+ {
+ switch (XINT (*x, 1))
+ {
+ case UNSPEC_TPOFF:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_TLS_FUNC:
+ case UNSPEC_TLS_ARG:
+ case UNSPEC_TLS_CALL:
+ return -1;
+ default:
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+/* Return TRUE if X contains any TLS symbol references. */
+
+bool
+xtensa_tls_referenced_p (rtx x)
+{
+ if (! TARGET_HAVE_TLS)
+ return false;
+
+ return for_each_rtx (&x, xtensa_tls_referenced_p_1, NULL);
+}
+
+
+/* Return the debugger register number to use for 'regno'. */
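+
+/* For example, with the register numbering from xtensa.h
+   (FP_REG_FIRST == 19), hard register 22, the fourth FP register,
+   maps to 48 + 3 = 51, and the MAC16 accumulator maps to special
+   register 0x200 + 16 = 0x210. */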
+
+int
+xtensa_dbx_register_number (int regno)
+{
+ int first = -1;
+
+ if (GP_REG_P (regno))
+ {
+ regno -= GP_REG_FIRST;
+ first = 0;
+ }
+ else if (BR_REG_P (regno))
+ {
+ regno -= BR_REG_FIRST;
+ first = 16;
+ }
+ else if (FP_REG_P (regno))
+ {
+ regno -= FP_REG_FIRST;
+ first = 48;
+ }
+ else if (ACC_REG_P (regno))
+ {
+ first = 0x200; /* Start of Xtensa special registers. */
+ regno = 16; /* ACCLO is special register 16. */
+ }
+
+ /* When optimizing, we sometimes get asked about pseudo-registers
+ that don't represent hard registers. Return 0 for these. */
+ if (first == -1)
+ return 0;
+
+ return first + regno;
+}
+
+
+/* Argument support functions. */
+
+/* Initialize CUMULATIVE_ARGS for a function. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, int incoming)
+{
+ cum->arg_words = 0;
+ cum->incoming = incoming;
+}
+
+
+/* Advance the argument to the next argument position. */
+
+static void
+xtensa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int words, max;
+ int *arg_words;
+
+ arg_words = &cum->arg_words;
+ max = MAX_ARGS_IN_REGISTERS;
+
+ words = (((mode != BLKmode)
+ ? (int) GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (*arg_words < max
+ && (targetm.calls.must_pass_in_stack (mode, type)
+ || *arg_words + words > max))
+ *arg_words = max;
+
+ *arg_words += words;
+}
+
+
+/* Return an RTL expression containing the register for the given mode,
+   or 0 if the argument is to be passed on the stack.  INCOMING_P is
+   true if this is an incoming argument to the current function. */
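+
+/* For example, a DImode argument (two words) seen when *ARG_WORDS == 5
+   and MAX_ARGS_IN_REGISTERS == 6 does not fit in the remaining
+   register words, so it is passed entirely on the stack. */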
+
+static rtx
+xtensa_function_arg_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool incoming_p)
+{
+ int regbase, words, max;
+ int *arg_words;
+ int regno;
+
+ arg_words = &cum->arg_words;
+ regbase = (incoming_p ? GP_ARG_FIRST : GP_OUTGOING_ARG_FIRST);
+ max = MAX_ARGS_IN_REGISTERS;
+
+ words = (((mode != BLKmode)
+ ? (int) GET_MODE_SIZE (mode)
+ : int_size_in_bytes (type)) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (type && (TYPE_ALIGN (type) > BITS_PER_WORD))
+ {
+ int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_WORD;
+ *arg_words = (*arg_words + align - 1) & -align;
+ }
+
+ if (*arg_words + words > max)
+ return (rtx)0;
+
+ regno = regbase + *arg_words;
+
+ if (cum->incoming && regno <= A7_REG && regno + words > A7_REG)
+ cfun->machine->need_a7_copy = true;
+
+ return gen_rtx_REG (mode, regno);
+}
+
+/* Implement TARGET_FUNCTION_ARG. */
+
+static rtx
+xtensa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return xtensa_function_arg_1 (cum, mode, type, false);
+}
+
+/* Implement TARGET_FUNCTION_INCOMING_ARG. */
+
+static rtx
+xtensa_function_incoming_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ return xtensa_function_arg_1 (cum, mode, type, true);
+}
+
+static unsigned int
+xtensa_function_arg_boundary (enum machine_mode mode, const_tree type)
+{
+ unsigned int alignment;
+
+ alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
+ if (alignment < PARM_BOUNDARY)
+ alignment = PARM_BOUNDARY;
+ if (alignment > STACK_BOUNDARY)
+ alignment = STACK_BOUNDARY;
+ return alignment;
+}
+
+
+static bool
+xtensa_return_in_msb (const_tree valtype)
+{
+ return (TARGET_BIG_ENDIAN
+ && AGGREGATE_TYPE_P (valtype)
+ && int_size_in_bytes (valtype) >= UNITS_PER_WORD);
+}
+
+
+static void
+xtensa_option_override (void)
+{
+ int regno;
+ enum machine_mode mode;
+
+ if (!TARGET_BOOLEANS && TARGET_HARD_FLOAT)
+ error ("boolean registers required for the floating-point option");
+
+ /* Set up array giving whether a given register can hold a given mode. */
+ for (mode = VOIDmode;
+ mode != MAX_MACHINE_MODE;
+ mode = (enum machine_mode) ((int) mode + 1))
+ {
+ int size = GET_MODE_SIZE (mode);
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ {
+ int temp;
+
+ if (ACC_REG_P (regno))
+ temp = (TARGET_MAC16
+ && (mclass == MODE_INT) && (size <= UNITS_PER_WORD));
+ else if (GP_REG_P (regno))
+ temp = ((regno & 1) == 0 || (size <= UNITS_PER_WORD));
+ else if (FP_REG_P (regno))
+ temp = (TARGET_HARD_FLOAT && (mode == SFmode));
+ else if (BR_REG_P (regno))
+ temp = (TARGET_BOOLEANS && (mode == CCmode));
+ else
+ temp = FALSE;
+
+ xtensa_hard_regno_mode_ok[(int) mode][regno] = temp;
+ }
+ }
+
+ init_machine_status = xtensa_init_machine_status;
+
+ /* Check PIC settings. PIC is only supported when using L32R
+ instructions, and some targets need to always use PIC. */
+ if (flag_pic && TARGET_CONST16)
+ error ("-f%s is not supported with CONST16 instructions",
+ (flag_pic > 1 ? "PIC" : "pic"));
+ else if (TARGET_FORCE_NO_PIC)
+ flag_pic = 0;
+ else if (XTENSA_ALWAYS_PIC)
+ {
+ if (TARGET_CONST16)
+ error ("PIC is required but not supported with CONST16 instructions");
+ flag_pic = 1;
+ }
+ /* There's no need for -fPIC (as opposed to -fpic) on Xtensa. */
+ if (flag_pic > 1)
+ flag_pic = 1;
+ if (flag_pic && !flag_pie)
+ flag_shlib = 1;
+
+  /* Hot/cold partitioning does not work on this architecture because of
+     constant pools (the load instruction cannot necessarily reach that
+     far), so disable it. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+}
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand X. X is an RTL
+ expression.
+
+ CODE is a value that can be used to specify one of several ways
+ of printing the operand. It is used when identical operands
+ must be printed differently depending on the context. CODE
+ comes from the '%' specification that was used to request
+ printing of the operand. If the specification was just '%DIGIT'
+ then CODE is 0; if the specification was '%LTR DIGIT' then CODE
+ is the ASCII code for LTR.
+
+ If X is a register, this macro should print the register's name.
+ The names can be found in an array 'reg_names' whose type is
+ 'char *[]'. 'reg_names' is initialized from 'REGISTER_NAMES'.
+
+ When the machine description has a specification '%PUNCT' (a '%'
+ followed by a punctuation character), this macro is called with
+ a null pointer for X and the punctuation character for CODE.
+
+ 'a', 'c', 'l', and 'n' are reserved.
+
+ The Xtensa specific codes are:
+
+ 'd' CONST_INT, print as signed decimal
+ 'x' CONST_INT, print as signed hexadecimal
+ 'K' CONST_INT, print number of bits in mask for EXTUI
+ 'R' CONST_INT, print (X & 0x1f)
+ 'L' CONST_INT, print ((32 - X) & 0x1f)
+ 'D' REG, print second register of double-word register operand
+ 'N' MEM, print address of next word following a memory operand
+ 'v' MEM, if memory reference is volatile, output a MEMW before it
+ 't' any constant, add "@h" suffix for top 16 bits
+ 'b' any constant, add "@l" suffix for bottom 16 bits
+*/
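+
+/* A few worked examples of the codes above (values derived from the
+   code below): '%K' of 0x0000000f prints 4, the EXTUI field width,
+   since the mask has four low one-bits; '%L' of 8 prints 24
+   ((32 - 8) & 0x1f); and '%R' of 40 prints 8 (40 & 0x1f). */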
+
+static void
+printx (FILE *file, signed int val)
+{
+ /* Print a hexadecimal value in a nice way. */
+ if ((val > -0xa) && (val < 0xa))
+ fprintf (file, "%d", val);
+ else if (val < 0)
+ fprintf (file, "-0x%x", -val);
+ else
+ fprintf (file, "0x%x", val);
+}
+
+
+void
+print_operand (FILE *file, rtx x, int letter)
+{
+ if (!x)
+ error ("PRINT_OPERAND null pointer");
+
+ switch (letter)
+ {
+ case 'D':
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ fprintf (file, "%s", reg_names[xt_true_regnum (x) + 1]);
+ else
+ output_operand_lossage ("invalid %%D value");
+ break;
+
+ case 'v':
+ if (GET_CODE (x) == MEM)
+ {
+ /* For a volatile memory reference, emit a MEMW before the
+ load or store. */
+ if (MEM_VOLATILE_P (x) && TARGET_SERIALIZE_VOLATILE)
+ fprintf (file, "memw\n\t");
+ }
+ else
+ output_operand_lossage ("invalid %%v value");
+ break;
+
+ case 'N':
+ if (GET_CODE (x) == MEM
+ && (GET_MODE (x) == DFmode || GET_MODE (x) == DImode))
+ {
+ x = adjust_address (x, GET_MODE (x) == DFmode ? SFmode : SImode, 4);
+ output_address (XEXP (x, 0));
+ }
+ else
+ output_operand_lossage ("invalid %%N value");
+ break;
+
+ case 'K':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ int num_bits = 0;
+ unsigned val = INTVAL (x);
+ while (val & 1)
+ {
+ num_bits += 1;
+ val = val >> 1;
+ }
+ if ((val != 0) || (num_bits == 0) || (num_bits > 16))
+ fatal_insn ("invalid mask", x);
+
+ fprintf (file, "%d", num_bits);
+ }
+ else
+ output_operand_lossage ("invalid %%K value");
+ break;
+
+ case 'L':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", (32 - INTVAL (x)) & 0x1f);
+ else
+ output_operand_lossage ("invalid %%L value");
+ break;
+
+ case 'R':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x) & 0x1f);
+ else
+ output_operand_lossage ("invalid %%R value");
+ break;
+
+ case 'x':
+ if (GET_CODE (x) == CONST_INT)
+ printx (file, INTVAL (x));
+ else
+ output_operand_lossage ("invalid %%x value");
+ break;
+
+ case 'd':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x));
+ else
+ output_operand_lossage ("invalid %%d value");
+ break;
+
+ case 't':
+ case 'b':
+ if (GET_CODE (x) == CONST_INT)
+ {
+ printx (file, INTVAL (x));
+ fputs (letter == 't' ? "@h" : "@l", file);
+ }
+ else if (GET_CODE (x) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ if (GET_MODE (x) == SFmode)
+ {
+ long l;
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+ fprintf (file, "0x%08lx@%c", l, letter == 't' ? 'h' : 'l');
+ }
+ else
+ output_operand_lossage ("invalid %%t/%%b value");
+ }
+ else if (GET_CODE (x) == CONST)
+ {
+ /* X must be a symbolic constant on ELF. Write an expression
+ suitable for 'const16' that sets the high or low 16 bits. */
+ if (GET_CODE (XEXP (x, 0)) != PLUS
+ || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ output_operand_lossage ("invalid %%t/%%b value");
+ print_operand (file, XEXP (XEXP (x, 0), 0), 0);
+ fputs (letter == 't' ? "@h" : "@l", file);
+ /* There must be a non-alphanumeric character between 'h' or 'l'
+ and the number. The '-' is added by print_operand() already. */
+ if (INTVAL (XEXP (XEXP (x, 0), 1)) >= 0)
+ fputs ("+", file);
+ print_operand (file, XEXP (XEXP (x, 0), 1), 0);
+ }
+ else
+ {
+ output_addr_const (file, x);
+ fputs (letter == 't' ? "@h" : "@l", file);
+ }
+ break;
+
+ default:
+ if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ fprintf (file, "%s", reg_names[xt_true_regnum (x)]);
+ else if (GET_CODE (x) == MEM)
+ output_address (XEXP (x, 0));
+ else if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%ld", INTVAL (x));
+ else
+ output_addr_const (file, x);
+ }
+}
+
+
+/* A C compound statement to output to stdio stream STREAM the
+ assembler syntax for an instruction operand that is a memory
+ reference whose address is ADDR. ADDR is an RTL expression. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ if (!addr)
+ error ("PRINT_OPERAND_ADDRESS, null pointer");
+
+ switch (GET_CODE (addr))
+ {
+ default:
+ fatal_insn ("invalid address", addr);
+ break;
+
+ case REG:
+ fprintf (file, "%s, 0", reg_names [REGNO (addr)]);
+ break;
+
+ case PLUS:
+ {
+ rtx reg = (rtx)0;
+ rtx offset = (rtx)0;
+ rtx arg0 = XEXP (addr, 0);
+ rtx arg1 = XEXP (addr, 1);
+
+ if (GET_CODE (arg0) == REG)
+ {
+ reg = arg0;
+ offset = arg1;
+ }
+ else if (GET_CODE (arg1) == REG)
+ {
+ reg = arg1;
+ offset = arg0;
+ }
+ else
+ fatal_insn ("no register in address", addr);
+
+ if (CONSTANT_P (offset))
+ {
+ fprintf (file, "%s, ", reg_names [REGNO (reg)]);
+ output_addr_const (file, offset);
+ }
+ else
+ fatal_insn ("address offset not a constant", addr);
+ }
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CONST_INT:
+ case CONST:
+ output_addr_const (file, addr);
+ break;
+ }
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+
+static bool
+xtensa_output_addr_const_extra (FILE *fp, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_TPOFF:
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@TPOFF", fp);
+ return true;
+ case UNSPEC_DTPOFF:
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@DTPOFF", fp);
+ return true;
+ case UNSPEC_PLT:
+ if (flag_pic)
+ {
+ output_addr_const (fp, XVECEXP (x, 0, 0));
+ fputs ("@PLT", fp);
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+
+void
+xtensa_output_literal (FILE *file, rtx x, enum machine_mode mode, int labelno)
+{
+ long value_long[2];
+ REAL_VALUE_TYPE r;
+ int size;
+ rtx first, second;
+
+ fprintf (file, "\t.literal .LC%u, ", (unsigned) labelno);
+
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_FLOAT:
+ gcc_assert (GET_CODE (x) == CONST_DOUBLE);
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ switch (mode)
+ {
+ case SFmode:
+ REAL_VALUE_TO_TARGET_SINGLE (r, value_long[0]);
+ if (HOST_BITS_PER_LONG > 32)
+ value_long[0] &= 0xffffffff;
+ fprintf (file, "0x%08lx\n", value_long[0]);
+ break;
+
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, value_long);
+ if (HOST_BITS_PER_LONG > 32)
+ {
+ value_long[0] &= 0xffffffff;
+ value_long[1] &= 0xffffffff;
+ }
+ fprintf (file, "0x%08lx, 0x%08lx\n",
+ value_long[0], value_long[1]);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ break;
+
+ case MODE_INT:
+ case MODE_PARTIAL_INT:
+ size = GET_MODE_SIZE (mode);
+ switch (size)
+ {
+ case 4:
+ output_addr_const (file, x);
+ fputs ("\n", file);
+ break;
+
+ case 8:
+ split_double (x, &first, &second);
+ output_addr_const (file, first);
+ fputs (", ", file);
+ output_addr_const (file, second);
+ fputs ("\n", file);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+/* Return the number of bytes needed to compute the frame pointer from
+   the current stack pointer, i.e., the total size of the current frame. */
+
+#define STACK_BYTES (STACK_BOUNDARY / BITS_PER_UNIT)
+#define XTENSA_STACK_ALIGN(LOC) (((LOC) + STACK_BYTES-1) & ~(STACK_BYTES-1))
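+
+/* For example, assuming the usual STACK_BOUNDARY of 128 bits,
+   STACK_BYTES is 16 and XTENSA_STACK_ALIGN (20) yields 32. */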
+
+long
+compute_frame_size (int size)
+{
+ /* Add space for the incoming static chain value. */
+ if (cfun->static_chain_decl != NULL)
+ size += (1 * UNITS_PER_WORD);
+
+ xtensa_current_frame_size =
+ XTENSA_STACK_ALIGN (size
+ + crtl->outgoing_args_size
+ + (WINDOW_SIZE * UNITS_PER_WORD));
+ return xtensa_current_frame_size;
+}
+
+
+bool
+xtensa_frame_pointer_required (void)
+{
+ /* The code to expand builtin_frame_addr and builtin_return_addr
+ currently uses the hard_frame_pointer instead of frame_pointer.
+ This seems wrong but maybe it's necessary for other architectures.
+ This function is derived from the i386 code. */
+
+ if (cfun->machine->accesses_prev_frame)
+ return true;
+
+ return false;
+}
+
+
+/* The minimum frame is the register save area (4 words) plus the static
+   chain (1 word), rounded up so that the total frame size is a multiple
+   of 128 bits. */
+#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
+
+void
+xtensa_expand_prologue (void)
+{
+ HOST_WIDE_INT total_size;
+ rtx size_rtx;
+ rtx insn, note_rtx;
+
+ total_size = compute_frame_size (get_frame_size ());
+ size_rtx = GEN_INT (total_size);
+
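+  /* The ENTRY instruction encodes its stack adjustment as a 12-bit
+     immediate scaled by 8, hence the 1 << (12+3) limit.  Larger frames
+     allocate MIN_FRAME_SIZE with ENTRY and then adjust the stack
+     pointer explicitly, with a8 as a temporary. */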
+ if (total_size < (1 << (12+3)))
+ insn = emit_insn (gen_entry (size_rtx));
+ else
+ {
+ /* Use a8 as a temporary since a0-a7 may be live. */
+ rtx tmp_reg = gen_rtx_REG (Pmode, A8_REG);
+ emit_insn (gen_entry (GEN_INT (MIN_FRAME_SIZE)));
+ emit_move_insn (tmp_reg, GEN_INT (total_size - MIN_FRAME_SIZE));
+ emit_insn (gen_subsi3 (tmp_reg, stack_pointer_rtx, tmp_reg));
+ insn = emit_insn (gen_movsi (stack_pointer_rtx, tmp_reg));
+ }
+
+ if (frame_pointer_needed)
+ {
+ if (cfun->machine->set_frame_ptr_insn)
+ {
+ rtx first;
+
+ push_topmost_sequence ();
+ first = get_insns ();
+ pop_topmost_sequence ();
+
+ /* For all instructions prior to set_frame_ptr_insn, replace
+ hard_frame_pointer references with stack_pointer. */
+ for (insn = first;
+ insn != cfun->machine->set_frame_ptr_insn;
+ insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ PATTERN (insn) = replace_rtx (copy_rtx (PATTERN (insn)),
+ hard_frame_pointer_rtx,
+ stack_pointer_rtx);
+ df_insn_rescan (insn);
+ }
+ }
+ }
+ else
+ insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
+ stack_pointer_rtx));
+ }
+
+ /* Create a note to describe the CFA. Because this is only used to set
+ DW_AT_frame_base for debug info, don't bother tracking changes through
+ each instruction in the prologue. It just takes up space. */
+ note_rtx = gen_rtx_SET (VOIDmode, (frame_pointer_needed
+ ? hard_frame_pointer_rtx
+ : stack_pointer_rtx),
+ plus_constant (stack_pointer_rtx, -total_size));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
+}
+
+
+/* Clear variables at function end. */
+
+void
+xtensa_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ xtensa_current_frame_size = 0;
+}
+
+
+rtx
+xtensa_return_addr (int count, rtx frame)
+{
+ rtx result, retaddr, curaddr, label;
+
+ if (count == -1)
+ retaddr = gen_rtx_REG (Pmode, A0_REG);
+ else
+ {
+ rtx addr = plus_constant (frame, -4 * UNITS_PER_WORD);
+ addr = memory_address (Pmode, addr);
+ retaddr = gen_reg_rtx (Pmode);
+ emit_move_insn (retaddr, gen_rtx_MEM (Pmode, addr));
+ }
+
+ /* The 2 most-significant bits of the return address on Xtensa hold
+ the register window size. To get the real return address, these
+ bits must be replaced with the high bits from some address in the
+ code. */
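+
+  /* For example (hypothetical values): a raw return address of
+     0x8000a5c4, with code linked near 0x40000000, becomes
+     ((0x8000a5c4 << 2) >> 2) | 0x40000000 == 0x4000a5c4. */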
+
+ /* Get the 2 high bits of a local label in the code. */
+ curaddr = gen_reg_rtx (Pmode);
+ label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ emit_move_insn (curaddr, gen_rtx_LABEL_REF (Pmode, label));
+ emit_insn (gen_lshrsi3 (curaddr, curaddr, GEN_INT (30)));
+ emit_insn (gen_ashlsi3 (curaddr, curaddr, GEN_INT (30)));
+
+ /* Clear the 2 high bits of the return address. */
+ result = gen_reg_rtx (Pmode);
+ emit_insn (gen_ashlsi3 (result, retaddr, GEN_INT (2)));
+ emit_insn (gen_lshrsi3 (result, result, GEN_INT (2)));
+
+ /* Combine them to get the result. */
+ emit_insn (gen_iorsi3 (result, result, curaddr));
+ return result;
+}
+
+
+/* Create the va_list data type.
+
+ This structure is set up by __builtin_saveregs. The __va_reg field
+ points to a stack-allocated region holding the contents of the
+ incoming argument registers. The __va_ndx field is an index
+ initialized to the position of the first unnamed (variable)
+ argument. This same index is also used to address the arguments
+   passed in memory.  Thus, the __va_stk field is initialized to point
+   to the position of the first argument in memory, offset to account
+   for the arguments passed in registers and for the size of the
+   argument registers not being 16-byte aligned.  E.g., there are
+   6 argument registers of 4 bytes each, but we want the __va_ndx
+   for the first stack argument to have the maximal alignment of 16
+   bytes, so we offset the __va_stk address by 32 bytes so that
+   __va_stk[32] references the first argument on the stack. */
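+
+/* In C terms, the type built below is in effect:
+
+       struct __va_list_tag {
+         void *__va_stk;
+         void *__va_reg;
+         int __va_ndx;
+       };  */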
+
+static tree
+xtensa_build_builtin_va_list (void)
+{
+ tree f_stk, f_reg, f_ndx, record, type_decl;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_stk = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_stk"),
+ ptr_type_node);
+ f_reg = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_reg"),
+ ptr_type_node);
+ f_ndx = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_ndx"),
+ integer_type_node);
+
+ DECL_FIELD_CONTEXT (f_stk) = record;
+ DECL_FIELD_CONTEXT (f_reg) = record;
+ DECL_FIELD_CONTEXT (f_ndx) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_stk;
+ DECL_CHAIN (f_stk) = f_reg;
+ DECL_CHAIN (f_reg) = f_ndx;
+
+ layout_type (record);
+ return record;
+}
+
+
+/* Save the incoming argument registers on the stack. Returns the
+ address of the saved registers. */
+
+static rtx
+xtensa_builtin_saveregs (void)
+{
+ rtx gp_regs;
+ int arg_words = crtl->args.info.arg_words;
+ int gp_left = MAX_ARGS_IN_REGISTERS - arg_words;
+
+ if (gp_left <= 0)
+ return const0_rtx;
+
+ /* Allocate the general-purpose register space. */
+ gp_regs = assign_stack_local
+ (BLKmode, MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD, -1);
+ set_mem_alias_set (gp_regs, get_varargs_alias_set ());
+
+ /* Now store the incoming registers. */
+ cfun->machine->need_a7_copy = true;
+ cfun->machine->vararg_a7 = true;
+ move_block_from_reg (GP_ARG_FIRST + arg_words,
+ adjust_address (gp_regs, BLKmode,
+ arg_words * UNITS_PER_WORD),
+ gp_left);
+ gcc_assert (cfun->machine->vararg_a7_copy != 0);
+ emit_insn_before (cfun->machine->vararg_a7_copy, get_insns ());
+
+ return XEXP (gp_regs, 0);
+}
+
+
+/* Implement `va_start' for varargs and stdarg. We look at the
+ current function to fill in an initial va_list. */
+
+static void
+xtensa_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
+{
+ tree f_stk, stk;
+ tree f_reg, reg;
+ tree f_ndx, ndx;
+ tree t, u;
+ int arg_words;
+
+ arg_words = crtl->args.info.arg_words;
+
+ f_stk = TYPE_FIELDS (va_list_type_node);
+ f_reg = DECL_CHAIN (f_stk);
+ f_ndx = DECL_CHAIN (f_reg);
+
+ stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist, f_stk, NULL_TREE);
+ reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist),
+ f_reg, NULL_TREE);
+ ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist),
+ f_ndx, NULL_TREE);
+
+ /* Call __builtin_saveregs; save the result in __va_reg */
+ u = make_tree (sizetype, expand_builtin_saveregs ());
+ u = fold_convert (ptr_type_node, u);
+ t = build2 (MODIFY_EXPR, ptr_type_node, reg, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Set the __va_stk member to ($arg_ptr - 32). */
+ u = make_tree (ptr_type_node, virtual_incoming_args_rtx);
+ u = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, u, size_int (-32));
+ t = build2 (MODIFY_EXPR, ptr_type_node, stk, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ /* Set the __va_ndx member. If the first variable argument is on
+ the stack, adjust __va_ndx by 2 words to account for the extra
+ alignment offset for __va_stk. */
+ if (arg_words >= MAX_ARGS_IN_REGISTERS)
+ arg_words += 2;
+ t = build2 (MODIFY_EXPR, integer_type_node, ndx,
+ build_int_cst (integer_type_node, arg_words * UNITS_PER_WORD));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+
+/* Implement `va_arg'. */
+
+static tree
+xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ tree f_stk, stk;
+ tree f_reg, reg;
+ tree f_ndx, ndx;
+ tree type_size, array, orig_ndx, addr, size, va_size, t;
+ tree lab_false, lab_over, lab_false2;
+ bool indirect;
+
+ indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect)
+ type = build_pointer_type (type);
+
+ /* Handle complex values as separate real and imaginary parts. */
+ if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ tree real_part, imag_part;
+
+ real_part = xtensa_gimplify_va_arg_expr (valist, TREE_TYPE (type),
+ pre_p, NULL);
+ real_part = get_initialized_tmp_var (real_part, pre_p, NULL);
+
+ imag_part = xtensa_gimplify_va_arg_expr (unshare_expr (valist),
+ TREE_TYPE (type),
+ pre_p, NULL);
+ imag_part = get_initialized_tmp_var (imag_part, pre_p, NULL);
+
+ return build2 (COMPLEX_EXPR, type, real_part, imag_part);
+ }
+
+ f_stk = TYPE_FIELDS (va_list_type_node);
+ f_reg = DECL_CHAIN (f_stk);
+ f_ndx = DECL_CHAIN (f_reg);
+
+ stk = build3 (COMPONENT_REF, TREE_TYPE (f_stk), valist,
+ f_stk, NULL_TREE);
+ reg = build3 (COMPONENT_REF, TREE_TYPE (f_reg), unshare_expr (valist),
+ f_reg, NULL_TREE);
+ ndx = build3 (COMPONENT_REF, TREE_TYPE (f_ndx), unshare_expr (valist),
+ f_ndx, NULL_TREE);
+
+ type_size = size_in_bytes (type);
+ va_size = round_up (type_size, UNITS_PER_WORD);
+ gimplify_expr (&va_size, pre_p, NULL, is_gimple_val, fb_rvalue);
+
+
+ /* First align __va_ndx if necessary for this arg:
+
+ orig_ndx = (AP).__va_ndx;
+ if (__alignof__ (TYPE) > 4 )
+ orig_ndx = ((orig_ndx + __alignof__ (TYPE) - 1)
+ & -__alignof__ (TYPE)); */
+
+ orig_ndx = get_initialized_tmp_var (ndx, pre_p, NULL);
+
+ if (TYPE_ALIGN (type) > BITS_PER_WORD)
+ {
+ int align = MIN (TYPE_ALIGN (type), STACK_BOUNDARY) / BITS_PER_UNIT;
+
+ t = build2 (PLUS_EXPR, integer_type_node, unshare_expr (orig_ndx),
+ build_int_cst (integer_type_node, align - 1));
+ t = build2 (BIT_AND_EXPR, integer_type_node, t,
+ build_int_cst (integer_type_node, -align));
+ gimplify_assign (unshare_expr (orig_ndx), t, pre_p);
+ }
+
+
+ /* Increment __va_ndx to point past the argument:
+
+ (AP).__va_ndx = orig_ndx + __va_size (TYPE); */
+
+ t = fold_convert (integer_type_node, va_size);
+ t = build2 (PLUS_EXPR, integer_type_node, orig_ndx, t);
+ gimplify_assign (unshare_expr (ndx), t, pre_p);
+
+
+ /* Check if the argument is in registers:
+
+ if ((AP).__va_ndx <= __MAX_ARGS_IN_REGISTERS * 4
+ && !must_pass_in_stack (type))
+ __array = (AP).__va_reg; */
+
+ array = create_tmp_var (ptr_type_node, NULL);
+
+ lab_over = NULL;
+ if (!targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
+ {
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = build2 (GT_EXPR, boolean_type_node, unshare_expr (ndx),
+ build_int_cst (integer_type_node,
+ MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD));
+ t = build3 (COND_EXPR, void_type_node, t,
+ build1 (GOTO_EXPR, void_type_node, lab_false),
+ NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ gimplify_assign (unshare_expr (array), reg, pre_p);
+
+ t = build1 (GOTO_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false);
+ gimplify_and_add (t, pre_p);
+ }
+
+
+ /* ...otherwise, the argument is on the stack (never split between
+ registers and the stack -- change __va_ndx if necessary):
+
+ else
+ {
+ if (orig_ndx <= __MAX_ARGS_IN_REGISTERS * 4)
+ (AP).__va_ndx = 32 + __va_size (TYPE);
+ __array = (AP).__va_stk;
+ } */
+
+ lab_false2 = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = build2 (GT_EXPR, boolean_type_node, unshare_expr (orig_ndx),
+ build_int_cst (integer_type_node,
+ MAX_ARGS_IN_REGISTERS * UNITS_PER_WORD));
+ t = build3 (COND_EXPR, void_type_node, t,
+ build1 (GOTO_EXPR, void_type_node, lab_false2),
+ NULL_TREE);
+ gimplify_and_add (t, pre_p);
+
+ t = size_binop (PLUS_EXPR, unshare_expr (va_size), size_int (32));
+ t = fold_convert (integer_type_node, t);
+ gimplify_assign (unshare_expr (ndx), t, pre_p);
+
+ t = build1 (LABEL_EXPR, void_type_node, lab_false2);
+ gimplify_and_add (t, pre_p);
+
+ gimplify_assign (array, stk, pre_p);
+
+ if (lab_over)
+ {
+ t = build1 (LABEL_EXPR, void_type_node, lab_over);
+ gimplify_and_add (t, pre_p);
+ }
+
+
+ /* Given the base array pointer (__array) and index to the subsequent
+ argument (__va_ndx), find the address:
+
+ __array + (AP).__va_ndx - (BYTES_BIG_ENDIAN && sizeof (TYPE) < 4
+ ? sizeof (TYPE)
+ : __va_size (TYPE))
+
+ The results are endian-dependent because values smaller than one word
+ are aligned differently. */
+
+
+ if (BYTES_BIG_ENDIAN && TREE_CODE (type_size) == INTEGER_CST)
+ {
+ t = fold_build2 (GE_EXPR, boolean_type_node, unshare_expr (type_size),
+ size_int (PARM_BOUNDARY / BITS_PER_UNIT));
+ t = fold_build3 (COND_EXPR, sizetype, t, unshare_expr (va_size),
+ unshare_expr (type_size));
+ size = t;
+ }
+ else
+ size = unshare_expr (va_size);
+
+ t = fold_convert (sizetype, unshare_expr (ndx));
+ t = build2 (MINUS_EXPR, sizetype, t, size);
+ addr = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (array), t);
+
+ addr = fold_convert (build_pointer_type (type), addr);
+ if (indirect)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
+
+
+/* Builtins. */
+
+enum xtensa_builtin
+{
+ XTENSA_BUILTIN_UMULSIDI3,
+ XTENSA_BUILTIN_THREAD_POINTER,
+ XTENSA_BUILTIN_SET_THREAD_POINTER,
+ XTENSA_BUILTIN_max
+};
+
+
+static void
+xtensa_init_builtins (void)
+{
+ tree ftype, decl;
+
+ ftype = build_function_type_list (unsigned_intDI_type_node,
+ unsigned_intSI_type_node,
+ unsigned_intSI_type_node, NULL_TREE);
+
+ decl = add_builtin_function ("__builtin_umulsidi3", ftype,
+ XTENSA_BUILTIN_UMULSIDI3, BUILT_IN_MD,
+ "__umulsidi3", NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ TREE_READONLY (decl) = 1;
+
+ if (TARGET_THREADPTR)
+ {
+ ftype = build_function_type (ptr_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_thread_pointer", ftype,
+ XTENSA_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ TREE_READONLY (decl) = 1;
+ TREE_NOTHROW (decl) = 1;
+
+ ftype = build_function_type_list (void_type_node, ptr_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_set_thread_pointer", ftype,
+ XTENSA_BUILTIN_SET_THREAD_POINTER,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ TREE_NOTHROW (decl) = 1;
+ }
+}
+
+
+static tree
+xtensa_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
+ bool ignore ATTRIBUTE_UNUSED)
+{
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ tree arg0, arg1;
+
+ switch (fcode)
+ {
+ case XTENSA_BUILTIN_UMULSIDI3:
+ arg0 = args[0];
+ arg1 = args[1];
+ if ((TREE_CODE (arg0) == INTEGER_CST && TREE_CODE (arg1) == INTEGER_CST)
+ || TARGET_MUL32_HIGH)
+ return fold_build2 (MULT_EXPR, unsigned_intDI_type_node,
+ fold_convert (unsigned_intDI_type_node, arg0),
+ fold_convert (unsigned_intDI_type_node, arg1));
+ break;
+
+ case XTENSA_BUILTIN_THREAD_POINTER:
+ case XTENSA_BUILTIN_SET_THREAD_POINTER:
+ break;
+
+ default:
+ internal_error ("bad builtin code");
+ break;
+ }
+
+ return NULL;
+}
+
+
+static rtx
+xtensa_expand_builtin (tree exp, rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ rtx arg;
+
+ switch (fcode)
+ {
+ case XTENSA_BUILTIN_UMULSIDI3:
+ /* The umulsidi3 builtin is just a mechanism to avoid calling the real
+ __umulsidi3 function when the Xtensa configuration can directly
+ implement it. If not, just call the function. */
+ return expand_call (exp, target, ignore);
+
+ case XTENSA_BUILTIN_THREAD_POINTER:
+ if (!target || !register_operand (target, Pmode))
+ target = gen_reg_rtx (Pmode);
+ emit_insn (gen_load_tp (target));
+ return target;
+
+ case XTENSA_BUILTIN_SET_THREAD_POINTER:
+ arg = expand_normal (CALL_EXPR_ARG (exp, 0));
+ if (!register_operand (arg, Pmode))
+ arg = copy_to_mode_reg (Pmode, arg);
+ emit_insn (gen_set_tp (arg));
+ return const0_rtx;
+
+ default:
+ internal_error ("bad builtin code");
+ }
+ return NULL_RTX;
+}
+
+/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. */
+
+static reg_class_t
+xtensa_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (CONSTANT_P (x) && CONST_DOUBLE_P (x))
+ return NO_REGS;
+
+ /* Don't use the stack pointer or hard frame pointer for reloads!
+ The hard frame pointer would normally be OK except that it may
+ briefly hold an incoming argument in the prologue, and reload
+ won't know that it is live because the hard frame pointer is
+ treated specially. */
+
+ if (rclass == AR_REGS || rclass == GR_REGS)
+ return RL_REGS;
+
+ return rclass;
+}
+
+/* Worker function for TARGET_PREFERRED_OUTPUT_RELOAD_CLASS. */
+
+static reg_class_t
+xtensa_preferred_output_reload_class (rtx x ATTRIBUTE_UNUSED,
+ reg_class_t rclass)
+{
+ /* Don't use the stack pointer or hard frame pointer for reloads!
+ The hard frame pointer would normally be OK except that it may
+ briefly hold an incoming argument in the prologue, and reload
+ won't know that it is live because the hard frame pointer is
+ treated specially. */
+
+ if (rclass == AR_REGS || rclass == GR_REGS)
+ return RL_REGS;
+
+ return rclass;
+}
+
+/* Worker function for TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+xtensa_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ int regno;
+
+ if (in_p && constantpool_mem_p (x))
+ {
+ if (rclass == FP_REGS)
+ return RL_REGS;
+
+ if (mode == QImode)
+ sri->icode = CODE_FOR_reloadqi_literal;
+ else if (mode == HImode)
+ sri->icode = CODE_FOR_reloadhi_literal;
+ }
+
+ regno = xt_true_regnum (x);
+ if (ACC_REG_P (regno))
+ return ((rclass == GR_REGS || rclass == RL_REGS) ? NO_REGS : RL_REGS);
+ if (rclass == ACC_REG)
+ return (GP_REG_P (regno) ? NO_REGS : RL_REGS);
+
+ return NO_REGS;
+}
+
+
+void
+order_regs_for_local_alloc (void)
+{
+ if (!leaf_function_p ())
+ {
+ memcpy (reg_alloc_order, reg_nonleaf_alloc_order,
+ FIRST_PSEUDO_REGISTER * sizeof (int));
+ }
+ else
+ {
+ int i, num_arg_regs;
+ int nxt = 0;
+
+ /* Use the AR registers in increasing order (skipping a0 and a1)
+	 but save the incoming argument registers as a last resort. */
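+      /* For example, if two words of arguments arrive in a2 and a3,
+	 the order begins a4, a5, ..., a15, and a2 and a3 are listed
+	 only after the other AR registers. */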
+ num_arg_regs = crtl->args.info.arg_words;
+ if (num_arg_regs > MAX_ARGS_IN_REGISTERS)
+ num_arg_regs = MAX_ARGS_IN_REGISTERS;
+ for (i = GP_ARG_FIRST; i < 16 - num_arg_regs; i++)
+ reg_alloc_order[nxt++] = i + num_arg_regs;
+ for (i = 0; i < num_arg_regs; i++)
+ reg_alloc_order[nxt++] = GP_ARG_FIRST + i;
+
+ /* List the coprocessor registers in order. */
+ for (i = 0; i < BR_REG_NUM; i++)
+ reg_alloc_order[nxt++] = BR_REG_FIRST + i;
+
+ /* List the FP registers in order for now. */
+ for (i = 0; i < 16; i++)
+ reg_alloc_order[nxt++] = FP_REG_FIRST + i;
+
+ /* GCC requires that we list *all* the registers.... */
+ reg_alloc_order[nxt++] = 0; /* a0 = return address */
+ reg_alloc_order[nxt++] = 1; /* a1 = stack pointer */
+ reg_alloc_order[nxt++] = 16; /* pseudo frame pointer */
+ reg_alloc_order[nxt++] = 17; /* pseudo arg pointer */
+
+ reg_alloc_order[nxt++] = ACC_REG_FIRST; /* MAC16 accumulator */
+ }
+}
+
+
+/* Some Xtensa targets support multiple bss sections. If the section
+ name ends with ".bss", add SECTION_BSS to the flags. */
+
+static unsigned int
+xtensa_multibss_section_type_flags (tree decl, const char *name, int reloc)
+{
+ unsigned int flags = default_section_type_flags (decl, name, reloc);
+ const char *suffix;
+
+ suffix = strrchr (name, '.');
+ if (suffix && strcmp (suffix, ".bss") == 0)
+ {
+ if (!decl || (TREE_CODE (decl) == VAR_DECL
+ && DECL_INITIAL (decl) == NULL_TREE))
+ flags |= SECTION_BSS; /* @nobits */
+ else
+ warning (0, "only uninitialized variables can be placed in a "
+ ".bss section");
+ }
+
+ return flags;
+}
+
+
+/* The literal pool stays with the function. */
+
+static section *
+xtensa_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx x ATTRIBUTE_UNUSED,
+ unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ return function_section (current_function_decl);
+}
+
+/* Worker function for TARGET_REGISTER_MOVE_COST. */
+
+static int
+xtensa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t from, reg_class_t to)
+{
+ if (from == to && from != BR_REGS && to != BR_REGS)
+ return 2;
+ else if (reg_class_subset_p (from, AR_REGS)
+ && reg_class_subset_p (to, AR_REGS))
+ return 2;
+ else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
+ return 3;
+ else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS))
+ return 3;
+ else
+ return 10;
+}
+
+/* Worker function for TARGET_MEMORY_MOVE_COST. */
+
+static int
+xtensa_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ reg_class_t rclass ATTRIBUTE_UNUSED,
+ bool in ATTRIBUTE_UNUSED)
+{
+ return 4;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
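+
+/* For instance, in the cases below a CONST_INT that appears as a PLUS
+   operand and fits in a signed 8-bit field costs 0, since it folds into
+   the addition, while a DImode shift is penalized at COSTS_N_INSNS (50)
+   because there is no direct instruction for it. */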
+
+static bool
+xtensa_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ case CONST_INT:
+ switch (outer_code)
+ {
+ case SET:
+ if (xtensa_simm12b (INTVAL (x)))
+ {
+ *total = 4;
+ return true;
+ }
+ break;
+ case PLUS:
+ if (xtensa_simm8 (INTVAL (x))
+ || xtensa_simm8x256 (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case AND:
+ if (xtensa_mask_immediate (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case COMPARE:
+ if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x)))
+ {
+ *total = 0;
+ return true;
+ }
+ break;
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT:
+	  /* No way to tell if X is the second operand, so be
+	     conservative. */
+	default:
+	  break;
+ }
+ if (xtensa_simm12b (INTVAL (x)))
+ *total = 5;
+ else if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 6;
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 5;
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_CONST16)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = 7;
+ return true;
+
+ case MEM:
+ {
+ int num_words =
+ (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) ? 2 : 1;
+
+ if (memory_address_p (GET_MODE (x), XEXP ((x), 0)))
+ *total = COSTS_N_INSNS (num_words);
+ else
+ *total = COSTS_N_INSNS (2*num_words);
+ return true;
+ }
+
+ case FFS:
+ case CTZ:
+ *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50);
+ return true;
+
+ case CLZ:
+ *total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50);
+ return true;
+
+ case NOT:
+ *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2);
+ return true;
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (GET_MODE (x) == DImode)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (GET_MODE (x) == DImode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case ABS:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
+ else if (xmode == DFmode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+
+ case PLUS:
+ case MINUS:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 1 : 50);
+ else if (xmode == DFmode || xmode == DImode)
+ *total = COSTS_N_INSNS (50);
+ else
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
+ case NEG:
+ *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 4 : 2);
+ return true;
+
+ case MULT:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50);
+ else if (xmode == DFmode)
+ *total = COSTS_N_INSNS (50);
+ else if (xmode == DImode)
+ *total = COSTS_N_INSNS (TARGET_MUL32_HIGH ? 10 : 50);
+ else if (TARGET_MUL32)
+ *total = COSTS_N_INSNS (4);
+ else if (TARGET_MAC16)
+ *total = COSTS_N_INSNS (16);
+ else if (TARGET_MUL16)
+ *total = COSTS_N_INSNS (12);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+
+ case DIV:
+ case MOD:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == SFmode)
+ {
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_DIV ? 8 : 50);
+ return true;
+ }
+ else if (xmode == DFmode)
+ {
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+ }
+ /* Fall through. */
+
+ case UDIV:
+ case UMOD:
+ {
+ enum machine_mode xmode = GET_MODE (x);
+ if (xmode == DImode)
+ *total = COSTS_N_INSNS (50);
+ else if (TARGET_DIV32)
+ *total = COSTS_N_INSNS (32);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+ }
+
+ case SQRT:
+ if (GET_MODE (x) == SFmode)
+ *total = COSTS_N_INSNS (TARGET_HARD_FLOAT_SQRT ? 8 : 50);
+ else
+ *total = COSTS_N_INSNS (50);
+ return true;
+
+ case SMIN:
+ case UMIN:
+ case SMAX:
+ case UMAX:
+ *total = COSTS_N_INSNS (TARGET_MINMAX ? 1 : 50);
+ return true;
+
+ case SIGN_EXTRACT:
+ case SIGN_EXTEND:
+ *total = COSTS_N_INSNS (TARGET_SEXT ? 1 : 2);
+ return true;
+
+ case ZERO_EXTRACT:
+ case ZERO_EXTEND:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+
+static bool
+xtensa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
+ > 4 * UNITS_PER_WORD);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE. */
+
+rtx
+xtensa_function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
+ bool outgoing)
+{
+ return gen_rtx_REG ((INTEGRAL_TYPE_P (valtype)
+ && TYPE_PRECISION (valtype) < BITS_PER_WORD)
+ ? SImode : TYPE_MODE (valtype),
+ outgoing ? GP_OUTGOING_RETURN : GP_RETURN);
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+
+static rtx
+xtensa_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG ((GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
+ ? SImode : mode, GP_RETURN);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. */
+
+static bool
+xtensa_function_value_regno_p (const unsigned int regno)
+{
+ return (regno == GP_RETURN);
+}
+
+/* The static chain is passed in memory.  Provide an rtx giving the
+   'mem' expression that denotes where it is stored, for both the
+   incoming and the outgoing case. */
+
+static rtx
+xtensa_static_chain (const_tree ARG_UNUSED (fndecl), bool incoming_p)
+{
+ rtx base = incoming_p ? arg_pointer_rtx : stack_pointer_rtx;
+ return gen_frame_mem (Pmode, plus_constant (base, -5 * UNITS_PER_WORD));
+}
+
+
+/* TRAMPOLINE_TEMPLATE: For Xtensa, the trampoline must perform an ENTRY
+ instruction with a minimal stack frame in order to get some free
+ registers. Once the actual call target is known, the proper stack frame
+ size is extracted from the ENTRY instruction at the target and the
+ current frame is adjusted to match. The trampoline then transfers
+ control to the instruction following the ENTRY at the target. Note:
+ this assumes that the target begins with an ENTRY instruction. */
+
+static void
+xtensa_asm_trampoline_template (FILE *stream)
+{
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+
+ fprintf (stream, "\t.begin no-transform\n");
+ fprintf (stream, "\tentry\tsp, %d\n", MIN_FRAME_SIZE);
+
+ if (use_call0)
+ {
+ /* Save the return address. */
+ fprintf (stream, "\tmov\ta10, a0\n");
+
+ /* Use a CALL0 instruction to skip past the constants and in the
+ process get the PC into A0. This allows PC-relative access to
+ the constants without relying on L32R. */
+ fprintf (stream, "\tcall0\t.Lskipconsts\n");
+ }
+ else
+ fprintf (stream, "\tj\t.Lskipconsts\n");
+
+ fprintf (stream, "\t.align\t4\n");
+ fprintf (stream, ".Lchainval:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lfnaddr:%s0\n", integer_asm_op (4, TRUE));
+ fprintf (stream, ".Lskipconsts:\n");
+
+ /* Load the static chain and function address from the trampoline. */
+ if (use_call0)
+ {
+ fprintf (stream, "\taddi\ta0, a0, 3\n");
+ fprintf (stream, "\tl32i\ta9, a0, 0\n");
+ fprintf (stream, "\tl32i\ta8, a0, 4\n");
+ }
+ else
+ {
+ fprintf (stream, "\tl32r\ta9, .Lchainval\n");
+ fprintf (stream, "\tl32r\ta8, .Lfnaddr\n");
+ }
+
+ /* Store the static chain. */
+ fprintf (stream, "\ts32i\ta9, sp, %d\n", MIN_FRAME_SIZE - 20);
+
+ /* Set the proper stack pointer value. */
+ fprintf (stream, "\tl32i\ta9, a8, 0\n");
+ fprintf (stream, "\textui\ta9, a9, %d, 12\n",
+ TARGET_BIG_ENDIAN ? 8 : 12);
+ fprintf (stream, "\tslli\ta9, a9, 3\n");
+ fprintf (stream, "\taddi\ta9, a9, %d\n", -MIN_FRAME_SIZE);
+ fprintf (stream, "\tsub\ta9, sp, a9\n");
+ fprintf (stream, "\tmovsp\tsp, a9\n");
+
+ if (use_call0)
+ /* Restore the return address. */
+ fprintf (stream, "\tmov\ta0, a10\n");
+
+ /* Jump to the instruction following the ENTRY. */
+ fprintf (stream, "\taddi\ta8, a8, 3\n");
+ fprintf (stream, "\tjx\ta8\n");
+
+ /* Pad size to a multiple of TRAMPOLINE_ALIGNMENT. */
+ if (use_call0)
+ fprintf (stream, "\t.byte\t0\n");
+ else
+ fprintf (stream, "\tnop\n");
+
+ fprintf (stream, "\t.end no-transform\n");
+}
+
+static void
+xtensa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain)
+{
+ rtx func = XEXP (DECL_RTL (fndecl), 0);
+ bool use_call0 = (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS);
+ int chain_off = use_call0 ? 12 : 8;
+ int func_off = use_call0 ? 16 : 12;
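+
+  /* CHAIN_OFF and FUNC_OFF are the byte offsets of the .Lchainval and
+     .Lfnaddr words in the template above; the CALL0 variant's offsets
+     are larger because its prologue also saves the return address. */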
+
+ emit_block_move (m_tramp, assemble_trampoline_template (),
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
+ emit_move_insn (adjust_address (m_tramp, SImode, chain_off), chain);
+ emit_move_insn (adjust_address (m_tramp, SImode, func_off), func);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__xtensa_sync_caches"),
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
+}
+
+
+#include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
new file mode 100644
index 000000000..0a096cdb5
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.h
@@ -0,0 +1,847 @@
+/* Definitions of Tensilica's Xtensa target machine for GNU compiler.
+ Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Free Software Foundation, Inc.
+ Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Get the Xtensa configuration settings. */
+#include "xtensa-config.h"
+
+/* External variables defined in xtensa.c. */
+
+extern unsigned xtensa_current_frame_size;
+
+/* Macros used in the machine description to select various Xtensa
+ configuration options. */
+#ifndef XCHAL_HAVE_MUL32_HIGH
+#define XCHAL_HAVE_MUL32_HIGH 0
+#endif
+#ifndef XCHAL_HAVE_RELEASE_SYNC
+#define XCHAL_HAVE_RELEASE_SYNC 0
+#endif
+#ifndef XCHAL_HAVE_S32C1I
+#define XCHAL_HAVE_S32C1I 0
+#endif
+#ifndef XCHAL_HAVE_THREADPTR
+#define XCHAL_HAVE_THREADPTR 0
+#endif
+#define TARGET_BIG_ENDIAN XCHAL_HAVE_BE
+#define TARGET_DENSITY XCHAL_HAVE_DENSITY
+#define TARGET_MAC16 XCHAL_HAVE_MAC16
+#define TARGET_MUL16 XCHAL_HAVE_MUL16
+#define TARGET_MUL32 XCHAL_HAVE_MUL32
+#define TARGET_MUL32_HIGH XCHAL_HAVE_MUL32_HIGH
+#define TARGET_DIV32 XCHAL_HAVE_DIV32
+#define TARGET_NSA XCHAL_HAVE_NSA
+#define TARGET_MINMAX XCHAL_HAVE_MINMAX
+#define TARGET_SEXT XCHAL_HAVE_SEXT
+#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS
+#define TARGET_HARD_FLOAT XCHAL_HAVE_FP
+#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV
+#define TARGET_HARD_FLOAT_RECIP XCHAL_HAVE_FP_RECIP
+#define TARGET_HARD_FLOAT_SQRT XCHAL_HAVE_FP_SQRT
+#define TARGET_HARD_FLOAT_RSQRT XCHAL_HAVE_FP_RSQRT
+#define TARGET_ABS XCHAL_HAVE_ABS
+#define TARGET_ADDX XCHAL_HAVE_ADDX
+#define TARGET_RELEASE_SYNC XCHAL_HAVE_RELEASE_SYNC
+#define TARGET_S32C1I XCHAL_HAVE_S32C1I
+#define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS
+#define TARGET_THREADPTR XCHAL_HAVE_THREADPTR
+
+#define TARGET_DEFAULT \
+ ((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \
+ MASK_SERIALIZE_VOLATILE)
+
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() \
+ do { \
+ builtin_assert ("cpu=xtensa"); \
+ builtin_assert ("machine=xtensa"); \
+ builtin_define ("__xtensa__"); \
+ builtin_define ("__XTENSA__"); \
+ builtin_define ("__XTENSA_WINDOWED_ABI__"); \
+ builtin_define (TARGET_BIG_ENDIAN ? "__XTENSA_EB__" : "__XTENSA_EL__"); \
+ if (!TARGET_HARD_FLOAT) \
+ builtin_define ("__XTENSA_SOFT_FLOAT__"); \
+ } while (0)
+
+#define CPP_SPEC " %(subtarget_cpp_spec) "
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC },
+
+/* Target machine storage layout */
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+/* Define this if most significant word of a multiword number is the lowest. */
+#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0)
+
+#define MAX_BITS_PER_WORD 32
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD 4
+#define MIN_UNITS_PER_WORD 4
+
+/* Width of a floating point register. */
+#define UNITS_PER_FPREG 4
+
+/* Size in bits of various types on the target machine. */
+#define INT_TYPE_SIZE 32
+#define SHORT_TYPE_SIZE 16
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Allocation boundary (in *bits*) for storing pointers in memory. */
+#define POINTER_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY 32
+
+/* Allocation boundary (in *bits*) for the code of a function. */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after 'int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Every structure's size must be a multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* There is no point aligning anything to a rounder boundary than this. */
+#define BIGGEST_ALIGNMENT 128
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* Promote integer modes smaller than a word to SImode. Set UNSIGNEDP
+ for QImode, because there is no 8-bit load from memory with sign
+ extension. Otherwise, leave UNSIGNEDP alone, since Xtensa has 16-bit
+ loads both with and without sign extension. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ do { \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+ { \
+ if ((MODE) == QImode) \
+ (UNSIGNEDP) = 1; \
+ (MODE) = SImode; \
+ } \
+ } while (0)
+
+/* Imitate the way many other C compilers handle alignment of
+ bitfields and the structures that contain them. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Disable the use of word-sized or smaller complex modes for structures,
+ and for function arguments in particular, where they cause problems with
+ register a7. The xtensa_copy_incoming_a7 function assumes that there is
+ a single reference to an argument in a7, but with small complex modes the
+ real and imaginary components may be extracted separately, leading to two
+ uses of the register, only one of which would be replaced. */
+#define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) \
+ ((MODE) == CQImode || (MODE) == CHImode)
+
+/* Align string constants and constructors to at least a word boundary.
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that 'strcpy' calls that copy
+ constants can be done inline. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
+ && (ALIGN) < BITS_PER_WORD \
+ ? BITS_PER_WORD \
+ : (ALIGN))
+
+/* Align arrays, unions and records to at least a word boundary.
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that 'strcpy' calls
+ that copy constants to character arrays can be done inline. */
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ((((ALIGN) < BITS_PER_WORD) \
+ && (TREE_CODE (TYPE) == ARRAY_TYPE \
+ || TREE_CODE (TYPE) == UNION_TYPE \
+ || TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* Operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Xtensa loads are zero-extended by default. */
+#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
+/* Standard register usage. */
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ The fake frame pointer and argument pointer will never appear in
+ the generated code, since they will always be eliminated and replaced
+ by either the stack pointer or the hard frame pointer.
+
+ 0 - 15 AR[0] - AR[15]
+ 16 FRAME_POINTER (fake = initial sp)
+ 17 ARG_POINTER (fake = initial sp + framesize)
+ 18 BR[0] for floating-point CC
+ 19 - 34 FR[0] - FR[15]
+ 35 MAC16 accumulator */
+
+#define FIRST_PSEUDO_REGISTER 36
+
+/* Return the stabs register number to use for REGNO. */
+#define DBX_REGISTER_NUMBER(REGNO) xtensa_dbx_register_number (REGNO)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator. */
+#define FIXED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+{ \
+ 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, \
+}
+
+/* For non-leaf procedures on Xtensa processors, the allocation order
+ is as specified below by REG_ALLOC_ORDER. For leaf procedures, we
+ want to use the lowest numbered registers first to minimize
+ register window overflows. However, local-alloc is not smart
+ enough to consider conflicts with incoming arguments. If an
+ incoming argument in a2 is live throughout the function and
+ local-alloc decides to use a2, then the incoming argument must
+ either be spilled or copied to another register. To get around
+ this, we define ADJUST_REG_ALLOC_ORDER to redefine
+ reg_alloc_order for leaf functions such that lowest numbered
+ registers are used first with the exception that the incoming
+ argument registers are not used until after other register choices
+ have been exhausted. */
+
+#define REG_ALLOC_ORDER \
+{ 8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, \
+ 18, \
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \
+ 0, 1, 16, 17, \
+ 35, \
+}
+
+#define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
+
+/* For Xtensa, the only point of this is to prevent GCC from otherwise
+ giving preference to call-used registers. To minimize window
+ overflows for the AR registers, we want to give preference to the
+ lower-numbered AR registers. For other register files, which are
+ not windowed, we still prefer call-used registers, if there are any. */
+extern const char xtensa_leaf_regs[FIRST_PSEUDO_REGISTER];
+#define LEAF_REGISTERS xtensa_leaf_regs
+
+/* For Xtensa, no remapping is necessary, but this macro must be
+ defined if LEAF_REGISTERS is defined. */
+#define LEAF_REG_REMAP(REGNO) (REGNO)
+
+/* This must be declared if LEAF_REGISTERS is set. */
+extern int leaf_function;
+
+/* Internal macros to classify a register number. */
+
+/* 16 address registers + fake registers */
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 17
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+
+/* Coprocessor registers */
+#define BR_REG_FIRST 18
+#define BR_REG_LAST 18
+#define BR_REG_NUM (BR_REG_LAST - BR_REG_FIRST + 1)
+
+/* 16 floating-point registers */
+#define FP_REG_FIRST 19
+#define FP_REG_LAST 34
+#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1)
+
+/* MAC16 accumulator */
+#define ACC_REG_FIRST 35
+#define ACC_REG_LAST 35
+#define ACC_REG_NUM (ACC_REG_LAST - ACC_REG_FIRST + 1)
+
+#define GP_REG_P(REGNO) ((unsigned) ((REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define BR_REG_P(REGNO) ((unsigned) ((REGNO) - BR_REG_FIRST) < BR_REG_NUM)
+#define FP_REG_P(REGNO) ((unsigned) ((REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define ACC_REG_P(REGNO) ((unsigned) ((REGNO) - ACC_REG_FIRST) < ACC_REG_NUM)
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REG_P (REGNO) ? \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG) : \
+ ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
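+
+/* For example, a DImode or DFmode value (8 bytes) starting in an AR
+ register needs (8 + 4 - 1) / 4 = 2 consecutive registers, assuming
+ UNITS_PER_WORD == 4. */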
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode
+ MODE. */
+extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ xtensa_hard_regno_mode_ok[(int) (MODE)][(REGNO)]
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((GET_MODE_CLASS (MODE1) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE1) == MODE_COMPLEX_FLOAT) \
+ == (GET_MODE_CLASS (MODE2) == MODE_FLOAT || \
+ GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + 1)
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 7)
+
+/* The register number of the frame pointer register, which is used to
+ access automatic variables in the stack frame. For Xtensa, this
+ register never appears in the output. It is always eliminated to
+ either the stack pointer or the hard frame pointer. */
+#define FRAME_POINTER_REGNUM (GP_REG_FIRST + 16)
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM (GP_REG_FIRST + 17)
+
+/* For now we don't try to use the full set of boolean registers. Without
+ software pipelining of FP operations, there's not much to gain and it's
+ a real pain to get them reloaded. */
+#define FPCC_REGNUM (BR_REG_FIRST + 0)
+
+/* It is as good or better to call a constant function address than to
+ call an address kept in a register. */
+#define NO_FUNCTION_CSE 1
+
+/* Xtensa processors have "register windows". GCC does not currently
+ take advantage of the possibility of variable-sized windows; instead,
+ we use a fixed window size of 8. */
+
+#define INCOMING_REGNO(OUT) \
+ ((GP_REG_P (OUT) && \
+ ((unsigned) ((OUT) - GP_REG_FIRST) >= WINDOW_SIZE)) ? \
+ (OUT) - WINDOW_SIZE : (OUT))
+
+#define OUTGOING_REGNO(IN) \
+ ((GP_REG_P (IN) && \
+ ((unsigned) ((IN) - GP_REG_FIRST) < WINDOW_SIZE)) ? \
+ (IN) + WINDOW_SIZE : (IN))
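+
+/* For example, with WINDOW_SIZE == 8, a value the caller places in
+ outgoing register a10 arrives in the callee as a2 after the window
+ rotates: INCOMING_REGNO (10) == 2 and OUTGOING_REGNO (2) == 10. */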
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. */
+enum reg_class
+{
+ NO_REGS, /* no registers in set */
+ BR_REGS, /* coprocessor boolean registers */
+ FP_REGS, /* floating point registers */
+ ACC_REG, /* MAC16 accumulator */
+ SP_REG, /* sp register (aka a1) */
+ RL_REGS, /* preferred reload regs (not sp or fp) */
+ GR_REGS, /* integer registers except sp */
+ AR_REGS, /* all integer registers */
+ ALL_REGS, /* all registers */
+ LIM_REG_CLASSES /* max value + 1 */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS AR_REGS
+
+/* An initializer containing the names of the register classes as C
+ string constants. These names are used in writing some of the
+ debugging dumps. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "BR_REGS", \
+ "FP_REGS", \
+ "ACC_REG", \
+ "SP_REG", \
+ "RL_REGS", \
+ "GR_REGS", \
+ "AR_REGS", \
+ "ALL_REGS" \
+}
+
+/* Contents of the register classes. The Nth integer specifies the
+ contents of class N. The way the integer MASK is interpreted is
+ that register R is in the class if 'MASK & (1 << R)' is 1. */
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000 }, /* no registers */ \
+ { 0x00040000, 0x00000000 }, /* coprocessor boolean registers */ \
+ { 0xfff80000, 0x00000007 }, /* floating-point registers */ \
+ { 0x00000000, 0x00000008 }, /* MAC16 accumulator */ \
+ { 0x00000002, 0x00000000 }, /* stack pointer register */ \
+ { 0x0000ff7d, 0x00000000 }, /* preferred reload registers */ \
+ { 0x0000fffd, 0x00000000 }, /* general-purpose registers */ \
+ { 0x0003ffff, 0x00000000 }, /* integer registers */ \
+ { 0xffffffff, 0x0000000f } /* all registers */ \
+}
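+
+/* For example, f0 is hard register 19, so it is in FP_REGS because
+ 0xfff80000 & (1 << 19) is nonzero; registers 32-34 fall into the
+ second mask word, 0x00000007. */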
+
+#define IRA_COVER_CLASSES \
+{ \
+ BR_REGS, FP_REGS, ACC_REG, AR_REGS, LIM_REG_CLASSES \
+}
+
+/* A C expression whose value is a register class containing hard
+ register REGNO. In general there is more than one such class;
+ choose a class which is "minimal", meaning that no smaller class
+ also contains the register. */
+extern const enum reg_class xtensa_regno_to_class[FIRST_PSEUDO_REGISTER];
+
+#define REGNO_REG_CLASS(REGNO) xtensa_regno_to_class[ (REGNO) ]
+
+/* Use the Xtensa AR register file for base registers.
+ No index registers. */
+#define BASE_REG_CLASS AR_REGS
+#define INDEX_REG_CLASS NO_REGS
+
+/* The small_register_classes_for_mode_p hook must always return true for
+ Xtrnase, because all of the 16 AR registers may be explicitly used in
+ the RTL, as either incoming or outgoing arguments. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+#define CLASS_UNITS(mode, size) \
+ ((GET_MODE_SIZE (mode) + (size) - 1) / (size))
+
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (CLASS_UNITS (MODE, UNITS_PER_WORD))
+
+
+/* Stack layout; function entry, exit and calling. */
+
+#define STACK_GROWS_DOWNWARD
+
+/* Offset within stack frame to start allocating local variables at. */
+#define STARTING_FRAME_OFFSET \
+ crtl->outgoing_args_size
+
+/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so
+ they are eliminated to either the stack pointer or hard frame pointer. */
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Specify the initial difference between the specified pair of registers. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ do { \
+ compute_frame_size (get_frame_size ()); \
+ switch (FROM) \
+ { \
+ case FRAME_POINTER_REGNUM: \
+ (OFFSET) = 0; \
+ break; \
+ case ARG_POINTER_REGNUM: \
+ (OFFSET) = xtensa_current_frame_size; \
+ break; \
+ default: \
+ gcc_unreachable (); \
+ } \
+ } while (0)
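+
+/* For example, if xtensa_current_frame_size is 32, eliminating the
+ argument pointer yields an offset of 32 while eliminating the frame
+ pointer yields 0, matching their definitions above as initial
+ sp + framesize and initial sp, respectively. */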
+
+/* If defined, the maximum amount of space required for outgoing
+ arguments will be computed and placed into the variable
+ 'crtl->outgoing_args_size'. No space will be pushed
+ onto the stack for each call; instead, the function prologue
+ should increase the stack frame size by this amount. */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* Offset from the argument pointer register to the first argument's
+ address. On some machines it may depend on the data type of the
+ function. If 'ARGS_GROW_DOWNWARD', this is the offset to the
+ location above the first argument's address. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Align stack frames on 128 bits for Xtensa. This is necessary for
+ 128-bit datatypes defined in TIE (e.g., for Vectra). */
+#define STACK_BOUNDARY 128
+
+/* Use a fixed register window size of 8. */
+#define WINDOW_SIZE 8
+
+/* Symbolic macros for the registers used to return integer, floating
+ point, and values of coprocessor and user-defined modes. */
+#define GP_RETURN (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_OUTGOING_RETURN (GP_REG_FIRST + 2)
+
+/* Symbolic macros for the first/last argument registers. */
+#define GP_ARG_FIRST (GP_REG_FIRST + 2)
+#define GP_ARG_LAST (GP_REG_FIRST + 7)
+#define GP_OUTGOING_ARG_FIRST (GP_REG_FIRST + 2 + WINDOW_SIZE)
+#define GP_OUTGOING_ARG_LAST (GP_REG_FIRST + 7 + WINDOW_SIZE)
+
+#define MAX_ARGS_IN_REGISTERS 6
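+
+/* For example, with a call8 the caller places the first six words of
+ arguments in a10-a15 (GP_OUTGOING_ARG_FIRST/LAST); the window
+ rotation makes those same words appear to the callee as a2-a7
+ (GP_ARG_FIRST/LAST). */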
+
+/* Don't worry about compatibility with PCC. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* A C expression that is nonzero if REGNO is the number of a hard
+ register in which function arguments are sometimes passed. This
+ does *not* include implicit arguments such as the static chain and
+ the structure-value address. On many machines, no registers can be
+ used for this purpose since all function arguments are pushed on
+ the stack. */
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((N) >= GP_OUTGOING_ARG_FIRST && (N) <= GP_OUTGOING_ARG_LAST)
+
+/* Record the number of argument words seen so far, along with a flag to
+ indicate whether these are incoming arguments. (FUNCTION_INCOMING_ARG
+ is used for both incoming and outgoing args, so a separate flag is
+ needed.) */
+typedef struct xtensa_args
+{
+ int arg_words;
+ int incoming;
+} CUMULATIVE_ARGS;
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+ init_cumulative_args (&CUM, 0)
+
+#define INIT_CUMULATIVE_INCOMING_ARGS(CUM, FNTYPE, LIBNAME) \
+ init_cumulative_args (&CUM, 1)
+
+/* Profiling Xtensa code is typically done with the built-in profiling
+ feature of Tensilica's instruction set simulator, which does not
+ require any compiler support. Profiling code on a real (i.e.,
+ non-simulated) Xtensa processor is currently only supported by
+ GNU/Linux with glibc. The glibc version of _mcount doesn't require
+ counter variables. The _mcount function needs the current PC and
+ the current return address to identify an arc in the call graph.
+ Pass the current return address as the first argument; the current
+ PC is available as a0 in _mcount's register window. Both of these
+ values contain window size information in the two most significant
+ bits; we assume that _mcount will mask off those bits. The call to
+ _mcount uses a window size of 8 to make sure that it doesn't clobber
+ any incoming argument values. */
+
+#define NO_PROFILE_COUNTERS 1
+
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ fprintf (FILE, "\t%s\ta10, a0\n", TARGET_DENSITY ? "mov.n" : "mov"); \
+ if (flag_pic) \
+ { \
+ fprintf (FILE, "\tmovi\ta8, _mcount@PLT\n"); \
+ fprintf (FILE, "\tcallx8\ta8\n"); \
+ } \
+ else \
+ fprintf (FILE, "\tcall8\t_mcount\n"); \
+ } while (0)
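+
+/* For example, on a density-enabled core compiling non-PIC code this
+ emits:
+
+ mov.n a10, a0
+ call8 _mcount
+
+ passing the return address in a10, the first outgoing argument
+ register. */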
+
+/* Stack pointer value doesn't matter at exit. */
+#define EXIT_IGNORE_STACK 1
+
+/* Size in bytes of the trampoline, as an integer. Make sure this is
+ a multiple of TRAMPOLINE_ALIGNMENT to avoid -Wpadded warnings. */
+#define TRAMPOLINE_SIZE (TARGET_CONST16 || TARGET_ABSOLUTE_LITERALS ? 60 : 52)
+
+/* Alignment required for trampolines, in bits. */
+#define TRAMPOLINE_ALIGNMENT 32
+
+/* If defined, a C expression that produces the machine-specific code
+ to setup the stack so that arbitrary frames can be accessed.
+
+ On Xtensa, a stack back-trace must always begin from the stack pointer,
+ so that the register overflow save area can be located. However, the
+ stack-walking code in GCC always begins from the hard_frame_pointer
+ register, not the stack pointer. The frame pointer is usually equal
+ to the stack pointer, but the __builtin_return_address and
+ __builtin_frame_address functions will not work if count > 0 and
+ they are called from a routine that uses alloca. These functions
+ are not guaranteed to work at all if count > 0, so perhaps that is OK.
+
+ A nicer solution would be to allow the architecture-specific files to
+ specify whether to start from the stack pointer or frame pointer. That
+ would also allow us to skip the machine->accesses_prev_frame stuff that
+ we currently need to ensure that there is a frame pointer when these
+ builtin functions are used. */
+
+#define SETUP_FRAME_ADDRESSES xtensa_setup_frame_addresses
+
+/* A C expression whose value is RTL representing the address in a
+ stack frame where the pointer to the caller's frame is stored.
+ Assume that FRAMEADDR is an RTL expression for the address of the
+ stack frame itself.
+
+ For Xtensa, there is no easy way to get the frame pointer if it is
+ not equivalent to the stack pointer. Moreover, the result of this
+ macro is used for continuing to walk back up the stack, so it must
+ return the stack pointer address. Thus, there is some inconsistency
+ here in that __builtin_frame_address will return the frame pointer
+ when count == 0 and the stack pointer when count > 0. */
+
+#define DYNAMIC_CHAIN_ADDRESS(frame) \
+ gen_rtx_PLUS (Pmode, frame, GEN_INT (-3 * UNITS_PER_WORD))
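+
+/* With UNITS_PER_WORD == 4 this evaluates to frame - 12, which under
+ the windowed ABI should be the base-save-area slot holding the
+ caller's stack pointer (a1), i.e. the pointer to the previous
+ frame. */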
+
+/* Define this if the return address of a particular stack frame is
+ accessed from the frame pointer of the previous stack frame. */
+#define RETURN_ADDR_IN_PREVIOUS_FRAME
+
+/* A C expression whose value is RTL representing the value of the
+ return address for the frame COUNT steps up from the current
+ frame, after the prologue. */
+#define RETURN_ADDR_RTX xtensa_return_addr
+
+/* Addressing modes, and classification of registers for them. */
+
+/* C expressions which are nonzero if register number NUM is suitable
+ for use as a base or index register in operand addresses. */
+
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ (GP_REG_P (NUM) || GP_REG_P ((unsigned) reg_renumber[NUM]))
+
+/* C expressions that are nonzero if X (assumed to be a `reg' RTX) is
+ valid for use as a base or index register. */
+
+#ifdef REG_OK_STRICT
+#define REG_OK_STRICT_FLAG 1
+#else
+#define REG_OK_STRICT_FLAG 0
+#endif
+
+#define BASE_REG_P(X, STRICT) \
+ ((!(STRICT) && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
+ || REGNO_OK_FOR_BASE_P (REGNO (X)))
+
+#define REG_OK_FOR_INDEX_P(X) 0
+#define REG_OK_FOR_BASE_P(X) BASE_REG_P (X, REG_OK_STRICT_FLAG)
+
+/* Maximum number of registers that can appear in a valid memory address. */
+#define MAX_REGS_PER_ADDRESS 1
+
+/* A C expression that is 1 if the RTX X is a constant which is a
+ valid address. This is defined to be the same as 'CONSTANT_P (X)',
+ but rejecting CONST_DOUBLE. */
+#define CONSTANT_ADDRESS_P(X) \
+ ((GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH \
+ || (GET_CODE (X) == CONST)))
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+#define LEGITIMATE_CONSTANT_P(X) (! xtensa_tls_referenced_p (X))
+
+/* A C expression that is nonzero if X is a legitimate immediate
+ operand on the target machine when generating position independent
+ code. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ ((GET_CODE (X) != SYMBOL_REF \
+ || (SYMBOL_REF_LOCAL_P (X) && !SYMBOL_REF_EXTERNAL_P (X))) \
+ && GET_CODE (X) != LABEL_REF \
+ && GET_CODE (X) != CONST)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE (SImode)
+
+/* Define this as 1 if 'char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 4
+#define MAX_MOVE_MAX 4
+
+/* Prefer word-sized loads. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Shift instructions ignore all but the low-order few bits. */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1)
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode SImode
+
+/* A function address in a call instruction is a word address (for
+ indexing purposes) so give the MEM rtx a word's mode. */
+#define FUNCTION_MODE SImode
+
+#define BRANCH_COST(speed_p, predictable_p) 3
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+#define REGISTER_NAMES \
+{ \
+ "a0", "sp", "a2", "a3", "a4", "a5", "a6", "a7", \
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", \
+ "fp", "argp", "b0", \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "acc" \
+}
+
+/* If defined, a C initializer for an array of structures containing a
+ name and a register number. This macro defines additional names
+ for hard registers, thus allowing the 'asm' option in declarations
+ to refer to registers using alternate names. */
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ { "a1", 1 + GP_REG_FIRST } \
+}
+
+#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) print_operand_address (FILE, ADDR)
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* Declare an uninitialized external linkage data object. */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* This is how to output an element of a case-vector that is absolute. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+ fprintf (STREAM, "%s%sL%u\n", integer_asm_op (4, TRUE), \
+ LOCAL_LABEL_PREFIX, VALUE)
+
+/* This is how to output an element of a case-vector that is relative.
+ This is used for pc-relative code. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+ do { \
+ fprintf (STREAM, "%s%sL%u-%sL%u\n", integer_asm_op (4, TRUE), \
+ LOCAL_LABEL_PREFIX, (VALUE), \
+ LOCAL_LABEL_PREFIX, (REL)); \
+ } while (0)
+
+/* This is how to output an assembler line that says to advance the
+ location counter to a multiple of 2**LOG bytes. */
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) \
+ do { \
+ if ((LOG) != 0) \
+ fprintf (STREAM, "\t.align\t%d\n", 1 << (LOG)); \
+ } while (0)
+
+/* Indicate that jump tables go in the text section. This is
+ necessary when compiling PIC code. */
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic)
+
+
+/* Define the strings to put out for each section in the object file. */
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+
+/* Define output to appear before the constant pool. */
+#define ASM_OUTPUT_POOL_PROLOGUE(FILE, FUNNAME, FUNDECL, SIZE) \
+ do { \
+ if ((SIZE) > 0) \
+ { \
+ resolve_unique_section ((FUNDECL), 0, flag_function_sections); \
+ switch_to_section (function_section (FUNDECL)); \
+ fprintf (FILE, "\t.literal_position\n"); \
+ } \
+ } while (0)
+
+
+/* A C statement (with or without semicolon) to output a constant in
+ the constant pool, if it needs special treatment. */
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY(FILE, X, MODE, ALIGN, LABELNO, JUMPTO) \
+ do { \
+ xtensa_output_literal (FILE, X, MODE, LABELNO); \
+ goto JUMPTO; \
+ } while (0)
+
+/* How to start an assembler comment. */
+#define ASM_COMMENT_START "#"
+
+/* Exception handling. Xtensa uses much of the standard DWARF2 unwinding
+ machinery, but the variable size register window save areas are too
+ complicated to efficiently describe with CFI entries. The CFA must
+ still be specified in DWARF so that DW_AT_frame_base is set correctly
+ for debugging. */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 0)
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (0)
+#define DWARF_FRAME_REGISTERS 16
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) + 2 : INVALID_REGNUM)
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (flag_pic \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+ : DW_EH_PE_absptr)
+
+/* Emit a PC-relative relocation. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs ("@pcrel", FILE); \
+ } while (0)
+
+/* The Xtensa constant pool breaks the devices crtstuff.c uses to control
+ the section in which code resides, so this must be written as asm
+ code. Use
+ a MOVI and let the assembler relax it -- for the .init and .fini
+ sections, the assembler knows to put the literal in the right
+ place. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ movi\ta8, " USER_LABEL_PREFIX #FUNC "\n\
+ callx8\ta8\n" \
+ TEXT_SECTION_ASM_OP);
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
new file mode 100644
index 000000000..d6eb54891
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.md
@@ -0,0 +1,1914 @@
+;; GCC machine description for Tensilica's Xtensa architecture.
+;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Free Software Foundation, Inc.
+;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+(define_constants [
+ (A0_REG 0)
+ (A1_REG 1)
+ (A7_REG 7)
+ (A8_REG 8)
+
+ (UNSPEC_NOP 2)
+ (UNSPEC_PLT 3)
+ (UNSPEC_RET_ADDR 4)
+ (UNSPEC_TPOFF 5)
+ (UNSPEC_DTPOFF 6)
+ (UNSPEC_TLS_FUNC 7)
+ (UNSPEC_TLS_ARG 8)
+ (UNSPEC_TLS_CALL 9)
+ (UNSPEC_TP 10)
+ (UNSPEC_MEMW 11)
+
+ (UNSPECV_SET_FP 1)
+ (UNSPECV_ENTRY 2)
+ (UNSPECV_S32RI 4)
+ (UNSPECV_S32C1I 5)
+ (UNSPECV_EH_RETURN 6)
+ (UNSPECV_SET_TP 7)
+])
+
+;; This code iterator allows signed and unsigned widening multiplications
+;; to use the same template.
+(define_code_iterator any_extend [sign_extend zero_extend])
+
+;; <u> expands to an empty string when doing a signed operation and
+;; "u" when doing an unsigned operation.
+(define_code_attr u [(sign_extend "") (zero_extend "u")])
+
+;; <su> is like <u>, but the signed form expands to "s" rather than "".
+(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+
+;; This code iterator allows four integer min/max operations to be
+;; generated from one template.
+(define_code_iterator any_minmax [smin umin smax umax])
+
+;; <minmax> expands to the opcode name for any_minmax operations.
+(define_code_attr minmax [(smin "min") (umin "minu")
+ (smax "max") (umax "maxu")])
+
+;; This code iterator is for floating-point comparisons.
+(define_code_iterator any_scc_sf [eq lt le uneq unlt unle unordered])
+(define_code_attr scc_sf [(eq "oeq") (lt "olt") (le "ole")
+ (uneq "ueq") (unlt "ult") (unle "ule")
+ (unordered "un")])
+
+;; This iterator and attribute allow most atomic operations to be
+;; generated from one template.
+(define_code_iterator ATOMIC [and ior xor plus minus mult])
+(define_code_attr atomic [(and "and") (ior "ior") (xor "xor")
+ (plus "add") (minus "sub") (mult "nand")])
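+
+;; Note that RTL has no nand code, so the otherwise-unused mult code is
+;; borrowed here to stand for the atomic NAND operation.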
+
+;; This mode iterator allows the HI and QI patterns to be defined from
+;; the same template.
+(define_mode_iterator HQI [HI QI])
+
+
+;; Attributes.
+
+(define_attr "type"
+ "unknown,jump,call,load,store,move,arith,multi,nop,farith,fmadd,fdiv,fsqrt,fconv,fload,fstore,mul16,mul32,div32,mac16,rsr,wsr,entry"
+ (const_string "unknown"))
+
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,SF,DF,BL"
+ (const_string "unknown"))
+
+(define_attr "length" "" (const_int 1))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "type" "multi")])
+
+
+;; Pipeline model.
+
+;; The Xtensa basically has a simple 5-stage RISC pipeline.
+;; Most instructions complete in 1 cycle, and it is OK to assume that
+;; everything is fully pipelined. The exceptions have special insn
+;; reservations in the pipeline description below. The Xtensa can
+;; issue one instruction per cycle, so defining CPU units is unnecessary.
+
+(define_insn_reservation "xtensa_any_insn" 1
+ (eq_attr "type" "!load,fload,rsr,mul16,mul32,fmadd,fconv")
+ "nothing")
+
+(define_insn_reservation "xtensa_memory" 2
+ (eq_attr "type" "load,fload")
+ "nothing")
+
+(define_insn_reservation "xtensa_sreg" 2
+ (eq_attr "type" "rsr")
+ "nothing")
+
+(define_insn_reservation "xtensa_mul16" 2
+ (eq_attr "type" "mul16")
+ "nothing")
+
+(define_insn_reservation "xtensa_mul32" 2
+ (eq_attr "type" "mul32")
+ "nothing")
+
+(define_insn_reservation "xtensa_fmadd" 4
+ (eq_attr "type" "fmadd")
+ "nothing")
+
+(define_insn_reservation "xtensa_fconv" 2
+ (eq_attr "type" "fconv")
+ "nothing")
+
+;; Include predicates and constraints.
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Addition.
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
+ (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
+ (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
+ ""
+ "@
+ add.n\t%0, %1, %2
+ addi.n\t%0, %1, %d2
+ add\t%0, %1, %2
+ addi\t%0, %1, %d2
+ addmi\t%0, %1, %x2"
+ [(set_attr "type" "arith,arith,arith,arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "2,2,3,3,3")])
+
+(define_insn "*addx"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "addsubx_operand" "i"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_ADDX"
+ "addx%3\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (plus:SF (match_operand:SF 1 "register_operand" "%f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "add.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Subtraction.
+
+(define_insn "subsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "sub\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "*subx"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (minus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 3 "addsubx_operand" "i"))
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_ADDX"
+ "subx%3\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (minus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "sub.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Multiplication.
+
+(define_expand "<u>mulsidi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand"))
+ (any_extend:DI (match_operand:SI 2 "register_operand"))))]
+ "TARGET_MUL32_HIGH"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+ emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]),
+ operands[1], operands[2]));
+ emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), temp));
+ DONE;
+})
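+
+;; The widening multiply above is composed from two 32-bit products:
+;; mulsi3 (mull) supplies the low word of the result and
+;; <u>mulsi3_highpart (mulsh/muluh) the high word, so no 64-bit
+;; multiplier is needed.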
+
+(define_insn "<u>mulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "r")))
+ (const_int 32))))]
+ "TARGET_MUL32_HIGH"
+ "mul<su>h\t%0, %1, %2"
+ [(set_attr "type" "mul32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mult:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_MUL32"
+ "mull\t%0, %1, %2"
+ [(set_attr "type" "mul32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=C,A")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "%r,r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "r,r"))))]
+ "TARGET_MUL16 || TARGET_MAC16"
+ "@
+ mul16s\t%0, %1, %2
+ mul.aa.ll\t%1, %2"
+ [(set_attr "type" "mul16,mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "umulhisi3"
+ [(set (match_operand:SI 0 "register_operand" "=C,A")
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "register_operand" "%r,r"))
+ (zero_extend:SI
+ (match_operand:HI 2 "register_operand" "r,r"))))]
+ "TARGET_MUL16 || TARGET_MAC16"
+ "@
+ mul16u\t%0, %1, %2
+ umul.aa.ll\t%1, %2"
+ [(set_attr "type" "mul16,mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "muladdhisi"
+ [(set (match_operand:SI 0 "register_operand" "=A")
+ (plus:SI (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "register_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "r")))
+ (match_operand:SI 3 "register_operand" "0")))]
+ "TARGET_MAC16"
+ "mula.aa.ll\t%1, %2"
+ [(set_attr "type" "mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsubhisi"
+ [(set (match_operand:SI 0 "register_operand" "=A")
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 2 "register_operand" "%r"))
+ (sign_extend:SI
+ (match_operand:HI 3 "register_operand" "r")))))]
+ "TARGET_MAC16"
+ "muls.aa.ll\t%2, %3"
+ [(set_attr "type" "mac16")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mult:SF (match_operand:SF 1 "register_operand" "%f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "mul.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "fmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "0")))]
+ "TARGET_HARD_FLOAT"
+ "madd.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+;; Note that (C - A*B) = (-A*B + C)
+(define_insn "fnmasf4"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (fma:SF (neg:SF (match_operand:SF 1 "register_operand" "f"))
+ (match_operand:SF 2 "register_operand" "f")
+ (match_operand:SF 3 "register_operand" "0")))]
+ "TARGET_HARD_FLOAT"
+ "msub.s\t%0, %1, %2"
+ [(set_attr "type" "fmadd")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Division.
+
+(define_insn "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "quos\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "quou\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_DIV"
+ "div.s\t%0, %1, %2"
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*recipsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_RECIP && flag_unsafe_math_optimizations"
+ "recip.s\t%0, %2"
+ [(set_attr "type" "fdiv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Remainders.
+
+(define_insn "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (mod:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "rems\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (umod:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_DIV32"
+ "remu\t%0, %1, %2"
+ [(set_attr "type" "div32")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Square roots.
+
+(define_insn "sqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT_SQRT"
+ "sqrt.s\t%0, %1"
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (div:SF (match_operand:SF 1 "const_float_1_operand" "")
+ (sqrt:SF (match_operand:SF 2 "register_operand" "f"))))]
+ "TARGET_HARD_FLOAT_RSQRT && flag_unsafe_math_optimizations"
+ "rsqrt.s\t%0, %2"
+ [(set_attr "type" "fsqrt")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Absolute value.
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (abs:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_ABS"
+ "abs\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (abs:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "abs.s\t%0, %1"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Min and max.
+
+(define_insn "<code>si3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (any_minmax:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ "TARGET_MINMAX"
+ "<minmax>\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Count leading/trailing zeros and find first bit.
+
+(define_insn "clzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (clz:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_NSA"
+ "nsau\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "ctzsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ctz:SI (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_NSA"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (temp, operands[1]));
+ emit_insn (gen_andsi3 (temp, temp, operands[1]));
+ emit_insn (gen_clzsi2 (temp, temp));
+ emit_insn (gen_negsi2 (temp, temp));
+ emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31)));
+ DONE;
+})
+
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ffs:SI (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_NSA"
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (temp, operands[1]));
+ emit_insn (gen_andsi3 (temp, temp, operands[1]));
+ emit_insn (gen_clzsi2 (temp, temp));
+ emit_insn (gen_negsi2 (temp, temp));
+ emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32)));
+ DONE;
+})
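+
+;; Both expansions above rely on (x & -x) isolating the least
+;; significant set bit; for x != 0, nsau (clzsi2) then returns
+;; 31 - ctz(x), so negating and adding 31 (or 32 for ffs's one-based
+;; result) recovers the bit number. For example, x = 12 (0b1100)
+;; gives x & -x = 4, nsau = 29, ctz = 2, ffs = 3. For x = 0, nsau
+;; returns 32, yielding ctz = -1 and ffs = 0 as required.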
+
+
+;; Negation and one's complement.
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ ""
+ "neg\t%0, %1"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (not:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+{
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_movsi (temp, constm1_rtx));
+ emit_insn (gen_xorsi3 (operands[0], temp, operands[1]));
+ DONE;
+})
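+
+;; The expansion above synthesizes ~x as x XOR -1, since the ISA has no
+;; one's-complement instruction; the constant -1 is first loaded into a
+;; scratch register.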
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (neg:SF (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "neg.s\t%0, %1"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Logical instructions.
+
+(define_insn "andsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "mask_operand" "P,r")))]
+ ""
+ "@
+ extui\t%0, %1, 0, %K2
+ and\t%0, %1, %2"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "or\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (xor:SI (match_operand:SI 1 "register_operand" "%r")
+ (match_operand:SI 2 "register_operand" "r")))]
+ ""
+ "xor\t%0, %1, %2"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Zero-extend instructions.
+
+(define_insn "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
+ ""
+ "@
+ extui\t%0, %1, 0, 16
+ l16ui\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "zero_extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
+ ""
+ "@
+ extui\t%0, %1, 0, 8
+ l8ui\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+
+;; Sign-extend instructions.
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "")))]
+ ""
+{
+ if (sext_operand (operands[1], HImode))
+ emit_insn (gen_extendhisi2_internal (operands[0], operands[1]));
+ else
+ xtensa_extend_reg (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "extendhisi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=B,a")
+ (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
+ ""
+ "@
+ sext\t%0, %1, 15
+ l16si\t%0, %1"
+ [(set_attr "type" "arith,load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "")))]
+ ""
+{
+ if (TARGET_SEXT)
+ emit_insn (gen_extendqisi2_internal (operands[0], operands[1]));
+ else
+ xtensa_extend_reg (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "extendqisi2_internal"
+ [(set (match_operand:SI 0 "register_operand" "=B")
+ (sign_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_SEXT"
+ "sext\t%0, %1, 7"
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Field extract instructions.
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_SEXT"
+{
+ if (!sext_fldsz_operand (operands[2], SImode))
+ FAIL;
+
+ /* We could expand to a right shift followed by SEXT but that's
+ no better than the standard left and right shift sequence. */
+ if (!lsbitnum_operand (operands[3], SImode))
+ FAIL;
+
+ emit_insn (gen_extv_internal (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_insn "extv_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "sext_fldsz_operand" "i")
+ (match_operand:SI 3 "lsbitnum_operand" "i")))]
+ "TARGET_SEXT"
+{
+ int fldsz = INTVAL (operands[2]);
+ operands[2] = GEN_INT (fldsz - 1);
+ return "sext\t%0, %1, %2";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")))]
+ ""
+{
+ if (!extui_fldsz_operand (operands[2], SImode))
+ FAIL;
+ emit_insn (gen_extzv_internal (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+(define_insn "extzv_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "extui_fldsz_operand" "i")
+ (match_operand:SI 3 "const_int_operand" "i")))]
+ ""
+{
+ int shift;
+ if (BITS_BIG_ENDIAN)
+ shift = (32 - (INTVAL (operands[2]) + INTVAL (operands[3]))) & 0x1f;
+ else
+ shift = INTVAL (operands[3]) & 0x1f;
+ operands[3] = GEN_INT (shift);
+ return "extui\t%0, %1, %3, %2";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+
+;; Conversions.
+
+(define_insn "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (fix:SI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "trunc.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "fixuns_truncsfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unsigned_fix:SI (match_operand:SF 1 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "utrunc.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "floatsisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (float:SF (match_operand:SI 1 "register_operand" "a")))]
+ "TARGET_HARD_FLOAT"
+ "float.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "floatunssisf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unsigned_float:SF (match_operand:SI 1 "register_operand" "a")))]
+ "TARGET_HARD_FLOAT"
+ "ufloat.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+
+;; Data movement instructions.
+
+;; 64-bit Integer moves
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmed_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+{
+ if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
+ operands[1] = force_const_mem (DImode, operands[1]);
+
+ if (!register_operand (operands[0], DImode)
+ && !register_operand (operands[1], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn_and_split "movdi_internal"
+ [(set (match_operand:DI 0 "nonimmed_operand" "=a,W,a,a,U")
+ (match_operand:DI 1 "move_operand" "r,i,T,U,r"))]
+ "register_operand (operands[0], DImode)
+ || register_operand (operands[1], DImode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ xtensa_split_operand_pair (operands, SImode);
+ if (reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp;
+ tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
+ tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+ }
+})
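+
+;; The split above turns a DImode move into two SImode moves after
+;; reload. If the first destination word overlaps a register used by
+;; the second source (e.g. a load whose address register is also the
+;; low word of the destination), the two word moves are emitted in the
+;; opposite order so the overlapping source is read before it is
+;; clobbered.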
+
+;; 32-bit Integer moves
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmed_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, SImode))
+ DONE;
+})
+
+(define_insn "movsi_internal"
+ [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,W,a,a,U,*a,*A")
+ (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,i,T,U,r,*A,*r"))]
+ "xtensa_valid_move (SImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov.n\t%0, %1
+ %v1l32i.n\t%0, %1
+ %v0s32i.n\t%1, %0
+ %v0s32i.n\t%1, %0
+ mov\t%0, %1
+ movsp\t%0, %1
+ movi\t%0, %x1
+ const16\t%0, %t1\;const16\t%0, %b1
+ %v1l32r\t%0, %1
+ %v1l32i\t%0, %1
+ %v0s32i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,load,load,store,rsr,wsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "2,2,2,2,2,2,3,3,3,6,3,3,3,3,3")])
+
+;; 16-bit Integer moves
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmed_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, HImode))
+ DONE;
+})
+
+(define_insn "movhi_internal"
+ [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
+ (match_operand:HI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ "xtensa_valid_move (HImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov\t%0, %1
+ movi\t%0, %x1
+ %v1l16ui\t%0, %1
+ %v0s16i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
+ (set_attr "mode" "HI")
+ (set_attr "length" "2,2,3,3,3,3,3,3")])
+
+;; 8-bit Integer moves
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmed_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ if (xtensa_emit_move_sequence (operands, QImode))
+ DONE;
+})
+
+(define_insn "movqi_internal"
+ [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
+ (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ "xtensa_valid_move (QImode, operands)"
+ "@
+ movi.n\t%0, %x1
+ mov.n\t%0, %1
+ mov\t%0, %1
+ movi\t%0, %x1
+ %v1l8ui\t%0, %1
+ %v0s8i\t%1, %0
+ rsr\t%0, ACCLO
+ wsr\t%1, ACCLO"
+ [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
+ (set_attr "mode" "QI")
+ (set_attr "length" "2,2,3,3,3,3,3,3")])
+
+;; Sub-word reloads from the constant pool.
+
+(define_expand "reload<mode>_literal"
+ [(parallel [(match_operand:HQI 0 "register_operand" "=r")
+ (match_operand:HQI 1 "constantpool_operand" "")
+ (match_operand:SI 2 "register_operand" "=&r")])]
+ ""
+{
+ rtx lit, scratch;
+ unsigned word_off, byte_off;
+
+ if (MEM_P (operands[1]))
+ {
+ lit = operands[1];
+ word_off = 0;
+ byte_off = 0;
+ }
+ else
+ {
+ gcc_assert (GET_CODE (operands[1]) == SUBREG);
+ lit = SUBREG_REG (operands[1]);
+ word_off = SUBREG_BYTE (operands[1]) & ~(UNITS_PER_WORD - 1);
+ byte_off = SUBREG_BYTE (operands[1]) - word_off;
+ }
+
+ lit = adjust_address (lit, SImode, word_off);
+ scratch = operands[2];
+ emit_insn (gen_movsi (scratch, lit));
+ emit_insn (gen_mov<mode> (operands[0],
+ gen_rtx_SUBREG (<MODE>mode, scratch, byte_off)));
+
+ DONE;
+})
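+
+;; For example, reloading a HImode constant-pool entry loads the
+;; enclosing SImode word into the scratch register (typically via
+;; l32r) and then moves the requested 16-bit piece out of the scratch
+;; as a subreg.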
+
+;; 32-bit floating point moves
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmed_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+{
+ if (!TARGET_CONST16 && CONSTANT_P (operands[1]))
+ operands[1] = force_const_mem (SFmode, operands[1]);
+
+ if ((!register_operand (operands[0], SFmode)
+ && !register_operand (operands[1], SFmode))
+ || (FP_REG_P (xt_true_regnum (operands[0]))
+ && !(reload_in_progress | reload_completed)
+ && (constantpool_mem_p (operands[1])
+ || CONSTANT_P (operands[1]))))
+ operands[1] = force_reg (SFmode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn "movsf_internal"
+ [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,D,R,a,f,a,W,a,a,U")
+ (match_operand:SF 1 "move_operand" "f,U,f,d,R,d,r,r,f,iF,T,U,r"))]
+ "((register_operand (operands[0], SFmode)
+ || register_operand (operands[1], SFmode))
+ && !(FP_REG_P (xt_true_regnum (operands[0]))
+ && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
+ "@
+ mov.s\t%0, %1
+ %v1lsi\t%0, %1
+ %v0ssi\t%1, %0
+ mov.n\t%0, %1
+ %v1l32i.n\t%0, %1
+ %v0s32i.n\t%1, %0
+ mov\t%0, %1
+ wfr\t%0, %1
+ rfr\t%0, %1
+ const16\t%0, %t1\;const16\t%0, %b1
+ %v1l32r\t%0, %1
+ %v1l32i\t%0, %1
+ %v0s32i\t%1, %0"
+ [(set_attr "type" "farith,fload,fstore,move,load,store,move,farith,farith,move,load,load,store")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,2,2,2,3,3,3,6,3,3,3")])
+
+(define_insn "*lsiu"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (mem:SF (plus:SI (match_operand:SI 1 "register_operand" "+a")
+ (match_operand:SI 2 "fpmem_offset_operand" "i"))))
+ (set (match_dup 1)
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_HARD_FLOAT"
+{
+ if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn)))
+ output_asm_insn ("memw", operands);
+ return "lsiu\t%0, %1, %2";
+}
+ [(set_attr "type" "fload")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*ssiu"
+ [(set (mem:SF (plus:SI (match_operand:SI 0 "register_operand" "+a")
+ (match_operand:SI 1 "fpmem_offset_operand" "i")))
+ (match_operand:SF 2 "register_operand" "f"))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1)))]
+ "TARGET_HARD_FLOAT"
+{
+ if (TARGET_SERIALIZE_VOLATILE && volatile_refs_p (PATTERN (insn)))
+ output_asm_insn ("memw", operands);
+ return "ssiu\t%2, %0, %1";
+}
+ [(set_attr "type" "fstore")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+;; 64-bit floating point moves
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmed_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+{
+ if (CONSTANT_P (operands[1]) && !TARGET_CONST16)
+ operands[1] = force_const_mem (DFmode, operands[1]);
+
+ if (!register_operand (operands[0], DFmode)
+ && !register_operand (operands[1], DFmode))
+ operands[1] = force_reg (DFmode, operands[1]);
+
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn_and_split "movdf_internal"
+ [(set (match_operand:DF 0 "nonimmed_operand" "=a,W,a,a,U")
+ (match_operand:DF 1 "move_operand" "r,iF,T,U,r"))]
+ "register_operand (operands[0], DFmode)
+ || register_operand (operands[1], DFmode)"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))]
+{
+ xtensa_split_operand_pair (operands, SFmode);
+ if (reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp;
+ tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
+ tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
+ }
+})
+
+;; Block moves
+
+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" ""))
+ (use (match_operand:SI 2 "arith_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))])]
+ ""
+{
+ if (!xtensa_expand_block_move (operands))
+ FAIL;
+ DONE;
+})
+
+
+;; Shift instructions.
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+{
+ operands[1] = xtensa_copy_incoming_a7 (operands[1]);
+})
+
+(define_insn "ashlsi3_internal"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ slli\t%0, %1, %R2
+ ssl\t%2\;sll\t%0, %1"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "ashrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ srai\t%0, %1, %R2
+ ssr\t%2\;sra\t%0, %1"
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "lshrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+{
+ if (which_alternative == 0)
+ {
+ if ((INTVAL (operands[2]) & 0x1f) < 16)
+ return "srli\t%0, %1, %R2";
+ else
+ return "extui\t%0, %1, %R2, %L2";
+ }
+ return "ssr\t%2\;srl\t%0, %1";
+}
+ [(set_attr "type" "arith,arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,6")])
+
+(define_insn "rotlsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (rotate:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ ssai\t%L2\;src\t%0, %1, %1
+ ssl\t%2\;src\t%0, %1, %1"
+ [(set_attr "type" "multi,multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6,6")])
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "J,r")))]
+ ""
+ "@
+ ssai\t%R2\;src\t%0, %1, %1
+ ssr\t%2\;src\t%0, %1, %1"
+ [(set_attr "type" "multi,multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6,6")])
+
+
+;; Comparisons.
+
+;; Conditional branches.
+
+(define_expand "cbranchsi4"
+ [(match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")])
+ (match_operand 3 "")]
+ ""
+{
+ xtensa_expand_conditional_branch (operands, SImode);
+ DONE;
+})
+
+(define_expand "cbranchsf4"
+ [(match_operator 0 "comparison_operator"
+ [(match_operand:SF 1 "register_operand")
+ (match_operand:SF 2 "register_operand")])
+ (match_operand 3 "")]
+ "TARGET_HARD_FLOAT"
+{
+ xtensa_expand_conditional_branch (operands, SFmode);
+ DONE;
+})
+
+;; Branch patterns for standard integer comparisons
+
+(define_insn "*btrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "branch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "branch_operand" "K,r")])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*bfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "branch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "branch_operand" "K,r")])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*ubtrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ubranch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "ubranch_operand" "L,r")])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+(define_insn "*ubfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "ubranch_operator"
+ [(match_operand:SI 0 "register_operand" "r,r")
+ (match_operand:SI 1 "ubranch_operand" "L,r")])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump,jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3,3")])
+
+;; Branch patterns for bit testing
+
+(define_insn "*bittrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "J,r"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ return xtensa_emit_bit_branch (false, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*bitfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r,r")
+ (const_int 1)
+ (match_operand:SI 1 "arith_operand" "J,r"))
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ return xtensa_emit_bit_branch (true, which_alternative == 0, operands);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*masktrue"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ ""
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: return "bnone\t%0, %1, %2";
+ case NE: return "bany\t%0, %1, %2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*maskfalse"
+ [(set (pc)
+ (if_then_else (match_operator 3 "boolean_operator"
+ [(and:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 2 "" ""))))]
+ ""
+{
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ: return "bany\t%0, %1, %2";
+ case NE: return "bnone\t%0, %1, %2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Define the loop insns used by bct optimization to represent the
+;; start and end of a zero-overhead loop (in loop-doloop.c). This start
+;; template generates the loop insn; the end template doesn't generate
+;; any instructions since loop end is handled in hardware.
+
+(define_insn "zero_cost_loop_start"
+ [(set (pc)
+ (if_then_else (eq (match_operand:SI 0 "register_operand" "a")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (reg:SI 19)
+ (plus:SI (match_dup 0) (const_int -1)))]
+ ""
+ "loopnez\t%0, %l1"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "zero_cost_loop_end"
+ [(set (pc)
+ (if_then_else (ne (reg:SI 19) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (set (reg:SI 19)
+ (plus:SI (reg:SI 19) (const_int -1)))]
+ ""
+{
+ xtensa_emit_loop_end (insn, operands);
+ return "";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
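+
+;; For illustration: a counted loop such as
+;;   do { body (); } while (--n != 0);
+;; can be bracketed by these two templates so that the start insn emits
+;; "loopnez a2, .Lend" and the body then runs with no explicit
+;; decrement, compare, or branch: the LOOP-option hardware decrements
+;; the count register modeled as (reg:SI 19) above and branches back
+;; until it reaches zero, which is why the end template has length 0.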
+
+
+;; Setting a register from a comparison.
+
+(define_expand "cstoresi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operator 1 "xtensa_cstoresi_operator"
+ [(match_operand:SI 2 "register_operand")
+ (match_operand:SI 3 "nonmemory_operand")])]
+ ""
+{
+ if (!xtensa_expand_scc (operands, SImode))
+ FAIL;
+ DONE;
+})
+
+(define_expand "cstoresf4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:SF 2 "register_operand")
+ (match_operand:SF 3 "register_operand")])]
+ "TARGET_HARD_FLOAT"
+{
+ if (!xtensa_expand_scc (operands, SFmode))
+ FAIL;
+ DONE;
+})
+
+
+
+;; Conditional moves.
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
+ ""
+{
+ if (!xtensa_expand_conditional_move (operands, 0))
+ FAIL;
+ DONE;
+})
+
+(define_expand "movsfcc"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (if_then_else:SF (match_operand 1 "comparison_operator" "")
+ (match_operand:SF 2 "register_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
+ ""
+{
+ if (!xtensa_expand_conditional_move (operands, 1))
+ FAIL;
+ DONE;
+})
+
+(define_insn "movsicc_internal0"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (if_then_else:SI (match_operator 4 "branch_operator"
+ [(match_operand:SI 1 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 2 "register_operand" "r,0")
+ (match_operand:SI 3 "register_operand" "0,r")))]
+ ""
+{
+ return xtensa_emit_movcc (which_alternative == 1, false, false, operands);
+}
+ [(set_attr "type" "move,move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "movsicc_internal1"
+ [(set (match_operand:SI 0 "register_operand" "=a,a")
+ (if_then_else:SI (match_operator 4 "boolean_operator"
+ [(match_operand:CC 1 "register_operand" "b,b")
+ (const_int 0)])
+ (match_operand:SI 2 "register_operand" "r,0")
+ (match_operand:SI 3 "register_operand" "0,r")))]
+ "TARGET_BOOLEANS"
+{
+ return xtensa_emit_movcc (which_alternative == 1, false, true, operands);
+}
+ [(set_attr "type" "move,move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3,3")])
+
+(define_insn "movsfcc_internal0"
+ [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
+ (if_then_else:SF (match_operator 4 "branch_operator"
+ [(match_operand:SI 1 "register_operand" "r,r,r,r")
+ (const_int 0)])
+ (match_operand:SF 2 "register_operand" "r,0,f,0")
+ (match_operand:SF 3 "register_operand" "0,r,0,f")))]
+ ""
+{
+ return xtensa_emit_movcc ((which_alternative & 1) == 1,
+ which_alternative >= 2, false, operands);
+}
+ [(set_attr "type" "move,move,move,move")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,3")])
+
+(define_insn "movsfcc_internal1"
+ [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
+ (if_then_else:SF (match_operator 4 "boolean_operator"
+ [(match_operand:CC 1 "register_operand" "b,b,b,b")
+ (const_int 0)])
+ (match_operand:SF 2 "register_operand" "r,0,f,0")
+ (match_operand:SF 3 "register_operand" "0,r,0,f")))]
+ "TARGET_BOOLEANS"
+{
+ return xtensa_emit_movcc ((which_alternative & 1) == 1,
+ which_alternative >= 2, true, operands);
+}
+ [(set_attr "type" "move,move,move,move")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3,3,3,3")])
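+
+;; For illustration: a conditional expression whose else-value is tied
+;; to the destination, such as
+;;   x = (a == 0) ? b : x;
+;; fits movsicc_internal0, and xtensa_emit_movcc would emit the single
+;; conditional move "moveqz a2, a3, a4"; the other comparison codes map
+;; onto MOVNEZ, MOVLTZ, and MOVGEZ, and the boolean variants use
+;; MOVF/MOVT (MOVF.S/MOVT.S for the SFmode patterns).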
+
+
+;; Floating-point comparisons.
+
+(define_insn "s<code>_sf"
+ [(set (match_operand:CC 0 "register_operand" "=b")
+ (any_scc_sf:CC (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT"
+ "<scc_sf>.s\t%0, %1, %2"
+ [(set_attr "type" "farith")
+ (set_attr "mode" "BL")
+ (set_attr "length" "3")])
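+
+;; The any_scc_sf code iterator (defined elsewhere in this file) maps
+;; each comparison code to its FP compare instruction, so this single
+;; template presumably covers OEQ.S, OLT.S, and OLE.S plus their
+;; unordered counterparts, each writing a boolean register ("BL" mode)
+;; that the branch and conditional-move patterns above consume.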
+
+
+;; Unconditional branches.
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "j\t%l0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_expand "indirect_jump"
+ [(set (pc)
+ (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx dest = operands[0];
+ if (GET_CODE (dest) != REG || GET_MODE (dest) != Pmode)
+ dest = copy_to_mode_reg (Pmode, dest);
+
+ emit_jump_insn (gen_indirect_jump_internal (dest));
+ DONE;
+})
+
+(define_insn "indirect_jump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+ ""
+ "jx\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+(define_expand "tablejump"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+{
+ rtx target = operands[0];
+ if (flag_pic)
+ {
+ /* For PIC, the table entry is relative to the start of the table. */
+ rtx label = gen_reg_rtx (SImode);
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (label, gen_rtx_LABEL_REF (SImode, operands[1]));
+ emit_insn (gen_addsi3 (target, operands[0], label));
+ }
+ emit_jump_insn (gen_tablejump_internal (target, operands[1]));
+ DONE;
+})
+
+(define_insn "tablejump_internal"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jx\t%0"
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Function calls.
+
+(define_expand "sym_PLT"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PLT))]
+ ""
+ "")
+
+(define_expand "call"
+ [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "" ""))]
+ ""
+{
+ rtx addr = XEXP (operands[0], 0);
+ if (flag_pic && GET_CODE (addr) == SYMBOL_REF
+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+ addr = gen_sym_PLT (addr);
+ if (!call_insn_operand (addr, VOIDmode))
+ XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, addr);
+})
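+
+;; For illustration: with -fPIC, a call to a preemptible function
+;;   extern int foo (void);  ...  foo ();
+;; gets its address wrapped in the sym_PLT unspec above, so the insn
+;; below would assemble to something like "call8 foo@PLT"; local or
+;; non-PIC calls stay a plain "call8 foo" (or "callx8" through a
+;; register).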
+
+(define_insn "call_internal"
+ [(call (mem (match_operand:SI 0 "call_insn_operand" "nir"))
+ (match_operand 1 "" "i"))]
+ ""
+{
+ return xtensa_emit_call (0, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_expand "call_value"
+ [(set (match_operand 0 "register_operand" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")))]
+ ""
+{
+ rtx addr = XEXP (operands[1], 0);
+ if (flag_pic && GET_CODE (addr) == SYMBOL_REF
+ && (!SYMBOL_REF_LOCAL_P (addr) || SYMBOL_REF_EXTERNAL_P (addr)))
+ addr = gen_sym_PLT (addr);
+ if (!call_insn_operand (addr, VOIDmode))
+ XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, addr);
+})
+
+(define_insn "call_value_internal"
+ [(set (match_operand 0 "register_operand" "=a")
+ (call (mem (match_operand:SI 1 "call_insn_operand" "nir"))
+ (match_operand 2 "" "i")))]
+ ""
+{
+ return xtensa_emit_call (1, operands);
+}
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "entry"
+ [(set (reg:SI A1_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "i")]
+ UNSPECV_ENTRY))]
+ ""
+ "entry\tsp, %0"
+ [(set_attr "type" "entry")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "return"
+ [(return)
+ (use (reg:SI A0_REG))]
+ "reload_completed"
+{
+ return (TARGET_DENSITY ? "retw.n" : "retw");
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "2")])
+
+
+;; Miscellaneous instructions.
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ xtensa_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ emit_jump_insn (gen_return ());
+ DONE;
+})
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+{
+ return (TARGET_DENSITY ? "nop.n" : "nop");
+}
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_expand "nonlocal_goto"
+ [(match_operand:SI 0 "general_operand" "")
+ (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand:SI 3 "" "")]
+ ""
+{
+ xtensa_expand_nonlocal_goto (operands);
+ DONE;
+})
+
+;; Stuff an address into the return address register along with the window
+;; size in the high bits. Because we don't have the window size of the
+;; previous frame, assume the function called out with a CALL8 since that
+;; is what compilers always use. Note: __builtin_frob_return_addr has
+;; already been applied to the handler, but the generic version doesn't
+;; allow us to frob it quite enough, so we just frob here.
+
+(define_insn_and_split "eh_return"
+ [(set (reg:SI A0_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN))
+ (clobber (match_scratch:SI 1 "=r"))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 1) (ashift:SI (match_dup 0) (const_int 2)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 2)))
+ (set (reg:SI A0_REG) (rotatert:SI (match_dup 1) (const_int 2)))]
+ "")
+
+;; Setting up a frame pointer is tricky for Xtensa because GCC doesn't
+;; know if a frame pointer is required until the reload pass, and
+;; because there may be an incoming argument value in the hard frame
+;; pointer register (a7). If there is an incoming argument in that
+;; register, the "set_frame_ptr" insn gets inserted immediately after
+;; the insn that copies the incoming argument to a pseudo or to the
+;; stack. This serves several purposes here: (1) it keeps the
+;; optimizer from copy-propagating or scheduling the use of a7 as an
+;; incoming argument away from the beginning of the function; (2) we
+;; can use a post-reload splitter to expand away the insn if a frame
+;; pointer is not required, so that the post-reload scheduler can do
+;; the right thing; and (3) it makes it easy for the prologue expander
+;; to search for this insn to determine whether it should add a new insn
+;; to set up the frame pointer.
+
+(define_insn "set_frame_ptr"
+ [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
+ ""
+{
+ if (frame_pointer_needed)
+ return "mov\ta7, sp";
+ return "";
+}
+ [(set_attr "type" "move")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
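+
+;; For illustration: in a function whose sixth register argument
+;; arrives in a7, e.g.
+;;   int f (int a, int b, int c, int d, int e, int g) { ... }
+;; the set_frame_ptr insn sits just after the insn that copies g out of
+;; a7, so a7 is not clobbered by "mov a7, sp" while it still holds the
+;; incoming argument.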
+
+;; Post-reload splitter to remove fp assignment when it's not needed.
+(define_split
+ [(set (reg:SI A7_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_SET_FP))]
+ "reload_completed && !frame_pointer_needed"
+ [(unspec [(const_int 0)] UNSPEC_NOP)]
+ "")
+
+;; The preceding splitter needs something to split the insn into;
+;; things start breaking if the result is just a "use" so instead we
+;; generate the following insn.
+(define_insn "*unspec_nop"
+ [(unspec [(const_int 0)] UNSPEC_NOP)]
+ ""
+ ""
+ [(set_attr "type" "nop")
+ (set_attr "mode" "none")
+ (set_attr "length" "0")])
+
+
+;; TLS support
+
+(define_expand "sym_TPOFF"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_TPOFF))]
+ ""
+ "")
+
+(define_expand "sym_DTPOFF"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_DTPOFF))]
+ ""
+ "")
+
+(define_insn "load_tp"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(const_int 0)] UNSPEC_TP))]
+ "TARGET_THREADPTR"
+ "rur\t%0, THREADPTR"
+ [(set_attr "type" "rsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "set_tp"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_SET_TP)]
+ "TARGET_THREADPTR"
+ "wur\t%0, THREADPTR"
+ [(set_attr "type" "wsr")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_func"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")]
+ UNSPEC_TLS_FUNC))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "movi\t%0, %1@TLSFUNC"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_arg"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbol_operand" "")]
+ UNSPEC_TLS_ARG))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "movi\t%0, %1@TLSARG"
+ [(set_attr "type" "load")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
+(define_insn "tls_call"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (call (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "tls_symbol_operand" "")]
+ UNSPEC_TLS_CALL))
+ (match_operand 3 "" "i")))]
+ "TARGET_THREADPTR && HAVE_AS_TLS"
+ "callx8.tls %1, %2@TLSCALL"
+ [(set_attr "type" "call")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
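+
+;; Taken together, these three patterns presumably emit a sequence
+;; along the lines of (register choice illustrative)
+;;   movi        a8, x@TLSFUNC
+;;   movi        a10, x@TLSARG
+;;   callx8.tls  a8, x@TLSCALL
+;; where the call computes the address of the thread-local variable x
+;; relative to the thread pointer maintained by RUR/WUR THREADPTR
+;; above.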
+
+
+;; Instructions for the Xtensa "boolean" option.
+
+(define_insn "*booltrue"
+ [(set (pc)
+ (if_then_else (match_operator 2 "boolean_operator"
+ [(match_operand:CC 0 "register_operand" "b")
+ (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ "TARGET_BOOLEANS"
+{
+ if (GET_CODE (operands[2]) == EQ)
+ return "bf\t%0, %1";
+ else
+ return "bt\t%0, %1";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+(define_insn "*boolfalse"
+ [(set (pc)
+ (if_then_else (match_operator 2 "boolean_operator"
+ [(match_operand:CC 0 "register_operand" "b")
+ (const_int 0)])
+ (pc)
+ (label_ref (match_operand 1 "" ""))))]
+ "TARGET_BOOLEANS"
+{
+ if (GET_CODE (operands[2]) == EQ)
+ return "bt\t%0, %1";
+ else
+ return "bf\t%0, %1";
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+
+;; Atomic operations
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMW))]
+ ""
+ "memw"
+ [(set_attr "type" "unknown")
+ (set_attr "mode" "none")
+ (set_attr "length" "3")])
+
+;; sync_lock_release is only implemented for SImode.
+;; For other modes, just use the default of a store with a memory_barrier.
+(define_insn "sync_lock_releasesi"
+ [(set (match_operand:SI 0 "mem_operand" "=U")
+ (unspec_volatile:SI
+ [(match_operand:SI 1 "register_operand" "r")]
+ UNSPECV_S32RI))]
+ "TARGET_RELEASE_SYNC"
+ "s32ri\t%1, %0"
+ [(set_attr "type" "store")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
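+
+;; For illustration: __sync_lock_release (&lock) can then assemble to
+;;   movi   a3, 0
+;;   s32ri  a3, a2, 0
+;; using the single release-store S32RI instead of the generic MEMW
+;; barrier followed by a plain S32I.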
+
+(define_insn "sync_compare_and_swapsi"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (match_operand:SI 1 "mem_operand" "+U"))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_dup 1)
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "0")]
+ UNSPECV_S32C1I))])]
+ "TARGET_S32C1I"
+ "wsr\t%2, SCOMPARE1\;s32c1i\t%3, %1"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")])
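+
+;; For illustration (registers hypothetical):
+;; __sync_val_compare_and_swap (&x, old, new) maps onto this pattern as
+;;   wsr     a4, SCOMPARE1
+;;   s32c1i  a5, a2, 0
+;; WSR puts the expected value in SCOMPARE1; S32C1I stores a5 to the
+;; memory word only if the word still equals SCOMPARE1 and, either way,
+;; loads the word's previous contents into a5, which the "0" constraint
+;; on operand 3 ties to the result register.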
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:HQI 0 "register_operand" "")
+ (match_operand:HQI 1 "mem_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:HQI
+ [(match_dup 1)
+ (match_operand:HQI 2 "register_operand" "")
+ (match_operand:HQI 3 "register_operand" "")]
+ UNSPECV_S32C1I))])]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_compare_and_swap (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+})
+
+(define_expand "sync_lock_test_and_set<mode>"
+ [(match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "register_operand")]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (SET, operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+(define_expand "sync_<atomic><mode>"
+ [(set (match_operand:HQI 0 "memory_operand")
+ (ATOMIC:HQI (match_dup 0)
+ (match_operand:HQI 1 "register_operand")))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, NULL_RTX, operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "sync_old_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (match_operand:HQI 1 "memory_operand"))
+ (set (match_dup 1)
+ (ATOMIC:HQI (match_dup 1)
+ (match_operand:HQI 2 "register_operand")))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+(define_expand "sync_new_<atomic><mode>"
+ [(set (match_operand:HQI 0 "register_operand")
+ (ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
+ (match_operand:HQI 2 "register_operand")))
+ (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))]
+ "TARGET_S32C1I"
+{
+ xtensa_expand_atomic (<CODE>, operands[0], operands[1], operands[2], true);
+ DONE;
+})
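+
+;; All of the HImode/QImode expansions above funnel into
+;; xtensa_expand_compare_and_swap and xtensa_expand_atomic; since
+;; S32C1I operates only on aligned 32-bit words, these presumably mask
+;; and shift the subword value into its containing SImode word and
+;; retry a word-sized compare-and-swap loop until it succeeds.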
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
new file mode 100644
index 000000000..e78104f1c
--- /dev/null
+++ b/gcc/config/xtensa/xtensa.opt
@@ -0,0 +1,43 @@
+; Options for the Tensilica Xtensa port of the compiler.
+
+; Copyright (C) 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mconst16
+Target Report Mask(CONST16)
+Use CONST16 instruction to load constants
+
+mforce-no-pic
+Target Report Mask(FORCE_NO_PIC)
+Disable position-independent code (PIC) for use in OS kernel code
+
+mlongcalls
+Target
+Use indirect CALLXn instructions for large programs
+
+mtarget-align
+Target
+Automatically align branch targets to reduce branch penalties
+
+mtext-section-literals
+Target
+Intersperse literal pools with code in the text section
+
+mserialize-volatile
+Target Report Mask(SERIALIZE_VOLATILE)
+-mno-serialize-volatile Do not serialize volatile memory references with MEMW instructions